[
  {
    "path": ".dockerignore",
    "content": "# File .dockerignore\n.git\n.gitignore\n.github\n\n# directories to ignore\nexample/\ntest/\nrecipe/\n# in source code\nsrc/*.o\nexternal_libs/pgenlib/*.o\nexternal_libs/pgenlib/include/*.o\n\n# Files\nREADME.md\nLICENSE\n"
  },
  {
    "path": ".github/workflows/docker-image.yml",
    "content": "name: Docker Image CI\n\non:\n  push:\n    tags:\n      - 'v*'\n  workflow_dispatch:\n\nenv:\n  REGISTRY: docker.pkg.github.com\n  REPO_NAME: ${{ github.repository }}\n\njobs:\n  build-and-push-image:\n  \n    runs-on: ubuntu-latest\n    \n    steps:      \n    - \n      name: Checkout repository\n      uses: actions/checkout@v3\n\n    - \n      name: Read VERSION file\n      id: getversion\n      run: echo \"version=$(cat VERSION)\" >> $GITHUB_OUTPUT\n      #run: echo \"::set-output name=version::$(cat VERSION)\"\n    \n    - name: Build and push image to Docker\n      run: |\n        IMAGE_NAME=\"docker.pkg.github.com/${{ env.REPO_NAME }}/regenie:v${{ steps.getversion.outputs.version }}.gz\"\n        echo \"${{ secrets.GH_PAT }}\" | docker login https://docker.pkg.github.com -u ${GITHUB_ACTOR} --password-stdin\n        docker build -f Dockerfile_mkl \\\n          --build-arg BOOST_IO=1 \\\n          --build-arg LIB_INSTALL=libboost-iostreams-dev \\\n          -t $IMAGE_NAME . \n        docker push $IMAGE_NAME\n"
  },
  {
    "path": ".github/workflows/manual.yml",
    "content": "# This is a basic workflow that is manually triggered\n\nname: Manual workflow\n\n# Controls when the action will run. Workflow runs when manually triggered using the UI\n# or API.\non:\n  workflow_dispatch:\n\n# A workflow run is made up of one or more jobs that can run sequentially or in parallel\njobs:\n  build:\n    runs-on: ubuntu-20.04\n    name: Build\n    env:\n      BOOST_IO: 0\n      STATIC: 1\n      BGEN_VERSION: v1.1.7\n      LIB_INSTALL: g++ libgomp1 make python3 zlib1g-dev wget\n    steps:\n      - name: Checkout repository\n        uses: actions/checkout@v3\n\n      - name: Prep GCC install\n        run: |\n          sudo apt-get install -y software-properties-common\n          sudo add-apt-repository ppa:ubuntu-toolchain-r/test\n          sudo apt update\n\n      - name: Apt packages\n        run: sudo apt install -y --no-install-recommends ${LIB_INSTALL}\n\n      - name: Cache Bgen\n        id: bgen-cache\n        uses: actions/cache@v3\n        with:\n          path: bgen.tgz\n          key: ${{ runner.os }}-bgen\n\n      - name: Download Bgen if not cached\n        if: steps.bgen-cache.outputs.cache-hit != 'true'\n        run: |\n          wget -O bgen.tgz \"http://code.enkre.net/bgen/tarball/release/${BGEN_VERSION}\"\n      - name: Install Bgen\n        run: |\n          tar xzf bgen.tgz\n          pushd ${BGEN_VERSION}\n          python3 waf configure\n          python3 waf -v\n          popd\n      - name: Build\n        run: make BGEN_PATH=./${BGEN_VERSION} HAS_BOOST_IOSTREAM=$BOOST_IO STATIC=$STATIC all\n\n      - name: Test\n        run: ./regenie --help\n"
  },
  {
    "path": ".github/workflows/test.yml",
    "content": "name: build\n\non: \n  push:\n    tags:\n      - 'v*'\n  pull_request:\n    types:\n      - opened\n  release:\n    types:\n      - created\n  workflow_dispatch:\n\njobs:\n  build:\n    runs-on: ubuntu-latest\n    name: Build\n    env:\n      BOOST_IO: 0\n      STATIC: 1\n      BGEN_VERSION: v1.1.7\n      CMAKE_VERSION_MAJOR: 3.28\n      CMAKE_VERSION_MINOR: 0\n      LIB_INSTALL: g++-9 gfortran-9 libgomp1 make python3 zlib1g-dev wget libcurl4-openssl-dev\n    steps:\n      - name: Checkout repository\n        uses: actions/checkout@v3\n\n      - name: Prep GCC install\n        run: |\n          sudo apt-get install -y software-properties-common\n          sudo add-apt-repository ppa:ubuntu-toolchain-r/test\n          sudo apt update\n\n      - name: Apt packages\n        run: |\n          sudo apt install -y --no-install-recommends ${LIB_INSTALL}\n          sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-9 60 --slave /usr/bin/g++ g++ /usr/bin/g++-9\n          sudo update-alternatives --install /usr/bin/gfortran gfortran /usr/bin/gfortran-9 70\n          sudo update-alternatives --install /usr/bin/f77 f77 /usr/bin/gfortran-9 70\n          sudo update-alternatives --install /usr/bin/cc cc /usr/bin/gcc-9 70\n          wget -O cmake_install.sh \"http://cmake.org/files/v${CMAKE_VERSION_MAJOR}/cmake-${CMAKE_VERSION_MAJOR}.${CMAKE_VERSION_MINOR}-linux-x86_64.sh\"\n          sudo sh cmake_install.sh --prefix=/usr/local --skip-license --exclude-subdir\n          sudo rm cmake_install.sh\n\n      - name: Cache Bgen\n        id: bgen-cache\n        uses: actions/cache@v3\n        with:\n          path: bgen.tgz\n          key: ${{ runner.os }}-bgen\n\n      - name: Download Bgen if not cached\n        if: steps.bgen-cache.outputs.cache-hit != 'true'\n        run: |\n          wget -O bgen.tgz \"http://code.enkre.net/bgen/tarball/release/${BGEN_VERSION}\"\n\n      - name: Install Bgen\n        run: |\n          tar -xzf bgen.tgz\n          pushd ${BGEN_VERSION}\n          python3 waf configure\n          python3 waf\n          popd\n\n      - name: Build\n        run: |\n          BGEN_PATH=./${BGEN_VERSION} HAS_BOOST_IOSTREAM=$BOOST_IO STATIC=$STATIC cmake .\n          make\n    \n      - name: Test\n        run: |\n          ./regenie --help\n          ./regenie --version\n"
  },
  {
    "path": ".gitignore",
    "content": "src/*.o\nexternal_libs/pgenlib/*.o\nexternal_libs/pgenlib/include/*.o\ndocs/nature.csl\nbuild/\n"
  },
  {
    "path": "CMakeLists.txt",
    "content": "# For Intel MKL, set MKLROOT=<MKL_path> when running cmake\n#   e.g. MKLROOT=/opt/mkl/ cmake -S regenie_dir/ -B regenie_dir/build/\n# For OpenBLAS, set OPENBLAS_ROOT=<OpenBLAS_path> when running cmake\n#   note: it also requires lapacke library\n# For static compilation on Linux systems, set STATIC=1 when running cmake\n#   -> this excludes GLIBC\n\n\ncmake_minimum_required(VERSION 3.13)\n\n# detect OS architecture\nexecute_process(\n  COMMAND uname -s\n  OUTPUT_VARIABLE UNAME_S\n  OUTPUT_STRIP_TRAILING_WHITESPACE\n  )\n\n# Get Regenie version\nfile(STRINGS \"VERSION\" RG_VERSION)\n\nproject(regenie\n  VERSION ${RG_VERSION}\n  )\n\ninclude(CMakePackageConfigHelpers)\ninclude(GNUInstallDirs)\n\n# specify the C++ standard\nset(CMAKE_CXX_STANDARD 11)\nset(CMAKE_CXX_STANDARD_REQUIRED ON)\nset(CMAKE_CXX_EXTENSIONS OFF)  # Ensures -std=c++11\n\n######################################\n######## check input variables\n\n# check BGEN_PATH\nif(\"$ENV{BGEN_PATH}\" STREQUAL \"\")\n  message( FATAL_ERROR \"Must specify path to BGEN library in 'BGEN_PATH'\")\nelse()\n  set(BGEN_PATH \"$ENV{BGEN_PATH}\" CACHE INTERNAL \"Set BGEN library path\")\n  if (NOT EXISTS ${BGEN_PATH})\n    message( FATAL_ERROR \"Specified BGEN library directory '${BGEN_PATH}' does not exist\")\n  endif()\nendif()\n\n# check for static compilation\nif($ENV{STATIC})\n  set(BUILD_STATIC ON CACHE INTERNAL \"Static compilation\")\n  message( STATUS \"Static compilation mode\")\nendif()\n\n# check Boost IOStreams\nif($ENV{HAS_BOOST_IOSTREAM})\n  set(HAS_BOOST_IOSTREAM 1 CACHE INTERNAL \"Add Boost IO\")\n  set(RG_VERSION \"${RG_VERSION}.gz\" CACHE INTERNAL \"Update version\")\nelse()\n  set(HAS_BOOST_IOSTREAM 0 CACHE INTERNAL \"Skip Boost IO\")\nendif()\n\n# check MKL\nif(NOT \"$ENV{MKLROOT}\" STREQUAL \"\")\n  set(MKLROOT \"$ENV{MKLROOT}\" CACHE INTERNAL \"Set MKL library path\")\n  if (NOT EXISTS ${MKLROOT})\n    message( FATAL_ERROR \"Specified MKL library directory '${MKLROOT}' does not exist\")\n  endif()\n  message( STATUS \"Will compile with Intel MKL library\")\nendif()\n\n# check HTSlib\nif(NOT \"$ENV{HTSLIB_PATH}\" STREQUAL \"\")\n  set(HTSLIB_PATH \"$ENV{HTSLIB_PATH}\" CACHE INTERNAL \"Set HTSlib library path\")\n  if (NOT EXISTS ${HTSLIB_PATH})\n    message( FATAL_ERROR \"Specified HTSlib library directory '${HTSLIB_PATH}' does not exist\")\n  endif()\n  message( STATUS \"Will compile with HTSlib\")\nendif()\n\n# check OpenBLAS\nif(NOT \"$ENV{OPENBLAS_ROOT}\" STREQUAL \"\")\n  set(OPENBLAS_ROOT \"$ENV{OPENBLAS_ROOT}\" CACHE INTERNAL \"Set OpenBLAS library path\")\n  if (NOT EXISTS ${OPENBLAS_ROOT})\n    message( FATAL_ERROR \"Specified OpenBLAS library directory '${OPENBLAS_ROOT}' does not exist\")\n  endif()\n  message( STATUS \"Will compile with OpenBLAS library\")\nendif()\n\n######################################\n######## set flags and required libraries\n\nset(BLA_STATIC               ${BUILD_STATIC})\nset(Boost_USE_STATIC_LIBS    ${BUILD_STATIC})\nset(Boost_USE_DEBUG_LIBS     OFF)\nset(Boost_USE_MULTITHREADED  ON)\nset(Boost_USE_STATIC_RUNTIME OFF)\n\n# list each file specifically\nadd_executable(regenie\n  ${CMAKE_SOURCE_DIR}/src/Data.cpp\n  ${CMAKE_SOURCE_DIR}/src/Files.cpp\n  ${CMAKE_SOURCE_DIR}/src/Geno.cpp\n  ${CMAKE_SOURCE_DIR}/src/HLM.cpp\n  ${CMAKE_SOURCE_DIR}/src/Interaction.cpp\n  ${CMAKE_SOURCE_DIR}/src/Joint_Tests.cpp\n  ${CMAKE_SOURCE_DIR}/src/Masks.cpp\n  ${CMAKE_SOURCE_DIR}/src/NNLS.cpp\n  ${CMAKE_SOURCE_DIR}/src/Pheno.cpp\n  ${CMAKE_SOURCE_DIR}/src/Regenie.cpp\n  
${CMAKE_SOURCE_DIR}/src/SKAT.cpp\n  ${CMAKE_SOURCE_DIR}/src/Step1_Models.cpp\n  ${CMAKE_SOURCE_DIR}/src/Step2_Models.cpp\n  ${CMAKE_SOURCE_DIR}/src/MultiTrait_Tests.cpp\n  ${CMAKE_SOURCE_DIR}/src/MCC.cpp\n  ${CMAKE_SOURCE_DIR}/src/Ordinal.cpp\n  ${CMAKE_SOURCE_DIR}/src/survival_data.cpp\n  ${CMAKE_SOURCE_DIR}/src/cox_ridge.cpp\n  ${CMAKE_SOURCE_DIR}/src/cox_score.cpp\n  ${CMAKE_SOURCE_DIR}/src/cox_firth.cpp\n  )\ntarget_include_directories(regenie PRIVATE ${CMAKE_SOURCE_DIR}/src)\n\nset(CMAKE_CXX_FLAGS \"-O3 -Wall -pedantic -ffast-math -Wno-unused-local-typedefs -Wno-deprecated-declarations -Wno-long-long -Wno-c11-extensions -fPIC\")\nadd_definitions(-DVERSION_NUMBER=\"${RG_VERSION}\")\n\nif(\"${UNAME_S}\" STREQUAL \"Linux\")\n  find_package(OpenMP REQUIRED)\n  target_link_libraries(regenie PRIVATE OpenMP::OpenMP_CXX)\n  if(${BUILD_STATIC})\n    target_link_options(regenie BEFORE PRIVATE -static-libgcc PRIVATE -static-libstdc++)\n  endif()\nelseif(\"${UNAME_S}\" STREQUAL \"Darwin\")\n    set(CMAKE_CXX_FLAGS \"${CMAKE_CXX_FLAGS} -stdlib=libc++\")\nendif()\n\nset(EXTERN_LIBS_PATH \"${CMAKE_SOURCE_DIR}/external_libs\")\ntarget_include_directories(regenie PRIVATE ${EXTERN_LIBS_PATH}/)\n\n# BGEN library and its dependencies\nfind_library(ZSTD_LIBRARY libzstd.a HINTS \"${BGEN_PATH}/build/3rd_party/zstd-1.1.0\" REQUIRED)\nfind_library(DB_LIBRARY libdb.a HINTS \"${BGEN_PATH}/build/db\" REQUIRED)\nfind_library(SQLITE3_LIBRARY libsqlite3.a HINTS \"${BGEN_PATH}/build/3rd_party/sqlite3\" REQUIRED)\nfind_library(Boost_LIBRARY libboost.a HINTS \"${BGEN_PATH}/build/3rd_party/boost_1_55_0\" REQUIRED)\nfind_library(BGEN_LIBRARY libbgen.a HINTS \"${BGEN_PATH}/build\" REQUIRED)\ntarget_link_libraries(regenie PRIVATE ${ZSTD_LIBRARY} ${BGEN_LIBRARY} ${DB_LIBRARY} ${SQLITE3_LIBRARY} ${Boost_LIBRARY})\ntarget_include_directories(regenie PRIVATE ${BGEN_PATH} ${BGEN_PATH}/genfile/include/ ${BGEN_PATH}/3rd_party/boost_1_55_0/ ${BGEN_PATH}/3rd_party/zstd-1.1.0/lib ${BGEN_PATH}/db/include/ ${BGEN_PATH}/3rd_party/sqlite3)\n\n# MVTNorm library\nset(MVTN_PATH \"${EXTERN_LIBS_PATH}/mvtnorm\")\nadd_custom_target(\n   libMvtnorm\n   COMMAND make\n   WORKING_DIRECTORY ${MVTN_PATH}\n)\ntarget_link_libraries(regenie PRIVATE ${MVTN_PATH}/libMvtnorm.a)\nadd_dependencies(regenie libMvtnorm)\n\n# QF library\nset(QF_PATH \"${EXTERN_LIBS_PATH}/qf\")\nadd_custom_target(\n   libqf\n   COMMAND make\n   WORKING_DIRECTORY ${QF_PATH}\n)\ntarget_link_libraries(regenie PRIVATE ${QF_PATH}/qf.a)\nadd_dependencies(regenie libqf)\n\n# Quadpack library\nset(QUAD_PATH \"${EXTERN_LIBS_PATH}/quadpack\")\nadd_custom_target(\n   libquad\n   COMMAND make\n   WORKING_DIRECTORY ${QUAD_PATH}\n)\ntarget_link_libraries(regenie PRIVATE ${QUAD_PATH}/libquad.a)\nadd_dependencies(regenie libquad)\n\n# PGEN library\nset(PGEN_PATH \"${EXTERN_LIBS_PATH}/pgenlib\")\nadd_custom_target(\n   pgenlib\n   COMMAND make\n   WORKING_DIRECTORY ${PGEN_PATH}\n)\ntarget_link_libraries(regenie PRIVATE ${PGEN_PATH}/pgenlib.a)\ntarget_include_directories(regenie PRIVATE ${PGEN_PATH} ${PGEN_PATH}/simde/ ${PGEN_PATH}/include/)\nadd_dependencies(regenie pgenlib)\n\n# REMETA library\nif(EXISTS ${HTSLIB_PATH})\n  set(REMETA_PATH \"${EXTERN_LIBS_PATH}/remeta\")\n  add_custom_target(\n     remeta\n     COMMAND make HTSLIB_PATH=${HTSLIB_PATH}\n     WORKING_DIRECTORY ${REMETA_PATH}\n  )\n  target_link_libraries(regenie PUBLIC ${REMETA_PATH}/remeta.a)\n  target_include_directories(regenie PUBLIC ${REMETA_PATH})\n  add_dependencies(regenie remeta)\n\n  add_definitions(-DWITH_HTSLIB)\n  
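# link HTSlib plus the system libraries it depends on (bz2, lzma, curl, crypto)\n  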
find_library(HTSLIB libhts.a HINTS ${HTSLIB_PATH})\n  find_library(BZ2_LIB bz2 REQUIRED)\n  find_library(LZMA_LIB lzma REQUIRED)\n  find_library(CURL_LIB curl REQUIRED)\n  find_library(CRYPTO_LIB crypto REQUIRED)\n  target_link_libraries(regenie PUBLIC \n    ${HTSLIB}\n    ${BZ2_LIB} ${LZMA_LIB} ${CURL_LIB} ${CRYPTO_LIB} \n  )\nendif()\n\n# Intel MKL\nif(EXISTS ${MKLROOT})\n  add_definitions(-DWITH_MKL -DEIGEN_USE_BLAS -DEIGEN_USE_LAPACKE)\n  target_include_directories(regenie PRIVATE ${MKLROOT}/include/)\n  if(${BUILD_STATIC}) # specify static libs\n    find_library(MKL_LP64_LIB libmkl_intel_lp64.a \n      HINTS \"${MKLROOT}/lib/intel64\"\n      \"${MKLROOT}/lib\" \n      REQUIRED)\n    find_library(MKL_THREAD_LIB libmkl_gnu_thread.a \n      HINTS \"${MKLROOT}/lib/intel64\"\n      \"${MKLROOT}/lib\" \n      REQUIRED)\n    find_library(MKL_CORE_LIB libmkl_core.a \n      HINTS \"${MKLROOT}/lib/intel64\"\n      \"${MKLROOT}/lib\" \n      REQUIRED)\n    target_link_libraries(regenie PRIVATE \"-Wl,--start-group\" ${MKL_LP64_LIB} ${MKL_THREAD_LIB} ${MKL_CORE_LIB} \"-Wl,--end-group\" -lgomp)\n  else() # use dynamic libs\n    find_library(MKL_LP64_LIB mkl_intel_lp64 \n      PATHS \"${MKLROOT}/lib/intel64\"\n      \"${MKLROOT}/lib\" \n      REQUIRED)\n    find_library(MKL_THREAD_LIB mkl_gnu_thread \n      PATHS \"${MKLROOT}/lib/intel64\"\n      \"${MKLROOT}/lib\" \n      REQUIRED)\n    find_library(MKL_CORE_LIB mkl_core \n      PATHS \"${MKLROOT}/lib/intel64\"\n      \"${MKLROOT}/lib\" \n      REQUIRED)\n    target_link_libraries(regenie PRIVATE \"-Wl,--no-as-needed\" ${MKL_LP64_LIB} ${MKL_THREAD_LIB} ${MKL_CORE_LIB} -lgomp)\n  endif()\nelseif(EXISTS ${OPENBLAS_ROOT}) # OpenBLAS\n  add_definitions(-DWITH_OPENBLAS -DEIGEN_USE_BLAS -DEIGEN_USE_LAPACKE)\n  target_include_directories(regenie PRIVATE ${OPENBLAS_ROOT}/include/)\n  find_library(LAPACK_LIB lapack REQUIRED)\n  find_library(BLAS_LIB openblas HINTS \"${OPENBLAS_ROOT}/lib/\" REQUIRED)\n  target_link_libraries(regenie PRIVATE ${LAPACK_LIB} -llapacke ${BLAS_LIB})\nendif()\n\n# cxxopts (header-only)\ntarget_include_directories(regenie PRIVATE ${EXTERN_LIBS_PATH}/cxxopts/include/)\n\n# LBFGS (header-only)\ntarget_include_directories(regenie PRIVATE ${EXTERN_LIBS_PATH}/LBFGSpp/include/)\n\n# Eigen (header-only)\ntarget_include_directories(regenie PRIVATE ${EXTERN_LIBS_PATH}/eigen-3.4.0/)\n\n# Boost IO\nif(${HAS_BOOST_IOSTREAM})\n  if(\"${UNAME_S}\" STREQUAL \"Darwin\")\n    find_library(BOOST_LIB_IO libboost_iostreams libboost_iostreams.a REQUIRED)\n    target_link_libraries(regenie PRIVATE ${BOOST_LIB_IO})\n  elseif(${BUILD_STATIC})\n    find_library(BOOST_LIB_IO libboost_iostreams.a REQUIRED)\n    target_link_libraries(regenie PRIVATE ${BOOST_LIB_IO})\n  else()\n    target_link_libraries(regenie PRIVATE -lboost_iostreams)\n  endif()\n  add_definitions(-DHAS_BOOST_IOSTREAM)\n  message( STATUS \"Will compile with Boost Iostreams library\")\nendif()\n\n# Other libraries\nfind_library(ZLIB_LIBRARY libz.a z REQUIRED)\nfind_library(M_LIB m REQUIRED)\nfind_library(DL_LIB dl REQUIRED)\nfind_library(PTHREAD_LIB pthread) # defines ${PTHREAD_LIB} used below\nif(\"${UNAME_S}\" STREQUAL \"Linux\")\n  set(GFORTRAN_LIBRARY \"-lgfortran\")\nelseif(\"${UNAME_S}\" STREQUAL \"Darwin\")\n  find_library(GFORTRAN_LIBRARY gfortran REQUIRED)\nendif()\ntarget_link_libraries(\n  regenie PRIVATE\n  ${ZLIB_LIBRARY} ${M_LIB} ${DL_LIB} ${PTHREAD_LIB}\n  ${GFORTRAN_LIBRARY}\n)\n\ninstall(TARGETS regenie RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})\nmessage( STATUS \"REGENIE v\" ${RG_VERSION})\n\nadd_custom_target(full-clean\n  COMMAND cd 
\"${MVTN_PATH}\" && make clean\n  COMMAND cd \"${QF_PATH}\" && make clean\n  COMMAND cd \"${QUAD_PATH}\" && make clean\n  COMMAND cd \"${PGEN_PATH}\" && make clean\n  )\n"
  },
  {
    "path": "Dockerfile",
    "content": "# Thanks to Nathan Weeks for sharing this Dockerfile\n# minor edits were made\n# Filename: Dockerfile\n\n# make this global \nARG LIB_INSTALL\nARG LIB_INSTALL2\n\nFROM public.ecr.aws/ubuntu/ubuntu:22.04 AS builder\n\nARG BOOST_IO\nARG LIB_INSTALL\nARG STATIC\nARG CMAKE_VERSION_MAJOR=3.13\nARG CMAKE_VERSION_MINOR=0\nARG HTSLIB_VERSION=1.18\n\nWORKDIR /src\n\nADD http://cmake.org/files/v${CMAKE_VERSION_MAJOR}/cmake-${CMAKE_VERSION_MAJOR}.${CMAKE_VERSION_MINOR}-Linux-x86_64.sh cmake_install.sh\nADD http://code.enkre.net/bgen/tarball/release/v1.1.7 v1.1.7.tgz\nADD https://github.com/samtools/htslib/releases/download/$HTSLIB_VERSION/htslib-$HTSLIB_VERSION.tar.bz2 htslib-$HTSLIB_VERSION.tar.bz2\n\n# install BGEN and HTSlib libraries\nRUN apt-get update && apt-get install -y --no-install-recommends \\\n      gcc-9 \\\n      g++-9 \\\n      gfortran-9 \\\n      make \\\n      libz-dev \\\n      bzip2 \\\n      libbz2-dev \\\n      liblzma-dev \\\n      libcurl4-openssl-dev \\\n      libssl-dev \\\n      python3 \\\n      zlib1g-dev \\\n      $LIB_INSTALL \\\n      && update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-9 60 --slave /usr/bin/g++ g++ /usr/bin/g++-9 \\\n      && update-alternatives --install /usr/bin/gfortran gfortran /usr/bin/gfortran-9 70 \\\n      && update-alternatives --install /usr/bin/f77 f77 /usr/bin/gfortran-9 70 \\\n      && update-alternatives --install /usr/bin/cc cc /usr/bin/gcc-9 70 \\\n      && tar -xf htslib-$HTSLIB_VERSION.tar.bz2 \\\n      && cd htslib-$HTSLIB_VERSION/ \\\n      && ./configure \\\n      && make \\\n      && make install \\\n      && cd .. \\\n      && sh cmake_install.sh --prefix=/usr/local --skip-license --exclude-subdir \\\n      && rm cmake_install.sh \\\n      && tar -xzf v1.1.7.tgz \\\n      && rm v1.1.7.tgz \\\n      && cd v1.1.7 \\\n      && python3 waf configure \\\n      && python3 waf\n\nCOPY . /src/regenie\n\nWORKDIR /src/regenie\n\nRUN BGEN_PATH=/src/v1.1.7 HAS_BOOST_IOSTREAM=$BOOST_IO HTSLIB_PATH=/usr/local/lib/ STATIC=$STATIC cmake . \\\n      && make\n\nFROM public.ecr.aws/ubuntu/ubuntu:22.04\nARG LIB_INSTALL2\n\nRUN apt-get update && apt-get install -y --no-install-recommends \\\n      libgomp1 libcurl4 gfortran-9 $LIB_INSTALL2 \\\n      && rm -rf /var/lib/apt/lists/*\n\nCOPY --from=builder /src/regenie/regenie /usr/local/bin\n"
  },
  {
    "path": "Dockerfile_mkl",
    "content": "# Filename: Dockerfile_mkl\n\n# make this global \nARG LIB_INSTALL\nARG LIB_INSTALL2\n\n\nFROM public.ecr.aws/ubuntu/ubuntu:22.04 AS builder\n\nARG BOOST_IO\nARG LIB_INSTALL\nARG STATIC\nARG CMAKE_VERSION_MAJOR=3.13\nARG CMAKE_VERSION_MINOR=0\nARG HTSLIB_VERSION=1.18\n\nWORKDIR /src\n\nADD http://cmake.org/files/v${CMAKE_VERSION_MAJOR}/cmake-${CMAKE_VERSION_MAJOR}.${CMAKE_VERSION_MINOR}-Linux-x86_64.sh cmake_install.sh\nADD http://code.enkre.net/bgen/tarball/release/v1.1.7 v1.1.7.tgz\nADD https://github.com/samtools/htslib/releases/download/$HTSLIB_VERSION/htslib-$HTSLIB_VERSION.tar.bz2 htslib-$HTSLIB_VERSION.tar.bz2\nADD https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB intel_key.PUB\nCOPY . /src/regenie\n\n# install BGEN and HTSlib libraries\nRUN apt-get update && apt-get install -y --no-install-recommends \\\n      gcc-9 \\\n      g++-9 \\\n      gfortran-9 \\\n      libz-dev \\\n      bzip2 \\\n      libbz2-dev \\\n      liblzma-dev \\\n      libcurl4-openssl-dev \\\n      libssl-dev \\\n      make \\\n      gnupg \\\n      python3 \\\n      zlib1g-dev \\\n      apt-transport-https ca-certificates \\\n      $LIB_INSTALL \\\n      && update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-9 60 --slave /usr/bin/g++ g++ /usr/bin/g++-9 \\\n      && update-alternatives --install /usr/bin/gfortran gfortran /usr/bin/gfortran-9 70 \\\n      && update-alternatives --install /usr/bin/f77 f77 /usr/bin/gfortran-9 70 \\\n      && update-alternatives --install /usr/bin/cc cc /usr/bin/gcc-9 70 \\\n      && tar -xf htslib-$HTSLIB_VERSION.tar.bz2 \\\n      && cd htslib-$HTSLIB_VERSION/ \\\n      && ./configure \\\n      && make \\\n      && make install \\\n      && cd .. \\\n      && sh cmake_install.sh --prefix=/usr/local --skip-license --exclude-subdir \\\n      && rm cmake_install.sh \\\n      && tar -xzf v1.1.7.tgz \\\n      && rm v1.1.7.tgz \\\n      && cd v1.1.7 \\\n      && python3 waf configure \\\n      && python3 waf \\\n      && cd .. \\\n      && sh -c 'cat intel_key.PUB | gpg --dearmor | tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null' \\\n      && sh -c 'echo \"deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main\" | tee /etc/apt/sources.list.d/oneAPI.list' \\\n      && apt-get update \\\n      && apt-get install intel-oneapi-mkl-devel -y --no-install-recommends \\\n      && . /opt/intel/oneapi/setvars.sh \\\n      && echo \"MKL_THREADING_LAYER=GNU\" >> /etc/environment \\\n      && cd /src/regenie \\\n      && BGEN_PATH=/src/v1.1.7 HAS_BOOST_IOSTREAM=$BOOST_IO MKLROOT=$MKLROOT HTSLIB_PATH=/usr/local/lib/ STATIC=1 cmake . \\\n      && make\n\n# no need to install Boost IO and MKL here (static linking)\nFROM public.ecr.aws/ubuntu/ubuntu:22.04\n\nRUN apt-get update && apt-get install -y --no-install-recommends \\\n      libgomp1 gfortran-9 \\\n      libz-dev libbz2-dev liblzma-dev libcurl4-openssl-dev libssl-dev \\\n      && rm -rf /var/lib/apt/lists/*\n\nCOPY --from=builder /src/regenie/regenie /usr/local/bin\n\n"
  },
  {
    "path": "Dockerfile_openblas",
    "content": "# Thanks to Nathan Weeks for sharing this Dockerfile\n# minor edits were made\n# Filename: Dockerfile_openblas\n\n# make this global \nARG LIB_INSTALL\nARG LIB_INSTALL2\n\n\nFROM ubuntu:18.04 AS builder\n\nARG BOOST_IO\nARG LIB_INSTALL\nARG STATIC\nARG CMAKE_VERSION_MAJOR=3.13\nARG CMAKE_VERSION_MINOR=0\n\nWORKDIR /src\n\nADD http://cmake.org/files/v${CMAKE_VERSION_MAJOR}/cmake-${CMAKE_VERSION_MAJOR}.${CMAKE_VERSION_MINOR}-Linux-x86_64.sh cmake_install.sh\nADD http://code.enkre.net/bgen/tarball/release/v1.1.7 v1.1.7.tgz\n\nRUN apt-get update && apt-get install -y --no-install-recommends \\\n      g++ \\\n      make \\\n      python3 \\\n      gfortran \\\n      zlib1g-dev \\\n      liblapacke-dev \\\n      libopenblas-dev \\\n      $LIB_INSTALL \\\n      && sh cmake_install.sh --prefix=/usr/local --skip-license --exclude-subdir \\\n      && rm cmake_install.sh \\\n      && tar -xzf v1.1.7.tgz \\\n      && rm v1.1.7.tgz \\\n      && cd v1.1.7 \\\n      && python3 waf configure \\\n      && python3 waf \\\n      && cd ..\n\nCOPY . /src/regenie\n\nWORKDIR /src/regenie\n\nRUN BGEN_PATH=/src/v1.1.7 HAS_BOOST_IOSTREAM=$BOOST_IO OPENBLAS_ROOT=/usr/lib/x86_64-linux-gnu/openblas STATIC=1 cmake . \\\n      && make\n\nFROM ubuntu:18.04\nARG LIB_INSTALL2\n\nRUN apt-get update && apt-get install -y --no-install-recommends \\\n      libgomp1 gfortran $LIB_INSTALL2 \\\n      && rm -rf /var/lib/apt/lists/*\n\nCOPY --from=builder /src/regenie/regenie /usr/local/bin\n\n"
  },
  {
    "path": "LICENSE",
    "content": "MIT License\n\nCopyright (c) 2020-2021 Joelle Mbatchou, Andrey Ziyatdinov & Jonathan Marchini\n\nPermission is hereby granted, free of charge, to any person obtaining a copy\nof this software and associated documentation files (the \"Software\"), to deal\nin the Software without restriction, including without limitation the rights\nto use, copy, modify, merge, publish, distribute, sublicense, and/or sell\ncopies of the Software, and to permit persons to whom the Software is\nfurnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in all\ncopies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\nIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\nFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\nAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\nLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\nOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\nSOFTWARE.\n\n-----------------------------------------------------------------------------------\n\nThis software links code from the BGEN library which is licensed under \nthe \"Boost Software License, Version 1.0\":\n\nCopyright Gavin Band 2008 - 2012.\n\nPermission is hereby granted, free of charge, to any person or organization\nobtaining a copy of the software and accompanying documentation covered by\nthis license (the \"Software\") to use, reproduce, display, distribute,\nexecute, and transmit the Software, and to prepare derivative works of the\nSoftware, and to permit third-parties to whom the Software is furnished to\ndo so, all subject to the following:\n\nThe copyright notices in the Software and this entire statement, including\nthe above license grant, this restriction and the following disclaimer,\nmust be included in all copies of the Software, in whole or in part, and\nall derivative works of the Software, unless such copies or derivative\nworks are solely in the form of machine-executable object code generated by\na source language processor.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\nIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\nFITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT\nSHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE\nFOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,\nARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER\nDEALINGS IN THE SOFTWARE.\n"
  },
  {
    "path": "Makefile",
    "content": "# Makefile for Linux and Mac OSX systems for REGENIE\n#\n# * User needs to specify BGEN_PATH which is the directory\n# \twhere the BGEN library is installed\n# * If the Boost Iostream library is installed on the system,\n# \tuser can specify to link to it during compilation by\n# \tsetting  HAS_BOOST_IOSTREAM to 1\n#\n###############################################################\n#\n# Optional: To use external BLAS/LAPACK routines in Eigen\t\n#\n# For Intel MKL, add path of installed library to MKLROOT\n# For OpenBLAS, add path of installed library to OPENBLAS_ROOT\n#   note: it also requires lapacke library\n#  \n# For static compilation on Linux systems, set STATIC=1\n#   -> this excludes GLIBC\n#\n\nBGEN_PATH     =\nHAS_BOOST_IOSTREAM := 0\nMKLROOT       =\n# directory containing libhts.a or libhts.so\nHTSLIB_PATH   =\nOPENBLAS_ROOT = \nSTATIC       := 0\n\n############\n\n# Use only if not set\nCXX          ?= g++\nCXXFLAGS      = -O3 -Wall -pedantic -ffast-math -std=c++11 -Wno-unused-local-typedefs -Wno-deprecated-declarations -Wno-long-long -Wno-c11-extensions -fPIC\n\nEFILE         = regenie\nCFLAGS       ?=\n\n# check BGEN_PATH is set\nifneq ($(MAKECMDGOALS),clean)\nifeq ($(strip $(BGEN_PATH)),)\n $(warning You did not set the path to BGEN library, i.e. \"BGEN_PATH\")\nendif\nendif\n\n\n# detect OS architecture and add flags\nUNAME_S      := $(shell uname -s)\nifeq ($(UNAME_S),Linux)\n INC          = -I${BGEN_PATH}/3rd_party/boost_1_55_0\n CFLAGS      += -fopenmp\n ifeq ($(strip $(STATIC)),1)\n\tLPATHS      = -static-libgcc -static-libstdc++\n\tDLIBS       = -Wl,-Bdynamic\n endif\nelse ifeq ($(UNAME_S),Darwin)\n RGFLAGS     += -stdlib=libc++\nendif\n\n\nRG_VERSION    = $(shell cat VERSION)\n\n## for docker\nDFILE         = ./Dockerfile\nTEST_SCRIPT   = ./test/test_docker.sh\nifeq ($(strip $(STATIC)),1)\n ifneq ($(strip $(MKLROOT)),)\n\tDFILE       = ./Dockerfile_mkl # only for static linking\n endif\nendif\n\n\n## for boost iostream\nifeq ($(HAS_BOOST_IOSTREAM),1)\n RG_VERSION  := $(RG_VERSION).gz\n RGFLAGS     += -DHAS_BOOST_IOSTREAM\n ifeq ($(strip $(STATIC)),1)\n\tSLIBS       = -Wl,-Bstatic -lboost_iostreams\n else\n\tDLIBS      += -lboost_iostreams\n\tLIB_BIO2    = libboost-iostreams-dev ## for docker build\n endif\n LIB_BIO      = libboost-iostreams-dev ## for docker build\nendif\n\n\n# Intel MKL or OpenBLAS\nifneq ($(strip $(MKLROOT)),)\n ifeq ($(UNAME_S),Linux)\n\tRGFLAGS    += -DWITH_MKL -DEIGEN_USE_BLAS -DEIGEN_USE_LAPACKE\n\tINC        += -I${MKLROOT}/include/\n\t# static linking\n\tifeq ($(strip $(STATIC)),1)\n\t SLIBS     += -Wl,--start-group ${MKLROOT}/lib/intel64/libmkl_intel_lp64.a ${MKLROOT}/lib/intel64/libmkl_gnu_thread.a ${MKLROOT}/lib/intel64/libmkl_core.a -Wl,--end-group\n\t DLIBS     += -lgomp -lpthread\n\t# dynamic linking\n\telse\n\t LIBMKL     = -L${MKLROOT}/lib/intel64/\n\t DLIBS     += -Wl,--no-as-needed -lmkl_intel_lp64 -lmkl_gnu_thread -lmkl_core -lgomp -lpthread\n\tendif\n endif\n\nelse ifneq ($(strip $(OPENBLAS_ROOT)),)\n ifeq ($(UNAME_S),Linux)\n\tRGFLAGS    += -DWITH_OPENBLAS -DEIGEN_USE_BLAS -DEIGEN_USE_LAPACKE\n\tINC        += -I${OPENBLAS_ROOT}/include/\n\t# static linking\n\tifeq ($(strip $(STATIC)),1)\n\t SLIBS     += -Wl,-rpath=${OPENBLAS_ROOT}/lib/ -llapack -llapacke -lopenblas\n\t# dynamic linking\n\telse\n\t DLIBS     += -Wl,-rpath=${OPENBLAS_ROOT}/lib/ -llapack -llapacke -lopenblas\n\tendif\n endif\nendif\n\n## for HTSlib\nifneq ($(strip $(HTSLIB_PATH)),)\n\tifeq ($(UNAME_S),Linux)\n\t\tRGFLAGS += -DWITH_HTSLIB\n\t\tifeq 
($(strip $(STATIC)),1)\n\t\t\tSLIBS += ${HTSLIB_PATH}/libhts.a\n\t\telse\n\t\t\tSLIBS += ${HTSLIB_PATH}/libhts.so\n\t\tendif\n\t\tDLIBS += -lz -lbz2 -llzma -lcurl\n\tendif\nendif\n\n# pass on version number to software\nRGFLAGS      += -DVERSION_NUMBER=\\\"$(RG_VERSION)\\\"\n\n\nOBJECTS       = $(patsubst %.cpp,%.o,$(wildcard ./src/*.cpp))\n\nPGEN_PATH     = ./external_libs/pgenlib/\nINC          += -I${PGEN_PATH} -I${PGEN_PATH}/simde/ -I${PGEN_PATH}/include/ -I./external_libs/cxxopts/include/ -I./external_libs/LBFGSpp/include/ -I${BGEN_PATH} -I./external_libs/eigen-3.4.0/ -I${BGEN_PATH}/genfile/include/ -I${BGEN_PATH}/3rd_party/boost_1_55_0/ -I${BGEN_PATH}/3rd_party/zstd-1.1.0/lib -I${BGEN_PATH}/db/include/ -I${BGEN_PATH}/3rd_party/sqlite3 -I./external_libs/remeta -I./external_libs/\n\nLPATHS       += ${LIBMKL} -L${BGEN_PATH}/build/ -L${BGEN_PATH}/build/3rd_party/zstd-1.1.0/ -L${BGEN_PATH}/build/db/ -L${BGEN_PATH}/build/3rd_party/sqlite3/ -L${BGEN_PATH}/build/3rd_party/boost_1_55_0 -L/usr/lib/\n\nLIBS         += ${SLIBS} -lbgen -lzstd -ldb  -lsqlite3 -lboost\nLIBS         += -lz ${DLIBS} -lm -ldl -lgfortran\n\n\n\n.PHONY: docker-build docker-test debug clean\n\nall: ${EFILE}\n\n${EFILE}: libMvtnorm libqf libquad pgenlib remeta ${OBJECTS}\n\t${CXX} ${CXXFLAGS} ${RGFLAGS} ${CFLAGS} -o ${EFILE} ${OBJECTS} ./external_libs/mvtnorm/libMvtnorm.a ./external_libs/qf/qf.a ./external_libs/quadpack/libquad.a ./external_libs/pgenlib/pgenlib.a ./external_libs/remeta/remeta.a ${LPATHS} ${LIBS}\n\n%.o: %.cpp\n\t${CXX} ${CXXFLAGS} ${RGFLAGS} -o $@ -c $< ${INC} ${CFLAGS}\n\nlibMvtnorm: \n\t\t(cd ./external_libs/mvtnorm/;$(MAKE))\n\nlibqf: \n\t\t(cd ./external_libs/qf/;$(MAKE))\n\nlibquad: \n\t\t(cd ./external_libs/quadpack/;$(MAKE))\n\npgenlib: \n\t\t(cd ./external_libs/pgenlib/;$(MAKE))\n\nremeta:\n\t\t(cd ./external_libs/remeta/;$(MAKE) remeta.a HTSLIB_PATH=${HTSLIB_PATH})\n\n#####\n## For use with Docker\n# create Docker image\ndocker-build:\n\t@echo \"Building docker image for REGENIE v${RG_VERSION}\"\nifeq ($(HAS_BOOST_IOSTREAM),1)\n\t@echo Compiling with Boost Iostream library\nendif\nifeq ($(STATIC),1)\n ifneq ($(strip $(MKLROOT)),)\n\t@echo Compiling with Intel MKL library\n endif\n\t@echo Linking = static\n else\n\t@echo Linking = dynamic\nendif\n\n\t@docker build --rm -f ${DFILE} \\\n\t\t--no-cache --pull \\\n\t\t--build-arg BOOST_IO=${HAS_BOOST_IOSTREAM} \\\n\t\t--build-arg LIB_INSTALL=${LIB_BIO} \\\n\t\t--build-arg LIB_INSTALL2=${LIB_BIO2} \\\n\t\t--build-arg STATIC=${STATIC} \\\n\t\t-t regenie:v${RG_VERSION} .\n\ndocker-test:\n\t@${TEST_SCRIPT} . \"regenie:v${RG_VERSION}\" ${HAS_BOOST_IOSTREAM}\n####\n\n\ndebug: CXXFLAGS  = -O0 -g -std=c++11 -fPIC\ndebug: ${EFILE}\n\nclean:\n\trm -f ${EFILE} ./src/*.o\n\t(cd ./external_libs/mvtnorm/;$(MAKE) clean)\n\t(cd ./external_libs/qf/;$(MAKE) clean)\n\t(cd ./external_libs/quadpack/;$(MAKE) clean)\n\t(cd ./external_libs/pgenlib/;$(MAKE) clean)\n\t(cd ./external_libs/remeta/;$(MAKE) clean)\n"
  },
  {
    "path": "README.md",
    "content": "[![build](https://github.com/rgcgithub/regenie/actions/workflows/test.yml/badge.svg)](https://github.com/rgcgithub/regenie/actions/workflows/test.yml)\n![GitHub release (latest by date)](https://img.shields.io/github/v/release/rgcgithub/regenie?logo=Github)\n[![install with conda](https://img.shields.io/badge/install%20with-conda-brightgreen.svg)](https://anaconda.org/bioconda/regenie)\n[![Github All Releases](https://img.shields.io/github/downloads/rgcgithub/regenie/total.svg)]()\n[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)\n\n**regenie** is a C++ program for whole genome regression modelling of large [genome-wide association studies](https://en.wikipedia.org/wiki/Genome-wide_association_study).\n\nIt is developed and supported by a team of scientists at the Regeneron Genetics Center.\n\nThe method has the following properties\n\n- It works on quantitative, binary, and time-to-event traits, including binary traits with unbalanced case-control ratios and time-to-event traits with low event rates\n- It can handle population structure and relatedness\n- It can process multiple phenotypes at once efficiently\n- It is fast and memory efficient 🔥\n- For binary traits, it supports Firth logistic regression and an SPA test\n- For time-to-event traits, it supports Firth cox regression\n- It can perform gene/region-based tests, interaction tests and conditional analyses\n- It supports the [BGEN](https://www.well.ox.ac.uk/~gav/bgen_format/), [PLINK](https://www.cog-genomics.org/plink/1.9/formats#bed) bed/bim/fam and [PLINK2](https://www.cog-genomics.org/plink/2.0/formats#pgen) pgen/pvar/psam genetic data formats\n- It is ideally suited for implementation in [Apache Spark](https://spark.apache.org/) (see [GLOW](https://projectglow.io/))\n- It can be installed with [Conda](https://anaconda.org/bioconda/regenie)\n\nFull documentation for the **regenie** can be found [here](https://rgcgithub.github.io/regenie/).\n\n## Citation \nMbatchou, J., Barnard, L., Backman, J. et al. Computationally efficient whole-genome regression for quantitative and binary traits. Nat Genet 53, 1097–1103 (2021). 
https://doi.org/10.1038/s41588-021-00870-7\n\n## License\n\n**regenie** is distributed under an [MIT license](https://github.com/rgcgithub/regenie/blob/master/LICENSE).\n\n## Contact\nIf you have any questions about regenie, please contact\n\n- <jonathan.marchini@regeneron.com>\n- <joelle.mbatchou@regeneron.com>\n\nIf you want to submit an issue concerning the software, please do so\nusing the **regenie** [GitHub repository](https://github.com/rgcgithub/regenie/issues).\n\n\n## Version history\n[Version 4.1](https://github.com/rgcgithub/regenie/releases/tag/v4.1) (Timing reduction for single variant association tests; New option `--htp` to output summary statistics in the [HTP](https://rgcgithub.github.io/remeta/file_formats/#-htp) format; New option `--skip-dosage-comp` to skip dosage compensation for males in non-PAR chrX regions; Various bug fixes)\n\n[Version 4.0](https://github.com/rgcgithub/regenie/releases/tag/v4.0) (New options `--t2e` and `--eventColList` to enable time-to-event analysis and to specify the event phenotype name, respectively; Fix algorithm used to fit logistic Firth model when using `--write-null-firth` to match more closely the approach used in step 2)\n\n[Version 3.6](https://github.com/rgcgithub/regenie/releases/tag/v3.6) (Bug fix for the approximate Firth test when ultra-rare variants [MAC below 50] are being tested; Address convergence failures & speed up exact Firth by using warm starts based on null model with just covariates)\n\n[Version 3.5](https://github.com/rgcgithub/regenie/releases/tag/v3.5) (Added CHR/POS columns to snplist output file when using `--write-mask-snplist`; Genotype counts are now reported in the sumstats file when using `--no-split`; Improved efficiency of LOOCV scheme in ridge level 0; Detect carriage return in fam/psam/bim/pvar/sample files; Minor bug fixes)\n\n[Version 3.4.1](https://github.com/rgcgithub/regenie/releases/tag/v3.4.1) (Reduction in memory usage for LD computation when writing to text files; Fix bug rejecting valid PVAR files)\n\n[Version 3.4](https://github.com/rgcgithub/regenie/releases/tag/v3.4) (Reduction in memory usage for LD computation with dosages; Minor bug fixes for LD computation; Bug fix for when carriage returns are in optional input files)\n\n[Version 3.3](https://github.com/rgcgithub/regenie/releases/tag/v3.3) (Faster implementation of approximate Firth LRT; New strategy for approximate Firth LRT with ultra-rare variants; Relaxed convergence criterion of Firth LRT from 1E-4 to 2.5E-4)\n\n[Version 3.2.9](https://github.com/rgcgithub/regenie/releases/tag/v3.2.9) (Switch to robust version of ACAT to handle very small p-values; Bug fix for Step1 when sex chromosome was included in the analysis; Allow for 64 domains when using the 4-column annotation file)\n\n[Version 3.2.8](https://github.com/rgcgithub/regenie/releases/tag/v3.2.8) (New option `--bgi` to specify custom index bgi file accompanying BGEN file; Relax matching criteria between BGEN and index bgi files to use CPRA instead of variant ID)\n\n[Version 3.2.7](https://github.com/rgcgithub/regenie/releases/tag/v3.2.7) (New option `--force-mac-filter` to apply different MAC filter to subset of SNPs; Extend maximum number of domains to 32 for 4-column anno-file; Update PGEN library)\n\n[Version 3.2.6](https://github.com/rgcgithub/regenie/releases/tag/v3.2.6) (Relax tolerance parameter for null unpenalized logistic regression from 1e-8 to 1e-6; Minor bug fixes)\n\n[Version 3.2.5.3](https://github.com/rgcgithub/regenie/releases/tag/v3.2.5.3) (Fix inflation issue when testing the main effect of a SNP in the GxE model; Minor bug fixes)\n\n[Version 3.2.5](https://github.com/rgcgithub/regenie/releases/tag/v3.2.5) (Use pseudo-data representation algorithm as default in step 2 single variant tests; Use ACAT to get SBAT p-value across POS/NEG models; Bug fix for ACATV when set has a single variant with zero weight)\n\n[Version 3.2.4](https://github.com/rgcgithub/regenie/releases/tag/v3.2.4) (Relaxed the requirement on the minimum number of unique values for QTs to 3; Various bug fixes)\n\n[Version 3.2.3](https://github.com/rgcgithub/regenie/releases/tag/v3.2.3) (Address convergence issues in Firth regression; Various bug fixes)\n\n[Version 3.2.2](https://github.com/rgcgithub/regenie/releases/tag/v3.2.2) (New columns in sumstats file (N_CASES/N_CONTROLS) to output the number of cases/controls when using `--af-cc`; Various bug fixes)\n\n[Version 3.2.1](https://github.com/rgcgithub/regenie/releases/tag/v3.2.1) (New option `--lovo-snplist` to only consider a subset of LOVO masks; Improve efficiency of LOVO for large sets to reduce memory usage; Bug fix for SPA with numerical overflow; For SKAT/ACAT tests with Firth correction, don't include SKAT weights when running Firth on single variants)\n\n[Version 3.2](https://github.com/rgcgithub/regenie/releases/tag/v3.2) (Bug fix for SKAT/SKATO when testing on binary traits using Firth/SPA; Switched name of the NNLS joint test to the SBAT test, altering the names of the corresponding options, and applied Bonferroni correction before reporting its p-value [correcting for minP of 2 tests])\n\n[Version 3.1.4](https://github.com/rgcgithub/regenie/releases/tag/v3.1.4) (New option `--par-region` to specify build to determine bounds for chrX PAR regions; new option `--force-qt` to force QT runs for traits with fewer than 10 values [otherwise will throw an error]; phenotype imputation for missing values is now applied after RINTing when using `--apply-rint`; several bug fixes)\n\n[Version 3.1.2](https://github.com/rgcgithub/regenie/releases/tag/v3.1.2) (Reduction in memory usage for SKAT/SKATO tests; Bug fix for LOVO with SKAT/ACAT tests; Improvements for null Firth logistic algorithm to address reported convergence issues)\n\n[Version 3.1.1](https://github.com/rgcgithub/regenie/releases/tag/v3.1.1) (Reduction in memory usage for SKAT/SKATO tests; Improvements for logistic regressions algorithms to address reported convergence issues)\n\n[Version 3.1](https://github.com/rgcgithub/regenie/releases/tag/v3.1) (Fixed bug in SKAT/SKATO tests when applying Firth/SPA correction; Improved SPA implementation by computing both tail probabilities; New option `--set-singletons` to specify variants to consider as singletons for burden masks; New option `--l1-phenoList` to run level 1 models in Step 1 in parallel across phenotypes; Several bug fixes)\n\n[Version 3.0.3](https://github.com/rgcgithub/regenie/releases/tag/v3.0.3) (Skip BTs where null model fit failed; Bug fix for BURDEN-ACAT; Bug fix when nan/inf values are in phenotype/covariate file)\n\n[Version 3.0.1](https://github.com/rgcgithub/regenie/releases/tag/v3.0.1) (Improve ridge logistic regression in Step 1; Added compilation with CMake)\n\n[Version 3.0](https://github.com/rgcgithub/regenie/releases/tag/v3.0) (New gene-based tests: SKAT, SKATO, ACATV, ACATO and NNLS [Non-Negative Least Square test]; New GxE and GxG interaction testing functionality; New conditional analysis functionality; see [release page](https://github.com/rgcgithub/regenie/releases/tag/v3.0) for minor additions)\n\nFor 
past releases, see [here](RELEASE_LOG.md).\n"
  },
  {
    "path": "RELEASE_LOG.md",
    "content": "## Changelog for past releases\n\nVersion 2.2.4 (Bug fix for multi-trait step 1 run with binary traits of different missingness patterns)\n\nVersion 2.2.3 (Bug fix for binary traits for which null logistic regression gives 0/1 fitted probabilities; enabled multi-threaded null model fitting for approximate Firth null models)\n\nVersion 2.2.2 (Bug fix for binary traits for which null logistic regression gives 0/1 fitted probabilities [i.e. highly imbalanced or low case counts]; New option `--sex-specific` for sex-specific analyses)\n\nVersion 2.2.1 (Bug fix for Step 2 with binary traits for which null logistic regression gives 0/1 fitted probabilities [i.e. highly imbalanced or low case counts])\n\nVersion 2.2 (Faster implementation of Step 1 and 2 (see [here](https://rgcgithub.github.io/regenie/performance/#new-timings-improvements) for details); new options `--write-null-firth/--use-null-firth` to store the estimates from approximate Firth null model; new option `--minCaseCount` to filter out BTs with low number of cases from the analysis; new option `--no-split` to enforce output of summary stats to a single file for all traits; added support for tranposed phenotype file format with `--tphenoFile`)\n\nVersion 2.0.2 (Bug fix for burden testing with BGEN files not in v1.2 with 8-bit encoding; enabled faster step 2 implementation with Zstd compressed BGEN files in v1.2 with 8-bit encoding)\n\nVersion 2.0.1 (New option `--catCovList` to specify categorical covariates; Enabled parameter expansion when specifying select phenotypes/covariates to analyze [e.g. 'PC{1:10}'])\n\nVersion 2.0 (Added burden testing functionality for region or gene-based tests [see [website](https://rgcgithub.github.io/regenie/options/#burden-testing) for details]; added sample size column in summary stats output).\n\nVersion 1.0.7 (Enabled for level 0 models in step 1 to be run in parallel [see [Wiki](https://github.com/rgcgithub/regenie/wiki/Further-parallelization-for-level-0-models-in-Step-1) for details]).\n\nVersion 1.0.6.9 (Improved step 2 for BGEN format files not in v1.2 or 8-bit encoding).\n\nVersion 1.0.6.8 (New option `--range` to specify a chromosome region of variants to test in step 2).\n\nVersion 1.0.6.7 (New option `--print-prs` in step 1 to print the whole genome predictions (i.e. PRS) without using LOCO; \nnew flag `--use-prs` in step 2 to use these in the association tests).\n\nVersion 1.0.6.6 (Fixed MAC calculation for variants on sex chromosomes when sex information is available in the genotype file).\n\nVersion 1.0.6.5 (Enabled options `--extract/--exclude` in step 2).\n\nVersion 1.0.6.4 (New option `--minINFO` to filter imputed variants in Step 2; added Regenie binary compiled with Intel MKL (only for x86_64 Linux)).\n\nVersion 1.0.6.3 (Improved ridge logistic regression to avoid convergence issues in step 1 with low case-count traits).\n\nVersion 1.0.6.2 (New option `--ref-first` to use the first allele for each variant as the reference allele for BGEN or PLINK bed/bim/fam file input [default is to use the last allele as the reference]).\n\nVersion 1.0.6.1 (Bug fix: Mach R^2 info score is only printed for PGEN input when dosages are present; added flag `--print-pheno` to write the phenotype name in 1st line of sample IDs file [i.e. 
when using `--write-samples`]).\n\nVersion 1.0.6.0 (Improved logistic regression implementation to address convergence issues with low case counts; add new option `--firth-se` to compute SE using effect size estimate and LRT p-value when using Firth correction).\n\n\nVersion 1.0.5.9 (Fixed bug printing variant ID  when variant with variance = 0 occurs in step 1).\n\nVersion 1.0.5.8 (Fixed bug due to input genotype file not sorted by chromosome and one of options `--extract/--exclude/--chr/--chrList` is used).\n\nVersion 1.0.5.7 (New option `--with-bgi` to read variant information from a .bgi index file for BGEN input format; added option `--write-samples` to write IDs of samples analyzed for each trait in step 2; added Mach Rsq imputation quality metric in INFO column for step 2 with PGEN input file format).\n\nVersion 1.0.5.6 (Enabled output of LOCO predictions files and association result files in gzip compressed format using option `--gz` [requires compiling with Boost Iostream library]; added automatic removal from the analysis of genotyped samples in step 2 not present in the LOCO prediction files from step 1 [done separately for each trait]).\n\nVersion 1.0.5.5 (fixed bug when setting the total number of blocks [the bug was introduced in v1.0.5.3 due to `n_blocks` being uninitialized]; addressed bug in step 1 with boost filesystem on some machines due to invalid LC_ALL locale) (Note: can now build docker image using `make docker-build`).\n\nVersion 1.0.5.4 (Enable using gzip compressed phenotype/covariate files as input [requires installed Boost Iostream library and setting `HAS_BOOST_IOSTREAM = 1` in Makefile] )\n\nVersion 1.0.5.31 (Print out ID of problematic variants with low variance in step 1)\n\nVersion 1.0.5.3 (Use cxxopts header-only library to parse command line arguments; changed program options `--p/--c/--b/--o/--1` to `--phenoFile/--covarFile/--bsize/--out/--cc12`, respectively; added options `--lowmem-prefix/--pThresh`)\n\nVersion 1.0.5.2 (Changed default behavior to remove individuals who have missing data at all phenotypes in the analysis; absolute paths are written in the predictions list file created in step 1)\n\nVersion 1.0.5.1 (Reduced memory usage and computational time when using options to keep/remove genotyped samples from the analysis)\n\nVersion 1.0.4.2 (Fixed bug excluding/including variants in step 1 with PGEN input format and improved the implementation of how it's done)\n\nVersion 1.0.4.1 (Can specify multiple phenotypes/covariates/chromosomes using comma separated arguments; chromosome names can start with 'chr' in the input genotype file)\n\nVersion 1.0.4 (Enabled PLINK 2.0 PGEN format files as input using the PLINK 2.0 PGEN library)\n\nVersion 1.0.3 (fixed genotype coding in dominant/recessive test for BGEN input format)\n\nVersion 1.0.2 (fixed numerical overflow bug when using option `--chr` in step 2; changed to boost split function to read all input files [either space/tab delimited])\n\nVersion 1.0.1 (fixed numerical overflow bug for quantile calculation; added new strategy for fitting null model for approximate Firth test) \n\nVersion 1.0 (22 June 2020): Initial release\n\n\n\n"
  },
  {
    "path": "VERSION",
    "content": "4.1.2\n"
  },
  {
    "path": "docs/cinder/404.html",
    "content": "{% extends \"base.html\" %}\n\n{% block content %}\n\n    <div class=\"row-fluid\">\n      <div id=\"main-content\" class=\"span12\">\n        <h1 id=\"404-page-not-found\" style=\"text-align: center\">404</h1>\n        <p style=\"text-align: center\"><strong>Page not found</strong></p>\n        <p style=\"text-align: center\"><a href=\"{{ base_url }}\">Home</a></p>\n      </div>\n    </div>\n\n{% endblock %}\n"
  },
  {
    "path": "docs/cinder/__init__.py",
    "content": ""
  },
  {
    "path": "docs/cinder/base.html",
    "content": "<!DOCTYPE html>\n<html lang=\"en\">\n\n<head>\n    <meta charset=\"utf-8\">\n    <meta http-equiv=\"X-UA-Compatible\" content=\"IE=edge\">\n    <meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\">\n    {% if config.site_description %}<meta name=\"description\" content=\"{{ config.site_description }}\">{% endif %}\n    {% if config.site_author %}<meta name=\"author\" content=\"{{ config.site_author }}\">{% endif %}\n    {% if page.canonical_url %}<link rel=\"canonical\" href=\"{{ page.canonical_url }}\">{% endif %}\n    <link rel=\"shortcut icon\" href=\"{{ 'img/favicon.ico'|url }}\">\n\n    {% block htmltitle %}\n    <title>{% if page.title %}{{ page.title }} - {% endif %}{{ config.site_name }}</title>\n    {% endblock %}\n\n    <link rel=\"stylesheet\" href=\"https://use.fontawesome.com/releases/v5.12.0/css/all.css\">\n    <link rel=\"stylesheet\" href=\"https://use.fontawesome.com/releases/v5.12.0/css/v4-shims.css\">\n    <link rel=\"stylesheet\" href=\"//cdn.jsdelivr.net/npm/hack-font@3.3.0/build/web/hack.min.css\">\n    <link href='//rsms.me/inter/inter.css' rel='stylesheet' type='text/css'>\n    <link href='//fonts.googleapis.com/css?family=Open+Sans:300italic,400italic,700italic,400,300,600,700&subset=latin-ext,latin' rel='stylesheet' type='text/css'>\n    <link href=\"{{ 'css/bootstrap-custom.min.css'|url }}\" rel=\"stylesheet\">\n    <link href=\"{{ 'css/base.min.css'|url }}\" rel=\"stylesheet\">\n    <link href=\"{{ 'css/cinder.min.css'|url }}\" rel=\"stylesheet\">\n\n    {% if config.theme.highlightjs %}\n        {% if config.theme.colorscheme %}\n        <link rel=\"stylesheet\" href=\"//cdn.jsdelivr.net/gh/highlightjs/cdn-release@9.18.0/build/styles/{{ config.theme.colorscheme }}.min.css\">\n        {% else %}\n        <link rel=\"stylesheet\" href=\"//cdn.jsdelivr.net/gh/highlightjs/cdn-release@9.18.0/build/styles/github.min.css\">\n        {% endif %}\n    {% endif %}\n\n    {%- for path in config['extra_css'] %}\n    <link href=\"{{ path|url }}\" rel=\"stylesheet\">\n    {%- endfor %}\n\n    <!-- HTML5 shim and Respond.js IE8 support of HTML5 elements and media queries -->\n    <!--[if lt IE 9]>\n            <script src=\"https://cdn.jsdelivr.net/npm/html5shiv@3.7.3/dist/html5shiv.min.js\"></script>\n            <script src=\"https://cdn.jsdelivr.net/npm/respond.js@1.4.2/dest/respond.min.js\"></script>\n        <![endif]-->\n\n    {% if config.google_analytics %}\n    <script>\n    (function(i, s, o, g, r, a, m) {\n        i['GoogleAnalyticsObject'] = r;\n        i[r] = i[r] || function() {\n            (i[r].q = i[r].q || []).push(arguments)\n        }, i[r].l = 1 * new Date();\n        a = s.createElement(o),\n        m = s.getElementsByTagName(o)[0];\n        a.async = 1;\n        a.src = g;\n        m.parentNode.insertBefore(a, m)\n    })(window, document, 'script', '//www.google-analytics.com/analytics.js', 'ga');\n\n    ga('create', '{{ config.google_analytics[0] }}', '{{ config.google_analytics[1] }}');\n    ga('send', 'pageview');\n    </script>\n    {% endif %}\n\n    {% block extrahead %} {% endblock %}\n</head>\n\n<body{% if page and page.is_homepage %} class=\"homepage\" {% endif %}>\n\n    {% include \"nav.html\" %}\n\n    <div class=\"container\">\n        {% block content %}\n        {% if page.meta.disable_toc %}\n        <div class=\"col-md-12\" role=\"main\">{% include \"content.html\" %}</div>\n        {% else %}\n        <div class=\"col-md-3\">{% include \"toc.html\" %}</div>\n        <div class=\"col-md-9\" 
role=\"main\">{% include \"content.html\" %}</div>\n        {% endif %}\n        {% endblock %}\n    </div>\n\n    <footer class=\"col-md-12 text-center\">\n        {% block footer %}\n        <hr>\n        <p>{% if config.copyright %}\n        <small>{{ config.copyright }}</small><br>\n        {% endif %}\n        <small>Documentation built with <a href=\"http://www.mkdocs.org/\">MkDocs</a>.</small>\n        </p>\n\n        {% if page and page.meta.revision_date %}<br>\n        <small>Revised on: {{ page.meta.revision_date }}</small>\n        {% endif %}\n        {% endblock %}\n    </footer>\n\n    {%- block scripts %}\n    <script src=\"//ajax.googleapis.com/ajax/libs/jquery/1.12.4/jquery.min.js\"></script>\n    <script src=\"{{ 'js/bootstrap-3.0.3.min.js'|url }}\"></script>\n    {% if config.theme.highlightjs %}\n    <script src=\"//cdn.jsdelivr.net/gh/highlightjs/cdn-release@9.18.0/build/highlight.min.js\"></script>\n    {%- for lang in config.theme.hljs_languages %}\n    <script src=\"//cdn.jsdelivr.net/gh/highlightjs/cdn-release@9.18.0/build/languages/{{lang}}.min.js\"></script>\n    {%- endfor %}\n    <script>hljs.initHighlightingOnLoad();</script>\n    {% endif %}\n    <script>var base_url = {{ base_url | tojson }}</script>\n    {% if config.shortcuts %}\n        <script>var shortcuts = {{ config.shortcuts | tojson }}</script>\n    {% endif %}\n    <script src=\"{{ 'js/base.js'|url }}\"></script>\n    {%- for path in config['extra_javascript'] %}\n    <script src=\"{{ path|url }}\"></script>\n    {%- endfor %}\n    {%- endblock %}\n\n  <script type=\"text/javascript\"\n          src=\"https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.0/MathJax.js?config=TeX-AMS_CHTML\"></script>\n\n  <script type=\"text/x-mathjax-config\">\n    MathJax.Hub.Config({\ntex2jax: {\ninlineMath: [['$','$'], ['\\\\(','\\\\)']],\nprocessEscapes: true},\njax: [\"input/TeX\",\"input/MathML\",\"input/AsciiMath\",\"output/CommonHTML\"],\nextensions: [\"tex2jax.js\",\"mml2jax.js\",\"asciimath2jax.js\",\"MathMenu.js\",\"MathZoom.js\",\"AssistiveMML.js\", \"[Contrib]/a11y/accessibility-menu.js\"],\nTeX: {\nextensions: [\"AMSmath.js\",\"AMSsymbols.js\",\"noErrors.js\",\"noUndefined.js\"],\nequationNumbers: {\nautoNumber: \"AMS\"\n}\n}\n});\n  </script>\n\n    {% if 'search' in config['plugins'] %}{%- include \"search-modal.html\" %}{% endif %}\n    {%- include \"keyboard-modal.html\" %}\n    </body>\n\n\n</html>\n{% if page and page.is_homepage %}\n<!--\nMkDocs version : {{ mkdocs_version }}\nBuild Date UTC : {{ build_date_utc }}\n-->\n{% endif %}\n"
  },
  {
    "path": "docs/cinder/content.html",
    "content": "{% if page.meta.source %}\n<div class=\"source-links\">\n    {% for filename in page.meta.source %}\n    <span class=\"label label-primary\">{{ filename }}</span>\n    {% endfor %}\n</div>\n{% endif %}\n\n{{ page.content }}\n"
  },
  {
    "path": "docs/cinder/css/base.css",
    "content": "body {\n    padding-top: 70px;\n}\n\nh1[id]:before, h2[id]:before, h3[id]:before, h4[id]:before, h5[id]:before, h6[id]:before {\n    content: \"\";\n    display: block;\n    margin-top: -75px;\n    height: 75px;\n}\n\np > img {\n    max-width: 100%;\n    height: auto;\n}\n\nul.nav li.first-level {\n    font-weight: bold;\n}\n\nul.nav li.third-level {\n    padding-left: 12px;\n}\n\ndiv.col-md-3 {\n    padding-left: 0;\n}\n\ndiv.col-md-9 {\n    padding-bottom: 100px;\n}\n\ndiv.source-links {\n    float: right;\n}\n\n/*\n * Side navigation\n *\n * Scrollspy and affixed enhanced navigation to highlight sections and secondary\n * sections of docs content.\n */\n\n/* By default it's not affixed in mobile views, so undo that */\n.bs-sidebar.affix {\n    position: static;\n}\n\n.bs-sidebar.well {\n    padding: 0;\n}\n\n/* First level of nav */\n.bs-sidenav {\n    margin-top: 30px;\n    margin-bottom: 30px;\n    padding-top:    10px;\n    padding-bottom: 10px;\n    border-radius: 5px;\n}\n\n/* All levels of nav */\n.bs-sidebar .nav > li > a {\n    display: block;\n    padding: 5px 20px;\n    z-index: 1;\n}\n.bs-sidebar .nav > li > a:hover,\n.bs-sidebar .nav > li > a:focus {\n    text-decoration: none;\n    border-right: 1px solid;\n}\n.bs-sidebar .nav > .active > a,\n.bs-sidebar .nav > .active:hover > a,\n.bs-sidebar .nav > .active:focus > a {\n    font-weight: bold;\n    background-color: transparent;\n    border-right: 1px solid;\n}\n\n/* Nav: second level (shown on .active) */\n.bs-sidebar .nav .nav {\n    display: none; /* Hide by default, but at >768px, show it */\n    margin-bottom: 8px;\n}\n.bs-sidebar .nav .nav > li > a {\n    padding-top:    3px;\n    padding-bottom: 3px;\n    padding-left: 30px;\n    font-size: 90%;\n}\n\n/* Show and affix the side nav when space allows it */\n@media (min-width: 992px) {\n    /* Allow the sidebar to scroll if it overflows the page. 
*/\n    .bs-sidebar {\n        overflow-y: scroll;\n    }\n\n    .bs-sidebar .nav > .active > ul {\n        display: block;\n    }\n    /* Widen the fixed sidebar */\n    .bs-sidebar.affix,\n    .bs-sidebar.affix-bottom {\n        width: 213px;\n    }\n    .bs-sidebar.affix {\n        position: fixed; /* Undo the static from mobile first approach */\n        top: 80px;\n        max-height: calc(100% - 90px);\n    }\n    .bs-sidebar.affix-bottom {\n        position: absolute; /* Undo the static from mobile first approach */\n    }\n    .bs-sidebar.affix-bottom .bs-sidenav,\n    .bs-sidebar.affix .bs-sidenav {\n        margin-top: 0;\n        margin-bottom: 0;\n    }\n}\n@media (min-width: 1200px) {\n    /* Widen the fixed sidebar again */\n    .bs-sidebar.affix-bottom,\n    .bs-sidebar.affix {\n        width: 263px;\n    }\n}\n\n\n/* Added to support >2 level nav in drop down */\n\n.dropdown-submenu {\n    position: relative;\n}\n\n.dropdown-submenu>.dropdown-menu {\n    top: 0;\n    left: 100%;\n    margin-top: 0px;\n    margin-left: 0px;\n}\n\n.dropdown-submenu:hover>.dropdown-menu {\n    display: block;\n}\n\n.dropdown-submenu>a:after {\n    display: block;\n    content: \" \";\n    float: right;\n    width: 0;\n    height: 0;\n    border-color: transparent;\n    border-style: solid;\n    border-width: 5px 0 5px 5px;\n    border-left-color: #ccc;\n    margin-top: 5px;\n    margin-right: -10px;\n}\n\n.dropdown-submenu:hover>a:after {\n    border-left-color: #fff;\n}\n\n.dropdown-submenu.pull-left {\n    float: none;\n}\n\n.dropdown-submenu.pull-left>.dropdown-menu {\n    left: -100%;\n    margin-left: 0px;\n}\n/* Start Bootstrap Callouts CSS Source by Chris Pratt (https://codepen.io/chrisdpratt/pen/IAymB) MIT License*/\n.bs-callout {\n    padding: 20px;\n    margin: 20px 0;\n    border: 1px solid #eee;\n    border-left-width: 5px;\n    border-radius: 3px;\n    background-color: #FCFDFF;\n}\n.bs-callout h4 {\n    font-style: normal;\n    font-weight: 400;\n    margin-top: 0;\n    margin-bottom: 5px;\n}\n.bs-callout p:last-child {\n    margin-bottom: 0;\n}\n.bs-callout code {\n    border-radius: 3px;\n}\n.bs-callout+.bs-callout {\n    margin-top: -5px;\n}\n.bs-callout-default {\n    border-left-color: #FA023C; /*modified from upstream default by Christopher Simpkins*/\n}\n.bs-callout-default h4 {\n    color: #FA023C; /*modified from upstream default by Christopher Simpkins*/\n}\n.bs-callout-primary {\n    border-left-color: #428bca;\n}\n.bs-callout-primary h4 {\n    color: #428bca;\n}\n.bs-callout-success {\n    border-left-color: #5cb85c;\n}\n.bs-callout-success h4 {\n    color: #5cb85c;\n}\n.bs-callout-danger {\n    border-left-color: #d9534f;\n}\n.bs-callout-danger h4 {\n    color: #d9534f;\n}\n.bs-callout-warning {\n    border-left-color: #f0ad4e;\n}\n.bs-callout-warning h4 {\n    color: #f0ad4e;\n}\n.bs-callout-info {\n    border-left-color: #5bc0de;\n}\n.bs-callout-info h4 {\n    color: #5bc0de;\n}\n/* End Bootstrap Callouts CSS Source by Chris Pratt */\n\n/* Admonitions */\n.admonition {\n    padding: 20px;\n    margin: 20px 0;\n    border: 1px solid #eee;\n    border-left-width: 5px;\n    border-radius: 3px;\n    background-color: #FCFDFF;\n}\n\n.admonition p:last-child {\n    margin-bottom: 0;\n}\n.admonition code {\n    border-radius: 3px;\n}\n.admonition+.admonition {\n    margin-top: -5px;\n}\n\n.admonition.note { /* csslint allow: adjoining-classes */\n    border-left-color: #428bca;\n}\n\n.admonition.warning { /* csslint allow: adjoining-classes */\n    border-left-color: 
#f0ad4e;\n}\n\n.admonition.danger { /* csslint allow: adjoining-classes */\n    border-left-color: #d9534f;\n}\n\n.admonition-title {\n    font-size: 19px;\n    font-style: normal;\n    font-weight: 400;\n    margin-top: 0;\n    margin-bottom: 5px;\n}\n\n.admonition.note > .admonition-title {\n    color: #428bca;\n}\n\n.admonition.warning > .admonition-title {\n    color: #f0ad4e;\n}\n\n.admonition.danger > .admonition-title {\n    color: #d9534f;\n}\n"
  },
  {
    "path": "docs/cinder/css/bootstrap-custom.css",
    "content": "/*! normalize.css v2.1.3 | MIT License | git.io/normalize */\narticle, aside, details, figcaption, figure, footer, header, hgroup, main, nav, section, summary {\n    display: block;\n}\naudio, canvas, video {\n    display: inline-block;\n}\naudio:not([controls]) {\n    display: none;\n    height: 0;\n}\n[hidden], template {\n    display: none;\n}\nhtml {\n    font-family: sans-serif;\n    -webkit-text-size-adjust: 100%;\n    -ms-text-size-adjust: 100%}\nbody {\n    margin: 0;\n}\na {\n    background: transparent;\n}\na:focus {\n    outline: thin dotted;\n}\na:active, a:hover {\n    outline: 0;\n}\nh1 {\n    margin: .67em 0;\n    font-size: 2em;\n}\nabbr[title] {\n    border-bottom: 1px dotted;\n}\nb, strong {\n    font-weight: bold;\n}\ndfn {\n    font-style: italic;\n}\nhr {\n    height: 0;\n    -moz-box-sizing: content-box;\n    box-sizing: content-box;\n}\nmark {\n    color: #000;\n    background: #ff0;\n}\ncode, kbd, pre, samp {\n    font-family: Hack, monospace, serif;\n    font-size: 1em;\n}\npre {\n    white-space: pre-wrap;\n}\nq {\n    quotes: \"\\201C\" \"\\201D\" \"\\2018\" \"\\2019\"}\nsmall {\n    font-size: 80%}\nsub, sup {\n    position: relative;\n    font-size: 75%;\n    line-height: 0;\n    vertical-align: baseline;\n}\nsup {\n    top: -0.5em;\n}\nsub {\n    bottom: -0.25em;\n}\nimg {\n    border: 0;\n}\nsvg:not(:root) {\n    overflow: hidden;\n}\nfigure {\n    margin: 0;\n}\nfieldset {\n    padding: .35em .625em .75em;\n    margin: 0 2px;\n    border: 1px solid #c0c0c0;\n}\nlegend {\n    padding: 0;\n    border: 0;\n}\nbutton, input, select, textarea {\n    margin: 0;\n    font-family: inherit;\n    font-size: 100%}\nbutton, input {\n    line-height: normal;\n}\nbutton, select {\n    text-transform: none;\n}\nbutton, html input[type=\"button\"], input[type=\"reset\"], input[type=\"submit\"] {\n    cursor: pointer;\n    -webkit-appearance: button;\n}\nbutton[disabled], html input[disabled] {\n    cursor: default;\n}\ninput[type=\"checkbox\"], input[type=\"radio\"] {\n    padding: 0;\n    box-sizing: border-box;\n}\ninput[type=\"search\"] {\n    -webkit-box-sizing: content-box;\n    -moz-box-sizing: content-box;\n    box-sizing: content-box;\n    -webkit-appearance: textfield;\n}\ninput[type=\"search\"]::-webkit-search-cancel-button, input[type=\"search\"]::-webkit-search-decoration {\n    -webkit-appearance: none;\n}\nbutton::-moz-focus-inner, input::-moz-focus-inner {\n    padding: 0;\n    border: 0;\n}\ntextarea {\n    overflow: auto;\n    vertical-align: top;\n}\ntable {\n    border-collapse: collapse;\n    border-spacing: 0;\n}\n@media print {\n    * {\n    color: #000!important;\n    text-shadow: none!important;\n    background: transparent!important;\n    box-shadow: none!important;\n}\na, a:visited {\n    text-decoration: underline;\n}\na[href]:after {\n    content: \" (\" attr(href) \")\"}\nabbr[title]:after {\n    content: \" (\" attr(title) \")\"}\na[href^=\"javascript:\"]:after, a[href^=\"#\"]:after {\n    content: \"\"}\npre, blockquote {\n    border: 1px solid #999;\n    page-break-inside: avoid;\n}\nthead {\n    display: table-header-group;\n}\ntr, img {\n    page-break-inside: avoid;\n}\nimg {\n    max-width: 100%!important;\n}\n@page {\n    margin: 2cm .5cm;\n}\np, h2, h3 {\n    orphans: 3;\n    widows: 3;\n}\nh2, h3 {\n    page-break-after: avoid;\n}\nselect {\n    background: #fff!important;\n}\n.navbar {\n    display: none;\n}\n.table td, .table th {\n    background-color: #fff!important;\n}\n.btn>.caret, .dropup>.btn>.caret {\n    
border-top-color: #000!important;\n}\n.label {\n    border: 1px solid #000;\n}\n.table {\n    border-collapse: collapse!important;\n}\n.table-bordered th, .table-bordered td {\n    border: 1px solid #ddd!important;\n}\n}*, *:before, *:after {\n    -webkit-box-sizing: border-box;\n    -moz-box-sizing: border-box;\n    box-sizing: border-box;\n}\nhtml {\n    font-size: 62.5%;\n    -webkit-tap-highlight-color: rgba(0, 0, 0, 0);\n}\nbody {\n    font-family: Merriweather, Georgia, serif;\n    font-size: 14px;\n    line-height: 1.428571429;\n    color: #222;\n    background-color: #fff;\n}\ninput, button, select, textarea {\n    font-family: inherit;\n    font-size: inherit;\n    line-height: inherit;\n}\na {\n    color: #008cba;\n    text-decoration: none;\n}\na:hover, a:focus {\n    color: #00526e;\n    text-decoration: underline;\n}\na:focus {\n    outline: thin dotted;\n    outline: 5px auto -webkit-focus-ring-color;\n    outline-offset: -2px;\n}\nimg {\n    vertical-align: middle;\n}\n.img-responsive {\n    display: block;\n    height: auto;\n    max-width: 100%}\n.img-rounded {\n    border-radius: 0;\n}\n.img-thumbnail {\n    display: inline-block;\n    height: auto;\n    max-width: 100%;\n    padding: 4px;\n    line-height: 1.428571429;\n    background-color: #fff;\n    border: 1px solid #ddd;\n    border-radius: 0;\n    -webkit-transition: all .2s ease-in-out;\n    transition: all .2s ease-in-out;\n}\n.img-circle {\n    border-radius: 50%}\nhr {\n    margin-top: 21px;\n    margin-bottom: 21px;\n    border: 0;\n    border-top: 1px solid #ddd;\n}\n.sr-only {\n    position: absolute;\n    width: 1px;\n    height: 1px;\n    padding: 0;\n    margin: -1px;\n    overflow: hidden;\n    clip: rect(0, 0, 0, 0);\n    border: 0;\n}\nh1, h2, h3, h4, h5, h6, .h1, .h2, .h3, .h4, .h5, .h6 {\n    font-family: \"Open Sans\", \"Helvetica Neue\", Helvetica, Arial, sans-serif;\n    font-weight: 300;\n    line-height: 1.1;\n    color: inherit;\n}\nh1 small, h2 small, h3 small, h4 small, h5 small, h6 small, .h1 small, .h2 small, .h3 small, .h4 small, .h5 small, .h6 small, h1 .small, h2 .small, h3 .small, h4 .small, h5 .small, h6 .small, .h1 .small, .h2 .small, .h3 .small, .h4 .small, .h5 .small, .h6 .small {\n    font-weight: normal;\n    line-height: 1;\n    color: #999;\n}\nh1, h2, h3 {\n    margin-top: 21px;\n    margin-bottom: 10.5px;\n}\nh1 small, h2 small, h3 small, h1 .small, h2 .small, h3 .small {\n    font-size: 65%}\nh4, h5, h6 {\n    margin-top: 10.5px;\n    margin-bottom: 10.5px;\n}\nh4 small, h5 small, h6 small, h4 .small, h5 .small, h6 .small {\n    font-size: 75%}\nh1, .h1 {\n    font-size: 39px;\n}\nh2, .h2 {\n    font-size: 32px;\n}\nh3, .h3 {\n    font-size: 26px;\n}\nh4, .h4 {\n    font-size: 19px;\n}\nh5, .h5 {\n    font-size: 15px;\n}\nh6, .h6 {\n    font-size: 13px;\n}\np {\n    margin: 0 0 10.5px;\n}\n.lead {\n    margin-bottom: 21px;\n    font-size: 17px;\n    font-weight: 200;\n    line-height: 1.4;\n}\n@media(min-width:768px) {\n    .lead {\n    font-size: 22.5px;\n}\n}small, .small {\n    font-size: 85%}\ncite {\n    font-style: normal;\n}\n.text-muted {\n    color: #999;\n}\n.text-primary {\n    color: #008cba;\n}\n.text-primary:hover {\n    color: #006687;\n}\n.text-warning {\n    color: #e99002;\n}\n.text-warning:hover {\n    color: #b67102;\n}\n.text-danger {\n    color: #f04124;\n}\n.text-danger:hover {\n    color: #d32a0e;\n}\n.text-success {\n    color: #43ac6a;\n}\n.text-success:hover {\n    color: #358753;\n}\n.text-info {\n    color: #5bc0de;\n}\n.text-info:hover {\n    
color: #31b0d5;\n}\n.text-left {\n    text-align: left;\n}\n.text-right {\n    text-align: right;\n}\n.text-center {\n    text-align: center;\n}\n.page-header {\n    padding-bottom: 9.5px;\n    margin: 42px 0 21px;\n    border-bottom: 1px solid #ddd;\n}\nul, ol {\n    margin-top: 0;\n    margin-bottom: 10.5px;\n}\nul ul, ol ul, ul ol, ol ol {\n    margin-bottom: 0;\n}\n.list-unstyled {\n    padding-left: 0;\n    list-style: none;\n}\n.list-inline {\n    padding-left: 0;\n    list-style: none;\n}\n.list-inline>li {\n    display: inline-block;\n    padding-right: 5px;\n    padding-left: 5px;\n}\n.list-inline>li:first-child {\n    padding-left: 0;\n}\ndl {\n    margin-top: 0;\n    margin-bottom: 21px;\n}\ndt, dd {\n    line-height: 1.428571429;\n}\ndt {\n    font-weight: bold;\n}\ndd {\n    margin-left: 0;\n}\n@media(min-width:768px) {\n    .dl-horizontal dt {\n    float: left;\n    width: 160px;\n    overflow: hidden;\n    clear: left;\n    text-align: right;\n    text-overflow: ellipsis;\n    white-space: nowrap;\n}\n.dl-horizontal dd {\n    margin-left: 180px;\n}\n.dl-horizontal dd:before, .dl-horizontal dd:after {\n    display: table;\n    content: \" \"}\n.dl-horizontal dd:after {\n    clear: both;\n}\n.dl-horizontal dd:before, .dl-horizontal dd:after {\n    display: table;\n    content: \" \"}\n.dl-horizontal dd:after {\n    clear: both;\n}\n.dl-horizontal dd:before, .dl-horizontal dd:after {\n    display: table;\n    content: \" \"}\n.dl-horizontal dd:after {\n    clear: both;\n}\n.dl-horizontal dd:before, .dl-horizontal dd:after {\n    display: table;\n    content: \" \"}\n.dl-horizontal dd:after {\n    clear: both;\n}\n.dl-horizontal dd:before, .dl-horizontal dd:after {\n    display: table;\n    content: \" \"}\n.dl-horizontal dd:after {\n    clear: both;\n}\n}abbr[title], abbr[data-original-title] {\n    cursor: help;\n    border-bottom: 1px dotted #999;\n}\n.initialism {\n    font-size: 90%;\n    text-transform: uppercase;\n}\nblockquote {\n    padding: 10.5px 21px;\n    margin: 0 0 21px;\n    border-left: 5px solid #ddd;\n}\nblockquote p {\n    font-size: 18.75px;\n    font-weight: 300;\n    line-height: 1.25;\n}\nblockquote p:last-child {\n    margin-bottom: 0;\n}\nblockquote small, blockquote .small {\n    display: block;\n    line-height: 1.428571429;\n    color: #6f6f6f;\n}\nblockquote small:before, blockquote .small:before {\n    content: '\\2014 \\00A0'}\nblockquote.pull-right {\n    padding-right: 15px;\n    padding-left: 0;\n    border-right: 5px solid #ddd;\n    border-left: 0;\n}\nblockquote.pull-right p, blockquote.pull-right small, blockquote.pull-right .small {\n    text-align: right;\n}\nblockquote.pull-right small:before, blockquote.pull-right .small:before {\n    content: ''}\nblockquote.pull-right small:after, blockquote.pull-right .small:after {\n    content: '\\00A0 \\2014'}\nblockquote:before, blockquote:after {\n    content: \"\"}\naddress {\n    margin-bottom: 21px;\n    font-style: normal;\n    line-height: 1.428571429;\n}\ncode, kbd, pre, samp {\n    font-family: Hack, Menlo, Monaco, Consolas, \"Courier New\", monospace;\n}\ncode {\n    padding: 2px 4px;\n    font-size: 90%;\n    color: #c7254e;\n    white-space: nowrap;\n    background-color: #f9f2f4;\n    border-radius: 0;\n}\npre {\n    display: block;\n    padding: 10px;\n    margin: 0 0 10.5px;\n    font-size: 14px;\n    line-height: 1.428571429;\n    color: #333;\n    word-break: break-all;\n    word-wrap: break-word;\n    background-color: #f5f5f5;\n    border: 1px solid #ccc;\n    border-radius: 
0;\n}\npre code {\n    padding: 0;\n    font-size: inherit;\n    color: inherit;\n    white-space: pre-wrap;\n    background-color: transparent;\n    border-radius: 0;\n}\n.pre-scrollable {\n    max-height: 340px;\n    overflow-y: scroll;\n}\n.container {\n    padding-right: 15px;\n    padding-left: 15px;\n    margin-right: auto;\n    margin-left: auto;\n}\n.container:before, .container:after {\n    display: table;\n    content: \" \"}\n.container:after {\n    clear: both;\n}\n.container:before, .container:after {\n    display: table;\n    content: \" \"}\n.container:after {\n    clear: both;\n}\n.container:before, .container:after {\n    display: table;\n    content: \" \"}\n.container:after {\n    clear: both;\n}\n.container:before, .container:after {\n    display: table;\n    content: \" \"}\n.container:after {\n    clear: both;\n}\n.container:before, .container:after {\n    display: table;\n    content: \" \"}\n.container:after {\n    clear: both;\n}\n@media(min-width:768px) {\n    .container {\n    width: 750px;\n}\n}@media(min-width:992px) {\n    .container {\n    width: 970px;\n}\n}@media(min-width:1200px) {\n    .container {\n    width: 1170px;\n}\n}.row {\n    margin-right: -15px;\n    margin-left: -15px;\n}\n.row:before, .row:after {\n    display: table;\n    content: \" \"}\n.row:after {\n    clear: both;\n}\n.row:before, .row:after {\n    display: table;\n    content: \" \"}\n.row:after {\n    clear: both;\n}\n.row:before, .row:after {\n    display: table;\n    content: \" \"}\n.row:after {\n    clear: both;\n}\n.row:before, .row:after {\n    display: table;\n    content: \" \"}\n.row:after {\n    clear: both;\n}\n.row:before, .row:after {\n    display: table;\n    content: \" \"}\n.row:after {\n    clear: both;\n}\n.col-xs-1, .col-sm-1, .col-md-1, .col-lg-1, .col-xs-2, .col-sm-2, .col-md-2, .col-lg-2, .col-xs-3, .col-sm-3, .col-md-3, .col-lg-3, .col-xs-4, .col-sm-4, .col-md-4, .col-lg-4, .col-xs-5, .col-sm-5, .col-md-5, .col-lg-5, .col-xs-6, .col-sm-6, .col-md-6, .col-lg-6, .col-xs-7, .col-sm-7, .col-md-7, .col-lg-7, .col-xs-8, .col-sm-8, .col-md-8, .col-lg-8, .col-xs-9, .col-sm-9, .col-md-9, .col-lg-9, .col-xs-10, .col-sm-10, .col-md-10, .col-lg-10, .col-xs-11, .col-sm-11, .col-md-11, .col-lg-11, .col-xs-12, .col-sm-12, .col-md-12, .col-lg-12 {\n    position: relative;\n    min-height: 1px;\n    padding-right: 15px;\n    padding-left: 15px;\n}\n.col-xs-1, .col-xs-2, .col-xs-3, .col-xs-4, .col-xs-5, .col-xs-6, .col-xs-7, .col-xs-8, .col-xs-9, .col-xs-10, .col-xs-11, .col-xs-12 {\n    float: left;\n}\n.col-xs-12 {\n    width: 100%}\n.col-xs-11 {\n    width: 91.66666666666666%}\n.col-xs-10 {\n    width: 83.33333333333334%}\n.col-xs-9 {\n    width: 75%}\n.col-xs-8 {\n    width: 66.66666666666666%}\n.col-xs-7 {\n    width: 58.333333333333336%}\n.col-xs-6 {\n    width: 50%}\n.col-xs-5 {\n    width: 41.66666666666667%}\n.col-xs-4 {\n    width: 33.33333333333333%}\n.col-xs-3 {\n    width: 25%}\n.col-xs-2 {\n    width: 16.666666666666664%}\n.col-xs-1 {\n    width: 8.333333333333332%}\n.col-xs-pull-12 {\n    right: 100%}\n.col-xs-pull-11 {\n    right: 91.66666666666666%}\n.col-xs-pull-10 {\n    right: 83.33333333333334%}\n.col-xs-pull-9 {\n    right: 75%}\n.col-xs-pull-8 {\n    right: 66.66666666666666%}\n.col-xs-pull-7 {\n    right: 58.333333333333336%}\n.col-xs-pull-6 {\n    right: 50%}\n.col-xs-pull-5 {\n    right: 41.66666666666667%}\n.col-xs-pull-4 {\n    right: 33.33333333333333%}\n.col-xs-pull-3 {\n    right: 25%}\n.col-xs-pull-2 {\n    right: 
16.666666666666664%}\n.col-xs-pull-1 {\n    right: 8.333333333333332%}\n.col-xs-pull-0 {\n    right: 0;\n}\n.col-xs-push-12 {\n    left: 100%}\n.col-xs-push-11 {\n    left: 91.66666666666666%}\n.col-xs-push-10 {\n    left: 83.33333333333334%}\n.col-xs-push-9 {\n    left: 75%}\n.col-xs-push-8 {\n    left: 66.66666666666666%}\n.col-xs-push-7 {\n    left: 58.333333333333336%}\n.col-xs-push-6 {\n    left: 50%}\n.col-xs-push-5 {\n    left: 41.66666666666667%}\n.col-xs-push-4 {\n    left: 33.33333333333333%}\n.col-xs-push-3 {\n    left: 25%}\n.col-xs-push-2 {\n    left: 16.666666666666664%}\n.col-xs-push-1 {\n    left: 8.333333333333332%}\n.col-xs-push-0 {\n    left: 0;\n}\n.col-xs-offset-12 {\n    margin-left: 100%}\n.col-xs-offset-11 {\n    margin-left: 91.66666666666666%}\n.col-xs-offset-10 {\n    margin-left: 83.33333333333334%}\n.col-xs-offset-9 {\n    margin-left: 75%}\n.col-xs-offset-8 {\n    margin-left: 66.66666666666666%}\n.col-xs-offset-7 {\n    margin-left: 58.333333333333336%}\n.col-xs-offset-6 {\n    margin-left: 50%}\n.col-xs-offset-5 {\n    margin-left: 41.66666666666667%}\n.col-xs-offset-4 {\n    margin-left: 33.33333333333333%}\n.col-xs-offset-3 {\n    margin-left: 25%}\n.col-xs-offset-2 {\n    margin-left: 16.666666666666664%}\n.col-xs-offset-1 {\n    margin-left: 8.333333333333332%}\n.col-xs-offset-0 {\n    margin-left: 0;\n}\n@media(min-width:768px) {\n    .col-sm-1, .col-sm-2, .col-sm-3, .col-sm-4, .col-sm-5, .col-sm-6, .col-sm-7, .col-sm-8, .col-sm-9, .col-sm-10, .col-sm-11, .col-sm-12 {\n    float: left;\n}\n.col-sm-12 {\n    width: 100%}\n.col-sm-11 {\n    width: 91.66666666666666%}\n.col-sm-10 {\n    width: 83.33333333333334%}\n.col-sm-9 {\n    width: 75%}\n.col-sm-8 {\n    width: 66.66666666666666%}\n.col-sm-7 {\n    width: 58.333333333333336%}\n.col-sm-6 {\n    width: 50%}\n.col-sm-5 {\n    width: 41.66666666666667%}\n.col-sm-4 {\n    width: 33.33333333333333%}\n.col-sm-3 {\n    width: 25%}\n.col-sm-2 {\n    width: 16.666666666666664%}\n.col-sm-1 {\n    width: 8.333333333333332%}\n.col-sm-pull-12 {\n    right: 100%}\n.col-sm-pull-11 {\n    right: 91.66666666666666%}\n.col-sm-pull-10 {\n    right: 83.33333333333334%}\n.col-sm-pull-9 {\n    right: 75%}\n.col-sm-pull-8 {\n    right: 66.66666666666666%}\n.col-sm-pull-7 {\n    right: 58.333333333333336%}\n.col-sm-pull-6 {\n    right: 50%}\n.col-sm-pull-5 {\n    right: 41.66666666666667%}\n.col-sm-pull-4 {\n    right: 33.33333333333333%}\n.col-sm-pull-3 {\n    right: 25%}\n.col-sm-pull-2 {\n    right: 16.666666666666664%}\n.col-sm-pull-1 {\n    right: 8.333333333333332%}\n.col-sm-pull-0 {\n    right: 0;\n}\n.col-sm-push-12 {\n    left: 100%}\n.col-sm-push-11 {\n    left: 91.66666666666666%}\n.col-sm-push-10 {\n    left: 83.33333333333334%}\n.col-sm-push-9 {\n    left: 75%}\n.col-sm-push-8 {\n    left: 66.66666666666666%}\n.col-sm-push-7 {\n    left: 58.333333333333336%}\n.col-sm-push-6 {\n    left: 50%}\n.col-sm-push-5 {\n    left: 41.66666666666667%}\n.col-sm-push-4 {\n    left: 33.33333333333333%}\n.col-sm-push-3 {\n    left: 25%}\n.col-sm-push-2 {\n    left: 16.666666666666664%}\n.col-sm-push-1 {\n    left: 8.333333333333332%}\n.col-sm-push-0 {\n    left: 0;\n}\n.col-sm-offset-12 {\n    margin-left: 100%}\n.col-sm-offset-11 {\n    margin-left: 91.66666666666666%}\n.col-sm-offset-10 {\n    margin-left: 83.33333333333334%}\n.col-sm-offset-9 {\n    margin-left: 75%}\n.col-sm-offset-8 {\n    margin-left: 66.66666666666666%}\n.col-sm-offset-7 {\n    margin-left: 58.333333333333336%}\n.col-sm-offset-6 {\n    margin-left: 
50%}\n.col-sm-offset-5 {\n    margin-left: 41.66666666666667%}\n.col-sm-offset-4 {\n    margin-left: 33.33333333333333%}\n.col-sm-offset-3 {\n    margin-left: 25%}\n.col-sm-offset-2 {\n    margin-left: 16.666666666666664%}\n.col-sm-offset-1 {\n    margin-left: 8.333333333333332%}\n.col-sm-offset-0 {\n    margin-left: 0;\n}\n}@media(min-width:992px) {\n    .col-md-1, .col-md-2, .col-md-3, .col-md-4, .col-md-5, .col-md-6, .col-md-7, .col-md-8, .col-md-9, .col-md-10, .col-md-11, .col-md-12 {\n    float: left;\n}\n.col-md-12 {\n    width: 100%}\n.col-md-11 {\n    width: 91.66666666666666%}\n.col-md-10 {\n    width: 83.33333333333334%}\n.col-md-9 {\n    width: 75%}\n.col-md-8 {\n    width: 66.66666666666666%}\n.col-md-7 {\n    width: 58.333333333333336%}\n.col-md-6 {\n    width: 50%}\n.col-md-5 {\n    width: 41.66666666666667%}\n.col-md-4 {\n    width: 33.33333333333333%}\n.col-md-3 {\n    width: 25%}\n.col-md-2 {\n    width: 16.666666666666664%}\n.col-md-1 {\n    width: 8.333333333333332%}\n.col-md-pull-12 {\n    right: 100%}\n.col-md-pull-11 {\n    right: 91.66666666666666%}\n.col-md-pull-10 {\n    right: 83.33333333333334%}\n.col-md-pull-9 {\n    right: 75%}\n.col-md-pull-8 {\n    right: 66.66666666666666%}\n.col-md-pull-7 {\n    right: 58.333333333333336%}\n.col-md-pull-6 {\n    right: 50%}\n.col-md-pull-5 {\n    right: 41.66666666666667%}\n.col-md-pull-4 {\n    right: 33.33333333333333%}\n.col-md-pull-3 {\n    right: 25%}\n.col-md-pull-2 {\n    right: 16.666666666666664%}\n.col-md-pull-1 {\n    right: 8.333333333333332%}\n.col-md-pull-0 {\n    right: 0;\n}\n.col-md-push-12 {\n    left: 100%}\n.col-md-push-11 {\n    left: 91.66666666666666%}\n.col-md-push-10 {\n    left: 83.33333333333334%}\n.col-md-push-9 {\n    left: 75%}\n.col-md-push-8 {\n    left: 66.66666666666666%}\n.col-md-push-7 {\n    left: 58.333333333333336%}\n.col-md-push-6 {\n    left: 50%}\n.col-md-push-5 {\n    left: 41.66666666666667%}\n.col-md-push-4 {\n    left: 33.33333333333333%}\n.col-md-push-3 {\n    left: 25%}\n.col-md-push-2 {\n    left: 16.666666666666664%}\n.col-md-push-1 {\n    left: 8.333333333333332%}\n.col-md-push-0 {\n    left: 0;\n}\n.col-md-offset-12 {\n    margin-left: 100%}\n.col-md-offset-11 {\n    margin-left: 91.66666666666666%}\n.col-md-offset-10 {\n    margin-left: 83.33333333333334%}\n.col-md-offset-9 {\n    margin-left: 75%}\n.col-md-offset-8 {\n    margin-left: 66.66666666666666%}\n.col-md-offset-7 {\n    margin-left: 58.333333333333336%}\n.col-md-offset-6 {\n    margin-left: 50%}\n.col-md-offset-5 {\n    margin-left: 41.66666666666667%}\n.col-md-offset-4 {\n    margin-left: 33.33333333333333%}\n.col-md-offset-3 {\n    margin-left: 25%}\n.col-md-offset-2 {\n    margin-left: 16.666666666666664%}\n.col-md-offset-1 {\n    margin-left: 8.333333333333332%}\n.col-md-offset-0 {\n    margin-left: 0;\n}\n}@media(min-width:1200px) {\n    .col-lg-1, .col-lg-2, .col-lg-3, .col-lg-4, .col-lg-5, .col-lg-6, .col-lg-7, .col-lg-8, .col-lg-9, .col-lg-10, .col-lg-11, .col-lg-12 {\n    float: left;\n}\n.col-lg-12 {\n    width: 100%}\n.col-lg-11 {\n    width: 91.66666666666666%}\n.col-lg-10 {\n    width: 83.33333333333334%}\n.col-lg-9 {\n    width: 75%}\n.col-lg-8 {\n    width: 66.66666666666666%}\n.col-lg-7 {\n    width: 58.333333333333336%}\n.col-lg-6 {\n    width: 50%}\n.col-lg-5 {\n    width: 41.66666666666667%}\n.col-lg-4 {\n    width: 33.33333333333333%}\n.col-lg-3 {\n    width: 25%}\n.col-lg-2 {\n    width: 16.666666666666664%}\n.col-lg-1 {\n    width: 8.333333333333332%}\n.col-lg-pull-12 {\n    right: 
100%}\n.col-lg-pull-11 {\n    right: 91.66666666666666%}\n.col-lg-pull-10 {\n    right: 83.33333333333334%}\n.col-lg-pull-9 {\n    right: 75%}\n.col-lg-pull-8 {\n    right: 66.66666666666666%}\n.col-lg-pull-7 {\n    right: 58.333333333333336%}\n.col-lg-pull-6 {\n    right: 50%}\n.col-lg-pull-5 {\n    right: 41.66666666666667%}\n.col-lg-pull-4 {\n    right: 33.33333333333333%}\n.col-lg-pull-3 {\n    right: 25%}\n.col-lg-pull-2 {\n    right: 16.666666666666664%}\n.col-lg-pull-1 {\n    right: 8.333333333333332%}\n.col-lg-pull-0 {\n    right: 0;\n}\n.col-lg-push-12 {\n    left: 100%}\n.col-lg-push-11 {\n    left: 91.66666666666666%}\n.col-lg-push-10 {\n    left: 83.33333333333334%}\n.col-lg-push-9 {\n    left: 75%}\n.col-lg-push-8 {\n    left: 66.66666666666666%}\n.col-lg-push-7 {\n    left: 58.333333333333336%}\n.col-lg-push-6 {\n    left: 50%}\n.col-lg-push-5 {\n    left: 41.66666666666667%}\n.col-lg-push-4 {\n    left: 33.33333333333333%}\n.col-lg-push-3 {\n    left: 25%}\n.col-lg-push-2 {\n    left: 16.666666666666664%}\n.col-lg-push-1 {\n    left: 8.333333333333332%}\n.col-lg-push-0 {\n    left: 0;\n}\n.col-lg-offset-12 {\n    margin-left: 100%}\n.col-lg-offset-11 {\n    margin-left: 91.66666666666666%}\n.col-lg-offset-10 {\n    margin-left: 83.33333333333334%}\n.col-lg-offset-9 {\n    margin-left: 75%}\n.col-lg-offset-8 {\n    margin-left: 66.66666666666666%}\n.col-lg-offset-7 {\n    margin-left: 58.333333333333336%}\n.col-lg-offset-6 {\n    margin-left: 50%}\n.col-lg-offset-5 {\n    margin-left: 41.66666666666667%}\n.col-lg-offset-4 {\n    margin-left: 33.33333333333333%}\n.col-lg-offset-3 {\n    margin-left: 25%}\n.col-lg-offset-2 {\n    margin-left: 16.666666666666664%}\n.col-lg-offset-1 {\n    margin-left: 8.333333333333332%}\n.col-lg-offset-0 {\n    margin-left: 0;\n}\n}table {\n    max-width: 100%;\n    background-color: transparent;\n}\nth {\n    text-align: left;\n}\n.table {\n    width: 100%;\n    margin-bottom: 21px;\n}\n.table>thead>tr>th, .table>tbody>tr>th, .table>tfoot>tr>th, .table>thead>tr>td, .table>tbody>tr>td, .table>tfoot>tr>td {\n    padding: 8px;\n    line-height: 1.428571429;\n    vertical-align: top;\n    border-top: 1px solid #ddd;\n}\n.table>thead>tr>th {\n    vertical-align: bottom;\n    border-bottom: 2px solid #ddd;\n}\n.table>caption+thead>tr:first-child>th, .table>colgroup+thead>tr:first-child>th, .table>thead:first-child>tr:first-child>th, .table>caption+thead>tr:first-child>td, .table>colgroup+thead>tr:first-child>td, .table>thead:first-child>tr:first-child>td {\n    border-top: 0;\n}\n.table>tbody+tbody {\n    border-top: 2px solid #ddd;\n}\n.table .table {\n    background-color: #fff;\n}\n.table-condensed>thead>tr>th, .table-condensed>tbody>tr>th, .table-condensed>tfoot>tr>th, .table-condensed>thead>tr>td, .table-condensed>tbody>tr>td, .table-condensed>tfoot>tr>td {\n    padding: 5px;\n}\n.table-bordered {\n    border: 1px solid #ddd;\n}\n.table-bordered>thead>tr>th, .table-bordered>tbody>tr>th, .table-bordered>tfoot>tr>th, .table-bordered>thead>tr>td, .table-bordered>tbody>tr>td, .table-bordered>tfoot>tr>td {\n    border: 1px solid #ddd;\n}\n.table-bordered>thead>tr>th, .table-bordered>thead>tr>td {\n    border-bottom-width: 2px;\n}\n.table-striped>tbody>tr:nth-child(odd)>td, .table-striped>tbody>tr:nth-child(odd)>th {\n    background-color: #f9f9f9;\n}\n.table-hover>tbody>tr:hover>td, .table-hover>tbody>tr:hover>th {\n    background-color: #f5f5f5;\n}\ntable col[class*=\"col-\"] {\n    position: static;\n    display: table-column;\n    float: 
none;\n}\ntable td[class*=\"col-\"], table th[class*=\"col-\"] {\n    display: table-cell;\n    float: none;\n}\n.table>thead>tr>.active, .table>tbody>tr>.active, .table>tfoot>tr>.active, .table>thead>.active>td, .table>tbody>.active>td, .table>tfoot>.active>td, .table>thead>.active>th, .table>tbody>.active>th, .table>tfoot>.active>th {\n    background-color: #f5f5f5;\n}\n.table-hover>tbody>tr>.active:hover, .table-hover>tbody>.active:hover>td, .table-hover>tbody>.active:hover>th {\n    background-color: #e8e8e8;\n}\n.table>thead>tr>.success, .table>tbody>tr>.success, .table>tfoot>tr>.success, .table>thead>.success>td, .table>tbody>.success>td, .table>tfoot>.success>td, .table>thead>.success>th, .table>tbody>.success>th, .table>tfoot>.success>th {\n    background-color: #dff0d8;\n}\n.table-hover>tbody>tr>.success:hover, .table-hover>tbody>.success:hover>td, .table-hover>tbody>.success:hover>th {\n    background-color: #d0e9c6;\n}\n.table>thead>tr>.danger, .table>tbody>tr>.danger, .table>tfoot>tr>.danger, .table>thead>.danger>td, .table>tbody>.danger>td, .table>tfoot>.danger>td, .table>thead>.danger>th, .table>tbody>.danger>th, .table>tfoot>.danger>th {\n    background-color: #f2dede;\n}\n.table-hover>tbody>tr>.danger:hover, .table-hover>tbody>.danger:hover>td, .table-hover>tbody>.danger:hover>th {\n    background-color: #ebcccc;\n}\n.table>thead>tr>.warning, .table>tbody>tr>.warning, .table>tfoot>tr>.warning, .table>thead>.warning>td, .table>tbody>.warning>td, .table>tfoot>.warning>td, .table>thead>.warning>th, .table>tbody>.warning>th, .table>tfoot>.warning>th {\n    background-color: #fcf8e3;\n}\n.table-hover>tbody>tr>.warning:hover, .table-hover>tbody>.warning:hover>td, .table-hover>tbody>.warning:hover>th {\n    background-color: #faf2cc;\n}\n@media(max-width:767px) {\n    .table-responsive {\n    width: 100%;\n    margin-bottom: 15.75px;\n    overflow-x: scroll;\n    overflow-y: hidden;\n    border: 1px solid #ddd;\n    -ms-overflow-style: -ms-autohiding-scrollbar;\n    -webkit-overflow-scrolling: touch;\n}\n.table-responsive>.table {\n    margin-bottom: 0;\n}\n.table-responsive>.table>thead>tr>th, .table-responsive>.table>tbody>tr>th, .table-responsive>.table>tfoot>tr>th, .table-responsive>.table>thead>tr>td, .table-responsive>.table>tbody>tr>td, .table-responsive>.table>tfoot>tr>td {\n    white-space: nowrap;\n}\n.table-responsive>.table-bordered {\n    border: 0;\n}\n.table-responsive>.table-bordered>thead>tr>th:first-child, .table-responsive>.table-bordered>tbody>tr>th:first-child, .table-responsive>.table-bordered>tfoot>tr>th:first-child, .table-responsive>.table-bordered>thead>tr>td:first-child, .table-responsive>.table-bordered>tbody>tr>td:first-child, .table-responsive>.table-bordered>tfoot>tr>td:first-child {\n    border-left: 0;\n}\n.table-responsive>.table-bordered>thead>tr>th:last-child, .table-responsive>.table-bordered>tbody>tr>th:last-child, .table-responsive>.table-bordered>tfoot>tr>th:last-child, .table-responsive>.table-bordered>thead>tr>td:last-child, .table-responsive>.table-bordered>tbody>tr>td:last-child, .table-responsive>.table-bordered>tfoot>tr>td:last-child {\n    border-right: 0;\n}\n.table-responsive>.table-bordered>tbody>tr:last-child>th, .table-responsive>.table-bordered>tfoot>tr:last-child>th, .table-responsive>.table-bordered>tbody>tr:last-child>td, .table-responsive>.table-bordered>tfoot>tr:last-child>td {\n    border-bottom: 0;\n}\n}fieldset {\n    padding: 0;\n    margin: 0;\n    border: 0;\n}\nlegend {\n    display: block;\n    width: 100%;\n    
padding: 0;\n    margin-bottom: 21px;\n    font-size: 22.5px;\n    line-height: inherit;\n    color: #333;\n    border: 0;\n    border-bottom: 1px solid #e5e5e5;\n}\nlabel {\n    display: inline-block;\n    margin-bottom: 5px;\n    font-weight: bold;\n}\ninput[type=\"search\"] {\n    -webkit-box-sizing: border-box;\n    -moz-box-sizing: border-box;\n    box-sizing: border-box;\n}\ninput[type=\"radio\"], input[type=\"checkbox\"] {\n    margin: 4px 0 0;\n    margin-top: 1px \\9;\n    line-height: normal;\n}\ninput[type=\"file\"] {\n    display: block;\n}\nselect[multiple], select[size] {\n    height: auto;\n}\nselect optgroup {\n    font-family: inherit;\n    font-size: inherit;\n    font-style: inherit;\n}\ninput[type=\"file\"]:focus, input[type=\"radio\"]:focus, input[type=\"checkbox\"]:focus {\n    outline: thin dotted;\n    outline: 5px auto -webkit-focus-ring-color;\n    outline-offset: -2px;\n}\ninput[type=\"number\"]::-webkit-outer-spin-button, input[type=\"number\"]::-webkit-inner-spin-button {\n    height: auto;\n}\noutput {\n    display: block;\n    padding-top: 7px;\n    font-size: 15px;\n    line-height: 1.428571429;\n    color: #6f6f6f;\n    vertical-align: middle;\n}\n.form-control {\n    display: block;\n    width: 100%;\n    height: 35px;\n    padding: 6px 12px;\n    font-size: 15px;\n    line-height: 1.428571429;\n    color: #6f6f6f;\n    vertical-align: middle;\n    background-color: #fff;\n    background-image: none;\n    border: 1px solid #ccc;\n    border-radius: 0;\n    -webkit-box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075);\n    box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075);\n    -webkit-transition: border-color ease-in-out .15s, box-shadow ease-in-out .15s;\n    transition: border-color ease-in-out .15s, box-shadow ease-in-out .15s;\n}\n.form-control:focus {\n    border-color: #66afe9;\n    outline: 0;\n    -webkit-box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075), 0 0 8px rgba(102, 175, 233, 0.6);\n    box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075), 0 0 8px rgba(102, 175, 233, 0.6);\n}\n.form-control:-moz-placeholder {\n    color: #999;\n}\n.form-control::-moz-placeholder {\n    color: #999;\n    opacity: 1;\n}\n.form-control:-ms-input-placeholder {\n    color: #999;\n}\n.form-control::-webkit-input-placeholder {\n    color: #999;\n}\n.form-control[disabled], .form-control[readonly], fieldset[disabled] .form-control {\n    cursor: not-allowed;\n    background-color: #eee;\n}\ntextarea.form-control {\n    height: auto;\n}\n.form-group {\n    margin-bottom: 15px;\n}\n.radio, .checkbox {\n    display: block;\n    min-height: 21px;\n    padding-left: 20px;\n    margin-top: 10px;\n    margin-bottom: 10px;\n    vertical-align: middle;\n}\n.radio label, .checkbox label {\n    display: inline;\n    margin-bottom: 0;\n    font-weight: normal;\n    cursor: pointer;\n}\n.radio input[type=\"radio\"], .radio-inline input[type=\"radio\"], .checkbox input[type=\"checkbox\"], .checkbox-inline input[type=\"checkbox\"] {\n    float: left;\n    margin-left: -20px;\n}\n.radio+.radio, .checkbox+.checkbox {\n    margin-top: -5px;\n}\n.radio-inline, .checkbox-inline {\n    display: inline-block;\n    padding-left: 20px;\n    margin-bottom: 0;\n    font-weight: normal;\n    vertical-align: middle;\n    cursor: pointer;\n}\n.radio-inline+.radio-inline, .checkbox-inline+.checkbox-inline {\n    margin-top: 0;\n    margin-left: 10px;\n}\ninput[type=\"radio\"][disabled], input[type=\"checkbox\"][disabled], .radio[disabled], .radio-inline[disabled], .checkbox[disabled], 
.checkbox-inline[disabled], fieldset[disabled] input[type=\"radio\"], fieldset[disabled] input[type=\"checkbox\"], fieldset[disabled] .radio, fieldset[disabled] .radio-inline, fieldset[disabled] .checkbox, fieldset[disabled] .checkbox-inline {\n    cursor: not-allowed;\n}\n.input-sm {\n    height: 30px;\n    padding: 5px 10px;\n    font-size: 12px;\n    line-height: 1.5;\n    border-radius: 0;\n}\nselect.input-sm {\n    height: 30px;\n    line-height: 30px;\n}\ntextarea.input-sm {\n    height: auto;\n}\n.input-lg {\n    height: 48px;\n    padding: 10px 16px;\n    font-size: 19px;\n    line-height: 1.33;\n    border-radius: 0;\n}\nselect.input-lg {\n    height: 48px;\n    line-height: 48px;\n}\ntextarea.input-lg {\n    height: auto;\n}\n.has-warning .help-block, .has-warning .control-label, .has-warning .radio, .has-warning .checkbox, .has-warning .radio-inline, .has-warning .checkbox-inline {\n    color: #e99002;\n}\n.has-warning .form-control {\n    border-color: #e99002;\n    -webkit-box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075);\n    box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075);\n}\n.has-warning .form-control:focus {\n    border-color: #b67102;\n    -webkit-box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075), 0 0 6px #febc53;\n    box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075), 0 0 6px #febc53;\n}\n.has-warning .input-group-addon {\n    color: #e99002;\n    background-color: #fcf8e3;\n    border-color: #e99002;\n}\n.has-error .help-block, .has-error .control-label, .has-error .radio, .has-error .checkbox, .has-error .radio-inline, .has-error .checkbox-inline {\n    color: #f04124;\n}\n.has-error .form-control {\n    border-color: #f04124;\n    -webkit-box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075);\n    box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075);\n}\n.has-error .form-control:focus {\n    border-color: #d32a0e;\n    -webkit-box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075), 0 0 6px #f79483;\n    box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075), 0 0 6px #f79483;\n}\n.has-error .input-group-addon {\n    color: #f04124;\n    background-color: #f2dede;\n    border-color: #f04124;\n}\n.has-success .help-block, .has-success .control-label, .has-success .radio, .has-success .checkbox, .has-success .radio-inline, .has-success .checkbox-inline {\n    color: #43ac6a;\n}\n.has-success .form-control {\n    border-color: #43ac6a;\n    -webkit-box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075);\n    box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075);\n}\n.has-success .form-control:focus {\n    border-color: #358753;\n    -webkit-box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075), 0 0 6px #85d0a1;\n    box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075), 0 0 6px #85d0a1;\n}\n.has-success .input-group-addon {\n    color: #43ac6a;\n    background-color: #dff0d8;\n    border-color: #43ac6a;\n}\n.form-control-static {\n    margin-bottom: 0;\n}\n.help-block {\n    display: block;\n    margin-top: 5px;\n    margin-bottom: 10px;\n    color: #626262;\n}\n@media(min-width:768px) {\n    .form-inline .form-group {\n    display: inline-block;\n    margin-bottom: 0;\n    vertical-align: middle;\n}\n.form-inline .form-control {\n    display: inline-block;\n}\n.form-inline select.form-control {\n    width: auto;\n}\n.form-inline .radio, .form-inline .checkbox {\n    display: inline-block;\n    padding-left: 0;\n    margin-top: 0;\n    margin-bottom: 0;\n}\n.form-inline .radio input[type=\"radio\"], .form-inline .checkbox input[type=\"checkbox\"] {\n    float: none;\n    margin-left: 0;\n}\n}.form-horizontal 
.control-label, .form-horizontal .radio, .form-horizontal .checkbox, .form-horizontal .radio-inline, .form-horizontal .checkbox-inline {\n    padding-top: 7px;\n    margin-top: 0;\n    margin-bottom: 0;\n}\n.form-horizontal .radio, .form-horizontal .checkbox {\n    min-height: 28px;\n}\n.form-horizontal .form-group {\n    margin-right: -15px;\n    margin-left: -15px;\n}\n.form-horizontal .form-group:before, .form-horizontal .form-group:after {\n    display: table;\n    content: \" \"}\n.form-horizontal .form-group:after {\n    clear: both;\n}\n.form-horizontal .form-group:before, .form-horizontal .form-group:after {\n    display: table;\n    content: \" \"}\n.form-horizontal .form-group:after {\n    clear: both;\n}\n.form-horizontal .form-group:before, .form-horizontal .form-group:after {\n    display: table;\n    content: \" \"}\n.form-horizontal .form-group:after {\n    clear: both;\n}\n.form-horizontal .form-group:before, .form-horizontal .form-group:after {\n    display: table;\n    content: \" \"}\n.form-horizontal .form-group:after {\n    clear: both;\n}\n.form-horizontal .form-group:before, .form-horizontal .form-group:after {\n    display: table;\n    content: \" \"}\n.form-horizontal .form-group:after {\n    clear: both;\n}\n.form-horizontal .form-control-static {\n    padding-top: 7px;\n}\n@media(min-width:768px) {\n    .form-horizontal .control-label {\n    text-align: right;\n}\n}.btn {\n    display: inline-block;\n    padding: 6px 12px;\n    margin-bottom: 0;\n    font-size: 15px;\n    font-weight: normal;\n    line-height: 1.428571429;\n    text-align: center;\n    white-space: nowrap;\n    vertical-align: middle;\n    cursor: pointer;\n    background-image: none;\n    border: 1px solid transparent;\n    border-radius: 0;\n    -webkit-user-select: none;\n    -moz-user-select: none;\n    -ms-user-select: none;\n    -o-user-select: none;\n    user-select: none;\n}\n.btn:focus {\n    outline: thin dotted;\n    outline: 5px auto -webkit-focus-ring-color;\n    outline-offset: -2px;\n}\n.btn:hover, .btn:focus {\n    color: #333;\n    text-decoration: none;\n}\n.btn:active, .btn.active {\n    background-image: none;\n    outline: 0;\n    -webkit-box-shadow: inset 0 3px 5px rgba(0, 0, 0, 0.125);\n    box-shadow: inset 0 3px 5px rgba(0, 0, 0, 0.125);\n}\n.btn.disabled, .btn[disabled], fieldset[disabled] .btn {\n    pointer-events: none;\n    cursor: not-allowed;\n    opacity: .65;\n    filter: alpha(opacity=65);\n    -webkit-box-shadow: none;\n    box-shadow: none;\n}\n.btn-default {\n    color: #333;\n    background-color: #e7e7e7;\n    border-color: #dadada;\n}\n.btn-default:hover, .btn-default:focus, .btn-default:active, .btn-default.active, .open .dropdown-toggle.btn-default {\n    color: #333;\n    background-color: #d3d3d3;\n    border-color: #bbb;\n}\n.btn-default:active, .btn-default.active, .open .dropdown-toggle.btn-default {\n    background-image: none;\n}\n.btn-default.disabled, .btn-default[disabled], fieldset[disabled] .btn-default, .btn-default.disabled:hover, .btn-default[disabled]:hover, fieldset[disabled] .btn-default:hover, .btn-default.disabled:focus, .btn-default[disabled]:focus, fieldset[disabled] .btn-default:focus, .btn-default.disabled:active, .btn-default[disabled]:active, fieldset[disabled] .btn-default:active, .btn-default.disabled.active, .btn-default[disabled].active, fieldset[disabled] .btn-default.active {\n    background-color: #e7e7e7;\n    border-color: #dadada;\n}\n.btn-default .badge {\n    color: #e7e7e7;\n    background-color: 
#fff;\n}\n.btn-primary {\n    color: #fff;\n    background-color: #008cba;\n    border-color: #0079a1;\n}\n.btn-primary:hover, .btn-primary:focus, .btn-primary:active, .btn-primary.active, .open .dropdown-toggle.btn-primary {\n    color: #fff;\n    background-color: #006d91;\n    border-color: #004b63;\n}\n.btn-primary:active, .btn-primary.active, .open .dropdown-toggle.btn-primary {\n    background-image: none;\n}\n.btn-primary.disabled, .btn-primary[disabled], fieldset[disabled] .btn-primary, .btn-primary.disabled:hover, .btn-primary[disabled]:hover, fieldset[disabled] .btn-primary:hover, .btn-primary.disabled:focus, .btn-primary[disabled]:focus, fieldset[disabled] .btn-primary:focus, .btn-primary.disabled:active, .btn-primary[disabled]:active, fieldset[disabled] .btn-primary:active, .btn-primary.disabled.active, .btn-primary[disabled].active, fieldset[disabled] .btn-primary.active {\n    background-color: #008cba;\n    border-color: #0079a1;\n}\n.btn-primary .badge {\n    color: #008cba;\n    background-color: #fff;\n}\n.btn-warning {\n    color: #fff;\n    background-color: #e99002;\n    border-color: #d08002;\n}\n.btn-warning:hover, .btn-warning:focus, .btn-warning:active, .btn-warning.active, .open .dropdown-toggle.btn-warning {\n    color: #fff;\n    background-color: #c17702;\n    border-color: #935b01;\n}\n.btn-warning:active, .btn-warning.active, .open .dropdown-toggle.btn-warning {\n    background-image: none;\n}\n.btn-warning.disabled, .btn-warning[disabled], fieldset[disabled] .btn-warning, .btn-warning.disabled:hover, .btn-warning[disabled]:hover, fieldset[disabled] .btn-warning:hover, .btn-warning.disabled:focus, .btn-warning[disabled]:focus, fieldset[disabled] .btn-warning:focus, .btn-warning.disabled:active, .btn-warning[disabled]:active, fieldset[disabled] .btn-warning:active, .btn-warning.disabled.active, .btn-warning[disabled].active, fieldset[disabled] .btn-warning.active {\n    background-color: #e99002;\n    border-color: #d08002;\n}\n.btn-warning .badge {\n    color: #e99002;\n    background-color: #fff;\n}\n.btn-danger {\n    color: #fff;\n    background-color: #f04124;\n    border-color: #ea2f10;\n}\n.btn-danger:hover, .btn-danger:focus, .btn-danger:active, .btn-danger.active, .open .dropdown-toggle.btn-danger {\n    color: #fff;\n    background-color: #dc2c0f;\n    border-color: #b1240c;\n}\n.btn-danger:active, .btn-danger.active, .open .dropdown-toggle.btn-danger {\n    background-image: none;\n}\n.btn-danger.disabled, .btn-danger[disabled], fieldset[disabled] .btn-danger, .btn-danger.disabled:hover, .btn-danger[disabled]:hover, fieldset[disabled] .btn-danger:hover, .btn-danger.disabled:focus, .btn-danger[disabled]:focus, fieldset[disabled] .btn-danger:focus, .btn-danger.disabled:active, .btn-danger[disabled]:active, fieldset[disabled] .btn-danger:active, .btn-danger.disabled.active, .btn-danger[disabled].active, fieldset[disabled] .btn-danger.active {\n    background-color: #f04124;\n    border-color: #ea2f10;\n}\n.btn-danger .badge {\n    color: #f04124;\n    background-color: #fff;\n}\n.btn-success {\n    color: #fff;\n    background-color: #43ac6a;\n    border-color: #3c9a5f;\n}\n.btn-success:hover, .btn-success:focus, .btn-success:active, .btn-success.active, .open .dropdown-toggle.btn-success {\n    color: #fff;\n    background-color: #388f58;\n    border-color: #2b6e44;\n}\n.btn-success:active, .btn-success.active, .open .dropdown-toggle.btn-success {\n    background-image: none;\n}\n.btn-success.disabled, .btn-success[disabled], fieldset[disabled] 
.btn-success, .btn-success.disabled:hover, .btn-success[disabled]:hover, fieldset[disabled] .btn-success:hover, .btn-success.disabled:focus, .btn-success[disabled]:focus, fieldset[disabled] .btn-success:focus, .btn-success.disabled:active, .btn-success[disabled]:active, fieldset[disabled] .btn-success:active, .btn-success.disabled.active, .btn-success[disabled].active, fieldset[disabled] .btn-success.active {\n    background-color: #43ac6a;\n    border-color: #3c9a5f;\n}\n.btn-success .badge {\n    color: #43ac6a;\n    background-color: #fff;\n}\n.btn-info {\n    color: #fff;\n    background-color: #5bc0de;\n    border-color: #46b8da;\n}\n.btn-info:hover, .btn-info:focus, .btn-info:active, .btn-info.active, .open .dropdown-toggle.btn-info {\n    color: #fff;\n    background-color: #39b3d7;\n    border-color: #269abc;\n}\n.btn-info:active, .btn-info.active, .open .dropdown-toggle.btn-info {\n    background-image: none;\n}\n.btn-info.disabled, .btn-info[disabled], fieldset[disabled] .btn-info, .btn-info.disabled:hover, .btn-info[disabled]:hover, fieldset[disabled] .btn-info:hover, .btn-info.disabled:focus, .btn-info[disabled]:focus, fieldset[disabled] .btn-info:focus, .btn-info.disabled:active, .btn-info[disabled]:active, fieldset[disabled] .btn-info:active, .btn-info.disabled.active, .btn-info[disabled].active, fieldset[disabled] .btn-info.active {\n    background-color: #5bc0de;\n    border-color: #46b8da;\n}\n.btn-info .badge {\n    color: #5bc0de;\n    background-color: #fff;\n}\n.btn-link {\n    font-weight: normal;\n    color: #008cba;\n    cursor: pointer;\n    border-radius: 0;\n}\n.btn-link, .btn-link:active, .btn-link[disabled], fieldset[disabled] .btn-link {\n    background-color: transparent;\n    -webkit-box-shadow: none;\n    box-shadow: none;\n}\n.btn-link, .btn-link:hover, .btn-link:focus, .btn-link:active {\n    border-color: transparent;\n}\n.btn-link:hover, .btn-link:focus {\n    color: #00526e;\n    text-decoration: underline;\n    background-color: transparent;\n}\n.btn-link[disabled]:hover, fieldset[disabled] .btn-link:hover, .btn-link[disabled]:focus, fieldset[disabled] .btn-link:focus {\n    color: #999;\n    text-decoration: none;\n}\n.btn-lg {\n    padding: 10px 16px;\n    font-size: 19px;\n    line-height: 1.33;\n    border-radius: 0;\n}\n.btn-sm {\n    padding: 5px 10px;\n    font-size: 12px;\n    line-height: 1.5;\n    border-radius: 0;\n}\n.btn-xs {\n    padding: 1px 5px;\n    font-size: 12px;\n    line-height: 1.5;\n    border-radius: 0;\n}\n.btn-block {\n    display: block;\n    width: 100%;\n    padding-right: 0;\n    padding-left: 0;\n}\n.btn-block+.btn-block {\n    margin-top: 5px;\n}\ninput[type=\"submit\"].btn-block, input[type=\"reset\"].btn-block, input[type=\"button\"].btn-block {\n    width: 100%}\n.fade {\n    opacity: 0;\n    -webkit-transition: opacity .15s linear;\n    transition: opacity .15s linear;\n}\n.fade.in {\n    opacity: 1;\n}\n.collapse {\n    display: none;\n}\n.collapse.in {\n    display: block;\n}\n.collapsing {\n    position: relative;\n    height: 0;\n    overflow: hidden;\n    -webkit-transition: height .35s ease;\n    transition: height .35s ease;\n}\n@font-face {\n    font-family: 'Glyphicons Halflings';\n    src: url('../fonts/glyphicons-halflings-regular.eot');\n    src: url('../fonts/glyphicons-halflings-regular.eot?#iefix') format('embedded-opentype'), url('../fonts/glyphicons-halflings-regular.woff') format('woff'), url('../fonts/glyphicons-halflings-regular.ttf') format('truetype'), 
url('../fonts/glyphicons-halflings-regular.svg#glyphicons-halflingsregular') format('svg');\n}\n.glyphicon {\n    position: relative;\n    top: 1px;\n    display: inline-block;\n    font-family: 'Glyphicons Halflings';\n    -webkit-font-smoothing: antialiased;\n    font-style: normal;\n    font-weight: normal;\n    line-height: 1;\n    -moz-osx-font-smoothing: grayscale;\n}\n.glyphicon:empty {\n    width: 1em;\n}\n.glyphicon-asterisk:before {\n    content: \"\\2a\"}\n.glyphicon-plus:before {\n    content: \"\\2b\"}\n.glyphicon-euro:before {\n    content: \"\\20ac\"}\n.glyphicon-minus:before {\n    content: \"\\2212\"}\n.glyphicon-cloud:before {\n    content: \"\\2601\"}\n.glyphicon-envelope:before {\n    content: \"\\2709\"}\n.glyphicon-pencil:before {\n    content: \"\\270f\"}\n.glyphicon-glass:before {\n    content: \"\\e001\"}\n.glyphicon-music:before {\n    content: \"\\e002\"}\n.glyphicon-search:before {\n    content: \"\\e003\"}\n.glyphicon-heart:before {\n    content: \"\\e005\"}\n.glyphicon-star:before {\n    content: \"\\e006\"}\n.glyphicon-star-empty:before {\n    content: \"\\e007\"}\n.glyphicon-user:before {\n    content: \"\\e008\"}\n.glyphicon-film:before {\n    content: \"\\e009\"}\n.glyphicon-th-large:before {\n    content: \"\\e010\"}\n.glyphicon-th:before {\n    content: \"\\e011\"}\n.glyphicon-th-list:before {\n    content: \"\\e012\"}\n.glyphicon-ok:before {\n    content: \"\\e013\"}\n.glyphicon-remove:before {\n    content: \"\\e014\"}\n.glyphicon-zoom-in:before {\n    content: \"\\e015\"}\n.glyphicon-zoom-out:before {\n    content: \"\\e016\"}\n.glyphicon-off:before {\n    content: \"\\e017\"}\n.glyphicon-signal:before {\n    content: \"\\e018\"}\n.glyphicon-cog:before {\n    content: \"\\e019\"}\n.glyphicon-trash:before {\n    content: \"\\e020\"}\n.glyphicon-home:before {\n    content: \"\\e021\"}\n.glyphicon-file:before {\n    content: \"\\e022\"}\n.glyphicon-time:before {\n    content: \"\\e023\"}\n.glyphicon-road:before {\n    content: \"\\e024\"}\n.glyphicon-download-alt:before {\n    content: \"\\e025\"}\n.glyphicon-download:before {\n    content: \"\\e026\"}\n.glyphicon-upload:before {\n    content: \"\\e027\"}\n.glyphicon-inbox:before {\n    content: \"\\e028\"}\n.glyphicon-play-circle:before {\n    content: \"\\e029\"}\n.glyphicon-repeat:before {\n    content: \"\\e030\"}\n.glyphicon-refresh:before {\n    content: \"\\e031\"}\n.glyphicon-list-alt:before {\n    content: \"\\e032\"}\n.glyphicon-lock:before {\n    content: \"\\e033\"}\n.glyphicon-flag:before {\n    content: \"\\e034\"}\n.glyphicon-headphones:before {\n    content: \"\\e035\"}\n.glyphicon-volume-off:before {\n    content: \"\\e036\"}\n.glyphicon-volume-down:before {\n    content: \"\\e037\"}\n.glyphicon-volume-up:before {\n    content: \"\\e038\"}\n.glyphicon-qrcode:before {\n    content: \"\\e039\"}\n.glyphicon-barcode:before {\n    content: \"\\e040\"}\n.glyphicon-tag:before {\n    content: \"\\e041\"}\n.glyphicon-tags:before {\n    content: \"\\e042\"}\n.glyphicon-book:before {\n    content: \"\\e043\"}\n.glyphicon-bookmark:before {\n    content: \"\\e044\"}\n.glyphicon-print:before {\n    content: \"\\e045\"}\n.glyphicon-camera:before {\n    content: \"\\e046\"}\n.glyphicon-font:before {\n    content: \"\\e047\"}\n.glyphicon-bold:before {\n    content: \"\\e048\"}\n.glyphicon-italic:before {\n    content: \"\\e049\"}\n.glyphicon-text-height:before {\n    content: \"\\e050\"}\n.glyphicon-text-width:before {\n    content: \"\\e051\"}\n.glyphicon-align-left:before {\n    content: 
\"\\e052\"}\n.glyphicon-align-center:before {\n    content: \"\\e053\"}\n.glyphicon-align-right:before {\n    content: \"\\e054\"}\n.glyphicon-align-justify:before {\n    content: \"\\e055\"}\n.glyphicon-list:before {\n    content: \"\\e056\"}\n.glyphicon-indent-left:before {\n    content: \"\\e057\"}\n.glyphicon-indent-right:before {\n    content: \"\\e058\"}\n.glyphicon-facetime-video:before {\n    content: \"\\e059\"}\n.glyphicon-picture:before {\n    content: \"\\e060\"}\n.glyphicon-map-marker:before {\n    content: \"\\e062\"}\n.glyphicon-adjust:before {\n    content: \"\\e063\"}\n.glyphicon-tint:before {\n    content: \"\\e064\"}\n.glyphicon-edit:before {\n    content: \"\\e065\"}\n.glyphicon-share:before {\n    content: \"\\e066\"}\n.glyphicon-check:before {\n    content: \"\\e067\"}\n.glyphicon-move:before {\n    content: \"\\e068\"}\n.glyphicon-step-backward:before {\n    content: \"\\e069\"}\n.glyphicon-fast-backward:before {\n    content: \"\\e070\"}\n.glyphicon-backward:before {\n    content: \"\\e071\"}\n.glyphicon-play:before {\n    content: \"\\e072\"}\n.glyphicon-pause:before {\n    content: \"\\e073\"}\n.glyphicon-stop:before {\n    content: \"\\e074\"}\n.glyphicon-forward:before {\n    content: \"\\e075\"}\n.glyphicon-fast-forward:before {\n    content: \"\\e076\"}\n.glyphicon-step-forward:before {\n    content: \"\\e077\"}\n.glyphicon-eject:before {\n    content: \"\\e078\"}\n.glyphicon-chevron-left:before {\n    content: \"\\e079\"}\n.glyphicon-chevron-right:before {\n    content: \"\\e080\"}\n.glyphicon-plus-sign:before {\n    content: \"\\e081\"}\n.glyphicon-minus-sign:before {\n    content: \"\\e082\"}\n.glyphicon-remove-sign:before {\n    content: \"\\e083\"}\n.glyphicon-ok-sign:before {\n    content: \"\\e084\"}\n.glyphicon-question-sign:before {\n    content: \"\\e085\"}\n.glyphicon-info-sign:before {\n    content: \"\\e086\"}\n.glyphicon-screenshot:before {\n    content: \"\\e087\"}\n.glyphicon-remove-circle:before {\n    content: \"\\e088\"}\n.glyphicon-ok-circle:before {\n    content: \"\\e089\"}\n.glyphicon-ban-circle:before {\n    content: \"\\e090\"}\n.glyphicon-arrow-left:before {\n    content: \"\\e091\"}\n.glyphicon-arrow-right:before {\n    content: \"\\e092\"}\n.glyphicon-arrow-up:before {\n    content: \"\\e093\"}\n.glyphicon-arrow-down:before {\n    content: \"\\e094\"}\n.glyphicon-share-alt:before {\n    content: \"\\e095\"}\n.glyphicon-resize-full:before {\n    content: \"\\e096\"}\n.glyphicon-resize-small:before {\n    content: \"\\e097\"}\n.glyphicon-exclamation-sign:before {\n    content: \"\\e101\"}\n.glyphicon-gift:before {\n    content: \"\\e102\"}\n.glyphicon-leaf:before {\n    content: \"\\e103\"}\n.glyphicon-fire:before {\n    content: \"\\e104\"}\n.glyphicon-eye-open:before {\n    content: \"\\e105\"}\n.glyphicon-eye-close:before {\n    content: \"\\e106\"}\n.glyphicon-warning-sign:before {\n    content: \"\\e107\"}\n.glyphicon-plane:before {\n    content: \"\\e108\"}\n.glyphicon-calendar:before {\n    content: \"\\e109\"}\n.glyphicon-random:before {\n    content: \"\\e110\"}\n.glyphicon-comment:before {\n    content: \"\\e111\"}\n.glyphicon-magnet:before {\n    content: \"\\e112\"}\n.glyphicon-chevron-up:before {\n    content: \"\\e113\"}\n.glyphicon-chevron-down:before {\n    content: \"\\e114\"}\n.glyphicon-retweet:before {\n    content: \"\\e115\"}\n.glyphicon-shopping-cart:before {\n    content: \"\\e116\"}\n.glyphicon-folder-close:before {\n    content: \"\\e117\"}\n.glyphicon-folder-open:before {\n    content: 
\"\\e118\"}\n.glyphicon-resize-vertical:before {\n    content: \"\\e119\"}\n.glyphicon-resize-horizontal:before {\n    content: \"\\e120\"}\n.glyphicon-hdd:before {\n    content: \"\\e121\"}\n.glyphicon-bullhorn:before {\n    content: \"\\e122\"}\n.glyphicon-bell:before {\n    content: \"\\e123\"}\n.glyphicon-certificate:before {\n    content: \"\\e124\"}\n.glyphicon-thumbs-up:before {\n    content: \"\\e125\"}\n.glyphicon-thumbs-down:before {\n    content: \"\\e126\"}\n.glyphicon-hand-right:before {\n    content: \"\\e127\"}\n.glyphicon-hand-left:before {\n    content: \"\\e128\"}\n.glyphicon-hand-up:before {\n    content: \"\\e129\"}\n.glyphicon-hand-down:before {\n    content: \"\\e130\"}\n.glyphicon-circle-arrow-right:before {\n    content: \"\\e131\"}\n.glyphicon-circle-arrow-left:before {\n    content: \"\\e132\"}\n.glyphicon-circle-arrow-up:before {\n    content: \"\\e133\"}\n.glyphicon-circle-arrow-down:before {\n    content: \"\\e134\"}\n.glyphicon-globe:before {\n    content: \"\\e135\"}\n.glyphicon-wrench:before {\n    content: \"\\e136\"}\n.glyphicon-tasks:before {\n    content: \"\\e137\"}\n.glyphicon-filter:before {\n    content: \"\\e138\"}\n.glyphicon-briefcase:before {\n    content: \"\\e139\"}\n.glyphicon-fullscreen:before {\n    content: \"\\e140\"}\n.glyphicon-dashboard:before {\n    content: \"\\e141\"}\n.glyphicon-paperclip:before {\n    content: \"\\e142\"}\n.glyphicon-heart-empty:before {\n    content: \"\\e143\"}\n.glyphicon-link:before {\n    content: \"\\e144\"}\n.glyphicon-phone:before {\n    content: \"\\e145\"}\n.glyphicon-pushpin:before {\n    content: \"\\e146\"}\n.glyphicon-usd:before {\n    content: \"\\e148\"}\n.glyphicon-gbp:before {\n    content: \"\\e149\"}\n.glyphicon-sort:before {\n    content: \"\\e150\"}\n.glyphicon-sort-by-alphabet:before {\n    content: \"\\e151\"}\n.glyphicon-sort-by-alphabet-alt:before {\n    content: \"\\e152\"}\n.glyphicon-sort-by-order:before {\n    content: \"\\e153\"}\n.glyphicon-sort-by-order-alt:before {\n    content: \"\\e154\"}\n.glyphicon-sort-by-attributes:before {\n    content: \"\\e155\"}\n.glyphicon-sort-by-attributes-alt:before {\n    content: \"\\e156\"}\n.glyphicon-unchecked:before {\n    content: \"\\e157\"}\n.glyphicon-expand:before {\n    content: \"\\e158\"}\n.glyphicon-collapse-down:before {\n    content: \"\\e159\"}\n.glyphicon-collapse-up:before {\n    content: \"\\e160\"}\n.glyphicon-log-in:before {\n    content: \"\\e161\"}\n.glyphicon-flash:before {\n    content: \"\\e162\"}\n.glyphicon-log-out:before {\n    content: \"\\e163\"}\n.glyphicon-new-window:before {\n    content: \"\\e164\"}\n.glyphicon-record:before {\n    content: \"\\e165\"}\n.glyphicon-save:before {\n    content: \"\\e166\"}\n.glyphicon-open:before {\n    content: \"\\e167\"}\n.glyphicon-saved:before {\n    content: \"\\e168\"}\n.glyphicon-import:before {\n    content: \"\\e169\"}\n.glyphicon-export:before {\n    content: \"\\e170\"}\n.glyphicon-send:before {\n    content: \"\\e171\"}\n.glyphicon-floppy-disk:before {\n    content: \"\\e172\"}\n.glyphicon-floppy-saved:before {\n    content: \"\\e173\"}\n.glyphicon-floppy-remove:before {\n    content: \"\\e174\"}\n.glyphicon-floppy-save:before {\n    content: \"\\e175\"}\n.glyphicon-floppy-open:before {\n    content: \"\\e176\"}\n.glyphicon-credit-card:before {\n    content: \"\\e177\"}\n.glyphicon-transfer:before {\n    content: \"\\e178\"}\n.glyphicon-cutlery:before {\n    content: \"\\e179\"}\n.glyphicon-header:before {\n    content: \"\\e180\"}\n.glyphicon-compressed:before {\n    
content: \"\\e181\"}\n.glyphicon-earphone:before {\n    content: \"\\e182\"}\n.glyphicon-phone-alt:before {\n    content: \"\\e183\"}\n.glyphicon-tower:before {\n    content: \"\\e184\"}\n.glyphicon-stats:before {\n    content: \"\\e185\"}\n.glyphicon-sd-video:before {\n    content: \"\\e186\"}\n.glyphicon-hd-video:before {\n    content: \"\\e187\"}\n.glyphicon-subtitles:before {\n    content: \"\\e188\"}\n.glyphicon-sound-stereo:before {\n    content: \"\\e189\"}\n.glyphicon-sound-dolby:before {\n    content: \"\\e190\"}\n.glyphicon-sound-5-1:before {\n    content: \"\\e191\"}\n.glyphicon-sound-6-1:before {\n    content: \"\\e192\"}\n.glyphicon-sound-7-1:before {\n    content: \"\\e193\"}\n.glyphicon-copyright-mark:before {\n    content: \"\\e194\"}\n.glyphicon-registration-mark:before {\n    content: \"\\e195\"}\n.glyphicon-cloud-download:before {\n    content: \"\\e197\"}\n.glyphicon-cloud-upload:before {\n    content: \"\\e198\"}\n.glyphicon-tree-conifer:before {\n    content: \"\\e199\"}\n.glyphicon-tree-deciduous:before {\n    content: \"\\e200\"}\n.caret {\n    display: inline-block;\n    width: 0;\n    height: 0;\n    margin-left: 2px;\n    vertical-align: middle;\n    border-top: 4px solid;\n    border-right: 4px solid transparent;\n    border-left: 4px solid transparent;\n}\n.dropdown {\n    position: relative;\n}\n.dropdown-toggle:focus {\n    outline: 0;\n}\n.dropdown-menu {\n    position: absolute;\n    top: 100%;\n    left: 0;\n    z-index: 1000;\n    display: none;\n    float: left;\n    min-width: 160px;\n    padding: 5px 0;\n    margin: 2px 0 0;\n    font-size: 15px;\n    list-style: none;\n    background-color: #fff;\n    border: 1px solid #ccc;\n    border: 1px solid rgba(0, 0, 0, 0.15);\n    border-radius: 0;\n    -webkit-box-shadow: 0 6px 12px rgba(0, 0, 0, 0.175);\n    box-shadow: 0 6px 12px rgba(0, 0, 0, 0.175);\n    background-clip: padding-box;\n}\n.dropdown-menu.pull-right {\n    right: 0;\n    left: auto;\n}\n.dropdown-menu .divider {\n    height: 1px;\n    margin: 9.5px 0;\n    overflow: hidden;\n    background-color: rgba(0, 0, 0, 0.2);\n}\n.dropdown-menu>li>a {\n    display: block;\n    padding: 3px 20px;\n    clear: both;\n    font-weight: normal;\n    line-height: 1.428571429;\n    color: #555;\n    white-space: nowrap;\n}\n.dropdown-menu>li>a:hover, .dropdown-menu>li>a:focus {\n    color: #262626;\n    text-decoration: none;\n    background-color: #eee;\n}\n.dropdown-menu>.active>a, .dropdown-menu>.active>a:hover, .dropdown-menu>.active>a:focus {\n    color: #fff;\n    text-decoration: none;\n    background-color: #008cba;\n    outline: 0;\n}\n.dropdown-menu>.disabled>a, .dropdown-menu>.disabled>a:hover, .dropdown-menu>.disabled>a:focus {\n    color: #999;\n}\n.dropdown-menu>.disabled>a:hover, .dropdown-menu>.disabled>a:focus {\n    text-decoration: none;\n    cursor: not-allowed;\n    background-color: transparent;\n    background-image: none;\n    filter: progid:DXImageTransform.Microsoft.gradient(enabled=false);\n}\n.open>.dropdown-menu {\n    display: block;\n}\n.open>a {\n    outline: 0;\n}\n.dropdown-header {\n    display: block;\n    padding: 3px 20px;\n    font-size: 12px;\n    line-height: 1.428571429;\n    color: #999;\n}\n.dropdown-backdrop {\n    position: fixed;\n    top: 0;\n    right: 0;\n    bottom: 0;\n    left: 0;\n    z-index: 990;\n}\n.pull-right>.dropdown-menu {\n    right: 0;\n    left: auto;\n}\n.dropup .caret, .navbar-fixed-bottom .dropdown .caret {\n    border-top: 0;\n    border-bottom: 4px solid;\n    content: \"\"}\n.dropup 
.dropdown-menu, .navbar-fixed-bottom .dropdown .dropdown-menu {\n    top: auto;\n    bottom: 100%;\n    margin-bottom: 1px;\n}\n@media(min-width:768px) {\n    .navbar-right .dropdown-menu {\n    right: 0;\n    left: auto;\n}\n}.btn-group, .btn-group-vertical {\n    position: relative;\n    display: inline-block;\n    vertical-align: middle;\n}\n.btn-group>.btn, .btn-group-vertical>.btn {\n    position: relative;\n    float: left;\n}\n.btn-group>.btn:hover, .btn-group-vertical>.btn:hover, .btn-group>.btn:focus, .btn-group-vertical>.btn:focus, .btn-group>.btn:active, .btn-group-vertical>.btn:active, .btn-group>.btn.active, .btn-group-vertical>.btn.active {\n    z-index: 2;\n}\n.btn-group>.btn:focus, .btn-group-vertical>.btn:focus {\n    outline: 0;\n}\n.btn-group .btn+.btn, .btn-group .btn+.btn-group, .btn-group .btn-group+.btn, .btn-group .btn-group+.btn-group {\n    margin-left: -1px;\n}\n.btn-toolbar:before, .btn-toolbar:after {\n    display: table;\n    content: \" \"}\n.btn-toolbar:after {\n    clear: both;\n}\n.btn-toolbar .btn-group {\n    float: left;\n}\n.btn-toolbar>.btn+.btn, .btn-toolbar>.btn-group+.btn, .btn-toolbar>.btn+.btn-group, .btn-toolbar>.btn-group+.btn-group {\n    margin-left: 5px;\n}\n.btn-group>.btn:not(:first-child):not(:last-child):not(.dropdown-toggle) {\n    border-radius: 0;\n}\n.btn-group>.btn:first-child {\n    margin-left: 0;\n}\n.btn-group>.btn:first-child:not(:last-child):not(.dropdown-toggle) {\n    border-top-right-radius: 0;\n    border-bottom-right-radius: 0;\n}\n.btn-group>.btn:last-child:not(:first-child), .btn-group>.dropdown-toggle:not(:first-child) {\n    border-bottom-left-radius: 0;\n    border-top-left-radius: 0;\n}\n.btn-group>.btn-group {\n    float: left;\n}\n.btn-group>.btn-group:not(:first-child):not(:last-child)>.btn {\n    border-radius: 0;\n}\n.btn-group>.btn-group:first-child>.btn:last-child, .btn-group>.btn-group:first-child>.dropdown-toggle {\n    border-top-right-radius: 0;\n    border-bottom-right-radius: 0;\n}\n.btn-group>.btn-group:last-child>.btn:first-child {\n    border-bottom-left-radius: 0;\n    border-top-left-radius: 0;\n}\n.btn-group .dropdown-toggle:active, .btn-group.open .dropdown-toggle {\n    outline: 0;\n}\n.btn-group-xs>.btn {\n    padding: 1px 5px;\n    font-size: 12px;\n    line-height: 1.5;\n    border-radius: 0;\n}\n.btn-group-sm>.btn {\n    padding: 5px 10px;\n    font-size: 12px;\n    line-height: 1.5;\n    border-radius: 0;\n}\n.btn-group-lg>.btn {\n    padding: 10px 16px;\n    font-size: 19px;\n    line-height: 1.33;\n    border-radius: 0;\n}\n.btn-group>.btn+.dropdown-toggle {\n    padding-right: 8px;\n    padding-left: 8px;\n}\n.btn-group>.btn-lg+.dropdown-toggle {\n    padding-right: 12px;\n    padding-left: 12px;\n}\n.btn-group.open .dropdown-toggle {\n    -webkit-box-shadow: inset 0 3px 5px rgba(0, 0, 0, 0.125);\n    box-shadow: inset 0 3px 5px rgba(0, 0, 0, 0.125);\n}\n.btn-group.open .dropdown-toggle.btn-link {\n    -webkit-box-shadow: none;\n    box-shadow: none;\n}\n.btn .caret {\n    margin-left: 
0;\n}\n.btn-lg .caret {\n    border-width: 5px 5px 0;\n    border-bottom-width: 0;\n}\n.dropup .btn-lg .caret {\n    border-width: 0 5px 5px;\n}\n.btn-group-vertical>.btn, .btn-group-vertical>.btn-group, .btn-group-vertical>.btn-group>.btn {\n    display: block;\n    float: none;\n    width: 100%;\n    max-width: 100%}\n.btn-group-vertical>.btn-group:before, .btn-group-vertical>.btn-group:after {\n    display: table;\n    content: \" \"}\n.btn-group-vertical>.btn-group:after {\n    clear: both;\n}\n.btn-group-vertical>.btn-group>.btn {\n    float: none;\n}\n.btn-group-vertical>.btn+.btn, .btn-group-vertical>.btn+.btn-group, .btn-group-vertical>.btn-group+.btn, .btn-group-vertical>.btn-group+.btn-group {\n    margin-top: -1px;\n    margin-left: 0;\n}\n.btn-group-vertical>.btn:not(:first-child):not(:last-child) {\n    border-radius: 0;\n}\n.btn-group-vertical>.btn:first-child:not(:last-child) {\n    border-top-right-radius: 0;\n    border-bottom-right-radius: 0;\n    border-bottom-left-radius: 0;\n}\n.btn-group-vertical>.btn:last-child:not(:first-child) {\n    border-top-right-radius: 0;\n    border-bottom-left-radius: 0;\n    border-top-left-radius: 0;\n}\n.btn-group-vertical>.btn-group:not(:first-child):not(:last-child)>.btn {\n    border-radius: 0;\n}\n.btn-group-vertical>.btn-group:first-child>.btn:last-child, .btn-group-vertical>.btn-group:first-child>.dropdown-toggle {\n    border-bottom-right-radius: 0;\n    border-bottom-left-radius: 0;\n}\n.btn-group-vertical>.btn-group:last-child>.btn:first-child {\n    border-top-right-radius: 0;\n    border-top-left-radius: 0;\n}\n.btn-group-justified {\n    display: table;\n    width: 100%;\n    border-collapse: separate;\n    table-layout: fixed;\n}\n.btn-group-justified>.btn, .btn-group-justified>.btn-group {\n    display: table-cell;\n    float: none;\n    width: 1%}\n.btn-group-justified>.btn-group .btn {\n    width: 100%}\n[data-toggle=\"buttons\"]>.btn>input[type=\"radio\"], [data-toggle=\"buttons\"]>.btn>input[type=\"checkbox\"] {\n    display: none;\n}\n.input-group {\n    position: relative;\n    display: table;\n    border-collapse: separate;\n}\n.input-group[class*=\"col-\"] {\n    float: none;\n    padding-right: 0;\n    padding-left: 0;\n}\n.input-group .form-control {\n    width: 100%;\n    margin-bottom: 0;\n}\n.input-group-lg>.form-control, .input-group-lg>.input-group-addon, .input-group-lg>.input-group-btn>.btn {\n    height: 48px;\n    padding: 10px 16px;\n    font-size: 19px;\n    line-height: 1.33;\n    border-radius: 0;\n}\nselect.input-group-lg>.form-control, select.input-group-lg>.input-group-addon, select.input-group-lg>.input-group-btn>.btn {\n    height: 48px;\n    line-height: 48px;\n}\ntextarea.input-group-lg>.form-control, textarea.input-group-lg>.input-group-addon, 
textarea.input-group-lg>.input-group-btn>.btn {\n    height: auto;\n}\n.input-group-sm>.form-control, .input-group-sm>.input-group-addon, .input-group-sm>.input-group-btn>.btn {\n    height: 30px;\n    padding: 5px 10px;\n    font-size: 12px;\n    line-height: 1.5;\n    border-radius: 0;\n}\nselect.input-group-sm>.form-control, select.input-group-sm>.input-group-addon, select.input-group-sm>.input-group-btn>.btn {\n    height: 30px;\n    line-height: 30px;\n}\ntextarea.input-group-sm>.form-control, textarea.input-group-sm>.input-group-addon, textarea.input-group-sm>.input-group-btn>.btn {\n    height: auto;\n}\n.input-group-addon, .input-group-btn, .input-group .form-control {\n    display: table-cell;\n}\n.input-group-addon:not(:first-child):not(:last-child), .input-group-btn:not(:first-child):not(:last-child), .input-group .form-control:not(:first-child):not(:last-child) {\n    border-radius: 0;\n}\n.input-group-addon, .input-group-btn {\n    width: 1%;\n    white-space: nowrap;\n    vertical-align: middle;\n}\n.input-group-addon {\n    padding: 6px 12px;\n    font-size: 15px;\n    font-weight: normal;\n    line-height: 1;\n    color: #6f6f6f;\n    text-align: center;\n    background-color: #eee;\n    border: 1px solid #ccc;\n    border-radius: 0;\n}\n.input-group-addon.input-sm {\n    padding: 5px 10px;\n    font-size: 12px;\n    border-radius: 0;\n}\n.input-group-addon.input-lg {\n    padding: 10px 16px;\n    font-size: 19px;\n    border-radius: 0;\n}\n.input-group-addon input[type=\"radio\"], .input-group-addon input[type=\"checkbox\"] {\n    margin-top: 0;\n}\n.input-group .form-control:first-child, .input-group-addon:first-child, .input-group-btn:first-child>.btn, .input-group-btn:first-child>.dropdown-toggle, .input-group-btn:last-child>.btn:not(:last-child):not(.dropdown-toggle) {\n    border-top-right-radius: 0;\n    border-bottom-right-radius: 0;\n}\n.input-group-addon:first-child {\n    border-right: 0;\n}\n.input-group .form-control:last-child, .input-group-addon:last-child, .input-group-btn:last-child>.btn, .input-group-btn:last-child>.dropdown-toggle, .input-group-btn:first-child>.btn:not(:first-child) {\n    border-bottom-left-radius: 0;\n    border-top-left-radius: 0;\n}\n.input-group-addon:last-child {\n    border-left: 0;\n}\n.input-group-btn {\n    position: relative;\n    white-space: nowrap;\n}\n.input-group-btn:first-child>.btn {\n    margin-right: -1px;\n}\n.input-group-btn:last-child>.btn {\n    margin-left: -1px;\n}\n.input-group-btn>.btn {\n    position: relative;\n}\n.input-group-btn>.btn+.btn {\n    margin-left: -4px;\n}\n.input-group-btn>.btn:hover, .input-group-btn>.btn:active {\n    z-index: 2;\n}\n.nav {\n    font-family: \"Open Sans\", \"Helvetica Neue\", Helvetica, Arial, sans-serif;\n    padding-left: 0;\n    margin-bottom: 0;\n    list-style: none;\n}\n.nav:before, .nav:after {\n    display: table;\n    content: \" \"}\n.nav:after {\n    clear: both;\n}\n.nav>li {\n    position: relative;\n    display: block;\n}\n.nav>li>a {\n    position: relative;\n    display: block;\n    padding: 10px 15px;\n}\n.nav>li>a:hover, .nav>li>a:focus {\n    
text-decoration: none;\n    background-color: #eee;\n}\n.nav>li.disabled>a {\n    color: #999;\n}\n.nav>li.disabled>a:hover, .nav>li.disabled>a:focus {\n    color: #999;\n    text-decoration: none;\n    cursor: not-allowed;\n    background-color: transparent;\n}\n.nav .open>a, .nav .open>a:hover, .nav .open>a:focus {\n    background-color: #eee;\n    border-color: #008cba;\n}\n.nav .nav-divider {\n    height: 1px;\n    margin: 9.5px 0;\n    overflow: hidden;\n    background-color: #e5e5e5;\n}\n.nav>li>a>img {\n    max-width: none;\n}\n.nav-tabs {\n    border-bottom: 1px solid #ddd;\n}\n.nav-tabs>li {\n    float: left;\n    margin-bottom: -1px;\n}\n.nav-tabs>li>a {\n    margin-right: 2px;\n    line-height: 1.428571429;\n    border: 1px solid transparent;\n    border-radius: 0;\n}\n.nav-tabs>li>a:hover {\n    border-color: #eee #eee #ddd;\n}\n.nav-tabs>li.active>a, .nav-tabs>li.active>a:hover, .nav-tabs>li.active>a:focus {\n    color: #6f6f6f;\n    cursor: default;\n    background-color: #fff;\n    border: 1px solid #ddd;\n    border-bottom-color: transparent;\n}\n.nav-tabs.nav-justified {\n    width: 100%;\n    border-bottom: 0;\n}\n.nav-tabs.nav-justified>li {\n    float: none;\n}\n.nav-tabs.nav-justified>li>a {\n    margin-bottom: 5px;\n    text-align: center;\n}\n.nav-tabs.nav-justified>.dropdown .dropdown-menu {\n    top: auto;\n    left: auto;\n}\n@media(min-width:768px) {\n    .nav-tabs.nav-justified>li {\n    display: table-cell;\n    width: 1%}\n.nav-tabs.nav-justified>li>a {\n    margin-bottom: 0;\n}\n}.nav-tabs.nav-justified>li>a {\n    margin-right: 0;\n    border-radius: 0;\n}\n.nav-tabs.nav-justified>.active>a, .nav-tabs.nav-justified>.active>a:hover, .nav-tabs.nav-justified>.active>a:focus {\n    border: 1px solid #ddd;\n}\n@media(min-width:768px) {\n    .nav-tabs.nav-justified>li>a {\n    border-bottom: 1px solid #ddd;\n    border-radius: 0;\n}\n.nav-tabs.nav-justified>.active>a, .nav-tabs.nav-justified>.active>a:hover, .nav-tabs.nav-justified>.active>a:focus {\n    border-bottom-color: #fff;\n}\n}.nav-pills>li {\n    float: left;\n}\n.nav-pills>li>a {\n    border-radius: 0;\n}\n.nav-pills>li+li {\n    margin-left: 2px;\n}\n.nav-pills>li.active>a, .nav-pills>li.active>a:hover, .nav-pills>li.active>a:focus {\n    color: #fff;\n    background-color: #008cba;\n}\n.nav-stacked>li {\n    float: none;\n}\n.nav-stacked>li+li {\n    margin-top: 2px;\n    margin-left: 0;\n}\n.nav-justified {\n    width: 100%}\n.nav-justified>li {\n    float: none;\n}\n.nav-justified>li>a {\n    margin-bottom: 5px;\n    text-align: center;\n}\n.nav-justified>.dropdown .dropdown-menu {\n    top: auto;\n    left: auto;\n}\n@media(min-width:768px) {\n    .nav-justified>li {\n    display: table-cell;\n    width: 1%}\n.nav-justified>li>a {\n    margin-bottom: 0;\n}\n}.nav-tabs-justified {\n    border-bottom: 0;\n}\n.nav-tabs-justified>li>a {\n    margin-right: 0;\n    border-radius: 0;\n}\n.nav-tabs-justified>.active>a, .nav-tabs-justified>.active>a:hover, .nav-tabs-justified>.active>a:focus {\n    border: 1px solid #ddd;\n}\n@media(min-width:768px) {\n    .nav-tabs-justified>li>a {\n    border-bottom: 1px solid #ddd;\n    border-radius: 0;\n}\n.nav-tabs-justified>.active>a, .nav-tabs-justified>.active>a:hover, .nav-tabs-justified>.active>a:focus {\n    border-bottom-color: #fff;\n}\n}.tab-content>.tab-pane {\n    display: none;\n}\n.tab-content>.active {\n    display: block;\n}\n.nav-tabs .dropdown-menu {\n    margin-top: -1px;\n    border-top-right-radius: 0;\n    border-top-left-radius: 0;\n}\n.navbar 
{\n    position: relative;\n    min-height: 45px;\n    margin-bottom: 21px;\n    border: 1px solid transparent;\n}\n.navbar:before, .navbar:after {\n    display: table;\n    content: \" \"}\n.navbar:after {\n    clear: both;\n}\n@media(min-width:768px) {\n    .navbar {\n    border-radius: 0;\n}\n}.navbar-header:before, .navbar-header:after {\n    display: table;\n    content: \" \"}\n.navbar-header:after {\n    clear: both;\n}\n@media(min-width:768px) {\n    .navbar-header {\n    float: left;\n}\n}.navbar-collapse {\n    max-height: 340px;\n    padding-right: 15px;\n    padding-left: 15px;\n    overflow-x: visible;\n    border-top: 1px solid transparent;\n    box-shadow: inset 0 1px 0 rgba(255, 255, 255, 0.1);\n    -webkit-overflow-scrolling: touch;\n}\n.navbar-collapse:before, .navbar-collapse:after {\n    display: table;\n    content: \" \"}\n.navbar-collapse:after {\n    clear: both;\n}\n.navbar-collapse.in {\n    overflow-y: auto;\n}\n@media(min-width:768px) {\n    .navbar-collapse {\n    width: auto;\n    border-top: 0;\n    box-shadow: none;\n}\n.navbar-collapse.collapse {\n    display: block!important;\n    height: auto!important;\n    padding-bottom: 0;\n    overflow: visible!important;\n}\n.navbar-collapse.in {\n    overflow-y: visible;\n}\n.navbar-fixed-top .navbar-collapse, .navbar-static-top .navbar-collapse, .navbar-fixed-bottom .navbar-collapse {\n    padding-right: 0;\n    padding-left: 0;\n}\n}.container>.navbar-header, .container>.navbar-collapse {\n    margin-right: -15px;\n    margin-left: -15px;\n}\n@media(min-width:768px) {\n    .container>.navbar-header, .container>.navbar-collapse {\n    margin-right: 0;\n    margin-left: 0;\n}\n}.navbar-static-top {\n    z-index: 1000;\n    border-width: 0 0 1px;\n}\n@media(min-width:768px) {\n    .navbar-static-top {\n    border-radius: 0;\n}\n}.navbar-fixed-top, .navbar-fixed-bottom {\n    position: fixed;\n    right: 0;\n    left: 0;\n    z-index: 1030;\n}\n@media(min-width:768px) {\n    .navbar-fixed-top, 
.navbar-fixed-bottom {\n    border-radius: 0;\n}\n}.navbar-fixed-top {\n    top: 0;\n    border-width: 0 0 1px;\n}\n.navbar-fixed-bottom {\n    bottom: 0;\n    margin-bottom: 0;\n    border-width: 1px 0 0;\n}\n.navbar-brand {\n    font-family: \"Open Sans\", \"Helvetica Neue\", Helvetica, Arial, sans-serif;\n    float: left;\n    padding: 12px 15px;\n    font-size: 19px;\n    line-height: 21px;\n}\n.navbar-brand:hover, .navbar-brand:focus {\n    text-decoration: none;\n}\n@media(min-width:768px) {\n    .navbar>.container .navbar-brand {\n    margin-left: -15px;\n}\n}.navbar-toggle {\n    position: relative;\n    float: right;\n    padding: 9px 10px;\n    margin-top: 5.5px;\n    margin-right: 15px;\n    margin-bottom: 5.5px;\n    background-color: transparent;\n    background-image: none;\n    border: 1px solid transparent;\n    border-radius: 0;\n}\n.navbar-toggle .icon-bar {\n    display: block;\n    width: 22px;\n    height: 2px;\n    border-radius: 1px;\n}\n.navbar-toggle .icon-bar+.icon-bar {\n    margin-top: 4px;\n}\n@media(min-width:768px) {\n    .navbar-toggle {\n    display: none;\n}\n}.navbar-nav {\n    margin: 6px -15px;\n}\n.navbar-nav>li>a {\n    padding-top: 10px;\n    padding-bottom: 10px;\n    line-height: 21px;\n}\n@media(max-width:767px) {\n    .navbar-nav .open .dropdown-menu {\n    position: static;\n    float: none;\n    width: auto;\n    margin-top: 0;\n    background-color: transparent;\n    border: 0;\n    box-shadow: none;\n}\n.navbar-nav .open .dropdown-menu>li>a, .navbar-nav .open .dropdown-menu .dropdown-header {\n    padding: 5px 15px 5px 25px;\n}\n.navbar-nav .open .dropdown-menu>li>a {\n    line-height: 21px;\n}\n.navbar-nav .open .dropdown-menu>li>a:hover, .navbar-nav .open .dropdown-menu>li>a:focus {\n    background-image: none;\n}\n}@media(min-width:768px) {\n    .navbar-nav {\n    float: left;\n    margin: 0;\n}\n.navbar-nav>li {\n    float: left;\n}\n.navbar-nav>li>a {\n    padding-top: 12px;\n    padding-bottom: 12px;\n}\n.navbar-nav.navbar-right:last-child {\n    margin-right: -15px;\n}\n}@media(min-width:768px) {\n    .navbar-left {\n    float: left!important;\n}\n.navbar-right {\n    float: right!important;\n}\n}.navbar-form {\n    padding: 10px 15px;\n    margin-top: 5px;\n    margin-right: -15px;\n    margin-bottom: 5px;\n    margin-left: -15px;\n    border-top: 1px solid transparent;\n    border-bottom: 1px solid transparent;\n    -webkit-box-shadow: inset 0 1px 0 rgba(255, 255, 255, 0.1), 0 1px 0 rgba(255, 255, 255, 0.1);\n    box-shadow: inset 0 1px 0 rgba(255, 255, 255, 0.1), 0 1px 0 rgba(255, 255, 255, 0.1);\n}\n@media(min-width:768px) {\n    .navbar-form .form-group {\n    display: inline-block;\n    margin-bottom: 0;\n    vertical-align: middle;\n}\n.navbar-form .form-control {\n    display: inline-block;\n}\n.navbar-form select.form-control {\n    width: auto;\n}\n.navbar-form .radio, .navbar-form .checkbox {\n    display: inline-block;\n    padding-left: 0;\n    margin-top: 0;\n    margin-bottom: 0;\n}\n.navbar-form .radio input[type=\"radio\"], .navbar-form .checkbox input[type=\"checkbox\"] {\n    float: none;\n    margin-left: 0;\n}\n}@media(max-width:767px) {\n    .navbar-form .form-group {\n    margin-bottom: 5px;\n}\n}@media(min-width:768px) {\n    .navbar-form {\n    width: auto;\n    padding-top: 0;\n    padding-bottom: 0;\n    margin-right: 0;\n    margin-left: 0;\n    border: 0;\n    -webkit-box-shadow: none;\n    box-shadow: none;\n}\n.navbar-form.navbar-right:last-child {\n    margin-right: 
-15px;\n}\n}.navbar-nav>li>.dropdown-menu {\n    margin-top: 0;\n    border-top-right-radius: 0;\n    border-top-left-radius: 0;\n}\n.navbar-fixed-bottom .navbar-nav>li>.dropdown-menu {\n    border-bottom-right-radius: 0;\n    border-bottom-left-radius: 0;\n}\n.navbar-nav.pull-right>li>.dropdown-menu, .navbar-nav>li>.dropdown-menu.pull-right {\n    right: 0;\n    left: auto;\n}\n.navbar-btn {\n    margin-top: 5px;\n    margin-bottom: 5px;\n}\n.navbar-btn.btn-sm {\n    margin-top: 7.5px;\n    margin-bottom: 7.5px;\n}\n.navbar-btn.btn-xs {\n    margin-top: 11.5px;\n    margin-bottom: 11.5px;\n}\n.navbar-text {\n    margin-top: 12px;\n    margin-bottom: 12px;\n}\n@media(min-width:768px) {\n    .navbar-text {\n    float: left;\n    margin-right: 15px;\n    margin-left: 15px;\n}\n.navbar-text.navbar-right:last-child {\n    margin-right: 0;\n}\n}.navbar-default {\n    background-color: #333;\n    border-color: #222;\n}\n.navbar-default .navbar-brand {\n    color: #fff;\n}\n.navbar-default .navbar-brand:hover, .navbar-default .navbar-brand:focus {\n    color: #fff;\n    background-color: transparent;\n}\n.navbar-default .navbar-text {\n    color: #fff;\n}\n.navbar-default .navbar-nav>li>a {\n    color: #fff;\n}\n.navbar-default .navbar-nav>li>a:hover, .navbar-default .navbar-nav>li>a:focus {\n    color: #fff;\n    background-color: #272727;\n}\n.navbar-default .navbar-nav>.active>a, .navbar-default .navbar-nav>.active>a:hover, .navbar-default .navbar-nav>.active>a:focus {\n    color: #fff;\n    background-color: #272727;\n}\n.navbar-default .navbar-nav>.disabled>a, .navbar-default .navbar-nav>.disabled>a:hover, .navbar-default .navbar-nav>.disabled>a:focus {\n    color: #ccc;\n    background-color: transparent;\n}\n.navbar-default .navbar-toggle {\n    border-color: transparent;\n}\n.navbar-default .navbar-toggle:hover, .navbar-default .navbar-toggle:focus {\n    background-color: transparent;\n}\n.navbar-default .navbar-toggle .icon-bar {\n    background-color: #fff;\n}\n.navbar-default .navbar-collapse, .navbar-default .navbar-form {\n    border-color: #222;\n}\n.navbar-default .navbar-nav>.open>a, .navbar-default .navbar-nav>.open>a:hover, .navbar-default .navbar-nav>.open>a:focus {\n    color: #fff;\n    background-color: #272727;\n}\n@media(max-width:767px) {\n    .navbar-default .navbar-nav .open .dropdown-menu>li>a {\n    color: #fff;\n}\n.navbar-default .navbar-nav .open .dropdown-menu>li>a:hover, .navbar-default .navbar-nav .open .dropdown-menu>li>a:focus {\n    color: #fff;\n    background-color: #272727;\n}\n.navbar-default .navbar-nav .open .dropdown-menu>.active>a, .navbar-default .navbar-nav .open .dropdown-menu>.active>a:hover, .navbar-default .navbar-nav .open .dropdown-menu>.active>a:focus {\n    color: #fff;\n    background-color: #272727;\n}\n.navbar-default .navbar-nav .open .dropdown-menu>.disabled>a, .navbar-default .navbar-nav .open .dropdown-menu>.disabled>a:hover, .navbar-default .navbar-nav .open .dropdown-menu>.disabled>a:focus {\n    color: #ccc;\n    background-color: transparent;\n}\n}.navbar-default .navbar-link {\n    color: #fff;\n}\n.navbar-default .navbar-link:hover {\n    color: #fff;\n}\n.navbar-inverse {\n    background-color: #008cba;\n    border-color: #006687;\n}\n.navbar-inverse .navbar-brand {\n    color: #fff;\n}\n.navbar-inverse .navbar-brand:hover, .navbar-inverse .navbar-brand:focus {\n    color: #fff;\n    background-color: transparent;\n}\n.navbar-inverse .navbar-text {\n    color: #fff;\n}\n.navbar-inverse .navbar-nav>li>a {\n    color: 
#fff;\n}\n.navbar-inverse .navbar-nav>li>a:hover, .navbar-inverse .navbar-nav>li>a:focus {\n    color: #fff;\n    background-color: #006687;\n}\n.navbar-inverse .navbar-nav>.active>a, .navbar-inverse .navbar-nav>.active>a:hover, .navbar-inverse .navbar-nav>.active>a:focus {\n    color: #fff;\n    background-color: #006687;\n}\n.navbar-inverse .navbar-nav>.disabled>a, .navbar-inverse .navbar-nav>.disabled>a:hover, .navbar-inverse .navbar-nav>.disabled>a:focus {\n    color: #444;\n    background-color: transparent;\n}\n.navbar-inverse .navbar-toggle {\n    border-color: transparent;\n}\n.navbar-inverse .navbar-toggle:hover, .navbar-inverse .navbar-toggle:focus {\n    background-color: transparent;\n}\n.navbar-inverse .navbar-toggle .icon-bar {\n    background-color: #fff;\n}\n.navbar-inverse .navbar-collapse, .navbar-inverse .navbar-form {\n    border-color: #007196;\n}\n.navbar-inverse .navbar-nav>.open>a, .navbar-inverse .navbar-nav>.open>a:hover, .navbar-inverse .navbar-nav>.open>a:focus {\n    color: #fff;\n    background-color: #006687;\n}\n@media(max-width:767px) {\n    .navbar-inverse .navbar-nav .open .dropdown-menu>.dropdown-header {\n    border-color: #006687;\n}\n.navbar-inverse .navbar-nav .open .dropdown-menu .divider {\n    background-color: #006687;\n}\n.navbar-inverse .navbar-nav .open .dropdown-menu>li>a {\n    color: #fff;\n}\n.navbar-inverse .navbar-nav .open .dropdown-menu>li>a:hover, .navbar-inverse .navbar-nav .open .dropdown-menu>li>a:focus {\n    color: #fff;\n    background-color: #006687;\n}\n.navbar-inverse .navbar-nav .open .dropdown-menu>.active>a, .navbar-inverse .navbar-nav .open .dropdown-menu>.active>a:hover, .navbar-inverse .navbar-nav .open .dropdown-menu>.active>a:focus {\n    color: #fff;\n    background-color: #006687;\n}\n.navbar-inverse .navbar-nav .open .dropdown-menu>.disabled>a, .navbar-inverse .navbar-nav .open .dropdown-menu>.disabled>a:hover, .navbar-inverse .navbar-nav .open .dropdown-menu>.disabled>a:focus {\n    color: #444;\n    background-color: transparent;\n}\n}.navbar-inverse .navbar-link {\n    color: #fff;\n}\n.navbar-inverse .navbar-link:hover {\n    color: #fff;\n}\n.breadcrumb {\n    padding: 8px 15px;\n    margin-bottom: 21px;\n    list-style: none;\n    background-color: #f5f5f5;\n    border-radius: 0;\n}\n.breadcrumb>li {\n    display: inline-block;\n}\n.breadcrumb>li+li:before {\n    padding: 0 5px;\n    color: #999;\n    content: \"/\\00a0\"}\n.breadcrumb>.active {\n    color: #333;\n}\n.pagination {\n    display: inline-block;\n    padding-left: 0;\n    margin: 21px 0;\n    border-radius: 0;\n}\n.pagination>li {\n    display: inline;\n}\n.pagination>li>a, .pagination>li>span {\n    position: relative;\n    float: left;\n    padding: 6px 12px;\n    margin-left: -1px;\n    line-height: 1.428571429;\n    text-decoration: none;\n    background-color: transparent;\n    border: 1px solid transparent;\n}\n.pagination>li:first-child>a, .pagination>li:first-child>span {\n    margin-left: 0;\n    border-bottom-left-radius: 0;\n    border-top-left-radius: 0;\n}\n.pagination>li:last-child>a, .pagination>li:last-child>span {\n    border-top-right-radius: 0;\n    border-bottom-right-radius: 0;\n}\n.pagination>li>a:hover, .pagination>li>span:hover, .pagination>li>a:focus, .pagination>li>span:focus {\n    background-color: #eee;\n}\n.pagination>.active>a, .pagination>.active>span, .pagination>.active>a:hover, .pagination>.active>span:hover, .pagination>.active>a:focus, .pagination>.active>span:focus {\n    z-index: 2;\n    color: #fff;\n    
cursor: default;\n    background-color: #008cba;\n    border-color: #008cba;\n}\n.pagination>.disabled>span, .pagination>.disabled>span:hover, .pagination>.disabled>span:focus, .pagination>.disabled>a, .pagination>.disabled>a:hover, .pagination>.disabled>a:focus {\n    color: #999;\n    cursor: not-allowed;\n    background-color: transparent;\n    border-color: transparent;\n}\n.pagination-lg>li>a, .pagination-lg>li>span {\n    padding: 10px 16px;\n    font-size: 19px;\n}\n.pagination-lg>li:first-child>a, .pagination-lg>li:first-child>span {\n    border-bottom-left-radius: 0;\n    border-top-left-radius: 0;\n}\n.pagination-lg>li:last-child>a, .pagination-lg>li:last-child>span {\n    border-top-right-radius: 0;\n    border-bottom-right-radius: 0;\n}\n.pagination-sm>li>a, .pagination-sm>li>span {\n    padding: 5px 10px;\n    font-size: 12px;\n}\n.pagination-sm>li:first-child>a, .pagination-sm>li:first-child>span {\n    border-bottom-left-radius: 0;\n    border-top-left-radius: 0;\n}\n.pagination-sm>li:last-child>a, .pagination-sm>li:last-child>span {\n    border-top-right-radius: 0;\n    border-bottom-right-radius: 0;\n}\n.pager {\n    padding-left: 0;\n    margin: 21px 0;\n    text-align: center;\n    list-style: none;\n}\n.pager:before, .pager:after {\n    display: table;\n    content: \" \"}\n.pager:after {\n    clear: both;\n}\n.pager li {\n    display: inline;\n}\n.pager li>a, .pager li>span {\n    display: inline-block;\n    padding: 5px 14px;\n    background-color: transparent;\n    border: 1px solid transparent;\n    border-radius: 3px;\n}\n.pager li>a:hover, .pager li>a:focus {\n    text-decoration: none;\n    background-color: #eee;\n}\n.pager .next>a, .pager .next>span {\n    float: right;\n}\n.pager .previous>a, .pager .previous>span {\n    float: left;\n}\n.pager .disabled>a, .pager .disabled>a:hover, .pager .disabled>a:focus, .pager .disabled>span {\n    color: #999;\n    cursor: not-allowed;\n    background-color: transparent;\n}\n.label {\n    display: inline;\n    padding: .2em .6em .3em;\n    font-size: 75%;\n    font-weight: bold;\n    line-height: 1;\n    color: #fff;\n    text-align: center;\n    white-space: nowrap;\n    vertical-align: baseline;\n    border-radius: .25em;\n}\n.label[href]:hover, .label[href]:focus {\n    color: #fff;\n    text-decoration: none;\n    cursor: pointer;\n}\n.label:empty {\n    display: none;\n}\n.btn .label {\n    position: relative;\n    top: -1px;\n}\n.label-default {\n    background-color: #999;\n}\n.label-default[href]:hover, .label-default[href]:focus {\n    background-color: #808080;\n}\n.label-primary {\n    background-color: #008cba;\n}\n.label-primary[href]:hover, .label-primary[href]:focus {\n    background-color: #006687;\n}\n.label-success {\n    background-color: #43ac6a;\n}\n.label-success[href]:hover, .label-success[href]:focus {\n    background-color: #358753;\n}\n.label-info {\n    background-color: #5bc0de;\n}\n.label-info[href]:hover, .label-info[href]:focus {\n    background-color: #31b0d5;\n}\n.label-warning {\n    background-color: #e99002;\n}\n.label-warning[href]:hover, 
.label-warning[href]:focus {\n    background-color: #b67102;\n}\n.label-danger {\n    background-color: #f04124;\n}\n.label-danger[href]:hover, .label-danger[href]:focus {\n    background-color: #d32a0e;\n}\n.badge {\n    display: inline-block;\n    min-width: 10px;\n    padding: 3px 7px;\n    font-size: 12px;\n    font-weight: bold;\n    line-height: 1;\n    color: #777;\n    text-align: center;\n    white-space: nowrap;\n    vertical-align: baseline;\n    background-color: #e7e7e7;\n    border-radius: 10px;\n}\n.badge:empty {\n    display: none;\n}\n.btn .badge {\n    position: relative;\n    top: -1px;\n}\na.badge:hover, a.badge:focus {\n    color: #fff;\n    text-decoration: none;\n    cursor: pointer;\n}\na.list-group-item.active>.badge, .nav-pills>.active>a>.badge {\n    color: #008cba;\n    background-color: #fff;\n}\n.nav-pills>li>a>.badge {\n    margin-left: 3px;\n}\n.jumbotron {\n    padding: 30px;\n    margin-bottom: 30px;\n    font-size: 23px;\n    font-weight: 200;\n    line-height: 2.1428571435;\n    color: inherit;\n    background-color: #fafafa;\n}\n.jumbotron h1, .jumbotron .h1 {\n    line-height: 1;\n    color: inherit;\n}\n.jumbotron p {\n    line-height: 1.4;\n}\n.container .jumbotron {\n    border-radius: 0;\n}\n.jumbotron .container {\n    max-width: 100%}\n@media screen and (min-width:768px) {\n    .jumbotron {\n    padding-top: 48px;\n    padding-bottom: 48px;\n}\n.container .jumbotron {\n    padding-right: 60px;\n    padding-left: 60px;\n}\n.jumbotron h1, .jumbotron .h1 {\n    font-size: 67.5px;\n}\n}.thumbnail {\n    display: block;\n    padding: 4px;\n    margin-bottom: 21px;\n    line-height: 1.428571429;\n    background-color: #fff;\n    border: 1px solid #ddd;\n    border-radius: 0;\n    -webkit-transition: all .2s ease-in-out;\n    transition: all .2s ease-in-out;\n}\n.thumbnail>img, .thumbnail a>img {\n    display: block;\n    height: auto;\n    max-width: 100%;\n    margin-right: auto;\n    margin-left: auto;\n}\na.thumbnail:hover, a.thumbnail:focus, a.thumbnail.active {\n    border-color: #008cba;\n}\n.thumbnail .caption {\n    padding: 9px;\n    color: #222;\n}\n.alert {\n    position: relative;\n    padding: 0.75rem 1.25rem;\n    margin-bottom: 1rem;\n    border: 1px solid transparent;\n    border-radius: 0.25rem;\n  }\n  \n  .alert-heading {\n    color: inherit;\n  }\n  \n  .alert-link {\n    font-weight: 700;\n  }\n  \n  .alert-dismissible {\n    padding-right: 4rem;\n  }\n  \n  .alert-dismissible .close {\n    position: absolute;\n    top: 0;\n    right: 0;\n    padding: 0.75rem 1.25rem;\n    color: inherit;\n  }\n  \n  .alert-primary {\n    color: #004085;\n    background-color: #cce5ff;\n    border-color: #b8daff;\n  }\n  \n  .alert-primary hr {\n    border-top-color: #9fcdff;\n  }\n  \n  .alert-primary .alert-link {\n    color: #002752;\n  }\n  \n  .alert-secondary {\n    color: #383d41;\n    background-color: #e2e3e5;\n    border-color: #d6d8db;\n  }\n  \n  .alert-secondary hr {\n    border-top-color: #c8cbcf;\n  }\n  \n  .alert-secondary .alert-link {\n    color: #202326;\n  }\n  \n  .alert-success {\n    color: #155724;\n    background-color: #d4edda;\n    border-color: #c3e6cb;\n  }\n  \n  .alert-success hr {\n    border-top-color: #b1dfbb;\n  }\n  \n  .alert-success .alert-link {\n    color: #0b2e13;\n  }\n  \n  .alert-info {\n    color: #0c5460;\n    background-color: #d1ecf1;\n    border-color: #bee5eb;\n  }\n  \n  .alert-info hr {\n    border-top-color: #abdde5;\n  }\n  \n  .alert-info .alert-link {\n    color: #062c33;\n  }\n  \n  
.alert-warning {\n    color: #856404;\n    background-color: #fff3cd;\n    border-color: #ffeeba;\n  }\n  \n  .alert-warning hr {\n    border-top-color: #ffe8a1;\n  }\n  \n  .alert-warning .alert-link {\n    color: #533f03;\n  }\n  \n  .alert-danger {\n    color: #721c24;\n    background-color: #f8d7da;\n    border-color: #f5c6cb;\n  }\n  \n  .alert-danger hr {\n    border-top-color: #f1b0b7;\n  }\n  \n  .alert-danger .alert-link {\n    color: #491217;\n  }\n  \n  .alert-light {\n    color: #818182;\n    background-color: #fefefe;\n    border-color: #fdfdfe;\n  }\n  \n  .alert-light hr {\n    border-top-color: #ececf6;\n  }\n  \n  .alert-light .alert-link {\n    color: #686868;\n  }\n  \n  .alert-dark {\n    color: #1b1e21;\n    background-color: #d6d8d9;\n    border-color: #c6c8ca;\n  }\n  \n  .alert-dark hr {\n    border-top-color: #b9bbbe;\n  }\n  \n  .alert-dark .alert-link {\n    color: #040505;\n  }\n\n\n@-webkit-keyframes progress-bar-stripes {\n    from {\n    background-position: 40px 0;\n}\nto {\n    background-position: 0 0;\n}\n}@keyframes progress-bar-stripes {\n    from {\n    background-position: 40px 0;\n}\nto {\n    background-position: 0 0;\n}\n}.progress {\n    height: 21px;\n    margin-bottom: 21px;\n    overflow: hidden;\n    background-color: #f5f5f5;\n    border-radius: 0;\n    -webkit-box-shadow: inset 0 1px 2px rgba(0, 0, 0, 0.1);\n    box-shadow: inset 0 1px 2px rgba(0, 0, 0, 0.1);\n}\n.progress-bar {\n    float: left;\n    width: 0;\n    height: 100%;\n    font-size: 12px;\n    line-height: 21px;\n    color: #fff;\n    text-align: center;\n    background-color: #008cba;\n    -webkit-box-shadow: inset 0 -1px 0 rgba(0, 0, 0, 0.15);\n    box-shadow: inset 0 -1px 0 rgba(0, 0, 0, 0.15);\n    -webkit-transition: width .6s ease;\n    transition: width .6s ease;\n}\n.progress-striped .progress-bar {\n    background-image: -webkit-linear-gradient(45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);\n    background-image: linear-gradient(45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);\n    background-size: 40px 40px;\n}\n.progress.active .progress-bar {\n    -webkit-animation: progress-bar-stripes 2s linear infinite;\n    animation: progress-bar-stripes 2s linear infinite;\n}\n.progress-bar-success {\n    background-color: #43ac6a;\n}\n.progress-striped .progress-bar-success {\n    background-image: -webkit-linear-gradient(45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);\n    background-image: linear-gradient(45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);\n}\n.progress-bar-info {\n    background-color: #5bc0de;\n}\n.progress-striped .progress-bar-info {\n    background-image: -webkit-linear-gradient(45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);\n    background-image: linear-gradient(45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);\n}\n.progress-bar-warning {\n    background-color: 
#e99002;\n}\n.progress-striped .progress-bar-warning {\n    background-image: -webkit-linear-gradient(45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);\n    background-image: linear-gradient(45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);\n}\n.progress-bar-danger {\n    background-color: #f04124;\n}\n.progress-striped .progress-bar-danger {\n    background-image: -webkit-linear-gradient(45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);\n    background-image: linear-gradient(45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);\n}\n.media, .media-body {\n    overflow: hidden;\n    zoom: 1;\n}\n.media, .media .media {\n    margin-top: 15px;\n}\n.media:first-child {\n    margin-top: 0;\n}\n.media-object {\n    display: block;\n}\n.media-heading {\n    margin: 0 0 5px;\n}\n.media>.pull-left {\n    margin-right: 10px;\n}\n.media>.pull-right {\n    margin-left: 10px;\n}\n.media-list {\n    padding-left: 0;\n    list-style: none;\n}\n.list-group {\n    padding-left: 0;\n    margin-bottom: 20px;\n}\n.list-group-item {\n    position: relative;\n    display: block;\n    padding: 10px 15px;\n    margin-bottom: -1px;\n    background-color: #fff;\n    border: 1px solid #ddd;\n}\n.list-group-item:first-child {\n    border-top-right-radius: 0;\n    border-top-left-radius: 0;\n}\n.list-group-item:last-child {\n    margin-bottom: 0;\n    border-bottom-right-radius: 0;\n    border-bottom-left-radius: 0;\n}\n.list-group-item>.badge {\n    float: right;\n}\n.list-group-item>.badge+.badge {\n    margin-right: 5px;\n}\na.list-group-item {\n    color: #555;\n}\na.list-group-item .list-group-item-heading {\n    color: #333;\n}\na.list-group-item:hover, a.list-group-item:focus {\n    text-decoration: none;\n    background-color: #f5f5f5;\n}\na.list-group-item.active, a.list-group-item.active:hover, a.list-group-item.active:focus {\n    z-index: 2;\n    color: #fff;\n    background-color: #008cba;\n    border-color: #008cba;\n}\na.list-group-item.active .list-group-item-heading, a.list-group-item.active:hover .list-group-item-heading, a.list-group-item.active:focus .list-group-item-heading {\n    color: inherit;\n}\na.list-group-item.active .list-group-item-text, a.list-group-item.active:hover .list-group-item-text, a.list-group-item.active:focus .list-group-item-text {\n    color: #87e1ff;\n}\n.list-group-item-heading {\n    margin-top: 0;\n    margin-bottom: 5px;\n}\n.list-group-item-text {\n    margin-bottom: 0;\n    line-height: 1.3;\n}\n.panel {\n    margin-bottom: 21px;\n    background-color: #fff;\n    border: 1px solid transparent;\n    border-radius: 0;\n    -webkit-box-shadow: 0 1px 1px rgba(0, 0, 0, 0.05);\n    box-shadow: 0 1px 1px rgba(0, 0, 0, 0.05);\n}\n.panel-body {\n    padding: 15px;\n}\n.panel-body:before, .panel-body:after {\n    display: table;\n    content: \" \"}\n.panel-body:after {\n    clear: 
both;\n}\n.panel>.list-group {\n    margin-bottom: 0;\n}\n.panel>.list-group .list-group-item {\n    border-width: 1px 0;\n}\n.panel>.list-group .list-group-item:first-child {\n    border-top-right-radius: 0;\n    border-top-left-radius: 0;\n}\n.panel>.list-group .list-group-item:last-child {\n    border-bottom: 0;\n}\n.panel-heading+.list-group .list-group-item:first-child {\n    border-top-width: 0;\n}\n.panel>.table, .panel>.table-responsive>.table {\n    margin-bottom: 0;\n}\n.panel>.panel-body+.table, .panel>.panel-body+.table-responsive {\n    border-top: 1px solid #ddd;\n}\n.panel>.table>tbody:first-child th, .panel>.table>tbody:first-child td {\n    border-top: 0;\n}\n.panel>.table-bordered, .panel>.table-responsive>.table-bordered {\n    border: 0;\n}\n.panel>.table-bordered>thead>tr>th:first-child, .panel>.table-responsive>.table-bordered>thead>tr>th:first-child, .panel>.table-bordered>tbody>tr>th:first-child, .panel>.table-responsive>.table-bordered>tbody>tr>th:first-child, .panel>.table-bordered>tfoot>tr>th:first-child, .panel>.table-responsive>.table-bordered>tfoot>tr>th:first-child, .panel>.table-bordered>thead>tr>td:first-child, .panel>.table-responsive>.table-bordered>thead>tr>td:first-child, .panel>.table-bordered>tbody>tr>td:first-child, .panel>.table-responsive>.table-bordered>tbody>tr>td:first-child, .panel>.table-bordered>tfoot>tr>td:first-child, .panel>.table-responsive>.table-bordered>tfoot>tr>td:first-child {\n    border-left: 0;\n}\n.panel>.table-bordered>thead>tr>th:last-child, .panel>.table-responsive>.table-bordered>thead>tr>th:last-child, .panel>.table-bordered>tbody>tr>th:last-child, .panel>.table-responsive>.table-bordered>tbody>tr>th:last-child, .panel>.table-bordered>tfoot>tr>th:last-child, .panel>.table-responsive>.table-bordered>tfoot>tr>th:last-child, .panel>.table-bordered>thead>tr>td:last-child, .panel>.table-responsive>.table-bordered>thead>tr>td:last-child, .panel>.table-bordered>tbody>tr>td:last-child, .panel>.table-responsive>.table-bordered>tbody>tr>td:last-child, .panel>.table-bordered>tfoot>tr>td:last-child, .panel>.table-responsive>.table-bordered>tfoot>tr>td:last-child {\n    border-right: 0;\n}\n.panel>.table-bordered>thead>tr:last-child>th, .panel>.table-responsive>.table-bordered>thead>tr:last-child>th, .panel>.table-bordered>tbody>tr:last-child>th, .panel>.table-responsive>.table-bordered>tbody>tr:last-child>th, .panel>.table-bordered>tfoot>tr:last-child>th, .panel>.table-responsive>.table-bordered>tfoot>tr:last-child>th, .panel>.table-bordered>thead>tr:last-child>td, .panel>.table-responsive>.table-bordered>thead>tr:last-child>td, .panel>.table-bordered>tbody>tr:last-child>td, .panel>.table-responsive>.table-bordered>tbody>tr:last-child>td, .panel>.table-bordered>tfoot>tr:last-child>td, .panel>.table-responsive>.table-bordered>tfoot>tr:last-child>td {\n    border-bottom: 0;\n}\n.panel>.table-responsive {\n    margin-bottom: 0;\n    border: 0;\n}\n.panel-heading {\n    padding: 10px 15px;\n    border-bottom: 1px solid transparent;\n    border-top-right-radius: 0;\n    border-top-left-radius: 0;\n}\n.panel-heading>.dropdown .dropdown-toggle {\n    color: inherit;\n}\n.panel-title {\n    margin-top: 0;\n    margin-bottom: 0;\n    font-size: 17px;\n    color: inherit;\n}\n.panel-title>a {\n    
color: inherit;\n}\n.panel-footer {\n    padding: 10px 15px;\n    background-color: #f5f5f5;\n    border-top: 1px solid #ddd;\n    border-bottom-right-radius: 0;\n    border-bottom-left-radius: 0;\n}\n.panel-group .panel {\n    margin-bottom: 0;\n    overflow: hidden;\n    border-radius: 0;\n}\n.panel-group .panel+.panel {\n    margin-top: 5px;\n}\n.panel-group .panel-heading {\n    border-bottom: 0;\n}\n.panel-group .panel-heading+.panel-collapse .panel-body {\n    border-top: 1px solid #ddd;\n}\n.panel-group .panel-footer {\n    border-top: 0;\n}\n.panel-group .panel-footer+.panel-collapse .panel-body {\n    border-bottom: 1px solid #ddd;\n}\n.panel-default {\n    border-color: #ddd;\n}\n.panel-default>.panel-heading {\n    color: #333;\n    background-color: #f5f5f5;\n    border-color: #ddd;\n}\n.panel-default>.panel-heading+.panel-collapse .panel-body {\n    border-top-color: #ddd;\n}\n.panel-default>.panel-footer+.panel-collapse .panel-body {\n    border-bottom-color: #ddd;\n}\n.panel-primary {\n    border-color: #008cba;\n}\n.panel-primary>.panel-heading {\n    color: #fff;\n    background-color: #008cba;\n    border-color: #008cba;\n}\n.panel-primary>.panel-heading+.panel-collapse .panel-body {\n    border-top-color: #008cba;\n}\n.panel-primary>.panel-footer+.panel-collapse .panel-body {\n    border-bottom-color: #008cba;\n}\n.panel-success {\n    border-color: #3c9a5f;\n}\n.panel-success>.panel-heading {\n    color: #43ac6a;\n    background-color: #dff0d8;\n    border-color: #3c9a5f;\n}\n.panel-success>.panel-heading+.panel-collapse .panel-body {\n    border-top-color: #3c9a5f;\n}\n.panel-success>.panel-footer+.panel-collapse .panel-body {\n    border-bottom-color: #3c9a5f;\n}\n.panel-warning {\n    border-color: #d08002;\n}\n.panel-warning>.panel-heading {\n    color: #e99002;\n    background-color: #fcf8e3;\n    border-color: #d08002;\n}\n.panel-warning>.panel-heading+.panel-collapse .panel-body {\n    border-top-color: #d08002;\n}\n.panel-warning>.panel-footer+.panel-collapse .panel-body {\n    border-bottom-color: #d08002;\n}\n.panel-danger {\n    border-color: #ea2f10;\n}\n.panel-danger>.panel-heading {\n    color: #f04124;\n    background-color: #f2dede;\n    border-color: #ea2f10;\n}\n.panel-danger>.panel-heading+.panel-collapse .panel-body {\n    border-top-color: #ea2f10;\n}\n.panel-danger>.panel-footer+.panel-collapse .panel-body {\n    border-bottom-color: #ea2f10;\n}\n.panel-info {\n    border-color: #3db5d8;\n}\n.panel-info>.panel-heading {\n    color: #5bc0de;\n    background-color: #d9edf7;\n    border-color: #3db5d8;\n}\n.panel-info>.panel-heading+.panel-collapse .panel-body {\n    border-top-color: #3db5d8;\n}\n.panel-info>.panel-footer+.panel-collapse .panel-body {\n    border-bottom-color: #3db5d8;\n}\n.well {\n    min-height: 20px;\n    padding: 19px;\n    margin-bottom: 20px;\n    background-color: #fafafa;\n    border: 1px solid #e8e8e8;\n    border-radius: 0;\n    -webkit-box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.05);\n    box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.05);\n}\n.well blockquote {\n    border-color: #ddd;\n    border-color: rgba(0, 0, 0, 0.15);\n}\n.well-lg {\n    padding: 24px;\n    border-radius: 0;\n}\n.well-sm {\n    padding: 9px;\n    border-radius: 0;\n}\n.close {\n    float: right;\n    font-size: 22.5px;\n    font-weight: bold;\n    line-height: 1;\n    color: #000;\n    text-shadow: 0 1px 0 #fff;\n    opacity: .2;\n    filter: alpha(opacity=20);\n}\n.close:hover, .close:focus {\n    color: #000;\n    text-decoration: none;\n    
cursor: pointer;\n    opacity: .5;\n    filter: alpha(opacity=50);\n}\nbutton.close {\n    padding: 0;\n    cursor: pointer;\n    background: transparent;\n    border: 0;\n    -webkit-appearance: none;\n}\n.modal-open {\n    overflow: hidden;\n}\n.modal {\n    position: fixed;\n    top: 0;\n    right: 0;\n    bottom: 0;\n    left: 0;\n    z-index: 1040;\n    display: none;\n    overflow: auto;\n    overflow-y: scroll;\n}\n.modal.fade .modal-dialog {\n    -webkit-transform: translate(0, -25%);\n    -ms-transform: translate(0, -25%);\n    transform: translate(0, -25%);\n    -webkit-transition: -webkit-transform .3s ease-out;\n    -moz-transition: -moz-transform .3s ease-out;\n    -o-transition: -o-transform .3s ease-out;\n    transition: transform .3s ease-out;\n}\n.modal.in .modal-dialog {\n    -webkit-transform: translate(0, 0);\n    -ms-transform: translate(0, 0);\n    transform: translate(0, 0);\n}\n.modal-dialog {\n    position: relative;\n    z-index: 1050;\n    width: auto;\n    margin: 10px;\n}\n.modal-content {\n    position: relative;\n    background-color: #fff;\n    border: 1px solid #999;\n    border: 1px solid rgba(0, 0, 0, 0.2);\n    border-radius: 0;\n    outline: 0;\n    -webkit-box-shadow: 0 3px 9px rgba(0, 0, 0, 0.5);\n    box-shadow: 0 3px 9px rgba(0, 0, 0, 0.5);\n    background-clip: padding-box;\n}\n.modal-backdrop {\n    position: fixed;\n    top: 0;\n    right: 0;\n    bottom: 0;\n    left: 0;\n    z-index: 1030;\n    background-color: #000;\n}\n.modal-backdrop.fade {\n    opacity: 0;\n    filter: alpha(opacity=0);\n}\n.modal-backdrop.in {\n    opacity: .5;\n    filter: alpha(opacity=50);\n}\n.modal-header {\n    min-height: 16.428571429px;\n    padding: 15px;\n    border-bottom: 1px solid #e5e5e5;\n}\n.modal-header .close {\n    margin-top: -2px;\n}\n.modal-title {\n    margin: 0;\n    line-height: 1.428571429;\n}\n.modal-body {\n    position: relative;\n    padding: 20px;\n}\n.modal-footer {\n    padding: 19px 20px 20px;\n    margin-top: 15px;\n    text-align: right;\n    border-top: 1px solid #e5e5e5;\n}\n.modal-footer:before, .modal-footer:after {\n    display: table;\n    content: \" \"}\n.modal-footer:after {\n    clear: both;\n}\n.modal-footer .btn+.btn {\n    margin-bottom: 0;\n    margin-left: 5px;\n}\n.modal-footer .btn-group .btn+.btn {\n    margin-left: -1px;\n}\n.modal-footer .btn-block+.btn-block {\n    margin-left: 0;\n}\n@media screen and (min-width:768px) {\n    .modal-dialog {\n    width: 600px;\n    margin: 30px auto;\n}\n.modal-content {\n    -webkit-box-shadow: 0 5px 15px rgba(0, 0, 0, 0.5);\n    box-shadow: 0 5px 15px rgba(0, 0, 0, 0.5);\n}\n}.tooltip {\n    position: absolute;\n    z-index: 1030;\n    display: block;\n    font-size: 12px;\n    line-height: 1.4;\n    opacity: 0;\n    filter: alpha(opacity=0);\n    visibility: visible;\n}\n.tooltip.in {\n    opacity: .9;\n    filter: alpha(opacity=90);\n}\n.tooltip.top {\n    padding: 5px 0;\n    margin-top: -3px;\n}\n.tooltip.right {\n    padding: 0 5px;\n    margin-left: 
3px;\n}\n.tooltip.bottom {\n    padding: 5px 0;\n    margin-top: 3px;\n}\n.tooltip.left {\n    padding: 0 5px;\n    margin-left: -3px;\n}\n.tooltip-inner {\n    max-width: 200px;\n    padding: 3px 8px;\n    color: #fff;\n    text-align: center;\n    text-decoration: none;\n    background-color: #333;\n    border-radius: 0;\n}\n.tooltip-arrow {\n    position: absolute;\n    width: 0;\n    height: 0;\n    border-color: transparent;\n    border-style: solid;\n}\n.tooltip.top .tooltip-arrow {\n    bottom: 0;\n    left: 50%;\n    margin-left: -5px;\n    border-top-color: #333;\n    border-width: 5px 5px 0;\n}\n.tooltip.top-left .tooltip-arrow {\n    bottom: 0;\n    left: 5px;\n    border-top-color: #333;\n    border-width: 5px 5px 0;\n}\n.tooltip.top-right .tooltip-arrow {\n    right: 5px;\n    bottom: 0;\n    border-top-color: #333;\n    border-width: 5px 5px 0;\n}\n.tooltip.right .tooltip-arrow {\n    top: 50%;\n    left: 0;\n    margin-top: -5px;\n    border-right-color: #333;\n    border-width: 5px 5px 5px 0;\n}\n.tooltip.left .tooltip-arrow {\n    top: 50%;\n    right: 0;\n    margin-top: -5px;\n    border-left-color: #333;\n    border-width: 5px 0 5px 5px;\n}\n.tooltip.bottom .tooltip-arrow {\n    top: 0;\n    left: 50%;\n    margin-left: -5px;\n    border-bottom-color: #333;\n    border-width: 0 5px 5px;\n}\n.tooltip.bottom-left .tooltip-arrow {\n    top: 0;\n    left: 5px;\n    border-bottom-color: #333;\n    border-width: 0 5px 5px;\n}\n.tooltip.bottom-right .tooltip-arrow {\n    top: 0;\n    right: 5px;\n    border-bottom-color: #333;\n    border-width: 0 5px 5px;\n}\n.popover {\n    position: absolute;\n    top: 0;\n    left: 0;\n    z-index: 1010;\n    display: none;\n    max-width: 276px;\n    padding: 1px;\n    text-align: left;\n    white-space: normal;\n    background-color: #333;\n    border: 1px solid #333;\n    border: 1px solid transparent;\n    border-radius: 0;\n    -webkit-box-shadow: 0 5px 10px rgba(0, 0, 0, 0.2);\n    box-shadow: 0 5px 10px rgba(0, 0, 0, 0.2);\n    background-clip: padding-box;\n}\n.popover.top {\n    margin-top: -10px;\n}\n.popover.right {\n    margin-left: 10px;\n}\n.popover.bottom {\n    margin-top: 10px;\n}\n.popover.left {\n    margin-left: -10px;\n}\n.popover-title {\n    padding: 8px 14px;\n    margin: 0;\n    font-size: 15px;\n    font-weight: normal;\n    line-height: 18px;\n    background-color: #333;\n    border-bottom: 1px solid #262626;\n    border-radius: 5px 5px 0 0;\n}\n.popover-content {\n    padding: 9px 14px;\n}\n.popover .arrow, .popover .arrow:after {\n    position: absolute;\n    display: block;\n    width: 0;\n    height: 0;\n    border-color: transparent;\n    border-style: solid;\n}\n.popover .arrow {\n    border-width: 11px;\n}\n.popover .arrow:after {\n    border-width: 10px;\n    content: \"\"}\n.popover.top .arrow {\n    bottom: -11px;\n    left: 50%;\n    margin-left: -11px;\n    border-top-color: #999;\n    border-top-color: rgba(0, 0, 0, 0.25);\n    border-bottom-width: 0;\n}\n.popover.top .arrow:after {\n    bottom: 1px;\n    margin-left: -10px;\n    border-top-color: #333;\n    border-bottom-width: 0;\n    content: \" \"}\n.popover.right .arrow {\n    top: 50%;\n    left: -11px;\n    margin-top: -11px;\n    border-right-color: #999;\n    border-right-color: rgba(0, 0, 0, 0.25);\n    border-left-width: 0;\n}\n.popover.right .arrow:after {\n    bottom: -10px;\n    left: 1px;\n    border-right-color: #333;\n    border-left-width: 0;\n    content: \" \"}\n.popover.bottom .arrow {\n    top: -11px;\n    left: 50%;\n    
margin-left: -11px;\n    border-bottom-color: #999;\n    border-bottom-color: rgba(0, 0, 0, 0.25);\n    border-top-width: 0;\n}\n.popover.bottom .arrow:after {\n    top: 1px;\n    margin-left: -10px;\n    border-bottom-color: #333;\n    border-top-width: 0;\n    content: \" \"}\n.popover.left .arrow {\n    top: 50%;\n    right: -11px;\n    margin-top: -11px;\n    border-left-color: #999;\n    border-left-color: rgba(0, 0, 0, 0.25);\n    border-right-width: 0;\n}\n.popover.left .arrow:after {\n    right: 1px;\n    bottom: -10px;\n    border-left-color: #333;\n    border-right-width: 0;\n    content: \" \"}\n.carousel {\n    position: relative;\n}\n.carousel-inner {\n    position: relative;\n    width: 100%;\n    overflow: hidden;\n}\n.carousel-inner>.item {\n    position: relative;\n    display: none;\n    -webkit-transition: .6s ease-in-out left;\n    transition: .6s ease-in-out left;\n}\n.carousel-inner>.item>img, .carousel-inner>.item>a>img {\n    display: block;\n    height: auto;\n    max-width: 100%;\n    line-height: 1;\n}\n.carousel-inner>.active, .carousel-inner>.next, .carousel-inner>.prev {\n    display: block;\n}\n.carousel-inner>.active {\n    left: 0;\n}\n.carousel-inner>.next, .carousel-inner>.prev {\n    position: absolute;\n    top: 0;\n    width: 100%}\n.carousel-inner>.next {\n    left: 100%}\n.carousel-inner>.prev {\n    left: -100%}\n.carousel-inner>.next.left, .carousel-inner>.prev.right {\n    left: 0;\n}\n.carousel-inner>.active.left {\n    left: -100%}\n.carousel-inner>.active.right {\n    left: 100%}\n.carousel-control {\n    position: absolute;\n    top: 0;\n    bottom: 0;\n    left: 0;\n    width: 15%;\n    font-size: 20px;\n    color: #fff;\n    text-align: center;\n    text-shadow: 0 1px 2px rgba(0, 0, 0, 0.6);\n    opacity: .5;\n    filter: alpha(opacity=50);\n}\n.carousel-control.left {\n    background-image: -webkit-linear-gradient(left, color-stop(rgba(0, 0, 0, 0.5) 0), color-stop(rgba(0, 0, 0, 0.0001) 100%));\n    background-image: linear-gradient(to right, rgba(0, 0, 0, 0.5) 0, rgba(0, 0, 0, 0.0001) 100%);\n    background-repeat: repeat-x;\n    filter: progid:DXImageTransform.Microsoft.gradient(startColorstr='#80000000', endColorstr='#00000000', GradientType=1);\n}\n.carousel-control.right {\n    right: 0;\n    left: auto;\n    background-image: -webkit-linear-gradient(left, color-stop(rgba(0, 0, 0, 0.0001) 0), color-stop(rgba(0, 0, 0, 0.5) 100%));\n    background-image: linear-gradient(to right, rgba(0, 0, 0, 0.0001) 0, rgba(0, 0, 0, 0.5) 100%);\n    background-repeat: repeat-x;\n    filter: progid:DXImageTransform.Microsoft.gradient(startColorstr='#00000000', endColorstr='#80000000', GradientType=1);\n}\n.carousel-control:hover, .carousel-control:focus {\n    color: #fff;\n    text-decoration: none;\n    outline: 0;\n    opacity: .9;\n    filter: alpha(opacity=90);\n}\n.carousel-control .icon-prev, .carousel-control .icon-next, .carousel-control .glyphicon-chevron-left, .carousel-control .glyphicon-chevron-right {\n    position: absolute;\n    top: 50%;\n    z-index: 5;\n    display: inline-block;\n}\n.carousel-control .icon-prev, .carousel-control .glyphicon-chevron-left {\n    left: 50%}\n.carousel-control .icon-next, .carousel-control .glyphicon-chevron-right {\n    right: 50%}\n.carousel-control .icon-prev, .carousel-control .icon-next {\n    width: 20px;\n    height: 20px;\n    margin-top: -10px;\n    margin-left: -10px;\n    font-family: serif;\n}\n.carousel-control .icon-prev:before {\n    content: '\\2039'}\n.carousel-control .icon-next:before 
{\n    content: '\\203a'}\n.carousel-indicators {\n    position: absolute;\n    bottom: 10px;\n    left: 50%;\n    z-index: 15;\n    width: 60%;\n    padding-left: 0;\n    margin-left: -30%;\n    text-align: center;\n    list-style: none;\n}\n.carousel-indicators li {\n    display: inline-block;\n    width: 10px;\n    height: 10px;\n    margin: 1px;\n    text-indent: -999px;\n    cursor: pointer;\n    background-color: #000 \\9;\n    background-color: rgba(0, 0, 0, 0);\n    border: 1px solid #fff;\n    border-radius: 10px;\n}\n.carousel-indicators .active {\n    width: 12px;\n    height: 12px;\n    margin: 0;\n    background-color: #fff;\n}\n.carousel-caption {\n    position: absolute;\n    right: 15%;\n    bottom: 20px;\n    left: 15%;\n    z-index: 10;\n    padding-top: 20px;\n    padding-bottom: 20px;\n    color: #fff;\n    text-align: center;\n    text-shadow: 0 1px 2px rgba(0, 0, 0, 0.6);\n}\n.carousel-caption .btn {\n    text-shadow: none;\n}\n@media screen and (min-width:768px) {\n    .carousel-control .glyphicons-chevron-left, .carousel-control .glyphicons-chevron-right, .carousel-control .icon-prev, .carousel-control .icon-next {\n    width: 30px;\n    height: 30px;\n    margin-top: -15px;\n    margin-left: -15px;\n    font-size: 30px;\n}\n.carousel-caption {\n    right: 20%;\n    left: 20%;\n    padding-bottom: 30px;\n}\n.carousel-indicators {\n    bottom: 20px;\n}\n}.clearfix:before, .clearfix:after {\n    display: table;\n    content: \" \"}\n.clearfix:after {\n    clear: both;\n}\n.clearfix:before, .clearfix:after {\n    display: table;\n    content: \" \"}\n.clearfix:after {\n    clear: both;\n}\n.center-block {\n    display: block;\n    margin-right: auto;\n    margin-left: auto;\n}\n.pull-right {\n    float: right!important;\n}\n.pull-left {\n    float: left!important;\n}\n.hide {\n    display: none!important;\n}\n.show {\n    display: block!important;\n}\n.invisible {\n    visibility: hidden;\n}\n.text-hide {\n    font: 0/0 a;\n    color: transparent;\n    text-shadow: none;\n    background-color: transparent;\n    border: 0;\n}\n.hidden {\n    display: none!important;\n    visibility: hidden!important;\n}\n.affix {\n    position: fixed;\n}\n@-ms-viewport {\n    width: device-width;\n}\n.visible-xs, tr.visible-xs, th.visible-xs, td.visible-xs {\n    display: none!important;\n}\n@media(max-width:767px) {\n    .visible-xs {\n    display: block!important;\n}\ntable.visible-xs {\n    display: table;\n}\ntr.visible-xs {\n    display: table-row!important;\n}\nth.visible-xs, td.visible-xs {\n    display: table-cell!important;\n}\n}@media(min-width:768px) and (max-width:991px) {\n    .visible-xs.visible-sm {\n    display: block!important;\n}\ntable.visible-xs.visible-sm {\n    display: table;\n}\ntr.visible-xs.visible-sm {\n    display: table-row!important;\n}\nth.visible-xs.visible-sm, td.visible-xs.visible-sm {\n    display: table-cell!important;\n}\n}@media(min-width:992px) and (max-width:1199px) {\n    .visible-xs.visible-md {\n    display: block!important;\n}\ntable.visible-xs.visible-md {\n    display: table;\n}\ntr.visible-xs.visible-md {\n    display: table-row!important;\n}\nth.visible-xs.visible-md, td.visible-xs.visible-md {\n    display: table-cell!important;\n}\n}@media(min-width:1200px) {\n    .visible-xs.visible-lg {\n    display: block!important;\n}\ntable.visible-xs.visible-lg {\n    display: table;\n}\ntr.visible-xs.visible-lg {\n    display: table-row!important;\n}\nth.visible-xs.visible-lg, td.visible-xs.visible-lg {\n    display: 
table-cell!important;\n}\n}.visible-sm, tr.visible-sm, th.visible-sm, td.visible-sm {\n    display: none!important;\n}\n@media(max-width:767px) {\n    .visible-sm.visible-xs {\n    display: block!important;\n}\ntable.visible-sm.visible-xs {\n    display: table;\n}\ntr.visible-sm.visible-xs {\n    display: table-row!important;\n}\nth.visible-sm.visible-xs, td.visible-sm.visible-xs {\n    display: table-cell!important;\n}\n}@media(min-width:768px) and (max-width:991px) {\n    .visible-sm {\n    display: block!important;\n}\ntable.visible-sm {\n    display: table;\n}\ntr.visible-sm {\n    display: table-row!important;\n}\nth.visible-sm, td.visible-sm {\n    display: table-cell!important;\n}\n}@media(min-width:992px) and (max-width:1199px) {\n    .visible-sm.visible-md {\n    display: block!important;\n}\ntable.visible-sm.visible-md {\n    display: table;\n}\ntr.visible-sm.visible-md {\n    display: table-row!important;\n}\nth.visible-sm.visible-md, td.visible-sm.visible-md {\n    display: table-cell!important;\n}\n}@media(min-width:1200px) {\n    .visible-sm.visible-lg {\n    display: block!important;\n}\ntable.visible-sm.visible-lg {\n    display: table;\n}\ntr.visible-sm.visible-lg {\n    display: table-row!important;\n}\nth.visible-sm.visible-lg, td.visible-sm.visible-lg {\n    display: table-cell!important;\n}\n}.visible-md, tr.visible-md, th.visible-md, td.visible-md {\n    display: none!important;\n}\n@media(max-width:767px) {\n    .visible-md.visible-xs {\n    display: block!important;\n}\ntable.visible-md.visible-xs {\n    display: table;\n}\ntr.visible-md.visible-xs {\n    display: table-row!important;\n}\nth.visible-md.visible-xs, td.visible-md.visible-xs {\n    display: table-cell!important;\n}\n}@media(min-width:768px) and (max-width:991px) {\n    .visible-md.visible-sm {\n    display: block!important;\n}\ntable.visible-md.visible-sm {\n    display: table;\n}\ntr.visible-md.visible-sm {\n    display: table-row!important;\n}\nth.visible-md.visible-sm, td.visible-md.visible-sm {\n    display: table-cell!important;\n}\n}@media(min-width:992px) and (max-width:1199px) {\n    .visible-md {\n    display: block!important;\n}\ntable.visible-md {\n    display: table;\n}\ntr.visible-md {\n    display: table-row!important;\n}\nth.visible-md, td.visible-md {\n    display: table-cell!important;\n}\n}@media(min-width:1200px) {\n    .visible-md.visible-lg {\n    display: block!important;\n}\ntable.visible-md.visible-lg {\n    display: table;\n}\ntr.visible-md.visible-lg {\n    display: table-row!important;\n}\nth.visible-md.visible-lg, td.visible-md.visible-lg {\n    display: table-cell!important;\n}\n}.visible-lg, tr.visible-lg, th.visible-lg, td.visible-lg {\n    display: none!important;\n}\n@media(max-width:767px) {\n    .visible-lg.visible-xs {\n    display: block!important;\n}\ntable.visible-lg.visible-xs {\n    display: table;\n}\ntr.visible-lg.visible-xs {\n    display: table-row!important;\n}\nth.visible-lg.visible-xs, td.visible-lg.visible-xs {\n    display: table-cell!important;\n}\n}@media(min-width:768px) and (max-width:991px) {\n    .visible-lg.visible-sm {\n    display: block!important;\n}\ntable.visible-lg.visible-sm {\n    display: table;\n}\ntr.visible-lg.visible-sm {\n    display: table-row!important;\n}\nth.visible-lg.visible-sm, td.visible-lg.visible-sm {\n    display: table-cell!important;\n}\n}@media(min-width:992px) and (max-width:1199px) {\n    .visible-lg.visible-md {\n    display: block!important;\n}\ntable.visible-lg.visible-md {\n    display: 
table;\n}\ntr.visible-lg.visible-md {\n    display: table-row!important;\n}\nth.visible-lg.visible-md, td.visible-lg.visible-md {\n    display: table-cell!important;\n}\n}@media(min-width:1200px) {\n    .visible-lg {\n    display: block!important;\n}\ntable.visible-lg {\n    display: table;\n}\ntr.visible-lg {\n    display: table-row!important;\n}\nth.visible-lg, td.visible-lg {\n    display: table-cell!important;\n}\n}.hidden-xs {\n    display: block!important;\n}\ntable.hidden-xs {\n    display: table;\n}\ntr.hidden-xs {\n    display: table-row!important;\n}\nth.hidden-xs, td.hidden-xs {\n    display: table-cell!important;\n}\n@media(max-width:767px) {\n    .hidden-xs, tr.hidden-xs, th.hidden-xs, td.hidden-xs {\n    display: none!important;\n}\n}@media(min-width:768px) and (max-width:991px) {\n    .hidden-xs.hidden-sm, tr.hidden-xs.hidden-sm, th.hidden-xs.hidden-sm, td.hidden-xs.hidden-sm {\n    display: none!important;\n}\n}@media(min-width:992px) and (max-width:1199px) {\n    .hidden-xs.hidden-md, tr.hidden-xs.hidden-md, th.hidden-xs.hidden-md, td.hidden-xs.hidden-md {\n    display: none!important;\n}\n}@media(min-width:1200px) {\n    .hidden-xs.hidden-lg, tr.hidden-xs.hidden-lg, th.hidden-xs.hidden-lg, td.hidden-xs.hidden-lg {\n    display: none!important;\n}\n}.hidden-sm {\n    display: block!important;\n}\ntable.hidden-sm {\n    display: table;\n}\ntr.hidden-sm {\n    display: table-row!important;\n}\nth.hidden-sm, td.hidden-sm {\n    display: table-cell!important;\n}\n@media(max-width:767px) {\n    .hidden-sm.hidden-xs, tr.hidden-sm.hidden-xs, th.hidden-sm.hidden-xs, td.hidden-sm.hidden-xs {\n    display: none!important;\n}\n}@media(min-width:768px) and (max-width:991px) {\n    .hidden-sm, tr.hidden-sm, th.hidden-sm, td.hidden-sm {\n    display: none!important;\n}\n}@media(min-width:992px) and (max-width:1199px) {\n    .hidden-sm.hidden-md, tr.hidden-sm.hidden-md, th.hidden-sm.hidden-md, td.hidden-sm.hidden-md {\n    display: none!important;\n}\n}@media(min-width:1200px) {\n    .hidden-sm.hidden-lg, tr.hidden-sm.hidden-lg, th.hidden-sm.hidden-lg, td.hidden-sm.hidden-lg {\n    display: none!important;\n}\n}.hidden-md {\n    display: block!important;\n}\ntable.hidden-md {\n    display: table;\n}\ntr.hidden-md {\n    display: table-row!important;\n}\nth.hidden-md, td.hidden-md {\n    display: table-cell!important;\n}\n@media(max-width:767px) {\n    .hidden-md.hidden-xs, tr.hidden-md.hidden-xs, th.hidden-md.hidden-xs, td.hidden-md.hidden-xs {\n    display: none!important;\n}\n}@media(min-width:768px) and (max-width:991px) {\n    .hidden-md.hidden-sm, tr.hidden-md.hidden-sm, th.hidden-md.hidden-sm, td.hidden-md.hidden-sm {\n    display: none!important;\n}\n}@media(min-width:992px) and (max-width:1199px) {\n    .hidden-md, tr.hidden-md, th.hidden-md, td.hidden-md {\n    display: none!important;\n}\n}@media(min-width:1200px) {\n    .hidden-md.hidden-lg, tr.hidden-md.hidden-lg, th.hidden-md.hidden-lg, td.hidden-md.hidden-lg {\n    display: none!important;\n}\n}.hidden-lg {\n    display: block!important;\n}\ntable.hidden-lg {\n    display: table;\n}\ntr.hidden-lg {\n    display: table-row!important;\n}\nth.hidden-lg, td.hidden-lg {\n    display: table-cell!important;\n}\n@media(max-width:767px) {\n    .hidden-lg.hidden-xs, tr.hidden-lg.hidden-xs, th.hidden-lg.hidden-xs, td.hidden-lg.hidden-xs {\n    display: none!important;\n}\n}@media(min-width:768px) and (max-width:991px) {\n    .hidden-lg.hidden-sm, tr.hidden-lg.hidden-sm, th.hidden-lg.hidden-sm, td.hidden-lg.hidden-sm {\n    display: 
none!important;\n}\n}@media(min-width:992px) and (max-width:1199px) {\n    .hidden-lg.hidden-md, tr.hidden-lg.hidden-md, th.hidden-lg.hidden-md, td.hidden-lg.hidden-md {\n    display: none!important;\n}\n}@media(min-width:1200px) {\n    .hidden-lg, tr.hidden-lg, th.hidden-lg, td.hidden-lg {\n    display: none!important;\n}\n}.visible-print, tr.visible-print, th.visible-print, td.visible-print {\n    display: none!important;\n}\n@media print {\n    .visible-print {\n    display: block!important;\n}\ntable.visible-print {\n    display: table;\n}\ntr.visible-print {\n    display: table-row!important;\n}\nth.visible-print, td.visible-print {\n    display: table-cell!important;\n}\n.hidden-print, tr.hidden-print, th.hidden-print, td.hidden-print {\n    display: none!important;\n}\n}.navbar {\n    font-size: 13px;\n    font-weight: 300;\n    border: 0;\n}\n.navbar .navbar-toggle:hover .icon-bar {\n    background-color: #b3b3b3;\n}\n.navbar-collapse {\n    border-top-color: rgba(0, 0, 0, 0.2);\n    -webkit-box-shadow: none;\n    box-shadow: none;\n}\n.navbar .dropdown-menu {\n    border: 0;\n}\n.navbar .dropdown-menu>li>a, .navbar .dropdown-menu>li>a:focus {\n    font-size: 13px;\n    font-weight: 300;\n    background-color: transparent;\n}\n.navbar .dropdown-header {\n    color: rgba(255, 255, 255, 0.5);\n}\n.navbar-default .dropdown-menu {\n    background-color: #333;\n}\n.navbar-default .dropdown-menu>li>a, .navbar-default .dropdown-menu>li>a:focus {\n    color: #fff;\n}\n.navbar-default .dropdown-menu>li>a:hover, .navbar-default .dropdown-menu>.active>a, .navbar-default .dropdown-menu>.active>a:hover {\n    background-color: #272727;\n}\n.navbar-inverse .dropdown-menu {\n    background-color: #008cba;\n}\n.navbar-inverse .dropdown-menu>li>a, .navbar-inverse .dropdown-menu>li>a:focus {\n    color: #fff;\n}\n.navbar-inverse .dropdown-menu>li>a:hover, .navbar-inverse .dropdown-menu>.active>a, .navbar-inverse .dropdown-menu>.active>a:hover {\n    background-color: #006687;\n}\n.btn {\n    padding: 14px 28px;\n}\n.btn-lg {\n    padding: 16px 32px;\n}\n.btn-sm {\n    padding: 8px 16px;\n}\n.btn-xs {\n    padding: 4px 8px;\n}\n.btn-group .btn~.dropdown-toggle {\n    padding-right: 16px;\n    padding-left: 16px;\n}\n.btn-group .dropdown-menu {\n    border-top-width: 0;\n}\n.btn-group.dropup .dropdown-menu {\n    margin-bottom: 0;\n    border-top-width: 1px;\n    border-bottom-width: 0;\n}\n.btn-group .dropdown-toggle.btn-default~.dropdown-menu {\n    background-color: #e7e7e7;\n    border-color: #dadada;\n}\n.btn-group .dropdown-toggle.btn-default~.dropdown-menu>li>a {\n    color: #333;\n}\n.btn-group .dropdown-toggle.btn-default~.dropdown-menu>li>a:hover {\n    background-color: #d3d3d3;\n}\n.btn-group .dropdown-toggle.btn-primary~.dropdown-menu {\n    background-color: #008cba;\n    border-color: #0079a1;\n}\n.btn-group .dropdown-toggle.btn-primary~.dropdown-menu>li>a {\n    color: #fff;\n}\n.btn-group .dropdown-toggle.btn-primary~.dropdown-menu>li>a:hover {\n    background-color: #006d91;\n}\n.btn-group .dropdown-toggle.btn-success~.dropdown-menu {\n    background-color: #43ac6a;\n    border-color: #3c9a5f;\n}\n.btn-group .dropdown-toggle.btn-success~.dropdown-menu>li>a {\n    color: #fff;\n}\n.btn-group .dropdown-toggle.btn-success~.dropdown-menu>li>a:hover {\n    background-color: #388f58;\n}\n.btn-group .dropdown-toggle.btn-info~.dropdown-menu {\n    background-color: #5bc0de;\n    border-color: #46b8da;\n}\n.btn-group .dropdown-toggle.btn-info~.dropdown-menu>li>a {\n    color: 
#fff;\n}\n.btn-group .dropdown-toggle.btn-info~.dropdown-menu>li>a:hover {\n    background-color: #39b3d7;\n}\n.btn-group .dropdown-toggle.btn-warning~.dropdown-menu {\n    background-color: #e99002;\n    border-color: #d08002;\n}\n.btn-group .dropdown-toggle.btn-warning~.dropdown-menu>li>a {\n    color: #fff;\n}\n.btn-group .dropdown-toggle.btn-warning~.dropdown-menu>li>a:hover {\n    background-color: #c17702;\n}\n.btn-group .dropdown-toggle.btn-danger~.dropdown-menu {\n    background-color: #f04124;\n    border-color: #ea2f10;\n}\n.btn-group .dropdown-toggle.btn-danger~.dropdown-menu>li>a {\n    color: #fff;\n}\n.btn-group .dropdown-toggle.btn-danger~.dropdown-menu>li>a:hover {\n    background-color: #dc2c0f;\n}\n.lead {\n    color: #6f6f6f;\n}\ncite {\n    font-style: italic;\n}\nblockquote {\n    color: #6f6f6f;\n    border-left-width: 1px;\n}\nblockquote.pull-right {\n    border-right-width: 1px;\n}\nblockquote small {\n    font-size: 12px;\n    font-weight: 300;\n}\ntable {\n    font-size: 12px;\n}\ninput, .form-control {\n    padding: 7px;\n    font-size: 12px;\n}\nlabel, .control-label, .help-block, .checkbox, .radio {\n    font-size: 12px;\n    font-weight: normal;\n}\n.form-group .btn, .input-group-addon, .input-group-btn .btn {\n    padding: 8px 14px;\n    font-size: 12px;\n}\n.nav .open>a, .nav .open>a:hover, .nav .open>a:focus {\n    border-color: transparent;\n}\n.nav-tabs>li>a {\n    color: #222;\n    background-color: #e7e7e7;\n}\n.nav-tabs .caret {\n    border-top-color: #222;\n    border-bottom-color: #222;\n}\n.nav-pills {\n    font-weight: 300;\n}\n.breadcrumb {\n    font-size: 10px;\n    font-weight: 300;\n    text-transform: uppercase;\n    border: 1px solid #ddd;\n    border-radius: 3px;\n}\n.pagination {\n    font-size: 12px;\n    font-weight: 300;\n    color: #999;\n}\n.pagination>li>a, .pagination>li>span {\n    margin-left: 4px;\n    color: #999;\n}\n.pagination>.active>a, .pagination>.active>span {\n    color: #fff;\n}\n.pagination>li>a, .pagination>li:first-child>a, .pagination>li:last-child>a, .pagination>li>span, .pagination>li:first-child>span, .pagination>li:last-child>span {\n    border-radius: 3px;\n}\n.pagination-lg>li>a {\n    padding-right: 22px;\n    padding-left: 22px;\n}\n.pagination-sm>li>a {\n    padding: 0 5px;\n}\n.pager {\n    font-size: 12px;\n    font-weight: 300;\n    color: #999;\n}\n.list-group {\n    font-size: 12px;\n    font-weight: 300;\n}\n.label {\n    padding-right: 1em;\n    padding-left: 1em;\n    font-weight: 300;\n    border-radius: 0;\n}\n.label-default {\n    color: #333;\n    background-color: #e7e7e7;\n}\n.badge {\n    font-weight: 300;\n}\n.progress {\n    height: 22px;\n    padding: 2px;\n    background-color: #f6f6f6;\n    border: 1px solid #ccc;\n    -webkit-box-shadow: none;\n    box-shadow: none;\n}\n.dropdown-menu {\n    padding: 0;\n    margin-top: 0;\n    font-size: 12px;\n}\n.dropdown-menu>li>a {\n    padding: 12px 15px;\n}\n.dropdown-header {\n    padding-right: 15px;\n    padding-left: 15px;\n    font-size: 9px;\n    text-transform: uppercase;\n}\n.popover {\n    font-size: 12px;\n    font-weight: 300;\n    color: #fff;\n}\n.panel-heading, .panel-footer {\n    border-top-right-radius: 0;\n    border-top-left-radius: 0;\n}\n.clearfix:before, .clearfix:after {\n    display: table;\n    content: \" \"}\n.clearfix:after {\n    clear: both;\n}\n.clearfix:before, .clearfix:after {\n    display: table;\n    content: \" \"}\n.clearfix:after {\n    clear: both;\n}\n.center-block {\n    display: block;\n    margin-right: 
auto;\n    margin-left: auto;\n}\n.pull-right {\n    float: right!important;\n}\n.pull-left {\n    float: left!important;\n}\n.hide {\n    display: none!important;\n}\n.show {\n    display: block!important;\n}\n.invisible {\n    visibility: hidden;\n}\n.text-hide {\n    font: 0/0 a;\n    color: transparent;\n    text-shadow: none;\n    background-color: transparent;\n    border: 0;\n}\n.hidden {\n    display: none!important;\n    visibility: hidden!important;\n}\n.affix {\n    position: fixed;\n}\n"
  },
  {
    "path": "docs/cinder/css/cinder.css",
    "content": "/*\n  Cinder Theme for MkDocs | Copyright 2015 Christopher Simpkins | MIT License\n*/\n\nbody {\n    font-family:\"Open Sans\", \"Helvetica Neue\", Helvetica, Arial, sans-serif;\n    font-size: 16px;\n    line-height: 1.7;\n    background-color: #FFF;\n    color: #343838;\n}\nh1, h2, h3, h4, h5, h6 {\n    font-family:'Inter', 'Helvetica Neue', Helvetica, Arial, sans-serif;\n    color: #222;\n}\nh1 small, h2 small, h3 small, h4 small, h5 small, h6 small, .h1 small, .h2 small, .h3 small, .h4 small, .h5 small, .h6 small, h1 .small, h2 .small, h3 .small, h4 .small, h5 .small, h6 .small, .h1 .small, .h2 .small, .h3 .small, .h4 .small, .h5 .small, .h6 .small {\n    color: #B1B7B9;\n}\n\nh2 {\n    margin-top: 35px;\n}\n\nh1, h2 {\n    font-weight: 700;\n}\nh4 {\n    font-family: 'Inter', 'Helvetica Neue', Helvetica, Arial, sans-serif;\n    font-weight: 300;\n    margin-top: 20px;\n    font-style: italic;\n}\nh5 {\n    font-family: 'Inter', 'Helvetica Neue', Helvetica, Arial, sans-serif;\n    font-weight: 300;\n    font-variant: small-caps;\n}\npre, code {\n    background-color: #FCFDFF;\n}\npre>code {\n    font-size: 13px;\n}\npre {\n    margin-top: 25px;\n    margin-bottom: 25px;\n}\n.lead {\n    font-family:\"Inter\", \"Helvetica Neue\", Helvetica, Arial, sans-serif;\n    font-weight: 400;\n    line-height: 1.4;\n    letter-spacing: 0.0312em;\n    color: #B1B7B9;\n}\n.navbar-default {\n    background-color: #343838;\n    border-bottom: 8px #EBF2F2 solid;\n}\n.bs-sidenav {\n    background-image: url(\"../img/grid11.png\");\n    background-repeat: repeat;\n    font-family: Inter,\"Helvetica Neue\",Helvetica,Arial,sans-serif;\n    font-size: 13px;\n}\n.well {\n    background-color: #FCFDFF;\n}\n.btn-default {\n    background-color:#FCFDFF;\n}\n.table-striped > tbody > tr:nth-child(2n+1) > td, .table-striped > tbody > tr:nth-child(2n+1) > th {\n    background-color: #FCFDFF;\n}\n#mkdocs-search-query:focus {\n    outline: none;\n    -webkit-box-shadow: none;\n    box-shadow: none;\n}\n#mkdocs-search-query {\n    font-family:\"Inter\", \"Helvetica Neue\", Helvetica, Arial, sans-serif;\n    font-size: 20px;\n    font-weight: 700;\n    color: #343838;\n    height: 45px;\n}\nfooter > hr {\n    width: 35%;\n}\n"
  },
  {
    "path": "docs/cinder/css/highlight.css",
    "content": "/*\n\ngithub.com style (c) Vasily Polovnyov <vast@whiteants.net>\n\n*/\n\n.hljs {\n  display: block;\n  overflow-x: auto;\n  padding: 0.5em;\n  color: #333;\n  background: #FCFDFF;\n}\n\n.hljs-comment,\n.hljs-quote {\n  color: #998;\n  font-style: italic;\n}\n\n.hljs-keyword,\n.hljs-selector-tag,\n.hljs-subst {\n  color: #333;\n  font-weight: bold;\n}\n\n.hljs-number,\n.hljs-literal,\n.hljs-variable,\n.hljs-template-variable,\n.hljs-tag .hljs-attr {\n  color: #008080;\n}\n\n.hljs-string,\n.hljs-doctag {\n  color: #d14;\n}\n\n.hljs-title,\n.hljs-section,\n.hljs-selector-id {\n  color: #900;\n  font-weight: bold;\n}\n\n.hljs-subst {\n  font-weight: normal;\n}\n\n.hljs-type,\n.hljs-class .hljs-title {\n  color: #458;\n  font-weight: bold;\n}\n\n.hljs-tag,\n.hljs-name,\n.hljs-attribute {\n  color: #000080;\n  font-weight: normal;\n}\n\n.hljs-regexp,\n.hljs-link {\n  color: #009926;\n}\n\n.hljs-symbol,\n.hljs-bullet {\n  color: #990073;\n}\n\n.hljs-built_in,\n.hljs-builtin-name {\n  color: #0086b3;\n}\n\n.hljs-meta {\n  color: #999;\n  font-weight: bold;\n}\n\n.hljs-deletion {\n  background: #fdd;\n}\n\n.hljs-addition {\n  background: #dfd;\n}\n\n.hljs-emphasis {\n  font-style: italic;\n}\n\n.hljs-strong {\n  font-weight: bold;\n}\n"
  },
  {
    "path": "docs/cinder/js/base.js",
    "content": "function getSearchTerm() {\n    var sPageURL = window.location.search.substring(1);\n    var sURLVariables = sPageURL.split('&');\n    for (var i = 0; i < sURLVariables.length; i++) {\n        var sParameterName = sURLVariables[i].split('=');\n        if (sParameterName[0] == 'q') {\n            return sParameterName[1];\n        }\n    }\n}\n\n$(document).ready(function() {\n    /**\n     * ------------------------------------------------------------------------\n     * Taken from themes/mkdocs/js/base.js\n     * ------------------------------------------------------------------------\n     */\n    var search_term = getSearchTerm(),\n        $search_modal = $('#mkdocs_search_modal'),\n        $keyboard_modal = $('#mkdocs_keyboard_modal');\n\n    if (search_term) {\n        $search_modal.modal();\n    }\n\n    // make sure search input gets autofocus everytime modal opens.\n    $search_modal.on('shown.bs.modal', function() {\n        $search_modal.find('#mkdocs-search-query').focus();\n    });\n\n    // Close search modal when result is selected\n    // The links get added later so listen to parent\n    $('#mkdocs-search-results').click(function(e) {\n        if ($(e.target).is('a')) {\n            $search_modal.modal('hide');\n        }\n    });\n\n    if (typeof shortcuts !== 'undefined') {\n        // Populate keyboard modal with proper Keys\n        $keyboard_modal.find('.help.shortcut kbd')[0].innerHTML = keyCodes[shortcuts.help];\n        $keyboard_modal.find('.prev.shortcut kbd')[0].innerHTML = keyCodes[shortcuts.previous];\n        $keyboard_modal.find('.next.shortcut kbd')[0].innerHTML = keyCodes[shortcuts.next];\n        $keyboard_modal.find('.search.shortcut kbd')[0].innerHTML = keyCodes[shortcuts.search];\n\n        // Keyboard navigation\n        document.addEventListener(\"keydown\", function(e) {\n            if ($(e.target).is(':input')) return true;\n            var key = e.which || e.key || window.event && window.event.key;\n            var page;\n            switch (key) {\n                case shortcuts.next:\n                    page = $('.navbar a[rel=\"next\"]:first').prop('href');\n                    break;\n                case shortcuts.previous:\n                    page = $('.navbar a[rel=\"prev\"]:first').prop('href');\n                    break;\n                case shortcuts.search:\n                    e.preventDefault();\n                    $keyboard_modal.modal('hide');\n                    $search_modal.modal('show');\n                    $search_modal.find('#mkdocs-search-query').focus();\n                    break;\n                case shortcuts.help:\n                    $search_modal.modal('hide');\n                    $keyboard_modal.modal('show');\n                    break;\n                default:\n                    break;\n            }\n            if (page) {\n                $keyboard_modal.modal('hide');\n                window.location.href = page;\n            }\n        });\n    }\n\n    $('table').addClass('table table-striped table-hover');\n\n    // Improve the scrollspy behaviour when users click on a TOC item.\n    $(\".bs-sidenav a\").on(\"click\", function() {\n        var clicked = this;\n        setTimeout(function() {\n            var active = $('.nav li.active a');\n            active = active[active.length - 1];\n            if (clicked !== active) {\n                $(active).parent().removeClass(\"active\");\n                $(clicked).parent().addClass(\"active\");\n            }\n        }, 50);\n    
});\n});\n\n\n/**\n * ------------------------------------------------------------------------\n * Taken from themes/mkdocs/js/base.js\n * ------------------------------------------------------------------------\n */\n\n$('body').scrollspy({\n    target: '.bs-sidebar',\n    offset: 100\n});\n\n/* Prevent disabled links from causing a page reload */\n$(\"li.disabled a\").click(function(event) {\n    event.preventDefault();\n});\n\n// See https://www.cambiaresearch.com/articles/15/javascript-char-codes-key-codes\n// We only list common keys below. Obscure keys are omitted and their use is discouraged.\nvar keyCodes = {\n    8: 'backspace',\n    9: 'tab',\n    13: 'enter',\n    16: 'shift',\n    17: 'ctrl',\n    18: 'alt',\n    19: 'pause/break',\n    20: 'caps lock',\n    27: 'escape',\n    32: 'spacebar',\n    33: 'page up',\n    34: 'page down',\n    35: 'end',\n    36: 'home',\n    37: '&larr;',\n    38: '&uarr;',\n    39: '&rarr;',\n    40: '&darr;',\n    45: 'insert',\n    46: 'delete',\n    48: '0',\n    49: '1',\n    50: '2',\n    51: '3',\n    52: '4',\n    53: '5',\n    54: '6',\n    55: '7',\n    56: '8',\n    57: '9',\n    65: 'a',\n    66: 'b',\n    67: 'c',\n    68: 'd',\n    69: 'e',\n    70: 'f',\n    71: 'g',\n    72: 'h',\n    73: 'i',\n    74: 'j',\n    75: 'k',\n    76: 'l',\n    77: 'm',\n    78: 'n',\n    79: 'o',\n    80: 'p',\n    81: 'q',\n    82: 'r',\n    83: 's',\n    84: 't',\n    85: 'u',\n    86: 'v',\n    87: 'w',\n    88: 'x',\n    89: 'y',\n    90: 'z',\n    91: 'Left Windows Key / Left ⌘',\n    92: 'Right Windows Key',\n    93: 'Windows Menu / Right ⌘',\n    96: 'numpad 0',\n    97: 'numpad 1',\n    98: 'numpad 2',\n    99: 'numpad 3',\n    100: 'numpad 4',\n    101: 'numpad 5',\n    102: 'numpad 6',\n    103: 'numpad 7',\n    104: 'numpad 8',\n    105: 'numpad 9',\n    106: 'multiply',\n    107: 'add',\n    109: 'subtract',\n    110: 'decimal point',\n    111: 'divide',\n    112: 'f1',\n    113: 'f2',\n    114: 'f3',\n    115: 'f4',\n    116: 'f5',\n    117: 'f6',\n    118: 'f7',\n    119: 'f8',\n    120: 'f9',\n    121: 'f10',\n    122: 'f11',\n    123: 'f12',\n    124: 'f13',\n    125: 'f14',\n    126: 'f15',\n    127: 'f16',\n    128: 'f17',\n    129: 'f18',\n    130: 'f19',\n    131: 'f20',\n    132: 'f21',\n    133: 'f22',\n    134: 'f23',\n    135: 'f24',\n    144: 'num lock',\n    145: 'scroll lock',\n    186: '&semi;',\n    187: '&equals;',\n    188: '&comma;',\n    189: '&hyphen;',\n    190: '&period;',\n    191: '&quest;',\n    192: '&grave;',\n    219: '&lsqb;',\n    220: '&bsol;',\n    221: '&rsqb;',\n    222: '&apos;',\n};\n"
  },
  {
    "path": "docs/cinder/keyboard-modal.html",
    "content": "<div class=\"modal\" id=\"mkdocs_keyboard_modal\" tabindex=\"-1\" role=\"dialog\" aria-labelledby=\"keyboardModalLabel\" aria-hidden=\"true\">\n    <div class=\"modal-dialog\">\n        <div class=\"modal-content\">\n            <div class=\"modal-header\">\n                <h4 class=\"modal-title\" id=\"keyboardModalLabel\">Keyboard Shortcuts</h4>\n                <button type=\"button\" class=\"close\" data-dismiss=\"modal\"><span aria-hidden=\"true\">&times;</span><span class=\"sr-only\">Close</span></button>\n            </div>\n            <div class=\"modal-body\">\n              <table class=\"table\">\n                <thead>\n                  <tr>\n                    <th style=\"width: 20%;\">Keys</th>\n                    <th>Action</th>\n                  </tr>\n                </thead>\n                <tbody>\n                  <tr>\n                    <td class=\"help shortcut\"><kbd>?</kbd></td>\n                    <td>Open this help</td>\n                  </tr>\n                  <tr>\n                    <td class=\"next shortcut\"><kbd>n</kbd></td>\n                    <td>Next page</td>\n                  </tr>\n                  <tr>\n                    <td class=\"prev shortcut\"><kbd>p</kbd></td>\n                    <td>Previous page</td>\n                  </tr>\n                  <tr>\n                    <td class=\"search shortcut\"><kbd>s</kbd></td>\n                    <td>Search</td>\n                  </tr>\n                </tbody>\n              </table>\n            </div>\n            <div class=\"modal-footer\">\n            </div>\n        </div>\n    </div>\n</div>\n"
  },
  {
    "path": "docs/cinder/main.html",
    "content": "{% extends \"base.html\" %}\n\n{#\nThe entry point for the MkDocs Theme.\n\nAny theme customisations should override this file to redefine blocks defined in\nthe various templates. The custom theme should only need to define a main.html\nwhich `{% extends \"base.html\" %}` and defines various blocks which will replace\nthe blocks defined in base.html and its included child templates.\n#}"
  },
  {
    "path": "docs/cinder/mkdocs_theme.yml",
    "content": "cinder_theme: true\n\nstatic_templates:\n  - 404.html\n\ninclude_search_page: false\nsearch_index_only: false\n\nshortcuts:\n    help: 191    # ?\n    next: 78     # n\n    previous: 80 # p\n    search: 83   # s\n"
  },
  {
    "path": "docs/cinder/nav-sub.html",
    "content": "{% if not nav_item.children %}\n<li {% if nav_item.active %}class=\"active\"{% endif %}>\n    <a href=\"{{ nav_item.url|url }}\">{{ nav_item.title }}</a>\n</li>\n{% else %}\n  <li class=\"dropdown-submenu\">\n    <a tabindex=\"-1\" href=\"\">{{ nav_item.title }}</a>\n    <ul class=\"dropdown-menu\">\n        {% for nav_item in nav_item.children %}\n            {% include \"nav-sub.html\" %}\n        {% endfor %}\n    </ul>\n  </li>\n{% endif %}\n"
  },
  {
    "path": "docs/cinder/nav.html",
    "content": "<div class=\"navbar navbar-default navbar-fixed-top\" role=\"navigation\">\n    <div class=\"container\">\n\n        <!-- Collapsed navigation -->\n        <div class=\"navbar-header\">\n            {%- if nav|length>1 or (page and (page.next_page or page.previous_page)) or config.repo_url %}\n            <!-- Expander button -->\n            <button type=\"button\" class=\"navbar-toggle\" data-toggle=\"collapse\" data-target=\".navbar-collapse\">\n                <span class=\"sr-only\">Toggle navigation</span>\n                <span class=\"icon-bar\"></span>\n                <span class=\"icon-bar\"></span>\n                <span class=\"icon-bar\"></span>\n            </button>\n            {% endif %}\n\n            <!-- Main title -->\n\n            <a class=\"navbar-brand\" href=\"{{ nav.homepage.url|url }}\">{{ config.site_name }}</a>\n        </div>\n\n        <!-- Expanded navigation -->\n        <div class=\"navbar-collapse collapse\">\n            {%- block site_nav %}\n            {%- if nav|length>1 %}\n                <!-- Main navigation -->\n                <ul class=\"nav navbar-nav\">\n                {% for nav_item in nav %}\n                {% if nav_item.children %}\n                    <li class=\"dropdown{% if nav_item.active %} active{% endif %}\">\n                        <a href=\"#\" class=\"dropdown-toggle\" data-toggle=\"dropdown\">{{ nav_item.title }} <b class=\"caret\"></b></a>\n                        <ul class=\"dropdown-menu\">\n                        {% for nav_item in nav_item.children %}\n                            {% include \"nav-sub.html\" %}\n                        {% endfor %}\n                        </ul>\n                    </li>\n                {% else %}\n                    <li {% if nav_item.active %}class=\"active\"{% endif %}>\n                        <a href=\"{{ nav_item.url|url }}\">{{ nav_item.title }}</a>\n                    </li>\n                {% endif %}\n                {% endfor %}\n                </ul>\n            {%- endif %}\n            {%- endblock %}\n\n            <ul class=\"nav navbar-nav navbar-right\">\n                {%- block search_button %}\n                    {%- if 'search' in config['plugins'] %}\n                    <li>\n                        <a href=\"#\" data-toggle=\"modal\" data-target=\"#mkdocs_search_modal\">\n                            <i class=\"fas fa-search\"></i> Search\n                        </a>\n                    </li>\n                    {%- endif %}\n                {%- endblock %}\n\n                {%- block next_prev %}\n                    {%- if page and (page.next_page or page.previous_page) %}\n                    <li {% if not page.previous_page %}class=\"disabled\"{% endif %}>\n                        <a rel=\"prev\" {% if page.previous_page %}href=\"{{ page.previous_page.url|url }}\"{% endif %}>\n                            <i class=\"fas fa-arrow-left\"></i> Previous\n                        </a>\n                    </li>\n                    <li {% if not page.next_page %}class=\"disabled\"{% endif %}>\n                        <a rel=\"next\" {% if page.next_page %}href=\"{{ page.next_page.url|url }}\"{% endif %}>\n                            Next <i class=\"fas fa-arrow-right\"></i>\n                        </a>\n                    </li>\n                    {%- endif %}\n                {%- endblock %}\n\n                {%- block repo %}\n                    {%- if page and page.edit_url %}\n                    <li>\n                        
<a href=\"{{ page.edit_url }}\">\n                            {%- if config.repo_name == 'GitHub' -%}\n                            <i class=\"fab fa-github\"></i> Edit on {{ config.repo_name }}\n                            {%- elif config.repo_name == 'Bitbucket' -%}\n                            <i class=\"fab fa-bitbucket\"></i> Edit on {{ config.repo_name }}\n                            {%- elif config.repo_name == 'GitLab' -%}\n                            <i class=\"fab fa-gitlab\"></i> Edit on {{ config.repo_name }}\n                            {%- else -%}\n                            Edit on {{ config.repo_name }}\n                            {%- endif -%}\n                        </a>\n                    </li>\n                    {%- elif config.repo_url %}\n                    <li>\n                        <a href=\"{{ config.repo_url }}\">\n                            {%- if config.repo_name == 'GitHub' -%}\n                            <i class=\"fab fa-github\"></i> {{ config.repo_name }}\n                            {%- elif config.repo_name == 'Bitbucket' -%}\n                            <i class=\"fab fa-bitbucket\"></i> {{ config.repo_name }}\n                            {%- elif config.repo_name == 'GitLab' -%}\n                            <i class=\"fab fa-gitlab\"></i> {{ config.repo_name }}\n                            {%- else -%}\n                            {{ config.repo_name }}\n                            {%- endif -%}\n                        </a>\n                    </li>\n                    {%- endif %}\n                {%- endblock %}\n            </ul>\n        </div>\n    </div>\n</div>\n"
  },
  {
    "path": "docs/cinder/search-modal.html",
    "content": "<div class=\"modal\" id=\"mkdocs_search_modal\" tabindex=\"-1\" role=\"dialog\" aria-labelledby=\"searchModalLabel\" aria-hidden=\"true\">\n    <div class=\"modal-dialog modal-lg\">\n        <div class=\"modal-content\">\n            <div class=\"modal-header\">\n                <button type=\"button\" class=\"close\" data-dismiss=\"modal\">\n                    <span aria-hidden=\"true\">&times;</span>\n                    <span class=\"sr-only\">Close</span>\n                </button>\n                <h4 class=\"modal-title\" id=\"searchModalLabel\">Search</h4>\n            </div>\n            <div class=\"modal-body\">\n                <p>\n                    From here you can search these documents. Enter\n                    your search terms below.\n                </p>\n                <form>\n                    <div class=\"form-group\">\n                        <input type=\"text\" class=\"form-control\" placeholder=\"Search...\" id=\"mkdocs-search-query\" title=\"Type search term here\">\n                    </div>\n                </form>\n                <div id=\"mkdocs-search-results\"></div>\n            </div>\n            <div class=\"modal-footer\">\n            </div>\n        </div>\n    </div>\n</div>\n"
  },
  {
    "path": "docs/cinder/toc.html",
    "content": "<div class=\"bs-sidebar hidden-print affix well\" role=\"complementary\">\n    <ul class=\"nav bs-sidenav\">\n        {%- for toc_item in page.toc %}\n        <li class=\"first-level {% if toc_item.active %}active{% endif %}\"><a href=\"{{ toc_item.url }}\">{{ toc_item.title }}</a></li>\n            {%- for toc_item in toc_item.children %}\n            <li class=\"second-level\"><a href=\"{{ toc_item.url }}\">{{ toc_item.title }}</a></li>\n                {% for toc_item in toc_item.children %}\n                <li class=\"third-level\"><a href=\"{{ toc_item.url }}\">{{ toc_item.title }}</a></li>\n                {%- endfor %}\n            {%- endfor %}\n        {%- endfor %}\n    </ul>\n</div>"
  },
  {
    "path": "docs/docs/custom/style.css",
    "content": "@media (min-width: 992px) {\n    /* Allow the sidebar to scroll if it overflows the page. */\n    .bs-sidebar {\n        overflow-y: scroll;\n    }\n}\n\n.btn.btn-primary {\n    /* Change for github issues buttons. */\n    border-radius: 30px;\n    font-size: 15px;\n}\n"
  },
  {
    "path": "docs/docs/faq.md",
    "content": "## Frequently asked questions\n\n### General\n*    <span style=\"font-size: large; font-style: italic;color:#404040\"> Why doesn’t **regenie** need a genetic relatedness matrix (GRM)? \n</span>\n\n**regenie** performs whole genome regression using the following model\n\n$$Y = X\\beta + \\epsilon$$\n\nwhere \\(Y_{N\\times 1}\\) is a phenotype, \\(X_{N\\times M}\\) is a genotype matrix, and \\(\\epsilon_i\\sim N(0,\\sigma^2)\\). \nThis model has close ties to a linear mixed model (LMM) based on an infinitesimal model \n\n$$Y = u + \\epsilon$$\n\nwhere \\(u\\sim N(0,\\sigma_u^2 K)\\) with \\(K_{N\\times N}=XX^T/M\\) is referred to as the genetic relatedness matrix (GRM). In the LMM, the polygenic effects have been integrated out so that model only involves the GRM $K$ through a variance component in the covariance matrix of the trait.\n\nIn **regenie**, we directly estimate the polygenic effects parameter \\(\\beta\\) by using ridge regression, which corresponds to fitting a linear regression model with a L2 penalty to impose shrinkage. Hence, we bypass having to use the GRM \\(K\\) and use the polygenic effect estimates \\(X\\hat{\\beta}\\) to control for population structure when testing variants for association.\n\n<br/>\n\n*    <span style=\"font-size: large; font-style: italic;color:#404040\"> Can **regenie** be run on small sample sizes? \n</span>\n\n For quantitative traits, we have not obtained issues running **regenie** on small data sets.\nFor binary traits, we have obtained successful runs of **regenie** (step 1 and 2) on data sets with as little as 300 samples. A few factors to consider:\n\n  1. Convergence issues may occur in step 1 (all the more if a trait is highly unbalanced) \\(-\\) see below\n  2. Similarly, convergence issues may occur in step 2 when using Firth approximation \\(-\\) see below \n\nNote: we have found that **regenie** can get conservative in more extreme relatedness scenarios so we recommend not to use it for smaller cohorts with high amounts of relatedness like founder populations where exact mixed-model methods can be used\n\n\n### Step 1\n*    <span style=\"font-size: large; font-style: italic;color:#404040\"> What block size to use in step 1? \n</span>\n\nWe recommend to use blocks of size 1000 as we have observed that it leads to a reasonable number of ridge predictors \nat level 1 (e.g. 2,500 with 500K SNPs used and the default **regenie** parameters) and have noticed little change in the \nfinal predictions when varying the block size.\n\n<br/>\n\n*    <span style=\"font-size: large; font-style: italic;color:#404040\"> How many variants to use in step 1? \n</span>\n\nWe recommend to use a smaller set of about 500K directly genotyped SNPs in step 1, which should be sufficient to capture genome-wide polygenic effects. Note that using too many SNPs in Step 1 (e.g. >1M) can lead to a high computational burden due to the resulting higher number of predictors in the level 1 models.\n\n<br/>\n\n*    <span style=\"font-size: large; font-style: italic;color:#404040\"> What do I do if I get the error \"Uh-oh, SNP XX has low variance (=XX)\" in step 1? \n</span>\n\nThis is due to variants with very low minor allele count (MAC) being included in step 1. 
To avoid this, you should use a MAC filter to remove such variants in a pre-processing step before running **regenie**.\n\nFor example, in PLINK2 you would use the `--mac` option and obtain a list of variants that pass the MAC filter (note that if you are using `--keep/--remove` in **regenie**, you should also use it in the PLINK2 command)\n```\nplink2 \\\n  --bfile my_bed_file \\\n  --mac 100 \\\n  --write-snplist \\\n  --out snps_pass\n```\n\nYou would then use the output file in **regenie** as `--extract snps_pass.snplist` (and this would avoid having to make a new genotype file).\n\n <br/>\n\n*    <span style=\"font-size: large;font-style: italic; color:#404040\"> What to do if Step 1 of **regenie** fails for a binary trait when fitting the penalized logistic regression model? \n</span>\n\nThis can occur when the sample size used to fit the model is small and/or if the trait is extremely unbalanced. \n\n  1. If using K-fold CV, switch to LOOCV (option `--loocv`) to increase the size of the sample used to fit the model\n(note: LOOCV is now used by default when the sample size is below 5,000)\n  2. If it is due to quasi-separation (i.e. `Var(Y)=0` occurred in model fitting), either increase the sample size using LOOCV or increase the MAF threshold for variants included in the step 1 analysis \n\n### Step 2\n*    <span style=\"font-size: large;font-style: italic; color:#404040\"> What to do if Step 2 of **regenie** fails when fitting the null model for the approximate Firth correction? \n</span>\n\nThis can occur when the sample size used to fit the model is small and/or if the trait is extremely unbalanced. \nWe have implemented the same measures as in the `logistf` function in R to avoid convergence issues, which include the use of a step size threshold when performing a Newton step. \n\n  1. We first try fitting the model with a step size threshold that is more liberal (=25) as well as a maximum number of iterations of 1,000, and if convergence fails, we retry the model fit using a more stringent step size threshold (=5) and a higher threshold for the number of iterations (=5,000), which will slow down convergence.\n  2. The user can also specify a maximum step size threshold using `--maxstep-null` (use value <5) as well as increase the maximum number of iterations using `--maxiter-null` (use value >5000). In that case, no retries are performed if convergence fails.\n      - We recommend testing chromosomes separately (using `--chr`) as these parameters may need to be altered when fitting the null model for each chromosome\n\n <br/>\n\n*    <span style=\"font-size: large;font-style: italic; color:#404040\"> What is reported in A1FREQ when building masks? \n</span>\n\n    - For the `max` and `comphet` rules, the resulting burden masks take on values in [0,2] just like single variants, so we compute A1FREQ the same way as for single variants (i.e. mean(G)/2 where G is a genotype vector).\n    - For the `sum` rule, A1FREQ is computed as the average of the effect allele frequencies across all sites included in the mask (e.g. a mask combining two sites with effect allele frequencies 0.001 and 0.003 has A1FREQ = 0.002).\n\n <br/>\n\n*    <span style=\"font-size: large;font-style: italic; color:#404040\"> How is missingness handled in the genotype data? \n</span>\n\nMissing genotypes are imputed with per-SNP averages when performing association tests (note: the reported genotype summaries such as AAF and INFO are based only on non-missing genotypes).\n
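\nConcretely, for a variant \(j\) with \(n_j\) non-missing genotype calls, each missing entry is replaced by the average of the observed calls (the notation below is just an illustration of this rule):\n\n$$g_{ij} \leftarrow \frac{1}{n_j}\sum_{k:\ g_{kj}\ \text{observed}} g_{kj}$$\n\nFor example, a variant with observed genotypes 0, 1, 2 and 1 would have any missing entries set to 1.\n"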
  },
  {
    "path": "docs/docs/index.md",
    "content": "## regenie \n\n**regenie** is a C++ program for whole genome regression modelling of large\n[genome-wide association studies](https://en.wikipedia.org/wiki/Genome-wide_association_study).\n\nIt is developed and supported by a team of scientists at the Regeneron Genetics Center.\n\nThe method has the following properties\n\n- It works on quantitative and binary traits, including binary\ntraits with unbalanced case-control ratios\n- It can handle population structure and relatedness\n- It can process multiple phenotypes at once efficiently\n- For binary traits, it supports Firth logistic regression and an SPA test\n- It can perform gene/region-based tests (Burden, [SBAT](https://doi.org/10.1016/j.ajhg.2024.08.021), SKAT/SKATO, ACATV/ACATO)\n- It can perform interaction tests (GxE, GxG) as well as conditional analyses\n- Meta-analysis of REGENIE summary statistics can be performed using [REMETA](https://rgcgithub.github.io/remeta/)\n- It is fast and memory efficient 🔥\n- It supports the [BGEN](https://www.well.ox.ac.uk/~gav/bgen_format/), [PLINK](https://www.cog-genomics.org/plink/1.9/formats#bed) bed/bim/fam and [PLINK2](https://www.cog-genomics.org/plink/2.0/formats#pgen) pgen/pvar/psam genetic data formats\n- It is ideally suited for implementation in\n  [Apache Spark](https://spark.apache.org/) (see [GLOW](https://projectglow.io/))\n- It can be installed with [Conda](https://anaconda.org/bioconda/regenie)\n\n##Citation\n\nMbatchou, J., Barnard, L., Backman, J. et al. Computationally efficient whole-genome regression for quantitative and binary traits. Nat Genet 53, 1097–1103 (2021). [https://doi.org/10.1038/s41588-021-00870-7](https://doi.org/10.1038/s41588-021-00870-7)\n\n\n## License \n\n**regenie** is distributed under an [MIT license](https://github.com/rgcgithub/regenie/blob/master/LICENSE).\n\n\n## Contact \n\nIf you have any questions about **regenie** please contact\n\n- <jonathan.marchini@regeneron.com>\n- <joelle.mbatchou@regeneron.com> \n\nIf you want to submit a issue concerning the software please do so\nusing the **regenie** [Github repository](https://github.com/rgcgithub/regenie/issues).\n\n<a href=\"https://github.com/rgcgithub/regenie/issues/new\"><button class=\"btn btn-primary btn-sm\" type=\"submit\"><i class=\"fab fa-github fa-2x\"></i> Report Issue</button></a>\n<a href=\"https://github.com/rgcgithub/regenie/issues\"><button class=\"btn btn-primary btn-sm\" type=\"submit\"><i class=\"fab fa-github fa-2x\"></i> Active Issues </button></a>\n\n<!--\n## Version history\n\nVersion 1.0 (22 June 2020): Initial release\n-->\n"
  },
  {
    "path": "docs/docs/install.md",
    "content": "##Download\n\nThe **regenie** source code is hosted on\n[Github](https://github.com/rgcgithub/regenie).\n\n##Installation\n\n<div class=\"bs-callout bs-callout-default\">\n  <h4>Pre-requisites</h4>\n    <b>regenie</b> requires compilation with \n<a href=\"https://gcc.gnu.org\">GCC</a> version >= 5.1 (on Linux) \nor Clang version >=3.3 (on Mac OSX). \nIt also requires having GFortran library installed.\n    </div>\n\n### Pre-compiled binaries\n\nPre-compiled binaries are available in the \n[Github repository](https://github.com/rgcgithub/regenie/releases).\nThese are provided for Linux (including Centos7) and Mac OSX \ncomputing environments and are statically linked. \nFor the Linux binaries, users should have GLIBC version >= 2.22 installed.\nAdditionally, they are provided compiled with Intel MKL library which\nwill provide speedups for many of the operations done in **regenie**. \n\n\n### Standard installation\n1. **regenie** requires the\n  [BGEN library](https://enkre.net/cgi-bin/code/bgen/dir?ci=trunk) so\n  you will need to download and install that library.\n2. Edit the BGEN_PATH variable in the `Makefile`\n   to the BGEN library path.\n3. On the command line type `make` while in the main source code directory.\n4. This should produce the executable called `regenie`.\n\n**regenie** has been enhanced to allow for gzip compressed input \n(for phenotype/covariate files) and output (for association results files)\n using the Boost Iostream library. \nIf this library is installed on the system, you should compile using \n`make HAS_BOOST_IOSTREAM=1`. \n\nFurthermore, we have enabled compilation of **regenie** with\nthe Intel Math Kernel (MKL) library. You first need to have it installed \non your system and modify the MKLROOT variable in the `Makefile`\nto the installed MKL library path.\n\n### With CMake\nYou can compile the binary using CMake version >=3.13 (instead of `make` as above).\n```\nmkdir -p build\ncd build\nBGEN_PATH=<path_to_bgen_lib> cmake ..\nmake\n```\nThis will generate the binary in the `build/` subdirectory. \nTo use with Boost Iostreams and/or Intel MKL library,\nadd the corresponding flags before the `cmake` command on line 3\n(e.g. `BGEN_PATH=<path_to_bgen_lib> HAS_BOOST_IOSTREAM=1 cmake ..`).\n\n### With Docker\nAlternatively, you can use a Docker image to run **regenie**. \nA guide to using docker is available on \nthe [Github page](https://github.com/rgcgithub/regenie/wiki/Using-docker).\n\n### With conda\nTo install with [conda](https://anaconda.org/bioconda/regenie), you can use the following commands:\n```\n# create new environment\nconda create -n regenie_env -c conda-forge -c bioconda regenie\n# load it\nconda activate regenie_env\n```\n\n\n\n##Computing requirements\n\nWe have tested **regenie** on 64-bit Linux and 64-bit Mac OSX computing environments.\n \nNote that for Mac OSX computing environments, compiling is done without OpenMP, as the library is not built-in by default and has to be installed separately. \n\n### Memory usage\nIn both Step 1 and Step 2 of a **regenie** run the genetic data file is\nread once, in blocks of SNPs, so at no point is the full dataset ever stored in\nmemory.\n\n**regenie** uses a dimension reduction approach using ridge regression\n  to produce a relatively small set of genetic predictors, that are\n  then used to fit a whole-genome regression model. 
\n### With CMake\nYou can compile the binary using CMake version >=3.13 (instead of `make` as above).\n```\nmkdir -p build\ncd build\nBGEN_PATH=<path_to_bgen_lib> cmake ..\nmake\n```\nThis will generate the binary in the `build/` subdirectory. \nTo use with the Boost Iostreams and/or Intel MKL library,\nadd the corresponding flags before the `cmake` command on line 3\n(e.g. `BGEN_PATH=<path_to_bgen_lib> HAS_BOOST_IOSTREAM=1 cmake ..`).\n\n### With Docker\nAlternatively, you can use a Docker image to run **regenie**. \nA guide to using Docker is available on \nthe [Github page](https://github.com/rgcgithub/regenie/wiki/Using-docker).\n\n### With conda\nTo install with [conda](https://anaconda.org/bioconda/regenie), you can use the following commands:\n```\n# create new environment\nconda create -n regenie_env -c conda-forge -c bioconda regenie\n# load it\nconda activate regenie_env\n```\n\n\n\n## Computing requirements\n\nWe have tested **regenie** on 64-bit Linux and 64-bit Mac OSX computing environments.\n\nNote that for Mac OSX computing environments, compiling is done without OpenMP, as the library is not built-in by default and has to be installed separately. \n\n### Memory usage\nIn both Step 1 and Step 2 of a **regenie** run the genetic data file is\nread once, in blocks of SNPs, so at no point is the full dataset ever stored in\nmemory.\n\n**regenie** uses a dimension reduction approach using ridge regression\n  to produce a relatively small set of genetic predictors, that are\n  then used to fit a whole-genome regression model. These genetic\n  predictors are stored in memory by default, and can be relatively\n  large if many phenotypes are analyzed at once.\n\nFor example, if there are \\(P\\) phenotypes, \\(M\\) SNPs and \\(N\\) samples, and a\nblock size of \\(B\\) SNPs is used with \\(R\\) ridge parameters,\n then **regenie** needs to store roughly \\(N\\times M/B\\times R\\)\ndoubles per phenotype, which is 8GB per phenotype when \\(M=500,000,\nN=400,000, B=1,000, R=5\\) and 200GB in total when \\(P=25\\).\n\nHowever, the `--lowmem` option can be used to avoid that memory usage,\nat negligible extra computational cost, by writing temporary files to disk.\n\n### Threading\n\n**regenie** can take advantage of multiple cores using threading. The\nnumber of threads can be specified using the `--threads` option.\n\n**regenie** uses the [Eigen library](http://eigen.tuxfamily.org/index.php?title=Main_Page) for \nefficient linear algebra operations and this uses threading where possible.\n\nFor PLINK bed/bim/fam files, PLINK2 pgen/pvar/psam files, as well as BGEN v1.2 files with 8-bit encoding (format used for UK Biobank\n500K imputed data), step 2 of **regenie** has been optimized by \nusing multithreading through [OpenMP](https://www.openmp.org).\n\nWhen running the SKAT/ACAT gene-based tests, we recommend using at most 2 threads and \ninstead parallelizing the runs over partitions of the genome (e.g. groups of genes).\n\n### For Windows platforms\n\nIf you are on a Windows machine, we recommend using [Windows Subsystem for Linux](https://docs.microsoft.com/en-us/windows/wsl/install) (WSL)\nto install an Ubuntu distribution so that you can run REGENIE\nfrom a Linux terminal.\nYou can download pre-compiled REGENIE binaries from the [Github repository](https://github.com/rgcgithub/regenie/releases) \n(note that you will need to install the `libgomp1` library).\n\nNote: from your Windows command prompt, you can run REGENIE using `wsl regenie`.\n"
  },
  {
    "path": "docs/docs/options.md",
    "content": "## Getting started\n\nTo run **regenie**, use the command `./regenie` on the command line,\nfollowed by options and flags as needed.\n\nTo get a full list of options use\n\n```\n./regenie --help\n```\n\nThe directory `examples/` contains some small example files that are\nuseful when getting started. A test run on a set of binary traits can be achieved by the\nfollowing 2 commands.\n\nIn **Step 1**, the whole genome regression model is fit to the traits, and\na set of genomic predictions are produced as output\n\n```\n./regenie \\\n  --step 1 \\\n  --bed example/example \\\n  --exclude example/snplist_rm.txt \\\n  --covarFile example/covariates.txt \\\n  --phenoFile example/phenotype_bin.txt \\\n  --remove example/fid_iid_to_remove.txt \\\n  --bsize 100 \\\n  --bt --lowmem \\\n  --lowmem-prefix tmp_rg \\\n  --out fit_bin_out\n```\n\nIn **Step 2**, a set of imputed SNPs are tested for association using a\nFirth logistic regression model\n\n```\n./regenie \\\n  --step 2 \\\n  --bgen example/example.bgen \\\n  --covarFile example/covariates.txt \\\n  --phenoFile example/phenotype_bin.txt \\\n  --remove example/fid_iid_to_remove.txt \\\n  --bsize 200 \\\n  --bt \\\n  --firth --approx \\\n  --pThresh 0.01 \\\n  --pred fit_bin_out_pred.list \\\n  --out test_bin_out_firth\n```\n\nOne of the output files from these two commands is included in `example/test_bin_out_firth_Y1.regenie`.\n\n## Basic options\n\n### Input \n\n\n| Option | Argument | Type | Description|\n|---|-------|------|----|\n|`--bgen, --bed, --pgen`  | FILE | Required |Input genetic data file. Either BGEN file eg. `file.bgen`, or bed/bim/fam prefix that assumes`file.bed`, `file.bim`, `file.fam` exist, or pgen/pvar/psam prefix that assumes`file.pgen`, `file.pvar`, `file.psam` exist |\n|`--sample`  | FILE | Optional |Sample file corresponding to input BGEN file|\n|`--bgi`  | FILE | Optional |Index bgi file corresponding to input BGEN file|\n|`--ref-first`  | FLAG | Optional |Specify to use the first allele as the reference allele for BGEN or PLINK bed/bim/fam file input [default is to use the last allele as the reference]|\n|`--keep`  | FILE | Optional | Inclusion file that lists individuals to retain in the analysis|\n|`--remove`  | FILE | Optional | Exclusion file that lists individuals to remove from the analysis|\n|`--extract`  | FILE | Optional | Inclusion file that lists IDs of variants to keep|\n|`--exclude`  | FILE | Optional | Exclusion file that lists IDs of variants to remove|\n|`--extract-or`  | FILE | Optional | Inclusion file that lists IDs of variants to keep regardless of minimum MAC filter|\n|`--exclude-or`  | FILE | Optional | Exclusion file that lists IDs of variants to remove unless MAC is above threshold|\n|`--phenoFile`  | FILE | Required |Phenotypes file|\n|`--phenoCol` | STRING | Optional | Use for each phenotype you want to include in the analysis|\n|`--phenoColList` | STRING | Optional | Comma separated list of phenotypes to include in the analysis|\n|`--eventColList` | STRING | Optional | Comma separated list of columns in the phenotype file to include in the analysis that contain the event times |\n|`--phenoExcludeList` | STRING | Optional | Comma separated list of phenotypes to ignore from the analysis|\n|`--covarFile`  | FILE | Optional | Covariates file|\n|`--covarCol` | STRING | Optional | Use for each covariate you want to include in the analysis|\n|`--covarColList` | STRING | Optional | Comma separated list of covariates to include in the analysis|\n|`--catCovarList` | STRING | 
\nTo include X chromosome genotypes in step 1 and/or step 2, males should be coded as diploid \nso that their genotypes are 0/2 (this is done automatically for BED and PGEN file formats with haploid genotypes).\nChromosome values of 23 (for human analyses), X, Y, XY, PAR1 and PAR2 are all acceptable and \nwill be collapsed into a single chromosome.\n\n\n##### Sample inclusion/exclusion file format\n\n```\n2 2 \n7 7 \n.\n```\n\nNo header. Each line starts with individual FID IID. Space/tab separated.\n\nSamples listed in the file that are not in the bgen/bed/pgen file are ignored.\n\n##### Variant inclusion/exclusion file format\n\n```\n20\n31\n.\n```\n\nNo header. Each line must start with the variant ID \n(if there are additional columns, the file must be space/tab separated).\n\nVariants listed in this file that are not in the bgen/bed/pgen file are ignored.\n\n#### Covariate file format\n\n```\nFID IID V1 V2 V3\n1 1 1.46837294454993 1.93779743016325 0.152887004505393\n2 2 -1.2234390803815 -1.63408619199948 -0.190201446835255\n3 3 0.0711531925667286 0.0863906292357564 0.14254739715665\n.\n```\n\nLine 1: Header with FID, IID and \\(C\\) covariate names.\n\nFollowed by lines of \\(C+2\\) values. Space/tab separated.\n\nEach line contains individual FID and IID followed by \\(C\\) covariate\nvalues.\n\nSamples listed in this file that are not in the bgen/bed/pgen file are ignored.\nGenotyped samples that are not in this file are removed from the analysis, as are\nsamples with missing values at any of the covariates included.\n\nIf `--step 2` is specified, then the covariate file should be the same\nas that used in Step 1.\n
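\nAs an illustration of the covariate options (a sketch; the covariate names are hypothetical, apart from the parameter-expansion syntax noted above, and `...` stands for the remaining options):\n\n```bash\n./regenie \\\n  --step 1 \\\n  --covarFile covariates.txt \\\n  --covarColList AGE,PC{1:10} \\\n  --catCovarList BATCH \\\n  ...\n```\n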
\n#### Phenotype file format\n\n```\nFID IID Y1 Y2\n1 1 1.64818554321186 2.2765234736685\n2 2 -2.67352013711554 -1.53680421614647\n3 3 0.217542851471485 0.437289912695016\n.\n```\n\nLine 1: Header with FID, IID and \\(P\\) phenotype names.\n\nFollowed by lines of \\(P+2\\) values. Space/tab separated. \nEach line contains individual FID and IID followed by \\(P\\) phenotype values\n(for binary traits, must be coded as 0=control, 1=case, NA=missing unless using `--1`).\n\nSamples listed in this file that are not in the bgen/bed/pgen file are ignored.\nGenotyped samples that are not in this file are removed from the analysis.\n\nMissing values must be coded as NA.\n\nWith QTs, missing values are mean-imputed in Step 1 and they are dropped when testing each phenotype in Step 2 (unless using `--force-impute`).\n\nWith BTs, missing values are mean-imputed in Step 1 when fitting the\nlevel 0 linear ridge regression and \nthey are dropped when fitting the level 1 logistic ridge regression for each trait. \nIn Step 2, missing values are dropped when testing each trait.\n\nTo remove all samples that have missing values at **any** of the \\(P\\) phenotypes, use option `--strict` in Step 1 and 2.\n\nIf using the transposed phenotype file format with option `--tpheno-file`, \nthe header line must contain subject IDs as \"FID_IID\",\notherwise use option `--iid-only` and only include IIDs (so will assume FID=IID).\n\n#### Predictions file format\n\nRunning `--step 1 --out foo` will produce\n\n1. A set of files containing genomic predictions for each phenotype\n   from Step 1 (see Output section below).\n2. A file called `foo_pred.list` listing the locations of the prediction files.\n\nThe file list is needed as an input file when using `--step 2`\nvia the `--pred` option. \nIt has one line per phenotype (in any order) that specifies the name of the phenotype and its\ncorresponding prediction file name. \nEach phenotype must have exactly one prediction file and phenotype names \nmust match those in the phenotype file.\nPhenotypes in this file not included in the analysis are ignored.\n
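\nFor illustration, a `foo_pred.list` file for two phenotypes Y1 and Y2 might look like this (a sketch; the `.loco` file names follow the Step 1 output naming described in the Output section):\n\n```\nY1 /path/to/foo_1.loco\nY2 /path/to/foo_2.loco\n```\n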
\nEach prediction file contains the genetic predictions for the phenotype (space separated).\n\nLine 1 starts with 'FID_IID' followed by $N$ sample identifiers.\nIt is followed by 23 lines containing the genetic predictions for each chromosome \n(sex chromosomes are collapsed into chromosome 23).\n\nMore specifically, each line has $N+1$ values which are the chromosome number followed by the $N$\nleave-one-chromosome-out (LOCO) predictions for each individual.\n\nSamples in this file not in the bed/pgen/bgen input file are ignored. Genotyped samples not \npresent in this file will be ignored in the analysis of the corresponding trait. \n\nSamples with missing LOCO predictions must have their corresponding phenotype value set to missing.\n\n\n### Options\n\n\n| Option | Argument | Type | Description|\n|---|-------|------|----|\n|`--step`| INT| Required| specify step for the regenie run (see Overview) [argument can be `1` or `2`] |\n|`--qt`| FLAG| Optional| specify that traits are quantitative (this is the default so can be omitted)|\n|`--bt`| FLAG| Optional| specify that traits are binary with 0=control,1=case,NA=missing|\n|`--t2e`| FLAG| Optional| specify that traits are time-to-event data with 0=censoring,1=event,NA=missing in event column|\n|`-1,--cc12`| FLAG| Optional| specify to use 1/2/NA encoding for binary traits (1=control,2=case,NA=missing)|\n|`--bsize`| INT| Required| size of the genotype blocks|\n|`--cv`| INT| Optional| number of cross validation (CV) folds [default is 5]|\n|`--loocv`| FLAG | Optional| flag to use leave-one-out cross validation|\n|`--lowmem`| FLAG | Optional | flag to reduce memory usage by writing level 0 predictions to disk (details below). This is very useful if the number of traits is large (e.g. greater than 10)|\n|`--lowmem-prefix`| FILE PREFIX | Optional | prefix where to temporarily write the level 0 predictions|\n|`--split-l0`| PREFIX,N | Optional | split level 0 across N jobs and set prefix of output files of level 0 predictions|\n|`--run-l0`| FILE,K | Optional | run level 0 for job K in {1..N} specifying the master file created from '--split-l0'|\n|`--run-l1`| FILE | Optional | run level 1 specifying the master file from '--split-l0'|\n|`--l1-phenoList` | STRING | Optional | to specify a subset of phenotypes to analyze when using `--run-l1`|\n|`--keep-l0`| FLAG | Optional | avoid deleting the level 0 predictions written on disk after fitting the level 1 models|\n|`--print-prs`|FLAG| Optional| flag to print whole genome predictions (i.e. PRS) without using LOCO scheme|\n|`--force-step1`|FLAG| Optional| flag to run step 1 when >1M variants are used (not recommended)|\n|`--minCaseCount`| INT | Optional | threshold to ignore BTs with low case counts [default is 10]|\n|`--apply-rint`| FLAG | Optional| to apply Rank Inverse Normal Transformation (RINT) to quantitative phenotypes (use in both Step 1 & 2)|\n|`--nb`| INT| Optional| number of blocks (determined from block size if not provided)|\n|`--strict`|FLAG| Optional| flag to remove samples with missing data at any of the phenotypes|\n|`--ignore-pred`|FLAG| Optional| skip reading the file specified by `--pred` (corresponds to simple linear/logistic regression)|\n|`--htp`|STRING| Optional| to output the summary statistics file in the [HTP](https://rgcgithub.github.io/remeta/file_formats/#-htp) format (string should correspond to cohort name, e.g. 'UKB_450_EUR')|\n|`--exact-p`|FLAG| Optional|avoid capping p-values at 2.2E-307 in the HTP format summary statistics output|\n|`--use-relative-path`| FLAG| Optional| to use relative paths instead of absolute ones for the step 1 output pred.list file|\n|`--use-prs`|FLAG| Optional| flag to use whole genome PRS in `--pred` (this is output in step 1 when using `--print-prs`)|\n|`--gz`|FLAG| Optional| flag to output files in compressed gzip format (LOCO prediction files in step 1 and association results files in step 2) **[this only works when compiling with Boost Iostream library (see Install tab)]**.|
\n|`--force-impute`|FLAG| Optional| flag to keep and impute missing observations for QTs in step 2|\n|`--write-samples`|FLAG| Optional| flag to write sample IDs for those kept in the analysis for each trait in step 2|\n|`--print-pheno`|FLAG| Optional| flag to write phenotype name in the first line of the sample ID files when using `--write-samples`|\n|`--firth`| FLAG | Optional | specify to use Firth likelihood ratio test (LRT) as fallback for p-values less than threshold|\n|`--approx`|FLAG | Optional| flag to use approximate Firth LRT for computational speedup (only works when option `--firth` is used)|\n|`--firth-se`| FLAG | Optional | flag to compute SE based on effect size and LRT p-value when using Firth correction (instead of based on Hessian of unpenalized log-likelihood)|\n|`--write-null-firth`| FLAG| Optional| to write the null estimates for approximate Firth [can be used in step 1 or 2] |\n|`--compute-all`| FLAG| Optional| to write the null Firth estimates for all chromosomes (regardless of the genotype file) |\n|`--use-null-firth`| FILE| Optional| to use stored null estimates for approximate Firth in step 2 |\n|`--spa`| FLAG | Optional| specify to use Saddlepoint approximation as fallback for p-values less than threshold|\n|`--pThresh`| FLOAT | Optional| P-value threshold below which to apply Firth/SPA correction [default is 0.05]|\n|`--test`| STRING | Optional | specify to carry out dominant or recessive test [default is additive; argument can be `dominant` or `recessive`]|\n|`--chr`| INT| Optional| specify which chromosomes to test in step 2 (use for each chromosome to include)|\n|`--chrList` | STRING | Optional | Comma separated list of chromosomes to test in step 2|\n|`--range` | STRING | Optional | specify chromosome region for variants to test in step 2 [format=CHR:MINPOS-MAXPOS] |\n|`--minMAC`| FLOAT| Optional| to specify the minimum minor allele count (MAC) when testing variants [default is 5]. Variants with lower MAC are ignored.|\n|`--minINFO`| FLOAT| Optional| to specify the minimum imputation info score (IMPUTE/MACH R^2) when testing variants. 
Variants with lower info score are ignored.|\n|`--sex-specific` | STRING | Optional | to perform sex-specific analyses [either 'male'/'female']|\n|`--af-cc`| FLAG | Optional| to output A1FREQ in case/controls separately in the step 2 result file|\n|`--no-split`|FLAG| Optional| flag to have summary statistics for all traits output in the same file|\n|`--starting-block`| INT| Optional| to start step 2 at a specific block/set number (useful if program crashes during a job)|\n|`--nauto`| INT| Optional| number of autosomal chromosomes (for non-human studies) [default is 22]|\n|`--maxCatLevels`| INT| Optional| maximum number of levels for categorical covariates [default is 10]|\n|`--niter`| INT| Optional| maximum number of iterations for logistic regression [default is 30]|\n|`--maxstep-null`| INT| Optional| maximum step size for logistic model with Firth penalty under the null [default is 25]|\n|`--maxiter-null`| INT| Optional| maximum number of iterations for logistic model with Firth penalty under the null [default is 1000]|\n|`--par-region` | STRING | Optional | specify build code to determine bounds for PAR1/PAR2 regions (can be 'b36/b37/b38/hg18/hg19/hg38' or 'start,end' bp bounds of non-PAR region) [default is hg38]|\n|`--force-qt`| FLAG | Optional | force QT run for binary traits|\n|`--threads`| INT | Optional| number of computational threads to use [default=all-1]|\n|`--debug`| FLAG | Optional | debug flag (for use by developers)|\n|`--verbose`| FLAG | Optional| verbose screen output|\n|`--version`| FLAG | Optional| print version number and exit|\n|`--help`| FLAG | Optional| Prints usage and options list to screen|\n\nWhen step 1 of **regenie** is run in low memory mode (i.e. using `--lowmem`), \ntemporary files are created on disk (the prefix given by `--lowmem-prefix tmp_prefix` determines \nwhere the files are written [as in `tmp_prefix_l0_Y1`,...,`tmp_prefix_l0_YP` \nfor P phenotypes]). If the prefix is not specified, the default is to use the \nprefix specified by `--out` (see below).\nThese files are automatically deleted at the end of the program (unless the run\nwas not successful, in which case the user would need to delete them).\n\nSee the [Wiki page](https://github.com/rgcgithub/regenie/wiki/Further-parallelization-for-level-0-models-in-Step-1) for more details on how to run the level 0 models for Step 1 \nof **regenie** in parallel.\n
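\nAs a sketch of the parallel level 0 workflow (the `fit_l0` prefix and job count of 4 are placeholders, the master file name is assumed to follow the `--split-l0` prefix, and `...` stands for the usual Step 1 options):\n\n```bash\n# set up the split across 4 jobs (writes a master file, here assumed to be fit_l0.master)\n./regenie --step 1 ... --split-l0 fit_l0,4\n\n# run each level 0 job (K=1,...,4), e.g. as separate cluster jobs\n./regenie --step 1 ... --run-l0 fit_l0.master,1\n\n# combine the level 0 predictions and fit the level 1 models\n./regenie --step 1 ... --run-l1 fit_l0.master\n```\n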
\n### Output\n\n| Option | Argument | Type | Description|\n|---|-------|------|----|\n|`--out`| FILE PREFIX| Required| Output file prefix; the files written depend on `--step`|\n\nA log file `file.log` of the output is generated.\n\n**Using `--step 1 --out file`**\n\nFor the \\(P\\) phenotypes, files `file_1.loco`,...,`file_P.loco` are output with the\nper-chromosome LOCO predictions as rows of the files \n(following the order of the phenotypes in the phenotype file header). \nIf option `--gz` was used, the files will be compressed in gzip format and have extension `.loco.gz`.\n\nGenotyped individuals specified using option `--remove` are excluded from this file. \nIndividuals with missing phenotype values kept in the analysis \nare included in the file and have their predictions set to missing.\n\nThe list of blup files needed for step 2 (association testing) is written to `file_pred.list`.\n\nIf using `--print-prs`, files `file_1.prs`,...,`file_P.prs` will be written with the \nwhole genome predictions (i.e. PRS) without using LOCO scheme (similar format to the .loco files).\nThe list of these files is written to `file_prs.list` and can be used in step 2 with `--pred` and \nspecifying flag `--use-prs`. Note that as these are not obtained using a LOCO scheme, \nassociation tests could suffer from proximal contamination.\n\nIf using option `--write-null-firth`, the estimates for approximate Firth under the null will be written to files\n`file_1.firth,...,file_P.firth` and the list of these files is written to `file_firth.list`. This can be\nused in step 2 as `--use-null-firth file_firth.list`. Note that it assumes the same set of covariates are \nused in Step 1 and 2.\n\n**Using `--step 2 --out file`** \n\nBy default, results are written in separate files for\neach phenotype\n`file_<phenotype1_name>.regenie,...,file_<phenotypeP_name>.regenie`.\nEach file has one line per\nSNP along with a header line.\nIf option `--gz` was used, the files will be compressed in gzip format and have extension `.regenie.gz`.\n\nThe entries of each row specify chromosome, position, ID, reference allele (allele 0), \nalternative allele (allele 1), frequency of the alternative allele, sample size and the test performed \n(additive/dominant/recessive).\nWith BGEN/PGEN files with dosages, the imputation INFO score is provided \n(IMPUTE info score for BGEN and Mach Rsq for PGEN).\nAllele frequency, sample size and INFO score, if applicable, are computed using only\nnon-missing samples for each phenotype.\n\nThese are followed by the estimated effect sizes (for allele 1 on the original scale), standard errors, chi-square test statistics \nand \\(-\\log_{10}\\) p-value. An additional column is included to specify if Firth/SPA corrections failed.\n\nWith option `--no-split`, the summary statistics for all traits are written to a single file `file.regenie`,\nwith the same format as above. Additionally, an accompanying file with the trait names corresponding to Y1,Y2,... \nwill be generated in `file.regenie.Ydict`. Note that allele frequency, sample size and INFO score are computed using\nall analyzed samples.\n\nWith option `--htp`, the summary statistics file will follow the [HTP](https://rgcgithub.github.io/remeta/file_formats/#-htp) format.\n\nIf option `--write-samples` was used, IDs of samples used for each trait will be written in files\n`file_<phenotype1_name>.regenie.ids,...,file_<phenotypeP_name>.regenie.ids` (tab separated, no header).\n\nWhen using `--par-region`, the default boundaries used for the chrX PAR regions are:\n\n* b36/hg18: 2709520 and 154584238\n* b37/hg19: 2699520 and 154931044\n* b38/hg38: 2781479 and 155701383\n\n\n## Gene-based testing\n\nStarting from version 3.0, Step 2 of **regenie** provides a complementary set of gene-based tests \nin addition to the burden testing functionality introduced in version 2.0.\nMore specifically, for a given set of variants (e.g. within a gene) which can be defined using functional annotations,\n**regenie** can apply various set-based tests on the variants as well as collapse them into a single combined 'mask' genotype \nthat can be tested for association just like a single variant. 
\n\n### Input\n\n| Option | Argument | Type | Description|\n|---|-------|------|----|\n|`--anno-file`  | FILE | Required | File with variant annotations for each set|\n|`--set-list`  | FILE | Required | File listing variant sets|\n|`--extract-sets`  | FILE | Optional | Inclusion file that lists IDs of variant sets to keep|\n|`--exclude-sets`  | FILE | Optional | Exclusion file that lists IDs of variant sets to remove|\n|`--extract-setlist`  | STRING | Optional | Comma-separated list of variant sets to keep|\n|`--exclude-setlist`  | STRING | Optional | Comma-separated list of variant sets to remove|\n|`--aaf-file`  | FILE | Optional | File with variant AAF to use when building masks (instead of AAF estimated from sample)|\n|`--mask-def`  | FILE | Required | File with mask definitions using the annotations defined in `--anno-file`|\n\nNote: multiple files can be specified for `--extract-sets/--exclude-sets` by using a comma-separated list.\n\n#### Annotation input files\n\nThe following files are used to define variant sets and \nfunctional annotations which will be used to generate masks.\n\n##### Annotation file\n\n```bash\n1:55039839:T:C PCSK9 LoF\n1:55039842:G:A PCSK9 missense\n.\n```\nThis file defines functional annotations for variants.\nIt is designed to accommodate variants with \nseparate annotations for different sets/genes.\n\nEach line contains the variant name, the set/gene name and a single annotation category \n(space/tab separated). \n\nVariants not in this file will be assigned to a default \"NULL\" category. A maximum of 63 annotation \ncategories (+NULL category) is allowed.\n\nFor gene sets, tools you can use to obtain variant annotations per transcript are \n[snpEFF](https://pcingola.github.io/SnpEff/se_introduction/) or \n[VEP](https://www.ensembl.org/info/docs/tools/vep/index.html).\nTo obtain a single annotation per gene, you could choose the most deleterious\nfunctional annotation across the gene transcripts or alternatively\nuse the canonical transcript (note that its definition can vary across software).\n\nWe have implemented an extended 4-column format of the annotation file which\nalso categorizes sets into domains (e.g. for gene sets, these would correspond to gene domains).\n\n```bash\n1:55039839:T:C PCSK9 Prodomain LoF\n1:55039842:G:A PCSK9 Prodomain missense\n.\n```\nMasks will be generated for each domain \n(maximum of 8 per set/gene) in addition \nto a mask combining across all domains.\nVariants can only be assigned to a single domain for each set/gene.\n\nStarting with v4.1, you can also specify custom variant weights which will be used in the burden, SKAT/SKAT-O and ACAT-V tests ($w_i$'s in the [gene-based testing overview](../overview/#step-2-gene-based-testing)). Multiple weights can be included in the annotation file after the 3rd column, e.g.\n\n```bash\n1:55039839:T:C PCSK9 LoF 0.9 0.812 1\n1:55039842:G:A PCSK9 missense 0.4 0.23 0.55\n.\n```\nUsing `--weights-col 4` will use the weights in the 4th column for the gene-based tests.\n\n##### Set list file\n\nThis file lists variants within each set/gene to use when \nbuilding masks. 
\nEach line contains the set/gene name followed by a chromosome and physical position for the set/gene,\nthen by a comma-separated list of variants included in the set/gene.\n\n```bash\nA1BG 19  58346922  19:58346922:C:A,19:58346924:G:A,...\nA1CF 10  50806630  10:50806630:A:G,10:50806630:A:AT,...\n.\n```\n\n##### Set inclusion/exclusion file format\nThe file must have a single column of set/gene names corresponding to those in the \nset list file.\n\n```bash\nPIGP\nZBTB38\n.\n```\n\n\n##### AAF file (optional)\n\nBoth functional annotations and alternative allele frequency (AAF) cutoffs \nare used when building masks (e.g. only considering LoF\nsites where AAF is below 1%). \nBy default, the AAF for each variant is computed from the sample but\nalternatively, the user can specify variant AAFs using this file.\n\nEach line contains the variant name followed by its AAF \n(it should be for the ALT allele used in the genetic data input). \nAAF must be a numerical value (i.e. it cannot be '.').\n\n```bash\n7:6187101:C:T 1.53918207864341e-05\n7:6190395:C:A 2.19920388819247e-06\n.\n```\n\nSince singleton variants cannot be identified from this file, they are determined by default\nbased on the input genetic data. To enforce which sites should be included in the singleton masks\n(see `--set-singletons`), you can add a third column in the file with a binary indicator \n(1=singleton; 0=not singleton). So only variants which are specified as singletons will be \nconsidered for the singleton masks, regardless of whether they are singletons in the input genetic data.\nNote that with this flag, singleton sites will be included in all masks (regardless of the AAF in the file).\n\n```bash\n7:6187101:C:T 1.53918207864341e-05 0\n7:6190395:C:A 2.19920388819247e-06 1\n.\n```\n\n#### Mask definitions\n\n##### Mask file\nThis file specifies which annotation categories should be combined into masks. \nEach line contains a mask name followed by a comma-separated list \nof categories included in the mask (i.e. union is taken over categories).\n\nIn the example below, Mask1 uses only LoF variants and \nMask2 uses LoF and missense annotated variants.\n\n\n```bash\nMask1 LoF\nMask2 LoF,missense\n.\n```\n\n##### AAF cutoffs\nOption `--aaf-bins` specifies the AAF upper bounds used to generate burden masks \n(**AAF and not MAF [minor allele frequency] is used when deciding which variants go into a mask**).\nBy default, a mask based on singleton sites is always included.\n\nFor example, `--aaf-bins 0.01,0.05` will generate 3 burden masks for AAFs in \n[0,0.01], [0,0.05] and singletons.\n\n\n#### SKAT/ACAT tests\n\nThe option `--vc-tests` is used to specify the gene-based tests to run. \nBy default, these tests use all variants in each mask category. \nIf you'd like to only include variants whose AAF is below a given threshold,\ne.g. only rare variants, you can use `--vc-maxAAF`.\n\n| Test  | Name in **regenie**    | Description |\n| :----- |:-------------- | :--|\n| SKAT    | skat | Variance component test |\n| SKATO    | skato | Omnibus test combining features of SKAT and Burden|\n| SKATO-ACAT   | skato-acat | Same as SKATO but using Cauchy combination method to maximize power across SKATO models|\n| ACATV   | acatv | Test using Cauchy combination method to combine single-variant p-values|\n| ACATO   | acato | Omnibus test combining features of ACATV, SKAT and Burden|\n| ACATO-FULL   | acato-full | Same as ACATO but using the larger set of SKATO models used in the SKATO test|\n\n\nFor example, `--vc-tests skato,acato-full` will run SKATO and ACATO \n(both using the default grid of 8 `rho` values for the SKATO models) and \nthe p-values for SKAT, SKATO, ACATV and ACATO will be output.\n
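\nAs a usage sketch (`...` stands for the usual Step 2 and gene-based testing options):\n\n```bash\n# run SKATO and the full ACATO test, restricting the SKAT/ACAT-type tests\n# to variants with AAF below 1%\n./regenie --step 2 ... --vc-tests skato,acato-full --vc-maxAAF 0.01\n```\n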
\nUltra-rare variants (defined by default as MAC$\le$10, see `--vc-MACthr`) are collapsed into\na burden mask which is then included in the tests instead of the individual variants.\n\nFor additional details on the tests, [see here](../overview/#step-2-gene-based-testing).\n\n\n#### Joint test for burden masks\n\nThe following tests can be used to combine different burden masks \ngenerated using different annotation classes as well as AAF thresholds.\n\n| Test  | Name in **regenie**    | QT | BT | Robust to LD | Assumes same effect direction |\n| :----- |:--------------: |:---: |:---: | :---: |:---: |\n| Minimum P-value    | minp | $\checkmark$ | $\checkmark$ | $\times$ | $\times$       |\n| ACAT |acat  | $\checkmark$|$\checkmark$|$\checkmark$|$\times$        |\n| SBAT |sbat  | $\checkmark$|$\times$|$\checkmark$|$\checkmark$        |\n\nThe ACAT test combines the p-values of the individual burden masks using the Cauchy combination method \n(see ref. 14 [here](../overview/#fn:14)).\nThe SBAT test is described in more detail [here](../overview/#sparse-burden-association-test).\n\nIf you only want to output the results for the joint tests (ignoring the marginal tests), use `--joint-only`.\n\n\n#### LOVO/LODO schemes\n\nThe leave-one-variant-out (LOVO) scheme takes all sites going into a mask,\nand builds LOVO masks \nby leaving out one variant at a time from the full set of sites. \nThe mask including all sites will also be computed.\n\nThe argument for `--mask-lovo` is a comma-separated list which \nconsists of \nthe set/gene name, \nthe mask name, \nand the AAF cutoff (either 'singleton' or a double in (0,1)).\n\nIf using a 4-column annotation file, then `--mask-lovo` should have \nthe gene name, \nthe domain name,\nthe mask name, \nand the AAF cutoff.\nSo the LOVO masks will be generated for a specific gene domain.\n\nThe leave-one-domain-out (LODO) scheme (specified by `--mask-lodo`) \ntakes all sites going into a mask and builds a LODO mask for each domain specified for the gene\nby excluding all variants in the domain. \nThe full mask including all sites will also be computed. \nThe argument for `--mask-lodo` should have the gene name, the mask name and the AAF cutoff.\n
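\nFor instance, using the gene and mask names from the examples above (`...` stands for the usual gene-based testing options), LOVO masks for Mask1 of PCSK9 at the 1% AAF cutoff would be requested as:\n\n```bash\n./regenie --step 2 ... --mask-lovo PCSK9,Mask1,0.01\n```\n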
\n\n#### Writing mask files \nBurden masks built in **regenie** can be written to PLINK bed format. \nIf the input genetic data contains dosages, \nthe mask dosages will be converted to hard-calls prior to being written to file \nand these hard-calls will be used for the association testing.\n\nThe PLINK bed file is written using 'ref-last' encoding (i.e. REF allele is \nlisted last in the bim file).\n\nNote that this cannot be used with the LOVO/LODO schemes.\n\n### Options\n| Option | Argument | Type | Description|\n|---|-------|------|----|\n|`--aaf-bins`| FLOAT,...,FLOAT| Optional| comma-separated list of AAF upper bounds to use when building masks [default is a single cutoff of 1%]|\n|`--build-mask`| STRING| Optional| build masks using the maximum number of ALT alleles across sites (`'max'`; the default), or the sum of ALT alleles (`'sum'`), or thresholding the sum to 2 (`'comphet'`)|\n|`--singleton-carrier`| FLAG| Optional| to define singletons as variants with a single carrier in the sample (rather than alternative allele count=1)|\n|`--set-singletons`| FLAG| Optional| to use 3rd column in AAF file to specify variants included in singleton masks|\n|`--write-mask`| FLAG| Optional| write mask to PLINK bed format **(does not work when building masks with 'sum')**|\n|`--vc-tests`| STRING| Optional| comma-separated list of SKAT/ACAT-type tests to run|\n|`--vc-maxAAF`| FLOAT| Optional| AAF upper bound to use for SKAT/ACAT-type tests [default is 100%]|\n|`--skat-params`| FLOAT,FLOAT| Optional| a1,a2 values for the single variant weights computed from Beta(MAF,a1,a2) used in SKAT/ACAT-type tests [default is (1,25)]|\n|`--skato-rho`| FLOAT,...,FLOAT| Optional| comma-separated list of $\rho$ values used for SKATO models|\n|`--vc-MACthr`| FLOAT| Optional| MAC threshold below which to collapse variants in SKAT/ACAT-type tests [default is 10]|\n|`--joint`| STRING| Optional| comma-separated list of joint tests to apply on the generated burden masks|\n|`--rgc-gene-p`| FLAG| Optional| to compute the GENE_P test|\n|`--skip-test`| FLAG| Optional| to skip computing association tests after building masks and writing them to file|\n|`--mask-lovo`| STRING| Optional| to perform LOVO scheme|\n|`--lovo-snplist`| FILE| Optional| File with list of variants for which to compute LOVO masks|\n|`--mask-lodo`| FLAG| Optional| to perform LODO scheme|\n|`--weights-col`| INT| Optional| column index (1-based) in annotation file to use custom weights in gene-based tests|\n|`--write-mask-snplist`| FLAG| Optional| to write list of variants that went into each mask to file|\n|`--check-burden-files`| FLAG| Optional| to check the concordance between annotation, set list and mask files [see [below](https://rgcgithub.github.io/regenie/options/#checking-input-files)]|\n|`--strict-check-burden`| FLAG| Optional| to exit early if the annotation, set list and mask definition files don't agree [see [below](https://rgcgithub.github.io/regenie/options/#checking-input-files)]|\n\nThree rules can be used to build masks with `--build-mask` as shown in the diagram below, \nwhere the last rule `comphet` applies a threshold of 2 to the mask from the `sum` rule.\n\n![MaskRules](img/mask_rules.png){ style=\"text-align:center;padding: 10px;width:60%;display: block;margin-left: auto;margin-right: auto\"}\n\n### Output\n**With `--out file`**\n\nResults are written in separate files for each phenotype\n`file_<phenotype1_name>.regenie,...,file_<phenotypeP_name>.regenie` \nwith the same output format mentioned [above](https://rgcgithub.github.io/regenie/options/#output).\nAdditionally, a header line is included (starting with `##`)\nwhich contains mask definition information.\n\nMasks are named `<set_name>.<mask_name>.<AAF_cutoff>`, with the \nchromosome and physical position taken from the set list file, \nthe reference allele set to `ref`, and the alternate allele set to `<mask_name>.<AAF_cutoff>`.
\nWhen using `--mask-lovo`, the mask name will be the same as above but have suffix\n`_<variant_name>` to specify the variant which was excluded when building the mask.\n\nWith `--build-mask sum`, the reported mask AAF corresponds to the average \nAAF across sites included in the mask.\n\nIf using `--write-mask`, the masks will be saved to \n`file_masks.{bed,bim,fam}` and if using `--write-mask-snplist`, \nthe list of variants included in each mask will be saved to `file_masks.snplist`. \n\nWhen using `--rgc-gene-p`, it will apply the single p-value per gene GENE_P strategy \nusing all masks (see [here](../overview/#gene_p) for details).\n\n\n### Example run\nUsing Step 1 results from the [Step 1 command above](https://rgcgithub.github.io/regenie/options/#getting-started), we use the following command to build and test masks in Step 2\n```\n./regenie \\\n  --step 2 \\\n  --bed example/example_3chr \\\n  --covarFile example/covariates.txt \\\n  --phenoFile example/phenotype_bin.txt \\\n  --bt \\\n  --remove example/fid_iid_to_remove.txt \\\n  --firth --approx \\\n  --pred fit_bin_out_pred.list \\\n  --anno-file example/example_3chr.annotations \\\n  --set-list example/example_3chr.setlist \\\n  --mask-def example/example_3chr.masks \\\n  --aaf-bins 0.1,0.05 \\\n  --write-mask \\\n  --bsize 200 \\\n  --out test_bin_out_firth\n```\n\nFor each set, this will produce masks using 3 AAF cutoffs (singletons, 5% and 10% AAF). \nThe masks are written to PLINK bed file (in `test_bin_out_firth_masks.{bed,bim,fam}`) \nand tested for association with each binary trait using the approximate Firth test \n(summary stats in `test_bin_out_firth_<phenotype_name>.regenie`). \nNote that the test uses the whole genome regression LOCO PRS from Step 1 of **regenie** (specified by `--pred`).\n\n### Checking input files\nTo assess the concordance between the input files for building masks, you can use `--check-burden-files` which will generate a report in `file_masks_report.txt` containing:\n\n1. for each set, the list of variants in the set-list file which are unrecognized (not genotyped \nor not present in the annotation file for the set)\n\n2. for each mask, the list of annotations in the mask definition file which are not in the annotation file\n\nAdditionally, you can use `--strict-check-burden` to\nenforce full agreement between the three files \n(if not, the program will terminate):\n\n1. all genotyped variants in the set list file must be in the annotation file (for the corresponding set)\n\n2. all annotations in the mask definition file must be present in the annotation file\n\n## Interaction testing\nStarting from **regenie** v3.0, you can perform scans for interactions (either GxE or GxG). \nFor GxE tests, the interacting variable should be part of the covariate file \n(if it is categorical, specify it in `--catCovarList`).\nFor GxG tests, the interacting variant can be part of the input genetic file \nor it can be present in an external file (see `--interaction-file`).\n
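\nFor example, a GxE scan using covariate `V1` from the example covariate file could look like this sketch (`...` stands for the usual Step 2 options shown earlier):\n\n```bash\n./regenie --step 2 ... --interaction V1\n```\n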
\n### Options\n| Option | Argument | Type | Description|\n|---|-------|------|----|\n|`--interaction`| STRING| Optional| to run GxE test specifying the interacting covariate (see below)|\n|`--interaction-snp`| STRING| Optional| to run GxG test specifying the interacting variant (see below)|\n|`--interaction-file`| FORMAT,FILE| Optional| external genotype file containing the interacting variant [FORMAT can be bed/bgen/pgen and FILE is the file name (bgen) or file prefix (bed/pgen)]|\n|`--interaction-file-sample`| FILE| Optional| accompanying sample file for BGEN format|\n|`--interaction-file-reffirst`| FLAG| Optional| use the first allele as the reference for BGEN or PLINK BED formats|\n|`--no-condtl`| FLAG| Optional| to print out all the main effects from the interaction model (see Output section below)|\n|`--force-condtl`| FLAG| Optional| to include the interacting SNP as a covariate in the marginal test (see Output section below)|\n|`--rare-mac`| FLOAT| Optional| minor allele count (MAC) threshold below which to use HLM method for QTs [default is 1000]|\n\nFor GxE tests where the interacting variable is categorical, you can specify the baseline level using `--interaction VARNAME[BASE_LEVEL]` (e.g. `--interaction BMI[<25]`). Otherwise, the first value found in the covariate file will be used as the baseline level.\n\nFor GxG tests, the default coding for the interacting variant is additive. If you would like to use dominant/recessive/categorical coding, use `--interaction-snp SNP_NAME[dom/rec/cat]` (for example with dominant coding, `--interaction-snp SNPNAME[dom]` will allow for separate effects between carriers vs non-carriers of the interacting variant). The allowed values in the brackets are `add/dom/rec/cat`.\n<!---\nforce-ltco : to use a Leave-Two-Chromosome-Out (LTCO) scheme specifying the chromosome to remove from the LOCO PRS of Step 1\n--->\n\n\n### Output\nThe result files will contain multiple lines for the same variant corresponding to the\ndifferent null hypotheses being tested in the [interaction model](../overview/#step-2-interaction-testing)\n$$\ng(\mu) = E\alpha + G\beta + (G\odot E)\gamma\n$$\n\nThe suffix in the \"TEST\" column indicates which hypothesis is being tested:\n\n* \"ADD\": marginal test where the interacting variable has **not** been added as a covariate $-$ this corresponds to $H_0: \beta = 0$ given $\alpha=\gamma = 0$\n    * this is only printed for GxG tests by default, or GxE using `--no-condtl`\n* \"ADD-CONDTL\": marginal test where the interacting variable has been added as a covariate (default for GxE tests) $-$ this corresponds to $H_0: \beta = 0$ given $\gamma = 0$\n    * this is only printed for GxE tests by default, or GxG using `--force-condtl`\n* \"ADD-INT_VAR\": test for the main effect of the interaction variable (\"VAR\" will be replaced by the name of the interacting variable) $-$ this corresponds to $H_0: \alpha = 0$\n    * this is only printed for GxG tests by default, or GxE using `--no-condtl`\n    * If the interacting variable is categorical, you will have separate lines for each level aside from the baseline level (e.g. \"ADD-INT_BMI=25-30\" and \"ADD-INT_BMI=30+\" where baseline level is \"$<$25\")\n    * will also output the effect of $E^2$ in \"ADD-INT_VAR^2\" if the trait is binary (see [here](../overview/#step-2-interaction-testing))\n* \"ADD-INT_SNP\": test for main effect of tested SNP in the interaction model $-$ this corresponds to $H_0: \beta = 0$\n* \"ADD-INT_SNPxVAR\": test for interaction effect (\"VAR\" will be replaced by the name of the interacting variable) $-$ this corresponds to $H_0: \gamma = 0$\n    * If the interacting variable is categorical, you will have separate lines for each level aside from the baseline level (e.g. \"ADD-INT_SNPxBMI=25-30\" and \"ADD-INT_SNPxBMI=30+\" where baseline level is \"$<$25\")\n        * With Firth correction, only the effect sizes for the interaction effect at each level will be reported and the LRT p-value will only be computed for the joint test on the interaction effects\n* \"ADD-INT_$k$DF\": joint test for main and interaction effect of tested variant ($k\ge2$ for categorical interacting variables) $-$ this corresponds to $H_0: \beta = \gamma = 0$\n\n\n## Conditional analyses\nStarting from **regenie** v3.0, you can specify genetic variants to add to the set of covariates when performing association testing. \nThis works in both step 1 and 2, and can be used in conjunction with the gene-based tests or the interaction testing feature.\nThe conditioning variants are automatically excluded from the association analysis.\n\n| Option | Argument | Type | Description|\n|---|-------|------|----|\n|`--condition-list`| FILE| Required| file with list of variants to condition on|\n|`--condition-file`| FORMAT,FILE| Optional| get conditioning variants from external file (same argument format as `--interaction-file`)|\n|`--condition-file-sample`| FILE| Optional| accompanying sample file for BGEN format|\n|`--max-condition-vars`| INT| Optional| maximum number of conditioning variants [default is 10,000]|\n
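\nAs a sketch (assuming `cond_variants.txt` lists one variant ID per line, as for `--extract`, and with `...` standing for the usual options):\n\n```bash\n./regenie --step 2 ... --condition-list cond_variants.txt\n```\n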
\n\n## Survival analyses\n\nStarting from **regenie** v4.0, you can conduct survival analysis for time-to-event data. \n\n### Phenotype file format\n\nIn this small example, there are 5 samples, and the event of interest is the diagnosis of cancer over a period of 10 years.\n\n![Survival_eg](img/survival_eg.png)\n\nSample 1 is diagnosed with cancer during the study; the `time` variable is the number of years until the sample is diagnosed with cancer. Sample 2 drops out of the study; sample 3 dies during the study; samples 4 and 5 complete the study without being diagnosed with cancer; they are all right-censored, and the `time` variable is the last encounter or death time. The corresponding phenotype file is \n```\nFID IID Time Cancer\n1 1 6 1\n2 2 5 0\n3 3 2 0\n4 4 10 0\n5 5 10 0\n```\n\n### Required options\n\nSurvival analysis in **regenie** requires the following specific options in step 1, step 2 and gene-based burden tests.\n\n| Option | Argument | Type | Description|\n|---|-------|------|----|\n|`--t2e`| FLAG | Required| specify that the traits are time-to-event data|\n|`--phenoColList` | STRING | Required | Comma separated list of time column names to include in the analysis |\n|`--eventColList` | STRING | Required | Comma separated list of columns in the phenotype file to include in the analysis that contain the events. 
These event columns should have 0=no event,1=event,NA=missing |\n\nFor the example above, the regenie call is\n```\n./regenie \\\n--t2e \\\n--phenoColList Time \\\n--eventColList Cancer \\\n...\n```\n\nFor a phenotype file containing multiple time-to-event traits, the order of event columns listed in `--eventColList` should match the order of time columns specified in `--phenoColList`. For example, the phenotype file is\n```\nFID IID Cancer_Time Cancer Asthma_Time Asthma\n1 1 6 1 4 0\n2 2 5 0 8 1\n```\nThe regenie call is\n```\n./regenie \\\n--t2e \\\n--phenoColList Cancer_Time,Asthma_Time \\\n--eventColList Cancer,Asthma \\\n...\n```\n\nThe output format is the same as the [output file for quantitative and binary traits](#output), with the `BETA` column containing the estimated hazard ratio (on the log scale).\n\n## LD computation\nREGENIE can calculate LD among a group of variants on the same chromosome. \n\n| Option | Argument | Type | Description|\n|---|-------|------|----|\n|`--compute-corr`| FLAG| Required| compute LD matrix and write to binary file|\n|`--output-corr-text`| FLAG| Optional| write Pearson correlations to text file|\n|`--forcein-vars`| FLAG| Optional| retain in the LD matrix all variants specified in `--extract` which are absent from the genetic data|\n|`--ld-extract`| FILE| Optional| file listing single variants as well as burden masks to include in LD matrix (see below)|\n\nNote that this can be quite memory intensive for large groups of variants (memory ~$8M^2$ bytes for $M$ variants).\n\n\n### Output\n\n**Using `--step 2 --out file`** \n\nBy default, the LD matrix is stored in a binary compressed file `file.corr` and\nthe list of variants corresponding to the columns of the LD matrix is stored in `file.corr.snplist`.\nThe R script [scripts/parseLD.r](https://github.com/rgcgithub/regenie/blob/master/scripts/parseLD.r) contains a function which returns the LD matrix, e.g. `get.corr.sq.matrix(\"file.corr\")`.\nUsing `--output-corr-text` will write the Pearson correlations to a text file instead.\n\nWhen using `--forcein-vars`, variants not present in the genetic data will be added as extra columns/rows in the LD matrix. \nFor these variants, the diagonal entries in the matrix will be set to 1 and the off-diagonal entries to 0.\n\n**Using `--ld-extract info.txt`** \n\nThis option is used to compute LD between single variants and burden masks generated on-the-fly in REGENIE; it requires specifying [annotation files](#annotation-input-files).\nThe file `info.txt` should have three columns: variant type ('sv' or 'mask'), variant name, followed by the set (e.g. gene) name (this can be 'NA' for single variants). For example, it would look like:\n\n```\nsv 1:1111:A:G NA\nsv 1:2222:C:T NA\nmask PCSK9.M1.0.01 PCSK9\n.\n```\n\nNote that the set and mask names must match those used in REGENIE based on the provided annotation files and allele frequency cutoffs. Variants/masks not present in the data will be kept in the LD matrix but will have the corresponding correlations set to 0.\n"
  },
  {
    "path": "docs/docs/overview.md",
    "content": "## Overview \n\nThis page provides an overview of the models and methods implemented in\n**regenie**. A full description is given in our [paper](https://doi.org/10.1038/s41588-021-00870-7).\n\n**regenie** carries out genome-wide association tests for both\n  quantitative and binary (case-control) phenotypes. Starting at **regenie v4.0**, it also supports survival analysis for time-to-event data (See [Survival analysis](#survival-analysis) section below). It is designed to handle\n\n1. A large number of samples. For example, it is ideally suited to the\n[UK Biobank](https://www.ukbiobank.ac.uk/) dataset with 500,000 samples.\n2. A combination of genetic data from a micro-array, imputation and\nexome sequencing.\n3. A large number of either quantitative traits (QTs), binary\n(case-control) traits (BTs), or time-to-event traits (TTEs)\n4. Accounting for a set of covariates\n\nAn overview of the **regenie** method is provided in the figure below.\nEssentially, **regenie** is run in 2 steps:\n\n1. In the first step a subset of genetic markers are used to fit a\nwhole genome regression model that captures a good fraction of the\nphenotype variance attributable to genetic effects.\n2. In the second step, a larger set of genetic markers (e.g. imputed\nmarkers) are tested for association with the phenotype *conditional\nupon* the prediction from the regression model in Step 1, using a\nleave one chromosome out (LOCO) scheme, that avoids proximal\ncontamination.\n\n![Rflow](img/regenie_overview2.png){ style=\"text-align:center;padding: 10px;width:70%;border: 1px solid #ddd;display: block;margin-left: auto;margin-right: auto\"}\n<br>\n\n### Step 1 : Whole genome model\n\nIn Step 1 a whole genome regression model is fit at a subset of the\ntotal set of available genetic markers. These are typically a set of\nseveral hundred thousand (\\(M\\)) common markers from a micro-array. \n\n#### Ridge regression (level 0)\n\n**regenie** reads in the \\(M\\) markers in blocks of \\(B\\) consecutive markers (`--bsize` option).\n\nIn each block, a set of ridge regression\n  predictors are calculated for a small range of \\(J\\) shrinkage\n  parameters \\( \\{\\tau_1,\\ldots, \\tau_J\\} \\) (using `--l0` option [default is 5]) .\n\nFor a block of SNPs in a \\(N\\times B\\) matrix \\(X\\) and \\(N\\times 1\\)\nphenotype vector \\(Y\\) we calculate  \\(J\\)  predictors\n\n\\( X\\widehat{\\beta}_1 \\ldots, X\\widehat{\\beta}_J \\) \n\nwhere\n\n\\( \\widehat{\\beta}_j = (X^TX+\\tau_j I)^{-1}X^T Y\\) \n\n\nThe idea behind using a range of shrinkage\n  values is to capture the unknown number and size of truly\n  associated genetic markers within each window. The ridge regression takes account of Linkage disequilibrium (LD) within each block.\n\nThese predictors are stored in place of the\n  genetic markers in matrix \\(W\\), providing a large reduction in data size. \nFor example, if\n  \\(M=500,000\\) and \\(B=1,000\\) and \\(J=5\\) shrinkage parameters are used, then the\n  reduced dataset will have \\(JM/B=2,500\\) predictors.\n\nRidge regression is used in this step for both quantitative and binary\ntraits. \n\n#### Cross-validation (level 1)\n\nThe predictors generated by the ridge regression step will all be\npositively correlated with the phenotype. Thus, it is important to\naccount for that correlation when building a whole genome wide\nregression model.\n\nWhen analyzing a quantitative trait we use a second level of ridge\nregression on the full set of \\(JM/B\\) predictors in \\(W\\). 
This\napproach is inspired by the method of stacked regressions[@RN293].\n\nWe fit the ridge regression for a range of shrinkage parameters (`--l1` option) and choose a single\nbest value using a K-fold cross-validation scheme. This assesses the\npredictive performance of the model using held out sets of data, and aims to control\nany over-fitting induced by using the first level of ridge regression\nto derive the predictors.\n\nIn other words, we fit the model\n\n$$Y = W\alpha + \epsilon$$\n\nwhere \\(\\alpha\\) is estimated as \\(\\widehat{\\alpha} = (W^TW + \\phi\\,I)^{-1}W^TY\\)\nand the parameter \\(\\phi\\) is chosen\nvia K-fold cross-validation. \n\nFor binary traits, we use a logistic ridge regression model to combine\nthe predictors in \\(W\\)\n\n$$\text{logit}(p) = \mu + W\alpha$$\nwhere \\(p\\) is the probability of being a case and \\(\\mu\\) captures the effects of non-genetic covariates.\n\n#### Genetic predictors and LOCO\n\nOnce \\(\\alpha\\) has been estimated we can construct the genetic prediction\n\n$$Z = W\widehat{\alpha}$$\n\nAlso, since each column of the matrix \\(W\\) will be associated with a\nchromosome we can also construct a genetic prediction ignoring any\none chromosome, by simply ignoring those columns when calculating the\nprediction. This is known as the Leave One Chromosome Out (LOCO)\napproach. These LOCO predictions are valuable at Step 2 of **regenie**\nwhen each marker is tested for association (see below).\n
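\nConcretely (notation ours), if \\(W_{(k)}\\) denotes the columns of \\(W\\) corresponding to chromosome \\(k\\), the LOCO prediction used when testing markers on chromosome \\(k\\) is\n\n$$Z^{(-k)} = Z - W_{(k)}\widehat{\alpha}_{(k)}$$\n\ni.e. the whole-genome prediction with the contribution of chromosome \\(k\\) removed.\n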
\nFor binary traits, it is the linear predictor in a logistic regression\nmodel using LOCO that is saved,\nand used as an offset when fitting logistic regression models to test\nfor association.\n\n#### Multiple phenotypes\n\nThe dimension reduction step using ridge regression can be used very\nefficiently to model multiple phenotypes at once. The ridge regression\nequations for a block of SNPs in a \\(N\\times B\\) matrix \\(X\\) and a single phenotype\nin a \\(N\\times 1\\) matrix \\(Y\\) take the form\n\n\\( \\widehat{\\beta} = AY \\) where \\(A = (X^TX+\\tau I)^{-1}X^T\\) **does not depend on \\(Y\\)**\n\nIf instead \\(P\\) phenotypes are stored in columns of a \\(N\\times P\\) matrix \\(Y\\), then the matrix \\(A\\)\ncan be applied jointly to calculate the matrix of estimates \\(\\widehat{\\beta} = AY\\),\nand this can take advantage of parallel linear algebra\nimplementations in the Eigen matrix library.\n\n#### Covariates\n\nCovariates, such as age, sex and batch effect variables, can be\nincluded in the **regenie** model.\n\nFor quantitative traits, any covariates are regressed out of\nphenotypes and genotypes before fitting the model.\n\nFor binary traits, we fit a null model with only covariates, and use\npredictions from that model as an offset when fitting the logistic\nregression model.\n\n### Step 2 : Single-variant association testing\n\nIn Step 2, a larger set of markers are tested for association with the\ntrait (or traits). As with Step 1, these markers are also read in\nblocks of \\(B\\) markers, and tested for association. This avoids having\nto store all markers in memory at once.\n\n#### Quantitative traits\n\nFor quantitative traits, we use a linear regression model for\nassociation testing.\n\n- Covariates are regressed out of the phenotypes and genetic markers.\n- The LOCO predictions from Step 1 are removed from the phenotypes.\n- Linear regression is then used to test association of the residualized\nphenotype and the genetic marker.\n- Parallel linear algebra operations in the Eigen library are used\n  where possible.\n\n#### Binary traits\n\nFor binary traits, a logistic regression score test is used to test association of\nthe phenotype and the genetic marker.\n\nThe logistic regression model includes the LOCO predictions from\n  Step 1 *as an offset*. Covariates are included in the linear\n  predictor in the usual way.\n\nWhen the case-control ratio is imbalanced, standard association tests\ndon't control Type I error well at rare genetic markers. **regenie**\nhas two options to handle this:\n\n##### Firth logistic regression\n\nStandard maximum likelihood estimates are generally biased. The Firth\ncorrection[@RN248]\nremoves much of the bias, and results in better calibrated test\nstatistics. The correction involves adding a penalty term to the\nlog-likelihood,\n\n$$ \widetilde{l}(\theta) = l(\theta) + {1 \over 2} \log |I(\theta)| $$\n\nwhere the penalty term corresponds to the use of the Jeffreys prior. \nThis prior has the effect of shrinking the effect size towards zero.\n\n**regenie** uses a Firth correction when the p-value from the standard\n  logistic regression test is below a threshold (default 0.05). \nIt also includes a novel, accurate and fast approximate Firth correction which \nis ~60x faster than the exact Firth correction\n(see the options `--firth` and `--approx`). \n\nThe p-value reported in **regenie** is based on a likelihood ratio test (LRT), and we use the Hessian of the log-likelihood without the penalty term to estimate the standard error (SE). \nThis may cause an issue in meta-analyses with rare variants, as the effect size estimate and SE may not match with the LRT p-value. \nHence, we added an option `--firth-se` to report a SE computed instead from the effect size estimate and the LRT p-value.\n\n##### Saddlepoint approximation (SPA) test\n\nThe SPA test approximates the null distribution of the test statistic\nby approximating the cumulant generating function of the\ntest statistic, which involves all of the higher order\nmoments[@RN488]$^,$[@RN245]. This provides a better estimation of the tail probabilities compared to using\nstandard asymptotic theory which relies on the normal approximation and uses only the\nfirst two moments of the distribution. A tail probability is obtained as \n\n$$\n\begin{align*}\nP&(T < t_{\text{obs}}) \approx\n    \Phi(z), \text{ where}\\\\\n    z &= w + \frac{1}{w}\log{\frac{v}{w}}\\\\\n    w &= \text{sign}(\delta^*)\sqrt{2[t_{\text{obs}}\,\delta^* - K(\delta^*)]},\,\n    v = \delta^*\sqrt{K''(\delta^*)}\n\end{align*}\n$$\n\nand \\(K(\\delta)\\) is the cumulant generating function of the test statistic and \\(\\delta^*\\) \nis obtained by using a root-finding algorithm for \\(K'(\\delta)=t_{\\text{obs}}\\). 
\n### Step 2 : Gene-based testing\n\nInstead of performing single-variant association tests, multiple variants can be aggregated\nin a given region, such as a gene, using the following model\n\n$$g(\\mu) = w_1G_1\\beta_1 + \\dots + w_mG_m\\beta_m$$\n\nwhere \\(G_i\\)'s represent the single variants included in the test,\n\\(w_i\\)'s and \\(\\beta_i\\)'s are weights and effect sizes, respectively, for each variant,\nand \\(g(.)\\) is a link function for the phenotypic mean \\(\\mu\\). \nWe also denote by \\(S_i\\) the score statistics obtained from the \n[single-variant tests](#step-2-single-variant-association-testing).\nThis can be especially helpful when testing rare variants, as single-variant \ntests usually have lower power.\n\nTo avoid inflation in the gene-based tests due to rare variants, as well as to reduce computation time, we have implemented the collapsing approach\nproposed in SAIGE-GENE+[@RN492], where ultra-rare variants are aggregated into a mask.\nFor highly imbalanced binary traits, SPA/Firth correction can be used to calibrate the test statistics in the\ngene-based tests, as proposed in Zhao et al. (2020)[@RN452], using `--firth/--spa`. \n\n#### Burden tests\nBurden tests, as defined in Lee et al. (2014)[@RN487], assume \\(\\beta_i=\\beta\\; \\forall i\\), where \\(\\beta\\) is a fixed coefficient, which then leads to the test statistic\n$$Q_{BURDEN} = \\left(\\sum_i w_iS_i\\right)^2$$\nThese tests collapse variants into a single variable which is then tested for association with the phenotype. Hence, they are more powerful when variants have effects in the same direction and of similar magnitude. \nIn **regenie**, multiple options are available to aggregate variants together into a burden mask beyond the linear combination above ([see here](../options/#options_1)). \nFor example, the burden tests that were employed in Backman et al. (2021)[@RN457]\nuse the default strategy in **regenie** of collapsing variants by taking\nthe maximum number of rare alleles across the sites.\n\n#### Variance component tests\nUnlike burden tests, SKAT[@RN386] assumes the effect sizes $\\beta_i$ come from an arbitrary\ndistribution with mean 0 and variance $\\tau^2$, which leads to the test statistic\n$$Q_{SKAT} = \\sum_i w_i^2S_i^2$$\nHence, SKAT can remain powerful when variant effects are in opposite directions.\n\nThe omnibus test SKATO[@RN454] combines the SKAT and burden tests as \n$$Q_{SKATO} = \\rho Q_{BURDEN} + (1-\\rho) Q_{SKAT}$$\nSo setting $\\rho=0$ corresponds to SKAT and $\\rho=1$ to the burden test.\nIn practice, the parameter $\\rho$ is chosen to maximize the power \n[**regenie** uses a default grid of 8 values {$0, 0.1^2, 0.2^2, 0.3^2, 0.4^2, 0.5^2, 0.5, 1$}\nand sets the weights $w_i = Beta(MAF_i,1,25)$].\n\nTo obtain the p-value from a linear combination of chi-squared variables, **regenie** uses Davies' exact method[@RN524] by default.\nFollowing Wu et al. (2016)[@RN514], **regenie** uses Kuonen's saddlepoint approximation method[@RN526] when Davies' p-value\nis below 1e-5 and, if that fails, it uses Davies' method with more stringent convergence parameters (lim=1e5, acc=1e-9). \n\nThe original SKATO method uses numerical integration when maximizing power across the \nvarious SKATO models that use different values for $\\rho$. \nWe also implement a modification of SKATO, named SKATO-ACAT, \nwhich instead uses the Cauchy combination method[@RN482] \nto combine the p-values from the different SKATO models.\n
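\nAs a toy illustration of how these statistics relate (simulated score statistics and MAFs; this computes the statistics only, not their null p-values):\n\n```python\nimport numpy as np\nfrom scipy.stats import beta\n\nrng = np.random.default_rng(3)\nmaf = rng.uniform(1e-4, 0.01, size=30)  # simulated minor allele frequencies\nS = rng.standard_normal(30)             # simulated single-variant score statistics\nw = beta.pdf(maf, 1, 25)                # Beta(MAF,1,25) weights\n\nq_burden = np.sum(w * S)**2             # (sum_i w_i S_i)^2\nq_skat = np.sum(w**2 * S**2)            # sum_i w_i^2 S_i^2\n\n# SKATO interpolates between the two over the default grid of rho values\nrho_grid = np.array([0, 0.1**2, 0.2**2, 0.3**2, 0.4**2, 0.5**2, 0.5, 1])\nq_skato = rho_grid * q_burden + (1 - rho_grid) * q_skat\n```\n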
\n#### Cauchy combination tests\nThe ACATV[@RN339] test uses the Cauchy combination method ACAT to combine single-variant p-values $p_i$ as\n$$Q_{ACATV} = \\sum_i \\widetilde{w}_i^2\\tan{\\{\\pi(0.5 - p_i)\\}}$$\nwhere we set $\\widetilde{w}_i = w_i \\sqrt{MAF(1-MAF)}$. \nThis test is highly computationally tractable and is robust to correlation between the single-variant tests.\n\nThe omnibus test ACATO[@RN339] combines ACATV with the SKAT and burden tests as \n$$\nQ_{ACATO} = \n\\frac{1}{3}\\tan{\\{\\pi(0.5 - p_{ACATV})\\}}+\n\\frac{1}{3}\\tan{\\{\\pi(0.5 - p_{Burden})\\}}+\n\\frac{1}{3}\\tan{\\{\\pi(0.5 - p_{SKAT})\\}}\n$$\n\nwhere, unlike the original ACATO test, we only use one set of the weights $w_i$.\nAlternatively, we augment the test to include an extended set of SKATO models beyond SKAT and Burden\n(which correspond to $\\rho$ of 0 and 1 in SKATO, respectively) and use the default SKATO grid of 8 values for $\\rho$.\n\n#### Sparse Burden Association Test\n**regenie** can generate burden masks which are obtained by aggregating single variants\nusing various annotation classes as well as allele frequency\nthresholds. The Sparse Burden Association Test (SBAT)[@RN703] combines these burden masks\nin a joint model that constrains their effects to have the same direction\n$$\n\\mu = \\sum_{\\text{mask }i} M_i\\gamma_i\n$$\nwhere $M_i$ represents a burden mask, and we solve\n$$\n\\underset{\\boldsymbol\\gamma}{\\min} || Y - \\sum_i M_i\\gamma_i||^2 \n\\text{ subject to } \\gamma_i \\ge 0 \\text{ for all } i\n$$\n\nThe SBAT method tests the hypothesis $H_0: \\gamma_i=0$ for all $i$ vs.\n$H_1: \\gamma_i > 0$ for some $i$.\nBy using this joint model, the SBAT test accounts for the correlation structure between the burden masks, \nand with the non-negative constraints,\nit can lead to a boost in power when multiple burden masks are causal and have concordant effects.\nThis test has the nice property that it combines \nmodel selection of the masks (via the sparsity induced by the non-negativity constraint) \nwith model inference (it is well calibrated and powerful).\n\n#### GENE_P\nAs the different gene-based tests in **regenie** can be more powerful under different genetic architectures,\nwe propose a unified strategy, named GENE_P, that combines the strengths of these tests.\nIt uses ACAT to combine the p-values of the SKATO, ACATV, Burden and SBAT tests \nand obtain an overall assessment of significance for a genetic region (e.g. a gene).\nThe diagram below illustrates the GENE_P test using 4 masks (i.e. combinations of variant annotations) and 3 allele frequency cutoffs \nwhen performing gene-based tests.\n\n![GENE_P_flow](img/GENE_P_diagram.png){ style=\"text-align:center;padding: 10px;width:70%;border: 1px solid #ddd;display: block;margin-left: auto;margin-right: auto\"}\n<br>\n
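\nFor intuition, the Cauchy combination rule at the heart of ACATV, ACATO, SKATO-ACAT and GENE_P can be sketched in a few lines (a simplified illustration with equal weights, not **regenie**'s implementation):\n\n```python\nimport numpy as np\nfrom scipy.stats import cauchy\n\ndef acat(pvals, weights=None):\n    # Map each p-value to a standard Cauchy variate, take a weighted\n    # average, and convert the average back to a p-value\n    p = np.asarray(pvals, dtype=float)\n    w = np.ones_like(p) / p.size if weights is None else np.asarray(weights)\n    t = np.sum(w * np.tan(np.pi * (0.5 - p)))\n    return cauchy.sf(t)\n\nprint(acat([0.01, 0.5, 0.3, 0.04]))  # combined p-value\n```\n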
\n### Step 2 : Interaction testing\n\nThe GxE tests are of the form\n$$\ng(\\mu) = E\\alpha + G\\beta + (G\\odot E)\\gamma\n$$\nwhere $E$ is an environmental risk factor and $G$ is a marker of interest,\nand $\\odot$ represents the Hadamard (entry-wise) product of the two.\nThe last term in the model allows the variant to have different effects across values of the risk factor. \n*Note: if $E$ is categorical, we use a dummy variable for each level of $E$ in the model above.*\n\nWe can look at the following hypotheses:\n\n0. $H_0: \\beta = 0$ given $\\gamma = 0$, which is a marginal test for the SNP\n1. $H_0: \\beta = 0$, which is a test for the main effect of the SNP in the full model\n2. $H_0: \\gamma = 0$, which is a test for interaction\n3. $H_0: \\beta = \\gamma = 0$, which tests both main and interaction effects for the SNP\n\nMisspecification of the model above, \nsuch as in the presence of heteroskedasticity or \nof high case-control imbalance, can lead to inflation in the tests.\nRobust (sandwich) standard error (SE) estimators[@RN485] can be used to address model misspecification; however, \nthey can suffer from inflation when testing rare variants\nor in the presence of high case-control imbalance[@RN373]$^,$[@RN320].\n\nIn **regenie**, we use a hybrid approach which combines:\n\n* Wald test with sandwich estimators\n* Wald test with heteroskedastic linear models (for quantitative traits)\n* LRT with penalized Firth regression (for binary traits)\n\nFor quantitative traits,\nwe use the HC3 sandwich estimator to perform a Wald test for variants whose minor allele count (MAC) is above 1000 (see `--rare-mac`; a minimal sketch of such a test is given at the end of this section). \nFor the remaining variants, we fit a heteroskedastic linear model (HLM)[@RN417]\n$$\nY = E\\alpha + E^2\\zeta + G\\beta + (G\\odot E)\\gamma + \\epsilon\n$$\n\nwhere we assume $\\epsilon \\sim N(0, D)$, with $D$ a diagonal matrix with entries $\\sigma^2\\exp{(1 + E\\theta_1 + E^2\\theta_2)}$.\nThis formulation allows for the phenotypic variance to also depend on the risk factor $E$.\nBy incorporating both the linear and quadratic effect of $E$ in the mean and variance of $Y$, \nthis model provides robustness to heteroskedasticity \n(*Note: the $E^2$ terms are only added when $E$ is quantitative*). \nWald tests are then performed for the null hypotheses listed above.\n\nFor binary traits, we consider the following interaction model\n$$\n\\text{logit}(\\mu) = E\\alpha + E^2\\zeta + G\\beta + (G\\odot E)\\gamma\n$$\nwhere we also include a non-linear effect for $E$ (unless $E$ is categorical).\nThe HC3 sandwich estimator is used in a Wald test for variants whose MAC is above 1000 (see `--rare-mac`); otherwise, the model-based standard errors are used.\nWhen Firth is specified, we only apply the Firth correction using an LRT if the p-value for the interaction term $\\gamma$ from the Wald test is below a specified threshold (see `--pThresh`). So the added $E^2$ term and the use of the Firth penalty both \nhelp with case-control imbalance and model misspecification for the effect of $E$ on the phenotype. \n
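\nTo make the common-variant case for quantitative traits concrete, here is a minimal sketch of a robust Wald interaction test on simulated data, using the HC3 covariance from statsmodels (illustrative only; it omits the LOCO offset, covariates, the HLM fallback for rare variants, and the rest of the **regenie** machinery):\n\n```python\nimport numpy as np\nimport statsmodels.api as sm\n\nrng = np.random.default_rng(5)\nn = 2000\nE = rng.standard_normal(n)                      # quantitative risk factor\nG = rng.binomial(2, 0.2, size=n).astype(float)  # genotype coded 0/1/2\ny = 0.3 * E + 0.1 * G + 0.2 * G * E + rng.standard_normal(n)\n\n# Design matrix for mu = E*alpha + G*beta + (G.E)*gamma, plus an intercept\nX = sm.add_constant(np.column_stack([E, G, G * E]))\nres = sm.OLS(y, X).fit(cov_type=\"HC3\")          # Wald tests use HC3 robust SEs\n\nprint(res.pvalues[-1])                          # test of H0: gamma = 0 (interaction)\n```\n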
\n### Survival analysis\n\nStarting with **regenie v4.0**, we have enabled survival analysis, improving the power for analyzing common diseases where time-to-event data are available, by leveraging the Cox proportional hazards model. We assume that samples without an event are right-censored, i.e. the survival time is only known to be greater than a certain value. It is important to [encode this information correctly into the phenotypes](/regenie/options/#survival-analyses).\n\n#### Step 1: Whole genome model using Cox ridge regression\n\nIn step 1, Level 0 is run using [linear ridge regression](#ridge-regression-level-0) with the `time` variable taken as the response. In Level 1, instead of linear/logistic ridge regression, we use Cox ridge regression[@simon2011regularization] to combine the predictions $W$ from Level 0.\n\n$$\n\\lambda_i(t) = \\lambda_0(t) \\exp(\\mu_i + w_i^\\intercal \\alpha)\n$$\nwhere $\\lambda_0(t)$ is the baseline hazard function, and, for the $i$-th individual, $\\lambda_i(t)$ is the hazard function, $w_i$ is the set of ridge predictors from Level 0, and $\\mu_i$ captures the effects of non-genetic covariates.\n\nWe fit the Cox ridge regression for a range of shrinkage parameters and select the best value using a K-fold cross-validation scheme.\n\nWith the estimated $\\hat{\\alpha}$, we construct LOCO predictions, which capture population structure, relatedness and polygenicity.\n\n#### Step 2: Single-variant and gene-based burden tests\n\nFor time-to-event traits, the Cox proportional hazards regression model is used to test the association between the phenotype and the genetic marker. **Note**: the only supported gene-based test is the burden test.\n\nThe Cox proportional hazards regression model includes the LOCO predictions from Step 1 as an offset.\n\n$$\n\\lambda_i(t) = \\lambda_0(t) \\exp(\\mu_i + w_{i, LOCO} + g_i \\beta)\n$$\n\nWe test the null hypothesis $H_0: \\beta = 0$ using a score test. When the event rate is low, the standard score test doesn't control Type I error well at rare genetic markers. To reduce the bias and achieve a more robust test, **regenie** uses the Firth correction[@heinze2001solution] when the p-value from the standard score test is below a threshold (default 0.05). \n\nThe Firth correction provides a well-calibrated test, but comes with a computational cost. To mitigate this burden in Cox regression, we include a fast approximate test, which gives results very similar to the exact Firth test.\n\n### Missing phenotype data\n\nWith QTs, missing values are mean-imputed in Step 1 and they are \ndropped when testing each phenotype in Step 2 (unless using `--force-impute`).\n\nWith BTs, missing values are mean-imputed in Step 1 when fitting the\nlevel 0 linear ridge regression, and they are dropped when fitting the\nlevel 1 logistic ridge regression for each trait. In Step 2, missing \nvalues are dropped when testing each trait.\n\nTo remove all samples that have missing values at **any** of the \\(P\\)\nphenotypes from the analysis, use option `--strict` in steps 1 and 2. \nThis can also be used when analyzing a single trait to only keep individuals with \ncomplete data, by setting the phenotype values of individuals to remove to NA.\n\nNote: imputation is only applied to phenotypes; covariates are not allowed to have missing data.\n\n### References\n\n\\bibliography\n"
  },
  {
    "path": "docs/docs/performance.md",
    "content": "## Performance\n\nWe assessed the performance of **regenie** against 3 other\nprograms for GWAS on large cohorts.\n\n1. BOLT-LMM [Loh et al. (2015) Nature Genetics 47, 284–290](https://www.nature.com/articles/ng.3190) [[Software]](https://data.broadinstitute.org/alkesgroup/BOLT-LMM/)\n2. SAIGE - [Zhou et al. (2018) Nature Genetics 50, 1335–1341](https://www.nature.com/articles/s41588-018-0184-y)[[Software]](https://github.com/weizhouUMICH/SAIGE)\n3. fastGWA - [Jiang et al. (2019) Nature Genetics 51, 1749–1755](https://www.nature.com/articles/s41588-019-0530-8) [[Software]](https://cnsgenomics.com/software/gcta/#Overview)\n\nFull details for all the runs are available in our [paper](https://doi.org/10.1038/s41588-021-00870-7).\n\n### Quantitative traits\nWe ran **regenie**, BOLT-LMM and fastGWA on \n3 quantitative phenotypes measured on white British UK Biobank participants \n(LDL, N=389,189; Body mass index [BMI], N=407,609; and Bilirubin, N=388,303) \nwhere testing was performed on 9.8 million imputed SNPs. \n\nThe Manhattan plots for all three phenotypes (see below) show good agreement between the methods \nwith both **regenie** and BOLT-LMM resulting in stronger association signals relative to fastGWA \nat known peaks of association \n(note that in the plots, the scaling of the y-axis changes above the upper dashed line).\n\n![QTmanP](img/ManP_exemplarQTs_WB.jpg)\n<br/><br/>\n\n\nWe assessed the computational requirements of all three methods using a larger set of 50 \nquantitative traits from the UK Biobank, looking at computational timings as well as\nmemory usage. \nFor **regenie** and BOLT LMM, 469,336 LD-pruned SNPs were used as model SNPs when fitting the null model (step 1) and \nfor fastGWA, these SNPs were used to compute the sparse GRM (timing not included).\nTests were performed on 11.4M imputed SNPs (step 2).\n\n\n![QTtime](img/qtTiming.png)\n\nFrom the table above, **regenie** was 151x faster than BOLT-LMM in elapsed time for Step 1 and 11.5x faster for Step 2, \nwhich translated into $>$30x overall speed-up in terms of elapsed time.\nIn addition, **regenie** had a maximum memory usage of 12.9 GB, which is mostly due to **regenie** \nonly reading a small portion of the genotype data at a time, whereas BOLT-LMM required 50GB.\n**regenie** was 2.8x faster than fastGWA, but fastGWA is very memory efficient and used only a maximum of 2GB.\n\n\n### Binary traits\n\n**regenie** was compared to BOLT-LMM and SAIGE on a set of four binary traits\nmeasured on white British UK Biobank participants (coronary artery disease [CAD], N=352,063, case-control ratio=1:11; \nglaucoma, N=406,927, case-control ratio=1:52;\n colorectal cancer, N=407,746, case-control ratio=1:97;\n and thyroid cancer, N=407,746, case-control ratio=1:660)\nand Step 2 testing was performed on 11.6 million imputed SNPs.\nA novel and fast approximate Firth correction was used in **regenie** as well as a SPA correction.\n\nAs seen in the Manhattan plots below (note that the scaling of the y-axis changes above the upper dashed line), \nall four approaches show very good agreement for the most balanced trait (CAD; case-control ratio=1:11), \nbut as the fraction of cases decreases BOLT-LMM tends to give inflated test statistics. 
\nHowever, both **regenie** with Firth and SPA corrections, as well as SAIGE, which uses the SPA correction, \nare robust to this inflation and show similar agreement for the associations detected.\n\n\n![BTmanP](img/ManP_exemplarBTs_WB.jpg)\n<br/>\n\nWe assessed the computational requirements of **regenie** and SAIGE using a larger set of 50 \nbinary traits from the UK Biobank that have a range of different case-control ratios and distinct missing data patterns. \n469,336 LD-pruned SNPs were used as model SNPs when fitting the null model (step 1), \nand tests were performed on 11.4M imputed SNPs (step 2).\nIn step 1, **regenie** was run using LOOCV, and for two traits SAIGE did not finish as it took longer than the 4-week limit. \nIn step 2, the approximate Firth correction was used in **regenie** in addition to the SPA correction.\n\n![BTtime](img/btTiming.png)\n\nFrom the table above, Step 1 of **regenie** was about 350x faster and required only $40\\%$ of the memory used by SAIGE. \nIn Step 2, **regenie** Firth and SPA were 2x and 3x faster than SAIGE in CPU time, respectively, but were 21x and 34x \nfaster than SAIGE in elapsed time, respectively, which suggests that **regenie** makes better use of parallelization in this step. \nOverall, **regenie** using Firth correction was 8x faster than SAIGE in CPU hours \nand 26.8x faster in elapsed time.\n\n\n\nAll runs above were done on the same computing environment (16 virtual CPU cores of \na 2.1GHz AMD EPYC 7571 processor, 64GB of memory, and 600GB solid-state disk).\n\n### Timing improvements in v2.2\n\nWe made several changes in **regenie** v2.2 to improve the computational efficiency:\n\n* The genotype file reading in Step 1 is now multi-threaded for all supported formats (i.e. BED, PGEN, and BGEN) and uses a faster file reading implementation for the BGEN v1.2 format with 8-bit encoding. From our timing experiments below, these changes helped reduce the CPU time by 40-60% depending on the input format. \n\n![Step1time](img/timings_Step1.png)\n\n*Note that we used a small number of SNPs for Step 1 in our experiments (20K), so the timing improvement will not be as high in a real Step 1 run where ~500K SNPs would be used.*\n\n* We have improved the implementation of the score tests for binary traits to reduce the number of matrix operations performed, and this reduced the CPU timings by ~60% from the previous version 2.0.2.\n\n![Step2time](img/timings_Step2_BT.png)\n*Note that there is an added memory cost of ~8NKP bytes [N=#samples; K=#covariates; P=#phenotypes], so ~800MB extra for a UKB 500K run with 10 traits & 20 covariates.*\n\n* We have also made use of the sparsity of the genotype vector for rarer variants in Step 2 (more so with binary traits), and this reduced the timing in our experiments by ~20% on average.\n\n![Step2SparseTime](img/timings_Step2_BT_v2.2.png)\n\n*In our experiments, common variants are defined as having MAF > 5%, rare variants are defined as having MAF < 1%, and no correction (i.e. Firth/SPA) is used.*\n\n* We have added new options `--write-null-firth` and `--use-null-firth` to reduce the timing of Step 2 with approximate Firth when run in parallel jobs split into smaller chunks within chromosomes. More specifically, `--write-null-firth` can be used in Step 1 to fit the null model for the approximate Firth test and store the resulting estimates to file. Then, in Step 2, specifying `--use-null-firth` will re-use these parameter estimates to reduce the timing of the approximate Firth null model fitting. 
\n*We thank Juha Karjalainen for suggesting this feature.*\n\nNote: in our timing experiments, the PGEN genotype file only includes hard-calls. We ran a single trait in **regenie** and each setting was replicated 5 times.\n\n### Gene-based testing\n**regenie** v3.0 adds a wide range of [gene-based tests](../overview/#step-2-gene-based-testing).\nWe have performed simulation experiments to assess the calibration of the tests with quantitative and binary traits \nusing real genetic data from the UK Biobank, where we randomly selected 100,000 samples from the set of white British participants\n(see the \"Methods\" section of the [Regenie paper](https://doi.org/10.1038/s41588-021-00870-7) for details on phenotype simulation,\nwhere we set the heritability to 20%).\n\nUsing whole exome sequencing data, we constructed variant sets incorporating functional annotations \n(LoF and missense, where missense variants were predicted as deleterious \nusing a score based on 5 in-silico algorithms), \nas well as allele frequency thresholds focusing on rarer variation (1%, 0.1% and 0.01%).\nThe SKAT/ACAT tests were applied only to variant sets using a 1% or 0.01% AAF threshold, and the \nSBAT and BURDEN-ACAT joint tests combined all burden mask signals from the 1%, 0.1%, 0.01% and singleton thresholds.\n1000 genes on even chromosomes were randomly selected and tested for association (causal variants were on odd chromosomes).\nThe QQ plots below show the distribution of p-values for each test across the different annotation categories (run in Regenie v3.2).\n\n#### Quantitative traits\nUsing a 1% allele frequency cutoff for the SKAT/ACAT tests.\n![QT_gene](img/Gene_burden_QTsims.png)\n\n#### Binary traits\nWe simulated highly imbalanced phenotypes with a disease prevalence of 1% (case-control ratio of 1:99)\nand applied Firth/SPA correction to the tests.\n\nUsing a 1% allele frequency cutoff for the SKAT/ACAT tests.\n![BT_gene1](img/Gene_burden_BTsims_AF1pct.png)\n\nUsing a 0.01% allele frequency cutoff for the SKAT/ACAT tests.\n![BT_gene_001](img/Gene_burden_BTsims_AF001pct.png)\n\n"
  },
  {
    "path": "docs/docs/recommendations.md",
    "content": "## Recommendations for UK Biobank analysis\n\n**regenie** is ideally suited for large-scale analyses such as 500K UK\nBiobank (UKBB) data, where records are available for thousands of phenotypes.\n\nWe provide below a few guidelines on how to perform such analysis on\nthe UKBB files that all UKBB approved researchers have access to.\n\n### Pre-processing\n\nWe will first go over important steps to consider before running **regenie**.\n\n#### Selection of traits\n\n**regenie** can perform whole genome regression on multiple traits at once, which is where\nhigher computational gains are obtained.\n\nAs different traits can have distinct missing patterns, **regenie** uses an imputation scheme\nto handle missing data.\nFrom the real data applications we have performed so far with traits having up to ~20% (for quantitative) \nand ~5% (for binary) missing observations, our imputation scheme resulted in \nnearly identical results as from discarding missing observations when analyzing each trait \nseparately (see the [paper](https://doi.org/10.1038/s41588-021-00870-7) for details). \nHence, we recommend to analyze traits in groups that have similar missingness patterns with resonably \nlow amount of missingness (<15%).\n\n\nThe number of phenotypes in a group will affect the computational resources required\nand the table below shows typical computational requirements based on using \n500,000 markers in step 1 split in blocks of 1000 and using blocks of size 200 when \ntesting SNPs in step 2. The estimates are shown when step 1 of **regenie** is run in low-memory mode \nso that within-block predictions are temporarily stored on disk (see Documentation).\n\n![Rflow](img/comp_req_largeP.png){ style=\"text-align:center;padding: 10px;width:50%;display: block;margin-left: auto;margin-right: auto\"}\n\nIn the following sections, we'll assume traits (let's say binary) and covariates \nused in the analysis have been chosen and data are in files\n`ukb_phenotypes_BT.txt` and `ukb_covariates.txt`,\nwhich follow the format requirement for **regenie** (see Documentation).\n\n#### Preparing genotype file\n\nStep 1 of a **regenie** run requires a single genotype file as input; \nwe recommend using array genotypes for this step.\nThe UKBB genotype files are split by chromosome, so we recommend using\n[PLINK](http://www.cog-genomics.org/plink/) to merge the files using the following code.\n\n*NOTE*: please change **XXX** to you own UKBB application ID number\n```\nrm -f list_beds.txt\nfor chr in {2..22}; do echo \"ukb_cal_chr${chr}_v2.bed ukb_snp_chr${chr}_v2.bim ukbXXX_int_chr1_v2_s488373.fam\" >> list_beds.txt; done\n\nplink \\\n  --bed ukb_cal_chr1_v2.bed \\\n  --bim ukb_snp_chr1_v2.bim \\\n  --fam ukbXXX_int_chr1_v2_s488373.fam \\\n  --merge-list list_beds.txt \\\n  --make-bed --out ukb_cal_allChrs\n```\n\n\n#### Exclusion files\n\nQuality control (QC) filters can be applied using [PLINK2](https://www.cog-genomics.org/plink/2.0/) to filter out samples and\nmarkers in the genotype file prior to step 1 of **regenie**.\n\nNote: **regenie** will throw an error if \na low-variance SNP is included in the step 1 run.\nHence, the user should run adequate QC filtering prior to running **regenie** \nto identify and remove such SNPs.\n\nFor example, to filter out SNPs with \nminor allele frequency (MAF) below 1%, \nminor allele count (MAC) below 100, \ngenotype missingess above 10% and \nHardy-Weinberg equilibrium p-value exceeding \\(10^{-15}\\), and \nsamples with more than 10% 
missingness,\n\n```\nplink2 \\\n  --bfile ukb_cal_allChrs \\\n  --maf 0.01 --mac 100 --geno 0.1 --hwe 1e-15 \\\n  --mind 0.1 \\\n  --write-snplist --write-samples --no-id-header \\\n  --out qc_pass\n```\n\n\n### Step 1\n\nWe recommend running **regenie** using multi-threading (8+ threads), which will \ndecrease the overall runtime of the program. \nAs this step can be quite memory intensive (due to storing block predictions), \nwe recommend using option `--lowmem`, where the number of phenotypes analyzed\nwill determine how much disk space is required (see table above).\n\nRunning step 1 of **regenie** (by default, all available threads are used)\n\n```\n./regenie \\\n  --step 1 \\\n  --bed ukb_cal_allChrs \\\n  --extract qc_pass.snplist \\\n  --keep qc_pass.id \\\n  --phenoFile ukb_phenotypes_BT.txt \\\n  --covarFile ukb_covariates.txt \\\n  --bt \\\n  --bsize 1000 \\\n  --lowmem \\\n  --lowmem-prefix tmpdir/regenie_tmp_preds \\\n  --out ukb_step1_BT\n```\n\nFor $P$ phenotypes analyzed, this will generate a set of $P$ files ending with `.loco`,\nwhich contain the genetic predictions using a LOCO scheme that will be needed for step 2,\nas well as a prediction list file `ukb_step1_BT_pred.list`, which lists \nthe names of these prediction files and can be used as input for step 2.\n\n\n### Step 2\n\nAs steps 1 and 2 are completely decoupled in **regenie**, you could either use all \nthe traits for testing in step 2 or select a subset of the traits to perform association testing.\nFurthermore, you can use the same Step 1 output to test on array, exome or \nimputed variants; below, we will illustrate testing on imputed variants.\n\n\nStep 2 of **regenie** can be run in parallel across chromosomes, so if you have access to multiple machines, we recommend splitting the runs over chromosomes (using 8+ threads).\n\n<!---\n#### Sample mismatch \n\nIt may be that the genotype file used in step 2 does not contain all of the samples used in step 1 \nor contains additional samples not used in step 1. \nIn such a case, you could for example use the following code to only retain samples that are \ncontained in both data sets (we assume that you are testing on a BGEN input file)\n\n```\nexpand -t 1 qc_pass.id > qc_pass_space.id   # BGEN sample file is space-separated\ngrep -wFf qc_pass_space.id ukbXXX_imp_chr1_v3_s487395.sample > fid_iid_step2.keep\n```\n--->\n\nRunning **regenie** testing on a single chromosome (here chromosome 1) \nand using the fast Firth correction as a fallback for p-values below 0.01\n\n```\n./regenie \\\n  --step 2 \\\n  --bgen ukb_imp_chr1_v3.bgen \\\n  --ref-first \\\n  --sample ukbXXX_imp_chr1_v3_s487395.sample \\\n  --phenoFile ukb_phenotypes_BT.txt \\\n  --covarFile ukb_covariates.txt \\\n  --bt \\\n  --firth --approx --pThresh 0.01 \\\n  --pred ukb_step1_BT_pred.list \\\n  --bsize 400 \\\n  --split \\\n  --out ukb_step2_BT_chr1\n\n```\n\nThis will create separate association results files for each phenotype as `ukb_step2_BT_chr1_*.regenie`.\n\nWhen running the SKAT/ACAT gene-based tests, we recommend using at most 2 threads and \ninstead parallelizing the runs over partitions of the genome (e.g. groups of genes).\n"
  },
  {
    "path": "docs/mkdocs.yml",
    "content": "site_name: regenie\ntheme:\n  name: null\n  custom_dir: 'cinder'\ncopyright: \"REGENIE is licensed under the <a href='https://github.com/rgcgithub/regenie/blob/master/LICENSE'>MIT license\"\n\nnav:\n  - Home: index.md\n  - Overview: overview.md\n  - Install: install.md\n  - Documentation: options.md\n  - Performance: performance.md\n  - UKBB Analysis: recommendations.md\n  - F.A.Q.: faq.md\n\nmarkdown_extensions:\n  - mdx_math\n  - attr_list\n  - footnotes\n\nplugins:\n  - search\n  - bibtex:\n      bib_file: \"refs.bib\"\n      csl_file: \"nature.csl\"\n\ngoogle_analytics:\n  - UA-166859683-2\n  - auto\n  \nextra_css:\n  - custom/style.css\n"
  },
  {
    "path": "docs/refs.bib",
    "content": "﻿@article{RN524,\r\n   author = {Davies, Robert B.},\r\n   title = {Algorithm AS 155: The Distribution of a Linear Combination of χ 2 Random Variables},\r\n   journal = {Applied Statistics},\r\n   volume = {29},\r\n   number = {3},\r\n   pages = {323-333},\r\n   year = {1980},\r\n}\r\n\r\n@article{RN485,\r\n   author = {MacKinnon, James G. and White, Halbert},\r\n   title = {Some heteroskedasticity-consistent covariance matrix estimators with improved finite sample properties},\r\n   journal = {Journal of Econometrics},\r\n   volume = {29},\r\n   number = {3},\r\n   pages = {305-325},\r\n   year = {1985},\r\n}\r\n\r\n@article{RN248,\r\n   author = {Firth, David},\r\n   title = {Bias reduction of maximum likelihood estimates},\r\n   journal = {Biometrika},\r\n   volume = {80},\r\n   number = {1},\r\n   pages = {27-38},\r\n   year = {1993},\r\n}\r\n\r\n@article{RN293,\r\n   author = {Breiman, Leo},\r\n   title = {Stacked regressions},\r\n   journal = {Machine learning},\r\n   volume = {24},\r\n   number = {1},\r\n   pages = {49-64},\r\n   year = {1996},\r\n}\r\n\r\n@article{RN526,\r\n   author = {Kuonen, D},\r\n   title = {Miscellanea. Saddlepoint approximations for distributions of quadratic forms in normal variables},\r\n   journal = {Biometrika},\r\n   volume = {86},\r\n   number = {4},\r\n   pages = {929-935},\r\n   year = {1999},\r\n}\r\n\r\n@book{RN488,\r\n   author = {Butler, Ronald W.},\r\n   title = {Saddlepoint Approximations with Applications},\r\n   publisher = {Cambridge University Press},\r\n   year = {2007},\r\n}\r\n\r\n@article{RN373,\r\n   author = {Tchetgen Tchetgen, E. J. and Kraft, P.},\r\n   title = {On the robustness of tests of genetic associations incorporating gene-environment interaction when the environmental exposure is misspecified},\r\n   journal = {Epidemiology},\r\n   volume = {22},\r\n   number = {2},\r\n   pages = {257-61},\r\n   year = {2011},\r\n}\r\n\r\n@article{RN320,\r\n   author = {Voorman, A. and Lumley, T. and McKnight, B. and Rice, K.},\r\n   title = {Behavior of QQ-plots and genomic control in studies of gene-environment interaction},\r\n   journal = {PLoS One},\r\n   volume = {6},\r\n   number = {5},\r\n   year = {2011},\r\n}\r\n\r\n@article{RN386,\r\n   author = {Wu, M. C. and Lee, S. and Cai, T. and Li, Y. and Boehnke, M. and Lin, X.},\r\n   title = {Rare-variant association testing for sequencing data with the sequence kernel association test},\r\n   journal = {Am J Hum Genet},\r\n   volume = {89},\r\n   number = {1},\r\n   pages = {82-93},\r\n   year = {2011},\r\n}\r\n\r\n@article{RN454,\r\n   author = {Lee, S. and Wu, M. C. and Lin, X.},\r\n   title = {Optimal tests for rare variant effects in sequencing association studies},\r\n   journal = {Biostatistics},\r\n   volume = {13},\r\n   number = {4},\r\n   pages = {762-75},\r\n   year = {2012},\r\n}\r\n\r\n@article{RN487,\r\n   author = {Lee, S. and Abecasis, G. R. and Boehnke, M. and Lin, X.},\r\n   title = {Rare-variant association analysis: study designs and statistical tests},\r\n   journal = {Am J Hum Genet},\r\n   volume = {95},\r\n   number = {1},\r\n   pages = {5-23},\r\n   year = {2014},\r\n}\r\n\r\n@article{RN514,\r\n   author = {Wu, B. and Guan, W. and Pankow, J. 
S.},\r\n   title = {On Efficient and Accurate Calculation of Significance P-Values for Sequence Kernel Association Testing of Variant Set},\r\n   journal = {Ann Hum Genet},\r\n   volume = {80},\r\n   number = {2},\r\n   pages = {123-35},\r\n   year = {2016},\r\n}\r\n\r\n@article{RN245,\r\n   author = {Dey, Rounak and Schmidt, Ellen M. and Abecasis, Goncalo R. and Lee, Seunggeun},\r\n   title = {A Fast and Accurate Algorithm to Test for Binary Phenotypes and Its Application to PheWAS},\r\n   journal = {The American Journal of Human Genetics},\r\n   volume = {101},\r\n   number = {1},\r\n   pages = {37-49},\r\n   year = {2017},\r\n}\r\n\r\n@article{RN417,\r\n   author = {Young, A. I. and Wauthier, F. L. and Donnelly, P.},\r\n   title = {Identifying loci affecting trait variability and detecting interactions in genome-wide association studies},\r\n   journal = {Nat Genet},\r\n   volume = {50},\r\n   number = {11},\r\n   pages = {1608-1614},\r\n   year = {2018},\r\n}\r\n\r\n@article{RN339,\r\n   author = {Liu, Y. and Chen, S. and Li, Z. and Morrison, A. C. and Boerwinkle, E. and Lin, X.},\r\n   title = {ACAT: A Fast and Powerful p Value Combination Method for Rare-Variant Analysis in Sequencing Studies},\r\n   journal = {Am J Hum Genet},\r\n   volume = {104},\r\n   number = {3},\r\n   pages = {410-421},\r\n   year = {2019},\r\n}\r\n\r\n@article{RN482,\r\n   author = {Liu, Y. and Xie, J.},\r\n   title = {Cauchy combination test: a powerful test with analytic p-value calculation under arbitrary dependency structures},\r\n   journal = {J Am Stat Assoc},\r\n   volume = {115},\r\n   number = {529},\r\n   pages = {393-402},\r\n   year = {2020},\r\n}\r\n\r\n@article{RN452,\r\n   author = {Zhao, Z. and Bi, W. and Zhou, W. and VandeHaar, P. and Fritsche, L. G. and Lee, S.},\r\n   title = {UK Biobank Whole-Exome Sequence Binary Phenome Analysis with Robust Region-Based Rare-Variant Test},\r\n   journal = {Am J Hum Genet},\r\n   volume = {106},\r\n   number = {1},\r\n   pages = {3-12},\r\n   year = {2020},\r\n}\r\n\r\n@article{RN491,\r\n   author = {Ziyatdinov, Andrey and Barber, Mathew and Marchini, Jonathan},\r\n   title = {Pooling information across burden tests in the UK Biobank exome sequencing study.},\r\n   journal = {ASHG Conference},\r\n   year = {2020},\r\n}\r\n\r\n@article{RN457,\r\n   author = {Backman, J. D. and Li, A. H. and Marcketta, A. and Sun, D. and Mbatchou, J. and Kessler, M. D. and Benner, C. and Liu, D. and Locke, A. E. and Balasubramanian, S. and Yadav, A. and Banerjee, N. and Gillies, C. E. and Damask, A. and Liu, S. and Bai, X. and Hawes, A. and Maxwell, E. and Gurski, L. and Watanabe, K. and Kosmicki, J. A. and Rajagopal, V. and Mighty, J. and Regeneron Genetics, Center and DiscovEhr and Jones, M. and Mitnaul, L. and Stahl, E. and Coppola, G. and Jorgenson, E. and Habegger, L. and Salerno, W. J. and Shuldiner, A. R. and Lotta, L. A. and Overton, J. D. and Cantor, M. N. and Reid, J. G. and Yancopoulos, G. and Kang, H. M. and Marchini, J. and Baras, A. and Abecasis, G. R. and Ferreira, M. A. R.},\r\n   title = {Exome sequencing and analysis of 454,787 UK Biobank participants},\r\n   journal = {Nature},\r\n   volume = {599},\r\n   number = {7886},\r\n   pages = {628-634},\r\n   year = {2021},\r\n}\r\n\r\n@article{RN492,\r\n   author = {Zhou, Wei and Bi, Wenjian and Zhao, Zhangchen and Dey, Kushal K. and Jagadeesh, Karthik A. and Karczewski, Konrad J. and Daly, Mark J. and Neale, Benjamin M. 
and Lee, Seunggeun},\r\n   title = {Set-based rare variant association tests for biobank scale sequencing data sets},\r\n   journal = {medRxiv},\r\n   year = {2021},\r\n}\r\n\r\n@article{simon2011regularization,\r\n  title={Regularization paths for Cox's proportional hazards model via coordinate descent},\r\n  author={Simon, Noah and Friedman, Jerome H and Hastie, Trevor and Tibshirani, Rob},\r\n  journal={Journal of statistical software},\r\n  volume={39},\r\n  pages={1--13},\r\n  year={2011}\r\n}\r\n\r\n@article{heinze2001solution,\r\n  title={A solution to the problem of monotone likelihood in Cox regression},\r\n  author={Heinze, Georg and Schemper, Michael},\r\n  journal={Biometrics},\r\n  volume={57},\r\n  number={1},\r\n  pages={114--119},\r\n  year={2001},\r\n  publisher={Wiley Online Library}\r\n}\r\n\r\n@article{RN703,\r\n   author = {Ziyatdinov, Andrey and Mbatchou, Joelle and Marcketta, Anthony and Backman, Joshua and Gaynor, Sheila and Zou, Yuxin and Joseph, Tyler and Geraghty, Benjamin and Herman, Joseph and Watanabe, Kyoko and Ghosh, Arkopravo and Kosmicki, Jack and Locke, Adam and Thornton, Timothy and Kang, Hyun Min and Ferreira, Manuel and Baras, Aris and Abecasis, Goncalo and Marchini, Jonathan},\r\n   title = {Joint testing of rare variant burden scores using non-negative least squares},\r\n   journal = {The American Journal of Human Genetics},\r\n   volume = {111},\r\n   number = {10},\r\n   pages = {2139-2149},\r\n   year = {2024},\r\n}\r\n\r\n\r\n"
  },
  {
    "path": "docs/site/css/base.css",
    "content": "body {\n    padding-top: 70px;\n}\n\nh1[id]:before, h2[id]:before, h3[id]:before, h4[id]:before, h5[id]:before, h6[id]:before {\n    content: \"\";\n    display: block;\n    margin-top: -75px;\n    height: 75px;\n}\n\np > img {\n    max-width: 100%;\n    height: auto;\n}\n\nul.nav li.first-level {\n    font-weight: bold;\n}\n\nul.nav li.third-level {\n    padding-left: 12px;\n}\n\ndiv.col-md-3 {\n    padding-left: 0;\n}\n\ndiv.col-md-9 {\n    padding-bottom: 100px;\n}\n\ndiv.source-links {\n    float: right;\n}\n\n/*\n * Side navigation\n *\n * Scrollspy and affixed enhanced navigation to highlight sections and secondary\n * sections of docs content.\n */\n\n/* By default it's not affixed in mobile views, so undo that */\n.bs-sidebar.affix {\n    position: static;\n}\n\n.bs-sidebar.well {\n    padding: 0;\n}\n\n/* First level of nav */\n.bs-sidenav {\n    margin-top: 30px;\n    margin-bottom: 30px;\n    padding-top:    10px;\n    padding-bottom: 10px;\n    border-radius: 5px;\n}\n\n/* All levels of nav */\n.bs-sidebar .nav > li > a {\n    display: block;\n    padding: 5px 20px;\n    z-index: 1;\n}\n.bs-sidebar .nav > li > a:hover,\n.bs-sidebar .nav > li > a:focus {\n    text-decoration: none;\n    border-right: 1px solid;\n}\n.bs-sidebar .nav > .active > a,\n.bs-sidebar .nav > .active:hover > a,\n.bs-sidebar .nav > .active:focus > a {\n    font-weight: bold;\n    background-color: transparent;\n    border-right: 1px solid;\n}\n\n/* Nav: second level (shown on .active) */\n.bs-sidebar .nav .nav {\n    display: none; /* Hide by default, but at >768px, show it */\n    margin-bottom: 8px;\n}\n.bs-sidebar .nav .nav > li > a {\n    padding-top:    3px;\n    padding-bottom: 3px;\n    padding-left: 30px;\n    font-size: 90%;\n}\n\n/* Show and affix the side nav when space allows it */\n@media (min-width: 992px) {\n    /* Allow the sidebar to scroll if it overflows the page. 
*/\n    .bs-sidebar {\n        overflow-y: scroll;\n    }\n\n    .bs-sidebar .nav > .active > ul {\n        display: block;\n    }\n    /* Widen the fixed sidebar */\n    .bs-sidebar.affix,\n    .bs-sidebar.affix-bottom {\n        width: 213px;\n    }\n    .bs-sidebar.affix {\n        position: fixed; /* Undo the static from mobile first approach */\n        top: 80px;\n        max-height: calc(100% - 90px);\n    }\n    .bs-sidebar.affix-bottom {\n        position: absolute; /* Undo the static from mobile first approach */\n    }\n    .bs-sidebar.affix-bottom .bs-sidenav,\n    .bs-sidebar.affix .bs-sidenav {\n        margin-top: 0;\n        margin-bottom: 0;\n    }\n}\n@media (min-width: 1200px) {\n    /* Widen the fixed sidebar again */\n    .bs-sidebar.affix-bottom,\n    .bs-sidebar.affix {\n        width: 263px;\n    }\n}\n\n\n/* Added to support >2 level nav in drop down */\n\n.dropdown-submenu {\n    position: relative;\n}\n\n.dropdown-submenu>.dropdown-menu {\n    top: 0;\n    left: 100%;\n    margin-top: 0px;\n    margin-left: 0px;\n}\n\n.dropdown-submenu:hover>.dropdown-menu {\n    display: block;\n}\n\n.dropdown-submenu>a:after {\n    display: block;\n    content: \" \";\n    float: right;\n    width: 0;\n    height: 0;\n    border-color: transparent;\n    border-style: solid;\n    border-width: 5px 0 5px 5px;\n    border-left-color: #ccc;\n    margin-top: 5px;\n    margin-right: -10px;\n}\n\n.dropdown-submenu:hover>a:after {\n    border-left-color: #fff;\n}\n\n.dropdown-submenu.pull-left {\n    float: none;\n}\n\n.dropdown-submenu.pull-left>.dropdown-menu {\n    left: -100%;\n    margin-left: 00px;\n}\n/* Start Bootstrap Callouts CSS Source by Chris Pratt (https://codepen.io/chrisdpratt/pen/IAymB) MIT License*/\n.bs-callout {\n    padding: 20px;\n    margin: 20px 0;\n    border: 1px solid #eee;\n    border-left-width: 5px;\n    border-radius: 3px;\n    background-color: #FCFDFF;\n}\n.bs-callout h4 {\n    font-style: normal;\n    font-weight: 400;\n    margin-top: 0;\n    margin-bottom: 5px;\n}\n.bs-callout p:last-child {\n    margin-bottom: 0;\n}\n.bs-callout code {\n    border-radius: 3px;\n}\n.bs-callout+.bs-callout {\n    margin-top: -5px;\n}\n.bs-callout-default {\n    border-left-color: #FA023C; /*modified from upstream default by Christopher Simpkins*/\n}\n.bs-callout-default h4 {\n    color: #FA023C; /*modified from upstream default by Christopher Simpkins*/\n}\n.bs-callout-primary {\n    border-left-color: #428bca;\n}\n.bs-callout-primary h4 {\n    color: #428bca;\n}\n.bs-callout-success {\n    border-left-color: #5cb85c;\n}\n.bs-callout-success h4 {\n    color: #5cb85c;\n}\n.bs-callout-danger {\n    border-left-color: #d9534f;\n}\n.bs-callout-danger h4 {\n    color: #d9534f;\n}\n.bs-callout-warning {\n    border-left-color: #f0ad4e;\n}\n.bs-callout-warning h4 {\n    color: #f0ad4e;\n}\n.bs-callout-info {\n    border-left-color: #5bc0de;\n}\n.bs-callout-info h4 {\n    color: #5bc0de;\n}\n/* End Bootstrap Callouts CSS Source by Chris Pratt */\n\n/* Admonitions */\n.admonition {\n    padding: 20px;\n    margin: 20px 0;\n    border: 1px solid #eee;\n    border-left-width: 5px;\n    border-radius: 3px;\n    background-color: #FCFDFF;\n}\n\n.admonition p:last-child {\n    margin-bottom: 0;\n}\n.admonition code {\n    border-radius: 3px;\n}\n.admonition+.admonition {\n    margin-top: -5px;\n}\n\n.admonition.note { /* csslint allow: adjoining-classes */\n    border-left-color: #428bca;\n}\n\n.admonition.warning { /* csslint allow: adjoining-classes */\n    border-left-color: 
#f0ad4e;\n}\n\n.admonition.danger { /* csslint allow: adjoining-classes */\n    border-left-color: #d9534f;\n}\n\n.admonition-title {\n    font-size: 19px;\n    font-style: normal;\n    font-weight: 400;\n    margin-top: 0;\n    margin-bottom: 5px;\n}\n\n.admonition.note > .admonition-title {\n    color: #428bca;\n}\n\n.admonition.warning > .admonition-title {\n    color: #f0ad4e;\n}\n\n.admonition.danger > .admonition-title {\n    color: #d9534f;\n}\n"
  },
  {
    "path": "docs/site/css/bootstrap-custom.css",
    "content": "/*! normalize.css v2.1.3 | MIT License | git.io/normalize */\narticle, aside, details, figcaption, figure, footer, header, hgroup, main, nav, section, summary {\n    display: block;\n}\naudio, canvas, video {\n    display: inline-block;\n}\naudio:not([controls]) {\n    display: none;\n    height: 0;\n}\n[hidden], template {\n    display: none;\n}\nhtml {\n    font-family: sans-serif;\n    -webkit-text-size-adjust: 100%;\n    -ms-text-size-adjust: 100%}\nbody {\n    margin: 0;\n}\na {\n    background: transparent;\n}\na:focus {\n    outline: thin dotted;\n}\na:active, a:hover {\n    outline: 0;\n}\nh1 {\n    margin: .67em 0;\n    font-size: 2em;\n}\nabbr[title] {\n    border-bottom: 1px dotted;\n}\nb, strong {\n    font-weight: bold;\n}\ndfn {\n    font-style: italic;\n}\nhr {\n    height: 0;\n    -moz-box-sizing: content-box;\n    box-sizing: content-box;\n}\nmark {\n    color: #000;\n    background: #ff0;\n}\ncode, kbd, pre, samp {\n    font-family: Hack, monospace, serif;\n    font-size: 1em;\n}\npre {\n    white-space: pre-wrap;\n}\nq {\n    quotes: \"\\201C\" \"\\201D\" \"\\2018\" \"\\2019\"}\nsmall {\n    font-size: 80%}\nsub, sup {\n    position: relative;\n    font-size: 75%;\n    line-height: 0;\n    vertical-align: baseline;\n}\nsup {\n    top: -0.5em;\n}\nsub {\n    bottom: -0.25em;\n}\nimg {\n    border: 0;\n}\nsvg:not(:root) {\n    overflow: hidden;\n}\nfigure {\n    margin: 0;\n}\nfieldset {\n    padding: .35em .625em .75em;\n    margin: 0 2px;\n    border: 1px solid #c0c0c0;\n}\nlegend {\n    padding: 0;\n    border: 0;\n}\nbutton, input, select, textarea {\n    margin: 0;\n    font-family: inherit;\n    font-size: 100%}\nbutton, input {\n    line-height: normal;\n}\nbutton, select {\n    text-transform: none;\n}\nbutton, html input[type=\"button\"], input[type=\"reset\"], input[type=\"submit\"] {\n    cursor: pointer;\n    -webkit-appearance: button;\n}\nbutton[disabled], html input[disabled] {\n    cursor: default;\n}\ninput[type=\"checkbox\"], input[type=\"radio\"] {\n    padding: 0;\n    box-sizing: border-box;\n}\ninput[type=\"search\"] {\n    -webkit-box-sizing: content-box;\n    -moz-box-sizing: content-box;\n    box-sizing: content-box;\n    -webkit-appearance: textfield;\n}\ninput[type=\"search\"]::-webkit-search-cancel-button, input[type=\"search\"]::-webkit-search-decoration {\n    -webkit-appearance: none;\n}\nbutton::-moz-focus-inner, input::-moz-focus-inner {\n    padding: 0;\n    border: 0;\n}\ntextarea {\n    overflow: auto;\n    vertical-align: top;\n}\ntable {\n    border-collapse: collapse;\n    border-spacing: 0;\n}\n@media print {\n    * {\n    color: #000!important;\n    text-shadow: none!important;\n    background: transparent!important;\n    box-shadow: none!important;\n}\na, a:visited {\n    text-decoration: underline;\n}\na[href]:after {\n    content: \" (\" attr(href) \")\"}\nabbr[title]:after {\n    content: \" (\" attr(title) \")\"}\na[href^=\"javascript:\"]:after, a[href^=\"#\"]:after {\n    content: \"\"}\npre, blockquote {\n    border: 1px solid #999;\n    page-break-inside: avoid;\n}\nthead {\n    display: table-header-group;\n}\ntr, img {\n    page-break-inside: avoid;\n}\nimg {\n    max-width: 100%!important;\n}\n@page {\n    margin: 2cm .5cm;\n}\np, h2, h3 {\n    orphans: 3;\n    widows: 3;\n}\nh2, h3 {\n    page-break-after: avoid;\n}\nselect {\n    background: #fff!important;\n}\n.navbar {\n    display: none;\n}\n.table td, .table th {\n    background-color: #fff!important;\n}\n.btn>.caret, .dropup>.btn>.caret {\n    
border-top-color: #000!important;\n}\n.label {\n    border: 1px solid #000;\n}\n.table {\n    border-collapse: collapse!important;\n}\n.table-bordered th, .table-bordered td {\n    border: 1px solid #ddd!important;\n}\n}*, *:before, *:after {\n    -webkit-box-sizing: border-box;\n    -moz-box-sizing: border-box;\n    box-sizing: border-box;\n}\nhtml {\n    font-size: 62.5%;\n    -webkit-tap-highlight-color: rgba(0, 0, 0, 0);\n}\nbody {\n    font-family: Merriweather, Georgia, serif;\n    font-size: 14px;\n    line-height: 1.428571429;\n    color: #222;\n    background-color: #fff;\n}\ninput, button, select, textarea {\n    font-family: inherit;\n    font-size: inherit;\n    line-height: inherit;\n}\na {\n    color: #008cba;\n    text-decoration: none;\n}\na:hover, a:focus {\n    color: #00526e;\n    text-decoration: underline;\n}\na:focus {\n    outline: thin dotted;\n    outline: 5px auto -webkit-focus-ring-color;\n    outline-offset: -2px;\n}\nimg {\n    vertical-align: middle;\n}\n.img-responsive {\n    display: block;\n    height: auto;\n    max-width: 100%}\n.img-rounded {\n    border-radius: 0;\n}\n.img-thumbnail {\n    display: inline-block;\n    height: auto;\n    max-width: 100%;\n    padding: 4px;\n    line-height: 1.428571429;\n    background-color: #fff;\n    border: 1px solid #ddd;\n    border-radius: 0;\n    -webkit-transition: all .2s ease-in-out;\n    transition: all .2s ease-in-out;\n}\n.img-circle {\n    border-radius: 50%}\nhr {\n    margin-top: 21px;\n    margin-bottom: 21px;\n    border: 0;\n    border-top: 1px solid #ddd;\n}\n.sr-only {\n    position: absolute;\n    width: 1px;\n    height: 1px;\n    padding: 0;\n    margin: -1px;\n    overflow: hidden;\n    clip: rect(0, 0, 0, 0);\n    border: 0;\n}\nh1, h2, h3, h4, h5, h6, .h1, .h2, .h3, .h4, .h5, .h6 {\n    font-family: \"Open Sans\", \"Helvetica Neue\", Helvetica, Arial, sans-serif;\n    font-weight: 300;\n    line-height: 1.1;\n    color: inherit;\n}\nh1 small, h2 small, h3 small, h4 small, h5 small, h6 small, .h1 small, .h2 small, .h3 small, .h4 small, .h5 small, .h6 small, h1 .small, h2 .small, h3 .small, h4 .small, h5 .small, h6 .small, .h1 .small, .h2 .small, .h3 .small, .h4 .small, .h5 .small, .h6 .small {\n    font-weight: normal;\n    line-height: 1;\n    color: #999;\n}\nh1, h2, h3 {\n    margin-top: 21px;\n    margin-bottom: 10.5px;\n}\nh1 small, h2 small, h3 small, h1 .small, h2 .small, h3 .small {\n    font-size: 65%}\nh4, h5, h6 {\n    margin-top: 10.5px;\n    margin-bottom: 10.5px;\n}\nh4 small, h5 small, h6 small, h4 .small, h5 .small, h6 .small {\n    font-size: 75%}\nh1, .h1 {\n    font-size: 39px;\n}\nh2, .h2 {\n    font-size: 32px;\n}\nh3, .h3 {\n    font-size: 26px;\n}\nh4, .h4 {\n    font-size: 19px;\n}\nh5, .h5 {\n    font-size: 15px;\n}\nh6, .h6 {\n    font-size: 13px;\n}\np {\n    margin: 0 0 10.5px;\n}\n.lead {\n    margin-bottom: 21px;\n    font-size: 17px;\n    font-weight: 200;\n    line-height: 1.4;\n}\n@media(min-width:768px) {\n    .lead {\n    font-size: 22.5px;\n}\n}small, .small {\n    font-size: 85%}\ncite {\n    font-style: normal;\n}\n.text-muted {\n    color: #999;\n}\n.text-primary {\n    color: #008cba;\n}\n.text-primary:hover {\n    color: #006687;\n}\n.text-warning {\n    color: #e99002;\n}\n.text-warning:hover {\n    color: #b67102;\n}\n.text-danger {\n    color: #f04124;\n}\n.text-danger:hover {\n    color: #d32a0e;\n}\n.text-success {\n    color: #43ac6a;\n}\n.text-success:hover {\n    color: #358753;\n}\n.text-info {\n    color: #5bc0de;\n}\n.text-info:hover {\n    
color: #31b0d5;\n}\n.text-left {\n    text-align: left;\n}\n.text-right {\n    text-align: right;\n}\n.text-center {\n    text-align: center;\n}\n.page-header {\n    padding-bottom: 9.5px;\n    margin: 42px 0 21px;\n    border-bottom: 1px solid #ddd;\n}\nul, ol {\n    margin-top: 0;\n    margin-bottom: 10.5px;\n}\nul ul, ol ul, ul ol, ol ol {\n    margin-bottom: 0;\n}\n.list-unstyled {\n    padding-left: 0;\n    list-style: none;\n}\n.list-inline {\n    padding-left: 0;\n    list-style: none;\n}\n.list-inline>li {\n    display: inline-block;\n    padding-right: 5px;\n    padding-left: 5px;\n}\n.list-inline>li:first-child {\n    padding-left: 0;\n}\ndl {\n    margin-top: 0;\n    margin-bottom: 21px;\n}\ndt, dd {\n    line-height: 1.428571429;\n}\ndt {\n    font-weight: bold;\n}\ndd {\n    margin-left: 0;\n}\n@media(min-width:768px) {\n    .dl-horizontal dt {\n    float: left;\n    width: 160px;\n    overflow: hidden;\n    clear: left;\n    text-align: right;\n    text-overflow: ellipsis;\n    white-space: nowrap;\n}\n.dl-horizontal dd {\n    margin-left: 180px;\n}\n.dl-horizontal dd:before, .dl-horizontal dd:after {\n    display: table;\n    content: \" \"}\n.dl-horizontal dd:after {\n    clear: both;\n}\n.dl-horizontal dd:before, .dl-horizontal dd:after {\n    display: table;\n    content: \" \"}\n.dl-horizontal dd:after {\n    clear: both;\n}\n.dl-horizontal dd:before, .dl-horizontal dd:after {\n    display: table;\n    content: \" \"}\n.dl-horizontal dd:after {\n    clear: both;\n}\n.dl-horizontal dd:before, .dl-horizontal dd:after {\n    display: table;\n    content: \" \"}\n.dl-horizontal dd:after {\n    clear: both;\n}\n.dl-horizontal dd:before, .dl-horizontal dd:after {\n    display: table;\n    content: \" \"}\n.dl-horizontal dd:after {\n    clear: both;\n}\n}abbr[title], abbr[data-original-title] {\n    cursor: help;\n    border-bottom: 1px dotted #999;\n}\n.initialism {\n    font-size: 90%;\n    text-transform: uppercase;\n}\nblockquote {\n    padding: 10.5px 21px;\n    margin: 0 0 21px;\n    border-left: 5px solid #ddd;\n}\nblockquote p {\n    font-size: 18.75px;\n    font-weight: 300;\n    line-height: 1.25;\n}\nblockquote p:last-child {\n    margin-bottom: 0;\n}\nblockquote small, blockquote .small {\n    display: block;\n    line-height: 1.428571429;\n    color: #6f6f6f;\n}\nblockquote small:before, blockquote .small:before {\n    content: '\\2014 \\00A0'}\nblockquote.pull-right {\n    padding-right: 15px;\n    padding-left: 0;\n    border-right: 5px solid #ddd;\n    border-left: 0;\n}\nblockquote.pull-right p, blockquote.pull-right small, blockquote.pull-right .small {\n    text-align: right;\n}\nblockquote.pull-right small:before, blockquote.pull-right .small:before {\n    content: ''}\nblockquote.pull-right small:after, blockquote.pull-right .small:after {\n    content: '\\00A0 \\2014'}\nblockquote:before, blockquote:after {\n    content: \"\"}\naddress {\n    margin-bottom: 21px;\n    font-style: normal;\n    line-height: 1.428571429;\n}\ncode, kbd, pre, samp {\n    font-family: Hack, Menlo, Monaco, Consolas, \"Courier New\", monospace;\n}\ncode {\n    padding: 2px 4px;\n    font-size: 90%;\n    color: #c7254e;\n    white-space: nowrap;\n    background-color: #f9f2f4;\n    border-radius: 0;\n}\npre {\n    display: block;\n    padding: 10px;\n    margin: 0 0 10.5px;\n    font-size: 14px;\n    line-height: 1.428571429;\n    color: #333;\n    word-break: break-all;\n    word-wrap: break-word;\n    background-color: #f5f5f5;\n    border: 1px solid #ccc;\n    border-radius: 
0;\n}\npre code {\n    padding: 0;\n    font-size: inherit;\n    color: inherit;\n    white-space: pre-wrap;\n    background-color: transparent;\n    border-radius: 0;\n}\n.pre-scrollable {\n    max-height: 340px;\n    overflow-y: scroll;\n}\n.container {\n    padding-right: 15px;\n    padding-left: 15px;\n    margin-right: auto;\n    margin-left: auto;\n}\n.container:before, .container:after {\n    display: table;\n    content: \" \"}\n.container:after {\n    clear: both;\n}\n.container:before, .container:after {\n    display: table;\n    content: \" \"}\n.container:after {\n    clear: both;\n}\n.container:before, .container:after {\n    display: table;\n    content: \" \"}\n.container:after {\n    clear: both;\n}\n.container:before, .container:after {\n    display: table;\n    content: \" \"}\n.container:after {\n    clear: both;\n}\n.container:before, .container:after {\n    display: table;\n    content: \" \"}\n.container:after {\n    clear: both;\n}\n@media(min-width:768px) {\n    .container {\n    width: 750px;\n}\n}@media(min-width:992px) {\n    .container {\n    width: 970px;\n}\n}@media(min-width:1200px) {\n    .container {\n    width: 1170px;\n}\n}.row {\n    margin-right: -15px;\n    margin-left: -15px;\n}\n.row:before, .row:after {\n    display: table;\n    content: \" \"}\n.row:after {\n    clear: both;\n}\n.row:before, .row:after {\n    display: table;\n    content: \" \"}\n.row:after {\n    clear: both;\n}\n.row:before, .row:after {\n    display: table;\n    content: \" \"}\n.row:after {\n    clear: both;\n}\n.row:before, .row:after {\n    display: table;\n    content: \" \"}\n.row:after {\n    clear: both;\n}\n.row:before, .row:after {\n    display: table;\n    content: \" \"}\n.row:after {\n    clear: both;\n}\n.col-xs-1, .col-sm-1, .col-md-1, .col-lg-1, .col-xs-2, .col-sm-2, .col-md-2, .col-lg-2, .col-xs-3, .col-sm-3, .col-md-3, .col-lg-3, .col-xs-4, .col-sm-4, .col-md-4, .col-lg-4, .col-xs-5, .col-sm-5, .col-md-5, .col-lg-5, .col-xs-6, .col-sm-6, .col-md-6, .col-lg-6, .col-xs-7, .col-sm-7, .col-md-7, .col-lg-7, .col-xs-8, .col-sm-8, .col-md-8, .col-lg-8, .col-xs-9, .col-sm-9, .col-md-9, .col-lg-9, .col-xs-10, .col-sm-10, .col-md-10, .col-lg-10, .col-xs-11, .col-sm-11, .col-md-11, .col-lg-11, .col-xs-12, .col-sm-12, .col-md-12, .col-lg-12 {\n    position: relative;\n    min-height: 1px;\n    padding-right: 15px;\n    padding-left: 15px;\n}\n.col-xs-1, .col-xs-2, .col-xs-3, .col-xs-4, .col-xs-5, .col-xs-6, .col-xs-7, .col-xs-8, .col-xs-9, .col-xs-10, .col-xs-11, .col-xs-12 {\n    float: left;\n}\n.col-xs-12 {\n    width: 100%}\n.col-xs-11 {\n    width: 91.66666666666666%}\n.col-xs-10 {\n    width: 83.33333333333334%}\n.col-xs-9 {\n    width: 75%}\n.col-xs-8 {\n    width: 66.66666666666666%}\n.col-xs-7 {\n    width: 58.333333333333336%}\n.col-xs-6 {\n    width: 50%}\n.col-xs-5 {\n    width: 41.66666666666667%}\n.col-xs-4 {\n    width: 33.33333333333333%}\n.col-xs-3 {\n    width: 25%}\n.col-xs-2 {\n    width: 16.666666666666664%}\n.col-xs-1 {\n    width: 8.333333333333332%}\n.col-xs-pull-12 {\n    right: 100%}\n.col-xs-pull-11 {\n    right: 91.66666666666666%}\n.col-xs-pull-10 {\n    right: 83.33333333333334%}\n.col-xs-pull-9 {\n    right: 75%}\n.col-xs-pull-8 {\n    right: 66.66666666666666%}\n.col-xs-pull-7 {\n    right: 58.333333333333336%}\n.col-xs-pull-6 {\n    right: 50%}\n.col-xs-pull-5 {\n    right: 41.66666666666667%}\n.col-xs-pull-4 {\n    right: 33.33333333333333%}\n.col-xs-pull-3 {\n    right: 25%}\n.col-xs-pull-2 {\n    right: 
16.666666666666664%}\n.col-xs-pull-1 {\n    right: 8.333333333333332%}\n.col-xs-pull-0 {\n    right: 0;\n}\n.col-xs-push-12 {\n    left: 100%}\n.col-xs-push-11 {\n    left: 91.66666666666666%}\n.col-xs-push-10 {\n    left: 83.33333333333334%}\n.col-xs-push-9 {\n    left: 75%}\n.col-xs-push-8 {\n    left: 66.66666666666666%}\n.col-xs-push-7 {\n    left: 58.333333333333336%}\n.col-xs-push-6 {\n    left: 50%}\n.col-xs-push-5 {\n    left: 41.66666666666667%}\n.col-xs-push-4 {\n    left: 33.33333333333333%}\n.col-xs-push-3 {\n    left: 25%}\n.col-xs-push-2 {\n    left: 16.666666666666664%}\n.col-xs-push-1 {\n    left: 8.333333333333332%}\n.col-xs-push-0 {\n    left: 0;\n}\n.col-xs-offset-12 {\n    margin-left: 100%}\n.col-xs-offset-11 {\n    margin-left: 91.66666666666666%}\n.col-xs-offset-10 {\n    margin-left: 83.33333333333334%}\n.col-xs-offset-9 {\n    margin-left: 75%}\n.col-xs-offset-8 {\n    margin-left: 66.66666666666666%}\n.col-xs-offset-7 {\n    margin-left: 58.333333333333336%}\n.col-xs-offset-6 {\n    margin-left: 50%}\n.col-xs-offset-5 {\n    margin-left: 41.66666666666667%}\n.col-xs-offset-4 {\n    margin-left: 33.33333333333333%}\n.col-xs-offset-3 {\n    margin-left: 25%}\n.col-xs-offset-2 {\n    margin-left: 16.666666666666664%}\n.col-xs-offset-1 {\n    margin-left: 8.333333333333332%}\n.col-xs-offset-0 {\n    margin-left: 0;\n}\n@media(min-width:768px) {\n    .col-sm-1, .col-sm-2, .col-sm-3, .col-sm-4, .col-sm-5, .col-sm-6, .col-sm-7, .col-sm-8, .col-sm-9, .col-sm-10, .col-sm-11, .col-sm-12 {\n    float: left;\n}\n.col-sm-12 {\n    width: 100%}\n.col-sm-11 {\n    width: 91.66666666666666%}\n.col-sm-10 {\n    width: 83.33333333333334%}\n.col-sm-9 {\n    width: 75%}\n.col-sm-8 {\n    width: 66.66666666666666%}\n.col-sm-7 {\n    width: 58.333333333333336%}\n.col-sm-6 {\n    width: 50%}\n.col-sm-5 {\n    width: 41.66666666666667%}\n.col-sm-4 {\n    width: 33.33333333333333%}\n.col-sm-3 {\n    width: 25%}\n.col-sm-2 {\n    width: 16.666666666666664%}\n.col-sm-1 {\n    width: 8.333333333333332%}\n.col-sm-pull-12 {\n    right: 100%}\n.col-sm-pull-11 {\n    right: 91.66666666666666%}\n.col-sm-pull-10 {\n    right: 83.33333333333334%}\n.col-sm-pull-9 {\n    right: 75%}\n.col-sm-pull-8 {\n    right: 66.66666666666666%}\n.col-sm-pull-7 {\n    right: 58.333333333333336%}\n.col-sm-pull-6 {\n    right: 50%}\n.col-sm-pull-5 {\n    right: 41.66666666666667%}\n.col-sm-pull-4 {\n    right: 33.33333333333333%}\n.col-sm-pull-3 {\n    right: 25%}\n.col-sm-pull-2 {\n    right: 16.666666666666664%}\n.col-sm-pull-1 {\n    right: 8.333333333333332%}\n.col-sm-pull-0 {\n    right: 0;\n}\n.col-sm-push-12 {\n    left: 100%}\n.col-sm-push-11 {\n    left: 91.66666666666666%}\n.col-sm-push-10 {\n    left: 83.33333333333334%}\n.col-sm-push-9 {\n    left: 75%}\n.col-sm-push-8 {\n    left: 66.66666666666666%}\n.col-sm-push-7 {\n    left: 58.333333333333336%}\n.col-sm-push-6 {\n    left: 50%}\n.col-sm-push-5 {\n    left: 41.66666666666667%}\n.col-sm-push-4 {\n    left: 33.33333333333333%}\n.col-sm-push-3 {\n    left: 25%}\n.col-sm-push-2 {\n    left: 16.666666666666664%}\n.col-sm-push-1 {\n    left: 8.333333333333332%}\n.col-sm-push-0 {\n    left: 0;\n}\n.col-sm-offset-12 {\n    margin-left: 100%}\n.col-sm-offset-11 {\n    margin-left: 91.66666666666666%}\n.col-sm-offset-10 {\n    margin-left: 83.33333333333334%}\n.col-sm-offset-9 {\n    margin-left: 75%}\n.col-sm-offset-8 {\n    margin-left: 66.66666666666666%}\n.col-sm-offset-7 {\n    margin-left: 58.333333333333336%}\n.col-sm-offset-6 {\n    margin-left: 
50%}\n.col-sm-offset-5 {\n    margin-left: 41.66666666666667%}\n.col-sm-offset-4 {\n    margin-left: 33.33333333333333%}\n.col-sm-offset-3 {\n    margin-left: 25%}\n.col-sm-offset-2 {\n    margin-left: 16.666666666666664%}\n.col-sm-offset-1 {\n    margin-left: 8.333333333333332%}\n.col-sm-offset-0 {\n    margin-left: 0;\n}\n}@media(min-width:992px) {\n    .col-md-1, .col-md-2, .col-md-3, .col-md-4, .col-md-5, .col-md-6, .col-md-7, .col-md-8, .col-md-9, .col-md-10, .col-md-11, .col-md-12 {\n    float: left;\n}\n.col-md-12 {\n    width: 100%}\n.col-md-11 {\n    width: 91.66666666666666%}\n.col-md-10 {\n    width: 83.33333333333334%}\n.col-md-9 {\n    width: 75%}\n.col-md-8 {\n    width: 66.66666666666666%}\n.col-md-7 {\n    width: 58.333333333333336%}\n.col-md-6 {\n    width: 50%}\n.col-md-5 {\n    width: 41.66666666666667%}\n.col-md-4 {\n    width: 33.33333333333333%}\n.col-md-3 {\n    width: 25%}\n.col-md-2 {\n    width: 16.666666666666664%}\n.col-md-1 {\n    width: 8.333333333333332%}\n.col-md-pull-12 {\n    right: 100%}\n.col-md-pull-11 {\n    right: 91.66666666666666%}\n.col-md-pull-10 {\n    right: 83.33333333333334%}\n.col-md-pull-9 {\n    right: 75%}\n.col-md-pull-8 {\n    right: 66.66666666666666%}\n.col-md-pull-7 {\n    right: 58.333333333333336%}\n.col-md-pull-6 {\n    right: 50%}\n.col-md-pull-5 {\n    right: 41.66666666666667%}\n.col-md-pull-4 {\n    right: 33.33333333333333%}\n.col-md-pull-3 {\n    right: 25%}\n.col-md-pull-2 {\n    right: 16.666666666666664%}\n.col-md-pull-1 {\n    right: 8.333333333333332%}\n.col-md-pull-0 {\n    right: 0;\n}\n.col-md-push-12 {\n    left: 100%}\n.col-md-push-11 {\n    left: 91.66666666666666%}\n.col-md-push-10 {\n    left: 83.33333333333334%}\n.col-md-push-9 {\n    left: 75%}\n.col-md-push-8 {\n    left: 66.66666666666666%}\n.col-md-push-7 {\n    left: 58.333333333333336%}\n.col-md-push-6 {\n    left: 50%}\n.col-md-push-5 {\n    left: 41.66666666666667%}\n.col-md-push-4 {\n    left: 33.33333333333333%}\n.col-md-push-3 {\n    left: 25%}\n.col-md-push-2 {\n    left: 16.666666666666664%}\n.col-md-push-1 {\n    left: 8.333333333333332%}\n.col-md-push-0 {\n    left: 0;\n}\n.col-md-offset-12 {\n    margin-left: 100%}\n.col-md-offset-11 {\n    margin-left: 91.66666666666666%}\n.col-md-offset-10 {\n    margin-left: 83.33333333333334%}\n.col-md-offset-9 {\n    margin-left: 75%}\n.col-md-offset-8 {\n    margin-left: 66.66666666666666%}\n.col-md-offset-7 {\n    margin-left: 58.333333333333336%}\n.col-md-offset-6 {\n    margin-left: 50%}\n.col-md-offset-5 {\n    margin-left: 41.66666666666667%}\n.col-md-offset-4 {\n    margin-left: 33.33333333333333%}\n.col-md-offset-3 {\n    margin-left: 25%}\n.col-md-offset-2 {\n    margin-left: 16.666666666666664%}\n.col-md-offset-1 {\n    margin-left: 8.333333333333332%}\n.col-md-offset-0 {\n    margin-left: 0;\n}\n}@media(min-width:1200px) {\n    .col-lg-1, .col-lg-2, .col-lg-3, .col-lg-4, .col-lg-5, .col-lg-6, .col-lg-7, .col-lg-8, .col-lg-9, .col-lg-10, .col-lg-11, .col-lg-12 {\n    float: left;\n}\n.col-lg-12 {\n    width: 100%}\n.col-lg-11 {\n    width: 91.66666666666666%}\n.col-lg-10 {\n    width: 83.33333333333334%}\n.col-lg-9 {\n    width: 75%}\n.col-lg-8 {\n    width: 66.66666666666666%}\n.col-lg-7 {\n    width: 58.333333333333336%}\n.col-lg-6 {\n    width: 50%}\n.col-lg-5 {\n    width: 41.66666666666667%}\n.col-lg-4 {\n    width: 33.33333333333333%}\n.col-lg-3 {\n    width: 25%}\n.col-lg-2 {\n    width: 16.666666666666664%}\n.col-lg-1 {\n    width: 8.333333333333332%}\n.col-lg-pull-12 {\n    right: 
100%}\n.col-lg-pull-11 {\n    right: 91.66666666666666%}\n.col-lg-pull-10 {\n    right: 83.33333333333334%}\n.col-lg-pull-9 {\n    right: 75%}\n.col-lg-pull-8 {\n    right: 66.66666666666666%}\n.col-lg-pull-7 {\n    right: 58.333333333333336%}\n.col-lg-pull-6 {\n    right: 50%}\n.col-lg-pull-5 {\n    right: 41.66666666666667%}\n.col-lg-pull-4 {\n    right: 33.33333333333333%}\n.col-lg-pull-3 {\n    right: 25%}\n.col-lg-pull-2 {\n    right: 16.666666666666664%}\n.col-lg-pull-1 {\n    right: 8.333333333333332%}\n.col-lg-pull-0 {\n    right: 0;\n}\n.col-lg-push-12 {\n    left: 100%}\n.col-lg-push-11 {\n    left: 91.66666666666666%}\n.col-lg-push-10 {\n    left: 83.33333333333334%}\n.col-lg-push-9 {\n    left: 75%}\n.col-lg-push-8 {\n    left: 66.66666666666666%}\n.col-lg-push-7 {\n    left: 58.333333333333336%}\n.col-lg-push-6 {\n    left: 50%}\n.col-lg-push-5 {\n    left: 41.66666666666667%}\n.col-lg-push-4 {\n    left: 33.33333333333333%}\n.col-lg-push-3 {\n    left: 25%}\n.col-lg-push-2 {\n    left: 16.666666666666664%}\n.col-lg-push-1 {\n    left: 8.333333333333332%}\n.col-lg-push-0 {\n    left: 0;\n}\n.col-lg-offset-12 {\n    margin-left: 100%}\n.col-lg-offset-11 {\n    margin-left: 91.66666666666666%}\n.col-lg-offset-10 {\n    margin-left: 83.33333333333334%}\n.col-lg-offset-9 {\n    margin-left: 75%}\n.col-lg-offset-8 {\n    margin-left: 66.66666666666666%}\n.col-lg-offset-7 {\n    margin-left: 58.333333333333336%}\n.col-lg-offset-6 {\n    margin-left: 50%}\n.col-lg-offset-5 {\n    margin-left: 41.66666666666667%}\n.col-lg-offset-4 {\n    margin-left: 33.33333333333333%}\n.col-lg-offset-3 {\n    margin-left: 25%}\n.col-lg-offset-2 {\n    margin-left: 16.666666666666664%}\n.col-lg-offset-1 {\n    margin-left: 8.333333333333332%}\n.col-lg-offset-0 {\n    margin-left: 0;\n}\n}table {\n    max-width: 100%;\n    background-color: transparent;\n}\nth {\n    text-align: left;\n}\n.table {\n    width: 100%;\n    margin-bottom: 21px;\n}\n.table>thead>tr>th, .table>tbody>tr>th, .table>tfoot>tr>th, .table>thead>tr>td, .table>tbody>tr>td, .table>tfoot>tr>td {\n    padding: 8px;\n    line-height: 1.428571429;\n    vertical-align: top;\n    border-top: 1px solid #ddd;\n}\n.table>thead>tr>th {\n    vertical-align: bottom;\n    border-bottom: 2px solid #ddd;\n}\n.table>caption+thead>tr:first-child>th, .table>colgroup+thead>tr:first-child>th, .table>thead:first-child>tr:first-child>th, .table>caption+thead>tr:first-child>td, .table>colgroup+thead>tr:first-child>td, .table>thead:first-child>tr:first-child>td {\n    border-top: 0;\n}\n.table>tbody+tbody {\n    border-top: 2px solid #ddd;\n}\n.table .table {\n    background-color: #fff;\n}\n.table-condensed>thead>tr>th, .table-condensed>tbody>tr>th, .table-condensed>tfoot>tr>th, .table-condensed>thead>tr>td, .table-condensed>tbody>tr>td, .table-condensed>tfoot>tr>td {\n    padding: 5px;\n}\n.table-bordered {\n    border: 1px solid #ddd;\n}\n.table-bordered>thead>tr>th, .table-bordered>tbody>tr>th, .table-bordered>tfoot>tr>th, .table-bordered>thead>tr>td, .table-bordered>tbody>tr>td, .table-bordered>tfoot>tr>td {\n    border: 1px solid #ddd;\n}\n.table-bordered>thead>tr>th, .table-bordered>thead>tr>td {\n    border-bottom-width: 2px;\n}\n.table-striped>tbody>tr:nth-child(odd)>td, .table-striped>tbody>tr:nth-child(odd)>th {\n    background-color: #f9f9f9;\n}\n.table-hover>tbody>tr:hover>td, .table-hover>tbody>tr:hover>th {\n    background-color: #f5f5f5;\n}\ntable col[class*=\"col-\"] {\n    position: static;\n    display: table-column;\n    float: 
none;\n}\ntable td[class*=\"col-\"], table th[class*=\"col-\"] {\n    display: table-cell;\n    float: none;\n}\n.table>thead>tr>.active, .table>tbody>tr>.active, .table>tfoot>tr>.active, .table>thead>.active>td, .table>tbody>.active>td, .table>tfoot>.active>td, .table>thead>.active>th, .table>tbody>.active>th, .table>tfoot>.active>th {\n    background-color: #f5f5f5;\n}\n.table-hover>tbody>tr>.active:hover, .table-hover>tbody>.active:hover>td, .table-hover>tbody>.active:hover>th {\n    background-color: #e8e8e8;\n}\n.table>thead>tr>.success, .table>tbody>tr>.success, .table>tfoot>tr>.success, .table>thead>.success>td, .table>tbody>.success>td, .table>tfoot>.success>td, .table>thead>.success>th, .table>tbody>.success>th, .table>tfoot>.success>th {\n    background-color: #dff0d8;\n}\n.table-hover>tbody>tr>.success:hover, .table-hover>tbody>.success:hover>td, .table-hover>tbody>.success:hover>th {\n    background-color: #d0e9c6;\n}\n.table>thead>tr>.danger, .table>tbody>tr>.danger, .table>tfoot>tr>.danger, .table>thead>.danger>td, .table>tbody>.danger>td, .table>tfoot>.danger>td, .table>thead>.danger>th, .table>tbody>.danger>th, .table>tfoot>.danger>th {\n    background-color: #f2dede;\n}\n.table-hover>tbody>tr>.danger:hover, .table-hover>tbody>.danger:hover>td, .table-hover>tbody>.danger:hover>th {\n    background-color: #ebcccc;\n}\n.table>thead>tr>.warning, .table>tbody>tr>.warning, .table>tfoot>tr>.warning, .table>thead>.warning>td, .table>tbody>.warning>td, .table>tfoot>.warning>td, .table>thead>.warning>th, .table>tbody>.warning>th, .table>tfoot>.warning>th {\n    background-color: #fcf8e3;\n}\n.table-hover>tbody>tr>.warning:hover, .table-hover>tbody>.warning:hover>td, .table-hover>tbody>.warning:hover>th {\n    background-color: #faf2cc;\n}\n@media(max-width:767px) {\n    .table-responsive {\n    width: 100%;\n    margin-bottom: 15.75px;\n    overflow-x: scroll;\n    overflow-y: hidden;\n    border: 1px solid #ddd;\n    -ms-overflow-style: -ms-autohiding-scrollbar;\n    -webkit-overflow-scrolling: touch;\n}\n.table-responsive>.table {\n    margin-bottom: 0;\n}\n.table-responsive>.table>thead>tr>th, .table-responsive>.table>tbody>tr>th, .table-responsive>.table>tfoot>tr>th, .table-responsive>.table>thead>tr>td, .table-responsive>.table>tbody>tr>td, .table-responsive>.table>tfoot>tr>td {\n    white-space: nowrap;\n}\n.table-responsive>.table-bordered {\n    border: 0;\n}\n.table-responsive>.table-bordered>thead>tr>th:first-child, .table-responsive>.table-bordered>tbody>tr>th:first-child, .table-responsive>.table-bordered>tfoot>tr>th:first-child, .table-responsive>.table-bordered>thead>tr>td:first-child, .table-responsive>.table-bordered>tbody>tr>td:first-child, .table-responsive>.table-bordered>tfoot>tr>td:first-child {\n    border-left: 0;\n}\n.table-responsive>.table-bordered>thead>tr>th:last-child, .table-responsive>.table-bordered>tbody>tr>th:last-child, .table-responsive>.table-bordered>tfoot>tr>th:last-child, .table-responsive>.table-bordered>thead>tr>td:last-child, .table-responsive>.table-bordered>tbody>tr>td:last-child, .table-responsive>.table-bordered>tfoot>tr>td:last-child {\n    border-right: 0;\n}\n.table-responsive>.table-bordered>tbody>tr:last-child>th, .table-responsive>.table-bordered>tfoot>tr:last-child>th, .table-responsive>.table-bordered>tbody>tr:last-child>td, .table-responsive>.table-bordered>tfoot>tr:last-child>td {\n    border-bottom: 0;\n}\n}fieldset {\n    padding: 0;\n    margin: 0;\n    border: 0;\n}\nlegend {\n    display: block;\n    width: 100%;\n    
padding: 0;\n    margin-bottom: 21px;\n    font-size: 22.5px;\n    line-height: inherit;\n    color: #333;\n    border: 0;\n    border-bottom: 1px solid #e5e5e5;\n}\nlabel {\n    display: inline-block;\n    margin-bottom: 5px;\n    font-weight: bold;\n}\ninput[type=\"search\"] {\n    -webkit-box-sizing: border-box;\n    -moz-box-sizing: border-box;\n    box-sizing: border-box;\n}\ninput[type=\"radio\"], input[type=\"checkbox\"] {\n    margin: 4px 0 0;\n    margin-top: 1px \\9;\n    line-height: normal;\n}\ninput[type=\"file\"] {\n    display: block;\n}\nselect[multiple], select[size] {\n    height: auto;\n}\nselect optgroup {\n    font-family: inherit;\n    font-size: inherit;\n    font-style: inherit;\n}\ninput[type=\"file\"]:focus, input[type=\"radio\"]:focus, input[type=\"checkbox\"]:focus {\n    outline: thin dotted;\n    outline: 5px auto -webkit-focus-ring-color;\n    outline-offset: -2px;\n}\ninput[type=\"number\"]::-webkit-outer-spin-button, input[type=\"number\"]::-webkit-inner-spin-button {\n    height: auto;\n}\noutput {\n    display: block;\n    padding-top: 7px;\n    font-size: 15px;\n    line-height: 1.428571429;\n    color: #6f6f6f;\n    vertical-align: middle;\n}\n.form-control {\n    display: block;\n    width: 100%;\n    height: 35px;\n    padding: 6px 12px;\n    font-size: 15px;\n    line-height: 1.428571429;\n    color: #6f6f6f;\n    vertical-align: middle;\n    background-color: #fff;\n    background-image: none;\n    border: 1px solid #ccc;\n    border-radius: 0;\n    -webkit-box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075);\n    box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075);\n    -webkit-transition: border-color ease-in-out .15s, box-shadow ease-in-out .15s;\n    transition: border-color ease-in-out .15s, box-shadow ease-in-out .15s;\n}\n.form-control:focus {\n    border-color: #66afe9;\n    outline: 0;\n    -webkit-box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075), 0 0 8px rgba(102, 175, 233, 0.6);\n    box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075), 0 0 8px rgba(102, 175, 233, 0.6);\n}\n.form-control:-moz-placeholder {\n    color: #999;\n}\n.form-control::-moz-placeholder {\n    color: #999;\n    opacity: 1;\n}\n.form-control:-ms-input-placeholder {\n    color: #999;\n}\n.form-control::-webkit-input-placeholder {\n    color: #999;\n}\n.form-control[disabled], .form-control[readonly], fieldset[disabled] .form-control {\n    cursor: not-allowed;\n    background-color: #eee;\n}\ntextarea.form-control {\n    height: auto;\n}\n.form-group {\n    margin-bottom: 15px;\n}\n.radio, .checkbox {\n    display: block;\n    min-height: 21px;\n    padding-left: 20px;\n    margin-top: 10px;\n    margin-bottom: 10px;\n    vertical-align: middle;\n}\n.radio label, .checkbox label {\n    display: inline;\n    margin-bottom: 0;\n    font-weight: normal;\n    cursor: pointer;\n}\n.radio input[type=\"radio\"], .radio-inline input[type=\"radio\"], .checkbox input[type=\"checkbox\"], .checkbox-inline input[type=\"checkbox\"] {\n    float: left;\n    margin-left: -20px;\n}\n.radio+.radio, .checkbox+.checkbox {\n    margin-top: -5px;\n}\n.radio-inline, .checkbox-inline {\n    display: inline-block;\n    padding-left: 20px;\n    margin-bottom: 0;\n    font-weight: normal;\n    vertical-align: middle;\n    cursor: pointer;\n}\n.radio-inline+.radio-inline, .checkbox-inline+.checkbox-inline {\n    margin-top: 0;\n    margin-left: 10px;\n}\ninput[type=\"radio\"][disabled], input[type=\"checkbox\"][disabled], .radio[disabled], .radio-inline[disabled], .checkbox[disabled], 
.checkbox-inline[disabled], fieldset[disabled] input[type=\"radio\"], fieldset[disabled] input[type=\"checkbox\"], fieldset[disabled] .radio, fieldset[disabled] .radio-inline, fieldset[disabled] .checkbox, fieldset[disabled] .checkbox-inline {\n    cursor: not-allowed;\n}\n.input-sm {\n    height: 30px;\n    padding: 5px 10px;\n    font-size: 12px;\n    line-height: 1.5;\n    border-radius: 0;\n}\nselect.input-sm {\n    height: 30px;\n    line-height: 30px;\n}\ntextarea.input-sm {\n    height: auto;\n}\n.input-lg {\n    height: 48px;\n    padding: 10px 16px;\n    font-size: 19px;\n    line-height: 1.33;\n    border-radius: 0;\n}\nselect.input-lg {\n    height: 48px;\n    line-height: 48px;\n}\ntextarea.input-lg {\n    height: auto;\n}\n.has-warning .help-block, .has-warning .control-label, .has-warning .radio, .has-warning .checkbox, .has-warning .radio-inline, .has-warning .checkbox-inline {\n    color: #e99002;\n}\n.has-warning .form-control {\n    border-color: #e99002;\n    -webkit-box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075);\n    box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075);\n}\n.has-warning .form-control:focus {\n    border-color: #b67102;\n    -webkit-box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075), 0 0 6px #febc53;\n    box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075), 0 0 6px #febc53;\n}\n.has-warning .input-group-addon {\n    color: #e99002;\n    background-color: #fcf8e3;\n    border-color: #e99002;\n}\n.has-error .help-block, .has-error .control-label, .has-error .radio, .has-error .checkbox, .has-error .radio-inline, .has-error .checkbox-inline {\n    color: #f04124;\n}\n.has-error .form-control {\n    border-color: #f04124;\n    -webkit-box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075);\n    box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075);\n}\n.has-error .form-control:focus {\n    border-color: #d32a0e;\n    -webkit-box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075), 0 0 6px #f79483;\n    box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075), 0 0 6px #f79483;\n}\n.has-error .input-group-addon {\n    color: #f04124;\n    background-color: #f2dede;\n    border-color: #f04124;\n}\n.has-success .help-block, .has-success .control-label, .has-success .radio, .has-success .checkbox, .has-success .radio-inline, .has-success .checkbox-inline {\n    color: #43ac6a;\n}\n.has-success .form-control {\n    border-color: #43ac6a;\n    -webkit-box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075);\n    box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075);\n}\n.has-success .form-control:focus {\n    border-color: #358753;\n    -webkit-box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075), 0 0 6px #85d0a1;\n    box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075), 0 0 6px #85d0a1;\n}\n.has-success .input-group-addon {\n    color: #43ac6a;\n    background-color: #dff0d8;\n    border-color: #43ac6a;\n}\n.form-control-static {\n    margin-bottom: 0;\n}\n.help-block {\n    display: block;\n    margin-top: 5px;\n    margin-bottom: 10px;\n    color: #626262;\n}\n@media(min-width:768px) {\n    .form-inline .form-group {\n    display: inline-block;\n    margin-bottom: 0;\n    vertical-align: middle;\n}\n.form-inline .form-control {\n    display: inline-block;\n}\n.form-inline select.form-control {\n    width: auto;\n}\n.form-inline .radio, .form-inline .checkbox {\n    display: inline-block;\n    padding-left: 0;\n    margin-top: 0;\n    margin-bottom: 0;\n}\n.form-inline .radio input[type=\"radio\"], .form-inline .checkbox input[type=\"checkbox\"] {\n    float: none;\n    margin-left: 0;\n}\n}.form-horizontal 
.control-label, .form-horizontal .radio, .form-horizontal .checkbox, .form-horizontal .radio-inline, .form-horizontal .checkbox-inline {\n    padding-top: 7px;\n    margin-top: 0;\n    margin-bottom: 0;\n}\n.form-horizontal .radio, .form-horizontal .checkbox {\n    min-height: 28px;\n}\n.form-horizontal .form-group {\n    margin-right: -15px;\n    margin-left: -15px;\n}\n.form-horizontal .form-group:before, .form-horizontal .form-group:after {\n    display: table;\n    content: \" \"}\n.form-horizontal .form-group:after {\n    clear: both;\n}\n.form-horizontal .form-group:before, .form-horizontal .form-group:after {\n    display: table;\n    content: \" \"}\n.form-horizontal .form-group:after {\n    clear: both;\n}\n.form-horizontal .form-group:before, .form-horizontal .form-group:after {\n    display: table;\n    content: \" \"}\n.form-horizontal .form-group:after {\n    clear: both;\n}\n.form-horizontal .form-group:before, .form-horizontal .form-group:after {\n    display: table;\n    content: \" \"}\n.form-horizontal .form-group:after {\n    clear: both;\n}\n.form-horizontal .form-group:before, .form-horizontal .form-group:after {\n    display: table;\n    content: \" \"}\n.form-horizontal .form-group:after {\n    clear: both;\n}\n.form-horizontal .form-control-static {\n    padding-top: 7px;\n}\n@media(min-width:768px) {\n    .form-horizontal .control-label {\n    text-align: right;\n}\n}.btn {\n    display: inline-block;\n    padding: 6px 12px;\n    margin-bottom: 0;\n    font-size: 15px;\n    font-weight: normal;\n    line-height: 1.428571429;\n    text-align: center;\n    white-space: nowrap;\n    vertical-align: middle;\n    cursor: pointer;\n    background-image: none;\n    border: 1px solid transparent;\n    border-radius: 0;\n    -webkit-user-select: none;\n    -moz-user-select: none;\n    -ms-user-select: none;\n    -o-user-select: none;\n    user-select: none;\n}\n.btn:focus {\n    outline: thin dotted;\n    outline: 5px auto -webkit-focus-ring-color;\n    outline-offset: -2px;\n}\n.btn:hover, .btn:focus {\n    color: #333;\n    text-decoration: none;\n}\n.btn:active, .btn.active {\n    background-image: none;\n    outline: 0;\n    -webkit-box-shadow: inset 0 3px 5px rgba(0, 0, 0, 0.125);\n    box-shadow: inset 0 3px 5px rgba(0, 0, 0, 0.125);\n}\n.btn.disabled, .btn[disabled], fieldset[disabled] .btn {\n    pointer-events: none;\n    cursor: not-allowed;\n    opacity: .65;\n    filter: alpha(opacity=65);\n    -webkit-box-shadow: none;\n    box-shadow: none;\n}\n.btn-default {\n    color: #333;\n    background-color: #e7e7e7;\n    border-color: #dadada;\n}\n.btn-default:hover, .btn-default:focus, .btn-default:active, .btn-default.active, .open .dropdown-toggle.btn-default {\n    color: #333;\n    background-color: #d3d3d3;\n    border-color: #bbb;\n}\n.btn-default:active, .btn-default.active, .open .dropdown-toggle.btn-default {\n    background-image: none;\n}\n.btn-default.disabled, .btn-default[disabled], fieldset[disabled] .btn-default, .btn-default.disabled:hover, .btn-default[disabled]:hover, fieldset[disabled] .btn-default:hover, .btn-default.disabled:focus, .btn-default[disabled]:focus, fieldset[disabled] .btn-default:focus, .btn-default.disabled:active, .btn-default[disabled]:active, fieldset[disabled] .btn-default:active, .btn-default.disabled.active, .btn-default[disabled].active, fieldset[disabled] .btn-default.active {\n    background-color: #e7e7e7;\n    border-color: #dadada;\n}\n.btn-default .badge {\n    color: #e7e7e7;\n    background-color: 
#fff;\n}\n.btn-primary {\n    color: #fff;\n    background-color: #008cba;\n    border-color: #0079a1;\n}\n.btn-primary:hover, .btn-primary:focus, .btn-primary:active, .btn-primary.active, .open .dropdown-toggle.btn-primary {\n    color: #fff;\n    background-color: #006d91;\n    border-color: #004b63;\n}\n.btn-primary:active, .btn-primary.active, .open .dropdown-toggle.btn-primary {\n    background-image: none;\n}\n.btn-primary.disabled, .btn-primary[disabled], fieldset[disabled] .btn-primary, .btn-primary.disabled:hover, .btn-primary[disabled]:hover, fieldset[disabled] .btn-primary:hover, .btn-primary.disabled:focus, .btn-primary[disabled]:focus, fieldset[disabled] .btn-primary:focus, .btn-primary.disabled:active, .btn-primary[disabled]:active, fieldset[disabled] .btn-primary:active, .btn-primary.disabled.active, .btn-primary[disabled].active, fieldset[disabled] .btn-primary.active {\n    background-color: #008cba;\n    border-color: #0079a1;\n}\n.btn-primary .badge {\n    color: #008cba;\n    background-color: #fff;\n}\n.btn-warning {\n    color: #fff;\n    background-color: #e99002;\n    border-color: #d08002;\n}\n.btn-warning:hover, .btn-warning:focus, .btn-warning:active, .btn-warning.active, .open .dropdown-toggle.btn-warning {\n    color: #fff;\n    background-color: #c17702;\n    border-color: #935b01;\n}\n.btn-warning:active, .btn-warning.active, .open .dropdown-toggle.btn-warning {\n    background-image: none;\n}\n.btn-warning.disabled, .btn-warning[disabled], fieldset[disabled] .btn-warning, .btn-warning.disabled:hover, .btn-warning[disabled]:hover, fieldset[disabled] .btn-warning:hover, .btn-warning.disabled:focus, .btn-warning[disabled]:focus, fieldset[disabled] .btn-warning:focus, .btn-warning.disabled:active, .btn-warning[disabled]:active, fieldset[disabled] .btn-warning:active, .btn-warning.disabled.active, .btn-warning[disabled].active, fieldset[disabled] .btn-warning.active {\n    background-color: #e99002;\n    border-color: #d08002;\n}\n.btn-warning .badge {\n    color: #e99002;\n    background-color: #fff;\n}\n.btn-danger {\n    color: #fff;\n    background-color: #f04124;\n    border-color: #ea2f10;\n}\n.btn-danger:hover, .btn-danger:focus, .btn-danger:active, .btn-danger.active, .open .dropdown-toggle.btn-danger {\n    color: #fff;\n    background-color: #dc2c0f;\n    border-color: #b1240c;\n}\n.btn-danger:active, .btn-danger.active, .open .dropdown-toggle.btn-danger {\n    background-image: none;\n}\n.btn-danger.disabled, .btn-danger[disabled], fieldset[disabled] .btn-danger, .btn-danger.disabled:hover, .btn-danger[disabled]:hover, fieldset[disabled] .btn-danger:hover, .btn-danger.disabled:focus, .btn-danger[disabled]:focus, fieldset[disabled] .btn-danger:focus, .btn-danger.disabled:active, .btn-danger[disabled]:active, fieldset[disabled] .btn-danger:active, .btn-danger.disabled.active, .btn-danger[disabled].active, fieldset[disabled] .btn-danger.active {\n    background-color: #f04124;\n    border-color: #ea2f10;\n}\n.btn-danger .badge {\n    color: #f04124;\n    background-color: #fff;\n}\n.btn-success {\n    color: #fff;\n    background-color: #43ac6a;\n    border-color: #3c9a5f;\n}\n.btn-success:hover, .btn-success:focus, .btn-success:active, .btn-success.active, .open .dropdown-toggle.btn-success {\n    color: #fff;\n    background-color: #388f58;\n    border-color: #2b6e44;\n}\n.btn-success:active, .btn-success.active, .open .dropdown-toggle.btn-success {\n    background-image: none;\n}\n.btn-success.disabled, .btn-success[disabled], fieldset[disabled] 
.btn-success, .btn-success.disabled:hover, .btn-success[disabled]:hover, fieldset[disabled] .btn-success:hover, .btn-success.disabled:focus, .btn-success[disabled]:focus, fieldset[disabled] .btn-success:focus, .btn-success.disabled:active, .btn-success[disabled]:active, fieldset[disabled] .btn-success:active, .btn-success.disabled.active, .btn-success[disabled].active, fieldset[disabled] .btn-success.active {\n    background-color: #43ac6a;\n    border-color: #3c9a5f;\n}\n.btn-success .badge {\n    color: #43ac6a;\n    background-color: #fff;\n}\n.btn-info {\n    color: #fff;\n    background-color: #5bc0de;\n    border-color: #46b8da;\n}\n.btn-info:hover, .btn-info:focus, .btn-info:active, .btn-info.active, .open .dropdown-toggle.btn-info {\n    color: #fff;\n    background-color: #39b3d7;\n    border-color: #269abc;\n}\n.btn-info:active, .btn-info.active, .open .dropdown-toggle.btn-info {\n    background-image: none;\n}\n.btn-info.disabled, .btn-info[disabled], fieldset[disabled] .btn-info, .btn-info.disabled:hover, .btn-info[disabled]:hover, fieldset[disabled] .btn-info:hover, .btn-info.disabled:focus, .btn-info[disabled]:focus, fieldset[disabled] .btn-info:focus, .btn-info.disabled:active, .btn-info[disabled]:active, fieldset[disabled] .btn-info:active, .btn-info.disabled.active, .btn-info[disabled].active, fieldset[disabled] .btn-info.active {\n    background-color: #5bc0de;\n    border-color: #46b8da;\n}\n.btn-info .badge {\n    color: #5bc0de;\n    background-color: #fff;\n}\n.btn-link {\n    font-weight: normal;\n    color: #008cba;\n    cursor: pointer;\n    border-radius: 0;\n}\n.btn-link, .btn-link:active, .btn-link[disabled], fieldset[disabled] .btn-link {\n    background-color: transparent;\n    -webkit-box-shadow: none;\n    box-shadow: none;\n}\n.btn-link, .btn-link:hover, .btn-link:focus, .btn-link:active {\n    border-color: transparent;\n}\n.btn-link:hover, .btn-link:focus {\n    color: #00526e;\n    text-decoration: underline;\n    background-color: transparent;\n}\n.btn-link[disabled]:hover, fieldset[disabled] .btn-link:hover, .btn-link[disabled]:focus, fieldset[disabled] .btn-link:focus {\n    color: #999;\n    text-decoration: none;\n}\n.btn-lg {\n    padding: 10px 16px;\n    font-size: 19px;\n    line-height: 1.33;\n    border-radius: 0;\n}\n.btn-sm {\n    padding: 5px 10px;\n    font-size: 12px;\n    line-height: 1.5;\n    border-radius: 0;\n}\n.btn-xs {\n    padding: 1px 5px;\n    font-size: 12px;\n    line-height: 1.5;\n    border-radius: 0;\n}\n.btn-block {\n    display: block;\n    width: 100%;\n    padding-right: 0;\n    padding-left: 0;\n}\n.btn-block+.btn-block {\n    margin-top: 5px;\n}\ninput[type=\"submit\"].btn-block, input[type=\"reset\"].btn-block, input[type=\"button\"].btn-block {\n    width: 100%}\n.fade {\n    opacity: 0;\n    -webkit-transition: opacity .15s linear;\n    transition: opacity .15s linear;\n}\n.fade.in {\n    opacity: 1;\n}\n.collapse {\n    display: none;\n}\n.collapse.in {\n    display: block;\n}\n.collapsing {\n    position: relative;\n    height: 0;\n    overflow: hidden;\n    -webkit-transition: height .35s ease;\n    transition: height .35s ease;\n}\n@font-face {\n    font-family: 'Glyphicons Halflings';\n    src: url('../fonts/glyphicons-halflings-regular.eot');\n    src: url('../fonts/glyphicons-halflings-regular.eot?#iefix') format('embedded-opentype'), url('../fonts/glyphicons-halflings-regular.woff') format('woff'), url('../fonts/glyphicons-halflings-regular.ttf') format('truetype'), 
url('../fonts/glyphicons-halflings-regular.svg#glyphicons-halflingsregular') format('svg');\n}\n.glyphicon {\n    position: relative;\n    top: 1px;\n    display: inline-block;\n    font-family: 'Glyphicons Halflings';\n    -webkit-font-smoothing: antialiased;\n    font-style: normal;\n    font-weight: normal;\n    line-height: 1;\n    -moz-osx-font-smoothing: grayscale;\n}\n.glyphicon:empty {\n    width: 1em;\n}\n.glyphicon-asterisk:before {\n    content: \"\\2a\"}\n.glyphicon-plus:before {\n    content: \"\\2b\"}\n.glyphicon-euro:before {\n    content: \"\\20ac\"}\n.glyphicon-minus:before {\n    content: \"\\2212\"}\n.glyphicon-cloud:before {\n    content: \"\\2601\"}\n.glyphicon-envelope:before {\n    content: \"\\2709\"}\n.glyphicon-pencil:before {\n    content: \"\\270f\"}\n.glyphicon-glass:before {\n    content: \"\\e001\"}\n.glyphicon-music:before {\n    content: \"\\e002\"}\n.glyphicon-search:before {\n    content: \"\\e003\"}\n.glyphicon-heart:before {\n    content: \"\\e005\"}\n.glyphicon-star:before {\n    content: \"\\e006\"}\n.glyphicon-star-empty:before {\n    content: \"\\e007\"}\n.glyphicon-user:before {\n    content: \"\\e008\"}\n.glyphicon-film:before {\n    content: \"\\e009\"}\n.glyphicon-th-large:before {\n    content: \"\\e010\"}\n.glyphicon-th:before {\n    content: \"\\e011\"}\n.glyphicon-th-list:before {\n    content: \"\\e012\"}\n.glyphicon-ok:before {\n    content: \"\\e013\"}\n.glyphicon-remove:before {\n    content: \"\\e014\"}\n.glyphicon-zoom-in:before {\n    content: \"\\e015\"}\n.glyphicon-zoom-out:before {\n    content: \"\\e016\"}\n.glyphicon-off:before {\n    content: \"\\e017\"}\n.glyphicon-signal:before {\n    content: \"\\e018\"}\n.glyphicon-cog:before {\n    content: \"\\e019\"}\n.glyphicon-trash:before {\n    content: \"\\e020\"}\n.glyphicon-home:before {\n    content: \"\\e021\"}\n.glyphicon-file:before {\n    content: \"\\e022\"}\n.glyphicon-time:before {\n    content: \"\\e023\"}\n.glyphicon-road:before {\n    content: \"\\e024\"}\n.glyphicon-download-alt:before {\n    content: \"\\e025\"}\n.glyphicon-download:before {\n    content: \"\\e026\"}\n.glyphicon-upload:before {\n    content: \"\\e027\"}\n.glyphicon-inbox:before {\n    content: \"\\e028\"}\n.glyphicon-play-circle:before {\n    content: \"\\e029\"}\n.glyphicon-repeat:before {\n    content: \"\\e030\"}\n.glyphicon-refresh:before {\n    content: \"\\e031\"}\n.glyphicon-list-alt:before {\n    content: \"\\e032\"}\n.glyphicon-lock:before {\n    content: \"\\e033\"}\n.glyphicon-flag:before {\n    content: \"\\e034\"}\n.glyphicon-headphones:before {\n    content: \"\\e035\"}\n.glyphicon-volume-off:before {\n    content: \"\\e036\"}\n.glyphicon-volume-down:before {\n    content: \"\\e037\"}\n.glyphicon-volume-up:before {\n    content: \"\\e038\"}\n.glyphicon-qrcode:before {\n    content: \"\\e039\"}\n.glyphicon-barcode:before {\n    content: \"\\e040\"}\n.glyphicon-tag:before {\n    content: \"\\e041\"}\n.glyphicon-tags:before {\n    content: \"\\e042\"}\n.glyphicon-book:before {\n    content: \"\\e043\"}\n.glyphicon-bookmark:before {\n    content: \"\\e044\"}\n.glyphicon-print:before {\n    content: \"\\e045\"}\n.glyphicon-camera:before {\n    content: \"\\e046\"}\n.glyphicon-font:before {\n    content: \"\\e047\"}\n.glyphicon-bold:before {\n    content: \"\\e048\"}\n.glyphicon-italic:before {\n    content: \"\\e049\"}\n.glyphicon-text-height:before {\n    content: \"\\e050\"}\n.glyphicon-text-width:before {\n    content: \"\\e051\"}\n.glyphicon-align-left:before {\n    content: 
\"\\e052\"}\n.glyphicon-align-center:before {\n    content: \"\\e053\"}\n.glyphicon-align-right:before {\n    content: \"\\e054\"}\n.glyphicon-align-justify:before {\n    content: \"\\e055\"}\n.glyphicon-list:before {\n    content: \"\\e056\"}\n.glyphicon-indent-left:before {\n    content: \"\\e057\"}\n.glyphicon-indent-right:before {\n    content: \"\\e058\"}\n.glyphicon-facetime-video:before {\n    content: \"\\e059\"}\n.glyphicon-picture:before {\n    content: \"\\e060\"}\n.glyphicon-map-marker:before {\n    content: \"\\e062\"}\n.glyphicon-adjust:before {\n    content: \"\\e063\"}\n.glyphicon-tint:before {\n    content: \"\\e064\"}\n.glyphicon-edit:before {\n    content: \"\\e065\"}\n.glyphicon-share:before {\n    content: \"\\e066\"}\n.glyphicon-check:before {\n    content: \"\\e067\"}\n.glyphicon-move:before {\n    content: \"\\e068\"}\n.glyphicon-step-backward:before {\n    content: \"\\e069\"}\n.glyphicon-fast-backward:before {\n    content: \"\\e070\"}\n.glyphicon-backward:before {\n    content: \"\\e071\"}\n.glyphicon-play:before {\n    content: \"\\e072\"}\n.glyphicon-pause:before {\n    content: \"\\e073\"}\n.glyphicon-stop:before {\n    content: \"\\e074\"}\n.glyphicon-forward:before {\n    content: \"\\e075\"}\n.glyphicon-fast-forward:before {\n    content: \"\\e076\"}\n.glyphicon-step-forward:before {\n    content: \"\\e077\"}\n.glyphicon-eject:before {\n    content: \"\\e078\"}\n.glyphicon-chevron-left:before {\n    content: \"\\e079\"}\n.glyphicon-chevron-right:before {\n    content: \"\\e080\"}\n.glyphicon-plus-sign:before {\n    content: \"\\e081\"}\n.glyphicon-minus-sign:before {\n    content: \"\\e082\"}\n.glyphicon-remove-sign:before {\n    content: \"\\e083\"}\n.glyphicon-ok-sign:before {\n    content: \"\\e084\"}\n.glyphicon-question-sign:before {\n    content: \"\\e085\"}\n.glyphicon-info-sign:before {\n    content: \"\\e086\"}\n.glyphicon-screenshot:before {\n    content: \"\\e087\"}\n.glyphicon-remove-circle:before {\n    content: \"\\e088\"}\n.glyphicon-ok-circle:before {\n    content: \"\\e089\"}\n.glyphicon-ban-circle:before {\n    content: \"\\e090\"}\n.glyphicon-arrow-left:before {\n    content: \"\\e091\"}\n.glyphicon-arrow-right:before {\n    content: \"\\e092\"}\n.glyphicon-arrow-up:before {\n    content: \"\\e093\"}\n.glyphicon-arrow-down:before {\n    content: \"\\e094\"}\n.glyphicon-share-alt:before {\n    content: \"\\e095\"}\n.glyphicon-resize-full:before {\n    content: \"\\e096\"}\n.glyphicon-resize-small:before {\n    content: \"\\e097\"}\n.glyphicon-exclamation-sign:before {\n    content: \"\\e101\"}\n.glyphicon-gift:before {\n    content: \"\\e102\"}\n.glyphicon-leaf:before {\n    content: \"\\e103\"}\n.glyphicon-fire:before {\n    content: \"\\e104\"}\n.glyphicon-eye-open:before {\n    content: \"\\e105\"}\n.glyphicon-eye-close:before {\n    content: \"\\e106\"}\n.glyphicon-warning-sign:before {\n    content: \"\\e107\"}\n.glyphicon-plane:before {\n    content: \"\\e108\"}\n.glyphicon-calendar:before {\n    content: \"\\e109\"}\n.glyphicon-random:before {\n    content: \"\\e110\"}\n.glyphicon-comment:before {\n    content: \"\\e111\"}\n.glyphicon-magnet:before {\n    content: \"\\e112\"}\n.glyphicon-chevron-up:before {\n    content: \"\\e113\"}\n.glyphicon-chevron-down:before {\n    content: \"\\e114\"}\n.glyphicon-retweet:before {\n    content: \"\\e115\"}\n.glyphicon-shopping-cart:before {\n    content: \"\\e116\"}\n.glyphicon-folder-close:before {\n    content: \"\\e117\"}\n.glyphicon-folder-open:before {\n    content: 
\"\\e118\"}\n.glyphicon-resize-vertical:before {\n    content: \"\\e119\"}\n.glyphicon-resize-horizontal:before {\n    content: \"\\e120\"}\n.glyphicon-hdd:before {\n    content: \"\\e121\"}\n.glyphicon-bullhorn:before {\n    content: \"\\e122\"}\n.glyphicon-bell:before {\n    content: \"\\e123\"}\n.glyphicon-certificate:before {\n    content: \"\\e124\"}\n.glyphicon-thumbs-up:before {\n    content: \"\\e125\"}\n.glyphicon-thumbs-down:before {\n    content: \"\\e126\"}\n.glyphicon-hand-right:before {\n    content: \"\\e127\"}\n.glyphicon-hand-left:before {\n    content: \"\\e128\"}\n.glyphicon-hand-up:before {\n    content: \"\\e129\"}\n.glyphicon-hand-down:before {\n    content: \"\\e130\"}\n.glyphicon-circle-arrow-right:before {\n    content: \"\\e131\"}\n.glyphicon-circle-arrow-left:before {\n    content: \"\\e132\"}\n.glyphicon-circle-arrow-up:before {\n    content: \"\\e133\"}\n.glyphicon-circle-arrow-down:before {\n    content: \"\\e134\"}\n.glyphicon-globe:before {\n    content: \"\\e135\"}\n.glyphicon-wrench:before {\n    content: \"\\e136\"}\n.glyphicon-tasks:before {\n    content: \"\\e137\"}\n.glyphicon-filter:before {\n    content: \"\\e138\"}\n.glyphicon-briefcase:before {\n    content: \"\\e139\"}\n.glyphicon-fullscreen:before {\n    content: \"\\e140\"}\n.glyphicon-dashboard:before {\n    content: \"\\e141\"}\n.glyphicon-paperclip:before {\n    content: \"\\e142\"}\n.glyphicon-heart-empty:before {\n    content: \"\\e143\"}\n.glyphicon-link:before {\n    content: \"\\e144\"}\n.glyphicon-phone:before {\n    content: \"\\e145\"}\n.glyphicon-pushpin:before {\n    content: \"\\e146\"}\n.glyphicon-usd:before {\n    content: \"\\e148\"}\n.glyphicon-gbp:before {\n    content: \"\\e149\"}\n.glyphicon-sort:before {\n    content: \"\\e150\"}\n.glyphicon-sort-by-alphabet:before {\n    content: \"\\e151\"}\n.glyphicon-sort-by-alphabet-alt:before {\n    content: \"\\e152\"}\n.glyphicon-sort-by-order:before {\n    content: \"\\e153\"}\n.glyphicon-sort-by-order-alt:before {\n    content: \"\\e154\"}\n.glyphicon-sort-by-attributes:before {\n    content: \"\\e155\"}\n.glyphicon-sort-by-attributes-alt:before {\n    content: \"\\e156\"}\n.glyphicon-unchecked:before {\n    content: \"\\e157\"}\n.glyphicon-expand:before {\n    content: \"\\e158\"}\n.glyphicon-collapse-down:before {\n    content: \"\\e159\"}\n.glyphicon-collapse-up:before {\n    content: \"\\e160\"}\n.glyphicon-log-in:before {\n    content: \"\\e161\"}\n.glyphicon-flash:before {\n    content: \"\\e162\"}\n.glyphicon-log-out:before {\n    content: \"\\e163\"}\n.glyphicon-new-window:before {\n    content: \"\\e164\"}\n.glyphicon-record:before {\n    content: \"\\e165\"}\n.glyphicon-save:before {\n    content: \"\\e166\"}\n.glyphicon-open:before {\n    content: \"\\e167\"}\n.glyphicon-saved:before {\n    content: \"\\e168\"}\n.glyphicon-import:before {\n    content: \"\\e169\"}\n.glyphicon-export:before {\n    content: \"\\e170\"}\n.glyphicon-send:before {\n    content: \"\\e171\"}\n.glyphicon-floppy-disk:before {\n    content: \"\\e172\"}\n.glyphicon-floppy-saved:before {\n    content: \"\\e173\"}\n.glyphicon-floppy-remove:before {\n    content: \"\\e174\"}\n.glyphicon-floppy-save:before {\n    content: \"\\e175\"}\n.glyphicon-floppy-open:before {\n    content: \"\\e176\"}\n.glyphicon-credit-card:before {\n    content: \"\\e177\"}\n.glyphicon-transfer:before {\n    content: \"\\e178\"}\n.glyphicon-cutlery:before {\n    content: \"\\e179\"}\n.glyphicon-header:before {\n    content: \"\\e180\"}\n.glyphicon-compressed:before {\n    
content: \"\\e181\"}\n.glyphicon-earphone:before {\n    content: \"\\e182\"}\n.glyphicon-phone-alt:before {\n    content: \"\\e183\"}\n.glyphicon-tower:before {\n    content: \"\\e184\"}\n.glyphicon-stats:before {\n    content: \"\\e185\"}\n.glyphicon-sd-video:before {\n    content: \"\\e186\"}\n.glyphicon-hd-video:before {\n    content: \"\\e187\"}\n.glyphicon-subtitles:before {\n    content: \"\\e188\"}\n.glyphicon-sound-stereo:before {\n    content: \"\\e189\"}\n.glyphicon-sound-dolby:before {\n    content: \"\\e190\"}\n.glyphicon-sound-5-1:before {\n    content: \"\\e191\"}\n.glyphicon-sound-6-1:before {\n    content: \"\\e192\"}\n.glyphicon-sound-7-1:before {\n    content: \"\\e193\"}\n.glyphicon-copyright-mark:before {\n    content: \"\\e194\"}\n.glyphicon-registration-mark:before {\n    content: \"\\e195\"}\n.glyphicon-cloud-download:before {\n    content: \"\\e197\"}\n.glyphicon-cloud-upload:before {\n    content: \"\\e198\"}\n.glyphicon-tree-conifer:before {\n    content: \"\\e199\"}\n.glyphicon-tree-deciduous:before {\n    content: \"\\e200\"}\n.caret {\n    display: inline-block;\n    width: 0;\n    height: 0;\n    margin-left: 2px;\n    vertical-align: middle;\n    border-top: 4px solid;\n    border-right: 4px solid transparent;\n    border-left: 4px solid transparent;\n}\n.dropdown {\n    position: relative;\n}\n.dropdown-toggle:focus {\n    outline: 0;\n}\n.dropdown-menu {\n    position: absolute;\n    top: 100%;\n    left: 0;\n    z-index: 1000;\n    display: none;\n    float: left;\n    min-width: 160px;\n    padding: 5px 0;\n    margin: 2px 0 0;\n    font-size: 15px;\n    list-style: none;\n    background-color: #fff;\n    border: 1px solid #ccc;\n    border: 1px solid rgba(0, 0, 0, 0.15);\n    border-radius: 0;\n    -webkit-box-shadow: 0 6px 12px rgba(0, 0, 0, 0.175);\n    box-shadow: 0 6px 12px rgba(0, 0, 0, 0.175);\n    background-clip: padding-box;\n}\n.dropdown-menu.pull-right {\n    right: 0;\n    left: auto;\n}\n.dropdown-menu .divider {\n    height: 1px;\n    margin: 9.5px 0;\n    overflow: hidden;\n    background-color: rgba(0, 0, 0, 0.2);\n}\n.dropdown-menu>li>a {\n    display: block;\n    padding: 3px 20px;\n    clear: both;\n    font-weight: normal;\n    line-height: 1.428571429;\n    color: #555;\n    white-space: nowrap;\n}\n.dropdown-menu>li>a:hover, .dropdown-menu>li>a:focus {\n    color: #262626;\n    text-decoration: none;\n    background-color: #eee;\n}\n.dropdown-menu>.active>a, .dropdown-menu>.active>a:hover, .dropdown-menu>.active>a:focus {\n    color: #fff;\n    text-decoration: none;\n    background-color: #008cba;\n    outline: 0;\n}\n.dropdown-menu>.disabled>a, .dropdown-menu>.disabled>a:hover, .dropdown-menu>.disabled>a:focus {\n    color: #999;\n}\n.dropdown-menu>.disabled>a:hover, .dropdown-menu>.disabled>a:focus {\n    text-decoration: none;\n    cursor: not-allowed;\n    background-color: transparent;\n    background-image: none;\n    filter: progid:DXImageTransform.Microsoft.gradient(enabled=false);\n}\n.open>.dropdown-menu {\n    display: block;\n}\n.open>a {\n    outline: 0;\n}\n.dropdown-header {\n    display: block;\n    padding: 3px 20px;\n    font-size: 12px;\n    line-height: 1.428571429;\n    color: #999;\n}\n.dropdown-backdrop {\n    position: fixed;\n    top: 0;\n    right: 0;\n    bottom: 0;\n    left: 0;\n    z-index: 990;\n}\n.pull-right>.dropdown-menu {\n    right: 0;\n    left: auto;\n}\n.dropup .caret, .navbar-fixed-bottom .dropdown .caret {\n    border-top: 0;\n    border-bottom: 4px solid;\n    content: \"\"}\n.dropup 
.dropdown-menu, .navbar-fixed-bottom .dropdown .dropdown-menu {\n    top: auto;\n    bottom: 100%;\n    margin-bottom: 1px;\n}\n@media(min-width:768px) {\n    .navbar-right .dropdown-menu {\n    right: 0;\n    left: auto;\n}\n}.btn-group, .btn-group-vertical {\n    position: relative;\n    display: inline-block;\n    vertical-align: middle;\n}\n.btn-group>.btn, .btn-group-vertical>.btn {\n    position: relative;\n    float: left;\n}\n.btn-group>.btn:hover, .btn-group-vertical>.btn:hover, .btn-group>.btn:focus, .btn-group-vertical>.btn:focus, .btn-group>.btn:active, .btn-group-vertical>.btn:active, .btn-group>.btn.active, .btn-group-vertical>.btn.active {\n    z-index: 2;\n}\n.btn-group>.btn:focus, .btn-group-vertical>.btn:focus {\n    outline: 0;\n}\n.btn-group .btn+.btn, .btn-group .btn+.btn-group, .btn-group .btn-group+.btn, .btn-group .btn-group+.btn-group {\n    margin-left: -1px;\n}\n.btn-toolbar:before, .btn-toolbar:after {\n    display: table;\n    content: \" \"}\n.btn-toolbar:after {\n    clear: both;\n}\n.btn-toolbar:before, .btn-toolbar:after {\n    display: table;\n    content: \" \"}\n.btn-toolbar:after {\n    clear: both;\n}\n.btn-toolbar:before, .btn-toolbar:after {\n    display: table;\n    content: \" \"}\n.btn-toolbar:after {\n    clear: both;\n}\n.btn-toolbar:before, .btn-toolbar:after {\n    display: table;\n    content: \" \"}\n.btn-toolbar:after {\n    clear: both;\n}\n.btn-toolbar:before, .btn-toolbar:after {\n    display: table;\n    content: \" \"}\n.btn-toolbar:after {\n    clear: both;\n}\n.btn-toolbar .btn-group {\n    float: left;\n}\n.btn-toolbar>.btn+.btn, .btn-toolbar>.btn-group+.btn, .btn-toolbar>.btn+.btn-group, .btn-toolbar>.btn-group+.btn-group {\n    margin-left: 5px;\n}\n.btn-group>.btn:not(:first-child):not(:last-child):not(.dropdown-toggle) {\n    border-radius: 0;\n}\n.btn-group>.btn:first-child {\n    margin-left: 0;\n}\n.btn-group>.btn:first-child:not(:last-child):not(.dropdown-toggle) {\n    border-top-right-radius: 0;\n    border-bottom-right-radius: 0;\n}\n.btn-group>.btn:last-child:not(:first-child), .btn-group>.dropdown-toggle:not(:first-child) {\n    border-bottom-left-radius: 0;\n    border-top-left-radius: 0;\n}\n.btn-group>.btn-group {\n    float: left;\n}\n.btn-group>.btn-group:not(:first-child):not(:last-child)>.btn {\n    border-radius: 0;\n}\n.btn-group>.btn-group:first-child>.btn:last-child, .btn-group>.btn-group:first-child>.dropdown-toggle {\n    border-top-right-radius: 0;\n    border-bottom-right-radius: 0;\n}\n.btn-group>.btn-group:last-child>.btn:first-child {\n    border-bottom-left-radius: 0;\n    border-top-left-radius: 0;\n}\n.btn-group .dropdown-toggle:active, .btn-group.open .dropdown-toggle {\n    outline: 0;\n}\n.btn-group-xs>.btn {\n    padding: 1px 5px;\n    font-size: 12px;\n    line-height: 1.5;\n    border-radius: 0;\n}\n.btn-group-sm>.btn {\n    padding: 5px 10px;\n    font-size: 12px;\n    line-height: 1.5;\n    border-radius: 0;\n}\n.btn-group-lg>.btn {\n    padding: 10px 16px;\n    font-size: 19px;\n    line-height: 1.33;\n    border-radius: 0;\n}\n.btn-group>.btn+.dropdown-toggle {\n    padding-right: 8px;\n    padding-left: 8px;\n}\n.btn-group>.btn-lg+.dropdown-toggle {\n    padding-right: 12px;\n    padding-left: 12px;\n}\n.btn-group.open .dropdown-toggle {\n    -webkit-box-shadow: inset 0 3px 5px rgba(0, 0, 0, 0.125);\n    box-shadow: inset 0 3px 5px rgba(0, 0, 0, 0.125);\n}\n.btn-group.open .dropdown-toggle.btn-link {\n    -webkit-box-shadow: none;\n    box-shadow: none;\n}\n.btn .caret {\n    margin-left: 
0;\n}\n.btn-lg .caret {\n    border-width: 5px 5px 0;\n    border-bottom-width: 0;\n}\n.dropup .btn-lg .caret {\n    border-width: 0 5px 5px;\n}\n.btn-group-vertical>.btn, .btn-group-vertical>.btn-group, .btn-group-vertical>.btn-group>.btn {\n    display: block;\n    float: none;\n    width: 100%;\n    max-width: 100%}\n.btn-group-vertical>.btn-group:before, .btn-group-vertical>.btn-group:after {\n    display: table;\n    content: \" \"}\n.btn-group-vertical>.btn-group:after {\n    clear: both;\n}\n.btn-group-vertical>.btn-group:before, .btn-group-vertical>.btn-group:after {\n    display: table;\n    content: \" \"}\n.btn-group-vertical>.btn-group:after {\n    clear: both;\n}\n.btn-group-vertical>.btn-group:before, .btn-group-vertical>.btn-group:after {\n    display: table;\n    content: \" \"}\n.btn-group-vertical>.btn-group:after {\n    clear: both;\n}\n.btn-group-vertical>.btn-group:before, .btn-group-vertical>.btn-group:after {\n    display: table;\n    content: \" \"}\n.btn-group-vertical>.btn-group:after {\n    clear: both;\n}\n.btn-group-vertical>.btn-group:before, .btn-group-vertical>.btn-group:after {\n    display: table;\n    content: \" \"}\n.btn-group-vertical>.btn-group:after {\n    clear: both;\n}\n.btn-group-vertical>.btn-group>.btn {\n    float: none;\n}\n.btn-group-vertical>.btn+.btn, .btn-group-vertical>.btn+.btn-group, .btn-group-vertical>.btn-group+.btn, .btn-group-vertical>.btn-group+.btn-group {\n    margin-top: -1px;\n    margin-left: 0;\n}\n.btn-group-vertical>.btn:not(:first-child):not(:last-child) {\n    border-radius: 0;\n}\n.btn-group-vertical>.btn:first-child:not(:last-child) {\n    border-top-right-radius: 0;\n    border-bottom-right-radius: 0;\n    border-bottom-left-radius: 0;\n}\n.btn-group-vertical>.btn:last-child:not(:first-child) {\n    border-top-right-radius: 0;\n    border-bottom-left-radius: 0;\n    border-top-left-radius: 0;\n}\n.btn-group-vertical>.btn-group:not(:first-child):not(:last-child)>.btn {\n    border-radius: 0;\n}\n.btn-group-vertical>.btn-group:first-child>.btn:last-child, .btn-group-vertical>.btn-group:first-child>.dropdown-toggle {\n    border-bottom-right-radius: 0;\n    border-bottom-left-radius: 0;\n}\n.btn-group-vertical>.btn-group:last-child>.btn:first-child {\n    border-top-right-radius: 0;\n    border-top-left-radius: 0;\n}\n.btn-group-justified {\n    display: table;\n    width: 100%;\n    border-collapse: separate;\n    table-layout: fixed;\n}\n.btn-group-justified>.btn, .btn-group-justified>.btn-group {\n    display: table-cell;\n    float: none;\n    width: 1%}\n.btn-group-justified>.btn-group .btn {\n    width: 100%}\n[data-toggle=\"buttons\"]>.btn>input[type=\"radio\"], [data-toggle=\"buttons\"]>.btn>input[type=\"checkbox\"] {\n    display: none;\n}\n.input-group {\n    position: relative;\n    display: table;\n    border-collapse: separate;\n}\n.input-group[class*=\"col-\"] {\n    float: none;\n    padding-right: 0;\n    padding-left: 0;\n}\n.input-group .form-control {\n    width: 100%;\n    margin-bottom: 0;\n}\n.input-group-lg>.form-control, .input-group-lg>.input-group-addon, .input-group-lg>.input-group-btn>.btn {\n    height: 48px;\n    padding: 10px 16px;\n    font-size: 19px;\n    line-height: 1.33;\n    border-radius: 0;\n}\nselect.input-group-lg>.form-control, select.input-group-lg>.input-group-addon, select.input-group-lg>.input-group-btn>.btn {\n    height: 48px;\n    line-height: 48px;\n}\ntextarea.input-group-lg>.form-control, textarea.input-group-lg>.input-group-addon, 
textarea.input-group-lg>.input-group-btn>.btn {\n    height: auto;\n}\n.input-group-sm>.form-control, .input-group-sm>.input-group-addon, .input-group-sm>.input-group-btn>.btn {\n    height: 30px;\n    padding: 5px 10px;\n    font-size: 12px;\n    line-height: 1.5;\n    border-radius: 0;\n}\nselect.input-group-sm>.form-control, select.input-group-sm>.input-group-addon, select.input-group-sm>.input-group-btn>.btn {\n    height: 30px;\n    line-height: 30px;\n}\ntextarea.input-group-sm>.form-control, textarea.input-group-sm>.input-group-addon, textarea.input-group-sm>.input-group-btn>.btn {\n    height: auto;\n}\n.input-group-addon, .input-group-btn, .input-group .form-control {\n    display: table-cell;\n}\n.input-group-addon:not(:first-child):not(:last-child), .input-group-btn:not(:first-child):not(:last-child), .input-group .form-control:not(:first-child):not(:last-child) {\n    border-radius: 0;\n}\n.input-group-addon, .input-group-btn {\n    width: 1%;\n    white-space: nowrap;\n    vertical-align: middle;\n}\n.input-group-addon {\n    padding: 6px 12px;\n    font-size: 15px;\n    font-weight: normal;\n    line-height: 1;\n    color: #6f6f6f;\n    text-align: center;\n    background-color: #eee;\n    border: 1px solid #ccc;\n    border-radius: 0;\n}\n.input-group-addon.input-sm {\n    padding: 5px 10px;\n    font-size: 12px;\n    border-radius: 0;\n}\n.input-group-addon.input-lg {\n    padding: 10px 16px;\n    font-size: 19px;\n    border-radius: 0;\n}\n.input-group-addon input[type=\"radio\"], .input-group-addon input[type=\"checkbox\"] {\n    margin-top: 0;\n}\n.input-group .form-control:first-child, .input-group-addon:first-child, .input-group-btn:first-child>.btn, .input-group-btn:first-child>.dropdown-toggle, .input-group-btn:last-child>.btn:not(:last-child):not(.dropdown-toggle) {\n    border-top-right-radius: 0;\n    border-bottom-right-radius: 0;\n}\n.input-group-addon:first-child {\n    border-right: 0;\n}\n.input-group .form-control:last-child, .input-group-addon:last-child, .input-group-btn:last-child>.btn, .input-group-btn:last-child>.dropdown-toggle, .input-group-btn:first-child>.btn:not(:first-child) {\n    border-bottom-left-radius: 0;\n    border-top-left-radius: 0;\n}\n.input-group-addon:last-child {\n    border-left: 0;\n}\n.input-group-btn {\n    position: relative;\n    white-space: nowrap;\n}\n.input-group-btn:first-child>.btn {\n    margin-right: -1px;\n}\n.input-group-btn:last-child>.btn {\n    margin-left: -1px;\n}\n.input-group-btn>.btn {\n    position: relative;\n}\n.input-group-btn>.btn+.btn {\n    margin-left: -4px;\n}\n.input-group-btn>.btn:hover, .input-group-btn>.btn:active {\n    z-index: 2;\n}\n.nav {\n    font-family: \"Open Sans\", \"Helvetica Neue\", Helvetica, Arial, sans-serif;\n    padding-left: 0;\n    margin-bottom: 0;\n    list-style: none;\n}\n.nav:before, .nav:after {\n    display: table;\n    content: \" \"}\n.nav:after {\n    clear: both;\n}\n.nav:before, .nav:after {\n    display: table;\n    content: \" \"}\n.nav:after {\n    clear: both;\n}\n.nav:before, .nav:after {\n    display: table;\n    content: \" \"}\n.nav:after {\n    clear: both;\n}\n.nav:before, .nav:after {\n    display: table;\n    content: \" \"}\n.nav:after {\n    clear: both;\n}\n.nav:before, .nav:after {\n    display: table;\n    content: \" \"}\n.nav:after {\n    clear: both;\n}\n.nav>li {\n    position: relative;\n    display: block;\n}\n.nav>li>a {\n    position: relative;\n    display: block;\n    padding: 10px 15px;\n}\n.nav>li>a:hover, .nav>li>a:focus {\n    
text-decoration: none;\n    background-color: #eee;\n}\n.nav>li.disabled>a {\n    color: #999;\n}\n.nav>li.disabled>a:hover, .nav>li.disabled>a:focus {\n    color: #999;\n    text-decoration: none;\n    cursor: not-allowed;\n    background-color: transparent;\n}\n.nav .open>a, .nav .open>a:hover, .nav .open>a:focus {\n    background-color: #eee;\n    border-color: #008cba;\n}\n.nav .nav-divider {\n    height: 1px;\n    margin: 9.5px 0;\n    overflow: hidden;\n    background-color: #e5e5e5;\n}\n.nav>li>a>img {\n    max-width: none;\n}\n.nav-tabs {\n    border-bottom: 1px solid #ddd;\n}\n.nav-tabs>li {\n    float: left;\n    margin-bottom: -1px;\n}\n.nav-tabs>li>a {\n    margin-right: 2px;\n    line-height: 1.428571429;\n    border: 1px solid transparent;\n    border-radius: 0;\n}\n.nav-tabs>li>a:hover {\n    border-color: #eee #eee #ddd;\n}\n.nav-tabs>li.active>a, .nav-tabs>li.active>a:hover, .nav-tabs>li.active>a:focus {\n    color: #6f6f6f;\n    cursor: default;\n    background-color: #fff;\n    border: 1px solid #ddd;\n    border-bottom-color: transparent;\n}\n.nav-tabs.nav-justified {\n    width: 100%;\n    border-bottom: 0;\n}\n.nav-tabs.nav-justified>li {\n    float: none;\n}\n.nav-tabs.nav-justified>li>a {\n    margin-bottom: 5px;\n    text-align: center;\n}\n.nav-tabs.nav-justified>.dropdown .dropdown-menu {\n    top: auto;\n    left: auto;\n}\n@media(min-width:768px) {\n    .nav-tabs.nav-justified>li {\n    display: table-cell;\n    width: 1%}\n.nav-tabs.nav-justified>li>a {\n    margin-bottom: 0;\n}\n}.nav-tabs.nav-justified>li>a {\n    margin-right: 0;\n    border-radius: 0;\n}\n.nav-tabs.nav-justified>.active>a, .nav-tabs.nav-justified>.active>a:hover, .nav-tabs.nav-justified>.active>a:focus {\n    border: 1px solid #ddd;\n}\n@media(min-width:768px) {\n    .nav-tabs.nav-justified>li>a {\n    border-bottom: 1px solid #ddd;\n    border-radius: 0;\n}\n.nav-tabs.nav-justified>.active>a, .nav-tabs.nav-justified>.active>a:hover, .nav-tabs.nav-justified>.active>a:focus {\n    border-bottom-color: #fff;\n}\n}.nav-pills>li {\n    float: left;\n}\n.nav-pills>li>a {\n    border-radius: 0;\n}\n.nav-pills>li+li {\n    margin-left: 2px;\n}\n.nav-pills>li.active>a, .nav-pills>li.active>a:hover, .nav-pills>li.active>a:focus {\n    color: #fff;\n    background-color: #008cba;\n}\n.nav-stacked>li {\n    float: none;\n}\n.nav-stacked>li+li {\n    margin-top: 2px;\n    margin-left: 0;\n}\n.nav-justified {\n    width: 100%}\n.nav-justified>li {\n    float: none;\n}\n.nav-justified>li>a {\n    margin-bottom: 5px;\n    text-align: center;\n}\n.nav-justified>.dropdown .dropdown-menu {\n    top: auto;\n    left: auto;\n}\n@media(min-width:768px) {\n    .nav-justified>li {\n    display: table-cell;\n    width: 1%}\n.nav-justified>li>a {\n    margin-bottom: 0;\n}\n}.nav-tabs-justified {\n    border-bottom: 0;\n}\n.nav-tabs-justified>li>a {\n    margin-right: 0;\n    border-radius: 0;\n}\n.nav-tabs-justified>.active>a, .nav-tabs-justified>.active>a:hover, .nav-tabs-justified>.active>a:focus {\n    border: 1px solid #ddd;\n}\n@media(min-width:768px) {\n    .nav-tabs-justified>li>a {\n    border-bottom: 1px solid #ddd;\n    border-radius: 0;\n}\n.nav-tabs-justified>.active>a, .nav-tabs-justified>.active>a:hover, .nav-tabs-justified>.active>a:focus {\n    border-bottom-color: #fff;\n}\n}.tab-content>.tab-pane {\n    display: none;\n}\n.tab-content>.active {\n    display: block;\n}\n.nav-tabs .dropdown-menu {\n    margin-top: -1px;\n    border-top-right-radius: 0;\n    border-top-left-radius: 0;\n}\n.navbar 
{\n    position: relative;\n    min-height: 45px;\n    margin-bottom: 21px;\n    border: 1px solid transparent;\n}\n.navbar:before, .navbar:after {\n    display: table;\n    content: \" \"}\n.navbar:after {\n    clear: both;\n}\n@media(min-width:768px) {\n    .navbar {\n    border-radius: 0;\n}\n}.navbar-header:before, .navbar-header:after {\n    display: table;\n    content: \" \"}\n.navbar-header:after {\n    clear: both;\n}\n@media(min-width:768px) {\n    .navbar-header {\n    float: left;\n}\n}.navbar-collapse {\n    max-height: 340px;\n    padding-right: 15px;\n    padding-left: 15px;\n    overflow-x: visible;\n    border-top: 1px solid transparent;\n    box-shadow: inset 0 1px 0 rgba(255, 255, 255, 0.1);\n    -webkit-overflow-scrolling: touch;\n}\n.navbar-collapse:before, .navbar-collapse:after {\n    display: table;\n    content: \" \"}\n.navbar-collapse:after {\n    clear: both;\n}\n.navbar-collapse.in {\n    overflow-y: auto;\n}\n@media(min-width:768px) {\n    .navbar-collapse {\n    width: auto;\n    border-top: 0;\n    box-shadow: none;\n}\n.navbar-collapse.collapse {\n    display: block!important;\n    height: auto!important;\n    padding-bottom: 0;\n    overflow: visible!important;\n}\n.navbar-collapse.in {\n    overflow-y: visible;\n}\n.navbar-fixed-top .navbar-collapse, .navbar-static-top .navbar-collapse, .navbar-fixed-bottom .navbar-collapse {\n    padding-right: 0;\n    padding-left: 0;\n}\n}.container>.navbar-header, .container>.navbar-collapse {\n    margin-right: -15px;\n    margin-left: -15px;\n}\n@media(min-width:768px) {\n    .container>.navbar-header, .container>.navbar-collapse {\n    margin-right: 0;\n    margin-left: 0;\n}\n}.navbar-static-top {\n    z-index: 1000;\n    border-width: 0 0 1px;\n}\n@media(min-width:768px) {\n    .navbar-static-top {\n    border-radius: 0;\n}\n}.navbar-fixed-top, .navbar-fixed-bottom {\n    position: fixed;\n    right: 0;\n    left: 0;\n    z-index: 1030;\n}\n@media(min-width:768px) {\n    .navbar-fixed-top, 
.navbar-fixed-bottom {\n    border-radius: 0;\n}\n}.navbar-fixed-top {\n    top: 0;\n    border-width: 0 0 1px;\n}\n.navbar-fixed-bottom {\n    bottom: 0;\n    margin-bottom: 0;\n    border-width: 1px 0 0;\n}\n.navbar-brand {\n    font-family: \"Open Sans\", \"Helvetica Neue\", Helvetica, Arial, sans-serif;\n    float: left;\n    padding: 12px 15px;\n    font-size: 19px;\n    line-height: 21px;\n}\n.navbar-brand:hover, .navbar-brand:focus {\n    text-decoration: none;\n}\n@media(min-width:768px) {\n    .navbar>.container .navbar-brand {\n    margin-left: -15px;\n}\n}.navbar-toggle {\n    position: relative;\n    float: right;\n    padding: 9px 10px;\n    margin-top: 5.5px;\n    margin-right: 15px;\n    margin-bottom: 5.5px;\n    background-color: transparent;\n    background-image: none;\n    border: 1px solid transparent;\n    border-radius: 0;\n}\n.navbar-toggle .icon-bar {\n    display: block;\n    width: 22px;\n    height: 2px;\n    border-radius: 1px;\n}\n.navbar-toggle .icon-bar+.icon-bar {\n    margin-top: 4px;\n}\n@media(min-width:768px) {\n    .navbar-toggle {\n    display: none;\n}\n}.navbar-nav {\n    margin: 6px -15px;\n}\n.navbar-nav>li>a {\n    padding-top: 10px;\n    padding-bottom: 10px;\n    line-height: 21px;\n}\n@media(max-width:767px) {\n    .navbar-nav .open .dropdown-menu {\n    position: static;\n    float: none;\n    width: auto;\n    margin-top: 0;\n    background-color: transparent;\n    border: 0;\n    box-shadow: none;\n}\n.navbar-nav .open .dropdown-menu>li>a, .navbar-nav .open .dropdown-menu .dropdown-header {\n    padding: 5px 15px 5px 25px;\n}\n.navbar-nav .open .dropdown-menu>li>a {\n    line-height: 21px;\n}\n.navbar-nav .open .dropdown-menu>li>a:hover, .navbar-nav .open .dropdown-menu>li>a:focus {\n    background-image: none;\n}\n}@media(min-width:768px) {\n    .navbar-nav {\n    float: left;\n    margin: 0;\n}\n.navbar-nav>li {\n    float: left;\n}\n.navbar-nav>li>a {\n    padding-top: 12px;\n    padding-bottom: 12px;\n}\n.navbar-nav.navbar-right:last-child {\n    margin-right: -15px;\n}\n}@media(min-width:768px) {\n    .navbar-left {\n    float: left!important;\n}\n.navbar-right {\n    float: right!important;\n}\n}.navbar-form {\n    padding: 10px 15px;\n    margin-top: 5px;\n    margin-right: -15px;\n    margin-bottom: 5px;\n    margin-left: -15px;\n    border-top: 1px solid transparent;\n    border-bottom: 1px solid transparent;\n    -webkit-box-shadow: inset 0 1px 0 rgba(255, 255, 255, 0.1), 0 1px 0 rgba(255, 255, 255, 0.1);\n    box-shadow: inset 0 1px 0 rgba(255, 255, 255, 0.1), 0 1px 0 rgba(255, 255, 255, 0.1);\n}\n@media(min-width:768px) {\n    .navbar-form .form-group {\n    display: inline-block;\n    margin-bottom: 0;\n    vertical-align: middle;\n}\n.navbar-form .form-control {\n    display: inline-block;\n}\n.navbar-form select.form-control {\n    width: auto;\n}\n.navbar-form .radio, .navbar-form .checkbox {\n    display: inline-block;\n    padding-left: 0;\n    margin-top: 0;\n    margin-bottom: 0;\n}\n.navbar-form .radio input[type=\"radio\"], .navbar-form .checkbox input[type=\"checkbox\"] {\n    float: none;\n    margin-left: 0;\n}\n}@media(max-width:767px) {\n    .navbar-form .form-group {\n    margin-bottom: 5px;\n}\n}@media(min-width:768px) {\n    .navbar-form {\n    width: auto;\n    padding-top: 0;\n    padding-bottom: 0;\n    margin-right: 0;\n    margin-left: 0;\n    border: 0;\n    -webkit-box-shadow: none;\n    box-shadow: none;\n}\n.navbar-form.navbar-right:last-child {\n    margin-right: 
-15px;\n}\n}.navbar-nav>li>.dropdown-menu {\n    margin-top: 0;\n    border-top-right-radius: 0;\n    border-top-left-radius: 0;\n}\n.navbar-fixed-bottom .navbar-nav>li>.dropdown-menu {\n    border-bottom-right-radius: 0;\n    border-bottom-left-radius: 0;\n}\n.navbar-nav.pull-right>li>.dropdown-menu, .navbar-nav>li>.dropdown-menu.pull-right {\n    right: 0;\n    left: auto;\n}\n.navbar-btn {\n    margin-top: 5px;\n    margin-bottom: 5px;\n}\n.navbar-btn.btn-sm {\n    margin-top: 7.5px;\n    margin-bottom: 7.5px;\n}\n.navbar-btn.btn-xs {\n    margin-top: 11.5px;\n    margin-bottom: 11.5px;\n}\n.navbar-text {\n    margin-top: 12px;\n    margin-bottom: 12px;\n}\n@media(min-width:768px) {\n    .navbar-text {\n    float: left;\n    margin-right: 15px;\n    margin-left: 15px;\n}\n.navbar-text.navbar-right:last-child {\n    margin-right: 0;\n}\n}.navbar-default {\n    background-color: #333;\n    border-color: #222;\n}\n.navbar-default .navbar-brand {\n    color: #fff;\n}\n.navbar-default .navbar-brand:hover, .navbar-default .navbar-brand:focus {\n    color: #fff;\n    background-color: transparent;\n}\n.navbar-default .navbar-text {\n    color: #fff;\n}\n.navbar-default .navbar-nav>li>a {\n    color: #fff;\n}\n.navbar-default .navbar-nav>li>a:hover, .navbar-default .navbar-nav>li>a:focus {\n    color: #fff;\n    background-color: #272727;\n}\n.navbar-default .navbar-nav>.active>a, .navbar-default .navbar-nav>.active>a:hover, .navbar-default .navbar-nav>.active>a:focus {\n    color: #fff;\n    background-color: #272727;\n}\n.navbar-default .navbar-nav>.disabled>a, .navbar-default .navbar-nav>.disabled>a:hover, .navbar-default .navbar-nav>.disabled>a:focus {\n    color: #ccc;\n    background-color: transparent;\n}\n.navbar-default .navbar-toggle {\n    border-color: transparent;\n}\n.navbar-default .navbar-toggle:hover, .navbar-default .navbar-toggle:focus {\n    background-color: transparent;\n}\n.navbar-default .navbar-toggle .icon-bar {\n    background-color: #fff;\n}\n.navbar-default .navbar-collapse, .navbar-default .navbar-form {\n    border-color: #222;\n}\n.navbar-default .navbar-nav>.open>a, .navbar-default .navbar-nav>.open>a:hover, .navbar-default .navbar-nav>.open>a:focus {\n    color: #fff;\n    background-color: #272727;\n}\n@media(max-width:767px) {\n    .navbar-default .navbar-nav .open .dropdown-menu>li>a {\n    color: #fff;\n}\n.navbar-default .navbar-nav .open .dropdown-menu>li>a:hover, .navbar-default .navbar-nav .open .dropdown-menu>li>a:focus {\n    color: #fff;\n    background-color: #272727;\n}\n.navbar-default .navbar-nav .open .dropdown-menu>.active>a, .navbar-default .navbar-nav .open .dropdown-menu>.active>a:hover, .navbar-default .navbar-nav .open .dropdown-menu>.active>a:focus {\n    color: #fff;\n    background-color: #272727;\n}\n.navbar-default .navbar-nav .open .dropdown-menu>.disabled>a, .navbar-default .navbar-nav .open .dropdown-menu>.disabled>a:hover, .navbar-default .navbar-nav .open .dropdown-menu>.disabled>a:focus {\n    color: #ccc;\n    background-color: transparent;\n}\n}.navbar-default .navbar-link {\n    color: #fff;\n}\n.navbar-default .navbar-link:hover {\n    color: #fff;\n}\n.navbar-inverse {\n    background-color: #008cba;\n    border-color: #006687;\n}\n.navbar-inverse .navbar-brand {\n    color: #fff;\n}\n.navbar-inverse .navbar-brand:hover, .navbar-inverse .navbar-brand:focus {\n    color: #fff;\n    background-color: transparent;\n}\n.navbar-inverse .navbar-text {\n    color: #fff;\n}\n.navbar-inverse .navbar-nav>li>a {\n    color: 
#fff;\n}\n.navbar-inverse .navbar-nav>li>a:hover, .navbar-inverse .navbar-nav>li>a:focus {\n    color: #fff;\n    background-color: #006687;\n}\n.navbar-inverse .navbar-nav>.active>a, .navbar-inverse .navbar-nav>.active>a:hover, .navbar-inverse .navbar-nav>.active>a:focus {\n    color: #fff;\n    background-color: #006687;\n}\n.navbar-inverse .navbar-nav>.disabled>a, .navbar-inverse .navbar-nav>.disabled>a:hover, .navbar-inverse .navbar-nav>.disabled>a:focus {\n    color: #444;\n    background-color: transparent;\n}\n.navbar-inverse .navbar-toggle {\n    border-color: transparent;\n}\n.navbar-inverse .navbar-toggle:hover, .navbar-inverse .navbar-toggle:focus {\n    background-color: transparent;\n}\n.navbar-inverse .navbar-toggle .icon-bar {\n    background-color: #fff;\n}\n.navbar-inverse .navbar-collapse, .navbar-inverse .navbar-form {\n    border-color: #007196;\n}\n.navbar-inverse .navbar-nav>.open>a, .navbar-inverse .navbar-nav>.open>a:hover, .navbar-inverse .navbar-nav>.open>a:focus {\n    color: #fff;\n    background-color: #006687;\n}\n@media(max-width:767px) {\n    .navbar-inverse .navbar-nav .open .dropdown-menu>.dropdown-header {\n    border-color: #006687;\n}\n.navbar-inverse .navbar-nav .open .dropdown-menu .divider {\n    background-color: #006687;\n}\n.navbar-inverse .navbar-nav .open .dropdown-menu>li>a {\n    color: #fff;\n}\n.navbar-inverse .navbar-nav .open .dropdown-menu>li>a:hover, .navbar-inverse .navbar-nav .open .dropdown-menu>li>a:focus {\n    color: #fff;\n    background-color: #006687;\n}\n.navbar-inverse .navbar-nav .open .dropdown-menu>.active>a, .navbar-inverse .navbar-nav .open .dropdown-menu>.active>a:hover, .navbar-inverse .navbar-nav .open .dropdown-menu>.active>a:focus {\n    color: #fff;\n    background-color: #006687;\n}\n.navbar-inverse .navbar-nav .open .dropdown-menu>.disabled>a, .navbar-inverse .navbar-nav .open .dropdown-menu>.disabled>a:hover, .navbar-inverse .navbar-nav .open .dropdown-menu>.disabled>a:focus {\n    color: #444;\n    background-color: transparent;\n}\n}.navbar-inverse .navbar-link {\n    color: #fff;\n}\n.navbar-inverse .navbar-link:hover {\n    color: #fff;\n}\n.breadcrumb {\n    padding: 8px 15px;\n    margin-bottom: 21px;\n    list-style: none;\n    background-color: #f5f5f5;\n    border-radius: 0;\n}\n.breadcrumb>li {\n    display: inline-block;\n}\n.breadcrumb>li+li:before {\n    padding: 0 5px;\n    color: #999;\n    content: \"/\\00a0\"}\n.breadcrumb>.active {\n    color: #333;\n}\n.pagination {\n    display: inline-block;\n    padding-left: 0;\n    margin: 21px 0;\n    border-radius: 0;\n}\n.pagination>li {\n    display: inline;\n}\n.pagination>li>a, .pagination>li>span {\n    position: relative;\n    float: left;\n    padding: 6px 12px;\n    margin-left: -1px;\n    line-height: 1.428571429;\n    text-decoration: none;\n    background-color: transparent;\n    border: 1px solid transparent;\n}\n.pagination>li:first-child>a, .pagination>li:first-child>span {\n    margin-left: 0;\n    border-bottom-left-radius: 0;\n    border-top-left-radius: 0;\n}\n.pagination>li:last-child>a, .pagination>li:last-child>span {\n    border-top-right-radius: 0;\n    border-bottom-right-radius: 0;\n}\n.pagination>li>a:hover, .pagination>li>span:hover, .pagination>li>a:focus, .pagination>li>span:focus {\n    background-color: #eee;\n}\n.pagination>.active>a, .pagination>.active>span, .pagination>.active>a:hover, .pagination>.active>span:hover, .pagination>.active>a:focus, .pagination>.active>span:focus {\n    z-index: 2;\n    color: #fff;\n    
cursor: default;\n    background-color: #008cba;\n    border-color: #008cba;\n}\n.pagination>.disabled>span, .pagination>.disabled>span:hover, .pagination>.disabled>span:focus, .pagination>.disabled>a, .pagination>.disabled>a:hover, .pagination>.disabled>a:focus {\n    color: #999;\n    cursor: not-allowed;\n    background-color: transparent;\n    border-color: transparent;\n}\n.pagination-lg>li>a, .pagination-lg>li>span {\n    padding: 10px 16px;\n    font-size: 19px;\n}\n.pagination-lg>li:first-child>a, .pagination-lg>li:first-child>span {\n    border-bottom-left-radius: 0;\n    border-top-left-radius: 0;\n}\n.pagination-lg>li:last-child>a, .pagination-lg>li:last-child>span {\n    border-top-right-radius: 0;\n    border-bottom-right-radius: 0;\n}\n.pagination-sm>li>a, .pagination-sm>li>span {\n    padding: 5px 10px;\n    font-size: 12px;\n}\n.pagination-sm>li:first-child>a, .pagination-sm>li:first-child>span {\n    border-bottom-left-radius: 0;\n    border-top-left-radius: 0;\n}\n.pagination-sm>li:last-child>a, .pagination-sm>li:last-child>span {\n    border-top-right-radius: 0;\n    border-bottom-right-radius: 0;\n}\n.pager {\n    padding-left: 0;\n    margin: 21px 0;\n    text-align: center;\n    list-style: none;\n}\n.pager:before, .pager:after {\n    display: table;\n    content: \" \"}\n.pager:after {\n    clear: both;\n}\n.pager li {\n    display: inline;\n}\n.pager li>a, .pager li>span {\n    display: inline-block;\n    padding: 5px 14px;\n    background-color: transparent;\n    border: 1px solid transparent;\n    border-radius: 3px;\n}\n.pager li>a:hover, .pager li>a:focus {\n    text-decoration: none;\n    background-color: #eee;\n}\n.pager .next>a, .pager .next>span {\n    float: right;\n}\n.pager .previous>a, .pager .previous>span {\n    float: left;\n}\n.pager .disabled>a, .pager .disabled>a:hover, .pager .disabled>a:focus, .pager .disabled>span {\n    color: #999;\n    cursor: not-allowed;\n    background-color: transparent;\n}\n.label {\n    display: inline;\n    padding: .2em .6em .3em;\n    font-size: 75%;\n    font-weight: bold;\n    line-height: 1;\n    color: #fff;\n    text-align: center;\n    white-space: nowrap;\n    vertical-align: baseline;\n    border-radius: .25em;\n}\n.label[href]:hover, .label[href]:focus {\n    color: #fff;\n    text-decoration: none;\n    cursor: pointer;\n}\n.label:empty {\n    display: none;\n}\n.btn .label {\n    position: relative;\n    top: -1px;\n}\n.label-default {\n    background-color: #999;\n}\n.label-default[href]:hover, .label-default[href]:focus {\n    background-color: #808080;\n}\n.label-primary {\n    background-color: #008cba;\n}\n.label-primary[href]:hover, .label-primary[href]:focus {\n    background-color: #006687;\n}\n.label-success {\n    background-color: #43ac6a;\n}\n.label-success[href]:hover, .label-success[href]:focus {\n    background-color: #358753;\n}\n.label-info {\n    background-color: #5bc0de;\n}\n.label-info[href]:hover, .label-info[href]:focus {\n    background-color: #31b0d5;\n}\n.label-warning {\n    background-color: #e99002;\n}\n.label-warning[href]:hover, 
.label-warning[href]:focus {\n    background-color: #b67102;\n}\n.label-danger {\n    background-color: #f04124;\n}\n.label-danger[href]:hover, .label-danger[href]:focus {\n    background-color: #d32a0e;\n}\n.badge {\n    display: inline-block;\n    min-width: 10px;\n    padding: 3px 7px;\n    font-size: 12px;\n    font-weight: bold;\n    line-height: 1;\n    color: #777;\n    text-align: center;\n    white-space: nowrap;\n    vertical-align: baseline;\n    background-color: #e7e7e7;\n    border-radius: 10px;\n}\n.badge:empty {\n    display: none;\n}\n.btn .badge {\n    position: relative;\n    top: -1px;\n}\na.badge:hover, a.badge:focus {\n    color: #fff;\n    text-decoration: none;\n    cursor: pointer;\n}\na.list-group-item.active>.badge, .nav-pills>.active>a>.badge {\n    color: #008cba;\n    background-color: #fff;\n}\n.nav-pills>li>a>.badge {\n    margin-left: 3px;\n}\n.jumbotron {\n    padding: 30px;\n    margin-bottom: 30px;\n    font-size: 23px;\n    font-weight: 200;\n    line-height: 2.1428571435;\n    color: inherit;\n    background-color: #fafafa;\n}\n.jumbotron h1, .jumbotron .h1 {\n    line-height: 1;\n    color: inherit;\n}\n.jumbotron p {\n    line-height: 1.4;\n}\n.container .jumbotron {\n    border-radius: 0;\n}\n.jumbotron .container {\n    max-width: 100%}\n@media screen and (min-width:768px) {\n    .jumbotron {\n    padding-top: 48px;\n    padding-bottom: 48px;\n}\n.container .jumbotron {\n    padding-right: 60px;\n    padding-left: 60px;\n}\n.jumbotron h1, .jumbotron .h1 {\n    font-size: 67.5px;\n}\n}.thumbnail {\n    display: block;\n    padding: 4px;\n    margin-bottom: 21px;\n    line-height: 1.428571429;\n    background-color: #fff;\n    border: 1px solid #ddd;\n    border-radius: 0;\n    -webkit-transition: all .2s ease-in-out;\n    transition: all .2s ease-in-out;\n}\n.thumbnail>img, .thumbnail a>img {\n    display: block;\n    height: auto;\n    max-width: 100%;\n    margin-right: auto;\n    margin-left: auto;\n}\na.thumbnail:hover, a.thumbnail:focus, a.thumbnail.active {\n    border-color: #008cba;\n}\n.thumbnail .caption {\n    padding: 9px;\n    color: #222;\n}\n.alert {\n    position: relative;\n    padding: 0.75rem 1.25rem;\n    margin-bottom: 1rem;\n    border: 1px solid transparent;\n    border-radius: 0.25rem;\n  }\n  \n  .alert-heading {\n    color: inherit;\n  }\n  \n  .alert-link {\n    font-weight: 700;\n  }\n  \n  .alert-dismissible {\n    padding-right: 4rem;\n  }\n  \n  .alert-dismissible .close {\n    position: absolute;\n    top: 0;\n    right: 0;\n    padding: 0.75rem 1.25rem;\n    color: inherit;\n  }\n  \n  .alert-primary {\n    color: #004085;\n    background-color: #cce5ff;\n    border-color: #b8daff;\n  }\n  \n  .alert-primary hr {\n    border-top-color: #9fcdff;\n  }\n  \n  .alert-primary .alert-link {\n    color: #002752;\n  }\n  \n  .alert-secondary {\n    color: #383d41;\n    background-color: #e2e3e5;\n    border-color: #d6d8db;\n  }\n  \n  .alert-secondary hr {\n    border-top-color: #c8cbcf;\n  }\n  \n  .alert-secondary .alert-link {\n    color: #202326;\n  }\n  \n  .alert-success {\n    color: #155724;\n    background-color: #d4edda;\n    border-color: #c3e6cb;\n  }\n  \n  .alert-success hr {\n    border-top-color: #b1dfbb;\n  }\n  \n  .alert-success .alert-link {\n    color: #0b2e13;\n  }\n  \n  .alert-info {\n    color: #0c5460;\n    background-color: #d1ecf1;\n    border-color: #bee5eb;\n  }\n  \n  .alert-info hr {\n    border-top-color: #abdde5;\n  }\n  \n  .alert-info .alert-link {\n    color: #062c33;\n  }\n  \n  
.alert-warning {\n    color: #856404;\n    background-color: #fff3cd;\n    border-color: #ffeeba;\n  }\n  \n  .alert-warning hr {\n    border-top-color: #ffe8a1;\n  }\n  \n  .alert-warning .alert-link {\n    color: #533f03;\n  }\n  \n  .alert-danger {\n    color: #721c24;\n    background-color: #f8d7da;\n    border-color: #f5c6cb;\n  }\n  \n  .alert-danger hr {\n    border-top-color: #f1b0b7;\n  }\n  \n  .alert-danger .alert-link {\n    color: #491217;\n  }\n  \n  .alert-light {\n    color: #818182;\n    background-color: #fefefe;\n    border-color: #fdfdfe;\n  }\n  \n  .alert-light hr {\n    border-top-color: #ececf6;\n  }\n  \n  .alert-light .alert-link {\n    color: #686868;\n  }\n  \n  .alert-dark {\n    color: #1b1e21;\n    background-color: #d6d8d9;\n    border-color: #c6c8ca;\n  }\n  \n  .alert-dark hr {\n    border-top-color: #b9bbbe;\n  }\n  \n  .alert-dark .alert-link {\n    color: #040505;\n  }\n\n\n@-webkit-keyframes progress-bar-stripes {\n    from {\n    background-position: 40px 0;\n}\nto {\n    background-position: 0 0;\n}\n}@keyframes progress-bar-stripes {\n    from {\n    background-position: 40px 0;\n}\nto {\n    background-position: 0 0;\n}\n}.progress {\n    height: 21px;\n    margin-bottom: 21px;\n    overflow: hidden;\n    background-color: #f5f5f5;\n    border-radius: 0;\n    -webkit-box-shadow: inset 0 1px 2px rgba(0, 0, 0, 0.1);\n    box-shadow: inset 0 1px 2px rgba(0, 0, 0, 0.1);\n}\n.progress-bar {\n    float: left;\n    width: 0;\n    height: 100%;\n    font-size: 12px;\n    line-height: 21px;\n    color: #fff;\n    text-align: center;\n    background-color: #008cba;\n    -webkit-box-shadow: inset 0 -1px 0 rgba(0, 0, 0, 0.15);\n    box-shadow: inset 0 -1px 0 rgba(0, 0, 0, 0.15);\n    -webkit-transition: width .6s ease;\n    transition: width .6s ease;\n}\n.progress-striped .progress-bar {\n    background-image: -webkit-linear-gradient(45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);\n    background-image: linear-gradient(45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);\n    background-size: 40px 40px;\n}\n.progress.active .progress-bar {\n    -webkit-animation: progress-bar-stripes 2s linear infinite;\n    animation: progress-bar-stripes 2s linear infinite;\n}\n.progress-bar-success {\n    background-color: #43ac6a;\n}\n.progress-striped .progress-bar-success {\n    background-image: -webkit-linear-gradient(45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);\n    background-image: linear-gradient(45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);\n}\n.progress-bar-info {\n    background-color: #5bc0de;\n}\n.progress-striped .progress-bar-info {\n    background-image: -webkit-linear-gradient(45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);\n    background-image: linear-gradient(45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);\n}\n.progress-bar-warning {\n    background-color: 
#e99002;\n}\n.progress-striped .progress-bar-warning {\n    background-image: -webkit-linear-gradient(45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);\n    background-image: linear-gradient(45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);\n}\n.progress-bar-danger {\n    background-color: #f04124;\n}\n.progress-striped .progress-bar-danger {\n    background-image: -webkit-linear-gradient(45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);\n    background-image: linear-gradient(45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);\n}\n.media, .media-body {\n    overflow: hidden;\n    zoom: 1;\n}\n.media, .media .media {\n    margin-top: 15px;\n}\n.media:first-child {\n    margin-top: 0;\n}\n.media-object {\n    display: block;\n}\n.media-heading {\n    margin: 0 0 5px;\n}\n.media>.pull-left {\n    margin-right: 10px;\n}\n.media>.pull-right {\n    margin-left: 10px;\n}\n.media-list {\n    padding-left: 0;\n    list-style: none;\n}\n.list-group {\n    padding-left: 0;\n    margin-bottom: 20px;\n}\n.list-group-item {\n    position: relative;\n    display: block;\n    padding: 10px 15px;\n    margin-bottom: -1px;\n    background-color: #fff;\n    border: 1px solid #ddd;\n}\n.list-group-item:first-child {\n    border-top-right-radius: 0;\n    border-top-left-radius: 0;\n}\n.list-group-item:last-child {\n    margin-bottom: 0;\n    border-bottom-right-radius: 0;\n    border-bottom-left-radius: 0;\n}\n.list-group-item>.badge {\n    float: right;\n}\n.list-group-item>.badge+.badge {\n    margin-right: 5px;\n}\na.list-group-item {\n    color: #555;\n}\na.list-group-item .list-group-item-heading {\n    color: #333;\n}\na.list-group-item:hover, a.list-group-item:focus {\n    text-decoration: none;\n    background-color: #f5f5f5;\n}\na.list-group-item.active, a.list-group-item.active:hover, a.list-group-item.active:focus {\n    z-index: 2;\n    color: #fff;\n    background-color: #008cba;\n    border-color: #008cba;\n}\na.list-group-item.active .list-group-item-heading, a.list-group-item.active:hover .list-group-item-heading, a.list-group-item.active:focus .list-group-item-heading {\n    color: inherit;\n}\na.list-group-item.active .list-group-item-text, a.list-group-item.active:hover .list-group-item-text, a.list-group-item.active:focus .list-group-item-text {\n    color: #87e1ff;\n}\n.list-group-item-heading {\n    margin-top: 0;\n    margin-bottom: 5px;\n}\n.list-group-item-text {\n    margin-bottom: 0;\n    line-height: 1.3;\n}\n.panel {\n    margin-bottom: 21px;\n    background-color: #fff;\n    border: 1px solid transparent;\n    border-radius: 0;\n    -webkit-box-shadow: 0 1px 1px rgba(0, 0, 0, 0.05);\n    box-shadow: 0 1px 1px rgba(0, 0, 0, 0.05);\n}\n.panel-body {\n    padding: 15px;\n}\n.panel-body:before, .panel-body:after {\n    display: table;\n    content: \" \"}\n.panel-body:after {\n    clear: both;\n}\n
.panel>.list-group {\n    margin-bottom: 0;\n}\n.panel>.list-group .list-group-item {\n    border-width: 1px 0;\n}\n.panel>.list-group .list-group-item:first-child {\n    border-top-right-radius: 0;\n    border-top-left-radius: 0;\n}\n.panel>.list-group .list-group-item:last-child {\n    border-bottom: 0;\n}\n.panel-heading+.list-group .list-group-item:first-child {\n    border-top-width: 0;\n}\n.panel>.table, .panel>.table-responsive>.table {\n    margin-bottom: 0;\n}\n.panel>.panel-body+.table, .panel>.panel-body+.table-responsive {\n    border-top: 1px solid #ddd;\n}\n.panel>.table>tbody:first-child th, .panel>.table>tbody:first-child td {\n    border-top: 0;\n}\n.panel>.table-bordered, .panel>.table-responsive>.table-bordered {\n    border: 0;\n}\n.panel>.table-bordered>thead>tr>th:first-child, .panel>.table-responsive>.table-bordered>thead>tr>th:first-child, .panel>.table-bordered>tbody>tr>th:first-child, .panel>.table-responsive>.table-bordered>tbody>tr>th:first-child, .panel>.table-bordered>tfoot>tr>th:first-child, .panel>.table-responsive>.table-bordered>tfoot>tr>th:first-child, .panel>.table-bordered>thead>tr>td:first-child, .panel>.table-responsive>.table-bordered>thead>tr>td:first-child, .panel>.table-bordered>tbody>tr>td:first-child, .panel>.table-responsive>.table-bordered>tbody>tr>td:first-child, .panel>.table-bordered>tfoot>tr>td:first-child, .panel>.table-responsive>.table-bordered>tfoot>tr>td:first-child {\n    border-left: 0;\n}\n.panel>.table-bordered>thead>tr>th:last-child, .panel>.table-responsive>.table-bordered>thead>tr>th:last-child, .panel>.table-bordered>tbody>tr>th:last-child, .panel>.table-responsive>.table-bordered>tbody>tr>th:last-child, .panel>.table-bordered>tfoot>tr>th:last-child, .panel>.table-responsive>.table-bordered>tfoot>tr>th:last-child, .panel>.table-bordered>thead>tr>td:last-child, .panel>.table-responsive>.table-bordered>thead>tr>td:last-child, .panel>.table-bordered>tbody>tr>td:last-child, .panel>.table-responsive>.table-bordered>tbody>tr>td:last-child, .panel>.table-bordered>tfoot>tr>td:last-child, .panel>.table-responsive>.table-bordered>tfoot>tr>td:last-child {\n    border-right: 0;\n}\n.panel>.table-bordered>thead>tr:last-child>th, .panel>.table-responsive>.table-bordered>thead>tr:last-child>th, .panel>.table-bordered>tbody>tr:last-child>th, .panel>.table-responsive>.table-bordered>tbody>tr:last-child>th, .panel>.table-bordered>tfoot>tr:last-child>th, .panel>.table-responsive>.table-bordered>tfoot>tr:last-child>th, .panel>.table-bordered>thead>tr:last-child>td, .panel>.table-responsive>.table-bordered>thead>tr:last-child>td, .panel>.table-bordered>tbody>tr:last-child>td, .panel>.table-responsive>.table-bordered>tbody>tr:last-child>td, .panel>.table-bordered>tfoot>tr:last-child>td, .panel>.table-responsive>.table-bordered>tfoot>tr:last-child>td {\n    border-bottom: 0;\n}\n.panel>.table-responsive {\n    margin-bottom: 0;\n    border: 0;\n}\n.panel-heading {\n    padding: 10px 15px;\n    border-bottom: 1px solid transparent;\n    border-top-right-radius: 0;\n    border-top-left-radius: 0;\n}\n.panel-heading>.dropdown .dropdown-toggle {\n    color: inherit;\n}\n.panel-title {\n    margin-top: 0;\n    margin-bottom: 0;\n    font-size: 17px;\n    color: inherit;\n}\n.panel-title>a {\n    
color: inherit;\n}\n.panel-footer {\n    padding: 10px 15px;\n    background-color: #f5f5f5;\n    border-top: 1px solid #ddd;\n    border-bottom-right-radius: 0;\n    border-bottom-left-radius: 0;\n}\n.panel-group .panel {\n    margin-bottom: 0;\n    overflow: hidden;\n    border-radius: 0;\n}\n.panel-group .panel+.panel {\n    margin-top: 5px;\n}\n.panel-group .panel-heading {\n    border-bottom: 0;\n}\n.panel-group .panel-heading+.panel-collapse .panel-body {\n    border-top: 1px solid #ddd;\n}\n.panel-group .panel-footer {\n    border-top: 0;\n}\n.panel-group .panel-footer+.panel-collapse .panel-body {\n    border-bottom: 1px solid #ddd;\n}\n.panel-default {\n    border-color: #ddd;\n}\n.panel-default>.panel-heading {\n    color: #333;\n    background-color: #f5f5f5;\n    border-color: #ddd;\n}\n.panel-default>.panel-heading+.panel-collapse .panel-body {\n    border-top-color: #ddd;\n}\n.panel-default>.panel-footer+.panel-collapse .panel-body {\n    border-bottom-color: #ddd;\n}\n.panel-primary {\n    border-color: #008cba;\n}\n.panel-primary>.panel-heading {\n    color: #fff;\n    background-color: #008cba;\n    border-color: #008cba;\n}\n.panel-primary>.panel-heading+.panel-collapse .panel-body {\n    border-top-color: #008cba;\n}\n.panel-primary>.panel-footer+.panel-collapse .panel-body {\n    border-bottom-color: #008cba;\n}\n.panel-success {\n    border-color: #3c9a5f;\n}\n.panel-success>.panel-heading {\n    color: #43ac6a;\n    background-color: #dff0d8;\n    border-color: #3c9a5f;\n}\n.panel-success>.panel-heading+.panel-collapse .panel-body {\n    border-top-color: #3c9a5f;\n}\n.panel-success>.panel-footer+.panel-collapse .panel-body {\n    border-bottom-color: #3c9a5f;\n}\n.panel-warning {\n    border-color: #d08002;\n}\n.panel-warning>.panel-heading {\n    color: #e99002;\n    background-color: #fcf8e3;\n    border-color: #d08002;\n}\n.panel-warning>.panel-heading+.panel-collapse .panel-body {\n    border-top-color: #d08002;\n}\n.panel-warning>.panel-footer+.panel-collapse .panel-body {\n    border-bottom-color: #d08002;\n}\n.panel-danger {\n    border-color: #ea2f10;\n}\n.panel-danger>.panel-heading {\n    color: #f04124;\n    background-color: #f2dede;\n    border-color: #ea2f10;\n}\n.panel-danger>.panel-heading+.panel-collapse .panel-body {\n    border-top-color: #ea2f10;\n}\n.panel-danger>.panel-footer+.panel-collapse .panel-body {\n    border-bottom-color: #ea2f10;\n}\n.panel-info {\n    border-color: #3db5d8;\n}\n.panel-info>.panel-heading {\n    color: #5bc0de;\n    background-color: #d9edf7;\n    border-color: #3db5d8;\n}\n.panel-info>.panel-heading+.panel-collapse .panel-body {\n    border-top-color: #3db5d8;\n}\n.panel-info>.panel-footer+.panel-collapse .panel-body {\n    border-bottom-color: #3db5d8;\n}\n.well {\n    min-height: 20px;\n    padding: 19px;\n    margin-bottom: 20px;\n    background-color: #fafafa;\n    border: 1px solid #e8e8e8;\n    border-radius: 0;\n    -webkit-box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.05);\n    box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.05);\n}\n.well blockquote {\n    border-color: #ddd;\n    border-color: rgba(0, 0, 0, 0.15);\n}\n.well-lg {\n    padding: 24px;\n    border-radius: 0;\n}\n.well-sm {\n    padding: 9px;\n    border-radius: 0;\n}\n.close {\n    float: right;\n    font-size: 22.5px;\n    font-weight: bold;\n    line-height: 1;\n    color: #000;\n    text-shadow: 0 1px 0 #fff;\n    opacity: .2;\n    filter: alpha(opacity=20);\n}\n.close:hover, .close:focus {\n    color: #000;\n    text-decoration: none;\n    
cursor: pointer;\n    opacity: .5;\n    filter: alpha(opacity=50);\n}\nbutton.close {\n    padding: 0;\n    cursor: pointer;\n    background: transparent;\n    border: 0;\n    -webkit-appearance: none;\n}\n.modal-open {\n    overflow: hidden;\n}\n.modal {\n    position: fixed;\n    top: 0;\n    right: 0;\n    bottom: 0;\n    left: 0;\n    z-index: 1040;\n    display: none;\n    overflow: auto;\n    overflow-y: scroll;\n}\n.modal.fade .modal-dialog {\n    -webkit-transform: translate(0, -25%);\n    -ms-transform: translate(0, -25%);\n    transform: translate(0, -25%);\n    -webkit-transition: -webkit-transform .3s ease-out;\n    -moz-transition: -moz-transform .3s ease-out;\n    -o-transition: -o-transform .3s ease-out;\n    transition: transform .3s ease-out;\n}\n.modal.in .modal-dialog {\n    -webkit-transform: translate(0, 0);\n    -ms-transform: translate(0, 0);\n    transform: translate(0, 0);\n}\n.modal-dialog {\n    position: relative;\n    z-index: 1050;\n    width: auto;\n    margin: 10px;\n}\n.modal-content {\n    position: relative;\n    background-color: #fff;\n    border: 1px solid #999;\n    border: 1px solid rgba(0, 0, 0, 0.2);\n    border-radius: 0;\n    outline: 0;\n    -webkit-box-shadow: 0 3px 9px rgba(0, 0, 0, 0.5);\n    box-shadow: 0 3px 9px rgba(0, 0, 0, 0.5);\n    background-clip: padding-box;\n}\n.modal-backdrop {\n    position: fixed;\n    top: 0;\n    right: 0;\n    bottom: 0;\n    left: 0;\n    z-index: 1030;\n    background-color: #000;\n}\n.modal-backdrop.fade {\n    opacity: 0;\n    filter: alpha(opacity=0);\n}\n.modal-backdrop.in {\n    opacity: .5;\n    filter: alpha(opacity=50);\n}\n.modal-header {\n    min-height: 16.428571429px;\n    padding: 15px;\n    border-bottom: 1px solid #e5e5e5;\n}\n.modal-header .close {\n    margin-top: -2px;\n}\n.modal-title {\n    margin: 0;\n    line-height: 1.428571429;\n}\n.modal-body {\n    position: relative;\n    padding: 20px;\n}\n.modal-footer {\n    padding: 19px 20px 20px;\n    margin-top: 15px;\n    text-align: right;\n    border-top: 1px solid #e5e5e5;\n}\n.modal-footer:before, .modal-footer:after {\n    display: table;\n    content: \" \"}\n.modal-footer:after {\n    clear: both;\n}\n.modal-footer .btn+.btn {\n    margin-bottom: 0;\n    margin-left: 5px;\n}\n.modal-footer .btn-group .btn+.btn {\n    margin-left: -1px;\n}\n.modal-footer .btn-block+.btn-block {\n    margin-left: 0;\n}\n@media screen and (min-width:768px) {\n    .modal-dialog {\n    width: 600px;\n    margin: 30px auto;\n}\n.modal-content {\n    -webkit-box-shadow: 0 5px 15px rgba(0, 0, 0, 0.5);\n    box-shadow: 0 5px 15px rgba(0, 0, 0, 0.5);\n}\n}.tooltip {\n    position: absolute;\n    z-index: 1030;\n    display: block;\n    font-size: 12px;\n    line-height: 1.4;\n    opacity: 0;\n    filter: alpha(opacity=0);\n    visibility: visible;\n}\n.tooltip.in {\n    opacity: .9;\n    filter: alpha(opacity=90);\n}\n.tooltip.top {\n    padding: 5px 0;\n    margin-top: -3px;\n}\n.tooltip.right {\n    padding: 0 5px;\n    margin-left: 
3px;\n}\n.tooltip.bottom {\n    padding: 5px 0;\n    margin-top: 3px;\n}\n.tooltip.left {\n    padding: 0 5px;\n    margin-left: -3px;\n}\n.tooltip-inner {\n    max-width: 200px;\n    padding: 3px 8px;\n    color: #fff;\n    text-align: center;\n    text-decoration: none;\n    background-color: #333;\n    border-radius: 0;\n}\n.tooltip-arrow {\n    position: absolute;\n    width: 0;\n    height: 0;\n    border-color: transparent;\n    border-style: solid;\n}\n.tooltip.top .tooltip-arrow {\n    bottom: 0;\n    left: 50%;\n    margin-left: -5px;\n    border-top-color: #333;\n    border-width: 5px 5px 0;\n}\n.tooltip.top-left .tooltip-arrow {\n    bottom: 0;\n    left: 5px;\n    border-top-color: #333;\n    border-width: 5px 5px 0;\n}\n.tooltip.top-right .tooltip-arrow {\n    right: 5px;\n    bottom: 0;\n    border-top-color: #333;\n    border-width: 5px 5px 0;\n}\n.tooltip.right .tooltip-arrow {\n    top: 50%;\n    left: 0;\n    margin-top: -5px;\n    border-right-color: #333;\n    border-width: 5px 5px 5px 0;\n}\n.tooltip.left .tooltip-arrow {\n    top: 50%;\n    right: 0;\n    margin-top: -5px;\n    border-left-color: #333;\n    border-width: 5px 0 5px 5px;\n}\n.tooltip.bottom .tooltip-arrow {\n    top: 0;\n    left: 50%;\n    margin-left: -5px;\n    border-bottom-color: #333;\n    border-width: 0 5px 5px;\n}\n.tooltip.bottom-left .tooltip-arrow {\n    top: 0;\n    left: 5px;\n    border-bottom-color: #333;\n    border-width: 0 5px 5px;\n}\n.tooltip.bottom-right .tooltip-arrow {\n    top: 0;\n    right: 5px;\n    border-bottom-color: #333;\n    border-width: 0 5px 5px;\n}\n.popover {\n    position: absolute;\n    top: 0;\n    left: 0;\n    z-index: 1010;\n    display: none;\n    max-width: 276px;\n    padding: 1px;\n    text-align: left;\n    white-space: normal;\n    background-color: #333;\n    border: 1px solid #333;\n    border: 1px solid transparent;\n    border-radius: 0;\n    -webkit-box-shadow: 0 5px 10px rgba(0, 0, 0, 0.2);\n    box-shadow: 0 5px 10px rgba(0, 0, 0, 0.2);\n    background-clip: padding-box;\n}\n.popover.top {\n    margin-top: -10px;\n}\n.popover.right {\n    margin-left: 10px;\n}\n.popover.bottom {\n    margin-top: 10px;\n}\n.popover.left {\n    margin-left: -10px;\n}\n.popover-title {\n    padding: 8px 14px;\n    margin: 0;\n    font-size: 15px;\n    font-weight: normal;\n    line-height: 18px;\n    background-color: #333;\n    border-bottom: 1px solid #262626;\n    border-radius: 5px 5px 0 0;\n}\n.popover-content {\n    padding: 9px 14px;\n}\n.popover .arrow, .popover .arrow:after {\n    position: absolute;\n    display: block;\n    width: 0;\n    height: 0;\n    border-color: transparent;\n    border-style: solid;\n}\n.popover .arrow {\n    border-width: 11px;\n}\n.popover .arrow:after {\n    border-width: 10px;\n    content: \"\"}\n.popover.top .arrow {\n    bottom: -11px;\n    left: 50%;\n    margin-left: -11px;\n    border-top-color: #999;\n    border-top-color: rgba(0, 0, 0, 0.25);\n    border-bottom-width: 0;\n}\n.popover.top .arrow:after {\n    bottom: 1px;\n    margin-left: -10px;\n    border-top-color: #333;\n    border-bottom-width: 0;\n    content: \" \"}\n.popover.right .arrow {\n    top: 50%;\n    left: -11px;\n    margin-top: -11px;\n    border-right-color: #999;\n    border-right-color: rgba(0, 0, 0, 0.25);\n    border-left-width: 0;\n}\n.popover.right .arrow:after {\n    bottom: -10px;\n    left: 1px;\n    border-right-color: #333;\n    border-left-width: 0;\n    content: \" \"}\n.popover.bottom .arrow {\n    top: -11px;\n    left: 50%;\n    
margin-left: -11px;\n    border-bottom-color: #999;\n    border-bottom-color: rgba(0, 0, 0, 0.25);\n    border-top-width: 0;\n}\n.popover.bottom .arrow:after {\n    top: 1px;\n    margin-left: -10px;\n    border-bottom-color: #333;\n    border-top-width: 0;\n    content: \" \"}\n.popover.left .arrow {\n    top: 50%;\n    right: -11px;\n    margin-top: -11px;\n    border-left-color: #999;\n    border-left-color: rgba(0, 0, 0, 0.25);\n    border-right-width: 0;\n}\n.popover.left .arrow:after {\n    right: 1px;\n    bottom: -10px;\n    border-left-color: #333;\n    border-right-width: 0;\n    content: \" \"}\n.carousel {\n    position: relative;\n}\n.carousel-inner {\n    position: relative;\n    width: 100%;\n    overflow: hidden;\n}\n.carousel-inner>.item {\n    position: relative;\n    display: none;\n    -webkit-transition: .6s ease-in-out left;\n    transition: .6s ease-in-out left;\n}\n.carousel-inner>.item>img, .carousel-inner>.item>a>img {\n    display: block;\n    height: auto;\n    max-width: 100%;\n    line-height: 1;\n}\n.carousel-inner>.active, .carousel-inner>.next, .carousel-inner>.prev {\n    display: block;\n}\n.carousel-inner>.active {\n    left: 0;\n}\n.carousel-inner>.next, .carousel-inner>.prev {\n    position: absolute;\n    top: 0;\n    width: 100%}\n.carousel-inner>.next {\n    left: 100%}\n.carousel-inner>.prev {\n    left: -100%}\n.carousel-inner>.next.left, .carousel-inner>.prev.right {\n    left: 0;\n}\n.carousel-inner>.active.left {\n    left: -100%}\n.carousel-inner>.active.right {\n    left: 100%}\n.carousel-control {\n    position: absolute;\n    top: 0;\n    bottom: 0;\n    left: 0;\n    width: 15%;\n    font-size: 20px;\n    color: #fff;\n    text-align: center;\n    text-shadow: 0 1px 2px rgba(0, 0, 0, 0.6);\n    opacity: .5;\n    filter: alpha(opacity=50);\n}\n.carousel-control.left {\n    background-image: -webkit-linear-gradient(left, color-stop(rgba(0, 0, 0, 0.5) 0), color-stop(rgba(0, 0, 0, 0.0001) 100%));\n    background-image: linear-gradient(to right, rgba(0, 0, 0, 0.5) 0, rgba(0, 0, 0, 0.0001) 100%);\n    background-repeat: repeat-x;\n    filter: progid:DXImageTransform.Microsoft.gradient(startColorstr='#80000000', endColorstr='#00000000', GradientType=1);\n}\n.carousel-control.right {\n    right: 0;\n    left: auto;\n    background-image: -webkit-linear-gradient(left, color-stop(rgba(0, 0, 0, 0.0001) 0), color-stop(rgba(0, 0, 0, 0.5) 100%));\n    background-image: linear-gradient(to right, rgba(0, 0, 0, 0.0001) 0, rgba(0, 0, 0, 0.5) 100%);\n    background-repeat: repeat-x;\n    filter: progid:DXImageTransform.Microsoft.gradient(startColorstr='#00000000', endColorstr='#80000000', GradientType=1);\n}\n.carousel-control:hover, .carousel-control:focus {\n    color: #fff;\n    text-decoration: none;\n    outline: 0;\n    opacity: .9;\n    filter: alpha(opacity=90);\n}\n.carousel-control .icon-prev, .carousel-control .icon-next, .carousel-control .glyphicon-chevron-left, .carousel-control .glyphicon-chevron-right {\n    position: absolute;\n    top: 50%;\n    z-index: 5;\n    display: inline-block;\n}\n.carousel-control .icon-prev, .carousel-control .glyphicon-chevron-left {\n    left: 50%}\n.carousel-control .icon-next, .carousel-control .glyphicon-chevron-right {\n    right: 50%}\n.carousel-control .icon-prev, .carousel-control .icon-next {\n    width: 20px;\n    height: 20px;\n    margin-top: -10px;\n    margin-left: -10px;\n    font-family: serif;\n}\n.carousel-control .icon-prev:before {\n    content: '\\2039'}\n.carousel-control .icon-next:before 
{\n    content: '\\203a'}\n.carousel-indicators {\n    position: absolute;\n    bottom: 10px;\n    left: 50%;\n    z-index: 15;\n    width: 60%;\n    padding-left: 0;\n    margin-left: -30%;\n    text-align: center;\n    list-style: none;\n}\n.carousel-indicators li {\n    display: inline-block;\n    width: 10px;\n    height: 10px;\n    margin: 1px;\n    text-indent: -999px;\n    cursor: pointer;\n    background-color: #000 \\9;\n    background-color: rgba(0, 0, 0, 0);\n    border: 1px solid #fff;\n    border-radius: 10px;\n}\n.carousel-indicators .active {\n    width: 12px;\n    height: 12px;\n    margin: 0;\n    background-color: #fff;\n}\n.carousel-caption {\n    position: absolute;\n    right: 15%;\n    bottom: 20px;\n    left: 15%;\n    z-index: 10;\n    padding-top: 20px;\n    padding-bottom: 20px;\n    color: #fff;\n    text-align: center;\n    text-shadow: 0 1px 2px rgba(0, 0, 0, 0.6);\n}\n.carousel-caption .btn {\n    text-shadow: none;\n}\n@media screen and (min-width:768px) {\n    .carousel-control .glyphicon-chevron-left, .carousel-control .glyphicon-chevron-right, .carousel-control .icon-prev, .carousel-control .icon-next {\n    width: 30px;\n    height: 30px;\n    margin-top: -15px;\n    margin-left: -15px;\n    font-size: 30px;\n}\n.carousel-caption {\n    right: 20%;\n    left: 20%;\n    padding-bottom: 30px;\n}\n.carousel-indicators {\n    bottom: 20px;\n}\n}.clearfix:before, .clearfix:after {\n    display: table;\n    content: \" \"}\n.clearfix:after {\n    clear: both;\n}\n.center-block {\n    display: block;\n    margin-right: auto;\n    margin-left: auto;\n}\n.pull-right {\n    float: right!important;\n}\n.pull-left {\n    float: left!important;\n}\n.hide {\n    display: none!important;\n}\n.show {\n    display: block!important;\n}\n.invisible {\n    visibility: hidden;\n}\n.text-hide {\n    font: 0/0 a;\n    color: transparent;\n    text-shadow: none;\n    background-color: transparent;\n    border: 0;\n}\n.hidden {\n    display: none!important;\n    visibility: hidden!important;\n}\n.affix {\n    position: fixed;\n}\n@-ms-viewport {\n    width: device-width;\n}\n.visible-xs, tr.visible-xs, th.visible-xs, td.visible-xs {\n    display: none!important;\n}\n@media(max-width:767px) {\n    .visible-xs {\n    display: block!important;\n}\ntable.visible-xs {\n    display: table;\n}\ntr.visible-xs {\n    display: table-row!important;\n}\nth.visible-xs, td.visible-xs {\n    display: table-cell!important;\n}\n}@media(min-width:768px) and (max-width:991px) {\n    .visible-xs.visible-sm {\n    display: block!important;\n}\ntable.visible-xs.visible-sm {\n    display: table;\n}\ntr.visible-xs.visible-sm {\n    display: table-row!important;\n}\nth.visible-xs.visible-sm, td.visible-xs.visible-sm {\n    display: table-cell!important;\n}\n}@media(min-width:992px) and (max-width:1199px) {\n    .visible-xs.visible-md {\n    display: block!important;\n}\ntable.visible-xs.visible-md {\n    display: table;\n}\ntr.visible-xs.visible-md {\n    display: table-row!important;\n}\nth.visible-xs.visible-md, td.visible-xs.visible-md {\n    display: table-cell!important;\n}\n}@media(min-width:1200px) {\n    .visible-xs.visible-lg {\n    display: block!important;\n}\ntable.visible-xs.visible-lg {\n    display: table;\n}\ntr.visible-xs.visible-lg {\n    display: table-row!important;\n}\nth.visible-xs.visible-lg, td.visible-xs.visible-lg {\n    display: 
table-cell!important;\n}\n}.visible-sm, tr.visible-sm, th.visible-sm, td.visible-sm {\n    display: none!important;\n}\n@media(max-width:767px) {\n    .visible-sm.visible-xs {\n    display: block!important;\n}\ntable.visible-sm.visible-xs {\n    display: table;\n}\ntr.visible-sm.visible-xs {\n    display: table-row!important;\n}\nth.visible-sm.visible-xs, td.visible-sm.visible-xs {\n    display: table-cell!important;\n}\n}@media(min-width:768px) and (max-width:991px) {\n    .visible-sm {\n    display: block!important;\n}\ntable.visible-sm {\n    display: table;\n}\ntr.visible-sm {\n    display: table-row!important;\n}\nth.visible-sm, td.visible-sm {\n    display: table-cell!important;\n}\n}@media(min-width:992px) and (max-width:1199px) {\n    .visible-sm.visible-md {\n    display: block!important;\n}\ntable.visible-sm.visible-md {\n    display: table;\n}\ntr.visible-sm.visible-md {\n    display: table-row!important;\n}\nth.visible-sm.visible-md, td.visible-sm.visible-md {\n    display: table-cell!important;\n}\n}@media(min-width:1200px) {\n    .visible-sm.visible-lg {\n    display: block!important;\n}\ntable.visible-sm.visible-lg {\n    display: table;\n}\ntr.visible-sm.visible-lg {\n    display: table-row!important;\n}\nth.visible-sm.visible-lg, td.visible-sm.visible-lg {\n    display: table-cell!important;\n}\n}.visible-md, tr.visible-md, th.visible-md, td.visible-md {\n    display: none!important;\n}\n@media(max-width:767px) {\n    .visible-md.visible-xs {\n    display: block!important;\n}\ntable.visible-md.visible-xs {\n    display: table;\n}\ntr.visible-md.visible-xs {\n    display: table-row!important;\n}\nth.visible-md.visible-xs, td.visible-md.visible-xs {\n    display: table-cell!important;\n}\n}@media(min-width:768px) and (max-width:991px) {\n    .visible-md.visible-sm {\n    display: block!important;\n}\ntable.visible-md.visible-sm {\n    display: table;\n}\ntr.visible-md.visible-sm {\n    display: table-row!important;\n}\nth.visible-md.visible-sm, td.visible-md.visible-sm {\n    display: table-cell!important;\n}\n}@media(min-width:992px) and (max-width:1199px) {\n    .visible-md {\n    display: block!important;\n}\ntable.visible-md {\n    display: table;\n}\ntr.visible-md {\n    display: table-row!important;\n}\nth.visible-md, td.visible-md {\n    display: table-cell!important;\n}\n}@media(min-width:1200px) {\n    .visible-md.visible-lg {\n    display: block!important;\n}\ntable.visible-md.visible-lg {\n    display: table;\n}\ntr.visible-md.visible-lg {\n    display: table-row!important;\n}\nth.visible-md.visible-lg, td.visible-md.visible-lg {\n    display: table-cell!important;\n}\n}.visible-lg, tr.visible-lg, th.visible-lg, td.visible-lg {\n    display: none!important;\n}\n@media(max-width:767px) {\n    .visible-lg.visible-xs {\n    display: block!important;\n}\ntable.visible-lg.visible-xs {\n    display: table;\n}\ntr.visible-lg.visible-xs {\n    display: table-row!important;\n}\nth.visible-lg.visible-xs, td.visible-lg.visible-xs {\n    display: table-cell!important;\n}\n}@media(min-width:768px) and (max-width:991px) {\n    .visible-lg.visible-sm {\n    display: block!important;\n}\ntable.visible-lg.visible-sm {\n    display: table;\n}\ntr.visible-lg.visible-sm {\n    display: table-row!important;\n}\nth.visible-lg.visible-sm, td.visible-lg.visible-sm {\n    display: table-cell!important;\n}\n}@media(min-width:992px) and (max-width:1199px) {\n    .visible-lg.visible-md {\n    display: block!important;\n}\ntable.visible-lg.visible-md {\n    display: 
table;\n}\ntr.visible-lg.visible-md {\n    display: table-row!important;\n}\nth.visible-lg.visible-md, td.visible-lg.visible-md {\n    display: table-cell!important;\n}\n}@media(min-width:1200px) {\n    .visible-lg {\n    display: block!important;\n}\ntable.visible-lg {\n    display: table;\n}\ntr.visible-lg {\n    display: table-row!important;\n}\nth.visible-lg, td.visible-lg {\n    display: table-cell!important;\n}\n}.hidden-xs {\n    display: block!important;\n}\ntable.hidden-xs {\n    display: table;\n}\ntr.hidden-xs {\n    display: table-row!important;\n}\nth.hidden-xs, td.hidden-xs {\n    display: table-cell!important;\n}\n@media(max-width:767px) {\n    .hidden-xs, tr.hidden-xs, th.hidden-xs, td.hidden-xs {\n    display: none!important;\n}\n}@media(min-width:768px) and (max-width:991px) {\n    .hidden-xs.hidden-sm, tr.hidden-xs.hidden-sm, th.hidden-xs.hidden-sm, td.hidden-xs.hidden-sm {\n    display: none!important;\n}\n}@media(min-width:992px) and (max-width:1199px) {\n    .hidden-xs.hidden-md, tr.hidden-xs.hidden-md, th.hidden-xs.hidden-md, td.hidden-xs.hidden-md {\n    display: none!important;\n}\n}@media(min-width:1200px) {\n    .hidden-xs.hidden-lg, tr.hidden-xs.hidden-lg, th.hidden-xs.hidden-lg, td.hidden-xs.hidden-lg {\n    display: none!important;\n}\n}.hidden-sm {\n    display: block!important;\n}\ntable.hidden-sm {\n    display: table;\n}\ntr.hidden-sm {\n    display: table-row!important;\n}\nth.hidden-sm, td.hidden-sm {\n    display: table-cell!important;\n}\n@media(max-width:767px) {\n    .hidden-sm.hidden-xs, tr.hidden-sm.hidden-xs, th.hidden-sm.hidden-xs, td.hidden-sm.hidden-xs {\n    display: none!important;\n}\n}@media(min-width:768px) and (max-width:991px) {\n    .hidden-sm, tr.hidden-sm, th.hidden-sm, td.hidden-sm {\n    display: none!important;\n}\n}@media(min-width:992px) and (max-width:1199px) {\n    .hidden-sm.hidden-md, tr.hidden-sm.hidden-md, th.hidden-sm.hidden-md, td.hidden-sm.hidden-md {\n    display: none!important;\n}\n}@media(min-width:1200px) {\n    .hidden-sm.hidden-lg, tr.hidden-sm.hidden-lg, th.hidden-sm.hidden-lg, td.hidden-sm.hidden-lg {\n    display: none!important;\n}\n}.hidden-md {\n    display: block!important;\n}\ntable.hidden-md {\n    display: table;\n}\ntr.hidden-md {\n    display: table-row!important;\n}\nth.hidden-md, td.hidden-md {\n    display: table-cell!important;\n}\n@media(max-width:767px) {\n    .hidden-md.hidden-xs, tr.hidden-md.hidden-xs, th.hidden-md.hidden-xs, td.hidden-md.hidden-xs {\n    display: none!important;\n}\n}@media(min-width:768px) and (max-width:991px) {\n    .hidden-md.hidden-sm, tr.hidden-md.hidden-sm, th.hidden-md.hidden-sm, td.hidden-md.hidden-sm {\n    display: none!important;\n}\n}@media(min-width:992px) and (max-width:1199px) {\n    .hidden-md, tr.hidden-md, th.hidden-md, td.hidden-md {\n    display: none!important;\n}\n}@media(min-width:1200px) {\n    .hidden-md.hidden-lg, tr.hidden-md.hidden-lg, th.hidden-md.hidden-lg, td.hidden-md.hidden-lg {\n    display: none!important;\n}\n}.hidden-lg {\n    display: block!important;\n}\ntable.hidden-lg {\n    display: table;\n}\ntr.hidden-lg {\n    display: table-row!important;\n}\nth.hidden-lg, td.hidden-lg {\n    display: table-cell!important;\n}\n@media(max-width:767px) {\n    .hidden-lg.hidden-xs, tr.hidden-lg.hidden-xs, th.hidden-lg.hidden-xs, td.hidden-lg.hidden-xs {\n    display: none!important;\n}\n}@media(min-width:768px) and (max-width:991px) {\n    .hidden-lg.hidden-sm, tr.hidden-lg.hidden-sm, th.hidden-lg.hidden-sm, td.hidden-lg.hidden-sm {\n    display: 
none!important;\n}\n}@media(min-width:992px) and (max-width:1199px) {\n    .hidden-lg.hidden-md, tr.hidden-lg.hidden-md, th.hidden-lg.hidden-md, td.hidden-lg.hidden-md {\n    display: none!important;\n}\n}@media(min-width:1200px) {\n    .hidden-lg, tr.hidden-lg, th.hidden-lg, td.hidden-lg {\n    display: none!important;\n}\n}.visible-print, tr.visible-print, th.visible-print, td.visible-print {\n    display: none!important;\n}\n@media print {\n    .visible-print {\n    display: block!important;\n}\ntable.visible-print {\n    display: table;\n}\ntr.visible-print {\n    display: table-row!important;\n}\nth.visible-print, td.visible-print {\n    display: table-cell!important;\n}\n.hidden-print, tr.hidden-print, th.hidden-print, td.hidden-print {\n    display: none!important;\n}\n}.navbar {\n    font-size: 13px;\n    font-weight: 300;\n    border: 0;\n}\n.navbar .navbar-toggle:hover .icon-bar {\n    background-color: #b3b3b3;\n}\n.navbar-collapse {\n    border-top-color: rgba(0, 0, 0, 0.2);\n    -webkit-box-shadow: none;\n    box-shadow: none;\n}\n.navbar .dropdown-menu {\n    border: 0;\n}\n.navbar .dropdown-menu>li>a, .navbar .dropdown-menu>li>a:focus {\n    font-size: 13px;\n    font-weight: 300;\n    background-color: transparent;\n}\n.navbar .dropdown-header {\n    color: rgba(255, 255, 255, 0.5);\n}\n.navbar-default .dropdown-menu {\n    background-color: #333;\n}\n.navbar-default .dropdown-menu>li>a, .navbar-default .dropdown-menu>li>a:focus {\n    color: #fff;\n}\n.navbar-default .dropdown-menu>li>a:hover, .navbar-default .dropdown-menu>.active>a, .navbar-default .dropdown-menu>.active>a:hover {\n    background-color: #272727;\n}\n.navbar-inverse .dropdown-menu {\n    background-color: #008cba;\n}\n.navbar-inverse .dropdown-menu>li>a, .navbar-inverse .dropdown-menu>li>a:focus {\n    color: #fff;\n}\n.navbar-inverse .dropdown-menu>li>a:hover, .navbar-inverse .dropdown-menu>.active>a, .navbar-inverse .dropdown-menu>.active>a:hover {\n    background-color: #006687;\n}\n.btn {\n    padding: 14px 28px;\n}\n.btn-lg {\n    padding: 16px 32px;\n}\n.btn-sm {\n    padding: 8px 16px;\n}\n.btn-xs {\n    padding: 4px 8px;\n}\n.btn-group .btn~.dropdown-toggle {\n    padding-right: 16px;\n    padding-left: 16px;\n}\n.btn-group .dropdown-menu {\n    border-top-width: 0;\n}\n.btn-group.dropup .dropdown-menu {\n    margin-bottom: 0;\n    border-top-width: 1px;\n    border-bottom-width: 0;\n}\n.btn-group .dropdown-toggle.btn-default~.dropdown-menu {\n    background-color: #e7e7e7;\n    border-color: #dadada;\n}\n.btn-group .dropdown-toggle.btn-default~.dropdown-menu>li>a {\n    color: #333;\n}\n.btn-group .dropdown-toggle.btn-default~.dropdown-menu>li>a:hover {\n    background-color: #d3d3d3;\n}\n.btn-group .dropdown-toggle.btn-primary~.dropdown-menu {\n    background-color: #008cba;\n    border-color: #0079a1;\n}\n.btn-group .dropdown-toggle.btn-primary~.dropdown-menu>li>a {\n    color: #fff;\n}\n.btn-group .dropdown-toggle.btn-primary~.dropdown-menu>li>a:hover {\n    background-color: #006d91;\n}\n.btn-group .dropdown-toggle.btn-success~.dropdown-menu {\n    background-color: #43ac6a;\n    border-color: #3c9a5f;\n}\n.btn-group .dropdown-toggle.btn-success~.dropdown-menu>li>a {\n    color: #fff;\n}\n.btn-group .dropdown-toggle.btn-success~.dropdown-menu>li>a:hover {\n    background-color: #388f58;\n}\n.btn-group .dropdown-toggle.btn-info~.dropdown-menu {\n    background-color: #5bc0de;\n    border-color: #46b8da;\n}\n.btn-group .dropdown-toggle.btn-info~.dropdown-menu>li>a {\n    color: 
#fff;\n}\n.btn-group .dropdown-toggle.btn-info~.dropdown-menu>li>a:hover {\n    background-color: #39b3d7;\n}\n.btn-group .dropdown-toggle.btn-warning~.dropdown-menu {\n    background-color: #e99002;\n    border-color: #d08002;\n}\n.btn-group .dropdown-toggle.btn-warning~.dropdown-menu>li>a {\n    color: #fff;\n}\n.btn-group .dropdown-toggle.btn-warning~.dropdown-menu>li>a:hover {\n    background-color: #c17702;\n}\n.btn-group .dropdown-toggle.btn-danger~.dropdown-menu {\n    background-color: #f04124;\n    border-color: #ea2f10;\n}\n.btn-group .dropdown-toggle.btn-danger~.dropdown-menu>li>a {\n    color: #fff;\n}\n.btn-group .dropdown-toggle.btn-danger~.dropdown-menu>li>a:hover {\n    background-color: #dc2c0f;\n}\n.lead {\n    color: #6f6f6f;\n}\ncite {\n    font-style: italic;\n}\nblockquote {\n    color: #6f6f6f;\n    border-left-width: 1px;\n}\nblockquote.pull-right {\n    border-right-width: 1px;\n}\nblockquote small {\n    font-size: 12px;\n    font-weight: 300;\n}\ntable {\n    font-size: 12px;\n}\ninput, .form-control {\n    padding: 7px;\n    font-size: 12px;\n}\nlabel, .control-label, .help-block, .checkbox, .radio {\n    font-size: 12px;\n    font-weight: normal;\n}\n.form-group .btn, .input-group-addon, .input-group-btn .btn {\n    padding: 8px 14px;\n    font-size: 12px;\n}\n.nav .open>a, .nav .open>a:hover, .nav .open>a:focus {\n    border-color: transparent;\n}\n.nav-tabs>li>a {\n    color: #222;\n    background-color: #e7e7e7;\n}\n.nav-tabs .caret {\n    border-top-color: #222;\n    border-bottom-color: #222;\n}\n.nav-pills {\n    font-weight: 300;\n}\n.breadcrumb {\n    font-size: 10px;\n    font-weight: 300;\n    text-transform: uppercase;\n    border: 1px solid #ddd;\n    border-radius: 3px;\n}\n.pagination {\n    font-size: 12px;\n    font-weight: 300;\n    color: #999;\n}\n.pagination>li>a, .pagination>li>span {\n    margin-left: 4px;\n    color: #999;\n}\n.pagination>.active>a, .pagination>.active>span {\n    color: #fff;\n}\n.pagination>li>a, .pagination>li:first-child>a, .pagination>li:last-child>a, .pagination>li>span, .pagination>li:first-child>span, .pagination>li:last-child>span {\n    border-radius: 3px;\n}\n.pagination-lg>li>a {\n    padding-right: 22px;\n    padding-left: 22px;\n}\n.pagination-sm>li>a {\n    padding: 0 5px;\n}\n.pager {\n    font-size: 12px;\n    font-weight: 300;\n    color: #999;\n}\n.list-group {\n    font-size: 12px;\n    font-weight: 300;\n}\n.label {\n    padding-right: 1em;\n    padding-left: 1em;\n    font-weight: 300;\n    border-radius: 0;\n}\n.label-default {\n    color: #333;\n    background-color: #e7e7e7;\n}\n.badge {\n    font-weight: 300;\n}\n.progress {\n    height: 22px;\n    padding: 2px;\n    background-color: #f6f6f6;\n    border: 1px solid #ccc;\n    -webkit-box-shadow: none;\n    box-shadow: none;\n}\n.dropdown-menu {\n    padding: 0;\n    margin-top: 0;\n    font-size: 12px;\n}\n.dropdown-menu>li>a {\n    padding: 12px 15px;\n}\n.dropdown-header {\n    padding-right: 15px;\n    padding-left: 15px;\n    font-size: 9px;\n    text-transform: uppercase;\n}\n.popover {\n    font-size: 12px;\n    font-weight: 300;\n    color: #fff;\n}\n.panel-heading, .panel-footer {\n    border-top-right-radius: 0;\n    border-top-left-radius: 0;\n}\n.clearfix:before, .clearfix:after {\n    display: table;\n    content: \" \"}\n.clearfix:after {\n    clear: both;\n}\n.clearfix:before, .clearfix:after {\n    display: table;\n    content: \" \"}\n.clearfix:after {\n    clear: both;\n}\n.center-block {\n    display: block;\n    margin-right: 
auto;\n    margin-left: auto;\n}\n.pull-right {\n    float: right!important;\n}\n.pull-left {\n    float: left!important;\n}\n.hide {\n    display: none!important;\n}\n.show {\n    display: block!important;\n}\n.invisible {\n    visibility: hidden;\n}\n.text-hide {\n    font: 0/0 a;\n    color: transparent;\n    text-shadow: none;\n    background-color: transparent;\n    border: 0;\n}\n.hidden {\n    display: none!important;\n    visibility: hidden!important;\n}\n.affix {\n    position: fixed;\n}\n"
  },
  {
    "path": "docs/site/css/cinder.css",
    "content": "/*\n  Cinder Theme for MkDocs | Copyright 2015 Christopher Simpkins | MIT License\n*/\n\nbody {\n    font-family:\"Open Sans\", \"Helvetica Neue\", Helvetica, Arial, sans-serif;\n    font-size: 16px;\n    line-height: 1.7;\n    background-color: #FFF;\n    color: #343838;\n}\nh1, h2, h3, h4, h5, h6 {\n    font-family:'Inter', 'Helvetica Neue', Helvetica, Arial, sans-serif;\n    color: #222;\n}\nh1 small, h2 small, h3 small, h4 small, h5 small, h6 small, .h1 small, .h2 small, .h3 small, .h4 small, .h5 small, .h6 small, h1 .small, h2 .small, h3 .small, h4 .small, h5 .small, h6 .small, .h1 .small, .h2 .small, .h3 .small, .h4 .small, .h5 .small, .h6 .small {\n    color: #B1B7B9;\n}\n\nh2 {\n    margin-top: 35px;\n}\n\nh1, h2 {\n    font-weight: 700;\n}\nh4 {\n    font-family: 'Inter', 'Helvetica Neue', Helvetica, Arial, sans-serif;\n    font-weight: 300;\n    margin-top: 20px;\n    font-style: italic;\n}\nh5 {\n    font-family: 'Inter', 'Helvetica Neue', Helvetica, Arial, sans-serif;\n    font-weight: 300;\n    font-variant: small-caps;\n}\npre, code {\n    background-color: #FCFDFF;\n}\npre>code {\n    font-size: 13px;\n}\npre {\n    margin-top: 25px;\n    margin-bottom: 25px;\n}\n.lead {\n    font-family:\"Inter\", \"Helvetica Neue\", Helvetica, Arial, sans-serif;\n    font-weight: 400;\n    line-height: 1.4;\n    letter-spacing: 0.0312em;\n    color: #B1B7B9;\n}\n.navbar-default {\n    background-color: #343838;\n    border-bottom: 8px #EBF2F2 solid;\n}\n.bs-sidenav {\n    background-image: url(\"../img/grid11.png\");\n    background-repeat: repeat;\n    font-family: Inter,\"Helvetica Neue\",Helvetica,Arial,sans-serif;\n    font-size: 13px;\n}\n.well {\n    background-color: #FCFDFF;\n}\n.btn-default {\n    background-color:#FCFDFF;\n}\n.table-striped > tbody > tr:nth-child(2n+1) > td, .table-striped > tbody > tr:nth-child(2n+1) > th {\n    background-color: #FCFDFF;\n}\n#mkdocs-search-query:focus {\n    outline: none;\n    -webkit-box-shadow: none;\n    box-shadow: none;\n}\n#mkdocs-search-query {\n    font-family:\"Inter\", \"Helvetica Neue\", Helvetica, Arial, sans-serif;\n    font-size: 20px;\n    font-weight: 700;\n    color: #343838;\n    height: 45px;\n}\nfooter > hr {\n    width: 35%;\n}\n"
  },
  {
    "path": "docs/site/css/highlight.css",
    "content": "/*\n\ngithub.com style (c) Vasily Polovnyov <vast@whiteants.net>\n\n*/\n\n.hljs {\n  display: block;\n  overflow-x: auto;\n  padding: 0.5em;\n  color: #333;\n  background: #FCFDFF;\n}\n\n.hljs-comment,\n.hljs-quote {\n  color: #998;\n  font-style: italic;\n}\n\n.hljs-keyword,\n.hljs-selector-tag,\n.hljs-subst {\n  color: #333;\n  font-weight: bold;\n}\n\n.hljs-number,\n.hljs-literal,\n.hljs-variable,\n.hljs-template-variable,\n.hljs-tag .hljs-attr {\n  color: #008080;\n}\n\n.hljs-string,\n.hljs-doctag {\n  color: #d14;\n}\n\n.hljs-title,\n.hljs-section,\n.hljs-selector-id {\n  color: #900;\n  font-weight: bold;\n}\n\n.hljs-subst {\n  font-weight: normal;\n}\n\n.hljs-type,\n.hljs-class .hljs-title {\n  color: #458;\n  font-weight: bold;\n}\n\n.hljs-tag,\n.hljs-name,\n.hljs-attribute {\n  color: #000080;\n  font-weight: normal;\n}\n\n.hljs-regexp,\n.hljs-link {\n  color: #009926;\n}\n\n.hljs-symbol,\n.hljs-bullet {\n  color: #990073;\n}\n\n.hljs-built_in,\n.hljs-builtin-name {\n  color: #0086b3;\n}\n\n.hljs-meta {\n  color: #999;\n  font-weight: bold;\n}\n\n.hljs-deletion {\n  background: #fdd;\n}\n\n.hljs-addition {\n  background: #dfd;\n}\n\n.hljs-emphasis {\n  font-style: italic;\n}\n\n.hljs-strong {\n  font-weight: bold;\n}\n"
  },
  {
    "path": "docs/site/custom/style.css",
    "content": "@media (min-width: 992px) {\n    /* Allow the sidebar to scroll if it overflows the page. */\n    .bs-sidebar {\n        overflow-y: scroll;\n    }\n}\n\n.btn.btn-primary {\n    /* Change for github issues buttons. */\n    border-radius: 30px;\n    font-size: 15px;\n}\n"
  },
  {
    "path": "docs/site/faq/index.html",
    "content": "<!DOCTYPE html>\n<html lang=\"en\">\n\n<head>\n    <meta charset=\"utf-8\">\n    <meta http-equiv=\"X-UA-Compatible\" content=\"IE=edge\">\n    <meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\">\n    \n    \n    \n    <link rel=\"shortcut icon\" href=\"../img/favicon.ico\">\n\n    \n    <title>F.A.Q. - regenie</title>\n    \n\n    <link rel=\"stylesheet\" href=\"https://use.fontawesome.com/releases/v5.12.0/css/all.css\">\n    <link rel=\"stylesheet\" href=\"https://use.fontawesome.com/releases/v5.12.0/css/v4-shims.css\">\n    <link rel=\"stylesheet\" href=\"//cdn.jsdelivr.net/npm/hack-font@3.3.0/build/web/hack.min.css\">\n    <link href='//rsms.me/inter/inter.css' rel='stylesheet' type='text/css'>\n    <link href='//fonts.googleapis.com/css?family=Open+Sans:300italic,400italic,700italic,400,300,600,700&subset=latin-ext,latin' rel='stylesheet' type='text/css'>\n    <link href=\"../css/bootstrap-custom.min.css\" rel=\"stylesheet\">\n    <link href=\"../css/base.min.css\" rel=\"stylesheet\">\n    <link href=\"../css/cinder.min.css\" rel=\"stylesheet\">\n\n    \n    <link href=\"../custom/style.css\" rel=\"stylesheet\">\n\n    <!-- HTML5 shim and Respond.js IE8 support of HTML5 elements and media queries -->\n    <!--[if lt IE 9]>\n            <script src=\"https://cdn.jsdelivr.net/npm/html5shiv@3.7.3/dist/html5shiv.min.js\"></script>\n            <script src=\"https://cdn.jsdelivr.net/npm/respond.js@1.4.2/dest/respond.min.js\"></script>\n        <![endif]-->\n\n    \n    <script>\n    (function(i, s, o, g, r, a, m) {\n        i['GoogleAnalyticsObject'] = r;\n        i[r] = i[r] || function() {\n            (i[r].q = i[r].q || []).push(arguments)\n        }, i[r].l = 1 * new Date();\n        a = s.createElement(o),\n        m = s.getElementsByTagName(o)[0];\n        a.async = 1;\n        a.src = g;\n        m.parentNode.insertBefore(a, m)\n    })(window, document, 'script', '//www.google-analytics.com/analytics.js', 'ga');\n\n    ga('create', 'UA-166859683-2', 'auto');\n    ga('send', 'pageview');\n    </script>\n    \n\n     \n</head>\n\n<body>\n\n    <div class=\"navbar navbar-default navbar-fixed-top\" role=\"navigation\">\n    <div class=\"container\">\n\n        <!-- Collapsed navigation -->\n        <div class=\"navbar-header\">\n            <!-- Expander button -->\n            <button type=\"button\" class=\"navbar-toggle\" data-toggle=\"collapse\" data-target=\".navbar-collapse\">\n                <span class=\"sr-only\">Toggle navigation</span>\n                <span class=\"icon-bar\"></span>\n                <span class=\"icon-bar\"></span>\n                <span class=\"icon-bar\"></span>\n            </button>\n            \n\n            <!-- Main title -->\n\n            <a class=\"navbar-brand\" href=\"..\">regenie</a>\n        </div>\n\n        <!-- Expanded navigation -->\n        <div class=\"navbar-collapse collapse\">\n                <!-- Main navigation -->\n                <ul class=\"nav navbar-nav\">\n                \n                \n                    <li >\n                        <a href=\"..\">Home</a>\n                    </li>\n                \n                \n                \n                    <li >\n                        <a href=\"../overview/\">Overview</a>\n                    </li>\n                \n                \n                \n                    <li >\n                        <a href=\"../install/\">Install</a>\n                    </li>\n                \n                \n                \n    
                <li >\n                        <a href=\"../options/\">Documentation</a>\n                    </li>\n                \n                \n                \n                    <li >\n                        <a href=\"../performance/\">Performance</a>\n                    </li>\n                \n                \n                \n                    <li >\n                        <a href=\"../recommendations/\">UKBB Analysis</a>\n                    </li>\n                \n                \n                \n                    <li class=\"active\">\n                        <a href=\"./\">F.A.Q.</a>\n                    </li>\n                \n                \n                </ul>\n\n            <ul class=\"nav navbar-nav navbar-right\">\n                    <li>\n                        <a href=\"#\" data-toggle=\"modal\" data-target=\"#mkdocs_search_modal\">\n                            <i class=\"fas fa-search\"></i> Search\n                        </a>\n                    </li>\n                    <li >\n                        <a rel=\"prev\" href=\"../recommendations/\">\n                            <i class=\"fas fa-arrow-left\"></i> Previous\n                        </a>\n                    </li>\n                    <li class=\"disabled\">\n                        <a rel=\"next\" >\n                            Next <i class=\"fas fa-arrow-right\"></i>\n                        </a>\n                    </li>\n            </ul>\n        </div>\n    </div>\n</div>\n\n    <div class=\"container\">\n        \n        \n        <div class=\"col-md-3\"><div class=\"bs-sidebar hidden-print affix well\" role=\"complementary\">\n    <ul class=\"nav bs-sidenav\">\n        <li class=\"first-level active\"><a href=\"#frequently-asked-questions\">Frequently asked questions</a></li>\n            <li class=\"second-level\"><a href=\"#general\">General</a></li>\n                \n            <li class=\"second-level\"><a href=\"#step-1\">Step 1</a></li>\n                \n            <li class=\"second-level\"><a href=\"#step-2\">Step 2</a></li>\n                \n    </ul>\n</div></div>\n        <div class=\"col-md-9\" role=\"main\">\n\n<h2 id=\"frequently-asked-questions\">Frequently asked questions</h2>\n<h3 id=\"general\">General</h3>\n<ul>\n<li><span style=\"font-size: large; font-style: italic;color:#404040\"> Why doesn’t <strong>regenie</strong> need a genetic relatedness matrix (GRM)? \n</span></li>\n</ul>\n<p><strong>regenie</strong> performs whole genome regression using the following model</p>\n<p>\n<script type=\"math/tex; mode=display\">Y = X\\beta + \\epsilon</script>\n</p>\n<p>where <script type=\"math/tex\">Y_{N\\times 1}</script> is a phenotype, <script type=\"math/tex\">X_{N\\times M}</script> is a genotype matrix, and <script type=\"math/tex\">\\epsilon_i\\sim N(0,\\sigma^2)</script>. \nThis model has close ties to a linear mixed model (LMM) based on an infinitesimal model </p>\n<p>\n<script type=\"math/tex; mode=display\">Y = u + \\epsilon</script>\n</p>\n<p>where <script type=\"math/tex\">u\\sim N(0,\\sigma_u^2 K)</script> with <script type=\"math/tex\">K_{N\\times N}=XX^T/M</script> is referred to as the genetic relatedness matrix (GRM). 
In the LMM, the polygenic effects have been integrated out so that the model only involves the GRM $K$ through a variance component in the covariance matrix of the trait.</p>\n<p>In <strong>regenie</strong>, we directly estimate the polygenic effects parameter <script type=\"math/tex\">\\beta</script> by using ridge regression, which corresponds to fitting a linear regression model with an L2 penalty to impose shrinkage. Hence, we bypass having to use the GRM <script type=\"math/tex\">K</script> and use the polygenic effect estimates <script type=\"math/tex\">X\\hat{\\beta}</script> to control for population structure when testing variants for association.</p>\n
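<p>As a rough illustration (this is not <strong>regenie</strong>'s exact fitting procedure, which works on blocks of SNPs and uses cross-validation to choose the shrinkage level), for a single shrinkage parameter <script type=\"math/tex\">\\lambda</script> the ridge estimate has the closed form</p>\n<p>\n<script type=\"math/tex; mode=display\">\\hat{\\beta} = (X^TX + \\lambda I_M)^{-1}X^TY</script>\n</p>\n<p>so larger values of <script type=\"math/tex\">\\lambda</script> impose stronger shrinkage on the polygenic effect estimates.</p>\n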
<p><br/></p>\n<ul>\n<li><span style=\"font-size: large; font-style: italic;color:#404040\"> Can <strong>regenie</strong> be run on small sample sizes? \n</span></li>\n</ul>\n<p>For quantitative traits, we have not encountered issues running <strong>regenie</strong> on small data sets.\nFor binary traits, we have successfully run <strong>regenie</strong> (step 1 and 2) on data sets with as few as 300 samples. A few factors to consider:</p>\n<ol>\n<li>Convergence issues may occur in step 1 (especially if a trait is highly unbalanced) <script type=\"math/tex\">-</script> see below</li>\n<li>Similarly, convergence issues may occur in step 2 when using the Firth approximation <script type=\"math/tex\">-</script> see below </li>\n</ol>\n<p>Note: we have found that <strong>regenie</strong> can be conservative in more extreme relatedness scenarios, so we recommend not using it for smaller cohorts with high amounts of relatedness, such as founder populations, where exact mixed-model methods can be used.</p>\n<h3 id=\"step-1\">Step 1</h3>\n<ul>\n<li><span style=\"font-size: large; font-style: italic;color:#404040\"> What block size to use in step 1? \n</span></li>\n</ul>\n<p>We recommend using blocks of size 1,000, as we have observed that this leads to a reasonable number of ridge predictors \nat level 1 (e.g. 2,500 with 500K SNPs used and the default <strong>regenie</strong> parameters) and have noticed little change in the \nfinal predictions when varying the block size.</p>\n<p><br/></p>\n<ul>\n<li><span style=\"font-size: large; font-style: italic;color:#404040\"> How many variants to use in step 1? \n</span></li>\n</ul>\n<p>We recommend using a smaller set of about 500K directly genotyped SNPs in step 1, which should be sufficient to capture genome-wide polygenic effects. Note that using too many SNPs in Step 1 (e.g. &gt;1M) can lead to a high computational burden due to the resulting higher number of predictors in the level 1 models.</p>\n<p><br/></p>\n<ul>\n<li><span style=\"font-size: large; font-style: italic;color:#404040\"> What do I do if I get the error \"Uh-oh, SNP XX has low variance (=XX)\" in step 1? \n</span></li>\n</ul>\n<p>This is due to variants with very low minor allele count (MAC) being included in step 1. To avoid this, you should use a MAC filter to remove such variants in a pre-processing step before running <strong>regenie</strong>.</p>\n<p>For example, in PLINK2 you would use the <code>--mac</code> option and obtain a list of variants that pass the MAC filter (note that if you are using <code>--keep/--remove</code> in <strong>regenie</strong>, you should also use it in the PLINK2 command).</p>\n<pre><code>plink2 \\\n  --bfile my_bed_file \\\n  --mac 100 \\\n  --write-snplist \\\n  --out snps_pass\n</code></pre>\n<p>You would then use the output file in <strong>regenie</strong> as <code>--extract snps_pass.snplist</code> (and this would avoid having to make a new genotype file).</p>\n<p><br/></p>\n<ul>\n<li><span style=\"font-size: large;font-style: italic; color:#404040\"> What to do if Step 1 of <strong>regenie</strong> fails for a binary trait when fitting the penalized logistic regression model? \n</span></li>\n</ul>\n<p>This can occur when the sample size used to fit the model is small and/or if the trait is extremely unbalanced. </p>\n<ol>\n<li>If using K-fold CV, switch to LOOCV (option <code>--loocv</code>) to increase the size of the sample used to fit the model\n(note: LOOCV is now used by default when the sample size is below 5,000)</li>\n<li>If it is due to quasi-separation (i.e. <code>Var(Y)=0</code> occurred in model fitting), either increase the sample size using LOOCV or increase the MAF threshold for variants included in the step 1 analysis</li>\n</ol>\n<h3 id=\"step-2\">Step 2</h3>\n<ul>\n<li><span style=\"font-size: large;font-style: italic; color:#404040\"> What to do if Step 2 of <strong>regenie</strong> fails when fitting the null model for the approximate Firth correction? \n</span></li>\n</ul>\n<p>This can occur when the sample size used to fit the model is small and/or if the trait is extremely unbalanced. \nWe have implemented the same measures as in the <code>logistf</code> function in R to avoid convergence issues, which include the use of a step size threshold when performing a Newton step. </p>\n<ol>\n<li>We first try fitting the model with a more liberal step size threshold (=25) and a maximum of 1,000 iterations; if convergence fails, we retry the model fit using a more stringent step size threshold (=5) and a higher maximum number of iterations (=5,000), which slows down convergence.</li>\n<li>The user can also specify a maximum step size threshold using <code>--maxstep-null</code> (use value &lt;5) as well as increase the maximum number of iterations using <code>--maxiter-null</code> (use value &gt;5000). In that case, no retries are performed if convergence fails.<ul>\n<li>We recommend testing chromosomes separately (using <code>--chr</code>) as these parameters may need to be altered when fitting the null model for each chromosome; see the sketch below</li>\n</ul>\n</li>\n</ol>\n
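<p>A minimal sketch of this per-chromosome strategy is shown below; the input file names are placeholders, and options such as <code>--bgen</code>, <code>--phenoFile</code>, <code>--pred</code>, <code>--bt</code>, <code>--firth</code>, <code>--approx</code>, <code>--bsize</code> and <code>--out</code> follow the step 2 usage described in the documentation:</p>\n<pre><code># fit the approximate Firth null model per chromosome with custom settings\nfor CHR in $(seq 1 22); do\n  ./regenie \\\n    --step 2 \\\n    --bgen my_data.bgen \\\n    --phenoFile my_pheno.txt \\\n    --bt --firth --approx \\\n    --pred fit_bin_out_pred.list \\\n    --bsize 400 \\\n    --chr ${CHR} \\\n    --maxstep-null 2 \\\n    --maxiter-null 10000 \\\n    --out test_bin_chr${CHR}\ndone\n</code></pre>\n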
<p><br/></p>\n<ul>\n<li>\n<p><span style=\"font-size: large;font-style: italic; color:#404040\"> What is reported in A1FREQ when building masks? \n</span></p>\n<ul>\n<li>For the <code>max</code> and <code>comphet</code> rules, the resulting burden masks take on values in [0,2] just like single variants, so we compute A1FREQ the same way as done for single variants (i.e. mean(G)/2 where G is a genotype vector).</li>\n<li>For the <code>sum</code> rule, A1FREQ is computed as the average of the effect allele frequencies across all sites included in the mask.</li>\n</ul>\n</li>\n</ul>\n<p><br/></p>\n<ul>\n<li><span style=\"font-size: large;font-style: italic; color:#404040\"> How is missingness handled in the genotype data? \n</span></li>\n</ul>\n<p>Missing genotypes are imputed with per-SNP averages when performing association tests (note: the genotype summaries reported, such as AAF and INFO, are based only on non-missing genotypes).</p></div>\n        \n        \n    </div>\n\n    <footer class=\"col-md-12 text-center\">\n        \n        <hr>\n        <p>\n        <small>REGENIE is licensed under the <a href='https://github.com/rgcgithub/regenie/blob/master/LICENSE'>MIT license</a></small><br>\n        \n        <small>Documentation built with <a href=\"http://www.mkdocs.org/\">MkDocs</a>.</small>\n        </p>\n\n        \n        \n    </footer>\n    <script src=\"//ajax.googleapis.com/ajax/libs/jquery/1.12.4/jquery.min.js\"></script>\n    <script src=\"../js/bootstrap-3.0.3.min.js\"></script>\n    \n    <script>var base_url = \"..\"</script>\n    \n    <script src=\"../js/base.js\"></script>\n    <script src=\"../search/main.js\"></script>\n\n  <script type=\"text/javascript\"\n          src=\"https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.0/MathJax.js?config=TeX-AMS_CHTML\"></script>\n\n  <script type=\"text/x-mathjax-config\">\n    MathJax.Hub.Config({\ntex2jax: {\ninlineMath: [['$','$'], ['\\\\(','\\\\)']],\nprocessEscapes: true},\njax: [\"input/TeX\",\"input/MathML\",\"input/AsciiMath\",\"output/CommonHTML\"],\nextensions: [\"tex2jax.js\",\"mml2jax.js\",\"asciimath2jax.js\",\"MathMenu.js\",\"MathZoom.js\",\"AssistiveMML.js\", \"[Contrib]/a11y/accessibility-menu.js\"],\nTeX: {\nextensions: [\"AMSmath.js\",\"AMSsymbols.js\",\"noErrors.js\",\"noUndefined.js\"],\nequationNumbers: {\nautoNumber: \"AMS\"\n}\n}\n});\n  </script>\n\n    <div class=\"modal\" id=\"mkdocs_search_modal\" tabindex=\"-1\" role=\"dialog\" aria-labelledby=\"searchModalLabel\" aria-hidden=\"true\">\n    <div class=\"modal-dialog modal-lg\">\n        <div class=\"modal-content\">\n            <div class=\"modal-header\">\n                <button type=\"button\" class=\"close\" data-dismiss=\"modal\">\n                    <span aria-hidden=\"true\">&times;</span>\n                    <span class=\"sr-only\">Close</span>\n                </button>\n                <h4 class=\"modal-title\" id=\"searchModalLabel\">Search</h4>\n            </div>\n            <div class=\"modal-body\">\n                <p>\n                    From here you can search these documents. 
Enter\n                    your search terms below.\n                </p>\n                <form>\n                    <div class=\"form-group\">\n                        <input type=\"text\" class=\"form-control\" placeholder=\"Search...\" id=\"mkdocs-search-query\" title=\"Type search term here\">\n                    </div>\n                </form>\n                <div id=\"mkdocs-search-results\"></div>\n            </div>\n            <div class=\"modal-footer\">\n            </div>\n        </div>\n    </div>\n</div><div class=\"modal\" id=\"mkdocs_keyboard_modal\" tabindex=\"-1\" role=\"dialog\" aria-labelledby=\"keyboardModalLabel\" aria-hidden=\"true\">\n    <div class=\"modal-dialog\">\n        <div class=\"modal-content\">\n            <div class=\"modal-header\">\n                <h4 class=\"modal-title\" id=\"keyboardModalLabel\">Keyboard Shortcuts</h4>\n                <button type=\"button\" class=\"close\" data-dismiss=\"modal\"><span aria-hidden=\"true\">&times;</span><span class=\"sr-only\">Close</span></button>\n            </div>\n            <div class=\"modal-body\">\n              <table class=\"table\">\n                <thead>\n                  <tr>\n                    <th style=\"width: 20%;\">Keys</th>\n                    <th>Action</th>\n                  </tr>\n                </thead>\n                <tbody>\n                  <tr>\n                    <td class=\"help shortcut\"><kbd>?</kbd></td>\n                    <td>Open this help</td>\n                  </tr>\n                  <tr>\n                    <td class=\"next shortcut\"><kbd>n</kbd></td>\n                    <td>Next page</td>\n                  </tr>\n                  <tr>\n                    <td class=\"prev shortcut\"><kbd>p</kbd></td>\n                    <td>Previous page</td>\n                  </tr>\n                  <tr>\n                    <td class=\"search shortcut\"><kbd>s</kbd></td>\n                    <td>Search</td>\n                  </tr>\n                </tbody>\n              </table>\n            </div>\n            <div class=\"modal-footer\">\n            </div>\n        </div>\n    </div>\n</div>\n    </body>\n\n\n</html>\n"
  },
  {
    "path": "docs/site/index.html",
    "content": "<!DOCTYPE html>\n<html lang=\"en\">\n\n<head>\n    <meta charset=\"utf-8\">\n    <meta http-equiv=\"X-UA-Compatible\" content=\"IE=edge\">\n    <meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\">\n    \n    \n    \n    <link rel=\"shortcut icon\" href=\"img/favicon.ico\">\n\n    \n    <title>Home - regenie</title>\n    \n\n    <link rel=\"stylesheet\" href=\"https://use.fontawesome.com/releases/v5.12.0/css/all.css\">\n    <link rel=\"stylesheet\" href=\"https://use.fontawesome.com/releases/v5.12.0/css/v4-shims.css\">\n    <link rel=\"stylesheet\" href=\"//cdn.jsdelivr.net/npm/hack-font@3.3.0/build/web/hack.min.css\">\n    <link href='//rsms.me/inter/inter.css' rel='stylesheet' type='text/css'>\n    <link href='//fonts.googleapis.com/css?family=Open+Sans:300italic,400italic,700italic,400,300,600,700&subset=latin-ext,latin' rel='stylesheet' type='text/css'>\n    <link href=\"css/bootstrap-custom.min.css\" rel=\"stylesheet\">\n    <link href=\"css/base.min.css\" rel=\"stylesheet\">\n    <link href=\"css/cinder.min.css\" rel=\"stylesheet\">\n\n    \n    <link href=\"custom/style.css\" rel=\"stylesheet\">\n\n    <!-- HTML5 shim and Respond.js IE8 support of HTML5 elements and media queries -->\n    <!--[if lt IE 9]>\n            <script src=\"https://cdn.jsdelivr.net/npm/html5shiv@3.7.3/dist/html5shiv.min.js\"></script>\n            <script src=\"https://cdn.jsdelivr.net/npm/respond.js@1.4.2/dest/respond.min.js\"></script>\n        <![endif]-->\n\n    \n    <script>\n    (function(i, s, o, g, r, a, m) {\n        i['GoogleAnalyticsObject'] = r;\n        i[r] = i[r] || function() {\n            (i[r].q = i[r].q || []).push(arguments)\n        }, i[r].l = 1 * new Date();\n        a = s.createElement(o),\n        m = s.getElementsByTagName(o)[0];\n        a.async = 1;\n        a.src = g;\n        m.parentNode.insertBefore(a, m)\n    })(window, document, 'script', '//www.google-analytics.com/analytics.js', 'ga');\n\n    ga('create', 'UA-166859683-2', 'auto');\n    ga('send', 'pageview');\n    </script>\n    \n\n     \n</head>\n\n<body class=\"homepage\" >\n\n    <div class=\"navbar navbar-default navbar-fixed-top\" role=\"navigation\">\n    <div class=\"container\">\n\n        <!-- Collapsed navigation -->\n        <div class=\"navbar-header\">\n            <!-- Expander button -->\n            <button type=\"button\" class=\"navbar-toggle\" data-toggle=\"collapse\" data-target=\".navbar-collapse\">\n                <span class=\"sr-only\">Toggle navigation</span>\n                <span class=\"icon-bar\"></span>\n                <span class=\"icon-bar\"></span>\n                <span class=\"icon-bar\"></span>\n            </button>\n            \n\n            <!-- Main title -->\n\n            <a class=\"navbar-brand\" href=\".\">regenie</a>\n        </div>\n\n        <!-- Expanded navigation -->\n        <div class=\"navbar-collapse collapse\">\n                <!-- Main navigation -->\n                <ul class=\"nav navbar-nav\">\n                \n                \n                    <li class=\"active\">\n                        <a href=\".\">Home</a>\n                    </li>\n                \n                \n                \n                    <li >\n                        <a href=\"overview/\">Overview</a>\n                    </li>\n                \n                \n                \n                    <li >\n                        <a href=\"install/\">Install</a>\n                    </li>\n                \n                \n           
     \n                    <li >\n                        <a href=\"options/\">Documentation</a>\n                    </li>\n                \n                \n                \n                    <li >\n                        <a href=\"performance/\">Performance</a>\n                    </li>\n                \n                \n                \n                    <li >\n                        <a href=\"recommendations/\">UKBB Analysis</a>\n                    </li>\n                \n                \n                \n                    <li >\n                        <a href=\"faq/\">F.A.Q.</a>\n                    </li>\n                \n                \n                </ul>\n\n            <ul class=\"nav navbar-nav navbar-right\">\n                    <li>\n                        <a href=\"#\" data-toggle=\"modal\" data-target=\"#mkdocs_search_modal\">\n                            <i class=\"fas fa-search\"></i> Search\n                        </a>\n                    </li>\n                    <li class=\"disabled\">\n                        <a rel=\"prev\" >\n                            <i class=\"fas fa-arrow-left\"></i> Previous\n                        </a>\n                    </li>\n                    <li >\n                        <a rel=\"next\" href=\"overview/\">\n                            Next <i class=\"fas fa-arrow-right\"></i>\n                        </a>\n                    </li>\n            </ul>\n        </div>\n    </div>\n</div>\n\n    <div class=\"container\">\n        \n        \n        <div class=\"col-md-3\"><div class=\"bs-sidebar hidden-print affix well\" role=\"complementary\">\n    <ul class=\"nav bs-sidenav\">\n        <li class=\"first-level active\"><a href=\"#regenie\">regenie</a></li>\n        <li class=\"first-level \"><a href=\"#citation\">Citation</a></li>\n        <li class=\"first-level \"><a href=\"#license\">License</a></li>\n        <li class=\"first-level \"><a href=\"#contact\">Contact</a></li>\n    </ul>\n</div></div>\n        <div class=\"col-md-9\" role=\"main\">\n\n<h2 id=\"regenie\">regenie</h2>\n<p><strong>regenie</strong> is a C++ program for whole genome regression modelling of large\n<a href=\"https://en.wikipedia.org/wiki/Genome-wide_association_study\">genome-wide association studies</a>.</p>\n<p>It is developed and supported by a team of scientists at the Regeneron Genetics Center.</p>\n<p>The method has the following properties</p>\n<ul>\n<li>It works on quantitative and binary traits, including binary\ntraits with unbalanced case-control ratios</li>\n<li>It can handle population structure and relatedness</li>\n<li>It can process multiple phenotypes at once efficiently</li>\n<li>For binary traits, it supports Firth logistic regression and an SPA test</li>\n<li>It can perform gene/region-based tests (Burden, <a href=\"https://doi.org/10.1016/j.ajhg.2024.08.021\">SBAT</a>, SKAT/SKATO, ACATV/ACATO)</li>\n<li>It can perform interaction tests (GxE, GxG) as well as conditional analyses</li>\n<li>Meta-analysis of REGENIE summary statistics can be performed using <a href=\"https://rgcgithub.github.io/remeta/\">REMETA</a></li>\n<li>It is fast and memory efficient 🔥</li>\n<li>It supports the <a href=\"https://www.well.ox.ac.uk/~gav/bgen_format/\">BGEN</a>, <a href=\"https://www.cog-genomics.org/plink/1.9/formats#bed\">PLINK</a> bed/bim/fam and <a href=\"https://www.cog-genomics.org/plink/2.0/formats#pgen\">PLINK2</a> pgen/pvar/psam genetic data formats</li>\n<li>It is ideally suited for implementation in\n  <a 
href=\"https://spark.apache.org/\">Apache Spark</a> (see <a href=\"https://projectglow.io/\">GLOW</a>)</li>\n<li>It can be installed with <a href=\"https://anaconda.org/bioconda/regenie\">Conda</a></li>\n</ul>\n<h2 id=\"citation\">Citation</h2>\n<p>Mbatchou, J., Barnard, L., Backman, J. et al. Computationally efficient whole-genome regression for quantitative and binary traits. Nat Genet 53, 1097–1103 (2021). <a href=\"https://doi.org/10.1038/s41588-021-00870-7\">https://doi.org/10.1038/s41588-021-00870-7</a></p>\n<h2 id=\"license\">License</h2>\n<p><strong>regenie</strong> is distributed under an <a href=\"https://github.com/rgcgithub/regenie/blob/master/LICENSE\">MIT license</a>.</p>\n<h2 id=\"contact\">Contact</h2>\n<p>If you have any questions about <strong>regenie</strong> please contact</p>\n<ul>\n<li><a href=\"&#109;&#97;&#105;&#108;&#116;&#111;&#58;&#106;&#111;&#110;&#97;&#116;&#104;&#97;&#110;&#46;&#109;&#97;&#114;&#99;&#104;&#105;&#110;&#105;&#64;&#114;&#101;&#103;&#101;&#110;&#101;&#114;&#111;&#110;&#46;&#99;&#111;&#109;\">&#106;&#111;&#110;&#97;&#116;&#104;&#97;&#110;&#46;&#109;&#97;&#114;&#99;&#104;&#105;&#110;&#105;&#64;&#114;&#101;&#103;&#101;&#110;&#101;&#114;&#111;&#110;&#46;&#99;&#111;&#109;</a></li>\n<li><a href=\"&#109;&#97;&#105;&#108;&#116;&#111;&#58;&#106;&#111;&#101;&#108;&#108;&#101;&#46;&#109;&#98;&#97;&#116;&#99;&#104;&#111;&#117;&#64;&#114;&#101;&#103;&#101;&#110;&#101;&#114;&#111;&#110;&#46;&#99;&#111;&#109;\">&#106;&#111;&#101;&#108;&#108;&#101;&#46;&#109;&#98;&#97;&#116;&#99;&#104;&#111;&#117;&#64;&#114;&#101;&#103;&#101;&#110;&#101;&#114;&#111;&#110;&#46;&#99;&#111;&#109;</a> </li>\n</ul>\n<p>If you want to submit a issue concerning the software please do so\nusing the <strong>regenie</strong> <a href=\"https://github.com/rgcgithub/regenie/issues\">Github repository</a>.</p>\n<p><a href=\"https://github.com/rgcgithub/regenie/issues/new\"><button class=\"btn btn-primary btn-sm\" type=\"submit\"><i class=\"fab fa-github fa-2x\"></i> Report Issue</button></a>\n<a href=\"https://github.com/rgcgithub/regenie/issues\"><button class=\"btn btn-primary btn-sm\" type=\"submit\"><i class=\"fab fa-github fa-2x\"></i> Active Issues </button></a></p>\n<!--\n## Version history\n\nVersion 1.0 (22 June 2020): Initial release\n--></div>\n        \n        \n    </div>\n\n    <footer class=\"col-md-12 text-center\">\n        \n        <hr>\n        <p>\n        <small>REGENIE is licensed under the <a href='https://github.com/rgcgithub/regenie/blob/master/LICENSE'>MIT license</small><br>\n        \n        <small>Documentation built with <a href=\"http://www.mkdocs.org/\">MkDocs</a>.</small>\n        </p>\n\n        \n        \n    </footer>\n    <script src=\"//ajax.googleapis.com/ajax/libs/jquery/1.12.4/jquery.min.js\"></script>\n    <script src=\"js/bootstrap-3.0.3.min.js\"></script>\n    \n    <script>var base_url = \".\"</script>\n    \n    <script src=\"js/base.js\"></script>\n    <script src=\"search/main.js\"></script>\n\n  <script type=\"text/javascript\"\n          src=\"https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.0/MathJax.js?config=TeX-AMS_CHTML\"></script>\n\n  <script type=\"text/x-mathjax-config\">\n    MathJax.Hub.Config({\ntex2jax: {\ninlineMath: [['$','$'], ['\\\\(','\\\\)']],\nprocessEscapes: true},\njax: [\"input/TeX\",\"input/MathML\",\"input/AsciiMath\",\"output/CommonHTML\"],\nextensions: [\"tex2jax.js\",\"mml2jax.js\",\"asciimath2jax.js\",\"MathMenu.js\",\"MathZoom.js\",\"AssistiveMML.js\", \"[Contrib]/a11y/accessibility-menu.js\"],\nTeX: 
{\nextensions: [\"AMSmath.js\",\"AMSsymbols.js\",\"noErrors.js\",\"noUndefined.js\"],\nequationNumbers: {\nautoNumber: \"AMS\"\n}\n}\n});\n  </script>\n\n    <div class=\"modal\" id=\"mkdocs_search_modal\" tabindex=\"-1\" role=\"dialog\" aria-labelledby=\"searchModalLabel\" aria-hidden=\"true\">\n    <div class=\"modal-dialog modal-lg\">\n        <div class=\"modal-content\">\n            <div class=\"modal-header\">\n                <button type=\"button\" class=\"close\" data-dismiss=\"modal\">\n                    <span aria-hidden=\"true\">&times;</span>\n                    <span class=\"sr-only\">Close</span>\n                </button>\n                <h4 class=\"modal-title\" id=\"searchModalLabel\">Search</h4>\n            </div>\n            <div class=\"modal-body\">\n                <p>\n                    From here you can search these documents. Enter\n                    your search terms below.\n                </p>\n                <form>\n                    <div class=\"form-group\">\n                        <input type=\"text\" class=\"form-control\" placeholder=\"Search...\" id=\"mkdocs-search-query\" title=\"Type search term here\">\n                    </div>\n                </form>\n                <div id=\"mkdocs-search-results\"></div>\n            </div>\n            <div class=\"modal-footer\">\n            </div>\n        </div>\n    </div>\n</div><div class=\"modal\" id=\"mkdocs_keyboard_modal\" tabindex=\"-1\" role=\"dialog\" aria-labelledby=\"keyboardModalLabel\" aria-hidden=\"true\">\n    <div class=\"modal-dialog\">\n        <div class=\"modal-content\">\n            <div class=\"modal-header\">\n                <h4 class=\"modal-title\" id=\"keyboardModalLabel\">Keyboard Shortcuts</h4>\n                <button type=\"button\" class=\"close\" data-dismiss=\"modal\"><span aria-hidden=\"true\">&times;</span><span class=\"sr-only\">Close</span></button>\n            </div>\n            <div class=\"modal-body\">\n              <table class=\"table\">\n                <thead>\n                  <tr>\n                    <th style=\"width: 20%;\">Keys</th>\n                    <th>Action</th>\n                  </tr>\n                </thead>\n                <tbody>\n                  <tr>\n                    <td class=\"help shortcut\"><kbd>?</kbd></td>\n                    <td>Open this help</td>\n                  </tr>\n                  <tr>\n                    <td class=\"next shortcut\"><kbd>n</kbd></td>\n                    <td>Next page</td>\n                  </tr>\n                  <tr>\n                    <td class=\"prev shortcut\"><kbd>p</kbd></td>\n                    <td>Previous page</td>\n                  </tr>\n                  <tr>\n                    <td class=\"search shortcut\"><kbd>s</kbd></td>\n                    <td>Search</td>\n                  </tr>\n                </tbody>\n              </table>\n            </div>\n            <div class=\"modal-footer\">\n            </div>\n        </div>\n    </div>\n</div>\n    </body>\n\n\n</html>\n\n<!--\nMkDocs version : 1.4.3\nBuild Date UTC : 2025-01-16 18:18:16.540760+00:00\n-->\n"
  },
  {
    "path": "docs/site/install/index.html",
    "content": "<!DOCTYPE html>\n<html lang=\"en\">\n\n<head>\n    <meta charset=\"utf-8\">\n    <meta http-equiv=\"X-UA-Compatible\" content=\"IE=edge\">\n    <meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\">\n    \n    \n    \n    <link rel=\"shortcut icon\" href=\"../img/favicon.ico\">\n\n    \n    <title>Install - regenie</title>\n    \n\n    <link rel=\"stylesheet\" href=\"https://use.fontawesome.com/releases/v5.12.0/css/all.css\">\n    <link rel=\"stylesheet\" href=\"https://use.fontawesome.com/releases/v5.12.0/css/v4-shims.css\">\n    <link rel=\"stylesheet\" href=\"//cdn.jsdelivr.net/npm/hack-font@3.3.0/build/web/hack.min.css\">\n    <link href='//rsms.me/inter/inter.css' rel='stylesheet' type='text/css'>\n    <link href='//fonts.googleapis.com/css?family=Open+Sans:300italic,400italic,700italic,400,300,600,700&subset=latin-ext,latin' rel='stylesheet' type='text/css'>\n    <link href=\"../css/bootstrap-custom.min.css\" rel=\"stylesheet\">\n    <link href=\"../css/base.min.css\" rel=\"stylesheet\">\n    <link href=\"../css/cinder.min.css\" rel=\"stylesheet\">\n\n    \n    <link href=\"../custom/style.css\" rel=\"stylesheet\">\n\n    <!-- HTML5 shim and Respond.js IE8 support of HTML5 elements and media queries -->\n    <!--[if lt IE 9]>\n            <script src=\"https://cdn.jsdelivr.net/npm/html5shiv@3.7.3/dist/html5shiv.min.js\"></script>\n            <script src=\"https://cdn.jsdelivr.net/npm/respond.js@1.4.2/dest/respond.min.js\"></script>\n        <![endif]-->\n\n    \n    <script>\n    (function(i, s, o, g, r, a, m) {\n        i['GoogleAnalyticsObject'] = r;\n        i[r] = i[r] || function() {\n            (i[r].q = i[r].q || []).push(arguments)\n        }, i[r].l = 1 * new Date();\n        a = s.createElement(o),\n        m = s.getElementsByTagName(o)[0];\n        a.async = 1;\n        a.src = g;\n        m.parentNode.insertBefore(a, m)\n    })(window, document, 'script', '//www.google-analytics.com/analytics.js', 'ga');\n\n    ga('create', 'UA-166859683-2', 'auto');\n    ga('send', 'pageview');\n    </script>\n    \n\n     \n</head>\n\n<body>\n\n    <div class=\"navbar navbar-default navbar-fixed-top\" role=\"navigation\">\n    <div class=\"container\">\n\n        <!-- Collapsed navigation -->\n        <div class=\"navbar-header\">\n            <!-- Expander button -->\n            <button type=\"button\" class=\"navbar-toggle\" data-toggle=\"collapse\" data-target=\".navbar-collapse\">\n                <span class=\"sr-only\">Toggle navigation</span>\n                <span class=\"icon-bar\"></span>\n                <span class=\"icon-bar\"></span>\n                <span class=\"icon-bar\"></span>\n            </button>\n            \n\n            <!-- Main title -->\n\n            <a class=\"navbar-brand\" href=\"..\">regenie</a>\n        </div>\n\n        <!-- Expanded navigation -->\n        <div class=\"navbar-collapse collapse\">\n                <!-- Main navigation -->\n                <ul class=\"nav navbar-nav\">\n                \n                \n                    <li >\n                        <a href=\"..\">Home</a>\n                    </li>\n                \n                \n                \n                    <li >\n                        <a href=\"../overview/\">Overview</a>\n                    </li>\n                \n                \n                \n                    <li class=\"active\">\n                        <a href=\"./\">Install</a>\n                    </li>\n                \n                \n              
  \n                    <li >\n                        <a href=\"../options/\">Documentation</a>\n                    </li>\n                \n                \n                \n                    <li >\n                        <a href=\"../performance/\">Performance</a>\n                    </li>\n                \n                \n                \n                    <li >\n                        <a href=\"../recommendations/\">UKBB Analysis</a>\n                    </li>\n                \n                \n                \n                    <li >\n                        <a href=\"../faq/\">F.A.Q.</a>\n                    </li>\n                \n                \n                </ul>\n\n            <ul class=\"nav navbar-nav navbar-right\">\n                    <li>\n                        <a href=\"#\" data-toggle=\"modal\" data-target=\"#mkdocs_search_modal\">\n                            <i class=\"fas fa-search\"></i> Search\n                        </a>\n                    </li>\n                    <li >\n                        <a rel=\"prev\" href=\"../overview/\">\n                            <i class=\"fas fa-arrow-left\"></i> Previous\n                        </a>\n                    </li>\n                    <li >\n                        <a rel=\"next\" href=\"../options/\">\n                            Next <i class=\"fas fa-arrow-right\"></i>\n                        </a>\n                    </li>\n            </ul>\n        </div>\n    </div>\n</div>\n\n    <div class=\"container\">\n        \n        \n        <div class=\"col-md-3\"><div class=\"bs-sidebar hidden-print affix well\" role=\"complementary\">\n    <ul class=\"nav bs-sidenav\">\n        <li class=\"first-level active\"><a href=\"#download\">Download</a></li>\n        <li class=\"first-level \"><a href=\"#installation\">Installation</a></li>\n            <li class=\"second-level\"><a href=\"#pre-compiled-binaries\">Pre-compiled binaries</a></li>\n                \n            <li class=\"second-level\"><a href=\"#standard-installation\">Standard installation</a></li>\n                \n            <li class=\"second-level\"><a href=\"#with-cmake\">With CMake</a></li>\n                \n            <li class=\"second-level\"><a href=\"#with-docker\">With Docker</a></li>\n                \n            <li class=\"second-level\"><a href=\"#with-conda\">With conda</a></li>\n                \n        <li class=\"first-level \"><a href=\"#computing-requirements\">Computing requirements</a></li>\n            <li class=\"second-level\"><a href=\"#memory-usage\">Memory usage</a></li>\n                \n            <li class=\"second-level\"><a href=\"#threading\">Threading</a></li>\n                \n            <li class=\"second-level\"><a href=\"#for-windows-platforms\">For Windows platforms</a></li>\n                \n    </ul>\n</div></div>\n        <div class=\"col-md-9\" role=\"main\">\n\n<h2 id=\"download\">Download</h2>\n<p>The <strong>regenie</strong> source code is hosted on\n<a href=\"https://github.com/rgcgithub/regenie\">Github</a>.</p>\n<h2 id=\"installation\">Installation</h2>\n<div class=\"bs-callout bs-callout-default\">\n  <h4>Pre-requisites</h4>\n    <b>regenie</b> requires compilation with \n<a href=\"https://gcc.gnu.org\">GCC</a> version >= 5.1 (on Linux) \nor Clang version >=3.3 (on Mac OSX). 
\nIt also requires having the GFortran library installed.\n    </div>\n\n<h3 id=\"pre-compiled-binaries\">Pre-compiled binaries</h3>\n<p>Pre-compiled binaries are available in the \n<a href=\"https://github.com/rgcgithub/regenie/releases\">Github repository</a>.\nThese are provided for Linux (including CentOS 7) and Mac OSX \ncomputing environments and are statically linked. \nFor the Linux binaries, users should have GLIBC version &gt;= 2.22 installed.\nAdditionally, they are compiled with the Intel MKL library, which\nprovides speedups for many of the operations done in <strong>regenie</strong>. </p>\n<h3 id=\"standard-installation\">Standard installation</h3>\n<ol>\n<li><strong>regenie</strong> requires the\n  <a href=\"https://enkre.net/cgi-bin/code/bgen/dir?ci=trunk\">BGEN library</a>, so\n  you will need to download and install that library.</li>\n<li>Edit the BGEN_PATH variable in the <code>Makefile</code>\n   to the BGEN library path.</li>\n<li>On the command line, type <code>make</code> while in the main source code directory.</li>\n<li>This should produce the executable called <code>regenie</code>.</li>\n</ol>\n<p><strong>regenie</strong> has been enhanced to allow for gzip compressed input \n(for phenotype/covariate files) and output (for association results files)\n using the Boost Iostream library. \nIf this library is installed on the system, you should compile using \n<code>make HAS_BOOST_IOSTREAM=1</code>. </p>\n<p>Furthermore, we have enabled compilation of <strong>regenie</strong> with\nthe Intel Math Kernel Library (MKL). You first need to have it installed \non your system and modify the MKLROOT variable in the <code>Makefile</code>\nto the installed MKL library path.</p>\n
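<p>For reference, these <code>Makefile</code> variables can also be passed on the <code>make</code> command line instead of editing the <code>Makefile</code>; assuming the BGEN library was built under <code>~/bgen</code> (a placeholder path), a combined build command could look like</p>\n<pre><code># build with the BGEN library path and Boost Iostreams support\nmake BGEN_PATH=~/bgen HAS_BOOST_IOSTREAM=1\n</code></pre>\n<p>(drop <code>HAS_BOOST_IOSTREAM=1</code> if the Boost Iostreams library is not installed on your system).</p>\n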
<h3 id=\"with-cmake\">With CMake</h3>\n<p>You can compile the binary using CMake version &gt;=3.13 (instead of <code>make</code> as above).</p>\n<pre><code>mkdir -p build\ncd build\nBGEN_PATH=&lt;path_to_bgen_lib&gt; cmake ..\nmake\n</code></pre>\n<p>This will generate the binary in the <code>build/</code> subdirectory. \nTo use with Boost Iostreams and/or the Intel MKL library,\nadd the corresponding flags before the <code>cmake</code> command on line 3\n(e.g. <code>BGEN_PATH=&lt;path_to_bgen_lib&gt; HAS_BOOST_IOSTREAM=1 cmake ..</code>).</p>\n<h3 id=\"with-docker\">With Docker</h3>\n<p>Alternatively, you can use a Docker image to run <strong>regenie</strong>. \nA guide to using Docker is available on \nthe <a href=\"https://github.com/rgcgithub/regenie/wiki/Using-docker\">Github page</a>.</p>\n<h3 id=\"with-conda\">With conda</h3>\n<p>To install with <a href=\"https://anaconda.org/bioconda/regenie\">conda</a>, you can use the following commands:</p>\n<pre><code># create new environment\nconda create -n regenie_env -c conda-forge -c bioconda regenie\n# load it\nconda activate regenie_env\n</code></pre>\n<h2 id=\"computing-requirements\">Computing requirements</h2>\n<p>We have tested <strong>regenie</strong> on 64-bit Linux and 64-bit Mac OSX computing environments.</p>\n<p>Note that for Mac OSX computing environments, compiling is done without OpenMP, as the library is not built-in by default and has to be installed separately.</p>\n<h3 id=\"memory-usage\">Memory usage</h3>\n<p>In both Step 1 and Step 2 of a <strong>regenie</strong> run, the genetic data file is\nread once, in blocks of SNPs, so at no point is the full dataset ever stored in\nmemory.</p>\n<p><strong>regenie</strong> uses a dimension reduction approach based on ridge regression\n  to produce a relatively small set of genetic predictors that are\n  then used to fit a whole-genome regression model. These genetic\n  predictors are stored in memory by default, and can be relatively\n  large if many phenotypes are analyzed at once.</p>\n<p>For example, if there are <script type=\"math/tex\">P</script> phenotypes, <script type=\"math/tex\">M</script> SNPs and <script type=\"math/tex\">N</script> samples, and a\nblock size of <script type=\"math/tex\">B</script> SNPs is used with <script type=\"math/tex\">R</script> ridge parameters,\n then <strong>regenie</strong> needs to store roughly <script type=\"math/tex\">N\\times M/B\\times R</script>\ndoubles per phenotype, which is 8GB per phenotype when <script type=\"math/tex\">M=500,000,\nN=400,000, B=1,000, R=5</script> and 200GB in total when <script type=\"math/tex\">P=25</script>.</p>\n
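<p>Making the arithmetic explicit for this example (one double takes 8 bytes):</p>\n<p>\n<script type=\"math/tex; mode=display\">N\\times \\frac{M}{B}\\times R = 400{,}000\\times 500\\times 5 = 10^9\\ \\text{doubles} = 8\\times 10^9\\ \\text{bytes}\\approx 8\\ \\text{GB}.</script>\n</p>\n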
<p>However, the <code>--lowmem</code> option can be used to avoid that memory usage,\nat negligible extra computational cost, by writing temporary files to disk.</p>\n<h3 id=\"threading\">Threading</h3>\n<p><strong>regenie</strong> can take advantage of multiple cores using threading. The\nnumber of threads can be specified using the <code>--threads</code> option.</p>\n<p><strong>regenie</strong> uses the <a href=\"http://eigen.tuxfamily.org/index.php?title=Main_Page\">Eigen library</a> for \nefficient linear algebra operations and this uses threading where possible.</p>\n<p>For PLINK bed/bim/fam files, PLINK2 pgen/pvar/psam files, as well as BGEN v1.2 files with 8-bit encoding (the format used for the UK Biobank\n500K imputed data), step 2 of <strong>regenie</strong> has been optimized \nusing multithreading through <a href=\"https://www.openmp.org\">OpenMP</a>.</p>\n<p>When running the SKAT/ACAT gene-based tests, we recommend using at most 2 threads and \ninstead parallelizing the runs over partitions of the genome (e.g. groups of genes).</p>\n<h3 id=\"for-windows-platforms\">For Windows platforms</h3>\n<p>If you are on a Windows machine, we recommend using <a href=\"https://docs.microsoft.com/en-us/windows/wsl/install\">Windows Subsystem for Linux</a> (WSL)\nto install an Ubuntu distribution so that you can run REGENIE\nfrom a Linux terminal.\nYou can download pre-compiled REGENIE binaries from the <a href=\"https://github.com/rgcgithub/regenie/releases\">Github repository</a> \n(note that you will need to install the <code>libgomp1</code> library).</p>\n<p>Note: from your Windows command prompt, you can run REGENIE using <code>wsl regenie</code>.</p></div>\n        \n        \n    </div>\n\n    <footer class=\"col-md-12 text-center\">\n        \n        <hr>\n        <p>\n        <small>REGENIE is licensed under the <a href='https://github.com/rgcgithub/regenie/blob/master/LICENSE'>MIT license</a></small><br>\n        \n        <small>Documentation built with <a href=\"http://www.mkdocs.org/\">MkDocs</a>.</small>\n        </p>\n\n        \n        \n    </footer>\n    <script src=\"//ajax.googleapis.com/ajax/libs/jquery/1.12.4/jquery.min.js\"></script>\n    <script src=\"../js/bootstrap-3.0.3.min.js\"></script>\n    \n    <script>var base_url = \"..\"</script>\n    \n    <script src=\"../js/base.js\"></script>\n    <script src=\"../search/main.js\"></script>\n\n  <script type=\"text/javascript\"\n          src=\"https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.0/MathJax.js?config=TeX-AMS_CHTML\"></script>\n\n  <script type=\"text/x-mathjax-config\">\n    MathJax.Hub.Config({\ntex2jax: {\ninlineMath: [['$','$'], ['\\\\(','\\\\)']],\nprocessEscapes: true},\njax: [\"input/TeX\",\"input/MathML\",\"input/AsciiMath\",\"output/CommonHTML\"],\nextensions: [\"tex2jax.js\",\"mml2jax.js\",\"asciimath2jax.js\",\"MathMenu.js\",\"MathZoom.js\",\"AssistiveMML.js\", \"[Contrib]/a11y/accessibility-menu.js\"],\nTeX: {\nextensions: [\"AMSmath.js\",\"AMSsymbols.js\",\"noErrors.js\",\"noUndefined.js\"],\nequationNumbers: {\nautoNumber: \"AMS\"\n}\n}\n});\n  </script>\n\n    <div class=\"modal\" id=\"mkdocs_search_modal\" tabindex=\"-1\" role=\"dialog\" aria-labelledby=\"searchModalLabel\" aria-hidden=\"true\">\n    <div class=\"modal-dialog modal-lg\">\n        <div class=\"modal-content\">\n            <div class=\"modal-header\">\n                <button type=\"button\" class=\"close\" data-dismiss=\"modal\">\n                    <span aria-hidden=\"true\">&times;</span>\n                    <span class=\"sr-only\">Close</span>\n                </button>\n                <h4 class=\"modal-title\" id=\"searchModalLabel\">Search</h4>\n            </div>\n            <div class=\"modal-body\">\n                <p>\n                    From here you can search these documents. 
Enter\n                    your search terms below.\n                </p>\n                <form>\n                    <div class=\"form-group\">\n                        <input type=\"text\" class=\"form-control\" placeholder=\"Search...\" id=\"mkdocs-search-query\" title=\"Type search term here\">\n                    </div>\n                </form>\n                <div id=\"mkdocs-search-results\"></div>\n            </div>\n            <div class=\"modal-footer\">\n            </div>\n        </div>\n    </div>\n</div><div class=\"modal\" id=\"mkdocs_keyboard_modal\" tabindex=\"-1\" role=\"dialog\" aria-labelledby=\"keyboardModalLabel\" aria-hidden=\"true\">\n    <div class=\"modal-dialog\">\n        <div class=\"modal-content\">\n            <div class=\"modal-header\">\n                <h4 class=\"modal-title\" id=\"keyboardModalLabel\">Keyboard Shortcuts</h4>\n                <button type=\"button\" class=\"close\" data-dismiss=\"modal\"><span aria-hidden=\"true\">&times;</span><span class=\"sr-only\">Close</span></button>\n            </div>\n            <div class=\"modal-body\">\n              <table class=\"table\">\n                <thead>\n                  <tr>\n                    <th style=\"width: 20%;\">Keys</th>\n                    <th>Action</th>\n                  </tr>\n                </thead>\n                <tbody>\n                  <tr>\n                    <td class=\"help shortcut\"><kbd>?</kbd></td>\n                    <td>Open this help</td>\n                  </tr>\n                  <tr>\n                    <td class=\"next shortcut\"><kbd>n</kbd></td>\n                    <td>Next page</td>\n                  </tr>\n                  <tr>\n                    <td class=\"prev shortcut\"><kbd>p</kbd></td>\n                    <td>Previous page</td>\n                  </tr>\n                  <tr>\n                    <td class=\"search shortcut\"><kbd>s</kbd></td>\n                    <td>Search</td>\n                  </tr>\n                </tbody>\n              </table>\n            </div>\n            <div class=\"modal-footer\">\n            </div>\n        </div>\n    </div>\n</div>\n    </body>\n\n\n</html>\n"
  },
  {
    "path": "docs/site/js/base.js",
    "content": "function getSearchTerm() {\n    var sPageURL = window.location.search.substring(1);\n    var sURLVariables = sPageURL.split('&');\n    for (var i = 0; i < sURLVariables.length; i++) {\n        var sParameterName = sURLVariables[i].split('=');\n        if (sParameterName[0] == 'q') {\n            return sParameterName[1];\n        }\n    }\n}\n\n$(document).ready(function() {\n    /**\n     * ------------------------------------------------------------------------\n     * Taken from themes/mkdocs/js/base.js\n     * ------------------------------------------------------------------------\n     */\n    var search_term = getSearchTerm(),\n        $search_modal = $('#mkdocs_search_modal'),\n        $keyboard_modal = $('#mkdocs_keyboard_modal');\n\n    if (search_term) {\n        $search_modal.modal();\n    }\n\n    // make sure search input gets autofocus everytime modal opens.\n    $search_modal.on('shown.bs.modal', function() {\n        $search_modal.find('#mkdocs-search-query').focus();\n    });\n\n    // Close search modal when result is selected\n    // The links get added later so listen to parent\n    $('#mkdocs-search-results').click(function(e) {\n        if ($(e.target).is('a')) {\n            $search_modal.modal('hide');\n        }\n    });\n\n    if (typeof shortcuts !== 'undefined') {\n        // Populate keyboard modal with proper Keys\n        $keyboard_modal.find('.help.shortcut kbd')[0].innerHTML = keyCodes[shortcuts.help];\n        $keyboard_modal.find('.prev.shortcut kbd')[0].innerHTML = keyCodes[shortcuts.previous];\n        $keyboard_modal.find('.next.shortcut kbd')[0].innerHTML = keyCodes[shortcuts.next];\n        $keyboard_modal.find('.search.shortcut kbd')[0].innerHTML = keyCodes[shortcuts.search];\n\n        // Keyboard navigation\n        document.addEventListener(\"keydown\", function(e) {\n            if ($(e.target).is(':input')) return true;\n            var key = e.which || e.key || window.event && window.event.key;\n            var page;\n            switch (key) {\n                case shortcuts.next:\n                    page = $('.navbar a[rel=\"next\"]:first').prop('href');\n                    break;\n                case shortcuts.previous:\n                    page = $('.navbar a[rel=\"prev\"]:first').prop('href');\n                    break;\n                case shortcuts.search:\n                    e.preventDefault();\n                    $keyboard_modal.modal('hide');\n                    $search_modal.modal('show');\n                    $search_modal.find('#mkdocs-search-query').focus();\n                    break;\n                case shortcuts.help:\n                    $search_modal.modal('hide');\n                    $keyboard_modal.modal('show');\n                    break;\n                default:\n                    break;\n            }\n            if (page) {\n                $keyboard_modal.modal('hide');\n                window.location.href = page;\n            }\n        });\n    }\n\n    $('table').addClass('table table-striped table-hover');\n\n    // Improve the scrollspy behaviour when users click on a TOC item.\n    $(\".bs-sidenav a\").on(\"click\", function() {\n        var clicked = this;\n        setTimeout(function() {\n            var active = $('.nav li.active a');\n            active = active[active.length - 1];\n            if (clicked !== active) {\n                $(active).parent().removeClass(\"active\");\n                $(clicked).parent().addClass(\"active\");\n            }\n        }, 50);\n    
});\n});\n\n\n/**\n * ------------------------------------------------------------------------\n * Taken from themes/mkdocs/js/base.js\n * ------------------------------------------------------------------------\n */\n\n$('body').scrollspy({\n    target: '.bs-sidebar',\n    offset: 100\n});\n\n/* Prevent disabled links from causing a page reload */\n$(\"li.disabled a\").click(function(event) {\n    event.preventDefault();\n});\n\n// See https://www.cambiaresearch.com/articles/15/javascript-char-codes-key-codes\n// We only list common keys below. Obscure keys are omitted and their use is discouraged.\nvar keyCodes = {\n    8: 'backspace',\n    9: 'tab',\n    13: 'enter',\n    16: 'shift',\n    17: 'ctrl',\n    18: 'alt',\n    19: 'pause/break',\n    20: 'caps lock',\n    27: 'escape',\n    32: 'spacebar',\n    33: 'page up',\n    34: 'page down',\n    35: 'end',\n    36: 'home',\n    37: '&larr;',\n    38: '&uarr;',\n    39: '&rarr;',\n    40: '&darr;',\n    45: 'insert',\n    46: 'delete',\n    48: '0',\n    49: '1',\n    50: '2',\n    51: '3',\n    52: '4',\n    53: '5',\n    54: '6',\n    55: '7',\n    56: '8',\n    57: '9',\n    65: 'a',\n    66: 'b',\n    67: 'c',\n    68: 'd',\n    69: 'e',\n    70: 'f',\n    71: 'g',\n    72: 'h',\n    73: 'i',\n    74: 'j',\n    75: 'k',\n    76: 'l',\n    77: 'm',\n    78: 'n',\n    79: 'o',\n    80: 'p',\n    81: 'q',\n    82: 'r',\n    83: 's',\n    84: 't',\n    85: 'u',\n    86: 'v',\n    87: 'w',\n    88: 'x',\n    89: 'y',\n    90: 'z',\n    91: 'Left Windows Key / Left ⌘',\n    92: 'Right Windows Key',\n    93: 'Windows Menu / Right ⌘',\n    96: 'numpad 0',\n    97: 'numpad 1',\n    98: 'numpad 2',\n    99: 'numpad 3',\n    100: 'numpad 4',\n    101: 'numpad 5',\n    102: 'numpad 6',\n    103: 'numpad 7',\n    104: 'numpad 8',\n    105: 'numpad 9',\n    106: 'multiply',\n    107: 'add',\n    109: 'subtract',\n    110: 'decimal point',\n    111: 'divide',\n    112: 'f1',\n    113: 'f2',\n    114: 'f3',\n    115: 'f4',\n    116: 'f5',\n    117: 'f6',\n    118: 'f7',\n    119: 'f8',\n    120: 'f9',\n    121: 'f10',\n    122: 'f11',\n    123: 'f12',\n    124: 'f13',\n    125: 'f14',\n    126: 'f15',\n    127: 'f16',\n    128: 'f17',\n    129: 'f18',\n    130: 'f19',\n    131: 'f20',\n    132: 'f21',\n    133: 'f22',\n    134: 'f23',\n    135: 'f24',\n    144: 'num lock',\n    145: 'scroll lock',\n    186: '&semi;',\n    187: '&equals;',\n    188: '&comma;',\n    189: '&hyphen;',\n    190: '&period;',\n    191: '&quest;',\n    192: '&grave;',\n    219: '&lsqb;',\n    220: '&bsol;',\n    221: '&rsqb;',\n    222: '&apos;',\n};\n"
  },
  {
    "path": "docs/site/options/index.html",
    "content": "<!DOCTYPE html>\n<html lang=\"en\">\n\n<head>\n    <meta charset=\"utf-8\">\n    <meta http-equiv=\"X-UA-Compatible\" content=\"IE=edge\">\n    <meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\">\n    \n    \n    \n    <link rel=\"shortcut icon\" href=\"../img/favicon.ico\">\n\n    \n    <title>Documentation - regenie</title>\n    \n\n    <link rel=\"stylesheet\" href=\"https://use.fontawesome.com/releases/v5.12.0/css/all.css\">\n    <link rel=\"stylesheet\" href=\"https://use.fontawesome.com/releases/v5.12.0/css/v4-shims.css\">\n    <link rel=\"stylesheet\" href=\"//cdn.jsdelivr.net/npm/hack-font@3.3.0/build/web/hack.min.css\">\n    <link href='//rsms.me/inter/inter.css' rel='stylesheet' type='text/css'>\n    <link href='//fonts.googleapis.com/css?family=Open+Sans:300italic,400italic,700italic,400,300,600,700&subset=latin-ext,latin' rel='stylesheet' type='text/css'>\n    <link href=\"../css/bootstrap-custom.min.css\" rel=\"stylesheet\">\n    <link href=\"../css/base.min.css\" rel=\"stylesheet\">\n    <link href=\"../css/cinder.min.css\" rel=\"stylesheet\">\n\n    \n    <link href=\"../custom/style.css\" rel=\"stylesheet\">\n\n    <!-- HTML5 shim and Respond.js IE8 support of HTML5 elements and media queries -->\n    <!--[if lt IE 9]>\n            <script src=\"https://cdn.jsdelivr.net/npm/html5shiv@3.7.3/dist/html5shiv.min.js\"></script>\n            <script src=\"https://cdn.jsdelivr.net/npm/respond.js@1.4.2/dest/respond.min.js\"></script>\n        <![endif]-->\n\n    \n    <script>\n    (function(i, s, o, g, r, a, m) {\n        i['GoogleAnalyticsObject'] = r;\n        i[r] = i[r] || function() {\n            (i[r].q = i[r].q || []).push(arguments)\n        }, i[r].l = 1 * new Date();\n        a = s.createElement(o),\n        m = s.getElementsByTagName(o)[0];\n        a.async = 1;\n        a.src = g;\n        m.parentNode.insertBefore(a, m)\n    })(window, document, 'script', '//www.google-analytics.com/analytics.js', 'ga');\n\n    ga('create', 'UA-166859683-2', 'auto');\n    ga('send', 'pageview');\n    </script>\n    \n\n     \n</head>\n\n<body>\n\n    <div class=\"navbar navbar-default navbar-fixed-top\" role=\"navigation\">\n    <div class=\"container\">\n\n        <!-- Collapsed navigation -->\n        <div class=\"navbar-header\">\n            <!-- Expander button -->\n            <button type=\"button\" class=\"navbar-toggle\" data-toggle=\"collapse\" data-target=\".navbar-collapse\">\n                <span class=\"sr-only\">Toggle navigation</span>\n                <span class=\"icon-bar\"></span>\n                <span class=\"icon-bar\"></span>\n                <span class=\"icon-bar\"></span>\n            </button>\n            \n\n            <!-- Main title -->\n\n            <a class=\"navbar-brand\" href=\"..\">regenie</a>\n        </div>\n\n        <!-- Expanded navigation -->\n        <div class=\"navbar-collapse collapse\">\n                <!-- Main navigation -->\n                <ul class=\"nav navbar-nav\">\n                \n                \n                    <li >\n                        <a href=\"..\">Home</a>\n                    </li>\n                \n                \n                \n                    <li >\n                        <a href=\"../overview/\">Overview</a>\n                    </li>\n                \n                \n                \n                    <li >\n                        <a href=\"../install/\">Install</a>\n                    </li>\n                \n                \n               
 \n                    <li class=\"active\">\n                        <a href=\"./\">Documentation</a>\n                    </li>\n                \n                \n                \n                    <li >\n                        <a href=\"../performance/\">Performance</a>\n                    </li>\n                \n                \n                \n                    <li >\n                        <a href=\"../recommendations/\">UKBB Analysis</a>\n                    </li>\n                \n                \n                \n                    <li >\n                        <a href=\"../faq/\">F.A.Q.</a>\n                    </li>\n                \n                \n                </ul>\n\n            <ul class=\"nav navbar-nav navbar-right\">\n                    <li>\n                        <a href=\"#\" data-toggle=\"modal\" data-target=\"#mkdocs_search_modal\">\n                            <i class=\"fas fa-search\"></i> Search\n                        </a>\n                    </li>\n                    <li >\n                        <a rel=\"prev\" href=\"../install/\">\n                            <i class=\"fas fa-arrow-left\"></i> Previous\n                        </a>\n                    </li>\n                    <li >\n                        <a rel=\"next\" href=\"../performance/\">\n                            Next <i class=\"fas fa-arrow-right\"></i>\n                        </a>\n                    </li>\n            </ul>\n        </div>\n    </div>\n</div>\n\n    <div class=\"container\">\n        \n        \n        <div class=\"col-md-3\"><div class=\"bs-sidebar hidden-print affix well\" role=\"complementary\">\n    <ul class=\"nav bs-sidenav\">\n        <li class=\"first-level active\"><a href=\"#getting-started\">Getting started</a></li>\n        <li class=\"first-level \"><a href=\"#basic-options\">Basic options</a></li>\n            <li class=\"second-level\"><a href=\"#input\">Input</a></li>\n                \n                <li class=\"third-level\"><a href=\"#genetic-data-file-format\">Genetic data file format</a></li>\n                <li class=\"third-level\"><a href=\"#covariate-file-format\">Covariate file format</a></li>\n                <li class=\"third-level\"><a href=\"#phenotype-file-format\">Phenotype file format</a></li>\n                <li class=\"third-level\"><a href=\"#predictions-file-format\">Predictions file format</a></li>\n            <li class=\"second-level\"><a href=\"#options\">Options</a></li>\n                \n            <li class=\"second-level\"><a href=\"#output\">Output</a></li>\n                \n        <li class=\"first-level \"><a href=\"#gene-based-testing\">Gene-based testing</a></li>\n            <li class=\"second-level\"><a href=\"#input_1\">Input</a></li>\n                \n                <li class=\"third-level\"><a href=\"#annotation-input-files\">Annotation input files</a></li>\n                <li class=\"third-level\"><a href=\"#mask-definitions\">Mask definitions</a></li>\n                <li class=\"third-level\"><a href=\"#skatacat-tests\">SKAT/ACAT tests</a></li>\n                <li class=\"third-level\"><a href=\"#joint-test-for-burden-masks\">Joint test for burden masks</a></li>\n                <li class=\"third-level\"><a href=\"#lovolodo-schemes\">LOVO/LODO schemes</a></li>\n                <li class=\"third-level\"><a href=\"#writing-mask-files\">Writing mask files</a></li>\n            <li class=\"second-level\"><a href=\"#options_1\">Options</a></li>\n                \n            
<li class=\"second-level\"><a href=\"#output_1\">Output</a></li>\n                \n            <li class=\"second-level\"><a href=\"#example-run\">Example run</a></li>\n                \n            <li class=\"second-level\"><a href=\"#checking-input-files\">Checking input files</a></li>\n                \n        <li class=\"first-level \"><a href=\"#interaction-testing\">Interaction testing</a></li>\n            <li class=\"second-level\"><a href=\"#options_2\">Options</a></li>\n                \n            <li class=\"second-level\"><a href=\"#output_2\">Output</a></li>\n                \n        <li class=\"first-level \"><a href=\"#conditional-analyses\">Conditional analyses</a></li>\n        <li class=\"first-level \"><a href=\"#survival-analyses\">Survival analyses</a></li>\n            <li class=\"second-level\"><a href=\"#phenotype-file-format_1\">Phenotype file format</a></li>\n                \n            <li class=\"second-level\"><a href=\"#required-options\">Required options</a></li>\n                \n        <li class=\"first-level \"><a href=\"#ld-computation\">LD computation</a></li>\n            <li class=\"second-level\"><a href=\"#output_3\">Output</a></li>\n                \n    </ul>\n</div></div>\n        <div class=\"col-md-9\" role=\"main\">\n\n<h2 id=\"getting-started\">Getting started</h2>\n<p>To run <strong>regenie</strong>, use the command <code>./regenie</code> on the command line,\nfollowed by options and flags as needed.</p>\n<p>To get a full list of options use</p>\n<pre><code>./regenie --help\n</code></pre>\n<p>The directory <code>examples/</code> contains some small example files that are\nuseful when getting started. A test run on a set of binary traits can be achieved by the\nfollowing 2 commands.</p>\n<p>In <strong>Step 1</strong>, the whole genome regression model is fit to the traits, and\na set of genomic predictions are produced as output</p>\n<pre><code>./regenie \\\n  --step 1 \\\n  --bed example/example \\\n  --exclude example/snplist_rm.txt \\\n  --covarFile example/covariates.txt \\\n  --phenoFile example/phenotype_bin.txt \\\n  --remove example/fid_iid_to_remove.txt \\\n  --bsize 100 \\\n  --bt --lowmem \\\n  --lowmem-prefix tmp_rg \\\n  --out fit_bin_out\n</code></pre>\n<p>In <strong>Step 2</strong>, a set of imputed SNPs are tested for association using a\nFirth logistic regression model</p>\n<pre><code>./regenie \\\n  --step 2 \\\n  --bgen example/example.bgen \\\n  --covarFile example/covariates.txt \\\n  --phenoFile example/phenotype_bin.txt \\\n  --remove example/fid_iid_to_remove.txt \\\n  --bsize 200 \\\n  --bt \\\n  --firth --approx \\\n  --pThresh 0.01 \\\n  --pred fit_bin_out_pred.list \\\n  --out test_bin_out_firth\n</code></pre>\n<p>One of the output files from these two commands is included in <code>example/test_bin_out_firth_Y1.regenie</code>.</p>\n<h2 id=\"basic-options\">Basic options</h2>\n<h3 id=\"input\">Input</h3>\n<table>\n<thead>\n<tr>\n<th>Option</th>\n<th>Argument</th>\n<th>Type</th>\n<th>Description</th>\n</tr>\n</thead>\n<tbody>\n<tr>\n<td><code>--bgen, --bed, --pgen</code></td>\n<td>FILE</td>\n<td>Required</td>\n<td>Input genetic data file. Either BGEN file eg. 
<code>file.bgen</code>, or a bed/bim/fam prefix that assumes <code>file.bed</code>, <code>file.bim</code>, <code>file.fam</code> exist, or a pgen/pvar/psam prefix that assumes <code>file.pgen</code>, <code>file.pvar</code>, <code>file.psam</code> exist</td>\n</tr>\n<tr>\n<td><code>--sample</code></td>\n<td>FILE</td>\n<td>Optional</td>\n<td>Sample file corresponding to input BGEN file</td>\n</tr>\n<tr>\n<td><code>--bgi</code></td>\n<td>FILE</td>\n<td>Optional</td>\n<td>Index bgi file corresponding to input BGEN file</td>\n</tr>\n<tr>\n<td><code>--ref-first</code></td>\n<td>FLAG</td>\n<td>Optional</td>\n<td>Specify to use the first allele as the reference allele for BGEN or PLINK bed/bim/fam file input [default is to use the last allele as the reference]</td>\n</tr>\n<tr>\n<td><code>--keep</code></td>\n<td>FILE</td>\n<td>Optional</td>\n<td>Inclusion file that lists individuals to retain in the analysis</td>\n</tr>\n<tr>\n<td><code>--remove</code></td>\n<td>FILE</td>\n<td>Optional</td>\n<td>Exclusion file that lists individuals to remove from the analysis</td>\n</tr>\n<tr>\n<td><code>--extract</code></td>\n<td>FILE</td>\n<td>Optional</td>\n<td>Inclusion file that lists IDs of variants to keep</td>\n</tr>\n<tr>\n<td><code>--exclude</code></td>\n<td>FILE</td>\n<td>Optional</td>\n<td>Exclusion file that lists IDs of variants to remove</td>\n</tr>\n<tr>\n<td><code>--extract-or</code></td>\n<td>FILE</td>\n<td>Optional</td>\n<td>Inclusion file that lists IDs of variants to keep regardless of minimum MAC filter</td>\n</tr>\n<tr>\n<td><code>--exclude-or</code></td>\n<td>FILE</td>\n<td>Optional</td>\n<td>Exclusion file that lists IDs of variants to remove unless MAC is above threshold</td>\n</tr>\n<tr>\n<td><code>--phenoFile</code></td>\n<td>FILE</td>\n<td>Required</td>\n<td>Phenotypes file</td>\n</tr>\n<tr>\n<td><code>--phenoCol</code></td>\n<td>STRING</td>\n<td>Optional</td>\n<td>Use for each phenotype you want to include in the analysis</td>\n</tr>\n<tr>\n<td><code>--phenoColList</code></td>\n<td>STRING</td>\n<td>Optional</td>\n<td>Comma separated list of phenotypes to include in the analysis</td>\n</tr>\n<tr>\n<td><code>--eventColList</code></td>\n<td>STRING</td>\n<td>Optional</td>\n<td>Comma separated list of columns in the phenotype file to include in the analysis that contain the event times</td>\n</tr>\n<tr>\n<td><code>--phenoExcludeList</code></td>\n<td>STRING</td>\n<td>Optional</td>\n<td>Comma separated list of phenotypes to ignore from the analysis</td>\n</tr>\n<tr>\n<td><code>--covarFile</code></td>\n<td>FILE</td>\n<td>Optional</td>\n<td>Covariates file</td>\n</tr>\n<tr>\n<td><code>--covarCol</code></td>\n<td>STRING</td>\n<td>Optional</td>\n<td>Use for each covariate you want to include in the analysis</td>\n</tr>\n<tr>\n<td><code>--covarColList</code></td>\n<td>STRING</td>\n<td>Optional</td>\n<td>Comma separated list of covariates to include in the analysis</td>\n</tr>\n<tr>\n<td><code>--catCovarList</code></td>\n<td>STRING</td>\n<td>Optional</td>\n<td>Comma separated list of categorical covariates to include in the analysis</td>\n</tr>\n<tr>\n<td><code>--covarExcludeList</code></td>\n<td>STRING</td>\n<td>Optional</td>\n<td>Comma separated list of covariates to ignore</td>\n</tr>\n<tr>\n<td><code>--pred</code></td>\n<td>FILE</td>\n<td>Optional</td>\n<td>File containing predictions from Step 1 (see Overview). 
<strong>This is required for <code>--step 2</code></strong></td>\n</tr>\n<tr>\n<td><code>--tpheno-file</code></td>\n<td>STRING</td>\n<td>Optional</td>\n<td>to use a phenotype file in transposed format (e.g. BED format)</td>\n</tr>\n<tr>\n<td><code>--tpheno-indexCol</code></td>\n<td>INT</td>\n<td>Optional</td>\n<td>index of phenotype name column in transposed phenotype file</td>\n</tr>\n<tr>\n<td><code>--tpheno-ignoreCols</code></td>\n<td>INT</td>\n<td>Optional</td>\n<td>indexes of columns to ignore in transposed phenotype file</td>\n</tr>\n<tr>\n<td><code>--iid-only</code></td>\n<td>FLAG</td>\n<td>Optional</td>\n<td>to specify if header in transposed phenotype file only contains sample IID (assume FID=IID)</td>\n</tr>\n</tbody>\n</table>\n<p>Note: Parameter expansion can be used when specifying phenotypes/covariates (e.g. <code>--covarCol PC{1:10}</code>).\nAlso, multiple files can be specified for <code>--extract/--exclude/--keep/--remove</code> by using a comma-separated list.</p>\n<h4 id=\"genetic-data-file-format\">Genetic data file format</h4>\n<p><strong>regenie</strong> can read BGEN files, bed/bim/fam files or pgen/psam/pvar \nfiles in Step 1 and Step 2.</p>\n<p>The BGEN file format is described\n<a href=\"https://www.well.ox.ac.uk/~gav/bgen_format/\">here</a>.</p>\n<p>The bed/bim/fam file format is described <a href=\"https://www.cog-genomics.org/plink/1.9/formats\">here</a>.</p>\n<p>The pgen/pvar/psam file format is described <a href=\"https://www.cog-genomics.org/plink/2.0/formats#pgen\">here</a>.</p>\n<p>Tools useful for genetic data file format conversion are: <a href=\"http://www.cog-genomics.org/plink/\">PLINK</a>, <a href=\"https://www.well.ox.ac.uk/~gav/qctool/\">QCTOOL</a>, <a href=\"https://samtools.github.io/bcftools/\">BCFTOOLS</a>.</p>\n<p>Step 2 of <strong>regenie</strong> can be sped up by using BGEN files in v1.2 format with 8-bit encoding \n(the genotype file can be generated with <a href=\"https://www.cog-genomics.org/plink/2.0/\">PLINK2</a> using \noption <code>--export bgen-1.2 'bits=8'</code>) as well as by having an accompanying .bgi index file \n(a useful tool to create such a file is bgenix, which is part of the BGEN library).</p>
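\n<p>For example, a sketch of such a conversion (output file names here are hypothetical; <code>bgenix</code> usage is documented in the BGEN library):</p>\n<pre><code># convert to BGEN v1.2 with 8-bit encoding\nplink2 --bfile example/example --export bgen-1.2 'bits=8' --out example_8bit\n# create the accompanying .bgi index file\nbgenix -g example_8bit.bgen -index\n</code></pre>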
\n<p>To include X chromosome genotypes in step 1 and/or step 2, males should be coded as diploid \nso that their genotypes are 0/2 (this is done automatically for BED and PGEN file formats with haploid genotypes).\nChromosome values of 23 (for human analyses), X, Y, XY, PAR1 and PAR2 are all acceptable and \nwill be collapsed into a single chromosome.</p>\n<h5 id=\"sample-inclusionexclusion-file-format\">Sample inclusion/exclusion file format</h5>\n<pre><code>2 2 \n7 7 \n.\n</code></pre>\n<p>No header. Each line starts with individual FID IID. Space/tab separated.</p>\n<p>Samples listed in the file that are not in bgen/bed/pgen file are ignored.</p>\n<h5 id=\"variant-inclusionexclusion-file-format\">Variant inclusion/exclusion file format</h5>\n<pre><code>20\n31\n.\n</code></pre>\n<p>No header. Each line must start with the variant ID \n(if there are additional columns, the file must be space/tab separated).</p>\n<p>Variants listed in this file that are not in bgen/bed/pgen file are ignored.</p>\n<h4 id=\"covariate-file-format\">Covariate file format</h4>\n<pre><code>FID IID V1 V2 V3\n1 1 1.46837294454993 1.93779743016325 0.152887004505393\n2 2 -1.2234390803815 -1.63408619199948 -0.190201446835255\n3 3 0.0711531925667286 0.0863906292357564 0.14254739715665\n.\n</code></pre>\n<p>Line 1: Header with FID, IID and <script type=\"math/tex\">C</script> covariate names.</p>\n<p>Followed by lines of <script type=\"math/tex\">C+2</script> values. Space/tab separated.</p>\n<p>Each line contains individual FID and IID followed by <script type=\"math/tex\">C</script> covariate\nvalues.</p>\n<p>Samples listed in this file that are not in bgen/bed/pgen file are ignored.\nGenotyped samples that are not in this file are removed from the analysis, as are\nsamples with missing values for any of the included covariates.</p>\n<p>If <code>--step 2</code> is specified, then the covariate file should be the same\nas that used in Step 1.</p>\n<h4 id=\"phenotype-file-format\">Phenotype file format</h4>\n<pre><code>FID IID Y1 Y2\n1 1 1.64818554321186 2.2765234736685\n2 2 -2.67352013711554 -1.53680421614647\n3 3 0.217542851471485 0.437289912695016\n.\n</code></pre>\n<p>Line 1: Header with FID, IID and <script type=\"math/tex\">P</script> phenotype names.</p>\n<p>Followed by lines of <script type=\"math/tex\">P+2</script> values. Space/tab separated. \nEach line contains individual FID and IID followed by P phenotype values\n(for binary traits, must be coded as 0=control, 1=case, NA=missing unless using <code>--1</code>).</p>\n<p>Samples listed in this file that are not in bgen/bed/pgen file are ignored.\nGenotyped samples that are not in this file are removed from the analysis.</p>\n<p>Missing values must be coded as NA.</p>\n<p>With QTs, missing values are mean-imputed in Step 1 and they are dropped when testing each phenotype in Step 2 (unless using <code>--force-impute</code>).</p>\n<p>With BTs, missing values are mean-imputed in Step 1 when fitting the\nlevel 0 linear ridge regression and \nthey are dropped when fitting the level 1 logistic ridge regression for each trait. \nIn Step 2, missing values are dropped when testing each trait.</p>\n<p>To remove all samples that have missing values at <strong>any</strong> of the <script type=\"math/tex\">P</script> phenotypes, use option <code>--strict</code> in Step 1 and 2.</p>\n<p>If using the transposed phenotype file format with option <code>--tpheno-file</code>, \nthe header line must contain subject IDs as \"FID_IID\",\notherwise use option <code>--iid-only</code> and only include IIDs (FID will then be assumed to equal IID).</p>\n<h4 id=\"predictions-file-format\">Predictions file format</h4>\n<p>Running <code>--step 1 --out foo</code> will produce</p>\n<ol>\n<li>A set of files containing genomic predictions for each phenotype\n   from Step 1 (see Output section below).</li>\n<li>A file called <code>foo_pred.list</code> listing the locations of the prediction files.</li>\n</ol>\n<p>The file list is needed as an input file when using <code>--step 2</code>\nvia the <code>--pred</code> option. \nIt has one line per phenotype (in any order) that specifies the name of the phenotype and its\ncorresponding prediction file name. Each phenotype must have exactly one prediction file, and phenotype names \nmust match those in the phenotype file.\nPhenotypes in this file not included in the analysis are ignored.</p>
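\n<p>For example, <code>foo_pred.list</code> could look as follows for two phenotypes Y1 and Y2 (a sketch; the paths written by <strong>regenie</strong> are absolute unless <code>--use-relative-path</code> is used):</p>\n<pre><code>Y1 foo_1.loco\nY2 foo_2.loco\n</code></pre>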
\n<p>Each prediction file contains the genetic predictions for the phenotype (space separated).</p>\n<p>Line 1 starts with 'FID_IID' followed by $N$ sample identifiers.\nIt is followed by 23 lines containing the genetic predictions for each chromosome \n(sex chromosomes are collapsed into chromosome 23).</p>\n<p>More specifically, each line has $N+1$ values which are the chromosome number followed by the $N$\nleave-one-chromosome-out (LOCO) predictions for each individual.</p>\n<p>Samples in this file not in the bed/pgen/bgen input file are ignored. Genotyped samples not \npresent in this file will be ignored in the analysis of the corresponding trait.</p>\n<p>Samples with missing LOCO predictions must have their corresponding phenotype value set to missing.</p>\n<h3 id=\"options\">Options</h3>\n<table>\n<thead>\n<tr>\n<th>Option</th>\n<th>Argument</th>\n<th>Type</th>\n<th>Description</th>\n</tr>\n</thead>\n<tbody>\n<tr>\n<td><code>--step</code></td>\n<td>INT</td>\n<td>Required</td>\n<td>specify step for the regenie run (see Overview) [argument can be <code>1</code> or <code>2</code>]</td>\n</tr>\n<tr>\n<td><code>--qt</code></td>\n<td>FLAG</td>\n<td>Optional</td>\n<td>specify that traits are quantitative (this is the default so can be omitted)</td>\n</tr>\n<tr>\n<td><code>--bt</code></td>\n<td>FLAG</td>\n<td>Optional</td>\n<td>specify that traits are binary with 0=control,1=case,NA=missing</td>\n</tr>\n<tr>\n<td><code>--t2e</code></td>\n<td>FLAG</td>\n<td>Optional</td>\n<td>specify that traits are time-to-event data with 0=censoring,1=event,NA=missing in event column</td>\n</tr>\n<tr>\n<td><code>-1,--cc12</code></td>\n<td>FLAG</td>\n<td>Optional</td>\n<td>specify to use 1/2/NA encoding for binary traits (1=control,2=case,NA=missing)</td>\n</tr>\n<tr>\n<td><code>--bsize</code></td>\n<td>INT</td>\n<td>Required</td>\n<td>size of the genotype blocks</td>\n</tr>\n<tr>\n<td><code>--cv</code></td>\n<td>INT</td>\n<td>Optional</td>\n<td>number of cross validation (CV) folds [default is 5]</td>\n</tr>\n<tr>\n<td><code>--loocv</code></td>\n<td>FLAG</td>\n<td>Optional</td>\n<td>flag to use leave-one-out cross validation</td>\n</tr>\n<tr>\n<td><code>--lowmem</code></td>\n<td>FLAG</td>\n<td>Optional</td>\n<td>flag to reduce memory usage by writing level 0 predictions to disk (details below). This is very useful if the number of traits is large (e.g. 
greater than 10)</td>\n</tr>\n<tr>\n<td><code>--lowmem-prefix</code></td>\n<td>FILE PREFIX</td>\n<td>Optional</td>\n<td>prefix where to temporarily write the level 0 predictions</td>\n</tr>\n<tr>\n<td><code>--split-l0</code></td>\n<td>PREFIX,N</td>\n<td>Optional</td>\n<td>split level 0 across N jobs and set prefix of output files of level 0 predictions</td>\n</tr>\n<tr>\n<td><code>--run-l0</code></td>\n<td>FILE,K</td>\n<td>Optional</td>\n<td>run level 0 for job K in {1..N} specifying the master file created from '--split-l0'</td>\n</tr>\n<tr>\n<td><code>--run-l1</code></td>\n<td>FILE</td>\n<td>Optional</td>\n<td>run level 1 specifying the master file from '--split-l0'</td>\n</tr>\n<tr>\n<td><code>--l1-phenoList</code></td>\n<td>STRING</td>\n<td>Optional</td>\n<td>to specify a subset of phenotypes to analyze when using <code>--run-l1</code></td>\n</tr>\n<tr>\n<td><code>--keep-l0</code></td>\n<td>FLAG</td>\n<td>Optional</td>\n<td>avoid deleting the level 0 predictions written on disk after fitting the level 1 models</td>\n</tr>\n<tr>\n<td><code>--print-prs</code></td>\n<td>FLAG</td>\n<td>Optional</td>\n<td>flag to print whole genome predictions (i.e. PRS) without using LOCO scheme</td>\n</tr>\n<tr>\n<td><code>--force-step1</code></td>\n<td>FLAG</td>\n<td>Optional</td>\n<td>flag to run step 1 when &gt;1M variants are used (not recommended)</td>\n</tr>\n<tr>\n<td><code>--minCaseCount</code></td>\n<td>INT</td>\n<td>Optional</td>\n<td>flag to ignore BTs with low case counts [default is 10]</td>\n</tr>\n<tr>\n<td><code>--apply-rint</code></td>\n<td>FLAG</td>\n<td>Optional</td>\n<td>to apply Rank Inverse Normal Transformation (RINT) to quantitative phenotypes (use in both Step 1 &amp; 2)</td>\n</tr>\n<tr>\n<td><code>--nb</code></td>\n<td>INT</td>\n<td>Optional</td>\n<td>number of blocks (determined from block size if not provided)</td>\n</tr>\n<tr>\n<td><code>--strict</code></td>\n<td>FLAG</td>\n<td>Optional</td>\n<td>flag to remove samples with missing data at any of the phenotypes</td>\n</tr>\n<tr>\n<td><code>--ignore-pred</code></td>\n<td>FLAG</td>\n<td>Optional</td>\n<td>skip reading the file specified by <code>--pred</code> (corresponds to simple linear/logistic regression)</td>\n</tr>\n<tr>\n<td><code>--htp</code></td>\n<td>STRING</td>\n<td>Optional</td>\n<td>to output the summary statistics file in the <a href=\"https://rgcgithub.github.io/remeta/file_formats/#-htp\">HTP</a> format (string should correspond to cohort name, e.g. 
'UKB_450_EUR')</td>\n</tr>\n<tr>\n<td><code>--exact-p</code></td>\n<td>FLAG</td>\n<td>Optional</td>\n<td>avoid capping p-values at 2.2E-307 in the HTP format summary statistics output</td>\n</tr>\n<tr>\n<td><code>--use-relative-path</code></td>\n<td>FLAG</td>\n<td>Optional</td>\n<td>to use relative paths instead of absolute ones for the step 1 output pred.list file</td>\n</tr>\n<tr>\n<td><code>--use-prs</code></td>\n<td>FLAG</td>\n<td>Optional</td>\n<td>flag to use whole genome PRS in <code>--pred</code> (this is output in step 1 when using <code>--print-prs</code>)</td>\n</tr>\n<tr>\n<td><code>--gz</code></td>\n<td>FLAG</td>\n<td>Optional</td>\n<td>flag to output files in compressed gzip format (LOCO prediction files in step 1 and association results files in step 2) <strong>[this only works when compiling with Boost Iostream library (see Install tab)]</strong>.</td>\n</tr>\n<tr>\n<td><code>--force-impute</code></td>\n<td>FLAG</td>\n<td>Optional</td>\n<td>flag to keep and impute missing observations for QTs in step 2</td>\n</tr>\n<tr>\n<td><code>--write-samples</code></td>\n<td>FLAG</td>\n<td>Optional</td>\n<td>flag to write sample IDs for those kept in the analysis for each trait in step 2</td>\n</tr>\n<tr>\n<td><code>--print-pheno</code></td>\n<td>FLAG</td>\n<td>Optional</td>\n<td>flag to write phenotype name in the first line of the sample ID files when using <code>--write-samples</code></td>\n</tr>\n<tr>\n<td><code>--firth</code></td>\n<td>FLAG</td>\n<td>Optional</td>\n<td>specify to use Firth likelihood ratio test (LRT) as fallback for p-values less than threshold</td>\n</tr>\n<tr>\n<td><code>--approx</code></td>\n<td>FLAG</td>\n<td>Optional</td>\n<td>flag to use approximate Firth LRT for computational speedup (only works when option <code>--firth</code> is used)</td>\n</tr>\n<tr>\n<td><code>--firth-se</code></td>\n<td>FLAG</td>\n<td>Optional</td>\n<td>flag to compute SE based on effect size and LRT p-value when using Firth correction (instead of based on Hessian of unpenalized log-likelihood)</td>\n</tr>\n<tr>\n<td><code>--write-null-firth</code></td>\n<td>FLAG</td>\n<td>Optional</td>\n<td>to write the null estimates for approximate Firth [can be used in step 1 or 2]</td>\n</tr>\n<tr>\n<td><code>--compute-all</code></td>\n<td>FLAG</td>\n<td>Optional</td>\n<td>to write the null Firth estimates for all chromosomes (regardless of the genotype file)</td>\n</tr>\n<tr>\n<td><code>--use-null-firth</code></td>\n<td>FILE</td>\n<td>Optional</td>\n<td>to use stored null estimates for approximate Firth in step 2</td>\n</tr>\n<tr>\n<td><code>--spa</code></td>\n<td>FLAG</td>\n<td>Optional</td>\n<td>specify to use Saddlepoint approximation as fallback for p-values less than threshold</td>\n</tr>\n<tr>\n<td><code>--pThresh</code></td>\n<td>FLOAT</td>\n<td>Optional</td>\n<td>P-value threshold below which to apply Firth/SPA correction [default is 0.05]</td>\n</tr>\n<tr>\n<td><code>--test</code></td>\n<td>STRING</td>\n<td>Optional</td>\n<td>specify to carry out dominant or recessive test [default is additive; argument can be <code>dominant</code> or <code>recessive</code>]</td>\n</tr>\n<tr>\n<td><code>--chr</code></td>\n<td>INT</td>\n<td>Optional</td>\n<td>specify which chromosomes to test in step 2 (use for each chromosome to include)</td>\n</tr>\n<tr>\n<td><code>--chrList</code></td>\n<td>STRING</td>\n<td>Optional</td>\n<td>Comma separated list of chromosomes to test in step 2</td>\n</tr>\n<tr>\n<td><code>--range</code></td>\n<td>STRING</td>\n<td>Optional</td>\n<td>specify chromosome region for 
variants to test in step 2 [format=CHR:MINPOS-MAXPOS]</td>\n</tr>\n<tr>\n<td><code>--minMAC</code></td>\n<td>FLOAT</td>\n<td>Optional</td>\n<td>flag to specify the minimum minor allele count (MAC) when testing variants [default is 5]. Variants with lower MAC are ignored.</td>\n</tr>\n<tr>\n<td><code>--minINFO</code></td>\n<td>FLOAT</td>\n<td>Optional</td>\n<td>flag to specify the minimum imputation info score (IMPUTE/MACH R^2) when testing variants. Variants with lower info score are ignored.</td>\n</tr>\n<tr>\n<td><code>--sex-specific</code></td>\n<td>STRING</td>\n<td>Optional</td>\n<td>to perform sex-specific analyses [either 'male'/'female']</td>\n</tr>\n<tr>\n<td><code>--af-cc</code></td>\n<td>FLAG</td>\n<td>Optional</td>\n<td>to output A1FREQ in case/controls separately in the step 2 result file</td>\n</tr>\n<tr>\n<td><code>--no-split</code></td>\n<td>FLAG</td>\n<td>Optional</td>\n<td>flag to have summary statistics for all traits output in the same file</td>\n</tr>\n<tr>\n<td><code>--starting-block</code></td>\n<td>INT</td>\n<td>Optional</td>\n<td>to start step 2 at a specific block/set number (useful if the program crashes during a job)</td>\n</tr>\n<tr>\n<td><code>--nauto</code></td>\n<td>INT</td>\n<td>Optional</td>\n<td>number of autosomal chromosomes (for non-human studies) [default is 22]</td>\n</tr>\n<tr>\n<td><code>--maxCatLevels</code></td>\n<td>INT</td>\n<td>Optional</td>\n<td>maximum number of levels for categorical covariates [default is 10]</td>\n</tr>\n<tr>\n<td><code>--niter</code></td>\n<td>INT</td>\n<td>Optional</td>\n<td>maximum number of iterations for logistic regression [default is 30]</td>\n</tr>\n<tr>\n<td><code>--maxstep-null</code></td>\n<td>INT</td>\n<td>Optional</td>\n<td>maximum step size for logistic model with Firth penalty under the null [default is 25]</td>\n</tr>\n<tr>\n<td><code>--maxiter-null</code></td>\n<td>INT</td>\n<td>Optional</td>\n<td>maximum number of iterations for logistic model with Firth penalty under the null [default is 1000]</td>\n</tr>\n<tr>\n<td><code>--par-region</code></td>\n<td>STRING</td>\n<td>Optional</td>\n<td>specify build code to determine bounds for PAR1/PAR2 regions (can be 'b36/b37/b38/hg18/hg19/hg38' or 'start,end' bp bounds of non-PAR region) [default is hg38]</td>\n</tr>\n<tr>\n<td><code>--force-qt</code></td>\n<td>FLAG</td>\n<td>Optional</td>\n<td>force QT run for binary traits</td>\n</tr>\n<tr>\n<td><code>--threads</code></td>\n<td>INT</td>\n<td>Optional</td>\n<td>number of computational threads to use [default=all-1]</td>\n</tr>\n<tr>\n<td><code>--debug</code></td>\n<td>FLAG</td>\n<td>Optional</td>\n<td>debug flag (for use by developers)</td>\n</tr>\n<tr>\n<td><code>--verbose</code></td>\n<td>FLAG</td>\n<td>Optional</td>\n<td>verbose screen output</td>\n</tr>\n<tr>\n<td><code>--version</code></td>\n<td>FLAG</td>\n<td>Optional</td>\n<td>print version number and exit</td>\n</tr>\n<tr>\n<td><code>--help</code></td>\n<td>FLAG</td>\n<td>Optional</td>\n<td>Prints usage and options list to screen</td>\n</tr>\n</tbody>\n</table>\n<p>When step 1 of <strong>regenie</strong> is run in low memory mode (i.e. using <code>--lowmem</code>), \ntemporary files are created on disk (the prefix given by <code>--lowmem-prefix tmp_prefix</code> determines \nwhere the files are written [as in <code>tmp_prefix_l0_Y1</code>,...,<code>tmp_prefix_l0_YP</code> \nfor P phenotypes]). If the prefix is not specified, the default is to use the \nprefix specified by <code>--out</code> (see below).\nThese are automatically deleted at the end of the program (unless the run\nwas not successful, in which case the user needs to delete the files manually).</p>\n<p>See the <a href=\"https://github.com/rgcgithub/regenie/wiki/Further-parallelization-for-level-0-models-in-Step-1\">Wiki page</a> for more details on how to run the level 0 models for Step 1 \nof <strong>regenie</strong> in parallel.</p>
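\n<p>As a rough sketch of that parallel workflow (file names here are hypothetical and the master file is assumed to be written as <code>fit_l0.master</code>; see the Wiki page above for the exact file naming), the level 0 models from the <a href=\"#getting-started\">Getting started</a> example could be split across two jobs:</p>\n<pre><code># set up the split across 2 jobs (writes the master file)\n# (keep all other Step 1 options identical across the three calls)\n./regenie --step 1 --bed example/example --covarFile example/covariates.txt \\\n  --phenoFile example/phenotype_bin.txt --bsize 100 --bt \\\n  --split-l0 fit_l0,2 --out fit_bin_l0\n\n# run each level 0 job K=1,2 (these can run in parallel)\n./regenie --step 1 --bed example/example --covarFile example/covariates.txt \\\n  --phenoFile example/phenotype_bin.txt --bsize 100 --bt \\\n  --run-l0 fit_l0.master,1 --out fit_bin_l0\n\n# after all jobs are done, fit the level 1 models\n./regenie --step 1 --bed example/example --covarFile example/covariates.txt \\\n  --phenoFile example/phenotype_bin.txt --bsize 100 --bt \\\n  --run-l1 fit_l0.master --out fit_bin_out\n</code></pre>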
\n<h3 id=\"output\">Output</h3>\n<table>\n<thead>\n<tr>\n<th>Option</th>\n<th>Argument</th>\n<th>Type</th>\n<th>Description</th>\n</tr>\n</thead>\n<tbody>\n<tr>\n<td><code>--out</code></td>\n<td>FILE PREFIX</td>\n<td>Required</td>\n<td>Output files that depend on <code>--step</code></td>\n</tr>\n</tbody>\n</table>\n<p>A log file <code>file.log</code> of the output is generated.</p>\n<p><strong>Using <code>--step 1 --out file</code></strong></p>\n<p>For the <script type=\"math/tex\">P</script> phenotypes, files <code>file_1.loco</code>,...,<code>file_P.loco</code> are output with the\nper-chromosome LOCO predictions as rows of the files \n(following the order of the phenotypes in the phenotype file header). \nIf option <code>--gz</code> was used, the files will be compressed in gzip format and have extension <code>.loco.gz</code>.</p>\n<p>Genotyped individuals specified using option <code>--remove</code> are excluded from this file. \nIndividuals with missing phenotype values kept in the analysis \nare included in the file and have their predictions set to missing.</p>\n<p>The list of blup files needed for step 2 (association testing) is written to <code>file_pred.list</code>.</p>\n<p>If using <code>--print-prs</code>, files <code>file_1.prs</code>,...,<code>file_P.prs</code> will be written with the \nwhole genome predictions (i.e. PRS) without using the LOCO scheme (same format as the .loco files).\nThe list of these files is written to <code>file_prs.list</code> and can be used in step 2 with <code>--pred</code> and \nspecifying flag <code>--use-prs</code>. Note that as these are not obtained using a LOCO scheme, \nassociation tests could suffer from proximal contamination.</p>\n<p>If using option <code>--write-null-firth</code>, the estimates for approximate Firth under the null will be written to files\n<code>file_1.firth,...,file_P.firth</code> and the list of these files is written to <code>file_firth.list</code>. This can be\nused in step 2 as <code>--use-null-firth file_firth.list</code>. Note that it assumes the same set of covariates is used in Step 1 and 2.</p>
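\n<p>For example, a sketch reusing the <a href=\"#getting-started\">Getting started</a> commands (assuming Step 1 was run with <code>--write-null-firth</code> and <code>--out fit_bin_out</code>, so that the list is written to <code>fit_bin_out_firth.list</code>):</p>\n<pre><code>./regenie \\\n  --step 2 \\\n  --bgen example/example.bgen \\\n  --covarFile example/covariates.txt \\\n  --phenoFile example/phenotype_bin.txt \\\n  --bsize 200 \\\n  --bt \\\n  --firth --approx \\\n  --pred fit_bin_out_pred.list \\\n  --use-null-firth fit_bin_out_firth.list \\\n  --out test_bin_out_firth\n</code></pre>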
\n<p><strong>Using <code>--step 2 --out file</code></strong></p>\n<p>By default, results are written in separate files for\neach phenotype\n<code>file_&lt;phenotype1_name&gt;.regenie,...,file_&lt;phenotypeP_name&gt;.regenie</code>.\nEach file has one line per\nSNP along with a header line.\nIf option <code>--gz</code> was used, the files will be compressed in gzip format and have extension <code>.regenie.gz</code>.</p>\n<p>The entries of each row specify chromosome, position, ID, reference allele (allele 0), \nalternative allele (allele 1), frequency of the alternative allele, sample size and the test performed \n(additive/dominant/recessive).\nWith BGEN/PGEN files with dosages, the imputation INFO score is provided \n(IMPUTE info score for BGEN and Mach Rsq for PGEN).\nAllele frequency, sample size and INFO score, if applicable, are computed using only\nnon-missing samples for each phenotype.</p>\n<p>These are followed by the estimated effect sizes (for allele 1 on the original scale), standard errors, chi-square test statistics \nand <script type=\"math/tex\">-\\log_{10}</script> p-value. An additional column is included to specify if Firth/SPA corrections failed.</p>\n<p>With option <code>--no-split</code>, the summary statistics for all traits are written to a single file <code>file.regenie</code>,\nwith the same format as above. Additionally, an accompanying file with the trait names corresponding to Y1,Y2,... \nwill be generated in <code>file.regenie.Ydict</code>. Note that allele frequency, sample size and INFO score are computed using\nall analyzed samples.</p>\n<p>With option <code>--htp</code>, the summary statistics file will follow the <a href=\"https://rgcgithub.github.io/remeta/file_formats/#-htp\">HTP</a> format.</p>\n<p>If option <code>--write-samples</code> was used, IDs of samples used for each trait will be written in files\n<code>file_&lt;phenotype1_name&gt;.regenie.ids,...,file_&lt;phenotypeP_name&gt;.regenie.ids</code> (tab separated, no header).</p>\n<p>When using <code>--par-region</code>, the default boundaries used for the chrX PAR regions are:</p>\n<ul>\n<li>b36/hg18: 2709520 and 154584238</li>\n<li>b37/hg19: 2699520 and 154931044</li>\n<li>b38/hg38: 2781479 and 155701383</li>\n</ul>\n<h2 id=\"gene-based-testing\">Gene-based testing</h2>\n<p>Starting from version 3.0, Step 2 of <strong>regenie</strong> provides a complementary set of gene-based tests \nin addition to the burden testing functionality introduced in version 2.0.\nMore specifically, for a given set of variants (e.g. within a gene) which can be defined using functional annotations,\n<strong>regenie</strong> can apply various set-based tests on the variants as well as collapse them into a single combined 'mask' genotype \nthat can be tested for association just like a single variant. 
</p>\n<h3 id=\"input_1\">Input</h3>\n<table>\n<thead>\n<tr>\n<th>Option</th>\n<th>Argument</th>\n<th>Type</th>\n<th>Description</th>\n</tr>\n</thead>\n<tbody>\n<tr>\n<td><code>--anno-file</code></td>\n<td>FILE</td>\n<td>Required</td>\n<td>File with variant annotations for each set</td>\n</tr>\n<tr>\n<td><code>--set-list</code></td>\n<td>FILE</td>\n<td>Required</td>\n<td>File listing variant sets</td>\n</tr>\n<tr>\n<td><code>--extract-sets</code></td>\n<td>FILE</td>\n<td>Optional</td>\n<td>Inclusion file that lists IDs of variant sets to keep</td>\n</tr>\n<tr>\n<td><code>--exclude-sets</code></td>\n<td>FILE</td>\n<td>Optional</td>\n<td>Exclusion file that lists IDs of variant sets to remove</td>\n</tr>\n<tr>\n<td><code>--extract-setlist</code></td>\n<td>STRING</td>\n<td>Optional</td>\n<td>Comma-separated list of variant sets to keep</td>\n</tr>\n<tr>\n<td><code>--exclude-setlist</code></td>\n<td>STRING</td>\n<td>Optional</td>\n<td>Comma-separated list of variant sets to remove</td>\n</tr>\n<tr>\n<td><code>--aaf-file</code></td>\n<td>FILE</td>\n<td>Optional</td>\n<td>File with variant AAF to use when building masks (instead of AAF estimated from sample)</td>\n</tr>\n<tr>\n<td><code>--mask-def</code></td>\n<td>FILE</td>\n<td>Required</td>\n<td>File with mask definitions using the annotations defined in <code>--anno-file</code></td>\n</tr>\n</tbody>\n</table>\n<p>Note: multiple files can be specified for <code>--extract-sets/--exclude-sets</code> by using a comma-separated list.</p>\n<h4 id=\"annotation-input-files\">Annotation input files</h4>\n<p>The following files are used to define variant sets and \nfunctional annotations which will be used to generate masks.</p>\n<h5 id=\"annotation-file\">Annotation file</h5>\n<pre><code class=\"language-bash\">1:55039839:T:C PCSK9 LoF\n1:55039842:G:A PCSK9 missense\n.\n</code></pre>\n<p>This file defines functional annotations for variants.\nIt is designed to accommodate for variants with \nseparate annotations for different sets/genes.</p>\n<p>Each line contains the variant name, the set/gene name and a single annotation category \n(space/tab separated). </p>\n<p>Variants not in this file will be assigned to a default \"NULL\" category. A maximum of 63 annotation \ncategories (+NULL category) is allowed.</p>\n<p>For gene sets, tools you can use to obtain variant annotations per transcripts are \n<a href=\"https://pcingola.github.io/SnpEff/se_introduction/\">snpEFF</a> or \n<a href=\"https://www.ensembl.org/info/docs/tools/vep/index.html\">VEP</a>.\nTo obtain a single annotation per gene, you could choose the most deleterious\nfunctional annotation across the gene transcripts or alternatively\nuse the canonical transcript (note that its definition can vary across software).</p>\n<p>We have implemented an extended 4-column format of the annotation file which\nalso categorizes sets into domains (e.g. for gene sets, these would correspond to gene domains).</p>\n<pre><code class=\"language-bash\">1:55039839:T:C PCSK9 Prodomain LoF\n1:55039842:G:A PCSK9 Prodomain missense\n.\n</code></pre>\n<p>Masks will be generated for each domain \n(maximum of 8 per set/gene) in addition \nto a mask combining across all domains.\nVariants can only be assigned to a single domain for each set/gene.</p>\n<p>Starting with v4.1, you can also specify custom variant weights which will be used in the burden, SKAT/SKAT-O and ACAT-V tests ($w_i$'s in the <a href=\"../overview/#step-2-gene-based-testing\">gene-based testing overview</a>). 
Multiple weights can be included in the annotation file after the 3rd column, e.g.</p>\n<pre><code class=\"language-bash\">1:55039839:T:C PCSK9 LoF 0.9 0.812 1\n1:55039842:G:A PCSK9 missense 0.4 0.23 0.55\n.\n</code></pre>\n<p>Using <code>--weights-col 4</code> will use weights in the 4th column for the gene-based tests.</p>\n<h5 id=\"set-list-file\">Set list file</h5>\n<p>This file lists variants within each set/gene to use when \nbuilding masks. \nEach line contains the set/gene name followed by a chromosome and physical position for the set/gene,\nthen by a comma-separated list of variants included in the set/gene.</p>\n<pre><code class=\"language-bash\">A1BG 19  58346922  19:58346922:C:A,19:58346924:G:A,...\nA1CF 10  50806630  10:50806630:A:G,10:50806630:A:AT,...\n.\n</code></pre>\n<h5 id=\"set-inclusionexclusion-file-format\">Set inclusion/exclusion file format</h5>\n<p>The file must have a single column of set/gene names corresponding to those in the \nset list file.</p>\n<pre><code class=\"language-bash\">PIGP\nZBTB38\n.\n</code></pre>\n<h5 id=\"aaf-file-optional\">AAF file (optional)</h5>\n<p>Both functional annotations and alternative allele frequency (AAF) cutoffs \nare used when building masks (e.g. only considering LoF\nsites where AAF is below 1%). \nBy default, the AAF for each variant is computed from the sample, but\nalternatively the user can specify variant AAFs using this file.</p>\n<p>Each line contains the variant name followed by its AAF \n(it should be for the ALT allele used in the genetic data input). \nAAF must be a numerical value (i.e. it cannot be '.').</p>\n<pre><code class=\"language-bash\">7:6187101:C:T 1.53918207864341e-05\n7:6190395:C:A 2.19920388819247e-06\n.\n</code></pre>\n<p>Since singleton variants cannot be identified from this file, they are determined by default\nbased on the input genetic data. To enforce which sites should be included in the singleton masks\n(see <code>--set-singletons</code>), you can add a third column in the file with a binary indicator \n(1=singleton; 0=not singleton). So only variants which are specified as singletons will be \nconsidered for the singleton masks, regardless of whether they are singletons in the input genetic data.\nNote that with this flag, singleton sites will be included in all masks (regardless of the AAF in the file).</p>\n<pre><code class=\"language-bash\">7:6187101:C:T 1.53918207864341e-05 0\n7:6190395:C:A 2.19920388819247e-06 1\n.\n</code></pre>\n<h4 id=\"mask-definitions\">Mask definitions</h4>\n<h5 id=\"mask-file\">Mask file</h5>\n<p>This file specifies which annotation categories should be combined into masks. \nEach line contains a mask name followed by a comma-separated list \nof categories included in the mask (i.e. 
union is taken over categories).</p>\n<p>In the example below, Mask1 uses only LoF variants and \nMask2 uses LoF and missense annotated variants.</p>\n<pre><code class=\"language-bash\">Mask1 LoF\nMask2 LoF,missense\n.\n</code></pre>\n<h5 id=\"aaf-cutoffs\">AAF cutoffs</h5>\n<p>Option <code>--aaf-bins</code> specifies the AAF upper bounds used to generate burden masks \n(<strong>AAF and not MAF [minor allele frequency] is used when deciding which variants go into a mask)</strong>.\nBy default, a mask based on singleton sites is always included.</p>\n<p>For example, <code>--aaf-bins 0.01,0.05</code> will generate 3 burden masks for AAFs in \n[0,0.01], [0,0.05] and singletons.</p>\n<h4 id=\"skatacat-tests\">SKAT/ACAT tests</h4>\n<p>The option <code>--vc-tests</code> is used to specify the gene-based tests to run. \nBy default, these tests use all variants in each mask category. \nIf you'd like to only include variants whose AAF is below a given threshold, \ne.g. only including rare variants, you can use <code>--vc-maxAAF</code>.</p>\n<table>\n<thead>\n<tr>\n<th align=\"left\">Test</th>\n<th align=\"left\">Name in <strong>regenie</strong></th>\n<th align=\"left\">Description</th>\n</tr>\n</thead>\n<tbody>\n<tr>\n<td align=\"left\">SKAT</td>\n<td align=\"left\">skat</td>\n<td align=\"left\">Variance component test</td>\n</tr>\n<tr>\n<td align=\"left\">SKATO</td>\n<td align=\"left\">skato</td>\n<td align=\"left\">Omnibus test combining features of SKAT and Burden</td>\n</tr>\n<tr>\n<td align=\"left\">SKATO-ACAT</td>\n<td align=\"left\">skato-acat</td>\n<td align=\"left\">Same as SKATO but using Cauchy combination method to maximize power across SKATO models</td>\n</tr>\n<tr>\n<td align=\"left\">ACATV</td>\n<td align=\"left\">acatv</td>\n<td align=\"left\">Test using Cauchy combination method to combine single-variant p-values</td>\n</tr>\n<tr>\n<td align=\"left\">ACATO</td>\n<td align=\"left\">acato</td>\n<td align=\"left\">Omnibus test combining features of ACATV, SKAT and Burden</td>\n</tr>\n<tr>\n<td align=\"left\">ACATO-FULL</td>\n<td align=\"left\">acato-full</td>\n<td align=\"left\">Same as ACATO but using the larger set of SKATO models used in the SKATO test</td>\n</tr>\n</tbody>\n</table>\n<p>For example, <code>--vc-tests skato,acato-full</code> will run SKATO and ACATO \n(both using the default grid of 8 <code>rho</code> values for the SKATO models) and \nthe p-values for SKAT, SKATO, ACATV and ACATO will be output.</p>\n<p>Ultra-rare variants (defined by default as MAC$\\le$10, see <code>--vc-MACthr</code>) are collapsed into\na burden mask which is then included in the tests instead of the individual variants.</p>\n<p>For additional details on the tests, <a href=\"../overview/#step-2-gene-based-testing\">see here</a>.</p>\n<h4 id=\"joint-test-for-burden-masks\">Joint test for burden masks</h4>\n<p>The following tests can be used to combine different burden masks \ngenerated using different annotation classes as well as AAF thresholds.</p>\n<table>\n<thead>\n<tr>\n<th align=\"left\">Test</th>\n<th align=\"center\">Name in <strong>regenie</strong></th>\n<th align=\"center\">QT</th>\n<th align=\"center\">BT</th>\n<th align=\"center\">Robust to LD</th>\n<th align=\"center\">Assumes same effect direction</th>\n</tr>\n</thead>\n<tbody>\n<tr>\n<td align=\"left\">Minimum P-value</td>\n<td align=\"center\">minp</td>\n<td align=\"center\">$\\checkmark$</td>\n<td align=\"center\">$\\checkmark$</td>\n<td align=\"center\">$\\times$</td>\n<td align=\"center\">$\\times$</td>\n</tr>\n<tr>\n<td 
align=\"left\">ACAT</td>\n<td align=\"center\">acat</td>\n<td align=\"center\">$\\checkmark$</td>\n<td align=\"center\">$\\checkmark$</td>\n<td align=\"center\">$\\checkmark$</td>\n<td align=\"center\">$\\times$</td>\n</tr>\n<tr>\n<td align=\"left\">SBAT</td>\n<td align=\"center\">sbat</td>\n<td align=\"center\">$\\checkmark$</td>\n<td align=\"center\">$\\times$</td>\n<td align=\"center\">$\\checkmark$</td>\n<td align=\"center\">$\\checkmark$</td>\n</tr>\n</tbody>\n</table>\n<p>The ACAT test combines the p-values of the individual burden masks using the Cauchy combination method \n(see ref. 14 <a href=\"../overview/#fn:14\">here</a>).\nThe SBAT test is described into more detail <a href=\"../overview/#sparse-burden-association-test\">here</a>.</p>\n<p>If you only want to output the results for the joint tests (ignore the marginal tests), use <code>--joint-only</code>.</p>\n<h4 id=\"lovolodo-schemes\">LOVO/LODO schemes</h4>\n<p>The leave-one-variant-out (LOVO) scheme takes all sites going into a mask,\nand builds LOVO masks \nby leaving out one variant at a time from the full set of sites. \nThe mask including all sites will also be computed.</p>\n<p>The argument for <code>--mask-lovo</code> is a comma-separated list which \nconsists of \nthe set/gene name, \nthe mask name, \nand the AAF cutoff (either 'singleton' or a double in (0,1)).</p>\n<p>If using a 4-column annotation file, then <code>--mask-lovo</code> should have \nthe gene name, \nthe domain name,\nthe mask name, \nand the AAF cutoff.\nSo the LOVO masks will be generated for a specific gene domain.</p>\n<p>The leave-one-domain-out (LODO) scheme (specified by <code>--mask-lodo</code>) \ntakes all sites going into a mask and builds a LODO mask for each domain specified for the gene\nby excluding all variants in the domain. \nThe full mask including all sites will also be computed. \nThe argument for <code>--mask-lodo</code> should have the gene name, the mask name and the AAF cutoff.</p>\n<h4 id=\"writing-mask-files\">Writing mask files</h4>\n<p>Burden masks built in <strong>regenie</strong> can be written to PLINK bed format. \nIf the input genetic data contains dosages, \nthe masks dosages will be converted to hard-calls prior to being written to file \nand these hard-calls will be used for the association testing.</p>\n<p>The PLINK bed file is written using 'ref-last' encoding (i.e. 
REF allele is \nlisted last in the bim file).</p>\n<p>Note that this cannot be used with the LOVO/LODO schemes.</p>\n<h3 id=\"options_1\">Options</h3>\n<table>\n<thead>\n<tr>\n<th>Option</th>\n<th>Argument</th>\n<th>Type</th>\n<th>Description</th>\n</tr>\n</thead>\n<tbody>\n<tr>\n<td><code>--aaf-bins</code></td>\n<td>FLOAT,...,FLOAT</td>\n<td>Optional</td>\n<td>comma-separated list of AAF upper bounds to use when building masks [default is a single cutoff of 1%]</td>\n</tr>\n<tr>\n<td><code>--build-mask</code></td>\n<td>STRING</td>\n<td>Optional</td>\n<td>build masks using the maximum number of ALT alleles across sites (<code>'max'</code>; the default), or the sum of ALT alleles (<code>'sum'</code>), or thresholding the sum to 2 (<code>'comphet'</code>)</td>\n</tr>\n<tr>\n<td><code>--singleton-carrier</code></td>\n<td>FLAG</td>\n<td>Optional</td>\n<td>to define singletons as variants with a single carrier in the sample (rather than alternative allele count=1)</td>\n</tr>\n<tr>\n<td><code>--set-singletons</code></td>\n<td>FLAG</td>\n<td>Optional</td>\n<td>to use 3rd column in AAF file to specify variants included in singleton masks</td>\n</tr>\n<tr>\n<td><code>--write-mask</code></td>\n<td>FLAG</td>\n<td>Optional</td>\n<td>write mask to PLINK bed format <strong>(does not work when building masks with 'sum')</strong></td>\n</tr>\n<tr>\n<td><code>--vc-tests</code></td>\n<td>STRING</td>\n<td>Optional</td>\n<td>comma-separated list of SKAT/ACAT-type tests to run</td>\n</tr>\n<tr>\n<td><code>--vc-maxAAF</code></td>\n<td>FLOAT</td>\n<td>Optional</td>\n<td>AAF upper bound to use for SKAT/ACAT-type tests [default is 100%]</td>\n</tr>\n<tr>\n<td><code>--skat-params</code></td>\n<td>FLOAT,FLOAT</td>\n<td>Optional</td>\n<td>a1,a2 values for the single variant weights computed from Beta(MAF,a1,a2) used in SKAT/ACAT-type tests [default is (1,25)]</td>\n</tr>\n<tr>\n<td><code>--skato-rho</code></td>\n<td>FLOAT,...,FLOAT</td>\n<td>Optional</td>\n<td>comma-separated list of $\\rho$ values used for SKATO models</td>\n</tr>\n<tr>\n<td><code>--vc-MACthr</code></td>\n<td>FLOAT</td>\n<td>Optional</td>\n<td>MAC threshold below which to collapse variants in SKAT/ACAT-type tests [default is 10]</td>\n</tr>\n<tr>\n<td><code>--joint</code></td>\n<td>STRING</td>\n<td>Optional</td>\n<td>comma-separated list of joint tests to apply on the generated burden masks</td>\n</tr>\n<tr>\n<td><code>--rgc-gene-p</code></td>\n<td>FLAG</td>\n<td>Optional</td>\n<td>to compute the GENE_P test</td>\n</tr>\n<tr>\n<td><code>--skip-test</code></td>\n<td>FLAG</td>\n<td>Optional</td>\n<td>to skip computing association tests after building masks and writing them to file</td>\n</tr>\n<tr>\n<td><code>--mask-lovo</code></td>\n<td>STRING</td>\n<td>Optional</td>\n<td>to perform the LOVO scheme</td>\n</tr>\n<tr>\n<td><code>--lovo-snplist</code></td>\n<td>FILE</td>\n<td>Optional</td>\n<td>File with list of variants for which to compute LOVO masks</td>\n</tr>\n<tr>\n<td><code>--mask-lodo</code></td>\n<td>FLAG</td>\n<td>Optional</td>\n<td>to perform the LODO scheme</td>\n</tr>\n<tr>\n<td><code>--weights-col</code></td>\n<td>INT</td>\n<td>Optional</td>\n<td>column index (1-based) in annotation file to use custom weights in gene-based tests</td>\n</tr>\n<tr>\n<td><code>--write-mask-snplist</code></td>\n<td>FLAG</td>\n<td>Optional</td>\n<td>to write list of variants that went into each mask to file</td>\n</tr>\n<tr>\n<td><code>--check-burden-files</code></td>\n<td>FLAG</td>\n<td>Optional</td>\n<td>to check the concordance between annotation, set list and mask 
<h3 id=\"output_1\">Output</h3>\n<p><strong>With <code>--out file</code></strong></p>\n<p>Results are written in separate files for each phenotype\n<code>file_&lt;phenotype1_name&gt;.regenie,...,file_&lt;phenotypeP_name&gt;.regenie</code> \nwith the same output format mentioned <a href=\"https://rgcgithub.github.io/regenie/options/#output\">above</a>.\nAdditionally, a header line is included (starting with <code>##</code>)\nwhich contains mask definition information.</p>\n<p>Masks are named <code>&lt;set_name&gt;.&lt;mask_name&gt;.&lt;AAF_cutoff&gt;</code>, with the \nchromosome and physical position taken from the set list file, \nthe reference allele being <code>ref</code>, and the alternate allele corresponding to \n<code>&lt;mask_name&gt;.&lt;AAF_cutoff&gt;</code>.\nWhen using <code>--mask-lovo</code>, the mask name will be the same as above but with the suffix\n<code>_&lt;variant_name&gt;</code> to specify the variant which was excluded when building the mask.</p>\n<p>With <code>--build-mask sum</code>, the reported mask AAF corresponds to the average \nAAF across sites included in the mask.</p>\n<p>If using <code>--write-mask</code>, the masks will be saved to \n<code>file_masks.{bed,bim,fam}</code>, and if using <code>--write-mask-snplist</code>, \nthe list of variants included in each mask will be saved to <code>file_masks.snplist</code>.</p>\n<p>When using <code>--rgc-gene-p</code>, the single p-value per gene GENE_P strategy will be applied \nusing all masks (see <a href=\"../overview/#gene_p\">here</a> for details).</p>\n<h3 id=\"example-run\">Example run</h3>\n<p>Using Step 1 results from the <a href=\"https://rgcgithub.github.io/regenie/options/#getting-started\">Step 1 command above</a>, we use the following command to build and test masks in Step 2</p>\n<pre><code>./regenie \\\n  --step 2 \\\n  --bed example/example_3chr \\\n  --covarFile example/covariates.txt \\\n  --phenoFile example/phenotype_bin.txt \\\n  --bt \\\n  --remove example/fid_iid_to_remove.txt \\\n  --firth --approx \\\n  --pred fit_bin_out_pred.list \\\n  --anno-file example/example_3chr.annotations \\\n  --set-list example/example_3chr.setlist \\\n  --mask-def example/example_3chr.masks \\\n  --aaf-bins 0.1,0.05 \\\n  --write-mask \\\n  --bsize 200 \\\n  --out test_bin_out_firth\n</code></pre>\n<p>For each set, this will produce masks using 3 AAF cutoffs (singletons, 5% and 10% AAF). \nThe masks are written to a PLINK bed file (in <code>test_bin_out_firth_masks.{bed,bim,fam}</code>) \nand tested for association with each binary trait using the approximate Firth test \n(summary stats in <code>test_bin_out_firth_&lt;phenotype_name&gt;.regenie</code>). \nNote that the test uses the whole genome regression LOCO PRS from Step 1 of <strong>regenie</strong> (specified by <code>--pred</code>).</p>
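<p>The gene-based tests are not limited to burden masks: the SKAT/ACAT-type and joint tests from the table above can be added to the same command. The sketch below assumes that <code>skato</code> and <code>acatv</code> are accepted values for <code>--vc-tests</code> and <code>sbat</code> for <code>--joint</code>; check <code>./regenie --help</code> for the values supported by your version:</p>\n<pre><code>./regenie \\\n  --step 2 \\\n  --bed example/example_3chr \\\n  --covarFile example/covariates.txt \\\n  --phenoFile example/phenotype_bin.txt \\\n  --bt \\\n  --firth --approx \\\n  --pred fit_bin_out_pred.list \\\n  --anno-file example/example_3chr.annotations \\\n  --set-list example/example_3chr.setlist \\\n  --mask-def example/example_3chr.masks \\\n  --aaf-bins 0.1,0.05 \\\n  --vc-tests skato,acatv \\\n  --joint sbat \\\n  --rgc-gene-p \\\n  --bsize 200 \\\n  --out test_bin_out_vc\n</code></pre>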
<h3 id=\"checking-input-files\">Checking input files</h3>\n<p>To assess the concordance between the input files for building masks, you can use <code>--check-burden-files</code>, which will generate a report in <code>file_masks_report.txt</code> containing:</p>\n<ol>\n<li>\n<p>for each set, the list of variants in the set-list file which are unrecognized (not genotyped \nor not present in the annotation file for the set)</p>\n</li>\n<li>\n<p>for each mask, the list of annotations in the mask definition file which are not in the annotation file</p>\n</li>\n</ol>\n<p>Additionally, you can use <code>--strict-check-burden</code> to\nenforce full agreement between the three files \n(if not, the program will terminate):</p>\n<ol>\n<li>\n<p>all genotyped variants in the set list file must be in the annotation file (for the corresponding set)</p>\n</li>\n<li>\n<p>all annotations in the mask definition file must be present in the annotation file</p>\n</li>\n</ol>\n<h2 id=\"interaction-testing\">Interaction testing</h2>\n<p>Starting from <strong>regenie</strong> v3.0, you can perform scans for interactions (either GxE or GxG). \nFor GxE tests, the interacting variable should be part of the covariate file \n(if it is categorical, specify it in <code>--catCovarList</code>).\nFor GxG tests, the interacting variant can be part of the input genetic file \nor it can be present in an external file (see <code>--interaction-file</code>).</p>\n<h3 id=\"options_2\">Options</h3>\n<table>\n<thead>\n<tr>\n<th>Option</th>\n<th>Argument</th>\n<th>Type</th>\n<th>Description</th>\n</tr>\n</thead>\n<tbody>\n<tr>\n<td><code>--interaction</code></td>\n<td>STRING</td>\n<td>Optional</td>\n<td>to run GxE test specifying the interacting covariate (see below)</td>\n</tr>\n<tr>\n<td><code>--interaction-snp</code></td>\n<td>STRING</td>\n<td>Optional</td>\n<td>to run GxG test specifying the interacting variant (see below)</td>\n</tr>\n<tr>\n<td><code>--interaction-file</code></td>\n<td>FORMAT,FILE</td>\n<td>Optional</td>\n<td>external genotype file containing the interacting variant [FORMAT can be bed/bgen/pgen and FILE is the file name (bgen) or file prefix (bed/pgen)]</td>\n</tr>\n<tr>\n<td><code>--interaction-file-sample</code></td>\n<td>FILE</td>\n<td>Optional</td>\n<td>accompanying sample file for BGEN format</td>\n</tr>\n<tr>\n<td><code>--interaction-file-reffirst</code></td>\n<td>FLAG</td>\n<td>Optional</td>\n<td>use the first allele as the reference for BGEN or PLINK BED formats</td>\n</tr>\n<tr>\n<td><code>--no-condtl</code></td>\n<td>FLAG</td>\n<td>Optional</td>\n<td>to print out all the main effects from the interaction model (see Output section below)</td>\n</tr>\n<tr>\n<td><code>--force-condtl</code></td>\n<td>FLAG</td>\n<td>Optional</td>\n<td>to include the interacting SNP as a covariate in the marginal test (see Output section below)</td>\n</tr>\n<tr>\n<td><code>--rare-mac</code></td>\n<td>FLOAT</td>\n<td>Optional</td>\n<td>minor allele count (MAC) threshold below which to use the HLM method for QTs [default is 1000]</td>\n</tr>\n</tbody>\n</table>\n<p>For GxE tests where the interacting variable is categorical, you can specify the baseline level using <code>--interaction VARNAME[BASE_LEVEL]</code> (e.g. <code>--interaction BMI[&lt;25]</code>). Otherwise, the first value found in the covariate file will be used as the baseline level.</p>\n<p>For GxG tests, the default coding for the interacting variant is additive. If you would like to use dominant/recessive/categorical coding, use <code>--interaction-snp SNP_NAME[dom/rec/cat]</code> (for example with dominant coding, <code>--interaction-snp SNPNAME[dom]</code> will allow for separate effects between carriers vs non-carriers of the interacting variant). The allowed values in the brackets are <code>add/dom/rec/cat</code>.</p>
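<p>For example, a GxE scan reusing the example files from the runs above (the interacting covariate name <code>SEX</code> is hypothetical; replace it with a column of your covariate file) could look like:</p>\n<pre><code>./regenie \\\n  --step 2 \\\n  --bed example/example_3chr \\\n  --covarFile example/covariates.txt \\\n  --phenoFile example/phenotype_bin.txt \\\n  --bt \\\n  --firth --approx \\\n  --pred fit_bin_out_pred.list \\\n  --catCovarList SEX \\\n  --interaction SEX \\\n  --bsize 200 \\\n  --out test_bin_out_gxe\n</code></pre>\n<p>For a GxG scan, replace <code>--catCovarList SEX --interaction SEX</code> with <code>--interaction-snp SNPNAME</code> (optionally adding a coding suffix as described above).</p>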
<!---\nforce-ltco : to use a Leave-Two-Chromosome-Out (LTCO) scheme specifying the chromosome to remove from the LOCO PRS of Step 1\n--->\n\n<h3 id=\"output_2\">Output</h3>\n<p>The result files will contain multiple lines for the same variant corresponding to the\ndifferent null hypotheses being tested in the <a href=\"../overview/#step-2-interaction-testing\">interaction model</a>\n<script type=\"math/tex; mode=display\">\ng(\\mu) = E\\alpha + G\\beta + (G\\odot E)\\gamma\n</script>\n</p>\n<p>The suffix in the \"TEST\" column indicates which hypothesis is being tested:</p>\n<ul>\n<li>\"ADD\": marginal test where the interacting variable has <strong>not</strong> been added as a covariate $-$ this corresponds to $H_0: \\beta = 0$ given $\\alpha=\\gamma = 0$<ul>\n<li>this is only printed for GxG tests by default, or GxE using <code>--no-condtl</code></li>\n</ul>\n</li>\n<li>\"ADD-CONDTL\": marginal test where the interacting variable has been added as a covariate (default for GxE tests) $-$ this corresponds to $H_0: \\beta = 0$ given $\\gamma = 0$<ul>\n<li>this is only printed for GxE tests by default, or GxG using <code>--force-condtl</code></li>\n</ul>\n</li>\n<li>\"ADD-INT_VAR\": test for the main effect of the interaction variable (\"VAR\" will be replaced by the name of the interacting variable) $-$ this corresponds to $H_0: \\alpha = 0$<ul>\n<li>this is only printed for GxG tests by default, or GxE using <code>--no-condtl</code></li>\n<li>If the interacting variable is categorical, you will have separate lines for each level aside from the baseline level (e.g. \"ADD-INT_BMI=25-30\" and \"ADD-INT_BMI=30+\" where baseline level is \"$&lt;$25\")</li>\n<li>will also output the effect of $E^2$ in \"ADD-INT_VAR^2\" if the trait is binary (see <a href=\"../overview/#step-2-interaction-testing\">here</a>)</li>\n</ul>\n</li>\n<li>\"ADD-INT_SNP\": test for main effect of tested SNP in the interaction model $-$ this corresponds to $H_0: \\beta = 0$</li>\n<li>\"ADD-INT_SNPxVAR\": test for interaction effect (\"VAR\" will be replaced by the name of the interacting variable) $-$ this corresponds to $H_0: \\gamma = 0$<ul>\n<li>If the interacting variable is categorical, you will have separate lines for each level aside from the baseline level (e.g. \"ADD-INT_SNPxBMI=25-30\" and \"ADD-INT_SNPxBMI=30+\" where baseline level is \"$&lt;$25\")<ul>\n<li>With Firth correction, only the effect sizes for the interaction effect at each level will be reported and the LRT p-value will only be computed for the joint test on the interaction effects</li>\n</ul>\n</li>\n</ul>\n</li>\n<li>\"ADD-INT_$k$DF\": joint test for main and interaction effect of tested variant ($k\\ge2$ for categorical interacting variables) $-$ this corresponds to $H_0: \\beta = \\gamma = 0$</li>\n</ul>\n<h2 id=\"conditional-analyses\">Conditional analyses</h2>\n<p>Starting from <strong>regenie</strong> v3.0, you can specify genetic variants to add to the set of covariates when performing association testing. \nThis works in both step 1 and 2, and can be used in conjunction with the gene-based tests or the interaction testing feature.\nThe conditioning variants will automatically be excluded from the analysis.</p>\n<table>\n<thead>\n<tr>\n<th>Option</th>\n<th>Argument</th>\n<th>Type</th>\n<th>Description</th>\n</tr>\n</thead>\n<tbody>\n<tr>\n<td><code>--condition-list</code></td>\n<td>FILE</td>\n<td>Required</td>\n<td>file with list of variants to condition on</td>\n</tr>\n<tr>\n<td><code>--condition-file</code></td>\n<td>FORMAT,FILE</td>\n<td>Optional</td>\n<td>get conditioning variants from external file (same argument format as <code>--interaction-file</code>)</td>\n</tr>\n<tr>\n<td><code>--condition-file-sample</code></td>\n<td>FILE</td>\n<td>Optional</td>\n<td>accompanying sample file for BGEN format</td>\n</tr>\n<tr>\n<td><code>--max-condition-vars</code></td>\n<td>INT</td>\n<td>Optional</td>\n<td>maximum number of conditioning variants [default is 10,000]</td>\n</tr>\n</tbody>\n</table>
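<p>For example, with a file <code>snps_to_condition.txt</code> (a hypothetical name) listing one variant ID per line, as the IDs appear in the genetic data (placeholder IDs below):</p>\n<pre><code>1:1111:A:G\n1:2222:C:T\n</code></pre>\n<p>a sketch of the corresponding Step 2 call, reusing the example files from above, is</p>\n<pre><code>./regenie \\\n  --step 2 \\\n  --bed example/example_3chr \\\n  --covarFile example/covariates.txt \\\n  --phenoFile example/phenotype_bin.txt \\\n  --bt \\\n  --firth --approx \\\n  --pred fit_bin_out_pred.list \\\n  --condition-list snps_to_condition.txt \\\n  --bsize 200 \\\n  --out test_bin_out_cond\n</code></pre>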
<h2 id=\"survival-analyses\">Survival analyses</h2>\n<p>Starting from <strong>regenie</strong> v4.0, you can conduct survival analysis for time-to-event data. </p>\n<h3 id=\"phenotype-file-format_1\">Phenotype file format</h3>\n<p>In this small example, there are 5 samples, and the event of interest is the diagnosis of cancer over a period of 10 years.</p>\n<p><img alt=\"Survival_eg\" src=\"../img/survival_eg.png\" /></p>\n<p>Sample 1 is diagnosed with cancer during the study; the <code>time</code> variable is the number of years until the sample is diagnosed with cancer. Sample 2 drops out of the study; sample 3 dies during the study; samples 4 and 5 complete the study without being diagnosed with cancer; they are all right-censored, and the <code>time</code> variable is the last encounter or death time. The corresponding phenotype file is </p>\n<pre><code>FID IID Time Cancer\n1 1 6 1\n2 2 5 0\n3 3 2 0\n4 4 10 0\n5 5 10 0\n</code></pre>\n<h3 id=\"required-options\">Required options</h3>\n<p>Survival analysis in <strong>regenie</strong> requires the following specific options in step 1, step 2 and gene-based burden tests.</p>\n<table>\n<thead>\n<tr>\n<th>Option</th>\n<th>Argument</th>\n<th>Type</th>\n<th>Description</th>\n</tr>\n</thead>\n<tbody>\n<tr>\n<td><code>--t2e</code></td>\n<td>FLAG</td>\n<td>Required</td>\n<td>specify that the traits are time-to-event data</td>\n</tr>\n<tr>\n<td><code>--phenoColList</code></td>\n<td>STRING</td>\n<td>Required</td>\n<td>Comma-separated list of time column names to include in the analysis</td>\n</tr>\n<tr>\n<td><code>--eventColList</code></td>\n<td>STRING</td>\n<td>Required</td>\n<td>Comma-separated list of event columns in the phenotype file to include in the analysis. These event columns should have 0=no event, 1=event, NA=missing</td>\n</tr>\n</tbody>\n</table>\n<p>For the example above, the regenie call is</p>\n<pre><code>./regenie \\\n--t2e \\\n--phenoColList Time \\\n--eventColList Cancer \\\n...\n</code></pre>\n<p>For a phenotype file containing multiple time-to-event traits, the order of the event columns listed in <code>--eventColList</code> should match the order of the time columns specified in <code>--phenoColList</code>.
For example, the phenotype file is</p>\n<pre><code>FID IID Cancer_Time Cancer Asthma_Time Asthma\n1 1 6 1 4 0\n2 2 5 0 8 1\n</code></pre>\n<p>The regenie call is</p>\n<pre><code>./regenie \\\n--t2e \\\n--phenoColList Cancer_Time,Asthma_Time \\\n--eventColList Cancer,Asthma \\\n...\n</code></pre>\n<p>The output format is the same as the <a href=\"#output\">output file for quantitative and binary traits</a>, with the <code>BETA</code> column containing the estimated hazard ratio (on the log scale).</p>\n<h2 id=\"ld-computation\">LD computation</h2>\n<p>REGENIE can calculate LD among a group of variants on the same chromosome. </p>\n<table>\n<thead>\n<tr>\n<th>Option</th>\n<th>Argument</th>\n<th>Type</th>\n<th>Description</th>\n</tr>\n</thead>\n<tbody>\n<tr>\n<td><code>--compute-corr</code></td>\n<td>FLAG</td>\n<td>Required</td>\n<td>compute LD matrix and write to binary file</td>\n</tr>\n<tr>\n<td><code>--output-corr-text</code></td>\n<td>FLAG</td>\n<td>Optional</td>\n<td>write Pearson correlations to text file</td>\n</tr>\n<tr>\n<td><code>--forcein-vars</code></td>\n<td>FLAG</td>\n<td>Optional</td>\n<td>retain in the LD matrix all variants specified in <code>--extract</code> which are absent from the genetic data</td>\n</tr>\n<tr>\n<td><code>--ld-extract</code></td>\n<td>FILE</td>\n<td>Optional</td>\n<td>file listing single variants as well as burden masks to include in LD matrix (see below)</td>\n</tr>\n</tbody>\n</table>\n<p>Note that this can be quite memory intensive for large groups of variants (memory ~$8M^2$ bytes for $M$ variants).</p>\n<h3 id=\"output_3\">Output</h3>\n<p><strong>Using <code>--step 2 --out file</code></strong></p>\n<p>By default, the LD matrix is stored in a binary compressed file <code>file.corr</code> and\nthe list of variants corresponding to the columns of the LD matrix is stored in <code>file.corr.snplist</code>.\nThe R script <a href=\"https://github.com/rgcgithub/regenie/blob/master/scripts/parseLD.r\">scripts/parseLD.r</a> contains a function which returns the LD matrix, e.g. <code>get.corr.sq.matrix(\"file.corr\")</code>.\nUsing <code>--output-corr-text</code> will write the Pearson correlations to a text file instead.</p>\n<p>When using <code>--forcein-vars</code>, variants not present in the genetic data will be added as extra columns/rows in the LD matrix. \nFor these variants, the diagonal entries in the matrix will be set to 1 and the off-diagonal entries to 0.</p>\n<p><strong>Using <code>--ld-extract info.txt</code></strong></p>\n<p>This option is used to compute LD between single variants and burden masks generated on-the-fly in REGENIE; it requires specifying <a href=\"#annotation-input-files\">annotation files</a>.\nThe file <code>info.txt</code> should have three columns: variant type ('sv' or 'mask'), variant name, followed by the set (e.g. gene) name (this can be 'NA' for single variants). For example, it would look like:</p>\n<pre><code>sv 1:1111:A:G NA \nsv 1:2222:C:T NA \nmask PCSK9.M1.0.01 PCSK9\n.\n</code></pre>\n<p>Note that the set and mask names must match those used in REGENIE based on the provided annotation files and allele frequency cutoffs. Variants/masks not present in the data will be kept in the LD matrix but will have the corresponding correlations set to 0.</p>
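<p>As a sketch, LD between the single variants and masks listed in <code>info.txt</code> could then be computed with the command below (mask-building inputs as in the gene-based testing example above; the exact set of required inputs may vary across versions):</p>\n<pre><code>./regenie \\\n  --step 2 \\\n  --bed example/example_3chr \\\n  --phenoFile example/phenotype_bin.txt \\\n  --anno-file example/example_3chr.annotations \\\n  --set-list example/example_3chr.setlist \\\n  --mask-def example/example_3chr.masks \\\n  --compute-corr \\\n  --ld-extract info.txt \\\n  --bsize 200 \\\n  --out ld_out\n</code></pre>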
</div>\n        \n        \n    </div>\n\n    <footer class=\"col-md-12 text-center\">\n        \n        <hr>\n        <p>\n        <small>REGENIE is licensed under the <a href='https://github.com/rgcgithub/regenie/blob/master/LICENSE'>MIT license</a></small><br>\n        \n        <small>Documentation built with <a href=\"http://www.mkdocs.org/\">MkDocs</a>.</small>\n        </p>\n\n        \n        \n    </footer>\n    <script src=\"//ajax.googleapis.com/ajax/libs/jquery/1.12.4/jquery.min.js\"></script>\n    <script src=\"../js/bootstrap-3.0.3.min.js\"></script>\n    \n    <script>var base_url = \"..\"</script>\n    \n    <script src=\"../js/base.js\"></script>\n    <script src=\"../search/main.js\"></script>\n\n  <script type=\"text/javascript\"\n          src=\"https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.0/MathJax.js?config=TeX-AMS_CHTML\"></script>\n\n  <script type=\"text/x-mathjax-config\">\n    MathJax.Hub.Config({\ntex2jax: {\ninlineMath: [['$','$'], ['\\\\(','\\\\)']],\nprocessEscapes: true},\njax: [\"input/TeX\",\"input/MathML\",\"input/AsciiMath\",\"output/CommonHTML\"],\nextensions: [\"tex2jax.js\",\"mml2jax.js\",\"asciimath2jax.js\",\"MathMenu.js\",\"MathZoom.js\",\"AssistiveMML.js\", \"[Contrib]/a11y/accessibility-menu.js\"],\nTeX: {\nextensions: [\"AMSmath.js\",\"AMSsymbols.js\",\"noErrors.js\",\"noUndefined.js\"],\nequationNumbers: {\nautoNumber: \"AMS\"\n}\n}\n});\n  </script>\n\n    <div class=\"modal\" id=\"mkdocs_search_modal\" tabindex=\"-1\" role=\"dialog\" aria-labelledby=\"searchModalLabel\" aria-hidden=\"true\">\n    <div class=\"modal-dialog modal-lg\">\n        <div class=\"modal-content\">\n            <div class=\"modal-header\">\n                <button type=\"button\" class=\"close\" data-dismiss=\"modal\">\n                    <span aria-hidden=\"true\">&times;</span>\n                    <span class=\"sr-only\">Close</span>\n                </button>\n                <h4 class=\"modal-title\" id=\"searchModalLabel\">Search</h4>\n            </div>\n            <div class=\"modal-body\">\n                <p>\n                    From here you can search these documents. 
Enter\n                    your search terms below.\n                </p>\n                <form>\n                    <div class=\"form-group\">\n                        <input type=\"text\" class=\"form-control\" placeholder=\"Search...\" id=\"mkdocs-search-query\" title=\"Type search term here\">\n                    </div>\n                </form>\n                <div id=\"mkdocs-search-results\"></div>\n            </div>\n            <div class=\"modal-footer\">\n            </div>\n        </div>\n    </div>\n</div><div class=\"modal\" id=\"mkdocs_keyboard_modal\" tabindex=\"-1\" role=\"dialog\" aria-labelledby=\"keyboardModalLabel\" aria-hidden=\"true\">\n    <div class=\"modal-dialog\">\n        <div class=\"modal-content\">\n            <div class=\"modal-header\">\n                <h4 class=\"modal-title\" id=\"keyboardModalLabel\">Keyboard Shortcuts</h4>\n                <button type=\"button\" class=\"close\" data-dismiss=\"modal\"><span aria-hidden=\"true\">&times;</span><span class=\"sr-only\">Close</span></button>\n            </div>\n            <div class=\"modal-body\">\n              <table class=\"table\">\n                <thead>\n                  <tr>\n                    <th style=\"width: 20%;\">Keys</th>\n                    <th>Action</th>\n                  </tr>\n                </thead>\n                <tbody>\n                  <tr>\n                    <td class=\"help shortcut\"><kbd>?</kbd></td>\n                    <td>Open this help</td>\n                  </tr>\n                  <tr>\n                    <td class=\"next shortcut\"><kbd>n</kbd></td>\n                    <td>Next page</td>\n                  </tr>\n                  <tr>\n                    <td class=\"prev shortcut\"><kbd>p</kbd></td>\n                    <td>Previous page</td>\n                  </tr>\n                  <tr>\n                    <td class=\"search shortcut\"><kbd>s</kbd></td>\n                    <td>Search</td>\n                  </tr>\n                </tbody>\n              </table>\n            </div>\n            <div class=\"modal-footer\">\n            </div>\n        </div>\n    </div>\n</div>\n    </body>\n\n\n</html>\n"
  },
  {
    "path": "docs/site/overview/index.html",
    "content": "<!DOCTYPE html>\n<html lang=\"en\">\n\n<head>\n    <meta charset=\"utf-8\">\n    <meta http-equiv=\"X-UA-Compatible\" content=\"IE=edge\">\n    <meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\">\n    \n    \n    \n    <link rel=\"shortcut icon\" href=\"../img/favicon.ico\">\n\n    \n    <title>Overview - regenie</title>\n    \n\n    <link rel=\"stylesheet\" href=\"https://use.fontawesome.com/releases/v5.12.0/css/all.css\">\n    <link rel=\"stylesheet\" href=\"https://use.fontawesome.com/releases/v5.12.0/css/v4-shims.css\">\n    <link rel=\"stylesheet\" href=\"//cdn.jsdelivr.net/npm/hack-font@3.3.0/build/web/hack.min.css\">\n    <link href='//rsms.me/inter/inter.css' rel='stylesheet' type='text/css'>\n    <link href='//fonts.googleapis.com/css?family=Open+Sans:300italic,400italic,700italic,400,300,600,700&subset=latin-ext,latin' rel='stylesheet' type='text/css'>\n    <link href=\"../css/bootstrap-custom.min.css\" rel=\"stylesheet\">\n    <link href=\"../css/base.min.css\" rel=\"stylesheet\">\n    <link href=\"../css/cinder.min.css\" rel=\"stylesheet\">\n\n    \n    <link href=\"../custom/style.css\" rel=\"stylesheet\">\n\n    <!-- HTML5 shim and Respond.js IE8 support of HTML5 elements and media queries -->\n    <!--[if lt IE 9]>\n            <script src=\"https://cdn.jsdelivr.net/npm/html5shiv@3.7.3/dist/html5shiv.min.js\"></script>\n            <script src=\"https://cdn.jsdelivr.net/npm/respond.js@1.4.2/dest/respond.min.js\"></script>\n        <![endif]-->\n\n    \n    <script>\n    (function(i, s, o, g, r, a, m) {\n        i['GoogleAnalyticsObject'] = r;\n        i[r] = i[r] || function() {\n            (i[r].q = i[r].q || []).push(arguments)\n        }, i[r].l = 1 * new Date();\n        a = s.createElement(o),\n        m = s.getElementsByTagName(o)[0];\n        a.async = 1;\n        a.src = g;\n        m.parentNode.insertBefore(a, m)\n    })(window, document, 'script', '//www.google-analytics.com/analytics.js', 'ga');\n\n    ga('create', 'UA-166859683-2', 'auto');\n    ga('send', 'pageview');\n    </script>\n    \n\n     \n</head>\n\n<body>\n\n    <div class=\"navbar navbar-default navbar-fixed-top\" role=\"navigation\">\n    <div class=\"container\">\n\n        <!-- Collapsed navigation -->\n        <div class=\"navbar-header\">\n            <!-- Expander button -->\n            <button type=\"button\" class=\"navbar-toggle\" data-toggle=\"collapse\" data-target=\".navbar-collapse\">\n                <span class=\"sr-only\">Toggle navigation</span>\n                <span class=\"icon-bar\"></span>\n                <span class=\"icon-bar\"></span>\n                <span class=\"icon-bar\"></span>\n            </button>\n            \n\n            <!-- Main title -->\n\n            <a class=\"navbar-brand\" href=\"..\">regenie</a>\n        </div>\n\n        <!-- Expanded navigation -->\n        <div class=\"navbar-collapse collapse\">\n                <!-- Main navigation -->\n                <ul class=\"nav navbar-nav\">\n                \n                \n                    <li >\n                        <a href=\"..\">Home</a>\n                    </li>\n                \n                \n                \n                    <li class=\"active\">\n                        <a href=\"./\">Overview</a>\n                    </li>\n                \n                \n                \n                    <li >\n                        <a href=\"../install/\">Install</a>\n                    </li>\n                \n                \n              
  \n                    <li >\n                        <a href=\"../options/\">Documentation</a>\n                    </li>\n                \n                \n                \n                    <li >\n                        <a href=\"../performance/\">Performance</a>\n                    </li>\n                \n                \n                \n                    <li >\n                        <a href=\"../recommendations/\">UKBB Analysis</a>\n                    </li>\n                \n                \n                \n                    <li >\n                        <a href=\"../faq/\">F.A.Q.</a>\n                    </li>\n                \n                \n                </ul>\n\n            <ul class=\"nav navbar-nav navbar-right\">\n                    <li>\n                        <a href=\"#\" data-toggle=\"modal\" data-target=\"#mkdocs_search_modal\">\n                            <i class=\"fas fa-search\"></i> Search\n                        </a>\n                    </li>\n                    <li >\n                        <a rel=\"prev\" href=\"..\">\n                            <i class=\"fas fa-arrow-left\"></i> Previous\n                        </a>\n                    </li>\n                    <li >\n                        <a rel=\"next\" href=\"../install/\">\n                            Next <i class=\"fas fa-arrow-right\"></i>\n                        </a>\n                    </li>\n            </ul>\n        </div>\n    </div>\n</div>\n\n    <div class=\"container\">\n        \n        \n        <div class=\"col-md-3\"><div class=\"bs-sidebar hidden-print affix well\" role=\"complementary\">\n    <ul class=\"nav bs-sidenav\">\n        <li class=\"first-level active\"><a href=\"#overview\">Overview</a></li>\n            <li class=\"second-level\"><a href=\"#step-1-whole-genome-model\">Step 1 : Whole genome model</a></li>\n                \n                <li class=\"third-level\"><a href=\"#ridge-regression-level-0\">Ridge regression (level 0)</a></li>\n                <li class=\"third-level\"><a href=\"#cross-validation-level-1\">Cross-validation (level 1)</a></li>\n                <li class=\"third-level\"><a href=\"#genetic-predictors-and-loco\">Genetic predictors and LOCO</a></li>\n                <li class=\"third-level\"><a href=\"#multiple-phenotypes\">Multiple phenotypes</a></li>\n                <li class=\"third-level\"><a href=\"#covariates\">Covariates</a></li>\n            <li class=\"second-level\"><a href=\"#step-2-single-variant-association-testing\">Step 2 : Single-variant association testing</a></li>\n                \n                <li class=\"third-level\"><a href=\"#quantitative-traits\">Quantitative traits</a></li>\n                <li class=\"third-level\"><a href=\"#binary-traits\">Binary traits</a></li>\n            <li class=\"second-level\"><a href=\"#step-2-gene-based-testing\">Step 2 : Gene-based testing</a></li>\n                \n                <li class=\"third-level\"><a href=\"#burden-tests\">Burden tests</a></li>\n                <li class=\"third-level\"><a href=\"#variance-component-tests\">Variance component tests</a></li>\n                <li class=\"third-level\"><a href=\"#cauchy-combination-tests\">Cauchy combination tests</a></li>\n                <li class=\"third-level\"><a href=\"#sparse-burden-association-test\">Sparse Burden Association Test</a></li>\n                <li class=\"third-level\"><a href=\"#gene_p\">GENE_P</a></li>\n            <li class=\"second-level\"><a 
href=\"#step-2-interaction-testing\">Step 2 : Interaction testing</a></li>\n                \n            <li class=\"second-level\"><a href=\"#survival-analysis\">Survival analysis</a></li>\n                \n                <li class=\"third-level\"><a href=\"#step-1-whole-genome-model-using-cox-ridge-regression\">Step 1: Whole genome model using cox ridge regression</a></li>\n                <li class=\"third-level\"><a href=\"#step-2-single-variant-and-gene-based-burden-tests\">Step 2: Single variant and gene-based burden tests</a></li>\n            <li class=\"second-level\"><a href=\"#missing-phenotype-data\">Missing Phenotype data</a></li>\n                \n            <li class=\"second-level\"><a href=\"#references\">References</a></li>\n                \n    </ul>\n</div></div>\n        <div class=\"col-md-9\" role=\"main\">\n\n<h2 id=\"overview\">Overview</h2>\n<p>This page provides an overview of the models and methods implemented in\n<strong>regenie</strong>. A full description is given in our <a href=\"https://doi.org/10.1038/s41588-021-00870-7\">paper</a>.</p>\n<p><strong>regenie</strong> carries out genome-wide association tests for both\n  quantitative and binary (case-control) phenotypes. Starting at <strong>regenie v4.0</strong>, it also supports survival analysis for time-to-event data (See <a href=\"#survival-analysis\">Survival analysis</a> section below). It is designed to handle</p>\n<ol>\n<li>A large number of samples. For example, it is ideally suited to the\n<a href=\"https://www.ukbiobank.ac.uk/\">UK Biobank</a> dataset with 500,000 samples.</li>\n<li>A combination of genetic data from a micro-array, imputation and\nexome sequencing.</li>\n<li>A large number of either quantitative traits (QTs), binary\n(case-control) traits (BTs), or time-to-event traits (TTEs)</li>\n<li>Accounting for a set of covariates</li>\n</ol>\n<p>An overview of the <strong>regenie</strong> method is provided in the figure below.\nEssentially, <strong>regenie</strong> is run in 2 steps:</p>\n<ol>\n<li>In the first step a subset of genetic markers are used to fit a\nwhole genome regression model that captures a good fraction of the\nphenotype variance attributable to genetic effects.</li>\n<li>In the second step, a larger set of genetic markers (e.g. imputed\nmarkers) are tested for association with the phenotype <em>conditional\nupon</em> the prediction from the regression model in Step 1, using a\nleave one chromosome out (LOCO) scheme, that avoids proximal\ncontamination.</li>\n</ol>\n<p><img alt=\"Rflow\" src=\"../img/regenie_overview2.png\" style=\"text-align:center;padding: 10px;width:70%;border: 1px solid #ddd;display: block;margin-left: auto;margin-right: auto\" />\n<br></p>\n<h3 id=\"step-1-whole-genome-model\">Step 1 : Whole genome model</h3>\n<p>In Step 1 a whole genome regression model is fit at a subset of the\ntotal set of available genetic markers. These are typically a set of\nseveral hundred thousand (<script type=\"math/tex\">M</script>) common markers from a micro-array. 
</p>\n<h4 id=\"ridge-regression-level-0\">Ridge regression (level 0)</h4>\n<p><strong>regenie</strong> reads in the <script type=\"math/tex\">M</script> markers in blocks of <script type=\"math/tex\">B</script> consecutive markers (<code>--bsize</code> option).</p>\n<p>In each block, a set of ridge regression\n  predictors are calculated for a small range of <script type=\"math/tex\">J</script> shrinkage\n  parameters <script type=\"math/tex\"> \\{\\tau_1,\\ldots, \\tau_J\\} </script> (set using the <code>--l0</code> option [default is 5]).</p>\n<p>For a block of SNPs in a <script type=\"math/tex\">N\\times B</script> matrix <script type=\"math/tex\">X</script> and <script type=\"math/tex\">N\\times 1</script>\nphenotype vector <script type=\"math/tex\">Y</script> we calculate <script type=\"math/tex\">J</script> predictors</p>\n<p>\n<script type=\"math/tex\"> X\\widehat{\\beta}_1, \\ldots, X\\widehat{\\beta}_J </script>\n</p>\n<p>where</p>\n<p>\n<script type=\"math/tex\"> \\widehat{\\beta}_j = (X^TX+\\tau_j I)^{-1}X^T Y</script>\n</p>\n<p>The idea behind using a range of shrinkage\n  values is to capture the unknown number and size of truly\n  associated genetic markers within each window. The ridge regression takes account of linkage disequilibrium (LD) within each block.</p>\n<p>These predictors are stored in place of the\n  genetic markers in matrix <script type=\"math/tex\">W</script>, providing a large reduction in data size. \nFor example, if\n  <script type=\"math/tex\">M=500,000</script> and <script type=\"math/tex\">B=1,000</script> and <script type=\"math/tex\">J=5</script> shrinkage parameters are used, then the\n  reduced dataset will have <script type=\"math/tex\">JM/B=2,500</script> predictors.</p>\n<p>Ridge regression is used in this step for both quantitative and binary\ntraits. </p>\n<h4 id=\"cross-validation-level-1\">Cross-validation (level 1)</h4>\n<p>The predictors generated by the ridge regression step will all be\npositively correlated with the phenotype. Thus, it is important to\naccount for that correlation when building a whole-genome\nregression model.</p>\n<p>When analyzing a quantitative trait we use a second level of ridge\nregression on the full set of <script type=\"math/tex\">JM/B</script> predictors in <script type=\"math/tex\">W</script>. This\napproach is inspired by the method of stacked regressions<sup id=\"fnref:1\"><a class=\"footnote-ref\" href=\"#fn:1\">1</a></sup>.</p>\n<p>We fit the ridge regression for a range of shrinkage parameters (<code>--l1</code> option) and choose a single\nbest value using a K-fold cross-validation scheme. This assesses the\npredictive performance of the model using held-out sets of data, and aims to control\nany over-fitting induced by using the first level of ridge regression\nto derive the predictors.</p>\n<p>In other words, we fit the model</p>\n<p>\n<script type=\"math/tex; mode=display\">Y = W\\alpha + \\epsilon</script>\n</p>\n<p>where <script type=\"math/tex\">\\alpha</script> is estimated as <script type=\"math/tex\">\\widehat{\\alpha} = (W^TW + \\phi\\,I)^{-1}W^TY</script>\nand the parameter <script type=\"math/tex\">\\phi</script> is chosen\nvia K-fold cross-validation.</p>
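<p>As a sketch in our notation (not taken from the paper): writing $W_{(k)}$ and $Y_{(k)}$ for the data of held-out fold $k$, and $\\widehat{\\alpha}_{-k}(\\phi)$ for the level 1 estimate computed without that fold, the selected shrinkage parameter minimizes the held-out prediction error</p>\n<p>\n<script type=\"math/tex; mode=display\">\\widehat{\\phi} = \\arg\\min_{\\phi} \\sum_{k=1}^{K} \\left\\lVert Y_{(k)} - W_{(k)}\\widehat{\\alpha}_{-k}(\\phi)\\right\\rVert^2</script>\n</p>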
<p>For binary traits, we use a logistic ridge regression model to combine\nthe predictors in <script type=\"math/tex\">W</script>\n</p>\n<p>\n<script type=\"math/tex; mode=display\">\\text{logit}(p) = \\mu + W\\alpha</script>\nwhere <script type=\"math/tex\">p</script> is the probability of being a case and <script type=\"math/tex\">\\mu</script> captures the effects of non-genetic covariates.</p>\n<h4 id=\"genetic-predictors-and-loco\">Genetic predictors and LOCO</h4>\n<p>Once <script type=\"math/tex\">\\alpha</script> has been estimated we can construct the genetic prediction</p>\n<p>\n<script type=\"math/tex; mode=display\">Z = W\\widehat{\\alpha}</script>\n</p>\n<p>Also, since each column of the matrix <script type=\"math/tex\">W</script> will be associated with a\nchromosome, we can also construct a genetic prediction ignoring any\none chromosome, by simply ignoring those columns when calculating the\nprediction. This is known as the Leave One Chromosome Out (LOCO)\napproach. These LOCO predictions are valuable at Step 2 of <strong>regenie</strong>\nwhen each marker is tested for association (see below).</p>\n<p>For binary traits, it is the linear predictor in a logistic regression\nmodel using LOCO that is saved,\nand used as an offset when fitting logistic regression models to test\nfor association.</p>\n<h4 id=\"multiple-phenotypes\">Multiple phenotypes</h4>\n<p>The dimension reduction step using ridge regression can be used very\nefficiently to model multiple phenotypes at once. The ridge regression\nequations for a block of SNPs in a <script type=\"math/tex\">N\\times B</script> matrix <script type=\"math/tex\">X</script> and a single phenotype\nin a <script type=\"math/tex\">N\\times 1</script> matrix <script type=\"math/tex\">Y</script> take the form</p>\n<p>\n<script type=\"math/tex\"> \\widehat{\\beta} = AY </script> where <script type=\"math/tex\">A = (X^TX+\\tau I)^{-1}X^T</script>\n<strong>does not depend on <script type=\"math/tex\">Y</script></strong></p>\n<p>If instead <script type=\"math/tex\">P</script> phenotypes are stored in the columns of a <script type=\"math/tex\">N\\times P</script> matrix <script type=\"math/tex\">Y</script>, then the matrix <script type=\"math/tex\">A</script>\ncan be applied jointly to calculate the matrix of estimates <script type=\"math/tex\">\\widehat{\\beta} = AY</script>,\nand this can take advantage of parallel linear algebra\nimplementations in the Eigen matrix library.</p>\n<h4 id=\"covariates\">Covariates</h4>\n<p>Covariates, such as age, sex, and batch effect variables, can be\nincluded in the <strong>regenie</strong> model.</p>\n<p>For quantitative traits, any covariates are regressed out of\nphenotypes and genotypes before fitting the model.</p>\n<p>For binary traits, we fit a null model with only covariates, and use\npredictions from that model as an offset when fitting the logistic\nregression model.</p>\n<h3 id=\"step-2-single-variant-association-testing\">Step 2 : Single-variant association testing</h3>\n<p>In Step 2, a larger set of markers are tested for association with the\ntrait (or traits). As with Step 1, these markers are also read in\nblocks of <script type=\"math/tex\">B</script> markers, and tested for association.
This avoids having to store all markers in memory at once.</p>\n<h4 id=\"quantitative-traits\">Quantitative traits</h4>\n<p>For quantitative traits, we use a linear regression model for\nassociation testing.</p>\n<ul>\n<li>Covariates are regressed out of the phenotypes and genetic markers.</li>\n<li>The LOCO predictions from Step 1 are removed from the phenotypes.</li>\n<li>Linear regression is then used to test association of the residualized\nphenotype and the genetic marker.</li>\n<li>Parallel linear algebra operations in the Eigen library are used\n  where possible.</li>\n</ul>\n<h4 id=\"binary-traits\">Binary traits</h4>\n<p>For binary traits, a logistic regression score test is used to test association of\nthe phenotype and the genetic marker.</p>\n<p>The logistic regression model includes the LOCO predictions from\n  Step 1 <em>as an offset</em>. Covariates are included in the linear\n  predictor in the usual way.</p>\n<p>When the case-control ratio is imbalanced, standard association tests\ndon't control Type I error well at rare genetic markers. <strong>regenie</strong>\nhas two options to handle this:</p>\n<h5 id=\"firth-logistic-regression\">Firth logistic regression</h5>\n<p>Standard maximum likelihood estimates are generally biased. The Firth\ncorrection<sup id=\"fnref:2\"><a class=\"footnote-ref\" href=\"#fn:2\">2</a></sup>\nremoves much of the bias, and results in better calibrated test\nstatistics. The correction involves adding a penalty term to the\nlog-likelihood,</p>\n<p>\n<script type=\"math/tex; mode=display\"> \\widetilde{l}(\\theta) = l(\\theta) + {1 \\over 2} \\log |I(\\theta)| </script>\n</p>\n<p>where the penalty term corresponds to the use of the Jeffreys prior. \nThis prior has the effect of shrinking the effect size towards zero.</p>\n<p><strong>regenie</strong> uses a Firth correction when the p-value from the standard\n  logistic regression test is below a threshold (default 0.05). \nIt also includes a novel, accurate and fast approximate Firth correction which \nis ~60x faster than the exact Firth correction\n(see the option <code>--firth</code>). </p>\n<p>The p-value reported in <strong>regenie</strong> is based on a likelihood ratio test (LRT), and we use the Hessian of the log-likelihood without the penalty term to estimate the standard error (SE). \nThis may cause an issue in meta-analyses with rare variants, as the effect size estimate and SE may not match with the LRT p-value. \nHence, we added an option <code>--firth-se</code> to report a SE computed instead from the effect size estimate and the LRT p-value.</p>\n<h5 id=\"saddle-point-approxiation-spa-test\">Saddle point approximation (SPA) test</h5>\n<p>The SPA test approximates the null distribution of the test statistic\nby approximating the cumulant generating function of the\ntest statistic, which involves all of the higher order\nmoments<sup id=\"fnref:3\"><a class=\"footnote-ref\" href=\"#fn:3\">3</a></sup>$^,$<sup id=\"fnref:4\"><a class=\"footnote-ref\" href=\"#fn:4\">4</a></sup>. This provides a better estimation of the tail probabilities compared to using\nstandard asymptotic theory, which relies on the normal approximation and uses only the\nfirst two moments of the distribution.
A tail probability is obtained as</p>\n<p>\n<script type=\"math/tex; mode=display\">\n\\begin{align*}\nP(T < t_{\\text{obs}}) &\\approx \\Phi(z), \\text{ where}\\\\\nz &= w + \\frac{1}{w}\\log{\\frac{v}{w}}\\\\\nw &= \\text{sign}(\\delta^*)\\sqrt{2\\left[t_{\\text{obs}}\\,\\delta^* - K(\\delta^*)\\right]},\\quad\nv = \\delta^*\\sqrt{K''(\\delta^*)}\n\\end{align*}\n</script>\n</p>\n<p>and <script type=\"math/tex\">K(\\delta)</script> is the cumulant generating function of the test statistic and <script type=\"math/tex\">\\delta^*</script> \nis obtained by using a root-finding algorithm for <script type=\"math/tex\">K'(\\delta)=t_{\\text{obs}}</script>. As this approximation\nhas been found not to work very well for ultra-rare variants, a minimum minor \nallele count (MAC) is used to filter out these variants before testing (option <code>--minMAC</code>).</p>\n<h3 id=\"step-2-gene-based-testing\">Step 2 : Gene-based testing</h3>\n<p>Instead of performing single-variant association tests, multiple variants can be aggregated\nin a given region, such as a gene, using the following model</p>\n<p>\n<script type=\"math/tex; mode=display\">g(\\mu) = w_1G_1\\beta_1 + \\dots + w_mG_m\\beta_m</script>\n</p>\n<p>where <script type=\"math/tex\">G_i</script>'s represent the single variants included in the test,\n<script type=\"math/tex\">w_i</script>'s and <script type=\"math/tex\">\\beta_i</script>'s are weights and effect sizes, respectively, for each variant,\nand <script type=\"math/tex\">g(.)</script> is a link function for the phenotypic mean <script type=\"math/tex\">\\mu</script>. \nWe also denote by <script type=\"math/tex\">S_i</script> the score statistics obtained from the \n<a href=\"#step-2-single-variant-association-testing\">single-variant tests</a>.\nThis can be especially helpful when testing rare variants, as single-variant \ntests usually have lower power.</p>\n<p>To avoid inflation in the gene-based tests due to rare variants, as well as to reduce computation time, we have implemented the collapsing approach\nproposed in SAIGE-GENE+<sup id=\"fnref:5\"><a class=\"footnote-ref\" href=\"#fn:5\">5</a></sup>, where ultra-rare variants are aggregated into a mask.\nFor highly imbalanced binary traits, SPA/Firth correction can be used to calibrate the test statistics in the\ngene-based tests, as proposed in Zhao et al. (2020)<sup id=\"fnref:6\"><a class=\"footnote-ref\" href=\"#fn:6\">6</a></sup>, using <code>--firth/--spa</code>. </p>\n<h4 id=\"burden-tests\">Burden tests</h4>\n<p>Burden tests, as defined in Lee et al. (2014)<sup id=\"fnref:7\"><a class=\"footnote-ref\" href=\"#fn:7\">7</a></sup>, assume <script type=\"math/tex\">\\beta_i=\\beta\\; \\forall i</script>, where <script type=\"math/tex\">\\beta</script> is a fixed coefficient, which then leads to the test statistic\n<script type=\"math/tex; mode=display\">Q_{BURDEN} = \\left(\\sum_i w_iS_i\\right)^2</script>\nThese tests collapse variants into a single variable which is then tested for association with the phenotype. Hence, they are more powerful when variants have effects in the same direction and of similar magnitude. \nIn <strong>regenie</strong>, multiple options are available to aggregate variants together into a burden mask beyond the linear combination above (<a href=\"../options/#options_1\">see here</a>). \nFor example, the burden tests that were employed in Backman et al.
(2021)<sup id=\"fnref:8\"><a class=\"footnote-ref\" href=\"#fn:8\">8</a></sup>\nuse the default strategy in <strong>regenie</strong> of collapsing variants by taking\nthe maximum number of rare alleles across the sites.</p>\n<h4 id=\"variance-component-tests\">Variance component tests</h4>\n<p>Unlike burden tests, SKAT<sup id=\"fnref:9\"><a class=\"footnote-ref\" href=\"#fn:9\">9</a></sup> assumes the effect sizes $\\beta_i$ come from an arbitrary\ndistribution with mean 0 and variance $\\tau^2$, which leads to the test statistic\n<script type=\"math/tex; mode=display\">Q_{SKAT} = \\sum_i w_i^2S_i^2</script>\nHence, SKAT can remain powerful when variant effects are in opposite directions.</p>\n<p>The omnibus test SKATO<sup id=\"fnref:10\"><a class=\"footnote-ref\" href=\"#fn:10\">10</a></sup> combines the SKAT and burden tests as \n<script type=\"math/tex; mode=display\">Q_{SKATO} = \\rho Q_{BURDEN} + (1-\\rho) Q_{SKAT}</script>\nSo setting $\\rho=0$ corresponds to SKAT and $\\rho=1$ to the burden test.\nIn practice, the parameter $\\rho$ is chosen to maximize the power \n[<strong>regenie</strong> uses a default grid of 8 values {$0, 0.1^2, 0.2^2, 0.3^2, 0.4^2, 0.5^2, 0.5, 1$}\nand sets the weights $w_i = Beta(MAF_i,1,25)$].</p>\n<p>To obtain the p-value from a linear combination of chi-squared variables, <strong>regenie</strong> uses Davies' exact method<sup id=\"fnref:11\"><a class=\"footnote-ref\" href=\"#fn:11\">11</a></sup> by default.\nFollowing Wu et al. (2016)<sup id=\"fnref:12\"><a class=\"footnote-ref\" href=\"#fn:12\">12</a></sup>, <strong>regenie</strong> uses Kuonen's saddlepoint approximation method<sup id=\"fnref:13\"><a class=\"footnote-ref\" href=\"#fn:13\">13</a></sup> when Davies' p-value\nis below 1e-5 and, if that fails, it uses Davies' method with more stringent convergence parameters (lim=1e5,acc=1e-9). </p>\n<p>The original SKATO method uses numerical integration when maximizing power across the \nvarious SKATO models that use different values for $\\rho$. \nWe also implement a modification of SKATO, named SKATO-ACAT, \nwhich instead uses the Cauchy combination method<sup id=\"fnref:14\"><a class=\"footnote-ref\" href=\"#fn:14\">14</a></sup> \nto combine the p-values from the different SKATO models.</p>\n<h4 id=\"cauchy-combination-tests\">Cauchy combination tests</h4>\n<p>The ACATV<sup id=\"fnref:15\"><a class=\"footnote-ref\" href=\"#fn:15\">15</a></sup> test uses the Cauchy combination method ACAT to combine single variant p-values $p_i$ as\n<script type=\"math/tex; mode=display\">Q_{ACATV} = \\sum_i \\widetilde{w}_i^2\\tan{\\{\\pi(0.5 - p_i)\\}}</script>\nwhere we set $\\widetilde{w}_i = w_i \\sqrt{MAF(1-MAF)}$. 
\nThis test is highly computationally tractable and is robust to correlation between the single variant tests.</p>\n<p>The omnibus test ACATO<sup id=\"fnref2:15\"><a class=\"footnote-ref\" href=\"#fn:15\">15</a></sup> combines ACATV with the SKAT and burden tests as \n<script type=\"math/tex; mode=display\">\nQ_{ACATO} = \n\\frac{1}{3}\\tan{\\{\\pi(0.5 - p_{ACATV})\\}}+\n\\frac{1}{3}\\tan{\\{\\pi(0.5 - p_{Burden})\\}}+\n\\frac{1}{3}\\tan{\\{\\pi(0.5 - p_{SKAT})\\}}\n</script>\n</p>\n<p>where, unlike the original ACATO test, we only use one set of the weights $w_i$.\nAlternatively, we augment the test to include an extended set of SKATO models beyond SKAT and Burden\n(which correspond to $\\rho$ of 0 and 1 in SKATO, respectively) and use the default SKATO grid of 8 values for $\\rho$.</p>\n<h4 id=\"sparse-burden-association-test\">Sparse Burden Association Test</h4>\n<p><strong>regenie</strong> can generate burden masks which are obtained by aggregating single variants\nusing various annotation classes as well as allele frequency\nthresholds. The Sparse Burden Association Test (SBAT)<sup id=\"fnref:16\"><a class=\"footnote-ref\" href=\"#fn:16\">16</a></sup> combines these burden masks\nin a joint model, imposing the constraint that the effects share the same direction\n<script type=\"math/tex; mode=display\">\n\\mu = \\sum_{\\text{mask }i} M_i\\gamma_i\n</script>\nwhere $M_i$ represents a burden mask and we solve\n<script type=\"math/tex; mode=display\">\n\\underset{\\boldsymbol\\gamma}{\\min} || Y - \\sum_i M_i\\gamma_i||^2 \n\\text{ subject to } \\gamma_i \\ge 0 \\text{ for all } i\n</script>\n</p>\n<p>The SBAT method tests the hypothesis $H_0: \\gamma_i=0$ for all $i$ vs.\n$H_1: \\gamma_i &gt; 0$ for some $i$.\nBy using this joint model, the SBAT test accounts for the correlation structure between the burden masks \nand, with the non-negative constraints,\nit can lead to a boost in power when multiple burden masks are causal and have concordant effects.\nThis test has the nice property that it combines \nmodel selection of the masks (via the sparsity induced by the non-negative assumption) \nwith model inference (it is well calibrated and powerful).</p>\n<h4 id=\"gene_p\">GENE_P</h4>\n<p>As the different gene-based tests in REGENIE can be more powerful under different genetic architectures,\nwe propose a unified strategy, named GENE_P, that combines the strengths of these tests.\nIt uses ACAT to combine the p-values of the SKATO, ACATV, Burden and SBAT tests \nto obtain an overall assessment of significance for a genetic region (e.g. gene).\nThe diagram below illustrates the GENE_P test using 4 masks (i.e. combinations of variant annotations) and 3 allele frequency cutoffs \nwhen performing gene-based tests.</p>\n<p><img alt=\"GENE_P_flow\" src=\"../img/GENE_P_diagram.png\" style=\"text-align:center;padding: 10px;width:70%;border: 1px solid #ddd;display: block;margin-left: auto;margin-right: auto\" />\n<br></p>
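<p>Since GENE_P is an ACAT combination, it takes the same form as the ACATO statistic above; a sketch, assuming equal weights across the four tests:</p>\n<p>\n<script type=\"math/tex; mode=display\">Q_{GENE\\_P} = \\frac{1}{4}\\sum_{t\\,\\in\\,\\{\\text{SKATO},\\,\\text{ACATV},\\,\\text{Burden},\\,\\text{SBAT}\\}} \\tan\\{\\pi(0.5 - p_t)\\}</script>\n</p>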
<h3 id=\"step-2-interaction-testing\">Step 2 : Interaction testing</h3>\n<p>The GxE tests are of the form\n<script type=\"math/tex; mode=display\">\ng(\\mu) = E\\alpha + G\\beta + (G\\odot E)\\gamma\n</script>\nwhere $E$ is an environmental risk factor and $G$ is a marker of interest,\nand $\\odot$ represents the Hadamard (entry-wise) product of the two.\nThe last term in the model allows for the variant to have different effects across values of the risk factor. \n<em>Note: if $E$ is categorical, we use a dummy variable for each level of $E$ in the model above.</em></p>\n<p>We can look at the following hypotheses:</p>\n<ol>\n<li>$H_0: \\beta = 0$ given $\\gamma = 0$, which is a marginal test for the SNP</li>\n<li>$H_0: \\beta = 0$, which is a test for the main effect of the SNP in the full model</li>\n<li>$H_0: \\gamma = 0$, which is a test for interaction</li>\n<li>$H_0: \\beta = \\gamma = 0$, which tests both main and interaction effects for the SNP</li>\n</ol>\n<p>Misspecification of the model above, \nsuch as in the presence of heteroskedasticity or \nof high case-control imbalance, can lead to inflation in the tests.\nRobust (sandwich) standard error (SE) estimators<sup id=\"fnref:17\"><a class=\"footnote-ref\" href=\"#fn:17\">17</a></sup> can be used to address model misspecification; however, \nthey can suffer from inflation when testing rare variants\nor in the presence of high case-control imbalance<sup id=\"fnref:18\"><a class=\"footnote-ref\" href=\"#fn:18\">18</a></sup>$^,$<sup id=\"fnref:19\"><a class=\"footnote-ref\" href=\"#fn:19\">19</a></sup>.</p>\n<p>In <strong>regenie</strong>, we use a hybrid approach which combines:</p>\n<ul>\n<li>Wald test with sandwich estimators</li>\n<li>Wald test with heteroskedastic linear models (for quantitative traits)</li>\n<li>LRT with penalized Firth regression (for binary traits)</li>\n</ul>\n<p>For quantitative traits,\nwe use the HC3 sandwich estimator to perform a Wald test for variants whose minor allele count (MAC) is above 1000 (see <code>--rare-mac</code>). \nFor the remaining variants, we fit a heteroskedastic linear model (HLM)<sup id=\"fnref:20\"><a class=\"footnote-ref\" href=\"#fn:20\">20</a></sup>\n<script type=\"math/tex; mode=display\">\nY = E\\alpha + E^2\\zeta + G\\beta + (G\\odot E)\\gamma + \\epsilon\n</script>\n</p>\n<p>where we assume $\\epsilon \\sim N(0, D)$, with $D$ a diagonal matrix with entries $\\sigma^2\\exp{(1 + E\\theta_1 + E^2\\theta_2)}$.\nThis formulation allows for the phenotypic variance to also depend on the risk factor $E$.\nBy incorporating both the linear and quadratic effect of $E$ in the mean and variance of $Y$, \nthis model provides robustness to heteroskedasticity \n(<em>Note: the $E^2$ terms are only added when $E$ is quantitative</em>). \nWald tests are then performed for the null hypotheses listed above.</p>\n<p>For binary traits, we consider the following interaction model\n<script type=\"math/tex; mode=display\">\n\\text{logit}(\\mu) = E\\alpha + E^2\\zeta + G\\beta + (G\\odot E)\\gamma\n</script>\nwhere we also include a non-linear effect for $E$ (not when $E$ is categorical).\nThe HC3 sandwich estimator is used in a Wald test for variants whose MAC is above 1000 (see <code>--rare-mac</code>); otherwise, the model-based standard errors are used.\nWhen Firth is specified, we only apply the Firth correction using the LRT if the p-value for the interaction term $\\gamma$ from the Wald test is below a specified threshold (see <code>--pThresh</code>). The added $E^2$ term, together with the Firth penalty, helps with case-control imbalance and model misspecification for the effect of $E$ on the phenotype. </p>\n<h3 id=\"survival-analysis\">Survival analysis</h3>\n<p>Starting with <strong>regenie v4.0</strong>, we have enabled survival analysis, improving the power for analyzing common diseases where time-to-event data is available, by leveraging the Cox proportional hazards model. We assume that samples without an event are right-censored, i.e. 
the survival time is only known to be greater than a certain value. It is important to <a href=\"/regenie/options/#survival-analyses\">encode this information correctly into the phenotypes</a>.</p>\n<h4 id=\"step-1-whole-genome-model-using-cox-ridge-regression\">Step 1: Whole genome model using Cox ridge regression</h4>\n<p>In step 1, Level 0 is run using <a href=\"#ridge-regression-level-0\">linear ridge regression</a> with the <code>time</code> variable taken as the response. In Level 1, instead of linear/logistic ridge regression, we use Cox ridge regression<sup id=\"fnref:21\"><a class=\"footnote-ref\" href=\"#fn:21\">21</a></sup> to combine the predictions $W$ from Level 0.</p>\n<p>\n<script type=\"math/tex; mode=display\">\n\\lambda_i(t) = \\lambda_0(t) \\exp(\\mu_i + w_i^\\intercal \\alpha)\n</script>\nwhere $\\lambda_0(t)$ is the baseline hazard function, and, for the $i$-th individual, $\\lambda_i(t)$ is the hazard function, $w_i$ is the set of ridge predictors from Level 0, and $\\mu_i$ captures the effects of non-genetic covariates.</p>\n<p>We fit the Cox ridge regression for a range of shrinkage parameters and select the best value using a K-fold cross-validation scheme.</p>\n<p>With the estimated $\\hat{\\alpha}$, we construct LOCO predictions which capture population structure, relatedness and polygenicity.</p>\n<h4 id=\"step-2-single-variant-and-gene-based-burden-tests\">Step 2: Single variant and gene-based burden tests</h4>\n<p>For time-to-event traits, the Cox proportional hazards regression model is used to test the association between the phenotype and the genetic marker. <strong>Note</strong>: the only supported gene-based test is the burden test.</p>\n<p>The Cox proportional hazards regression model includes the LOCO predictions from Step 1 as an offset.</p>\n<p>\n<script type=\"math/tex; mode=display\">\n\\lambda_i(t) = \\lambda_0(t) \\exp(\\mu_i + w_{i, LOCO} + g_i \\beta)\n</script>\n</p>\n<p>We test the null hypothesis $H_0: \\beta = 0$ using a score test. When the event rate is low, the standard score test doesn't control Type I error well at rare genetic markers. To reduce the bias and achieve a more robust test, regenie uses the Firth correction<sup id=\"fnref:22\"><a class=\"footnote-ref\" href=\"#fn:22\">22</a></sup> when the p-value from the standard score test is below a threshold (default 0.05). </p>\n<p>The Firth correction provides a well-calibrated test, but comes with a computational cost. To mitigate this burden in Cox regression, we include a fast approximate test, which gives results very similar to the exact Firth test.</p>\n<h3 id=\"missing-phenotype-data\">Missing Phenotype data</h3>\n<p>With QTs, missing values are mean-imputed in Step 1 and they are \ndropped when testing each phenotype in Step 2 (unless using <code>--force-impute</code>).</p>\n<p>With BTs, missing values are mean-imputed in Step 1 when fitting the\nlevel 0 linear ridge regression and they are dropped when fitting the\nlevel 1 logistic ridge regression for each trait. In Step 2, missing \nvalues are dropped when testing each trait.</p>\n<p>To remove all samples that have missing values at <strong>any</strong> of the <script type=\"math/tex\">P</script>\nphenotypes from the analysis, use option <code>--strict</code> in step 1 and 2. 
\nWhen analyzing a single trait, this option can also be used to keep only individuals with \ncomplete data, by setting the phenotype values of the individuals to be removed to NA.</p>\n<p>Note: imputation is only applied to phenotypes; covariates are not allowed to have missing data.</p>\n<h3 id=\"references\">References</h3>\n<div class=\"footnote\">\n<hr />\n<ol>\n<li id=\"fn:1\">\n<p>Breiman, L. Stacked regressions. <em>Machine learning</em> <strong>24</strong>, 49–64 (1996).&#160;<a class=\"footnote-backref\" href=\"#fnref:1\" title=\"Jump back to footnote 1 in the text\">&#8617;</a></p>\n</li>\n<li id=\"fn:2\">\n<p>Firth, D. Bias reduction of maximum likelihood estimates. <em>Biometrika</em> <strong>80</strong>, 27–38 (1993).&#160;<a class=\"footnote-backref\" href=\"#fnref:2\" title=\"Jump back to footnote 2 in the text\">&#8617;</a></p>\n</li>\n<li id=\"fn:3\">\n<p>Butler, R. W. <em>Saddlepoint approximations with applications</em>. (Cambridge University Press, 2007).&#160;<a class=\"footnote-backref\" href=\"#fnref:3\" title=\"Jump back to footnote 3 in the text\">&#8617;</a></p>\n</li>\n<li id=\"fn:4\">\n<p>Dey, R., Schmidt, E. M., Abecasis, G. R. &amp; Lee, S. A fast and accurate algorithm to test for binary phenotypes and its application to PheWAS. <em>The American Journal of Human Genetics</em> <strong>101</strong>, 37–49 (2017).&#160;<a class=\"footnote-backref\" href=\"#fnref:4\" title=\"Jump back to footnote 4 in the text\">&#8617;</a></p>\n</li>\n<li id=\"fn:5\">\n<p>Zhou, W. <em>et al.</em> Set-based rare variant association tests for biobank scale sequencing data sets. <em>medRxiv</em> (2021).&#160;<a class=\"footnote-backref\" href=\"#fnref:5\" title=\"Jump back to footnote 5 in the text\">&#8617;</a></p>\n</li>\n<li id=\"fn:6\">\n<p>Zhao, Z. <em>et al.</em> UK biobank whole-exome sequence binary phenome analysis with robust region-based rare-variant test. <em>Am J Hum Genet</em> <strong>106</strong>, 3–12 (2020).&#160;<a class=\"footnote-backref\" href=\"#fnref:6\" title=\"Jump back to footnote 6 in the text\">&#8617;</a></p>\n</li>\n<li id=\"fn:7\">\n<p>Lee, S., Abecasis, G. R., Boehnke, M. &amp; Lin, X. Rare-variant association analysis: Study designs and statistical tests. <em>Am J Hum Genet</em> <strong>95</strong>, 5–23 (2014).&#160;<a class=\"footnote-backref\" href=\"#fnref:7\" title=\"Jump back to footnote 7 in the text\">&#8617;</a></p>\n</li>\n<li id=\"fn:8\">\n<p>Backman, J. D. <em>et al.</em> Exome sequencing and analysis of 454,787 UK biobank participants. <em>Nature</em> <strong>599</strong>, 628–634 (2021).&#160;<a class=\"footnote-backref\" href=\"#fnref:8\" title=\"Jump back to footnote 8 in the text\">&#8617;</a></p>\n</li>\n<li id=\"fn:9\">\n<p>Wu, M. C. <em>et al.</em> Rare-variant association testing for sequencing data with the sequence kernel association test. <em>Am J Hum Genet</em> <strong>89</strong>, 82–93 (2011).&#160;<a class=\"footnote-backref\" href=\"#fnref:9\" title=\"Jump back to footnote 9 in the text\">&#8617;</a></p>\n</li>\n<li id=\"fn:10\">\n<p>Lee, S., Wu, M. C. &amp; Lin, X. Optimal tests for rare variant effects in sequencing association studies. <em>Biostatistics</em> <strong>13</strong>, 762–75 (2012).&#160;<a class=\"footnote-backref\" href=\"#fnref:10\" title=\"Jump back to footnote 10 in the text\">&#8617;</a></p>\n</li>\n<li id=\"fn:11\">\n<p>Davies, R. B. Algorithm AS 155: The distribution of a linear combination of χ<sup>2</sup> random variables. 
<em>Applied Statistics</em> <strong>29</strong>, 323–333 (1980).&#160;<a class=\"footnote-backref\" href=\"#fnref:11\" title=\"Jump back to footnote 11 in the text\">&#8617;</a></p>\n</li>\n<li id=\"fn:12\">\n<p>Wu, B., Guan, W. &amp; Pankow, J. S. On efficient and accurate calculation of significance p-values for sequence kernel association testing of variant set. <em>Ann Hum Genet</em> <strong>80</strong>, 123–35 (2016).&#160;<a class=\"footnote-backref\" href=\"#fnref:12\" title=\"Jump back to footnote 12 in the text\">&#8617;</a></p>\n</li>\n<li id=\"fn:13\">\n<p>Kuonen, D. Miscellanea. Saddlepoint approximations for distributions of quadratic forms in normal variables. <em>Biometrika</em> <strong>86</strong>, 929–935 (1999).&#160;<a class=\"footnote-backref\" href=\"#fnref:13\" title=\"Jump back to footnote 13 in the text\">&#8617;</a></p>\n</li>\n<li id=\"fn:14\">\n<p>Liu, Y. &amp; Xie, J. Cauchy combination test: A powerful test with analytic p-value calculation under arbitrary dependency structures. <em>J Am Stat Assoc</em> <strong>115</strong>, 393–402 (2020).&#160;<a class=\"footnote-backref\" href=\"#fnref:14\" title=\"Jump back to footnote 14 in the text\">&#8617;</a></p>\n</li>\n<li id=\"fn:15\">\n<p>Liu, Y. <em>et al.</em> ACAT: A fast and powerful p value combination method for rare-variant analysis in sequencing studies. <em>Am J Hum Genet</em> <strong>104</strong>, 410–421 (2019).&#160;<a class=\"footnote-backref\" href=\"#fnref:15\" title=\"Jump back to footnote 15 in the text\">&#8617;</a><a class=\"footnote-backref\" href=\"#fnref2:15\" title=\"Jump back to footnote 15 in the text\">&#8617;</a></p>\n</li>\n<li id=\"fn:16\">\n<p>Ziyatdinov, A. <em>et al.</em> Joint testing of rare variant burden scores using non-negative least squares. <em>The American Journal of Human Genetics</em> <strong>111</strong>, 2139–2149 (2024).&#160;<a class=\"footnote-backref\" href=\"#fnref:16\" title=\"Jump back to footnote 16 in the text\">&#8617;</a></p>\n</li>\n<li id=\"fn:17\">\n<p>MacKinnon, J. G. &amp; White, H. Some heteroskedasticity-consistent covariance matrix estimators with improved finite sample properties. <em>Journal of Econometrics</em> <strong>29</strong>, 305–325 (1985).&#160;<a class=\"footnote-backref\" href=\"#fnref:17\" title=\"Jump back to footnote 17 in the text\">&#8617;</a></p>\n</li>\n<li id=\"fn:18\">\n<p>Tchetgen Tchetgen, E. J. &amp; Kraft, P. On the robustness of tests of genetic associations incorporating gene-environment interaction when the environmental exposure is misspecified. <em>Epidemiology</em> <strong>22</strong>, 257–61 (2011).&#160;<a class=\"footnote-backref\" href=\"#fnref:18\" title=\"Jump back to footnote 18 in the text\">&#8617;</a></p>\n</li>\n<li id=\"fn:19\">\n<p>Voorman, A., Lumley, T., McKnight, B. &amp; Rice, K. Behavior of QQ-plots and genomic control in studies of gene-environment interaction. <em>PLoS One</em> <strong>6</strong>, (2011).&#160;<a class=\"footnote-backref\" href=\"#fnref:19\" title=\"Jump back to footnote 19 in the text\">&#8617;</a></p>\n</li>\n<li id=\"fn:20\">\n<p>Young, A. I., Wauthier, F. L. &amp; Donnelly, P. Identifying loci affecting trait variability and detecting interactions in genome-wide association studies. <em>Nat Genet</em> <strong>50</strong>, 1608–1614 (2018).&#160;<a class=\"footnote-backref\" href=\"#fnref:20\" title=\"Jump back to footnote 20 in the text\">&#8617;</a></p>\n</li>\n<li id=\"fn:21\">\n<p>Simon, N., Friedman, J. H., Hastie, T. &amp; Tibshirani, R. 
Regularization paths for cox’s proportional hazards model via coordinate descent. <em>Journal of statistical software</em> <strong>39</strong>, 1–13 (2011).&#160;<a class=\"footnote-backref\" href=\"#fnref:21\" title=\"Jump back to footnote 21 in the text\">&#8617;</a></p>\n</li>\n<li id=\"fn:22\">\n<p>Heinze, G. &amp; Schemper, M. A solution to the problem of monotone likelihood in cox regression. <em>Biometrics</em> <strong>57</strong>, 114–119 (2001).&#160;<a class=\"footnote-backref\" href=\"#fnref:22\" title=\"Jump back to footnote 22 in the text\">&#8617;</a></p>\n</li>\n</ol>\n</div></div>\n        \n        \n    </div>\n\n    <footer class=\"col-md-12 text-center\">\n        \n        <hr>\n        <p>\n        <small>REGENIE is licensed under the <a href='https://github.com/rgcgithub/regenie/blob/master/LICENSE'>MIT license</small><br>\n        \n        <small>Documentation built with <a href=\"http://www.mkdocs.org/\">MkDocs</a>.</small>\n        </p>\n\n        \n        \n    </footer>\n    <script src=\"//ajax.googleapis.com/ajax/libs/jquery/1.12.4/jquery.min.js\"></script>\n    <script src=\"../js/bootstrap-3.0.3.min.js\"></script>\n    \n    <script>var base_url = \"..\"</script>\n    \n    <script src=\"../js/base.js\"></script>\n    <script src=\"../search/main.js\"></script>\n\n  <script type=\"text/javascript\"\n          src=\"https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.0/MathJax.js?config=TeX-AMS_CHTML\"></script>\n\n  <script type=\"text/x-mathjax-config\">\n    MathJax.Hub.Config({\ntex2jax: {\ninlineMath: [['$','$'], ['\\\\(','\\\\)']],\nprocessEscapes: true},\njax: [\"input/TeX\",\"input/MathML\",\"input/AsciiMath\",\"output/CommonHTML\"],\nextensions: [\"tex2jax.js\",\"mml2jax.js\",\"asciimath2jax.js\",\"MathMenu.js\",\"MathZoom.js\",\"AssistiveMML.js\", \"[Contrib]/a11y/accessibility-menu.js\"],\nTeX: {\nextensions: [\"AMSmath.js\",\"AMSsymbols.js\",\"noErrors.js\",\"noUndefined.js\"],\nequationNumbers: {\nautoNumber: \"AMS\"\n}\n}\n});\n  </script>\n\n    <div class=\"modal\" id=\"mkdocs_search_modal\" tabindex=\"-1\" role=\"dialog\" aria-labelledby=\"searchModalLabel\" aria-hidden=\"true\">\n    <div class=\"modal-dialog modal-lg\">\n        <div class=\"modal-content\">\n            <div class=\"modal-header\">\n                <button type=\"button\" class=\"close\" data-dismiss=\"modal\">\n                    <span aria-hidden=\"true\">&times;</span>\n                    <span class=\"sr-only\">Close</span>\n                </button>\n                <h4 class=\"modal-title\" id=\"searchModalLabel\">Search</h4>\n            </div>\n            <div class=\"modal-body\">\n                <p>\n                    From here you can search these documents. 
Enter\n                    your search terms below.\n                </p>\n                <form>\n                    <div class=\"form-group\">\n                        <input type=\"text\" class=\"form-control\" placeholder=\"Search...\" id=\"mkdocs-search-query\" title=\"Type search term here\">\n                    </div>\n                </form>\n                <div id=\"mkdocs-search-results\"></div>\n            </div>\n            <div class=\"modal-footer\">\n            </div>\n        </div>\n    </div>\n</div><div class=\"modal\" id=\"mkdocs_keyboard_modal\" tabindex=\"-1\" role=\"dialog\" aria-labelledby=\"keyboardModalLabel\" aria-hidden=\"true\">\n    <div class=\"modal-dialog\">\n        <div class=\"modal-content\">\n            <div class=\"modal-header\">\n                <h4 class=\"modal-title\" id=\"keyboardModalLabel\">Keyboard Shortcuts</h4>\n                <button type=\"button\" class=\"close\" data-dismiss=\"modal\"><span aria-hidden=\"true\">&times;</span><span class=\"sr-only\">Close</span></button>\n            </div>\n            <div class=\"modal-body\">\n              <table class=\"table\">\n                <thead>\n                  <tr>\n                    <th style=\"width: 20%;\">Keys</th>\n                    <th>Action</th>\n                  </tr>\n                </thead>\n                <tbody>\n                  <tr>\n                    <td class=\"help shortcut\"><kbd>?</kbd></td>\n                    <td>Open this help</td>\n                  </tr>\n                  <tr>\n                    <td class=\"next shortcut\"><kbd>n</kbd></td>\n                    <td>Next page</td>\n                  </tr>\n                  <tr>\n                    <td class=\"prev shortcut\"><kbd>p</kbd></td>\n                    <td>Previous page</td>\n                  </tr>\n                  <tr>\n                    <td class=\"search shortcut\"><kbd>s</kbd></td>\n                    <td>Search</td>\n                  </tr>\n                </tbody>\n              </table>\n            </div>\n            <div class=\"modal-footer\">\n            </div>\n        </div>\n    </div>\n</div>\n    </body>\n\n\n</html>\n"
  },
  {
    "path": "docs/site/performance/index.html",
    "content": "<!DOCTYPE html>\n<html lang=\"en\">\n\n<head>\n    <meta charset=\"utf-8\">\n    <meta http-equiv=\"X-UA-Compatible\" content=\"IE=edge\">\n    <meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\">\n    \n    \n    \n    <link rel=\"shortcut icon\" href=\"../img/favicon.ico\">\n\n    \n    <title>Performance - regenie</title>\n    \n\n    <link rel=\"stylesheet\" href=\"https://use.fontawesome.com/releases/v5.12.0/css/all.css\">\n    <link rel=\"stylesheet\" href=\"https://use.fontawesome.com/releases/v5.12.0/css/v4-shims.css\">\n    <link rel=\"stylesheet\" href=\"//cdn.jsdelivr.net/npm/hack-font@3.3.0/build/web/hack.min.css\">\n    <link href='//rsms.me/inter/inter.css' rel='stylesheet' type='text/css'>\n    <link href='//fonts.googleapis.com/css?family=Open+Sans:300italic,400italic,700italic,400,300,600,700&subset=latin-ext,latin' rel='stylesheet' type='text/css'>\n    <link href=\"../css/bootstrap-custom.min.css\" rel=\"stylesheet\">\n    <link href=\"../css/base.min.css\" rel=\"stylesheet\">\n    <link href=\"../css/cinder.min.css\" rel=\"stylesheet\">\n\n    \n    <link href=\"../custom/style.css\" rel=\"stylesheet\">\n\n    <!-- HTML5 shim and Respond.js IE8 support of HTML5 elements and media queries -->\n    <!--[if lt IE 9]>\n            <script src=\"https://cdn.jsdelivr.net/npm/html5shiv@3.7.3/dist/html5shiv.min.js\"></script>\n            <script src=\"https://cdn.jsdelivr.net/npm/respond.js@1.4.2/dest/respond.min.js\"></script>\n        <![endif]-->\n\n    \n    <script>\n    (function(i, s, o, g, r, a, m) {\n        i['GoogleAnalyticsObject'] = r;\n        i[r] = i[r] || function() {\n            (i[r].q = i[r].q || []).push(arguments)\n        }, i[r].l = 1 * new Date();\n        a = s.createElement(o),\n        m = s.getElementsByTagName(o)[0];\n        a.async = 1;\n        a.src = g;\n        m.parentNode.insertBefore(a, m)\n    })(window, document, 'script', '//www.google-analytics.com/analytics.js', 'ga');\n\n    ga('create', 'UA-166859683-2', 'auto');\n    ga('send', 'pageview');\n    </script>\n    \n\n     \n</head>\n\n<body>\n\n    <div class=\"navbar navbar-default navbar-fixed-top\" role=\"navigation\">\n    <div class=\"container\">\n\n        <!-- Collapsed navigation -->\n        <div class=\"navbar-header\">\n            <!-- Expander button -->\n            <button type=\"button\" class=\"navbar-toggle\" data-toggle=\"collapse\" data-target=\".navbar-collapse\">\n                <span class=\"sr-only\">Toggle navigation</span>\n                <span class=\"icon-bar\"></span>\n                <span class=\"icon-bar\"></span>\n                <span class=\"icon-bar\"></span>\n            </button>\n            \n\n            <!-- Main title -->\n\n            <a class=\"navbar-brand\" href=\"..\">regenie</a>\n        </div>\n\n        <!-- Expanded navigation -->\n        <div class=\"navbar-collapse collapse\">\n                <!-- Main navigation -->\n                <ul class=\"nav navbar-nav\">\n                \n                \n                    <li >\n                        <a href=\"..\">Home</a>\n                    </li>\n                \n                \n                \n                    <li >\n                        <a href=\"../overview/\">Overview</a>\n                    </li>\n                \n                \n                \n                    <li >\n                        <a href=\"../install/\">Install</a>\n                    </li>\n                \n                \n                
\n                    <li >\n                        <a href=\"../options/\">Documentation</a>\n                    </li>\n                \n                \n                \n                    <li class=\"active\">\n                        <a href=\"./\">Performance</a>\n                    </li>\n                \n                \n                \n                    <li >\n                        <a href=\"../recommendations/\">UKBB Analysis</a>\n                    </li>\n                \n                \n                \n                    <li >\n                        <a href=\"../faq/\">F.A.Q.</a>\n                    </li>\n                \n                \n                </ul>\n\n            <ul class=\"nav navbar-nav navbar-right\">\n                    <li>\n                        <a href=\"#\" data-toggle=\"modal\" data-target=\"#mkdocs_search_modal\">\n                            <i class=\"fas fa-search\"></i> Search\n                        </a>\n                    </li>\n                    <li >\n                        <a rel=\"prev\" href=\"../options/\">\n                            <i class=\"fas fa-arrow-left\"></i> Previous\n                        </a>\n                    </li>\n                    <li >\n                        <a rel=\"next\" href=\"../recommendations/\">\n                            Next <i class=\"fas fa-arrow-right\"></i>\n                        </a>\n                    </li>\n            </ul>\n        </div>\n    </div>\n</div>\n\n    <div class=\"container\">\n        \n        \n        <div class=\"col-md-3\"><div class=\"bs-sidebar hidden-print affix well\" role=\"complementary\">\n    <ul class=\"nav bs-sidenav\">\n        <li class=\"first-level active\"><a href=\"#performance\">Performance</a></li>\n            <li class=\"second-level\"><a href=\"#quantitative-traits\">Quantitative traits</a></li>\n                \n            <li class=\"second-level\"><a href=\"#binary-traits\">Binary traits</a></li>\n                \n            <li class=\"second-level\"><a href=\"#timings-improvements-in-v22\">Timings improvements in v2.2</a></li>\n                \n            <li class=\"second-level\"><a href=\"#gene-based-testing\">Gene-based testing</a></li>\n                \n                <li class=\"third-level\"><a href=\"#quantitative-traits_1\">Quantitative traits</a></li>\n                <li class=\"third-level\"><a href=\"#binary-traits_1\">Binary traits</a></li>\n    </ul>\n</div></div>\n        <div class=\"col-md-9\" role=\"main\">\n\n<h2 id=\"performance\">Performance</h2>\n<p>We assessed the performance of <strong>regenie</strong> against 3 other\nprograms for GWAS on large cohorts.</p>\n<ol>\n<li>BOLT-LMM <a href=\"https://www.nature.com/articles/ng.3190\">Loh et al. (2015) Nature Genetics 47, 284–290</a> <a href=\"https://data.broadinstitute.org/alkesgroup/BOLT-LMM/\">[Software]</a></li>\n<li>SAIGE - <a href=\"https://www.nature.com/articles/s41588-018-0184-y\">Zhou et al. (2018) Nature Genetics 50, 1335–1341</a><a href=\"https://github.com/weizhouUMICH/SAIGE\">[Software]</a></li>\n<li>fastGWA - <a href=\"https://www.nature.com/articles/s41588-019-0530-8\">Jiang et al. 
(2019) Nature Genetics 51, 1749–1755</a> <a href=\"https://cnsgenomics.com/software/gcta/#Overview\">[Software]</a></li>\n</ol>\n<p>Full details for all the runs are available in our <a href=\"https://doi.org/10.1038/s41588-021-00870-7\">paper</a>.</p>\n<h3 id=\"quantitative-traits\">Quantitative traits</h3>\n<p>We ran <strong>regenie</strong>, BOLT-LMM and fastGWA on \n3 quantitative phenotypes measured on white British UK Biobank participants \n(LDL, N=389,189; Body mass index [BMI], N=407,609; and Bilirubin, N=388,303) \nwhere testing was performed on 9.8 million imputed SNPs. </p>\n<p>The Manhattan plots for all three phenotypes (see below) show good agreement between the methods, \nwith both <strong>regenie</strong> and BOLT-LMM resulting in stronger association signals relative to fastGWA \nat known peaks of association \n(note that in the plots, the scaling of the y-axis changes above the upper dashed line).</p>\n<p><img alt=\"QTmanP\" src=\"../img/ManP_exemplarQTs_WB.jpg\" />\n<br/><br/></p>\n<p>We assessed the computational requirements of all three methods using a larger set of 50 \nquantitative traits from the UK Biobank, looking at computational timings as well as\nmemory usage. \nFor <strong>regenie</strong> and BOLT-LMM, 469,336 LD-pruned SNPs were used as model SNPs when fitting the null model (step 1) and \nfor fastGWA, these SNPs were used to compute the sparse GRM (timing not included).\nTests were performed on 11.4M imputed SNPs (step 2).</p>\n<p><img alt=\"QTtime\" src=\"../img/qtTiming.png\" /></p>\n<p>From the table above, <strong>regenie</strong> was 151x faster than BOLT-LMM in elapsed time for Step 1 and 11.5x faster for Step 2, \nwhich translated into a $&gt;$30x overall speed-up in terms of elapsed time.\nIn addition, <strong>regenie</strong> had a maximum memory usage of 12.9 GB, which is mostly due to <strong>regenie</strong> \nonly reading a small portion of the genotype data at a time, whereas BOLT-LMM required 50 GB.\n<strong>regenie</strong> was 2.8x faster than fastGWA, but fastGWA is very memory efficient and used only a maximum of 2 GB.</p>\n<h3 id=\"binary-traits\">Binary traits</h3>\n<p><strong>regenie</strong> was compared to BOLT-LMM and SAIGE on a set of four binary traits\nmeasured on white British UK Biobank participants (coronary artery disease [CAD], N=352,063, case-control ratio=1:11; \nglaucoma, N=406,927, case-control ratio=1:52;\n colorectal cancer, N=407,746, case-control ratio=1:97;\n and thyroid cancer, N=407,746, case-control ratio=1:660)\nand Step 2 testing was performed on 11.6 million imputed SNPs.\nA novel and fast approximate Firth correction was used in <strong>regenie</strong>, as well as an SPA correction.</p>\n<p>As seen in the Manhattan plots below (note that the scaling of the y-axis changes above the upper dashed line), \nall four approaches show very good agreement for the most balanced trait (CAD; case-control ratio=1:11), \nbut as the fraction of cases decreases, BOLT-LMM tends to give inflated test statistics. \nHowever, both <strong>regenie</strong>, with its Firth and SPA corrections, and SAIGE, which uses an SPA correction, \nare robust to this inflation and show similar agreement for the associations detected.</p>\n<p><img alt=\"BTmanP\" src=\"../img/ManP_exemplarBTs_WB.jpg\" />\n<br/></p>\n<p>We assessed the computational requirements of <strong>regenie</strong> and SAIGE using a larger set of 50 \nbinary traits from the UK Biobank that have a range of different case-control ratios and distinct missing data patterns. 
\n469,336 LD-pruned SNPs were used as model SNPs when fitting the null model (step 1) \nand tests were performed on 11.4M imputed SNPs (step 2).\nIn step 1, <strong>regenie</strong> was run using LOOCV, and for two traits SAIGE did not finish as it took longer than the 4-week limit. \nIn step 2, the approximate Firth correction was used in <strong>regenie</strong> in addition to SPA correction.</p>\n<p><img alt=\"BTtime\" src=\"../img/btTiming.png\" /></p>\n<p>From the table above, Step 1 of <strong>regenie</strong> was about 350x faster and required only $40\\%$ of the memory used by SAIGE. \nIn Step 2, <strong>regenie</strong> Firth and SPA were 2x and 3x faster than SAIGE in CPU time, respectively, but were 21x and 34x \nfaster than SAIGE in elapsed time, respectively, which suggests that <strong>regenie</strong> makes better use of parallelization in this step. \nOverall, <strong>regenie</strong> using Firth correction was 8x faster than SAIGE in CPU hours \nand 26.8x faster in elapsed time.</p>\n<p>All runs above were done on the same computing environment (16 virtual CPU cores of \na 2.1 GHz AMD EPYC 7571 processor, 64 GB of memory, and a 600 GB solid-state disk).</p>\n<h3 id=\"timings-improvements-in-v22\">Timings improvements in v2.2</h3>\n<p>We made several changes in <strong>regenie</strong> v2.2 to improve its computational efficiency:</p>\n<ul>\n<li>The genotype file reading in Step 1 is now multi-threaded for all supported formats (i.e. BED, PGEN, and BGEN) and uses a faster file reading implementation for the BGEN v1.2 format with 8-bit encoding. From our timing experiments below, these changes helped reduce the CPU time by 40-60% depending on the input format. </li>\n</ul>\n<p><img alt=\"Step1time\" src=\"../img/timings_Step1.png\" /></p>\n<p><em>Note that we used a small number of SNPs for Step 1 in our experiments (20K), so the timing improvement will not be as high in a real Step 1 run where ~500K SNPs would be used.</em></p>\n<ul>\n<li>We have improved the implementation of the score tests for binary traits to reduce the number of matrix operations performed, and this reduced the CPU timings by ~60% relative to the previous version 2.0.2.</li>\n</ul>\n<p><img alt=\"Step2time\" src=\"../img/timings_Step2_BT.png\" />\n<em>Note that there is an added memory cost of ~8NKP bytes [N = #samples; K = #covariates; P = #phenotypes], so ~800 MB extra for a UKB 500K run with 10 traits &amp; 20 covariates.</em></p>\n<ul>\n<li>We have also made use of the sparsity of the genotype vector for rarer variants in Step 2 (more so with binary traits), and this reduced the timing in our experiments by ~20% on average.</li>\n</ul>\n<p><img alt=\"Step2SparseTime\" src=\"../img/timings_Step2_BT_v2.2.png\" /></p>\n<p><em>In our experiments, common variants are defined as having MAF &gt; 5%, rare variants as having MAF &lt; 1%, and no correction (i.e. Firth/SPA) is used.</em></p>\n<ul>\n<li>We have added new options <code>--write-null-firth</code> and <code>--use-null-firth</code> to reduce the timing of Step 2 with approximate Firth when run in parallel jobs split into smaller chunks within chromosomes. More specifically, <code>--write-null-firth</code> can be used in Step 1 to fit the null model for the approximate Firth test and store the resulting estimates to file. Then in Step 2, specifying <code>--use-null-firth</code> will re-use these parameter estimates to reduce the timing of the approximate Firth null model fitting; see the example after this list. 
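\n<em>We thank Juha Karjalainen for suggesting this feature.</em></li>\n</ul>\n<p>For example (a sketch only: the <code>...</code> stands in for the other options of a typical run, and we assume a Step 1 run named <code>ukb_step1_BT</code> whose stored null estimates are listed in a file named here <code>ukb_step1_BT_firth.list</code>):</p>\n<pre><code># Step 1: also store the approximate Firth null model estimates\n./regenie --step 1 ... --bt --write-null-firth --out ukb_step1_BT\n\n# Step 2 (e.g. one chunk of a chromosome): re-use the stored estimates\n./regenie --step 2 ... --bt --firth --approx \\\n  --pred ukb_step1_BT_pred.list \\\n  --use-null-firth ukb_step1_BT_firth.list\n</code></pre>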
\n<p>Note: in our timing experiments, the PGEN genotype file only includes hard-calls. We ran a single trait in <strong>regenie</strong> and each setting was replicated 5 times.</p>\n<h3 id=\"gene-based-testing\">Gene-based testing</h3>\n<p><strong>regenie</strong> v3.0 adds a wide range of <a href=\"../overview/#step-2-gene-based-testing\">gene-based tests</a>.\nWe have performed simulation experiments to assess the calibration of the tests with quantitative and binary traits \nusing real genetic data from the UK Biobank, where we randomly selected 100,000 samples from the set of white British participants\n(see the \"Methods\" section of the <a href=\"https://doi.org/10.1038/s41588-021-00870-7\">Regenie paper</a> for details on the phenotype simulation,\nwhere we set the heritability to 20%).</p>\n<p>Using whole exome sequencing data, we constructed variant sets incorporating functional annotations \n(LoF and missense, where missense variants were predicted as deleterious \nusing a score based on 5 in-silico algorithms), \nas well as allele frequency thresholds focusing on rarer variation (1%, 0.1% and 0.01%).\nThe SKAT/ACAT tests were applied only to variant sets using a 1% or 0.01% AAF threshold, and \nthe SBAT and BURDEN-ACAT joint tests combined all burden mask signals from the 1%, 0.1%, 0.01% and singleton thresholds.\n1,000 genes on even chromosomes were randomly selected and tested for association (causal variants were on odd chromosomes).\nThe QQ plots below show the distribution of p-values for each test across the different annotation categories (run with <strong>regenie</strong> v3.2).</p>\n<h4 id=\"quantitative-traits_1\">Quantitative traits</h4>\n<p>Using a 1% allele frequency cutoff for the SKAT/ACAT tests.\n<img alt=\"QT_gene\" src=\"../img/Gene_burden_QTsims.png\" /></p>\n<h4 id=\"binary-traits_1\">Binary traits</h4>\n<p>We simulated highly imbalanced phenotypes with a disease prevalence of 1% (case-control ratio of 1:99)\nand applied Firth/SPA correction to the tests.</p>\n<p>Using a 1% allele frequency cutoff for the SKAT/ACAT tests.\n<img alt=\"BT_gene1\" src=\"../img/Gene_burden_BTsims_AF1pct.png\" /></p>\n<p>Using a 0.01% allele frequency cutoff for the SKAT/ACAT tests.\n<img alt=\"BT_gene_001\" src=\"../img/Gene_burden_BTsims_AF001pct.png\" /></p></div>\n        \n        \n    </div>\n\n    <footer class=\"col-md-12 text-center\">\n        \n        <hr>\n        <p>\n        <small>REGENIE is licensed under the <a href='https://github.com/rgcgithub/regenie/blob/master/LICENSE'>MIT license</small><br>\n        \n        <small>Documentation built with <a href=\"http://www.mkdocs.org/\">MkDocs</a>.</small>\n        </p>\n\n        \n        \n    </footer>\n    <script src=\"//ajax.googleapis.com/ajax/libs/jquery/1.12.4/jquery.min.js\"></script>\n    <script src=\"../js/bootstrap-3.0.3.min.js\"></script>\n    \n    <script>var base_url = \"..\"</script>\n    \n    <script src=\"../js/base.js\"></script>\n    <script src=\"../search/main.js\"></script>\n\n  <script type=\"text/javascript\"\n          src=\"https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.0/MathJax.js?config=TeX-AMS_CHTML\"></script>\n\n  <script type=\"text/x-mathjax-config\">\n    MathJax.Hub.Config({\ntex2jax: {\ninlineMath: [['$','$'], ['\\\\(','\\\\)']],\nprocessEscapes: true},\njax: [\"input/TeX\",\"input/MathML\",\"input/AsciiMath\",\"output/CommonHTML\"],\nextensions: 
[\"tex2jax.js\",\"mml2jax.js\",\"asciimath2jax.js\",\"MathMenu.js\",\"MathZoom.js\",\"AssistiveMML.js\", \"[Contrib]/a11y/accessibility-menu.js\"],\nTeX: {\nextensions: [\"AMSmath.js\",\"AMSsymbols.js\",\"noErrors.js\",\"noUndefined.js\"],\nequationNumbers: {\nautoNumber: \"AMS\"\n}\n}\n});\n  </script>\n\n    <div class=\"modal\" id=\"mkdocs_search_modal\" tabindex=\"-1\" role=\"dialog\" aria-labelledby=\"searchModalLabel\" aria-hidden=\"true\">\n    <div class=\"modal-dialog modal-lg\">\n        <div class=\"modal-content\">\n            <div class=\"modal-header\">\n                <button type=\"button\" class=\"close\" data-dismiss=\"modal\">\n                    <span aria-hidden=\"true\">&times;</span>\n                    <span class=\"sr-only\">Close</span>\n                </button>\n                <h4 class=\"modal-title\" id=\"searchModalLabel\">Search</h4>\n            </div>\n            <div class=\"modal-body\">\n                <p>\n                    From here you can search these documents. Enter\n                    your search terms below.\n                </p>\n                <form>\n                    <div class=\"form-group\">\n                        <input type=\"text\" class=\"form-control\" placeholder=\"Search...\" id=\"mkdocs-search-query\" title=\"Type search term here\">\n                    </div>\n                </form>\n                <div id=\"mkdocs-search-results\"></div>\n            </div>\n            <div class=\"modal-footer\">\n            </div>\n        </div>\n    </div>\n</div><div class=\"modal\" id=\"mkdocs_keyboard_modal\" tabindex=\"-1\" role=\"dialog\" aria-labelledby=\"keyboardModalLabel\" aria-hidden=\"true\">\n    <div class=\"modal-dialog\">\n        <div class=\"modal-content\">\n            <div class=\"modal-header\">\n                <h4 class=\"modal-title\" id=\"keyboardModalLabel\">Keyboard Shortcuts</h4>\n                <button type=\"button\" class=\"close\" data-dismiss=\"modal\"><span aria-hidden=\"true\">&times;</span><span class=\"sr-only\">Close</span></button>\n            </div>\n            <div class=\"modal-body\">\n              <table class=\"table\">\n                <thead>\n                  <tr>\n                    <th style=\"width: 20%;\">Keys</th>\n                    <th>Action</th>\n                  </tr>\n                </thead>\n                <tbody>\n                  <tr>\n                    <td class=\"help shortcut\"><kbd>?</kbd></td>\n                    <td>Open this help</td>\n                  </tr>\n                  <tr>\n                    <td class=\"next shortcut\"><kbd>n</kbd></td>\n                    <td>Next page</td>\n                  </tr>\n                  <tr>\n                    <td class=\"prev shortcut\"><kbd>p</kbd></td>\n                    <td>Previous page</td>\n                  </tr>\n                  <tr>\n                    <td class=\"search shortcut\"><kbd>s</kbd></td>\n                    <td>Search</td>\n                  </tr>\n                </tbody>\n              </table>\n            </div>\n            <div class=\"modal-footer\">\n            </div>\n        </div>\n    </div>\n</div>\n    </body>\n\n\n</html>\n"
  },
  {
    "path": "docs/site/recommendations/index.html",
    "content": "<!DOCTYPE html>\n<html lang=\"en\">\n\n<head>\n    <meta charset=\"utf-8\">\n    <meta http-equiv=\"X-UA-Compatible\" content=\"IE=edge\">\n    <meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\">\n    \n    \n    \n    <link rel=\"shortcut icon\" href=\"../img/favicon.ico\">\n\n    \n    <title>UKBB Analysis - regenie</title>\n    \n\n    <link rel=\"stylesheet\" href=\"https://use.fontawesome.com/releases/v5.12.0/css/all.css\">\n    <link rel=\"stylesheet\" href=\"https://use.fontawesome.com/releases/v5.12.0/css/v4-shims.css\">\n    <link rel=\"stylesheet\" href=\"//cdn.jsdelivr.net/npm/hack-font@3.3.0/build/web/hack.min.css\">\n    <link href='//rsms.me/inter/inter.css' rel='stylesheet' type='text/css'>\n    <link href='//fonts.googleapis.com/css?family=Open+Sans:300italic,400italic,700italic,400,300,600,700&subset=latin-ext,latin' rel='stylesheet' type='text/css'>\n    <link href=\"../css/bootstrap-custom.min.css\" rel=\"stylesheet\">\n    <link href=\"../css/base.min.css\" rel=\"stylesheet\">\n    <link href=\"../css/cinder.min.css\" rel=\"stylesheet\">\n\n    \n    <link href=\"../custom/style.css\" rel=\"stylesheet\">\n\n    <!-- HTML5 shim and Respond.js IE8 support of HTML5 elements and media queries -->\n    <!--[if lt IE 9]>\n            <script src=\"https://cdn.jsdelivr.net/npm/html5shiv@3.7.3/dist/html5shiv.min.js\"></script>\n            <script src=\"https://cdn.jsdelivr.net/npm/respond.js@1.4.2/dest/respond.min.js\"></script>\n        <![endif]-->\n\n    \n    <script>\n    (function(i, s, o, g, r, a, m) {\n        i['GoogleAnalyticsObject'] = r;\n        i[r] = i[r] || function() {\n            (i[r].q = i[r].q || []).push(arguments)\n        }, i[r].l = 1 * new Date();\n        a = s.createElement(o),\n        m = s.getElementsByTagName(o)[0];\n        a.async = 1;\n        a.src = g;\n        m.parentNode.insertBefore(a, m)\n    })(window, document, 'script', '//www.google-analytics.com/analytics.js', 'ga');\n\n    ga('create', 'UA-166859683-2', 'auto');\n    ga('send', 'pageview');\n    </script>\n    \n\n     \n</head>\n\n<body>\n\n    <div class=\"navbar navbar-default navbar-fixed-top\" role=\"navigation\">\n    <div class=\"container\">\n\n        <!-- Collapsed navigation -->\n        <div class=\"navbar-header\">\n            <!-- Expander button -->\n            <button type=\"button\" class=\"navbar-toggle\" data-toggle=\"collapse\" data-target=\".navbar-collapse\">\n                <span class=\"sr-only\">Toggle navigation</span>\n                <span class=\"icon-bar\"></span>\n                <span class=\"icon-bar\"></span>\n                <span class=\"icon-bar\"></span>\n            </button>\n            \n\n            <!-- Main title -->\n\n            <a class=\"navbar-brand\" href=\"..\">regenie</a>\n        </div>\n\n        <!-- Expanded navigation -->\n        <div class=\"navbar-collapse collapse\">\n                <!-- Main navigation -->\n                <ul class=\"nav navbar-nav\">\n                \n                \n                    <li >\n                        <a href=\"..\">Home</a>\n                    </li>\n                \n                \n                \n                    <li >\n                        <a href=\"../overview/\">Overview</a>\n                    </li>\n                \n                \n                \n                    <li >\n                        <a href=\"../install/\">Install</a>\n                    </li>\n                \n                \n               
 \n                    <li >\n                        <a href=\"../options/\">Documentation</a>\n                    </li>\n                \n                \n                \n                    <li >\n                        <a href=\"../performance/\">Performance</a>\n                    </li>\n                \n                \n                \n                    <li class=\"active\">\n                        <a href=\"./\">UKBB Analysis</a>\n                    </li>\n                \n                \n                \n                    <li >\n                        <a href=\"../faq/\">F.A.Q.</a>\n                    </li>\n                \n                \n                </ul>\n\n            <ul class=\"nav navbar-nav navbar-right\">\n                    <li>\n                        <a href=\"#\" data-toggle=\"modal\" data-target=\"#mkdocs_search_modal\">\n                            <i class=\"fas fa-search\"></i> Search\n                        </a>\n                    </li>\n                    <li >\n                        <a rel=\"prev\" href=\"../performance/\">\n                            <i class=\"fas fa-arrow-left\"></i> Previous\n                        </a>\n                    </li>\n                    <li >\n                        <a rel=\"next\" href=\"../faq/\">\n                            Next <i class=\"fas fa-arrow-right\"></i>\n                        </a>\n                    </li>\n            </ul>\n        </div>\n    </div>\n</div>\n\n    <div class=\"container\">\n        \n        \n        <div class=\"col-md-3\"><div class=\"bs-sidebar hidden-print affix well\" role=\"complementary\">\n    <ul class=\"nav bs-sidenav\">\n        <li class=\"first-level active\"><a href=\"#recommendations-for-uk-biobank-analysis\">Recommendations for UK Biobank analysis</a></li>\n            <li class=\"second-level\"><a href=\"#pre-processing\">Pre-processing</a></li>\n                \n                <li class=\"third-level\"><a href=\"#selection-of-traits\">Selection of traits</a></li>\n                <li class=\"third-level\"><a href=\"#preparing-genotype-file\">Preparing genotype file</a></li>\n                <li class=\"third-level\"><a href=\"#exclusion-files\">Exclusion files</a></li>\n            <li class=\"second-level\"><a href=\"#step-1\">Step 1</a></li>\n                \n            <li class=\"second-level\"><a href=\"#step-2\">Step 2</a></li>\n                \n    </ul>\n</div></div>\n        <div class=\"col-md-9\" role=\"main\">\n\n<h2 id=\"recommendations-for-uk-biobank-analysis\">Recommendations for UK Biobank analysis</h2>\n<p><strong>regenie</strong> is ideally suited for large-scale analyses such as 500K UK\nBiobank (UKBB) data, where records are available for thousands of phenotypes.</p>\n<p>We provide below a few guidelines on how to perform such analysis on\nthe UKBB files that all UKBB approved researchers have access to.</p>\n<h3 id=\"pre-processing\">Pre-processing</h3>\n<p>We will first go over important steps to consider before running <strong>regenie</strong>.</p>\n<h4 id=\"selection-of-traits\">Selection of traits</h4>\n<p><strong>regenie</strong> can perform whole genome regression on multiple traits at once, which is where\nhigher computational gains are obtained.</p>\n<p>As different traits can have distinct missing patterns, <strong>regenie</strong> uses an imputation scheme\nto handle missing data.\nFrom the real data applications we have performed so far with traits having up to ~20% (for quantitative) \nand 
~5% (for binary) missing observations, our imputation scheme resulted in \nnearly identical results to those from discarding missing observations when analyzing each trait \nseparately (see the <a href=\"https://doi.org/10.1038/s41588-021-00870-7\">paper</a> for details). \nHence, we recommend analyzing traits in groups that have similar missingness patterns and a reasonably \nlow amount of missingness (&lt;15%).</p>\n<p>The number of phenotypes in a group will affect the computational resources required,\nand the table below shows typical computational requirements based on using \n500,000 markers in step 1 split into blocks of 1000 and using blocks of size 200 when \ntesting SNPs in step 2. The estimates are shown when step 1 of <strong>regenie</strong> is run in low-memory mode \nso that within-block predictions are temporarily stored on disk (see Documentation).</p>\n<p><img alt=\"Rflow\" src=\"../img/comp_req_largeP.png\" style=\"text-align:center;padding: 10px;width:50%;display: block;margin-left: auto;margin-right: auto\" /></p>\n<p>In the following sections, we'll assume traits (let's say binary) and covariates \nused in the analysis have been chosen and data are in files\n<code>ukb_phenotypes_BT.txt</code> and <code>ukb_covariates.txt</code>,\nwhich follow the format requirements for <strong>regenie</strong> (see Documentation).</p>\n<h4 id=\"preparing-genotype-file\">Preparing genotype file</h4>\n<p>Step 1 of a <strong>regenie</strong> run requires a single genotype file as input; \nwe recommend using array genotypes for this step.\nThe UKBB genotype files are split by chromosome, so we recommend using\n<a href=\"http://www.cog-genomics.org/plink/\">PLINK</a> to merge the files using the following code.</p>\n<p><em>NOTE</em>: please change <strong>XXX</strong> to your own UKBB application ID number</p>\n<pre><code>rm -f list_beds.txt\nfor chr in {2..22}; do echo &quot;ukb_cal_chr${chr}_v2.bed ukb_snp_chr${chr}_v2.bim ukbXXX_int_chr1_v2_s488373.fam&quot; &gt;&gt; list_beds.txt; done\n\nplink \\\n  --bed ukb_cal_chr1_v2.bed \\\n  --bim ukb_snp_chr1_v2.bim \\\n  --fam ukbXXX_int_chr1_v2_s488373.fam \\\n  --merge-list list_beds.txt \\\n  --make-bed --out ukb_cal_allChrs\n</code></pre>\n<h4 id=\"exclusion-files\">Exclusion files</h4>\n<p>Quality control (QC) filters can be applied using <a href=\"https://www.cog-genomics.org/plink/2.0/\">PLINK2</a> to filter out samples and\nmarkers in the genotype file prior to step 1 of <strong>regenie</strong>.</p>\n<p>Note: <strong>regenie</strong> will throw an error if \na low-variance SNP is included in the step 1 run.\nHence, the user should run adequate QC filtering prior to running <strong>regenie</strong> \nto identify and remove such SNPs.</p>\n<p>For example, to filter out SNPs with \nminor allele frequency (MAF) below 1%, \nminor allele count (MAC) below 100, \ngenotype missingness above 10%, or a \nHardy-Weinberg equilibrium test p-value below <script type=\"math/tex\">10^{-15}</script>, as well as \nsamples with more than 10% missingness,</p>\n<pre><code>plink2 \\\n  --bfile ukb_cal_allChrs \\\n  --maf 0.01 --mac 100 --geno 0.1 --hwe 1e-15 \\\n  --mind 0.1 \\\n  --write-snplist --write-samples --no-id-header \\\n  --out qc_pass\n</code></pre>\n<h3 id=\"step-1\">Step 1</h3>\n<p>We recommend running <strong>regenie</strong> with multi-threading (8+ threads), which will \ndecrease the overall runtime of the program. 
\nAs this step can be quite memory intensive (due to storing block predictions), \nwe recommend using option <code>--lowmem</code>, where the number of phenotypes analyzed\nwill determine how much disk space is required (see table above).</p>\n<p>Running step 1 of <strong>regenie</strong> (by default, all available threads are used)</p>\n<pre><code>./regenie \\\n  --step 1 \\\n  --bed ukb_cal_allChrs \\\n  --extract qc_pass.snplist \\\n  --keep qc_pass.id \\\n  --phenoFile ukb_phenotypes_BT.txt \\\n  --covarFile ukb_covariates.txt \\\n  --bt \\\n  --bsize 1000 \\\n  --lowmem \\\n  --lowmem-prefix tmpdir/regenie_tmp_preds \\\n  --out ukb_step1_BT\n</code></pre>\n<p>For $P$ phenotypes analyzed, this will generate a set of $P$ files ending with <code>.loco</code>\nwhich contain the genetic predictions using a LOCO scheme that will be needed for step 2,\nas well as a prediction list file <code>ukb_step1_BT_pred.list</code>, which lists \nthe names of these prediction files and can be used as input for step 2.</p>\n<h3 id=\"step-2\">Step 2</h3>\n<p>As steps 1 and 2 are completely decoupled in <strong>regenie</strong>, you could either use all \nthe traits for testing in step 2 or select a subset of the traits to perform association testing.\nFurthermore, you can use the same Step 1 output to test on array, exome or \nimputed variants; below, we will illustrate testing on imputed variants.</p>\n<p>Step 2 of <strong>regenie</strong> can be run in parallel across chromosomes, so if you have access to multiple machines, we recommend splitting the runs over chromosomes (using 8+ threads); see the example loop further below.</p>\n<!---\n#### Sample mismatch \n\nIt may be that the genotype file used in step 2 does not contain all of the samples used in step 1 \nor contains additional samples not used in step 1. \nIn such a case, you could for example use the following code to only retain samples that are \ncontained in both data sets (we assume that you are testing on a BGEN input file)\n\n\n<pre><code>expand -t 1 qc_pass.id &gt; qc_pass_space.id   # BGEN sample file is space-separated\ngrep -wFf qc_pass_space.id ukbXXX_imp_chr1_v3_s487395.sample &gt; fid_iid_step2.keep\n</code></pre>\n\n--->\n\n<p>Running <strong>regenie</strong>, testing on a single chromosome (here chromosome 1) \nand using the fast Firth correction as a fallback for p-values below 0.01</p>\n<pre><code>./regenie \\\n  --step 2 \\\n  --bgen ukb_imp_chr1_v3.bgen \\\n  --ref-first \\\n  --sample ukbXXX_imp_chr1_v3_s487395.sample \\\n  --phenoFile ukb_phenotypes_BT.txt \\\n  --covarFile ukb_covariates.txt \\\n  --bt \\\n  --firth --approx --pThresh 0.01 \\\n  --pred ukb_step1_BT_pred.list \\\n  --bsize 400 \\\n  --split \\\n  --out ukb_step2_BT_chr1\n\n</code></pre>\n<p>This will create separate association results files for each phenotype as <code>ukb_step2_BT_chr1_*.regenie</code>.</p>
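\n<p>For example, to run each chromosome in its own job (a sketch re-using the options of the example above; we assume the per-chromosome BGEN and sample files are named analogously to the chromosome 1 files, and in practice each iteration would typically be submitted as a separate job to a scheduler rather than run sequentially)</p>\n<pre><code># One Step 2 run per chromosome, re-using the same Step 1 predictions\nfor chr in {1..22}; do\n./regenie \\\n  --step 2 \\\n  --bgen ukb_imp_chr${chr}_v3.bgen \\\n  --ref-first \\\n  --sample ukbXXX_imp_chr${chr}_v3_s487395.sample \\\n  --phenoFile ukb_phenotypes_BT.txt \\\n  --covarFile ukb_covariates.txt \\\n  --bt \\\n  --firth --approx --pThresh 0.01 \\\n  --pred ukb_step1_BT_pred.list \\\n  --bsize 400 \\\n  --split \\\n  --out ukb_step2_BT_chr${chr}\ndone\n</code></pre>\n<p>When running the SKAT/ACAT gene-based tests, we recommend using at most 2 threads and \ninstead parallelizing the runs over partitions of the genome (e.g. 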
groups of genes).</p></div>\n        \n        \n    </div>\n\n    <footer class=\"col-md-12 text-center\">\n        \n        <hr>\n        <p>\n        <small>REGENIE is licensed under the <a href='https://github.com/rgcgithub/regenie/blob/master/LICENSE'>MIT license</small><br>\n        \n        <small>Documentation built with <a href=\"http://www.mkdocs.org/\">MkDocs</a>.</small>\n        </p>\n\n        \n        \n    </footer>\n    <script src=\"//ajax.googleapis.com/ajax/libs/jquery/1.12.4/jquery.min.js\"></script>\n    <script src=\"../js/bootstrap-3.0.3.min.js\"></script>\n    \n    <script>var base_url = \"..\"</script>\n    \n    <script src=\"../js/base.js\"></script>\n    <script src=\"../search/main.js\"></script>\n\n  <script type=\"text/javascript\"\n          src=\"https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.0/MathJax.js?config=TeX-AMS_CHTML\"></script>\n\n  <script type=\"text/x-mathjax-config\">\n    MathJax.Hub.Config({\ntex2jax: {\ninlineMath: [['$','$'], ['\\\\(','\\\\)']],\nprocessEscapes: true},\njax: [\"input/TeX\",\"input/MathML\",\"input/AsciiMath\",\"output/CommonHTML\"],\nextensions: [\"tex2jax.js\",\"mml2jax.js\",\"asciimath2jax.js\",\"MathMenu.js\",\"MathZoom.js\",\"AssistiveMML.js\", \"[Contrib]/a11y/accessibility-menu.js\"],\nTeX: {\nextensions: [\"AMSmath.js\",\"AMSsymbols.js\",\"noErrors.js\",\"noUndefined.js\"],\nequationNumbers: {\nautoNumber: \"AMS\"\n}\n}\n});\n  </script>\n\n    <div class=\"modal\" id=\"mkdocs_search_modal\" tabindex=\"-1\" role=\"dialog\" aria-labelledby=\"searchModalLabel\" aria-hidden=\"true\">\n    <div class=\"modal-dialog modal-lg\">\n        <div class=\"modal-content\">\n            <div class=\"modal-header\">\n                <button type=\"button\" class=\"close\" data-dismiss=\"modal\">\n                    <span aria-hidden=\"true\">&times;</span>\n                    <span class=\"sr-only\">Close</span>\n                </button>\n                <h4 class=\"modal-title\" id=\"searchModalLabel\">Search</h4>\n            </div>\n            <div class=\"modal-body\">\n                <p>\n                    From here you can search these documents. 
Enter\n                    your search terms below.\n                </p>\n                <form>\n                    <div class=\"form-group\">\n                        <input type=\"text\" class=\"form-control\" placeholder=\"Search...\" id=\"mkdocs-search-query\" title=\"Type search term here\">\n                    </div>\n                </form>\n                <div id=\"mkdocs-search-results\"></div>\n            </div>\n            <div class=\"modal-footer\">\n            </div>\n        </div>\n    </div>\n</div><div class=\"modal\" id=\"mkdocs_keyboard_modal\" tabindex=\"-1\" role=\"dialog\" aria-labelledby=\"keyboardModalLabel\" aria-hidden=\"true\">\n    <div class=\"modal-dialog\">\n        <div class=\"modal-content\">\n            <div class=\"modal-header\">\n                <h4 class=\"modal-title\" id=\"keyboardModalLabel\">Keyboard Shortcuts</h4>\n                <button type=\"button\" class=\"close\" data-dismiss=\"modal\"><span aria-hidden=\"true\">&times;</span><span class=\"sr-only\">Close</span></button>\n            </div>\n            <div class=\"modal-body\">\n              <table class=\"table\">\n                <thead>\n                  <tr>\n                    <th style=\"width: 20%;\">Keys</th>\n                    <th>Action</th>\n                  </tr>\n                </thead>\n                <tbody>\n                  <tr>\n                    <td class=\"help shortcut\"><kbd>?</kbd></td>\n                    <td>Open this help</td>\n                  </tr>\n                  <tr>\n                    <td class=\"next shortcut\"><kbd>n</kbd></td>\n                    <td>Next page</td>\n                  </tr>\n                  <tr>\n                    <td class=\"prev shortcut\"><kbd>p</kbd></td>\n                    <td>Previous page</td>\n                  </tr>\n                  <tr>\n                    <td class=\"search shortcut\"><kbd>s</kbd></td>\n                    <td>Search</td>\n                  </tr>\n                </tbody>\n              </table>\n            </div>\n            <div class=\"modal-footer\">\n            </div>\n        </div>\n    </div>\n</div>\n    </body>\n\n\n</html>\n"
  },
  {
    "path": "docs/site/search/lunr.js",
    "content": "/**\n * lunr - http://lunrjs.com - A bit like Solr, but much smaller and not as bright - 2.3.9\n * Copyright (C) 2020 Oliver Nightingale\n * @license MIT\n */\n\n;(function(){\n\n/**\n * A convenience function for configuring and constructing\n * a new lunr Index.\n *\n * A lunr.Builder instance is created and the pipeline setup\n * with a trimmer, stop word filter and stemmer.\n *\n * This builder object is yielded to the configuration function\n * that is passed as a parameter, allowing the list of fields\n * and other builder parameters to be customised.\n *\n * All documents _must_ be added within the passed config function.\n *\n * @example\n * var idx = lunr(function () {\n *   this.field('title')\n *   this.field('body')\n *   this.ref('id')\n *\n *   documents.forEach(function (doc) {\n *     this.add(doc)\n *   }, this)\n * })\n *\n * @see {@link lunr.Builder}\n * @see {@link lunr.Pipeline}\n * @see {@link lunr.trimmer}\n * @see {@link lunr.stopWordFilter}\n * @see {@link lunr.stemmer}\n * @namespace {function} lunr\n */\nvar lunr = function (config) {\n  var builder = new lunr.Builder\n\n  builder.pipeline.add(\n    lunr.trimmer,\n    lunr.stopWordFilter,\n    lunr.stemmer\n  )\n\n  builder.searchPipeline.add(\n    lunr.stemmer\n  )\n\n  config.call(builder, builder)\n  return builder.build()\n}\n\nlunr.version = \"2.3.9\"\n/*!\n * lunr.utils\n * Copyright (C) 2020 Oliver Nightingale\n */\n\n/**\n * A namespace containing utils for the rest of the lunr library\n * @namespace lunr.utils\n */\nlunr.utils = {}\n\n/**\n * Print a warning message to the console.\n *\n * @param {String} message The message to be printed.\n * @memberOf lunr.utils\n * @function\n */\nlunr.utils.warn = (function (global) {\n  /* eslint-disable no-console */\n  return function (message) {\n    if (global.console && console.warn) {\n      console.warn(message)\n    }\n  }\n  /* eslint-enable no-console */\n})(this)\n\n/**\n * Convert an object to a string.\n *\n * In the case of `null` and `undefined` the function returns\n * the empty string, in all other cases the result of calling\n * `toString` on the passed object is returned.\n *\n * @param {Any} obj The object to convert to a string.\n * @return {String} string representation of the passed object.\n * @memberOf lunr.utils\n */\nlunr.utils.asString = function (obj) {\n  if (obj === void 0 || obj === null) {\n    return \"\"\n  } else {\n    return obj.toString()\n  }\n}\n\n/**\n * Clones an object.\n *\n * Will create a copy of an existing object such that any mutations\n * on the copy cannot affect the original.\n *\n * Only shallow objects are supported, passing a nested object to this\n * function will cause a TypeError.\n *\n * Objects with primitives, and arrays of primitives are supported.\n *\n * @param {Object} obj The object to clone.\n * @return {Object} a clone of the passed object.\n * @throws {TypeError} when a nested object is passed.\n * @memberOf Utils\n */\nlunr.utils.clone = function (obj) {\n  if (obj === null || obj === undefined) {\n    return obj\n  }\n\n  var clone = Object.create(null),\n      keys = Object.keys(obj)\n\n  for (var i = 0; i < keys.length; i++) {\n    var key = keys[i],\n        val = obj[key]\n\n    if (Array.isArray(val)) {\n      clone[key] = val.slice()\n      continue\n    }\n\n    if (typeof val === 'string' ||\n        typeof val === 'number' ||\n        typeof val === 'boolean') {\n      clone[key] = val\n      continue\n    }\n\n    throw new TypeError(\"clone is not deep and does not 
support nested objects\")\n  }\n\n  return clone\n}\nlunr.FieldRef = function (docRef, fieldName, stringValue) {\n  this.docRef = docRef\n  this.fieldName = fieldName\n  this._stringValue = stringValue\n}\n\nlunr.FieldRef.joiner = \"/\"\n\nlunr.FieldRef.fromString = function (s) {\n  var n = s.indexOf(lunr.FieldRef.joiner)\n\n  if (n === -1) {\n    throw \"malformed field ref string\"\n  }\n\n  var fieldRef = s.slice(0, n),\n      docRef = s.slice(n + 1)\n\n  return new lunr.FieldRef (docRef, fieldRef, s)\n}\n\nlunr.FieldRef.prototype.toString = function () {\n  if (this._stringValue == undefined) {\n    this._stringValue = this.fieldName + lunr.FieldRef.joiner + this.docRef\n  }\n\n  return this._stringValue\n}\n/*!\n * lunr.Set\n * Copyright (C) 2020 Oliver Nightingale\n */\n\n/**\n * A lunr set.\n *\n * @constructor\n */\nlunr.Set = function (elements) {\n  this.elements = Object.create(null)\n\n  if (elements) {\n    this.length = elements.length\n\n    for (var i = 0; i < this.length; i++) {\n      this.elements[elements[i]] = true\n    }\n  } else {\n    this.length = 0\n  }\n}\n\n/**\n * A complete set that contains all elements.\n *\n * @static\n * @readonly\n * @type {lunr.Set}\n */\nlunr.Set.complete = {\n  intersect: function (other) {\n    return other\n  },\n\n  union: function () {\n    return this\n  },\n\n  contains: function () {\n    return true\n  }\n}\n\n/**\n * An empty set that contains no elements.\n *\n * @static\n * @readonly\n * @type {lunr.Set}\n */\nlunr.Set.empty = {\n  intersect: function () {\n    return this\n  },\n\n  union: function (other) {\n    return other\n  },\n\n  contains: function () {\n    return false\n  }\n}\n\n/**\n * Returns true if this set contains the specified object.\n *\n * @param {object} object - Object whose presence in this set is to be tested.\n * @returns {boolean} - True if this set contains the specified object.\n */\nlunr.Set.prototype.contains = function (object) {\n  return !!this.elements[object]\n}\n\n/**\n * Returns a new set containing only the elements that are present in both\n * this set and the specified set.\n *\n * @param {lunr.Set} other - set to intersect with this set.\n * @returns {lunr.Set} a new set that is the intersection of this and the specified set.\n */\n\nlunr.Set.prototype.intersect = function (other) {\n  var a, b, elements, intersection = []\n\n  if (other === lunr.Set.complete) {\n    return this\n  }\n\n  if (other === lunr.Set.empty) {\n    return other\n  }\n\n  if (this.length < other.length) {\n    a = this\n    b = other\n  } else {\n    a = other\n    b = this\n  }\n\n  elements = Object.keys(a.elements)\n\n  for (var i = 0; i < elements.length; i++) {\n    var element = elements[i]\n    if (element in b.elements) {\n      intersection.push(element)\n    }\n  }\n\n  return new lunr.Set (intersection)\n}\n\n/**\n * Returns a new set combining the elements of this and the specified set.\n *\n * @param {lunr.Set} other - set to union with this set.\n * @return {lunr.Set} a new set that is the union of this and the specified set.\n */\n\nlunr.Set.prototype.union = function (other) {\n  if (other === lunr.Set.complete) {\n    return lunr.Set.complete\n  }\n\n  if (other === lunr.Set.empty) {\n    return this\n  }\n\n  return new lunr.Set(Object.keys(this.elements).concat(Object.keys(other.elements)))\n}\n/**\n * A function to calculate the inverse document frequency for\n * a posting. 
This is shared between the builder and the index\n *\n * @private\n * @param {object} posting - The posting for a given term\n * @param {number} documentCount - The total number of documents.\n */\nlunr.idf = function (posting, documentCount) {\n  var documentsWithTerm = 0\n\n  for (var fieldName in posting) {\n    if (fieldName == '_index') continue // Ignore the term index, it's not a field\n    documentsWithTerm += Object.keys(posting[fieldName]).length\n  }\n\n  var x = (documentCount - documentsWithTerm + 0.5) / (documentsWithTerm + 0.5)\n\n  return Math.log(1 + Math.abs(x))\n}\n\n/**\n * A token wraps a string representation of a token\n * as it is passed through the text processing pipeline.\n *\n * @constructor\n * @param {string} [str=''] - The string token being wrapped.\n * @param {object} [metadata={}] - Metadata associated with this token.\n */\nlunr.Token = function (str, metadata) {\n  this.str = str || \"\"\n  this.metadata = metadata || {}\n}\n\n/**\n * Returns the token string that is being wrapped by this object.\n *\n * @returns {string}\n */\nlunr.Token.prototype.toString = function () {\n  return this.str\n}\n\n/**\n * A token update function is used when updating or optionally\n * when cloning a token.\n *\n * @callback lunr.Token~updateFunction\n * @param {string} str - The string representation of the token.\n * @param {Object} metadata - All metadata associated with this token.\n */\n\n/**\n * Applies the given function to the wrapped string token.\n *\n * @example\n * token.update(function (str, metadata) {\n *   return str.toUpperCase()\n * })\n *\n * @param {lunr.Token~updateFunction} fn - A function to apply to the token string.\n * @returns {lunr.Token}\n */\nlunr.Token.prototype.update = function (fn) {\n  this.str = fn(this.str, this.metadata)\n  return this\n}\n\n/**\n * Creates a clone of this token. Optionally a function can be\n * applied to the cloned token.\n *\n * @param {lunr.Token~updateFunction} [fn] - An optional function to apply to the cloned token.\n * @returns {lunr.Token}\n */\nlunr.Token.prototype.clone = function (fn) {\n  fn = fn || function (s) { return s }\n  return new lunr.Token (fn(this.str, this.metadata), this.metadata)\n}\n/*!\n * lunr.tokenizer\n * Copyright (C) 2020 Oliver Nightingale\n */\n\n/**\n * A function for splitting a string into tokens ready to be inserted into\n * the search index. 
Uses `lunr.tokenizer.separator` to split strings; change\n * the value of this property to change how strings are split into tokens.\n *\n * This tokenizer will convert its parameter to a string by calling `toString` and\n * then will split this string on the character in `lunr.tokenizer.separator`.\n * Arrays will have their elements converted to strings and wrapped in a lunr.Token.\n *\n * Optional metadata can be passed to the tokenizer, this metadata will be cloned and\n * added as metadata to every token that is created from the object to be tokenized.\n *\n * @static\n * @param {?(string|object|object[])} obj - The object to convert into tokens\n * @param {?object} metadata - Optional metadata to associate with every token\n * @returns {lunr.Token[]}\n * @see {@link lunr.Pipeline}\n */\nlunr.tokenizer = function (obj, metadata) {\n  if (obj == null || obj == undefined) {\n    return []\n  }\n\n  if (Array.isArray(obj)) {\n    return obj.map(function (t) {\n      return new lunr.Token(\n        lunr.utils.asString(t).toLowerCase(),\n        lunr.utils.clone(metadata)\n      )\n    })\n  }\n\n  var str = obj.toString().toLowerCase(),\n      len = str.length,\n      tokens = []\n\n  for (var sliceEnd = 0, sliceStart = 0; sliceEnd <= len; sliceEnd++) {\n    var char = str.charAt(sliceEnd),\n        sliceLength = sliceEnd - sliceStart\n\n    if ((char.match(lunr.tokenizer.separator) || sliceEnd == len)) {\n\n      if (sliceLength > 0) {\n        var tokenMetadata = lunr.utils.clone(metadata) || {}\n        tokenMetadata[\"position\"] = [sliceStart, sliceLength]\n        tokenMetadata[\"index\"] = tokens.length\n\n        tokens.push(\n          new lunr.Token (\n            str.slice(sliceStart, sliceEnd),\n            tokenMetadata\n          )\n        )\n      }\n\n      sliceStart = sliceEnd + 1\n    }\n\n  }\n\n  return tokens\n}\n\n/**\n * The separator used to split a string into tokens. Override this property to change the behaviour of\n * `lunr.tokenizer` when tokenizing strings. By default this splits on whitespace and hyphens.\n *\n * @static\n * @see lunr.tokenizer\n */\nlunr.tokenizer.separator = /[\\s\\-]+/\n/*!\n * lunr.Pipeline\n * Copyright (C) 2020 Oliver Nightingale\n */\n\n/**\n * lunr.Pipelines maintain an ordered list of functions to be applied to all\n * tokens in documents entering the search index and queries being run against\n * the index.\n *\n * An instance of lunr.Index created with the lunr shortcut will contain a\n * pipeline with a stop word filter and an English language stemmer. Extra\n * functions can be added before or after either of these functions or these\n * default functions can be removed.\n *\n * When run the pipeline will call each function in turn, passing a token, the\n * index of that token in the original list of all tokens and finally a list of\n * all the original tokens.\n *\n * The output of functions in the pipeline will be passed to the next function\n * in the pipeline. To exclude a token from entering the index the function\n * should return undefined, the rest of the pipeline will not be called with\n * this token.\n *\n * For serialisation of pipelines to work, all functions used in an instance of\n * a pipeline should be registered with lunr.Pipeline. Registered functions can\n * then be loaded. 
If trying to load a serialised pipeline that uses functions\n * that are not registered, an error will be thrown.\n *\n * If not planning on serialising the pipeline then registering pipeline functions\n * is not necessary.\n *\n * @constructor\n */\nlunr.Pipeline = function () {\n  this._stack = []\n}\n\nlunr.Pipeline.registeredFunctions = Object.create(null)\n\n/**\n * A pipeline function maps lunr.Token to lunr.Token. A lunr.Token contains the token\n * string as well as all known metadata. A pipeline function can mutate the token string\n * or mutate (or add) metadata for a given token.\n *\n * A pipeline function can indicate that the passed token should be discarded by returning\n * null, undefined or an empty string. This token will not be passed to any downstream pipeline\n * functions and will not be added to the index.\n *\n * Multiple tokens can be returned by returning an array of tokens. Each token will be passed\n * to any downstream pipeline functions and all returned tokens will be added to the index.\n *\n * Any number of pipeline functions may be chained together using a lunr.Pipeline.\n *\n * @interface lunr.PipelineFunction\n * @param {lunr.Token} token - A token from the document being processed.\n * @param {number} i - The index of this token in the complete list of tokens for this document/field.\n * @param {lunr.Token[]} tokens - All tokens for this document/field.\n * @returns {(?lunr.Token|lunr.Token[])}\n */\n\n/**\n * Register a function with the pipeline.\n *\n * Functions that are used in the pipeline should be registered if the pipeline\n * needs to be serialised, or a serialised pipeline needs to be loaded.\n *\n * Registering a function does not add it to a pipeline, functions must still be\n * added to instances of the pipeline for them to be used when running a pipeline.\n *\n * @param {lunr.PipelineFunction} fn - The function to register.\n * @param {String} label - The label to register this function with\n */\nlunr.Pipeline.registerFunction = function (fn, label) {\n  if (label in this.registeredFunctions) {\n    lunr.utils.warn('Overwriting existing registered function: ' + label)\n  }\n\n  fn.label = label\n  lunr.Pipeline.registeredFunctions[fn.label] = fn\n}\n\n/**\n * Warns if the function is not registered as a Pipeline function.\n *\n * @param {lunr.PipelineFunction} fn - The function to check for.\n * @private\n */\nlunr.Pipeline.warnIfFunctionNotRegistered = function (fn) {\n  var isRegistered = fn.label && (fn.label in this.registeredFunctions)\n\n  if (!isRegistered) {\n    lunr.utils.warn('Function is not registered with pipeline. 
This may cause problems when serialising the index.\\n', fn)\n  }\n}\n\n/**\n * Loads a previously serialised pipeline.\n *\n * All functions to be loaded must already be registered with lunr.Pipeline.\n * If any function from the serialised data has not been registered then an\n * error will be thrown.\n *\n * @param {Object} serialised - The serialised pipeline to load.\n * @returns {lunr.Pipeline}\n */\nlunr.Pipeline.load = function (serialised) {\n  var pipeline = new lunr.Pipeline\n\n  serialised.forEach(function (fnName) {\n    var fn = lunr.Pipeline.registeredFunctions[fnName]\n\n    if (fn) {\n      pipeline.add(fn)\n    } else {\n      throw new Error('Cannot load unregistered function: ' + fnName)\n    }\n  })\n\n  return pipeline\n}\n\n/**\n * Adds new functions to the end of the pipeline.\n *\n * Logs a warning if the function has not been registered.\n *\n * @param {lunr.PipelineFunction[]} functions - Any number of functions to add to the pipeline.\n */\nlunr.Pipeline.prototype.add = function () {\n  var fns = Array.prototype.slice.call(arguments)\n\n  fns.forEach(function (fn) {\n    lunr.Pipeline.warnIfFunctionNotRegistered(fn)\n    this._stack.push(fn)\n  }, this)\n}\n\n/**\n * Adds a single function after a function that already exists in the\n * pipeline.\n *\n * Logs a warning if the function has not been registered.\n *\n * @param {lunr.PipelineFunction} existingFn - A function that already exists in the pipeline.\n * @param {lunr.PipelineFunction} newFn - The new function to add to the pipeline.\n */\nlunr.Pipeline.prototype.after = function (existingFn, newFn) {\n  lunr.Pipeline.warnIfFunctionNotRegistered(newFn)\n\n  var pos = this._stack.indexOf(existingFn)\n  if (pos == -1) {\n    throw new Error('Cannot find existingFn')\n  }\n\n  pos = pos + 1\n  this._stack.splice(pos, 0, newFn)\n}\n\n/**\n * Adds a single function before a function that already exists in the\n * pipeline.\n *\n * Logs a warning if the function has not been registered.\n *\n * @param {lunr.PipelineFunction} existingFn - A function that already exists in the pipeline.\n * @param {lunr.PipelineFunction} newFn - The new function to add to the pipeline.\n */\nlunr.Pipeline.prototype.before = function (existingFn, newFn) {\n  lunr.Pipeline.warnIfFunctionNotRegistered(newFn)\n\n  var pos = this._stack.indexOf(existingFn)\n  if (pos == -1) {\n    throw new Error('Cannot find existingFn')\n  }\n\n  this._stack.splice(pos, 0, newFn)\n}\n\n/**\n * Removes a function from the pipeline.\n *\n * @param {lunr.PipelineFunction} fn The function to remove from the pipeline.\n */\nlunr.Pipeline.prototype.remove = function (fn) {\n  var pos = this._stack.indexOf(fn)\n  if (pos == -1) {\n    return\n  }\n\n  this._stack.splice(pos, 1)\n}\n\n/**\n * Runs the current list of functions that make up the pipeline against the\n * passed tokens.\n *\n * @param {Array} tokens The tokens to run through the pipeline.\n * @returns {Array}\n */\nlunr.Pipeline.prototype.run = function (tokens) {\n  var stackLength = this._stack.length\n\n  for (var i = 0; i < stackLength; i++) {\n    var fn = this._stack[i]\n    var memo = []\n\n    for (var j = 0; j < tokens.length; j++) {\n      var result = fn(tokens[j], j, tokens)\n\n      if (result === null || result === void 0 || result === '') continue\n\n      if (Array.isArray(result)) {\n        for (var k = 0; k < result.length; k++) {\n          memo.push(result[k])\n        }\n      } else {\n        memo.push(result)\n      }\n    }\n\n    tokens = memo\n  }\n\n  return 
tokens\n}\n\n/**\n * Convenience method for passing a string through a pipeline and getting\n * strings out. This method takes care of wrapping the passed string in a\n * token and mapping the resulting tokens back to strings.\n *\n * @param {string} str - The string to pass through the pipeline.\n * @param {?object} metadata - Optional metadata to associate with the token\n * passed to the pipeline.\n * @returns {string[]}\n */\nlunr.Pipeline.prototype.runString = function (str, metadata) {\n  var token = new lunr.Token (str, metadata)\n\n  return this.run([token]).map(function (t) {\n    return t.toString()\n  })\n}\n\n/**\n * Resets the pipeline by removing any existing processors.\n *\n */\nlunr.Pipeline.prototype.reset = function () {\n  this._stack = []\n}\n\n/**\n * Returns a representation of the pipeline ready for serialisation.\n *\n * Logs a warning if the function has not been registered.\n *\n * @returns {Array}\n */\nlunr.Pipeline.prototype.toJSON = function () {\n  return this._stack.map(function (fn) {\n    lunr.Pipeline.warnIfFunctionNotRegistered(fn)\n\n    return fn.label\n  })\n}\n/*!\n * lunr.Vector\n * Copyright (C) 2020 Oliver Nightingale\n */\n\n/**\n * A vector is used to construct the vector space of documents and queries. These\n * vectors support operations to determine the similarity between two documents or\n * a document and a query.\n *\n * Normally no parameters are required for initializing a vector, but in the case of\n * loading a previously dumped vector the raw elements can be provided to the constructor.\n *\n * For performance reasons vectors are implemented with a flat array, where an elements\n * index is immediately followed by its value. E.g. [index, value, index, value]. This\n * allows the underlying array to be as sparse as possible and still offer decent\n * performance when being used for vector calculations.\n *\n * @constructor\n * @param {Number[]} [elements] - The flat list of element index and element value pairs.\n */\nlunr.Vector = function (elements) {\n  this._magnitude = 0\n  this.elements = elements || []\n}\n\n\n/**\n * Calculates the position within the vector to insert a given index.\n *\n * This is used internally by insert and upsert. 
If there are duplicate indexes then\n * the position is returned as if the value for that index were to be updated, but it\n * is the callers responsibility to check whether there is a duplicate at that index\n *\n * @param {Number} insertIdx - The index at which the element should be inserted.\n * @returns {Number}\n */\nlunr.Vector.prototype.positionForIndex = function (index) {\n  // For an empty vector the tuple can be inserted at the beginning\n  if (this.elements.length == 0) {\n    return 0\n  }\n\n  var start = 0,\n      end = this.elements.length / 2,\n      sliceLength = end - start,\n      pivotPoint = Math.floor(sliceLength / 2),\n      pivotIndex = this.elements[pivotPoint * 2]\n\n  while (sliceLength > 1) {\n    if (pivotIndex < index) {\n      start = pivotPoint\n    }\n\n    if (pivotIndex > index) {\n      end = pivotPoint\n    }\n\n    if (pivotIndex == index) {\n      break\n    }\n\n    sliceLength = end - start\n    pivotPoint = start + Math.floor(sliceLength / 2)\n    pivotIndex = this.elements[pivotPoint * 2]\n  }\n\n  if (pivotIndex == index) {\n    return pivotPoint * 2\n  }\n\n  if (pivotIndex > index) {\n    return pivotPoint * 2\n  }\n\n  if (pivotIndex < index) {\n    return (pivotPoint + 1) * 2\n  }\n}\n\n/**\n * Inserts an element at an index within the vector.\n *\n * Does not allow duplicates, will throw an error if there is already an entry\n * for this index.\n *\n * @param {Number} insertIdx - The index at which the element should be inserted.\n * @param {Number} val - The value to be inserted into the vector.\n */\nlunr.Vector.prototype.insert = function (insertIdx, val) {\n  this.upsert(insertIdx, val, function () {\n    throw \"duplicate index\"\n  })\n}\n\n/**\n * Inserts or updates an existing index within the vector.\n *\n * @param {Number} insertIdx - The index at which the element should be inserted.\n * @param {Number} val - The value to be inserted into the vector.\n * @param {function} fn - A function that is called for updates, the existing value and the\n * requested value are passed as arguments\n */\nlunr.Vector.prototype.upsert = function (insertIdx, val, fn) {\n  this._magnitude = 0\n  var position = this.positionForIndex(insertIdx)\n\n  if (this.elements[position] == insertIdx) {\n    this.elements[position + 1] = fn(this.elements[position + 1], val)\n  } else {\n    this.elements.splice(position, 0, insertIdx, val)\n  }\n}\n\n/**\n * Calculates the magnitude of this vector.\n *\n * @returns {Number}\n */\nlunr.Vector.prototype.magnitude = function () {\n  if (this._magnitude) return this._magnitude\n\n  var sumOfSquares = 0,\n      elementsLength = this.elements.length\n\n  for (var i = 1; i < elementsLength; i += 2) {\n    var val = this.elements[i]\n    sumOfSquares += val * val\n  }\n\n  return this._magnitude = Math.sqrt(sumOfSquares)\n}\n\n/**\n * Calculates the dot product of this vector and another vector.\n *\n * @param {lunr.Vector} otherVector - The vector to compute the dot product with.\n * @returns {Number}\n */\nlunr.Vector.prototype.dot = function (otherVector) {\n  var dotProduct = 0,\n      a = this.elements, b = otherVector.elements,\n      aLen = a.length, bLen = b.length,\n      aVal = 0, bVal = 0,\n      i = 0, j = 0\n\n  while (i < aLen && j < bLen) {\n    aVal = a[i], bVal = b[j]\n    if (aVal < bVal) {\n      i += 2\n    } else if (aVal > bVal) {\n      j += 2\n    } else if (aVal == bVal) {\n      dotProduct += a[i + 1] * b[j + 1]\n      i += 2\n      j += 2\n    }\n  }\n\n  return dotProduct\n}\n\n/**\n * 
Calculates the similarity between this vector and another vector.\n *\n * @param {lunr.Vector} otherVector - The other vector to calculate the\n * similarity with.\n * @returns {Number}\n */\nlunr.Vector.prototype.similarity = function (otherVector) {\n  return this.dot(otherVector) / this.magnitude() || 0\n}\n\n/**\n * Converts the vector to an array of the elements within the vector.\n *\n * @returns {Number[]}\n */\nlunr.Vector.prototype.toArray = function () {\n  var output = new Array (this.elements.length / 2)\n\n  for (var i = 1, j = 0; i < this.elements.length; i += 2, j++) {\n    output[j] = this.elements[i]\n  }\n\n  return output\n}\n\n/**\n * A JSON serializable representation of the vector.\n *\n * @returns {Number[]}\n */\nlunr.Vector.prototype.toJSON = function () {\n  return this.elements\n}\n/* eslint-disable */\n/*!\n * lunr.stemmer\n * Copyright (C) 2020 Oliver Nightingale\n * Includes code from - http://tartarus.org/~martin/PorterStemmer/js.txt\n */\n\n/**\n * lunr.stemmer is an english language stemmer, this is a JavaScript\n * implementation of the PorterStemmer taken from http://tartarus.org/~martin\n *\n * @static\n * @implements {lunr.PipelineFunction}\n * @param {lunr.Token} token - The string to stem\n * @returns {lunr.Token}\n * @see {@link lunr.Pipeline}\n * @function\n */\nlunr.stemmer = (function(){\n  var step2list = {\n      \"ational\" : \"ate\",\n      \"tional\" : \"tion\",\n      \"enci\" : \"ence\",\n      \"anci\" : \"ance\",\n      \"izer\" : \"ize\",\n      \"bli\" : \"ble\",\n      \"alli\" : \"al\",\n      \"entli\" : \"ent\",\n      \"eli\" : \"e\",\n      \"ousli\" : \"ous\",\n      \"ization\" : \"ize\",\n      \"ation\" : \"ate\",\n      \"ator\" : \"ate\",\n      \"alism\" : \"al\",\n      \"iveness\" : \"ive\",\n      \"fulness\" : \"ful\",\n      \"ousness\" : \"ous\",\n      \"aliti\" : \"al\",\n      \"iviti\" : \"ive\",\n      \"biliti\" : \"ble\",\n      \"logi\" : \"log\"\n    },\n\n    step3list = {\n      \"icate\" : \"ic\",\n      \"ative\" : \"\",\n      \"alize\" : \"al\",\n      \"iciti\" : \"ic\",\n      \"ical\" : \"ic\",\n      \"ful\" : \"\",\n      \"ness\" : \"\"\n    },\n\n    c = \"[^aeiou]\",          // consonant\n    v = \"[aeiouy]\",          // vowel\n    C = c + \"[^aeiouy]*\",    // consonant sequence\n    V = v + \"[aeiou]*\",      // vowel sequence\n\n    mgr0 = \"^(\" + C + \")?\" + V + C,               // [C]VC... is m>0\n    meq1 = \"^(\" + C + \")?\" + V + C + \"(\" + V + \")?$\",  // [C]VC[V] is m=1\n    mgr1 = \"^(\" + C + \")?\" + V + C + V + C,       // [C]VCVC... 
is m>1\n    s_v = \"^(\" + C + \")?\" + v;                   // vowel in stem\n\n  var re_mgr0 = new RegExp(mgr0);\n  var re_mgr1 = new RegExp(mgr1);\n  var re_meq1 = new RegExp(meq1);\n  var re_s_v = new RegExp(s_v);\n\n  var re_1a = /^(.+?)(ss|i)es$/;\n  var re2_1a = /^(.+?)([^s])s$/;\n  var re_1b = /^(.+?)eed$/;\n  var re2_1b = /^(.+?)(ed|ing)$/;\n  var re_1b_2 = /.$/;\n  var re2_1b_2 = /(at|bl|iz)$/;\n  var re3_1b_2 = new RegExp(\"([^aeiouylsz])\\\\1$\");\n  var re4_1b_2 = new RegExp(\"^\" + C + v + \"[^aeiouwxy]$\");\n\n  var re_1c = /^(.+?[^aeiou])y$/;\n  var re_2 = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/;\n\n  var re_3 = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/;\n\n  var re_4 = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/;\n  var re2_4 = /^(.+?)(s|t)(ion)$/;\n\n  var re_5 = /^(.+?)e$/;\n  var re_5_1 = /ll$/;\n  var re3_5 = new RegExp(\"^\" + C + v + \"[^aeiouwxy]$\");\n\n  var porterStemmer = function porterStemmer(w) {\n    var stem,\n      suffix,\n      firstch,\n      re,\n      re2,\n      re3,\n      re4;\n\n    if (w.length < 3) { return w; }\n\n    firstch = w.substr(0,1);\n    if (firstch == \"y\") {\n      w = firstch.toUpperCase() + w.substr(1);\n    }\n\n    // Step 1a\n    re = re_1a\n    re2 = re2_1a;\n\n    if (re.test(w)) { w = w.replace(re,\"$1$2\"); }\n    else if (re2.test(w)) { w = w.replace(re2,\"$1$2\"); }\n\n    // Step 1b\n    re = re_1b;\n    re2 = re2_1b;\n    if (re.test(w)) {\n      var fp = re.exec(w);\n      re = re_mgr0;\n      if (re.test(fp[1])) {\n        re = re_1b_2;\n        w = w.replace(re,\"\");\n      }\n    } else if (re2.test(w)) {\n      var fp = re2.exec(w);\n      stem = fp[1];\n      re2 = re_s_v;\n      if (re2.test(stem)) {\n        w = stem;\n        re2 = re2_1b_2;\n        re3 = re3_1b_2;\n        re4 = re4_1b_2;\n        if (re2.test(w)) { w = w + \"e\"; }\n        else if (re3.test(w)) { re = re_1b_2; w = w.replace(re,\"\"); }\n        else if (re4.test(w)) { w = w + \"e\"; }\n      }\n    }\n\n    // Step 1c - replace suffix y or Y by i if preceded by a non-vowel which is not the first letter of the word (so cry -> cri, by -> by, say -> say)\n    re = re_1c;\n    if (re.test(w)) {\n      var fp = re.exec(w);\n      stem = fp[1];\n      w = stem + \"i\";\n    }\n\n    // Step 2\n    re = re_2;\n    if (re.test(w)) {\n      var fp = re.exec(w);\n      stem = fp[1];\n      suffix = fp[2];\n      re = re_mgr0;\n      if (re.test(stem)) {\n        w = stem + step2list[suffix];\n      }\n    }\n\n    // Step 3\n    re = re_3;\n    if (re.test(w)) {\n      var fp = re.exec(w);\n      stem = fp[1];\n      suffix = fp[2];\n      re = re_mgr0;\n      if (re.test(stem)) {\n        w = stem + step3list[suffix];\n      }\n    }\n\n    // Step 4\n    re = re_4;\n    re2 = re2_4;\n    if (re.test(w)) {\n      var fp = re.exec(w);\n      stem = fp[1];\n      re = re_mgr1;\n      if (re.test(stem)) {\n        w = stem;\n      }\n    } else if (re2.test(w)) {\n      var fp = re2.exec(w);\n      stem = fp[1] + fp[2];\n      re2 = re_mgr1;\n      if (re2.test(stem)) {\n        w = stem;\n      }\n    }\n\n    // Step 5\n    re = re_5;\n    if (re.test(w)) {\n      var fp = re.exec(w);\n      stem = fp[1];\n      re = re_mgr1;\n      re2 = re_meq1;\n      re3 = re3_5;\n      if (re.test(stem) || (re2.test(stem) && !(re3.test(stem)))) {\n        w = stem;\n      }\n    }\n\n    re = re_5_1;\n    re2 = 
re_mgr1;\n    if (re.test(w) && re2.test(w)) {\n      re = re_1b_2;\n      w = w.replace(re,\"\");\n    }\n\n    // and turn initial Y back to y\n\n    if (firstch == \"y\") {\n      w = firstch.toLowerCase() + w.substr(1);\n    }\n\n    return w;\n  };\n\n  return function (token) {\n    return token.update(porterStemmer);\n  }\n})();\n\nlunr.Pipeline.registerFunction(lunr.stemmer, 'stemmer')\n/*!\n * lunr.stopWordFilter\n * Copyright (C) 2020 Oliver Nightingale\n */\n\n/**\n * lunr.generateStopWordFilter builds a stopWordFilter function from the provided\n * list of stop words.\n *\n * The built in lunr.stopWordFilter is built using this generator and can be used\n * to generate custom stopWordFilters for applications or non English languages.\n *\n * @function\n * @param {Array} token The token to pass through the filter\n * @returns {lunr.PipelineFunction}\n * @see lunr.Pipeline\n * @see lunr.stopWordFilter\n */\nlunr.generateStopWordFilter = function (stopWords) {\n  var words = stopWords.reduce(function (memo, stopWord) {\n    memo[stopWord] = stopWord\n    return memo\n  }, {})\n\n  return function (token) {\n    if (token && words[token.toString()] !== token.toString()) return token\n  }\n}\n\n/**\n * lunr.stopWordFilter is an English language stop word list filter, any words\n * contained in the list will not be passed through the filter.\n *\n * This is intended to be used in the Pipeline. If the token does not pass the\n * filter then undefined will be returned.\n *\n * @function\n * @implements {lunr.PipelineFunction}\n * @params {lunr.Token} token - A token to check for being a stop word.\n * @returns {lunr.Token}\n * @see {@link lunr.Pipeline}\n */\nlunr.stopWordFilter = lunr.generateStopWordFilter([\n  'a',\n  'able',\n  'about',\n  'across',\n  'after',\n  'all',\n  'almost',\n  'also',\n  'am',\n  'among',\n  'an',\n  'and',\n  'any',\n  'are',\n  'as',\n  'at',\n  'be',\n  'because',\n  'been',\n  'but',\n  'by',\n  'can',\n  'cannot',\n  'could',\n  'dear',\n  'did',\n  'do',\n  'does',\n  'either',\n  'else',\n  'ever',\n  'every',\n  'for',\n  'from',\n  'get',\n  'got',\n  'had',\n  'has',\n  'have',\n  'he',\n  'her',\n  'hers',\n  'him',\n  'his',\n  'how',\n  'however',\n  'i',\n  'if',\n  'in',\n  'into',\n  'is',\n  'it',\n  'its',\n  'just',\n  'least',\n  'let',\n  'like',\n  'likely',\n  'may',\n  'me',\n  'might',\n  'most',\n  'must',\n  'my',\n  'neither',\n  'no',\n  'nor',\n  'not',\n  'of',\n  'off',\n  'often',\n  'on',\n  'only',\n  'or',\n  'other',\n  'our',\n  'own',\n  'rather',\n  'said',\n  'say',\n  'says',\n  'she',\n  'should',\n  'since',\n  'so',\n  'some',\n  'than',\n  'that',\n  'the',\n  'their',\n  'them',\n  'then',\n  'there',\n  'these',\n  'they',\n  'this',\n  'tis',\n  'to',\n  'too',\n  'twas',\n  'us',\n  'wants',\n  'was',\n  'we',\n  'were',\n  'what',\n  'when',\n  'where',\n  'which',\n  'while',\n  'who',\n  'whom',\n  'why',\n  'will',\n  'with',\n  'would',\n  'yet',\n  'you',\n  'your'\n])\n\nlunr.Pipeline.registerFunction(lunr.stopWordFilter, 'stopWordFilter')\n/*!\n * lunr.trimmer\n * Copyright (C) 2020 Oliver Nightingale\n */\n\n/**\n * lunr.trimmer is a pipeline function for trimming non word\n * characters from the beginning and end of tokens before they\n * enter the index.\n *\n * This implementation may not work correctly for non latin\n * characters and should either be removed or adapted for use\n * with languages with non-latin characters.\n *\n * @static\n * @implements {lunr.PipelineFunction}\n * @param 
{lunr.Token} token The token to pass through the filter\n * @returns {lunr.Token}\n * @see lunr.Pipeline\n */\nlunr.trimmer = function (token) {\n  return token.update(function (s) {\n    return s.replace(/^\\W+/, '').replace(/\\W+$/, '')\n  })\n}\n\nlunr.Pipeline.registerFunction(lunr.trimmer, 'trimmer')\n/*!\n * lunr.TokenSet\n * Copyright (C) 2020 Oliver Nightingale\n */\n\n/**\n * A token set is used to store the unique list of all tokens\n * within an index. Token sets are also used to represent an\n * incoming query to the index, this query token set and index\n * token set are then intersected to find which tokens to look\n * up in the inverted index.\n *\n * A token set can hold multiple tokens, as in the case of the\n * index token set, or it can hold a single token as in the\n * case of a simple query token set.\n *\n * Additionally token sets are used to perform wildcard matching.\n * Leading, contained and trailing wildcards are supported, and\n * from this edit distance matching can also be provided.\n *\n * Token sets are implemented as a minimal finite state automaton,\n * where both common prefixes and suffixes are shared between tokens.\n * This helps to reduce the space used for storing the token set.\n *\n * @constructor\n */\nlunr.TokenSet = function () {\n  this.final = false\n  this.edges = {}\n  this.id = lunr.TokenSet._nextId\n  lunr.TokenSet._nextId += 1\n}\n\n/**\n * Keeps track of the next, auto increment, identifier to assign\n * to a new tokenSet.\n *\n * TokenSets require a unique identifier to be correctly minimised.\n *\n * @private\n */\nlunr.TokenSet._nextId = 1\n\n/**\n * Creates a TokenSet instance from the given sorted array of words.\n *\n * @param {String[]} arr - A sorted array of strings to create the set from.\n * @returns {lunr.TokenSet}\n * @throws Will throw an error if the input array is not sorted.\n */\nlunr.TokenSet.fromArray = function (arr) {\n  var builder = new lunr.TokenSet.Builder\n\n  for (var i = 0, len = arr.length; i < len; i++) {\n    builder.insert(arr[i])\n  }\n\n  builder.finish()\n  return builder.root\n}\n\n/**\n * Creates a token set from a query clause.\n *\n * @private\n * @param {Object} clause - A single clause from lunr.Query.\n * @param {string} clause.term - The query clause term.\n * @param {number} [clause.editDistance] - The optional edit distance for the term.\n * @returns {lunr.TokenSet}\n */\nlunr.TokenSet.fromClause = function (clause) {\n  if ('editDistance' in clause) {\n    return lunr.TokenSet.fromFuzzyString(clause.term, clause.editDistance)\n  } else {\n    return lunr.TokenSet.fromString(clause.term)\n  }\n}\n\n/**\n * Creates a token set representing a single string with a specified\n * edit distance.\n *\n * Insertions, deletions, substitutions and transpositions are each\n * treated as an edit distance of 1.\n *\n * Increasing the allowed edit distance will have a dramatic impact\n * on the performance of both creating and intersecting these TokenSets.\n * It is advised to keep the edit distance less than 3.\n *\n * @param {string} str - The string to create the token set from.\n * @param {number} editDistance - The allowed edit distance to match.\n * @returns {lunr.TokenSet}\n */\nlunr.TokenSet.fromFuzzyString = function (str, editDistance) {\n  var root = new lunr.TokenSet\n\n  var stack = [{\n    node: root,\n    editsRemaining: editDistance,\n    str: str\n  }]\n\n  while (stack.length) {\n    var frame = stack.pop()\n\n    // no edit\n    if (frame.str.length > 0) {\n      var char = 
frame.str.charAt(0),\n          noEditNode\n\n      if (char in frame.node.edges) {\n        noEditNode = frame.node.edges[char]\n      } else {\n        noEditNode = new lunr.TokenSet\n        frame.node.edges[char] = noEditNode\n      }\n\n      if (frame.str.length == 1) {\n        noEditNode.final = true\n      }\n\n      stack.push({\n        node: noEditNode,\n        editsRemaining: frame.editsRemaining,\n        str: frame.str.slice(1)\n      })\n    }\n\n    if (frame.editsRemaining == 0) {\n      continue\n    }\n\n    // insertion\n    if (\"*\" in frame.node.edges) {\n      var insertionNode = frame.node.edges[\"*\"]\n    } else {\n      var insertionNode = new lunr.TokenSet\n      frame.node.edges[\"*\"] = insertionNode\n    }\n\n    if (frame.str.length == 0) {\n      insertionNode.final = true\n    }\n\n    stack.push({\n      node: insertionNode,\n      editsRemaining: frame.editsRemaining - 1,\n      str: frame.str\n    })\n\n    // deletion\n    // can only do a deletion if we have enough edits remaining\n    // and if there are characters left to delete in the string\n    if (frame.str.length > 1) {\n      stack.push({\n        node: frame.node,\n        editsRemaining: frame.editsRemaining - 1,\n        str: frame.str.slice(1)\n      })\n    }\n\n    // deletion\n    // just removing the last character from the str\n    if (frame.str.length == 1) {\n      frame.node.final = true\n    }\n\n    // substitution\n    // can only do a substitution if we have enough edits remaining\n    // and if there are characters left to substitute\n    if (frame.str.length >= 1) {\n      if (\"*\" in frame.node.edges) {\n        var substitutionNode = frame.node.edges[\"*\"]\n      } else {\n        var substitutionNode = new lunr.TokenSet\n        frame.node.edges[\"*\"] = substitutionNode\n      }\n\n      if (frame.str.length == 1) {\n        substitutionNode.final = true\n      }\n\n      stack.push({\n        node: substitutionNode,\n        editsRemaining: frame.editsRemaining - 1,\n        str: frame.str.slice(1)\n      })\n    }\n\n    // transposition\n    // can only do a transposition if there are edits remaining\n    // and there are enough characters to transpose\n    if (frame.str.length > 1) {\n      var charA = frame.str.charAt(0),\n          charB = frame.str.charAt(1),\n          transposeNode\n\n      if (charB in frame.node.edges) {\n        transposeNode = frame.node.edges[charB]\n      } else {\n        transposeNode = new lunr.TokenSet\n        frame.node.edges[charB] = transposeNode\n      }\n\n      if (frame.str.length == 1) {\n        transposeNode.final = true\n      }\n\n      stack.push({\n        node: transposeNode,\n        editsRemaining: frame.editsRemaining - 1,\n        str: charA + frame.str.slice(2)\n      })\n    }\n  }\n\n  return root\n}\n\n/**\n * Creates a TokenSet from a string.\n *\n * The string may contain one or more wildcard characters (*)\n * that will allow wildcard matching when intersecting with\n * another TokenSet.\n *\n * @param {string} str - The string to create a TokenSet from.\n * @returns {lunr.TokenSet}\n */\nlunr.TokenSet.fromString = function (str) {\n  var node = new lunr.TokenSet,\n      root = node\n\n  /*\n   * Iterates through all characters within the passed string\n   * appending a node for each character.\n   *\n   * When a wildcard character is found then a self\n   * referencing edge is introduced to continually match\n   * any number of any characters.\n   */\n  for (var i = 0, len = str.length; i < len; i++) {\n 
   var char = str[i],\n        final = (i == len - 1)\n\n    if (char == \"*\") {\n      node.edges[char] = node\n      node.final = final\n\n    } else {\n      var next = new lunr.TokenSet\n      next.final = final\n\n      node.edges[char] = next\n      node = next\n    }\n  }\n\n  return root\n}\n\n/**\n * Converts this TokenSet into an array of strings\n * contained within the TokenSet.\n *\n * This is not intended to be used on a TokenSet that\n * contains wildcards, in these cases the results are\n * undefined and are likely to cause an infinite loop.\n *\n * @returns {string[]}\n */\nlunr.TokenSet.prototype.toArray = function () {\n  var words = []\n\n  var stack = [{\n    prefix: \"\",\n    node: this\n  }]\n\n  while (stack.length) {\n    var frame = stack.pop(),\n        edges = Object.keys(frame.node.edges),\n        len = edges.length\n\n    if (frame.node.final) {\n      /* In Safari, at this point the prefix is sometimes corrupted, see:\n       * https://github.com/olivernn/lunr.js/issues/279 Calling any\n       * String.prototype method forces Safari to \"cast\" this string to what\n       * it's supposed to be, fixing the bug. */\n      frame.prefix.charAt(0)\n      words.push(frame.prefix)\n    }\n\n    for (var i = 0; i < len; i++) {\n      var edge = edges[i]\n\n      stack.push({\n        prefix: frame.prefix.concat(edge),\n        node: frame.node.edges[edge]\n      })\n    }\n  }\n\n  return words\n}\n\n/**\n * Generates a string representation of a TokenSet.\n *\n * This is intended to allow TokenSets to be used as keys\n * in objects, largely to aid the construction and minimisation\n * of a TokenSet. As such it is not designed to be a human\n * friendly representation of the TokenSet.\n *\n * @returns {string}\n */\nlunr.TokenSet.prototype.toString = function () {\n  // NOTE: Using Object.keys here as this.edges is very likely\n  // to enter 'hash-mode' with many keys being added\n  //\n  // avoiding a for-in loop here as it leads to the function\n  // being de-optimised (at least in V8). From some simple\n  // benchmarks the performance is comparable, but allowing\n  // V8 to optimize may mean easy performance wins in the future.\n\n  if (this._str) {\n    return this._str\n  }\n\n  var str = this.final ? 
'1' : '0',\n      labels = Object.keys(this.edges).sort(),\n      len = labels.length\n\n  for (var i = 0; i < len; i++) {\n    var label = labels[i],\n        node = this.edges[label]\n\n    str = str + label + node.id\n  }\n\n  return str\n}\n\n/**\n * Returns a new TokenSet that is the intersection of\n * this TokenSet and the passed TokenSet.\n *\n * This intersection will take into account any wildcards\n * contained within the TokenSet.\n *\n * @param {lunr.TokenSet} b - An other TokenSet to intersect with.\n * @returns {lunr.TokenSet}\n */\nlunr.TokenSet.prototype.intersect = function (b) {\n  var output = new lunr.TokenSet,\n      frame = undefined\n\n  var stack = [{\n    qNode: b,\n    output: output,\n    node: this\n  }]\n\n  while (stack.length) {\n    frame = stack.pop()\n\n    // NOTE: As with the #toString method, we are using\n    // Object.keys and a for loop instead of a for-in loop\n    // as both of these objects enter 'hash' mode, causing\n    // the function to be de-optimised in V8\n    var qEdges = Object.keys(frame.qNode.edges),\n        qLen = qEdges.length,\n        nEdges = Object.keys(frame.node.edges),\n        nLen = nEdges.length\n\n    for (var q = 0; q < qLen; q++) {\n      var qEdge = qEdges[q]\n\n      for (var n = 0; n < nLen; n++) {\n        var nEdge = nEdges[n]\n\n        if (nEdge == qEdge || qEdge == '*') {\n          var node = frame.node.edges[nEdge],\n              qNode = frame.qNode.edges[qEdge],\n              final = node.final && qNode.final,\n              next = undefined\n\n          if (nEdge in frame.output.edges) {\n            // an edge already exists for this character\n            // no need to create a new node, just set the finality\n            // bit unless this node is already final\n            next = frame.output.edges[nEdge]\n            next.final = next.final || final\n\n          } else {\n            // no edge exists yet, must create one\n            // set the finality bit and insert it\n            // into the output\n            next = new lunr.TokenSet\n            next.final = final\n            frame.output.edges[nEdge] = next\n          }\n\n          stack.push({\n            qNode: qNode,\n            output: next,\n            node: node\n          })\n        }\n      }\n    }\n  }\n\n  return output\n}\nlunr.TokenSet.Builder = function () {\n  this.previousWord = \"\"\n  this.root = new lunr.TokenSet\n  this.uncheckedNodes = []\n  this.minimizedNodes = {}\n}\n\nlunr.TokenSet.Builder.prototype.insert = function (word) {\n  var node,\n      commonPrefix = 0\n\n  if (word < this.previousWord) {\n    throw new Error (\"Out of order word insertion\")\n  }\n\n  for (var i = 0; i < word.length && i < this.previousWord.length; i++) {\n    if (word[i] != this.previousWord[i]) break\n    commonPrefix++\n  }\n\n  this.minimize(commonPrefix)\n\n  if (this.uncheckedNodes.length == 0) {\n    node = this.root\n  } else {\n    node = this.uncheckedNodes[this.uncheckedNodes.length - 1].child\n  }\n\n  for (var i = commonPrefix; i < word.length; i++) {\n    var nextNode = new lunr.TokenSet,\n        char = word[i]\n\n    node.edges[char] = nextNode\n\n    this.uncheckedNodes.push({\n      parent: node,\n      char: char,\n      child: nextNode\n    })\n\n    node = nextNode\n  }\n\n  node.final = true\n  this.previousWord = word\n}\n\nlunr.TokenSet.Builder.prototype.finish = function () {\n  this.minimize(0)\n}\n\nlunr.TokenSet.Builder.prototype.minimize = function (downTo) {\n  for (var i = this.uncheckedNodes.length - 1; 
i >= downTo; i--) {\n    var node = this.uncheckedNodes[i],\n        childKey = node.child.toString()\n\n    if (childKey in this.minimizedNodes) {\n      node.parent.edges[node.char] = this.minimizedNodes[childKey]\n    } else {\n      // Cache the key for this node since\n      // we know it can't change anymore\n      node.child._str = childKey\n\n      this.minimizedNodes[childKey] = node.child\n    }\n\n    this.uncheckedNodes.pop()\n  }\n}\n/*!\n * lunr.Index\n * Copyright (C) 2020 Oliver Nightingale\n */\n\n/**\n * An index contains the built index of all documents and provides a query interface\n * to the index.\n *\n * Usually instances of lunr.Index will not be created using this constructor, instead\n * lunr.Builder should be used to construct new indexes, or lunr.Index.load should be\n * used to load previously built and serialized indexes.\n *\n * @constructor\n * @param {Object} attrs - The attributes of the built search index.\n * @param {Object} attrs.invertedIndex - An index of term/field to document reference.\n * @param {Object<string, lunr.Vector>} attrs.fieldVectors - Field vectors\n * @param {lunr.TokenSet} attrs.tokenSet - An set of all corpus tokens.\n * @param {string[]} attrs.fields - The names of indexed document fields.\n * @param {lunr.Pipeline} attrs.pipeline - The pipeline to use for search terms.\n */\nlunr.Index = function (attrs) {\n  this.invertedIndex = attrs.invertedIndex\n  this.fieldVectors = attrs.fieldVectors\n  this.tokenSet = attrs.tokenSet\n  this.fields = attrs.fields\n  this.pipeline = attrs.pipeline\n}\n\n/**\n * A result contains details of a document matching a search query.\n * @typedef {Object} lunr.Index~Result\n * @property {string} ref - The reference of the document this result represents.\n * @property {number} score - A number between 0 and 1 representing how similar this document is to the query.\n * @property {lunr.MatchData} matchData - Contains metadata about this match including which term(s) caused the match.\n */\n\n/**\n * Although lunr provides the ability to create queries using lunr.Query, it also provides a simple\n * query language which itself is parsed into an instance of lunr.Query.\n *\n * For programmatically building queries it is advised to directly use lunr.Query, the query language\n * is best used for human entered text rather than program generated text.\n *\n * At its simplest queries can just be a single term, e.g. `hello`, multiple terms are also supported\n * and will be combined with OR, e.g `hello world` will match documents that contain either 'hello'\n * or 'world', though those that contain both will rank higher in the results.\n *\n * Wildcards can be included in terms to match one or more unspecified characters, these wildcards can\n * be inserted anywhere within the term, and more than one wildcard can exist in a single term. Adding\n * wildcards will increase the number of documents that will be found but can also have a negative\n * impact on query performance, especially with wildcards at the beginning of a term.\n *\n * Terms can be restricted to specific fields, e.g. `title:hello`, only documents with the term\n * hello in the title field will match this query. Using a field not present in the index will lead\n * to an error being thrown.\n *\n * Modifiers can also be added to terms, lunr supports edit distance and boost modifiers on terms. A term\n * boost will make documents matching that term score higher, e.g. `foo^5`. Edit distance is also supported\n * to provide fuzzy matching, e.g. 
'hello~2' will match documents with hello with an edit distance of 2.\n * Avoid large values for edit distance to improve query performance.\n *\n * Each term also supports a presence modifier. By default a term's presence in document is optional, however\n * this can be changed to either required or prohibited. For a term's presence to be required in a document the\n * term should be prefixed with a '+', e.g. `+foo bar` is a search for documents that must contain 'foo' and\n * optionally contain 'bar'. Conversely a leading '-' sets the terms presence to prohibited, i.e. it must not\n * appear in a document, e.g. `-foo bar` is a search for documents that do not contain 'foo' but may contain 'bar'.\n *\n * To escape special characters the backslash character '\\' can be used, this allows searches to include\n * characters that would normally be considered modifiers, e.g. `foo\\~2` will search for a term \"foo~2\" instead\n * of attempting to apply a boost of 2 to the search term \"foo\".\n *\n * @typedef {string} lunr.Index~QueryString\n * @example <caption>Simple single term query</caption>\n * hello\n * @example <caption>Multiple term query</caption>\n * hello world\n * @example <caption>term scoped to a field</caption>\n * title:hello\n * @example <caption>term with a boost of 10</caption>\n * hello^10\n * @example <caption>term with an edit distance of 2</caption>\n * hello~2\n * @example <caption>terms with presence modifiers</caption>\n * -foo +bar baz\n */\n\n/**\n * Performs a search against the index using lunr query syntax.\n *\n * Results will be returned sorted by their score, the most relevant results\n * will be returned first.  For details on how the score is calculated, please see\n * the {@link https://lunrjs.com/guides/searching.html#scoring|guide}.\n *\n * For more programmatic querying use lunr.Index#query.\n *\n * @param {lunr.Index~QueryString} queryString - A string containing a lunr query.\n * @throws {lunr.QueryParseError} If the passed query string cannot be parsed.\n * @returns {lunr.Index~Result[]}\n */\nlunr.Index.prototype.search = function (queryString) {\n  return this.query(function (query) {\n    var parser = new lunr.QueryParser(queryString, query)\n    parser.parse()\n  })\n}\n\n/**\n * A query builder callback provides a query object to be used to express\n * the query to perform on the index.\n *\n * @callback lunr.Index~queryBuilder\n * @param {lunr.Query} query - The query object to build up.\n * @this lunr.Query\n */\n\n/**\n * Performs a query against the index using the yielded lunr.Query object.\n *\n * If performing programmatic queries against the index, this method is preferred\n * over lunr.Index#search so as to avoid the additional query parsing overhead.\n *\n * A query object is yielded to the supplied function which should be used to\n * express the query to be run against the index.\n *\n * Note that although this function takes a callback parameter it is _not_ an\n * asynchronous operation, the callback is just yielded a query object to be\n * customized.\n *\n * @param {lunr.Index~queryBuilder} fn - A function that is used to build the query.\n * @returns {lunr.Index~Result[]}\n */\nlunr.Index.prototype.query = function (fn) {\n  // for each query clause\n  // * process terms\n  // * expand terms from token set\n  // * find matching documents and metadata\n  // * get document vectors\n  // * score documents\n\n  var query = new lunr.Query(this.fields),\n      matchingFields = Object.create(null),\n      queryVectors = 
Object.create(null),\n      termFieldCache = Object.create(null),\n      requiredMatches = Object.create(null),\n      prohibitedMatches = Object.create(null)\n\n  /*\n   * To support field level boosts a query vector is created per\n   * field. An empty vector is eagerly created to support negated\n   * queries.\n   */\n  for (var i = 0; i < this.fields.length; i++) {\n    queryVectors[this.fields[i]] = new lunr.Vector\n  }\n\n  fn.call(query, query)\n\n  for (var i = 0; i < query.clauses.length; i++) {\n    /*\n     * Unless the pipeline has been disabled for this term, which is\n     * the case for terms with wildcards, we need to pass the clause\n     * term through the search pipeline. A pipeline returns an array\n     * of processed terms. Pipeline functions may expand the passed\n     * term, which means we may end up performing multiple index lookups\n     * for a single query term.\n     */\n    var clause = query.clauses[i],\n        terms = null,\n        clauseMatches = lunr.Set.empty\n\n    if (clause.usePipeline) {\n      terms = this.pipeline.runString(clause.term, {\n        fields: clause.fields\n      })\n    } else {\n      terms = [clause.term]\n    }\n\n    for (var m = 0; m < terms.length; m++) {\n      var term = terms[m]\n\n      /*\n       * Each term returned from the pipeline needs to use the same query\n       * clause object, e.g. the same boost and or edit distance. The\n       * simplest way to do this is to re-use the clause object but mutate\n       * its term property.\n       */\n      clause.term = term\n\n      /*\n       * From the term in the clause we create a token set which will then\n       * be used to intersect the indexes token set to get a list of terms\n       * to lookup in the inverted index\n       */\n      var termTokenSet = lunr.TokenSet.fromClause(clause),\n          expandedTerms = this.tokenSet.intersect(termTokenSet).toArray()\n\n      /*\n       * If a term marked as required does not exist in the tokenSet it is\n       * impossible for the search to return any matches. 
We set all the field\n       * scoped required matches set to empty and stop examining any further\n       * clauses.\n       */\n      if (expandedTerms.length === 0 && clause.presence === lunr.Query.presence.REQUIRED) {\n        for (var k = 0; k < clause.fields.length; k++) {\n          var field = clause.fields[k]\n          requiredMatches[field] = lunr.Set.empty\n        }\n\n        break\n      }\n\n      for (var j = 0; j < expandedTerms.length; j++) {\n        /*\n         * For each term get the posting and termIndex, this is required for\n         * building the query vector.\n         */\n        var expandedTerm = expandedTerms[j],\n            posting = this.invertedIndex[expandedTerm],\n            termIndex = posting._index\n\n        for (var k = 0; k < clause.fields.length; k++) {\n          /*\n           * For each field that this query term is scoped by (by default\n           * all fields are in scope) we need to get all the document refs\n           * that have this term in that field.\n           *\n           * The posting is the entry in the invertedIndex for the matching\n           * term from above.\n           */\n          var field = clause.fields[k],\n              fieldPosting = posting[field],\n              matchingDocumentRefs = Object.keys(fieldPosting),\n              termField = expandedTerm + \"/\" + field,\n              matchingDocumentsSet = new lunr.Set(matchingDocumentRefs)\n\n          /*\n           * if the presence of this term is required ensure that the matching\n           * documents are added to the set of required matches for this clause.\n           *\n           */\n          if (clause.presence == lunr.Query.presence.REQUIRED) {\n            clauseMatches = clauseMatches.union(matchingDocumentsSet)\n\n            if (requiredMatches[field] === undefined) {\n              requiredMatches[field] = lunr.Set.complete\n            }\n          }\n\n          /*\n           * if the presence of this term is prohibited ensure that the matching\n           * documents are added to the set of prohibited matches for this field,\n           * creating that set if it does not yet exist.\n           */\n          if (clause.presence == lunr.Query.presence.PROHIBITED) {\n            if (prohibitedMatches[field] === undefined) {\n              prohibitedMatches[field] = lunr.Set.empty\n            }\n\n            prohibitedMatches[field] = prohibitedMatches[field].union(matchingDocumentsSet)\n\n            /*\n             * Prohibited matches should not be part of the query vector used for\n             * similarity scoring and no metadata should be extracted so we continue\n             * to the next field\n             */\n            continue\n          }\n\n          /*\n           * The query field vector is populated using the termIndex found for\n           * the term and a unit value with the appropriate boost applied.\n           * Using upsert because there could already be an entry in the vector\n           * for the term we are working with. 
In that case we just add the scores\n           * together.\n           */\n          queryVectors[field].upsert(termIndex, clause.boost, function (a, b) { return a + b })\n\n          /**\n           * If we've already seen this term, field combo then we've already collected\n           * the matching documents and metadata, no need to go through all that again\n           */\n          if (termFieldCache[termField]) {\n            continue\n          }\n\n          for (var l = 0; l < matchingDocumentRefs.length; l++) {\n            /*\n             * All metadata for this term/field/document triple\n             * are then extracted and collected into an instance\n             * of lunr.MatchData ready to be returned in the query\n             * results\n             */\n            var matchingDocumentRef = matchingDocumentRefs[l],\n                matchingFieldRef = new lunr.FieldRef (matchingDocumentRef, field),\n                metadata = fieldPosting[matchingDocumentRef],\n                fieldMatch\n\n            if ((fieldMatch = matchingFields[matchingFieldRef]) === undefined) {\n              matchingFields[matchingFieldRef] = new lunr.MatchData (expandedTerm, field, metadata)\n            } else {\n              fieldMatch.add(expandedTerm, field, metadata)\n            }\n\n          }\n\n          termFieldCache[termField] = true\n        }\n      }\n    }\n\n    /**\n     * If the presence was required we need to update the requiredMatches field sets.\n     * We do this after all fields for the term have collected their matches because\n     * the clause terms presence is required in _any_ of the fields not _all_ of the\n     * fields.\n     */\n    if (clause.presence === lunr.Query.presence.REQUIRED) {\n      for (var k = 0; k < clause.fields.length; k++) {\n        var field = clause.fields[k]\n        requiredMatches[field] = requiredMatches[field].intersect(clauseMatches)\n      }\n    }\n  }\n\n  /**\n   * Need to combine the field scoped required and prohibited\n   * matching documents into a global set of required and prohibited\n   * matches\n   */\n  var allRequiredMatches = lunr.Set.complete,\n      allProhibitedMatches = lunr.Set.empty\n\n  for (var i = 0; i < this.fields.length; i++) {\n    var field = this.fields[i]\n\n    if (requiredMatches[field]) {\n      allRequiredMatches = allRequiredMatches.intersect(requiredMatches[field])\n    }\n\n    if (prohibitedMatches[field]) {\n      allProhibitedMatches = allProhibitedMatches.union(prohibitedMatches[field])\n    }\n  }\n\n  var matchingFieldRefs = Object.keys(matchingFields),\n      results = [],\n      matches = Object.create(null)\n\n  /*\n   * If the query is negated (contains only prohibited terms)\n   * we need to get _all_ fieldRefs currently existing in the\n   * index. 
This is only done when we know that the query is\n   * entirely prohibited terms to avoid any cost of getting all\n   * fieldRefs unnecessarily.\n   *\n   * Additionally, blank MatchData must be created to correctly\n   * populate the results.\n   */\n  if (query.isNegated()) {\n    matchingFieldRefs = Object.keys(this.fieldVectors)\n\n    for (var i = 0; i < matchingFieldRefs.length; i++) {\n      var matchingFieldRef = matchingFieldRefs[i]\n      var fieldRef = lunr.FieldRef.fromString(matchingFieldRef)\n      matchingFields[matchingFieldRef] = new lunr.MatchData\n    }\n  }\n\n  for (var i = 0; i < matchingFieldRefs.length; i++) {\n    /*\n     * Currently we have document fields that match the query, but we\n     * need to return documents. The matchData and scores are combined\n     * from multiple fields belonging to the same document.\n     *\n     * Scores are calculated by field, using the query vectors created\n     * above, and combined into a final document score using addition.\n     */\n    var fieldRef = lunr.FieldRef.fromString(matchingFieldRefs[i]),\n        docRef = fieldRef.docRef\n\n    if (!allRequiredMatches.contains(docRef)) {\n      continue\n    }\n\n    if (allProhibitedMatches.contains(docRef)) {\n      continue\n    }\n\n    var fieldVector = this.fieldVectors[fieldRef],\n        score = queryVectors[fieldRef.fieldName].similarity(fieldVector),\n        docMatch\n\n    if ((docMatch = matches[docRef]) !== undefined) {\n      docMatch.score += score\n      docMatch.matchData.combine(matchingFields[fieldRef])\n    } else {\n      var match = {\n        ref: docRef,\n        score: score,\n        matchData: matchingFields[fieldRef]\n      }\n      matches[docRef] = match\n      results.push(match)\n    }\n  }\n\n  /*\n   * Sort the results objects by score, highest first.\n   */\n  return results.sort(function (a, b) {\n    return b.score - a.score\n  })\n}\n\n/**\n * Prepares the index for JSON serialization.\n *\n * The schema for this JSON blob will be described in a\n * separate JSON schema file.\n *\n * @returns {Object}\n */\nlunr.Index.prototype.toJSON = function () {\n  var invertedIndex = Object.keys(this.invertedIndex)\n    .sort()\n    .map(function (term) {\n      return [term, this.invertedIndex[term]]\n    }, this)\n\n  var fieldVectors = Object.keys(this.fieldVectors)\n    .map(function (ref) {\n      return [ref, this.fieldVectors[ref].toJSON()]\n    }, this)\n\n  return {\n    version: lunr.version,\n    fields: this.fields,\n    fieldVectors: fieldVectors,\n    invertedIndex: invertedIndex,\n    pipeline: this.pipeline.toJSON()\n  }\n}\n\n/**\n * Loads a previously serialized lunr.Index\n *\n * @param {Object} serializedIndex - A previously serialized lunr.Index\n * @returns {lunr.Index}\n */\nlunr.Index.load = function (serializedIndex) {\n  var attrs = {},\n      fieldVectors = {},\n      serializedVectors = serializedIndex.fieldVectors,\n      invertedIndex = Object.create(null),\n      serializedInvertedIndex = serializedIndex.invertedIndex,\n      tokenSetBuilder = new lunr.TokenSet.Builder,\n      pipeline = lunr.Pipeline.load(serializedIndex.pipeline)\n\n  if (serializedIndex.version != lunr.version) {\n    lunr.utils.warn(\"Version mismatch when loading serialised index. 
Current version of lunr '\" + lunr.version + \"' does not match serialized index '\" + serializedIndex.version + \"'\")\n  }\n\n  for (var i = 0; i < serializedVectors.length; i++) {\n    var tuple = serializedVectors[i],\n        ref = tuple[0],\n        elements = tuple[1]\n\n    fieldVectors[ref] = new lunr.Vector(elements)\n  }\n\n  for (var i = 0; i < serializedInvertedIndex.length; i++) {\n    var tuple = serializedInvertedIndex[i],\n        term = tuple[0],\n        posting = tuple[1]\n\n    tokenSetBuilder.insert(term)\n    invertedIndex[term] = posting\n  }\n\n  tokenSetBuilder.finish()\n\n  attrs.fields = serializedIndex.fields\n\n  attrs.fieldVectors = fieldVectors\n  attrs.invertedIndex = invertedIndex\n  attrs.tokenSet = tokenSetBuilder.root\n  attrs.pipeline = pipeline\n\n  return new lunr.Index(attrs)\n}\n/*!\n * lunr.Builder\n * Copyright (C) 2020 Oliver Nightingale\n */\n\n/**\n * lunr.Builder performs indexing on a set of documents and\n * returns instances of lunr.Index ready for querying.\n *\n * All configuration of the index is done via the builder, the\n * fields to index, the document reference, the text processing\n * pipeline and document scoring parameters are all set on the\n * builder before indexing.\n *\n * @constructor\n * @property {string} _ref - Internal reference to the document reference field.\n * @property {string[]} _fields - Internal reference to the document fields to index.\n * @property {object} invertedIndex - The inverted index maps terms to document fields.\n * @property {object} documentTermFrequencies - Keeps track of document term frequencies.\n * @property {object} documentLengths - Keeps track of the length of documents added to the index.\n * @property {lunr.tokenizer} tokenizer - Function for splitting strings into tokens for indexing.\n * @property {lunr.Pipeline} pipeline - The pipeline performs text processing on tokens before indexing.\n * @property {lunr.Pipeline} searchPipeline - A pipeline for processing search terms before querying the index.\n * @property {number} documentCount - Keeps track of the total number of documents indexed.\n * @property {number} _b - A parameter to control field length normalization, setting this to 0 disabled normalization, 1 fully normalizes field lengths, the default value is 0.75.\n * @property {number} _k1 - A parameter to control how quickly an increase in term frequency results in term frequency saturation, the default value is 1.2.\n * @property {number} termIndex - A counter incremented for each unique term, used to identify a terms position in the vector space.\n * @property {array} metadataWhitelist - A list of metadata keys that have been whitelisted for entry in the index.\n */\nlunr.Builder = function () {\n  this._ref = \"id\"\n  this._fields = Object.create(null)\n  this._documents = Object.create(null)\n  this.invertedIndex = Object.create(null)\n  this.fieldTermFrequencies = {}\n  this.fieldLengths = {}\n  this.tokenizer = lunr.tokenizer\n  this.pipeline = new lunr.Pipeline\n  this.searchPipeline = new lunr.Pipeline\n  this.documentCount = 0\n  this._b = 0.75\n  this._k1 = 1.2\n  this.termIndex = 0\n  this.metadataWhitelist = []\n}\n\n/**\n * Sets the document field used as the document reference. 
Every document must have this field.\n * The type of this field in the document should be a string, if it is not a string it will be\n * coerced into a string by calling toString.\n *\n * The default ref is 'id'.\n *\n * The ref should _not_ be changed during indexing, it should be set before any documents are\n * added to the index. Changing it during indexing can lead to inconsistent results.\n *\n * @param {string} ref - The name of the reference field in the document.\n */\nlunr.Builder.prototype.ref = function (ref) {\n  this._ref = ref\n}\n\n/**\n * A function that is used to extract a field from a document.\n *\n * Lunr expects a field to be at the top level of a document, if however the field\n * is deeply nested within a document an extractor function can be used to extract\n * the right field for indexing.\n *\n * @callback fieldExtractor\n * @param {object} doc - The document being added to the index.\n * @returns {?(string|object|object[])} obj - The object that will be indexed for this field.\n * @example <caption>Extracting a nested field</caption>\n * function (doc) { return doc.nested.field }\n */\n\n/**\n * Adds a field to the list of document fields that will be indexed. Every document being\n * indexed should have this field. Null values for this field in indexed documents will\n * not cause errors but will limit the chance of that document being retrieved by searches.\n *\n * All fields should be added before adding documents to the index. Adding fields after\n * a document has been indexed will have no effect on already indexed documents.\n *\n * Fields can be boosted at build time. This allows terms within that field to have more\n * importance when ranking search results. Use a field boost to specify that matches within\n * one field are more important than other fields.\n *\n * @param {string} fieldName - The name of a field to index in all documents.\n * @param {object} attributes - Optional attributes associated with this field.\n * @param {number} [attributes.boost=1] - Boost applied to all terms within this field.\n * @param {fieldExtractor} [attributes.extractor] - Function to extract a field from a document.\n * @throws {RangeError} fieldName cannot contain unsupported characters '/'\n */\nlunr.Builder.prototype.field = function (fieldName, attributes) {\n  if (/\\//.test(fieldName)) {\n    throw new RangeError (\"Field '\" + fieldName + \"' contains illegal character '/'\")\n  }\n\n  this._fields[fieldName] = attributes || {}\n}\n\n/**\n * A parameter to tune the amount of field length normalisation that is applied when\n * calculating relevance scores. A value of 0 will completely disable any normalisation\n * and a value of 1 will fully normalise field lengths. The default is 0.75. Values of b\n * will be clamped to the range 0 - 1.\n *\n * @param {number} number - The value to set for this tuning parameter.\n */\nlunr.Builder.prototype.b = function (number) {\n  if (number < 0) {\n    this._b = 0\n  } else if (number > 1) {\n    this._b = 1\n  } else {\n    this._b = number\n  }\n}\n\n/**\n * A parameter that controls the speed at which a rise in term frequency results in term\n * frequency saturation. The default value is 1.2. 
Setting this to a higher value will give\n * slower saturation levels, a lower value will result in quicker saturation.\n *\n * @param {number} number - The value to set for this tuning parameter.\n */\nlunr.Builder.prototype.k1 = function (number) {\n  this._k1 = number\n}\n\n/**\n * Adds a document to the index.\n *\n * Before adding fields to the index the index should have been fully setup, with the document\n * ref and all fields to index already having been specified.\n *\n * The document must have a field name as specified by the ref (by default this is 'id') and\n * it should have all fields defined for indexing, though null or undefined values will not\n * cause errors.\n *\n * Entire documents can be boosted at build time. Applying a boost to a document indicates that\n * this document should rank higher in search results than other documents.\n *\n * @param {object} doc - The document to add to the index.\n * @param {object} attributes - Optional attributes associated with this document.\n * @param {number} [attributes.boost=1] - Boost applied to all terms within this document.\n */\nlunr.Builder.prototype.add = function (doc, attributes) {\n  var docRef = doc[this._ref],\n      fields = Object.keys(this._fields)\n\n  this._documents[docRef] = attributes || {}\n  this.documentCount += 1\n\n  for (var i = 0; i < fields.length; i++) {\n    var fieldName = fields[i],\n        extractor = this._fields[fieldName].extractor,\n        field = extractor ? extractor(doc) : doc[fieldName],\n        tokens = this.tokenizer(field, {\n          fields: [fieldName]\n        }),\n        terms = this.pipeline.run(tokens),\n        fieldRef = new lunr.FieldRef (docRef, fieldName),\n        fieldTerms = Object.create(null)\n\n    this.fieldTermFrequencies[fieldRef] = fieldTerms\n    this.fieldLengths[fieldRef] = 0\n\n    // store the length of this field for this document\n    this.fieldLengths[fieldRef] += terms.length\n\n    // calculate term frequencies for this field\n    for (var j = 0; j < terms.length; j++) {\n      var term = terms[j]\n\n      if (fieldTerms[term] == undefined) {\n        fieldTerms[term] = 0\n      }\n\n      fieldTerms[term] += 1\n\n      // add to inverted index\n      // create an initial posting if one doesn't exist\n      if (this.invertedIndex[term] == undefined) {\n        var posting = Object.create(null)\n        posting[\"_index\"] = this.termIndex\n        this.termIndex += 1\n\n        for (var k = 0; k < fields.length; k++) {\n          posting[fields[k]] = Object.create(null)\n        }\n\n        this.invertedIndex[term] = posting\n      }\n\n      // add an entry for this term/fieldName/docRef to the invertedIndex\n      if (this.invertedIndex[term][fieldName][docRef] == undefined) {\n        this.invertedIndex[term][fieldName][docRef] = Object.create(null)\n      }\n\n      // store all whitelisted metadata about this token in the\n      // inverted index\n      for (var l = 0; l < this.metadataWhitelist.length; l++) {\n        var metadataKey = this.metadataWhitelist[l],\n            metadata = term.metadata[metadataKey]\n\n        if (this.invertedIndex[term][fieldName][docRef][metadataKey] == undefined) {\n          this.invertedIndex[term][fieldName][docRef][metadataKey] = []\n        }\n\n        this.invertedIndex[term][fieldName][docRef][metadataKey].push(metadata)\n      }\n    }\n\n  }\n}\n\n/**\n * Calculates the average document length for this index\n *\n * @private\n */\nlunr.Builder.prototype.calculateAverageFieldLengths = function () 
{\n\n  var fieldRefs = Object.keys(this.fieldLengths),\n      numberOfFields = fieldRefs.length,\n      accumulator = {},\n      documentsWithField = {}\n\n  for (var i = 0; i < numberOfFields; i++) {\n    var fieldRef = lunr.FieldRef.fromString(fieldRefs[i]),\n        field = fieldRef.fieldName\n\n    documentsWithField[field] || (documentsWithField[field] = 0)\n    documentsWithField[field] += 1\n\n    accumulator[field] || (accumulator[field] = 0)\n    accumulator[field] += this.fieldLengths[fieldRef]\n  }\n\n  var fields = Object.keys(this._fields)\n\n  for (var i = 0; i < fields.length; i++) {\n    var fieldName = fields[i]\n    accumulator[fieldName] = accumulator[fieldName] / documentsWithField[fieldName]\n  }\n\n  this.averageFieldLength = accumulator\n}\n\n/**\n * Builds a vector space model of every document using lunr.Vector\n *\n * @private\n */\nlunr.Builder.prototype.createFieldVectors = function () {\n  var fieldVectors = {},\n      fieldRefs = Object.keys(this.fieldTermFrequencies),\n      fieldRefsLength = fieldRefs.length,\n      termIdfCache = Object.create(null)\n\n  for (var i = 0; i < fieldRefsLength; i++) {\n    var fieldRef = lunr.FieldRef.fromString(fieldRefs[i]),\n        fieldName = fieldRef.fieldName,\n        fieldLength = this.fieldLengths[fieldRef],\n        fieldVector = new lunr.Vector,\n        termFrequencies = this.fieldTermFrequencies[fieldRef],\n        terms = Object.keys(termFrequencies),\n        termsLength = terms.length\n\n\n    var fieldBoost = this._fields[fieldName].boost || 1,\n        docBoost = this._documents[fieldRef.docRef].boost || 1\n\n    for (var j = 0; j < termsLength; j++) {\n      var term = terms[j],\n          tf = termFrequencies[term],\n          termIndex = this.invertedIndex[term]._index,\n          idf, score, scoreWithPrecision\n\n      if (termIdfCache[term] === undefined) {\n        idf = lunr.idf(this.invertedIndex[term], this.documentCount)\n        termIdfCache[term] = idf\n      } else {\n        idf = termIdfCache[term]\n      }\n\n      score = idf * ((this._k1 + 1) * tf) / (this._k1 * (1 - this._b + this._b * (fieldLength / this.averageFieldLength[fieldName])) + tf)\n      score *= fieldBoost\n      score *= docBoost\n      scoreWithPrecision = Math.round(score * 1000) / 1000\n      // Converts 1.23456789 to 1.234.\n      // Reducing the precision so that the vectors take up less\n      // space when serialised. Doing it now so that they behave\n      // the same before and after serialisation. 
Also, this is\n      // the fastest approach to reducing a number's precision in\n      // JavaScript.\n\n      fieldVector.insert(termIndex, scoreWithPrecision)\n    }\n\n    fieldVectors[fieldRef] = fieldVector\n  }\n\n  this.fieldVectors = fieldVectors\n}\n\n/**\n * Creates a token set of all tokens in the index using lunr.TokenSet\n *\n * @private\n */\nlunr.Builder.prototype.createTokenSet = function () {\n  this.tokenSet = lunr.TokenSet.fromArray(\n    Object.keys(this.invertedIndex).sort()\n  )\n}\n\n/**\n * Builds the index, creating an instance of lunr.Index.\n *\n * This completes the indexing process and should only be called\n * once all documents have been added to the index.\n *\n * @returns {lunr.Index}\n */\nlunr.Builder.prototype.build = function () {\n  this.calculateAverageFieldLengths()\n  this.createFieldVectors()\n  this.createTokenSet()\n\n  return new lunr.Index({\n    invertedIndex: this.invertedIndex,\n    fieldVectors: this.fieldVectors,\n    tokenSet: this.tokenSet,\n    fields: Object.keys(this._fields),\n    pipeline: this.searchPipeline\n  })\n}\n\n/**\n * Applies a plugin to the index builder.\n *\n * A plugin is a function that is called with the index builder as its context.\n * Plugins can be used to customise or extend the behaviour of the index\n * in some way. A plugin is just a function, that encapsulated the custom\n * behaviour that should be applied when building the index.\n *\n * The plugin function will be called with the index builder as its argument, additional\n * arguments can also be passed when calling use. The function will be called\n * with the index builder as its context.\n *\n * @param {Function} plugin The plugin to apply.\n */\nlunr.Builder.prototype.use = function (fn) {\n  var args = Array.prototype.slice.call(arguments, 1)\n  args.unshift(this)\n  fn.apply(this, args)\n}\n/**\n * Contains and collects metadata about a matching document.\n * A single instance of lunr.MatchData is returned as part of every\n * lunr.Index~Result.\n *\n * @constructor\n * @param {string} term - The term this match data is associated with\n * @param {string} field - The field in which the term was found\n * @param {object} metadata - The metadata recorded about this term in this field\n * @property {object} metadata - A cloned collection of metadata associated with this document.\n * @see {@link lunr.Index~Result}\n */\nlunr.MatchData = function (term, field, metadata) {\n  var clonedMetadata = Object.create(null),\n      metadataKeys = Object.keys(metadata || {})\n\n  // Cloning the metadata to prevent the original\n  // being mutated during match data combination.\n  // Metadata is kept in an array within the inverted\n  // index so cloning the data can be done with\n  // Array#slice\n  for (var i = 0; i < metadataKeys.length; i++) {\n    var key = metadataKeys[i]\n    clonedMetadata[key] = metadata[key].slice()\n  }\n\n  this.metadata = Object.create(null)\n\n  if (term !== undefined) {\n    this.metadata[term] = Object.create(null)\n    this.metadata[term][field] = clonedMetadata\n  }\n}\n\n/**\n * An instance of lunr.MatchData will be created for every term that matches a\n * document. However only one instance is required in a lunr.Index~Result. 
This\n * method combines metadata from another instance of lunr.MatchData with this\n * objects metadata.\n *\n * @param {lunr.MatchData} otherMatchData - Another instance of match data to merge with this one.\n * @see {@link lunr.Index~Result}\n */\nlunr.MatchData.prototype.combine = function (otherMatchData) {\n  var terms = Object.keys(otherMatchData.metadata)\n\n  for (var i = 0; i < terms.length; i++) {\n    var term = terms[i],\n        fields = Object.keys(otherMatchData.metadata[term])\n\n    if (this.metadata[term] == undefined) {\n      this.metadata[term] = Object.create(null)\n    }\n\n    for (var j = 0; j < fields.length; j++) {\n      var field = fields[j],\n          keys = Object.keys(otherMatchData.metadata[term][field])\n\n      if (this.metadata[term][field] == undefined) {\n        this.metadata[term][field] = Object.create(null)\n      }\n\n      for (var k = 0; k < keys.length; k++) {\n        var key = keys[k]\n\n        if (this.metadata[term][field][key] == undefined) {\n          this.metadata[term][field][key] = otherMatchData.metadata[term][field][key]\n        } else {\n          this.metadata[term][field][key] = this.metadata[term][field][key].concat(otherMatchData.metadata[term][field][key])\n        }\n\n      }\n    }\n  }\n}\n\n/**\n * Add metadata for a term/field pair to this instance of match data.\n *\n * @param {string} term - The term this match data is associated with\n * @param {string} field - The field in which the term was found\n * @param {object} metadata - The metadata recorded about this term in this field\n */\nlunr.MatchData.prototype.add = function (term, field, metadata) {\n  if (!(term in this.metadata)) {\n    this.metadata[term] = Object.create(null)\n    this.metadata[term][field] = metadata\n    return\n  }\n\n  if (!(field in this.metadata[term])) {\n    this.metadata[term][field] = metadata\n    return\n  }\n\n  var metadataKeys = Object.keys(metadata)\n\n  for (var i = 0; i < metadataKeys.length; i++) {\n    var key = metadataKeys[i]\n\n    if (key in this.metadata[term][field]) {\n      this.metadata[term][field][key] = this.metadata[term][field][key].concat(metadata[key])\n    } else {\n      this.metadata[term][field][key] = metadata[key]\n    }\n  }\n}\n/**\n * A lunr.Query provides a programmatic way of defining queries to be performed\n * against a {@link lunr.Index}.\n *\n * Prefer constructing a lunr.Query using the {@link lunr.Index#query} method\n * so the query object is pre-initialized with the right index fields.\n *\n * @constructor\n * @property {lunr.Query~Clause[]} clauses - An array of query clauses.\n * @property {string[]} allFields - An array of all available fields in a lunr.Index.\n */\nlunr.Query = function (allFields) {\n  this.clauses = []\n  this.allFields = allFields\n}\n\n/**\n * Constants for indicating what kind of automatic wildcard insertion will be used when constructing a query clause.\n *\n * This allows wildcards to be added to the beginning and end of a term without having to manually do any string\n * concatenation.\n *\n * The wildcard constants can be bitwise combined to select both leading and trailing wildcards.\n *\n * @constant\n * @default\n * @property {number} wildcard.NONE - The term will have no wildcards inserted, this is the default behaviour\n * @property {number} wildcard.LEADING - Prepend the term with a wildcard, unless a leading wildcard already exists\n * @property {number} wildcard.TRAILING - Append a wildcard to the term, unless a trailing wildcard already exists\n * 
@see lunr.Query~Clause\n * @see lunr.Query#clause\n * @see lunr.Query#term\n * @example <caption>query term with trailing wildcard</caption>\n * query.term('foo', { wildcard: lunr.Query.wildcard.TRAILING })\n * @example <caption>query term with leading and trailing wildcard</caption>\n * query.term('foo', {\n *   wildcard: lunr.Query.wildcard.LEADING | lunr.Query.wildcard.TRAILING\n * })\n */\n\nlunr.Query.wildcard = new String (\"*\")\nlunr.Query.wildcard.NONE = 0\nlunr.Query.wildcard.LEADING = 1\nlunr.Query.wildcard.TRAILING = 2\n\n/**\n * Constants for indicating what kind of presence a term must have in matching documents.\n *\n * @constant\n * @enum {number}\n * @see lunr.Query~Clause\n * @see lunr.Query#clause\n * @see lunr.Query#term\n * @example <caption>query term with required presence</caption>\n * query.term('foo', { presence: lunr.Query.presence.REQUIRED })\n */\nlunr.Query.presence = {\n  /**\n   * Term's presence in a document is optional, this is the default value.\n   */\n  OPTIONAL: 1,\n\n  /**\n   * Term's presence in a document is required, documents that do not contain\n   * this term will not be returned.\n   */\n  REQUIRED: 2,\n\n  /**\n   * Term's presence in a document is prohibited, documents that do contain\n   * this term will not be returned.\n   */\n  PROHIBITED: 3\n}\n\n/**\n * A single clause in a {@link lunr.Query} contains a term and details on how to\n * match that term against a {@link lunr.Index}.\n *\n * @typedef {Object} lunr.Query~Clause\n * @property {string[]} fields - The fields in an index this clause should be matched against.\n * @property {number} [boost=1] - Any boost that should be applied when matching this clause.\n * @property {number} [editDistance] - Whether the term should have fuzzy matching applied, and how fuzzy the match should be.\n * @property {boolean} [usePipeline] - Whether the term should be passed through the search pipeline.\n * @property {number} [wildcard=lunr.Query.wildcard.NONE] - Whether the term should have wildcards appended or prepended.\n * @property {number} [presence=lunr.Query.presence.OPTIONAL] - The terms presence in any matching documents.\n */\n\n/**\n * Adds a {@link lunr.Query~Clause} to this query.\n *\n * Unless the clause contains the fields to be matched all fields will be matched. In addition\n * a default boost of 1 is applied to the clause.\n *\n * @param {lunr.Query~Clause} clause - The clause to add to this query.\n * @see lunr.Query~Clause\n * @returns {lunr.Query}\n */\nlunr.Query.prototype.clause = function (clause) {\n  if (!('fields' in clause)) {\n    clause.fields = this.allFields\n  }\n\n  if (!('boost' in clause)) {\n    clause.boost = 1\n  }\n\n  if (!('usePipeline' in clause)) {\n    clause.usePipeline = true\n  }\n\n  if (!('wildcard' in clause)) {\n    clause.wildcard = lunr.Query.wildcard.NONE\n  }\n\n  if ((clause.wildcard & lunr.Query.wildcard.LEADING) && (clause.term.charAt(0) != lunr.Query.wildcard)) {\n    clause.term = \"*\" + clause.term\n  }\n\n  if ((clause.wildcard & lunr.Query.wildcard.TRAILING) && (clause.term.slice(-1) != lunr.Query.wildcard)) {\n    clause.term = \"\" + clause.term + \"*\"\n  }\n\n  if (!('presence' in clause)) {\n    clause.presence = lunr.Query.presence.OPTIONAL\n  }\n\n  this.clauses.push(clause)\n\n  return this\n}\n\n/**\n * A negated query is one in which every clause has a presence of\n * prohibited. 
These queries require some special processing to return\n * the expected results.\n *\n * @returns boolean\n */\nlunr.Query.prototype.isNegated = function () {\n  for (var i = 0; i < this.clauses.length; i++) {\n    if (this.clauses[i].presence != lunr.Query.presence.PROHIBITED) {\n      return false\n    }\n  }\n\n  return true\n}\n\n/**\n * Adds a term to the current query, under the covers this will create a {@link lunr.Query~Clause}\n * to the list of clauses that make up this query.\n *\n * The term is used as is, i.e. no tokenization will be performed by this method. Instead conversion\n * to a token or token-like string should be done before calling this method.\n *\n * The term will be converted to a string by calling `toString`. Multiple terms can be passed as an\n * array, each term in the array will share the same options.\n *\n * @param {object|object[]} term - The term(s) to add to the query.\n * @param {object} [options] - Any additional properties to add to the query clause.\n * @returns {lunr.Query}\n * @see lunr.Query#clause\n * @see lunr.Query~Clause\n * @example <caption>adding a single term to a query</caption>\n * query.term(\"foo\")\n * @example <caption>adding a single term to a query and specifying search fields, term boost and automatic trailing wildcard</caption>\n * query.term(\"foo\", {\n *   fields: [\"title\"],\n *   boost: 10,\n *   wildcard: lunr.Query.wildcard.TRAILING\n * })\n * @example <caption>using lunr.tokenizer to convert a string to tokens before using them as terms</caption>\n * query.term(lunr.tokenizer(\"foo bar\"))\n */\nlunr.Query.prototype.term = function (term, options) {\n  if (Array.isArray(term)) {\n    term.forEach(function (t) { this.term(t, lunr.utils.clone(options)) }, this)\n    return this\n  }\n\n  var clause = options || {}\n  clause.term = term.toString()\n\n  this.clause(clause)\n\n  return this\n}\nlunr.QueryParseError = function (message, start, end) {\n  this.name = \"QueryParseError\"\n  this.message = message\n  this.start = start\n  this.end = end\n}\n\nlunr.QueryParseError.prototype = new Error\nlunr.QueryLexer = function (str) {\n  this.lexemes = []\n  this.str = str\n  this.length = str.length\n  this.pos = 0\n  this.start = 0\n  this.escapeCharPositions = []\n}\n\nlunr.QueryLexer.prototype.run = function () {\n  var state = lunr.QueryLexer.lexText\n\n  while (state) {\n    state = state(this)\n  }\n}\n\nlunr.QueryLexer.prototype.sliceString = function () {\n  var subSlices = [],\n      sliceStart = this.start,\n      sliceEnd = this.pos\n\n  for (var i = 0; i < this.escapeCharPositions.length; i++) {\n    sliceEnd = this.escapeCharPositions[i]\n    subSlices.push(this.str.slice(sliceStart, sliceEnd))\n    sliceStart = sliceEnd + 1\n  }\n\n  subSlices.push(this.str.slice(sliceStart, this.pos))\n  this.escapeCharPositions.length = 0\n\n  return subSlices.join('')\n}\n\nlunr.QueryLexer.prototype.emit = function (type) {\n  this.lexemes.push({\n    type: type,\n    str: this.sliceString(),\n    start: this.start,\n    end: this.pos\n  })\n\n  this.start = this.pos\n}\n\nlunr.QueryLexer.prototype.escapeCharacter = function () {\n  this.escapeCharPositions.push(this.pos - 1)\n  this.pos += 1\n}\n\nlunr.QueryLexer.prototype.next = function () {\n  if (this.pos >= this.length) {\n    return lunr.QueryLexer.EOS\n  }\n\n  var char = this.str.charAt(this.pos)\n  this.pos += 1\n  return char\n}\n\nlunr.QueryLexer.prototype.width = function () {\n  return this.pos - this.start\n}\n\nlunr.QueryLexer.prototype.ignore = function () {\n 
 if (this.start == this.pos) {\n    this.pos += 1\n  }\n\n  this.start = this.pos\n}\n\nlunr.QueryLexer.prototype.backup = function () {\n  this.pos -= 1\n}\n\nlunr.QueryLexer.prototype.acceptDigitRun = function () {\n  var char, charCode\n\n  do {\n    char = this.next()\n    charCode = char.charCodeAt(0)\n  } while (charCode > 47 && charCode < 58)\n\n  if (char != lunr.QueryLexer.EOS) {\n    this.backup()\n  }\n}\n\nlunr.QueryLexer.prototype.more = function () {\n  return this.pos < this.length\n}\n\nlunr.QueryLexer.EOS = 'EOS'\nlunr.QueryLexer.FIELD = 'FIELD'\nlunr.QueryLexer.TERM = 'TERM'\nlunr.QueryLexer.EDIT_DISTANCE = 'EDIT_DISTANCE'\nlunr.QueryLexer.BOOST = 'BOOST'\nlunr.QueryLexer.PRESENCE = 'PRESENCE'\n\nlunr.QueryLexer.lexField = function (lexer) {\n  lexer.backup()\n  lexer.emit(lunr.QueryLexer.FIELD)\n  lexer.ignore()\n  return lunr.QueryLexer.lexText\n}\n\nlunr.QueryLexer.lexTerm = function (lexer) {\n  if (lexer.width() > 1) {\n    lexer.backup()\n    lexer.emit(lunr.QueryLexer.TERM)\n  }\n\n  lexer.ignore()\n\n  if (lexer.more()) {\n    return lunr.QueryLexer.lexText\n  }\n}\n\nlunr.QueryLexer.lexEditDistance = function (lexer) {\n  lexer.ignore()\n  lexer.acceptDigitRun()\n  lexer.emit(lunr.QueryLexer.EDIT_DISTANCE)\n  return lunr.QueryLexer.lexText\n}\n\nlunr.QueryLexer.lexBoost = function (lexer) {\n  lexer.ignore()\n  lexer.acceptDigitRun()\n  lexer.emit(lunr.QueryLexer.BOOST)\n  return lunr.QueryLexer.lexText\n}\n\nlunr.QueryLexer.lexEOS = function (lexer) {\n  if (lexer.width() > 0) {\n    lexer.emit(lunr.QueryLexer.TERM)\n  }\n}\n\n// This matches the separator used when tokenising fields\n// within a document. These should match otherwise it is\n// not possible to search for some tokens within a document.\n//\n// It is possible for the user to change the separator on the\n// tokenizer so it _might_ clash with any other of the special\n// characters already used within the search string, e.g. 
:.\n//\n// This means that it is possible to change the separator in\n// such a way that makes some words unsearchable using a search\n// string.\nlunr.QueryLexer.termSeparator = lunr.tokenizer.separator\n\nlunr.QueryLexer.lexText = function (lexer) {\n  while (true) {\n    var char = lexer.next()\n\n    if (char == lunr.QueryLexer.EOS) {\n      return lunr.QueryLexer.lexEOS\n    }\n\n    // Escape character is '\\'\n    if (char.charCodeAt(0) == 92) {\n      lexer.escapeCharacter()\n      continue\n    }\n\n    if (char == \":\") {\n      return lunr.QueryLexer.lexField\n    }\n\n    if (char == \"~\") {\n      lexer.backup()\n      if (lexer.width() > 0) {\n        lexer.emit(lunr.QueryLexer.TERM)\n      }\n      return lunr.QueryLexer.lexEditDistance\n    }\n\n    if (char == \"^\") {\n      lexer.backup()\n      if (lexer.width() > 0) {\n        lexer.emit(lunr.QueryLexer.TERM)\n      }\n      return lunr.QueryLexer.lexBoost\n    }\n\n    // \"+\" indicates term presence is required\n    // checking for length to ensure that only\n    // leading \"+\" are considered\n    if (char == \"+\" && lexer.width() === 1) {\n      lexer.emit(lunr.QueryLexer.PRESENCE)\n      return lunr.QueryLexer.lexText\n    }\n\n    // \"-\" indicates term presence is prohibited\n    // checking for length to ensure that only\n    // leading \"-\" are considered\n    if (char == \"-\" && lexer.width() === 1) {\n      lexer.emit(lunr.QueryLexer.PRESENCE)\n      return lunr.QueryLexer.lexText\n    }\n\n    if (char.match(lunr.QueryLexer.termSeparator)) {\n      return lunr.QueryLexer.lexTerm\n    }\n  }\n}\n\nlunr.QueryParser = function (str, query) {\n  this.lexer = new lunr.QueryLexer (str)\n  this.query = query\n  this.currentClause = {}\n  this.lexemeIdx = 0\n}\n\nlunr.QueryParser.prototype.parse = function () {\n  this.lexer.run()\n  this.lexemes = this.lexer.lexemes\n\n  var state = lunr.QueryParser.parseClause\n\n  while (state) {\n    state = state(this)\n  }\n\n  return this.query\n}\n\nlunr.QueryParser.prototype.peekLexeme = function () {\n  return this.lexemes[this.lexemeIdx]\n}\n\nlunr.QueryParser.prototype.consumeLexeme = function () {\n  var lexeme = this.peekLexeme()\n  this.lexemeIdx += 1\n  return lexeme\n}\n\nlunr.QueryParser.prototype.nextClause = function () {\n  var completedClause = this.currentClause\n  this.query.clause(completedClause)\n  this.currentClause = {}\n}\n\nlunr.QueryParser.parseClause = function (parser) {\n  var lexeme = parser.peekLexeme()\n\n  if (lexeme == undefined) {\n    return\n  }\n\n  switch (lexeme.type) {\n    case lunr.QueryLexer.PRESENCE:\n      return lunr.QueryParser.parsePresence\n    case lunr.QueryLexer.FIELD:\n      return lunr.QueryParser.parseField\n    case lunr.QueryLexer.TERM:\n      return lunr.QueryParser.parseTerm\n    default:\n      var errorMessage = \"expected either a field or a term, found \" + lexeme.type\n\n      if (lexeme.str.length >= 1) {\n        errorMessage += \" with value '\" + lexeme.str + \"'\"\n      }\n\n      throw new lunr.QueryParseError (errorMessage, lexeme.start, lexeme.end)\n  }\n}\n\nlunr.QueryParser.parsePresence = function (parser) {\n  var lexeme = parser.consumeLexeme()\n\n  if (lexeme == undefined) {\n    return\n  }\n\n  switch (lexeme.str) {\n    case \"-\":\n      parser.currentClause.presence = lunr.Query.presence.PROHIBITED\n      break\n    case \"+\":\n      parser.currentClause.presence = lunr.Query.presence.REQUIRED\n      break\n    default:\n      var errorMessage = \"unrecognised presence operator'\" + 
lexeme.str + \"'\"\n      throw new lunr.QueryParseError (errorMessage, lexeme.start, lexeme.end)\n  }\n\n  var nextLexeme = parser.peekLexeme()\n\n  if (nextLexeme == undefined) {\n    var errorMessage = \"expecting term or field, found nothing\"\n    throw new lunr.QueryParseError (errorMessage, lexeme.start, lexeme.end)\n  }\n\n  switch (nextLexeme.type) {\n    case lunr.QueryLexer.FIELD:\n      return lunr.QueryParser.parseField\n    case lunr.QueryLexer.TERM:\n      return lunr.QueryParser.parseTerm\n    default:\n      var errorMessage = \"expecting term or field, found '\" + nextLexeme.type + \"'\"\n      throw new lunr.QueryParseError (errorMessage, nextLexeme.start, nextLexeme.end)\n  }\n}\n\nlunr.QueryParser.parseField = function (parser) {\n  var lexeme = parser.consumeLexeme()\n\n  if (lexeme == undefined) {\n    return\n  }\n\n  if (parser.query.allFields.indexOf(lexeme.str) == -1) {\n    var possibleFields = parser.query.allFields.map(function (f) { return \"'\" + f + \"'\" }).join(', '),\n        errorMessage = \"unrecognised field '\" + lexeme.str + \"', possible fields: \" + possibleFields\n\n    throw new lunr.QueryParseError (errorMessage, lexeme.start, lexeme.end)\n  }\n\n  parser.currentClause.fields = [lexeme.str]\n\n  var nextLexeme = parser.peekLexeme()\n\n  if (nextLexeme == undefined) {\n    var errorMessage = \"expecting term, found nothing\"\n    throw new lunr.QueryParseError (errorMessage, lexeme.start, lexeme.end)\n  }\n\n  switch (nextLexeme.type) {\n    case lunr.QueryLexer.TERM:\n      return lunr.QueryParser.parseTerm\n    default:\n      var errorMessage = \"expecting term, found '\" + nextLexeme.type + \"'\"\n      throw new lunr.QueryParseError (errorMessage, nextLexeme.start, nextLexeme.end)\n  }\n}\n\nlunr.QueryParser.parseTerm = function (parser) {\n  var lexeme = parser.consumeLexeme()\n\n  if (lexeme == undefined) {\n    return\n  }\n\n  parser.currentClause.term = lexeme.str.toLowerCase()\n\n  if (lexeme.str.indexOf(\"*\") != -1) {\n    parser.currentClause.usePipeline = false\n  }\n\n  var nextLexeme = parser.peekLexeme()\n\n  if (nextLexeme == undefined) {\n    parser.nextClause()\n    return\n  }\n\n  switch (nextLexeme.type) {\n    case lunr.QueryLexer.TERM:\n      parser.nextClause()\n      return lunr.QueryParser.parseTerm\n    case lunr.QueryLexer.FIELD:\n      parser.nextClause()\n      return lunr.QueryParser.parseField\n    case lunr.QueryLexer.EDIT_DISTANCE:\n      return lunr.QueryParser.parseEditDistance\n    case lunr.QueryLexer.BOOST:\n      return lunr.QueryParser.parseBoost\n    case lunr.QueryLexer.PRESENCE:\n      parser.nextClause()\n      return lunr.QueryParser.parsePresence\n    default:\n      var errorMessage = \"Unexpected lexeme type '\" + nextLexeme.type + \"'\"\n      throw new lunr.QueryParseError (errorMessage, nextLexeme.start, nextLexeme.end)\n  }\n}\n\nlunr.QueryParser.parseEditDistance = function (parser) {\n  var lexeme = parser.consumeLexeme()\n\n  if (lexeme == undefined) {\n    return\n  }\n\n  var editDistance = parseInt(lexeme.str, 10)\n\n  if (isNaN(editDistance)) {\n    var errorMessage = \"edit distance must be numeric\"\n    throw new lunr.QueryParseError (errorMessage, lexeme.start, lexeme.end)\n  }\n\n  parser.currentClause.editDistance = editDistance\n\n  var nextLexeme = parser.peekLexeme()\n\n  if (nextLexeme == undefined) {\n    parser.nextClause()\n    return\n  }\n\n  switch (nextLexeme.type) {\n    case lunr.QueryLexer.TERM:\n      parser.nextClause()\n      return lunr.QueryParser.parseTerm\n 
   case lunr.QueryLexer.FIELD:\n      parser.nextClause()\n      return lunr.QueryParser.parseField\n    case lunr.QueryLexer.EDIT_DISTANCE:\n      return lunr.QueryParser.parseEditDistance\n    case lunr.QueryLexer.BOOST:\n      return lunr.QueryParser.parseBoost\n    case lunr.QueryLexer.PRESENCE:\n      parser.nextClause()\n      return lunr.QueryParser.parsePresence\n    default:\n      var errorMessage = \"Unexpected lexeme type '\" + nextLexeme.type + \"'\"\n      throw new lunr.QueryParseError (errorMessage, nextLexeme.start, nextLexeme.end)\n  }\n}\n\nlunr.QueryParser.parseBoost = function (parser) {\n  var lexeme = parser.consumeLexeme()\n\n  if (lexeme == undefined) {\n    return\n  }\n\n  var boost = parseInt(lexeme.str, 10)\n\n  if (isNaN(boost)) {\n    var errorMessage = \"boost must be numeric\"\n    throw new lunr.QueryParseError (errorMessage, lexeme.start, lexeme.end)\n  }\n\n  parser.currentClause.boost = boost\n\n  var nextLexeme = parser.peekLexeme()\n\n  if (nextLexeme == undefined) {\n    parser.nextClause()\n    return\n  }\n\n  switch (nextLexeme.type) {\n    case lunr.QueryLexer.TERM:\n      parser.nextClause()\n      return lunr.QueryParser.parseTerm\n    case lunr.QueryLexer.FIELD:\n      parser.nextClause()\n      return lunr.QueryParser.parseField\n    case lunr.QueryLexer.EDIT_DISTANCE:\n      return lunr.QueryParser.parseEditDistance\n    case lunr.QueryLexer.BOOST:\n      return lunr.QueryParser.parseBoost\n    case lunr.QueryLexer.PRESENCE:\n      parser.nextClause()\n      return lunr.QueryParser.parsePresence\n    default:\n      var errorMessage = \"Unexpected lexeme type '\" + nextLexeme.type + \"'\"\n      throw new lunr.QueryParseError (errorMessage, nextLexeme.start, nextLexeme.end)\n  }\n}\n\n  /**\n   * export the module via AMD, CommonJS or as a browser global\n   * Export code from https://github.com/umdjs/umd/blob/master/returnExports.js\n   */\n  ;(function (root, factory) {\n    if (typeof define === 'function' && define.amd) {\n      // AMD. Register as an anonymous module.\n      define(factory)\n    } else if (typeof exports === 'object') {\n      /**\n       * Node. Does not work with strict CommonJS, but\n       * only CommonJS-like environments that support module.exports,\n       * like Node.\n       */\n      module.exports = factory()\n    } else {\n      // Browser globals (root is window)\n      root.lunr = factory()\n    }\n  }(this, function () {\n    /**\n     * Just return a value to define the module export.\n     * This example returns an object, but the module\n     * can return a function as the exported value.\n     */\n    return lunr\n  }))\n})();\n"
  },
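For orientation, a minimal usage sketch of the lunr.Builder / lunr.Index API defined in lunr.js above, assuming the UMD export (module.exports) at the end of that file; the require() path and the sample documents are hypothetical, and note that using lunr.Builder directly skips the stemming/stop-word pipeline functions the usual lunr() convenience wrapper registers.

```javascript
// Minimal sketch: build an index with lunr.Builder, query it, and
// round-trip it through JSON. The require() path is hypothetical.
var lunr = require('./lunr')

var builder = new lunr.Builder()
builder.ref('id')                      // document reference field (default is 'id')
builder.field('title', { boost: 10 })  // field boost raises ranking weight
builder.field('body')

builder.add({ id: '1', title: 'install', body: 'download and compile the source' })
builder.add({ id: '2', title: 'faq', body: 'frequently asked questions' })

var idx = builder.build()

// search() parses the query string: '+' requires a term, '-' prohibits it,
// '*' is a wildcard, '^n' boosts and '~n' allows fuzzy matching, as handled
// by the QueryLexer/QueryParser above.
console.log(idx.search('install'))

// Indexes serialize via toJSON() and are restored with lunr.Index.load()
var restored = lunr.Index.load(JSON.parse(JSON.stringify(idx)))
console.log(restored.search('+faq'))
```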
  {
    "path": "docs/site/search/main.js",
    "content": "function getSearchTermFromLocation() {\n  var sPageURL = window.location.search.substring(1);\n  var sURLVariables = sPageURL.split('&');\n  for (var i = 0; i < sURLVariables.length; i++) {\n    var sParameterName = sURLVariables[i].split('=');\n    if (sParameterName[0] == 'q') {\n      return decodeURIComponent(sParameterName[1].replace(/\\+/g, '%20'));\n    }\n  }\n}\n\nfunction joinUrl (base, path) {\n  if (path.substring(0, 1) === \"/\") {\n    // path starts with `/`. Thus it is absolute.\n    return path;\n  }\n  if (base.substring(base.length-1) === \"/\") {\n    // base ends with `/`\n    return base + path;\n  }\n  return base + \"/\" + path;\n}\n\nfunction escapeHtml (value) {\n  return value.replace(/&/g, '&amp;')\n    .replace(/\"/g, '&quot;')\n    .replace(/</g, '&lt;')\n    .replace(/>/g, '&gt;');\n}\n\nfunction formatResult (location, title, summary) {\n  return '<article><h3><a href=\"' + joinUrl(base_url, location) + '\">'+ escapeHtml(title) + '</a></h3><p>' + escapeHtml(summary) +'</p></article>';\n}\n\nfunction displayResults (results) {\n  var search_results = document.getElementById(\"mkdocs-search-results\");\n  while (search_results.firstChild) {\n    search_results.removeChild(search_results.firstChild);\n  }\n  if (results.length > 0){\n    for (var i=0; i < results.length; i++){\n      var result = results[i];\n      var html = formatResult(result.location, result.title, result.summary);\n      search_results.insertAdjacentHTML('beforeend', html);\n    }\n  } else {\n    var noResultsText = search_results.getAttribute('data-no-results-text');\n    if (!noResultsText) {\n      noResultsText = \"No results found\";\n    }\n    search_results.insertAdjacentHTML('beforeend', '<p>' + noResultsText + '</p>');\n  }\n}\n\nfunction doSearch () {\n  var query = document.getElementById('mkdocs-search-query').value;\n  if (query.length > min_search_length) {\n    if (!window.Worker) {\n      displayResults(search(query));\n    } else {\n      searchWorker.postMessage({query: query});\n    }\n  } else {\n    // Clear results for short queries\n    displayResults([]);\n  }\n}\n\nfunction initSearch () {\n  var search_input = document.getElementById('mkdocs-search-query');\n  if (search_input) {\n    search_input.addEventListener(\"keyup\", doSearch);\n  }\n  var term = getSearchTermFromLocation();\n  if (term) {\n    search_input.value = term;\n    doSearch();\n  }\n}\n\nfunction onWorkerMessage (e) {\n  if (e.data.allowSearch) {\n    initSearch();\n  } else if (e.data.results) {\n    var results = e.data.results;\n    displayResults(results);\n  } else if (e.data.config) {\n    min_search_length = e.data.config.min_search_length-1;\n  }\n}\n\nif (!window.Worker) {\n  console.log('Web Worker API not supported');\n  // load index in main thread\n  $.getScript(joinUrl(base_url, \"search/worker.js\")).done(function () {\n    console.log('Loaded worker');\n    init();\n    window.postMessage = function (msg) {\n      onWorkerMessage({data: msg});\n    };\n  }).fail(function (jqxhr, settings, exception) {\n    console.error('Could not load worker.js');\n  });\n} else {\n  // Wrap search in a web worker\n  var searchWorker = new Worker(joinUrl(base_url, \"search/worker.js\"));\n  searchWorker.postMessage({init: true});\n  searchWorker.onmessage = onWorkerMessage;\n}\n"
  },
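main.js above delegates searching to search/worker.js, which is not part of this listing. The sketch below is a hypothetical worker showing only the message protocol that doSearch() and onWorkerMessage() rely on; the runSearch() helper and the index-loading step are assumptions, while the message shapes ({config}, {allowSearch}, {query}, {results}) and the min_search_length value come from main.js and search_index.json.

```javascript
// Hypothetical worker.js protocol sketch; only the message shapes are
// taken from main.js above.
onmessage = function (e) {
  if (e.data.init) {
    // After fetching search_index.json and building the lunr index,
    // main.js expects the config (it reads min_search_length) and then
    // a go-ahead signal that triggers initSearch():
    postMessage({ config: { min_search_length: 3 } })
    postMessage({ allowSearch: true })
  } else if (e.data.query) {
    // displayResults() consumes objects with location/title/summary fields.
    postMessage({ results: runSearch(e.data.query) }) // runSearch: hypothetical
  }
}
```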
  {
    "path": "docs/site/search/search_index.json",
    "content": "{\"config\":{\"indexing\":\"full\",\"lang\":[\"en\"],\"min_search_length\":3,\"prebuild_index\":false,\"separator\":\"[\\\\s\\\\-]+\"},\"docs\":[{\"location\":\"\",\"text\":\"regenie regenie is a C++ program for whole genome regression modelling of large genome-wide association studies . It is developed and supported by a team of scientists at the Regeneron Genetics Center. The method has the following properties It works on quantitative and binary traits, including binary traits with unbalanced case-control ratios It can handle population structure and relatedness It can process multiple phenotypes at once efficiently For binary traits, it supports Firth logistic regression and an SPA test It can perform gene/region-based tests (Burden, SBAT , SKAT/SKATO, ACATV/ACATO) It can perform interaction tests (GxE, GxG) as well as conditional analyses Meta-analysis of REGENIE summary statistics can be performed using REMETA It is fast and memory efficient \\ud83d\\udd25 It supports the BGEN , PLINK bed/bim/fam and PLINK2 pgen/pvar/psam genetic data formats It is ideally suited for implementation in Apache Spark (see GLOW ) It can be installed with Conda Citation Mbatchou, J., Barnard, L., Backman, J. et al. Computationally efficient whole-genome regression for quantitative and binary traits. Nat Genet 53, 1097\\u20131103 (2021). https://doi.org/10.1038/s41588-021-00870-7 License regenie is distributed under an MIT license . Contact If you have any questions about regenie please contact jonathan.marchini@regeneron.com joelle.mbatchou@regeneron.com If you want to submit a issue concerning the software please do so using the regenie Github repository . Report Issue Active Issues\",\"title\":\"Home\"},{\"location\":\"#regenie\",\"text\":\"regenie is a C++ program for whole genome regression modelling of large genome-wide association studies . It is developed and supported by a team of scientists at the Regeneron Genetics Center. The method has the following properties It works on quantitative and binary traits, including binary traits with unbalanced case-control ratios It can handle population structure and relatedness It can process multiple phenotypes at once efficiently For binary traits, it supports Firth logistic regression and an SPA test It can perform gene/region-based tests (Burden, SBAT , SKAT/SKATO, ACATV/ACATO) It can perform interaction tests (GxE, GxG) as well as conditional analyses Meta-analysis of REGENIE summary statistics can be performed using REMETA It is fast and memory efficient \\ud83d\\udd25 It supports the BGEN , PLINK bed/bim/fam and PLINK2 pgen/pvar/psam genetic data formats It is ideally suited for implementation in Apache Spark (see GLOW ) It can be installed with Conda\",\"title\":\"regenie\"},{\"location\":\"#citation\",\"text\":\"Mbatchou, J., Barnard, L., Backman, J. et al. Computationally efficient whole-genome regression for quantitative and binary traits. Nat Genet 53, 1097\\u20131103 (2021). https://doi.org/10.1038/s41588-021-00870-7\",\"title\":\"Citation\"},{\"location\":\"#license\",\"text\":\"regenie is distributed under an MIT license .\",\"title\":\"License\"},{\"location\":\"#contact\",\"text\":\"If you have any questions about regenie please contact jonathan.marchini@regeneron.com joelle.mbatchou@regeneron.com If you want to submit a issue concerning the software please do so using the regenie Github repository . 
Report Issue Active Issues\",\"title\":\"Contact\"},{\"location\":\"faq/\",\"text\":\"Frequently asked questions General Why doesn\\u2019t regenie need a genetic relatedness matrix (GRM)? regenie performs whole genome regression using the following model Y = X\\\\beta + \\\\epsilon where Y_{N\\\\times 1} is a phenotype, X_{N\\\\times M} is a genotype matrix, and \\\\epsilon_i\\\\sim N(0,\\\\sigma^2) . This model has close ties to a linear mixed model (LMM) based on an infinitesimal model Y = u + \\\\epsilon where u\\\\sim N(0,\\\\sigma_u^2 K) with K_{N\\\\times N}=XX^T/M is referred to as the genetic relatedness matrix (GRM). In the LMM, the polygenic effects have been integrated out so that the model only involves the GRM $K$ through a variance component in the covariance matrix of the trait. In regenie , we directly estimate the polygenic effects parameter \\\\beta by using ridge regression, which corresponds to fitting a linear regression model with an L2 penalty to impose shrinkage. Hence, we bypass having to use the GRM K and use the polygenic effect estimates X\\\\hat{\\\\beta} to control for population structure when testing variants for association. Can regenie be run on small sample sizes? For quantitative traits, we have not encountered issues running regenie on small data sets. For binary traits, we have obtained successful runs of regenie (step 1 and 2) on data sets with as few as 300 samples. A few factors to consider: Convergence issues may occur in step 1 (all the more if a trait is highly unbalanced) - see below Similarly, convergence issues may occur in step 2 when using Firth approximation - see below Note: we have found that regenie can get conservative in more extreme relatedness scenarios so we recommend not to use it for smaller cohorts with high amounts of relatedness like founder populations where exact mixed-model methods can be used Step 1 What block size to use in step 1? We recommend to use blocks of size 1000 as we have observed that it leads to a reasonable number of ridge predictors at level 1 (e.g. 2,500 with 500K SNPs used and the default regenie parameters) and have noticed little change in the final predictions when varying the block size. How many variants to use in step 1? We recommend to use a smaller set of about 500K directly genotyped SNPs in step 1, which should be sufficient to capture genome-wide polygenic effects. Note that using too many SNPs in Step 1 (e.g. >1M) can lead to a high computational burden due to the resulting higher number of predictors in the level 1 models. What do I do if I get the error \\\"Uh-oh, SNP XX has low variance (=XX)\\\" in step 1? This is due to variants with very low minor allele count (MAC) being included in step 1. To avoid this, you should use a MAC filter to remove such variants in a pre-processing step before running Regenie. For example, in PLINK2 you would use the --mac option and obtain a list of variants that pass the MAC filter (note that if you are using --keep/--remove in Regenie, you should also use it in the PLINK2 command) plink2 \\\\ --bfile my_bed_file \\\\ --mac 100 \\\\ --write-snplist \\\\ --out snps_pass You would then use the output file in regenie as --extract snps_pass.snplist (and this would avoid having to make a new genotype file). What to do if Step 1 of regenie failed for a binary trait when fitting the penalized logistic regression model? This can occur when the sample size used to fit the model is small and/or if the trait is extremely unbalanced. 
If using K-fold CV, switch to LOOCV (option --loocv ) to increase the size of the sample used to fit the model (note: LOOCV is now used by default when the sample size is below 5,000) If it is due to quasi-separation (i.e. Var(Y)=0 occurred in model fitting), either increase the sample size using LOOCV or increase the MAF threshold for variants included in step 1 analysis Step 2 What to do if Step 2 of regenie fails when fitting the null model for the approximate Firth correction? This can occur when the sample size used to fit the model is small and/or if the trait is extremely unbalanced. We have implemented the same measures as in the logistf function in R to avoid convergence issues, which include the use of a step size threshold when performing a Newton step. We first try fitting the model with a step size threshold that is more liberal (=25) as well as a maximum number of iterations of 1,000 and if convergence fails, we retry the model fit using a more stringent step size threshold (=5) and a higher threshold for the number of iterations (=5,000), which will slow down convergence. The user can also specify a maximum step size threshold using --maxstep-null (use value <5) as well as increase the maximum number of iterations using --maxiter-null (use value >5000). In that case, no retries are performed if convergence fails. We recommend to test chromosomes separately (using --chr ) as these parameters may need to be altered when fitting the null model for each chromosome What is reported in A1FREQ when building masks? For the max and comphet rules, the resulting burden masks take on values in [0,2] just like single variants so we compute A1FREQ the same way as done for single variants (i.e. mean(G)/2 where G is a genotype vector). For the sum rule, A1FREQ is computed as the average of the effect allele frequencies across all sites included in the mask. How is missingness handled in the genotype data? Missing genotypes are imputed with per-SNP averages when performing association tests (note: the genotype summaries reported like AAF, INFO are only based on non-missing genotypes).\",\"title\":\"F.A.Q.\"},{\"location\":\"faq/#frequently-asked-questions\",\"text\":\"\",\"title\":\"Frequently asked questions\"},{\"location\":\"faq/#general\",\"text\":\"Why doesn\\u2019t regenie need a genetic relatedness matrix (GRM)? regenie performs whole genome regression using the following model Y = X\\\\beta + \\\\epsilon where Y_{N\\\\times 1} is a phenotype, X_{N\\\\times M} is a genotype matrix, and \\\\epsilon_i\\\\sim N(0,\\\\sigma^2) . This model has close ties to a linear mixed model (LMM) based on an infinitesimal model Y = u + \\\\epsilon where u\\\\sim N(0,\\\\sigma_u^2 K) with K_{N\\\\times N}=XX^T/M is referred to as the genetic relatedness matrix (GRM). In the LMM, the polygenic effects have been integrated out so that the model only involves the GRM $K$ through a variance component in the covariance matrix of the trait. In regenie , we directly estimate the polygenic effects parameter \\\\beta by using ridge regression, which corresponds to fitting a linear regression model with an L2 penalty to impose shrinkage. Hence, we bypass having to use the GRM K and use the polygenic effect estimates X\\\\hat{\\\\beta} to control for population structure when testing variants for association. Can regenie be run on small sample sizes? For quantitative traits, we have not encountered issues running regenie on small data sets. 
For binary traits, we have obtained successful runs of regenie (step 1 and 2) on data sets with as few as 300 samples. A few factors to consider: Convergence issues may occur in step 1 (all the more if a trait is highly unbalanced) - see below Similarly, convergence issues may occur in step 2 when using Firth approximation - see below Note: we have found that regenie can get conservative in more extreme relatedness scenarios so we recommend not to use it for smaller cohorts with high amounts of relatedness like founder populations where exact mixed-model methods can be used\",\"title\":\"General\"},{\"location\":\"faq/#step-1\",\"text\":\"What block size to use in step 1? We recommend to use blocks of size 1000 as we have observed that it leads to a reasonable number of ridge predictors at level 1 (e.g. 2,500 with 500K SNPs used and the default regenie parameters) and have noticed little change in the final predictions when varying the block size. How many variants to use in step 1? We recommend to use a smaller set of about 500K directly genotyped SNPs in step 1, which should be sufficient to capture genome-wide polygenic effects. Note that using too many SNPs in Step 1 (e.g. >1M) can lead to a high computational burden due to the resulting higher number of predictors in the level 1 models. What do I do if I get the error \\\"Uh-oh, SNP XX has low variance (=XX)\\\" in step 1? This is due to variants with very low minor allele count (MAC) being included in step 1. To avoid this, you should use a MAC filter to remove such variants in a pre-processing step before running Regenie. For example, in PLINK2 you would use the --mac option and obtain a list of variants that pass the MAC filter (note that if you are using --keep/--remove in Regenie, you should also use it in the PLINK2 command) plink2 \\\\ --bfile my_bed_file \\\\ --mac 100 \\\\ --write-snplist \\\\ --out snps_pass You would then use the output file in regenie as --extract snps_pass.snplist (and this would avoid having to make a new genotype file). What to do if Step 1 of regenie failed for a binary trait when fitting the penalized logistic regression model? This can occur when the sample size used to fit the model is small and/or if the trait is extremely unbalanced. If using K-fold CV, switch to LOOCV (option --loocv ) to increase the size of the sample used to fit the model (note: LOOCV is now used by default when the sample size is below 5,000) If it is due to quasi-separation (i.e. Var(Y)=0 occurred in model fitting), either increase the sample size using LOOCV or increase the MAF threshold for variants included in step 1 analysis\",\"title\":\"Step 1\"},{\"location\":\"faq/#step-2\",\"text\":\"What to do if Step 2 of regenie fails when fitting the null model for the approximate Firth correction? This can occur when the sample size used to fit the model is small and/or if the trait is extremely unbalanced. We have implemented the same measures as in the logistf function in R to avoid convergence issues, which include the use of a step size threshold when performing a Newton step. We first try fitting the model with a step size threshold that is more liberal (=25) as well as a maximum number of iterations of 1,000 and if convergence fails, we retry the model fit using a more stringent step size threshold (=5) and a higher threshold for the number of iterations (=5,000), which will slow down convergence. 
The user can also specify a maximum step size threshold using --maxstep-null (use value <5) as well as increase the maximum number of iterations using --maxiter-null (use value >5000). In that case, no retries are performed if convergence fails. We recommend to test chromosomes separately (using --chr ) as these parameters may need to be altered when fitting the null model for each chromosome What is reported in A1FREQ when building masks? For the max and comphet rules, the resulting burden masks take on values in [0,2] just like single variants so we compute A1FREQ the same way as done for single variants (i.e. mean(G)/2 where G is a genotype vector). For the sum rule, A1FREQ is computed as the average of the effect allele frequencies across all sites included in the mask. How is missingness handled in the genotype data? Missing genotypes are imputed with per-SNP averages when performing association tests (note: the genotype summaries reported like AAF, INFO are only based on non-missing genotypes).\",\"title\":\"Step 2\"},{\"location\":\"install/\",\"text\":\"Download The regenie source code is hosted on Github . Installation Pre-requisites regenie requires compilation with GCC version >= 5.1 (on Linux) or Clang version >=3.3 (on Mac OSX). It also requires having the GFortran library installed. Pre-compiled binaries Pre-compiled binaries are available in the Github repository . These are provided for Linux (including Centos7) and Mac OSX computing environments and are statically linked. For the Linux binaries, users should have GLIBC version >= 2.22 installed. Additionally, they are provided compiled with Intel MKL library which will provide speedups for many of the operations done in regenie . Standard installation regenie requires the BGEN library so you will need to download and install that library. Edit the BGEN_PATH variable in the Makefile to the BGEN library path. On the command line type make while in the main source code directory. This should produce the executable called regenie . regenie has been enhanced to allow for gzip compressed input (for phenotype/covariate files) and output (for association results files) using the Boost Iostream library. If this library is installed on the system, you should compile using make HAS_BOOST_IOSTREAM=1 . Furthermore, we have enabled compilation of regenie with the Intel Math Kernel (MKL) library. You first need to have it installed on your system and modify the MKLROOT variable in the Makefile to the installed MKL library path. With CMake You can compile the binary using CMake version >=3.13 (instead of make as above). mkdir -p build cd build BGEN_PATH=<path_to_bgen_lib> cmake .. make This will generate the binary in the build/ subdirectory. To use with Boost Iostreams and/or Intel MKL library, add the corresponding flags before the cmake command on line 3 (e.g. BGEN_PATH=<path_to_bgen_lib> HAS_BOOST_IOSTREAM=1 cmake .. ). With Docker Alternatively, you can use a Docker image to run regenie . A guide to using docker is available on the Github page . With conda To install with conda , you can use the following commands: # create new environment conda create -n regenie_env -c conda-forge -c bioconda regenie # load it conda activate regenie_env Computing requirements We have tested regenie on 64-bit Linux and 64-bit Mac OSX computing environments. Note that for Mac OSX computing environments, compiling is done without OpenMP, as the library is not built-in by default and has to be installed separately. 
Memory usage In both Step 1 and Step 2 of a regenie run the genetic data file is read once, in blocks of SNPs, so at no point is the full dataset ever stored in memory. regenie uses a dimension reduction approach using ridge regression to produce a relatively small set of genetic predictors, which are then used to fit a whole-genome regression model. These genetic predictors are stored in memory by default, and can be relatively large if many phenotypes are stored at once. For example, if there are P phenotypes, M SNPs and N samples, and a block size of B SNPs is used with R ridge parameters, then regenie needs to store roughly $N\\\\times (M/B)\\\\times R$ doubles per phenotype, which is 8 GB per phenotype when M=500,000, N=400,000, B=1,000, R=5 ( $400000 \\\\times 500 \\\\times 5 = 10^9$ doubles at 8 bytes each) and 200 GB in total when P=25 . However, the --lowmem option can be used to avoid that memory usage, at negligible extra computational cost, by writing temporary files to disk. Threading regenie can take advantage of multiple cores using threading. The number of threads can be specified using the --threads option. regenie uses the Eigen library for efficient linear algebra operations and this uses threading where possible. For PLINK bed/bim/fam files, PLINK2 pgen/pvar/psam files, as well as BGEN v1.2 files with 8-bit encoding (format used for UK Biobank 500K imputed data), step 2 of regenie has been optimized by using multithreading through OpenMP . When running the SKAT/ACAT gene-based tests, we recommend using at most 2 threads and instead parallelizing the runs over partitions of the genome (e.g. groups of genes). For Windows platforms If you are on a Windows machine, we recommend using Windows Subsystem for Linux (WSL) to install a Ubuntu distribution so that you will be able to run REGENIE from a Linux terminal. You can download pre-compiled REGENIE binaries from the Github repository (note that you will need to install the libgomp1 library). Note: from your Windows command prompt, you can run REGENIE using wsl regenie .\",\"title\":\"Install\"},{\"location\":\"install/#download\",\"text\":\"The regenie source code is hosted on Github .\",\"title\":\"Download\"},{\"location\":\"install/#installation\",\"text\":\"\",\"title\":\"Installation\"},{\"location\":\"install/#pre-compiled-binaries\",\"text\":\"Pre-compiled binaries are available in the Github repository . These are provided for Linux (including CentOS 7) and Mac OSX computing environments and are statically linked. For the Linux binaries, users should have GLIBC version >= 2.22 installed. Additionally, they are provided compiled with the Intel MKL library, which provides speedups for many of the operations done in regenie .\",\"title\":\"Pre-compiled binaries\"},{\"location\":\"install/#standard-installation\",\"text\":\"regenie requires the BGEN library, so you will need to download and install that library. Edit the BGEN_PATH variable in the Makefile to the BGEN library path. On the command line type make while in the main source code directory. This should produce the executable called regenie . regenie has been enhanced to allow for gzip compressed input (for phenotype/covariate files) and output (for association results files) using the Boost Iostream library. If this library is installed on the system, you should compile using make HAS_BOOST_IOSTREAM=1 . Furthermore, we have enabled compilation of regenie with the Intel Math Kernel (MKL) library. 
You first need to have it installed on your system and modify the MKLROOT variable in the Makefile to the installed MKL library path.\",\"title\":\"Standard installation\"},{\"location\":\"install/#with-cmake\",\"text\":\"You can compile the binary using CMake version >=3.13 (instead of make as above). mkdir -p build cd build BGEN_PATH=<path_to_bgen_lib> cmake .. make This will generate the binary in the build/ subdirectory. To use with Boost Iostreams and/or Intel MKL library, add the corresponding flags before the cmake command on line 3 (e.g. BGEN_PATH=<path_to_bgen_lib> HAS_BOOST_IOSTREAM=1 cmake .. ).\",\"title\":\"With CMake\"},{\"location\":\"install/#with-docker\",\"text\":\"Alternatively, you can use a Docker image to run regenie . A guide to using docker is available on the Github page .\",\"title\":\"With Docker\"},{\"location\":\"install/#with-conda\",\"text\":\"To install with conda , you can use the following commands: # create new environment conda create -n regenie_env -c conda-forge -c bioconda regenie # load it conda activate regenie_env\",\"title\":\"With conda\"},{\"location\":\"install/#computing-requirements\",\"text\":\"We have tested regenie on 64-bit Linux and 64-bit Mac OSX computing environments. Note that for Mac OSX computing environments, compiling is done without OpenMP, as the library is not built in by default and has to be installed separately.\",\"title\":\"Computing requirements\"},{\"location\":\"install/#memory-usage\",\"text\":\"In both Step 1 and Step 2 of a regenie run the genetic data file is read once, in blocks of SNPs, so at no point is the full dataset ever stored in memory. regenie uses a dimension reduction approach using ridge regression to produce a relatively small set of genetic predictors, which are then used to fit a whole-genome regression model. These genetic predictors are stored in memory by default, and can be relatively large if many phenotypes are stored at once. For example, if there are P phenotypes, M SNPs and N samples, and a block size of B SNPs is used with R ridge parameters, then regenie needs to store roughly $N\\\\times (M/B)\\\\times R$ doubles per phenotype, which is 8 GB per phenotype when M=500,000, N=400,000, B=1,000, R=5 ( $400000 \\\\times 500 \\\\times 5 = 10^9$ doubles at 8 bytes each) and 200 GB in total when P=25 . However, the --lowmem option can be used to avoid that memory usage, at negligible extra computational cost, by writing temporary files to disk.\",\"title\":\"Memory usage\"},{\"location\":\"install/#threading\",\"text\":\"regenie can take advantage of multiple cores using threading. The number of threads can be specified using the --threads option. regenie uses the Eigen library for efficient linear algebra operations and this uses threading where possible. For PLINK bed/bim/fam files, PLINK2 pgen/pvar/psam files, as well as BGEN v1.2 files with 8-bit encoding (format used for UK Biobank 500K imputed data), step 2 of regenie has been optimized by using multithreading through OpenMP . When running the SKAT/ACAT gene-based tests, we recommend using at most 2 threads and instead parallelizing the runs over partitions of the genome (e.g. groups of genes).\",\"title\":\"Threading\"},{\"location\":\"install/#for-windows-platforms\",\"text\":\"If you are on a Windows machine, we recommend using Windows Subsystem for Linux (WSL) to install a Ubuntu distribution so that you will be able to run REGENIE from a Linux terminal. You can download pre-compiled REGENIE binaries from the Github repository (note that you will need to install the libgomp1 library). 
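For example, in a WSL Ubuntu terminal the library can typically be installed with sudo apt install libgomp1 (this command is our suggestion, assuming the apt package manager, and is not part of the official instructions). 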
Note: from your Windows command prompt, you can run REGENIE using wsl regenie .\",\"title\":\"For Windows platforms\"},{\"location\":\"options/\",\"text\":\"Getting started To run regenie , use the command ./regenie on the command line, followed by options and flags as needed. To get a full list of options use ./regenie --help The directory examples/ contains some small example files that are useful when getting started. A test run on a set of binary traits can be achieved by the following 2 commands. In Step 1 , the whole genome regression model is fit to the traits, and a set of genomic predictions are produced as output ./regenie \\\\ --step 1 \\\\ --bed example/example \\\\ --exclude example/snplist_rm.txt \\\\ --covarFile example/covariates.txt \\\\ --phenoFile example/phenotype_bin.txt \\\\ --remove example/fid_iid_to_remove.txt \\\\ --bsize 100 \\\\ --bt --lowmem \\\\ --lowmem-prefix tmp_rg \\\\ --out fit_bin_out In Step 2 , a set of imputed SNPs are tested for association using a Firth logistic regression model ./regenie \\\\ --step 2 \\\\ --bgen example/example.bgen \\\\ --covarFile example/covariates.txt \\\\ --phenoFile example/phenotype_bin.txt \\\\ --remove example/fid_iid_to_remove.txt \\\\ --bsize 200 \\\\ --bt \\\\ --firth --approx \\\\ --pThresh 0.01 \\\\ --pred fit_bin_out_pred.list \\\\ --out test_bin_out_firth One of the output files from these two commands is included in example/test_bin_out_firth_Y1.regenie . Basic options Input Option Argument Type Description --bgen, --bed, --pgen FILE Required Input genetic data file. Either BGEN file e.g. file.bgen , or bed/bim/fam prefix that assumes file.bed , file.bim , file.fam exist, or pgen/pvar/psam prefix that assumes file.pgen , file.pvar , file.psam exist --sample FILE Optional Sample file corresponding to input BGEN file --bgi FILE Optional Index bgi file corresponding to input BGEN file --ref-first FLAG Optional Specify to use the first allele as the reference allele for BGEN or PLINK bed/bim/fam file input [default is to use the last allele as the reference] --keep FILE Optional Inclusion file that lists individuals to retain in the analysis --remove FILE Optional Exclusion file that lists individuals to remove from the analysis --extract FILE Optional Inclusion file that lists IDs of variants to keep --exclude FILE Optional Exclusion file that lists IDs of variants to remove --extract-or FILE Optional Inclusion file that lists IDs of variants to keep regardless of minimum MAC filter --exclude-or FILE Optional Exclusion file that lists IDs of variants to remove unless MAC is above threshold --phenoFile FILE Required Phenotypes file --phenoCol STRING Optional Use for each phenotype you want to include in the analysis --phenoColList STRING Optional Comma separated list of phenotypes to include in the analysis --eventColList STRING Optional Comma separated list of columns in the phenotype file to include in the analysis that contain the event indicators --phenoExcludeList STRING Optional Comma separated list of phenotypes to ignore from the analysis --covarFile FILE Optional Covariates file --covarCol STRING Optional Use for each covariate you want to include in the analysis --covarColList STRING Optional Comma separated list of covariates to include in the analysis --catCovarList STRING Optional Comma separated list of categorical covariates to include in the analysis --covarExcludeList STRING Optional Comma separated list of covariates to ignore --pred FILE Optional File containing predictions from Step 1 (see Overview). 
This is required for --step 2 --tpheno-file STRING Optional to use a phenotype file in transposed format (e.g. BED format) --tpheno-indexCol INT Optional index of phenotype name column in transposed phenotype file --tpheno-ignoreCols INT Optional indexes of columns to ignore in transposed phenotype file --iid-only FLAG Optional to specify if header in transposed phenotype file only contains sample IID (assume FID=IID) Note: Parameter expansion can be used when specifying phenotypes/covariates (e.g. --covarCol PC{1:10} ). Also, multiple files can be specified for --extract/--exclude/--keep/--remove by using a comma-separated list. Genetic data file format regenie can read BGEN files, bed/bim/fam files or pgen/psam/pvar files in Step 1 and Step 2. The BGEN file format is described here . The bed/bim/fam file format is described here . The pgen/pvar/psam file format is described here . Tools useful for genetic data file format conversion are: PLINK , QCTOOL , BCFTOOLS . Step 2 of regenie can be sped up by using BGEN files in v1.2 format with 8-bit encoding (genotype file can be generated with PLINK2 using option --export bgen-1.2 'bits=8' ) as well as having an accompanying .bgi index file (a useful tool to create such a file is bgenix , which is part of the BGEN library). To include X chromosome genotypes in step 1 and/or step 2, males should be coded as diploid so that their genotypes are 0/2 (this is done automatically for BED and PGEN file formats with haploid genotypes). Chromosome values of 23 (for human analyses), X, Y, XY, PAR1 and PAR2 are all acceptable and will be collapsed into a single chromosome. Sample inclusion/exclusion file format 2 2 7 7 . No header. Each line starts with individual FID IID. Space/tab separated. Samples listed in the file that are not in bgen/bed/pgen file are ignored. Variant inclusion/exclusion file format 20 31 . No header. Each line must start with variant ID (if there are additional columns, file must be space/tab separated). Variants listed in this file that are not in bgen/bed/pgen file are ignored. Covariate file format FID IID V1 V2 V3 1 1 1.46837294454993 1.93779743016325 0.152887004505393 2 2 -1.2234390803815 -1.63408619199948 -0.190201446835255 3 3 0.0711531925667286 0.0863906292357564 0.14254739715665 . Line 1 : Header with FID, IID and C covariate names. Followed by lines of C+2 values. Space/tab separated. Each line contains individual FID and IID followed by C covariate values. Samples listed in this file that are not in bgen/bed/pgen file are ignored. Genotyped samples that are not in this file are removed from the analysis as well as samples with missing values at any of the covariates included. If --step 2 is specified, then the covariate file should be the same as that used in Step 1. Phenotype file format FID IID Y1 Y2 1 1 1.64818554321186 2.2765234736685 2 2 -2.67352013711554 -1.53680421614647 3 3 0.217542851471485 0.437289912695016 . Line 1 : Header with FID, IID and P phenotype names. Followed by lines of P+2 values. Space/tab separated. Each line contains individual FID and IID followed by P phenotype values (for binary traits, must be coded as 0=control, 1=case, NA=missing unless using --1 ). Samples listed in this file that are not in bgen/bed/pgen file are ignored. Genotyped samples that are not in this file are removed from the analysis. Missing values must be coded as NA. With QTs, missing values are mean-imputed in Step 1 and they are dropped when testing each phenotype in Step 2 (unless using --force-impute ). 
With BTs, missing values are mean-imputed in Step 1 when fitting the level 0 linear ridge regression and they are dropped when fitting the level 1 logistic ridge regression for each trait . In Step 2, missing values are dropped when testing each trait. To remove all samples that have missing values at any of the P phenotypes, use option --strict in Step 1 and 2. If using the transposed phenotype file format with option --tpheno-file , the header line must contain subject IDs as \\\"FID_IID\\\", otherwise use option --iid-only and only include IIDs (so will assume FID=IID). Predictions file format Running --step 1 --out foo will produce A set of files containing genomic predictions for each phenotype from Step 1 (see Output section below). A file called foo_pred.list listing the locations of the prediction files. The file list is needed as an input file when using --step 2 via the --pred option. It has one line per phenotype (in any order) that specifies the name of the phenotype and its corresponding prediction file name. Each phenotype must have exactly one prediction file and phenotype names must match those in the phenotype file. Phenotypes in this file not included in the analysis are ignored. Each prediction file contains the genetic predictions for the phenotype (space separated). Line 1 starts with 'FID_IID' followed by $N$ sample identifiers. It is followed by 23 lines containing the genetic predictions for each chromosome (sex chromosomes are collapsed into chromosome 23). More specifically, each line has $N+1$ values which are the chromosome number followed by the $N$ leave-one-chromosome-out (LOCO) predictions for each individual. Samples in this file not in the bed/pgen/bgen input file are ignored. Genotyped samples not present in this file will be ignored in the analysis of the corresponding trait. Samples with missing LOCO predictions must have their corresponding phenotype value set to missing. Options Option Argument Type Description --step INT Required specify step for the regenie run (see Overview) [argument can be 1 or 2 ] --qt FLAG Optional specify that traits are quantitative (this is the default so can be omitted) --bt FLAG Optional specify that traits are binary with 0=control,1=case,NA=missing --t2e FLAG Optional specify that traits are time-to-event data with 0=censoring,1=event,NA=missing in event column -1,--cc12 FLAG Optional specify to use 1/2/NA encoding for binary traits (1=control,2=case,NA=missing) --bsize INT Required size of the genotype blocks --cv INT Optional number of cross validation (CV) folds [default is 5] --loocv FLAG Optional flag to use leave-one-out cross validation --lowmem FLAG Optional flag to reduce memory usage by writing level 0 predictions to disk (details below). This is very useful if the number of traits is large (e.g. greater than 10) --lowmem-prefix FILE PREFIX Optional prefix where to temporarily write the level 0 predictions --split-l0 PREFIX,N Optional split level 0 across N jobs and set prefix of output files of level 0 predictions --run-l0 FILE,K Optional run level 0 for job K in {1..N} specifying the master file created from '--split-l0' --run-l1 FILE Optional run level 1 specifying the master file from '--split-l0' --l1-phenoList STRING Optional to specify a subset of phenotypes to analyze when using --run-l1 --keep-l0 FLAG Optional avoid deleting the level 0 predictions written on disk after fitting the level 1 models --print-prs FLAG Optional flag to print whole genome predictions (i.e. 
PRS) without using LOCO scheme --force-step1 FLAG Optional flag to run step 1 when >1M variants are used (not recommended) --minCaseCount INT Optional flag to ignore BTs with low case counts [default is 10] --apply-rint FLAG Optional to apply Rank Inverse Normal Transformation (RINT) to quantitative phenotypes (use in both Step 1 & 2) --nb INT Optional number of blocks (determined from block size if not provided) --strict FLAG Optional flag to remove samples with missing data at any of the phenotypes --ignore-pred FLAG Optional skip reading the file specified by --pred (corresponds to simple linear/logistic regression) --htp STRING Optional to output the summary statistics file in the HTP format (string should correspond to cohort name, e.g. 'UKB_450_EUR') --exact-p FLAG Optional avoid capping p-values at 2.2E-307 in the HTP format summary statistics output --use-relative-path FLAG Optional to use relative paths instead of absolute ones for the step 1 output pred.list file --use-prs FLAG Optional flag to use whole genome PRS in --pred (this is output in step 1 when using --print-prs ) --gz FLAG Optional flag to output files in compressed gzip format (LOCO prediction files in step 1 and association results files in step 2) [this only works when compiling with Boost Iostream library (see Install tab)] . --force-impute FLAG Optional flag to keep and impute missing observations for QTs in step 2 --write-samples FLAG Optional flag to write sample IDs for those kept in the analysis for each trait in step 2 --print-pheno FLAG Optional flag to write phenotype name in the first line of the sample ID files when using --write-samples --firth FLAG Optional specify to use Firth likelihood ratio test (LRT) as fallback for p-values less than threshold --approx FLAG Optional flag to use approximate Firth LRT for computational speedup (only works when option --firth is used) --firth-se FLAG Optional flag to compute SE based on effect size and LRT p-value when using Firth correction (instead of based on Hessian of unpenalized log-likelihood) --write-null-firth FLAG Optional to write the null estimates for approximate Firth [can be used in step 1 or 2] --compute-all FLAG Optional to write the null Firth estimates for all chromosomes (regardless of the genotype file) --use-null-firth FILE Optional to use stored null estimates for approximate Firth in step 2 --spa FLAG Optional specify to use Saddlepoint approximation as fallback for p-values less than threshold --pThresh FLOAT Optional P-value threshold below which to apply Firth/SPA correction [default is 0.05] --test STRING Optional specify to carry out dominant or recessive test [default is additive; argument can be dominant or recessive ] --chr INT Optional specify which chromosomes to test in step 2 (use for each chromosome to include) --chrList STRING Optional Comma separated list of chromosomes to test in step 2 --range STRING Optional specify chromosome region for variants to test in step 2 [format=CHR:MINPOS-MAXPOS] --minMAC FLOAT Optional flag to specify the minimum minor allele count (MAC) when testing variants [default is 5]. Variants with lower MAC are ignored. --minINFO FLOAT Optional flag to specify the minimum imputation info score (IMPUTE/MACH R^2) when testing variants. Variants with lower info score are ignored. 
--sex-specific STRING Optional to perform sex-specific analyses [either 'male'/'female'] --af-cc FLAG Optional to output A1FREQ in case/controls separately in the step 2 result file --no-split FLAG Optional flag to have summary statistics for all traits output in the same file --starting-block INT Optional to start step 2 at a specific block/set number (useful if program crashes during a job) --nauto INT Optional number of autosomal chromosomes (for non-human studies) [default is 22] --maxCatLevels INT Optional maximum number of levels for categorical covariates (for non-human studies) [default is 10] --niter INT Optional maximum number of iterations for logistic regression [default is 30] --maxstep-null INT Optional maximum step size for logistic model with Firth penalty under the null [default is 25] --maxiter-null INT Optional maximum number of iterations for logistic model with Firth penalty under the null [default is 1000] --par-region STRING Optional specify build code to determine bounds for PAR1/PAR2 regions (can be 'b36/b37/b38/hg18/hg19/hg38' or 'start,end' bp bounds of non-PAR region) [default is hg38] --force-qt FLAG Optional force QT run for binary traits --threads INT Optional number of computational threads to use [default=all-1] --debug FLAG Optional debug flag (for use by developers) --verbose FLAG Optional verbose screen output --version FLAG Optional print version number and exit --help FLAG Optional Prints usage and options list to screen When step 1 of regenie is run in low memory mode (i.e. using --lowmem ), temporary files are created on disk ( --lowmem-prefix tmp_prefix determines where the files are written [as in tmp_prefix_l0_Y1 ,..., tmp_prefix_l0_YP for P phenotypes]). If the prefix is not specified, the default is to use the prefix specified by --out (see below). These are automatically deleted at the end of the program (unless the run was not successful, in which case the user would need to delete the files). See the Wiki page for more details on how to run the level 0 models for Step 1 of regenie in parallel. Output Option Argument Type Description --out FILE PREFIX Required Output files that depend on --step A log file file.log of the output is generated. Using --step 1 --out file For the P phenotypes, files file_1.loco ,..., file_P.loco are output with the per-chromosome LOCO predictions as rows of the files (following the order of the phenotypes in the phenotype file header). If option --gz was used, the files will be compressed in gzip format and have extension .loco.gz . Genotyped individuals specified using option --remove are excluded from this file. Individuals with missing phenotype values kept in the analysis are included in the file and have their predictions set to missing. The list of BLUP files needed for step 2 (association testing) is written to file_pred.list . If using --print-prs , files file_1.prs ,..., file_P.prs will be written with the whole genome predictions (i.e. PRS) without using LOCO scheme (similar format as the .loco files). The list of these files is written to file_prs.list and can be used in step 2 with --pred and specifying flag --use-prs . Note that as these are not obtained using a LOCO scheme, association tests could suffer from proximal contamination. If using option --write-null-firth , the estimates for approximate Firth under the null will be written to files file_1.firth,...,file_P.firth and the list of these files is written to file_firth.list . This can be used in step 2 as --use-null-firth file_firth.list . 
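For example, a minimal sketch of this two-stage workflow reusing the file names from the Getting started example (other options shown elsewhere on this page still apply): ./regenie --step 1 --bed example/example --covarFile example/covariates.txt --phenoFile example/phenotype_bin.txt --bsize 100 --bt --write-null-firth --out fit_bin_out followed by ./regenie --step 2 --bgen example/example.bgen --covarFile example/covariates.txt --phenoFile example/phenotype_bin.txt --bsize 200 --bt --firth --approx --pred fit_bin_out_pred.list --use-null-firth fit_bin_out_firth.list --out test_bin_out_firth . 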
Note that it assumes the same set of covariates are used in Step 1 and 2. Using --step 2 --out file By default, results are written in separate files for each phenotype file_<phenotype1_name>.regenie,...,file_<phenotypeP_name>.regenie . Each file has one line per SNP along with a header line. If option --gz was used, the files will be compressed in gzip format and have extension .regenie.gz . The entries of each row specify chromosome, position, ID, reference allele (allele 0), alternative allele (allele 1), frequency of the alternative allele, sample size and the test performed (additive/dominant/recessive). With BGEN/PGEN files with dosages, the imputation INFO score is provided (IMPUTE info score for BGEN and Mach Rsq for PGEN). Allele frequency, sample size and INFO score, if applicable, are computed using only non-missing samples for each phenotype. These are followed by the estimated effect sizes (for allele 1 on the original scale), standard errors, chi-square test statistics and $-\\\\log_{10}$ p-value. An additional column is included to specify if Firth/SPA corrections failed. With option --no-split , the summary statistics for all traits are written to a single file file.regenie , with the same format as above. Additionally, an accompanying file with the trait names corresponding to Y1,Y2,... will be generated in 'file.regenie.Ydict'. Note that allele frequency, sample size and INFO score are computed using all analyzed samples. With option --htp , the summary statistics file will follow the HTP format. If option --write-samples was used, IDs of samples used for each trait will be written in files file_<phenotype1_name>.regenie.ids,...,file_<phenotypeP_name>.regenie.ids (tab separated, no header). When using --par-region , the default boundaries used for the chrX PAR regions are: b36/hg18: 2709520 and 154584238 b37/hg19: 2699520 and 154931044 b38/hg38: 2781479 and 155701383 Gene-based testing Starting from version 3.0, Step 2 of regenie provides a complementary set of gene-based tests in addition to the burden testing functionality introduced in version 2.0. More specifically, for a given set of variants (e.g. within a gene) which can be defined using functional annotations, regenie can apply various set-based tests on the variants as well as collapse them into a single combined 'mask' genotype that can be tested for association just like a single variant. Input Option Argument Type Description --anno-file FILE Required File with variant annotations for each set --set-list FILE Required File listing variant sets --extract-sets FILE Optional Inclusion file that lists IDs of variant sets to keep --exclude-sets FILE Optional Exclusion file that lists IDs of variant sets to remove --extract-setlist STRING Optional Comma-separated list of variant sets to keep --exclude-setlist STRING Optional Comma-separated list of variant sets to remove --aaf-file FILE Optional File with variant AAF to use when building masks (instead of AAF estimated from sample) --mask-def FILE Required File with mask definitions using the annotations defined in --anno-file Note: multiple files can be specified for --extract-sets/--exclude-sets by using a comma-separated list. Annotation input files The following files are used to define variant sets and functional annotations which will be used to generate masks. Annotation file 1:55039839:T:C PCSK9 LoF 1:55039842:G:A PCSK9 missense . This file defines functional annotations for variants. 
It is designed to accommodate variants with separate annotations for different sets/genes. Each line contains the variant name, the set/gene name and a single annotation category (space/tab separated). Variants not in this file will be assigned to a default \\\"NULL\\\" category. A maximum of 63 annotation categories (+NULL category) is allowed. For gene sets, tools you can use to obtain variant annotations per transcript are snpEFF or VEP . To obtain a single annotation per gene, you could choose the most deleterious functional annotation across the gene transcripts or alternatively use the canonical transcript (note that its definition can vary across software). We have implemented an extended 4-column format of the annotation file which also categorizes sets into domains (e.g. for gene sets, these would correspond to gene domains). 1:55039839:T:C PCSK9 Prodomain LoF 1:55039842:G:A PCSK9 Prodomain missense . Masks will be generated for each domain (maximum of 8 per set/gene) in addition to a mask combining across all domains. Variants can only be assigned to a single domain for each set/gene. Starting with v4.1, you can also specify custom variant weights which will be used in the burden, SKAT/SKAT-O and ACAT-V tests ($w_i$'s in the gene-based testing overview ). Multiple weights can be included in the annotation file after the 3rd column, e.g. 1:55039839:T:C PCSK9 LoF 0.9 0.812 1 1:55039842:G:A PCSK9 missense 0.4 0.23 0.55 . Using --weights-col 4 will use the weights in the 4th column for the gene-based tests. Set list file This file lists variants within each set/gene to use when building masks. Each line contains the set/gene name followed by a chromosome and physical position for the set/gene, then by a comma-separated list of variants included in the set/gene. A1BG 19 58346922 19:58346922:C:A,19:58346924:G:A,... A1CF 10 50806630 10:50806630:A:G,10:50806630:A:AT,... . Set inclusion/exclusion file format The file must have a single column of set/gene names corresponding to those in the set list file. PIGP ZBTB38 . AAF file (optional) Both functional annotations and alternative allele frequency (AAF) cutoffs are used when building masks (e.g. only considering LoF sites where AAF is below 1%). By default, the AAF for each variant is computed from the sample but alternatively, the user can specify variant AAFs using this file. Each line contains the variant name followed by its AAF (it should be for the ALT allele used in the genetic data input). AAF must be a numerical value (i.e. it cannot be '.'). 7:6187101:C:T 1.53918207864341e-05 7:6190395:C:A 2.19920388819247e-06 . Since singleton variants cannot be identified from this file, they are determined by default based on the input genetic data. To enforce which sites should be included in the singleton masks (see --set-singletons ), you can add a third column in the file with a binary indicator (1=singleton; 0=not singleton). So only variants which are specified as singletons will be considered for the singleton masks, regardless of whether they are singletons in the input genetic data. Note that with this flag, singleton sites will be included in all masks (regardless of the AAF in the file). 7:6187101:C:T 1.53918207864341e-05 0 7:6190395:C:A 2.19920388819247e-06 1 . Mask definitions Mask file This file specifies which annotation categories should be combined into masks. Each line contains a mask name followed by a comma-separated list of categories included in the mask (i.e. union is taken over categories). 
For example below, Mask1 uses only LoF variants and Mask2 uses LoF and missense annotated variants. Mask1 LoF Mask2 LoF,missense . AAF cutoffs Option --aaf-bins specifies the AAF upper bounds used to generate burden masks ( AAF and not MAF [minor allele frequency] is used when deciding which variants go into a mask) . By default, a mask based on singleton sites is always included. For example, --aaf-bins 0.01,0.05 will generate 3 burden masks for AAFs in [0,0.01], [0,0.05] and singletons. SKAT/ACAT tests The option --vc-tests is used to specify the gene-based tests to run. By default, these tests use all variants in each mask category. If you'd like to only include variants whose AAF is below a given threshold, e.g. only including rare variants, you can use --vc-maxAAF . Test Name in regenie Description SKAT skat Variance component test SKATO skato Omnibus test combining features of SKAT and Burden SKATO-ACAT skato-acat Same as SKATO but using Cauchy combination method to maximize power across SKATO models ACATV acatv Test using Cauchy combination method to combine single-variant p-values ACATO acato Omnibus test combining features of ACATV, SKAT and Burden ACATO-FULL acato-full Same as ACATO but using the larger set of SKATO models used in the SKATO test For example, --vc-tests skato,acato-full will run SKATO and ACATO (both using the default grid of 8 rho values for the SKATO models) and the p-values for SKAT, SKATO, ACATV and ACATO will be output. Ultra-rare variants (defined by default as MAC$\\\\le$10, see --vc-MACthr ) are collapsed into a burden mask which is then included in the tests instead of the individual variants. For additional details on the tests, see here . Joint test for burden masks The following tests can be used to combine different burden masks generated using different annotation classes as well as AAF thresholds. Test Name in regenie QT BT Robust to LD Assumes same effect direction Minimum P-value minp $\\\\checkmark$ $\\\\checkmark$ $\\\\times$ $\\\\times$ ACAT acat $\\\\checkmark$ $\\\\checkmark$ $\\\\checkmark$ $\\\\times$ SBAT sbat $\\\\checkmark$ $\\\\times$ $\\\\checkmark$ $\\\\checkmark$ The ACAT test combines the p-values of the individual burden masks using the Cauchy combination method (see ref. 14 here ). The SBAT test is described in more detail here . If you only want to output the results for the joint tests (ignore the marginal tests), use --joint-only . LOVO/LODO schemes The leave-one-variant-out (LOVO) scheme takes all sites going into a mask, and builds LOVO masks by leaving out one variant at a time from the full set of sites. The mask including all sites will also be computed. The argument for --mask-lovo is a comma-separated list which consists of the set/gene name, the mask name, and the AAF cutoff (either 'singleton' or a double in (0,1)). If using a 4-column annotation file, then --mask-lovo should have the gene name, the domain name, the mask name, and the AAF cutoff. So the LOVO masks will be generated for a specific gene domain. The leave-one-domain-out (LODO) scheme (specified by --mask-lodo ) takes all sites going into a mask and builds a LODO mask for each domain specified for the gene by excluding all variants in the domain. The full mask including all sites will also be computed. The argument for --mask-lodo should have the gene name, the mask name and the AAF cutoff. Writing mask files Burden masks built in regenie can be written to PLINK bed format. 
If the input genetic data contains dosages, the mask dosages will be converted to hard-calls prior to being written to file and these hard-calls will be used for the association testing. The PLINK bed file is written using 'ref-last' encoding (i.e. REF allele is listed last in the bim file). Note that this cannot be used with the LOVO/LODO schemes. Options Option Argument Type Description --aaf-bins FLOAT,...,FLOAT Optional comma-separated list of AAF upper bounds to use when building masks [default is a single cutoff of 1%] --build-mask STRING Optional build masks using the maximum number of ALT alleles across sites ( 'max' ; the default), or the sum of ALT alleles ( 'sum' ), or thresholding the sum to 2 ( 'comphet' ) --singleton-carrier FLAG Optional to define singletons as variants with a single carrier in the sample (rather than alternative allele count=1) --set-singletons FLAG Optional to use 3rd column in AAF file to specify variants included in singleton masks --write-mask FLAG Optional write mask to PLINK bed format (does not work when building masks with 'sum') --vc-tests STRING Optional comma-separated list of SKAT/ACAT-type tests to run --vc-maxAAF FLOAT Optional AAF upper bound to use for SKAT/ACAT-type tests [default is 100%] --skat-params FLOAT,FLOAT Optional a1,a2 values for the single variant weights computed from Beta(MAF,a1,a2) used in SKAT/ACAT-type tests [default is (1,25)] --skato-rho FLOAT,...,FLOAT Optional comma-separated list of $\\\\rho$ values used for SKATO models --vc-MACthr FLOAT Optional MAC threshold below which to collapse variants in SKAT/ACAT-type tests [default is 10] --joint STRING Optional comma-separated list of joint tests to apply on the generated burden masks --rgc-gene-p FLAG Optional to compute the GENE_P test --skip-test FLAG Optional to skip computing association tests after building masks and writing them to file --mask-lovo STRING Optional to perform LOVO scheme --lovo-snplist FILE Optional File with list of variants for which to compute LOVO masks --mask-lodo FLAG Optional to perform LODO scheme --weights-col INT Optional column index (1-based) in annotation file to use custom weights in gene-based tests --write-mask-snplist FLAG Optional to write list of variants that went into each mask to file --check-burden-files FLAG Optional to check the concordance between annotation, set list and mask files [see below ] --strict-check-burden FLAG Optional to exit early if the annotation, set list and mask definition files don't agree [see below ] Three rules can be used to build masks with --build-mask as shown in the diagram below, where the last rule comphet applies a threshold of 2 to the mask from the sum rule. Output With --out file Results are written in separate files for each phenotype file_<phenotype1_name>.regenie,...,file_<phenotypeP_name>.regenie with the same output format mentioned above . Additionally, a header line is included (starting with ## ) which contains mask definition information. Masks will have name <set_name>.<mask_name>.<AAF_cutoff> with the chromosome and physical position as defined in the set list file, the reference allele being ref , and the alternate allele corresponding to <mask_name>.<AAF_cutoff> . When using --mask-lovo , the mask name will be the same as above but have suffix _<variant_name> to specify the variant which was excluded when building the mask. With --build-mask sum , the reported mask AAF corresponds to the average AAF across sites included in the mask. 
If using --write-mask , the masks will be saved to file_masks.{bed,bim,fam} and if using --write-mask-snplist , the list of variants included in each mask will be saved to file_masks.snplist . When using --rgc-gene-p , it will apply the single p-value per gene GENE_P strategy using all masks (see here for details). Example run Using Step 1 results from the Step 1 command above , we use the following command to build and test masks in Step 2 ./regenie \\\\ --step 2 \\\\ --bed example/example_3chr \\\\ --covarFile example/covariates.txt \\\\ --phenoFile example/phenotype_bin.txt \\\\ --bt \\\\ --remove example/fid_iid_to_remove.txt \\\\ --firth --approx \\\\ --pred fit_bin_out_pred.list \\\\ --anno-file example/example_3chr.annotations \\\\ --set-list example/example_3chr.setlist \\\\ --mask-def example/example_3chr.masks \\\\ --aaf-bins 0.1,0.05 \\\\ --write-mask \\\\ --bsize 200 \\\\ --out test_bin_out_firth For each set, this will produce masks using 3 AAF cutoffs (singletons, 5% and 10% AAF). The masks are written to PLINK bed file (in test_bin_out_firth_masks.{bed,bim,fam} ) and tested for association with each binary trait using the approximate Firth test (summary stats in test_bin_out_firth_<phenotype_name>.regenie ). Note that the test uses the whole genome regression LOCO PRS from Step 1 of regenie (specified by --pred ). Checking input files To assess the concordance between the input files for building masks, you can use --check-burden-files which will generate a report in file_masks_report.txt containing: for each set, the list of variants in the set-list file which are unrecognized (not genotyped or not present in annotation file for the set) for each mask, the list of annotations in the mask definition file which are not in the annotation file Additionally, you can use --strict-check-burden to enforce full agreement between the three files (if not, the program will terminate): all genotyped variants in the set list file must be in the annotation file (for the corresponding set) all annotations in the mask definition file must be present in the annotation file Interaction testing Starting from regenie v3.0, you can perform scans for interactions (either GxE or GxG). For GxE tests, the interacting variable should be part of the covariate file (if it is categorical, specify it in --catCovarList ). 
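For example, a hypothetical GxE scan reusing the example files from the Getting started section, with the covariate V1 playing the role of the exposure (this choice of interacting covariate is ours, for illustration only): ./regenie --step 2 --bgen example/example.bgen --covarFile example/covariates.txt --phenoFile example/phenotype_bin.txt --bsize 200 --bt --firth --approx --interaction V1 --pred fit_bin_out_pred.list --out test_bin_out_gxe . 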
For GxG tests, the interacting variant can be part of the input genetic file or it can be present in an external file (see --interaction-file ). Options Option Argument Type Description --interaction STRING Optional to run GxE test specifying the interacting covariate (see below) --interaction-snp STRING Optional to run GxG test specifying the interacting variant (see below) --interaction-file FORMAT,FILE Optional external genotype file containing the interacting variant [FORMAT can be bed/bgen/pgen and FILE is the file name (bgen) or file prefix (bed/pgen)] --interaction-file-sample FILE Optional accompanying sample file for BGEN format --interaction-file-reffirst FLAG Optional use the first allele as the reference for BGEN or PLINK BED formats --no-condtl FLAG Optional to print out all the main effects from the interaction model (see Output section below) --force-condtl FLAG Optional to include the interacting SNP as a covariate in the marginal test (see Output section below) --rare-mac FLOAT Optional minor allele count (MAC) threshold below which to use HLM method for QTs [default is 1000] For GxE tests where the interacting variable is categorical, you can specify the baseline level using --interaction VARNAME[BASE_LEVEL] (e.g. --interaction BMI[<25] ). Otherwise, the first value found in the covariate file will be used as the baseline level. For GxG tests, the default coding for the interacting variant is additive. If you would like to use dominant/recessive/categorical coding, use --interaction-snp SNP_NAME[dom/rec/cat] (for example with dominant coding, --interaction-snp SNPNAME[dom] will allow for separate effects between carriers vs non-carriers of the interacting variant). The allowed values in the brackets are add/dom/rec/cat . Output The result files will contain multiple lines for the same variant corresponding to the different null hypotheses being tested in the interaction model $g(\\\\mu) = E\\\\alpha + G\\\\beta + (G\\\\odot E)\\\\gamma$ The suffix in the \\\"TEST\\\" column indicates which hypothesis is being tested: \\\"ADD\\\": marginal test where the interacting variable has not been added as a covariate $-$ this corresponds to $H_0: \\\\beta = 0$ given $\\\\alpha=\\\\gamma = 0$ this is only printed for GxG tests by default, or GxE using --no-condtl \\\"ADD-CONDTL\\\": marginal test where the interacting variable has been added as a covariate (default for GxE tests) $-$ this corresponds to $H_0: \\\\beta = 0$ given $\\\\gamma = 0$ this is only printed for GxE tests by default, or GxG using --force-condtl \\\"ADD-INT_VAR\\\": test for the main effect of the interaction variable (\\\"VAR\\\" will be replaced by the name of the interacting variable) $-$ this corresponds to $H_0: \\\\alpha = 0$ this is only printed for GxG tests by default, or GxE using --no-condtl If the interacting variable is categorical, you will have separate lines for each level aside from the baseline level (e.g. 
\\\"ADD-INT_BMI=25-30\\\" and \\\"ADD-INT_BMI=30+\\\" where baseline level is \\\"$<$25\\\") will also output the effect of $E^2$ in \\\"ADD-INT_VAR^2\\\" if the trait is binary (see here ) \\\"ADD-INT_SNP\\\": test for main effect of tested SNP in the interaction model $-$ this corresponds to $H_0: \\\\beta = 0$ \\\"ADD-INT_SNPxVAR\\\": test for interaction effect (\\\"VAR\\\" will be replaced by the name of the interacting variable) $-$ this corresponds to $H_0: \\\\gamma = 0$ If the interacting variable is categorical, you will have separate lines for each level aside from the baseline level (e.g. \\\"ADD-INT_SNPxBMI=25-30\\\" and \\\"ADD-INT_SNPxBMI=30+\\\" where baseline level is \\\"$<$25\\\") With Firth correction, only the effect sizes for the interaction effect at each level will be reported and the LRT p-value will only be computed for the joint test on the interaction effects \\\"ADD-INT_$k$DF\\\": joint test for main and interaction effect of tested variant ($k\\\\ge2$ for categorical interacting variables) $-$ this corresponds to $H_0: \\\\beta = \\\\gamma = 0$ Conditional analyses Starting from regenie v3.0, you can specify genetic variants to add to the set of covariates when performing association testing. This works in both step 1 and 2, and can be used in conjunction with the gene-based tests or the interactiong testing feature. The conditioning variants will automatically be ignored from the analysis. Option Argument Type Description --condition-list FILE Required file with list of variants to condition on --condition-file FORMAT,FILE Optional get conditioning variants from external file (same argument format as --interaction-file ) --condition-file-sample FILE Optional accompagnying sample file for BGEN format --max-condition-vars INT Optional maximum number of conditioning variants [default is 10,000] Survival analyses Starting from regenie v4.0, you can conduct survival analysis for time-to-event data. Phenotype file format In this small example, there are 5 samples, and the event of interest is the diagnosis of cancer over a period of 10 years. Sample 1 is diagnosed with cancer during the study; the time variable is the number of years until the sample is diagnosed with cancer. Sample 2 drops out of the study; sample 3 dies during the study; sample 4 and 5 complete the study without being diagnosed with cancer; they are all right-censored, and the time variable is the last encounter or death time. The corresponding phenotype file is FID IID Time Cancer 1 1 6 1 2 2 5 0 3 3 2 0 4 4 10 0 5 5 10 0 Required options Survival analysis in regenie requires the following specific options in step 1, step 2 and gene-based burden tests. Option Argument Type Description --t2e FLAG Required specify the traits are time-to-event data --phenoColList STRING Required Comma separated list of time names to include in the analysis --eventColList STRING Required Comma separated list of columns in the phenotype file to include in the analysis that contain the events. These event columns should have 0=no event,1=event,NA=missing For the example above, the regenie call is ./regenie \\\\ --t2e \\\\ --phenoColList Time \\\\ --eventColList Cancer \\\\ ... For a phenotype file containing multiple time-to-event traits, the order of censor variables listed in --eventColList should match the order of time names specified in --phenoColList . 
For example, the phenotype file is FID IID Cancer_Time Cancer Asthma_Time Asthma 1 1 6 1 4 0 2 2 5 0 8 1 The regenie call is ./regenie \\\\ --t2e \\\\ --phenoColList Cancer_Time,Asthma_Time \\\\ --eventColList Cancer,Asthma \\\\ ... The output format is the same as the output file for quantitative and binary traits , with the BETA column containing the estimated hazard ratio (on the log scale). LD computation REGENIE can calculate LD between a group of variants on the same chromosome. Option Argument Type Description --compute-corr FLAG Required compute LD matrix and write to binary file --output-corr-text FLAG Optional write Pearson correlations to text file --forcein-vars FLAG Optional retain in the LD matrix all variants specified in --extract which are absent from the genetic data --ld-extract FILE Optional file listing single variants as well as burden masks to include in LD matrix (see below) Note that this can be quite memory intensive for large groups of variants (memory ~$8M^2$ bytes for $M$ variants). Output Using --step 2 --out file By default, the LD matrix is stored in a binary compressed file file.corr and the list of variants corresponding to the columns of the LD matrix are stored in file.corr.snplist . The R script scripts/parseLD.r contains a function which returns the LD matrix, e.g. get.corr.sq.matrix(\\\"file.corr\\\") . Using --output-corr-text will write the Pearson correlations to a text file instead. When using --forcein-vars , variants not present in the genetic data will be added as extra columns/rows in the LD matrix. For these variants, the diagonal entries in the matrix will be set to 1 and the off-diagonal entries to 0. Using --ld-extract info.txt This option is used to compute LD between single variants and burden masks generated on-the-fly in REGENIE; it requires specifying annotation files . The file info.txt should have three columns: variant type ('sv' or 'mask'), variant name, followed by the set (e.g. gene) name (this can be 'NA' for single variants). For example, it would look like: sv 1:1111:A:G NA sv 1:2222:C:T NA mask PCSK9.M1.0.01 PCSK9 . Note that the set and mask names must match those used in REGENIE based on provided annotation files and allele frequency cutoffs. Variants/masks not present in the data will be kept in the LD matrix but will have the corresponding correlations set to 0.\",\"title\":\"Documentation\"},{\"location\":\"options/#getting-started\",\"text\":\"To run regenie , use the command ./regenie on the command line, followed by options and flags as needed. To get a full list of options use ./regenie --help The directory examples/ contains some small example files that are useful when getting started. A test run on a set of binary traits can be achieved by the following 2 commands. 
In Step 1 , the whole genome regression model is fit to the traits, and a set of genomic predictions are produced as output ./regenie \\\\ --step 1 \\\\ --bed example/example \\\\ --exclude example/snplist_rm.txt \\\\ --covarFile example/covariates.txt \\\\ --phenoFile example/phenotype_bin.txt \\\\ --remove example/fid_iid_to_remove.txt \\\\ --bsize 100 \\\\ --bt --lowmem \\\\ --lowmem-prefix tmp_rg \\\\ --out fit_bin_out In Step 2 , a set of imputed SNPs are tested for association using a Firth logistic regression model ./regenie \\\\ --step 2 \\\\ --bgen example/example.bgen \\\\ --covarFile example/covariates.txt \\\\ --phenoFile example/phenotype_bin.txt \\\\ --remove example/fid_iid_to_remove.txt \\\\ --bsize 200 \\\\ --bt \\\\ --firth --approx \\\\ --pThresh 0.01 \\\\ --pred fit_bin_out_pred.list \\\\ --out test_bin_out_firth One of the output files from these two commands is included in example/test_bin_out_firth_Y1.regenie .\",\"title\":\"Getting started\"},{\"location\":\"options/#basic-options\",\"text\":\"\",\"title\":\"Basic options\"},{\"location\":\"options/#input\",\"text\":\"Option Argument Type Description --bgen, --bed, --pgen FILE Required Input genetic data file. Either BGEN file e.g. file.bgen , or bed/bim/fam prefix that assumes file.bed , file.bim , file.fam exist, or pgen/pvar/psam prefix that assumes file.pgen , file.pvar , file.psam exist --sample FILE Optional Sample file corresponding to input BGEN file --bgi FILE Optional Index bgi file corresponding to input BGEN file --ref-first FLAG Optional Specify to use the first allele as the reference allele for BGEN or PLINK bed/bim/fam file input [default is to use the last allele as the reference] --keep FILE Optional Inclusion file that lists individuals to retain in the analysis --remove FILE Optional Exclusion file that lists individuals to remove from the analysis --extract FILE Optional Inclusion file that lists IDs of variants to keep --exclude FILE Optional Exclusion file that lists IDs of variants to remove --extract-or FILE Optional Inclusion file that lists IDs of variants to keep regardless of minimum MAC filter --exclude-or FILE Optional Exclusion file that lists IDs of variants to remove unless MAC is above threshold --phenoFile FILE Required Phenotypes file --phenoCol STRING Optional Use for each phenotype you want to include in the analysis --phenoColList STRING Optional Comma separated list of phenotypes to include in the analysis --eventColList STRING Optional Comma separated list of columns in the phenotype file to include in the analysis that contain the event indicators --phenoExcludeList STRING Optional Comma separated list of phenotypes to ignore from the analysis --covarFile FILE Optional Covariates file --covarCol STRING Optional Use for each covariate you want to include in the analysis --covarColList STRING Optional Comma separated list of covariates to include in the analysis --catCovarList STRING Optional Comma separated list of categorical covariates to include in the analysis --covarExcludeList STRING Optional Comma separated list of covariates to ignore --pred FILE Optional File containing predictions from Step 1 (see Overview). This is required for --step 2 --tpheno-file STRING Optional to use a phenotype file in transposed format (e.g. 
BED format) --tpheno-indexCol INT Optional index of phenotype name column in transposed phenotype file --tpheno-ignoreCols INT Optional indexes of columns to ignore in transposed phenotype file --iid-only FLAG Optional to specify if header in transposed phenotype file only contains sample IID (assume FID=IID) Note: Parameter expansion can be used when specifying phenotypes/covariates (e.g. --covarCol PC{1:10} ). Also, multiple files can be specified for --extract/--exclude/--keep/--remove by using a comma-separated list.\",\"title\":\"Input\"},{\"location\":\"options/#genetic-data-file-format\",\"text\":\"regenie can read BGEN files, bed/bim/fam files or pgen/psam/pvar files in Step 1 and Step 2. The BGEN file format is described here . The bed/bim/fam file format is described here . The pgen/pvar/psam file format is described here . Tools useful for genetic data file format conversion are: PLINK , QCTOOL , BCFTOOLS . Step 2 of regenie can be sped up by using BGEN files in v1.2 format with 8-bit encoding (genotype file can be generated with PLINK2 using option --export bgen-1.2 'bits=8' ) as well as having an accompanying .bgi index file (a useful tool to create such a file is bgenix , which is part of the BGEN library). To include X chromosome genotypes in step 1 and/or step 2, males should be coded as diploid so that their genotypes are 0/2 (this is done automatically for BED and PGEN file formats with haploid genotypes). Chromosome values of 23 (for human analyses), X, Y, XY, PAR1 and PAR2 are all acceptable and will be collapsed into a single chromosome.\",\"title\":\"Genetic data file format\"},{\"location\":\"options/#sample-inclusionexclusion-file-format\",\"text\":\"2 2 7 7 . No header. Each line starts with individual FID IID. Space/tab separated. Samples listed in the file that are not in bgen/bed/pgen file are ignored.\",\"title\":\"Sample inclusion/exclusion file format\"},{\"location\":\"options/#variant-inclusionexclusion-file-format\",\"text\":\"20 31 . No header. Each line must start with variant ID (if there are additional columns, file must be space/tab separated). Variants listed in this file that are not in bgen/bed/pgen file are ignored.\",\"title\":\"Variant inclusion/exclusion file format\"},{\"location\":\"options/#covariate-file-format\",\"text\":\"FID IID V1 V2 V3 1 1 1.46837294454993 1.93779743016325 0.152887004505393 2 2 -1.2234390803815 -1.63408619199948 -0.190201446835255 3 3 0.0711531925667286 0.0863906292357564 0.14254739715665 . Line 1 : Header with FID, IID and C covariate names. Followed by lines of C+2 values. Space/tab separated. Each line contains individual FID and IID followed by C covariate values. Samples listed in this file that are not in bgen/bed/pgen file are ignored. Genotyped samples that are not in this file are removed from the analysis as well as samples with missing values at any of the covariates included. If --step 2 is specified, then the covariate file should be the same as that used in Step 1.\",\"title\":\"Covariate file format\"},{\"location\":\"options/#phenotype-file-format\",\"text\":\"FID IID Y1 Y2 1 1 1.64818554321186 2.2765234736685 2 2 -2.67352013711554 -1.53680421614647 3 3 0.217542851471485 0.437289912695016 . Line 1 : Header with FID, IID and P phenotype names. Followed by lines of P+2 values. Space/tab separated. Each line contains individual FID and IID followed by P phenotype values (for binary traits, must be coded as 0=control, 1=case, NA=missing unless using --1 ). 
Samples listed in this file that are not in bgen/bed/pgen file are ignored. Genotyped samples that are not in this file are removed from the analysis. Missing values must be coded as NA. With QTs, missing values are mean-imputed in Step 1 and they are dropped when testing each phenotype in Step 2 (unless using --force-impute ). With BTs, missing values are mean-imputed in Step 1 when fitting the level 0 linear ridge regression and they are dropped when fitting the level 1 logistic ridge regression for each trait . In Step 2, missing values are dropped when testing each trait. To remove all samples that have missing values at any of the P phenotypes, use option --strict in Step 1 and 2. If using the transposed phenotype file format with option --tpheno-file , the header line must contain subject IDs as \\\"FID_IID\\\", otherwise use option --iid-only and only include IIDs (so will assume FID=IID).\",\"title\":\"Phenotype file format\"},{\"location\":\"options/#predictions-file-format\",\"text\":\"Running --step 1 --out foo will produce A set of files containing genomic predictions for each phenotype from Step 1 (see Output section below). A file called foo_pred.list listing the locations of the prediction files. The file list is needed as an input file when using --step 2 via the --pred option. It has one line per phenotype (in any order) that specifies the name of the phenotype and its corresponding prediction file name. Each phenotype must have exactly one prediction file and phenotype names must match those in the phenotype file. Phenotypes in this file not included in the analysis are ignored. Each prediction file contains the genetic predictions for the phenotype (space separated). Line 1 starts with 'FID_IID' followed by $N$ sample identifiers. It is followed by 23 lines containing the genetic predictions for each chromosome (sex chromosomes are collapsed into chromosome 23). More specifically, each line has $N+1$ values which are the chromosome number followed by the $N$ leave-one-chromosome-out (LOCO) predictions for each individual. Samples in this file not in the bed/pgen/bgen input file are ignored. Genotyped samples not present in this file will be ignored in the analysis of the corresponding trait. Samples with missing LOCO predictions must have their corresponding phenotype value set to missing.\",\"title\":\"Predictions file format\"},{\"location\":\"options/#options\",\"text\":\"Option Argument Type Description --step INT Required specify step for the regenie run (see Overview) [argument can be 1 or 2 ] --qt FLAG Optional specify that traits are quantitative (this is the default so can be omitted) --bt FLAG Optional specify that traits are binary with 0=control,1=case,NA=missing --t2e FLAG Optional specify that traits are time-to-event data with 0=censoring,1=event,NA=missing in event column -1,--cc12 FLAG Optional specify to use 1/2/NA encoding for binary traits (1=control,2=case,NA=missing) --bsize INT Required size of the genotype blocks --cv INT Optional number of cross validation (CV) folds [default is 5] --loocv FLAG Optional flag to use leave-one-out cross validation --lowmem FLAG Optional flag to reduce memory usage by writing level 0 predictions to disk (details below). This is very useful if the number of traits is large (e.g. 
greater than 10) --lowmem-prefix FILE PREFIX Optional prefix for temporarily writing the level 0 predictions --split-l0 PREFIX,N Optional split level 0 across N jobs and set the prefix of the output files of the level 0 predictions --run-l0 FILE,K Optional run level 0 for job K in {1..N} specifying the master file created from '--split-l0' --run-l1 FILE Optional run level 1 specifying the master file from '--split-l0' --l1-phenoList STRING Optional to specify a subset of phenotypes to analyze when using --run-l1 --keep-l0 FLAG Optional avoid deleting the level 0 predictions written on disk after fitting the level 1 models --print-prs FLAG Optional flag to print whole genome predictions (i.e. PRS) without using the LOCO scheme --force-step1 FLAG Optional flag to run step 1 when >1M variants are used (not recommended) --minCaseCount INT Optional flag to ignore BTs with low case counts [default is 10] --apply-rint FLAG Optional to apply Rank Inverse Normal Transformation (RINT) to quantitative phenotypes (use in both Step 1 & 2) --nb INT Optional number of blocks (determined from block size if not provided) --strict FLAG Optional flag to remove samples with missing data at any of the phenotypes --ignore-pred FLAG Optional skip reading the file specified by --pred (corresponds to simple linear/logistic regression) --htp STRING Optional to output the summary statistics file in the HTP format (string should correspond to cohort name, e.g. 'UKB_450_EUR') --exact-p FLAG Optional avoid capping p-values at 2.2E-307 in the HTP format summary statistics output --use-relative-path FLAG Optional to use relative paths instead of absolute ones for the step 1 output pred.list file --use-prs FLAG Optional flag to use whole genome PRS in --pred (this is output in step 1 when using --print-prs ) --gz FLAG Optional flag to output files in compressed gzip format (LOCO prediction files in step 1 and association results files in step 2) [this only works when compiling with the Boost Iostream library (see Install tab)] . 
--force-impute FLAG Optional flag to keep and impute missing observations for QTs in step 2 --write-samples FLAG Optional flag to write sample IDs for those kept in the analysis for each trait in step 2 --print-pheno FLAG Optional flag to write the phenotype name in the first line of the sample ID files when using --write-samples --firth FLAG Optional specify to use Firth likelihood ratio test (LRT) as fallback for p-values less than threshold --approx FLAG Optional flag to use approximate Firth LRT for computational speedup (only works when option --firth is used) --firth-se FLAG Optional flag to compute SE based on effect size and LRT p-value when using Firth correction (instead of based on Hessian of unpenalized log-likelihood) --write-null-firth FLAG Optional to write the null estimates for approximate Firth [can be used in step 1 or 2] --compute-all FLAG Optional to write the null Firth estimates for all chromosomes (regardless of the genotype file) --use-null-firth FILE Optional to use stored null estimates for approximate Firth in step 2 --spa FLAG Optional specify to use Saddlepoint approximation as fallback for p-values less than threshold --pThresh FLOAT Optional P-value threshold below which to apply Firth/SPA correction [default is 0.05] --test STRING Optional specify to carry out dominant or recessive test [default is additive; argument can be dominant or recessive ] --chr INT Optional specify which chromosomes to test in step 2 (use for each chromosome to include) --chrList STRING Optional Comma-separated list of chromosomes to test in step 2 --range STRING Optional specify chromosome region for variants to test in step 2 [format=CHR:MINPOS-MAXPOS] --minMAC FLOAT Optional flag to specify the minimum minor allele count (MAC) when testing variants [default is 5]. Variants with lower MAC are ignored. --minINFO FLOAT Optional flag to specify the minimum imputation info score (IMPUTE/MACH R^2) when testing variants. Variants with lower info score are ignored. --sex-specific STRING Optional to perform sex-specific analyses [either 'male'/'female'] --af-cc FLAG Optional to output A1FREQ in cases/controls separately in the step 2 result file --no-split FLAG Optional flag to have summary statistics for all traits output in the same file --starting-block INT Optional to start step 2 at a specific block/set number (useful if program crashes during a job) --nauto INT Optional number of autosomal chromosomes (for non-human studies) [default is 22] --maxCatLevels INT Optional maximum number of levels for categorical covariates [default is 10] --niter INT Optional maximum number of iterations for logistic regression [default is 30] --maxstep-null INT Optional maximum step size for logistic model with Firth penalty under the null [default is 25] --maxiter-null INT Optional maximum number of iterations for logistic model with Firth penalty under the null [default is 1000] --par-region STRING Optional specify build code to determine bounds for PAR1/PAR2 regions (can be 'b36/b37/b38/hg18/hg19/hg38' or 'start,end' bp bounds of non-PAR region) [default is hg38] --force-qt FLAG Optional force QT run for binary traits --threads INT Optional number of computational threads to use [default=all-1] --debug FLAG Optional debug flag (for use by developers) --verbose FLAG Optional verbose screen output --version FLAG Optional print version number and exit --help FLAG Optional print usage and options list to screen When step 1 of regenie is run in low memory mode (i.e. 
using --lowmem ), temporary files are created on disk (option --lowmem-prefix tmp_prefix determines where the files are written [as in tmp_prefix_l0_Y1 ,..., tmp_prefix_l0_YP for P phenotypes]). If the prefix is not specified, the default is to use the prefix specified by --out (see below). These are automatically deleted at the end of the program (unless the run was not successful, in which case the user will need to delete the files). See the Wiki page for more details on how to run the level 0 models for Step 1 of regenie in parallel.\",\"title\":\"Options\"},{\"location\":\"options/#output\",\"text\":\"Option Argument Type Description --out FILE PREFIX Required Output file prefix; the output files depend on --step A log file file.log of the output is generated. Using --step 1 --out file For the P phenotypes, files file_1.loco ,..., file_P.loco are output with the per-chromosome LOCO predictions as rows of the files (following the order of the phenotypes in the phenotype file header). If option --gz was used, the files will be compressed in gzip format and have extension .loco.gz . Genotyped individuals specified using option --remove are excluded from this file. Individuals with missing phenotype values kept in the analysis are included in the file and have their predictions set to missing. The list of blup files needed for step 2 (association testing) is written to file_pred.list . If using --print-prs , files file_1.prs ,..., file_P.prs will be written with the whole genome predictions (i.e. PRS) without using the LOCO scheme (same format as the .loco files). The list of these files is written to file_prs.list and can be used in step 2 with --pred and specifying flag --use-prs . Note that as these are not obtained using a LOCO scheme, association tests could suffer from proximal contamination. If using option --write-null-firth , the estimates for approximate Firth under the null will be written to files file_1.firth,...,file_P.firth and the list of these files is written to file_firth.list . This can be used in step 2 as --use-null-firth file_firth.list . Note that it assumes the same set of covariates is used in Step 1 and 2. Using --step 2 --out file By default, results are written in separate files for each phenotype file_<phenotype1_name>.regenie,...,file_<phenotypeP_name>.regenie . Each file has one line per SNP along with a header line. If option --gz was used, the files will be compressed in gzip format and have extension .regenie.gz . The entries of each row specify chromosome, position, ID, reference allele (allele 0), alternative allele (allele 1), frequency of the alternative allele, sample size and the test performed (additive/dominant/recessive). With BGEN/PGEN files with dosages, the imputation INFO score is provided (IMPUTE info score for BGEN and Mach Rsq for PGEN). Allele frequency, sample size and INFO score, if applicable, are computed using only non-missing samples for each phenotype. These are followed by the estimated effect sizes (for allele 1 on the original scale), standard errors, chi-square test statistics and -\\log_{10} p-values. An additional column is included to specify if Firth/SPA corrections failed. With option --no-split , the summary statistics for all traits are written to a single file file.regenie , with the same format as above. Additionally, an accompanying file with the trait names corresponding to Y1,Y2,... will be generated in 'file.regenie.Ydict'. Note that allele frequency, sample size and INFO score are computed using all analyzed samples. 
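As an illustrative post-processing sketch (not a regenie option): assuming the default space-separated output and that the header includes a LOG10P column holding the -\\log_{10} p-values (an assumption about the header; check your own output), genome-wide significant rows could be extracted with a one-liner such as awk 'NR==1{for(i=1;i<=NF;i++) if($i==\\\"LOG10P\\\") c=i} NR==1 || $c>7.3' file_Y1.regenie , where 7.3 is approximately -\\log_{10}(5e-8). 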
With option --htp , the summary statistics file will follow the HTP format. If option --write-samples was used, IDs of samples used for each trait will be written in files file_<phenotype1_name>.regenie.ids,...,file_<phenotypeP_name>.regenie.ids (tab separated, no header). When using --par-region , the default boundaries used for the chrX PAR regions are: b36/hg18: 2709520 and 154584238 b37/hg19: 2699520 and 154931044 b38/hg38: 2781479 and 155701383\",\"title\":\"Output\"},{\"location\":\"options/#gene-based-testing\",\"text\":\"Starting from version 3.0, Step 2 of regenie provides a complementary set of gene-based tests in addition to the burden testing functionality introduced in version 2.0. More specifically, for a given set of variants (e.g. within a gene) which can be defined using functional annotations, regenie can apply various set-based tests on the variants as well as collapse them into a single combined 'mask' genotype that can be tested for association just like a single variant.\",\"title\":\"Gene-based testing\"},{\"location\":\"options/#input_1\",\"text\":\"Option Argument Type Description --anno-file FILE Required File with variant annotations for each set --set-list FILE Required File listing variant sets --extract-sets FILE Optional Inclusion file that lists IDs of variant sets to keep --exclude-sets FILE Optional Exclusion file that lists IDs of variant sets to remove --extract-setlist STRING Optional Comma-separated list of variant sets to keep --exclude-setlist STRING Optional Comma-separated list of variant sets to remove --aaf-file FILE Optional File with variant AAF to use when building masks (instead of AAF estimated from sample) --mask-def FILE Required File with mask definitions using the annotations defined in --anno-file Note: multiple files can be specified for --extract-sets/--exclude-sets by using a comma-separated list.\",\"title\":\"Input\"},{\"location\":\"options/#annotation-input-files\",\"text\":\"The following files are used to define variant sets and functional annotations which will be used to generate masks.\",\"title\":\"Annotation input files\"},{\"location\":\"options/#annotation-file\",\"text\":\"1:55039839:T:C PCSK9 LoF 1:55039842:G:A PCSK9 missense . This file defines functional annotations for variants. It is designed to accommodate variants with separate annotations for different sets/genes. Each line contains the variant name, the set/gene name and a single annotation category (space/tab separated). Variants not in this file will be assigned to a default \\\"NULL\\\" category. A maximum of 63 annotation categories (+NULL category) is allowed. For gene sets, tools you can use to obtain variant annotations per transcript are snpEFF or VEP . To obtain a single annotation per gene, you could choose the most deleterious functional annotation across the gene transcripts or alternatively use the canonical transcript (note that its definition can vary across software). We have implemented an extended 4-column format of the annotation file which also categorizes sets into domains (e.g. for gene sets, these would correspond to gene domains). 1:55039839:T:C PCSK9 Prodomain LoF 1:55039842:G:A PCSK9 Prodomain missense . Masks will be generated for each domain (maximum of 8 per set/gene) in addition to a mask combining across all domains. Variants can only be assigned to a single domain for each set/gene. 
Starting with v4.1, you can also specify custom variant weights which will be used in the burden, SKAT/SKAT-O and ACAT-V tests ($w_i$'s in the gene-based testing overview ). Multiple weights can be included in the annotation file after the 3rd column, e.g. 1:55039839:T:C PCSK9 LoF 0.9 0.812 1 1:55039842:G:A PCSK9 missense 0.4 0.23 0.55 . Using --weights-col 4 will use the weights in the 4th column for the gene-based tests.\",\"title\":\"Annotation file\"},{\"location\":\"options/#set-list-file\",\"text\":\"This file lists variants within each set/gene to use when building masks. Each line contains the set/gene name followed by a chromosome and physical position for the set/gene, then a comma-separated list of variants included in the set/gene. A1BG 19 58346922 19:58346922:C:A,19:58346924:G:A,... A1CF 10 50806630 10:50806630:A:G,10:50806630:A:AT,... .\",\"title\":\"Set list file\"},{\"location\":\"options/#set-inclusionexclusion-file-format\",\"text\":\"The file must have a single column of set/gene names corresponding to those in the set list file. PIGP ZBTB38 .\",\"title\":\"Set inclusion/exclusion file format\"},{\"location\":\"options/#aaf-file-optional\",\"text\":\"Both functional annotations and alternative allele frequency (AAF) cutoffs are used when building masks (e.g. only considering LoF sites where AAF is below 1%). By default, the AAF for each variant is computed from the sample; alternatively, the user can specify variant AAFs using this file. Each line contains the variant name followed by its AAF (it should be for the ALT allele used in the genetic data input). AAF must be a numerical value (i.e. it cannot be '.'). 7:6187101:C:T 1.53918207864341e-05 7:6190395:C:A 2.19920388819247e-06 . Since singleton variants cannot be identified from this file, they are determined by default based on the input genetic data. To enforce which sites should be included in the singleton masks (see --set-singletons ), you can add a third column in the file with a binary indicator (1=singleton; 0=not singleton). Only variants specified as singletons will then be considered for the singleton masks, regardless of whether they are singletons in the input genetic data. Note that with this flag, singleton sites will be included in all masks (regardless of the AAF in the file). 7:6187101:C:T 1.53918207864341e-05 0 7:6190395:C:A 2.19920388819247e-06 1 .\",\"title\":\"AAF file (optional)\"},{\"location\":\"options/#mask-definitions\",\"text\":\"\",\"title\":\"Mask definitions\"},{\"location\":\"options/#mask-file\",\"text\":\"This file specifies which annotation categories should be combined into masks. Each line contains a mask name followed by a comma-separated list of categories included in the mask (i.e. union is taken over categories). In the example below, Mask1 uses only LoF variants and Mask2 uses LoF and missense annotated variants. Mask1 LoF Mask2 LoF,missense .\",\"title\":\"Mask file\"},{\"location\":\"options/#aaf-cutoffs\",\"text\":\"Option --aaf-bins specifies the AAF upper bounds used to generate burden masks ( AAF and not MAF [minor allele frequency] is used when deciding which variants go into a mask) . By default, a mask based on singleton sites is always included. For example, --aaf-bins 0.01,0.05 will generate 3 burden masks for AAFs in [0,0.01], [0,0.05] and singletons.\",\"title\":\"AAF cutoffs\"},{\"location\":\"options/#skatacat-tests\",\"text\":\"The option --vc-tests is used to specify the gene-based tests to run. 
By default, these tests use all variants in each mask category. If you'd like to only include variants whose AAF is below a given threshold, e.g. to only include rare variants, you can use --vc-maxAAF . Test Name in regenie Description SKAT skat Variance component test SKATO skato Omnibus test combining features of SKAT and Burden SKATO-ACAT skato-acat Same as SKATO but using Cauchy combination method to maximize power across SKATO models ACATV acatv Test using Cauchy combination method to combine single-variant p-values ACATO acato Omnibus test combining features of ACATV, SKAT and Burden ACATO-FULL acato-full Same as ACATO but using the larger set of SKATO models used in the SKATO test For example, --vc-tests skato,acato-full will run SKATO and ACATO (both using the default grid of 8 rho values for the SKATO models) and the p-values for SKAT, SKATO, ACATV and ACATO will be output. Ultra-rare variants (defined by default as MAC$\\le$10, see --vc-MACthr ) are collapsed into a burden mask which is then included in the tests instead of the individual variants. For additional details on the tests, see here .\",\"title\":\"SKAT/ACAT tests\"},{\"location\":\"options/#joint-test-for-burden-masks\",\"text\":\"The following tests can be used to combine different burden masks generated using different annotation classes as well as AAF thresholds. Test Name in regenie QT BT Robust to LD Assumes same effect direction Minimum P-value minp $\\checkmark$ $\\checkmark$ $\\times$ $\\times$ ACAT acat $\\checkmark$ $\\checkmark$ $\\checkmark$ $\\times$ SBAT sbat $\\checkmark$ $\\times$ $\\checkmark$ $\\checkmark$ The ACAT test combines the p-values of the individual burden masks using the Cauchy combination method (see ref. 14 here ). The SBAT test is described in more detail here . If you only want to output the results for the joint tests (ignoring the marginal tests), use --joint-only .\",\"title\":\"Joint test for burden masks\"},{\"location\":\"options/#lovolodo-schemes\",\"text\":\"The leave-one-variant-out (LOVO) scheme takes all sites going into a mask, and builds LOVO masks by leaving out one variant at a time from the full set of sites. The mask including all sites will also be computed. The argument for --mask-lovo is a comma-separated list which consists of the set/gene name, the mask name, and the AAF cutoff (either 'singleton' or a double in (0,1)). If using a 4-column annotation file, then --mask-lovo should have the gene name, the domain name, the mask name, and the AAF cutoff. In that case, the LOVO masks will be generated for a specific gene domain. The leave-one-domain-out (LODO) scheme (specified by --mask-lodo ) takes all sites going into a mask and builds a LODO mask for each domain specified for the gene by excluding all variants in the domain. The full mask including all sites will also be computed. The argument for --mask-lodo should have the gene name, the mask name and the AAF cutoff.\",\"title\":\"LOVO/LODO schemes\"},{\"location\":\"options/#writing-mask-files\",\"text\":\"Burden masks built in regenie can be written to PLINK bed format. If the input genetic data contains dosages, the mask dosages will be converted to hard-calls prior to being written to file and these hard-calls will be used for the association testing. The PLINK bed file is written using 'ref-last' encoding (i.e. REF allele is listed last in the bim file). 
Note that this cannot be used with the LOVO/LODO schemes.\",\"title\":\"Writing mask files\"},{\"location\":\"options/#options_1\",\"text\":\"Option Argument Type Description --aaf-bins FLOAT,...,FLOAT Optional comma-separated list of AAF upper bounds to use when building masks [default is a single cutoff of 1%] --build-mask STRING Optional build masks using the maximum number of ALT alleles across sites ( 'max' ; the default), or the sum of ALT alleles ( 'sum' ), or thresholding the sum to 2 ( 'comphet' ) --singleton-carrier FLAG Optional to define singletons as variants with a single carrier in the sample (rather than alternative allele count=1) --set-singletons FLAG Optional to use 3rd column in AAF file to specify variants included in singleton masks --write-mask FLAG Optional write mask to PLINK bed format (does not work when building masks with 'sum') --vc-tests STRING Optional comma-separated list of SKAT/ACAT-type tests to run --vc-maxAAF FLOAT Optional AAF upper bound to use for SKAT/ACAT-type tests [default is 100%] --skat-params FLOAT,FLOAT Optional a1,a2 values for the single variant weights computed from Beta(MAF,a1,a2) used in SKAT/ACAT-type tests [default is (1,25)] --skato-rho FLOAT,...,FLOAT Optional comma-separated list of $\\rho$ values used for SKATO models --vc-MACthr FLOAT Optional MAC threshold below which to collapse variants in SKAT/ACAT-type tests [default is 10] --joint STRING Optional comma-separated list of joint tests to apply on the generated burden masks --rgc-gene-p FLAG Optional to compute the GENE_P test --skip-test FLAG Optional to skip computing association tests after building masks and writing them to file --mask-lovo STRING Optional to perform LOVO scheme --lovo-snplist FILE Optional File with list of variants for which to compute LOVO masks --mask-lodo FLAG Optional to perform LODO scheme --weights-col INT Optional column index (1-based) in annotation file to use custom weights in gene-based tests --write-mask-snplist FLAG Optional to write list of variants that went into each mask to file --check-burden-files FLAG Optional to check the concordance between annotation, set list and mask files [see below ] --strict-check-burden FLAG Optional to exit early if the annotation, set list and mask definition files don't agree [see below ] Three rules can be used to build masks with --build-mask as shown in the diagram below, where the last rule comphet applies a threshold of 2 to the mask from the sum rule.\",\"title\":\"Options\"},{\"location\":\"options/#output_1\",\"text\":\"With --out file Results are written in separate files for each phenotype file_<phenotype1_name>.regenie,...,file_<phenotypeP_name>.regenie with the same output format mentioned above . Additionally, a header line is included (starting with ## ) which contains mask definition information. Masks are named <set_name>.<mask_name>.<AAF_cutoff> , with the chromosome and physical position taken from the set list file, the reference allele set to ref , and the alternate allele corresponding to <mask_name>.<AAF_cutoff> . When using --mask-lovo , the mask name will be the same as above but with the suffix _<variant_name> to specify the variant which was excluded when building the mask. With --build-mask sum , the reported mask AAF corresponds to the average AAF across sites included in the mask. 
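For example, following this naming convention, a mask defined as Mask1 in the mask definition file and built for gene PCSK9 with a 1% AAF cutoff would appear as PCSK9.Mask1.0.01 , and the corresponding singleton bin as PCSK9.Mask1.singleton (illustrative names; the exact cutoff label follows the values passed to --aaf-bins ). 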
If using --write-mask , the masks will be saved to file_masks.{bed,bim,fam} and if using --write-mask-snplist , the list of variants included in each mask will be saved to file_masks.snplist . When using --rgc-gene-p , it will apply the single p-value per gene GENE_P strategy using all masks (see here for details).\",\"title\":\"Output\"},{\"location\":\"options/#example-run\",\"text\":\"Using Step 1 results from the Step 1 command above , we use the following command to build and test masks in Step 2 ./regenie \\ --step 2 \\ --bed example/example_3chr \\ --covarFile example/covariates.txt \\ --phenoFile example/phenotype_bin.txt \\ --bt \\ --remove example/fid_iid_to_remove.txt \\ --firth --approx \\ --pred fit_bin_out_pred.list \\ --anno-file example/example_3chr.annotations \\ --set-list example/example_3chr.setlist \\ --mask-def example/example_3chr.masks \\ --aaf-bins 0.1,0.05 \\ --write-mask \\ --bsize 200 \\ --out test_bin_out_firth For each set, this will produce masks using 3 AAF cutoffs (singletons, 5% and 10% AAF). The masks are written to a PLINK bed file (in test_bin_out_firth_masks.{bed,bim,fam} ) and tested for association with each binary trait using the approximate Firth test (summary stats in test_bin_out_firth_<phenotype_name>.regenie ). Note that the test uses the whole genome regression LOCO PRS from Step 1 of regenie (specified by --pred ).\",\"title\":\"Example run\"},{\"location\":\"options/#checking-input-files\",\"text\":\"To assess the concordance between the input files for building masks, you can use --check-burden-files which will generate a report in file_masks_report.txt containing: for each set, the list of variants in the set-list file which are unrecognized (not genotyped or not present in the annotation file for the set) for each mask, the list of annotations in the mask definition file which are not in the annotation file Additionally, you can use --strict-check-burden to enforce full agreement between the three files (if not, the program will terminate): all genotyped variants in the set list file must be in the annotation file (for the corresponding set) all annotations in the mask definition file must be present in the annotation file\",\"title\":\"Checking input files\"},{\"location\":\"options/#interaction-testing\",\"text\":\"Starting from regenie v3.0, you can perform scans for interactions (either GxE or GxG). For GxE tests, the interacting variable should be part of the covariate file (if it is categorical, specify it in --catCovarList ). 
For GxG tests, the interacting variant can be part of the input genetic file or it can be present in an external file (see --interaction-file ).\",\"title\":\"Interaction testing\"},{\"location\":\"options/#options_2\",\"text\":\"Option Argument Type Description --interaction STRING Optional to run GxE test specifying the interacting covariate (see below) --interaction-snp STRING Optional to run GxG test specifying the interacting variant (see below) --interaction-file FORMAT,FILE Optional external genotype file containing the interacting variant [FORMAT can be bed/bgen/pgen and FILE is the file name (bgen) or file prefix (bed/pgen)] --interaction-file-sample FILE Optional accompanying sample file for BGEN format --interaction-file-reffirst FLAG Optional use the first allele as the reference for BGEN or PLINK BED formats --no-condtl FLAG Optional to print out all the main effects from the interaction model (see Output section below) --force-condtl FLAG Optional to include the interacting SNP as a covariate in the marginal test (see Output section below) --rare-mac FLOAT Optional minor allele count (MAC) threshold below which to use the HLM method for QTs [default is 1000] For GxE tests where the interacting variable is categorical, you can specify the baseline level using --interaction VARNAME[BASE_LEVEL] (e.g. --interaction BMI[<25] ). Otherwise, the first value found in the covariate file will be used as the baseline level. For GxG tests, the default coding for the interacting variant is additive. If you would like to use dominant/recessive/categorical coding, use --interaction-snp SNP_NAME[dom/rec/cat] (for example with dominant coding, --interaction-snp SNPNAME[dom] will allow for separate effects between carriers vs. non-carriers of the interacting variant). The allowed values in the brackets are add/dom/rec/cat .\",\"title\":\"Options\"},{\"location\":\"options/#output_2\",\"text\":\"The result files will contain multiple lines for the same variant corresponding to the different null hypotheses being tested in the interaction model g(\\mu) = E\\alpha + G\\beta + (G\\odot E)\\gamma The suffix in the \\\"TEST\\\" column indicates which hypothesis is being tested: \\\"ADD\\\": marginal test where the interacting variable has not been added as a covariate $-$ this corresponds to $H_0: \\beta = 0$ given $\\alpha=\\gamma = 0$ this is only printed for GxG tests by default, or GxE using --no-condtl \\\"ADD-CONDTL\\\": marginal test where the interacting variable has been added as a covariate (default for GxE tests) $-$ this corresponds to $H_0: \\beta = 0$ given $\\gamma = 0$ this is only printed for GxE tests by default, or GxG using --force-condtl \\\"ADD-INT_VAR\\\": test for the main effect of the interaction variable (\\\"VAR\\\" will be replaced by the name of the interacting variable) $-$ this corresponds to $H_0: \\alpha = 0$ this is only printed for GxG tests by default, or GxE using --no-condtl If the interacting variable is categorical, you will have separate lines for each level aside from the baseline level (e.g. 
\\\"ADD-INT_BMI=25-30\\\" and \\\"ADD-INT_BMI=30+\\\" where the baseline level is \\\"$<$25\\\") will also output the effect of $E^2$ in \\\"ADD-INT_VAR^2\\\" if the trait is binary (see here ) \\\"ADD-INT_SNP\\\": test for the main effect of the tested SNP in the interaction model $-$ this corresponds to $H_0: \\beta = 0$ \\\"ADD-INT_SNPxVAR\\\": test for the interaction effect (\\\"VAR\\\" will be replaced by the name of the interacting variable) $-$ this corresponds to $H_0: \\gamma = 0$ If the interacting variable is categorical, you will have separate lines for each level aside from the baseline level (e.g. \\\"ADD-INT_SNPxBMI=25-30\\\" and \\\"ADD-INT_SNPxBMI=30+\\\" where the baseline level is \\\"$<$25\\\") With Firth correction, only the effect sizes for the interaction effect at each level will be reported and the LRT p-value will only be computed for the joint test on the interaction effects \\\"ADD-INT_$k$DF\\\": joint test for the main and interaction effects of the tested variant ($k\\ge2$ for categorical interacting variables) $-$ this corresponds to $H_0: \\beta = \\gamma = 0$\",\"title\":\"Output\"},{\"location\":\"options/#conditional-analyses\",\"text\":\"Starting from regenie v3.0, you can specify genetic variants to add to the set of covariates when performing association testing. This works in both step 1 and 2, and can be used in conjunction with the gene-based tests or the interaction testing feature. The conditioning variants will automatically be excluded from the analysis. Option Argument Type Description --condition-list FILE Required file with list of variants to condition on --condition-file FORMAT,FILE Optional get conditioning variants from external file (same argument format as --interaction-file ) --condition-file-sample FILE Optional accompanying sample file for BGEN format --max-condition-vars INT Optional maximum number of conditioning variants [default is 10,000]\",\"title\":\"Conditional analyses\"},{\"location\":\"options/#survival-analyses\",\"text\":\"Starting from regenie v4.0, you can conduct survival analysis for time-to-event data.\",\"title\":\"Survival analyses\"},{\"location\":\"options/#phenotype-file-format_1\",\"text\":\"In this small example, there are 5 samples, and the event of interest is the diagnosis of cancer over a period of 10 years. Sample 1 is diagnosed with cancer during the study; the time variable is the number of years until the sample is diagnosed with cancer. Sample 2 drops out of the study; sample 3 dies during the study; samples 4 and 5 complete the study without being diagnosed with cancer; they are all right-censored, and the time variable is the last encounter or death time. The corresponding phenotype file is FID IID Time Cancer 1 1 6 1 2 2 5 0 3 3 2 0 4 4 10 0 5 5 10 0\",\"title\":\"Phenotype file format\"},{\"location\":\"options/#required-options\",\"text\":\"Survival analysis in regenie requires the following specific options in step 1, step 2 and gene-based burden tests. Option Argument Type Description --t2e FLAG Required specify that the traits are time-to-event data --phenoColList STRING Required Comma-separated list of time names to include in the analysis --eventColList STRING Required Comma-separated list of columns in the phenotype file to include in the analysis that contain the events. These event columns should have 0=no event,1=event,NA=missing For the example above, the regenie call is ./regenie \\ --t2e \\ --phenoColList Time \\ --eventColList Cancer \\ ... 
For a phenotype file containing multiple time-to-event traits, the order of the event variables listed in --eventColList should match the order of the time names specified in --phenoColList . For example, the phenotype file is FID IID Cancer_Time Cancer Asthma_Time Asthma 1 1 6 1 4 0 2 2 5 0 8 1 The regenie call is ./regenie \\ --t2e \\ --phenoColList Cancer_Time,Asthma_Time \\ --eventColList Cancer,Asthma \\ ... The output format is the same as the output file for quantitative and binary traits , with the BETA column containing the estimated hazard ratio (on the log scale).\",\"title\":\"Required options\"},{\"location\":\"options/#ld-computation\",\"text\":\"REGENIE can calculate LD between a group of variants on the same chromosome. Option Argument Type Description --compute-corr FLAG Required compute LD matrix and write to binary file --output-corr-text FLAG Optional write Pearson correlations to text file --forcein-vars FLAG Optional retain in the LD matrix all variants specified in --extract which are absent from the genetic data --ld-extract FILE Optional file listing single variants as well as burden masks to include in LD matrix (see below) Note that this can be quite memory intensive for large groups of variants (memory ~$8M^2$ bytes for $M$ variants).\",\"title\":\"LD computation\"},{\"location\":\"options/#output_3\",\"text\":\"Using --step 2 --out file By default, the LD matrix is stored in a binary compressed file file.corr and the list of variants corresponding to the columns of the LD matrix is stored in file.corr.snplist . The R script scripts/parseLD.r contains a function which returns the LD matrix, e.g. get.corr.sq.matrix(\\\"file.corr\\\") . Using --output-corr-text will write the Pearson correlations to a text file instead. When using --forcein-vars , variants not present in the genetic data will be added as extra columns/rows in the LD matrix. For these variants, the diagonal entries in the matrix will be set to 1 and the off-diagonal entries to 0. Using --ld-extract info.txt This option is used to compute LD between single variants and burden masks generated on-the-fly in REGENIE; it requires specifying annotation files . The file info.txt should have three columns: variant type ('sv' or 'mask'), variant name, followed by the set (e.g. gene) name (this can be 'NA' for single variants). For example, it would look like: sv 1:1111:A:G NA sv 1:2222:C:T NA mask PCSK9.M1.0.01 PCSK9 . Note that the set and mask names must match those used in REGENIE based on the provided annotation files and allele frequency cutoffs. Variants/masks not present in the data will be kept in the LD matrix but will have the corresponding correlations set to 0.\",\"title\":\"Output\"},{\"location\":\"overview/\",\"text\":\"Overview This page provides an overview of the models and methods implemented in regenie . A full description is given in our paper . regenie carries out genome-wide association tests for both quantitative and binary (case-control) phenotypes. Starting with regenie v4.0 , it also supports survival analysis for time-to-event data (see Survival analysis section below). It is designed to handle A large number of samples. For example, it is ideally suited to the UK Biobank dataset with 500,000 samples. A combination of genetic data from a micro-array, imputation and exome sequencing. A large number of either quantitative traits (QTs), binary (case-control) traits (BTs), or time-to-event traits (TTEs) Accounting for a set of covariates An overview of the regenie method is provided in the figure below. 
Essentially, regenie is run in 2 steps: In the first step a subset of genetic markers is used to fit a whole genome regression model that captures a good fraction of the phenotype variance attributable to genetic effects. In the second step, a larger set of genetic markers (e.g. imputed markers) are tested for association with the phenotype conditional upon the prediction from the regression model in Step 1, using a leave-one-chromosome-out (LOCO) scheme, which avoids proximal contamination. Step 1 : Whole genome model In Step 1 a whole genome regression model is fit at a subset of the total set of available genetic markers. These are typically a set of several hundred thousand ( M ) common markers from a micro-array. Ridge regression (level 0) regenie reads in the M markers in blocks of B consecutive markers ( --bsize option). In each block, a set of ridge regression predictors is calculated for a small range of J shrinkage parameters \\{\\tau_1,\\ldots, \\tau_J\\} (using the --l0 option [default is 5]) . For a block of SNPs in a N\\times B matrix X and N\\times 1 phenotype vector Y we calculate J predictors X\\widehat{\\beta}_1 \\ldots, X\\widehat{\\beta}_J where \\widehat{\\beta}_j = (X^TX+\\tau_j I)^{-1}X^T Y The idea behind using a range of shrinkage values is to capture the unknown number and size of truly associated genetic markers within each window. The ridge regression takes account of linkage disequilibrium (LD) within each block. These predictors are stored in place of the genetic markers in matrix W , providing a large reduction in data size. For example, if M=500,000 and B=1,000 and J=5 shrinkage parameters are used, then the reduced dataset will have JM/B=2,500 predictors. Ridge regression is used in this step for both quantitative and binary traits. Cross-validation (level 1) The predictors generated by the ridge regression step will all be positively correlated with the phenotype. Thus, it is important to account for that correlation when building a whole genome regression model. When analyzing a quantitative trait we use a second level of ridge regression on the full set of JM/B predictors in W . This approach is inspired by the method of stacked regressions 1 . We fit the ridge regression for a range of shrinkage parameters ( --l1 option) and choose a single best value using a K-fold cross-validation scheme. This assesses the predictive performance of the model using held-out sets of data, and aims to control any over-fitting induced by using the first level of ridge regression to derive the predictors. In other words, we fit the model Y = W\\alpha + \\epsilon where \\alpha is estimated as \\widehat{\\alpha} = (W^TW + \\phi\\,I)^{-1}W^TY and the parameter \\phi is chosen via K-fold cross-validation. For binary traits, we use a logistic ridge regression model to combine the predictors in W \\text{logit}(p) = \\mu + W\\alpha where p is the probability of being a case and \\mu captures the effects of non-genetic covariates. Genetic predictors and LOCO Once \\alpha has been estimated we can construct the genetic prediction Z = W\\widehat{\\alpha} Also, since each column of the matrix W will be associated with a chromosome we can also construct a genetic prediction ignoring any one chromosome, by simply ignoring those columns when calculating the prediction. This is known as the Leave One Chromosome Out (LOCO) approach. 
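In symbols (a sketch of the notation, which is ours rather than regenie's): writing W_{(-c)} for the columns of W not associated with chromosome c , and \\widehat{\\alpha}_{(-c)} for the corresponding entries of \\widehat{\\alpha} , the LOCO prediction for chromosome c is Z_{(-c)} = W_{(-c)}\\widehat{\\alpha}_{(-c)} . 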
These LOCO predictions are valuable at Step 2 of regenie when each marker is tested for association (see below). For binary traits, it is the linear predictor in a logistic regression model using LOCO that is saved, and used as an offset when fitting logistic regression models to test for association. Multiple phenotypes The dimension reduction step using ridge regression can be used very efficiently to model multiple phenotypes at once. The ridge regression equations for a block of SNPs in a N\\times B matrix X and a single phenotype in a N\\times 1 matrix Y take the form \\widehat{\\beta} = AY where A = (X^TX+\\tau I)^{-1}X^T does not depend on Y If instead P phenotypes are stored in columns of a N\\times P matrix Y , then the matrix A can be applied jointly to calculate the matrix of estimates \\widehat{\\beta} = AY , and this can take advantage of parallel linear algebra implementations in the Eigen matrix library. Covariates Covariates, such as age, sex and batch effect variables, can be included in the regenie model. For quantitative traits, any covariates are regressed out of phenotypes and genotypes before fitting the model. For binary traits, we fit a null model with only covariates, and use predictions from that model as an offset when fitting the logistic regression model. Step 2 : Single-variant association testing In Step 2, a larger set of markers are tested for association with the trait (or traits). As with Step 1, these markers are also read in blocks of B markers, and tested for association. This avoids having to store all markers in memory at once. Quantitative traits For quantitative traits, we use a linear regression model for association testing. Covariates are regressed out of the phenotypes and genetic markers. The LOCO predictions from Step 1 are removed from the phenotypes. Linear regression is then used to test association of the residualized phenotype and the genetic marker. Parallel linear algebra operations in the Eigen library are used where possible. Binary traits For binary traits, a logistic regression score test is used to test association of the phenotype and the genetic marker. The logistic regression model includes the LOCO predictions from Step 1 as an offset . Covariates are included in the linear predictor in the usual way. When the case-control ratio is imbalanced, standard association tests don't control Type I error well at rare genetic markers. regenie has two options to handle this Firth logistic regression Standard maximum likelihood estimates are generally biased. The Firth correction 2 removes much of the bias, and results in better calibrated test statistics. The correction involves adding a penalty term to the log-likelihood, \\widetilde{l}(\\theta) = l(\\theta) + {1 \\over 2} \\log |I(\\theta)| where the penalty term corresponds to the use of Jeffreys prior. This prior has the effect of shrinking the effect size towards zero. regenie uses a Firth correction when the p-value from the standard logistic regression test is below a threshold (default 0.05). It also includes a novel, accurate and fast approximate Firth correction which is ~60x faster than the exact Firth correction (see the option --approx ). The p-value reported in regenie is based on a likelihood ratio test (LRT), and we use the Hessian of the log-likelihood without the penalty term to estimate the standard error (SE). 
This may cause an issue in meta-analyses with rare variants, as the effect size estimate and SE may not match the LRT p-value. Hence, we added an option --firth-se to report an SE computed instead from the effect size estimate and the LRT p-value. Saddlepoint approximation (SPA) test The SPA test approximates the null distribution of the test statistic by approximating the cumulant generating function of the test statistic, which involves all of the higher order moments 3 $^,$ 4 . This provides a better estimation of the tail probabilities compared to using standard asymptotic theory which relies on the normal approximation and uses only the first two moments of the distribution. A tail probability is obtained as \\begin{align*} P&(T < t_{\\text{obs}}) \\approx \\Phi(z), \\text{ where,}\\\\ z &= w + \\frac{1}{w}\\log{\\frac{v}{w}}\\\\ w &= \\text{sign}(\\delta^*)\\sqrt{2[t_{\\text{obs}}\\,\\delta^* - K(\\delta^*)]},\\, v = \\delta^*\\sqrt{K''(\\delta^*)} \\end{align*} and K(\\delta) is the cumulant generating function of the test statistic and \\delta^* is obtained by using a root-finding algorithm for K'(\\delta)=t_{\\text{obs}} . As this approximation has been found not to work very well for ultra-rare variants, a minimum minor allele count (MAC) is used to filter out these variants before testing (option --minMAC ). Step 2 : Gene-based testing Instead of performing single-variant association tests, multiple variants can be aggregated in a given region, such as a gene, using the following model g(\\mu) = w_1G_1\\beta_1 + \\dots + w_mG_m\\beta_m where G_i 's represent the single variants included in the test, w_i 's and \\beta_i 's are weights and effect sizes, respectively, for each variant, and g(.) is a link function for the phenotypic mean \\mu . We also denote by S_i the score statistics obtained from the single-variant tests . This can be especially helpful when testing rare variants as single-variant tests usually have low power. To avoid inflation in the gene-based tests due to rare variants as well as reduce computation time, we have implemented the collapsing approach proposed in SAIGE-GENE+ 5 , where ultra-rare variants are aggregated into a mask. For highly imbalanced binary traits, SPA/Firth correction can be used to calibrate the test statistics in the gene-based tests as proposed in Zhao et al. (2020) 6 using --firth/--spa . Burden tests Burden tests, as defined in Lee et al. (2014) 7 , assume \\beta_i=\\beta\\; \\forall i , where \\beta is a fixed coefficient, which then leads to the test statistic Q_{BURDEN} = \\left(\\sum_i w_iS_i\\right)^2 These tests collapse variants into a single variable which is then tested for association with the phenotype. Hence, they are more powerful when variants have effects in the same direction and of similar magnitude. In regenie , multiple options are available to aggregate variants together into a burden mask beyond the linear combination above ( see here ). For example, the burden tests that were employed in Backman et al. (2021) 8 use the default strategy in regenie of collapsing variants by taking the maximum number of rare alleles across the sites. 
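As a toy illustration of these collapsing rules (our example, using the --build-mask options described above): a sample carrying one ALT allele at one rare site and two ALT alleles at another gets a mask genotype of max(1,2)=2 under the default 'max' rule, 1+2=3 under 'sum', and min(3,2)=2 under 'comphet', which caps the sum at 2. 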
Variance component tests Unlike burden tests, SKAT 9 assumes the effect sizes $\\beta_i$ come from an arbitrary distribution with mean 0 and variance $\\tau^2$ , which leads to the test statistic Q_{SKAT} = \\sum_i w_i^2S_i^2 Hence, SKAT can remain powerful when variant effects are in opposite directions. The omnibus test SKATO 10 combines the SKAT and burden tests as Q_{SKATO} = \\rho Q_{BURDEN} + (1-\\rho) Q_{SKAT} So setting $\\rho=0$ corresponds to SKAT and $\\rho=1$ to the burden test. In practice, the parameter $\\rho$ is chosen to maximize the power [ regenie uses a default grid of 8 values {$0, 0.1^2, 0.2^2, 0.3^2, 0.4^2, 0.5^2, 0.5, 1$} and sets the weights $w_i = Beta(MAF_i,1,25)$]. To obtain the p-value from a linear combination of chi-squared variables, regenie uses Davies' exact method 11 by default. Following Wu et al. (2016) 12 , regenie uses Kuonen's saddlepoint approximation method 13 when Davies' p-value is below 1e-5 and if that fails, it uses Davies' method with more stringent convergence parameters (lim=1e5,acc=1e-9). The original SKATO method uses numerical integration when maximizing power across the various SKATO models that use different values for $\\rho$. We also implement a modification of SKATO, named SKATO-ACAT, which instead uses the Cauchy combination method 14 to combine the p-values from the different SKATO models. Cauchy combination tests The ACATV 15 test uses the Cauchy combination method ACAT to combine single variant p-values $p_i$ as Q_{ACATV} = \\sum_i \\widetilde{w}_i^2\\tan{\\{\\pi(0.5 - p_i)\\}} where we set $\\widetilde{w}_i = w_i \\sqrt{MAF(1-MAF)}$. This test is highly computationally tractable and is robust to correlation between the single variant tests. The omnibus test ACATO 15 combines ACATV with the SKAT and burden tests as Q_{ACATO} = \\frac{1}{3}\\tan{\\{\\pi(0.5 - p_{ACATV})\\}}+ \\frac{1}{3}\\tan{\\{\\pi(0.5 - p_{Burden})\\}}+ \\frac{1}{3}\\tan{\\{\\pi(0.5 - p_{SKAT})\\}} where unlike the original ACATO test, we only use one set of the weights $w_i$. Alternatively, we augment the test to include an extended set of SKATO models beyond SKAT and Burden (which correspond to $\\rho$ of 0 and 1 in SKATO respectively) and use the default SKATO grid of 8 values for $\\rho$. Sparse Burden Association Test regenie can generate burden masks which are obtained by aggregating single variants using various annotation classes as well as allele frequency thresholds. The Sparse Burden Association Test (SBAT) 16 combines these burden masks in a joint model imposing the constraint that effects share the same direction \\mu = \\sum_{\\text{mask }i} M_i\\gamma_i where $M_i$ represents a burden mask and we solve \\underset{\\boldsymbol\\gamma}{\\min} || Y - \\sum_i M_i\\gamma_i||^2 \\text{ subject to } \\gamma_i \\ge 0 \\text{ for all } i The SBAT method tests the hypothesis $H_0: \\gamma_i=0$ for all $i$ vs. $H_1: \\gamma_i > 0$ for some $i$. By using this joint model, the SBAT test accounts for the correlation structure between the burden masks and, with the non-negative constraints, it can lead to a boost in power when multiple burden masks are causal and have concordant effects. This test has the nice property that it combines model selection of the masks (via the sparsity induced by the non-negativity constraint) with model inference (it is well calibrated and powerful). 
GENE_P As the different gene-based tests in REGENIE can be more powerful under different genetic architectures, we propose a unified strategy, named GENE_P, that combines the strengths of these tests. It uses ACAT to combine the p-values of the SKATO, ACATV, Burden and SBAT tests to obtain an overall assessment of significance for a genetic region (e.g. gene). The diagram below illustrates the GENE_P test using 4 masks (i.e. combinations of variant annotations) and 3 allele frequency cutoffs when performing gene-based tests. Step 2 : Interaction testing The GxE tests are of the form g(\\mu) = E\\alpha + G\\beta + (G\\odot E)\\gamma where $E$ is an environmental risk factor and $G$ is a marker of interest, and $\\odot$ represents the Hadamard (entry-wise) product of the two. The last term in the model allows for the variant to have different effects across values of the risk factor. Note: if $E$ is categorical, we use a dummy variable for each level of $E$ in the model above. We can look at the following hypotheses: $H_0: \\beta = 0$ given $\\gamma = 0$, which is a marginal test for the SNP $H_0: \\beta = 0$, which is a test for the main effect of the SNP in the full model $H_0: \\gamma = 0$, which is a test for interaction $H_0: \\beta = \\gamma = 0$, which tests both main and interaction effects for the SNP Misspecification of the model above, such as in the presence of heteroskedasticity or high case-control imbalance, can lead to inflation in the tests. Robust (sandwich) standard error (SE) estimators 17 can be used to address model misspecification; however, they can suffer from inflation when testing rare variants or in the presence of high case-control imbalance 18 $^,$ 19 . In regenie , we use a hybrid approach which combines: Wald test with sandwich estimators Wald test with heteroskedastic linear models (for quantitative traits) LRT with penalized Firth regression (for binary traits) For quantitative traits, we use the HC3 sandwich estimator to perform a Wald test for variants whose minor allele count (MAC) is above 1000 (see --rare-mac ). For the remaining variants, we fit a heteroskedastic linear model (HLM) 20 Y = E\\alpha + E^2\\zeta + G\\beta + (G\\odot E)\\gamma + \\epsilon where we assume $\\epsilon \\sim N(0, D)$ where $D$ is a diagonal matrix with entries $\\sigma^2\\exp{(1 + E\\theta_1 + E^2\\theta_2)}$. This formulation allows for the phenotypic variance to also depend on the risk factor $E$. By incorporating both the linear and quadratic effect of $E$ in the mean and variance of $Y$, this model provides robustness to heteroskedasticity ( Note: the $E^2$ terms are only added when $E$ is quantitative ). Wald tests are then performed for the null hypotheses listed above. For binary traits, we consider the following interaction model \\text{logit}(\\mu) = E\\alpha + E^2\\zeta + G\\beta + (G\\odot E)\\gamma where we also include a non-linear effect for $E$ (not if $E$ is categorical). The HC3 sandwich estimator is used in a Wald test for variants whose MAC is above 1000 (see --rare-mac ); otherwise the model-based standard errors are used. When Firth is specified, we only apply the Firth correction using LRT if the p-value for the interaction term $\\gamma$ from the Wald test is below a specified threshold (see --pThresh ). So the added $E^2$ term as well as the use of the Firth penalty help with case-control imbalance and model misspecification for the effect of $E$ on the phenotype. 
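As a small worked example (ours): if $E$ is categorical with three levels, it enters the model as two dummy variables, so $\\gamma$ has two components and the joint null hypothesis $H_0: \\beta = \\gamma = 0$ is tested with $1 + 2 = 3$ degrees of freedom, matching the \\\"ADD-INT_$k$DF\\\" output described in the Options section with $k=3$. 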
Survival analysis Starting with regenie v4.0 , we have enabled survival analysis, leveraging the Cox proportional hazards model to improve power for analyzing common diseases where time-to-event data are available. We assume that samples without an event are right-censored, i.e. the survival time is only known to be greater than a certain value. It is important to encode this information correctly into the phenotypes . Step 1: Whole genome model using Cox ridge regression In step 1, Level 0 is run using linear ridge regression with the time variable taken as the response. In Level 1, instead of linear/logistic ridge regression, we use Cox ridge regression 21 to combine the predictions $W$ from Level 0. \\lambda_i(t) = \\lambda_0(t) \\exp(\\mu_i + w_i^\\intercal \\alpha) where $\\lambda_0(t)$ is the baseline hazard function, and, for the $i$-th individual, $\\lambda_i(t)$ is the hazard function, $w_i$ is the set of ridge predictors from Level 0, and $\\mu_i$ captures the effects of non-genetic covariates. We fit the Cox ridge regression for a range of shrinkage parameters and select the best value using a K-fold cross-validation scheme. With the estimated $\\hat{\\alpha}$, we construct LOCO predictions which capture population structure, relatedness and polygenicity. Step 2: Single variant and gene-based burden tests For time-to-event traits, the Cox proportional hazards regression model is used to test the association between the phenotype and the genetic marker. Note : the only supported gene-based test is the burden test. The Cox proportional hazards regression model includes the LOCO predictions from Step 1 as an offset. \\lambda_i(t) = \\lambda_0(t) \\exp(\\mu_i + w_{i, LOCO} + g_i \\beta) We test the null hypothesis $H_0: \\beta = 0$ using a score test. When the event rate is low, the standard score test doesn't control Type I error well at rare genetic markers. To reduce the bias and achieve a more robust test, regenie uses Firth correction 22 when the p-value from the standard score test is below a threshold (default 0.05). The Firth correction provides a well-calibrated test, but comes with a computational cost. To mitigate this burden in Cox regression, we include a fast approximate test, which gives results very similar to the exact Firth test. Missing Phenotype data With QTs, missing values are mean-imputed in Step 1 and they are dropped when testing each phenotype in Step 2 (unless using --force-impute ). With BTs, missing values are mean-imputed in Step 1 when fitting the level 0 linear ridge regression and they are dropped when fitting the level 1 logistic ridge regression for each trait. In Step 2, missing values are dropped when testing each trait. To remove all samples that have missing values at any of the P phenotypes from the analysis, use option --strict in step 1 and 2. This can also be used when analyzing a single trait to only keep individuals with complete data by setting the phenotype values of individuals to remove to NA. Note: imputation is only applied to phenotypes; covariates are not allowed to have missing data. References Breiman, L. Stacked regressions. Machine Learning 24 , 49\\u201364 (1996). Firth, D. Bias reduction of maximum likelihood estimates. Biometrika 80 , 27\\u201338 (1993). Butler, R. W. Saddlepoint approximations with applications . (Cambridge University Press, 2007). Dey, R., Schmidt, E. M., Abecasis, G. R. & Lee, S. 
A fast and accurate algorithm to test for binary phenotypes and its application to PheWAS. The American Journal of Human Genetics 101 , 37\\u201349 (2017). Zhou, W. et al. Set-based rare variant association tests for biobank scale sequencing data sets. medRxiv (2021). Zhao, Z. et al. UK biobank whole-exome sequence binary phenome analysis with robust region-based rare-variant test. Am J Hum Genet 106 , 3\\u201312 (2020). Lee, S., Abecasis, G. R., Boehnke, M. & Lin, X. Rare-variant association analysis: Study designs and statistical tests. Am J Hum Genet 95 , 5\\u201323 (2014). Backman, J. D. et al. Exome sequencing and analysis of 454,787 UK biobank participants. Nature 599 , 628\\u2013634 (2021). Wu, M. C. et al. Rare-variant association testing for sequencing data with the sequence kernel association test. Am J Hum Genet 89 , 82\\u201393 (2011). Lee, S., Wu, M. C. & Lin, X. Optimal tests for rare variant effects in sequencing association studies. Biostatistics 13 , 762\\u201375 (2012). Davies, R. B. Algorithm AS 155: The distribution of a linear combination of \\u03c7\\u00b2 random variables. Applied Statistics 29 , 323\\u2013333 (1980). Wu, B., Guan, W. & Pankow, J. S. On efficient and accurate calculation of significance p-values for sequence kernel association testing of variant set. Ann Hum Genet 80 , 123\\u201335 (2016). Kuonen, D. Miscellanea. Saddlepoint approximations for distributions of quadratic forms in normal variables. Biometrika 86 , 929\\u2013935 (1999). Liu, Y. & Xie, J. Cauchy combination test: A powerful test with analytic p-value calculation under arbitrary dependency structures. J Am Stat Assoc 115 , 393\\u2013402 (2020). Liu, Y. et al. ACAT: A fast and powerful p value combination method for rare-variant analysis in sequencing studies. Am J Hum Genet 104 , 410\\u2013421 (2019). Ziyatdinov, A. et al. Joint testing of rare variant burden scores using non-negative least squares. The American Journal of Human Genetics 111 , 2139\\u20132149 (2024). MacKinnon, J. G. & White, H. Some heteroskedasticity-consistent covariance matrix estimators with improved finite sample properties. Journal of Econometrics 29 , 305\\u2013325 (1985). Tchetgen Tchetgen, E. J. & Kraft, P. On the robustness of tests of genetic associations incorporating gene-environment interaction when the environmental exposure is misspecified. Epidemiology 22 , 257\\u201361 (2011). Voorman, A., Lumley, T., McKnight, B. & Rice, K. Behavior of QQ-plots and genomic control in studies of gene-environment interaction. PLoS One 6 (2011). Young, A. I., Wauthier, F. L. & Donnelly, P. Identifying loci affecting trait variability and detecting interactions in genome-wide association studies. Nat Genet 50 , 1608\\u20131614 (2018). Simon, N., Friedman, J. H., Hastie, T. & Tibshirani, R. Regularization paths for Cox\\u2019s proportional hazards model via coordinate descent. Journal of Statistical Software 39 , 1\\u201313 (2011). Heinze, G. & Schemper, M. A solution to the problem of monotone likelihood in Cox regression. Biometrics 57 , 114\\u2013119 (2001).\",\"title\":\"Overview\"},{\"location\":\"overview/#overview\",\"text\":\"This page provides an overview of the models and methods implemented in regenie . A full description is given in our paper . regenie carries out genome-wide association tests for both quantitative and binary (case-control) phenotypes. 
Starting at regenie v4.0 , it also supports survival analysis for time-to-event data (See Survival analysis section below). It is designed to handle A large number of samples. For example, it is ideally suited to the UK Biobank dataset with 500,000 samples. A combination of genetic data from a micro-array, imputation and exome sequencing. A large number of either quantitative traits (QTs), binary (case-control) traits (BTs), or time-to-event traits (TTEs) Accounting for a set of covariates An overview of the regenie method is provided in the figure below. Essentially, regenie is run in 2 steps: In the first step a subset of genetic markers are used to fit a whole genome regression model that captures a good fraction of the phenotype variance attributable to genetic effects. In the second step, a larger set of genetic markers (e.g. imputed markers) are tested for association with the phenotype conditional upon the prediction from the regression model in Step 1, using a leave one chromosome out (LOCO) scheme, that avoids proximal contamination.\",\"title\":\"Overview\"},{\"location\":\"overview/#step-1-whole-genome-model\",\"text\":\"In Step 1 a whole genome regression model is fit at a subset of the total set of available genetic markers. These are typically a set of several hundred thousand ( M ) common markers from a micro-array.\",\"title\":\"Step 1 : Whole genome model\"},{\"location\":\"overview/#ridge-regression-level-0\",\"text\":\"regenie reads in the M markers in blocks of B consecutive markers ( --bsize option). In each block, a set of ridge regression predictors are calculated for a small range of J shrinkage parameters \\\\{\\\\tau_1,\\\\ldots, \\\\tau_J\\\\} (using --l0 option [default is 5]) . For a block of SNPs in a N\\\\times B matrix X and N\\\\times 1 phenotype vector Y we calculate J predictors X\\\\widehat{\\\\beta}_1 \\\\ldots, X\\\\widehat{\\\\beta}_J where \\\\widehat{\\\\beta}_j = (X^TX+\\\\tau_j I)^{-1}X^T Y The idea behind using a range of shrinkage values is to capture the unknown number and size of truly associated genetic markers within each window. The ridge regression takes account of Linkage disequilibrium (LD) within each block. These predictors are stored in place of the genetic markers in matrix W , providing a large reduction in data size. For example, if M=500,000 and B=1,000 and J=5 shrinkage parameters are used, then the reduced dataset will have JM/B=2,500 predictors. Ridge regression is used in this step for both quantitative and binary traits.\",\"title\":\"Ridge regression (level 0)\"},{\"location\":\"overview/#cross-validation-level-1\",\"text\":\"The predictors generated by the ridge regression step will all be positively correlated with the phenotype. Thus, it is important to account for that correlation when building a whole genome wide regression model. When analyzing a quantitative trait we use a second level of ridge regression on the full set of JM/B predictors in W . This approach is inspired by the method of stacked regressions 1 . We fit the ridge regression for a range of shrinkage parameters ( --l1 option) and choose a single best value using K-fold cross validation scheme. This assesses the predictive performance of the model using held out sets of data, and aims to control any over-fitting induced by using the first level of ridge regression to derive the predictors. 
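To make the level 0 computation above concrete, here is a minimal numerical sketch (Python with numpy on simulated toy data; it is not regenie's C++ implementation) of the per-block ridge predictors X\widehat{\beta}_j = X(X^TX + \tau_j I)^{-1}X^TY. The function name, block size and shrinkage grid are illustrative choices only.

```python
import numpy as np

def level0_ridge_predictions(X, Y, taus):
    """Level-0 ridge: for one block X (N x B) and phenotype Y (N,),
    return one ridge prediction X @ beta_j per shrinkage value tau_j."""
    N, B = X.shape
    XtX = X.T @ X                      # B x B Gram matrix, reused for all taus
    XtY = X.T @ Y                      # length-B vector
    preds = []
    for tau in taus:
        beta = np.linalg.solve(XtX + tau * np.eye(B), XtY)
        preds.append(X @ beta)
    return np.column_stack(preds)      # N x J block of predictors

# toy usage: one block of B=50 standardized markers, J=5 shrinkage values
rng = np.random.default_rng(0)
X = rng.standard_normal((1000, 50))
Y = X[:, :5].sum(axis=1) + rng.standard_normal(1000)
W_block = level0_ridge_predictions(X, Y, taus=[10, 50, 100, 500, 1000])
print(W_block.shape)  # (1000, 5)
```

Stacking these per-block prediction columns across all M/B blocks yields the reduced N x (JM/B) matrix W described above.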
In other words, we fit the model Y = W\\\\alpha + \\\\epsilon where \\\\alpha is estimated as \\\\widehat{\\\\alpha} = (W^TW + \\\\phi\\\\,I)^{-1}W^TY and the parameter \\\\phi is chosen via K-fold cross-validation. For binary traits, we use a logistic ridge regression model to combine the predictors in W \\\\text{logit}(p) = \\\\mu + W\\\\alpha where p is the probability of being a case and \\\\mu captures the effects of non-genetic covariates.\",\"title\":\"Cross-validation (level 1)\"},{\"location\":\"overview/#genetic-predictors-and-loco\",\"text\":\"Once \\\\alpha has been estimated, we can construct the genetic prediction Z = W\\\\widehat{\\\\alpha} Also, since each column of the matrix W will be associated with a chromosome, we can also construct a genetic prediction ignoring any one chromosome, by simply ignoring those columns when calculating the prediction. This is known as the Leave One Chromosome Out (LOCO) approach. These LOCO predictions are valuable at Step 2 of regenie when each marker is tested for association (see below). For binary traits, it is the linear predictor in a logistic regression model using LOCO that is saved, and used as an offset when fitting logistic regression models to test for association.\",\"title\":\"Genetic predictors and LOCO\"},{\"location\":\"overview/#multiple-phenotypes\",\"text\":\"The dimension reduction step using ridge regression can be used very efficiently to model multiple phenotypes at once. The ridge regression equations for a block of SNPs in a N\\\\times B matrix X and a single phenotype in a N\\\\times 1 matrix Y take the form \\\\widehat{\\\\beta} = AY where A = (X^TX+\\\\tau I)^{-1}X^T does not depend on Y . If instead P phenotypes are stored in columns of a N\\\\times P matrix Y , then the matrix A can be applied jointly to calculate the matrix of estimates \\\\widehat{\\\\beta} = AY , and this can take advantage of parallel linear algebra implementations in the Eigen matrix library (a small numerical sketch of this joint computation is given below).\",\"title\":\"Multiple phenotypes\"},{\"location\":\"overview/#covariates\",\"text\":\"Covariates, such as age, sex and batch effect variables, can be included in the regenie model. For quantitative traits, any covariates are regressed out of phenotypes and genotypes before fitting the model. For binary traits, we fit a null model with only covariates, and use predictions from that model as an offset when fitting the logistic regression model.\",\"title\":\"Covariates\"},{\"location\":\"overview/#step-2-single-variant-association-testing\",\"text\":\"In Step 2, a larger set of markers are tested for association with the trait (or traits). As with Step 1, these markers are also read in blocks of B markers, and tested for association. This avoids having to have all markers stored in memory at once.\",\"title\":\"Step 2 : Single-variant association testing\"},{\"location\":\"overview/#quantitative-traits\",\"text\":\"For quantitative traits, we use a linear regression model for association testing. Covariates are regressed out of the phenotypes and genetic markers. The LOCO predictions from Step 1 are removed from the phenotypes. Linear regression is then used to test association of the residualized phenotype and the genetic marker. Parallel linear algebra operations in the Eigen library are used where possible.\",\"title\":\"Quantitative traits\"},{\"location\":\"overview/#binary-traits\",\"text\":\"For binary traits, a logistic regression score test is used to test association of the phenotype and the genetic marker.
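As a companion to the multiple-phenotypes passage above, the following sketch (again Python/numpy on toy data; regenie itself does this with Eigen in C++) shows how a single factorization of X^TX + \tau I is shared across all P phenotypes instead of solving P separate systems.

```python
import numpy as np

def level0_ridge_multi(X, Y, tau):
    """Apply A = (X^T X + tau I)^{-1} X^T jointly to P phenotypes:
    the B x P coefficient matrix comes from one solve with a matrix RHS."""
    B = X.shape[1]
    betas = np.linalg.solve(X.T @ X + tau * np.eye(B), X.T @ Y)
    return X @ betas                  # N x P block predictions

rng = np.random.default_rng(1)
X = rng.standard_normal((1000, 50))   # one SNP block
Y = rng.standard_normal((1000, 8))    # P = 8 phenotypes in columns
print(level0_ridge_multi(X, Y, tau=100.0).shape)   # (1000, 8)
```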
The logistic regression model includes the LOCO predictions from Step 1 as an offset . Covariates are included in the linear predictor in the usual way. When the case-control ratio is imbalanced, standard association tests don't control Type I error well at rare genetic markers. regenie has two options to handle this:\",\"title\":\"Binary traits\"},{\"location\":\"overview/#firth-logistic-regression\",\"text\":\"Standard maximum likelihood estimates are generally biased. The Firth correction 2 removes much of the bias, and results in better calibrated test statistics. The correction involves adding a penalty term to the log-likelihood, \\\\widetilde{l}(\\\\theta) = l(\\\\theta) + {1 \\\\over 2} \\\\log |I(\\\\theta)| where the penalty term corresponds to the use of Jeffreys prior. This prior has the effect of shrinking the effect size towards zero. regenie uses a Firth correction when the p-value from the standard logistic regression test is below a threshold (default 0.05). It also includes a novel, accurate and fast approximate Firth correction which is ~60x faster than the exact Firth correction (see the option --firth ). The p-value reported in regenie is based on a likelihood ratio test (LRT), and we use the Hessian of the log-likelihood without the penalty term to estimate the standard error (SE). This may cause an issue in meta-analyses with rare variants, as the effect size estimate and SE may not match with the LRT p-value. Hence, we added an option --firth-se to report an SE computed instead from the effect size estimate and the LRT p-value.\",\"title\":\"Firth logistic regression\"},{\"location\":\"overview/#saddle-point-approxiation-spa-test\",\"text\":\"The SPA test approximates the null distribution of the test statistic by approximating the cumulant generating function of the test statistic, which involves all of the higher order moments 3 $^,$ 4 . This provides a better estimation of the tail probabilities compared to using standard asymptotic theory, which relies on the normal approximation and uses only the first two moments of the distribution. A tail probability is obtained as \\\\begin{align*} P&(T < t_{\\\\text{obs}}) \\\\approx \\\\Phi(z), \\\\text{ where,}\\\\\\\\ z &= w + \\\\frac{1}{w}\\\\log{\\\\frac{v}{w}}\\\\\\\\ w &= \\\\text{sign}(\\\\delta^*)\\\\sqrt{ 2 [ t_{\\\\text{obs}}\\\\, \\\\delta^* - K(\\\\delta^*) ]},\\\\, v = \\\\delta^*\\\\sqrt{K''(\\\\delta^*)} \\\\end{align*} and K(\\\\delta) is the cumulant generating function of the test statistic and \\\\delta^* is obtained by using a root-finding algorithm for K'(\\\\delta)=t_{\\\\text{obs}} (a small numerical sketch of this tail formula is given below). As this approximation has been found not to work very well for ultra-rare variants, a minimum minor allele count (MAC) is used to filter out these variants before testing (option --minMAC ).\",\"title\":\"Saddle point approximation (SPA) test\"},{\"location\":\"overview/#step-2-gene-based-testing\",\"text\":\"Instead of performing single-variant association tests, multiple variants can be aggregated in a given region, such as a gene, using the following model g(\\\\mu) = w_1G_1\\\\beta_1 + \\\\dots + w_mG_m\\\\beta_m where G_i 's represent the single variants included in the test, w_i 's and \\\\beta_i 's are weights and effect sizes, respectively, for each variant, and g(.) is a link function for the phenotypic mean \\\\mu . We also denote by S_i the score statistics obtained from the single-variant tests .
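The SPA tail formula above can be evaluated directly. Below is a small sketch (Python with numpy/scipy; the function name, the root-bracketing interval, and the toy cumulant functions are illustrative assumptions, not regenie code) that solves K'(\delta^*) = t_obs and computes z = w + log(v/w)/w. For a standard normal statistic the saddlepoint approximation is exact, which gives a simple sanity check.

```python
import numpy as np
from scipy.optimize import brentq
from scipy.stats import norm

def spa_tail(t_obs, K, K1, K2, lo=-50.0, hi=50.0):
    """Saddlepoint tail P(T < t_obs) given the CGF K and its first two
    derivatives K1, K2.  Note: a production version needs a fallback
    when t_obs is near K1(0), since w -> 0 makes log(v/w)/w unstable."""
    d = brentq(lambda x: K1(x) - t_obs, lo, hi)          # saddlepoint delta*
    w = np.sign(d) * np.sqrt(2.0 * (t_obs * d - K(d)))
    v = d * np.sqrt(K2(d))
    return norm.cdf(w + np.log(v / w) / w)

# sanity check with T ~ N(0,1), where K(d) = d^2/2 and SPA is exact
print(spa_tail(-2.0, lambda d: d * d / 2, lambda d: d, lambda d: 1.0))
print(norm.cdf(-2.0))   # the two values should agree closely
```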
This can be especially helpful when testing rare variants as single-variant tests usually have low power. To avoid inflation in the gene-based tests due to rare variants as well as to reduce computation time, we have implemented the collapsing approach proposed in SAIGE-GENE+ 5 , where ultra-rare variants are aggregated into a mask. For highly imbalanced binary traits, SPA/Firth correction can be used to calibrate the test statistics in the gene-based tests as proposed in Zhao et al. (2020) 6 using --firth/--spa .\",\"title\":\"Step 2 : Gene-based testing\"},{\"location\":\"overview/#burden-tests\",\"text\":\"Burden tests, as defined in Lee et al. (2014) 7 , assume \\\\beta_i=\\\\beta\\\\; \\\\forall i , where \\\\beta is a fixed coefficient, which then leads to the test statistic Q_{BURDEN} = \\\\left(\\\\sum_i w_iS_i\\\\right)^2 These tests collapse variants into a single variable which is then tested for association with the phenotype. Hence, they are more powerful when variants have effects in the same direction and of similar magnitude. In regenie , multiple options are available to aggregate variants together into a burden mask beyond the linear combination above ( see here ). For example, the burden tests that were employed in Backman et al. (2021) 8 use the default strategy in regenie of collapsing variants by taking the maximum number of rare alleles across the sites.\",\"title\":\"Burden tests\"},{\"location\":\"overview/#variance-component-tests\",\"text\":\"Unlike burden tests, SKAT 9 assumes the effect sizes $\\\\beta_i$ come from an arbitrary distribution with mean 0 and variance $\\\\tau^2$ , which leads to the test statistic Q_{SKAT} = \\\\sum_i w_i^2S_i^2 Hence, SKAT can remain powerful when variant effects are in opposite directions. The omnibus test SKATO 10 combines the SKAT and burden tests as Q_{SKATO} = \\\\rho Q_{BURDEN} + (1-\\\\rho) Q_{SKAT} So setting $\\\\rho=0$ corresponds to SKAT and $\\\\rho=1$ to the burden test. In practice, the parameter $\\\\rho$ is chosen to maximize the power [ regenie uses a default grid of 8 values {$0, 0.1^2, 0.2^2, 0.3^2, 0.4^2, 0.5^2, 0.5, 1$} and sets the weights $w_i = Beta(MAF_i,1,25)$]. To obtain the p-value from a linear combination of chi-squared variables, regenie uses Davies' exact method 11 by default. Following Wu et al. (2016) 12 , regenie uses Kuonen's saddlepoint approximation method 13 when the Davies p-value is below 1e-5 and, if that fails, it uses Davies' method with more stringent convergence parameters (lim=1e5, acc=1e-9). The original SKATO method uses numerical integration when maximizing power across the various SKATO models that use different values for $\\\\rho$. We also implement a modification of SKATO, named SKATO-ACAT, which instead uses the Cauchy combination method 14 to combine the p-values from the different SKATO models.\",\"title\":\"Variance component tests\"},{\"location\":\"overview/#cauchy-combination-tests\",\"text\":\"The ACATV 15 test uses the Cauchy combination method ACAT to combine single variant p-values $p_i$ as Q_{ACATV} = \\\\sum_i \\\\widetilde{w}_i^2\\\\tan{\\\\{\\\\pi(0.5 - p_i)\\\\}} where we set $\\\\widetilde{w}_i = w_i \\\\sqrt{MAF(1-MAF)}$. This test is highly computationally tractable and is robust to correlation between the single variant tests.
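Since the Cauchy combination underlying ACATV and ACATO is a simple closed-form transform, it is easy to sketch. The helper below (Python/numpy; the function name and toy p-values are illustrative) converts p-values to Cauchy variates, averages them with normalized weights, and maps the statistic back to a combined p-value.

```python
import numpy as np

def acat(pvals, weights=None):
    """Cauchy combination: T = sum_i w_i * tan((0.5 - p_i) * pi), with
    normalized weights; the combined p-value is ~ 0.5 - arctan(T)/pi.
    The result is robust to correlation between the individual tests."""
    p = np.asarray(pvals, dtype=float)
    w = np.ones_like(p) if weights is None else np.asarray(weights, float)
    w = w / w.sum()
    T = np.sum(w * np.tan((0.5 - p) * np.pi))
    return 0.5 - np.arctan(T) / np.pi

print(acat([0.01, 0.2, 0.8]))   # dominated by the smallest p-value
print(acat([0.5, 0.5, 0.5]))    # exactly 0.5
```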
The omnibus test ACATO 15 combines ACATV with the SKAT and burden tests as Q_{ACATO} = \\\\frac{1}{3}\\\\tan{\\\\{\\\\pi(0.5 - p_{ACATV})\\\\}}+ \\\\frac{1}{3}\\\\tan{\\\\{\\\\pi(0.5 - p_{Burden})\\\\}}+ \\\\frac{1}{3}\\\\tan{\\\\{\\\\pi(0.5 - p_{SKAT})\\\\}} where, unlike the original ACATO test, we only use one set of the weights $w_i$. Alternatively, we augment the test to include an extended set of SKATO models beyond SKAT and Burden (which correspond to $\\\\rho$ of 0 and 1 in SKATO, respectively) and use the default SKATO grid of 8 values for $\\\\rho$.\",\"title\":\"Cauchy combination tests\"},{\"location\":\"overview/#sparse-burden-association-test\",\"text\":\"regenie can generate burden masks which are obtained by aggregating single variants using various annotation classes as well as allele frequency thresholds. The Sparse Burden Association Test (SBAT) 16 combines these burden masks in a joint model imposing the constraint that effects act in the same direction \\\\mu = \\\\sum_{\\\\text{mask }i} M_i\\\\gamma_i where $M_i$ represents a burden mask and we solve \\\\underset{\\\\boldsymbol\\\\gamma}{\\\\min} || Y - \\\\sum_i M_i\\\\gamma_i||^2 \\\\text{ subject to } \\\\gamma_i \\\\ge 0 \\\\text{ for all } i The SBAT method tests the hypothesis $H_0: \\\\gamma_i=0$ for all $i$ vs. $H_1: \\\\gamma_i > 0$ for some $i$ (a small sketch of this non-negative fit is given below). By using this joint model, the SBAT test accounts for the correlation structure between the burden masks and, with the non-negative constraints, it can lead to a boost in power when multiple burden masks are causal and have concordant effects. This test has the nice property that it combines model selection of the masks (via the sparsity induced by the non-negative assumption) with model inference (it is well calibrated and powerful).\",\"title\":\"Sparse Burden Association Test\"},{\"location\":\"overview/#gene_p\",\"text\":\"As the different gene-based tests in regenie can be more powerful under different genetic architectures, we propose a unified strategy, named GENE_P, that combines the strengths of these tests. It uses ACAT to combine the p-values of the SKATO, ACATV, Burden and SBAT tests and obtain an overall assessment of significance for a genetic region (e.g. gene). The diagram below illustrates the GENE_P test using 4 masks (i.e. combinations of variant annotations) and 3 allele frequency cutoffs when performing gene-based tests.\",\"title\":\"GENE_P\"},{\"location\":\"overview/#step-2-interaction-testing\",\"text\":\"The GxE tests are of the form g(\\\\mu) = E\\\\alpha + G\\\\beta + (G\\\\odot E)\\\\gamma where $E$ is an environmental risk factor, $G$ is a marker of interest, and $\\\\odot$ represents the Hadamard (entry-wise) product of the two. The last term in the model allows for the variant to have different effects across values of the risk factor. Note: if $E$ is categorical, we use a dummy variable for each level of $E$ in the model above. We can look at the following hypotheses: $H_0: \\\\beta = 0$ given $\\\\gamma = 0$, which is a marginal test for the SNP $H_0: \\\\beta = 0$, which is a test for the main effect of the SNP in the full model $H_0: \\\\gamma = 0$, which is a test for interaction $H_0: \\\\beta = \\\\gamma = 0$, which tests both main and interaction effects for the SNP Misspecification of the model above, such as in the presence of heteroskedasticity or of high case-control imbalance, can lead to inflation in the tests.
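The non-negative fit referenced in the SBAT passage above can be sketched with an off-the-shelf NNLS solver (Python with numpy/scipy on simulated masks; this shows only the constrained least-squares fit, not the calibrated test that regenie performs on top of it).

```python
import numpy as np
from scipy.optimize import nnls

# Toy setup: a (residualized) phenotype y regressed on 4 correlated
# burden masks M_i under the constraint gamma_i >= 0.
rng = np.random.default_rng(2)
N = 2000
M = rng.binomial(2, 0.05, size=(N, 4)).astype(float)   # 4 burden masks
M[:, 1] += 0.5 * M[:, 0]                                # induce correlation
y = 0.3 * M[:, 0] + 0.2 * M[:, 2] + rng.standard_normal(N)

gamma, rss = nnls(M - M.mean(0), y - y.mean())          # non-negative fit
print(gamma)   # non-causal masks tend to be shrunk exactly to zero
```

The exact zeros produced by the non-negativity constraint are what gives SBAT its built-in model selection over the masks.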
Robust (sandwich) standard error (SE) estimators 17 can be used to address model misspecification; however, they can suffer from inflation when testing rare variants or in the presence of high case-control imbalance 18 $^,$ 19 . In regenie , we use a hybrid approach which combines: a Wald test with sandwich estimators; a Wald test with heteroskedastic linear models (for quantitative traits); and an LRT with penalized Firth regression (for binary traits). For quantitative traits, we use the HC3 sandwich estimator to perform a Wald test for variants whose minor allele count (MAC) is above 1000 (see --rare-mac ). For the remaining variants, we fit a heteroskedastic linear model (HLM) 20 Y = E\\\\alpha + E^2\\\\zeta + G\\\\beta + (G\\\\odot E)\\\\gamma + \\\\epsilon where we assume $\\\\epsilon \\\\sim N(0, D)$ with $D$ a diagonal matrix with entries $\\\\sigma^2\\\\exp{(1 + E\\\\theta_1 + E^2\\\\theta_2)}$. This formulation allows for the phenotypic variance to also depend on the risk factor $E$. By incorporating both the linear and quadratic effect of $E$ in the mean and variance of $Y$, this model provides robustness to heteroskedasticity ( Note: the $E^2$ terms are only added when $E$ is quantitative ). Wald tests are then performed for the null hypotheses listed above. For binary traits, we consider the following interaction model \\\\text{logit}(\\\\mu) = E\\\\alpha + E^2\\\\zeta + G\\\\beta + (G\\\\odot E)\\\\gamma where we also include a non-linear effect for $E$ (not when $E$ is categorical). The HC3 sandwich estimator is used in a Wald test for variants whose MAC is above 1000 (see --rare-mac ); otherwise the model-based standard errors are used. When Firth is specified, we only apply the Firth correction using LRT if the p-value for the interaction term $\\\\gamma$ from the Wald test is below a specified threshold (see --pThresh ). So the added $E^2$ term as well as the use of the Firth penalty help with case-control imbalance and model misspecification for the effect of $E$ on the phenotype.\",\"title\":\"Step 2 : Interaction testing\"},{\"location\":\"overview/#survival-analysis\",\"text\":\"Starting with regenie v4.0 , we have enabled survival analysis, improving power for analyzing common diseases where time-to-event data are available by leveraging the Cox proportional hazards model. We assume that samples without an event are right-censored, i.e. the survival time is only known to be greater than a certain value. It is important to encode this information correctly into the phenotypes .\",\"title\":\"Survival analysis\"},{\"location\":\"overview/#step-1-whole-genome-model-using-cox-ridge-regression\",\"text\":\"In Step 1, Level 0 is run using linear ridge regression with the time variable taken as the response. In Level 1, instead of linear/logistic ridge regression, we use Cox ridge regression 21 to combine the predictions $W$ from Level 0. \\\\lambda_i(t) = \\\\lambda_0(t) \\\\exp(\\\\mu_i + w_i^\\\\intercal \\\\alpha) where $\\\\lambda_0(t)$ is the baseline hazard function, and, for the $i$-th individual, $\\\\lambda_i(t)$ is the hazard function, $w_i$ is the set of ridge predictors from Level 0, and $\\\\mu_i$ captures the effects of non-genetic covariates. We fit the Cox ridge regression for a range of shrinkage parameters and select the best value using a K-fold cross-validation scheme.
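To illustrate the Cox ridge fit just described, here is a toy sketch using the third-party lifelines package (an assumption of convenience; regenie implements its own Cox ridge in C++). Time-to-event phenotypes are encoded as an observed time plus an event indicator, with non-events right-censored, and the L2 `penalizer` plays the role of the shrinkage parameter chosen by cross-validation in the text.

```python
import numpy as np
import pandas as pd
from lifelines import CoxPHFitter   # third-party; any Cox ridge fitter works

rng = np.random.default_rng(3)
N = 1000
df = pd.DataFrame(rng.standard_normal((N, 3)), columns=["w1", "w2", "w3"])
risk = np.exp(0.5 * df["w1"])                 # true effect on the hazard of w1
time = rng.exponential(1.0 / risk)            # latent event times
cens = rng.exponential(2.0, N)                # independent censoring times
df["T"] = np.minimum(time, cens)              # observed follow-up time
df["E"] = (time <= cens).astype(int)          # 1 = event, 0 = right-censored

# L2-penalized (ridge) Cox fit on the predictor columns w1..w3
cph = CoxPHFitter(penalizer=0.1).fit(df, duration_col="T", event_col="E")
print(cph.params_)   # shrunken log hazard ratios; w1 should dominate
```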
With the estimated $\\\\hat{\\\\alpha}$, we construct LOCO predictions which capture population structure, relatedness and polygenicity.\",\"title\":\"Step 1: Whole genome model using Cox ridge regression\"},{\"location\":\"overview/#step-2-single-variant-and-gene-based-burden-tests\",\"text\":\"For time-to-event traits, the Cox proportional hazards regression model is used to test the association between the phenotype and the genetic marker. Note : the only supported gene-based test is the burden test. The Cox proportional hazards regression model includes the LOCO predictions from Step 1 as an offset. \\\\lambda_i(t) = \\\\lambda_0(t) \\\\exp(\\\\mu_i + w_{i, LOCO} + g_i \\\\beta) We test the null hypothesis $H_0: \\\\beta = 0$ using a score test. When the event rate is low, the standard score test doesn't control Type I error well at rare genetic markers. To reduce the bias and achieve a more robust test, regenie uses Firth correction 22 when the p-value from the standard score test is below a threshold (default 0.05). The Firth correction provides a well-calibrated test, but comes with a computational cost. To mitigate this burden in Cox regression, we include a fast approximate test, which gives results very similar to the exact Firth test.\",\"title\":\"Step 2: Single variant and gene-based burden tests\"},{\"location\":\"overview/#missing-phenotype-data\",\"text\":\"With QTs, missing values are mean-imputed in Step 1 and they are dropped when testing each phenotype in Step 2 (unless using --force-impute ). With BTs, missing values are mean-imputed in Step 1 when fitting the level 0 linear ridge regression and they are dropped when fitting the level 1 logistic ridge regression for each trait. In Step 2, missing values are dropped when testing each trait. To remove all samples that have missing values at any of the P phenotypes from the analysis, use option --strict in Steps 1 and 2. This can also be used when analyzing a single trait to only keep individuals with complete data by setting the phenotype values of individuals to remove to NA. Note: imputation is only applied to phenotypes; covariates are not allowed to have missing data.\",\"title\":\"Missing Phenotype data\"},{\"location\":\"overview/#references\",\"text\":\"Breiman, L. Stacked regressions. Machine learning 24 , 49\\u201364 (1996). \\u21a9 Firth, D. Bias reduction of maximum likelihood estimates. Biometrika 80 , 27\\u201338 (1993). \\u21a9 Butler, R. W. Saddlepoint approximations with applications . (Cambridge University Press, 2007). \\u21a9 Dey, R., Schmidt, E. M., Abecasis, G. R. & Lee, S. A fast and accurate algorithm to test for binary phenotypes and its application to PheWAS. The American Journal of Human Genetics 101 , 37\\u201349 (2017). \\u21a9 Zhou, W. et al. Set-based rare variant association tests for biobank scale sequencing data sets. medRxiv (2021). \\u21a9 Zhao, Z. et al. UK biobank whole-exome sequence binary phenome analysis with robust region-based rare-variant test. Am J Hum Genet 106 , 3\\u201312 (2020). \\u21a9 Lee, S., Abecasis, G. R., Boehnke, M. & Lin, X. Rare-variant association analysis: Study designs and statistical tests. Am J Hum Genet 95 , 5\\u201323 (2014). \\u21a9 Backman, J. D. et al. Exome sequencing and analysis of 454,787 UK biobank participants. Nature 599 , 628\\u2013634 (2021). \\u21a9 Wu, M. C. et al. Rare-variant association testing for sequencing data with the sequence kernel association test. Am J Hum Genet 89 , 82\\u201393 (2011). \\u21a9 Lee, S., Wu, M. C. & Lin, X.
Optimal tests for rare variant effects in sequencing association studies. Biostatistics 13 , 762\\u201375 (2012). \\u21a9 Davies, R. B. Algorithm AS 155: The distribution of a linear combination of \\u03c7 2 random variables. Applied Statistics 29 , 323\\u2013333 (1980). \\u21a9 Wu, B., Guan, W. & Pankow, J. S. On efficient and accurate calculation of significance p-values for sequence kernel association testing of variant set. Ann Hum Genet 80 , 123\\u201335 (2016). \\u21a9 Kuonen, D. Miscellanea. Saddlepoint approximations for distributions of quadratic forms in normal variables. Biometrika 86 , 929\\u2013935 (1999). \\u21a9 Liu, Y. & Xie, J. Cauchy combination test: A powerful test with analytic p-value calculation under arbitrary dependency structures. J Am Stat Assoc 115 , 393\\u2013402 (2020). \\u21a9 Liu, Y. et al. ACAT: A fast and powerful p value combination method for rare-variant analysis in sequencing studies. Am J Hum Genet 104 , 410\\u2013421 (2019). \\u21a9 \\u21a9 Ziyatdinov, A. et al. Joint testing of rare variant burden scores using non-negative least squares. The American Journal of Human Genetics 111 , 2139\\u20132149 (2024). \\u21a9 MacKinnon, J. G. & White, H. Some heteroskedasticity-consistent covariance matrix estimators with improved finite sample properties. Journal of Econometrics 29 , 305\\u2013325 (1985). \\u21a9 Tchetgen Tchetgen, E. J. & Kraft, P. On the robustness of tests of genetic associations incorporating gene-environment interaction when the environmental exposure is misspecified. Epidemiology 22 , 257\\u201361 (2011). \\u21a9 Voorman, A., Lumley, T., McKnight, B. & Rice, K. Behavior of QQ-plots and genomic control in studies of gene-environment interaction. PLoS One 6 , (2011). \\u21a9 Young, A. I., Wauthier, F. L. & Donnelly, P. Identifying loci affecting trait variability and detecting interactions in genome-wide association studies. Nat Genet 50 , 1608\\u20131614 (2018). \\u21a9 Simon, N., Friedman, J. H., Hastie, T. & Tibshirani, R. Regularization paths for cox\\u2019s proportional hazards model via coordinate descent. Journal of statistical software 39 , 1\\u201313 (2011). \\u21a9 Heinze, G. & Schemper, M. A solution to the problem of monotone likelihood in cox regression. Biometrics 57 , 114\\u2013119 (2001). \\u21a9\",\"title\":\"References\"},{\"location\":\"performance/\",\"text\":\"Performance We assessed the performance of regenie against 3 other programs for GWAS on large cohorts. BOLT-LMM Loh et al. (2015) Nature Genetics 47, 284\\u2013290 [Software] SAIGE - Zhou et al. (2018) Nature Genetics 50, 1335\\u20131341 [Software] fastGWA - Jiang et al. (2019) Nature Genetics 51, 1749\\u20131755 [Software] Full details for all the runs are available in our paper . Quantitative traits We ran regenie , BOLT-LMM and fastGWA on 3 quantitative phenotypes measured on white British UK Biobank participants (LDL, N=389,189; Body mass index [BMI], N=407,609; and Bilirubin, N=388,303) where testing was performed on 9.8 million imputed SNPs. The Manhattan plots for all three phenotypes (see below) show good agreement between the methods with both regenie and BOLT-LMM resulting in stronger association signals relative to fastGWA at known peaks of association (note that in the plots, the scaling of the y-axis changes above the upper dashed line). We assessed the computational requirements of all three methods using a larger set of 50 quantitative traits from the UK Biobank, looking at computational timings as well as memory usage. 
For regenie and BOLT LMM, 469,336 LD-pruned SNPs were used as model SNPs when fitting the null model (step 1) and for fastGWA, these SNPs were used to compute the sparse GRM (timing not included). Tests were performed on 11.4M imputed SNPs (step 2). From the table above, regenie was 151x faster than BOLT-LMM in elapsed time for Step 1 and 11.5x faster for Step 2, which translated into $>$30x overall speed-up in terms of elapsed time. In addition, regenie had a maximum memory usage of 12.9 GB, which is mostly due to regenie only reading a small portion of the genotype data at a time, whereas BOLT-LMM required 50GB. regenie was 2.8x faster than fastGWA, but fastGWA is very memory efficient and used only a maximum of 2GB. Binary traits regenie was compared to BOLT-LMM and SAIGE on a set of four binary traits measured on white British UK Biobank participants (coronary artery disease [CAD], N=352,063, case-control ratio=1:11; glaucoma, N=406,927, case-control ratio=1:52; colorectal cancer, N=407,746, case-control ratio=1:97; and thyroid cancer, N=407,746, case-control ratio=1:660) and Step 2 testing was performed on 11.6 million imputed SNPs. A novel and fast approximate Firth correction was used in regenie as well as a SPA correction. As seen in the Manhattan plots below (note that the scaling of the y-axis changes above the upper dashed line), all four approaches show very good agreement for the most balanced trait (CAD; case-control ratio=1:11), but as the fraction of cases decreases BOLT-LMM tends to give inflated test statistics. However both regenie with Firth and SPA corrections, as well as SAIGE, which uses SPA correction, are all robust to this inflation and show similar agreement for the associations detected. We assessed the computational requirements of regenie and SAIGE using a larger set of 50 binary traits from the UK Biobank that have a range of different case-control ratios and distinct missing data patterns. 469,336 LD-pruned SNPs were used as model SNPs when fitting the null model (step 1) and tests were performed on 11.4M imputed SNPs (step 2). In step 1, regenie was run using LOOCV and for two traits SAIGE did not finish as it took longer than the 4-week limit. In step 2, the approximate Firth correction was used in regenie in addition to SPA correction. From the table above, Step 1 of regenie was about 350x faster and required only $40\\\\%$ of the memory used by SAIGE. In Step 2, regenie Firth and SPA were 2x and 3x faster than SAIGE in CPU time, respectively, but were 21x and 34x faster than SAIGE in elapsed time, respectively, which suggests that regenie makes better use of parallelization in this step. Overall, regenie using Firth correction was 8x faster than SAIGE in CPU hours and 26.8x faster in elapsed time. All runs above were done on the same computing environment (16 virtual CPU cores of a 2.1GHz AMD EPYC 7571 processor, 64GB of memory, and 600GB solid-state disk). Timings improvements in v2.2 We have several changes in regenie v2.2 to improve the computational efficiency: The genotype file reading in Step 1 is now multi-threaded for all supported formats (i.e. BED, PGEN, and BGEN) and uses a faster file reading implementation for BGEN v1.2 format with 8-bit encoding. From our timings experiments below, these changes helped reduce the CPU time by 40-60% depending on the input format. Note that we used a small number of SNPs for Step 1 in our experiments (20K) so the timing improvement will not be as high in a real Step 1 run where ~500K SNPs would be used. 
We have improved the implementation of the score tests for binary traits to reduce the number of matrix operations performed, and this reduced the CPU timings by ~60% from the previous version 2.0.2. Note that there is an added memory cost of ~8NKP bytes [N=#samples; K=#covariates; P=#phenotypes] so ~800MB extra for a UKB 500K run with 10 traits & 20 covariates. We have also made use of the sparsity of the genotype vector for rarer variants in Step 2 (more so with binary traits) and this reduced the timing in our experiments by ~20% on average. In our experiments, common variants are defined as having MAF > 5% and rare variants are defined as having MAF < 1%, and no correction (i.e. Firth/SPA) is used. We have added new options --write-null-firth and --use-null-firth to reduce the timing of Step 2 with approximate Firth when run in parallel jobs split into smaller chunks within chromosomes. More specifically, --write-null-firth can be used in Step 1 to fit the null model for the approximate Firth test and store the resulting estimates to file. Then in Step 2, specifying --use-null-firth will re-use these parameter estimates to reduce the timing of the approximate Firth null model fitting. We thank Juha Karjalainen for suggesting this feature. Note: in our timings experiments, the PGEN genotype file only includes hard-calls. We ran a single trait in regenie and each setting was replicated 5 times. Gene-based testing regenie v3.0 adds a wide range of gene-based tests . We have performed simulation experiments to assess the calibration of the tests with quantitative and binary traits using real genetic data from the UK Biobank where we randomly selected 100,000 samples obtained from the set of white British participants (see the \\\"Methods\\\" section of the Regenie paper for details on phenotype simulation where we set the heritability to 20%). Using whole exome sequencing data, we constructed variant sets incorporating functional annotations (LoF and missense, where missense variants were predicted as deleterious using a score based on 5 in-silico algorithms), as well as allele frequency thresholds focusing on rarer variation (1%, 0.1% and 0.01%). The SKAT/ACAT tests were applied only to variant sets using a 1% or 0.01% AAF threshold and the SBAT and BURDEN-ACAT joint tests combined all burden mask signals from the 1%, 0.1%, 0.01% and singleton thresholds. 1000 genes on even chromosomes were randomly selected and tested for association (causal variants were on odd chromosomes). The QQ plots below show the distribution of p-values for each test across the different annotation categories (run in Regenie v3.2). Quantitative traits Using a 1% allele frequency cutoff for the SKAT/ACAT tests. Binary traits We simulated highly imbalanced phenotypes with a disease prevalence of 1% (case-control ratio of 1:99) and applied Firth/SPA correction to the tests. Using a 1% allele frequency cutoff for the SKAT/ACAT tests. Using a 0.01% allele frequency cutoff for the SKAT/ACAT tests.\",\"title\":\"Performance\"},{\"location\":\"performance/#performance\",\"text\":\"We assessed the performance of regenie against 3 other programs for GWAS on large cohorts. BOLT-LMM Loh et al. (2015) Nature Genetics 47, 284\\u2013290 [Software] SAIGE - Zhou et al. (2018) Nature Genetics 50, 1335\\u20131341 [Software] fastGWA - Jiang et al.
(2019) Nature Genetics 51, 1749\\u20131755 [Software] Full details for all the runs are available in our paper .\",\"title\":\"Performance\"},{\"location\":\"performance/#quantitative-traits\",\"text\":\"We ran regenie , BOLT-LMM and fastGWA on 3 quantitative phenotypes measured on white British UK Biobank participants (LDL, N=389,189; Body mass index [BMI], N=407,609; and Bilirubin, N=388,303) where testing was performed on 9.8 million imputed SNPs. The Manhattan plots for all three phenotypes (see below) show good agreement between the methods with both regenie and BOLT-LMM resulting in stronger association signals relative to fastGWA at known peaks of association (note that in the plots, the scaling of the y-axis changes above the upper dashed line). We assessed the computational requirements of all three methods using a larger set of 50 quantitative traits from the UK Biobank, looking at computational timings as well as memory usage. For regenie and BOLT LMM, 469,336 LD-pruned SNPs were used as model SNPs when fitting the null model (step 1) and for fastGWA, these SNPs were used to compute the sparse GRM (timing not included). Tests were performed on 11.4M imputed SNPs (step 2). From the table above, regenie was 151x faster than BOLT-LMM in elapsed time for Step 1 and 11.5x faster for Step 2, which translated into $>$30x overall speed-up in terms of elapsed time. In addition, regenie had a maximum memory usage of 12.9 GB, which is mostly due to regenie only reading a small portion of the genotype data at a time, whereas BOLT-LMM required 50GB. regenie was 2.8x faster than fastGWA, but fastGWA is very memory efficient and used only a maximum of 2GB.\",\"title\":\"Quantitative traits\"},{\"location\":\"performance/#binary-traits\",\"text\":\"regenie was compared to BOLT-LMM and SAIGE on a set of four binary traits measured on white British UK Biobank participants (coronary artery disease [CAD], N=352,063, case-control ratio=1:11; glaucoma, N=406,927, case-control ratio=1:52; colorectal cancer, N=407,746, case-control ratio=1:97; and thyroid cancer, N=407,746, case-control ratio=1:660) and Step 2 testing was performed on 11.6 million imputed SNPs. A novel and fast approximate Firth correction was used in regenie as well as a SPA correction. As seen in the Manhattan plots below (note that the scaling of the y-axis changes above the upper dashed line), all four approaches show very good agreement for the most balanced trait (CAD; case-control ratio=1:11), but as the fraction of cases decreases BOLT-LMM tends to give inflated test statistics. However both regenie with Firth and SPA corrections, as well as SAIGE, which uses SPA correction, are all robust to this inflation and show similar agreement for the associations detected. We assessed the computational requirements of regenie and SAIGE using a larger set of 50 binary traits from the UK Biobank that have a range of different case-control ratios and distinct missing data patterns. 469,336 LD-pruned SNPs were used as model SNPs when fitting the null model (step 1) and tests were performed on 11.4M imputed SNPs (step 2). In step 1, regenie was run using LOOCV and for two traits SAIGE did not finish as it took longer than the 4-week limit. In step 2, the approximate Firth correction was used in regenie in addition to SPA correction. From the table above, Step 1 of regenie was about 350x faster and required only $40\\\\%$ of the memory used by SAIGE. 
In Step 2, regenie Firth and SPA were 2x and 3x faster than SAIGE in CPU time, respectively, but were 21x and 34x faster than SAIGE in elapsed time, respectively, which suggests that regenie makes better use of parallelization in this step. Overall, regenie using Firth correction was 8x faster than SAIGE in CPU hours and 26.8x faster in elapsed time. All runs above were done on the same computing environment (16 virtual CPU cores of a 2.1GHz AMD EPYC 7571 processor, 64GB of memory, and 600GB solid-state disk).\",\"title\":\"Binary traits\"},{\"location\":\"performance/#timings-improvements-in-v22\",\"text\":\"We have several changes in regenie v2.2 to improve the computational efficiency: The genotype file reading in Step 1 is now multi-threaded for all supported formats (i.e. BED, PGEN, and BGEN) and uses a faster file reading implementation for BGEN v1.2 format with 8-bit encoding. From our timings experiments below, these changes helped reduce the CPU time by 40-60% depending on the input format. Note that we used a small number of SNPs for Step 1 in our experiments (20K) so the timing improvement will not be as high in a real Step 1 run where ~500K SNPs would be used. We have improved the implementation of the score tests for binary traits to reduce the number of matrix operations performed, and this reduced the CPU timings by ~60% from the previous version 2.0.2. Note that there is an added memory cost of ~8NKP bytes [N=#samples; K=#covariates; P=#phenotypes] so ~800MB extra for a UKB 500K run with 10 traits & 20 covariates. We have also made use of the sparsity of the genotype vector for rarer variants in Step 2 (more so with binary traits) and this reduced the timing in our experiments by ~20% on average. In our experiments, common variants are defined as having MAF > 5% and rare variants are defined as having MAF < 1%, and no correction (i.e. Firth/SPA) is used. We have added new options --write-null-firth and --use-null-firth to reduce the timing of Step 2 with approximate Firth when run in parallel jobs split into smaller chunks within chromosomes. More specifically, --write-null-firth can be used in Step 1 to fit the null model for the approximate Firth test and store the resulting estimates to file. Then in Step 2, specifying --use-null-firth will re-use these parameter estimates to reduce the timing of the approximate Firth null model fitting. We thank Juha Karjalainen for suggesting this feature. Note: in our timings experiments, the PGEN genotype file only includes hard-calls. We ran a single trait in regenie and each setting was replicated 5 times.\",\"title\":\"Timings improvements in v2.2\"},{\"location\":\"performance/#gene-based-testing\",\"text\":\"regenie v3.0 adds a wide range of gene-based tests . We have performed simulation experiments to assess the calibration of the tests with quantitative and binary traits using real genetic data from the UK Biobank where we randomly selected 100,000 samples obtained from the set of white British participants (see the \\\"Methods\\\" section of the Regenie paper for details on phenotype simulation where we set the heritability to 20%). Using whole exome sequencing data, we constructed variant sets incorporating functional annotations (LoF and missense, where missense variants were predicted as deleterious using a score based on 5 in-silico algorithms), as well as allele frequency thresholds focusing on rarer variation (1%, 0.1% and 0.01%).
The SKAT/ACAT tests were applied only to variant sets using a 1% or 0.01% AAF threshold and the SBAT and BURDEN-ACAT joint tests combined all burden mask signals from the 1%, 0.1%, 0.01% and singleton thresholds. 1000 genes on even chromosomes were randomly selected and tested for association (causal variants were on odd chromosomes). The QQ plots below show the distribution of p-values for each test across the different annotation categories (run in Regenie v3.2).\",\"title\":\"Gene-based testing\"},{\"location\":\"performance/#quantitative-traits_1\",\"text\":\"Using a 1% allele frequency cutoff for the SKAT/ACAT tests.\",\"title\":\"Quantitative traits\"},{\"location\":\"performance/#binary-traits_1\",\"text\":\"We simulated highly imbalanced phenotypes with a disease prevalence of 1% (case-control ratio of 1:99) and applied Firth/SPA correction to the tests. Using a 1% allele frequency cutoff for the SKAT/ACAT tests. Using a 0.01% allele frequency cutoff for the SKAT/ACAT tests.\",\"title\":\"Binary traits\"},{\"location\":\"recommendations/\",\"text\":\"Recommendations for UK Biobank analysis regenie is ideally suited for large-scale analyses such as 500K UK Biobank (UKBB) data, where records are available for thousands of phenotypes. We provide below a few guidelines on how to perform such analysis on the UKBB files that all UKBB approved researchers have access to. Pre-processing We will first go over important steps to consider before running regenie . Selection of traits regenie can perform whole genome regression on multiple traits at once, which is where higher computational gains are obtained. As different traits can have distinct missing patterns, regenie uses an imputation scheme to handle missing data. From the real data applications we have performed so far with traits having up to ~20% (for quantitative) and ~5% (for binary) missing observations, our imputation scheme resulted in nearly identical results as from discarding missing observations when analyzing each trait separately (see the paper for details). Hence, we recommend analyzing traits in groups that have similar missingness patterns with a reasonably low amount of missingness (<15%). The number of phenotypes in a group will affect the computational resources required, and the table below shows typical computational requirements based on using 500,000 markers in step 1 split in blocks of 1000 and using blocks of size 200 when testing SNPs in step 2. The estimates are shown when step 1 of regenie is run in low-memory mode so that within-block predictions are temporarily stored on disk (see Documentation). In the following sections, we'll assume traits (let's say binary) and covariates used in the analysis have been chosen and data are in files ukb_phenotypes_BT.txt and ukb_covariates.txt , which follow the format requirement for regenie (see Documentation). Preparing genotype file Step 1 of a regenie run requires a single genotype file as input; we recommend using array genotypes for this step. The UKBB genotype files are split by chromosome, so we recommend using PLINK to merge the files using the following code.
NOTE : please change XXX to your own UKBB application ID number rm -f list_beds.txt for chr in {2..22}; do echo \\\"ukb_cal_chr${chr}_v2.bed ukb_snp_chr${chr}_v2.bim ukbXXX_int_chr1_v2_s488373.fam\\\" >> list_beds.txt; done plink \\\\ --bed ukb_cal_chr1_v2.bed \\\\ --bim ukb_snp_chr1_v2.bim \\\\ --fam ukbXXX_int_chr1_v2_s488373.fam \\\\ --merge-list list_beds.txt \\\\ --make-bed --out ukb_cal_allChrs Exclusion files Quality control (QC) filters can be applied using PLINK2 to filter out samples and markers in the genotype file prior to step 1 of regenie . Note: regenie will throw an error if a low-variance SNP is included in the step 1 run. Hence, the user should run adequate QC filtering prior to running regenie to identify and remove such SNPs. For example, to filter out SNPs with minor allele frequency (MAF) below 1%, minor allele count (MAC) below 100, genotype missingness above 10% and Hardy-Weinberg equilibrium p-value below 10^{-15} , and samples with more than 10% missingness, plink2 \\\\ --bfile ukb_cal_allChrs \\\\ --maf 0.01 --mac 100 --geno 0.1 --hwe 1e-15 \\\\ --mind 0.1 \\\\ --write-snplist --write-samples --no-id-header \\\\ --out qc_pass Step 1 We recommend running regenie using multi-threading (8+ threads), which will decrease the overall runtime of the program. As this step can be quite memory intensive (due to storing block predictions), we recommend using the option --lowmem , where the number of phenotypes analyzed will determine how much disk space is required (see table above). Running step 1 of regenie (by default, all available threads are used) ./regenie \\\\ --step 1 \\\\ --bed ukb_cal_allChrs \\\\ --extract qc_pass.snplist \\\\ --keep qc_pass.id \\\\ --phenoFile ukb_phenotypes_BT.txt \\\\ --covarFile ukb_covariates.txt \\\\ --bt \\\\ --bsize 1000 \\\\ --lowmem \\\\ --lowmem-prefix tmpdir/regenie_tmp_preds \\\\ --out ukb_step1_BT For P phenotypes analyzed, this will generate a set of $P$ files ending with .loco which contain the genetic predictions using a LOCO scheme that will be needed for step 2, as well as a prediction list file ukb_step1_BT_pred.list , which lists the names of these prediction files and can be used as input for step 2. Step 2 As Steps 1 and 2 are completely decoupled in regenie , you could either use all the traits for testing in step 2 or select a subset of the traits to perform association testing. Furthermore, you can use the same Step 1 output to test on array, exome or imputed variants; below, we will illustrate testing on imputed variants. Step 2 of regenie can be run in parallel across chromosomes so if you have access to multiple machines, we recommend splitting the runs over chromosomes (using 8+ threads). Running regenie testing on a single chromosome (here chromosome 1) and using the fast Firth correction as fallback for p-values below 0.01 ./regenie \\\\ --step 2 \\\\ --bgen ukb_imp_chr1_v3.bgen \\\\ --ref-first \\\\ --sample ukbXXX_imp_chr1_v3_s487395.sample \\\\ --phenoFile ukb_phenotypes_BT.txt \\\\ --covarFile ukb_covariates.txt \\\\ --bt \\\\ --firth --approx --pThresh 0.01 \\\\ --pred ukb_step1_BT_pred.list \\\\ --bsize 400 \\\\ --split \\\\ --out ukb_step2_BT_chr1 This will create separate association results files for each phenotype as ukb_step2_BT_chr1_*.regenie . When running the SKAT/ACAT gene-based tests, we recommend using at most 2 threads and instead parallelizing the runs over partitions of the genome (e.g.
groups of genes).\",\"title\":\"UKBB Analysis\"},{\"location\":\"recommendations/#recommendations-for-uk-biobank-analysis\",\"text\":\"regenie is ideally suited for large-scale analyses such as 500K UK Biobank (UKBB) data, where records are available for thousands of phenotypes. We provide below a few guidelines on how to perform such analysis on the UKBB files that all UKBB approved researchers have access to.\",\"title\":\"Recommendations for UK Biobank analysis\"},{\"location\":\"recommendations/#pre-processing\",\"text\":\"We will first go over important steps to consider before running regenie .\",\"title\":\"Pre-processing\"},{\"location\":\"recommendations/#selection-of-traits\",\"text\":\"regenie can perform whole genome regression on multiple traits at once, which is where higher computational gains are obtained. As different traits can have distinct missing patterns, regenie uses an imputation scheme to handle missing data. From the real data applications we have performed so far with traits having up to ~20% (for quantitative) and ~5% (for binary) missing observations, our imputation scheme resulted in nearly identical results as from discarding missing observations when analyzing each trait separately (see the paper for details). Hence, we recommend analyzing traits in groups that have similar missingness patterns with a reasonably low amount of missingness (<15%). The number of phenotypes in a group will affect the computational resources required, and the table below shows typical computational requirements based on using 500,000 markers in step 1 split in blocks of 1000 and using blocks of size 200 when testing SNPs in step 2. The estimates are shown when step 1 of regenie is run in low-memory mode so that within-block predictions are temporarily stored on disk (see Documentation). In the following sections, we'll assume traits (let's say binary) and covariates used in the analysis have been chosen and data are in files ukb_phenotypes_BT.txt and ukb_covariates.txt , which follow the format requirement for regenie (see Documentation).\",\"title\":\"Selection of traits\"},{\"location\":\"recommendations/#preparing-genotype-file\",\"text\":\"Step 1 of a regenie run requires a single genotype file as input; we recommend using array genotypes for this step. The UKBB genotype files are split by chromosome, so we recommend using PLINK to merge the files using the following code. NOTE : please change XXX to your own UKBB application ID number rm -f list_beds.txt for chr in {2..22}; do echo \\\"ukb_cal_chr${chr}_v2.bed ukb_snp_chr${chr}_v2.bim ukbXXX_int_chr1_v2_s488373.fam\\\" >> list_beds.txt; done plink \\\\ --bed ukb_cal_chr1_v2.bed \\\\ --bim ukb_snp_chr1_v2.bim \\\\ --fam ukbXXX_int_chr1_v2_s488373.fam \\\\ --merge-list list_beds.txt \\\\ --make-bed --out ukb_cal_allChrs\",\"title\":\"Preparing genotype file\"},{\"location\":\"recommendations/#exclusion-files\",\"text\":\"Quality control (QC) filters can be applied using PLINK2 to filter out samples and markers in the genotype file prior to step 1 of regenie . Note: regenie will throw an error if a low-variance SNP is included in the step 1 run. Hence, the user should run adequate QC filtering prior to running regenie to identify and remove such SNPs.
For example, to filter out SNPs with minor allele frequency (MAF) below 1%, minor allele count (MAC) below 100, genotype missingness above 10% and Hardy-Weinberg equilibrium p-value below 10^{-15} , and samples with more than 10% missingness, plink2 \\\\ --bfile ukb_cal_allChrs \\\\ --maf 0.01 --mac 100 --geno 0.1 --hwe 1e-15 \\\\ --mind 0.1 \\\\ --write-snplist --write-samples --no-id-header \\\\ --out qc_pass\",\"title\":\"Exclusion files\"},{\"location\":\"recommendations/#step-1\",\"text\":\"We recommend running regenie using multi-threading (8+ threads), which will decrease the overall runtime of the program. As this step can be quite memory intensive (due to storing block predictions), we recommend using the option --lowmem , where the number of phenotypes analyzed will determine how much disk space is required (see table above). Running step 1 of regenie (by default, all available threads are used) ./regenie \\\\ --step 1 \\\\ --bed ukb_cal_allChrs \\\\ --extract qc_pass.snplist \\\\ --keep qc_pass.id \\\\ --phenoFile ukb_phenotypes_BT.txt \\\\ --covarFile ukb_covariates.txt \\\\ --bt \\\\ --bsize 1000 \\\\ --lowmem \\\\ --lowmem-prefix tmpdir/regenie_tmp_preds \\\\ --out ukb_step1_BT For P phenotypes analyzed, this will generate a set of $P$ files ending with .loco which contain the genetic predictions using a LOCO scheme that will be needed for step 2, as well as a prediction list file ukb_step1_BT_pred.list , which lists the names of these prediction files and can be used as input for step 2.\",\"title\":\"Step 1\"},{\"location\":\"recommendations/#step-2\",\"text\":\"As Steps 1 and 2 are completely decoupled in regenie , you could either use all the traits for testing in step 2 or select a subset of the traits to perform association testing. Furthermore, you can use the same Step 1 output to test on array, exome or imputed variants; below, we will illustrate testing on imputed variants. Step 2 of regenie can be run in parallel across chromosomes so if you have access to multiple machines, we recommend splitting the runs over chromosomes (using 8+ threads). Running regenie testing on a single chromosome (here chromosome 1) and using the fast Firth correction as fallback for p-values below 0.01 ./regenie \\\\ --step 2 \\\\ --bgen ukb_imp_chr1_v3.bgen \\\\ --ref-first \\\\ --sample ukbXXX_imp_chr1_v3_s487395.sample \\\\ --phenoFile ukb_phenotypes_BT.txt \\\\ --covarFile ukb_covariates.txt \\\\ --bt \\\\ --firth --approx --pThresh 0.01 \\\\ --pred ukb_step1_BT_pred.list \\\\ --bsize 400 \\\\ --split \\\\ --out ukb_step2_BT_chr1 This will create separate association results files for each phenotype as ukb_step2_BT_chr1_*.regenie . When running the SKAT/ACAT gene-based tests, we recommend using at most 2 threads and instead parallelizing the runs over partitions of the genome (e.g. groups of genes).\",\"title\":\"Step 2\"}]}"
  },
  {
    "path": "docs/site/search/worker.js",
    "content": "var base_path = 'function' === typeof importScripts ? '.' : '/search/';\nvar allowSearch = false;\nvar index;\nvar documents = {};\nvar lang = ['en'];\nvar data;\n\nfunction getScript(script, callback) {\n  console.log('Loading script: ' + script);\n  $.getScript(base_path + script).done(function () {\n    callback();\n  }).fail(function (jqxhr, settings, exception) {\n    console.log('Error: ' + exception);\n  });\n}\n\nfunction getScriptsInOrder(scripts, callback) {\n  if (scripts.length === 0) {\n    callback();\n    return;\n  }\n  getScript(scripts[0], function() {\n    getScriptsInOrder(scripts.slice(1), callback);\n  });\n}\n\nfunction loadScripts(urls, callback) {\n  if( 'function' === typeof importScripts ) {\n    importScripts.apply(null, urls);\n    callback();\n  } else {\n    getScriptsInOrder(urls, callback);\n  }\n}\n\nfunction onJSONLoaded () {\n  data = JSON.parse(this.responseText);\n  var scriptsToLoad = ['lunr.js'];\n  if (data.config && data.config.lang && data.config.lang.length) {\n    lang = data.config.lang;\n  }\n  if (lang.length > 1 || lang[0] !== \"en\") {\n    scriptsToLoad.push('lunr.stemmer.support.js');\n    if (lang.length > 1) {\n      scriptsToLoad.push('lunr.multi.js');\n    }\n    if (lang.includes(\"ja\") || lang.includes(\"jp\")) {\n      scriptsToLoad.push('tinyseg.js');\n    }\n    for (var i=0; i < lang.length; i++) {\n      if (lang[i] != 'en') {\n        scriptsToLoad.push(['lunr', lang[i], 'js'].join('.'));\n      }\n    }\n  }\n  loadScripts(scriptsToLoad, onScriptsLoaded);\n}\n\nfunction onScriptsLoaded () {\n  console.log('All search scripts loaded, building Lunr index...');\n  if (data.config && data.config.separator && data.config.separator.length) {\n    lunr.tokenizer.separator = new RegExp(data.config.separator);\n  }\n\n  if (data.index) {\n    index = lunr.Index.load(data.index);\n    data.docs.forEach(function (doc) {\n      documents[doc.location] = doc;\n    });\n    console.log('Lunr pre-built index loaded, search ready');\n  } else {\n    index = lunr(function () {\n      if (lang.length === 1 && lang[0] !== \"en\" && lunr[lang[0]]) {\n        this.use(lunr[lang[0]]);\n      } else if (lang.length > 1) {\n        this.use(lunr.multiLanguage.apply(null, lang));  // spread operator not supported in all browsers: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/Spread_operator#Browser_compatibility\n      }\n      this.field('title');\n      this.field('text');\n      this.ref('location');\n\n      for (var i=0; i < data.docs.length; i++) {\n        var doc = data.docs[i];\n        this.add(doc);\n        documents[doc.location] = doc;\n      }\n    });\n    console.log('Lunr index built, search ready');\n  }\n  allowSearch = true;\n  postMessage({config: data.config});\n  postMessage({allowSearch: allowSearch});\n}\n\nfunction init () {\n  var oReq = new XMLHttpRequest();\n  oReq.addEventListener(\"load\", onJSONLoaded);\n  var index_path = base_path + '/search_index.json';\n  if( 'function' === typeof importScripts ){\n      index_path = 'search_index.json';\n  }\n  oReq.open(\"GET\", index_path);\n  oReq.send();\n}\n\nfunction search (query) {\n  if (!allowSearch) {\n    console.error('Assets for search still loading');\n    return;\n  }\n\n  var resultDocuments = [];\n  var results = index.search(query);\n  for (var i=0; i < results.length; i++){\n    var result = results[i];\n    doc = documents[result.ref];\n    doc.summary = doc.text.substring(0, 200);\n    
resultDocuments.push(doc);\n  }\n  return resultDocuments;\n}\n\nif( 'function' === typeof importScripts ) {\n  onmessage = function (e) {\n    if (e.data.init) {\n      init();\n    } else if (e.data.query) {\n      postMessage({ results: search(e.data.query) });\n    } else {\n      console.error(\"Worker - Unrecognized message: \" + e);\n    }\n  };\n}\n"
  },
  {
    "path": "docs/site/sitemap.xml",
    "content": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">\n</urlset>"
  },
  {
    "path": "example/covariates.txt",
    "content": "FID IID V1 V2 V3\n1 1 1.46837294454993 1.93779743016325 0.152887004505393\n2 2 -1.2234390803815 -1.63408619199948 -0.190201446835255\n3 3 0.0711531925667286 0.0863906292357564 0.14254739715665\n4 4 -1.92175859469857 -1.08321238171368 0.208887472917422\n5 5 -0.0373289339713292 -1.02834354890759 0.113500249292194\n6 6 1.12924506994131 1.1642622220428 -1.02039868555522\n7 7 0.124570790547411 -0.561055497363546 0.266002220960573\n8 8 2.31188685141419 -2.08340899187172 0.312355654322118\n9 9 0.832119089619467 0.743684102977159 1.4746796732347\n10 10 1.19036673925773 -0.918912693235152 1.00113959536299\n11 11 1.01766824645197 -0.583733242666349 0.595862997663805\n12 12 -0.191218100107435 0.124413606154074 -0.445679018953628\n13 13 -0.418092416107334 -1.22068710435991 -0.25828255958323\n14 14 -1.54261453972438 -1.14822985845664 0.947663490874804\n15 15 -1.49758037380989 0.353191967570147 -1.67473384357928\n16 16 0.700744945491556 -1.62219012789145 0.387547883143897\n17 17 -1.25780414213882 -0.436076358461935 0.285499268174478\n18 18 -1.82390541901684 0.198218919902063 0.666614806000583\n19 19 1.13251580815108 -0.265775299506002 -0.0622306805127668\n20 20 0.492114198246884 -0.00131788422869951 0.62113003145282\n21 21 0.020249216380171 0.207516653723503 -0.0720650681777961\n22 22 -0.127426361484408 0.111646074875877 -0.374605100762307\n23 23 -0.351735264827349 -1.8897756276129 -1.31446887736062\n24 24 0.556201645642524 -0.927216741194837 -0.670921897387582\n25 25 -0.288227411101263 -0.773639775533156 0.918378798010682\n26 26 0.179204261620817 1.98212082136632 -1.74323255012925\n27 27 -2.99510044590467 1.01591614247507 -1.30656023151223\n28 28 0.687164856614254 0.022038634604751 -0.653736026177123\n29 29 -0.458120017369043 -1.66046537945916 -0.764679088096362\n30 30 0.298664963756903 -0.874055548644782 -0.968587352908588\n31 31 -0.704867333000956 1.32657624471891 -1.33679098153286\n32 32 0.268933322714231 0.634297194777424 1.04284699511091\n33 33 -0.0576012700122912 0.99383446207855 1.10194755457429\n34 34 -0.252690865124064 1.23747530714928 -0.0517462476666047\n35 35 0.816772061606045 1.53381358254476 0.474727910680923\n36 36 0.0813075115319745 -0.087716758923924 0.100410038903451\n37 37 -1.13911071553845 1.29564833748642 -0.478956382232604\n38 38 -2.30071562796601 0.300738731314805 -0.587279800319767\n39 39 -0.665550531017432 1.60151461457328 -0.578031352497875\n40 40 0.535504325816876 -1.17772515604129 -0.636974061582677\n41 41 -0.10719986983704 1.66696346737119 -0.540441207204226\n42 42 1.73168907075819 0.512908202298933 0.349054138222236\n43 43 1.11493936987582 1.19669056647332 0.228930749067015\n44 44 -0.346445574383567 0.367702234607877 -2.01861235544002\n45 45 -0.739824907504471 -1.30905646878106 1.52321090007486\n46 46 1.41317473388373 -0.603924429292353 -0.203040768129117\n47 47 1.85238519526021 -0.706549112690604 -0.261031628260632\n48 48 -0.704988799276764 0.114948625495636 0.355374004192616\n49 49 -0.717470008055363 -1.44512231702472 0.355989548318291\n50 50 -1.28379457125631 -0.382175768757946 0.520030874615731\n51 51 -0.980701226576033 -0.344572505242403 -0.345634149621111\n52 52 -1.80608450923547 -1.01970469537602 -2.3276246808876\n53 53 0.519469347430971 -0.844754918129089 -1.2448698272613\n54 54 -1.03887288283886 0.120699005791946 -1.69087920435963\n55 55 0.19044941960321 -0.0120025498001641 0.994938056990633\n56 56 1.36752395617169 1.84207966816907 0.0771531293507437\n57 57 0.420300599394053 -0.93219359335265 -1.31203233361224\n58 58 -0.238882262339433 
-0.0165310522813234 0.734926334380315\n59 59 0.358719740827443 -1.33502048770872 -0.263817757991046\n60 60 0.504605771212752 0.681252342506506 -0.904607552825463\n61 61 -0.934190605753742 -1.26782065594274 0.208035565503809\n62 62 -1.22455262725579 0.1509010640874 0.124649479798584\n63 63 1.30730858869995 -0.110656485868179 0.263451989549831\n64 64 1.39985768650602 1.026009752497 0.517978165296883\n65 65 0.095020228154413 -1.1000660873379 0.270727993728319\n66 66 0.608899641360111 0.540149707396411 -1.54990095597012\n67 67 0.515181246442935 1.03283997845031 -0.598311429953097\n68 68 -1.05053525281487 0.262030474106311 -1.6341839988117\n69 69 1.21927685400957 -0.857445715909335 -0.594636964825966\n70 70 1.67375624667927 -0.548726446385269 1.75395956332388\n71 71 -0.359877890356401 -3.5067326146456 1.37036291074209\n72 72 -1.09299550519151 1.44830224283004 -2.16743621882942\n73 73 -0.644736566441107 -0.089869044743121 0.0157496509969957\n74 74 -0.304781057115279 -0.899765409637697 -0.911192242073904\n75 75 -1.2386459290929 -1.08396716080357 0.53095029374935\n76 76 -1.43742182165094 -0.505799913163813 -1.07762545065595\n77 77 -0.61144987235439 1.09389573161241 0.915378408790216\n78 78 0.335552961558524 -0.151631851536941 0.520623670829672\n79 79 -1.72481360521243 0.18465858767883 -1.23789473827296\n80 80 -1.0628773718348 0.938054224929031 -1.07536881040853\n81 81 -2.0725041748917 1.28659123477811 -1.27480582537542\n82 82 0.278898903282693 0.915747535671584 0.55591984832299\n83 83 0.621800798547842 -0.0411897312845134 0.326220002147429\n84 84 1.01833141709715 -0.343103002784949 0.614665471191135\n85 85 1.09275660021931 -0.768682708638166 -0.251264904741988\n86 86 -2.19013321410478 -0.965279741272168 1.67709773629732\n87 87 0.82468268003704 1.09516657815012 -0.602551763995042\n88 88 -3.32757908284742 -0.613034406172903 0.995413942815396\n89 89 2.14490953111301 0.41713527242053 0.119270852551482\n90 90 -0.123021396945538 0.584708744638207 0.258426919134879\n91 91 1.34707751342789 0.241851724594599 1.7743815928603\n92 92 -1.48611681075474 -0.960983764449003 -1.10120747231238\n93 93 0.602164533183808 1.11310413285595 -0.986378172755791\n94 94 -1.44188156628395 0.260944168274445 0.399639205570009\n95 95 0.453576746586724 0.141478026440519 -0.534967261642291\n96 96 -0.575544600833662 1.09070201543903 0.0984300561574584\n97 97 0.175108204140961 0.0423557034039549 0.830270964031336\n98 98 0.573709971323814 -0.986655769988478 -0.971784839773789\n99 99 -1.42869342105159 0.78872631854699 -1.32537307288953\n100 100 -0.606025357733767 0.501351903678155 -0.154203629882223\n101 101 -1.45292711932486 0.443502488353178 -1.13128315823256\n102 102 0.541792990472648 -1.60372007316255 1.15784127932312\n103 103 -0.220676004523668 -0.494421188316861 1.36807060346171\n104 104 -1.6615260282383 0.159178498429531 1.83335537436144\n105 105 -0.0830454257821562 -0.447210910574869 0.718339375016038\n106 106 0.522189914991477 0.9967269236725 0.449048331266212\n107 107 -1.13210863595693 0.479113626661238 -1.56296622564352\n108 108 -0.36380704330813 -1.7603273967847 -1.77651590916735\n109 109 -1.68813107977496 1.16438431893709 2.03822965391838\n110 110 1.58554022854969 -0.00724971621805309 -1.11900650247014\n111 111 -0.998291492606998 0.00462754240275728 1.88127607035992\n112 112 0.223497419145838 -1.32110280636091 0.524684732289228\n113 113 0.573627223018489 0.311650995532555 -0.784376215211658\n114 114 -1.61446325699769 1.08295716575695 1.79168179051707\n115 115 1.44890500897322 0.0976050676871842 0.413386003360211\n116 116 
-1.33405141338608 0.784867755549606 0.454339460511901\n117 117 1.0281760163777 -0.944176657853787 -0.849636093259524\n118 118 0.442700323648412 1.00456272654013 -0.443199964334976\n119 119 1.01841904624174 0.268656649347677 1.33990112172931\n120 120 0.382476233707342 -0.133406454088862 -1.05408793400058\n121 121 -0.0606854651306287 -0.311512521960318 0.605184001519958\n122 122 -0.639314792216002 -1.51802669112881 -0.236607587664175\n123 123 -0.991173875599332 -0.321572448363572 -0.236993428622127\n124 124 0.895207300368696 0.245650549532927 1.34382738721527\n125 125 0.0111936474428609 0.380086793801876 1.81260457198851\n126 126 -0.38738113615745 -0.491841324400474 -1.01039561599705\n127 127 -0.447750196400813 0.0866189943446268 -0.567089508145962\n128 128 -1.48635114845613 -1.07099393611687 -0.830055469382584\n129 129 0.430022245376917 -0.00469201513204358 -0.46713689804785\n130 130 0.782370060240196 -1.15989373062202 -1.17827870092203\n131 131 0.136338134070754 1.71167953754715 1.48059989978744\n132 132 -0.431976638941024 -0.578190737622724 0.425291746107116\n133 133 1.40246708059836 0.502745109733989 -0.944177272188536\n134 134 0.720069270112552 -0.935145947387637 0.443871660569289\n135 135 -0.31657492076431 -0.409901081300051 -0.145753345426725\n136 136 0.11060517957052 -1.04605134276557 -1.12208334105097\n137 137 0.346605932663756 0.770750900962366 -0.2274781373841\n138 138 -0.199304950037624 1.51815799081416 1.46398209533402\n139 139 -0.252212740769667 1.66993552357908 0.306825359307706\n140 140 -0.888389791747243 0.232247159480735 0.499389375456319\n141 141 -0.890347761887952 -0.728192084856763 0.551271266403087\n142 142 0.152029144559496 2.23859123192986 0.274484191198587\n143 143 -0.0701403417878827 0.927860684923288 0.556779551740602\n144 144 0.639338644011505 0.149843448572823 -0.418168329086757\n145 145 1.35683552548457 0.71545062816494 0.480718819473869\n146 146 -0.0671855408009469 -1.00497960305324 -0.591564299670578\n147 147 -0.0417901194712151 -0.740058558181522 -0.880215881028116\n148 148 0.208014563936714 0.310692176853491 0.336489601755259\n149 149 0.974528118011505 0.700687451289159 -1.96272108587345\n150 150 0.434609242618411 1.06890916681593 -0.75356705741942\n151 151 -2.02537341898317 -0.188261566304373 0.471200182938936\n152 152 0.860627995182198 -0.720148339360642 -0.770997708738759\n153 153 -0.9230520186645 1.51275275292047 2.05561908754561\n154 154 -2.89482526571973 0.0142176828942394 -0.673661017251696\n155 155 2.14764230980825 1.11836182023814 0.561414328616239\n156 156 0.191691835549594 1.69954007643337 1.10060477315528\n157 157 1.93391937196771 -2.14759361064734 0.103062267210891\n158 158 -1.00666688093144 0.27389584173868 1.25113180056298\n159 159 0.867271912923313 0.550421539001593 -0.606209597163998\n160 160 0.239744640961947 0.871455671502197 -1.33433166716954\n161 161 -1.16673162601532 -0.923729049541734 0.882931371107068\n162 162 1.19623249643198 0.267536839445432 0.140965625037645\n163 163 -0.606314083879166 0.83108108713256 2.24244959411634\n164 164 0.627891419343745 0.316909753958488 -0.0892333697150752\n165 165 0.681239498389416 0.0685010764996153 1.60785602940155\n166 166 -0.137622766408917 -0.219358052457248 0.0236094358672588\n167 167 1.65738740105425 -0.168396179630893 -0.391093556590735\n168 168 -0.0532183341365486 -0.0971607395158495 -0.185753106858513\n169 169 1.33325582492572 0.04748966314378 0.930597282445587\n170 170 0.503095045815024 0.620836572358743 1.32907834680476\n171 171 -0.833086643323573 1.57378927452153 0.282482508773599\n172 172 
0.263414370073657 1.46655819123308 -0.440357980899672\n173 173 -3.01149418318528 -0.758603658576047 1.29712100326943\n174 174 0.775355123038991 -0.345255487752741 -0.432940728990831\n175 175 -0.736899573490554 0.475328905667524 -0.486614757521328\n176 176 -0.705382244908399 -0.317658781371191 -0.950665118202799\n177 177 -1.10205074755081 1.04709464276291 0.358444545273432\n178 178 0.291626211880807 -2.24414390554635 -0.6114433881961\n179 179 -0.616258050186511 -0.213352918058229 -0.206848220065374\n180 180 -0.258221445691404 1.13323440192968 -0.520510426658636\n181 181 1.0173814591059 0.335230742824172 1.80773614044068\n182 182 -0.754582244072964 0.558863580112927 2.22272421500294\n183 183 0.962254883537811 0.952711758939796 0.26257629591948\n184 184 1.32361093350711 0.726199653116666 -1.20705098917927\n185 185 0.451636911802343 1.05241988015365 -0.704114083257637\n186 186 1.02960288576369 -0.915563518318848 0.224463903618335\n187 187 0.215069384601449 1.58967716981485 -2.36256657114121\n188 188 0.857706091343454 0.469806032976427 0.218452229983434\n189 189 0.9829607592393 -0.0244241933878937 -0.425470731739842\n190 190 0.113277626559119 0.0683882014999158 -0.277787951143784\n191 191 -0.244180929885842 -1.82173257463395 0.394327358484765\n192 192 0.705378901432609 -0.442735603414705 -0.723030937642743\n193 193 1.15231800216468 0.594526314383486 1.49937846443481\n194 194 -0.0824836561278376 0.502688722215784 1.02692569356233\n195 195 -0.51533276030488 -1.111301710122 -1.00163288726315\n196 196 0.352508349429745 -1.58333473521584 -0.321749790049319\n197 197 0.509737117278231 1.89680480039478 -0.415803626548454\n198 198 0.408429190860523 -0.283375339941657 -1.7310558218924\n199 199 2.63318259159922 -1.37220135481674 -1.07011609149891\n200 200 -0.300070178621705 0.116053618430679 -0.128957573780873\n201 201 0.0618158070410208 -0.257104455391491 0.700960192076722\n202 202 1.1967442135137 -2.53945571436358 -1.18290930991659\n203 203 -0.390733348779326 -0.565154112561019 1.54609780274072\n204 204 -0.635434530912397 -0.783691703875507 -0.241253478697975\n205 205 0.453171623475049 -0.963323712166816 -0.17591279169021\n206 206 0.497137673599994 -0.298292757497476 1.52472484723303\n207 207 0.983044864096795 -0.182609328743605 1.08285068777089\n208 208 0.121863300305586 -0.785472105457895 -0.024584067857987\n209 209 0.238155363079482 0.0801843336257735 0.699724938898343\n210 210 -1.54623884763307 0.583293080796527 -1.01858438189806\n211 211 -0.968055736721857 -0.638630086897045 1.40318747864031\n212 212 -0.00349665688432686 1.414451490428 -1.73762899052836\n213 213 -0.00457607386072327 -0.525635302551504 -1.45373867624621\n214 214 -0.366134409356681 -0.666235621241421 -0.505336833078742\n215 215 1.89024923544878 0.741764948637318 -1.0155203395098\n216 216 -0.918308636321609 -0.0868765217265333 -0.509826529424641\n217 217 -0.591074858895521 0.566777955769371 0.99434556045379\n218 218 2.25670365684985 -0.315748293528222 0.365477681343191\n219 219 -1.57233822739218 -0.850792763875279 0.904498367464702\n220 220 -0.0682314485516372 -0.795866178677262 1.66930347041193\n221 221 -0.533686551757233 -0.465695098147421 -0.981684351830436\n222 222 -0.650744878298164 0.715410607037681 0.815096345271332\n223 223 -1.73483240099147 1.15510538447811 0.744813039979084\n224 224 1.08722268180171 1.19413693413092 -0.240273728814771\n225 225 -0.195458351526377 -1.12708948860974 0.0483699010155095\n226 226 -0.377051472458811 -0.762275563055573 0.738733077024699\n227 227 -0.18142884904785 1.08723994079201 
0.8524808816003\n228 228 -1.08408259984194 -0.11623045844534 0.0936085321037543\n229 229 0.980923357579053 -1.36753072430618 0.501720860018512\n230 230 -1.41202933071188 0.0433877791285629 -0.0803230747661825\n231 231 -0.629131853656102 -0.549328075703398 1.62922107570998\n232 232 1.70458554314851 -0.127408146304953 -0.247666680166544\n233 233 0.0114661310717726 -1.14727853739666 0.601845776132469\n234 234 0.0055302906322214 0.548837223017019 0.998310060979668\n235 235 -1.47169674493235 -2.51801462427645 -0.565532684958423\n236 236 0.375248849183357 1.88760055391672 -0.256481599828999\n237 237 0.264296911165029 -1.33259359592374 0.0487652939597159\n238 238 0.17719004528226 -0.193701321174802 2.39465797007541\n239 239 -0.188329921279762 0.319697619525775 0.00805067416711713\n240 240 0.638949836393311 -0.175011559041567 -0.565935078051149\n241 241 -0.521513973408463 0.790904235650827 0.086850161445771\n242 242 0.119178273767935 -0.18779594966625 -0.665865387079382\n243 243 1.48244098394491 -1.24068304414465 1.73031811007768\n244 244 1.68422348507766 -1.12591550053402 -1.06202319844172\n245 245 -0.617850744913592 -0.719053349775352 -0.395999153143368\n246 246 -0.962425935625463 -1.37957764887356 -0.935088714487197\n247 247 -0.676307216820063 0.455639497484506 0.988656830560121\n248 248 -0.429803260399542 -0.95897644987832 0.349991019844699\n249 249 -0.734494319621749 0.503398184636999 -0.400247221684379\n250 250 -0.989528415514397 0.839136294267078 0.358362266762923\n251 251 -0.297578949619101 -0.304572573458164 0.820639931338014\n252 252 0.0466280488595221 -0.125534729133879 0.753705551836252\n253 253 -0.736475374938984 0.818502653041985 -0.0959214566187271\n254 254 0.762687847709622 -0.992359225839721 -0.619136896682819\n255 255 1.96977235146495 -1.56438387774728 0.595399026635289\n256 256 1.44477711575254 0.102540189204669 0.119666699672157\n257 257 2.93644221009186 0.132408626655896 -0.571253063956656\n258 258 -1.64340825575722 -1.71647380349127 0.624636435244488\n259 259 0.199821292254365 0.0108931677881925 0.832449327129177\n260 260 -2.89904313683447 -0.665617585302284 -0.32470371115627\n261 261 0.0110238851144448 -1.1241917371126 -1.70605047017061\n262 262 -0.111135443892226 -2.02513040821815 -0.595550283069873\n263 263 0.296956907479138 -1.49235331862979 0.674294916418284\n264 264 -1.58806982701449 -0.87293526942144 0.618676976339737\n265 265 -1.20599790707864 -0.668896146097892 -0.285472859290853\n266 266 -0.984446076989712 0.405512286601059 -0.992812408330569\n267 267 0.873981222118058 0.130636382839266 0.223138783736783\n268 268 1.15137901211244 0.269341912478004 -0.370276060801822\n269 269 -0.237522073285531 -1.16941542159274 -0.292375309742259\n270 270 0.521451560069117 0.229210872300789 0.117145303707847\n271 271 -0.721109911792939 0.973353505683337 0.725840321699984\n272 272 -1.71579509456485 0.331053754108528 -0.309686238454332\n273 273 1.41631965677878 0.40590149419227 -0.776581169847057\n274 274 -1.26764883299582 0.602765703689368 0.20167773869961\n275 275 0.350036194906335 0.944979275948657 -2.23712574677926\n276 276 0.0841339962579271 1.07519734277879 -0.674417825943384\n277 277 -0.38880461324062 1.68072282441877 -1.55548606060339\n278 278 0.712520575225716 -0.783525953643761 -0.0692911345356956\n279 279 1.37756660256157 -1.09914891659094 -0.112098933740495\n280 280 -2.09233520668221 0.122388401239843 0.0745129102324621\n281 281 0.0444170182142923 0.789377590728428 0.781404555410717\n282 282 -1.04676495729127 0.560383614998423 -0.620929998169237\n283 283 -0.464676156313911 
0.78832301597018 0.933006648642985\n284 284 1.57997933674385 0.808076546805481 0.620128179317599\n285 285 -0.062235704507387 -1.1873901649143 0.233185939981184\n286 286 0.64560171538111 1.19921535710876 -0.360792071066905\n287 287 1.7600404868672 0.86971773121211 -1.1225428057822\n288 288 -1.73949071882268 0.70280563800663 -0.514981312838415\n289 289 1.65736411683642 -1.1437399326179 -1.3031282156944\n290 290 -0.296467972076117 0.444850043146052 1.20300225757892\n291 291 1.82460367451811 -1.94034273153412 0.155062051356154\n292 292 -0.217403205034682 -2.29536071176852 1.44441176711928\n293 293 -1.49109063118087 -0.952864885483231 -0.648197148440543\n294 294 0.417681616508874 -1.1291558960589 -0.627291915158984\n295 295 -0.335530289243836 -1.56795725710696 -2.19360085121039\n296 296 0.562029616139559 -1.00515927301376 0.471217907177394\n297 297 1.10716329206117 0.275315611994426 0.726923493424635\n298 298 -0.376045966162074 -0.545860364876667 0.695162762922577\n299 299 -1.01169881487892 0.421285842494731 -0.127456459580007\n300 300 -0.233735457417624 0.234001376530343 -0.279210500761352\n301 301 -0.191041983162951 -1.26656674240632 0.268176102391084\n302 302 -2.14079033007813 -0.661582208240859 -1.45671616402897\n303 303 0.171697377948325 1.19790586667098 -0.303744643932396\n304 304 -0.51178438806716 -0.370359287706688 0.492310733041444\n305 305 -2.1521556116937 0.211447694327415 1.13994230933885\n306 306 1.78336611406654 1.81258706009664 2.21509213437292\n307 307 -1.66459000979854 1.05607813943896 1.01618343377454\n308 308 -1.23831539739212 0.174125333480291 -0.211814661895119\n309 309 0.748196335956892 1.31893965065117 -0.811219973030862\n310 310 0.288925126152792 -0.790047372466064 0.774907436039824\n311 311 0.218752902211661 -0.295691693617648 -0.369586168708846\n312 312 -1.31711372445576 -0.382572814059577 -0.698393008336702\n313 313 0.746165575759412 -1.10415473494955 0.926750246438677\n314 314 0.558474345090657 0.46862507716215 -1.33073599449416\n315 315 -0.949839805579198 1.45613405551077 -0.171630169552659\n316 316 -1.00348096735362 0.0952170340869715 -0.20894959044786\n317 317 1.06190967772141 0.938709503535816 -0.191898660438186\n318 318 -0.203767226505989 1.45446480582589 -0.266331940952407\n319 319 0.514968594844155 0.474162886855075 1.3500815223921\n320 320 -0.876709951973288 -0.64595588629736 -0.354804926698781\n321 321 1.05242687576023 0.11435781464134 -1.2750296485476\n322 322 -1.01289579517596 -0.0270744081097838 -0.974583689186217\n323 323 0.628597759786413 0.871057057558597 0.878410823840972\n324 324 -1.18536815436662 0.237656085500654 -0.0974493631407294\n325 325 -0.263263415095149 -1.44171449442434 -0.308858102387813\n326 326 2.187931429856 -0.0448832133939714 -0.0916768117939348\n327 327 0.721356341862245 1.92071654161163 -0.953374022528519\n328 328 -0.848363397809302 -0.917608912846968 0.552862656060789\n329 329 -0.988155592500341 -0.462056013220845 -1.00920674215424\n330 330 -1.94058845096277 -0.856316424989464 -1.0908984822071\n331 331 -0.0505232383727004 0.531545092071155 -0.00176212129430811\n332 332 -0.145093288027965 -0.726857837930715 0.00291095009469781\n333 333 -0.619765511690178 0.717145096834534 0.405405931529327\n334 334 0.764451579068619 1.47070599637375 -0.669689678089283\n335 335 0.183358257558995 -0.0831282511368581 0.433822596451396\n336 336 1.01683887092612 -0.486724368684249 -0.00835953903736881\n337 337 -0.231482025850402 -1.04112143594274 -0.312872327654019\n338 338 0.125226073338694 -0.612502253575669 1.9613718864005\n339 339 -1.5853989455743 
-0.0332048306778363 -0.809711201156886\n340 340 -0.270814906273368 1.15150627999799 -1.31156103028854\n341 341 -0.327455226114505 -0.299027016341994 1.15257784935637\n342 342 2.29830643498446 2.01125132335651 -0.248521329194713\n343 343 0.00699298749237794 -0.256989059952088 2.39698946550435\n344 344 -0.840138408391043 1.4845727187796 -0.581505185126562\n345 345 0.299514178506534 -1.49891507259923 -1.79871610742881\n346 346 0.686550825661833 -0.528289957966227 1.48029842182583\n347 347 -0.45001370605832 0.684382356816486 -0.55071117530745\n348 348 -1.31452129325411 0.552333014970822 -0.955732804108503\n349 349 -0.889682558174095 -3.75500098361948 0.951415382296232\n350 350 0.127431832080324 -0.632802305886436 -0.50627913852573\n351 351 -0.112048132821479 -0.503652039649308 0.658186027591559\n352 352 0.735555560160039 -0.0103500057568764 -0.288778640557989\n353 353 0.0770376641676405 0.26837203940238 0.139251213851349\n354 354 -1.99258172213664 -0.145854539283609 -0.937226004051109\n355 355 -1.4921334606253 0.0760290066359275 -0.638203445360267\n356 356 -0.363076454646316 0.341697577899652 -1.66226713422437\n357 357 0.187916092171361 -1.52064147800193 -0.363134323290455\n358 358 -0.676067238311435 -1.41454450811721 -0.360419845512291\n359 359 -1.01818400664782 2.70689103153848 -0.736411170974868\n360 360 0.643991543128254 -0.84244496364137 0.00437956636730414\n361 361 0.845439889290235 -0.656289941005438 -0.194854955864464\n362 362 2.17243394080198 1.499611543096 0.502358577899669\n363 363 0.478976866575967 -1.17700453542394 -0.617667076190438\n364 364 -2.40615948097953 -1.81461389360954 0.802839788167135\n365 365 -0.706072441494411 2.02228197750379 -0.393308607676681\n366 366 -0.0805784158116847 0.716674158295844 -1.20246006637217\n367 367 -0.732597151075011 0.415601284249849 0.597589404816867\n368 368 0.667621992718285 1.23843683950396 -0.418220413620227\n369 369 0.443331659603023 -1.78033791585069 1.64559431645404\n370 370 -1.49759006993034 -1.1661095878874 1.38886171190481\n371 371 0.635231718115305 -0.570508750553901 0.270193218342475\n372 372 0.795574536325585 1.38952054318901 -0.205021901201099\n373 373 0.56175250763996 0.368582315379003 0.299124792547437\n374 374 -0.00544817501613968 1.47285432322312 -0.175075944959192\n375 375 -1.69155549579082 -0.522545241727092 1.35619066795485\n376 376 -0.338280840627237 0.64061716359716 0.495058300057172\n377 377 -1.84062788360435 -0.355930890969487 -1.47395465638291\n378 378 -0.397429042886146 -0.30562148009681 0.120362124409584\n379 379 -1.4104795179153 0.152843003542133 1.6088215156232\n380 380 -0.178517695940274 0.293126647080975 1.04376969864201\n381 381 -0.948112187155606 -0.104263127842352 2.01674968189082\n382 382 -1.10574447253708 -2.08945842089725 1.25622755105695\n383 383 -0.979730638517688 0.0244491496356701 0.0131309866640956\n384 384 -0.673906243517 -0.531445823842769 -0.790683740625223\n385 385 -1.13364545455527 -0.404537314875365 0.155635186040039\n386 386 -0.544213920615126 0.0943483987549832 -0.742451229248762\n387 387 0.10566817085088 -0.228181117759124 -0.221996955410298\n388 388 1.08989772734619 1.07364626904529 0.762173293406158\n389 389 1.40469250953944 -0.850683420122761 -0.58653583423649\n390 390 0.949585868985058 1.03517537412855 -1.87918662807997\n391 391 -0.286124446529293 0.865896478013803 -0.807980429679611\n392 392 2.41071373910191 -1.40414958026209 -1.43279514328709\n393 393 -1.17629915685666 1.22302514103282 -1.64891740853755\n394 394 -1.0735260519271 -0.181197904670042 1.8538972843443\n395 395 
-0.249694418967338 0.568791794274843 0.799644391373855\n396 396 0.53313024140876 0.823257011438389 0.324712688536363\n397 397 -0.550317144410589 0.0436005278514163 0.207942449005962\n398 398 -0.155761262486867 -1.14990959168668 -0.764908731593276\n399 399 0.773772943927362 1.81975886909259 0.429061628933955\n400 400 0.674723374180998 0.877810943242963 0.155783475742268\n401 401 0.416502491862716 0.371382546084973 0.633832727114365\n402 402 0.936820455308487 0.374779777022824 -1.16031064831011\n403 403 0.138571094768104 1.00722231541159 1.94388506551436\n404 404 1.01977424998782 0.934263379091051 -0.190841598063365\n405 405 -0.515549291460727 -0.761080273866144 1.49489860185877\n406 406 1.57382617280842 -0.6285950389952 0.221592151733597\n407 407 -0.284188972855081 1.31926052837367 1.20558354387342\n408 408 -0.492467477098651 0.304497413279957 -1.30021219352541\n409 409 -1.55223859906654 -0.169894465482847 1.73532648540805\n410 410 -1.04365171199321 -1.07757830499426 0.402542715177659\n411 411 0.337313503023761 0.0199790004416643 -1.01955597162537\n412 412 0.567261737834834 0.343272499619591 0.783825957039741\n413 413 1.33158903264638 -1.01907899097234 0.98033162542721\n414 414 -0.943621151033741 0.511887198257892 1.95695832761542\n415 415 1.29204107155955 0.685314763403991 1.35338962435436\n416 416 1.11776473650734 -0.216629079829078 0.718584038343791\n417 417 0.285225183001854 0.871964787681798 1.08138246909397\n418 418 0.148853156447729 -0.418646580112241 -0.00875510873997657\n419 419 0.9634192645878 2.06024524083206 1.4282865686655\n420 420 0.6982079781091 -0.255827825174668 -0.801982390755466\n421 421 0.101877872863502 0.337286115522036 0.142716608348129\n422 422 -0.13514834258085 -0.343489389055775 -0.758550307258719\n423 423 0.780193790567571 -0.262196400577283 0.222568348036814\n424 424 0.521884516147666 -0.417526584412931 -0.906475641944693\n425 425 -0.296271249700639 0.464767162492862 0.573146275298587\n426 426 2.12583517147699 -1.58032505698937 -0.380971930202218\n427 427 -0.164761291230633 1.19279208925353 0.0835927061035099\n428 428 0.14894821294202 0.76227578842078 0.762337668292594\n429 429 1.6625574780885 -1.6267191199893 -1.60935036726086\n430 430 0.153377215776703 0.139804711923426 -0.578691503310998\n431 431 -0.0609168353150149 1.00578542421274 -1.77184370913112\n432 432 1.0195152003104 0.37007924120698 -0.102790498134211\n433 433 0.639411740672879 -0.0510277115266688 -1.00291397287483\n434 434 0.320803185489027 -1.51000030797827 -0.5238142805057\n435 435 0.166796515737489 -0.125374640293625 0.0127264778716913\n436 436 -0.183655679680401 -0.89949432185981 -0.709872187274793\n437 437 2.71174074551991 1.57493867587087 -0.688704599128927\n438 438 0.30320726335083 0.398285443256563 0.452069614194104\n439 439 -0.412194239388631 -1.55399453877199 -0.0474936213927865\n440 440 -1.32210482514306 -0.0332684613144726 -0.564368017626722\n441 441 0.498774860942123 0.824228609926728 0.258177910093118\n442 442 0.21463224937359 -2.06128512704817 -1.93738868157208\n443 443 1.63849897616018 1.38946467311879 -0.782600239716082\n444 444 1.52595304052576 -0.173539684026595 -0.947528184549204\n445 445 0.0943440845394678 0.048931842077725 -0.382348837456813\n446 446 -1.45168435143579 0.461979227971236 0.705776365553554\n447 447 1.52072550218058 0.726264516797679 -0.298656274471634\n448 448 -0.323154618456321 0.556172618285506 -0.65020645754813\n449 449 2.28150475194183 1.2238359582073 -0.162320784068865\n450 450 -0.00321711228625246 1.97248349042151 -0.146851872836864\n451 451 
-0.556550139004402 -0.887450564229401 0.0246591820773215\n452 452 0.870946331189198 -1.4242008489949 -0.228144189377569\n453 453 -0.109528854075021 1.29851845650738 1.21485370791291\n454 454 -0.19355636344534 -0.416879433808446 -1.31833494834954\n455 455 -0.411465876118703 -0.086353157737709 -1.88835452998271\n456 456 -2.4756239289431 -0.567341520954236 1.58452126894583\n457 457 0.60407680403628 -0.104022236597136 0.303493523505913\n458 458 0.435994231991519 0.490238626736052 0.465480501523705\n459 459 1.21081371343492 -0.94108333025015 0.0647524048924119\n460 460 -2.58974911648449 1.28896474154759 -1.05216239641956\n461 461 1.68527704564746 -0.169581954989677 -1.4370621269365\n462 462 -1.15537249457013 2.48262760909883 0.956932733058004\n463 463 -0.8369395055165 0.327073379726694 -0.0250871351453328\n464 464 -0.0246541906360803 1.01311111432097 -0.0406603252497205\n465 465 -0.393540020013427 0.193263006981446 -0.245017187295811\n466 466 0.117928972785921 0.969861570326182 -1.68803941979426\n467 467 -2.05095990457672 -0.717641462001806 -0.434291460991705\n468 468 -0.791287688998045 0.619984113947591 -0.191140071315513\n469 469 -0.661430923721985 -0.0264585781045273 -0.815567832840785\n470 470 0.110540452409726 -1.60122797696722 -1.47297570682647\n471 471 -1.13396161893926 0.302603493382267 1.17916441880041\n472 472 -0.428447500162837 0.032378655629795 -1.44127777219403\n473 473 1.97777565279044 0.188174543632119 0.496490678160897\n474 474 0.478443080714877 -0.247911441813345 -0.389620540233551\n475 475 1.88049273822024 0.758524313832996 0.815524985613147\n476 476 -0.94451472563566 -1.46724267422952 0.0614521434616583\n477 477 1.20862127917688 -1.29044795160467 0.161638205861625\n478 478 -0.426800286844427 -2.13199751519076 -0.889888607904783\n479 479 0.0701446752362957 -0.814190152930211 1.54613706801577\n480 480 0.0247771557425567 2.21242490448656 0.170594216971674\n481 481 -2.14479871510897 -1.26350470899781 -0.624638080485053\n482 482 0.219042700250454 -0.997236453336354 0.895137214974744\n483 483 -0.838546037838302 -1.36990425123737 0.392828897434324\n484 484 1.95646567445163 -0.766893627230944 -2.25829087824114\n485 485 -0.623694160458828 0.0617174904526453 0.223828879773566\n486 486 0.105750753213039 -0.718526178019354 1.6796384333845\n487 487 0.352984673775129 0.034045039443356 -0.236623000367563\n488 488 -0.86099663862995 -0.899747742375208 0.739197695312269\n489 489 1.90188671459261 0.945309117844356 0.270334048921035\n490 490 0.232188200392006 -0.766806972425336 1.44920475451477\n491 491 0.439708463348097 -0.100496086063684 1.19993066069441\n492 492 -0.470814417895129 0.0876334399810952 0.0778379117905729\n493 493 0.720297214674885 1.55770018984445 1.7692979541215\n494 494 -0.996123733624525 0.0915562557433139 0.846450462395275\n495 495 -0.138496948697237 0.0402966317738195 0.333453434403395\n496 496 0.537127981267897 -0.0564681049010715 0.48119562320416\n497 497 -0.0511684766636894 1.20246232935816 0.693052533966201\n498 498 -1.46506280238144 1.17424105978661 0.642855642554621\n499 499 -0.516533720439536 -0.576053588281794 0.161069003359382\n500 500 -1.04827117969018 0.926623357058696 0.86635561319519\n"
  },
  {
    "path": "example/covariates_wBin.txt",
    "content": "FID\tIID\tV1\tV2\tV3\tV4\tV5\n1\t1\t1.46837294454993\t1.93779743016325\t0.152887004505393\t1\turban\n2\t2\t-1.2234390803815\t-1.63408619199948\t-0.190201446835255\t2\tother\n3\t3\t0.0711531925667286\t0.0863906292357564\t0.14254739715665\t1\trural\n4\t4\t-1.92175859469857\t-1.08321238171368\t0.208887472917422\t2\turban\n5\t5\t-0.0373289339713292\t-1.02834354890759\t0.113500249292194\t1\turban\n6\t6\t1.12924506994131\t1.1642622220428\t-1.02039868555522\t1\tother\n7\t7\t0.124570790547411\t-0.561055497363546\t0.266002220960573\t1\turban\n8\t8\t2.31188685141419\t-2.08340899187172\t0.312355654322118\t2\tother\n9\t9\t0.832119089619467\t0.743684102977159\t1.4746796732347\t1\turban\n10\t10\t1.19036673925773\t-0.918912693235152\t1.00113959536299\t2\tother\n11\t11\t1.01766824645197\t-0.583733242666349\t0.595862997663805\t2\tother\n12\t12\t-0.191218100107435\t0.124413606154074\t-0.445679018953628\t2\trural\n13\t13\t-0.418092416107334\t-1.22068710435991\t-0.25828255958323\t1\trural\n14\t14\t-1.54261453972438\t-1.14822985845664\t0.947663490874804\t1\trural\n15\t15\t-1.49758037380989\t0.353191967570147\t-1.67473384357928\t1\tother\n16\t16\t0.700744945491556\t-1.62219012789145\t0.387547883143897\t1\trural\n17\t17\t-1.25780414213882\t-0.436076358461935\t0.285499268174478\t1\turban\n18\t18\t-1.82390541901684\t0.198218919902063\t0.666614806000583\t1\turban\n19\t19\t1.13251580815108\t-0.265775299506002\t-0.0622306805127668\t2\tother\n20\t20\t0.492114198246884\t-0.00131788422869951\t0.62113003145282\t1\trural\n21\t21\t0.020249216380171\t0.207516653723503\t-0.0720650681777961\t2\trural\n22\t22\t-0.127426361484408\t0.111646074875877\t-0.374605100762307\t1\turban\n23\t23\t-0.351735264827349\t-1.8897756276129\t-1.31446887736062\t2\tother\n24\t24\t0.556201645642524\t-0.927216741194837\t-0.670921897387582\t2\tother\n25\t25\t-0.288227411101263\t-0.773639775533156\t0.918378798010682\t1\tother\n26\t26\t0.179204261620817\t1.98212082136632\t-1.74323255012925\t1\tother\n27\t27\t-2.99510044590467\t1.01591614247507\t-1.30656023151223\t2\turban\n28\t28\t0.687164856614254\t0.022038634604751\t-0.653736026177123\t1\turban\n29\t29\t-0.458120017369043\t-1.66046537945916\t-0.764679088096362\t2\turban\n30\t30\t0.298664963756903\t-0.874055548644782\t-0.968587352908588\t1\tother\n31\t31\t-0.704867333000956\t1.32657624471891\t-1.33679098153286\t2\turban\n32\t32\t0.268933322714231\t0.634297194777424\t1.04284699511091\t1\tother\n33\t33\t-0.0576012700122912\t0.99383446207855\t1.10194755457429\t2\tother\n34\t34\t-0.252690865124064\t1.23747530714928\t-0.0517462476666047\t1\trural\n35\t35\t0.816772061606045\t1.53381358254476\t0.474727910680923\t2\tother\n36\t36\t0.0813075115319745\t-0.087716758923924\t0.100410038903451\t1\tother\n37\t37\t-1.13911071553845\t1.29564833748642\t-0.478956382232604\t2\turban\n38\t38\t-2.30071562796601\t0.300738731314805\t-0.587279800319767\t2\turban\n39\t39\t-0.665550531017432\t1.60151461457328\t-0.578031352497875\t1\tother\n40\t40\t0.535504325816876\t-1.17772515604129\t-0.636974061582677\t2\trural\n41\t41\t-0.10719986983704\t1.66696346737119\t-0.540441207204226\t2\trural\n42\t42\t1.73168907075819\t0.512908202298933\t0.349054138222236\t1\trural\n43\t43\t1.11493936987582\t1.19669056647332\t0.228930749067015\t2\tother\n44\t44\t-0.346445574383567\t0.367702234607877\t-2.01861235544002\t1\turban\n45\t45\t-0.739824907504471\t-1.30905646878106\t1.52321090007486\t2\tother\n46\t46\t1.41317473388373\t-0.603924429292353\t-0.203040768129117\t1\trural\n47\t47\t1.85238519526021\t-0.706549112690604\t-0.2610316282
60632\t1\turban\n48\t48\t-0.704988799276764\t0.114948625495636\t0.355374004192616\t1\tother\n49\t49\t-0.717470008055363\t-1.44512231702472\t0.355989548318291\t1\turban\n50\t50\t-1.28379457125631\t-0.382175768757946\t0.520030874615731\t1\tother\n51\t51\t-0.980701226576033\t-0.344572505242403\t-0.345634149621111\t2\turban\n52\t52\t-1.80608450923547\t-1.01970469537602\t-2.3276246808876\t1\trural\n53\t53\t0.519469347430971\t-0.844754918129089\t-1.2448698272613\t2\turban\n54\t54\t-1.03887288283886\t0.120699005791946\t-1.69087920435963\t2\tother\n55\t55\t0.19044941960321\t-0.0120025498001641\t0.994938056990633\t2\tother\n56\t56\t1.36752395617169\t1.84207966816907\t0.0771531293507437\t2\trural\n57\t57\t0.420300599394053\t-0.93219359335265\t-1.31203233361224\t2\turban\n58\t58\t-0.238882262339433\t-0.0165310522813234\t0.734926334380315\t1\turban\n59\t59\t0.358719740827443\t-1.33502048770872\t-0.263817757991046\t2\turban\n60\t60\t0.504605771212752\t0.681252342506506\t-0.904607552825463\t1\tother\n61\t61\t-0.934190605753742\t-1.26782065594274\t0.208035565503809\t1\trural\n62\t62\t-1.22455262725579\t0.1509010640874\t0.124649479798584\t1\trural\n63\t63\t1.30730858869995\t-0.110656485868179\t0.263451989549831\t1\turban\n64\t64\t1.39985768650602\t1.026009752497\t0.517978165296883\t2\turban\n65\t65\t0.095020228154413\t-1.1000660873379\t0.270727993728319\t2\tother\n66\t66\t0.608899641360111\t0.540149707396411\t-1.54990095597012\t1\turban\n67\t67\t0.515181246442935\t1.03283997845031\t-0.598311429953097\t1\turban\n68\t68\t-1.05053525281487\t0.262030474106311\t-1.6341839988117\t1\trural\n69\t69\t1.21927685400957\t-0.857445715909335\t-0.594636964825966\t2\turban\n70\t70\t1.67375624667927\t-0.548726446385269\t1.75395956332388\t1\trural\n71\t71\t-0.359877890356401\t-3.5067326146456\t1.37036291074209\t1\tother\n72\t72\t-1.09299550519151\t1.44830224283004\t-2.16743621882942\t2\trural\n73\t73\t-0.644736566441107\t-0.089869044743121\t0.0157496509969957\t1\trural\n74\t74\t-0.304781057115279\t-0.899765409637697\t-0.911192242073904\t1\turban\n75\t75\t-1.2386459290929\t-1.08396716080357\t0.53095029374935\t2\turban\n76\t76\t-1.43742182165094\t-0.505799913163813\t-1.07762545065595\t2\trural\n77\t77\t-0.61144987235439\t1.09389573161241\t0.915378408790216\t1\turban\n78\t78\t0.335552961558524\t-0.151631851536941\t0.520623670829672\t2\trural\n79\t79\t-1.72481360521243\t0.18465858767883\t-1.23789473827296\t1\turban\n80\t80\t-1.0628773718348\t0.938054224929031\t-1.07536881040853\t2\trural\n81\t81\t-2.0725041748917\t1.28659123477811\t-1.27480582537542\t2\turban\n82\t82\t0.278898903282693\t0.915747535671584\t0.55591984832299\t2\turban\n83\t83\t0.621800798547842\t-0.0411897312845134\t0.326220002147429\t1\trural\n84\t84\t1.01833141709715\t-0.343103002784949\t0.614665471191135\t1\turban\n85\t85\t1.09275660021931\t-0.768682708638166\t-0.251264904741988\t1\tother\n86\t86\t-2.19013321410478\t-0.965279741272168\t1.67709773629732\t1\tother\n87\t87\t0.82468268003704\t1.09516657815012\t-0.602551763995042\t2\trural\n88\t88\t-3.32757908284742\t-0.613034406172903\t0.995413942815396\t1\turban\n89\t89\t2.14490953111301\t0.41713527242053\t0.119270852551482\t2\tother\n90\t90\t-0.123021396945538\t0.584708744638207\t0.258426919134879\t1\turban\n91\t91\t1.34707751342789\t0.241851724594599\t1.7743815928603\t2\turban\n92\t92\t-1.48611681075474\t-0.960983764449003\t-1.10120747231238\t1\trural\n93\t93\t0.602164533183808\t1.11310413285595\t-0.986378172755791\t2\tother\n94\t94\t-1.44188156628395\t0.260944168274445\t0.399639205570009\t1\trural\n95\t95\t0.45
3576746586724\t0.141478026440519\t-0.534967261642291\t1\tother\n96\t96\t-0.575544600833662\t1.09070201543903\t0.0984300561574584\t2\tother\n97\t97\t0.175108204140961\t0.0423557034039549\t0.830270964031336\t1\trural\n98\t98\t0.573709971323814\t-0.986655769988478\t-0.971784839773789\t1\trural\n99\t99\t-1.42869342105159\t0.78872631854699\t-1.32537307288953\t2\turban\n100\t100\t-0.606025357733767\t0.501351903678155\t-0.154203629882223\t1\tother\n101\t101\t-1.45292711932486\t0.443502488353178\t-1.13128315823256\t2\trural\n102\t102\t0.541792990472648\t-1.60372007316255\t1.15784127932312\t1\tother\n103\t103\t-0.220676004523668\t-0.494421188316861\t1.36807060346171\t1\turban\n104\t104\t-1.6615260282383\t0.159178498429531\t1.83335537436144\t1\trural\n105\t105\t-0.0830454257821562\t-0.447210910574869\t0.718339375016038\t1\turban\n106\t106\t0.522189914991477\t0.9967269236725\t0.449048331266212\t2\tother\n107\t107\t-1.13210863595693\t0.479113626661238\t-1.56296622564352\t2\trural\n108\t108\t-0.36380704330813\t-1.7603273967847\t-1.77651590916735\t1\trural\n109\t109\t-1.68813107977496\t1.16438431893709\t2.03822965391838\t2\trural\n110\t110\t1.58554022854969\t-0.00724971621805309\t-1.11900650247014\t1\turban\n111\t111\t-0.998291492606998\t0.00462754240275728\t1.88127607035992\t1\tother\n112\t112\t0.223497419145838\t-1.32110280636091\t0.524684732289228\t2\turban\n113\t113\t0.573627223018489\t0.311650995532555\t-0.784376215211658\t2\tother\n114\t114\t-1.61446325699769\t1.08295716575695\t1.79168179051707\t2\turban\n115\t115\t1.44890500897322\t0.0976050676871842\t0.413386003360211\t1\tother\n116\t116\t-1.33405141338608\t0.784867755549606\t0.454339460511901\t1\turban\n117\t117\t1.0281760163777\t-0.944176657853787\t-0.849636093259524\t2\turban\n118\t118\t0.442700323648412\t1.00456272654013\t-0.443199964334976\t2\tother\n119\t119\t1.01841904624174\t0.268656649347677\t1.33990112172931\t1\tother\n120\t120\t0.382476233707342\t-0.133406454088862\t-1.05408793400058\t2\tother\n121\t121\t-0.0606854651306287\t-0.311512521960318\t0.605184001519958\t2\tother\n122\t122\t-0.639314792216002\t-1.51802669112881\t-0.236607587664175\t1\tother\n123\t123\t-0.991173875599332\t-0.321572448363572\t-0.236993428622127\t1\turban\n124\t124\t0.895207300368696\t0.245650549532927\t1.34382738721527\t2\turban\n125\t125\t0.0111936474428609\t0.380086793801876\t1.81260457198851\t1\tother\n126\t126\t-0.38738113615745\t-0.491841324400474\t-1.01039561599705\t1\tother\n127\t127\t-0.447750196400813\t0.0866189943446268\t-0.567089508145962\t2\tother\n128\t128\t-1.48635114845613\t-1.07099393611687\t-0.830055469382584\t1\trural\n129\t129\t0.430022245376917\t-0.00469201513204358\t-0.46713689804785\t2\trural\n130\t130\t0.782370060240196\t-1.15989373062202\t-1.17827870092203\t2\turban\n131\t131\t0.136338134070754\t1.71167953754715\t1.48059989978744\t1\turban\n132\t132\t-0.431976638941024\t-0.578190737622724\t0.425291746107116\t2\turban\n133\t133\t1.40246708059836\t0.502745109733989\t-0.944177272188536\t2\turban\n134\t134\t0.720069270112552\t-0.935145947387637\t0.443871660569289\t2\tother\n135\t135\t-0.31657492076431\t-0.409901081300051\t-0.145753345426725\t2\trural\n136\t136\t0.11060517957052\t-1.04605134276557\t-1.12208334105097\t1\turban\n137\t137\t0.346605932663756\t0.770750900962366\t-0.2274781373841\t2\tother\n138\t138\t-0.199304950037624\t1.51815799081416\t1.46398209533402\t2\trural\n139\t139\t-0.252212740769667\t1.66993552357908\t0.306825359307706\t2\turban\n140\t140\t-0.888389791747243\t0.232247159480735\t0.499389375456319\t2\trural\n141\t141\t-0.89
0347761887952\t-0.728192084856763\t0.551271266403087\t1\turban\n142\t142\t0.152029144559496\t2.23859123192986\t0.274484191198587\t1\tother\n143\t143\t-0.0701403417878827\t0.927860684923288\t0.556779551740602\t2\turban\n144\t144\t0.639338644011505\t0.149843448572823\t-0.418168329086757\t2\trural\n145\t145\t1.35683552548457\t0.71545062816494\t0.480718819473869\t2\tother\n146\t146\t-0.0671855408009469\t-1.00497960305324\t-0.591564299670578\t2\turban\n147\t147\t-0.0417901194712151\t-0.740058558181522\t-0.880215881028116\t1\turban\n148\t148\t0.208014563936714\t0.310692176853491\t0.336489601755259\t2\trural\n149\t149\t0.974528118011505\t0.700687451289159\t-1.96272108587345\t2\turban\n150\t150\t0.434609242618411\t1.06890916681593\t-0.75356705741942\t2\tother\n151\t151\t-2.02537341898317\t-0.188261566304373\t0.471200182938936\t1\turban\n152\t152\t0.860627995182198\t-0.720148339360642\t-0.770997708738759\t1\turban\n153\t153\t-0.9230520186645\t1.51275275292047\t2.05561908754561\t1\turban\n154\t154\t-2.89482526571973\t0.0142176828942394\t-0.673661017251696\t1\turban\n155\t155\t2.14764230980825\t1.11836182023814\t0.561414328616239\t2\turban\n156\t156\t0.191691835549594\t1.69954007643337\t1.10060477315528\t1\trural\n157\t157\t1.93391937196771\t-2.14759361064734\t0.103062267210891\t1\tother\n158\t158\t-1.00666688093144\t0.27389584173868\t1.25113180056298\t1\turban\n159\t159\t0.867271912923313\t0.550421539001593\t-0.606209597163998\t2\trural\n160\t160\t0.239744640961947\t0.871455671502197\t-1.33433166716954\t2\trural\n161\t161\t-1.16673162601532\t-0.923729049541734\t0.882931371107068\t1\turban\n162\t162\t1.19623249643198\t0.267536839445432\t0.140965625037645\t2\trural\n163\t163\t-0.606314083879166\t0.83108108713256\t2.24244959411634\t1\turban\n164\t164\t0.627891419343745\t0.316909753958488\t-0.0892333697150752\t2\tother\n165\t165\t0.681239498389416\t0.0685010764996153\t1.60785602940155\t1\turban\n166\t166\t-0.137622766408917\t-0.219358052457248\t0.0236094358672588\t1\turban\n167\t167\t1.65738740105425\t-0.168396179630893\t-0.391093556590735\t1\tother\n168\t168\t-0.0532183341365486\t-0.0971607395158495\t-0.185753106858513\t2\tother\n169\t169\t1.33325582492572\t0.04748966314378\t0.930597282445587\t2\tother\n170\t170\t0.503095045815024\t0.620836572358743\t1.32907834680476\t1\tother\n171\t171\t-0.833086643323573\t1.57378927452153\t0.282482508773599\t1\turban\n172\t172\t0.263414370073657\t1.46655819123308\t-0.440357980899672\t2\tother\n173\t173\t-3.01149418318528\t-0.758603658576047\t1.29712100326943\t2\tother\n174\t174\t0.775355123038991\t-0.345255487752741\t-0.432940728990831\t1\tother\n175\t175\t-0.736899573490554\t0.475328905667524\t-0.486614757521328\t2\trural\n176\t176\t-0.705382244908399\t-0.317658781371191\t-0.950665118202799\t1\trural\n177\t177\t-1.10205074755081\t1.04709464276291\t0.358444545273432\t1\turban\n178\t178\t0.291626211880807\t-2.24414390554635\t-0.6114433881961\t1\turban\n179\t179\t-0.616258050186511\t-0.213352918058229\t-0.206848220065374\t1\turban\n180\t180\t-0.258221445691404\t1.13323440192968\t-0.520510426658636\t2\tother\n181\t181\t1.0173814591059\t0.335230742824172\t1.80773614044068\t1\tother\n182\t182\t-0.754582244072964\t0.558863580112927\t2.22272421500294\t2\trural\n183\t183\t0.962254883537811\t0.952711758939796\t0.26257629591948\t1\trural\n184\t184\t1.32361093350711\t0.726199653116666\t-1.20705098917927\t2\trural\n185\t185\t0.451636911802343\t1.05241988015365\t-0.704114083257637\t2\trural\n186\t186\t1.02960288576369\t-0.915563518318848\t0.224463903618335\t1\turban\n187\t187\t0.2
15069384601449\t1.58967716981485\t-2.36256657114121\t2\turban\n188\t188\t0.857706091343454\t0.469806032976427\t0.218452229983434\t2\turban\n189\t189\t0.9829607592393\t-0.0244241933878937\t-0.425470731739842\t2\trural\n190\t190\t0.113277626559119\t0.0683882014999158\t-0.277787951143784\t1\tother\n191\t191\t-0.244180929885842\t-1.82173257463395\t0.394327358484765\t1\turban\n192\t192\t0.705378901432609\t-0.442735603414705\t-0.723030937642743\t1\trural\n193\t193\t1.15231800216468\t0.594526314383486\t1.49937846443481\t1\tother\n194\t194\t-0.0824836561278376\t0.502688722215784\t1.02692569356233\t2\turban\n195\t195\t-0.51533276030488\t-1.111301710122\t-1.00163288726315\t2\turban\n196\t196\t0.352508349429745\t-1.58333473521584\t-0.321749790049319\t1\trural\n197\t197\t0.509737117278231\t1.89680480039478\t-0.415803626548454\t1\trural\n198\t198\t0.408429190860523\t-0.283375339941657\t-1.7310558218924\t1\trural\n199\t199\t2.63318259159922\t-1.37220135481674\t-1.07011609149891\t2\turban\n200\t200\t-0.300070178621705\t0.116053618430679\t-0.128957573780873\t2\trural\n201\t201\t0.0618158070410208\t-0.257104455391491\t0.700960192076722\t2\trural\n202\t202\t1.1967442135137\t-2.53945571436358\t-1.18290930991659\t1\turban\n203\t203\t-0.390733348779326\t-0.565154112561019\t1.54609780274072\t2\tother\n204\t204\t-0.635434530912397\t-0.783691703875507\t-0.241253478697975\t2\turban\n205\t205\t0.453171623475049\t-0.963323712166816\t-0.17591279169021\t2\tother\n206\t206\t0.497137673599994\t-0.298292757497476\t1.52472484723303\t2\tother\n207\t207\t0.983044864096795\t-0.182609328743605\t1.08285068777089\t1\tother\n208\t208\t0.121863300305586\t-0.785472105457895\t-0.024584067857987\t2\turban\n209\t209\t0.238155363079482\t0.0801843336257735\t0.699724938898343\t1\trural\n210\t210\t-1.54623884763307\t0.583293080796527\t-1.01858438189806\t1\tother\n211\t211\t-0.968055736721857\t-0.638630086897045\t1.40318747864031\t1\trural\n212\t212\t-0.00349665688432686\t1.414451490428\t-1.73762899052836\t2\trural\n213\t213\t-0.00457607386072327\t-0.525635302551504\t-1.45373867624621\t1\turban\n214\t214\t-0.366134409356681\t-0.666235621241421\t-0.505336833078742\t1\tother\n215\t215\t1.89024923544878\t0.741764948637318\t-1.0155203395098\t2\trural\n216\t216\t-0.918308636321609\t-0.0868765217265333\t-0.509826529424641\t2\tother\n217\t217\t-0.591074858895521\t0.566777955769371\t0.99434556045379\t1\tother\n218\t218\t2.25670365684985\t-0.315748293528222\t0.365477681343191\t2\tother\n219\t219\t-1.57233822739218\t-0.850792763875279\t0.904498367464702\t1\turban\n220\t220\t-0.0682314485516372\t-0.795866178677262\t1.66930347041193\t1\tother\n221\t221\t-0.533686551757233\t-0.465695098147421\t-0.981684351830436\t2\trural\n222\t222\t-0.650744878298164\t0.715410607037681\t0.815096345271332\t1\tother\n223\t223\t-1.73483240099147\t1.15510538447811\t0.744813039979084\t1\turban\n224\t224\t1.08722268180171\t1.19413693413092\t-0.240273728814771\t2\turban\n225\t225\t-0.195458351526377\t-1.12708948860974\t0.0483699010155095\t1\trural\n226\t226\t-0.377051472458811\t-0.762275563055573\t0.738733077024699\t1\turban\n227\t227\t-0.18142884904785\t1.08723994079201\t0.8524808816003\t1\trural\n228\t228\t-1.08408259984194\t-0.11623045844534\t0.0936085321037543\t2\tother\n229\t229\t0.980923357579053\t-1.36753072430618\t0.501720860018512\t2\tother\n230\t230\t-1.41202933071188\t0.0433877791285629\t-0.0803230747661825\t1\turban\n231\t231\t-0.629131853656102\t-0.549328075703398\t1.62922107570998\t1\tother\n232\t232\t1.70458554314851\t-0.127408146304953\t-0.247666680166544\t1\
trural\n233\t233\t0.0114661310717726\t-1.14727853739666\t0.601845776132469\t1\turban\n234\t234\t0.0055302906322214\t0.548837223017019\t0.998310060979668\t1\tother\n235\t235\t-1.47169674493235\t-2.51801462427645\t-0.565532684958423\t2\tother\n236\t236\t0.375248849183357\t1.88760055391672\t-0.256481599828999\t1\turban\n237\t237\t0.264296911165029\t-1.33259359592374\t0.0487652939597159\t1\tother\n238\t238\t0.17719004528226\t-0.193701321174802\t2.39465797007541\t1\tother\n239\t239\t-0.188329921279762\t0.319697619525775\t0.00805067416711713\t2\trural\n240\t240\t0.638949836393311\t-0.175011559041567\t-0.565935078051149\t2\trural\n241\t241\t-0.521513973408463\t0.790904235650827\t0.086850161445771\t2\trural\n242\t242\t0.119178273767935\t-0.18779594966625\t-0.665865387079382\t1\turban\n243\t243\t1.48244098394491\t-1.24068304414465\t1.73031811007768\t1\tother\n244\t244\t1.68422348507766\t-1.12591550053402\t-1.06202319844172\t1\trural\n245\t245\t-0.617850744913592\t-0.719053349775352\t-0.395999153143368\t2\tother\n246\t246\t-0.962425935625463\t-1.37957764887356\t-0.935088714487197\t2\tother\n247\t247\t-0.676307216820063\t0.455639497484506\t0.988656830560121\t2\turban\n248\t248\t-0.429803260399542\t-0.95897644987832\t0.349991019844699\t2\tother\n249\t249\t-0.734494319621749\t0.503398184636999\t-0.400247221684379\t2\tother\n250\t250\t-0.989528415514397\t0.839136294267078\t0.358362266762923\t1\tother\n251\t251\t-0.297578949619101\t-0.304572573458164\t0.820639931338014\t2\turban\n252\t252\t0.0466280488595221\t-0.125534729133879\t0.753705551836252\t2\trural\n253\t253\t-0.736475374938984\t0.818502653041985\t-0.0959214566187271\t1\tother\n254\t254\t0.762687847709622\t-0.992359225839721\t-0.619136896682819\t1\tother\n255\t255\t1.96977235146495\t-1.56438387774728\t0.595399026635289\t1\tother\n256\t256\t1.44477711575254\t0.102540189204669\t0.119666699672157\t1\turban\n257\t257\t2.93644221009186\t0.132408626655896\t-0.571253063956656\t2\turban\n258\t258\t-1.64340825575722\t-1.71647380349127\t0.624636435244488\t2\turban\n259\t259\t0.199821292254365\t0.0108931677881925\t0.832449327129177\t2\trural\n260\t260\t-2.89904313683447\t-0.665617585302284\t-0.32470371115627\t2\turban\n261\t261\t0.0110238851144448\t-1.1241917371126\t-1.70605047017061\t1\turban\n262\t262\t-0.111135443892226\t-2.02513040821815\t-0.595550283069873\t2\tother\n263\t263\t0.296956907479138\t-1.49235331862979\t0.674294916418284\t1\trural\n264\t264\t-1.58806982701449\t-0.87293526942144\t0.618676976339737\t2\turban\n265\t265\t-1.20599790707864\t-0.668896146097892\t-0.285472859290853\t1\turban\n266\t266\t-0.984446076989712\t0.405512286601059\t-0.992812408330569\t1\turban\n267\t267\t0.873981222118058\t0.130636382839266\t0.223138783736783\t2\turban\n268\t268\t1.15137901211244\t0.269341912478004\t-0.370276060801822\t2\turban\n269\t269\t-0.237522073285531\t-1.16941542159274\t-0.292375309742259\t1\turban\n270\t270\t0.521451560069117\t0.229210872300789\t0.117145303707847\t1\tother\n271\t271\t-0.721109911792939\t0.973353505683337\t0.725840321699984\t1\tother\n272\t272\t-1.71579509456485\t0.331053754108528\t-0.309686238454332\t2\tother\n273\t273\t1.41631965677878\t0.40590149419227\t-0.776581169847057\t1\turban\n274\t274\t-1.26764883299582\t0.602765703689368\t0.20167773869961\t1\trural\n275\t275\t0.350036194906335\t0.944979275948657\t-2.23712574677926\t2\turban\n276\t276\t0.0841339962579271\t1.07519734277879\t-0.674417825943384\t2\trural\n277\t277\t-0.38880461324062\t1.68072282441877\t-1.55548606060339\t1\trural\n278\t278\t0.712520575225716\t-0.783525953643761\
t-0.0692911345356956\t1\trural\n279\t279\t1.37756660256157\t-1.09914891659094\t-0.112098933740495\t2\trural\n280\t280\t-2.09233520668221\t0.122388401239843\t0.0745129102324621\t1\tother\n281\t281\t0.0444170182142923\t0.789377590728428\t0.781404555410717\t2\trural\n282\t282\t-1.04676495729127\t0.560383614998423\t-0.620929998169237\t1\turban\n283\t283\t-0.464676156313911\t0.78832301597018\t0.933006648642985\t1\turban\n284\t284\t1.57997933674385\t0.808076546805481\t0.620128179317599\t2\turban\n285\t285\t-0.062235704507387\t-1.1873901649143\t0.233185939981184\t2\trural\n286\t286\t0.64560171538111\t1.19921535710876\t-0.360792071066905\t1\trural\n287\t287\t1.7600404868672\t0.86971773121211\t-1.1225428057822\t1\trural\n288\t288\t-1.73949071882268\t0.70280563800663\t-0.514981312838415\t1\trural\n289\t289\t1.65736411683642\t-1.1437399326179\t-1.3031282156944\t2\tother\n290\t290\t-0.296467972076117\t0.444850043146052\t1.20300225757892\t1\turban\n291\t291\t1.82460367451811\t-1.94034273153412\t0.155062051356154\t2\tother\n292\t292\t-0.217403205034682\t-2.29536071176852\t1.44441176711928\t2\turban\n293\t293\t-1.49109063118087\t-0.952864885483231\t-0.648197148440543\t1\trural\n294\t294\t0.417681616508874\t-1.1291558960589\t-0.627291915158984\t1\tother\n295\t295\t-0.335530289243836\t-1.56795725710696\t-2.19360085121039\t2\turban\n296\t296\t0.562029616139559\t-1.00515927301376\t0.471217907177394\t1\trural\n297\t297\t1.10716329206117\t0.275315611994426\t0.726923493424635\t2\tother\n298\t298\t-0.376045966162074\t-0.545860364876667\t0.695162762922577\t2\trural\n299\t299\t-1.01169881487892\t0.421285842494731\t-0.127456459580007\t2\tother\n300\t300\t-0.233735457417624\t0.234001376530343\t-0.279210500761352\t1\tother\n301\t301\t-0.191041983162951\t-1.26656674240632\t0.268176102391084\t1\turban\n302\t302\t-2.14079033007813\t-0.661582208240859\t-1.45671616402897\t1\tother\n303\t303\t0.171697377948325\t1.19790586667098\t-0.303744643932396\t2\tother\n304\t304\t-0.51178438806716\t-0.370359287706688\t0.492310733041444\t2\turban\n305\t305\t-2.1521556116937\t0.211447694327415\t1.13994230933885\t1\turban\n306\t306\t1.78336611406654\t1.81258706009664\t2.21509213437292\t1\turban\n307\t307\t-1.66459000979854\t1.05607813943896\t1.01618343377454\t2\tother\n308\t308\t-1.23831539739212\t0.174125333480291\t-0.211814661895119\t2\turban\n309\t309\t0.748196335956892\t1.31893965065117\t-0.811219973030862\t2\tother\n310\t310\t0.288925126152792\t-0.790047372466064\t0.774907436039824\t1\tother\n311\t311\t0.218752902211661\t-0.295691693617648\t-0.369586168708846\t1\turban\n312\t312\t-1.31711372445576\t-0.382572814059577\t-0.698393008336702\t1\trural\n313\t313\t0.746165575759412\t-1.10415473494955\t0.926750246438677\t1\trural\n314\t314\t0.558474345090657\t0.46862507716215\t-1.33073599449416\t1\trural\n315\t315\t-0.949839805579198\t1.45613405551077\t-0.171630169552659\t2\turban\n316\t316\t-1.00348096735362\t0.0952170340869715\t-0.20894959044786\t1\tother\n317\t317\t1.06190967772141\t0.938709503535816\t-0.191898660438186\t2\tother\n318\t318\t-0.203767226505989\t1.45446480582589\t-0.266331940952407\t2\turban\n319\t319\t0.514968594844155\t0.474162886855075\t1.3500815223921\t1\tother\n320\t320\t-0.876709951973288\t-0.64595588629736\t-0.354804926698781\t2\trural\n321\t321\t1.05242687576023\t0.11435781464134\t-1.2750296485476\t2\trural\n322\t322\t-1.01289579517596\t-0.0270744081097838\t-0.974583689186217\t2\turban\n323\t323\t0.628597759786413\t0.871057057558597\t0.878410823840972\t1\turban\n324\t324\t-1.18536815436662\t0.237656085500654\t-0.097
4493631407294\t2\turban\n325\t325\t-0.263263415095149\t-1.44171449442434\t-0.308858102387813\t1\trural\n326\t326\t2.187931429856\t-0.0448832133939714\t-0.0916768117939348\t1\trural\n327\t327\t0.721356341862245\t1.92071654161163\t-0.953374022528519\t2\tother\n328\t328\t-0.848363397809302\t-0.917608912846968\t0.552862656060789\t1\turban\n329\t329\t-0.988155592500341\t-0.462056013220845\t-1.00920674215424\t1\trural\n330\t330\t-1.94058845096277\t-0.856316424989464\t-1.0908984822071\t1\trural\n331\t331\t-0.0505232383727004\t0.531545092071155\t-0.00176212129430811\t1\trural\n332\t332\t-0.145093288027965\t-0.726857837930715\t0.00291095009469781\t1\tother\n333\t333\t-0.619765511690178\t0.717145096834534\t0.405405931529327\t2\tother\n334\t334\t0.764451579068619\t1.47070599637375\t-0.669689678089283\t1\turban\n335\t335\t0.183358257558995\t-0.0831282511368581\t0.433822596451396\t2\tother\n336\t336\t1.01683887092612\t-0.486724368684249\t-0.00835953903736881\t2\tother\n337\t337\t-0.231482025850402\t-1.04112143594274\t-0.312872327654019\t2\tother\n338\t338\t0.125226073338694\t-0.612502253575669\t1.9613718864005\t2\trural\n339\t339\t-1.5853989455743\t-0.0332048306778363\t-0.809711201156886\t1\tother\n340\t340\t-0.270814906273368\t1.15150627999799\t-1.31156103028854\t1\turban\n341\t341\t-0.327455226114505\t-0.299027016341994\t1.15257784935637\t2\trural\n342\t342\t2.29830643498446\t2.01125132335651\t-0.248521329194713\t1\tother\n343\t343\t0.00699298749237794\t-0.256989059952088\t2.39698946550435\t2\trural\n344\t344\t-0.840138408391043\t1.4845727187796\t-0.581505185126562\t2\turban\n345\t345\t0.299514178506534\t-1.49891507259923\t-1.79871610742881\t1\turban\n346\t346\t0.686550825661833\t-0.528289957966227\t1.48029842182583\t2\tother\n347\t347\t-0.45001370605832\t0.684382356816486\t-0.55071117530745\t2\turban\n348\t348\t-1.31452129325411\t0.552333014970822\t-0.955732804108503\t1\turban\n349\t349\t-0.889682558174095\t-3.75500098361948\t0.951415382296232\t1\trural\n350\t350\t0.127431832080324\t-0.632802305886436\t-0.50627913852573\t2\tother\n351\t351\t-0.112048132821479\t-0.503652039649308\t0.658186027591559\t2\tother\n352\t352\t0.735555560160039\t-0.0103500057568764\t-0.288778640557989\t1\trural\n353\t353\t0.0770376641676405\t0.26837203940238\t0.139251213851349\t2\trural\n354\t354\t-1.99258172213664\t-0.145854539283609\t-0.937226004051109\t1\turban\n355\t355\t-1.4921334606253\t0.0760290066359275\t-0.638203445360267\t2\tother\n356\t356\t-0.363076454646316\t0.341697577899652\t-1.66226713422437\t1\trural\n357\t357\t0.187916092171361\t-1.52064147800193\t-0.363134323290455\t2\turban\n358\t358\t-0.676067238311435\t-1.41454450811721\t-0.360419845512291\t2\turban\n359\t359\t-1.01818400664782\t2.70689103153848\t-0.736411170974868\t2\turban\n360\t360\t0.643991543128254\t-0.84244496364137\t0.00437956636730414\t2\tother\n361\t361\t0.845439889290235\t-0.656289941005438\t-0.194854955864464\t1\turban\n362\t362\t2.17243394080198\t1.499611543096\t0.502358577899669\t1\tother\n363\t363\t0.478976866575967\t-1.17700453542394\t-0.617667076190438\t1\turban\n364\t364\t-2.40615948097953\t-1.81461389360954\t0.802839788167135\t2\trural\n365\t365\t-0.706072441494411\t2.02228197750379\t-0.393308607676681\t1\turban\n366\t366\t-0.0805784158116847\t0.716674158295844\t-1.20246006637217\t2\trural\n367\t367\t-0.732597151075011\t0.415601284249849\t0.597589404816867\t1\tother\n368\t368\t0.667621992718285\t1.23843683950396\t-0.418220413620227\t1\trural\n369\t369\t0.443331659603023\t-1.78033791585069\t1.64559431645404\t1\trural\n370\t370\t-1.497590
06993034\t-1.1661095878874\t1.38886171190481\t1\turban\n371\t371\t0.635231718115305\t-0.570508750553901\t0.270193218342475\t1\turban\n372\t372\t0.795574536325585\t1.38952054318901\t-0.205021901201099\t2\trural\n373\t373\t0.56175250763996\t0.368582315379003\t0.299124792547437\t2\trural\n374\t374\t-0.00544817501613968\t1.47285432322312\t-0.175075944959192\t2\tother\n375\t375\t-1.69155549579082\t-0.522545241727092\t1.35619066795485\t2\tother\n376\t376\t-0.338280840627237\t0.64061716359716\t0.495058300057172\t2\trural\n377\t377\t-1.84062788360435\t-0.355930890969487\t-1.47395465638291\t2\tother\n378\t378\t-0.397429042886146\t-0.30562148009681\t0.120362124409584\t1\tother\n379\t379\t-1.4104795179153\t0.152843003542133\t1.6088215156232\t1\turban\n380\t380\t-0.178517695940274\t0.293126647080975\t1.04376969864201\t2\turban\n381\t381\t-0.948112187155606\t-0.104263127842352\t2.01674968189082\t2\tother\n382\t382\t-1.10574447253708\t-2.08945842089725\t1.25622755105695\t1\tother\n383\t383\t-0.979730638517688\t0.0244491496356701\t0.0131309866640956\t1\turban\n384\t384\t-0.673906243517\t-0.531445823842769\t-0.790683740625223\t2\tother\n385\t385\t-1.13364545455527\t-0.404537314875365\t0.155635186040039\t1\trural\n386\t386\t-0.544213920615126\t0.0943483987549832\t-0.742451229248762\t2\turban\n387\t387\t0.10566817085088\t-0.228181117759124\t-0.221996955410298\t1\tother\n388\t388\t1.08989772734619\t1.07364626904529\t0.762173293406158\t1\turban\n389\t389\t1.40469250953944\t-0.850683420122761\t-0.58653583423649\t1\turban\n390\t390\t0.949585868985058\t1.03517537412855\t-1.87918662807997\t2\trural\n391\t391\t-0.286124446529293\t0.865896478013803\t-0.807980429679611\t1\trural\n392\t392\t2.41071373910191\t-1.40414958026209\t-1.43279514328709\t2\trural\n393\t393\t-1.17629915685666\t1.22302514103282\t-1.64891740853755\t2\turban\n394\t394\t-1.0735260519271\t-0.181197904670042\t1.8538972843443\t1\turban\n395\t395\t-0.249694418967338\t0.568791794274843\t0.799644391373855\t1\turban\n396\t396\t0.53313024140876\t0.823257011438389\t0.324712688536363\t2\turban\n397\t397\t-0.550317144410589\t0.0436005278514163\t0.207942449005962\t1\tother\n398\t398\t-0.155761262486867\t-1.14990959168668\t-0.764908731593276\t2\turban\n399\t399\t0.773772943927362\t1.81975886909259\t0.429061628933955\t2\tother\n400\t400\t0.674723374180998\t0.877810943242963\t0.155783475742268\t2\turban\n401\t401\t0.416502491862716\t0.371382546084973\t0.633832727114365\t2\turban\n402\t402\t0.936820455308487\t0.374779777022824\t-1.16031064831011\t1\turban\n403\t403\t0.138571094768104\t1.00722231541159\t1.94388506551436\t2\trural\n404\t404\t1.01977424998782\t0.934263379091051\t-0.190841598063365\t1\tother\n405\t405\t-0.515549291460727\t-0.761080273866144\t1.49489860185877\t2\tother\n406\t406\t1.57382617280842\t-0.6285950389952\t0.221592151733597\t2\turban\n407\t407\t-0.284188972855081\t1.31926052837367\t1.20558354387342\t2\tother\n408\t408\t-0.492467477098651\t0.304497413279957\t-1.30021219352541\t2\turban\n409\t409\t-1.55223859906654\t-0.169894465482847\t1.73532648540805\t1\tother\n410\t410\t-1.04365171199321\t-1.07757830499426\t0.402542715177659\t2\turban\n411\t411\t0.337313503023761\t0.0199790004416643\t-1.01955597162537\t2\tother\n412\t412\t0.567261737834834\t0.343272499619591\t0.783825957039741\t2\trural\n413\t413\t1.33158903264638\t-1.01907899097234\t0.98033162542721\t2\trural\n414\t414\t-0.943621151033741\t0.511887198257892\t1.95695832761542\t1\turban\n415\t415\t1.29204107155955\t0.685314763403991\t1.35338962435436\t1\turban\n416\t416\t1.11776473650734\t-0.2
16629079829078\t0.718584038343791\t1\trural\n417\t417\t0.285225183001854\t0.871964787681798\t1.08138246909397\t2\trural\n418\t418\t0.148853156447729\t-0.418646580112241\t-0.00875510873997657\t1\trural\n419\t419\t0.9634192645878\t2.06024524083206\t1.4282865686655\t2\tother\n420\t420\t0.6982079781091\t-0.255827825174668\t-0.801982390755466\t1\tother\n421\t421\t0.101877872863502\t0.337286115522036\t0.142716608348129\t2\trural\n422\t422\t-0.13514834258085\t-0.343489389055775\t-0.758550307258719\t2\turban\n423\t423\t0.780193790567571\t-0.262196400577283\t0.222568348036814\t2\trural\n424\t424\t0.521884516147666\t-0.417526584412931\t-0.906475641944693\t2\tother\n425\t425\t-0.296271249700639\t0.464767162492862\t0.573146275298587\t1\tother\n426\t426\t2.12583517147699\t-1.58032505698937\t-0.380971930202218\t1\tother\n427\t427\t-0.164761291230633\t1.19279208925353\t0.0835927061035099\t2\trural\n428\t428\t0.14894821294202\t0.76227578842078\t0.762337668292594\t2\turban\n429\t429\t1.6625574780885\t-1.6267191199893\t-1.60935036726086\t1\tother\n430\t430\t0.153377215776703\t0.139804711923426\t-0.578691503310998\t1\tother\n431\t431\t-0.0609168353150149\t1.00578542421274\t-1.77184370913112\t2\trural\n432\t432\t1.0195152003104\t0.37007924120698\t-0.102790498134211\t1\trural\n433\t433\t0.639411740672879\t-0.0510277115266688\t-1.00291397287483\t2\tother\n434\t434\t0.320803185489027\t-1.51000030797827\t-0.5238142805057\t1\trural\n435\t435\t0.166796515737489\t-0.125374640293625\t0.0127264778716913\t2\tother\n436\t436\t-0.183655679680401\t-0.89949432185981\t-0.709872187274793\t1\trural\n437\t437\t2.71174074551991\t1.57493867587087\t-0.688704599128927\t2\trural\n438\t438\t0.30320726335083\t0.398285443256563\t0.452069614194104\t2\turban\n439\t439\t-0.412194239388631\t-1.55399453877199\t-0.0474936213927865\t1\tother\n440\t440\t-1.32210482514306\t-0.0332684613144726\t-0.564368017626722\t2\tother\n441\t441\t0.498774860942123\t0.824228609926728\t0.258177910093118\t2\trural\n442\t442\t0.21463224937359\t-2.06128512704817\t-1.93738868157208\t2\tother\n443\t443\t1.63849897616018\t1.38946467311879\t-0.782600239716082\t1\tother\n444\t444\t1.52595304052576\t-0.173539684026595\t-0.947528184549204\t1\turban\n445\t445\t0.0943440845394678\t0.048931842077725\t-0.382348837456813\t1\trural\n446\t446\t-1.45168435143579\t0.461979227971236\t0.705776365553554\t2\trural\n447\t447\t1.52072550218058\t0.726264516797679\t-0.298656274471634\t1\turban\n448\t448\t-0.323154618456321\t0.556172618285506\t-0.65020645754813\t1\turban\n449\t449\t2.28150475194183\t1.2238359582073\t-0.162320784068865\t2\tother\n450\t450\t-0.00321711228625246\t1.97248349042151\t-0.146851872836864\t2\trural\n451\t451\t-0.556550139004402\t-0.887450564229401\t0.0246591820773215\t2\turban\n452\t452\t0.870946331189198\t-1.4242008489949\t-0.228144189377569\t1\tother\n453\t453\t-0.109528854075021\t1.29851845650738\t1.21485370791291\t1\trural\n454\t454\t-0.19355636344534\t-0.416879433808446\t-1.31833494834954\t1\tother\n455\t455\t-0.411465876118703\t-0.086353157737709\t-1.88835452998271\t1\tother\n456\t456\t-2.4756239289431\t-0.567341520954236\t1.58452126894583\t1\trural\n457\t457\t0.60407680403628\t-0.104022236597136\t0.303493523505913\t2\tother\n458\t458\t0.435994231991519\t0.490238626736052\t0.465480501523705\t1\trural\n459\t459\t1.21081371343492\t-0.94108333025015\t0.0647524048924119\t2\tother\n460\t460\t-2.58974911648449\t1.28896474154759\t-1.05216239641956\t2\tother\n461\t461\t1.68527704564746\t-0.169581954989677\t-1.4370621269365\t1\trural\n462\t462\t-1.15537249457013\t2.
48262760909883\t0.956932733058004\t2\trural\n463\t463\t-0.8369395055165\t0.327073379726694\t-0.0250871351453328\t2\tother\n464\t464\t-0.0246541906360803\t1.01311111432097\t-0.0406603252497205\t2\turban\n465\t465\t-0.393540020013427\t0.193263006981446\t-0.245017187295811\t1\tother\n466\t466\t0.117928972785921\t0.969861570326182\t-1.68803941979426\t1\trural\n467\t467\t-2.05095990457672\t-0.717641462001806\t-0.434291460991705\t2\tother\n468\t468\t-0.791287688998045\t0.619984113947591\t-0.191140071315513\t2\trural\n469\t469\t-0.661430923721985\t-0.0264585781045273\t-0.815567832840785\t1\tother\n470\t470\t0.110540452409726\t-1.60122797696722\t-1.47297570682647\t1\turban\n471\t471\t-1.13396161893926\t0.302603493382267\t1.17916441880041\t1\trural\n472\t472\t-0.428447500162837\t0.032378655629795\t-1.44127777219403\t2\turban\n473\t473\t1.97777565279044\t0.188174543632119\t0.496490678160897\t1\trural\n474\t474\t0.478443080714877\t-0.247911441813345\t-0.389620540233551\t2\turban\n475\t475\t1.88049273822024\t0.758524313832996\t0.815524985613147\t1\turban\n476\t476\t-0.94451472563566\t-1.46724267422952\t0.0614521434616583\t2\trural\n477\t477\t1.20862127917688\t-1.29044795160467\t0.161638205861625\t2\tother\n478\t478\t-0.426800286844427\t-2.13199751519076\t-0.889888607904783\t1\tother\n479\t479\t0.0701446752362957\t-0.814190152930211\t1.54613706801577\t2\trural\n480\t480\t0.0247771557425567\t2.21242490448656\t0.170594216971674\t2\trural\n481\t481\t-2.14479871510897\t-1.26350470899781\t-0.624638080485053\t2\tother\n482\t482\t0.219042700250454\t-0.997236453336354\t0.895137214974744\t1\trural\n483\t483\t-0.838546037838302\t-1.36990425123737\t0.392828897434324\t2\trural\n484\t484\t1.95646567445163\t-0.766893627230944\t-2.25829087824114\t1\trural\n485\t485\t-0.623694160458828\t0.0617174904526453\t0.223828879773566\t1\turban\n486\t486\t0.105750753213039\t-0.718526178019354\t1.6796384333845\t1\trural\n487\t487\t0.352984673775129\t0.034045039443356\t-0.236623000367563\t2\trural\n488\t488\t-0.86099663862995\t-0.899747742375208\t0.739197695312269\t1\trural\n489\t489\t1.90188671459261\t0.945309117844356\t0.270334048921035\t1\trural\n490\t490\t0.232188200392006\t-0.766806972425336\t1.44920475451477\t2\tother\n491\t491\t0.439708463348097\t-0.100496086063684\t1.19993066069441\t2\trural\n492\t492\t-0.470814417895129\t0.0876334399810952\t0.0778379117905729\t2\turban\n493\t493\t0.720297214674885\t1.55770018984445\t1.7692979541215\t1\trural\n494\t494\t-0.996123733624525\t0.0915562557433139\t0.846450462395275\t2\trural\n495\t495\t-0.138496948697237\t0.0402966317738195\t0.333453434403395\t1\turban\n496\t496\t0.537127981267897\t-0.0564681049010715\t0.48119562320416\t2\turban\n497\t497\t-0.0511684766636894\t1.20246232935816\t0.693052533966201\t2\turban\n498\t498\t-1.46506280238144\t1.17424105978661\t0.642855642554621\t1\turban\n499\t499\t-0.516533720439536\t-0.576053588281794\t0.161069003359382\t2\trural\n500\t500\t-1.04827117969018\t0.926623357058696\t0.86635561319519\t1\turban\n"
  },
  {
    "path": "example/example.bed",
    "content": "l\u001b\u0001+;>>ο*+?꼾;;Ͽ?룿?;ﾻ;:ώ>ʻ??ή.?㿺ﳿ;Ͼ\u000f뿋?꾪뿯믯Ͽꫳ?>?ﯪϿ>>;Ϫ2￾˿?/*/;뿿ﺺ￺?>:￿;οϫ̯򿻺￾<;:>뫾̿//˪ﻫ믮뾿/+꿫ﻯ>﫿2뿾㿯￿󿿳>(뿻:κ￯+;꿿請ﯿ믿<?ꫢʿ?<?볫뾻./ﺾ/?￮+>Ϋﳻ좻ﯯ;?:?:/;˿￺33:.?믿?쌯+;//￾>ﾾ꿋λ?כּ;>>\";>:??ξ..?/:ﯲ/򯏿:꾾﫮?ﻏ?:㻺//>뾫￾,>.ﯾ꺮몿<3<;﮿謹⿿..겿?;+ￊ;̯/?ﯻó/;;ﺪ/>>꺮/Ͽ<Ϻ;>:ϻ?ξ;?뫾?3/>/3?ﯺʾ>2믯;<+￿:>뿿?뾫쯯?;+請+*:꿾꿿<ϻ>þ?.ﺾ˸﾿ʿ.:;;.꾻+»ˮ﫿.>˿;/.ﻻϿ?<>Ͽ>/3/<˺﫾*﮿:/>ﾯ뻿σ#˿/￿?:>뿿ﯻ\u000e;\u000fꫳ磌請뿯ʾ*:/\u000fﯮ?꪿(/Ȼ?絛:￿＿??+;>?>￻﫫?莿>꺾:?ί뻾?ο>﾿ȿ?뿺3˿;;￪/;>?﾿뾾Ͽ;???ί￯ο꿾?ﻫ﮻;\u0003.?ο??껮﻾/?໾?󻿫/?ﯺ?>￺:￿*꿿˫Ͽï꯯?+ﯿ+>??;;;??*>?:꿿\n⿿츿2.;ﻯϾ:?˯*<8.￿Ͽ/.+/?﫻??ϻ8κ*꿿./ȿ謹ʯ?*/﻾>뿿뻮>*;Ϣꪺ;㮫쮳#ϲ;/ί>>ﺾϻ+￿ﾲ;⿻꿿??㺻﫯;>/<?;?//Ϻ*>?򿮌?:>볺/˾?/?￾˻꾯?/*?</\u000f?﫻:?8,ϻ뺫?>Ͽ?:??ﺻ>ξ.￻+?+뾯/ί?뾿?ﾫ<磌㾮ʫ쯪:/:먿?뺮?ϻ:󿿻*;︿+/˾뿋Ϻ.Ͽ?쾿?.￼8:;Ϻ￪.ﯯ/﮿;+.;>>Ϻ*+<Ϻ/?//ϻ뿮>>?뾣(:;?+ο?﫻ί/>/>Ͽ￻>>.뿯?.?ﯫ>+ꮯ?;?/::?/.>??뿾\nﺫ?\u000f꾯˻﻾?/*\u000bϫﯾ믳;;?꿳˸?￯,.+ﺿ뻿+뾾ﾯ?/\u000f>˿+;ϯ?.?ﯾ﫨꺿뻿￿?.뿻˺+뿾>;;﮳ￎ?>/ˮ>>?￾*?;;<˾뿯Ͼ˿Ϋﻯ?Ͼ;ϯ*+뾻/>믯:/;>κ?\u000b꾻￿?>?;￻ﻯ?/謹*+?Ⱥ?:￿뻮;꾿;;ʪ뫮ﾻϿ?ꮿ:<ο?>:꯿?.3?>ﾏ>˿?;\b﮻2꺺;>;ﺾ>˪ﻈ?Ͽ꿳?,?\u000e;*볻.?뻪/￿/?,?ϲ믾뿺?;>;캿:;:﾿??;>˫돺￯?迫??Ͽ￼컿κ;ﯫ﾿˯ϋʻ?뿿?<?>ﯾ?Ͽʼ/\u000f?>*﫾뿿Ͽ??뿫+>?󻿯˿?*﻿/￮Ͽ/ϯ>ﯾ>;+>?﫻;Ｂ+3>?/꺿/ϻ>+ο..ê?::<?ﮮ￯̿?\u000b<?뻮/??>\u000f꿿￻?Ϫ:˾ﯮ/>>,;\u000bϫﺮξ﮿?;ﺮ:;/:;軾볿?//?￯;;</뿿?.?ﻺ??꫺󫻻?/.\"ο+??ﻸ<;:ϾϾϺ?>ο﫼>Ͽ㺯ﯮ￯돻﻿ꮾ룮???3>:?8ﾫ:⾣:˯;>?><+>;;?ﾯ+ʿ>꿯̪뺏ﯿ﮻Ͼ+2?뾪\"?꯾?3?Ͽ>?龜\"*>?/?Ͽ;/>(..뿿ﺃ+?+￿?￿::?>ﺬ</ﾻﾮ?λﻼ>*ꪮ?3⯫?﾿믻#?￾?;>/?꿯/;+.﫾<<ﾻ뿨;>\u000e뻫:⬻˺/켫>;￫ξ++輯￯﫾ﺫ?;˿>;ﯾ?.?﾿>>*뻻﮻ȼ>￿󻿻?;.￺꿫ﻮ￿>꿾>?/ﻻﻻ?Ϻ뾫κ:￯뿳.ﺯ\u000bϺ>\n\u000fλ+꿪Ⱦﾮ꿪Ϋ?/>?￮﫿?.?/:?ꯪϺ辿2?;+>+*>ﻻ??>>..˿*민ϻﯻﯿ>;>/뿯ϫﯪÿ?>򻮫.뫮><+>ﾎ꬏;<+/?ﺯ?뮮ϯ#;>+Ͽ/Ͼ/,뺿:뻿Ͽ/.:돳??뮺:?ꪺ㳿ﯻ>ﯻ?;ȯﯿ/?3.￿ʮ+*γ/ꎿ￾￫?>㯻?.?\u000f>?Ϯ΋￿뿯>?򿻾ˮ迿/\u000f+ϯ?.\u000e?뿿󯯮;;.>;?\u000b3ﺿ:>뾻;꾊.//κ?<뺯ﮊ+.Ϻ?﯋;ί//+뾻/:˸??Ϫ>>??ϯϮ˾>꿫??￾??>??˾\":*;뫺뿻Ϻ>/˿/ﺪﯻ뾿;ʿϪ㮪.?뺾;*>.>Ϯ>쾮믿//?,+#*ʾ>ﻺ:*\u000f:뢮.꿲?꺯;¯\u000bϳ+?﻿믻㻯.??︿ȯϿξ>2￯?+??\u000f?ϯ/?:<?﻾￯>>;*?;뫾?ﻫ/?..?.?ﾻ뿿>ʫ쿮,\u000fȿ*??￿뾿>;΋ϯ>>>믫#:￯ﺼ/:\"볿?;쿻>>>쳺>?>+*/ﾻ뾻>Ͽ뼯̢ﺾ?￫*>?\u000f뻫?諭.Ͽ?˿?Ϻ>+/￿?\u000bꫪ>;⿻8/뿾??뾿쪾;꿬//+.ϯ:Ϯ˿>ʺϪꮻ볾.Ͽ?+;/껮￼\u000f뺼/꿎;˿/꿳ÿ+>ﯫ+볿;8??????3;\u000bꪮ(ʻ:ϾϮ>>//﮲?/뿻?:?꿲?>/;/.?˻+ﾺ/?;:꯿돻??誾;;?/λίﺿ>;믯?㾻>ϻ?﻿/::?ꫯ꿫\"?λήϮ;\u000f:ﾻ.?;?:;+?˿.ϯ/+.诿좾￸௯/?㯾/;;/?ί.>꿿ﯯ??ﾫ:?ﾮ磌꿳??/>/ﾺϻ..><>>/꺳뻯>>??ﯿ뎿;3ϋʿ?3;?/Ͽ2ï/ﺾ?㿿?;>￿ή2:;?ϻ￿3?¾:?￿;󿯢מּ달?/ﺿ:3ϲ;*+*;ﯫ˾#>뾮3˫?￮:>;/.뿾?㻯ﻻ?>?;;;?ﻻ??Ϊ?ʾ>?꿾뾯:Ͽ?:˿묢뾯?>\u000f/Ͽ/ﯿ뮫ﮫ?ϻ;믺ﺪ8?Ͽ?Ψ>/>?2??î+󼺾;ﯺ?몺Ｋ?>ﾻʪ?껾??뿮Ͽ뫯\u000f?////8:?:??>꯮屮.><ʺ?Ͼ;*.뾻;/;>:/<Ͽ?ϫ*?뿻絛﫮>(/?ﯾ>ﾯο:?;;?直ﺻ㾯>˫.(򾸾뫿:뿯껫﮾뿿ﺾ믾>˻\"ξ?<\n￫>/3.>??ﯺ?>/*ΫήϿ?Ͼ?>뿾Ϯ/ϻ컯꫿뿺>??ϻ??⻸>>>ﯿ;ʻ?˾;\nϫ<뿿뼮ϯʪ/?㿮*Ͽﯯ몿;ϻ?;?￯+ꮯ뻻;㯿;+ﾾ몮;Ͼ;뮻?/>?>/Ͽÿ+?뼿ꮻϿﾻ:/+??+?*\u000e>⾻:뿿.//꾺꿾﬿/???뿾;ˮ?;:쿯\u000f*ϫ﫪󯿫:뻾.ϫ/?￯/￾?Ϋ./;/+;+/ﯳ/ﾾ̪+뿿﾿>*.뾿???+￻:꺯;.*:꯿;?;辯;;>ￊﯿ/ﾻ<諸>﮻;>ίﯲ.˿Ͽ뾯뻾?8ʻ﮻ʯ???;￿>￮벰뫯>;;אָ/￿;ﺯ?>˿﫯?\u000b?ϯ.?ﲿ;:??뿿:3/?/;(./ʿ;>󿾾˿+;>*>>ʿϏ\n뾻꿫ﾯëο˾??:/#?ﮯ8˯?/ＯϪﺯ>?3:/;.;/;?/ﲯ:΋뫬?>?꯿/?/+/뿿?ˮ;/;<?>돪*Ͼ뫻/몿ϫ;;?뻿?*Ͽώ*˾;￿.??<ʻ\u000f/ήﺻ뿾\u000bￏ?ϻ뾮?;믿￾Ͼ3>?ﯺ3>?﾿?>ﾼ뻻/̲??﮻\u000eξ￬/;+*>?˾￾˫뾿ﺬʯ3ﾼ;*??ﯿ;ﾏ?*;ﯿ˿˯8..>ﯿ/?*>ﺿ?:諭??/ή/?<?﫲??\";>??3>;쮻>Ϻ￼+;+벪﫮?>?>ﻺ.?λﻺ?>￺:뾻:>/?꾾ﮬϯ?>8.;Ϯ˾뻿κ/ϳ\u000b;?Ϋ믻?ϣ;/,;￾?:?:믯??/￪﻿?>???>诫?Ｋ#3./.8ﯻ:>뿾뾿ϻ￻,﾿￿(:Ͽ??/︿迿;ʫ뻻ÿ?컫/ʾ뫯絛/?⿫3?ﻼ*ʏ+<ﾯ;*\u000f>ʋ;/﮺/﮻/>墨뾺﮿:뿮룿+ﻳ?\u000f/θ뻻?/>￬>?˾󫺿\u000b>ﮯϪ2:>8ﯿ쯾>:Ϻ>?뻿?(￯+닾?ﾫί:>\u000f;??﫺Ϋ?뾻>*/ﯾ?/?>￿;>ϯ;ﺻ>꯻?>뻾/?軮뺮꿻/諭?:뾿Ͽ못λ?>뾿+/:>ﻸϾ﫿ʯ?˾:>ο˺벯/?;뾮/>ϋγ//￮￿?>돎뿿ϻ/?꿻Σ>?Ͼο?+:ﰿ/﫬++*>￻ʺ;>쿋*?/(.?ˋ*ϫﯪ;?.￿>>Ⱦ﻾Ͽ\u000f/⿿+?\u000e?󾳳뿻﻿3+겫\n﫻˼;??ϻ+:ϲ/?>󻺻?/󯺾ﾫ/\u000e?˻?;Ͽ?ο3/.몿?㻻<ϻ?￾+ꬫϻꪾ?돯>꿻㿯믾﫯￿ήϿ̿.ﺿ;;/Ͽ?㿯/ﾾ??￪?벯:뾾뿺﻾/ϻξ몮뻿?ﯻ껯+꾯\u000f+/:?>??>?﮿?;￾?믻>(></렏Ͽ謹꯼?<?;:>>??++??*;뿿;?뻺ο/꾫Ͽ/꾾;/*˾;;?>/?뺻?>믬+﾿뻿꾪:˾ë:辿?﯋>>?.:λ/𿻺ο;:Ͼ3<3/??ʻ?;:/+:>>ﯿ2뿻?.﮾3>.?﻿??꾫?+?¾?;:/.?ﯺ￪???;<;/>?뺾>?뺻￿:ʾ>/,뾿/:?꾮/;˿?>?;ϻ>/?//￿/:<(//.뻻++ο.:뻿￿:;ﺯ?﾿2אָ/￾;?;./﮻ﾫﯳȃ￯/>;;:?迾:?#;?ﺪ?\u000f;ﯻϻ.ϫ?￫뫯.?;Ͽο<뿿;/*/ﾻ;,ϻ?;Ϯ˯+ʺ뿺?\u000eϿ﻿﫨\u000fκ;Ͽ꾻:;>>??￯/Ͽ*>˾/
﻾>+/+ή;껻΋ʳ>￾.㾾/+#>꾿Ͼ/꿿+;Ͽ뾾?뿿絛?믿*;/絛.￾;?뿯;;??>Ͼ;﮻;>>:뮿꼿?>>Ͽ?\u000f>>+꿿?￯ﯮ/ﮮ˻.ﯾ<뿯ﺏ?Ͼ?뿮.쯯?ﾾ뮿뎿?ꣿ?Ͼ*/˿Ͽ﫺뾺>븿\u000f?.>˺שׁ꾪̯믬﫯ﻣ?<>*;믫???<>/ﻯ>?ο;Ͼ?;ϫϿ;>,/￾Ͽ/?>ﾫ﫬>>>;ʮ?;뿾>;/꿺+>#ﾺ/+/㻻?뿿Ϫﾻ?ꮪ﫯ﺺ>#⿻/?/;<\u000fﾻ/뺾>ﾾ?￫ϣﯿ:;?Ώ;󫾫㫋󾻪￻;:>Ϻ*뻾®?#?˾ﻻ/3+>꿎+<ʿ./?+/*\u000fˮ﮺ʮ\u000e3\b;?,꫺?˾/+/>/?>ﾪ+;?￿*?\">?ﻪ?믺﫻??﫻;뿿+ϫ\u000b̿/+￮￫/?ﻻ?>ήί2?;;뿫˿?\nￋ>ˀ+>뾮ﯨ몪/+ﻻﻻ㿲?뿾ϯ:Ϗ뻾ϫ*˫ϫ:?3㿻﫿뻸￾:뻼㯿+Ͽ?/뫺ί뾳꾳?;ϻ.Ώ,;;\u000e뾊￪룿뿿«;㻿:3.￻﫾+,>﫿+>?:¾﻾?.;?+/>?>꯳/??:꯻ˋϾ/ﾪ(Ͽ㳿?/\u000bﾯﯿΫ/.:/ϋ>?>뻼˿ο/謹/Ϊ﻾;>+˿쾿뿿>˯3?諭?﯏﮿.ﯿ??몾?쾿;ϣ+껿??ϻ꯸?.>ﺮ.#οʯ?뿿?;˪ﯫ\u0002?>˫;/;?>?돮﫯뎯+˯>\n:.?>캺8;믻//??﮾?;Ͼ/ÿ꾼??./?﮺?/\u000f*ί:ﾾ?꿫;꿻㿿*3?:?뿾.;Ͽ.ʻ>*+:˿/꯾;?꿯뻿*?/>?Ͽ?뿿..?*.誯\u000b￾볯*ﯾ?;,>￯?ϻ룯뿢￼>ꮾ꼯\u000f?￻;+.﫮;?>?Ͼî>˿????\"꪿.??ﯳﾎ;?ϯ?#ο;?󿿯쏿>Ͽ￿.?./￻;\u000f/ϫ㿻?뻯*.>/ﯾ;껿;?ο꿳Ͼﺿ:;*.;ﯣλꣾ./;?;?﫣/>/#ﾻ:+;/?껿?/ﾯ/;?:벿/ﯾθ/￿>쾳//￼Ϊ￾ￋ;+>Ϫ뾾ϯ2?ί*ﯿ>?Ϻ.*게ˮ뿪ﻏ˯?꾾?/쿏\u000e2￿?>??Ͼ;￺Ϋﾻ??ꫯﳾ+ﻻ󾿯絛ﾮ*;/;+Ͽ?>﻿;?+뾯꺮>:ﯯ?﫯/󾻪ʼ;?\u000f/;<ή/+￻λϾ?꯿>ϺϪ뿫/뺿..꺫謹輪;뻣뿿;*ﻊﮯ?;뫫.뿿:ￏˮ.⯾˿?＿;++뾾;ﺿ>:+?￯*쫻.?/ί쫲￻﾿??3>⎿˯?ϻ﫻*>꯻:ʋ讯﮿++>/뮮?+;?Ͽ?.?+Ͽϯ?>뮿꫏￿?:Ϻ/???/ﻯ볮:?/ϯ++?﾿ο\"*+>??>Ϊ踿+;Ͽ뎿꿾*>?ﾫ+:Ϋ믺뿿*ﻼ˿ﯺ꫿ί;븿?:ʻϿ뻾ϿϺ:˿ʿ?/.Ͽ+?+8,>\u000eϫﾫ?.뿯￿?>ﾯ˻쳺\u000e;/ﾾ;?꿿?..뿿;븿/Ͽ.?ϯ:/.?ﺼ￳?/\u000f;ﯻ˸￮?;;:ꮮ>?￾?;﫫:/;?3??뫼*2꯾ϻ/?\n﻾(\u000eﾻ뻼ﯯ뫿?뿮￨ú뺻?<?Ͽ/?￿?뻾::??>??????￯?﻿￾?????˿???﾿;?˿ﯯ;/Ͽ???￮;?﫿?/????￿￻+?˿?;;?ﯿϻ˾?Ͼ﾿//??뿿Ͽﯾ￿2￫;?ﾺ./ﯻϯ˿+￿?;;?ο?>?>￿믿>>??￿˿;?/ʿ﾿￿/??￻?￿??>Ͼﻮ.;???﻿?\u000f?￿ϻﾾ?ﺯ/.???;?Ͽﳿ￻￾/￿>?㿿Ͼ?￿>￿????￿?￯>/+?［;￻??뿾???/?;ﻻ??뿾﾿?>?￿ϻ?/﮻Ͽ>ﯿ???;Ͽ?;/?뿿>뿿?/꿿￿￻?+;?ˮ￿￿ο쿯꿿?/?Ͽ????3Ͽώ???;﻿ﻯ̿?￯/?ʾ??;ʻ￮Ͽ/뿿￲??>ϯ?þ˿ί>?Ͼ?￫￿ﯿ￿￮ﾻ￫㿫??/˾￿?﫿>Ͽ??2>Ͼ;Ͽ3ﻻ:ﾺ?;;믻?Ͽ??ﳮ??/ﯿ￿>;ﾾ??ϻ:ﺺ??.￿?￿>￯ϻ?;ﯿΪ￿ﺿ???;??﫿;???￾???뿿￿?>;?￺>/??﫾οϿ￿>>ﻯ￻>﾿\u000f/뿿￺?;￻Ͽϻ꯯>\u000f???+ﯻ뫿꿿뻿??:Ͽﯿ￾﾿;מּ/;>?>;?>????.;㻯?￻?;ο﻿󾿻??￻?/﾿믿￿ﯾϿ?/?￿???;Ͼ????﻿>˾?ϫ뻿?ώ??Ͼί;￿;?ﯿ?󾻿;㿿뾿*??//:Ͽ>;??.???+?3??Ͽ?￿?ﺿ?￿?￿?3?;뻿￫﾿??ϯ??>뻾>>￻Ͽ˿??뾯//>ﯿ>>?Ͽ??뿻;//믯￿?.??︣뻿?￿?//?믿ϿϿ﻿?>?;?￾//?￻?￾ﾻϻ???>??﻿ﻯ>￿?Ͽ??>/?;Ͽﮯ뿿>?+?;꿯?8;+?뿾뾿ϳ??￿/?Ͽ??ﯿ￿??ￏ/?;?﮻+ﯿ뿿?+λ?>+>???.ￏ￿;??ϳ?﻿￿Ϯ;﻿/??￿Ͽ**?뿿>8￻>?.\u000b;뿿;????????/;?>;+/뿿ﾯ?￿ﳿ뿾?;ﯾ/ﯾ;꿪ﳎ?/?Ͽ?.?>????3謹￿??Ϋ????ϻϾ?+￿﾿0￿ﾺ︿?ﯿ￿ﻻ?>?+믮㿿?￻>?/;>﾿.?/??︿￳?ί?>?+//??;????+￫￿ﯿﯿ믿/ο>￮>ϻ?ﻺ???ﳿ;￮;/￿\u0003￺>/??뿯Ͼ;?￿.￿⿿Ͼ?￿???Ͽ??￻믾뻿/﻿￫>;?>?>??﻿???+?>???ﾫ??򯾾ϿξϿ￿﾿뿿?/;?>?;:Ͽ믻Ͽﯿ?Ͽ￾?￺/￿:????>?;??:￿??Ͽ??;￺>￻ά;???+ﯯ?˾?λ?ﾯϻ??˿ﯿ>?>?뿿Ͽﯿ꿻?????οϫ/뾿???뿿﾿￿>?﮿?/??/??????￿?.>￿ﾻϫ?￿￾+.￺.;￯?ﯿ￿?ο???????ﯿ./??뿻>???ﯳ++/﻿*??>\u000b+?/;̿˻??Ͽ뻿?\u000f>?￺Ͽ"
  },
  {
    "path": "example/example.bim",
    "content": "1\t1\t0\t1\t1\t2\n1\t2\t0\t2\t1\t2\n1\t3\t0\t3\t1\t2\n1\t4\t0\t4\t1\t2\n1\t5\t0\t5\t1\t2\n1\t6\t0\t6\t1\t2\n1\t7\t0\t7\t1\t2\n1\t8\t0\t8\t1\t2\n1\t9\t0\t9\t1\t2\n1\t10\t0\t10\t1\t2\n1\t11\t0\t11\t1\t2\n1\t12\t0\t12\t1\t2\n1\t13\t0\t13\t1\t2\n1\t14\t0\t14\t1\t2\n1\t15\t0\t15\t1\t2\n1\t16\t0\t16\t1\t2\n1\t17\t0\t17\t1\t2\n1\t18\t0\t18\t1\t2\n1\t19\t0\t19\t1\t2\n1\t20\t0\t20\t1\t2\n1\t21\t0\t21\t1\t2\n1\t22\t0\t22\t1\t2\n1\t23\t0\t23\t1\t2\n1\t24\t0\t24\t1\t2\n1\t25\t0\t25\t1\t2\n1\t26\t0\t26\t1\t2\n1\t27\t0\t27\t1\t2\n1\t28\t0\t28\t1\t2\n1\t29\t0\t29\t1\t2\n1\t30\t0\t30\t1\t2\n1\t31\t0\t31\t1\t2\n1\t32\t0\t32\t1\t2\n1\t33\t0\t33\t1\t2\n1\t34\t0\t34\t1\t2\n1\t35\t0\t35\t1\t2\n1\t36\t0\t36\t1\t2\n1\t37\t0\t37\t1\t2\n1\t38\t0\t38\t1\t2\n1\t39\t0\t39\t1\t2\n1\t40\t0\t40\t1\t2\n1\t41\t0\t41\t1\t2\n1\t42\t0\t42\t1\t2\n1\t43\t0\t43\t1\t2\n1\t44\t0\t44\t1\t2\n1\t45\t0\t45\t1\t2\n1\t46\t0\t46\t1\t2\n1\t47\t0\t47\t1\t2\n1\t48\t0\t48\t1\t2\n1\t49\t0\t49\t1\t2\n1\t50\t0\t50\t1\t2\n1\t51\t0\t51\t1\t2\n1\t52\t0\t52\t1\t2\n1\t53\t0\t53\t1\t2\n1\t54\t0\t54\t1\t2\n1\t55\t0\t55\t1\t2\n1\t56\t0\t56\t1\t2\n1\t57\t0\t57\t1\t2\n1\t58\t0\t58\t1\t2\n1\t59\t0\t59\t1\t2\n1\t60\t0\t60\t1\t2\n1\t61\t0\t61\t1\t2\n1\t62\t0\t62\t1\t2\n1\t63\t0\t63\t1\t2\n1\t64\t0\t64\t1\t2\n1\t65\t0\t65\t1\t2\n1\t66\t0\t66\t1\t2\n1\t67\t0\t67\t1\t2\n1\t68\t0\t68\t1\t2\n1\t69\t0\t69\t1\t2\n1\t70\t0\t70\t1\t2\n1\t71\t0\t71\t1\t2\n1\t72\t0\t72\t1\t2\n1\t73\t0\t73\t1\t2\n1\t74\t0\t74\t1\t2\n1\t75\t0\t75\t1\t2\n1\t76\t0\t76\t1\t2\n1\t77\t0\t77\t1\t2\n1\t78\t0\t78\t1\t2\n1\t79\t0\t79\t1\t2\n1\t80\t0\t80\t1\t2\n1\t81\t0\t81\t1\t2\n1\t82\t0\t82\t1\t2\n1\t83\t0\t83\t1\t2\n1\t84\t0\t84\t1\t2\n1\t85\t0\t85\t1\t2\n1\t86\t0\t86\t1\t2\n1\t87\t0\t87\t1\t2\n1\t88\t0\t88\t1\t2\n1\t89\t0\t89\t1\t2\n1\t90\t0\t90\t1\t2\n1\t91\t0\t91\t1\t2\n1\t92\t0\t92\t1\t2\n1\t93\t0\t93\t1\t2\n1\t94\t0\t94\t1\t2\n1\t95\t0\t95\t1\t2\n1\t96\t0\t96\t1\t2\n1\t97\t0\t97\t1\t2\n1\t98\t0\t98\t1\t2\n1\t99\t0\t99\t1\t2\n1\t100\t0\t100\t1\t2\n1\t101\t0\t101\t1\t2\n1\t102\t0\t102\t1\t2\n1\t103\t0\t103\t1\t2\n1\t104\t0\t104\t1\t2\n1\t105\t0\t105\t1\t2\n1\t106\t0\t106\t1\t2\n1\t107\t0\t107\t1\t2\n1\t108\t0\t108\t1\t2\n1\t109\t0\t109\t1\t2\n1\t110\t0\t110\t1\t2\n1\t111\t0\t111\t1\t2\n1\t112\t0\t112\t1\t2\n1\t113\t0\t113\t1\t2\n1\t114\t0\t114\t1\t2\n1\t115\t0\t115\t1\t2\n1\t116\t0\t116\t1\t2\n1\t117\t0\t117\t1\t2\n1\t118\t0\t118\t1\t2\n1\t119\t0\t119\t1\t2\n1\t120\t0\t120\t1\t2\n1\t121\t0\t121\t1\t2\n1\t122\t0\t122\t1\t2\n1\t123\t0\t123\t1\t2\n1\t124\t0\t124\t1\t2\n1\t125\t0\t125\t1\t2\n1\t126\t0\t126\t1\t2\n1\t127\t0\t127\t1\t2\n1\t128\t0\t128\t1\t2\n1\t129\t0\t129\t1\t2\n1\t130\t0\t130\t1\t2\n1\t131\t0\t131\t1\t2\n1\t132\t0\t132\t1\t2\n1\t133\t0\t133\t1\t2\n1\t134\t0\t134\t1\t2\n1\t135\t0\t135\t1\t2\n1\t136\t0\t136\t1\t2\n1\t137\t0\t137\t1\t2\n1\t138\t0\t138\t1\t2\n1\t139\t0\t139\t1\t2\n1\t140\t0\t140\t1\t2\n1\t141\t0\t141\t1\t2\n1\t142\t0\t142\t1\t2\n1\t143\t0\t143\t1\t2\n1\t144\t0\t144\t1\t2\n1\t145\t0\t145\t1\t2\n1\t146\t0\t146\t1\t2\n1\t147\t0\t147\t1\t2\n1\t148\t0\t148\t1\t2\n1\t149\t0\t149\t1\t2\n1\t150\t0\t150\t1\t2\n1\t151\t0\t151\t1\t2\n1\t152\t0\t152\t1\t2\n1\t153\t0\t153\t1\t2\n1\t154\t0\t154\t1\t2\n1\t155\t0\t155\t1\t2\n1\t156\t0\t156\t1\t2\n1\t157\t0\t157\t1\t2\n1\t158\t0\t158\t1\t2\n1\t159\t0\t159\t1\t2\n1\t160\t0\t160\t1\t2\n1\t161\t0\t161\t1\t2\n1\t162\t0\t162\t1\t2\n1\t163\t0\t163\t1\t2\n1\t164\t0\t164\t1\t2\n1\t165\t0\t165\t1\t2\n1\t166\t0\t166\t1\t2\n1\t167\t0\t167\t1\t2\n1\t168\t0\t168\t1\t2\n1\t169\t0\t169\t1\t2\n1\t170\t0\t170\t1\t2\n1\t171\t0\t171\
t1\t2\n1\t172\t0\t172\t1\t2\n1\t173\t0\t173\t1\t2\n1\t174\t0\t174\t1\t2\n1\t175\t0\t175\t1\t2\n1\t176\t0\t176\t1\t2\n1\t177\t0\t177\t1\t2\n1\t178\t0\t178\t1\t2\n1\t179\t0\t179\t1\t2\n1\t180\t0\t180\t1\t2\n1\t181\t0\t181\t1\t2\n1\t182\t0\t182\t1\t2\n1\t183\t0\t183\t1\t2\n1\t184\t0\t184\t1\t2\n1\t185\t0\t185\t1\t2\n1\t186\t0\t186\t1\t2\n1\t187\t0\t187\t1\t2\n1\t188\t0\t188\t1\t2\n1\t189\t0\t189\t1\t2\n1\t190\t0\t190\t1\t2\n1\t191\t0\t191\t1\t2\n1\t192\t0\t192\t1\t2\n1\t193\t0\t193\t1\t2\n1\t194\t0\t194\t1\t2\n1\t195\t0\t195\t1\t2\n1\t196\t0\t196\t1\t2\n1\t197\t0\t197\t1\t2\n1\t198\t0\t198\t1\t2\n1\t199\t0\t199\t1\t2\n1\t200\t0\t200\t1\t2\n1\t201\t0\t201\t1\t2\n1\t202\t0\t202\t1\t2\n1\t203\t0\t203\t1\t2\n1\t204\t0\t204\t1\t2\n1\t205\t0\t205\t1\t2\n1\t206\t0\t206\t1\t2\n1\t207\t0\t207\t1\t2\n1\t208\t0\t208\t1\t2\n1\t209\t0\t209\t1\t2\n1\t210\t0\t210\t1\t2\n1\t211\t0\t211\t1\t2\n1\t212\t0\t212\t1\t2\n1\t213\t0\t213\t1\t2\n1\t214\t0\t214\t1\t2\n1\t215\t0\t215\t1\t2\n1\t216\t0\t216\t1\t2\n1\t217\t0\t217\t1\t2\n1\t218\t0\t218\t1\t2\n1\t219\t0\t219\t1\t2\n1\t220\t0\t220\t1\t2\n1\t221\t0\t221\t1\t2\n1\t222\t0\t222\t1\t2\n1\t223\t0\t223\t1\t2\n1\t224\t0\t224\t1\t2\n1\t225\t0\t225\t1\t2\n1\t226\t0\t226\t1\t2\n1\t227\t0\t227\t1\t2\n1\t228\t0\t228\t1\t2\n1\t229\t0\t229\t1\t2\n1\t230\t0\t230\t1\t2\n1\t231\t0\t231\t1\t2\n1\t232\t0\t232\t1\t2\n1\t233\t0\t233\t1\t2\n1\t234\t0\t234\t1\t2\n1\t235\t0\t235\t1\t2\n1\t236\t0\t236\t1\t2\n1\t237\t0\t237\t1\t2\n1\t238\t0\t238\t1\t2\n1\t239\t0\t239\t1\t2\n1\t240\t0\t240\t1\t2\n1\t241\t0\t241\t1\t2\n1\t242\t0\t242\t1\t2\n1\t243\t0\t243\t1\t2\n1\t244\t0\t244\t1\t2\n1\t245\t0\t245\t1\t2\n1\t246\t0\t246\t1\t2\n1\t247\t0\t247\t1\t2\n1\t248\t0\t248\t1\t2\n1\t249\t0\t249\t1\t2\n1\t250\t0\t250\t1\t2\n1\t251\t0\t251\t1\t2\n1\t252\t0\t252\t1\t2\n1\t253\t0\t253\t1\t2\n1\t254\t0\t254\t1\t2\n1\t255\t0\t255\t1\t2\n1\t256\t0\t256\t1\t2\n1\t257\t0\t257\t1\t2\n1\t258\t0\t258\t1\t2\n1\t259\t0\t259\t1\t2\n1\t260\t0\t260\t1\t2\n1\t261\t0\t261\t1\t2\n1\t262\t0\t262\t1\t2\n1\t263\t0\t263\t1\t2\n1\t264\t0\t264\t1\t2\n1\t265\t0\t265\t1\t2\n1\t266\t0\t266\t1\t2\n1\t267\t0\t267\t1\t2\n1\t268\t0\t268\t1\t2\n1\t269\t0\t269\t1\t2\n1\t270\t0\t270\t1\t2\n1\t271\t0\t271\t1\t2\n1\t272\t0\t272\t1\t2\n1\t273\t0\t273\t1\t2\n1\t274\t0\t274\t1\t2\n1\t275\t0\t275\t1\t2\n1\t276\t0\t276\t1\t2\n1\t277\t0\t277\t1\t2\n1\t278\t0\t278\t1\t2\n1\t279\t0\t279\t1\t2\n1\t280\t0\t280\t1\t2\n1\t281\t0\t281\t1\t2\n1\t282\t0\t282\t1\t2\n1\t283\t0\t283\t1\t2\n1\t284\t0\t284\t1\t2\n1\t285\t0\t285\t1\t2\n1\t286\t0\t286\t1\t2\n1\t287\t0\t287\t1\t2\n1\t288\t0\t288\t1\t2\n1\t289\t0\t289\t1\t2\n1\t290\t0\t290\t1\t2\n1\t291\t0\t291\t1\t2\n1\t292\t0\t292\t1\t2\n1\t293\t0\t293\t1\t2\n1\t294\t0\t294\t1\t2\n1\t295\t0\t295\t1\t2\n1\t296\t0\t296\t1\t2\n1\t297\t0\t297\t1\t2\n1\t298\t0\t298\t1\t2\n1\t299\t0\t299\t1\t2\n1\t300\t0\t300\t1\t2\n1\t301\t0\t301\t1\t2\n1\t302\t0\t302\t1\t2\n1\t303\t0\t303\t1\t2\n1\t304\t0\t304\t1\t2\n1\t305\t0\t305\t1\t2\n1\t306\t0\t306\t1\t2\n1\t307\t0\t307\t1\t2\n1\t308\t0\t308\t1\t2\n1\t309\t0\t309\t1\t2\n1\t310\t0\t310\t1\t2\n1\t311\t0\t311\t1\t2\n1\t312\t0\t312\t1\t2\n1\t313\t0\t313\t1\t2\n1\t314\t0\t314\t1\t2\n1\t315\t0\t315\t1\t2\n1\t316\t0\t316\t1\t2\n1\t317\t0\t317\t1\t2\n1\t318\t0\t318\t1\t2\n1\t319\t0\t319\t1\t2\n1\t320\t0\t320\t1\t2\n1\t321\t0\t321\t1\t2\n1\t322\t0\t322\t1\t2\n1\t323\t0\t323\t1\t2\n1\t324\t0\t324\t1\t2\n1\t325\t0\t325\t1\t2\n1\t326\t0\t326\t1\t2\n1\t327\t0\t327\t1\t2\n1\t328\t0\t328\t1\t2\n1\t329\t0\t329\t1\t2\n1\t330\t0\t330\t1\t2\n1\t331\t0\t331\t1\t2\n1\t332\t0\t332\t1\t2\n1\t333
\t0\t333\t1\t2\n1\t334\t0\t334\t1\t2\n1\t335\t0\t335\t1\t2\n1\t336\t0\t336\t1\t2\n1\t337\t0\t337\t1\t2\n1\t338\t0\t338\t1\t2\n1\t339\t0\t339\t1\t2\n1\t340\t0\t340\t1\t2\n1\t341\t0\t341\t1\t2\n1\t342\t0\t342\t1\t2\n1\t343\t0\t343\t1\t2\n1\t344\t0\t344\t1\t2\n1\t345\t0\t345\t1\t2\n1\t346\t0\t346\t1\t2\n1\t347\t0\t347\t1\t2\n1\t348\t0\t348\t1\t2\n1\t349\t0\t349\t1\t2\n1\t350\t0\t350\t1\t2\n1\t351\t0\t351\t1\t2\n1\t352\t0\t352\t1\t2\n1\t353\t0\t353\t1\t2\n1\t354\t0\t354\t1\t2\n1\t355\t0\t355\t1\t2\n1\t356\t0\t356\t1\t2\n1\t357\t0\t357\t1\t2\n1\t358\t0\t358\t1\t2\n1\t359\t0\t359\t1\t2\n1\t360\t0\t360\t1\t2\n1\t361\t0\t361\t1\t2\n1\t362\t0\t362\t1\t2\n1\t363\t0\t363\t1\t2\n1\t364\t0\t364\t1\t2\n1\t365\t0\t365\t1\t2\n1\t366\t0\t366\t1\t2\n1\t367\t0\t367\t1\t2\n1\t368\t0\t368\t1\t2\n1\t369\t0\t369\t1\t2\n1\t370\t0\t370\t1\t2\n1\t371\t0\t371\t1\t2\n1\t372\t0\t372\t1\t2\n1\t373\t0\t373\t1\t2\n1\t374\t0\t374\t1\t2\n1\t375\t0\t375\t1\t2\n1\t376\t0\t376\t1\t2\n1\t377\t0\t377\t1\t2\n1\t378\t0\t378\t1\t2\n1\t379\t0\t379\t1\t2\n1\t380\t0\t380\t1\t2\n1\t381\t0\t381\t1\t2\n1\t382\t0\t382\t1\t2\n1\t383\t0\t383\t1\t2\n1\t384\t0\t384\t1\t2\n1\t385\t0\t385\t1\t2\n1\t386\t0\t386\t1\t2\n1\t387\t0\t387\t1\t2\n1\t388\t0\t388\t1\t2\n1\t389\t0\t389\t1\t2\n1\t390\t0\t390\t1\t2\n1\t391\t0\t391\t1\t2\n1\t392\t0\t392\t1\t2\n1\t393\t0\t393\t1\t2\n1\t394\t0\t394\t1\t2\n1\t395\t0\t395\t1\t2\n1\t396\t0\t396\t1\t2\n1\t397\t0\t397\t1\t2\n1\t398\t0\t398\t1\t2\n1\t399\t0\t399\t1\t2\n1\t400\t0\t400\t1\t2\n1\t401\t0\t401\t1\t2\n1\t402\t0\t402\t1\t2\n1\t403\t0\t403\t1\t2\n1\t404\t0\t404\t1\t2\n1\t405\t0\t405\t1\t2\n1\t406\t0\t406\t1\t2\n1\t407\t0\t407\t1\t2\n1\t408\t0\t408\t1\t2\n1\t409\t0\t409\t1\t2\n1\t410\t0\t410\t1\t2\n1\t411\t0\t411\t1\t2\n1\t412\t0\t412\t1\t2\n1\t413\t0\t413\t1\t2\n1\t414\t0\t414\t1\t2\n1\t415\t0\t415\t1\t2\n1\t416\t0\t416\t1\t2\n1\t417\t0\t417\t1\t2\n1\t418\t0\t418\t1\t2\n1\t419\t0\t419\t1\t2\n1\t420\t0\t420\t1\t2\n1\t421\t0\t421\t1\t2\n1\t422\t0\t422\t1\t2\n1\t423\t0\t423\t1\t2\n1\t424\t0\t424\t1\t2\n1\t425\t0\t425\t1\t2\n1\t426\t0\t426\t1\t2\n1\t427\t0\t427\t1\t2\n1\t428\t0\t428\t1\t2\n1\t429\t0\t429\t1\t2\n1\t430\t0\t430\t1\t2\n1\t431\t0\t431\t1\t2\n1\t432\t0\t432\t1\t2\n1\t433\t0\t433\t1\t2\n1\t434\t0\t434\t1\t2\n1\t435\t0\t435\t1\t2\n1\t436\t0\t436\t1\t2\n1\t437\t0\t437\t1\t2\n1\t438\t0\t438\t1\t2\n1\t439\t0\t439\t1\t2\n1\t440\t0\t440\t1\t2\n1\t441\t0\t441\t1\t2\n1\t442\t0\t442\t1\t2\n1\t443\t0\t443\t1\t2\n1\t444\t0\t444\t1\t2\n1\t445\t0\t445\t1\t2\n1\t446\t0\t446\t1\t2\n1\t447\t0\t447\t1\t2\n1\t448\t0\t448\t1\t2\n1\t449\t0\t449\t1\t2\n1\t450\t0\t450\t1\t2\n1\t451\t0\t451\t1\t2\n1\t452\t0\t452\t1\t2\n1\t453\t0\t453\t1\t2\n1\t454\t0\t454\t1\t2\n1\t455\t0\t455\t1\t2\n1\t456\t0\t456\t1\t2\n1\t457\t0\t457\t1\t2\n1\t458\t0\t458\t1\t2\n1\t459\t0\t459\t1\t2\n1\t460\t0\t460\t1\t2\n1\t461\t0\t461\t1\t2\n1\t462\t0\t462\t1\t2\n1\t463\t0\t463\t1\t2\n1\t464\t0\t464\t1\t2\n1\t465\t0\t465\t1\t2\n1\t466\t0\t466\t1\t2\n1\t467\t0\t467\t1\t2\n1\t468\t0\t468\t1\t2\n1\t469\t0\t469\t1\t2\n1\t470\t0\t470\t1\t2\n1\t471\t0\t471\t1\t2\n1\t472\t0\t472\t1\t2\n1\t473\t0\t473\t1\t2\n1\t474\t0\t474\t1\t2\n1\t475\t0\t475\t1\t2\n1\t476\t0\t476\t1\t2\n1\t477\t0\t477\t1\t2\n1\t478\t0\t478\t1\t2\n1\t479\t0\t479\t1\t2\n1\t480\t0\t480\t1\t2\n1\t481\t0\t481\t1\t2\n1\t482\t0\t482\t1\t2\n1\t483\t0\t483\t1\t2\n1\t484\t0\t484\t1\t2\n1\t485\t0\t485\t1\t2\n1\t486\t0\t486\t1\t2\n1\t487\t0\t487\t1\t2\n1\t488\t0\t488\t1\t2\n1\t489\t0\t489\t1\t2\n1\t490\t0\t490\t1\t2\n1\t491\t0\t491\t1\t2\n1\t492\t0\t492\t1\t2\n1\t493\t0\t493\t1\t2\n1\t494\t0\t494\t1\t
2\n1\t495\t0\t495\t1\t2\n1\t496\t0\t496\t1\t2\n1\t497\t0\t497\t1\t2\n1\t498\t0\t498\t1\t2\n1\t499\t0\t499\t1\t2\n1\t500\t0\t500\t1\t2\n1\t501\t0\t501\t1\t2\n1\t502\t0\t502\t1\t2\n1\t503\t0\t503\t1\t2\n1\t504\t0\t504\t1\t2\n1\t505\t0\t505\t1\t2\n1\t506\t0\t506\t1\t2\n1\t507\t0\t507\t1\t2\n1\t508\t0\t508\t1\t2\n1\t509\t0\t509\t1\t2\n1\t510\t0\t510\t1\t2\n1\t511\t0\t511\t1\t2\n1\t512\t0\t512\t1\t2\n1\t513\t0\t513\t1\t2\n1\t514\t0\t514\t1\t2\n1\t515\t0\t515\t1\t2\n1\t516\t0\t516\t1\t2\n1\t517\t0\t517\t1\t2\n1\t518\t0\t518\t1\t2\n1\t519\t0\t519\t1\t2\n1\t520\t0\t520\t1\t2\n1\t521\t0\t521\t1\t2\n1\t522\t0\t522\t1\t2\n1\t523\t0\t523\t1\t2\n1\t524\t0\t524\t1\t2\n1\t525\t0\t525\t1\t2\n1\t526\t0\t526\t1\t2\n1\t527\t0\t527\t1\t2\n1\t528\t0\t528\t1\t2\n1\t529\t0\t529\t1\t2\n1\t530\t0\t530\t1\t2\n1\t531\t0\t531\t1\t2\n1\t532\t0\t532\t1\t2\n1\t533\t0\t533\t1\t2\n1\t534\t0\t534\t1\t2\n1\t535\t0\t535\t1\t2\n1\t536\t0\t536\t1\t2\n1\t537\t0\t537\t1\t2\n1\t538\t0\t538\t1\t2\n1\t539\t0\t539\t1\t2\n1\t540\t0\t540\t1\t2\n1\t541\t0\t541\t1\t2\n1\t542\t0\t542\t1\t2\n1\t543\t0\t543\t1\t2\n1\t544\t0\t544\t1\t2\n1\t545\t0\t545\t1\t2\n1\t546\t0\t546\t1\t2\n1\t547\t0\t547\t1\t2\n1\t548\t0\t548\t1\t2\n1\t549\t0\t549\t1\t2\n1\t550\t0\t550\t1\t2\n1\t551\t0\t551\t1\t2\n1\t552\t0\t552\t1\t2\n1\t553\t0\t553\t1\t2\n1\t554\t0\t554\t1\t2\n1\t555\t0\t555\t1\t2\n1\t556\t0\t556\t1\t2\n1\t557\t0\t557\t1\t2\n1\t558\t0\t558\t1\t2\n1\t559\t0\t559\t1\t2\n1\t560\t0\t560\t1\t2\n1\t561\t0\t561\t1\t2\n1\t562\t0\t562\t1\t2\n1\t563\t0\t563\t1\t2\n1\t564\t0\t564\t1\t2\n1\t565\t0\t565\t1\t2\n1\t566\t0\t566\t1\t2\n1\t567\t0\t567\t1\t2\n1\t568\t0\t568\t1\t2\n1\t569\t0\t569\t1\t2\n1\t570\t0\t570\t1\t2\n1\t571\t0\t571\t1\t2\n1\t572\t0\t572\t1\t2\n1\t573\t0\t573\t1\t2\n1\t574\t0\t574\t1\t2\n1\t575\t0\t575\t1\t2\n1\t576\t0\t576\t1\t2\n1\t577\t0\t577\t1\t2\n1\t578\t0\t578\t1\t2\n1\t579\t0\t579\t1\t2\n1\t580\t0\t580\t1\t2\n1\t581\t0\t581\t1\t2\n1\t582\t0\t582\t1\t2\n1\t583\t0\t583\t1\t2\n1\t584\t0\t584\t1\t2\n1\t585\t0\t585\t1\t2\n1\t586\t0\t586\t1\t2\n1\t587\t0\t587\t1\t2\n1\t588\t0\t588\t1\t2\n1\t589\t0\t589\t1\t2\n1\t590\t0\t590\t1\t2\n1\t591\t0\t591\t1\t2\n1\t592\t0\t592\t1\t2\n1\t593\t0\t593\t1\t2\n1\t594\t0\t594\t1\t2\n1\t595\t0\t595\t1\t2\n1\t596\t0\t596\t1\t2\n1\t597\t0\t597\t1\t2\n1\t598\t0\t598\t1\t2\n1\t599\t0\t599\t1\t2\n1\t600\t0\t600\t1\t2\n1\t601\t0\t601\t1\t2\n1\t602\t0\t602\t1\t2\n1\t603\t0\t603\t1\t2\n1\t604\t0\t604\t1\t2\n1\t605\t0\t605\t1\t2\n1\t606\t0\t606\t1\t2\n1\t607\t0\t607\t1\t2\n1\t608\t0\t608\t1\t2\n1\t609\t0\t609\t1\t2\n1\t610\t0\t610\t1\t2\n1\t611\t0\t611\t1\t2\n1\t612\t0\t612\t1\t2\n1\t613\t0\t613\t1\t2\n1\t614\t0\t614\t1\t2\n1\t615\t0\t615\t1\t2\n1\t616\t0\t616\t1\t2\n1\t617\t0\t617\t1\t2\n1\t618\t0\t618\t1\t2\n1\t619\t0\t619\t1\t2\n1\t620\t0\t620\t1\t2\n1\t621\t0\t621\t1\t2\n1\t622\t0\t622\t1\t2\n1\t623\t0\t623\t1\t2\n1\t624\t0\t624\t1\t2\n1\t625\t0\t625\t1\t2\n1\t626\t0\t626\t1\t2\n1\t627\t0\t627\t1\t2\n1\t628\t0\t628\t1\t2\n1\t629\t0\t629\t1\t2\n1\t630\t0\t630\t1\t2\n1\t631\t0\t631\t1\t2\n1\t632\t0\t632\t1\t2\n1\t633\t0\t633\t1\t2\n1\t634\t0\t634\t1\t2\n1\t635\t0\t635\t1\t2\n1\t636\t0\t636\t1\t2\n1\t637\t0\t637\t1\t2\n1\t638\t0\t638\t1\t2\n1\t639\t0\t639\t1\t2\n1\t640\t0\t640\t1\t2\n1\t641\t0\t641\t1\t2\n1\t642\t0\t642\t1\t2\n1\t643\t0\t643\t1\t2\n1\t644\t0\t644\t1\t2\n1\t645\t0\t645\t1\t2\n1\t646\t0\t646\t1\t2\n1\t647\t0\t647\t1\t2\n1\t648\t0\t648\t1\t2\n1\t649\t0\t649\t1\t2\n1\t650\t0\t650\t1\t2\n1\t651\t0\t651\t1\t2\n1\t652\t0\t652\t1\t2\n1\t653\t0\t653\t1\t2\n1\t654\t0\t654\t1\t2\n1\t655\t0\t655\t1\t2\n1\t656\t0\
t656\t1\t2\n1\t657\t0\t657\t1\t2\n1\t658\t0\t658\t1\t2\n1\t659\t0\t659\t1\t2\n1\t660\t0\t660\t1\t2\n1\t661\t0\t661\t1\t2\n1\t662\t0\t662\t1\t2\n1\t663\t0\t663\t1\t2\n1\t664\t0\t664\t1\t2\n1\t665\t0\t665\t1\t2\n1\t666\t0\t666\t1\t2\n1\t667\t0\t667\t1\t2\n1\t668\t0\t668\t1\t2\n1\t669\t0\t669\t1\t2\n1\t670\t0\t670\t1\t2\n1\t671\t0\t671\t1\t2\n1\t672\t0\t672\t1\t2\n1\t673\t0\t673\t1\t2\n1\t674\t0\t674\t1\t2\n1\t675\t0\t675\t1\t2\n1\t676\t0\t676\t1\t2\n1\t677\t0\t677\t1\t2\n1\t678\t0\t678\t1\t2\n1\t679\t0\t679\t1\t2\n1\t680\t0\t680\t1\t2\n1\t681\t0\t681\t1\t2\n1\t682\t0\t682\t1\t2\n1\t683\t0\t683\t1\t2\n1\t684\t0\t684\t1\t2\n1\t685\t0\t685\t1\t2\n1\t686\t0\t686\t1\t2\n1\t687\t0\t687\t1\t2\n1\t688\t0\t688\t1\t2\n1\t689\t0\t689\t1\t2\n1\t690\t0\t690\t1\t2\n1\t691\t0\t691\t1\t2\n1\t692\t0\t692\t1\t2\n1\t693\t0\t693\t1\t2\n1\t694\t0\t694\t1\t2\n1\t695\t0\t695\t1\t2\n1\t696\t0\t696\t1\t2\n1\t697\t0\t697\t1\t2\n1\t698\t0\t698\t1\t2\n1\t699\t0\t699\t1\t2\n1\t700\t0\t700\t1\t2\n1\t701\t0\t701\t1\t2\n1\t702\t0\t702\t1\t2\n1\t703\t0\t703\t1\t2\n1\t704\t0\t704\t1\t2\n1\t705\t0\t705\t1\t2\n1\t706\t0\t706\t1\t2\n1\t707\t0\t707\t1\t2\n1\t708\t0\t708\t1\t2\n1\t709\t0\t709\t1\t2\n1\t710\t0\t710\t1\t2\n1\t711\t0\t711\t1\t2\n1\t712\t0\t712\t1\t2\n1\t713\t0\t713\t1\t2\n1\t714\t0\t714\t1\t2\n1\t715\t0\t715\t1\t2\n1\t716\t0\t716\t1\t2\n1\t717\t0\t717\t1\t2\n1\t718\t0\t718\t1\t2\n1\t719\t0\t719\t1\t2\n1\t720\t0\t720\t1\t2\n1\t721\t0\t721\t1\t2\n1\t722\t0\t722\t1\t2\n1\t723\t0\t723\t1\t2\n1\t724\t0\t724\t1\t2\n1\t725\t0\t725\t1\t2\n1\t726\t0\t726\t1\t2\n1\t727\t0\t727\t1\t2\n1\t728\t0\t728\t1\t2\n1\t729\t0\t729\t1\t2\n1\t730\t0\t730\t1\t2\n1\t731\t0\t731\t1\t2\n1\t732\t0\t732\t1\t2\n1\t733\t0\t733\t1\t2\n1\t734\t0\t734\t1\t2\n1\t735\t0\t735\t1\t2\n1\t736\t0\t736\t1\t2\n1\t737\t0\t737\t1\t2\n1\t738\t0\t738\t1\t2\n1\t739\t0\t739\t1\t2\n1\t740\t0\t740\t1\t2\n1\t741\t0\t741\t1\t2\n1\t742\t0\t742\t1\t2\n1\t743\t0\t743\t1\t2\n1\t744\t0\t744\t1\t2\n1\t745\t0\t745\t1\t2\n1\t746\t0\t746\t1\t2\n1\t747\t0\t747\t1\t2\n1\t748\t0\t748\t1\t2\n1\t749\t0\t749\t1\t2\n1\t750\t0\t750\t1\t2\n1\t751\t0\t751\t1\t2\n1\t752\t0\t752\t1\t2\n1\t753\t0\t753\t1\t2\n1\t754\t0\t754\t1\t2\n1\t755\t0\t755\t1\t2\n1\t756\t0\t756\t1\t2\n1\t757\t0\t757\t1\t2\n1\t758\t0\t758\t1\t2\n1\t759\t0\t759\t1\t2\n1\t760\t0\t760\t1\t2\n1\t761\t0\t761\t1\t2\n1\t762\t0\t762\t1\t2\n1\t763\t0\t763\t1\t2\n1\t764\t0\t764\t1\t2\n1\t765\t0\t765\t1\t2\n1\t766\t0\t766\t1\t2\n1\t767\t0\t767\t1\t2\n1\t768\t0\t768\t1\t2\n1\t769\t0\t769\t1\t2\n1\t770\t0\t770\t1\t2\n1\t771\t0\t771\t1\t2\n1\t772\t0\t772\t1\t2\n1\t773\t0\t773\t1\t2\n1\t774\t0\t774\t1\t2\n1\t775\t0\t775\t1\t2\n1\t776\t0\t776\t1\t2\n1\t777\t0\t777\t1\t2\n1\t778\t0\t778\t1\t2\n1\t779\t0\t779\t1\t2\n1\t780\t0\t780\t1\t2\n1\t781\t0\t781\t1\t2\n1\t782\t0\t782\t1\t2\n1\t783\t0\t783\t1\t2\n1\t784\t0\t784\t1\t2\n1\t785\t0\t785\t1\t2\n1\t786\t0\t786\t1\t2\n1\t787\t0\t787\t1\t2\n1\t788\t0\t788\t1\t2\n1\t789\t0\t789\t1\t2\n1\t790\t0\t790\t1\t2\n1\t791\t0\t791\t1\t2\n1\t792\t0\t792\t1\t2\n1\t793\t0\t793\t1\t2\n1\t794\t0\t794\t1\t2\n1\t795\t0\t795\t1\t2\n1\t796\t0\t796\t1\t2\n1\t797\t0\t797\t1\t2\n1\t798\t0\t798\t1\t2\n1\t799\t0\t799\t1\t2\n1\t800\t0\t800\t1\t2\n1\t801\t0\t801\t1\t2\n1\t802\t0\t802\t1\t2\n1\t803\t0\t803\t1\t2\n1\t804\t0\t804\t1\t2\n1\t805\t0\t805\t1\t2\n1\t806\t0\t806\t1\t2\n1\t807\t0\t807\t1\t2\n1\t808\t0\t808\t1\t2\n1\t809\t0\t809\t1\t2\n1\t810\t0\t810\t1\t2\n1\t811\t0\t811\t1\t2\n1\t812\t0\t812\t1\t2\n1\t813\t0\t813\t1\t2\n1\t814\t0\t814\t1\t2\n1\t815\t0\t815\t1\t2\n1\t816\t0\t816\t1\t2\n1\t817\t0\t817\t1\t2\n1
\t818\t0\t818\t1\t2\n1\t819\t0\t819\t1\t2\n1\t820\t0\t820\t1\t2\n1\t821\t0\t821\t1\t2\n1\t822\t0\t822\t1\t2\n1\t823\t0\t823\t1\t2\n1\t824\t0\t824\t1\t2\n1\t825\t0\t825\t1\t2\n1\t826\t0\t826\t1\t2\n1\t827\t0\t827\t1\t2\n1\t828\t0\t828\t1\t2\n1\t829\t0\t829\t1\t2\n1\t830\t0\t830\t1\t2\n1\t831\t0\t831\t1\t2\n1\t832\t0\t832\t1\t2\n1\t833\t0\t833\t1\t2\n1\t834\t0\t834\t1\t2\n1\t835\t0\t835\t1\t2\n1\t836\t0\t836\t1\t2\n1\t837\t0\t837\t1\t2\n1\t838\t0\t838\t1\t2\n1\t839\t0\t839\t1\t2\n1\t840\t0\t840\t1\t2\n1\t841\t0\t841\t1\t2\n1\t842\t0\t842\t1\t2\n1\t843\t0\t843\t1\t2\n1\t844\t0\t844\t1\t2\n1\t845\t0\t845\t1\t2\n1\t846\t0\t846\t1\t2\n1\t847\t0\t847\t1\t2\n1\t848\t0\t848\t1\t2\n1\t849\t0\t849\t1\t2\n1\t850\t0\t850\t1\t2\n1\t851\t0\t851\t1\t2\n1\t852\t0\t852\t1\t2\n1\t853\t0\t853\t1\t2\n1\t854\t0\t854\t1\t2\n1\t855\t0\t855\t1\t2\n1\t856\t0\t856\t1\t2\n1\t857\t0\t857\t1\t2\n1\t858\t0\t858\t1\t2\n1\t859\t0\t859\t1\t2\n1\t860\t0\t860\t1\t2\n1\t861\t0\t861\t1\t2\n1\t862\t0\t862\t1\t2\n1\t863\t0\t863\t1\t2\n1\t864\t0\t864\t1\t2\n1\t865\t0\t865\t1\t2\n1\t866\t0\t866\t1\t2\n1\t867\t0\t867\t1\t2\n1\t868\t0\t868\t1\t2\n1\t869\t0\t869\t1\t2\n1\t870\t0\t870\t1\t2\n1\t871\t0\t871\t1\t2\n1\t872\t0\t872\t1\t2\n1\t873\t0\t873\t1\t2\n1\t874\t0\t874\t1\t2\n1\t875\t0\t875\t1\t2\n1\t876\t0\t876\t1\t2\n1\t877\t0\t877\t1\t2\n1\t878\t0\t878\t1\t2\n1\t879\t0\t879\t1\t2\n1\t880\t0\t880\t1\t2\n1\t881\t0\t881\t1\t2\n1\t882\t0\t882\t1\t2\n1\t883\t0\t883\t1\t2\n1\t884\t0\t884\t1\t2\n1\t885\t0\t885\t1\t2\n1\t886\t0\t886\t1\t2\n1\t887\t0\t887\t1\t2\n1\t888\t0\t888\t1\t2\n1\t889\t0\t889\t1\t2\n1\t890\t0\t890\t1\t2\n1\t891\t0\t891\t1\t2\n1\t892\t0\t892\t1\t2\n1\t893\t0\t893\t1\t2\n1\t894\t0\t894\t1\t2\n1\t895\t0\t895\t1\t2\n1\t896\t0\t896\t1\t2\n1\t897\t0\t897\t1\t2\n1\t898\t0\t898\t1\t2\n1\t899\t0\t899\t1\t2\n1\t900\t0\t900\t1\t2\n1\t901\t0\t901\t1\t2\n1\t902\t0\t902\t1\t2\n1\t903\t0\t903\t1\t2\n1\t904\t0\t904\t1\t2\n1\t905\t0\t905\t1\t2\n1\t906\t0\t906\t1\t2\n1\t907\t0\t907\t1\t2\n1\t908\t0\t908\t1\t2\n1\t909\t0\t909\t1\t2\n1\t910\t0\t910\t1\t2\n1\t911\t0\t911\t1\t2\n1\t912\t0\t912\t1\t2\n1\t913\t0\t913\t1\t2\n1\t914\t0\t914\t1\t2\n1\t915\t0\t915\t1\t2\n1\t916\t0\t916\t1\t2\n1\t917\t0\t917\t1\t2\n1\t918\t0\t918\t1\t2\n1\t919\t0\t919\t1\t2\n1\t920\t0\t920\t1\t2\n1\t921\t0\t921\t1\t2\n1\t922\t0\t922\t1\t2\n1\t923\t0\t923\t1\t2\n1\t924\t0\t924\t1\t2\n1\t925\t0\t925\t1\t2\n1\t926\t0\t926\t1\t2\n1\t927\t0\t927\t1\t2\n1\t928\t0\t928\t1\t2\n1\t929\t0\t929\t1\t2\n1\t930\t0\t930\t1\t2\n1\t931\t0\t931\t1\t2\n1\t932\t0\t932\t1\t2\n1\t933\t0\t933\t1\t2\n1\t934\t0\t934\t1\t2\n1\t935\t0\t935\t1\t2\n1\t936\t0\t936\t1\t2\n1\t937\t0\t937\t1\t2\n1\t938\t0\t938\t1\t2\n1\t939\t0\t939\t1\t2\n1\t940\t0\t940\t1\t2\n1\t941\t0\t941\t1\t2\n1\t942\t0\t942\t1\t2\n1\t943\t0\t943\t1\t2\n1\t944\t0\t944\t1\t2\n1\t945\t0\t945\t1\t2\n1\t946\t0\t946\t1\t2\n1\t947\t0\t947\t1\t2\n1\t948\t0\t948\t1\t2\n1\t949\t0\t949\t1\t2\n1\t950\t0\t950\t1\t2\n1\t951\t0\t951\t1\t2\n1\t952\t0\t952\t1\t2\n1\t953\t0\t953\t1\t2\n1\t954\t0\t954\t1\t2\n1\t955\t0\t955\t1\t2\n1\t956\t0\t956\t1\t2\n1\t957\t0\t957\t1\t2\n1\t958\t0\t958\t1\t2\n1\t959\t0\t959\t1\t2\n1\t960\t0\t960\t1\t2\n1\t961\t0\t961\t1\t2\n1\t962\t0\t962\t1\t2\n1\t963\t0\t963\t1\t2\n1\t964\t0\t964\t1\t2\n1\t965\t0\t965\t1\t2\n1\t966\t0\t966\t1\t2\n1\t967\t0\t967\t1\t2\n1\t968\t0\t968\t1\t2\n1\t969\t0\t969\t1\t2\n1\t970\t0\t970\t1\t2\n1\t971\t0\t971\t1\t2\n1\t972\t0\t972\t1\t2\n1\t973\t0\t973\t1\t2\n1\t974\t0\t974\t1\t2\n1\t975\t0\t975\t1\t2\n1\t976\t0\t976\t1\t2\n1\t977\t0\t977\t1\t2\n1\t978\t0\t978\t1\t2\n1\t979\t0\t979
\t1\t2\n1\t980\t0\t980\t1\t2\n1\t981\t0\t981\t1\t2\n1\t982\t0\t982\t1\t2\n1\t983\t0\t983\t1\t2\n1\t984\t0\t984\t1\t2\n1\t985\t0\t985\t1\t2\n1\t986\t0\t986\t1\t2\n1\t987\t0\t987\t1\t2\n1\t988\t0\t988\t1\t2\n1\t989\t0\t989\t1\t2\n1\t990\t0\t990\t1\t2\n1\t991\t0\t991\t1\t2\n1\t992\t0\t992\t1\t2\n1\t993\t0\t993\t1\t2\n1\t994\t0\t994\t1\t2\n1\t995\t0\t995\t1\t2\n1\t996\t0\t996\t1\t2\n1\t997\t0\t997\t1\t2\n1\t998\t0\t998\t1\t2\n1\t999\t0\t999\t1\t2\n1\t1000\t0\t1000\t1\t2\n"
  },
  {
    "path": "example/example.fam",
    "content": "1\t1\t0\t0\t2\t-0.4267761781290719\n2\t2\t0\t0\t2\t0.553949147355921\n3\t3\t0\t0\t2\t-0.6501615855731889\n4\t4\t0\t0\t2\t0.6658237998926421\n5\t5\t0\t0\t2\t2.153525389108508\n6\t6\t0\t0\t2\t1.603607081080267\n7\t7\t0\t0\t2\t0.9245092282950891\n8\t8\t0\t0\t2\t0.16695520568640973\n9\t9\t0\t0\t2\t-0.44678076953973184\n10\t10\t0\t0\t2\t0.9130182655669581\n11\t11\t0\t0\t2\t-0.12365315280028831\n12\t12\t0\t0\t2\t0.45120658048742923\n13\t13\t0\t0\t2\t-1.813422352445905\n14\t14\t0\t0\t2\t0.10916042619322039\n15\t15\t0\t0\t2\t1.2606692333542373\n16\t16\t0\t0\t2\t0.5532311359350739\n17\t17\t0\t0\t2\t0.20945215196889685\n18\t18\t0\t0\t2\t-0.5132181734188253\n19\t19\t0\t0\t2\t0.6119067760087953\n20\t20\t0\t0\t2\t1.5394566797469251\n21\t21\t0\t0\t2\t-0.3865037693784083\n22\t22\t0\t0\t2\t-2.5143419880300866\n23\t23\t0\t0\t2\t0.405703045213449\n24\t24\t0\t0\t2\t0.5185603274229008\n25\t25\t0\t0\t2\t1.0611484493881813\n26\t26\t0\t0\t2\t0.19594051011058977\n27\t27\t0\t0\t2\t-0.24979581720203475\n28\t28\t0\t0\t2\t-0.2676477096598135\n29\t29\t0\t0\t2\t0.7177935231258309\n30\t30\t0\t0\t2\t1.1861246517682622\n31\t31\t0\t0\t2\t0.08629622130288517\n32\t32\t0\t0\t2\t-0.35304293360949707\n33\t33\t0\t0\t2\t0.2354296885149071\n34\t34\t0\t0\t2\t1.530452595962293\n35\t35\t0\t0\t2\t0.4579547646635299\n36\t36\t0\t0\t2\t0.4118304659618171\n37\t37\t0\t0\t2\t1.0173491056828072\n38\t38\t0\t0\t2\t-1.3848053256406825\n39\t39\t0\t0\t2\t1.7510265942816932\n40\t40\t0\t0\t2\t0.274954268508773\n41\t41\t0\t0\t2\t-1.9897038536605167\n42\t42\t0\t0\t2\t-0.2744230179428616\n43\t43\t0\t0\t2\t1.426414899954135\n44\t44\t0\t0\t2\t1.6794358446939521\n45\t45\t0\t0\t2\t-1.9762223272212731\n46\t46\t0\t0\t2\t1.31328879515592\n47\t47\t0\t0\t2\t-1.2379452021381565\n48\t48\t0\t0\t2\t-1.2230942245689198\n49\t49\t0\t0\t2\t-0.2086823379220931\n50\t50\t0\t0\t2\t0.046591344965807927\n51\t51\t0\t0\t2\t0.6454664759221705\n52\t52\t0\t0\t2\t-0.3366711458816414\n53\t53\t0\t0\t2\t-1.4251415011354847\n54\t54\t0\t0\t2\t-1.7404106776289252\n55\t55\t0\t0\t2\t-0.9899916360986122\n56\t56\t0\t0\t2\t-0.6813055796709735\n57\t57\t0\t0\t2\t1.4523360524496092\n58\t58\t0\t0\t2\t-0.14395595141327489\n59\t59\t0\t0\t2\t-0.4378362565094535\n60\t60\t0\t0\t2\t-0.1519268773269188\n61\t61\t0\t0\t2\t-1.0918707470545093\n62\t62\t0\t0\t2\t0.11886585929161704\n63\t63\t0\t0\t2\t-0.7446821129490644\n64\t64\t0\t0\t2\t-0.3515988379695028\n65\t65\t0\t0\t2\t-0.2505242457789806\n66\t66\t0\t0\t2\t-1.1445325100301038\n67\t67\t0\t0\t2\t0.4021008709651108\n68\t68\t0\t0\t2\t-0.26967294800034874\n69\t69\t0\t0\t2\t-1.071043452154829\n70\t70\t0\t0\t2\t0.5466104498110623\n71\t71\t0\t0\t2\t0.9059888912622454\n72\t72\t0\t0\t2\t-0.9302463996121689\n73\t73\t0\t0\t2\t-0.1649348560132909\n74\t74\t0\t0\t2\t-0.2592977286319428\n75\t75\t0\t0\t2\t-2.114476477667718\n76\t76\t0\t0\t2\t-0.6076341029698831\n77\t77\t0\t0\t2\t0.7748006221315341\n78\t78\t0\t0\t2\t-2.0962224712289843\n79\t79\t0\t0\t2\t-1.9116579659512944\n80\t80\t0\t0\t2\t1.0353397078763547\n81\t81\t0\t0\t2\t0.7294502561042351\n82\t82\t0\t0\t2\t-0.09793421592240978\n83\t83\t0\t0\t2\t0.24566617144478756\n84\t84\t0\t0\t2\t0.9386731103091898\n85\t85\t0\t0\t2\t1.7337564931524567\n86\t86\t0\t0\t2\t3.5965463607888486\n87\t87\t0\t0\t2\t0.10417950242086334\n88\t88\t0\t0\t2\t0.8181457381771525\n89\t89\t0\t0\t2\t-0.4717474685706561\n90\t90\t0\t0\t2\t1.3263850865010316\n91\t91\t0\t0\t2\t-0.616465031940274\n92\t92\t0\t0\t2\t-1.1963740018230864\n93\t93\t0\t0\t2\t-2.523429651265045\n94\t94\t0\t0\t2\t-1.6775958547520566\n95\t95\t0\t0\t2\t-1.325183339
632047\n96\t96\t0\t0\t2\t0.5882935940652537\n97\t97\t0\t0\t2\t1.2377386081497168\n98\t98\t0\t0\t2\t-0.2927710750283572\n99\t99\t0\t0\t2\t2.2130683684766117\n100\t100\t0\t0\t2\t1.3907852235803377\n101\t101\t0\t0\t2\t0.3489648675330413\n102\t102\t0\t0\t2\t-0.17965231137324936\n103\t103\t0\t0\t2\t-1.714950186712027\n104\t104\t0\t0\t2\t-0.3606425914729865\n105\t105\t0\t0\t2\t0.2558071082994665\n106\t106\t0\t0\t2\t0.7848503716979157\n107\t107\t0\t0\t2\t2.505180527543909\n108\t108\t0\t0\t2\t-1.8168385585899676\n109\t109\t0\t0\t2\t-0.01565688433622611\n110\t110\t0\t0\t2\t0.7040291497308593\n111\t111\t0\t0\t2\t-0.011386617718583729\n112\t112\t0\t0\t2\t-0.6404979013481404\n113\t113\t0\t0\t2\t2.0784381335821682\n114\t114\t0\t0\t2\t-0.2254928329575596\n115\t115\t0\t0\t2\t0.0021879911560499826\n116\t116\t0\t0\t2\t0.4530758437690287\n117\t117\t0\t0\t2\t0.7111556836519528\n118\t118\t0\t0\t2\t-7.926457716849677e-5\n119\t119\t0\t0\t2\t1.2716437121248285\n120\t120\t0\t0\t2\t0.13091641539525625\n121\t121\t0\t0\t2\t0.9601642531802883\n122\t122\t0\t0\t2\t-0.6946269492816117\n123\t123\t0\t0\t2\t1.2083545960448898\n124\t124\t0\t0\t2\t-0.5558287669378148\n125\t125\t0\t0\t2\t0.40869932288799415\n126\t126\t0\t0\t2\t0.8632834147663099\n127\t127\t0\t0\t2\t0.8642022294643745\n128\t128\t0\t0\t2\t-0.45151257769571096\n129\t129\t0\t0\t2\t-0.1914051114382599\n130\t130\t0\t0\t2\t-0.037649137239090524\n131\t131\t0\t0\t2\t1.069953907594907\n132\t132\t0\t0\t2\t0.7332632139169448\n133\t133\t0\t0\t2\t-1.3859563214470878\n134\t134\t0\t0\t2\t-0.16882598316337785\n135\t135\t0\t0\t2\t1.2341578254519214\n136\t136\t0\t0\t2\t-0.13018118909812346\n137\t137\t0\t0\t2\t-0.849622061183973\n138\t138\t0\t0\t2\t1.65558791310931\n139\t139\t0\t0\t2\t-0.8557488969973114\n140\t140\t0\t0\t2\t-0.591996102804245\n141\t141\t0\t0\t2\t1.4766090213622798\n142\t142\t0\t0\t2\t-1.7606788880705646\n143\t143\t0\t0\t2\t0.3331349454598871\n144\t144\t0\t0\t2\t-1.316796818445614\n145\t145\t0\t0\t2\t2.2658417254598926\n146\t146\t0\t0\t2\t-0.5855466346375702\n147\t147\t0\t0\t2\t1.004242318973581\n148\t148\t0\t0\t2\t0.5950634513012234\n149\t149\t0\t0\t2\t-2.3589818092376342\n150\t150\t0\t0\t2\t1.020059747279963\n151\t151\t0\t0\t2\t-1.2578575928423548\n152\t152\t0\t0\t2\t-0.09700531448496873\n153\t153\t0\t0\t2\t0.33638529718875043\n154\t154\t0\t0\t2\t1.7972793724047749\n155\t155\t0\t0\t2\t0.27013487552553606\n156\t156\t0\t0\t2\t-0.3808633841949227\n157\t157\t0\t0\t2\t0.04018926790384476\n158\t158\t0\t0\t2\t-0.3965127185254764\n159\t159\t0\t0\t2\t1.4936455086379186\n160\t160\t0\t0\t2\t-1.2993293254003835\n161\t161\t0\t0\t2\t0.2519277543876505\n162\t162\t0\t0\t2\t0.691774608729148\n163\t163\t0\t0\t2\t0.7407513064839535\n164\t164\t0\t0\t2\t-0.31056687191510746\n165\t165\t0\t0\t2\t-0.5552597332076762\n166\t166\t0\t0\t2\t-1.0057835319597124\n167\t167\t0\t0\t2\t0.05450686156271301\n168\t168\t0\t0\t2\t-0.6349282429298715\n169\t169\t0\t0\t2\t-0.6695465895569117\n170\t170\t0\t0\t2\t-0.23226063332510133\n171\t171\t0\t0\t2\t0.7035825230515401\n172\t172\t0\t0\t2\t-0.31327835211735766\n173\t173\t0\t0\t2\t-0.24917314601702809\n174\t174\t0\t0\t2\t-0.6592080848536208\n175\t175\t0\t0\t2\t-0.9477879953319399\n176\t176\t0\t0\t2\t0.48009125370245204\n177\t177\t0\t0\t2\t0.4706100983663504\n178\t178\t0\t0\t2\t-0.06505632309350708\n179\t179\t0\t0\t2\t-1.4959269095746512\n180\t180\t0\t0\t2\t0.5405485678497597\n181\t181\t0\t0\t2\t1.1232696382170495\n182\t182\t0\t0\t2\t0.0074418075185122565\n183\t183\t0\t0\t2\t0.6639986494873168\n184\t184\t0\t0\t2\t-0.5678566487451827\n185\t185\t0\t0\t2\t0.2
299383937915558\n186\t186\t0\t0\t2\t-0.17076218909155186\n187\t187\t0\t0\t2\t0.3420901284971981\n188\t188\t0\t0\t2\t-0.10950363726652125\n189\t189\t0\t0\t2\t-0.06808902942736765\n190\t190\t0\t0\t2\t-2.3144897119335965\n191\t191\t0\t0\t2\t-1.0777725703353438\n192\t192\t0\t0\t2\t-0.3394034459756072\n193\t193\t0\t0\t2\t-1.5321020124626434\n194\t194\t0\t0\t2\t1.277838035763474\n195\t195\t0\t0\t2\t-0.6186969481783715\n196\t196\t0\t0\t2\t0.24483826998859312\n197\t197\t0\t0\t2\t-0.08972472068807524\n198\t198\t0\t0\t2\t0.28563820059484707\n199\t199\t0\t0\t2\t0.02665228610242174\n200\t200\t0\t0\t2\t-0.8558456209545621\n201\t201\t0\t0\t2\t0.10326780109110945\n202\t202\t0\t0\t2\t-0.4683803455991253\n203\t203\t0\t0\t2\t-0.039629458692737506\n204\t204\t0\t0\t2\t0.5010288555718972\n205\t205\t0\t0\t2\t-1.614633788145904\n206\t206\t0\t0\t2\t-0.8221081713776588\n207\t207\t0\t0\t2\t-0.4154327362321607\n208\t208\t0\t0\t2\t0.19255584244438567\n209\t209\t0\t0\t2\t-0.013616312773019643\n210\t210\t0\t0\t2\t-0.475529391106999\n211\t211\t0\t0\t2\t0.10232966150402548\n212\t212\t0\t0\t2\t0.7488940724700807\n213\t213\t0\t0\t2\t0.44197886128432695\n214\t214\t0\t0\t2\t1.018806470717939\n215\t215\t0\t0\t2\t1.187231623861802\n216\t216\t0\t0\t2\t0.8022976145656721\n217\t217\t0\t0\t2\t-0.4774086494408861\n218\t218\t0\t0\t2\t-0.14197769264202192\n219\t219\t0\t0\t2\t-0.1747777783307673\n220\t220\t0\t0\t2\t1.0313282851738685\n221\t221\t0\t0\t2\t0.09414395331571555\n222\t222\t0\t0\t2\t0.07864110999174483\n223\t223\t0\t0\t2\t-0.5611965282485419\n224\t224\t0\t0\t2\t-0.6176835872308691\n225\t225\t0\t0\t2\t-0.624780748917754\n226\t226\t0\t0\t2\t0.7262504583960367\n227\t227\t0\t0\t2\t0.6736681336462624\n228\t228\t0\t0\t2\t1.2074649724277058\n229\t229\t0\t0\t2\t-0.7000910464946777\n230\t230\t0\t0\t2\t1.0132498153713987\n231\t231\t0\t0\t2\t0.1086125064282297\n232\t232\t0\t0\t2\t1.5378432998509857\n233\t233\t0\t0\t2\t-0.38864483884544576\n234\t234\t0\t0\t2\t0.5853901243650999\n235\t235\t0\t0\t2\t0.6361493273442497\n236\t236\t0\t0\t2\t-2.3727188226249276\n237\t237\t0\t0\t2\t-0.1546518851418047\n238\t238\t0\t0\t2\t-0.4535321516545124\n239\t239\t0\t0\t2\t0.20059745542046717\n240\t240\t0\t0\t2\t1.6700071185481635\n241\t241\t0\t0\t2\t1.0303745125812633\n242\t242\t0\t0\t2\t-0.2080769141106961\n243\t243\t0\t0\t2\t0.1640491851783853\n244\t244\t0\t0\t2\t-0.28813258919928886\n245\t245\t0\t0\t2\t0.5230500717020431\n246\t246\t0\t0\t2\t0.3072979372247395\n247\t247\t0\t0\t2\t0.11651870056658613\n248\t248\t0\t0\t2\t-0.0020664157587947755\n249\t249\t0\t0\t2\t0.6554963064376974\n250\t250\t0\t0\t2\t-1.2541501943392634\n251\t251\t0\t0\t2\t0.0072657109833799294\n252\t252\t0\t0\t2\t2.6072042439689764\n253\t253\t0\t0\t2\t0.7873316028203755\n254\t254\t0\t0\t2\t-0.5739394940501295\n255\t255\t0\t0\t2\t0.4301393873631534\n256\t256\t0\t0\t2\t0.4980033880636772\n257\t257\t0\t0\t2\t-0.472708427483925\n258\t258\t0\t0\t2\t-2.115773568613388\n259\t259\t0\t0\t2\t-0.3027911974132581\n260\t260\t0\t0\t2\t0.1357209008468077\n261\t261\t0\t0\t2\t0.7462827401513036\n262\t262\t0\t0\t2\t1.0510433172779923\n263\t263\t0\t0\t2\t-0.9143361188838738\n264\t264\t0\t0\t2\t-1.980776740834876\n265\t265\t0\t0\t2\t0.5590372219564149\n266\t266\t0\t0\t2\t-1.4215222131613703\n267\t267\t0\t0\t2\t0.9366889840084335\n268\t268\t0\t0\t2\t0.38076689746763476\n269\t269\t0\t0\t2\t0.9453281558278972\n270\t270\t0\t0\t2\t0.057035845832356935\n271\t271\t0\t0\t2\t1.4745050564310078\n272\t272\t0\t0\t2\t-0.5174737513107168\n273\t273\t0\t0\t2\t-1.5571441883774875\n274\t274\t0\t0\t2\t-1.0887963283029611\n275
\t275\t0\t0\t2\t1.2310204712452886\n276\t276\t0\t0\t2\t-0.4769402620727404\n277\t277\t0\t0\t2\t-0.22419481188150195\n278\t278\t0\t0\t2\t-0.5375714580159776\n279\t279\t0\t0\t2\t0.019698038241142538\n280\t280\t0\t0\t2\t1.1825833509904307\n281\t281\t0\t0\t2\t-0.7811541586520457\n282\t282\t0\t0\t2\t-0.4601117935870594\n283\t283\t0\t0\t2\t0.7033250873409186\n284\t284\t0\t0\t2\t-0.8409243958041983\n285\t285\t0\t0\t2\t0.5573349116266615\n286\t286\t0\t0\t2\t-1.6278769464058696\n287\t287\t0\t0\t2\t1.6583911801881541\n288\t288\t0\t0\t2\t-1.212032621830397\n289\t289\t0\t0\t2\t-0.9721467722081951\n290\t290\t0\t0\t2\t-0.5965740298455487\n291\t291\t0\t0\t2\t-1.2351363670664182\n292\t292\t0\t0\t2\t1.641895301641597\n293\t293\t0\t0\t2\t-0.42141703006240455\n294\t294\t0\t0\t2\t0.38753100560495457\n295\t295\t0\t0\t2\t-1.070382200424481\n296\t296\t0\t0\t2\t-0.517780452691308\n297\t297\t0\t0\t2\t1.1565871160947803\n298\t298\t0\t0\t2\t-0.6679020556063455\n299\t299\t0\t0\t2\t-0.970542580114166\n300\t300\t0\t0\t2\t-0.8655455001063305\n301\t301\t0\t0\t2\t-1.0789380083099478\n302\t302\t0\t0\t2\t-1.2440796197575608\n303\t303\t0\t0\t2\t-1.0446790730803917\n304\t304\t0\t0\t2\t-0.5695802535356469\n305\t305\t0\t0\t2\t-0.8473514861984687\n306\t306\t0\t0\t2\t-1.190100919435714\n307\t307\t0\t0\t2\t1.022380976723825\n308\t308\t0\t0\t2\t-1.9790492535631858\n309\t309\t0\t0\t2\t-1.5253493737129327\n310\t310\t0\t0\t2\t1.0894028037803278\n311\t311\t0\t0\t2\t0.6962558464296542\n312\t312\t0\t0\t2\t0.9874048999158505\n313\t313\t0\t0\t2\t0.11245625794433396\n314\t314\t0\t0\t2\t-0.050027987070537086\n315\t315\t0\t0\t2\t-0.8717433837106624\n316\t316\t0\t0\t2\t0.8146794446207906\n317\t317\t0\t0\t2\t-0.5526641897761696\n318\t318\t0\t0\t2\t1.2573937760035447\n319\t319\t0\t0\t2\t-1.2648304446149456\n320\t320\t0\t0\t2\t1.6530274769204985\n321\t321\t0\t0\t2\t1.30110161745003\n322\t322\t0\t0\t2\t-0.02018948002691863\n323\t323\t0\t0\t2\t-1.8836740231429716\n324\t324\t0\t0\t2\t1.4205846188601483\n325\t325\t0\t0\t2\t-0.689172153743396\n326\t326\t0\t0\t2\t-0.3044880253487302\n327\t327\t0\t0\t2\t-0.08520417701732463\n328\t328\t0\t0\t2\t2.851634453916995\n329\t329\t0\t0\t2\t-1.156810978360592\n330\t330\t0\t0\t2\t0.2825803847207173\n331\t331\t0\t0\t2\t-1.9500708808705018\n332\t332\t0\t0\t2\t-1.1752612835403988\n333\t333\t0\t0\t2\t-1.3324335054916632\n334\t334\t0\t0\t2\t0.706410116238176\n335\t335\t0\t0\t2\t-0.4389661673885548\n336\t336\t0\t0\t2\t-0.47198568412366393\n337\t337\t0\t0\t2\t-0.196005627932633\n338\t338\t0\t0\t2\t-0.3926918515697784\n339\t339\t0\t0\t2\t-1.2586337442612614\n340\t340\t0\t0\t2\t-0.28572183266312384\n341\t341\t0\t0\t2\t-0.821333467643559\n342\t342\t0\t0\t2\t1.0406689409301526\n343\t343\t0\t0\t2\t-0.38669408702632374\n344\t344\t0\t0\t2\t-1.7090986597057412\n345\t345\t0\t0\t2\t0.567446046270448\n346\t346\t0\t0\t2\t0.01714990950414205\n347\t347\t0\t0\t2\t0.0977745741751222\n348\t348\t0\t0\t2\t0.822230557498965\n349\t349\t0\t0\t2\t0.3963812215631262\n350\t350\t0\t0\t2\t-1.4985341158085754\n351\t351\t0\t0\t2\t-0.029960047096039638\n352\t352\t0\t0\t2\t-0.6528662364260229\n353\t353\t0\t0\t2\t-0.12170910991193998\n354\t354\t0\t0\t2\t1.0300254802129547\n355\t355\t0\t0\t2\t-0.16690139482289537\n356\t356\t0\t0\t2\t0.8377731253742943\n357\t357\t0\t0\t2\t-0.3887229530927819\n358\t358\t0\t0\t2\t0.14451356330621065\n359\t359\t0\t0\t2\t1.6073233297076983\n360\t360\t0\t0\t2\t0.07950441212552996\n361\t361\t0\t0\t2\t2.1497157962418174\n362\t362\t0\t0\t2\t-0.30427052777531594\n363\t363\t0\t0\t2\t-0.3025923417388714\n364\t364\t0\t0\t2\t-0.439785
0120995375\n365\t365\t0\t0\t2\t0.45039620392874236\n366\t366\t0\t0\t2\t-0.8475714740581328\n367\t367\t0\t0\t2\t0.49438582682424076\n368\t368\t0\t0\t2\t1.7877305550184646\n369\t369\t0\t0\t2\t1.4928728753892113\n370\t370\t0\t0\t2\t1.0028215376206\n371\t371\t0\t0\t2\t-1.579481582437942\n372\t372\t0\t0\t2\t-1.4112071973049491\n373\t373\t0\t0\t2\t-0.3287910850620258\n374\t374\t0\t0\t2\t0.6335258906599922\n375\t375\t0\t0\t2\t1.5884402065569267\n376\t376\t0\t0\t2\t0.239439976093505\n377\t377\t0\t0\t2\t0.3730542303129533\n378\t378\t0\t0\t2\t-0.23125572369556183\n379\t379\t0\t0\t2\t0.04114817676435756\n380\t380\t0\t0\t2\t1.3148209110260505\n381\t381\t0\t0\t2\t0.4153394063226016\n382\t382\t0\t0\t2\t-2.119037113120731\n383\t383\t0\t0\t2\t-0.7357404438785312\n384\t384\t0\t0\t2\t-0.5458519940698457\n385\t385\t0\t0\t2\t-1.2600086040654477\n386\t386\t0\t0\t2\t0.5429663518299074\n387\t387\t0\t0\t2\t-0.24886613651789177\n388\t388\t0\t0\t2\t1.393586775779869\n389\t389\t0\t0\t2\t0.7655351504255072\n390\t390\t0\t0\t2\t1.574341948844662\n391\t391\t0\t0\t2\t0.28337819436221234\n392\t392\t0\t0\t2\t-0.28215538210870866\n393\t393\t0\t0\t2\t0.32677930042765563\n394\t394\t0\t0\t2\t-0.8933433333160953\n395\t395\t0\t0\t2\t-0.30611223649551184\n396\t396\t0\t0\t2\t-1.6327825926249717\n397\t397\t0\t0\t2\t0.983996289771191\n398\t398\t0\t0\t2\t-2.1094276028051713\n399\t399\t0\t0\t2\t-0.03004231237784647\n400\t400\t0\t0\t2\t0.2677517204111873\n401\t401\t0\t0\t2\t0.024441125372044015\n402\t402\t0\t0\t2\t-0.5518207938279379\n403\t403\t0\t0\t2\t-0.012559704828846413\n404\t404\t0\t0\t2\t0.7819900863897892\n405\t405\t0\t0\t2\t0.1331898731839282\n406\t406\t0\t0\t2\t0.48489696095614787\n407\t407\t0\t0\t2\t0.6642443953751778\n408\t408\t0\t0\t2\t-0.8030871260696271\n409\t409\t0\t0\t2\t-0.1895839551659786\n410\t410\t0\t0\t2\t-0.8034926266352286\n411\t411\t0\t0\t2\t0.09187570772040049\n412\t412\t0\t0\t2\t0.029248814008252747\n413\t413\t0\t0\t2\t-0.01533630905590288\n414\t414\t0\t0\t2\t1.1280827301911354\n415\t415\t0\t0\t2\t-0.2065160921398899\n416\t416\t0\t0\t2\t-0.05126145885052354\n417\t417\t0\t0\t2\t-1.3434267805209248\n418\t418\t0\t0\t2\t-0.18518422658884665\n419\t419\t0\t0\t2\t-0.24844114482463325\n420\t420\t0\t0\t2\t-0.06967600698783584\n421\t421\t0\t0\t2\t0.37664675241912393\n422\t422\t0\t0\t2\t-0.2569614554903415\n423\t423\t0\t0\t2\t0.7233028036905063\n424\t424\t0\t0\t2\t1.1103498482848735\n425\t425\t0\t0\t2\t-0.11945647973589903\n426\t426\t0\t0\t2\t-0.6396046050764921\n427\t427\t0\t0\t2\t0.5643452560242491\n428\t428\t0\t0\t2\t-0.3440947789770541\n429\t429\t0\t0\t2\t-2.22653169151592\n430\t430\t0\t0\t2\t-0.956032968271431\n431\t431\t0\t0\t2\t-0.8191888326171307\n432\t432\t0\t0\t2\t-2.4494492803640022\n433\t433\t0\t0\t2\t-0.0670051947640402\n434\t434\t0\t0\t2\t-1.0212450480574558\n435\t435\t0\t0\t2\t1.5642134356006847\n436\t436\t0\t0\t2\t-0.7686850358271917\n437\t437\t0\t0\t2\t0.9013174516851364\n438\t438\t0\t0\t2\t0.32615832192874417\n439\t439\t0\t0\t2\t-0.1396620902625725\n440\t440\t0\t0\t2\t0.7175824082953346\n441\t441\t0\t0\t2\t-1.298832108780333\n442\t442\t0\t0\t2\t-0.6380248204283618\n443\t443\t0\t0\t2\t1.3777981524996168\n444\t444\t0\t0\t2\t1.650381986618911\n445\t445\t0\t0\t2\t0.5867431709972836\n446\t446\t0\t0\t2\t-0.005896067871319781\n447\t447\t0\t0\t2\t0.13319016302360676\n448\t448\t0\t0\t2\t-1.2696437250217552\n449\t449\t0\t0\t2\t-0.12518065096985312\n450\t450\t0\t0\t2\t-1.244542673659207\n451\t451\t0\t0\t2\t-0.06610093209244285\n452\t452\t0\t0\t2\t2.5740374683027536\n453\t453\t0\t0\t2\t0.9289638283088296\n454\t454\
t0\t0\t2\t0.5106401356154838\n455\t455\t0\t0\t2\t-1.3219451447015316\n456\t456\t0\t0\t2\t1.3056699098740336\n457\t457\t0\t0\t2\t-0.7429788972407904\n458\t458\t0\t0\t2\t-0.5935982739213578\n459\t459\t0\t0\t2\t-1.2444771294790702\n460\t460\t0\t0\t2\t-0.11027752291961805\n461\t461\t0\t0\t2\t0.15968337228108687\n462\t462\t0\t0\t2\t0.10015721622156738\n463\t463\t0\t0\t2\t-1.6712997326777923\n464\t464\t0\t0\t2\t1.1224222088370677\n465\t465\t0\t0\t2\t-0.8848696262558821\n466\t466\t0\t0\t2\t-0.43388376934423967\n467\t467\t0\t0\t2\t0.2330906457766794\n468\t468\t0\t0\t2\t-0.06850210820133934\n469\t469\t0\t0\t2\t-0.43079920208533806\n470\t470\t0\t0\t2\t0.6801247297121218\n471\t471\t0\t0\t2\t0.7062942755405903\n472\t472\t0\t0\t2\t-1.0125163790266498\n473\t473\t0\t0\t2\t1.1402191818385525\n474\t474\t0\t0\t2\t-0.09512756382895157\n475\t475\t0\t0\t2\t1.2043113720050476\n476\t476\t0\t0\t2\t-1.3101439365860013\n477\t477\t0\t0\t2\t-1.010236868117858\n478\t478\t0\t0\t2\t0.8681701631563481\n479\t479\t0\t0\t2\t-1.1399558386575048\n480\t480\t0\t0\t2\t0.2205328932586634\n481\t481\t0\t0\t2\t-0.5859626268400073\n482\t482\t0\t0\t2\t0.9365883769687702\n483\t483\t0\t0\t2\t-1.589591178874074\n484\t484\t0\t0\t2\t0.1145164389832793\n485\t485\t0\t0\t2\t0.275658677194189\n486\t486\t0\t0\t2\t1.8238148121727578\n487\t487\t0\t0\t2\t0.9195632859199347\n488\t488\t0\t0\t2\t1.1992281082636649\n489\t489\t0\t0\t2\t-0.28717216124743894\n490\t490\t0\t0\t2\t-0.1255644094578015\n491\t491\t0\t0\t2\t0.7660599548056037\n492\t492\t0\t0\t2\t0.0485016949386254\n493\t493\t0\t0\t2\t0.9191104275425048\n494\t494\t0\t0\t2\t-0.07849111590769194\n495\t495\t0\t0\t2\t-1.602274500274993\n496\t496\t0\t0\t2\t-2.016184028763443\n497\t497\t0\t0\t2\t1.3436149731702158\n498\t498\t0\t0\t2\t-1.8171186074532928\n499\t499\t0\t0\t2\t-0.46317733040012826\n500\t500\t0\t0\t2\t-0.9433895891176977\n"
  },
  {
    "path": "example/example.psam",
    "content": "#FID\tIID\tSEX\tPHENO1\n1\t1\tNA\t-0.426776\n2\t2\tNA\t0.553949\n3\t3\tNA\t-0.650162\n4\t4\tNA\t0.665824\n5\t5\tNA\t2.15353\n6\t6\tNA\t1.60361\n7\t7\tNA\t0.924509\n8\t8\tNA\t0.166955\n9\t9\tNA\t-0.446781\n10\t10\tNA\t0.913018\n11\t11\tNA\t-0.123653\n12\t12\tNA\t0.451207\n13\t13\tNA\t-1.81342\n14\t14\tNA\t0.10916\n15\t15\tNA\t1.26067\n16\t16\tNA\t0.553231\n17\t17\tNA\t0.209452\n18\t18\tNA\t-0.513218\n19\t19\tNA\t0.611907\n20\t20\tNA\t1.53946\n21\t21\tNA\t-0.386504\n22\t22\tNA\t-2.51434\n23\t23\tNA\t0.405703\n24\t24\tNA\t0.51856\n25\t25\tNA\t1.06115\n26\t26\tNA\t0.195941\n27\t27\tNA\t-0.249796\n28\t28\tNA\t-0.267648\n29\t29\tNA\t0.717794\n30\t30\tNA\t1.18612\n31\t31\tNA\t0.0862962\n32\t32\tNA\t-0.353043\n33\t33\tNA\t0.23543\n34\t34\tNA\t1.53045\n35\t35\tNA\t0.457955\n36\t36\tNA\t0.41183\n37\t37\tNA\t1.01735\n38\t38\tNA\t-1.38481\n39\t39\tNA\t1.75103\n40\t40\tNA\t0.274954\n41\t41\tNA\t-1.9897\n42\t42\tNA\t-0.274423\n43\t43\tNA\t1.42641\n44\t44\tNA\t1.67944\n45\t45\tNA\t-1.97622\n46\t46\tNA\t1.31329\n47\t47\tNA\t-1.23795\n48\t48\tNA\t-1.22309\n49\t49\tNA\t-0.208682\n50\t50\tNA\t0.0465913\n51\t51\tNA\t0.645466\n52\t52\tNA\t-0.336671\n53\t53\tNA\t-1.42514\n54\t54\tNA\t-1.74041\n55\t55\tNA\t-0.989992\n56\t56\tNA\t-0.681306\n57\t57\tNA\t1.45234\n58\t58\tNA\t-0.143956\n59\t59\tNA\t-0.437836\n60\t60\tNA\t-0.151927\n61\t61\tNA\t-1.09187\n62\t62\tNA\t0.118866\n63\t63\tNA\t-0.744682\n64\t64\tNA\t-0.351599\n65\t65\tNA\t-0.250524\n66\t66\tNA\t-1.14453\n67\t67\tNA\t0.402101\n68\t68\tNA\t-0.269673\n69\t69\tNA\t-1.07104\n70\t70\tNA\t0.54661\n71\t71\tNA\t0.905989\n72\t72\tNA\t-0.930246\n73\t73\tNA\t-0.164935\n74\t74\tNA\t-0.259298\n75\t75\tNA\t-2.11448\n76\t76\tNA\t-0.607634\n77\t77\tNA\t0.774801\n78\t78\tNA\t-2.09622\n79\t79\tNA\t-1.91166\n80\t80\tNA\t1.03534\n81\t81\tNA\t0.72945\n82\t82\tNA\t-0.0979342\n83\t83\tNA\t0.245666\n84\t84\tNA\t0.938673\n85\t85\tNA\t1.73376\n86\t86\tNA\t3.59655\n87\t87\tNA\t0.10418\n88\t88\tNA\t0.818146\n89\t89\tNA\t-0.471747\n90\t90\tNA\t1.32639\n91\t91\tNA\t-0.616465\n92\t92\tNA\t-1.19637\n93\t93\tNA\t-2.52343\n94\t94\tNA\t-1.6776\n95\t95\tNA\t-1.32518\n96\t96\tNA\t0.588294\n97\t97\tNA\t1.23774\n98\t98\tNA\t-0.292771\n99\t99\tNA\t2.21307\n100\t100\tNA\t1.39079\n101\t101\tNA\t0.348965\n102\t102\tNA\t-0.179652\n103\t103\tNA\t-1.71495\n104\t104\tNA\t-0.360643\n105\t105\tNA\t0.255807\n106\t106\tNA\t0.78485\n107\t107\tNA\t2.50518\n108\t108\tNA\t-1.81684\n109\t109\tNA\t-0.0156569\n110\t110\tNA\t0.704029\n111\t111\tNA\t-0.0113866\n112\t112\tNA\t-0.640498\n113\t113\tNA\t2.07844\n114\t114\tNA\t-0.225493\n115\t115\tNA\t0.00218799\n116\t116\tNA\t0.453076\n117\t117\tNA\t0.711156\n118\t118\tNA\t-7.92646e-05\n119\t119\tNA\t1.27164\n120\t120\tNA\t0.130916\n121\t121\tNA\t0.960164\n122\t122\tNA\t-0.694627\n123\t123\tNA\t1.20835\n124\t124\tNA\t-0.555829\n125\t125\tNA\t0.408699\n126\t126\tNA\t0.863283\n127\t127\tNA\t0.864202\n128\t128\tNA\t-0.451513\n129\t129\tNA\t-0.191405\n130\t130\tNA\t-0.0376491\n131\t131\tNA\t1.06995\n132\t132\tNA\t0.733263\n133\t133\tNA\t-1.38596\n134\t134\tNA\t-0.168826\n135\t135\tNA\t1.23416\n136\t136\tNA\t-0.130181\n137\t137\tNA\t-0.849622\n138\t138\tNA\t1.65559\n139\t139\tNA\t-0.855749\n140\t140\tNA\t-0.591996\n141\t141\tNA\t1.47661\n142\t142\tNA\t-1.76068\n143\t143\tNA\t0.333135\n144\t144\tNA\t-1.3168\n145\t145\tNA\t2.26584\n146\t146\tNA\t-0.585547\n147\t147\tNA\t1.00424\n148\t148\tNA\t0.595063\n149\t149\tNA\t-2.35898\n150\t150\tNA\t1.02006\n151\t151\tNA\t-1.25786\n152\t152\tNA\t-0.0970053\n153\t153\tNA\t0.336385\n154\t154\tNA\t1.79728\n155\t155\tNA\t0.27
0135\n156\t156\tNA\t-0.380863\n157\t157\tNA\t0.0401893\n158\t158\tNA\t-0.396513\n159\t159\tNA\t1.49365\n160\t160\tNA\t-1.29933\n161\t161\tNA\t0.251928\n162\t162\tNA\t0.691775\n163\t163\tNA\t0.740751\n164\t164\tNA\t-0.310567\n165\t165\tNA\t-0.55526\n166\t166\tNA\t-1.00578\n167\t167\tNA\t0.0545069\n168\t168\tNA\t-0.634928\n169\t169\tNA\t-0.669547\n170\t170\tNA\t-0.232261\n171\t171\tNA\t0.703583\n172\t172\tNA\t-0.313278\n173\t173\tNA\t-0.249173\n174\t174\tNA\t-0.659208\n175\t175\tNA\t-0.947788\n176\t176\tNA\t0.480091\n177\t177\tNA\t0.47061\n178\t178\tNA\t-0.0650563\n179\t179\tNA\t-1.49593\n180\t180\tNA\t0.540549\n181\t181\tNA\t1.12327\n182\t182\tNA\t0.00744181\n183\t183\tNA\t0.663999\n184\t184\tNA\t-0.567857\n185\t185\tNA\t0.229938\n186\t186\tNA\t-0.170762\n187\t187\tNA\t0.34209\n188\t188\tNA\t-0.109504\n189\t189\tNA\t-0.068089\n190\t190\tNA\t-2.31449\n191\t191\tNA\t-1.07777\n192\t192\tNA\t-0.339403\n193\t193\tNA\t-1.5321\n194\t194\tNA\t1.27784\n195\t195\tNA\t-0.618697\n196\t196\tNA\t0.244838\n197\t197\tNA\t-0.0897247\n198\t198\tNA\t0.285638\n199\t199\tNA\t0.0266523\n200\t200\tNA\t-0.855846\n201\t201\tNA\t0.103268\n202\t202\tNA\t-0.46838\n203\t203\tNA\t-0.0396295\n204\t204\tNA\t0.501029\n205\t205\tNA\t-1.61463\n206\t206\tNA\t-0.822108\n207\t207\tNA\t-0.415433\n208\t208\tNA\t0.192556\n209\t209\tNA\t-0.0136163\n210\t210\tNA\t-0.475529\n211\t211\tNA\t0.10233\n212\t212\tNA\t0.748894\n213\t213\tNA\t0.441979\n214\t214\tNA\t1.01881\n215\t215\tNA\t1.18723\n216\t216\tNA\t0.802298\n217\t217\tNA\t-0.477409\n218\t218\tNA\t-0.141978\n219\t219\tNA\t-0.174778\n220\t220\tNA\t1.03133\n221\t221\tNA\t0.094144\n222\t222\tNA\t0.0786411\n223\t223\tNA\t-0.561197\n224\t224\tNA\t-0.617684\n225\t225\tNA\t-0.624781\n226\t226\tNA\t0.72625\n227\t227\tNA\t0.673668\n228\t228\tNA\t1.20746\n229\t229\tNA\t-0.700091\n230\t230\tNA\t1.01325\n231\t231\tNA\t0.108613\n232\t232\tNA\t1.53784\n233\t233\tNA\t-0.388645\n234\t234\tNA\t0.58539\n235\t235\tNA\t0.636149\n236\t236\tNA\t-2.37272\n237\t237\tNA\t-0.154652\n238\t238\tNA\t-0.453532\n239\t239\tNA\t0.200597\n240\t240\tNA\t1.67001\n241\t241\tNA\t1.03037\n242\t242\tNA\t-0.208077\n243\t243\tNA\t0.164049\n244\t244\tNA\t-0.288133\n245\t245\tNA\t0.52305\n246\t246\tNA\t0.307298\n247\t247\tNA\t0.116519\n248\t248\tNA\t-0.00206642\n249\t249\tNA\t0.655496\n250\t250\tNA\t-1.25415\n251\t251\tNA\t0.00726571\n252\t252\tNA\t2.6072\n253\t253\tNA\t0.787332\n254\t254\tNA\t-0.573939\n255\t255\tNA\t0.430139\n256\t256\tNA\t0.498003\n257\t257\tNA\t-0.472708\n258\t258\tNA\t-2.11577\n259\t259\tNA\t-0.302791\n260\t260\tNA\t0.135721\n261\t261\tNA\t0.746283\n262\t262\tNA\t1.05104\n263\t263\tNA\t-0.914336\n264\t264\tNA\t-1.98078\n265\t265\tNA\t0.559037\n266\t266\tNA\t-1.42152\n267\t267\tNA\t0.936689\n268\t268\tNA\t0.380767\n269\t269\tNA\t0.945328\n270\t270\tNA\t0.0570358\n271\t271\tNA\t1.47451\n272\t272\tNA\t-0.517474\n273\t273\tNA\t-1.55714\n274\t274\tNA\t-1.0888\n275\t275\tNA\t1.23102\n276\t276\tNA\t-0.47694\n277\t277\tNA\t-0.224195\n278\t278\tNA\t-0.537571\n279\t279\tNA\t0.019698\n280\t280\tNA\t1.18258\n281\t281\tNA\t-0.781154\n282\t282\tNA\t-0.460112\n283\t283\tNA\t0.703325\n284\t284\tNA\t-0.840924\n285\t285\tNA\t0.557335\n286\t286\tNA\t-1.62788\n287\t287\tNA\t1.65839\n288\t288\tNA\t-1.21203\n289\t289\tNA\t-0.972147\n290\t290\tNA\t-0.596574\n291\t291\tNA\t-1.23514\n292\t292\tNA\t1.6419\n293\t293\tNA\t-0.421417\n294\t294\tNA\t0.387531\n295\t295\tNA\t-1.07038\n296\t296\tNA\t-0.51778\n297\t297\tNA\t1.15659\n298\t298\tNA\t-0.667902\n299\t299\tNA\t-0.970543\n300\t300\tNA\t-0.865546\n301\t301\tNA\t-1.07894\n302\t3
02\tNA\t-1.24408\n303\t303\tNA\t-1.04468\n304\t304\tNA\t-0.56958\n305\t305\tNA\t-0.847351\n306\t306\tNA\t-1.1901\n307\t307\tNA\t1.02238\n308\t308\tNA\t-1.97905\n309\t309\tNA\t-1.52535\n310\t310\tNA\t1.0894\n311\t311\tNA\t0.696256\n312\t312\tNA\t0.987405\n313\t313\tNA\t0.112456\n314\t314\tNA\t-0.050028\n315\t315\tNA\t-0.871743\n316\t316\tNA\t0.814679\n317\t317\tNA\t-0.552664\n318\t318\tNA\t1.25739\n319\t319\tNA\t-1.26483\n320\t320\tNA\t1.65303\n321\t321\tNA\t1.3011\n322\t322\tNA\t-0.0201895\n323\t323\tNA\t-1.88367\n324\t324\tNA\t1.42058\n325\t325\tNA\t-0.689172\n326\t326\tNA\t-0.304488\n327\t327\tNA\t-0.0852042\n328\t328\tNA\t2.85163\n329\t329\tNA\t-1.15681\n330\t330\tNA\t0.28258\n331\t331\tNA\t-1.95007\n332\t332\tNA\t-1.17526\n333\t333\tNA\t-1.33243\n334\t334\tNA\t0.70641\n335\t335\tNA\t-0.438966\n336\t336\tNA\t-0.471986\n337\t337\tNA\t-0.196006\n338\t338\tNA\t-0.392692\n339\t339\tNA\t-1.25863\n340\t340\tNA\t-0.285722\n341\t341\tNA\t-0.821333\n342\t342\tNA\t1.04067\n343\t343\tNA\t-0.386694\n344\t344\tNA\t-1.7091\n345\t345\tNA\t0.567446\n346\t346\tNA\t0.0171499\n347\t347\tNA\t0.0977746\n348\t348\tNA\t0.822231\n349\t349\tNA\t0.396381\n350\t350\tNA\t-1.49853\n351\t351\tNA\t-0.02996\n352\t352\tNA\t-0.652866\n353\t353\tNA\t-0.121709\n354\t354\tNA\t1.03003\n355\t355\tNA\t-0.166901\n356\t356\tNA\t0.837773\n357\t357\tNA\t-0.388723\n358\t358\tNA\t0.144514\n359\t359\tNA\t1.60732\n360\t360\tNA\t0.0795044\n361\t361\tNA\t2.14972\n362\t362\tNA\t-0.304271\n363\t363\tNA\t-0.302592\n364\t364\tNA\t-0.439785\n365\t365\tNA\t0.450396\n366\t366\tNA\t-0.847571\n367\t367\tNA\t0.494386\n368\t368\tNA\t1.78773\n369\t369\tNA\t1.49287\n370\t370\tNA\t1.00282\n371\t371\tNA\t-1.57948\n372\t372\tNA\t-1.41121\n373\t373\tNA\t-0.328791\n374\t374\tNA\t0.633526\n375\t375\tNA\t1.58844\n376\t376\tNA\t0.23944\n377\t377\tNA\t0.373054\n378\t378\tNA\t-0.231256\n379\t379\tNA\t0.0411482\n380\t380\tNA\t1.31482\n381\t381\tNA\t0.415339\n382\t382\tNA\t-2.11904\n383\t383\tNA\t-0.73574\n384\t384\tNA\t-0.545852\n385\t385\tNA\t-1.26001\n386\t386\tNA\t0.542966\n387\t387\tNA\t-0.248866\n388\t388\tNA\t1.39359\n389\t389\tNA\t0.765535\n390\t390\tNA\t1.57434\n391\t391\tNA\t0.283378\n392\t392\tNA\t-0.282155\n393\t393\tNA\t0.326779\n394\t394\tNA\t-0.893343\n395\t395\tNA\t-0.306112\n396\t396\tNA\t-1.63278\n397\t397\tNA\t0.983996\n398\t398\tNA\t-2.10943\n399\t399\tNA\t-0.0300423\n400\t400\tNA\t0.267752\n401\t401\tNA\t0.0244411\n402\t402\tNA\t-0.551821\n403\t403\tNA\t-0.0125597\n404\t404\tNA\t0.78199\n405\t405\tNA\t0.13319\n406\t406\tNA\t0.484897\n407\t407\tNA\t0.664244\n408\t408\tNA\t-0.803087\n409\t409\tNA\t-0.189584\n410\t410\tNA\t-0.803493\n411\t411\tNA\t0.0918757\n412\t412\tNA\t0.0292488\n413\t413\tNA\t-0.0153363\n414\t414\tNA\t1.12808\n415\t415\tNA\t-0.206516\n416\t416\tNA\t-0.0512615\n417\t417\tNA\t-1.34343\n418\t418\tNA\t-0.185184\n419\t419\tNA\t-0.248441\n420\t420\tNA\t-0.069676\n421\t421\tNA\t0.376647\n422\t422\tNA\t-0.256961\n423\t423\tNA\t0.723303\n424\t424\tNA\t1.11035\n425\t425\tNA\t-0.119456\n426\t426\tNA\t-0.639605\n427\t427\tNA\t0.564345\n428\t428\tNA\t-0.344095\n429\t429\tNA\t-2.22653\n430\t430\tNA\t-0.956033\n431\t431\tNA\t-0.819189\n432\t432\tNA\t-2.44945\n433\t433\tNA\t-0.0670052\n434\t434\tNA\t-1.02125\n435\t435\tNA\t1.56421\n436\t436\tNA\t-0.768685\n437\t437\tNA\t0.901317\n438\t438\tNA\t0.326158\n439\t439\tNA\t-0.139662\n440\t440\tNA\t0.717582\n441\t441\tNA\t-1.29883\n442\t442\tNA\t-0.638025\n443\t443\tNA\t1.3778\n444\t444\tNA\t1.65038\n445\t445\tNA\t0.586743\n446\t446\tNA\t-0.00589607\n447\t447\tNA\t0.13319\n448\t448\tNA\t-1.26964
\n449\t449\tNA\t-0.125181\n450\t450\tNA\t-1.24454\n451\t451\tNA\t-0.0661009\n452\t452\tNA\t2.57404\n453\t453\tNA\t0.928964\n454\t454\tNA\t0.51064\n455\t455\tNA\t-1.32195\n456\t456\tNA\t1.30567\n457\t457\tNA\t-0.742979\n458\t458\tNA\t-0.593598\n459\t459\tNA\t-1.24448\n460\t460\tNA\t-0.110278\n461\t461\tNA\t0.159683\n462\t462\tNA\t0.100157\n463\t463\tNA\t-1.6713\n464\t464\tNA\t1.12242\n465\t465\tNA\t-0.88487\n466\t466\tNA\t-0.433884\n467\t467\tNA\t0.233091\n468\t468\tNA\t-0.0685021\n469\t469\tNA\t-0.430799\n470\t470\tNA\t0.680125\n471\t471\tNA\t0.706294\n472\t472\tNA\t-1.01252\n473\t473\tNA\t1.14022\n474\t474\tNA\t-0.0951276\n475\t475\tNA\t1.20431\n476\t476\tNA\t-1.31014\n477\t477\tNA\t-1.01024\n478\t478\tNA\t0.86817\n479\t479\tNA\t-1.13996\n480\t480\tNA\t0.220533\n481\t481\tNA\t-0.585963\n482\t482\tNA\t0.936588\n483\t483\tNA\t-1.58959\n484\t484\tNA\t0.114516\n485\t485\tNA\t0.275659\n486\t486\tNA\t1.82381\n487\t487\tNA\t0.919563\n488\t488\tNA\t1.19923\n489\t489\tNA\t-0.287172\n490\t490\tNA\t-0.125564\n491\t491\tNA\t0.76606\n492\t492\tNA\t0.0485017\n493\t493\tNA\t0.91911\n494\t494\tNA\t-0.0784911\n495\t495\tNA\t-1.60227\n496\t496\tNA\t-2.01618\n497\t497\tNA\t1.34361\n498\t498\tNA\t-1.81712\n499\t499\tNA\t-0.463177\n500\t500\tNA\t-0.94339\n"
  },
  {
    "path": "example/example.pvar",
    "content": "#CHROM\tPOS\tID\tREF\tALT\n1\t1\t1\t2\t1\n1\t2\t2\t2\t1\n1\t3\t3\t2\t1\n1\t4\t4\t2\t1\n1\t5\t5\t2\t1\n1\t6\t6\t2\t1\n1\t7\t7\t2\t1\n1\t8\t8\t2\t1\n1\t9\t9\t2\t1\n1\t10\t10\t2\t1\n1\t11\t11\t2\t1\n1\t12\t12\t2\t1\n1\t13\t13\t2\t1\n1\t14\t14\t2\t1\n1\t15\t15\t2\t1\n1\t16\t16\t2\t1\n1\t17\t17\t2\t1\n1\t18\t18\t2\t1\n1\t19\t19\t2\t1\n1\t20\t20\t2\t1\n1\t21\t21\t2\t1\n1\t22\t22\t2\t1\n1\t23\t23\t2\t1\n1\t24\t24\t2\t1\n1\t25\t25\t2\t1\n1\t26\t26\t2\t1\n1\t27\t27\t2\t1\n1\t28\t28\t2\t1\n1\t29\t29\t2\t1\n1\t30\t30\t2\t1\n1\t31\t31\t2\t1\n1\t32\t32\t2\t1\n1\t33\t33\t2\t1\n1\t34\t34\t2\t1\n1\t35\t35\t2\t1\n1\t36\t36\t2\t1\n1\t37\t37\t2\t1\n1\t38\t38\t2\t1\n1\t39\t39\t2\t1\n1\t40\t40\t2\t1\n1\t41\t41\t2\t1\n1\t42\t42\t2\t1\n1\t43\t43\t2\t1\n1\t44\t44\t2\t1\n1\t45\t45\t2\t1\n1\t46\t46\t2\t1\n1\t47\t47\t2\t1\n1\t48\t48\t2\t1\n1\t49\t49\t2\t1\n1\t50\t50\t2\t1\n1\t51\t51\t2\t1\n1\t52\t52\t2\t1\n1\t53\t53\t2\t1\n1\t54\t54\t2\t1\n1\t55\t55\t2\t1\n1\t56\t56\t2\t1\n1\t57\t57\t2\t1\n1\t58\t58\t2\t1\n1\t59\t59\t2\t1\n1\t60\t60\t2\t1\n1\t61\t61\t2\t1\n1\t62\t62\t2\t1\n1\t63\t63\t2\t1\n1\t64\t64\t2\t1\n1\t65\t65\t2\t1\n1\t66\t66\t2\t1\n1\t67\t67\t2\t1\n1\t68\t68\t2\t1\n1\t69\t69\t2\t1\n1\t70\t70\t2\t1\n1\t71\t71\t2\t1\n1\t72\t72\t2\t1\n1\t73\t73\t2\t1\n1\t74\t74\t2\t1\n1\t75\t75\t2\t1\n1\t76\t76\t2\t1\n1\t77\t77\t2\t1\n1\t78\t78\t2\t1\n1\t79\t79\t2\t1\n1\t80\t80\t2\t1\n1\t81\t81\t2\t1\n1\t82\t82\t2\t1\n1\t83\t83\t2\t1\n1\t84\t84\t2\t1\n1\t85\t85\t2\t1\n1\t86\t86\t2\t1\n1\t87\t87\t2\t1\n1\t88\t88\t2\t1\n1\t89\t89\t2\t1\n1\t90\t90\t2\t1\n1\t91\t91\t2\t1\n1\t92\t92\t2\t1\n1\t93\t93\t2\t1\n1\t94\t94\t2\t1\n1\t95\t95\t2\t1\n1\t96\t96\t2\t1\n1\t97\t97\t2\t1\n1\t98\t98\t2\t1\n1\t99\t99\t2\t1\n1\t100\t100\t2\t1\n1\t101\t101\t2\t1\n1\t102\t102\t2\t1\n1\t103\t103\t2\t1\n1\t104\t104\t2\t1\n1\t105\t105\t2\t1\n1\t106\t106\t2\t1\n1\t107\t107\t2\t1\n1\t108\t108\t2\t1\n1\t109\t109\t2\t1\n1\t110\t110\t2\t1\n1\t111\t111\t2\t1\n1\t112\t112\t2\t1\n1\t113\t113\t2\t1\n1\t114\t114\t2\t1\n1\t115\t115\t2\t1\n1\t116\t116\t2\t1\n1\t117\t117\t2\t1\n1\t118\t118\t2\t1\n1\t119\t119\t2\t1\n1\t120\t120\t2\t1\n1\t121\t121\t2\t1\n1\t122\t122\t2\t1\n1\t123\t123\t2\t1\n1\t124\t124\t2\t1\n1\t125\t125\t2\t1\n1\t126\t126\t2\t1\n1\t127\t127\t2\t1\n1\t128\t128\t2\t1\n1\t129\t129\t2\t1\n1\t130\t130\t2\t1\n1\t131\t131\t2\t1\n1\t132\t132\t2\t1\n1\t133\t133\t2\t1\n1\t134\t134\t2\t1\n1\t135\t135\t2\t1\n1\t136\t136\t2\t1\n1\t137\t137\t2\t1\n1\t138\t138\t2\t1\n1\t139\t139\t2\t1\n1\t140\t140\t2\t1\n1\t141\t141\t2\t1\n1\t142\t142\t2\t1\n1\t143\t143\t2\t1\n1\t144\t144\t2\t1\n1\t145\t145\t2\t1\n1\t146\t146\t2\t1\n1\t147\t147\t2\t1\n1\t148\t148\t2\t1\n1\t149\t149\t2\t1\n1\t150\t150\t2\t1\n1\t151\t151\t2\t1\n1\t152\t152\t2\t1\n1\t153\t153\t2\t1\n1\t154\t154\t2\t1\n1\t155\t155\t2\t1\n1\t156\t156\t2\t1\n1\t157\t157\t2\t1\n1\t158\t158\t2\t1\n1\t159\t159\t2\t1\n1\t160\t160\t2\t1\n1\t161\t161\t2\t1\n1\t162\t162\t2\t1\n1\t163\t163\t2\t1\n1\t164\t164\t2\t1\n1\t165\t165\t2\t1\n1\t166\t166\t2\t1\n1\t167\t167\t2\t1\n1\t168\t168\t2\t1\n1\t169\t169\t2\t1\n1\t170\t170\t2\t1\n1\t171\t171\t2\t1\n1\t172\t172\t2\t1\n1\t173\t173\t2\t1\n1\t174\t174\t2\t1\n1\t175\t175\t2\t1\n1\t176\t176\t2\t1\n1\t177\t177\t2\t1\n1\t178\t178\t2\t1\n1\t179\t179\t2\t1\n1\t180\t180\t2\t1\n1\t181\t181\t2\t1\n1\t182\t182\t2\t1\n1\t183\t183\t2\t1\n1\t184\t184\t2\t1\n1\t185\t185\t2\t1\n1\t186\t186\t2\t1\n1\t187\t187\t2\t1\n1\t188\t188\t2\t1\n1\t189\t189\t2\t1\n1\t190\t190\t2\t1\n1\t191\t191\t2\t1\n1\t192\t192\t2\t1\n1\t193\t193\t2\t1\n1\t194\t194\t2\t1\n1\t195\t195\t2\t1\n1\t196\t196\t2\t1\n1\t1
97\t197\t2\t1\n1\t198\t198\t2\t1\n1\t199\t199\t2\t1\n1\t200\t200\t2\t1\n1\t201\t201\t2\t1\n1\t202\t202\t2\t1\n1\t203\t203\t2\t1\n1\t204\t204\t2\t1\n1\t205\t205\t2\t1\n1\t206\t206\t2\t1\n1\t207\t207\t2\t1\n1\t208\t208\t2\t1\n1\t209\t209\t2\t1\n1\t210\t210\t2\t1\n1\t211\t211\t2\t1\n1\t212\t212\t2\t1\n1\t213\t213\t2\t1\n1\t214\t214\t2\t1\n1\t215\t215\t2\t1\n1\t216\t216\t2\t1\n1\t217\t217\t2\t1\n1\t218\t218\t2\t1\n1\t219\t219\t2\t1\n1\t220\t220\t2\t1\n1\t221\t221\t2\t1\n1\t222\t222\t2\t1\n1\t223\t223\t2\t1\n1\t224\t224\t2\t1\n1\t225\t225\t2\t1\n1\t226\t226\t2\t1\n1\t227\t227\t2\t1\n1\t228\t228\t2\t1\n1\t229\t229\t2\t1\n1\t230\t230\t2\t1\n1\t231\t231\t2\t1\n1\t232\t232\t2\t1\n1\t233\t233\t2\t1\n1\t234\t234\t2\t1\n1\t235\t235\t2\t1\n1\t236\t236\t2\t1\n1\t237\t237\t2\t1\n1\t238\t238\t2\t1\n1\t239\t239\t2\t1\n1\t240\t240\t2\t1\n1\t241\t241\t2\t1\n1\t242\t242\t2\t1\n1\t243\t243\t2\t1\n1\t244\t244\t2\t1\n1\t245\t245\t2\t1\n1\t246\t246\t2\t1\n1\t247\t247\t2\t1\n1\t248\t248\t2\t1\n1\t249\t249\t2\t1\n1\t250\t250\t2\t1\n1\t251\t251\t2\t1\n1\t252\t252\t2\t1\n1\t253\t253\t2\t1\n1\t254\t254\t2\t1\n1\t255\t255\t2\t1\n1\t256\t256\t2\t1\n1\t257\t257\t2\t1\n1\t258\t258\t2\t1\n1\t259\t259\t2\t1\n1\t260\t260\t2\t1\n1\t261\t261\t2\t1\n1\t262\t262\t2\t1\n1\t263\t263\t2\t1\n1\t264\t264\t2\t1\n1\t265\t265\t2\t1\n1\t266\t266\t2\t1\n1\t267\t267\t2\t1\n1\t268\t268\t2\t1\n1\t269\t269\t2\t1\n1\t270\t270\t2\t1\n1\t271\t271\t2\t1\n1\t272\t272\t2\t1\n1\t273\t273\t2\t1\n1\t274\t274\t2\t1\n1\t275\t275\t2\t1\n1\t276\t276\t2\t1\n1\t277\t277\t2\t1\n1\t278\t278\t2\t1\n1\t279\t279\t2\t1\n1\t280\t280\t2\t1\n1\t281\t281\t2\t1\n1\t282\t282\t2\t1\n1\t283\t283\t2\t1\n1\t284\t284\t2\t1\n1\t285\t285\t2\t1\n1\t286\t286\t2\t1\n1\t287\t287\t2\t1\n1\t288\t288\t2\t1\n1\t289\t289\t2\t1\n1\t290\t290\t2\t1\n1\t291\t291\t2\t1\n1\t292\t292\t2\t1\n1\t293\t293\t2\t1\n1\t294\t294\t2\t1\n1\t295\t295\t2\t1\n1\t296\t296\t2\t1\n1\t297\t297\t2\t1\n1\t298\t298\t2\t1\n1\t299\t299\t2\t1\n1\t300\t300\t2\t1\n1\t301\t301\t2\t1\n1\t302\t302\t2\t1\n1\t303\t303\t2\t1\n1\t304\t304\t2\t1\n1\t305\t305\t2\t1\n1\t306\t306\t2\t1\n1\t307\t307\t2\t1\n1\t308\t308\t2\t1\n1\t309\t309\t2\t1\n1\t310\t310\t2\t1\n1\t311\t311\t2\t1\n1\t312\t312\t2\t1\n1\t313\t313\t2\t1\n1\t314\t314\t2\t1\n1\t315\t315\t2\t1\n1\t316\t316\t2\t1\n1\t317\t317\t2\t1\n1\t318\t318\t2\t1\n1\t319\t319\t2\t1\n1\t320\t320\t2\t1\n1\t321\t321\t2\t1\n1\t322\t322\t2\t1\n1\t323\t323\t2\t1\n1\t324\t324\t2\t1\n1\t325\t325\t2\t1\n1\t326\t326\t2\t1\n1\t327\t327\t2\t1\n1\t328\t328\t2\t1\n1\t329\t329\t2\t1\n1\t330\t330\t2\t1\n1\t331\t331\t2\t1\n1\t332\t332\t2\t1\n1\t333\t333\t2\t1\n1\t334\t334\t2\t1\n1\t335\t335\t2\t1\n1\t336\t336\t2\t1\n1\t337\t337\t2\t1\n1\t338\t338\t2\t1\n1\t339\t339\t2\t1\n1\t340\t340\t2\t1\n1\t341\t341\t2\t1\n1\t342\t342\t2\t1\n1\t343\t343\t2\t1\n1\t344\t344\t2\t1\n1\t345\t345\t2\t1\n1\t346\t346\t2\t1\n1\t347\t347\t2\t1\n1\t348\t348\t2\t1\n1\t349\t349\t2\t1\n1\t350\t350\t2\t1\n1\t351\t351\t2\t1\n1\t352\t352\t2\t1\n1\t353\t353\t2\t1\n1\t354\t354\t2\t1\n1\t355\t355\t2\t1\n1\t356\t356\t2\t1\n1\t357\t357\t2\t1\n1\t358\t358\t2\t1\n1\t359\t359\t2\t1\n1\t360\t360\t2\t1\n1\t361\t361\t2\t1\n1\t362\t362\t2\t1\n1\t363\t363\t2\t1\n1\t364\t364\t2\t1\n1\t365\t365\t2\t1\n1\t366\t366\t2\t1\n1\t367\t367\t2\t1\n1\t368\t368\t2\t1\n1\t369\t369\t2\t1\n1\t370\t370\t2\t1\n1\t371\t371\t2\t1\n1\t372\t372\t2\t1\n1\t373\t373\t2\t1\n1\t374\t374\t2\t1\n1\t375\t375\t2\t1\n1\t376\t376\t2\t1\n1\t377\t377\t2\t1\n1\t378\t378\t2\t1\n1\t379\t379\t2\t1\n1\t380\t380\t2\t1\n1\t381\t381\t2\t1\n1\t382\t382\t2\t1\n1\t383\t383\t2\t1\n1\t384
\t384\t2\t1\n1\t385\t385\t2\t1\n1\t386\t386\t2\t1\n1\t387\t387\t2\t1\n1\t388\t388\t2\t1\n1\t389\t389\t2\t1\n1\t390\t390\t2\t1\n1\t391\t391\t2\t1\n1\t392\t392\t2\t1\n1\t393\t393\t2\t1\n1\t394\t394\t2\t1\n1\t395\t395\t2\t1\n1\t396\t396\t2\t1\n1\t397\t397\t2\t1\n1\t398\t398\t2\t1\n1\t399\t399\t2\t1\n1\t400\t400\t2\t1\n1\t401\t401\t2\t1\n1\t402\t402\t2\t1\n1\t403\t403\t2\t1\n1\t404\t404\t2\t1\n1\t405\t405\t2\t1\n1\t406\t406\t2\t1\n1\t407\t407\t2\t1\n1\t408\t408\t2\t1\n1\t409\t409\t2\t1\n1\t410\t410\t2\t1\n1\t411\t411\t2\t1\n1\t412\t412\t2\t1\n1\t413\t413\t2\t1\n1\t414\t414\t2\t1\n1\t415\t415\t2\t1\n1\t416\t416\t2\t1\n1\t417\t417\t2\t1\n1\t418\t418\t2\t1\n1\t419\t419\t2\t1\n1\t420\t420\t2\t1\n1\t421\t421\t2\t1\n1\t422\t422\t2\t1\n1\t423\t423\t2\t1\n1\t424\t424\t2\t1\n1\t425\t425\t2\t1\n1\t426\t426\t2\t1\n1\t427\t427\t2\t1\n1\t428\t428\t2\t1\n1\t429\t429\t2\t1\n1\t430\t430\t2\t1\n1\t431\t431\t2\t1\n1\t432\t432\t2\t1\n1\t433\t433\t2\t1\n1\t434\t434\t2\t1\n1\t435\t435\t2\t1\n1\t436\t436\t2\t1\n1\t437\t437\t2\t1\n1\t438\t438\t2\t1\n1\t439\t439\t2\t1\n1\t440\t440\t2\t1\n1\t441\t441\t2\t1\n1\t442\t442\t2\t1\n1\t443\t443\t2\t1\n1\t444\t444\t2\t1\n1\t445\t445\t2\t1\n1\t446\t446\t2\t1\n1\t447\t447\t2\t1\n1\t448\t448\t2\t1\n1\t449\t449\t2\t1\n1\t450\t450\t2\t1\n1\t451\t451\t2\t1\n1\t452\t452\t2\t1\n1\t453\t453\t2\t1\n1\t454\t454\t2\t1\n1\t455\t455\t2\t1\n1\t456\t456\t2\t1\n1\t457\t457\t2\t1\n1\t458\t458\t2\t1\n1\t459\t459\t2\t1\n1\t460\t460\t2\t1\n1\t461\t461\t2\t1\n1\t462\t462\t2\t1\n1\t463\t463\t2\t1\n1\t464\t464\t2\t1\n1\t465\t465\t2\t1\n1\t466\t466\t2\t1\n1\t467\t467\t2\t1\n1\t468\t468\t2\t1\n1\t469\t469\t2\t1\n1\t470\t470\t2\t1\n1\t471\t471\t2\t1\n1\t472\t472\t2\t1\n1\t473\t473\t2\t1\n1\t474\t474\t2\t1\n1\t475\t475\t2\t1\n1\t476\t476\t2\t1\n1\t477\t477\t2\t1\n1\t478\t478\t2\t1\n1\t479\t479\t2\t1\n1\t480\t480\t2\t1\n1\t481\t481\t2\t1\n1\t482\t482\t2\t1\n1\t483\t483\t2\t1\n1\t484\t484\t2\t1\n1\t485\t485\t2\t1\n1\t486\t486\t2\t1\n1\t487\t487\t2\t1\n1\t488\t488\t2\t1\n1\t489\t489\t2\t1\n1\t490\t490\t2\t1\n1\t491\t491\t2\t1\n1\t492\t492\t2\t1\n1\t493\t493\t2\t1\n1\t494\t494\t2\t1\n1\t495\t495\t2\t1\n1\t496\t496\t2\t1\n1\t497\t497\t2\t1\n1\t498\t498\t2\t1\n1\t499\t499\t2\t1\n1\t500\t500\t2\t1\n1\t501\t501\t2\t1\n1\t502\t502\t2\t1\n1\t503\t503\t2\t1\n1\t504\t504\t2\t1\n1\t505\t505\t2\t1\n1\t506\t506\t2\t1\n1\t507\t507\t2\t1\n1\t508\t508\t2\t1\n1\t509\t509\t2\t1\n1\t510\t510\t2\t1\n1\t511\t511\t2\t1\n1\t512\t512\t2\t1\n1\t513\t513\t2\t1\n1\t514\t514\t2\t1\n1\t515\t515\t2\t1\n1\t516\t516\t2\t1\n1\t517\t517\t2\t1\n1\t518\t518\t2\t1\n1\t519\t519\t2\t1\n1\t520\t520\t2\t1\n1\t521\t521\t2\t1\n1\t522\t522\t2\t1\n1\t523\t523\t2\t1\n1\t524\t524\t2\t1\n1\t525\t525\t2\t1\n1\t526\t526\t2\t1\n1\t527\t527\t2\t1\n1\t528\t528\t2\t1\n1\t529\t529\t2\t1\n1\t530\t530\t2\t1\n1\t531\t531\t2\t1\n1\t532\t532\t2\t1\n1\t533\t533\t2\t1\n1\t534\t534\t2\t1\n1\t535\t535\t2\t1\n1\t536\t536\t2\t1\n1\t537\t537\t2\t1\n1\t538\t538\t2\t1\n1\t539\t539\t2\t1\n1\t540\t540\t2\t1\n1\t541\t541\t2\t1\n1\t542\t542\t2\t1\n1\t543\t543\t2\t1\n1\t544\t544\t2\t1\n1\t545\t545\t2\t1\n1\t546\t546\t2\t1\n1\t547\t547\t2\t1\n1\t548\t548\t2\t1\n1\t549\t549\t2\t1\n1\t550\t550\t2\t1\n1\t551\t551\t2\t1\n1\t552\t552\t2\t1\n1\t553\t553\t2\t1\n1\t554\t554\t2\t1\n1\t555\t555\t2\t1\n1\t556\t556\t2\t1\n1\t557\t557\t2\t1\n1\t558\t558\t2\t1\n1\t559\t559\t2\t1\n1\t560\t560\t2\t1\n1\t561\t561\t2\t1\n1\t562\t562\t2\t1\n1\t563\t563\t2\t1\n1\t564\t564\t2\t1\n1\t565\t565\t2\t1\n1\t566\t566\t2\t1\n1\t567\t567\t2\t1\n1\t568\t568\t2\t1\n1\t569\t569\t2\t1\n1\t570\t570\t2\t1\n1\t571\t
571\t2\t1\n1\t572\t572\t2\t1\n1\t573\t573\t2\t1\n1\t574\t574\t2\t1\n1\t575\t575\t2\t1\n1\t576\t576\t2\t1\n1\t577\t577\t2\t1\n1\t578\t578\t2\t1\n1\t579\t579\t2\t1\n1\t580\t580\t2\t1\n1\t581\t581\t2\t1\n1\t582\t582\t2\t1\n1\t583\t583\t2\t1\n1\t584\t584\t2\t1\n1\t585\t585\t2\t1\n1\t586\t586\t2\t1\n1\t587\t587\t2\t1\n1\t588\t588\t2\t1\n1\t589\t589\t2\t1\n1\t590\t590\t2\t1\n1\t591\t591\t2\t1\n1\t592\t592\t2\t1\n1\t593\t593\t2\t1\n1\t594\t594\t2\t1\n1\t595\t595\t2\t1\n1\t596\t596\t2\t1\n1\t597\t597\t2\t1\n1\t598\t598\t2\t1\n1\t599\t599\t2\t1\n1\t600\t600\t2\t1\n1\t601\t601\t2\t1\n1\t602\t602\t2\t1\n1\t603\t603\t2\t1\n1\t604\t604\t2\t1\n1\t605\t605\t2\t1\n1\t606\t606\t2\t1\n1\t607\t607\t2\t1\n1\t608\t608\t2\t1\n1\t609\t609\t2\t1\n1\t610\t610\t2\t1\n1\t611\t611\t2\t1\n1\t612\t612\t2\t1\n1\t613\t613\t2\t1\n1\t614\t614\t2\t1\n1\t615\t615\t2\t1\n1\t616\t616\t2\t1\n1\t617\t617\t2\t1\n1\t618\t618\t2\t1\n1\t619\t619\t2\t1\n1\t620\t620\t2\t1\n1\t621\t621\t2\t1\n1\t622\t622\t2\t1\n1\t623\t623\t2\t1\n1\t624\t624\t2\t1\n1\t625\t625\t2\t1\n1\t626\t626\t2\t1\n1\t627\t627\t2\t1\n1\t628\t628\t2\t1\n1\t629\t629\t2\t1\n1\t630\t630\t2\t1\n1\t631\t631\t2\t1\n1\t632\t632\t2\t1\n1\t633\t633\t2\t1\n1\t634\t634\t2\t1\n1\t635\t635\t2\t1\n1\t636\t636\t2\t1\n1\t637\t637\t2\t1\n1\t638\t638\t2\t1\n1\t639\t639\t2\t1\n1\t640\t640\t2\t1\n1\t641\t641\t2\t1\n1\t642\t642\t2\t1\n1\t643\t643\t2\t1\n1\t644\t644\t2\t1\n1\t645\t645\t2\t1\n1\t646\t646\t2\t1\n1\t647\t647\t2\t1\n1\t648\t648\t2\t1\n1\t649\t649\t2\t1\n1\t650\t650\t2\t1\n1\t651\t651\t2\t1\n1\t652\t652\t2\t1\n1\t653\t653\t2\t1\n1\t654\t654\t2\t1\n1\t655\t655\t2\t1\n1\t656\t656\t2\t1\n1\t657\t657\t2\t1\n1\t658\t658\t2\t1\n1\t659\t659\t2\t1\n1\t660\t660\t2\t1\n1\t661\t661\t2\t1\n1\t662\t662\t2\t1\n1\t663\t663\t2\t1\n1\t664\t664\t2\t1\n1\t665\t665\t2\t1\n1\t666\t666\t2\t1\n1\t667\t667\t2\t1\n1\t668\t668\t2\t1\n1\t669\t669\t2\t1\n1\t670\t670\t2\t1\n1\t671\t671\t2\t1\n1\t672\t672\t2\t1\n1\t673\t673\t2\t1\n1\t674\t674\t2\t1\n1\t675\t675\t2\t1\n1\t676\t676\t2\t1\n1\t677\t677\t2\t1\n1\t678\t678\t2\t1\n1\t679\t679\t2\t1\n1\t680\t680\t2\t1\n1\t681\t681\t2\t1\n1\t682\t682\t2\t1\n1\t683\t683\t2\t1\n1\t684\t684\t2\t1\n1\t685\t685\t2\t1\n1\t686\t686\t2\t1\n1\t687\t687\t2\t1\n1\t688\t688\t2\t1\n1\t689\t689\t2\t1\n1\t690\t690\t2\t1\n1\t691\t691\t2\t1\n1\t692\t692\t2\t1\n1\t693\t693\t2\t1\n1\t694\t694\t2\t1\n1\t695\t695\t2\t1\n1\t696\t696\t2\t1\n1\t697\t697\t2\t1\n1\t698\t698\t2\t1\n1\t699\t699\t2\t1\n1\t700\t700\t2\t1\n1\t701\t701\t2\t1\n1\t702\t702\t2\t1\n1\t703\t703\t2\t1\n1\t704\t704\t2\t1\n1\t705\t705\t2\t1\n1\t706\t706\t2\t1\n1\t707\t707\t2\t1\n1\t708\t708\t2\t1\n1\t709\t709\t2\t1\n1\t710\t710\t2\t1\n1\t711\t711\t2\t1\n1\t712\t712\t2\t1\n1\t713\t713\t2\t1\n1\t714\t714\t2\t1\n1\t715\t715\t2\t1\n1\t716\t716\t2\t1\n1\t717\t717\t2\t1\n1\t718\t718\t2\t1\n1\t719\t719\t2\t1\n1\t720\t720\t2\t1\n1\t721\t721\t2\t1\n1\t722\t722\t2\t1\n1\t723\t723\t2\t1\n1\t724\t724\t2\t1\n1\t725\t725\t2\t1\n1\t726\t726\t2\t1\n1\t727\t727\t2\t1\n1\t728\t728\t2\t1\n1\t729\t729\t2\t1\n1\t730\t730\t2\t1\n1\t731\t731\t2\t1\n1\t732\t732\t2\t1\n1\t733\t733\t2\t1\n1\t734\t734\t2\t1\n1\t735\t735\t2\t1\n1\t736\t736\t2\t1\n1\t737\t737\t2\t1\n1\t738\t738\t2\t1\n1\t739\t739\t2\t1\n1\t740\t740\t2\t1\n1\t741\t741\t2\t1\n1\t742\t742\t2\t1\n1\t743\t743\t2\t1\n1\t744\t744\t2\t1\n1\t745\t745\t2\t1\n1\t746\t746\t2\t1\n1\t747\t747\t2\t1\n1\t748\t748\t2\t1\n1\t749\t749\t2\t1\n1\t750\t750\t2\t1\n1\t751\t751\t2\t1\n1\t752\t752\t2\t1\n1\t753\t753\t2\t1\n1\t754\t754\t2\t1\n1\t755\t755\t2\t1\n1\t756\t756\t2\t1\n1\t757\t757\t2\t1\n1\t758\t75
8\t2\t1\n1\t759\t759\t2\t1\n1\t760\t760\t2\t1\n1\t761\t761\t2\t1\n1\t762\t762\t2\t1\n1\t763\t763\t2\t1\n1\t764\t764\t2\t1\n1\t765\t765\t2\t1\n1\t766\t766\t2\t1\n1\t767\t767\t2\t1\n1\t768\t768\t2\t1\n1\t769\t769\t2\t1\n1\t770\t770\t2\t1\n1\t771\t771\t2\t1\n1\t772\t772\t2\t1\n1\t773\t773\t2\t1\n1\t774\t774\t2\t1\n1\t775\t775\t2\t1\n1\t776\t776\t2\t1\n1\t777\t777\t2\t1\n1\t778\t778\t2\t1\n1\t779\t779\t2\t1\n1\t780\t780\t2\t1\n1\t781\t781\t2\t1\n1\t782\t782\t2\t1\n1\t783\t783\t2\t1\n1\t784\t784\t2\t1\n1\t785\t785\t2\t1\n1\t786\t786\t2\t1\n1\t787\t787\t2\t1\n1\t788\t788\t2\t1\n1\t789\t789\t2\t1\n1\t790\t790\t2\t1\n1\t791\t791\t2\t1\n1\t792\t792\t2\t1\n1\t793\t793\t2\t1\n1\t794\t794\t2\t1\n1\t795\t795\t2\t1\n1\t796\t796\t2\t1\n1\t797\t797\t2\t1\n1\t798\t798\t2\t1\n1\t799\t799\t2\t1\n1\t800\t800\t2\t1\n1\t801\t801\t2\t1\n1\t802\t802\t2\t1\n1\t803\t803\t2\t1\n1\t804\t804\t2\t1\n1\t805\t805\t2\t1\n1\t806\t806\t2\t1\n1\t807\t807\t2\t1\n1\t808\t808\t2\t1\n1\t809\t809\t2\t1\n1\t810\t810\t2\t1\n1\t811\t811\t2\t1\n1\t812\t812\t2\t1\n1\t813\t813\t2\t1\n1\t814\t814\t2\t1\n1\t815\t815\t2\t1\n1\t816\t816\t2\t1\n1\t817\t817\t2\t1\n1\t818\t818\t2\t1\n1\t819\t819\t2\t1\n1\t820\t820\t2\t1\n1\t821\t821\t2\t1\n1\t822\t822\t2\t1\n1\t823\t823\t2\t1\n1\t824\t824\t2\t1\n1\t825\t825\t2\t1\n1\t826\t826\t2\t1\n1\t827\t827\t2\t1\n1\t828\t828\t2\t1\n1\t829\t829\t2\t1\n1\t830\t830\t2\t1\n1\t831\t831\t2\t1\n1\t832\t832\t2\t1\n1\t833\t833\t2\t1\n1\t834\t834\t2\t1\n1\t835\t835\t2\t1\n1\t836\t836\t2\t1\n1\t837\t837\t2\t1\n1\t838\t838\t2\t1\n1\t839\t839\t2\t1\n1\t840\t840\t2\t1\n1\t841\t841\t2\t1\n1\t842\t842\t2\t1\n1\t843\t843\t2\t1\n1\t844\t844\t2\t1\n1\t845\t845\t2\t1\n1\t846\t846\t2\t1\n1\t847\t847\t2\t1\n1\t848\t848\t2\t1\n1\t849\t849\t2\t1\n1\t850\t850\t2\t1\n1\t851\t851\t2\t1\n1\t852\t852\t2\t1\n1\t853\t853\t2\t1\n1\t854\t854\t2\t1\n1\t855\t855\t2\t1\n1\t856\t856\t2\t1\n1\t857\t857\t2\t1\n1\t858\t858\t2\t1\n1\t859\t859\t2\t1\n1\t860\t860\t2\t1\n1\t861\t861\t2\t1\n1\t862\t862\t2\t1\n1\t863\t863\t2\t1\n1\t864\t864\t2\t1\n1\t865\t865\t2\t1\n1\t866\t866\t2\t1\n1\t867\t867\t2\t1\n1\t868\t868\t2\t1\n1\t869\t869\t2\t1\n1\t870\t870\t2\t1\n1\t871\t871\t2\t1\n1\t872\t872\t2\t1\n1\t873\t873\t2\t1\n1\t874\t874\t2\t1\n1\t875\t875\t2\t1\n1\t876\t876\t2\t1\n1\t877\t877\t2\t1\n1\t878\t878\t2\t1\n1\t879\t879\t2\t1\n1\t880\t880\t2\t1\n1\t881\t881\t2\t1\n1\t882\t882\t2\t1\n1\t883\t883\t2\t1\n1\t884\t884\t2\t1\n1\t885\t885\t2\t1\n1\t886\t886\t2\t1\n1\t887\t887\t2\t1\n1\t888\t888\t2\t1\n1\t889\t889\t2\t1\n1\t890\t890\t2\t1\n1\t891\t891\t2\t1\n1\t892\t892\t2\t1\n1\t893\t893\t2\t1\n1\t894\t894\t2\t1\n1\t895\t895\t2\t1\n1\t896\t896\t2\t1\n1\t897\t897\t2\t1\n1\t898\t898\t2\t1\n1\t899\t899\t2\t1\n1\t900\t900\t2\t1\n1\t901\t901\t2\t1\n1\t902\t902\t2\t1\n1\t903\t903\t2\t1\n1\t904\t904\t2\t1\n1\t905\t905\t2\t1\n1\t906\t906\t2\t1\n1\t907\t907\t2\t1\n1\t908\t908\t2\t1\n1\t909\t909\t2\t1\n1\t910\t910\t2\t1\n1\t911\t911\t2\t1\n1\t912\t912\t2\t1\n1\t913\t913\t2\t1\n1\t914\t914\t2\t1\n1\t915\t915\t2\t1\n1\t916\t916\t2\t1\n1\t917\t917\t2\t1\n1\t918\t918\t2\t1\n1\t919\t919\t2\t1\n1\t920\t920\t2\t1\n1\t921\t921\t2\t1\n1\t922\t922\t2\t1\n1\t923\t923\t2\t1\n1\t924\t924\t2\t1\n1\t925\t925\t2\t1\n1\t926\t926\t2\t1\n1\t927\t927\t2\t1\n1\t928\t928\t2\t1\n1\t929\t929\t2\t1\n1\t930\t930\t2\t1\n1\t931\t931\t2\t1\n1\t932\t932\t2\t1\n1\t933\t933\t2\t1\n1\t934\t934\t2\t1\n1\t935\t935\t2\t1\n1\t936\t936\t2\t1\n1\t937\t937\t2\t1\n1\t938\t938\t2\t1\n1\t939\t939\t2\t1\n1\t940\t940\t2\t1\n1\t941\t941\t2\t1\n1\t942\t942\t2\t1\n1\t943\t943\t2\t1\n1\t944\t944\t2\t1\n1\t945\t945\
t2\t1\n1\t946\t946\t2\t1\n1\t947\t947\t2\t1\n1\t948\t948\t2\t1\n1\t949\t949\t2\t1\n1\t950\t950\t2\t1\n1\t951\t951\t2\t1\n1\t952\t952\t2\t1\n1\t953\t953\t2\t1\n1\t954\t954\t2\t1\n1\t955\t955\t2\t1\n1\t956\t956\t2\t1\n1\t957\t957\t2\t1\n1\t958\t958\t2\t1\n1\t959\t959\t2\t1\n1\t960\t960\t2\t1\n1\t961\t961\t2\t1\n1\t962\t962\t2\t1\n1\t963\t963\t2\t1\n1\t964\t964\t2\t1\n1\t965\t965\t2\t1\n1\t966\t966\t2\t1\n1\t967\t967\t2\t1\n1\t968\t968\t2\t1\n1\t969\t969\t2\t1\n1\t970\t970\t2\t1\n1\t971\t971\t2\t1\n1\t972\t972\t2\t1\n1\t973\t973\t2\t1\n1\t974\t974\t2\t1\n1\t975\t975\t2\t1\n1\t976\t976\t2\t1\n1\t977\t977\t2\t1\n1\t978\t978\t2\t1\n1\t979\t979\t2\t1\n1\t980\t980\t2\t1\n1\t981\t981\t2\t1\n1\t982\t982\t2\t1\n1\t983\t983\t2\t1\n1\t984\t984\t2\t1\n1\t985\t985\t2\t1\n1\t986\t986\t2\t1\n1\t987\t987\t2\t1\n1\t988\t988\t2\t1\n1\t989\t989\t2\t1\n1\t990\t990\t2\t1\n1\t991\t991\t2\t1\n1\t992\t992\t2\t1\n1\t993\t993\t2\t1\n1\t994\t994\t2\t1\n1\t995\t995\t2\t1\n1\t996\t996\t2\t1\n1\t997\t997\t2\t1\n1\t998\t998\t2\t1\n1\t999\t999\t2\t1\n1\t1000\t1000\t2\t1\n"
  },
  {
    "path": "example/example.test_bin_out_firth_Y1.regenie",
    "content": "CHROM GENPOS ID ALLELE0 ALLELE1 A1FREQ INFO N TEST BETA SE CHISQ LOG10P\n1 1 1 2 1 0.214575 1 494 ADD 0.0775674 0.230001 0.113736 0.133163\n1 2 2 2 1 0.218623 1 494 ADD 0.131068 0.239808 0.29872 0.233077\n1 3 3 2 1 0.211538 1 494 ADD -0.256723 0.244611 1.10148 0.531739\n1 4 4 2 1 0.191296 1 494 ADD -0.131175 0.250523 0.274164 0.221449\n1 5 5 2 1 0.195344 1 494 ADD -0.187228 0.235372 0.632751 0.370236\n1 6 6 2 1 0.190283 1 494 ADD -0.234935 0.245557 0.91536 0.47019\n1 7 7 2 1 0.206478 1 494 ADD 0.11647 0.227747 0.26153 0.215332\n1 8 8 2 1 0.188259 1 494 ADD -0.353772 0.251712 1.97533 0.796197\n1 9 9 2 1 0.194332 1 494 ADD 0.283254 0.241072 1.38057 0.619781\n1 10 10 2 1 0.210526 1 494 ADD 0.0244317 0.236825 0.0106427 0.0372363\n1 11 11 2 1 0.183198 1 494 ADD -0.754117 0.264706 8.70619 2.49876\n1 12 12 2 1 0.20749 1 494 ADD 0.150619 0.229383 0.431159 0.291222\n1 13 13 2 1 0.198381 1 494 ADD 0.176768 0.249666 0.501286 0.319723\n1 14 14 2 1 0.191296 1 494 ADD -0.0898607 0.250264 0.128927 0.142941\n1 15 15 2 1 0.196356 1 494 ADD 0.0706531 0.24931 0.0803127 0.109649\n1 16 16 2 1 0.213563 1 494 ADD 0.127126 0.224028 0.322005 0.243817\n1 17 17 2 1 0.173077 1 494 ADD -0.0815387 0.258668 0.0993673 0.123441\n1 18 18 2 1 0.181174 1 494 ADD -0.245885 0.247926 0.983608 0.493076\n1 19 19 2 1 0.197368 1 494 ADD -0.229608 0.245176 0.87704 0.457157\n1 20 20 2 1 0.210526 1 494 ADD 0.0317603 0.23126 0.0188612 0.0502367\n1 21 21 2 1 0.197368 1 494 ADD -0.0978523 0.248042 0.15563 0.159134\n1 22 22 2 1 0.190283 1 494 ADD 0.0389733 0.24774 0.024748 0.0579936\n1 23 23 2 1 0.200405 1 494 ADD -0.0571134 0.229344 0.0620155 0.0951015\n1 24 24 2 1 0.197368 1 494 ADD -0.0144344 0.234569 0.00378665 0.0218503\n1 25 25 2 1 0.226721 1 494 ADD 0.482245 0.232228 4.31227 1.42207\n1 26 26 2 1 0.203441 1 494 ADD -0.237063 0.224814 1.11194 0.535123\n1 27 27 2 1 0.208502 1 494 ADD -0.0111253 0.23845 0.00217683 0.0164698\n1 28 28 2 1 0.17004 1 494 ADD 0.200861 0.25324 0.629112 0.36888\n1 29 29 2 1 0.210526 1 494 ADD -0.0404564 0.25006 0.026175 0.0597457\n1 30 30 2 1 0.169028 1 494 ADD -0.292083 0.244579 1.42618 0.633785\n1 31 31 2 1 0.224696 1 494 ADD -0.114124 0.24243 0.221604 0.195302\n1 32 32 2 1 0.209514 1 494 ADD 0.375668 0.224165 2.80849 1.02795\n1 33 33 2 1 0.211538 1 494 ADD -0.0482982 0.2361 0.0418475 0.0768022\n1 34 34 2 1 0.178138 1 494 ADD -0.078399 0.24941 0.0988087 0.123053\n1 35 35 2 1 0.187247 1 494 ADD -0.24506 0.233506 1.10141 0.531716\n1 36 36 2 1 0.188259 1 494 ADD 0.311055 0.249723 1.55151 0.671799\n1 37 37 2 1 0.190283 1 494 ADD -0.269829 0.260116 1.07608 0.523491\n1 38 38 2 1 0.188259 1 494 ADD 0.512363 0.237949 4.63648 1.50447\n1 39 39 2 1 0.211538 1 494 ADD 0.095859 0.218075 0.19322 0.180292\n1 40 40 2 1 0.174089 1 494 ADD 0.0137348 0.251811 0.00297503 0.0193142\n1 41 41 2 1 0.188259 1 494 ADD 0.0336069 0.245772 0.0186979 0.0500072\n1 42 42 2 1 0.223684 1 494 ADD -0.526916 0.226359 5.41861 1.70064\n1 43 43 2 1 0.197368 1 494 ADD 0.191427 0.248981 0.591118 0.35459\n1 44 44 2 1 0.194332 1 494 ADD -0.0685846 0.224834 0.0930525 0.118997\n1 45 45 2 1 0.20749 1 494 ADD 0.329215 0.231375 2.02453 0.810295\n1 46 46 2 1 0.214575 1 494 ADD 0.318271 0.233289 1.86126 0.763262\n1 47 47 2 1 0.178138 1 494 ADD 0.0441797 0.262924 0.0282347 0.0622022\n1 48 48 2 1 0.210526 1 494 ADD 0.0576357 0.231684 0.0618859 0.0949927\n1 49 49 2 1 0.196356 1 494 ADD -0.080932 0.262523 0.0950402 0.120408\n1 50 50 2 1 0.198381 1 494 ADD -0.0320333 0.236181 0.0183956 0.0495799\n1 51 51 2 1 0.200405 1 494 ADD -0.162379 0.258126 
0.395724 0.276293\n1 52 52 2 1 0.183198 1 494 ADD -0.295989 0.238547 1.53959 0.668211\n1 53 53 2 1 0.206478 1 494 ADD 0.196395 0.227096 0.7479 0.41213\n1 54 54 2 1 0.201417 1 494 ADD 0.539329 0.236908 5.18259 1.64179\n1 55 55 2 1 0.197368 1 494 ADD 0.0152131 0.229608 0.00439001 0.0235706\n1 56 56 2 1 0.172065 1 494 ADD 0.185535 0.252526 0.539807 0.334877\n1 57 57 2 1 0.17915 1 494 ADD 0.429497 0.259998 2.72884 1.00634\n1 58 58 2 1 0.196356 1 494 ADD -0.229797 0.242554 0.897575 0.464158\n1 59 59 2 1 0.208502 1 494 ADD 0.187161 0.218477 0.733865 0.407121\n1 60 60 2 1 0.210526 1 494 ADD 0.0522758 0.231395 0.0510379 0.0855156\n1 61 61 2 1 0.208502 1 494 ADD 0.260822 0.240551 1.17564 0.555573\n1 62 62 2 1 0.216599 1 494 ADD -0.0669554 0.219653 0.0929169 0.1189\n1 63 63 2 1 0.201417 1 494 ADD 0.336326 0.245913 1.87051 0.765947\n1 64 64 2 1 0.17915 1 494 ADD -0.086792 0.252983 0.1177 0.13576\n1 65 65 2 1 0.209514 1 494 ADD -0.396664 0.226994 3.05362 1.09389\n1 66 66 2 1 0.215587 1 494 ADD -0.142472 0.234949 0.367717 0.264201\n1 67 67 2 1 0.188259 1 494 ADD 0.140352 0.23105 0.369001 0.264761\n1 68 68 2 1 0.197368 1 494 ADD -0.164904 0.232686 0.50225 0.320106\n1 69 69 2 1 0.211538 1 494 ADD -0.205059 0.227167 0.814833 0.435695\n1 70 70 2 1 0.182186 1 494 ADD 0.217693 0.246619 0.779177 0.423206\n1 71 71 2 1 0.20749 1 494 ADD -0.00511998 0.253841 0.000406829 0.00704562\n1 72 72 2 1 0.204453 1 494 ADD -0.157021 0.234262 0.449275 0.298709\n1 73 73 2 1 0.205466 1 494 ADD 0.287335 0.24273 1.40129 0.626155\n1 74 74 2 1 0.194332 1 494 ADD 0.456076 0.246102 3.43436 1.19482\n1 75 75 2 1 0.172065 1 494 ADD 0.158581 0.264409 0.359709 0.26069\n1 76 76 2 1 0.240891 1 494 ADD 0.391273 0.221665 3.11577 1.11049\n1 77 77 2 1 0.201417 1 494 ADD -0.118872 0.235721 0.254308 0.21179\n1 78 78 2 1 0.211538 1 494 ADD -0.274321 0.236845 1.3415 0.60771\n1 79 79 2 1 0.199393 1 494 ADD -0.239712 0.230005 1.08619 0.52678\n1 80 80 2 1 0.185223 1 494 ADD -0.0376364 0.260651 0.0208496 0.0529637\n1 81 81 2 1 0.201417 1 494 ADD -0.325847 0.235856 1.90869 0.777\n1 82 82 2 1 0.213563 1 494 ADD 0.130661 0.22521 0.336603 0.250421\n1 83 83 2 1 0.201417 1 494 ADD 0.279562 0.239 1.36824 0.615979\n1 84 84 2 1 0.216599 1 494 ADD -0.0182119 0.229628 0.00629015 0.0283598\n1 85 85 2 1 0.219636 1 494 ADD -0.0530995 0.235816 0.050703 0.0852104\n1 86 86 2 1 0.202429 1 494 ADD -0.0830595 0.230345 0.130023 0.143629\n1 87 87 2 1 0.195344 1 494 ADD -0.1256 0.233704 0.288832 0.228435\n1 88 88 2 1 0.203441 1 494 ADD 0.294784 0.244878 1.44913 0.640795\n1 89 89 2 1 0.210526 1 494 ADD -0.244525 0.234147 1.09061 0.528216\n1 90 90 2 1 0.183198 1 494 ADD 0.4162 0.252007 2.7276 1.006\n1 91 91 2 1 0.203441 1 494 ADD 0.0327289 0.231936 0.0199126 0.0516939\n1 92 92 2 1 0.204453 1 494 ADD 0.288721 0.249324 1.341 0.607553\n1 93 93 2 1 0.196356 1 494 ADD -0.736078 0.254769 8.86739 2.53714\n1 94 94 2 1 0.204453 1 494 ADD -0.257101 0.236491 1.1819 0.557566\n1 95 95 2 1 0.195344 1 494 ADD 0.161937 0.259032 0.390827 0.274199\n1 96 96 2 1 0.19332 1 494 ADD -0.337784 0.243267 1.92802 0.782582\n1 97 97 2 1 0.187247 1 494 ADD 0.218805 0.2386 0.840957 0.444756\n1 98 98 2 1 0.204453 1 494 ADD 0.0927028 0.233065 0.15821 0.160641\n1 99 99 2 1 0.194332 1 494 ADD 0.272401 0.23222 1.37599 0.618371\n1 100 100 2 1 0.197368 1 494 ADD -0.0079439 0.258667 0.000943158 0.0107727\n1 101 101 2 1 0.208502 1 494 ADD -0.00976784 0.22342 0.00191141 0.0154151\n1 102 102 2 1 0.189271 1 494 ADD -0.0719744 0.227469 0.100118 0.123962\n1 103 103 2 1 0.195344 1 494 ADD 0.267385 0.23489 1.29583 
0.593499\n1 104 104 2 1 0.186235 1 494 ADD 0.0171918 0.236113 0.00530154 0.0259694\n1 105 105 2 1 0.224696 1 494 ADD 0.0463889 0.229778 0.0407578 0.0757175\n1 106 106 2 1 0.204453 1 494 ADD 0.209573 0.227116 0.851479 0.448386\n1 107 107 2 1 0.189271 1 494 ADD 0.199423 0.224984 0.785686 0.425497\n1 108 108 2 1 0.216599 1 494 ADD 0.337977 0.237924 2.0179 0.808398\n1 109 109 2 1 0.210526 1 494 ADD -0.0987036 0.237601 0.172572 0.168875\n1 110 110 2 1 0.215587 1 494 ADD -0.0390161 0.229524 0.0288956 0.0629737\n1 111 111 2 1 0.197368 1 494 ADD 0.286579 0.246533 1.35125 0.610729\n1 112 112 2 1 0.180162 1 494 ADD 0.360755 0.266713 1.82952 0.754032\n1 113 113 2 1 0.214575 1 494 ADD -0.416251 0.226915 3.36498 1.17655\n1 114 114 2 1 0.215587 1 494 ADD -0.0622402 0.232877 0.0714314 0.102778\n1 115 115 2 1 0.196356 1 494 ADD 0.542127 0.245454 4.8782 1.56546\n1 116 116 2 1 0.210526 1 494 ADD -0.0249269 0.227466 0.0120089 0.0396537\n1 117 117 2 1 0.20749 1 494 ADD 0.362257 0.229285 2.49621 0.942635\n1 118 118 2 1 0.189271 1 494 ADD 0.153986 0.239736 0.412567 0.283438\n1 119 119 2 1 0.194332 1 494 ADD -0.0445348 0.257544 0.0299017 0.0641334\n1 120 120 2 1 0.208502 1 494 ADD -0.13184 0.225575 0.341597 0.252659\n1 121 121 2 1 0.183198 1 494 ADD -0.103698 0.245506 0.178411 0.172151\n1 122 122 2 1 0.187247 1 494 ADD -0.168238 0.260791 0.416162 0.284951\n1 123 123 2 1 0.187247 1 494 ADD 0.56839 0.259103 4.81224 1.54885\n1 124 124 2 1 0.194332 1 494 ADD 0.0990443 0.238431 0.172557 0.168867\n1 125 125 2 1 0.199393 1 494 ADD 0.0242139 0.233819 0.0107244 0.0373846\n1 126 126 2 1 0.192308 1 494 ADD -0.0635237 0.247483 0.0658841 0.0983093\n1 127 127 2 1 0.211538 1 494 ADD -0.310185 0.216771 2.04758 0.816876\n1 128 128 2 1 0.189271 1 494 ADD 0.130459 0.245538 0.282299 0.225338\n1 129 129 2 1 0.199393 1 494 ADD 0.011662 0.245723 0.00225244 0.0167587\n1 130 130 2 1 0.208502 1 494 ADD -0.481741 0.231767 4.3204 1.42414\n1 131 131 2 1 0.186235 1 494 ADD -0.0707131 0.251207 0.0792386 0.108835\n1 132 132 2 1 0.19332 1 494 ADD 0.0197285 0.251053 0.00617529 0.0280916\n1 133 133 2 1 0.19332 1 494 ADD -0.113898 0.226748 0.252316 0.210808\n1 134 134 2 1 0.181174 1 494 ADD 0.0272865 0.257303 0.0112462 0.0383207\n1 135 135 2 1 0.212551 1 494 ADD 0.551368 0.23462 5.52274 1.72652\n1 136 136 2 1 0.198381 1 494 ADD -0.0377658 0.237599 0.0252644 0.0586327\n1 137 137 2 1 0.210526 1 494 ADD 0.172952 0.22854 0.5727 0.347572\n1 138 138 2 1 0.206478 1 494 ADD 0.654425 0.227399 8.05732 2.34371\n1 139 139 2 1 0.188259 1 494 ADD 0.127514 0.255762 0.248566 0.208951\n1 140 140 2 1 0.195344 1 494 ADD 0.137092 0.229052 0.358224 0.260037\n1 141 141 2 1 0.197368 1 494 ADD 0.221022 0.239153 0.854121 0.449296\n1 142 142 2 1 0.190283 1 494 ADD -0.107879 0.243711 0.19594 0.181762\n1 143 143 2 1 0.220648 1 494 ADD -0.171045 0.236941 0.521126 0.327568\n1 144 144 2 1 0.188259 1 494 ADD -0.445567 0.244672 3.31632 1.16371\n1 145 145 2 1 0.200405 1 494 ADD 0.336883 0.247684 1.84995 0.759977\n1 146 146 2 1 0.20749 1 494 ADD -0.0354943 0.233717 0.0230641 0.0558676\n1 147 147 2 1 0.223684 1 494 ADD -0.0783441 0.219802 0.127043 0.141753\n1 148 148 2 1 0.192308 1 494 ADD 0.462904 0.238452 3.76859 1.28213\n1 149 149 2 1 0.197368 1 494 ADD 0.00482043 0.236012 0.000417162 0.00713525\n1 150 150 2 1 0.209514 1 494 ADD 0.0733423 0.242037 0.0918215 0.118117\n1 151 151 2 1 0.19332 1 494 ADD 0.137108 0.260469 0.277085 0.22285\n1 152 152 2 1 0.189271 1 494 ADD 0.269233 0.254198 1.12179 0.538302\n1 153 153 2 1 0.181174 1 494 ADD 0.0524316 0.246177 0.0453619 0.0802214\n1 154 
154 2 1 0.204453 1 494 ADD -0.0532762 0.237997 0.0501098 0.0846675\n1 155 155 2 1 0.209514 1 494 ADD 0.0024961 0.235479 0.000112363 0.00368867\n1 156 156 2 1 0.186235 1 494 ADD -0.249799 0.236163 1.11881 0.537341\n1 157 157 2 1 0.208502 1 494 ADD -0.421515 0.249013 2.86538 1.04333\n1 158 158 2 1 0.189271 1 494 ADD -0.0767536 0.240251 0.102063 0.125305\n1 159 159 2 1 0.202429 1 494 ADD 0.364345 0.236085 2.38171 0.910932\n1 160 160 2 1 0.187247 1 494 ADD -0.130187 0.24101 0.291786 0.229827\n1 161 161 2 1 0.205466 1 494 ADD -0.324356 0.23566 1.8944 0.772869\n1 162 162 2 1 0.199393 1 494 ADD -0.251815 0.236096 1.13759 0.543388\n1 163 163 2 1 0.17915 1 494 ADD 0.549634 0.262562 4.38211 1.43988\n1 164 164 2 1 0.197368 1 494 ADD 0.0498387 0.249499 0.0399021 0.0748571\n1 165 165 2 1 0.188259 1 494 ADD -0.0048665 0.243378 0.000399827 0.00698424\n1 166 166 2 1 0.196356 1 494 ADD 0.189153 0.25008 0.572093 0.34734\n1 167 167 2 1 0.187247 1 494 ADD 0.114594 0.228898 0.250635 0.209976\n1 168 168 2 1 0.205466 1 494 ADD 0.20514 0.235785 0.756952 0.415348\n1 169 169 2 1 0.201417 1 494 ADD 0.371147 0.235378 2.48634 0.939912\n1 170 170 2 1 0.188259 1 494 ADD -0.0325784 0.243087 0.0179612 0.0489602\n1 171 171 2 1 0.205466 1 494 ADD 0.0806316 0.232338 0.12044 0.137536\n1 172 172 2 1 0.215587 1 494 ADD -0.0291194 0.246949 0.0139043 0.0428077\n1 173 173 2 1 0.210526 1 494 ADD -0.525094 0.246333 4.54389 1.48101\n1 174 174 2 1 0.20749 1 494 ADD -0.477343 0.241393 3.91031 1.31884\n1 175 175 2 1 0.22166 1 494 ADD 0.159839 0.239087 0.446944 0.297751\n1 176 176 2 1 0.182186 1 494 ADD -0.273088 0.248103 1.21156 0.566992\n1 177 177 2 1 0.212551 1 494 ADD 0.100509 0.240498 0.174657 0.17005\n1 178 178 2 1 0.219636 1 494 ADD 0.219401 0.239146 0.841688 0.445009\n1 179 179 2 1 0.194332 1 494 ADD -0.44054 0.248412 3.14505 1.11829\n1 180 180 2 1 0.198381 1 494 ADD -0.0222736 0.231566 0.0092519 0.0346232\n1 181 181 2 1 0.212551 1 494 ADD 0.0446498 0.225942 0.0390522 0.0739946\n1 182 182 2 1 0.189271 1 494 ADD -0.529348 0.232001 5.20601 1.64765\n1 183 183 2 1 0.200405 1 494 ADD -0.173841 0.232296 0.56004 0.342711\n1 184 184 2 1 0.197368 1 494 ADD -0.0869827 0.23773 0.133875 0.146029\n1 185 185 2 1 0.198381 1 494 ADD -0.31736 0.246738 1.65436 0.702532\n1 186 186 2 1 0.219636 1 494 ADD 0.415424 0.217999 3.63141 1.24643\n1 187 187 2 1 0.204453 1 494 ADD -0.127341 0.230953 0.30401 0.23554\n1 188 188 2 1 0.197368 1 494 ADD -0.131344 0.237589 0.30561 0.236283\n1 189 189 2 1 0.195344 1 494 ADD 0.282696 0.235248 1.44406 0.639249\n1 190 190 2 1 0.187247 1 494 ADD -0.190845 0.25003 0.582608 0.351355\n1 191 191 2 1 0.209514 1 494 ADD 0.579174 0.216042 7.07423 2.10679\n1 192 192 2 1 0.171053 1 494 ADD 0.125639 0.24471 0.263599 0.21634\n1 193 193 2 1 0.192308 1 494 ADD -0.102295 0.254299 0.161814 0.162733\n1 194 194 2 1 0.208502 1 494 ADD -0.0561295 0.236682 0.0562408 0.0901556\n1 195 195 2 1 0.217611 1 494 ADD 0.00759132 0.234183 0.00105081 0.0113785\n1 196 196 2 1 0.203441 1 494 ADD 0.336144 0.244164 1.89533 0.773138\n1 197 197 2 1 0.202429 1 494 ADD 0.326072 0.237557 1.88405 0.769871\n1 198 198 2 1 0.198381 1 494 ADD -0.124779 0.228909 0.297134 0.232336\n1 199 199 2 1 0.185223 1 494 ADD -0.526894 0.242737 4.71165 1.52348\n1 200 200 2 1 0.188259 1 494 ADD 0.104052 0.243099 0.183203 0.174811\n1 201 201 2 1 0.216599 1 494 ADD -0.233208 0.235642 0.979449 0.491693\n1 202 202 2 1 0.175101 1 494 ADD -0.0460542 0.241999 0.0362167 0.0710572\n1 203 203 2 1 0.222672 1 494 ADD -0.353509 0.24956 2.00655 0.80515\n1 204 204 2 1 0.183198 1 494 ADD 
-0.0277105 0.236089 0.0137764 0.0426013\n1 205 205 2 1 0.190283 1 494 ADD -0.225238 0.234208 0.924866 0.473402\n1 206 206 2 1 0.181174 1 494 ADD -0.118214 0.250118 0.223381 0.196218\n1 207 207 2 1 0.19332 1 494 ADD 0.194133 0.248703 0.609304 0.361461\n1 208 208 2 1 0.208502 1 494 ADD 0.0870885 0.248417 0.122902 0.139118\n1 209 209 2 1 0.176113 1 494 ADD -0.316245 0.235764 1.79925 0.745203\n1 210 210 2 1 0.210526 1 494 ADD -0.452566 0.2169 4.35358 1.43261\n1 211 211 2 1 0.191296 1 494 ADD 0.347337 0.250765 1.91853 0.779843\n1 212 212 2 1 0.209514 1 494 ADD -0.0637143 0.2245 0.0805453 0.109825\n1 213 213 2 1 0.180162 1 494 ADD 0.125401 0.257536 0.237096 0.203209\n1 214 214 2 1 0.192308 1 494 ADD -0.280003 0.254352 1.21187 0.56709\n1 215 215 2 1 0.202429 1 494 ADD 0.273454 0.240409 1.2938 0.592866\n1 216 216 2 1 0.211538 1 494 ADD 0.0877622 0.244877 0.128445 0.142638\n1 217 217 2 1 0.215587 1 494 ADD 0.147027 0.219577 0.44835 0.298329\n1 218 218 2 1 0.195344 1 494 ADD 0.11561 0.247524 0.218148 0.193512\n1 219 219 2 1 0.201417 1 494 ADD -0.165098 0.22573 0.534945 0.332981\n1 220 220 2 1 0.185223 1 494 ADD -0.227127 0.241249 0.886352 0.460337\n1 221 221 2 1 0.19332 1 494 ADD -0.245511 0.241253 1.03561 0.51026\n1 222 222 2 1 0.17915 1 494 ADD 0.126374 0.238576 0.280581 0.22452\n1 223 223 2 1 0.20749 1 494 ADD 0.0432905 0.235591 0.033765 0.0684366\n1 224 224 2 1 0.203441 1 494 ADD -0.239607 0.250357 0.915968 0.470395\n1 225 225 2 1 0.182186 1 494 ADD 0.208018 0.235924 0.777421 0.422588\n1 226 226 2 1 0.211538 1 494 ADD 0.00814029 0.217043 0.00140666 0.0131916\n1 227 227 2 1 0.203441 1 494 ADD 0.0233924 0.247722 0.00891698 0.0339673\n1 228 228 2 1 0.178138 1 494 ADD 0.00411336 0.243644 0.000285025 0.00588961\n1 229 229 2 1 0.208502 1 494 ADD 0.205819 0.237378 0.751776 0.413509\n1 230 230 2 1 0.171053 1 494 ADD -0.28898 0.249136 1.34543 0.608927\n1 231 231 2 1 0.188259 1 494 ADD 0.388264 0.241396 2.58699 0.967602\n1 232 232 2 1 0.181174 1 494 ADD 0.171533 0.247554 0.480128 0.311256\n1 233 233 2 1 0.201417 1 494 ADD 0.0469302 0.237934 0.0389036 0.073843\n1 234 234 2 1 0.197368 1 494 ADD 0.186589 0.241115 0.598854 0.35752\n1 235 235 2 1 0.186235 1 494 ADD 0.164584 0.243268 0.457722 0.302169\n1 236 236 2 1 0.196356 1 494 ADD 0.686872 0.246731 7.53337 2.21777\n1 237 237 2 1 0.216599 1 494 ADD -0.232181 0.237769 0.953545 0.483044\n1 238 238 2 1 0.190283 1 494 ADD 0.507344 0.241471 4.41444 1.44811\n1 239 239 2 1 0.17915 1 494 ADD -0.141981 0.252332 0.316603 0.241348\n1 240 240 2 1 0.217611 1 494 ADD 0.0184468 0.227884 0.00655256 0.0289639\n1 241 241 2 1 0.185223 1 494 ADD -0.440588 0.24769 3.1641 1.12335\n1 242 242 2 1 0.198381 1 494 ADD 0.256868 0.240376 1.14192 0.544779\n1 243 243 2 1 0.219636 1 494 ADD -0.0875526 0.243105 0.129703 0.143428\n1 244 244 2 1 0.196356 1 494 ADD 0.241806 0.23834 1.02929 0.508183\n1 245 245 2 1 0.187247 1 494 ADD -0.440328 0.23728 3.44375 1.19729\n1 246 246 2 1 0.196356 1 494 ADD -0.310991 0.233608 1.77224 0.737298\n1 247 247 2 1 0.189271 1 494 ADD 0.107724 0.255013 0.178445 0.17217\n1 248 248 2 1 0.183198 1 494 ADD 0.0316461 0.238284 0.0176381 0.0484949\n1 249 249 2 1 0.175101 1 494 ADD 0.27444 0.257176 1.13876 0.543765\n1 250 250 2 1 0.17915 1 494 ADD 0.0614279 0.252795 0.0590466 0.0925837\n1 251 251 2 1 0.226721 1 494 ADD -0.228603 0.237876 0.923556 0.472959\n1 252 252 2 1 0.197368 1 494 ADD 0.121706 0.230814 0.278035 0.223305\n1 253 253 2 1 0.209514 1 494 ADD 0.488668 0.224401 4.74218 1.53118\n1 254 254 2 1 0.209514 1 494 ADD 0.177626 0.229557 0.598737 0.357476\n1 255 
255 2 1 0.212551 1 494 ADD 0.313204 0.240614 1.69439 0.71439\n1 256 256 2 1 0.195344 1 494 ADD -0.191068 0.237769 0.645748 0.375061\n1 257 257 2 1 0.186235 1 494 ADD 0.472663 0.24021 3.87186 1.3089\n1 258 258 2 1 0.212551 1 494 ADD 0.454249 0.230918 3.86965 1.30833\n1 259 259 2 1 0.218623 1 494 ADD -0.00686996 0.233375 0.000866564 0.0103208\n1 260 260 2 1 0.201417 1 494 ADD -0.633816 0.247666 6.84194 2.0504\n1 261 261 2 1 0.186235 1 494 ADD 0.0238221 0.239516 0.00989219 0.0358472\n1 262 262 2 1 0.225709 1 494 ADD -0.360946 0.250435 2.07727 0.825338\n1 263 263 2 1 0.187247 1 494 ADD -0.169709 0.240098 0.499613 0.319057\n1 264 264 2 1 0.183198 1 494 ADD -0.256245 0.242189 1.11944 0.537544\n1 265 265 2 1 0.20749 1 494 ADD 0.0243429 0.222907 0.0119261 0.0395108\n1 266 266 2 1 0.194332 1 494 ADD -0.662439 0.251044 7.3146 2.16497\n1 267 267 2 1 0.203441 1 494 ADD -0.239132 0.241563 0.979975 0.491868\n1 268 268 2 1 0.219636 1 494 ADD 0.0951827 0.225713 0.17783 0.171827\n1 269 269 2 1 0.184211 1 494 ADD 0.282981 0.231405 1.49544 0.654873\n1 270 270 2 1 0.191296 1 494 ADD 0.453645 0.255768 3.14586 1.1185\n1 271 271 2 1 0.200405 1 494 ADD -0.0808228 0.240269 0.113154 0.132779\n1 272 272 2 1 0.186235 1 494 ADD -0.042941 0.242151 0.0314466 0.0658816\n1 273 273 2 1 0.191296 1 494 ADD 0.0133505 0.243043 0.00301736 0.0194541\n1 274 274 2 1 0.177126 1 494 ADD 0.0551893 0.250015 0.0487279 0.0833925\n1 275 275 2 1 0.216599 1 494 ADD -0.210649 0.219884 0.917769 0.471004\n1 276 276 2 1 0.195344 1 494 ADD -0.155271 0.231073 0.451528 0.299634\n1 277 277 2 1 0.180162 1 494 ADD 0.159026 0.236068 0.453795 0.300563\n1 278 278 2 1 0.200405 1 494 ADD 0.0996347 0.235063 0.179661 0.172847\n1 279 279 2 1 0.210526 1 494 ADD 0.317616 0.23165 1.87992 0.768675\n1 280 280 2 1 0.202429 1 494 ADD 0.0963706 0.22526 0.18303 0.174715\n1 281 281 2 1 0.210526 1 494 ADD -0.315037 0.228802 1.89585 0.773289\n1 282 282 2 1 0.219636 1 494 ADD -0.076133 0.236931 0.103252 0.126121\n1 283 283 2 1 0.213563 1 494 ADD 0.0220752 0.223137 0.0097874 0.0356494\n1 284 284 2 1 0.185223 1 494 ADD 0.351599 0.252479 1.93929 0.785831\n1 285 285 2 1 0.202429 1 494 ADD -0.321853 0.245204 1.72291 0.722805\n1 286 286 2 1 0.219636 1 494 ADD -0.147106 0.223449 0.433414 0.292159\n1 287 287 2 1 0.192308 1 494 ADD 0.151032 0.246317 0.375968 0.267792\n1 288 288 2 1 0.200405 1 494 ADD -0.187541 0.240239 0.609402 0.361498\n1 289 289 2 1 0.189271 1 494 ADD -0.0840863 0.227902 0.136131 0.147423\n1 290 290 2 1 0.214575 1 494 ADD 0.292289 0.23618 1.53158 0.665798\n1 291 291 2 1 0.216599 1 494 ADD 0.268234 0.219577 1.4923 0.653921\n1 292 292 2 1 0.19332 1 494 ADD -0.344072 0.235297 2.13829 0.842658\n1 293 293 2 1 0.205466 1 494 ADD -0.0864626 0.230699 0.140464 0.150077\n1 294 294 2 1 0.184211 1 494 ADD 0.489442 0.262504 3.47641 1.20586\n1 295 295 2 1 0.203441 1 494 ADD 0.331204 0.231665 2.04395 0.815841\n1 296 296 2 1 0.209514 1 494 ADD -0.0805619 0.239349 0.113291 0.13287\n1 297 297 2 1 0.215587 1 494 ADD 0.165862 0.22921 0.523633 0.328553\n1 298 298 2 1 0.20749 1 494 ADD -0.188592 0.237428 0.630932 0.369558\n1 299 299 2 1 0.216599 1 494 ADD 0.176591 0.233697 0.570993 0.346918\n1 300 300 2 1 0.202429 1 494 ADD -0.144656 0.248427 0.339061 0.251524\n1 301 301 2 1 0.209514 1 494 ADD -0.0933582 0.244644 0.145625 0.153198\n1 302 302 2 1 0.209514 1 494 ADD -0.154235 0.239447 0.414902 0.284421\n1 303 303 2 1 0.187247 1 494 ADD 0.112938 0.226548 0.248519 0.208927\n1 304 304 2 1 0.19332 1 494 ADD -0.0369757 0.234443 0.0248747 0.058151\n1 305 305 2 1 0.185223 1 494 ADD 
-0.160382 0.230493 0.484166 0.31288\n1 306 306 2 1 0.195344 1 494 ADD 0.148343 0.227192 0.426332 0.289211\n1 307 307 2 1 0.202429 1 494 ADD -0.364964 0.246993 2.18339 0.855404\n1 308 308 2 1 0.189271 1 494 ADD 0.0113414 0.230555 0.00241983 0.0173822\n1 309 309 2 1 0.200405 1 494 ADD -0.550167 0.232754 5.58719 1.74251\n1 310 310 2 1 0.208502 1 494 ADD -0.313848 0.246007 1.62758 0.694567\n1 311 311 2 1 0.201417 1 494 ADD -0.0230018 0.231591 0.00986463 0.0357953\n1 312 312 2 1 0.204453 1 494 ADD -0.247301 0.217641 1.29114 0.592034\n1 313 313 2 1 0.208502 1 494 ADD 0.0489485 0.230112 0.0452481 0.0801124\n1 314 314 2 1 0.197368 1 494 ADD -0.236284 0.243026 0.94528 0.480272\n1 315 315 2 1 0.186235 1 494 ADD -0.0544366 0.238678 0.0520186 0.0864048\n1 316 316 2 1 0.191296 1 494 ADD -0.156108 0.233868 0.445563 0.297182\n1 317 317 2 1 0.200405 1 494 ADD -0.170493 0.230598 0.546638 0.337531\n1 318 318 2 1 0.201417 1 494 ADD -0.0836236 0.240343 0.121059 0.137935\n1 319 319 2 1 0.218623 1 494 ADD -0.264531 0.224167 1.39255 0.623468\n1 320 320 2 1 0.183198 1 494 ADD -0.207568 0.226662 0.838617 0.443948\n1 321 321 2 1 0.178138 1 494 ADD -0.0122218 0.240989 0.00257204 0.0179313\n1 322 322 2 1 0.194332 1 494 ADD -0.000149906 0.246105 3.71023e-07 0.00021112\n1 323 323 2 1 0.201417 1 494 ADD 0.32419 0.254198 1.62651 0.694247\n1 324 324 2 1 0.210526 1 494 ADD 0.0770296 0.248347 0.0962048 0.12123\n1 325 325 2 1 0.19332 1 494 ADD 0.313602 0.236354 1.76048 0.733851\n1 326 326 2 1 0.185223 1 494 ADD -0.0525833 0.240257 0.047901 0.0826222\n1 327 327 2 1 0.200405 1 494 ADD -0.0591845 0.238504 0.0615781 0.0947337\n1 328 328 2 1 0.210526 1 494 ADD 0.354506 0.233132 2.3123 0.891591\n1 329 329 2 1 0.208502 1 494 ADD 0.111997 0.229837 0.237451 0.203389\n1 330 330 2 1 0.17915 1 494 ADD -0.12819 0.236699 0.2933 0.230539\n1 331 331 2 1 0.19332 1 494 ADD 0.408404 0.23602 2.99421 1.07799\n1 332 332 2 1 0.188259 1 494 ADD 0.195833 0.246878 0.629226 0.368922\n1 333 333 2 1 0.209514 1 494 ADD -0.151887 0.226026 0.451569 0.29965\n1 334 334 2 1 0.19332 1 494 ADD 0.0416929 0.230151 0.032817 0.0674014\n1 335 335 2 1 0.212551 1 494 ADD 0.0613165 0.225989 0.0736173 0.1045\n1 336 336 2 1 0.186235 1 494 ADD 0.129016 0.238361 0.292966 0.230382\n1 337 337 2 1 0.218623 1 494 ADD 0.152594 0.23628 0.417082 0.285338\n1 338 338 2 1 0.190283 1 494 ADD -0.239065 0.254347 0.883446 0.459346\n1 339 339 2 1 0.19332 1 494 ADD -0.135831 0.242476 0.313803 0.240064\n1 340 340 2 1 0.192308 1 494 ADD -0.263052 0.233923 1.26455 0.583707\n1 341 341 2 1 0.191296 1 494 ADD -0.184736 0.232458 0.631558 0.369791\n1 342 342 2 1 0.197368 1 494 ADD -0.280695 0.252277 1.23798 0.575345\n1 343 343 2 1 0.216599 1 494 ADD 0.119036 0.229639 0.268699 0.218815\n1 344 344 2 1 0.17915 1 494 ADD 0.643606 0.228023 7.76068 2.27249\n1 345 345 2 1 0.178138 1 494 ADD -0.249486 0.256349 0.947174 0.480908\n1 346 346 2 1 0.226721 1 494 ADD -0.119133 0.22382 0.283311 0.225819\n1 347 347 2 1 0.183198 1 494 ADD -0.271723 0.256981 1.11802 0.537086\n1 348 348 2 1 0.190283 1 494 ADD -0.132357 0.24202 0.299083 0.233247\n1 349 349 2 1 0.185223 1 494 ADD -0.406789 0.238202 2.9164 1.05708\n1 350 350 2 1 0.197368 1 494 ADD -0.309055 0.244167 1.60213 0.686973\n1 351 351 2 1 0.211538 1 494 ADD -0.0346362 0.242827 0.0203455 0.0522838\n1 352 352 2 1 0.197368 1 494 ADD 0.222792 0.243363 0.83809 0.443766\n1 353 353 2 1 0.195344 1 494 ADD 0.220816 0.259398 0.724649 0.403818\n1 354 354 2 1 0.214575 1 494 ADD 0.423568 0.232507 3.31876 1.16435\n1 355 355 2 1 0.218623 1 494 ADD 0.15006 0.229652 0.426959 
0.289472\n1 356 356 2 1 0.201417 1 494 ADD 0.137602 0.227091 0.367158 0.263956\n1 357 357 2 1 0.189271 1 494 ADD -0.357142 0.233423 2.34096 0.89959\n1 358 358 2 1 0.213563 1 494 ADD -0.415095 0.244066 2.89255 1.05066\n1 359 359 2 1 0.180162 1 494 ADD 0.18873 0.242497 0.605714 0.360109\n1 360 360 2 1 0.20749 1 494 ADD 0.194504 0.239236 0.661007 0.380693\n1 361 361 2 1 0.19332 1 494 ADD -0.231122 0.239437 0.931756 0.475724\n1 362 362 2 1 0.196356 1 494 ADD -0.000245258 0.23587 1.08119e-06 0.000360459\n1 363 363 2 1 0.214575 1 494 ADD -0.168907 0.234424 0.519147 0.326789\n1 364 364 2 1 0.201417 1 494 ADD 0.340148 0.254778 1.78242 0.740282\n1 365 365 2 1 0.17915 1 494 ADD 0.259736 0.243495 1.13786 0.543473\n1 366 366 2 1 0.205466 1 494 ADD -0.326632 0.231234 1.99532 0.801932\n1 367 367 2 1 0.162955 1 494 ADD -0.568275 0.268885 4.46667 1.4614\n1 368 368 2 1 0.187247 1 494 ADD 0.0128547 0.244311 0.00276847 0.0186174\n1 369 369 2 1 0.198381 1 494 ADD 0.0975411 0.228457 0.182291 0.174307\n1 370 370 2 1 0.212551 1 494 ADD -0.084123 0.226825 0.137546 0.148294\n1 371 371 2 1 0.202429 1 494 ADD 0.0480171 0.223419 0.0461905 0.0810109\n1 372 372 2 1 0.20749 1 494 ADD 0.56381 0.218129 6.54312 1.97761\n1 373 373 2 1 0.191296 1 494 ADD -0.0834276 0.243188 0.117688 0.135753\n1 374 374 2 1 0.182186 1 494 ADD -0.363118 0.23859 2.31629 0.892704\n1 375 375 2 1 0.196356 1 494 ADD 0.0651676 0.237644 0.0751985 0.105732\n1 376 376 2 1 0.19332 1 494 ADD -0.241227 0.235149 1.05236 0.515752\n1 377 377 2 1 0.205466 1 494 ADD 0.341688 0.249721 1.87218 0.766432\n1 378 378 2 1 0.204453 1 494 ADD 0.158864 0.248605 0.408348 0.281657\n1 379 379 2 1 0.199393 1 494 ADD -0.0481929 0.244 0.0390108 0.0739524\n1 380 380 2 1 0.195344 1 494 ADD -0.0734079 0.248604 0.0871903 0.114763\n1 381 381 2 1 0.210526 1 494 ADD -0.0105475 0.228929 0.00212273 0.0162601\n1 382 382 2 1 0.194332 1 494 ADD 0.0416356 0.239626 0.0301899 0.0644624\n1 383 383 2 1 0.197368 1 494 ADD -0.0417624 0.234217 0.0317932 0.0662686\n1 384 384 2 1 0.197368 1 494 ADD -0.0227064 0.258763 0.00770003 0.0314818\n1 385 385 2 1 0.19332 1 494 ADD -0.17419 0.2265 0.591438 0.354712\n1 386 386 2 1 0.209514 1 494 ADD 0.204922 0.236581 0.750272 0.412974\n1 387 387 2 1 0.216599 1 494 ADD -0.433229 0.242266 3.19781 1.13231\n1 388 388 2 1 0.182186 1 494 ADD -0.0664087 0.261258 0.0646118 0.0972631\n1 389 389 2 1 0.201417 1 494 ADD -0.0964735 0.24814 0.151155 0.156497\n1 390 390 2 1 0.211538 1 494 ADD -0.21901 0.235598 0.864138 0.452738\n1 391 391 2 1 0.192308 1 494 ADD -0.336766 0.239517 1.9769 0.796649\n1 392 392 2 1 0.218623 1 494 ADD -0.191544 0.228947 0.699952 0.394911\n1 393 393 2 1 0.190283 1 494 ADD 0.0611308 0.245398 0.0620552 0.0951349\n1 394 394 2 1 0.194332 1 494 ADD 0.0518504 0.246149 0.0443718 0.0792698\n1 395 395 2 1 0.172065 1 494 ADD -0.224502 0.261258 0.738422 0.40875\n1 396 396 2 1 0.210526 1 494 ADD 0.0912253 0.234707 0.15107 0.156447\n1 397 397 2 1 0.178138 1 494 ADD -0.162788 0.252472 0.415737 0.284773\n1 398 398 2 1 0.231781 1 494 ADD 0.01235 0.235618 0.00274739 0.0185449\n1 399 399 2 1 0.175101 1 494 ADD -0.0479172 0.267481 0.0320921 0.066601\n1 400 400 2 1 0.209514 1 494 ADD 0.0439179 0.233626 0.0353379 0.0701269\n1 401 401 2 1 0.200405 1 494 ADD -0.329615 0.247943 1.76729 0.735849\n1 402 402 2 1 0.203441 1 494 ADD 0.272774 0.22381 1.48541 0.651832\n1 403 403 2 1 0.195344 1 494 ADD -0.0152635 0.246255 0.00384184 0.0220128\n1 404 404 2 1 0.17915 1 494 ADD -0.140991 0.243505 0.33525 0.249812\n1 405 405 2 1 0.22166 1 494 ADD -0.256102 0.22605 1.28357 
0.589667\n1 406 406 2 1 0.191296 1 494 ADD -0.723328 0.262164 8.04727 2.3413\n1 407 407 2 1 0.204453 1 494 ADD 0.00669707 0.228699 0.000857517 0.0102661\n1 408 408 2 1 0.195344 1 494 ADD 0.00669866 0.229694 0.000850504 0.0102236\n1 409 409 2 1 0.225709 1 494 ADD 0.100527 0.224966 0.199679 0.183772\n1 410 410 2 1 0.204453 1 494 ADD -0.0269847 0.230987 0.0136477 0.0423928\n1 411 411 2 1 0.205466 1 494 ADD -0.469791 0.240515 3.81526 1.29424\n1 412 412 2 1 0.19332 1 494 ADD 0.0515073 0.229386 0.05042 0.0849517\n1 413 413 2 1 0.20749 1 494 ADD 0.182291 0.248882 0.53647 0.333576\n1 414 414 2 1 0.190283 1 494 ADD 0.289216 0.2395 1.45825 0.643574\n1 415 415 2 1 0.199393 1 494 ADD 0.113183 0.232928 0.236113 0.202713\n1 416 416 2 1 0.203441 1 494 ADD 0.252299 0.243939 1.06972 0.52142\n1 417 417 2 1 0.197368 1 494 ADD 0.686466 0.233128 8.5754 2.46758\n1 418 418 2 1 0.19332 1 494 ADD -0.223279 0.240453 0.862256 0.452092\n1 419 419 2 1 0.184211 1 494 ADD 0.0289128 0.254994 0.0128565 0.0410902\n1 420 420 2 1 0.208502 1 494 ADD 0.258976 0.231243 1.25423 0.580465\n1 421 421 2 1 0.190283 1 494 ADD 0.261297 0.236457 1.22113 0.570024\n1 422 422 2 1 0.211538 1 494 ADD 0.363307 0.228262 2.53327 0.952846\n1 423 423 2 1 0.188259 1 494 ADD 0.0530746 0.237276 0.0500343 0.0845983\n1 424 424 2 1 0.211538 1 494 ADD -0.139296 0.219001 0.404565 0.280055\n1 425 425 2 1 0.202429 1 494 ADD -0.0346592 0.247346 0.0196348 0.0513124\n1 426 426 2 1 0.209514 1 494 ADD 0.0378373 0.226134 0.0279968 0.0619226\n1 427 427 2 1 0.19332 1 494 ADD 0.09836 0.262449 0.140459 0.150074\n1 428 428 2 1 0.192308 1 494 ADD 0.25426 0.241674 1.10687 0.533483\n1 429 429 2 1 0.22166 1 494 ADD 0.14721 0.237802 0.383212 0.270925\n1 430 430 2 1 0.225709 1 494 ADD 0.278268 0.223631 1.54832 0.670839\n1 431 431 2 1 0.200405 1 494 ADD -0.26871 0.219408 1.4999 0.656223\n1 432 432 2 1 0.194332 1 494 ADD -0.226225 0.237684 0.905906 0.466987\n1 433 433 2 1 0.187247 1 494 ADD -0.117095 0.24086 0.236346 0.202831\n1 434 434 2 1 0.229757 1 494 ADD 0.0478362 0.221533 0.0466272 0.0814246\n1 435 435 2 1 0.215587 1 494 ADD -0.11193 0.230876 0.235038 0.202169\n1 436 436 2 1 0.198381 1 494 ADD -0.102686 0.235687 0.189825 0.178446\n1 437 437 2 1 0.187247 1 494 ADD 0.728619 0.24406 8.84078 2.53081\n1 438 438 2 1 0.19332 1 494 ADD 0.0639365 0.257064 0.061861 0.0949717\n1 439 439 2 1 0.211538 1 494 ADD 0.0643578 0.227473 0.0800465 0.109448\n1 440 440 2 1 0.208502 1 494 ADD -0.178705 0.230782 0.599609 0.357805\n1 441 441 2 1 0.17915 1 494 ADD -0.149983 0.246238 0.371001 0.265633\n1 442 442 2 1 0.192308 1 494 ADD -0.121635 0.23594 0.265772 0.217397\n1 443 443 2 1 0.210526 1 494 ADD 0.316958 0.234743 1.82314 0.752174\n1 444 444 2 1 0.212551 1 494 ADD -0.0059142 0.2386 0.000614398 0.00867431\n1 445 445 2 1 0.183198 1 494 ADD 0.382624 0.260668 2.1546 0.847274\n1 446 446 2 1 0.211538 1 494 ADD -0.210875 0.227938 0.855887 0.449903\n1 447 447 2 1 0.228745 1 494 ADD -0.284376 0.231705 1.50632 0.658167\n1 448 448 2 1 0.212551 1 494 ADD -0.0950486 0.23883 0.158385 0.160743\n1 449 449 2 1 0.190283 1 494 ADD 0.461651 0.231861 3.96434 1.33279\n1 450 450 2 1 0.199393 1 494 ADD 0.45498 0.234569 3.76221 1.28047\n1 451 451 2 1 0.219636 1 494 ADD 0.334421 0.221311 2.28338 0.883504\n1 452 452 2 1 0.205466 1 494 ADD -0.0479219 0.231013 0.0430323 0.077968\n1 453 453 2 1 0.203441 1 494 ADD 0.234922 0.240263 0.956035 0.483877\n1 454 454 2 1 0.196356 1 494 ADD -0.155836 0.234057 0.443291 0.296246\n1 455 455 2 1 0.209514 1 494 ADD -0.330102 0.237573 1.93065 0.783339\n1 456 456 2 1 0.173077 1 494 
ADD 0.144614 0.26702 0.293312 0.230545\n1 457 457 2 1 0.210526 1 494 ADD 0.145348 0.234285 0.384882 0.271644\n1 458 458 2 1 0.196356 1 494 ADD 0.140847 0.234703 0.360127 0.260874\n1 459 459 2 1 0.208502 1 494 ADD 0.193186 0.233238 0.686043 0.389858\n1 460 460 2 1 0.203441 1 494 ADD 0.21919 0.23817 0.846969 0.446832\n1 461 461 2 1 0.215587 1 494 ADD 0.0552649 0.217075 0.0648158 0.0974314\n1 462 462 2 1 0.219636 1 494 ADD 0.502447 0.232709 4.66178 1.51087\n1 463 463 2 1 0.181174 1 494 ADD 0.304298 0.254609 1.42841 0.634466\n1 464 464 2 1 0.189271 1 494 ADD 0.116401 0.240485 0.234283 0.201787\n1 465 465 2 1 0.20749 1 494 ADD 0.492254 0.244773 4.04438 1.35341\n1 466 466 2 1 0.187247 1 494 ADD -0.286202 0.233459 1.50287 0.657124\n1 467 467 2 1 0.222672 1 494 ADD -0.327509 0.237497 1.90165 0.774965\n1 468 468 2 1 0.201417 1 494 ADD 0.314591 0.245737 1.6389 0.697936\n1 469 469 2 1 0.209514 1 494 ADD -0.0718081 0.240121 0.0894309 0.116394\n1 470 470 2 1 0.20749 1 494 ADD -0.146232 0.238125 0.377114 0.268289\n1 471 471 2 1 0.201417 1 494 ADD -0.233836 0.230135 1.03242 0.509212\n1 472 472 2 1 0.205466 1 494 ADD -0.17052 0.23098 0.545008 0.336898\n1 473 473 2 1 0.206478 1 494 ADD 0.26603 0.240309 1.22553 0.571414\n1 474 474 2 1 0.206478 1 494 ADD -0.321393 0.238402 1.81741 0.750504\n1 475 475 2 1 0.197368 1 494 ADD 0.16096 0.234656 0.470515 0.307373\n1 476 476 2 1 0.177126 1 494 ADD 0.318222 0.264096 1.4519 0.641639\n1 477 477 2 1 0.204453 1 494 ADD -0.246297 0.23133 1.13359 0.542103\n1 478 478 2 1 0.190283 1 494 ADD -0.319282 0.244343 1.70746 0.718251\n1 479 479 2 1 0.200405 1 494 ADD 0.088342 0.23918 0.136422 0.147603\n1 480 480 2 1 0.202429 1 494 ADD -0.367588 0.241076 2.32495 0.895122\n1 481 481 2 1 0.214575 1 494 ADD -0.0688842 0.215156 0.102502 0.125607\n1 482 482 2 1 0.224696 1 494 ADD -0.247391 0.229359 1.16341 0.551666\n1 483 483 2 1 0.209514 1 494 ADD -0.0761799 0.245489 0.0962979 0.121295\n1 484 484 2 1 0.192308 1 494 ADD 0.239937 0.245257 0.957087 0.48423\n1 485 485 2 1 0.200405 1 494 ADD -0.176733 0.243536 0.526638 0.329732\n1 486 486 2 1 0.20749 1 494 ADD -0.0285663 0.23836 0.0143628 0.0435405\n1 487 487 2 1 0.189271 1 494 ADD 0.241237 0.239242 1.01675 0.504051\n1 488 488 2 1 0.175101 1 494 ADD 0.199374 0.24945 0.638806 0.372487\n1 489 489 2 1 0.184211 1 494 ADD -0.0551669 0.240631 0.0525597 0.0868924\n1 490 490 2 1 0.206478 1 494 ADD 0.0445959 0.242774 0.0337432 0.0684129\n1 491 491 2 1 0.194332 1 494 ADD -0.172776 0.241915 0.510081 0.323212\n1 492 492 2 1 0.208502 1 494 ADD -0.412855 0.235595 3.07089 1.09851\n1 493 493 2 1 0.186235 1 494 ADD -0.391253 0.247675 2.49547 0.942432\n1 494 494 2 1 0.197368 1 494 ADD -0.655712 0.250739 7.25122 2.14964\n1 495 495 2 1 0.189271 1 494 ADD -0.249574 0.25173 0.982941 0.492854\n1 496 496 2 1 0.199393 1 494 ADD 0.306995 0.239526 1.64269 0.699064\n1 497 497 2 1 0.217611 1 494 ADD -0.130407 0.225958 0.33308 0.248836\n1 498 498 2 1 0.201417 1 494 ADD -0.225674 0.236261 0.912386 0.469183\n1 499 499 2 1 0.197368 1 494 ADD -0.102442 0.22702 0.203622 0.185877\n1 500 500 2 1 0.192308 1 494 ADD 0.119692 0.231191 0.268035 0.218494\n1 501 501 2 1 0.0941296 1 494 ADD -0.436143 0.326923 1.77978 0.739508\n1 502 502 2 1 0.090081 1 494 ADD 0.310088 0.331368 0.875688 0.456695\n1 503 503 2 1 0.110324 1 494 ADD 0.00955919 0.295072 0.00104951 0.0113714\n1 504 504 2 1 0.105263 1 494 ADD -0.198829 0.29301 0.460463 0.303288\n1 505 505 2 1 0.090081 1 494 ADD 0.102455 0.337407 0.0922059 0.118392\n1 506 506 2 1 0.0921053 1 494 ADD -0.440926 0.344414 1.63897 0.697958\n1 507 
507 2 1 0.0991903 1 494 ADD -0.276535 0.311384 0.788693 0.426553\n1 508 508 2 1 0.11336 1 494 ADD -0.381384 0.29551 1.66564 0.70588\n1 509 509 2 1 0.118421 1 494 ADD 0.0492791 0.295938 0.0277283 0.0616058\n1 510 510 2 1 0.0850202 1 494 ADD 0.175166 0.356854 0.240946 0.205147\n1 511 511 2 1 0.102227 1 494 ADD 0.442525 0.309508 2.04424 0.815924\n1 512 512 2 1 0.0840081 1 494 ADD 0.404764 0.340089 1.4165 0.630821\n1 513 513 2 1 0.0941296 1 494 ADD -0.187753 0.353107 0.282722 0.22554\n1 514 514 2 1 0.0961538 1 494 ADD -0.290289 0.335819 0.747222 0.411889\n1 515 515 2 1 0.120445 1 494 ADD -0.273577 0.293766 0.867278 0.453815\n1 516 516 2 1 0.118421 1 494 ADD -0.0311153 0.272691 0.0130198 0.0413621\n1 517 517 2 1 0.107287 1 494 ADD 0.0467026 0.302144 0.0238922 0.0569214\n1 518 518 2 1 0.090081 1 494 ADD -0.274852 0.328647 0.699418 0.394717\n1 519 519 2 1 0.097166 1 494 ADD 0.0331264 0.314412 0.0111007 0.0380618\n1 520 520 2 1 0.0961538 1 494 ADD 0.67885 0.32347 4.40432 1.44553\n1 521 521 2 1 0.0981781 1 494 ADD -0.22432 0.321376 0.487203 0.314099\n1 522 522 2 1 0.101215 1 494 ADD -0.336213 0.330141 1.03713 0.510757\n1 523 523 2 1 0.118421 1 494 ADD -0.194195 0.288911 0.451804 0.299747\n1 524 524 2 1 0.097166 1 494 ADD 0.0436768 0.318337 0.0188247 0.0501856\n1 525 525 2 1 0.0981781 1 494 ADD 0.0439357 0.304233 0.0208556 0.0529718\n1 526 526 2 1 0.107287 1 494 ADD -0.154487 0.304537 0.257339 0.213281\n1 527 527 2 1 0.102227 1 494 ADD -0.497902 0.309017 2.59611 0.970103\n1 528 528 2 1 0.112348 1 494 ADD 0.419456 0.301802 1.93164 0.783626\n1 529 529 2 1 0.107287 1 494 ADD 0.202182 0.315614 0.41037 0.282511\n1 530 530 2 1 0.103239 1 494 ADD -0.284723 0.314279 0.820758 0.437756\n1 531 531 2 1 0.097166 1 494 ADD 0.45056 0.306155 2.16583 0.850446\n1 532 532 2 1 0.111336 1 494 ADD -0.12298 0.302944 0.164796 0.16445\n1 533 533 2 1 0.103239 1 494 ADD -0.00615803 0.331467 0.000345145 0.00648543\n1 534 534 2 1 0.0910931 1 494 ADD 0.0614936 0.35137 0.0306289 0.064961\n1 535 535 2 1 0.105263 1 494 ADD -0.56162 0.308947 3.30458 1.1606\n1 536 536 2 1 0.123482 1 494 ADD 0.0686062 0.280172 0.0599623 0.0933657\n1 537 537 2 1 0.0931174 1 494 ADD -0.261754 0.334214 0.613392 0.362997\n1 538 538 2 1 0.097166 1 494 ADD -0.591172 0.340475 3.01479 1.0835\n1 539 539 2 1 0.0809717 1 494 ADD 0.288105 0.339798 0.718885 0.401746\n1 540 540 2 1 0.102227 1 494 ADD -0.415544 0.317059 1.71772 0.721276\n1 541 541 2 1 0.0910931 1 494 ADD -0.208688 0.314065 0.441523 0.295516\n1 542 542 2 1 0.110324 1 494 ADD -0.275261 0.33381 0.679969 0.387643\n1 543 543 2 1 0.109312 1 494 ADD 0.260132 0.306144 0.721994 0.402864\n1 544 544 2 1 0.0961538 1 494 ADD -0.220378 0.310263 0.504515 0.321006\n1 545 545 2 1 0.0951417 1 494 ADD 0.358749 0.329067 1.18854 0.559682\n1 546 546 2 1 0.0991903 1 494 ADD -0.180057 0.334088 0.29047 0.229207\n1 547 547 2 1 0.097166 1 494 ADD 0.377218 0.297313 1.60975 0.689249\n1 548 548 2 1 0.0941296 1 494 ADD 0.0172713 0.33197 0.00270679 0.0184045\n1 549 549 2 1 0.0951417 1 494 ADD -0.353715 0.305211 1.34309 0.608202\n1 550 550 2 1 0.0961538 1 494 ADD 0.488113 0.28927 2.8473 1.03845\n1 551 551 2 1 0.103239 1 494 ADD -0.327791 0.311237 1.10921 0.534239\n1 552 552 2 1 0.0931174 1 494 ADD 0.528817 0.333953 2.50749 0.945747\n1 553 553 2 1 0.0981781 1 494 ADD -0.197011 0.317247 0.385644 0.271973\n1 554 554 2 1 0.100202 1 494 ADD 0.824718 0.296976 7.50068 2.20989\n1 555 555 2 1 0.105263 1 494 ADD 0.0664456 0.315261 0.0444214 0.0793177\n1 556 556 2 1 0.0961538 1 494 ADD -0.59352 0.313385 3.58686 1.23479\n1 557 557 2 1 0.082996 
1 494 ADD -0.820179 0.360396 5.17913 1.64093\n1 558 558 2 1 0.0991903 1 494 ADD -0.0604722 0.279883 0.0466831 0.0814775\n1 559 559 2 1 0.0981781 1 494 ADD -0.22426 0.318886 0.494576 0.317049\n1 560 560 2 1 0.107287 1 494 ADD 0.0816086 0.322466 0.0640477 0.0967965\n1 561 561 2 1 0.0789474 1 494 ADD -0.324927 0.308107 1.11217 0.535196\n1 562 562 2 1 0.101215 1 494 ADD 0.726097 0.328889 4.87406 1.56442\n1 563 563 2 1 0.0991903 1 494 ADD -0.282247 0.318377 0.785917 0.425578\n1 564 564 2 1 0.107287 1 494 ADD -0.314569 0.301737 1.08686 0.526999\n1 565 565 2 1 0.103239 1 494 ADD -0.0507034 0.310511 0.0266636 0.060336\n1 566 566 2 1 0.0981781 1 494 ADD 0.180708 0.317846 0.323236 0.244377\n1 567 567 2 1 0.101215 1 494 ADD 0.0743624 0.29151 0.065073 0.0976433\n1 568 568 2 1 0.0981781 1 494 ADD 0.511137 0.338147 2.28488 0.883922\n1 569 569 2 1 0.0951417 1 494 ADD 0.0731738 0.331255 0.0487962 0.0834559\n1 570 570 2 1 0.100202 1 494 ADD -0.460413 0.31248 2.17096 0.851895\n1 571 571 2 1 0.0890688 1 494 ADD 0.180927 0.3386 0.285517 0.226867\n1 572 572 2 1 0.119433 1 494 ADD -0.228535 0.294676 0.601473 0.358509\n1 573 573 2 1 0.0921053 1 494 ADD 0.375456 0.342802 1.19959 0.563195\n1 574 574 2 1 0.090081 1 494 ADD 0.00581415 0.338572 0.000294897 0.00599143\n1 575 575 2 1 0.0931174 1 494 ADD -0.0765169 0.310069 0.0608972 0.0941591\n1 576 576 2 1 0.116397 1 494 ADD -0.146635 0.287723 0.259734 0.214455\n1 577 577 2 1 0.0921053 1 494 ADD -0.381399 0.345533 1.21838 0.569152\n1 578 578 2 1 0.0850202 1 494 ADD -0.351122 0.363064 0.935301 0.476918\n1 579 579 2 1 0.107287 1 494 ADD 0.424554 0.302515 1.96956 0.794541\n1 580 580 2 1 0.109312 1 494 ADD -0.587852 0.291894 4.05589 1.35638\n1 581 581 2 1 0.102227 1 494 ADD 0.0873115 0.29797 0.0858615 0.113788\n1 582 582 2 1 0.100202 1 494 ADD -0.198621 0.337732 0.345866 0.254564\n1 583 583 2 1 0.105263 1 494 ADD 0.408208 0.33393 1.49435 0.654542\n1 584 584 2 1 0.102227 1 494 ADD -0.40841 0.305103 1.79185 0.74304\n1 585 585 2 1 0.102227 1 494 ADD 0.196912 0.304119 0.419234 0.286241\n1 586 586 2 1 0.0910931 1 494 ADD -0.349025 0.348287 1.00424 0.499921\n1 587 587 2 1 0.118421 1 494 ADD 0.0416863 0.294488 0.0200379 0.0518653\n1 588 588 2 1 0.100202 1 494 ADD 0.340765 0.322271 1.11806 0.5371\n1 589 589 2 1 0.105263 1 494 ADD 0.177953 0.303563 0.343646 0.253574\n1 590 590 2 1 0.0910931 1 494 ADD 0.296492 0.310501 0.911801 0.468985\n1 591 591 2 1 0.0961538 1 494 ADD 0.013249 0.34901 0.00144109 0.0133544\n1 592 592 2 1 0.102227 1 494 ADD 0.465436 0.347409 1.79489 0.743928\n1 593 593 2 1 0.0991903 1 494 ADD 0.0934979 0.335674 0.077583 0.107571\n1 594 594 2 1 0.0931174 1 494 ADD -0.169601 0.319473 0.28183 0.225115\n1 595 595 2 1 0.0880567 1 494 ADD -0.201814 0.345762 0.340683 0.25225\n1 596 596 2 1 0.097166 1 494 ADD -0.167482 0.331951 0.254558 0.211914\n1 597 597 2 1 0.124494 1 494 ADD 0.190094 0.278027 0.467482 0.306143\n1 598 598 2 1 0.0921053 1 494 ADD -0.37021 0.32461 1.30069 0.595016\n1 599 599 2 1 0.0840081 1 494 ADD 0.573399 0.345478 2.75469 1.01336\n1 600 600 2 1 0.0951417 1 494 ADD 0.453057 0.322865 1.96908 0.794401\n1 601 601 2 1 0.0880567 1 494 ADD -0.308767 0.347657 0.788789 0.426587\n1 602 602 2 1 0.104251 1 494 ADD -0.0118625 0.308863 0.00147508 0.0135134\n1 603 603 2 1 0.0981781 1 494 ADD 0.00980135 0.317011 0.000955924 0.0108462\n1 604 604 2 1 0.102227 1 494 ADD 0.6312 0.314387 4.03093 1.34995\n1 605 605 2 1 0.100202 1 494 ADD 0.354484 0.317973 1.24283 0.576874\n1 606 606 2 1 0.0890688 1 494 ADD 0.226357 0.362994 0.388858 0.273354\n1 607 607 2 1 0.0991903 1 494 
ADD 0.391518 0.308032 1.61552 0.690971\n1 608 608 2 1 0.0860324 1 494 ADD -0.203021 0.343307 0.349716 0.256275\n1 609 609 2 1 0.105263 1 494 ADD 0.254146 0.33115 0.589003 0.353788\n1 610 610 2 1 0.0870445 1 494 ADD -0.199706 0.317443 0.395779 0.276317\n1 611 611 2 1 0.0931174 1 494 ADD -0.747393 0.354263 4.45087 1.45738\n1 612 612 2 1 0.0991903 1 494 ADD -0.063561 0.298737 0.0452692 0.0801326\n1 613 613 2 1 0.0931174 1 494 ADD 0.455252 0.338464 1.80917 0.748098\n1 614 614 2 1 0.101215 1 494 ADD -0.190535 0.328875 0.33565 0.249992\n1 615 615 2 1 0.102227 1 494 ADD 0.287376 0.300412 0.915093 0.470099\n1 616 616 2 1 0.090081 1 494 ADD -0.177886 0.315293 0.318315 0.242132\n1 617 617 2 1 0.0921053 1 494 ADD -0.158722 0.37289 0.18118 0.173691\n1 618 618 2 1 0.116397 1 494 ADD 0.144019 0.303738 0.224824 0.196961\n1 619 619 2 1 0.082996 1 494 ADD -1.50511 0.433968 14.3319 3.81462\n1 620 620 2 1 0.110324 1 494 ADD 0.319165 0.278322 1.31503 0.599485\n1 621 621 2 1 0.0910931 1 494 ADD -0.434971 0.327481 1.7642 0.734943\n1 622 622 2 1 0.110324 1 494 ADD 0.00335707 0.289329 0.000134628 0.00403925\n1 623 623 2 1 0.0981781 1 494 ADD 0.11689 0.340496 0.11785 0.135858\n1 624 624 2 1 0.110324 1 494 ADD 0.236098 0.336082 0.49351 0.316624\n1 625 625 2 1 0.100202 1 494 ADD 0.429245 0.285215 2.26498 0.878347\n1 626 626 2 1 0.106275 1 494 ADD 0.433645 0.309938 1.95758 0.791095\n1 627 627 2 1 0.109312 1 494 ADD -0.320424 0.289873 1.2219 0.570267\n1 628 628 2 1 0.0840081 1 494 ADD -0.616213 0.343009 3.22739 1.14016\n1 629 629 2 1 0.0961538 1 494 ADD 0.167743 0.301854 0.308811 0.237764\n1 630 630 2 1 0.0961538 1 494 ADD 0.0365878 0.303249 0.0145571 0.0438479\n1 631 631 2 1 0.0860324 1 494 ADD 0.103795 0.328547 0.0998064 0.123746\n1 632 632 2 1 0.104251 1 494 ADD 0.310586 0.313347 0.982457 0.492694\n1 633 633 2 1 0.119433 1 494 ADD 0.00278218 0.274045 0.000103068 0.00353219\n1 634 634 2 1 0.0941296 1 494 ADD -0.0578071 0.316995 0.0332551 0.0678814\n1 635 635 2 1 0.102227 1 494 ADD 0.496969 0.289865 2.93945 1.06329\n1 636 636 2 1 0.0991903 1 494 ADD 0.0180658 0.304152 0.00352804 0.0210732\n1 637 637 2 1 0.102227 1 494 ADD 0.126479 0.332471 0.144721 0.152654\n1 638 638 2 1 0.111336 1 494 ADD 0.0436574 0.294947 0.0219093 0.0543696\n1 639 639 2 1 0.111336 1 494 ADD -0.581278 0.305376 3.62325 1.2443\n1 640 640 2 1 0.0991903 1 494 ADD 0.417522 0.322791 1.67307 0.708083\n1 641 641 2 1 0.0991903 1 494 ADD 0.108077 0.318881 0.114871 0.13391\n1 642 642 2 1 0.109312 1 494 ADD 0.0848109 0.302968 0.0783629 0.108168\n1 643 643 2 1 0.104251 1 494 ADD -0.198427 0.325269 0.372148 0.266133\n1 644 644 2 1 0.097166 1 494 ADD -0.0469447 0.332968 0.0198777 0.0516462\n1 645 645 2 1 0.1083 1 494 ADD -0.0480022 0.30452 0.0248479 0.0581177\n1 646 646 2 1 0.0880567 1 494 ADD 0.123623 0.335671 0.135635 0.147118\n1 647 647 2 1 0.111336 1 494 ADD 0.36958 0.29253 1.59616 0.685188\n1 648 648 2 1 0.0981781 1 494 ADD 0.672383 0.313677 4.59481 1.49392\n1 649 649 2 1 0.126518 1 494 ADD -0.17034 0.271135 0.394695 0.275854\n1 650 650 2 1 0.1083 1 494 ADD -0.147151 0.288967 0.259317 0.214251\n1 651 651 2 1 0.124494 1 494 ADD 0.169238 0.306522 0.304839 0.235925\n1 652 652 2 1 0.100202 1 494 ADD 0.236572 0.329367 0.515904 0.325512\n1 653 653 2 1 0.102227 1 494 ADD -0.254036 0.32999 0.592637 0.355166\n1 654 654 2 1 0.0921053 1 494 ADD -0.240042 0.318119 0.56937 0.346296\n1 655 655 2 1 0.0921053 1 494 ADD 0.322536 0.340868 0.89533 0.463395\n1 656 656 2 1 0.0809717 1 494 ADD 0.603658 0.342091 3.11386 1.10998\n1 657 657 2 1 0.104251 1 494 ADD -0.0829316 
0.310929 0.0711403 0.102547\n1 658 658 2 1 0.097166 1 494 ADD -0.342029 0.316176 1.17022 0.553843\n1 659 659 2 1 0.105263 1 494 ADD -0.0946202 0.319957 0.0874547 0.114957\n1 660 660 2 1 0.101215 1 494 ADD -0.373496 0.344597 1.17476 0.55529\n1 661 661 2 1 0.115385 1 494 ADD 0.0664295 0.287107 0.0535344 0.0877654\n1 662 662 2 1 0.0951417 1 494 ADD -0.6004 0.313048 3.67842 1.25868\n1 663 663 2 1 0.104251 1 494 ADD -0.01625 0.301644 0.00290213 0.019071\n1 664 664 2 1 0.118421 1 494 ADD 0.126744 0.278869 0.206565 0.187439\n1 665 665 2 1 0.107287 1 494 ADD -0.744817 0.315733 5.56494 1.73699\n1 666 666 2 1 0.0961538 1 494 ADD 0.593632 0.313519 3.58514 1.23434\n1 667 667 2 1 0.090081 1 494 ADD 0.180964 0.334361 0.292922 0.230361\n1 668 668 2 1 0.0799595 1 494 ADD 0.312292 0.331877 0.885455 0.460031\n1 669 669 2 1 0.0951417 1 494 ADD 0.104018 0.30973 0.112784 0.132534\n1 670 670 2 1 0.0921053 1 494 ADD 0.0447955 0.332924 0.0181042 0.049165\n1 671 671 2 1 0.1083 1 494 ADD 0.00190864 0.297955 4.10343e-05 0.0022254\n1 672 672 2 1 0.097166 1 494 ADD -0.48063 0.329908 2.12244 0.838169\n1 673 673 2 1 0.097166 1 494 ADD 0.246399 0.360598 0.46691 0.305911\n1 674 674 2 1 0.103239 1 494 ADD -0.289069 0.304386 0.901892 0.465625\n1 675 675 2 1 0.111336 1 494 ADD -0.0402951 0.292478 0.0189809 0.0504045\n1 676 676 2 1 0.106275 1 494 ADD 0.0284315 0.323399 0.00772896 0.0315429\n1 677 677 2 1 0.090081 1 494 ADD 0.289791 0.35093 0.681915 0.388353\n1 678 678 2 1 0.0951417 1 494 ADD -0.0614315 0.305285 0.0404922 0.0754513\n1 679 679 2 1 0.11336 1 494 ADD -0.374606 0.301244 1.54637 0.670252\n1 680 680 2 1 0.102227 1 494 ADD 0.290837 0.301849 0.928369 0.474583\n1 681 681 2 1 0.12247 1 494 ADD -0.18676 0.290505 0.413296 0.283745\n1 682 682 2 1 0.097166 1 494 ADD -0.156277 0.354132 0.194741 0.181115\n1 683 683 2 1 0.101215 1 494 ADD 0.272695 0.309501 0.7763 0.422192\n1 684 684 2 1 0.0860324 1 494 ADD 0.29743 0.331155 0.806692 0.432856\n1 685 685 2 1 0.0931174 1 494 ADD 0.504035 0.32166 2.45543 0.931373\n1 686 686 2 1 0.082996 1 494 ADD 0.278421 0.345914 0.64784 0.375836\n1 687 687 2 1 0.0840081 1 494 ADD 0.0152457 0.321919 0.00224286 0.0167224\n1 688 688 2 1 0.100202 1 494 ADD -0.0440424 0.314047 0.0196677 0.0513576\n1 689 689 2 1 0.0890688 1 494 ADD -0.428954 0.324062 1.75213 0.731399\n1 690 690 2 1 0.101215 1 494 ADD 0.495217 0.306018 2.61877 0.976309\n1 691 691 2 1 0.0921053 1 494 ADD 0.0600308 0.366272 0.0268622 0.0605745\n1 692 692 2 1 0.0991903 1 494 ADD -0.0622685 0.336887 0.034164 0.0688685\n1 693 693 2 1 0.12247 1 494 ADD 0.20889 0.286301 0.532342 0.331965\n1 694 694 2 1 0.0850202 1 494 ADD 0.138027 0.354657 0.151465 0.15668\n1 695 695 2 1 0.0991903 1 494 ADD 0.0421837 0.32922 0.0164178 0.0467021\n1 696 696 2 1 0.107287 1 494 ADD 0.408166 0.30228 1.82328 0.752215\n1 697 697 2 1 0.103239 1 494 ADD 0.0195487 0.305664 0.00409022 0.022731\n1 698 698 2 1 0.102227 1 494 ADD 0.62237 0.307755 4.08964 1.36505\n1 699 699 2 1 0.100202 1 494 ADD 0.106159 0.330715 0.103039 0.125975\n1 700 700 2 1 0.097166 1 494 ADD -0.35219 0.323003 1.18889 0.559794\n1 701 701 2 1 0.0931174 1 494 ADD -0.0913276 0.309074 0.0873131 0.114853\n1 702 702 2 1 0.102227 1 494 ADD -0.0411284 0.31314 0.0172507 0.0479321\n1 703 703 2 1 0.124494 1 494 ADD 0.368346 0.292056 1.59068 0.683548\n1 704 704 2 1 0.101215 1 494 ADD 0.104827 0.323346 0.105102 0.127383\n1 705 705 2 1 0.0991903 1 494 ADD -0.0834127 0.323988 0.0662837 0.0986361\n1 706 706 2 1 0.104251 1 494 ADD 0.371069 0.320284 1.34226 0.607945\n1 707 707 2 1 0.0931174 1 494 ADD 0.431687 0.306936 
1.97806 0.796982\n1 708 708 2 1 0.0910931 1 494 ADD 0.0369178 0.308988 0.0142754 0.0434016\n1 709 709 2 1 0.101215 1 494 ADD -0.368832 0.326152 1.27884 0.588187\n1 710 710 2 1 0.0991903 1 494 ADD -0.0679006 0.341843 0.0394542 0.0744035\n1 711 711 2 1 0.12753 1 494 ADD -0.0981107 0.285537 0.118062 0.135996\n1 712 712 2 1 0.0769231 1 494 ADD -0.441524 0.345384 1.6342 0.696537\n1 713 713 2 1 0.104251 1 494 ADD 0.219182 0.328427 0.44538 0.297107\n1 714 714 2 1 0.103239 1 494 ADD -0.223007 0.28944 0.593634 0.355544\n1 715 715 2 1 0.0870445 1 494 ADD -0.252563 0.326983 0.596608 0.356671\n1 716 716 2 1 0.0951417 1 494 ADD -0.075434 0.332345 0.0515177 0.0859515\n1 717 717 2 1 0.111336 1 494 ADD -0.194333 0.276109 0.495369 0.317366\n1 718 718 2 1 0.101215 1 494 ADD -0.349951 0.289619 1.46003 0.644118\n1 719 719 2 1 0.0961538 1 494 ADD 0.185264 0.326076 0.322809 0.244183\n1 720 720 2 1 0.103239 1 494 ADD 0.262951 0.30762 0.730667 0.405975\n1 721 721 2 1 0.0981781 1 494 ADD 0.611909 0.313935 3.79921 1.29008\n1 722 722 2 1 0.0850202 1 494 ADD -0.496555 0.317464 2.44651 0.928904\n1 723 723 2 1 0.115385 1 494 ADD 0.422743 0.303129 1.94489 0.787444\n1 724 724 2 1 0.120445 1 494 ADD 0.259153 0.280662 0.852601 0.448773\n1 725 725 2 1 0.0941296 1 494 ADD 0.601641 0.340177 3.12799 1.11374\n1 726 726 2 1 0.103239 1 494 ADD 0.122195 0.320673 0.145206 0.152946\n1 727 727 2 1 0.0991903 1 494 ADD -0.539699 0.322548 2.79972 1.02558\n1 728 728 2 1 0.1083 1 494 ADD 0.470551 0.31614 2.21541 0.864427\n1 729 729 2 1 0.0890688 1 494 ADD -0.197758 0.345709 0.327225 0.246189\n1 730 730 2 1 0.0951417 1 494 ADD 0.0226142 0.344256 0.00431518 0.0233637\n1 731 731 2 1 0.0991903 1 494 ADD -0.679177 0.3328 4.16487 1.38436\n1 732 732 2 1 0.0981781 1 494 ADD -0.472029 0.328014 2.07087 0.823516\n1 733 733 2 1 0.0840081 1 494 ADD 0.483733 0.328544 2.16782 0.851009\n1 734 734 2 1 0.109312 1 494 ADD 0.20891 0.297766 0.492228 0.316111\n1 735 735 2 1 0.0870445 1 494 ADD -0.212645 0.318953 0.444485 0.296738\n1 736 736 2 1 0.1083 1 494 ADD 0.26843 0.309415 0.752624 0.413811\n1 737 737 2 1 0.0890688 1 494 ADD 0.376959 0.312176 1.45811 0.643531\n1 738 738 2 1 0.121457 1 494 ADD 0.145689 0.26711 0.297489 0.232502\n1 739 739 2 1 0.0981781 1 494 ADD -0.0528801 0.309473 0.0291971 0.0633231\n1 740 740 2 1 0.121457 1 494 ADD 0.251144 0.2854 0.774354 0.421506\n1 741 741 2 1 0.0941296 1 494 ADD 0.728732 0.310343 5.51382 1.7243\n1 742 742 2 1 0.0981781 1 494 ADD -0.16799 0.292482 0.329888 0.247395\n1 743 743 2 1 0.0880567 1 494 ADD -0.207039 0.347466 0.355042 0.258633\n1 744 744 2 1 0.106275 1 494 ADD 0.265025 0.305393 0.7531 0.41398\n1 745 745 2 1 0.0890688 1 494 ADD -0.243662 0.349314 0.486568 0.313845\n1 746 746 2 1 0.1083 1 494 ADD -0.103813 0.325153 0.101937 0.125218\n1 747 747 2 1 0.100202 1 494 ADD 0.482516 0.349913 1.90153 0.774932\n1 748 748 2 1 0.0981781 1 494 ADD -0.0639759 0.322693 0.0393055 0.0742525\n1 749 749 2 1 0.0981781 1 494 ADD 0.624201 0.331865 3.53774 1.22194\n1 750 750 2 1 0.111336 1 494 ADD 0.0580158 0.302329 0.0368243 0.0716947\n1 751 751 2 1 0.0870445 1 494 ADD -0.156513 0.351523 0.198239 0.183\n1 752 752 2 1 0.111336 1 494 ADD -0.215062 0.287642 0.559013 0.342315\n1 753 753 2 1 0.0931174 1 494 ADD -0.0399905 0.31757 0.0158575 0.0458588\n1 754 754 2 1 0.0910931 1 494 ADD 0.255346 0.329684 0.599878 0.357907\n1 755 755 2 1 0.107287 1 494 ADD 0.513648 0.310949 2.72868 1.0063\n1 756 756 2 1 0.0981781 1 494 ADD -0.196656 0.313598 0.393251 0.275237\n1 757 757 2 1 0.103239 1 494 ADD 0.279992 0.325451 0.740154 0.409368\n1 758 758 2 
1 0.0951417 1 494 ADD 0.819266 0.310904 6.77517 2.03416\n1 759 759 2 1 0.0961538 1 494 ADD 0.245266 0.31554 0.604179 0.35953\n1 760 760 2 1 0.106275 1 494 ADD -0.309298 0.296397 1.08895 0.527676\n1 761 761 2 1 0.1083 1 494 ADD -0.171865 0.313403 0.300727 0.234013\n1 762 762 2 1 0.0890688 1 494 ADD 0.122688 0.337279 0.132321 0.145064\n1 763 763 2 1 0.0921053 1 494 ADD 0.363825 0.33193 1.20141 0.563774\n1 764 764 2 1 0.106275 1 494 ADD 0.0739797 0.313886 0.0555498 0.08955\n1 765 765 2 1 0.0890688 1 494 ADD 0.224676 0.326546 0.473396 0.308539\n1 766 766 2 1 0.115385 1 494 ADD 0.128133 0.293003 0.191239 0.179216\n1 767 767 2 1 0.112348 1 494 ADD 0.45929 0.279713 2.69617 0.997447\n1 768 768 2 1 0.103239 1 494 ADD -0.0463494 0.335352 0.0191023 0.050574\n1 769 769 2 1 0.112348 1 494 ADD -0.207646 0.281981 0.542261 0.335831\n1 770 770 2 1 0.100202 1 494 ADD 0.166074 0.317172 0.274167 0.221451\n1 771 771 2 1 0.107287 1 494 ADD -0.136726 0.31038 0.19405 0.180741\n1 772 772 2 1 0.0981781 1 494 ADD -0.026698 0.329301 0.00657311 0.0290107\n1 773 773 2 1 0.104251 1 494 ADD 0.293707 0.316027 0.863734 0.4526\n1 774 774 2 1 0.105263 1 494 ADD -0.0551695 0.304395 0.032849 0.0674365\n1 775 775 2 1 0.0819838 1 494 ADD -0.00974098 0.348476 0.000781373 0.00979457\n1 776 776 2 1 0.0991903 1 494 ADD -0.0514859 0.328703 0.0245341 0.0577272\n1 777 777 2 1 0.100202 1 494 ADD -0.244458 0.297845 0.67364 0.385329\n1 778 778 2 1 0.107287 1 494 ADD -0.296762 0.29798 0.991842 0.495811\n1 779 779 2 1 0.119433 1 494 ADD 0.0642437 0.288952 0.0494322 0.0840442\n1 780 780 2 1 0.0941296 1 494 ADD 0.338729 0.311938 1.17915 0.55669\n1 781 781 2 1 0.111336 1 494 ADD -0.0481514 0.303728 0.0251332 0.0584709\n1 782 782 2 1 0.101215 1 494 ADD -0.158085 0.344591 0.210462 0.189495\n1 783 783 2 1 0.104251 1 494 ADD -0.0236376 0.303662 0.00605935 0.0278185\n1 784 784 2 1 0.090081 1 494 ADD -0.290561 0.32478 0.80038 0.43065\n1 785 785 2 1 0.0991903 1 494 ADD -0.0354098 0.309865 0.0130587 0.0414266\n1 786 786 2 1 0.106275 1 494 ADD -0.93239 0.339799 8.48722 2.44654\n1 787 787 2 1 0.105263 1 494 ADD 0.196986 0.296909 0.440173 0.294958\n1 788 788 2 1 0.0961538 1 494 ADD -0.0669659 0.325543 0.0423146 0.0772635\n1 789 789 2 1 0.100202 1 494 ADD 0.575469 0.303442 3.5966 1.23734\n1 790 790 2 1 0.082996 1 494 ADD 0.34616 0.345046 1.00647 0.500656\n1 791 791 2 1 0.117409 1 494 ADD 0.228108 0.287404 0.629939 0.369188\n1 792 792 2 1 0.082996 1 494 ADD 0.251363 0.343154 0.536567 0.333614\n1 793 793 2 1 0.106275 1 494 ADD 0.229253 0.304041 0.568549 0.345981\n1 794 794 2 1 0.0819838 1 494 ADD -0.961496 0.379548 7.01324 2.092\n1 795 795 2 1 0.0931174 1 494 ADD -0.040321 0.322413 0.0156401 0.045528\n1 796 796 2 1 0.109312 1 494 ADD 0.199684 0.305186 0.428113 0.289953\n1 797 797 2 1 0.097166 1 494 ADD 0.459865 0.320896 2.05367 0.818614\n1 798 798 2 1 0.0910931 1 494 ADD 0.199157 0.336986 0.349275 0.25608\n1 799 799 2 1 0.0870445 1 494 ADD -0.173328 0.337347 0.263988 0.21653\n1 800 800 2 1 0.0850202 1 494 ADD 0.183535 0.326348 0.316284 0.241202\n1 801 801 2 1 0.0840081 1 494 ADD 0.120376 0.33571 0.128572 0.142718\n1 802 802 2 1 0.102227 1 494 ADD -0.20224 0.324815 0.387667 0.272843\n1 803 803 2 1 0.107287 1 494 ADD 0.422694 0.312827 1.82577 0.752939\n1 804 804 2 1 0.0910931 1 494 ADD -0.333904 0.303474 1.21061 0.566691\n1 805 805 2 1 0.0890688 1 494 ADD 0.0262905 0.321202 0.00669948 0.0292972\n1 806 806 2 1 0.104251 1 494 ADD -0.133027 0.289116 0.211707 0.190149\n1 807 807 2 1 0.101215 1 494 ADD 0.518187 0.309018 2.81194 1.02889\n1 808 808 2 1 0.0961538 1 
494 ADD -0.427605 0.321053 1.77391 0.737788\n1 809 809 2 1 0.103239 1 494 ADD -0.212694 0.316732 0.450946 0.299395\n1 810 810 2 1 0.0910931 1 494 ADD -0.14822 0.321204 0.212937 0.190794\n1 811 811 2 1 0.121457 1 494 ADD 0.154717 0.30904 0.250638 0.209978\n1 812 812 2 1 0.0870445 1 494 ADD 0.461988 0.345062 1.79253 0.743238\n1 813 813 2 1 0.118421 1 494 ADD -0.302529 0.293549 1.06212 0.518941\n1 814 814 2 1 0.117409 1 494 ADD -0.251453 0.298158 0.71125 0.398995\n1 815 815 2 1 0.104251 1 494 ADD -0.134737 0.315843 0.181983 0.174136\n1 816 816 2 1 0.107287 1 494 ADD 0.41279 0.307901 1.79737 0.744652\n1 817 817 2 1 0.118421 1 494 ADD 0.573616 0.318533 3.24289 1.14428\n1 818 818 2 1 0.0981781 1 494 ADD -0.224518 0.345962 0.421159 0.287048\n1 819 819 2 1 0.0890688 1 494 ADD -0.391462 0.311626 1.57802 0.679757\n1 820 820 2 1 0.114372 1 494 ADD 0.150255 0.296806 0.25628 0.212761\n1 821 821 2 1 0.0840081 1 494 ADD 0.227747 0.360649 0.398783 0.277598\n1 822 822 2 1 0.109312 1 494 ADD 0.217202 0.317121 0.469113 0.306805\n1 823 823 2 1 0.101215 1 494 ADD -0.0117819 0.312598 0.00142054 0.0132575\n1 824 824 2 1 0.0941296 1 494 ADD 0.016859 0.315112 0.00286242 0.0189373\n1 825 825 2 1 0.0840081 1 494 ADD -0.136272 0.34522 0.15582 0.159245\n1 826 826 2 1 0.111336 1 494 ADD -0.201846 0.294602 0.469428 0.306933\n1 827 827 2 1 0.097166 1 494 ADD 0.0557876 0.336352 0.0275097 0.0613468\n1 828 828 2 1 0.0981781 1 494 ADD 0.104453 0.326458 0.102374 0.125519\n1 829 829 2 1 0.100202 1 494 ADD -0.419195 0.327063 1.64275 0.699081\n1 830 830 2 1 0.0840081 1 494 ADD 0.0775549 0.315612 0.0603824 0.0937229\n1 831 831 2 1 0.0961538 1 494 ADD -0.497017 0.321706 2.38685 0.912362\n1 832 832 2 1 0.0941296 1 494 ADD 0.153953 0.301588 0.260585 0.214871\n1 833 833 2 1 0.107287 1 494 ADD -0.288205 0.282754 1.03892 0.511348\n1 834 834 2 1 0.0961538 1 494 ADD -0.248457 0.337375 0.542348 0.335865\n1 835 835 2 1 0.107287 1 494 ADD -0.487755 0.314821 2.40036 0.916114\n1 836 836 2 1 0.101215 1 494 ADD -0.389641 0.324417 1.44252 0.638779\n1 837 837 2 1 0.102227 1 494 ADD 0.135945 0.313153 0.188456 0.177698\n1 838 838 2 1 0.0961538 1 494 ADD -0.149826 0.328549 0.207955 0.188174\n1 839 839 2 1 0.103239 1 494 ADD -0.668923 0.318222 4.41866 1.44919\n1 840 840 2 1 0.0951417 1 494 ADD -0.208724 0.318111 0.430515 0.290954\n1 841 841 2 1 0.106275 1 494 ADD -0.169803 0.297396 0.326002 0.245634\n1 842 842 2 1 0.101215 1 494 ADD -0.585282 0.335572 3.042 1.09079\n1 843 843 2 1 0.0931174 1 494 ADD -0.156612 0.333036 0.221142 0.195063\n1 844 844 2 1 0.0779352 1 494 ADD -0.0769737 0.336234 0.0524085 0.0867563\n1 845 845 2 1 0.0931174 1 494 ADD -0.219567 0.321335 0.466894 0.305904\n1 846 846 2 1 0.115385 1 494 ADD 0.114671 0.288942 0.157502 0.160229\n1 847 847 2 1 0.114372 1 494 ADD 0.0768471 0.293261 0.0686668 0.100569\n1 848 848 2 1 0.1083 1 494 ADD 0.207336 0.302916 0.468493 0.306554\n1 849 849 2 1 0.101215 1 494 ADD 0.324333 0.277197 1.369 0.616214\n1 850 850 2 1 0.0941296 1 494 ADD 0.578325 0.336333 2.95669 1.06792\n1 851 851 2 1 0.0951417 1 494 ADD -0.0446944 0.323085 0.0191369 0.0506224\n1 852 852 2 1 0.0840081 1 494 ADD 0.68666 0.342331 4.02339 1.34801\n1 853 853 2 1 0.119433 1 494 ADD 0.165057 0.28222 0.342052 0.252862\n1 854 854 2 1 0.109312 1 494 ADD 0.107793 0.313249 0.118414 0.136224\n1 855 855 2 1 0.116397 1 494 ADD -0.150818 0.288197 0.273859 0.221303\n1 856 856 2 1 0.0981781 1 494 ADD 0.154224 0.307088 0.25222 0.210761\n1 857 857 2 1 0.0981781 1 494 ADD -0.71047 0.359464 3.90643 1.31784\n1 858 858 2 1 0.106275 1 494 ADD -0.256215 
0.312279 0.673171 0.385157\n1 859 859 2 1 0.0850202 1 494 ADD -0.184266 0.342895 0.28878 0.22841\n1 860 860 2 1 0.104251 1 494 ADD 0.211303 0.317969 0.441615 0.295554\n1 861 861 2 1 0.0961538 1 494 ADD 0.631858 0.331203 3.63958 1.24856\n1 862 862 2 1 0.0931174 1 494 ADD 0.323357 0.332627 0.945037 0.48019\n1 863 863 2 1 0.0991903 1 494 ADD -0.294522 0.32613 0.815558 0.435947\n1 864 864 2 1 0.103239 1 494 ADD -0.747542 0.3021 6.71721 2.02006\n1 865 865 2 1 0.0799595 1 494 ADD -0.159455 0.369365 0.186366 0.176553\n1 866 866 2 1 0.0991903 1 494 ADD -0.0610923 0.314236 0.0377974 0.0727063\n1 867 867 2 1 0.118421 1 494 ADD -0.356341 0.306278 1.35363 0.611464\n1 868 868 2 1 0.110324 1 494 ADD 0.219799 0.306095 0.515633 0.325405\n1 869 869 2 1 0.090081 1 494 ADD -0.18782 0.310294 0.366383 0.263617\n1 870 870 2 1 0.102227 1 494 ADD -0.0804586 0.30742 0.0684982 0.100433\n1 871 871 2 1 0.0809717 1 494 ADD -0.304277 0.358927 0.718665 0.401667\n1 872 872 2 1 0.103239 1 494 ADD -0.282635 0.331933 0.725018 0.40395\n1 873 873 2 1 0.0890688 1 494 ADD -0.136656 0.352269 0.15049 0.156103\n1 874 874 2 1 0.110324 1 494 ADD 0.117748 0.29896 0.155125 0.158838\n1 875 875 2 1 0.0991903 1 494 ADD -0.383708 0.327424 1.37335 0.617555\n1 876 876 2 1 0.100202 1 494 ADD 0.0709819 0.323359 0.0481864 0.0828887\n1 877 877 2 1 0.0981781 1 494 ADD -0.464502 0.327646 2.00986 0.806097\n1 878 878 2 1 0.106275 1 494 ADD -0.370287 0.307215 1.45276 0.641901\n1 879 879 2 1 0.109312 1 494 ADD 0.0968104 0.286451 0.11422 0.133483\n1 880 880 2 1 0.0910931 1 494 ADD 0.351748 0.367194 0.917639 0.47096\n1 881 881 2 1 0.0910931 1 494 ADD 0.109134 0.325974 0.112087 0.132073\n1 882 882 2 1 0.112348 1 494 ADD -0.337832 0.308913 1.19599 0.562053\n1 883 883 2 1 0.104251 1 494 ADD -0.132837 0.315457 0.17732 0.171542\n1 884 884 2 1 0.097166 1 494 ADD -0.0464263 0.318415 0.0212589 0.0535105\n1 885 885 2 1 0.0981781 1 494 ADD 0.0589363 0.289117 0.0415544 0.0765117\n1 886 886 2 1 0.0991903 1 494 ADD 0.0338843 0.321601 0.011101 0.0380624\n1 887 887 2 1 0.101215 1 494 ADD -0.0421687 0.328555 0.0164727 0.046784\n1 888 888 2 1 0.090081 1 494 ADD 0.0738572 0.31961 0.0534006 0.087646\n1 889 889 2 1 0.090081 1 494 ADD -0.326606 0.308121 1.12358 0.53888\n1 890 890 2 1 0.107287 1 494 ADD -0.471041 0.294646 2.55574 0.959026\n1 891 891 2 1 0.1083 1 494 ADD -0.484897 0.302352 2.57201 0.963494\n1 892 892 2 1 0.116397 1 494 ADD -0.350952 0.303563 1.33659 0.606186\n1 893 893 2 1 0.097166 1 494 ADD 0.0477092 0.308491 0.0239177 0.0569535\n1 894 894 2 1 0.0961538 1 494 ADD 0.210241 0.315981 0.442703 0.296004\n1 895 895 2 1 0.115385 1 494 ADD 0.323838 0.299935 1.16574 0.552411\n1 896 896 2 1 0.118421 1 494 ADD -0.292681 0.294497 0.987711 0.49444\n1 897 897 2 1 0.0991903 1 494 ADD -0.0343325 0.345831 0.00985558 0.0357782\n1 898 898 2 1 0.11336 1 494 ADD 0.611811 0.291699 4.3991 1.44421\n1 899 899 2 1 0.1083 1 494 ADD -0.734433 0.320907 5.23776 1.65557\n1 900 900 2 1 0.0981781 1 494 ADD 1.07961 0.293139 13.5434 3.63245\n1 901 901 2 1 0.0890688 1 494 ADD 0.370849 0.31866 1.35438 0.611696\n1 902 902 2 1 0.0890688 1 494 ADD 0.883087 0.35294 6.26045 1.90846\n1 903 903 2 1 0.1083 1 494 ADD 0.483535 0.311467 2.41008 0.91881\n1 904 904 2 1 0.0688259 1 494 ADD -0.221007 0.352297 0.393543 0.275361\n1 905 905 2 1 0.097166 1 494 ADD 0.0626792 0.303494 0.0426527 0.077596\n1 906 906 2 1 0.0779352 1 494 ADD -0.0354856 0.374167 0.0089944 0.0341199\n1 907 907 2 1 0.110324 1 494 ADD 0.267862 0.30351 0.77889 0.423105\n1 908 908 2 1 0.0951417 1 494 ADD 0.56125 0.34306 2.67653 0.992091\n1 
909 909 2 1 0.0961538 1 494 ADD -0.45069 0.317235 2.01833 0.808521\n1 910 910 2 1 0.117409 1 494 ADD 0.448923 0.302799 2.19804 0.859534\n1 911 911 2 1 0.0981781 1 494 ADD 0.36326 0.32211 1.27183 0.585989\n1 912 912 2 1 0.111336 1 494 ADD 0.0966092 0.313377 0.0950393 0.120408\n1 913 913 2 1 0.0890688 1 494 ADD 0.320095 0.357086 0.80355 0.431758\n1 914 914 2 1 0.107287 1 494 ADD 0.227016 0.308203 0.542549 0.335943\n1 915 915 2 1 0.0991903 1 494 ADD 0.385802 0.310098 1.54786 0.6707\n1 916 916 2 1 0.102227 1 494 ADD 0.647971 0.324107 3.99699 1.34121\n1 917 917 2 1 0.101215 1 494 ADD 0.289154 0.318868 0.822312 0.438297\n1 918 918 2 1 0.0961538 1 494 ADD 0.374032 0.321656 1.35218 0.611015\n1 919 919 2 1 0.11336 1 494 ADD 0.048277 0.312373 0.0238855 0.0569129\n1 920 920 2 1 0.0941296 1 494 ADD -0.551839 0.348059 2.51374 0.947468\n1 921 921 2 1 0.106275 1 494 ADD 0.532446 0.311713 2.9177 1.05743\n1 922 922 2 1 0.103239 1 494 ADD 0.1285 0.313078 0.168461 0.166545\n1 923 923 2 1 0.101215 1 494 ADD 0.0951912 0.311093 0.0936299 0.119408\n1 924 924 2 1 0.118421 1 494 ADD -0.581543 0.297158 3.82992 1.29804\n1 925 925 2 1 0.097166 1 494 ADD 0.131467 0.33338 0.155508 0.159062\n1 926 926 2 1 0.090081 1 494 ADD 0.103624 0.331895 0.0974814 0.122126\n1 927 927 2 1 0.0860324 1 494 ADD -0.539985 0.337693 2.55693 0.959352\n1 928 928 2 1 0.0951417 1 494 ADD -0.678216 0.33671 4.05718 1.35671\n1 929 929 2 1 0.0931174 1 494 ADD -0.062903 0.327696 0.036847 0.0717184\n1 930 930 2 1 0.117409 1 494 ADD 0.0311768 0.278267 0.0125528 0.0405807\n1 931 931 2 1 0.0951417 1 494 ADD -0.0207888 0.303694 0.00468581 0.0243729\n1 932 932 2 1 0.101215 1 494 ADD -0.0281666 0.311968 0.00815172 0.0324243\n1 933 933 2 1 0.111336 1 494 ADD -0.125818 0.328397 0.146787 0.153895\n1 934 934 2 1 0.0809717 1 494 ADD -0.00540114 0.339265 0.000253451 0.0055517\n1 935 935 2 1 0.0961538 1 494 ADD 0.0854552 0.315683 0.0732781 0.104234\n1 936 936 2 1 0.0991903 1 494 ADD 0.308974 0.295162 1.09578 0.529892\n1 937 937 2 1 0.0961538 1 494 ADD 0.00478781 0.315185 0.000230751 0.00529571\n1 938 938 2 1 0.0991903 1 494 ADD 0.804481 0.314108 6.55957 1.98163\n1 939 939 2 1 0.0910931 1 494 ADD -0.269829 0.319819 0.711818 0.3992\n1 940 940 2 1 0.0981781 1 494 ADD 0.291839 0.320734 0.827937 0.440249\n1 941 941 2 1 0.0910931 1 494 ADD -0.406809 0.325383 1.56312 0.675287\n1 942 942 2 1 0.1083 1 494 ADD -0.111771 0.315272 0.125686 0.140893\n1 943 943 2 1 0.0819838 1 494 ADD -0.409007 0.331388 1.52331 0.663302\n1 944 944 2 1 0.104251 1 494 ADD -0.251167 0.306807 0.670181 0.384062\n1 945 945 2 1 0.0870445 1 494 ADD 0.374096 0.322313 1.34713 0.609453\n1 946 946 2 1 0.110324 1 494 ADD -0.0119146 0.292117 0.00166359 0.0143645\n1 947 947 2 1 0.112348 1 494 ADD 0.0987406 0.291545 0.114705 0.133801\n1 948 948 2 1 0.112348 1 494 ADD -0.190394 0.311259 0.374166 0.26701\n1 949 949 2 1 0.112348 1 494 ADD -0.142297 0.314719 0.204431 0.186307\n1 950 950 2 1 0.104251 1 494 ADD -0.210362 0.32476 0.419574 0.286384\n1 951 951 2 1 0.102227 1 494 ADD -0.239762 0.334902 0.512537 0.324182\n1 952 952 2 1 0.0981781 1 494 ADD -0.411637 0.333471 1.52374 0.663432\n1 953 953 2 1 0.102227 1 494 ADD -0.112478 0.303571 0.137283 0.148132\n1 954 954 2 1 0.0941296 1 494 ADD -0.585852 0.311158 3.54499 1.22384\n1 955 955 2 1 0.0890688 1 494 ADD 0.265764 0.342405 0.602435 0.358873\n1 956 956 2 1 0.100202 1 494 ADD 0.0406184 0.316105 0.0165114 0.0468418\n1 957 957 2 1 0.097166 1 494 ADD -0.284559 0.32428 0.770027 0.419978\n1 958 958 2 1 0.104251 1 494 ADD 0.366401 0.295748 1.53487 0.666788\n1 959 959 2 
1 0.0921053 1 494 ADD 0.0649031 0.332198 0.0381713 0.0730921\n1 960 960 2 1 0.110324 1 494 ADD 0.629564 0.331301 3.61105 1.24111\n1 961 961 2 1 0.102227 1 494 ADD -0.0588028 0.335168 0.0307801 0.0651321\n1 962 962 2 1 0.090081 1 494 ADD -0.229848 0.329947 0.485284 0.313329\n1 963 963 2 1 0.0941296 1 494 ADD 0.35503 0.34828 1.03914 0.511418\n1 964 964 2 1 0.090081 1 494 ADD 0.605797 0.342471 3.129 1.11401\n1 965 965 2 1 0.109312 1 494 ADD 0.180911 0.299726 0.364318 0.262714\n1 966 966 2 1 0.0850202 1 494 ADD 0.532672 0.347196 2.35381 0.903169\n1 967 967 2 1 0.097166 1 494 ADD -0.436032 0.321018 1.84492 0.758515\n1 968 968 2 1 0.103239 1 494 ADD -0.00634591 0.333525 0.000362018 0.00664326\n1 969 969 2 1 0.0961538 1 494 ADD -0.232087 0.317154 0.535502 0.333198\n1 970 970 2 1 0.0951417 1 494 ADD 0.373221 0.310884 1.44124 0.638387\n1 971 971 2 1 0.097166 1 494 ADD -0.239757 0.315575 0.577213 0.349298\n1 972 972 2 1 0.100202 1 494 ADD -0.689433 0.316092 4.75727 1.53499\n1 973 973 2 1 0.0910931 1 494 ADD 0.796849 0.325646 5.98772 1.84146\n1 974 974 2 1 0.1083 1 494 ADD 0.342355 0.308272 1.23335 0.573884\n1 975 975 2 1 0.104251 1 494 ADD 0.045303 0.304268 0.0221688 0.0547092\n1 976 976 2 1 0.103239 1 494 ADD 0.253727 0.287726 0.777636 0.422663\n1 977 977 2 1 0.0991903 1 494 ADD 0.295869 0.32383 0.834765 0.442615\n1 978 978 2 1 0.0951417 1 494 ADD -0.217941 0.319437 0.465488 0.305333\n1 979 979 2 1 0.103239 1 494 ADD -0.0681002 0.300092 0.0514975 0.0859333\n1 980 980 2 1 0.100202 1 494 ADD 0.396365 0.31035 1.63112 0.695622\n1 981 981 2 1 0.0840081 1 494 ADD -0.48036 0.33505 2.05548 0.81913\n1 982 982 2 1 0.0870445 1 494 ADD -0.281532 0.352418 0.638173 0.372252\n1 983 983 2 1 0.102227 1 494 ADD -0.564838 0.336733 2.81369 1.02936\n1 984 984 2 1 0.102227 1 494 ADD 0.263928 0.293691 0.807583 0.433167\n1 985 985 2 1 0.0961538 1 494 ADD -0.0939484 0.300055 0.0980339 0.122512\n1 986 986 2 1 0.103239 1 494 ADD -0.271388 0.297233 0.833656 0.442231\n1 987 987 2 1 0.103239 1 494 ADD 0.24434 0.302888 0.650763 0.376916\n1 988 988 2 1 0.100202 1 494 ADD 0.167054 0.301468 0.307064 0.236956\n1 989 989 2 1 0.0941296 1 494 ADD 0.00091681 0.333017 7.57927e-06 0.000955025\n1 990 990 2 1 0.100202 1 494 ADD -0.0686065 0.346434 0.0392184 0.0741639\n1 991 991 2 1 0.107287 1 494 ADD 0.00766918 0.280053 0.000749924 0.00959327\n1 992 992 2 1 0.102227 1 494 ADD -0.231287 0.291735 0.628526 0.368661\n1 993 993 2 1 0.118421 1 494 ADD 0.0661996 0.269154 0.0604934 0.093817\n1 994 994 2 1 0.103239 1 494 ADD 0.367353 0.292254 1.57996 0.68034\n1 995 995 2 1 0.0921053 1 494 ADD 0.0355499 0.356648 0.00993565 0.0359289\n1 996 996 2 1 0.0941296 1 494 ADD -0.367872 0.341627 1.15955 0.55043\n1 997 997 2 1 0.117409 1 494 ADD 0.133392 0.268002 0.247731 0.208536\n1 998 998 2 1 0.0789474 1 494 ADD 0.407348 0.345873 1.38707 0.621782\n1 999 999 2 1 0.102227 1 494 ADD 0.112324 0.327073 0.11794 0.135916\n1 1000 1000 2 1 0.117409 1 494 ADD -0.0405477 0.280429 0.0209068 0.0530405\n"
  },
  {
    "path": "example/example_3chr.annotations",
    "content": "mog_0\tSET1\tmissense\nmog_1\tSET1\tmissense\nmog_2\tSET1\tpLof\nmog_3\tSET1\tpLof\nmog_4\tSET1\tmissense\nmog_5\tSET1\tpLof\nmog_6\tSET1\tpLof\nmog_7\tSET1\tmissense\nmog_8\tSET1\tmissense\nmog_9\tSET1\tpLof\nmog_10\tSET1\tmissense\nmog_11\tSET1\tpLof\nmog_12\tSET1\tpLof\nmog_13\tSET1\tmissense\nmog_14\tSET1\tmissense\nmog_15\tSET1\tmissense\nmog_16\tSET1\tpLof\nmog_17\tSET1\tmissense\nmog_18\tSET1\tpLof\nmog_19\tSET1\tpLof\nmog_20\tSET1\tpLof\nmog_21\tSET1\tpLof\nmog_22\tSET1\tpLof\nmog_23\tSET1\tpLof\nmog_24\tSET1\tpLof\nmog_25\tSET1\tpLof\nmog_26\tSET1\tpLof\nmog_27\tSET1\tmissense\nmog_28\tSET1\tpLof\nmog_29\tSET1\tpLof\nmog_30\tSET1\tpLof\nmog_31\tSET1\tpLof\nmog_32\tSET1\tmissense\nmog_33\tSET1\tmissense\nmog_34\tSET1\tpLof\nmog_35\tSET1\tpLof\nmog_36\tSET1\tmissense\nmog_37\tSET1\tpLof\nmog_38\tSET1\tpLof\nmog_39\tSET1\tpLof\nmog_40\tSET1\tpLof\nmog_41\tSET1\tpLof\nmog_42\tSET1\tmissense\nmog_43\tSET1\tmissense\nmog_44\tSET1\tpLof\nmog_45\tSET1\tmissense\nmog_46\tSET1\tmissense\nmog_47\tSET1\tpLof\nmog_48\tSET1\tmissense\nmog_49\tSET1\tpLof\ninf_0\tSET1\tpLof\ninf_1\tSET1\tpLof\ninf_2\tSET1\tpLof\ninf_3\tSET1\tpLof\ninf_4\tSET1\tpLof\ninf_5\tSET1\tmissense\ninf_6\tSET1\tpLof\ninf_7\tSET1\tpLof\ninf_8\tSET1\tpLof\ninf_9\tSET1\tmissense\ninf_10\tSET1\tmissense\ninf_11\tSET1\tpLof\ninf_12\tSET1\tpLof\ninf_13\tSET1\tpLof\ninf_14\tSET1\tmissense\ninf_15\tSET1\tpLof\ninf_16\tSET1\tpLof\ninf_17\tSET1\tmissense\ninf_18\tSET1\tpLof\ninf_19\tSET1\tpLof\ninf_20\tSET1\tmissense\ninf_21\tSET1\tmissense\ninf_22\tSET1\tmissense\ninf_23\tSET1\tpLof\ninf_24\tSET1\tpLof\ninf_25\tSET1\tpLof\ninf_26\tSET1\tpLof\ninf_27\tSET1\tpLof\ninf_28\tSET1\tpLof\ninf_29\tSET1\tpLof\ninf_30\tSET1\tpLof\ninf_31\tSET1\tmissense\ninf_32\tSET1\tpLof\ninf_33\tSET1\tpLof\ninf_34\tSET1\tpLof\ninf_35\tSET1\tmissense\ninf_36\tSET1\tpLof\ninf_37\tSET1\tmissense\ninf_38\tSET1\tmissense\ninf_39\tSET1\tpLof\ninf_40\tSET1\tpLof\ninf_41\tSET1\tmissense\ninf_42\tSET1\tpLof\ninf_43\tSET1\tpLof\ninf_44\tSET1\tpLof\ninf_45\tSET1\tmissense\ninf_46\tSET1\tmissense\ninf_47\tSET1\tpLof\ninf_48\tSET1\tpLof\ninf_49\tSET1\tpLof\ninf_50\tSET1\tpLof\ninf_51\tSET1\tpLof\ninf_52\tSET1\tpLof\ninf_53\tSET1\tpLof\ninf_54\tSET1\tpLof\ninf_55\tSET1\tpLof\ninf_56\tSET1\tpLof\ninf_57\tSET1\tpLof\ninf_58\tSET1\tmissense\ninf_59\tSET1\tpLof\ninf_60\tSET1\tpLof\ninf_61\tSET1\tpLof\ninf_62\tSET1\tpLof\ninf_63\tSET1\tpLof\ninf_64\tSET1\tpLof\ninf_65\tSET1\tpLof\ninf_66\tSET1\tpLof\ninf_67\tSET1\tpLof\ninf_68\tSET1\tpLof\ninf_69\tSET1\tmissense\ninf_70\tSET1\tpLof\ninf_71\tSET1\tmissense\ninf_72\tSET1\tpLof\ninf_73\tSET1\tpLof\ninf_74\tSET1\tpLof\ninf_75\tSET1\tpLof\ninf_76\tSET1\tpLof\ninf_77\tSET1\tpLof\ninf_78\tSET1\tpLof\ninf_79\tSET1\tpLof\ninf_80\tSET1\tpLof\ninf_81\tSET1\tmissense\ninf_82\tSET1\tmissense\ninf_83\tSET1\tpLof\ninf_84\tSET1\tpLof\ninf_85\tSET1\tpLof\ninf_86\tSET1\tmissense\ninf_87\tSET1\tmissense\ninf_88\tSET1\tpLof\ninf_89\tSET1\tpLof\ninf_90\tSET1\tpLof\ninf_91\tSET1\tpLof\ninf_92\tSET1\tpLof\ninf_93\tSET1\tpLof\ninf_94\tSET1\tmissense\ninf_95\tSET1\tmissense\ninf_96\tSET1\tmissense\ninf_97\tSET1\tmissense\ninf_98\tSET1\tpLof\ninf_99\tSET1\tpLof\ninf_100\tSET1\tpLof\ninf_101\tSET1\tpLof\ninf_102\tSET1\tpLof\ninf_103\tSET1\tpLof\ninf_104\tSET1\tmissense\ninf_105\tSET1\tpLof\ninf_106\tSET1\tpLof\ninf_107\tSET1\tpLof\ninf_108\tSET1\tmissense\ninf_109\tSET1\tpLof\ninf_110\tSET1\tpLof\ninf_111\tSET1\tmissense\ninf_112\tSET1\tmissense\ninf_113\tSET1\tmissense\ninf_114\tSET1\tpLof\ninf_115\tSET1\tmissense\ninf_116\tSET1\tpLof
\ninf_117\tSET1\tpLof\ninf_118\tSET1\tmissense\ninf_119\tSET1\tmissense\ninf_120\tSET1\tmissense\ninf_121\tSET1\tpLof\ninf_122\tSET1\tpLof\ninf_123\tSET1\tpLof\ninf_124\tSET1\tpLof\ninf_125\tSET1\tpLof\ninf_126\tSET1\tmissense\ninf_127\tSET1\tpLof\ninf_128\tSET1\tpLof\ninf_129\tSET1\tpLof\ninf_130\tSET1\tpLof\ninf_131\tSET1\tmissense\ninf_132\tSET1\tmissense\ninf_133\tSET1\tpLof\ninf_134\tSET1\tpLof\ninf_135\tSET1\tmissense\ninf_136\tSET1\tpLof\ninf_137\tSET1\tpLof\ninf_138\tSET1\tpLof\ninf_139\tSET1\tpLof\ninf_140\tSET1\tmissense\ninf_141\tSET1\tpLof\ninf_142\tSET1\tmissense\ninf_143\tSET1\tpLof\ninf_144\tSET1\tpLof\ninf_145\tSET1\tpLof\ninf_146\tSET1\tpLof\ninf_147\tSET1\tpLof\ninf_148\tSET1\tmissense\ninf_149\tSET1\tpLof\ninf_150\tSET1\tpLof\ninf_151\tSET1\tpLof\ninf_152\tSET1\tmissense\ninf_153\tSET1\tmissense\ninf_154\tSET1\tpLof\ninf_155\tSET1\tmissense\ninf_156\tSET1\tpLof\ninf_157\tSET1\tpLof\ninf_158\tSET1\tpLof\ninf_159\tSET1\tmissense\ninf_160\tSET1\tpLof\ninf_161\tSET1\tpLof\ninf_162\tSET1\tpLof\ninf_163\tSET1\tpLof\ninf_164\tSET1\tpLof\ninf_165\tSET1\tpLof\ninf_166\tSET1\tpLof\ninf_167\tSET1\tpLof\ninf_168\tSET1\tmissense\ninf_169\tSET1\tpLof\ninf_170\tSET1\tpLof\ninf_171\tSET1\tpLof\ninf_172\tSET1\tpLof\ninf_173\tSET1\tpLof\ninf_174\tSET1\tpLof\ninf_175\tSET1\tmissense\ninf_176\tSET1\tmissense\ninf_177\tSET1\tpLof\ninf_178\tSET1\tmissense\ninf_179\tSET1\tpLof\ninf_180\tSET1\tpLof\ninf_181\tSET1\tpLof\ninf_182\tSET1\tpLof\ninf_183\tSET1\tmissense\ninf_184\tSET1\tmissense\ninf_185\tSET1\tpLof\ninf_186\tSET1\tpLof\ninf_187\tSET1\tpLof\ninf_188\tSET1\tmissense\ninf_189\tSET1\tpLof\ninf_190\tSET1\tmissense\ninf_191\tSET1\tpLof\ninf_192\tSET1\tpLof\ninf_193\tSET1\tpLof\ninf_194\tSET1\tpLof\ninf_195\tSET1\tpLof\ninf_196\tSET1\tpLof\ninf_197\tSET1\tpLof\ninf_198\tSET1\tpLof\ninf_199\tSET1\tmissense\ninf_200\tSET2\tpLof\ninf_201\tSET2\tpLof\ninf_202\tSET2\tpLof\ninf_203\tSET2\tmissense\ninf_204\tSET2\tpLof\ninf_205\tSET2\tmissense\ninf_206\tSET2\tpLof\ninf_207\tSET2\tmissense\ninf_208\tSET2\tpLof\ninf_209\tSET2\tpLof\ninf_210\tSET2\tpLof\ninf_211\tSET2\tmissense\ninf_212\tSET2\tpLof\ninf_213\tSET2\tmissense\ninf_214\tSET2\tmissense\ninf_215\tSET2\tmissense\ninf_216\tSET2\tpLof\ninf_217\tSET2\tpLof\ninf_218\tSET2\tpLof\ninf_219\tSET2\tpLof\ninf_220\tSET2\tpLof\ninf_221\tSET2\tpLof\ninf_222\tSET2\tpLof\ninf_223\tSET2\tpLof\ninf_224\tSET2\tpLof\ninf_225\tSET2\tmissense\ninf_226\tSET2\tpLof\ninf_227\tSET2\tpLof\ninf_228\tSET2\tpLof\ninf_229\tSET2\tpLof\ninf_230\tSET2\tpLof\ninf_231\tSET2\tmissense\ninf_232\tSET2\tpLof\ninf_233\tSET2\tpLof\ninf_234\tSET2\tpLof\ninf_235\tSET2\tmissense\ninf_236\tSET2\tpLof\ninf_237\tSET2\tmissense\ninf_238\tSET2\tmissense\ninf_239\tSET2\tmissense\ninf_240\tSET2\tpLof\ninf_241\tSET2\tpLof\ninf_242\tSET2\tpLof\ninf_243\tSET2\tmissense\ninf_244\tSET2\tpLof\ninf_245\tSET2\tmissense\ninf_246\tSET2\tpLof\ninf_247\tSET2\tpLof\ninf_248\tSET2\tpLof\ninf_249\tSET2\tpLof\ninf_250\tSET2\tmissense\ninf_251\tSET2\tpLof\ninf_252\tSET2\tpLof\ninf_253\tSET2\tpLof\ninf_254\tSET2\tmissense\ninf_255\tSET2\tpLof\ninf_256\tSET2\tpLof\ninf_257\tSET2\tmissense\ninf_258\tSET2\tmissense\ninf_259\tSET2\tpLof\ninf_260\tSET2\tmissense\ninf_261\tSET2\tpLof\ninf_262\tSET2\tmissense\ninf_263\tSET2\tpLof\ninf_264\tSET2\tpLof\ninf_265\tSET2\tmissense\ninf_266\tSET2\tpLof\ninf_267\tSET2\tpLof\ninf_268\tSET2\tpLof\ninf_269\tSET2\tmissense\ninf_270\tSET2\tpLof\ninf_271\tSET2\tpLof\ninf_272\tSET2\tpLof\ninf_273\tSET2\tpLof\ninf_274\tSET2\tpLof\ninf_275\tSET2\tpLof\ninf_276\tSET2\tpLof\ninf_277\t
SET2\tpLof\ninf_278\tSET2\tmissense\ninf_279\tSET2\tpLof\ninf_280\tSET2\tpLof\ninf_281\tSET2\tpLof\ninf_282\tSET2\tpLof\ninf_283\tSET2\tpLof\ninf_284\tSET2\tpLof\ninf_285\tSET2\tmissense\ninf_286\tSET2\tpLof\ninf_287\tSET2\tpLof\ninf_288\tSET2\tpLof\ninf_289\tSET2\tpLof\ninf_290\tSET2\tpLof\ninf_291\tSET2\tpLof\ninf_292\tSET2\tmissense\ninf_293\tSET2\tmissense\ninf_294\tSET2\tmissense\ninf_295\tSET2\tpLof\ninf_296\tSET2\tpLof\ninf_297\tSET2\tmissense\ninf_298\tSET2\tpLof\ninf_299\tSET2\tpLof\ninf_300\tSET2\tmissense\ninf_301\tSET2\tpLof\ninf_302\tSET2\tpLof\ninf_303\tSET2\tpLof\ninf_304\tSET2\tpLof\ninf_305\tSET2\tmissense\ninf_306\tSET2\tmissense\ninf_307\tSET2\tpLof\ninf_308\tSET2\tpLof\ninf_309\tSET2\tmissense\ninf_310\tSET2\tmissense\ninf_311\tSET2\tpLof\ninf_312\tSET2\tpLof\ninf_313\tSET2\tpLof\ninf_314\tSET2\tpLof\ninf_315\tSET2\tpLof\ninf_316\tSET2\tpLof\ninf_317\tSET2\tpLof\ninf_318\tSET2\tpLof\ninf_319\tSET2\tpLof\ninf_320\tSET2\tmissense\ninf_321\tSET2\tmissense\ninf_322\tSET2\tpLof\ninf_323\tSET2\tmissense\ninf_324\tSET2\tmissense\ninf_325\tSET2\tpLof\ninf_326\tSET2\tpLof\ninf_327\tSET2\tmissense\ninf_328\tSET2\tpLof\ninf_329\tSET2\tmissense\ninf_330\tSET2\tpLof\ninf_331\tSET2\tpLof\ninf_332\tSET2\tmissense\ninf_333\tSET2\tpLof\ninf_334\tSET2\tpLof\ninf_335\tSET2\tpLof\ninf_336\tSET2\tpLof\ninf_337\tSET2\tpLof\ninf_338\tSET2\tmissense\ninf_339\tSET2\tpLof\ninf_340\tSET2\tpLof\ninf_341\tSET2\tmissense\ninf_342\tSET2\tpLof\ninf_343\tSET2\tmissense\ninf_344\tSET2\tpLof\ninf_345\tSET2\tpLof\ninf_346\tSET2\tpLof\ninf_347\tSET2\tpLof\ninf_348\tSET2\tpLof\ninf_349\tSET2\tpLof\ninf_350\tSET2\tpLof\ninf_351\tSET2\tpLof\ninf_352\tSET2\tpLof\ninf_353\tSET2\tpLof\ninf_354\tSET2\tmissense\ninf_355\tSET2\tpLof\ninf_356\tSET2\tpLof\ninf_357\tSET2\tpLof\ninf_358\tSET2\tpLof\ninf_359\tSET2\tpLof\ninf_360\tSET2\tmissense\ninf_361\tSET2\tpLof\ninf_362\tSET2\tpLof\ninf_363\tSET2\tpLof\ninf_364\tSET2\tpLof\ninf_365\tSET2\tpLof\ninf_366\tSET2\tpLof\ninf_367\tSET2\tpLof\ninf_368\tSET2\tmissense\ninf_369\tSET2\tmissense\ninf_370\tSET2\tmissense\ninf_371\tSET2\tpLof\ninf_372\tSET2\tpLof\ninf_373\tSET2\tmissense\ninf_374\tSET2\tpLof\ninf_375\tSET2\tmissense\ninf_376\tSET2\tmissense\ninf_377\tSET2\tpLof\ninf_378\tSET2\tpLof\ninf_379\tSET2\tpLof\ninf_380\tSET2\tpLof\ninf_381\tSET2\tpLof\ninf_382\tSET2\tpLof\ninf_383\tSET2\tpLof\ninf_384\tSET2\tpLof\ninf_385\tSET2\tpLof\ninf_386\tSET2\tmissense\ninf_387\tSET2\tpLof\ninf_388\tSET2\tpLof\ninf_389\tSET2\tpLof\ninf_390\tSET2\tpLof\ninf_391\tSET2\tmissense\ninf_392\tSET2\tpLof\ninf_393\tSET2\tpLof\ninf_394\tSET2\tpLof\ninf_395\tSET2\tpLof\ninf_396\tSET2\tpLof\ninf_397\tSET2\tpLof\ninf_398\tSET2\tmissense\ninf_399\tSET2\tpLof\nnull_0\tSET2\tpLof\nnull_1\tSET2\tpLof\nnull_2\tSET2\tmissense\nnull_3\tSET2\tpLof\nnull_4\tSET2\tmissense\nnull_5\tSET2\tpLof\nnull_6\tSET2\tpLof\nnull_7\tSET2\tpLof\nnull_8\tSET2\tpLof\nnull_9\tSET2\tpLof\nnull_10\tSET2\tpLof\nnull_11\tSET2\tpLof\nnull_12\tSET2\tmissense\nnull_13\tSET2\tpLof\nnull_14\tSET2\tpLof\nnull_15\tSET2\tpLof\nnull_16\tSET2\tpLof\nnull_17\tSET2\tpLof\nnull_18\tSET2\tpLof\nnull_19\tSET2\tmissense\nnull_20\tSET2\tpLof\nnull_21\tSET2\tpLof\nnull_22\tSET2\tpLof\nnull_23\tSET2\tmissense\nnull_24\tSET2\tpLof\nnull_25\tSET2\tpLof\nnull_26\tSET2\tmissense\nnull_27\tSET2\tpLof\nnull_28\tSET2\tmissense\nnull_29\tSET2\tpLof\nnull_30\tSET2\tpLof\nnull_31\tSET2\tpLof\nnull_32\tSET2\tpLof\nnull_33\tSET2\tmissense\nnull_34\tSET2\tpLof\nnull_35\tSET2\tmissense\nnull_36\tSET2\tpLof\nnull_37\tSET2\tmissense\nnull_38\tSET2\tpLof\nnull
_39\tSET2\tpLof\nnull_40\tSET2\tpLof\nnull_41\tSET2\tpLof\nnull_42\tSET2\tmissense\nnull_43\tSET2\tmissense\nnull_44\tSET2\tpLof\nnull_45\tSET2\tpLof\nnull_46\tSET2\tpLof\nnull_47\tSET2\tpLof\nnull_48\tSET2\tmissense\nnull_49\tSET2\tpLof\n"
  },
  {
    "path": "example/example_3chr.annotationsV2",
    "content": "mog_0\tSET1\tB-sheet\tmissense\nmog_1\tSET1\tCatalytic\tmissense\nmog_2\tSET1\tB-sheet\tpLof\nmog_3\tSET1\tCatalytic\tpLof\nmog_4\tSET1\tCatalytic\tmissense\nmog_5\tSET1\tCatalytic\tpLof\nmog_6\tSET1\tCatalytic\tpLof\nmog_7\tSET1\tC‐terminal\tmissense\nmog_8\tSET1\tB-sheet\tmissense\nmog_9\tSET1\tCatalytic\tpLof\nmog_10\tSET1\tB-sheet\tmissense\nmog_11\tSET1\tB-sheet\tpLof\nmog_12\tSET1\tCatalytic\tpLof\nmog_13\tSET1\tC‐terminal\tmissense\nmog_14\tSET1\tC‐terminal\tmissense\nmog_15\tSET1\tB-sheet\tmissense\nmog_16\tSET1\tC‐terminal\tpLof\nmog_17\tSET1\tCatalytic\tmissense\nmog_18\tSET1\tCatalytic\tpLof\nmog_19\tSET1\tB-sheet\tpLof\nmog_20\tSET1\tC‐terminal\tpLof\nmog_21\tSET1\tC‐terminal\tpLof\nmog_22\tSET1\tC‐terminal\tpLof\nmog_23\tSET1\tCatalytic\tpLof\nmog_24\tSET1\tCatalytic\tpLof\nmog_25\tSET1\tB-sheet\tpLof\nmog_26\tSET1\tCatalytic\tpLof\nmog_27\tSET1\tC‐terminal\tmissense\nmog_28\tSET1\tCatalytic\tpLof\nmog_29\tSET1\tC‐terminal\tpLof\nmog_30\tSET1\tB-sheet\tpLof\nmog_31\tSET1\tCatalytic\tpLof\nmog_32\tSET1\tC‐terminal\tmissense\nmog_33\tSET1\tB-sheet\tmissense\nmog_34\tSET1\tB-sheet\tpLof\nmog_35\tSET1\tC‐terminal\tpLof\nmog_36\tSET1\tCatalytic\tmissense\nmog_37\tSET1\tB-sheet\tpLof\nmog_38\tSET1\tB-sheet\tpLof\nmog_39\tSET1\tCatalytic\tpLof\nmog_40\tSET1\tB-sheet\tpLof\nmog_41\tSET1\tB-sheet\tpLof\nmog_42\tSET1\tCatalytic\tmissense\nmog_43\tSET1\tC‐terminal\tmissense\nmog_44\tSET1\tB-sheet\tpLof\nmog_45\tSET1\tC‐terminal\tmissense\nmog_46\tSET1\tB-sheet\tmissense\nmog_47\tSET1\tCatalytic\tpLof\nmog_48\tSET1\tC‐terminal\tmissense\nmog_49\tSET1\tB-sheet\tpLof\ninf_0\tSET1\tB-sheet\tpLof\ninf_1\tSET1\tC‐terminal\tpLof\ninf_2\tSET1\tC‐terminal\tpLof\ninf_3\tSET1\tB-sheet\tpLof\ninf_4\tSET1\tC‐terminal\tpLof\ninf_5\tSET1\tCatalytic\tmissense\ninf_6\tSET1\tB-sheet\tpLof\ninf_7\tSET1\tC‐terminal\tpLof\ninf_8\tSET1\tCatalytic\tpLof\ninf_9\tSET1\tB-sheet\tmissense\ninf_10\tSET1\tB-sheet\tmissense\ninf_11\tSET1\tCatalytic\tpLof\ninf_12\tSET1\tC‐terminal\tpLof\ninf_13\tSET1\tC‐terminal\tpLof\ninf_14\tSET1\tCatalytic\tmissense\ninf_15\tSET1\tB-sheet\tpLof\ninf_16\tSET1\tCatalytic\tpLof\ninf_17\tSET1\tCatalytic\tmissense\ninf_18\tSET1\tC‐terminal\tpLof\ninf_19\tSET1\tCatalytic\tpLof\ninf_20\tSET1\tCatalytic\tmissense\ninf_21\tSET1\tB-sheet\tmissense\ninf_22\tSET1\tCatalytic\tmissense\ninf_23\tSET1\tCatalytic\tpLof\ninf_24\tSET1\tCatalytic\tpLof\ninf_25\tSET1\tB-sheet\tpLof\ninf_26\tSET1\tC‐terminal\tpLof\ninf_27\tSET1\tC‐terminal\tpLof\ninf_28\tSET1\tC‐terminal\tpLof\ninf_29\tSET1\tCatalytic\tpLof\ninf_30\tSET1\tB-sheet\tpLof\ninf_31\tSET1\tC‐terminal\tmissense\ninf_32\tSET1\tCatalytic\tpLof\ninf_33\tSET1\tC‐terminal\tpLof\ninf_34\tSET1\tC‐terminal\tpLof\ninf_35\tSET1\tB-sheet\tmissense\ninf_36\tSET1\tCatalytic\tpLof\ninf_37\tSET1\tC‐terminal\tmissense\ninf_38\tSET1\tB-sheet\tmissense\ninf_39\tSET1\tC‐terminal\tpLof\ninf_40\tSET1\tCatalytic\tpLof\ninf_41\tSET1\tB-sheet\tmissense\ninf_42\tSET1\tB-sheet\tpLof\ninf_43\tSET1\tB-sheet\tpLof\ninf_44\tSET1\tC‐terminal\tpLof\ninf_45\tSET1\tC‐terminal\tmissense\ninf_46\tSET1\tB-sheet\tmissense\ninf_47\tSET1\tC‐terminal\tpLof\ninf_48\tSET1\tCatalytic\tpLof\ninf_49\tSET1\tB-sheet\tpLof\ninf_50\tSET1\tC‐terminal\tpLof\ninf_51\tSET1\tCatalytic\tpLof\ninf_52\tSET1\tC‐terminal\tpLof\ninf_53\tSET1\tC‐terminal\tpLof\ninf_54\tSET1\tB-sheet\tpLof\ninf_55\tSET1\tC‐terminal\tpLof\ninf_56\tSET1\tB-sheet\tpLof\ninf_57\tSET1\tCatalytic\tpLof\ninf_58\tSET1\tB-sheet\tmissense\ninf_59\tSET1\tC‐terminal\tpLof\ninf_60\tSET1\tCatalytic\tpLof\ninf_61\tSET1\tC
atalytic\tpLof\ninf_62\tSET1\tCatalytic\tpLof\ninf_63\tSET1\tB-sheet\tpLof\ninf_64\tSET1\tC‐terminal\tpLof\ninf_65\tSET1\tC‐terminal\tpLof\ninf_66\tSET1\tB-sheet\tpLof\ninf_67\tSET1\tC‐terminal\tpLof\ninf_68\tSET1\tB-sheet\tpLof\ninf_69\tSET1\tCatalytic\tmissense\ninf_70\tSET1\tC‐terminal\tpLof\ninf_71\tSET1\tB-sheet\tmissense\ninf_72\tSET1\tC‐terminal\tpLof\ninf_73\tSET1\tCatalytic\tpLof\ninf_74\tSET1\tCatalytic\tpLof\ninf_75\tSET1\tB-sheet\tpLof\ninf_76\tSET1\tC‐terminal\tpLof\ninf_77\tSET1\tB-sheet\tpLof\ninf_78\tSET1\tC‐terminal\tpLof\ninf_79\tSET1\tC‐terminal\tpLof\ninf_80\tSET1\tB-sheet\tpLof\ninf_81\tSET1\tB-sheet\tmissense\ninf_82\tSET1\tB-sheet\tmissense\ninf_83\tSET1\tCatalytic\tpLof\ninf_84\tSET1\tC‐terminal\tpLof\ninf_85\tSET1\tC‐terminal\tpLof\ninf_86\tSET1\tB-sheet\tmissense\ninf_87\tSET1\tCatalytic\tmissense\ninf_88\tSET1\tB-sheet\tpLof\ninf_89\tSET1\tCatalytic\tpLof\ninf_90\tSET1\tC‐terminal\tpLof\ninf_91\tSET1\tC‐terminal\tpLof\ninf_92\tSET1\tB-sheet\tpLof\ninf_93\tSET1\tCatalytic\tpLof\ninf_94\tSET1\tC‐terminal\tmissense\ninf_95\tSET1\tC‐terminal\tmissense\ninf_96\tSET1\tB-sheet\tmissense\ninf_97\tSET1\tB-sheet\tmissense\ninf_98\tSET1\tB-sheet\tpLof\ninf_99\tSET1\tC‐terminal\tpLof\ninf_100\tSET1\tCatalytic\tpLof\ninf_101\tSET1\tC‐terminal\tpLof\ninf_102\tSET1\tC‐terminal\tpLof\ninf_103\tSET1\tCatalytic\tpLof\ninf_104\tSET1\tC‐terminal\tmissense\ninf_105\tSET1\tB-sheet\tpLof\ninf_106\tSET1\tCatalytic\tpLof\ninf_107\tSET1\tCatalytic\tpLof\ninf_108\tSET1\tC‐terminal\tmissense\ninf_109\tSET1\tC‐terminal\tpLof\ninf_110\tSET1\tCatalytic\tpLof\ninf_111\tSET1\tCatalytic\tmissense\ninf_112\tSET1\tCatalytic\tmissense\ninf_113\tSET1\tCatalytic\tmissense\ninf_114\tSET1\tB-sheet\tpLof\ninf_115\tSET1\tB-sheet\tmissense\ninf_116\tSET1\tC‐terminal\tpLof\ninf_117\tSET1\tB-sheet\tpLof\ninf_118\tSET1\tC‐terminal\tmissense\ninf_119\tSET1\tC‐terminal\tmissense\ninf_120\tSET1\tC‐terminal\tmissense\ninf_121\tSET1\tCatalytic\tpLof\ninf_122\tSET1\tCatalytic\tpLof\ninf_123\tSET1\tB-sheet\tpLof\ninf_124\tSET1\tC‐terminal\tpLof\ninf_125\tSET1\tCatalytic\tpLof\ninf_126\tSET1\tC‐terminal\tmissense\ninf_127\tSET1\tC‐terminal\tpLof\ninf_128\tSET1\tCatalytic\tpLof\ninf_129\tSET1\tB-sheet\tpLof\ninf_130\tSET1\tC‐terminal\tpLof\ninf_131\tSET1\tC‐terminal\tmissense\ninf_132\tSET1\tB-sheet\tmissense\ninf_133\tSET1\tB-sheet\tpLof\ninf_134\tSET1\tC‐terminal\tpLof\ninf_135\tSET1\tCatalytic\tmissense\ninf_136\tSET1\tB-sheet\tpLof\ninf_137\tSET1\tCatalytic\tpLof\ninf_138\tSET1\tC‐terminal\tpLof\ninf_139\tSET1\tB-sheet\tpLof\ninf_140\tSET1\tC‐terminal\tmissense\ninf_141\tSET1\tC‐terminal\tpLof\ninf_142\tSET1\tC‐terminal\tmissense\ninf_143\tSET1\tCatalytic\tpLof\ninf_144\tSET1\tC‐terminal\tpLof\ninf_145\tSET1\tCatalytic\tpLof\ninf_146\tSET1\tC‐terminal\tpLof\ninf_147\tSET1\tB-sheet\tpLof\ninf_148\tSET1\tC‐terminal\tmissense\ninf_149\tSET1\tCatalytic\tpLof\ninf_150\tSET1\tB-sheet\tpLof\ninf_151\tSET1\tB-sheet\tpLof\ninf_152\tSET1\tC‐terminal\tmissense\ninf_153\tSET1\tCatalytic\tmissense\ninf_154\tSET1\tC‐terminal\tpLof\ninf_155\tSET1\tB-sheet\tmissense\ninf_156\tSET1\tB-sheet\tpLof\ninf_157\tSET1\tB-sheet\tpLof\ninf_158\tSET1\tCatalytic\tpLof\ninf_159\tSET1\tCatalytic\tmissense\ninf_160\tSET1\tCatalytic\tpLof\ninf_161\tSET1\tCatalytic\tpLof\ninf_162\tSET1\tC‐terminal\tpLof\ninf_163\tSET1\tCatalytic\tpLof\ninf_164\tSET1\tB-sheet\tpLof\ninf_165\tSET1\tCatalytic\tpLof\ninf_166\tSET1\tB-sheet\tpLof\ninf_167\tSET1\tB-sheet\tpLof\ninf_168\tSET1\tB-sheet\tmissense\ninf_169\tSET1\tCatalytic\tpLof\ninf_170\tSET1\tC‐terminal\t
pLof\ninf_171\tSET1\tB-sheet\tpLof\ninf_172\tSET1\tCatalytic\tpLof\ninf_173\tSET1\tC‐terminal\tpLof\ninf_174\tSET1\tC‐terminal\tpLof\ninf_175\tSET1\tCatalytic\tmissense\ninf_176\tSET1\tC‐terminal\tmissense\ninf_177\tSET1\tB-sheet\tpLof\ninf_178\tSET1\tC‐terminal\tmissense\ninf_179\tSET1\tC‐terminal\tpLof\ninf_180\tSET1\tC‐terminal\tpLof\ninf_181\tSET1\tC‐terminal\tpLof\ninf_182\tSET1\tC‐terminal\tpLof\ninf_183\tSET1\tCatalytic\tmissense\ninf_184\tSET1\tB-sheet\tmissense\ninf_185\tSET1\tC‐terminal\tpLof\ninf_186\tSET1\tCatalytic\tpLof\ninf_187\tSET1\tB-sheet\tpLof\ninf_188\tSET1\tB-sheet\tmissense\ninf_189\tSET1\tCatalytic\tpLof\ninf_190\tSET1\tB-sheet\tmissense\ninf_191\tSET1\tCatalytic\tpLof\ninf_192\tSET1\tB-sheet\tpLof\ninf_193\tSET1\tCatalytic\tpLof\ninf_194\tSET1\tB-sheet\tpLof\ninf_195\tSET1\tB-sheet\tpLof\ninf_196\tSET1\tC‐terminal\tpLof\ninf_197\tSET1\tC‐terminal\tpLof\ninf_198\tSET1\tB-sheet\tpLof\ninf_199\tSET1\tC‐terminal\tmissense\ninf_200\tSET2\tTrans-membrane\tpLof\ninf_201\tSET2\tN-terminal\tpLof\ninf_202\tSET2\tN-terminal\tpLof\ninf_203\tSET2\tEGF\tmissense\ninf_204\tSET2\tEGF\tpLof\ninf_205\tSET2\tTrans-membrane\tmissense\ninf_206\tSET2\tTrans-membrane\tpLof\ninf_207\tSET2\tEGF\tmissense\ninf_208\tSET2\tTrans-membrane\tpLof\ninf_209\tSET2\tTrans-membrane\tpLof\ninf_210\tSET2\tEGF\tpLof\ninf_211\tSET2\tEGF\tmissense\ninf_212\tSET2\tTrans-membrane\tpLof\ninf_213\tSET2\tTrans-membrane\tmissense\ninf_214\tSET2\tTrans-membrane\tmissense\ninf_215\tSET2\tTrans-membrane\tmissense\ninf_216\tSET2\tEGF\tpLof\ninf_217\tSET2\tTrans-membrane\tpLof\ninf_218\tSET2\tTrans-membrane\tpLof\ninf_219\tSET2\tEGF\tpLof\ninf_220\tSET2\tN-terminal\tpLof\ninf_221\tSET2\tN-terminal\tpLof\ninf_222\tSET2\tN-terminal\tpLof\ninf_223\tSET2\tTrans-membrane\tpLof\ninf_224\tSET2\tEGF\tpLof\ninf_225\tSET2\tTrans-membrane\tmissense\ninf_226\tSET2\tTrans-membrane\tpLof\ninf_227\tSET2\tTrans-membrane\tpLof\ninf_228\tSET2\tN-terminal\tpLof\ninf_229\tSET2\tTrans-membrane\tpLof\ninf_230\tSET2\tTrans-membrane\tpLof\ninf_231\tSET2\tTrans-membrane\tmissense\ninf_232\tSET2\tEGF\tpLof\ninf_233\tSET2\tN-terminal\tpLof\ninf_234\tSET2\tN-terminal\tpLof\ninf_235\tSET2\tTrans-membrane\tmissense\ninf_236\tSET2\tN-terminal\tpLof\ninf_237\tSET2\tN-terminal\tmissense\ninf_238\tSET2\tN-terminal\tmissense\ninf_239\tSET2\tTrans-membrane\tmissense\ninf_240\tSET2\tTrans-membrane\tpLof\ninf_241\tSET2\tTrans-membrane\tpLof\ninf_242\tSET2\tN-terminal\tpLof\ninf_243\tSET2\tN-terminal\tmissense\ninf_244\tSET2\tEGF\tpLof\ninf_245\tSET2\tEGF\tmissense\ninf_246\tSET2\tN-terminal\tpLof\ninf_247\tSET2\tEGF\tpLof\ninf_248\tSET2\tEGF\tpLof\ninf_249\tSET2\tTrans-membrane\tpLof\ninf_250\tSET2\tEGF\tmissense\ninf_251\tSET2\tTrans-membrane\tpLof\ninf_252\tSET2\tEGF\tpLof\ninf_253\tSET2\tTrans-membrane\tpLof\ninf_254\tSET2\tN-terminal\tmissense\ninf_255\tSET2\tEGF\tpLof\ninf_256\tSET2\tTrans-membrane\tpLof\ninf_257\tSET2\tTrans-membrane\tmissense\ninf_258\tSET2\tEGF\tmissense\ninf_259\tSET2\tN-terminal\tpLof\ninf_260\tSET2\tTrans-membrane\tmissense\ninf_261\tSET2\tN-terminal\tpLof\ninf_262\tSET2\tTrans-membrane\tmissense\ninf_263\tSET2\tEGF\tpLof\ninf_264\tSET2\tEGF\tpLof\ninf_265\tSET2\tN-terminal\tmissense\ninf_266\tSET2\tN-terminal\tpLof\ninf_267\tSET2\tTrans-membrane\tpLof\ninf_268\tSET2\tEGF\tpLof\ninf_269\tSET2\tTrans-membrane\tmissense\ninf_270\tSET2\tEGF\tpLof\ninf_271\tSET2\tEGF\tpLof\ninf_272\tSET2\tN-terminal\tpLof\ninf_273\tSET2\tTrans-membrane\tpLof\ninf_274\tSET2\tTrans-membrane\tpLof\ninf_275\tSET2\tTrans-membrane\tpLof\ninf_276\tSET2\t
Trans-membrane\tpLof\ninf_277\tSET2\tTrans-membrane\tpLof\ninf_278\tSET2\tEGF\tmissense\ninf_279\tSET2\tEGF\tpLof\ninf_280\tSET2\tEGF\tpLof\ninf_281\tSET2\tN-terminal\tpLof\ninf_282\tSET2\tTrans-membrane\tpLof\ninf_283\tSET2\tN-terminal\tpLof\ninf_284\tSET2\tTrans-membrane\tpLof\ninf_285\tSET2\tTrans-membrane\tmissense\ninf_286\tSET2\tEGF\tpLof\ninf_287\tSET2\tTrans-membrane\tpLof\ninf_288\tSET2\tEGF\tpLof\ninf_289\tSET2\tEGF\tpLof\ninf_290\tSET2\tEGF\tpLof\ninf_291\tSET2\tEGF\tpLof\ninf_292\tSET2\tTrans-membrane\tmissense\ninf_293\tSET2\tEGF\tmissense\ninf_294\tSET2\tTrans-membrane\tmissense\ninf_295\tSET2\tEGF\tpLof\ninf_296\tSET2\tN-terminal\tpLof\ninf_297\tSET2\tTrans-membrane\tmissense\ninf_298\tSET2\tTrans-membrane\tpLof\ninf_299\tSET2\tEGF\tpLof\ninf_300\tSET2\tEGF\tmissense\ninf_301\tSET2\tEGF\tpLof\ninf_302\tSET2\tEGF\tpLof\ninf_303\tSET2\tTrans-membrane\tpLof\ninf_304\tSET2\tEGF\tpLof\ninf_305\tSET2\tEGF\tmissense\ninf_306\tSET2\tN-terminal\tmissense\ninf_307\tSET2\tEGF\tpLof\ninf_308\tSET2\tEGF\tpLof\ninf_309\tSET2\tTrans-membrane\tmissense\ninf_310\tSET2\tN-terminal\tmissense\ninf_311\tSET2\tN-terminal\tpLof\ninf_312\tSET2\tEGF\tpLof\ninf_313\tSET2\tTrans-membrane\tpLof\ninf_314\tSET2\tTrans-membrane\tpLof\ninf_315\tSET2\tN-terminal\tpLof\ninf_316\tSET2\tTrans-membrane\tpLof\ninf_317\tSET2\tTrans-membrane\tpLof\ninf_318\tSET2\tN-terminal\tpLof\ninf_319\tSET2\tTrans-membrane\tpLof\ninf_320\tSET2\tEGF\tmissense\ninf_321\tSET2\tTrans-membrane\tmissense\ninf_322\tSET2\tEGF\tpLof\ninf_323\tSET2\tEGF\tmissense\ninf_324\tSET2\tEGF\tmissense\ninf_325\tSET2\tEGF\tpLof\ninf_326\tSET2\tN-terminal\tpLof\ninf_327\tSET2\tN-terminal\tmissense\ninf_328\tSET2\tN-terminal\tpLof\ninf_329\tSET2\tEGF\tmissense\ninf_330\tSET2\tTrans-membrane\tpLof\ninf_331\tSET2\tN-terminal\tpLof\ninf_332\tSET2\tN-terminal\tmissense\ninf_333\tSET2\tTrans-membrane\tpLof\ninf_334\tSET2\tTrans-membrane\tpLof\ninf_335\tSET2\tN-terminal\tpLof\ninf_336\tSET2\tEGF\tpLof\ninf_337\tSET2\tTrans-membrane\tpLof\ninf_338\tSET2\tTrans-membrane\tmissense\ninf_339\tSET2\tTrans-membrane\tpLof\ninf_340\tSET2\tEGF\tpLof\ninf_341\tSET2\tTrans-membrane\tmissense\ninf_342\tSET2\tTrans-membrane\tpLof\ninf_343\tSET2\tTrans-membrane\tmissense\ninf_344\tSET2\tTrans-membrane\tpLof\ninf_345\tSET2\tN-terminal\tpLof\ninf_346\tSET2\tEGF\tpLof\ninf_347\tSET2\tN-terminal\tpLof\ninf_348\tSET2\tN-terminal\tpLof\ninf_349\tSET2\tEGF\tpLof\ninf_350\tSET2\tTrans-membrane\tpLof\ninf_351\tSET2\tEGF\tpLof\ninf_352\tSET2\tTrans-membrane\tpLof\ninf_353\tSET2\tEGF\tpLof\ninf_354\tSET2\tEGF\tmissense\ninf_355\tSET2\tEGF\tpLof\ninf_356\tSET2\tN-terminal\tpLof\ninf_357\tSET2\tN-terminal\tpLof\ninf_358\tSET2\tEGF\tpLof\ninf_359\tSET2\tTrans-membrane\tpLof\ninf_360\tSET2\tN-terminal\tmissense\ninf_361\tSET2\tN-terminal\tpLof\ninf_362\tSET2\tN-terminal\tpLof\ninf_363\tSET2\tEGF\tpLof\ninf_364\tSET2\tEGF\tpLof\ninf_365\tSET2\tEGF\tpLof\ninf_366\tSET2\tTrans-membrane\tpLof\ninf_367\tSET2\tTrans-membrane\tpLof\ninf_368\tSET2\tEGF\tmissense\ninf_369\tSET2\tEGF\tmissense\ninf_370\tSET2\tEGF\tmissense\ninf_371\tSET2\tTrans-membrane\tpLof\ninf_372\tSET2\tN-terminal\tpLof\ninf_373\tSET2\tTrans-membrane\tmissense\ninf_374\tSET2\tN-terminal\tpLof\ninf_375\tSET2\tN-terminal\tmissense\ninf_376\tSET2\tEGF\tmissense\ninf_377\tSET2\tEGF\tpLof\ninf_378\tSET2\tEGF\tpLof\ninf_379\tSET2\tTrans-membrane\tpLof\ninf_380\tSET2\tEGF\tpLof\ninf_381\tSET2\tN-terminal\tpLof\ninf_382\tSET2\tN-terminal\tpLof\ninf_383\tSET2\tTrans-membrane\tpLof\ninf_384\tSET2\tN-terminal\tpLof\ninf_385\tSET2\tEG
F\tpLof\ninf_386\tSET2\tTrans-membrane\tmissense\ninf_387\tSET2\tTrans-membrane\tpLof\ninf_388\tSET2\tEGF\tpLof\ninf_389\tSET2\tN-terminal\tpLof\ninf_390\tSET2\tN-terminal\tpLof\ninf_391\tSET2\tTrans-membrane\tmissense\ninf_392\tSET2\tTrans-membrane\tpLof\ninf_393\tSET2\tTrans-membrane\tpLof\ninf_394\tSET2\tN-terminal\tpLof\ninf_395\tSET2\tN-terminal\tpLof\ninf_396\tSET2\tEGF\tpLof\ninf_397\tSET2\tTrans-membrane\tpLof\ninf_398\tSET2\tEGF\tmissense\ninf_399\tSET2\tTrans-membrane\tpLof\nnull_0\tSET2\tEGF\tpLof\nnull_1\tSET2\tN-terminal\tpLof\nnull_2\tSET2\tEGF\tmissense\nnull_3\tSET2\tEGF\tpLof\nnull_4\tSET2\tTrans-membrane\tmissense\nnull_5\tSET2\tEGF\tpLof\nnull_6\tSET2\tN-terminal\tpLof\nnull_7\tSET2\tN-terminal\tpLof\nnull_8\tSET2\tTrans-membrane\tpLof\nnull_9\tSET2\tN-terminal\tpLof\nnull_10\tSET2\tTrans-membrane\tpLof\nnull_11\tSET2\tEGF\tpLof\nnull_12\tSET2\tN-terminal\tmissense\nnull_13\tSET2\tTrans-membrane\tpLof\nnull_14\tSET2\tN-terminal\tpLof\nnull_15\tSET2\tEGF\tpLof\nnull_16\tSET2\tTrans-membrane\tpLof\nnull_17\tSET2\tTrans-membrane\tpLof\nnull_18\tSET2\tN-terminal\tpLof\nnull_19\tSET2\tEGF\tmissense\nnull_20\tSET2\tN-terminal\tpLof\nnull_21\tSET2\tEGF\tpLof\nnull_22\tSET2\tTrans-membrane\tpLof\nnull_23\tSET2\tN-terminal\tmissense\nnull_24\tSET2\tEGF\tpLof\nnull_25\tSET2\tN-terminal\tpLof\nnull_26\tSET2\tN-terminal\tmissense\nnull_27\tSET2\tN-terminal\tpLof\nnull_28\tSET2\tEGF\tmissense\nnull_29\tSET2\tEGF\tpLof\nnull_30\tSET2\tTrans-membrane\tpLof\nnull_31\tSET2\tN-terminal\tpLof\nnull_32\tSET2\tN-terminal\tpLof\nnull_33\tSET2\tN-terminal\tmissense\nnull_34\tSET2\tN-terminal\tpLof\nnull_35\tSET2\tEGF\tmissense\nnull_36\tSET2\tN-terminal\tpLof\nnull_37\tSET2\tN-terminal\tmissense\nnull_38\tSET2\tTrans-membrane\tpLof\nnull_39\tSET2\tTrans-membrane\tpLof\nnull_40\tSET2\tTrans-membrane\tpLof\nnull_41\tSET2\tTrans-membrane\tpLof\nnull_42\tSET2\tTrans-membrane\tmissense\nnull_43\tSET2\tTrans-membrane\tmissense\nnull_44\tSET2\tTrans-membrane\tpLof\nnull_45\tSET2\tTrans-membrane\tpLof\nnull_46\tSET2\tN-terminal\tpLof\nnull_47\tSET2\tTrans-membrane\tpLof\nnull_48\tSET2\tN-terminal\tmissense\nnull_49\tSET2\tEGF\tpLof\n"
  },
  {
    "path": "example/example_3chr.bim",
    "content": "1\tmog_0\t0\t1\tD\td\n1\tmog_1\t0\t2\tD\td\n1\tmog_2\t0\t3\tD\td\n1\tmog_3\t0\t4\tD\td\n1\tmog_4\t0\t5\tD\td\n1\tmog_5\t0\t6\tD\td\n1\tmog_6\t0\t7\tD\td\n1\tmog_7\t0\t8\tD\td\n1\tmog_8\t0\t9\tD\td\n1\tmog_9\t0\t10\tD\td\n1\tmog_10\t0\t11\tD\td\n1\tmog_11\t0\t12\tD\td\n1\tmog_12\t0\t13\tD\td\n1\tmog_13\t0\t14\tD\td\n1\tmog_14\t0\t15\tD\td\n1\tmog_15\t0\t16\tD\td\n1\tmog_16\t0\t17\tD\td\n1\tmog_17\t0\t18\tD\td\n1\tmog_18\t0\t19\tD\td\n1\tmog_19\t0\t20\tD\td\n1\tmog_20\t0\t21\tD\td\n1\tmog_21\t0\t22\tD\td\n1\tmog_22\t0\t23\tD\td\n1\tmog_23\t0\t24\tD\td\n1\tmog_24\t0\t25\tD\td\n1\tmog_25\t0\t26\tD\td\n1\tmog_26\t0\t27\tD\td\n1\tmog_27\t0\t28\tD\td\n1\tmog_28\t0\t29\tD\td\n1\tmog_29\t0\t30\tD\td\n1\tmog_30\t0\t31\tD\td\n1\tmog_31\t0\t32\tD\td\n1\tmog_32\t0\t33\tD\td\n1\tmog_33\t0\t34\tD\td\n1\tmog_34\t0\t35\tD\td\n1\tmog_35\t0\t36\tD\td\n1\tmog_36\t0\t37\tD\td\n1\tmog_37\t0\t38\tD\td\n1\tmog_38\t0\t39\tD\td\n1\tmog_39\t0\t40\tD\td\n1\tmog_40\t0\t41\tD\td\n1\tmog_41\t0\t42\tD\td\n1\tmog_42\t0\t43\tD\td\n1\tmog_43\t0\t44\tD\td\n1\tmog_44\t0\t45\tD\td\n1\tmog_45\t0\t46\tD\td\n1\tmog_46\t0\t47\tD\td\n1\tmog_47\t0\t48\tD\td\n1\tmog_48\t0\t49\tD\td\n1\tmog_49\t0\t50\tD\td\n2\tinf_0\t0\t51\tD\td\n2\tinf_1\t0\t52\tD\td\n2\tinf_2\t0\t53\tD\td\n2\tinf_3\t0\t54\tD\td\n2\tinf_4\t0\t55\tD\td\n2\tinf_5\t0\t56\tD\td\n2\tinf_6\t0\t57\tD\td\n2\tinf_7\t0\t58\tD\td\n2\tinf_8\t0\t59\tD\td\n2\tinf_9\t0\t60\tD\td\n2\tinf_10\t0\t61\tD\td\n2\tinf_11\t0\t62\tD\td\n2\tinf_12\t0\t63\tD\td\n2\tinf_13\t0\t64\tD\td\n2\tinf_14\t0\t65\tD\td\n2\tinf_15\t0\t66\tD\td\n2\tinf_16\t0\t67\tD\td\n2\tinf_17\t0\t68\tD\td\n2\tinf_18\t0\t69\tD\td\n2\tinf_19\t0\t70\tD\td\n2\tinf_20\t0\t71\tD\td\n2\tinf_21\t0\t72\tD\td\n2\tinf_22\t0\t73\tD\td\n2\tinf_23\t0\t74\tD\td\n2\tinf_24\t0\t75\tD\td\n2\tinf_25\t0\t76\tD\td\n2\tinf_26\t0\t77\tD\td\n2\tinf_27\t0\t78\tD\td\n2\tinf_28\t0\t79\tD\td\n2\tinf_29\t0\t80\tD\td\n2\tinf_30\t0\t81\tD\td\n2\tinf_31\t0\t82\tD\td\n2\tinf_32\t0\t83\tD\td\n2\tinf_33\t0\t84\tD\td\n2\tinf_34\t0\t85\tD\td\n2\tinf_35\t0\t86\tD\td\n2\tinf_36\t0\t87\tD\td\n2\tinf_37\t0\t88\tD\td\n2\tinf_38\t0\t89\tD\td\n2\tinf_39\t0\t90\tD\td\n2\tinf_40\t0\t91\tD\td\n2\tinf_41\t0\t92\tD\td\n2\tinf_42\t0\t93\tD\td\n2\tinf_43\t0\t94\tD\td\n2\tinf_44\t0\t95\tD\td\n2\tinf_45\t0\t96\tD\td\n2\tinf_46\t0\t97\tD\td\n2\tinf_47\t0\t98\tD\td\n2\tinf_48\t0\t99\tD\td\n2\tinf_49\t0\t100\tD\td\n2\tinf_50\t0\t101\tD\td\n2\tinf_51\t0\t102\tD\td\n2\tinf_52\t0\t103\tD\td\n2\tinf_53\t0\t104\tD\td\n2\tinf_54\t0\t105\tD\td\n2\tinf_55\t0\t106\tD\td\n2\tinf_56\t0\t107\tD\td\n2\tinf_57\t0\t108\tD\td\n2\tinf_58\t0\t109\tD\td\n2\tinf_59\t0\t110\tD\td\n2\tinf_60\t0\t111\tD\td\n2\tinf_61\t0\t112\tD\td\n2\tinf_62\t0\t113\tD\td\n2\tinf_63\t0\t114\tD\td\n2\tinf_64\t0\t115\tD\td\n2\tinf_65\t0\t116\tD\td\n2\tinf_66\t0\t117\tD\td\n2\tinf_67\t0\t118\tD\td\n2\tinf_68\t0\t119\tD\td\n2\tinf_69\t0\t120\tD\td\n2\tinf_70\t0\t121\tD\td\n2\tinf_71\t0\t122\tD\td\n2\tinf_72\t0\t123\tD\td\n2\tinf_73\t0\t124\tD\td\n2\tinf_74\t0\t125\tD\td\n2\tinf_75\t0\t126\tD\td\n2\tinf_76\t0\t127\tD\td\n2\tinf_77\t0\t128\tD\td\n2\tinf_78\t0\t129\tD\td\n2\tinf_79\t0\t130\tD\td\n2\tinf_80\t0\t131\tD\td\n2\tinf_81\t0\t132\tD\td\n2\tinf_82\t0\t133\tD\td\n2\tinf_83\t0\t134\tD\td\n2\tinf_84\t0\t135\tD\td\n2\tinf_85\t0\t136\tD\td\n2\tinf_86\t0\t137\tD\td\n2\tinf_87\t0\t138\tD\td\n2\tinf_88\t0\t139\tD\td\n2\tinf_89\t0\t140\tD\td\n2\tinf_90\t0\t141\tD\td\n2\tinf_91\t0\t142\tD\td\n2\tinf_92\t0\t143\tD\td\n2\tinf_93\t0\t144\tD\td\n2\tinf_94\t0\t145\tD\td\n2\tinf_95\t0\t146\tD\td\n2\tinf_96\t0\t147
\tD\td\n2\tinf_97\t0\t148\tD\td\n2\tinf_98\t0\t149\tD\td\n2\tinf_99\t0\t150\tD\td\n2\tinf_100\t0\t151\tD\td\n2\tinf_101\t0\t152\tD\td\n2\tinf_102\t0\t153\tD\td\n2\tinf_103\t0\t154\tD\td\n2\tinf_104\t0\t155\tD\td\n2\tinf_105\t0\t156\tD\td\n2\tinf_106\t0\t157\tD\td\n2\tinf_107\t0\t158\tD\td\n2\tinf_108\t0\t159\tD\td\n2\tinf_109\t0\t160\tD\td\n2\tinf_110\t0\t161\tD\td\n2\tinf_111\t0\t162\tD\td\n2\tinf_112\t0\t163\tD\td\n2\tinf_113\t0\t164\tD\td\n2\tinf_114\t0\t165\tD\td\n2\tinf_115\t0\t166\tD\td\n2\tinf_116\t0\t167\tD\td\n2\tinf_117\t0\t168\tD\td\n2\tinf_118\t0\t169\tD\td\n2\tinf_119\t0\t170\tD\td\n2\tinf_120\t0\t171\tD\td\n2\tinf_121\t0\t172\tD\td\n2\tinf_122\t0\t173\tD\td\n2\tinf_123\t0\t174\tD\td\n2\tinf_124\t0\t175\tD\td\n2\tinf_125\t0\t176\tD\td\n2\tinf_126\t0\t177\tD\td\n2\tinf_127\t0\t178\tD\td\n2\tinf_128\t0\t179\tD\td\n2\tinf_129\t0\t180\tD\td\n2\tinf_130\t0\t181\tD\td\n2\tinf_131\t0\t182\tD\td\n2\tinf_132\t0\t183\tD\td\n2\tinf_133\t0\t184\tD\td\n2\tinf_134\t0\t185\tD\td\n2\tinf_135\t0\t186\tD\td\n2\tinf_136\t0\t187\tD\td\n2\tinf_137\t0\t188\tD\td\n2\tinf_138\t0\t189\tD\td\n2\tinf_139\t0\t190\tD\td\n2\tinf_140\t0\t191\tD\td\n2\tinf_141\t0\t192\tD\td\n2\tinf_142\t0\t193\tD\td\n2\tinf_143\t0\t194\tD\td\n2\tinf_144\t0\t195\tD\td\n2\tinf_145\t0\t196\tD\td\n2\tinf_146\t0\t197\tD\td\n2\tinf_147\t0\t198\tD\td\n2\tinf_148\t0\t199\tD\td\n2\tinf_149\t0\t200\tD\td\n2\tinf_150\t0\t201\tD\td\n2\tinf_151\t0\t202\tD\td\n2\tinf_152\t0\t203\tD\td\n2\tinf_153\t0\t204\tD\td\n2\tinf_154\t0\t205\tD\td\n2\tinf_155\t0\t206\tD\td\n2\tinf_156\t0\t207\tD\td\n2\tinf_157\t0\t208\tD\td\n2\tinf_158\t0\t209\tD\td\n2\tinf_159\t0\t210\tD\td\n2\tinf_160\t0\t211\tD\td\n2\tinf_161\t0\t212\tD\td\n2\tinf_162\t0\t213\tD\td\n2\tinf_163\t0\t214\tD\td\n2\tinf_164\t0\t215\tD\td\n2\tinf_165\t0\t216\tD\td\n2\tinf_166\t0\t217\tD\td\n2\tinf_167\t0\t218\tD\td\n2\tinf_168\t0\t219\tD\td\n2\tinf_169\t0\t220\tD\td\n2\tinf_170\t0\t221\tD\td\n2\tinf_171\t0\t222\tD\td\n2\tinf_172\t0\t223\tD\td\n2\tinf_173\t0\t224\tD\td\n2\tinf_174\t0\t225\tD\td\n2\tinf_175\t0\t226\tD\td\n2\tinf_176\t0\t227\tD\td\n2\tinf_177\t0\t228\tD\td\n2\tinf_178\t0\t229\tD\td\n2\tinf_179\t0\t230\tD\td\n2\tinf_180\t0\t231\tD\td\n2\tinf_181\t0\t232\tD\td\n2\tinf_182\t0\t233\tD\td\n2\tinf_183\t0\t234\tD\td\n2\tinf_184\t0\t235\tD\td\n2\tinf_185\t0\t236\tD\td\n2\tinf_186\t0\t237\tD\td\n2\tinf_187\t0\t238\tD\td\n2\tinf_188\t0\t239\tD\td\n2\tinf_189\t0\t240\tD\td\n2\tinf_190\t0\t241\tD\td\n2\tinf_191\t0\t242\tD\td\n2\tinf_192\t0\t243\tD\td\n2\tinf_193\t0\t244\tD\td\n2\tinf_194\t0\t245\tD\td\n2\tinf_195\t0\t246\tD\td\n2\tinf_196\t0\t247\tD\td\n2\tinf_197\t0\t248\tD\td\n2\tinf_198\t0\t249\tD\td\n2\tinf_199\t0\t250\tD\td\n2\tinf_200\t0\t251\tD\td\n2\tinf_201\t0\t252\tD\td\n2\tinf_202\t0\t253\tD\td\n2\tinf_203\t0\t254\tD\td\n2\tinf_204\t0\t255\tD\td\n2\tinf_205\t0\t256\tD\td\n2\tinf_206\t0\t257\tD\td\n2\tinf_207\t0\t258\tD\td\n2\tinf_208\t0\t259\tD\td\n2\tinf_209\t0\t260\tD\td\n2\tinf_210\t0\t261\tD\td\n2\tinf_211\t0\t262\tD\td\n2\tinf_212\t0\t263\tD\td\n2\tinf_213\t0\t264\tD\td\n2\tinf_214\t0\t265\tD\td\n2\tinf_215\t0\t266\tD\td\n2\tinf_216\t0\t267\tD\td\n2\tinf_217\t0\t268\tD\td\n2\tinf_218\t0\t269\tD\td\n2\tinf_219\t0\t270\tD\td\n2\tinf_220\t0\t271\tD\td\n2\tinf_221\t0\t272\tD\td\n2\tinf_222\t0\t273\tD\td\n2\tinf_223\t0\t274\tD\td\n2\tinf_224\t0\t275\tD\td\n2\tinf_225\t0\t276\tD\td\n2\tinf_226\t0\t277\tD\td\n2\tinf_227\t0\t278\tD\td\n2\tinf_228\t0\t279\tD\td\n2\tinf_229\t0\t280\tD\td\n2\tinf_230\t0\t281\tD\td\n2\tinf_231\t0\t282\tD\td\n2\tinf_232\t0\t283\tD\td\n2\tinf_233\t0\
t284\tD\td\n2\tinf_234\t0\t285\tD\td\n2\tinf_235\t0\t286\tD\td\n2\tinf_236\t0\t287\tD\td\n2\tinf_237\t0\t288\tD\td\n2\tinf_238\t0\t289\tD\td\n2\tinf_239\t0\t290\tD\td\n2\tinf_240\t0\t291\tD\td\n2\tinf_241\t0\t292\tD\td\n2\tinf_242\t0\t293\tD\td\n2\tinf_243\t0\t294\tD\td\n2\tinf_244\t0\t295\tD\td\n2\tinf_245\t0\t296\tD\td\n2\tinf_246\t0\t297\tD\td\n2\tinf_247\t0\t298\tD\td\n2\tinf_248\t0\t299\tD\td\n2\tinf_249\t0\t300\tD\td\n2\tinf_250\t0\t301\tD\td\n2\tinf_251\t0\t302\tD\td\n2\tinf_252\t0\t303\tD\td\n2\tinf_253\t0\t304\tD\td\n2\tinf_254\t0\t305\tD\td\n2\tinf_255\t0\t306\tD\td\n2\tinf_256\t0\t307\tD\td\n2\tinf_257\t0\t308\tD\td\n2\tinf_258\t0\t309\tD\td\n2\tinf_259\t0\t310\tD\td\n2\tinf_260\t0\t311\tD\td\n2\tinf_261\t0\t312\tD\td\n2\tinf_262\t0\t313\tD\td\n2\tinf_263\t0\t314\tD\td\n2\tinf_264\t0\t315\tD\td\n2\tinf_265\t0\t316\tD\td\n2\tinf_266\t0\t317\tD\td\n2\tinf_267\t0\t318\tD\td\n2\tinf_268\t0\t319\tD\td\n2\tinf_269\t0\t320\tD\td\n2\tinf_270\t0\t321\tD\td\n2\tinf_271\t0\t322\tD\td\n2\tinf_272\t0\t323\tD\td\n2\tinf_273\t0\t324\tD\td\n2\tinf_274\t0\t325\tD\td\n2\tinf_275\t0\t326\tD\td\n2\tinf_276\t0\t327\tD\td\n2\tinf_277\t0\t328\tD\td\n2\tinf_278\t0\t329\tD\td\n2\tinf_279\t0\t330\tD\td\n2\tinf_280\t0\t331\tD\td\n2\tinf_281\t0\t332\tD\td\n2\tinf_282\t0\t333\tD\td\n2\tinf_283\t0\t334\tD\td\n2\tinf_284\t0\t335\tD\td\n2\tinf_285\t0\t336\tD\td\n2\tinf_286\t0\t337\tD\td\n2\tinf_287\t0\t338\tD\td\n2\tinf_288\t0\t339\tD\td\n2\tinf_289\t0\t340\tD\td\n2\tinf_290\t0\t341\tD\td\n2\tinf_291\t0\t342\tD\td\n2\tinf_292\t0\t343\tD\td\n2\tinf_293\t0\t344\tD\td\n2\tinf_294\t0\t345\tD\td\n2\tinf_295\t0\t346\tD\td\n2\tinf_296\t0\t347\tD\td\n2\tinf_297\t0\t348\tD\td\n2\tinf_298\t0\t349\tD\td\n2\tinf_299\t0\t350\tD\td\n2\tinf_300\t0\t351\tD\td\n2\tinf_301\t0\t352\tD\td\n2\tinf_302\t0\t353\tD\td\n2\tinf_303\t0\t354\tD\td\n2\tinf_304\t0\t355\tD\td\n2\tinf_305\t0\t356\tD\td\n2\tinf_306\t0\t357\tD\td\n2\tinf_307\t0\t358\tD\td\n2\tinf_308\t0\t359\tD\td\n2\tinf_309\t0\t360\tD\td\n2\tinf_310\t0\t361\tD\td\n2\tinf_311\t0\t362\tD\td\n2\tinf_312\t0\t363\tD\td\n2\tinf_313\t0\t364\tD\td\n2\tinf_314\t0\t365\tD\td\n2\tinf_315\t0\t366\tD\td\n2\tinf_316\t0\t367\tD\td\n2\tinf_317\t0\t368\tD\td\n2\tinf_318\t0\t369\tD\td\n2\tinf_319\t0\t370\tD\td\n2\tinf_320\t0\t371\tD\td\n2\tinf_321\t0\t372\tD\td\n2\tinf_322\t0\t373\tD\td\n2\tinf_323\t0\t374\tD\td\n2\tinf_324\t0\t375\tD\td\n2\tinf_325\t0\t376\tD\td\n2\tinf_326\t0\t377\tD\td\n2\tinf_327\t0\t378\tD\td\n2\tinf_328\t0\t379\tD\td\n2\tinf_329\t0\t380\tD\td\n2\tinf_330\t0\t381\tD\td\n2\tinf_331\t0\t382\tD\td\n2\tinf_332\t0\t383\tD\td\n2\tinf_333\t0\t384\tD\td\n2\tinf_334\t0\t385\tD\td\n2\tinf_335\t0\t386\tD\td\n2\tinf_336\t0\t387\tD\td\n2\tinf_337\t0\t388\tD\td\n2\tinf_338\t0\t389\tD\td\n2\tinf_339\t0\t390\tD\td\n2\tinf_340\t0\t391\tD\td\n2\tinf_341\t0\t392\tD\td\n2\tinf_342\t0\t393\tD\td\n2\tinf_343\t0\t394\tD\td\n2\tinf_344\t0\t395\tD\td\n2\tinf_345\t0\t396\tD\td\n2\tinf_346\t0\t397\tD\td\n2\tinf_347\t0\t398\tD\td\n2\tinf_348\t0\t399\tD\td\n2\tinf_349\t0\t400\tD\td\n2\tinf_350\t0\t401\tD\td\n2\tinf_351\t0\t402\tD\td\n2\tinf_352\t0\t403\tD\td\n2\tinf_353\t0\t404\tD\td\n2\tinf_354\t0\t405\tD\td\n2\tinf_355\t0\t406\tD\td\n2\tinf_356\t0\t407\tD\td\n2\tinf_357\t0\t408\tD\td\n2\tinf_358\t0\t409\tD\td\n2\tinf_359\t0\t410\tD\td\n2\tinf_360\t0\t411\tD\td\n2\tinf_361\t0\t412\tD\td\n2\tinf_362\t0\t413\tD\td\n2\tinf_363\t0\t414\tD\td\n2\tinf_364\t0\t415\tD\td\n2\tinf_365\t0\t416\tD\td\n2\tinf_366\t0\t417\tD\td\n2\tinf_367\t0\t418\tD\td\n2\tinf_368\t0\t419\tD\td\n2\tinf_369\t0\t420\tD\td\n2\tinf_
370\t0\t421\tD\td\n2\tinf_371\t0\t422\tD\td\n2\tinf_372\t0\t423\tD\td\n2\tinf_373\t0\t424\tD\td\n2\tinf_374\t0\t425\tD\td\n2\tinf_375\t0\t426\tD\td\n2\tinf_376\t0\t427\tD\td\n2\tinf_377\t0\t428\tD\td\n2\tinf_378\t0\t429\tD\td\n2\tinf_379\t0\t430\tD\td\n2\tinf_380\t0\t431\tD\td\n2\tinf_381\t0\t432\tD\td\n2\tinf_382\t0\t433\tD\td\n2\tinf_383\t0\t434\tD\td\n2\tinf_384\t0\t435\tD\td\n2\tinf_385\t0\t436\tD\td\n2\tinf_386\t0\t437\tD\td\n2\tinf_387\t0\t438\tD\td\n2\tinf_388\t0\t439\tD\td\n2\tinf_389\t0\t440\tD\td\n2\tinf_390\t0\t441\tD\td\n2\tinf_391\t0\t442\tD\td\n2\tinf_392\t0\t443\tD\td\n2\tinf_393\t0\t444\tD\td\n2\tinf_394\t0\t445\tD\td\n2\tinf_395\t0\t446\tD\td\n2\tinf_396\t0\t447\tD\td\n2\tinf_397\t0\t448\tD\td\n2\tinf_398\t0\t449\tD\td\n2\tinf_399\t0\t450\tD\td\n3\tnull_0\t0\t451\tD\td\n3\tnull_1\t0\t452\tD\td\n3\tnull_2\t0\t453\tD\td\n3\tnull_3\t0\t454\tD\td\n3\tnull_4\t0\t455\tD\td\n3\tnull_5\t0\t456\tD\td\n3\tnull_6\t0\t457\tD\td\n3\tnull_7\t0\t458\tD\td\n3\tnull_8\t0\t459\tD\td\n3\tnull_9\t0\t460\tD\td\n3\tnull_10\t0\t461\tD\td\n3\tnull_11\t0\t462\tD\td\n3\tnull_12\t0\t463\tD\td\n3\tnull_13\t0\t464\tD\td\n3\tnull_14\t0\t465\tD\td\n3\tnull_15\t0\t466\tD\td\n3\tnull_16\t0\t467\tD\td\n3\tnull_17\t0\t468\tD\td\n3\tnull_18\t0\t469\tD\td\n3\tnull_19\t0\t470\tD\td\n3\tnull_20\t0\t471\tD\td\n3\tnull_21\t0\t472\tD\td\n3\tnull_22\t0\t473\tD\td\n3\tnull_23\t0\t474\tD\td\n3\tnull_24\t0\t475\tD\td\n3\tnull_25\t0\t476\tD\td\n3\tnull_26\t0\t477\tD\td\n3\tnull_27\t0\t478\tD\td\n3\tnull_28\t0\t479\tD\td\n3\tnull_29\t0\t480\tD\td\n3\tnull_30\t0\t481\tD\td\n3\tnull_31\t0\t482\tD\td\n3\tnull_32\t0\t483\tD\td\n3\tnull_33\t0\t484\tD\td\n3\tnull_34\t0\t485\tD\td\n3\tnull_35\t0\t486\tD\td\n3\tnull_36\t0\t487\tD\td\n3\tnull_37\t0\t488\tD\td\n3\tnull_38\t0\t489\tD\td\n3\tnull_39\t0\t490\tD\td\n3\tnull_40\t0\t491\tD\td\n3\tnull_41\t0\t492\tD\td\n3\tnull_42\t0\t493\tD\td\n3\tnull_43\t0\t494\tD\td\n3\tnull_44\t0\t495\tD\td\n3\tnull_45\t0\t496\tD\td\n3\tnull_46\t0\t497\tD\td\n3\tnull_47\t0\t498\tD\td\n3\tnull_48\t0\t499\tD\td\n3\tnull_49\t0\t500\tD\td\n"
  },
  {
    "path": "example/example_3chr.fam",
    "content": "1\t1\t0\t0\t0\t-0.4267761781290719\n2\t2\t0\t0\t0\t0.553949147355921\n3\t3\t0\t0\t0\t-0.6501615855731889\n4\t4\t0\t0\t0\t0.6658237998926421\n5\t5\t0\t0\t0\t2.153525389108508\n6\t6\t0\t0\t0\t1.603607081080267\n7\t7\t0\t0\t0\t0.9245092282950891\n8\t8\t0\t0\t0\t0.16695520568640973\n9\t9\t0\t0\t0\t-0.44678076953973184\n10\t10\t0\t0\t0\t0.9130182655669581\n11\t11\t0\t0\t0\t-0.12365315280028831\n12\t12\t0\t0\t0\t0.45120658048742923\n13\t13\t0\t0\t0\t-1.813422352445905\n14\t14\t0\t0\t0\t0.10916042619322039\n15\t15\t0\t0\t0\t1.2606692333542373\n16\t16\t0\t0\t0\t0.5532311359350739\n17\t17\t0\t0\t0\t0.20945215196889685\n18\t18\t0\t0\t0\t-0.5132181734188253\n19\t19\t0\t0\t0\t0.6119067760087953\n20\t20\t0\t0\t0\t1.5394566797469251\n21\t21\t0\t0\t0\t-0.3865037693784083\n22\t22\t0\t0\t0\t-2.5143419880300866\n23\t23\t0\t0\t0\t0.405703045213449\n24\t24\t0\t0\t0\t0.5185603274229008\n25\t25\t0\t0\t0\t1.0611484493881813\n26\t26\t0\t0\t0\t0.19594051011058977\n27\t27\t0\t0\t0\t-0.24979581720203475\n28\t28\t0\t0\t0\t-0.2676477096598135\n29\t29\t0\t0\t0\t0.7177935231258309\n30\t30\t0\t0\t0\t1.1861246517682622\n31\t31\t0\t0\t0\t0.08629622130288517\n32\t32\t0\t0\t0\t-0.35304293360949707\n33\t33\t0\t0\t0\t0.2354296885149071\n34\t34\t0\t0\t0\t1.530452595962293\n35\t35\t0\t0\t0\t0.4579547646635299\n36\t36\t0\t0\t0\t0.4118304659618171\n37\t37\t0\t0\t0\t1.0173491056828072\n38\t38\t0\t0\t0\t-1.3848053256406825\n39\t39\t0\t0\t0\t1.7510265942816932\n40\t40\t0\t0\t0\t0.274954268508773\n41\t41\t0\t0\t0\t-1.9897038536605167\n42\t42\t0\t0\t0\t-0.2744230179428616\n43\t43\t0\t0\t0\t1.426414899954135\n44\t44\t0\t0\t0\t1.6794358446939521\n45\t45\t0\t0\t0\t-1.9762223272212731\n46\t46\t0\t0\t0\t1.31328879515592\n47\t47\t0\t0\t0\t-1.2379452021381565\n48\t48\t0\t0\t0\t-1.2230942245689198\n49\t49\t0\t0\t0\t-0.2086823379220931\n50\t50\t0\t0\t0\t0.046591344965807927\n51\t51\t0\t0\t0\t0.6454664759221705\n52\t52\t0\t0\t0\t-0.3366711458816414\n53\t53\t0\t0\t0\t-1.4251415011354847\n54\t54\t0\t0\t0\t-1.7404106776289252\n55\t55\t0\t0\t0\t-0.9899916360986122\n56\t56\t0\t0\t0\t-0.6813055796709735\n57\t57\t0\t0\t0\t1.4523360524496092\n58\t58\t0\t0\t0\t-0.14395595141327489\n59\t59\t0\t0\t0\t-0.4378362565094535\n60\t60\t0\t0\t0\t-0.1519268773269188\n61\t61\t0\t0\t0\t-1.0918707470545093\n62\t62\t0\t0\t0\t0.11886585929161704\n63\t63\t0\t0\t0\t-0.7446821129490644\n64\t64\t0\t0\t0\t-0.3515988379695028\n65\t65\t0\t0\t0\t-0.2505242457789806\n66\t66\t0\t0\t0\t-1.1445325100301038\n67\t67\t0\t0\t0\t0.4021008709651108\n68\t68\t0\t0\t0\t-0.26967294800034874\n69\t69\t0\t0\t0\t-1.071043452154829\n70\t70\t0\t0\t0\t0.5466104498110623\n71\t71\t0\t0\t0\t0.9059888912622454\n72\t72\t0\t0\t0\t-0.9302463996121689\n73\t73\t0\t0\t0\t-0.1649348560132909\n74\t74\t0\t0\t0\t-0.2592977286319428\n75\t75\t0\t0\t0\t-2.114476477667718\n76\t76\t0\t0\t0\t-0.6076341029698831\n77\t77\t0\t0\t0\t0.7748006221315341\n78\t78\t0\t0\t0\t-2.0962224712289843\n79\t79\t0\t0\t0\t-1.9116579659512944\n80\t80\t0\t0\t0\t1.0353397078763547\n81\t81\t0\t0\t0\t0.7294502561042351\n82\t82\t0\t0\t0\t-0.09793421592240978\n83\t83\t0\t0\t0\t0.24566617144478756\n84\t84\t0\t0\t0\t0.9386731103091898\n85\t85\t0\t0\t0\t1.7337564931524567\n86\t86\t0\t0\t0\t3.5965463607888486\n87\t87\t0\t0\t0\t0.10417950242086334\n88\t88\t0\t0\t0\t0.8181457381771525\n89\t89\t0\t0\t0\t-0.4717474685706561\n90\t90\t0\t0\t0\t1.3263850865010316\n91\t91\t0\t0\t0\t-0.616465031940274\n92\t92\t0\t0\t0\t-1.1963740018230864\n93\t93\t0\t0\t0\t-2.523429651265045\n94\t94\t0\t0\t0\t-1.6775958547520566\n95\t95\t0\t0\t0\t-1.325183339
632047\n96\t96\t0\t0\t0\t0.5882935940652537\n97\t97\t0\t0\t0\t1.2377386081497168\n98\t98\t0\t0\t0\t-0.2927710750283572\n99\t99\t0\t0\t0\t2.2130683684766117\n100\t100\t0\t0\t0\t1.3907852235803377\n101\t101\t0\t0\t0\t0.3489648675330413\n102\t102\t0\t0\t0\t-0.17965231137324936\n103\t103\t0\t0\t0\t-1.714950186712027\n104\t104\t0\t0\t0\t-0.3606425914729865\n105\t105\t0\t0\t0\t0.2558071082994665\n106\t106\t0\t0\t0\t0.7848503716979157\n107\t107\t0\t0\t0\t2.505180527543909\n108\t108\t0\t0\t0\t-1.8168385585899676\n109\t109\t0\t0\t0\t-0.01565688433622611\n110\t110\t0\t0\t0\t0.7040291497308593\n111\t111\t0\t0\t0\t-0.011386617718583729\n112\t112\t0\t0\t0\t-0.6404979013481404\n113\t113\t0\t0\t0\t2.0784381335821682\n114\t114\t0\t0\t0\t-0.2254928329575596\n115\t115\t0\t0\t0\t0.0021879911560499826\n116\t116\t0\t0\t0\t0.4530758437690287\n117\t117\t0\t0\t0\t0.7111556836519528\n118\t118\t0\t0\t0\t-7.926457716849677e-5\n119\t119\t0\t0\t0\t1.2716437121248285\n120\t120\t0\t0\t0\t0.13091641539525625\n121\t121\t0\t0\t0\t0.9601642531802883\n122\t122\t0\t0\t0\t-0.6946269492816117\n123\t123\t0\t0\t0\t1.2083545960448898\n124\t124\t0\t0\t0\t-0.5558287669378148\n125\t125\t0\t0\t0\t0.40869932288799415\n126\t126\t0\t0\t0\t0.8632834147663099\n127\t127\t0\t0\t0\t0.8642022294643745\n128\t128\t0\t0\t0\t-0.45151257769571096\n129\t129\t0\t0\t0\t-0.1914051114382599\n130\t130\t0\t0\t0\t-0.037649137239090524\n131\t131\t0\t0\t0\t1.069953907594907\n132\t132\t0\t0\t0\t0.7332632139169448\n133\t133\t0\t0\t0\t-1.3859563214470878\n134\t134\t0\t0\t0\t-0.16882598316337785\n135\t135\t0\t0\t0\t1.2341578254519214\n136\t136\t0\t0\t0\t-0.13018118909812346\n137\t137\t0\t0\t0\t-0.849622061183973\n138\t138\t0\t0\t0\t1.65558791310931\n139\t139\t0\t0\t0\t-0.8557488969973114\n140\t140\t0\t0\t0\t-0.591996102804245\n141\t141\t0\t0\t0\t1.4766090213622798\n142\t142\t0\t0\t0\t-1.7606788880705646\n143\t143\t0\t0\t0\t0.3331349454598871\n144\t144\t0\t0\t0\t-1.316796818445614\n145\t145\t0\t0\t0\t2.2658417254598926\n146\t146\t0\t0\t0\t-0.5855466346375702\n147\t147\t0\t0\t0\t1.004242318973581\n148\t148\t0\t0\t0\t0.5950634513012234\n149\t149\t0\t0\t0\t-2.3589818092376342\n150\t150\t0\t0\t0\t1.020059747279963\n151\t151\t0\t0\t0\t-1.2578575928423548\n152\t152\t0\t0\t0\t-0.09700531448496873\n153\t153\t0\t0\t0\t0.33638529718875043\n154\t154\t0\t0\t0\t1.7972793724047749\n155\t155\t0\t0\t0\t0.27013487552553606\n156\t156\t0\t0\t0\t-0.3808633841949227\n157\t157\t0\t0\t0\t0.04018926790384476\n158\t158\t0\t0\t0\t-0.3965127185254764\n159\t159\t0\t0\t0\t1.4936455086379186\n160\t160\t0\t0\t0\t-1.2993293254003835\n161\t161\t0\t0\t0\t0.2519277543876505\n162\t162\t0\t0\t0\t0.691774608729148\n163\t163\t0\t0\t0\t0.7407513064839535\n164\t164\t0\t0\t0\t-0.31056687191510746\n165\t165\t0\t0\t0\t-0.5552597332076762\n166\t166\t0\t0\t0\t-1.0057835319597124\n167\t167\t0\t0\t0\t0.05450686156271301\n168\t168\t0\t0\t0\t-0.6349282429298715\n169\t169\t0\t0\t0\t-0.6695465895569117\n170\t170\t0\t0\t0\t-0.23226063332510133\n171\t171\t0\t0\t0\t0.7035825230515401\n172\t172\t0\t0\t0\t-0.31327835211735766\n173\t173\t0\t0\t0\t-0.24917314601702809\n174\t174\t0\t0\t0\t-0.6592080848536208\n175\t175\t0\t0\t0\t-0.9477879953319399\n176\t176\t0\t0\t0\t0.48009125370245204\n177\t177\t0\t0\t0\t0.4706100983663504\n178\t178\t0\t0\t0\t-0.06505632309350708\n179\t179\t0\t0\t0\t-1.4959269095746512\n180\t180\t0\t0\t0\t0.5405485678497597\n181\t181\t0\t0\t0\t1.1232696382170495\n182\t182\t0\t0\t0\t0.0074418075185122565\n183\t183\t0\t0\t0\t0.6639986494873168\n184\t184\t0\t0\t0\t-0.5678566487451827\n185\t185\t0\t0\t0\t0.2
299383937915558\n186\t186\t0\t0\t0\t-0.17076218909155186\n187\t187\t0\t0\t0\t0.3420901284971981\n188\t188\t0\t0\t0\t-0.10950363726652125\n189\t189\t0\t0\t0\t-0.06808902942736765\n190\t190\t0\t0\t0\t-2.3144897119335965\n191\t191\t0\t0\t0\t-1.0777725703353438\n192\t192\t0\t0\t0\t-0.3394034459756072\n193\t193\t0\t0\t0\t-1.5321020124626434\n194\t194\t0\t0\t0\t1.277838035763474\n195\t195\t0\t0\t0\t-0.6186969481783715\n196\t196\t0\t0\t0\t0.24483826998859312\n197\t197\t0\t0\t0\t-0.08972472068807524\n198\t198\t0\t0\t0\t0.28563820059484707\n199\t199\t0\t0\t0\t0.02665228610242174\n200\t200\t0\t0\t0\t-0.8558456209545621\n201\t201\t0\t0\t0\t0.10326780109110945\n202\t202\t0\t0\t0\t-0.4683803455991253\n203\t203\t0\t0\t0\t-0.039629458692737506\n204\t204\t0\t0\t0\t0.5010288555718972\n205\t205\t0\t0\t0\t-1.614633788145904\n206\t206\t0\t0\t0\t-0.8221081713776588\n207\t207\t0\t0\t0\t-0.4154327362321607\n208\t208\t0\t0\t0\t0.19255584244438567\n209\t209\t0\t0\t0\t-0.013616312773019643\n210\t210\t0\t0\t0\t-0.475529391106999\n211\t211\t0\t0\t0\t0.10232966150402548\n212\t212\t0\t0\t0\t0.7488940724700807\n213\t213\t0\t0\t0\t0.44197886128432695\n214\t214\t0\t0\t0\t1.018806470717939\n215\t215\t0\t0\t0\t1.187231623861802\n216\t216\t0\t0\t0\t0.8022976145656721\n217\t217\t0\t0\t0\t-0.4774086494408861\n218\t218\t0\t0\t0\t-0.14197769264202192\n219\t219\t0\t0\t0\t-0.1747777783307673\n220\t220\t0\t0\t0\t1.0313282851738685\n221\t221\t0\t0\t0\t0.09414395331571555\n222\t222\t0\t0\t0\t0.07864110999174483\n223\t223\t0\t0\t0\t-0.5611965282485419\n224\t224\t0\t0\t0\t-0.6176835872308691\n225\t225\t0\t0\t0\t-0.624780748917754\n226\t226\t0\t0\t0\t0.7262504583960367\n227\t227\t0\t0\t0\t0.6736681336462624\n228\t228\t0\t0\t0\t1.2074649724277058\n229\t229\t0\t0\t0\t-0.7000910464946777\n230\t230\t0\t0\t0\t1.0132498153713987\n231\t231\t0\t0\t0\t0.1086125064282297\n232\t232\t0\t0\t0\t1.5378432998509857\n233\t233\t0\t0\t0\t-0.38864483884544576\n234\t234\t0\t0\t0\t0.5853901243650999\n235\t235\t0\t0\t0\t0.6361493273442497\n236\t236\t0\t0\t0\t-2.3727188226249276\n237\t237\t0\t0\t0\t-0.1546518851418047\n238\t238\t0\t0\t0\t-0.4535321516545124\n239\t239\t0\t0\t0\t0.20059745542046717\n240\t240\t0\t0\t0\t1.6700071185481635\n241\t241\t0\t0\t0\t1.0303745125812633\n242\t242\t0\t0\t0\t-0.2080769141106961\n243\t243\t0\t0\t0\t0.1640491851783853\n244\t244\t0\t0\t0\t-0.28813258919928886\n245\t245\t0\t0\t0\t0.5230500717020431\n246\t246\t0\t0\t0\t0.3072979372247395\n247\t247\t0\t0\t0\t0.11651870056658613\n248\t248\t0\t0\t0\t-0.0020664157587947755\n249\t249\t0\t0\t0\t0.6554963064376974\n250\t250\t0\t0\t0\t-1.2541501943392634\n251\t251\t0\t0\t0\t0.0072657109833799294\n252\t252\t0\t0\t0\t2.6072042439689764\n253\t253\t0\t0\t0\t0.7873316028203755\n254\t254\t0\t0\t0\t-0.5739394940501295\n255\t255\t0\t0\t0\t0.4301393873631534\n256\t256\t0\t0\t0\t0.4980033880636772\n257\t257\t0\t0\t0\t-0.472708427483925\n258\t258\t0\t0\t0\t-2.115773568613388\n259\t259\t0\t0\t0\t-0.3027911974132581\n260\t260\t0\t0\t0\t0.1357209008468077\n261\t261\t0\t0\t0\t0.7462827401513036\n262\t262\t0\t0\t0\t1.0510433172779923\n263\t263\t0\t0\t0\t-0.9143361188838738\n264\t264\t0\t0\t0\t-1.980776740834876\n265\t265\t0\t0\t0\t0.5590372219564149\n266\t266\t0\t0\t0\t-1.4215222131613703\n267\t267\t0\t0\t0\t0.9366889840084335\n268\t268\t0\t0\t0\t0.38076689746763476\n269\t269\t0\t0\t0\t0.9453281558278972\n270\t270\t0\t0\t0\t0.057035845832356935\n271\t271\t0\t0\t0\t1.4745050564310078\n272\t272\t0\t0\t0\t-0.5174737513107168\n273\t273\t0\t0\t0\t-1.5571441883774875\n274\t274\t0\t0\t0\t-1.0887963283029611\n275
\t275\t0\t0\t0\t1.2310204712452886\n276\t276\t0\t0\t0\t-0.4769402620727404\n277\t277\t0\t0\t0\t-0.22419481188150195\n278\t278\t0\t0\t0\t-0.5375714580159776\n279\t279\t0\t0\t0\t0.019698038241142538\n280\t280\t0\t0\t0\t1.1825833509904307\n281\t281\t0\t0\t0\t-0.7811541586520457\n282\t282\t0\t0\t0\t-0.4601117935870594\n283\t283\t0\t0\t0\t0.7033250873409186\n284\t284\t0\t0\t0\t-0.8409243958041983\n285\t285\t0\t0\t0\t0.5573349116266615\n286\t286\t0\t0\t0\t-1.6278769464058696\n287\t287\t0\t0\t0\t1.6583911801881541\n288\t288\t0\t0\t0\t-1.212032621830397\n289\t289\t0\t0\t0\t-0.9721467722081951\n290\t290\t0\t0\t0\t-0.5965740298455487\n291\t291\t0\t0\t0\t-1.2351363670664182\n292\t292\t0\t0\t0\t1.641895301641597\n293\t293\t0\t0\t0\t-0.42141703006240455\n294\t294\t0\t0\t0\t0.38753100560495457\n295\t295\t0\t0\t0\t-1.070382200424481\n296\t296\t0\t0\t0\t-0.517780452691308\n297\t297\t0\t0\t0\t1.1565871160947803\n298\t298\t0\t0\t0\t-0.6679020556063455\n299\t299\t0\t0\t0\t-0.970542580114166\n300\t300\t0\t0\t0\t-0.8655455001063305\n301\t301\t0\t0\t0\t-1.0789380083099478\n302\t302\t0\t0\t0\t-1.2440796197575608\n303\t303\t0\t0\t0\t-1.0446790730803917\n304\t304\t0\t0\t0\t-0.5695802535356469\n305\t305\t0\t0\t0\t-0.8473514861984687\n306\t306\t0\t0\t0\t-1.190100919435714\n307\t307\t0\t0\t0\t1.022380976723825\n308\t308\t0\t0\t0\t-1.9790492535631858\n309\t309\t0\t0\t0\t-1.5253493737129327\n310\t310\t0\t0\t0\t1.0894028037803278\n311\t311\t0\t0\t0\t0.6962558464296542\n312\t312\t0\t0\t0\t0.9874048999158505\n313\t313\t0\t0\t0\t0.11245625794433396\n314\t314\t0\t0\t0\t-0.050027987070537086\n315\t315\t0\t0\t0\t-0.8717433837106624\n316\t316\t0\t0\t0\t0.8146794446207906\n317\t317\t0\t0\t0\t-0.5526641897761696\n318\t318\t0\t0\t0\t1.2573937760035447\n319\t319\t0\t0\t0\t-1.2648304446149456\n320\t320\t0\t0\t0\t1.6530274769204985\n321\t321\t0\t0\t0\t1.30110161745003\n322\t322\t0\t0\t0\t-0.02018948002691863\n323\t323\t0\t0\t0\t-1.8836740231429716\n324\t324\t0\t0\t0\t1.4205846188601483\n325\t325\t0\t0\t0\t-0.689172153743396\n326\t326\t0\t0\t0\t-0.3044880253487302\n327\t327\t0\t0\t0\t-0.08520417701732463\n328\t328\t0\t0\t0\t2.851634453916995\n329\t329\t0\t0\t0\t-1.156810978360592\n330\t330\t0\t0\t0\t0.2825803847207173\n331\t331\t0\t0\t0\t-1.9500708808705018\n332\t332\t0\t0\t0\t-1.1752612835403988\n333\t333\t0\t0\t0\t-1.3324335054916632\n334\t334\t0\t0\t0\t0.706410116238176\n335\t335\t0\t0\t0\t-0.4389661673885548\n336\t336\t0\t0\t0\t-0.47198568412366393\n337\t337\t0\t0\t0\t-0.196005627932633\n338\t338\t0\t0\t0\t-0.3926918515697784\n339\t339\t0\t0\t0\t-1.2586337442612614\n340\t340\t0\t0\t0\t-0.28572183266312384\n341\t341\t0\t0\t0\t-0.821333467643559\n342\t342\t0\t0\t0\t1.0406689409301526\n343\t343\t0\t0\t0\t-0.38669408702632374\n344\t344\t0\t0\t0\t-1.7090986597057412\n345\t345\t0\t0\t0\t0.567446046270448\n346\t346\t0\t0\t0\t0.01714990950414205\n347\t347\t0\t0\t0\t0.0977745741751222\n348\t348\t0\t0\t0\t0.822230557498965\n349\t349\t0\t0\t0\t0.3963812215631262\n350\t350\t0\t0\t0\t-1.4985341158085754\n351\t351\t0\t0\t0\t-0.029960047096039638\n352\t352\t0\t0\t0\t-0.6528662364260229\n353\t353\t0\t0\t0\t-0.12170910991193998\n354\t354\t0\t0\t0\t1.0300254802129547\n355\t355\t0\t0\t0\t-0.16690139482289537\n356\t356\t0\t0\t0\t0.8377731253742943\n357\t357\t0\t0\t0\t-0.3887229530927819\n358\t358\t0\t0\t0\t0.14451356330621065\n359\t359\t0\t0\t0\t1.6073233297076983\n360\t360\t0\t0\t0\t0.07950441212552996\n361\t361\t0\t0\t0\t2.1497157962418174\n362\t362\t0\t0\t0\t-0.30427052777531594\n363\t363\t0\t0\t0\t-0.3025923417388714\n364\t364\t0\t0\t0\t-0.439785
0120995375\n365\t365\t0\t0\t0\t0.45039620392874236\n366\t366\t0\t0\t0\t-0.8475714740581328\n367\t367\t0\t0\t0\t0.49438582682424076\n368\t368\t0\t0\t0\t1.7877305550184646\n369\t369\t0\t0\t0\t1.4928728753892113\n370\t370\t0\t0\t0\t1.0028215376206\n371\t371\t0\t0\t0\t-1.579481582437942\n372\t372\t0\t0\t0\t-1.4112071973049491\n373\t373\t0\t0\t0\t-0.3287910850620258\n374\t374\t0\t0\t0\t0.6335258906599922\n375\t375\t0\t0\t0\t1.5884402065569267\n376\t376\t0\t0\t0\t0.239439976093505\n377\t377\t0\t0\t0\t0.3730542303129533\n378\t378\t0\t0\t0\t-0.23125572369556183\n379\t379\t0\t0\t0\t0.04114817676435756\n380\t380\t0\t0\t0\t1.3148209110260505\n381\t381\t0\t0\t0\t0.4153394063226016\n382\t382\t0\t0\t0\t-2.119037113120731\n383\t383\t0\t0\t0\t-0.7357404438785312\n384\t384\t0\t0\t0\t-0.5458519940698457\n385\t385\t0\t0\t0\t-1.2600086040654477\n386\t386\t0\t0\t0\t0.5429663518299074\n387\t387\t0\t0\t0\t-0.24886613651789177\n388\t388\t0\t0\t0\t1.393586775779869\n389\t389\t0\t0\t0\t0.7655351504255072\n390\t390\t0\t0\t0\t1.574341948844662\n391\t391\t0\t0\t0\t0.28337819436221234\n392\t392\t0\t0\t0\t-0.28215538210870866\n393\t393\t0\t0\t0\t0.32677930042765563\n394\t394\t0\t0\t0\t-0.8933433333160953\n395\t395\t0\t0\t0\t-0.30611223649551184\n396\t396\t0\t0\t0\t-1.6327825926249717\n397\t397\t0\t0\t0\t0.983996289771191\n398\t398\t0\t0\t0\t-2.1094276028051713\n399\t399\t0\t0\t0\t-0.03004231237784647\n400\t400\t0\t0\t0\t0.2677517204111873\n401\t401\t0\t0\t0\t0.024441125372044015\n402\t402\t0\t0\t0\t-0.5518207938279379\n403\t403\t0\t0\t0\t-0.012559704828846413\n404\t404\t0\t0\t0\t0.7819900863897892\n405\t405\t0\t0\t0\t0.1331898731839282\n406\t406\t0\t0\t0\t0.48489696095614787\n407\t407\t0\t0\t0\t0.6642443953751778\n408\t408\t0\t0\t0\t-0.8030871260696271\n409\t409\t0\t0\t0\t-0.1895839551659786\n410\t410\t0\t0\t0\t-0.8034926266352286\n411\t411\t0\t0\t0\t0.09187570772040049\n412\t412\t0\t0\t0\t0.029248814008252747\n413\t413\t0\t0\t0\t-0.01533630905590288\n414\t414\t0\t0\t0\t1.1280827301911354\n415\t415\t0\t0\t0\t-0.2065160921398899\n416\t416\t0\t0\t0\t-0.05126145885052354\n417\t417\t0\t0\t0\t-1.3434267805209248\n418\t418\t0\t0\t0\t-0.18518422658884665\n419\t419\t0\t0\t0\t-0.24844114482463325\n420\t420\t0\t0\t0\t-0.06967600698783584\n421\t421\t0\t0\t0\t0.37664675241912393\n422\t422\t0\t0\t0\t-0.2569614554903415\n423\t423\t0\t0\t0\t0.7233028036905063\n424\t424\t0\t0\t0\t1.1103498482848735\n425\t425\t0\t0\t0\t-0.11945647973589903\n426\t426\t0\t0\t0\t-0.6396046050764921\n427\t427\t0\t0\t0\t0.5643452560242491\n428\t428\t0\t0\t0\t-0.3440947789770541\n429\t429\t0\t0\t0\t-2.22653169151592\n430\t430\t0\t0\t0\t-0.956032968271431\n431\t431\t0\t0\t0\t-0.8191888326171307\n432\t432\t0\t0\t0\t-2.4494492803640022\n433\t433\t0\t0\t0\t-0.0670051947640402\n434\t434\t0\t0\t0\t-1.0212450480574558\n435\t435\t0\t0\t0\t1.5642134356006847\n436\t436\t0\t0\t0\t-0.7686850358271917\n437\t437\t0\t0\t0\t0.9013174516851364\n438\t438\t0\t0\t0\t0.32615832192874417\n439\t439\t0\t0\t0\t-0.1396620902625725\n440\t440\t0\t0\t0\t0.7175824082953346\n441\t441\t0\t0\t0\t-1.298832108780333\n442\t442\t0\t0\t0\t-0.6380248204283618\n443\t443\t0\t0\t0\t1.3777981524996168\n444\t444\t0\t0\t0\t1.650381986618911\n445\t445\t0\t0\t0\t0.5867431709972836\n446\t446\t0\t0\t0\t-0.005896067871319781\n447\t447\t0\t0\t0\t0.13319016302360676\n448\t448\t0\t0\t0\t-1.2696437250217552\n449\t449\t0\t0\t0\t-0.12518065096985312\n450\t450\t0\t0\t0\t-1.244542673659207\n451\t451\t0\t0\t0\t-0.06610093209244285\n452\t452\t0\t0\t0\t2.5740374683027536\n453\t453\t0\t0\t0\t0.9289638283088296\n454\t454\
t0\t0\t0\t0.5106401356154838\n455\t455\t0\t0\t0\t-1.3219451447015316\n456\t456\t0\t0\t0\t1.3056699098740336\n457\t457\t0\t0\t0\t-0.7429788972407904\n458\t458\t0\t0\t0\t-0.5935982739213578\n459\t459\t0\t0\t0\t-1.2444771294790702\n460\t460\t0\t0\t0\t-0.11027752291961805\n461\t461\t0\t0\t0\t0.15968337228108687\n462\t462\t0\t0\t0\t0.10015721622156738\n463\t463\t0\t0\t0\t-1.6712997326777923\n464\t464\t0\t0\t0\t1.1224222088370677\n465\t465\t0\t0\t0\t-0.8848696262558821\n466\t466\t0\t0\t0\t-0.43388376934423967\n467\t467\t0\t0\t0\t0.2330906457766794\n468\t468\t0\t0\t0\t-0.06850210820133934\n469\t469\t0\t0\t0\t-0.43079920208533806\n470\t470\t0\t0\t0\t0.6801247297121218\n471\t471\t0\t0\t0\t0.7062942755405903\n472\t472\t0\t0\t0\t-1.0125163790266498\n473\t473\t0\t0\t0\t1.1402191818385525\n474\t474\t0\t0\t0\t-0.09512756382895157\n475\t475\t0\t0\t0\t1.2043113720050476\n476\t476\t0\t0\t0\t-1.3101439365860013\n477\t477\t0\t0\t0\t-1.010236868117858\n478\t478\t0\t0\t0\t0.8681701631563481\n479\t479\t0\t0\t0\t-1.1399558386575048\n480\t480\t0\t0\t0\t0.2205328932586634\n481\t481\t0\t0\t0\t-0.5859626268400073\n482\t482\t0\t0\t0\t0.9365883769687702\n483\t483\t0\t0\t0\t-1.589591178874074\n484\t484\t0\t0\t0\t0.1145164389832793\n485\t485\t0\t0\t0\t0.275658677194189\n486\t486\t0\t0\t0\t1.8238148121727578\n487\t487\t0\t0\t0\t0.9195632859199347\n488\t488\t0\t0\t0\t1.1992281082636649\n489\t489\t0\t0\t0\t-0.28717216124743894\n490\t490\t0\t0\t0\t-0.1255644094578015\n491\t491\t0\t0\t0\t0.7660599548056037\n492\t492\t0\t0\t0\t0.0485016949386254\n493\t493\t0\t0\t0\t0.9191104275425048\n494\t494\t0\t0\t0\t-0.07849111590769194\n495\t495\t0\t0\t0\t-1.602274500274993\n496\t496\t0\t0\t0\t-2.016184028763443\n497\t497\t0\t0\t0\t1.3436149731702158\n498\t498\t0\t0\t0\t-1.8171186074532928\n499\t499\t0\t0\t0\t-0.46317733040012826\n500\t500\t0\t0\t0\t-0.9433895891176977\n"
  },
  {
    "path": "example/example_3chr.joint_setlist",
    "content": "SET1 1 0 mog_19,mog_20,mog_21,mog_22,mog_23,mog_24,mog_25,mog_26,mog_27,mog_28,mog_29,mog_30,mog_31,mog_32,mog_33,mog_34\nSET2 3 400 null_0 null_1 null_2 null_3 null_4\n"
  },
  {
    "path": "example/example_3chr.masks",
    "content": "M1\tpLof\nM2\tpLof,missense,unknown1\nM3\tunknown1,unknown2\n"
  },
  {
    "path": "example/example_3chr.sample",
    "content": "ID_1 ID_2 missing sex PHENO1\n0 0 0 D P\n1 1 0 NA -0.426776\n2 2 0 NA 0.553949\n3 3 0 NA -0.650162\n4 4 0 NA 0.665824\n5 5 0 NA 2.15353\n6 6 0 NA 1.60361\n7 7 0 NA 0.924509\n8 8 0 NA 0.166955\n9 9 0 NA -0.446781\n10 10 0 NA 0.913018\n11 11 0 NA -0.123653\n12 12 0 NA 0.451207\n13 13 0 NA -1.81342\n14 14 0 NA 0.10916\n15 15 0 NA 1.26067\n16 16 0 NA 0.553231\n17 17 0 NA 0.209452\n18 18 0 NA -0.513218\n19 19 0 NA 0.611907\n20 20 0 NA 1.53946\n21 21 0 NA -0.386504\n22 22 0 NA -2.51434\n23 23 0 NA 0.405703\n24 24 0 NA 0.51856\n25 25 0 NA 1.06115\n26 26 0 NA 0.195941\n27 27 0 NA -0.249796\n28 28 0 NA -0.267648\n29 29 0 NA 0.717794\n30 30 0 NA 1.18612\n31 31 0 NA 0.0862962\n32 32 0 NA -0.353043\n33 33 0 NA 0.23543\n34 34 0 NA 1.53045\n35 35 0 NA 0.457955\n36 36 0 NA 0.41183\n37 37 0 NA 1.01735\n38 38 0 NA -1.38481\n39 39 0 NA 1.75103\n40 40 0 NA 0.274954\n41 41 0 NA -1.9897\n42 42 0 NA -0.274423\n43 43 0 NA 1.42641\n44 44 0 NA 1.67944\n45 45 0 NA -1.97622\n46 46 0 NA 1.31329\n47 47 0 NA -1.23795\n48 48 0 NA -1.22309\n49 49 0 NA -0.208682\n50 50 0 NA 0.0465913\n51 51 0 NA 0.645466\n52 52 0 NA -0.336671\n53 53 0 NA -1.42514\n54 54 0 NA -1.74041\n55 55 0 NA -0.989992\n56 56 0 NA -0.681306\n57 57 0 NA 1.45234\n58 58 0 NA -0.143956\n59 59 0 NA -0.437836\n60 60 0 NA -0.151927\n61 61 0 NA -1.09187\n62 62 0 NA 0.118866\n63 63 0 NA -0.744682\n64 64 0 NA -0.351599\n65 65 0 NA -0.250524\n66 66 0 NA -1.14453\n67 67 0 NA 0.402101\n68 68 0 NA -0.269673\n69 69 0 NA -1.07104\n70 70 0 NA 0.54661\n71 71 0 NA 0.905989\n72 72 0 NA -0.930246\n73 73 0 NA -0.164935\n74 74 0 NA -0.259298\n75 75 0 NA -2.11448\n76 76 0 NA -0.607634\n77 77 0 NA 0.774801\n78 78 0 NA -2.09622\n79 79 0 NA -1.91166\n80 80 0 NA 1.03534\n81 81 0 NA 0.72945\n82 82 0 NA -0.0979342\n83 83 0 NA 0.245666\n84 84 0 NA 0.938673\n85 85 0 NA 1.73376\n86 86 0 NA 3.59655\n87 87 0 NA 0.10418\n88 88 0 NA 0.818146\n89 89 0 NA -0.471747\n90 90 0 NA 1.32639\n91 91 0 NA -0.616465\n92 92 0 NA -1.19637\n93 93 0 NA -2.52343\n94 94 0 NA -1.6776\n95 95 0 NA -1.32518\n96 96 0 NA 0.588294\n97 97 0 NA 1.23774\n98 98 0 NA -0.292771\n99 99 0 NA 2.21307\n100 100 0 NA 1.39079\n101 101 0 NA 0.348965\n102 102 0 NA -0.179652\n103 103 0 NA -1.71495\n104 104 0 NA -0.360643\n105 105 0 NA 0.255807\n106 106 0 NA 0.78485\n107 107 0 NA 2.50518\n108 108 0 NA -1.81684\n109 109 0 NA -0.0156569\n110 110 0 NA 0.704029\n111 111 0 NA -0.0113866\n112 112 0 NA -0.640498\n113 113 0 NA 2.07844\n114 114 0 NA -0.225493\n115 115 0 NA 0.00218799\n116 116 0 NA 0.453076\n117 117 0 NA 0.711156\n118 118 0 NA -7.92646e-05\n119 119 0 NA 1.27164\n120 120 0 NA 0.130916\n121 121 0 NA 0.960164\n122 122 0 NA -0.694627\n123 123 0 NA 1.20835\n124 124 0 NA -0.555829\n125 125 0 NA 0.408699\n126 126 0 NA 0.863283\n127 127 0 NA 0.864202\n128 128 0 NA -0.451513\n129 129 0 NA -0.191405\n130 130 0 NA -0.0376491\n131 131 0 NA 1.06995\n132 132 0 NA 0.733263\n133 133 0 NA -1.38596\n134 134 0 NA -0.168826\n135 135 0 NA 1.23416\n136 136 0 NA -0.130181\n137 137 0 NA -0.849622\n138 138 0 NA 1.65559\n139 139 0 NA -0.855749\n140 140 0 NA -0.591996\n141 141 0 NA 1.47661\n142 142 0 NA -1.76068\n143 143 0 NA 0.333135\n144 144 0 NA -1.3168\n145 145 0 NA 2.26584\n146 146 0 NA -0.585547\n147 147 0 NA 1.00424\n148 148 0 NA 0.595063\n149 149 0 NA -2.35898\n150 150 0 NA 1.02006\n151 151 0 NA -1.25786\n152 152 0 NA -0.0970053\n153 153 0 NA 0.336385\n154 154 0 NA 1.79728\n155 155 0 NA 0.270135\n156 156 0 NA -0.380863\n157 157 0 NA 0.0401893\n158 158 0 NA -0.396513\n159 159 0 NA 1.49365\n160 160 0 NA -1.29933\n161 161 0 NA 
0.251928\n162 162 0 NA 0.691775\n163 163 0 NA 0.740751\n164 164 0 NA -0.310567\n165 165 0 NA -0.55526\n166 166 0 NA -1.00578\n167 167 0 NA 0.0545069\n168 168 0 NA -0.634928\n169 169 0 NA -0.669547\n170 170 0 NA -0.232261\n171 171 0 NA 0.703583\n172 172 0 NA -0.313278\n173 173 0 NA -0.249173\n174 174 0 NA -0.659208\n175 175 0 NA -0.947788\n176 176 0 NA 0.480091\n177 177 0 NA 0.47061\n178 178 0 NA -0.0650563\n179 179 0 NA -1.49593\n180 180 0 NA 0.540549\n181 181 0 NA 1.12327\n182 182 0 NA 0.00744181\n183 183 0 NA 0.663999\n184 184 0 NA -0.567857\n185 185 0 NA 0.229938\n186 186 0 NA -0.170762\n187 187 0 NA 0.34209\n188 188 0 NA -0.109504\n189 189 0 NA -0.068089\n190 190 0 NA -2.31449\n191 191 0 NA -1.07777\n192 192 0 NA -0.339403\n193 193 0 NA -1.5321\n194 194 0 NA 1.27784\n195 195 0 NA -0.618697\n196 196 0 NA 0.244838\n197 197 0 NA -0.0897247\n198 198 0 NA 0.285638\n199 199 0 NA 0.0266523\n200 200 0 NA -0.855846\n201 201 0 NA 0.103268\n202 202 0 NA -0.46838\n203 203 0 NA -0.0396295\n204 204 0 NA 0.501029\n205 205 0 NA -1.61463\n206 206 0 NA -0.822108\n207 207 0 NA -0.415433\n208 208 0 NA 0.192556\n209 209 0 NA -0.0136163\n210 210 0 NA -0.475529\n211 211 0 NA 0.10233\n212 212 0 NA 0.748894\n213 213 0 NA 0.441979\n214 214 0 NA 1.01881\n215 215 0 NA 1.18723\n216 216 0 NA 0.802298\n217 217 0 NA -0.477409\n218 218 0 NA -0.141978\n219 219 0 NA -0.174778\n220 220 0 NA 1.03133\n221 221 0 NA 0.094144\n222 222 0 NA 0.0786411\n223 223 0 NA -0.561197\n224 224 0 NA -0.617684\n225 225 0 NA -0.624781\n226 226 0 NA 0.72625\n227 227 0 NA 0.673668\n228 228 0 NA 1.20746\n229 229 0 NA -0.700091\n230 230 0 NA 1.01325\n231 231 0 NA 0.108613\n232 232 0 NA 1.53784\n233 233 0 NA -0.388645\n234 234 0 NA 0.58539\n235 235 0 NA 0.636149\n236 236 0 NA -2.37272\n237 237 0 NA -0.154652\n238 238 0 NA -0.453532\n239 239 0 NA 0.200597\n240 240 0 NA 1.67001\n241 241 0 NA 1.03037\n242 242 0 NA -0.208077\n243 243 0 NA 0.164049\n244 244 0 NA -0.288133\n245 245 0 NA 0.52305\n246 246 0 NA 0.307298\n247 247 0 NA 0.116519\n248 248 0 NA -0.00206642\n249 249 0 NA 0.655496\n250 250 0 NA -1.25415\n251 251 0 NA 0.00726571\n252 252 0 NA 2.6072\n253 253 0 NA 0.787332\n254 254 0 NA -0.573939\n255 255 0 NA 0.430139\n256 256 0 NA 0.498003\n257 257 0 NA -0.472708\n258 258 0 NA -2.11577\n259 259 0 NA -0.302791\n260 260 0 NA 0.135721\n261 261 0 NA 0.746283\n262 262 0 NA 1.05104\n263 263 0 NA -0.914336\n264 264 0 NA -1.98078\n265 265 0 NA 0.559037\n266 266 0 NA -1.42152\n267 267 0 NA 0.936689\n268 268 0 NA 0.380767\n269 269 0 NA 0.945328\n270 270 0 NA 0.0570358\n271 271 0 NA 1.47451\n272 272 0 NA -0.517474\n273 273 0 NA -1.55714\n274 274 0 NA -1.0888\n275 275 0 NA 1.23102\n276 276 0 NA -0.47694\n277 277 0 NA -0.224195\n278 278 0 NA -0.537571\n279 279 0 NA 0.019698\n280 280 0 NA 1.18258\n281 281 0 NA -0.781154\n282 282 0 NA -0.460112\n283 283 0 NA 0.703325\n284 284 0 NA -0.840924\n285 285 0 NA 0.557335\n286 286 0 NA -1.62788\n287 287 0 NA 1.65839\n288 288 0 NA -1.21203\n289 289 0 NA -0.972147\n290 290 0 NA -0.596574\n291 291 0 NA -1.23514\n292 292 0 NA 1.6419\n293 293 0 NA -0.421417\n294 294 0 NA 0.387531\n295 295 0 NA -1.07038\n296 296 0 NA -0.51778\n297 297 0 NA 1.15659\n298 298 0 NA -0.667902\n299 299 0 NA -0.970543\n300 300 0 NA -0.865546\n301 301 0 NA -1.07894\n302 302 0 NA -1.24408\n303 303 0 NA -1.04468\n304 304 0 NA -0.56958\n305 305 0 NA -0.847351\n306 306 0 NA -1.1901\n307 307 0 NA 1.02238\n308 308 0 NA -1.97905\n309 309 0 NA -1.52535\n310 310 0 NA 1.0894\n311 311 0 NA 0.696256\n312 312 0 NA 0.987405\n313 313 0 NA 0.112456\n314 314 0 NA 
-0.050028\n315 315 0 NA -0.871743\n316 316 0 NA 0.814679\n317 317 0 NA -0.552664\n318 318 0 NA 1.25739\n319 319 0 NA -1.26483\n320 320 0 NA 1.65303\n321 321 0 NA 1.3011\n322 322 0 NA -0.0201895\n323 323 0 NA -1.88367\n324 324 0 NA 1.42058\n325 325 0 NA -0.689172\n326 326 0 NA -0.304488\n327 327 0 NA -0.0852042\n328 328 0 NA 2.85163\n329 329 0 NA -1.15681\n330 330 0 NA 0.28258\n331 331 0 NA -1.95007\n332 332 0 NA -1.17526\n333 333 0 NA -1.33243\n334 334 0 NA 0.70641\n335 335 0 NA -0.438966\n336 336 0 NA -0.471986\n337 337 0 NA -0.196006\n338 338 0 NA -0.392692\n339 339 0 NA -1.25863\n340 340 0 NA -0.285722\n341 341 0 NA -0.821333\n342 342 0 NA 1.04067\n343 343 0 NA -0.386694\n344 344 0 NA -1.7091\n345 345 0 NA 0.567446\n346 346 0 NA 0.0171499\n347 347 0 NA 0.0977746\n348 348 0 NA 0.822231\n349 349 0 NA 0.396381\n350 350 0 NA -1.49853\n351 351 0 NA -0.02996\n352 352 0 NA -0.652866\n353 353 0 NA -0.121709\n354 354 0 NA 1.03003\n355 355 0 NA -0.166901\n356 356 0 NA 0.837773\n357 357 0 NA -0.388723\n358 358 0 NA 0.144514\n359 359 0 NA 1.60732\n360 360 0 NA 0.0795044\n361 361 0 NA 2.14972\n362 362 0 NA -0.304271\n363 363 0 NA -0.302592\n364 364 0 NA -0.439785\n365 365 0 NA 0.450396\n366 366 0 NA -0.847571\n367 367 0 NA 0.494386\n368 368 0 NA 1.78773\n369 369 0 NA 1.49287\n370 370 0 NA 1.00282\n371 371 0 NA -1.57948\n372 372 0 NA -1.41121\n373 373 0 NA -0.328791\n374 374 0 NA 0.633526\n375 375 0 NA 1.58844\n376 376 0 NA 0.23944\n377 377 0 NA 0.373054\n378 378 0 NA -0.231256\n379 379 0 NA 0.0411482\n380 380 0 NA 1.31482\n381 381 0 NA 0.415339\n382 382 0 NA -2.11904\n383 383 0 NA -0.73574\n384 384 0 NA -0.545852\n385 385 0 NA -1.26001\n386 386 0 NA 0.542966\n387 387 0 NA -0.248866\n388 388 0 NA 1.39359\n389 389 0 NA 0.765535\n390 390 0 NA 1.57434\n391 391 0 NA 0.283378\n392 392 0 NA -0.282155\n393 393 0 NA 0.326779\n394 394 0 NA -0.893343\n395 395 0 NA -0.306112\n396 396 0 NA -1.63278\n397 397 0 NA 0.983996\n398 398 0 NA -2.10943\n399 399 0 NA -0.0300423\n400 400 0 NA 0.267752\n401 401 0 NA 0.0244411\n402 402 0 NA -0.551821\n403 403 0 NA -0.0125597\n404 404 0 NA 0.78199\n405 405 0 NA 0.13319\n406 406 0 NA 0.484897\n407 407 0 NA 0.664244\n408 408 0 NA -0.803087\n409 409 0 NA -0.189584\n410 410 0 NA -0.803493\n411 411 0 NA 0.0918757\n412 412 0 NA 0.0292488\n413 413 0 NA -0.0153363\n414 414 0 NA 1.12808\n415 415 0 NA -0.206516\n416 416 0 NA -0.0512615\n417 417 0 NA -1.34343\n418 418 0 NA -0.185184\n419 419 0 NA -0.248441\n420 420 0 NA -0.069676\n421 421 0 NA 0.376647\n422 422 0 NA -0.256961\n423 423 0 NA 0.723303\n424 424 0 NA 1.11035\n425 425 0 NA -0.119456\n426 426 0 NA -0.639605\n427 427 0 NA 0.564345\n428 428 0 NA -0.344095\n429 429 0 NA -2.22653\n430 430 0 NA -0.956033\n431 431 0 NA -0.819189\n432 432 0 NA -2.44945\n433 433 0 NA -0.0670052\n434 434 0 NA -1.02125\n435 435 0 NA 1.56421\n436 436 0 NA -0.768685\n437 437 0 NA 0.901317\n438 438 0 NA 0.326158\n439 439 0 NA -0.139662\n440 440 0 NA 0.717582\n441 441 0 NA -1.29883\n442 442 0 NA -0.638025\n443 443 0 NA 1.3778\n444 444 0 NA 1.65038\n445 445 0 NA 0.586743\n446 446 0 NA -0.00589607\n447 447 0 NA 0.13319\n448 448 0 NA -1.26964\n449 449 0 NA -0.125181\n450 450 0 NA -1.24454\n451 451 0 NA -0.0661009\n452 452 0 NA 2.57404\n453 453 0 NA 0.928964\n454 454 0 NA 0.51064\n455 455 0 NA -1.32195\n456 456 0 NA 1.30567\n457 457 0 NA -0.742979\n458 458 0 NA -0.593598\n459 459 0 NA -1.24448\n460 460 0 NA -0.110278\n461 461 0 NA 0.159683\n462 462 0 NA 0.100157\n463 463 0 NA -1.6713\n464 464 0 NA 1.12242\n465 465 0 NA -0.88487\n466 466 0 NA -0.433884\n467 467 
0 NA 0.233091\n468 468 0 NA -0.0685021\n469 469 0 NA -0.430799\n470 470 0 NA 0.680125\n471 471 0 NA 0.706294\n472 472 0 NA -1.01252\n473 473 0 NA 1.14022\n474 474 0 NA -0.0951276\n475 475 0 NA 1.20431\n476 476 0 NA -1.31014\n477 477 0 NA -1.01024\n478 478 0 NA 0.86817\n479 479 0 NA -1.13996\n480 480 0 NA 0.220533\n481 481 0 NA -0.585963\n482 482 0 NA 0.936588\n483 483 0 NA -1.58959\n484 484 0 NA 0.114516\n485 485 0 NA 0.275659\n486 486 0 NA 1.82381\n487 487 0 NA 0.919563\n488 488 0 NA 1.19923\n489 489 0 NA -0.287172\n490 490 0 NA -0.125564\n491 491 0 NA 0.76606\n492 492 0 NA 0.0485017\n493 493 0 NA 0.91911\n494 494 0 NA -0.0784911\n495 495 0 NA -1.60227\n496 496 0 NA -2.01618\n497 497 0 NA 1.34361\n498 498 0 NA -1.81712\n499 499 0 NA -0.463177\n500 500 0 NA -0.94339\n"
  },
  {
    "path": "example/example_3chr.setlist",
    "content": "SET1 1 1 mog_0,mog_1,mog_2,mog_3,mog_4,mog_5,mog_6,mog_7,mog_8,mog_9,mog_10,mog_11,mog_12,mog_13,mog_14,mog_15,mog_16,mog_17,mog_18,mog_19,mog_20,mog_21,mog_22,mog_23,mog_24,mog_25,mog_26,mog_27,mog_28,mog_29,mog_30,mog_31,mog_32,mog_33,mog_34,mog_35,mog_36,mog_37,mog_38,mog_39\nSET2 3 500 null_20,null_21,null_22,null_23,null_24,null_25,null_26,null_27,null_28,null_29,null_30,null_31,null_32,null_33,null_34,null_35,null_36,null_37,null_38,null_39,null_40,null_41,null_42,null_43,null_44,null_45,null_46,null_47,null_48,null_49,rsidNull1\nSET4 3 23 rsidNull2,rsidNull3\n"
  },
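  {
    "path": "example/check_setlist.py",
    "content": "#!/usr/bin/env python3\n# NOTE: hypothetical helper script, not part of the original regenie repository.\n# A minimal sketch, assuming the set-list layout used by example_3chr.setlist\n# above: one set per line with four whitespace-separated fields -- set name,\n# chromosome, physical position, and a comma-separated list of variant IDs.\nimport sys\n\ndef read_setlist(path):\n    sets = {}\n    with open(path) as fh:\n        for line in fh:\n            name, chrom, pos, variants = line.split()\n            sets[name] = {'chrom': chrom, 'pos': int(pos), 'variants': variants.split(',')}\n    return sets\n\nif __name__ == '__main__':\n    path = sys.argv[1] if len(sys.argv) > 1 else 'example_3chr.setlist'\n    for name, info in read_setlist(path).items():\n        print(f\"{name}: chr{info['chrom']}:{info['pos']}, {len(info['variants'])} variants\")\n"
  },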
  {
    "path": "example/example_3chr.write_sets",
    "content": "tmp1 M1\ntmp2 M1,M2\n"
  },
  {
    "path": "example/example_3chr_masks.bim",
    "content": "1\tSET1.M1.0.2\t0\t1\tD\td\n3\tSET2.M2.0.2\t0\t500\tD\td\n"
  },
  {
    "path": "example/example_3chr_masks.fam",
    "content": "1\t1\t0\t0\t0\t-0.4267761781290719\n2\t2\t0\t0\t0\t0.553949147355921\n3\t3\t0\t0\t0\t-0.6501615855731889\n4\t4\t0\t0\t0\t0.6658237998926421\n5\t5\t0\t0\t0\t2.153525389108508\n6\t6\t0\t0\t0\t1.603607081080267\n7\t7\t0\t0\t0\t0.9245092282950891\n8\t8\t0\t0\t0\t0.16695520568640973\n9\t9\t0\t0\t0\t-0.44678076953973184\n10\t10\t0\t0\t0\t0.9130182655669581\n11\t11\t0\t0\t0\t-0.12365315280028831\n12\t12\t0\t0\t0\t0.45120658048742923\n13\t13\t0\t0\t0\t-1.813422352445905\n14\t14\t0\t0\t0\t0.10916042619322039\n15\t15\t0\t0\t0\t1.2606692333542373\n16\t16\t0\t0\t0\t0.5532311359350739\n17\t17\t0\t0\t0\t0.20945215196889685\n18\t18\t0\t0\t0\t-0.5132181734188253\n19\t19\t0\t0\t0\t0.6119067760087953\n20\t20\t0\t0\t0\t1.5394566797469251\n21\t21\t0\t0\t0\t-0.3865037693784083\n22\t22\t0\t0\t0\t-2.5143419880300866\n23\t23\t0\t0\t0\t0.405703045213449\n24\t24\t0\t0\t0\t0.5185603274229008\n25\t25\t0\t0\t0\t1.0611484493881813\n26\t26\t0\t0\t0\t0.19594051011058977\n27\t27\t0\t0\t0\t-0.24979581720203475\n28\t28\t0\t0\t0\t-0.2676477096598135\n29\t29\t0\t0\t0\t0.7177935231258309\n30\t30\t0\t0\t0\t1.1861246517682622\n31\t31\t0\t0\t0\t0.08629622130288517\n32\t32\t0\t0\t0\t-0.35304293360949707\n33\t33\t0\t0\t0\t0.2354296885149071\n34\t34\t0\t0\t0\t1.530452595962293\n35\t35\t0\t0\t0\t0.4579547646635299\n36\t36\t0\t0\t0\t0.4118304659618171\n37\t37\t0\t0\t0\t1.0173491056828072\n38\t38\t0\t0\t0\t-1.3848053256406825\n39\t39\t0\t0\t0\t1.7510265942816932\n40\t40\t0\t0\t0\t0.274954268508773\n41\t41\t0\t0\t0\t-1.9897038536605167\n42\t42\t0\t0\t0\t-0.2744230179428616\n43\t43\t0\t0\t0\t1.426414899954135\n44\t44\t0\t0\t0\t1.6794358446939521\n45\t45\t0\t0\t0\t-1.9762223272212731\n46\t46\t0\t0\t0\t1.31328879515592\n47\t47\t0\t0\t0\t-1.2379452021381565\n48\t48\t0\t0\t0\t-1.2230942245689198\n49\t49\t0\t0\t0\t-0.2086823379220931\n50\t50\t0\t0\t0\t0.046591344965807927\n51\t51\t0\t0\t0\t0.6454664759221705\n52\t52\t0\t0\t0\t-0.3366711458816414\n53\t53\t0\t0\t0\t-1.4251415011354847\n54\t54\t0\t0\t0\t-1.7404106776289252\n55\t55\t0\t0\t0\t-0.9899916360986122\n56\t56\t0\t0\t0\t-0.6813055796709735\n57\t57\t0\t0\t0\t1.4523360524496092\n58\t58\t0\t0\t0\t-0.14395595141327489\n59\t59\t0\t0\t0\t-0.4378362565094535\n60\t60\t0\t0\t0\t-0.1519268773269188\n61\t61\t0\t0\t0\t-1.0918707470545093\n62\t62\t0\t0\t0\t0.11886585929161704\n63\t63\t0\t0\t0\t-0.7446821129490644\n64\t64\t0\t0\t0\t-0.3515988379695028\n65\t65\t0\t0\t0\t-0.2505242457789806\n66\t66\t0\t0\t0\t-1.1445325100301038\n67\t67\t0\t0\t0\t0.4021008709651108\n68\t68\t0\t0\t0\t-0.26967294800034874\n69\t69\t0\t0\t0\t-1.071043452154829\n70\t70\t0\t0\t0\t0.5466104498110623\n71\t71\t0\t0\t0\t0.9059888912622454\n72\t72\t0\t0\t0\t-0.9302463996121689\n73\t73\t0\t0\t0\t-0.1649348560132909\n74\t74\t0\t0\t0\t-0.2592977286319428\n75\t75\t0\t0\t0\t-2.114476477667718\n76\t76\t0\t0\t0\t-0.6076341029698831\n77\t77\t0\t0\t0\t0.7748006221315341\n78\t78\t0\t0\t0\t-2.0962224712289843\n79\t79\t0\t0\t0\t-1.9116579659512944\n80\t80\t0\t0\t0\t1.0353397078763547\n81\t81\t0\t0\t0\t0.7294502561042351\n82\t82\t0\t0\t0\t-0.09793421592240978\n83\t83\t0\t0\t0\t0.24566617144478756\n84\t84\t0\t0\t0\t0.9386731103091898\n85\t85\t0\t0\t0\t1.7337564931524567\n86\t86\t0\t0\t0\t3.5965463607888486\n87\t87\t0\t0\t0\t0.10417950242086334\n88\t88\t0\t0\t0\t0.8181457381771525\n89\t89\t0\t0\t0\t-0.4717474685706561\n90\t90\t0\t0\t0\t1.3263850865010316\n91\t91\t0\t0\t0\t-0.616465031940274\n92\t92\t0\t0\t0\t-1.1963740018230864\n93\t93\t0\t0\t0\t-2.523429651265045\n94\t94\t0\t0\t0\t-1.6775958547520566\n95\t95\t0\t0\t0\t-1.325183339
632047\n96\t96\t0\t0\t0\t0.5882935940652537\n97\t97\t0\t0\t0\t1.2377386081497168\n98\t98\t0\t0\t0\t-0.2927710750283572\n99\t99\t0\t0\t0\t2.2130683684766117\n100\t100\t0\t0\t0\t1.3907852235803377\n101\t101\t0\t0\t0\t0.3489648675330413\n102\t102\t0\t0\t0\t-0.17965231137324936\n103\t103\t0\t0\t0\t-1.714950186712027\n104\t104\t0\t0\t0\t-0.3606425914729865\n105\t105\t0\t0\t0\t0.2558071082994665\n106\t106\t0\t0\t0\t0.7848503716979157\n107\t107\t0\t0\t0\t2.505180527543909\n108\t108\t0\t0\t0\t-1.8168385585899676\n109\t109\t0\t0\t0\t-0.01565688433622611\n110\t110\t0\t0\t0\t0.7040291497308593\n111\t111\t0\t0\t0\t-0.011386617718583729\n112\t112\t0\t0\t0\t-0.6404979013481404\n113\t113\t0\t0\t0\t2.0784381335821682\n114\t114\t0\t0\t0\t-0.2254928329575596\n115\t115\t0\t0\t0\t0.0021879911560499826\n116\t116\t0\t0\t0\t0.4530758437690287\n117\t117\t0\t0\t0\t0.7111556836519528\n118\t118\t0\t0\t0\t-7.926457716849677e-5\n119\t119\t0\t0\t0\t1.2716437121248285\n120\t120\t0\t0\t0\t0.13091641539525625\n121\t121\t0\t0\t0\t0.9601642531802883\n122\t122\t0\t0\t0\t-0.6946269492816117\n123\t123\t0\t0\t0\t1.2083545960448898\n124\t124\t0\t0\t0\t-0.5558287669378148\n125\t125\t0\t0\t0\t0.40869932288799415\n126\t126\t0\t0\t0\t0.8632834147663099\n127\t127\t0\t0\t0\t0.8642022294643745\n128\t128\t0\t0\t0\t-0.45151257769571096\n129\t129\t0\t0\t0\t-0.1914051114382599\n130\t130\t0\t0\t0\t-0.037649137239090524\n131\t131\t0\t0\t0\t1.069953907594907\n132\t132\t0\t0\t0\t0.7332632139169448\n133\t133\t0\t0\t0\t-1.3859563214470878\n134\t134\t0\t0\t0\t-0.16882598316337785\n135\t135\t0\t0\t0\t1.2341578254519214\n136\t136\t0\t0\t0\t-0.13018118909812346\n137\t137\t0\t0\t0\t-0.849622061183973\n138\t138\t0\t0\t0\t1.65558791310931\n139\t139\t0\t0\t0\t-0.8557488969973114\n140\t140\t0\t0\t0\t-0.591996102804245\n141\t141\t0\t0\t0\t1.4766090213622798\n142\t142\t0\t0\t0\t-1.7606788880705646\n143\t143\t0\t0\t0\t0.3331349454598871\n144\t144\t0\t0\t0\t-1.316796818445614\n145\t145\t0\t0\t0\t2.2658417254598926\n146\t146\t0\t0\t0\t-0.5855466346375702\n147\t147\t0\t0\t0\t1.004242318973581\n148\t148\t0\t0\t0\t0.5950634513012234\n149\t149\t0\t0\t0\t-2.3589818092376342\n150\t150\t0\t0\t0\t1.020059747279963\n151\t151\t0\t0\t0\t-1.2578575928423548\n152\t152\t0\t0\t0\t-0.09700531448496873\n153\t153\t0\t0\t0\t0.33638529718875043\n154\t154\t0\t0\t0\t1.7972793724047749\n155\t155\t0\t0\t0\t0.27013487552553606\n156\t156\t0\t0\t0\t-0.3808633841949227\n157\t157\t0\t0\t0\t0.04018926790384476\n158\t158\t0\t0\t0\t-0.3965127185254764\n159\t159\t0\t0\t0\t1.4936455086379186\n160\t160\t0\t0\t0\t-1.2993293254003835\n161\t161\t0\t0\t0\t0.2519277543876505\n162\t162\t0\t0\t0\t0.691774608729148\n163\t163\t0\t0\t0\t0.7407513064839535\n164\t164\t0\t0\t0\t-0.31056687191510746\n165\t165\t0\t0\t0\t-0.5552597332076762\n166\t166\t0\t0\t0\t-1.0057835319597124\n167\t167\t0\t0\t0\t0.05450686156271301\n168\t168\t0\t0\t0\t-0.6349282429298715\n169\t169\t0\t0\t0\t-0.6695465895569117\n170\t170\t0\t0\t0\t-0.23226063332510133\n171\t171\t0\t0\t0\t0.7035825230515401\n172\t172\t0\t0\t0\t-0.31327835211735766\n173\t173\t0\t0\t0\t-0.24917314601702809\n174\t174\t0\t0\t0\t-0.6592080848536208\n175\t175\t0\t0\t0\t-0.9477879953319399\n176\t176\t0\t0\t0\t0.48009125370245204\n177\t177\t0\t0\t0\t0.4706100983663504\n178\t178\t0\t0\t0\t-0.06505632309350708\n179\t179\t0\t0\t0\t-1.4959269095746512\n180\t180\t0\t0\t0\t0.5405485678497597\n181\t181\t0\t0\t0\t1.1232696382170495\n182\t182\t0\t0\t0\t0.0074418075185122565\n183\t183\t0\t0\t0\t0.6639986494873168\n184\t184\t0\t0\t0\t-0.5678566487451827\n185\t185\t0\t0\t0\t0.2
299383937915558\n186\t186\t0\t0\t0\t-0.17076218909155186\n187\t187\t0\t0\t0\t0.3420901284971981\n188\t188\t0\t0\t0\t-0.10950363726652125\n189\t189\t0\t0\t0\t-0.06808902942736765\n190\t190\t0\t0\t0\t-2.3144897119335965\n191\t191\t0\t0\t0\t-1.0777725703353438\n192\t192\t0\t0\t0\t-0.3394034459756072\n193\t193\t0\t0\t0\t-1.5321020124626434\n194\t194\t0\t0\t0\t1.277838035763474\n195\t195\t0\t0\t0\t-0.6186969481783715\n196\t196\t0\t0\t0\t0.24483826998859312\n197\t197\t0\t0\t0\t-0.08972472068807524\n198\t198\t0\t0\t0\t0.28563820059484707\n199\t199\t0\t0\t0\t0.02665228610242174\n200\t200\t0\t0\t0\t-0.8558456209545621\n201\t201\t0\t0\t0\t0.10326780109110945\n202\t202\t0\t0\t0\t-0.4683803455991253\n203\t203\t0\t0\t0\t-0.039629458692737506\n204\t204\t0\t0\t0\t0.5010288555718972\n205\t205\t0\t0\t0\t-1.614633788145904\n206\t206\t0\t0\t0\t-0.8221081713776588\n207\t207\t0\t0\t0\t-0.4154327362321607\n208\t208\t0\t0\t0\t0.19255584244438567\n209\t209\t0\t0\t0\t-0.013616312773019643\n210\t210\t0\t0\t0\t-0.475529391106999\n211\t211\t0\t0\t0\t0.10232966150402548\n212\t212\t0\t0\t0\t0.7488940724700807\n213\t213\t0\t0\t0\t0.44197886128432695\n214\t214\t0\t0\t0\t1.018806470717939\n215\t215\t0\t0\t0\t1.187231623861802\n216\t216\t0\t0\t0\t0.8022976145656721\n217\t217\t0\t0\t0\t-0.4774086494408861\n218\t218\t0\t0\t0\t-0.14197769264202192\n219\t219\t0\t0\t0\t-0.1747777783307673\n220\t220\t0\t0\t0\t1.0313282851738685\n221\t221\t0\t0\t0\t0.09414395331571555\n222\t222\t0\t0\t0\t0.07864110999174483\n223\t223\t0\t0\t0\t-0.5611965282485419\n224\t224\t0\t0\t0\t-0.6176835872308691\n225\t225\t0\t0\t0\t-0.624780748917754\n226\t226\t0\t0\t0\t0.7262504583960367\n227\t227\t0\t0\t0\t0.6736681336462624\n228\t228\t0\t0\t0\t1.2074649724277058\n229\t229\t0\t0\t0\t-0.7000910464946777\n230\t230\t0\t0\t0\t1.0132498153713987\n231\t231\t0\t0\t0\t0.1086125064282297\n232\t232\t0\t0\t0\t1.5378432998509857\n233\t233\t0\t0\t0\t-0.38864483884544576\n234\t234\t0\t0\t0\t0.5853901243650999\n235\t235\t0\t0\t0\t0.6361493273442497\n236\t236\t0\t0\t0\t-2.3727188226249276\n237\t237\t0\t0\t0\t-0.1546518851418047\n238\t238\t0\t0\t0\t-0.4535321516545124\n239\t239\t0\t0\t0\t0.20059745542046717\n240\t240\t0\t0\t0\t1.6700071185481635\n241\t241\t0\t0\t0\t1.0303745125812633\n242\t242\t0\t0\t0\t-0.2080769141106961\n243\t243\t0\t0\t0\t0.1640491851783853\n244\t244\t0\t0\t0\t-0.28813258919928886\n245\t245\t0\t0\t0\t0.5230500717020431\n246\t246\t0\t0\t0\t0.3072979372247395\n247\t247\t0\t0\t0\t0.11651870056658613\n248\t248\t0\t0\t0\t-0.0020664157587947755\n249\t249\t0\t0\t0\t0.6554963064376974\n250\t250\t0\t0\t0\t-1.2541501943392634\n251\t251\t0\t0\t0\t0.0072657109833799294\n252\t252\t0\t0\t0\t2.6072042439689764\n253\t253\t0\t0\t0\t0.7873316028203755\n254\t254\t0\t0\t0\t-0.5739394940501295\n255\t255\t0\t0\t0\t0.4301393873631534\n256\t256\t0\t0\t0\t0.4980033880636772\n257\t257\t0\t0\t0\t-0.472708427483925\n258\t258\t0\t0\t0\t-2.115773568613388\n259\t259\t0\t0\t0\t-0.3027911974132581\n260\t260\t0\t0\t0\t0.1357209008468077\n261\t261\t0\t0\t0\t0.7462827401513036\n262\t262\t0\t0\t0\t1.0510433172779923\n263\t263\t0\t0\t0\t-0.9143361188838738\n264\t264\t0\t0\t0\t-1.980776740834876\n265\t265\t0\t0\t0\t0.5590372219564149\n266\t266\t0\t0\t0\t-1.4215222131613703\n267\t267\t0\t0\t0\t0.9366889840084335\n268\t268\t0\t0\t0\t0.38076689746763476\n269\t269\t0\t0\t0\t0.9453281558278972\n270\t270\t0\t0\t0\t0.057035845832356935\n271\t271\t0\t0\t0\t1.4745050564310078\n272\t272\t0\t0\t0\t-0.5174737513107168\n273\t273\t0\t0\t0\t-1.5571441883774875\n274\t274\t0\t0\t0\t-1.0887963283029611\n275
\t275\t0\t0\t0\t1.2310204712452886\n276\t276\t0\t0\t0\t-0.4769402620727404\n277\t277\t0\t0\t0\t-0.22419481188150195\n278\t278\t0\t0\t0\t-0.5375714580159776\n279\t279\t0\t0\t0\t0.019698038241142538\n280\t280\t0\t0\t0\t1.1825833509904307\n281\t281\t0\t0\t0\t-0.7811541586520457\n282\t282\t0\t0\t0\t-0.4601117935870594\n283\t283\t0\t0\t0\t0.7033250873409186\n284\t284\t0\t0\t0\t-0.8409243958041983\n285\t285\t0\t0\t0\t0.5573349116266615\n286\t286\t0\t0\t0\t-1.6278769464058696\n287\t287\t0\t0\t0\t1.6583911801881541\n288\t288\t0\t0\t0\t-1.212032621830397\n289\t289\t0\t0\t0\t-0.9721467722081951\n290\t290\t0\t0\t0\t-0.5965740298455487\n291\t291\t0\t0\t0\t-1.2351363670664182\n292\t292\t0\t0\t0\t1.641895301641597\n293\t293\t0\t0\t0\t-0.42141703006240455\n294\t294\t0\t0\t0\t0.38753100560495457\n295\t295\t0\t0\t0\t-1.070382200424481\n296\t296\t0\t0\t0\t-0.517780452691308\n297\t297\t0\t0\t0\t1.1565871160947803\n298\t298\t0\t0\t0\t-0.6679020556063455\n299\t299\t0\t0\t0\t-0.970542580114166\n300\t300\t0\t0\t0\t-0.8655455001063305\n301\t301\t0\t0\t0\t-1.0789380083099478\n302\t302\t0\t0\t0\t-1.2440796197575608\n303\t303\t0\t0\t0\t-1.0446790730803917\n304\t304\t0\t0\t0\t-0.5695802535356469\n305\t305\t0\t0\t0\t-0.8473514861984687\n306\t306\t0\t0\t0\t-1.190100919435714\n307\t307\t0\t0\t0\t1.022380976723825\n308\t308\t0\t0\t0\t-1.9790492535631858\n309\t309\t0\t0\t0\t-1.5253493737129327\n310\t310\t0\t0\t0\t1.0894028037803278\n311\t311\t0\t0\t0\t0.6962558464296542\n312\t312\t0\t0\t0\t0.9874048999158505\n313\t313\t0\t0\t0\t0.11245625794433396\n314\t314\t0\t0\t0\t-0.050027987070537086\n315\t315\t0\t0\t0\t-0.8717433837106624\n316\t316\t0\t0\t0\t0.8146794446207906\n317\t317\t0\t0\t0\t-0.5526641897761696\n318\t318\t0\t0\t0\t1.2573937760035447\n319\t319\t0\t0\t0\t-1.2648304446149456\n320\t320\t0\t0\t0\t1.6530274769204985\n321\t321\t0\t0\t0\t1.30110161745003\n322\t322\t0\t0\t0\t-0.02018948002691863\n323\t323\t0\t0\t0\t-1.8836740231429716\n324\t324\t0\t0\t0\t1.4205846188601483\n325\t325\t0\t0\t0\t-0.689172153743396\n326\t326\t0\t0\t0\t-0.3044880253487302\n327\t327\t0\t0\t0\t-0.08520417701732463\n328\t328\t0\t0\t0\t2.851634453916995\n329\t329\t0\t0\t0\t-1.156810978360592\n330\t330\t0\t0\t0\t0.2825803847207173\n331\t331\t0\t0\t0\t-1.9500708808705018\n332\t332\t0\t0\t0\t-1.1752612835403988\n333\t333\t0\t0\t0\t-1.3324335054916632\n334\t334\t0\t0\t0\t0.706410116238176\n335\t335\t0\t0\t0\t-0.4389661673885548\n336\t336\t0\t0\t0\t-0.47198568412366393\n337\t337\t0\t0\t0\t-0.196005627932633\n338\t338\t0\t0\t0\t-0.3926918515697784\n339\t339\t0\t0\t0\t-1.2586337442612614\n340\t340\t0\t0\t0\t-0.28572183266312384\n341\t341\t0\t0\t0\t-0.821333467643559\n342\t342\t0\t0\t0\t1.0406689409301526\n343\t343\t0\t0\t0\t-0.38669408702632374\n344\t344\t0\t0\t0\t-1.7090986597057412\n345\t345\t0\t0\t0\t0.567446046270448\n346\t346\t0\t0\t0\t0.01714990950414205\n347\t347\t0\t0\t0\t0.0977745741751222\n348\t348\t0\t0\t0\t0.822230557498965\n349\t349\t0\t0\t0\t0.3963812215631262\n350\t350\t0\t0\t0\t-1.4985341158085754\n351\t351\t0\t0\t0\t-0.029960047096039638\n352\t352\t0\t0\t0\t-0.6528662364260229\n353\t353\t0\t0\t0\t-0.12170910991193998\n354\t354\t0\t0\t0\t1.0300254802129547\n355\t355\t0\t0\t0\t-0.16690139482289537\n356\t356\t0\t0\t0\t0.8377731253742943\n357\t357\t0\t0\t0\t-0.3887229530927819\n358\t358\t0\t0\t0\t0.14451356330621065\n359\t359\t0\t0\t0\t1.6073233297076983\n360\t360\t0\t0\t0\t0.07950441212552996\n361\t361\t0\t0\t0\t2.1497157962418174\n362\t362\t0\t0\t0\t-0.30427052777531594\n363\t363\t0\t0\t0\t-0.3025923417388714\n364\t364\t0\t0\t0\t-0.439785
0120995375\n365\t365\t0\t0\t0\t0.45039620392874236\n366\t366\t0\t0\t0\t-0.8475714740581328\n367\t367\t0\t0\t0\t0.49438582682424076\n368\t368\t0\t0\t0\t1.7877305550184646\n369\t369\t0\t0\t0\t1.4928728753892113\n370\t370\t0\t0\t0\t1.0028215376206\n371\t371\t0\t0\t0\t-1.579481582437942\n372\t372\t0\t0\t0\t-1.4112071973049491\n373\t373\t0\t0\t0\t-0.3287910850620258\n374\t374\t0\t0\t0\t0.6335258906599922\n375\t375\t0\t0\t0\t1.5884402065569267\n376\t376\t0\t0\t0\t0.239439976093505\n377\t377\t0\t0\t0\t0.3730542303129533\n378\t378\t0\t0\t0\t-0.23125572369556183\n379\t379\t0\t0\t0\t0.04114817676435756\n380\t380\t0\t0\t0\t1.3148209110260505\n381\t381\t0\t0\t0\t0.4153394063226016\n382\t382\t0\t0\t0\t-2.119037113120731\n383\t383\t0\t0\t0\t-0.7357404438785312\n384\t384\t0\t0\t0\t-0.5458519940698457\n385\t385\t0\t0\t0\t-1.2600086040654477\n386\t386\t0\t0\t0\t0.5429663518299074\n387\t387\t0\t0\t0\t-0.24886613651789177\n388\t388\t0\t0\t0\t1.393586775779869\n389\t389\t0\t0\t0\t0.7655351504255072\n390\t390\t0\t0\t0\t1.574341948844662\n391\t391\t0\t0\t0\t0.28337819436221234\n392\t392\t0\t0\t0\t-0.28215538210870866\n393\t393\t0\t0\t0\t0.32677930042765563\n394\t394\t0\t0\t0\t-0.8933433333160953\n395\t395\t0\t0\t0\t-0.30611223649551184\n396\t396\t0\t0\t0\t-1.6327825926249717\n397\t397\t0\t0\t0\t0.983996289771191\n398\t398\t0\t0\t0\t-2.1094276028051713\n399\t399\t0\t0\t0\t-0.03004231237784647\n400\t400\t0\t0\t0\t0.2677517204111873\n401\t401\t0\t0\t0\t0.024441125372044015\n402\t402\t0\t0\t0\t-0.5518207938279379\n403\t403\t0\t0\t0\t-0.012559704828846413\n404\t404\t0\t0\t0\t0.7819900863897892\n405\t405\t0\t0\t0\t0.1331898731839282\n406\t406\t0\t0\t0\t0.48489696095614787\n407\t407\t0\t0\t0\t0.6642443953751778\n408\t408\t0\t0\t0\t-0.8030871260696271\n409\t409\t0\t0\t0\t-0.1895839551659786\n410\t410\t0\t0\t0\t-0.8034926266352286\n411\t411\t0\t0\t0\t0.09187570772040049\n412\t412\t0\t0\t0\t0.029248814008252747\n413\t413\t0\t0\t0\t-0.01533630905590288\n414\t414\t0\t0\t0\t1.1280827301911354\n415\t415\t0\t0\t0\t-0.2065160921398899\n416\t416\t0\t0\t0\t-0.05126145885052354\n417\t417\t0\t0\t0\t-1.3434267805209248\n418\t418\t0\t0\t0\t-0.18518422658884665\n419\t419\t0\t0\t0\t-0.24844114482463325\n420\t420\t0\t0\t0\t-0.06967600698783584\n421\t421\t0\t0\t0\t0.37664675241912393\n422\t422\t0\t0\t0\t-0.2569614554903415\n423\t423\t0\t0\t0\t0.7233028036905063\n424\t424\t0\t0\t0\t1.1103498482848735\n425\t425\t0\t0\t0\t-0.11945647973589903\n426\t426\t0\t0\t0\t-0.6396046050764921\n427\t427\t0\t0\t0\t0.5643452560242491\n428\t428\t0\t0\t0\t-0.3440947789770541\n429\t429\t0\t0\t0\t-2.22653169151592\n430\t430\t0\t0\t0\t-0.956032968271431\n431\t431\t0\t0\t0\t-0.8191888326171307\n432\t432\t0\t0\t0\t-2.4494492803640022\n433\t433\t0\t0\t0\t-0.0670051947640402\n434\t434\t0\t0\t0\t-1.0212450480574558\n435\t435\t0\t0\t0\t1.5642134356006847\n436\t436\t0\t0\t0\t-0.7686850358271917\n437\t437\t0\t0\t0\t0.9013174516851364\n438\t438\t0\t0\t0\t0.32615832192874417\n439\t439\t0\t0\t0\t-0.1396620902625725\n440\t440\t0\t0\t0\t0.7175824082953346\n441\t441\t0\t0\t0\t-1.298832108780333\n442\t442\t0\t0\t0\t-0.6380248204283618\n443\t443\t0\t0\t0\t1.3777981524996168\n444\t444\t0\t0\t0\t1.650381986618911\n445\t445\t0\t0\t0\t0.5867431709972836\n446\t446\t0\t0\t0\t-0.005896067871319781\n447\t447\t0\t0\t0\t0.13319016302360676\n448\t448\t0\t0\t0\t-1.2696437250217552\n449\t449\t0\t0\t0\t-0.12518065096985312\n450\t450\t0\t0\t0\t-1.244542673659207\n451\t451\t0\t0\t0\t-0.06610093209244285\n452\t452\t0\t0\t0\t2.5740374683027536\n453\t453\t0\t0\t0\t0.9289638283088296\n454\t454\
t0\t0\t0\t0.5106401356154838\n455\t455\t0\t0\t0\t-1.3219451447015316\n456\t456\t0\t0\t0\t1.3056699098740336\n457\t457\t0\t0\t0\t-0.7429788972407904\n458\t458\t0\t0\t0\t-0.5935982739213578\n459\t459\t0\t0\t0\t-1.2444771294790702\n460\t460\t0\t0\t0\t-0.11027752291961805\n461\t461\t0\t0\t0\t0.15968337228108687\n462\t462\t0\t0\t0\t0.10015721622156738\n463\t463\t0\t0\t0\t-1.6712997326777923\n464\t464\t0\t0\t0\t1.1224222088370677\n465\t465\t0\t0\t0\t-0.8848696262558821\n466\t466\t0\t0\t0\t-0.43388376934423967\n467\t467\t0\t0\t0\t0.2330906457766794\n468\t468\t0\t0\t0\t-0.06850210820133934\n469\t469\t0\t0\t0\t-0.43079920208533806\n470\t470\t0\t0\t0\t0.6801247297121218\n471\t471\t0\t0\t0\t0.7062942755405903\n472\t472\t0\t0\t0\t-1.0125163790266498\n473\t473\t0\t0\t0\t1.1402191818385525\n474\t474\t0\t0\t0\t-0.09512756382895157\n475\t475\t0\t0\t0\t1.2043113720050476\n476\t476\t0\t0\t0\t-1.3101439365860013\n477\t477\t0\t0\t0\t-1.010236868117858\n478\t478\t0\t0\t0\t0.8681701631563481\n479\t479\t0\t0\t0\t-1.1399558386575048\n480\t480\t0\t0\t0\t0.2205328932586634\n481\t481\t0\t0\t0\t-0.5859626268400073\n482\t482\t0\t0\t0\t0.9365883769687702\n483\t483\t0\t0\t0\t-1.589591178874074\n484\t484\t0\t0\t0\t0.1145164389832793\n485\t485\t0\t0\t0\t0.275658677194189\n486\t486\t0\t0\t0\t1.8238148121727578\n487\t487\t0\t0\t0\t0.9195632859199347\n488\t488\t0\t0\t0\t1.1992281082636649\n489\t489\t0\t0\t0\t-0.28717216124743894\n490\t490\t0\t0\t0\t-0.1255644094578015\n491\t491\t0\t0\t0\t0.7660599548056037\n492\t492\t0\t0\t0\t0.0485016949386254\n493\t493\t0\t0\t0\t0.9191104275425048\n494\t494\t0\t0\t0\t-0.07849111590769194\n495\t495\t0\t0\t0\t-1.602274500274993\n496\t496\t0\t0\t0\t-2.016184028763443\n497\t497\t0\t0\t0\t1.3436149731702158\n498\t498\t0\t0\t0\t-1.8171186074532928\n499\t499\t0\t0\t0\t-0.46317733040012826\n500\t500\t0\t0\t0\t-0.9433895891176977\n"
  },
  {
    "path": "example/fid_iid_to_remove.txt",
    "content": "35 35\n136 136\n77 77\n100 100\n204 204\n474 474\n"
  },
  {
    "path": "example/phenotype.txt",
    "content": "FID IID Y1 Y2\n1 1 1.64818554321186 2.2765234736685\n2 2 -2.67352013711554 -1.53680421614647\n3 3 0.217542851471485 0.437289912695016\n4 4 -1.0682692958554 -0.172344146164226\n5 5 -0.132812553179586 -0.156011181306344\n6 6 0.540099244934828 0.58172506850607\n7 7 0.200655325192179 -0.0628377394374\n8 8 -0.0459854613468314 0.265509522791825\n9 9 1.3307615630166 1.71743500320666\n10 10 0.912180962372195 0.187551558913078\n11 11 -0.534547354840137 0.127496169914245\n12 12 0.27896641757041 -1.42731255991967\n13 13 -1.53970463226097 -0.532864539532899\n14 14 -0.90082624741872 -1.56632086145511\n15 15 -0.820750086136125 -1.90758965913578\n16 16 -0.505187618648548 -0.18312079881002\n17 17 -0.89722597069202 -0.217412756332358\n18 18 -0.530246039837079 -0.514277172286367\n19 19 -1.11005552246422 -0.362149833733552\n20 20 0.135219961957339 1.10246617780573\n21 21 0.674908877044016 0.657896901326819\n22 22 -0.871416975041294 -0.114765123686352\n23 23 -1.15266855203515 -2.56052285573202\n24 24 -1.33078180898898 0.901847508702005\n25 25 -0.33689786864605 0.564196915751343\n26 26 1.28793604289375 -0.460800742311677\n27 27 -1.89614375166477 -2.16796161162642\n28 28 0.932225717344624 -0.993295429461682\n29 29 -1.43304637355045 -1.17580626345375\n30 30 -0.79692352468168 -0.0936092127952809\n31 31 -0.818843785975256 -0.53253698628974\n32 32 1.2087128520519 1.03501254726163\n33 33 0.633931435132684 0.44615886486519\n34 34 0.576237437022807 1.0904346633205\n35 35 2.42290456969191 1.43948290383436\n36 36 0.23103458500135 -0.0587673810762215\n37 37 -0.0251633689616453 -1.57102910327571\n38 38 -1.49850486580673 -2.05632349978027\n39 39 0.316565138599832 0.325314072043067\n40 40 -1.52502765452174 -0.213827394178769\n41 41 0.740940592283646 1.09893022241462\n42 42 1.78712212335387 1.19127592854588\n43 43 1.58192040703461 1.20206719830346\n44 44 -0.509715644341248 -1.14873512768313\n45 45 -0.830082090282309 0.228605478322461\n46 46 -0.0491164852343067 -0.137775954204466\n47 47 1.94157887063373 0.847918312742163\n48 48 1.20359097976829 -0.399549607806882\n49 49 -1.23913127708202 0.0150388307226249\n50 50 -0.966968307164566 -0.737477146954062\n51 51 -0.820430264709707 -1.6636201222228\n52 52 -2.40812671796387 -1.20651243686562\n53 53 -1.57969403761047 -0.0257757200489035\n54 54 -1.13320388228761 -0.751544439673961\n55 55 0.893631925732109 1.13746868718638\n56 56 1.20360529801521 2.79242866277356\n57 57 -0.0843877814729617 -1.28592741018806\n58 58 0.339837217622896 -0.541310610515075\n59 59 -0.385458384395854 -0.934702284494623\n60 60 0.468594330006473 -0.73016882826337\n61 61 -1.02043354926932 -1.60187469930237\n62 62 0.243972076651283 -0.563633473100453\n63 63 1.49199775918827 1.52568787191561\n64 64 1.13206847302443 1.3101469387406\n65 65 -0.716368601292129 -0.135512918841699\n66 66 -0.169108730249288 1.09575193085288\n67 67 1.15790001716763 1.1640397855604\n68 68 -0.688576811210818 -1.76869188869093\n69 69 0.14456018958467 -0.15378734253803\n70 70 1.64206973022781 1.18242742349138\n71 71 -1.08724554035947 -0.561082534301814\n72 72 -1.41116766250683 -0.255074407110056\n73 73 0.182430548153859 0.653581598797204\n74 74 -1.25394949287272 -0.508093610110444\n75 75 -1.53359611068566 -1.12886141633028\n76 76 -1.53800060087613 -1.27444991752437\n77 77 0.825952408418855 -0.279010226303386\n78 78 0.142438147648119 1.51511335052744\n79 79 -0.924303826504151 -0.729459474153155\n80 80 -1.40601321953547 0.593044606426811\n81 81 -0.801830843986271 0.338893590157569\n82 82 1.10825917004715 0.439870409850444\n83 83 
0.24592028029576 -0.164772212550257\n84 84 1.79606838091344 1.3039822304618\n85 85 -1.22260977344593 -0.790503121154428\n86 86 -0.700220780811092 0.0328995754123342\n87 87 -0.578998772557048 0.392475781422545\n88 88 -1.69720729321778 -1.5698428789583\n89 89 1.16340862426281 0.792014175554376\n90 90 0.206954556715569 0.415456904166939\n91 91 1.70489999393387 2.09775597676056\n92 92 -1.33104685763959 -1.05475280759534\n93 93 0.645203760475826 0.926294471929944\n94 94 0.1423053293667 -0.216502503763709\n95 95 1.10221712348662 -0.32080884129509\n96 96 0.543348907607857 0.269892488144619\n97 97 1.04941909388814 0.982666978739268\n98 98 -0.258309955845433 0.183650733005576\n99 99 -0.492073649275113 0.0761496561256492\n100 100 0.338403199323624 -0.374239866716357\n101 101 -0.895668365919068 0.11336451596702\n102 102 0.437699897857805 -0.91352569239983\n103 103 0.456605590549223 -0.313164454606283\n104 104 1.40282753554395 0.610811431835396\n105 105 0.0495822014705532 -0.507000617846699\n106 106 1.2587777556985 1.66055013290369\n107 107 -1.40353930047844 -1.32895934946041\n108 108 -1.34550374507602 -1.57417300223271\n109 109 1.22996245176712 0.814167653438195\n110 110 0.311564106800903 -0.111390972866106\n111 111 -0.0587559633492112 -0.0121323250182511\n112 112 -0.436492819499775 0.162428164511859\n113 113 0.0791694982161289 -0.401198944858844\n114 114 1.10356302409986 0.969569111102586\n115 115 1.31474898137036 2.0940945724578\n116 116 -0.843413208640851 -0.0848682739314613\n117 117 0.202497366256568 0.0187349827693815\n118 118 -0.599625493377744 0.906427507141332\n119 119 1.8179555041733 1.3369461892012\n120 120 -0.854885536015852 -0.785095990071871\n121 121 -0.415877865867224 -0.166438439147711\n122 122 0.742162696858347 -1.28954626748198\n123 123 -0.972779674771823 -1.03927262494365\n124 124 1.46293183400694 -0.114484977863908\n125 125 1.00822090954253 0.681871812073643\n126 126 -1.34645832044921 -0.730195246217766\n127 127 -0.591561999406591 -2.12951882663727\n128 128 -0.934510817986235 -1.75494841647204\n129 129 -0.28369951969648 0.17692265891176\n130 130 -1.17614531868063 -0.12403839659914\n131 131 1.65461590969623 2.70192093415576\n132 132 -0.745615276587652 -0.417865474273032\n133 133 0.0738259684462418 1.21278114494829\n134 134 -0.96815375987633 -0.753301992358909\n135 135 -0.939813096800652 -0.0216214318733658\n136 136 -0.879803232355895 -0.0744910663374311\n137 137 0.743172411526409 0.21433313608583\n138 138 2.08195119549803 1.93621885516642\n139 139 0.927100100117178 1.5554541907711\n140 140 0.214045612674062 0.346353741568354\n141 141 -0.949431236638889 -0.305817193168958\n142 142 1.2666979721919 0.787777955161434\n143 143 1.01376904051764 1.12648365697115\n144 144 0.142555917554621 0.707233765426958\n145 145 1.62179916747734 0.885638229075908\n146 146 -1.00645219747244 -0.995434127819415\n147 147 -0.394602826813623 0.473657962928038\n148 148 -0.73982150411597 0.273299958780123\n149 149 -0.365184995360586 -0.452611626430038\n150 150 0.691097586326135 0.766691213629834\n151 151 -0.301611701858653 -1.33319111438154\n152 152 0.531222597236094 -1.60038997213879\n153 153 1.63012914542121 -0.100063647106269\n154 154 -1.34924073424189 -1.53664514984632\n155 155 0.177577385290678 1.78028934270584\n156 156 1.8760154081232 2.52387197823727\n157 157 -0.16113138252359 0.554611083217085\n158 158 0.622139846203371 -0.291011158963719\n159 159 0.188666359888691 0.257710421689275\n160 160 -0.836071049877044 -0.509251680504923\n161 161 -0.9363957038285 -0.786864007114868\n162 162 0.576850550394612 
1.16180237778149\n163 163 1.53563335135132 2.39295406664206\n164 164 0.480251240792056 0.593367377417818\n165 165 0.816802232052774 1.22809833905884\n166 166 0.128093838306365 -0.696009541973336\n167 167 0.692166473072011 0.47598401037244\n168 168 -0.15467430169029 0.887623422309889\n169 169 1.85530898859882 1.34874265859183\n170 170 1.09585407339195 0.853399050828037\n171 171 1.08620951547742 0.222309525046723\n172 172 0.651399299000373 0.204610710960643\n173 173 -0.77205986349291 -1.38438918818234\n174 174 -0.44510199324887 -0.390092864365805\n175 175 0.924297817311385 0.15026757119897\n176 176 -0.185300859903109 -1.47662328084495\n177 177 -0.206992886353605 0.859401094995375\n178 178 -0.547073733586552 -2.31806176595107\n179 179 -1.04319780488037 -0.839081179843443\n180 180 -1.11295843904746 0.687100547210174\n181 181 1.57596863039491 0.655163984249925\n182 182 0.72747085312975 2.74321505256345\n183 183 -0.640180564559575 1.26100797049328\n184 184 0.942240481238108 0.688383995282273\n185 185 0.181892828507572 0.373068219547715\n186 186 0.358693183830045 -0.190608438843274\n187 187 -0.000910571279300776 -0.680961607498789\n188 188 0.919849570661649 1.00572232849211\n189 189 1.01006236214779 -0.190675450107784\n190 190 0.316481287087008 0.281269720213603\n191 191 -0.918286729336156 -0.477703257990269\n192 192 0.183452280040454 -0.0344651227327666\n193 193 2.12717634373411 0.997577554465131\n194 194 1.47020736764189 1.12795785888551\n195 195 -0.971309615501744 -1.18014880718682\n196 196 -0.157281269372447 -0.481653661616169\n197 197 0.646807492342463 0.979218813509775\n198 198 -0.578656676271822 -0.139221375128339\n199 199 0.229313404231838 -0.692324141138655\n200 200 1.20229702102537 1.10892733679558\n201 201 0.133632622403511 0.583256246184469\n202 202 -1.03246454473759 -1.451471999586\n203 203 -0.330370224331379 -0.425464803042189\n204 204 -1.75790734014916 -0.586614309361142\n205 205 -0.0371919747840802 -1.50684309284697\n206 206 1.26551340744515 1.47525772630116\n207 207 -0.0115076783573195 0.993713242482709\n208 208 -0.209351023351956 -0.0948866052629332\n209 209 0.948254398587786 0.148035769503489\n210 210 -0.645842405513505 -0.571836365758735\n211 211 -0.341295789839788 0.713244941767623\n212 212 0.662118020652933 -1.22995421423467\n213 213 -1.05857786323882 0.0897524096764291\n214 214 -0.778042671629221 -0.278468921484683\n215 215 1.22107687285223 1.21869820953845\n216 216 -1.21476298713056 -0.758578615621622\n217 217 1.30022895329161 1.36168696120916\n218 218 1.29409391113734 0.705917131098944\n219 219 -1.00259273604201 -1.07113678345588\n220 220 0.82759989331864 0.536929026455464\n221 221 -0.997619692500241 -1.73114849603647\n222 222 0.932630130413901 0.78082733464099\n223 223 -0.0568079370743356 0.306833710833381\n224 224 1.07078853863247 0.235644832856485\n225 225 -0.878845195403184 -0.262793896775195\n226 226 0.173927378306735 -0.388407346972015\n227 227 0.152001500987953 1.91286468229403\n228 228 -0.118029235005445 -1.10432942001368\n229 229 0.668677093920131 0.0682515661339391\n230 230 -0.986068075843878 -1.7924446101623\n231 231 -0.638189951124171 -0.52191017460648\n232 232 0.197008131055164 0.616574004663493\n233 233 0.0641541307571732 -0.633902784236753\n234 234 1.44314263446451 1.15961150166424\n235 235 -2.14144160123174 -2.2518725440883\n236 236 0.164937563582549 0.789070903671559\n237 237 -0.211441573032738 -1.37962886952607\n238 238 0.122209498375711 1.06401605887066\n239 239 0.404152731207434 0.360589609073474\n240 240 -0.661421846239432 0.172077306941891\n241 241 
0.350246959829443 -1.60470497742995\n242 242 -0.102366085106328 -0.0709881642276646\n243 243 0.241955743295996 0.576852382824044\n244 244 -0.40473713391172 -1.07997472892628\n245 245 -0.572093717322186 0.100529726362697\n246 246 -1.0107055115864 -0.547988289844201\n247 247 1.45918006019184 0.660871721892713\n248 248 0.0290306485101474 -0.814784839126176\n249 249 -0.311786315045172 0.167922092543147\n250 250 0.159157749619645 -0.0782928496695848\n251 251 0.298597154717647 0.436117416649192\n252 252 0.727937895616386 0.00668361955307917\n253 253 0.255383276328498 0.503481625578282\n254 254 -1.14073218073273 -0.368151547937105\n255 255 0.62186689571353 0.6197590990651\n256 256 1.60754780729681 -0.230602646961016\n257 257 1.36612892184012 0.509177236220553\n258 258 -0.17835978703554 -1.22084401906767\n259 259 0.734318792628253 0.94835378968232\n260 260 -0.554150763377311 -2.83632688909111\n261 261 -1.34807262732901 -2.13520534222759\n262 262 -1.30602233938388 -1.36331046931889\n263 263 0.157552245303531 -0.945871858236152\n264 264 -0.378485119558589 -0.24846719242668\n265 265 -1.29759462322226 -1.28542417440431\n266 266 -0.112162859740148 -1.42400346960075\n267 267 0.0319028692133309 -0.143128209114543\n268 268 0.0329971668044517 -0.788254983707128\n269 269 -2.15328238033762 -0.602605195669155\n270 270 -0.268561460565615 0.255528693190225\n271 271 -0.37700865407203 0.123795636414963\n272 272 -0.647681343010691 -0.0873696471965655\n273 273 0.48085051264185 1.27886089783222\n274 274 -0.0863718720740099 -0.189965371237141\n275 275 -0.577723909217335 -0.459218791691393\n276 276 -0.0642922550342538 0.261575763198014\n277 277 0.283276349600993 -0.449390933664188\n278 278 -0.719999871944151 -1.21245981759747\n279 279 0.398917842857218 -0.687190068446156\n280 280 -0.585983681331922 -0.955004739381687\n281 281 0.632640074811404 1.1319661207624\n282 282 -0.332296300590307 -0.481474852783556\n283 283 1.24105669045869 1.39772599908205\n284 284 0.60411314341004 1.45030139833737\n285 285 -0.262533617238517 -1.29592042189868\n286 286 1.0782966724744 0.224757351161194\n287 287 0.375642795853784 0.865080603311589\n288 288 -0.635451819284519 -0.584950334594471\n289 289 -0.0794197303450192 -0.544824440342012\n290 290 -0.391647046085526 1.05466407410057\n291 291 0.746920858892639 0.141390319582209\n292 292 0.174757162932909 0.0431735424655879\n293 293 -1.88125948300107 -1.74328408616805\n294 294 -0.792933999764447 -0.859898846612656\n295 295 -1.97522103278445 -2.14003373808461\n296 296 0.947755627232544 -0.863782160598166\n297 297 1.43280466086758 0.971878727763115\n298 298 0.864435589061877 -0.0944046128667593\n299 299 -0.00918736103969609 -0.42457248714019\n300 300 -0.291134070040785 0.156895831614518\n301 301 -1.01793787032963 -1.13451805794859\n302 302 -2.41767296199479 -2.51027080485175\n303 303 0.90975365148871 -0.295416703422529\n304 304 0.00472194091889469 0.684227815501152\n305 305 0.757484536788318 -0.252724822707596\n306 306 2.68342594840941 3.22129555834226\n307 307 -1.47397795674507 0.0655794205012332\n308 308 -0.615122248015615 -0.747272395276123\n309 309 -0.763134711595199 0.479560083833197\n310 310 -0.282135287850453 -0.0734428079165186\n311 311 -0.523049782720623 -0.540763850960254\n312 312 0.310621119271665 -0.728862242403757\n313 313 0.133048454536538 1.50713613471873\n314 314 -0.653706464545025 -0.798806385558514\n315 315 -0.114236294075412 0.0546801088322116\n316 316 0.292020057364464 -0.631135873978695\n317 317 1.18938474823704 -0.39028550769223\n318 318 -0.0312650731831535 
0.584947591213862\n319 319 0.00887339861556713 0.314512559170236\n320 320 -0.380929811867056 -1.70738447701585\n321 321 -0.52058291401555 -0.260867706649619\n322 322 -2.16053908474117 -1.28838966290168\n323 323 0.726713067234621 2.24691954684607\n324 324 -0.606561923596174 -0.0914334086129591\n325 325 0.544013014174095 -0.865107331509786\n326 326 0.955045058453847 0.203630981513071\n327 327 0.560592666217991 0.484825432668592\n328 328 -0.327752444014553 -0.842104917773965\n329 329 -1.59123845281852 -1.12021839176868\n330 330 -2.07483814869395 -1.79470709475569\n331 331 1.05244467218929 0.182000352523624\n332 332 -0.664191903404875 -0.186883124790877\n333 333 1.21008686273248 0.219592839108128\n334 334 0.413022019819573 -0.268098837611482\n335 335 0.0422280336978795 -0.179623002537761\n336 336 0.477120730310813 -0.446762180563648\n337 337 -0.568223885079031 -1.51167377356618\n338 338 0.519928383814578 -0.0391126750678009\n339 339 -2.00309717324039 -1.08186232237306\n340 340 -0.433534755849648 1.17327729178832\n341 341 -0.00378810769496557 1.23024602577198\n342 342 2.31053344424751 1.59313832899737\n343 343 1.80889515476201 0.0998280113430569\n344 344 -0.271006937008757 1.26456200258843\n345 345 -1.13765326547872 -1.9266689414778\n346 346 0.966578134757589 1.89681338904382\n347 347 0.0266319999198697 0.12947094522922\n348 348 -0.64417803887998 -1.21725478837165\n349 349 -0.848704931186809 -1.0790489589685\n350 350 -0.789348470705069 -0.56855290765307\n351 351 0.0568787682696287 0.778941309946098\n352 352 -0.404167121170396 0.261426782843497\n353 353 0.599169489486594 -0.251353186284818\n354 354 -1.64392413329453 -1.41435768483274\n355 355 -1.61030044520426 -1.9082486009141\n356 356 -1.41648068460129 -1.20293527150969\n357 357 -1.98524086037857 -1.36431496381439\n358 358 -1.16234995642306 -1.50505401496676\n359 359 0.906014336241588 0.883254683920683\n360 360 -0.120663318433096 -1.06230400849685\n361 361 -0.404735456099786 -0.0827256299965876\n362 362 2.28415232253025 2.43185831726563\n363 363 -0.277750113149245 -1.09564243491786\n364 364 -1.43759960759041 -1.91927954999413\n365 365 0.517492255903491 0.163452435167785\n366 366 0.0409231818719817 0.82466451440847\n367 367 0.607080685056537 -0.0116689613015914\n368 368 0.346303068348099 0.963784166003815\n369 369 0.799888006661189 1.15922046221216\n370 370 -0.200658067955632 -0.777948063573182\n371 371 -0.838552588394187 0.436409291416985\n372 372 1.23010938197522 0.633037531507123\n373 373 0.451186523228701 1.30706174499908\n374 374 -0.270558948178047 1.07548009305743\n375 375 -1.11030125107289 -0.613917996247648\n376 376 0.99867624624863 0.43248481571298\n377 377 -0.944635733669833 -2.57000960039567\n378 378 -0.345150360564216 -0.262612324483112\n379 379 0.231225309396651 -0.148069994302184\n380 380 1.19488394763505 0.407445818725747\n381 381 0.421237213096837 1.29168791564525\n382 382 -1.17224093491159 -1.65030352659117\n383 383 -0.150362722343577 0.664688399394185\n384 384 -0.911942086647952 -1.29935956493263\n385 385 -0.550435701050605 0.00996277569490833\n386 386 -0.503452926922047 -0.977572202467078\n387 387 -0.840437479519366 -1.24690421568669\n388 388 2.72565962844896 2.16469111759104\n389 389 -0.342084855015524 -0.891313460113479\n390 390 -0.0981646867577042 1.66825888406037\n391 391 -0.300238776909948 -0.0323886291964357\n392 392 -0.445814191574771 -0.451543558138376\n393 393 -2.35159528945653 -2.09382102637383\n394 394 0.771347458438544 0.632248517764153\n395 395 1.51073283708396 -0.332959071293271\n396 396 0.0966520620608959 
0.435762577755818\n397 397 -0.318708566172272 0.0792983338274879\n398 398 -0.205588999330254 -0.899203470644444\n399 399 1.02270253018406 2.52010320454192\n400 400 0.176860283634206 0.99581811996131\n401 401 1.41839609903773 0.471544708439966\n402 402 1.55159790192982 -0.173816204502474\n403 403 1.93445877775084 1.56420930381227\n404 404 1.36456851123372 -0.286294743401895\n405 405 -0.6945847993009 0.147099044069608\n406 406 1.56216890070534 0.251227211152395\n407 407 1.36800089619423 1.81218092067337\n408 408 -0.263676614327316 -0.111685103193269\n409 409 0.0179108059498879 -0.919265891275284\n410 410 -0.307261985899011 -2.23149808636095\n411 411 -0.163969947581505 -0.236523608175905\n412 412 -0.120449337781418 0.896576482064549\n413 413 0.0739026611409629 1.06472909089829\n414 414 1.10867285753491 0.785042317541263\n415 415 1.46298257470589 1.41564218144868\n416 416 1.16431140493005 0.670593830253462\n417 417 1.17529765650528 0.817968052859188\n418 418 -0.814854327992495 -0.418607403904381\n419 419 2.47723943340456 2.49031917643836\n420 420 -0.0244212103557031 0.312995467662414\n421 421 0.806638213205707 0.356370937482444\n422 422 -1.62405579119699 -0.537954266398098\n423 423 0.957904112604383 0.291943665966988\n424 424 -0.812805871869217 -1.1582721319521\n425 425 -0.996674982917241 -0.655419374338246\n426 426 0.113649695666726 -0.366980808329557\n427 427 0.894610854068111 1.1136191426835\n428 428 1.20288685709081 0.638884320637003\n429 429 -1.13683741901365 -0.36027044117294\n430 430 -0.129060905564338 -0.180888444348373\n431 431 0.425381045801254 -0.295400589763373\n432 432 1.89515871261082 1.49062217238053\n433 433 0.233818384451627 -0.972204780435252\n434 434 -0.28595441088866 -1.35168392568458\n435 435 0.2703210749173 -0.444959487771617\n436 436 -1.56414434282183 -0.0863816258062674\n437 437 2.04419819063683 1.91366743170003\n438 438 0.84728785689777 1.34007526149256\n439 439 -0.476344798814009 -0.415780404846241\n440 440 0.00590700828159529 0.0815516311831695\n441 441 0.083078642301709 1.23675983324076\n442 442 -1.6028594029328 -1.60925772902573\n443 443 0.681131471138643 2.20895277775767\n444 444 -0.234861802575625 0.0901511911648287\n445 445 0.424684341700282 0.434940188281203\n446 446 -0.835902000254762 0.0397065459202534\n447 447 1.11036363363419 0.216098787738172\n448 448 -1.12099310415726 -1.07651593723548\n449 449 1.6340049762297 2.04681289330294\n450 450 0.998408676587167 1.00481750170249\n451 451 0.413689828973942 -0.514524722457101\n452 452 -1.44558897330313 0.129757035586368\n453 453 1.20796668073352 0.567121006271089\n454 454 -1.29743961756537 -0.375012816916177\n455 455 -0.565344622118762 -0.731344444270503\n456 456 -0.0856730764892192 -0.366819526081972\n457 457 0.428172094069993 0.0273966031800197\n458 458 0.186470987165479 0.760848872452122\n459 459 0.259371700050186 -0.501625551500019\n460 460 -1.64360010971628 -1.5828290417955\n461 461 0.734163267878191 0.642535470348961\n462 462 1.03827780061464 1.72965834140257\n463 463 -0.548596685640441 -0.318234975328041\n464 464 0.593701471751184 0.493491384986627\n465 465 -0.845456340462967 0.0420851950366854\n466 466 0.218498725360759 0.246976588078252\n467 467 -1.42799921095182 -1.377473806456\n468 468 -0.690670805163897 1.14500512746399\n469 469 -1.37016414798422 0.471825842492376\n470 470 -2.16041725986498 -1.59664099346357\n471 471 1.33905008715574 0.432270114438061\n472 472 0.163608946537458 -0.690435257310719\n473 473 0.935034634622461 1.52887578295159\n474 474 -0.395808329051314 -0.115243233858534\n475 475 
2.04696381576314 2.05384325934844\n476 476 -0.907980731362225 -1.18512743230553\n477 477 0.503297960174636 0.0187769439792987\n478 478 -1.48550762425544 -1.47956369843223\n479 479 0.0666068333457948 1.35033923857239\n480 480 0.920664330425592 0.888008561134299\n481 481 -1.69266858919314 -0.204902976272\n482 482 0.427795287676371 -0.396576743545858\n483 483 -1.64292333261635 -1.13354208818298\n484 484 -0.701072352663313 -0.597049529192512\n485 485 0.112547088195915 1.06099877308249\n486 486 -0.74472232004593 -0.377658612540862\n487 487 1.12383483725755 0.560401527099076\n488 488 -1.42854840668183 -0.243672850363049\n489 489 0.607078625074716 1.18692220136358\n490 490 -0.274271192221796 0.848095718841269\n491 491 1.32274201252664 0.38412405018288\n492 492 -0.0233621288703684 0.272238343116884\n493 493 1.89704044055135 1.07767259562204\n494 494 -0.541441306194392 0.0771998886816035\n495 495 0.833091178133436 -0.708760130304379\n496 496 0.44386192196959 -1.12413404785683\n497 497 0.926963710431585 0.875564184000742\n498 498 -0.671209470055134 0.0950948055928938\n499 499 0.236038898960188 -0.865160384289287\n500 500 -1.30064654680219 0.0686997377448884\n"
  },
  {
    "path": "example/phenotype_bin.txt",
    "content": "FID IID Y1 Y2\n1 1 1 1\n2 2 0 0\n3 3 0 0\n4 4 0 0\n5 5 0 1\n6 6 0 1\n7 7 0 0\n8 8 1 0\n9 9 1 1\n10 10 0 1\n11 11 1 0\n12 12 0 0\n13 13 0 0\n14 14 0 0\n15 15 0 0\n16 16 0 0\n17 17 0 0\n18 18 0 0\n19 19 1 0\n20 20 0 0\n21 21 0 0\n22 22 0 0\n23 23 0 0\n24 24 0 0\n25 25 1 0\n26 26 0 0\n27 27 0 0\n28 28 0 0\n29 29 0 0\n30 30 0 0\n31 31 0 0\n32 32 1 0\n33 33 0 1\n34 34 0 1\n35 35 1 1\n36 36 0 0\n37 37 0 0\n38 38 0 0\n39 39 0 1\n40 40 0 0\n41 41 1 0\n42 42 1 1\n43 43 1 1\n44 44 0 0\n45 45 0 0\n46 46 1 1\n47 47 1 0\n48 48 0 0\n49 49 0 0\n50 50 0 0\n51 51 0 0\n52 52 0 0\n53 53 0 0\n54 54 0 0\n55 55 1 0\n56 56 1 1\n57 57 0 0\n58 58 0 0\n59 59 0 0\n60 60 0 0\n61 61 0 1\n62 62 0 0\n63 63 1 1\n64 64 1 1\n65 65 0 0\n66 66 0 0\n67 67 0 1\n68 68 0 0\n69 69 0 0\n70 70 0 1\n71 71 0 0\n72 72 0 0\n73 73 0 0\n74 74 0 0\n75 75 0 0\n76 76 0 0\n77 77 0 0\n78 78 0 0\n79 79 0 0\n80 80 0 0\n81 81 0 0\n82 82 0 1\n83 83 0 1\n84 84 0 0\n85 85 0 0\n86 86 0 0\n87 87 1 1\n88 88 0 0\n89 89 1 1\n90 90 0 1\n91 91 1 1\n92 92 0 0\n93 93 0 0\n94 94 0 0\n95 95 0 0\n96 96 1 0\n97 97 0 1\n98 98 0 0\n99 99 0 0\n100 100 0 0\n101 101 0 0\n102 102 0 1\n103 103 0 0\n104 104 0 0\n105 105 0 0\n106 106 0 1\n107 107 0 0\n108 108 0 0\n109 109 1 0\n110 110 0 1\n111 111 0 0\n112 112 0 0\n113 113 1 0\n114 114 0 0\n115 115 0 0\n116 116 0 0\n117 117 0 0\n118 118 0 0\n119 119 1 1\n120 120 0 0\n121 121 0 0\n122 122 0 0\n123 123 0 0\n124 124 1 1\n125 125 0 1\n126 126 0 0\n127 127 0 0\n128 128 0 0\n129 129 1 0\n130 130 0 0\n131 131 1 1\n132 132 0 0\n133 133 1 1\n134 134 0 0\n135 135 0 0\n136 136 0 0\n137 137 0 1\n138 138 0 1\n139 139 1 0\n140 140 0 0\n141 141 0 0\n142 142 1 0\n143 143 1 1\n144 144 0 0\n145 145 1 1\n146 146 0 0\n147 147 0 0\n148 148 1 0\n149 149 0 0\n150 150 0 1\n151 151 0 0\n152 152 0 0\n153 153 1 0\n154 154 0 0\n155 155 1 1\n156 156 1 1\n157 157 0 0\n158 158 1 0\n159 159 0 0\n160 160 0 0\n161 161 0 0\n162 162 1 1\n163 163 1 1\n164 164 0 1\n165 165 1 1\n166 166 0 0\n167 167 1 0\n168 168 0 1\n169 169 1 1\n170 170 1 1\n171 171 0 0\n172 172 0 0\n173 173 0 0\n174 174 0 0\n175 175 0 0\n176 176 0 0\n177 177 0 1\n178 178 0 0\n179 179 0 0\n180 180 0 0\n181 181 1 1\n182 182 0 1\n183 183 1 0\n184 184 0 0\n185 185 1 1\n186 186 1 0\n187 187 0 0\n188 188 0 0\n189 189 0 0\n190 190 0 0\n191 191 0 0\n192 192 0 0\n193 193 1 1\n194 194 0 0\n195 195 0 0\n196 196 0 0\n197 197 1 1\n198 198 0 0\n199 199 0 1\n200 200 0 0\n201 201 0 0\n202 202 0 0\n203 203 0 0\n204 204 0 0\n205 205 1 0\n206 206 1 1\n207 207 1 1\n208 208 0 0\n209 209 0 1\n210 210 0 0\n211 211 0 1\n212 212 0 0\n213 213 0 0\n214 214 0 0\n215 215 0 0\n216 216 0 0\n217 217 1 0\n218 218 0 1\n219 219 0 0\n220 220 1 0\n221 221 0 0\n222 222 1 0\n223 223 0 0\n224 224 1 0\n225 225 0 0\n226 226 0 0\n227 227 0 1\n228 228 0 0\n229 229 0 0\n230 230 0 0\n231 231 0 0\n232 232 1 1\n233 233 0 0\n234 234 1 0\n235 235 0 0\n236 236 1 1\n237 237 0 0\n238 238 1 1\n239 239 0 0\n240 240 0 0\n241 241 0 0\n242 242 0 0\n243 243 1 1\n244 244 1 0\n245 245 0 0\n246 246 0 0\n247 247 1 0\n248 248 0 0\n249 249 0 0\n250 250 1 0\n251 251 0 0\n252 252 0 1\n253 253 1 1\n254 254 0 0\n255 255 0 0\n256 256 0 0\n257 257 0 1\n258 258 0 0\n259 259 0 0\n260 260 0 0\n261 261 0 0\n262 262 0 0\n263 263 0 0\n264 264 0 0\n265 265 0 0\n266 266 0 0\n267 267 1 1\n268 268 0 1\n269 269 0 0\n270 270 0 0\n271 271 1 0\n272 272 0 0\n273 273 1 0\n274 274 0 0\n275 275 0 0\n276 276 0 0\n277 277 0 0\n278 278 0 0\n279 279 0 0\n280 280 0 0\n281 281 0 1\n282 282 0 0\n283 283 0 1\n284 284 1 1\n285 285 0 0\n286 286 0 0\n287 287 0 0\n288 288 
0 0\n289 289 0 0\n290 290 1 0\n291 291 0 0\n292 292 0 0\n293 293 0 0\n294 294 0 0\n295 295 0 0\n296 296 1 1\n297 297 1 1\n298 298 0 0\n299 299 0 0\n300 300 0 0\n301 301 0 0\n302 302 0 0\n303 303 1 1\n304 304 0 0\n305 305 0 0\n306 306 1 1\n307 307 0 0\n308 308 0 0\n309 309 1 1\n310 310 0 0\n311 311 0 0\n312 312 0 0\n313 313 1 0\n314 314 0 0\n315 315 0 0\n316 316 0 0\n317 317 1 1\n318 318 0 0\n319 319 0 0\n320 320 0 0\n321 321 0 1\n322 322 0 0\n323 323 1 1\n324 324 0 0\n325 325 0 0\n326 326 0 1\n327 327 0 1\n328 328 0 0\n329 329 0 0\n330 330 0 0\n331 331 0 0\n332 332 0 0\n333 333 0 0\n334 334 1 0\n335 335 0 0\n336 336 1 0\n337 337 0 0\n338 338 1 1\n339 339 0 0\n340 340 0 0\n341 341 0 0\n342 342 1 1\n343 343 0 1\n344 344 0 0\n345 345 0 0\n346 346 1 1\n347 347 0 0\n348 348 0 0\n349 349 0 0\n350 350 1 0\n351 351 0 0\n352 352 0 0\n353 353 1 0\n354 354 0 0\n355 355 0 0\n356 356 0 0\n357 357 0 0\n358 358 0 0\n359 359 0 1\n360 360 0 0\n361 361 0 0\n362 362 1 1\n363 363 0 0\n364 364 0 0\n365 365 0 0\n366 366 0 0\n367 367 0 0\n368 368 0 1\n369 369 0 0\n370 370 0 0\n371 371 0 0\n372 372 0 1\n373 373 1 1\n374 374 0 1\n375 375 0 0\n376 376 0 0\n377 377 0 0\n378 378 0 0\n379 379 1 0\n380 380 0 0\n381 381 0 0\n382 382 0 0\n383 383 0 0\n384 384 0 0\n385 385 0 0\n386 386 0 0\n387 387 0 0\n388 388 0 0\n389 389 0 0\n390 390 0 0\n391 391 0 0\n392 392 0 0\n393 393 0 0\n394 394 0 0\n395 395 1 1\n396 396 0 0\n397 397 1 0\n398 398 0 0\n399 399 0 0\n400 400 0 0\n401 401 0 0\n402 402 1 0\n403 403 1 1\n404 404 0 0\n405 405 1 0\n406 406 1 0\n407 407 0 1\n408 408 0 0\n409 409 1 0\n410 410 0 0\n411 411 0 0\n412 412 0 1\n413 413 0 0\n414 414 1 0\n415 415 1 1\n416 416 1 1\n417 417 1 1\n418 418 0 0\n419 419 1 1\n420 420 0 0\n421 421 1 0\n422 422 0 0\n423 423 0 0\n424 424 0 0\n425 425 0 1\n426 426 0 0\n427 427 0 0\n428 428 1 0\n429 429 0 0\n430 430 0 0\n431 431 0 0\n432 432 0 0\n433 433 0 0\n434 434 0 0\n435 435 0 0\n436 436 0 0\n437 437 1 1\n438 438 1 1\n439 439 0 0\n440 440 0 0\n441 441 0 1\n442 442 0 0\n443 443 1 0\n444 444 0 0\n445 445 0 0\n446 446 0 0\n447 447 1 0\n448 448 0 0\n449 449 1 0\n450 450 1 1\n451 451 0 0\n452 452 0 0\n453 453 1 1\n454 454 0 0\n455 455 0 0\n456 456 1 0\n457 457 0 0\n458 458 1 0\n459 459 1 0\n460 460 0 0\n461 461 0 0\n462 462 1 1\n463 463 0 0\n464 464 0 1\n465 465 0 0\n466 466 0 0\n467 467 0 0\n468 468 0 0\n469 469 0 0\n470 470 0 0\n471 471 0 0\n472 472 0 1\n473 473 1 1\n474 474 0 0\n475 475 1 1\n476 476 0 0\n477 477 0 0\n478 478 0 0\n479 479 0 0\n480 480 1 1\n481 481 0 0\n482 482 0 0\n483 483 0 0\n484 484 0 0\n485 485 1 0\n486 486 1 0\n487 487 0 0\n488 488 0 0\n489 489 1 1\n490 490 0 0\n491 491 0 0\n492 492 0 0\n493 493 1 1\n494 494 0 1\n495 495 0 0\n496 496 0 0\n497 497 0 0\n498 498 0 0\n499 499 0 0\n500 500 1 0\n"
  },
  {
    "path": "example/phenotype_bin_wNA.txt",
    "content": "FID IID Y1 Y2\n1 1 1 1\n2 2 0 0\n3 3 0 0\n4 4 0 0\n5 5 0 1\n6 6 NA 1\n7 7 0 0\n8 8 1 0\n9 9 1 1\n10 10 0 1\n11 11 1 0\n12 12 0 0\n13 13 0 0\n14 14 0 0\n15 15 0 0\n16 16 NA 0\n17 17 0 0\n18 18 0 0\n19 19 1 0\n20 20 0 0\n21 21 0 0\n22 22 0 0\n23 23 0 0\n24 24 0 0\n25 25 1 0\n26 26 0 0\n27 27 0 0\n28 28 0 0\n29 29 NA 0\n30 30 NA 0\n31 31 0 0\n32 32 1 0\n33 33 0 1\n34 34 0 1\n35 35 1 1\n36 36 0 0\n37 37 0 0\n38 38 0 0\n39 39 0 1\n40 40 0 0\n41 41 1 0\n42 42 NA 1\n43 43 1 1\n44 44 0 0\n45 45 0 0\n46 46 1 1\n47 47 1 0\n48 48 0 0\n49 49 0 0\n50 50 0 0\n51 51 0 0\n52 52 0 0\n53 53 0 0\n54 54 0 0\n55 55 1 0\n56 56 1 1\n57 57 0 0\n58 58 0 0\n59 59 0 0\n60 60 0 0\n61 61 0 1\n62 62 0 0\n63 63 1 1\n64 64 1 1\n65 65 NA 0\n66 66 0 0\n67 67 0 1\n68 68 0 0\n69 69 0 0\n70 70 0 1\n71 71 NA 0\n72 72 0 0\n73 73 NA 0\n74 74 0 0\n75 75 0 0\n76 76 0 0\n77 77 0 0\n78 78 0 0\n79 79 0 0\n80 80 0 0\n81 81 0 0\n82 82 0 1\n83 83 0 1\n84 84 0 0\n85 85 0 0\n86 86 0 0\n87 87 1 1\n88 88 0 0\n89 89 1 1\n90 90 0 1\n91 91 1 1\n92 92 0 0\n93 93 0 0\n94 94 0 0\n95 95 0 0\n96 96 1 0\n97 97 0 1\n98 98 0 0\n99 99 0 0\n100 100 0 0\n101 101 0 0\n102 102 NA 1\n103 103 0 0\n104 104 0 0\n105 105 0 0\n106 106 0 1\n107 107 0 0\n108 108 0 0\n109 109 NA 0\n110 110 0 1\n111 111 0 0\n112 112 0 0\n113 113 1 0\n114 114 0 0\n115 115 0 0\n116 116 0 0\n117 117 0 0\n118 118 0 0\n119 119 1 1\n120 120 0 0\n121 121 0 0\n122 122 0 0\n123 123 0 0\n124 124 1 1\n125 125 0 1\n126 126 0 0\n127 127 0 0\n128 128 0 0\n129 129 1 0\n130 130 0 0\n131 131 1 1\n132 132 0 0\n133 133 1 1\n134 134 0 0\n135 135 0 0\n136 136 0 0\n137 137 0 1\n138 138 0 1\n139 139 1 0\n140 140 0 0\n141 141 0 0\n142 142 1 0\n143 143 NA 1\n144 144 0 0\n145 145 1 1\n146 146 0 0\n147 147 0 0\n148 148 1 0\n149 149 0 0\n150 150 0 1\n151 151 0 0\n152 152 0 0\n153 153 NA 0\n154 154 0 0\n155 155 1 1\n156 156 1 1\n157 157 0 0\n158 158 1 0\n159 159 NA 0\n160 160 0 0\n161 161 0 0\n162 162 1 1\n163 163 1 1\n164 164 NA 1\n165 165 1 1\n166 166 0 0\n167 167 1 0\n168 168 0 1\n169 169 1 1\n170 170 1 1\n171 171 0 0\n172 172 0 0\n173 173 NA 0\n174 174 NA 0\n175 175 0 0\n176 176 0 0\n177 177 0 1\n178 178 0 0\n179 179 0 0\n180 180 NA 0\n181 181 1 1\n182 182 NA 1\n183 183 1 0\n184 184 0 0\n185 185 1 1\n186 186 1 0\n187 187 0 0\n188 188 0 0\n189 189 0 0\n190 190 0 0\n191 191 0 0\n192 192 0 0\n193 193 NA 1\n194 194 0 0\n195 195 0 0\n196 196 0 0\n197 197 1 1\n198 198 0 0\n199 199 0 1\n200 200 0 0\n201 201 0 0\n202 202 0 0\n203 203 NA 0\n204 204 0 0\n205 205 1 0\n206 206 1 1\n207 207 1 1\n208 208 0 0\n209 209 0 1\n210 210 0 0\n211 211 0 1\n212 212 0 0\n213 213 0 0\n214 214 0 0\n215 215 0 0\n216 216 0 0\n217 217 1 0\n218 218 NA 1\n219 219 0 0\n220 220 1 0\n221 221 0 0\n222 222 1 0\n223 223 0 0\n224 224 1 0\n225 225 0 0\n226 226 0 0\n227 227 0 1\n228 228 0 0\n229 229 0 0\n230 230 0 0\n231 231 0 0\n232 232 NA 1\n233 233 0 0\n234 234 1 0\n235 235 0 0\n236 236 1 1\n237 237 0 0\n238 238 NA 1\n239 239 0 0\n240 240 0 0\n241 241 0 0\n242 242 NA 0\n243 243 1 1\n244 244 1 0\n245 245 0 0\n246 246 0 0\n247 247 1 0\n248 248 NA 0\n249 249 0 0\n250 250 1 0\n251 251 0 0\n252 252 0 1\n253 253 1 1\n254 254 0 0\n255 255 0 0\n256 256 0 0\n257 257 0 1\n258 258 0 0\n259 259 0 0\n260 260 0 0\n261 261 0 0\n262 262 0 0\n263 263 0 0\n264 264 0 0\n265 265 0 0\n266 266 0 0\n267 267 1 1\n268 268 0 1\n269 269 0 0\n270 270 0 0\n271 271 1 0\n272 272 0 0\n273 273 1 0\n274 274 0 0\n275 275 0 0\n276 276 0 0\n277 277 0 0\n278 278 0 0\n279 279 NA 0\n280 280 0 0\n281 281 0 1\n282 282 0 0\n283 283 0 1\n284 284 1 1\n285 285 0 0\n286 286 
0 0\n287 287 0 0\n288 288 0 0\n289 289 0 0\n290 290 1 0\n291 291 NA 0\n292 292 0 0\n293 293 0 0\n294 294 0 0\n295 295 0 0\n296 296 1 1\n297 297 1 1\n298 298 0 0\n299 299 0 0\n300 300 NA 0\n301 301 0 0\n302 302 NA 0\n303 303 1 1\n304 304 0 0\n305 305 0 0\n306 306 1 1\n307 307 NA 0\n308 308 0 0\n309 309 NA 1\n310 310 NA 0\n311 311 0 0\n312 312 0 0\n313 313 1 0\n314 314 0 0\n315 315 0 0\n316 316 0 0\n317 317 1 1\n318 318 0 0\n319 319 0 0\n320 320 NA 0\n321 321 0 1\n322 322 0 0\n323 323 1 1\n324 324 0 0\n325 325 0 0\n326 326 0 1\n327 327 0 1\n328 328 0 0\n329 329 0 0\n330 330 0 0\n331 331 NA 0\n332 332 0 0\n333 333 0 0\n334 334 1 0\n335 335 0 0\n336 336 1 0\n337 337 0 0\n338 338 1 1\n339 339 0 0\n340 340 0 0\n341 341 0 0\n342 342 1 1\n343 343 0 1\n344 344 0 0\n345 345 0 0\n346 346 1 1\n347 347 0 0\n348 348 0 0\n349 349 0 0\n350 350 1 0\n351 351 NA 0\n352 352 0 0\n353 353 1 0\n354 354 0 0\n355 355 0 0\n356 356 0 0\n357 357 0 0\n358 358 NA 0\n359 359 0 1\n360 360 0 0\n361 361 0 0\n362 362 1 1\n363 363 0 0\n364 364 0 0\n365 365 0 0\n366 366 0 0\n367 367 0 0\n368 368 0 1\n369 369 0 0\n370 370 0 0\n371 371 0 0\n372 372 NA 1\n373 373 1 1\n374 374 NA 1\n375 375 0 0\n376 376 0 0\n377 377 0 0\n378 378 0 0\n379 379 1 0\n380 380 0 0\n381 381 0 0\n382 382 0 0\n383 383 0 0\n384 384 0 0\n385 385 0 0\n386 386 0 0\n387 387 0 0\n388 388 0 0\n389 389 0 0\n390 390 0 0\n391 391 0 0\n392 392 0 0\n393 393 0 0\n394 394 0 0\n395 395 1 1\n396 396 NA 0\n397 397 NA 0\n398 398 0 0\n399 399 0 0\n400 400 0 0\n401 401 0 0\n402 402 1 0\n403 403 1 1\n404 404 0 0\n405 405 NA 0\n406 406 1 0\n407 407 NA 1\n408 408 0 0\n409 409 1 0\n410 410 NA 0\n411 411 0 0\n412 412 0 1\n413 413 0 0\n414 414 1 0\n415 415 1 1\n416 416 1 1\n417 417 1 1\n418 418 0 0\n419 419 1 1\n420 420 0 0\n421 421 1 0\n422 422 0 0\n423 423 NA 0\n424 424 0 0\n425 425 0 1\n426 426 0 0\n427 427 NA 0\n428 428 1 0\n429 429 0 0\n430 430 0 0\n431 431 0 0\n432 432 0 0\n433 433 0 0\n434 434 0 0\n435 435 0 0\n436 436 0 0\n437 437 1 1\n438 438 1 1\n439 439 0 0\n440 440 0 0\n441 441 0 1\n442 442 0 0\n443 443 1 0\n444 444 0 0\n445 445 0 0\n446 446 NA 0\n447 447 1 0\n448 448 0 0\n449 449 1 0\n450 450 1 1\n451 451 0 0\n452 452 0 0\n453 453 1 1\n454 454 0 0\n455 455 0 0\n456 456 1 0\n457 457 0 0\n458 458 1 0\n459 459 1 0\n460 460 0 0\n461 461 0 0\n462 462 1 1\n463 463 0 0\n464 464 NA 1\n465 465 0 0\n466 466 0 0\n467 467 0 0\n468 468 0 0\n469 469 0 0\n470 470 0 0\n471 471 0 0\n472 472 0 1\n473 473 1 1\n474 474 0 0\n475 475 1 1\n476 476 0 0\n477 477 NA 0\n478 478 0 0\n479 479 0 0\n480 480 1 1\n481 481 0 0\n482 482 0 0\n483 483 0 0\n484 484 NA 0\n485 485 1 0\n486 486 1 0\n487 487 0 0\n488 488 0 0\n489 489 1 1\n490 490 0 0\n491 491 0 0\n492 492 0 0\n493 493 1 1\n494 494 0 1\n495 495 0 0\n496 496 0 0\n497 497 0 0\n498 498 0 0\n499 499 NA 0\n500 500 1 0\n"
  },
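  {
    "path": "example/check_phenotypes.py",
    "content": "#!/usr/bin/env python3\n# NOTE: hypothetical helper script, not part of the original regenie repository.\n# A minimal sketch that sanity-checks the whitespace-delimited phenotype files\n# above (header 'FID IID Y1 Y2', with 'NA' marking missing values): it verifies\n# a constant column count and FID == IID on every row, then counts missing\n# values per phenotype. Run from inside the example/ directory.\nimport sys\n\ndef check(path):\n    with open(path) as fh:\n        header = fh.readline().split()\n        assert header[:2] == ['FID', 'IID'], f'{path}: unexpected header {header}'\n        n_cols, n_rows = len(header), 0\n        missing = {name: 0 for name in header[2:]}\n        for line in fh:\n            fields = line.split()\n            assert len(fields) == n_cols, f'{path}: ragged row starting {fields[:2]}'\n            assert fields[0] == fields[1], f'{path}: FID != IID on row {fields[:2]}'\n            for name, value in zip(header[2:], fields[2:]):\n                if value == 'NA':\n                    missing[name] += 1\n            n_rows += 1\n    print(f'{path}: {n_rows} samples, missing values per phenotype: {missing}')\n\nif __name__ == '__main__':\n    for path in sys.argv[1:] or ['phenotype.txt', 'phenotype_bin.txt', 'phenotype_bin_wNA.txt']:\n        check(path)\n"
  },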
  {
    "path": "example/snplist_rm.txt",
    "content": "114\n191\n307\n310\n499\n500\n"
  },
  {
    "path": "example/test_bin_out_firth_Y1.regenie",
    "content": "CHROM GENPOS ID ALLELE0 ALLELE1 A1FREQ INFO N TEST BETA SE CHISQ LOG10P EXTRA\n1 1 1 2 1 0.214575 1 494 ADD 0.0775674 0.230001 0.113736 0.133163 NA\n1 2 2 2 1 0.218623 1 494 ADD 0.131068 0.239808 0.29872 0.233077 NA\n1 3 3 2 1 0.211538 1 494 ADD -0.256723 0.244611 1.10148 0.531739 NA\n1 4 4 2 1 0.191296 1 494 ADD -0.131175 0.250523 0.274164 0.221449 NA\n1 5 5 2 1 0.195344 1 494 ADD -0.187228 0.235372 0.632751 0.370236 NA\n1 6 6 2 1 0.190283 1 494 ADD -0.234935 0.245557 0.91536 0.47019 NA\n1 7 7 2 1 0.206478 1 494 ADD 0.11647 0.227747 0.26153 0.215332 NA\n1 8 8 2 1 0.188259 1 494 ADD -0.353772 0.251712 1.97533 0.796197 NA\n1 9 9 2 1 0.194332 1 494 ADD 0.283254 0.241072 1.38057 0.619781 NA\n1 10 10 2 1 0.210526 1 494 ADD 0.0244317 0.236825 0.0106427 0.0372363 NA\n1 11 11 2 1 0.183198 1 494 ADD -0.750767 0.263409 8.68164 2.49291 NA\n1 12 12 2 1 0.20749 1 494 ADD 0.150619 0.229383 0.431159 0.291222 NA\n1 13 13 2 1 0.198381 1 494 ADD 0.176768 0.249666 0.501286 0.319723 NA\n1 14 14 2 1 0.191296 1 494 ADD -0.0898607 0.250264 0.128927 0.142941 NA\n1 15 15 2 1 0.196356 1 494 ADD 0.0706531 0.24931 0.0803127 0.109649 NA\n1 16 16 2 1 0.213563 1 494 ADD 0.127126 0.224028 0.322005 0.243817 NA\n1 17 17 2 1 0.173077 1 494 ADD -0.0815387 0.258668 0.0993673 0.123441 NA\n1 18 18 2 1 0.181174 1 494 ADD -0.245885 0.247926 0.983608 0.493076 NA\n1 19 19 2 1 0.197368 1 494 ADD -0.229608 0.245176 0.87704 0.457157 NA\n1 20 20 2 1 0.210526 1 494 ADD 0.0317603 0.23126 0.0188612 0.0502367 NA\n1 21 21 2 1 0.197368 1 494 ADD -0.0978523 0.248042 0.15563 0.159134 NA\n1 22 22 2 1 0.190283 1 494 ADD 0.0389733 0.24774 0.024748 0.0579936 NA\n1 23 23 2 1 0.200405 1 494 ADD -0.0571134 0.229344 0.0620155 0.0951015 NA\n1 24 24 2 1 0.197368 1 494 ADD -0.0144344 0.234569 0.00378665 0.0218503 NA\n1 25 25 2 1 0.226721 1 494 ADD 0.482245 0.232228 4.31227 1.42207 NA\n1 26 26 2 1 0.203441 1 494 ADD -0.237063 0.224814 1.11194 0.535123 NA\n1 27 27 2 1 0.208502 1 494 ADD -0.0111253 0.23845 0.00217683 0.0164698 NA\n1 28 28 2 1 0.17004 1 494 ADD 0.200861 0.25324 0.629112 0.36888 NA\n1 29 29 2 1 0.210526 1 494 ADD -0.0404564 0.25006 0.026175 0.0597457 NA\n1 30 30 2 1 0.169028 1 494 ADD -0.292083 0.244579 1.42618 0.633785 NA\n1 31 31 2 1 0.224696 1 494 ADD -0.114124 0.24243 0.221604 0.195302 NA\n1 32 32 2 1 0.209514 1 494 ADD 0.375668 0.224165 2.80849 1.02795 NA\n1 33 33 2 1 0.211538 1 494 ADD -0.0482982 0.2361 0.0418475 0.0768022 NA\n1 34 34 2 1 0.178138 1 494 ADD -0.078399 0.24941 0.0988087 0.123053 NA\n1 35 35 2 1 0.187247 1 494 ADD -0.24506 0.233506 1.10141 0.531716 NA\n1 36 36 2 1 0.188259 1 494 ADD 0.311055 0.249723 1.55151 0.671799 NA\n1 37 37 2 1 0.190283 1 494 ADD -0.269829 0.260116 1.07608 0.523491 NA\n1 38 38 2 1 0.188259 1 494 ADD 0.512363 0.237949 4.63648 1.50447 NA\n1 39 39 2 1 0.211538 1 494 ADD 0.095859 0.218075 0.19322 0.180292 NA\n1 40 40 2 1 0.174089 1 494 ADD 0.0137348 0.251811 0.00297503 0.0193142 NA\n1 41 41 2 1 0.188259 1 494 ADD 0.0336069 0.245772 0.0186979 0.0500072 NA\n1 42 42 2 1 0.223684 1 494 ADD -0.526916 0.226359 5.41861 1.70064 NA\n1 43 43 2 1 0.197368 1 494 ADD 0.191427 0.248981 0.591118 0.35459 NA\n1 44 44 2 1 0.194332 1 494 ADD -0.0685846 0.224834 0.0930525 0.118997 NA\n1 45 45 2 1 0.20749 1 494 ADD 0.329215 0.231375 2.02453 0.810295 NA\n1 46 46 2 1 0.214575 1 494 ADD 0.318271 0.233289 1.86126 0.763262 NA\n1 47 47 2 1 0.178138 1 494 ADD 0.0441797 0.262924 0.0282347 0.0622022 NA\n1 48 48 2 1 0.210526 1 494 ADD 0.0576357 0.231684 0.0618859 0.0949927 NA\n1 49 49 2 1 0.196356 1 494 ADD -0.080932 
0.262523 0.0950402 0.120408 NA\n1 50 50 2 1 0.198381 1 494 ADD -0.0320333 0.236181 0.0183956 0.0495799 NA\n1 51 51 2 1 0.200405 1 494 ADD -0.162379 0.258126 0.395724 0.276293 NA\n1 52 52 2 1 0.183198 1 494 ADD -0.295989 0.238547 1.53959 0.668211 NA\n1 53 53 2 1 0.206478 1 494 ADD 0.196395 0.227096 0.7479 0.41213 NA\n1 54 54 2 1 0.201417 1 494 ADD 0.539329 0.236908 5.18259 1.64179 NA\n1 55 55 2 1 0.197368 1 494 ADD 0.0152131 0.229608 0.00439001 0.0235706 NA\n1 56 56 2 1 0.172065 1 494 ADD 0.185535 0.252526 0.539807 0.334877 NA\n1 57 57 2 1 0.17915 1 494 ADD 0.429497 0.259998 2.72884 1.00634 NA\n1 58 58 2 1 0.196356 1 494 ADD -0.229797 0.242554 0.897575 0.464158 NA\n1 59 59 2 1 0.208502 1 494 ADD 0.187161 0.218477 0.733865 0.407121 NA\n1 60 60 2 1 0.210526 1 494 ADD 0.0522758 0.231395 0.0510379 0.0855156 NA\n1 61 61 2 1 0.208502 1 494 ADD 0.260822 0.240551 1.17564 0.555573 NA\n1 62 62 2 1 0.216599 1 494 ADD -0.0669554 0.219653 0.0929169 0.1189 NA\n1 63 63 2 1 0.201417 1 494 ADD 0.336326 0.245913 1.87051 0.765947 NA\n1 64 64 2 1 0.17915 1 494 ADD -0.086792 0.252983 0.1177 0.13576 NA\n1 65 65 2 1 0.209514 1 494 ADD -0.396664 0.226994 3.05362 1.09389 NA\n1 66 66 2 1 0.215587 1 494 ADD -0.142472 0.234949 0.367717 0.264201 NA\n1 67 67 2 1 0.188259 1 494 ADD 0.140352 0.23105 0.369001 0.264761 NA\n1 68 68 2 1 0.197368 1 494 ADD -0.164904 0.232686 0.50225 0.320106 NA\n1 69 69 2 1 0.211538 1 494 ADD -0.205059 0.227167 0.814833 0.435695 NA\n1 70 70 2 1 0.182186 1 494 ADD 0.217693 0.246619 0.779177 0.423206 NA\n1 71 71 2 1 0.20749 1 494 ADD -0.00511998 0.253841 0.000406829 0.00704562 NA\n1 72 72 2 1 0.204453 1 494 ADD -0.157021 0.234262 0.449275 0.298709 NA\n1 73 73 2 1 0.205466 1 494 ADD 0.287335 0.24273 1.40129 0.626155 NA\n1 74 74 2 1 0.194332 1 494 ADD 0.456076 0.246102 3.43436 1.19482 NA\n1 75 75 2 1 0.172065 1 494 ADD 0.158581 0.264409 0.359709 0.26069 NA\n1 76 76 2 1 0.240891 1 494 ADD 0.391273 0.221665 3.11577 1.11049 NA\n1 77 77 2 1 0.201417 1 494 ADD -0.118872 0.235721 0.254308 0.21179 NA\n1 78 78 2 1 0.211538 1 494 ADD -0.274321 0.236845 1.3415 0.60771 NA\n1 79 79 2 1 0.199393 1 494 ADD -0.239712 0.230005 1.08619 0.52678 NA\n1 80 80 2 1 0.185223 1 494 ADD -0.0376364 0.260651 0.0208496 0.0529637 NA\n1 81 81 2 1 0.201417 1 494 ADD -0.325847 0.235856 1.90869 0.777 NA\n1 82 82 2 1 0.213563 1 494 ADD 0.130661 0.22521 0.336603 0.250421 NA\n1 83 83 2 1 0.201417 1 494 ADD 0.279562 0.239 1.36824 0.615979 NA\n1 84 84 2 1 0.216599 1 494 ADD -0.0182119 0.229628 0.00629015 0.0283598 NA\n1 85 85 2 1 0.219636 1 494 ADD -0.0530995 0.235816 0.050703 0.0852104 NA\n1 86 86 2 1 0.202429 1 494 ADD -0.0830595 0.230345 0.130023 0.143629 NA\n1 87 87 2 1 0.195344 1 494 ADD -0.1256 0.233704 0.288832 0.228435 NA\n1 88 88 2 1 0.203441 1 494 ADD 0.294784 0.244878 1.44913 0.640795 NA\n1 89 89 2 1 0.210526 1 494 ADD -0.244525 0.234147 1.09061 0.528216 NA\n1 90 90 2 1 0.183198 1 494 ADD 0.4162 0.252007 2.7276 1.006 NA\n1 91 91 2 1 0.203441 1 494 ADD 0.0327289 0.231936 0.0199126 0.0516939 NA\n1 92 92 2 1 0.204453 1 494 ADD 0.288721 0.249324 1.341 0.607553 NA\n1 93 93 2 1 0.196356 1 494 ADD -0.739102 0.255729 8.89386 2.54344 NA\n1 94 94 2 1 0.204453 1 494 ADD -0.257101 0.236491 1.1819 0.557566 NA\n1 95 95 2 1 0.195344 1 494 ADD 0.161937 0.259032 0.390827 0.274199 NA\n1 96 96 2 1 0.19332 1 494 ADD -0.337784 0.243267 1.92802 0.782582 NA\n1 97 97 2 1 0.187247 1 494 ADD 0.218805 0.2386 0.840957 0.444756 NA\n1 98 98 2 1 0.204453 1 494 ADD 0.0927028 0.233065 0.15821 0.160641 NA\n1 99 99 2 1 0.194332 1 494 ADD 0.272401 0.23222 
1.37599 0.618371 NA\n1 100 100 2 1 0.197368 1 494 ADD -0.0079439 0.258667 0.000943158 0.0107727 NA\n1 101 101 2 1 0.208502 1 494 ADD -0.00976784 0.22342 0.00191141 0.0154151 NA\n1 102 102 2 1 0.189271 1 494 ADD -0.0719744 0.227469 0.100118 0.123962 NA\n1 103 103 2 1 0.195344 1 494 ADD 0.267385 0.23489 1.29583 0.593499 NA\n1 104 104 2 1 0.186235 1 494 ADD 0.0171918 0.236113 0.00530154 0.0259694 NA\n1 105 105 2 1 0.224696 1 494 ADD 0.0463889 0.229778 0.0407578 0.0757175 NA\n1 106 106 2 1 0.204453 1 494 ADD 0.209573 0.227116 0.851479 0.448386 NA\n1 107 107 2 1 0.189271 1 494 ADD 0.199423 0.224984 0.785686 0.425497 NA\n1 108 108 2 1 0.216599 1 494 ADD 0.337977 0.237924 2.0179 0.808398 NA\n1 109 109 2 1 0.210526 1 494 ADD -0.0987036 0.237601 0.172572 0.168875 NA\n1 110 110 2 1 0.215587 1 494 ADD -0.0390161 0.229524 0.0288956 0.0629737 NA\n1 111 111 2 1 0.197368 1 494 ADD 0.286579 0.246533 1.35125 0.610729 NA\n1 112 112 2 1 0.180162 1 494 ADD 0.360755 0.266713 1.82952 0.754032 NA\n1 113 113 2 1 0.214575 1 494 ADD -0.416251 0.226915 3.36498 1.17655 NA\n1 114 114 2 1 0.215587 1 494 ADD -0.0622402 0.232877 0.0714314 0.102778 NA\n1 115 115 2 1 0.196356 1 494 ADD 0.542127 0.245454 4.8782 1.56546 NA\n1 116 116 2 1 0.210526 1 494 ADD -0.0249269 0.227466 0.0120089 0.0396537 NA\n1 117 117 2 1 0.20749 1 494 ADD 0.362257 0.229285 2.49621 0.942635 NA\n1 118 118 2 1 0.189271 1 494 ADD 0.153986 0.239736 0.412567 0.283438 NA\n1 119 119 2 1 0.194332 1 494 ADD -0.0445348 0.257544 0.0299017 0.0641334 NA\n1 120 120 2 1 0.208502 1 494 ADD -0.13184 0.225575 0.341597 0.252659 NA\n1 121 121 2 1 0.183198 1 494 ADD -0.103698 0.245506 0.178411 0.172151 NA\n1 122 122 2 1 0.187247 1 494 ADD -0.168238 0.260791 0.416162 0.284951 NA\n1 123 123 2 1 0.187247 1 494 ADD 0.56839 0.259103 4.81224 1.54885 NA\n1 124 124 2 1 0.194332 1 494 ADD 0.0990443 0.238431 0.172557 0.168867 NA\n1 125 125 2 1 0.199393 1 494 ADD 0.0242139 0.233819 0.0107244 0.0373846 NA\n1 126 126 2 1 0.192308 1 494 ADD -0.0635237 0.247483 0.0658841 0.0983093 NA\n1 127 127 2 1 0.211538 1 494 ADD -0.310185 0.216771 2.04758 0.816876 NA\n1 128 128 2 1 0.189271 1 494 ADD 0.130459 0.245538 0.282299 0.225338 NA\n1 129 129 2 1 0.199393 1 494 ADD 0.011662 0.245723 0.00225244 0.0167587 NA\n1 130 130 2 1 0.208502 1 494 ADD -0.481741 0.231767 4.3204 1.42414 NA\n1 131 131 2 1 0.186235 1 494 ADD -0.0707131 0.251207 0.0792386 0.108835 NA\n1 132 132 2 1 0.19332 1 494 ADD 0.0197285 0.251053 0.00617529 0.0280916 NA\n1 133 133 2 1 0.19332 1 494 ADD -0.113898 0.226748 0.252316 0.210808 NA\n1 134 134 2 1 0.181174 1 494 ADD 0.0272865 0.257303 0.0112462 0.0383207 NA\n1 135 135 2 1 0.212551 1 494 ADD 0.551368 0.23462 5.52274 1.72652 NA\n1 136 136 2 1 0.198381 1 494 ADD -0.0377658 0.237599 0.0252644 0.0586327 NA\n1 137 137 2 1 0.210526 1 494 ADD 0.172952 0.22854 0.5727 0.347572 NA\n1 138 138 2 1 0.206478 1 494 ADD 0.660426 0.228858 8.12051 2.35885 NA\n1 139 139 2 1 0.188259 1 494 ADD 0.127514 0.255762 0.248566 0.208951 NA\n1 140 140 2 1 0.195344 1 494 ADD 0.137092 0.229052 0.358224 0.260037 NA\n1 141 141 2 1 0.197368 1 494 ADD 0.221022 0.239153 0.854121 0.449296 NA\n1 142 142 2 1 0.190283 1 494 ADD -0.107879 0.243711 0.19594 0.181762 NA\n1 143 143 2 1 0.220648 1 494 ADD -0.171045 0.236941 0.521126 0.327568 NA\n1 144 144 2 1 0.188259 1 494 ADD -0.445567 0.244672 3.31632 1.16371 NA\n1 145 145 2 1 0.200405 1 494 ADD 0.336883 0.247684 1.84995 0.759977 NA\n1 146 146 2 1 0.20749 1 494 ADD -0.0354943 0.233717 0.0230641 0.0558676 NA\n1 147 147 2 1 0.223684 1 494 ADD -0.0783441 0.219802 0.127043 
0.141753 NA\n1 148 148 2 1 0.192308 1 494 ADD 0.462904 0.238452 3.76859 1.28213 NA\n1 149 149 2 1 0.197368 1 494 ADD 0.00482043 0.236012 0.000417162 0.00713525 NA\n1 150 150 2 1 0.209514 1 494 ADD 0.0733423 0.242037 0.0918215 0.118117 NA\n1 151 151 2 1 0.19332 1 494 ADD 0.137108 0.260469 0.277085 0.22285 NA\n1 152 152 2 1 0.189271 1 494 ADD 0.269233 0.254198 1.12179 0.538302 NA\n1 153 153 2 1 0.181174 1 494 ADD 0.0524316 0.246177 0.0453619 0.0802214 NA\n1 154 154 2 1 0.204453 1 494 ADD -0.0532762 0.237997 0.0501098 0.0846675 NA\n1 155 155 2 1 0.209514 1 494 ADD 0.0024961 0.235479 0.000112363 0.00368867 NA\n1 156 156 2 1 0.186235 1 494 ADD -0.249799 0.236163 1.11881 0.537341 NA\n1 157 157 2 1 0.208502 1 494 ADD -0.421515 0.249013 2.86538 1.04333 NA\n1 158 158 2 1 0.189271 1 494 ADD -0.0767536 0.240251 0.102063 0.125305 NA\n1 159 159 2 1 0.202429 1 494 ADD 0.364345 0.236085 2.38171 0.910932 NA\n1 160 160 2 1 0.187247 1 494 ADD -0.130187 0.24101 0.291786 0.229827 NA\n1 161 161 2 1 0.205466 1 494 ADD -0.324356 0.23566 1.8944 0.772869 NA\n1 162 162 2 1 0.199393 1 494 ADD -0.251815 0.236096 1.13759 0.543388 NA\n1 163 163 2 1 0.17915 1 494 ADD 0.549634 0.262562 4.38211 1.43988 NA\n1 164 164 2 1 0.197368 1 494 ADD 0.0498387 0.249499 0.0399021 0.0748571 NA\n1 165 165 2 1 0.188259 1 494 ADD -0.0048665 0.243378 0.000399827 0.00698424 NA\n1 166 166 2 1 0.196356 1 494 ADD 0.189153 0.25008 0.572093 0.34734 NA\n1 167 167 2 1 0.187247 1 494 ADD 0.114594 0.228898 0.250635 0.209976 NA\n1 168 168 2 1 0.205466 1 494 ADD 0.20514 0.235785 0.756952 0.415348 NA\n1 169 169 2 1 0.201417 1 494 ADD 0.371147 0.235378 2.48634 0.939912 NA\n1 170 170 2 1 0.188259 1 494 ADD -0.0325784 0.243087 0.0179612 0.0489602 NA\n1 171 171 2 1 0.205466 1 494 ADD 0.0806316 0.232338 0.12044 0.137536 NA\n1 172 172 2 1 0.215587 1 494 ADD -0.0291194 0.246949 0.0139043 0.0428077 NA\n1 173 173 2 1 0.210526 1 494 ADD -0.525094 0.246333 4.54389 1.48101 NA\n1 174 174 2 1 0.20749 1 494 ADD -0.477343 0.241393 3.91031 1.31884 NA\n1 175 175 2 1 0.22166 1 494 ADD 0.159839 0.239087 0.446944 0.297751 NA\n1 176 176 2 1 0.182186 1 494 ADD -0.273088 0.248103 1.21156 0.566992 NA\n1 177 177 2 1 0.212551 1 494 ADD 0.100509 0.240498 0.174657 0.17005 NA\n1 178 178 2 1 0.219636 1 494 ADD 0.219401 0.239146 0.841688 0.445009 NA\n1 179 179 2 1 0.194332 1 494 ADD -0.44054 0.248412 3.14505 1.11829 NA\n1 180 180 2 1 0.198381 1 494 ADD -0.0222736 0.231566 0.0092519 0.0346232 NA\n1 181 181 2 1 0.212551 1 494 ADD 0.0446498 0.225942 0.0390522 0.0739946 NA\n1 182 182 2 1 0.189271 1 494 ADD -0.529348 0.232001 5.20601 1.64765 NA\n1 183 183 2 1 0.200405 1 494 ADD -0.173841 0.232296 0.56004 0.342711 NA\n1 184 184 2 1 0.197368 1 494 ADD -0.0869827 0.23773 0.133875 0.146029 NA\n1 185 185 2 1 0.198381 1 494 ADD -0.31736 0.246738 1.65436 0.702532 NA\n1 186 186 2 1 0.219636 1 494 ADD 0.415424 0.217999 3.63141 1.24643 NA\n1 187 187 2 1 0.204453 1 494 ADD -0.127341 0.230953 0.30401 0.23554 NA\n1 188 188 2 1 0.197368 1 494 ADD -0.131344 0.237589 0.30561 0.236283 NA\n1 189 189 2 1 0.195344 1 494 ADD 0.282696 0.235248 1.44406 0.639249 NA\n1 190 190 2 1 0.187247 1 494 ADD -0.190845 0.25003 0.582608 0.351355 NA\n1 191 191 2 1 0.209514 1 494 ADD 0.581521 0.216742 7.09912 2.11282 NA\n1 192 192 2 1 0.171053 1 494 ADD 0.125639 0.24471 0.263599 0.21634 NA\n1 193 193 2 1 0.192308 1 494 ADD -0.102295 0.254299 0.161814 0.162733 NA\n1 194 194 2 1 0.208502 1 494 ADD -0.0561295 0.236682 0.0562408 0.0901556 NA\n1 195 195 2 1 0.217611 1 494 ADD 0.00759132 0.234183 0.00105081 0.0113785 NA\n1 196 196 
2 1 0.203441 1 494 ADD 0.336144 0.244164 1.89533 0.773138 NA\n1 197 197 2 1 0.202429 1 494 ADD 0.326072 0.237557 1.88405 0.769871 NA\n1 198 198 2 1 0.198381 1 494 ADD -0.124779 0.228909 0.297134 0.232336 NA\n1 199 199 2 1 0.185223 1 494 ADD -0.526894 0.242737 4.71165 1.52348 NA\n1 200 200 2 1 0.188259 1 494 ADD 0.104052 0.243099 0.183203 0.174811 NA\n1 201 201 2 1 0.216599 1 494 ADD -0.233208 0.235642 0.979449 0.491693 NA\n1 202 202 2 1 0.175101 1 494 ADD -0.0460542 0.241999 0.0362167 0.0710572 NA\n1 203 203 2 1 0.222672 1 494 ADD -0.353509 0.24956 2.00655 0.80515 NA\n1 204 204 2 1 0.183198 1 494 ADD -0.0277105 0.236089 0.0137764 0.0426013 NA\n1 205 205 2 1 0.190283 1 494 ADD -0.225238 0.234208 0.924866 0.473402 NA\n1 206 206 2 1 0.181174 1 494 ADD -0.118214 0.250118 0.223381 0.196218 NA\n1 207 207 2 1 0.19332 1 494 ADD 0.194133 0.248703 0.609304 0.361461 NA\n1 208 208 2 1 0.208502 1 494 ADD 0.0870885 0.248417 0.122902 0.139118 NA\n1 209 209 2 1 0.176113 1 494 ADD -0.316245 0.235764 1.79925 0.745203 NA\n1 210 210 2 1 0.210526 1 494 ADD -0.452566 0.2169 4.35358 1.43261 NA\n1 211 211 2 1 0.191296 1 494 ADD 0.347337 0.250765 1.91853 0.779843 NA\n1 212 212 2 1 0.209514 1 494 ADD -0.0637143 0.2245 0.0805453 0.109825 NA\n1 213 213 2 1 0.180162 1 494 ADD 0.125401 0.257536 0.237096 0.203209 NA\n1 214 214 2 1 0.192308 1 494 ADD -0.280003 0.254352 1.21187 0.56709 NA\n1 215 215 2 1 0.202429 1 494 ADD 0.273454 0.240409 1.2938 0.592866 NA\n1 216 216 2 1 0.211538 1 494 ADD 0.0877622 0.244877 0.128445 0.142638 NA\n1 217 217 2 1 0.215587 1 494 ADD 0.147027 0.219577 0.44835 0.298329 NA\n1 218 218 2 1 0.195344 1 494 ADD 0.11561 0.247524 0.218148 0.193512 NA\n1 219 219 2 1 0.201417 1 494 ADD -0.165098 0.22573 0.534945 0.332981 NA\n1 220 220 2 1 0.185223 1 494 ADD -0.227127 0.241249 0.886352 0.460337 NA\n1 221 221 2 1 0.19332 1 494 ADD -0.245511 0.241253 1.03561 0.51026 NA\n1 222 222 2 1 0.17915 1 494 ADD 0.126374 0.238576 0.280581 0.22452 NA\n1 223 223 2 1 0.20749 1 494 ADD 0.0432905 0.235591 0.033765 0.0684366 NA\n1 224 224 2 1 0.203441 1 494 ADD -0.239607 0.250357 0.915968 0.470395 NA\n1 225 225 2 1 0.182186 1 494 ADD 0.208018 0.235924 0.777421 0.422588 NA\n1 226 226 2 1 0.211538 1 494 ADD 0.00814029 0.217043 0.00140666 0.0131916 NA\n1 227 227 2 1 0.203441 1 494 ADD 0.0233924 0.247722 0.00891698 0.0339673 NA\n1 228 228 2 1 0.178138 1 494 ADD 0.00411336 0.243644 0.000285025 0.00588961 NA\n1 229 229 2 1 0.208502 1 494 ADD 0.205819 0.237378 0.751776 0.413509 NA\n1 230 230 2 1 0.171053 1 494 ADD -0.28898 0.249136 1.34543 0.608927 NA\n1 231 231 2 1 0.188259 1 494 ADD 0.388264 0.241396 2.58699 0.967602 NA\n1 232 232 2 1 0.181174 1 494 ADD 0.171533 0.247554 0.480128 0.311256 NA\n1 233 233 2 1 0.201417 1 494 ADD 0.0469302 0.237934 0.0389036 0.073843 NA\n1 234 234 2 1 0.197368 1 494 ADD 0.186589 0.241115 0.598854 0.35752 NA\n1 235 235 2 1 0.186235 1 494 ADD 0.164584 0.243268 0.457722 0.302169 NA\n1 236 236 2 1 0.196356 1 494 ADD 0.685354 0.244762 7.54733 2.22113 NA\n1 237 237 2 1 0.216599 1 494 ADD -0.232181 0.237769 0.953545 0.483044 NA\n1 238 238 2 1 0.190283 1 494 ADD 0.507344 0.241471 4.41444 1.44811 NA\n1 239 239 2 1 0.17915 1 494 ADD -0.141981 0.252332 0.316603 0.241348 NA\n1 240 240 2 1 0.217611 1 494 ADD 0.0184468 0.227884 0.00655256 0.0289639 NA\n1 241 241 2 1 0.185223 1 494 ADD -0.440588 0.24769 3.1641 1.12335 NA\n1 242 242 2 1 0.198381 1 494 ADD 0.256868 0.240376 1.14192 0.544779 NA\n1 243 243 2 1 0.219636 1 494 ADD -0.0875526 0.243105 0.129703 0.143428 NA\n1 244 244 2 1 0.196356 1 494 ADD 0.241806 
0.23834 1.02929 0.508183 NA\n1 245 245 2 1 0.187247 1 494 ADD -0.440328 0.23728 3.44375 1.19729 NA\n1 246 246 2 1 0.196356 1 494 ADD -0.310991 0.233608 1.77224 0.737298 NA\n1 247 247 2 1 0.189271 1 494 ADD 0.107724 0.255013 0.178445 0.17217 NA\n1 248 248 2 1 0.183198 1 494 ADD 0.0316461 0.238284 0.0176381 0.0484949 NA\n1 249 249 2 1 0.175101 1 494 ADD 0.27444 0.257176 1.13876 0.543765 NA\n1 250 250 2 1 0.17915 1 494 ADD 0.0614279 0.252795 0.0590466 0.0925837 NA\n1 251 251 2 1 0.226721 1 494 ADD -0.228603 0.237876 0.923556 0.472959 NA\n1 252 252 2 1 0.197368 1 494 ADD 0.121706 0.230814 0.278035 0.223305 NA\n1 253 253 2 1 0.209514 1 494 ADD 0.488668 0.224401 4.74218 1.53118 NA\n1 254 254 2 1 0.209514 1 494 ADD 0.177626 0.229557 0.598737 0.357476 NA\n1 255 255 2 1 0.212551 1 494 ADD 0.313204 0.240614 1.69439 0.71439 NA\n1 256 256 2 1 0.195344 1 494 ADD -0.191068 0.237769 0.645748 0.375061 NA\n1 257 257 2 1 0.186235 1 494 ADD 0.472663 0.24021 3.87186 1.3089 NA\n1 258 258 2 1 0.212551 1 494 ADD 0.454249 0.230918 3.86965 1.30833 NA\n1 259 259 2 1 0.218623 1 494 ADD -0.00686996 0.233375 0.000866564 0.0103208 NA\n1 260 260 2 1 0.201417 1 494 ADD -0.641684 0.249596 6.92329 2.07017 NA\n1 261 261 2 1 0.186235 1 494 ADD 0.0238221 0.239516 0.00989219 0.0358472 NA\n1 262 262 2 1 0.225709 1 494 ADD -0.360946 0.250435 2.07727 0.825338 NA\n1 263 263 2 1 0.187247 1 494 ADD -0.169709 0.240098 0.499613 0.319057 NA\n1 264 264 2 1 0.183198 1 494 ADD -0.256245 0.242189 1.11944 0.537544 NA\n1 265 265 2 1 0.20749 1 494 ADD 0.0243429 0.222907 0.0119261 0.0395108 NA\n1 266 266 2 1 0.194332 1 494 ADD -0.664331 0.251561 7.33325 2.16947 NA\n1 267 267 2 1 0.203441 1 494 ADD -0.239132 0.241563 0.979975 0.491868 NA\n1 268 268 2 1 0.219636 1 494 ADD 0.0951827 0.225713 0.17783 0.171827 NA\n1 269 269 2 1 0.184211 1 494 ADD 0.282981 0.231405 1.49544 0.654873 NA\n1 270 270 2 1 0.191296 1 494 ADD 0.453645 0.255768 3.14586 1.1185 NA\n1 271 271 2 1 0.200405 1 494 ADD -0.0808228 0.240269 0.113154 0.132779 NA\n1 272 272 2 1 0.186235 1 494 ADD -0.042941 0.242151 0.0314466 0.0658816 NA\n1 273 273 2 1 0.191296 1 494 ADD 0.0133505 0.243043 0.00301736 0.0194541 NA\n1 274 274 2 1 0.177126 1 494 ADD 0.0551893 0.250015 0.0487279 0.0833925 NA\n1 275 275 2 1 0.216599 1 494 ADD -0.210649 0.219884 0.917769 0.471004 NA\n1 276 276 2 1 0.195344 1 494 ADD -0.155271 0.231073 0.451528 0.299634 NA\n1 277 277 2 1 0.180162 1 494 ADD 0.159026 0.236068 0.453795 0.300563 NA\n1 278 278 2 1 0.200405 1 494 ADD 0.0996347 0.235063 0.179661 0.172847 NA\n1 279 279 2 1 0.210526 1 494 ADD 0.317616 0.23165 1.87992 0.768675 NA\n1 280 280 2 1 0.202429 1 494 ADD 0.0963706 0.22526 0.18303 0.174715 NA\n1 281 281 2 1 0.210526 1 494 ADD -0.315037 0.228802 1.89585 0.773289 NA\n1 282 282 2 1 0.219636 1 494 ADD -0.076133 0.236931 0.103252 0.126121 NA\n1 283 283 2 1 0.213563 1 494 ADD 0.0220752 0.223137 0.0097874 0.0356494 NA\n1 284 284 2 1 0.185223 1 494 ADD 0.351599 0.252479 1.93929 0.785831 NA\n1 285 285 2 1 0.202429 1 494 ADD -0.321853 0.245204 1.72291 0.722805 NA\n1 286 286 2 1 0.219636 1 494 ADD -0.147106 0.223449 0.433414 0.292159 NA\n1 287 287 2 1 0.192308 1 494 ADD 0.151032 0.246317 0.375968 0.267792 NA\n1 288 288 2 1 0.200405 1 494 ADD -0.187541 0.240239 0.609402 0.361498 NA\n1 289 289 2 1 0.189271 1 494 ADD -0.0840863 0.227902 0.136131 0.147423 NA\n1 290 290 2 1 0.214575 1 494 ADD 0.292289 0.23618 1.53158 0.665798 NA\n1 291 291 2 1 0.216599 1 494 ADD 0.268234 0.219577 1.4923 0.653921 NA\n1 292 292 2 1 0.19332 1 494 ADD -0.344072 0.235297 2.13829 0.842658 NA\n1 293 
293 2 1 0.205466 1 494 ADD -0.0864626 0.230699 0.140464 0.150077 NA\n1 294 294 2 1 0.184211 1 494 ADD 0.489442 0.262504 3.47641 1.20586 NA\n1 295 295 2 1 0.203441 1 494 ADD 0.331204 0.231665 2.04395 0.815841 NA\n1 296 296 2 1 0.209514 1 494 ADD -0.0805619 0.239349 0.113291 0.13287 NA\n1 297 297 2 1 0.215587 1 494 ADD 0.165862 0.22921 0.523633 0.328553 NA\n1 298 298 2 1 0.20749 1 494 ADD -0.188592 0.237428 0.630932 0.369558 NA\n1 299 299 2 1 0.216599 1 494 ADD 0.176591 0.233697 0.570993 0.346918 NA\n1 300 300 2 1 0.202429 1 494 ADD -0.144656 0.248427 0.339061 0.251524 NA\n1 301 301 2 1 0.209514 1 494 ADD -0.0933582 0.244644 0.145625 0.153198 NA\n1 302 302 2 1 0.209514 1 494 ADD -0.154235 0.239447 0.414902 0.284421 NA\n1 303 303 2 1 0.187247 1 494 ADD 0.112938 0.226548 0.248519 0.208927 NA\n1 304 304 2 1 0.19332 1 494 ADD -0.0369757 0.234443 0.0248747 0.058151 NA\n1 305 305 2 1 0.185223 1 494 ADD -0.160382 0.230493 0.484166 0.31288 NA\n1 306 306 2 1 0.195344 1 494 ADD 0.148343 0.227192 0.426332 0.289211 NA\n1 307 307 2 1 0.202429 1 494 ADD -0.364964 0.246993 2.18339 0.855404 NA\n1 308 308 2 1 0.189271 1 494 ADD 0.0113414 0.230555 0.00241983 0.0173822 NA\n1 309 309 2 1 0.200405 1 494 ADD -0.550167 0.232754 5.58719 1.74251 NA\n1 310 310 2 1 0.208502 1 494 ADD -0.313848 0.246007 1.62758 0.694567 NA\n1 311 311 2 1 0.201417 1 494 ADD -0.0230018 0.231591 0.00986463 0.0357953 NA\n1 312 312 2 1 0.204453 1 494 ADD -0.247301 0.217641 1.29114 0.592034 NA\n1 313 313 2 1 0.208502 1 494 ADD 0.0489485 0.230112 0.0452481 0.0801124 NA\n1 314 314 2 1 0.197368 1 494 ADD -0.236284 0.243026 0.94528 0.480272 NA\n1 315 315 2 1 0.186235 1 494 ADD -0.0544366 0.238678 0.0520186 0.0864048 NA\n1 316 316 2 1 0.191296 1 494 ADD -0.156108 0.233868 0.445563 0.297182 NA\n1 317 317 2 1 0.200405 1 494 ADD -0.170493 0.230598 0.546638 0.337531 NA\n1 318 318 2 1 0.201417 1 494 ADD -0.0836236 0.240343 0.121059 0.137935 NA\n1 319 319 2 1 0.218623 1 494 ADD -0.264531 0.224167 1.39255 0.623468 NA\n1 320 320 2 1 0.183198 1 494 ADD -0.207568 0.226662 0.838617 0.443948 NA\n1 321 321 2 1 0.178138 1 494 ADD -0.0122218 0.240989 0.00257204 0.0179313 NA\n1 322 322 2 1 0.194332 1 494 ADD -0.000149906 0.246105 3.71023e-07 0.00021112 NA\n1 323 323 2 1 0.201417 1 494 ADD 0.32419 0.254198 1.62651 0.694247 NA\n1 324 324 2 1 0.210526 1 494 ADD 0.0770296 0.248347 0.0962048 0.12123 NA\n1 325 325 2 1 0.19332 1 494 ADD 0.313602 0.236354 1.76048 0.733851 NA\n1 326 326 2 1 0.185223 1 494 ADD -0.0525833 0.240257 0.047901 0.0826222 NA\n1 327 327 2 1 0.200405 1 494 ADD -0.0591845 0.238504 0.0615781 0.0947337 NA\n1 328 328 2 1 0.210526 1 494 ADD 0.354506 0.233132 2.3123 0.891591 NA\n1 329 329 2 1 0.208502 1 494 ADD 0.111997 0.229837 0.237451 0.203389 NA\n1 330 330 2 1 0.17915 1 494 ADD -0.12819 0.236699 0.2933 0.230539 NA\n1 331 331 2 1 0.19332 1 494 ADD 0.408404 0.23602 2.99421 1.07799 NA\n1 332 332 2 1 0.188259 1 494 ADD 0.195833 0.246878 0.629226 0.368922 NA\n1 333 333 2 1 0.209514 1 494 ADD -0.151887 0.226026 0.451569 0.29965 NA\n1 334 334 2 1 0.19332 1 494 ADD 0.0416929 0.230151 0.032817 0.0674014 NA\n1 335 335 2 1 0.212551 1 494 ADD 0.0613165 0.225989 0.0736173 0.1045 NA\n1 336 336 2 1 0.186235 1 494 ADD 0.129016 0.238361 0.292966 0.230382 NA\n1 337 337 2 1 0.218623 1 494 ADD 0.152594 0.23628 0.417082 0.285338 NA\n1 338 338 2 1 0.190283 1 494 ADD -0.239065 0.254347 0.883446 0.459346 NA\n1 339 339 2 1 0.19332 1 494 ADD -0.135831 0.242476 0.313803 0.240064 NA\n1 340 340 2 1 0.192308 1 494 ADD -0.263052 0.233923 1.26455 0.583707 NA\n1 341 341 2 1 
0.191296 1 494 ADD -0.184736 0.232458 0.631558 0.369791 NA\n1 342 342 2 1 0.197368 1 494 ADD -0.280695 0.252277 1.23798 0.575345 NA\n1 343 343 2 1 0.216599 1 494 ADD 0.119036 0.229639 0.268699 0.218815 NA\n1 344 344 2 1 0.17915 1 494 ADD 0.64969 0.229737 7.82274 2.28741 NA\n1 345 345 2 1 0.178138 1 494 ADD -0.249486 0.256349 0.947174 0.480908 NA\n1 346 346 2 1 0.226721 1 494 ADD -0.119133 0.22382 0.283311 0.225819 NA\n1 347 347 2 1 0.183198 1 494 ADD -0.271723 0.256981 1.11802 0.537086 NA\n1 348 348 2 1 0.190283 1 494 ADD -0.132357 0.24202 0.299083 0.233247 NA\n1 349 349 2 1 0.185223 1 494 ADD -0.406789 0.238202 2.9164 1.05708 NA\n1 350 350 2 1 0.197368 1 494 ADD -0.309055 0.244167 1.60213 0.686973 NA\n1 351 351 2 1 0.211538 1 494 ADD -0.0346362 0.242827 0.0203455 0.0522838 NA\n1 352 352 2 1 0.197368 1 494 ADD 0.222792 0.243363 0.83809 0.443766 NA\n1 353 353 2 1 0.195344 1 494 ADD 0.220816 0.259398 0.724649 0.403818 NA\n1 354 354 2 1 0.214575 1 494 ADD 0.423568 0.232507 3.31876 1.16435 NA\n1 355 355 2 1 0.218623 1 494 ADD 0.15006 0.229652 0.426959 0.289472 NA\n1 356 356 2 1 0.201417 1 494 ADD 0.137602 0.227091 0.367158 0.263956 NA\n1 357 357 2 1 0.189271 1 494 ADD -0.357142 0.233423 2.34096 0.89959 NA\n1 358 358 2 1 0.213563 1 494 ADD -0.415095 0.244066 2.89255 1.05066 NA\n1 359 359 2 1 0.180162 1 494 ADD 0.18873 0.242497 0.605714 0.360109 NA\n1 360 360 2 1 0.20749 1 494 ADD 0.194504 0.239236 0.661007 0.380693 NA\n1 361 361 2 1 0.19332 1 494 ADD -0.231122 0.239437 0.931756 0.475724 NA\n1 362 362 2 1 0.196356 1 494 ADD -0.000245258 0.23587 1.08119e-06 0.000360459 NA\n1 363 363 2 1 0.214575 1 494 ADD -0.168907 0.234424 0.519147 0.326789 NA\n1 364 364 2 1 0.201417 1 494 ADD 0.340148 0.254778 1.78242 0.740282 NA\n1 365 365 2 1 0.17915 1 494 ADD 0.259736 0.243495 1.13786 0.543473 NA\n1 366 366 2 1 0.205466 1 494 ADD -0.326632 0.231234 1.99532 0.801932 NA\n1 367 367 2 1 0.162955 1 494 ADD -0.568275 0.268885 4.46667 1.4614 NA\n1 368 368 2 1 0.187247 1 494 ADD 0.0128547 0.244311 0.00276847 0.0186174 NA\n1 369 369 2 1 0.198381 1 494 ADD 0.0975411 0.228457 0.182291 0.174307 NA\n1 370 370 2 1 0.212551 1 494 ADD -0.084123 0.226825 0.137546 0.148294 NA\n1 371 371 2 1 0.202429 1 494 ADD 0.0480171 0.223419 0.0461905 0.0810109 NA\n1 372 372 2 1 0.20749 1 494 ADD 0.568191 0.219481 6.5877 1.98849 NA\n1 373 373 2 1 0.191296 1 494 ADD -0.0834276 0.243188 0.117688 0.135753 NA\n1 374 374 2 1 0.182186 1 494 ADD -0.363118 0.23859 2.31629 0.892704 NA\n1 375 375 2 1 0.196356 1 494 ADD 0.0651676 0.237644 0.0751985 0.105732 NA\n1 376 376 2 1 0.19332 1 494 ADD -0.241227 0.235149 1.05236 0.515752 NA\n1 377 377 2 1 0.205466 1 494 ADD 0.341688 0.249721 1.87218 0.766432 NA\n1 378 378 2 1 0.204453 1 494 ADD 0.158864 0.248605 0.408348 0.281657 NA\n1 379 379 2 1 0.199393 1 494 ADD -0.0481929 0.244 0.0390108 0.0739524 NA\n1 380 380 2 1 0.195344 1 494 ADD -0.0734079 0.248604 0.0871903 0.114763 NA\n1 381 381 2 1 0.210526 1 494 ADD -0.0105475 0.228929 0.00212273 0.0162601 NA\n1 382 382 2 1 0.194332 1 494 ADD 0.0416356 0.239626 0.0301899 0.0644624 NA\n1 383 383 2 1 0.197368 1 494 ADD -0.0417624 0.234217 0.0317932 0.0662686 NA\n1 384 384 2 1 0.197368 1 494 ADD -0.0227064 0.258763 0.00770003 0.0314818 NA\n1 385 385 2 1 0.19332 1 494 ADD -0.17419 0.2265 0.591438 0.354712 NA\n1 386 386 2 1 0.209514 1 494 ADD 0.204922 0.236581 0.750272 0.412974 NA\n1 387 387 2 1 0.216599 1 494 ADD -0.433229 0.242266 3.19781 1.13231 NA\n1 388 388 2 1 0.182186 1 494 ADD -0.0664087 0.261258 0.0646118 0.0972631 NA\n1 389 389 2 1 0.201417 1 494 ADD 
-0.0964735 0.24814 0.151155 0.156497 NA\n1 390 390 2 1 0.211538 1 494 ADD -0.21901 0.235598 0.864138 0.452738 NA\n1 391 391 2 1 0.192308 1 494 ADD -0.336766 0.239517 1.9769 0.796649 NA\n1 392 392 2 1 0.218623 1 494 ADD -0.191544 0.228947 0.699952 0.394911 NA\n1 393 393 2 1 0.190283 1 494 ADD 0.0611308 0.245398 0.0620552 0.0951349 NA\n1 394 394 2 1 0.194332 1 494 ADD 0.0518504 0.246149 0.0443718 0.0792698 NA\n1 395 395 2 1 0.172065 1 494 ADD -0.224502 0.261258 0.738422 0.40875 NA\n1 396 396 2 1 0.210526 1 494 ADD 0.0912253 0.234707 0.15107 0.156447 NA\n1 397 397 2 1 0.178138 1 494 ADD -0.162788 0.252472 0.415737 0.284773 NA\n1 398 398 2 1 0.231781 1 494 ADD 0.01235 0.235618 0.00274739 0.0185449 NA\n1 399 399 2 1 0.175101 1 494 ADD -0.0479172 0.267481 0.0320921 0.066601 NA\n1 400 400 2 1 0.209514 1 494 ADD 0.0439179 0.233626 0.0353379 0.0701269 NA\n1 401 401 2 1 0.200405 1 494 ADD -0.329615 0.247943 1.76729 0.735849 NA\n1 402 402 2 1 0.203441 1 494 ADD 0.272774 0.22381 1.48541 0.651832 NA\n1 403 403 2 1 0.195344 1 494 ADD -0.0152635 0.246255 0.00384184 0.0220128 NA\n1 404 404 2 1 0.17915 1 494 ADD -0.140991 0.243505 0.33525 0.249812 NA\n1 405 405 2 1 0.22166 1 494 ADD -0.256102 0.22605 1.28357 0.589667 NA\n1 406 406 2 1 0.191296 1 494 ADD -0.719464 0.260554 8.01813 2.33431 NA\n1 407 407 2 1 0.204453 1 494 ADD 0.00669707 0.228699 0.000857517 0.0102661 NA\n1 408 408 2 1 0.195344 1 494 ADD 0.00669866 0.229694 0.000850504 0.0102236 NA\n1 409 409 2 1 0.225709 1 494 ADD 0.100527 0.224966 0.199679 0.183772 NA\n1 410 410 2 1 0.204453 1 494 ADD -0.0269847 0.230987 0.0136477 0.0423928 NA\n1 411 411 2 1 0.205466 1 494 ADD -0.469791 0.240515 3.81526 1.29424 NA\n1 412 412 2 1 0.19332 1 494 ADD 0.0515073 0.229386 0.05042 0.0849517 NA\n1 413 413 2 1 0.20749 1 494 ADD 0.182291 0.248882 0.53647 0.333576 NA\n1 414 414 2 1 0.190283 1 494 ADD 0.289216 0.2395 1.45825 0.643574 NA\n1 415 415 2 1 0.199393 1 494 ADD 0.113183 0.232928 0.236113 0.202713 NA\n1 416 416 2 1 0.203441 1 494 ADD 0.252299 0.243939 1.06972 0.52142 NA\n1 417 417 2 1 0.197368 1 494 ADD 0.684579 0.232363 8.56402 2.46487 NA\n1 418 418 2 1 0.19332 1 494 ADD -0.223279 0.240453 0.862256 0.452092 NA\n1 419 419 2 1 0.184211 1 494 ADD 0.0289128 0.254994 0.0128565 0.0410902 NA\n1 420 420 2 1 0.208502 1 494 ADD 0.258976 0.231243 1.25423 0.580465 NA\n1 421 421 2 1 0.190283 1 494 ADD 0.261297 0.236457 1.22113 0.570024 NA\n1 422 422 2 1 0.211538 1 494 ADD 0.363307 0.228262 2.53327 0.952846 NA\n1 423 423 2 1 0.188259 1 494 ADD 0.0530746 0.237276 0.0500343 0.0845983 NA\n1 424 424 2 1 0.211538 1 494 ADD -0.139296 0.219001 0.404565 0.280055 NA\n1 425 425 2 1 0.202429 1 494 ADD -0.0346592 0.247346 0.0196348 0.0513124 NA\n1 426 426 2 1 0.209514 1 494 ADD 0.0378373 0.226134 0.0279968 0.0619226 NA\n1 427 427 2 1 0.19332 1 494 ADD 0.09836 0.262449 0.140459 0.150074 NA\n1 428 428 2 1 0.192308 1 494 ADD 0.25426 0.241674 1.10687 0.533483 NA\n1 429 429 2 1 0.22166 1 494 ADD 0.14721 0.237802 0.383212 0.270925 NA\n1 430 430 2 1 0.225709 1 494 ADD 0.278268 0.223631 1.54832 0.670839 NA\n1 431 431 2 1 0.200405 1 494 ADD -0.26871 0.219408 1.4999 0.656223 NA\n1 432 432 2 1 0.194332 1 494 ADD -0.226225 0.237684 0.905906 0.466987 NA\n1 433 433 2 1 0.187247 1 494 ADD -0.117095 0.24086 0.236346 0.202831 NA\n1 434 434 2 1 0.229757 1 494 ADD 0.0478362 0.221533 0.0466272 0.0814246 NA\n1 435 435 2 1 0.215587 1 494 ADD -0.11193 0.230876 0.235038 0.202169 NA\n1 436 436 2 1 0.198381 1 494 ADD -0.102686 0.235687 0.189825 0.178446 NA\n1 437 437 2 1 0.187247 1 494 ADD 0.737775 0.246765 
8.93134 2.55235 NA\n1 438 438 2 1 0.19332 1 494 ADD 0.0639365 0.257064 0.061861 0.0949717 NA\n1 439 439 2 1 0.211538 1 494 ADD 0.0643578 0.227473 0.0800465 0.109448 NA\n1 440 440 2 1 0.208502 1 494 ADD -0.178705 0.230782 0.599609 0.357805 NA\n1 441 441 2 1 0.17915 1 494 ADD -0.149983 0.246238 0.371001 0.265633 NA\n1 442 442 2 1 0.192308 1 494 ADD -0.121635 0.23594 0.265772 0.217397 NA\n1 443 443 2 1 0.210526 1 494 ADD 0.316958 0.234743 1.82314 0.752174 NA\n1 444 444 2 1 0.212551 1 494 ADD -0.0059142 0.2386 0.000614398 0.00867431 NA\n1 445 445 2 1 0.183198 1 494 ADD 0.382624 0.260668 2.1546 0.847274 NA\n1 446 446 2 1 0.211538 1 494 ADD -0.210875 0.227938 0.855887 0.449903 NA\n1 447 447 2 1 0.228745 1 494 ADD -0.284376 0.231705 1.50632 0.658167 NA\n1 448 448 2 1 0.212551 1 494 ADD -0.0950486 0.23883 0.158385 0.160743 NA\n1 449 449 2 1 0.190283 1 494 ADD 0.461651 0.231861 3.96434 1.33279 NA\n1 450 450 2 1 0.199393 1 494 ADD 0.45498 0.234569 3.76221 1.28047 NA\n1 451 451 2 1 0.219636 1 494 ADD 0.334421 0.221311 2.28338 0.883504 NA\n1 452 452 2 1 0.205466 1 494 ADD -0.0479219 0.231013 0.0430323 0.077968 NA\n1 453 453 2 1 0.203441 1 494 ADD 0.234922 0.240263 0.956035 0.483877 NA\n1 454 454 2 1 0.196356 1 494 ADD -0.155836 0.234057 0.443291 0.296246 NA\n1 455 455 2 1 0.209514 1 494 ADD -0.330102 0.237573 1.93065 0.783339 NA\n1 456 456 2 1 0.173077 1 494 ADD 0.144614 0.26702 0.293312 0.230545 NA\n1 457 457 2 1 0.210526 1 494 ADD 0.145348 0.234285 0.384882 0.271644 NA\n1 458 458 2 1 0.196356 1 494 ADD 0.140847 0.234703 0.360127 0.260874 NA\n1 459 459 2 1 0.208502 1 494 ADD 0.193186 0.233238 0.686043 0.389858 NA\n1 460 460 2 1 0.203441 1 494 ADD 0.21919 0.23817 0.846969 0.446832 NA\n1 461 461 2 1 0.215587 1 494 ADD 0.0552649 0.217075 0.0648158 0.0974314 NA\n1 462 462 2 1 0.219636 1 494 ADD 0.502447 0.232709 4.66178 1.51087 NA\n1 463 463 2 1 0.181174 1 494 ADD 0.304298 0.254609 1.42841 0.634466 NA\n1 464 464 2 1 0.189271 1 494 ADD 0.116401 0.240485 0.234283 0.201787 NA\n1 465 465 2 1 0.20749 1 494 ADD 0.492254 0.244773 4.04438 1.35341 NA\n1 466 466 2 1 0.187247 1 494 ADD -0.286202 0.233459 1.50287 0.657124 NA\n1 467 467 2 1 0.222672 1 494 ADD -0.327509 0.237497 1.90165 0.774965 NA\n1 468 468 2 1 0.201417 1 494 ADD 0.314591 0.245737 1.6389 0.697936 NA\n1 469 469 2 1 0.209514 1 494 ADD -0.0718081 0.240121 0.0894309 0.116394 NA\n1 470 470 2 1 0.20749 1 494 ADD -0.146232 0.238125 0.377114 0.268289 NA\n1 471 471 2 1 0.201417 1 494 ADD -0.233836 0.230135 1.03242 0.509212 NA\n1 472 472 2 1 0.205466 1 494 ADD -0.17052 0.23098 0.545008 0.336898 NA\n1 473 473 2 1 0.206478 1 494 ADD 0.26603 0.240309 1.22553 0.571414 NA\n1 474 474 2 1 0.206478 1 494 ADD -0.321393 0.238402 1.81741 0.750504 NA\n1 475 475 2 1 0.197368 1 494 ADD 0.16096 0.234656 0.470515 0.307373 NA\n1 476 476 2 1 0.177126 1 494 ADD 0.318222 0.264096 1.4519 0.641639 NA\n1 477 477 2 1 0.204453 1 494 ADD -0.246297 0.23133 1.13359 0.542103 NA\n1 478 478 2 1 0.190283 1 494 ADD -0.319282 0.244343 1.70746 0.718251 NA\n1 479 479 2 1 0.200405 1 494 ADD 0.088342 0.23918 0.136422 0.147603 NA\n1 480 480 2 1 0.202429 1 494 ADD -0.367588 0.241076 2.32495 0.895122 NA\n1 481 481 2 1 0.214575 1 494 ADD -0.0688842 0.215156 0.102502 0.125607 NA\n1 482 482 2 1 0.224696 1 494 ADD -0.247391 0.229359 1.16341 0.551666 NA\n1 483 483 2 1 0.209514 1 494 ADD -0.0761799 0.245489 0.0962979 0.121295 NA\n1 484 484 2 1 0.192308 1 494 ADD 0.239937 0.245257 0.957087 0.48423 NA\n1 485 485 2 1 0.200405 1 494 ADD -0.176733 0.243536 0.526638 0.329732 NA\n1 486 486 2 1 0.20749 1 494 ADD 
-0.0285663 0.23836 0.0143628 0.0435405 NA\n1 487 487 2 1 0.189271 1 494 ADD 0.241237 0.239242 1.01675 0.504051 NA\n1 488 488 2 1 0.175101 1 494 ADD 0.199374 0.24945 0.638806 0.372487 NA\n1 489 489 2 1 0.184211 1 494 ADD -0.0551669 0.240631 0.0525597 0.0868924 NA\n1 490 490 2 1 0.206478 1 494 ADD 0.0445959 0.242774 0.0337432 0.0684129 NA\n1 491 491 2 1 0.194332 1 494 ADD -0.172776 0.241915 0.510081 0.323212 NA\n1 492 492 2 1 0.208502 1 494 ADD -0.412855 0.235595 3.07089 1.09851 NA\n1 493 493 2 1 0.186235 1 494 ADD -0.391253 0.247675 2.49547 0.942432 NA\n1 494 494 2 1 0.197368 1 494 ADD -0.650522 0.248596 7.21293 2.14038 NA\n1 495 495 2 1 0.189271 1 494 ADD -0.249574 0.25173 0.982941 0.492854 NA\n1 496 496 2 1 0.199393 1 494 ADD 0.306995 0.239526 1.64269 0.699064 NA\n1 497 497 2 1 0.217611 1 494 ADD -0.130407 0.225958 0.33308 0.248836 NA\n1 498 498 2 1 0.201417 1 494 ADD -0.225674 0.236261 0.912386 0.469183 NA\n1 499 499 2 1 0.197368 1 494 ADD -0.102442 0.22702 0.203622 0.185877 NA\n1 500 500 2 1 0.192308 1 494 ADD 0.119692 0.231191 0.268035 0.218494 NA\n1 501 501 2 1 0.0941296 1 494 ADD -0.436143 0.326923 1.77978 0.739508 NA\n1 502 502 2 1 0.090081 1 494 ADD 0.310088 0.331368 0.875688 0.456695 NA\n1 503 503 2 1 0.110324 1 494 ADD 0.00955919 0.295072 0.00104951 0.0113714 NA\n1 504 504 2 1 0.105263 1 494 ADD -0.198829 0.29301 0.460463 0.303288 NA\n1 505 505 2 1 0.090081 1 494 ADD 0.102455 0.337407 0.0922059 0.118392 NA\n1 506 506 2 1 0.0921053 1 494 ADD -0.440926 0.344414 1.63897 0.697958 NA\n1 507 507 2 1 0.0991903 1 494 ADD -0.276535 0.311384 0.788693 0.426553 NA\n1 508 508 2 1 0.11336 1 494 ADD -0.381384 0.29551 1.66564 0.70588 NA\n1 509 509 2 1 0.118421 1 494 ADD 0.0492791 0.295938 0.0277283 0.0616058 NA\n1 510 510 2 1 0.0850202 1 494 ADD 0.175166 0.356854 0.240946 0.205147 NA\n1 511 511 2 1 0.102227 1 494 ADD 0.442525 0.309508 2.04424 0.815924 NA\n1 512 512 2 1 0.0840081 1 494 ADD 0.404764 0.340089 1.4165 0.630821 NA\n1 513 513 2 1 0.0941296 1 494 ADD -0.187753 0.353107 0.282722 0.22554 NA\n1 514 514 2 1 0.0961538 1 494 ADD -0.290289 0.335819 0.747222 0.411889 NA\n1 515 515 2 1 0.120445 1 494 ADD -0.273577 0.293766 0.867278 0.453815 NA\n1 516 516 2 1 0.118421 1 494 ADD -0.0311153 0.272691 0.0130198 0.0413621 NA\n1 517 517 2 1 0.107287 1 494 ADD 0.0467026 0.302144 0.0238922 0.0569214 NA\n1 518 518 2 1 0.090081 1 494 ADD -0.274852 0.328647 0.699418 0.394717 NA\n1 519 519 2 1 0.097166 1 494 ADD 0.0331264 0.314412 0.0111007 0.0380618 NA\n1 520 520 2 1 0.0961538 1 494 ADD 0.67885 0.32347 4.40432 1.44553 NA\n1 521 521 2 1 0.0981781 1 494 ADD -0.22432 0.321376 0.487203 0.314099 NA\n1 522 522 2 1 0.101215 1 494 ADD -0.336213 0.330141 1.03713 0.510757 NA\n1 523 523 2 1 0.118421 1 494 ADD -0.194195 0.288911 0.451804 0.299747 NA\n1 524 524 2 1 0.097166 1 494 ADD 0.0436768 0.318337 0.0188247 0.0501856 NA\n1 525 525 2 1 0.0981781 1 494 ADD 0.0439357 0.304233 0.0208556 0.0529718 NA\n1 526 526 2 1 0.107287 1 494 ADD -0.154487 0.304537 0.257339 0.213281 NA\n1 527 527 2 1 0.102227 1 494 ADD -0.497902 0.309017 2.59611 0.970103 NA\n1 528 528 2 1 0.112348 1 494 ADD 0.419456 0.301802 1.93164 0.783626 NA\n1 529 529 2 1 0.107287 1 494 ADD 0.202182 0.315614 0.41037 0.282511 NA\n1 530 530 2 1 0.103239 1 494 ADD -0.284723 0.314279 0.820758 0.437756 NA\n1 531 531 2 1 0.097166 1 494 ADD 0.45056 0.306155 2.16583 0.850446 NA\n1 532 532 2 1 0.111336 1 494 ADD -0.12298 0.302944 0.164796 0.16445 NA\n1 533 533 2 1 0.103239 1 494 ADD -0.00615803 0.331467 0.000345145 0.00648543 NA\n1 534 534 2 1 0.0910931 1 494 ADD 
0.0614936 0.35137 0.0306289 0.064961 NA\n1 535 535 2 1 0.105263 1 494 ADD -0.56162 0.308947 3.30458 1.1606 NA\n1 536 536 2 1 0.123482 1 494 ADD 0.0686062 0.280172 0.0599623 0.0933657 NA\n1 537 537 2 1 0.0931174 1 494 ADD -0.261754 0.334214 0.613392 0.362997 NA\n1 538 538 2 1 0.097166 1 494 ADD -0.591172 0.340475 3.01479 1.0835 NA\n1 539 539 2 1 0.0809717 1 494 ADD 0.288105 0.339798 0.718885 0.401746 NA\n1 540 540 2 1 0.102227 1 494 ADD -0.415544 0.317059 1.71772 0.721276 NA\n1 541 541 2 1 0.0910931 1 494 ADD -0.208688 0.314065 0.441523 0.295516 NA\n1 542 542 2 1 0.110324 1 494 ADD -0.275261 0.33381 0.679969 0.387643 NA\n1 543 543 2 1 0.109312 1 494 ADD 0.260132 0.306144 0.721994 0.402864 NA\n1 544 544 2 1 0.0961538 1 494 ADD -0.220378 0.310263 0.504515 0.321006 NA\n1 545 545 2 1 0.0951417 1 494 ADD 0.358749 0.329067 1.18854 0.559682 NA\n1 546 546 2 1 0.0991903 1 494 ADD -0.180057 0.334088 0.29047 0.229207 NA\n1 547 547 2 1 0.097166 1 494 ADD 0.377218 0.297313 1.60975 0.689249 NA\n1 548 548 2 1 0.0941296 1 494 ADD 0.0172713 0.33197 0.00270679 0.0184045 NA\n1 549 549 2 1 0.0951417 1 494 ADD -0.353715 0.305211 1.34309 0.608202 NA\n1 550 550 2 1 0.0961538 1 494 ADD 0.488113 0.28927 2.8473 1.03845 NA\n1 551 551 2 1 0.103239 1 494 ADD -0.327791 0.311237 1.10921 0.534239 NA\n1 552 552 2 1 0.0931174 1 494 ADD 0.528817 0.333953 2.50749 0.945747 NA\n1 553 553 2 1 0.0981781 1 494 ADD -0.197011 0.317247 0.385644 0.271973 NA\n1 554 554 2 1 0.100202 1 494 ADD 0.820126 0.294232 7.48063 2.20505 NA\n1 555 555 2 1 0.105263 1 494 ADD 0.0664456 0.315261 0.0444214 0.0793177 NA\n1 556 556 2 1 0.0961538 1 494 ADD -0.59352 0.313385 3.58686 1.23479 NA\n1 557 557 2 1 0.082996 1 494 ADD -0.820179 0.360396 5.17913 1.64093 NA\n1 558 558 2 1 0.0991903 1 494 ADD -0.0604722 0.279883 0.0466831 0.0814775 NA\n1 559 559 2 1 0.0981781 1 494 ADD -0.22426 0.318886 0.494576 0.317049 NA\n1 560 560 2 1 0.107287 1 494 ADD 0.0816086 0.322466 0.0640477 0.0967965 NA\n1 561 561 2 1 0.0789474 1 494 ADD -0.324927 0.308107 1.11217 0.535196 NA\n1 562 562 2 1 0.101215 1 494 ADD 0.726097 0.328889 4.87406 1.56442 NA\n1 563 563 2 1 0.0991903 1 494 ADD -0.282247 0.318377 0.785917 0.425578 NA\n1 564 564 2 1 0.107287 1 494 ADD -0.314569 0.301737 1.08686 0.526999 NA\n1 565 565 2 1 0.103239 1 494 ADD -0.0507034 0.310511 0.0266636 0.060336 NA\n1 566 566 2 1 0.0981781 1 494 ADD 0.180708 0.317846 0.323236 0.244377 NA\n1 567 567 2 1 0.101215 1 494 ADD 0.0743624 0.29151 0.065073 0.0976433 NA\n1 568 568 2 1 0.0981781 1 494 ADD 0.511137 0.338147 2.28488 0.883922 NA\n1 569 569 2 1 0.0951417 1 494 ADD 0.0731738 0.331255 0.0487962 0.0834559 NA\n1 570 570 2 1 0.100202 1 494 ADD -0.460413 0.31248 2.17096 0.851895 NA\n1 571 571 2 1 0.0890688 1 494 ADD 0.180927 0.3386 0.285517 0.226867 NA\n1 572 572 2 1 0.119433 1 494 ADD -0.228535 0.294676 0.601473 0.358509 NA\n1 573 573 2 1 0.0921053 1 494 ADD 0.375456 0.342802 1.19959 0.563195 NA\n1 574 574 2 1 0.090081 1 494 ADD 0.00581415 0.338572 0.000294897 0.00599143 NA\n1 575 575 2 1 0.0931174 1 494 ADD -0.0765169 0.310069 0.0608972 0.0941591 NA\n1 576 576 2 1 0.116397 1 494 ADD -0.146635 0.287723 0.259734 0.214455 NA\n1 577 577 2 1 0.0921053 1 494 ADD -0.381399 0.345533 1.21838 0.569152 NA\n1 578 578 2 1 0.0850202 1 494 ADD -0.351122 0.363064 0.935301 0.476918 NA\n1 579 579 2 1 0.107287 1 494 ADD 0.424554 0.302515 1.96956 0.794541 NA\n1 580 580 2 1 0.109312 1 494 ADD -0.587852 0.291894 4.05589 1.35638 NA\n1 581 581 2 1 0.102227 1 494 ADD 0.0873115 0.29797 0.0858615 0.113788 NA\n1 582 582 2 1 0.100202 1 494 ADD 
-0.198621 0.337732 0.345866 0.254564 NA\n1 583 583 2 1 0.105263 1 494 ADD 0.408208 0.33393 1.49435 0.654542 NA\n1 584 584 2 1 0.102227 1 494 ADD -0.40841 0.305103 1.79185 0.74304 NA\n1 585 585 2 1 0.102227 1 494 ADD 0.196912 0.304119 0.419234 0.286241 NA\n1 586 586 2 1 0.0910931 1 494 ADD -0.349025 0.348287 1.00424 0.499921 NA\n1 587 587 2 1 0.118421 1 494 ADD 0.0416863 0.294488 0.0200379 0.0518653 NA\n1 588 588 2 1 0.100202 1 494 ADD 0.340765 0.322271 1.11806 0.5371 NA\n1 589 589 2 1 0.105263 1 494 ADD 0.177953 0.303563 0.343646 0.253574 NA\n1 590 590 2 1 0.0910931 1 494 ADD 0.296492 0.310501 0.911801 0.468985 NA\n1 591 591 2 1 0.0961538 1 494 ADD 0.013249 0.34901 0.00144109 0.0133544 NA\n1 592 592 2 1 0.102227 1 494 ADD 0.465436 0.347409 1.79489 0.743928 NA\n1 593 593 2 1 0.0991903 1 494 ADD 0.0934979 0.335674 0.077583 0.107571 NA\n1 594 594 2 1 0.0931174 1 494 ADD -0.169601 0.319473 0.28183 0.225115 NA\n1 595 595 2 1 0.0880567 1 494 ADD -0.201814 0.345762 0.340683 0.25225 NA\n1 596 596 2 1 0.097166 1 494 ADD -0.167482 0.331951 0.254558 0.211914 NA\n1 597 597 2 1 0.124494 1 494 ADD 0.190094 0.278027 0.467482 0.306143 NA\n1 598 598 2 1 0.0921053 1 494 ADD -0.37021 0.32461 1.30069 0.595016 NA\n1 599 599 2 1 0.0840081 1 494 ADD 0.573399 0.345478 2.75469 1.01336 NA\n1 600 600 2 1 0.0951417 1 494 ADD 0.453057 0.322865 1.96908 0.794401 NA\n1 601 601 2 1 0.0880567 1 494 ADD -0.308767 0.347657 0.788789 0.426587 NA\n1 602 602 2 1 0.104251 1 494 ADD -0.0118625 0.308863 0.00147508 0.0135134 NA\n1 603 603 2 1 0.0981781 1 494 ADD 0.00980135 0.317011 0.000955924 0.0108462 NA\n1 604 604 2 1 0.102227 1 494 ADD 0.6312 0.314387 4.03093 1.34995 NA\n1 605 605 2 1 0.100202 1 494 ADD 0.354484 0.317973 1.24283 0.576874 NA\n1 606 606 2 1 0.0890688 1 494 ADD 0.226357 0.362994 0.388858 0.273354 NA\n1 607 607 2 1 0.0991903 1 494 ADD 0.391518 0.308032 1.61552 0.690971 NA\n1 608 608 2 1 0.0860324 1 494 ADD -0.203021 0.343307 0.349716 0.256275 NA\n1 609 609 2 1 0.105263 1 494 ADD 0.254146 0.33115 0.589003 0.353788 NA\n1 610 610 2 1 0.0870445 1 494 ADD -0.199706 0.317443 0.395779 0.276317 NA\n1 611 611 2 1 0.0931174 1 494 ADD -0.747393 0.354263 4.45087 1.45738 NA\n1 612 612 2 1 0.0991903 1 494 ADD -0.063561 0.298737 0.0452692 0.0801326 NA\n1 613 613 2 1 0.0931174 1 494 ADD 0.455252 0.338464 1.80917 0.748098 NA\n1 614 614 2 1 0.101215 1 494 ADD -0.190535 0.328875 0.33565 0.249992 NA\n1 615 615 2 1 0.102227 1 494 ADD 0.287376 0.300412 0.915093 0.470099 NA\n1 616 616 2 1 0.090081 1 494 ADD -0.177886 0.315293 0.318315 0.242132 NA\n1 617 617 2 1 0.0921053 1 494 ADD -0.158722 0.37289 0.18118 0.173691 NA\n1 618 618 2 1 0.116397 1 494 ADD 0.144019 0.303738 0.224824 0.196961 NA\n1 619 619 2 1 0.082996 1 494 ADD -1.50861 0.434451 14.3671 3.82272 NA\n1 620 620 2 1 0.110324 1 494 ADD 0.319165 0.278322 1.31503 0.599485 NA\n1 621 621 2 1 0.0910931 1 494 ADD -0.434971 0.327481 1.7642 0.734943 NA\n1 622 622 2 1 0.110324 1 494 ADD 0.00335707 0.289329 0.000134628 0.00403925 NA\n1 623 623 2 1 0.0981781 1 494 ADD 0.11689 0.340496 0.11785 0.135858 NA\n1 624 624 2 1 0.110324 1 494 ADD 0.236098 0.336082 0.49351 0.316624 NA\n1 625 625 2 1 0.100202 1 494 ADD 0.429245 0.285215 2.26498 0.878347 NA\n1 626 626 2 1 0.106275 1 494 ADD 0.433645 0.309938 1.95758 0.791095 NA\n1 627 627 2 1 0.109312 1 494 ADD -0.320424 0.289873 1.2219 0.570267 NA\n1 628 628 2 1 0.0840081 1 494 ADD -0.616213 0.343009 3.22739 1.14016 NA\n1 629 629 2 1 0.0961538 1 494 ADD 0.167743 0.301854 0.308811 0.237764 NA\n1 630 630 2 1 0.0961538 1 494 ADD 0.0365878 0.303249 
0.0145571 0.0438479 NA\n1 631 631 2 1 0.0860324 1 494 ADD 0.103795 0.328547 0.0998064 0.123746 NA\n1 632 632 2 1 0.104251 1 494 ADD 0.310586 0.313347 0.982457 0.492694 NA\n1 633 633 2 1 0.119433 1 494 ADD 0.00278218 0.274045 0.000103068 0.00353219 NA\n1 634 634 2 1 0.0941296 1 494 ADD -0.0578071 0.316995 0.0332551 0.0678814 NA\n1 635 635 2 1 0.102227 1 494 ADD 0.496969 0.289865 2.93945 1.06329 NA\n1 636 636 2 1 0.0991903 1 494 ADD 0.0180658 0.304152 0.00352804 0.0210732 NA\n1 637 637 2 1 0.102227 1 494 ADD 0.126479 0.332471 0.144721 0.152654 NA\n1 638 638 2 1 0.111336 1 494 ADD 0.0436574 0.294947 0.0219093 0.0543696 NA\n1 639 639 2 1 0.111336 1 494 ADD -0.581278 0.305376 3.62325 1.2443 NA\n1 640 640 2 1 0.0991903 1 494 ADD 0.417522 0.322791 1.67307 0.708083 NA\n1 641 641 2 1 0.0991903 1 494 ADD 0.108077 0.318881 0.114871 0.13391 NA\n1 642 642 2 1 0.109312 1 494 ADD 0.0848109 0.302968 0.0783629 0.108168 NA\n1 643 643 2 1 0.104251 1 494 ADD -0.198427 0.325269 0.372148 0.266133 NA\n1 644 644 2 1 0.097166 1 494 ADD -0.0469447 0.332968 0.0198777 0.0516462 NA\n1 645 645 2 1 0.1083 1 494 ADD -0.0480022 0.30452 0.0248479 0.0581177 NA\n1 646 646 2 1 0.0880567 1 494 ADD 0.123623 0.335671 0.135635 0.147118 NA\n1 647 647 2 1 0.111336 1 494 ADD 0.36958 0.29253 1.59616 0.685188 NA\n1 648 648 2 1 0.0981781 1 494 ADD 0.672383 0.313677 4.59481 1.49392 NA\n1 649 649 2 1 0.126518 1 494 ADD -0.17034 0.271135 0.394695 0.275854 NA\n1 650 650 2 1 0.1083 1 494 ADD -0.147151 0.288967 0.259317 0.214251 NA\n1 651 651 2 1 0.124494 1 494 ADD 0.169238 0.306522 0.304839 0.235925 NA\n1 652 652 2 1 0.100202 1 494 ADD 0.236572 0.329367 0.515904 0.325512 NA\n1 653 653 2 1 0.102227 1 494 ADD -0.254036 0.32999 0.592637 0.355166 NA\n1 654 654 2 1 0.0921053 1 494 ADD -0.240042 0.318119 0.56937 0.346296 NA\n1 655 655 2 1 0.0921053 1 494 ADD 0.322536 0.340868 0.89533 0.463395 NA\n1 656 656 2 1 0.0809717 1 494 ADD 0.603658 0.342091 3.11386 1.10998 NA\n1 657 657 2 1 0.104251 1 494 ADD -0.0829316 0.310929 0.0711403 0.102547 NA\n1 658 658 2 1 0.097166 1 494 ADD -0.342029 0.316176 1.17022 0.553843 NA\n1 659 659 2 1 0.105263 1 494 ADD -0.0946202 0.319957 0.0874547 0.114957 NA\n1 660 660 2 1 0.101215 1 494 ADD -0.373496 0.344597 1.17476 0.55529 NA\n1 661 661 2 1 0.115385 1 494 ADD 0.0664295 0.287107 0.0535344 0.0877654 NA\n1 662 662 2 1 0.0951417 1 494 ADD -0.6004 0.313048 3.67842 1.25868 NA\n1 663 663 2 1 0.104251 1 494 ADD -0.01625 0.301644 0.00290213 0.019071 NA\n1 664 664 2 1 0.118421 1 494 ADD 0.126744 0.278869 0.206565 0.187439 NA\n1 665 665 2 1 0.107287 1 494 ADD -0.744817 0.315733 5.56494 1.73699 NA\n1 666 666 2 1 0.0961538 1 494 ADD 0.593632 0.313519 3.58514 1.23434 NA\n1 667 667 2 1 0.090081 1 494 ADD 0.180964 0.334361 0.292922 0.230361 NA\n1 668 668 2 1 0.0799595 1 494 ADD 0.312292 0.331877 0.885455 0.460031 NA\n1 669 669 2 1 0.0951417 1 494 ADD 0.104018 0.30973 0.112784 0.132534 NA\n1 670 670 2 1 0.0921053 1 494 ADD 0.0447955 0.332924 0.0181042 0.049165 NA\n1 671 671 2 1 0.1083 1 494 ADD 0.00190864 0.297955 4.10343e-05 0.0022254 NA\n1 672 672 2 1 0.097166 1 494 ADD -0.48063 0.329908 2.12244 0.838169 NA\n1 673 673 2 1 0.097166 1 494 ADD 0.246399 0.360598 0.46691 0.305911 NA\n1 674 674 2 1 0.103239 1 494 ADD -0.289069 0.304386 0.901892 0.465625 NA\n1 675 675 2 1 0.111336 1 494 ADD -0.0402951 0.292478 0.0189809 0.0504045 NA\n1 676 676 2 1 0.106275 1 494 ADD 0.0284315 0.323399 0.00772896 0.0315429 NA\n1 677 677 2 1 0.090081 1 494 ADD 0.289791 0.35093 0.681915 0.388353 NA\n1 678 678 2 1 0.0951417 1 494 ADD -0.0614315 0.305285 
0.0404922 0.0754513 NA\n1 679 679 2 1 0.11336 1 494 ADD -0.374606 0.301244 1.54637 0.670252 NA\n1 680 680 2 1 0.102227 1 494 ADD 0.290837 0.301849 0.928369 0.474583 NA\n1 681 681 2 1 0.12247 1 494 ADD -0.18676 0.290505 0.413296 0.283745 NA\n1 682 682 2 1 0.097166 1 494 ADD -0.156277 0.354132 0.194741 0.181115 NA\n1 683 683 2 1 0.101215 1 494 ADD 0.272695 0.309501 0.7763 0.422192 NA\n1 684 684 2 1 0.0860324 1 494 ADD 0.29743 0.331155 0.806692 0.432856 NA\n1 685 685 2 1 0.0931174 1 494 ADD 0.504035 0.32166 2.45543 0.931373 NA\n1 686 686 2 1 0.082996 1 494 ADD 0.278421 0.345914 0.64784 0.375836 NA\n1 687 687 2 1 0.0840081 1 494 ADD 0.0152457 0.321919 0.00224286 0.0167224 NA\n1 688 688 2 1 0.100202 1 494 ADD -0.0440424 0.314047 0.0196677 0.0513576 NA\n1 689 689 2 1 0.0890688 1 494 ADD -0.428954 0.324062 1.75213 0.731399 NA\n1 690 690 2 1 0.101215 1 494 ADD 0.495217 0.306018 2.61877 0.976309 NA\n1 691 691 2 1 0.0921053 1 494 ADD 0.0600308 0.366272 0.0268622 0.0605745 NA\n1 692 692 2 1 0.0991903 1 494 ADD -0.0622685 0.336887 0.034164 0.0688685 NA\n1 693 693 2 1 0.12247 1 494 ADD 0.20889 0.286301 0.532342 0.331965 NA\n1 694 694 2 1 0.0850202 1 494 ADD 0.138027 0.354657 0.151465 0.15668 NA\n1 695 695 2 1 0.0991903 1 494 ADD 0.0421837 0.32922 0.0164178 0.0467021 NA\n1 696 696 2 1 0.107287 1 494 ADD 0.408166 0.30228 1.82328 0.752215 NA\n1 697 697 2 1 0.103239 1 494 ADD 0.0195487 0.305664 0.00409022 0.022731 NA\n1 698 698 2 1 0.102227 1 494 ADD 0.62237 0.307755 4.08964 1.36505 NA\n1 699 699 2 1 0.100202 1 494 ADD 0.106159 0.330715 0.103039 0.125975 NA\n1 700 700 2 1 0.097166 1 494 ADD -0.35219 0.323003 1.18889 0.559794 NA\n1 701 701 2 1 0.0931174 1 494 ADD -0.0913276 0.309074 0.0873131 0.114853 NA\n1 702 702 2 1 0.102227 1 494 ADD -0.0411284 0.31314 0.0172507 0.0479321 NA\n1 703 703 2 1 0.124494 1 494 ADD 0.368346 0.292056 1.59068 0.683548 NA\n1 704 704 2 1 0.101215 1 494 ADD 0.104827 0.323346 0.105102 0.127383 NA\n1 705 705 2 1 0.0991903 1 494 ADD -0.0834127 0.323988 0.0662837 0.0986361 NA\n1 706 706 2 1 0.104251 1 494 ADD 0.371069 0.320284 1.34226 0.607945 NA\n1 707 707 2 1 0.0931174 1 494 ADD 0.431687 0.306936 1.97806 0.796982 NA\n1 708 708 2 1 0.0910931 1 494 ADD 0.0369178 0.308988 0.0142754 0.0434016 NA\n1 709 709 2 1 0.101215 1 494 ADD -0.368832 0.326152 1.27884 0.588187 NA\n1 710 710 2 1 0.0991903 1 494 ADD -0.0679006 0.341843 0.0394542 0.0744035 NA\n1 711 711 2 1 0.12753 1 494 ADD -0.0981107 0.285537 0.118062 0.135996 NA\n1 712 712 2 1 0.0769231 1 494 ADD -0.441524 0.345384 1.6342 0.696537 NA\n1 713 713 2 1 0.104251 1 494 ADD 0.219182 0.328427 0.44538 0.297107 NA\n1 714 714 2 1 0.103239 1 494 ADD -0.223007 0.28944 0.593634 0.355544 NA\n1 715 715 2 1 0.0870445 1 494 ADD -0.252563 0.326983 0.596608 0.356671 NA\n1 716 716 2 1 0.0951417 1 494 ADD -0.075434 0.332345 0.0515177 0.0859515 NA\n1 717 717 2 1 0.111336 1 494 ADD -0.194333 0.276109 0.495369 0.317366 NA\n1 718 718 2 1 0.101215 1 494 ADD -0.349951 0.289619 1.46003 0.644118 NA\n1 719 719 2 1 0.0961538 1 494 ADD 0.185264 0.326076 0.322809 0.244183 NA\n1 720 720 2 1 0.103239 1 494 ADD 0.262951 0.30762 0.730667 0.405975 NA\n1 721 721 2 1 0.0981781 1 494 ADD 0.611909 0.313935 3.79921 1.29008 NA\n1 722 722 2 1 0.0850202 1 494 ADD -0.496555 0.317464 2.44651 0.928904 NA\n1 723 723 2 1 0.115385 1 494 ADD 0.422743 0.303129 1.94489 0.787444 NA\n1 724 724 2 1 0.120445 1 494 ADD 0.259153 0.280662 0.852601 0.448773 NA\n1 725 725 2 1 0.0941296 1 494 ADD 0.601641 0.340177 3.12799 1.11374 NA\n1 726 726 2 1 0.103239 1 494 ADD 0.122195 0.320673 0.145206 
0.152946 NA\n1 727 727 2 1 0.0991903 1 494 ADD -0.539699 0.322548 2.79972 1.02558 NA\n1 728 728 2 1 0.1083 1 494 ADD 0.470551 0.31614 2.21541 0.864427 NA\n1 729 729 2 1 0.0890688 1 494 ADD -0.197758 0.345709 0.327225 0.246189 NA\n1 730 730 2 1 0.0951417 1 494 ADD 0.0226142 0.344256 0.00431518 0.0233637 NA\n1 731 731 2 1 0.0991903 1 494 ADD -0.679177 0.3328 4.16487 1.38436 NA\n1 732 732 2 1 0.0981781 1 494 ADD -0.472029 0.328014 2.07087 0.823516 NA\n1 733 733 2 1 0.0840081 1 494 ADD 0.483733 0.328544 2.16782 0.851009 NA\n1 734 734 2 1 0.109312 1 494 ADD 0.20891 0.297766 0.492228 0.316111 NA\n1 735 735 2 1 0.0870445 1 494 ADD -0.212645 0.318953 0.444485 0.296738 NA\n1 736 736 2 1 0.1083 1 494 ADD 0.26843 0.309415 0.752624 0.413811 NA\n1 737 737 2 1 0.0890688 1 494 ADD 0.376959 0.312176 1.45811 0.643531 NA\n1 738 738 2 1 0.121457 1 494 ADD 0.145689 0.26711 0.297489 0.232502 NA\n1 739 739 2 1 0.0981781 1 494 ADD -0.0528801 0.309473 0.0291971 0.0633231 NA\n1 740 740 2 1 0.121457 1 494 ADD 0.251144 0.2854 0.774354 0.421506 NA\n1 741 741 2 1 0.0941296 1 494 ADD 0.728732 0.310343 5.51382 1.7243 NA\n1 742 742 2 1 0.0981781 1 494 ADD -0.16799 0.292482 0.329888 0.247395 NA\n1 743 743 2 1 0.0880567 1 494 ADD -0.207039 0.347466 0.355042 0.258633 NA\n1 744 744 2 1 0.106275 1 494 ADD 0.265025 0.305393 0.7531 0.41398 NA\n1 745 745 2 1 0.0890688 1 494 ADD -0.243662 0.349314 0.486568 0.313845 NA\n1 746 746 2 1 0.1083 1 494 ADD -0.103813 0.325153 0.101937 0.125218 NA\n1 747 747 2 1 0.100202 1 494 ADD 0.482516 0.349913 1.90153 0.774932 NA\n1 748 748 2 1 0.0981781 1 494 ADD -0.0639759 0.322693 0.0393055 0.0742525 NA\n1 749 749 2 1 0.0981781 1 494 ADD 0.624201 0.331865 3.53774 1.22194 NA\n1 750 750 2 1 0.111336 1 494 ADD 0.0580158 0.302329 0.0368243 0.0716947 NA\n1 751 751 2 1 0.0870445 1 494 ADD -0.156513 0.351523 0.198239 0.183 NA\n1 752 752 2 1 0.111336 1 494 ADD -0.215062 0.287642 0.559013 0.342315 NA\n1 753 753 2 1 0.0931174 1 494 ADD -0.0399905 0.31757 0.0158575 0.0458588 NA\n1 754 754 2 1 0.0910931 1 494 ADD 0.255346 0.329684 0.599878 0.357907 NA\n1 755 755 2 1 0.107287 1 494 ADD 0.513648 0.310949 2.72868 1.0063 NA\n1 756 756 2 1 0.0981781 1 494 ADD -0.196656 0.313598 0.393251 0.275237 NA\n1 757 757 2 1 0.103239 1 494 ADD 0.279992 0.325451 0.740154 0.409368 NA\n1 758 758 2 1 0.0951417 1 494 ADD 0.814533 0.308479 6.75339 2.02887 NA\n1 759 759 2 1 0.0961538 1 494 ADD 0.245266 0.31554 0.604179 0.35953 NA\n1 760 760 2 1 0.106275 1 494 ADD -0.309298 0.296397 1.08895 0.527676 NA\n1 761 761 2 1 0.1083 1 494 ADD -0.171865 0.313403 0.300727 0.234013 NA\n1 762 762 2 1 0.0890688 1 494 ADD 0.122688 0.337279 0.132321 0.145064 NA\n1 763 763 2 1 0.0921053 1 494 ADD 0.363825 0.33193 1.20141 0.563774 NA\n1 764 764 2 1 0.106275 1 494 ADD 0.0739797 0.313886 0.0555498 0.08955 NA\n1 765 765 2 1 0.0890688 1 494 ADD 0.224676 0.326546 0.473396 0.308539 NA\n1 766 766 2 1 0.115385 1 494 ADD 0.128133 0.293003 0.191239 0.179216 NA\n1 767 767 2 1 0.112348 1 494 ADD 0.45929 0.279713 2.69617 0.997447 NA\n1 768 768 2 1 0.103239 1 494 ADD -0.0463494 0.335352 0.0191023 0.050574 NA\n1 769 769 2 1 0.112348 1 494 ADD -0.207646 0.281981 0.542261 0.335831 NA\n1 770 770 2 1 0.100202 1 494 ADD 0.166074 0.317172 0.274167 0.221451 NA\n1 771 771 2 1 0.107287 1 494 ADD -0.136726 0.31038 0.19405 0.180741 NA\n1 772 772 2 1 0.0981781 1 494 ADD -0.026698 0.329301 0.00657311 0.0290107 NA\n1 773 773 2 1 0.104251 1 494 ADD 0.293707 0.316027 0.863734 0.4526 NA\n1 774 774 2 1 0.105263 1 494 ADD -0.0551695 0.304395 0.032849 0.0674365 NA\n1 775 775 2 1 
0.0819838 1 494 ADD -0.00974098 0.348476 0.000781373 0.00979457 NA\n1 776 776 2 1 0.0991903 1 494 ADD -0.0514859 0.328703 0.0245341 0.0577272 NA\n1 777 777 2 1 0.100202 1 494 ADD -0.244458 0.297845 0.67364 0.385329 NA\n1 778 778 2 1 0.107287 1 494 ADD -0.296762 0.29798 0.991842 0.495811 NA\n1 779 779 2 1 0.119433 1 494 ADD 0.0642437 0.288952 0.0494322 0.0840442 NA\n1 780 780 2 1 0.0941296 1 494 ADD 0.338729 0.311938 1.17915 0.55669 NA\n1 781 781 2 1 0.111336 1 494 ADD -0.0481514 0.303728 0.0251332 0.0584709 NA\n1 782 782 2 1 0.101215 1 494 ADD -0.158085 0.344591 0.210462 0.189495 NA\n1 783 783 2 1 0.104251 1 494 ADD -0.0236376 0.303662 0.00605935 0.0278185 NA\n1 784 784 2 1 0.090081 1 494 ADD -0.290561 0.32478 0.80038 0.43065 NA\n1 785 785 2 1 0.0991903 1 494 ADD -0.0354098 0.309865 0.0130587 0.0414266 NA\n1 786 786 2 1 0.106275 1 494 ADD -0.933803 0.339365 8.51153 2.45234 NA\n1 787 787 2 1 0.105263 1 494 ADD 0.196986 0.296909 0.440173 0.294958 NA\n1 788 788 2 1 0.0961538 1 494 ADD -0.0669659 0.325543 0.0423146 0.0772635 NA\n1 789 789 2 1 0.100202 1 494 ADD 0.575469 0.303442 3.5966 1.23734 NA\n1 790 790 2 1 0.082996 1 494 ADD 0.34616 0.345046 1.00647 0.500656 NA\n1 791 791 2 1 0.117409 1 494 ADD 0.228108 0.287404 0.629939 0.369188 NA\n1 792 792 2 1 0.082996 1 494 ADD 0.251363 0.343154 0.536567 0.333614 NA\n1 793 793 2 1 0.106275 1 494 ADD 0.229253 0.304041 0.568549 0.345981 NA\n1 794 794 2 1 0.0819838 1 494 ADD -0.968887 0.38122 7.06565 2.10471 NA\n1 795 795 2 1 0.0931174 1 494 ADD -0.040321 0.322413 0.0156401 0.045528 NA\n1 796 796 2 1 0.109312 1 494 ADD 0.199684 0.305186 0.428113 0.289953 NA\n1 797 797 2 1 0.097166 1 494 ADD 0.459865 0.320896 2.05367 0.818614 NA\n1 798 798 2 1 0.0910931 1 494 ADD 0.199157 0.336986 0.349275 0.25608 NA\n1 799 799 2 1 0.0870445 1 494 ADD -0.173328 0.337347 0.263988 0.21653 NA\n1 800 800 2 1 0.0850202 1 494 ADD 0.183535 0.326348 0.316284 0.241202 NA\n1 801 801 2 1 0.0840081 1 494 ADD 0.120376 0.33571 0.128572 0.142718 NA\n1 802 802 2 1 0.102227 1 494 ADD -0.20224 0.324815 0.387667 0.272843 NA\n1 803 803 2 1 0.107287 1 494 ADD 0.422694 0.312827 1.82577 0.752939 NA\n1 804 804 2 1 0.0910931 1 494 ADD -0.333904 0.303474 1.21061 0.566691 NA\n1 805 805 2 1 0.0890688 1 494 ADD 0.0262905 0.321202 0.00669948 0.0292972 NA\n1 806 806 2 1 0.104251 1 494 ADD -0.133027 0.289116 0.211707 0.190149 NA\n1 807 807 2 1 0.101215 1 494 ADD 0.518187 0.309018 2.81194 1.02889 NA\n1 808 808 2 1 0.0961538 1 494 ADD -0.427605 0.321053 1.77391 0.737788 NA\n1 809 809 2 1 0.103239 1 494 ADD -0.212694 0.316732 0.450946 0.299395 NA\n1 810 810 2 1 0.0910931 1 494 ADD -0.14822 0.321204 0.212937 0.190794 NA\n1 811 811 2 1 0.121457 1 494 ADD 0.154717 0.30904 0.250638 0.209978 NA\n1 812 812 2 1 0.0870445 1 494 ADD 0.461988 0.345062 1.79253 0.743238 NA\n1 813 813 2 1 0.118421 1 494 ADD -0.302529 0.293549 1.06212 0.518941 NA\n1 814 814 2 1 0.117409 1 494 ADD -0.251453 0.298158 0.71125 0.398995 NA\n1 815 815 2 1 0.104251 1 494 ADD -0.134737 0.315843 0.181983 0.174136 NA\n1 816 816 2 1 0.107287 1 494 ADD 0.41279 0.307901 1.79737 0.744652 NA\n1 817 817 2 1 0.118421 1 494 ADD 0.573616 0.318533 3.24289 1.14428 NA\n1 818 818 2 1 0.0981781 1 494 ADD -0.224518 0.345962 0.421159 0.287048 NA\n1 819 819 2 1 0.0890688 1 494 ADD -0.391462 0.311626 1.57802 0.679757 NA\n1 820 820 2 1 0.114372 1 494 ADD 0.150255 0.296806 0.25628 0.212761 NA\n1 821 821 2 1 0.0840081 1 494 ADD 0.227747 0.360649 0.398783 0.277598 NA\n1 822 822 2 1 0.109312 1 494 ADD 0.217202 0.317121 0.469113 0.306805 NA\n1 823 823 2 1 0.101215 1 
494 ADD -0.0117819 0.312598 0.00142054 0.0132575 NA\n1 824 824 2 1 0.0941296 1 494 ADD 0.016859 0.315112 0.00286242 0.0189373 NA\n1 825 825 2 1 0.0840081 1 494 ADD -0.136272 0.34522 0.15582 0.159245 NA\n1 826 826 2 1 0.111336 1 494 ADD -0.201846 0.294602 0.469428 0.306933 NA\n1 827 827 2 1 0.097166 1 494 ADD 0.0557876 0.336352 0.0275097 0.0613468 NA\n1 828 828 2 1 0.0981781 1 494 ADD 0.104453 0.326458 0.102374 0.125519 NA\n1 829 829 2 1 0.100202 1 494 ADD -0.419195 0.327063 1.64275 0.699081 NA\n1 830 830 2 1 0.0840081 1 494 ADD 0.0775549 0.315612 0.0603824 0.0937229 NA\n1 831 831 2 1 0.0961538 1 494 ADD -0.497017 0.321706 2.38685 0.912362 NA\n1 832 832 2 1 0.0941296 1 494 ADD 0.153953 0.301588 0.260585 0.214871 NA\n1 833 833 2 1 0.107287 1 494 ADD -0.288205 0.282754 1.03892 0.511348 NA\n1 834 834 2 1 0.0961538 1 494 ADD -0.248457 0.337375 0.542348 0.335865 NA\n1 835 835 2 1 0.107287 1 494 ADD -0.487755 0.314821 2.40036 0.916114 NA\n1 836 836 2 1 0.101215 1 494 ADD -0.389641 0.324417 1.44252 0.638779 NA\n1 837 837 2 1 0.102227 1 494 ADD 0.135945 0.313153 0.188456 0.177698 NA\n1 838 838 2 1 0.0961538 1 494 ADD -0.149826 0.328549 0.207955 0.188174 NA\n1 839 839 2 1 0.103239 1 494 ADD -0.668923 0.318222 4.41866 1.44919 NA\n1 840 840 2 1 0.0951417 1 494 ADD -0.208724 0.318111 0.430515 0.290954 NA\n1 841 841 2 1 0.106275 1 494 ADD -0.169803 0.297396 0.326002 0.245634 NA\n1 842 842 2 1 0.101215 1 494 ADD -0.585282 0.335572 3.042 1.09079 NA\n1 843 843 2 1 0.0931174 1 494 ADD -0.156612 0.333036 0.221142 0.195063 NA\n1 844 844 2 1 0.0779352 1 494 ADD -0.0769737 0.336234 0.0524085 0.0867563 NA\n1 845 845 2 1 0.0931174 1 494 ADD -0.219567 0.321335 0.466894 0.305904 NA\n1 846 846 2 1 0.115385 1 494 ADD 0.114671 0.288942 0.157502 0.160229 NA\n1 847 847 2 1 0.114372 1 494 ADD 0.0768471 0.293261 0.0686668 0.100569 NA\n1 848 848 2 1 0.1083 1 494 ADD 0.207336 0.302916 0.468493 0.306554 NA\n1 849 849 2 1 0.101215 1 494 ADD 0.324333 0.277197 1.369 0.616214 NA\n1 850 850 2 1 0.0941296 1 494 ADD 0.578325 0.336333 2.95669 1.06792 NA\n1 851 851 2 1 0.0951417 1 494 ADD -0.0446944 0.323085 0.0191369 0.0506224 NA\n1 852 852 2 1 0.0840081 1 494 ADD 0.68666 0.342331 4.02339 1.34801 NA\n1 853 853 2 1 0.119433 1 494 ADD 0.165057 0.28222 0.342052 0.252862 NA\n1 854 854 2 1 0.109312 1 494 ADD 0.107793 0.313249 0.118414 0.136224 NA\n1 855 855 2 1 0.116397 1 494 ADD -0.150818 0.288197 0.273859 0.221303 NA\n1 856 856 2 1 0.0981781 1 494 ADD 0.154224 0.307088 0.25222 0.210761 NA\n1 857 857 2 1 0.0981781 1 494 ADD -0.71047 0.359464 3.90643 1.31784 NA\n1 858 858 2 1 0.106275 1 494 ADD -0.256215 0.312279 0.673171 0.385157 NA\n1 859 859 2 1 0.0850202 1 494 ADD -0.184266 0.342895 0.28878 0.22841 NA\n1 860 860 2 1 0.104251 1 494 ADD 0.211303 0.317969 0.441615 0.295554 NA\n1 861 861 2 1 0.0961538 1 494 ADD 0.631858 0.331203 3.63958 1.24856 NA\n1 862 862 2 1 0.0931174 1 494 ADD 0.323357 0.332627 0.945037 0.48019 NA\n1 863 863 2 1 0.0991903 1 494 ADD -0.294522 0.32613 0.815558 0.435947 NA\n1 864 864 2 1 0.103239 1 494 ADD -0.752217 0.303083 6.75915 2.03027 NA\n1 865 865 2 1 0.0799595 1 494 ADD -0.159455 0.369365 0.186366 0.176553 NA\n1 866 866 2 1 0.0991903 1 494 ADD -0.0610923 0.314236 0.0377974 0.0727063 NA\n1 867 867 2 1 0.118421 1 494 ADD -0.356341 0.306278 1.35363 0.611464 NA\n1 868 868 2 1 0.110324 1 494 ADD 0.219799 0.306095 0.515633 0.325405 NA\n1 869 869 2 1 0.090081 1 494 ADD -0.18782 0.310294 0.366383 0.263617 NA\n1 870 870 2 1 0.102227 1 494 ADD -0.0804586 0.30742 0.0684982 0.100433 NA\n1 871 871 2 1 0.0809717 1 494 ADD 
-0.304277 0.358927 0.718665 0.401667 NA\n1 872 872 2 1 0.103239 1 494 ADD -0.282635 0.331933 0.725018 0.40395 NA\n1 873 873 2 1 0.0890688 1 494 ADD -0.136656 0.352269 0.15049 0.156103 NA\n1 874 874 2 1 0.110324 1 494 ADD 0.117748 0.29896 0.155125 0.158838 NA\n1 875 875 2 1 0.0991903 1 494 ADD -0.383708 0.327424 1.37335 0.617555 NA\n1 876 876 2 1 0.100202 1 494 ADD 0.0709819 0.323359 0.0481864 0.0828887 NA\n1 877 877 2 1 0.0981781 1 494 ADD -0.464502 0.327646 2.00986 0.806097 NA\n1 878 878 2 1 0.106275 1 494 ADD -0.370287 0.307215 1.45276 0.641901 NA\n1 879 879 2 1 0.109312 1 494 ADD 0.0968104 0.286451 0.11422 0.133483 NA\n1 880 880 2 1 0.0910931 1 494 ADD 0.351748 0.367194 0.917639 0.47096 NA\n1 881 881 2 1 0.0910931 1 494 ADD 0.109134 0.325974 0.112087 0.132073 NA\n1 882 882 2 1 0.112348 1 494 ADD -0.337832 0.308913 1.19599 0.562053 NA\n1 883 883 2 1 0.104251 1 494 ADD -0.132837 0.315457 0.17732 0.171542 NA\n1 884 884 2 1 0.097166 1 494 ADD -0.0464263 0.318415 0.0212589 0.0535105 NA\n1 885 885 2 1 0.0981781 1 494 ADD 0.0589363 0.289117 0.0415544 0.0765117 NA\n1 886 886 2 1 0.0991903 1 494 ADD 0.0338843 0.321601 0.011101 0.0380624 NA\n1 887 887 2 1 0.101215 1 494 ADD -0.0421687 0.328555 0.0164727 0.046784 NA\n1 888 888 2 1 0.090081 1 494 ADD 0.0738572 0.31961 0.0534006 0.087646 NA\n1 889 889 2 1 0.090081 1 494 ADD -0.326606 0.308121 1.12358 0.53888 NA\n1 890 890 2 1 0.107287 1 494 ADD -0.471041 0.294646 2.55574 0.959026 NA\n1 891 891 2 1 0.1083 1 494 ADD -0.484897 0.302352 2.57201 0.963494 NA\n1 892 892 2 1 0.116397 1 494 ADD -0.350952 0.303563 1.33659 0.606186 NA\n1 893 893 2 1 0.097166 1 494 ADD 0.0477092 0.308491 0.0239177 0.0569535 NA\n1 894 894 2 1 0.0961538 1 494 ADD 0.210241 0.315981 0.442703 0.296004 NA\n1 895 895 2 1 0.115385 1 494 ADD 0.323838 0.299935 1.16574 0.552411 NA\n1 896 896 2 1 0.118421 1 494 ADD -0.292681 0.294497 0.987711 0.49444 NA\n1 897 897 2 1 0.0991903 1 494 ADD -0.0343325 0.345831 0.00985558 0.0357782 NA\n1 898 898 2 1 0.11336 1 494 ADD 0.611811 0.291699 4.3991 1.44421 NA\n1 899 899 2 1 0.1083 1 494 ADD -0.734433 0.320907 5.23776 1.65557 NA\n1 900 900 2 1 0.0981781 1 494 ADD 1.09775 0.296301 13.7472 3.67957 NA\n1 901 901 2 1 0.0890688 1 494 ADD 0.370849 0.31866 1.35438 0.611696 NA\n1 902 902 2 1 0.0890688 1 494 ADD 0.883087 0.35294 6.26045 1.90846 NA\n1 903 903 2 1 0.1083 1 494 ADD 0.483535 0.311467 2.41008 0.91881 NA\n1 904 904 2 1 0.0688259 1 494 ADD -0.221007 0.352297 0.393543 0.275361 NA\n1 905 905 2 1 0.097166 1 494 ADD 0.0626792 0.303494 0.0426527 0.077596 NA\n1 906 906 2 1 0.0779352 1 494 ADD -0.0354856 0.374167 0.0089944 0.0341199 NA\n1 907 907 2 1 0.110324 1 494 ADD 0.267862 0.30351 0.77889 0.423105 NA\n1 908 908 2 1 0.0951417 1 494 ADD 0.56125 0.34306 2.67653 0.992091 NA\n1 909 909 2 1 0.0961538 1 494 ADD -0.45069 0.317235 2.01833 0.808521 NA\n1 910 910 2 1 0.117409 1 494 ADD 0.448923 0.302799 2.19804 0.859534 NA\n1 911 911 2 1 0.0981781 1 494 ADD 0.36326 0.32211 1.27183 0.585989 NA\n1 912 912 2 1 0.111336 1 494 ADD 0.0966092 0.313377 0.0950393 0.120408 NA\n1 913 913 2 1 0.0890688 1 494 ADD 0.320095 0.357086 0.80355 0.431758 NA\n1 914 914 2 1 0.107287 1 494 ADD 0.227016 0.308203 0.542549 0.335943 NA\n1 915 915 2 1 0.0991903 1 494 ADD 0.385802 0.310098 1.54786 0.6707 NA\n1 916 916 2 1 0.102227 1 494 ADD 0.647971 0.324107 3.99699 1.34121 NA\n1 917 917 2 1 0.101215 1 494 ADD 0.289154 0.318868 0.822312 0.438297 NA\n1 918 918 2 1 0.0961538 1 494 ADD 0.374032 0.321656 1.35218 0.611015 NA\n1 919 919 2 1 0.11336 1 494 ADD 0.048277 0.312373 0.0238855 0.0569129 
NA\n1 920 920 2 1 0.0941296 1 494 ADD -0.551839 0.348059 2.51374 0.947468 NA\n1 921 921 2 1 0.106275 1 494 ADD 0.532446 0.311713 2.9177 1.05743 NA\n1 922 922 2 1 0.103239 1 494 ADD 0.1285 0.313078 0.168461 0.166545 NA\n1 923 923 2 1 0.101215 1 494 ADD 0.0951912 0.311093 0.0936299 0.119408 NA\n1 924 924 2 1 0.118421 1 494 ADD -0.581543 0.297158 3.82992 1.29804 NA\n1 925 925 2 1 0.097166 1 494 ADD 0.131467 0.33338 0.155508 0.159062 NA\n1 926 926 2 1 0.090081 1 494 ADD 0.103624 0.331895 0.0974814 0.122126 NA\n1 927 927 2 1 0.0860324 1 494 ADD -0.539985 0.337693 2.55693 0.959352 NA\n1 928 928 2 1 0.0951417 1 494 ADD -0.678216 0.33671 4.05718 1.35671 NA\n1 929 929 2 1 0.0931174 1 494 ADD -0.062903 0.327696 0.036847 0.0717184 NA\n1 930 930 2 1 0.117409 1 494 ADD 0.0311768 0.278267 0.0125528 0.0405807 NA\n1 931 931 2 1 0.0951417 1 494 ADD -0.0207888 0.303694 0.00468581 0.0243729 NA\n1 932 932 2 1 0.101215 1 494 ADD -0.0281666 0.311968 0.00815172 0.0324243 NA\n1 933 933 2 1 0.111336 1 494 ADD -0.125818 0.328397 0.146787 0.153895 NA\n1 934 934 2 1 0.0809717 1 494 ADD -0.00540114 0.339265 0.000253451 0.0055517 NA\n1 935 935 2 1 0.0961538 1 494 ADD 0.0854552 0.315683 0.0732781 0.104234 NA\n1 936 936 2 1 0.0991903 1 494 ADD 0.308974 0.295162 1.09578 0.529892 NA\n1 937 937 2 1 0.0961538 1 494 ADD 0.00478781 0.315185 0.000230751 0.00529571 NA\n1 938 938 2 1 0.0991903 1 494 ADD 0.804481 0.314108 6.55957 1.98163 NA\n1 939 939 2 1 0.0910931 1 494 ADD -0.269829 0.319819 0.711818 0.3992 NA\n1 940 940 2 1 0.0981781 1 494 ADD 0.291839 0.320734 0.827937 0.440249 NA\n1 941 941 2 1 0.0910931 1 494 ADD -0.406809 0.325383 1.56312 0.675287 NA\n1 942 942 2 1 0.1083 1 494 ADD -0.111771 0.315272 0.125686 0.140893 NA\n1 943 943 2 1 0.0819838 1 494 ADD -0.409007 0.331388 1.52331 0.663302 NA\n1 944 944 2 1 0.104251 1 494 ADD -0.251167 0.306807 0.670181 0.384062 NA\n1 945 945 2 1 0.0870445 1 494 ADD 0.374096 0.322313 1.34713 0.609453 NA\n1 946 946 2 1 0.110324 1 494 ADD -0.0119146 0.292117 0.00166359 0.0143645 NA\n1 947 947 2 1 0.112348 1 494 ADD 0.0987406 0.291545 0.114705 0.133801 NA\n1 948 948 2 1 0.112348 1 494 ADD -0.190394 0.311259 0.374166 0.26701 NA\n1 949 949 2 1 0.112348 1 494 ADD -0.142297 0.314719 0.204431 0.186307 NA\n1 950 950 2 1 0.104251 1 494 ADD -0.210362 0.32476 0.419574 0.286384 NA\n1 951 951 2 1 0.102227 1 494 ADD -0.239762 0.334902 0.512537 0.324182 NA\n1 952 952 2 1 0.0981781 1 494 ADD -0.411637 0.333471 1.52374 0.663432 NA\n1 953 953 2 1 0.102227 1 494 ADD -0.112478 0.303571 0.137283 0.148132 NA\n1 954 954 2 1 0.0941296 1 494 ADD -0.585852 0.311158 3.54499 1.22384 NA\n1 955 955 2 1 0.0890688 1 494 ADD 0.265764 0.342405 0.602435 0.358873 NA\n1 956 956 2 1 0.100202 1 494 ADD 0.0406184 0.316105 0.0165114 0.0468418 NA\n1 957 957 2 1 0.097166 1 494 ADD -0.284559 0.32428 0.770027 0.419978 NA\n1 958 958 2 1 0.104251 1 494 ADD 0.366401 0.295748 1.53487 0.666788 NA\n1 959 959 2 1 0.0921053 1 494 ADD 0.0649031 0.332198 0.0381713 0.0730921 NA\n1 960 960 2 1 0.110324 1 494 ADD 0.629564 0.331301 3.61105 1.24111 NA\n1 961 961 2 1 0.102227 1 494 ADD -0.0588028 0.335168 0.0307801 0.0651321 NA\n1 962 962 2 1 0.090081 1 494 ADD -0.229848 0.329947 0.485284 0.313329 NA\n1 963 963 2 1 0.0941296 1 494 ADD 0.35503 0.34828 1.03914 0.511418 NA\n1 964 964 2 1 0.090081 1 494 ADD 0.605797 0.342471 3.129 1.11401 NA\n1 965 965 2 1 0.109312 1 494 ADD 0.180911 0.299726 0.364318 0.262714 NA\n1 966 966 2 1 0.0850202 1 494 ADD 0.532672 0.347196 2.35381 0.903169 NA\n1 967 967 2 1 0.097166 1 494 ADD -0.436032 0.321018 1.84492 0.758515 
NA\n1 968 968 2 1 0.103239 1 494 ADD -0.00634591 0.333525 0.000362018 0.00664326 NA\n1 969 969 2 1 0.0961538 1 494 ADD -0.232087 0.317154 0.535502 0.333198 NA\n1 970 970 2 1 0.0951417 1 494 ADD 0.373221 0.310884 1.44124 0.638387 NA\n1 971 971 2 1 0.097166 1 494 ADD -0.239757 0.315575 0.577213 0.349298 NA\n1 972 972 2 1 0.100202 1 494 ADD -0.689433 0.316092 4.75727 1.53499 NA\n1 973 973 2 1 0.0910931 1 494 ADD 0.796849 0.325646 5.98772 1.84146 NA\n1 974 974 2 1 0.1083 1 494 ADD 0.342355 0.308272 1.23335 0.573884 NA\n1 975 975 2 1 0.104251 1 494 ADD 0.045303 0.304268 0.0221688 0.0547092 NA\n1 976 976 2 1 0.103239 1 494 ADD 0.253727 0.287726 0.777636 0.422663 NA\n1 977 977 2 1 0.0991903 1 494 ADD 0.295869 0.32383 0.834765 0.442615 NA\n1 978 978 2 1 0.0951417 1 494 ADD -0.217941 0.319437 0.465488 0.305333 NA\n1 979 979 2 1 0.103239 1 494 ADD -0.0681002 0.300092 0.0514975 0.0859333 NA\n1 980 980 2 1 0.100202 1 494 ADD 0.396365 0.31035 1.63112 0.695622 NA\n1 981 981 2 1 0.0840081 1 494 ADD -0.48036 0.33505 2.05548 0.81913 NA\n1 982 982 2 1 0.0870445 1 494 ADD -0.281532 0.352418 0.638173 0.372252 NA\n1 983 983 2 1 0.102227 1 494 ADD -0.564838 0.336733 2.81369 1.02936 NA\n1 984 984 2 1 0.102227 1 494 ADD 0.263928 0.293691 0.807583 0.433167 NA\n1 985 985 2 1 0.0961538 1 494 ADD -0.0939484 0.300055 0.0980339 0.122512 NA\n1 986 986 2 1 0.103239 1 494 ADD -0.271388 0.297233 0.833656 0.442231 NA\n1 987 987 2 1 0.103239 1 494 ADD 0.24434 0.302888 0.650763 0.376916 NA\n1 988 988 2 1 0.100202 1 494 ADD 0.167054 0.301468 0.307064 0.236956 NA\n1 989 989 2 1 0.0941296 1 494 ADD 0.00091681 0.333017 7.57927e-06 0.000955025 NA\n1 990 990 2 1 0.100202 1 494 ADD -0.0686065 0.346434 0.0392184 0.0741639 NA\n1 991 991 2 1 0.107287 1 494 ADD 0.00766918 0.280053 0.000749924 0.00959327 NA\n1 992 992 2 1 0.102227 1 494 ADD -0.231287 0.291735 0.628526 0.368661 NA\n1 993 993 2 1 0.118421 1 494 ADD 0.0661996 0.269154 0.0604934 0.093817 NA\n1 994 994 2 1 0.103239 1 494 ADD 0.367353 0.292254 1.57996 0.68034 NA\n1 995 995 2 1 0.0921053 1 494 ADD 0.0355499 0.356648 0.00993565 0.0359289 NA\n1 996 996 2 1 0.0941296 1 494 ADD -0.367872 0.341627 1.15955 0.55043 NA\n1 997 997 2 1 0.117409 1 494 ADD 0.133392 0.268002 0.247731 0.208536 NA\n1 998 998 2 1 0.0789474 1 494 ADD 0.407348 0.345873 1.38707 0.621782 NA\n1 999 999 2 1 0.102227 1 494 ADD 0.112324 0.327073 0.11794 0.135916 NA\n1 1000 1000 2 1 0.117409 1 494 ADD -0.0405477 0.280429 0.0209068 0.0530405 NA\n"
  },
  {
    "path": "external_libs/LBFGSpp/AUTHORS.md",
    "content": "The LBFGS++ library was adapted from the libLBFGS library\n(https://github.com/chokkan/liblbfgs), written by\nNaoaki Okazaki <<okazaki@c.titech.ac.jp>>.\n\nThe files\n\n- `include/LBFGS/LineSearchBracketing.h`\n- `include/LBFGS/LineSearchNocedalWright.h`\n\nwere contributed by Dirk Toewe <<DirkToewe@GoogleMail.com>>.\n\nOther part of LBFGS++ was written by Yixuan Qiu <<yixuan.qiu@cos.name>>.\n"
  },
  {
    "path": "external_libs/LBFGSpp/LICENSE.md",
    "content": "## The MIT License\n\nCopyright (c) 1990 Jorge Nocedal\n\nCopyright (c) 2007-2010 Naoaki Okazaki\n\nCopyright (c) 2016-2019 Yixuan Qiu\n\nCopyright (c) 2018-2019 Dirk Toewe\n\nPermission is hereby granted, free of charge, to any person obtaining\na copy of this software and associated documentation files (the\n\"Software\"), to deal in the Software without restriction, including\nwithout limitation the rights to use, copy, modify, merge, publish,\ndistribute, sublicense, and/or sell copies of the Software, and to\npermit persons to whom the Software is furnished to do so, subject to\nthe following conditions:\n\nThe above copyright notice and this permission notice shall be\nincluded in all copies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND,\nEXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\nMERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\nNONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE\nLIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION\nOF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION\nWITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.\n"
  },
  {
    "path": "external_libs/LBFGSpp/README.md",
    "content": "# LBFGS++ <img src=\"https://statr.me/images/sticker-lbfgspp.png\" alt=\"LBFGS++\" height=\"150px\" align=\"right\" />\n\n> **UPDATE on 2020-03-06**: **LBFGS++** now includes a new L-BFGS-B solver for\n> box-constrained optimization problems. Check the example below for its usage.\n\n**LBFGS++** is a header-only C++ library that implements the Limited-memory\nBFGS algorithm (L-BFGS) for unconstrained minimization problems, and a modified\nversion of the L-BFGS-B algorithm for box-constrained ones.\n\nThe code for the L-BFGS solver is derived and modified from the\n[libLBFGS](https://github.com/chokkan/liblbfgs)\nlibrary developed by [Naoaki Okazaki](http://www.chokkan.org/).\n\n**LBFGS++** is implemented as a header-only C++ library, whose only dependency,\n[Eigen](http://eigen.tuxfamily.org/), is also header-only.\n\n## A Quick Example\n\nTo use **LBFGS++**, one needs to first define a functor to represent the\nmultivariate function to be minimized. It should return the objective function\nvalue on a vector `x` and overwrite the vector `grad` with the gradient\nevaluated on `x`. For example we could define the\n[Rosenbrock function](https://en.wikipedia.org/wiki/Rosenbrock_function) in the\nfollowing way:\n\n```cpp\n#include <Eigen/Core>\n#include <iostream>\n#include <LBFGS.h>\n\nusing Eigen::VectorXd;\nusing namespace LBFGSpp;\n\nclass Rosenbrock\n{\nprivate:\n    int n;\npublic:\n    Rosenbrock(int n_) : n(n_) {}\n    double operator()(const VectorXd& x, VectorXd& grad)\n    {\n        double fx = 0.0;\n        for(int i = 0; i < n; i += 2)\n        {\n            double t1 = 1.0 - x[i];\n            double t2 = 10 * (x[i + 1] - x[i] * x[i]);\n            grad[i + 1] = 20 * t2;\n            grad[i]     = -2.0 * (x[i] * grad[i + 1] + t1);\n            fx += t1 * t1 + t2 * t2;\n        }\n        return fx;\n    }\n};\n```\n\nThen we just need to set up parameters, create solver object,\nprovide initial guess, and then run the minimization function.\n\n```cpp\nint main()\n{\n    const int n = 10;\n    // Set up parameters\n    LBFGSParam<double> param;\n    param.epsilon = 1e-6;\n    param.max_iterations = 100;\n\n    // Create solver and function object\n    LBFGSSolver<double> solver(param);\n    Rosenbrock fun(n);\n\n    // Initial guess\n    VectorXd x = VectorXd::Zero(n);\n    // x will be overwritten to be the best point found\n    double fx;\n    int niter = solver.minimize(fun, x, fx);\n\n    std::cout << niter << \" iterations\" << std::endl;\n    std::cout << \"x = \\n\" << x.transpose() << std::endl;\n    std::cout << \"f(x) = \" << fx << std::endl;\n\n    return 0;\n}\n```\n\nThe example can then be compiled and run.\n\n```bash\n$ g++ -I/path/to/eigen -I/path/to/lbfgspp/include -O2 example.cpp\n$ ./a.out\n23 iterations\nx =\n1 1 1 1 1 1 1 1 1 1\nf(x) = 1.87948e-19\n```\n\nYou can also use a different line search algorithm by providing a second template parameter\nto `LBFGSSolver`. 
For example, the code below illustrates the bracketing line search algorithm\n(contributed by [@DirkToewe](https://github.com/DirkToewe)).\n\n```cpp\nint main()\n{\n    const int n = 10;\n    // Set up parameters\n    LBFGSParam<double> param;\n    param.epsilon = 1e-6;\n    param.max_iterations = 100;\n\n    // Create solver and function object\n    LBFGSSolver<double, LineSearchBracketing> solver(param);\n    Rosenbrock fun(n);\n\n    // Initial guess\n    VectorXd x = VectorXd::Zero(n);\n    // x will be overwritten to be the best point found\n    double fx;\n    int niter = solver.minimize(fun, x, fx);\n\n    std::cout << niter << \" iterations\" << std::endl;\n    std::cout << \"x = \\n\" << x.transpose() << std::endl;\n    std::cout << \"f(x) = \" << fx << std::endl;\n\n    return 0;\n}\n```\n\n## Box-constrained Problem\n\nIf the parameters to be optimized have simple bounds, then the\nL-BFGS-**B** solver class `LBFGSBSolver` can be used.\nThe code is very similar to that of `LBFGSSolver`. Below is the same Rosenbrock\nexample, but we require that all variables should be between 2 and 4.\n\n```cpp\n#include <Eigen/Core>\n#include <iostream>\n#include <LBFGSB.h>  // Note the different header file\n\nusing Eigen::VectorXd;\nusing namespace LBFGSpp;\n\nclass Rosenbrock\n{\nprivate:\n    int n;\npublic:\n    Rosenbrock(int n_) : n(n_) {}\n    double operator()(const VectorXd& x, VectorXd& grad)\n    {\n        double fx = 0.0;\n        for(int i = 0; i < n; i += 2)\n        {\n            double t1 = 1.0 - x[i];\n            double t2 = 10 * (x[i + 1] - x[i] * x[i]);\n            grad[i + 1] = 20 * t2;\n            grad[i]     = -2.0 * (x[i] * grad[i + 1] + t1);\n            fx += t1 * t1 + t2 * t2;\n        }\n        return fx;\n    }\n};\n\nint main()\n{\n    const int n = 10;\n    // Set up parameters\n    LBFGSBParam<double> param;  // New parameter class\n    param.epsilon = 1e-6;\n    param.max_iterations = 100;\n\n    // Create solver and function object\n    LBFGSBSolver<double> solver(param);  // New solver class\n    Rosenbrock fun(n);\n\n    // Bounds\n    VectorXd lb = VectorXd::Constant(n, 2.0);\n    VectorXd ub = VectorXd::Constant(n, 4.0);\n\n    // Initial guess\n    VectorXd x = VectorXd::Constant(n, 3.0);\n\n    // x will be overwritten to be the best point found\n    double fx;\n    int niter = solver.minimize(fun, x, fx, lb, ub);\n\n    std::cout << niter << \" iterations\" << std::endl;\n    std::cout << \"x = \\n\" << x.transpose() << std::endl;\n    std::cout << \"f(x) = \" << fx << std::endl;\n\n    return 0;\n}\n```\n\nNote that we also allow infinite values for the lower and upper bounds.\nIn such cases one can define `ub[i] = std::numeric_limits<double>::infinity()`,\nfor example.\n\n## Documentation\n\nThe [API reference](https://lbfgspp.statr.me/doc/) page contains the documentation\nof **LBFGS++** generated by [Doxygen](http://www.doxygen.org/).\n\n## License\n\n**LBFGS++** is an open source project under the MIT license.\n"
  },
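  {
    "path": "external_libs/LBFGSpp/example_onesided_bounds.cpp",
    "content": "// Editor's sketch (hypothetical file, not part of the upstream LBFGS++ sources):\n// the README above mentions that infinite bounds are allowed but shows no code for\n// that case, so this restates its Rosenbrock example with one-sided constraints\n// x[i] >= 1 and ub[i] = +infinity. Only LBFGSBParam, LBFGSBSolver and minimize()\n// from LBFGSB.h are assumed, exactly as used in the README.\n#include <Eigen/Core>\n#include <iostream>\n#include <limits>\n#include <LBFGSB.h>\n\nusing Eigen::VectorXd;\nusing namespace LBFGSpp;\n\n// Same Rosenbrock functor as in the README\nclass Rosenbrock\n{\nprivate:\n    int n;\npublic:\n    Rosenbrock(int n_) : n(n_) {}\n    double operator()(const VectorXd& x, VectorXd& grad)\n    {\n        double fx = 0.0;\n        for(int i = 0; i < n; i += 2)\n        {\n            double t1 = 1.0 - x[i];\n            double t2 = 10 * (x[i + 1] - x[i] * x[i]);\n            grad[i + 1] = 20 * t2;\n            grad[i]     = -2.0 * (x[i] * grad[i + 1] + t1);\n            fx += t1 * t1 + t2 * t2;\n        }\n        return fx;\n    }\n};\n\nint main()\n{\n    const int n = 10;\n    LBFGSBParam<double> param;\n    param.epsilon = 1e-6;\n    param.max_iterations = 100;\n    LBFGSBSolver<double> solver(param);\n    Rosenbrock fun(n);\n\n    // Lower bound 1, no upper bound: an infinite ub[i] disables that constraint\n    VectorXd lb = VectorXd::Constant(n, 1.0);\n    VectorXd ub = VectorXd::Constant(n, std::numeric_limits<double>::infinity());\n\n    VectorXd x = VectorXd::Constant(n, 3.0);\n    double fx;\n    int niter = solver.minimize(fun, x, fx, lb, ub);\n\n    std::cout << niter << \" iterations, f(x) = \" << fx << std::endl;\n    return 0;\n}\n"
  },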
  {
    "path": "external_libs/LBFGSpp/include/LBFGS.h",
    "content": "// Copyright (C) 2016-2020 Yixuan Qiu <yixuan.qiu@cos.name>\n// Under MIT license\n\n#ifndef LBFGS_H\n#define LBFGS_H\n\n#include <Eigen/Core>\n#include \"LBFGSpp/Param.h\"\n#include \"LBFGSpp/BFGSMat.h\"\n#include \"LBFGSpp/LineSearchBacktracking.h\"\n#include \"LBFGSpp/LineSearchBracketing.h\"\n#include \"LBFGSpp/LineSearchNocedalWright.h\"\n\n\nnamespace LBFGSpp {\n\n\n///\n/// L-BFGS solver for unconstrained numerical optimization\n///\ntemplate < typename Scalar,\n           template<class> class LineSearch = LineSearchBacktracking >\nclass LBFGSSolver\n{\nprivate:\n    typedef Eigen::Matrix<Scalar, Eigen::Dynamic, 1> Vector;\n    typedef Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic> Matrix;\n    typedef Eigen::Map<Vector> MapVec;\n\n    const LBFGSParam<Scalar>& m_param;  // Parameters to control the LBFGS algorithm\n    BFGSMat<Scalar>           m_bfgs;   // Approximation to the Hessian matrix\n    Vector                    m_fx;     // History of the objective function values\n    Vector                    m_xp;     // Old x\n    Vector                    m_grad;   // New gradient\n    Vector                    m_gradp;  // Old gradient\n    Vector                    m_drt;    // Moving direction\n\n    // Reset internal variables\n    // n: dimension of the vector to be optimized\n    inline void reset(int n)\n    {\n        const int m = m_param.m;\n        m_bfgs.reset(n, m);\n        m_xp.resize(n);\n        m_grad.resize(n);\n        m_gradp.resize(n);\n        m_drt.resize(n);\n        if(m_param.past > 0)\n            m_fx.resize(m_param.past);\n    }\n\npublic:\n    ///\n    /// Constructor for the L-BFGS solver.\n    ///\n    /// \\param param An object of \\ref LBFGSParam to store parameters for the\n    ///        algorithm\n    ///\n    LBFGSSolver(const LBFGSParam<Scalar>& param) :\n        m_param(param)\n    {\n        m_param.check_param();\n    }\n\n    ///\n    /// Minimizing a multivariate function using the L-BFGS algorithm.\n    /// Exceptions will be thrown if error occurs.\n    ///\n    /// \\param f  A function object such that `f(x, grad)` returns the\n    ///           objective function value at `x`, and overwrites `grad` with\n    ///           the gradient.\n    /// \\param x  In: An initial guess of the optimal point. 
Out: The best point\n    ///           found.\n    /// \\param fx Out: The objective function value at `x`.\n    ///\n    /// \\return Number of iterations used.\n    ///\n    template <typename Foo>\n    inline int minimize(Foo& f, Vector& x, Scalar& fx)\n    {\n        using std::abs;\n\n        // Dimension of the vector\n        const int n = x.size();\n        reset(n);\n\n        // The length of lag for objective function value to test convergence\n        const int fpast = m_param.past;\n\n        // Evaluate function and compute gradient\n        fx = f(x, m_grad);\n        Scalar gnorm = m_grad.norm();\n        if(fpast > 0)\n            m_fx[0] = fx;\n\n        // Early exit if the initial x is already a minimizer\n        if(gnorm <= m_param.epsilon || gnorm <= m_param.epsilon_rel * x.norm())\n        {\n            return 1;\n        }\n\n        // Initial direction\n        m_drt.noalias() = -m_grad;\n        // Initial step size\n        Scalar step = Scalar(1) / m_drt.norm();\n\n        // Number of iterations used\n        int k = 1;\n        for( ; ; )\n        {\n            // Save the curent x and gradient\n            m_xp.noalias() = x;\n            m_gradp.noalias() = m_grad;\n\n            // Line search to update x, fx and gradient\n            LineSearch<Scalar>::LineSearch(f, fx, x, m_grad, step, m_drt, m_xp, m_param);\n\n            // New gradient norm\n            gnorm = m_grad.norm();\n\n            // Convergence test -- gradient\n            if(gnorm <= m_param.epsilon || gnorm <= m_param.epsilon_rel * x.norm())\n            {\n                return k;\n            }\n            // Convergence test -- objective function value\n            if(fpast > 0)\n            {\n                const Scalar fxd = m_fx[k % fpast];\n                if(k >= fpast && abs(fxd - fx) <= m_param.delta * std::max(std::max(abs(fx), abs(fxd)), Scalar(1)))\n                    return k;\n\n                m_fx[k % fpast] = fx;\n            }\n            // Maximum number of iterations\n            if(m_param.max_iterations != 0 && k >= m_param.max_iterations)\n            {\n                return k;\n            }\n\n            // Update s and y\n            // s_{k+1} = x_{k+1} - x_k\n            // y_{k+1} = g_{k+1} - g_k\n            m_bfgs.add_correction(x - m_xp, m_grad - m_gradp);\n\n            // Recursive formula to compute d = -H * g\n            m_bfgs.apply_Hv(m_grad, -Scalar(1), m_drt);\n\n            // Reset step = 1.0 as initial guess for the next line search\n            step = Scalar(1);\n            k++;\n        }\n\n        return k;\n    }\n};\n\n\n} // namespace LBFGSpp\n\n#endif // LBFGS_H\n"
  },
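  {
    "path": "external_libs/LBFGSpp/example_param_sketch.cpp",
    "content": "// Editor's sketch (hypothetical file, not part of the upstream LBFGS++ sources):\n// it spells out how the LBFGSParam fields read by LBFGSSolver::minimize() above\n// interact. Every field named here (m, epsilon, epsilon_rel, past, delta,\n// max_iterations) is inferred from its use in LBFGS.h; see LBFGSpp/Param.h for\n// the authoritative definitions and defaults.\n#include <LBFGS.h>\n\nusing namespace LBFGSpp;\n\nLBFGSParam<double> make_params()\n{\n    LBFGSParam<double> p;\n    p.m = 8;                 // correction pairs kept by BFGSMat (history size)\n    p.epsilon = 1e-6;        // absolute test: stop when ||grad|| <= epsilon\n    p.epsilon_rel = 1e-6;    // relative test: stop when ||grad|| <= epsilon_rel * ||x||\n    p.past = 3;              // compare f(x_k) against f(x_{k-past}) ...\n    p.delta = 1e-10;         // ... and stop when the relative change is below delta\n    p.max_iterations = 200;  // 0 means no iteration limit\n    return p;\n}\n"
  },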
  {
    "path": "external_libs/LBFGSpp/include/LBFGSB.h",
    "content": "// Copyright (C) 2020 Yixuan Qiu <yixuan.qiu@cos.name>\n// Under MIT license\n\n#ifndef LBFGSB_H\n#define LBFGSB_H\n\n#include <stdexcept>  // std::invalid_argument\n#include <vector>\n#include <Eigen/Core>\n#include \"LBFGSpp/Param.h\"\n#include \"LBFGSpp/BFGSMat.h\"\n#include \"LBFGSpp/Cauchy.h\"\n#include \"LBFGSpp/SubspaceMin.h\"\n#include \"LBFGSpp/LineSearchMoreThuente.h\"\n\n\nnamespace LBFGSpp {\n\n\n///\n/// L-BFGS-B solver for box-constrained numerical optimization\n///\ntemplate < typename Scalar,\n           template<class> class LineSearch = LineSearchMoreThuente >\nclass LBFGSBSolver\n{\nprivate:\n    typedef Eigen::Matrix<Scalar, Eigen::Dynamic, 1> Vector;\n    typedef Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic> Matrix;\n    typedef Eigen::Map<Vector> MapVec;\n    typedef std::vector<int> IndexSet;\n\n    const LBFGSBParam<Scalar>& m_param;  // Parameters to control the LBFGS algorithm\n    BFGSMat<Scalar, true>      m_bfgs;   // Approximation to the Hessian matrix\n    Vector                     m_fx;     // History of the objective function values\n    Vector                     m_xp;     // Old x\n    Vector                     m_grad;   // New gradient\n    Vector                     m_gradp;  // Old gradient\n    Vector                     m_drt;    // Moving direction\n\n    // Reset internal variables\n    // n: dimension of the vector to be optimized\n    inline void reset(int n)\n    {\n        const int m = m_param.m;\n        m_bfgs.reset(n, m);\n        m_xp.resize(n);\n        m_grad.resize(n);\n        m_gradp.resize(n);\n        m_drt.resize(n);\n        if(m_param.past > 0)\n            m_fx.resize(m_param.past);\n    }\n\n    // Project the vector x to the bound constraint set\n    static void force_bounds(Vector& x, const Vector& lb, const Vector& ub)\n    {\n        x.noalias() = x.cwiseMax(lb).cwiseMin(ub);\n    }\n\n    // Norm of the projected gradient\n    // ||P(x-g, l, u) - x||_inf\n    static Scalar proj_grad_norm(const Vector& x, const Vector& g, const Vector& lb, const Vector& ub)\n    {\n        return ((x - g).cwiseMax(lb).cwiseMin(ub) - x).cwiseAbs().maxCoeff();\n    }\n\n    // The maximum step size alpha such that x0 + alpha * d stays within the bounds\n    static Scalar max_step_size(const Vector& x0, const Vector& drt, const Vector& lb, const Vector& ub)\n    {\n        const int n = x0.size();\n        Scalar step = std::numeric_limits<Scalar>::infinity();\n\n        for(int i = 0; i < n; i++)\n        {\n            if(drt[i] > Scalar(0))\n            {\n                step = std::min(step, (ub[i] - x0[i]) / drt[i]);\n            } else if(drt[i] < Scalar(0)) {\n                step = std::min(step, (lb[i] - x0[i]) / drt[i]);\n            }\n        }\n\n        return step;\n    }\n\npublic:\n    ///\n    /// Constructor for the L-BFGS-B solver.\n    ///\n    /// \\param param An object of \\ref LBFGSParam to store parameters for the\n    ///        algorithm\n    ///\n    LBFGSBSolver(const LBFGSBParam<Scalar>& param) :\n        m_param(param)\n    {\n        m_param.check_param();\n    }\n\n    ///\n    /// Minimizing a multivariate function subject to box constraints, using the L-BFGS-B algorithm.\n    /// Exceptions will be thrown if error occurs.\n    ///\n    /// \\param f  A function object such that `f(x, grad)` returns the\n    ///           objective function value at `x`, and overwrites `grad` with\n    ///           the gradient.\n    /// \\param x  In: An initial guess of the optimal point. 
Out: The best point\n    ///           found.\n    /// \\param fx Out: The objective function value at `x`.\n    /// \\param lb Lower bounds for `x`.\n    /// \\param ub Upper bounds for `x`.\n    ///\n    /// \\return Number of iterations used.\n    ///\n    template <typename Foo>\n    inline int minimize(Foo& f, Vector& x, Scalar& fx, const Vector& lb, const Vector& ub)\n    {\n        using std::abs;\n\n        // Dimension of the vector\n        const int n = x.size();\n        if(lb.size() != n || ub.size() != n)\n            throw std::invalid_argument(\"'lb' and 'ub' must have the same size as 'x'\");\n\n        // Check whether the initial vector is within the bounds\n        // If not, project to the feasible set\n        force_bounds(x, lb, ub);\n\n        // Initialization\n        reset(n);\n\n        // The length of lag for objective function value to test convergence\n        const int fpast = m_param.past;\n\n        // Evaluate function and compute gradient\n        fx = f(x, m_grad);\n        Scalar projgnorm = proj_grad_norm(x, m_grad, lb, ub);\n        if(fpast > 0)\n            m_fx[0] = fx;\n\n        // std::cout << \"x0 = \" << x.transpose() << std::endl;\n        // std::cout << \"f(x0) = \" << fx << \", ||proj_grad|| = \" << projgnorm << std::endl << std::endl;\n\n        // Early exit if the initial x is already a minimizer\n        if(projgnorm <= m_param.epsilon || projgnorm <= m_param.epsilon_rel * x.norm())\n        {\n            return 1;\n        }\n\n        // Compute generalized Cauchy point\n        Vector xcp(n), vecc;\n        IndexSet newact_set, fv_set;\n        Cauchy<Scalar>::get_cauchy_point(m_bfgs, x, m_grad, lb, ub, xcp, vecc, newact_set, fv_set);\n\n        /* Vector gcp(n);\n        Scalar fcp = f(xcp, gcp);\n        Scalar projgcpnorm = proj_grad_norm(xcp, gcp, lb, ub);\n        std::cout << \"xcp = \" << xcp.transpose() << std::endl;\n        std::cout << \"f(xcp) = \" << fcp << \", ||proj_grad|| = \" << projgcpnorm << std::endl << std::endl; */\n\n        // Initial direction\n        m_drt.noalias() = xcp - x;\n        m_drt.normalize();\n        // Tolerance for s'y >= eps * (y'y)\n        const Scalar eps = std::numeric_limits<Scalar>::epsilon();\n        // s and y vectors\n        Vector vecs(n), vecy(n);\n        // Number of iterations used\n        int k = 1;\n        for( ; ; )\n        {\n            // Save the curent x and gradient\n            m_xp.noalias() = x;\n            m_gradp.noalias() = m_grad;\n\n            // Line search to update x, fx and gradient\n            Scalar step_max = max_step_size(x, m_drt, lb, ub);\n            step_max = std::min(m_param.max_step, step_max);\n            Scalar step = Scalar(1);\n            step = std::min(step, step_max);\n            LineSearch<Scalar>::LineSearch(f, fx, x, m_grad, step, step_max, m_drt, m_xp, m_param);\n\n            // New projected gradient norm\n            projgnorm = proj_grad_norm(x, m_grad, lb, ub);\n\n            /* std::cout << \"** Iteration \" << k << std::endl;\n            std::cout << \"   x = \" << x.transpose() << std::endl;\n            std::cout << \"   f(x) = \" << fx << \", ||proj_grad|| = \" << projgnorm << std::endl << std::endl; */\n\n            // Convergence test -- gradient\n            if(projgnorm <= m_param.epsilon || projgnorm <= m_param.epsilon_rel * x.norm())\n            {\n                return k;\n            }\n            // Convergence test -- objective function value\n            if(fpast > 0)\n            {\n            
    const Scalar fxd = m_fx[k % fpast];\n                if(k >= fpast && abs(fxd - fx) <= m_param.delta * std::max(std::max(abs(fx), abs(fxd)), Scalar(1)))\n                    return k;\n\n                m_fx[k % fpast] = fx;\n            }\n            // Maximum number of iterations\n            if(m_param.max_iterations != 0 && k >= m_param.max_iterations)\n            {\n                return k;\n            }\n\n            // Update s and y\n            // s_{k+1} = x_{k+1} - x_k\n            // y_{k+1} = g_{k+1} - g_k\n            vecs.noalias() = x - m_xp;\n            vecy.noalias() = m_grad - m_gradp;\n            if(vecs.dot(vecy) > eps * vecy.squaredNorm())\n                m_bfgs.add_correction(vecs, vecy);\n\n            force_bounds(x, lb, ub);\n            Cauchy<Scalar>::get_cauchy_point(m_bfgs, x, m_grad, lb, ub, xcp, vecc, newact_set, fv_set);\n\n            /*Vector gcp(n);\n            Scalar fcp = f(xcp, gcp);\n            Scalar projgcpnorm = proj_grad_norm(xcp, gcp, lb, ub);\n            std::cout << \"xcp = \" << xcp.transpose() << std::endl;\n            std::cout << \"f(xcp) = \" << fcp << \", ||proj_grad|| = \" << projgcpnorm << std::endl << std::endl;*/\n\n            SubspaceMin<Scalar>::subspace_minimize(m_bfgs, x, xcp, m_grad, lb, ub,\n                vecc, newact_set, fv_set, m_param.max_submin, m_drt);\n\n            /*Vector gsm(n);\n            Scalar fsm = f(x + m_drt, gsm);\n            Scalar projgsmnorm = proj_grad_norm(x + m_drt, gsm, lb, ub);\n            std::cout << \"xsm = \" << (x + m_drt).transpose() << std::endl;\n            std::cout << \"f(xsm) = \" << fsm << \", ||proj_grad|| = \" << projgsmnorm << std::endl << std::endl;*/\n\n            k++;\n        }\n\n        return k;\n    }\n};\n\n\n} // namespace LBFGSpp\n\n#endif // LBFGSB_H\n"
  },
  {
    "path": "external_libs/LBFGSpp/include/LBFGSpp/BFGSMat.h",
    "content": "// Copyright (C) 2020 Yixuan Qiu <yixuan.qiu@cos.name>\n// Under MIT license\n\n#ifndef BFGS_MAT_H\n#define BFGS_MAT_H\n\n#include <vector>\n#include <Eigen/Core>\n#include \"BKLDLT.h\"\n\n\n/// \\cond\n\nnamespace LBFGSpp {\n\n\n//\n// An *implicit* representation of the BFGS approximation to the Hessian matrix B\n//\n// B = theta * I - W * M * W'\n// H = inv(B)\n//\n// Reference:\n// [1] D. C. Liu and J. Nocedal (1989). On the limited memory BFGS method for large scale optimization.\n// [2] R. H. Byrd, P. Lu, and J. Nocedal (1995). A limited memory algorithm for bound constrained optimization.\n//\ntemplate <typename Scalar, bool LBFGSB = false>\nclass BFGSMat\n{\nprivate:\n    typedef Eigen::Matrix<Scalar, Eigen::Dynamic, 1> Vector;\n    typedef Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic> Matrix;\n    typedef Eigen::Ref<const Vector> RefConstVec;\n    typedef std::vector<int> IndexSet;\n\n    int    m_m;      // Maximum number of correction vectors\n    Scalar m_theta;  // theta * I is the initial approximation to the Hessian matrix\n    Matrix m_s;      // History of the s vectors\n    Matrix m_y;      // History of the y vectors\n    Vector m_ys;     // History of the s'y values\n    Vector m_alpha;  // Temporary values used in computing H * v\n    int    m_ncorr;  // Number of correction vectors in the history, m_ncorr <= m\n    int    m_ptr;    // A Pointer to locate the most recent history, 1 <= m_ptr <= m\n                     // Details: s and y vectors are stored in cyclic order.\n                     //          For example, if the current s-vector is stored in m_s[, m-1],\n                     //          then in the next iteration m_s[, 0] will be overwritten.\n                     //          m_s[, m_ptr-1] points to the most recent history,\n                     //          and m_s[, m_ptr % m] points to the most distant one.\n\n    //========== The following members are only used in L-BFGS-B algorithm ==========//\n    Matrix                      m_permMinv;     // Permutated M inverse\n    BKLDLT<Scalar>              m_permMsolver;  // Represents the permutated M matrix\n\npublic:\n    // Constructor\n    BFGSMat() {}\n\n    // Reset internal variables\n    // n: dimension of the vector to be optimized\n    // m: maximum number of corrections to approximate the Hessian matrix\n    inline void reset(int n, int m)\n    {\n        m_m = m;\n        m_theta = Scalar(1);\n        m_s.resize(n, m);\n        m_y.resize(n, m);\n        m_ys.resize(m);\n        m_alpha.resize(m);\n        m_ncorr = 0;\n        m_ptr = m;  // This makes sure that m_ptr % m == 0 in the first step\n\n        if(LBFGSB)\n        {\n            m_permMinv.resize(2 * m, 2 * m);\n            m_permMinv.setZero();\n            m_permMinv.diagonal().setOnes();\n        }\n    }\n\n    // Add correction vectors to the BFGS matrix\n    inline void add_correction(const RefConstVec& s, const RefConstVec& y)\n    {\n        const int loc = m_ptr % m_m;\n\n        m_s.col(loc).noalias() = s;\n        m_y.col(loc).noalias() = y;\n\n        // ys = y's = 1/rho\n        const Scalar ys = m_s.col(loc).dot(m_y.col(loc));\n        m_ys[loc] = ys;\n\n        m_theta = m_y.col(loc).squaredNorm() / ys;\n\n        if(m_ncorr < m_m)\n            m_ncorr++;\n\n        m_ptr = loc + 1;\n\n        if(LBFGSB)\n        {\n            // Minv = [-D         L']\n            //        [ L  theta*S'S]\n\n            // Copy -D\n            // Let S=[s[0], ..., s[m-1]], Y=[y[0], ..., y[m-1]]\n            // 
D = [s[0]'y[0], ..., s[m-1]'y[m-1]]\n            m_permMinv(loc, loc) = -ys;\n\n            // Update S'S\n            // We only store S'S in Minv, and multiply theta when LU decomposition is performed\n            Vector Ss = m_s.leftCols(m_ncorr).transpose() * m_s.col(loc);\n            m_permMinv.block(m_m + loc, m_m, 1, m_ncorr).noalias() = Ss.transpose();\n            m_permMinv.block(m_m, m_m + loc, m_ncorr, 1).noalias() = Ss;\n\n            // Compute L\n            // L = [          0                                     ]\n            //     [  s[1]'y[0]             0                       ]\n            //     [  s[2]'y[0]     s[2]'y[1]                       ]\n            //     ...\n            //     [s[m-1]'y[0] ... ... ... ... ... s[m-1]'y[m-2]  0]\n            //\n            // L_next = [        0                                   ]\n            //          [s[2]'y[1]             0                     ]\n            //          [s[3]'y[1]     s[3]'y[2]                     ]\n            //          ...\n            //          [s[m]'y[1] ... ... ... ... ... s[m]'y[m-1]  0]\n            const int len = m_ncorr - 1;\n            // First zero out the column of oldest y\n            if(m_ncorr >= m_m)\n                m_permMinv.block(m_m, loc, m_m, 1).setZero();\n            // Compute the row associated with new s\n            // The current row is loc\n            // End with column (loc + m - 1) % m\n            // Length is len\n            int yloc = (loc + m_m - 1) % m_m;\n            for(int i = 0; i < len; i++)\n            {\n                m_permMinv(m_m + loc, yloc) = m_s.col(loc).dot(m_y.col(yloc));\n                yloc = (yloc + m_m - 1) % m_m;\n            }\n\n            // Matrix LDLT factorization\n            m_permMinv.block(m_m, m_m, m_m, m_m) *= m_theta;\n            m_permMsolver.compute(m_permMinv);\n            m_permMinv.block(m_m, m_m, m_m, m_m) /= m_theta;\n        }\n    }\n\n    // Recursive formula to compute a * H * v, where a is a scalar, and v is [n x 1]\n    // H0 = (1/theta) * I is the initial approximation to H\n    // Algorithm 7.4 of Nocedal, J., & Wright, S. (2006). 
Numerical optimization.\n    inline void apply_Hv(const Vector& v, const Scalar& a, Vector& res)\n    {\n        res.resize(v.size());\n\n        // L-BFGS two-loop recursion\n\n        // Loop 1\n        res.noalias() = a * v;\n        int j = m_ptr % m_m;\n        for(int i = 0; i < m_ncorr; i++)\n        {\n            j = (j + m_m - 1) % m_m;\n            m_alpha[j] = m_s.col(j).dot(res) / m_ys[j];\n            res.noalias() -= m_alpha[j] * m_y.col(j);\n        }\n\n        // Apply initial H0\n        res /= m_theta;\n\n        // Loop 2\n        for(int i = 0; i < m_ncorr; i++)\n        {\n            const Scalar beta = m_y.col(j).dot(res) / m_ys[j];\n            res.noalias() += (m_alpha[j] - beta) * m_s.col(j);\n            j = (j + 1) % m_m;\n        }\n    }\n\n    //========== The following functions are only used in L-BFGS-B algorithm ==========//\n\n    // Return the value of theta\n    inline Scalar theta() const { return m_theta; }\n\n    // Return current number of correction vectors\n    inline int num_corrections() const { return m_ncorr; }\n\n    // W = [Y, theta * S]\n    // W [n x (2*ncorr)], v [n x 1], res [(2*ncorr) x 1]\n    // res preserves the ordering of Y and S columns\n    inline void apply_Wtv(const Vector& v, Vector& res) const\n    {\n        res.resize(2 * m_ncorr);\n        res.head(m_ncorr).noalias() = m_y.leftCols(m_ncorr).transpose() * v;\n        res.tail(m_ncorr).noalias() = m_theta * m_s.leftCols(m_ncorr).transpose() * v;\n    }\n\n    // The b-th row of the W matrix\n    // Preserves the ordering of Y and S columns\n    // Return as a column vector\n    inline Vector Wb(int b) const\n    {\n        Vector res(2 * m_ncorr);\n        for(int j = 0; j < m_ncorr; j++)\n        {\n            res[j] = m_y(b, j);\n            res[m_ncorr + j] = m_s(b, j);\n        }\n        res.tail(m_ncorr) *= m_theta;\n        return res;\n    }\n\n    // Extract rows of W\n    inline Matrix Wb(const IndexSet& b) const\n    {\n        const int nb = b.size();\n        const int* bptr = b.data();\n        Matrix res(nb, 2 * m_ncorr);\n\n        for(int j = 0; j < m_ncorr; j++)\n        {\n            const Scalar* Yptr = &m_y(0, j);\n            const Scalar* Sptr = &m_s(0, j);\n            Scalar* resYptr = res.data() + j * nb;\n            Scalar* resSptr = resYptr + m_ncorr * nb;\n            for(int i = 0; i < nb; i++)\n            {\n                const int row = bptr[i];\n                resYptr[i] = Yptr[row];\n                resSptr[i] = Sptr[row];\n            }\n        }\n        return res;\n    }\n\n    // M is [(2*ncorr) x (2*ncorr)], v is [(2*ncorr) x 1]\n    inline void apply_Mv(const Vector& v, Vector& res) const\n    {\n        res.resize(2 * m_ncorr);\n        if(m_ncorr < 1)\n            return;\n\n        Vector vpadding = Vector::Zero(2 * m_m);\n        vpadding.head(m_ncorr).noalias() = v.head(m_ncorr);\n        vpadding.segment(m_m, m_ncorr).noalias() = v.tail(m_ncorr);\n\n        // Solve linear equation\n        m_permMsolver.solve_inplace(vpadding);\n\n        res.head(m_ncorr).noalias() = vpadding.head(m_ncorr);\n        res.tail(m_ncorr).noalias() = vpadding.segment(m_m, m_ncorr);\n    }\n\n    // Compute W'Pv\n    // W [n x (2*ncorr)], v [nP x 1], res [(2*ncorr) x 1]\n    // res preserves the ordering of Y and S columns\n    // Returns false if the result is known to be zero\n    inline bool apply_WtPv(const IndexSet& P_set, const Vector& v, Vector& res, bool test_zero = false) const\n    {\n        const int* Pptr = P_set.data();\n     
   const Scalar* vptr = v.data();\n        int nP = P_set.size();\n\n        // Remove zeros in v to save computation\n        IndexSet P_reduced;\n        std::vector<Scalar> v_reduced;\n        if(test_zero)\n        {\n            P_reduced.reserve(nP);\n            for(int i = 0; i < nP; i++)\n            {\n                if(vptr[i] != Scalar(0))\n                {\n                    P_reduced.push_back(Pptr[i]);\n                    v_reduced.push_back(vptr[i]);\n                }\n            }\n            Pptr = P_reduced.data();\n            vptr = v_reduced.data();\n            nP = P_reduced.size();\n        }\n\n        res.resize(2 * m_ncorr);\n        if(m_ncorr < 1 || nP < 1)\n        {\n            res.setZero();\n            return false;\n        }\n\n        for(int j = 0; j < m_ncorr; j++)\n        {\n            Scalar resy = Scalar(0), ress = Scalar(0);\n            const Scalar* yptr = &m_y(0, j);\n            const Scalar* sptr = &m_s(0, j);\n            for(int i = 0; i < nP; i++)\n            {\n                const int row = Pptr[i];\n                resy += yptr[row] * vptr[i];\n                ress += sptr[row] * vptr[i];\n            }\n            res[j] = resy;\n            res[m_ncorr + j] = ress;\n        }\n        res.tail(m_ncorr) *= m_theta;\n        return true;\n    }\n\n    // Compute s * P'WMv\n    // Assume that v[2*ncorr x 1] has the same ordering (permutation) as W and M\n    // Returns false if the result is known to be zero\n    inline bool apply_PtWMv(const IndexSet& P_set, const Vector& v, Vector& res, const Scalar& scale) const\n    {\n        const int nP = P_set.size();\n        res.resize(nP);\n        res.setZero();\n        if(m_ncorr < 1 || nP < 1)\n            return false;\n\n        Vector Mv;\n        apply_Mv(v, Mv);\n        // WP * Mv\n        Mv.tail(m_ncorr) *= m_theta;\n        for(int j = 0; j < m_ncorr; j++)\n        {\n            const Scalar* yptr = &m_y(0, j);\n            const Scalar* sptr = &m_s(0, j);\n            const Scalar Mvy = Mv[j], Mvs = Mv[m_ncorr + j];\n            for(int i = 0; i < nP; i++)\n            {\n                const int row = P_set[i];\n                res[i] += Mvy * yptr[row] + Mvs * sptr[row];\n            }\n        }\n        res *= scale;\n        return true;\n    }\n    // If the P'W matrix has been explicitly formed, do a direct matrix multiplication\n    inline bool apply_PtWMv(const Matrix& WP, const Vector& v, Vector& res, const Scalar& scale) const\n    {\n        const int nP = WP.rows();\n        res.resize(nP);\n        if(m_ncorr < 1 || nP < 1)\n        {\n            res.setZero();\n            return false;\n        }\n\n        Vector Mv;\n        apply_Mv(v, Mv);\n        // WP * Mv\n        Mv.tail(m_ncorr) *= m_theta;\n        res.noalias() = scale * (WP * Mv);\n        return true;\n    }\n\n    // Compute F'BAb = -(F'W)M(W'AA'd)\n    // W'd is known, and AA'+FF'=I, so W'AA'd = W'd - W'FF'd\n    // Usually d contains many zeros, so we first compute the number of nonzero elements in A set and F set,\n    // denoted as nnz_act and nnz_fv, respectively\n    // If nnz_act is smaller, compute W'AA'd = WA' (A'd) directly\n    // If nnz_fv is smaller, compute W'AA'd = W'd - WF' * (F'd)\n    inline void compute_FtBAb(\n        const Matrix& WF, const IndexSet& fv_set, const IndexSet& newact_set, const Vector& Wd, const Vector& drt,\n        Vector& res\n    ) const\n    {\n        const int nact = newact_set.size();\n        const int nfree = WF.rows();\n        
res.resize(nfree);\n        if(m_ncorr < 1 || nact < 1 || nfree < 1)\n        {\n            res.setZero();\n            return;\n        }\n\n        // W'AA'd\n        Vector rhs(2 * m_ncorr);\n        if(nact <= nfree)\n        {\n            // Construct A'd\n            Vector Ad(nfree);\n            for(int i = 0; i < nact; i++)\n                Ad[i] = drt[newact_set[i]];\n            apply_WtPv(newact_set, Ad, rhs);\n        } else {\n            // Construct F'd\n            Vector Fd(nfree);\n            for(int i = 0; i < nfree; i++)\n                Fd[i] = drt[fv_set[i]];\n            // Compute W'AA'd = W'd - WF' * (F'd)\n            rhs.noalias() = WF.transpose() * Fd;\n            rhs.tail(m_ncorr) *= m_theta;\n            rhs.noalias() = Wd - rhs;\n        }\n\n        apply_PtWMv(WF, rhs, res, Scalar(-1));\n    }\n\n    // Compute inv(P'BP) * v\n    // P represents an index set\n    // inv(P'BP) * v = v / theta + WP * inv(inv(M) - WP' * WP / theta) * WP' * v / theta^2\n    //\n    // v is [nP x 1]\n    inline void solve_PtBP(const Matrix& WP, const Vector& v, Vector& res) const\n    {\n        const int nP = WP.rows();\n        res.resize(nP);\n        if(m_ncorr < 1 || nP < 1)\n        {\n            res.noalias() = v / m_theta;\n            return;\n        }\n\n        // Compute the matrix in the middle (only the lower triangular part is needed)\n        // Remember that W = [Y, theta * S], but we do not store theta in WP\n        Matrix mid(2 * m_ncorr, 2 * m_ncorr);\n        // [0:(ncorr - 1), 0:(ncorr - 1)]\n        for(int j = 0; j < m_ncorr; j++)\n        {\n            mid.col(j).segment(j, m_ncorr - j).noalias() = m_permMinv.col(j).segment(j, m_ncorr - j) -\n                WP.block(0, j, nP, m_ncorr - j).transpose() * WP.col(j) / m_theta;\n        }\n        // [ncorr:(2 * ncorr - 1), 0:(ncorr - 1)]\n        mid.block(m_ncorr, 0, m_ncorr, m_ncorr).noalias() = m_permMinv.block(m_m, 0, m_ncorr, m_ncorr) -\n            WP.rightCols(m_ncorr).transpose() * WP.leftCols(m_ncorr);\n        // [ncorr:(2 * ncorr - 1), ncorr:(2 * ncorr - 1)]\n        for(int j = 0; j < m_ncorr; j++)\n        {\n            mid.col(m_ncorr + j).segment(m_ncorr + j, m_ncorr - j).noalias() = m_theta *\n                (m_permMinv.col(m_m + j).segment(m_m + j, m_ncorr - j) - WP.rightCols(m_ncorr - j).transpose() * WP.col(m_ncorr + j));\n        }\n        // Factorization\n        BKLDLT<Scalar> midsolver(mid);\n        // Compute the final result\n        Vector WPv = WP.transpose() * v;\n        WPv.tail(m_ncorr) *= m_theta;\n        midsolver.solve_inplace(WPv);\n        WPv.tail(m_ncorr) *= m_theta;\n        res.noalias() = v / m_theta + (WP * WPv) / (m_theta * m_theta);\n    }\n\n    // Compute P'BQv, where P and Q are two mutually exclusive index selection operators\n    // P'BQv = -WP * M * WQ' * v\n    // Returns false if the result is known to be zero\n    inline bool apply_PtBQv(const Matrix& WP, const IndexSet& Q_set, const Vector& v, Vector& res, bool test_zero = false) const\n    {\n        const int nP = WP.rows();\n        const int nQ = Q_set.size();\n        res.resize(nP);\n        if(m_ncorr < 1 || nP < 1 || nQ < 1)\n        {\n            res.setZero();\n            return false;\n        }\n\n        Vector WQtv;\n        bool nonzero = apply_WtPv(Q_set, v, WQtv, test_zero);\n        if(!nonzero)\n        {\n            res.setZero();\n            return false;\n        }\n\n        Vector MWQtv;\n        apply_Mv(WQtv, MWQtv);\n        MWQtv.tail(m_ncorr) *= m_theta;\n  
      res.noalias() = -WP * MWQtv;\n        return true;\n    }\n    // If the Q'W matrix has been explicitly formed, do a direct matrix multiplication\n    inline bool apply_PtBQv(const Matrix& WP, const Matrix& WQ, const Vector& v, Vector& res) const\n    {\n        const int nP = WP.rows();\n        const int nQ = WQ.rows();\n        res.resize(nP);\n        if(m_ncorr < 1 || nP < 1 || nQ < 1)\n        {\n            res.setZero();\n            return false;\n        }\n\n        // Remember that W = [Y, theta * S], so we need to multiply theta to the second half\n        Vector WQtv = WQ.transpose() * v;\n        WQtv.tail(m_ncorr) *= m_theta;\n        Vector MWQtv;\n        apply_Mv(WQtv, MWQtv);\n        MWQtv.tail(m_ncorr) *= m_theta;\n        res.noalias() = -WP * MWQtv;\n        return true;\n    }\n};\n\n\n} // namespace LBFGSpp\n\n/// \\endcond\n\n#endif // BFGS_MAT_H\n"
  },
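  {
    "path": "external_libs/LBFGSpp/example_two_loop_sketch.cpp",
    "content": "// Editor's sketch (hypothetical file, not part of the upstream LBFGS++ sources):\n// a standalone restatement of the two-loop recursion that BFGSMat::apply_Hv above\n// implements with cyclic storage (Algorithm 7.4 of Nocedal & Wright, Numerical\n// Optimization). Correction pairs are kept in plain vectors, oldest first, to make\n// the algorithm easier to follow; the scaling gamma equals 1/theta in BFGSMat.\n#include <vector>\n#include <Eigen/Core>\n\nusing Eigen::VectorXd;\n\n// Compute r = H * v, where H is the L-BFGS inverse-Hessian approximation\n// defined by correction pairs s_i = x_{i+1} - x_i and y_i = g_{i+1} - g_i.\nVectorXd two_loop_recursion(const std::vector<VectorXd>& s,\n                            const std::vector<VectorXd>& y,\n                            const VectorXd& v)\n{\n    const int m = static_cast<int>(s.size());\n    if(m == 0)\n        return v;  // no history yet: H = I\n\n    std::vector<double> alpha(m), rho(m);\n    VectorXd q = v;\n\n    // Loop 1: newest to oldest\n    for(int i = m - 1; i >= 0; i--)\n    {\n        rho[i] = 1.0 / y[i].dot(s[i]);\n        alpha[i] = rho[i] * s[i].dot(q);\n        q -= alpha[i] * y[i];\n    }\n\n    // Initial matrix H0 = gamma * I with gamma = s'y / y'y from the newest pair\n    const double gamma = s[m - 1].dot(y[m - 1]) / y[m - 1].squaredNorm();\n    VectorXd r = gamma * q;\n\n    // Loop 2: oldest to newest\n    for(int i = 0; i < m; i++)\n    {\n        const double beta = rho[i] * y[i].dot(r);\n        r += (alpha[i] - beta) * s[i];\n    }\n    return r;\n}\n"
  },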
  {
    "path": "external_libs/LBFGSpp/include/LBFGSpp/BKLDLT.h",
    "content": "// Copyright (C) 2020 Yixuan Qiu <yixuan.qiu@cos.name>\n// Under MIT license\n\n#ifndef BK_LDLT_H\n#define BK_LDLT_H\n\n#include <vector>\n#include <stdexcept>\n#include <Eigen/Core>\n\n\n/// \\cond\n\nnamespace LBFGSpp {\n\n\nenum COMPUTATION_INFO\n{\n    SUCCESSFUL = 0,\n    NOT_COMPUTED,\n    NUMERICAL_ISSUE\n};\n\n\n// Bunch-Kaufman LDLT decomposition\n// References:\n// 1. Bunch, J. R., & Kaufman, L. (1977). Some stable methods for calculating inertia and solving symmetric linear systems.\n//    Mathematics of computation, 31(137), 163-179.\n// 2. Golub, G. H., & Van Loan, C. F. (2012). Matrix computations (Vol. 3). JHU press. Section 4.4.\n// 3. Bunch-Parlett diagonal pivoting <http://oz.nthu.edu.tw/~d947207/Chap13_GE3.ppt>\n// 4. Ashcraft, C., Grimes, R. G., & Lewis, J. G. (1998). Accurate symmetric indefinite linear equation solvers.\n//    SIAM Journal on Matrix Analysis and Applications, 20(2), 513-561.\ntemplate <typename Scalar = double>\nclass BKLDLT\n{\nprivate:\n    typedef Eigen::Index Index;\n    typedef Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic> Matrix;\n    typedef Eigen::Matrix<Scalar, Eigen::Dynamic, 1> Vector;\n    typedef Eigen::Map<Vector> MapVec;\n    typedef Eigen::Map<const Vector> MapConstVec;\n\n    typedef Eigen::Matrix<Index, Eigen::Dynamic, 1> IntVector;\n    typedef Eigen::Ref<Vector> GenericVector;\n    typedef Eigen::Ref<Matrix> GenericMatrix;\n    typedef const Eigen::Ref<const Matrix> ConstGenericMatrix;\n    typedef const Eigen::Ref<const Vector> ConstGenericVector;\n\n    Index m_n;\n    Vector m_data;                  // storage for a lower-triangular matrix\n    std::vector<Scalar*> m_colptr;  // pointers to columns\n    IntVector m_perm;               // [-2, -1, 3, 1, 4, 5]: 0 <-> 2, 1 <-> 1, 2 <-> 3, 3 <-> 1, 4 <-> 4, 5 <-> 5\n    std::vector< std::pair<Index, Index> > m_permc;  // compressed version of m_perm: [(0, 2), (2, 3), (3, 1)]\n\n    bool m_computed;\n    int  m_info;\n\n    // Access to elements\n    // Pointer to the k-th column\n    Scalar* col_pointer(Index k) { return m_colptr[k]; }\n    // A[i, j] -> m_colptr[j][i - j], i >= j\n    Scalar& coeff(Index i, Index j) { return m_colptr[j][i - j]; }\n    const Scalar& coeff(Index i, Index j) const { return m_colptr[j][i - j]; }\n    // A[i, i] -> m_colptr[i][0]\n    Scalar& diag_coeff(Index i) { return m_colptr[i][0]; }\n    const Scalar& diag_coeff(Index i) const { return m_colptr[i][0]; }\n\n    // Compute column pointers\n    void compute_pointer()\n    {\n        m_colptr.clear();\n        m_colptr.reserve(m_n);\n        Scalar* head = m_data.data();\n\n        for(Index i = 0; i < m_n; i++)\n        {\n            m_colptr.push_back(head);\n            head += (m_n - i);\n        }\n    }\n\n    // Copy mat - shift * I to m_data\n    void copy_data(ConstGenericMatrix& mat, int uplo, const Scalar& shift)\n    {\n        if(uplo == Eigen::Lower)\n        {\n            for(Index j = 0; j < m_n; j++)\n            {\n                const Scalar* begin = &mat.coeffRef(j, j);\n                const Index len = m_n - j;\n                std::copy(begin, begin + len, col_pointer(j));\n                diag_coeff(j) -= shift;\n            }\n        } else {\n            Scalar* dest = m_data.data();\n            for(Index i = 0; i < m_n; i++)\n            {\n                for(Index j = i; j < m_n; j++, dest++)\n                {\n                    *dest = mat.coeff(i, j);\n                }\n                diag_coeff(i) -= shift;\n            }\n        }\n   
 }\n\n    // Compute compressed permutations\n    void compress_permutation()\n    {\n        for(Index i = 0; i < m_n; i++)\n        {\n            // Recover the permutation action\n            const Index perm = (m_perm[i] >= 0) ? (m_perm[i]) : (-m_perm[i] - 1);\n            if(perm != i)\n                m_permc.push_back(std::make_pair(i, perm));\n        }\n    }\n\n    // Working on the A[k:end, k:end] submatrix\n    // Exchange k <-> r\n    // Assume r >= k\n    void pivoting_1x1(Index k, Index r)\n    {\n        // No permutation\n        if(k == r)\n        {\n            m_perm[k] = r;\n            return;\n        }\n\n        // A[k, k] <-> A[r, r]\n        std::swap(diag_coeff(k), diag_coeff(r));\n\n        // A[(r+1):end, k] <-> A[(r+1):end, r]\n        std::swap_ranges(&coeff(r + 1, k), col_pointer(k + 1), &coeff(r + 1, r));\n\n        // A[(k+1):(r-1), k] <-> A[r, (k+1):(r-1)]\n        Scalar* src = &coeff(k + 1, k);\n        for(Index j = k + 1; j < r; j++, src++)\n        {\n            std::swap(*src, coeff(r, j));\n        }\n\n        m_perm[k] = r;\n    }\n\n    // Working on the A[k:end, k:end] submatrix\n    // Exchange [k+1, k] <-> [r, p]\n    // Assume p >= k, r >= k+1\n    void pivoting_2x2(Index k, Index r, Index p)\n    {\n        pivoting_1x1(k, p);\n        pivoting_1x1(k + 1, r);\n\n        // A[k+1, k] <-> A[r, k]\n        std::swap(coeff(k + 1, k), coeff(r, k));\n\n        // Use negative signs to indicate a 2x2 block\n        // Also minus one to distinguish a negative zero from a positive zero\n        m_perm[k] = -m_perm[k] - 1;\n        m_perm[k + 1] = -m_perm[k + 1] - 1;\n    }\n\n    // A[r1, c1:c2] <-> A[r2, c1:c2]\n    // Assume r2 >= r1 > c2 >= c1\n    void interchange_rows(Index r1, Index r2, Index c1, Index c2)\n    {\n        if(r1 == r2)\n            return;\n\n        for(Index j = c1; j <= c2; j++)\n        {\n            std::swap(coeff(r1, j), coeff(r2, j));\n        }\n    }\n\n    // lambda = |A[r, k]| = max{|A[k+1, k]|, ..., |A[end, k]|}\n    // Largest (in magnitude) off-diagonal element in the first column of the current reduced matrix\n    // r is the row index\n    // Assume k < end\n    Scalar find_lambda(Index k, Index& r)\n    {\n        using std::abs;\n\n        const Scalar* head = col_pointer(k);  // => A[k, k]\n        const Scalar* end = col_pointer(k + 1);\n        // Start with r=k+1, lambda=A[k+1, k]\n        r = k + 1;\n        Scalar lambda = abs(head[1]);\n        // Scan remaining elements\n        for(const Scalar* ptr = head + 2; ptr < end; ptr++)\n        {\n            const Scalar abs_elem = abs(*ptr);\n            if(lambda < abs_elem)\n            {\n                lambda = abs_elem;\n                r = k + (ptr - head);\n            }\n        }\n\n        return lambda;\n    }\n\n    // sigma = |A[p, r]| = max {|A[k, r]|, ..., |A[end, r]|} \\ {A[r, r]}\n    // Largest (in magnitude) off-diagonal element in the r-th column of the current reduced matrix\n    // p is the row index\n    // Assume k < r < end\n    Scalar find_sigma(Index k, Index r, Index& p)\n    {\n        using std::abs;\n\n        // First search A[r+1, r], ...,  A[end, r], which has the same task as find_lambda()\n        // If r == end, we skip this search\n        Scalar sigma = Scalar(-1);\n        if(r < m_n - 1)\n            sigma = find_lambda(r, p);\n\n        // Then search A[k, r], ..., A[r-1, r], which maps to A[r, k], ..., A[r, r-1]\n        for(Index j = k; j < r; j++)\n        {\n            const Scalar abs_elem = 
abs(coeff(r, j));\n            if(sigma < abs_elem)\n            {\n                sigma = abs_elem;\n                p = j;\n            }\n        }\n\n        return sigma;\n    }\n\n    // Generate permutations and apply to A\n    // Return true if the resulting pivoting is 1x1, and false if 2x2\n    bool permutate_mat(Index k, const Scalar& alpha)\n    {\n        using std::abs;\n\n        Index r = k, p = k;\n        const Scalar lambda = find_lambda(k, r);\n\n        // If lambda=0, no need to interchange\n        if(lambda > Scalar(0))\n        {\n            const Scalar abs_akk = abs(diag_coeff(k));\n            // If |A[k, k]| >= alpha * lambda, no need to interchange\n            if(abs_akk < alpha * lambda)\n            {\n                const Scalar sigma = find_sigma(k, r, p);\n\n                // If sigma * |A[k, k]| >= alpha * lambda^2, no need to interchange\n                if(sigma * abs_akk < alpha * lambda * lambda)\n                {\n                    if(abs_akk >= alpha * sigma)\n                    {\n                        // Permutation on A\n                        pivoting_1x1(k, r);\n\n                        // Permutation on L\n                        interchange_rows(k, r, 0, k - 1);\n                        return true;\n                    } else {\n                        // There are two versions of permutation here\n                        // 1. A[k+1, k] <-> A[r, k]\n                        // 2. A[k+1, k] <-> A[r, p], where p >= k and r >= k+1\n                        //\n                        // Version 1 and 2 are used by Ref[1] and Ref[2], respectively\n\n                        // Version 1 implementation\n                        p = k;\n\n                        // Version 2 implementation\n                        // [r, p] and [p, r] are symmetric, but we need to make sure\n                        // p >= k and r >= k+1, so it is safe to always make r > p\n                        // One exception is when min{r,p} == k+1, in which case we make\n                        // r = k+1, so that only one permutation needs to be performed\n                        /* const Index rp_min = std::min(r, p);\n                        const Index rp_max = std::max(r, p);\n                        if(rp_min == k + 1)\n                        {\n                            r = rp_min; p = rp_max;\n                        } else {\n                            r = rp_max; p = rp_min;\n                        } */\n\n                        // Right now we use Version 1 since it reduces the overhead of interchange\n\n                        // Permutation on A\n                        pivoting_2x2(k, r, p);\n                        // Permutation on L\n                        interchange_rows(k, p, 0, k - 1);\n                        interchange_rows(k + 1, r, 0, k - 1);\n                        return false;\n                    }\n                }\n            }\n        }\n\n        return true;\n    }\n\n    // E = [e11, e12]\n    //     [e21, e22]\n    // Overwrite E with inv(E)\n    void inverse_inplace_2x2(Scalar& e11, Scalar& e21, Scalar& e22) const\n    {\n        // inv(E) = [d11, d12], d11 = e22/delta, d21 = -e21/delta, d22 = e11/delta\n        //          [d21, d22]\n        const Scalar delta = e11 * e22 - e21 * e21;\n        std::swap(e11, e22);\n        e11 /= delta;\n        e22 /= delta;\n        e21 = -e21 / delta;\n    }\n\n    // Return value is the status, SUCCESSFUL/NUMERICAL_ISSUE\n    int gaussian_elimination_1x1(Index k)\n    {\n     
   // D = 1 / A[k, k]\n        const Scalar akk = diag_coeff(k);\n        // Return NUMERICAL_ISSUE if not invertible\n        if(akk == Scalar(0))\n            return NUMERICAL_ISSUE;\n\n        diag_coeff(k) = Scalar(1) / akk;\n\n        // B -= l * l' / A[k, k], B := A[(k+1):end, (k+1):end], l := L[(k+1):end, k]\n        Scalar* lptr = col_pointer(k) + 1;\n        const Index ldim = m_n - k - 1;\n        MapVec l(lptr, ldim);\n        for(Index j = 0; j < ldim; j++)\n        {\n            MapVec(col_pointer(j + k + 1), ldim - j).noalias() -= (lptr[j] / akk) * l.tail(ldim - j);\n        }\n\n        // l /= A[k, k]\n        l /= akk;\n\n        return SUCCESSFUL;\n    }\n\n    // Return value is the status, SUCCESSFUL/NUMERICAL_ISSUE\n    int gaussian_elimination_2x2(Index k)\n    {\n        // D = inv(E)\n        Scalar& e11 = diag_coeff(k);\n        Scalar& e21 = coeff(k + 1, k);\n        Scalar& e22 = diag_coeff(k + 1);\n        // Return NUMERICAL_ISSUE if not invertible\n        if(e11 * e22 - e21 * e21 == Scalar(0))\n            return NUMERICAL_ISSUE;\n\n        inverse_inplace_2x2(e11, e21, e22);\n\n        // X = l * inv(E), l := L[(k+2):end, k:(k+1)]\n        Scalar* l1ptr = &coeff(k + 2, k);\n        Scalar* l2ptr = &coeff(k + 2, k + 1);\n        const Index ldim = m_n - k - 2;\n        MapVec l1(l1ptr, ldim), l2(l2ptr, ldim);\n\n        Eigen::Matrix<Scalar, Eigen::Dynamic, 2> X(ldim, 2);\n        X.col(0).noalias() = l1 * e11 + l2 * e21;\n        X.col(1).noalias() = l1 * e21 + l2 * e22;\n\n        // B -= l * inv(E) * l' = X * l', B = A[(k+2):end, (k+2):end]\n        for(Index j = 0; j < ldim; j++)\n        {\n            MapVec(col_pointer(j + k + 2), ldim - j).noalias() -= (X.col(0).tail(ldim - j) * l1ptr[j] + X.col(1).tail(ldim - j) * l2ptr[j]);\n        }\n\n        // l = X\n        l1.noalias() = X.col(0);\n        l2.noalias() = X.col(1);\n\n        return SUCCESSFUL;\n    }\n\npublic:\n    BKLDLT() :\n        m_n(0), m_computed(false), m_info(NOT_COMPUTED)\n    {}\n\n    // Factorize mat - shift * I\n    BKLDLT(ConstGenericMatrix& mat, int uplo = Eigen::Lower, const Scalar& shift = Scalar(0)) :\n        m_n(mat.rows()), m_computed(false), m_info(NOT_COMPUTED)\n    {\n        compute(mat, uplo, shift);\n    }\n\n    void compute(ConstGenericMatrix& mat, int uplo = Eigen::Lower, const Scalar& shift = Scalar(0))\n    {\n        using std::abs;\n\n        m_n = mat.rows();\n        if(m_n != mat.cols())\n            throw std::invalid_argument(\"BKLDLT: matrix must be square\");\n\n        m_perm.setLinSpaced(m_n, 0, m_n - 1);\n        m_permc.clear();\n\n        // Copy data\n        m_data.resize((m_n * (m_n + 1)) / 2);\n        compute_pointer();\n        copy_data(mat, uplo, shift);\n\n        const Scalar alpha = (1.0 + std::sqrt(17.0)) / 8.0;\n        Index k = 0;\n        for(k = 0; k < m_n - 1; k++)\n        {\n            // 1. Interchange rows and columns of A, and save the result to m_perm\n            bool is_1x1 = permutate_mat(k, alpha);\n\n            // 2. Gaussian elimination\n            if(is_1x1)\n            {\n                m_info = gaussian_elimination_1x1(k);\n            } else {\n                m_info = gaussian_elimination_2x2(k);\n                k++;\n            }\n\n            // 3. 
Check status\n            if(m_info != SUCCESSFUL)\n                break;\n        }\n        // Invert the last 1x1 block if it exists\n        if(k == m_n - 1)\n        {\n            const Scalar akk = diag_coeff(k);\n            // Guard the inversion: on a zero pivot, record the numerical issue\n            // instead of dividing by zero\n            if(akk == Scalar(0))\n                m_info = NUMERICAL_ISSUE;\n            else\n                diag_coeff(k) = Scalar(1) / akk;\n        }\n\n        compress_permutation();\n\n        m_computed = true;\n    }\n\n    // Solve Ax=b\n    void solve_inplace(GenericVector b) const\n    {\n        if(!m_computed)\n            throw std::logic_error(\"BKLDLT: need to call compute() first\");\n\n        // PAP' = LDL'\n        // 1. b -> Pb\n        Scalar* x = b.data();\n        MapVec res(x, m_n);\n        Index npermc = m_permc.size();\n        for(Index i = 0; i < npermc; i++)\n        {\n            std::swap(x[m_permc[i].first], x[m_permc[i].second]);\n        }\n\n        // 2. Lz = Pb\n        // If m_perm[end] < 0, then end with m_n - 3, otherwise end with m_n - 2\n        const Index end = (m_perm[m_n - 1] < 0) ? (m_n - 3) : (m_n - 2);\n        for(Index i = 0; i <= end; i++)\n        {\n            const Index b1size = m_n - i - 1;\n            const Index b2size = b1size - 1;\n            if(m_perm[i] >= 0)\n            {\n                MapConstVec l(&coeff(i + 1, i), b1size);\n                res.segment(i + 1, b1size).noalias() -= l * x[i];\n            } else {\n                MapConstVec l1(&coeff(i + 2, i), b2size);\n                MapConstVec l2(&coeff(i + 2, i + 1), b2size);\n                res.segment(i + 2, b2size).noalias() -= (l1 * x[i] + l2 * x[i + 1]);\n                i++;\n            }\n        }\n\n        // 3. Dw = z\n        for(Index i = 0; i < m_n; i++)\n        {\n            const Scalar e11 = diag_coeff(i);\n            if(m_perm[i] >= 0)\n            {\n                x[i] *= e11;\n            } else {\n                const Scalar e21 = coeff(i + 1, i), e22 = diag_coeff(i + 1);\n                const Scalar wi = x[i] * e11 + x[i + 1] * e21;\n                x[i + 1] = x[i] * e21 + x[i + 1] * e22;\n                x[i] = wi;\n                i++;\n            }\n        }\n\n        // 4. L'y = w\n        // If m_perm[end] < 0, then start with m_n - 3, otherwise start with m_n - 2\n        Index i = (m_perm[m_n - 1] < 0) ? (m_n - 3) : (m_n - 2);\n        for(; i >= 0; i--)\n        {\n            const Index ldim = m_n - i - 1;\n            MapConstVec l(&coeff(i + 1, i), ldim);\n            x[i] -= res.segment(i + 1, ldim).dot(l);\n\n            if(m_perm[i] < 0)\n            {\n                MapConstVec l2(&coeff(i + 1, i - 1), ldim);\n                x[i - 1] -= res.segment(i + 1, ldim).dot(l2);\n                i--;\n            }\n        }\n\n        // 5. x = P'y\n        for(Index i = npermc - 1; i >= 0; i--)\n        {\n            std::swap(x[m_permc[i].first], x[m_permc[i].second]);\n        }\n    }\n\n    Vector solve(ConstGenericVector& b) const\n    {\n        Vector res = b;\n        solve_inplace(res);\n        return res;\n    }\n\n    int info() const { return m_info; }\n};\n\n\n} // namespace LBFGSpp\n\n/// \\endcond\n\n#endif // BK_LDLT_H\n"
  },
  {
    "path": "external_libs/LBFGSpp/include/LBFGSpp/Cauchy.h",
    "content": "// Copyright (C) 2020 Yixuan Qiu <yixuan.qiu@cos.name>\n// Under MIT license\n\n#ifndef CAUCHY_H\n#define CAUCHY_H\n\n#include <vector>\n#include <Eigen/Core>\n#include \"BFGSMat.h\"\n\n\n/// \\cond\n\nnamespace LBFGSpp {\n\n\n//\n// Class to compute the generalized Cauchy point (GCP) for the L-BFGS-B algorithm,\n// mainly for internal use.\n//\n// The target of the GCP procedure is to find a step size t such that\n// x(t) = x0 - t * g is a local minimum of the quadratic function m(x),\n// where m(x) is a local approximation to the objective function.\n//\n// First determine a sequence of break points t0=0, t1, t2, ..., tn.\n// On each interval [t[i-1], t[i]], x is changing linearly.\n// After passing a break point, one or more coordinates of x will be fixed at the bounds.\n// We search the first local minimum of m(x) by examining the intervals [t[i-1], t[i]] sequentially.\n//\n// Reference:\n// [1] R. H. Byrd, P. Lu, and J. Nocedal (1995). A limited memory algorithm for bound constrained optimization.\n//\ntemplate <typename Scalar>\nclass ArgSort\n{\nprivate:\n    typedef Eigen::Matrix<Scalar, Eigen::Dynamic, 1> Vector;\n    typedef std::vector<int> IndexSet;\n\n    const Scalar* values;\n\npublic:\n    ArgSort(const Vector& value_vec) :\n        values(value_vec.data())\n    {}\n\n    inline bool operator()(int key1, int key2) { return values[key1] < values[key2]; }\n    inline void sort_key(IndexSet& key_vec) const\n    {\n        std::sort(key_vec.begin(), key_vec.end(), *this);\n    }\n};\n\ntemplate <typename Scalar>\nclass Cauchy\n{\nprivate:\n    typedef Eigen::Matrix<Scalar, Eigen::Dynamic, 1> Vector;\n    typedef Eigen::Matrix<int, Eigen::Dynamic, 1> IntVector;\n    typedef Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic> Matrix;\n    typedef std::vector<int> IndexSet;\n\n    // Find the smallest index i such that brk[ord[i]] > t, assuming brk[ord] is already sorted.\n    // If the return value equals n, then all values are <= t.\n    static int search_greater(const Vector& brk, const IndexSet& ord, const Scalar& t, int start = 0)\n    {\n        const int nord = ord.size();\n        int i;\n        for(i = start; i < nord; i++)\n        {\n            if(brk[ord[i]] > t)\n                break;\n        }\n\n        return i;\n    }\n\npublic:\n    // bfgs:       An object that represents the BFGS approximation matrix.\n    // x0:         Current parameter vector.\n    // g:          Gradient at x0.\n    // lb:         Lower bounds for x.\n    // ub:         Upper bounds for x.\n    // xcp:        The output generalized Cauchy point.\n    // vecc:       c = W'(xcp - x0), used in the subspace minimization routine.\n    // newact_set: Coordinates that newly become active during the GCP procedure.\n    // fv_set:     Free variable set.\n    static void get_cauchy_point(\n        const BFGSMat<Scalar, true>& bfgs, const Vector& x0, const Vector& g, const Vector& lb, const Vector& ub,\n        Vector& xcp, Vector& vecc, IndexSet& newact_set, IndexSet& fv_set\n    )\n    {\n        // std::cout << \"========================= Entering GCP search =========================\\n\\n\";\n\n        // Initialization\n        const int n = x0.size();\n        xcp.resize(n);\n        xcp.noalias() = x0;\n        vecc.resize(2 * bfgs.num_corrections());\n        vecc.setZero();\n        newact_set.clear();\n        newact_set.reserve(n);\n        fv_set.clear();\n        fv_set.reserve(n);\n\n        // Construct break points\n        Vector brk(n), vecd(n);\n        // If 
brk[i] == 0, i belongs to active set\n        // If brk[i] == Inf, i belongs to free variable set\n        // Others are currently undecided\n        IndexSet ord;\n        ord.reserve(n);\n        const Scalar inf = std::numeric_limits<Scalar>::infinity();\n        for(int i = 0; i < n; i++)\n        {\n            if(lb[i] == ub[i])\n                brk[i] = Scalar(0);\n            else if(g[i] < Scalar(0))\n                brk[i] = (x0[i] - ub[i]) / g[i];\n            else if(g[i] > Scalar(0))\n                brk[i] = (x0[i] - lb[i]) / g[i];\n            else\n                brk[i] = inf;\n\n            const bool iszero = (brk[i] == Scalar(0));\n            vecd[i] = iszero ? Scalar(0) : -g[i];\n\n            if(brk[i] == inf)\n                fv_set.push_back(i);\n            else if(!iszero)\n                ord.push_back(i);\n        }\n\n        // Sort indices of break points\n        ArgSort<Scalar> sorting(brk);\n        sorting.sort_key(ord);\n\n        // Break points `brko := brk[ord]` are in increasing order\n        // `ord` contains the coordinates that define the corresponding break points\n        // brk[i] == 0 <=> The i-th coordinate is on the boundary\n        const int nord = ord.size();\n        const int nfree = fv_set.size();\n        if( (nfree < 1) && (nord < 1) )\n        {\n            /* std::cout << \"** All coordinates at boundary **\\n\";\n            std::cout << \"\\n========================= Leaving GCP search =========================\\n\\n\"; */\n            return;\n        }\n\n        // First interval: [il=0, iu=brk[ord[0]]]\n        // In case ord is empty, we take iu=Inf\n\n        // p = W'd, c = 0\n        Vector vecp;\n        bfgs.apply_Wtv(vecd, vecp);\n        // f' = -d'd\n        Scalar fp = -vecd.squaredNorm();\n        // f'' = -theta * f' - p'Mp\n        Vector cache;\n        bfgs.apply_Mv(vecp, cache);  // cache = Mp\n        Scalar fpp = -bfgs.theta() * fp - vecp.dot(cache);\n\n        // Theoretical step size to move\n        Scalar deltatmin = -fp / fpp;\n\n        // Limit on the current interval\n        Scalar il = Scalar(0);\n        // We have excluded the case that max(brk) <= 0\n        int b = 0;\n        Scalar iu = (nord < 1) ? inf : brk[ord[b]];\n        Scalar deltat = iu - il;\n\n        /* int iter = 0;\n        std::cout << \"** Iter \" << iter << \" **\\n\";\n        std::cout << \"   fp = \" << fp << \", fpp = \" << fpp << \", deltatmin = \" << deltatmin << std::endl;\n        std::cout << \"   il = \" << il << \", iu = \" << iu << \", deltat = \" << deltat << std::endl; */\n\n        // If deltatmin >= deltat, we need to do the following things:\n        // 1. Update vecc\n        // 2. Since we are going to cross iu, the coordinates that define iu become active\n        // 3. Update some quantities on these new active coordinates (xcp, vecd, vecp)\n        // 4. 
Move to the next interval and compute the new deltatmin\n        bool crossed_all = false;\n        const int ncorr = bfgs.num_corrections();\n        Vector wact(2 * ncorr);\n        while(deltatmin >= deltat)\n        {\n            // Step 1\n            vecc.noalias() += deltat * vecp;\n\n            // Step 2\n            // First check how many coordinates will be active when we cross the previous iu\n            // b is the smallest number such that brko[b] == iu\n            // Let bp be the largest number such that brko[bp] == iu\n            // Then coordinates ord[b] to ord[bp] will be active\n            const int act_begin = b;\n            const int act_end = search_greater(brk, ord, iu, b) - 1;\n\n            // If nfree == 0 and act_end == nord-1, then we have crossed all coordinates\n            // We only need to update xcp from ord[b] to ord[bp], and then exit\n            if( (nfree == 0) && (act_end == nord - 1) )\n            {\n                // std::cout << \"** [ \";\n                for(int i = act_begin; i <= act_end; i++)\n                {\n                    const int act = ord[i];\n                    xcp[act] = (vecd[act] > Scalar(0)) ? ub[act] : lb[act];\n                    newact_set.push_back(act);\n                    // std::cout << act + 1 << \" \";\n                }\n                // std::cout << \"] become active **\\n\\n\";\n                // std::cout << \"** All break points visited **\\n\\n\";\n\n                crossed_all = true;\n                break;\n            }\n\n            // Step 3\n            // Update xcp and d on active coordinates\n            // std::cout << \"** [ \";\n            fp += deltat * fpp;\n            for(int i = act_begin; i <= act_end; i++)\n            {\n                const int act = ord[i];\n                xcp[act] = (vecd[act] > Scalar(0)) ? ub[act] : lb[act];\n                // z = xcp - x0\n                const Scalar zact = xcp[act] - x0[act];\n                const Scalar gact = g[act];\n                const Scalar ggact = gact * gact;\n                wact.noalias() = bfgs.Wb(act);\n                bfgs.apply_Mv(wact, cache);  // cache = Mw\n                fp += ggact + bfgs.theta() * gact * zact - gact * cache.dot(vecc);\n                fpp -= (bfgs.theta() * ggact + 2 * gact * cache.dot(vecp) + ggact * cache.dot(wact));\n                vecp.noalias() += gact * wact;\n                vecd[act] = Scalar(0);\n                newact_set.push_back(act);\n                // std::cout << act + 1 << \" \";\n            }\n            // std::cout << \"] become active **\\n\\n\";\n\n            // Step 4\n            // Theoretical step size to move\n            deltatmin = -fp / fpp;\n            // Update interval bound\n            il = iu;\n            b = act_end + 1;\n            // If we have visited all finite-valued break points, and have not exited earlier,\n            // then the next iu will be infinity. 
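\n            // (the minimization over the final, unbounded interval is then\n            // handled by the last step after the loop). 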
Simply exit the loop now\n            if(b >= nord)\n                break;\n            iu = brk[ord[b]];\n            // Width of the current interval\n            deltat = iu - il;\n\n            /* iter++;\n            std::cout << \"** Iter \" << iter << \" **\\n\";\n            std::cout << \"   fp = \" << fp << \", fpp = \" << fpp << \", deltatmin = \" << deltatmin << std::endl;\n            std::cout << \"   il = \" << il << \", iu = \" << iu << \", deltat = \" << deltat << std::endl; */\n        }\n\n        // Last step\n        if(!crossed_all)\n        {\n            deltatmin = std::max(deltatmin, Scalar(0));\n            vecc.noalias() += deltatmin * vecp;\n            const Scalar tfinal = il + deltatmin;\n            // Update xcp on free variable coordinates\n            for(int i = 0; i < nfree; i++)\n            {\n                const int coord = fv_set[i];\n                xcp[coord] = x0[coord] + tfinal * vecd[coord];\n            }\n            for(int i = b; i < nord; i++)\n            {\n                const int coord = ord[i];\n                xcp[coord] = x0[coord] + tfinal * vecd[coord];\n                fv_set.push_back(coord);\n            }\n        }\n        // std::cout << \"\\n========================= Leaving GCP search =========================\\n\\n\";\n    }\n};\n\n\n} // namespace LBFGSpp\n\n/// \\endcond\n\n#endif // CAUCHY_H\n"
  },
  {
    "path": "external_libs/LBFGSpp/include/LBFGSpp/LineSearchBacktracking.h",
    "content": "// Copyright (C) 2016-2020 Yixuan Qiu <yixuan.qiu@cos.name>\n// Under MIT license\n\n#ifndef LINE_SEARCH_BACKTRACKING_H\n#define LINE_SEARCH_BACKTRACKING_H\n\n#include <Eigen/Core>\n#include <stdexcept>  // std::runtime_error\n\n\nnamespace LBFGSpp {\n\n\n///\n/// The backtracking line search algorithm for L-BFGS. Mainly for internal use.\n///\ntemplate <typename Scalar>\nclass LineSearchBacktracking\n{\nprivate:\n    typedef Eigen::Matrix<Scalar, Eigen::Dynamic, 1> Vector;\n\npublic:\n    ///\n    /// Line search by backtracking.\n    ///\n    /// \\param f      A function object such that `f(x, grad)` returns the\n    ///               objective function value at `x`, and overwrites `grad` with\n    ///               the gradient.\n    /// \\param fx     In: The objective function value at the current point.\n    ///               Out: The function value at the new point.\n    /// \\param x      Out: The new point moved to.\n    /// \\param grad   In: The current gradient vector. Out: The gradient at the\n    ///               new point.\n    /// \\param step   In: The initial step length. Out: The calculated step length.\n    /// \\param drt    The current moving direction.\n    /// \\param xp     The current point.\n    /// \\param param  Parameters for the LBFGS algorithm\n    ///\n    template <typename Foo>\n    static void LineSearch(Foo& f, Scalar& fx, Vector& x, Vector& grad,\n                           Scalar& step,\n                           const Vector& drt, const Vector& xp,\n                           const LBFGSParam<Scalar>& param)\n    {\n        // Decreasing and increasing factors\n        const Scalar dec = 0.5;\n        const Scalar inc = 2.1;\n\n        // Check the value of step\n        if(step <= Scalar(0))\n            throw std::invalid_argument(\"'step' must be positive\");\n\n        // Save the function value at the current x\n        const Scalar fx_init = fx;\n        // Projection of gradient on the search direction\n        const Scalar dg_init = grad.dot(drt);\n        // Make sure d points to a descent direction\n        if(dg_init > 0)\n            throw std::logic_error(\"the moving direction increases the objective function value\");\n\n        const Scalar test_decr = param.ftol * dg_init;\n        Scalar width;\n\n        int iter;\n        for(iter = 0; iter < param.max_linesearch; iter++)\n        {\n            // x_{k+1} = x_k + step * d_k\n            x.noalias() = xp + step * drt;\n            // Evaluate this candidate\n            fx = f(x, grad);\n\n            if(fx > fx_init + step * test_decr || (fx != fx))\n            {\n                width = dec;\n            } else {\n                // Armijo condition is met\n                if(param.linesearch == LBFGS_LINESEARCH_BACKTRACKING_ARMIJO)\n                    break;\n\n                const Scalar dg = grad.dot(drt);\n                if(dg < param.wolfe * dg_init)\n                {\n                    width = inc;\n                } else {\n                    // Regular Wolfe condition is met\n                    if(param.linesearch == LBFGS_LINESEARCH_BACKTRACKING_WOLFE)\n                        break;\n\n                    if(dg > -param.wolfe * dg_init)\n                    {\n                        width = dec;\n                    } else {\n                        // Strong Wolfe condition is met\n                        break;\n                    }\n                }\n            }\n\n            if(step < param.min_step)\n                throw 
std::runtime_error(\"the line search step became smaller than the minimum value allowed\");\n\n            if(step > param.max_step)\n                throw std::runtime_error(\"the line search step became larger than the maximum value allowed\");\n\n            step *= width;\n        }\n\n        if(iter >= param.max_linesearch)\n            throw std::runtime_error(\"the line search routine reached the maximum number of iterations\");\n    }\n};\n\n\n} // namespace LBFGSpp\n\n#endif // LINE_SEARCH_BACKTRACKING_H\n"
  },
  {
    "path": "external_libs/LBFGSpp/include/LBFGSpp/LineSearchBracketing.h",
    "content": "// Copyright (C) 2016-2020 Yixuan Qiu <yixuan.qiu@cos.name>\n// Copyright (C) 2016-2020 Dirk Toewe <DirkToewe@GoogleMail.com>\n// Under MIT license\n\n#ifndef LINE_SEARCH_BRACKETING_H\n#define LINE_SEARCH_BRACKETING_H\n\n#include <Eigen/Core>\n#include <stdexcept>  // std::runtime_error\n\nnamespace LBFGSpp {\n\n\n///\n/// The bracketing line search algorithm for L-BFGS. Mainly for internal use.\n///\ntemplate <typename Scalar>\nclass LineSearchBracketing\n{\nprivate:\n    typedef Eigen::Matrix<Scalar, Eigen::Dynamic, 1> Vector;\n\npublic:\n    ///\n    /// Line search by bracketing. Similar to the backtracking line search\n    /// except that it actively maintains an upper and lower bound of the\n    /// current search range.\n    ///\n    /// \\param f      A function object such that `f(x, grad)` returns the\n    ///               objective function value at `x`, and overwrites `grad` with\n    ///               the gradient.\n    /// \\param fx     In: The objective function value at the current point.\n    ///               Out: The function value at the new point.\n    /// \\param x      Out: The new point moved to.\n    /// \\param grad   In: The current gradient vector. Out: The gradient at the\n    ///               new point.\n    /// \\param step   In: The initial step length. Out: The calculated step length.\n    /// \\param drt    The current moving direction.\n    /// \\param xp     The current point.\n    /// \\param param  Parameters for the LBFGS algorithm\n    ///\n    template <typename Foo>\n    static void LineSearch(Foo& f, Scalar& fx, Vector& x, Vector& grad,\n                           Scalar& step,\n                           const Vector& drt, const Vector& xp,\n                           const LBFGSParam<Scalar>& param)\n    {\n        // Check the value of step\n        if(step <= Scalar(0))\n            throw std::invalid_argument(\"'step' must be positive\");\n\n        // Save the function value at the current x\n        const Scalar fx_init = fx;\n        // Projection of gradient on the search direction\n        const Scalar dg_init = grad.dot(drt);\n        // Make sure d points to a descent direction\n        if(dg_init > 0)\n            throw std::logic_error(\"the moving direction increases the objective function value\");\n\n        const Scalar test_decr = param.ftol * dg_init;\n\n        // Upper and lower end of the current line search range\n        Scalar step_lo = 0,\n               step_hi = std::numeric_limits<Scalar>::infinity();\n\n        int iter;\n        for(iter = 0; iter < param.max_linesearch; iter++)\n        {\n            // x_{k+1} = x_k + step * d_k\n            x.noalias() = xp + step * drt;\n            // Evaluate this candidate\n            fx = f(x, grad);\n\n            if(fx > fx_init + step * test_decr || (fx != fx))\n            {\n                step_hi = step;\n            } else {\n                // Armijo condition is met\n                if(param.linesearch == LBFGS_LINESEARCH_BACKTRACKING_ARMIJO)\n                    break;\n\n                const Scalar dg = grad.dot(drt);\n                if(dg < param.wolfe * dg_init)\n                {\n                    step_lo = step;\n                } else {\n                    // Regular Wolfe condition is met\n                    if(param.linesearch == LBFGS_LINESEARCH_BACKTRACKING_WOLFE)\n                        break;\n\n                    if(dg > -param.wolfe * dg_init)\n                    {\n                        step_hi = step;\n              
      } else {\n                        // Strong Wolfe condition is met\n                        break;\n                    }\n                }\n            }\n\n            assert( step_lo < step_hi );\n\n            if(step < param.min_step)\n                throw std::runtime_error(\"the line search step became smaller than the minimum value allowed\");\n\n            if(step > param.max_step)\n                throw std::runtime_error(\"the line search step became larger than the maximum value allowed\");\n\n            // continue the search in the middle of the current search range\n            step = std::isinf(step_hi) ? 2*step : step_lo/2 + step_hi/2;\n        }\n\n        if(iter >= param.max_linesearch)\n            throw std::runtime_error(\"the line search routine reached the maximum number of iterations\");\n    }\n};\n\n\n} // namespace LBFGSpp\n\n#endif // LINE_SEARCH_BRACKETING_H\n\n"
  },
  {
    "path": "external_libs/LBFGSpp/include/LBFGSpp/LineSearchMoreThuente.h",
    "content": "// Copyright (C) 2020 Yixuan Qiu <yixuan.qiu@cos.name>\n// Under MIT license\n\n#ifndef LINE_SEARCH_MORE_THUENTE_H\n#define LINE_SEARCH_MORE_THUENTE_H\n\n#include <stdexcept>  // std::invalid_argument, std::runtime_error\n#include <Eigen/Core>\n#include \"LBFGSpp/Param.h\"\n\n\nnamespace LBFGSpp {\n\n\n///\n/// The line search algorithm by Moré and Thuente (1994), currently used for the L-BFGS-B algorithm.\n///\n/// The target of this line search algorithm is to find a step size \\f$\\alpha\\f$ that satisfies the strong Wolfe condition\n/// \\f$f(x+\\alpha d) \\le f(x) + \\alpha\\mu g(x)^T d\\f$ and \\f$|g(x+\\alpha d)^T d| \\le \\eta|g(x)^T d|\\f$.\n/// Our implementation is a simplified version of the algorithm in [1]. We assume that \\f$0<\\mu<\\eta<1\\f$, while in [1]\n/// they do not assume \\f$\\eta>\\mu\\f$. As a result, the algorithm in [1] has two stages, but in our implementation we\n/// only need the first stage to guarantee the convergence.\n///\n/// Reference:\n/// [1] Moré, J. J., & Thuente, D. J. (1994). Line search algorithms with guaranteed sufficient decrease. \n///\ntemplate <typename Scalar>\nclass LineSearchMoreThuente\n{\nprivate:\n    typedef Eigen::Matrix<Scalar, Eigen::Dynamic, 1> Vector;\n\n    // Mininum of a quadratic function that interpolates fa, ga, and fb\n    static Scalar quadratic_interp(const Scalar& a, const Scalar& b, const Scalar& fa, const Scalar& ga, const Scalar& fb)\n    {\n        const Scalar ba = b - a;\n        return a + Scalar(0.5) * ba * ba * ga / (fa - fb + ba * ga);\n    }\n\n    // Mininum of a quadratic function that interpolates ga and gb\n    // Assume that ga != gb\n    static Scalar quadratic_interp(const Scalar& a, const Scalar& b, const Scalar& ga, const Scalar& gb)\n    {\n        return b + (b - a) * gb / (ga - gb);\n    }\n\n    // Mininum of a cubic function that interpolates fa, ga, fb and gb\n    // Assume that a != b\n    static Scalar cubic_interp(const Scalar& a, const Scalar& b, const Scalar& fa, const Scalar& fb, const Scalar& ga, const Scalar& gb)\n    {\n        using std::abs;\n        using std::sqrt;\n\n        if(a == b)\n            return a;\n\n        const Scalar ba = b - a;\n        const Scalar ba2 = ba * ba;\n        const Scalar ba3 = ba2 * ba;\n        const Scalar fba = fb - fa;\n        const Scalar z = (ga + gb) * ba - Scalar(2) * fba;\n        const Scalar w = fba * ba - ga * ba2;\n\n        // If c3 = z/(b-a)^3 == 0, reduce to quadratic problem\n        const Scalar endmin = (fa < fb) ? a : b;\n        if(abs(z) < std::numeric_limits<Scalar>::epsilon())\n        {\n            const Scalar c2 = fba / ba2 - ga / ba;\n            const Scalar c1 = fba / ba - (a + b) * c2;\n            // Global minimum, can be infinity\n            const Scalar globmin = -c1 / (Scalar(2) * c2);\n            // If c2 <= 0, or globmin is outside [a, b], then the minimum is achieved at one end point\n            return (c2 > Scalar(0) && globmin >= a && globmin <= b) ? 
globmin : endmin;\n        }\n\n        // v = c1 / c2\n        const Scalar v = (-Scalar(2) * a * w + ga * ba3 + a * (a + Scalar(2) * b) * z) /\n            (w - (Scalar(2) * a + b) * z);\n        // u = c2 / (3 * c3), may be very large if c3 ~= 0\n        const Scalar u = (w / z - (Scalar(2) * a + b)) / Scalar(3);\n        // q'(x) = c1 + 2 * c2 * x + 3 * c3 * x^2 = 0\n        // x1 = -u * (1 + sqrt(1 - v/u))\n        // x2 = -u * (1 - sqrt(1 - v/u)) = -v / (1 + sqrt(1 - v/u))\n\n        // If q'(x) = 0 has no solution in [a, b], q(x) is monotone in [a, b]\n        // Case I: no solution globally, 1 - v/u <= 0\n        if(v / u >= Scalar(1))\n            return endmin;\n        // Case II: no solution in [a, b]\n        const Scalar vu = Scalar(1) + sqrt(Scalar(1) - v / u);\n        const Scalar sol1 = -u * vu;\n        const Scalar sol2 = -v / vu;\n        if( (sol1 - a) * (sol1 - b) >= Scalar(0) && (sol2 - a) * (sol2 - b) >= Scalar(0) )\n            return endmin;\n\n        // Now at least one solution is in (a, b)\n        // Check the second derivative\n        // q''(x) = 2 * c2 + 6 * c3 * x;\n        const Scalar c3 = z / ba3;\n        const Scalar c2 = Scalar(3) * c3 * u;\n        const Scalar qpp1 = Scalar(2) * c2 + Scalar(6) * c3 * sol1;\n        const Scalar sol = (qpp1 > Scalar(0)) ? sol1 : sol2;\n        // If the local minimum is not in [a, b], return one of the end points\n        if((sol - a) * (sol - b) >= Scalar(0))\n            return endmin;\n\n        // Compare the local minimum with the end points\n        const Scalar c1 = v * c2;\n        const Scalar fsol = fa + c1 * (sol- a) + c2 * (sol * sol - a * a) +\n            c3 * (sol * sol * sol - a * a * a);\n        return (fsol < std::min(fa, fb)) ? sol : endmin;\n    }\n\n    static Scalar step_selection(\n        const Scalar& al, const Scalar& au, const Scalar& at,\n        const Scalar& fl, const Scalar& fu, const Scalar& ft,\n        const Scalar& gl, const Scalar& gu, const Scalar& gt\n    )\n    {\n        if(al == au)\n            return al;\n\n        // ac: cubic interpolation of fl, ft, gl, gt\n        // aq: quadratic interpolation of fl, gl, ft\n        const Scalar ac = cubic_interp(al, at, fl, ft, gl, gt);\n        const Scalar aq = quadratic_interp(al, at, fl, gl, ft);\n        // Case 1: ft > fl\n        if(ft > fl)\n            return (std::abs(ac - al) < std::abs(aq - al)) ?\n                   ac :\n                   ((aq + ac) / Scalar(2));\n\n        // as: quadratic interpolation of gl and gt\n        const Scalar as = quadratic_interp(al, at, gl, gt);\n        // Case 2: ft <= fl, gt * gl < 0\n        if(gt * gl < Scalar(0))\n            return (std::abs(ac - at) >= std::abs(as - at)) ? ac : as;\n\n        // Case 3: ft <= fl, gt * gl >= 0, |gt| < |gl|\n        const Scalar delta = Scalar(0.66);\n        if(std::abs(gt) < std::abs(gl))\n        {\n            const Scalar res = (std::abs(ac - at) < std::abs(as - at)) ? 
ac : as;\n            return (at > al) ?\n                   std::min(at + delta * (au - at), res) :\n                   std::max(at + delta * (au - at), res);\n        }\n\n        // ae: cubic interpolation of ft, fu, gt, gu\n        const Scalar ae = cubic_interp(at, au, ft, fu, gt, gu);\n        // Case 4: ft <= fl, gt * gl >= 0, |gt| >= |gl|\n        return (at > al) ?\n               std::min(at + delta * (au - at), ae) :\n               std::max(at + delta * (au - at), ae);\n    }\n\npublic:\n    ///\n    /// Line search by Moré and Thuente (1994).\n    ///\n    /// \\param f        A function object such that `f(x, grad)` returns the\n    ///                 objective function value at `x`, and overwrites `grad` with\n    ///                 the gradient.\n    /// \\param fx       In: The objective function value at the current point.\n    ///                 Out: The function value at the new point.\n    /// \\param x        Out: The new point moved to.\n    /// \\param grad     In: The current gradient vector. Out: The gradient at the\n    ///                 new point.\n    /// \\param step     In: The initial step length. Out: The calculated step length.\n    /// \\param step_max The upper bound for the step size.\n    /// \\param drt      The current moving direction.\n    /// \\param xp       The current point.\n    /// \\param param    Parameters for the LBFGS algorithm\n    ///\n    template <typename Foo>\n    static void LineSearch(Foo& f, Scalar& fx, Vector& x, Vector& grad,\n                           Scalar& step, const Scalar& step_max,\n                           const Vector& drt, const Vector& xp,\n                           const LBFGSBParam<Scalar>& param)\n    {\n        // std::cout << \"========================= Entering line search =========================\\n\\n\";\n\n        // Check the value of step\n        if(step <= Scalar(0))\n            throw std::invalid_argument(\"'step' must be positive\");\n        if(step > step_max)\n            throw std::invalid_argument(\"'step' exceeds 'step_max'\");\n\n        // Save the function value at the current x\n        const Scalar fx_init = fx;\n        // Projection of gradient on the search direction\n        const Scalar dg_init = grad.dot(drt);\n\n        // std::cout << \"fx_init = \" << fx_init << \", dg_init = \" << dg_init << std::endl << std::endl;\n\n        // Make sure d points to a descent direction\n        if(dg_init >= 0)\n            throw std::logic_error(\"the moving direction does not decrease the objective function value\");\n\n        // Tolerance for convergence test\n        // Sufficient decrease\n        const Scalar test_decr = param.ftol * dg_init;\n        // Curvature\n        const Scalar test_curv = -param.wolfe * dg_init;\n\n        // The bracketing interval\n        Scalar  I_lo = Scalar(0),                           I_hi = std::numeric_limits<Scalar>::infinity();\n        Scalar fI_lo = Scalar(0),                          fI_hi = std::numeric_limits<Scalar>::infinity();\n        Scalar gI_lo = (Scalar(1) - param.ftol) * dg_init, gI_hi = std::numeric_limits<Scalar>::infinity();\n        // Function value and gradient at the current step size\n        x.noalias() = xp + step * drt;\n        fx = f(x, grad);\n        Scalar dg = grad.dot(drt);\n\n        // std::cout << \"max_step = \" << step_max << \", step = \" << step << \", fx = \" << fx << \", dg = \" << dg << std::endl;\n\n        // Convergence test\n        if(fx <= fx_init + step * test_decr && std::abs(dg) <= 
test_curv)\n        {\n            // std::cout << \"** Criteria met\\n\\n\";\n            // std::cout << \"========================= Leaving line search =========================\\n\\n\";\n            return;\n        }\n\n        // Extrapolation factor\n        const Scalar delta = Scalar(1.1);\n        int iter;\n        for(iter = 0; iter < param.max_linesearch; iter++)\n        {\n            // ft = psi(step) = f(xp + step * drt) - f(xp) - step * test_decr\n            // gt = psi'(step) = dg - mu * dg_init\n            // mu = param.ftol\n            const Scalar ft = fx - fx_init - step * test_decr;\n            const Scalar gt = dg - param.ftol * dg_init;\n\n            // Update bracketing interval and step size\n            Scalar new_step;\n            if(ft > fI_lo)\n            {\n                // Case 1: ft > fl\n                new_step = step_selection( I_lo,  I_hi, step,\n                                          fI_lo, fI_hi, ft,\n                                          gI_lo, gI_hi, gt);\n                I_hi = step;\n                fI_hi = ft;\n                gI_hi = gt;\n\n                // std::cout << \"Case 1: new step = \" << new_step;\n\n            } else if(gt * (fI_lo - step) > Scalar(0)) {\n                // Case 2: ft <= fl, gt * (al - at) > 0\n                new_step = std::min(step_max, step + delta * (step - I_lo));\n\n                I_lo = step;\n                fI_lo = ft;\n                gI_lo = gt;\n\n                // std::cout << \"Case 2: new step = \" << new_step;\n\n            } else {\n                // Case 3: ft <= fl, gt * (al - at) <= 0\n                new_step = step_selection( I_lo,  I_hi, step,\n                                          fI_lo, fI_hi, ft,\n                                          gI_lo, gI_hi, gt);\n                I_hi = I_lo;\n                fI_hi = fI_lo;\n                gI_hi = gI_lo;\n\n                I_lo = step;\n                fI_lo = ft;\n                gI_lo = gt;\n\n                // std::cout << \"Case 3: new step = \" << new_step;\n            }\n\n            // In case step, new_step, and step_max are equal, directly return the computed x and fx\n            if(step == step_max && new_step >= step_max)\n            {\n                // std::cout << \"** Maximum step size reached\\n\\n\";\n                // std::cout << \"========================= Leaving line search =========================\\n\\n\";\n                return;\n            }\n            // Otherwise, recompute x and fx based on new_step\n            step = new_step;\n\n            if(step < param.min_step)\n                throw std::runtime_error(\"the line search step became smaller than the minimum value allowed\");\n\n            if(step > param.max_step)\n                throw std::runtime_error(\"the line search step became larger than the maximum value allowed\");\n\n            // Update parameter, function value, and gradient\n            x.noalias() = xp + step * drt;\n            fx = f(x, grad);\n            dg = grad.dot(drt);\n\n            // std::cout << \", fx = \" << fx << std::endl;\n\n            // Convergence test\n            if(fx <= fx_init + step * test_decr && std::abs(dg) <= test_curv)\n            {\n                // std::cout << \"** Criteria met\\n\\n\";\n                // std::cout << \"========================= Leaving line search =========================\\n\\n\";\n                return;\n            }\n            if(step >= step_max)\n            {\n                // std::cout 
<< \"** Maximum step size reached\\n\\n\";\n                // std::cout << \"========================= Leaving line search =========================\\n\\n\";\n                return;\n            }\n        }\n\n        if(iter >= param.max_linesearch)\n            throw std::runtime_error(\"the line search routine reached the maximum number of iterations\");\n    }\n};\n\n\n} // namespace LBFGSpp\n\n#endif // LINE_SEARCH_MORE_THUENTE_H\n"
  },
  {
    "path": "external_libs/LBFGSpp/include/LBFGSpp/LineSearchNocedalWright.h",
    "content": "// Copyright (C) 2016-2020 Yixuan Qiu <yixuan.qiu@cos.name>\n// Copyright (C) 2016-2020 Dirk Toewe <DirkToewe@GoogleMail.com>\n// Under MIT license\n\n#ifndef LINE_SEARCH_NOCEDAL_WRIGHT_H\n#define LINE_SEARCH_NOCEDAL_WRIGHT_H\n\n#include <Eigen/Core>\n#include <stdexcept>\n\n\nnamespace LBFGSpp {\n\n\n///\n/// A line search algorithm for the strong Wolfe condition. Implementation based on:\n///\n///   \"Numerical Optimization\" 2nd Edition,\n///   Jorge Nocedal Stephen J. Wright,\n///   Chapter 3. Line Search Methods, page 60f.\n///\ntemplate <typename Scalar>\nclass LineSearchNocedalWright\n{\nprivate:\n    typedef Eigen::Matrix<Scalar, Eigen::Dynamic, 1> Vector;\n\npublic:\n    ///\n    /// Line search by Nocedal and Wright (2006).\n    ///\n    /// \\param f      A function object such that `f(x, grad)` returns the\n    ///               objective function value at `x`, and overwrites `grad` with\n    ///               the gradient.\n    /// \\param fx     In: The objective function value at the current point.\n    ///               Out: The function value at the new point.\n    /// \\param x      Out: The new point moved to.\n    /// \\param grad   In: The current gradient vector. Out: The gradient at the\n    ///               new point.\n    /// \\param step   In: The initial step length. Out: The calculated step length.\n    /// \\param drt    The current moving direction.\n    /// \\param xp     The current point.\n    /// \\param param  Parameters for the LBFGS algorithm\n    ///\n    template <typename Foo>\n    static void LineSearch(Foo& f, Scalar& fx, Vector& x, Vector& grad,\n                           Scalar& step,\n                           const Vector& drt, const Vector& xp,\n                           const LBFGSParam<Scalar>& param)\n    {\n        // Check the value of step\n        if(step <= Scalar(0))\n            throw std::invalid_argument(\"'step' must be positive\");\n\n        if(param.linesearch != LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE)\n            throw std::invalid_argument(\"'param.linesearch' must be 'LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE' for LineSearchNocedalWright\");\n\n        // To make this implementation more similar to the other line search\n        // methods in LBFGSpp, the symbol names from the literature\n        // (\"Numerical Optimizations\") have been changed.\n        //\n        // Literature | LBFGSpp\n        // -----------|--------\n        // alpha      | step\n        // phi        | fx\n        // phi'       | dg\n\n        // the rate, by which the \n        const Scalar expansion = Scalar(2);      \n\n        // Save the function value at the current x\n        const Scalar fx_init = fx;\n        // Projection of gradient on the search direction\n        const Scalar dg_init = grad.dot(drt);\n        // Make sure d points to a descent direction\n        if(dg_init > 0)\n            throw std::logic_error(\"the moving direction increases the objective function value\");\n\n        const Scalar test_decr = param.ftol * dg_init,    // Sufficient decrease\n                     test_curv = -param.wolfe * dg_init;  // Curvature\n\n        // Ends of the line search range (step_lo > step_hi is allowed)\n        Scalar step_hi, step_lo = 0,\n                 fx_hi,   fx_lo = fx_init,\n                 dg_hi,   dg_lo = dg_init;\n\n        // STEP 1: Bracketing Phase\n        //   Find a range guaranteed to contain a step satisfying strong Wolfe.\n        //\n        //   See also:\n        //     \"Numerical 
Optimization\", \"Algorithm 3.5 (Line Search Algorithm)\".\n        int iter = 0;\n        for(;;)\n        {\n          x.noalias() = xp + step * drt;\n          fx = f(x, grad);\n\n          if(iter++ >= param.max_linesearch)\n            return;\n\n          const Scalar dg = grad.dot(drt);\n\n          if( fx - fx_init > step * test_decr || (0 < step_lo && fx >= fx_lo) )\n          {\n            step_hi = step;\n              fx_hi = fx;\n              dg_hi = dg;\n            break;\n          }\n\n          if( std::abs(dg) <= test_curv )\n            return;\n\n          step_hi = step_lo;\n            fx_hi =   fx_lo;   \n            dg_hi =   dg_lo;\n          step_lo = step;\n            fx_lo =   fx;\n            dg_lo =   dg;\n\n          if( dg >= 0 )\n            break;\n\n          step *= expansion;\n        }\n\n        // STEP 2: Zoom Phase\n        //   Given a range (step_lo,step_hi) that is guaranteed to\n        //   contain a valid strong Wolfe step value, this method\n        //   finds such a value.\n        //\n        //   See also:\n        //     \"Numerical Optimization\", \"Algorithm 3.6 (Zoom)\".\n        for(;;)\n        {\n          // use {fx_lo, fx_hi, dg_lo} to make a quadric interpolation of\n          // the function said interpolation is used to estimate the minimum\n          //\n          // polynomial: p (x) = c0*(x - step)² + c1\n          // conditions: p (step_hi) = fx_hi\n          //             p (step_lo) = fx_lo\n          //             p'(step_lo) = dg_lo\n          step  = (fx_hi-fx_lo)*step_lo - (step_hi*step_hi - step_lo*step_lo)*dg_lo/2;\n          step /= (fx_hi-fx_lo)         - (step_hi         - step_lo        )*dg_lo;\n\n          // if interpolation fails, bisection is used\n          if( step <= std::min(step_lo,step_hi) ||\n              step >= std::max(step_lo,step_hi) )\n              step  = step_lo/2 + step_hi/2;\n\n          x.noalias() = xp + step * drt;\n          fx = f(x, grad);\n\n          if(iter++ >= param.max_linesearch)\n            return;\n\n          const Scalar dg = grad.dot(drt);\n\n          if( fx - fx_init > step * test_decr || fx >= fx_lo )\n          {\n            if( step == step_hi )\n              throw std::runtime_error(\"the line search routine failed, possibly due to insufficient numeric precision\");\n\n            step_hi = step;\n              fx_hi = fx;\n              dg_hi = dg;\n          }\n          else\n          {\n            if( std::abs(dg) <= test_curv )\n              return;\n\n            if( dg * (step_hi - step_lo) >= 0 )\n            {\n              step_hi = step_lo;\n                fx_hi =   fx_lo;\n                dg_hi =   dg_lo;\n            }\n\n            if( step == step_lo )\n              throw std::runtime_error(\"the line search routine failed, possibly due to insufficient numeric precision\");\n\n            step_lo = step;\n              fx_lo =   fx;\n              dg_lo =   dg;\n          }\n        }\n    }\n};\n\n\n} // namespace LBFGSpp\n\n#endif // LINE_SEARCH_NOCEDAL_WRIGHT_H\n"
  },
  {
    "path": "external_libs/LBFGSpp/include/LBFGSpp/Param.h",
    "content": "// Copyright (C) 2016-2020 Yixuan Qiu <yixuan.qiu@cos.name>\n// Under MIT license\n\n#ifndef PARAM_H\n#define PARAM_H\n\n#include <Eigen/Core>\n#include <stdexcept>  // std::invalid_argument\n\n\nnamespace LBFGSpp {\n\n\n///\n/// \\defgroup Enumerations\n///\n/// Enumeration types for line search.\n///\n\n///\n/// \\ingroup Enumerations\n///\n/// The enumeration of line search termination conditions.\n///\nenum LINE_SEARCH_TERMINATION_CONDITION\n{\n    ///\n    /// Backtracking method with the Armijo condition.\n    /// The backtracking method finds the step length such that it satisfies\n    /// the sufficient decrease (Armijo) condition,\n    /// \\f$f(x + a \\cdot d) \\le f(x) + \\beta' \\cdot a \\cdot g(x)^T d\\f$,\n    /// where \\f$x\\f$ is the current point, \\f$d\\f$ is the current search direction,\n    /// \\f$a\\f$ is the step length, and \\f$\\beta'\\f$ is the value specified by\n    /// \\ref LBFGSParam::ftol. \\f$f\\f$ and \\f$g\\f$ are the function\n    /// and gradient values respectively.\n    ///\n    LBFGS_LINESEARCH_BACKTRACKING_ARMIJO = 1,\n\n    ///\n    /// The backtracking method with the defualt (regular Wolfe) condition.\n    /// An alias of `LBFGS_LINESEARCH_BACKTRACKING_WOLFE`.\n    ///\n    LBFGS_LINESEARCH_BACKTRACKING = 2,\n\n    ///\n    /// Backtracking method with regular Wolfe condition.\n    /// The backtracking method finds the step length such that it satisfies\n    /// both the Armijo condition (`LBFGS_LINESEARCH_BACKTRACKING_ARMIJO`)\n    /// and the curvature condition,\n    /// \\f$g(x + a \\cdot d)^T d \\ge \\beta \\cdot g(x)^T d\\f$, where \\f$\\beta\\f$\n    /// is the value specified by \\ref LBFGSParam::wolfe.\n    ///\n    LBFGS_LINESEARCH_BACKTRACKING_WOLFE = 2,\n\n    ///\n    /// Backtracking method with strong Wolfe condition.\n    /// The backtracking method finds the step length such that it satisfies\n    /// both the Armijo condition (`LBFGS_LINESEARCH_BACKTRACKING_ARMIJO`)\n    /// and the following condition,\n    /// \\f$\\vert g(x + a \\cdot d)^T d\\vert \\le \\beta \\cdot \\vert g(x)^T d\\vert\\f$,\n    /// where \\f$\\beta\\f$ is the value specified by \\ref LBFGSParam::wolfe.\n    ///\n    LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE = 3\n};\n\n\n///\n/// Parameters to control the L-BFGS algorithm.\n///\ntemplate <typename Scalar = double>\nclass LBFGSParam\n{\npublic:\n    ///\n    /// The number of corrections to approximate the inverse Hessian matrix.\n    /// The L-BFGS routine stores the computation results of previous \\ref m\n    /// iterations to approximate the inverse Hessian matrix of the current\n    /// iteration. This parameter controls the size of the limited memories\n    /// (corrections). The default value is \\c 6. Values less than \\c 3 are\n    /// not recommended. Large values will result in excessive computing time.\n    ///\n    int    m;\n    ///\n    /// Absolute tolerance for convergence test.\n    /// This parameter determines the absolute accuracy \\f$\\epsilon_{abs}\\f$\n    /// with which the solution is to be found. A minimization terminates when\n    /// \\f$||g|| < \\max\\{\\epsilon_{abs}, \\epsilon_{rel}||x||\\}\\f$,\n    /// where \\f$||\\cdot||\\f$ denotes the Euclidean (L2) norm. The default value is\n    /// \\c 1e-5.\n    ///\n    Scalar epsilon;\n    ///\n    /// Relative tolerance for convergence test.\n    /// This parameter determines the relative accuracy \\f$\\epsilon_{rel}\\f$\n    /// with which the solution is to be found. 
A minimization terminates when\n    /// \\f$||g|| < \\max\\{\\epsilon_{abs}, \\epsilon_{rel}||x||\\}\\f$,\n    /// where \\f$||\\cdot||\\f$ denotes the Euclidean (L2) norm. The default value is\n    /// \\c 1e-5.\n    ///\n    Scalar epsilon_rel;\n    ///\n    /// Distance for delta-based convergence test.\n    /// This parameter determines the distance \\f$d\\f$ to compute the\n    /// rate of decrease of the objective function,\n    /// \\f$f_{k-d}(x)-f_k(x)\\f$, where \\f$k\\f$ is the current iteration\n    /// step. If the value of this parameter is zero, the delta-based convergence\n    /// test will not be performed. The default value is \\c 0.\n    ///\n    int    past;\n    ///\n    /// Delta for convergence test.\n    /// The algorithm stops when the following condition is met,\n    /// \\f$|f_{k-d}(x)-f_k(x)|<\\delta\\cdot\\max(1, |f_k(x)|, |f_{k-d}(x)|)\\f$, where \\f$f_k(x)\\f$ is\n    /// the current function value, and \\f$f_{k-d}(x)\\f$ is the function value\n    /// \\f$d\\f$ iterations ago (specified by the \\ref past parameter).\n    /// The default value is \\c 0.\n    ///\n    Scalar delta;\n    ///\n    /// The maximum number of iterations.\n    /// The optimization process is terminated when the iteration count\n    /// exceeds this parameter. Setting this parameter to zero continues an\n    /// optimization process until a convergence or error. The default value\n    /// is \\c 0.\n    ///\n    int    max_iterations;\n    ///\n    /// The line search termination condition.\n    /// This parameter specifies the line search termination condition that will be used\n    /// by the LBFGS routine. The default value is `LBFGS_LINESEARCH_BACKTRACKING_ARMIJO`.\n    ///\n    int    linesearch;\n    ///\n    /// The maximum number of trials for the line search.\n    /// This parameter controls the number of function and gradients evaluations\n    /// per iteration for the line search routine. The default value is \\c 20.\n    ///\n    int    max_linesearch;\n    ///\n    /// The minimum step length allowed in the line search.\n    /// The default value is \\c 1e-20. Usually this value does not need to be\n    /// modified.\n    ///\n    Scalar min_step;\n    ///\n    /// The maximum step length allowed in the line search.\n    /// The default value is \\c 1e+20. Usually this value does not need to be\n    /// modified.\n    ///\n    Scalar max_step;\n    ///\n    /// A parameter to control the accuracy of the line search routine.\n    /// The default value is \\c 1e-4. This parameter should be greater\n    /// than zero and smaller than \\c 0.5.\n    ///\n    Scalar ftol;\n    ///\n    /// The coefficient for the Wolfe condition.\n    /// This parameter is valid only when the line-search\n    /// algorithm is used with the Wolfe condition.\n    /// The default value is \\c 0.9. 
This parameter should be greater than\n    /// the \ref ftol parameter and smaller than \c 1.0.\n    ///\n    Scalar wolfe;\n\npublic:\n    ///\n    /// Constructor for L-BFGS parameters.\n    /// Default values for parameters will be set when the object is created.\n    ///\n    LBFGSParam()\n    {\n        m              = 6;\n        epsilon        = Scalar(1e-5);\n        epsilon_rel    = Scalar(1e-5);\n        past           = 0;\n        delta          = Scalar(0);\n        max_iterations = 0;\n        linesearch     = LBFGS_LINESEARCH_BACKTRACKING_ARMIJO;\n        max_linesearch = 20;\n        min_step       = Scalar(1e-20);\n        max_step       = Scalar(1e+20);\n        ftol           = Scalar(1e-4);\n        wolfe          = Scalar(0.9);\n    }\n\n    ///\n    /// Checking the validity of L-BFGS parameters.\n    /// An `std::invalid_argument` exception will be thrown if some parameter\n    /// is invalid.\n    ///\n    inline void check_param() const\n    {\n        if(m <= 0)\n            throw std::invalid_argument(\"'m' must be positive\");\n        if(epsilon < 0)\n            throw std::invalid_argument(\"'epsilon' must be non-negative\");\n        if(epsilon_rel < 0)\n            throw std::invalid_argument(\"'epsilon_rel' must be non-negative\");\n        if(past < 0)\n            throw std::invalid_argument(\"'past' must be non-negative\");\n        if(delta < 0)\n            throw std::invalid_argument(\"'delta' must be non-negative\");\n        if(max_iterations < 0)\n            throw std::invalid_argument(\"'max_iterations' must be non-negative\");\n        if(linesearch < LBFGS_LINESEARCH_BACKTRACKING_ARMIJO ||\n           linesearch > LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE)\n           throw std::invalid_argument(\"unsupported line search termination condition\");\n        if(max_linesearch <= 0)\n            throw std::invalid_argument(\"'max_linesearch' must be positive\");\n        if(min_step < 0)\n            throw std::invalid_argument(\"'min_step' must be non-negative\");\n        if(max_step < min_step)\n            throw std::invalid_argument(\"'max_step' must be greater than 'min_step'\");\n        if(ftol <= 0 || ftol >= 0.5)\n            throw std::invalid_argument(\"'ftol' must satisfy 0 < ftol < 0.5\");\n        if(wolfe <= ftol || wolfe >= 1)\n            throw std::invalid_argument(\"'wolfe' must satisfy ftol < wolfe < 1\");\n    }\n};\n\n\n///\n/// Parameters to control the L-BFGS-B algorithm.\n///\ntemplate <typename Scalar = double>\nclass LBFGSBParam\n{\npublic:\n    ///\n    /// The number of corrections to approximate the inverse Hessian matrix.\n    /// The L-BFGS-B routine stores the computation results of previous \\ref m\n    /// iterations to approximate the inverse Hessian matrix of the current\n    /// iteration. This parameter controls the size of the limited memories\n    /// (corrections). The default value is \\c 6. Values less than \\c 3 are\n    /// not recommended. Large values will result in excessive computing time.\n    ///\n    int    m;\n    ///\n    /// Absolute tolerance for convergence test.\n    /// This parameter determines the absolute accuracy \\f$\\epsilon_{abs}\\f$\n    /// with which the solution is to be found. A minimization terminates when\n    /// \\f$||Pg||_{\\infty} < \\max\\{\\epsilon_{abs}, \\epsilon_{rel}||x||\\}\\f$,\n    /// where \\f$||x||\\f$ denotes the Euclidean (L2) norm of \\f$x\\f$, and\n    /// \\f$Pg=P(x-g,l,u)-x\\f$ is the projected gradient. 
The default value is\n    /// \\c 1e-5.\n    ///\n    Scalar epsilon;\n    ///\n    /// Relative tolerance for convergence test.\n    /// This parameter determines the relative accuracy \\f$\\epsilon_{rel}\\f$\n    /// with which the solution is to be found. A minimization terminates when\n    /// \\f$||Pg||_{\\infty} < \\max\\{\\epsilon_{abs}, \\epsilon_{rel}||x||\\}\\f$,\n    /// where \\f$||x||\\f$ denotes the Euclidean (L2) norm of \\f$x\\f$, and\n    /// \\f$Pg=P(x-g,l,u)-x\\f$ is the projected gradient. The default value is\n    /// \\c 1e-5.\n    ///\n    Scalar epsilon_rel;\n    ///\n    /// Distance for delta-based convergence test.\n    /// This parameter determines the distance \\f$d\\f$ to compute the\n    /// rate of decrease of the objective function,\n    /// \\f$f_{k-d}(x)-f_k(x)\\f$, where \\f$k\\f$ is the current iteration\n    /// step. If the value of this parameter is zero, the delta-based convergence\n    /// test will not be performed. The default value is \\c 1.\n    ///\n    int    past;\n    ///\n    /// Delta for convergence test.\n    /// The algorithm stops when the following condition is met,\n    /// \\f$|f_{k-d}(x)-f_k(x)|<\\delta\\cdot\\max(1, |f_k(x)|, |f_{k-d}(x)|)\\f$, where \\f$f_k(x)\\f$ is\n    /// the current function value, and \\f$f_{k-d}(x)\\f$ is the function value\n    /// \\f$d\\f$ iterations ago (specified by the \\ref past parameter).\n    /// The default value is \\c 1e-10.\n    ///\n    Scalar delta;\n    ///\n    /// The maximum number of iterations.\n    /// The optimization process is terminated when the iteration count\n    /// exceeds this parameter. Setting this parameter to zero continues an\n    /// optimization process until a convergence or error. The default value\n    /// is \\c 0.\n    ///\n    int    max_iterations;\n    ///\n    /// The maximum number of iterations in the subspace minimization.\n    /// This parameter controls the number of iterations in the subspace\n    /// minimization routine. The default value is \\c 10.\n    ///\n    int    max_submin;\n    ///\n    /// The maximum number of trials for the line search.\n    /// This parameter controls the number of function and gradients evaluations\n    /// per iteration for the line search routine. The default value is \\c 20.\n    ///\n    int    max_linesearch;\n    ///\n    /// The minimum step length allowed in the line search.\n    /// The default value is \\c 1e-20. Usually this value does not need to be\n    /// modified.\n    ///\n    Scalar min_step;\n    ///\n    /// The maximum step length allowed in the line search.\n    /// The default value is \\c 1e+20. Usually this value does not need to be\n    /// modified.\n    ///\n    Scalar max_step;\n    ///\n    /// A parameter to control the accuracy of the line search routine.\n    /// The default value is \\c 1e-4. This parameter should be greater\n    /// than zero and smaller than \\c 0.5.\n    ///\n    Scalar ftol;\n    ///\n    /// The coefficient for the Wolfe condition.\n    /// This parameter is valid only when the line-search\n    /// algorithm is used with the Wolfe condition.\n    /// The default value is \\c 0.9. 
This parameter should be greater than\n    /// the \\ref ftol parameter and smaller than \\c 1.0.\n    ///\n    Scalar wolfe;\n\npublic:\n    ///\n    /// Constructor for L-BFGS-B parameters.\n    /// Default values for parameters will be set when the object is created.\n    ///\n    LBFGSBParam()\n    {\n        m              = 6;\n        epsilon        = Scalar(1e-5);\n        epsilon_rel    = Scalar(1e-5);\n        past           = 1;\n        delta          = Scalar(1e-10);\n        max_iterations = 0;\n        max_submin     = 10;\n        max_linesearch = 20;\n        min_step       = Scalar(1e-20);\n        max_step       = Scalar(1e+20);\n        ftol           = Scalar(1e-4);\n        wolfe          = Scalar(0.9);\n    }\n\n    ///\n    /// Checking the validity of L-BFGS-B parameters.\n    /// An `std::invalid_argument` exception will be thrown if some parameter\n    /// is invalid.\n    ///\n    inline void check_param() const\n    {\n        if(m <= 0)\n            throw std::invalid_argument(\"'m' must be positive\");\n        if(epsilon < 0)\n            throw std::invalid_argument(\"'epsilon' must be non-negative\");\n        if(epsilon_rel < 0)\n            throw std::invalid_argument(\"'epsilon_rel' must be non-negative\");\n        if(past < 0)\n            throw std::invalid_argument(\"'past' must be non-negative\");\n        if(delta < 0)\n            throw std::invalid_argument(\"'delta' must be non-negative\");\n        if(max_iterations < 0)\n            throw std::invalid_argument(\"'max_iterations' must be non-negative\");\n        if(max_submin < 0)\n            throw std::invalid_argument(\"'max_submin' must be non-negative\");\n        if(max_linesearch <= 0)\n            throw std::invalid_argument(\"'max_linesearch' must be positive\");\n        if(min_step < 0)\n            throw std::invalid_argument(\"'min_step' must be non-negative\");\n        if(max_step < min_step)\n            throw std::invalid_argument(\"'max_step' must be greater than 'min_step'\");\n        if(ftol <= 0 || ftol >= 0.5)\n            throw std::invalid_argument(\"'ftol' must satisfy 0 < ftol < 0.5\");\n        if(wolfe <= ftol || wolfe >= 1)\n            throw std::invalid_argument(\"'wolfe' must satisfy ftol < wolfe < 1\");\n    }\n};\n\n\n} // namespace LBFGSpp\n\n#endif // PARAM_H\n"
  },
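The `Param.h` entry above defines the solver-parameter structs for the vendored LBFGSpp library. As a quick orientation, here is a minimal sketch (not part of the repository) of how `LBFGSpp::LBFGSBParam` is typically configured and validated; it assumes `external_libs/LBFGSpp/include` is on the compiler's include path.

```cpp
#include <iostream>
#include <stdexcept>
#include <LBFGSpp/Param.h>  // the vendored header shown above

int main()
{
    LBFGSpp::LBFGSBParam<double> param;  // constructor fills in the documented defaults
    param.epsilon = 1e-6;                // tighten the absolute tolerance
    param.ftol    = 0.3;                 // legal: must satisfy 0 < ftol < 0.5
    param.wolfe   = 0.2;                 // invalid on purpose: wolfe must exceed ftol

    try {
        param.check_param();             // throws std::invalid_argument on bad settings
    } catch (const std::invalid_argument& e) {
        std::cout << "rejected: " << e.what() << '\n';  // "'wolfe' must satisfy ftol < wolfe < 1"
    }
    return 0;
}
```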
  {
    "path": "external_libs/LBFGSpp/include/LBFGSpp/SubspaceMin.h",
    "content": "// Copyright (C) 2020 Yixuan Qiu <yixuan.qiu@cos.name>\n// Under MIT license\n\n#ifndef SUBSPACE_MIN_H\n#define SUBSPACE_MIN_H\n\n#include <stdexcept>\n#include <vector>\n#include <Eigen/Core>\n#include \"BFGSMat.h\"\n\n\n/// \\cond\n\nnamespace LBFGSpp {\n\n\n//\n// Subspace minimization procedure of the L-BFGS-B algorithm,\n// mainly for internal use.\n//\n// The target of subspace minimization is to minimize the quadratic function m(x)\n// over the free variables, subject to the bound condition.\n// Free variables stand for coordinates that are not at the boundary in xcp,\n// the generalized Cauchy point.\n//\n// In the classical implementation of L-BFGS-B [1], the minimization is done by first\n// ignoring the box constraints, followed by a line search. Our implementation is\n// an exact minimization subject to the bounds, based on the BOXCQP algorithm [2].\n//\n// Reference:\n// [1] R. H. Byrd, P. Lu, and J. Nocedal (1995). A limited memory algorithm for bound constrained optimization.\n// [2] C. Voglis and I. E. Lagaris (2004). BOXCQP: An algorithm for bound constrained convex quadratic problems.\n//\ntemplate <typename Scalar>\nclass SubspaceMin\n{\nprivate:\n    typedef Eigen::Matrix<Scalar, Eigen::Dynamic, 1> Vector;\n    typedef Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic> Matrix;\n    typedef std::vector<int> IndexSet;\n\n    // v[ind]\n    static Vector subvec(const Vector& v, const IndexSet& ind)\n    {\n        const int nsub = ind.size();\n        Vector res(nsub);\n        for(int i = 0; i < nsub; i++)\n            res[i] = v[ind[i]];\n        return res;\n    }\n\n    // v[ind] = rhs\n    static void subvec_assign(Vector& v, const IndexSet& ind, const Vector& rhs)\n    {\n        const int nsub = ind.size();\n        for(int i = 0; i < nsub; i++)\n            v[ind[i]] = rhs[i];\n    }\n\n    // Check whether the vector is within the bounds\n    static bool in_bounds(const Vector& x, const Vector& lb, const Vector& ub)\n    {\n        const int n = x.size();\n        for(int i = 0; i < n; i++)\n        {\n            if(x[i] < lb[i] || x[i] > ub[i])\n                return false;\n        }\n        return true;\n    }\n\n    // Test convergence of P set\n    static bool P_converged(const IndexSet& yP_set, const Vector& vecy, const Vector& vecl, const Vector& vecu)\n    {\n        const int nP = yP_set.size();\n        for(int i = 0; i < nP; i++)\n        {\n            const int coord = yP_set[i];\n            if(vecy[coord] < vecl[coord] || vecy[coord] > vecu[coord])\n                return false;\n        }\n        return true;\n    }\n\n    // Test convergence of L set\n    static bool L_converged(const IndexSet& yL_set, const Vector& lambda)\n    {\n        const int nL = yL_set.size();\n        for(int i = 0; i < nL; i++)\n        {\n            const int coord = yL_set[i];\n            if(lambda[coord] < Scalar(0))\n                return false;\n        }\n        return true;\n    }\n\n    // Test convergence of L set\n    static bool U_converged(const IndexSet& yU_set, const Vector& mu)\n    {\n        const int nU = yU_set.size();\n        for(int i = 0; i < nU; i++)\n        {\n            const int coord = yU_set[i];\n            if(mu[coord] < Scalar(0))\n                return false;\n        }\n        return true;\n    }\n\npublic:\n    // bfgs:       An object that represents the BFGS approximation matrix.\n    // x0:         Current parameter vector.\n    // xcp:        Computed generalized Cauchy point.\n    // g:          
Gradient at x0.\n    // lb:         Lower bounds for x.\n    // ub:         Upper bounds for x.\n    // Wd:         W'(xcp - x0)\n    // newact_set: Coordinates that newly become active during the GCP procedure.\n    // fv_set:     Free variable set.\n    // maxit:      Maximum number of iterations.\n    // drt:        The output direction vector, drt = xsm - x0.\n    static void subspace_minimize(\n        const BFGSMat<Scalar, true>& bfgs, const Vector& x0, const Vector& xcp, const Vector& g,\n        const Vector& lb, const Vector& ub, const Vector& Wd, const IndexSet& newact_set, const IndexSet& fv_set, int maxit,\n        Vector& drt\n    )\n    {\n        // std::cout << \"========================= Entering subspace minimization =========================\\n\\n\";\n\n        // d = xcp - x0\n        drt.noalias() = xcp - x0;\n        // Size of free variables\n        const int nfree = fv_set.size();\n        // If there is no free variable, simply return drt\n        if(nfree < 1)\n        {\n            // std::cout << \"========================= (Early) leaving subspace minimization =========================\\n\\n\";\n            return;\n        }\n\n        // std::cout << \"New active set = [ \"; for(std::size_t i = 0; i < newact_set.size(); i++)  std::cout << newact_set[i] << \" \"; std::cout << \"]\\n\";\n        // std::cout << \"Free variable set = [ \"; for(std::size_t i = 0; i < fv_set.size(); i++)  std::cout << fv_set[i] << \" \"; std::cout << \"]\\n\\n\";\n\n        // Extract the rows of W in the free variable set\n        Matrix WF = bfgs.Wb(fv_set);\n        // Compute F'BAb = -F'WMW'AA'd\n        Vector vecc(nfree);\n        bfgs.compute_FtBAb(WF, fv_set, newact_set, Wd, drt, vecc);\n        // Set the vector c=F'BAb+F'g for linear term, and vectors l and u for the new bounds\n        Vector vecl(nfree), vecu(nfree);\n        for(int i = 0; i < nfree; i++)\n        {\n            const int coord = fv_set[i];\n            vecl[i] = lb[coord] - x0[coord];\n            vecu[i] = ub[coord] - x0[coord];\n            vecc[i] += g[coord];\n        }\n        // Solve y = -inv(B[F, F]) * c\n        Vector vecy(nfree);\n        bfgs.solve_PtBP(WF, -vecc, vecy);\n        // Test feasibility\n        // If yes, then the solution has been found\n        if(in_bounds(vecy, vecl, vecu))\n        {\n            subvec_assign(drt, fv_set, vecy);\n            return;\n        }\n        // Otherwise, enter the iterations\n\n        // Make a copy of y as a fallback solution\n        Vector yfallback = vecy;\n        // Dual variables\n        Vector lambda = Vector::Zero(nfree), mu = Vector::Zero(nfree);\n\n        // Iterations\n        IndexSet L_set, U_set, P_set, yL_set, yU_set, yP_set;\n        L_set.reserve(nfree / 3); yL_set.reserve(nfree / 3);\n        U_set.reserve(nfree / 3); yU_set.reserve(nfree / 3);\n        P_set.reserve(nfree); yP_set.reserve(nfree);\n        int k;\n        for(k = 0; k < maxit; k++)\n        {\n            // Construct the L, U, and P sets, and then update values\n            // Indices in original drt vector\n            L_set.clear();\n            U_set.clear();\n            P_set.clear();\n            // Indices in y\n            yL_set.clear();\n            yU_set.clear();\n            yP_set.clear();\n            for(int i = 0; i < nfree; i++)\n            {\n                const int coord = fv_set[i];\n                const Scalar li = vecl[i], ui = vecu[i];\n                if( (vecy[i] < li) || (vecy[i] == li && lambda[i] >= Scalar(0)) )\n   
             {\n                    L_set.push_back(coord);\n                    yL_set.push_back(i);\n                    vecy[i] = li;\n                    mu[i] = Scalar(0);\n                } else if( (vecy[i] > ui) || (vecy[i] == ui && mu[i] >= Scalar(0)) ) {\n                    U_set.push_back(coord);\n                    yU_set.push_back(i);\n                    vecy[i] = ui;\n                    lambda[i] = Scalar(0);\n                } else {\n                    P_set.push_back(coord);\n                    yP_set.push_back(i);\n                    lambda[i] = Scalar(0);\n                    mu[i] = Scalar(0);\n                }\n            }\n\n            /* std::cout << \"** Iter \" << k << \" **\\n\";\n            std::cout << \"   L = [ \"; for(std::size_t i = 0; i < L_set.size(); i++)  std::cout << L_set[i] << \" \"; std::cout << \"]\\n\";\n            std::cout << \"   U = [ \"; for(std::size_t i = 0; i < U_set.size(); i++)  std::cout << U_set[i] << \" \"; std::cout << \"]\\n\";\n            std::cout << \"   P = [ \"; for(std::size_t i = 0; i < P_set.size(); i++)  std::cout << P_set[i] << \" \"; std::cout << \"]\\n\\n\"; */\n\n            // Extract the rows of W in the P set\n            Matrix WP = bfgs.Wb(P_set);\n            // Solve y[P] = -inv(B[P, P]) * (B[P, L] * l[L] + B[P, U] * u[U] + c[P])\n            const int nP = P_set.size();\n            if(nP > 0)\n            {\n                Vector rhs = subvec(vecc, yP_set);\n                Vector lL = subvec(vecl, yL_set);\n                Vector uU = subvec(vecu, yU_set);\n                Vector tmp(nP);\n                bool nonzero = bfgs.apply_PtBQv(WP, L_set, lL, tmp, true);\n                if(nonzero)\n                    rhs.noalias() += tmp;\n                nonzero = bfgs.apply_PtBQv(WP, U_set, uU, tmp, true);\n                if(nonzero)\n                    rhs.noalias() += tmp;\n\n                bfgs.solve_PtBP(WP, -rhs, tmp);\n                subvec_assign(vecy, yP_set, tmp);\n            }\n\n            // Solve lambda[L] = B[L, F] * y + c[L]\n            const int nL = L_set.size();\n            const int nU = U_set.size();\n            Vector Fy;\n            if(nL > 0 || nU > 0)\n                bfgs.apply_WtPv(fv_set, vecy, Fy);\n            if(nL > 0)\n            {\n                Vector res;\n                bfgs.apply_PtWMv(L_set, Fy, res, Scalar(-1));\n                res.noalias() += subvec(vecc, yL_set);\n                subvec_assign(lambda, yL_set, res);\n            }\n\n            // Solve mu[U] = -B[U, F] * y - c[U]\n            if(nU > 0)\n            {\n                Vector res;\n                bfgs.apply_PtWMv(U_set, Fy, res, Scalar(-1));\n                res.noalias() = -res - subvec(vecc, yU_set);\n                subvec_assign(mu, yU_set, res);\n            }\n\n            // Test convergence\n            if( L_converged(yL_set, lambda) && U_converged(yU_set, mu) && P_converged(yP_set, vecy, vecl, vecu) )\n                break;\n        }\n\n        // If the iterations do not converge, try the projection\n        if(k >= maxit)\n        {\n            vecy.noalias() = vecy.cwiseMax(vecl).cwiseMin(vecu);\n            subvec_assign(drt, fv_set, vecy);\n            // Test whether drt is a descent direction\n            Scalar dg = drt.dot(g);\n            // If yes, return the result\n            if(dg <= -std::numeric_limits<Scalar>::epsilon())\n                return;\n\n            // If not, fall back to the projected unconstrained solution\n            
vecy.noalias() = yfallback.cwiseMax(vecl).cwiseMin(vecu);\n            subvec_assign(drt, fv_set, vecy);\n            dg = drt.dot(g);\n            if(dg <= -std::numeric_limits<Scalar>::epsilon())\n                return;\n\n            // If still not, fall back to the unconstrained solution\n            subvec_assign(drt, fv_set, yfallback);\n            return;\n        }\n\n        // std::cout << \"** Minimization finished in \" << k + 1 << \" iteration(s) **\\n\\n\";\n        // std::cout << \"========================= Leaving subspace minimization =========================\\n\\n\";\n\n        subvec_assign(drt, fv_set, vecy);\n    }\n};\n\n\n} // namespace LBFGSpp\n\n/// \\endcond\n\n#endif // SUBSPACE_MIN_H\n"
  },
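The `subspace_minimize` routine above is the L-BFGS-B-specific form of the BOXCQP active-set iteration: partition coordinates into lower-active (L), upper-active (U), and free (P) sets, solve the KKT system on the free block, update the multipliers lambda (lower) and mu (upper), and stop when all three sets are consistent. A self-contained sketch against a small dense Hessian `B` (rather than the library's compact `BFGSMat` representation) may make the loop easier to follow; `solve_boxcqp` and the `main` driver are illustrative names, not library API.

```cpp
#include <iostream>
#include <vector>
#include <Eigen/Dense>

using Vec = Eigen::VectorXd;
using Mat = Eigen::MatrixXd;

// Minimize 0.5*y'*B*y + c'*y subject to l <= y <= u, with B symmetric positive definite.
Vec solve_boxcqp(const Mat& B, const Vec& c, const Vec& l, const Vec& u, int maxit = 50)
{
    const int n = static_cast<int>(B.rows());
    Vec y = B.ldlt().solve(-c);                   // start from the unconstrained minimizer
    Vec lambda = Vec::Zero(n), mu = Vec::Zero(n); // multipliers for lower/upper bounds

    for (int k = 0; k < maxit; k++)
    {
        // Partition coordinates into lower-active (L), upper-active (U), and free (P)
        std::vector<int> L, U, P;
        for (int i = 0; i < n; i++)
        {
            if (y[i] < l[i] || (y[i] == l[i] && lambda[i] >= 0.0)) { L.push_back(i); y[i] = l[i]; mu[i] = 0.0; }
            else if (y[i] > u[i] || (y[i] == u[i] && mu[i] >= 0.0)) { U.push_back(i); y[i] = u[i]; lambda[i] = 0.0; }
            else { P.push_back(i); lambda[i] = 0.0; mu[i] = 0.0; }
        }

        // Solve the free block: B[P,P]*y[P] = -(c[P] + sum over bound coords j of B[P,j]*y[j])
        const int np = static_cast<int>(P.size());
        if (np > 0)
        {
            Mat BPP(np, np);
            Vec rhs(np);
            for (int a = 0; a < np; a++)
            {
                rhs[a] = -c[P[a]];
                for (int b = 0; b < np; b++) BPP(a, b) = B(P[a], P[b]);
                for (int j : L) rhs[a] -= B(P[a], j) * y[j];
                for (int j : U) rhs[a] -= B(P[a], j) * y[j];
            }
            const Vec yP = BPP.ldlt().solve(rhs);
            for (int a = 0; a < np; a++) y[P[a]] = yP[a];
        }

        // Multiplier updates mirror the header: lambda[L] = (B*y + c)[L], mu[U] = -(B*y + c)[U]
        const Vec g = B * y + c;
        for (int i : L) lambda[i] = g[i];
        for (int i : U) mu[i] = -g[i];

        // Converged when the free block is feasible and the multipliers are non-negative
        bool ok = true;
        for (int i : P) if (y[i] < l[i] || y[i] > u[i]) ok = false;
        for (int i : L) if (lambda[i] < 0.0) ok = false;
        for (int i : U) if (mu[i] < 0.0) ok = false;
        if (ok) break;
    }
    // Final projection onto the box, the same fallback the header applies
    return y.cwiseMax(l).cwiseMin(u);
}

int main()
{
    Mat B(2, 2); B << 2, 0, 0, 2;   // f(y) = y0^2 + y1^2 - 4*y0 - 4*y1, unconstrained min at (2, 2)
    Vec c(2); c << -4, -4;
    const Vec l = Vec::Zero(2), u = Vec::Ones(2);
    std::cout << solve_boxcqp(B, c, l, u).transpose() << '\n';  // prints "1 1"
}
```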
  {
    "path": "external_libs/cxxopts/LICENSE",
    "content": "Copyright (c) 2014 Jarryd Beck\n\nPermission is hereby granted, free of charge, to any person obtaining a copy\nof this software and associated documentation files (the \"Software\"), to deal\nin the Software without restriction, including without limitation the rights\nto use, copy, modify, merge, publish, distribute, sublicense, and/or sell\ncopies of the Software, and to permit persons to whom the Software is\nfurnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in\nall copies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\nIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\nFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\nAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\nLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\nOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\nTHE SOFTWARE.\n"
  },
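The next entry vendors the single-header cxxopts command-line parser. Before the (long) header itself, here is a minimal usage sketch of the API it exposes (`Options`, `add_options`, `value<T>`, `ParseResult`); the program name and option names are made up for illustration, and `cxxopts.hpp` is assumed to be reachable on the include path.

```cpp
#include <iostream>
#include <string>
#include "cxxopts.hpp"  // the vendored header in the next entry

int main(int argc, char** argv)
{
    cxxopts::Options options("demo", "Short demo of the vendored cxxopts parser");
    options.add_options()
        ("t,threads", "Number of worker threads", cxxopts::value<int>()->default_value("1"))
        ("o,out", "Output file prefix", cxxopts::value<std::string>())
        ("v,verbose", "Enable verbose output")  // boolean flag: defaults to false, implicit true
        ("h,help", "Print usage");

    const auto result = options.parse(argc, argv);
    if (result.count("help")) {
        std::cout << options.help() << std::endl;
        return 0;
    }
    std::cout << "threads = " << result["threads"].as<int>() << '\n';
    if (result.count("out"))
        std::cout << "out = " << result["out"].as<std::string>() << '\n';
    std::cout << "verbose = " << std::boolalpha << result["verbose"].as<bool>() << '\n';
    return 0;
}
```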
  {
    "path": "external_libs/cxxopts/include/cxxopts.hpp",
    "content": "/*\n\nCopyright (c) 2014, 2015, 2016, 2017 Jarryd Beck\n\nPermission is hereby granted, free of charge, to any person obtaining a copy\nof this software and associated documentation files (the \"Software\"), to deal\nin the Software without restriction, including without limitation the rights\nto use, copy, modify, merge, publish, distribute, sublicense, and/or sell\ncopies of the Software, and to permit persons to whom the Software is\nfurnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in\nall copies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\nIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\nFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\nAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\nLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\nOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\nTHE SOFTWARE.\n\n*/\n\n#ifndef CXXOPTS_HPP_INCLUDED\n#define CXXOPTS_HPP_INCLUDED\n\n#include <cctype>\n#include <cstring>\n#include <exception>\n#include <iostream>\n#include <limits>\n#include <list>\n#include <map>\n#include <memory>\n#include <sstream>\n#include <string>\n#include <unordered_map>\n#include <unordered_set>\n#include <utility>\n#include <vector>\n#include <algorithm>\n\n#if defined(__GNUC__) && !defined(__clang__)\n#  if (__GNUC__ * 10 + __GNUC_MINOR__) < 49\n#    define CXXOPTS_NO_REGEX true\n#  endif\n#endif\n\n#ifndef CXXOPTS_NO_REGEX\n#  include <regex>\n#endif  // CXXOPTS_NO_REGEX\n\n// Nonstandard before C++17, which is coincidentally what we also need for <optional>\n#ifdef __has_include\n#  if __has_include(<optional>)\n#    include <optional>\n#    ifdef __cpp_lib_optional\n#      define CXXOPTS_HAS_OPTIONAL\n#    endif\n#  endif\n#endif\n\n#if __cplusplus >= 201603L\n#define CXXOPTS_NODISCARD [[nodiscard]]\n#else\n#define CXXOPTS_NODISCARD\n#endif\n\n#ifndef CXXOPTS_VECTOR_DELIMITER\n#define CXXOPTS_VECTOR_DELIMITER ','\n#endif\n\n#define CXXOPTS__VERSION_MAJOR 3\n#define CXXOPTS__VERSION_MINOR 0\n#define CXXOPTS__VERSION_PATCH 0\n\n#if (__GNUC__ < 10 || (__GNUC__ == 10 && __GNUC_MINOR__ < 1)) && __GNUC__ >= 6\n  #define CXXOPTS_NULL_DEREF_IGNORE\n#endif\n\nnamespace cxxopts\n{\n  static constexpr struct {\n    uint8_t major, minor, patch;\n  } version = {\n    CXXOPTS__VERSION_MAJOR,\n    CXXOPTS__VERSION_MINOR,\n    CXXOPTS__VERSION_PATCH\n  };\n} // namespace cxxopts\n\n//when we ask cxxopts to use Unicode, help strings are processed using ICU,\n//which results in the correct lengths being computed for strings when they\n//are formatted for the help output\n//it is necessary to make sure that <unicode/unistr.h> can be found by the\n//compiler, and that icu-uc is linked in to the binary.\n\n#ifdef CXXOPTS_USE_UNICODE\n#include <unicode/unistr.h>\n\nnamespace cxxopts\n{\n  using String = icu::UnicodeString;\n\n  inline\n  String\n  toLocalString(std::string s)\n  {\n    return icu::UnicodeString::fromUTF8(std::move(s));\n  }\n\n#if defined(__GNUC__)\n// GNU GCC with -Weffc++ will issue a warning regarding the upcoming class, we want to silence it:\n// warning: base class 'class std::enable_shared_from_this<cxxopts::Value>' has accessible non-virtual destructor\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored \"-Wnon-virtual-dtor\"\n#pragma GCC diagnostic ignored 
\"-Weffc++\"\n// This will be ignored under other compilers like LLVM clang.\n#endif\n  class UnicodeStringIterator : public\n    std::iterator<std::forward_iterator_tag, int32_t>\n  {\n    public:\n\n    UnicodeStringIterator(const icu::UnicodeString* string, int32_t pos)\n    : s(string)\n    , i(pos)\n    {\n    }\n\n    value_type\n    operator*() const\n    {\n      return s->char32At(i);\n    }\n\n    bool\n    operator==(const UnicodeStringIterator& rhs) const\n    {\n      return s == rhs.s && i == rhs.i;\n    }\n\n    bool\n    operator!=(const UnicodeStringIterator& rhs) const\n    {\n      return !(*this == rhs);\n    }\n\n    UnicodeStringIterator&\n    operator++()\n    {\n      ++i;\n      return *this;\n    }\n\n    UnicodeStringIterator\n    operator+(int32_t v)\n    {\n      return UnicodeStringIterator(s, i + v);\n    }\n\n    private:\n    const icu::UnicodeString* s;\n    int32_t i;\n  };\n#if defined(__GNUC__)\n#pragma GCC diagnostic pop\n#endif\n\n  inline\n  String&\n  stringAppend(String&s, String a)\n  {\n    return s.append(std::move(a));\n  }\n\n  inline\n  String&\n  stringAppend(String& s, size_t n, UChar32 c)\n  {\n    for (size_t i = 0; i != n; ++i)\n    {\n      s.append(c);\n    }\n\n    return s;\n  }\n\n  template <typename Iterator>\n  String&\n  stringAppend(String& s, Iterator begin, Iterator end)\n  {\n    while (begin != end)\n    {\n      s.append(*begin);\n      ++begin;\n    }\n\n    return s;\n  }\n\n  inline\n  size_t\n  stringLength(const String& s)\n  {\n    return s.length();\n  }\n\n  inline\n  std::string\n  toUTF8String(const String& s)\n  {\n    std::string result;\n    s.toUTF8String(result);\n\n    return result;\n  }\n\n  inline\n  bool\n  empty(const String& s)\n  {\n    return s.isEmpty();\n  }\n}\n\nnamespace std\n{\n  inline\n  cxxopts::UnicodeStringIterator\n  begin(const icu::UnicodeString& s)\n  {\n    return cxxopts::UnicodeStringIterator(&s, 0);\n  }\n\n  inline\n  cxxopts::UnicodeStringIterator\n  end(const icu::UnicodeString& s)\n  {\n    return cxxopts::UnicodeStringIterator(&s, s.length());\n  }\n}\n\n//ifdef CXXOPTS_USE_UNICODE\n#else\n\nnamespace cxxopts\n{\n  using String = std::string;\n\n  template <typename T>\n  T\n  toLocalString(T&& t)\n  {\n    return std::forward<T>(t);\n  }\n\n  inline\n  size_t\n  stringLength(const String& s)\n  {\n    return s.length();\n  }\n\n  inline\n  String&\n  stringAppend(String&s, const String& a)\n  {\n    return s.append(a);\n  }\n\n  inline\n  String&\n  stringAppend(String& s, size_t n, char c)\n  {\n    return s.append(n, c);\n  }\n\n  template <typename Iterator>\n  String&\n  stringAppend(String& s, Iterator begin, Iterator end)\n  {\n    return s.append(begin, end);\n  }\n\n  template <typename T>\n  std::string\n  toUTF8String(T&& t)\n  {\n    return std::forward<T>(t);\n  }\n\n  inline\n  bool\n  empty(const std::string& s)\n  {\n    return s.empty();\n  }\n} // namespace cxxopts\n\n//ifdef CXXOPTS_USE_UNICODE\n#endif\n\nnamespace cxxopts\n{\n  namespace\n  {\n#ifdef _WIN32\n    const std::string LQUOTE(\"\\'\");\n    const std::string RQUOTE(\"\\'\");\n#else\n    const std::string LQUOTE(\"‘\");\n    const std::string RQUOTE(\"’\");\n#endif\n  } // namespace\n\n#if defined(__GNUC__)\n// GNU GCC with -Weffc++ will issue a warning regarding the upcoming class, we want to silence it:\n// warning: base class 'class std::enable_shared_from_this<cxxopts::Value>' has accessible non-virtual destructor\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored 
\"-Wnon-virtual-dtor\"\n#pragma GCC diagnostic ignored \"-Weffc++\"\n// This will be ignored under other compilers like LLVM clang.\n#endif\n  class Value : public std::enable_shared_from_this<Value>\n  {\n    public:\n\n    virtual ~Value() = default;\n\n    virtual\n    std::shared_ptr<Value>\n    clone() const = 0;\n\n    virtual void\n    parse(const std::string& text) const = 0;\n\n    virtual void\n    parse() const = 0;\n\n    virtual bool\n    has_default() const = 0;\n\n    virtual bool\n    is_container() const = 0;\n\n    virtual bool\n    has_implicit() const = 0;\n\n    virtual std::string\n    get_default_value() const = 0;\n\n    virtual std::string\n    get_implicit_value() const = 0;\n\n    virtual std::shared_ptr<Value>\n    default_value(const std::string& value) = 0;\n\n    virtual std::shared_ptr<Value>\n    implicit_value(const std::string& value) = 0;\n\n    virtual std::shared_ptr<Value>\n    no_implicit_value() = 0;\n\n    virtual bool\n    is_boolean() const = 0;\n  };\n#if defined(__GNUC__)\n#pragma GCC diagnostic pop\n#endif\n  class OptionException : public std::exception\n  {\n    public:\n    explicit OptionException(std::string  message)\n    : m_message(std::move(message))\n    {\n    }\n\n    CXXOPTS_NODISCARD\n    const char*\n    what() const noexcept override\n    {\n      return m_message.c_str();\n    }\n\n    private:\n    std::string m_message;\n  };\n\n  class OptionSpecException : public OptionException\n  {\n    public:\n\n    explicit OptionSpecException(const std::string& message)\n    : OptionException(message)\n    {\n    }\n  };\n\n  class OptionParseException : public OptionException\n  {\n    public:\n    explicit OptionParseException(const std::string& message)\n    : OptionException(message)\n    {\n    }\n  };\n\n  class option_exists_error : public OptionSpecException\n  {\n    public:\n    explicit option_exists_error(const std::string& option)\n    : OptionSpecException(\"Option \" + LQUOTE + option + RQUOTE + \" already exists\")\n    {\n    }\n  };\n\n  class invalid_option_format_error : public OptionSpecException\n  {\n    public:\n    explicit invalid_option_format_error(const std::string& format)\n    : OptionSpecException(\"Invalid option format \" + LQUOTE + format + RQUOTE)\n    {\n    }\n  };\n\n  class option_syntax_exception : public OptionParseException {\n    public:\n    explicit option_syntax_exception(const std::string& text)\n    : OptionParseException(\"Argument \" + LQUOTE + text + RQUOTE +\n        \" starts with a - but has incorrect syntax\")\n    {\n    }\n  };\n\n  class option_not_exists_exception : public OptionParseException\n  {\n    public:\n    explicit option_not_exists_exception(const std::string& option)\n    : OptionParseException(\"Option \" + LQUOTE + option + RQUOTE + \" does not exist\")\n    {\n    }\n  };\n\n  class missing_argument_exception : public OptionParseException\n  {\n    public:\n    explicit missing_argument_exception(const std::string& option)\n    : OptionParseException(\n        \"Option \" + LQUOTE + option + RQUOTE + \" is missing an argument\"\n      )\n    {\n    }\n  };\n\n  class option_requires_argument_exception : public OptionParseException\n  {\n    public:\n    explicit option_requires_argument_exception(const std::string& option)\n    : OptionParseException(\n        \"Option \" + LQUOTE + option + RQUOTE + \" requires an argument\"\n      )\n    {\n    }\n  };\n\n  class option_not_has_argument_exception : public OptionParseException\n  {\n    public:\n    
option_not_has_argument_exception\n    (\n      const std::string& option,\n      const std::string& arg\n    )\n    : OptionParseException(\n        \"Option \" + LQUOTE + option + RQUOTE +\n        \" does not take an argument, but argument \" +\n        LQUOTE + arg + RQUOTE + \" given\"\n      )\n    {\n    }\n  };\n\n  class option_not_present_exception : public OptionParseException\n  {\n    public:\n    explicit option_not_present_exception(const std::string& option)\n    : OptionParseException(\"Option \" + LQUOTE + option + RQUOTE + \" not present\")\n    {\n    }\n  };\n\n  class option_has_no_value_exception : public OptionException\n  {\n    public:\n    explicit option_has_no_value_exception(const std::string& option)\n    : OptionException(\n        !option.empty() ?\n        (\"Option \" + LQUOTE + option + RQUOTE + \" has no value\") :\n        \"Option has no value\")\n    {\n    }\n  };\n\n  class argument_incorrect_type : public OptionParseException\n  {\n    public:\n    explicit argument_incorrect_type\n    (\n      const std::string& arg\n    )\n    : OptionParseException(\n        \"Argument \" + LQUOTE + arg + RQUOTE + \" failed to parse\"\n      )\n    {\n    }\n  };\n\n  class option_required_exception : public OptionParseException\n  {\n    public:\n    explicit option_required_exception(const std::string& option)\n    : OptionParseException(\n        \"Option \" + LQUOTE + option + RQUOTE + \" is required but not present\"\n      )\n    {\n    }\n  };\n\n  template <typename T>\n  void throw_or_mimic(const std::string& text)\n  {\n    static_assert(std::is_base_of<std::exception, T>::value,\n                  \"throw_or_mimic only works on std::exception and \"\n                  \"deriving classes\");\n\n#ifndef CXXOPTS_NO_EXCEPTIONS\n    // If CXXOPTS_NO_EXCEPTIONS is not defined, just throw\n    throw T{text};\n#else\n    // Otherwise manually instantiate the exception, print what() to stderr,\n    // and exit\n    T exception{text};\n    std::cerr << exception.what() << std::endl;\n    std::exit(EXIT_FAILURE);\n#endif\n  }\n\n  namespace values\n  {\n    namespace parser_tool\n    {\n      struct IntegerDesc\n      {\n        std::string negative = \"\";\n        std::string base     = \"\";\n        std::string value    = \"\";\n      };\n      struct ArguDesc {\n        std::string arg_name  = \"\";\n        bool        grouping  = false;\n        bool        set_value = false;\n        std::string value     = \"\";\n      };\n#ifdef CXXOPTS_NO_REGEX\n      inline IntegerDesc SplitInteger(const std::string &text)\n      {\n        if (text.empty())\n        {\n          throw_or_mimic<argument_incorrect_type>(text);\n        }\n        IntegerDesc desc;\n        const char *pdata = text.c_str();\n        if (*pdata == '-')\n        {\n          pdata += 1;\n          desc.negative = \"-\";\n        }\n        if (strncmp(pdata, \"0x\", 2) == 0)\n        {\n          pdata += 2;\n          desc.base = \"0x\";\n        }\n        if (*pdata != '\\0')\n        {\n          desc.value = std::string(pdata);\n        }\n        else\n        {\n          throw_or_mimic<argument_incorrect_type>(text);\n        }\n        return desc;\n      }\n\n      inline bool IsTrueText(const std::string &text)\n      {\n        const char *pdata = text.c_str();\n        if (*pdata == 't' || *pdata == 'T')\n        {\n          pdata += 1;\n          if (strncmp(pdata, \"rue\\0\", 4) == 0)\n          {\n            return true;\n          }\n        }\n        else if 
(strncmp(pdata, \"1\\0\", 2) == 0)\n        {\n          return true;\n        }\n        return false;\n      }\n\n      inline bool IsFalseText(const std::string &text)\n      {\n        const char *pdata = text.c_str();\n        if (*pdata == 'f' || *pdata == 'F')\n        {\n          pdata += 1;\n          if (strncmp(pdata, \"alse\\0\", 5) == 0)\n          {\n            return true;\n          }\n        }\n        else if (strncmp(pdata, \"0\\0\", 2) == 0)\n        {\n          return true;\n        }\n        return false;\n      }\n\n      inline std::pair<std::string, std::string> SplitSwitchDef(const std::string &text)\n      {\n        std::string short_sw, long_sw;\n        const char *pdata = text.c_str();\n        if (isalnum(*pdata) && *(pdata + 1) == ',') {\n          short_sw = std::string(1, *pdata);\n          pdata += 2;\n        }\n        while (*pdata == ' ') { pdata += 1; }\n        if (isalnum(*pdata)) {\n          const char *store = pdata;\n          pdata += 1;\n          while (isalnum(*pdata) || *pdata == '-' || *pdata == '_') {\n            pdata += 1;\n          }\n          if (*pdata == '\\0') {\n            long_sw = std::string(store, pdata - store);\n          } else {\n            throw_or_mimic<invalid_option_format_error>(text);\n          }\n        }\n        return std::pair<std::string, std::string>(short_sw, long_sw);\n      }\n\n      inline ArguDesc ParseArgument(const char *arg, bool &matched)\n      {\n        ArguDesc argu_desc;\n        const char *pdata = arg;\n        matched = false;\n        if (strncmp(pdata, \"--\", 2) == 0)\n        {\n          pdata += 2;\n          if (isalnum(*pdata))\n          {\n            argu_desc.arg_name.push_back(*pdata);\n            pdata += 1;\n            while (isalnum(*pdata) || *pdata == '-' || *pdata == '_')\n            {\n              argu_desc.arg_name.push_back(*pdata);\n              pdata += 1;\n            }\n            if (argu_desc.arg_name.length() > 1)\n            {\n              if (*pdata == '=')\n              {\n                argu_desc.set_value = true;\n                pdata += 1;\n                if (*pdata != '\\0')\n                {\n                  argu_desc.value = std::string(pdata);\n                }\n                matched = true;\n              }\n              else if (*pdata == '\\0')\n              {\n                matched = true;\n              }\n            }\n          }\n        }\n        else if (strncmp(pdata, \"-\", 1) == 0)\n        {\n          pdata += 1;\n          argu_desc.grouping = true;\n          while (isalnum(*pdata))\n          {\n            argu_desc.arg_name.push_back(*pdata);\n            pdata += 1;\n          }\n          matched = !argu_desc.arg_name.empty() && *pdata == '\\0';\n        }\n        return argu_desc;\n      }\n\n#else  // CXXOPTS_NO_REGEX\n\n      namespace\n      {\n\n        std::basic_regex<char> integer_pattern\n          (\"(-)?(0x)?([0-9a-zA-Z]+)|((0x)?0)\");\n        std::basic_regex<char> truthy_pattern\n          (\"(t|T)(rue)?|1\");\n        std::basic_regex<char> falsy_pattern\n          (\"(f|F)(alse)?|0\");\n\n        std::basic_regex<char> option_matcher\n          (\"--([[:alnum:]][-_[:alnum:]]+)(=(.*))?|-([[:alnum:]]+)\");\n        std::basic_regex<char> option_specifier\n          (\"(([[:alnum:]]),)?[ ]*([[:alnum:]][-_[:alnum:]]*)?\");\n\n      } // namespace\n\n      inline IntegerDesc SplitInteger(const std::string &text)\n      {\n        std::smatch match;\n        std::regex_match(text, 
match, integer_pattern);\n\n        if (match.length() == 0)\n        {\n          throw_or_mimic<argument_incorrect_type>(text);\n        }\n\n        IntegerDesc desc;\n        desc.negative = match[1];\n        desc.base = match[2];\n        desc.value = match[3];\n\n        if (match.length(4) > 0)\n        {\n          desc.base = match[5];\n          desc.value = \"0\";\n          return desc;\n        }\n\n        return desc;\n      }\n\n      inline bool IsTrueText(const std::string &text)\n      {\n        std::smatch result;\n        std::regex_match(text, result, truthy_pattern);\n        return !result.empty();\n      }\n\n      inline bool IsFalseText(const std::string &text)\n      {\n        std::smatch result;\n        std::regex_match(text, result, falsy_pattern);\n        return !result.empty();\n      }\n\n      inline std::pair<std::string, std::string> SplitSwitchDef(const std::string &text)\n      {\n        std::match_results<const char*> result;\n        std::regex_match(text.c_str(), result, option_specifier);\n        if (result.empty())\n        {\n          throw_or_mimic<invalid_option_format_error>(text);\n        }\n\n        const std::string& short_sw = result[2];\n        const std::string& long_sw = result[3];\n\n        return std::pair<std::string, std::string>(short_sw, long_sw);\n      }\n\n      inline ArguDesc ParseArgument(const char *arg, bool &matched)\n      {\n        std::match_results<const char*> result;\n        std::regex_match(arg, result, option_matcher);\n        matched = !result.empty();\n\n        ArguDesc argu_desc;\n        if (matched) {\n          argu_desc.arg_name = result[1].str();\n          argu_desc.set_value = result[2].length() > 0;\n          argu_desc.value = result[3].str();\n          if (result[4].length() > 0)\n          {\n            argu_desc.grouping = true;\n            argu_desc.arg_name = result[4].str();\n          }\n        }\n\n        return argu_desc;\n      }\n\n#endif  // CXXOPTS_NO_REGEX\n#undef CXXOPTS_NO_REGEX\n  }\n\n    namespace detail\n    {\n      template <typename T, bool B>\n      struct SignedCheck;\n\n      template <typename T>\n      struct SignedCheck<T, true>\n      {\n        template <typename U>\n        void\n        operator()(bool negative, U u, const std::string& text)\n        {\n          if (negative)\n          {\n            if (u > static_cast<U>((std::numeric_limits<T>::min)()))\n            {\n              throw_or_mimic<argument_incorrect_type>(text);\n            }\n          }\n          else\n          {\n            if (u > static_cast<U>((std::numeric_limits<T>::max)()))\n            {\n              throw_or_mimic<argument_incorrect_type>(text);\n            }\n          }\n        }\n      };\n\n      template <typename T>\n      struct SignedCheck<T, false>\n      {\n        template <typename U>\n        void\n        operator()(bool, U, const std::string&) const {}\n      };\n\n      template <typename T, typename U>\n      void\n      check_signed_range(bool negative, U value, const std::string& text)\n      {\n        SignedCheck<T, std::numeric_limits<T>::is_signed>()(negative, value, text);\n      }\n    } // namespace detail\n\n    template <typename R, typename T>\n    void\n    checked_negate(R& r, T&& t, const std::string&, std::true_type)\n    {\n      // if we got to here, then `t` is a positive number that fits into\n      // `R`. 
So to avoid MSVC C4146, we first cast it to `R`.\n      // See https://github.com/jarro2783/cxxopts/issues/62 for more details.\n      r = static_cast<R>(-static_cast<R>(t-1)-1);\n    }\n\n    template <typename R, typename T>\n    void\n    checked_negate(R&, T&&, const std::string& text, std::false_type)\n    {\n      throw_or_mimic<argument_incorrect_type>(text);\n    }\n\n    template <typename T>\n    void\n    integer_parser(const std::string& text, T& value)\n    {\n      parser_tool::IntegerDesc int_desc = parser_tool::SplitInteger(text);\n\n      using US = typename std::make_unsigned<T>::type;\n      constexpr bool is_signed = std::numeric_limits<T>::is_signed;\n\n      const bool          negative    = int_desc.negative.length() > 0;\n      const uint8_t       base        = int_desc.base.length() > 0 ? 16 : 10;\n      const std::string & value_match = int_desc.value;\n\n      US result = 0;\n\n      for (char ch : value_match)\n      {\n        US digit = 0;\n\n        if (ch >= '0' && ch <= '9')\n        {\n          digit = static_cast<US>(ch - '0');\n        }\n        else if (base == 16 && ch >= 'a' && ch <= 'f')\n        {\n          digit = static_cast<US>(ch - 'a' + 10);\n        }\n        else if (base == 16 && ch >= 'A' && ch <= 'F')\n        {\n          digit = static_cast<US>(ch - 'A' + 10);\n        }\n        else\n        {\n          throw_or_mimic<argument_incorrect_type>(text);\n        }\n\n        const US next = static_cast<US>(result * base + digit);\n        if (result > next)\n        {\n          throw_or_mimic<argument_incorrect_type>(text);\n        }\n\n        result = next;\n      }\n\n      detail::check_signed_range<T>(negative, result, text);\n\n      if (negative)\n      {\n        checked_negate<T>(value, result, text, std::integral_constant<bool, is_signed>());\n      }\n      else\n      {\n        value = static_cast<T>(result);\n      }\n    }\n\n    template <typename T>\n    void stringstream_parser(const std::string& text, T& value)\n    {\n      std::stringstream in(text);\n      in >> value;\n      if (!in) {\n        throw_or_mimic<argument_incorrect_type>(text);\n      }\n    }\n\n    template <typename T,\n             typename std::enable_if<std::is_integral<T>::value>::type* = nullptr\n             >\n    void parse_value(const std::string& text, T& value)\n    {\n        integer_parser(text, value);\n    }\n\n    inline\n    void\n    parse_value(const std::string& text, bool& value)\n    {\n      if (parser_tool::IsTrueText(text))\n      {\n        value = true;\n        return;\n      }\n\n      if (parser_tool::IsFalseText(text))\n      {\n        value = false;\n        return;\n      }\n\n      throw_or_mimic<argument_incorrect_type>(text);\n    }\n\n    inline\n    void\n    parse_value(const std::string& text, std::string& value)\n    {\n      value = text;\n    }\n\n    // The fallback parser. It uses the stringstream parser to parse all types\n    // that have not been overloaded explicitly.  
It has to be placed in the\n    // source code before all other more specialized templates.\n    template <typename T,\n             typename std::enable_if<!std::is_integral<T>::value>::type* = nullptr\n             >\n    void\n    parse_value(const std::string& text, T& value) {\n      stringstream_parser(text, value);\n    }\n\n    template <typename T>\n    void\n    parse_value(const std::string& text, std::vector<T>& value)\n    {\n      if (text.empty()) {\n        T v;\n        parse_value(text, v);\n        value.emplace_back(std::move(v));\n        return;\n      }\n      std::stringstream in(text);\n      std::string token;\n      while(!in.eof() && std::getline(in, token, CXXOPTS_VECTOR_DELIMITER)) {\n        T v;\n        parse_value(token, v);\n        value.emplace_back(std::move(v));\n      }\n    }\n\n#ifdef CXXOPTS_HAS_OPTIONAL\n    template <typename T>\n    void\n    parse_value(const std::string& text, std::optional<T>& value)\n    {\n      T result;\n      parse_value(text, result);\n      value = std::move(result);\n    }\n#endif\n\n    inline\n    void parse_value(const std::string& text, char& c)\n    {\n      if (text.length() != 1)\n      {\n        throw_or_mimic<argument_incorrect_type>(text);\n      }\n\n      c = text[0];\n    }\n\n    template <typename T>\n    struct type_is_container\n    {\n      static constexpr bool value = false;\n    };\n\n    template <typename T>\n    struct type_is_container<std::vector<T>>\n    {\n      static constexpr bool value = true;\n    };\n\n    template <typename T>\n    class abstract_value : public Value\n    {\n      using Self = abstract_value<T>;\n\n      public:\n      abstract_value()\n      : m_result(std::make_shared<T>())\n      , m_store(m_result.get())\n      {\n      }\n\n      explicit abstract_value(T* t)\n      : m_store(t)\n      {\n      }\n\n      ~abstract_value() override = default;\n\n      abstract_value& operator=(const abstract_value&) = default;\n\n      abstract_value(const abstract_value& rhs)\n      {\n        if (rhs.m_result)\n        {\n          m_result = std::make_shared<T>();\n          m_store = m_result.get();\n        }\n        else\n        {\n          m_store = rhs.m_store;\n        }\n\n        m_default = rhs.m_default;\n        m_implicit = rhs.m_implicit;\n        m_default_value = rhs.m_default_value;\n        m_implicit_value = rhs.m_implicit_value;\n      }\n\n      void\n      parse(const std::string& text) const override\n      {\n        parse_value(text, *m_store);\n      }\n\n      bool\n      is_container() const override\n      {\n        return type_is_container<T>::value;\n      }\n\n      void\n      parse() const override\n      {\n        parse_value(m_default_value, *m_store);\n      }\n\n      bool\n      has_default() const override\n      {\n        return m_default;\n      }\n\n      bool\n      has_implicit() const override\n      {\n        return m_implicit;\n      }\n\n      std::shared_ptr<Value>\n      default_value(const std::string& value) override\n      {\n        m_default = true;\n        m_default_value = value;\n        return shared_from_this();\n      }\n\n      std::shared_ptr<Value>\n      implicit_value(const std::string& value) override\n      {\n        m_implicit = true;\n        m_implicit_value = value;\n        return shared_from_this();\n      }\n\n      std::shared_ptr<Value>\n      no_implicit_value() override\n      {\n        m_implicit = false;\n        return shared_from_this();\n      }\n\n      std::string\n      
get_default_value() const override\n      {\n        return m_default_value;\n      }\n\n      std::string\n      get_implicit_value() const override\n      {\n        return m_implicit_value;\n      }\n\n      bool\n      is_boolean() const override\n      {\n        return std::is_same<T, bool>::value;\n      }\n\n      const T&\n      get() const\n      {\n        if (m_store == nullptr)\n        {\n          return *m_result;\n        }\n        return *m_store;\n      }\n\n      protected:\n      std::shared_ptr<T> m_result{};\n      T* m_store{};\n\n      bool m_default = false;\n      bool m_implicit = false;\n\n      std::string m_default_value{};\n      std::string m_implicit_value{};\n    };\n\n    template <typename T>\n    class standard_value : public abstract_value<T>\n    {\n      public:\n      using abstract_value<T>::abstract_value;\n\n      CXXOPTS_NODISCARD\n      std::shared_ptr<Value>\n      clone() const override\n      {\n        return std::make_shared<standard_value<T>>(*this);\n      }\n    };\n\n    template <>\n    class standard_value<bool> : public abstract_value<bool>\n    {\n      public:\n      ~standard_value() override = default;\n\n      standard_value()\n      {\n        set_default_and_implicit();\n      }\n\n      explicit standard_value(bool* b)\n      : abstract_value(b)\n      {\n        set_default_and_implicit();\n      }\n\n      std::shared_ptr<Value>\n      clone() const override\n      {\n        return std::make_shared<standard_value<bool>>(*this);\n      }\n\n      private:\n\n      void\n      set_default_and_implicit()\n      {\n        m_default = true;\n        m_default_value = \"false\";\n        m_implicit = true;\n        m_implicit_value = \"true\";\n      }\n    };\n  } // namespace values\n\n  template <typename T>\n  std::shared_ptr<Value>\n  value()\n  {\n    return std::make_shared<values::standard_value<T>>();\n  }\n\n  template <typename T>\n  std::shared_ptr<Value>\n  value(T& t)\n  {\n    return std::make_shared<values::standard_value<T>>(&t);\n  }\n\n  class OptionAdder;\n\n  class OptionDetails\n  {\n    public:\n    OptionDetails\n    (\n      std::string short_,\n      std::string long_,\n      String desc,\n      std::shared_ptr<const Value> val\n    )\n    : m_short(std::move(short_))\n    , m_long(std::move(long_))\n    , m_desc(std::move(desc))\n    , m_value(std::move(val))\n    , m_count(0)\n    {\n      m_hash = std::hash<std::string>{}(m_long + m_short);\n    }\n\n    OptionDetails(const OptionDetails& rhs)\n    : m_desc(rhs.m_desc)\n    , m_value(rhs.m_value->clone())\n    , m_count(rhs.m_count)\n    {\n    }\n\n    OptionDetails(OptionDetails&& rhs) = default;\n\n    CXXOPTS_NODISCARD\n    const String&\n    description() const\n    {\n      return m_desc;\n    }\n\n    CXXOPTS_NODISCARD\n    const Value&\n    value() const {\n        return *m_value;\n    }\n\n    CXXOPTS_NODISCARD\n    std::shared_ptr<Value>\n    make_storage() const\n    {\n      return m_value->clone();\n    }\n\n    CXXOPTS_NODISCARD\n    const std::string&\n    short_name() const\n    {\n      return m_short;\n    }\n\n    CXXOPTS_NODISCARD\n    const std::string&\n    long_name() const\n    {\n      return m_long;\n    }\n\n    CXXOPTS_NODISCARD\n    const std::string&\n    essential_name() const\n    {\n      return m_long.empty() ? 
m_short : m_long;\n    }\n\n    size_t\n    hash() const\n    {\n      return m_hash;\n    }\n\n    private:\n    std::string m_short{};\n    std::string m_long{};\n    String m_desc{};\n    std::shared_ptr<const Value> m_value{};\n    int m_count;\n\n    size_t m_hash{};\n  };\n\n  struct HelpOptionDetails\n  {\n    std::string s;\n    std::string l;\n    String desc;\n    bool has_default;\n    std::string default_value;\n    bool has_implicit;\n    std::string implicit_value;\n    std::string arg_help;\n    bool is_container;\n    bool is_boolean;\n  };\n\n  struct HelpGroupDetails\n  {\n    std::string name{};\n    std::string description{};\n    std::vector<HelpOptionDetails> options{};\n  };\n\n  class OptionValue\n  {\n    public:\n    void\n    parse\n    (\n      const std::shared_ptr<const OptionDetails>& details,\n      const std::string& text\n    )\n    {\n      ensure_value(details);\n      ++m_count;\n      m_value->parse(text);\n      m_long_name = &details->long_name();\n    }\n\n    void\n    parse_default(const std::shared_ptr<const OptionDetails>& details)\n    {\n      ensure_value(details);\n      m_default = true;\n      m_long_name = &details->long_name();\n      m_value->parse();\n    }\n\n    void\n    parse_no_value(const std::shared_ptr<const OptionDetails>& details)\n    {\n      m_long_name = &details->long_name();\n    }\n\n#if defined(CXXOPTS_NULL_DEREF_IGNORE)\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored \"-Wnull-dereference\"\n#endif\n\n    CXXOPTS_NODISCARD\n    size_t\n    count() const noexcept\n    {\n      return m_count;\n    }\n\n#if defined(CXXOPTS_NULL_DEREF_IGNORE)\n#pragma GCC diagnostic pop\n#endif\n\n    // TODO: maybe default options should count towards the number of arguments\n    CXXOPTS_NODISCARD\n    bool\n    has_default() const noexcept\n    {\n      return m_default;\n    }\n\n    template <typename T>\n    const T&\n    as() const\n    {\n      if (m_value == nullptr) {\n          throw_or_mimic<option_has_no_value_exception>(\n              m_long_name == nullptr ? 
\"\" : *m_long_name);\n      }\n\n#ifdef CXXOPTS_NO_RTTI\n      return static_cast<const values::standard_value<T>&>(*m_value).get();\n#else\n      return dynamic_cast<const values::standard_value<T>&>(*m_value).get();\n#endif\n    }\n\n    private:\n    void\n    ensure_value(const std::shared_ptr<const OptionDetails>& details)\n    {\n      if (m_value == nullptr)\n      {\n        m_value = details->make_storage();\n      }\n    }\n\n\n    const std::string* m_long_name = nullptr;\n    // Holding this pointer is safe, since OptionValue's only exist in key-value pairs,\n    // where the key has the string we point to.\n    std::shared_ptr<Value> m_value{};\n    size_t m_count = 0;\n    bool m_default = false;\n  };\n\n  class KeyValue\n  {\n    public:\n    KeyValue(std::string key_, std::string value_)\n    : m_key(std::move(key_))\n    , m_value(std::move(value_))\n    {\n    }\n\n    CXXOPTS_NODISCARD\n    const std::string&\n    key() const\n    {\n      return m_key;\n    }\n\n    CXXOPTS_NODISCARD\n    const std::string&\n    value() const\n    {\n      return m_value;\n    }\n\n    template <typename T>\n    T\n    as() const\n    {\n      T result;\n      values::parse_value(m_value, result);\n      return result;\n    }\n\n    private:\n    std::string m_key;\n    std::string m_value;\n  };\n\n  using ParsedHashMap = std::unordered_map<size_t, OptionValue>;\n  using NameHashMap = std::unordered_map<std::string, size_t>;\n\n  class ParseResult\n  {\n    public:\n    class Iterator\n    {\n      public:\n      using iterator_category = std::forward_iterator_tag;\n      using value_type = KeyValue;\n      using difference_type = void;\n      using pointer = const KeyValue*;\n      using reference = const KeyValue&;\n\n      Iterator() = default;\n      Iterator(const Iterator&) = default;\n\n      Iterator(const ParseResult *pr, bool end=false)\n      : m_pr(pr)\n      , m_iter(end? 
pr->m_defaults.end(): pr->m_sequential.begin())\n      {\n      }\n\n      Iterator& operator++()\n      {\n        ++m_iter;\n        if(m_iter == m_pr->m_sequential.end())\n        {\n          m_iter = m_pr->m_defaults.begin();\n          return *this;\n        }\n        return *this;\n      }\n\n      Iterator operator++(int)\n      {\n        Iterator retval = *this;\n        ++(*this);\n        return retval;\n      }\n\n      bool operator==(const Iterator& other) const\n      {\n        return m_iter == other.m_iter;\n      }\n\n      bool operator!=(const Iterator& other) const\n      {\n        return !(*this == other);\n      }\n\n      const KeyValue& operator*()\n      {\n        return *m_iter;\n      }\n\n      const KeyValue* operator->()\n      {\n        return m_iter.operator->();\n      }\n\n      private:\n      const ParseResult* m_pr;\n      std::vector<KeyValue>::const_iterator m_iter;\n    };\n\n    ParseResult() = default;\n    ParseResult(const ParseResult&) = default;\n\n    ParseResult(NameHashMap&& keys, ParsedHashMap&& values, std::vector<KeyValue> sequential, \n            std::vector<KeyValue> default_opts, std::vector<std::string>&& unmatched_args)\n    : m_keys(std::move(keys))\n    , m_values(std::move(values))\n    , m_sequential(std::move(sequential))\n    , m_defaults(std::move(default_opts))\n    , m_unmatched(std::move(unmatched_args))\n    {\n    }\n\n    ParseResult& operator=(ParseResult&&) = default;\n    ParseResult& operator=(const ParseResult&) = default;\n\n    Iterator\n    begin() const\n    {\n      return Iterator(this);\n    }\n\n    Iterator\n    end() const\n    {\n      return Iterator(this, true);\n    }\n\n    size_t\n    count(const std::string& o) const\n    {\n      auto iter = m_keys.find(o);\n      if (iter == m_keys.end())\n      {\n        return 0;\n      }\n\n      auto viter = m_values.find(iter->second);\n\n      if (viter == m_values.end())\n      {\n        return 0;\n      }\n\n      return viter->second.count();\n    }\n\n    const OptionValue&\n    operator[](const std::string& option) const\n    {\n      auto iter = m_keys.find(option);\n\n      if (iter == m_keys.end())\n      {\n        throw_or_mimic<option_not_present_exception>(option);\n      }\n\n      auto viter = m_values.find(iter->second);\n\n      if (viter == m_values.end())\n      {\n        throw_or_mimic<option_not_present_exception>(option);\n      }\n\n      return viter->second;\n    }\n\n    const std::vector<KeyValue>&\n    arguments() const\n    {\n      return m_sequential;\n    }\n\n    const std::vector<std::string>&\n    unmatched() const\n    {\n      return m_unmatched;\n    }\n\n    const std::vector<KeyValue>&\n    defaults() const\n    {\n      return m_defaults;\n    }\n\n    const std::string\n    arguments_string() const\n    {\n      std::string result;\n      for(const auto& kv: m_sequential)\n      {\n        result += kv.key() + \" = \" + kv.value() + \"\\n\";\n      }\n      for(const auto& kv: m_defaults)\n      {\n        result += kv.key() + \" = \" + kv.value() + \" \" + \"(default)\" + \"\\n\";\n      }\n      return result;\n    }\n\n    private:\n    NameHashMap m_keys{};\n    ParsedHashMap m_values{};\n    std::vector<KeyValue> m_sequential{};\n    std::vector<KeyValue> m_defaults{};\n    std::vector<std::string> m_unmatched{};\n  };\n\n  struct Option\n  {\n    Option\n    (\n      std::string opts,\n      std::string desc,\n      std::shared_ptr<const Value>  value = ::cxxopts::value<bool>(),\n      std::string 
arg_help = \"\"\n    )\n    : opts_(std::move(opts))\n    , desc_(std::move(desc))\n    , value_(std::move(value))\n    , arg_help_(std::move(arg_help))\n    {\n    }\n\n    std::string opts_;\n    std::string desc_;\n    std::shared_ptr<const Value> value_;\n    std::string arg_help_;\n  };\n\n  using OptionMap = std::unordered_map<std::string, std::shared_ptr<OptionDetails>>;\n  using PositionalList = std::vector<std::string>;\n  using PositionalListIterator = PositionalList::const_iterator;\n\n  class OptionParser\n  {\n    public:\n    OptionParser(const OptionMap& options, const PositionalList& positional, bool allow_unrecognised)\n    : m_options(options)\n    , m_positional(positional)\n    , m_allow_unrecognised(allow_unrecognised)\n    {\n    }\n\n    ParseResult\n    parse(int argc, const char* const* argv);\n\n    bool\n    consume_positional(const std::string& a, PositionalListIterator& next);\n\n    void\n    checked_parse_arg\n    (\n      int argc,\n      const char* const* argv,\n      int& current,\n      const std::shared_ptr<OptionDetails>& value,\n      const std::string& name\n    );\n\n    void\n    add_to_option(OptionMap::const_iterator iter, const std::string& option, const std::string& arg);\n\n    void\n    parse_option\n    (\n      const std::shared_ptr<OptionDetails>& value,\n      const std::string& name,\n      const std::string& arg = \"\"\n    );\n\n    void\n    parse_default(const std::shared_ptr<OptionDetails>& details);\n\n    void\n    parse_no_value(const std::shared_ptr<OptionDetails>& details);\n\n    private:\n\n    void finalise_aliases();\n\n    const OptionMap& m_options;\n    const PositionalList& m_positional;\n\n    std::vector<KeyValue> m_sequential{};\n    std::vector<KeyValue> m_defaults{};\n    bool m_allow_unrecognised;\n\n    ParsedHashMap m_parsed{};\n    NameHashMap m_keys{};\n  };\n\n  class Options\n  {\n    public:\n\n    explicit Options(std::string program, std::string help_string = \"\")\n    : m_program(std::move(program))\n    , m_help_string(toLocalString(std::move(help_string)))\n    , m_custom_help(\"[OPTION...]\")\n    , m_positional_help(\"positional parameters\")\n    , m_show_positional(false)\n    , m_allow_unrecognised(false)\n    , m_width(76)\n    , m_tab_expansion(false)\n    , m_options(std::make_shared<OptionMap>())\n    {\n    }\n\n    Options&\n    positional_help(std::string help_text)\n    {\n      m_positional_help = std::move(help_text);\n      return *this;\n    }\n\n    Options&\n    custom_help(std::string help_text)\n    {\n      m_custom_help = std::move(help_text);\n      return *this;\n    }\n\n    Options&\n    show_positional_help()\n    {\n      m_show_positional = true;\n      return *this;\n    }\n\n    Options&\n    allow_unrecognised_options()\n    {\n      m_allow_unrecognised = true;\n      return *this;\n    }\n\n    Options&\n    set_width(size_t width)\n    {\n      m_width = width;\n      return *this;\n    }\n\n    Options&\n    set_tab_expansion(bool expansion=true)\n    {\n      m_tab_expansion = expansion;\n      return *this;\n    }\n\n    ParseResult\n    parse(int argc, const char* const* argv);\n\n    OptionAdder\n    add_options(std::string group = \"\");\n\n    void\n    add_options\n    (\n      const std::string& group,\n      std::initializer_list<Option> options\n    );\n\n    void\n    add_option\n    (\n      const std::string& group,\n      const Option& option\n    );\n\n    void\n    add_option\n    (\n      const std::string& group,\n      const std::string& s,\n     
 const std::string& l,\n      std::string desc,\n      const std::shared_ptr<const Value>& value,\n      std::string arg_help\n    );\n\n    //parse positional arguments into the given option\n    void\n    parse_positional(std::string option);\n\n    void\n    parse_positional(std::vector<std::string> options);\n\n    void\n    parse_positional(std::initializer_list<std::string> options);\n\n    template <typename Iterator>\n    void\n    parse_positional(Iterator begin, Iterator end) {\n      parse_positional(std::vector<std::string>{begin, end});\n    }\n\n    std::string\n    help(const std::vector<std::string>& groups = {}) const;\n\n    std::vector<std::string>\n    groups() const;\n\n    const HelpGroupDetails&\n    group_help(const std::string& group) const;\n\n    const std::string& program() const\n    {\n      return m_program;\n    }\n\n    private:\n\n    void\n    add_one_option\n    (\n      const std::string& option,\n      const std::shared_ptr<OptionDetails>& details\n    );\n\n    String\n    help_one_group(const std::string& group) const;\n\n    void\n    generate_group_help\n    (\n      String& result,\n      const std::vector<std::string>& groups\n    ) const;\n\n    void\n    generate_all_groups_help(String& result) const;\n\n    std::string m_program{};\n    String m_help_string{};\n    std::string m_custom_help{};\n    std::string m_positional_help{};\n    bool m_show_positional;\n    bool m_allow_unrecognised;\n    size_t m_width;\n    bool m_tab_expansion;\n\n    std::shared_ptr<OptionMap> m_options;\n    std::vector<std::string> m_positional{};\n    std::unordered_set<std::string> m_positional_set{};\n\n    //mapping from groups to help options\n    std::map<std::string, HelpGroupDetails> m_help{};\n  };\n\n  class OptionAdder\n  {\n    public:\n\n    OptionAdder(Options& options, std::string group)\n    : m_options(options), m_group(std::move(group))\n    {\n    }\n\n    OptionAdder&\n    operator()\n    (\n      const std::string& opts,\n      const std::string& desc,\n      const std::shared_ptr<const Value>& value\n        = ::cxxopts::value<bool>(),\n      std::string arg_help = \"\"\n    );\n\n    private:\n    Options& m_options;\n    std::string m_group;\n  };\n\n  namespace\n  {\n    constexpr size_t OPTION_LONGEST = 30;\n    constexpr size_t OPTION_DESC_GAP = 2;\n\n    String\n    format_option\n    (\n      const HelpOptionDetails& o\n    )\n    {\n      const auto& s = o.s;\n      const auto& l = o.l;\n\n      String result = \"  \";\n\n      if (!s.empty())\n      {\n        result += \"-\" + toLocalString(s);\n        if (!l.empty())\n        {\n          result += \",\";\n        }\n      }\n      else\n      {\n        result += \"   \";\n      }\n\n      if (!l.empty())\n      {\n        result += \" --\" + toLocalString(l);\n      }\n\n      auto arg = !o.arg_help.empty() ? 
toLocalString(o.arg_help) : \"arg\";\n\n      if (!o.is_boolean)\n      {\n        if (o.has_implicit)\n        {\n          result += \" [=\" + arg + \"(=\" + toLocalString(o.implicit_value) + \")]\";\n        }\n        else\n        {\n          result += \" \" + arg;\n        }\n      }\n\n      return result;\n    }\n\n    String\n    format_description\n    (\n      const HelpOptionDetails& o,\n      size_t start,\n      size_t allowed,\n      bool tab_expansion\n    )\n    {\n      auto desc = o.desc;\n\n      if (o.has_default && (!o.is_boolean || o.default_value != \"false\"))\n      {\n        if(!o.default_value.empty())\n        {\n          desc += toLocalString(\" (default: \" + o.default_value + \")\");\n        }\n        else\n        {\n          desc += toLocalString(\" (default: \\\"\\\")\");\n        }\n      }\n\n      String result;\n\n      if (tab_expansion)\n      {\n        String desc2;\n        auto size = size_t{ 0 };\n        for (auto c = std::begin(desc); c != std::end(desc); ++c)\n        {\n          if (*c == '\\n')\n          {\n            desc2 += *c;\n            size = 0;\n          }\n          else if (*c == '\\t')\n          {\n            auto skip = 8 - size % 8;\n            stringAppend(desc2, skip, ' ');\n            size += skip;\n          }\n          else\n          {\n            desc2 += *c;\n            ++size;\n          }\n        }\n        desc = desc2;\n      }\n\n      desc += \" \";\n\n      auto current = std::begin(desc);\n      auto previous = current;\n      auto startLine = current;\n      auto lastSpace = current;\n\n      auto size = size_t{};\n\n      bool appendNewLine;\n      bool onlyWhiteSpace = true;\n\n      while (current != std::end(desc))\n      {\n        appendNewLine = false;\n\n        if (std::isblank(*previous))\n        {\n          lastSpace = current;\n        }\n\n        if (!std::isblank(*current))\n        {\n          onlyWhiteSpace = false;\n        }\n\n        while (*current == '\\n')\n        {\n          previous = current;\n          ++current;\n          appendNewLine = true;\n        }\n\n        if (!appendNewLine && size >= allowed)\n        {\n          if (lastSpace != startLine)\n          {\n            current = lastSpace;\n            previous = current;\n          }\n          appendNewLine = true;\n        }\n\n        if (appendNewLine)\n        {\n          stringAppend(result, startLine, current);\n          startLine = current;\n          lastSpace = current;\n\n          if (*previous != '\\n')\n          {\n            stringAppend(result, \"\\n\");\n          }\n\n          stringAppend(result, start, ' ');\n\n          if (*previous != '\\n')\n          {\n            stringAppend(result, lastSpace, current);\n          }\n\n          onlyWhiteSpace = true;\n          size = 0;\n        }\n\n        previous = current;\n        ++current;\n        ++size;\n      }\n\n      //append whatever is left but ignore whitespace\n      if (!onlyWhiteSpace)\n      {\n        stringAppend(result, startLine, previous);\n      }\n\n      return result;\n    }\n  } // namespace\n\ninline\nvoid\nOptions::add_options\n(\n  const std::string &group,\n  std::initializer_list<Option> options\n)\n{\n OptionAdder option_adder(*this, group);\n for (const auto &option: options)\n {\n   option_adder(option.opts_, option.desc_, option.value_, option.arg_help_);\n }\n}\n\ninline\nOptionAdder\nOptions::add_options(std::string group)\n{\n  return OptionAdder(*this, 
std::move(group));\n}\n\ninline\nOptionAdder&\nOptionAdder::operator()\n(\n  const std::string& opts,\n  const std::string& desc,\n  const std::shared_ptr<const Value>& value,\n  std::string arg_help\n)\n{\n  std::string short_sw, long_sw;\n  std::tie(short_sw, long_sw) = values::parser_tool::SplitSwitchDef(opts);\n\n  if (!short_sw.length() && !long_sw.length())\n  {\n    throw_or_mimic<invalid_option_format_error>(opts);\n  }\n  else if (long_sw.length() == 1 && short_sw.length())\n  {\n    throw_or_mimic<invalid_option_format_error>(opts);\n  }\n\n  auto option_names = []\n  (\n    const std::string &short_,\n    const std::string &long_\n  )\n  {\n    if (long_.length() == 1)\n    {\n      return std::make_tuple(long_, short_);\n    }\n    return std::make_tuple(short_, long_);\n  }(short_sw, long_sw);\n\n  m_options.add_option\n  (\n    m_group,\n    std::get<0>(option_names),\n    std::get<1>(option_names),\n    desc,\n    value,\n    std::move(arg_help)\n  );\n\n  return *this;\n}\n\ninline\nvoid\nOptionParser::parse_default(const std::shared_ptr<OptionDetails>& details)\n{\n  // TODO: remove the duplicate code here\n  auto& store = m_parsed[details->hash()];\n  store.parse_default(details);\n  m_defaults.emplace_back(details->essential_name(), details->value().get_default_value());\n}\n\ninline\nvoid\nOptionParser::parse_no_value(const std::shared_ptr<OptionDetails>& details)\n{\n  auto& store = m_parsed[details->hash()];\n  store.parse_no_value(details);\n}\n\ninline\nvoid\nOptionParser::parse_option\n(\n  const std::shared_ptr<OptionDetails>& value,\n  const std::string& /*name*/,\n  const std::string& arg\n)\n{\n  auto hash = value->hash();\n  auto& result = m_parsed[hash];\n  result.parse(value, arg);\n\n  m_sequential.emplace_back(value->essential_name(), arg);\n}\n\ninline\nvoid\nOptionParser::checked_parse_arg\n(\n  int argc,\n  const char* const* argv,\n  int& current,\n  const std::shared_ptr<OptionDetails>& value,\n  const std::string& name\n)\n{\n  if (current + 1 >= argc)\n  {\n    if (value->value().has_implicit())\n    {\n      parse_option(value, name, value->value().get_implicit_value());\n    }\n    else\n    {\n      throw_or_mimic<missing_argument_exception>(name);\n    }\n  }\n  else\n  {\n    if (value->value().has_implicit())\n    {\n      parse_option(value, name, value->value().get_implicit_value());\n    }\n    else\n    {\n      parse_option(value, name, argv[current + 1]);\n      ++current;\n    }\n  }\n}\n\ninline\nvoid\nOptionParser::add_to_option(OptionMap::const_iterator iter, const std::string& option, const std::string& arg)\n{\n  parse_option(iter->second, option, arg);\n}\n\ninline\nbool\nOptionParser::consume_positional(const std::string& a, PositionalListIterator& next)\n{\n  while (next != m_positional.end())\n  {\n    auto iter = m_options.find(*next);\n    if (iter != m_options.end())\n    {\n      if (!iter->second->value().is_container())\n      {\n        auto& result = m_parsed[iter->second->hash()];\n        if (result.count() == 0)\n        {\n          add_to_option(iter, *next, a);\n          ++next;\n          return true;\n        }\n        ++next;\n        continue;\n      }\n      add_to_option(iter, *next, a);\n      return true;\n    }\n    throw_or_mimic<option_not_exists_exception>(*next);\n  }\n\n  return false;\n}\n\ninline\nvoid\nOptions::parse_positional(std::string option)\n{\n  parse_positional(std::vector<std::string>{std::move(option)});\n}\n\ninline\nvoid\nOptions::parse_positional(std::vector<std::string> 
options)\n{\n  m_positional = std::move(options);\n\n  m_positional_set.insert(m_positional.begin(), m_positional.end());\n}\n\ninline\nvoid\nOptions::parse_positional(std::initializer_list<std::string> options)\n{\n  parse_positional(std::vector<std::string>(options));\n}\n\ninline\nParseResult\nOptions::parse(int argc, const char* const* argv)\n{\n  OptionParser parser(*m_options, m_positional, m_allow_unrecognised);\n\n  return parser.parse(argc, argv);\n}\n\ninline ParseResult\nOptionParser::parse(int argc, const char* const* argv)\n{\n  int current = 1;\n  bool consume_remaining = false;\n  auto next_positional = m_positional.begin();\n\n  std::vector<std::string> unmatched;\n\n  while (current != argc)\n  {\n    if (strcmp(argv[current], \"--\") == 0)\n    {\n      consume_remaining = true;\n      ++current;\n      break;\n    }\n    bool matched = false;\n    values::parser_tool::ArguDesc argu_desc =\n        values::parser_tool::ParseArgument(argv[current], matched);\n\n    if (!matched)\n    {\n      //not a flag\n\n      // but if it starts with a `-`, then it's an error\n      if (argv[current][0] == '-' && argv[current][1] != '\\0') {\n        if (!m_allow_unrecognised) {\n          throw_or_mimic<option_syntax_exception>(argv[current]);\n        }\n      }\n\n      //if true is returned here then it was consumed, otherwise it is\n      //ignored\n      if (consume_positional(argv[current], next_positional))\n      {\n      }\n      else\n      {\n        unmatched.emplace_back(argv[current]);\n      }\n      //if we return from here then it was parsed successfully, so continue\n    }\n    else\n    {\n      //short or long option?\n      if (argu_desc.grouping)\n      {\n        const std::string& s = argu_desc.arg_name;\n\n        for (std::size_t i = 0; i != s.size(); ++i)\n        {\n          std::string name(1, s[i]);\n          auto iter = m_options.find(name);\n\n          if (iter == m_options.end())\n          {\n            if (m_allow_unrecognised)\n            {\n              unmatched.push_back(std::string(\"-\") + s[i]);\n              continue;\n            }\n            //error\n            throw_or_mimic<option_not_exists_exception>(name);\n          }\n\n          auto value = iter->second;\n\n          if (i + 1 == s.size())\n          {\n            //it must be the last argument\n            checked_parse_arg(argc, argv, current, value, name);\n          }\n          else if (value->value().has_implicit())\n          {\n            parse_option(value, name, value->value().get_implicit_value());\n          }\n          else if (i + 1 < s.size())\n          {\n            std::string arg_value = s.substr(i + 1);\n            parse_option(value, name, arg_value);\n            break;\n          }\n          else\n          {\n            //error\n            throw_or_mimic<option_requires_argument_exception>(name);\n          }\n        }\n      }\n      else if (argu_desc.arg_name.length() != 0)\n      {\n        const std::string& name = argu_desc.arg_name;\n\n        auto iter = m_options.find(name);\n\n        if (iter == m_options.end())\n        {\n          if (m_allow_unrecognised)\n          {\n            // keep unrecognised options in argument list, skip to next argument\n            unmatched.emplace_back(argv[current]);\n            ++current;\n            continue;\n          }\n          //error\n          throw_or_mimic<option_not_exists_exception>(name);\n        }\n\n        auto opt = iter->second;\n\n        //equals provided for long 
option?\n        if (argu_desc.set_value)\n        {\n          //parse the option given\n\n          parse_option(opt, name, argu_desc.value);\n        }\n        else\n        {\n          //parse the next argument\n          checked_parse_arg(argc, argv, current, opt, name);\n        }\n      }\n\n    }\n\n    ++current;\n  }\n\n  for (auto& opt : m_options)\n  {\n    auto& detail = opt.second;\n    const auto& value = detail->value();\n\n    auto& store = m_parsed[detail->hash()];\n\n    if (value.has_default()) {\n      if (!store.count() && !store.has_default()) {\n        parse_default(detail);\n      }\n    }\n    else {\n      parse_no_value(detail);\n    }\n  }\n\n  if (consume_remaining)\n  {\n    while (current < argc)\n    {\n      if (!consume_positional(argv[current], next_positional)) {\n        break;\n      }\n      ++current;\n    }\n\n    //adjust argv for any that couldn't be swallowed\n    while (current != argc) {\n      unmatched.emplace_back(argv[current]);\n      ++current;\n    }\n  }\n\n  finalise_aliases();\n\n  ParseResult parsed(std::move(m_keys), std::move(m_parsed), std::move(m_sequential), std::move(m_defaults), std::move(unmatched));\n  return parsed;\n}\n\ninline\nvoid\nOptionParser::finalise_aliases()\n{\n  for (auto& option: m_options)\n  {\n    auto& detail = *option.second;\n    auto hash = detail.hash();\n    m_keys[detail.short_name()] = hash;\n    m_keys[detail.long_name()] = hash;\n\n    m_parsed.emplace(hash, OptionValue());\n  }\n}\n\ninline\nvoid\nOptions::add_option\n(\n  const std::string& group,\n  const Option& option\n)\n{\n    add_options(group, {option});\n}\n\ninline\nvoid\nOptions::add_option\n(\n  const std::string& group,\n  const std::string& s,\n  const std::string& l,\n  std::string desc,\n  const std::shared_ptr<const Value>& value,\n  std::string arg_help\n)\n{\n  auto stringDesc = toLocalString(std::move(desc));\n  auto option = std::make_shared<OptionDetails>(s, l, stringDesc, value);\n\n  if (!s.empty())\n  {\n    add_one_option(s, option);\n  }\n\n  if (!l.empty())\n  {\n    add_one_option(l, option);\n  }\n\n  //add the help details\n  auto& options = m_help[group];\n\n  options.options.emplace_back(HelpOptionDetails{s, l, stringDesc,\n      value->has_default(), value->get_default_value(),\n      value->has_implicit(), value->get_implicit_value(),\n      std::move(arg_help),\n      value->is_container(),\n      value->is_boolean()});\n}\n\ninline\nvoid\nOptions::add_one_option\n(\n  const std::string& option,\n  const std::shared_ptr<OptionDetails>& details\n)\n{\n  auto in = m_options->emplace(option, details);\n\n  if (!in.second)\n  {\n    throw_or_mimic<option_exists_error>(option);\n  }\n}\n\ninline\nString\nOptions::help_one_group(const std::string& g) const\n{\n  using OptionHelp = std::vector<std::pair<String, String>>;\n\n  auto group = m_help.find(g);\n  if (group == m_help.end())\n  {\n    return \"\";\n  }\n\n  OptionHelp format;\n\n  size_t longest = 0;\n\n  String result;\n\n  if (!g.empty())\n  {\n    result += toLocalString(\" \" + g + \" options:\\n\");\n  }\n\n  for (const auto& o : group->second.options)\n  {\n    if (m_positional_set.find(o.l) != m_positional_set.end() &&\n        !m_show_positional)\n    {\n      continue;\n    }\n\n    auto s = format_option(o);\n    longest = (std::max)(longest, stringLength(s));\n    format.push_back(std::make_pair(s, String()));\n  }\n  longest = (std::min)(longest, OPTION_LONGEST);\n\n  //widest allowed description -- min 10 chars for helptext/line\n  size_t 
allowed = 10;\n  if (m_width > allowed + longest + OPTION_DESC_GAP)\n  {\n    allowed = m_width - longest - OPTION_DESC_GAP;\n  }\n\n  auto fiter = format.begin();\n  for (const auto& o : group->second.options)\n  {\n    if (m_positional_set.find(o.l) != m_positional_set.end() &&\n        !m_show_positional)\n    {\n      continue;\n    }\n\n    auto d = format_description(o, longest + OPTION_DESC_GAP, allowed, m_tab_expansion);\n\n    result += fiter->first;\n    if (stringLength(fiter->first) > longest)\n    {\n      result += '\\n';\n      result += toLocalString(std::string(longest + OPTION_DESC_GAP, ' '));\n    }\n    else\n    {\n      result += toLocalString(std::string(longest + OPTION_DESC_GAP -\n        stringLength(fiter->first),\n        ' '));\n    }\n    result += d;\n    result += '\\n';\n\n    ++fiter;\n  }\n\n  return result;\n}\n\ninline\nvoid\nOptions::generate_group_help\n(\n  String& result,\n  const std::vector<std::string>& print_groups\n) const\n{\n  for (size_t i = 0; i != print_groups.size(); ++i)\n  {\n    const String& group_help_text = help_one_group(print_groups[i]);\n    if (empty(group_help_text))\n    {\n      continue;\n    }\n    result += group_help_text;\n    if (i < print_groups.size() - 1)\n    {\n      result += '\\n';\n    }\n  }\n}\n\ninline\nvoid\nOptions::generate_all_groups_help(String& result) const\n{\n  std::vector<std::string> all_groups;\n\n  std::transform(\n    m_help.begin(),\n    m_help.end(),\n    std::back_inserter(all_groups),\n    [] (const std::map<std::string, HelpGroupDetails>::value_type& group)\n    {\n      return group.first;\n    }\n  );\n\n  generate_group_help(result, all_groups);\n}\n\ninline\nstd::string\nOptions::help(const std::vector<std::string>& help_groups) const\n{\n  String result = m_help_string + \"\\nUsage:\\n  \" +\n    toLocalString(m_program) + \" \" + toLocalString(m_custom_help);\n\n  if (!m_positional.empty() && !m_positional_help.empty()) {\n    result += \" \" + toLocalString(m_positional_help);\n  }\n\n  result += \"\\n\\n\";\n\n  if (help_groups.empty())\n  {\n    generate_all_groups_help(result);\n  }\n  else\n  {\n    generate_group_help(result, help_groups);\n  }\n\n  return toUTF8String(result);\n}\n\ninline\nstd::vector<std::string>\nOptions::groups() const\n{\n  std::vector<std::string> g;\n\n  std::transform(\n    m_help.begin(),\n    m_help.end(),\n    std::back_inserter(g),\n    [] (const std::map<std::string, HelpGroupDetails>::value_type& pair)\n    {\n      return pair.first;\n    }\n  );\n\n  return g;\n}\n\ninline\nconst HelpGroupDetails&\nOptions::group_help(const std::string& group) const\n{\n  return m_help.at(group);\n}\n\n} // namespace cxxopts\n\n#endif //CXXOPTS_HPP_INCLUDED\n"
  },
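The `Options`/`OptionAdder`/`OptionParser` machinery defined above is easiest to follow end to end. A minimal, self-contained usage sketch (the program name, switches, and default values are illustrative, not taken from this repository):

```cpp
#include <iostream>
#include <string>

#include "cxxopts.hpp"

int main(int argc, char** argv)
{
  // add_options() returns an OptionAdder; its operator() registers each
  // switch with the shared OptionMap declared above.
  cxxopts::Options options("demo", "Minimal cxxopts usage sketch");
  options.add_options()
    ("i,input", "Input file", cxxopts::value<std::string>())
    ("t,threads", "Thread count", cxxopts::value<int>()->default_value("1"))
    ("h,help", "Print usage");

  // Options::parse() constructs an OptionParser over the registered options
  // and walks argv; unrecognised tokens throw unless
  // allow_unrecognised_options() was set.
  auto result = options.parse(argc, argv);

  if (result.count("help"))
  {
    // help() is rendered group by group via help_one_group()/format_option().
    std::cout << options.help() << std::endl;
    return 0;
  }

  int threads = result["threads"].as<int>();  // default applies when absent
  std::cout << "threads = " << threads << std::endl;
  return 0;
}
```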
  {
    "path": "external_libs/eigen-3.4.0/Eigen/Cholesky",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_CHOLESKY_MODULE_H\n#define EIGEN_CHOLESKY_MODULE_H\n\n#include \"Core\"\n#include \"Jacobi\"\n\n#include \"src/Core/util/DisableStupidWarnings.h\"\n\n/** \\defgroup Cholesky_Module Cholesky module\n  *\n  *\n  *\n  * This module provides two variants of the Cholesky decomposition for selfadjoint (hermitian) matrices.\n  * Those decompositions are also accessible via the following methods:\n  *  - MatrixBase::llt()\n  *  - MatrixBase::ldlt()\n  *  - SelfAdjointView::llt()\n  *  - SelfAdjointView::ldlt()\n  *\n  * \\code\n  * #include <Eigen/Cholesky>\n  * \\endcode\n  */\n\n#include \"src/Cholesky/LLT.h\"\n#include \"src/Cholesky/LDLT.h\"\n#ifdef EIGEN_USE_LAPACKE\n#ifdef EIGEN_USE_MKL\n#include \"mkl_lapacke.h\"\n#else\n#include \"src/misc/lapacke.h\"\n#endif\n#include \"src/Cholesky/LLT_LAPACKE.h\"\n#endif\n\n#include \"src/Core/util/ReenableStupidWarnings.h\"\n\n#endif // EIGEN_CHOLESKY_MODULE_H\n"
  },
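A minimal sketch of the two entry points this module documents; the construction of `A` is illustrative, chosen only to guarantee symmetric positive definiteness:

```cpp
#include <Eigen/Dense>
#include <iostream>

int main()
{
  const int n = 4;
  Eigen::MatrixXd M = Eigen::MatrixXd::Random(n, n);
  // M*M^T + n*I is symmetric positive definite, so LLT applies.
  Eigen::MatrixXd A = M * M.transpose() + n * Eigen::MatrixXd::Identity(n, n);
  Eigen::VectorXd b = Eigen::VectorXd::Ones(n);

  Eigen::VectorXd x1 = A.llt().solve(b);   // plain Cholesky: A = L L^T
  Eigen::VectorXd x2 = A.ldlt().solve(b);  // pivoted variant: P^T L D L^T P

  std::cout << (A * x1 - b).norm() << " " << (A * x2 - b).norm() << std::endl;
}
```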
  {
    "path": "external_libs/eigen-3.4.0/Eigen/CholmodSupport",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_CHOLMODSUPPORT_MODULE_H\n#define EIGEN_CHOLMODSUPPORT_MODULE_H\n\n#include \"SparseCore\"\n\n#include \"src/Core/util/DisableStupidWarnings.h\"\n\nextern \"C\" {\n  #include <cholmod.h>\n}\n\n/** \\ingroup Support_modules\n  * \\defgroup CholmodSupport_Module CholmodSupport module\n  *\n  * This module provides an interface to the Cholmod library which is part of the <a href=\"http://www.suitesparse.com\">suitesparse</a> package.\n  * It provides the two following main factorization classes:\n  * - class CholmodSupernodalLLT: a supernodal LLT Cholesky factorization.\n  * - class CholmodDecomposiiton: a general L(D)LT Cholesky factorization with automatic or explicit runtime selection of the underlying factorization method (supernodal or simplicial).\n  *\n  * For the sake of completeness, this module also propose the two following classes:\n  * - class CholmodSimplicialLLT\n  * - class CholmodSimplicialLDLT\n  * Note that these classes does not bring any particular advantage compared to the built-in\n  * SimplicialLLT and SimplicialLDLT factorization classes.\n  *\n  * \\code\n  * #include <Eigen/CholmodSupport>\n  * \\endcode\n  *\n  * In order to use this module, the cholmod headers must be accessible from the include paths, and your binary must be linked to the cholmod library and its dependencies.\n  * The dependencies depend on how cholmod has been compiled.\n  * For a cmake based project, you can use our FindCholmod.cmake module to help you in this task.\n  *\n  */\n\n#include \"src/CholmodSupport/CholmodSupport.h\"\n\n#include \"src/Core/util/ReenableStupidWarnings.h\"\n\n#endif // EIGEN_CHOLMODSUPPORT_MODULE_H\n\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/Core",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>\n// Copyright (C) 2007-2011 Benoit Jacob <jacob.benoit.1@gmail.com>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_CORE_H\n#define EIGEN_CORE_H\n\n// first thing Eigen does: stop the compiler from reporting useless warnings.\n#include \"src/Core/util/DisableStupidWarnings.h\"\n\n// then include this file where all our macros are defined. It's really important to do it first because\n// it's where we do all the compiler/OS/arch detections and define most defaults.\n#include \"src/Core/util/Macros.h\"\n\n// This detects SSE/AVX/NEON/etc. and configure alignment settings\n#include \"src/Core/util/ConfigureVectorization.h\"\n\n// We need cuda_runtime.h/hip_runtime.h to ensure that\n// the EIGEN_USING_STD macro works properly on the device side\n#if defined(EIGEN_CUDACC)\n  #include <cuda_runtime.h>\n#elif defined(EIGEN_HIPCC)\n  #include <hip/hip_runtime.h>\n#endif\n\n\n#ifdef EIGEN_EXCEPTIONS\n  #include <new>\n#endif\n\n// Disable the ipa-cp-clone optimization flag with MinGW 6.x or newer (enabled by default with -O3)\n// See http://eigen.tuxfamily.org/bz/show_bug.cgi?id=556 for details.\n#if EIGEN_COMP_MINGW && EIGEN_GNUC_AT_LEAST(4,6) && EIGEN_GNUC_AT_MOST(5,5)\n  #pragma GCC optimize (\"-fno-ipa-cp-clone\")\n#endif\n\n// Prevent ICC from specializing std::complex operators that silently fail\n// on device. This allows us to use our own device-compatible specializations\n// instead.\n#if defined(EIGEN_COMP_ICC) && defined(EIGEN_GPU_COMPILE_PHASE) \\\n    && !defined(_OVERRIDE_COMPLEX_SPECIALIZATION_)\n#define _OVERRIDE_COMPLEX_SPECIALIZATION_ 1\n#endif\n#include <complex>\n\n// this include file manages BLAS and MKL related macros\n// and inclusion of their respective header files\n#include \"src/Core/util/MKL_support.h\"\n\n\n#if defined(EIGEN_HAS_CUDA_FP16) || defined(EIGEN_HAS_HIP_FP16)\n  #define EIGEN_HAS_GPU_FP16\n#endif\n\n#if defined(EIGEN_HAS_CUDA_BF16) || defined(EIGEN_HAS_HIP_BF16)\n  #define EIGEN_HAS_GPU_BF16\n#endif\n\n#if (defined _OPENMP) && (!defined EIGEN_DONT_PARALLELIZE)\n  #define EIGEN_HAS_OPENMP\n#endif\n\n#ifdef EIGEN_HAS_OPENMP\n#include <omp.h>\n#endif\n\n// MSVC for windows mobile does not have the errno.h file\n#if !(EIGEN_COMP_MSVC && EIGEN_OS_WINCE) && !EIGEN_COMP_ARM\n#define EIGEN_HAS_ERRNO\n#endif\n\n#ifdef EIGEN_HAS_ERRNO\n#include <cerrno>\n#endif\n#include <cstddef>\n#include <cstdlib>\n#include <cmath>\n#include <cassert>\n#include <functional>\n#include <sstream>\n#ifndef EIGEN_NO_IO\n  #include <iosfwd>\n#endif\n#include <cstring>\n#include <string>\n#include <limits>\n#include <climits> // for CHAR_BIT\n// for min/max:\n#include <algorithm>\n\n#if EIGEN_HAS_CXX11\n#include <array>\n#endif\n\n// for std::is_nothrow_move_assignable\n#ifdef EIGEN_INCLUDE_TYPE_TRAITS\n#include <type_traits>\n#endif\n\n// for outputting debug info\n#ifdef EIGEN_DEBUG_ASSIGN\n#include <iostream>\n#endif\n\n// required for __cpuid, needs to be included after cmath\n#if EIGEN_COMP_MSVC && EIGEN_ARCH_i386_OR_x86_64 && !EIGEN_OS_WINCE\n  #include <intrin.h>\n#endif\n\n#if defined(EIGEN_USE_SYCL)\n  #undef min\n  #undef max\n  #undef isnan\n  #undef isinf\n  #undef isfinite\n  #include <CL/sycl.hpp>\n  #include <map>\n  #include 
<memory>\n  #include <utility>\n  #include <thread>\n  #ifndef EIGEN_SYCL_LOCAL_THREAD_DIM0\n  #define EIGEN_SYCL_LOCAL_THREAD_DIM0 16\n  #endif\n  #ifndef EIGEN_SYCL_LOCAL_THREAD_DIM1\n  #define EIGEN_SYCL_LOCAL_THREAD_DIM1 16\n  #endif\n#endif\n\n\n#if defined EIGEN2_SUPPORT_STAGE40_FULL_EIGEN3_STRICTNESS || defined EIGEN2_SUPPORT_STAGE30_FULL_EIGEN3_API || defined EIGEN2_SUPPORT_STAGE20_RESOLVE_API_CONFLICTS || defined EIGEN2_SUPPORT_STAGE10_FULL_EIGEN2_API || defined EIGEN2_SUPPORT\n// This will generate an error message:\n#error Eigen2-support is only available up to version 3.2. Please go to \"http://eigen.tuxfamily.org/index.php?title=Eigen2\" for further information\n#endif\n\nnamespace Eigen {\n\n// we use size_t frequently and we'll never remember to prepend it with std:: every time just to\n// ensure QNX/QCC support\nusing std::size_t;\n// gcc 4.6.0 wants std:: for ptrdiff_t\nusing std::ptrdiff_t;\n\n}\n\n/** \\defgroup Core_Module Core module\n  * This is the main module of Eigen providing dense matrix and vector support\n  * (both fixed and dynamic size) with all the features corresponding to a BLAS library\n  * and much more...\n  *\n  * \\code\n  * #include <Eigen/Core>\n  * \\endcode\n  */\n\n#include \"src/Core/util/Constants.h\"\n#include \"src/Core/util/Meta.h\"\n#include \"src/Core/util/ForwardDeclarations.h\"\n#include \"src/Core/util/StaticAssert.h\"\n#include \"src/Core/util/XprHelper.h\"\n#include \"src/Core/util/Memory.h\"\n#include \"src/Core/util/IntegralConstant.h\"\n#include \"src/Core/util/SymbolicIndex.h\"\n\n#include \"src/Core/NumTraits.h\"\n#include \"src/Core/MathFunctions.h\"\n#include \"src/Core/GenericPacketMath.h\"\n#include \"src/Core/MathFunctionsImpl.h\"\n#include \"src/Core/arch/Default/ConjHelper.h\"\n// Generic half float support\n#include \"src/Core/arch/Default/Half.h\"\n#include \"src/Core/arch/Default/BFloat16.h\"\n#include \"src/Core/arch/Default/TypeCasting.h\"\n#include \"src/Core/arch/Default/GenericPacketMathFunctionsFwd.h\"\n\n#if defined EIGEN_VECTORIZE_AVX512\n  #include \"src/Core/arch/SSE/PacketMath.h\"\n  #include \"src/Core/arch/SSE/TypeCasting.h\"\n  #include \"src/Core/arch/SSE/Complex.h\"\n  #include \"src/Core/arch/AVX/PacketMath.h\"\n  #include \"src/Core/arch/AVX/TypeCasting.h\"\n  #include \"src/Core/arch/AVX/Complex.h\"\n  #include \"src/Core/arch/AVX512/PacketMath.h\"\n  #include \"src/Core/arch/AVX512/TypeCasting.h\"\n  #include \"src/Core/arch/AVX512/Complex.h\"\n  #include \"src/Core/arch/SSE/MathFunctions.h\"\n  #include \"src/Core/arch/AVX/MathFunctions.h\"\n  #include \"src/Core/arch/AVX512/MathFunctions.h\"\n#elif defined EIGEN_VECTORIZE_AVX\n  // Use AVX for floats and doubles, SSE for integers\n  #include \"src/Core/arch/SSE/PacketMath.h\"\n  #include \"src/Core/arch/SSE/TypeCasting.h\"\n  #include \"src/Core/arch/SSE/Complex.h\"\n  #include \"src/Core/arch/AVX/PacketMath.h\"\n  #include \"src/Core/arch/AVX/TypeCasting.h\"\n  #include \"src/Core/arch/AVX/Complex.h\"\n  #include \"src/Core/arch/SSE/MathFunctions.h\"\n  #include \"src/Core/arch/AVX/MathFunctions.h\"\n#elif defined EIGEN_VECTORIZE_SSE\n  #include \"src/Core/arch/SSE/PacketMath.h\"\n  #include \"src/Core/arch/SSE/TypeCasting.h\"\n  #include \"src/Core/arch/SSE/MathFunctions.h\"\n  #include \"src/Core/arch/SSE/Complex.h\"\n#elif defined(EIGEN_VECTORIZE_ALTIVEC) || defined(EIGEN_VECTORIZE_VSX)\n  #include \"src/Core/arch/AltiVec/PacketMath.h\"\n  #include \"src/Core/arch/AltiVec/MathFunctions.h\"\n  #include \"src/Core/arch/AltiVec/Complex.h\"\n#elif 
defined EIGEN_VECTORIZE_NEON\n  #include \"src/Core/arch/NEON/PacketMath.h\"\n  #include \"src/Core/arch/NEON/TypeCasting.h\"\n  #include \"src/Core/arch/NEON/MathFunctions.h\"\n  #include \"src/Core/arch/NEON/Complex.h\"\n#elif defined EIGEN_VECTORIZE_SVE\n  #include \"src/Core/arch/SVE/PacketMath.h\"\n  #include \"src/Core/arch/SVE/TypeCasting.h\"\n  #include \"src/Core/arch/SVE/MathFunctions.h\"\n#elif defined EIGEN_VECTORIZE_ZVECTOR\n  #include \"src/Core/arch/ZVector/PacketMath.h\"\n  #include \"src/Core/arch/ZVector/MathFunctions.h\"\n  #include \"src/Core/arch/ZVector/Complex.h\"\n#elif defined EIGEN_VECTORIZE_MSA\n  #include \"src/Core/arch/MSA/PacketMath.h\"\n  #include \"src/Core/arch/MSA/MathFunctions.h\"\n  #include \"src/Core/arch/MSA/Complex.h\"\n#endif\n\n#if defined EIGEN_VECTORIZE_GPU\n  #include \"src/Core/arch/GPU/PacketMath.h\"\n  #include \"src/Core/arch/GPU/MathFunctions.h\"\n  #include \"src/Core/arch/GPU/TypeCasting.h\"\n#endif\n\n#if defined(EIGEN_USE_SYCL)\n  #include \"src/Core/arch/SYCL/SyclMemoryModel.h\"\n  #include \"src/Core/arch/SYCL/InteropHeaders.h\"\n#if !defined(EIGEN_DONT_VECTORIZE_SYCL)\n  #include \"src/Core/arch/SYCL/PacketMath.h\"\n  #include \"src/Core/arch/SYCL/MathFunctions.h\"\n  #include \"src/Core/arch/SYCL/TypeCasting.h\"\n#endif\n#endif\n\n#include \"src/Core/arch/Default/Settings.h\"\n// This file provides generic implementations valid for scalar as well\n#include \"src/Core/arch/Default/GenericPacketMathFunctions.h\"\n\n#include \"src/Core/functors/TernaryFunctors.h\"\n#include \"src/Core/functors/BinaryFunctors.h\"\n#include \"src/Core/functors/UnaryFunctors.h\"\n#include \"src/Core/functors/NullaryFunctors.h\"\n#include \"src/Core/functors/StlFunctors.h\"\n#include \"src/Core/functors/AssignmentFunctors.h\"\n\n// Specialized functors to enable the processing of complex numbers\n// on CUDA devices\n#ifdef EIGEN_CUDACC\n#include \"src/Core/arch/CUDA/Complex.h\"\n#endif\n\n#include \"src/Core/util/IndexedViewHelper.h\"\n#include \"src/Core/util/ReshapedHelper.h\"\n#include \"src/Core/ArithmeticSequence.h\"\n#ifndef EIGEN_NO_IO\n  #include \"src/Core/IO.h\"\n#endif\n#include \"src/Core/DenseCoeffsBase.h\"\n#include \"src/Core/DenseBase.h\"\n#include \"src/Core/MatrixBase.h\"\n#include \"src/Core/EigenBase.h\"\n\n#include \"src/Core/Product.h\"\n#include \"src/Core/CoreEvaluators.h\"\n#include \"src/Core/AssignEvaluator.h\"\n\n#ifndef EIGEN_PARSED_BY_DOXYGEN // work around Doxygen bug triggered by Assign.h r814874\n                                // at least confirmed with Doxygen 1.5.5 and 1.5.6\n  #include \"src/Core/Assign.h\"\n#endif\n\n#include \"src/Core/ArrayBase.h\"\n#include \"src/Core/util/BlasUtil.h\"\n#include \"src/Core/DenseStorage.h\"\n#include \"src/Core/NestByValue.h\"\n\n// #include \"src/Core/ForceAlignedAccess.h\"\n\n#include \"src/Core/ReturnByValue.h\"\n#include \"src/Core/NoAlias.h\"\n#include \"src/Core/PlainObjectBase.h\"\n#include \"src/Core/Matrix.h\"\n#include \"src/Core/Array.h\"\n#include \"src/Core/CwiseTernaryOp.h\"\n#include \"src/Core/CwiseBinaryOp.h\"\n#include \"src/Core/CwiseUnaryOp.h\"\n#include \"src/Core/CwiseNullaryOp.h\"\n#include \"src/Core/CwiseUnaryView.h\"\n#include \"src/Core/SelfCwiseBinaryOp.h\"\n#include \"src/Core/Dot.h\"\n#include \"src/Core/StableNorm.h\"\n#include \"src/Core/Stride.h\"\n#include \"src/Core/MapBase.h\"\n#include \"src/Core/Map.h\"\n#include \"src/Core/Ref.h\"\n#include \"src/Core/Block.h\"\n#include \"src/Core/VectorBlock.h\"\n#include \"src/Core/IndexedView.h\"\n#include 
\"src/Core/Reshaped.h\"\n#include \"src/Core/Transpose.h\"\n#include \"src/Core/DiagonalMatrix.h\"\n#include \"src/Core/Diagonal.h\"\n#include \"src/Core/DiagonalProduct.h\"\n#include \"src/Core/Redux.h\"\n#include \"src/Core/Visitor.h\"\n#include \"src/Core/Fuzzy.h\"\n#include \"src/Core/Swap.h\"\n#include \"src/Core/CommaInitializer.h\"\n#include \"src/Core/GeneralProduct.h\"\n#include \"src/Core/Solve.h\"\n#include \"src/Core/Inverse.h\"\n#include \"src/Core/SolverBase.h\"\n#include \"src/Core/PermutationMatrix.h\"\n#include \"src/Core/Transpositions.h\"\n#include \"src/Core/TriangularMatrix.h\"\n#include \"src/Core/SelfAdjointView.h\"\n#include \"src/Core/products/GeneralBlockPanelKernel.h\"\n#include \"src/Core/products/Parallelizer.h\"\n#include \"src/Core/ProductEvaluators.h\"\n#include \"src/Core/products/GeneralMatrixVector.h\"\n#include \"src/Core/products/GeneralMatrixMatrix.h\"\n#include \"src/Core/SolveTriangular.h\"\n#include \"src/Core/products/GeneralMatrixMatrixTriangular.h\"\n#include \"src/Core/products/SelfadjointMatrixVector.h\"\n#include \"src/Core/products/SelfadjointMatrixMatrix.h\"\n#include \"src/Core/products/SelfadjointProduct.h\"\n#include \"src/Core/products/SelfadjointRank2Update.h\"\n#include \"src/Core/products/TriangularMatrixVector.h\"\n#include \"src/Core/products/TriangularMatrixMatrix.h\"\n#include \"src/Core/products/TriangularSolverMatrix.h\"\n#include \"src/Core/products/TriangularSolverVector.h\"\n#include \"src/Core/BandMatrix.h\"\n#include \"src/Core/CoreIterators.h\"\n#include \"src/Core/ConditionEstimator.h\"\n\n#if defined(EIGEN_VECTORIZE_ALTIVEC) || defined(EIGEN_VECTORIZE_VSX)\n  #include \"src/Core/arch/AltiVec/MatrixProduct.h\"\n#elif defined EIGEN_VECTORIZE_NEON\n  #include \"src/Core/arch/NEON/GeneralBlockPanelKernel.h\"\n#endif\n\n#include \"src/Core/BooleanRedux.h\"\n#include \"src/Core/Select.h\"\n#include \"src/Core/VectorwiseOp.h\"\n#include \"src/Core/PartialReduxEvaluator.h\"\n#include \"src/Core/Random.h\"\n#include \"src/Core/Replicate.h\"\n#include \"src/Core/Reverse.h\"\n#include \"src/Core/ArrayWrapper.h\"\n#include \"src/Core/StlIterators.h\"\n\n#ifdef EIGEN_USE_BLAS\n#include \"src/Core/products/GeneralMatrixMatrix_BLAS.h\"\n#include \"src/Core/products/GeneralMatrixVector_BLAS.h\"\n#include \"src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h\"\n#include \"src/Core/products/SelfadjointMatrixMatrix_BLAS.h\"\n#include \"src/Core/products/SelfadjointMatrixVector_BLAS.h\"\n#include \"src/Core/products/TriangularMatrixMatrix_BLAS.h\"\n#include \"src/Core/products/TriangularMatrixVector_BLAS.h\"\n#include \"src/Core/products/TriangularSolverMatrix_BLAS.h\"\n#endif // EIGEN_USE_BLAS\n\n#ifdef EIGEN_USE_MKL_VML\n#include \"src/Core/Assign_MKL.h\"\n#endif\n\n#include \"src/Core/GlobalFunctions.h\"\n\n#include \"src/Core/util/ReenableStupidWarnings.h\"\n\n#endif // EIGEN_CORE_H\n"
  },
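A minimal sketch of the dense types this module provides, using both a fixed-size and a dynamic-size matrix (values are illustrative):

```cpp
#include <Eigen/Core>
#include <iostream>

int main()
{
  Eigen::Matrix3d A = Eigen::Matrix3d::Identity();   // fixed 3x3, double
  Eigen::Vector3d v(1.0, 2.0, 3.0);
  Eigen::MatrixXd B = Eigen::MatrixXd::Random(3, 3); // dynamic size

  // Expressions such as A * v + B.transpose() * v are built lazily as
  // expression templates and evaluated on assignment/output.
  std::cout << (A * v + B.transpose() * v).transpose() << std::endl;
}
```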
  {
    "path": "external_libs/eigen-3.4.0/Eigen/Dense",
    "content": "#include \"Core\"\n#include \"LU\"\n#include \"Cholesky\"\n#include \"QR\"\n#include \"SVD\"\n#include \"Geometry\"\n#include \"Eigenvalues\"\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/Eigen",
    "content": "#include \"Dense\"\n#include \"Sparse\"\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/Eigenvalues",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_EIGENVALUES_MODULE_H\n#define EIGEN_EIGENVALUES_MODULE_H\n\n#include \"Core\"\n\n#include \"Cholesky\"\n#include \"Jacobi\"\n#include \"Householder\"\n#include \"LU\"\n#include \"Geometry\"\n\n#include \"src/Core/util/DisableStupidWarnings.h\"\n\n/** \\defgroup Eigenvalues_Module Eigenvalues module\n  *\n  *\n  *\n  * This module mainly provides various eigenvalue solvers.\n  * This module also provides some MatrixBase methods, including:\n  *  - MatrixBase::eigenvalues(),\n  *  - MatrixBase::operatorNorm()\n  *\n  * \\code\n  * #include <Eigen/Eigenvalues>\n  * \\endcode\n  */\n\n#include \"src/misc/RealSvd2x2.h\"\n#include \"src/Eigenvalues/Tridiagonalization.h\"\n#include \"src/Eigenvalues/RealSchur.h\"\n#include \"src/Eigenvalues/EigenSolver.h\"\n#include \"src/Eigenvalues/SelfAdjointEigenSolver.h\"\n#include \"src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h\"\n#include \"src/Eigenvalues/HessenbergDecomposition.h\"\n#include \"src/Eigenvalues/ComplexSchur.h\"\n#include \"src/Eigenvalues/ComplexEigenSolver.h\"\n#include \"src/Eigenvalues/RealQZ.h\"\n#include \"src/Eigenvalues/GeneralizedEigenSolver.h\"\n#include \"src/Eigenvalues/MatrixBaseEigenvalues.h\"\n#ifdef EIGEN_USE_LAPACKE\n#ifdef EIGEN_USE_MKL\n#include \"mkl_lapacke.h\"\n#else\n#include \"src/misc/lapacke.h\"\n#endif\n#include \"src/Eigenvalues/RealSchur_LAPACKE.h\"\n#include \"src/Eigenvalues/ComplexSchur_LAPACKE.h\"\n#include \"src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h\"\n#endif\n\n#include \"src/Core/util/ReenableStupidWarnings.h\"\n\n#endif // EIGEN_EIGENVALUES_MODULE_H\n"
  },
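A minimal sketch of the solver for the selfadjoint case (`SelfAdjointEigenSolver`); the symmetrized random input is illustrative:

```cpp
#include <Eigen/Dense>
#include <iostream>

int main()
{
  Eigen::MatrixXd M = Eigen::MatrixXd::Random(3, 3);
  Eigen::MatrixXd A = 0.5 * (M + M.transpose());  // make it symmetric

  Eigen::SelfAdjointEigenSolver<Eigen::MatrixXd> es(A);
  if (es.info() != Eigen::Success) return 1;

  std::cout << "eigenvalues:\n" << es.eigenvalues() << "\n";
  std::cout << "first eigenvector:\n" << es.eigenvectors().col(0) << std::endl;
}
```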
  {
    "path": "external_libs/eigen-3.4.0/Eigen/Geometry",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_GEOMETRY_MODULE_H\n#define EIGEN_GEOMETRY_MODULE_H\n\n#include \"Core\"\n\n#include \"SVD\"\n#include \"LU\"\n#include <limits>\n\n#include \"src/Core/util/DisableStupidWarnings.h\"\n\n/** \\defgroup Geometry_Module Geometry module\n  *\n  * This module provides support for:\n  *  - fixed-size homogeneous transformations\n  *  - translation, scaling, 2D and 3D rotations\n  *  - \\link Quaternion quaternions \\endlink\n  *  - cross products (\\ref MatrixBase::cross, \\ref MatrixBase::cross3)\n  *  - orthognal vector generation (\\ref MatrixBase::unitOrthogonal)\n  *  - some linear components: \\link ParametrizedLine parametrized-lines \\endlink and \\link Hyperplane hyperplanes \\endlink\n  *  - \\link AlignedBox axis aligned bounding boxes \\endlink\n  *  - \\link umeyama least-square transformation fitting \\endlink\n  *\n  * \\code\n  * #include <Eigen/Geometry>\n  * \\endcode\n  */\n\n#include \"src/Geometry/OrthoMethods.h\"\n#include \"src/Geometry/EulerAngles.h\"\n\n#include \"src/Geometry/Homogeneous.h\"\n#include \"src/Geometry/RotationBase.h\"\n#include \"src/Geometry/Rotation2D.h\"\n#include \"src/Geometry/Quaternion.h\"\n#include \"src/Geometry/AngleAxis.h\"\n#include \"src/Geometry/Transform.h\"\n#include \"src/Geometry/Translation.h\"\n#include \"src/Geometry/Scaling.h\"\n#include \"src/Geometry/Hyperplane.h\"\n#include \"src/Geometry/ParametrizedLine.h\"\n#include \"src/Geometry/AlignedBox.h\"\n#include \"src/Geometry/Umeyama.h\"\n\n// Use the SSE optimized version whenever possible.\n#if (defined EIGEN_VECTORIZE_SSE) || (defined EIGEN_VECTORIZE_NEON)\n#include \"src/Geometry/arch/Geometry_SIMD.h\"\n#endif\n\n#include \"src/Core/util/ReenableStupidWarnings.h\"\n\n#endif // EIGEN_GEOMETRY_MODULE_H\n"
  },
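A minimal sketch of the quaternion and transform types listed above; the angle and translation values are illustrative:

```cpp
#include <Eigen/Geometry>
#include <iostream>

int main()
{
  const double pi = 3.14159265358979323846;

  // Rotate 90 degrees about the z axis, expressed as a quaternion.
  Eigen::Quaterniond q(Eigen::AngleAxisd(pi / 2.0, Eigen::Vector3d::UnitZ()));
  Eigen::Vector3d v = q * Eigen::Vector3d::UnitX();  // approximately (0, 1, 0)

  // Compose a rigid transform: rotate by q, then translate.
  Eigen::Affine3d T = Eigen::Translation3d(1.0, 2.0, 3.0) * q;

  std::cout << v.transpose() << "\n"
            << (T * Eigen::Vector3d::UnitX()).transpose() << std::endl;
}
```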
  {
    "path": "external_libs/eigen-3.4.0/Eigen/Householder",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_HOUSEHOLDER_MODULE_H\n#define EIGEN_HOUSEHOLDER_MODULE_H\n\n#include \"Core\"\n\n#include \"src/Core/util/DisableStupidWarnings.h\"\n\n/** \\defgroup Householder_Module Householder module\n  * This module provides Householder transformations.\n  *\n  * \\code\n  * #include <Eigen/Householder>\n  * \\endcode\n  */\n\n#include \"src/Householder/Householder.h\"\n#include \"src/Householder/HouseholderSequence.h\"\n#include \"src/Householder/BlockHouseholder.h\"\n\n#include \"src/Core/util/ReenableStupidWarnings.h\"\n\n#endif // EIGEN_HOUSEHOLDER_MODULE_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/IterativeLinearSolvers",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_ITERATIVELINEARSOLVERS_MODULE_H\n#define EIGEN_ITERATIVELINEARSOLVERS_MODULE_H\n\n#include \"SparseCore\"\n#include \"OrderingMethods\"\n\n#include \"src/Core/util/DisableStupidWarnings.h\"\n\n/** \n  * \\defgroup IterativeLinearSolvers_Module IterativeLinearSolvers module\n  *\n  * This module currently provides iterative methods to solve problems of the form \\c A \\c x = \\c b, where \\c A is a squared matrix, usually very large and sparse.\n  * Those solvers are accessible via the following classes:\n  *  - ConjugateGradient for selfadjoint (hermitian) matrices,\n  *  - LeastSquaresConjugateGradient for rectangular least-square problems,\n  *  - BiCGSTAB for general square matrices.\n  *\n  * These iterative solvers are associated with some preconditioners:\n  *  - IdentityPreconditioner - not really useful\n  *  - DiagonalPreconditioner - also called Jacobi preconditioner, work very well on diagonal dominant matrices.\n  *  - IncompleteLUT - incomplete LU factorization with dual thresholding\n  *\n  * Such problems can also be solved using the direct sparse decomposition modules: SparseCholesky, CholmodSupport, UmfPackSupport, SuperLUSupport.\n  *\n    \\code\n    #include <Eigen/IterativeLinearSolvers>\n    \\endcode\n  */\n\n#include \"src/IterativeLinearSolvers/SolveWithGuess.h\"\n#include \"src/IterativeLinearSolvers/IterativeSolverBase.h\"\n#include \"src/IterativeLinearSolvers/BasicPreconditioners.h\"\n#include \"src/IterativeLinearSolvers/ConjugateGradient.h\"\n#include \"src/IterativeLinearSolvers/LeastSquareConjugateGradient.h\"\n#include \"src/IterativeLinearSolvers/BiCGSTAB.h\"\n#include \"src/IterativeLinearSolvers/IncompleteLUT.h\"\n#include \"src/IterativeLinearSolvers/IncompleteCholesky.h\"\n\n#include \"src/Core/util/ReenableStupidWarnings.h\"\n\n#endif // EIGEN_ITERATIVELINEARSOLVERS_MODULE_H\n"
  },
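A minimal sketch of ConjugateGradient on a sparse SPD system (the 1-D Laplacian assembled here is illustrative); it uses the DiagonalPreconditioner mentioned above by default:

```cpp
#include <Eigen/Sparse>
#include <Eigen/IterativeLinearSolvers>
#include <iostream>
#include <vector>

int main()
{
  // Tridiagonal 1-D Laplacian: sparse, symmetric positive definite.
  const int n = 100;
  std::vector<Eigen::Triplet<double>> t;
  for (int i = 0; i < n; ++i)
  {
    t.emplace_back(i, i, 2.0);
    if (i + 1 < n)
    {
      t.emplace_back(i, i + 1, -1.0);
      t.emplace_back(i + 1, i, -1.0);
    }
  }
  Eigen::SparseMatrix<double> A(n, n);
  A.setFromTriplets(t.begin(), t.end());

  Eigen::VectorXd b = Eigen::VectorXd::Ones(n);
  Eigen::ConjugateGradient<Eigen::SparseMatrix<double>,
                           Eigen::Lower | Eigen::Upper> cg;
  cg.compute(A);
  Eigen::VectorXd x = cg.solve(b);

  std::cout << "#iterations: " << cg.iterations()
            << ", estimated error: " << cg.error() << std::endl;
}
```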
  {
    "path": "external_libs/eigen-3.4.0/Eigen/Jacobi",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_JACOBI_MODULE_H\n#define EIGEN_JACOBI_MODULE_H\n\n#include \"Core\"\n\n#include \"src/Core/util/DisableStupidWarnings.h\"\n\n/** \\defgroup Jacobi_Module Jacobi module\n  * This module provides Jacobi and Givens rotations.\n  *\n  * \\code\n  * #include <Eigen/Jacobi>\n  * \\endcode\n  *\n  * In addition to listed classes, it defines the two following MatrixBase methods to apply a Jacobi or Givens rotation:\n  *  - MatrixBase::applyOnTheLeft()\n  *  - MatrixBase::applyOnTheRight().\n  */\n\n#include \"src/Jacobi/Jacobi.h\"\n\n#include \"src/Core/util/ReenableStupidWarnings.h\"\n\n#endif // EIGEN_JACOBI_MODULE_H\n\n"
  },
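A minimal sketch of a Givens rotation built with `JacobiRotation::makeGivens()` and applied through `applyOnTheLeft()`; the vector is illustrative:

```cpp
#include <Eigen/Core>
#include <Eigen/Jacobi>
#include <iostream>

int main()
{
  Eigen::Vector2d v(3.0, 4.0);

  // makeGivens(p, q) builds G such that G^* (p, q)^T = (r, 0)^T.
  Eigen::JacobiRotation<double> G;
  G.makeGivens(v.x(), v.y());

  v.applyOnTheLeft(0, 1, G.adjoint());  // v becomes (r, 0), |r| = 5
  std::cout << v.transpose() << std::endl;
}
```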
  {
    "path": "external_libs/eigen-3.4.0/Eigen/KLUSupport",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_KLUSUPPORT_MODULE_H\n#define EIGEN_KLUSUPPORT_MODULE_H\n\n#include <Eigen/SparseCore>\n\n#include <Eigen/src/Core/util/DisableStupidWarnings.h>\n\nextern \"C\" {\n#include <btf.h>\n#include <klu.h>\n   }\n\n/** \\ingroup Support_modules\n  * \\defgroup KLUSupport_Module KLUSupport module\n  *\n  * This module provides an interface to the KLU library which is part of the <a href=\"http://www.suitesparse.com\">suitesparse</a> package.\n  * It provides the following factorization class:\n  * - class KLU: a sparse LU factorization, well-suited for circuit simulation.\n  *\n  * \\code\n  * #include <Eigen/KLUSupport>\n  * \\endcode\n  *\n  * In order to use this module, the klu and btf headers must be accessible from the include paths, and your binary must be linked to the klu library and its dependencies.\n  * The dependencies depend on how umfpack has been compiled.\n  * For a cmake based project, you can use our FindKLU.cmake module to help you in this task.\n  *\n  */\n\n#include \"src/KLUSupport/KLUSupport.h\"\n\n#include <Eigen/src/Core/util/ReenableStupidWarnings.h>\n\n#endif // EIGEN_KLUSUPPORT_MODULE_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/LU",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_LU_MODULE_H\n#define EIGEN_LU_MODULE_H\n\n#include \"Core\"\n\n#include \"src/Core/util/DisableStupidWarnings.h\"\n\n/** \\defgroup LU_Module LU module\n  * This module includes %LU decomposition and related notions such as matrix inversion and determinant.\n  * This module defines the following MatrixBase methods:\n  *  - MatrixBase::inverse()\n  *  - MatrixBase::determinant()\n  *\n  * \\code\n  * #include <Eigen/LU>\n  * \\endcode\n  */\n\n#include \"src/misc/Kernel.h\"\n#include \"src/misc/Image.h\"\n#include \"src/LU/FullPivLU.h\"\n#include \"src/LU/PartialPivLU.h\"\n#ifdef EIGEN_USE_LAPACKE\n#ifdef EIGEN_USE_MKL\n#include \"mkl_lapacke.h\"\n#else\n#include \"src/misc/lapacke.h\"\n#endif\n#include \"src/LU/PartialPivLU_LAPACKE.h\"\n#endif\n#include \"src/LU/Determinant.h\"\n#include \"src/LU/InverseImpl.h\"\n\n#if defined EIGEN_VECTORIZE_SSE || defined EIGEN_VECTORIZE_NEON\n  #include \"src/LU/arch/InverseSize4.h\"\n#endif\n\n#include \"src/Core/util/ReenableStupidWarnings.h\"\n\n#endif // EIGEN_LU_MODULE_H\n"
  },
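A minimal sketch of the MatrixBase methods named above plus the rank-revealing `FullPivLU`; the matrix is illustrative:

```cpp
#include <Eigen/Dense>
#include <iostream>

int main()
{
  Eigen::Matrix3d A;
  A << 2, -1,  0,
      -1,  2, -1,
       0, -1,  2;

  std::cout << "det(A) = " << A.determinant() << "\n";  // computed via LU
  std::cout << "A^-1 =\n" << A.inverse() << "\n";

  Eigen::FullPivLU<Eigen::Matrix3d> lu(A);  // rank-revealing, full pivoting
  std::cout << "rank(A) = " << lu.rank() << std::endl;
}
```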
  {
    "path": "external_libs/eigen-3.4.0/Eigen/MetisSupport",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_METISSUPPORT_MODULE_H\n#define EIGEN_METISSUPPORT_MODULE_H\n\n#include \"SparseCore\"\n\n#include \"src/Core/util/DisableStupidWarnings.h\"\n\nextern \"C\" {\n#include <metis.h>\n}\n\n\n/** \\ingroup Support_modules\n  * \\defgroup MetisSupport_Module MetisSupport module\n  *\n  * \\code\n  * #include <Eigen/MetisSupport>\n  * \\endcode\n  * This module defines an interface to the METIS reordering package (http://glaros.dtc.umn.edu/gkhome/views/metis). \n  * It can be used just as any other built-in method as explained in \\link OrderingMethods_Module here. \\endlink\n  */\n\n\n#include \"src/MetisSupport/MetisSupport.h\"\n\n#include \"src/Core/util/ReenableStupidWarnings.h\"\n\n#endif // EIGEN_METISSUPPORT_MODULE_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/OrderingMethods",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_ORDERINGMETHODS_MODULE_H\n#define EIGEN_ORDERINGMETHODS_MODULE_H\n\n#include \"SparseCore\"\n\n#include \"src/Core/util/DisableStupidWarnings.h\"\n\n/** \n  * \\defgroup OrderingMethods_Module OrderingMethods module\n  *\n  * This module is currently for internal use only\n  * \n  * It defines various built-in and external ordering methods for sparse matrices. \n  * They are typically used to reduce the number of elements during \n  * the sparse matrix decomposition (LLT, LU, QR).\n  * Precisely, in a preprocessing step, a permutation matrix P is computed using \n  * those ordering methods and applied to the columns of the matrix. \n  * Using for instance the sparse Cholesky decomposition, it is expected that \n  * the nonzeros elements in LLT(A*P) will be much smaller than that in LLT(A).\n  * \n  * \n  * Usage : \n  * \\code\n  * #include <Eigen/OrderingMethods>\n  * \\endcode\n  * \n  * A simple usage is as a template parameter in the sparse decomposition classes : \n  * \n  * \\code \n  * SparseLU<MatrixType, COLAMDOrdering<int> > solver;\n  * \\endcode \n  * \n  * \\code \n  * SparseQR<MatrixType, COLAMDOrdering<int> > solver;\n  * \\endcode\n  * \n  * It is possible as well to call directly a particular ordering method for your own purpose, \n  * \\code \n  * AMDOrdering<int> ordering;\n  * PermutationMatrix<Dynamic, Dynamic, int> perm;\n  * SparseMatrix<double> A; \n  * //Fill the matrix ...\n  * \n  * ordering(A, perm); // Call AMD\n  * \\endcode\n  * \n  * \\note Some of these methods (like AMD or METIS), need the sparsity pattern \n  * of the input matrix to be symmetric. When the matrix is structurally unsymmetric, \n  * Eigen computes internally the pattern of \\f$A^T*A\\f$ before calling the method.\n  * If your matrix is already symmetric (at leat in structure), you can avoid that\n  * by calling the method with a SelfAdjointView type.\n  * \n  * \\code\n  *  // Call the ordering on the pattern of the lower triangular matrix A\n  * ordering(A.selfadjointView<Lower>(), perm);\n  * \\endcode\n  */\n\n#include \"src/OrderingMethods/Amd.h\"\n#include \"src/OrderingMethods/Ordering.h\"\n#include \"src/Core/util/ReenableStupidWarnings.h\"\n\n#endif // EIGEN_ORDERINGMETHODS_MODULE_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/PaStiXSupport",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_PASTIXSUPPORT_MODULE_H\n#define EIGEN_PASTIXSUPPORT_MODULE_H\n\n#include \"SparseCore\"\n\n#include \"src/Core/util/DisableStupidWarnings.h\"\n\nextern \"C\" {\n#include <pastix_nompi.h>\n#include <pastix.h>\n}\n\n#ifdef complex\n#undef complex\n#endif\n\n/** \\ingroup Support_modules\n  * \\defgroup PaStiXSupport_Module PaStiXSupport module\n  * \n  * This module provides an interface to the <a href=\"http://pastix.gforge.inria.fr/\">PaSTiX</a> library.\n  * PaSTiX is a general \\b supernodal, \\b parallel and \\b opensource sparse solver.\n  * It provides the two following main factorization classes:\n  * - class PastixLLT : a supernodal, parallel LLt Cholesky factorization.\n  * - class PastixLDLT: a supernodal, parallel LDLt Cholesky factorization.\n  * - class PastixLU : a supernodal, parallel LU factorization (optimized for a symmetric pattern).\n  * \n  * \\code\n  * #include <Eigen/PaStiXSupport>\n  * \\endcode\n  *\n  * In order to use this module, the PaSTiX headers must be accessible from the include paths, and your binary must be linked to the PaSTiX library and its dependencies.\n  * This wrapper resuires PaStiX version 5.x compiled without MPI support.\n  * The dependencies depend on how PaSTiX has been compiled.\n  * For a cmake based project, you can use our FindPaSTiX.cmake module to help you in this task.\n  *\n  */\n\n#include \"src/PaStiXSupport/PaStiXSupport.h\"\n\n#include \"src/Core/util/ReenableStupidWarnings.h\"\n\n#endif // EIGEN_PASTIXSUPPORT_MODULE_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/PardisoSupport",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_PARDISOSUPPORT_MODULE_H\n#define EIGEN_PARDISOSUPPORT_MODULE_H\n\n#include \"SparseCore\"\n\n#include \"src/Core/util/DisableStupidWarnings.h\"\n\n#include <mkl_pardiso.h>\n\n/** \\ingroup Support_modules\n  * \\defgroup PardisoSupport_Module PardisoSupport module\n  *\n  * This module brings support for the Intel(R) MKL PARDISO direct sparse solvers.\n  *\n  * \\code\n  * #include <Eigen/PardisoSupport>\n  * \\endcode\n  *\n  * In order to use this module, the MKL headers must be accessible from the include paths, and your binary must be linked to the MKL library and its dependencies.\n  * See this \\ref TopicUsingIntelMKL \"page\" for more information on MKL-Eigen integration.\n  * \n  */\n\n#include \"src/PardisoSupport/PardisoSupport.h\"\n\n#include \"src/Core/util/ReenableStupidWarnings.h\"\n\n#endif // EIGEN_PARDISOSUPPORT_MODULE_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/QR",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_QR_MODULE_H\n#define EIGEN_QR_MODULE_H\n\n#include \"Core\"\n\n#include \"Cholesky\"\n#include \"Jacobi\"\n#include \"Householder\"\n\n#include \"src/Core/util/DisableStupidWarnings.h\"\n\n/** \\defgroup QR_Module QR module\n  *\n  *\n  *\n  * This module provides various QR decompositions\n  * This module also provides some MatrixBase methods, including:\n  *  - MatrixBase::householderQr()\n  *  - MatrixBase::colPivHouseholderQr()\n  *  - MatrixBase::fullPivHouseholderQr()\n  *\n  * \\code\n  * #include <Eigen/QR>\n  * \\endcode\n  */\n\n#include \"src/QR/HouseholderQR.h\"\n#include \"src/QR/FullPivHouseholderQR.h\"\n#include \"src/QR/ColPivHouseholderQR.h\"\n#include \"src/QR/CompleteOrthogonalDecomposition.h\"\n#ifdef EIGEN_USE_LAPACKE\n#ifdef EIGEN_USE_MKL\n#include \"mkl_lapacke.h\"\n#else\n#include \"src/misc/lapacke.h\"\n#endif\n#include \"src/QR/HouseholderQR_LAPACKE.h\"\n#include \"src/QR/ColPivHouseholderQR_LAPACKE.h\"\n#endif\n\n#include \"src/Core/util/ReenableStupidWarnings.h\"\n\n#endif // EIGEN_QR_MODULE_H\n"
  },
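A minimal sketch of a least-squares solve with the column-pivoting QR entry point listed above; the overdetermined system is illustrative:

```cpp
#include <Eigen/Dense>
#include <iostream>

int main()
{
  // 6 equations, 3 unknowns: solve() returns the least-squares solution.
  Eigen::MatrixXd A = Eigen::MatrixXd::Random(6, 3);
  Eigen::VectorXd b = Eigen::VectorXd::Random(6);

  Eigen::VectorXd x = A.colPivHouseholderQr().solve(b);
  std::cout << "residual norm = " << (A * x - b).norm() << std::endl;
}
```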
  {
    "path": "external_libs/eigen-3.4.0/Eigen/QtAlignedMalloc",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_QTMALLOC_MODULE_H\n#define EIGEN_QTMALLOC_MODULE_H\n\n#include \"Core\"\n\n#if (!EIGEN_MALLOC_ALREADY_ALIGNED)\n\n#include \"src/Core/util/DisableStupidWarnings.h\"\n\nvoid *qMalloc(std::size_t size)\n{\n  return Eigen::internal::aligned_malloc(size);\n}\n\nvoid qFree(void *ptr)\n{\n  Eigen::internal::aligned_free(ptr);\n}\n\nvoid *qRealloc(void *ptr, std::size_t size)\n{\n  void* newPtr = Eigen::internal::aligned_malloc(size);\n  std::memcpy(newPtr, ptr, size);\n  Eigen::internal::aligned_free(ptr);\n  return newPtr;\n}\n\n#include \"src/Core/util/ReenableStupidWarnings.h\"\n\n#endif\n\n#endif // EIGEN_QTMALLOC_MODULE_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/SPQRSupport",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_SPQRSUPPORT_MODULE_H\n#define EIGEN_SPQRSUPPORT_MODULE_H\n\n#include \"SparseCore\"\n\n#include \"src/Core/util/DisableStupidWarnings.h\"\n\n#include \"SuiteSparseQR.hpp\"\n\n/** \\ingroup Support_modules\n  * \\defgroup SPQRSupport_Module SuiteSparseQR module\n  * \n  * This module provides an interface to the SPQR library, which is part of the <a href=\"http://www.suitesparse.com\">suitesparse</a> package.\n  *\n  * \\code\n  * #include <Eigen/SPQRSupport>\n  * \\endcode\n  *\n  * In order to use this module, the SPQR headers must be accessible from the include paths, and your binary must be linked to the SPQR library and its dependencies (Cholmod, AMD, COLAMD,...).\n  * For a cmake based project, you can use our FindSPQR.cmake and FindCholmod.Cmake modules\n  *\n  */\n\n#include \"src/CholmodSupport/CholmodSupport.h\"\n#include \"src/SPQRSupport/SuiteSparseQRSupport.h\"\n\n#endif\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/SVD",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_SVD_MODULE_H\n#define EIGEN_SVD_MODULE_H\n\n#include \"QR\"\n#include \"Householder\"\n#include \"Jacobi\"\n\n#include \"src/Core/util/DisableStupidWarnings.h\"\n\n/** \\defgroup SVD_Module SVD module\n  *\n  *\n  *\n  * This module provides SVD decomposition for matrices (both real and complex).\n  * Two decomposition algorithms are provided:\n  *  - JacobiSVD implementing two-sided Jacobi iterations is numerically very accurate, fast for small matrices, but very slow for larger ones.\n  *  - BDCSVD implementing a recursive divide & conquer strategy on top of an upper-bidiagonalization which remains fast for large problems.\n  * These decompositions are accessible via the respective classes and following MatrixBase methods:\n  *  - MatrixBase::jacobiSvd()\n  *  - MatrixBase::bdcSvd()\n  *\n  * \\code\n  * #include <Eigen/SVD>\n  * \\endcode\n  */\n\n#include \"src/misc/RealSvd2x2.h\"\n#include \"src/SVD/UpperBidiagonalization.h\"\n#include \"src/SVD/SVDBase.h\"\n#include \"src/SVD/JacobiSVD.h\"\n#include \"src/SVD/BDCSVD.h\"\n#if defined(EIGEN_USE_LAPACKE) && !defined(EIGEN_USE_LAPACKE_STRICT)\n#ifdef EIGEN_USE_MKL\n#include \"mkl_lapacke.h\"\n#else\n#include \"src/misc/lapacke.h\"\n#endif\n#include \"src/SVD/JacobiSVD_LAPACKE.h\"\n#endif\n\n#include \"src/Core/util/ReenableStupidWarnings.h\"\n\n#endif // EIGEN_SVD_MODULE_H\n"
  },
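  {
    "path": "example/illustrative_sketches/svd_module_sketch.cpp",
    "content": "// Illustrative usage sketch; this hypothetical example/ file is not part of upstream Eigen\n// or of this repository. It demonstrates the two SVD implementations named in the module\n// header above: JacobiSVD (small matrices, very accurate) and BDCSVD (scales to large ones).\n#include <iostream>\n#include <Eigen/Dense>\n\nint main()\n{\n  Eigen::MatrixXd A = Eigen::MatrixXd::Random(6, 4);\n  Eigen::VectorXd b = Eigen::VectorXd::Random(6);\n\n  // Two-sided Jacobi SVD; thin U and V are sufficient for solving.\n  Eigen::JacobiSVD<Eigen::MatrixXd> jsvd(A, Eigen::ComputeThinU | Eigen::ComputeThinV);\n  Eigen::VectorXd x = jsvd.solve(b);  // least-squares solution of A x = b\n\n  // Divide-and-conquer bidiagonal SVD, preferred for larger matrices.\n  Eigen::BDCSVD<Eigen::MatrixXd> bsvd(A, Eigen::ComputeThinU | Eigen::ComputeThinV);\n\n  std::cout << jsvd.singularValues().transpose() << std::endl;\n  std::cout << (jsvd.singularValues() - bsvd.singularValues()).norm() << std::endl;\n  std::cout << (A * x - b).norm() << std::endl;\n  return 0;\n}\n"
  },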
  {
    "path": "external_libs/eigen-3.4.0/Eigen/Sparse",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_SPARSE_MODULE_H\n#define EIGEN_SPARSE_MODULE_H\n\n/** \\defgroup Sparse_Module Sparse meta-module\n  *\n  * Meta-module including all related modules:\n  * - \\ref SparseCore_Module\n  * - \\ref OrderingMethods_Module\n  * - \\ref SparseCholesky_Module\n  * - \\ref SparseLU_Module\n  * - \\ref SparseQR_Module\n  * - \\ref IterativeLinearSolvers_Module\n  *\n    \\code\n    #include <Eigen/Sparse>\n    \\endcode\n  */\n\n#include \"SparseCore\"\n#include \"OrderingMethods\"\n#include \"SparseCholesky\"\n#include \"SparseLU\"\n#include \"SparseQR\"\n#include \"IterativeLinearSolvers\"\n\n#endif // EIGEN_SPARSE_MODULE_H\n\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/SparseCholesky",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008-2013 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_SPARSECHOLESKY_MODULE_H\n#define EIGEN_SPARSECHOLESKY_MODULE_H\n\n#include \"SparseCore\"\n#include \"OrderingMethods\"\n\n#include \"src/Core/util/DisableStupidWarnings.h\"\n\n/** \n  * \\defgroup SparseCholesky_Module SparseCholesky module\n  *\n  * This module currently provides two variants of the direct sparse Cholesky decomposition for selfadjoint (hermitian) matrices.\n  * Those decompositions are accessible via the following classes:\n  *  - SimplicialLLt,\n  *  - SimplicialLDLt\n  *\n  * Such problems can also be solved using the ConjugateGradient solver from the IterativeLinearSolvers module.\n  *\n  * \\code\n  * #include <Eigen/SparseCholesky>\n  * \\endcode\n  */\n\n#include \"src/SparseCholesky/SimplicialCholesky.h\"\n#include \"src/SparseCholesky/SimplicialCholesky_impl.h\"\n#include \"src/Core/util/ReenableStupidWarnings.h\"\n\n#endif // EIGEN_SPARSECHOLESKY_MODULE_H\n"
  },
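  {
    "path": "example/illustrative_sketches/sparse_cholesky_sketch.cpp",
    "content": "// Illustrative usage sketch; this hypothetical example/ file is not part of upstream Eigen\n// or of this repository. It solves an SPD sparse system with the direct SimplicialLDLT\n// factorization and with the ConjugateGradient alternative mentioned in the module header above.\n#include <iostream>\n#include <vector>\n#include <Eigen/Sparse>\n\nint main()\n{\n  // Assemble a small SPD tridiagonal matrix from triplets.\n  const int n = 6;\n  std::vector<Eigen::Triplet<double> > trips;\n  for (int i = 0; i < n; ++i) {\n    trips.push_back(Eigen::Triplet<double>(i, i, 2.0));\n    if (i + 1 < n) {\n      trips.push_back(Eigen::Triplet<double>(i, i + 1, -1.0));\n      trips.push_back(Eigen::Triplet<double>(i + 1, i, -1.0));\n    }\n  }\n  Eigen::SparseMatrix<double> A(n, n);\n  A.setFromTriplets(trips.begin(), trips.end());\n  Eigen::VectorXd b = Eigen::VectorXd::Ones(n);\n\n  // Direct sparse Cholesky without square roots (LDLT variant).\n  Eigen::SimplicialLDLT<Eigen::SparseMatrix<double> > ldlt(A);\n  Eigen::VectorXd x_direct = ldlt.solve(b);\n\n  // Iterative alternative for the same selfadjoint problem.\n  Eigen::ConjugateGradient<Eigen::SparseMatrix<double> > cg(A);\n  Eigen::VectorXd x_iter = cg.solve(b);\n\n  std::cout << (x_direct - x_iter).norm() << std::endl;  // should be tiny\n  return 0;\n}\n"
  },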
  {
    "path": "external_libs/eigen-3.4.0/Eigen/SparseCore",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_SPARSECORE_MODULE_H\n#define EIGEN_SPARSECORE_MODULE_H\n\n#include \"Core\"\n\n#include \"src/Core/util/DisableStupidWarnings.h\"\n\n#include <vector>\n#include <map>\n#include <cstdlib>\n#include <cstring>\n#include <algorithm>\n\n/** \n  * \\defgroup SparseCore_Module SparseCore module\n  *\n  * This module provides a sparse matrix representation, and basic associated matrix manipulations\n  * and operations.\n  *\n  * See the \\ref TutorialSparse \"Sparse tutorial\"\n  *\n  * \\code\n  * #include <Eigen/SparseCore>\n  * \\endcode\n  *\n  * This module depends on: Core.\n  */\n\n#include \"src/SparseCore/SparseUtil.h\"\n#include \"src/SparseCore/SparseMatrixBase.h\"\n#include \"src/SparseCore/SparseAssign.h\"\n#include \"src/SparseCore/CompressedStorage.h\"\n#include \"src/SparseCore/AmbiVector.h\"\n#include \"src/SparseCore/SparseCompressedBase.h\"\n#include \"src/SparseCore/SparseMatrix.h\"\n#include \"src/SparseCore/SparseMap.h\"\n#include \"src/SparseCore/MappedSparseMatrix.h\"\n#include \"src/SparseCore/SparseVector.h\"\n#include \"src/SparseCore/SparseRef.h\"\n#include \"src/SparseCore/SparseCwiseUnaryOp.h\"\n#include \"src/SparseCore/SparseCwiseBinaryOp.h\"\n#include \"src/SparseCore/SparseTranspose.h\"\n#include \"src/SparseCore/SparseBlock.h\"\n#include \"src/SparseCore/SparseDot.h\"\n#include \"src/SparseCore/SparseRedux.h\"\n#include \"src/SparseCore/SparseView.h\"\n#include \"src/SparseCore/SparseDiagonalProduct.h\"\n#include \"src/SparseCore/ConservativeSparseSparseProduct.h\"\n#include \"src/SparseCore/SparseSparseProductWithPruning.h\"\n#include \"src/SparseCore/SparseProduct.h\"\n#include \"src/SparseCore/SparseDenseProduct.h\"\n#include \"src/SparseCore/SparseSelfAdjointView.h\"\n#include \"src/SparseCore/SparseTriangularView.h\"\n#include \"src/SparseCore/TriangularSolver.h\"\n#include \"src/SparseCore/SparsePermutation.h\"\n#include \"src/SparseCore/SparseFuzzy.h\"\n#include \"src/SparseCore/SparseSolverBase.h\"\n\n#include \"src/Core/util/ReenableStupidWarnings.h\"\n\n#endif // EIGEN_SPARSECORE_MODULE_H\n\n"
  },
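  {
    "path": "example/illustrative_sketches/sparse_core_sketch.cpp",
    "content": "// Illustrative usage sketch; this hypothetical example/ file is not part of upstream Eigen\n// or of this repository. It shows the basic SparseCore workflow: triplet assembly,\n// compressed storage, and sparse products.\n#include <iostream>\n#include <vector>\n#include <Eigen/SparseCore>\n\nint main()\n{\n  // Coefficients are usually collected as (row, col, value) triplets first.\n  std::vector<Eigen::Triplet<double> > trips;\n  trips.push_back(Eigen::Triplet<double>(0, 0, 1.0));\n  trips.push_back(Eigen::Triplet<double>(1, 1, 2.0));\n  trips.push_back(Eigen::Triplet<double>(2, 2, 3.0));\n  trips.push_back(Eigen::Triplet<double>(0, 2, 0.5));\n\n  Eigen::SparseMatrix<double> A(3, 3);\n  A.setFromTriplets(trips.begin(), trips.end());\n  A.makeCompressed();  // compressed column storage\n\n  Eigen::VectorXd v = Eigen::VectorXd::Ones(3);\n  Eigen::VectorXd y = A * v;              // sparse * dense product\n  Eigen::SparseMatrix<double> B = A * A;  // sparse * sparse product\n\n  std::cout << A.nonZeros() << std::endl;\n  std::cout << y.transpose() << std::endl;\n  std::cout << B.nonZeros() << std::endl;\n  return 0;\n}\n"
  },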
  {
    "path": "external_libs/eigen-3.4.0/Eigen/SparseLU",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>\n// Copyright (C) 2012 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_SPARSELU_MODULE_H\n#define EIGEN_SPARSELU_MODULE_H\n\n#include \"SparseCore\"\n\n/** \n  * \\defgroup SparseLU_Module SparseLU module\n  * This module defines a supernodal factorization of general sparse matrices.\n  * The code is fully optimized for supernode-panel updates with specialized kernels.\n  * Please, see the documentation of the SparseLU class for more details.\n  */\n\n// Ordering interface\n#include \"OrderingMethods\"\n\n#include \"src/Core/util/DisableStupidWarnings.h\"\n\n#include \"src/SparseLU/SparseLU_gemm_kernel.h\"\n\n#include \"src/SparseLU/SparseLU_Structs.h\"\n#include \"src/SparseLU/SparseLU_SupernodalMatrix.h\"\n#include \"src/SparseLU/SparseLUImpl.h\"\n#include \"src/SparseCore/SparseColEtree.h\"\n#include \"src/SparseLU/SparseLU_Memory.h\"\n#include \"src/SparseLU/SparseLU_heap_relax_snode.h\"\n#include \"src/SparseLU/SparseLU_relax_snode.h\"\n#include \"src/SparseLU/SparseLU_pivotL.h\"\n#include \"src/SparseLU/SparseLU_panel_dfs.h\"\n#include \"src/SparseLU/SparseLU_kernel_bmod.h\"\n#include \"src/SparseLU/SparseLU_panel_bmod.h\"\n#include \"src/SparseLU/SparseLU_column_dfs.h\"\n#include \"src/SparseLU/SparseLU_column_bmod.h\"\n#include \"src/SparseLU/SparseLU_copy_to_ucol.h\"\n#include \"src/SparseLU/SparseLU_pruneL.h\"\n#include \"src/SparseLU/SparseLU_Utils.h\"\n#include \"src/SparseLU/SparseLU.h\"\n\n#include \"src/Core/util/ReenableStupidWarnings.h\"\n\n#endif // EIGEN_SPARSELU_MODULE_H\n"
  },
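  {
    "path": "example/illustrative_sketches/sparse_lu_sketch.cpp",
    "content": "// Illustrative usage sketch; this hypothetical example/ file is not part of upstream Eigen\n// or of this repository. It applies the supernodal SparseLU factorization described in the\n// module header above to a general (non-symmetric) sparse system.\n#include <iostream>\n#include <vector>\n#include <Eigen/Sparse>\n\nint main()\n{\n  const int n = 6;\n  std::vector<Eigen::Triplet<double> > trips;\n  for (int i = 0; i < n; ++i) {\n    trips.push_back(Eigen::Triplet<double>(i, i, 4.0));\n    if (i + 1 < n) {\n      trips.push_back(Eigen::Triplet<double>(i, i + 1, -1.0));\n      trips.push_back(Eigen::Triplet<double>(i + 1, i, -2.0));  // breaks symmetry\n    }\n  }\n  Eigen::SparseMatrix<double> A(n, n);\n  A.setFromTriplets(trips.begin(), trips.end());\n  A.makeCompressed();\n  Eigen::VectorXd b = Eigen::VectorXd::Ones(n);\n\n  // Fill-reducing ordering, symbolic analysis, then numeric factorization.\n  Eigen::SparseLU<Eigen::SparseMatrix<double>, Eigen::COLAMDOrdering<int> > lu;\n  lu.analyzePattern(A);\n  lu.factorize(A);\n  if (lu.info() != Eigen::Success) return 1;\n\n  Eigen::VectorXd x = lu.solve(b);\n  std::cout << (A * x - b).norm() << std::endl;\n  return 0;\n}\n"
  },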
  {
    "path": "external_libs/eigen-3.4.0/Eigen/SparseQR",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_SPARSEQR_MODULE_H\n#define EIGEN_SPARSEQR_MODULE_H\n\n#include \"SparseCore\"\n#include \"OrderingMethods\"\n#include \"src/Core/util/DisableStupidWarnings.h\"\n\n/** \\defgroup SparseQR_Module SparseQR module\n  * \\brief Provides QR decomposition for sparse matrices\n  * \n  * This module provides a simplicial version of the left-looking Sparse QR decomposition. \n  * The columns of the input matrix should be reordered to limit the fill-in during the \n  * decomposition. Built-in methods (COLAMD, AMD) or external  methods (METIS) can be used to this end.\n  * See the \\link OrderingMethods_Module OrderingMethods\\endlink module for the list \n  * of built-in and external ordering methods.\n  * \n  * \\code\n  * #include <Eigen/SparseQR>\n  * \\endcode\n  * \n  * \n  */\n\n#include \"src/SparseCore/SparseColEtree.h\"\n#include \"src/SparseQR/SparseQR.h\"\n\n#include \"src/Core/util/ReenableStupidWarnings.h\"\n\n#endif\n"
  },
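  {
    "path": "example/illustrative_sketches/sparse_qr_sketch.cpp",
    "content": "// Illustrative usage sketch; this hypothetical example/ file is not part of upstream Eigen\n// or of this repository. It uses SparseQR with the built-in COLAMD fill-reducing ordering,\n// as suggested by the module header above, on a small sparse least-squares problem.\n#include <iostream>\n#include <vector>\n#include <Eigen/Sparse>\n\nint main()\n{\n  // Rectangular (over-determined) sparse matrix: 4 rows, 3 columns.\n  std::vector<Eigen::Triplet<double> > trips;\n  trips.push_back(Eigen::Triplet<double>(0, 0, 1.0));\n  trips.push_back(Eigen::Triplet<double>(1, 1, 2.0));\n  trips.push_back(Eigen::Triplet<double>(2, 2, 3.0));\n  trips.push_back(Eigen::Triplet<double>(3, 0, 1.0));\n  trips.push_back(Eigen::Triplet<double>(3, 2, 1.0));\n\n  Eigen::SparseMatrix<double> A(4, 3);\n  A.setFromTriplets(trips.begin(), trips.end());\n  A.makeCompressed();  // SparseQR requires the matrix in compressed mode\n  Eigen::VectorXd b = Eigen::VectorXd::Ones(4);\n\n  Eigen::SparseQR<Eigen::SparseMatrix<double>, Eigen::COLAMDOrdering<int> > qr(A);\n  if (qr.info() != Eigen::Success) return 1;\n\n  Eigen::VectorXd x = qr.solve(b);  // least-squares solution\n  std::cout << qr.rank() << std::endl;\n  std::cout << (A * x - b).norm() << std::endl;\n  return 0;\n}\n"
  },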
  {
    "path": "external_libs/eigen-3.4.0/Eigen/StdDeque",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>\n// Copyright (C) 2009 Hauke Heibel <hauke.heibel@googlemail.com>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_STDDEQUE_MODULE_H\n#define EIGEN_STDDEQUE_MODULE_H\n\n#include \"Core\"\n#include <deque>\n\n#if EIGEN_COMP_MSVC && EIGEN_OS_WIN64 && (EIGEN_MAX_STATIC_ALIGN_BYTES<=16) /* MSVC auto aligns up to 16 bytes in 64 bit builds */\n\n#define EIGEN_DEFINE_STL_DEQUE_SPECIALIZATION(...)\n\n#else\n\n#include \"src/StlSupport/StdDeque.h\"\n\n#endif\n\n#endif // EIGEN_STDDEQUE_MODULE_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/StdList",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2009 Hauke Heibel <hauke.heibel@googlemail.com>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_STDLIST_MODULE_H\n#define EIGEN_STDLIST_MODULE_H\n\n#include \"Core\"\n#include <list>\n\n#if EIGEN_COMP_MSVC && EIGEN_OS_WIN64 && (EIGEN_MAX_STATIC_ALIGN_BYTES<=16) /* MSVC auto aligns up to 16 bytes in 64 bit builds */\n\n#define EIGEN_DEFINE_STL_LIST_SPECIALIZATION(...)\n\n#else\n\n#include \"src/StlSupport/StdList.h\"\n\n#endif\n\n#endif // EIGEN_STDLIST_MODULE_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/StdVector",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>\n// Copyright (C) 2009 Hauke Heibel <hauke.heibel@googlemail.com>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_STDVECTOR_MODULE_H\n#define EIGEN_STDVECTOR_MODULE_H\n\n#include \"Core\"\n#include <vector>\n\n#if EIGEN_COMP_MSVC && EIGEN_OS_WIN64 && (EIGEN_MAX_STATIC_ALIGN_BYTES<=16) /* MSVC auto aligns up to 16 bytes in 64 bit builds */\n\n#define EIGEN_DEFINE_STL_VECTOR_SPECIALIZATION(...)\n\n#else\n\n#include \"src/StlSupport/StdVector.h\"\n\n#endif\n\n#endif // EIGEN_STDVECTOR_MODULE_H\n"
  },
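  {
    "path": "example/illustrative_sketches/std_vector_sketch.cpp",
    "content": "// Illustrative usage sketch; this hypothetical example/ file is not part of upstream Eigen\n// or of this repository. It stores fixed-size vectorizable Eigen types in a std::vector via\n// Eigen's aligned allocator; the StdDeque and StdList headers address the same alignment\n// issue for those containers.\n#include <cstddef>\n#include <vector>\n#include <Eigen/StdVector>\n\nint main()\n{\n  // Vector4f is fixed-size vectorizable: SIMD loads need stronger alignment than the\n  // default std::allocator guarantees on some platforms, so use aligned_allocator.\n  std::vector<Eigen::Vector4f, Eigen::aligned_allocator<Eigen::Vector4f> > pts;\n  pts.push_back(Eigen::Vector4f::Zero());\n  pts.push_back(Eigen::Vector4f::Ones());\n\n  float sum = 0.f;\n  for (std::size_t i = 0; i < pts.size(); ++i) sum += pts[i].sum();\n  return sum == 4.f ? 0 : 1;\n}\n"
  },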
  {
    "path": "external_libs/eigen-3.4.0/Eigen/SuperLUSupport",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_SUPERLUSUPPORT_MODULE_H\n#define EIGEN_SUPERLUSUPPORT_MODULE_H\n\n#include \"SparseCore\"\n\n#include \"src/Core/util/DisableStupidWarnings.h\"\n\n#ifdef EMPTY\n#define EIGEN_EMPTY_WAS_ALREADY_DEFINED\n#endif\n\ntypedef int int_t;\n#include <slu_Cnames.h>\n#include <supermatrix.h>\n#include <slu_util.h>\n\n// slu_util.h defines a preprocessor token named EMPTY which is really polluting,\n// so we remove it in favor of a SUPERLU_EMPTY token.\n// If EMPTY was already defined then we don't undef it.\n\n#if defined(EIGEN_EMPTY_WAS_ALREADY_DEFINED)\n# undef EIGEN_EMPTY_WAS_ALREADY_DEFINED\n#elif defined(EMPTY)\n# undef EMPTY\n#endif\n\n#define SUPERLU_EMPTY (-1)\n\nnamespace Eigen { struct SluMatrix; }\n\n/** \\ingroup Support_modules\n  * \\defgroup SuperLUSupport_Module SuperLUSupport module\n  *\n  * This module provides an interface to the <a href=\"http://crd-legacy.lbl.gov/~xiaoye/SuperLU/\">SuperLU</a> library.\n  * It provides the following factorization class:\n  * - class SuperLU: a supernodal sequential LU factorization.\n  * - class SuperILU: a supernodal sequential incomplete LU factorization (to be used as a preconditioner for iterative methods).\n  *\n  * \\warning This wrapper requires at least versions 4.0 of SuperLU. The 3.x versions are not supported.\n  *\n  * \\warning When including this module, you have to use SUPERLU_EMPTY instead of EMPTY which is no longer defined because it is too polluting.\n  *\n  * \\code\n  * #include <Eigen/SuperLUSupport>\n  * \\endcode\n  *\n  * In order to use this module, the superlu headers must be accessible from the include paths, and your binary must be linked to the superlu library and its dependencies.\n  * The dependencies depend on how superlu has been compiled.\n  * For a cmake based project, you can use our FindSuperLU.cmake module to help you in this task.\n  *\n  */\n\n#include \"src/SuperLUSupport/SuperLUSupport.h\"\n\n#include \"src/Core/util/ReenableStupidWarnings.h\"\n\n#endif // EIGEN_SUPERLUSUPPORT_MODULE_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/UmfPackSupport",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_UMFPACKSUPPORT_MODULE_H\n#define EIGEN_UMFPACKSUPPORT_MODULE_H\n\n#include \"SparseCore\"\n\n#include \"src/Core/util/DisableStupidWarnings.h\"\n\nextern \"C\" {\n#include <umfpack.h>\n}\n\n/** \\ingroup Support_modules\n  * \\defgroup UmfPackSupport_Module UmfPackSupport module\n  *\n  * This module provides an interface to the UmfPack library which is part of the <a href=\"http://www.suitesparse.com\">suitesparse</a> package.\n  * It provides the following factorization class:\n  * - class UmfPackLU: a multifrontal sequential LU factorization.\n  *\n  * \\code\n  * #include <Eigen/UmfPackSupport>\n  * \\endcode\n  *\n  * In order to use this module, the umfpack headers must be accessible from the include paths, and your binary must be linked to the umfpack library and its dependencies.\n  * The dependencies depend on how umfpack has been compiled.\n  * For a cmake based project, you can use our FindUmfPack.cmake module to help you in this task.\n  *\n  */\n\n#include \"src/UmfPackSupport/UmfPackSupport.h\"\n\n#include \"src/Core/util/ReenableStupidWarnings.h\"\n\n#endif // EIGEN_UMFPACKSUPPORT_MODULE_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Cholesky/LDLT.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008-2011 Gael Guennebaud <gael.guennebaud@inria.fr>\n// Copyright (C) 2009 Keir Mierle <mierle@gmail.com>\n// Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com>\n// Copyright (C) 2011 Timothy E. Holy <tim.holy@gmail.com >\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_LDLT_H\n#define EIGEN_LDLT_H\n\nnamespace Eigen {\n\nnamespace internal {\n  template<typename _MatrixType, int _UpLo> struct traits<LDLT<_MatrixType, _UpLo> >\n   : traits<_MatrixType>\n  {\n    typedef MatrixXpr XprKind;\n    typedef SolverStorage StorageKind;\n    typedef int StorageIndex;\n    enum { Flags = 0 };\n  };\n\n  template<typename MatrixType, int UpLo> struct LDLT_Traits;\n\n  // PositiveSemiDef means positive semi-definite and non-zero; same for NegativeSemiDef\n  enum SignMatrix { PositiveSemiDef, NegativeSemiDef, ZeroSign, Indefinite };\n}\n\n/** \\ingroup Cholesky_Module\n  *\n  * \\class LDLT\n  *\n  * \\brief Robust Cholesky decomposition of a matrix with pivoting\n  *\n  * \\tparam _MatrixType the type of the matrix of which to compute the LDL^T Cholesky decomposition\n  * \\tparam _UpLo the triangular part that will be used for the decompositon: Lower (default) or Upper.\n  *             The other triangular part won't be read.\n  *\n  * Perform a robust Cholesky decomposition of a positive semidefinite or negative semidefinite\n  * matrix \\f$ A \\f$ such that \\f$ A =  P^TLDL^*P \\f$, where P is a permutation matrix, L\n  * is lower triangular with a unit diagonal and D is a diagonal matrix.\n  *\n  * The decomposition uses pivoting to ensure stability, so that D will have\n  * zeros in the bottom right rank(A) - n submatrix. Avoiding the square root\n  * on D also stabilizes the computation.\n  *\n  * Remember that Cholesky decompositions are not rank-revealing. 
Also, do not use a Cholesky\n  * decomposition to determine whether a system of equations has a solution.\n  *\n  * This class supports the \\link InplaceDecomposition inplace decomposition \\endlink mechanism.\n  *\n  * \\sa MatrixBase::ldlt(), SelfAdjointView::ldlt(), class LLT\n  */\ntemplate<typename _MatrixType, int _UpLo> class LDLT\n        : public SolverBase<LDLT<_MatrixType, _UpLo> >\n{\n  public:\n    typedef _MatrixType MatrixType;\n    typedef SolverBase<LDLT> Base;\n    friend class SolverBase<LDLT>;\n\n    EIGEN_GENERIC_PUBLIC_INTERFACE(LDLT)\n    enum {\n      MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,\n      MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,\n      UpLo = _UpLo\n    };\n    typedef Matrix<Scalar, RowsAtCompileTime, 1, 0, MaxRowsAtCompileTime, 1> TmpMatrixType;\n\n    typedef Transpositions<RowsAtCompileTime, MaxRowsAtCompileTime> TranspositionType;\n    typedef PermutationMatrix<RowsAtCompileTime, MaxRowsAtCompileTime> PermutationType;\n\n    typedef internal::LDLT_Traits<MatrixType,UpLo> Traits;\n\n    /** \\brief Default Constructor.\n      *\n      * The default constructor is useful in cases in which the user intends to\n      * perform decompositions via LDLT::compute(const MatrixType&).\n      */\n    LDLT()\n      : m_matrix(),\n        m_transpositions(),\n        m_sign(internal::ZeroSign),\n        m_isInitialized(false)\n    {}\n\n    /** \\brief Default Constructor with memory preallocation\n      *\n      * Like the default constructor but with preallocation of the internal data\n      * according to the specified problem \\a size.\n      * \\sa LDLT()\n      */\n    explicit LDLT(Index size)\n      : m_matrix(size, size),\n        m_transpositions(size),\n        m_temporary(size),\n        m_sign(internal::ZeroSign),\n        m_isInitialized(false)\n    {}\n\n    /** \\brief Constructor with decomposition\n      *\n      * This calculates the decomposition for the input \\a matrix.\n      *\n      * \\sa LDLT(Index size)\n      */\n    template<typename InputType>\n    explicit LDLT(const EigenBase<InputType>& matrix)\n      : m_matrix(matrix.rows(), matrix.cols()),\n        m_transpositions(matrix.rows()),\n        m_temporary(matrix.rows()),\n        m_sign(internal::ZeroSign),\n        m_isInitialized(false)\n    {\n      compute(matrix.derived());\n    }\n\n    /** \\brief Constructs a LDLT factorization from a given matrix\n      *\n      * This overloaded constructor is provided for \\link InplaceDecomposition inplace decomposition \\endlink when \\c MatrixType is a Eigen::Ref.\n      *\n      * \\sa LDLT(const EigenBase&)\n      */\n    template<typename InputType>\n    explicit LDLT(EigenBase<InputType>& matrix)\n      : m_matrix(matrix.derived()),\n        m_transpositions(matrix.rows()),\n        m_temporary(matrix.rows()),\n        m_sign(internal::ZeroSign),\n        m_isInitialized(false)\n    {\n      compute(matrix.derived());\n    }\n\n    /** Clear any existing decomposition\n     * \\sa rankUpdate(w,sigma)\n     */\n    void setZero()\n    {\n      m_isInitialized = false;\n    }\n\n    /** \\returns a view of the upper triangular matrix U */\n    inline typename Traits::MatrixU matrixU() const\n    {\n      eigen_assert(m_isInitialized && \"LDLT is not initialized.\");\n      return Traits::getU(m_matrix);\n    }\n\n    /** \\returns a view of the lower triangular matrix L */\n    inline typename Traits::MatrixL matrixL() const\n    {\n      eigen_assert(m_isInitialized && \"LDLT is not 
initialized.\");\n      return Traits::getL(m_matrix);\n    }\n\n    /** \\returns the permutation matrix P as a transposition sequence.\n      */\n    inline const TranspositionType& transpositionsP() const\n    {\n      eigen_assert(m_isInitialized && \"LDLT is not initialized.\");\n      return m_transpositions;\n    }\n\n    /** \\returns the coefficients of the diagonal matrix D */\n    inline Diagonal<const MatrixType> vectorD() const\n    {\n      eigen_assert(m_isInitialized && \"LDLT is not initialized.\");\n      return m_matrix.diagonal();\n    }\n\n    /** \\returns true if the matrix is positive (semidefinite) */\n    inline bool isPositive() const\n    {\n      eigen_assert(m_isInitialized && \"LDLT is not initialized.\");\n      return m_sign == internal::PositiveSemiDef || m_sign == internal::ZeroSign;\n    }\n\n    /** \\returns true if the matrix is negative (semidefinite) */\n    inline bool isNegative(void) const\n    {\n      eigen_assert(m_isInitialized && \"LDLT is not initialized.\");\n      return m_sign == internal::NegativeSemiDef || m_sign == internal::ZeroSign;\n    }\n\n    #ifdef EIGEN_PARSED_BY_DOXYGEN\n    /** \\returns a solution x of \\f$ A x = b \\f$ using the current decomposition of A.\n      *\n      * This function also supports in-place solves using the syntax <tt>x = decompositionObject.solve(x)</tt> .\n      *\n      * \\note_about_checking_solutions\n      *\n      * More precisely, this method solves \\f$ A x = b \\f$ using the decomposition \\f$ A = P^T L D L^* P \\f$\n      * by solving the systems \\f$ P^T y_1 = b \\f$, \\f$ L y_2 = y_1 \\f$, \\f$ D y_3 = y_2 \\f$,\n      * \\f$ L^* y_4 = y_3 \\f$ and \\f$ P x = y_4 \\f$ in succession. If the matrix \\f$ A \\f$ is singular, then\n      * \\f$ D \\f$ will also be singular (all the other matrices are invertible). In that case, the\n      * least-square solution of \\f$ D y_3 = y_2 \\f$ is computed. 
This does not mean that this function\n      * computes the least-square solution of \\f$ A x = b \\f$ if \\f$ A \\f$ is singular.\n      *\n      * \\sa MatrixBase::ldlt(), SelfAdjointView::ldlt()\n      */\n    template<typename Rhs>\n    inline const Solve<LDLT, Rhs>\n    solve(const MatrixBase<Rhs>& b) const;\n    #endif\n\n    template<typename Derived>\n    bool solveInPlace(MatrixBase<Derived> &bAndX) const;\n\n    template<typename InputType>\n    LDLT& compute(const EigenBase<InputType>& matrix);\n\n    /** \\returns an estimate of the reciprocal condition number of the matrix of\n     *  which \\c *this is the LDLT decomposition.\n     */\n    RealScalar rcond() const\n    {\n      eigen_assert(m_isInitialized && \"LDLT is not initialized.\");\n      return internal::rcond_estimate_helper(m_l1_norm, *this);\n    }\n\n    template <typename Derived>\n    LDLT& rankUpdate(const MatrixBase<Derived>& w, const RealScalar& alpha=1);\n\n    /** \\returns the internal LDLT decomposition matrix\n      *\n      * TODO: document the storage layout\n      */\n    inline const MatrixType& matrixLDLT() const\n    {\n      eigen_assert(m_isInitialized && \"LDLT is not initialized.\");\n      return m_matrix;\n    }\n\n    MatrixType reconstructedMatrix() const;\n\n    /** \\returns the adjoint of \\c *this, that is, a const reference to the decomposition itself as the underlying matrix is self-adjoint.\n      *\n      * This method is provided for compatibility with other matrix decompositions, thus enabling generic code such as:\n      * \\code x = decomposition.adjoint().solve(b) \\endcode\n      */\n    const LDLT& adjoint() const { return *this; };\n\n    EIGEN_DEVICE_FUNC inline EIGEN_CONSTEXPR Index rows() const EIGEN_NOEXCEPT { return m_matrix.rows(); }\n    EIGEN_DEVICE_FUNC inline EIGEN_CONSTEXPR Index cols() const EIGEN_NOEXCEPT { return m_matrix.cols(); }\n\n    /** \\brief Reports whether previous computation was successful.\n      *\n      * \\returns \\c Success if computation was successful,\n      *          \\c NumericalIssue if the factorization failed because of a zero pivot.\n      */\n    ComputationInfo info() const\n    {\n      eigen_assert(m_isInitialized && \"LDLT is not initialized.\");\n      return m_info;\n    }\n\n    #ifndef EIGEN_PARSED_BY_DOXYGEN\n    template<typename RhsType, typename DstType>\n    void _solve_impl(const RhsType &rhs, DstType &dst) const;\n\n    template<bool Conjugate, typename RhsType, typename DstType>\n    void _solve_impl_transposed(const RhsType &rhs, DstType &dst) const;\n    #endif\n\n  protected:\n\n    static void check_template_parameters()\n    {\n      EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);\n    }\n\n    /** \\internal\n      * Used to compute and store the Cholesky decomposition A = L D L^* = U^* D U.\n      * The strict upper part is used during the decomposition, the strict lower\n      * part correspond to the coefficients of L (its diagonal is equal to 1 and\n      * is not stored), and the diagonal entries correspond to D.\n      */\n    MatrixType m_matrix;\n    RealScalar m_l1_norm;\n    TranspositionType m_transpositions;\n    TmpMatrixType m_temporary;\n    internal::SignMatrix m_sign;\n    bool m_isInitialized;\n    ComputationInfo m_info;\n};\n\nnamespace internal {\n\ntemplate<int UpLo> struct ldlt_inplace;\n\ntemplate<> struct ldlt_inplace<Lower>\n{\n  template<typename MatrixType, typename TranspositionType, typename Workspace>\n  static bool unblocked(MatrixType& mat, TranspositionType& transpositions, 
Workspace& temp, SignMatrix& sign)\n  {\n    using std::abs;\n    typedef typename MatrixType::Scalar Scalar;\n    typedef typename MatrixType::RealScalar RealScalar;\n    typedef typename TranspositionType::StorageIndex IndexType;\n    eigen_assert(mat.rows()==mat.cols());\n    const Index size = mat.rows();\n    bool found_zero_pivot = false;\n    bool ret = true;\n\n    if (size <= 1)\n    {\n      transpositions.setIdentity();\n      if(size==0) sign = ZeroSign;\n      else if (numext::real(mat.coeff(0,0)) > static_cast<RealScalar>(0) ) sign = PositiveSemiDef;\n      else if (numext::real(mat.coeff(0,0)) < static_cast<RealScalar>(0)) sign = NegativeSemiDef;\n      else sign = ZeroSign;\n      return true;\n    }\n\n    for (Index k = 0; k < size; ++k)\n    {\n      // Find largest diagonal element\n      Index index_of_biggest_in_corner;\n      mat.diagonal().tail(size-k).cwiseAbs().maxCoeff(&index_of_biggest_in_corner);\n      index_of_biggest_in_corner += k;\n\n      transpositions.coeffRef(k) = IndexType(index_of_biggest_in_corner);\n      if(k != index_of_biggest_in_corner)\n      {\n        // apply the transposition while taking care to consider only\n        // the lower triangular part\n        Index s = size-index_of_biggest_in_corner-1; // trailing size after the biggest element\n        mat.row(k).head(k).swap(mat.row(index_of_biggest_in_corner).head(k));\n        mat.col(k).tail(s).swap(mat.col(index_of_biggest_in_corner).tail(s));\n        std::swap(mat.coeffRef(k,k),mat.coeffRef(index_of_biggest_in_corner,index_of_biggest_in_corner));\n        for(Index i=k+1;i<index_of_biggest_in_corner;++i)\n        {\n          Scalar tmp = mat.coeffRef(i,k);\n          mat.coeffRef(i,k) = numext::conj(mat.coeffRef(index_of_biggest_in_corner,i));\n          mat.coeffRef(index_of_biggest_in_corner,i) = numext::conj(tmp);\n        }\n        if(NumTraits<Scalar>::IsComplex)\n          mat.coeffRef(index_of_biggest_in_corner,k) = numext::conj(mat.coeff(index_of_biggest_in_corner,k));\n      }\n\n      // partition the matrix:\n      //       A00 |  -  |  -\n      // lu  = A10 | A11 |  -\n      //       A20 | A21 | A22\n      Index rs = size - k - 1;\n      Block<MatrixType,Dynamic,1> A21(mat,k+1,k,rs,1);\n      Block<MatrixType,1,Dynamic> A10(mat,k,0,1,k);\n      Block<MatrixType,Dynamic,Dynamic> A20(mat,k+1,0,rs,k);\n\n      if(k>0)\n      {\n        temp.head(k) = mat.diagonal().real().head(k).asDiagonal() * A10.adjoint();\n        mat.coeffRef(k,k) -= (A10 * temp.head(k)).value();\n        if(rs>0)\n          A21.noalias() -= A20 * temp.head(k);\n      }\n\n      // In some previous versions of Eigen (e.g., 3.2.1), the scaling was omitted if the pivot\n      // was smaller than the cutoff value. 
However, since LDLT is not rank-revealing\n      // we should only make sure that we do not introduce INF or NaN values.\n      // Remark that LAPACK also uses 0 as the cutoff value.\n      RealScalar realAkk = numext::real(mat.coeffRef(k,k));\n      bool pivot_is_valid = (abs(realAkk) > RealScalar(0));\n\n      if(k==0 && !pivot_is_valid)\n      {\n        // The entire diagonal is zero, there is nothing more to do\n        // except filling the transpositions, and checking whether the matrix is zero.\n        sign = ZeroSign;\n        for(Index j = 0; j<size; ++j)\n        {\n          transpositions.coeffRef(j) = IndexType(j);\n          ret = ret && (mat.col(j).tail(size-j-1).array()==Scalar(0)).all();\n        }\n        return ret;\n      }\n\n      if((rs>0) && pivot_is_valid)\n        A21 /= realAkk;\n      else if(rs>0)\n        ret = ret && (A21.array()==Scalar(0)).all();\n\n      if(found_zero_pivot && pivot_is_valid) ret = false; // factorization failed\n      else if(!pivot_is_valid) found_zero_pivot = true;\n\n      if (sign == PositiveSemiDef) {\n        if (realAkk < static_cast<RealScalar>(0)) sign = Indefinite;\n      } else if (sign == NegativeSemiDef) {\n        if (realAkk > static_cast<RealScalar>(0)) sign = Indefinite;\n      } else if (sign == ZeroSign) {\n        if (realAkk > static_cast<RealScalar>(0)) sign = PositiveSemiDef;\n        else if (realAkk < static_cast<RealScalar>(0)) sign = NegativeSemiDef;\n      }\n    }\n\n    return ret;\n  }\n\n  // Reference for the algorithm: Davis and Hager, \"Multiple Rank\n  // Modifications of a Sparse Cholesky Factorization\" (Algorithm 1)\n  // Trivial rearrangements of their computations (Timothy E. Holy)\n  // allow their algorithm to work for rank-1 updates even if the\n  // original matrix is not of full rank.\n  // Here only rank-1 updates are implemented, to reduce the\n  // requirement for intermediate storage and improve accuracy\n  template<typename MatrixType, typename WDerived>\n  static bool updateInPlace(MatrixType& mat, MatrixBase<WDerived>& w, const typename MatrixType::RealScalar& sigma=1)\n  {\n    using numext::isfinite;\n    typedef typename MatrixType::Scalar Scalar;\n    typedef typename MatrixType::RealScalar RealScalar;\n\n    const Index size = mat.rows();\n    eigen_assert(mat.cols() == size && w.size()==size);\n\n    RealScalar alpha = 1;\n\n    // Apply the update\n    for (Index j = 0; j < size; j++)\n    {\n      // Check for termination due to an original decomposition of low-rank\n      if (!(isfinite)(alpha))\n        break;\n\n      // Update the diagonal terms\n      RealScalar dj = numext::real(mat.coeff(j,j));\n      Scalar wj = w.coeff(j);\n      RealScalar swj2 = sigma*numext::abs2(wj);\n      RealScalar gamma = dj*alpha + swj2;\n\n      mat.coeffRef(j,j) += swj2/alpha;\n      alpha += swj2/dj;\n\n\n      // Update the terms of L\n      Index rs = size-j-1;\n      w.tail(rs) -= wj * mat.col(j).tail(rs);\n      if(gamma != 0)\n        mat.col(j).tail(rs) += (sigma*numext::conj(wj)/gamma)*w.tail(rs);\n    }\n    return true;\n  }\n\n  template<typename MatrixType, typename TranspositionType, typename Workspace, typename WType>\n  static bool update(MatrixType& mat, const TranspositionType& transpositions, Workspace& tmp, const WType& w, const typename MatrixType::RealScalar& sigma=1)\n  {\n    // Apply the permutation to the input w\n    tmp = transpositions * w;\n\n    return ldlt_inplace<Lower>::updateInPlace(mat,tmp,sigma);\n  }\n};\n\ntemplate<> struct ldlt_inplace<Upper>\n{\n  
template<typename MatrixType, typename TranspositionType, typename Workspace>\n  static EIGEN_STRONG_INLINE bool unblocked(MatrixType& mat, TranspositionType& transpositions, Workspace& temp, SignMatrix& sign)\n  {\n    Transpose<MatrixType> matt(mat);\n    return ldlt_inplace<Lower>::unblocked(matt, transpositions, temp, sign);\n  }\n\n  template<typename MatrixType, typename TranspositionType, typename Workspace, typename WType>\n  static EIGEN_STRONG_INLINE bool update(MatrixType& mat, TranspositionType& transpositions, Workspace& tmp, WType& w, const typename MatrixType::RealScalar& sigma=1)\n  {\n    Transpose<MatrixType> matt(mat);\n    return ldlt_inplace<Lower>::update(matt, transpositions, tmp, w.conjugate(), sigma);\n  }\n};\n\ntemplate<typename MatrixType> struct LDLT_Traits<MatrixType,Lower>\n{\n  typedef const TriangularView<const MatrixType, UnitLower> MatrixL;\n  typedef const TriangularView<const typename MatrixType::AdjointReturnType, UnitUpper> MatrixU;\n  static inline MatrixL getL(const MatrixType& m) { return MatrixL(m); }\n  static inline MatrixU getU(const MatrixType& m) { return MatrixU(m.adjoint()); }\n};\n\ntemplate<typename MatrixType> struct LDLT_Traits<MatrixType,Upper>\n{\n  typedef const TriangularView<const typename MatrixType::AdjointReturnType, UnitLower> MatrixL;\n  typedef const TriangularView<const MatrixType, UnitUpper> MatrixU;\n  static inline MatrixL getL(const MatrixType& m) { return MatrixL(m.adjoint()); }\n  static inline MatrixU getU(const MatrixType& m) { return MatrixU(m); }\n};\n\n} // end namespace internal\n\n/** Compute / recompute the LDLT decomposition A = L D L^* = U^* D U of \\a matrix\n  */\ntemplate<typename MatrixType, int _UpLo>\ntemplate<typename InputType>\nLDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::compute(const EigenBase<InputType>& a)\n{\n  check_template_parameters();\n\n  eigen_assert(a.rows()==a.cols());\n  const Index size = a.rows();\n\n  m_matrix = a.derived();\n\n  // Compute matrix L1 norm = max abs column sum.\n  m_l1_norm = RealScalar(0);\n  // TODO move this code to SelfAdjointView\n  for (Index col = 0; col < size; ++col) {\n    RealScalar abs_col_sum;\n    if (_UpLo == Lower)\n      abs_col_sum = m_matrix.col(col).tail(size - col).template lpNorm<1>() + m_matrix.row(col).head(col).template lpNorm<1>();\n    else\n      abs_col_sum = m_matrix.col(col).head(col).template lpNorm<1>() + m_matrix.row(col).tail(size - col).template lpNorm<1>();\n    if (abs_col_sum > m_l1_norm)\n      m_l1_norm = abs_col_sum;\n  }\n\n  m_transpositions.resize(size);\n  m_isInitialized = false;\n  m_temporary.resize(size);\n  m_sign = internal::ZeroSign;\n\n  m_info = internal::ldlt_inplace<UpLo>::unblocked(m_matrix, m_transpositions, m_temporary, m_sign) ? Success : NumericalIssue;\n\n  m_isInitialized = true;\n  return *this;\n}\n\n/** Update the LDLT decomposition:  given A = L D L^T, efficiently compute the decomposition of A + sigma w w^T.\n * \\param w a vector to be incorporated into the decomposition.\n * \\param sigma a scalar, +1 for updates and -1 for \"downdates,\" which correspond to removing previously-added column vectors. 
Optional; default value is +1.\n * \\sa setZero()\n  */\ntemplate<typename MatrixType, int _UpLo>\ntemplate<typename Derived>\nLDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::rankUpdate(const MatrixBase<Derived>& w, const typename LDLT<MatrixType,_UpLo>::RealScalar& sigma)\n{\n  typedef typename TranspositionType::StorageIndex IndexType;\n  const Index size = w.rows();\n  if (m_isInitialized)\n  {\n    eigen_assert(m_matrix.rows()==size);\n  }\n  else\n  {\n    m_matrix.resize(size,size);\n    m_matrix.setZero();\n    m_transpositions.resize(size);\n    for (Index i = 0; i < size; i++)\n      m_transpositions.coeffRef(i) = IndexType(i);\n    m_temporary.resize(size);\n    m_sign = sigma>=0 ? internal::PositiveSemiDef : internal::NegativeSemiDef;\n    m_isInitialized = true;\n  }\n\n  internal::ldlt_inplace<UpLo>::update(m_matrix, m_transpositions, m_temporary, w, sigma);\n\n  return *this;\n}\n\n#ifndef EIGEN_PARSED_BY_DOXYGEN\ntemplate<typename _MatrixType, int _UpLo>\ntemplate<typename RhsType, typename DstType>\nvoid LDLT<_MatrixType,_UpLo>::_solve_impl(const RhsType &rhs, DstType &dst) const\n{\n  _solve_impl_transposed<true>(rhs, dst);\n}\n\ntemplate<typename _MatrixType,int _UpLo>\ntemplate<bool Conjugate, typename RhsType, typename DstType>\nvoid LDLT<_MatrixType,_UpLo>::_solve_impl_transposed(const RhsType &rhs, DstType &dst) const\n{\n  // dst = P b\n  dst = m_transpositions * rhs;\n\n  // dst = L^-1 (P b)\n  // dst = L^-*T (P b)\n  matrixL().template conjugateIf<!Conjugate>().solveInPlace(dst);\n\n  // dst = D^-* (L^-1 P b)\n  // dst = D^-1 (L^-*T P b)\n  // more precisely, use pseudo-inverse of D (see bug 241)\n  using std::abs;\n  const typename Diagonal<const MatrixType>::RealReturnType vecD(vectorD());\n  // In some previous versions, tolerance was set to the max of 1/highest (or rather numeric_limits::min())\n  // and the maximal diagonal entry * epsilon as motivated by LAPACK's xGELSS:\n  // RealScalar tolerance = numext::maxi(vecD.array().abs().maxCoeff() * NumTraits<RealScalar>::epsilon(),RealScalar(1) / NumTraits<RealScalar>::highest());\n  // However, LDLT is not rank revealing, and so adjusting the tolerance wrt to the highest\n  // diagonal element is not well justified and leads to numerical issues in some cases.\n  // Moreover, Lapack's xSYTRS routines use 0 for the tolerance.\n  // Using numeric_limits::min() gives us more robustness to denormals.\n  RealScalar tolerance = (std::numeric_limits<RealScalar>::min)();\n  for (Index i = 0; i < vecD.size(); ++i)\n  {\n    if(abs(vecD(i)) > tolerance)\n      dst.row(i) /= vecD(i);\n    else\n      dst.row(i).setZero();\n  }\n\n  // dst = L^-* (D^-* L^-1 P b)\n  // dst = L^-T (D^-1 L^-*T P b)\n  matrixL().transpose().template conjugateIf<Conjugate>().solveInPlace(dst);\n\n  // dst = P^T (L^-* D^-* L^-1 P b) = A^-1 b\n  // dst = P^-T (L^-T D^-1 L^-*T P b) = A^-1 b\n  dst = m_transpositions.transpose() * dst;\n}\n#endif\n\n/** \\internal use x = ldlt_object.solve(x);\n  *\n  * This is the \\em in-place version of solve().\n  *\n  * \\param bAndX represents both the right-hand side matrix b and result x.\n  *\n  * \\returns true always! 
If you need to check for existence of solutions, use another decomposition like LU, QR, or SVD.\n  *\n  * This version avoids a copy when the right hand side matrix b is not\n  * needed anymore.\n  *\n  * \\sa LDLT::solve(), MatrixBase::ldlt()\n  */\ntemplate<typename MatrixType,int _UpLo>\ntemplate<typename Derived>\nbool LDLT<MatrixType,_UpLo>::solveInPlace(MatrixBase<Derived> &bAndX) const\n{\n  eigen_assert(m_isInitialized && \"LDLT is not initialized.\");\n  eigen_assert(m_matrix.rows() == bAndX.rows());\n\n  bAndX = this->solve(bAndX);\n\n  return true;\n}\n\n/** \\returns the matrix represented by the decomposition,\n * i.e., it returns the product: P^T L D L^* P.\n * This function is provided for debug purpose. */\ntemplate<typename MatrixType, int _UpLo>\nMatrixType LDLT<MatrixType,_UpLo>::reconstructedMatrix() const\n{\n  eigen_assert(m_isInitialized && \"LDLT is not initialized.\");\n  const Index size = m_matrix.rows();\n  MatrixType res(size,size);\n\n  // P\n  res.setIdentity();\n  res = transpositionsP() * res;\n  // L^* P\n  res = matrixU() * res;\n  // D(L^*P)\n  res = vectorD().real().asDiagonal() * res;\n  // L(DL^*P)\n  res = matrixL() * res;\n  // P^T (LDL^*P)\n  res = transpositionsP().transpose() * res;\n\n  return res;\n}\n\n/** \\cholesky_module\n  * \\returns the Cholesky decomposition with full pivoting without square root of \\c *this\n  * \\sa MatrixBase::ldlt()\n  */\ntemplate<typename MatrixType, unsigned int UpLo>\ninline const LDLT<typename SelfAdjointView<MatrixType, UpLo>::PlainObject, UpLo>\nSelfAdjointView<MatrixType, UpLo>::ldlt() const\n{\n  return LDLT<PlainObject,UpLo>(m_matrix);\n}\n\n/** \\cholesky_module\n  * \\returns the Cholesky decomposition with full pivoting without square root of \\c *this\n  * \\sa SelfAdjointView::ldlt()\n  */\ntemplate<typename Derived>\ninline const LDLT<typename MatrixBase<Derived>::PlainObject>\nMatrixBase<Derived>::ldlt() const\n{\n  return LDLT<PlainObject>(derived());\n}\n\n} // end namespace Eigen\n\n#endif // EIGEN_LDLT_H\n"
  },
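  {
    "path": "example/illustrative_sketches/ldlt_usage_sketch.cpp",
    "content": "// Illustrative usage sketch; this hypothetical example/ file is not part of upstream Eigen\n// or of this repository. It exercises the LDLT interface documented above: a pivoted\n// Cholesky solve of a positive semidefinite system plus a rank-1 update via rankUpdate().\n#include <iostream>\n#include <Eigen/Dense>\n\nint main()\n{\n  // M * M^T is always symmetric positive semidefinite.\n  Eigen::MatrixXd M = Eigen::MatrixXd::Random(4, 4);\n  Eigen::MatrixXd A = M * M.transpose();\n  Eigen::VectorXd b = Eigen::VectorXd::Random(4);\n\n  Eigen::LDLT<Eigen::MatrixXd> ldlt(A);\n  if (ldlt.info() != Eigen::Success) return 1;\n  Eigen::VectorXd x = ldlt.solve(b);\n  std::cout << (A * x - b).norm() << std::endl;\n\n  // Update the factorization to represent A + w w^T without recomputing from scratch.\n  Eigen::VectorXd w = Eigen::VectorXd::Random(4);\n  ldlt.rankUpdate(w, 1.0);\n  Eigen::VectorXd x2 = ldlt.solve(b);\n  std::cout << ((A + w * w.transpose()) * x2 - b).norm() << std::endl;\n  return 0;\n}\n"
  },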
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Cholesky/LLT.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_LLT_H\n#define EIGEN_LLT_H\n\nnamespace Eigen {\n\nnamespace internal{\n\ntemplate<typename _MatrixType, int _UpLo> struct traits<LLT<_MatrixType, _UpLo> >\n : traits<_MatrixType>\n{\n  typedef MatrixXpr XprKind;\n  typedef SolverStorage StorageKind;\n  typedef int StorageIndex;\n  enum { Flags = 0 };\n};\n\ntemplate<typename MatrixType, int UpLo> struct LLT_Traits;\n}\n\n/** \\ingroup Cholesky_Module\n  *\n  * \\class LLT\n  *\n  * \\brief Standard Cholesky decomposition (LL^T) of a matrix and associated features\n  *\n  * \\tparam _MatrixType the type of the matrix of which we are computing the LL^T Cholesky decomposition\n  * \\tparam _UpLo the triangular part that will be used for the decompositon: Lower (default) or Upper.\n  *               The other triangular part won't be read.\n  *\n  * This class performs a LL^T Cholesky decomposition of a symmetric, positive definite\n  * matrix A such that A = LL^* = U^*U, where L is lower triangular.\n  *\n  * While the Cholesky decomposition is particularly useful to solve selfadjoint problems like  D^*D x = b,\n  * for that purpose, we recommend the Cholesky decomposition without square root which is more stable\n  * and even faster. Nevertheless, this standard Cholesky decomposition remains useful in many other\n  * situations like generalised eigen problems with hermitian matrices.\n  *\n  * Remember that Cholesky decompositions are not rank-revealing. This LLT decomposition is only stable on positive definite matrices,\n  * use LDLT instead for the semidefinite case. Also, do not use a Cholesky decomposition to determine whether a system of equations\n  * has a solution.\n  *\n  * Example: \\include LLT_example.cpp\n  * Output: \\verbinclude LLT_example.out\n  *\n  * \\b Performance: for best performance, it is recommended to use a column-major storage format\n  * with the Lower triangular part (the default), or, equivalently, a row-major storage format\n  * with the Upper triangular part. 
Otherwise, you might get a 20% slowdown for the full factorization\n  * step, and rank-updates can be up to 3 times slower.\n  *\n  * This class supports the \\link InplaceDecomposition inplace decomposition \\endlink mechanism.\n  *\n  * Note that during the decomposition, only the lower (or upper, as defined by _UpLo) triangular part of A is considered.\n  * Therefore, the strict lower part does not have to store correct values.\n  *\n  * \\sa MatrixBase::llt(), SelfAdjointView::llt(), class LDLT\n  */\ntemplate<typename _MatrixType, int _UpLo> class LLT\n        : public SolverBase<LLT<_MatrixType, _UpLo> >\n{\n  public:\n    typedef _MatrixType MatrixType;\n    typedef SolverBase<LLT> Base;\n    friend class SolverBase<LLT>;\n\n    EIGEN_GENERIC_PUBLIC_INTERFACE(LLT)\n    enum {\n      MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime\n    };\n\n    enum {\n      PacketSize = internal::packet_traits<Scalar>::size,\n      AlignmentMask = int(PacketSize)-1,\n      UpLo = _UpLo\n    };\n\n    typedef internal::LLT_Traits<MatrixType,UpLo> Traits;\n\n    /**\n      * \\brief Default Constructor.\n      *\n      * The default constructor is useful in cases in which the user intends to\n      * perform decompositions via LLT::compute(const MatrixType&).\n      */\n    LLT() : m_matrix(), m_isInitialized(false) {}\n\n    /** \\brief Default Constructor with memory preallocation\n      *\n      * Like the default constructor but with preallocation of the internal data\n      * according to the specified problem \\a size.\n      * \\sa LLT()\n      */\n    explicit LLT(Index size) : m_matrix(size, size),\n                    m_isInitialized(false) {}\n\n    template<typename InputType>\n    explicit LLT(const EigenBase<InputType>& matrix)\n      : m_matrix(matrix.rows(), matrix.cols()),\n        m_isInitialized(false)\n    {\n      compute(matrix.derived());\n    }\n\n    /** \\brief Constructs a LLT factorization from a given matrix\n      *\n      * This overloaded constructor is provided for \\link InplaceDecomposition inplace decomposition \\endlink when\n      * \\c MatrixType is a Eigen::Ref.\n      *\n      * \\sa LLT(const EigenBase&)\n      */\n    template<typename InputType>\n    explicit LLT(EigenBase<InputType>& matrix)\n      : m_matrix(matrix.derived()),\n        m_isInitialized(false)\n    {\n      compute(matrix.derived());\n    }\n\n    /** \\returns a view of the upper triangular matrix U */\n    inline typename Traits::MatrixU matrixU() const\n    {\n      eigen_assert(m_isInitialized && \"LLT is not initialized.\");\n      return Traits::getU(m_matrix);\n    }\n\n    /** \\returns a view of the lower triangular matrix L */\n    inline typename Traits::MatrixL matrixL() const\n    {\n      eigen_assert(m_isInitialized && \"LLT is not initialized.\");\n      return Traits::getL(m_matrix);\n    }\n\n    #ifdef EIGEN_PARSED_BY_DOXYGEN\n    /** \\returns the solution x of \\f$ A x = b \\f$ using the current decomposition of A.\n      *\n      * Since this LLT class assumes anyway that the matrix A is invertible, the solution\n      * theoretically exists and is unique regardless of b.\n      *\n      * Example: \\include LLT_solve.cpp\n      * Output: \\verbinclude LLT_solve.out\n      *\n      * \\sa solveInPlace(), MatrixBase::llt(), SelfAdjointView::llt()\n      */\n    template<typename Rhs>\n    inline const Solve<LLT, Rhs>\n    solve(const MatrixBase<Rhs>& b) const;\n    #endif\n\n    template<typename Derived>\n    void solveInPlace(const MatrixBase<Derived> &bAndX) 
const;\n\n    template<typename InputType>\n    LLT& compute(const EigenBase<InputType>& matrix);\n\n    /** \\returns an estimate of the reciprocal condition number of the matrix of\n      *  which \\c *this is the Cholesky decomposition.\n      */\n    RealScalar rcond() const\n    {\n      eigen_assert(m_isInitialized && \"LLT is not initialized.\");\n      eigen_assert(m_info == Success && \"LLT failed because matrix appears to be negative\");\n      return internal::rcond_estimate_helper(m_l1_norm, *this);\n    }\n\n    /** \\returns the LLT decomposition matrix\n      *\n      * TODO: document the storage layout\n      */\n    inline const MatrixType& matrixLLT() const\n    {\n      eigen_assert(m_isInitialized && \"LLT is not initialized.\");\n      return m_matrix;\n    }\n\n    MatrixType reconstructedMatrix() const;\n\n\n    /** \\brief Reports whether previous computation was successful.\n      *\n      * \\returns \\c Success if computation was successful,\n      *          \\c NumericalIssue if the matrix appears not to be positive definite.\n      */\n    ComputationInfo info() const\n    {\n      eigen_assert(m_isInitialized && \"LLT is not initialized.\");\n      return m_info;\n    }\n\n    /** \\returns the adjoint of \\c *this, that is, a const reference to the decomposition itself as the underlying matrix is self-adjoint.\n      *\n      * This method is provided for compatibility with other matrix decompositions, thus enabling generic code such as:\n      * \\code x = decomposition.adjoint().solve(b) \\endcode\n      */\n    const LLT& adjoint() const EIGEN_NOEXCEPT { return *this; };\n\n    inline EIGEN_CONSTEXPR Index rows() const EIGEN_NOEXCEPT { return m_matrix.rows(); }\n    inline EIGEN_CONSTEXPR Index cols() const EIGEN_NOEXCEPT { return m_matrix.cols(); }\n\n    template<typename VectorType>\n    LLT & rankUpdate(const VectorType& vec, const RealScalar& sigma = 1);\n\n    #ifndef EIGEN_PARSED_BY_DOXYGEN\n    template<typename RhsType, typename DstType>\n    void _solve_impl(const RhsType &rhs, DstType &dst) const;\n\n    template<bool Conjugate, typename RhsType, typename DstType>\n    void _solve_impl_transposed(const RhsType &rhs, DstType &dst) const;\n    #endif\n\n  protected:\n\n    static void check_template_parameters()\n    {\n      EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);\n    }\n\n    /** \\internal\n      * Used to compute and store L\n      * The strict upper part is not used and even not initialized.\n      */\n    MatrixType m_matrix;\n    RealScalar m_l1_norm;\n    bool m_isInitialized;\n    ComputationInfo m_info;\n};\n\nnamespace internal {\n\ntemplate<typename Scalar, int UpLo> struct llt_inplace;\n\ntemplate<typename MatrixType, typename VectorType>\nstatic Index llt_rank_update_lower(MatrixType& mat, const VectorType& vec, const typename MatrixType::RealScalar& sigma)\n{\n  using std::sqrt;\n  typedef typename MatrixType::Scalar Scalar;\n  typedef typename MatrixType::RealScalar RealScalar;\n  typedef typename MatrixType::ColXpr ColXpr;\n  typedef typename internal::remove_all<ColXpr>::type ColXprCleaned;\n  typedef typename ColXprCleaned::SegmentReturnType ColXprSegment;\n  typedef Matrix<Scalar,Dynamic,1> TempVectorType;\n  typedef typename TempVectorType::SegmentReturnType TempVecSegment;\n\n  Index n = mat.cols();\n  eigen_assert(mat.rows()==n && vec.size()==n);\n\n  TempVectorType temp;\n\n  if(sigma>0)\n  {\n    // This version is based on Givens rotations.\n    // It is faster than the other one below, but only works for updates,\n    
// i.e., for sigma > 0\n    temp = sqrt(sigma) * vec;\n\n    for(Index i=0; i<n; ++i)\n    {\n      JacobiRotation<Scalar> g;\n      g.makeGivens(mat(i,i), -temp(i), &mat(i,i));\n\n      Index rs = n-i-1;\n      if(rs>0)\n      {\n        ColXprSegment x(mat.col(i).tail(rs));\n        TempVecSegment y(temp.tail(rs));\n        apply_rotation_in_the_plane(x, y, g);\n      }\n    }\n  }\n  else\n  {\n    temp = vec;\n    RealScalar beta = 1;\n    for(Index j=0; j<n; ++j)\n    {\n      RealScalar Ljj = numext::real(mat.coeff(j,j));\n      RealScalar dj = numext::abs2(Ljj);\n      Scalar wj = temp.coeff(j);\n      RealScalar swj2 = sigma*numext::abs2(wj);\n      RealScalar gamma = dj*beta + swj2;\n\n      RealScalar x = dj + swj2/beta;\n      if (x<=RealScalar(0))\n        return j;\n      RealScalar nLjj = sqrt(x);\n      mat.coeffRef(j,j) = nLjj;\n      beta += swj2/dj;\n\n      // Update the terms of L\n      Index rs = n-j-1;\n      if(rs)\n      {\n        temp.tail(rs) -= (wj/Ljj) * mat.col(j).tail(rs);\n        if(gamma != 0)\n          mat.col(j).tail(rs) = (nLjj/Ljj) * mat.col(j).tail(rs) + (nLjj * sigma*numext::conj(wj)/gamma)*temp.tail(rs);\n      }\n    }\n  }\n  return -1;\n}\n\ntemplate<typename Scalar> struct llt_inplace<Scalar, Lower>\n{\n  typedef typename NumTraits<Scalar>::Real RealScalar;\n  template<typename MatrixType>\n  static Index unblocked(MatrixType& mat)\n  {\n    using std::sqrt;\n\n    eigen_assert(mat.rows()==mat.cols());\n    const Index size = mat.rows();\n    for(Index k = 0; k < size; ++k)\n    {\n      Index rs = size-k-1; // remaining size\n\n      Block<MatrixType,Dynamic,1> A21(mat,k+1,k,rs,1);\n      Block<MatrixType,1,Dynamic> A10(mat,k,0,1,k);\n      Block<MatrixType,Dynamic,Dynamic> A20(mat,k+1,0,rs,k);\n\n      RealScalar x = numext::real(mat.coeff(k,k));\n      if (k>0) x -= A10.squaredNorm();\n      if (x<=RealScalar(0))\n        return k;\n      mat.coeffRef(k,k) = x = sqrt(x);\n      if (k>0 && rs>0) A21.noalias() -= A20 * A10.adjoint();\n      if (rs>0) A21 /= x;\n    }\n    return -1;\n  }\n\n  template<typename MatrixType>\n  static Index blocked(MatrixType& m)\n  {\n    eigen_assert(m.rows()==m.cols());\n    Index size = m.rows();\n    if(size<32)\n      return unblocked(m);\n\n    Index blockSize = size/8;\n    blockSize = (blockSize/16)*16;\n    blockSize = (std::min)((std::max)(blockSize,Index(8)), Index(128));\n\n    for (Index k=0; k<size; k+=blockSize)\n    {\n      // partition the matrix:\n      //       A00 |  -  |  -\n      // lu  = A10 | A11 |  -\n      //       A20 | A21 | A22\n      Index bs = (std::min)(blockSize, size-k);\n      Index rs = size - k - bs;\n      Block<MatrixType,Dynamic,Dynamic> A11(m,k,   k,   bs,bs);\n      Block<MatrixType,Dynamic,Dynamic> A21(m,k+bs,k,   rs,bs);\n      Block<MatrixType,Dynamic,Dynamic> A22(m,k+bs,k+bs,rs,rs);\n\n      Index ret;\n      if((ret=unblocked(A11))>=0) return k+ret;\n      if(rs>0) A11.adjoint().template triangularView<Upper>().template solveInPlace<OnTheRight>(A21);\n      if(rs>0) A22.template selfadjointView<Lower>().rankUpdate(A21,typename NumTraits<RealScalar>::Literal(-1)); // bottleneck\n    }\n    return -1;\n  }\n\n  template<typename MatrixType, typename VectorType>\n  static Index rankUpdate(MatrixType& mat, const VectorType& vec, const RealScalar& sigma)\n  {\n    return Eigen::internal::llt_rank_update_lower(mat, vec, sigma);\n  }\n};\n\ntemplate<typename Scalar> struct llt_inplace<Scalar, Upper>\n{\n  typedef typename NumTraits<Scalar>::Real RealScalar;\n\n  
template<typename MatrixType>\n  static EIGEN_STRONG_INLINE Index unblocked(MatrixType& mat)\n  {\n    Transpose<MatrixType> matt(mat);\n    return llt_inplace<Scalar, Lower>::unblocked(matt);\n  }\n  template<typename MatrixType>\n  static EIGEN_STRONG_INLINE Index blocked(MatrixType& mat)\n  {\n    Transpose<MatrixType> matt(mat);\n    return llt_inplace<Scalar, Lower>::blocked(matt);\n  }\n  template<typename MatrixType, typename VectorType>\n  static Index rankUpdate(MatrixType& mat, const VectorType& vec, const RealScalar& sigma)\n  {\n    Transpose<MatrixType> matt(mat);\n    return llt_inplace<Scalar, Lower>::rankUpdate(matt, vec.conjugate(), sigma);\n  }\n};\n\ntemplate<typename MatrixType> struct LLT_Traits<MatrixType,Lower>\n{\n  typedef const TriangularView<const MatrixType, Lower> MatrixL;\n  typedef const TriangularView<const typename MatrixType::AdjointReturnType, Upper> MatrixU;\n  static inline MatrixL getL(const MatrixType& m) { return MatrixL(m); }\n  static inline MatrixU getU(const MatrixType& m) { return MatrixU(m.adjoint()); }\n  static bool inplace_decomposition(MatrixType& m)\n  { return llt_inplace<typename MatrixType::Scalar, Lower>::blocked(m)==-1; }\n};\n\ntemplate<typename MatrixType> struct LLT_Traits<MatrixType,Upper>\n{\n  typedef const TriangularView<const typename MatrixType::AdjointReturnType, Lower> MatrixL;\n  typedef const TriangularView<const MatrixType, Upper> MatrixU;\n  static inline MatrixL getL(const MatrixType& m) { return MatrixL(m.adjoint()); }\n  static inline MatrixU getU(const MatrixType& m) { return MatrixU(m); }\n  static bool inplace_decomposition(MatrixType& m)\n  { return llt_inplace<typename MatrixType::Scalar, Upper>::blocked(m)==-1; }\n};\n\n} // end namespace internal\n\n/** Computes / recomputes the Cholesky decomposition A = LL^* = U^*U of \\a matrix\n  *\n  * \\returns a reference to *this\n  *\n  * Example: \\include TutorialLinAlgComputeTwice.cpp\n  * Output: \\verbinclude TutorialLinAlgComputeTwice.out\n  */\ntemplate<typename MatrixType, int _UpLo>\ntemplate<typename InputType>\nLLT<MatrixType,_UpLo>& LLT<MatrixType,_UpLo>::compute(const EigenBase<InputType>& a)\n{\n  check_template_parameters();\n\n  eigen_assert(a.rows()==a.cols());\n  const Index size = a.rows();\n  m_matrix.resize(size, size);\n  if (!internal::is_same_dense(m_matrix, a.derived()))\n    m_matrix = a.derived();\n\n  // Compute matrix L1 norm = max abs column sum.\n  m_l1_norm = RealScalar(0);\n  // TODO move this code to SelfAdjointView\n  for (Index col = 0; col < size; ++col) {\n    RealScalar abs_col_sum;\n    if (_UpLo == Lower)\n      abs_col_sum = m_matrix.col(col).tail(size - col).template lpNorm<1>() + m_matrix.row(col).head(col).template lpNorm<1>();\n    else\n      abs_col_sum = m_matrix.col(col).head(col).template lpNorm<1>() + m_matrix.row(col).tail(size - col).template lpNorm<1>();\n    if (abs_col_sum > m_l1_norm)\n      m_l1_norm = abs_col_sum;\n  }\n\n  m_isInitialized = true;\n  bool ok = Traits::inplace_decomposition(m_matrix);\n  m_info = ok ? 
Success : NumericalIssue;\n\n  return *this;\n}\n\n/** Performs a rank one update (or downdate) of the current decomposition.\n  * If A = LL^* before the rank one update,\n  * then after it we have LL^* = A + sigma * v v^* where \a v must be a vector\n  * of the same dimension.\n  */\ntemplate<typename _MatrixType, int _UpLo>\ntemplate<typename VectorType>\nLLT<_MatrixType,_UpLo> & LLT<_MatrixType,_UpLo>::rankUpdate(const VectorType& v, const RealScalar& sigma)\n{\n  EIGEN_STATIC_ASSERT_VECTOR_ONLY(VectorType);\n  eigen_assert(v.size()==m_matrix.cols());\n  eigen_assert(m_isInitialized);\n  if(internal::llt_inplace<typename MatrixType::Scalar, UpLo>::rankUpdate(m_matrix,v,sigma)>=0)\n    m_info = NumericalIssue;\n  else\n    m_info = Success;\n\n  return *this;\n}\n\n#ifndef EIGEN_PARSED_BY_DOXYGEN\ntemplate<typename _MatrixType,int _UpLo>\ntemplate<typename RhsType, typename DstType>\nvoid LLT<_MatrixType,_UpLo>::_solve_impl(const RhsType &rhs, DstType &dst) const\n{\n  _solve_impl_transposed<true>(rhs, dst);\n}\n\ntemplate<typename _MatrixType,int _UpLo>\ntemplate<bool Conjugate, typename RhsType, typename DstType>\nvoid LLT<_MatrixType,_UpLo>::_solve_impl_transposed(const RhsType &rhs, DstType &dst) const\n{\n    dst = rhs;\n\n    matrixL().template conjugateIf<!Conjugate>().solveInPlace(dst);\n    matrixU().template conjugateIf<!Conjugate>().solveInPlace(dst);\n}\n#endif\n\n/** \internal use x = llt_object.solve(x);\n  *\n  * This is the \em in-place version of solve().\n  *\n  * \param bAndX represents both the right-hand side matrix b and result x.\n  *\n  * This version avoids a copy when the right hand side matrix b is not needed anymore.\n  *\n  * \warning The parameter is only marked 'const' to make the C++ compiler accept a temporary expression here.\n  * This function will const_cast it, so constness isn't honored here.\n  *\n  * \sa LLT::solve(), MatrixBase::llt()\n  */\ntemplate<typename MatrixType, int _UpLo>\ntemplate<typename Derived>\nvoid LLT<MatrixType,_UpLo>::solveInPlace(const MatrixBase<Derived> &bAndX) const\n{\n  eigen_assert(m_isInitialized && \"LLT is not initialized.\");\n  eigen_assert(m_matrix.rows()==bAndX.rows());\n  matrixL().solveInPlace(bAndX);\n  matrixU().solveInPlace(bAndX);\n}\n\n/** \returns the matrix represented by the decomposition,\n * i.e., it returns the product: L L^*.\n * This function is provided for debug purposes. */\ntemplate<typename MatrixType, int _UpLo>\nMatrixType LLT<MatrixType,_UpLo>::reconstructedMatrix() const\n{\n  eigen_assert(m_isInitialized && \"LLT is not initialized.\");\n  return matrixL() * matrixL().adjoint().toDenseMatrix();\n}\n\n/** \cholesky_module\n  * \returns the LLT decomposition of \c *this\n  * \sa SelfAdjointView::llt()\n  */\ntemplate<typename Derived>\ninline const LLT<typename MatrixBase<Derived>::PlainObject>\nMatrixBase<Derived>::llt() const\n{\n  return LLT<PlainObject>(derived());\n}\n\n/** \cholesky_module\n  * \returns the LLT decomposition of \c *this\n  * \sa SelfAdjointView::llt()\n  */\ntemplate<typename MatrixType, unsigned int UpLo>\ninline const LLT<typename SelfAdjointView<MatrixType, UpLo>::PlainObject, UpLo>\nSelfAdjointView<MatrixType, UpLo>::llt() const\n{\n  return LLT<PlainObject,UpLo>(m_matrix);\n}\n\n} // end namespace Eigen\n\n#endif // EIGEN_LLT_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Cholesky/LLT_LAPACKE.h",
    "content": "/*\n Copyright (c) 2011, Intel Corporation. All rights reserved.\n\n Redistribution and use in source and binary forms, with or without modification,\n are permitted provided that the following conditions are met:\n\n * Redistributions of source code must retain the above copyright notice, this\n   list of conditions and the following disclaimer.\n * Redistributions in binary form must reproduce the above copyright notice,\n   this list of conditions and the following disclaimer in the documentation\n   and/or other materials provided with the distribution.\n * Neither the name of Intel Corporation nor the names of its contributors may\n   be used to endorse or promote products derived from this software without\n   specific prior written permission.\n\n THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\" AND\n ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED\n WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE\n DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR\n ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES\n (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;\n LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON\n ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\n SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\n ********************************************************************************\n *   Content : Eigen bindings to LAPACKe\n *     LLt decomposition based on LAPACKE_?potrf function.\n ********************************************************************************\n*/\n\n#ifndef EIGEN_LLT_LAPACKE_H\n#define EIGEN_LLT_LAPACKE_H\n\nnamespace Eigen { \n\nnamespace internal {\n\ntemplate<typename Scalar> struct lapacke_llt;\n\n#define EIGEN_LAPACKE_LLT(EIGTYPE, BLASTYPE, LAPACKE_PREFIX) \\\ntemplate<> struct lapacke_llt<EIGTYPE> \\\n{ \\\n  template<typename MatrixType> \\\n  static inline Index potrf(MatrixType& m, char uplo) \\\n  { \\\n    lapack_int matrix_order; \\\n    lapack_int size, lda, info, StorageOrder; \\\n    EIGTYPE* a; \\\n    eigen_assert(m.rows()==m.cols()); \\\n    /* Set up parameters for ?potrf */ \\\n    size = convert_index<lapack_int>(m.rows()); \\\n    StorageOrder = MatrixType::Flags&RowMajorBit?RowMajor:ColMajor; \\\n    matrix_order = StorageOrder==RowMajor ? LAPACK_ROW_MAJOR : LAPACK_COL_MAJOR; \\\n    a = &(m.coeffRef(0,0)); \\\n    lda = convert_index<lapack_int>(m.outerStride()); \\\n\\\n    info = LAPACKE_##LAPACKE_PREFIX##potrf( matrix_order, uplo, size, (BLASTYPE*)a, lda ); \\\n    info = (info==0) ? -1 : info>0 ? 
info-1 : size; \\\n    return info; \\\n  } \\\n}; \\\ntemplate<> struct llt_inplace<EIGTYPE, Lower> \\\n{ \\\n  template<typename MatrixType> \\\n  static Index blocked(MatrixType& m) \\\n  { \\\n    return lapacke_llt<EIGTYPE>::potrf(m, 'L'); \\\n  } \\\n  template<typename MatrixType, typename VectorType> \\\n  static Index rankUpdate(MatrixType& mat, const VectorType& vec, const typename MatrixType::RealScalar& sigma) \\\n  { return Eigen::internal::llt_rank_update_lower(mat, vec, sigma); } \\\n}; \\\ntemplate<> struct llt_inplace<EIGTYPE, Upper> \\\n{ \\\n  template<typename MatrixType> \\\n  static Index blocked(MatrixType& m) \\\n  { \\\n    return lapacke_llt<EIGTYPE>::potrf(m, 'U'); \\\n  } \\\n  template<typename MatrixType, typename VectorType> \\\n  static Index rankUpdate(MatrixType& mat, const VectorType& vec, const typename MatrixType::RealScalar& sigma) \\\n  { \\\n    Transpose<MatrixType> matt(mat); \\\n    return llt_inplace<EIGTYPE, Lower>::rankUpdate(matt, vec.conjugate(), sigma); \\\n  } \\\n};\n\nEIGEN_LAPACKE_LLT(double, double, d)\nEIGEN_LAPACKE_LLT(float, float, s)\nEIGEN_LAPACKE_LLT(dcomplex, lapack_complex_double, z)\nEIGEN_LAPACKE_LLT(scomplex, lapack_complex_float, c)\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_LLT_LAPACKE_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/CholmodSupport/CholmodSupport.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_CHOLMODSUPPORT_H\n#define EIGEN_CHOLMODSUPPORT_H\n\nnamespace Eigen {\n\nnamespace internal {\n\ntemplate<typename Scalar> struct cholmod_configure_matrix;\n\ntemplate<> struct cholmod_configure_matrix<double> {\n  template<typename CholmodType>\n  static void run(CholmodType& mat) {\n    mat.xtype = CHOLMOD_REAL;\n    mat.dtype = CHOLMOD_DOUBLE;\n  }\n};\n\ntemplate<> struct cholmod_configure_matrix<std::complex<double> > {\n  template<typename CholmodType>\n  static void run(CholmodType& mat) {\n    mat.xtype = CHOLMOD_COMPLEX;\n    mat.dtype = CHOLMOD_DOUBLE;\n  }\n};\n\n// Other scalar types are not yet supported by Cholmod\n// template<> struct cholmod_configure_matrix<float> {\n//   template<typename CholmodType>\n//   static void run(CholmodType& mat) {\n//     mat.xtype = CHOLMOD_REAL;\n//     mat.dtype = CHOLMOD_SINGLE;\n//   }\n// };\n//\n// template<> struct cholmod_configure_matrix<std::complex<float> > {\n//   template<typename CholmodType>\n//   static void run(CholmodType& mat) {\n//     mat.xtype = CHOLMOD_COMPLEX;\n//     mat.dtype = CHOLMOD_SINGLE;\n//   }\n// };\n\n} // namespace internal\n\n/** Wraps the Eigen sparse matrix \\a mat into a Cholmod sparse matrix object.\n  * Note that the data are shared.\n  */\ntemplate<typename _Scalar, int _Options, typename _StorageIndex>\ncholmod_sparse viewAsCholmod(Ref<SparseMatrix<_Scalar,_Options,_StorageIndex> > mat)\n{\n  cholmod_sparse res;\n  res.nzmax   = mat.nonZeros();\n  res.nrow    = mat.rows();\n  res.ncol    = mat.cols();\n  res.p       = mat.outerIndexPtr();\n  res.i       = mat.innerIndexPtr();\n  res.x       = mat.valuePtr();\n  res.z       = 0;\n  res.sorted  = 1;\n  if(mat.isCompressed())\n  {\n    res.packed  = 1;\n    res.nz = 0;\n  }\n  else\n  {\n    res.packed  = 0;\n    res.nz = mat.innerNonZeroPtr();\n  }\n\n  res.dtype   = 0;\n  res.stype   = -1;\n\n  if (internal::is_same<_StorageIndex,int>::value)\n  {\n    res.itype = CHOLMOD_INT;\n  }\n  else if (internal::is_same<_StorageIndex,SuiteSparse_long>::value)\n  {\n    res.itype = CHOLMOD_LONG;\n  }\n  else\n  {\n    eigen_assert(false && \"Index type not supported yet\");\n  }\n\n  // setup res.xtype\n  internal::cholmod_configure_matrix<_Scalar>::run(res);\n\n  res.stype = 0;\n\n  return res;\n}\n\ntemplate<typename _Scalar, int _Options, typename _Index>\nconst cholmod_sparse viewAsCholmod(const SparseMatrix<_Scalar,_Options,_Index>& mat)\n{\n  cholmod_sparse res = viewAsCholmod(Ref<SparseMatrix<_Scalar,_Options,_Index> >(mat.const_cast_derived()));\n  return res;\n}\n\ntemplate<typename _Scalar, int _Options, typename _Index>\nconst cholmod_sparse viewAsCholmod(const SparseVector<_Scalar,_Options,_Index>& mat)\n{\n  cholmod_sparse res = viewAsCholmod(Ref<SparseMatrix<_Scalar,_Options,_Index> >(mat.const_cast_derived()));\n  return res;\n}\n\n/** Returns a view of the Eigen sparse matrix \\a mat as Cholmod sparse matrix.\n  * The data are not copied but shared. 
*/\ntemplate<typename _Scalar, int _Options, typename _Index, unsigned int UpLo>\ncholmod_sparse viewAsCholmod(const SparseSelfAdjointView<const SparseMatrix<_Scalar,_Options,_Index>, UpLo>& mat)\n{\n  cholmod_sparse res = viewAsCholmod(Ref<SparseMatrix<_Scalar,_Options,_Index> >(mat.matrix().const_cast_derived()));\n\n  if(UpLo==Upper) res.stype =  1;\n  if(UpLo==Lower) res.stype = -1;\n  // swap stype for rowmajor matrices (only works for real matrices)\n  EIGEN_STATIC_ASSERT((_Options & RowMajorBit) == 0 || NumTraits<_Scalar>::IsComplex == 0, THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES);\n  if(_Options & RowMajorBit) res.stype *=-1;\n\n  return res;\n}\n\n/** Returns a view of the Eigen \\b dense matrix \\a mat as Cholmod dense matrix.\n  * The data are not copied but shared. */\ntemplate<typename Derived>\ncholmod_dense viewAsCholmod(MatrixBase<Derived>& mat)\n{\n  EIGEN_STATIC_ASSERT((internal::traits<Derived>::Flags&RowMajorBit)==0,THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES);\n  typedef typename Derived::Scalar Scalar;\n\n  cholmod_dense res;\n  res.nrow   = mat.rows();\n  res.ncol   = mat.cols();\n  res.nzmax  = res.nrow * res.ncol;\n  res.d      = Derived::IsVectorAtCompileTime ? mat.derived().size() : mat.derived().outerStride();\n  res.x      = (void*)(mat.derived().data());\n  res.z      = 0;\n\n  internal::cholmod_configure_matrix<Scalar>::run(res);\n\n  return res;\n}\n\n/** Returns a view of the Cholmod sparse matrix \\a cm as an Eigen sparse matrix.\n  * The data are not copied but shared. */\ntemplate<typename Scalar, int Flags, typename StorageIndex>\nMappedSparseMatrix<Scalar,Flags,StorageIndex> viewAsEigen(cholmod_sparse& cm)\n{\n  return MappedSparseMatrix<Scalar,Flags,StorageIndex>\n         (cm.nrow, cm.ncol, static_cast<StorageIndex*>(cm.p)[cm.ncol],\n          static_cast<StorageIndex*>(cm.p), static_cast<StorageIndex*>(cm.i),static_cast<Scalar*>(cm.x) );\n}\n\nnamespace internal {\n\n// template specializations for int and long that call the correct cholmod method\n\n#define EIGEN_CHOLMOD_SPECIALIZE0(ret, name) \\\n    template<typename _StorageIndex> inline ret cm_ ## name       (cholmod_common &Common) { return cholmod_ ## name   (&Common); } \\\n    template<>                       inline ret cm_ ## name<SuiteSparse_long> (cholmod_common &Common) { return cholmod_l_ ## name (&Common); }\n\n#define EIGEN_CHOLMOD_SPECIALIZE1(ret, name, t1, a1) \\\n    template<typename _StorageIndex> inline ret cm_ ## name       (t1& a1, cholmod_common &Common) { return cholmod_ ## name   (&a1, &Common); } \\\n    template<>                       inline ret cm_ ## name<SuiteSparse_long> (t1& a1, cholmod_common &Common) { return cholmod_l_ ## name (&a1, &Common); }\n\nEIGEN_CHOLMOD_SPECIALIZE0(int, start)\nEIGEN_CHOLMOD_SPECIALIZE0(int, finish)\n\nEIGEN_CHOLMOD_SPECIALIZE1(int, free_factor, cholmod_factor*, L)\nEIGEN_CHOLMOD_SPECIALIZE1(int, free_dense,  cholmod_dense*,  X)\nEIGEN_CHOLMOD_SPECIALIZE1(int, free_sparse, cholmod_sparse*, A)\n\nEIGEN_CHOLMOD_SPECIALIZE1(cholmod_factor*, analyze, cholmod_sparse, A)\n\ntemplate<typename _StorageIndex> inline cholmod_dense*  cm_solve         (int sys, cholmod_factor& L, cholmod_dense&  B, cholmod_common &Common) { return cholmod_solve     (sys, &L, &B, &Common); }\ntemplate<>                       inline cholmod_dense*  cm_solve<SuiteSparse_long>   (int sys, cholmod_factor& L, cholmod_dense&  B, cholmod_common &Common) { return cholmod_l_solve   (sys, &L, &B, &Common); }\n\ntemplate<typename _StorageIndex> inline cholmod_sparse* 
cm_spsolve       (int sys, cholmod_factor& L, cholmod_sparse& B, cholmod_common &Common) { return cholmod_spsolve   (sys, &L, &B, &Common); }\ntemplate<>                       inline cholmod_sparse* cm_spsolve<SuiteSparse_long> (int sys, cholmod_factor& L, cholmod_sparse& B, cholmod_common &Common) { return cholmod_l_spsolve (sys, &L, &B, &Common); }\n\ntemplate<typename _StorageIndex>\ninline int  cm_factorize_p       (cholmod_sparse*  A, double beta[2], _StorageIndex* fset, std::size_t fsize, cholmod_factor* L, cholmod_common &Common) { return cholmod_factorize_p   (A, beta, fset, fsize, L, &Common); }\ntemplate<>\ninline int  cm_factorize_p<SuiteSparse_long> (cholmod_sparse*  A, double beta[2], SuiteSparse_long* fset,          std::size_t fsize, cholmod_factor* L, cholmod_common &Common) { return cholmod_l_factorize_p (A, beta, fset, fsize, L, &Common); }\n\n#undef EIGEN_CHOLMOD_SPECIALIZE0\n#undef EIGEN_CHOLMOD_SPECIALIZE1\n\n}  // namespace internal\n\n\nenum CholmodMode {\n  CholmodAuto, CholmodSimplicialLLt, CholmodSupernodalLLt, CholmodLDLt\n};\n\n\n/** \ingroup CholmodSupport_Module\n  * \class CholmodBase\n  * \brief The base class for the direct Cholesky factorization of Cholmod\n  * \sa class CholmodSupernodalLLT, class CholmodSimplicialLDLT, class CholmodSimplicialLLT\n  */\ntemplate<typename _MatrixType, int _UpLo, typename Derived>\nclass CholmodBase : public SparseSolverBase<Derived>\n{\n  protected:\n    typedef SparseSolverBase<Derived> Base;\n    using Base::derived;\n    using Base::m_isInitialized;\n  public:\n    typedef _MatrixType MatrixType;\n    enum { UpLo = _UpLo };\n    typedef typename MatrixType::Scalar Scalar;\n    typedef typename MatrixType::RealScalar RealScalar;\n    typedef MatrixType CholMatrixType;\n    typedef typename MatrixType::StorageIndex StorageIndex;\n    enum {\n      ColsAtCompileTime = MatrixType::ColsAtCompileTime,\n      MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime\n    };\n\n  public:\n\n    CholmodBase()\n      : m_cholmodFactor(0), m_info(Success), m_factorizationIsOk(false), m_analysisIsOk(false)\n    {\n      EIGEN_STATIC_ASSERT((internal::is_same<double,RealScalar>::value), CHOLMOD_SUPPORTS_DOUBLE_PRECISION_ONLY);\n      m_shiftOffset[0] = m_shiftOffset[1] = 0.0;\n      internal::cm_start<StorageIndex>(m_cholmod);\n    }\n\n    explicit CholmodBase(const MatrixType& matrix)\n      : m_cholmodFactor(0), m_info(Success), m_factorizationIsOk(false), m_analysisIsOk(false)\n    {\n      EIGEN_STATIC_ASSERT((internal::is_same<double,RealScalar>::value), CHOLMOD_SUPPORTS_DOUBLE_PRECISION_ONLY);\n      m_shiftOffset[0] = m_shiftOffset[1] = 0.0;\n      internal::cm_start<StorageIndex>(m_cholmod);\n      compute(matrix);\n    }\n\n    ~CholmodBase()\n    {\n      if(m_cholmodFactor)\n        internal::cm_free_factor<StorageIndex>(m_cholmodFactor, m_cholmod);\n      internal::cm_finish<StorageIndex>(m_cholmod);\n    }\n\n    inline StorageIndex cols() const { return internal::convert_index<StorageIndex, Index>(m_cholmodFactor->n); }\n    inline StorageIndex rows() const { return internal::convert_index<StorageIndex, Index>(m_cholmodFactor->n); }\n\n    /** \brief Reports whether previous computation was successful.\n      *\n      * \returns \c Success if computation was successful,\n      *          \c NumericalIssue if the matrix appears to be negative.\n      */\n    ComputationInfo info() const\n    {\n      eigen_assert(m_isInitialized && \"Decomposition is not initialized.\");\n      return m_info;\n    }\n\n    /** 
Computes the sparse Cholesky decomposition of \a matrix */\n    Derived& compute(const MatrixType& matrix)\n    {\n      analyzePattern(matrix);\n      factorize(matrix);\n      return derived();\n    }\n\n    /** Performs a symbolic decomposition on the sparsity pattern of \a matrix.\n      *\n      * This function is particularly useful when solving for several problems having the same structure.\n      *\n      * \sa factorize()\n      */\n    void analyzePattern(const MatrixType& matrix)\n    {\n      if(m_cholmodFactor)\n      {\n        internal::cm_free_factor<StorageIndex>(m_cholmodFactor, m_cholmod);\n        m_cholmodFactor = 0;\n      }\n      cholmod_sparse A = viewAsCholmod(matrix.template selfadjointView<UpLo>());\n      m_cholmodFactor = internal::cm_analyze<StorageIndex>(A, m_cholmod);\n\n      this->m_isInitialized = true;\n      this->m_info = Success;\n      m_analysisIsOk = true;\n      m_factorizationIsOk = false;\n    }\n\n    /** Performs a numeric decomposition of \a matrix\n      *\n      * The given matrix must have the same sparsity pattern as the matrix on which the symbolic decomposition has been performed.\n      *\n      * \sa analyzePattern()\n      */\n    void factorize(const MatrixType& matrix)\n    {\n      eigen_assert(m_analysisIsOk && \"You must first call analyzePattern()\");\n      cholmod_sparse A = viewAsCholmod(matrix.template selfadjointView<UpLo>());\n      internal::cm_factorize_p<StorageIndex>(&A, m_shiftOffset, 0, 0, m_cholmodFactor, m_cholmod);\n\n      // If the factorization failed, minor is the column at which it did. On success minor == n.\n      this->m_info = (m_cholmodFactor->minor == m_cholmodFactor->n ? Success : NumericalIssue);\n      m_factorizationIsOk = true;\n    }\n\n    /** Returns a reference to the Cholmod configuration structure to get full control over the performed operations.\n     *  See the Cholmod user guide for details. 
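For instance, the \c supernodal and \c final_asis\n     *  fields adjusted by the solver classes below are members of this structure. 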
*/\n    cholmod_common& cholmod() { return m_cholmod; }\n\n    #ifndef EIGEN_PARSED_BY_DOXYGEN\n    /** \\internal */\n    template<typename Rhs,typename Dest>\n    void _solve_impl(const MatrixBase<Rhs> &b, MatrixBase<Dest> &dest) const\n    {\n      eigen_assert(m_factorizationIsOk && \"The decomposition is not in a valid state for solving, you must first call either compute() or symbolic()/numeric()\");\n      const Index size = m_cholmodFactor->n;\n      EIGEN_UNUSED_VARIABLE(size);\n      eigen_assert(size==b.rows());\n\n      // Cholmod needs column-major storage without inner-stride, which corresponds to the default behavior of Ref.\n      Ref<const Matrix<typename Rhs::Scalar,Dynamic,Dynamic,ColMajor> > b_ref(b.derived());\n\n      cholmod_dense b_cd = viewAsCholmod(b_ref);\n      cholmod_dense* x_cd = internal::cm_solve<StorageIndex>(CHOLMOD_A, *m_cholmodFactor, b_cd, m_cholmod);\n      if(!x_cd)\n      {\n        this->m_info = NumericalIssue;\n        return;\n      }\n      // TODO optimize this copy by swapping when possible (be careful with alignment, etc.)\n      // NOTE Actually, the copy can be avoided by calling cholmod_solve2 instead of cholmod_solve\n      dest = Matrix<Scalar,Dest::RowsAtCompileTime,Dest::ColsAtCompileTime>::Map(reinterpret_cast<Scalar*>(x_cd->x),b.rows(),b.cols());\n      internal::cm_free_dense<StorageIndex>(x_cd, m_cholmod);\n    }\n\n    /** \\internal */\n    template<typename RhsDerived, typename DestDerived>\n    void _solve_impl(const SparseMatrixBase<RhsDerived> &b, SparseMatrixBase<DestDerived> &dest) const\n    {\n      eigen_assert(m_factorizationIsOk && \"The decomposition is not in a valid state for solving, you must first call either compute() or symbolic()/numeric()\");\n      const Index size = m_cholmodFactor->n;\n      EIGEN_UNUSED_VARIABLE(size);\n      eigen_assert(size==b.rows());\n\n      // note: cs stands for Cholmod Sparse\n      Ref<SparseMatrix<typename RhsDerived::Scalar,ColMajor,typename RhsDerived::StorageIndex> > b_ref(b.const_cast_derived());\n      cholmod_sparse b_cs = viewAsCholmod(b_ref);\n      cholmod_sparse* x_cs = internal::cm_spsolve<StorageIndex>(CHOLMOD_A, *m_cholmodFactor, b_cs, m_cholmod);\n      if(!x_cs)\n      {\n        this->m_info = NumericalIssue;\n        return;\n      }\n      // TODO optimize this copy by swapping when possible (be careful with alignment, etc.)\n      // NOTE cholmod_spsolve in fact just calls the dense solver for blocks of 4 columns at a time (similar to Eigen's sparse solver)\n      dest.derived() = viewAsEigen<typename DestDerived::Scalar,ColMajor,typename DestDerived::StorageIndex>(*x_cs);\n      internal::cm_free_sparse<StorageIndex>(x_cs, m_cholmod);\n    }\n    #endif // EIGEN_PARSED_BY_DOXYGEN\n\n\n    /** Sets the shift parameter that will be used to adjust the diagonal coefficients during the numerical factorization.\n      *\n      * During the numerical factorization, an offset term is added to the diagonal coefficients:\\n\n      * \\c d_ii = \\a offset + \\c d_ii\n      *\n      * The default is \\a offset=0.\n      *\n      * \\returns a reference to \\c *this.\n      */\n    Derived& setShift(const RealScalar& offset)\n    {\n      m_shiftOffset[0] = double(offset);\n      return derived();\n    }\n\n    /** \\returns the determinant of the underlying matrix from the current factorization */\n    Scalar determinant() const\n    {\n      using std::exp;\n      return exp(logDeterminant());\n    }\n\n    /** \\returns the log determinant of the underlying matrix 
from the current factorization */\n    Scalar logDeterminant() const\n    {\n      using std::log;\n      using numext::real;\n      eigen_assert(m_factorizationIsOk && \"The decomposition is not in a valid state for solving, you must first call either compute() or symbolic()/numeric()\");\n\n      RealScalar logDet = 0;\n      Scalar *x = static_cast<Scalar*>(m_cholmodFactor->x);\n      if (m_cholmodFactor->is_super)\n      {\n        // Supernodal factorization stored as a packed list of dense column-major blocks,\n        // as described by the following structure:\n\n        // super[k] == index of the first column of the k-th super node\n        StorageIndex *super = static_cast<StorageIndex*>(m_cholmodFactor->super);\n        // pi[k] == offset to the description of row indices\n        StorageIndex *pi = static_cast<StorageIndex*>(m_cholmodFactor->pi);\n        // px[k] == offset to the respective dense block\n        StorageIndex *px = static_cast<StorageIndex*>(m_cholmodFactor->px);\n\n        Index nb_super_nodes = m_cholmodFactor->nsuper;\n        for (Index k=0; k < nb_super_nodes; ++k)\n        {\n          StorageIndex ncols = super[k + 1] - super[k];\n          StorageIndex nrows = pi[k + 1] - pi[k];\n\n          Map<const Array<Scalar,1,Dynamic>, 0, InnerStride<> > sk(x + px[k], ncols, InnerStride<>(nrows+1));\n          logDet += sk.real().log().sum();\n        }\n      }\n      else\n      {\n        // Simplicial factorization stored as standard CSC matrix.\n        StorageIndex *p = static_cast<StorageIndex*>(m_cholmodFactor->p);\n        Index size = m_cholmodFactor->n;\n        for (Index k=0; k<size; ++k)\n          logDet += log(real( x[p[k]] ));\n      }\n      if (m_cholmodFactor->is_ll)\n        logDet *= 2.0;\n      return logDet;\n    };\n\n    template<typename Stream>\n    void dumpMemory(Stream& /*s*/)\n    {}\n\n  protected:\n    mutable cholmod_common m_cholmod;\n    cholmod_factor* m_cholmodFactor;\n    double m_shiftOffset[2];\n    mutable ComputationInfo m_info;\n    int m_factorizationIsOk;\n    int m_analysisIsOk;\n};\n\n/** \ingroup CholmodSupport_Module\n  * \class CholmodSimplicialLLT\n  * \brief A simplicial direct Cholesky (LLT) factorization and solver based on Cholmod\n  *\n  * This class allows solving A.X = B sparse linear problems via a simplicial LL^T Cholesky factorization\n  * using the Cholmod library.\n  * This simplicial variant is equivalent to Eigen's built-in SimplicialLLT class. Therefore, it has little practical interest.\n  * The sparse matrix A must be selfadjoint and positive definite. 
Default is Lower.\n  *\n  * \implsparsesolverconcept\n  *\n  * This class supports all kinds of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed.\n  *\n  * \warning Only double precision real and complex scalar types are supported by Cholmod.\n  *\n  * \sa \ref TutorialSparseSolverConcept, class CholmodSupernodalLLT, class SimplicialLLT\n  */\ntemplate<typename _MatrixType, int _UpLo = Lower>\nclass CholmodSimplicialLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimplicialLLT<_MatrixType, _UpLo> >\n{\n    typedef CholmodBase<_MatrixType, _UpLo, CholmodSimplicialLLT> Base;\n    using Base::m_cholmod;\n\n  public:\n\n    typedef _MatrixType MatrixType;\n\n    CholmodSimplicialLLT() : Base() { init(); }\n\n    CholmodSimplicialLLT(const MatrixType& matrix) : Base()\n    {\n      init();\n      this->compute(matrix);\n    }\n\n    ~CholmodSimplicialLLT() {}\n  protected:\n    void init()\n    {\n      m_cholmod.final_asis = 0;\n      m_cholmod.supernodal = CHOLMOD_SIMPLICIAL;\n      m_cholmod.final_ll = 1;\n    }\n};\n\n\n/** \ingroup CholmodSupport_Module\n  * \class CholmodSimplicialLDLT\n  * \brief A simplicial direct Cholesky (LDLT) factorization and solver based on Cholmod\n  *\n  * This class allows solving A.X = B sparse linear problems via a simplicial LDL^T Cholesky factorization\n  * using the Cholmod library.\n  * This simplicial variant is equivalent to Eigen's built-in SimplicialLDLT class. Therefore, it has little practical interest.\n  * The sparse matrix A must be selfadjoint and positive definite. The vectors or matrices\n  * X and B can be either dense or sparse.\n  *\n  * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<>\n  * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower\n  *               or Upper. Default is Lower.\n  *\n  * \implsparsesolverconcept\n  *\n  * This class supports all kinds of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed.\n  *\n  * \warning Only double precision real and complex scalar types are supported by Cholmod.\n  *\n  * \sa \ref TutorialSparseSolverConcept, class CholmodSupernodalLLT, class SimplicialLDLT\n  */\ntemplate<typename _MatrixType, int _UpLo = Lower>\nclass CholmodSimplicialLDLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimplicialLDLT<_MatrixType, _UpLo> >\n{\n    typedef CholmodBase<_MatrixType, _UpLo, CholmodSimplicialLDLT> Base;\n    using Base::m_cholmod;\n\n  public:\n\n    typedef _MatrixType MatrixType;\n\n    CholmodSimplicialLDLT() : Base() { init(); }\n\n    CholmodSimplicialLDLT(const MatrixType& matrix) : Base()\n    {\n      init();\n      this->compute(matrix);\n    }\n\n    ~CholmodSimplicialLDLT() {}\n  protected:\n    void init()\n    {\n      m_cholmod.final_asis = 1;\n      m_cholmod.supernodal = CHOLMOD_SIMPLICIAL;\n    }\n};\n\n/** \ingroup CholmodSupport_Module\n  * \class CholmodSupernodalLLT\n  * \brief A supernodal Cholesky (LLT) factorization and solver based on Cholmod\n  *\n  * This class allows solving A.X = B sparse linear problems via a supernodal LL^T Cholesky factorization\n  * using the Cholmod library.\n  * This supernodal variant performs best on dense enough problems, e.g., 3D FEM, or very high order 2D FEM.\n  * The sparse matrix A must be selfadjoint and positive definite. 
The vectors or matrices\n  * X and B can be either dense or sparse.\n  *\n  * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<>\n  * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower\n  *               or Upper. Default is Lower.\n  *\n  * \implsparsesolverconcept\n  *\n  * This class supports all kinds of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed.\n  *\n  * \warning Only double precision real and complex scalar types are supported by Cholmod.\n  *\n  * \sa \ref TutorialSparseSolverConcept\n  */\ntemplate<typename _MatrixType, int _UpLo = Lower>\nclass CholmodSupernodalLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSupernodalLLT<_MatrixType, _UpLo> >\n{\n    typedef CholmodBase<_MatrixType, _UpLo, CholmodSupernodalLLT> Base;\n    using Base::m_cholmod;\n\n  public:\n\n    typedef _MatrixType MatrixType;\n\n    CholmodSupernodalLLT() : Base() { init(); }\n\n    CholmodSupernodalLLT(const MatrixType& matrix) : Base()\n    {\n      init();\n      this->compute(matrix);\n    }\n\n    ~CholmodSupernodalLLT() {}\n  protected:\n    void init()\n    {\n      m_cholmod.final_asis = 1;\n      m_cholmod.supernodal = CHOLMOD_SUPERNODAL;\n    }\n};\n\n/** \ingroup CholmodSupport_Module\n  * \class CholmodDecomposition\n  * \brief A general Cholesky factorization and solver based on Cholmod\n  *\n  * This class allows solving A.X = B sparse linear problems via a LL^T or LDL^T Cholesky factorization\n  * using the Cholmod library. The sparse matrix A must be selfadjoint and positive definite. The vectors or matrices\n  * X and B can be either dense or sparse.\n  *\n  * This variant permits changing the underlying Cholesky method at runtime.\n  * On the other hand, it does not provide access to the result of the factorization.\n  * The default is to let Cholmod automatically choose between a simplicial and supernodal factorization.\n  *\n  * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<>\n  * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower\n  *               or Upper. 
Default is Lower.\n  *\n  * \implsparsesolverconcept\n  *\n  * This class supports all kinds of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed.\n  *\n  * \warning Only double precision real and complex scalar types are supported by Cholmod.\n  *\n  * \sa \ref TutorialSparseSolverConcept\n  */\ntemplate<typename _MatrixType, int _UpLo = Lower>\nclass CholmodDecomposition : public CholmodBase<_MatrixType, _UpLo, CholmodDecomposition<_MatrixType, _UpLo> >\n{\n    typedef CholmodBase<_MatrixType, _UpLo, CholmodDecomposition> Base;\n    using Base::m_cholmod;\n\n  public:\n\n    typedef _MatrixType MatrixType;\n\n    CholmodDecomposition() : Base() { init(); }\n\n    CholmodDecomposition(const MatrixType& matrix) : Base()\n    {\n      init();\n      this->compute(matrix);\n    }\n\n    ~CholmodDecomposition() {}\n\n    void setMode(CholmodMode mode)\n    {\n      switch(mode)\n      {\n        case CholmodAuto:\n          m_cholmod.final_asis = 1;\n          m_cholmod.supernodal = CHOLMOD_AUTO;\n          break;\n        case CholmodSimplicialLLt:\n          m_cholmod.final_asis = 0;\n          m_cholmod.supernodal = CHOLMOD_SIMPLICIAL;\n          m_cholmod.final_ll = 1;\n          break;\n        case CholmodSupernodalLLt:\n          m_cholmod.final_asis = 1;\n          m_cholmod.supernodal = CHOLMOD_SUPERNODAL;\n          break;\n        case CholmodLDLt:\n          m_cholmod.final_asis = 1;\n          m_cholmod.supernodal = CHOLMOD_SIMPLICIAL;\n          break;\n        default:\n          break;\n      }\n    }\n  protected:\n    void init()\n    {\n      m_cholmod.final_asis = 1;\n      m_cholmod.supernodal = CHOLMOD_AUTO;\n    }\n};\n\n} // end namespace Eigen\n\n#endif // EIGEN_CHOLMODSUPPORT_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/ArithmeticSequence.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2017 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_ARITHMETIC_SEQUENCE_H\n#define EIGEN_ARITHMETIC_SEQUENCE_H\n\nnamespace Eigen {\n\nnamespace internal {\n\n#if (!EIGEN_HAS_CXX11) || !((!EIGEN_COMP_GNUC) || EIGEN_COMP_GNUC>=48)\ntemplate<typename T> struct aseq_negate {};\n\ntemplate<> struct aseq_negate<Index> {\n  typedef Index type;\n};\n\ntemplate<int N> struct aseq_negate<FixedInt<N> > {\n  typedef FixedInt<-N> type;\n};\n\n// Compilation error in the following case:\ntemplate<> struct aseq_negate<FixedInt<DynamicIndex> > {};\n\ntemplate<typename FirstType,typename SizeType,typename IncrType,\n         bool FirstIsSymbolic=symbolic::is_symbolic<FirstType>::value,\n         bool SizeIsSymbolic =symbolic::is_symbolic<SizeType>::value>\nstruct aseq_reverse_first_type {\n  typedef Index type;\n};\n\ntemplate<typename FirstType,typename SizeType,typename IncrType>\nstruct aseq_reverse_first_type<FirstType,SizeType,IncrType,true,true> {\n  typedef symbolic::AddExpr<FirstType,\n                            symbolic::ProductExpr<symbolic::AddExpr<SizeType,symbolic::ValueExpr<FixedInt<-1> > >,\n                                                  symbolic::ValueExpr<IncrType> >\n                           > type;\n};\n\ntemplate<typename SizeType,typename IncrType,typename EnableIf = void>\nstruct aseq_reverse_first_type_aux {\n  typedef Index type;\n};\n\ntemplate<typename SizeType,typename IncrType>\nstruct aseq_reverse_first_type_aux<SizeType,IncrType,typename internal::enable_if<bool((SizeType::value+IncrType::value)|0x1)>::type> {\n  typedef FixedInt<(SizeType::value-1)*IncrType::value> type;\n};\n\ntemplate<typename FirstType,typename SizeType,typename IncrType>\nstruct aseq_reverse_first_type<FirstType,SizeType,IncrType,true,false> {\n  typedef typename aseq_reverse_first_type_aux<SizeType,IncrType>::type Aux;\n  typedef symbolic::AddExpr<FirstType,symbolic::ValueExpr<Aux> > type;\n};\n\ntemplate<typename FirstType,typename SizeType,typename IncrType>\nstruct aseq_reverse_first_type<FirstType,SizeType,IncrType,false,true> {\n  typedef symbolic::AddExpr<symbolic::ProductExpr<symbolic::AddExpr<SizeType,symbolic::ValueExpr<FixedInt<-1> > >,\n                                                  symbolic::ValueExpr<IncrType> >,\n                            symbolic::ValueExpr<> > type;\n};\n#endif\n\n// Helper to cleanup the type of the increment:\ntemplate<typename T> struct cleanup_seq_incr {\n  typedef typename cleanup_index_type<T,DynamicIndex>::type type;\n};\n\n}\n\n//--------------------------------------------------------------------------------\n// seq(first,last,incr) and seqN(first,size,incr)\n//--------------------------------------------------------------------------------\n\ntemplate<typename FirstType=Index,typename SizeType=Index,typename IncrType=internal::FixedInt<1> >\nclass ArithmeticSequence;\n\ntemplate<typename FirstType,typename SizeType,typename IncrType>\nArithmeticSequence<typename internal::cleanup_index_type<FirstType>::type,\n                   typename internal::cleanup_index_type<SizeType>::type,\n                   typename internal::cleanup_seq_incr<IncrType>::type >\nseqN(FirstType first, SizeType size, IncrType 
incr);\n\n/** \\class ArithmeticSequence\n  * \\ingroup Core_Module\n  *\n  * This class represents an arithmetic progression \\f$ a_0, a_1, a_2, ..., a_{n-1}\\f$ defined by\n  * its \\em first value \\f$ a_0 \\f$, its \\em size (aka length) \\em n, and the \\em increment (aka stride)\n  * that is equal to \\f$ a_{i+1}-a_{i}\\f$ for any \\em i.\n  *\n  * It is internally used as the return type of the Eigen::seq and Eigen::seqN functions, and as the input arguments\n  * of DenseBase::operator()(const RowIndices&, const ColIndices&), and most of the time this is the\n  * only way it is used.\n  *\n  * \\tparam FirstType type of the first element, usually an Index,\n  *                   but internally it can be a symbolic expression\n  * \\tparam SizeType type representing the size of the sequence, usually an Index\n  *                  or a compile time integral constant. Internally, it can also be a symbolic expression\n  * \\tparam IncrType type of the increment, can be a runtime Index, or a compile time integral constant (default is compile-time 1)\n  *\n  * \\sa Eigen::seq, Eigen::seqN, DenseBase::operator()(const RowIndices&, const ColIndices&), class IndexedView\n  */\ntemplate<typename FirstType,typename SizeType,typename IncrType>\nclass ArithmeticSequence\n{\npublic:\n  ArithmeticSequence(FirstType first, SizeType size) : m_first(first), m_size(size) {}\n  ArithmeticSequence(FirstType first, SizeType size, IncrType incr) : m_first(first), m_size(size), m_incr(incr) {}\n\n  enum {\n    SizeAtCompileTime = internal::get_fixed_value<SizeType>::value,\n    IncrAtCompileTime = internal::get_fixed_value<IncrType,DynamicIndex>::value\n  };\n\n  /** \\returns the size, i.e., number of elements, of the sequence */\n  Index size()  const { return m_size; }\n\n  /** \\returns the first element \\f$ a_0 \\f$ in the sequence */\n  Index first()  const { return m_first; }\n\n  /** \\returns the value \\f$ a_i \\f$ at index \\a i in the sequence. 
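For example, \c seqN(2,5,3) represents 2,5,8,11,14, so the value at index 1 is 5. 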
*/\n  Index operator[](Index i) const { return m_first + i * m_incr; }\n\n  const FirstType& firstObject() const { return m_first; }\n  const SizeType&  sizeObject()  const { return m_size; }\n  const IncrType&  incrObject()  const { return m_incr; }\n\nprotected:\n  FirstType m_first;\n  SizeType  m_size;\n  IncrType  m_incr;\n\npublic:\n\n#if EIGEN_HAS_CXX11 && ((!EIGEN_COMP_GNUC) || EIGEN_COMP_GNUC>=48)\n  auto reverse() const -> decltype(Eigen::seqN(m_first+(m_size+fix<-1>())*m_incr,m_size,-m_incr)) {\n    return seqN(m_first+(m_size+fix<-1>())*m_incr,m_size,-m_incr);\n  }\n#else\nprotected:\n  typedef typename internal::aseq_negate<IncrType>::type ReverseIncrType;\n  typedef typename internal::aseq_reverse_first_type<FirstType,SizeType,IncrType>::type ReverseFirstType;\npublic:\n  ArithmeticSequence<ReverseFirstType,SizeType,ReverseIncrType>\n  reverse() const {\n    return seqN(m_first+(m_size+fix<-1>())*m_incr,m_size,-m_incr);\n  }\n#endif\n};\n\n/** \\returns an ArithmeticSequence starting at \\a first, of length \\a size, and increment \\a incr\n  *\n  * \\sa seqN(FirstType,SizeType), seq(FirstType,LastType,IncrType) */\ntemplate<typename FirstType,typename SizeType,typename IncrType>\nArithmeticSequence<typename internal::cleanup_index_type<FirstType>::type,typename internal::cleanup_index_type<SizeType>::type,typename internal::cleanup_seq_incr<IncrType>::type >\nseqN(FirstType first, SizeType size, IncrType incr)  {\n  return ArithmeticSequence<typename internal::cleanup_index_type<FirstType>::type,typename internal::cleanup_index_type<SizeType>::type,typename internal::cleanup_seq_incr<IncrType>::type>(first,size,incr);\n}\n\n/** \\returns an ArithmeticSequence starting at \\a first, of length \\a size, and unit increment\n  *\n  * \\sa seqN(FirstType,SizeType,IncrType), seq(FirstType,LastType) */\ntemplate<typename FirstType,typename SizeType>\nArithmeticSequence<typename internal::cleanup_index_type<FirstType>::type,typename internal::cleanup_index_type<SizeType>::type >\nseqN(FirstType first, SizeType size)  {\n  return ArithmeticSequence<typename internal::cleanup_index_type<FirstType>::type,typename internal::cleanup_index_type<SizeType>::type>(first,size);\n}\n\n#ifdef EIGEN_PARSED_BY_DOXYGEN\n\n/** \\returns an ArithmeticSequence starting at \\a f, up (or down) to \\a l, and with positive (or negative) increment \\a incr\n  *\n  * It is essentially an alias to:\n  * \\code\n  * seqN(f, (l-f+incr)/incr, incr);\n  * \\endcode\n  *\n  * \\sa seqN(FirstType,SizeType,IncrType), seq(FirstType,LastType)\n  */\ntemplate<typename FirstType,typename LastType, typename IncrType>\nauto seq(FirstType f, LastType l, IncrType incr);\n\n/** \\returns an ArithmeticSequence starting at \\a f, up (or down) to \\a l, and unit increment\n  *\n  * It is essentially an alias to:\n  * \\code\n  * seqN(f,l-f+1);\n  * \\endcode\n  *\n  * \\sa seqN(FirstType,SizeType), seq(FirstType,LastType,IncrType)\n  */\ntemplate<typename FirstType,typename LastType>\nauto seq(FirstType f, LastType l);\n\n#else // EIGEN_PARSED_BY_DOXYGEN\n\n#if EIGEN_HAS_CXX11\ntemplate<typename FirstType,typename LastType>\nauto seq(FirstType f, LastType l) -> decltype(seqN(typename internal::cleanup_index_type<FirstType>::type(f),\n                                                   (  typename internal::cleanup_index_type<LastType>::type(l)\n                                                    - typename internal::cleanup_index_type<FirstType>::type(f)+fix<1>())))\n{\n  return seqN(typename 
internal::cleanup_index_type<FirstType>::type(f),\n              (typename internal::cleanup_index_type<LastType>::type(l)\n               -typename internal::cleanup_index_type<FirstType>::type(f)+fix<1>()));\n}\n\ntemplate<typename FirstType,typename LastType, typename IncrType>\nauto seq(FirstType f, LastType l, IncrType incr)\n  -> decltype(seqN(typename internal::cleanup_index_type<FirstType>::type(f),\n                   (   typename internal::cleanup_index_type<LastType>::type(l)\n                     - typename internal::cleanup_index_type<FirstType>::type(f)+typename internal::cleanup_seq_incr<IncrType>::type(incr)\n                   ) / typename internal::cleanup_seq_incr<IncrType>::type(incr),\n                   typename internal::cleanup_seq_incr<IncrType>::type(incr)))\n{\n  typedef typename internal::cleanup_seq_incr<IncrType>::type CleanedIncrType;\n  return seqN(typename internal::cleanup_index_type<FirstType>::type(f),\n              ( typename internal::cleanup_index_type<LastType>::type(l)\n               -typename internal::cleanup_index_type<FirstType>::type(f)+CleanedIncrType(incr)) / CleanedIncrType(incr),\n              CleanedIncrType(incr));\n}\n\n#else // EIGEN_HAS_CXX11\n\ntemplate<typename FirstType,typename LastType>\ntypename internal::enable_if<!(symbolic::is_symbolic<FirstType>::value || symbolic::is_symbolic<LastType>::value),\n                             ArithmeticSequence<typename internal::cleanup_index_type<FirstType>::type,Index> >::type\nseq(FirstType f, LastType l)\n{\n  return seqN(typename internal::cleanup_index_type<FirstType>::type(f),\n              Index((typename internal::cleanup_index_type<LastType>::type(l)-typename internal::cleanup_index_type<FirstType>::type(f)+fix<1>())));\n}\n\ntemplate<typename FirstTypeDerived,typename LastType>\ntypename internal::enable_if<!symbolic::is_symbolic<LastType>::value,\n    ArithmeticSequence<FirstTypeDerived, symbolic::AddExpr<symbolic::AddExpr<symbolic::NegateExpr<FirstTypeDerived>,symbolic::ValueExpr<> >,\n                                                            symbolic::ValueExpr<internal::FixedInt<1> > > > >::type\nseq(const symbolic::BaseExpr<FirstTypeDerived> &f, LastType l)\n{\n  return seqN(f.derived(),(typename internal::cleanup_index_type<LastType>::type(l)-f.derived()+fix<1>()));\n}\n\ntemplate<typename FirstType,typename LastTypeDerived>\ntypename internal::enable_if<!symbolic::is_symbolic<FirstType>::value,\n    ArithmeticSequence<typename internal::cleanup_index_type<FirstType>::type,\n                        symbolic::AddExpr<symbolic::AddExpr<LastTypeDerived,symbolic::ValueExpr<> >,\n                                          symbolic::ValueExpr<internal::FixedInt<1> > > > >::type\nseq(FirstType f, const symbolic::BaseExpr<LastTypeDerived> &l)\n{\n  return seqN(typename internal::cleanup_index_type<FirstType>::type(f),(l.derived()-typename internal::cleanup_index_type<FirstType>::type(f)+fix<1>()));\n}\n\ntemplate<typename FirstTypeDerived,typename LastTypeDerived>\nArithmeticSequence<FirstTypeDerived,\n                    symbolic::AddExpr<symbolic::AddExpr<LastTypeDerived,symbolic::NegateExpr<FirstTypeDerived> >,symbolic::ValueExpr<internal::FixedInt<1> > > >\nseq(const symbolic::BaseExpr<FirstTypeDerived> &f, const symbolic::BaseExpr<LastTypeDerived> &l)\n{\n  return seqN(f.derived(),(l.derived()-f.derived()+fix<1>()));\n}\n\n\ntemplate<typename FirstType,typename LastType, typename IncrType>\ntypename internal::enable_if<!(symbolic::is_symbolic<FirstType>::value || 
symbolic::is_symbolic<LastType>::value),\n    ArithmeticSequence<typename internal::cleanup_index_type<FirstType>::type,Index,typename internal::cleanup_seq_incr<IncrType>::type> >::type\nseq(FirstType f, LastType l, IncrType incr)\n{\n  typedef typename internal::cleanup_seq_incr<IncrType>::type CleanedIncrType;\n  return seqN(typename internal::cleanup_index_type<FirstType>::type(f),\n              Index((typename internal::cleanup_index_type<LastType>::type(l)-typename internal::cleanup_index_type<FirstType>::type(f)+CleanedIncrType(incr))/CleanedIncrType(incr)), incr);\n}\n\ntemplate<typename FirstTypeDerived,typename LastType, typename IncrType>\ntypename internal::enable_if<!symbolic::is_symbolic<LastType>::value,\n    ArithmeticSequence<FirstTypeDerived,\n                        symbolic::QuotientExpr<symbolic::AddExpr<symbolic::AddExpr<symbolic::NegateExpr<FirstTypeDerived>,\n                                                                                   symbolic::ValueExpr<> >,\n                                                                 symbolic::ValueExpr<typename internal::cleanup_seq_incr<IncrType>::type> >,\n                                              symbolic::ValueExpr<typename internal::cleanup_seq_incr<IncrType>::type> >,\n                        typename internal::cleanup_seq_incr<IncrType>::type> >::type\nseq(const symbolic::BaseExpr<FirstTypeDerived> &f, LastType l, IncrType incr)\n{\n  typedef typename internal::cleanup_seq_incr<IncrType>::type CleanedIncrType;\n  return seqN(f.derived(),(typename internal::cleanup_index_type<LastType>::type(l)-f.derived()+CleanedIncrType(incr))/CleanedIncrType(incr), incr);\n}\n\ntemplate<typename FirstType,typename LastTypeDerived, typename IncrType>\ntypename internal::enable_if<!symbolic::is_symbolic<FirstType>::value,\n    ArithmeticSequence<typename internal::cleanup_index_type<FirstType>::type,\n                        symbolic::QuotientExpr<symbolic::AddExpr<symbolic::AddExpr<LastTypeDerived,symbolic::ValueExpr<> >,\n                                                                 symbolic::ValueExpr<typename internal::cleanup_seq_incr<IncrType>::type> >,\n                                               symbolic::ValueExpr<typename internal::cleanup_seq_incr<IncrType>::type> >,\n                        typename internal::cleanup_seq_incr<IncrType>::type> >::type\nseq(FirstType f, const symbolic::BaseExpr<LastTypeDerived> &l, IncrType incr)\n{\n  typedef typename internal::cleanup_seq_incr<IncrType>::type CleanedIncrType;\n  return seqN(typename internal::cleanup_index_type<FirstType>::type(f),\n              (l.derived()-typename internal::cleanup_index_type<FirstType>::type(f)+CleanedIncrType(incr))/CleanedIncrType(incr), incr);\n}\n\ntemplate<typename FirstTypeDerived,typename LastTypeDerived, typename IncrType>\nArithmeticSequence<FirstTypeDerived,\n                    symbolic::QuotientExpr<symbolic::AddExpr<symbolic::AddExpr<LastTypeDerived,\n                                                                               symbolic::NegateExpr<FirstTypeDerived> >,\n                                                             symbolic::ValueExpr<typename internal::cleanup_seq_incr<IncrType>::type> >,\n                                          symbolic::ValueExpr<typename internal::cleanup_seq_incr<IncrType>::type> >,\n                    typename internal::cleanup_seq_incr<IncrType>::type>\nseq(const symbolic::BaseExpr<FirstTypeDerived> &f, const symbolic::BaseExpr<LastTypeDerived> &l, IncrType incr)\n{\n  typedef 
typename internal::cleanup_seq_incr<IncrType>::type CleanedIncrType;\n  return seqN(f.derived(),(l.derived()-f.derived()+CleanedIncrType(incr))/CleanedIncrType(incr), incr);\n}\n#endif // EIGEN_HAS_CXX11\n\n#endif // EIGEN_PARSED_BY_DOXYGEN\n\n\n#if EIGEN_HAS_CXX11 || defined(EIGEN_PARSED_BY_DOXYGEN)\n/** \cpp11\n  * \returns a symbolic ArithmeticSequence representing the last \a size elements with increment \a incr.\n  *\n  * It is a shortcut for: \code seqN(last-(size-fix<1>)*incr, size, incr) \endcode\n  * \n  * \sa lastN(SizeType), seqN(FirstType,SizeType), seq(FirstType,LastType,IncrType) */\ntemplate<typename SizeType,typename IncrType>\nauto lastN(SizeType size, IncrType incr)\n-> decltype(seqN(Eigen::last-(size-fix<1>())*incr, size, incr))\n{\n  return seqN(Eigen::last-(size-fix<1>())*incr, size, incr);\n}\n\n/** \cpp11\n  * \returns a symbolic ArithmeticSequence representing the last \a size elements with a unit increment.\n  *\n  *  It is a shortcut for: \code seq(last+fix<1>-size, last) \endcode\n  * \n  * \sa lastN(SizeType,IncrType), seqN(FirstType,SizeType), seq(FirstType,LastType) */\ntemplate<typename SizeType>\nauto lastN(SizeType size)\n-> decltype(seqN(Eigen::last+fix<1>()-size, size))\n{\n  return seqN(Eigen::last+fix<1>()-size, size);\n}\n#endif\n\nnamespace internal {\n\n// Convert a symbolic span into a usable one (i.e., remove last/end \"keywords\")\ntemplate<typename T>\nstruct make_size_type {\n  typedef typename internal::conditional<symbolic::is_symbolic<T>::value, Index, T>::type type;\n};\n\ntemplate<typename FirstType,typename SizeType,typename IncrType,int XprSize>\nstruct IndexedViewCompatibleType<ArithmeticSequence<FirstType,SizeType,IncrType>, XprSize> {\n  typedef ArithmeticSequence<Index,typename make_size_type<SizeType>::type,IncrType> type;\n};\n\ntemplate<typename FirstType,typename SizeType,typename IncrType>\nArithmeticSequence<Index,typename make_size_type<SizeType>::type,IncrType>\nmakeIndexedViewCompatible(const ArithmeticSequence<FirstType,SizeType,IncrType>& ids, Index size,SpecializedType) {\n  return ArithmeticSequence<Index,typename make_size_type<SizeType>::type,IncrType>(\n            eval_expr_given_size(ids.firstObject(),size),eval_expr_given_size(ids.sizeObject(),size),ids.incrObject());\n}\n\ntemplate<typename FirstType,typename SizeType,typename IncrType>\nstruct get_compile_time_incr<ArithmeticSequence<FirstType,SizeType,IncrType> > {\n  enum { value = get_fixed_value<IncrType,DynamicIndex>::value };\n};\n\n} // end namespace internal\n\n/** \namespace Eigen::indexing\n  * \ingroup Core_Module\n  * \n  * The sole purpose of this namespace is to be able to import all functions\n  * and symbols that are expected to be used within operator() for indexing\n  * and slicing. If you already imported the whole Eigen namespace:\n  * \code using namespace Eigen; \endcode\n  * then you are already all set. 
Otherwise, if you don't want/cannot import\n  * the whole Eigen namespace, the following line:\n  * \\code using namespace Eigen::indexing; \\endcode\n  * is equivalent to:\n  * \\code\n  using Eigen::all;\n  using Eigen::seq;\n  using Eigen::seqN;\n  using Eigen::lastN; // c++11 only\n  using Eigen::last;\n  using Eigen::lastp1;\n  using Eigen::fix;\n  \\endcode\n  */\nnamespace indexing {\n  using Eigen::all;\n  using Eigen::seq;\n  using Eigen::seqN;\n  #if EIGEN_HAS_CXX11\n  using Eigen::lastN;\n  #endif\n  using Eigen::last;\n  using Eigen::lastp1;\n  using Eigen::fix;\n}\n\n} // end namespace Eigen\n\n#endif // EIGEN_ARITHMETIC_SEQUENCE_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/Array.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_ARRAY_H\n#define EIGEN_ARRAY_H\n\nnamespace Eigen {\n\nnamespace internal {\ntemplate<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>\nstruct traits<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > : traits<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >\n{\n  typedef ArrayXpr XprKind;\n  typedef ArrayBase<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > XprBase;\n};\n}\n\n/** \\class Array\n  * \\ingroup Core_Module\n  *\n  * \\brief General-purpose arrays with easy API for coefficient-wise operations\n  *\n  * The %Array class is very similar to the Matrix class. It provides\n  * general-purpose one- and two-dimensional arrays. The difference between the\n  * %Array and the %Matrix class is primarily in the API: the API for the\n  * %Array class provides easy access to coefficient-wise operations, while the\n  * API for the %Matrix class provides easy access to linear-algebra\n  * operations.\n  *\n  * See documentation of class Matrix for detailed information on the template parameters\n  * storage layout.\n  *\n  * This class can be extended with the help of the plugin mechanism described on the page\n  * \\ref TopicCustomizing_Plugins by defining the preprocessor symbol \\c EIGEN_ARRAY_PLUGIN.\n  *\n  * \\sa \\blank \\ref TutorialArrayClass, \\ref TopicClassHierarchy\n  */\ntemplate<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>\nclass Array\n  : public PlainObjectBase<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >\n{\n  public:\n\n    typedef PlainObjectBase<Array> Base;\n    EIGEN_DENSE_PUBLIC_INTERFACE(Array)\n\n    enum { Options = _Options };\n    typedef typename Base::PlainObject PlainObject;\n\n  protected:\n    template <typename Derived, typename OtherDerived, bool IsVector>\n    friend struct internal::conservative_resize_like_impl;\n\n    using Base::m_storage;\n\n  public:\n\n    using Base::base;\n    using Base::coeff;\n    using Base::coeffRef;\n\n    /**\n      * The usage of\n      *   using Base::operator=;\n      * fails on MSVC. Since the code below is working with GCC and MSVC, we skipped\n      * the usage of 'using'. This should be done only for operator=.\n      */\n    template<typename OtherDerived>\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE Array& operator=(const EigenBase<OtherDerived> &other)\n    {\n      return Base::operator=(other);\n    }\n\n    /** Set all the entries to \\a value.\n      * \\sa DenseBase::setConstant(), DenseBase::fill()\n      */\n    /* This overload is needed because the usage of\n      *   using Base::operator=;\n      * fails on MSVC. Since the code below is working with GCC and MSVC, we skipped\n      * the usage of 'using'. This should be done only for operator=.\n      */\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE Array& operator=(const Scalar &value)\n    {\n      Base::setConstant(value);\n      return *this;\n    }\n\n    /** Copies the value of the expression \\a other into \\c *this with automatic resizing.\n      *\n      * *this might be resized to match the dimensions of \\a other. 
If *this was a null matrix (not already initialized),\n      * it will be initialized.\n      *\n      * Note that copying a row-vector into a vector (and conversely) is allowed.\n      * The resizing, if any, is then done in the appropriate way so that row-vectors\n      * remain row-vectors and vectors remain vectors.\n      */\n    template<typename OtherDerived>\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE Array& operator=(const DenseBase<OtherDerived>& other)\n    {\n      return Base::_set(other);\n    }\n\n    /** This is a special case of the templated operator=. Its purpose is to\n      * prevent a default operator= from hiding the templated operator=.\n      */\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE Array& operator=(const Array& other)\n    {\n      return Base::_set(other);\n    }\n\n    /** Default constructor.\n      *\n      * For fixed-size matrices, does nothing.\n      *\n      * For dynamic-size matrices, creates an empty matrix of size 0. Does not allocate any array. Such a matrix\n      * is called a null matrix. This constructor is the unique way to create null matrices: resizing\n      * a matrix to 0 is not supported.\n      *\n      * \\sa resize(Index,Index)\n      */\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE Array() : Base()\n    {\n      Base::_check_template_params();\n      EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED\n    }\n\n#ifndef EIGEN_PARSED_BY_DOXYGEN\n    // FIXME is it still needed ??\n    /** \\internal */\n    EIGEN_DEVICE_FUNC\n    Array(internal::constructor_without_unaligned_array_assert)\n      : Base(internal::constructor_without_unaligned_array_assert())\n    {\n      Base::_check_template_params();\n      EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED\n    }\n#endif\n\n#if EIGEN_HAS_RVALUE_REFERENCES\n    EIGEN_DEVICE_FUNC\n    Array(Array&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_constructible<Scalar>::value)\n      : Base(std::move(other))\n    {\n      Base::_check_template_params();\n    }\n    EIGEN_DEVICE_FUNC\n    Array& operator=(Array&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_assignable<Scalar>::value)\n    {\n      Base::operator=(std::move(other));\n      return *this;\n    }\n#endif\n\n    #if EIGEN_HAS_CXX11\n    /** \\copydoc PlainObjectBase(const Scalar& a0, const Scalar& a1, const Scalar& a2, const Scalar& a3, const ArgTypes&... args)\n     *\n     * Example: \\include Array_variadic_ctor_cxx11.cpp\n     * Output: \\verbinclude Array_variadic_ctor_cxx11.out\n     *\n     * \\sa Array(const std::initializer_list<std::initializer_list<Scalar>>&)\n     * \\sa Array(const Scalar&), Array(const Scalar&,const Scalar&)\n     */\n    template <typename... ArgTypes>\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n    Array(const Scalar& a0, const Scalar& a1, const Scalar& a2, const Scalar& a3, const ArgTypes&... args)\n      : Base(a0, a1, a2, a3, args...) {}\n\n    /** \\brief Constructs an array and initializes it from the coefficients given as initializer-lists grouped by row. 
\\cpp11\n      *\n      * In the general case, the constructor takes a list of rows, each row being represented as a list of coefficients:\n      *\n      * Example: \\include Array_initializer_list_23_cxx11.cpp\n      * Output: \\verbinclude Array_initializer_list_23_cxx11.out\n      *\n      * Each of the inner initializer lists must contain the exact same number of elements, otherwise an assertion is triggered.\n      *\n      * In the case of a compile-time column 1D array, implicit transposition from a single row is allowed.\n      * Therefore <code> Array<int,Dynamic,1>{{1,2,3,4,5}}</code> is legal and the more verbose syntax\n      * <code>Array<int,Dynamic,1>{{1},{2},{3},{4},{5}}</code> can be avoided:\n      *\n      * Example: \\include Array_initializer_list_vector_cxx11.cpp\n      * Output: \\verbinclude Array_initializer_list_vector_cxx11.out\n      *\n      * In the case of fixed-sized arrays, the initializer list sizes must exactly match the array sizes,\n      * and implicit transposition is allowed for compile-time 1D arrays only.\n      *\n      * \\sa  Array(const Scalar& a0, const Scalar& a1, const Scalar& a2, const Scalar& a3, const ArgTypes&... args)\n      */\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE Array(const std::initializer_list<std::initializer_list<Scalar>>& list) : Base(list) {}\n    #endif // end EIGEN_HAS_CXX11\n\n    #ifndef EIGEN_PARSED_BY_DOXYGEN\n    template<typename T>\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE explicit Array(const T& x)\n    {\n      Base::_check_template_params();\n      Base::template _init1<T>(x);\n    }\n\n    template<typename T0, typename T1>\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE Array(const T0& val0, const T1& val1)\n    {\n      Base::_check_template_params();\n      this->template _init2<T0,T1>(val0, val1);\n    }\n\n    #else\n    /** \\brief Constructs a fixed-sized array initialized with coefficients starting at \\a data */\n    EIGEN_DEVICE_FUNC explicit Array(const Scalar *data);\n    /** Constructs a vector or row-vector with given dimension. \\only_for_vectors\n      *\n      * Note that this is only useful for dynamic-size vectors. For fixed-size vectors,\n      * it is redundant to pass the dimension here, so it makes more sense to use the default\n      * constructor Array() instead.\n      */\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE explicit Array(Index dim);\n    /** constructs an initialized 1x1 Array with the given coefficient\n      * \\sa const Scalar& a0, const Scalar& a1, const Scalar& a2, const Scalar& a3, const ArgTypes&... args */\n    Array(const Scalar& value);\n    /** constructs an uninitialized array with \\a rows rows and \\a cols columns.\n      *\n      * This is useful for dynamic-size arrays. For fixed-size arrays,\n      * it is redundant to pass these parameters, so one should use the default constructor\n      * Array() instead. */\n    Array(Index rows, Index cols);\n    /** constructs an initialized 2D vector with given coefficients\n      * \\sa Array(const Scalar& a0, const Scalar& a1, const Scalar& a2, const Scalar& a3, const ArgTypes&... args) */\n    Array(const Scalar& val0, const Scalar& val1);\n    #endif  // end EIGEN_PARSED_BY_DOXYGEN\n\n    /** constructs an initialized 3D vector with given coefficients\n      * \\sa Array(const Scalar& a0, const Scalar& a1, const Scalar& a2, const Scalar& a3, const ArgTypes&... 
args)\n      */\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE Array(const Scalar& val0, const Scalar& val1, const Scalar& val2)\n    {\n      Base::_check_template_params();\n      EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Array, 3)\n      m_storage.data()[0] = val0;\n      m_storage.data()[1] = val1;\n      m_storage.data()[2] = val2;\n    }\n    /** constructs an initialized 4D vector with given coefficients\n      * \\sa Array(const Scalar& a0, const Scalar& a1, const Scalar& a2, const Scalar& a3, const ArgTypes&... args)\n      */\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE Array(const Scalar& val0, const Scalar& val1, const Scalar& val2, const Scalar& val3)\n    {\n      Base::_check_template_params();\n      EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Array, 4)\n      m_storage.data()[0] = val0;\n      m_storage.data()[1] = val1;\n      m_storage.data()[2] = val2;\n      m_storage.data()[3] = val3;\n    }\n\n    /** Copy constructor */\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE Array(const Array& other)\n            : Base(other)\n    { }\n\n  private:\n    struct PrivateType {};\n  public:\n\n    /** \\sa MatrixBase::operator=(const EigenBase<OtherDerived>&) */\n    template<typename OtherDerived>\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE Array(const EigenBase<OtherDerived> &other,\n                              typename internal::enable_if<internal::is_convertible<typename OtherDerived::Scalar,Scalar>::value,\n                                                           PrivateType>::type = PrivateType())\n      : Base(other.derived())\n    { }\n\n    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n    inline Index innerStride() const EIGEN_NOEXCEPT{ return 1; }\n    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n    inline Index outerStride() const EIGEN_NOEXCEPT { return this->innerSize(); }\n\n    #ifdef EIGEN_ARRAY_PLUGIN\n    #include EIGEN_ARRAY_PLUGIN\n    #endif\n\n  private:\n\n    template<typename MatrixType, typename OtherDerived, bool SwapPointers>\n    friend struct internal::matrix_swap_impl;\n};\n\n/** \\defgroup arraytypedefs Global array typedefs\n  * \\ingroup Core_Module\n  *\n  * %Eigen defines several typedef shortcuts for most common 1D and 2D array types.\n  *\n  * The general patterns are the following:\n  *\n  * \\c ArrayRowsColsType where \\c Rows and \\c Cols can be \\c 2,\\c 3,\\c 4 for fixed size square matrices or \\c X for dynamic size,\n  * and where \\c Type can be \\c i for integer, \\c f for float, \\c d for double, \\c cf for complex float, \\c cd\n  * for complex double.\n  *\n  * For example, \\c Array33d is a fixed-size 3x3 array type of doubles, and \\c ArrayXXf is a dynamic-size matrix of floats.\n  *\n  * There are also \\c ArraySizeType which are self-explanatory. 
For example, \\c Array4cf is\n  * a fixed-size 1D array of 4 complex floats.\n  *\n  * With \\cpp11, template aliases are also defined for common sizes.\n  * They follow the same pattern as above except that the scalar type suffix is replaced by a\n  * template parameter, i.e.:\n  *   - `ArrayRowsCols<Type>` where `Rows` and `Cols` can be \\c 2,\\c 3,\\c 4, or \\c X for fixed or dynamic size.\n  *   - `ArraySize<Type>` where `Size` can be \\c 2,\\c 3,\\c 4 or \\c X for fixed or dynamic size 1D arrays.\n  *\n  * \\sa class Array\n  */\n\n#define EIGEN_MAKE_ARRAY_TYPEDEFS(Type, TypeSuffix, Size, SizeSuffix)   \\\n/** \\ingroup arraytypedefs */                                    \\\ntypedef Array<Type, Size, Size> Array##SizeSuffix##SizeSuffix##TypeSuffix;  \\\n/** \\ingroup arraytypedefs */                                    \\\ntypedef Array<Type, Size, 1>    Array##SizeSuffix##TypeSuffix;\n\n#define EIGEN_MAKE_ARRAY_FIXED_TYPEDEFS(Type, TypeSuffix, Size)         \\\n/** \\ingroup arraytypedefs */                                    \\\ntypedef Array<Type, Size, Dynamic> Array##Size##X##TypeSuffix;  \\\n/** \\ingroup arraytypedefs */                                    \\\ntypedef Array<Type, Dynamic, Size> Array##X##Size##TypeSuffix;\n\n#define EIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES(Type, TypeSuffix) \\\nEIGEN_MAKE_ARRAY_TYPEDEFS(Type, TypeSuffix, 2, 2) \\\nEIGEN_MAKE_ARRAY_TYPEDEFS(Type, TypeSuffix, 3, 3) \\\nEIGEN_MAKE_ARRAY_TYPEDEFS(Type, TypeSuffix, 4, 4) \\\nEIGEN_MAKE_ARRAY_TYPEDEFS(Type, TypeSuffix, Dynamic, X) \\\nEIGEN_MAKE_ARRAY_FIXED_TYPEDEFS(Type, TypeSuffix, 2) \\\nEIGEN_MAKE_ARRAY_FIXED_TYPEDEFS(Type, TypeSuffix, 3) \\\nEIGEN_MAKE_ARRAY_FIXED_TYPEDEFS(Type, TypeSuffix, 4)\n\nEIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES(int,                  i)\nEIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES(float,                f)\nEIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES(double,               d)\nEIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES(std::complex<float>,  cf)\nEIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES(std::complex<double>, cd)\n\n#undef EIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES\n#undef EIGEN_MAKE_ARRAY_TYPEDEFS\n#undef EIGEN_MAKE_ARRAY_FIXED_TYPEDEFS\n\n#if EIGEN_HAS_CXX11\n\n#define EIGEN_MAKE_ARRAY_TYPEDEFS(Size, SizeSuffix)               \\\n/** \\ingroup arraytypedefs */                                     \\\n/** \\brief \\cpp11 */                                              \\\ntemplate <typename Type>                                          \\\nusing Array##SizeSuffix##SizeSuffix = Array<Type, Size, Size>;    \\\n/** \\ingroup arraytypedefs */                                     \\\n/** \\brief \\cpp11 */                                              \\\ntemplate <typename Type>                                          \\\nusing Array##SizeSuffix = Array<Type, Size, 1>;\n\n#define EIGEN_MAKE_ARRAY_FIXED_TYPEDEFS(Size)                     \\\n/** \\ingroup arraytypedefs */                                     \\\n/** \\brief \\cpp11 */                                              \\\ntemplate <typename Type>                                          \\\nusing Array##Size##X = Array<Type, Size, Dynamic>;                \\\n/** \\ingroup arraytypedefs */                                     \\\n/** \\brief \\cpp11 */                                              \\\ntemplate <typename Type>                                          \\\nusing Array##X##Size = Array<Type, Dynamic, Size>;\n\nEIGEN_MAKE_ARRAY_TYPEDEFS(2, 2)\nEIGEN_MAKE_ARRAY_TYPEDEFS(3, 3)\nEIGEN_MAKE_ARRAY_TYPEDEFS(4, 4)\nEIGEN_MAKE_ARRAY_TYPEDEFS(Dynamic, 
X)\nEIGEN_MAKE_ARRAY_FIXED_TYPEDEFS(2)\nEIGEN_MAKE_ARRAY_FIXED_TYPEDEFS(3)\nEIGEN_MAKE_ARRAY_FIXED_TYPEDEFS(4)\n\n#undef EIGEN_MAKE_ARRAY_TYPEDEFS\n#undef EIGEN_MAKE_ARRAY_FIXED_TYPEDEFS\n\n#endif // EIGEN_HAS_CXX11\n\n#define EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, SizeSuffix) \\\nusing Eigen::Matrix##SizeSuffix##TypeSuffix; \\\nusing Eigen::Vector##SizeSuffix##TypeSuffix; \\\nusing Eigen::RowVector##SizeSuffix##TypeSuffix;\n\n#define EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE(TypeSuffix) \\\nEIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 2) \\\nEIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 3) \\\nEIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 4) \\\nEIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, X) \\\n\n#define EIGEN_USING_ARRAY_TYPEDEFS \\\nEIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE(i) \\\nEIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE(f) \\\nEIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE(d) \\\nEIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE(cf) \\\nEIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE(cd)\n\n} // end namespace Eigen\n\n#endif // EIGEN_ARRAY_H\n"
  },
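The Doxygen comments in Array.h above document three user-facing features: the coefficient-wise Array API, the typedef families generated by the EIGEN_MAKE_ARRAY_TYPEDEFS macros (e.g. Array33d, ArrayXXf, Array4cf), and the C++11 initializer-list constructors. A minimal sketch of how they fit together, assuming the vendored eigen-3.4.0 headers are on the include path and C++11 or later is enabled:

    #include <Eigen/Core>
    #include <iostream>

    int main() {
      // Array33d is produced by EIGEN_MAKE_ARRAY_TYPEDEFS(double, d, 3, 3);
      // the nested initializer lists are grouped by row, as the constructor
      // documentation above specifies.
      Eigen::Array33d a{{1., 2., 3.},
                        {4., 5., 6.},
                        {7., 8., 9.}};

      // For compile-time 1D arrays, implicit transposition from a single row
      // is allowed, so this column array needs no per-element inner braces.
      Eigen::Array<int, Eigen::Dynamic, 1> v{{1, 2, 3, 4, 5}};

      // Coefficient-wise semantics: operator* multiplies element by element,
      // unlike the linear-algebra product on the Matrix class.
      Eigen::Array33d b = a * a;

      std::cout << b << "\n" << v.sum() << "\n";
    }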
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/ArrayBase.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_ARRAYBASE_H\n#define EIGEN_ARRAYBASE_H\n\nnamespace Eigen { \n\ntemplate<typename ExpressionType> class MatrixWrapper;\n\n/** \\class ArrayBase\n  * \\ingroup Core_Module\n  *\n  * \\brief Base class for all 1D and 2D array, and related expressions\n  *\n  * An array is similar to a dense vector or matrix. While matrices are mathematical\n  * objects with well defined linear algebra operators, an array is just a collection\n  * of scalar values arranged in a one or two dimensionnal fashion. As the main consequence,\n  * all operations applied to an array are performed coefficient wise. Furthermore,\n  * arrays support scalar math functions of the c++ standard library (e.g., std::sin(x)), and convenient\n  * constructors allowing to easily write generic code working for both scalar values\n  * and arrays.\n  *\n  * This class is the base that is inherited by all array expression types.\n  *\n  * \\tparam Derived is the derived type, e.g., an array or an expression type.\n  *\n  * This class can be extended with the help of the plugin mechanism described on the page\n  * \\ref TopicCustomizing_Plugins by defining the preprocessor symbol \\c EIGEN_ARRAYBASE_PLUGIN.\n  *\n  * \\sa class MatrixBase, \\ref TopicClassHierarchy\n  */\ntemplate<typename Derived> class ArrayBase\n  : public DenseBase<Derived>\n{\n  public:\n#ifndef EIGEN_PARSED_BY_DOXYGEN\n    /** The base class for a given storage type. 
*/\n    typedef ArrayBase StorageBaseType;\n\n    typedef ArrayBase Eigen_BaseClassForSpecializationOfGlobalMathFuncImpl;\n\n    typedef typename internal::traits<Derived>::StorageKind StorageKind;\n    typedef typename internal::traits<Derived>::Scalar Scalar;\n    typedef typename internal::packet_traits<Scalar>::type PacketScalar;\n    typedef typename NumTraits<Scalar>::Real RealScalar;\n\n    typedef DenseBase<Derived> Base;\n    using Base::RowsAtCompileTime;\n    using Base::ColsAtCompileTime;\n    using Base::SizeAtCompileTime;\n    using Base::MaxRowsAtCompileTime;\n    using Base::MaxColsAtCompileTime;\n    using Base::MaxSizeAtCompileTime;\n    using Base::IsVectorAtCompileTime;\n    using Base::Flags;\n    \n    using Base::derived;\n    using Base::const_cast_derived;\n    using Base::rows;\n    using Base::cols;\n    using Base::size;\n    using Base::coeff;\n    using Base::coeffRef;\n    using Base::lazyAssign;\n    using Base::operator-;\n    using Base::operator=;\n    using Base::operator+=;\n    using Base::operator-=;\n    using Base::operator*=;\n    using Base::operator/=;\n\n    typedef typename Base::CoeffReturnType CoeffReturnType;\n\n#endif // not EIGEN_PARSED_BY_DOXYGEN\n\n#ifndef EIGEN_PARSED_BY_DOXYGEN\n    typedef typename Base::PlainObject PlainObject;\n\n    /** \\internal Represents a matrix with all coefficients equal to one another*/\n    typedef CwiseNullaryOp<internal::scalar_constant_op<Scalar>,PlainObject> ConstantReturnType;\n#endif // not EIGEN_PARSED_BY_DOXYGEN\n\n#define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::ArrayBase\n#define EIGEN_DOC_UNARY_ADDONS(X,Y)\n#   include \"../plugins/MatrixCwiseUnaryOps.h\"\n#   include \"../plugins/ArrayCwiseUnaryOps.h\"\n#   include \"../plugins/CommonCwiseBinaryOps.h\"\n#   include \"../plugins/MatrixCwiseBinaryOps.h\"\n#   include \"../plugins/ArrayCwiseBinaryOps.h\"\n#   ifdef EIGEN_ARRAYBASE_PLUGIN\n#     include EIGEN_ARRAYBASE_PLUGIN\n#   endif\n#undef EIGEN_CURRENT_STORAGE_BASE_CLASS\n#undef EIGEN_DOC_UNARY_ADDONS\n\n    /** Special case of the template operator=, in order to prevent the compiler\n      * from generating a default operator= (issue hit with g++ 4.1)\n      */\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n    Derived& operator=(const ArrayBase& other)\n    {\n      internal::call_assignment(derived(), other.derived());\n      return derived();\n    }\n    \n    /** Set all the entries to \\a value.\n      * \\sa DenseBase::setConstant(), DenseBase::fill() */\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n    Derived& operator=(const Scalar &value)\n    { Base::setConstant(value); return derived(); }\n\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n    Derived& operator+=(const Scalar& scalar);\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n    Derived& operator-=(const Scalar& scalar);\n\n    template<typename OtherDerived>\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n    Derived& operator+=(const ArrayBase<OtherDerived>& other);\n    template<typename OtherDerived>\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n    Derived& operator-=(const ArrayBase<OtherDerived>& other);\n\n    template<typename OtherDerived>\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n    Derived& operator*=(const ArrayBase<OtherDerived>& other);\n\n    template<typename OtherDerived>\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n    Derived& operator/=(const ArrayBase<OtherDerived>& other);\n\n  public:\n    EIGEN_DEVICE_FUNC\n    ArrayBase<Derived>& array() { return *this; }\n    EIGEN_DEVICE_FUNC\n    const ArrayBase<Derived>& array() 
const { return *this; }\n\n    /** \\returns an \\link Eigen::MatrixBase Matrix \\endlink expression of this array\n      * \\sa MatrixBase::array() */\n    EIGEN_DEVICE_FUNC\n    MatrixWrapper<Derived> matrix() { return MatrixWrapper<Derived>(derived()); }\n    EIGEN_DEVICE_FUNC\n    const MatrixWrapper<const Derived> matrix() const { return MatrixWrapper<const Derived>(derived()); }\n\n//     template<typename Dest>\n//     inline void evalTo(Dest& dst) const { dst = matrix(); }\n\n  protected:\n    EIGEN_DEFAULT_COPY_CONSTRUCTOR(ArrayBase)\n    EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(ArrayBase)\n\n  private:\n    explicit ArrayBase(Index);\n    ArrayBase(Index,Index);\n    template<typename OtherDerived> explicit ArrayBase(const ArrayBase<OtherDerived>&);\n  protected:\n    // mixing arrays and matrices is not legal\n    template<typename OtherDerived> Derived& operator+=(const MatrixBase<OtherDerived>& )\n    {EIGEN_STATIC_ASSERT(std::ptrdiff_t(sizeof(typename OtherDerived::Scalar))==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES); return *this;}\n    // mixing arrays and matrices is not legal\n    template<typename OtherDerived> Derived& operator-=(const MatrixBase<OtherDerived>& )\n    {EIGEN_STATIC_ASSERT(std::ptrdiff_t(sizeof(typename OtherDerived::Scalar))==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES); return *this;}\n};\n\n/** replaces \\c *this by \\c *this - \\a other.\n  *\n  * \\returns a reference to \\c *this\n  */\ntemplate<typename Derived>\ntemplate<typename OtherDerived>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived &\nArrayBase<Derived>::operator-=(const ArrayBase<OtherDerived> &other)\n{\n  call_assignment(derived(), other.derived(), internal::sub_assign_op<Scalar,typename OtherDerived::Scalar>());\n  return derived();\n}\n\n/** replaces \\c *this by \\c *this + \\a other.\n  *\n  * \\returns a reference to \\c *this\n  */\ntemplate<typename Derived>\ntemplate<typename OtherDerived>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived &\nArrayBase<Derived>::operator+=(const ArrayBase<OtherDerived>& other)\n{\n  call_assignment(derived(), other.derived(), internal::add_assign_op<Scalar,typename OtherDerived::Scalar>());\n  return derived();\n}\n\n/** replaces \\c *this by \\c *this * \\a other coefficient wise.\n  *\n  * \\returns a reference to \\c *this\n  */\ntemplate<typename Derived>\ntemplate<typename OtherDerived>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived &\nArrayBase<Derived>::operator*=(const ArrayBase<OtherDerived>& other)\n{\n  call_assignment(derived(), other.derived(), internal::mul_assign_op<Scalar,typename OtherDerived::Scalar>());\n  return derived();\n}\n\n/** replaces \\c *this by \\c *this / \\a other coefficient wise.\n  *\n  * \\returns a reference to \\c *this\n  */\ntemplate<typename Derived>\ntemplate<typename OtherDerived>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived &\nArrayBase<Derived>::operator/=(const ArrayBase<OtherDerived>& other)\n{\n  call_assignment(derived(), other.derived(), internal::div_assign_op<Scalar,typename OtherDerived::Scalar>());\n  return derived();\n}\n\n} // end namespace Eigen\n\n#endif // EIGEN_ARRAYBASE_H\n"
  },
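The compound operators declared in ArrayBase.h above (operator+=, -=, *=, /=) all route through internal::call_assignment with the matching assign functor and act coefficient-wise, while the protected MatrixBase overloads turn accidental array/matrix mixing into the YOU_CANNOT_MIX_ARRAYS_AND_MATRICES static assertion. A short sketch of the resulting semantics, under the same include-path assumption as above:

    #include <Eigen/Core>

    int main() {
      Eigen::ArrayXd x = Eigen::ArrayXd::Constant(4, 2.0);
      Eigen::ArrayXd y = Eigen::ArrayXd::Constant(4, 3.0);

      x += y;   // add_assign_op, coefficient-wise: x is now {5,5,5,5}
      x *= y;   // mul_assign_op, coefficient-wise product: {15,15,15,15}
      x /= y;   // div_assign_op, coefficient-wise quotient: {5,5,5,5}
      x = 1.5;  // operator=(const Scalar&) calls setConstant(1.5)

      // Eigen::VectorXd v(4);
      // x += v;  // would trigger YOU_CANNOT_MIX_ARRAYS_AND_MATRICES
    }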
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/ArrayWrapper.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_ARRAYWRAPPER_H\n#define EIGEN_ARRAYWRAPPER_H\n\nnamespace Eigen {\n\n/** \\class ArrayWrapper\n  * \\ingroup Core_Module\n  *\n  * \\brief Expression of a mathematical vector or matrix as an array object\n  *\n  * This class is the return type of MatrixBase::array(), and most of the time\n  * this is the only way it is use.\n  *\n  * \\sa MatrixBase::array(), class MatrixWrapper\n  */\n\nnamespace internal {\ntemplate<typename ExpressionType>\nstruct traits<ArrayWrapper<ExpressionType> >\n  : public traits<typename remove_all<typename ExpressionType::Nested>::type >\n{\n  typedef ArrayXpr XprKind;\n  // Let's remove NestByRefBit\n  enum {\n    Flags0 = traits<typename remove_all<typename ExpressionType::Nested>::type >::Flags,\n    LvalueBitFlag = is_lvalue<ExpressionType>::value ? LvalueBit : 0,\n    Flags = (Flags0 & ~(NestByRefBit | LvalueBit)) | LvalueBitFlag\n  };\n};\n}\n\ntemplate<typename ExpressionType>\nclass ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >\n{\n  public:\n    typedef ArrayBase<ArrayWrapper> Base;\n    EIGEN_DENSE_PUBLIC_INTERFACE(ArrayWrapper)\n    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(ArrayWrapper)\n    typedef typename internal::remove_all<ExpressionType>::type NestedExpression;\n\n    typedef typename internal::conditional<\n                       internal::is_lvalue<ExpressionType>::value,\n                       Scalar,\n                       const Scalar\n                     >::type ScalarWithConstIfNotLvalue;\n\n    typedef typename internal::ref_selector<ExpressionType>::non_const_type NestedExpressionType;\n\n    using Base::coeffRef;\n\n    EIGEN_DEVICE_FUNC\n    explicit EIGEN_STRONG_INLINE ArrayWrapper(ExpressionType& matrix) : m_expression(matrix) {}\n\n    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n    inline Index rows() const EIGEN_NOEXCEPT { return m_expression.rows(); }\n    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n    inline Index cols() const EIGEN_NOEXCEPT { return m_expression.cols(); }\n    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n    inline Index outerStride() const EIGEN_NOEXCEPT { return m_expression.outerStride(); }\n    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n    inline Index innerStride() const EIGEN_NOEXCEPT { return m_expression.innerStride(); }\n\n    EIGEN_DEVICE_FUNC\n    inline ScalarWithConstIfNotLvalue* data() { return m_expression.data(); }\n    EIGEN_DEVICE_FUNC\n    inline const Scalar* data() const { return m_expression.data(); }\n\n    EIGEN_DEVICE_FUNC\n    inline const Scalar& coeffRef(Index rowId, Index colId) const\n    {\n      return m_expression.coeffRef(rowId, colId);\n    }\n\n    EIGEN_DEVICE_FUNC\n    inline const Scalar& coeffRef(Index index) const\n    {\n      return m_expression.coeffRef(index);\n    }\n\n    template<typename Dest>\n    EIGEN_DEVICE_FUNC\n    inline void evalTo(Dest& dst) const { dst = m_expression; }\n\n    EIGEN_DEVICE_FUNC\n    const typename internal::remove_all<NestedExpressionType>::type&\n    nestedExpression() const\n    {\n      return m_expression;\n    }\n\n    /** Forwards the resizing request to the nested expression\n      * \\sa DenseBase::resize(Index)  */\n    EIGEN_DEVICE_FUNC\n    
void resize(Index newSize) { m_expression.resize(newSize); }\n    /** Forwards the resizing request to the nested expression\n      * \\sa DenseBase::resize(Index,Index)*/\n    EIGEN_DEVICE_FUNC\n    void resize(Index rows, Index cols) { m_expression.resize(rows,cols); }\n\n  protected:\n    NestedExpressionType m_expression;\n};\n\n/** \\class MatrixWrapper\n  * \\ingroup Core_Module\n  *\n  * \\brief Expression of an array as a mathematical vector or matrix\n  *\n  * This class is the return type of ArrayBase::matrix(), and most of the time\n  * this is the only way it is used.\n  *\n  * \\sa ArrayBase::matrix(), class ArrayWrapper\n  */\n\nnamespace internal {\ntemplate<typename ExpressionType>\nstruct traits<MatrixWrapper<ExpressionType> >\n : public traits<typename remove_all<typename ExpressionType::Nested>::type >\n{\n  typedef MatrixXpr XprKind;\n  // Let's remove NestByRefBit\n  enum {\n    Flags0 = traits<typename remove_all<typename ExpressionType::Nested>::type >::Flags,\n    LvalueBitFlag = is_lvalue<ExpressionType>::value ? LvalueBit : 0,\n    Flags = (Flags0 & ~(NestByRefBit | LvalueBit)) | LvalueBitFlag\n  };\n};\n}\n\ntemplate<typename ExpressionType>\nclass MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >\n{\n  public:\n    typedef MatrixBase<MatrixWrapper<ExpressionType> > Base;\n    EIGEN_DENSE_PUBLIC_INTERFACE(MatrixWrapper)\n    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(MatrixWrapper)\n    typedef typename internal::remove_all<ExpressionType>::type NestedExpression;\n\n    typedef typename internal::conditional<\n                       internal::is_lvalue<ExpressionType>::value,\n                       Scalar,\n                       const Scalar\n                     >::type ScalarWithConstIfNotLvalue;\n\n    typedef typename internal::ref_selector<ExpressionType>::non_const_type NestedExpressionType;\n\n    using Base::coeffRef;\n\n    EIGEN_DEVICE_FUNC\n    explicit inline MatrixWrapper(ExpressionType& matrix) : m_expression(matrix) {}\n\n    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n    inline Index rows() const EIGEN_NOEXCEPT { return m_expression.rows(); }\n    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n    inline Index cols() const EIGEN_NOEXCEPT { return m_expression.cols(); }\n    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n    inline Index outerStride() const EIGEN_NOEXCEPT { return m_expression.outerStride(); }\n    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n    inline Index innerStride() const EIGEN_NOEXCEPT { return m_expression.innerStride(); }\n\n    EIGEN_DEVICE_FUNC\n    inline ScalarWithConstIfNotLvalue* data() { return m_expression.data(); }\n    EIGEN_DEVICE_FUNC\n    inline const Scalar* data() const { return m_expression.data(); }\n\n    EIGEN_DEVICE_FUNC\n    inline const Scalar& coeffRef(Index rowId, Index colId) const\n    {\n      return m_expression.derived().coeffRef(rowId, colId);\n    }\n\n    EIGEN_DEVICE_FUNC\n    inline const Scalar& coeffRef(Index index) const\n    {\n      return m_expression.coeffRef(index);\n    }\n\n    EIGEN_DEVICE_FUNC\n    const typename internal::remove_all<NestedExpressionType>::type&\n    nestedExpression() const\n    {\n      return m_expression;\n    }\n\n    /** Forwards the resizing request to the nested expression\n      * \\sa DenseBase::resize(Index)  */\n    EIGEN_DEVICE_FUNC\n    void resize(Index newSize) { m_expression.resize(newSize); }\n    /** Forwards the resizing request to the nested expression\n      * \\sa DenseBase::resize(Index,Index)*/\n    EIGEN_DEVICE_FUNC\n    void resize(Index rows, Index cols) { 
m_expression.resize(rows,cols); }\n\n  protected:\n    NestedExpressionType m_expression;\n};\n\n} // end namespace Eigen\n\n#endif // EIGEN_ARRAYWRAPPER_H\n"
  },
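ArrayWrapper and MatrixWrapper above are the zero-copy view types behind MatrixBase::array() and ArrayBase::matrix(): both forward rows(), cols(), the strides, data() and coeffRef() to the nested expression, so switching between the two semantics allocates nothing. A brief sketch, same include-path assumptions as above:

    #include <Eigen/Core>

    int main() {
      Eigen::MatrixXd m = Eigen::MatrixXd::Ones(2, 2);

      // ArrayWrapper view: coefficient-wise update of the matrix in place.
      m.array() += 1.0;  // every coefficient becomes 2.0

      // MatrixWrapper view: back to linear-algebra semantics for a genuine
      // matrix product.
      Eigen::MatrixXd p = (m.array() * 3.0).matrix() * m;
      (void)p;
    }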
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/Assign.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2007 Michael Olbrich <michael.olbrich@gmx.net>\n// Copyright (C) 2006-2010 Benoit Jacob <jacob.benoit.1@gmail.com>\n// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_ASSIGN_H\n#define EIGEN_ASSIGN_H\n\nnamespace Eigen {\n\ntemplate<typename Derived>\ntemplate<typename OtherDerived>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>\n  ::lazyAssign(const DenseBase<OtherDerived>& other)\n{\n  enum{\n    SameType = internal::is_same<typename Derived::Scalar,typename OtherDerived::Scalar>::value\n  };\n\n  EIGEN_STATIC_ASSERT_LVALUE(Derived)\n  EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Derived,OtherDerived)\n  EIGEN_STATIC_ASSERT(SameType,YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)\n\n  eigen_assert(rows() == other.rows() && cols() == other.cols());\n  internal::call_assignment_no_alias(derived(),other.derived());\n  \n  return derived();\n}\n\ntemplate<typename Derived>\ntemplate<typename OtherDerived>\nEIGEN_DEVICE_FUNC\nEIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator=(const DenseBase<OtherDerived>& other)\n{\n  internal::call_assignment(derived(), other.derived());\n  return derived();\n}\n\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC\nEIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator=(const DenseBase& other)\n{\n  internal::call_assignment(derived(), other.derived());\n  return derived();\n}\n\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC\nEIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const MatrixBase& other)\n{\n  internal::call_assignment(derived(), other.derived());\n  return derived();\n}\n\ntemplate<typename Derived>\ntemplate <typename OtherDerived>\nEIGEN_DEVICE_FUNC\nEIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const DenseBase<OtherDerived>& other)\n{\n  internal::call_assignment(derived(), other.derived());\n  return derived();\n}\n\ntemplate<typename Derived>\ntemplate <typename OtherDerived>\nEIGEN_DEVICE_FUNC\nEIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const EigenBase<OtherDerived>& other)\n{\n  internal::call_assignment(derived(), other.derived());\n  return derived();\n}\n\ntemplate<typename Derived>\ntemplate<typename OtherDerived>\nEIGEN_DEVICE_FUNC\nEIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const ReturnByValue<OtherDerived>& other)\n{\n  other.derived().evalTo(derived());\n  return derived();\n}\n\n} // end namespace Eigen\n\n#endif // EIGEN_ASSIGN_H\n"
  },
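Assign.h above funnels every operator= overload into internal::call_assignment, while lazyAssign statically rejects mixed scalar types with YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY. A small sketch of the user-visible consequences (resizing assignment, explicit casts), same assumptions as above; the resizing behavior comes from the resize_if_allowed helper shown further down in AssignEvaluator.h:

    #include <Eigen/Core>
    #include <cassert>

    int main() {
      Eigen::MatrixXf src = Eigen::MatrixXf::Random(3, 5);

      Eigen::MatrixXf dst;  // empty 0x0 matrix
      dst = src;            // call_assignment resizes dst to 3x5, then copies
      assert(dst.rows() == 3 && dst.cols() == 5);

      Eigen::MatrixXd d;
      // d = src;            // rejected at compile time: float vs double scalars
      d = src.cast<double>();  // the cast method the assertion name points to
      (void)d;
    }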
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/AssignEvaluator.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2011 Benoit Jacob <jacob.benoit.1@gmail.com>\n// Copyright (C) 2011-2014 Gael Guennebaud <gael.guennebaud@inria.fr>\n// Copyright (C) 2011-2012 Jitse Niesen <jitse@maths.leeds.ac.uk>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_ASSIGN_EVALUATOR_H\n#define EIGEN_ASSIGN_EVALUATOR_H\n\nnamespace Eigen {\n\n// This implementation is based on Assign.h\n\nnamespace internal {\n\n/***************************************************************************\n* Part 1 : the logic deciding a strategy for traversal and unrolling       *\n***************************************************************************/\n\n// copy_using_evaluator_traits is based on assign_traits\n\ntemplate <typename DstEvaluator, typename SrcEvaluator, typename AssignFunc, int MaxPacketSize = -1>\nstruct copy_using_evaluator_traits\n{\n  typedef typename DstEvaluator::XprType Dst;\n  typedef typename Dst::Scalar DstScalar;\n\n  enum {\n    DstFlags = DstEvaluator::Flags,\n    SrcFlags = SrcEvaluator::Flags\n  };\n\npublic:\n  enum {\n    DstAlignment = DstEvaluator::Alignment,\n    SrcAlignment = SrcEvaluator::Alignment,\n    DstHasDirectAccess = (DstFlags & DirectAccessBit) == DirectAccessBit,\n    JointAlignment = EIGEN_PLAIN_ENUM_MIN(DstAlignment,SrcAlignment)\n  };\n\nprivate:\n  enum {\n    InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime)\n              : int(DstFlags)&RowMajorBit ? int(Dst::ColsAtCompileTime)\n              : int(Dst::RowsAtCompileTime),\n    InnerMaxSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime)\n              : int(DstFlags)&RowMajorBit ? 
int(Dst::MaxColsAtCompileTime)\n              : int(Dst::MaxRowsAtCompileTime),\n    RestrictedInnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(InnerSize,MaxPacketSize),\n    RestrictedLinearSize = EIGEN_SIZE_MIN_PREFER_FIXED(Dst::SizeAtCompileTime,MaxPacketSize),\n    OuterStride = int(outer_stride_at_compile_time<Dst>::ret),\n    MaxSizeAtCompileTime = Dst::SizeAtCompileTime\n  };\n\n  // TODO distinguish between linear traversal and inner-traversals\n  typedef typename find_best_packet<DstScalar,RestrictedLinearSize>::type LinearPacketType;\n  typedef typename find_best_packet<DstScalar,RestrictedInnerSize>::type InnerPacketType;\n\n  enum {\n    LinearPacketSize = unpacket_traits<LinearPacketType>::size,\n    InnerPacketSize = unpacket_traits<InnerPacketType>::size\n  };\n\npublic:\n  enum {\n    LinearRequiredAlignment = unpacket_traits<LinearPacketType>::alignment,\n    InnerRequiredAlignment = unpacket_traits<InnerPacketType>::alignment\n  };\n\nprivate:\n  enum {\n    DstIsRowMajor = DstFlags&RowMajorBit,\n    SrcIsRowMajor = SrcFlags&RowMajorBit,\n    StorageOrdersAgree = (int(DstIsRowMajor) == int(SrcIsRowMajor)),\n    MightVectorize = bool(StorageOrdersAgree)\n                  && (int(DstFlags) & int(SrcFlags) & ActualPacketAccessBit)\n                  && bool(functor_traits<AssignFunc>::PacketAccess),\n    MayInnerVectorize  = MightVectorize\n                       && int(InnerSize)!=Dynamic && int(InnerSize)%int(InnerPacketSize)==0\n                       && int(OuterStride)!=Dynamic && int(OuterStride)%int(InnerPacketSize)==0\n                       && (EIGEN_UNALIGNED_VECTORIZE  || int(JointAlignment)>=int(InnerRequiredAlignment)),\n    MayLinearize = bool(StorageOrdersAgree) && (int(DstFlags) & int(SrcFlags) & LinearAccessBit),\n    MayLinearVectorize = bool(MightVectorize) && bool(MayLinearize) && bool(DstHasDirectAccess)\n                       && (EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment)) || MaxSizeAtCompileTime == Dynamic),\n      /* If the destination isn't aligned, we have to do runtime checks and we don't unroll,\n         so it's only good for large enough sizes. */\n    MaySliceVectorize  = bool(MightVectorize) && bool(DstHasDirectAccess)\n                       && (int(InnerMaxSize)==Dynamic || int(InnerMaxSize)>=(EIGEN_UNALIGNED_VECTORIZE?InnerPacketSize:(3*InnerPacketSize)))\n      /* slice vectorization can be slow, so we only want it if the slices are big, which is\n         indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block\n         in a fixed-size matrix\n         However, with EIGEN_UNALIGNED_VECTORIZE and unrolling, slice vectorization is still worth it */\n  };\n\npublic:\n  enum {\n    Traversal =  int(Dst::SizeAtCompileTime) == 0 ? int(AllAtOnceTraversal) // If compile-size is zero, traversing will fail at compile-time.\n              : (int(MayLinearVectorize) && (LinearPacketSize>InnerPacketSize)) ? int(LinearVectorizedTraversal)\n              : int(MayInnerVectorize)   ? int(InnerVectorizedTraversal)\n              : int(MayLinearVectorize)  ? int(LinearVectorizedTraversal)\n              : int(MaySliceVectorize)   ? int(SliceVectorizedTraversal)\n              : int(MayLinearize)        ? 
int(LinearTraversal)\n                                         : int(DefaultTraversal),\n    Vectorized = int(Traversal) == InnerVectorizedTraversal\n              || int(Traversal) == LinearVectorizedTraversal\n              || int(Traversal) == SliceVectorizedTraversal\n  };\n\n  typedef typename conditional<int(Traversal)==LinearVectorizedTraversal, LinearPacketType, InnerPacketType>::type PacketType;\n\nprivate:\n  enum {\n    ActualPacketSize    = int(Traversal)==LinearVectorizedTraversal ? LinearPacketSize\n                        : Vectorized ? InnerPacketSize\n                        : 1,\n    UnrollingLimit      = EIGEN_UNROLLING_LIMIT * ActualPacketSize,\n    MayUnrollCompletely = int(Dst::SizeAtCompileTime) != Dynamic\n                       && int(Dst::SizeAtCompileTime) * (int(DstEvaluator::CoeffReadCost)+int(SrcEvaluator::CoeffReadCost)) <= int(UnrollingLimit),\n    MayUnrollInner      = int(InnerSize) != Dynamic\n                       && int(InnerSize) * (int(DstEvaluator::CoeffReadCost)+int(SrcEvaluator::CoeffReadCost)) <= int(UnrollingLimit)\n  };\n\npublic:\n  enum {\n    Unrolling = (int(Traversal) == int(InnerVectorizedTraversal) || int(Traversal) == int(DefaultTraversal))\n                ? (\n                    int(MayUnrollCompletely) ? int(CompleteUnrolling)\n                  : int(MayUnrollInner)      ? int(InnerUnrolling)\n                                             : int(NoUnrolling)\n                  )\n              : int(Traversal) == int(LinearVectorizedTraversal)\n                ? ( bool(MayUnrollCompletely) && ( EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment)))\n                          ? int(CompleteUnrolling)\n                          : int(NoUnrolling) )\n              : int(Traversal) == int(LinearTraversal)\n                ? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling)\n                                              : int(NoUnrolling) )\n#if EIGEN_UNALIGNED_VECTORIZE\n              : int(Traversal) == int(SliceVectorizedTraversal)\n                ? ( bool(MayUnrollInner) ? 
int(InnerUnrolling)\n                                         : int(NoUnrolling) )\n#endif\n              : int(NoUnrolling)\n  };\n\n#ifdef EIGEN_DEBUG_ASSIGN\n  static void debug()\n  {\n    std::cerr << \"DstXpr: \" << typeid(typename DstEvaluator::XprType).name() << std::endl;\n    std::cerr << \"SrcXpr: \" << typeid(typename SrcEvaluator::XprType).name() << std::endl;\n    std::cerr.setf(std::ios::hex, std::ios::basefield);\n    std::cerr << \"DstFlags\" << \" = \" << DstFlags << \" (\" << demangle_flags(DstFlags) << \" )\" << std::endl;\n    std::cerr << \"SrcFlags\" << \" = \" << SrcFlags << \" (\" << demangle_flags(SrcFlags) << \" )\" << std::endl;\n    std::cerr.unsetf(std::ios::hex);\n    EIGEN_DEBUG_VAR(DstAlignment)\n    EIGEN_DEBUG_VAR(SrcAlignment)\n    EIGEN_DEBUG_VAR(LinearRequiredAlignment)\n    EIGEN_DEBUG_VAR(InnerRequiredAlignment)\n    EIGEN_DEBUG_VAR(JointAlignment)\n    EIGEN_DEBUG_VAR(InnerSize)\n    EIGEN_DEBUG_VAR(InnerMaxSize)\n    EIGEN_DEBUG_VAR(LinearPacketSize)\n    EIGEN_DEBUG_VAR(InnerPacketSize)\n    EIGEN_DEBUG_VAR(ActualPacketSize)\n    EIGEN_DEBUG_VAR(StorageOrdersAgree)\n    EIGEN_DEBUG_VAR(MightVectorize)\n    EIGEN_DEBUG_VAR(MayLinearize)\n    EIGEN_DEBUG_VAR(MayInnerVectorize)\n    EIGEN_DEBUG_VAR(MayLinearVectorize)\n    EIGEN_DEBUG_VAR(MaySliceVectorize)\n    std::cerr << \"Traversal\" << \" = \" << Traversal << \" (\" << demangle_traversal(Traversal) << \")\" << std::endl;\n    EIGEN_DEBUG_VAR(SrcEvaluator::CoeffReadCost)\n    EIGEN_DEBUG_VAR(DstEvaluator::CoeffReadCost)\n    EIGEN_DEBUG_VAR(Dst::SizeAtCompileTime)\n    EIGEN_DEBUG_VAR(UnrollingLimit)\n    EIGEN_DEBUG_VAR(MayUnrollCompletely)\n    EIGEN_DEBUG_VAR(MayUnrollInner)\n    std::cerr << \"Unrolling\" << \" = \" << Unrolling << \" (\" << demangle_unrolling(Unrolling) << \")\" << std::endl;\n    std::cerr << std::endl;\n  }\n#endif\n};\n\n/***************************************************************************\n* Part 2 : meta-unrollers\n***************************************************************************/\n\n/************************\n*** Default traversal ***\n************************/\n\ntemplate<typename Kernel, int Index, int Stop>\nstruct copy_using_evaluator_DefaultTraversal_CompleteUnrolling\n{\n  // FIXME: this is not very clean, perhaps this information should be provided by the kernel?\n  typedef typename Kernel::DstEvaluatorType DstEvaluatorType;\n  typedef typename DstEvaluatorType::XprType DstXprType;\n\n  enum {\n    outer = Index / DstXprType::InnerSizeAtCompileTime,\n    inner = Index % DstXprType::InnerSizeAtCompileTime\n  };\n\n  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)\n  {\n    kernel.assignCoeffByOuterInner(outer, inner);\n    copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Index+1, Stop>::run(kernel);\n  }\n};\n\ntemplate<typename Kernel, int Stop>\nstruct copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Stop, Stop>\n{\n  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }\n};\n\ntemplate<typename Kernel, int Index_, int Stop>\nstruct copy_using_evaluator_DefaultTraversal_InnerUnrolling\n{\n  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer)\n  {\n    kernel.assignCoeffByOuterInner(outer, Index_);\n    copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Index_+1, Stop>::run(kernel, outer);\n  }\n};\n\ntemplate<typename Kernel, int Stop>\nstruct copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Stop, Stop>\n{\n  EIGEN_DEVICE_FUNC 
static EIGEN_STRONG_INLINE void run(Kernel&, Index) { }\n};\n\n/***********************\n*** Linear traversal ***\n***********************/\n\ntemplate<typename Kernel, int Index, int Stop>\nstruct copy_using_evaluator_LinearTraversal_CompleteUnrolling\n{\n  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel)\n  {\n    kernel.assignCoeff(Index);\n    copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Index+1, Stop>::run(kernel);\n  }\n};\n\ntemplate<typename Kernel, int Stop>\nstruct copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Stop, Stop>\n{\n  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }\n};\n\n/**************************\n*** Inner vectorization ***\n**************************/\n\ntemplate<typename Kernel, int Index, int Stop>\nstruct copy_using_evaluator_innervec_CompleteUnrolling\n{\n  // FIXME: this is not very clean, perhaps this information should be provided by the kernel?\n  typedef typename Kernel::DstEvaluatorType DstEvaluatorType;\n  typedef typename DstEvaluatorType::XprType DstXprType;\n  typedef typename Kernel::PacketType PacketType;\n\n  enum {\n    outer = Index / DstXprType::InnerSizeAtCompileTime,\n    inner = Index % DstXprType::InnerSizeAtCompileTime,\n    SrcAlignment = Kernel::AssignmentTraits::SrcAlignment,\n    DstAlignment = Kernel::AssignmentTraits::DstAlignment\n  };\n\n  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)\n  {\n    kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, inner);\n    enum { NextIndex = Index + unpacket_traits<PacketType>::size };\n    copy_using_evaluator_innervec_CompleteUnrolling<Kernel, NextIndex, Stop>::run(kernel);\n  }\n};\n\ntemplate<typename Kernel, int Stop>\nstruct copy_using_evaluator_innervec_CompleteUnrolling<Kernel, Stop, Stop>\n{\n  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }\n};\n\ntemplate<typename Kernel, int Index_, int Stop, int SrcAlignment, int DstAlignment>\nstruct copy_using_evaluator_innervec_InnerUnrolling\n{\n  typedef typename Kernel::PacketType PacketType;\n  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer)\n  {\n    kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, Index_);\n    enum { NextIndex = Index_ + unpacket_traits<PacketType>::size };\n    copy_using_evaluator_innervec_InnerUnrolling<Kernel, NextIndex, Stop, SrcAlignment, DstAlignment>::run(kernel, outer);\n  }\n};\n\ntemplate<typename Kernel, int Stop, int SrcAlignment, int DstAlignment>\nstruct copy_using_evaluator_innervec_InnerUnrolling<Kernel, Stop, Stop, SrcAlignment, DstAlignment>\n{\n  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &, Index) { }\n};\n\n/***************************************************************************\n* Part 3 : implementation of all cases\n***************************************************************************/\n\n// dense_assignment_loop is based on assign_impl\n\ntemplate<typename Kernel,\n         int Traversal = Kernel::AssignmentTraits::Traversal,\n         int Unrolling = Kernel::AssignmentTraits::Unrolling>\nstruct dense_assignment_loop;\n\n/************************\n***** Special Cases *****\n************************/\n\n// Zero-sized assignment is a no-op.\ntemplate<typename Kernel, int Unrolling>\nstruct dense_assignment_loop<Kernel, AllAtOnceTraversal, Unrolling>\n{\n  EIGEN_DEVICE_FUNC static void EIGEN_STRONG_INLINE run(Kernel& /*kernel*/)\n  {\n    typedef 
typename Kernel::DstEvaluatorType::XprType DstXprType;\n    EIGEN_STATIC_ASSERT(int(DstXprType::SizeAtCompileTime) == 0,\n      EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT)\n  }\n};\n\n/************************\n*** Default traversal ***\n************************/\n\ntemplate<typename Kernel>\nstruct dense_assignment_loop<Kernel, DefaultTraversal, NoUnrolling>\n{\n  EIGEN_DEVICE_FUNC static void EIGEN_STRONG_INLINE run(Kernel &kernel)\n  {\n    for(Index outer = 0; outer < kernel.outerSize(); ++outer) {\n      for(Index inner = 0; inner < kernel.innerSize(); ++inner) {\n        kernel.assignCoeffByOuterInner(outer, inner);\n      }\n    }\n  }\n};\n\ntemplate<typename Kernel>\nstruct dense_assignment_loop<Kernel, DefaultTraversal, CompleteUnrolling>\n{\n  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)\n  {\n    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;\n    copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);\n  }\n};\n\ntemplate<typename Kernel>\nstruct dense_assignment_loop<Kernel, DefaultTraversal, InnerUnrolling>\n{\n  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)\n  {\n    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;\n\n    const Index outerSize = kernel.outerSize();\n    for(Index outer = 0; outer < outerSize; ++outer)\n      copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, 0, DstXprType::InnerSizeAtCompileTime>::run(kernel, outer);\n  }\n};\n\n/***************************\n*** Linear vectorization ***\n***************************/\n\n\n// The goal of unaligned_dense_assignment_loop is simply to factorize the handling\n// of the non vectorizable beginning and ending parts\n\ntemplate <bool IsAligned = false>\nstruct unaligned_dense_assignment_loop\n{\n  // if IsAligned = true, then do nothing\n  template <typename Kernel>\n  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&, Index, Index) {}\n};\n\ntemplate <>\nstruct unaligned_dense_assignment_loop<false>\n{\n  // MSVC must not inline this function. If it does, it fails to optimize the\n  // packet access path.\n  // FIXME check which version exhibits this issue\n#if EIGEN_COMP_MSVC\n  template <typename Kernel>\n  static EIGEN_DONT_INLINE void run(Kernel &kernel,\n                                    Index start,\n                                    Index end)\n#else\n  template <typename Kernel>\n  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel,\n                                      Index start,\n                                      Index end)\n#endif\n  {\n    for (Index index = start; index < end; ++index)\n      kernel.assignCoeff(index);\n  }\n};\n\ntemplate<typename Kernel>\nstruct dense_assignment_loop<Kernel, LinearVectorizedTraversal, NoUnrolling>\n{\n  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)\n  {\n    const Index size = kernel.size();\n    typedef typename Kernel::Scalar Scalar;\n    typedef typename Kernel::PacketType PacketType;\n    enum {\n      requestedAlignment = Kernel::AssignmentTraits::LinearRequiredAlignment,\n      packetSize = unpacket_traits<PacketType>::size,\n      dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(requestedAlignment),\n      dstAlignment = packet_traits<Scalar>::AlignedOnScalar ? 
int(requestedAlignment)\n                                                            : int(Kernel::AssignmentTraits::DstAlignment),\n      srcAlignment = Kernel::AssignmentTraits::JointAlignment\n    };\n    const Index alignedStart = dstIsAligned ? 0 : internal::first_aligned<requestedAlignment>(kernel.dstDataPtr(), size);\n    const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize;\n\n    unaligned_dense_assignment_loop<dstIsAligned!=0>::run(kernel, 0, alignedStart);\n\n    for(Index index = alignedStart; index < alignedEnd; index += packetSize)\n      kernel.template assignPacket<dstAlignment, srcAlignment, PacketType>(index);\n\n    unaligned_dense_assignment_loop<>::run(kernel, alignedEnd, size);\n  }\n};\n\ntemplate<typename Kernel>\nstruct dense_assignment_loop<Kernel, LinearVectorizedTraversal, CompleteUnrolling>\n{\n  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)\n  {\n    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;\n    typedef typename Kernel::PacketType PacketType;\n\n    enum { size = DstXprType::SizeAtCompileTime,\n           packetSize =unpacket_traits<PacketType>::size,\n           alignedSize = (int(size)/packetSize)*packetSize };\n\n    copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, alignedSize>::run(kernel);\n    copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, alignedSize, size>::run(kernel);\n  }\n};\n\n/**************************\n*** Inner vectorization ***\n**************************/\n\ntemplate<typename Kernel>\nstruct dense_assignment_loop<Kernel, InnerVectorizedTraversal, NoUnrolling>\n{\n  typedef typename Kernel::PacketType PacketType;\n  enum {\n    SrcAlignment = Kernel::AssignmentTraits::SrcAlignment,\n    DstAlignment = Kernel::AssignmentTraits::DstAlignment\n  };\n  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)\n  {\n    const Index innerSize = kernel.innerSize();\n    const Index outerSize = kernel.outerSize();\n    const Index packetSize = unpacket_traits<PacketType>::size;\n    for(Index outer = 0; outer < outerSize; ++outer)\n      for(Index inner = 0; inner < innerSize; inner+=packetSize)\n        kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, inner);\n  }\n};\n\ntemplate<typename Kernel>\nstruct dense_assignment_loop<Kernel, InnerVectorizedTraversal, CompleteUnrolling>\n{\n  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)\n  {\n    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;\n    copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);\n  }\n};\n\ntemplate<typename Kernel>\nstruct dense_assignment_loop<Kernel, InnerVectorizedTraversal, InnerUnrolling>\n{\n  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)\n  {\n    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;\n    typedef typename Kernel::AssignmentTraits Traits;\n    const Index outerSize = kernel.outerSize();\n    for(Index outer = 0; outer < outerSize; ++outer)\n      copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, DstXprType::InnerSizeAtCompileTime,\n                                                   Traits::SrcAlignment, Traits::DstAlignment>::run(kernel, outer);\n  }\n};\n\n/***********************\n*** Linear traversal ***\n***********************/\n\ntemplate<typename Kernel>\nstruct dense_assignment_loop<Kernel, LinearTraversal, NoUnrolling>\n{\n  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void 
run(Kernel &kernel)\n  {\n    const Index size = kernel.size();\n    for(Index i = 0; i < size; ++i)\n      kernel.assignCoeff(i);\n  }\n};\n\ntemplate<typename Kernel>\nstruct dense_assignment_loop<Kernel, LinearTraversal, CompleteUnrolling>\n{\n  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)\n  {\n    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;\n    copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);\n  }\n};\n\n/**************************\n*** Slice vectorization ***\n***************************/\n\ntemplate<typename Kernel>\nstruct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling>\n{\n  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)\n  {\n    typedef typename Kernel::Scalar Scalar;\n    typedef typename Kernel::PacketType PacketType;\n    enum {\n      packetSize = unpacket_traits<PacketType>::size,\n      requestedAlignment = int(Kernel::AssignmentTraits::InnerRequiredAlignment),\n      alignable = packet_traits<Scalar>::AlignedOnScalar || int(Kernel::AssignmentTraits::DstAlignment)>=sizeof(Scalar),\n      dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(requestedAlignment),\n      dstAlignment = alignable ? int(requestedAlignment)\n                               : int(Kernel::AssignmentTraits::DstAlignment)\n    };\n    const Scalar *dst_ptr = kernel.dstDataPtr();\n    if((!bool(dstIsAligned)) && (UIntPtr(dst_ptr) % sizeof(Scalar))>0)\n    {\n      // the pointer is not aligned-on scalar, so alignment is not possible\n      return dense_assignment_loop<Kernel,DefaultTraversal,NoUnrolling>::run(kernel);\n    }\n    const Index packetAlignedMask = packetSize - 1;\n    const Index innerSize = kernel.innerSize();\n    const Index outerSize = kernel.outerSize();\n    const Index alignedStep = alignable ? (packetSize - kernel.outerStride() % packetSize) & packetAlignedMask : 0;\n    Index alignedStart = ((!alignable) || bool(dstIsAligned)) ? 
0 : internal::first_aligned<requestedAlignment>(dst_ptr, innerSize);\n\n    for(Index outer = 0; outer < outerSize; ++outer)\n    {\n      const Index alignedEnd = alignedStart + ((innerSize-alignedStart) & ~packetAlignedMask);\n      // do the non-vectorizable part of the assignment\n      for(Index inner = 0; inner<alignedStart ; ++inner)\n        kernel.assignCoeffByOuterInner(outer, inner);\n\n      // do the vectorizable part of the assignment\n      for(Index inner = alignedStart; inner<alignedEnd; inner+=packetSize)\n        kernel.template assignPacketByOuterInner<dstAlignment, Unaligned, PacketType>(outer, inner);\n\n      // do the non-vectorizable part of the assignment\n      for(Index inner = alignedEnd; inner<innerSize ; ++inner)\n        kernel.assignCoeffByOuterInner(outer, inner);\n\n      alignedStart = numext::mini((alignedStart+alignedStep)%packetSize, innerSize);\n    }\n  }\n};\n\n#if EIGEN_UNALIGNED_VECTORIZE\ntemplate<typename Kernel>\nstruct dense_assignment_loop<Kernel, SliceVectorizedTraversal, InnerUnrolling>\n{\n  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)\n  {\n    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;\n    typedef typename Kernel::PacketType PacketType;\n\n    enum { innerSize = DstXprType::InnerSizeAtCompileTime,\n           packetSize =unpacket_traits<PacketType>::size,\n           vectorizableSize = (int(innerSize) / int(packetSize)) * int(packetSize),\n           size = DstXprType::SizeAtCompileTime };\n\n    for(Index outer = 0; outer < kernel.outerSize(); ++outer)\n    {\n      copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, vectorizableSize, 0, 0>::run(kernel, outer);\n      copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, vectorizableSize, innerSize>::run(kernel, outer);\n    }\n  }\n};\n#endif\n\n\n/***************************************************************************\n* Part 4 : Generic dense assignment kernel\n***************************************************************************/\n\n// This class generalizes the assignment of a coefficient (or packet) from one dense evaluator\n// to another dense writable evaluator.\n// It is parametrized by the two evaluators, and the actual assignment functor.\n// This abstraction level makes it possible to keep the evaluation loops as simple and as generic as possible.\n// One can customize the assignment using this generic dense_assignment_kernel with different\n// functors, or by completely overloading it, bypassing a functor.\ntemplate<typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor, int Version = Specialized>\nclass generic_dense_assignment_kernel\n{\nprotected:\n  typedef typename DstEvaluatorTypeT::XprType DstXprType;\n  typedef typename SrcEvaluatorTypeT::XprType SrcXprType;\npublic:\n\n  typedef DstEvaluatorTypeT DstEvaluatorType;\n  typedef SrcEvaluatorTypeT SrcEvaluatorType;\n  typedef typename DstEvaluatorType::Scalar Scalar;\n  typedef copy_using_evaluator_traits<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor> AssignmentTraits;\n  typedef typename AssignmentTraits::PacketType PacketType;\n\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  generic_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr)\n    : m_dst(dst), m_src(src), m_functor(func), m_dstExpr(dstExpr)\n  {\n    #ifdef EIGEN_DEBUG_ASSIGN\n    AssignmentTraits::debug();\n    #endif\n  }\n\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index size() const EIGEN_NOEXCEPT { return 
m_dstExpr.size(); }\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index innerSize() const EIGEN_NOEXCEPT { return m_dstExpr.innerSize(); }\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index outerSize() const EIGEN_NOEXCEPT { return m_dstExpr.outerSize(); }\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index rows() const EIGEN_NOEXCEPT { return m_dstExpr.rows(); }\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index cols() const EIGEN_NOEXCEPT { return m_dstExpr.cols(); }\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index outerStride() const EIGEN_NOEXCEPT { return m_dstExpr.outerStride(); }\n\n  EIGEN_DEVICE_FUNC DstEvaluatorType& dstEvaluator() EIGEN_NOEXCEPT { return m_dst; }\n  EIGEN_DEVICE_FUNC const SrcEvaluatorType& srcEvaluator() const EIGEN_NOEXCEPT { return m_src; }\n\n  /// Assign src(row,col) to dst(row,col) through the assignment functor.\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index row, Index col)\n  {\n    m_functor.assignCoeff(m_dst.coeffRef(row,col), m_src.coeff(row,col));\n  }\n\n  /// \\sa assignCoeff(Index,Index)\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index index)\n  {\n    m_functor.assignCoeff(m_dst.coeffRef(index), m_src.coeff(index));\n  }\n\n  /// \\sa assignCoeff(Index,Index)\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeffByOuterInner(Index outer, Index inner)\n  {\n    Index row = rowIndexByOuterInner(outer, inner);\n    Index col = colIndexByOuterInner(outer, inner);\n    assignCoeff(row, col);\n  }\n\n\n  template<int StoreMode, int LoadMode, typename PacketType>\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index row, Index col)\n  {\n    m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(row,col), m_src.template packet<LoadMode,PacketType>(row,col));\n  }\n\n  template<int StoreMode, int LoadMode, typename PacketType>\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index index)\n  {\n    m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(index), m_src.template packet<LoadMode,PacketType>(index));\n  }\n\n  template<int StoreMode, int LoadMode, typename PacketType>\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacketByOuterInner(Index outer, Index inner)\n  {\n    Index row = rowIndexByOuterInner(outer, inner);\n    Index col = colIndexByOuterInner(outer, inner);\n    assignPacket<StoreMode,LoadMode,PacketType>(row, col);\n  }\n\n  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index rowIndexByOuterInner(Index outer, Index inner)\n  {\n    typedef typename DstEvaluatorType::ExpressionTraits Traits;\n    return int(Traits::RowsAtCompileTime) == 1 ? 0\n      : int(Traits::ColsAtCompileTime) == 1 ? inner\n      : int(DstEvaluatorType::Flags)&RowMajorBit ? outer\n      : inner;\n  }\n\n  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index colIndexByOuterInner(Index outer, Index inner)\n  {\n    typedef typename DstEvaluatorType::ExpressionTraits Traits;\n    return int(Traits::ColsAtCompileTime) == 1 ? 0\n      : int(Traits::RowsAtCompileTime) == 1 ? inner\n      : int(DstEvaluatorType::Flags)&RowMajorBit ? inner\n      : outer;\n  }\n\n  EIGEN_DEVICE_FUNC const Scalar* dstDataPtr() const\n  {\n    return m_dstExpr.data();\n  }\n\nprotected:\n  DstEvaluatorType& m_dst;\n  const SrcEvaluatorType& m_src;\n  const Functor &m_functor;\n  // TODO find a way to avoid the need for the original expression\n  DstXprType& m_dstExpr;\n};\n\n// Special kernel used when computing small products whose operands have dynamic dimensions.  
It ensures that the\n// PacketSize used is no larger than 4, thereby increasing the chance that vectorized instructions will be used\n// when computing the product.\n\ntemplate<typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor>\nclass restricted_packet_dense_assignment_kernel : public generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, BuiltIn>\n{\nprotected:\n  typedef generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, BuiltIn> Base;\n public:\n    typedef typename Base::Scalar Scalar;\n    typedef typename Base::DstXprType DstXprType;\n    typedef copy_using_evaluator_traits<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, 4> AssignmentTraits;\n    typedef typename AssignmentTraits::PacketType PacketType;\n\n    EIGEN_DEVICE_FUNC restricted_packet_dense_assignment_kernel(DstEvaluatorTypeT &dst, const SrcEvaluatorTypeT &src, const Functor &func, DstXprType& dstExpr)\n    : Base(dst, src, func, dstExpr)\n  {\n  }\n };\n\n/***************************************************************************\n* Part 5 : Entry point for dense rectangular assignment\n***************************************************************************/\n\ntemplate<typename DstXprType,typename SrcXprType, typename Functor>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\nvoid resize_if_allowed(DstXprType &dst, const SrcXprType& src, const Functor &/*func*/)\n{\n  EIGEN_ONLY_USED_FOR_DEBUG(dst);\n  EIGEN_ONLY_USED_FOR_DEBUG(src);\n  eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());\n}\n\ntemplate<typename DstXprType,typename SrcXprType, typename T1, typename T2>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\nvoid resize_if_allowed(DstXprType &dst, const SrcXprType& src, const internal::assign_op<T1,T2> &/*func*/)\n{\n  Index dstRows = src.rows();\n  Index dstCols = src.cols();\n  if(((dst.rows()!=dstRows) || (dst.cols()!=dstCols)))\n    dst.resize(dstRows, dstCols);\n  eigen_assert(dst.rows() == dstRows && dst.cols() == dstCols);\n}\n\ntemplate<typename DstXprType, typename SrcXprType, typename Functor>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src, const Functor &func)\n{\n  typedef evaluator<DstXprType> DstEvaluatorType;\n  typedef evaluator<SrcXprType> SrcEvaluatorType;\n\n  SrcEvaluatorType srcEvaluator(src);\n\n  // NOTE To properly handle A = (A*A.transpose())/s with A rectangular,\n  // we need to resize the destination after the source evaluator has been created.\n  resize_if_allowed(dst, src, func);\n\n  DstEvaluatorType dstEvaluator(dst);\n\n  typedef generic_dense_assignment_kernel<DstEvaluatorType,SrcEvaluatorType,Functor> Kernel;\n  Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived());\n\n  dense_assignment_loop<Kernel>::run(kernel);\n}\n\n// Specialization for filling the destination with a constant value.\n#ifndef EIGEN_GPU_COMPILE_PHASE\ntemplate<typename DstXprType>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const Eigen::CwiseNullaryOp<Eigen::internal::scalar_constant_op<typename DstXprType::Scalar>, DstXprType>& src, const internal::assign_op<typename DstXprType::Scalar,typename DstXprType::Scalar>& func)\n{\n  resize_if_allowed(dst, src, func);\n  std::fill_n(dst.data(), dst.size(), src.functor()());\n}\n#endif\n\ntemplate<typename DstXprType, typename SrcXprType>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src)\n{\n  
call_dense_assignment_loop(dst, src, internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar>());\n}\n\n/***************************************************************************\n* Part 6 : Generic assignment\n***************************************************************************/\n\n// Based on the respective shapes of the destination and source,\n// the class AssignmentKind determines the kind of assignment mechanism.\n// AssignmentKind must define a Kind typedef.\ntemplate<typename DstShape, typename SrcShape> struct AssignmentKind;\n\n// Assignment kinds defined in this file:\nstruct Dense2Dense {};\nstruct EigenBase2EigenBase {};\n\ntemplate<typename,typename> struct AssignmentKind { typedef EigenBase2EigenBase Kind; };\ntemplate<> struct AssignmentKind<DenseShape,DenseShape> { typedef Dense2Dense Kind; };\n\n// This is the main assignment class\ntemplate< typename DstXprType, typename SrcXprType, typename Functor,\n          typename Kind = typename AssignmentKind< typename evaluator_traits<DstXprType>::Shape , typename evaluator_traits<SrcXprType>::Shape >::Kind,\n          typename EnableIf = void>\nstruct Assignment;\n\n\n// The only purpose of this call_assignment() function is to deal with noalias() / \"assume-aliasing\" and automatic transposition.\n// Indeed, I (Gael) think that this concept of \"assume-aliasing\" was a mistake, and it makes things quite complicated.\n// So this intermediate function removes everything related to \"assume-aliasing\" such that Assignment\n// does not have to bother with these annoying details.\n\ntemplate<typename Dst, typename Src>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\nvoid call_assignment(Dst& dst, const Src& src)\n{\n  call_assignment(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());\n}\ntemplate<typename Dst, typename Src>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\nvoid call_assignment(const Dst& dst, const Src& src)\n{\n  call_assignment(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());\n}\n\n// Deal with \"assume-aliasing\"\ntemplate<typename Dst, typename Src, typename Func>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\nvoid call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if< evaluator_assume_aliasing<Src>::value, void*>::type = 0)\n{\n  typename plain_matrix_type<Src>::type tmp(src);\n  call_assignment_no_alias(dst, tmp, func);\n}\n\ntemplate<typename Dst, typename Src, typename Func>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\nvoid call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if<!evaluator_assume_aliasing<Src>::value, void*>::type = 0)\n{\n  call_assignment_no_alias(dst, src, func);\n}\n\n// by-pass \"assume-aliasing\"\n// When there is no aliasing, we require that 'dst' has been properly resized\ntemplate<typename Dst, template <typename> class StorageBase, typename Src, typename Func>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\nvoid call_assignment(NoAlias<Dst,StorageBase>& dst, const Src& src, const Func& func)\n{\n  call_assignment_no_alias(dst.expression(), src, func);\n}\n\n\ntemplate<typename Dst, typename Src, typename Func>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\nvoid call_assignment_no_alias(Dst& dst, const Src& src, const Func& func)\n{\n  enum {\n    NeedToTranspose = (    (int(Dst::RowsAtCompileTime) == 1 && int(Src::ColsAtCompileTime) == 1)\n                        || (int(Dst::ColsAtCompileTime) == 1 && int(Src::RowsAtCompileTime) == 1)\n                      ) && int(Dst::SizeAtCompileTime) != 1\n  
};\n\n  typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst>::type ActualDstTypeCleaned;\n  typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst&>::type ActualDstType;\n  ActualDstType actualDst(dst);\n\n  // TODO check whether this is the right place to perform these checks:\n  EIGEN_STATIC_ASSERT_LVALUE(Dst)\n  EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(ActualDstTypeCleaned,Src)\n  EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename ActualDstTypeCleaned::Scalar,typename Src::Scalar);\n\n  Assignment<ActualDstTypeCleaned,Src,Func>::run(actualDst, src, func);\n}\n\ntemplate<typename Dst, typename Src, typename Func>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\nvoid call_restricted_packet_assignment_no_alias(Dst& dst, const Src& src, const Func& func)\n{\n    typedef evaluator<Dst> DstEvaluatorType;\n    typedef evaluator<Src> SrcEvaluatorType;\n    typedef restricted_packet_dense_assignment_kernel<DstEvaluatorType,SrcEvaluatorType,Func> Kernel;\n\n    EIGEN_STATIC_ASSERT_LVALUE(Dst)\n    EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename Dst::Scalar,typename Src::Scalar);\n\n    SrcEvaluatorType srcEvaluator(src);\n    resize_if_allowed(dst, src, func);\n\n    DstEvaluatorType dstEvaluator(dst);\n    Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived());\n\n    dense_assignment_loop<Kernel>::run(kernel);\n}\n\ntemplate<typename Dst, typename Src>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\nvoid call_assignment_no_alias(Dst& dst, const Src& src)\n{\n  call_assignment_no_alias(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());\n}\n\ntemplate<typename Dst, typename Src, typename Func>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\nvoid call_assignment_no_alias_no_transpose(Dst& dst, const Src& src, const Func& func)\n{\n  // TODO check whether this is the right place to perform these checks:\n  EIGEN_STATIC_ASSERT_LVALUE(Dst)\n  EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Dst,Src)\n  EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename Dst::Scalar,typename Src::Scalar);\n\n  Assignment<Dst,Src,Func>::run(dst, src, func);\n}\ntemplate<typename Dst, typename Src>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\nvoid call_assignment_no_alias_no_transpose(Dst& dst, const Src& src)\n{\n  call_assignment_no_alias_no_transpose(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());\n}\n\n// forward declaration\ntemplate<typename Dst, typename Src> void check_for_aliasing(const Dst &dst, const Src &src);\n\n// Generic Dense to Dense assignment\n// Note that the last template argument \"Weak\" is needed to make it possible to perform\n// both partial specialization+SFINAE without ambiguous specialization\ntemplate< typename DstXprType, typename SrcXprType, typename Functor, typename Weak>\nstruct Assignment<DstXprType, SrcXprType, Functor, Dense2Dense, Weak>\n{\n  EIGEN_DEVICE_FUNC\n  static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Functor &func)\n  {\n#ifndef EIGEN_NO_DEBUG\n    internal::check_for_aliasing(dst, src);\n#endif\n\n    call_dense_assignment_loop(dst, src, func);\n  }\n};\n\n// Generic assignment through evalTo.\n// TODO: not sure we have to keep that one, but it helps porting current code to new evaluator mechanism.\n// Note that the last template argument \"Weak\" is needed to make it possible to perform\n// both partial specialization+SFINAE without ambiguous specialization\ntemplate< typename DstXprType, typename SrcXprType, typename Functor, typename Weak>\nstruct Assignment<DstXprType, 
SrcXprType, Functor, EigenBase2EigenBase, Weak>\n{\n  EIGEN_DEVICE_FUNC\n  static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)\n  {\n    Index dstRows = src.rows();\n    Index dstCols = src.cols();\n    if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))\n      dst.resize(dstRows, dstCols);\n\n    eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());\n    src.evalTo(dst);\n  }\n\n  // NOTE The following two functions are templated to avoid their instantiation if not needed\n  //      This is needed because some expressions support evalTo only and/or have 'void' as scalar type.\n  template<typename SrcScalarType>\n  EIGEN_DEVICE_FUNC\n  static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<typename DstXprType::Scalar,SrcScalarType> &/*func*/)\n  {\n    Index dstRows = src.rows();\n    Index dstCols = src.cols();\n    if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))\n      dst.resize(dstRows, dstCols);\n\n    eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());\n    src.addTo(dst);\n  }\n\n  template<typename SrcScalarType>\n  EIGEN_DEVICE_FUNC\n  static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<typename DstXprType::Scalar,SrcScalarType> &/*func*/)\n  {\n    Index dstRows = src.rows();\n    Index dstCols = src.cols();\n    if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))\n      dst.resize(dstRows, dstCols);\n\n    eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());\n    src.subTo(dst);\n  }\n};\n\n} // namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_ASSIGN_EVALUATOR_H\n"
  },
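  {
    "path": "external_libs/eigen-3.4.0/doc/examples/example_AssignEvaluator_noalias.cpp",
    "content": "// Illustrative sketch added next to the vendored Eigen sources: this file (path and all)\n// is a hypothetical addition, not part of upstream Eigen 3.4.0 nor of this repository.\n// It shows, through the public API only, the \"assume-aliasing\" behaviour implemented by\n// internal::call_assignment() in AssignEvaluator.h: a product on the right-hand side is\n// evaluated into a temporary by default, while .noalias() dispatches to the\n// call_assignment(NoAlias<...>, ...) overload and skips that temporary.\n#include <Eigen/Dense>\n#include <iostream>\n\nint main()\n{\n  Eigen::MatrixXd A = Eigen::MatrixXd::Random(3, 3);\n  Eigen::MatrixXd B = Eigen::MatrixXd::Random(3, 3);\n  Eigen::MatrixXd C(3, 3);\n\n  // Products \"assume aliasing\": Eigen evaluates A*B into a temporary first,\n  // so this stays correct even though the destination overlaps an operand.\n  A = A * B;\n\n  // C does not alias A or B, so the temporary can be skipped explicitly;\n  // internally this bypasses the assume-aliasing branch of call_assignment().\n  C.noalias() = A * B;\n\n  std::cout << C << std::endl;\n  return 0;\n}\n"
  },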
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/Assign_MKL.h",
    "content": "/*\n Copyright (c) 2011, Intel Corporation. All rights reserved.\n Copyright (C) 2015 Gael Guennebaud <gael.guennebaud@inria.fr>\n \n Redistribution and use in source and binary forms, with or without modification,\n are permitted provided that the following conditions are met:\n\n * Redistributions of source code must retain the above copyright notice, this\n   list of conditions and the following disclaimer.\n * Redistributions in binary form must reproduce the above copyright notice,\n   this list of conditions and the following disclaimer in the documentation\n   and/or other materials provided with the distribution.\n * Neither the name of Intel Corporation nor the names of its contributors may\n   be used to endorse or promote products derived from this software without\n   specific prior written permission.\n\n THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\" AND\n ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED\n WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE\n DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR\n ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES\n (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;\n LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON\n ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\n SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\n ********************************************************************************\n *   Content : Eigen bindings to Intel(R) MKL\n *   MKL VML support for coefficient-wise unary Eigen expressions like a=b.sin()\n ********************************************************************************\n*/\n\n#ifndef EIGEN_ASSIGN_VML_H\n#define EIGEN_ASSIGN_VML_H\n\nnamespace Eigen { \n\nnamespace internal {\n\ntemplate<typename Dst, typename Src>\nclass vml_assign_traits\n{\n  private:\n    enum {\n      DstHasDirectAccess = Dst::Flags & DirectAccessBit,\n      SrcHasDirectAccess = Src::Flags & DirectAccessBit,\n      StorageOrdersAgree = (int(Dst::IsRowMajor) == int(Src::IsRowMajor)),\n      InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime)\n                : int(Dst::Flags)&RowMajorBit ? int(Dst::ColsAtCompileTime)\n                : int(Dst::RowsAtCompileTime),\n      InnerMaxSize  = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime)\n                    : int(Dst::Flags)&RowMajorBit ? int(Dst::MaxColsAtCompileTime)\n                    : int(Dst::MaxRowsAtCompileTime),\n      MaxSizeAtCompileTime = Dst::SizeAtCompileTime,\n\n      MightEnableVml = StorageOrdersAgree && DstHasDirectAccess && SrcHasDirectAccess && Src::InnerStrideAtCompileTime==1 && Dst::InnerStrideAtCompileTime==1,\n      MightLinearize = MightEnableVml && (int(Dst::Flags) & int(Src::Flags) & LinearAccessBit),\n      VmlSize = MightLinearize ? MaxSizeAtCompileTime : InnerMaxSize,\n      LargeEnough = VmlSize==Dynamic || VmlSize>=EIGEN_MKL_VML_THRESHOLD\n    };\n  public:\n    enum {\n      EnableVml = MightEnableVml && LargeEnough,\n      Traversal = MightLinearize ? 
LinearTraversal : DefaultTraversal\n    };\n};\n\n#define EIGEN_PP_EXPAND(ARG) ARG\n#if !defined (EIGEN_FAST_MATH) || (EIGEN_FAST_MATH != 1)\n#define EIGEN_VMLMODE_EXPAND_xLA , VML_HA\n#else\n#define EIGEN_VMLMODE_EXPAND_xLA , VML_LA\n#endif\n\n#define EIGEN_VMLMODE_EXPAND_x_\n\n#define EIGEN_VMLMODE_PREFIX_xLA vm\n#define EIGEN_VMLMODE_PREFIX_x_  v\n#define EIGEN_VMLMODE_PREFIX(VMLMODE) EIGEN_CAT(EIGEN_VMLMODE_PREFIX_x,VMLMODE)\n\n#define EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE, VMLMODE)                                           \\\n  template< typename DstXprType, typename SrcXprNested>                                                                         \\\n  struct Assignment<DstXprType, CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested>, assign_op<EIGENTYPE,EIGENTYPE>,   \\\n                   Dense2Dense, typename enable_if<vml_assign_traits<DstXprType,SrcXprNested>::EnableVml>::type> {              \\\n    typedef CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested> SrcXprType;                                            \\\n    static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE,EIGENTYPE> &func) {                       \\\n      resize_if_allowed(dst, src, func);                                                                                        \\\n      eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());                                                       \\\n      if(vml_assign_traits<DstXprType,SrcXprNested>::Traversal==LinearTraversal) {                                              \\\n        VMLOP(dst.size(), (const VMLTYPE*)src.nestedExpression().data(),                                                        \\\n              (VMLTYPE*)dst.data() EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_x##VMLMODE) );                                           \\\n      } else {                                                                                                                  \\\n        const Index outerSize = dst.outerSize();                                                                                \\\n        for(Index outer = 0; outer < outerSize; ++outer) {                                                                      \\\n          const EIGENTYPE *src_ptr = src.IsRowMajor ? &(src.nestedExpression().coeffRef(outer,0)) :                             \\\n                                                      &(src.nestedExpression().coeffRef(0, outer));                             \\\n          EIGENTYPE *dst_ptr = dst.IsRowMajor ? 
&(dst.coeffRef(outer,0)) : &(dst.coeffRef(0, outer));                           \\\n          VMLOP( dst.innerSize(), (const VMLTYPE*)src_ptr,                                                                      \\\n                (VMLTYPE*)dst_ptr EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_x##VMLMODE));                                             \\\n        }                                                                                                                       \\\n      }                                                                                                                         \\\n    }                                                                                                                           \\\n  };                                                                                                                            \\\n\n\n#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP, VMLMODE)                                                         \\\n  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),s##VMLOP), float, float, VMLMODE)           \\\n  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),d##VMLOP), double, double, VMLMODE)\n\n#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_CPLX(EIGENOP, VMLOP, VMLMODE)                                                         \\\n  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),c##VMLOP), scomplex, MKL_Complex8, VMLMODE) \\\n  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),z##VMLOP), dcomplex, MKL_Complex16, VMLMODE)\n  \n#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS(EIGENOP, VMLOP, VMLMODE)                                                              \\\n  EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP, VMLMODE)                                                               \\\n  EIGEN_MKL_VML_DECLARE_UNARY_CALLS_CPLX(EIGENOP, VMLOP, VMLMODE)\n\n  \nEIGEN_MKL_VML_DECLARE_UNARY_CALLS(sin,   Sin,   LA)\nEIGEN_MKL_VML_DECLARE_UNARY_CALLS(asin,  Asin,  LA)\nEIGEN_MKL_VML_DECLARE_UNARY_CALLS(sinh,  Sinh,  LA)\nEIGEN_MKL_VML_DECLARE_UNARY_CALLS(cos,   Cos,   LA)\nEIGEN_MKL_VML_DECLARE_UNARY_CALLS(acos,  Acos,  LA)\nEIGEN_MKL_VML_DECLARE_UNARY_CALLS(cosh,  Cosh,  LA)\nEIGEN_MKL_VML_DECLARE_UNARY_CALLS(tan,   Tan,   LA)\nEIGEN_MKL_VML_DECLARE_UNARY_CALLS(atan,  Atan,  LA)\nEIGEN_MKL_VML_DECLARE_UNARY_CALLS(tanh,  Tanh,  LA)\n// EIGEN_MKL_VML_DECLARE_UNARY_CALLS(abs,   Abs,    _)\nEIGEN_MKL_VML_DECLARE_UNARY_CALLS(exp,   Exp,   LA)\nEIGEN_MKL_VML_DECLARE_UNARY_CALLS(log,   Ln,    LA)\nEIGEN_MKL_VML_DECLARE_UNARY_CALLS(log10, Log10, LA)\nEIGEN_MKL_VML_DECLARE_UNARY_CALLS(sqrt,  Sqrt,  _)\n\nEIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(square, Sqr,   _)\nEIGEN_MKL_VML_DECLARE_UNARY_CALLS_CPLX(arg, Arg,      _)\nEIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(round, Round,  _)\nEIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(floor, Floor,  _)\nEIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(ceil,  Ceil,   _)\n\n#define EIGEN_MKL_VML_DECLARE_POW_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE, VMLMODE)                                           \\\n  template< typename DstXprType, typename SrcXprNested, typename Plain>                                                       \\\n  struct Assignment<DstXprType, CwiseBinaryOp<scalar_##EIGENOP##_op<EIGENTYPE,EIGENTYPE>, SrcXprNested,                       \\\n                    const CwiseNullaryOp<internal::scalar_constant_op<EIGENTYPE>,Plain> >, assign_op<EIGENTYPE,EIGENTYPE>,  
  \\\n                   Dense2Dense, typename enable_if<vml_assign_traits<DstXprType,SrcXprNested>::EnableVml>::type> {            \\\n    typedef CwiseBinaryOp<scalar_##EIGENOP##_op<EIGENTYPE,EIGENTYPE>, SrcXprNested,                                           \\\n                    const CwiseNullaryOp<internal::scalar_constant_op<EIGENTYPE>,Plain> > SrcXprType;                         \\\n    static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE,EIGENTYPE> &func) {                     \\\n      resize_if_allowed(dst, src, func);                                                                                      \\\n      eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());                                                     \\\n      VMLTYPE exponent = reinterpret_cast<const VMLTYPE&>(src.rhs().functor().m_other);                                       \\\n      if(vml_assign_traits<DstXprType,SrcXprNested>::Traversal==LinearTraversal)                                              \\\n      {                                                                                                                       \\\n        VMLOP( dst.size(), (const VMLTYPE*)src.lhs().data(), exponent,                                                        \\\n              (VMLTYPE*)dst.data() EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_x##VMLMODE) );                                         \\\n      } else {                                                                                                                \\\n        const Index outerSize = dst.outerSize();                                                                              \\\n        for(Index outer = 0; outer < outerSize; ++outer) {                                                                    \\\n          const EIGENTYPE *src_ptr = src.IsRowMajor ? &(src.lhs().coeffRef(outer,0)) :                                        \\\n                                                      &(src.lhs().coeffRef(0, outer));                                        \\\n          EIGENTYPE *dst_ptr = dst.IsRowMajor ? &(dst.coeffRef(outer,0)) : &(dst.coeffRef(0, outer));                         \\\n          VMLOP( dst.innerSize(), (const VMLTYPE*)src_ptr, exponent,                                                          \\\n                 (VMLTYPE*)dst_ptr EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_x##VMLMODE));                                          \\\n        }                                                                                                                     \\\n      }                                                                                                                       \\\n    }                                                                                                                         \\\n  };\n  \nEIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmsPowx, float,    float,         LA)\nEIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmdPowx, double,   double,        LA)\nEIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmcPowx, scomplex, MKL_Complex8,  LA)\nEIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmzPowx, dcomplex, MKL_Complex16, LA)\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_ASSIGN_VML_H\n"
  },
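  {
    "path": "external_libs/eigen-3.4.0/doc/examples/example_Assign_MKL_vml.cpp",
    "content": "// Illustrative sketch added next to the vendored Eigen sources: this file (path and all)\n// is a hypothetical addition, not part of upstream Eigen 3.4.0 nor of this repository.\n// Assign_MKL.h does not change the user-visible API: when Eigen is built with\n// EIGEN_USE_MKL_VML and the arrays are large enough (see EIGEN_MKL_VML_THRESHOLD),\n// expressions like the ones below are routed to MKL VML kernels (e.g. vmdSin for a\n// double-precision sin()) instead of Eigen's own scalar/packet loops.\n#include <Eigen/Dense>\n#include <iostream>\n\nint main()\n{\n  Eigen::ArrayXd b = Eigen::ArrayXd::LinSpaced(1000, 0.0, 1.0);\n\n  // Coefficient-wise unary expressions covered by the VML bindings\n  // (sin, exp, log, sqrt, ...).\n  Eigen::ArrayXd a = b.sin();\n  Eigen::ArrayXd c = b.exp();\n\n  // pow with a constant exponent maps to the Powx family of VML calls.\n  Eigen::ArrayXd d = b.pow(3.5);\n\n  std::cout << a.sum() + c.sum() + d.sum() << std::endl;\n  return 0;\n}\n"
  },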
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/BandMatrix.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_BANDMATRIX_H\n#define EIGEN_BANDMATRIX_H\n\nnamespace Eigen {\n\nnamespace internal {\n\ntemplate<typename Derived>\nclass BandMatrixBase : public EigenBase<Derived>\n{\n  public:\n\n    enum {\n      Flags = internal::traits<Derived>::Flags,\n      CoeffReadCost = internal::traits<Derived>::CoeffReadCost,\n      RowsAtCompileTime = internal::traits<Derived>::RowsAtCompileTime,\n      ColsAtCompileTime = internal::traits<Derived>::ColsAtCompileTime,\n      MaxRowsAtCompileTime = internal::traits<Derived>::MaxRowsAtCompileTime,\n      MaxColsAtCompileTime = internal::traits<Derived>::MaxColsAtCompileTime,\n      Supers = internal::traits<Derived>::Supers,\n      Subs   = internal::traits<Derived>::Subs,\n      Options = internal::traits<Derived>::Options\n    };\n    typedef typename internal::traits<Derived>::Scalar Scalar;\n    typedef Matrix<Scalar,RowsAtCompileTime,ColsAtCompileTime> DenseMatrixType;\n    typedef typename DenseMatrixType::StorageIndex StorageIndex;\n    typedef typename internal::traits<Derived>::CoefficientsType CoefficientsType;\n    typedef EigenBase<Derived> Base;\n\n  protected:\n    enum {\n      DataRowsAtCompileTime = ((Supers!=Dynamic) && (Subs!=Dynamic))\n                            ? 1 + Supers + Subs\n                            : Dynamic,\n      SizeAtCompileTime = EIGEN_SIZE_MIN_PREFER_DYNAMIC(RowsAtCompileTime,ColsAtCompileTime)\n    };\n\n  public:\n\n    using Base::derived;\n    using Base::rows;\n    using Base::cols;\n\n    /** \\returns the number of super diagonals */\n    inline Index supers() const { return derived().supers(); }\n\n    /** \\returns the number of sub diagonals */\n    inline Index subs() const { return derived().subs(); }\n\n    /** \\returns an expression of the underlying coefficient matrix */\n    inline const CoefficientsType& coeffs() const { return derived().coeffs(); }\n\n    /** \\returns an expression of the underlying coefficient matrix */\n    inline CoefficientsType& coeffs() { return derived().coeffs(); }\n\n    /** \\returns a vector expression of the \\a i -th column,\n      * only the meaningful part is returned.\n      * \\warning the internal storage must be column major. 
*/\n    inline Block<CoefficientsType,Dynamic,1> col(Index i)\n    {\n      EIGEN_STATIC_ASSERT((int(Options) & int(RowMajor)) == 0, THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES);\n      Index start = 0;\n      Index len = coeffs().rows();\n      if (i<=supers())\n      {\n        start = supers()-i;\n        len = (std::min)(rows(),std::max<Index>(0,coeffs().rows() - (supers()-i)));\n      }\n      else if (i>=rows()-subs())\n        len = std::max<Index>(0,coeffs().rows() - (i + 1 - rows() + subs()));\n      return Block<CoefficientsType,Dynamic,1>(coeffs(), start, i, len, 1);\n    }\n\n    /** \\returns a vector expression of the main diagonal */\n    inline Block<CoefficientsType,1,SizeAtCompileTime> diagonal()\n    { return Block<CoefficientsType,1,SizeAtCompileTime>(coeffs(),supers(),0,1,(std::min)(rows(),cols())); }\n\n    /** \\returns a vector expression of the main diagonal (const version) */\n    inline const Block<const CoefficientsType,1,SizeAtCompileTime> diagonal() const\n    { return Block<const CoefficientsType,1,SizeAtCompileTime>(coeffs(),supers(),0,1,(std::min)(rows(),cols())); }\n\n    template<int Index> struct DiagonalIntReturnType {\n      enum {\n        ReturnOpposite = (int(Options) & int(SelfAdjoint)) && (((Index) > 0 && Supers == 0) || ((Index) < 0 && Subs == 0)),\n        Conjugate = ReturnOpposite && NumTraits<Scalar>::IsComplex,\n        ActualIndex = ReturnOpposite ? -Index : Index,\n        DiagonalSize = (RowsAtCompileTime==Dynamic || ColsAtCompileTime==Dynamic)\n                     ? Dynamic\n                     : (ActualIndex<0\n                     ? EIGEN_SIZE_MIN_PREFER_DYNAMIC(ColsAtCompileTime, RowsAtCompileTime + ActualIndex)\n                     : EIGEN_SIZE_MIN_PREFER_DYNAMIC(RowsAtCompileTime, ColsAtCompileTime - ActualIndex))\n      };\n      typedef Block<CoefficientsType,1, DiagonalSize> BuildType;\n      typedef typename internal::conditional<Conjugate,\n                 CwiseUnaryOp<internal::scalar_conjugate_op<Scalar>,BuildType >,\n                 BuildType>::type Type;\n    };\n\n    /** \\returns a vector expression of the \\a N -th sub or super diagonal */\n    template<int N> inline typename DiagonalIntReturnType<N>::Type diagonal()\n    {\n      return typename DiagonalIntReturnType<N>::BuildType(coeffs(), supers()-N, (std::max)(0,N), 1, diagonalLength(N));\n    }\n\n    /** \\returns a vector expression of the \\a N -th sub or super diagonal */\n    template<int N> inline const typename DiagonalIntReturnType<N>::Type diagonal() const\n    {\n      return typename DiagonalIntReturnType<N>::BuildType(coeffs(), supers()-N, (std::max)(0,N), 1, diagonalLength(N));\n    }\n\n    /** \\returns a vector expression of the \\a i -th sub or super diagonal */\n    inline Block<CoefficientsType,1,Dynamic> diagonal(Index i)\n    {\n      eigen_assert((i<0 && -i<=subs()) || (i>=0 && i<=supers()));\n      return Block<CoefficientsType,1,Dynamic>(coeffs(), supers()-i, std::max<Index>(0,i), 1, diagonalLength(i));\n    }\n\n    /** \\returns a vector expression of the \\a i -th sub or super diagonal */\n    inline const Block<const CoefficientsType,1,Dynamic> diagonal(Index i) const\n    {\n      eigen_assert((i<0 && -i<=subs()) || (i>=0 && i<=supers()));\n      return Block<const CoefficientsType,1,Dynamic>(coeffs(), supers()-i, std::max<Index>(0,i), 1, diagonalLength(i));\n    }\n\n    template<typename Dest> inline void evalTo(Dest& dst) const\n    {\n      dst.resize(rows(),cols());\n      dst.setZero();\n      dst.diagonal() = diagonal();\n 
     for (Index i=1; i<=supers();++i)\n        dst.diagonal(i) = diagonal(i);\n      for (Index i=1; i<=subs();++i)\n        dst.diagonal(-i) = diagonal(-i);\n    }\n\n    DenseMatrixType toDenseMatrix() const\n    {\n      DenseMatrixType res(rows(),cols());\n      evalTo(res);\n      return res;\n    }\n\n  protected:\n\n    inline Index diagonalLength(Index i) const\n    { return i<0 ? (std::min)(cols(),rows()+i) : (std::min)(rows(),cols()-i); }\n};\n\n/**\n  * \\class BandMatrix\n  * \\ingroup Core_Module\n  *\n  * \\brief Represents a rectangular matrix with banded storage\n  *\n  * \\tparam _Scalar Numeric type, i.e. float, double, int\n  * \\tparam _Rows Number of rows, or \\b Dynamic\n  * \\tparam _Cols Number of columns, or \\b Dynamic\n  * \\tparam _Supers Number of super diagonals\n  * \\tparam _Subs Number of sub diagonals\n  * \\tparam _Options A combination of either \\b #RowMajor or \\b #ColMajor, and of \\b #SelfAdjoint\n  *                  The former controls \\ref TopicStorageOrders \"storage order\", and defaults to\n  *                  column-major. The latter controls whether the matrix represents a selfadjoint\n  *                  matrix in which case either Supers or Subs has to be null.\n  *\n  * \\sa class TridiagonalMatrix\n  */\n\ntemplate<typename _Scalar, int _Rows, int _Cols, int _Supers, int _Subs, int _Options>\nstruct traits<BandMatrix<_Scalar,_Rows,_Cols,_Supers,_Subs,_Options> >\n{\n  typedef _Scalar Scalar;\n  typedef Dense StorageKind;\n  typedef Eigen::Index StorageIndex;\n  enum {\n    CoeffReadCost = NumTraits<Scalar>::ReadCost,\n    RowsAtCompileTime = _Rows,\n    ColsAtCompileTime = _Cols,\n    MaxRowsAtCompileTime = _Rows,\n    MaxColsAtCompileTime = _Cols,\n    Flags = LvalueBit,\n    Supers = _Supers,\n    Subs = _Subs,\n    Options = _Options,\n    DataRowsAtCompileTime = ((Supers!=Dynamic) && (Subs!=Dynamic)) ? 1 + Supers + Subs : Dynamic\n  };\n  typedef Matrix<Scalar, DataRowsAtCompileTime, ColsAtCompileTime, int(Options) & int(RowMajor) ? 
RowMajor : ColMajor> CoefficientsType;\n};\n\ntemplate<typename _Scalar, int Rows, int Cols, int Supers, int Subs, int Options>\nclass BandMatrix : public BandMatrixBase<BandMatrix<_Scalar,Rows,Cols,Supers,Subs,Options> >\n{\n  public:\n\n    typedef typename internal::traits<BandMatrix>::Scalar Scalar;\n    typedef typename internal::traits<BandMatrix>::StorageIndex StorageIndex;\n    typedef typename internal::traits<BandMatrix>::CoefficientsType CoefficientsType;\n\n    explicit inline BandMatrix(Index rows=Rows, Index cols=Cols, Index supers=Supers, Index subs=Subs)\n      : m_coeffs(1+supers+subs,cols),\n        m_rows(rows), m_supers(supers), m_subs(subs)\n    {\n    }\n\n    /** \\returns the number of rows */\n    inline EIGEN_CONSTEXPR Index rows() const { return m_rows.value(); }\n\n    /** \\returns the number of columns */\n    inline EIGEN_CONSTEXPR Index cols() const { return m_coeffs.cols(); }\n\n    /** \\returns the number of super diagonals */\n    inline EIGEN_CONSTEXPR Index supers() const { return m_supers.value(); }\n\n    /** \\returns the number of sub diagonals */\n    inline EIGEN_CONSTEXPR Index subs() const { return m_subs.value(); }\n\n    inline const CoefficientsType& coeffs() const { return m_coeffs; }\n    inline CoefficientsType& coeffs() { return m_coeffs; }\n\n  protected:\n\n    CoefficientsType m_coeffs;\n    internal::variable_if_dynamic<Index, Rows>   m_rows;\n    internal::variable_if_dynamic<Index, Supers> m_supers;\n    internal::variable_if_dynamic<Index, Subs>   m_subs;\n};\n\ntemplate<typename _CoefficientsType,int _Rows, int _Cols, int _Supers, int _Subs,int _Options>\nclass BandMatrixWrapper;\n\ntemplate<typename _CoefficientsType,int _Rows, int _Cols, int _Supers, int _Subs,int _Options>\nstruct traits<BandMatrixWrapper<_CoefficientsType,_Rows,_Cols,_Supers,_Subs,_Options> >\n{\n  typedef typename _CoefficientsType::Scalar Scalar;\n  typedef typename _CoefficientsType::StorageKind StorageKind;\n  typedef typename _CoefficientsType::StorageIndex StorageIndex;\n  enum {\n    CoeffReadCost = internal::traits<_CoefficientsType>::CoeffReadCost,\n    RowsAtCompileTime = _Rows,\n    ColsAtCompileTime = _Cols,\n    MaxRowsAtCompileTime = _Rows,\n    MaxColsAtCompileTime = _Cols,\n    Flags = LvalueBit,\n    Supers = _Supers,\n    Subs = _Subs,\n    Options = _Options,\n    DataRowsAtCompileTime = ((Supers!=Dynamic) && (Subs!=Dynamic)) ? 
1 + Supers + Subs : Dynamic\n  };\n  typedef _CoefficientsType CoefficientsType;\n};\n\ntemplate<typename _CoefficientsType,int _Rows, int _Cols, int _Supers, int _Subs,int _Options>\nclass BandMatrixWrapper : public BandMatrixBase<BandMatrixWrapper<_CoefficientsType,_Rows,_Cols,_Supers,_Subs,_Options> >\n{\n  public:\n\n    typedef typename internal::traits<BandMatrixWrapper>::Scalar Scalar;\n    typedef typename internal::traits<BandMatrixWrapper>::CoefficientsType CoefficientsType;\n    typedef typename internal::traits<BandMatrixWrapper>::StorageIndex StorageIndex;\n\n    explicit inline BandMatrixWrapper(const CoefficientsType& coeffs, Index rows=_Rows, Index cols=_Cols, Index supers=_Supers, Index subs=_Subs)\n      : m_coeffs(coeffs),\n        m_rows(rows), m_supers(supers), m_subs(subs)\n    {\n      EIGEN_UNUSED_VARIABLE(cols);\n      //internal::assert(coeffs.cols()==cols() && (supers()+subs()+1)==coeffs.rows());\n    }\n\n    /** \\returns the number of rows */\n    inline EIGEN_CONSTEXPR Index rows() const { return m_rows.value(); }\n\n    /** \\returns the number of columns */\n    inline EIGEN_CONSTEXPR Index cols() const { return m_coeffs.cols(); }\n\n    /** \\returns the number of super diagonals */\n    inline EIGEN_CONSTEXPR Index supers() const { return m_supers.value(); }\n\n    /** \\returns the number of sub diagonals */\n    inline EIGEN_CONSTEXPR Index subs() const { return m_subs.value(); }\n\n    inline const CoefficientsType& coeffs() const { return m_coeffs; }\n\n  protected:\n\n    const CoefficientsType& m_coeffs;\n    internal::variable_if_dynamic<Index, _Rows>   m_rows;\n    internal::variable_if_dynamic<Index, _Supers> m_supers;\n    internal::variable_if_dynamic<Index, _Subs>   m_subs;\n};\n\n/**\n  * \\class TridiagonalMatrix\n  * \\ingroup Core_Module\n  *\n  * \\brief Represents a tridiagonal matrix with compact banded storage\n  *\n  * \\tparam Scalar Numeric type, i.e. 
float, double, int\n  * \\tparam Size Number of rows and cols, or \\b Dynamic\n  * \\tparam Options Can be 0 or \\b SelfAdjoint\n  *\n  * \\sa class BandMatrix\n  */\ntemplate<typename Scalar, int Size, int Options>\nclass TridiagonalMatrix : public BandMatrix<Scalar,Size,Size,Options&SelfAdjoint?0:1,1,Options|RowMajor>\n{\n    typedef BandMatrix<Scalar,Size,Size,Options&SelfAdjoint?0:1,1,Options|RowMajor> Base;\n    typedef typename Base::StorageIndex StorageIndex;\n  public:\n    explicit TridiagonalMatrix(Index size = Size) : Base(size,size,Options&SelfAdjoint?0:1,1) {}\n\n    inline typename Base::template DiagonalIntReturnType<1>::Type super()\n    { return Base::template diagonal<1>(); }\n    inline const typename Base::template DiagonalIntReturnType<1>::Type super() const\n    { return Base::template diagonal<1>(); }\n    inline typename Base::template DiagonalIntReturnType<-1>::Type sub()\n    { return Base::template diagonal<-1>(); }\n    inline const typename Base::template DiagonalIntReturnType<-1>::Type sub() const\n    { return Base::template diagonal<-1>(); }\n  protected:\n};\n\n\nstruct BandShape {};\n\ntemplate<typename _Scalar, int _Rows, int _Cols, int _Supers, int _Subs, int _Options>\nstruct evaluator_traits<BandMatrix<_Scalar,_Rows,_Cols,_Supers,_Subs,_Options> >\n  : public evaluator_traits_base<BandMatrix<_Scalar,_Rows,_Cols,_Supers,_Subs,_Options> >\n{\n  typedef BandShape Shape;\n};\n\ntemplate<typename _CoefficientsType,int _Rows, int _Cols, int _Supers, int _Subs,int _Options>\nstruct evaluator_traits<BandMatrixWrapper<_CoefficientsType,_Rows,_Cols,_Supers,_Subs,_Options> >\n  : public evaluator_traits_base<BandMatrixWrapper<_CoefficientsType,_Rows,_Cols,_Supers,_Subs,_Options> >\n{\n  typedef BandShape Shape;\n};\n\ntemplate<> struct AssignmentKind<DenseShape,BandShape> { typedef EigenBase2EigenBase Kind; };\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_BANDMATRIX_H\n"
  },
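  {
    "path": "external_libs/eigen-3.4.0/doc/examples/example_BandMatrix.cpp",
    "content": "// Illustrative sketch added next to the vendored Eigen sources: this file (path and all)\n// is a hypothetical addition, not part of upstream Eigen 3.4.0 nor of this repository,\n// and it deliberately uses internal, unsupported classes from BandMatrix.h.\n// A 6x6 band matrix with 2 super-diagonals and 1 sub-diagonal stores only the\n// 1+supers+subs = 4 meaningful diagonals, as rows of a dense 4x6 coefficient block.\n#include <Eigen/Dense>\n#include <iostream>\n\nint main()\n{\n  Eigen::internal::BandMatrix<double> bm(6, 6, 2, 1); // rows, cols, supers, subs\n\n  bm.coeffs().setZero();            // the underlying (1+supers+subs) x cols storage\n  bm.diagonal().setConstant(2.0);   // main diagonal\n  bm.diagonal(1).setConstant(-1.0); // first super-diagonal\n  bm.diagonal(-1).setConstant(1.0); // first sub-diagonal\n\n  // evalTo()/toDenseMatrix() scatter the stored diagonals back into a dense matrix.\n  Eigen::MatrixXd dense = bm.toDenseMatrix();\n  std::cout << dense << std::endl;\n  return 0;\n}\n"
  },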
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/Block.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>\n// Copyright (C) 2006-2010 Benoit Jacob <jacob.benoit.1@gmail.com>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_BLOCK_H\n#define EIGEN_BLOCK_H\n\nnamespace Eigen {\n\nnamespace internal {\ntemplate<typename XprType, int BlockRows, int BlockCols, bool InnerPanel>\nstruct traits<Block<XprType, BlockRows, BlockCols, InnerPanel> > : traits<XprType>\n{\n  typedef typename traits<XprType>::Scalar Scalar;\n  typedef typename traits<XprType>::StorageKind StorageKind;\n  typedef typename traits<XprType>::XprKind XprKind;\n  typedef typename ref_selector<XprType>::type XprTypeNested;\n  typedef typename remove_reference<XprTypeNested>::type _XprTypeNested;\n  enum{\n    MatrixRows = traits<XprType>::RowsAtCompileTime,\n    MatrixCols = traits<XprType>::ColsAtCompileTime,\n    RowsAtCompileTime = MatrixRows == 0 ? 0 : BlockRows,\n    ColsAtCompileTime = MatrixCols == 0 ? 0 : BlockCols,\n    MaxRowsAtCompileTime = BlockRows==0 ? 0\n                         : RowsAtCompileTime != Dynamic ? int(RowsAtCompileTime)\n                         : int(traits<XprType>::MaxRowsAtCompileTime),\n    MaxColsAtCompileTime = BlockCols==0 ? 0\n                         : ColsAtCompileTime != Dynamic ? int(ColsAtCompileTime)\n                         : int(traits<XprType>::MaxColsAtCompileTime),\n\n    XprTypeIsRowMajor = (int(traits<XprType>::Flags)&RowMajorBit) != 0,\n    IsRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1\n               : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0\n               : XprTypeIsRowMajor,\n    HasSameStorageOrderAsXprType = (IsRowMajor == XprTypeIsRowMajor),\n    InnerSize = IsRowMajor ? int(ColsAtCompileTime) : int(RowsAtCompileTime),\n    InnerStrideAtCompileTime = HasSameStorageOrderAsXprType\n                             ? int(inner_stride_at_compile_time<XprType>::ret)\n                             : int(outer_stride_at_compile_time<XprType>::ret),\n    OuterStrideAtCompileTime = HasSameStorageOrderAsXprType\n                             ? int(outer_stride_at_compile_time<XprType>::ret)\n                             : int(inner_stride_at_compile_time<XprType>::ret),\n\n    // FIXME, this traits is rather specialized for dense object and it needs to be cleaned further\n    FlagsLvalueBit = is_lvalue<XprType>::value ? LvalueBit : 0,\n    FlagsRowMajorBit = IsRowMajor ? RowMajorBit : 0,\n    Flags = (traits<XprType>::Flags & (DirectAccessBit | (InnerPanel?CompressedAccessBit:0))) | FlagsLvalueBit | FlagsRowMajorBit,\n    // FIXME DirectAccessBit should not be handled by expressions\n    //\n    // Alignment is needed by MapBase's assertions\n    // We can sefely set it to false here. 
Internal alignment errors will be detected by an eigen_internal_assert in the respective evaluator\n    Alignment = 0\n  };\n};\n\ntemplate<typename XprType, int BlockRows=Dynamic, int BlockCols=Dynamic, bool InnerPanel = false,\n         bool HasDirectAccess = internal::has_direct_access<XprType>::ret> class BlockImpl_dense;\n\n} // end namespace internal\n\ntemplate<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, typename StorageKind> class BlockImpl;\n\n/** \\class Block\n  * \\ingroup Core_Module\n  *\n  * \\brief Expression of a fixed-size or dynamic-size block\n  *\n  * \\tparam XprType the type of the expression in which we are taking a block\n  * \\tparam BlockRows the number of rows of the block we are taking at compile time (optional)\n  * \\tparam BlockCols the number of columns of the block we are taking at compile time (optional)\n  * \\tparam InnerPanel is true if the block maps to a set of rows of a row-major matrix or\n  *         to a set of columns of a column-major matrix (optional). The parameter makes it possible to determine\n  *         at compile time whether aligned access is possible on the block expression.\n  *\n  * This class represents an expression of either a fixed-size or dynamic-size block. It is the return\n  * type of DenseBase::block(Index,Index,Index,Index) and DenseBase::block<int,int>(Index,Index) and\n  * most of the time this is the only way it is used.\n  *\n  * However, if you want to directly manipulate block expressions,\n  * for instance if you want to write a function returning such an expression, you\n  * will need to use this class.\n  *\n  * Here is an example illustrating the dynamic case:\n  * \\include class_Block.cpp\n  * Output: \\verbinclude class_Block.out\n  *\n  * \\note Even though this expression has dynamic size, in the case where \\a XprType\n  * has fixed size, this expression inherits a fixed maximal size which means that evaluating\n  * it does not cause a dynamic memory allocation.\n  *\n  * Here is an example illustrating the fixed-size case:\n  * \\include class_FixedBlock.cpp\n  * Output: \\verbinclude class_FixedBlock.out\n  *\n  * \\sa DenseBase::block(Index,Index,Index,Index), DenseBase::block(Index,Index), class VectorBlock\n  */\ntemplate<typename XprType, int BlockRows, int BlockCols, bool InnerPanel> class Block\n  : public BlockImpl<XprType, BlockRows, BlockCols, InnerPanel, typename internal::traits<XprType>::StorageKind>\n{\n    typedef BlockImpl<XprType, BlockRows, BlockCols, InnerPanel, typename internal::traits<XprType>::StorageKind> Impl;\n  public:\n    //typedef typename Impl::Base Base;\n    typedef Impl Base;\n    EIGEN_GENERIC_PUBLIC_INTERFACE(Block)\n    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Block)\n\n    typedef typename internal::remove_all<XprType>::type NestedExpression;\n\n    /** Column or Row constructor\n      */\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n    Block(XprType& xpr, Index i) : Impl(xpr,i)\n    {\n      eigen_assert( (i>=0) && (\n          ((BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) && i<xpr.rows())\n        ||((BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) && i<xpr.cols())));\n    }\n\n    /** Fixed-size constructor\n      */\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n    Block(XprType& xpr, Index startRow, Index startCol)\n      : Impl(xpr, startRow, startCol)\n    {\n      EIGEN_STATIC_ASSERT(RowsAtCompileTime!=Dynamic && ColsAtCompileTime!=Dynamic,THIS_METHOD_IS_ONLY_FOR_FIXED_SIZE)\n      eigen_assert(startRow >= 0 && BlockRows >= 0 && startRow + 
BlockRows <= xpr.rows()\n             && startCol >= 0 && BlockCols >= 0 && startCol + BlockCols <= xpr.cols());\n    }\n\n    /** Dynamic-size constructor\n      */\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n    Block(XprType& xpr,\n          Index startRow, Index startCol,\n          Index blockRows, Index blockCols)\n      : Impl(xpr, startRow, startCol, blockRows, blockCols)\n    {\n      eigen_assert((RowsAtCompileTime==Dynamic || RowsAtCompileTime==blockRows)\n          && (ColsAtCompileTime==Dynamic || ColsAtCompileTime==blockCols));\n      eigen_assert(startRow >= 0 && blockRows >= 0 && startRow  <= xpr.rows() - blockRows\n          && startCol >= 0 && blockCols >= 0 && startCol <= xpr.cols() - blockCols);\n    }\n};\n\n// The generic default implementation for dense blocks simply forwards to the internal::BlockImpl_dense\n// that must be specialized for direct and non-direct access...\ntemplate<typename XprType, int BlockRows, int BlockCols, bool InnerPanel>\nclass BlockImpl<XprType, BlockRows, BlockCols, InnerPanel, Dense>\n  : public internal::BlockImpl_dense<XprType, BlockRows, BlockCols, InnerPanel>\n{\n    typedef internal::BlockImpl_dense<XprType, BlockRows, BlockCols, InnerPanel> Impl;\n    typedef typename XprType::StorageIndex StorageIndex;\n  public:\n    typedef Impl Base;\n    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl)\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE BlockImpl(XprType& xpr, Index i) : Impl(xpr,i) {}\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE BlockImpl(XprType& xpr, Index startRow, Index startCol) : Impl(xpr, startRow, startCol) {}\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE BlockImpl(XprType& xpr, Index startRow, Index startCol, Index blockRows, Index blockCols)\n      : Impl(xpr, startRow, startCol, blockRows, blockCols) {}\n};\n\nnamespace internal {\n\n/** \\internal Internal implementation of dense Blocks in the general case. */\ntemplate<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool HasDirectAccess> class BlockImpl_dense\n  : public internal::dense_xpr_base<Block<XprType, BlockRows, BlockCols, InnerPanel> >::type\n{\n    typedef Block<XprType, BlockRows, BlockCols, InnerPanel> BlockType;\n    typedef typename internal::ref_selector<XprType>::non_const_type XprTypeNested;\n  public:\n\n    typedef typename internal::dense_xpr_base<BlockType>::type Base;\n    EIGEN_DENSE_PUBLIC_INTERFACE(BlockType)\n    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl_dense)\n\n    // class InnerIterator; // FIXME apparently never used\n\n    /** Column or Row constructor\n      */\n    EIGEN_DEVICE_FUNC\n    inline BlockImpl_dense(XprType& xpr, Index i)\n      : m_xpr(xpr),\n        // It is a row if and only if BlockRows==1 and BlockCols==XprType::ColsAtCompileTime,\n        // and it is a column if and only if BlockRows==XprType::RowsAtCompileTime and BlockCols==1,\n        // all other cases are invalid.\n        // The case of a 1x1 matrix seems ambiguous, but the result is the same anyway.\n        m_startRow( (BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) ? i : 0),\n        m_startCol( (BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) ? i : 0),\n        m_blockRows(BlockRows==1 ? 1 : xpr.rows()),\n        m_blockCols(BlockCols==1 ? 
1 : xpr.cols())\n    {}\n\n    /** Fixed-size constructor\n      */\n    EIGEN_DEVICE_FUNC\n    inline BlockImpl_dense(XprType& xpr, Index startRow, Index startCol)\n      : m_xpr(xpr), m_startRow(startRow), m_startCol(startCol),\n                    m_blockRows(BlockRows), m_blockCols(BlockCols)\n    {}\n\n    /** Dynamic-size constructor\n      */\n    EIGEN_DEVICE_FUNC\n    inline BlockImpl_dense(XprType& xpr,\n          Index startRow, Index startCol,\n          Index blockRows, Index blockCols)\n      : m_xpr(xpr), m_startRow(startRow), m_startCol(startCol),\n                    m_blockRows(blockRows), m_blockCols(blockCols)\n    {}\n\n    EIGEN_DEVICE_FUNC inline Index rows() const { return m_blockRows.value(); }\n    EIGEN_DEVICE_FUNC inline Index cols() const { return m_blockCols.value(); }\n\n    EIGEN_DEVICE_FUNC\n    inline Scalar& coeffRef(Index rowId, Index colId)\n    {\n      EIGEN_STATIC_ASSERT_LVALUE(XprType)\n      return m_xpr.coeffRef(rowId + m_startRow.value(), colId + m_startCol.value());\n    }\n\n    EIGEN_DEVICE_FUNC\n    inline const Scalar& coeffRef(Index rowId, Index colId) const\n    {\n      return m_xpr.derived().coeffRef(rowId + m_startRow.value(), colId + m_startCol.value());\n    }\n\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index rowId, Index colId) const\n    {\n      return m_xpr.coeff(rowId + m_startRow.value(), colId + m_startCol.value());\n    }\n\n    EIGEN_DEVICE_FUNC\n    inline Scalar& coeffRef(Index index)\n    {\n      EIGEN_STATIC_ASSERT_LVALUE(XprType)\n      return m_xpr.coeffRef(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),\n                            m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));\n    }\n\n    EIGEN_DEVICE_FUNC\n    inline const Scalar& coeffRef(Index index) const\n    {\n      return m_xpr.coeffRef(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),\n                            m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));\n    }\n\n    EIGEN_DEVICE_FUNC\n    inline const CoeffReturnType coeff(Index index) const\n    {\n      return m_xpr.coeff(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),\n                         m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));\n    }\n\n    template<int LoadMode>\n    inline PacketScalar packet(Index rowId, Index colId) const\n    {\n      return m_xpr.template packet<Unaligned>(rowId + m_startRow.value(), colId + m_startCol.value());\n    }\n\n    template<int LoadMode>\n    inline void writePacket(Index rowId, Index colId, const PacketScalar& val)\n    {\n      m_xpr.template writePacket<Unaligned>(rowId + m_startRow.value(), colId + m_startCol.value(), val);\n    }\n\n    template<int LoadMode>\n    inline PacketScalar packet(Index index) const\n    {\n      return m_xpr.template packet<Unaligned>\n              (m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),\n               m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));\n    }\n\n    template<int LoadMode>\n    inline void writePacket(Index index, const PacketScalar& val)\n    {\n      m_xpr.template writePacket<Unaligned>\n         (m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),\n          m_startCol.value() + (RowsAtCompileTime == 1 ? 
index : 0), val);\n    }\n\n    #ifdef EIGEN_PARSED_BY_DOXYGEN\n    /** \\sa MapBase::data() */\n    EIGEN_DEVICE_FUNC inline const Scalar* data() const;\n    EIGEN_DEVICE_FUNC inline Index innerStride() const;\n    EIGEN_DEVICE_FUNC inline Index outerStride() const;\n    #endif\n\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n    const typename internal::remove_all<XprTypeNested>::type& nestedExpression() const\n    {\n      return m_xpr;\n    }\n\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n    XprType& nestedExpression() { return m_xpr; }\n\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR\n    StorageIndex startRow() const EIGEN_NOEXCEPT\n    {\n      return m_startRow.value();\n    }\n\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR\n    StorageIndex startCol() const EIGEN_NOEXCEPT\n    {\n      return m_startCol.value();\n    }\n\n  protected:\n\n    XprTypeNested m_xpr;\n    const internal::variable_if_dynamic<StorageIndex, (XprType::RowsAtCompileTime == 1 && BlockRows==1) ? 0 : Dynamic> m_startRow;\n    const internal::variable_if_dynamic<StorageIndex, (XprType::ColsAtCompileTime == 1 && BlockCols==1) ? 0 : Dynamic> m_startCol;\n    const internal::variable_if_dynamic<StorageIndex, RowsAtCompileTime> m_blockRows;\n    const internal::variable_if_dynamic<StorageIndex, ColsAtCompileTime> m_blockCols;\n};\n\n/** \\internal Internal implementation of dense Blocks in the direct access case.*/\ntemplate<typename XprType, int BlockRows, int BlockCols, bool InnerPanel>\nclass BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>\n  : public MapBase<Block<XprType, BlockRows, BlockCols, InnerPanel> >\n{\n    typedef Block<XprType, BlockRows, BlockCols, InnerPanel> BlockType;\n    typedef typename internal::ref_selector<XprType>::non_const_type XprTypeNested;\n    enum {\n      XprTypeIsRowMajor = (int(traits<XprType>::Flags)&RowMajorBit) != 0\n    };\n  public:\n\n    typedef MapBase<BlockType> Base;\n    EIGEN_DENSE_PUBLIC_INTERFACE(BlockType)\n    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl_dense)\n\n    /** Column or Row constructor\n      */\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n    BlockImpl_dense(XprType& xpr, Index i)\n      : Base(xpr.data() + i * (    ((BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) && (!XprTypeIsRowMajor))\n                                || ((BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) && ( XprTypeIsRowMajor)) ? xpr.innerStride() : xpr.outerStride()),\n             BlockRows==1 ? 1 : xpr.rows(),\n             BlockCols==1 ? 1 : xpr.cols()),\n        m_xpr(xpr),\n        m_startRow( (BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) ? i : 0),\n        m_startCol( (BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) ? 
i : 0)\n    {\n      init();\n    }\n\n    /** Fixed-size constructor\n      */\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n    BlockImpl_dense(XprType& xpr, Index startRow, Index startCol)\n      : Base(xpr.data()+xpr.innerStride()*(XprTypeIsRowMajor?startCol:startRow) + xpr.outerStride()*(XprTypeIsRowMajor?startRow:startCol)),\n        m_xpr(xpr), m_startRow(startRow), m_startCol(startCol)\n    {\n      init();\n    }\n\n    /** Dynamic-size constructor\n      */\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n    BlockImpl_dense(XprType& xpr,\n          Index startRow, Index startCol,\n          Index blockRows, Index blockCols)\n      : Base(xpr.data()+xpr.innerStride()*(XprTypeIsRowMajor?startCol:startRow) + xpr.outerStride()*(XprTypeIsRowMajor?startRow:startCol), blockRows, blockCols),\n        m_xpr(xpr), m_startRow(startRow), m_startCol(startCol)\n    {\n      init();\n    }\n\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n    const typename internal::remove_all<XprTypeNested>::type& nestedExpression() const EIGEN_NOEXCEPT\n    {\n      return m_xpr;\n    }\n\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n    XprType& nestedExpression() { return m_xpr; }\n\n    /** \\sa MapBase::innerStride() */\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR\n    Index innerStride() const EIGEN_NOEXCEPT\n    {\n      return internal::traits<BlockType>::HasSameStorageOrderAsXprType\n             ? m_xpr.innerStride()\n             : m_xpr.outerStride();\n    }\n\n    /** \\sa MapBase::outerStride() */\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR\n    Index outerStride() const EIGEN_NOEXCEPT\n    {\n      return internal::traits<BlockType>::HasSameStorageOrderAsXprType\n                    ? m_xpr.outerStride()\n                    : m_xpr.innerStride();\n    }\n\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR\n    StorageIndex startRow() const EIGEN_NOEXCEPT { return m_startRow.value(); }\n\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR\n    StorageIndex startCol() const EIGEN_NOEXCEPT { return m_startCol.value(); }\n\n  #ifndef __SUNPRO_CC\n  // FIXME sunstudio is not friendly with the above friend...\n  // META-FIXME there is no 'friend' keyword around here. Is this obsolete?\n  protected:\n  #endif\n\n    #ifndef EIGEN_PARSED_BY_DOXYGEN\n    /** \\internal used by allowAligned() */\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n    BlockImpl_dense(XprType& xpr, const Scalar* data, Index blockRows, Index blockCols)\n      : Base(data, blockRows, blockCols), m_xpr(xpr)\n    {\n      init();\n    }\n    #endif\n\n  protected:\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n    void init()\n    {\n      m_outerStride = internal::traits<BlockType>::HasSameStorageOrderAsXprType\n                    ? m_xpr.outerStride()\n                    : m_xpr.innerStride();\n    }\n\n    XprTypeNested m_xpr;\n    const internal::variable_if_dynamic<StorageIndex, (XprType::RowsAtCompileTime == 1 && BlockRows==1) ? 0 : Dynamic> m_startRow;\n    const internal::variable_if_dynamic<StorageIndex, (XprType::ColsAtCompileTime == 1 && BlockCols==1) ? 0 : Dynamic> m_startCol;\n    Index m_outerStride;\n};\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_BLOCK_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/BooleanRedux.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_ALLANDANY_H\n#define EIGEN_ALLANDANY_H\n\nnamespace Eigen { \n\nnamespace internal {\n\ntemplate<typename Derived, int UnrollCount, int Rows>\nstruct all_unroller\n{\n  enum {\n    col = (UnrollCount-1) / Rows,\n    row = (UnrollCount-1) % Rows\n  };\n\n  EIGEN_DEVICE_FUNC static inline bool run(const Derived &mat)\n  {\n    return all_unroller<Derived, UnrollCount-1, Rows>::run(mat) && mat.coeff(row, col);\n  }\n};\n\ntemplate<typename Derived, int Rows>\nstruct all_unroller<Derived, 0, Rows>\n{\n  EIGEN_DEVICE_FUNC static inline bool run(const Derived &/*mat*/) { return true; }\n};\n\ntemplate<typename Derived, int Rows>\nstruct all_unroller<Derived, Dynamic, Rows>\n{\n  EIGEN_DEVICE_FUNC static inline bool run(const Derived &) { return false; }\n};\n\ntemplate<typename Derived, int UnrollCount, int Rows>\nstruct any_unroller\n{\n  enum {\n    col = (UnrollCount-1) / Rows,\n    row = (UnrollCount-1) % Rows\n  };\n  \n  EIGEN_DEVICE_FUNC static inline bool run(const Derived &mat)\n  {\n    return any_unroller<Derived, UnrollCount-1, Rows>::run(mat) || mat.coeff(row, col);\n  }\n};\n\ntemplate<typename Derived, int Rows>\nstruct any_unroller<Derived, 0, Rows>\n{\n  EIGEN_DEVICE_FUNC static inline bool run(const Derived & /*mat*/) { return false; }\n};\n\ntemplate<typename Derived, int Rows>\nstruct any_unroller<Derived, Dynamic, Rows>\n{\n  EIGEN_DEVICE_FUNC static inline bool run(const Derived &) { return false; }\n};\n\n} // end namespace internal\n\n/** \\returns true if all coefficients are true\n  *\n  * Example: \\include MatrixBase_all.cpp\n  * Output: \\verbinclude MatrixBase_all.out\n  *\n  * \\sa any(), Cwise::operator<()\n  */\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC inline bool DenseBase<Derived>::all() const\n{\n  typedef internal::evaluator<Derived> Evaluator;\n  enum {\n    unroll = SizeAtCompileTime != Dynamic\n          && SizeAtCompileTime * (int(Evaluator::CoeffReadCost) + int(NumTraits<Scalar>::AddCost)) <= EIGEN_UNROLLING_LIMIT\n  };\n  Evaluator evaluator(derived());\n  if(unroll)\n    return internal::all_unroller<Evaluator, unroll ? int(SizeAtCompileTime) : Dynamic, internal::traits<Derived>::RowsAtCompileTime>::run(evaluator);\n  else\n  {\n    for(Index j = 0; j < cols(); ++j)\n      for(Index i = 0; i < rows(); ++i)\n        if (!evaluator.coeff(i, j)) return false;\n    return true;\n  }\n}\n\n/** \\returns true if at least one coefficient is true\n  *\n  * \\sa all()\n  */\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC inline bool DenseBase<Derived>::any() const\n{\n  typedef internal::evaluator<Derived> Evaluator;\n  enum {\n    unroll = SizeAtCompileTime != Dynamic\n          && SizeAtCompileTime * (int(Evaluator::CoeffReadCost) + int(NumTraits<Scalar>::AddCost)) <= EIGEN_UNROLLING_LIMIT\n  };\n  Evaluator evaluator(derived());\n  if(unroll)\n    return internal::any_unroller<Evaluator, unroll ? 
int(SizeAtCompileTime) : Dynamic, internal::traits<Derived>::RowsAtCompileTime>::run(evaluator);\n  else\n  {\n    for(Index j = 0; j < cols(); ++j)\n      for(Index i = 0; i < rows(); ++i)\n        if (evaluator.coeff(i, j)) return true;\n    return false;\n  }\n}\n\n/** \\returns the number of coefficients which evaluate to true\n  *\n  * \\sa all(), any()\n  */\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC inline Eigen::Index DenseBase<Derived>::count() const\n{\n  return derived().template cast<bool>().template cast<Index>().sum();\n}\n\n/** \\returns true if \\c *this contains at least one Not A Number (NaN).\n  *\n  * \\sa allFinite()\n  */\ntemplate<typename Derived>\ninline bool DenseBase<Derived>::hasNaN() const\n{\n#if EIGEN_COMP_MSVC || (defined __FAST_MATH__)\n  return derived().array().isNaN().any();\n#else\n  return !((derived().array()==derived().array()).all());\n#endif\n}\n\n/** \\returns true if \\c *this contains only finite numbers, i.e., no NaN and no +/-INF values.\n  *\n  * \\sa hasNaN()\n  */\ntemplate<typename Derived>\ninline bool DenseBase<Derived>::allFinite() const\n{\n#if EIGEN_COMP_MSVC || (defined __FAST_MATH__)\n  return derived().array().isFinite().all();\n#else\n  return !((derived()-derived()).hasNaN());\n#endif\n}\n    \n} // end namespace Eigen\n\n#endif // EIGEN_ALLANDANY_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/CommaInitializer.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>\n// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_COMMAINITIALIZER_H\n#define EIGEN_COMMAINITIALIZER_H\n\nnamespace Eigen { \n\n/** \\class CommaInitializer\n  * \\ingroup Core_Module\n  *\n  * \\brief Helper class used by the comma initializer operator\n  *\n  * This class is internally used to implement the comma initializer feature. It is\n  * the return type of MatrixBase::operator<<, and most of the time this is the only\n  * way it is used.\n  *\n  * \\sa \\blank \\ref MatrixBaseCommaInitRef \"MatrixBase::operator<<\", CommaInitializer::finished()\n  */\ntemplate<typename XprType>\nstruct CommaInitializer\n{\n  typedef typename XprType::Scalar Scalar;\n\n  EIGEN_DEVICE_FUNC\n  inline CommaInitializer(XprType& xpr, const Scalar& s)\n    : m_xpr(xpr), m_row(0), m_col(1), m_currentBlockRows(1)\n  {\n    eigen_assert(m_xpr.rows() > 0 && m_xpr.cols() > 0\n      && \"Cannot comma-initialize a 0x0 matrix (operator<<)\");\n    m_xpr.coeffRef(0,0) = s;\n  }\n\n  template<typename OtherDerived>\n  EIGEN_DEVICE_FUNC\n  inline CommaInitializer(XprType& xpr, const DenseBase<OtherDerived>& other)\n    : m_xpr(xpr), m_row(0), m_col(other.cols()), m_currentBlockRows(other.rows())\n  {\n    eigen_assert(m_xpr.rows() >= other.rows() && m_xpr.cols() >= other.cols()\n      && \"Cannot comma-initialize a 0x0 matrix (operator<<)\");\n    m_xpr.block(0, 0, other.rows(), other.cols()) = other;\n  }\n\n  /* Copy/Move constructor which transfers ownership. This is crucial in \n   * absence of return value optimization to avoid assertions during destruction. */\n  // FIXME in C++11 mode this could be replaced by a proper RValue constructor\n  EIGEN_DEVICE_FUNC\n  inline CommaInitializer(const CommaInitializer& o)\n  : m_xpr(o.m_xpr), m_row(o.m_row), m_col(o.m_col), m_currentBlockRows(o.m_currentBlockRows) {\n    // Mark original object as finished. 
In absence of R-value references we need to const_cast:\n    const_cast<CommaInitializer&>(o).m_row = m_xpr.rows();\n    const_cast<CommaInitializer&>(o).m_col = m_xpr.cols();\n    const_cast<CommaInitializer&>(o).m_currentBlockRows = 0;\n  }\n\n  /* inserts a scalar value in the target matrix */\n  EIGEN_DEVICE_FUNC\n  CommaInitializer& operator,(const Scalar& s)\n  {\n    if (m_col==m_xpr.cols())\n    {\n      m_row+=m_currentBlockRows;\n      m_col = 0;\n      m_currentBlockRows = 1;\n      eigen_assert(m_row<m_xpr.rows()\n        && \"Too many rows passed to comma initializer (operator<<)\");\n    }\n    eigen_assert(m_col<m_xpr.cols()\n      && \"Too many coefficients passed to comma initializer (operator<<)\");\n    eigen_assert(m_currentBlockRows==1);\n    m_xpr.coeffRef(m_row, m_col++) = s;\n    return *this;\n  }\n\n  /* inserts a matrix expression in the target matrix */\n  template<typename OtherDerived>\n  EIGEN_DEVICE_FUNC\n  CommaInitializer& operator,(const DenseBase<OtherDerived>& other)\n  {\n    if (m_col==m_xpr.cols() && (other.cols()!=0 || other.rows()!=m_currentBlockRows))\n    {\n      m_row+=m_currentBlockRows;\n      m_col = 0;\n      m_currentBlockRows = other.rows();\n      eigen_assert(m_row+m_currentBlockRows<=m_xpr.rows()\n        && \"Too many rows passed to comma initializer (operator<<)\");\n    }\n    eigen_assert((m_col + other.cols() <= m_xpr.cols())\n      && \"Too many coefficients passed to comma initializer (operator<<)\");\n    eigen_assert(m_currentBlockRows==other.rows());\n    m_xpr.template block<OtherDerived::RowsAtCompileTime, OtherDerived::ColsAtCompileTime>\n                    (m_row, m_col, other.rows(), other.cols()) = other;\n    m_col += other.cols();\n    return *this;\n  }\n\n  EIGEN_DEVICE_FUNC\n  inline ~CommaInitializer()\n#if defined VERIFY_RAISES_ASSERT && (!defined EIGEN_NO_ASSERTION_CHECKING) && defined EIGEN_EXCEPTIONS\n  EIGEN_EXCEPTION_SPEC(Eigen::eigen_assert_exception)\n#endif\n  {\n    finished();\n  }\n\n  /** \\returns the built matrix once all its coefficients have been set.\n    * Calling finished is 100% optional. Its purpose is to write expressions\n    * like this:\n    * \\code\n    * quaternion.fromRotationMatrix((Matrix3f() << axis0, axis1, axis2).finished());\n    * \\endcode\n    */\n  EIGEN_DEVICE_FUNC\n  inline XprType& finished() {\n      eigen_assert(((m_row+m_currentBlockRows) == m_xpr.rows() || m_xpr.cols() == 0)\n           && m_col == m_xpr.cols()\n           && \"Too few coefficients passed to comma initializer (operator<<)\");\n      return m_xpr;\n  }\n\n  XprType& m_xpr;           // target expression\n  Index m_row;              // current row id\n  Index m_col;              // current col id\n  Index m_currentBlockRows; // current block height\n};\n\n/** \\anchor MatrixBaseCommaInitRef\n  * Convenient operator to set the coefficients of a matrix.\n  *\n  * The coefficients must be provided in a row major order and exactly match\n  * the size of the matrix. 
Otherwise an assertion is raised.\n  *\n  * Example: \\include MatrixBase_set.cpp\n  * Output: \\verbinclude MatrixBase_set.out\n  * \n  * \\note According to the C++ standard, the argument expressions of this comma initializer are evaluated in arbitrary order.\n  *\n  * \\sa CommaInitializer::finished(), class CommaInitializer\n  */\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC inline CommaInitializer<Derived> DenseBase<Derived>::operator<< (const Scalar& s)\n{\n  return CommaInitializer<Derived>(*static_cast<Derived*>(this), s);\n}\n\n/** \\sa operator<<(const Scalar&) */\ntemplate<typename Derived>\ntemplate<typename OtherDerived>\nEIGEN_DEVICE_FUNC inline CommaInitializer<Derived>\nDenseBase<Derived>::operator<<(const DenseBase<OtherDerived>& other)\n{\n  return CommaInitializer<Derived>(*static_cast<Derived *>(this), other);\n}\n\n} // end namespace Eigen\n\n#endif // EIGEN_COMMAINITIALIZER_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/ConditionEstimator.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2016 Rasmus Munk Larsen (rmlarsen@google.com)\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_CONDITIONESTIMATOR_H\n#define EIGEN_CONDITIONESTIMATOR_H\n\nnamespace Eigen {\n\nnamespace internal {\n\ntemplate <typename Vector, typename RealVector, bool IsComplex>\nstruct rcond_compute_sign {\n  static inline Vector run(const Vector& v) {\n    const RealVector v_abs = v.cwiseAbs();\n    return (v_abs.array() == static_cast<typename Vector::RealScalar>(0))\n            .select(Vector::Ones(v.size()), v.cwiseQuotient(v_abs));\n  }\n};\n\n// Partial specialization to avoid elementwise division for real vectors.\ntemplate <typename Vector>\nstruct rcond_compute_sign<Vector, Vector, false> {\n  static inline Vector run(const Vector& v) {\n    return (v.array() < static_cast<typename Vector::RealScalar>(0))\n           .select(-Vector::Ones(v.size()), Vector::Ones(v.size()));\n  }\n};\n\n/**\n  * \\returns an estimate of ||inv(matrix)||_1 given a decomposition of\n  * \\a matrix that implements .solve() and .adjoint().solve() methods.\n  *\n  * This function implements Algorithms 4.1 and 5.1 from\n  *   http://www.maths.manchester.ac.uk/~higham/narep/narep135.pdf\n  * which also forms the basis for the condition number estimators in\n  * LAPACK. Since at most 10 calls to the solve method of dec are\n  * performed, the total cost is O(dims^2), as opposed to O(dims^3)\n  * needed to compute the inverse matrix explicitly.\n  *\n  * The most common usage is in estimating the condition number\n  * ||matrix||_1 * ||inv(matrix)||_1. 
The first term ||matrix||_1 can be\n  * computed directly in O(n^2) operations.\n  *\n  * Supports the following decompositions: FullPivLU, PartialPivLU, LDLT, and\n  * LLT.\n  *\n  * \\sa FullPivLU, PartialPivLU, LDLT, LLT.\n  */\ntemplate <typename Decomposition>\ntypename Decomposition::RealScalar rcond_invmatrix_L1_norm_estimate(const Decomposition& dec)\n{\n  typedef typename Decomposition::MatrixType MatrixType;\n  typedef typename Decomposition::Scalar Scalar;\n  typedef typename Decomposition::RealScalar RealScalar;\n  typedef typename internal::plain_col_type<MatrixType>::type Vector;\n  typedef typename internal::plain_col_type<MatrixType, RealScalar>::type RealVector;\n  const bool is_complex = (NumTraits<Scalar>::IsComplex != 0);\n\n  eigen_assert(dec.rows() == dec.cols());\n  const Index n = dec.rows();\n  if (n == 0)\n    return 0;\n\n  // Disable Index to float conversion warning\n#ifdef __INTEL_COMPILER\n  #pragma warning push\n  #pragma warning ( disable : 2259 )\n#endif\n  Vector v = dec.solve(Vector::Ones(n) / Scalar(n));\n#ifdef __INTEL_COMPILER\n  #pragma warning pop\n#endif\n\n  // lower_bound is a lower bound on\n  //   ||inv(matrix)||_1  = sup_v ||inv(matrix) v||_1 / ||v||_1\n  // and is the objective maximized by the (\"super-\") gradient ascent\n  // algorithm below.\n  RealScalar lower_bound = v.template lpNorm<1>();\n  if (n == 1)\n    return lower_bound;\n\n  // Gradient ascent algorithm follows: We know that the optimum is achieved at\n  // one of the simplices v = e_i, so in each iteration we follow a\n  // super-gradient to move towards the optimal one.\n  RealScalar old_lower_bound = lower_bound;\n  Vector sign_vector(n);\n  Vector old_sign_vector;\n  Index v_max_abs_index = -1;\n  Index old_v_max_abs_index = v_max_abs_index;\n  for (int k = 0; k < 4; ++k)\n  {\n    sign_vector = internal::rcond_compute_sign<Vector, RealVector, is_complex>::run(v);\n    if (k > 0 && !is_complex && sign_vector == old_sign_vector) {\n      // Break if the solution stagnated.\n      break;\n    }\n    // v_max_abs_index = argmax |real( inv(matrix)^T * sign_vector )|\n    v = dec.adjoint().solve(sign_vector);\n    v.real().cwiseAbs().maxCoeff(&v_max_abs_index);\n    if (v_max_abs_index == old_v_max_abs_index) {\n      // Break if the solution stagnated.\n      break;\n    }\n    // Move to the new simplex e_j, where j = v_max_abs_index.\n    v = dec.solve(Vector::Unit(n, v_max_abs_index));  // v = inv(matrix) * e_j.\n    lower_bound = v.template lpNorm<1>();\n    if (lower_bound <= old_lower_bound) {\n      // Break if the gradient step did not increase the lower_bound.\n      break;\n    }\n    if (!is_complex) {\n      old_sign_vector = sign_vector;\n    }\n    old_v_max_abs_index = v_max_abs_index;\n    old_lower_bound = lower_bound;\n  }\n  // The following calculates an independent estimate of ||matrix||_1 by\n  // multiplying matrix by a vector with entries of slowly increasing\n  // magnitude and alternating sign:\n  //   v_i = (-1)^{i} (1 + (i / (dim-1))), i = 0,...,dim-1.\n  // This improvement to Hager's algorithm above is due to Higham. 
It was\n  // added to make the algorithm more robust in certain corner cases where\n  // large elements in the matrix might otherwise escape detection due to\n  // exact cancellation (especially when op and op_adjoint correspond to a\n  // sequence of backsubstitutions and permutations), which could cause\n  // Hager's algorithm to vastly underestimate ||matrix||_1.\n  Scalar alternating_sign(RealScalar(1));\n  for (Index i = 0; i < n; ++i) {\n    // The static_cast is needed when Scalar is a complex and RealScalar implements expression templates\n    v[i] = alternating_sign * static_cast<RealScalar>(RealScalar(1) + (RealScalar(i) / (RealScalar(n - 1))));\n    alternating_sign = -alternating_sign;\n  }\n  v = dec.solve(v);\n  const RealScalar alternate_lower_bound = (2 * v.template lpNorm<1>()) / (3 * RealScalar(n));\n  return numext::maxi(lower_bound, alternate_lower_bound);\n}\n\n/** \\brief Reciprocal condition number estimator.\n  *\n  * Computing a decomposition of a dense matrix takes O(n^3) operations, while\n  * this method estimates the condition number quickly and reliably in O(n^2)\n  * operations.\n  *\n  * \\returns an estimate of the reciprocal condition number\n  * (1 / (||matrix||_1 * ||inv(matrix)||_1)) of matrix, given ||matrix||_1 and\n  * its decomposition. Supports the following decompositions: FullPivLU,\n  * PartialPivLU, LDLT, and LLT.\n  *\n  * \\sa FullPivLU, PartialPivLU, LDLT, LLT.\n  */\ntemplate <typename Decomposition>\ntypename Decomposition::RealScalar\nrcond_estimate_helper(typename Decomposition::RealScalar matrix_norm, const Decomposition& dec)\n{\n  typedef typename Decomposition::RealScalar RealScalar;\n  eigen_assert(dec.rows() == dec.cols());\n  if (dec.rows() == 0)              return NumTraits<RealScalar>::infinity();\n  if (matrix_norm == RealScalar(0)) return RealScalar(0);\n  if (dec.rows() == 1)              return RealScalar(1);\n  const RealScalar inverse_matrix_norm = rcond_invmatrix_L1_norm_estimate(dec);\n  return (inverse_matrix_norm == RealScalar(0) ? RealScalar(0)\n                                               : (RealScalar(1) / inverse_matrix_norm) / matrix_norm);\n}\n\n}  // namespace internal\n\n}  // namespace Eigen\n\n#endif\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/CoreEvaluators.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2011 Benoit Jacob <jacob.benoit.1@gmail.com>\n// Copyright (C) 2011-2014 Gael Guennebaud <gael.guennebaud@inria.fr>\n// Copyright (C) 2011-2012 Jitse Niesen <jitse@maths.leeds.ac.uk>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n\n#ifndef EIGEN_COREEVALUATORS_H\n#define EIGEN_COREEVALUATORS_H\n\nnamespace Eigen {\n\nnamespace internal {\n\n// This class returns the evaluator kind from the expression storage kind.\n// Default assumes index based accessors\ntemplate<typename StorageKind>\nstruct storage_kind_to_evaluator_kind {\n  typedef IndexBased Kind;\n};\n\n// This class returns the evaluator shape from the expression storage kind.\n// It can be Dense, Sparse, Triangular, Diagonal, SelfAdjoint, Band, etc.\ntemplate<typename StorageKind> struct storage_kind_to_shape;\n\ntemplate<> struct storage_kind_to_shape<Dense>                  { typedef DenseShape Shape;           };\ntemplate<> struct storage_kind_to_shape<SolverStorage>          { typedef SolverShape Shape;           };\ntemplate<> struct storage_kind_to_shape<PermutationStorage>     { typedef PermutationShape Shape;     };\ntemplate<> struct storage_kind_to_shape<TranspositionsStorage>  { typedef TranspositionsShape Shape;  };\n\n// Evaluators have to be specialized with respect to various criteria such as:\n//  - storage/structure/shape\n//  - scalar type\n//  - etc.\n// Therefore, we need specialization of evaluator providing additional template arguments for each kind of evaluators.\n// We currently distinguish the following kind of evaluators:\n// - unary_evaluator    for expressions taking only one arguments (CwiseUnaryOp, CwiseUnaryView, Transpose, MatrixWrapper, ArrayWrapper, Reverse, Replicate)\n// - binary_evaluator   for expression taking two arguments (CwiseBinaryOp)\n// - ternary_evaluator   for expression taking three arguments (CwiseTernaryOp)\n// - product_evaluator  for linear algebra products (Product); special case of binary_evaluator because it requires additional tags for dispatching.\n// - mapbase_evaluator  for Map, Block, Ref\n// - block_evaluator    for Block (special dispatching to a mapbase_evaluator or unary_evaluator)\n\ntemplate< typename T,\n          typename Arg1Kind   = typename evaluator_traits<typename T::Arg1>::Kind,\n          typename Arg2Kind   = typename evaluator_traits<typename T::Arg2>::Kind,\n          typename Arg3Kind   = typename evaluator_traits<typename T::Arg3>::Kind,\n          typename Arg1Scalar = typename traits<typename T::Arg1>::Scalar,\n          typename Arg2Scalar = typename traits<typename T::Arg2>::Scalar,\n          typename Arg3Scalar = typename traits<typename T::Arg3>::Scalar> struct ternary_evaluator;\n\ntemplate< typename T,\n          typename LhsKind   = typename evaluator_traits<typename T::Lhs>::Kind,\n          typename RhsKind   = typename evaluator_traits<typename T::Rhs>::Kind,\n          typename LhsScalar = typename traits<typename T::Lhs>::Scalar,\n          typename RhsScalar = typename traits<typename T::Rhs>::Scalar> struct binary_evaluator;\n\ntemplate< typename T,\n          typename Kind   = typename evaluator_traits<typename T::NestedExpression>::Kind,\n          typename Scalar = typename T::Scalar> struct unary_evaluator;\n\n// evaluator_traits<T> contains 
traits for evaluator<T>\n\ntemplate<typename T>\nstruct evaluator_traits_base\n{\n  // by default, get evaluator kind and shape from storage\n  typedef typename storage_kind_to_evaluator_kind<typename traits<T>::StorageKind>::Kind Kind;\n  typedef typename storage_kind_to_shape<typename traits<T>::StorageKind>::Shape Shape;\n};\n\n// Default evaluator traits\ntemplate<typename T>\nstruct evaluator_traits : public evaluator_traits_base<T>\n{\n};\n\ntemplate<typename T, typename Shape = typename evaluator_traits<T>::Shape >\nstruct evaluator_assume_aliasing {\n  static const bool value = false;\n};\n\n// By default, we assume a unary expression:\ntemplate<typename T>\nstruct evaluator : public unary_evaluator<T>\n{\n  typedef unary_evaluator<T> Base;\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  explicit evaluator(const T& xpr) : Base(xpr) {}\n};\n\n\n// TODO: Think about const-correctness\ntemplate<typename T>\nstruct evaluator<const T>\n  : evaluator<T>\n{\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  explicit evaluator(const T& xpr) : evaluator<T>(xpr) {}\n};\n\n// ---------- base class for all evaluators ----------\n\ntemplate<typename ExpressionType>\nstruct evaluator_base\n{\n  // TODO that's not very nice to have to propagate all these traits. They are currently only needed to handle outer,inner indices.\n  typedef traits<ExpressionType> ExpressionTraits;\n\n  enum {\n    Alignment = 0\n  };\n  // noncopyable:\n  // Don't make this class inherit noncopyable as this kills EBO (Empty Base Optimization)\n  // and makes complex evaluators much larger than they should be.\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE evaluator_base() {}\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ~evaluator_base() {}\nprivate:\n  EIGEN_DEVICE_FUNC evaluator_base(const evaluator_base&);\n  EIGEN_DEVICE_FUNC const evaluator_base& operator=(const evaluator_base&);\n};\n\n// -------------------- Matrix and Array --------------------\n//\n// evaluator<PlainObjectBase> is a common base class for the\n// Matrix and Array evaluators.\n// Here we directly specialize evaluator. 
This is not really a unary expression, and it is, by definition, dense,\n// so no need for more sophisticated dispatching.\n\n// this helper permits to completely eliminate m_outerStride if it is known at compiletime.\ntemplate<typename Scalar,int OuterStride> class plainobjectbase_evaluator_data {\npublic:\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  plainobjectbase_evaluator_data(const Scalar* ptr, Index outerStride) : data(ptr)\n  {\n#ifndef EIGEN_INTERNAL_DEBUGGING\n    EIGEN_UNUSED_VARIABLE(outerStride);\n#endif\n    eigen_internal_assert(outerStride==OuterStride);\n  }\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR\n  Index outerStride() const EIGEN_NOEXCEPT { return OuterStride; }\n  const Scalar *data;\n};\n\ntemplate<typename Scalar> class plainobjectbase_evaluator_data<Scalar,Dynamic> {\npublic:\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  plainobjectbase_evaluator_data(const Scalar* ptr, Index outerStride) : data(ptr), m_outerStride(outerStride) {}\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  Index outerStride() const { return m_outerStride; }\n  const Scalar *data;\nprotected:\n  Index m_outerStride;\n};\n\ntemplate<typename Derived>\nstruct evaluator<PlainObjectBase<Derived> >\n  : evaluator_base<Derived>\n{\n  typedef PlainObjectBase<Derived> PlainObjectType;\n  typedef typename PlainObjectType::Scalar Scalar;\n  typedef typename PlainObjectType::CoeffReturnType CoeffReturnType;\n\n  enum {\n    IsRowMajor = PlainObjectType::IsRowMajor,\n    IsVectorAtCompileTime = PlainObjectType::IsVectorAtCompileTime,\n    RowsAtCompileTime = PlainObjectType::RowsAtCompileTime,\n    ColsAtCompileTime = PlainObjectType::ColsAtCompileTime,\n\n    CoeffReadCost = NumTraits<Scalar>::ReadCost,\n    Flags = traits<Derived>::EvaluatorFlags,\n    Alignment = traits<Derived>::Alignment\n  };\n  enum {\n    // We do not need to know the outer stride for vectors\n    OuterStrideAtCompileTime = IsVectorAtCompileTime  ? 0\n                                                      : int(IsRowMajor) ? ColsAtCompileTime\n                                                                        : RowsAtCompileTime\n  };\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  evaluator()\n    : m_d(0,OuterStrideAtCompileTime)\n  {\n    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);\n  }\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  explicit evaluator(const PlainObjectType& m)\n    : m_d(m.data(),IsVectorAtCompileTime ? 
0 : m.outerStride())\n  {\n    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);\n  }\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  CoeffReturnType coeff(Index row, Index col) const\n  {\n    if (IsRowMajor)\n      return m_d.data[row * m_d.outerStride() + col];\n    else\n      return m_d.data[row + col * m_d.outerStride()];\n  }\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  CoeffReturnType coeff(Index index) const\n  {\n    return m_d.data[index];\n  }\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  Scalar& coeffRef(Index row, Index col)\n  {\n    if (IsRowMajor)\n      return const_cast<Scalar*>(m_d.data)[row * m_d.outerStride() + col];\n    else\n      return const_cast<Scalar*>(m_d.data)[row + col * m_d.outerStride()];\n  }\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  Scalar& coeffRef(Index index)\n  {\n    return const_cast<Scalar*>(m_d.data)[index];\n  }\n\n  template<int LoadMode, typename PacketType>\n  EIGEN_STRONG_INLINE\n  PacketType packet(Index row, Index col) const\n  {\n    if (IsRowMajor)\n      return ploadt<PacketType, LoadMode>(m_d.data + row * m_d.outerStride() + col);\n    else\n      return ploadt<PacketType, LoadMode>(m_d.data + row + col * m_d.outerStride());\n  }\n\n  template<int LoadMode, typename PacketType>\n  EIGEN_STRONG_INLINE\n  PacketType packet(Index index) const\n  {\n    return ploadt<PacketType, LoadMode>(m_d.data + index);\n  }\n\n  template<int StoreMode,typename PacketType>\n  EIGEN_STRONG_INLINE\n  void writePacket(Index row, Index col, const PacketType& x)\n  {\n    if (IsRowMajor)\n      return pstoret<Scalar, PacketType, StoreMode>\n\t            (const_cast<Scalar*>(m_d.data) + row * m_d.outerStride() + col, x);\n    else\n      return pstoret<Scalar, PacketType, StoreMode>\n                    (const_cast<Scalar*>(m_d.data) + row + col * m_d.outerStride(), x);\n  }\n\n  template<int StoreMode, typename PacketType>\n  EIGEN_STRONG_INLINE\n  void writePacket(Index index, const PacketType& x)\n  {\n    return pstoret<Scalar, PacketType, StoreMode>(const_cast<Scalar*>(m_d.data) + index, x);\n  }\n\nprotected:\n\n  plainobjectbase_evaluator_data<Scalar,OuterStrideAtCompileTime> m_d;\n};\n\ntemplate<typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols>\nstruct evaluator<Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols> >\n  : evaluator<PlainObjectBase<Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols> > >\n{\n  typedef Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols> XprType;\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  evaluator() {}\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  explicit evaluator(const XprType& m)\n    : evaluator<PlainObjectBase<XprType> >(m)\n  { }\n};\n\ntemplate<typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols>\nstruct evaluator<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> >\n  : evaluator<PlainObjectBase<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> > >\n{\n  typedef Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> XprType;\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  evaluator() {}\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  explicit evaluator(const XprType& m)\n    : evaluator<PlainObjectBase<XprType> >(m)\n  { }\n};\n\n// -------------------- Transpose --------------------\n\ntemplate<typename ArgType>\nstruct unary_evaluator<Transpose<ArgType>, IndexBased>\n  : evaluator_base<Transpose<ArgType> >\n{\n  typedef Transpose<ArgType> XprType;\n\n  enum {\n    CoeffReadCost = evaluator<ArgType>::CoeffReadCost,\n    Flags = evaluator<ArgType>::Flags 
^ RowMajorBit,\n    Alignment = evaluator<ArgType>::Alignment\n  };\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  explicit unary_evaluator(const XprType& t) : m_argImpl(t.nestedExpression()) {}\n\n  typedef typename XprType::Scalar Scalar;\n  typedef typename XprType::CoeffReturnType CoeffReturnType;\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  CoeffReturnType coeff(Index row, Index col) const\n  {\n    return m_argImpl.coeff(col, row);\n  }\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  CoeffReturnType coeff(Index index) const\n  {\n    return m_argImpl.coeff(index);\n  }\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  Scalar& coeffRef(Index row, Index col)\n  {\n    return m_argImpl.coeffRef(col, row);\n  }\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  typename XprType::Scalar& coeffRef(Index index)\n  {\n    return m_argImpl.coeffRef(index);\n  }\n\n  template<int LoadMode, typename PacketType>\n  EIGEN_STRONG_INLINE\n  PacketType packet(Index row, Index col) const\n  {\n    return m_argImpl.template packet<LoadMode,PacketType>(col, row);\n  }\n\n  template<int LoadMode, typename PacketType>\n  EIGEN_STRONG_INLINE\n  PacketType packet(Index index) const\n  {\n    return m_argImpl.template packet<LoadMode,PacketType>(index);\n  }\n\n  template<int StoreMode, typename PacketType>\n  EIGEN_STRONG_INLINE\n  void writePacket(Index row, Index col, const PacketType& x)\n  {\n    m_argImpl.template writePacket<StoreMode,PacketType>(col, row, x);\n  }\n\n  template<int StoreMode, typename PacketType>\n  EIGEN_STRONG_INLINE\n  void writePacket(Index index, const PacketType& x)\n  {\n    m_argImpl.template writePacket<StoreMode,PacketType>(index, x);\n  }\n\nprotected:\n  evaluator<ArgType> m_argImpl;\n};\n\n// -------------------- CwiseNullaryOp --------------------\n// Like Matrix and Array, this is not really a unary expression, so we directly specialize evaluator.\n// Likewise, there is no need for more sophisticated dispatching here.\n\ntemplate<typename Scalar,typename NullaryOp,\n         bool has_nullary = has_nullary_operator<NullaryOp>::value,\n         bool has_unary   = has_unary_operator<NullaryOp>::value,\n         bool has_binary  = has_binary_operator<NullaryOp>::value>\nstruct nullary_wrapper\n{\n  template <typename IndexType>\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i, IndexType j) const { return op(i,j); }\n  template <typename IndexType>\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i) const { return op(i); }\n\n  template <typename T, typename IndexType> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i, IndexType j) const { return op.template packetOp<T>(i,j); }\n  template <typename T, typename IndexType> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i) const { return op.template packetOp<T>(i); }\n};\n\ntemplate<typename Scalar,typename NullaryOp>\nstruct nullary_wrapper<Scalar,NullaryOp,true,false,false>\n{\n  template <typename IndexType>\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType=0, IndexType=0) const { return op(); }\n  template <typename T, typename IndexType> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType=0, IndexType=0) const { return op.template packetOp<T>(); }\n};\n\ntemplate<typename Scalar,typename NullaryOp>\nstruct nullary_wrapper<Scalar,NullaryOp,false,false,true>\n{\n  template <typename IndexType>\n  EIGEN_DEVICE_FUNC 
EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i, IndexType j=0) const { return op(i,j); }\n  template <typename T, typename IndexType> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i, IndexType j=0) const { return op.template packetOp<T>(i,j); }\n};\n\n// We need the following specialization for vector-only functors assigned to a runtime vector,\n// for instance, using linspace and assigning a RowVectorXd to a MatrixXd or even a row of a MatrixXd.\n// In this case, i==0 and j is used for the actual iteration.\ntemplate<typename Scalar,typename NullaryOp>\nstruct nullary_wrapper<Scalar,NullaryOp,false,true,false>\n{\n  template <typename IndexType>\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i, IndexType j) const {\n    eigen_assert(i==0 || j==0);\n    return op(i+j);\n  }\n  template <typename T, typename IndexType> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i, IndexType j) const {\n    eigen_assert(i==0 || j==0);\n    return op.template packetOp<T>(i+j);\n  }\n\n  template <typename IndexType>\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i) const { return op(i); }\n  template <typename T, typename IndexType>\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i) const { return op.template packetOp<T>(i); }\n};\n\ntemplate<typename Scalar,typename NullaryOp>\nstruct nullary_wrapper<Scalar,NullaryOp,false,false,false> {};\n\n#if 0 && EIGEN_COMP_MSVC>0\n// Disable this ugly workaround. This is now handled in traits<Ref>::match,\n// but this piece of code might still become handy if some other weird compilation\n// errors pop up again.\n\n// MSVC exhibits a weird compilation error when\n// compiling:\n//    Eigen::MatrixXf A = MatrixXf::Random(3,3);\n//    Ref<const MatrixXf> R = 2.f*A;\n// and that has_*ary_operator<scalar_constant_op<float>> have not been instantiated yet.\n// The \"problem\" is that evaluator<2.f*A> is instantiated by traits<Ref>::match<2.f*A>\n// and at that time has_*ary_operator<T> returns true regardless of T.\n// Then nullary_wrapper is badly instantiated as nullary_wrapper<.,.,true,true,true>.\n// The trick is thus to defer the proper instantiation of nullary_wrapper when coeff(),\n// and packet() are really instantiated as implemented below:\n\n// This is a simple wrapper around Index to enforce the re-instantiation of\n// has_*ary_operator when needed.\ntemplate<typename T> struct nullary_wrapper_workaround_msvc {\n  nullary_wrapper_workaround_msvc(const T&);\n  operator T()const;\n};\n\ntemplate<typename Scalar,typename NullaryOp>\nstruct nullary_wrapper<Scalar,NullaryOp,true,true,true>\n{\n  template <typename IndexType>\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i, IndexType j) const {\n    return nullary_wrapper<Scalar,NullaryOp,\n    has_nullary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value,\n    has_unary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value,\n    has_binary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value>().operator()(op,i,j);\n  }\n  template <typename IndexType>\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i) const {\n    return nullary_wrapper<Scalar,NullaryOp,\n    has_nullary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value,\n    
has_unary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value,\n    has_binary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value>().operator()(op,i);\n  }\n\n  template <typename T, typename IndexType>\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i, IndexType j) const {\n    return nullary_wrapper<Scalar,NullaryOp,\n    has_nullary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value,\n    has_unary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value,\n    has_binary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value>().template packetOp<T>(op,i,j);\n  }\n  template <typename T, typename IndexType>\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i) const {\n    return nullary_wrapper<Scalar,NullaryOp,\n    has_nullary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value,\n    has_unary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value,\n    has_binary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value>().template packetOp<T>(op,i);\n  }\n};\n#endif // MSVC workaround\n\ntemplate<typename NullaryOp, typename PlainObjectType>\nstruct evaluator<CwiseNullaryOp<NullaryOp,PlainObjectType> >\n  : evaluator_base<CwiseNullaryOp<NullaryOp,PlainObjectType> >\n{\n  typedef CwiseNullaryOp<NullaryOp,PlainObjectType> XprType;\n  typedef typename internal::remove_all<PlainObjectType>::type PlainObjectTypeCleaned;\n\n  enum {\n    CoeffReadCost = internal::functor_traits<NullaryOp>::Cost,\n\n    Flags = (evaluator<PlainObjectTypeCleaned>::Flags\n          &  (  HereditaryBits\n              | (functor_has_linear_access<NullaryOp>::ret  ? LinearAccessBit : 0)\n              | (functor_traits<NullaryOp>::PacketAccess    ? PacketAccessBit : 0)))\n          | (functor_traits<NullaryOp>::IsRepeatable ? 
0 : EvalBeforeNestingBit),\n    Alignment = AlignedMax\n  };\n\n  EIGEN_DEVICE_FUNC explicit evaluator(const XprType& n)\n    : m_functor(n.functor()), m_wrapper()\n  {\n    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);\n  }\n\n  typedef typename XprType::CoeffReturnType CoeffReturnType;\n\n  template <typename IndexType>\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  CoeffReturnType coeff(IndexType row, IndexType col) const\n  {\n    return m_wrapper(m_functor, row, col);\n  }\n\n  template <typename IndexType>\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  CoeffReturnType coeff(IndexType index) const\n  {\n    return m_wrapper(m_functor,index);\n  }\n\n  template<int LoadMode, typename PacketType, typename IndexType>\n  EIGEN_STRONG_INLINE\n  PacketType packet(IndexType row, IndexType col) const\n  {\n    return m_wrapper.template packetOp<PacketType>(m_functor, row, col);\n  }\n\n  template<int LoadMode, typename PacketType, typename IndexType>\n  EIGEN_STRONG_INLINE\n  PacketType packet(IndexType index) const\n  {\n    return m_wrapper.template packetOp<PacketType>(m_functor, index);\n  }\n\nprotected:\n  const NullaryOp m_functor;\n  const internal::nullary_wrapper<CoeffReturnType,NullaryOp> m_wrapper;\n};\n\n// -------------------- CwiseUnaryOp --------------------\n\ntemplate<typename UnaryOp, typename ArgType>\nstruct unary_evaluator<CwiseUnaryOp<UnaryOp, ArgType>, IndexBased >\n  : evaluator_base<CwiseUnaryOp<UnaryOp, ArgType> >\n{\n  typedef CwiseUnaryOp<UnaryOp, ArgType> XprType;\n\n  enum {\n    CoeffReadCost = int(evaluator<ArgType>::CoeffReadCost) + int(functor_traits<UnaryOp>::Cost),\n\n    Flags = evaluator<ArgType>::Flags\n          & (HereditaryBits | LinearAccessBit | (functor_traits<UnaryOp>::PacketAccess ? PacketAccessBit : 0)),\n    Alignment = evaluator<ArgType>::Alignment\n  };\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  explicit unary_evaluator(const XprType& op) : m_d(op)\n  {\n    EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits<UnaryOp>::Cost);\n    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);\n  }\n\n  typedef typename XprType::CoeffReturnType CoeffReturnType;\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  CoeffReturnType coeff(Index row, Index col) const\n  {\n    return m_d.func()(m_d.argImpl.coeff(row, col));\n  }\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  CoeffReturnType coeff(Index index) const\n  {\n    return m_d.func()(m_d.argImpl.coeff(index));\n  }\n\n  template<int LoadMode, typename PacketType>\n  EIGEN_STRONG_INLINE\n  PacketType packet(Index row, Index col) const\n  {\n    return m_d.func().packetOp(m_d.argImpl.template packet<LoadMode, PacketType>(row, col));\n  }\n\n  template<int LoadMode, typename PacketType>\n  EIGEN_STRONG_INLINE\n  PacketType packet(Index index) const\n  {\n    return m_d.func().packetOp(m_d.argImpl.template packet<LoadMode, PacketType>(index));\n  }\n\nprotected:\n\n  // this helper permits to completely eliminate the functor if it is empty\n  struct Data\n  {\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n    Data(const XprType& xpr) : op(xpr.functor()), argImpl(xpr.nestedExpression()) {}\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n    const UnaryOp& func() const { return op; }\n    UnaryOp op;\n    evaluator<ArgType> argImpl;\n  };\n\n  Data m_d;\n};\n\n// -------------------- CwiseTernaryOp --------------------\n\n// this is a ternary expression\ntemplate<typename TernaryOp, typename Arg1, typename Arg2, typename Arg3>\nstruct evaluator<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> >\n  : public 
ternary_evaluator<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> >\n{\n  typedef CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> XprType;\n  typedef ternary_evaluator<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> > Base;\n\n  EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : Base(xpr) {}\n};\n\ntemplate<typename TernaryOp, typename Arg1, typename Arg2, typename Arg3>\nstruct ternary_evaluator<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3>, IndexBased, IndexBased>\n  : evaluator_base<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> >\n{\n  typedef CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> XprType;\n\n  enum {\n    CoeffReadCost = int(evaluator<Arg1>::CoeffReadCost) + int(evaluator<Arg2>::CoeffReadCost) + int(evaluator<Arg3>::CoeffReadCost) + int(functor_traits<TernaryOp>::Cost),\n\n    Arg1Flags = evaluator<Arg1>::Flags,\n    Arg2Flags = evaluator<Arg2>::Flags,\n    Arg3Flags = evaluator<Arg3>::Flags,\n    SameType = is_same<typename Arg1::Scalar,typename Arg2::Scalar>::value && is_same<typename Arg1::Scalar,typename Arg3::Scalar>::value,\n    StorageOrdersAgree = (int(Arg1Flags)&RowMajorBit)==(int(Arg2Flags)&RowMajorBit) && (int(Arg1Flags)&RowMajorBit)==(int(Arg3Flags)&RowMajorBit),\n    Flags0 = (int(Arg1Flags) | int(Arg2Flags) | int(Arg3Flags)) & (\n        HereditaryBits\n        | (int(Arg1Flags) & int(Arg2Flags) & int(Arg3Flags) &\n           ( (StorageOrdersAgree ? LinearAccessBit : 0)\n           | (functor_traits<TernaryOp>::PacketAccess && StorageOrdersAgree && SameType ? PacketAccessBit : 0)\n           )\n        )\n     ),\n    Flags = (Flags0 & ~RowMajorBit) | (Arg1Flags & RowMajorBit),\n    Alignment = EIGEN_PLAIN_ENUM_MIN(\n        EIGEN_PLAIN_ENUM_MIN(evaluator<Arg1>::Alignment, evaluator<Arg2>::Alignment),\n        evaluator<Arg3>::Alignment)\n  };\n\n  EIGEN_DEVICE_FUNC explicit ternary_evaluator(const XprType& xpr) : m_d(xpr)\n  {\n    EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits<TernaryOp>::Cost);\n    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);\n  }\n\n  typedef typename XprType::CoeffReturnType CoeffReturnType;\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  CoeffReturnType coeff(Index row, Index col) const\n  {\n    return m_d.func()(m_d.arg1Impl.coeff(row, col), m_d.arg2Impl.coeff(row, col), m_d.arg3Impl.coeff(row, col));\n  }\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  CoeffReturnType coeff(Index index) const\n  {\n    return m_d.func()(m_d.arg1Impl.coeff(index), m_d.arg2Impl.coeff(index), m_d.arg3Impl.coeff(index));\n  }\n\n  template<int LoadMode, typename PacketType>\n  EIGEN_STRONG_INLINE\n  PacketType packet(Index row, Index col) const\n  {\n    return m_d.func().packetOp(m_d.arg1Impl.template packet<LoadMode,PacketType>(row, col),\n                               m_d.arg2Impl.template packet<LoadMode,PacketType>(row, col),\n                               m_d.arg3Impl.template packet<LoadMode,PacketType>(row, col));\n  }\n\n  template<int LoadMode, typename PacketType>\n  EIGEN_STRONG_INLINE\n  PacketType packet(Index index) const\n  {\n    return m_d.func().packetOp(m_d.arg1Impl.template packet<LoadMode,PacketType>(index),\n                               m_d.arg2Impl.template packet<LoadMode,PacketType>(index),\n                               m_d.arg3Impl.template packet<LoadMode,PacketType>(index));\n  }\n\nprotected:\n  // this helper permits to completely eliminate the functor if it is empty\n  struct Data\n  {\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n    Data(const XprType& xpr) : op(xpr.functor()), arg1Impl(xpr.arg1()), arg2Impl(xpr.arg2()), 
arg3Impl(xpr.arg3()) {}\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n    const TernaryOp& func() const { return op; }\n    TernaryOp op;\n    evaluator<Arg1> arg1Impl;\n    evaluator<Arg2> arg2Impl;\n    evaluator<Arg3> arg3Impl;\n  };\n\n  Data m_d;\n};\n\n// -------------------- CwiseBinaryOp --------------------\n\n// this is a binary expression\ntemplate<typename BinaryOp, typename Lhs, typename Rhs>\nstruct evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >\n  : public binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >\n{\n  typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> XprType;\n  typedef binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs> > Base;\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  explicit evaluator(const XprType& xpr) : Base(xpr) {}\n};\n\ntemplate<typename BinaryOp, typename Lhs, typename Rhs>\nstruct binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>, IndexBased, IndexBased>\n  : evaluator_base<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >\n{\n  typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> XprType;\n\n  enum {\n    CoeffReadCost = int(evaluator<Lhs>::CoeffReadCost) + int(evaluator<Rhs>::CoeffReadCost) + int(functor_traits<BinaryOp>::Cost),\n\n    LhsFlags = evaluator<Lhs>::Flags,\n    RhsFlags = evaluator<Rhs>::Flags,\n    SameType = is_same<typename Lhs::Scalar,typename Rhs::Scalar>::value,\n    StorageOrdersAgree = (int(LhsFlags)&RowMajorBit)==(int(RhsFlags)&RowMajorBit),\n    Flags0 = (int(LhsFlags) | int(RhsFlags)) & (\n        HereditaryBits\n      | (int(LhsFlags) & int(RhsFlags) &\n           ( (StorageOrdersAgree ? LinearAccessBit : 0)\n           | (functor_traits<BinaryOp>::PacketAccess && StorageOrdersAgree && SameType ? PacketAccessBit : 0)\n           )\n        )\n     ),\n    Flags = (Flags0 & ~RowMajorBit) | (LhsFlags & RowMajorBit),\n    Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator<Lhs>::Alignment,evaluator<Rhs>::Alignment)\n  };\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  explicit binary_evaluator(const XprType& xpr) : m_d(xpr)\n  {\n    EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits<BinaryOp>::Cost);\n    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);\n  }\n\n  typedef typename XprType::CoeffReturnType CoeffReturnType;\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  CoeffReturnType coeff(Index row, Index col) const\n  {\n    return m_d.func()(m_d.lhsImpl.coeff(row, col), m_d.rhsImpl.coeff(row, col));\n  }\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  CoeffReturnType coeff(Index index) const\n  {\n    return m_d.func()(m_d.lhsImpl.coeff(index), m_d.rhsImpl.coeff(index));\n  }\n\n  template<int LoadMode, typename PacketType>\n  EIGEN_STRONG_INLINE\n  PacketType packet(Index row, Index col) const\n  {\n    return m_d.func().packetOp(m_d.lhsImpl.template packet<LoadMode,PacketType>(row, col),\n                               m_d.rhsImpl.template packet<LoadMode,PacketType>(row, col));\n  }\n\n  template<int LoadMode, typename PacketType>\n  EIGEN_STRONG_INLINE\n  PacketType packet(Index index) const\n  {\n    return m_d.func().packetOp(m_d.lhsImpl.template packet<LoadMode,PacketType>(index),\n                               m_d.rhsImpl.template packet<LoadMode,PacketType>(index));\n  }\n\nprotected:\n\n  // this helper permits to completely eliminate the functor if it is empty\n  struct Data\n  {\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n    Data(const XprType& xpr) : op(xpr.functor()), lhsImpl(xpr.lhs()), rhsImpl(xpr.rhs()) {}\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n    const BinaryOp& func() const { return op; }\n    BinaryOp op;\n    evaluator<Lhs> lhsImpl;\n    
evaluator<Rhs> rhsImpl;\n  };\n\n  Data m_d;\n};\n\n// -------------------- CwiseUnaryView --------------------\n\ntemplate<typename UnaryOp, typename ArgType>\nstruct unary_evaluator<CwiseUnaryView<UnaryOp, ArgType>, IndexBased>\n  : evaluator_base<CwiseUnaryView<UnaryOp, ArgType> >\n{\n  typedef CwiseUnaryView<UnaryOp, ArgType> XprType;\n\n  enum {\n    CoeffReadCost = int(evaluator<ArgType>::CoeffReadCost) + int(functor_traits<UnaryOp>::Cost),\n\n    Flags = (evaluator<ArgType>::Flags & (HereditaryBits | LinearAccessBit | DirectAccessBit)),\n\n    Alignment = 0 // FIXME it is not very clear why alignment is necessarily lost...\n  };\n\n  EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& op) : m_d(op)\n  {\n    EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits<UnaryOp>::Cost);\n    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);\n  }\n\n  typedef typename XprType::Scalar Scalar;\n  typedef typename XprType::CoeffReturnType CoeffReturnType;\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  CoeffReturnType coeff(Index row, Index col) const\n  {\n    return m_d.func()(m_d.argImpl.coeff(row, col));\n  }\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  CoeffReturnType coeff(Index index) const\n  {\n    return m_d.func()(m_d.argImpl.coeff(index));\n  }\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  Scalar& coeffRef(Index row, Index col)\n  {\n    return m_d.func()(m_d.argImpl.coeffRef(row, col));\n  }\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  Scalar& coeffRef(Index index)\n  {\n    return m_d.func()(m_d.argImpl.coeffRef(index));\n  }\n\nprotected:\n\n  // this helper permits to completely eliminate the functor if it is empty\n  struct Data\n  {\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n    Data(const XprType& xpr) : op(xpr.functor()), argImpl(xpr.nestedExpression()) {}\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n    const UnaryOp& func() const { return op; }\n    UnaryOp op;\n    evaluator<ArgType> argImpl;\n  };\n\n  Data m_d;\n};\n\n// -------------------- Map --------------------\n\n// FIXME perhaps the PlainObjectType could be provided by Derived::PlainObject ?\n// but that might complicate template specialization\ntemplate<typename Derived, typename PlainObjectType>\nstruct mapbase_evaluator;\n\ntemplate<typename Derived, typename PlainObjectType>\nstruct mapbase_evaluator : evaluator_base<Derived>\n{\n  typedef Derived  XprType;\n  typedef typename XprType::PointerType PointerType;\n  typedef typename XprType::Scalar Scalar;\n  typedef typename XprType::CoeffReturnType CoeffReturnType;\n\n  enum {\n    IsRowMajor = XprType::RowsAtCompileTime,\n    ColsAtCompileTime = XprType::ColsAtCompileTime,\n    CoeffReadCost = NumTraits<Scalar>::ReadCost\n  };\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  explicit mapbase_evaluator(const XprType& map)\n    : m_data(const_cast<PointerType>(map.data())),\n      m_innerStride(map.innerStride()),\n      m_outerStride(map.outerStride())\n  {\n    EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(evaluator<Derived>::Flags&PacketAccessBit, internal::inner_stride_at_compile_time<Derived>::ret==1),\n                        PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1);\n    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);\n  }\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  CoeffReturnType coeff(Index row, Index col) const\n  {\n    return m_data[col * colStride() + row * rowStride()];\n  }\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  CoeffReturnType coeff(Index index) const\n  {\n    return m_data[index * m_innerStride.value()];\n  }\n\n  EIGEN_DEVICE_FUNC 
EIGEN_STRONG_INLINE\n  Scalar& coeffRef(Index row, Index col)\n  {\n    return m_data[col * colStride() + row * rowStride()];\n  }\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  Scalar& coeffRef(Index index)\n  {\n    return m_data[index * m_innerStride.value()];\n  }\n\n  template<int LoadMode, typename PacketType>\n  EIGEN_STRONG_INLINE\n  PacketType packet(Index row, Index col) const\n  {\n    PointerType ptr = m_data + row * rowStride() + col * colStride();\n    return internal::ploadt<PacketType, LoadMode>(ptr);\n  }\n\n  template<int LoadMode, typename PacketType>\n  EIGEN_STRONG_INLINE\n  PacketType packet(Index index) const\n  {\n    return internal::ploadt<PacketType, LoadMode>(m_data + index * m_innerStride.value());\n  }\n\n  template<int StoreMode, typename PacketType>\n  EIGEN_STRONG_INLINE\n  void writePacket(Index row, Index col, const PacketType& x)\n  {\n    PointerType ptr = m_data + row * rowStride() + col * colStride();\n    return internal::pstoret<Scalar, PacketType, StoreMode>(ptr, x);\n  }\n\n  template<int StoreMode, typename PacketType>\n  EIGEN_STRONG_INLINE\n  void writePacket(Index index, const PacketType& x)\n  {\n    internal::pstoret<Scalar, PacketType, StoreMode>(m_data + index * m_innerStride.value(), x);\n  }\nprotected:\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR\n  Index rowStride() const EIGEN_NOEXCEPT {\n    return XprType::IsRowMajor ? m_outerStride.value() : m_innerStride.value();\n  }\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR\n  Index colStride() const EIGEN_NOEXCEPT {\n     return XprType::IsRowMajor ? m_innerStride.value() : m_outerStride.value();\n  }\n\n  PointerType m_data;\n  const internal::variable_if_dynamic<Index, XprType::InnerStrideAtCompileTime> m_innerStride;\n  const internal::variable_if_dynamic<Index, XprType::OuterStrideAtCompileTime> m_outerStride;\n};\n\ntemplate<typename PlainObjectType, int MapOptions, typename StrideType>\nstruct evaluator<Map<PlainObjectType, MapOptions, StrideType> >\n  : public mapbase_evaluator<Map<PlainObjectType, MapOptions, StrideType>, PlainObjectType>\n{\n  typedef Map<PlainObjectType, MapOptions, StrideType> XprType;\n  typedef typename XprType::Scalar Scalar;\n  // TODO: should check for smaller packet types once we can handle multi-sized packet types\n  typedef typename packet_traits<Scalar>::type PacketScalar;\n\n  enum {\n    InnerStrideAtCompileTime = StrideType::InnerStrideAtCompileTime == 0\n                             ? int(PlainObjectType::InnerStrideAtCompileTime)\n                             : int(StrideType::InnerStrideAtCompileTime),\n    OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0\n                             ? int(PlainObjectType::OuterStrideAtCompileTime)\n                             : int(StrideType::OuterStrideAtCompileTime),\n    HasNoInnerStride = InnerStrideAtCompileTime == 1,\n    HasNoOuterStride = StrideType::OuterStrideAtCompileTime == 0,\n    HasNoStride = HasNoInnerStride && HasNoOuterStride,\n    IsDynamicSize = PlainObjectType::SizeAtCompileTime==Dynamic,\n\n    PacketAccessMask = bool(HasNoInnerStride) ? ~int(0) : ~int(PacketAccessBit),\n    LinearAccessMask = bool(HasNoStride) || bool(PlainObjectType::IsVectorAtCompileTime) ? 
~int(0) : ~int(LinearAccessBit),\n    Flags = int( evaluator<PlainObjectType>::Flags) & (LinearAccessMask&PacketAccessMask),\n\n    Alignment = int(MapOptions)&int(AlignedMask)\n  };\n\n  EIGEN_DEVICE_FUNC explicit evaluator(const XprType& map)\n    : mapbase_evaluator<XprType, PlainObjectType>(map)\n  { }\n};\n\n// -------------------- Ref --------------------\n\ntemplate<typename PlainObjectType, int RefOptions, typename StrideType>\nstruct evaluator<Ref<PlainObjectType, RefOptions, StrideType> >\n  : public mapbase_evaluator<Ref<PlainObjectType, RefOptions, StrideType>, PlainObjectType>\n{\n  typedef Ref<PlainObjectType, RefOptions, StrideType> XprType;\n\n  enum {\n    Flags = evaluator<Map<PlainObjectType, RefOptions, StrideType> >::Flags,\n    Alignment = evaluator<Map<PlainObjectType, RefOptions, StrideType> >::Alignment\n  };\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  explicit evaluator(const XprType& ref)\n    : mapbase_evaluator<XprType, PlainObjectType>(ref)\n  { }\n};\n\n// -------------------- Block --------------------\n\ntemplate<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel,\n         bool HasDirectAccess = internal::has_direct_access<ArgType>::ret> struct block_evaluator;\n\ntemplate<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel>\nstruct evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> >\n  : block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel>\n{\n  typedef Block<ArgType, BlockRows, BlockCols, InnerPanel> XprType;\n  typedef typename XprType::Scalar Scalar;\n  // TODO: should check for smaller packet types once we can handle multi-sized packet types\n  typedef typename packet_traits<Scalar>::type PacketScalar;\n\n  enum {\n    CoeffReadCost = evaluator<ArgType>::CoeffReadCost,\n\n    RowsAtCompileTime = traits<XprType>::RowsAtCompileTime,\n    ColsAtCompileTime = traits<XprType>::ColsAtCompileTime,\n    MaxRowsAtCompileTime = traits<XprType>::MaxRowsAtCompileTime,\n    MaxColsAtCompileTime = traits<XprType>::MaxColsAtCompileTime,\n\n    ArgTypeIsRowMajor = (int(evaluator<ArgType>::Flags)&RowMajorBit) != 0,\n    IsRowMajor = (MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1) ? 1\n               : (MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1) ? 0\n               : ArgTypeIsRowMajor,\n    HasSameStorageOrderAsArgType = (IsRowMajor == ArgTypeIsRowMajor),\n    InnerSize = IsRowMajor ? int(ColsAtCompileTime) : int(RowsAtCompileTime),\n    InnerStrideAtCompileTime = HasSameStorageOrderAsArgType\n                             ? int(inner_stride_at_compile_time<ArgType>::ret)\n                             : int(outer_stride_at_compile_time<ArgType>::ret),\n    OuterStrideAtCompileTime = HasSameStorageOrderAsArgType\n                             ? int(outer_stride_at_compile_time<ArgType>::ret)\n                             : int(inner_stride_at_compile_time<ArgType>::ret),\n    MaskPacketAccessBit = (InnerStrideAtCompileTime == 1 || HasSameStorageOrderAsArgType) ? PacketAccessBit : 0,\n\n    FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1 || (InnerPanel && (evaluator<ArgType>::Flags&LinearAccessBit))) ? 
LinearAccessBit : 0,\n    FlagsRowMajorBit = XprType::Flags&RowMajorBit,\n    Flags0 = evaluator<ArgType>::Flags & ( (HereditaryBits & ~RowMajorBit) |\n                                           DirectAccessBit |\n                                           MaskPacketAccessBit),\n    Flags = Flags0 | FlagsLinearAccessBit | FlagsRowMajorBit,\n\n    PacketAlignment = unpacket_traits<PacketScalar>::alignment,\n    Alignment0 = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic)\n                             && (OuterStrideAtCompileTime!=0)\n                             && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % int(PacketAlignment)) == 0)) ? int(PacketAlignment) : 0,\n    Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator<ArgType>::Alignment, Alignment0)\n  };\n  typedef block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel> block_evaluator_type;\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  explicit evaluator(const XprType& block) : block_evaluator_type(block)\n  {\n    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);\n  }\n};\n\n// no direct-access => dispatch to a unary evaluator\ntemplate<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel>\nstruct block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel, /*HasDirectAccess*/ false>\n  : unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> >\n{\n  typedef Block<ArgType, BlockRows, BlockCols, InnerPanel> XprType;\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  explicit block_evaluator(const XprType& block)\n    : unary_evaluator<XprType>(block)\n  {}\n};\n\ntemplate<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel>\nstruct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBased>\n  : evaluator_base<Block<ArgType, BlockRows, BlockCols, InnerPanel> >\n{\n  typedef Block<ArgType, BlockRows, BlockCols, InnerPanel> XprType;\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  explicit unary_evaluator(const XprType& block)\n    : m_argImpl(block.nestedExpression()),\n      m_startRow(block.startRow()),\n      m_startCol(block.startCol()),\n      m_linear_offset(ForwardLinearAccess?(ArgType::IsRowMajor ? 
block.startRow()*block.nestedExpression().cols() + block.startCol() : block.startCol()*block.nestedExpression().rows() + block.startRow()):0)\n  { }\n\n  typedef typename XprType::Scalar Scalar;\n  typedef typename XprType::CoeffReturnType CoeffReturnType;\n\n  enum {\n    RowsAtCompileTime = XprType::RowsAtCompileTime,\n    ForwardLinearAccess = (InnerPanel || int(XprType::IsRowMajor)==int(ArgType::IsRowMajor)) && bool(evaluator<ArgType>::Flags&LinearAccessBit)\n  };\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  CoeffReturnType coeff(Index row, Index col) const\n  {\n    return m_argImpl.coeff(m_startRow.value() + row, m_startCol.value() + col);\n  }\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  CoeffReturnType coeff(Index index) const\n  {\n    return linear_coeff_impl(index, bool_constant<ForwardLinearAccess>());\n  }\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  Scalar& coeffRef(Index row, Index col)\n  {\n    return m_argImpl.coeffRef(m_startRow.value() + row, m_startCol.value() + col);\n  }\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  Scalar& coeffRef(Index index)\n  {\n    return linear_coeffRef_impl(index, bool_constant<ForwardLinearAccess>());\n  }\n\n  template<int LoadMode, typename PacketType>\n  EIGEN_STRONG_INLINE\n  PacketType packet(Index row, Index col) const\n  {\n    return m_argImpl.template packet<LoadMode,PacketType>(m_startRow.value() + row, m_startCol.value() + col);\n  }\n\n  template<int LoadMode, typename PacketType>\n  EIGEN_STRONG_INLINE\n  PacketType packet(Index index) const\n  {\n    if (ForwardLinearAccess)\n      return m_argImpl.template packet<LoadMode,PacketType>(m_linear_offset.value() + index);\n    else\n      return packet<LoadMode,PacketType>(RowsAtCompileTime == 1 ? 0 : index,\n                                         RowsAtCompileTime == 1 ? index : 0);\n  }\n\n  template<int StoreMode, typename PacketType>\n  EIGEN_STRONG_INLINE\n  void writePacket(Index row, Index col, const PacketType& x)\n  {\n    return m_argImpl.template writePacket<StoreMode,PacketType>(m_startRow.value() + row, m_startCol.value() + col, x);\n  }\n\n  template<int StoreMode, typename PacketType>\n  EIGEN_STRONG_INLINE\n  void writePacket(Index index, const PacketType& x)\n  {\n    if (ForwardLinearAccess)\n      return m_argImpl.template writePacket<StoreMode,PacketType>(m_linear_offset.value() + index, x);\n    else\n      return writePacket<StoreMode,PacketType>(RowsAtCompileTime == 1 ? 0 : index,\n                                              RowsAtCompileTime == 1 ? index : 0,\n                                              x);\n  }\n\nprotected:\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  CoeffReturnType linear_coeff_impl(Index index, internal::true_type /* ForwardLinearAccess */) const\n  {\n    return m_argImpl.coeff(m_linear_offset.value() + index);\n  }\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  CoeffReturnType linear_coeff_impl(Index index, internal::false_type /* not ForwardLinearAccess */) const\n  {\n    return coeff(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0);\n  }\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  Scalar& linear_coeffRef_impl(Index index, internal::true_type /* ForwardLinearAccess */)\n  {\n    return m_argImpl.coeffRef(m_linear_offset.value() + index);\n  }\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  Scalar& linear_coeffRef_impl(Index index, internal::false_type /* not ForwardLinearAccess */)\n  {\n    return coeffRef(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? 
index : 0);\n  }\n\n  evaluator<ArgType> m_argImpl;\n  const variable_if_dynamic<Index, (ArgType::RowsAtCompileTime == 1 && BlockRows==1) ? 0 : Dynamic> m_startRow;\n  const variable_if_dynamic<Index, (ArgType::ColsAtCompileTime == 1 && BlockCols==1) ? 0 : Dynamic> m_startCol;\n  const variable_if_dynamic<Index, ForwardLinearAccess ? Dynamic : 0> m_linear_offset;\n};\n\n// TODO: This evaluator does not actually use the child evaluator;\n// all action is via the data() as returned by the Block expression.\n\ntemplate<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel>\nstruct block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel, /* HasDirectAccess */ true>\n  : mapbase_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>,\n                      typename Block<ArgType, BlockRows, BlockCols, InnerPanel>::PlainObject>\n{\n  typedef Block<ArgType, BlockRows, BlockCols, InnerPanel> XprType;\n  typedef typename XprType::Scalar Scalar;\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  explicit block_evaluator(const XprType& block)\n    : mapbase_evaluator<XprType, typename XprType::PlainObject>(block)\n  {\n    // TODO: for the 3.3 release, this should be turned to an internal assertion, but let's keep it as is for the beta lifetime\n    eigen_assert(((internal::UIntPtr(block.data()) % EIGEN_PLAIN_ENUM_MAX(1,evaluator<XprType>::Alignment)) == 0) && \"data is not aligned\");\n  }\n};\n\n\n// -------------------- Select --------------------\n// NOTE shall we introduce a ternary_evaluator?\n\n// TODO enable vectorization for Select\ntemplate<typename ConditionMatrixType, typename ThenMatrixType, typename ElseMatrixType>\nstruct evaluator<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >\n  : evaluator_base<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >\n{\n  typedef Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> XprType;\n  enum {\n    CoeffReadCost = evaluator<ConditionMatrixType>::CoeffReadCost\n                  + EIGEN_PLAIN_ENUM_MAX(evaluator<ThenMatrixType>::CoeffReadCost,\n                                         evaluator<ElseMatrixType>::CoeffReadCost),\n\n    Flags = (unsigned int)evaluator<ThenMatrixType>::Flags & evaluator<ElseMatrixType>::Flags & HereditaryBits,\n\n    Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator<ThenMatrixType>::Alignment, evaluator<ElseMatrixType>::Alignment)\n  };\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  explicit evaluator(const XprType& select)\n    : m_conditionImpl(select.conditionMatrix()),\n      m_thenImpl(select.thenMatrix()),\n      m_elseImpl(select.elseMatrix())\n  {\n    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);\n  }\n\n  typedef typename XprType::CoeffReturnType CoeffReturnType;\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  CoeffReturnType coeff(Index row, Index col) const\n  {\n    if (m_conditionImpl.coeff(row, col))\n      return m_thenImpl.coeff(row, col);\n    else\n      return m_elseImpl.coeff(row, col);\n  }\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  CoeffReturnType coeff(Index index) const\n  {\n    if (m_conditionImpl.coeff(index))\n      return m_thenImpl.coeff(index);\n    else\n      return m_elseImpl.coeff(index);\n  }\n\nprotected:\n  evaluator<ConditionMatrixType> m_conditionImpl;\n  evaluator<ThenMatrixType> m_thenImpl;\n  evaluator<ElseMatrixType> m_elseImpl;\n};\n\n\n// -------------------- Replicate --------------------\n\ntemplate<typename ArgType, int RowFactor, int ColFactor>\nstruct unary_evaluator<Replicate<ArgType, RowFactor, ColFactor> >\n  : 
evaluator_base<Replicate<ArgType, RowFactor, ColFactor> >\n{\n  typedef Replicate<ArgType, RowFactor, ColFactor> XprType;\n  typedef typename XprType::CoeffReturnType CoeffReturnType;\n  enum {\n    Factor = (RowFactor==Dynamic || ColFactor==Dynamic) ? Dynamic : RowFactor*ColFactor\n  };\n  typedef typename internal::nested_eval<ArgType,Factor>::type ArgTypeNested;\n  typedef typename internal::remove_all<ArgTypeNested>::type ArgTypeNestedCleaned;\n\n  enum {\n    CoeffReadCost = evaluator<ArgTypeNestedCleaned>::CoeffReadCost,\n    LinearAccessMask = XprType::IsVectorAtCompileTime ? LinearAccessBit : 0,\n    Flags = (evaluator<ArgTypeNestedCleaned>::Flags & (HereditaryBits|LinearAccessMask) & ~RowMajorBit) | (traits<XprType>::Flags & RowMajorBit),\n\n    Alignment = evaluator<ArgTypeNestedCleaned>::Alignment\n  };\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  explicit unary_evaluator(const XprType& replicate)\n    : m_arg(replicate.nestedExpression()),\n      m_argImpl(m_arg),\n      m_rows(replicate.nestedExpression().rows()),\n      m_cols(replicate.nestedExpression().cols())\n  {}\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  CoeffReturnType coeff(Index row, Index col) const\n  {\n    // try to avoid using modulo; this is a pure optimization strategy\n    const Index actual_row = internal::traits<XprType>::RowsAtCompileTime==1 ? 0\n                           : RowFactor==1 ? row\n                           : row % m_rows.value();\n    const Index actual_col = internal::traits<XprType>::ColsAtCompileTime==1 ? 0\n                           : ColFactor==1 ? col\n                           : col % m_cols.value();\n\n    return m_argImpl.coeff(actual_row, actual_col);\n  }\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  CoeffReturnType coeff(Index index) const\n  {\n    // try to avoid using modulo; this is a pure optimization strategy\n    const Index actual_index = internal::traits<XprType>::RowsAtCompileTime==1\n                                  ? (ColFactor==1 ?  index : index%m_cols.value())\n                                  : (RowFactor==1 ?  index : index%m_rows.value());\n\n    return m_argImpl.coeff(actual_index);\n  }\n\n  template<int LoadMode, typename PacketType>\n  EIGEN_STRONG_INLINE\n  PacketType packet(Index row, Index col) const\n  {\n    const Index actual_row = internal::traits<XprType>::RowsAtCompileTime==1 ? 0\n                           : RowFactor==1 ? row\n                           : row % m_rows.value();\n    const Index actual_col = internal::traits<XprType>::ColsAtCompileTime==1 ? 0\n                           : ColFactor==1 ? col\n                           : col % m_cols.value();\n\n    return m_argImpl.template packet<LoadMode,PacketType>(actual_row, actual_col);\n  }\n\n  template<int LoadMode, typename PacketType>\n  EIGEN_STRONG_INLINE\n  PacketType packet(Index index) const\n  {\n    const Index actual_index = internal::traits<XprType>::RowsAtCompileTime==1\n                                  ? (ColFactor==1 ?  index : index%m_cols.value())\n                                  : (RowFactor==1 ?  
index : index%m_rows.value());\n\n    return m_argImpl.template packet<LoadMode,PacketType>(actual_index);\n  }\n\nprotected:\n  const ArgTypeNested m_arg;\n  evaluator<ArgTypeNestedCleaned> m_argImpl;\n  const variable_if_dynamic<Index, ArgType::RowsAtCompileTime> m_rows;\n  const variable_if_dynamic<Index, ArgType::ColsAtCompileTime> m_cols;\n};\n\n// -------------------- MatrixWrapper and ArrayWrapper --------------------\n//\n// evaluator_wrapper_base<T> is a common base class for the\n// MatrixWrapper and ArrayWrapper evaluators.\n\ntemplate<typename XprType>\nstruct evaluator_wrapper_base\n  : evaluator_base<XprType>\n{\n  typedef typename remove_all<typename XprType::NestedExpressionType>::type ArgType;\n  enum {\n    CoeffReadCost = evaluator<ArgType>::CoeffReadCost,\n    Flags = evaluator<ArgType>::Flags,\n    Alignment = evaluator<ArgType>::Alignment\n  };\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  explicit evaluator_wrapper_base(const ArgType& arg) : m_argImpl(arg) {}\n\n  typedef typename ArgType::Scalar Scalar;\n  typedef typename ArgType::CoeffReturnType CoeffReturnType;\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  CoeffReturnType coeff(Index row, Index col) const\n  {\n    return m_argImpl.coeff(row, col);\n  }\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  CoeffReturnType coeff(Index index) const\n  {\n    return m_argImpl.coeff(index);\n  }\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  Scalar& coeffRef(Index row, Index col)\n  {\n    return m_argImpl.coeffRef(row, col);\n  }\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  Scalar& coeffRef(Index index)\n  {\n    return m_argImpl.coeffRef(index);\n  }\n\n  template<int LoadMode, typename PacketType>\n  EIGEN_STRONG_INLINE\n  PacketType packet(Index row, Index col) const\n  {\n    return m_argImpl.template packet<LoadMode,PacketType>(row, col);\n  }\n\n  template<int LoadMode, typename PacketType>\n  EIGEN_STRONG_INLINE\n  PacketType packet(Index index) const\n  {\n    return m_argImpl.template packet<LoadMode,PacketType>(index);\n  }\n\n  template<int StoreMode, typename PacketType>\n  EIGEN_STRONG_INLINE\n  void writePacket(Index row, Index col, const PacketType& x)\n  {\n    m_argImpl.template writePacket<StoreMode>(row, col, x);\n  }\n\n  template<int StoreMode, typename PacketType>\n  EIGEN_STRONG_INLINE\n  void writePacket(Index index, const PacketType& x)\n  {\n    m_argImpl.template writePacket<StoreMode>(index, x);\n  }\n\nprotected:\n  evaluator<ArgType> m_argImpl;\n};\n\ntemplate<typename TArgType>\nstruct unary_evaluator<MatrixWrapper<TArgType> >\n  : evaluator_wrapper_base<MatrixWrapper<TArgType> >\n{\n  typedef MatrixWrapper<TArgType> XprType;\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  explicit unary_evaluator(const XprType& wrapper)\n    : evaluator_wrapper_base<MatrixWrapper<TArgType> >(wrapper.nestedExpression())\n  { }\n};\n\ntemplate<typename TArgType>\nstruct unary_evaluator<ArrayWrapper<TArgType> >\n  : evaluator_wrapper_base<ArrayWrapper<TArgType> >\n{\n  typedef ArrayWrapper<TArgType> XprType;\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  explicit unary_evaluator(const XprType& wrapper)\n    : evaluator_wrapper_base<ArrayWrapper<TArgType> >(wrapper.nestedExpression())\n  { }\n};\n\n\n// -------------------- Reverse --------------------\n\n// defined in Reverse.h:\ntemplate<typename PacketType, bool ReversePacket> struct reverse_packet_cond;\n\ntemplate<typename ArgType, int Direction>\nstruct unary_evaluator<Reverse<ArgType, Direction> >\n  : evaluator_base<Reverse<ArgType, Direction> >\n{\n  
typedef Reverse<ArgType, Direction> XprType;\n  typedef typename XprType::Scalar Scalar;\n  typedef typename XprType::CoeffReturnType CoeffReturnType;\n\n  enum {\n    IsRowMajor = XprType::IsRowMajor,\n    IsColMajor = !IsRowMajor,\n    ReverseRow = (Direction == Vertical)   || (Direction == BothDirections),\n    ReverseCol = (Direction == Horizontal) || (Direction == BothDirections),\n    ReversePacket = (Direction == BothDirections)\n                    || ((Direction == Vertical)   && IsColMajor)\n                    || ((Direction == Horizontal) && IsRowMajor),\n\n    CoeffReadCost = evaluator<ArgType>::CoeffReadCost,\n\n    // let's enable LinearAccess only with vectorization because of the product overhead\n    // FIXME enable DirectAccess with negative strides?\n    Flags0 = evaluator<ArgType>::Flags,\n    LinearAccess = ( (Direction==BothDirections) && (int(Flags0)&PacketAccessBit) )\n                  || ((ReverseRow && XprType::ColsAtCompileTime==1) || (ReverseCol && XprType::RowsAtCompileTime==1))\n                 ? LinearAccessBit : 0,\n\n    Flags = int(Flags0) & (HereditaryBits | PacketAccessBit | LinearAccess),\n\n    Alignment = 0 // FIXME in some rare cases, Alignment could be preserved, like a Vector4f.\n  };\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  explicit unary_evaluator(const XprType& reverse)\n    : m_argImpl(reverse.nestedExpression()),\n      m_rows(ReverseRow ? reverse.nestedExpression().rows() : 1),\n      m_cols(ReverseCol ? reverse.nestedExpression().cols() : 1)\n  { }\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  CoeffReturnType coeff(Index row, Index col) const\n  {\n    return m_argImpl.coeff(ReverseRow ? m_rows.value() - row - 1 : row,\n                           ReverseCol ? m_cols.value() - col - 1 : col);\n  }\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  CoeffReturnType coeff(Index index) const\n  {\n    return m_argImpl.coeff(m_rows.value() * m_cols.value() - index - 1);\n  }\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  Scalar& coeffRef(Index row, Index col)\n  {\n    return m_argImpl.coeffRef(ReverseRow ? m_rows.value() - row - 1 : row,\n                              ReverseCol ? m_cols.value() - col - 1 : col);\n  }\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  Scalar& coeffRef(Index index)\n  {\n    return m_argImpl.coeffRef(m_rows.value() * m_cols.value() - index - 1);\n  }\n\n  template<int LoadMode, typename PacketType>\n  EIGEN_STRONG_INLINE\n  PacketType packet(Index row, Index col) const\n  {\n    enum {\n      PacketSize = unpacket_traits<PacketType>::size,\n      OffsetRow  = ReverseRow && IsColMajor ? PacketSize : 1,\n      OffsetCol  = ReverseCol && IsRowMajor ? PacketSize : 1\n    };\n    typedef internal::reverse_packet_cond<PacketType,ReversePacket> reverse_packet;\n    return reverse_packet::run(m_argImpl.template packet<LoadMode,PacketType>(\n                                  ReverseRow ? m_rows.value() - row - OffsetRow : row,\n                                  ReverseCol ? 
m_cols.value() - col - OffsetCol : col));\n  }\n\n  template<int LoadMode, typename PacketType>\n  EIGEN_STRONG_INLINE\n  PacketType packet(Index index) const\n  {\n    enum { PacketSize = unpacket_traits<PacketType>::size };\n    return preverse(m_argImpl.template packet<LoadMode,PacketType>(m_rows.value() * m_cols.value() - index - PacketSize));\n  }\n\n  template<int LoadMode, typename PacketType>\n  EIGEN_STRONG_INLINE\n  void writePacket(Index row, Index col, const PacketType& x)\n  {\n    // FIXME we could factorize some code with packet(i,j)\n    enum {\n      PacketSize = unpacket_traits<PacketType>::size,\n      OffsetRow  = ReverseRow && IsColMajor ? PacketSize : 1,\n      OffsetCol  = ReverseCol && IsRowMajor ? PacketSize : 1\n    };\n    typedef internal::reverse_packet_cond<PacketType,ReversePacket> reverse_packet;\n    m_argImpl.template writePacket<LoadMode>(\n                                  ReverseRow ? m_rows.value() - row - OffsetRow : row,\n                                  ReverseCol ? m_cols.value() - col - OffsetCol : col,\n                                  reverse_packet::run(x));\n  }\n\n  template<int LoadMode, typename PacketType>\n  EIGEN_STRONG_INLINE\n  void writePacket(Index index, const PacketType& x)\n  {\n    enum { PacketSize = unpacket_traits<PacketType>::size };\n    m_argImpl.template writePacket<LoadMode>\n      (m_rows.value() * m_cols.value() - index - PacketSize, preverse(x));\n  }\n\nprotected:\n  evaluator<ArgType> m_argImpl;\n\n  // If we do not reverse rows, then we do not need to know the number of rows; same for columns\n  // Nonetheless, in this case it is important to set to 1 such that the coeff(index) method works fine for vectors.\n  const variable_if_dynamic<Index, ReverseRow ? ArgType::RowsAtCompileTime : 1> m_rows;\n  const variable_if_dynamic<Index, ReverseCol ? 
ArgType::ColsAtCompileTime : 1> m_cols;\n};\n\n\n// -------------------- Diagonal --------------------\n\ntemplate<typename ArgType, int DiagIndex>\nstruct evaluator<Diagonal<ArgType, DiagIndex> >\n  : evaluator_base<Diagonal<ArgType, DiagIndex> >\n{\n  typedef Diagonal<ArgType, DiagIndex> XprType;\n\n  enum {\n    CoeffReadCost = evaluator<ArgType>::CoeffReadCost,\n\n    Flags = (unsigned int)(evaluator<ArgType>::Flags & (HereditaryBits | DirectAccessBit) & ~RowMajorBit) | LinearAccessBit,\n\n    Alignment = 0\n  };\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  explicit evaluator(const XprType& diagonal)\n    : m_argImpl(diagonal.nestedExpression()),\n      m_index(diagonal.index())\n  { }\n\n  typedef typename XprType::Scalar Scalar;\n  typedef typename XprType::CoeffReturnType CoeffReturnType;\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  CoeffReturnType coeff(Index row, Index) const\n  {\n    return m_argImpl.coeff(row + rowOffset(), row + colOffset());\n  }\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  CoeffReturnType coeff(Index index) const\n  {\n    return m_argImpl.coeff(index + rowOffset(), index + colOffset());\n  }\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  Scalar& coeffRef(Index row, Index)\n  {\n    return m_argImpl.coeffRef(row + rowOffset(), row + colOffset());\n  }\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  Scalar& coeffRef(Index index)\n  {\n    return m_argImpl.coeffRef(index + rowOffset(), index + colOffset());\n  }\n\nprotected:\n  evaluator<ArgType> m_argImpl;\n  const internal::variable_if_dynamicindex<Index, XprType::DiagIndex> m_index;\n\nprivate:\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR\n  Index rowOffset() const { return m_index.value() > 0 ? 0 : -m_index.value(); }\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR\n  Index colOffset() const { return m_index.value() > 0 ? 
m_index.value() : 0; }\n};\n\n\n//----------------------------------------------------------------------\n// deprecated code\n//----------------------------------------------------------------------\n\n// -------------------- EvalToTemp --------------------\n\n// expression class for evaluating nested expression to a temporary\n\ntemplate<typename ArgType> class EvalToTemp;\n\ntemplate<typename ArgType>\nstruct traits<EvalToTemp<ArgType> >\n  : public traits<ArgType>\n{ };\n\ntemplate<typename ArgType>\nclass EvalToTemp\n  : public dense_xpr_base<EvalToTemp<ArgType> >::type\n{\n public:\n\n  typedef typename dense_xpr_base<EvalToTemp>::type Base;\n  EIGEN_GENERIC_PUBLIC_INTERFACE(EvalToTemp)\n\n  explicit EvalToTemp(const ArgType& arg)\n    : m_arg(arg)\n  { }\n\n  const ArgType& arg() const\n  {\n    return m_arg;\n  }\n\n  EIGEN_CONSTEXPR Index rows() const EIGEN_NOEXCEPT\n  {\n    return m_arg.rows();\n  }\n\n  EIGEN_CONSTEXPR Index cols() const EIGEN_NOEXCEPT\n  {\n    return m_arg.cols();\n  }\n\n private:\n  const ArgType& m_arg;\n};\n\ntemplate<typename ArgType>\nstruct evaluator<EvalToTemp<ArgType> >\n  : public evaluator<typename ArgType::PlainObject>\n{\n  typedef EvalToTemp<ArgType>                   XprType;\n  typedef typename ArgType::PlainObject         PlainObject;\n  typedef evaluator<PlainObject> Base;\n\n  EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr)\n    : m_result(xpr.arg())\n  {\n    ::new (static_cast<Base*>(this)) Base(m_result);\n  }\n\n  // This constructor is used when nesting an EvalTo evaluator in another evaluator\n  EIGEN_DEVICE_FUNC evaluator(const ArgType& arg)\n    : m_result(arg)\n  {\n    ::new (static_cast<Base*>(this)) Base(m_result);\n  }\n\nprotected:\n  PlainObject m_result;\n};\n\n} // namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_COREEVALUATORS_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/CoreIterators.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_COREITERATORS_H\n#define EIGEN_COREITERATORS_H\n\nnamespace Eigen { \n\n/* This file contains the respective InnerIterator definition of the expressions defined in Eigen/Core\n */\n\nnamespace internal {\n\ntemplate<typename XprType, typename EvaluatorKind>\nclass inner_iterator_selector;\n\n}\n\n/** \\class InnerIterator\n  * \\brief An InnerIterator allows to loop over the element of any matrix expression.\n  * \n  * \\warning To be used with care because an evaluator is constructed every time an InnerIterator iterator is constructed.\n  * \n  * TODO: add a usage example\n  */\ntemplate<typename XprType>\nclass InnerIterator\n{\nprotected:\n  typedef internal::inner_iterator_selector<XprType, typename internal::evaluator_traits<XprType>::Kind> IteratorType;\n  typedef internal::evaluator<XprType> EvaluatorType;\n  typedef typename internal::traits<XprType>::Scalar Scalar;\npublic:\n  /** Construct an iterator over the \\a outerId -th row or column of \\a xpr */\n  InnerIterator(const XprType &xpr, const Index &outerId)\n    : m_eval(xpr), m_iter(m_eval, outerId, xpr.innerSize())\n  {}\n  \n  /// \\returns the value of the current coefficient.\n  EIGEN_STRONG_INLINE Scalar value() const          { return m_iter.value(); }\n  /** Increment the iterator \\c *this to the next non-zero coefficient.\n    * Explicit zeros are not skipped over. To skip explicit zeros, see class SparseView\n    */\n  EIGEN_STRONG_INLINE InnerIterator& operator++()   { m_iter.operator++(); return *this; }\n  EIGEN_STRONG_INLINE InnerIterator& operator+=(Index i) { m_iter.operator+=(i); return *this; }\n  EIGEN_STRONG_INLINE InnerIterator operator+(Index i) \n  { InnerIterator result(*this); result+=i; return result; }\n    \n\n  /// \\returns the column or row index of the current coefficient.\n  EIGEN_STRONG_INLINE Index index() const           { return m_iter.index(); }\n  /// \\returns the row index of the current coefficient.\n  EIGEN_STRONG_INLINE Index row() const             { return m_iter.row(); }\n  /// \\returns the column index of the current coefficient.\n  EIGEN_STRONG_INLINE Index col() const             { return m_iter.col(); }\n  /// \\returns \\c true if the iterator \\c *this still references a valid coefficient.\n  EIGEN_STRONG_INLINE operator bool() const         { return m_iter; }\n  \nprotected:\n  EvaluatorType m_eval;\n  IteratorType m_iter;\nprivate:\n  // If you get here, then you're not using the right InnerIterator type, e.g.:\n  //   SparseMatrix<double,RowMajor> A;\n  //   SparseMatrix<double>::InnerIterator it(A,0);\n  template<typename T> InnerIterator(const EigenBase<T>&,Index outer);\n};\n\nnamespace internal {\n\n// Generic inner iterator implementation for dense objects\ntemplate<typename XprType>\nclass inner_iterator_selector<XprType, IndexBased>\n{\nprotected:\n  typedef evaluator<XprType> EvaluatorType;\n  typedef typename traits<XprType>::Scalar Scalar;\n  enum { IsRowMajor = (XprType::Flags&RowMajorBit)==RowMajorBit };\n  \npublic:\n  EIGEN_STRONG_INLINE inner_iterator_selector(const EvaluatorType &eval, const Index &outerId, const Index &innerSize)\n    : m_eval(eval), 
m_inner(0), m_outer(outerId), m_end(innerSize)\n  {}\n\n  EIGEN_STRONG_INLINE Scalar value() const\n  {\n    return (IsRowMajor) ? m_eval.coeff(m_outer, m_inner)\n                        : m_eval.coeff(m_inner, m_outer);\n  }\n\n  EIGEN_STRONG_INLINE inner_iterator_selector& operator++() { m_inner++; return *this; }\n\n  EIGEN_STRONG_INLINE Index index() const { return m_inner; }\n  inline Index row() const { return IsRowMajor ? m_outer : index(); }\n  inline Index col() const { return IsRowMajor ? index() : m_outer; }\n\n  EIGEN_STRONG_INLINE operator bool() const { return m_inner < m_end && m_inner>=0; }\n\nprotected:\n  const EvaluatorType& m_eval;\n  Index m_inner;\n  const Index m_outer;\n  const Index m_end;\n};\n\n// For iterator-based evaluator, inner-iterator is already implemented as\n// evaluator<>::InnerIterator\ntemplate<typename XprType>\nclass inner_iterator_selector<XprType, IteratorBased>\n : public evaluator<XprType>::InnerIterator\n{\nprotected:\n  typedef typename evaluator<XprType>::InnerIterator Base;\n  typedef evaluator<XprType> EvaluatorType;\n  \npublic:\n  EIGEN_STRONG_INLINE inner_iterator_selector(const EvaluatorType &eval, const Index &outerId, const Index &/*innerSize*/)\n    : Base(eval, outerId)\n  {}  \n};\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_COREITERATORS_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/CwiseBinaryOp.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>\n// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_CWISE_BINARY_OP_H\n#define EIGEN_CWISE_BINARY_OP_H\n\nnamespace Eigen {\n\nnamespace internal {\ntemplate<typename BinaryOp, typename Lhs, typename Rhs>\nstruct traits<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >\n{\n  // we must not inherit from traits<Lhs> since it has\n  // the potential to cause problems with MSVC\n  typedef typename remove_all<Lhs>::type Ancestor;\n  typedef typename traits<Ancestor>::XprKind XprKind;\n  enum {\n    RowsAtCompileTime = traits<Ancestor>::RowsAtCompileTime,\n    ColsAtCompileTime = traits<Ancestor>::ColsAtCompileTime,\n    MaxRowsAtCompileTime = traits<Ancestor>::MaxRowsAtCompileTime,\n    MaxColsAtCompileTime = traits<Ancestor>::MaxColsAtCompileTime\n  };\n\n  // even though we require Lhs and Rhs to have the same scalar type (see CwiseBinaryOp constructor),\n  // we still want to handle the case when the result type is different.\n  typedef typename result_of<\n                     BinaryOp(\n                       const typename Lhs::Scalar&,\n                       const typename Rhs::Scalar&\n                     )\n                   >::type Scalar;\n  typedef typename cwise_promote_storage_type<typename traits<Lhs>::StorageKind,\n                                              typename traits<Rhs>::StorageKind,\n                                              BinaryOp>::ret StorageKind;\n  typedef typename promote_index_type<typename traits<Lhs>::StorageIndex,\n                                      typename traits<Rhs>::StorageIndex>::type StorageIndex;\n  typedef typename Lhs::Nested LhsNested;\n  typedef typename Rhs::Nested RhsNested;\n  typedef typename remove_reference<LhsNested>::type _LhsNested;\n  typedef typename remove_reference<RhsNested>::type _RhsNested;\n  enum {\n    Flags = cwise_promote_storage_order<typename traits<Lhs>::StorageKind,typename traits<Rhs>::StorageKind,_LhsNested::Flags & RowMajorBit,_RhsNested::Flags & RowMajorBit>::value\n  };\n};\n} // end namespace internal\n\ntemplate<typename BinaryOp, typename Lhs, typename Rhs, typename StorageKind>\nclass CwiseBinaryOpImpl;\n\n/** \\class CwiseBinaryOp\n  * \\ingroup Core_Module\n  *\n  * \\brief Generic expression where a coefficient-wise binary operator is applied to two expressions\n  *\n  * \\tparam BinaryOp template functor implementing the operator\n  * \\tparam LhsType the type of the left-hand side\n  * \\tparam RhsType the type of the right-hand side\n  *\n  * This class represents an expression  where a coefficient-wise binary operator is applied to two expressions.\n  * It is the return type of binary operators, by which we mean only those binary operators where\n  * both the left-hand side and the right-hand side are Eigen expressions.\n  * For example, the return type of matrix1+matrix2 is a CwiseBinaryOp.\n  *\n  * Most of the time, this is the only way that it is used, so you typically don't have to name\n  * CwiseBinaryOp types explicitly.\n  *\n  * \\sa MatrixBase::binaryExpr(const MatrixBase<OtherDerived> &,const CustomBinaryOp &) const, class CwiseUnaryOp, class CwiseNullaryOp\n  */\ntemplate<typename 
BinaryOp, typename LhsType, typename RhsType>\nclass CwiseBinaryOp :\n  public CwiseBinaryOpImpl<\n          BinaryOp, LhsType, RhsType,\n          typename internal::cwise_promote_storage_type<typename internal::traits<LhsType>::StorageKind,\n                                                        typename internal::traits<RhsType>::StorageKind,\n                                                        BinaryOp>::ret>,\n  internal::no_assignment_operator\n{\n  public:\n\n    typedef typename internal::remove_all<BinaryOp>::type Functor;\n    typedef typename internal::remove_all<LhsType>::type Lhs;\n    typedef typename internal::remove_all<RhsType>::type Rhs;\n\n    typedef typename CwiseBinaryOpImpl<\n        BinaryOp, LhsType, RhsType,\n        typename internal::cwise_promote_storage_type<typename internal::traits<LhsType>::StorageKind,\n                                                      typename internal::traits<Rhs>::StorageKind,\n                                                      BinaryOp>::ret>::Base Base;\n    EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseBinaryOp)\n\n    typedef typename internal::ref_selector<LhsType>::type LhsNested;\n    typedef typename internal::ref_selector<RhsType>::type RhsNested;\n    typedef typename internal::remove_reference<LhsNested>::type _LhsNested;\n    typedef typename internal::remove_reference<RhsNested>::type _RhsNested;\n\n#if EIGEN_COMP_MSVC && EIGEN_HAS_CXX11\n    //Required for Visual Studio or the Copy constructor will probably not get inlined!\n    EIGEN_STRONG_INLINE\n    CwiseBinaryOp(const CwiseBinaryOp<BinaryOp,LhsType,RhsType>&) = default;\n#endif\n\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n    CwiseBinaryOp(const Lhs& aLhs, const Rhs& aRhs, const BinaryOp& func = BinaryOp())\n      : m_lhs(aLhs), m_rhs(aRhs), m_functor(func)\n    {\n      EIGEN_CHECK_BINARY_COMPATIBILIY(BinaryOp,typename Lhs::Scalar,typename Rhs::Scalar);\n      // require the sizes to match\n      EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Lhs, Rhs)\n      eigen_assert(aLhs.rows() == aRhs.rows() && aLhs.cols() == aRhs.cols());\n    }\n\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR\n    Index rows() const EIGEN_NOEXCEPT {\n      // return the fixed size type if available to enable compile time optimizations\n      return internal::traits<typename internal::remove_all<LhsNested>::type>::RowsAtCompileTime==Dynamic ? m_rhs.rows() : m_lhs.rows();\n    }\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR\n    Index cols() const EIGEN_NOEXCEPT {\n      // return the fixed size type if available to enable compile time optimizations\n      return internal::traits<typename internal::remove_all<LhsNested>::type>::ColsAtCompileTime==Dynamic ? 
m_rhs.cols() : m_lhs.cols();\n    }\n\n    /** \\returns the left hand side nested expression */\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n    const _LhsNested& lhs() const { return m_lhs; }\n    /** \\returns the right hand side nested expression */\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n    const _RhsNested& rhs() const { return m_rhs; }\n    /** \\returns the functor representing the binary operation */\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n    const BinaryOp& functor() const { return m_functor; }\n\n  protected:\n    LhsNested m_lhs;\n    RhsNested m_rhs;\n    const BinaryOp m_functor;\n};\n\n// Generic API dispatcher\ntemplate<typename BinaryOp, typename Lhs, typename Rhs, typename StorageKind>\nclass CwiseBinaryOpImpl\n  : public internal::generic_xpr_base<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >::type\n{\npublic:\n  typedef typename internal::generic_xpr_base<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >::type Base;\n};\n\n/** replaces \\c *this by \\c *this - \\a other.\n  *\n  * \\returns a reference to \\c *this\n  */\ntemplate<typename Derived>\ntemplate<typename OtherDerived>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived &\nMatrixBase<Derived>::operator-=(const MatrixBase<OtherDerived> &other)\n{\n  call_assignment(derived(), other.derived(), internal::sub_assign_op<Scalar,typename OtherDerived::Scalar>());\n  return derived();\n}\n\n/** replaces \\c *this by \\c *this + \\a other.\n  *\n  * \\returns a reference to \\c *this\n  */\ntemplate<typename Derived>\ntemplate<typename OtherDerived>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived &\nMatrixBase<Derived>::operator+=(const MatrixBase<OtherDerived>& other)\n{\n  call_assignment(derived(), other.derived(), internal::add_assign_op<Scalar,typename OtherDerived::Scalar>());\n  return derived();\n}\n\n} // end namespace Eigen\n\n#endif // EIGEN_CWISE_BINARY_OP_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/CwiseNullaryOp.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_CWISE_NULLARY_OP_H\n#define EIGEN_CWISE_NULLARY_OP_H\n\nnamespace Eigen {\n\nnamespace internal {\ntemplate<typename NullaryOp, typename PlainObjectType>\nstruct traits<CwiseNullaryOp<NullaryOp, PlainObjectType> > : traits<PlainObjectType>\n{\n  enum {\n    Flags = traits<PlainObjectType>::Flags & RowMajorBit\n  };\n};\n\n} // namespace internal\n\n/** \\class CwiseNullaryOp\n  * \\ingroup Core_Module\n  *\n  * \\brief Generic expression of a matrix where all coefficients are defined by a functor\n  *\n  * \\tparam NullaryOp template functor implementing the operator\n  * \\tparam PlainObjectType the underlying plain matrix/array type\n  *\n  * This class represents an expression of a generic nullary operator.\n  * It is the return type of the Ones(), Zero(), Constant(), Identity() and Random() methods,\n  * and most of the time this is the only way it is used.\n  *\n  * However, if you want to write a function returning such an expression, you\n  * will need to use this class.\n  *\n  * The functor NullaryOp must expose one of the following method:\n    <table class=\"manual\">\n    <tr            ><td>\\c operator()() </td><td>if the procedural generation does not depend on the coefficient entries (e.g., random numbers)</td></tr>\n    <tr class=\"alt\"><td>\\c operator()(Index i)</td><td>if the procedural generation makes sense for vectors only and that it depends on the coefficient index \\c i (e.g., linspace) </td></tr>\n    <tr            ><td>\\c operator()(Index i,Index j)</td><td>if the procedural generation depends on the matrix coordinates \\c i, \\c j (e.g., to generate a checkerboard with 0 and 1)</td></tr>\n    </table>\n  * It is also possible to expose the last two operators if the generation makes sense for matrices but can be optimized for vectors.\n  *\n  * See DenseBase::NullaryExpr(Index,const CustomNullaryOp&) for an example binding\n  * C++11 random number generators.\n  *\n  * A nullary expression can also be used to implement custom sophisticated matrix manipulations\n  * that cannot be covered by the existing set of natively supported matrix manipulations.\n  * See this \\ref TopicCustomizing_NullaryExpr \"page\" for some examples and additional explanations\n  * on the behavior of CwiseNullaryOp.\n  *\n  * \\sa class CwiseUnaryOp, class CwiseBinaryOp, DenseBase::NullaryExpr\n  */\ntemplate<typename NullaryOp, typename PlainObjectType>\nclass CwiseNullaryOp : public internal::dense_xpr_base< CwiseNullaryOp<NullaryOp, PlainObjectType> >::type, internal::no_assignment_operator\n{\n  public:\n\n    typedef typename internal::dense_xpr_base<CwiseNullaryOp>::type Base;\n    EIGEN_DENSE_PUBLIC_INTERFACE(CwiseNullaryOp)\n\n    EIGEN_DEVICE_FUNC\n    CwiseNullaryOp(Index rows, Index cols, const NullaryOp& func = NullaryOp())\n      : m_rows(rows), m_cols(cols), m_functor(func)\n    {\n      eigen_assert(rows >= 0\n            && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows)\n            &&  cols >= 0\n            && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols));\n    }\n\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR\n    Index rows() 
const { return m_rows.value(); }\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR\n    Index cols() const { return m_cols.value(); }\n\n    /** \\returns the functor representing the nullary operation */\n    EIGEN_DEVICE_FUNC\n    const NullaryOp& functor() const { return m_functor; }\n\n  protected:\n    const internal::variable_if_dynamic<Index, RowsAtCompileTime> m_rows;\n    const internal::variable_if_dynamic<Index, ColsAtCompileTime> m_cols;\n    const NullaryOp m_functor;\n};\n\n\n/** \\returns an expression of a matrix defined by a custom functor \\a func\n  *\n  * The parameters \\a rows and \\a cols are the number of rows and of columns of\n  * the returned matrix. Must be compatible with this MatrixBase type.\n  *\n  * This variant is meant to be used for dynamic-size matrix types. For fixed-size types,\n  * it is redundant to pass \\a rows and \\a cols as arguments, so Zero() should be used\n  * instead.\n  *\n  * The template parameter \\a CustomNullaryOp is the type of the functor.\n  *\n  * \\sa class CwiseNullaryOp\n  */\ntemplate<typename Derived>\ntemplate<typename CustomNullaryOp>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n#ifndef EIGEN_PARSED_BY_DOXYGEN\nconst CwiseNullaryOp<CustomNullaryOp,typename DenseBase<Derived>::PlainObject>\n#else\nconst CwiseNullaryOp<CustomNullaryOp,PlainObject>\n#endif\nDenseBase<Derived>::NullaryExpr(Index rows, Index cols, const CustomNullaryOp& func)\n{\n  return CwiseNullaryOp<CustomNullaryOp, PlainObject>(rows, cols, func);\n}\n\n/** \\returns an expression of a matrix defined by a custom functor \\a func\n  *\n  * The parameter \\a size is the size of the returned vector.\n  * Must be compatible with this MatrixBase type.\n  *\n  * \\only_for_vectors\n  *\n  * This variant is meant to be used for dynamic-size vector types. For fixed-size types,\n  * it is redundant to pass \\a size as argument, so Zero() should be used\n  * instead.\n  *\n  * The template parameter \\a CustomNullaryOp is the type of the functor.\n  *\n  * Here is an example with C++11 random generators: \\include random_cpp11.cpp\n  * Output: \\verbinclude random_cpp11.out\n  *\n  * \\sa class CwiseNullaryOp\n  */\ntemplate<typename Derived>\ntemplate<typename CustomNullaryOp>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n#ifndef EIGEN_PARSED_BY_DOXYGEN\nconst CwiseNullaryOp<CustomNullaryOp, typename DenseBase<Derived>::PlainObject>\n#else\nconst CwiseNullaryOp<CustomNullaryOp, PlainObject>\n#endif\nDenseBase<Derived>::NullaryExpr(Index size, const CustomNullaryOp& func)\n{\n  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)\n  if(RowsAtCompileTime == 1) return CwiseNullaryOp<CustomNullaryOp, PlainObject>(1, size, func);\n  else return CwiseNullaryOp<CustomNullaryOp, PlainObject>(size, 1, func);\n}\n\n/** \\returns an expression of a matrix defined by a custom functor \\a func\n  *\n  * This variant is only for fixed-size DenseBase types. 
For dynamic-size types, you\n  * need to use the variants taking size arguments.\n  *\n  * The template parameter \\a CustomNullaryOp is the type of the functor.\n  *\n  * \\sa class CwiseNullaryOp\n  */\ntemplate<typename Derived>\ntemplate<typename CustomNullaryOp>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n#ifndef EIGEN_PARSED_BY_DOXYGEN\nconst CwiseNullaryOp<CustomNullaryOp, typename DenseBase<Derived>::PlainObject>\n#else\nconst CwiseNullaryOp<CustomNullaryOp, PlainObject>\n#endif\nDenseBase<Derived>::NullaryExpr(const CustomNullaryOp& func)\n{\n  return CwiseNullaryOp<CustomNullaryOp, PlainObject>(RowsAtCompileTime, ColsAtCompileTime, func);\n}\n\n/** \\returns an expression of a constant matrix of value \\a value\n  *\n  * The parameters \\a rows and \\a cols are the number of rows and of columns of\n  * the returned matrix. Must be compatible with this DenseBase type.\n  *\n  * This variant is meant to be used for dynamic-size matrix types. For fixed-size types,\n  * it is redundant to pass \\a rows and \\a cols as arguments, so Zero() should be used\n  * instead.\n  *\n  * The template parameter \\a CustomNullaryOp is the type of the functor.\n  *\n  * \\sa class CwiseNullaryOp\n  */\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType\nDenseBase<Derived>::Constant(Index rows, Index cols, const Scalar& value)\n{\n  return DenseBase<Derived>::NullaryExpr(rows, cols, internal::scalar_constant_op<Scalar>(value));\n}\n\n/** \\returns an expression of a constant matrix of value \\a value\n  *\n  * The parameter \\a size is the size of the returned vector.\n  * Must be compatible with this DenseBase type.\n  *\n  * \\only_for_vectors\n  *\n  * This variant is meant to be used for dynamic-size vector types. For fixed-size types,\n  * it is redundant to pass \\a size as argument, so Zero() should be used\n  * instead.\n  *\n  * The template parameter \\a CustomNullaryOp is the type of the functor.\n  *\n  * \\sa class CwiseNullaryOp\n  */\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType\nDenseBase<Derived>::Constant(Index size, const Scalar& value)\n{\n  return DenseBase<Derived>::NullaryExpr(size, internal::scalar_constant_op<Scalar>(value));\n}\n\n/** \\returns an expression of a constant matrix of value \\a value\n  *\n  * This variant is only for fixed-size DenseBase types. For dynamic-size types, you\n  * need to use the variants taking size arguments.\n  *\n  * The template parameter \\a CustomNullaryOp is the type of the functor.\n  *\n  * \\sa class CwiseNullaryOp\n  */\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType\nDenseBase<Derived>::Constant(const Scalar& value)\n{\n  EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)\n  return DenseBase<Derived>::NullaryExpr(RowsAtCompileTime, ColsAtCompileTime, internal::scalar_constant_op<Scalar>(value));\n}\n\n/** \\deprecated because of accuracy loss. 
In Eigen 3.3, it is an alias for LinSpaced(Index,const Scalar&,const Scalar&)\n  *\n  * \\only_for_vectors\n  *\n  * Example: \\include DenseBase_LinSpaced_seq_deprecated.cpp\n  * Output: \\verbinclude DenseBase_LinSpaced_seq_deprecated.out\n  *\n  * \\sa LinSpaced(Index,const Scalar&, const Scalar&), setLinSpaced(Index,const Scalar&,const Scalar&)\n  */\ntemplate<typename Derived>\nEIGEN_DEPRECATED EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType\nDenseBase<Derived>::LinSpaced(Sequential_t, Index size, const Scalar& low, const Scalar& high)\n{\n  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)\n  return DenseBase<Derived>::NullaryExpr(size, internal::linspaced_op<Scalar>(low,high,size));\n}\n\n/** \\deprecated because of accuracy loss. In Eigen 3.3, it is an alias for LinSpaced(const Scalar&,const Scalar&)\n  *\n  * \\sa LinSpaced(const Scalar&, const Scalar&)\n  */\ntemplate<typename Derived>\nEIGEN_DEPRECATED EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType\nDenseBase<Derived>::LinSpaced(Sequential_t, const Scalar& low, const Scalar& high)\n{\n  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)\n  EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)\n  return DenseBase<Derived>::NullaryExpr(Derived::SizeAtCompileTime, internal::linspaced_op<Scalar>(low,high,Derived::SizeAtCompileTime));\n}\n\n/**\n  * \\brief Sets a linearly spaced vector.\n  *\n  * The function generates 'size' equally spaced values in the closed interval [low,high].\n  * When size is set to 1, a vector of length 1 containing 'high' is returned.\n  *\n  * \\only_for_vectors\n  *\n  * Example: \\include DenseBase_LinSpaced.cpp\n  * Output: \\verbinclude DenseBase_LinSpaced.out\n  *\n  * For integer scalar types, an even spacing is possible if and only if the length of the range,\n  * i.e., \\c high-low is a scalar multiple of \\c size-1, or if \\c size is a scalar multiple of the\n  * number of values \\c high-low+1 (meaning each value can be repeated the same number of times).\n  * If one of these two conditions is not satisfied, then \\c high is lowered to the largest value\n  * satisfying one of these constraints.\n  * Here are some examples:\n  *\n  * Example: \\include DenseBase_LinSpacedInt.cpp\n  * Output: \\verbinclude DenseBase_LinSpacedInt.out\n  *\n  * \\sa setLinSpaced(Index,const Scalar&,const Scalar&), CwiseNullaryOp\n  */\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType\nDenseBase<Derived>::LinSpaced(Index size, const Scalar& low, const Scalar& high)\n{\n  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)\n  return DenseBase<Derived>::NullaryExpr(size, internal::linspaced_op<Scalar>(low,high,size));\n}\n\n/**\n  * \\copydoc DenseBase::LinSpaced(Index, const Scalar&, const Scalar&)\n  * Special version for fixed size types which does not require the size parameter.\n  */\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType\nDenseBase<Derived>::LinSpaced(const Scalar& low, const Scalar& high)\n{\n  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)\n  EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)\n  return DenseBase<Derived>::NullaryExpr(Derived::SizeAtCompileTime, internal::linspaced_op<Scalar>(low,high,Derived::SizeAtCompileTime));\n}\n\n/** \\returns true if all coefficients in this matrix are approximately equal to \\a val, to within precision \\a prec */\ntemplate<typename 
Derived>\nEIGEN_DEVICE_FUNC bool DenseBase<Derived>::isApproxToConstant\n(const Scalar& val, const RealScalar& prec) const\n{\n  typename internal::nested_eval<Derived,1>::type self(derived());\n  for(Index j = 0; j < cols(); ++j)\n    for(Index i = 0; i < rows(); ++i)\n      if(!internal::isApprox(self.coeff(i, j), val, prec))\n        return false;\n  return true;\n}\n\n/** This is just an alias for isApproxToConstant().\n  *\n  * \\returns true if all coefficients in this matrix are approximately equal to \\a value, to within precision \\a prec */\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC bool DenseBase<Derived>::isConstant\n(const Scalar& val, const RealScalar& prec) const\n{\n  return isApproxToConstant(val, prec);\n}\n\n/** Alias for setConstant(): sets all coefficients in this expression to \\a val.\n  *\n  * \\sa setConstant(), Constant(), class CwiseNullaryOp\n  */\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void DenseBase<Derived>::fill(const Scalar& val)\n{\n  setConstant(val);\n}\n\n/** Sets all coefficients in this expression to value \\a val.\n  *\n  * \\sa fill(), setConstant(Index,const Scalar&), setConstant(Index,Index,const Scalar&), setZero(), setOnes(), Constant(), class CwiseNullaryOp, setZero(), setOnes()\n  */\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setConstant(const Scalar& val)\n{\n  return derived() = Constant(rows(), cols(), val);\n}\n\n/** Resizes to the given \\a size, and sets all coefficients in this expression to the given value \\a val.\n  *\n  * \\only_for_vectors\n  *\n  * Example: \\include Matrix_setConstant_int.cpp\n  * Output: \\verbinclude Matrix_setConstant_int.out\n  *\n  * \\sa MatrixBase::setConstant(const Scalar&), setConstant(Index,Index,const Scalar&), class CwiseNullaryOp, MatrixBase::Constant(const Scalar&)\n  */\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&\nPlainObjectBase<Derived>::setConstant(Index size, const Scalar& val)\n{\n  resize(size);\n  return setConstant(val);\n}\n\n/** Resizes to the given size, and sets all coefficients in this expression to the given value \\a val.\n  *\n  * \\param rows the new number of rows\n  * \\param cols the new number of columns\n  * \\param val the value to which all coefficients are set\n  *\n  * Example: \\include Matrix_setConstant_int_int.cpp\n  * Output: \\verbinclude Matrix_setConstant_int_int.out\n  *\n  * \\sa MatrixBase::setConstant(const Scalar&), setConstant(Index,const Scalar&), class CwiseNullaryOp, MatrixBase::Constant(const Scalar&)\n  */\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&\nPlainObjectBase<Derived>::setConstant(Index rows, Index cols, const Scalar& val)\n{\n  resize(rows, cols);\n  return setConstant(val);\n}\n\n/** Resizes to the given size, changing only the number of columns, and sets all\n  * coefficients in this expression to the given value \\a val. 
For the parameter\n  * of type NoChange_t, just pass the special value \\c NoChange.\n  *\n  * \\sa MatrixBase::setConstant(const Scalar&), setConstant(Index,const Scalar&), class CwiseNullaryOp, MatrixBase::Constant(const Scalar&)\n  */\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&\nPlainObjectBase<Derived>::setConstant(NoChange_t, Index cols, const Scalar& val)\n{\n  return setConstant(rows(), cols, val);\n}\n\n/** Resizes to the given size, changing only the number of rows, and sets all\n  * coefficients in this expression to the given value \\a val. For the parameter\n  * of type NoChange_t, just pass the special value \\c NoChange.\n  *\n  * \\sa MatrixBase::setConstant(const Scalar&), setConstant(Index,const Scalar&), class CwiseNullaryOp, MatrixBase::Constant(const Scalar&)\n  */\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&\nPlainObjectBase<Derived>::setConstant(Index rows, NoChange_t, const Scalar& val)\n{\n  return setConstant(rows, cols(), val);\n}\n\n\n/**\n  * \\brief Sets a linearly spaced vector.\n  *\n  * The function generates 'size' equally spaced values in the closed interval [low,high].\n  * When size is set to 1, a vector of length 1 containing 'high' is returned.\n  *\n  * \\only_for_vectors\n  *\n  * Example: \\include DenseBase_setLinSpaced.cpp\n  * Output: \\verbinclude DenseBase_setLinSpaced.out\n  *\n  * For integer scalar types, do not miss the explanations on the definition\n  * of \\link LinSpaced(Index,const Scalar&,const Scalar&) even spacing \\endlink.\n  *\n  * \\sa LinSpaced(Index,const Scalar&,const Scalar&), CwiseNullaryOp\n  */\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(Index newSize, const Scalar& low, const Scalar& high)\n{\n  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)\n  return derived() = Derived::NullaryExpr(newSize, internal::linspaced_op<Scalar>(low,high,newSize));\n}\n\n/**\n  * \\brief Sets a linearly spaced vector.\n  *\n  * The function fills \\c *this with equally spaced values in the closed interval [low,high].\n  * When size is set to 1, a vector of length 1 containing 'high' is returned.\n  *\n  * \\only_for_vectors\n  *\n  * For integer scalar types, do not miss the explanations on the definition\n  * of \\link LinSpaced(Index,const Scalar&,const Scalar&) even spacing \\endlink.\n  *\n  * \\sa LinSpaced(Index,const Scalar&,const Scalar&), setLinSpaced(Index, const Scalar&, const Scalar&), CwiseNullaryOp\n  */\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(const Scalar& low, const Scalar& high)\n{\n  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)\n  return setLinSpaced(size(), low, high);\n}\n\n// zero:\n\n/** \\returns an expression of a zero matrix.\n  *\n  * The parameters \\a rows and \\a cols are the number of rows and of columns of\n  * the returned matrix. Must be compatible with this MatrixBase type.\n  *\n  * This variant is meant to be used for dynamic-size matrix types. 
For fixed-size types,\n  * it is redundant to pass \\a rows and \\a cols as arguments, so Zero() should be used\n  * instead.\n  *\n  * Example: \\include MatrixBase_zero_int_int.cpp\n  * Output: \\verbinclude MatrixBase_zero_int_int.out\n  *\n  * \\sa Zero(), Zero(Index)\n  */\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType\nDenseBase<Derived>::Zero(Index rows, Index cols)\n{\n  return Constant(rows, cols, Scalar(0));\n}\n\n/** \\returns an expression of a zero vector.\n  *\n  * The parameter \\a size is the size of the returned vector.\n  * Must be compatible with this MatrixBase type.\n  *\n  * \\only_for_vectors\n  *\n  * This variant is meant to be used for dynamic-size vector types. For fixed-size types,\n  * it is redundant to pass \\a size as argument, so Zero() should be used\n  * instead.\n  *\n  * Example: \\include MatrixBase_zero_int.cpp\n  * Output: \\verbinclude MatrixBase_zero_int.out\n  *\n  * \\sa Zero(), Zero(Index,Index)\n  */\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType\nDenseBase<Derived>::Zero(Index size)\n{\n  return Constant(size, Scalar(0));\n}\n\n/** \\returns an expression of a fixed-size zero matrix or vector.\n  *\n  * This variant is only for fixed-size MatrixBase types. For dynamic-size types, you\n  * need to use the variants taking size arguments.\n  *\n  * Example: \\include MatrixBase_zero.cpp\n  * Output: \\verbinclude MatrixBase_zero.out\n  *\n  * \\sa Zero(Index), Zero(Index,Index)\n  */\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType\nDenseBase<Derived>::Zero()\n{\n  return Constant(Scalar(0));\n}\n\n/** \\returns true if *this is approximately equal to the zero matrix,\n  *          within the precision given by \\a prec.\n  *\n  * Example: \\include MatrixBase_isZero.cpp\n  * Output: \\verbinclude MatrixBase_isZero.out\n  *\n  * \\sa class CwiseNullaryOp, Zero()\n  */\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC bool DenseBase<Derived>::isZero(const RealScalar& prec) const\n{\n  typename internal::nested_eval<Derived,1>::type self(derived());\n  for(Index j = 0; j < cols(); ++j)\n    for(Index i = 0; i < rows(); ++i)\n      if(!internal::isMuchSmallerThan(self.coeff(i, j), static_cast<Scalar>(1), prec))\n        return false;\n  return true;\n}\n\n/** Sets all coefficients in this expression to zero.\n  *\n  * Example: \\include MatrixBase_setZero.cpp\n  * Output: \\verbinclude MatrixBase_setZero.out\n  *\n  * \\sa class CwiseNullaryOp, Zero()\n  */\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setZero()\n{\n  return setConstant(Scalar(0));\n}\n\n/** Resizes to the given \\a size, and sets all coefficients in this expression to zero.\n  *\n  * \\only_for_vectors\n  *\n  * Example: \\include Matrix_setZero_int.cpp\n  * Output: \\verbinclude Matrix_setZero_int.out\n  *\n  * \\sa DenseBase::setZero(), setZero(Index,Index), class CwiseNullaryOp, DenseBase::Zero()\n  */\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&\nPlainObjectBase<Derived>::setZero(Index newSize)\n{\n  resize(newSize);\n  return setConstant(Scalar(0));\n}\n\n/** Resizes to the given size, and sets all coefficients in this expression to zero.\n  *\n  * \\param rows the new number of rows\n  * \\param cols the new number of columns\n  *\n  * Example: \\include Matrix_setZero_int_int.cpp\n  
* Output: \\verbinclude Matrix_setZero_int_int.out\n  *\n  * \\sa DenseBase::setZero(), setZero(Index), class CwiseNullaryOp, DenseBase::Zero()\n  */\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&\nPlainObjectBase<Derived>::setZero(Index rows, Index cols)\n{\n  resize(rows, cols);\n  return setConstant(Scalar(0));\n}\n\n/** Resizes to the given size, changing only the number of columns, and sets all\n  * coefficients in this expression to zero. For the parameter of type NoChange_t,\n  * just pass the special value \\c NoChange.\n  *\n  * \\sa DenseBase::setZero(), setZero(Index), setZero(Index, Index), setZero(Index, NoChange_t), class CwiseNullaryOp, DenseBase::Zero()\n  */\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&\nPlainObjectBase<Derived>::setZero(NoChange_t, Index cols)\n{\n  return setZero(rows(), cols);\n}\n\n/** Resizes to the given size, changing only the number of rows, and sets all\n  * coefficients in this expression to zero. For the parameter of type NoChange_t,\n  * just pass the special value \\c NoChange.\n  *\n  * \\sa DenseBase::setZero(), setZero(Index), setZero(Index, Index), setZero(NoChange_t, Index), class CwiseNullaryOp, DenseBase::Zero()\n  */\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&\nPlainObjectBase<Derived>::setZero(Index rows, NoChange_t)\n{\n  return setZero(rows, cols());\n}\n\n// ones:\n\n/** \\returns an expression of a matrix where all coefficients equal one.\n  *\n  * The parameters \\a rows and \\a cols are the number of rows and of columns of\n  * the returned matrix. Must be compatible with this MatrixBase type.\n  *\n  * This variant is meant to be used for dynamic-size matrix types. For fixed-size types,\n  * it is redundant to pass \\a rows and \\a cols as arguments, so Ones() should be used\n  * instead.\n  *\n  * Example: \\include MatrixBase_ones_int_int.cpp\n  * Output: \\verbinclude MatrixBase_ones_int_int.out\n  *\n  * \\sa Ones(), Ones(Index), isOnes(), class Ones\n  */\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType\nDenseBase<Derived>::Ones(Index rows, Index cols)\n{\n  return Constant(rows, cols, Scalar(1));\n}\n\n/** \\returns an expression of a vector where all coefficients equal one.\n  *\n  * The parameter \\a newSize is the size of the returned vector.\n  * Must be compatible with this MatrixBase type.\n  *\n  * \\only_for_vectors\n  *\n  * This variant is meant to be used for dynamic-size vector types. For fixed-size types,\n  * it is redundant to pass \\a size as argument, so Ones() should be used\n  * instead.\n  *\n  * Example: \\include MatrixBase_ones_int.cpp\n  * Output: \\verbinclude MatrixBase_ones_int.out\n  *\n  * \\sa Ones(), Ones(Index,Index), isOnes(), class Ones\n  */\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType\nDenseBase<Derived>::Ones(Index newSize)\n{\n  return Constant(newSize, Scalar(1));\n}\n\n/** \\returns an expression of a fixed-size matrix or vector where all coefficients equal one.\n  *\n  * This variant is only for fixed-size MatrixBase types. 
For dynamic-size types, you\n  * need to use the variants taking size arguments.\n  *\n  * Example: \\include MatrixBase_ones.cpp\n  * Output: \\verbinclude MatrixBase_ones.out\n  *\n  * \\sa Ones(Index), Ones(Index,Index), isOnes(), class Ones\n  */\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType\nDenseBase<Derived>::Ones()\n{\n  return Constant(Scalar(1));\n}\n\n/** \\returns true if *this is approximately equal to the matrix where all coefficients\n  *          are equal to 1, within the precision given by \\a prec.\n  *\n  * Example: \\include MatrixBase_isOnes.cpp\n  * Output: \\verbinclude MatrixBase_isOnes.out\n  *\n  * \\sa class CwiseNullaryOp, Ones()\n  */\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC bool DenseBase<Derived>::isOnes\n(const RealScalar& prec) const\n{\n  return isApproxToConstant(Scalar(1), prec);\n}\n\n/** Sets all coefficients in this expression to one.\n  *\n  * Example: \\include MatrixBase_setOnes.cpp\n  * Output: \\verbinclude MatrixBase_setOnes.out\n  *\n  * \\sa class CwiseNullaryOp, Ones()\n  */\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setOnes()\n{\n  return setConstant(Scalar(1));\n}\n\n/** Resizes to the given \\a newSize, and sets all coefficients in this expression to one.\n  *\n  * \\only_for_vectors\n  *\n  * Example: \\include Matrix_setOnes_int.cpp\n  * Output: \\verbinclude Matrix_setOnes_int.out\n  *\n  * \\sa MatrixBase::setOnes(), setOnes(Index,Index), class CwiseNullaryOp, MatrixBase::Ones()\n  */\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&\nPlainObjectBase<Derived>::setOnes(Index newSize)\n{\n  resize(newSize);\n  return setConstant(Scalar(1));\n}\n\n/** Resizes to the given size, and sets all coefficients in this expression to one.\n  *\n  * \\param rows the new number of rows\n  * \\param cols the new number of columns\n  *\n  * Example: \\include Matrix_setOnes_int_int.cpp\n  * Output: \\verbinclude Matrix_setOnes_int_int.out\n  *\n  * \\sa MatrixBase::setOnes(), setOnes(Index), class CwiseNullaryOp, MatrixBase::Ones()\n  */\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&\nPlainObjectBase<Derived>::setOnes(Index rows, Index cols)\n{\n  resize(rows, cols);\n  return setConstant(Scalar(1));\n}\n\n/** Resizes to the given size, changing only the number of rows, and sets all\n  * coefficients in this expression to one. For the parameter of type NoChange_t,\n  * just pass the special value \\c NoChange.\n  *\n * \\sa MatrixBase::setOnes(), setOnes(Index), setOnes(Index, Index), setOnes(NoChange_t, Index), class CwiseNullaryOp, MatrixBase::Ones()\n  */\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&\nPlainObjectBase<Derived>::setOnes(Index rows, NoChange_t)\n{\n  return setOnes(rows, cols());\n}\n\n/** Resizes to the given size, changing only the number of columns, and sets all\n  * coefficients in this expression to one. 
For the parameter of type NoChange_t,\n  * just pass the special value \\c NoChange.\n  *\n * \\sa MatrixBase::setOnes(), setOnes(Index), setOnes(Index, Index), setOnes(Index, NoChange_t) class CwiseNullaryOp, MatrixBase::Ones()\n  */\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&\nPlainObjectBase<Derived>::setOnes(NoChange_t, Index cols)\n{\n  return setOnes(rows(), cols);\n}\n\n// Identity:\n\n/** \\returns an expression of the identity matrix (not necessarily square).\n  *\n  * The parameters \\a rows and \\a cols are the number of rows and of columns of\n  * the returned matrix. Must be compatible with this MatrixBase type.\n  *\n  * This variant is meant to be used for dynamic-size matrix types. For fixed-size types,\n  * it is redundant to pass \\a rows and \\a cols as arguments, so Identity() should be used\n  * instead.\n  *\n  * Example: \\include MatrixBase_identity_int_int.cpp\n  * Output: \\verbinclude MatrixBase_identity_int_int.out\n  *\n  * \\sa Identity(), setIdentity(), isIdentity()\n  */\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::IdentityReturnType\nMatrixBase<Derived>::Identity(Index rows, Index cols)\n{\n  return DenseBase<Derived>::NullaryExpr(rows, cols, internal::scalar_identity_op<Scalar>());\n}\n\n/** \\returns an expression of the identity matrix (not necessarily square).\n  *\n  * This variant is only for fixed-size MatrixBase types. For dynamic-size types, you\n  * need to use the variant taking size arguments.\n  *\n  * Example: \\include MatrixBase_identity.cpp\n  * Output: \\verbinclude MatrixBase_identity.out\n  *\n  * \\sa Identity(Index,Index), setIdentity(), isIdentity()\n  */\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::IdentityReturnType\nMatrixBase<Derived>::Identity()\n{\n  EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)\n  return MatrixBase<Derived>::NullaryExpr(RowsAtCompileTime, ColsAtCompileTime, internal::scalar_identity_op<Scalar>());\n}\n\n/** \\returns true if *this is approximately equal to the identity matrix\n  *          (not necessarily square),\n  *          within the precision given by \\a prec.\n  *\n  * Example: \\include MatrixBase_isIdentity.cpp\n  * Output: \\verbinclude MatrixBase_isIdentity.out\n  *\n  * \\sa class CwiseNullaryOp, Identity(), Identity(Index,Index), setIdentity()\n  */\ntemplate<typename Derived>\nbool MatrixBase<Derived>::isIdentity\n(const RealScalar& prec) const\n{\n  typename internal::nested_eval<Derived,1>::type self(derived());\n  for(Index j = 0; j < cols(); ++j)\n  {\n    for(Index i = 0; i < rows(); ++i)\n    {\n      if(i == j)\n      {\n        if(!internal::isApprox(self.coeff(i, j), static_cast<Scalar>(1), prec))\n          return false;\n      }\n      else\n      {\n        if(!internal::isMuchSmallerThan(self.coeff(i, j), static_cast<RealScalar>(1), prec))\n          return false;\n      }\n    }\n  }\n  return true;\n}\n\nnamespace internal {\n\ntemplate<typename Derived, bool Big = (Derived::SizeAtCompileTime>=16)>\nstruct setIdentity_impl\n{\n  EIGEN_DEVICE_FUNC\n  static EIGEN_STRONG_INLINE Derived& run(Derived& m)\n  {\n    return m = Derived::Identity(m.rows(), m.cols());\n  }\n};\n\ntemplate<typename Derived>\nstruct setIdentity_impl<Derived, true>\n{\n  EIGEN_DEVICE_FUNC\n  static EIGEN_STRONG_INLINE Derived& run(Derived& m)\n  {\n    m.setZero();\n    const Index size = numext::mini(m.rows(), m.cols());\n    for(Index i = 0; i < size; ++i) 
m.coeffRef(i,i) = typename Derived::Scalar(1);\n    return m;\n  }\n};\n\n} // end namespace internal\n\n/** Writes the identity expression (not necessarily square) into *this.\n  *\n  * Example: \\include MatrixBase_setIdentity.cpp\n  * Output: \\verbinclude MatrixBase_setIdentity.out\n  *\n  * \\sa class CwiseNullaryOp, Identity(), Identity(Index,Index), isIdentity()\n  */\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setIdentity()\n{\n  return internal::setIdentity_impl<Derived>::run(derived());\n}\n\n/** \\brief Resizes to the given size, and writes the identity expression (not necessarily square) into *this.\n  *\n  * \\param rows the new number of rows\n  * \\param cols the new number of columns\n  *\n  * Example: \\include Matrix_setIdentity_int_int.cpp\n  * Output: \\verbinclude Matrix_setIdentity_int_int.out\n  *\n  * \\sa MatrixBase::setIdentity(), class CwiseNullaryOp, MatrixBase::Identity()\n  */\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setIdentity(Index rows, Index cols)\n{\n  derived().resize(rows, cols);\n  return setIdentity();\n}\n\n/** \\returns an expression of the i-th unit (basis) vector.\n  *\n  * \\only_for_vectors\n  *\n  * \\sa MatrixBase::Unit(Index), MatrixBase::UnitX(), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW()\n  */\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::Unit(Index newSize, Index i)\n{\n  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)\n  return BasisReturnType(SquareMatrixType::Identity(newSize,newSize), i);\n}\n\n/** \\returns an expression of the i-th unit (basis) vector.\n  *\n  * \\only_for_vectors\n  *\n  * This variant is for fixed-size vector only.\n  *\n  * \\sa MatrixBase::Unit(Index,Index), MatrixBase::UnitX(), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW()\n  */\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::Unit(Index i)\n{\n  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)\n  return BasisReturnType(SquareMatrixType::Identity(),i);\n}\n\n/** \\returns an expression of the X axis unit vector (1{,0}^*)\n  *\n  * \\only_for_vectors\n  *\n  * \\sa MatrixBase::Unit(Index,Index), MatrixBase::Unit(Index), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW()\n  */\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitX()\n{ return Derived::Unit(0); }\n\n/** \\returns an expression of the Y axis unit vector (0,1{,0}^*)\n  *\n  * \\only_for_vectors\n  *\n  * \\sa MatrixBase::Unit(Index,Index), MatrixBase::Unit(Index), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW()\n  */\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitY()\n{ return Derived::Unit(1); }\n\n/** \\returns an expression of the Z axis unit vector (0,0,1{,0}^*)\n  *\n  * \\only_for_vectors\n  *\n  * \\sa MatrixBase::Unit(Index,Index), MatrixBase::Unit(Index), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW()\n  */\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitZ()\n{ return Derived::Unit(2); }\n\n/** \\returns an expression of the W axis unit vector (0,0,0,1)\n  *\n  * 
\\only_for_vectors\n  *\n  * \\sa MatrixBase::Unit(Index,Index), MatrixBase::Unit(Index), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW()\n  */\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitW()\n{ return Derived::Unit(3); }\n\n/** \\brief Set the coefficients of \\c *this to the i-th unit (basis) vector\n  *\n  * \\param i index of the unique coefficient to be set to 1\n  *\n  * \\only_for_vectors\n  *\n  * \\sa MatrixBase::setIdentity(), class CwiseNullaryOp, MatrixBase::Unit(Index,Index)\n  */\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setUnit(Index i)\n{\n  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived);\n  eigen_assert(i<size());\n  derived().setZero();\n  derived().coeffRef(i) = Scalar(1);\n  return derived();\n}\n\n/** \\brief Resizes to the given \\a newSize, and writes the i-th unit (basis) vector into *this.\n  *\n  * \\param newSize the new size of the vector\n  * \\param i index of the unique coefficient to be set to 1\n  *\n  * \\only_for_vectors\n  *\n  * \\sa MatrixBase::setIdentity(), class CwiseNullaryOp, MatrixBase::Unit(Index,Index)\n  */\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setUnit(Index newSize, Index i)\n{\n  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived);\n  eigen_assert(i<newSize);\n  derived().resize(newSize);\n  return setUnit(i);\n}\n\n} // end namespace Eigen\n\n#endif // EIGEN_CWISE_NULLARY_OP_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/CwiseTernaryOp.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>\n// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>\n// Copyright (C) 2016 Eugene Brevdo <ebrevdo@gmail.com>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_CWISE_TERNARY_OP_H\n#define EIGEN_CWISE_TERNARY_OP_H\n\nnamespace Eigen {\n\nnamespace internal {\ntemplate <typename TernaryOp, typename Arg1, typename Arg2, typename Arg3>\nstruct traits<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> > {\n  // we must not inherit from traits<Arg1> since it has\n  // the potential to cause problems with MSVC\n  typedef typename remove_all<Arg1>::type Ancestor;\n  typedef typename traits<Ancestor>::XprKind XprKind;\n  enum {\n    RowsAtCompileTime = traits<Ancestor>::RowsAtCompileTime,\n    ColsAtCompileTime = traits<Ancestor>::ColsAtCompileTime,\n    MaxRowsAtCompileTime = traits<Ancestor>::MaxRowsAtCompileTime,\n    MaxColsAtCompileTime = traits<Ancestor>::MaxColsAtCompileTime\n  };\n\n  // even though we require Arg1, Arg2, and Arg3 to have the same scalar type\n  // (see CwiseTernaryOp constructor),\n  // we still want to handle the case when the result type is different.\n  typedef typename result_of<TernaryOp(\n      const typename Arg1::Scalar&, const typename Arg2::Scalar&,\n      const typename Arg3::Scalar&)>::type Scalar;\n\n  typedef typename internal::traits<Arg1>::StorageKind StorageKind;\n  typedef typename internal::traits<Arg1>::StorageIndex StorageIndex;\n\n  typedef typename Arg1::Nested Arg1Nested;\n  typedef typename Arg2::Nested Arg2Nested;\n  typedef typename Arg3::Nested Arg3Nested;\n  typedef typename remove_reference<Arg1Nested>::type _Arg1Nested;\n  typedef typename remove_reference<Arg2Nested>::type _Arg2Nested;\n  typedef typename remove_reference<Arg3Nested>::type _Arg3Nested;\n  enum { Flags = _Arg1Nested::Flags & RowMajorBit };\n};\n}  // end namespace internal\n\ntemplate <typename TernaryOp, typename Arg1, typename Arg2, typename Arg3,\n          typename StorageKind>\nclass CwiseTernaryOpImpl;\n\n/** \\class CwiseTernaryOp\n  * \\ingroup Core_Module\n  *\n  * \\brief Generic expression where a coefficient-wise ternary operator is\n * applied to two expressions\n  *\n  * \\tparam TernaryOp template functor implementing the operator\n  * \\tparam Arg1Type the type of the first argument\n  * \\tparam Arg2Type the type of the second argument\n  * \\tparam Arg3Type the type of the third argument\n  *\n  * This class represents an expression where a coefficient-wise ternary\n * operator is applied to three expressions.\n  * It is the return type of ternary operators, by which we mean only those\n * ternary operators where\n  * all three arguments are Eigen expressions.\n  * For example, the return type of betainc(matrix1, matrix2, matrix3) is a\n * CwiseTernaryOp.\n  *\n  * Most of the time, this is the only way that it is used, so you typically\n * don't have to name\n  * CwiseTernaryOp types explicitly.\n  *\n  * \\sa MatrixBase::ternaryExpr(const MatrixBase<Argument2> &, const\n * MatrixBase<Argument3> &, const CustomTernaryOp &) const, class CwiseBinaryOp,\n * class CwiseUnaryOp, class CwiseNullaryOp\n  */\ntemplate <typename TernaryOp, typename Arg1Type, typename Arg2Type,\n          typename 
Arg3Type>\nclass CwiseTernaryOp : public CwiseTernaryOpImpl<\n                           TernaryOp, Arg1Type, Arg2Type, Arg3Type,\n                           typename internal::traits<Arg1Type>::StorageKind>,\n                       internal::no_assignment_operator\n{\n public:\n  typedef typename internal::remove_all<Arg1Type>::type Arg1;\n  typedef typename internal::remove_all<Arg2Type>::type Arg2;\n  typedef typename internal::remove_all<Arg3Type>::type Arg3;\n\n  typedef typename CwiseTernaryOpImpl<\n      TernaryOp, Arg1Type, Arg2Type, Arg3Type,\n      typename internal::traits<Arg1Type>::StorageKind>::Base Base;\n  EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseTernaryOp)\n\n  typedef typename internal::ref_selector<Arg1Type>::type Arg1Nested;\n  typedef typename internal::ref_selector<Arg2Type>::type Arg2Nested;\n  typedef typename internal::ref_selector<Arg3Type>::type Arg3Nested;\n  typedef typename internal::remove_reference<Arg1Nested>::type _Arg1Nested;\n  typedef typename internal::remove_reference<Arg2Nested>::type _Arg2Nested;\n  typedef typename internal::remove_reference<Arg3Nested>::type _Arg3Nested;\n\n  EIGEN_DEVICE_FUNC\n  EIGEN_STRONG_INLINE CwiseTernaryOp(const Arg1& a1, const Arg2& a2,\n                                     const Arg3& a3,\n                                     const TernaryOp& func = TernaryOp())\n      : m_arg1(a1), m_arg2(a2), m_arg3(a3), m_functor(func) {\n    // require the sizes to match\n    EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Arg1, Arg2)\n    EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Arg1, Arg3)\n\n    // The index types should match\n    EIGEN_STATIC_ASSERT((internal::is_same<\n                         typename internal::traits<Arg1Type>::StorageKind,\n                         typename internal::traits<Arg2Type>::StorageKind>::value),\n                        STORAGE_KIND_MUST_MATCH)\n    EIGEN_STATIC_ASSERT((internal::is_same<\n                         typename internal::traits<Arg1Type>::StorageKind,\n                         typename internal::traits<Arg3Type>::StorageKind>::value),\n                        STORAGE_KIND_MUST_MATCH)\n\n    eigen_assert(a1.rows() == a2.rows() && a1.cols() == a2.cols() &&\n                 a1.rows() == a3.rows() && a1.cols() == a3.cols());\n  }\n\n  EIGEN_DEVICE_FUNC\n  EIGEN_STRONG_INLINE Index rows() const {\n    // return the fixed size type if available to enable compile time\n    // optimizations\n    if (internal::traits<typename internal::remove_all<Arg1Nested>::type>::\n                RowsAtCompileTime == Dynamic &&\n        internal::traits<typename internal::remove_all<Arg2Nested>::type>::\n                RowsAtCompileTime == Dynamic)\n      return m_arg3.rows();\n    else if (internal::traits<typename internal::remove_all<Arg1Nested>::type>::\n                     RowsAtCompileTime == Dynamic &&\n             internal::traits<typename internal::remove_all<Arg3Nested>::type>::\n                     RowsAtCompileTime == Dynamic)\n      return m_arg2.rows();\n    else\n      return m_arg1.rows();\n  }\n  EIGEN_DEVICE_FUNC\n  EIGEN_STRONG_INLINE Index cols() const {\n    // return the fixed size type if available to enable compile time\n    // optimizations\n    if (internal::traits<typename internal::remove_all<Arg1Nested>::type>::\n                ColsAtCompileTime == Dynamic &&\n        internal::traits<typename internal::remove_all<Arg2Nested>::type>::\n                ColsAtCompileTime == Dynamic)\n      return m_arg3.cols();\n    else if (internal::traits<typename internal::remove_all<Arg1Nested>::type>::\n  
                   ColsAtCompileTime == Dynamic &&\n             internal::traits<typename internal::remove_all<Arg3Nested>::type>::\n                     ColsAtCompileTime == Dynamic)\n      return m_arg2.cols();\n    else\n      return m_arg1.cols();\n  }\n\n  /** \\returns the first argument nested expression */\n  EIGEN_DEVICE_FUNC\n  const _Arg1Nested& arg1() const { return m_arg1; }\n  /** \\returns the second argument nested expression */\n  EIGEN_DEVICE_FUNC\n  const _Arg2Nested& arg2() const { return m_arg2; }\n  /** \\returns the third argument nested expression */\n  EIGEN_DEVICE_FUNC\n  const _Arg3Nested& arg3() const { return m_arg3; }\n  /** \\returns the functor representing the ternary operation */\n  EIGEN_DEVICE_FUNC\n  const TernaryOp& functor() const { return m_functor; }\n\n protected:\n  Arg1Nested m_arg1;\n  Arg2Nested m_arg2;\n  Arg3Nested m_arg3;\n  const TernaryOp m_functor;\n};\n\n// Generic API dispatcher\ntemplate <typename TernaryOp, typename Arg1, typename Arg2, typename Arg3,\n          typename StorageKind>\nclass CwiseTernaryOpImpl\n    : public internal::generic_xpr_base<\n          CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> >::type {\n public:\n  typedef typename internal::generic_xpr_base<\n      CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> >::type Base;\n};\n\n}  // end namespace Eigen\n\n#endif  // EIGEN_CWISE_TERNARY_OP_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/CwiseUnaryOp.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>\n// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_CWISE_UNARY_OP_H\n#define EIGEN_CWISE_UNARY_OP_H\n\nnamespace Eigen {\n\nnamespace internal {\ntemplate<typename UnaryOp, typename XprType>\nstruct traits<CwiseUnaryOp<UnaryOp, XprType> >\n : traits<XprType>\n{\n  typedef typename result_of<\n                     UnaryOp(const typename XprType::Scalar&)\n                   >::type Scalar;\n  typedef typename XprType::Nested XprTypeNested;\n  typedef typename remove_reference<XprTypeNested>::type _XprTypeNested;\n  enum {\n    Flags = _XprTypeNested::Flags & RowMajorBit\n  };\n};\n}\n\ntemplate<typename UnaryOp, typename XprType, typename StorageKind>\nclass CwiseUnaryOpImpl;\n\n/** \\class CwiseUnaryOp\n  * \\ingroup Core_Module\n  *\n  * \\brief Generic expression where a coefficient-wise unary operator is applied to an expression\n  *\n  * \\tparam UnaryOp template functor implementing the operator\n  * \\tparam XprType the type of the expression to which we are applying the unary operator\n  *\n  * This class represents an expression where a unary operator is applied to an expression.\n  * It is the return type of all operations taking exactly 1 input expression, regardless of the\n  * presence of other inputs such as scalars. For example, the operator* in the expression 3*matrix\n  * is considered unary, because only the right-hand side is an expression, and its\n  * return type is a specialization of CwiseUnaryOp.\n  *\n  * Most of the time, this is the only way that it is used, so you typically don't have to name\n  * CwiseUnaryOp types explicitly.\n  *\n  * \\sa MatrixBase::unaryExpr(const CustomUnaryOp &) const, class CwiseBinaryOp, class CwiseNullaryOp\n  */\ntemplate<typename UnaryOp, typename XprType>\nclass CwiseUnaryOp : public CwiseUnaryOpImpl<UnaryOp, XprType, typename internal::traits<XprType>::StorageKind>, internal::no_assignment_operator\n{\n  public:\n\n    typedef typename CwiseUnaryOpImpl<UnaryOp, XprType,typename internal::traits<XprType>::StorageKind>::Base Base;\n    EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseUnaryOp)\n    typedef typename internal::ref_selector<XprType>::type XprTypeNested;\n    typedef typename internal::remove_all<XprType>::type NestedExpression;\n\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n    explicit CwiseUnaryOp(const XprType& xpr, const UnaryOp& func = UnaryOp())\n      : m_xpr(xpr), m_functor(func) {}\n\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR\n    Index rows() const EIGEN_NOEXCEPT { return m_xpr.rows(); }\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR\n    Index cols() const EIGEN_NOEXCEPT { return m_xpr.cols(); }\n\n    /** \\returns the functor representing the unary operation */\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n    const UnaryOp& functor() const { return m_functor; }\n\n    /** \\returns the nested expression */\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n    const typename internal::remove_all<XprTypeNested>::type&\n    nestedExpression() const { return m_xpr; }\n\n    /** \\returns the nested expression */\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n    typename 
internal::remove_all<XprTypeNested>::type&\n    nestedExpression() { return m_xpr; }\n\n  protected:\n    XprTypeNested m_xpr;\n    const UnaryOp m_functor;\n};\n\n// Generic API dispatcher\ntemplate<typename UnaryOp, typename XprType, typename StorageKind>\nclass CwiseUnaryOpImpl\n  : public internal::generic_xpr_base<CwiseUnaryOp<UnaryOp, XprType> >::type\n{\npublic:\n  typedef typename internal::generic_xpr_base<CwiseUnaryOp<UnaryOp, XprType> >::type Base;\n};\n\n} // end namespace Eigen\n\n#endif // EIGEN_CWISE_UNARY_OP_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/CwiseUnaryView.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_CWISE_UNARY_VIEW_H\n#define EIGEN_CWISE_UNARY_VIEW_H\n\nnamespace Eigen {\n\nnamespace internal {\ntemplate<typename ViewOp, typename MatrixType>\nstruct traits<CwiseUnaryView<ViewOp, MatrixType> >\n : traits<MatrixType>\n{\n  typedef typename result_of<\n                     ViewOp(const typename traits<MatrixType>::Scalar&)\n                   >::type Scalar;\n  typedef typename MatrixType::Nested MatrixTypeNested;\n  typedef typename remove_all<MatrixTypeNested>::type _MatrixTypeNested;\n  enum {\n    FlagsLvalueBit = is_lvalue<MatrixType>::value ? LvalueBit : 0,\n    Flags = traits<_MatrixTypeNested>::Flags & (RowMajorBit | FlagsLvalueBit | DirectAccessBit), // FIXME DirectAccessBit should not be handled by expressions\n    MatrixTypeInnerStride =  inner_stride_at_compile_time<MatrixType>::ret,\n    // need to cast the sizeof's from size_t to int explicitly, otherwise:\n    // \"error: no integral type can represent all of the enumerator values\n    InnerStrideAtCompileTime = MatrixTypeInnerStride == Dynamic\n                             ? int(Dynamic)\n                             : int(MatrixTypeInnerStride) * int(sizeof(typename traits<MatrixType>::Scalar) / sizeof(Scalar)),\n    OuterStrideAtCompileTime = outer_stride_at_compile_time<MatrixType>::ret == Dynamic\n                             ? int(Dynamic)\n                             : outer_stride_at_compile_time<MatrixType>::ret * int(sizeof(typename traits<MatrixType>::Scalar) / sizeof(Scalar))\n  };\n};\n}\n\ntemplate<typename ViewOp, typename MatrixType, typename StorageKind>\nclass CwiseUnaryViewImpl;\n\n/** \\class CwiseUnaryView\n  * \\ingroup Core_Module\n  *\n  * \\brief Generic lvalue expression of a coefficient-wise unary operator of a matrix or a vector\n  *\n  * \\tparam ViewOp template functor implementing the view\n  * \\tparam MatrixType the type of the matrix we are applying the unary operator\n  *\n  * This class represents a lvalue expression of a generic unary view operator of a matrix or a vector.\n  * It is the return type of real() and imag(), and most of the time this is the only way it is used.\n  *\n  * \\sa MatrixBase::unaryViewExpr(const CustomUnaryOp &) const, class CwiseUnaryOp\n  */\ntemplate<typename ViewOp, typename MatrixType>\nclass CwiseUnaryView : public CwiseUnaryViewImpl<ViewOp, MatrixType, typename internal::traits<MatrixType>::StorageKind>\n{\n  public:\n\n    typedef typename CwiseUnaryViewImpl<ViewOp, MatrixType,typename internal::traits<MatrixType>::StorageKind>::Base Base;\n    EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseUnaryView)\n    typedef typename internal::ref_selector<MatrixType>::non_const_type MatrixTypeNested;\n    typedef typename internal::remove_all<MatrixType>::type NestedExpression;\n\n    explicit EIGEN_DEVICE_FUNC inline CwiseUnaryView(MatrixType& mat, const ViewOp& func = ViewOp())\n      : m_matrix(mat), m_functor(func) {}\n\n    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(CwiseUnaryView)\n\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR\n    Index rows() const EIGEN_NOEXCEPT { return m_matrix.rows(); }\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR\n    Index 
cols() const EIGEN_NOEXCEPT { return m_matrix.cols(); }\n\n    /** \\returns the functor representing unary operation */\n    EIGEN_DEVICE_FUNC const ViewOp& functor() const { return m_functor; }\n\n    /** \\returns the nested expression */\n    EIGEN_DEVICE_FUNC const typename internal::remove_all<MatrixTypeNested>::type&\n    nestedExpression() const { return m_matrix; }\n\n    /** \\returns the nested expression */\n    EIGEN_DEVICE_FUNC typename internal::remove_reference<MatrixTypeNested>::type&\n    nestedExpression() { return m_matrix; }\n\n  protected:\n    MatrixTypeNested m_matrix;\n    ViewOp m_functor;\n};\n\n// Generic API dispatcher\ntemplate<typename ViewOp, typename XprType, typename StorageKind>\nclass CwiseUnaryViewImpl\n  : public internal::generic_xpr_base<CwiseUnaryView<ViewOp, XprType> >::type\n{\npublic:\n  typedef typename internal::generic_xpr_base<CwiseUnaryView<ViewOp, XprType> >::type Base;\n};\n\ntemplate<typename ViewOp, typename MatrixType>\nclass CwiseUnaryViewImpl<ViewOp,MatrixType,Dense>\n  : public internal::dense_xpr_base< CwiseUnaryView<ViewOp, MatrixType> >::type\n{\n  public:\n\n    typedef CwiseUnaryView<ViewOp, MatrixType> Derived;\n    typedef typename internal::dense_xpr_base< CwiseUnaryView<ViewOp, MatrixType> >::type Base;\n\n    EIGEN_DENSE_PUBLIC_INTERFACE(Derived)\n    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(CwiseUnaryViewImpl)\n\n    EIGEN_DEVICE_FUNC inline Scalar* data() { return &(this->coeffRef(0)); }\n    EIGEN_DEVICE_FUNC inline const Scalar* data() const { return &(this->coeff(0)); }\n\n    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index innerStride() const\n    {\n      return derived().nestedExpression().innerStride() * sizeof(typename internal::traits<MatrixType>::Scalar) / sizeof(Scalar);\n    }\n\n    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index outerStride() const\n    {\n      return derived().nestedExpression().outerStride() * sizeof(typename internal::traits<MatrixType>::Scalar) / sizeof(Scalar);\n    }\n  protected:\n    EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(CwiseUnaryViewImpl)\n};\n\n} // end namespace Eigen\n\n#endif // EIGEN_CWISE_UNARY_VIEW_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/DenseBase.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2007-2010 Benoit Jacob <jacob.benoit.1@gmail.com>\n// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_DENSEBASE_H\n#define EIGEN_DENSEBASE_H\n\nnamespace Eigen {\n\nnamespace internal {\n\n// The index type defined by EIGEN_DEFAULT_DENSE_INDEX_TYPE must be a signed type.\n// This dummy function simply aims at checking that at compile time.\nstatic inline void check_DenseIndex_is_signed() {\n  EIGEN_STATIC_ASSERT(NumTraits<DenseIndex>::IsSigned,THE_INDEX_TYPE_MUST_BE_A_SIGNED_TYPE)\n}\n\n} // end namespace internal\n\n/** \\class DenseBase\n  * \\ingroup Core_Module\n  *\n  * \\brief Base class for all dense matrices, vectors, and arrays\n  *\n  * This class is the base that is inherited by all dense objects (matrix, vector, arrays,\n  * and related expression types). The common Eigen API for dense objects is contained in this class.\n  *\n  * \\tparam Derived is the derived type, e.g., a matrix type or an expression.\n  *\n  * This class can be extended with the help of the plugin mechanism described on the page\n  * \\ref TopicCustomizing_Plugins by defining the preprocessor symbol \\c EIGEN_DENSEBASE_PLUGIN.\n  *\n  * \\sa \\blank \\ref TopicClassHierarchy\n  */\ntemplate<typename Derived> class DenseBase\n#ifndef EIGEN_PARSED_BY_DOXYGEN\n  : public DenseCoeffsBase<Derived, internal::accessors_level<Derived>::value>\n#else\n  : public DenseCoeffsBase<Derived,DirectWriteAccessors>\n#endif // not EIGEN_PARSED_BY_DOXYGEN\n{\n  public:\n\n    /** Inner iterator type to iterate over the coefficients of a row or column.\n      * \\sa class InnerIterator\n      */\n    typedef Eigen::InnerIterator<Derived> InnerIterator;\n\n    typedef typename internal::traits<Derived>::StorageKind StorageKind;\n\n    /**\n      * \\brief The type used to store indices\n      * \\details This typedef is relevant for types that store multiple indices such as\n      *          PermutationMatrix or Transpositions, otherwise it defaults to Eigen::Index\n      * \\sa \\blank \\ref TopicPreprocessorDirectives, Eigen::Index, SparseMatrixBase.\n     */\n    typedef typename internal::traits<Derived>::StorageIndex StorageIndex;\n\n    /** The numeric type of the expression' coefficients, e.g. float, double, int or std::complex<float>, etc. */\n    typedef typename internal::traits<Derived>::Scalar Scalar;\n\n    /** The numeric type of the expression' coefficients, e.g. 
float, double, int or std::complex<float>, etc.\n      *\n      * It is an alias for the Scalar type */\n    typedef Scalar value_type;\n\n    typedef typename NumTraits<Scalar>::Real RealScalar;\n    typedef DenseCoeffsBase<Derived, internal::accessors_level<Derived>::value> Base;\n\n    using Base::derived;\n    using Base::const_cast_derived;\n    using Base::rows;\n    using Base::cols;\n    using Base::size;\n    using Base::rowIndexByOuterInner;\n    using Base::colIndexByOuterInner;\n    using Base::coeff;\n    using Base::coeffByOuterInner;\n    using Base::operator();\n    using Base::operator[];\n    using Base::x;\n    using Base::y;\n    using Base::z;\n    using Base::w;\n    using Base::stride;\n    using Base::innerStride;\n    using Base::outerStride;\n    using Base::rowStride;\n    using Base::colStride;\n    typedef typename Base::CoeffReturnType CoeffReturnType;\n\n    enum {\n\n      RowsAtCompileTime = internal::traits<Derived>::RowsAtCompileTime,\n        /**< The number of rows at compile-time. This is just a copy of the value provided\n          * by the \\a Derived type. If a value is not known at compile-time,\n          * it is set to the \\a Dynamic constant.\n          * \\sa MatrixBase::rows(), MatrixBase::cols(), ColsAtCompileTime, SizeAtCompileTime */\n\n      ColsAtCompileTime = internal::traits<Derived>::ColsAtCompileTime,\n        /**< The number of columns at compile-time. This is just a copy of the value provided\n          * by the \\a Derived type. If a value is not known at compile-time,\n          * it is set to the \\a Dynamic constant.\n          * \\sa MatrixBase::rows(), MatrixBase::cols(), RowsAtCompileTime, SizeAtCompileTime */\n\n\n      SizeAtCompileTime = (internal::size_at_compile_time<internal::traits<Derived>::RowsAtCompileTime,\n                                                   internal::traits<Derived>::ColsAtCompileTime>::ret),\n        /**< This is equal to the number of coefficients, i.e. the number of\n          * rows times the number of columns, or to \\a Dynamic if this is not\n          * known at compile-time. \\sa RowsAtCompileTime, ColsAtCompileTime */\n\n      MaxRowsAtCompileTime = internal::traits<Derived>::MaxRowsAtCompileTime,\n        /**< This value is equal to the maximum possible number of rows that this expression\n          * might have. If this expression might have an arbitrarily high number of rows,\n          * this value is set to \\a Dynamic.\n          *\n          * This value is useful to know when evaluating an expression, in order to determine\n          * whether it is possible to avoid doing a dynamic memory allocation.\n          *\n          * \\sa RowsAtCompileTime, MaxColsAtCompileTime, MaxSizeAtCompileTime\n          */\n\n      MaxColsAtCompileTime = internal::traits<Derived>::MaxColsAtCompileTime,\n        /**< This value is equal to the maximum possible number of columns that this expression\n          * might have. 
If this expression might have an arbitrarily high number of columns,\n          * this value is set to \\a Dynamic.\n          *\n          * This value is useful to know when evaluating an expression, in order to determine\n          * whether it is possible to avoid doing a dynamic memory allocation.\n          *\n          * \\sa ColsAtCompileTime, MaxRowsAtCompileTime, MaxSizeAtCompileTime\n          */\n\n      MaxSizeAtCompileTime = (internal::size_at_compile_time<internal::traits<Derived>::MaxRowsAtCompileTime,\n                                                      internal::traits<Derived>::MaxColsAtCompileTime>::ret),\n        /**< This value is equal to the maximum possible number of coefficients that this expression\n          * might have. If this expression might have an arbitrarily high number of coefficients,\n          * this value is set to \\a Dynamic.\n          *\n          * This value is useful to know when evaluating an expression, in order to determine\n          * whether it is possible to avoid doing a dynamic memory allocation.\n          *\n          * \\sa SizeAtCompileTime, MaxRowsAtCompileTime, MaxColsAtCompileTime\n          */\n\n      IsVectorAtCompileTime = internal::traits<Derived>::RowsAtCompileTime == 1\n                           || internal::traits<Derived>::ColsAtCompileTime == 1,\n        /**< This is set to true if either the number of rows or the number of\n          * columns is known at compile-time to be equal to 1. Indeed, in that case,\n          * we are dealing with a column-vector (if there is only one column) or with\n          * a row-vector (if there is only one row). */\n\n      NumDimensions = int(MaxSizeAtCompileTime) == 1 ? 0 : bool(IsVectorAtCompileTime) ? 1 : 2,\n        /**< This value is equal to Tensor::NumDimensions, i.e. 0 for scalars, 1 for vectors,\n         * and 2 for matrices.\n         */\n\n      Flags = internal::traits<Derived>::Flags,\n        /**< This stores expression \\ref flags flags which may or may not be inherited by new expressions\n          * constructed from this one. See the \\ref flags \"list of flags\".\n          */\n\n      IsRowMajor = int(Flags) & RowMajorBit, /**< True if this expression has row-major storage order. */\n\n      InnerSizeAtCompileTime = int(IsVectorAtCompileTime) ? int(SizeAtCompileTime)\n                             : int(IsRowMajor) ? int(ColsAtCompileTime) : int(RowsAtCompileTime),\n\n      InnerStrideAtCompileTime = internal::inner_stride_at_compile_time<Derived>::ret,\n      OuterStrideAtCompileTime = internal::outer_stride_at_compile_time<Derived>::ret\n    };\n\n    typedef typename internal::find_best_packet<Scalar,SizeAtCompileTime>::type PacketScalar;\n\n    enum { IsPlainObjectBase = 0 };\n\n    /** The plain matrix type corresponding to this expression.\n      * \\sa PlainObject */\n    typedef Matrix<typename internal::traits<Derived>::Scalar,\n                internal::traits<Derived>::RowsAtCompileTime,\n                internal::traits<Derived>::ColsAtCompileTime,\n                AutoAlign | (internal::traits<Derived>::Flags&RowMajorBit ? 
RowMajor : ColMajor),\n                internal::traits<Derived>::MaxRowsAtCompileTime,\n                internal::traits<Derived>::MaxColsAtCompileTime\n          > PlainMatrix;\n\n    /** The plain array type corresponding to this expression.\n      * \\sa PlainObject */\n    typedef Array<typename internal::traits<Derived>::Scalar,\n                internal::traits<Derived>::RowsAtCompileTime,\n                internal::traits<Derived>::ColsAtCompileTime,\n                AutoAlign | (internal::traits<Derived>::Flags&RowMajorBit ? RowMajor : ColMajor),\n                internal::traits<Derived>::MaxRowsAtCompileTime,\n                internal::traits<Derived>::MaxColsAtCompileTime\n          > PlainArray;\n\n    /** \\brief The plain matrix or array type corresponding to this expression.\n      *\n      * This is not necessarily exactly the return type of eval(). In the case of plain matrices,\n      * the return type of eval() is a const reference to a matrix, not a matrix! It is however guaranteed\n      * that the return type of eval() is either PlainObject or const PlainObject&.\n      */\n    typedef typename internal::conditional<internal::is_same<typename internal::traits<Derived>::XprKind,MatrixXpr >::value,\n                                 PlainMatrix, PlainArray>::type PlainObject;\n\n    /** \\returns the number of nonzero coefficients which is in practice the number\n      * of stored coefficients. */\n    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n    inline Index nonZeros() const { return size(); }\n\n    /** \\returns the outer size.\n      *\n      * \\note For a vector, this returns just 1. For a matrix (non-vector), this is the major dimension\n      * with respect to the \\ref TopicStorageOrders \"storage order\", i.e., the number of columns for a\n      * column-major matrix, and the number of rows for a row-major matrix. */\n    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n    Index outerSize() const\n    {\n      return IsVectorAtCompileTime ? 1\n           : int(IsRowMajor) ? this->rows() : this->cols();\n    }\n\n    /** \\returns the inner size.\n      *\n      * \\note For a vector, this is just the size. For a matrix (non-vector), this is the minor dimension\n      * with respect to the \\ref TopicStorageOrders \"storage order\", i.e., the number of rows for a\n      * column-major matrix, and the number of columns for a row-major matrix. */\n    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n    Index innerSize() const\n    {\n      return IsVectorAtCompileTime ? this->size()\n           : int(IsRowMajor) ? this->cols() : this->rows();\n    }\n\n    /** Only plain matrices/arrays, not expressions, may be resized; therefore the only useful resize methods are\n      * Matrix::resize() and Array::resize(). The present method only asserts that the new size equals the old size, and does\n      * nothing else.\n      */\n    EIGEN_DEVICE_FUNC\n    void resize(Index newSize)\n    {\n      EIGEN_ONLY_USED_FOR_DEBUG(newSize);\n      eigen_assert(newSize == this->size()\n                && \"DenseBase::resize() does not actually allow to resize.\");\n    }\n    /** Only plain matrices/arrays, not expressions, may be resized; therefore the only useful resize methods are\n      * Matrix::resize() and Array::resize(). 
The present method only asserts that the new size equals the old size, and does\n      * nothing else.\n      */\n    EIGEN_DEVICE_FUNC\n    void resize(Index rows, Index cols)\n    {\n      EIGEN_ONLY_USED_FOR_DEBUG(rows);\n      EIGEN_ONLY_USED_FOR_DEBUG(cols);\n      eigen_assert(rows == this->rows() && cols == this->cols()\n                && \"DenseBase::resize() does not actually allow to resize.\");\n    }\n\n#ifndef EIGEN_PARSED_BY_DOXYGEN\n    /** \\internal Represents a matrix with all coefficients equal to one another*/\n    typedef CwiseNullaryOp<internal::scalar_constant_op<Scalar>,PlainObject> ConstantReturnType;\n    /** \\internal \\deprecated Represents a vector with linearly spaced coefficients that allows sequential access only. */\n    EIGEN_DEPRECATED typedef CwiseNullaryOp<internal::linspaced_op<Scalar>,PlainObject> SequentialLinSpacedReturnType;\n    /** \\internal Represents a vector with linearly spaced coefficients that allows random access. */\n    typedef CwiseNullaryOp<internal::linspaced_op<Scalar>,PlainObject> RandomAccessLinSpacedReturnType;\n    /** \\internal the return type of MatrixBase::eigenvalues() */\n    typedef Matrix<typename NumTraits<typename internal::traits<Derived>::Scalar>::Real, internal::traits<Derived>::ColsAtCompileTime, 1> EigenvaluesReturnType;\n\n#endif // not EIGEN_PARSED_BY_DOXYGEN\n\n    /** Copies \\a other into *this. \\returns a reference to *this. */\n    template<typename OtherDerived>\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n    Derived& operator=(const DenseBase<OtherDerived>& other);\n\n    /** Special case of the template operator=, in order to prevent the compiler\n      * from generating a default operator= (issue hit with g++ 4.1)\n      */\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n    Derived& operator=(const DenseBase& other);\n\n    template<typename OtherDerived>\n    EIGEN_DEVICE_FUNC\n    Derived& operator=(const EigenBase<OtherDerived> &other);\n\n    template<typename OtherDerived>\n    EIGEN_DEVICE_FUNC\n    Derived& operator+=(const EigenBase<OtherDerived> &other);\n\n    template<typename OtherDerived>\n    EIGEN_DEVICE_FUNC\n    Derived& operator-=(const EigenBase<OtherDerived> &other);\n\n    template<typename OtherDerived>\n    EIGEN_DEVICE_FUNC\n    Derived& operator=(const ReturnByValue<OtherDerived>& func);\n\n    /** \\internal\n      * Copies \\a other into *this without evaluating other. \\returns a reference to *this. 
*/\n    template<typename OtherDerived>\n    /** \\deprecated */\n    EIGEN_DEPRECATED EIGEN_DEVICE_FUNC\n    Derived& lazyAssign(const DenseBase<OtherDerived>& other);\n\n    EIGEN_DEVICE_FUNC\n    CommaInitializer<Derived> operator<< (const Scalar& s);\n\n    template<unsigned int Added,unsigned int Removed>\n    /** \\deprecated it now returns \\c *this */\n    EIGEN_DEPRECATED\n    const Derived& flagged() const\n    { return derived(); }\n\n    template<typename OtherDerived>\n    EIGEN_DEVICE_FUNC\n    CommaInitializer<Derived> operator<< (const DenseBase<OtherDerived>& other);\n\n    typedef Transpose<Derived> TransposeReturnType;\n    EIGEN_DEVICE_FUNC\n    TransposeReturnType transpose();\n    typedef typename internal::add_const<Transpose<const Derived> >::type ConstTransposeReturnType;\n    EIGEN_DEVICE_FUNC\n    ConstTransposeReturnType transpose() const;\n    EIGEN_DEVICE_FUNC\n    void transposeInPlace();\n\n    EIGEN_DEVICE_FUNC static const ConstantReturnType\n    Constant(Index rows, Index cols, const Scalar& value);\n    EIGEN_DEVICE_FUNC static const ConstantReturnType\n    Constant(Index size, const Scalar& value);\n    EIGEN_DEVICE_FUNC static const ConstantReturnType\n    Constant(const Scalar& value);\n\n    EIGEN_DEPRECATED EIGEN_DEVICE_FUNC static const RandomAccessLinSpacedReturnType\n    LinSpaced(Sequential_t, Index size, const Scalar& low, const Scalar& high);\n    EIGEN_DEPRECATED EIGEN_DEVICE_FUNC static const RandomAccessLinSpacedReturnType\n    LinSpaced(Sequential_t, const Scalar& low, const Scalar& high);\n\n    EIGEN_DEVICE_FUNC static const RandomAccessLinSpacedReturnType\n    LinSpaced(Index size, const Scalar& low, const Scalar& high);\n    EIGEN_DEVICE_FUNC static const RandomAccessLinSpacedReturnType\n    LinSpaced(const Scalar& low, const Scalar& high);\n\n    template<typename CustomNullaryOp> EIGEN_DEVICE_FUNC\n    static const CwiseNullaryOp<CustomNullaryOp, PlainObject>\n    NullaryExpr(Index rows, Index cols, const CustomNullaryOp& func);\n    template<typename CustomNullaryOp> EIGEN_DEVICE_FUNC\n    static const CwiseNullaryOp<CustomNullaryOp, PlainObject>\n    NullaryExpr(Index size, const CustomNullaryOp& func);\n    template<typename CustomNullaryOp> EIGEN_DEVICE_FUNC\n    static const CwiseNullaryOp<CustomNullaryOp, PlainObject>\n    NullaryExpr(const CustomNullaryOp& func);\n\n    EIGEN_DEVICE_FUNC static const ConstantReturnType Zero(Index rows, Index cols);\n    EIGEN_DEVICE_FUNC static const ConstantReturnType Zero(Index size);\n    EIGEN_DEVICE_FUNC static const ConstantReturnType Zero();\n    EIGEN_DEVICE_FUNC static const ConstantReturnType Ones(Index rows, Index cols);\n    EIGEN_DEVICE_FUNC static const ConstantReturnType Ones(Index size);\n    EIGEN_DEVICE_FUNC static const ConstantReturnType Ones();\n\n    EIGEN_DEVICE_FUNC void fill(const Scalar& value);\n    EIGEN_DEVICE_FUNC Derived& setConstant(const Scalar& value);\n    EIGEN_DEVICE_FUNC Derived& setLinSpaced(Index size, const Scalar& low, const Scalar& high);\n    EIGEN_DEVICE_FUNC Derived& setLinSpaced(const Scalar& low, const Scalar& high);\n    EIGEN_DEVICE_FUNC Derived& setZero();\n    EIGEN_DEVICE_FUNC Derived& setOnes();\n    EIGEN_DEVICE_FUNC Derived& setRandom();\n\n    template<typename OtherDerived> EIGEN_DEVICE_FUNC\n    bool isApprox(const DenseBase<OtherDerived>& other,\n                  const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;\n    EIGEN_DEVICE_FUNC\n    bool isMuchSmallerThan(const RealScalar& other,\n                           
const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;\n    template<typename OtherDerived> EIGEN_DEVICE_FUNC\n    bool isMuchSmallerThan(const DenseBase<OtherDerived>& other,\n                           const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;\n\n    EIGEN_DEVICE_FUNC bool isApproxToConstant(const Scalar& value, const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;\n    EIGEN_DEVICE_FUNC bool isConstant(const Scalar& value, const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;\n    EIGEN_DEVICE_FUNC bool isZero(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;\n    EIGEN_DEVICE_FUNC bool isOnes(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;\n\n    inline bool hasNaN() const;\n    inline bool allFinite() const;\n\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n    Derived& operator*=(const Scalar& other);\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n    Derived& operator/=(const Scalar& other);\n\n    typedef typename internal::add_const_on_value_type<typename internal::eval<Derived>::type>::type EvalReturnType;\n    /** \\returns the matrix or vector obtained by evaluating this expression.\n      *\n      * Notice that in the case of a plain matrix or vector (not an expression) this function just returns\n      * a const reference, in order to avoid a useless copy.\n      *\n      * \\warning Be careful with eval() and the auto C++ keyword, as detailed in this \\link TopicPitfalls_auto_keyword page \\endlink.\n      */\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE EvalReturnType eval() const\n    {\n      // Even though MSVC does not honor strong inlining when the return type\n      // is a dynamic matrix, we desperately need strong inlining for fixed\n      // size types on MSVC.\n      return typename internal::eval<Derived>::type(derived());\n    }\n\n    /** swaps *this with the expression \\a other.\n      *\n      */\n    template<typename OtherDerived>\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n    void swap(const DenseBase<OtherDerived>& other)\n    {\n      EIGEN_STATIC_ASSERT(!OtherDerived::IsPlainObjectBase,THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY);\n      eigen_assert(rows()==other.rows() && cols()==other.cols());\n      call_assignment(derived(), other.const_cast_derived(), internal::swap_assign_op<Scalar>());\n    }\n\n    /** swaps *this with the matrix or array \\a other.\n      *\n      */\n    template<typename OtherDerived>\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n    void swap(PlainObjectBase<OtherDerived>& other)\n    {\n      eigen_assert(rows()==other.rows() && cols()==other.cols());\n      call_assignment(derived(), other.derived(), internal::swap_assign_op<Scalar>());\n    }\n\n    EIGEN_DEVICE_FUNC inline const NestByValue<Derived> nestByValue() const;\n    EIGEN_DEVICE_FUNC inline const ForceAlignedAccess<Derived> forceAlignedAccess() const;\n    EIGEN_DEVICE_FUNC inline ForceAlignedAccess<Derived> forceAlignedAccess();\n    template<bool Enable> EIGEN_DEVICE_FUNC\n    inline const typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type forceAlignedAccessIf() const;\n    template<bool Enable> EIGEN_DEVICE_FUNC\n    inline typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type forceAlignedAccessIf();\n\n    EIGEN_DEVICE_FUNC Scalar sum() const;\n    EIGEN_DEVICE_FUNC Scalar mean() const;\n    EIGEN_DEVICE_FUNC Scalar trace() const;\n\n    EIGEN_DEVICE_FUNC Scalar prod() const;\n\n    
template<int NaNPropagation>\n    EIGEN_DEVICE_FUNC typename internal::traits<Derived>::Scalar minCoeff() const;\n    template<int NaNPropagation>\n    EIGEN_DEVICE_FUNC typename internal::traits<Derived>::Scalar maxCoeff() const;\n\n\n    // By default, the fastest version with undefined NaN propagation semantics is\n    // used.\n    // TODO(rmlarsen): Replace with default template argument when we move to\n    // c++11 or beyond.\n    EIGEN_DEVICE_FUNC inline typename internal::traits<Derived>::Scalar minCoeff() const {\n      return minCoeff<PropagateFast>();\n    }\n    EIGEN_DEVICE_FUNC inline typename internal::traits<Derived>::Scalar maxCoeff() const {\n      return maxCoeff<PropagateFast>();\n    }\n\n    template<int NaNPropagation, typename IndexType>\n    EIGEN_DEVICE_FUNC\n    typename internal::traits<Derived>::Scalar minCoeff(IndexType* row, IndexType* col) const;\n    template<int NaNPropagation, typename IndexType>\n    EIGEN_DEVICE_FUNC\n    typename internal::traits<Derived>::Scalar maxCoeff(IndexType* row, IndexType* col) const;\n    template<int NaNPropagation, typename IndexType>\n    EIGEN_DEVICE_FUNC\n    typename internal::traits<Derived>::Scalar minCoeff(IndexType* index) const;\n    template<int NaNPropagation, typename IndexType>\n    EIGEN_DEVICE_FUNC\n    typename internal::traits<Derived>::Scalar maxCoeff(IndexType* index) const;\n\n    // TODO(rmlarsen): Replace these methods with a default template argument.\n    template<typename IndexType>\n    EIGEN_DEVICE_FUNC inline\n    typename internal::traits<Derived>::Scalar minCoeff(IndexType* row, IndexType* col) const {\n      return minCoeff<PropagateFast>(row, col);\n    }\n    template<typename IndexType>\n    EIGEN_DEVICE_FUNC inline\n    typename internal::traits<Derived>::Scalar maxCoeff(IndexType* row, IndexType* col) const {\n      return maxCoeff<PropagateFast>(row, col);\n    }\n    template<typename IndexType>\n    EIGEN_DEVICE_FUNC inline\n    typename internal::traits<Derived>::Scalar minCoeff(IndexType* index) const {\n      return minCoeff<PropagateFast>(index);\n    }\n    template<typename IndexType>\n    EIGEN_DEVICE_FUNC inline\n    typename internal::traits<Derived>::Scalar maxCoeff(IndexType* index) const {\n      return maxCoeff<PropagateFast>(index);\n    }\n  \n    template<typename BinaryOp>\n    EIGEN_DEVICE_FUNC\n    Scalar redux(const BinaryOp& func) const;\n\n    template<typename Visitor>\n    EIGEN_DEVICE_FUNC\n    void visit(Visitor& func) const;\n\n    /** \\returns a WithFormat proxy object allowing to print a matrix with the given\n      * format \\a fmt.\n      *\n      * See class IOFormat for some examples.\n      *\n      * \\sa class IOFormat, class WithFormat\n      */\n    inline const WithFormat<Derived> format(const IOFormat& fmt) const\n    {\n      return WithFormat<Derived>(derived(), fmt);\n    }\n\n    /** \\returns the unique coefficient of a 1x1 expression */\n    EIGEN_DEVICE_FUNC\n    CoeffReturnType value() const\n    {\n      EIGEN_STATIC_ASSERT_SIZE_1x1(Derived)\n      eigen_assert(this->rows() == 1 && this->cols() == 1);\n      return derived().coeff(0,0);\n    }\n\n    EIGEN_DEVICE_FUNC bool all() const;\n    EIGEN_DEVICE_FUNC bool any() const;\n    EIGEN_DEVICE_FUNC Index count() const;\n\n    typedef VectorwiseOp<Derived, Horizontal> RowwiseReturnType;\n    typedef const VectorwiseOp<const Derived, Horizontal> ConstRowwiseReturnType;\n    typedef VectorwiseOp<Derived, Vertical> ColwiseReturnType;\n    typedef const VectorwiseOp<const Derived, 
Vertical> ConstColwiseReturnType;\n\n    /** \\returns a VectorwiseOp wrapper of *this for broadcasting and partial reductions\n    *\n    * Example: \\include MatrixBase_rowwise.cpp\n    * Output: \\verbinclude MatrixBase_rowwise.out\n    *\n    * \\sa colwise(), class VectorwiseOp, \\ref TutorialReductionsVisitorsBroadcasting\n    */\n    //Code moved here due to a CUDA compiler bug\n    EIGEN_DEVICE_FUNC inline ConstRowwiseReturnType rowwise() const {\n      return ConstRowwiseReturnType(derived());\n    }\n    EIGEN_DEVICE_FUNC RowwiseReturnType rowwise();\n\n    /** \\returns a VectorwiseOp wrapper of *this for broadcasting and partial reductions\n    *\n    * Example: \\include MatrixBase_colwise.cpp\n    * Output: \\verbinclude MatrixBase_colwise.out\n    *\n    * \\sa rowwise(), class VectorwiseOp, \\ref TutorialReductionsVisitorsBroadcasting\n    */\n    EIGEN_DEVICE_FUNC inline ConstColwiseReturnType colwise() const {\n      return ConstColwiseReturnType(derived());\n    }\n    EIGEN_DEVICE_FUNC ColwiseReturnType colwise();\n\n    typedef CwiseNullaryOp<internal::scalar_random_op<Scalar>,PlainObject> RandomReturnType;\n    static const RandomReturnType Random(Index rows, Index cols);\n    static const RandomReturnType Random(Index size);\n    static const RandomReturnType Random();\n\n    template<typename ThenDerived,typename ElseDerived>\n    inline EIGEN_DEVICE_FUNC const Select<Derived,ThenDerived,ElseDerived>\n    select(const DenseBase<ThenDerived>& thenMatrix,\n           const DenseBase<ElseDerived>& elseMatrix) const;\n\n    template<typename ThenDerived>\n    inline EIGEN_DEVICE_FUNC const Select<Derived,ThenDerived, typename ThenDerived::ConstantReturnType>\n    select(const DenseBase<ThenDerived>& thenMatrix, const typename ThenDerived::Scalar& elseScalar) const;\n\n    template<typename ElseDerived>\n    inline EIGEN_DEVICE_FUNC const Select<Derived, typename ElseDerived::ConstantReturnType, ElseDerived >\n    select(const typename ElseDerived::Scalar& thenScalar, const DenseBase<ElseDerived>& elseMatrix) const;\n\n    template<int p> RealScalar lpNorm() const;\n\n    template<int RowFactor, int ColFactor>\n    EIGEN_DEVICE_FUNC\n    const Replicate<Derived,RowFactor,ColFactor> replicate() const;\n    /**\n    * \\return an expression of the replication of \\c *this\n    *\n    * Example: \\include MatrixBase_replicate_int_int.cpp\n    * Output: \\verbinclude MatrixBase_replicate_int_int.out\n    *\n    * \\sa VectorwiseOp::replicate(), DenseBase::replicate<int,int>(), class Replicate\n    */\n    //Code moved here due to a CUDA compiler bug\n    EIGEN_DEVICE_FUNC\n    const Replicate<Derived, Dynamic, Dynamic> replicate(Index rowFactor, Index colFactor) const\n    {\n      return Replicate<Derived, Dynamic, Dynamic>(derived(), rowFactor, colFactor);\n    }\n\n    typedef Reverse<Derived, BothDirections> ReverseReturnType;\n    typedef const Reverse<const Derived, BothDirections> ConstReverseReturnType;\n    EIGEN_DEVICE_FUNC ReverseReturnType reverse();\n    /** This is the const version of reverse(). 
*/\n    //Code moved here due to a CUDA compiler bug\n    EIGEN_DEVICE_FUNC ConstReverseReturnType reverse() const\n    {\n      return ConstReverseReturnType(derived());\n    }\n    EIGEN_DEVICE_FUNC void reverseInPlace();\n\n    #ifdef EIGEN_PARSED_BY_DOXYGEN\n    /** STL-like <a href=\"https://en.cppreference.com/w/cpp/named_req/RandomAccessIterator\">RandomAccessIterator</a>\n      * iterator type as returned by the begin() and end() methods.\n      */\n    typedef random_access_iterator_type iterator;\n    /** This is the const version of iterator (aka read-only) */\n    typedef random_access_iterator_type const_iterator;\n    #else\n    typedef typename internal::conditional< (Flags&DirectAccessBit)==DirectAccessBit,\n                                            internal::pointer_based_stl_iterator<Derived>,\n                                            internal::generic_randaccess_stl_iterator<Derived>\n                                          >::type iterator_type;\n\n    typedef typename internal::conditional< (Flags&DirectAccessBit)==DirectAccessBit,\n                                            internal::pointer_based_stl_iterator<const Derived>,\n                                            internal::generic_randaccess_stl_iterator<const Derived>\n                                          >::type const_iterator_type;\n\n    // Stl-style iterators are supported only for vectors.\n\n    typedef typename internal::conditional< IsVectorAtCompileTime,\n                                            iterator_type,\n                                            void\n                                          >::type iterator;\n\n    typedef typename internal::conditional< IsVectorAtCompileTime,\n                                            const_iterator_type,\n                                            void\n                                          >::type const_iterator;\n    #endif\n\n    inline iterator begin();\n    inline const_iterator begin() const;\n    inline const_iterator cbegin() const;\n    inline iterator end();\n    inline const_iterator end() const;\n    inline const_iterator cend() const;\n\n#define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::DenseBase\n#define EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL\n#define EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(COND)\n#define EIGEN_DOC_UNARY_ADDONS(X,Y)\n#   include \"../plugins/CommonCwiseUnaryOps.h\"\n#   include \"../plugins/BlockMethods.h\"\n#   include \"../plugins/IndexedViewMethods.h\"\n#   include \"../plugins/ReshapedMethods.h\"\n#   ifdef EIGEN_DENSEBASE_PLUGIN\n#     include EIGEN_DENSEBASE_PLUGIN\n#   endif\n#undef EIGEN_CURRENT_STORAGE_BASE_CLASS\n#undef EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL\n#undef EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF\n#undef EIGEN_DOC_UNARY_ADDONS\n\n    // disable the use of evalTo for dense objects with a nice compilation error\n    template<typename Dest>\n    EIGEN_DEVICE_FUNC\n    inline void evalTo(Dest& ) const\n    {\n      EIGEN_STATIC_ASSERT((internal::is_same<Dest,void>::value),THE_EVAL_EVALTO_FUNCTION_SHOULD_NEVER_BE_CALLED_FOR_DENSE_OBJECTS);\n    }\n\n  protected:\n    EIGEN_DEFAULT_COPY_CONSTRUCTOR(DenseBase)\n    /** Default constructor. Do nothing. 
*/\n    EIGEN_DEVICE_FUNC DenseBase()\n    {\n      /* Just checks for self-consistency of the flags.\n       * Only do it when debugging Eigen, as this borders on paranoia and could slow compilation down\n       */\n#ifdef EIGEN_INTERNAL_DEBUGGING\n      EIGEN_STATIC_ASSERT((EIGEN_IMPLIES(MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1, int(IsRowMajor))\n                        && EIGEN_IMPLIES(MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1, int(!IsRowMajor))),\n                          INVALID_STORAGE_ORDER_FOR_THIS_VECTOR_EXPRESSION)\n#endif\n    }\n\n  private:\n    EIGEN_DEVICE_FUNC explicit DenseBase(int);\n    EIGEN_DEVICE_FUNC DenseBase(int,int);\n    template<typename OtherDerived> EIGEN_DEVICE_FUNC explicit DenseBase(const DenseBase<OtherDerived>&);\n};\n\n} // end namespace Eigen\n\n#endif // EIGEN_DENSEBASE_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/DenseCoeffsBase.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2006-2010 Benoit Jacob <jacob.benoit.1@gmail.com>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_DENSECOEFFSBASE_H\n#define EIGEN_DENSECOEFFSBASE_H\n\nnamespace Eigen {\n\nnamespace internal {\ntemplate<typename T> struct add_const_on_value_type_if_arithmetic\n{\n  typedef typename conditional<is_arithmetic<T>::value, T, typename add_const_on_value_type<T>::type>::type type;\n};\n}\n\n/** \\brief Base class providing read-only coefficient access to matrices and arrays.\n  * \\ingroup Core_Module\n  * \\tparam Derived Type of the derived class\n  *\n  * \\note #ReadOnlyAccessors Constant indicating read-only access\n  *\n  * This class defines the \\c operator() \\c const function and friends, which can be used to read specific\n  * entries of a matrix or array.\n  *\n  * \\sa DenseCoeffsBase<Derived, WriteAccessors>, DenseCoeffsBase<Derived, DirectAccessors>,\n  *     \\ref TopicClassHierarchy\n  */\ntemplate<typename Derived>\nclass DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>\n{\n  public:\n    typedef typename internal::traits<Derived>::StorageKind StorageKind;\n    typedef typename internal::traits<Derived>::Scalar Scalar;\n    typedef typename internal::packet_traits<Scalar>::type PacketScalar;\n\n    // Explanation for this CoeffReturnType typedef.\n    // - This is the return type of the coeff() method.\n    // - The LvalueBit means exactly that we can offer a coeffRef() method, which means exactly that we can get references\n    // to coeffs, which means exactly that we can have coeff() return a const reference (as opposed to returning a value).\n    // - The is_artihmetic check is required since \"const int\", \"const double\", etc. will cause warnings on some systems\n    // while the declaration of \"const T\", where T is a non arithmetic type does not. Always returning \"const Scalar&\" is\n    // not possible, since the underlying expressions might not offer a valid address the reference could be referring to.\n    typedef typename internal::conditional<bool(internal::traits<Derived>::Flags&LvalueBit),\n                         const Scalar&,\n                         typename internal::conditional<internal::is_arithmetic<Scalar>::value, Scalar, const Scalar>::type\n                     >::type CoeffReturnType;\n\n    typedef typename internal::add_const_on_value_type_if_arithmetic<\n                         typename internal::packet_traits<Scalar>::type\n                     >::type PacketReturnType;\n\n    typedef EigenBase<Derived> Base;\n    using Base::rows;\n    using Base::cols;\n    using Base::size;\n    using Base::derived;\n\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE Index rowIndexByOuterInner(Index outer, Index inner) const\n    {\n      return int(Derived::RowsAtCompileTime) == 1 ? 0\n          : int(Derived::ColsAtCompileTime) == 1 ? inner\n          : int(Derived::Flags)&RowMajorBit ? outer\n          : inner;\n    }\n\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE Index colIndexByOuterInner(Index outer, Index inner) const\n    {\n      return int(Derived::ColsAtCompileTime) == 1 ? 0\n          : int(Derived::RowsAtCompileTime) == 1 ? inner\n          : int(Derived::Flags)&RowMajorBit ? 
inner\n          : outer;\n    }\n\n    /** Short version: don't use this function, use\n      * \\link operator()(Index,Index) const \\endlink instead.\n      *\n      * Long version: this function is similar to\n      * \\link operator()(Index,Index) const \\endlink, but without the assertion.\n      * Use this for limiting the performance cost of debugging code when doing\n      * repeated coefficient access. Only use this when it is guaranteed that the\n      * parameters \\a row and \\a col are in range.\n      *\n      * If EIGEN_INTERNAL_DEBUGGING is defined, an assertion will be made, making this\n      * function equivalent to \\link operator()(Index,Index) const \\endlink.\n      *\n      * \\sa operator()(Index,Index) const, coeffRef(Index,Index), coeff(Index) const\n      */\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const\n    {\n      eigen_internal_assert(row >= 0 && row < rows()\n                         && col >= 0 && col < cols());\n      return internal::evaluator<Derived>(derived()).coeff(row,col);\n    }\n\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE CoeffReturnType coeffByOuterInner(Index outer, Index inner) const\n    {\n      return coeff(rowIndexByOuterInner(outer, inner),\n                   colIndexByOuterInner(outer, inner));\n    }\n\n    /** \\returns the coefficient at the given row and column.\n      *\n      * \\sa operator()(Index,Index), operator[](Index)\n      */\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE CoeffReturnType operator()(Index row, Index col) const\n    {\n      eigen_assert(row >= 0 && row < rows()\n          && col >= 0 && col < cols());\n      return coeff(row, col);\n    }\n\n    /** Short version: don't use this function, use\n      * \\link operator[](Index) const \\endlink instead.\n      *\n      * Long version: this function is similar to\n      * \\link operator[](Index) const \\endlink, but without the assertion.\n      * Use this for limiting the performance cost of debugging code when doing\n      * repeated coefficient access. 
Only use this when it is guaranteed that the\n      * parameter \\a index is in range.\n      *\n      * If EIGEN_INTERNAL_DEBUGGING is defined, an assertion will be made, making this\n      * function equivalent to \\link operator[](Index) const \\endlink.\n      *\n      * \\sa operator[](Index) const, coeffRef(Index), coeff(Index,Index) const\n      */\n\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE CoeffReturnType\n    coeff(Index index) const\n    {\n      EIGEN_STATIC_ASSERT(internal::evaluator<Derived>::Flags & LinearAccessBit,\n                          THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS)\n      eigen_internal_assert(index >= 0 && index < size());\n      return internal::evaluator<Derived>(derived()).coeff(index);\n    }\n\n\n    /** \\returns the coefficient at given index.\n      *\n      * This method is allowed only for vector expressions, and for matrix expressions having the LinearAccessBit.\n      *\n      * \\sa operator[](Index), operator()(Index,Index) const, x() const, y() const,\n      * z() const, w() const\n      */\n\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE CoeffReturnType\n    operator[](Index index) const\n    {\n      EIGEN_STATIC_ASSERT(Derived::IsVectorAtCompileTime,\n                          THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD)\n      eigen_assert(index >= 0 && index < size());\n      return coeff(index);\n    }\n\n    /** \\returns the coefficient at given index.\n      *\n      * This is synonymous to operator[](Index) const.\n      *\n      * This method is allowed only for vector expressions, and for matrix expressions having the LinearAccessBit.\n      *\n      * \\sa operator[](Index), operator()(Index,Index) const, x() const, y() const,\n      * z() const, w() const\n      */\n\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE CoeffReturnType\n    operator()(Index index) const\n    {\n      eigen_assert(index >= 0 && index < size());\n      return coeff(index);\n    }\n\n    /** equivalent to operator[](0).  */\n\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE CoeffReturnType\n    x() const { return (*this)[0]; }\n\n    /** equivalent to operator[](1).  */\n\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE CoeffReturnType\n    y() const\n    {\n      EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=2, OUT_OF_RANGE_ACCESS);\n      return (*this)[1];\n    }\n\n    /** equivalent to operator[](2).  */\n\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE CoeffReturnType\n    z() const\n    {\n      EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=3, OUT_OF_RANGE_ACCESS);\n      return (*this)[2];\n    }\n\n    /** equivalent to operator[](3).  */\n\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE CoeffReturnType\n    w() const\n    {\n      EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=4, OUT_OF_RANGE_ACCESS);\n      return (*this)[3];\n    }\n\n    /** \\internal\n      * \\returns the packet of coefficients starting at the given row and column. It is your responsibility\n      * to ensure that a packet really starts there. This method is only available on expressions having the\n      * PacketAccessBit.\n      *\n      * The \\a LoadMode parameter may have the value \\a #Aligned or \\a #Unaligned. Its effect is to select\n      * the appropriate vectorization instruction. 
Aligned access is faster, but is only possible for packets\n      * starting at an address which is a multiple of the packet size.\n      */\n\n    template<int LoadMode>\n    EIGEN_STRONG_INLINE PacketReturnType packet(Index row, Index col) const\n    {\n      typedef typename internal::packet_traits<Scalar>::type DefaultPacketType;\n      eigen_internal_assert(row >= 0 && row < rows() && col >= 0 && col < cols());\n      return internal::evaluator<Derived>(derived()).template packet<LoadMode,DefaultPacketType>(row,col);\n    }\n\n\n    /** \\internal */\n    template<int LoadMode>\n    EIGEN_STRONG_INLINE PacketReturnType packetByOuterInner(Index outer, Index inner) const\n    {\n      return packet<LoadMode>(rowIndexByOuterInner(outer, inner),\n                              colIndexByOuterInner(outer, inner));\n    }\n\n    /** \\internal\n      * \\returns the packet of coefficients starting at the given index. It is your responsibility\n      * to ensure that a packet really starts there. This method is only available on expressions having the\n      * PacketAccessBit and the LinearAccessBit.\n      *\n      * The \\a LoadMode parameter may have the value \\a #Aligned or \\a #Unaligned. Its effect is to select\n      * the appropriate vectorization instruction. Aligned access is faster, but is only possible for packets\n      * starting at an address which is a multiple of the packet size.\n      */\n\n    template<int LoadMode>\n    EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const\n    {\n      EIGEN_STATIC_ASSERT(internal::evaluator<Derived>::Flags & LinearAccessBit,\n                          THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS)\n      typedef typename internal::packet_traits<Scalar>::type DefaultPacketType;\n      eigen_internal_assert(index >= 0 && index < size());\n      return internal::evaluator<Derived>(derived()).template packet<LoadMode,DefaultPacketType>(index);\n    }\n\n  protected:\n    // explanation: DenseBase is doing \"using ...\" on the methods from DenseCoeffsBase.\n    // But some methods are only available in the DirectAccess case.\n    // So we add dummy methods here with these names, so that \"using... \" doesn't fail.\n    // It's not private so that the child class DenseBase can access them, and it's not public\n    // either since it's an implementation detail, so has to be protected.\n    void coeffRef();\n    void coeffRefByOuterInner();\n    void writePacket();\n    void writePacketByOuterInner();\n    void copyCoeff();\n    void copyCoeffByOuterInner();\n    void copyPacket();\n    void copyPacketByOuterInner();\n    void stride();\n    void innerStride();\n    void outerStride();\n    void rowStride();\n    void colStride();\n};\n\n/** \\brief Base class providing read/write coefficient access to matrices and arrays.\n  * \\ingroup Core_Module\n  * \\tparam Derived Type of the derived class\n  *\n  * \\note #WriteAccessors Constant indicating read/write access\n  *\n  * This class defines the non-const \\c operator() function and friends, which can be used to write specific\n  * entries of a matrix or array. 
This class inherits DenseCoeffsBase<Derived, ReadOnlyAccessors> which\n  * defines the const variant for reading specific entries.\n  *\n  * \\sa DenseCoeffsBase<Derived, DirectAccessors>, \\ref TopicClassHierarchy\n  */\ntemplate<typename Derived>\nclass DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived, ReadOnlyAccessors>\n{\n  public:\n\n    typedef DenseCoeffsBase<Derived, ReadOnlyAccessors> Base;\n\n    typedef typename internal::traits<Derived>::StorageKind StorageKind;\n    typedef typename internal::traits<Derived>::Scalar Scalar;\n    typedef typename internal::packet_traits<Scalar>::type PacketScalar;\n    typedef typename NumTraits<Scalar>::Real RealScalar;\n\n    using Base::coeff;\n    using Base::rows;\n    using Base::cols;\n    using Base::size;\n    using Base::derived;\n    using Base::rowIndexByOuterInner;\n    using Base::colIndexByOuterInner;\n    using Base::operator[];\n    using Base::operator();\n    using Base::x;\n    using Base::y;\n    using Base::z;\n    using Base::w;\n\n    /** Short version: don't use this function, use\n      * \\link operator()(Index,Index) \\endlink instead.\n      *\n      * Long version: this function is similar to\n      * \\link operator()(Index,Index) \\endlink, but without the assertion.\n      * Use this for limiting the performance cost of debugging code when doing\n      * repeated coefficient access. Only use this when it is guaranteed that the\n      * parameters \\a row and \\a col are in range.\n      *\n      * If EIGEN_INTERNAL_DEBUGGING is defined, an assertion will be made, making this\n      * function equivalent to \\link operator()(Index,Index) \\endlink.\n      *\n      * \\sa operator()(Index,Index), coeff(Index, Index) const, coeffRef(Index)\n      */\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col)\n    {\n      eigen_internal_assert(row >= 0 && row < rows()\n                         && col >= 0 && col < cols());\n      return internal::evaluator<Derived>(derived()).coeffRef(row,col);\n    }\n\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE Scalar&\n    coeffRefByOuterInner(Index outer, Index inner)\n    {\n      return coeffRef(rowIndexByOuterInner(outer, inner),\n                      colIndexByOuterInner(outer, inner));\n    }\n\n    /** \\returns a reference to the coefficient at the given row and column.\n      *\n      * \\sa operator[](Index)\n      */\n\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE Scalar&\n    operator()(Index row, Index col)\n    {\n      eigen_assert(row >= 0 && row < rows()\n          && col >= 0 && col < cols());\n      return coeffRef(row, col);\n    }\n\n\n    /** Short version: don't use this function, use\n      * \\link operator[](Index) \\endlink instead.\n      *\n      * Long version: this function is similar to\n      * \\link operator[](Index) \\endlink, but without the assertion.\n      * Use this for limiting the performance cost of debugging code when doing\n      * repeated coefficient access. 
Only use this when it is guaranteed that the\n      * parameter \\a index is in range.\n      *\n      * If EIGEN_INTERNAL_DEBUGGING is defined, an assertion will be made, making this\n      * function equivalent to \\link operator[](Index) \\endlink.\n      *\n      * \\sa operator[](Index), coeff(Index) const, coeffRef(Index,Index)\n      */\n\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE Scalar&\n    coeffRef(Index index)\n    {\n      EIGEN_STATIC_ASSERT(internal::evaluator<Derived>::Flags & LinearAccessBit,\n                          THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS)\n      eigen_internal_assert(index >= 0 && index < size());\n      return internal::evaluator<Derived>(derived()).coeffRef(index);\n    }\n\n    /** \\returns a reference to the coefficient at given index.\n      *\n      * This method is allowed only for vector expressions, and for matrix expressions having the LinearAccessBit.\n      *\n      * \\sa operator[](Index) const, operator()(Index,Index), x(), y(), z(), w()\n      */\n\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE Scalar&\n    operator[](Index index)\n    {\n      EIGEN_STATIC_ASSERT(Derived::IsVectorAtCompileTime,\n                          THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD)\n      eigen_assert(index >= 0 && index < size());\n      return coeffRef(index);\n    }\n\n    /** \\returns a reference to the coefficient at given index.\n      *\n      * This is synonymous to operator[](Index).\n      *\n      * This method is allowed only for vector expressions, and for matrix expressions having the LinearAccessBit.\n      *\n      * \\sa operator[](Index) const, operator()(Index,Index), x(), y(), z(), w()\n      */\n\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE Scalar&\n    operator()(Index index)\n    {\n      eigen_assert(index >= 0 && index < size());\n      return coeffRef(index);\n    }\n\n    /** equivalent to operator[](0).  */\n\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE Scalar&\n    x() { return (*this)[0]; }\n\n    /** equivalent to operator[](1).  */\n\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE Scalar&\n    y()\n    {\n      EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=2, OUT_OF_RANGE_ACCESS);\n      return (*this)[1];\n    }\n\n    /** equivalent to operator[](2).  */\n\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE Scalar&\n    z()\n    {\n      EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=3, OUT_OF_RANGE_ACCESS);\n      return (*this)[2];\n    }\n\n    /** equivalent to operator[](3).  */\n\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE Scalar&\n    w()\n    {\n      EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=4, OUT_OF_RANGE_ACCESS);\n      return (*this)[3];\n    }\n};\n\n/** \\brief Base class providing direct read-only coefficient access to matrices and arrays.\n  * \\ingroup Core_Module\n  * \\tparam Derived Type of the derived class\n  *\n  * \\note #DirectAccessors Constant indicating direct access\n  *\n  * This class defines functions to work with strides which can be used to access entries directly. 
This class\n  * inherits DenseCoeffsBase<Derived, ReadOnlyAccessors> which defines functions to access entries read-only using\n  * \\c operator() .\n  *\n  * \\sa \\blank \\ref TopicClassHierarchy\n  */\ntemplate<typename Derived>\nclass DenseCoeffsBase<Derived, DirectAccessors> : public DenseCoeffsBase<Derived, ReadOnlyAccessors>\n{\n  public:\n\n    typedef DenseCoeffsBase<Derived, ReadOnlyAccessors> Base;\n    typedef typename internal::traits<Derived>::Scalar Scalar;\n    typedef typename NumTraits<Scalar>::Real RealScalar;\n\n    using Base::rows;\n    using Base::cols;\n    using Base::size;\n    using Base::derived;\n\n    /** \\returns the pointer increment between two consecutive elements within a slice in the inner direction.\n      *\n      * \\sa outerStride(), rowStride(), colStride()\n      */\n    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n    inline Index innerStride() const\n    {\n      return derived().innerStride();\n    }\n\n    /** \\returns the pointer increment between two consecutive inner slices (for example, between two consecutive columns\n      *          in a column-major matrix).\n      *\n      * \\sa innerStride(), rowStride(), colStride()\n      */\n    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n    inline Index outerStride() const\n    {\n      return derived().outerStride();\n    }\n\n    // FIXME shall we remove it ?\n    EIGEN_CONSTEXPR inline Index stride() const\n    {\n      return Derived::IsVectorAtCompileTime ? innerStride() : outerStride();\n    }\n\n    /** \\returns the pointer increment between two consecutive rows.\n      *\n      * \\sa innerStride(), outerStride(), colStride()\n      */\n    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n    inline Index rowStride() const\n    {\n      return Derived::IsRowMajor ? outerStride() : innerStride();\n    }\n\n    /** \\returns the pointer increment between two consecutive columns.\n      *\n      * \\sa innerStride(), outerStride(), rowStride()\n      */\n    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n    inline Index colStride() const\n    {\n      return Derived::IsRowMajor ? innerStride() : outerStride();\n    }\n};\n\n/** \\brief Base class providing direct read/write coefficient access to matrices and arrays.\n  * \\ingroup Core_Module\n  * \\tparam Derived Type of the derived class\n  *\n  * \\note #DirectWriteAccessors Constant indicating direct access\n  *\n  * This class defines functions to work with strides which can be used to access entries directly. 
This class\n  * inherits DenseCoeffsBase<Derived, WriteAccessors> which defines functions to access entries read/write using\n  * \\c operator().\n  *\n  * \\sa \\blank \\ref TopicClassHierarchy\n  */\ntemplate<typename Derived>\nclass DenseCoeffsBase<Derived, DirectWriteAccessors>\n  : public DenseCoeffsBase<Derived, WriteAccessors>\n{\n  public:\n\n    typedef DenseCoeffsBase<Derived, WriteAccessors> Base;\n    typedef typename internal::traits<Derived>::Scalar Scalar;\n    typedef typename NumTraits<Scalar>::Real RealScalar;\n\n    using Base::rows;\n    using Base::cols;\n    using Base::size;\n    using Base::derived;\n\n    /** \\returns the pointer increment between two consecutive elements within a slice in the inner direction.\n      *\n      * \\sa outerStride(), rowStride(), colStride()\n      */\n    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n    inline Index innerStride() const EIGEN_NOEXCEPT\n    {\n      return derived().innerStride();\n    }\n\n    /** \\returns the pointer increment between two consecutive inner slices (for example, between two consecutive columns\n      *          in a column-major matrix).\n      *\n      * \\sa innerStride(), rowStride(), colStride()\n      */\n    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n    inline Index outerStride() const EIGEN_NOEXCEPT\n    {\n      return derived().outerStride();\n    }\n\n    // FIXME shall we remove it ?\n    EIGEN_CONSTEXPR inline Index stride() const EIGEN_NOEXCEPT\n    {\n      return Derived::IsVectorAtCompileTime ? innerStride() : outerStride();\n    }\n\n    /** \\returns the pointer increment between two consecutive rows.\n      *\n      * \\sa innerStride(), outerStride(), colStride()\n      */\n    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n    inline Index rowStride() const EIGEN_NOEXCEPT\n    {\n      return Derived::IsRowMajor ? outerStride() : innerStride();\n    }\n\n    /** \\returns the pointer increment between two consecutive columns.\n      *\n      * \\sa innerStride(), outerStride(), rowStride()\n      */\n    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n    inline Index colStride() const EIGEN_NOEXCEPT\n    {\n      return Derived::IsRowMajor ? innerStride() : outerStride();\n    }\n};\n\nnamespace internal {\n\ntemplate<int Alignment, typename Derived, bool JustReturnZero>\nstruct first_aligned_impl\n{\n  static EIGEN_CONSTEXPR inline Index run(const Derived&) EIGEN_NOEXCEPT\n  { return 0; }\n};\n\ntemplate<int Alignment, typename Derived>\nstruct first_aligned_impl<Alignment, Derived, false>\n{\n  static inline Index run(const Derived& m)\n  {\n    return internal::first_aligned<Alignment>(m.data(), m.size());\n  }\n};\n\n/** \\internal \\returns the index of the first element of the array stored by \\a m that is properly aligned with respect to \\a Alignment for vectorization.\n  *\n  * \\tparam Alignment requested alignment in Bytes.\n  *\n  * There is also the variant first_aligned(const Scalar*, Integer) defined in Memory.h. 
See it for more\n  * documentation.\n  */\ntemplate<int Alignment, typename Derived>\nstatic inline Index first_aligned(const DenseBase<Derived>& m)\n{\n  enum { ReturnZero = (int(evaluator<Derived>::Alignment) >= Alignment) || !(Derived::Flags & DirectAccessBit) };\n  return first_aligned_impl<Alignment, Derived, ReturnZero>::run(m.derived());\n}\n\ntemplate<typename Derived>\nstatic inline Index first_default_aligned(const DenseBase<Derived>& m)\n{\n  typedef typename Derived::Scalar Scalar;\n  typedef typename packet_traits<Scalar>::type DefaultPacketType;\n  return internal::first_aligned<int(unpacket_traits<DefaultPacketType>::alignment),Derived>(m);\n}\n\ntemplate<typename Derived, bool HasDirectAccess = has_direct_access<Derived>::ret>\nstruct inner_stride_at_compile_time\n{\n  enum { ret = traits<Derived>::InnerStrideAtCompileTime };\n};\n\ntemplate<typename Derived>\nstruct inner_stride_at_compile_time<Derived, false>\n{\n  enum { ret = 0 };\n};\n\ntemplate<typename Derived, bool HasDirectAccess = has_direct_access<Derived>::ret>\nstruct outer_stride_at_compile_time\n{\n  enum { ret = traits<Derived>::OuterStrideAtCompileTime };\n};\n\ntemplate<typename Derived>\nstruct outer_stride_at_compile_time<Derived, false>\n{\n  enum { ret = 0 };\n};\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_DENSECOEFFSBASE_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/DenseStorage.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>\n// Copyright (C) 2006-2009 Benoit Jacob <jacob.benoit.1@gmail.com>\n// Copyright (C) 2010-2013 Hauke Heibel <hauke.heibel@gmail.com>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_MATRIXSTORAGE_H\n#define EIGEN_MATRIXSTORAGE_H\n\n#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN\n  #define EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(X) X; EIGEN_DENSE_STORAGE_CTOR_PLUGIN;\n#else\n  #define EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(X)\n#endif\n\nnamespace Eigen {\n\nnamespace internal {\n\nstruct constructor_without_unaligned_array_assert {};\n\ntemplate<typename T, int Size>\nEIGEN_DEVICE_FUNC\nvoid check_static_allocation_size()\n{\n  // if EIGEN_STACK_ALLOCATION_LIMIT is defined to 0, then no limit\n  #if EIGEN_STACK_ALLOCATION_LIMIT\n  EIGEN_STATIC_ASSERT(Size * sizeof(T) <= EIGEN_STACK_ALLOCATION_LIMIT, OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG);\n  #endif\n}\n\n/** \\internal\n  * Static array. If the MatrixOrArrayOptions require auto-alignment, the array will be automatically aligned:\n  * to 16 bytes boundary if the total size is a multiple of 16 bytes.\n  */\ntemplate <typename T, int Size, int MatrixOrArrayOptions,\n          int Alignment = (MatrixOrArrayOptions&DontAlign) ? 0\n                        : compute_default_alignment<T,Size>::value >\nstruct plain_array\n{\n  T array[Size];\n\n  EIGEN_DEVICE_FUNC\n  plain_array()\n  {\n    check_static_allocation_size<T,Size>();\n  }\n\n  EIGEN_DEVICE_FUNC\n  plain_array(constructor_without_unaligned_array_assert)\n  {\n    check_static_allocation_size<T,Size>();\n  }\n};\n\n#if defined(EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT)\n  #define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask)\n#elif EIGEN_GNUC_AT_LEAST(4,7)\n  // GCC 4.7 is too aggressive in its optimizations and remove the alignment test based on the fact the array is declared to be aligned.\n  // See this bug report: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=53900\n  // Hiding the origin of the array pointer behind a function argument seems to do the trick even if the function is inlined:\n  template<typename PtrType>\n  EIGEN_ALWAYS_INLINE PtrType eigen_unaligned_array_assert_workaround_gcc47(PtrType array) { return array; }\n  #define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask) \\\n    eigen_assert((internal::UIntPtr(eigen_unaligned_array_assert_workaround_gcc47(array)) & (sizemask)) == 0 \\\n              && \"this assertion is explained here: \" \\\n              \"http://eigen.tuxfamily.org/dox-devel/group__TopicUnalignedArrayAssert.html\" \\\n              \" **** READ THIS WEB PAGE !!! ****\");\n#else\n  #define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask) \\\n    eigen_assert((internal::UIntPtr(array) & (sizemask)) == 0 \\\n              && \"this assertion is explained here: \" \\\n              \"http://eigen.tuxfamily.org/dox-devel/group__TopicUnalignedArrayAssert.html\" \\\n              \" **** READ THIS WEB PAGE !!! 
****\");\n#endif\n\ntemplate <typename T, int Size, int MatrixOrArrayOptions>\nstruct plain_array<T, Size, MatrixOrArrayOptions, 8>\n{\n  EIGEN_ALIGN_TO_BOUNDARY(8) T array[Size];\n\n  EIGEN_DEVICE_FUNC\n  plain_array()\n  {\n    EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(7);\n    check_static_allocation_size<T,Size>();\n  }\n\n  EIGEN_DEVICE_FUNC\n  plain_array(constructor_without_unaligned_array_assert)\n  {\n    check_static_allocation_size<T,Size>();\n  }\n};\n\ntemplate <typename T, int Size, int MatrixOrArrayOptions>\nstruct plain_array<T, Size, MatrixOrArrayOptions, 16>\n{\n  EIGEN_ALIGN_TO_BOUNDARY(16) T array[Size];\n\n  EIGEN_DEVICE_FUNC\n  plain_array()\n  {\n    EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(15);\n    check_static_allocation_size<T,Size>();\n  }\n\n  EIGEN_DEVICE_FUNC\n  plain_array(constructor_without_unaligned_array_assert)\n  {\n    check_static_allocation_size<T,Size>();\n  }\n};\n\ntemplate <typename T, int Size, int MatrixOrArrayOptions>\nstruct plain_array<T, Size, MatrixOrArrayOptions, 32>\n{\n  EIGEN_ALIGN_TO_BOUNDARY(32) T array[Size];\n\n  EIGEN_DEVICE_FUNC\n  plain_array()\n  {\n    EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(31);\n    check_static_allocation_size<T,Size>();\n  }\n\n  EIGEN_DEVICE_FUNC\n  plain_array(constructor_without_unaligned_array_assert)\n  {\n    check_static_allocation_size<T,Size>();\n  }\n};\n\ntemplate <typename T, int Size, int MatrixOrArrayOptions>\nstruct plain_array<T, Size, MatrixOrArrayOptions, 64>\n{\n  EIGEN_ALIGN_TO_BOUNDARY(64) T array[Size];\n\n  EIGEN_DEVICE_FUNC\n  plain_array()\n  {\n    EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(63);\n    check_static_allocation_size<T,Size>();\n  }\n\n  EIGEN_DEVICE_FUNC\n  plain_array(constructor_without_unaligned_array_assert)\n  {\n    check_static_allocation_size<T,Size>();\n  }\n};\n\ntemplate <typename T, int MatrixOrArrayOptions, int Alignment>\nstruct plain_array<T, 0, MatrixOrArrayOptions, Alignment>\n{\n  T array[1];\n  EIGEN_DEVICE_FUNC plain_array() {}\n  EIGEN_DEVICE_FUNC plain_array(constructor_without_unaligned_array_assert) {}\n};\n\nstruct plain_array_helper {\n  template<typename T, int Size, int MatrixOrArrayOptions, int Alignment>\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  static void copy(const plain_array<T, Size, MatrixOrArrayOptions, Alignment>& src, const Eigen::Index size,\n                         plain_array<T, Size, MatrixOrArrayOptions, Alignment>& dst) {\n    smart_copy(src.array, src.array + size, dst.array);\n  }\n  \n  template<typename T, int Size, int MatrixOrArrayOptions, int Alignment>\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  static void swap(plain_array<T, Size, MatrixOrArrayOptions, Alignment>& a, const Eigen::Index a_size,\n                   plain_array<T, Size, MatrixOrArrayOptions, Alignment>& b, const Eigen::Index b_size) {\n    if (a_size < b_size) {\n      std::swap_ranges(b.array, b.array + a_size, a.array);\n      smart_move(b.array + a_size, b.array + b_size, a.array + a_size);\n    } else if (a_size > b_size) {\n      std::swap_ranges(a.array, a.array + b_size, b.array);\n      smart_move(a.array + b_size, a.array + a_size, b.array + b_size);\n    } else {\n      std::swap_ranges(a.array, a.array + a_size, b.array);\n    }\n  }\n};\n\n} // end namespace internal\n\n/** \\internal\n  *\n  * \\class DenseStorage\n  * \\ingroup Core_Module\n  *\n  * \\brief Stores the data of a matrix\n  *\n  * This class stores the data of fixed-size, dynamic-size or mixed matrices\n  * in a way as compact as possible.\n  *\n  * \\sa Matrix\n  */\ntemplate<typename T, int Size, 
int _Rows, int _Cols, int _Options> class DenseStorage;\n\n// purely fixed-size matrix\ntemplate<typename T, int Size, int _Rows, int _Cols, int _Options> class DenseStorage\n{\n    internal::plain_array<T,Size,_Options> m_data;\n  public:\n    EIGEN_DEVICE_FUNC DenseStorage() {\n      EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = Size)\n    }\n    EIGEN_DEVICE_FUNC\n    explicit DenseStorage(internal::constructor_without_unaligned_array_assert)\n      : m_data(internal::constructor_without_unaligned_array_assert()) {}\n#if !EIGEN_HAS_CXX11 || defined(EIGEN_DENSE_STORAGE_CTOR_PLUGIN)\n    EIGEN_DEVICE_FUNC\n    DenseStorage(const DenseStorage& other) : m_data(other.m_data) {\n      EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = Size)\n    }\n#else\n    EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage&) = default;\n#endif\n#if !EIGEN_HAS_CXX11\n    EIGEN_DEVICE_FUNC\n    DenseStorage& operator=(const DenseStorage& other)\n    {\n      if (this != &other) m_data = other.m_data;\n      return *this;\n    }\n#else\n    EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage&) = default;\n#endif\n#if EIGEN_HAS_RVALUE_REFERENCES\n#if !EIGEN_HAS_CXX11\n    EIGEN_DEVICE_FUNC DenseStorage(DenseStorage&& other) EIGEN_NOEXCEPT\n      : m_data(std::move(other.m_data))\n    {\n    }\n    EIGEN_DEVICE_FUNC DenseStorage& operator=(DenseStorage&& other) EIGEN_NOEXCEPT\n    {\n      if (this != &other)\n        m_data = std::move(other.m_data);\n      return *this;\n    }\n#else\n    EIGEN_DEVICE_FUNC DenseStorage(DenseStorage&&) = default;\n    EIGEN_DEVICE_FUNC DenseStorage& operator=(DenseStorage&&) = default;\n#endif\n#endif\n    EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) {\n      EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})\n      eigen_internal_assert(size==rows*cols && rows==_Rows && cols==_Cols);\n      EIGEN_UNUSED_VARIABLE(size);\n      EIGEN_UNUSED_VARIABLE(rows);\n      EIGEN_UNUSED_VARIABLE(cols);\n    }\n    EIGEN_DEVICE_FUNC void swap(DenseStorage& other) {\n      numext::swap(m_data, other.m_data);\n    }\n    EIGEN_DEVICE_FUNC static EIGEN_CONSTEXPR Index rows(void) EIGEN_NOEXCEPT {return _Rows;}\n    EIGEN_DEVICE_FUNC static EIGEN_CONSTEXPR Index cols(void) EIGEN_NOEXCEPT {return _Cols;}\n    EIGEN_DEVICE_FUNC void conservativeResize(Index,Index,Index) {}\n    EIGEN_DEVICE_FUNC void resize(Index,Index,Index) {}\n    EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; }\n    EIGEN_DEVICE_FUNC T *data() { return m_data.array; }\n};\n\n// null matrix\ntemplate<typename T, int _Rows, int _Cols, int _Options> class DenseStorage<T, 0, _Rows, _Cols, _Options>\n{\n  public:\n    EIGEN_DEVICE_FUNC DenseStorage() {}\n    EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert) {}\n    EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage&) {}\n    EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage&) { return *this; }\n    EIGEN_DEVICE_FUNC DenseStorage(Index,Index,Index) {}\n    EIGEN_DEVICE_FUNC void swap(DenseStorage& ) {}\n    EIGEN_DEVICE_FUNC static EIGEN_CONSTEXPR Index rows(void) EIGEN_NOEXCEPT {return _Rows;}\n    EIGEN_DEVICE_FUNC static EIGEN_CONSTEXPR Index cols(void) EIGEN_NOEXCEPT {return _Cols;}\n    EIGEN_DEVICE_FUNC void conservativeResize(Index,Index,Index) {}\n    EIGEN_DEVICE_FUNC void resize(Index,Index,Index) {}\n    EIGEN_DEVICE_FUNC const T *data() const { return 0; }\n    EIGEN_DEVICE_FUNC T *data() { return 0; }\n};\n\n// more specializations for null matrices; these are 
necessary to resolve ambiguities\ntemplate<typename T, int _Options> class DenseStorage<T, 0, Dynamic, Dynamic, _Options>\n: public DenseStorage<T, 0, 0, 0, _Options> { };\n\ntemplate<typename T, int _Rows, int _Options> class DenseStorage<T, 0, _Rows, Dynamic, _Options>\n: public DenseStorage<T, 0, 0, 0, _Options> { };\n\ntemplate<typename T, int _Cols, int _Options> class DenseStorage<T, 0, Dynamic, _Cols, _Options>\n: public DenseStorage<T, 0, 0, 0, _Options> { };\n\n// dynamic-size matrix with fixed-size storage\ntemplate<typename T, int Size, int _Options> class DenseStorage<T, Size, Dynamic, Dynamic, _Options>\n{\n    internal::plain_array<T,Size,_Options> m_data;\n    Index m_rows;\n    Index m_cols;\n  public:\n    EIGEN_DEVICE_FUNC DenseStorage() : m_rows(0), m_cols(0) {}\n    EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert)\n      : m_data(internal::constructor_without_unaligned_array_assert()), m_rows(0), m_cols(0) {}\n    EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other)\n      : m_data(internal::constructor_without_unaligned_array_assert()), m_rows(other.m_rows), m_cols(other.m_cols)\n    {\n      internal::plain_array_helper::copy(other.m_data, m_rows * m_cols, m_data);\n    }\n    EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)\n    {\n      if (this != &other)\n      {\n        m_rows = other.m_rows;\n        m_cols = other.m_cols;\n        internal::plain_array_helper::copy(other.m_data, m_rows * m_cols, m_data);\n      }\n      return *this;\n    }\n    EIGEN_DEVICE_FUNC DenseStorage(Index, Index rows, Index cols) : m_rows(rows), m_cols(cols) {}\n    EIGEN_DEVICE_FUNC void swap(DenseStorage& other)\n    {\n      internal::plain_array_helper::swap(m_data, m_rows * m_cols, other.m_data, other.m_rows * other.m_cols);\n      numext::swap(m_rows,other.m_rows);\n      numext::swap(m_cols,other.m_cols);\n    }\n    EIGEN_DEVICE_FUNC Index rows() const {return m_rows;}\n    EIGEN_DEVICE_FUNC Index cols() const {return m_cols;}\n    EIGEN_DEVICE_FUNC void conservativeResize(Index, Index rows, Index cols) { m_rows = rows; m_cols = cols; }\n    EIGEN_DEVICE_FUNC void resize(Index, Index rows, Index cols) { m_rows = rows; m_cols = cols; }\n    EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; }\n    EIGEN_DEVICE_FUNC T *data() { return m_data.array; }\n};\n\n// dynamic-size matrix with fixed-size storage and fixed width\ntemplate<typename T, int Size, int _Cols, int _Options> class DenseStorage<T, Size, Dynamic, _Cols, _Options>\n{\n    internal::plain_array<T,Size,_Options> m_data;\n    Index m_rows;\n  public:\n    EIGEN_DEVICE_FUNC DenseStorage() : m_rows(0) {}\n    EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert)\n      : m_data(internal::constructor_without_unaligned_array_assert()), m_rows(0) {}\n    EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other)\n      : m_data(internal::constructor_without_unaligned_array_assert()), m_rows(other.m_rows)\n    {\n      internal::plain_array_helper::copy(other.m_data, m_rows * _Cols, m_data);\n    }\n    \n    EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)\n    {\n      if (this != &other)\n      {\n        m_rows = other.m_rows;\n        internal::plain_array_helper::copy(other.m_data, m_rows * _Cols, m_data);\n      }\n      return *this;\n    }\n    EIGEN_DEVICE_FUNC DenseStorage(Index, Index rows, Index) : m_rows(rows) {}\n    EIGEN_DEVICE_FUNC void swap(DenseStorage& other)\n    
{ \n      internal::plain_array_helper::swap(m_data, m_rows * _Cols, other.m_data, other.m_rows * _Cols);\n      numext::swap(m_rows, other.m_rows);\n    }\n    EIGEN_DEVICE_FUNC Index rows(void) const EIGEN_NOEXCEPT {return m_rows;}\n    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index cols(void) const EIGEN_NOEXCEPT {return _Cols;}\n    EIGEN_DEVICE_FUNC void conservativeResize(Index, Index rows, Index) { m_rows = rows; }\n    EIGEN_DEVICE_FUNC void resize(Index, Index rows, Index) { m_rows = rows; }\n    EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; }\n    EIGEN_DEVICE_FUNC T *data() { return m_data.array; }\n};\n\n// dynamic-size matrix with fixed-size storage and fixed height\ntemplate<typename T, int Size, int _Rows, int _Options> class DenseStorage<T, Size, _Rows, Dynamic, _Options>\n{\n    internal::plain_array<T,Size,_Options> m_data;\n    Index m_cols;\n  public:\n    EIGEN_DEVICE_FUNC DenseStorage() : m_cols(0) {}\n    EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert)\n      : m_data(internal::constructor_without_unaligned_array_assert()), m_cols(0) {}\n    EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) \n      : m_data(internal::constructor_without_unaligned_array_assert()), m_cols(other.m_cols)\n    {\n      internal::plain_array_helper::copy(other.m_data, _Rows * m_cols, m_data);\n    }\n    EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)\n    {\n      if (this != &other)\n      {\n        m_cols = other.m_cols;\n        internal::plain_array_helper::copy(other.m_data, _Rows * m_cols, m_data);\n      }\n      return *this;\n    }\n    EIGEN_DEVICE_FUNC DenseStorage(Index, Index, Index cols) : m_cols(cols) {}\n    EIGEN_DEVICE_FUNC void swap(DenseStorage& other) {\n      internal::plain_array_helper::swap(m_data, _Rows * m_cols, other.m_data, _Rows * other.m_cols);\n      numext::swap(m_cols, other.m_cols);\n    }\n    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index rows(void) const EIGEN_NOEXCEPT {return _Rows;}\n    EIGEN_DEVICE_FUNC Index cols(void) const EIGEN_NOEXCEPT {return m_cols;}\n    EIGEN_DEVICE_FUNC void conservativeResize(Index, Index, Index cols) { m_cols = cols; }\n    EIGEN_DEVICE_FUNC void resize(Index, Index, Index cols) { m_cols = cols; }\n    EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; }\n    EIGEN_DEVICE_FUNC T *data() { return m_data.array; }\n};\n\n// purely dynamic matrix.\ntemplate<typename T, int _Options> class DenseStorage<T, Dynamic, Dynamic, Dynamic, _Options>\n{\n    T *m_data;\n    Index m_rows;\n    Index m_cols;\n  public:\n    EIGEN_DEVICE_FUNC DenseStorage() : m_data(0), m_rows(0), m_cols(0) {}\n    EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert)\n       : m_data(0), m_rows(0), m_cols(0) {}\n    EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols)\n      : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_rows(rows), m_cols(cols)\n    {\n      EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})\n      eigen_internal_assert(size==rows*cols && rows>=0 && cols >=0);\n    }\n    EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other)\n      : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(other.m_rows*other.m_cols))\n      , m_rows(other.m_rows)\n      , m_cols(other.m_cols)\n    {\n      EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = m_rows*m_cols)\n      internal::smart_copy(other.m_data, other.m_data+other.m_rows*other.m_cols, 
m_data);\n    }\n    EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)\n    {\n      if (this != &other)\n      {\n        DenseStorage tmp(other);\n        this->swap(tmp);\n      }\n      return *this;\n    }\n#if EIGEN_HAS_RVALUE_REFERENCES\n    EIGEN_DEVICE_FUNC\n    DenseStorage(DenseStorage&& other) EIGEN_NOEXCEPT\n      : m_data(std::move(other.m_data))\n      , m_rows(std::move(other.m_rows))\n      , m_cols(std::move(other.m_cols))\n    {\n      other.m_data = nullptr;\n      other.m_rows = 0;\n      other.m_cols = 0;\n    }\n    EIGEN_DEVICE_FUNC\n    DenseStorage& operator=(DenseStorage&& other) EIGEN_NOEXCEPT\n    {\n      numext::swap(m_data, other.m_data);\n      numext::swap(m_rows, other.m_rows);\n      numext::swap(m_cols, other.m_cols);\n      return *this;\n    }\n#endif\n    EIGEN_DEVICE_FUNC ~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, m_rows*m_cols); }\n    EIGEN_DEVICE_FUNC void swap(DenseStorage& other)\n    {\n      numext::swap(m_data,other.m_data);\n      numext::swap(m_rows,other.m_rows);\n      numext::swap(m_cols,other.m_cols);\n    }\n    EIGEN_DEVICE_FUNC Index rows(void) const EIGEN_NOEXCEPT {return m_rows;}\n    EIGEN_DEVICE_FUNC Index cols(void) const EIGEN_NOEXCEPT {return m_cols;}\n    void conservativeResize(Index size, Index rows, Index cols)\n    {\n      m_data = internal::conditional_aligned_realloc_new_auto<T,(_Options&DontAlign)==0>(m_data, size, m_rows*m_cols);\n      m_rows = rows;\n      m_cols = cols;\n    }\n    EIGEN_DEVICE_FUNC void resize(Index size, Index rows, Index cols)\n    {\n      if(size != m_rows*m_cols)\n      {\n        internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, m_rows*m_cols);\n        if (size>0) // >0 and not simply !=0 to let the compiler know that size cannot be negative\n          m_data = internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size);\n        else\n          m_data = 0;\n        EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})\n      }\n      m_rows = rows;\n      m_cols = cols;\n    }\n    EIGEN_DEVICE_FUNC const T *data() const { return m_data; }\n    EIGEN_DEVICE_FUNC T *data() { return m_data; }\n};\n\n// matrix with dynamic width and fixed height (so that matrix has dynamic size).\ntemplate<typename T, int _Rows, int _Options> class DenseStorage<T, Dynamic, _Rows, Dynamic, _Options>\n{\n    T *m_data;\n    Index m_cols;\n  public:\n    EIGEN_DEVICE_FUNC DenseStorage() : m_data(0), m_cols(0) {}\n    explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_cols(0) {}\n    EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_cols(cols)\n    {\n      EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})\n      eigen_internal_assert(size==rows*cols && rows==_Rows && cols >=0);\n      EIGEN_UNUSED_VARIABLE(rows);\n    }\n    EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other)\n      : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(_Rows*other.m_cols))\n      , m_cols(other.m_cols)\n    {\n      EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = m_cols*_Rows)\n      internal::smart_copy(other.m_data, other.m_data+_Rows*m_cols, m_data);\n    }\n    EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)\n    {\n      if (this != &other)\n      {\n        DenseStorage tmp(other);\n        this->swap(tmp);\n      }\n      return 
*this;\n    }\n#if EIGEN_HAS_RVALUE_REFERENCES\n    EIGEN_DEVICE_FUNC\n    DenseStorage(DenseStorage&& other) EIGEN_NOEXCEPT\n      : m_data(std::move(other.m_data))\n      , m_cols(std::move(other.m_cols))\n    {\n      other.m_data = nullptr;\n      other.m_cols = 0;\n    }\n    EIGEN_DEVICE_FUNC\n    DenseStorage& operator=(DenseStorage&& other) EIGEN_NOEXCEPT\n    {\n      numext::swap(m_data, other.m_data);\n      numext::swap(m_cols, other.m_cols);\n      return *this;\n    }\n#endif\n    EIGEN_DEVICE_FUNC ~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Rows*m_cols); }\n    EIGEN_DEVICE_FUNC void swap(DenseStorage& other) {\n      numext::swap(m_data,other.m_data);\n      numext::swap(m_cols,other.m_cols);\n    }\n    EIGEN_DEVICE_FUNC static EIGEN_CONSTEXPR Index rows(void) EIGEN_NOEXCEPT {return _Rows;}\n    EIGEN_DEVICE_FUNC Index cols(void) const EIGEN_NOEXCEPT {return m_cols;}\n    EIGEN_DEVICE_FUNC void conservativeResize(Index size, Index, Index cols)\n    {\n      m_data = internal::conditional_aligned_realloc_new_auto<T,(_Options&DontAlign)==0>(m_data, size, _Rows*m_cols);\n      m_cols = cols;\n    }\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void resize(Index size, Index, Index cols)\n    {\n      if(size != _Rows*m_cols)\n      {\n        internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Rows*m_cols);\n        if (size>0) // >0 and not simply !=0 to let the compiler knows that size cannot be negative\n          m_data = internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size);\n        else\n          m_data = 0;\n        EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})\n      }\n      m_cols = cols;\n    }\n    EIGEN_DEVICE_FUNC const T *data() const { return m_data; }\n    EIGEN_DEVICE_FUNC T *data() { return m_data; }\n};\n\n// matrix with dynamic height and fixed width (so that matrix has dynamic size).\ntemplate<typename T, int _Cols, int _Options> class DenseStorage<T, Dynamic, Dynamic, _Cols, _Options>\n{\n    T *m_data;\n    Index m_rows;\n  public:\n    EIGEN_DEVICE_FUNC DenseStorage() : m_data(0), m_rows(0) {}\n    explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_rows(0) {}\n    EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_rows(rows)\n    {\n      EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})\n      eigen_internal_assert(size==rows*cols && rows>=0 && cols == _Cols);\n      EIGEN_UNUSED_VARIABLE(cols);\n    }\n    EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other)\n      : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(other.m_rows*_Cols))\n      , m_rows(other.m_rows)\n    {\n      EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = m_rows*_Cols)\n      internal::smart_copy(other.m_data, other.m_data+other.m_rows*_Cols, m_data);\n    }\n    EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)\n    {\n      if (this != &other)\n      {\n        DenseStorage tmp(other);\n        this->swap(tmp);\n      }\n      return *this;\n    }\n#if EIGEN_HAS_RVALUE_REFERENCES\n    EIGEN_DEVICE_FUNC\n    DenseStorage(DenseStorage&& other) EIGEN_NOEXCEPT\n      : m_data(std::move(other.m_data))\n      , m_rows(std::move(other.m_rows))\n    {\n      other.m_data = nullptr;\n      other.m_rows = 0;\n    }\n    EIGEN_DEVICE_FUNC\n    DenseStorage& operator=(DenseStorage&& other) EIGEN_NOEXCEPT\n    
{\n      numext::swap(m_data, other.m_data);\n      numext::swap(m_rows, other.m_rows);\n      return *this;\n    }\n#endif\n    EIGEN_DEVICE_FUNC ~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Cols*m_rows); }\n    EIGEN_DEVICE_FUNC void swap(DenseStorage& other) {\n      numext::swap(m_data,other.m_data);\n      numext::swap(m_rows,other.m_rows);\n    }\n    EIGEN_DEVICE_FUNC Index rows(void) const EIGEN_NOEXCEPT {return m_rows;}\n    EIGEN_DEVICE_FUNC static EIGEN_CONSTEXPR Index cols(void) {return _Cols;}\n    void conservativeResize(Index size, Index rows, Index)\n    {\n      m_data = internal::conditional_aligned_realloc_new_auto<T,(_Options&DontAlign)==0>(m_data, size, m_rows*_Cols);\n      m_rows = rows;\n    }\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void resize(Index size, Index rows, Index)\n    {\n      if(size != m_rows*_Cols)\n      {\n        internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Cols*m_rows);\n        if (size>0) // >0 and not simply !=0 to let the compiler knows that size cannot be negative\n          m_data = internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size);\n        else\n          m_data = 0;\n        EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})\n      }\n      m_rows = rows;\n    }\n    EIGEN_DEVICE_FUNC const T *data() const { return m_data; }\n    EIGEN_DEVICE_FUNC T *data() { return m_data; }\n};\n\n} // end namespace Eigen\n\n#endif // EIGEN_MATRIX_H\n"
  },
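  {
    "path": "example/eigen_dense_storage_sketch.cpp",
    "content": "// NOTE: hypothetical usage sketch, not part of Eigen 3.4.0 or this repository.\n// Illustrates the DenseStorage specializations above from the user side: a\n// matrix with fixed rows and dynamic cols only stores a column count, while a\n// fully dynamic matrix heap-allocates and tracks both dimensions.\n#include <iostream>\n#include <Eigen/Dense>\n\nint main() {\n  // Fixed 3 rows, dynamic cols -> DenseStorage<double,Dynamic,3,Dynamic,...>\n  Eigen::Matrix<double, 3, Eigen::Dynamic> m(3, 2);\n  m.setZero();\n  m.conservativeResize(Eigen::NoChange, 4);  // keeps old coefficients, m_cols becomes 4\n  // Fully dynamic -> DenseStorage<double,Dynamic,Dynamic,Dynamic,...>\n  Eigen::MatrixXd d(2, 2);\n  d.resize(5, 5);  // reallocates because the total size changed\n  std::cout << m.rows() << 'x' << m.cols() << \", \"\n            << d.rows() << 'x' << d.cols() << '\\n';  // 3x4, 5x5\n  return 0;\n}\n"
  },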
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/Diagonal.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2007-2009 Benoit Jacob <jacob.benoit.1@gmail.com>\n// Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_DIAGONAL_H\n#define EIGEN_DIAGONAL_H\n\nnamespace Eigen {\n\n/** \\class Diagonal\n  * \\ingroup Core_Module\n  *\n  * \\brief Expression of a diagonal/subdiagonal/superdiagonal in a matrix\n  *\n  * \\param MatrixType the type of the object in which we are taking a sub/main/super diagonal\n  * \\param DiagIndex the index of the sub/super diagonal. The default is 0 and it means the main diagonal.\n  *              A positive value means a superdiagonal, a negative value means a subdiagonal.\n  *              You can also use DynamicIndex so the index can be set at runtime.\n  *\n  * The matrix is not required to be square.\n  *\n  * This class represents an expression of the main diagonal, or any sub/super diagonal\n  * of a square matrix. It is the return type of MatrixBase::diagonal() and MatrixBase::diagonal(Index) and most of the\n  * time this is the only way it is used.\n  *\n  * \\sa MatrixBase::diagonal(), MatrixBase::diagonal(Index)\n  */\n\nnamespace internal {\ntemplate<typename MatrixType, int DiagIndex>\nstruct traits<Diagonal<MatrixType,DiagIndex> >\n : traits<MatrixType>\n{\n  typedef typename ref_selector<MatrixType>::type MatrixTypeNested;\n  typedef typename remove_reference<MatrixTypeNested>::type _MatrixTypeNested;\n  typedef typename MatrixType::StorageKind StorageKind;\n  enum {\n    RowsAtCompileTime = (int(DiagIndex) == DynamicIndex || int(MatrixType::SizeAtCompileTime) == Dynamic) ? Dynamic\n                      : (EIGEN_PLAIN_ENUM_MIN(MatrixType::RowsAtCompileTime - EIGEN_PLAIN_ENUM_MAX(-DiagIndex, 0),\n                                              MatrixType::ColsAtCompileTime - EIGEN_PLAIN_ENUM_MAX( DiagIndex, 0))),\n    ColsAtCompileTime = 1,\n    MaxRowsAtCompileTime = int(MatrixType::MaxSizeAtCompileTime) == Dynamic ? Dynamic\n                         : DiagIndex == DynamicIndex ? EIGEN_SIZE_MIN_PREFER_FIXED(MatrixType::MaxRowsAtCompileTime,\n                                                                              MatrixType::MaxColsAtCompileTime)\n                         : (EIGEN_PLAIN_ENUM_MIN(MatrixType::MaxRowsAtCompileTime - EIGEN_PLAIN_ENUM_MAX(-DiagIndex, 0),\n                                                 MatrixType::MaxColsAtCompileTime - EIGEN_PLAIN_ENUM_MAX( DiagIndex, 0))),\n    MaxColsAtCompileTime = 1,\n    MaskLvalueBit = is_lvalue<MatrixType>::value ? LvalueBit : 0,\n    Flags = (unsigned int)_MatrixTypeNested::Flags & (RowMajorBit | MaskLvalueBit | DirectAccessBit) & ~RowMajorBit, // FIXME DirectAccessBit should not be handled by expressions\n    MatrixTypeOuterStride = outer_stride_at_compile_time<MatrixType>::ret,\n    InnerStrideAtCompileTime = MatrixTypeOuterStride == Dynamic ? 
Dynamic : MatrixTypeOuterStride+1,\n    OuterStrideAtCompileTime = 0\n  };\n};\n}\n\ntemplate<typename MatrixType, int _DiagIndex> class Diagonal\n   : public internal::dense_xpr_base< Diagonal<MatrixType,_DiagIndex> >::type\n{\n  public:\n\n    enum { DiagIndex = _DiagIndex };\n    typedef typename internal::dense_xpr_base<Diagonal>::type Base;\n    EIGEN_DENSE_PUBLIC_INTERFACE(Diagonal)\n\n    EIGEN_DEVICE_FUNC\n    explicit inline Diagonal(MatrixType& matrix, Index a_index = DiagIndex) : m_matrix(matrix), m_index(a_index)\n    {\n      eigen_assert( a_index <= m_matrix.cols() && -a_index <= m_matrix.rows() );\n    }\n\n    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Diagonal)\n\n    EIGEN_DEVICE_FUNC\n    inline Index rows() const\n    {\n      return m_index.value()<0 ? numext::mini<Index>(m_matrix.cols(),m_matrix.rows()+m_index.value())\n                               : numext::mini<Index>(m_matrix.rows(),m_matrix.cols()-m_index.value());\n    }\n\n    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n    inline Index cols() const EIGEN_NOEXCEPT { return 1; }\n\n    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n    inline Index innerStride() const EIGEN_NOEXCEPT {\n      return m_matrix.outerStride() + 1;\n    }\n\n    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n    inline Index outerStride() const EIGEN_NOEXCEPT { return 0; }\n\n    typedef typename internal::conditional<\n                       internal::is_lvalue<MatrixType>::value,\n                       Scalar,\n                       const Scalar\n                     >::type ScalarWithConstIfNotLvalue;\n\n    EIGEN_DEVICE_FUNC\n    inline ScalarWithConstIfNotLvalue* data() { return &(m_matrix.coeffRef(rowOffset(), colOffset())); }\n    EIGEN_DEVICE_FUNC\n    inline const Scalar* data() const { return &(m_matrix.coeffRef(rowOffset(), colOffset())); }\n\n    EIGEN_DEVICE_FUNC\n    inline Scalar& coeffRef(Index row, Index)\n    {\n      EIGEN_STATIC_ASSERT_LVALUE(MatrixType)\n      return m_matrix.coeffRef(row+rowOffset(), row+colOffset());\n    }\n\n    EIGEN_DEVICE_FUNC\n    inline const Scalar& coeffRef(Index row, Index) const\n    {\n      return m_matrix.coeffRef(row+rowOffset(), row+colOffset());\n    }\n\n    EIGEN_DEVICE_FUNC\n    inline CoeffReturnType coeff(Index row, Index) const\n    {\n      return m_matrix.coeff(row+rowOffset(), row+colOffset());\n    }\n\n    EIGEN_DEVICE_FUNC\n    inline Scalar& coeffRef(Index idx)\n    {\n      EIGEN_STATIC_ASSERT_LVALUE(MatrixType)\n      return m_matrix.coeffRef(idx+rowOffset(), idx+colOffset());\n    }\n\n    EIGEN_DEVICE_FUNC\n    inline const Scalar& coeffRef(Index idx) const\n    {\n      return m_matrix.coeffRef(idx+rowOffset(), idx+colOffset());\n    }\n\n    EIGEN_DEVICE_FUNC\n    inline CoeffReturnType coeff(Index idx) const\n    {\n      return m_matrix.coeff(idx+rowOffset(), idx+colOffset());\n    }\n\n    EIGEN_DEVICE_FUNC\n    inline const typename internal::remove_all<typename MatrixType::Nested>::type&\n    nestedExpression() const\n    {\n      return m_matrix;\n    }\n\n    EIGEN_DEVICE_FUNC\n    inline Index index() const\n    {\n      return m_index.value();\n    }\n\n  protected:\n    typename internal::ref_selector<MatrixType>::non_const_type m_matrix;\n    const internal::variable_if_dynamicindex<Index, DiagIndex> m_index;\n\n  private:\n    // some compilers may fail to optimize std::max etc in case of compile-time constants...\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR\n    Index absDiagIndex() const EIGEN_NOEXCEPT { return m_index.value()>0 ? 
m_index.value() : -m_index.value(); }\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR\n    Index rowOffset() const EIGEN_NOEXCEPT { return m_index.value()>0 ? 0 : -m_index.value(); }\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR\n    Index colOffset() const EIGEN_NOEXCEPT { return m_index.value()>0 ? m_index.value() : 0; }\n    // trigger a compile-time error if someone try to call packet\n    template<int LoadMode> typename MatrixType::PacketReturnType packet(Index) const;\n    template<int LoadMode> typename MatrixType::PacketReturnType packet(Index,Index) const;\n};\n\n/** \\returns an expression of the main diagonal of the matrix \\c *this\n  *\n  * \\c *this is not required to be square.\n  *\n  * Example: \\include MatrixBase_diagonal.cpp\n  * Output: \\verbinclude MatrixBase_diagonal.out\n  *\n  * \\sa class Diagonal */\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC inline typename MatrixBase<Derived>::DiagonalReturnType\nMatrixBase<Derived>::diagonal()\n{\n  return DiagonalReturnType(derived());\n}\n\n/** This is the const version of diagonal(). */\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC inline typename MatrixBase<Derived>::ConstDiagonalReturnType\nMatrixBase<Derived>::diagonal() const\n{\n  return ConstDiagonalReturnType(derived());\n}\n\n/** \\returns an expression of the \\a DiagIndex-th sub or super diagonal of the matrix \\c *this\n  *\n  * \\c *this is not required to be square.\n  *\n  * The template parameter \\a DiagIndex represent a super diagonal if \\a DiagIndex > 0\n  * and a sub diagonal otherwise. \\a DiagIndex == 0 is equivalent to the main diagonal.\n  *\n  * Example: \\include MatrixBase_diagonal_int.cpp\n  * Output: \\verbinclude MatrixBase_diagonal_int.out\n  *\n  * \\sa MatrixBase::diagonal(), class Diagonal */\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC inline typename MatrixBase<Derived>::DiagonalDynamicIndexReturnType\nMatrixBase<Derived>::diagonal(Index index)\n{\n  return DiagonalDynamicIndexReturnType(derived(), index);\n}\n\n/** This is the const version of diagonal(Index). */\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC inline typename MatrixBase<Derived>::ConstDiagonalDynamicIndexReturnType\nMatrixBase<Derived>::diagonal(Index index) const\n{\n  return ConstDiagonalDynamicIndexReturnType(derived(), index);\n}\n\n/** \\returns an expression of the \\a DiagIndex-th sub or super diagonal of the matrix \\c *this\n  *\n  * \\c *this is not required to be square.\n  *\n  * The template parameter \\a DiagIndex represent a super diagonal if \\a DiagIndex > 0\n  * and a sub diagonal otherwise. \\a DiagIndex == 0 is equivalent to the main diagonal.\n  *\n  * Example: \\include MatrixBase_diagonal_template_int.cpp\n  * Output: \\verbinclude MatrixBase_diagonal_template_int.out\n  *\n  * \\sa MatrixBase::diagonal(), class Diagonal */\ntemplate<typename Derived>\ntemplate<int Index_>\nEIGEN_DEVICE_FUNC\ninline typename MatrixBase<Derived>::template DiagonalIndexReturnType<Index_>::Type\nMatrixBase<Derived>::diagonal()\n{\n  return typename DiagonalIndexReturnType<Index_>::Type(derived());\n}\n\n/** This is the const version of diagonal<int>(). */\ntemplate<typename Derived>\ntemplate<int Index_>\nEIGEN_DEVICE_FUNC\ninline typename MatrixBase<Derived>::template ConstDiagonalIndexReturnType<Index_>::Type\nMatrixBase<Derived>::diagonal() const\n{\n  return typename ConstDiagonalIndexReturnType<Index_>::Type(derived());\n}\n\n} // end namespace Eigen\n\n#endif // EIGEN_DIAGONAL_H\n"
  },
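  {
    "path": "example/eigen_diagonal_sketch.cpp",
    "content": "// NOTE: hypothetical usage sketch, not part of Eigen 3.4.0 or this repository.\n// Shows the Diagonal expression documented in Diagonal.h: diagonal(), the\n// runtime diagonal(Index) view and the compile-time diagonal<Index>() view,\n// all writable when the underlying matrix is an lvalue.\n#include <iostream>\n#include <Eigen/Dense>\n\nint main() {\n  Eigen::Matrix3d m;\n  m << 1, 2, 3,\n       4, 5, 6,\n       7, 8, 9;\n  std::cout << m.diagonal().transpose()     << '\\n';  // 1 5 9 (main diagonal)\n  std::cout << m.diagonal(1).transpose()    << '\\n';  // 2 6 (superdiagonal, runtime index)\n  std::cout << m.diagonal<-1>().transpose() << '\\n';  // 4 8 (subdiagonal, compile-time index)\n  m.diagonal().setZero();  // the view is an lvalue, so writes land in m\n  return 0;\n}\n"
  },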
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/DiagonalMatrix.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>\n// Copyright (C) 2007-2009 Benoit Jacob <jacob.benoit.1@gmail.com>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_DIAGONALMATRIX_H\n#define EIGEN_DIAGONALMATRIX_H\n\nnamespace Eigen { \n\n#ifndef EIGEN_PARSED_BY_DOXYGEN\ntemplate<typename Derived>\nclass DiagonalBase : public EigenBase<Derived>\n{\n  public:\n    typedef typename internal::traits<Derived>::DiagonalVectorType DiagonalVectorType;\n    typedef typename DiagonalVectorType::Scalar Scalar;\n    typedef typename DiagonalVectorType::RealScalar RealScalar;\n    typedef typename internal::traits<Derived>::StorageKind StorageKind;\n    typedef typename internal::traits<Derived>::StorageIndex StorageIndex;\n\n    enum {\n      RowsAtCompileTime = DiagonalVectorType::SizeAtCompileTime,\n      ColsAtCompileTime = DiagonalVectorType::SizeAtCompileTime,\n      MaxRowsAtCompileTime = DiagonalVectorType::MaxSizeAtCompileTime,\n      MaxColsAtCompileTime = DiagonalVectorType::MaxSizeAtCompileTime,\n      IsVectorAtCompileTime = 0,\n      Flags = NoPreferredStorageOrderBit\n    };\n\n    typedef Matrix<Scalar, RowsAtCompileTime, ColsAtCompileTime, 0, MaxRowsAtCompileTime, MaxColsAtCompileTime> DenseMatrixType;\n    typedef DenseMatrixType DenseType;\n    typedef DiagonalMatrix<Scalar,DiagonalVectorType::SizeAtCompileTime,DiagonalVectorType::MaxSizeAtCompileTime> PlainObject;\n\n    EIGEN_DEVICE_FUNC\n    inline const Derived& derived() const { return *static_cast<const Derived*>(this); }\n    EIGEN_DEVICE_FUNC\n    inline Derived& derived() { return *static_cast<Derived*>(this); }\n\n    EIGEN_DEVICE_FUNC\n    DenseMatrixType toDenseMatrix() const { return derived(); }\n\n    EIGEN_DEVICE_FUNC\n    inline const DiagonalVectorType& diagonal() const { return derived().diagonal(); }\n    EIGEN_DEVICE_FUNC\n    inline DiagonalVectorType& diagonal() { return derived().diagonal(); }\n\n    EIGEN_DEVICE_FUNC\n    inline Index rows() const { return diagonal().size(); }\n    EIGEN_DEVICE_FUNC\n    inline Index cols() const { return diagonal().size(); }\n\n    template<typename MatrixDerived>\n    EIGEN_DEVICE_FUNC\n    const Product<Derived,MatrixDerived,LazyProduct>\n    operator*(const MatrixBase<MatrixDerived> &matrix) const\n    {\n      return Product<Derived, MatrixDerived, LazyProduct>(derived(),matrix.derived());\n    }\n\n    typedef DiagonalWrapper<const CwiseUnaryOp<internal::scalar_inverse_op<Scalar>, const DiagonalVectorType> > InverseReturnType;\n    EIGEN_DEVICE_FUNC\n    inline const InverseReturnType\n    inverse() const\n    {\n      return InverseReturnType(diagonal().cwiseInverse());\n    }\n    \n    EIGEN_DEVICE_FUNC\n    inline const DiagonalWrapper<const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(DiagonalVectorType,Scalar,product) >\n    operator*(const Scalar& scalar) const\n    {\n      return DiagonalWrapper<const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(DiagonalVectorType,Scalar,product) >(diagonal() * scalar);\n    }\n    EIGEN_DEVICE_FUNC\n    friend inline const DiagonalWrapper<const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar,DiagonalVectorType,product) >\n    operator*(const Scalar& scalar, const DiagonalBase& other)\n    {\n      return DiagonalWrapper<const 
EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar,DiagonalVectorType,product) >(scalar * other.diagonal());\n    }\n\n    template<typename OtherDerived>\n    EIGEN_DEVICE_FUNC\n    #ifdef EIGEN_PARSED_BY_DOXYGEN\n    inline unspecified_expression_type\n    #else\n    inline const DiagonalWrapper<const EIGEN_CWISE_BINARY_RETURN_TYPE(DiagonalVectorType,typename OtherDerived::DiagonalVectorType,sum) >\n    #endif\n    operator+(const DiagonalBase<OtherDerived>& other) const\n    {\n      return (diagonal() + other.diagonal()).asDiagonal();\n    }\n\n    template<typename OtherDerived>\n    EIGEN_DEVICE_FUNC\n    #ifdef EIGEN_PARSED_BY_DOXYGEN\n    inline unspecified_expression_type\n    #else\n    inline const DiagonalWrapper<const EIGEN_CWISE_BINARY_RETURN_TYPE(DiagonalVectorType,typename OtherDerived::DiagonalVectorType,difference) >\n    #endif\n    operator-(const DiagonalBase<OtherDerived>& other) const\n    {\n      return (diagonal() - other.diagonal()).asDiagonal();\n    }\n};\n\n#endif\n\n/** \\class DiagonalMatrix\n  * \\ingroup Core_Module\n  *\n  * \\brief Represents a diagonal matrix with its storage\n  *\n  * \\param _Scalar the type of coefficients\n  * \\param SizeAtCompileTime the dimension of the matrix, or Dynamic\n  * \\param MaxSizeAtCompileTime the dimension of the matrix, or Dynamic. This parameter is optional and defaults\n  *        to SizeAtCompileTime. Most of the time, you do not need to specify it.\n  *\n  * \\sa class DiagonalWrapper\n  */\n\nnamespace internal {\ntemplate<typename _Scalar, int SizeAtCompileTime, int MaxSizeAtCompileTime>\nstruct traits<DiagonalMatrix<_Scalar,SizeAtCompileTime,MaxSizeAtCompileTime> >\n : traits<Matrix<_Scalar,SizeAtCompileTime,SizeAtCompileTime,0,MaxSizeAtCompileTime,MaxSizeAtCompileTime> >\n{\n  typedef Matrix<_Scalar,SizeAtCompileTime,1,0,MaxSizeAtCompileTime,1> DiagonalVectorType;\n  typedef DiagonalShape StorageKind;\n  enum {\n    Flags = LvalueBit | NoPreferredStorageOrderBit\n  };\n};\n}\ntemplate<typename _Scalar, int SizeAtCompileTime, int MaxSizeAtCompileTime>\nclass DiagonalMatrix\n  : public DiagonalBase<DiagonalMatrix<_Scalar,SizeAtCompileTime,MaxSizeAtCompileTime> >\n{\n  public:\n    #ifndef EIGEN_PARSED_BY_DOXYGEN\n    typedef typename internal::traits<DiagonalMatrix>::DiagonalVectorType DiagonalVectorType;\n    typedef const DiagonalMatrix& Nested;\n    typedef _Scalar Scalar;\n    typedef typename internal::traits<DiagonalMatrix>::StorageKind StorageKind;\n    typedef typename internal::traits<DiagonalMatrix>::StorageIndex StorageIndex;\n    #endif\n\n  protected:\n\n    DiagonalVectorType m_diagonal;\n\n  public:\n\n    /** const version of diagonal(). */\n    EIGEN_DEVICE_FUNC\n    inline const DiagonalVectorType& diagonal() const { return m_diagonal; }\n    /** \\returns a reference to the stored vector of diagonal coefficients. */\n    EIGEN_DEVICE_FUNC\n    inline DiagonalVectorType& diagonal() { return m_diagonal; }\n\n    /** Default constructor without initialization */\n    EIGEN_DEVICE_FUNC\n    inline DiagonalMatrix() {}\n\n    /** Constructs a diagonal matrix with given dimension  */\n    EIGEN_DEVICE_FUNC\n    explicit inline DiagonalMatrix(Index dim) : m_diagonal(dim) {}\n\n    /** 2D constructor. */\n    EIGEN_DEVICE_FUNC\n    inline DiagonalMatrix(const Scalar& x, const Scalar& y) : m_diagonal(x,y) {}\n\n    /** 3D constructor. 
*/\n    EIGEN_DEVICE_FUNC\n    inline DiagonalMatrix(const Scalar& x, const Scalar& y, const Scalar& z) : m_diagonal(x,y,z) {}\n\n    #if EIGEN_HAS_CXX11\n    /** \\brief Construct a diagonal matrix with fixed size from an arbitrary number of coefficients. \\cpp11\n      * \n      * There exists C++98 anologue constructors for fixed-size diagonal matrices having 2 or 3 coefficients.\n      * \n      * \\warning To construct a diagonal matrix of fixed size, the number of values passed to this \n      * constructor must match the fixed dimension of \\c *this.\n      * \n      * \\sa DiagonalMatrix(const Scalar&, const Scalar&)\n      * \\sa DiagonalMatrix(const Scalar&, const Scalar&, const Scalar&)\n      */\n    template <typename... ArgTypes>\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n    DiagonalMatrix(const Scalar& a0, const Scalar& a1, const Scalar& a2, const ArgTypes&... args)\n      : m_diagonal(a0, a1, a2, args...) {}\n\n    /** \\brief Constructs a DiagonalMatrix and initializes it by elements given by an initializer list of initializer\n      * lists \\cpp11\n      */\n    EIGEN_DEVICE_FUNC\n    explicit EIGEN_STRONG_INLINE DiagonalMatrix(const std::initializer_list<std::initializer_list<Scalar>>& list)\n      : m_diagonal(list) {}\n    #endif  // EIGEN_HAS_CXX11\n\n    /** Copy constructor. */\n    template<typename OtherDerived>\n    EIGEN_DEVICE_FUNC\n    inline DiagonalMatrix(const DiagonalBase<OtherDerived>& other) : m_diagonal(other.diagonal()) {}\n\n    #ifndef EIGEN_PARSED_BY_DOXYGEN\n    /** copy constructor. prevent a default copy constructor from hiding the other templated constructor */\n    inline DiagonalMatrix(const DiagonalMatrix& other) : m_diagonal(other.diagonal()) {}\n    #endif\n\n    /** generic constructor from expression of the diagonal coefficients */\n    template<typename OtherDerived>\n    EIGEN_DEVICE_FUNC\n    explicit inline DiagonalMatrix(const MatrixBase<OtherDerived>& other) : m_diagonal(other)\n    {}\n\n    /** Copy operator. */\n    template<typename OtherDerived>\n    EIGEN_DEVICE_FUNC\n    DiagonalMatrix& operator=(const DiagonalBase<OtherDerived>& other)\n    {\n      m_diagonal = other.diagonal();\n      return *this;\n    }\n\n    #ifndef EIGEN_PARSED_BY_DOXYGEN\n    /** This is a special case of the templated operator=. Its purpose is to\n      * prevent a default operator= from hiding the templated operator=.\n      */\n    EIGEN_DEVICE_FUNC\n    DiagonalMatrix& operator=(const DiagonalMatrix& other)\n    {\n      m_diagonal = other.diagonal();\n      return *this;\n    }\n    #endif\n\n    /** Resizes to given size. */\n    EIGEN_DEVICE_FUNC\n    inline void resize(Index size) { m_diagonal.resize(size); }\n    /** Sets all coefficients to zero. */\n    EIGEN_DEVICE_FUNC\n    inline void setZero() { m_diagonal.setZero(); }\n    /** Resizes and sets all coefficients to zero. */\n    EIGEN_DEVICE_FUNC\n    inline void setZero(Index size) { m_diagonal.setZero(size); }\n    /** Sets this matrix to be the identity matrix of the current size. */\n    EIGEN_DEVICE_FUNC\n    inline void setIdentity() { m_diagonal.setOnes(); }\n    /** Sets this matrix to be the identity matrix of the given size. 
*/\n    EIGEN_DEVICE_FUNC\n    inline void setIdentity(Index size) { m_diagonal.setOnes(size); }\n};\n\n/** \\class DiagonalWrapper\n  * \\ingroup Core_Module\n  *\n  * \\brief Expression of a diagonal matrix\n  *\n  * \\param _DiagonalVectorType the type of the vector of diagonal coefficients\n  *\n  * This class is an expression of a diagonal matrix, but not storing its own vector of diagonal coefficients,\n  * instead wrapping an existing vector expression. It is the return type of MatrixBase::asDiagonal()\n  * and most of the time this is the only way that it is used.\n  *\n  * \\sa class DiagonalMatrix, class DiagonalBase, MatrixBase::asDiagonal()\n  */\n\nnamespace internal {\ntemplate<typename _DiagonalVectorType>\nstruct traits<DiagonalWrapper<_DiagonalVectorType> >\n{\n  typedef _DiagonalVectorType DiagonalVectorType;\n  typedef typename DiagonalVectorType::Scalar Scalar;\n  typedef typename DiagonalVectorType::StorageIndex StorageIndex;\n  typedef DiagonalShape StorageKind;\n  typedef typename traits<DiagonalVectorType>::XprKind XprKind;\n  enum {\n    RowsAtCompileTime = DiagonalVectorType::SizeAtCompileTime,\n    ColsAtCompileTime = DiagonalVectorType::SizeAtCompileTime,\n    MaxRowsAtCompileTime = DiagonalVectorType::MaxSizeAtCompileTime,\n    MaxColsAtCompileTime = DiagonalVectorType::MaxSizeAtCompileTime,\n    Flags =  (traits<DiagonalVectorType>::Flags & LvalueBit) | NoPreferredStorageOrderBit\n  };\n};\n}\n\ntemplate<typename _DiagonalVectorType>\nclass DiagonalWrapper\n  : public DiagonalBase<DiagonalWrapper<_DiagonalVectorType> >, internal::no_assignment_operator\n{\n  public:\n    #ifndef EIGEN_PARSED_BY_DOXYGEN\n    typedef _DiagonalVectorType DiagonalVectorType;\n    typedef DiagonalWrapper Nested;\n    #endif\n\n    /** Constructor from expression of diagonal coefficients to wrap. */\n    EIGEN_DEVICE_FUNC\n    explicit inline DiagonalWrapper(DiagonalVectorType& a_diagonal) : m_diagonal(a_diagonal) {}\n\n    /** \\returns a const reference to the wrapped expression of diagonal coefficients. 
*/\n    EIGEN_DEVICE_FUNC\n    const DiagonalVectorType& diagonal() const { return m_diagonal; }\n\n  protected:\n    typename DiagonalVectorType::Nested m_diagonal;\n};\n\n/** \\returns a pseudo-expression of a diagonal matrix with *this as vector of diagonal coefficients\n  *\n  * \\only_for_vectors\n  *\n  * Example: \\include MatrixBase_asDiagonal.cpp\n  * Output: \\verbinclude MatrixBase_asDiagonal.out\n  *\n  * \\sa class DiagonalWrapper, class DiagonalMatrix, diagonal(), isDiagonal()\n  **/\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC inline const DiagonalWrapper<const Derived>\nMatrixBase<Derived>::asDiagonal() const\n{\n  return DiagonalWrapper<const Derived>(derived());\n}\n\n/** \\returns true if *this is approximately equal to a diagonal matrix,\n  *          within the precision given by \\a prec.\n  *\n  * Example: \\include MatrixBase_isDiagonal.cpp\n  * Output: \\verbinclude MatrixBase_isDiagonal.out\n  *\n  * \\sa asDiagonal()\n  */\ntemplate<typename Derived>\nbool MatrixBase<Derived>::isDiagonal(const RealScalar& prec) const\n{\n  if(cols() != rows()) return false;\n  RealScalar maxAbsOnDiagonal = static_cast<RealScalar>(-1);\n  for(Index j = 0; j < cols(); ++j)\n  {\n    RealScalar absOnDiagonal = numext::abs(coeff(j,j));\n    if(absOnDiagonal > maxAbsOnDiagonal) maxAbsOnDiagonal = absOnDiagonal;\n  }\n  for(Index j = 0; j < cols(); ++j)\n    for(Index i = 0; i < j; ++i)\n    {\n      if(!internal::isMuchSmallerThan(coeff(i, j), maxAbsOnDiagonal, prec)) return false;\n      if(!internal::isMuchSmallerThan(coeff(j, i), maxAbsOnDiagonal, prec)) return false;\n    }\n  return true;\n}\n\nnamespace internal {\n\ntemplate<> struct storage_kind_to_shape<DiagonalShape> { typedef DiagonalShape Shape; };\n\nstruct Diagonal2Dense {};\n\ntemplate<> struct AssignmentKind<DenseShape,DiagonalShape> { typedef Diagonal2Dense Kind; };\n\n// Diagonal matrix to Dense assignment\ntemplate< typename DstXprType, typename SrcXprType, typename Functor>\nstruct Assignment<DstXprType, SrcXprType, Functor, Diagonal2Dense>\n{\n  static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)\n  {\n    Index dstRows = src.rows();\n    Index dstCols = src.cols();\n    if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))\n      dst.resize(dstRows, dstCols);\n    \n    dst.setZero();\n    dst.diagonal() = src.diagonal();\n  }\n  \n  static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)\n  { dst.diagonal() += src.diagonal(); }\n  \n  static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)\n  { dst.diagonal() -= src.diagonal(); }\n};\n\n} // namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_DIAGONALMATRIX_H\n"
  },
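  {
    "path": "example/eigen_diagonal_matrix_sketch.cpp",
    "content": "// NOTE: hypothetical usage sketch, not part of Eigen 3.4.0 or this repository.\n// Contrasts the two classes documented in DiagonalMatrix.h: DiagonalMatrix\n// owns its diagonal coefficients, while asDiagonal() returns a non-owning\n// DiagonalWrapper around an existing vector expression.\n#include <iostream>\n#include <Eigen/Dense>\n\nint main() {\n  Eigen::DiagonalMatrix<double, 3> d(1.0, 2.0, 3.0);   // owning, fixed size\n  Eigen::Vector3d v(4.0, 5.0, 6.0);\n  std::cout << (d * v).transpose() << '\\n';            // 4 10 18\n  std::cout << v.asDiagonal().toDenseMatrix() << '\\n'; // wrapper, densified on demand\n  Eigen::DiagonalMatrix<double, Eigen::Dynamic> id(4);\n  id.setIdentity();                          // sets the stored diagonal vector to ones\n  std::cout << id.diagonal().sum() << '\\n';  // 4\n  return 0;\n}\n"
  },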
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/DiagonalProduct.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>\n// Copyright (C) 2007-2009 Benoit Jacob <jacob.benoit.1@gmail.com>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_DIAGONALPRODUCT_H\n#define EIGEN_DIAGONALPRODUCT_H\n\nnamespace Eigen { \n\n/** \\returns the diagonal matrix product of \\c *this by the diagonal matrix \\a diagonal.\n  */\ntemplate<typename Derived>\ntemplate<typename DiagonalDerived>\nEIGEN_DEVICE_FUNC inline const Product<Derived, DiagonalDerived, LazyProduct>\nMatrixBase<Derived>::operator*(const DiagonalBase<DiagonalDerived> &a_diagonal) const\n{\n  return Product<Derived, DiagonalDerived, LazyProduct>(derived(),a_diagonal.derived());\n}\n\n} // end namespace Eigen\n\n#endif // EIGEN_DIAGONALPRODUCT_H\n"
  },
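  {
    "path": "example/eigen_diagonal_product_sketch.cpp",
    "content": "// NOTE: hypothetical usage sketch, not part of Eigen 3.4.0 or this repository.\n// DiagonalProduct.h only defines MatrixBase::operator*(DiagonalBase), the lazy\n// dense-times-diagonal product: right-multiplying by a diagonal scales the\n// columns, while left-multiplying (from DiagonalBase) scales the rows.\n#include <iostream>\n#include <Eigen/Dense>\n\nint main() {\n  Eigen::Matrix2d m;\n  m << 1, 2,\n       3, 4;\n  Eigen::Vector2d s(10.0, 100.0);\n  std::cout << m * s.asDiagonal() << '\\n';  // columns scaled: [10 200; 30 400]\n  std::cout << s.asDiagonal() * m << '\\n';  // rows scaled:    [10  20; 300 400]\n  return 0;\n}\n"
  },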
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/Dot.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2006-2008, 2010 Benoit Jacob <jacob.benoit.1@gmail.com>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_DOT_H\n#define EIGEN_DOT_H\n\nnamespace Eigen { \n\nnamespace internal {\n\n// helper function for dot(). The problem is that if we put that in the body of dot(), then upon calling dot\n// with mismatched types, the compiler emits errors about failing to instantiate cwiseProduct BEFORE\n// looking at the static assertions. Thus this is a trick to get better compile errors.\ntemplate<typename T, typename U,\n// the NeedToTranspose condition here is taken straight from Assign.h\n         bool NeedToTranspose = T::IsVectorAtCompileTime\n                && U::IsVectorAtCompileTime\n                && ((int(T::RowsAtCompileTime) == 1 && int(U::ColsAtCompileTime) == 1)\n                      |  // FIXME | instead of || to please GCC 4.4.0 stupid warning \"suggest parentheses around &&\".\n                         // revert to || as soon as not needed anymore.\n                    (int(T::ColsAtCompileTime) == 1 && int(U::RowsAtCompileTime) == 1))\n>\nstruct dot_nocheck\n{\n  typedef scalar_conj_product_op<typename traits<T>::Scalar,typename traits<U>::Scalar> conj_prod;\n  typedef typename conj_prod::result_type ResScalar;\n  EIGEN_DEVICE_FUNC\n  EIGEN_STRONG_INLINE\n  static ResScalar run(const MatrixBase<T>& a, const MatrixBase<U>& b)\n  {\n    return a.template binaryExpr<conj_prod>(b).sum();\n  }\n};\n\ntemplate<typename T, typename U>\nstruct dot_nocheck<T, U, true>\n{\n  typedef scalar_conj_product_op<typename traits<T>::Scalar,typename traits<U>::Scalar> conj_prod;\n  typedef typename conj_prod::result_type ResScalar;\n  EIGEN_DEVICE_FUNC\n  EIGEN_STRONG_INLINE\n  static ResScalar run(const MatrixBase<T>& a, const MatrixBase<U>& b)\n  {\n    return a.transpose().template binaryExpr<conj_prod>(b).sum();\n  }\n};\n\n} // end namespace internal\n\n/** \\fn MatrixBase::dot\n  * \\returns the dot product of *this with other.\n  *\n  * \\only_for_vectors\n  *\n  * \\note If the scalar type is complex numbers, then this function returns the hermitian\n  * (sesquilinear) dot product, conjugate-linear in the first variable and linear in the\n  * second variable.\n  *\n  * \\sa squaredNorm(), norm()\n  */\ntemplate<typename Derived>\ntemplate<typename OtherDerived>\nEIGEN_DEVICE_FUNC\nEIGEN_STRONG_INLINE\ntypename ScalarBinaryOpTraits<typename internal::traits<Derived>::Scalar,typename internal::traits<OtherDerived>::Scalar>::ReturnType\nMatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const\n{\n  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)\n  EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)\n  EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(Derived,OtherDerived)\n#if !(defined(EIGEN_NO_STATIC_ASSERT) && defined(EIGEN_NO_DEBUG))\n  typedef internal::scalar_conj_product_op<Scalar,typename OtherDerived::Scalar> func;\n  EIGEN_CHECK_BINARY_COMPATIBILIY(func,Scalar,typename OtherDerived::Scalar);\n#endif\n  \n  eigen_assert(size() == other.size());\n\n  return internal::dot_nocheck<Derived,OtherDerived>::run(*this, other);\n}\n\n//---------- implementation of L2 norm and related functions ----------\n\n/** \\returns, for vectors, the squared \\em l2 norm of \\c *this, and for matrices the squared 
Frobenius norm.\n  * In both cases, it consists in the sum of the square of all the matrix entries.\n  * For vectors, this is also equals to the dot product of \\c *this with itself.\n  *\n  * \\sa dot(), norm(), lpNorm()\n  */\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename NumTraits<typename internal::traits<Derived>::Scalar>::Real MatrixBase<Derived>::squaredNorm() const\n{\n  return numext::real((*this).cwiseAbs2().sum());\n}\n\n/** \\returns, for vectors, the \\em l2 norm of \\c *this, and for matrices the Frobenius norm.\n  * In both cases, it consists in the square root of the sum of the square of all the matrix entries.\n  * For vectors, this is also equals to the square root of the dot product of \\c *this with itself.\n  *\n  * \\sa lpNorm(), dot(), squaredNorm()\n  */\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename NumTraits<typename internal::traits<Derived>::Scalar>::Real MatrixBase<Derived>::norm() const\n{\n  return numext::sqrt(squaredNorm());\n}\n\n/** \\returns an expression of the quotient of \\c *this by its own norm.\n  *\n  * \\warning If the input vector is too small (i.e., this->norm()==0),\n  *          then this function returns a copy of the input.\n  *\n  * \\only_for_vectors\n  *\n  * \\sa norm(), normalize()\n  */\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::PlainObject\nMatrixBase<Derived>::normalized() const\n{\n  typedef typename internal::nested_eval<Derived,2>::type _Nested;\n  _Nested n(derived());\n  RealScalar z = n.squaredNorm();\n  // NOTE: after extensive benchmarking, this conditional does not impact performance, at least on recent x86 CPU\n  if(z>RealScalar(0))\n    return n / numext::sqrt(z);\n  else\n    return n;\n}\n\n/** Normalizes the vector, i.e. 
divides it by its own norm.\n  *\n  * \\only_for_vectors\n  *\n  * \\warning If the input vector is too small (i.e., this->norm()==0), then \\c *this is left unchanged.\n  *\n  * \\sa norm(), normalized()\n  */\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void MatrixBase<Derived>::normalize()\n{\n  RealScalar z = squaredNorm();\n  // NOTE: after extensive benchmarking, this conditional does not impact performance, at least on recent x86 CPU\n  if(z>RealScalar(0))\n    derived() /= numext::sqrt(z);\n}\n\n/** \\returns an expression of the quotient of \\c *this by its own norm while avoiding underflow and overflow.\n  *\n  * \\only_for_vectors\n  *\n  * This method is analogue to the normalized() method, but it reduces the risk of\n  * underflow and overflow when computing the norm.\n  *\n  * \\warning If the input vector is too small (i.e., this->norm()==0),\n  *          then this function returns a copy of the input.\n  *\n  * \\sa stableNorm(), stableNormalize(), normalized()\n  */\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::PlainObject\nMatrixBase<Derived>::stableNormalized() const\n{\n  typedef typename internal::nested_eval<Derived,3>::type _Nested;\n  _Nested n(derived());\n  RealScalar w = n.cwiseAbs().maxCoeff();\n  RealScalar z = (n/w).squaredNorm();\n  if(z>RealScalar(0))\n    return n / (numext::sqrt(z)*w);\n  else\n    return n;\n}\n\n/** Normalizes the vector while avoid underflow and overflow\n  *\n  * \\only_for_vectors\n  *\n  * This method is analogue to the normalize() method, but it reduces the risk of\n  * underflow and overflow when computing the norm.\n  *\n  * \\warning If the input vector is too small (i.e., this->norm()==0), then \\c *this is left unchanged.\n  *\n  * \\sa stableNorm(), stableNormalized(), normalize()\n  */\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void MatrixBase<Derived>::stableNormalize()\n{\n  RealScalar w = cwiseAbs().maxCoeff();\n  RealScalar z = (derived()/w).squaredNorm();\n  if(z>RealScalar(0))\n    derived() /= numext::sqrt(z)*w;\n}\n\n//---------- implementation of other norms ----------\n\nnamespace internal {\n\ntemplate<typename Derived, int p>\nstruct lpNorm_selector\n{\n  typedef typename NumTraits<typename traits<Derived>::Scalar>::Real RealScalar;\n  EIGEN_DEVICE_FUNC\n  static inline RealScalar run(const MatrixBase<Derived>& m)\n  {\n    EIGEN_USING_STD(pow)\n    return pow(m.cwiseAbs().array().pow(p).sum(), RealScalar(1)/p);\n  }\n};\n\ntemplate<typename Derived>\nstruct lpNorm_selector<Derived, 1>\n{\n  EIGEN_DEVICE_FUNC\n  static inline typename NumTraits<typename traits<Derived>::Scalar>::Real run(const MatrixBase<Derived>& m)\n  {\n    return m.cwiseAbs().sum();\n  }\n};\n\ntemplate<typename Derived>\nstruct lpNorm_selector<Derived, 2>\n{\n  EIGEN_DEVICE_FUNC\n  static inline typename NumTraits<typename traits<Derived>::Scalar>::Real run(const MatrixBase<Derived>& m)\n  {\n    return m.norm();\n  }\n};\n\ntemplate<typename Derived>\nstruct lpNorm_selector<Derived, Infinity>\n{\n  typedef typename NumTraits<typename traits<Derived>::Scalar>::Real RealScalar;\n  EIGEN_DEVICE_FUNC\n  static inline RealScalar run(const MatrixBase<Derived>& m)\n  {\n    if(Derived::SizeAtCompileTime==0 || (Derived::SizeAtCompileTime==Dynamic && m.size()==0))\n      return RealScalar(0);\n    return m.cwiseAbs().maxCoeff();\n  }\n};\n\n} // end namespace internal\n\n/** \\returns the \\b coefficient-wise \\f$ \\ell^p \\f$ norm of \\c *this, 
that is, returns the p-th root of the sum of the p-th powers of the absolute values\n  *          of the coefficients of \\c *this. If \\a p is the special value \\a Eigen::Infinity, this function returns the \\f$ \\ell^\\infty \\f$\n  *          norm, that is the maximum of the absolute values of the coefficients of \\c *this.\n  *\n  * In all cases, if \\c *this is empty, then the value 0 is returned.\n  *\n  * \\note For matrices, this function does not compute the <a href=\"https://en.wikipedia.org/wiki/Operator_norm\">operator-norm</a>. That is, if \\c *this is a matrix, then its coefficients are interpreted as a 1D vector. Nonetheless, you can easily compute the 1-norm and \\f$\\infty\\f$-norm matrix operator norms using \\link TutorialReductionsVisitorsBroadcastingReductionsNorm partial reductions \\endlink.\n  *\n  * \\sa norm()\n  */\ntemplate<typename Derived>\ntemplate<int p>\n#ifndef EIGEN_PARSED_BY_DOXYGEN\nEIGEN_DEVICE_FUNC inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real\n#else\nEIGEN_DEVICE_FUNC MatrixBase<Derived>::RealScalar\n#endif\nMatrixBase<Derived>::lpNorm() const\n{\n  return internal::lpNorm_selector<Derived, p>::run(*this);\n}\n\n//---------- implementation of isOrthogonal / isUnitary ----------\n\n/** \\returns true if *this is approximately orthogonal to \\a other,\n  *          within the precision given by \\a prec.\n  *\n  * Example: \\include MatrixBase_isOrthogonal.cpp\n  * Output: \\verbinclude MatrixBase_isOrthogonal.out\n  */\ntemplate<typename Derived>\ntemplate<typename OtherDerived>\nbool MatrixBase<Derived>::isOrthogonal\n(const MatrixBase<OtherDerived>& other, const RealScalar& prec) const\n{\n  typename internal::nested_eval<Derived,2>::type nested(derived());\n  typename internal::nested_eval<OtherDerived,2>::type otherNested(other.derived());\n  return numext::abs2(nested.dot(otherNested)) <= prec * prec * nested.squaredNorm() * otherNested.squaredNorm();\n}\n\n/** \\returns true if *this is approximately an unitary matrix,\n  *          within the precision given by \\a prec. In the case where the \\a Scalar\n  *          type is real numbers, a unitary matrix is an orthogonal matrix, whence the name.\n  *\n  * \\note This can be used to check whether a family of vectors forms an orthonormal basis.\n  *       Indeed, \\c m.isUnitary() returns true if and only if the columns (equivalently, the rows) of m form an\n  *       orthonormal basis.\n  *\n  * Example: \\include MatrixBase_isUnitary.cpp\n  * Output: \\verbinclude MatrixBase_isUnitary.out\n  */\ntemplate<typename Derived>\nbool MatrixBase<Derived>::isUnitary(const RealScalar& prec) const\n{\n  typename internal::nested_eval<Derived,1>::type self(derived());\n  for(Index i = 0; i < cols(); ++i)\n  {\n    if(!internal::isApprox(self.col(i).squaredNorm(), static_cast<RealScalar>(1), prec))\n      return false;\n    for(Index j = 0; j < i; ++j)\n      if(!internal::isMuchSmallerThan(self.col(i).dot(self.col(j)), static_cast<Scalar>(1), prec))\n        return false;\n  }\n  return true;\n}\n\n} // end namespace Eigen\n\n#endif // EIGEN_DOT_H\n"
  },
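  {
    "path": "example/eigen_dot_norm_sketch.cpp",
    "content": "// NOTE: hypothetical usage sketch, not part of Eigen 3.4.0 or this repository.\n// Exercises the entry points implemented in Dot.h: dot(), squaredNorm(),\n// norm(), normalized(), the lpNorm<p>() family, and isUnitary().\n#include <iostream>\n#include <Eigen/Dense>\n\nint main() {\n  Eigen::Vector3d a(1.0, 2.0, 2.0), b(3.0, 0.0, 4.0);\n  std::cout << a.dot(b)        << '\\n';  // 11\n  std::cout << a.squaredNorm() << '\\n';  // 9\n  std::cout << a.norm()        << '\\n';  // 3\n  std::cout << a.normalized().transpose() << '\\n';  // a / 3\n  std::cout << a.lpNorm<1>()   << '\\n';  // 5 (sum of absolute values)\n  std::cout << a.lpNorm<Eigen::Infinity>() << '\\n';  // 2 (max absolute value)\n  std::cout << std::boolalpha\n            << Eigen::Matrix3d::Identity().isUnitary() << '\\n';  // true\n  return 0;\n}\n"
  },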
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/EigenBase.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com>\n// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_EIGENBASE_H\n#define EIGEN_EIGENBASE_H\n\nnamespace Eigen {\n\n/** \\class EigenBase\n  * \\ingroup Core_Module\n  *\n  * Common base class for all classes T such that MatrixBase has an operator=(T) and a constructor MatrixBase(T).\n  *\n  * In other words, an EigenBase object is an object that can be copied into a MatrixBase.\n  *\n  * Besides MatrixBase-derived classes, this also includes special matrix classes such as diagonal matrices, etc.\n  *\n  * Notice that this class is trivial, it is only used to disambiguate overloaded functions.\n  *\n  * \\sa \\blank \\ref TopicClassHierarchy\n  */\ntemplate<typename Derived> struct EigenBase\n{\n//   typedef typename internal::plain_matrix_type<Derived>::type PlainObject;\n\n  /** \\brief The interface type of indices\n    * \\details To change this, \\c \\#define the preprocessor symbol \\c EIGEN_DEFAULT_DENSE_INDEX_TYPE.\n    * \\sa StorageIndex, \\ref TopicPreprocessorDirectives.\n    * DEPRECATED: Since Eigen 3.3, its usage is deprecated. Use Eigen::Index instead.\n    * Deprecation is not marked with a doxygen comment because there are too many existing usages to add the deprecation attribute.\n    */\n  typedef Eigen::Index Index;\n\n  // FIXME is it needed?\n  typedef typename internal::traits<Derived>::StorageKind StorageKind;\n\n  /** \\returns a reference to the derived object */\n  EIGEN_DEVICE_FUNC\n  Derived& derived() { return *static_cast<Derived*>(this); }\n  /** \\returns a const reference to the derived object */\n  EIGEN_DEVICE_FUNC\n  const Derived& derived() const { return *static_cast<const Derived*>(this); }\n\n  EIGEN_DEVICE_FUNC\n  inline Derived& const_cast_derived() const\n  { return *static_cast<Derived*>(const_cast<EigenBase*>(this)); }\n  EIGEN_DEVICE_FUNC\n  inline const Derived& const_derived() const\n  { return *static_cast<const Derived*>(this); }\n\n  /** \\returns the number of rows. \\sa cols(), RowsAtCompileTime */\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n  inline Index rows() const EIGEN_NOEXCEPT { return derived().rows(); }\n  /** \\returns the number of columns. \\sa rows(), ColsAtCompileTime*/\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n  inline Index cols() const EIGEN_NOEXCEPT { return derived().cols(); }\n  /** \\returns the number of coefficients, which is rows()*cols().\n    * \\sa rows(), cols(), SizeAtCompileTime. 
*/\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n  inline Index size() const EIGEN_NOEXCEPT { return rows() * cols(); }\n\n  /** \\internal Don't use it, but do the equivalent: \\code dst = *this; \\endcode */\n  template<typename Dest>\n  EIGEN_DEVICE_FUNC\n  inline void evalTo(Dest& dst) const\n  { derived().evalTo(dst); }\n\n  /** \\internal Don't use it, but do the equivalent: \\code dst += *this; \\endcode */\n  template<typename Dest>\n  EIGEN_DEVICE_FUNC\n  inline void addTo(Dest& dst) const\n  {\n    // This is the default implementation,\n    // derived class can reimplement it in a more optimized way.\n    typename Dest::PlainObject res(rows(),cols());\n    evalTo(res);\n    dst += res;\n  }\n\n  /** \\internal Don't use it, but do the equivalent: \\code dst -= *this; \\endcode */\n  template<typename Dest>\n  EIGEN_DEVICE_FUNC\n  inline void subTo(Dest& dst) const\n  {\n    // This is the default implementation,\n    // derived class can reimplement it in a more optimized way.\n    typename Dest::PlainObject res(rows(),cols());\n    evalTo(res);\n    dst -= res;\n  }\n\n  /** \\internal Don't use it, but do the equivalent: \\code dst.applyOnTheRight(*this); \\endcode */\n  template<typename Dest>\n  EIGEN_DEVICE_FUNC inline void applyThisOnTheRight(Dest& dst) const\n  {\n    // This is the default implementation,\n    // derived class can reimplement it in a more optimized way.\n    dst = dst * this->derived();\n  }\n\n  /** \\internal Don't use it, but do the equivalent: \\code dst.applyOnTheLeft(*this); \\endcode */\n  template<typename Dest>\n  EIGEN_DEVICE_FUNC inline void applyThisOnTheLeft(Dest& dst) const\n  {\n    // This is the default implementation,\n    // derived class can reimplement it in a more optimized way.\n    dst = this->derived() * dst;\n  }\n\n};\n\n/***************************************************************************\n* Implementation of matrix base methods\n***************************************************************************/\n\n/** \\brief Copies the generic expression \\a other into *this.\n  *\n  * \\details The expression must provide a (templated) evalTo(Derived& dst) const\n  * function which does the actual job. In practice, this allows any user to write\n  * its own special matrix without having to modify MatrixBase\n  *\n  * \\returns a reference to *this.\n  */\ntemplate<typename Derived>\ntemplate<typename OtherDerived>\nEIGEN_DEVICE_FUNC\nDerived& DenseBase<Derived>::operator=(const EigenBase<OtherDerived> &other)\n{\n  call_assignment(derived(), other.derived());\n  return derived();\n}\n\ntemplate<typename Derived>\ntemplate<typename OtherDerived>\nEIGEN_DEVICE_FUNC\nDerived& DenseBase<Derived>::operator+=(const EigenBase<OtherDerived> &other)\n{\n  call_assignment(derived(), other.derived(), internal::add_assign_op<Scalar,typename OtherDerived::Scalar>());\n  return derived();\n}\n\ntemplate<typename Derived>\ntemplate<typename OtherDerived>\nEIGEN_DEVICE_FUNC\nDerived& DenseBase<Derived>::operator-=(const EigenBase<OtherDerived> &other)\n{\n  call_assignment(derived(), other.derived(), internal::sub_assign_op<Scalar,typename OtherDerived::Scalar>());\n  return derived();\n}\n\n} // end namespace Eigen\n\n#endif // EIGEN_EIGENBASE_H\n"
  },
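  {
    "path": "example/eigen_eigenbase_sketch.cpp",
    "content": "// NOTE: hypothetical usage sketch, not part of Eigen 3.4.0 or this repository.\n// EigenBase is the common root documented above, so a function taking an\n// EigenBase<Derived> accepts anything copyable into a dense matrix, including\n// special types such as diagonal wrappers that are not MatrixBase expressions.\n#include <iostream>\n#include <Eigen/Dense>\n\ntemplate <typename Derived>\nvoid printShape(const Eigen::EigenBase<Derived>& x) {\n  // Only the minimal EigenBase interface (rows/cols) is used here.\n  std::cout << x.rows() << \" x \" << x.cols() << '\\n';\n}\n\nint main() {\n  Eigen::MatrixXd m(2, 3);\n  printShape(m);               // 2 x 3\n  Eigen::Vector3d v(1.0, 2.0, 3.0);\n  printShape(v.asDiagonal());  // 3 x 3 (a DiagonalWrapper, not a MatrixBase)\n  return 0;\n}\n"
  },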
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/ForceAlignedAccess.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_FORCEALIGNEDACCESS_H\n#define EIGEN_FORCEALIGNEDACCESS_H\n\nnamespace Eigen {\n\n/** \\class ForceAlignedAccess\n  * \\ingroup Core_Module\n  *\n  * \\brief Enforce aligned packet loads and stores regardless of what is requested\n  *\n  * \\param ExpressionType the type of the object of which we are forcing aligned packet access\n  *\n  * This class is the return type of MatrixBase::forceAlignedAccess()\n  * and most of the time this is the only way it is used.\n  *\n  * \\sa MatrixBase::forceAlignedAccess()\n  */\n\nnamespace internal {\ntemplate<typename ExpressionType>\nstruct traits<ForceAlignedAccess<ExpressionType> > : public traits<ExpressionType>\n{};\n}\n\ntemplate<typename ExpressionType> class ForceAlignedAccess\n  : public internal::dense_xpr_base< ForceAlignedAccess<ExpressionType> >::type\n{\n  public:\n\n    typedef typename internal::dense_xpr_base<ForceAlignedAccess>::type Base;\n    EIGEN_DENSE_PUBLIC_INTERFACE(ForceAlignedAccess)\n\n    EIGEN_DEVICE_FUNC explicit inline ForceAlignedAccess(const ExpressionType& matrix) : m_expression(matrix) {}\n\n    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n    inline Index rows() const EIGEN_NOEXCEPT { return m_expression.rows(); }\n    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n    inline Index cols() const EIGEN_NOEXCEPT { return m_expression.cols(); }\n    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n    inline Index outerStride() const EIGEN_NOEXCEPT { return m_expression.outerStride(); }\n    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n    inline Index innerStride() const EIGEN_NOEXCEPT { return m_expression.innerStride(); }\n\n    EIGEN_DEVICE_FUNC inline const CoeffReturnType coeff(Index row, Index col) const\n    {\n      return m_expression.coeff(row, col);\n    }\n\n    EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index row, Index col)\n    {\n      return m_expression.const_cast_derived().coeffRef(row, col);\n    }\n\n    EIGEN_DEVICE_FUNC inline const CoeffReturnType coeff(Index index) const\n    {\n      return m_expression.coeff(index);\n    }\n\n    EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index index)\n    {\n      return m_expression.const_cast_derived().coeffRef(index);\n    }\n\n    template<int LoadMode>\n    inline const PacketScalar packet(Index row, Index col) const\n    {\n      return m_expression.template packet<Aligned>(row, col);\n    }\n\n    template<int LoadMode>\n    inline void writePacket(Index row, Index col, const PacketScalar& x)\n    {\n      m_expression.const_cast_derived().template writePacket<Aligned>(row, col, x);\n    }\n\n    template<int LoadMode>\n    inline const PacketScalar packet(Index index) const\n    {\n      return m_expression.template packet<Aligned>(index);\n    }\n\n    template<int LoadMode>\n    inline void writePacket(Index index, const PacketScalar& x)\n    {\n      m_expression.const_cast_derived().template writePacket<Aligned>(index, x);\n    }\n\n    EIGEN_DEVICE_FUNC operator const ExpressionType&() const { return m_expression; }\n\n  protected:\n    const ExpressionType& m_expression;\n\n  private:\n    ForceAlignedAccess& operator=(const ForceAlignedAccess&);\n};\n\n/** \\returns an expression of *this 
with forced aligned access\n  * \\sa forceAlignedAccessIf(),class ForceAlignedAccess\n  */\ntemplate<typename Derived>\ninline const ForceAlignedAccess<Derived>\nMatrixBase<Derived>::forceAlignedAccess() const\n{\n  return ForceAlignedAccess<Derived>(derived());\n}\n\n/** \\returns an expression of *this with forced aligned access\n  * \\sa forceAlignedAccessIf(), class ForceAlignedAccess\n  */\ntemplate<typename Derived>\ninline ForceAlignedAccess<Derived>\nMatrixBase<Derived>::forceAlignedAccess()\n{\n  return ForceAlignedAccess<Derived>(derived());\n}\n\n/** \\returns an expression of *this with forced aligned access if \\a Enable is true.\n  * \\sa forceAlignedAccess(), class ForceAlignedAccess\n  */\ntemplate<typename Derived>\ntemplate<bool Enable>\ninline typename internal::add_const_on_value_type<typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type>::type\nMatrixBase<Derived>::forceAlignedAccessIf() const\n{\n  return derived();  // FIXME This should not work but apparently is never used\n}\n\n/** \\returns an expression of *this with forced aligned access if \\a Enable is true.\n  * \\sa forceAlignedAccess(), class ForceAlignedAccess\n  */\ntemplate<typename Derived>\ntemplate<bool Enable>\ninline typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type\nMatrixBase<Derived>::forceAlignedAccessIf()\n{\n  return derived();  // FIXME This should not work but apparently is never used\n}\n\n} // end namespace Eigen\n\n#endif // EIGEN_FORCEALIGNEDACCESS_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/Fuzzy.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>\n// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_FUZZY_H\n#define EIGEN_FUZZY_H\n\nnamespace Eigen { \n\nnamespace internal\n{\n\ntemplate<typename Derived, typename OtherDerived, bool is_integer = NumTraits<typename Derived::Scalar>::IsInteger>\nstruct isApprox_selector\n{\n  EIGEN_DEVICE_FUNC\n  static bool run(const Derived& x, const OtherDerived& y, const typename Derived::RealScalar& prec)\n  {\n    typename internal::nested_eval<Derived,2>::type nested(x);\n    typename internal::nested_eval<OtherDerived,2>::type otherNested(y);\n    return (nested - otherNested).cwiseAbs2().sum() <= prec * prec * numext::mini(nested.cwiseAbs2().sum(), otherNested.cwiseAbs2().sum());\n  }\n};\n\ntemplate<typename Derived, typename OtherDerived>\nstruct isApprox_selector<Derived, OtherDerived, true>\n{\n  EIGEN_DEVICE_FUNC\n  static bool run(const Derived& x, const OtherDerived& y, const typename Derived::RealScalar&)\n  {\n    return x.matrix() == y.matrix();\n  }\n};\n\ntemplate<typename Derived, typename OtherDerived, bool is_integer = NumTraits<typename Derived::Scalar>::IsInteger>\nstruct isMuchSmallerThan_object_selector\n{\n  EIGEN_DEVICE_FUNC\n  static bool run(const Derived& x, const OtherDerived& y, const typename Derived::RealScalar& prec)\n  {\n    return x.cwiseAbs2().sum() <= numext::abs2(prec) * y.cwiseAbs2().sum();\n  }\n};\n\ntemplate<typename Derived, typename OtherDerived>\nstruct isMuchSmallerThan_object_selector<Derived, OtherDerived, true>\n{\n  EIGEN_DEVICE_FUNC\n  static bool run(const Derived& x, const OtherDerived&, const typename Derived::RealScalar&)\n  {\n    return x.matrix() == Derived::Zero(x.rows(), x.cols()).matrix();\n  }\n};\n\ntemplate<typename Derived, bool is_integer = NumTraits<typename Derived::Scalar>::IsInteger>\nstruct isMuchSmallerThan_scalar_selector\n{\n  EIGEN_DEVICE_FUNC\n  static bool run(const Derived& x, const typename Derived::RealScalar& y, const typename Derived::RealScalar& prec)\n  {\n    return x.cwiseAbs2().sum() <= numext::abs2(prec * y);\n  }\n};\n\ntemplate<typename Derived>\nstruct isMuchSmallerThan_scalar_selector<Derived, true>\n{\n  EIGEN_DEVICE_FUNC\n  static bool run(const Derived& x, const typename Derived::RealScalar&, const typename Derived::RealScalar&)\n  {\n    return x.matrix() == Derived::Zero(x.rows(), x.cols()).matrix();\n  }\n};\n\n} // end namespace internal\n\n\n/** \\returns \\c true if \\c *this is approximately equal to \\a other, within the precision\n  * determined by \\a prec.\n  *\n  * \\note The fuzzy compares are done multiplicatively. Two vectors \\f$ v \\f$ and \\f$ w \\f$\n  * are considered to be approximately equal within precision \\f$ p \\f$ if\n  * \\f[ \\Vert v - w \\Vert \\leqslant p\\,\\min(\\Vert v\\Vert, \\Vert w\\Vert). 
For matrices, the comparison is done using the Hilbert-Schmidt norm (aka the Frobenius norm or\n  * matrix L2 norm).\n  *\n  * \\note Because this comparison is multiplicative, one can't use this function\n  * to check whether \\c *this is approximately equal to the zero matrix or vector.\n  * Indeed, \\c isApprox(zero) returns false unless \\c *this itself is exactly the zero matrix\n  * or vector. If you want to test whether \\c *this is zero, use internal::isMuchSmallerThan(const\n  * RealScalar&, RealScalar) instead.\n  *\n  * \\sa internal::isMuchSmallerThan(const RealScalar&, RealScalar) const\n  */\ntemplate<typename Derived>\ntemplate<typename OtherDerived>\nEIGEN_DEVICE_FUNC bool DenseBase<Derived>::isApprox(\n  const DenseBase<OtherDerived>& other,\n  const RealScalar& prec\n) const\n{\n  return internal::isApprox_selector<Derived, OtherDerived>::run(derived(), other.derived(), prec);\n}\n\n/** \\returns \\c true if the norm of \\c *this is much smaller than \\a other,\n  * within the precision determined by \\a prec.\n  *\n  * \\note The fuzzy compares are done multiplicatively. A vector \\f$ v \\f$ is\n  * considered to be much smaller than \\f$ x \\f$ within precision \\f$ p \\f$ if\n  * \\f[ \\Vert v \\Vert \\leqslant p\\,\\vert x\\vert. \\f]\n  *\n  * For matrices, the comparison is done using the Hilbert-Schmidt norm. For this reason,\n  * the value of the reference scalar \\a other should come from the Hilbert-Schmidt norm\n  * of a reference matrix of the same dimensions.\n  *\n  * \\sa isApprox(), isMuchSmallerThan(const DenseBase<OtherDerived>&, RealScalar) const\n  */\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC bool DenseBase<Derived>::isMuchSmallerThan(\n  const typename NumTraits<Scalar>::Real& other,\n  const RealScalar& prec\n) const\n{\n  return internal::isMuchSmallerThan_scalar_selector<Derived>::run(derived(), other, prec);\n}\n\n/** \\returns \\c true if the norm of \\c *this is much smaller than the norm of \\a other,\n  * within the precision determined by \\a prec.\n  *\n  * \\note The fuzzy compares are done multiplicatively. A vector \\f$ v \\f$ is\n  * considered to be much smaller than a vector \\f$ w \\f$ within precision \\f$ p \\f$ if\n  * \\f[ \\Vert v \\Vert \\leqslant p\\,\\Vert w\\Vert. \\f]\n  * For matrices, the comparison is done using the Hilbert-Schmidt norm.\n  *\n  * \\sa isApprox(), isMuchSmallerThan(const RealScalar&, RealScalar) const\n  */\ntemplate<typename Derived>\ntemplate<typename OtherDerived>\nEIGEN_DEVICE_FUNC bool DenseBase<Derived>::isMuchSmallerThan(\n  const DenseBase<OtherDerived>& other,\n  const RealScalar& prec\n) const\n{\n  return internal::isMuchSmallerThan_object_selector<Derived, OtherDerived>::run(derived(), other.derived(), prec);\n}\n\n} // end namespace Eigen\n\n#endif // EIGEN_FUZZY_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/GeneralProduct.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>\n// Copyright (C) 2008-2011 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_GENERAL_PRODUCT_H\n#define EIGEN_GENERAL_PRODUCT_H\n\nnamespace Eigen {\n\nenum {\n  Large = 2,\n  Small = 3\n};\n\n// Define the threshold value to fallback from the generic matrix-matrix product\n// implementation (heavy) to the lightweight coeff-based product one.\n// See generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct>\n// in products/GeneralMatrixMatrix.h for more details.\n// TODO This threshold should also be used in the compile-time selector below.\n#ifndef EIGEN_GEMM_TO_COEFFBASED_THRESHOLD\n// This default value has been obtained on a Haswell architecture.\n#define EIGEN_GEMM_TO_COEFFBASED_THRESHOLD 20\n#endif\n\nnamespace internal {\n\ntemplate<int Rows, int Cols, int Depth> struct product_type_selector;\n\ntemplate<int Size, int MaxSize> struct product_size_category\n{\n  enum {\n    #ifndef EIGEN_GPU_COMPILE_PHASE\n    is_large = MaxSize == Dynamic ||\n               Size >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD ||\n               (Size==Dynamic && MaxSize>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD),\n    #else\n    is_large = 0,\n    #endif\n    value = is_large  ? Large\n          : Size == 1 ? 1\n                      : Small\n  };\n};\n\ntemplate<typename Lhs, typename Rhs> struct product_type\n{\n  typedef typename remove_all<Lhs>::type _Lhs;\n  typedef typename remove_all<Rhs>::type _Rhs;\n  enum {\n    MaxRows = traits<_Lhs>::MaxRowsAtCompileTime,\n    Rows    = traits<_Lhs>::RowsAtCompileTime,\n    MaxCols = traits<_Rhs>::MaxColsAtCompileTime,\n    Cols    = traits<_Rhs>::ColsAtCompileTime,\n    MaxDepth = EIGEN_SIZE_MIN_PREFER_FIXED(traits<_Lhs>::MaxColsAtCompileTime,\n                                           traits<_Rhs>::MaxRowsAtCompileTime),\n    Depth = EIGEN_SIZE_MIN_PREFER_FIXED(traits<_Lhs>::ColsAtCompileTime,\n                                        traits<_Rhs>::RowsAtCompileTime)\n  };\n\n  // the splitting into different lines of code here, introducing the _select enums and the typedef below,\n  // is to work around an internal compiler error with gcc 4.1 and 4.2.\nprivate:\n  enum {\n    rows_select = product_size_category<Rows,MaxRows>::value,\n    cols_select = product_size_category<Cols,MaxCols>::value,\n    depth_select = product_size_category<Depth,MaxDepth>::value\n  };\n  typedef product_type_selector<rows_select, cols_select, depth_select> selector;\n\npublic:\n  enum {\n    value = selector::ret,\n    ret = selector::ret\n  };\n#ifdef EIGEN_DEBUG_PRODUCT\n  static void debug()\n  {\n      EIGEN_DEBUG_VAR(Rows);\n      EIGEN_DEBUG_VAR(Cols);\n      EIGEN_DEBUG_VAR(Depth);\n      EIGEN_DEBUG_VAR(rows_select);\n      EIGEN_DEBUG_VAR(cols_select);\n      EIGEN_DEBUG_VAR(depth_select);\n      EIGEN_DEBUG_VAR(value);\n  }\n#endif\n};\n\n/* The following allows to select the kind of product at compile time\n * based on the three dimensions of the product.\n * This is a compile time mapping from {1,Small,Large}^3 -> {product types} */\n// FIXME I'm not sure the current mapping is the ideal one.\ntemplate<int M, int N>  struct product_type_selector<M,N,1>              { enum { 
ret = OuterProduct }; };\ntemplate<int M>         struct product_type_selector<M, 1, 1>            { enum { ret = LazyCoeffBasedProductMode }; };\ntemplate<int N>         struct product_type_selector<1, N, 1>            { enum { ret = LazyCoeffBasedProductMode }; };\ntemplate<int Depth>     struct product_type_selector<1,    1,    Depth>  { enum { ret = InnerProduct }; };\ntemplate<>              struct product_type_selector<1,    1,    1>      { enum { ret = InnerProduct }; };\ntemplate<>              struct product_type_selector<Small,1,    Small>  { enum { ret = CoeffBasedProductMode }; };\ntemplate<>              struct product_type_selector<1,    Small,Small>  { enum { ret = CoeffBasedProductMode }; };\ntemplate<>              struct product_type_selector<Small,Small,Small>  { enum { ret = CoeffBasedProductMode }; };\ntemplate<>              struct product_type_selector<Small, Small, 1>    { enum { ret = LazyCoeffBasedProductMode }; };\ntemplate<>              struct product_type_selector<Small, Large, 1>    { enum { ret = LazyCoeffBasedProductMode }; };\ntemplate<>              struct product_type_selector<Large, Small, 1>    { enum { ret = LazyCoeffBasedProductMode }; };\ntemplate<>              struct product_type_selector<1,    Large,Small>  { enum { ret = CoeffBasedProductMode }; };\ntemplate<>              struct product_type_selector<1,    Large,Large>  { enum { ret = GemvProduct }; };\ntemplate<>              struct product_type_selector<1,    Small,Large>  { enum { ret = CoeffBasedProductMode }; };\ntemplate<>              struct product_type_selector<Large,1,    Small>  { enum { ret = CoeffBasedProductMode }; };\ntemplate<>              struct product_type_selector<Large,1,    Large>  { enum { ret = GemvProduct }; };\ntemplate<>              struct product_type_selector<Small,1,    Large>  { enum { ret = CoeffBasedProductMode }; };\ntemplate<>              struct product_type_selector<Small,Small,Large>  { enum { ret = GemmProduct }; };\ntemplate<>              struct product_type_selector<Large,Small,Large>  { enum { ret = GemmProduct }; };\ntemplate<>              struct product_type_selector<Small,Large,Large>  { enum { ret = GemmProduct }; };\ntemplate<>              struct product_type_selector<Large,Large,Large>  { enum { ret = GemmProduct }; };\ntemplate<>              struct product_type_selector<Large,Small,Small>  { enum { ret = CoeffBasedProductMode }; };\ntemplate<>              struct product_type_selector<Small,Large,Small>  { enum { ret = CoeffBasedProductMode }; };\ntemplate<>              struct product_type_selector<Large,Large,Small>  { enum { ret = GemmProduct }; };\n\n} // end namespace internal\n\n/***********************************************************************\n*  Implementation of Inner Vector Vector Product\n***********************************************************************/\n\n// FIXME : maybe the \"inner product\" could return a Scalar\n// instead of a 1x1 matrix ??\n// Pro: more natural for the user\n// Cons: this could be a problem if in a meta unrolled algorithm a matrix-matrix\n// product ends up as a row-vector times col-vector product... 
To tackle this use\n// case, we could have a specialization for Block<MatrixType,1,1> with: operator=(Scalar x);\n\n/***********************************************************************\n*  Implementation of Outer Vector Vector Product\n***********************************************************************/\n\n/***********************************************************************\n*  Implementation of General Matrix Vector Product\n***********************************************************************/\n\n/*  According to the shape/flags of the matrix we have to distinguish 3 different cases:\n *   1 - the matrix is col-major, BLAS compatible and M is large => call fast BLAS-like colmajor routine\n *   2 - the matrix is row-major, BLAS compatible and N is large => call fast BLAS-like rowmajor routine\n *   3 - all other cases are handled using a simple loop along the outer-storage direction.\n *  Therefore we need a lower level meta selector.\n *  Furthermore, if the matrix is the rhs, then the product has to be transposed.\n */\nnamespace internal {\n\ntemplate<int Side, int StorageOrder, bool BlasCompatible>\nstruct gemv_dense_selector;\n\n} // end namespace internal\n\nnamespace internal {\n\ntemplate<typename Scalar,int Size,int MaxSize,bool Cond> struct gemv_static_vector_if;\n\ntemplate<typename Scalar,int Size,int MaxSize>\nstruct gemv_static_vector_if<Scalar,Size,MaxSize,false>\n{\n  EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Scalar* data() { eigen_internal_assert(false && \"should never be called\"); return 0; }\n};\n\ntemplate<typename Scalar,int Size>\nstruct gemv_static_vector_if<Scalar,Size,Dynamic,true>\n{\n  EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Scalar* data() { return 0; }\n};\n\ntemplate<typename Scalar,int Size,int MaxSize>\nstruct gemv_static_vector_if<Scalar,Size,MaxSize,true>\n{\n  enum {\n    ForceAlignment  = internal::packet_traits<Scalar>::Vectorizable,\n    PacketSize      = internal::packet_traits<Scalar>::size\n  };\n  #if EIGEN_MAX_STATIC_ALIGN_BYTES!=0\n  internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize),0,EIGEN_PLAIN_ENUM_MIN(AlignedMax,PacketSize)> m_data;\n  EIGEN_STRONG_INLINE Scalar* data() { return m_data.array; }\n  #else\n  // Some architectures cannot align on the stack,\n  // => let's manually enforce alignment by allocating more data and returning the address of the first aligned element.\n  internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize)+(ForceAlignment?EIGEN_MAX_ALIGN_BYTES:0),0> m_data;\n  EIGEN_STRONG_INLINE Scalar* data() {\n    return ForceAlignment\n            ? reinterpret_cast<Scalar*>((internal::UIntPtr(m_data.array) & ~(std::size_t(EIGEN_MAX_ALIGN_BYTES-1))) + EIGEN_MAX_ALIGN_BYTES)\n            : m_data.array;\n  }\n  #endif\n};\n\n// The vector is on the left => transposition\ntemplate<int StorageOrder, bool BlasCompatible>\nstruct gemv_dense_selector<OnTheLeft,StorageOrder,BlasCompatible>\n{\n  template<typename Lhs, typename Rhs, typename Dest>\n  static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)\n  {\n    Transpose<Dest> destT(dest);\n    enum { OtherStorageOrder = StorageOrder == RowMajor ? 
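/* storage order of the operands as seen by the transposed product */ 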
ColMajor : RowMajor };\n    gemv_dense_selector<OnTheRight,OtherStorageOrder,BlasCompatible>\n      ::run(rhs.transpose(), lhs.transpose(), destT, alpha);\n  }\n};\n\ntemplate<> struct gemv_dense_selector<OnTheRight,ColMajor,true>\n{\n  template<typename Lhs, typename Rhs, typename Dest>\n  static inline void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)\n  {\n    typedef typename Lhs::Scalar   LhsScalar;\n    typedef typename Rhs::Scalar   RhsScalar;\n    typedef typename Dest::Scalar  ResScalar;\n    typedef typename Dest::RealScalar  RealScalar;\n    \n    typedef internal::blas_traits<Lhs> LhsBlasTraits;\n    typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;\n    typedef internal::blas_traits<Rhs> RhsBlasTraits;\n    typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;\n  \n    typedef Map<Matrix<ResScalar,Dynamic,1>, EIGEN_PLAIN_ENUM_MIN(AlignedMax,internal::packet_traits<ResScalar>::size)> MappedDest;\n\n    ActualLhsType actualLhs = LhsBlasTraits::extract(lhs);\n    ActualRhsType actualRhs = RhsBlasTraits::extract(rhs);\n\n    ResScalar actualAlpha = combine_scalar_factors(alpha, lhs, rhs);\n\n    // make sure Dest is a compile-time vector type (bug 1166)\n    typedef typename conditional<Dest::IsVectorAtCompileTime, Dest, typename Dest::ColXpr>::type ActualDest;\n\n    enum {\n      // FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1\n      // on the other hand, it is good for the cache to pack the vector anyways...\n      EvalToDestAtCompileTime = (ActualDest::InnerStrideAtCompileTime==1),\n      ComplexByReal = (NumTraits<LhsScalar>::IsComplex) && (!NumTraits<RhsScalar>::IsComplex),\n      MightCannotUseDest = ((!EvalToDestAtCompileTime) || ComplexByReal) && (ActualDest::MaxSizeAtCompileTime!=0)\n    };\n\n    typedef const_blas_data_mapper<LhsScalar,Index,ColMajor> LhsMapper;\n    typedef const_blas_data_mapper<RhsScalar,Index,RowMajor> RhsMapper;\n    RhsScalar compatibleAlpha = get_factor<ResScalar,RhsScalar>::run(actualAlpha);\n\n    if(!MightCannotUseDest)\n    {\n      // shortcut if we are sure to be able to use dest directly,\n      // this helps the compiler generate cleaner and more optimized code for most common cases\n      general_matrix_vector_product\n          <Index,LhsScalar,LhsMapper,ColMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsMapper,RhsBlasTraits::NeedToConjugate>::run(\n          actualLhs.rows(), actualLhs.cols(),\n          LhsMapper(actualLhs.data(), actualLhs.outerStride()),\n          RhsMapper(actualRhs.data(), actualRhs.innerStride()),\n          dest.data(), 1,\n          compatibleAlpha);\n    }\n    else\n    {\n      gemv_static_vector_if<ResScalar,ActualDest::SizeAtCompileTime,ActualDest::MaxSizeAtCompileTime,MightCannotUseDest> static_dest;\n\n      const bool alphaIsCompatible = (!ComplexByReal) || (numext::imag(actualAlpha)==RealScalar(0));\n      const bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible;\n\n      ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(),\n                                                    evalToDest ? 
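/* write straight into dest when its storage and alpha allow it, otherwise use an aligned temporary */ 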
dest.data() : static_dest.data());\n\n      if(!evalToDest)\n      {\n        #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN\n        Index size = dest.size();\n        EIGEN_DENSE_STORAGE_CTOR_PLUGIN\n        #endif\n        if(!alphaIsCompatible)\n        {\n          MappedDest(actualDestPtr, dest.size()).setZero();\n          compatibleAlpha = RhsScalar(1);\n        }\n        else\n          MappedDest(actualDestPtr, dest.size()) = dest;\n      }\n\n      general_matrix_vector_product\n          <Index,LhsScalar,LhsMapper,ColMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsMapper,RhsBlasTraits::NeedToConjugate>::run(\n          actualLhs.rows(), actualLhs.cols(),\n          LhsMapper(actualLhs.data(), actualLhs.outerStride()),\n          RhsMapper(actualRhs.data(), actualRhs.innerStride()),\n          actualDestPtr, 1,\n          compatibleAlpha);\n\n      if (!evalToDest)\n      {\n        if(!alphaIsCompatible)\n          dest.matrix() += actualAlpha * MappedDest(actualDestPtr, dest.size());\n        else\n          dest = MappedDest(actualDestPtr, dest.size());\n      }\n    }\n  }\n};\n\ntemplate<> struct gemv_dense_selector<OnTheRight,RowMajor,true>\n{\n  template<typename Lhs, typename Rhs, typename Dest>\n  static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)\n  {\n    typedef typename Lhs::Scalar   LhsScalar;\n    typedef typename Rhs::Scalar   RhsScalar;\n    typedef typename Dest::Scalar  ResScalar;\n    \n    typedef internal::blas_traits<Lhs> LhsBlasTraits;\n    typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;\n    typedef internal::blas_traits<Rhs> RhsBlasTraits;\n    typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;\n    typedef typename internal::remove_all<ActualRhsType>::type ActualRhsTypeCleaned;\n\n    typename add_const<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(lhs);\n    typename add_const<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(rhs);\n\n    ResScalar actualAlpha = combine_scalar_factors(alpha, lhs, rhs);\n\n    enum {\n      // FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1\n      // on the other hand, it is good for the cache to pack the vector anyways...\n      DirectlyUseRhs = ActualRhsTypeCleaned::InnerStrideAtCompileTime==1 || ActualRhsTypeCleaned::MaxSizeAtCompileTime==0\n    };\n\n    gemv_static_vector_if<RhsScalar,ActualRhsTypeCleaned::SizeAtCompileTime,ActualRhsTypeCleaned::MaxSizeAtCompileTime,!DirectlyUseRhs> static_rhs;\n\n    ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhsPtr,actualRhs.size(),\n        DirectlyUseRhs ? 
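/* the rhs is already packed (unit inner stride): use it in place, otherwise copy it into a packed temporary */ 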
const_cast<RhsScalar*>(actualRhs.data()) : static_rhs.data());\n\n    if(!DirectlyUseRhs)\n    {\n      #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN\n      Index size = actualRhs.size();\n      EIGEN_DENSE_STORAGE_CTOR_PLUGIN\n      #endif\n      Map<typename ActualRhsTypeCleaned::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs;\n    }\n\n    typedef const_blas_data_mapper<LhsScalar,Index,RowMajor> LhsMapper;\n    typedef const_blas_data_mapper<RhsScalar,Index,ColMajor> RhsMapper;\n    general_matrix_vector_product\n        <Index,LhsScalar,LhsMapper,RowMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsMapper,RhsBlasTraits::NeedToConjugate>::run(\n        actualLhs.rows(), actualLhs.cols(),\n        LhsMapper(actualLhs.data(), actualLhs.outerStride()),\n        RhsMapper(actualRhsPtr, 1),\n        dest.data(), dest.col(0).innerStride(), //NOTE  if dest is not a vector at compile-time, then dest.innerStride() might be wrong. (bug 1166)\n        actualAlpha);\n  }\n};\n\ntemplate<> struct gemv_dense_selector<OnTheRight,ColMajor,false>\n{\n  template<typename Lhs, typename Rhs, typename Dest>\n  static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)\n  {\n    EIGEN_STATIC_ASSERT((!nested_eval<Lhs,1>::Evaluate),EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE);\n    // TODO if rhs is large enough it might be beneficial to make sure that dest is sequentially stored in memory, otherwise use a temp\n    typename nested_eval<Rhs,1>::type actual_rhs(rhs);\n    const Index size = rhs.rows();\n    for(Index k=0; k<size; ++k)\n      dest += (alpha*actual_rhs.coeff(k)) * lhs.col(k);\n  }\n};\n\ntemplate<> struct gemv_dense_selector<OnTheRight,RowMajor,false>\n{\n  template<typename Lhs, typename Rhs, typename Dest>\n  static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)\n  {\n    EIGEN_STATIC_ASSERT((!nested_eval<Lhs,1>::Evaluate),EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE);\n    typename nested_eval<Rhs,Lhs::RowsAtCompileTime>::type actual_rhs(rhs);\n    const Index rows = dest.rows();\n    for(Index i=0; i<rows; ++i)\n      dest.coeffRef(i) += alpha * (lhs.row(i).cwiseProduct(actual_rhs.transpose())).sum();\n  }\n};\n\n} // end namespace internal\n\n/***************************************************************************\n* Implementation of matrix base methods\n***************************************************************************/\n\n/** \\returns the matrix product of \\c *this and \\a other.\n  *\n  * \\note If instead of the matrix product you want the coefficient-wise product, see Cwise::operator*().\n  *\n  * \\sa lazyProduct(), operator*=(const MatrixBase&), Cwise::operator*()\n  */\ntemplate<typename Derived>\ntemplate<typename OtherDerived>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\nconst Product<Derived, OtherDerived>\nMatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const\n{\n  // A note regarding the function declaration: In MSVC, this function will sometimes\n  // not be inlined since DenseStorage is an unwindable object for dynamic\n  // matrices and product types are holding a member to store the result.\n  // Thus it does not help tagging this function with EIGEN_STRONG_INLINE.\n  enum {\n    ProductIsValid =  Derived::ColsAtCompileTime==Dynamic\n                   || OtherDerived::RowsAtCompileTime==Dynamic\n                   || int(Derived::ColsAtCompileTime)==int(OtherDerived::RowsAtCompileTime),\n    AreVectors = 
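/* both operands are vectors at compile time */ 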
Derived::IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime,\n    SameSizes = EIGEN_PREDICATE_SAME_MATRIX_SIZE(Derived,OtherDerived)\n  };\n  // note to the lost user:\n  //    * for a dot product use: v1.dot(v2)\n  //    * for a coeff-wise product use: v1.cwiseProduct(v2)\n  EIGEN_STATIC_ASSERT(ProductIsValid || !(AreVectors && SameSizes),\n    INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS)\n  EIGEN_STATIC_ASSERT(ProductIsValid || !(SameSizes && !AreVectors),\n    INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION)\n  EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT)\n#ifdef EIGEN_DEBUG_PRODUCT\n  internal::product_type<Derived,OtherDerived>::debug();\n#endif\n\n  return Product<Derived, OtherDerived>(derived(), other.derived());\n}\n\n/** \\returns an expression of the matrix product of \\c *this and \\a other without implicit evaluation.\n  *\n  * The returned product will behave like any other expression: the coefficients of the product will be\n  * computed one at a time, as requested. This might be useful in some extremely rare cases when only\n  * a small and incoherent fraction of the result's coefficients have to be computed.\n  *\n  * \\warning This version of the matrix product can be much, much slower. So use it only if you know\n  * what you are doing and you have measured a true speed improvement.\n  *\n  * \\sa operator*(const MatrixBase&)\n  */\ntemplate<typename Derived>\ntemplate<typename OtherDerived>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\nconst Product<Derived,OtherDerived,LazyProduct>\nMatrixBase<Derived>::lazyProduct(const MatrixBase<OtherDerived> &other) const\n{\n  enum {\n    ProductIsValid =  Derived::ColsAtCompileTime==Dynamic\n                   || OtherDerived::RowsAtCompileTime==Dynamic\n                   || int(Derived::ColsAtCompileTime)==int(OtherDerived::RowsAtCompileTime),\n    AreVectors = Derived::IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime,\n    SameSizes = EIGEN_PREDICATE_SAME_MATRIX_SIZE(Derived,OtherDerived)\n  };\n  // note to the lost user:\n  //    * for a dot product use: v1.dot(v2)\n  //    * for a coeff-wise product use: v1.cwiseProduct(v2)\n  EIGEN_STATIC_ASSERT(ProductIsValid || !(AreVectors && SameSizes),\n    INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS)\n  EIGEN_STATIC_ASSERT(ProductIsValid || !(SameSizes && !AreVectors),\n    INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION)\n  EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT)\n\n  return Product<Derived,OtherDerived,LazyProduct>(derived(), other.derived());\n}\n\n} // end namespace Eigen\n\n#endif // EIGEN_GENERAL_PRODUCT_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/GenericPacketMath.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>\n// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_GENERIC_PACKET_MATH_H\n#define EIGEN_GENERIC_PACKET_MATH_H\n\nnamespace Eigen {\n\nnamespace internal {\n\n/** \\internal\n  * \\file GenericPacketMath.h\n  *\n  * Default implementation for types not supported by the vectorization.\n  * In practice these functions are provided to make easier the writing\n  * of generic vectorized code.\n  */\n\n#ifndef EIGEN_DEBUG_ALIGNED_LOAD\n#define EIGEN_DEBUG_ALIGNED_LOAD\n#endif\n\n#ifndef EIGEN_DEBUG_UNALIGNED_LOAD\n#define EIGEN_DEBUG_UNALIGNED_LOAD\n#endif\n\n#ifndef EIGEN_DEBUG_ALIGNED_STORE\n#define EIGEN_DEBUG_ALIGNED_STORE\n#endif\n\n#ifndef EIGEN_DEBUG_UNALIGNED_STORE\n#define EIGEN_DEBUG_UNALIGNED_STORE\n#endif\n\nstruct default_packet_traits\n{\n  enum {\n    HasHalfPacket = 0,\n\n    HasAdd       = 1,\n    HasSub       = 1,\n    HasShift     = 1,\n    HasMul       = 1,\n    HasNegate    = 1,\n    HasAbs       = 1,\n    HasArg       = 0,\n    HasAbs2      = 1,\n    HasAbsDiff   = 0,\n    HasMin       = 1,\n    HasMax       = 1,\n    HasConj      = 1,\n    HasSetLinear = 1,\n    HasBlend     = 0,\n    // This flag is used to indicate whether packet comparison is supported.\n    // pcmp_eq, pcmp_lt and pcmp_le should be defined for it to be true.\n    HasCmp       = 0,\n\n    HasDiv    = 0,\n    HasSqrt   = 0,\n    HasRsqrt  = 0,\n    HasExp    = 0,\n    HasExpm1  = 0,\n    HasLog    = 0,\n    HasLog1p  = 0,\n    HasLog10  = 0,\n    HasPow    = 0,\n\n    HasSin    = 0,\n    HasCos    = 0,\n    HasTan    = 0,\n    HasASin   = 0,\n    HasACos   = 0,\n    HasATan   = 0,\n    HasSinh   = 0,\n    HasCosh   = 0,\n    HasTanh   = 0,\n    HasLGamma = 0,\n    HasDiGamma = 0,\n    HasZeta = 0,\n    HasPolygamma = 0,\n    HasErf = 0,\n    HasErfc = 0,\n    HasNdtri = 0,\n    HasBessel = 0,\n    HasIGamma = 0,\n    HasIGammaDerA = 0,\n    HasGammaSampleDerAlpha = 0,\n    HasIGammac = 0,\n    HasBetaInc = 0,\n\n    HasRound  = 0,\n    HasRint   = 0,\n    HasFloor  = 0,\n    HasCeil   = 0,\n    HasSign   = 0\n  };\n};\n\ntemplate<typename T> struct packet_traits : default_packet_traits\n{\n  typedef T type;\n  typedef T half;\n  enum {\n    Vectorizable = 0,\n    size = 1,\n    AlignedOnScalar = 0,\n    HasHalfPacket = 0\n  };\n  enum {\n    HasAdd    = 0,\n    HasSub    = 0,\n    HasMul    = 0,\n    HasNegate = 0,\n    HasAbs    = 0,\n    HasAbs2   = 0,\n    HasMin    = 0,\n    HasMax    = 0,\n    HasConj   = 0,\n    HasSetLinear = 0\n  };\n};\n\ntemplate<typename T> struct packet_traits<const T> : packet_traits<T> { };\n\ntemplate<typename T> struct unpacket_traits\n{\n  typedef T type;\n  typedef T half;\n  enum\n  {\n    size = 1,\n    alignment = 1,\n    vectorizable = false,\n    masked_load_available=false,\n    masked_store_available=false\n  };\n};\n\ntemplate<typename T> struct unpacket_traits<const T> : unpacket_traits<T> { };\n\ntemplate <typename Src, typename Tgt> struct type_casting_traits {\n  enum {\n    VectorizedCast = 0,\n    SrcCoeffRatio = 1,\n    TgtCoeffRatio = 1\n  };\n};\n\n/** \\internal Wrapper to ensure that multiple packet types can map to the same\n    same 
underlying vector type. */\ntemplate<typename T, int unique_id = 0>\nstruct eigen_packet_wrapper\n{\n  EIGEN_ALWAYS_INLINE operator T&() { return m_val; }\n  EIGEN_ALWAYS_INLINE operator const T&() const { return m_val; }\n  EIGEN_ALWAYS_INLINE eigen_packet_wrapper() {}\n  EIGEN_ALWAYS_INLINE eigen_packet_wrapper(const T &v) : m_val(v) {}\n  EIGEN_ALWAYS_INLINE eigen_packet_wrapper& operator=(const T &v) {\n    m_val = v;\n    return *this;\n  }\n\n  T m_val;\n};\n\n\n/** \\internal A convenience utility for determining if the type is a scalar.\n * This is used to enable some generic packet implementations.\n */\ntemplate<typename Packet>\nstruct is_scalar {\n  typedef typename unpacket_traits<Packet>::type Scalar;\n  enum {\n    value = internal::is_same<Packet, Scalar>::value\n  };\n};\n\n/** \\internal \\returns static_cast<TgtType>(a) (coeff-wise) */\ntemplate <typename SrcPacket, typename TgtPacket>\nEIGEN_DEVICE_FUNC inline TgtPacket\npcast(const SrcPacket& a) {\n  return static_cast<TgtPacket>(a);\n}\ntemplate <typename SrcPacket, typename TgtPacket>\nEIGEN_DEVICE_FUNC inline TgtPacket\npcast(const SrcPacket& a, const SrcPacket& /*b*/) {\n  return static_cast<TgtPacket>(a);\n}\ntemplate <typename SrcPacket, typename TgtPacket>\nEIGEN_DEVICE_FUNC inline TgtPacket\npcast(const SrcPacket& a, const SrcPacket& /*b*/, const SrcPacket& /*c*/, const SrcPacket& /*d*/) {\n  return static_cast<TgtPacket>(a);\n}\ntemplate <typename SrcPacket, typename TgtPacket>\nEIGEN_DEVICE_FUNC inline TgtPacket\npcast(const SrcPacket& a, const SrcPacket& /*b*/, const SrcPacket& /*c*/, const SrcPacket& /*d*/,\n      const SrcPacket& /*e*/, const SrcPacket& /*f*/, const SrcPacket& /*g*/, const SrcPacket& /*h*/) {\n  return static_cast<TgtPacket>(a);\n}\n\n/** \\internal \\returns reinterpret_cast<Target>(a) */\ntemplate <typename Target, typename Packet>\nEIGEN_DEVICE_FUNC inline Target\npreinterpret(const Packet& a); /* { return reinterpret_cast<const Target&>(a); } */\n\n/** \\internal \\returns a + b (coeff-wise) */\ntemplate<typename Packet> EIGEN_DEVICE_FUNC inline Packet\npadd(const Packet& a, const Packet& b) { return a+b; }\n// Avoid compiler warning for boolean algebra.\ntemplate<> EIGEN_DEVICE_FUNC inline bool\npadd(const bool& a, const bool& b) { return a || b; }\n\n/** \\internal \\returns a - b (coeff-wise) */\ntemplate<typename Packet> EIGEN_DEVICE_FUNC inline Packet\npsub(const Packet& a, const Packet& b) { return a-b; }\n\n/** \\internal \\returns -a (coeff-wise) */\ntemplate<typename Packet> EIGEN_DEVICE_FUNC inline Packet\npnegate(const Packet& a) { return -a; }\n\ntemplate<> EIGEN_DEVICE_FUNC inline bool\npnegate(const bool& a) { return !a; }\n\n/** \\internal \\returns conj(a) (coeff-wise) */\ntemplate<typename Packet> EIGEN_DEVICE_FUNC inline Packet\npconj(const Packet& a) { return numext::conj(a); }\n\n/** \\internal \\returns a * b (coeff-wise) */\ntemplate<typename Packet> EIGEN_DEVICE_FUNC inline Packet\npmul(const Packet& a, const Packet& b) { return a*b; }\n// Avoid compiler warning for boolean algebra.\ntemplate<> EIGEN_DEVICE_FUNC inline bool\npmul(const bool& a, const bool& b) { return a && b; }\n\n/** \\internal \\returns a / b (coeff-wise) */\ntemplate<typename Packet> EIGEN_DEVICE_FUNC inline Packet\npdiv(const Packet& a, const Packet& b) { return a/b; }\n\n// In the generic case, memset to all one bits.\ntemplate<typename Packet, typename EnableIf = void>\nstruct ptrue_impl {\n  static EIGEN_DEVICE_FUNC inline Packet run(const Packet& /*a*/){\n    Packet b;\n    
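// Fill every byte with 0xff: for integer packets this sets all bits; for floating-point\n    // packets the resulting bit pattern is a NaN, which is acceptable because ptrue results\n    // are only consumed as selection masks.\n    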
memset(static_cast<void*>(&b), 0xff, sizeof(Packet));\n    return b;\n  }\n};\n\n// For non-trivial scalars, set to Scalar(1) (i.e. a non-zero value).\n// Although this is technically not a valid bitmask, the scalar path for pselect\n// uses a comparison to zero, so this should still work in most cases. We don't\n// have another option, since the scalar type requires initialization.\ntemplate<typename T>\nstruct ptrue_impl<T, \n    typename internal::enable_if<is_scalar<T>::value && NumTraits<T>::RequireInitialization>::type > {\n  static EIGEN_DEVICE_FUNC inline T run(const T& /*a*/){\n    return T(1);\n  }\n};\n\n/** \\internal \\returns one bits. */\ntemplate<typename Packet> EIGEN_DEVICE_FUNC inline Packet\nptrue(const Packet& a) {\n  return ptrue_impl<Packet>::run(a);\n}\n\n// In the general case, memset to zero.\ntemplate<typename Packet, typename EnableIf = void>\nstruct pzero_impl {\n  static EIGEN_DEVICE_FUNC inline Packet run(const Packet& /*a*/) {\n    Packet b;\n    memset(static_cast<void*>(&b), 0x00, sizeof(Packet));\n    return b;\n  }\n};\n\n// For scalars, explicitly set to Scalar(0), since the underlying representation\n// for zero may not consist of all-zero bits.\ntemplate<typename T>\nstruct pzero_impl<T,\n    typename internal::enable_if<is_scalar<T>::value>::type> {\n  static EIGEN_DEVICE_FUNC inline T run(const T& /*a*/) {\n    return T(0);\n  }\n};\n\n/** \\internal \\returns packet of zeros */\ntemplate<typename Packet> EIGEN_DEVICE_FUNC inline Packet\npzero(const Packet& a) {\n  return pzero_impl<Packet>::run(a);\n}\n\n/** \\internal \\returns a <= b as a bit mask */\ntemplate<typename Packet> EIGEN_DEVICE_FUNC inline Packet\npcmp_le(const Packet& a, const Packet& b)  { return a<=b ? ptrue(a) : pzero(a); }\n\n/** \\internal \\returns a < b as a bit mask */\ntemplate<typename Packet> EIGEN_DEVICE_FUNC inline Packet\npcmp_lt(const Packet& a, const Packet& b)  { return a<b ? ptrue(a) : pzero(a); }\n\n/** \\internal \\returns a == b as a bit mask */\ntemplate<typename Packet> EIGEN_DEVICE_FUNC inline Packet\npcmp_eq(const Packet& a, const Packet& b) { return a==b ? ptrue(a) : pzero(a); }\n\n/** \\internal \\returns a < b or a==NaN or b==NaN as a bit mask */\ntemplate<typename Packet> EIGEN_DEVICE_FUNC inline Packet\npcmp_lt_or_nan(const Packet& a, const Packet& b) { return a>=b ? 
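/* a>=b is false when a<b or when either operand is NaN */ 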
pzero(a) : ptrue(a); }\n\ntemplate<typename T>\nstruct bit_and {\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR EIGEN_ALWAYS_INLINE T operator()(const T& a, const T& b) const {\n    return a & b;\n  }\n};\n\ntemplate<typename T>\nstruct bit_or {\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR EIGEN_ALWAYS_INLINE T operator()(const T& a, const T& b) const {\n    return a | b;\n  }\n};\n\ntemplate<typename T>\nstruct bit_xor {\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR EIGEN_ALWAYS_INLINE T operator()(const T& a, const T& b) const {\n    return a ^ b;\n  }\n};\n\ntemplate<typename T>\nstruct bit_not {\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR EIGEN_ALWAYS_INLINE T operator()(const T& a) const {\n    return ~a;\n  }\n};\n\n// Use operators &, |, ^, ~.\ntemplate<typename T>\nstruct operator_bitwise_helper {\n  EIGEN_DEVICE_FUNC static inline T bitwise_and(const T& a, const T& b) { return bit_and<T>()(a, b); }\n  EIGEN_DEVICE_FUNC static inline T bitwise_or(const T& a, const T& b) { return bit_or<T>()(a, b); }\n  EIGEN_DEVICE_FUNC static inline T bitwise_xor(const T& a, const T& b) { return bit_xor<T>()(a, b); }\n  EIGEN_DEVICE_FUNC static inline T bitwise_not(const T& a) { return bit_not<T>()(a); }\n};\n\n// Apply binary operations byte-by-byte\ntemplate<typename T>\nstruct bytewise_bitwise_helper {\n  EIGEN_DEVICE_FUNC static inline T bitwise_and(const T& a, const T& b) {\n    return binary(a, b, bit_and<unsigned char>());\n  }\n  EIGEN_DEVICE_FUNC static inline T bitwise_or(const T& a, const T& b) { \n    return binary(a, b, bit_or<unsigned char>());\n   }\n  EIGEN_DEVICE_FUNC static inline T bitwise_xor(const T& a, const T& b) {\n    return binary(a, b, bit_xor<unsigned char>());\n  }\n  EIGEN_DEVICE_FUNC static inline T bitwise_not(const T& a) { \n    return unary(a,bit_not<unsigned char>());\n   }\n  \n private:\n  template<typename Op>\n  EIGEN_DEVICE_FUNC static inline T unary(const T& a, Op op) {\n    const unsigned char* a_ptr = reinterpret_cast<const unsigned char*>(&a);\n    T c;\n    unsigned char* c_ptr = reinterpret_cast<unsigned char*>(&c);\n    for (size_t i = 0; i < sizeof(T); ++i) {\n      *c_ptr++ = op(*a_ptr++);\n    }\n    return c;\n  }\n\n  template<typename Op>\n  EIGEN_DEVICE_FUNC static inline T binary(const T& a, const T& b, Op op) {\n    const unsigned char* a_ptr = reinterpret_cast<const unsigned char*>(&a);\n    const unsigned char* b_ptr = reinterpret_cast<const unsigned char*>(&b);\n    T c;\n    unsigned char* c_ptr = reinterpret_cast<unsigned char*>(&c);\n    for (size_t i = 0; i < sizeof(T); ++i) {\n      *c_ptr++ = op(*a_ptr++, *b_ptr++);\n    }\n    return c;\n  }\n};\n\n// In the general case, use byte-by-byte manipulation.\ntemplate<typename T, typename EnableIf = void>\nstruct bitwise_helper : public bytewise_bitwise_helper<T> {};\n\n// For integers or non-trivial scalars, use binary operators.\ntemplate<typename T>\nstruct bitwise_helper<T,\n  typename internal::enable_if<\n    is_scalar<T>::value && (NumTraits<T>::IsInteger || NumTraits<T>::RequireInitialization)>::type\n  > : public operator_bitwise_helper<T> {};\n\n/** \\internal \\returns the bitwise and of \\a a and \\a b */\ntemplate<typename Packet> EIGEN_DEVICE_FUNC inline Packet\npand(const Packet& a, const Packet& b) {\n  return bitwise_helper<Packet>::bitwise_and(a, b);\n}\n\n/** \\internal \\returns the bitwise or of \\a a and \\a b */\ntemplate<typename Packet> EIGEN_DEVICE_FUNC inline Packet\npor(const Packet& a, const Packet& b) {\n  return bitwise_helper<Packet>::bitwise_or(a, b);\n}\n\n/** \\internal \\returns the 
bitwise xor of \\a a and \\a b */\ntemplate<typename Packet> EIGEN_DEVICE_FUNC inline Packet\npxor(const Packet& a, const Packet& b) {\n  return bitwise_helper<Packet>::bitwise_xor(a, b);\n}\n\n/** \\internal \\returns the bitwise not of \\a a */\ntemplate<typename Packet> EIGEN_DEVICE_FUNC inline Packet\npnot(const Packet& a) {\n  return bitwise_helper<Packet>::bitwise_not(a);\n}\n\n/** \\internal \\returns the bitwise and of \\a a and not \\a b */\ntemplate<typename Packet> EIGEN_DEVICE_FUNC inline Packet\npandnot(const Packet& a, const Packet& b) { return pand(a, pnot(b)); }\n\n// In the general case, use bitwise select.\ntemplate<typename Packet, typename EnableIf = void>\nstruct pselect_impl {\n  static EIGEN_DEVICE_FUNC inline Packet run(const Packet& mask, const Packet& a, const Packet& b) {\n    return por(pand(a,mask),pandnot(b,mask));\n  }\n};\n\n// For scalars, use ternary select.\ntemplate<typename Packet>\nstruct pselect_impl<Packet, \n    typename internal::enable_if<is_scalar<Packet>::value>::type > {\n  static EIGEN_DEVICE_FUNC inline Packet run(const Packet& mask, const Packet& a, const Packet& b) {\n    return numext::equal_strict(mask, Packet(0)) ? b : a;\n  }\n};\n\n/** \\internal \\returns \\a a or \\a b for each field in packet according to \\a mask */\ntemplate<typename Packet> EIGEN_DEVICE_FUNC inline Packet\npselect(const Packet& mask, const Packet& a, const Packet& b) {\n  return pselect_impl<Packet>::run(mask, a, b);\n}\n\ntemplate<> EIGEN_DEVICE_FUNC inline bool pselect<bool>(\n    const bool& cond, const bool& a, const bool& b) {\n  return cond ? a : b;\n}\n\n/** \\internal \\returns the min or max of \\a a and \\a b (coeff-wise)\n    If either \\a a or \\a b are NaN, the result is implementation defined. */\ntemplate<int NaNPropagation>\nstruct pminmax_impl {\n  template <typename Packet, typename Op>\n  static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a, const Packet& b, Op op) {\n    return op(a,b);\n  }\n};\n\n/** \\internal \\returns the min or max of \\a a and \\a b (coeff-wise)\n    If either \\a a or \\a b are NaN, NaN is returned. */\ntemplate<>\nstruct pminmax_impl<PropagateNaN> {\n  template <typename Packet, typename Op>\n  static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a, const Packet& b, Op op) {\n  Packet not_nan_mask_a = pcmp_eq(a, a);\n  Packet not_nan_mask_b = pcmp_eq(b, b);\n  return pselect(not_nan_mask_a,\n                 pselect(not_nan_mask_b, op(a, b), b),\n                 a);\n  }\n};\n\n/** \\internal \\returns the min or max of \\a a and \\a b (coeff-wise)\n    If both \\a a and \\a b are NaN, NaN is returned.\n    Equivalent to std::fmin(a, b).  */\ntemplate<>\nstruct pminmax_impl<PropagateNumbers> {\n  template <typename Packet, typename Op>\n  static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a, const Packet& b, Op op) {\n  Packet not_nan_mask_a = pcmp_eq(a, a);\n  Packet not_nan_mask_b = pcmp_eq(b, b);\n  return pselect(not_nan_mask_a,\n                 pselect(not_nan_mask_b, op(a, b), a),\n                 b);\n  }\n};\n\n\n#ifndef SYCL_DEVICE_ONLY\n#define EIGEN_BINARY_OP_NAN_PROPAGATION(Type, Func) Func\n#else\n#define EIGEN_BINARY_OP_NAN_PROPAGATION(Type, Func) \\\n[](const Type& a, const Type& b) { \\\n        return Func(a, b);}\n#endif\n\n/** \\internal \\returns the min of \\a a and \\a b  (coeff-wise).\n    If \\a a or \\a b is NaN, the return value is implementation defined. 
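\n    A minimal scalar sketch (an editor's illustration, not upstream documentation):\n    \\code\n    double nan = std::numeric_limits<double>::quiet_NaN();\n    pmin(2.0, 3.0);                    // 2.0\n    pmin<PropagateNumbers>(nan, 3.0);  // 3.0 (the NaN is ignored)\n    pmin<PropagateNaN>(nan, 3.0);      // NaN (the NaN is propagated)\n    \\endcode 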
*/\ntemplate<typename Packet> EIGEN_DEVICE_FUNC inline Packet\npmin(const Packet& a, const Packet& b) { return numext::mini(a,b); }\n\n/** \\internal \\returns the min of \\a a and \\a b  (coeff-wise).\n    NaNPropagation determines the NaN propagation semantics. */\ntemplate <int NaNPropagation, typename Packet>\nEIGEN_DEVICE_FUNC inline Packet pmin(const Packet& a, const Packet& b) {\n  return pminmax_impl<NaNPropagation>::run(a, b, EIGEN_BINARY_OP_NAN_PROPAGATION(Packet, (pmin<Packet>)));\n}\n\n/** \\internal \\returns the max of \\a a and \\a b  (coeff-wise)\n    If \\a a or \\a b is NaN, the return value is implementation defined. */\ntemplate<typename Packet> EIGEN_DEVICE_FUNC inline Packet\npmax(const Packet& a, const Packet& b) { return numext::maxi(a, b); }\n\n/** \\internal \\returns the max of \\a a and \\a b  (coeff-wise).\n    NaNPropagation determines the NaN propagation semantics. */\ntemplate <int NaNPropagation, typename Packet>\nEIGEN_DEVICE_FUNC inline Packet pmax(const Packet& a, const Packet& b) {\n  return pminmax_impl<NaNPropagation>::run(a, b, EIGEN_BINARY_OP_NAN_PROPAGATION(Packet,(pmax<Packet>)));\n}\n\n/** \\internal \\returns the absolute value of \\a a */\ntemplate<typename Packet> EIGEN_DEVICE_FUNC inline Packet\npabs(const Packet& a) { return numext::abs(a); }\ntemplate<> EIGEN_DEVICE_FUNC inline unsigned int\npabs(const unsigned int& a) { return a; }\ntemplate<> EIGEN_DEVICE_FUNC inline unsigned long\npabs(const unsigned long& a) { return a; }\ntemplate<> EIGEN_DEVICE_FUNC inline unsigned long long\npabs(const unsigned long long& a) { return a; }\n\n/** \\internal \\returns the addsub value of \\a a and \\a b */\ntemplate<typename Packet> EIGEN_DEVICE_FUNC inline Packet\npaddsub(const Packet& a, const Packet& b) {\n  return pselect(peven_mask(a), padd(a, b), psub(a, b));\n }\n\n/** \\internal \\returns the phase angle of \\a a */\ntemplate<typename Packet> EIGEN_DEVICE_FUNC inline Packet\nparg(const Packet& a) { using numext::arg; return arg(a); }\n\n\n/** \\internal \\returns \\a a arithmetically shifted by N bits to the right */\ntemplate<int N> EIGEN_DEVICE_FUNC inline int\nparithmetic_shift_right(const int& a) { return a >> N; }\ntemplate<int N> EIGEN_DEVICE_FUNC inline long int\nparithmetic_shift_right(const long int& a) { return a >> N; }\n\n/** \\internal \\returns \\a a logically shifted by N bits to the right */\ntemplate<int N> EIGEN_DEVICE_FUNC inline int\nplogical_shift_right(const int& a) { return static_cast<int>(static_cast<unsigned int>(a) >> N); }\ntemplate<int N> EIGEN_DEVICE_FUNC inline long int\nplogical_shift_right(const long int& a) { return static_cast<long>(static_cast<unsigned long>(a) >> N); }\n\n/** \\internal \\returns \\a a shifted by N bits to the left */\ntemplate<int N> EIGEN_DEVICE_FUNC inline int\nplogical_shift_left(const int& a) { return a << N; }\ntemplate<int N> EIGEN_DEVICE_FUNC inline long int\nplogical_shift_left(const long int& a) { return a << N; }\n\n/** \\internal \\returns the significand and exponent of the underlying floating point numbers\n  * See https://en.cppreference.com/w/cpp/numeric/math/frexp\n  */\ntemplate <typename Packet>\nEIGEN_DEVICE_FUNC inline Packet pfrexp(const Packet& a, Packet& exponent) {\n  int exp;\n  EIGEN_USING_STD(frexp);\n  Packet result = static_cast<Packet>(frexp(a, &exp));\n  exponent = static_cast<Packet>(exp);\n  return result;\n}\n\n/** \\internal \\returns a * 2^((int)exponent)\n  * See https://en.cppreference.com/w/cpp/numeric/math/ldexp\n  */\ntemplate<typename Packet> 
EIGEN_DEVICE_FUNC inline Packet\npldexp(const Packet &a, const Packet &exponent) {\n  EIGEN_USING_STD(ldexp)\n  return static_cast<Packet>(ldexp(a, static_cast<int>(exponent)));\n}\n\n/** \\internal \\returns the absolute difference of \\a a and \\a b (coeff-wise) */\ntemplate<typename Packet> EIGEN_DEVICE_FUNC inline Packet\npabsdiff(const Packet& a, const Packet& b) { return pselect(pcmp_lt(a, b), psub(b, a), psub(a, b)); }\n\n/** \\internal \\returns a packet version of \\a *from, from must be 16 bytes aligned */\ntemplate<typename Packet> EIGEN_DEVICE_FUNC inline Packet\npload(const typename unpacket_traits<Packet>::type* from) { return *from; }\n\n/** \\internal \\returns a packet version of \\a *from, (un-aligned load) */\ntemplate<typename Packet> EIGEN_DEVICE_FUNC inline Packet\nploadu(const typename unpacket_traits<Packet>::type* from) { return *from; }\n\n/** \\internal \\returns a packet version of \\a *from, (un-aligned masked load)\n * There is no generic implementation. We only have implementations for specialized\n * cases. Generic case should not be called.\n */\ntemplate<typename Packet> EIGEN_DEVICE_FUNC inline\ntypename enable_if<unpacket_traits<Packet>::masked_load_available, Packet>::type\nploadu(const typename unpacket_traits<Packet>::type* from, typename unpacket_traits<Packet>::mask_t umask);\n\n/** \\internal \\returns a packet with constant coefficients \\a a, e.g.: (a,a,a,a) */\ntemplate<typename Packet> EIGEN_DEVICE_FUNC inline Packet\npset1(const typename unpacket_traits<Packet>::type& a) { return a; }\n\n/** \\internal \\returns a packet with constant coefficients set from bits */\ntemplate<typename Packet,typename BitsType> EIGEN_DEVICE_FUNC inline Packet\npset1frombits(BitsType a);\n\n/** \\internal \\returns a packet with constant coefficients \\a a[0], e.g.: (a[0],a[0],a[0],a[0]) */\ntemplate<typename Packet> EIGEN_DEVICE_FUNC inline Packet\npload1(const typename unpacket_traits<Packet>::type  *a) { return pset1<Packet>(*a); }\n\n/** \\internal \\returns a packet with elements of \\a *from duplicated.\n  * For instance, for a packet of 8 elements, 4 scalars will be read from \\a *from and\n  * duplicated to form: {from[0],from[0],from[1],from[1],from[2],from[2],from[3],from[3]}\n  * Currently, this function is only used for scalar * complex products.\n  */\ntemplate<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet\nploaddup(const typename unpacket_traits<Packet>::type* from) { return *from; }\n\n/** \\internal \\returns a packet with elements of \\a *from quadrupled.\n  * For instance, for a packet of 8 elements, 2 scalars will be read from \\a *from and\n  * replicated to form: {from[0],from[0],from[0],from[0],from[1],from[1],from[1],from[1]}\n  * Currently, this function is only used in matrix products.\n  * For packet-size smaller than or equal to 4, this function is equivalent to pload1\n  */\ntemplate<typename Packet> EIGEN_DEVICE_FUNC inline Packet\nploadquad(const typename unpacket_traits<Packet>::type* from)\n{ return pload1<Packet>(from); }\n\n/** \\internal equivalent to\n  * \\code\n  * a0 = pload1(a+0);\n  * a1 = pload1(a+1);\n  * a2 = pload1(a+2);\n  * a3 = pload1(a+3);\n  * \\endcode\n  * \\sa pset1, pload1, ploaddup, pbroadcast2\n  */\ntemplate<typename Packet> EIGEN_DEVICE_FUNC\ninline void pbroadcast4(const typename unpacket_traits<Packet>::type *a,\n                        Packet& a0, Packet& a1, Packet& a2, Packet& a3)\n{\n  a0 = pload1<Packet>(a+0);\n  a1 = pload1<Packet>(a+1);\n  a2 = pload1<Packet>(a+2);\n  a3 = 
pload1<Packet>(a+3);\n}\n\n/** \\internal equivalent to\n  * \\code\n  * a0 = pload1(a+0);\n  * a1 = pload1(a+1);\n  * \\endcode\n  * \\sa pset1, pload1, ploaddup, pbroadcast4\n  */\ntemplate<typename Packet> EIGEN_DEVICE_FUNC\ninline void pbroadcast2(const typename unpacket_traits<Packet>::type *a,\n                        Packet& a0, Packet& a1)\n{\n  a0 = pload1<Packet>(a+0);\n  a1 = pload1<Packet>(a+1);\n}\n\n/** \\internal \\brief Returns a packet with coefficients (a,a+1,...,a+packet_size-1). */\ntemplate<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet\nplset(const typename unpacket_traits<Packet>::type& a) { return a; }\n\n/** \\internal \\returns a packet with alternating coefficients (x, 0, x, 0),\n     where x is the value of all 1-bits. */\ntemplate<typename Packet> EIGEN_DEVICE_FUNC inline Packet\npeven_mask(const Packet& /*a*/) {\n  typedef typename unpacket_traits<Packet>::type Scalar;\n  const size_t n = unpacket_traits<Packet>::size;\n  EIGEN_ALIGN_TO_BOUNDARY(sizeof(Packet)) Scalar elements[n];\n  for(size_t i = 0; i < n; ++i) {\n    memset(elements+i, ((i & 1) == 0 ? 0xff : 0), sizeof(Scalar));\n  }\n  return ploadu<Packet>(elements);\n}\n\n\n/** \\internal copy the packet \\a from to \\a *to, \\a to must be 16 bytes aligned */\ntemplate<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstore(Scalar* to, const Packet& from)\n{ (*to) = from; }\n\n/** \\internal copy the packet \\a from to \\a *to, (un-aligned store) */\ntemplate<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstoreu(Scalar* to, const Packet& from)\n{  (*to) = from; }\n\n/** \\internal copy the packet \\a from to \\a *to, (un-aligned store with a mask)\n * There is no generic implementation. We only have implementations for specialized\n * cases. 
Generic case should not be called.\n */\ntemplate<typename Scalar, typename Packet>\nEIGEN_DEVICE_FUNC inline\ntypename enable_if<unpacket_traits<Packet>::masked_store_available, void>::type\npstoreu(Scalar* to, const Packet& from, typename unpacket_traits<Packet>::mask_t umask);\n\n template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline Packet pgather(const Scalar* from, Index /*stride*/)\n { return ploadu<Packet>(from); }\n\n template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pscatter(Scalar* to, const Packet& from, Index /*stride*/)\n { pstore(to, from); }\n\n/** \\internal tries to do cache prefetching of \\a addr */\ntemplate<typename Scalar> EIGEN_DEVICE_FUNC inline void prefetch(const Scalar* addr)\n{\n#if defined(EIGEN_HIP_DEVICE_COMPILE)\n  // do nothing\n#elif defined(EIGEN_CUDA_ARCH)\n#if defined(__LP64__) || EIGEN_OS_WIN64\n  // 64-bit pointer operand constraint for inlined asm\n  asm(\" prefetch.L1 [ %1 ];\" : \"=l\"(addr) : \"l\"(addr));\n#else\n  // 32-bit pointer operand constraint for inlined asm\n  asm(\" prefetch.L1 [ %1 ];\" : \"=r\"(addr) : \"r\"(addr));\n#endif\n#elif (!EIGEN_COMP_MSVC) && (EIGEN_COMP_GNUC || EIGEN_COMP_CLANG || EIGEN_COMP_ICC)\n  __builtin_prefetch(addr);\n#endif\n}\n\n/** \\internal \\returns the reversed elements of \\a a*/\ntemplate<typename Packet> EIGEN_DEVICE_FUNC inline Packet preverse(const Packet& a)\n{ return a; }\n\n/** \\internal \\returns \\a a with real and imaginary part flipped (for complex type only) */\ntemplate<typename Packet> EIGEN_DEVICE_FUNC inline Packet pcplxflip(const Packet& a)\n{\n  return Packet(numext::imag(a),numext::real(a));\n}\n\n/**************************\n* Special math functions\n***************************/\n\n/** \\internal \\returns the sine of \\a a (coeff-wise) */\ntemplate<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS\nPacket psin(const Packet& a) { EIGEN_USING_STD(sin); return sin(a); }\n\n/** \\internal \\returns the cosine of \\a a (coeff-wise) */\ntemplate<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS\nPacket pcos(const Packet& a) { EIGEN_USING_STD(cos); return cos(a); }\n\n/** \\internal \\returns the tan of \\a a (coeff-wise) */\ntemplate<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS\nPacket ptan(const Packet& a) { EIGEN_USING_STD(tan); return tan(a); }\n\n/** \\internal \\returns the arc sine of \\a a (coeff-wise) */\ntemplate<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS\nPacket pasin(const Packet& a) { EIGEN_USING_STD(asin); return asin(a); }\n\n/** \\internal \\returns the arc cosine of \\a a (coeff-wise) */\ntemplate<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS\nPacket pacos(const Packet& a) { EIGEN_USING_STD(acos); return acos(a); }\n\n/** \\internal \\returns the arc tangent of \\a a (coeff-wise) */\ntemplate<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS\nPacket patan(const Packet& a) { EIGEN_USING_STD(atan); return atan(a); }\n\n/** \\internal \\returns the hyperbolic sine of \\a a (coeff-wise) */\ntemplate<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS\nPacket psinh(const Packet& a) { EIGEN_USING_STD(sinh); return sinh(a); }\n\n/** \\internal \\returns the hyperbolic cosine of \\a a (coeff-wise) */\ntemplate<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS\nPacket pcosh(const Packet& a) { EIGEN_USING_STD(cosh); return cosh(a); }\n\n/** \\internal \\returns the hyperbolic 
tan of \\a a (coeff-wise) */\ntemplate<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS\nPacket ptanh(const Packet& a) { EIGEN_USING_STD(tanh); return tanh(a); }\n\n/** \\internal \\returns the exp of \\a a (coeff-wise) */\ntemplate<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS\nPacket pexp(const Packet& a) { EIGEN_USING_STD(exp); return exp(a); }\n\n/** \\internal \\returns the expm1 of \\a a (coeff-wise) */\ntemplate<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS\nPacket pexpm1(const Packet& a) { return numext::expm1(a); }\n\n/** \\internal \\returns the log of \\a a (coeff-wise) */\ntemplate<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS\nPacket plog(const Packet& a) { EIGEN_USING_STD(log); return log(a); }\n\n/** \\internal \\returns the log1p of \\a a (coeff-wise) */\ntemplate<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS\nPacket plog1p(const Packet& a) { return numext::log1p(a); }\n\n/** \\internal \\returns the log10 of \\a a (coeff-wise) */\ntemplate<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS\nPacket plog10(const Packet& a) { EIGEN_USING_STD(log10); return log10(a); }\n\n/** \\internal \\returns the base-2 logarithm of \\a a (coeff-wise) */\ntemplate<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS\nPacket plog2(const Packet& a) {\n  typedef typename internal::unpacket_traits<Packet>::type Scalar;\n  return pmul(pset1<Packet>(Scalar(EIGEN_LOG2E)), plog(a)); \n}\n\n/** \\internal \\returns the square-root of \\a a (coeff-wise) */\ntemplate<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS\nPacket psqrt(const Packet& a) { return numext::sqrt(a); }\n\n/** \\internal \\returns the reciprocal square-root of \\a a (coeff-wise) */\ntemplate<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS\nPacket prsqrt(const Packet& a) {\n  typedef typename internal::unpacket_traits<Packet>::type Scalar;\n  return pdiv(pset1<Packet>(Scalar(1)), psqrt(a));\n}\n\n/** \\internal \\returns the rounded value of \\a a (coeff-wise) */\ntemplate<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS\nPacket pround(const Packet& a) { using numext::round; return round(a); }\n\n/** \\internal \\returns the floor of \\a a (coeff-wise) */\ntemplate<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS\nPacket pfloor(const Packet& a) { using numext::floor; return floor(a); }\n\n/** \\internal \\returns the rounded value of \\a a (coeff-wise) with current\n * rounding mode */\ntemplate<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS\nPacket print(const Packet& a) { using numext::rint; return rint(a); }\n\n/** \\internal \\returns the ceil of \\a a (coeff-wise) */\ntemplate<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS\nPacket pceil(const Packet& a) { using numext::ceil; return ceil(a); }\n\n/** \\internal \\returns the first element of a packet */\ntemplate<typename Packet>\nEIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type\npfirst(const Packet& a)\n{ return a; }\n\n/** \\internal \\returns the sum of the elements of upper and lower half of \\a a if the packet size of \\a a is larger than 4.\n  * For a packet {a0, a1, a2, a3, a4, a5, a6, a7}, it returns a half packet {a0+a4, a1+a5, a2+a6, a3+a7}\n  * For packet-size smaller than or equal to 4, this boils down to a noop.\n  */\ntemplate<typename Packet>\nEIGEN_DEVICE_FUNC inline typename 
conditional<(unpacket_traits<Packet>::size%8)==0,typename unpacket_traits<Packet>::half,Packet>::type\npredux_half_dowto4(const Packet& a)\n{ return a; }\n\n// Slow generic implementation of Packet reduction.\ntemplate <typename Packet, typename Op>\nEIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type\npredux_helper(const Packet& a, Op op) {\n  typedef typename unpacket_traits<Packet>::type Scalar;\n  const size_t n = unpacket_traits<Packet>::size;\n  EIGEN_ALIGN_TO_BOUNDARY(sizeof(Packet)) Scalar elements[n];\n  pstoreu<Scalar>(elements, a);\n  for(size_t k = n / 2; k > 0; k /= 2)  {\n    for(size_t i = 0; i < k; ++i) {\n      elements[i] = op(elements[i], elements[i + k]);\n    }\n  }\n  return elements[0];\n}\n\n/** \\internal \\returns the sum of the elements of \\a a*/\ntemplate<typename Packet>\nEIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type\npredux(const Packet& a)\n{\n  return a;\n}\n\n/** \\internal \\returns the product of the elements of \\a a */\ntemplate <typename Packet>\nEIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_mul(\n    const Packet& a) {\n  typedef typename unpacket_traits<Packet>::type Scalar; \n  return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmul<Scalar>)));\n}\n\n/** \\internal \\returns the min of the elements of \\a a */\ntemplate <typename Packet>\nEIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_min(\n    const Packet &a) {\n  typedef typename unpacket_traits<Packet>::type Scalar; \n  return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmin<PropagateFast, Scalar>)));\n}\n\ntemplate <int NaNPropagation, typename Packet>\nEIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_min(\n    const Packet& a) {\n  typedef typename unpacket_traits<Packet>::type Scalar; \n  return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmin<NaNPropagation, Scalar>)));\n}\n\n/** \\internal \\returns the max of the elements of \\a a */\ntemplate <typename Packet>\nEIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_max(\n    const Packet &a) {\n  typedef typename unpacket_traits<Packet>::type Scalar; \n  return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmax<PropagateFast, Scalar>)));\n}\n\ntemplate <int NaNPropagation, typename Packet>\nEIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_max(\n    const Packet& a) {\n  typedef typename unpacket_traits<Packet>::type Scalar; \n  return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmax<NaNPropagation, Scalar>)));\n}\n\n#undef EIGEN_BINARY_OP_NAN_PROPAGATION\n\n/** \\internal \\returns true if all coeffs of \\a a mean \"true\"\n  * It is supposed to be called on values returned by pcmp_*.\n  */\n// not needed yet\n// template<typename Packet> EIGEN_DEVICE_FUNC inline bool predux_all(const Packet& a)\n// { return bool(a); }\n\n/** \\internal \\returns true if any coeff of \\a a means \"true\"\n  * It is supposed to be called on values returned by pcmp_*.\n  */\ntemplate<typename Packet> EIGEN_DEVICE_FUNC inline bool predux_any(const Packet& a)\n{\n  // Dirty but generic implementation where \"true\" is assumed to be non-zero and the same for all coefficients.\n  // It is expected that \"true\" is either:\n  //  - Scalar(1)\n  //  - bits full of ones (NaN for floats),\n  //  - or first bit equals to 1 (1 for ints, smallest denormal for floats).\n  // For all these cases, taking the sum is just fine, and this boils down to a no-op for scalars.\n  typedef 
typename unpacket_traits<Packet>::type Scalar;\n  return numext::not_equal_strict(predux(a), Scalar(0));\n}\n\n/***************************************************************************\n* The following functions might not have to be overwritten for vectorized types\n***************************************************************************/\n\n/** \\internal copy a packet with constant coefficient \\a a (e.g., [a,a,a,a]) to \\a *to. \\a to must be 16 bytes aligned */\n// NOTE: this function must really be templated on the packet type (think about different packet types for the same scalar type)\ntemplate<typename Packet>\ninline void pstore1(typename unpacket_traits<Packet>::type* to, const typename unpacket_traits<Packet>::type& a)\n{\n  pstore(to, pset1<Packet>(a));\n}\n\n/** \\internal \\returns a * b + c (coeff-wise) */\ntemplate<typename Packet> EIGEN_DEVICE_FUNC inline Packet\npmadd(const Packet&  a,\n         const Packet&  b,\n         const Packet&  c)\n{ return padd(pmul(a, b),c); }\n\n/** \\internal \\returns a packet version of \\a *from.\n  * The pointer \\a from must be aligned on a \\a Alignment bytes boundary. */\ntemplate<typename Packet, int Alignment>\nEIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt(const typename unpacket_traits<Packet>::type* from)\n{\n  if(Alignment >= unpacket_traits<Packet>::alignment)\n    return pload<Packet>(from);\n  else\n    return ploadu<Packet>(from);\n}\n\n/** \\internal copy the packet \\a from to \\a *to.\n  * The pointer \\a to must be aligned on a \\a Alignment bytes boundary. */\ntemplate<typename Scalar, typename Packet, int Alignment>\nEIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret(Scalar* to, const Packet& from)\n{\n  if(Alignment >= unpacket_traits<Packet>::alignment)\n    pstore(to, from);\n  else\n    pstoreu(to, from);\n}\n\n/** \\internal \\returns a packet version of \\a *from.\n  * Unlike ploadt, ploadt_ro takes advantage of the read-only memory path on the\n  * hardware if available to speed up the loading of data that won't be modified\n  * by the current computation.\n  */\ntemplate<typename Packet, int LoadMode>\nEIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt_ro(const typename unpacket_traits<Packet>::type* from)\n{\n  return ploadt<Packet, LoadMode>(from);\n}\n\n/***************************************************************************\n* Fast complex products (GCC generates a function call which is very slow)\n***************************************************************************/\n\n// Eigen+CUDA does not support complexes.\n#if !defined(EIGEN_GPUCC)\n\ntemplate<> inline std::complex<float> pmul(const std::complex<float>& a, const std::complex<float>& b)\n{ return std::complex<float>(a.real()*b.real() - a.imag()*b.imag(), a.imag()*b.real() + a.real()*b.imag()); }\n\ntemplate<> inline std::complex<double> pmul(const std::complex<double>& a, const std::complex<double>& b)\n{ return std::complex<double>(a.real()*b.real() - a.imag()*b.imag(), a.imag()*b.real() + a.real()*b.imag()); }\n\n#endif\n\n\n/***************************************************************************\n * PacketBlock, that is a collection of N packets where the number of words\n * in the packet is a multiple of N.\n***************************************************************************/\ntemplate <typename Packet,int N=unpacket_traits<Packet>::size> struct PacketBlock {\n  Packet packet[N];\n};\n\ntemplate<typename Packet> EIGEN_DEVICE_FUNC inline void\nptranspose(PacketBlock<Packet,1>& /*kernel*/) {\n  // Nothing to do 
in the scalar case, i.e. a 1x1 matrix.\n}\n\n/***************************************************************************\n * Selector, i.e. vector of N boolean values used to select (i.e. blend)\n * words from 2 packets.\n***************************************************************************/\ntemplate <size_t N> struct Selector {\n  bool select[N];\n};\n\ntemplate<typename Packet> EIGEN_DEVICE_FUNC inline Packet\npblend(const Selector<unpacket_traits<Packet>::size>& ifPacket, const Packet& thenPacket, const Packet& elsePacket) {\n  return ifPacket.select[0] ? thenPacket : elsePacket;\n}\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_GENERIC_PACKET_MATH_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/GlobalFunctions.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2010-2016 Gael Guennebaud <gael.guennebaud@inria.fr>\n// Copyright (C) 2010 Benoit Jacob <jacob.benoit.1@gmail.com>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_GLOBAL_FUNCTIONS_H\n#define EIGEN_GLOBAL_FUNCTIONS_H\n\n#ifdef EIGEN_PARSED_BY_DOXYGEN\n\n#define EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(NAME,FUNCTOR,DOC_OP,DOC_DETAILS) \\\n  /** \\returns an expression of the coefficient-wise DOC_OP of \\a x\n\n    DOC_DETAILS\n\n    \\sa <a href=\"group__CoeffwiseMathFunctions.html#cwisetable_##NAME\">Math functions</a>, class CwiseUnaryOp\n    */ \\\n  template<typename Derived> \\\n  inline const Eigen::CwiseUnaryOp<Eigen::internal::FUNCTOR<typename Derived::Scalar>, const Derived> \\\n  NAME(const Eigen::ArrayBase<Derived>& x);\n\n#else\n\n#define EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(NAME,FUNCTOR,DOC_OP,DOC_DETAILS) \\\n  template<typename Derived> \\\n  inline const Eigen::CwiseUnaryOp<Eigen::internal::FUNCTOR<typename Derived::Scalar>, const Derived> \\\n  (NAME)(const Eigen::ArrayBase<Derived>& x) { \\\n    return Eigen::CwiseUnaryOp<Eigen::internal::FUNCTOR<typename Derived::Scalar>, const Derived>(x.derived()); \\\n  }\n\n#endif // EIGEN_PARSED_BY_DOXYGEN\n\n#define EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(NAME,FUNCTOR) \\\n  \\\n  template<typename Derived> \\\n  struct NAME##_retval<ArrayBase<Derived> > \\\n  { \\\n    typedef const Eigen::CwiseUnaryOp<Eigen::internal::FUNCTOR<typename Derived::Scalar>, const Derived> type; \\\n  }; \\\n  template<typename Derived> \\\n  struct NAME##_impl<ArrayBase<Derived> > \\\n  { \\\n    static inline typename NAME##_retval<ArrayBase<Derived> >::type run(const Eigen::ArrayBase<Derived>& x) \\\n    { \\\n      return typename NAME##_retval<ArrayBase<Derived> >::type(x.derived()); \\\n    } \\\n  };\n\nnamespace Eigen\n{\n  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(real,scalar_real_op,real part,\\sa ArrayBase::real)\n  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(imag,scalar_imag_op,imaginary part,\\sa ArrayBase::imag)\n  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(conj,scalar_conjugate_op,complex conjugate,\\sa ArrayBase::conjugate)\n  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(inverse,scalar_inverse_op,inverse,\\sa ArrayBase::inverse)\n  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sin,scalar_sin_op,sine,\\sa ArrayBase::sin)\n  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cos,scalar_cos_op,cosine,\\sa ArrayBase::cos)\n  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(tan,scalar_tan_op,tangent,\\sa ArrayBase::tan)\n  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(atan,scalar_atan_op,arc-tangent,\\sa ArrayBase::atan)\n  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(asin,scalar_asin_op,arc-sine,\\sa ArrayBase::asin)\n  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(acos,scalar_acos_op,arc-consine,\\sa ArrayBase::acos)\n  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sinh,scalar_sinh_op,hyperbolic sine,\\sa ArrayBase::sinh)\n  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cosh,scalar_cosh_op,hyperbolic cosine,\\sa ArrayBase::cosh)\n  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(tanh,scalar_tanh_op,hyperbolic tangent,\\sa ArrayBase::tanh)\n#if EIGEN_HAS_CXX11_MATH\n  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(asinh,scalar_asinh_op,inverse hyperbolic sine,\\sa ArrayBase::asinh)\n  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(acosh,scalar_acosh_op,inverse hyperbolic cosine,\\sa ArrayBase::acosh)\n  
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(atanh,scalar_atanh_op,inverse hyperbolic tangent,\\sa ArrayBase::atanh)\n#endif\n  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(logistic,scalar_logistic_op,logistic function,\\sa ArrayBase::logistic)\n  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(lgamma,scalar_lgamma_op,natural logarithm of the gamma function,\\sa ArrayBase::lgamma)\n  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(digamma,scalar_digamma_op,derivative of lgamma,\\sa ArrayBase::digamma)\n  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erf,scalar_erf_op,error function,\\sa ArrayBase::erf)\n  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erfc,scalar_erfc_op,complementary error function,\\sa ArrayBase::erfc)\n  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(ndtri,scalar_ndtri_op,inverse normal distribution function,\\sa ArrayBase::ndtri)\n  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(exp,scalar_exp_op,exponential,\\sa ArrayBase::exp)\n  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(expm1,scalar_expm1_op,exponential of a value minus 1,\\sa ArrayBase::expm1)\n  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log,scalar_log_op,natural logarithm,\\sa Eigen::log10 DOXCOMMA ArrayBase::log)\n  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log1p,scalar_log1p_op,natural logarithm of 1 plus the value,\\sa ArrayBase::log1p)\n  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log10,scalar_log10_op,base 10 logarithm,\\sa Eigen::log DOXCOMMA ArrayBase::log10)\n  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log2,scalar_log2_op,base 2 logarithm,\\sa Eigen::log DOXCOMMA ArrayBase::log2)\n  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(abs,scalar_abs_op,absolute value,\\sa ArrayBase::abs DOXCOMMA MatrixBase::cwiseAbs)\n  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(abs2,scalar_abs2_op,squared absolute value,\\sa ArrayBase::abs2 DOXCOMMA MatrixBase::cwiseAbs2)\n  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(arg,scalar_arg_op,complex argument,\\sa ArrayBase::arg DOXCOMMA MatrixBase::cwiseArg)\n  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sqrt,scalar_sqrt_op,square root,\\sa ArrayBase::sqrt DOXCOMMA MatrixBase::cwiseSqrt)\n  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(rsqrt,scalar_rsqrt_op,reciprocal square root,\\sa ArrayBase::rsqrt)\n  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(square,scalar_square_op,square (power 2),\\sa Eigen::abs2 DOXCOMMA Eigen::pow DOXCOMMA ArrayBase::square)\n  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cube,scalar_cube_op,cube (power 3),\\sa Eigen::pow DOXCOMMA ArrayBase::cube)\n  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(rint,scalar_rint_op,nearest integer,\\sa Eigen::floor DOXCOMMA Eigen::ceil DOXCOMMA ArrayBase::round)\n  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(round,scalar_round_op,nearest integer,\\sa Eigen::floor DOXCOMMA Eigen::ceil DOXCOMMA ArrayBase::round)\n  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(floor,scalar_floor_op,nearest integer not greater than the given value,\\sa Eigen::ceil DOXCOMMA ArrayBase::floor)\n  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(ceil,scalar_ceil_op,nearest integer not less than the given value,\\sa Eigen::floor DOXCOMMA ArrayBase::ceil)\n  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(isnan,scalar_isnan_op,not-a-number test,\\sa Eigen::isinf DOXCOMMA Eigen::isfinite DOXCOMMA ArrayBase::isnan)\n  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(isinf,scalar_isinf_op,infinite value test,\\sa Eigen::isnan DOXCOMMA Eigen::isfinite DOXCOMMA ArrayBase::isinf)\n  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(isfinite,scalar_isfinite_op,finite value test,\\sa Eigen::isinf DOXCOMMA Eigen::isnan DOXCOMMA ArrayBase::isfinite)\n  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sign,scalar_sign_op,sign (or 0),\\sa ArrayBase::sign)\n\n  /** \\returns an expression of the coefficient-wise power of \\a x to the given constant \\a exponent.\n    *\n    * \\tparam ScalarExponent is the 
scalar type of \\a exponent. It must be compatible with the scalar type of the given expression (\\c Derived::Scalar).\n    *\n    * \\sa ArrayBase::pow()\n    *\n    * \\relates ArrayBase\n    */\n#ifdef EIGEN_PARSED_BY_DOXYGEN\n  template<typename Derived,typename ScalarExponent>\n  inline const CwiseBinaryOp<internal::scalar_pow_op<Derived::Scalar,ScalarExponent>,Derived,Constant<ScalarExponent> >\n  pow(const Eigen::ArrayBase<Derived>& x, const ScalarExponent& exponent);\n#else\n  template <typename Derived,typename ScalarExponent>\n  EIGEN_DEVICE_FUNC inline\n  EIGEN_MSVC10_WORKAROUND_BINARYOP_RETURN_TYPE(\n    const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,typename internal::promote_scalar_arg<typename Derived::Scalar\n                                                 EIGEN_COMMA ScalarExponent EIGEN_COMMA\n                                                 EIGEN_SCALAR_BINARY_SUPPORTED(pow,typename Derived::Scalar,ScalarExponent)>::type,pow))\n  pow(const Eigen::ArrayBase<Derived>& x, const ScalarExponent& exponent)\n  {\n    typedef typename internal::promote_scalar_arg<typename Derived::Scalar,ScalarExponent,\n                                                  EIGEN_SCALAR_BINARY_SUPPORTED(pow,typename Derived::Scalar,ScalarExponent)>::type PromotedExponent;\n    return EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,PromotedExponent,pow)(x.derived(),\n           typename internal::plain_constant_type<Derived,PromotedExponent>::type(x.derived().rows(), x.derived().cols(), internal::scalar_constant_op<PromotedExponent>(exponent)));\n  }\n#endif\n\n  /** \\returns an expression of the coefficient-wise power of \\a x to the given array of \\a exponents.\n    *\n    * This function computes the coefficient-wise power.\n    *\n    * Example: \\include Cwise_array_power_array.cpp\n    * Output: \\verbinclude Cwise_array_power_array.out\n    *\n    * \\sa ArrayBase::pow()\n    *\n    * \\relates ArrayBase\n    */\n  template<typename Derived,typename ExponentDerived>\n  inline const Eigen::CwiseBinaryOp<Eigen::internal::scalar_pow_op<typename Derived::Scalar, typename ExponentDerived::Scalar>, const Derived, const ExponentDerived>\n  pow(const Eigen::ArrayBase<Derived>& x, const Eigen::ArrayBase<ExponentDerived>& exponents)\n  {\n    return Eigen::CwiseBinaryOp<Eigen::internal::scalar_pow_op<typename Derived::Scalar, typename ExponentDerived::Scalar>, const Derived, const ExponentDerived>(\n      x.derived(),\n      exponents.derived()\n    );\n  }\n\n  /** \\returns an expression of the coefficient-wise power of the scalar \\a x to the given array of \\a exponents.\n    *\n    * This function computes the coefficient-wise power between a scalar and an array of exponents.\n    *\n    * \\tparam Scalar is the scalar type of \\a x. 
It must be compatible with the scalar type of the given array expression (\\c Derived::Scalar).\n    *\n    * Example: \\include Cwise_scalar_power_array.cpp\n    * Output: \\verbinclude Cwise_scalar_power_array.out\n    *\n    * \\sa ArrayBase::pow()\n    *\n    * \\relates ArrayBase\n    */\n#ifdef EIGEN_PARSED_BY_DOXYGEN\n  template<typename Scalar,typename Derived>\n  inline const CwiseBinaryOp<internal::scalar_pow_op<Scalar,Derived::Scalar>,Constant<Scalar>,Derived>\n  pow(const Scalar& x,const Eigen::ArrayBase<Derived>& exponents);\n#else\n  template <typename Scalar, typename Derived>\n  EIGEN_DEVICE_FUNC inline\n  EIGEN_MSVC10_WORKAROUND_BINARYOP_RETURN_TYPE(\n    const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(typename internal::promote_scalar_arg<typename Derived::Scalar\n                                                 EIGEN_COMMA Scalar EIGEN_COMMA\n                                                 EIGEN_SCALAR_BINARY_SUPPORTED(pow,Scalar,typename Derived::Scalar)>::type,Derived,pow))\n  pow(const Scalar& x, const Eigen::ArrayBase<Derived>& exponents) {\n    typedef typename internal::promote_scalar_arg<typename Derived::Scalar,Scalar,\n                                                  EIGEN_SCALAR_BINARY_SUPPORTED(pow,Scalar,typename Derived::Scalar)>::type PromotedScalar;\n    return EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(PromotedScalar,Derived,pow)(\n           typename internal::plain_constant_type<Derived,PromotedScalar>::type(exponents.derived().rows(), exponents.derived().cols(), internal::scalar_constant_op<PromotedScalar>(x)), exponents.derived());\n  }\n#endif\n\n\n  namespace internal\n  {\n    EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(real,scalar_real_op)\n    EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(imag,scalar_imag_op)\n    EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(abs2,scalar_abs2_op)\n  }\n}\n\n// TODO: cleanly disable those functions that are not supported on Array (numext::real_ref, internal::random, internal::isApprox...)\n\n#endif // EIGEN_GLOBAL_FUNCTIONS_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/IO.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>\n// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_IO_H\n#define EIGEN_IO_H\n\nnamespace Eigen { \n\nenum { DontAlignCols = 1 };\nenum { StreamPrecision = -1,\n       FullPrecision = -2 };\n\nnamespace internal {\ntemplate<typename Derived>\nstd::ostream & print_matrix(std::ostream & s, const Derived& _m, const IOFormat& fmt);\n}\n\n/** \\class IOFormat\n  * \\ingroup Core_Module\n  *\n  * \\brief Stores a set of parameters controlling the way matrices are printed\n  *\n  * List of available parameters:\n  *  - \\b precision number of digits for floating point values, or one of the special constants \\c StreamPrecision and \\c FullPrecision.\n  *                 The default is the special value \\c StreamPrecision which means to use the\n  *                 stream's own precision setting, as set for instance using \\c cout.precision(3). The other special value\n  *                 \\c FullPrecision means that the number of digits will be computed to match the full precision of each floating-point\n  *                 type.\n  *  - \\b flags an OR-ed combination of flags, the default value is 0, the only currently available flag is \\c DontAlignCols which\n  *             allows to disable the alignment of columns, resulting in faster code.\n  *  - \\b coeffSeparator string printed between two coefficients of the same row\n  *  - \\b rowSeparator string printed between two rows\n  *  - \\b rowPrefix string printed at the beginning of each row\n  *  - \\b rowSuffix string printed at the end of each row\n  *  - \\b matPrefix string printed at the beginning of the matrix\n  *  - \\b matSuffix string printed at the end of the matrix\n  *  - \\b fill character printed to fill the empty space in aligned columns\n  *\n  * Example: \\include IOFormat.cpp\n  * Output: \\verbinclude IOFormat.out\n  *\n  * \\sa DenseBase::format(), class WithFormat\n  */\nstruct IOFormat\n{\n  /** Default constructor, see class IOFormat for the meaning of the parameters */\n  IOFormat(int _precision = StreamPrecision, int _flags = 0,\n    const std::string& _coeffSeparator = \" \",\n    const std::string& _rowSeparator = \"\\n\", const std::string& _rowPrefix=\"\", const std::string& _rowSuffix=\"\",\n    const std::string& _matPrefix=\"\", const std::string& _matSuffix=\"\", const char _fill=' ')\n  : matPrefix(_matPrefix), matSuffix(_matSuffix), rowPrefix(_rowPrefix), rowSuffix(_rowSuffix), rowSeparator(_rowSeparator),\n    rowSpacer(\"\"), coeffSeparator(_coeffSeparator), fill(_fill), precision(_precision), flags(_flags)\n  {\n    // TODO check if rowPrefix, rowSuffix or rowSeparator contains a newline\n    // don't add rowSpacer if columns are not to be aligned\n    if((flags & DontAlignCols))\n      return;\n    int i = int(matSuffix.length())-1;\n    while (i>=0 && matSuffix[i]!='\\n')\n    {\n      rowSpacer += ' ';\n      i--;\n    }\n  }\n  std::string matPrefix, matSuffix;\n  std::string rowPrefix, rowSuffix, rowSeparator, rowSpacer;\n  std::string coeffSeparator;\n  char fill;\n  int precision;\n  int flags;\n};\n\n/** \\class WithFormat\n  * \\ingroup Core_Module\n  *\n  * \\brief Pseudo 
expression providing matrix output with given format\n  *\n  * \\tparam ExpressionType the type of the object on which IO stream operations are performed\n  *\n  * This class represents an expression with stream operators controlled by a given IOFormat.\n  * It is the return type of DenseBase::format()\n  * and most of the time this is the only way it is used.\n  *\n  * See class IOFormat for some examples.\n  *\n  * \\sa DenseBase::format(), class IOFormat\n  */\ntemplate<typename ExpressionType>\nclass WithFormat\n{\n  public:\n\n    WithFormat(const ExpressionType& matrix, const IOFormat& format)\n      : m_matrix(matrix), m_format(format)\n    {}\n\n    friend std::ostream & operator << (std::ostream & s, const WithFormat& wf)\n    {\n      return internal::print_matrix(s, wf.m_matrix.eval(), wf.m_format);\n    }\n\n  protected:\n    typename ExpressionType::Nested m_matrix;\n    IOFormat m_format;\n};\n\nnamespace internal {\n\n// NOTE: This helper is kept for backward compatibility with previous code specializing\n//       this internal::significant_decimals_impl structure. In the future we should directly\n//       call digits10() which has been introduced in July 2016 in 3.3.\ntemplate<typename Scalar>\nstruct significant_decimals_impl\n{\n  static inline int run()\n  {\n    return NumTraits<Scalar>::digits10();\n  }\n};\n\n/** \\internal\n  * print the matrix \\a _m to the output stream \\a s using the output format \\a fmt */\ntemplate<typename Derived>\nstd::ostream & print_matrix(std::ostream & s, const Derived& _m, const IOFormat& fmt)\n{\n  using internal::is_same;\n  using internal::conditional;\n\n  if(_m.size() == 0)\n  {\n    s << fmt.matPrefix << fmt.matSuffix;\n    return s;\n  }\n  \n  typename Derived::Nested m = _m;\n  typedef typename Derived::Scalar Scalar;\n  typedef typename\n      conditional<\n          is_same<Scalar, char>::value ||\n            is_same<Scalar, unsigned char>::value ||\n            is_same<Scalar, numext::int8_t>::value ||\n            is_same<Scalar, numext::uint8_t>::value,\n          int,\n          typename conditional<\n              is_same<Scalar, std::complex<char> >::value ||\n                is_same<Scalar, std::complex<unsigned char> >::value ||\n                is_same<Scalar, std::complex<numext::int8_t> >::value ||\n                is_same<Scalar, std::complex<numext::uint8_t> >::value,\n              std::complex<int>,\n              const Scalar&\n            >::type\n        >::type PrintType;\n\n  Index width = 0;\n\n  std::streamsize explicit_precision;\n  if(fmt.precision == StreamPrecision)\n  {\n    explicit_precision = 0;\n  }\n  else if(fmt.precision == FullPrecision)\n  {\n    if (NumTraits<Scalar>::IsInteger)\n    {\n      explicit_precision = 0;\n    }\n    else\n    {\n      explicit_precision = significant_decimals_impl<Scalar>::run();\n    }\n  }\n  else\n  {\n    explicit_precision = fmt.precision;\n  }\n\n  std::streamsize old_precision = 0;\n  if(explicit_precision) old_precision = s.precision(explicit_precision);\n\n  bool align_cols = !(fmt.flags & DontAlignCols);\n  if(align_cols)\n  {\n    // compute the largest width\n    for(Index j = 0; j < m.cols(); ++j)\n      for(Index i = 0; i < m.rows(); ++i)\n      {\n        std::stringstream sstr;\n        sstr.copyfmt(s);\n        sstr << static_cast<PrintType>(m.coeff(i,j));\n        width = std::max<Index>(width, Index(sstr.str().length()));\n      }\n  }\n  std::streamsize old_width = s.width();\n  char old_fill_character = s.fill();\n  s << fmt.matPrefix;\n 
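 // Write the coefficients row by row; when column alignment is enabled\n  // (width > 0), each entry is padded with fmt.fill up to the widest column\n  // computed above.\n 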
 for(Index i = 0; i < m.rows(); ++i)\n  {\n    if (i)\n      s << fmt.rowSpacer;\n    s << fmt.rowPrefix;\n    if(width) {\n      s.fill(fmt.fill);\n      s.width(width);\n    }\n    s << static_cast<PrintType>(m.coeff(i, 0));\n    for(Index j = 1; j < m.cols(); ++j)\n    {\n      s << fmt.coeffSeparator;\n      if(width) {\n        s.fill(fmt.fill);\n        s.width(width);\n      }\n      s << static_cast<PrintType>(m.coeff(i, j));\n    }\n    s << fmt.rowSuffix;\n    if( i < m.rows() - 1)\n      s << fmt.rowSeparator;\n  }\n  s << fmt.matSuffix;\n  if(explicit_precision) s.precision(old_precision);\n  if(width) {\n    s.fill(old_fill_character);\n    s.width(old_width);\n  }\n  return s;\n}\n\n} // end namespace internal\n\n/** \\relates DenseBase\n  *\n  * Outputs the matrix to the given stream.\n  *\n  * If you wish to print the matrix with a format different than the default, use DenseBase::format().\n  *\n  * It is also possible to change the default format by defining EIGEN_DEFAULT_IO_FORMAT before including Eigen headers.\n  * If not defined, this will automatically be defined to Eigen::IOFormat(), that is the Eigen::IOFormat with default parameters.\n  *\n  * \\sa DenseBase::format()\n  */\ntemplate<typename Derived>\nstd::ostream & operator <<\n(std::ostream & s,\n const DenseBase<Derived> & m)\n{\n  return internal::print_matrix(s, m.eval(), EIGEN_DEFAULT_IO_FORMAT);\n}\n\n} // end namespace Eigen\n\n#endif // EIGEN_IO_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/IndexedView.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2017 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_INDEXED_VIEW_H\n#define EIGEN_INDEXED_VIEW_H\n\nnamespace Eigen {\n\nnamespace internal {\n\ntemplate<typename XprType, typename RowIndices, typename ColIndices>\nstruct traits<IndexedView<XprType, RowIndices, ColIndices> >\n : traits<XprType>\n{\n  enum {\n    RowsAtCompileTime = int(array_size<RowIndices>::value),\n    ColsAtCompileTime = int(array_size<ColIndices>::value),\n    MaxRowsAtCompileTime = RowsAtCompileTime != Dynamic ? int(RowsAtCompileTime) : Dynamic,\n    MaxColsAtCompileTime = ColsAtCompileTime != Dynamic ? int(ColsAtCompileTime) : Dynamic,\n\n    XprTypeIsRowMajor = (int(traits<XprType>::Flags)&RowMajorBit) != 0,\n    IsRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1\n               : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0\n               : XprTypeIsRowMajor,\n\n    RowIncr = int(get_compile_time_incr<RowIndices>::value),\n    ColIncr = int(get_compile_time_incr<ColIndices>::value),\n    InnerIncr = IsRowMajor ? ColIncr : RowIncr,\n    OuterIncr = IsRowMajor ? RowIncr : ColIncr,\n\n    HasSameStorageOrderAsXprType = (IsRowMajor == XprTypeIsRowMajor),\n    XprInnerStride = HasSameStorageOrderAsXprType ? int(inner_stride_at_compile_time<XprType>::ret) : int(outer_stride_at_compile_time<XprType>::ret),\n    XprOuterstride = HasSameStorageOrderAsXprType ? int(outer_stride_at_compile_time<XprType>::ret) : int(inner_stride_at_compile_time<XprType>::ret),\n\n    InnerSize = XprTypeIsRowMajor ? ColsAtCompileTime : RowsAtCompileTime,\n    IsBlockAlike = InnerIncr==1 && OuterIncr==1,\n    IsInnerPannel = HasSameStorageOrderAsXprType && is_same<AllRange<InnerSize>,typename conditional<XprTypeIsRowMajor,ColIndices,RowIndices>::type>::value,\n\n    InnerStrideAtCompileTime = InnerIncr<0 || InnerIncr==DynamicIndex || XprInnerStride==Dynamic ? Dynamic : XprInnerStride * InnerIncr,\n    OuterStrideAtCompileTime = OuterIncr<0 || OuterIncr==DynamicIndex || XprOuterstride==Dynamic ? Dynamic : XprOuterstride * OuterIncr,\n\n    ReturnAsScalar = is_same<RowIndices,SingleRange>::value && is_same<ColIndices,SingleRange>::value,\n    ReturnAsBlock = (!ReturnAsScalar) && IsBlockAlike,\n    ReturnAsIndexedView = (!ReturnAsScalar) && (!ReturnAsBlock),\n\n    // FIXME we deal with compile-time strides if and only if we have DirectAccessBit flag,\n    // but this is too strict regarding negative strides...\n    DirectAccessMask = (int(InnerIncr)!=UndefinedIncr && int(OuterIncr)!=UndefinedIncr && InnerIncr>=0 && OuterIncr>=0) ? DirectAccessBit : 0,\n    FlagsRowMajorBit = IsRowMajor ? RowMajorBit : 0,\n    FlagsLvalueBit = is_lvalue<XprType>::value ? LvalueBit : 0,\n    FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1) ? 
LinearAccessBit : 0,\n    Flags = (traits<XprType>::Flags & (HereditaryBits | DirectAccessMask )) | FlagsLvalueBit | FlagsRowMajorBit | FlagsLinearAccessBit\n  };\n\n  typedef Block<XprType,RowsAtCompileTime,ColsAtCompileTime,IsInnerPannel> BlockType;\n};\n\n}\n\ntemplate<typename XprType, typename RowIndices, typename ColIndices, typename StorageKind>\nclass IndexedViewImpl;\n\n\n/** \\class IndexedView\n  * \\ingroup Core_Module\n  *\n  * \\brief Expression of a non-sequential sub-matrix defined by arbitrary sequences of row and column indices\n  *\n  * \\tparam XprType the type of the expression in which we are taking the intersections of sub-rows and sub-columns\n  * \\tparam RowIndices the type of the object defining the sequence of row indices\n  * \\tparam ColIndices the type of the object defining the sequence of column indices\n  *\n  * This class represents an expression of a sub-matrix (or sub-vector) defined as the intersection\n  * of sub-sets of rows and columns, that are themselves defined by generic sequences of row indices \\f$ \\{r_0,r_1,..r_{m-1}\\} \\f$\n  * and column indices \\f$ \\{c_0,c_1,..c_{n-1} \\}\\f$. Let \\f$ A \\f$  be the nested matrix, then the resulting matrix \\f$ B \\f$ has \\c m\n  * rows and \\c n columns, and its entries are given by: \\f$ B(i,j) = A(r_i,c_j) \\f$.\n  *\n  * The \\c RowIndices and \\c ColIndices types must be compatible with the following API:\n  * \\code\n  * <integral type> operator[](Index) const;\n  * Index size() const;\n  * \\endcode\n  *\n  * Typical supported types thus include:\n  *  - std::vector<int>\n  *  - std::valarray<int>\n  *  - std::array<int>\n  *  - Plain C arrays: int[N]\n  *  - Eigen::ArrayXi\n  *  - decltype(ArrayXi::LinSpaced(...))\n  *  - Any view/expressions of the previous types\n  *  - Eigen::ArithmeticSequence\n  *  - Eigen::internal::AllRange      (helper for Eigen::all)\n  *  - Eigen::internal::SingleRange  (helper for single index)\n  *  - etc.\n  *\n  * In typical usages of %Eigen, this class should never be used directly. 
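As an\n  * illustration (a sketch; the matrix and the index containers are arbitrary),\n  * such a view is obtained via DenseBase::operator():\n  * \\code\n  * Eigen::MatrixXd A(4,4);\n  * std::vector<int> rows{2,0};\n  * std::vector<int> cols{1,3};\n  * auto B = A(rows, cols); // B(i,j) == A(rows[i], cols[j])\n  * \\endcode\n  * 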
It is the return type of\n  * DenseBase::operator()(const RowIndices&, const ColIndices&).\n  *\n  * \\sa class Block\n  */\ntemplate<typename XprType, typename RowIndices, typename ColIndices>\nclass IndexedView : public IndexedViewImpl<XprType, RowIndices, ColIndices, typename internal::traits<XprType>::StorageKind>\n{\npublic:\n  typedef typename IndexedViewImpl<XprType, RowIndices, ColIndices, typename internal::traits<XprType>::StorageKind>::Base Base;\n  EIGEN_GENERIC_PUBLIC_INTERFACE(IndexedView)\n  EIGEN_INHERIT_ASSIGNMENT_OPERATORS(IndexedView)\n\n  typedef typename internal::ref_selector<XprType>::non_const_type MatrixTypeNested;\n  typedef typename internal::remove_all<XprType>::type NestedExpression;\n\n  template<typename T0, typename T1>\n  IndexedView(XprType& xpr, const T0& rowIndices, const T1& colIndices)\n    : m_xpr(xpr), m_rowIndices(rowIndices), m_colIndices(colIndices)\n  {}\n\n  /** \\returns number of rows */\n  Index rows() const { return internal::size(m_rowIndices); }\n\n  /** \\returns number of columns */\n  Index cols() const { return internal::size(m_colIndices); }\n\n  /** \\returns the nested expression */\n  const typename internal::remove_all<XprType>::type&\n  nestedExpression() const { return m_xpr; }\n\n  /** \\returns the nested expression */\n  typename internal::remove_reference<XprType>::type&\n  nestedExpression() { return m_xpr; }\n\n  /** \\returns a const reference to the object storing/generating the row indices */\n  const RowIndices& rowIndices() const { return m_rowIndices; }\n\n  /** \\returns a const reference to the object storing/generating the column indices */\n  const ColIndices& colIndices() const { return m_colIndices; }\n\nprotected:\n  MatrixTypeNested m_xpr;\n  RowIndices m_rowIndices;\n  ColIndices m_colIndices;\n};\n\n\n// Generic API dispatcher\ntemplate<typename XprType, typename RowIndices, typename ColIndices, typename StorageKind>\nclass IndexedViewImpl\n  : public internal::generic_xpr_base<IndexedView<XprType, RowIndices, ColIndices> >::type\n{\npublic:\n  typedef typename internal::generic_xpr_base<IndexedView<XprType, RowIndices, ColIndices> >::type Base;\n};\n\nnamespace internal {\n\n\ntemplate<typename ArgType, typename RowIndices, typename ColIndices>\nstruct unary_evaluator<IndexedView<ArgType, RowIndices, ColIndices>, IndexBased>\n  : evaluator_base<IndexedView<ArgType, RowIndices, ColIndices> >\n{\n  typedef IndexedView<ArgType, RowIndices, ColIndices> XprType;\n\n  enum {\n    CoeffReadCost = evaluator<ArgType>::CoeffReadCost /* TODO + cost of row/col index */,\n\n    FlagsLinearAccessBit = (traits<XprType>::RowsAtCompileTime == 1 || traits<XprType>::ColsAtCompileTime == 1) ? 
LinearAccessBit : 0,\n\n    FlagsRowMajorBit = traits<XprType>::FlagsRowMajorBit, \n\n    Flags = (evaluator<ArgType>::Flags & (HereditaryBits & ~RowMajorBit /*| LinearAccessBit | DirectAccessBit*/)) | FlagsLinearAccessBit | FlagsRowMajorBit,\n\n    Alignment = 0\n  };\n\n  EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& xpr) : m_argImpl(xpr.nestedExpression()), m_xpr(xpr)\n  {\n    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);\n  }\n\n  typedef typename XprType::Scalar Scalar;\n  typedef typename XprType::CoeffReturnType CoeffReturnType;\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  CoeffReturnType coeff(Index row, Index col) const\n  {\n    return m_argImpl.coeff(m_xpr.rowIndices()[row], m_xpr.colIndices()[col]);\n  }\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  Scalar& coeffRef(Index row, Index col)\n  {\n    return m_argImpl.coeffRef(m_xpr.rowIndices()[row], m_xpr.colIndices()[col]);\n  }\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  Scalar& coeffRef(Index index)\n  {\n    EIGEN_STATIC_ASSERT_LVALUE(XprType)\n    Index row = XprType::RowsAtCompileTime == 1 ? 0 : index;\n    Index col = XprType::RowsAtCompileTime == 1 ? index : 0;\n    return m_argImpl.coeffRef( m_xpr.rowIndices()[row], m_xpr.colIndices()[col]);\n  }\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  const Scalar& coeffRef(Index index) const\n  {\n    Index row = XprType::RowsAtCompileTime == 1 ? 0 : index;\n    Index col = XprType::RowsAtCompileTime == 1 ? index : 0;\n    return m_argImpl.coeffRef( m_xpr.rowIndices()[row], m_xpr.colIndices()[col]);\n  }\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  const CoeffReturnType coeff(Index index) const\n  {\n    Index row = XprType::RowsAtCompileTime == 1 ? 0 : index;\n    Index col = XprType::RowsAtCompileTime == 1 ? index : 0;\n    return m_argImpl.coeff( m_xpr.rowIndices()[row], m_xpr.colIndices()[col]);\n  }\n\nprotected:\n\n  evaluator<ArgType> m_argImpl;\n  const XprType& m_xpr;\n\n};\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_INDEXED_VIEW_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/Inverse.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2014-2019 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_INVERSE_H\n#define EIGEN_INVERSE_H\n\nnamespace Eigen {\n\ntemplate<typename XprType,typename StorageKind> class InverseImpl;\n\nnamespace internal {\n\ntemplate<typename XprType>\nstruct traits<Inverse<XprType> >\n  : traits<typename XprType::PlainObject>\n{\n  typedef typename XprType::PlainObject PlainObject;\n  typedef traits<PlainObject> BaseTraits;\n  enum {\n    Flags = BaseTraits::Flags & RowMajorBit\n  };\n};\n\n} // end namespace internal\n\n/** \\class Inverse\n  *\n  * \\brief Expression of the inverse of another expression\n  *\n  * \\tparam XprType the type of the expression we are taking the inverse\n  *\n  * This class represents an abstract expression of A.inverse()\n  * and most of the time this is the only way it is used.\n  *\n  */\ntemplate<typename XprType>\nclass Inverse : public InverseImpl<XprType,typename internal::traits<XprType>::StorageKind>\n{\npublic:\n  typedef typename XprType::StorageIndex StorageIndex;\n  typedef typename XprType::Scalar                            Scalar;\n  typedef typename internal::ref_selector<XprType>::type      XprTypeNested;\n  typedef typename internal::remove_all<XprTypeNested>::type  XprTypeNestedCleaned;\n  typedef typename internal::ref_selector<Inverse>::type Nested;\n  typedef typename internal::remove_all<XprType>::type NestedExpression;\n\n  explicit EIGEN_DEVICE_FUNC Inverse(const XprType &xpr)\n    : m_xpr(xpr)\n  {}\n\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR  Index rows() const EIGEN_NOEXCEPT { return m_xpr.cols(); }\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR  Index cols() const EIGEN_NOEXCEPT { return m_xpr.rows(); }\n\n  EIGEN_DEVICE_FUNC const XprTypeNestedCleaned& nestedExpression() const { return m_xpr; }\n\nprotected:\n  XprTypeNested m_xpr;\n};\n\n// Generic API dispatcher\ntemplate<typename XprType, typename StorageKind>\nclass InverseImpl\n  : public internal::generic_xpr_base<Inverse<XprType> >::type\n{\npublic:\n  typedef typename internal::generic_xpr_base<Inverse<XprType> >::type Base;\n  typedef typename XprType::Scalar Scalar;\nprivate:\n\n  Scalar coeff(Index row, Index col) const;\n  Scalar coeff(Index i) const;\n};\n\nnamespace internal {\n\n/** \\internal\n  * \\brief Default evaluator for Inverse expression.\n  *\n  * This default evaluator for Inverse expression simply evaluate the inverse into a temporary\n  * by a call to internal::call_assignment_no_alias.\n  * Therefore, inverse implementers only have to specialize Assignment<Dst,Inverse<...>, ...> for\n  * there own nested expression.\n  *\n  * \\sa class Inverse\n  */\ntemplate<typename ArgType>\nstruct unary_evaluator<Inverse<ArgType> >\n  : public evaluator<typename Inverse<ArgType>::PlainObject>\n{\n  typedef Inverse<ArgType> InverseType;\n  typedef typename InverseType::PlainObject PlainObject;\n  typedef evaluator<PlainObject> Base;\n\n  enum { Flags = Base::Flags | EvalBeforeNestingBit };\n\n  unary_evaluator(const InverseType& inv_xpr)\n    : m_result(inv_xpr.rows(), inv_xpr.cols())\n  {\n    ::new (static_cast<Base*>(this)) Base(m_result);\n    internal::call_assignment_no_alias(m_result, inv_xpr);\n  }\n\nprotected:\n  PlainObject 
m_result;\n};\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_INVERSE_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/Map.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2007-2010 Benoit Jacob <jacob.benoit.1@gmail.com>\n// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_MAP_H\n#define EIGEN_MAP_H\n\nnamespace Eigen {\n\nnamespace internal {\ntemplate<typename PlainObjectType, int MapOptions, typename StrideType>\nstruct traits<Map<PlainObjectType, MapOptions, StrideType> >\n  : public traits<PlainObjectType>\n{\n  typedef traits<PlainObjectType> TraitsBase;\n  enum {\n    PlainObjectTypeInnerSize = ((traits<PlainObjectType>::Flags&RowMajorBit)==RowMajorBit)\n                             ? PlainObjectType::ColsAtCompileTime\n                             : PlainObjectType::RowsAtCompileTime,\n\n    InnerStrideAtCompileTime = StrideType::InnerStrideAtCompileTime == 0\n                             ? int(PlainObjectType::InnerStrideAtCompileTime)\n                             : int(StrideType::InnerStrideAtCompileTime),\n    OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0\n                             ? (InnerStrideAtCompileTime==Dynamic || PlainObjectTypeInnerSize==Dynamic\n                                ? Dynamic\n                                : int(InnerStrideAtCompileTime) * int(PlainObjectTypeInnerSize))\n                             : int(StrideType::OuterStrideAtCompileTime),\n    Alignment = int(MapOptions)&int(AlignedMask),\n    Flags0 = TraitsBase::Flags & (~NestByRefBit),\n    Flags = is_lvalue<PlainObjectType>::value ? int(Flags0) : (int(Flags0) & ~LvalueBit)\n  };\nprivate:\n  enum { Options }; // Expressions don't have Options\n};\n}\n\n/** \\class Map\n  * \\ingroup Core_Module\n  *\n  * \\brief A matrix or vector expression mapping an existing array of data.\n  *\n  * \\tparam PlainObjectType the equivalent matrix type of the mapped data\n  * \\tparam MapOptions specifies the pointer alignment in bytes. It can be: \\c #Aligned128, \\c #Aligned64, \\c #Aligned32, \\c #Aligned16, \\c #Aligned8 or \\c #Unaligned.\n  *                The default is \\c #Unaligned.\n  * \\tparam StrideType optionally specifies strides. By default, Map assumes the memory layout\n  *                   of an ordinary, contiguous array. This can be overridden by specifying strides.\n  *                   The type passed here must be a specialization of the Stride template, see examples below.\n  *\n  * This class represents a matrix or vector expression mapping an existing array of data.\n  * It can be used to let Eigen interface without any overhead with non-Eigen data structures,\n  * such as plain C arrays or structures from other libraries. By default, it assumes that the\n  * data is laid out contiguously in memory. You can however override this by explicitly specifying\n  * inner and outer strides.\n  *\n  * Here's an example of simply mapping a contiguous array as a \\ref TopicStorageOrders \"column-major\" matrix:\n  * \\include Map_simple.cpp\n  * Output: \\verbinclude Map_simple.out\n  *\n  * If you need to map non-contiguous arrays, you can do so by specifying strides:\n  *\n  * Here's an example of mapping an array as a vector, specifying an inner stride, that is, the pointer\n  * increment between two consecutive coefficients. 
Here, we're specifying the inner stride as a compile-time\n  * fixed value.\n  * \\include Map_inner_stride.cpp\n  * Output: \\verbinclude Map_inner_stride.out\n  *\n  * Here's an example of mapping an array while specifying an outer stride. Here, since we're mapping\n  * as a column-major matrix, 'outer stride' means the pointer increment between two consecutive columns.\n  * Here, we're specifying the outer stride as a runtime parameter. Note that here \\c OuterStride<> is\n  * a short version of \\c OuterStride<Dynamic> because the default template parameter of OuterStride\n  * is  \\c Dynamic\n  * \\include Map_outer_stride.cpp\n  * Output: \\verbinclude Map_outer_stride.out\n  *\n  * For more details and for an example of specifying both an inner and an outer stride, see class Stride.\n  *\n  * \\b Tip: to change the array of data mapped by a Map object, you can use the C++\n  * placement new syntax:\n  *\n  * Example: \\include Map_placement_new.cpp\n  * Output: \\verbinclude Map_placement_new.out\n  *\n  * This class is the return type of PlainObjectBase::Map() but can also be used directly.\n  *\n  * \\sa PlainObjectBase::Map(), \\ref TopicStorageOrders\n  */\ntemplate<typename PlainObjectType, int MapOptions, typename StrideType> class Map\n  : public MapBase<Map<PlainObjectType, MapOptions, StrideType> >\n{\n  public:\n\n    typedef MapBase<Map> Base;\n    EIGEN_DENSE_PUBLIC_INTERFACE(Map)\n\n    typedef typename Base::PointerType PointerType;\n    typedef PointerType PointerArgType;\n    EIGEN_DEVICE_FUNC\n    inline PointerType cast_to_pointer_type(PointerArgType ptr) { return ptr; }\n\n    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n    inline Index innerStride() const\n    {\n      return StrideType::InnerStrideAtCompileTime != 0 ? m_stride.inner() : 1;\n    }\n\n    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n    inline Index outerStride() const\n    {\n      return StrideType::OuterStrideAtCompileTime != 0 ? m_stride.outer()\n           : internal::traits<Map>::OuterStrideAtCompileTime != Dynamic ? Index(internal::traits<Map>::OuterStrideAtCompileTime)\n           : IsVectorAtCompileTime ? (this->size() * innerStride())\n           : int(Flags)&RowMajorBit ? 
(this->cols() * innerStride())\n           : (this->rows() * innerStride());\n    }\n\n    /** Constructor in the fixed-size case.\n      *\n      * \\param dataPtr pointer to the array to map\n      * \\param stride optional Stride object, passing the strides.\n      */\n    EIGEN_DEVICE_FUNC\n    explicit inline Map(PointerArgType dataPtr, const StrideType& stride = StrideType())\n      : Base(cast_to_pointer_type(dataPtr)), m_stride(stride)\n    {\n      PlainObjectType::Base::_check_template_params();\n    }\n\n    /** Constructor in the dynamic-size vector case.\n      *\n      * \\param dataPtr pointer to the array to map\n      * \\param size the size of the vector expression\n      * \\param stride optional Stride object, passing the strides.\n      */\n    EIGEN_DEVICE_FUNC\n    inline Map(PointerArgType dataPtr, Index size, const StrideType& stride = StrideType())\n      : Base(cast_to_pointer_type(dataPtr), size), m_stride(stride)\n    {\n      PlainObjectType::Base::_check_template_params();\n    }\n\n    /** Constructor in the dynamic-size matrix case.\n      *\n      * \\param dataPtr pointer to the array to map\n      * \\param rows the number of rows of the matrix expression\n      * \\param cols the number of columns of the matrix expression\n      * \\param stride optional Stride object, passing the strides.\n      */\n    EIGEN_DEVICE_FUNC\n    inline Map(PointerArgType dataPtr, Index rows, Index cols, const StrideType& stride = StrideType())\n      : Base(cast_to_pointer_type(dataPtr), rows, cols), m_stride(stride)\n    {\n      PlainObjectType::Base::_check_template_params();\n    }\n\n    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Map)\n\n  protected:\n    StrideType m_stride;\n};\n\n\n} // end namespace Eigen\n\n#endif // EIGEN_MAP_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/MapBase.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2007-2010 Benoit Jacob <jacob.benoit.1@gmail.com>\n// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_MAPBASE_H\n#define EIGEN_MAPBASE_H\n\n#define EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived) \\\n      EIGEN_STATIC_ASSERT((int(internal::evaluator<Derived>::Flags) & LinearAccessBit) || Derived::IsVectorAtCompileTime, \\\n                          YOU_ARE_TRYING_TO_USE_AN_INDEX_BASED_ACCESSOR_ON_AN_EXPRESSION_THAT_DOES_NOT_SUPPORT_THAT)\n\nnamespace Eigen {\n\n/** \\ingroup Core_Module\n  *\n  * \\brief Base class for dense Map and Block expression with direct access\n  *\n  * This base class provides the const low-level accessors (e.g. coeff, coeffRef) of dense\n  * Map and Block objects with direct access.\n  * Typical users do not have to directly deal with this class.\n  *\n  * This class can be extended by through the macro plugin \\c EIGEN_MAPBASE_PLUGIN.\n  * See \\link TopicCustomizing_Plugins customizing Eigen \\endlink for details.\n  *\n  * The \\c Derived class has to provide the following two methods describing the memory layout:\n  *  \\code Index innerStride() const; \\endcode\n  *  \\code Index outerStride() const; \\endcode\n  *\n  * \\sa class Map, class Block\n  */\ntemplate<typename Derived> class MapBase<Derived, ReadOnlyAccessors>\n  : public internal::dense_xpr_base<Derived>::type\n{\n  public:\n\n    typedef typename internal::dense_xpr_base<Derived>::type Base;\n    enum {\n      RowsAtCompileTime = internal::traits<Derived>::RowsAtCompileTime,\n      ColsAtCompileTime = internal::traits<Derived>::ColsAtCompileTime,\n      InnerStrideAtCompileTime = internal::traits<Derived>::InnerStrideAtCompileTime,\n      SizeAtCompileTime = Base::SizeAtCompileTime\n    };\n\n    typedef typename internal::traits<Derived>::StorageKind StorageKind;\n    typedef typename internal::traits<Derived>::Scalar Scalar;\n    typedef typename internal::packet_traits<Scalar>::type PacketScalar;\n    typedef typename NumTraits<Scalar>::Real RealScalar;\n    typedef typename internal::conditional<\n                         bool(internal::is_lvalue<Derived>::value),\n                         Scalar *,\n                         const Scalar *>::type\n                     PointerType;\n\n    using Base::derived;\n//    using Base::RowsAtCompileTime;\n//    using Base::ColsAtCompileTime;\n//    using Base::SizeAtCompileTime;\n    using Base::MaxRowsAtCompileTime;\n    using Base::MaxColsAtCompileTime;\n    using Base::MaxSizeAtCompileTime;\n    using Base::IsVectorAtCompileTime;\n    using Base::Flags;\n    using Base::IsRowMajor;\n\n    using Base::rows;\n    using Base::cols;\n    using Base::size;\n    using Base::coeff;\n    using Base::coeffRef;\n    using Base::lazyAssign;\n    using Base::eval;\n\n    using Base::innerStride;\n    using Base::outerStride;\n    using Base::rowStride;\n    using Base::colStride;\n\n    // bug 217 - compile error on ICC 11.1\n    using Base::operator=;\n\n    typedef typename Base::CoeffReturnType CoeffReturnType;\n\n    /** \\copydoc DenseBase::rows() */\n    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n    inline Index rows() const EIGEN_NOEXCEPT { return m_rows.value(); }\n    /** \\copydoc 
DenseBase::cols() */\n    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n    inline Index cols() const EIGEN_NOEXCEPT { return m_cols.value(); }\n\n    /** Returns a pointer to the first coefficient of the matrix or vector.\n      *\n      * \\note When addressing this data, make sure to honor the strides returned by innerStride() and outerStride().\n      *\n      * \\sa innerStride(), outerStride()\n      */\n    EIGEN_DEVICE_FUNC inline const Scalar* data() const { return m_data; }\n\n    /** \\copydoc PlainObjectBase::coeff(Index,Index) const */\n    EIGEN_DEVICE_FUNC\n    inline const Scalar& coeff(Index rowId, Index colId) const\n    {\n      return m_data[colId * colStride() + rowId * rowStride()];\n    }\n\n    /** \\copydoc PlainObjectBase::coeff(Index) const */\n    EIGEN_DEVICE_FUNC\n    inline const Scalar& coeff(Index index) const\n    {\n      EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived)\n      return m_data[index * innerStride()];\n    }\n\n    /** \\copydoc PlainObjectBase::coeffRef(Index,Index) const */\n    EIGEN_DEVICE_FUNC\n    inline const Scalar& coeffRef(Index rowId, Index colId) const\n    {\n      return this->m_data[colId * colStride() + rowId * rowStride()];\n    }\n\n    /** \\copydoc PlainObjectBase::coeffRef(Index) const */\n    EIGEN_DEVICE_FUNC\n    inline const Scalar& coeffRef(Index index) const\n    {\n      EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived)\n      return this->m_data[index * innerStride()];\n    }\n\n    /** \\internal */\n    template<int LoadMode>\n    inline PacketScalar packet(Index rowId, Index colId) const\n    {\n      return internal::ploadt<PacketScalar, LoadMode>\n               (m_data + (colId * colStride() + rowId * rowStride()));\n    }\n\n    /** \\internal */\n    template<int LoadMode>\n    inline PacketScalar packet(Index index) const\n    {\n      EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived)\n      return internal::ploadt<PacketScalar, LoadMode>(m_data + index * innerStride());\n    }\n\n    /** \\internal Constructor for fixed size matrices or vectors */\n    EIGEN_DEVICE_FUNC\n    explicit inline MapBase(PointerType dataPtr) : m_data(dataPtr), m_rows(RowsAtCompileTime), m_cols(ColsAtCompileTime)\n    {\n      EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)\n      checkSanity<Derived>();\n    }\n\n    /** \\internal Constructor for dynamically sized vectors */\n    EIGEN_DEVICE_FUNC\n    inline MapBase(PointerType dataPtr, Index vecSize)\n            : m_data(dataPtr),\n              m_rows(RowsAtCompileTime == Dynamic ? vecSize : Index(RowsAtCompileTime)),\n              m_cols(ColsAtCompileTime == Dynamic ? 
vecSize : Index(ColsAtCompileTime))\n    {\n      EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)\n      eigen_assert(vecSize >= 0);\n      eigen_assert(dataPtr == 0 || SizeAtCompileTime == Dynamic || SizeAtCompileTime == vecSize);\n      checkSanity<Derived>();\n    }\n\n    /** \\internal Constructor for dynamically sized matrices */\n    EIGEN_DEVICE_FUNC\n    inline MapBase(PointerType dataPtr, Index rows, Index cols)\n            : m_data(dataPtr), m_rows(rows), m_cols(cols)\n    {\n      eigen_assert( (dataPtr == 0)\n              || (   rows >= 0 && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows)\n                  && cols >= 0 && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols)));\n      checkSanity<Derived>();\n    }\n\n    #ifdef EIGEN_MAPBASE_PLUGIN\n    #include EIGEN_MAPBASE_PLUGIN\n    #endif\n\n  protected:\n    EIGEN_DEFAULT_COPY_CONSTRUCTOR(MapBase)\n    EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(MapBase)\n\n    template<typename T>\n    EIGEN_DEVICE_FUNC\n    void checkSanity(typename internal::enable_if<(internal::traits<T>::Alignment>0),void*>::type = 0) const\n    {\n#if EIGEN_MAX_ALIGN_BYTES>0\n      // innerStride() is not set yet when this function is called, so we optimistically assume the lowest plausible value:\n      const Index minInnerStride = InnerStrideAtCompileTime == Dynamic ? 1 : Index(InnerStrideAtCompileTime);\n      EIGEN_ONLY_USED_FOR_DEBUG(minInnerStride);\n      eigen_assert((   ((internal::UIntPtr(m_data) % internal::traits<Derived>::Alignment) == 0)\n                    || (cols() * rows() * minInnerStride * sizeof(Scalar)) < internal::traits<Derived>::Alignment ) && \"data is not aligned\");\n#endif\n    }\n\n    template<typename T>\n    EIGEN_DEVICE_FUNC\n    void checkSanity(typename internal::enable_if<internal::traits<T>::Alignment==0,void*>::type = 0) const\n    {}\n\n    PointerType m_data;\n    const internal::variable_if_dynamic<Index, RowsAtCompileTime> m_rows;\n    const internal::variable_if_dynamic<Index, ColsAtCompileTime> m_cols;\n};\n\n/** \\ingroup Core_Module\n  *\n  * \\brief Base class for non-const dense Map and Block expression with direct access\n  *\n  * This base class provides the non-const low-level accessors (e.g. 
coeff and coeffRef) of\n  * dense Map and Block objects with direct access.\n  * It inherits MapBase<Derived, ReadOnlyAccessors> which defines the const variant for reading specific entries.\n  *\n  * \\sa class Map, class Block\n  */\ntemplate<typename Derived> class MapBase<Derived, WriteAccessors>\n  : public MapBase<Derived, ReadOnlyAccessors>\n{\n    typedef MapBase<Derived, ReadOnlyAccessors> ReadOnlyMapBase;\n  public:\n\n    typedef MapBase<Derived, ReadOnlyAccessors> Base;\n\n    typedef typename Base::Scalar Scalar;\n    typedef typename Base::PacketScalar PacketScalar;\n    typedef typename Base::StorageIndex StorageIndex;\n    typedef typename Base::PointerType PointerType;\n\n    using Base::derived;\n    using Base::rows;\n    using Base::cols;\n    using Base::size;\n    using Base::coeff;\n    using Base::coeffRef;\n\n    using Base::innerStride;\n    using Base::outerStride;\n    using Base::rowStride;\n    using Base::colStride;\n\n    typedef typename internal::conditional<\n                    internal::is_lvalue<Derived>::value,\n                    Scalar,\n                    const Scalar\n                  >::type ScalarWithConstIfNotLvalue;\n\n    EIGEN_DEVICE_FUNC\n    inline const Scalar* data() const { return this->m_data; }\n    EIGEN_DEVICE_FUNC\n    inline ScalarWithConstIfNotLvalue* data() { return this->m_data; } // no const-cast here so non-const-correct code will give a compile error\n\n    EIGEN_DEVICE_FUNC\n    inline ScalarWithConstIfNotLvalue& coeffRef(Index row, Index col)\n    {\n      return this->m_data[col * colStride() + row * rowStride()];\n    }\n\n    EIGEN_DEVICE_FUNC\n    inline ScalarWithConstIfNotLvalue& coeffRef(Index index)\n    {\n      EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived)\n      return this->m_data[index * innerStride()];\n    }\n\n    template<int StoreMode>\n    inline void writePacket(Index row, Index col, const PacketScalar& val)\n    {\n      internal::pstoret<Scalar, PacketScalar, StoreMode>\n               (this->m_data + (col * colStride() + row * rowStride()), val);\n    }\n\n    template<int StoreMode>\n    inline void writePacket(Index index, const PacketScalar& val)\n    {\n      EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived)\n      internal::pstoret<Scalar, PacketScalar, StoreMode>\n                (this->m_data + index * innerStride(), val);\n    }\n\n    EIGEN_DEVICE_FUNC explicit inline MapBase(PointerType dataPtr) : Base(dataPtr) {}\n    EIGEN_DEVICE_FUNC inline MapBase(PointerType dataPtr, Index vecSize) : Base(dataPtr, vecSize) {}\n    EIGEN_DEVICE_FUNC inline MapBase(PointerType dataPtr, Index rows, Index cols) : Base(dataPtr, rows, cols) {}\n\n    EIGEN_DEVICE_FUNC\n    Derived& operator=(const MapBase& other)\n    {\n      ReadOnlyMapBase::Base::operator=(other);\n      return derived();\n    }\n\n    // In theory we could simply refer to Base:Base::operator=, but MSVC does not like Base::Base,\n    // see bugs 821 and 920.\n    using ReadOnlyMapBase::Base::operator=;\n  protected:\n    EIGEN_DEFAULT_COPY_CONSTRUCTOR(MapBase)\n    EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(MapBase)\n};\n\n#undef EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS\n\n} // end namespace Eigen\n\n#endif // EIGEN_MAPBASE_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/MathFunctions.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2006-2010 Benoit Jacob <jacob.benoit.1@gmail.com>\n// Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_MATHFUNCTIONS_H\n#define EIGEN_MATHFUNCTIONS_H\n\n// TODO this should better be moved to NumTraits\n// Source: WolframAlpha\n#define EIGEN_PI    3.141592653589793238462643383279502884197169399375105820974944592307816406L\n#define EIGEN_LOG2E 1.442695040888963407359924681001892137426645954152985934135449406931109219L\n#define EIGEN_LN2   0.693147180559945309417232121458176568075500134360255254120680009493393621L\n\nnamespace Eigen {\n\n// On WINCE, std::abs is defined for int only, so let's defined our own overloads:\n// This issue has been confirmed with MSVC 2008 only, but the issue might exist for more recent versions too.\n#if EIGEN_OS_WINCE && EIGEN_COMP_MSVC && EIGEN_COMP_MSVC<=1500\nlong        abs(long        x) { return (labs(x));  }\ndouble      abs(double      x) { return (fabs(x));  }\nfloat       abs(float       x) { return (fabsf(x)); }\nlong double abs(long double x) { return (fabsl(x)); }\n#endif\n\nnamespace internal {\n\n/** \\internal \\class global_math_functions_filtering_base\n  *\n  * What it does:\n  * Defines a typedef 'type' as follows:\n  * - if type T has a member typedef Eigen_BaseClassForSpecializationOfGlobalMathFuncImpl, then\n  *   global_math_functions_filtering_base<T>::type is a typedef for it.\n  * - otherwise, global_math_functions_filtering_base<T>::type is a typedef for T.\n  *\n  * How it's used:\n  * To allow to defined the global math functions (like sin...) in certain cases, like the Array expressions.\n  * When you do sin(array1+array2), the object array1+array2 has a complicated expression type, all what you want to know\n  * is that it inherits ArrayBase. So we implement a partial specialization of sin_impl for ArrayBase<Derived>.\n  * So we must make sure to use sin_impl<ArrayBase<Derived> > and not sin_impl<Derived>, otherwise our partial specialization\n  * won't be used. How does sin know that? That's exactly what global_math_functions_filtering_base tells it.\n  *\n  * How it's implemented:\n  * SFINAE in the style of enable_if. Highly susceptible of breaking compilers. 
With GCC, it sure does work, but if you replace\n  * the typename dummy by an integer template parameter, it doesn't work anymore!\n  */\n\ntemplate<typename T, typename dummy = void>\nstruct global_math_functions_filtering_base\n{\n  typedef T type;\n};\n\ntemplate<typename T> struct always_void { typedef void type; };\n\ntemplate<typename T>\nstruct global_math_functions_filtering_base\n  <T,\n   typename always_void<typename T::Eigen_BaseClassForSpecializationOfGlobalMathFuncImpl>::type\n  >\n{\n  typedef typename T::Eigen_BaseClassForSpecializationOfGlobalMathFuncImpl type;\n};\n\n#define EIGEN_MATHFUNC_IMPL(func, scalar) Eigen::internal::func##_impl<typename Eigen::internal::global_math_functions_filtering_base<scalar>::type>\n#define EIGEN_MATHFUNC_RETVAL(func, scalar) typename Eigen::internal::func##_retval<typename Eigen::internal::global_math_functions_filtering_base<scalar>::type>::type\n\n/****************************************************************************\n* Implementation of real                                                 *\n****************************************************************************/\n\ntemplate<typename Scalar, bool IsComplex = NumTraits<Scalar>::IsComplex>\nstruct real_default_impl\n{\n  typedef typename NumTraits<Scalar>::Real RealScalar;\n  EIGEN_DEVICE_FUNC\n  static inline RealScalar run(const Scalar& x)\n  {\n    return x;\n  }\n};\n\ntemplate<typename Scalar>\nstruct real_default_impl<Scalar,true>\n{\n  typedef typename NumTraits<Scalar>::Real RealScalar;\n  EIGEN_DEVICE_FUNC\n  static inline RealScalar run(const Scalar& x)\n  {\n    using std::real;\n    return real(x);\n  }\n};\n\ntemplate<typename Scalar> struct real_impl : real_default_impl<Scalar> {};\n\n#if defined(EIGEN_GPU_COMPILE_PHASE)\ntemplate<typename T>\nstruct real_impl<std::complex<T> >\n{\n  typedef T RealScalar;\n  EIGEN_DEVICE_FUNC\n  static inline T run(const std::complex<T>& x)\n  {\n    return x.real();\n  }\n};\n#endif\n\ntemplate<typename Scalar>\nstruct real_retval\n{\n  typedef typename NumTraits<Scalar>::Real type;\n};\n\n/****************************************************************************\n* Implementation of imag                                                 *\n****************************************************************************/\n\ntemplate<typename Scalar, bool IsComplex = NumTraits<Scalar>::IsComplex>\nstruct imag_default_impl\n{\n  typedef typename NumTraits<Scalar>::Real RealScalar;\n  EIGEN_DEVICE_FUNC\n  static inline RealScalar run(const Scalar&)\n  {\n    return RealScalar(0);\n  }\n};\n\ntemplate<typename Scalar>\nstruct imag_default_impl<Scalar,true>\n{\n  typedef typename NumTraits<Scalar>::Real RealScalar;\n  EIGEN_DEVICE_FUNC\n  static inline RealScalar run(const Scalar& x)\n  {\n    using std::imag;\n    return imag(x);\n  }\n};\n\ntemplate<typename Scalar> struct imag_impl : imag_default_impl<Scalar> {};\n\n#if defined(EIGEN_GPU_COMPILE_PHASE)\ntemplate<typename T>\nstruct imag_impl<std::complex<T> >\n{\n  typedef T RealScalar;\n  EIGEN_DEVICE_FUNC\n  static inline T run(const std::complex<T>& x)\n  {\n    return x.imag();\n  }\n};\n#endif\n\ntemplate<typename Scalar>\nstruct imag_retval\n{\n  typedef typename NumTraits<Scalar>::Real type;\n};\n\n/****************************************************************************\n* Implementation of real_ref                                             *\n****************************************************************************/\n\ntemplate<typename Scalar>\nstruct 
real_ref_impl\n{\n  typedef typename NumTraits<Scalar>::Real RealScalar;\n  EIGEN_DEVICE_FUNC\n  static inline RealScalar& run(Scalar& x)\n  {\n    return reinterpret_cast<RealScalar*>(&x)[0];\n  }\n  EIGEN_DEVICE_FUNC\n  static inline const RealScalar& run(const Scalar& x)\n  {\n    return reinterpret_cast<const RealScalar*>(&x)[0];\n  }\n};\n\ntemplate<typename Scalar>\nstruct real_ref_retval\n{\n  typedef typename NumTraits<Scalar>::Real & type;\n};\n\n/****************************************************************************\n* Implementation of imag_ref                                             *\n****************************************************************************/\n\ntemplate<typename Scalar, bool IsComplex>\nstruct imag_ref_default_impl\n{\n  typedef typename NumTraits<Scalar>::Real RealScalar;\n  EIGEN_DEVICE_FUNC\n  static inline RealScalar& run(Scalar& x)\n  {\n    return reinterpret_cast<RealScalar*>(&x)[1];\n  }\n  EIGEN_DEVICE_FUNC\n  static inline const RealScalar& run(const Scalar& x)\n  {\n    return reinterpret_cast<RealScalar*>(&x)[1];\n  }\n};\n\ntemplate<typename Scalar>\nstruct imag_ref_default_impl<Scalar, false>\n{\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n  static inline Scalar run(Scalar&)\n  {\n    return Scalar(0);\n  }\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n  static inline const Scalar run(const Scalar&)\n  {\n    return Scalar(0);\n  }\n};\n\ntemplate<typename Scalar>\nstruct imag_ref_impl : imag_ref_default_impl<Scalar, NumTraits<Scalar>::IsComplex> {};\n\ntemplate<typename Scalar>\nstruct imag_ref_retval\n{\n  typedef typename NumTraits<Scalar>::Real & type;\n};\n\n/****************************************************************************\n* Implementation of conj                                                 *\n****************************************************************************/\n\ntemplate<typename Scalar, bool IsComplex = NumTraits<Scalar>::IsComplex>\nstruct conj_default_impl\n{\n  EIGEN_DEVICE_FUNC\n  static inline Scalar run(const Scalar& x)\n  {\n    return x;\n  }\n};\n\ntemplate<typename Scalar>\nstruct conj_default_impl<Scalar,true>\n{\n  EIGEN_DEVICE_FUNC\n  static inline Scalar run(const Scalar& x)\n  {\n    using std::conj;\n    return conj(x);\n  }\n};\n\ntemplate<typename Scalar, bool IsComplex = NumTraits<Scalar>::IsComplex>\nstruct conj_impl : conj_default_impl<Scalar, IsComplex> {};\n\ntemplate<typename Scalar>\nstruct conj_retval\n{\n  typedef Scalar type;\n};\n\n/****************************************************************************\n* Implementation of abs2                                                 *\n****************************************************************************/\n\ntemplate<typename Scalar,bool IsComplex>\nstruct abs2_impl_default\n{\n  typedef typename NumTraits<Scalar>::Real RealScalar;\n  EIGEN_DEVICE_FUNC\n  static inline RealScalar run(const Scalar& x)\n  {\n    return x*x;\n  }\n};\n\ntemplate<typename Scalar>\nstruct abs2_impl_default<Scalar, true> // IsComplex\n{\n  typedef typename NumTraits<Scalar>::Real RealScalar;\n  EIGEN_DEVICE_FUNC\n  static inline RealScalar run(const Scalar& x)\n  {\n    return x.real()*x.real() + x.imag()*x.imag();\n  }\n};\n\ntemplate<typename Scalar>\nstruct abs2_impl\n{\n  typedef typename NumTraits<Scalar>::Real RealScalar;\n  EIGEN_DEVICE_FUNC\n  static inline RealScalar run(const Scalar& x)\n  {\n    return abs2_impl_default<Scalar,NumTraits<Scalar>::IsComplex>::run(x);\n  }\n};\n\ntemplate<typename Scalar>\nstruct abs2_retval\n{\n  typedef 
typename NumTraits<Scalar>::Real type;\n};\n\n/****************************************************************************\n* Implementation of sqrt/rsqrt                                             *\n****************************************************************************/\n\ntemplate<typename Scalar>\nstruct sqrt_impl\n{\n  EIGEN_DEVICE_FUNC\n  static EIGEN_ALWAYS_INLINE Scalar run(const Scalar& x)\n  {\n    EIGEN_USING_STD(sqrt);\n    return sqrt(x);\n  }\n};\n\n// Complex sqrt defined in MathFunctionsImpl.h.\ntemplate<typename T> EIGEN_DEVICE_FUNC std::complex<T> complex_sqrt(const std::complex<T>& a_x);\n\n// Custom implementation is faster than `std::sqrt`, works on\n// GPU, and correctly handles special cases (unlike MSVC).\ntemplate<typename T>\nstruct sqrt_impl<std::complex<T> >\n{\n  EIGEN_DEVICE_FUNC\n  static EIGEN_ALWAYS_INLINE std::complex<T> run(const std::complex<T>& x)\n  {\n    return complex_sqrt<T>(x);\n  }\n};\n\ntemplate<typename Scalar>\nstruct sqrt_retval\n{\n  typedef Scalar type;\n};\n\n// Default implementation relies on numext::sqrt, at bottom of file.\ntemplate<typename T>\nstruct rsqrt_impl;\n\n// Complex rsqrt defined in MathFunctionsImpl.h.\ntemplate<typename T> EIGEN_DEVICE_FUNC std::complex<T> complex_rsqrt(const std::complex<T>& a_x);\n\ntemplate<typename T>\nstruct rsqrt_impl<std::complex<T> >\n{\n  EIGEN_DEVICE_FUNC\n  static EIGEN_ALWAYS_INLINE std::complex<T> run(const std::complex<T>& x)\n  {\n    return complex_rsqrt<T>(x);\n  }\n};\n\ntemplate<typename Scalar>\nstruct rsqrt_retval\n{\n  typedef Scalar type;\n};\n\n/****************************************************************************\n* Implementation of norm1                                                *\n****************************************************************************/\n\ntemplate<typename Scalar, bool IsComplex>\nstruct norm1_default_impl;\n\ntemplate<typename Scalar>\nstruct norm1_default_impl<Scalar,true>\n{\n  typedef typename NumTraits<Scalar>::Real RealScalar;\n  EIGEN_DEVICE_FUNC\n  static inline RealScalar run(const Scalar& x)\n  {\n    EIGEN_USING_STD(abs);\n    return abs(x.real()) + abs(x.imag());\n  }\n};\n\ntemplate<typename Scalar>\nstruct norm1_default_impl<Scalar, false>\n{\n  EIGEN_DEVICE_FUNC\n  static inline Scalar run(const Scalar& x)\n  {\n    EIGEN_USING_STD(abs);\n    return abs(x);\n  }\n};\n\ntemplate<typename Scalar>\nstruct norm1_impl : norm1_default_impl<Scalar, NumTraits<Scalar>::IsComplex> {};\n\ntemplate<typename Scalar>\nstruct norm1_retval\n{\n  typedef typename NumTraits<Scalar>::Real type;\n};\n\n/****************************************************************************\n* Implementation of hypot                                                *\n****************************************************************************/\n\ntemplate<typename Scalar> struct hypot_impl;\n\ntemplate<typename Scalar>\nstruct hypot_retval\n{\n  typedef typename NumTraits<Scalar>::Real type;\n};\n\n/****************************************************************************\n* Implementation of cast                                                 *\n****************************************************************************/\n\ntemplate<typename OldType, typename NewType, typename EnableIf = void>\nstruct cast_impl\n{\n  EIGEN_DEVICE_FUNC\n  static inline NewType run(const OldType& x)\n  {\n    return static_cast<NewType>(x);\n  }\n};\n\n// Casting from S -> Complex<T> leads to an implicit conversion from S to T,\n// generating warnings on clang.  
Here we explicitly cast the real component.\ntemplate<typename OldType, typename NewType>\nstruct cast_impl<OldType, NewType,\n  typename internal::enable_if<\n    !NumTraits<OldType>::IsComplex && NumTraits<NewType>::IsComplex\n  >::type>\n{\n  EIGEN_DEVICE_FUNC\n  static inline NewType run(const OldType& x)\n  {\n    typedef typename NumTraits<NewType>::Real NewReal;\n    return static_cast<NewType>(static_cast<NewReal>(x));\n  }\n};\n\n// here, for once, we're plainly returning NewType: we don't want cast to do weird things.\n\ntemplate<typename OldType, typename NewType>\nEIGEN_DEVICE_FUNC\ninline NewType cast(const OldType& x)\n{\n  return cast_impl<OldType, NewType>::run(x);\n}\n\n/****************************************************************************\n* Implementation of round                                                   *\n****************************************************************************/\n\ntemplate<typename Scalar>\nstruct round_impl\n{\n  EIGEN_DEVICE_FUNC\n  static inline Scalar run(const Scalar& x)\n  {\n    EIGEN_STATIC_ASSERT((!NumTraits<Scalar>::IsComplex), NUMERIC_TYPE_MUST_BE_REAL)\n#if EIGEN_HAS_CXX11_MATH\n    EIGEN_USING_STD(round);\n#endif\n    return Scalar(round(x));\n  }\n};\n\n#if !EIGEN_HAS_CXX11_MATH\n#if EIGEN_HAS_C99_MATH\n// Use ::roundf for float.\ntemplate<>\nstruct round_impl<float> {\n  EIGEN_DEVICE_FUNC\n  static inline float run(const float& x)\n  {\n    return ::roundf(x);\n  }\n};\n#else\ntemplate<typename Scalar>\nstruct round_using_floor_ceil_impl\n{\n  EIGEN_DEVICE_FUNC\n  static inline Scalar run(const Scalar& x)\n  {\n    EIGEN_STATIC_ASSERT((!NumTraits<Scalar>::IsComplex), NUMERIC_TYPE_MUST_BE_REAL)\n    // Without C99 round/roundf, resort to floor/ceil.\n    EIGEN_USING_STD(floor);\n    EIGEN_USING_STD(ceil);\n    // If not enough precision to resolve a decimal at all, return the input.\n    // Otherwise, adding 0.5 can trigger an increment by 1.\n    const Scalar limit = Scalar(1ull << (NumTraits<Scalar>::digits() - 1));\n    if (x >= limit || x <= -limit) {\n      return x;\n    }\n    return (x > Scalar(0)) ? 
Scalar(floor(x + Scalar(0.5))) : Scalar(ceil(x - Scalar(0.5)));\n  }\n};\n\ntemplate<>\nstruct round_impl<float> : round_using_floor_ceil_impl<float> {};\n\ntemplate<>\nstruct round_impl<double> : round_using_floor_ceil_impl<double> {};\n#endif // EIGEN_HAS_C99_MATH\n#endif // !EIGEN_HAS_CXX11_MATH\n\ntemplate<typename Scalar>\nstruct round_retval\n{\n  typedef Scalar type;\n};\n\n/****************************************************************************\n* Implementation of rint                                                    *\n****************************************************************************/\n\ntemplate<typename Scalar>\nstruct rint_impl {\n  EIGEN_DEVICE_FUNC\n  static inline Scalar run(const Scalar& x)\n  {\n    EIGEN_STATIC_ASSERT((!NumTraits<Scalar>::IsComplex), NUMERIC_TYPE_MUST_BE_REAL)\n#if EIGEN_HAS_CXX11_MATH\n      EIGEN_USING_STD(rint);\n#endif\n    return rint(x);\n  }\n};\n\n#if !EIGEN_HAS_CXX11_MATH\ntemplate<>\nstruct rint_impl<double> {\n  EIGEN_DEVICE_FUNC\n  static inline double run(const double& x)\n  {\n    return ::rint(x);\n  }\n};\ntemplate<>\nstruct rint_impl<float> {\n  EIGEN_DEVICE_FUNC\n  static inline float run(const float& x)\n  {\n    return ::rintf(x);\n  }\n};\n#endif\n\ntemplate<typename Scalar>\nstruct rint_retval\n{\n  typedef Scalar type;\n};\n\n/****************************************************************************\n* Implementation of arg                                                     *\n****************************************************************************/\n\n// Visual Studio 2017 has a bug where arg(float) returns 0 for negative inputs.\n// This seems to be fixed in VS 2019.\n#if EIGEN_HAS_CXX11_MATH && (!EIGEN_COMP_MSVC || EIGEN_COMP_MSVC >= 1920)\n// std::arg is only defined for types of std::complex, or integer types or float/double/long double\ntemplate<typename Scalar,\n          bool HasStdImpl = NumTraits<Scalar>::IsComplex || is_integral<Scalar>::value\n                            || is_same<Scalar, float>::value || is_same<Scalar, double>::value\n                            || is_same<Scalar, long double>::value >\nstruct arg_default_impl;\n\ntemplate<typename Scalar>\nstruct arg_default_impl<Scalar, true> {\n  typedef typename NumTraits<Scalar>::Real RealScalar;\n  EIGEN_DEVICE_FUNC\n  static inline RealScalar run(const Scalar& x)\n  {\n    #if defined(EIGEN_HIP_DEVICE_COMPILE)\n    // HIP does not seem to have a native device side implementation for the math routine \"arg\"\n    using std::arg;\n    #else\n    EIGEN_USING_STD(arg);\n    #endif\n    return static_cast<RealScalar>(arg(x));\n  }\n};\n\n// Must be non-complex floating-point type (e.g. half/bfloat16).\ntemplate<typename Scalar>\nstruct arg_default_impl<Scalar, false> {\n  typedef typename NumTraits<Scalar>::Real RealScalar;\n  EIGEN_DEVICE_FUNC\n  static inline RealScalar run(const Scalar& x)\n  {\n    return (x < Scalar(0)) ? RealScalar(EIGEN_PI) : RealScalar(0);\n  }\n};\n#else\ntemplate<typename Scalar, bool IsComplex = NumTraits<Scalar>::IsComplex>\nstruct arg_default_impl\n{\n  typedef typename NumTraits<Scalar>::Real RealScalar;\n  EIGEN_DEVICE_FUNC\n  static inline RealScalar run(const Scalar& x)\n  {\n    return (x < RealScalar(0)) ? 
RealScalar(EIGEN_PI) : RealScalar(0);\n  }\n};\n\ntemplate<typename Scalar>\nstruct arg_default_impl<Scalar,true>\n{\n  typedef typename NumTraits<Scalar>::Real RealScalar;\n  EIGEN_DEVICE_FUNC\n  static inline RealScalar run(const Scalar& x)\n  {\n    EIGEN_USING_STD(arg);\n    return arg(x);\n  }\n};\n#endif\ntemplate<typename Scalar> struct arg_impl : arg_default_impl<Scalar> {};\n\ntemplate<typename Scalar>\nstruct arg_retval\n{\n  typedef typename NumTraits<Scalar>::Real type;\n};\n\n/****************************************************************************\n* Implementation of expm1                                                   *\n****************************************************************************/\n\n// This implementation is based on GSL Math's expm1.\nnamespace std_fallback {\n  // fallback expm1 implementation in case there is no expm1(Scalar) function in namespace of Scalar,\n  // or that there is no suitable std::expm1 function available. Implementation\n  // attributed to Kahan. See: http://www.plunk.org/~hatch/rightway.php.\n  template<typename Scalar>\n  EIGEN_DEVICE_FUNC inline Scalar expm1(const Scalar& x) {\n    EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar)\n    typedef typename NumTraits<Scalar>::Real RealScalar;\n\n    EIGEN_USING_STD(exp);\n    Scalar u = exp(x);\n    if (numext::equal_strict(u, Scalar(1))) {\n      return x;\n    }\n    Scalar um1 = u - RealScalar(1);\n    if (numext::equal_strict(um1, Scalar(-1))) {\n      return RealScalar(-1);\n    }\n\n    EIGEN_USING_STD(log);\n    Scalar logu = log(u);\n    return numext::equal_strict(u, logu) ? u : (u - RealScalar(1)) * x / logu;\n  }\n}\n\ntemplate<typename Scalar>\nstruct expm1_impl {\n  EIGEN_DEVICE_FUNC static inline Scalar run(const Scalar& x)\n  {\n    EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar)\n    #if EIGEN_HAS_CXX11_MATH\n    using std::expm1;\n    #else\n    using std_fallback::expm1;\n    #endif\n    return expm1(x);\n  }\n};\n\ntemplate<typename Scalar>\nstruct expm1_retval\n{\n  typedef Scalar type;\n};\n\n/****************************************************************************\n* Implementation of log                                                     *\n****************************************************************************/\n\n// Complex log defined in MathFunctionsImpl.h.\ntemplate<typename T> EIGEN_DEVICE_FUNC std::complex<T> complex_log(const std::complex<T>& z);\n\ntemplate<typename Scalar>\nstruct log_impl {\n  EIGEN_DEVICE_FUNC static inline Scalar run(const Scalar& x)\n  {\n    EIGEN_USING_STD(log);\n    return static_cast<Scalar>(log(x));\n  }\n};\n\ntemplate<typename Scalar>\nstruct log_impl<std::complex<Scalar> > {\n  EIGEN_DEVICE_FUNC static inline std::complex<Scalar> run(const std::complex<Scalar>& z)\n  {\n    return complex_log(z);\n  }\n};\n\n/****************************************************************************\n* Implementation of log1p                                                   *\n****************************************************************************/\n\nnamespace std_fallback {\n  // fallback log1p implementation in case there is no log1p(Scalar) function in namespace of Scalar,\n  // or that there is no suitable std::log1p function available\n  template<typename Scalar>\n  EIGEN_DEVICE_FUNC inline Scalar log1p(const Scalar& x) {\n    EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar)\n    typedef typename NumTraits<Scalar>::Real RealScalar;\n    EIGEN_USING_STD(log);\n    Scalar x1p = RealScalar(1) + x;\n    Scalar log_1p = 
log_impl<Scalar>::run(x1p);\n    const bool is_small = numext::equal_strict(x1p, Scalar(1));\n    const bool is_inf = numext::equal_strict(x1p, log_1p);\n    return (is_small || is_inf) ? x : x * (log_1p / (x1p - RealScalar(1)));\n  }\n}\n\ntemplate<typename Scalar>\nstruct log1p_impl {\n  EIGEN_DEVICE_FUNC static inline Scalar run(const Scalar& x)\n  {\n    EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar)\n    #if EIGEN_HAS_CXX11_MATH\n    using std::log1p;\n    #else\n    using std_fallback::log1p;\n    #endif\n    return log1p(x);\n  }\n};\n\n// Specialization for complex types that are not supported by std::log1p.\ntemplate <typename RealScalar>\nstruct log1p_impl<std::complex<RealScalar> > {\n  EIGEN_DEVICE_FUNC static inline std::complex<RealScalar> run(\n      const std::complex<RealScalar>& x) {\n    EIGEN_STATIC_ASSERT_NON_INTEGER(RealScalar)\n    return std_fallback::log1p(x);\n  }\n};\n\ntemplate<typename Scalar>\nstruct log1p_retval\n{\n  typedef Scalar type;\n};\n\n/****************************************************************************\n* Implementation of pow                                                  *\n****************************************************************************/\n\ntemplate<typename ScalarX,typename ScalarY, bool IsInteger = NumTraits<ScalarX>::IsInteger&&NumTraits<ScalarY>::IsInteger>\nstruct pow_impl\n{\n  //typedef Scalar retval;\n  typedef typename ScalarBinaryOpTraits<ScalarX,ScalarY,internal::scalar_pow_op<ScalarX,ScalarY> >::ReturnType result_type;\n  static EIGEN_DEVICE_FUNC inline result_type run(const ScalarX& x, const ScalarY& y)\n  {\n    EIGEN_USING_STD(pow);\n    return pow(x, y);\n  }\n};\n\ntemplate<typename ScalarX,typename ScalarY>\nstruct pow_impl<ScalarX,ScalarY, true>\n{\n  typedef ScalarX result_type;\n  static EIGEN_DEVICE_FUNC inline ScalarX run(ScalarX x, ScalarY y)\n  {\n    ScalarX res(1);\n    eigen_assert(!NumTraits<ScalarY>::IsSigned || y >= 0);\n    if(y & 1) res *= x;\n    y >>= 1;\n    while(y)\n    {\n      x *= x;\n      if(y&1) res *= x;\n      y >>= 1;\n    }\n    return res;\n  }\n};\n\n/****************************************************************************\n* Implementation of random                                               *\n****************************************************************************/\n\ntemplate<typename Scalar,\n         bool IsComplex,\n         bool IsInteger>\nstruct random_default_impl {};\n\ntemplate<typename Scalar>\nstruct random_impl : random_default_impl<Scalar, NumTraits<Scalar>::IsComplex, NumTraits<Scalar>::IsInteger> {};\n\ntemplate<typename Scalar>\nstruct random_retval\n{\n  typedef Scalar type;\n};\n\ntemplate<typename Scalar> inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random(const Scalar& x, const Scalar& y);\ntemplate<typename Scalar> inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random();\n\ntemplate<typename Scalar>\nstruct random_default_impl<Scalar, false, false>\n{\n  static inline Scalar run(const Scalar& x, const Scalar& y)\n  {\n    return x + (y-x) * Scalar(std::rand()) / Scalar(RAND_MAX);\n  }\n  static inline Scalar run()\n  {\n    return run(Scalar(NumTraits<Scalar>::IsSigned ? -1 : 0), Scalar(1));\n  }\n};\n\nenum {\n  meta_floor_log2_terminate,\n  meta_floor_log2_move_up,\n  meta_floor_log2_move_down,\n  meta_floor_log2_bogus\n};\n\ntemplate<unsigned int n, int lower, int upper> struct meta_floor_log2_selector\n{\n  enum { middle = (lower + upper) / 2,\n         value = (upper <= lower + 1) ? 
int(meta_floor_log2_terminate)\n               : (n < (1 << middle)) ? int(meta_floor_log2_move_down)\n               : (n==0) ? int(meta_floor_log2_bogus)\n               : int(meta_floor_log2_move_up)\n  };\n};\n\ntemplate<unsigned int n,\n         int lower = 0,\n         int upper = sizeof(unsigned int) * CHAR_BIT - 1,\n         int selector = meta_floor_log2_selector<n, lower, upper>::value>\nstruct meta_floor_log2 {};\n\ntemplate<unsigned int n, int lower, int upper>\nstruct meta_floor_log2<n, lower, upper, meta_floor_log2_move_down>\n{\n  enum { value = meta_floor_log2<n, lower, meta_floor_log2_selector<n, lower, upper>::middle>::value };\n};\n\ntemplate<unsigned int n, int lower, int upper>\nstruct meta_floor_log2<n, lower, upper, meta_floor_log2_move_up>\n{\n  enum { value = meta_floor_log2<n, meta_floor_log2_selector<n, lower, upper>::middle, upper>::value };\n};\n\ntemplate<unsigned int n, int lower, int upper>\nstruct meta_floor_log2<n, lower, upper, meta_floor_log2_terminate>\n{\n  enum { value = (n >= ((unsigned int)(1) << (lower+1))) ? lower+1 : lower };\n};\n\ntemplate<unsigned int n, int lower, int upper>\nstruct meta_floor_log2<n, lower, upper, meta_floor_log2_bogus>\n{\n  // no value, error at compile time\n};\n\ntemplate<typename Scalar>\nstruct random_default_impl<Scalar, false, true>\n{\n  static inline Scalar run(const Scalar& x, const Scalar& y)\n  {\n    if (y <= x)\n      return x;\n    // ScalarU is the unsigned counterpart of Scalar, possibly Scalar itself.\n    typedef typename make_unsigned<Scalar>::type ScalarU;\n    // ScalarX is the widest of ScalarU and unsigned int.\n    // We'll deal only with ScalarX and unsigned int below thus avoiding signed\n    // types and arithmetic and signed overflows (which are undefined behavior).\n    typedef typename conditional<(ScalarU(-1) > unsigned(-1)), ScalarU, unsigned>::type ScalarX;\n    // The following difference doesn't overflow, provided our integer types are two's\n    // complement and have the same number of padding bits in signed and unsigned variants.\n    // This is the case in most modern implementations of C++.\n    ScalarX range = ScalarX(y) - ScalarX(x);\n    ScalarX offset = 0;\n    ScalarX divisor = 1;\n    ScalarX multiplier = 1;\n    const unsigned rand_max = RAND_MAX;\n    if (range <= rand_max) divisor = (rand_max + 1) / (range + 1);\n    else                   multiplier = 1 + range / (rand_max + 1);\n    // Rejection sampling.\n    do {\n      offset = (unsigned(std::rand()) * multiplier) / divisor;\n    } while (offset > range);\n    return Scalar(ScalarX(x) + offset);\n  }\n\n  static inline Scalar run()\n  {\n#ifdef EIGEN_MAKING_DOCS\n    return run(Scalar(NumTraits<Scalar>::IsSigned ? -10 : 0), Scalar(10));\n#else\n    enum { rand_bits = meta_floor_log2<(unsigned int)(RAND_MAX)+1>::value,\n           scalar_bits = sizeof(Scalar) * CHAR_BIT,\n           shift = EIGEN_PLAIN_ENUM_MAX(0, int(rand_bits) - int(scalar_bits)),\n           offset = NumTraits<Scalar>::IsSigned ? 
(1 << (EIGEN_PLAIN_ENUM_MIN(rand_bits,scalar_bits)-1)) : 0\n    };\n    return Scalar((std::rand() >> shift) - offset);\n#endif\n  }\n};\n\ntemplate<typename Scalar>\nstruct random_default_impl<Scalar, true, false>\n{\n  static inline Scalar run(const Scalar& x, const Scalar& y)\n  {\n    return Scalar(random(x.real(), y.real()),\n                  random(x.imag(), y.imag()));\n  }\n  static inline Scalar run()\n  {\n    typedef typename NumTraits<Scalar>::Real RealScalar;\n    return Scalar(random<RealScalar>(), random<RealScalar>());\n  }\n};\n\ntemplate<typename Scalar>\ninline EIGEN_MATHFUNC_RETVAL(random, Scalar) random(const Scalar& x, const Scalar& y)\n{\n  return EIGEN_MATHFUNC_IMPL(random, Scalar)::run(x, y);\n}\n\ntemplate<typename Scalar>\ninline EIGEN_MATHFUNC_RETVAL(random, Scalar) random()\n{\n  return EIGEN_MATHFUNC_IMPL(random, Scalar)::run();\n}\n\n// Implementation of is* functions\n\n// std::is* do not work with fast-math and gcc, std::is* are available on MSVC 2013 and newer, as well as in clang.\n#if (EIGEN_HAS_CXX11_MATH && !(EIGEN_COMP_GNUC_STRICT && __FINITE_MATH_ONLY__)) || (EIGEN_COMP_MSVC>=1800) || (EIGEN_COMP_CLANG)\n#define EIGEN_USE_STD_FPCLASSIFY 1\n#else\n#define EIGEN_USE_STD_FPCLASSIFY 0\n#endif\n\ntemplate<typename T>\nEIGEN_DEVICE_FUNC\ntypename internal::enable_if<internal::is_integral<T>::value,bool>::type\nisnan_impl(const T&) { return false; }\n\ntemplate<typename T>\nEIGEN_DEVICE_FUNC\ntypename internal::enable_if<internal::is_integral<T>::value,bool>::type\nisinf_impl(const T&) { return false; }\n\ntemplate<typename T>\nEIGEN_DEVICE_FUNC\ntypename internal::enable_if<internal::is_integral<T>::value,bool>::type\nisfinite_impl(const T&) { return true; }\n\ntemplate<typename T>\nEIGEN_DEVICE_FUNC\ntypename internal::enable_if<(!internal::is_integral<T>::value)&&(!NumTraits<T>::IsComplex),bool>::type\nisfinite_impl(const T& x)\n{\n  #if defined(EIGEN_GPU_COMPILE_PHASE)\n    return (::isfinite)(x);\n  #elif EIGEN_USE_STD_FPCLASSIFY\n    using std::isfinite;\n    return isfinite EIGEN_NOT_A_MACRO (x);\n  #else\n    return x<=NumTraits<T>::highest() && x>=NumTraits<T>::lowest();\n  #endif\n}\n\ntemplate<typename T>\nEIGEN_DEVICE_FUNC\ntypename internal::enable_if<(!internal::is_integral<T>::value)&&(!NumTraits<T>::IsComplex),bool>::type\nisinf_impl(const T& x)\n{\n  #if defined(EIGEN_GPU_COMPILE_PHASE)\n    return (::isinf)(x);\n  #elif EIGEN_USE_STD_FPCLASSIFY\n    using std::isinf;\n    return isinf EIGEN_NOT_A_MACRO (x);\n  #else\n    return x>NumTraits<T>::highest() || x<NumTraits<T>::lowest();\n  #endif\n}\n\ntemplate<typename T>\nEIGEN_DEVICE_FUNC\ntypename internal::enable_if<(!internal::is_integral<T>::value)&&(!NumTraits<T>::IsComplex),bool>::type\nisnan_impl(const T& x)\n{\n  #if defined(EIGEN_GPU_COMPILE_PHASE)\n    return (::isnan)(x);\n  #elif EIGEN_USE_STD_FPCLASSIFY\n    using std::isnan;\n    return isnan EIGEN_NOT_A_MACRO (x);\n  #else\n    return x != x;\n  #endif\n}\n\n#if (!EIGEN_USE_STD_FPCLASSIFY)\n\n#if EIGEN_COMP_MSVC\n\ntemplate<typename T> EIGEN_DEVICE_FUNC bool isinf_msvc_helper(T x)\n{\n  return _fpclass(x)==_FPCLASS_NINF || _fpclass(x)==_FPCLASS_PINF;\n}\n\n//MSVC defines a _isnan builtin function, but for double only\nEIGEN_DEVICE_FUNC inline bool isnan_impl(const long double& x) { return _isnan(x)!=0; }\nEIGEN_DEVICE_FUNC inline bool isnan_impl(const double& x)      { return _isnan(x)!=0; }\nEIGEN_DEVICE_FUNC inline bool isnan_impl(const float& x)       { return _isnan(x)!=0; }\n\nEIGEN_DEVICE_FUNC inline bool 
isinf_impl(const long double& x) { return isinf_msvc_helper(x); }\nEIGEN_DEVICE_FUNC inline bool isinf_impl(const double& x)      { return isinf_msvc_helper(x); }\nEIGEN_DEVICE_FUNC inline bool isinf_impl(const float& x)       { return isinf_msvc_helper(x); }\n\n#elif (defined __FINITE_MATH_ONLY__ && __FINITE_MATH_ONLY__ && EIGEN_COMP_GNUC)\n\n#if EIGEN_GNUC_AT_LEAST(5,0)\n  #define EIGEN_TMP_NOOPT_ATTRIB EIGEN_DEVICE_FUNC inline __attribute__((optimize(\"no-finite-math-only\")))\n#else\n  // NOTE the inline qualifier and noinline attribute are both needed: the former is to avoid a linking issue (duplicate symbol),\n  //      while the second prevents overly aggressive optimizations in fast-math mode:\n  #define EIGEN_TMP_NOOPT_ATTRIB EIGEN_DEVICE_FUNC inline __attribute__((noinline,optimize(\"no-finite-math-only\")))\n#endif\n\ntemplate<> EIGEN_TMP_NOOPT_ATTRIB bool isnan_impl(const long double& x) { return __builtin_isnan(x); }\ntemplate<> EIGEN_TMP_NOOPT_ATTRIB bool isnan_impl(const double& x)      { return __builtin_isnan(x); }\ntemplate<> EIGEN_TMP_NOOPT_ATTRIB bool isnan_impl(const float& x)       { return __builtin_isnan(x); }\ntemplate<> EIGEN_TMP_NOOPT_ATTRIB bool isinf_impl(const double& x)      { return __builtin_isinf(x); }\ntemplate<> EIGEN_TMP_NOOPT_ATTRIB bool isinf_impl(const float& x)       { return __builtin_isinf(x); }\ntemplate<> EIGEN_TMP_NOOPT_ATTRIB bool isinf_impl(const long double& x) { return __builtin_isinf(x); }\n\n#undef EIGEN_TMP_NOOPT_ATTRIB\n\n#endif\n\n#endif\n\n// The following overloads are defined at the end of this file\ntemplate<typename T> EIGEN_DEVICE_FUNC bool isfinite_impl(const std::complex<T>& x);\ntemplate<typename T> EIGEN_DEVICE_FUNC bool isnan_impl(const std::complex<T>& x);\ntemplate<typename T> EIGEN_DEVICE_FUNC bool isinf_impl(const std::complex<T>& x);\n\ntemplate<typename T> T generic_fast_tanh_float(const T& a_x);\n} // end namespace internal\n\n/****************************************************************************\n* Generic math functions                                                    *\n****************************************************************************/\n\nnamespace numext {\n\n#if (!defined(EIGEN_GPUCC) || defined(EIGEN_CONSTEXPR_ARE_DEVICE_FUNC))\ntemplate<typename T>\nEIGEN_DEVICE_FUNC\nEIGEN_ALWAYS_INLINE T mini(const T& x, const T& y)\n{\n  EIGEN_USING_STD(min)\n  return min EIGEN_NOT_A_MACRO (x,y);\n}\n\ntemplate<typename T>\nEIGEN_DEVICE_FUNC\nEIGEN_ALWAYS_INLINE T maxi(const T& x, const T& y)\n{\n  EIGEN_USING_STD(max)\n  return max EIGEN_NOT_A_MACRO (x,y);\n}\n#else\ntemplate<typename T>\nEIGEN_DEVICE_FUNC\nEIGEN_ALWAYS_INLINE T mini(const T& x, const T& y)\n{\n  return y < x ? y : x;\n}\ntemplate<>\nEIGEN_DEVICE_FUNC\nEIGEN_ALWAYS_INLINE float mini(const float& x, const float& y)\n{\n  return fminf(x, y);\n}\ntemplate<>\nEIGEN_DEVICE_FUNC\nEIGEN_ALWAYS_INLINE double mini(const double& x, const double& y)\n{\n  return fmin(x, y);\n}\ntemplate<>\nEIGEN_DEVICE_FUNC\nEIGEN_ALWAYS_INLINE long double mini(const long double& x, const long double& y)\n{\n#if defined(EIGEN_HIPCC)\n  // no \"fminl\" on HIP yet\n  return (x < y) ? x : y;\n#else\n  return fminl(x, y);\n#endif\n}\n\ntemplate<typename T>\nEIGEN_DEVICE_FUNC\nEIGEN_ALWAYS_INLINE T maxi(const T& x, const T& y)\n{\n  return x < y ? 
y : x;\n}\ntemplate<>\nEIGEN_DEVICE_FUNC\nEIGEN_ALWAYS_INLINE float maxi(const float& x, const float& y)\n{\n  return fmaxf(x, y);\n}\ntemplate<>\nEIGEN_DEVICE_FUNC\nEIGEN_ALWAYS_INLINE double maxi(const double& x, const double& y)\n{\n  return fmax(x, y);\n}\ntemplate<>\nEIGEN_DEVICE_FUNC\nEIGEN_ALWAYS_INLINE long double maxi(const long double& x, const long double& y)\n{\n#if defined(EIGEN_HIPCC)\n  // no \"fmaxl\" on HIP yet\n  return (x > y) ? x : y;\n#else\n  return fmaxl(x, y);\n#endif\n}\n#endif\n\n#if defined(SYCL_DEVICE_ONLY)\n\n\n#define SYCL_SPECIALIZE_SIGNED_INTEGER_TYPES_BINARY(NAME, FUNC) \\\n  SYCL_SPECIALIZE_BINARY_FUNC(NAME, FUNC, cl::sycl::cl_char)   \\\n  SYCL_SPECIALIZE_BINARY_FUNC(NAME, FUNC, cl::sycl::cl_short)  \\\n  SYCL_SPECIALIZE_BINARY_FUNC(NAME, FUNC, cl::sycl::cl_int)    \\\n  SYCL_SPECIALIZE_BINARY_FUNC(NAME, FUNC, cl::sycl::cl_long)\n#define SYCL_SPECIALIZE_SIGNED_INTEGER_TYPES_UNARY(NAME, FUNC) \\\n  SYCL_SPECIALIZE_UNARY_FUNC(NAME, FUNC, cl::sycl::cl_char)   \\\n  SYCL_SPECIALIZE_UNARY_FUNC(NAME, FUNC, cl::sycl::cl_short)  \\\n  SYCL_SPECIALIZE_UNARY_FUNC(NAME, FUNC, cl::sycl::cl_int)    \\\n  SYCL_SPECIALIZE_UNARY_FUNC(NAME, FUNC, cl::sycl::cl_long)\n#define SYCL_SPECIALIZE_UNSIGNED_INTEGER_TYPES_BINARY(NAME, FUNC) \\\n  SYCL_SPECIALIZE_BINARY_FUNC(NAME, FUNC, cl::sycl::cl_uchar)  \\\n  SYCL_SPECIALIZE_BINARY_FUNC(NAME, FUNC, cl::sycl::cl_ushort) \\\n  SYCL_SPECIALIZE_BINARY_FUNC(NAME, FUNC, cl::sycl::cl_uint)   \\\n  SYCL_SPECIALIZE_BINARY_FUNC(NAME, FUNC, cl::sycl::cl_ulong)\n#define SYCL_SPECIALIZE_UNSIGNED_INTEGER_TYPES_UNARY(NAME, FUNC) \\\n  SYCL_SPECIALIZE_UNARY_FUNC(NAME, FUNC, cl::sycl::cl_uchar)  \\\n  SYCL_SPECIALIZE_UNARY_FUNC(NAME, FUNC, cl::sycl::cl_ushort) \\\n  SYCL_SPECIALIZE_UNARY_FUNC(NAME, FUNC, cl::sycl::cl_uint)   \\\n  SYCL_SPECIALIZE_UNARY_FUNC(NAME, FUNC, cl::sycl::cl_ulong)\n#define SYCL_SPECIALIZE_INTEGER_TYPES_BINARY(NAME, FUNC) \\\n  SYCL_SPECIALIZE_SIGNED_INTEGER_TYPES_BINARY(NAME, FUNC) \\\n  SYCL_SPECIALIZE_UNSIGNED_INTEGER_TYPES_BINARY(NAME, FUNC)\n#define SYCL_SPECIALIZE_INTEGER_TYPES_UNARY(NAME, FUNC) \\\n  SYCL_SPECIALIZE_SIGNED_INTEGER_TYPES_UNARY(NAME, FUNC) \\\n  SYCL_SPECIALIZE_UNSIGNED_INTEGER_TYPES_UNARY(NAME, FUNC)\n#define SYCL_SPECIALIZE_FLOATING_TYPES_BINARY(NAME, FUNC) \\\n  SYCL_SPECIALIZE_BINARY_FUNC(NAME, FUNC, cl::sycl::cl_float) \\\n  SYCL_SPECIALIZE_BINARY_FUNC(NAME, FUNC,cl::sycl::cl_double)\n#define SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(NAME, FUNC) \\\n  SYCL_SPECIALIZE_UNARY_FUNC(NAME, FUNC, cl::sycl::cl_float) \\\n  SYCL_SPECIALIZE_UNARY_FUNC(NAME, FUNC,cl::sycl::cl_double)\n#define SYCL_SPECIALIZE_FLOATING_TYPES_UNARY_FUNC_RET_TYPE(NAME, FUNC, RET_TYPE) \\\n  SYCL_SPECIALIZE_GEN_UNARY_FUNC(NAME, FUNC, RET_TYPE, cl::sycl::cl_float) \\\n  SYCL_SPECIALIZE_GEN_UNARY_FUNC(NAME, FUNC, RET_TYPE, cl::sycl::cl_double)\n\n#define SYCL_SPECIALIZE_GEN_UNARY_FUNC(NAME, FUNC, RET_TYPE, ARG_TYPE) \\\ntemplate<>                                               \\\n  EIGEN_DEVICE_FUNC                                      \\\n  EIGEN_ALWAYS_INLINE RET_TYPE NAME(const ARG_TYPE& x) { \\\n    return cl::sycl::FUNC(x);                            \\\n  }\n\n#define SYCL_SPECIALIZE_UNARY_FUNC(NAME, FUNC, TYPE) \\\n  SYCL_SPECIALIZE_GEN_UNARY_FUNC(NAME, FUNC, TYPE, TYPE)\n\n#define SYCL_SPECIALIZE_GEN1_BINARY_FUNC(NAME, FUNC, RET_TYPE, ARG_TYPE1, ARG_TYPE2) \\\n  template<>                                                                  \\\n  EIGEN_DEVICE_FUNC                                                           \\\n  
EIGEN_ALWAYS_INLINE RET_TYPE NAME(const ARG_TYPE1& x, const ARG_TYPE2& y) { \\\n    return cl::sycl::FUNC(x, y);                                              \\\n  }\n\n#define SYCL_SPECIALIZE_GEN2_BINARY_FUNC(NAME, FUNC, RET_TYPE, ARG_TYPE) \\\n  SYCL_SPECIALIZE_GEN1_BINARY_FUNC(NAME, FUNC, RET_TYPE, ARG_TYPE, ARG_TYPE)\n\n#define SYCL_SPECIALIZE_BINARY_FUNC(NAME, FUNC, TYPE) \\\n  SYCL_SPECIALIZE_GEN2_BINARY_FUNC(NAME, FUNC, TYPE, TYPE)\n\nSYCL_SPECIALIZE_INTEGER_TYPES_BINARY(mini, min)\nSYCL_SPECIALIZE_FLOATING_TYPES_BINARY(mini, fmin)\nSYCL_SPECIALIZE_INTEGER_TYPES_BINARY(maxi, max)\nSYCL_SPECIALIZE_FLOATING_TYPES_BINARY(maxi, fmax)\n\n#endif\n\n\ntemplate<typename Scalar>\nEIGEN_DEVICE_FUNC\ninline EIGEN_MATHFUNC_RETVAL(real, Scalar) real(const Scalar& x)\n{\n  return EIGEN_MATHFUNC_IMPL(real, Scalar)::run(x);\n}\n\ntemplate<typename Scalar>\nEIGEN_DEVICE_FUNC\ninline typename internal::add_const_on_value_type< EIGEN_MATHFUNC_RETVAL(real_ref, Scalar) >::type real_ref(const Scalar& x)\n{\n  return internal::real_ref_impl<Scalar>::run(x);\n}\n\ntemplate<typename Scalar>\nEIGEN_DEVICE_FUNC\ninline EIGEN_MATHFUNC_RETVAL(real_ref, Scalar) real_ref(Scalar& x)\n{\n  return EIGEN_MATHFUNC_IMPL(real_ref, Scalar)::run(x);\n}\n\ntemplate<typename Scalar>\nEIGEN_DEVICE_FUNC\ninline EIGEN_MATHFUNC_RETVAL(imag, Scalar) imag(const Scalar& x)\n{\n  return EIGEN_MATHFUNC_IMPL(imag, Scalar)::run(x);\n}\n\ntemplate<typename Scalar>\nEIGEN_DEVICE_FUNC\ninline EIGEN_MATHFUNC_RETVAL(arg, Scalar) arg(const Scalar& x)\n{\n  return EIGEN_MATHFUNC_IMPL(arg, Scalar)::run(x);\n}\n\ntemplate<typename Scalar>\nEIGEN_DEVICE_FUNC\ninline typename internal::add_const_on_value_type< EIGEN_MATHFUNC_RETVAL(imag_ref, Scalar) >::type imag_ref(const Scalar& x)\n{\n  return internal::imag_ref_impl<Scalar>::run(x);\n}\n\ntemplate<typename Scalar>\nEIGEN_DEVICE_FUNC\ninline EIGEN_MATHFUNC_RETVAL(imag_ref, Scalar) imag_ref(Scalar& x)\n{\n  return EIGEN_MATHFUNC_IMPL(imag_ref, Scalar)::run(x);\n}\n\ntemplate<typename Scalar>\nEIGEN_DEVICE_FUNC\ninline EIGEN_MATHFUNC_RETVAL(conj, Scalar) conj(const Scalar& x)\n{\n  return EIGEN_MATHFUNC_IMPL(conj, Scalar)::run(x);\n}\n\ntemplate<typename Scalar>\nEIGEN_DEVICE_FUNC\ninline EIGEN_MATHFUNC_RETVAL(abs2, Scalar) abs2(const Scalar& x)\n{\n  return EIGEN_MATHFUNC_IMPL(abs2, Scalar)::run(x);\n}\n\nEIGEN_DEVICE_FUNC\ninline bool abs2(bool x) { return x; }\n\ntemplate<typename T>\nEIGEN_DEVICE_FUNC\nEIGEN_ALWAYS_INLINE T absdiff(const T& x, const T& y)\n{\n  return x > y ? 
x - y : y - x;\n}\ntemplate<>\nEIGEN_DEVICE_FUNC\nEIGEN_ALWAYS_INLINE float absdiff(const float& x, const float& y)\n{\n  return fabsf(x - y);\n}\ntemplate<>\nEIGEN_DEVICE_FUNC\nEIGEN_ALWAYS_INLINE double absdiff(const double& x, const double& y)\n{\n  return fabs(x - y);\n}\n\n#if !defined(EIGEN_GPUCC)\n// HIP and CUDA do not support long double.\ntemplate<>\nEIGEN_DEVICE_FUNC\nEIGEN_ALWAYS_INLINE long double absdiff(const long double& x, const long double& y) {\n  return fabsl(x - y);\n}\n#endif\n\ntemplate<typename Scalar>\nEIGEN_DEVICE_FUNC\ninline EIGEN_MATHFUNC_RETVAL(norm1, Scalar) norm1(const Scalar& x)\n{\n  return EIGEN_MATHFUNC_IMPL(norm1, Scalar)::run(x);\n}\n\ntemplate<typename Scalar>\nEIGEN_DEVICE_FUNC\ninline EIGEN_MATHFUNC_RETVAL(hypot, Scalar) hypot(const Scalar& x, const Scalar& y)\n{\n  return EIGEN_MATHFUNC_IMPL(hypot, Scalar)::run(x, y);\n}\n\n#if defined(SYCL_DEVICE_ONLY)\n  SYCL_SPECIALIZE_FLOATING_TYPES_BINARY(hypot, hypot)\n#endif\n\ntemplate<typename Scalar>\nEIGEN_DEVICE_FUNC\ninline EIGEN_MATHFUNC_RETVAL(log1p, Scalar) log1p(const Scalar& x)\n{\n  return EIGEN_MATHFUNC_IMPL(log1p, Scalar)::run(x);\n}\n\n#if defined(SYCL_DEVICE_ONLY)\nSYCL_SPECIALIZE_FLOATING_TYPES_UNARY(log1p, log1p)\n#endif\n\n#if defined(EIGEN_GPUCC)\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE\nfloat log1p(const float &x) { return ::log1pf(x); }\n\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE\ndouble log1p(const double &x) { return ::log1p(x); }\n#endif\n\ntemplate<typename ScalarX,typename ScalarY>\nEIGEN_DEVICE_FUNC\ninline typename internal::pow_impl<ScalarX,ScalarY>::result_type pow(const ScalarX& x, const ScalarY& y)\n{\n  return internal::pow_impl<ScalarX,ScalarY>::run(x, y);\n}\n\n#if defined(SYCL_DEVICE_ONLY)\nSYCL_SPECIALIZE_FLOATING_TYPES_BINARY(pow, pow)\n#endif\n\ntemplate<typename T> EIGEN_DEVICE_FUNC bool (isnan)   (const T &x) { return internal::isnan_impl(x); }\ntemplate<typename T> EIGEN_DEVICE_FUNC bool (isinf)   (const T &x) { return internal::isinf_impl(x); }\ntemplate<typename T> EIGEN_DEVICE_FUNC bool (isfinite)(const T &x) { return internal::isfinite_impl(x); }\n\n#if defined(SYCL_DEVICE_ONLY)\nSYCL_SPECIALIZE_FLOATING_TYPES_UNARY_FUNC_RET_TYPE(isnan, isnan, bool)\nSYCL_SPECIALIZE_FLOATING_TYPES_UNARY_FUNC_RET_TYPE(isinf, isinf, bool)\nSYCL_SPECIALIZE_FLOATING_TYPES_UNARY_FUNC_RET_TYPE(isfinite, isfinite, bool)\n#endif\n\ntemplate<typename Scalar>\nEIGEN_DEVICE_FUNC\ninline EIGEN_MATHFUNC_RETVAL(rint, Scalar) rint(const Scalar& x)\n{\n  return EIGEN_MATHFUNC_IMPL(rint, Scalar)::run(x);\n}\n\ntemplate<typename Scalar>\nEIGEN_DEVICE_FUNC\ninline EIGEN_MATHFUNC_RETVAL(round, Scalar) round(const Scalar& x)\n{\n  return EIGEN_MATHFUNC_IMPL(round, Scalar)::run(x);\n}\n\n#if defined(SYCL_DEVICE_ONLY)\nSYCL_SPECIALIZE_FLOATING_TYPES_UNARY(round, round)\n#endif\n\ntemplate<typename T>\nEIGEN_DEVICE_FUNC\nT (floor)(const T& x)\n{\n  EIGEN_USING_STD(floor)\n  return floor(x);\n}\n\n#if defined(SYCL_DEVICE_ONLY)\nSYCL_SPECIALIZE_FLOATING_TYPES_UNARY(floor, floor)\n#endif\n\n#if defined(EIGEN_GPUCC)\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE\nfloat floor(const float &x) { return ::floorf(x); }\n\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE\ndouble floor(const double &x) { return ::floor(x); }\n#endif\n\ntemplate<typename T>\nEIGEN_DEVICE_FUNC\nT (ceil)(const T& x)\n{\n  EIGEN_USING_STD(ceil);\n  return ceil(x);\n}\n\n#if defined(SYCL_DEVICE_ONLY)\nSYCL_SPECIALIZE_FLOATING_TYPES_UNARY(ceil, ceil)\n#endif\n\n#if defined(EIGEN_GPUCC)\ntemplate<> EIGEN_DEVICE_FUNC 
EIGEN_ALWAYS_INLINE\nfloat ceil(const float &x) { return ::ceilf(x); }\n\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE\ndouble ceil(const double &x) { return ::ceil(x); }\n#endif\n\n\n/** Log base 2 for 32-bit positive integers.\n  * Conveniently returns 0 for x==0. */\ninline int log2(int x)\n{\n  eigen_assert(x>=0);\n  unsigned int v(x);\n  static const int table[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };\n  v |= v >> 1;\n  v |= v >> 2;\n  v |= v >> 4;\n  v |= v >> 8;\n  v |= v >> 16;\n  return table[(v * 0x07C4ACDDU) >> 27];\n}\n\n/** \\returns the square root of \\a x.\n  *\n  * It is essentially equivalent to\n  * \\code using std::sqrt; return sqrt(x); \\endcode\n  * but slightly faster for float/double and some compilers (e.g., gcc), thanks to\n  * specializations when SSE is enabled.\n  *\n  * Its usage is justified in performance-critical functions, like norm/normalize.\n  */\ntemplate<typename Scalar>\nEIGEN_DEVICE_FUNC\nEIGEN_ALWAYS_INLINE EIGEN_MATHFUNC_RETVAL(sqrt, Scalar) sqrt(const Scalar& x)\n{\n  return EIGEN_MATHFUNC_IMPL(sqrt, Scalar)::run(x);\n}\n\n// Boolean specialization, avoids implicit float to bool conversion (-Wimplicit-conversion-floating-point-to-bool).\ntemplate<>\nEIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_DEVICE_FUNC\nbool sqrt<bool>(const bool &x) { return x; }\n\n#if defined(SYCL_DEVICE_ONLY)\nSYCL_SPECIALIZE_FLOATING_TYPES_UNARY(sqrt, sqrt)\n#endif\n\n/** \\returns the reciprocal square root of \\a x. **/\ntemplate<typename T>\nEIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE\nT rsqrt(const T& x)\n{\n  return internal::rsqrt_impl<T>::run(x);\n}\n\ntemplate<typename T>\nEIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE\nT log(const T &x) {\n  return internal::log_impl<T>::run(x);\n}\n\n#if defined(SYCL_DEVICE_ONLY)\nSYCL_SPECIALIZE_FLOATING_TYPES_UNARY(log, log)\n#endif\n\n\n#if defined(EIGEN_GPUCC)\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE\nfloat log(const float &x) { return ::logf(x); }\n\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE\ndouble log(const double &x) { return ::log(x); }\n#endif\n\ntemplate<typename T>\nEIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE\ntypename internal::enable_if<NumTraits<T>::IsSigned || NumTraits<T>::IsComplex,typename NumTraits<T>::Real>::type\nabs(const T &x) {\n  EIGEN_USING_STD(abs);\n  return abs(x);\n}\n\ntemplate<typename T>\nEIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE\ntypename internal::enable_if<!(NumTraits<T>::IsSigned || NumTraits<T>::IsComplex),typename NumTraits<T>::Real>::type\nabs(const T &x) {\n  return x;\n}\n\n#if defined(SYCL_DEVICE_ONLY)\nSYCL_SPECIALIZE_INTEGER_TYPES_UNARY(abs, abs)\nSYCL_SPECIALIZE_FLOATING_TYPES_UNARY(abs, fabs)\n#endif\n\n#if defined(EIGEN_GPUCC)\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE\nfloat abs(const float &x) { return ::fabsf(x); }\n\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE\ndouble abs(const double &x) { return ::fabs(x); }\n\ntemplate <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE\nfloat abs(const std::complex<float>& x) {\n  return ::hypotf(x.real(), x.imag());\n}\n\ntemplate <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE\ndouble abs(const std::complex<double>& x) {\n  return ::hypot(x.real(), x.imag());\n}\n#endif\n\ntemplate<typename T>\nEIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE\nT exp(const T &x) {\n  EIGEN_USING_STD(exp);\n  return exp(x);\n}\n\n#if defined(SYCL_DEVICE_ONLY)\nSYCL_SPECIALIZE_FLOATING_TYPES_UNARY(exp, exp)\n#endif\n\n#if defined(EIGEN_GPUCC)\ntemplate<> EIGEN_DEVICE_FUNC 
EIGEN_ALWAYS_INLINE\nfloat exp(const float &x) { return ::expf(x); }\n\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE\ndouble exp(const double &x) { return ::exp(x); }\n\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE\nstd::complex<float> exp(const std::complex<float>& x) {\n  float com = ::expf(x.real());\n  float res_real = com * ::cosf(x.imag());\n  float res_imag = com * ::sinf(x.imag());\n  return std::complex<float>(res_real, res_imag);\n}\n\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE\nstd::complex<double> exp(const std::complex<double>& x) {\n  double com = ::exp(x.real());\n  double res_real = com * ::cos(x.imag());\n  double res_imag = com * ::sin(x.imag());\n  return std::complex<double>(res_real, res_imag);\n}\n#endif\n\ntemplate<typename Scalar>\nEIGEN_DEVICE_FUNC\ninline EIGEN_MATHFUNC_RETVAL(expm1, Scalar) expm1(const Scalar& x)\n{\n  return EIGEN_MATHFUNC_IMPL(expm1, Scalar)::run(x);\n}\n\n#if defined(SYCL_DEVICE_ONLY)\nSYCL_SPECIALIZE_FLOATING_TYPES_UNARY(expm1, expm1)\n#endif\n\n#if defined(EIGEN_GPUCC)\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE\nfloat expm1(const float &x) { return ::expm1f(x); }\n\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE\ndouble expm1(const double &x) { return ::expm1(x); }\n#endif\n\ntemplate<typename T>\nEIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE\nT cos(const T &x) {\n  EIGEN_USING_STD(cos);\n  return cos(x);\n}\n\n#if defined(SYCL_DEVICE_ONLY)\nSYCL_SPECIALIZE_FLOATING_TYPES_UNARY(cos,cos)\n#endif\n\n#if defined(EIGEN_GPUCC)\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE\nfloat cos(const float &x) { return ::cosf(x); }\n\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE\ndouble cos(const double &x) { return ::cos(x); }\n#endif\n\ntemplate<typename T>\nEIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE\nT sin(const T &x) {\n  EIGEN_USING_STD(sin);\n  return sin(x);\n}\n\n#if defined(SYCL_DEVICE_ONLY)\nSYCL_SPECIALIZE_FLOATING_TYPES_UNARY(sin, sin)\n#endif\n\n#if defined(EIGEN_GPUCC)\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE\nfloat sin(const float &x) { return ::sinf(x); }\n\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE\ndouble sin(const double &x) { return ::sin(x); }\n#endif\n\ntemplate<typename T>\nEIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE\nT tan(const T &x) {\n  EIGEN_USING_STD(tan);\n  return tan(x);\n}\n\n#if defined(SYCL_DEVICE_ONLY)\nSYCL_SPECIALIZE_FLOATING_TYPES_UNARY(tan, tan)\n#endif\n\n#if defined(EIGEN_GPUCC)\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE\nfloat tan(const float &x) { return ::tanf(x); }\n\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE\ndouble tan(const double &x) { return ::tan(x); }\n#endif\n\ntemplate<typename T>\nEIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE\nT acos(const T &x) {\n  EIGEN_USING_STD(acos);\n  return acos(x);\n}\n\n#if EIGEN_HAS_CXX11_MATH\ntemplate<typename T>\nEIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE\nT acosh(const T &x) {\n  EIGEN_USING_STD(acosh);\n  return static_cast<T>(acosh(x));\n}\n#endif\n\n#if defined(SYCL_DEVICE_ONLY)\nSYCL_SPECIALIZE_FLOATING_TYPES_UNARY(acos, acos)\nSYCL_SPECIALIZE_FLOATING_TYPES_UNARY(acosh, acosh)\n#endif\n\n#if defined(EIGEN_GPUCC)\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE\nfloat acos(const float &x) { return ::acosf(x); }\n\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE\ndouble acos(const double &x) { return ::acos(x); }\n#endif\n\ntemplate<typename T>\nEIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE\nT asin(const T &x) {\n  EIGEN_USING_STD(asin);\n  return asin(x);\n}\n\n#if EIGEN_HAS_CXX11_MATH\ntemplate<typename T>\nEIGEN_DEVICE_FUNC 
EIGEN_ALWAYS_INLINE\nT asinh(const T &x) {\n  EIGEN_USING_STD(asinh);\n  return static_cast<T>(asinh(x));\n}\n#endif\n\n#if defined(SYCL_DEVICE_ONLY)\nSYCL_SPECIALIZE_FLOATING_TYPES_UNARY(asin, asin)\nSYCL_SPECIALIZE_FLOATING_TYPES_UNARY(asinh, asinh)\n#endif\n\n#if defined(EIGEN_GPUCC)\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE\nfloat asin(const float &x) { return ::asinf(x); }\n\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE\ndouble asin(const double &x) { return ::asin(x); }\n#endif\n\ntemplate<typename T>\nEIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE\nT atan(const T &x) {\n  EIGEN_USING_STD(atan);\n  return static_cast<T>(atan(x));\n}\n\n#if EIGEN_HAS_CXX11_MATH\ntemplate<typename T>\nEIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE\nT atanh(const T &x) {\n  EIGEN_USING_STD(atanh);\n  return static_cast<T>(atanh(x));\n}\n#endif\n\n#if defined(SYCL_DEVICE_ONLY)\nSYCL_SPECIALIZE_FLOATING_TYPES_UNARY(atan, atan)\nSYCL_SPECIALIZE_FLOATING_TYPES_UNARY(atanh, atanh)\n#endif\n\n#if defined(EIGEN_GPUCC)\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE\nfloat atan(const float &x) { return ::atanf(x); }\n\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE\ndouble atan(const double &x) { return ::atan(x); }\n#endif\n\n\ntemplate<typename T>\nEIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE\nT cosh(const T &x) {\n  EIGEN_USING_STD(cosh);\n  return static_cast<T>(cosh(x));\n}\n\n#if defined(SYCL_DEVICE_ONLY)\nSYCL_SPECIALIZE_FLOATING_TYPES_UNARY(cosh, cosh)\n#endif\n\n#if defined(EIGEN_GPUCC)\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE\nfloat cosh(const float &x) { return ::coshf(x); }\n\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE\ndouble cosh(const double &x) { return ::cosh(x); }\n#endif\n\ntemplate<typename T>\nEIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE\nT sinh(const T &x) {\n  EIGEN_USING_STD(sinh);\n  return static_cast<T>(sinh(x));\n}\n\n#if defined(SYCL_DEVICE_ONLY)\nSYCL_SPECIALIZE_FLOATING_TYPES_UNARY(sinh, sinh)\n#endif\n\n#if defined(EIGEN_GPUCC)\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE\nfloat sinh(const float &x) { return ::sinhf(x); }\n\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE\ndouble sinh(const double &x) { return ::sinh(x); }\n#endif\n\ntemplate<typename T>\nEIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE\nT tanh(const T &x) {\n  EIGEN_USING_STD(tanh);\n  return tanh(x);\n}\n\n#if (!defined(EIGEN_GPUCC)) && EIGEN_FAST_MATH && !defined(SYCL_DEVICE_ONLY)\nEIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE\nfloat tanh(float x) { return internal::generic_fast_tanh_float(x); }\n#endif\n\n#if defined(SYCL_DEVICE_ONLY)\nSYCL_SPECIALIZE_FLOATING_TYPES_UNARY(tanh, tanh)\n#endif\n\n#if defined(EIGEN_GPUCC)\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE\nfloat tanh(const float &x) { return ::tanhf(x); }\n\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE\ndouble tanh(const double &x) { return ::tanh(x); }\n#endif\n\ntemplate <typename T>\nEIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE\nT fmod(const T& a, const T& b) {\n  EIGEN_USING_STD(fmod);\n  return fmod(a, b);\n}\n\n#if defined(SYCL_DEVICE_ONLY)\nSYCL_SPECIALIZE_FLOATING_TYPES_BINARY(fmod, fmod)\n#endif\n\n#if defined(EIGEN_GPUCC)\ntemplate <>\nEIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE\nfloat fmod(const float& a, const float& b) {\n  return ::fmodf(a, b);\n}\n\ntemplate <>\nEIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE\ndouble fmod(const double& a, const double& b) {\n  return ::fmod(a, b);\n}\n#endif\n\n#if defined(SYCL_DEVICE_ONLY)\n#undef SYCL_SPECIALIZE_SIGNED_INTEGER_TYPES_BINARY\n#undef SYCL_SPECIALIZE_SIGNED_INTEGER_TYPES_UNARY\n#undef 
SYCL_SPECIALIZE_UNSIGNED_INTEGER_TYPES_BINARY\n#undef SYCL_SPECIALIZE_UNSIGNED_INTEGER_TYPES_UNARY\n#undef SYCL_SPECIALIZE_INTEGER_TYPES_BINARY\n#undef SYCL_SPECIALIZE_UNSIGNED_INTEGER_TYPES_UNARY\n#undef SYCL_SPECIALIZE_FLOATING_TYPES_BINARY\n#undef SYCL_SPECIALIZE_FLOATING_TYPES_UNARY\n#undef SYCL_SPECIALIZE_FLOATING_TYPES_UNARY_FUNC_RET_TYPE\n#undef SYCL_SPECIALIZE_GEN_UNARY_FUNC\n#undef SYCL_SPECIALIZE_UNARY_FUNC\n#undef SYCL_SPECIALIZE_GEN1_BINARY_FUNC\n#undef SYCL_SPECIALIZE_GEN2_BINARY_FUNC\n#undef SYCL_SPECIALIZE_BINARY_FUNC\n#endif\n\n} // end namespace numext\n\nnamespace internal {\n\ntemplate<typename T>\nEIGEN_DEVICE_FUNC bool isfinite_impl(const std::complex<T>& x)\n{\n  return (numext::isfinite)(numext::real(x)) && (numext::isfinite)(numext::imag(x));\n}\n\ntemplate<typename T>\nEIGEN_DEVICE_FUNC bool isnan_impl(const std::complex<T>& x)\n{\n  return (numext::isnan)(numext::real(x)) || (numext::isnan)(numext::imag(x));\n}\n\ntemplate<typename T>\nEIGEN_DEVICE_FUNC bool isinf_impl(const std::complex<T>& x)\n{\n  return ((numext::isinf)(numext::real(x)) || (numext::isinf)(numext::imag(x))) && (!(numext::isnan)(x));\n}\n\n/****************************************************************************\n* Implementation of fuzzy comparisons                                       *\n****************************************************************************/\n\ntemplate<typename Scalar,\n         bool IsComplex,\n         bool IsInteger>\nstruct scalar_fuzzy_default_impl {};\n\ntemplate<typename Scalar>\nstruct scalar_fuzzy_default_impl<Scalar, false, false>\n{\n  typedef typename NumTraits<Scalar>::Real RealScalar;\n  template<typename OtherScalar> EIGEN_DEVICE_FUNC\n  static inline bool isMuchSmallerThan(const Scalar& x, const OtherScalar& y, const RealScalar& prec)\n  {\n    return numext::abs(x) <= numext::abs(y) * prec;\n  }\n  EIGEN_DEVICE_FUNC\n  static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar& prec)\n  {\n    return numext::abs(x - y) <= numext::mini(numext::abs(x), numext::abs(y)) * prec;\n  }\n  EIGEN_DEVICE_FUNC\n  static inline bool isApproxOrLessThan(const Scalar& x, const Scalar& y, const RealScalar& prec)\n  {\n    return x <= y || isApprox(x, y, prec);\n  }\n};\n\ntemplate<typename Scalar>\nstruct scalar_fuzzy_default_impl<Scalar, false, true>\n{\n  typedef typename NumTraits<Scalar>::Real RealScalar;\n  template<typename OtherScalar> EIGEN_DEVICE_FUNC\n  static inline bool isMuchSmallerThan(const Scalar& x, const Scalar&, const RealScalar&)\n  {\n    return x == Scalar(0);\n  }\n  EIGEN_DEVICE_FUNC\n  static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar&)\n  {\n    return x == y;\n  }\n  EIGEN_DEVICE_FUNC\n  static inline bool isApproxOrLessThan(const Scalar& x, const Scalar& y, const RealScalar&)\n  {\n    return x <= y;\n  }\n};\n\ntemplate<typename Scalar>\nstruct scalar_fuzzy_default_impl<Scalar, true, false>\n{\n  typedef typename NumTraits<Scalar>::Real RealScalar;\n  template<typename OtherScalar> EIGEN_DEVICE_FUNC\n  static inline bool isMuchSmallerThan(const Scalar& x, const OtherScalar& y, const RealScalar& prec)\n  {\n    return numext::abs2(x) <= numext::abs2(y) * prec * prec;\n  }\n  EIGEN_DEVICE_FUNC\n  static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar& prec)\n  {\n    return numext::abs2(x - y) <= numext::mini(numext::abs2(x), numext::abs2(y)) * prec * prec;\n  }\n};\n\ntemplate<typename Scalar>\nstruct scalar_fuzzy_impl : scalar_fuzzy_default_impl<Scalar, 
NumTraits<Scalar>::IsComplex, NumTraits<Scalar>::IsInteger> {};\n\ntemplate<typename Scalar, typename OtherScalar> EIGEN_DEVICE_FUNC\ninline bool isMuchSmallerThan(const Scalar& x, const OtherScalar& y,\n                              const typename NumTraits<Scalar>::Real &precision = NumTraits<Scalar>::dummy_precision())\n{\n  return scalar_fuzzy_impl<Scalar>::template isMuchSmallerThan<OtherScalar>(x, y, precision);\n}\n\ntemplate<typename Scalar> EIGEN_DEVICE_FUNC\ninline bool isApprox(const Scalar& x, const Scalar& y,\n                     const typename NumTraits<Scalar>::Real &precision = NumTraits<Scalar>::dummy_precision())\n{\n  return scalar_fuzzy_impl<Scalar>::isApprox(x, y, precision);\n}\n\ntemplate<typename Scalar> EIGEN_DEVICE_FUNC\ninline bool isApproxOrLessThan(const Scalar& x, const Scalar& y,\n                               const typename NumTraits<Scalar>::Real &precision = NumTraits<Scalar>::dummy_precision())\n{\n  return scalar_fuzzy_impl<Scalar>::isApproxOrLessThan(x, y, precision);\n}\n\n/******************************************\n***  The special case of the  bool type ***\n******************************************/\n\ntemplate<> struct random_impl<bool>\n{\n  static inline bool run()\n  {\n    return random<int>(0,1)==0 ? false : true;\n  }\n\n  static inline bool run(const bool& a, const bool& b)\n  {\n    return random<int>(a, b)==0 ? false : true;\n  }\n};\n\ntemplate<> struct scalar_fuzzy_impl<bool>\n{\n  typedef bool RealScalar;\n\n  template<typename OtherScalar> EIGEN_DEVICE_FUNC\n  static inline bool isMuchSmallerThan(const bool& x, const bool&, const bool&)\n  {\n    return !x;\n  }\n\n  EIGEN_DEVICE_FUNC\n  static inline bool isApprox(bool x, bool y, bool)\n  {\n    return x == y;\n  }\n\n  EIGEN_DEVICE_FUNC\n  static inline bool isApproxOrLessThan(const bool& x, const bool& y, const bool&)\n  {\n    return (!x) || y;\n  }\n\n};\n\n} // end namespace internal\n\n// Default implementations that rely on other numext implementations\nnamespace internal {\n\n// Specialization for complex types that are not supported by std::expm1.\ntemplate <typename RealScalar>\nstruct expm1_impl<std::complex<RealScalar> > {\n  EIGEN_DEVICE_FUNC static inline std::complex<RealScalar> run(\n      const std::complex<RealScalar>& x) {\n    EIGEN_STATIC_ASSERT_NON_INTEGER(RealScalar)\n    RealScalar xr = x.real();\n    RealScalar xi = x.imag();\n    // expm1(z) = exp(z) - 1\n    //          = exp(x +  i * y) - 1\n    //          = exp(x) * (cos(y) + i * sin(y)) - 1\n    //          = exp(x) * cos(y) - 1 + i * exp(x) * sin(y)\n    // Imag(expm1(z)) = exp(x) * sin(y)\n    // Real(expm1(z)) = exp(x) * cos(y) - 1\n    //          = expm1(x) + exp(x) * (cos(y) - 1)\n    //          = expm1(x) - exp(x) * (2 * sin(y / 2) ** 2)\n    RealScalar erm1 = numext::expm1<RealScalar>(xr);\n    RealScalar er = erm1 + RealScalar(1.);\n    RealScalar sin2 = numext::sin(xi / RealScalar(2.));\n    sin2 = sin2 * sin2;\n    RealScalar s = numext::sin(xi);\n    RealScalar real_part = erm1 - RealScalar(2.) 
* er * sin2;\n    return std::complex<RealScalar>(real_part, er * s);\n  }\n};\n\ntemplate<typename T>\nstruct rsqrt_impl {\n  EIGEN_DEVICE_FUNC\n  static EIGEN_ALWAYS_INLINE T run(const T& x) {\n    return T(1)/numext::sqrt(x);\n  }\n};\n\n#if defined(EIGEN_GPU_COMPILE_PHASE)\ntemplate<typename T>\nstruct conj_impl<std::complex<T>, true>\n{\n  EIGEN_DEVICE_FUNC\n  static inline std::complex<T> run(const std::complex<T>& x)\n  {\n    return std::complex<T>(numext::real(x), -numext::imag(x));\n  }\n};\n#endif\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_MATHFUNCTIONS_H\n"
  },
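  {
    "path": "docs/eigen_notes/complex_expm1_sketch.cpp",
    "content": "// Editor's sketch -- NOT part of Eigen or regenie; the file name and its\n// location are hypothetical. It restates, in plain scalar C++, the identity\n// used by Eigen's expm1_impl<std::complex<T> > in MathFunctions.h:\n//   Real(expm1(x + i*y)) = expm1(x) - 2 * exp(x) * sin(y/2)^2\n//   Imag(expm1(x + i*y)) = exp(x) * sin(y)\n// and shows why the rewrite matters: the naive exp(z) - 1 loses roughly half\n// of its significant digits to cancellation for small |z|, while the\n// expm1-based form does not.\n// Build with e.g.: g++ -O2 -o expm1_sketch complex_expm1_sketch.cpp\n\n#include <cmath>\n#include <complex>\n#include <cstdio>\n\n// Stable complex expm1 following the derivation quoted in MathFunctions.h.\nstatic std::complex<double> stable_expm1(const std::complex<double>& z) {\n  const double x = z.real();\n  const double y = z.imag();\n  const double erm1 = std::expm1(x);  // exp(x) - 1, accurate near x == 0\n  const double er = erm1 + 1.0;       // exp(x)\n  const double s2 = std::sin(y / 2.0);\n  return std::complex<double>(erm1 - 2.0 * er * s2 * s2, er * std::sin(y));\n}\n\nint main() {\n  const std::complex<double> z(1e-9, 1e-9);\n  const std::complex<double> naive = std::exp(z) - 1.0;\n  const std::complex<double> stable = stable_expm1(z);\n  // Both real parts should be ~1e-9, but the naive one carries an absolute\n  // error of order 1e-16 from the subtraction, i.e. ~1e-7 relative error.\n  std::printf(\"naive  real part: %.17e\\n\", naive.real());\n  std::printf(\"stable real part: %.17e\\n\", stable.real());\n  return 0;\n}\n"
  },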
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/MathFunctionsImpl.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2014 Pedro Gonnet (pedro.gonnet@gmail.com)\n// Copyright (C) 2016 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_MATHFUNCTIONSIMPL_H\n#define EIGEN_MATHFUNCTIONSIMPL_H\n\nnamespace Eigen {\n\nnamespace internal {\n\n/** \\internal \\returns the hyperbolic tan of \\a a (coeff-wise)\n    Doesn't do anything fancy, just a 13/6-degree rational interpolant which\n    is accurate up to a couple of ulps in the (approximate) range [-8, 8],\n    outside of which tanh(x) = +/-1 in single precision. The input is clamped\n    to the range [-c, c]. The value c is chosen as the smallest value where\n    the approximation evaluates to exactly 1. In the reange [-0.0004, 0.0004]\n    the approxmation tanh(x) ~= x is used for better accuracy as x tends to zero.\n\n    This implementation works on both scalars and packets.\n*/\ntemplate<typename T>\nT generic_fast_tanh_float(const T& a_x)\n{\n  // Clamp the inputs to the range [-c, c]\n#ifdef EIGEN_VECTORIZE_FMA\n  const T plus_clamp = pset1<T>(7.99881172180175781f);\n  const T minus_clamp = pset1<T>(-7.99881172180175781f);\n#else\n  const T plus_clamp = pset1<T>(7.90531110763549805f);\n  const T minus_clamp = pset1<T>(-7.90531110763549805f);\n#endif\n  const T tiny = pset1<T>(0.0004f);\n  const T x = pmax(pmin(a_x, plus_clamp), minus_clamp);\n  const T tiny_mask = pcmp_lt(pabs(a_x), tiny);\n  // The monomial coefficients of the numerator polynomial (odd).\n  const T alpha_1 = pset1<T>(4.89352455891786e-03f);\n  const T alpha_3 = pset1<T>(6.37261928875436e-04f);\n  const T alpha_5 = pset1<T>(1.48572235717979e-05f);\n  const T alpha_7 = pset1<T>(5.12229709037114e-08f);\n  const T alpha_9 = pset1<T>(-8.60467152213735e-11f);\n  const T alpha_11 = pset1<T>(2.00018790482477e-13f);\n  const T alpha_13 = pset1<T>(-2.76076847742355e-16f);\n\n  // The monomial coefficients of the denominator polynomial (even).\n  const T beta_0 = pset1<T>(4.89352518554385e-03f);\n  const T beta_2 = pset1<T>(2.26843463243900e-03f);\n  const T beta_4 = pset1<T>(1.18534705686654e-04f);\n  const T beta_6 = pset1<T>(1.19825839466702e-06f);\n\n  // Since the polynomials are odd/even, we need x^2.\n  const T x2 = pmul(x, x);\n\n  // Evaluate the numerator polynomial p.\n  T p = pmadd(x2, alpha_13, alpha_11);\n  p = pmadd(x2, p, alpha_9);\n  p = pmadd(x2, p, alpha_7);\n  p = pmadd(x2, p, alpha_5);\n  p = pmadd(x2, p, alpha_3);\n  p = pmadd(x2, p, alpha_1);\n  p = pmul(x, p);\n\n  // Evaluate the denominator polynomial q.\n  T q = pmadd(x2, beta_6, beta_4);\n  q = pmadd(x2, q, beta_2);\n  q = pmadd(x2, q, beta_0);\n\n  // Divide the numerator by the denominator.\n  return pselect(tiny_mask, x, pdiv(p, q));\n}\n\ntemplate<typename RealScalar>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\nRealScalar positive_real_hypot(const RealScalar& x, const RealScalar& y)\n{\n  // IEEE IEC 6059 special cases.\n  if ((numext::isinf)(x) || (numext::isinf)(y))\n    return NumTraits<RealScalar>::infinity();\n  if ((numext::isnan)(x) || (numext::isnan)(y))\n    return NumTraits<RealScalar>::quiet_NaN();\n    \n  EIGEN_USING_STD(sqrt);\n  RealScalar p, qp;\n  p = numext::maxi(x,y);\n  if(p==RealScalar(0)) return RealScalar(0);\n  qp = numext::mini(y,x) / p;\n  return p * 
sqrt(RealScalar(1) + qp*qp);\n}\n\ntemplate<typename Scalar>\nstruct hypot_impl\n{\n  typedef typename NumTraits<Scalar>::Real RealScalar;\n  static EIGEN_DEVICE_FUNC\n  inline RealScalar run(const Scalar& x, const Scalar& y)\n  {\n    EIGEN_USING_STD(abs);\n    return positive_real_hypot<RealScalar>(abs(x), abs(y));\n  }\n};\n\n// Generic complex sqrt implementation that correctly handles corner cases\n// according to https://en.cppreference.com/w/cpp/numeric/complex/sqrt\ntemplate<typename T>\nEIGEN_DEVICE_FUNC std::complex<T> complex_sqrt(const std::complex<T>& z) {\n  // Computes the principal sqrt of the input.\n  //\n  // For the complex square root of the number x + i*y, we want to find real\n  // numbers u and v such that\n  //    (u + i*v)^2 = x + i*y  <=>\n  //    u^2 - v^2 + i*2*u*v = x + i*y.\n  // By equating the real and imaginary parts we get:\n  //    u^2 - v^2 = x\n  //    2*u*v = y.\n  //\n  // For x >= 0, this has the numerically stable solution\n  //    u = sqrt(0.5 * (x + sqrt(x^2 + y^2)))\n  //    v = y / (2 * u)\n  // and for x < 0,\n  //    v = sign(y) * sqrt(0.5 * (-x + sqrt(x^2 + y^2)))\n  //    u = y / (2 * v)\n  //\n  // Letting w = sqrt(0.5 * (|x| + |z|)),\n  //   if x == 0: u = w, v = sign(y) * w\n  //   if x > 0:  u = w, v = y / (2 * w)\n  //   if x < 0:  u = |y| / (2 * w), v = sign(y) * w\n\n  const T x = numext::real(z);\n  const T y = numext::imag(z);\n  const T zero = T(0);\n  const T w = numext::sqrt(T(0.5) * (numext::abs(x) + numext::hypot(x, y)));\n\n  return\n    (numext::isinf)(y) ? std::complex<T>(NumTraits<T>::infinity(), y)\n      : x == zero ? std::complex<T>(w, y < zero ? -w : w)\n      : x > zero ? std::complex<T>(w, y / (2 * w))\n      : std::complex<T>(numext::abs(y) / (2 * w), y < zero ? -w : w );\n}\n\n// Generic complex rsqrt implementation.\ntemplate<typename T>\nEIGEN_DEVICE_FUNC std::complex<T> complex_rsqrt(const std::complex<T>& z) {\n  // Computes the principal reciprocal sqrt of the input.\n  //\n  // For the complex reciprocal square root of the number z = x + i*y, we want\n  // to find real numbers u and v such that\n  //    (u + i*v)^2 = 1 / (x + i*y)  <=>\n  //    u^2 - v^2 + i*2*u*v = x/|z|^2 - i*y/|z|^2.\n  // By equating the real and imaginary parts we get:\n  //    u^2 - v^2 = x/|z|^2\n  //    2*u*v = -y/|z|^2.\n  //\n  // For x >= 0, this has the numerically stable solution\n  //    u = sqrt(0.5 * (x + |z|)) / |z|\n  //    v = -y / (2 * u * |z|)\n  // and for x < 0,\n  //    v = -sign(y) * sqrt(0.5 * (-x + |z|)) / |z|\n  //    u = -y / (2 * v * |z|)\n  //\n  // Letting w = sqrt(0.5 * (|x| + |z|)),\n  //   if x == 0: u = w / |z|, v = -sign(y) * w / |z|\n  //   if x > 0:  u = w / |z|, v = -y / (2 * w * |z|)\n  //   if x < 0:  u = |y| / (2 * w * |z|), v = -sign(y) * w / |z|\n\n  const T x = numext::real(z);\n  const T y = numext::imag(z);\n  const T zero = T(0);\n\n  const T abs_z = numext::hypot(x, y);\n  const T w = numext::sqrt(T(0.5) * (numext::abs(x) + abs_z));\n  const T woz = w / abs_z;\n  // Corner cases consistent with 1/sqrt(z) on gcc/clang.\n  return\n    abs_z == zero ? std::complex<T>(NumTraits<T>::infinity(), NumTraits<T>::quiet_NaN())\n      : ((numext::isinf)(x) || (numext::isinf)(y)) ? std::complex<T>(zero, zero)\n      : x == zero ? std::complex<T>(woz, y < zero ? woz : -woz)\n      : x > zero ? std::complex<T>(woz, -y / (2 * w * abs_z))\n      : std::complex<T>(numext::abs(y) / (2 * w * abs_z), y < zero ? 
woz : -woz );\n}\n\ntemplate<typename T>\nEIGEN_DEVICE_FUNC std::complex<T> complex_log(const std::complex<T>& z) {\n  // Computes complex log.\n  T a = numext::abs(z);\n  EIGEN_USING_STD(atan2);\n  T b = atan2(z.imag(), z.real());\n  return std::complex<T>(numext::log(a), b);\n}\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_MATHFUNCTIONSIMPL_H\n"
  },
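  {
    "path": "docs/eigen_notes/fast_tanh_scalar_sketch.cpp",
    "content": "// Editor's sketch -- NOT part of Eigen or regenie; the file name and its\n// location are hypothetical. Scalar restatement of\n// internal::generic_fast_tanh_float from MathFunctionsImpl.h: the same\n// 13/6-degree rational approximant, the same non-FMA clamp threshold and\n// tiny-argument cutoff, written with plain floats instead of packets so the\n// Horner evaluation order is easy to follow.\n// Build with e.g.: g++ -O2 -o tanh_sketch fast_tanh_scalar_sketch.cpp\n\n#include <algorithm>\n#include <cmath>\n#include <cstdio>\n\nstatic float fast_tanh(float a_x) {\n  // tanh(x) ~= x near zero (the tiny_mask branch of the packet version).\n  if (std::fabs(a_x) < 0.0004f) return a_x;\n  // Clamp to [-c, c]; beyond c the approximant evaluates to exactly +/-1.\n  const float x = std::min(std::max(a_x, -7.90531110763549805f),\n                           7.90531110763549805f);\n  const float x2 = x * x;\n  // Numerator: odd polynomial, alpha_13 down to alpha_1, Horner in x^2.\n  float p = 2.00018790482477e-13f - x2 * 2.76076847742355e-16f;\n  p = x2 * p - 8.60467152213735e-11f;\n  p = x2 * p + 5.12229709037114e-08f;\n  p = x2 * p + 1.48572235717979e-05f;\n  p = x2 * p + 6.37261928875436e-04f;\n  p = x2 * p + 4.89352455891786e-03f;\n  p = x * p;\n  // Denominator: even polynomial, beta_6 down to beta_0.\n  float q = 1.18534705686654e-04f + x2 * 1.19825839466702e-06f;\n  q = x2 * q + 2.26843463243900e-03f;\n  q = x2 * q + 4.89352518554385e-03f;\n  return p / q;\n}\n\nint main() {\n  float max_err = 0.0f;\n  for (int i = -9000; i <= 9000; ++i) {\n    const float x = static_cast<float>(i) * 0.001f;\n    max_err = std::max(max_err, std::fabs(fast_tanh(x) - std::tanh(x)));\n  }\n  std::printf(\"max abs error vs std::tanh: %g\\n\", max_err);\n  return 0;\n}\n"
  },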
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/Matrix.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2006-2010 Benoit Jacob <jacob.benoit.1@gmail.com>\n// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_MATRIX_H\n#define EIGEN_MATRIX_H\n\nnamespace Eigen {\n\nnamespace internal {\ntemplate<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>\nstruct traits<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >\n{\nprivate:\n  enum { size = internal::size_at_compile_time<_Rows,_Cols>::ret };\n  typedef typename find_best_packet<_Scalar,size>::type PacketScalar;\n  enum {\n      row_major_bit = _Options&RowMajor ? RowMajorBit : 0,\n      is_dynamic_size_storage = _MaxRows==Dynamic || _MaxCols==Dynamic,\n      max_size = is_dynamic_size_storage ? Dynamic : _MaxRows*_MaxCols,\n      default_alignment = compute_default_alignment<_Scalar,max_size>::value,\n      actual_alignment = ((_Options&DontAlign)==0) ? default_alignment : 0,\n      required_alignment = unpacket_traits<PacketScalar>::alignment,\n      packet_access_bit = (packet_traits<_Scalar>::Vectorizable && (EIGEN_UNALIGNED_VECTORIZE || (actual_alignment>=required_alignment))) ? PacketAccessBit : 0\n    };\n\npublic:\n  typedef _Scalar Scalar;\n  typedef Dense StorageKind;\n  typedef Eigen::Index StorageIndex;\n  typedef MatrixXpr XprKind;\n  enum {\n    RowsAtCompileTime = _Rows,\n    ColsAtCompileTime = _Cols,\n    MaxRowsAtCompileTime = _MaxRows,\n    MaxColsAtCompileTime = _MaxCols,\n    Flags = compute_matrix_flags<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>::ret,\n    Options = _Options,\n    InnerStrideAtCompileTime = 1,\n    OuterStrideAtCompileTime = (Options&RowMajor) ? ColsAtCompileTime : RowsAtCompileTime,\n\n    // FIXME, the following flag in only used to define NeedsToAlign in PlainObjectBase\n    EvaluatorFlags = LinearAccessBit | DirectAccessBit | packet_access_bit | row_major_bit,\n    Alignment = actual_alignment\n  };\n};\n}\n\n/** \\class Matrix\n  * \\ingroup Core_Module\n  *\n  * \\brief The matrix class, also used for vectors and row-vectors\n  *\n  * The %Matrix class is the work-horse for all \\em dense (\\ref dense \"note\") matrices and vectors within Eigen.\n  * Vectors are matrices with one column, and row-vectors are matrices with one row.\n  *\n  * The %Matrix class encompasses \\em both fixed-size and dynamic-size objects (\\ref fixedsize \"note\").\n  *\n  * The first three template parameters are required:\n  * \\tparam _Scalar Numeric type, e.g. float, double, int or std::complex<float>.\n  *                 User defined scalar types are supported as well (see \\ref user_defined_scalars \"here\").\n  * \\tparam _Rows Number of rows, or \\b Dynamic\n  * \\tparam _Cols Number of columns, or \\b Dynamic\n  *\n  * The remaining template parameters are optional -- in most cases you don't have to worry about them.\n  * \\tparam _Options A combination of either \\b #RowMajor or \\b #ColMajor, and of either\n  *                 \\b #AutoAlign or \\b #DontAlign.\n  *                 The former controls \\ref TopicStorageOrders \"storage order\", and defaults to column-major. The latter controls alignment, which is required\n  *                 for vectorization. 
It defaults to aligning matrices except for fixed sizes that aren't a multiple of the packet size.\n  * \\tparam _MaxRows Maximum number of rows. Defaults to \\a _Rows (\\ref maxrows \"note\").\n  * \\tparam _MaxCols Maximum number of columns. Defaults to \\a _Cols (\\ref maxrows \"note\").\n  *\n  * Eigen provides a number of typedefs covering the usual cases. Here are some examples:\n  *\n  * \\li \\c Matrix2d is a 2x2 square matrix of doubles (\\c Matrix<double, 2, 2>)\n  * \\li \\c Vector4f is a vector of 4 floats (\\c Matrix<float, 4, 1>)\n  * \\li \\c RowVector3i is a row-vector of 3 ints (\\c Matrix<int, 1, 3>)\n  *\n  * \\li \\c MatrixXf is a dynamic-size matrix of floats (\\c Matrix<float, Dynamic, Dynamic>)\n  * \\li \\c VectorXf is a dynamic-size vector of floats (\\c Matrix<float, Dynamic, 1>)\n  *\n  * \\li \\c Matrix2Xf is a partially fixed-size (dynamic-size) matrix of floats (\\c Matrix<float, 2, Dynamic>)\n  * \\li \\c MatrixX3d is a partially dynamic-size (fixed-size) matrix of double (\\c Matrix<double, Dynamic, 3>)\n  *\n  * See \\link matrixtypedefs this page \\endlink for a complete list of predefined \\em %Matrix and \\em Vector typedefs.\n  *\n  * You can access elements of vectors and matrices using normal subscripting:\n  *\n  * \\code\n  * Eigen::VectorXd v(10);\n  * v[0] = 0.1;\n  * v[1] = 0.2;\n  * v(0) = 0.3;\n  * v(1) = 0.4;\n  *\n  * Eigen::MatrixXi m(10, 10);\n  * m(0, 1) = 1;\n  * m(0, 2) = 2;\n  * m(0, 3) = 3;\n  * \\endcode\n  *\n  * This class can be extended with the help of the plugin mechanism described on the page\n  * \\ref TopicCustomizing_Plugins by defining the preprocessor symbol \\c EIGEN_MATRIX_PLUGIN.\n  *\n  * <i><b>Some notes:</b></i>\n  *\n  * <dl>\n  * <dt><b>\\anchor dense Dense versus sparse:</b></dt>\n  * <dd>This %Matrix class handles dense, not sparse matrices and vectors. For sparse matrices and vectors, see the Sparse module.\n  *\n  * Dense matrices and vectors are plain usual arrays of coefficients. All the coefficients are stored, in an ordinary contiguous array.\n  * This is unlike Sparse matrices and vectors where the coefficients are stored as a list of nonzero coefficients.</dd>\n  *\n  * <dt><b>\\anchor fixedsize Fixed-size versus dynamic-size:</b></dt>\n  * <dd>Fixed-size means that the numbers of rows and columns are known at compile-time. In this case, Eigen allocates the array\n  * of coefficients as a fixed-size array, as a class member. This makes sense for very small matrices, typically up to 4x4, sometimes up\n  * to 16x16. Larger matrices should be declared as dynamic-size even if one happens to know their size at compile-time.\n  *\n  * Dynamic-size means that the numbers of rows or columns are not necessarily known at compile-time. In this case they are runtime\n  * variables, and the array of coefficients is allocated dynamically on the heap.\n  *\n  * Note that \\em dense matrices, be they Fixed-size or Dynamic-size, <em>do not</em> expand dynamically in the sense of a std::map.\n  * If you want this behavior, see the Sparse module.</dd>\n  *\n  * <dt><b>\\anchor maxrows _MaxRows and _MaxCols:</b></dt>\n  * <dd>In most cases, one just leaves these parameters to the default values.\n  * These parameters mean the maximum size of rows and columns that the matrix may have. They are useful in cases\n  * when the exact numbers of rows and columns are not known at compile-time, but it is known at compile-time that they cannot\n  * exceed a certain value. 
This happens when taking dynamic-size blocks inside fixed-size matrices: in this case _MaxRows and _MaxCols\n  * are the dimensions of the original matrix, while _Rows and _Cols are Dynamic.</dd>\n  * </dl>\n  *\n  * <i><b>ABI and storage layout</b></i>\n  *\n  * The table below summarizes the ABI of some possible Matrix instances which is fixed throughout the lifetime of Eigen 3.\n  * <table  class=\"manual\">\n  * <tr><th>Matrix type</th><th>Equivalent C structure</th></tr>\n  * <tr><td>\\code Matrix<T,Dynamic,Dynamic> \\endcode</td><td>\\code\n  * struct {\n  *   T *data;                  // with (size_t(data)%EIGEN_MAX_ALIGN_BYTES)==0\n  *   Eigen::Index rows, cols;\n  *  };\n  * \\endcode</td></tr>\n  * <tr class=\"alt\"><td>\\code\n  * Matrix<T,Dynamic,1>\n  * Matrix<T,1,Dynamic> \\endcode</td><td>\\code\n  * struct {\n  *   T *data;                  // with (size_t(data)%EIGEN_MAX_ALIGN_BYTES)==0\n  *   Eigen::Index size;\n  *  };\n  * \\endcode</td></tr>\n  * <tr><td>\\code Matrix<T,Rows,Cols> \\endcode</td><td>\\code\n  * struct {\n  *   T data[Rows*Cols];        // with (size_t(data)%A(Rows*Cols*sizeof(T)))==0\n  *  };\n  * \\endcode</td></tr>\n  * <tr class=\"alt\"><td>\\code Matrix<T,Dynamic,Dynamic,0,MaxRows,MaxCols> \\endcode</td><td>\\code\n  * struct {\n  *   T data[MaxRows*MaxCols];  // with (size_t(data)%A(MaxRows*MaxCols*sizeof(T)))==0\n  *   Eigen::Index rows, cols;\n  *  };\n  * \\endcode</td></tr>\n  * </table>\n  * Note that in this table Rows, Cols, MaxRows and MaxCols are all positive integers. A(S) is defined as the largest possible power-of-two\n  * smaller than or equal to EIGEN_MAX_STATIC_ALIGN_BYTES.\n  *\n  * \\see MatrixBase for the majority of the API methods for matrices, \\ref TopicClassHierarchy,\n  * \\ref TopicStorageOrders\n  */\n\ntemplate<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>\nclass Matrix\n  : public PlainObjectBase<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >\n{\n  public:\n\n    /** \\brief Base class typedef.\n      * \\sa PlainObjectBase\n      */\n    typedef PlainObjectBase<Matrix> Base;\n\n    enum { Options = _Options };\n\n    EIGEN_DENSE_PUBLIC_INTERFACE(Matrix)\n\n    typedef typename Base::PlainObject PlainObject;\n\n    using Base::base;\n    using Base::coeffRef;\n\n    /**\n      * \\brief Assigns matrices to each other.\n      *\n      * \\note This is a special case of the templated operator=. Its purpose is\n      * to prevent a default operator= from hiding the templated operator=.\n      *\n      * \\callgraph\n      */\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE Matrix& operator=(const Matrix& other)\n    {\n      return Base::_set(other);\n    }\n\n    /** \\internal\n      * \\brief Copies the value of the expression \\a other into \\c *this with automatic resizing.\n      *\n      * *this might be resized to match the dimensions of \\a other. 
If *this was a null matrix (not already initialized),\n      * it will be initialized.\n      *\n      * Note that copying a row-vector into a vector (and conversely) is allowed.\n      * The resizing, if any, is then done in the appropriate way so that row-vectors\n      * remain row-vectors and vectors remain vectors.\n      */\n    template<typename OtherDerived>\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE Matrix& operator=(const DenseBase<OtherDerived>& other)\n    {\n      return Base::_set(other);\n    }\n\n    /* Here, doxygen failed to copy the brief information when using \\copydoc */\n\n    /**\n      * \\brief Copies the generic expression \\a other into *this.\n      * \\copydetails DenseBase::operator=(const EigenBase<OtherDerived> &other)\n      */\n    template<typename OtherDerived>\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE Matrix& operator=(const EigenBase<OtherDerived> &other)\n    {\n      return Base::operator=(other);\n    }\n\n    template<typename OtherDerived>\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE Matrix& operator=(const ReturnByValue<OtherDerived>& func)\n    {\n      return Base::operator=(func);\n    }\n\n    /** \\brief Default constructor.\n      *\n      * For fixed-size matrices, does nothing.\n      *\n      * For dynamic-size matrices, creates an empty matrix of size 0. Does not allocate any array. Such a matrix\n      * is called a null matrix. This constructor is the unique way to create null matrices: resizing\n      * a matrix to 0 is not supported.\n      *\n      * \\sa resize(Index,Index)\n      */\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n    Matrix() : Base()\n    {\n      Base::_check_template_params();\n      EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED\n    }\n\n    // FIXME is it still needed\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n    explicit Matrix(internal::constructor_without_unaligned_array_assert)\n      : Base(internal::constructor_without_unaligned_array_assert())\n    { Base::_check_template_params(); EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED }\n\n#if EIGEN_HAS_RVALUE_REFERENCES\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n    Matrix(Matrix&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_constructible<Scalar>::value)\n      : Base(std::move(other))\n    {\n      Base::_check_template_params();\n    }\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n    Matrix& operator=(Matrix&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_assignable<Scalar>::value)\n    {\n      Base::operator=(std::move(other));\n      return *this;\n    }\n#endif\n\n#if EIGEN_HAS_CXX11\n    /** \\copydoc PlainObjectBase(const Scalar&, const Scalar&, const Scalar&,  const Scalar&, const ArgTypes&... args)\n     *\n     * Example: \\include Matrix_variadic_ctor_cxx11.cpp\n     * Output: \\verbinclude Matrix_variadic_ctor_cxx11.out\n     *\n     * \\sa Matrix(const std::initializer_list<std::initializer_list<Scalar>>&)\n     */\n    template <typename... ArgTypes>\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n    Matrix(const Scalar& a0, const Scalar& a1, const Scalar& a2,  const Scalar& a3, const ArgTypes&... args)\n      : Base(a0, a1, a2, a3, args...) {}\n\n    /** \\brief Constructs a Matrix and initializes it from the coefficients given as initializer-lists grouped by row. 
\\cpp11\n      *\n      * In the general case, the constructor takes a list of rows, each row being represented as a list of coefficients:\n      *\n      * Example: \\include Matrix_initializer_list_23_cxx11.cpp\n      * Output: \\verbinclude Matrix_initializer_list_23_cxx11.out\n      *\n      * Each of the inner initializer lists must contain the exact same number of elements, otherwise an assertion is triggered.\n      *\n      * In the case of a compile-time column vector, implicit transposition from a single row is allowed.\n      * Therefore <code>VectorXd{{1,2,3,4,5}}</code> is legal and the more verbose syntax\n      * <code>RowVectorXd{{1},{2},{3},{4},{5}}</code> can be avoided:\n      *\n      * Example: \\include Matrix_initializer_list_vector_cxx11.cpp\n      * Output: \\verbinclude Matrix_initializer_list_vector_cxx11.out\n      *\n      * In the case of fixed-sized matrices, the initializer list sizes must exactly match the matrix sizes,\n      * and implicit transposition is allowed for compile-time vectors only.\n      *\n      * \\sa Matrix(const Scalar& a0, const Scalar& a1, const Scalar& a2,  const Scalar& a3, const ArgTypes&... args)\n      */\n    EIGEN_DEVICE_FUNC\n    explicit EIGEN_STRONG_INLINE Matrix(const std::initializer_list<std::initializer_list<Scalar>>& list) : Base(list) {}\n#endif // end EIGEN_HAS_CXX11\n\n#ifndef EIGEN_PARSED_BY_DOXYGEN\n\n    // This constructor is for both 1x1 matrices and dynamic vectors\n    template<typename T>\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n    explicit Matrix(const T& x)\n    {\n      Base::_check_template_params();\n      Base::template _init1<T>(x);\n    }\n\n    template<typename T0, typename T1>\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n    Matrix(const T0& x, const T1& y)\n    {\n      Base::_check_template_params();\n      Base::template _init2<T0,T1>(x, y);\n    }\n\n\n#else\n    /** \\brief Constructs a fixed-sized matrix initialized with coefficients starting at \\a data */\n    EIGEN_DEVICE_FUNC\n    explicit Matrix(const Scalar *data);\n\n    /** \\brief Constructs a vector or row-vector with given dimension. \\only_for_vectors\n      *\n      * This is useful for dynamic-size vectors. For fixed-size vectors,\n      * it is redundant to pass these parameters, so one should use the default constructor\n      * Matrix() instead.\n      *\n      * \\warning This constructor is disabled for fixed-size \\c 1x1 matrices. For instance,\n      * calling Matrix<double,1,1>(1) will call the initialization constructor: Matrix(const Scalar&).\n      * For fixed-size \\c 1x1 matrices it is therefore recommended to use the default\n      * constructor Matrix() instead, especially when using one of the non standard\n      * \\c EIGEN_INITIALIZE_MATRICES_BY_{ZERO,\\c NAN} macros (see \\ref TopicPreprocessorDirectives).\n      */\n    EIGEN_STRONG_INLINE explicit Matrix(Index dim);\n    /** \\brief Constructs an initialized 1x1 matrix with the given coefficient\n      * \\sa Matrix(const Scalar&, const Scalar&, const Scalar&,  const Scalar&, const ArgTypes&...) */\n    Matrix(const Scalar& x);\n    /** \\brief Constructs an uninitialized matrix with \\a rows rows and \\a cols columns.\n      *\n      * This is useful for dynamic-size matrices. For fixed-size matrices,\n      * it is redundant to pass these parameters, so one should use the default constructor\n      * Matrix() instead.\n      *\n      * \\warning This constructor is disabled for fixed-size \\c 1x2 and \\c 2x1 vectors. 
For instance,\n      * calling Matrix2f(2,1) will call the initialization constructor: Matrix(const Scalar& x, const Scalar& y).\n      * For fixed-size \\c 1x2 or \\c 2x1 vectors it is therefore recommended to use the default\n      * constructor Matrix() instead, especially when using one of the non standard\n      * \\c EIGEN_INITIALIZE_MATRICES_BY_{ZERO,\\c NAN} macros (see \\ref TopicPreprocessorDirectives).\n      */\n    EIGEN_DEVICE_FUNC\n    Matrix(Index rows, Index cols);\n\n    /** \\brief Constructs an initialized 2D vector with given coefficients\n      * \\sa Matrix(const Scalar&, const Scalar&, const Scalar&,  const Scalar&, const ArgTypes&...) */\n    Matrix(const Scalar& x, const Scalar& y);\n    #endif  // end EIGEN_PARSED_BY_DOXYGEN\n\n    /** \\brief Constructs an initialized 3D vector with given coefficients\n      * \\sa Matrix(const Scalar&, const Scalar&, const Scalar&,  const Scalar&, const ArgTypes&...)\n      */\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE Matrix(const Scalar& x, const Scalar& y, const Scalar& z)\n    {\n      Base::_check_template_params();\n      EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Matrix, 3)\n      m_storage.data()[0] = x;\n      m_storage.data()[1] = y;\n      m_storage.data()[2] = z;\n    }\n    /** \\brief Constructs an initialized 4D vector with given coefficients\n      * \\sa Matrix(const Scalar&, const Scalar&, const Scalar&,  const Scalar&, const ArgTypes&...)\n      */\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE Matrix(const Scalar& x, const Scalar& y, const Scalar& z, const Scalar& w)\n    {\n      Base::_check_template_params();\n      EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Matrix, 4)\n      m_storage.data()[0] = x;\n      m_storage.data()[1] = y;\n      m_storage.data()[2] = z;\n      m_storage.data()[3] = w;\n    }\n\n\n    /** \\brief Copy constructor */\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE Matrix(const Matrix& other) : Base(other)\n    { }\n\n    /** \\brief Copy constructor for generic expressions.\n      * \\sa MatrixBase::operator=(const EigenBase<OtherDerived>&)\n      */\n    template<typename OtherDerived>\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE Matrix(const EigenBase<OtherDerived> &other)\n      : Base(other.derived())\n    { }\n\n    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n    inline Index innerStride() const EIGEN_NOEXCEPT { return 1; }\n    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n    inline Index outerStride() const EIGEN_NOEXCEPT { return this->innerSize(); }\n\n    /////////// Geometry module ///////////\n\n    template<typename OtherDerived>\n    EIGEN_DEVICE_FUNC\n    explicit Matrix(const RotationBase<OtherDerived,ColsAtCompileTime>& r);\n    template<typename OtherDerived>\n    EIGEN_DEVICE_FUNC\n    Matrix& operator=(const RotationBase<OtherDerived,ColsAtCompileTime>& r);\n\n    // allow to extend Matrix outside Eigen\n    #ifdef EIGEN_MATRIX_PLUGIN\n    #include EIGEN_MATRIX_PLUGIN\n    #endif\n\n  protected:\n    template <typename Derived, typename OtherDerived, bool IsVector>\n    friend struct internal::conservative_resize_like_impl;\n\n    using Base::m_storage;\n};\n\n/** \\defgroup matrixtypedefs Global matrix typedefs\n  *\n  * \\ingroup Core_Module\n  *\n  * %Eigen defines several typedef shortcuts for most common matrix and vector types.\n  *\n  * The general patterns are the following:\n  *\n  * \\c MatrixSizeType where \\c Size can be \\c 2,\\c 3,\\c 4 for fixed size square matrices or \\c X for dynamic size,\n  * and where \\c Type can be \\c i for integer, \\c f for float, 
\\c d for double, \\c cf for complex float, \\c cd\n  * for complex double.\n  *\n  * For example, \\c Matrix3d is a fixed-size 3x3 matrix type of doubles, and \\c MatrixXf is a dynamic-size matrix of floats.\n  *\n  * There are also \\c VectorSizeType and \\c RowVectorSizeType which are self-explanatory. For example, \\c Vector4cf is\n  * a fixed-size vector of 4 complex floats.\n  *\n  * With \\cpp11, template aliases are also defined for common sizes.\n  * They follow the same pattern as above except that the scalar type suffix is replaced by a\n  * template parameter, i.e.:\n  *   - `MatrixSize<Type>` where `Size` can be \\c 2,\\c 3,\\c 4 for fixed size square matrices or \\c X for dynamic size.\n  *   - `MatrixXSize<Type>` and `MatrixSizeX<Type>` where `Size` can be \\c 2,\\c 3,\\c 4 for hybrid dynamic/fixed matrices.\n  *   - `VectorSize<Type>` and `RowVectorSize<Type>` for column and row vectors.\n  *\n  * With \\cpp11, you can also use fully generic column and row vector types: `Vector<Type,Size>` and `RowVector<Type,Size>`.\n  *\n  * \\sa class Matrix\n  */\n\n#define EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, Size, SizeSuffix)   \\\n/** \\ingroup matrixtypedefs */                                    \\\ntypedef Matrix<Type, Size, Size> Matrix##SizeSuffix##TypeSuffix;  \\\n/** \\ingroup matrixtypedefs */                                    \\\ntypedef Matrix<Type, Size, 1>    Vector##SizeSuffix##TypeSuffix;  \\\n/** \\ingroup matrixtypedefs */                                    \\\ntypedef Matrix<Type, 1, Size>    RowVector##SizeSuffix##TypeSuffix;\n\n#define EIGEN_MAKE_FIXED_TYPEDEFS(Type, TypeSuffix, Size)         \\\n/** \\ingroup matrixtypedefs */                                    \\\ntypedef Matrix<Type, Size, Dynamic> Matrix##Size##X##TypeSuffix;  \\\n/** \\ingroup matrixtypedefs */                                    \\\ntypedef Matrix<Type, Dynamic, Size> Matrix##X##Size##TypeSuffix;\n\n#define EIGEN_MAKE_TYPEDEFS_ALL_SIZES(Type, TypeSuffix) \\\nEIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, 2, 2) \\\nEIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, 3, 3) \\\nEIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, 4, 4) \\\nEIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, Dynamic, X) \\\nEIGEN_MAKE_FIXED_TYPEDEFS(Type, TypeSuffix, 2) \\\nEIGEN_MAKE_FIXED_TYPEDEFS(Type, TypeSuffix, 3) \\\nEIGEN_MAKE_FIXED_TYPEDEFS(Type, TypeSuffix, 4)\n\nEIGEN_MAKE_TYPEDEFS_ALL_SIZES(int,                  i)\nEIGEN_MAKE_TYPEDEFS_ALL_SIZES(float,                f)\nEIGEN_MAKE_TYPEDEFS_ALL_SIZES(double,               d)\nEIGEN_MAKE_TYPEDEFS_ALL_SIZES(std::complex<float>,  cf)\nEIGEN_MAKE_TYPEDEFS_ALL_SIZES(std::complex<double>, cd)\n\n#undef EIGEN_MAKE_TYPEDEFS_ALL_SIZES\n#undef EIGEN_MAKE_TYPEDEFS\n#undef EIGEN_MAKE_FIXED_TYPEDEFS\n\n#if EIGEN_HAS_CXX11\n\n#define EIGEN_MAKE_TYPEDEFS(Size, SizeSuffix)                     \\\n/** \\ingroup matrixtypedefs */                                    \\\n/** \\brief \\cpp11 */                                              \\\ntemplate <typename Type>                                          \\\nusing Matrix##SizeSuffix = Matrix<Type, Size, Size>;              \\\n/** \\ingroup matrixtypedefs */                                    \\\n/** \\brief \\cpp11 */                                              \\\ntemplate <typename Type>                                          \\\nusing Vector##SizeSuffix = Matrix<Type, Size, 1>;                 \\\n/** \\ingroup matrixtypedefs */                                    \\\n/** \\brief \\cpp11 */                                              \\\ntemplate <typename Type>     
                                     \\\nusing RowVector##SizeSuffix = Matrix<Type, 1, Size>;\n\n#define EIGEN_MAKE_FIXED_TYPEDEFS(Size)                           \\\n/** \\ingroup matrixtypedefs */                                    \\\n/** \\brief \\cpp11 */                                              \\\ntemplate <typename Type>                                          \\\nusing Matrix##Size##X = Matrix<Type, Size, Dynamic>;              \\\n/** \\ingroup matrixtypedefs */                                    \\\n/** \\brief \\cpp11 */                                              \\\ntemplate <typename Type>                                          \\\nusing Matrix##X##Size = Matrix<Type, Dynamic, Size>;\n\nEIGEN_MAKE_TYPEDEFS(2, 2)\nEIGEN_MAKE_TYPEDEFS(3, 3)\nEIGEN_MAKE_TYPEDEFS(4, 4)\nEIGEN_MAKE_TYPEDEFS(Dynamic, X)\nEIGEN_MAKE_FIXED_TYPEDEFS(2)\nEIGEN_MAKE_FIXED_TYPEDEFS(3)\nEIGEN_MAKE_FIXED_TYPEDEFS(4)\n\n/** \\ingroup matrixtypedefs\n  * \\brief \\cpp11 */\ntemplate <typename Type, int Size>\nusing Vector = Matrix<Type, Size, 1>;\n\n/** \\ingroup matrixtypedefs\n  * \\brief \\cpp11 */\ntemplate <typename Type, int Size>\nusing RowVector = Matrix<Type, 1, Size>;\n\n#undef EIGEN_MAKE_TYPEDEFS\n#undef EIGEN_MAKE_FIXED_TYPEDEFS\n\n#endif // EIGEN_HAS_CXX11\n\n} // end namespace Eigen\n\n#endif // EIGEN_MATRIX_H\n"
  },
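  {
    "path": "docs/eigen_notes/matrix_typedefs_sketch.cpp",
    "content": "// Editor's sketch -- NOT part of Eigen or regenie; the file name and its\n// location are hypothetical. Minimal usage of what the Matrix.h docs above\n// describe: the MatrixSizeType/VectorSizeType typedefs, dynamic sizing, and\n// the C++11 row-by-row initializer-list constructor, including the documented\n// implicit transposition for compile-time column vectors.\n// Build against the vendored copy, e.g.:\n//   g++ -std=c++14 -I external_libs/eigen-3.4.0 matrix_typedefs_sketch.cpp\n\n#include <Eigen/Dense>\n#include <iostream>\n\nint main() {\n  // Fixed-size square matrix of doubles: Matrix<double, 3, 3>.\n  Eigen::Matrix3d m = Eigen::Matrix3d::Identity();\n\n  // Dynamic-size matrix of floats: Matrix<float, Dynamic, Dynamic>.\n  Eigen::MatrixXf a(2, 3);\n  a << 1, 2, 3,\n       4, 5, 6;\n\n  // C++11 initializer lists grouped by row; fixed sizes must match exactly.\n  Eigen::Matrix<int, 2, 3> b{{1, 2, 3}, {4, 5, 6}};\n\n  // Column vectors accept a single row and transpose it implicitly, so\n  // VectorXd{{1,2,3,4,5}} replaces the verbose {{1},{2},{3},{4},{5}}.\n  Eigen::VectorXd v{{1.0, 2.0, 3.0, 4.0, 5.0}};\n\n  std::cout << m.trace() << \"\\n\" << a.sum() << \"\\n\" << b << \"\\n\"\n            << v.transpose() << \"\\n\";\n  return 0;\n}\n"
  },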
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/MatrixBase.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2006-2009 Benoit Jacob <jacob.benoit.1@gmail.com>\n// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_MATRIXBASE_H\n#define EIGEN_MATRIXBASE_H\n\nnamespace Eigen {\n\n/** \\class MatrixBase\n  * \\ingroup Core_Module\n  *\n  * \\brief Base class for all dense matrices, vectors, and expressions\n  *\n  * This class is the base that is inherited by all matrix, vector, and related expression\n  * types. Most of the Eigen API is contained in this class, and its base classes. Other important\n  * classes for the Eigen API are Matrix, and VectorwiseOp.\n  *\n  * Note that some methods are defined in other modules such as the \\ref LU_Module LU module\n  * for all functions related to matrix inversions.\n  *\n  * \\tparam Derived is the derived type, e.g. a matrix type, or an expression, etc.\n  *\n  * When writing a function taking Eigen objects as argument, if you want your function\n  * to take as argument any matrix, vector, or expression, just let it take a\n  * MatrixBase argument. As an example, here is a function printFirstRow which, given\n  * a matrix, vector, or expression \\a x, prints the first row of \\a x.\n  *\n  * \\code\n    template<typename Derived>\n    void printFirstRow(const Eigen::MatrixBase<Derived>& x)\n    {\n      cout << x.row(0) << endl;\n    }\n  * \\endcode\n  *\n  * This class can be extended with the help of the plugin mechanism described on the page\n  * \\ref TopicCustomizing_Plugins by defining the preprocessor symbol \\c EIGEN_MATRIXBASE_PLUGIN.\n  *\n  * \\sa \\blank \\ref TopicClassHierarchy\n  */\ntemplate<typename Derived> class MatrixBase\n  : public DenseBase<Derived>\n{\n  public:\n#ifndef EIGEN_PARSED_BY_DOXYGEN\n    typedef MatrixBase StorageBaseType;\n    typedef typename internal::traits<Derived>::StorageKind StorageKind;\n    typedef typename internal::traits<Derived>::StorageIndex StorageIndex;\n    typedef typename internal::traits<Derived>::Scalar Scalar;\n    typedef typename internal::packet_traits<Scalar>::type PacketScalar;\n    typedef typename NumTraits<Scalar>::Real RealScalar;\n\n    typedef DenseBase<Derived> Base;\n    using Base::RowsAtCompileTime;\n    using Base::ColsAtCompileTime;\n    using Base::SizeAtCompileTime;\n    using Base::MaxRowsAtCompileTime;\n    using Base::MaxColsAtCompileTime;\n    using Base::MaxSizeAtCompileTime;\n    using Base::IsVectorAtCompileTime;\n    using Base::Flags;\n\n    using Base::derived;\n    using Base::const_cast_derived;\n    using Base::rows;\n    using Base::cols;\n    using Base::size;\n    using Base::coeff;\n    using Base::coeffRef;\n    using Base::lazyAssign;\n    using Base::eval;\n    using Base::operator-;\n    using Base::operator+=;\n    using Base::operator-=;\n    using Base::operator*=;\n    using Base::operator/=;\n\n    typedef typename Base::CoeffReturnType CoeffReturnType;\n    typedef typename Base::ConstTransposeReturnType ConstTransposeReturnType;\n    typedef typename Base::RowXpr RowXpr;\n    typedef typename Base::ColXpr ColXpr;\n#endif // not EIGEN_PARSED_BY_DOXYGEN\n\n\n\n#ifndef EIGEN_PARSED_BY_DOXYGEN\n    /** type of the equivalent square matrix */\n    typedef 
Matrix<Scalar,EIGEN_SIZE_MAX(RowsAtCompileTime,ColsAtCompileTime),\n                          EIGEN_SIZE_MAX(RowsAtCompileTime,ColsAtCompileTime)> SquareMatrixType;\n#endif // not EIGEN_PARSED_BY_DOXYGEN\n\n    /** \\returns the size of the main diagonal, which is min(rows(),cols()).\n      * \\sa rows(), cols(), SizeAtCompileTime. */\n    EIGEN_DEVICE_FUNC\n    inline Index diagonalSize() const { return (numext::mini)(rows(),cols()); }\n\n    typedef typename Base::PlainObject PlainObject;\n\n#ifndef EIGEN_PARSED_BY_DOXYGEN\n    /** \\internal Represents a matrix with all coefficients equal to one another*/\n    typedef CwiseNullaryOp<internal::scalar_constant_op<Scalar>,PlainObject> ConstantReturnType;\n    /** \\internal the return type of MatrixBase::adjoint() */\n    typedef typename internal::conditional<NumTraits<Scalar>::IsComplex,\n                        CwiseUnaryOp<internal::scalar_conjugate_op<Scalar>, ConstTransposeReturnType>,\n                        ConstTransposeReturnType\n                     >::type AdjointReturnType;\n    /** \\internal Return type of eigenvalues() */\n    typedef Matrix<std::complex<RealScalar>, internal::traits<Derived>::ColsAtCompileTime, 1, ColMajor> EigenvaluesReturnType;\n    /** \\internal the return type of identity */\n    typedef CwiseNullaryOp<internal::scalar_identity_op<Scalar>,PlainObject> IdentityReturnType;\n    /** \\internal the return type of unit vectors */\n    typedef Block<const CwiseNullaryOp<internal::scalar_identity_op<Scalar>, SquareMatrixType>,\n                  internal::traits<Derived>::RowsAtCompileTime,\n                  internal::traits<Derived>::ColsAtCompileTime> BasisReturnType;\n#endif // not EIGEN_PARSED_BY_DOXYGEN\n\n#define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::MatrixBase\n#define EIGEN_DOC_UNARY_ADDONS(X,Y)\n#   include \"../plugins/CommonCwiseBinaryOps.h\"\n#   include \"../plugins/MatrixCwiseUnaryOps.h\"\n#   include \"../plugins/MatrixCwiseBinaryOps.h\"\n#   ifdef EIGEN_MATRIXBASE_PLUGIN\n#     include EIGEN_MATRIXBASE_PLUGIN\n#   endif\n#undef EIGEN_CURRENT_STORAGE_BASE_CLASS\n#undef EIGEN_DOC_UNARY_ADDONS\n\n    /** Special case of the template operator=, in order to prevent the compiler\n      * from generating a default operator= (issue hit with g++ 4.1)\n      */\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n    Derived& operator=(const MatrixBase& other);\n\n    // We cannot inherit here via Base::operator= since it is causing\n    // trouble with MSVC.\n\n    template <typename OtherDerived>\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n    Derived& operator=(const DenseBase<OtherDerived>& other);\n\n    template <typename OtherDerived>\n    EIGEN_DEVICE_FUNC\n    Derived& operator=(const EigenBase<OtherDerived>& other);\n\n    template<typename OtherDerived>\n    EIGEN_DEVICE_FUNC\n    Derived& operator=(const ReturnByValue<OtherDerived>& other);\n\n    template<typename OtherDerived>\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n    Derived& operator+=(const MatrixBase<OtherDerived>& other);\n    template<typename OtherDerived>\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n    Derived& operator-=(const MatrixBase<OtherDerived>& other);\n\n    template<typename OtherDerived>\n    EIGEN_DEVICE_FUNC\n    const Product<Derived,OtherDerived>\n    operator*(const MatrixBase<OtherDerived> &other) const;\n\n    template<typename OtherDerived>\n    EIGEN_DEVICE_FUNC\n    const Product<Derived,OtherDerived,LazyProduct>\n    lazyProduct(const MatrixBase<OtherDerived> &other) const;\n\n    template<typename 
OtherDerived>\n    Derived& operator*=(const EigenBase<OtherDerived>& other);\n\n    template<typename OtherDerived>\n    void applyOnTheLeft(const EigenBase<OtherDerived>& other);\n\n    template<typename OtherDerived>\n    void applyOnTheRight(const EigenBase<OtherDerived>& other);\n\n    template<typename DiagonalDerived>\n    EIGEN_DEVICE_FUNC\n    const Product<Derived, DiagonalDerived, LazyProduct>\n    operator*(const DiagonalBase<DiagonalDerived> &diagonal) const;\n\n    template<typename OtherDerived>\n    EIGEN_DEVICE_FUNC\n    typename ScalarBinaryOpTraits<typename internal::traits<Derived>::Scalar,typename internal::traits<OtherDerived>::Scalar>::ReturnType\n    dot(const MatrixBase<OtherDerived>& other) const;\n\n    EIGEN_DEVICE_FUNC RealScalar squaredNorm() const;\n    EIGEN_DEVICE_FUNC RealScalar norm() const;\n    RealScalar stableNorm() const;\n    RealScalar blueNorm() const;\n    RealScalar hypotNorm() const;\n    EIGEN_DEVICE_FUNC const PlainObject normalized() const;\n    EIGEN_DEVICE_FUNC const PlainObject stableNormalized() const;\n    EIGEN_DEVICE_FUNC void normalize();\n    EIGEN_DEVICE_FUNC void stableNormalize();\n\n    EIGEN_DEVICE_FUNC const AdjointReturnType adjoint() const;\n    EIGEN_DEVICE_FUNC void adjointInPlace();\n\n    typedef Diagonal<Derived> DiagonalReturnType;\n    EIGEN_DEVICE_FUNC\n    DiagonalReturnType diagonal();\n\n    typedef typename internal::add_const<Diagonal<const Derived> >::type ConstDiagonalReturnType;\n    EIGEN_DEVICE_FUNC\n    ConstDiagonalReturnType diagonal() const;\n\n    template<int Index> struct DiagonalIndexReturnType { typedef Diagonal<Derived,Index> Type; };\n    template<int Index> struct ConstDiagonalIndexReturnType { typedef const Diagonal<const Derived,Index> Type; };\n\n    template<int Index>\n    EIGEN_DEVICE_FUNC\n    typename DiagonalIndexReturnType<Index>::Type diagonal();\n\n    template<int Index>\n    EIGEN_DEVICE_FUNC\n    typename ConstDiagonalIndexReturnType<Index>::Type diagonal() const;\n\n    typedef Diagonal<Derived,DynamicIndex> DiagonalDynamicIndexReturnType;\n    typedef typename internal::add_const<Diagonal<const Derived,DynamicIndex> >::type ConstDiagonalDynamicIndexReturnType;\n\n    EIGEN_DEVICE_FUNC\n    DiagonalDynamicIndexReturnType diagonal(Index index);\n    EIGEN_DEVICE_FUNC\n    ConstDiagonalDynamicIndexReturnType diagonal(Index index) const;\n\n    template<unsigned int Mode> struct TriangularViewReturnType { typedef TriangularView<Derived, Mode> Type; };\n    template<unsigned int Mode> struct ConstTriangularViewReturnType { typedef const TriangularView<const Derived, Mode> Type; };\n\n    template<unsigned int Mode>\n    EIGEN_DEVICE_FUNC\n    typename TriangularViewReturnType<Mode>::Type triangularView();\n    template<unsigned int Mode>\n    EIGEN_DEVICE_FUNC\n    typename ConstTriangularViewReturnType<Mode>::Type triangularView() const;\n\n    template<unsigned int UpLo> struct SelfAdjointViewReturnType { typedef SelfAdjointView<Derived, UpLo> Type; };\n    template<unsigned int UpLo> struct ConstSelfAdjointViewReturnType { typedef const SelfAdjointView<const Derived, UpLo> Type; };\n\n    template<unsigned int UpLo>\n    EIGEN_DEVICE_FUNC\n    typename SelfAdjointViewReturnType<UpLo>::Type selfadjointView();\n    template<unsigned int UpLo>\n    EIGEN_DEVICE_FUNC\n    typename ConstSelfAdjointViewReturnType<UpLo>::Type selfadjointView() const;\n\n    const SparseView<Derived> sparseView(const Scalar& m_reference = Scalar(0),\n                                         const typename 
NumTraits<Scalar>::Real& m_epsilon = NumTraits<Scalar>::dummy_precision()) const;\n    EIGEN_DEVICE_FUNC static const IdentityReturnType Identity();\n    EIGEN_DEVICE_FUNC static const IdentityReturnType Identity(Index rows, Index cols);\n    EIGEN_DEVICE_FUNC static const BasisReturnType Unit(Index size, Index i);\n    EIGEN_DEVICE_FUNC static const BasisReturnType Unit(Index i);\n    EIGEN_DEVICE_FUNC static const BasisReturnType UnitX();\n    EIGEN_DEVICE_FUNC static const BasisReturnType UnitY();\n    EIGEN_DEVICE_FUNC static const BasisReturnType UnitZ();\n    EIGEN_DEVICE_FUNC static const BasisReturnType UnitW();\n\n    EIGEN_DEVICE_FUNC\n    const DiagonalWrapper<const Derived> asDiagonal() const;\n    const PermutationWrapper<const Derived> asPermutation() const;\n\n    EIGEN_DEVICE_FUNC\n    Derived& setIdentity();\n    EIGEN_DEVICE_FUNC\n    Derived& setIdentity(Index rows, Index cols);\n    EIGEN_DEVICE_FUNC Derived& setUnit(Index i);\n    EIGEN_DEVICE_FUNC Derived& setUnit(Index newSize, Index i);\n\n    bool isIdentity(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;\n    bool isDiagonal(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;\n\n    bool isUpperTriangular(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;\n    bool isLowerTriangular(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;\n\n    template<typename OtherDerived>\n    bool isOrthogonal(const MatrixBase<OtherDerived>& other,\n                      const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;\n    bool isUnitary(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;\n\n    /** \\returns true if the coefficients of \\c *this and \\a other are all exactly equal.\n      * \\warning When using floating point scalar values you should probably use a\n      *          fuzzy comparison such as isApprox()\n      * \\sa isApprox(), operator!= */\n    template<typename OtherDerived>\n    EIGEN_DEVICE_FUNC inline bool operator==(const MatrixBase<OtherDerived>& other) const\n    { return cwiseEqual(other).all(); }\n\n    /** \\returns true if at least one pair of coefficients of \\c *this and \\a other are not exactly equal to each other.\n      * \\warning When using floating point scalar values you should probably use a\n      *          fuzzy comparison such as isApprox()\n      * \\sa isApprox(), operator== */\n    template<typename OtherDerived>\n    EIGEN_DEVICE_FUNC inline bool operator!=(const MatrixBase<OtherDerived>& other) const\n    { return cwiseNotEqual(other).any(); }\n\n    NoAlias<Derived,Eigen::MatrixBase > EIGEN_DEVICE_FUNC noalias();\n\n    // TODO forceAlignedAccess is temporarily disabled\n    // Need to find a nicer workaround.\n    inline const Derived& forceAlignedAccess() const { return derived(); }\n    inline Derived& forceAlignedAccess() { return derived(); }\n    template<bool Enable> inline const Derived& forceAlignedAccessIf() const { return derived(); }\n    template<bool Enable> inline Derived& forceAlignedAccessIf() { return derived(); }\n\n    EIGEN_DEVICE_FUNC Scalar trace() const;\n\n    template<int p> EIGEN_DEVICE_FUNC RealScalar lpNorm() const;\n\n    EIGEN_DEVICE_FUNC MatrixBase<Derived>& matrix() { return *this; }\n    EIGEN_DEVICE_FUNC const MatrixBase<Derived>& matrix() const { return *this; }\n\n    /** \\returns an \\link Eigen::ArrayBase Array \\endlink expression of this matrix\n      * \\sa ArrayBase::matrix() */\n    EIGEN_DEVICE_FUNC 
EIGEN_STRONG_INLINE ArrayWrapper<Derived> array() { return ArrayWrapper<Derived>(derived()); }\n    /** \\returns a const \\link Eigen::ArrayBase Array \\endlink expression of this matrix\n      * \\sa ArrayBase::matrix() */\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const ArrayWrapper<const Derived> array() const { return ArrayWrapper<const Derived>(derived()); }\n\n/////////// LU module ///////////\n\n    inline const FullPivLU<PlainObject> fullPivLu() const;\n    inline const PartialPivLU<PlainObject> partialPivLu() const;\n\n    inline const PartialPivLU<PlainObject> lu() const;\n\n    EIGEN_DEVICE_FUNC\n    inline const Inverse<Derived> inverse() const;\n\n    template<typename ResultType>\n    inline void computeInverseAndDetWithCheck(\n      ResultType& inverse,\n      typename ResultType::Scalar& determinant,\n      bool& invertible,\n      const RealScalar& absDeterminantThreshold = NumTraits<Scalar>::dummy_precision()\n    ) const;\n\n    template<typename ResultType>\n    inline void computeInverseWithCheck(\n      ResultType& inverse,\n      bool& invertible,\n      const RealScalar& absDeterminantThreshold = NumTraits<Scalar>::dummy_precision()\n    ) const;\n\n    EIGEN_DEVICE_FUNC\n    Scalar determinant() const;\n\n/////////// Cholesky module ///////////\n\n    inline const LLT<PlainObject>  llt() const;\n    inline const LDLT<PlainObject> ldlt() const;\n\n/////////// QR module ///////////\n\n    inline const HouseholderQR<PlainObject> householderQr() const;\n    inline const ColPivHouseholderQR<PlainObject> colPivHouseholderQr() const;\n    inline const FullPivHouseholderQR<PlainObject> fullPivHouseholderQr() const;\n    inline const CompleteOrthogonalDecomposition<PlainObject> completeOrthogonalDecomposition() const;\n\n/////////// Eigenvalues module ///////////\n\n    inline EigenvaluesReturnType eigenvalues() const;\n    inline RealScalar operatorNorm() const;\n\n/////////// SVD module ///////////\n\n    inline JacobiSVD<PlainObject> jacobiSvd(unsigned int computationOptions = 0) const;\n    inline BDCSVD<PlainObject>    bdcSvd(unsigned int computationOptions = 0) const;\n\n/////////// Geometry module ///////////\n\n    #ifndef EIGEN_PARSED_BY_DOXYGEN\n    /// \\internal helper struct to form the return type of the cross product\n    template<typename OtherDerived> struct cross_product_return_type {\n      typedef typename ScalarBinaryOpTraits<typename internal::traits<Derived>::Scalar,typename internal::traits<OtherDerived>::Scalar>::ReturnType Scalar;\n      typedef Matrix<Scalar,MatrixBase::RowsAtCompileTime,MatrixBase::ColsAtCompileTime> type;\n    };\n    #endif // EIGEN_PARSED_BY_DOXYGEN\n    template<typename OtherDerived>\n    EIGEN_DEVICE_FUNC\n#ifndef EIGEN_PARSED_BY_DOXYGEN\n    inline typename cross_product_return_type<OtherDerived>::type\n#else\n    inline PlainObject\n#endif\n    cross(const MatrixBase<OtherDerived>& other) const;\n\n    template<typename OtherDerived>\n    EIGEN_DEVICE_FUNC\n    inline PlainObject cross3(const MatrixBase<OtherDerived>& other) const;\n\n    EIGEN_DEVICE_FUNC\n    inline PlainObject unitOrthogonal(void) const;\n\n    EIGEN_DEVICE_FUNC\n    inline Matrix<Scalar,3,1> eulerAngles(Index a0, Index a1, Index a2) const;\n\n    // put this as separate enum value to work around possible GCC 4.3 bug (?)\n    enum { HomogeneousReturnTypeDirection = ColsAtCompileTime==1&&RowsAtCompileTime==1 ? ((internal::traits<Derived>::Flags&RowMajorBit)==RowMajorBit ? 
Horizontal : Vertical)\n                                          : ColsAtCompileTime==1 ? Vertical : Horizontal };\n    typedef Homogeneous<Derived, HomogeneousReturnTypeDirection> HomogeneousReturnType;\n    EIGEN_DEVICE_FUNC\n    inline HomogeneousReturnType homogeneous() const;\n\n    enum {\n      SizeMinusOne = SizeAtCompileTime==Dynamic ? Dynamic : SizeAtCompileTime-1\n    };\n    typedef Block<const Derived,\n                  internal::traits<Derived>::ColsAtCompileTime==1 ? SizeMinusOne : 1,\n                  internal::traits<Derived>::ColsAtCompileTime==1 ? 1 : SizeMinusOne> ConstStartMinusOne;\n    typedef EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(ConstStartMinusOne,Scalar,quotient) HNormalizedReturnType;\n    EIGEN_DEVICE_FUNC\n    inline const HNormalizedReturnType hnormalized() const;\n\n////////// Householder module ///////////\n\n    EIGEN_DEVICE_FUNC\n    void makeHouseholderInPlace(Scalar& tau, RealScalar& beta);\n    template<typename EssentialPart>\n    EIGEN_DEVICE_FUNC\n    void makeHouseholder(EssentialPart& essential,\n                         Scalar& tau, RealScalar& beta) const;\n    template<typename EssentialPart>\n    EIGEN_DEVICE_FUNC\n    void applyHouseholderOnTheLeft(const EssentialPart& essential,\n                                   const Scalar& tau,\n                                   Scalar* workspace);\n    template<typename EssentialPart>\n    EIGEN_DEVICE_FUNC\n    void applyHouseholderOnTheRight(const EssentialPart& essential,\n                                    const Scalar& tau,\n                                    Scalar* workspace);\n\n///////// Jacobi module /////////\n\n    template<typename OtherScalar>\n    EIGEN_DEVICE_FUNC\n    void applyOnTheLeft(Index p, Index q, const JacobiRotation<OtherScalar>& j);\n    template<typename OtherScalar>\n    EIGEN_DEVICE_FUNC\n    void applyOnTheRight(Index p, Index q, const JacobiRotation<OtherScalar>& j);\n\n///////// SparseCore module /////////\n\n    template<typename OtherDerived>\n    EIGEN_STRONG_INLINE const typename SparseMatrixBase<OtherDerived>::template CwiseProductDenseReturnType<Derived>::Type\n    cwiseProduct(const SparseMatrixBase<OtherDerived> &other) const\n    {\n      return other.cwiseProduct(derived());\n    }\n\n///////// MatrixFunctions module /////////\n\n    typedef typename internal::stem_function<Scalar>::type StemFunction;\n#define EIGEN_MATRIX_FUNCTION(ReturnType, Name, Description) \\\n    /** \\returns an expression of the matrix Description of \\c *this. \\brief This function requires the <a href=\"unsupported/group__MatrixFunctions__Module.html\"> unsupported MatrixFunctions module</a>. To compute the coefficient-wise Description use ArrayBase::##Name . */ \\\n    const ReturnType<Derived> Name() const;\n#define EIGEN_MATRIX_FUNCTION_1(ReturnType, Name, Description, Argument) \\\n    /** \\returns an expression of the matrix Description of \\c *this. \\brief This function requires the <a href=\"unsupported/group__MatrixFunctions__Module.html\"> unsupported MatrixFunctions module</a>. To compute the coefficient-wise Description use ArrayBase::##Name . 
*/ \\\n    const ReturnType<Derived> Name(Argument) const;\n\n    EIGEN_MATRIX_FUNCTION(MatrixExponentialReturnValue, exp, exponential)\n    /** \\brief Helper function for the <a href=\"unsupported/group__MatrixFunctions__Module.html\"> unsupported MatrixFunctions module</a>.*/\n    const MatrixFunctionReturnValue<Derived> matrixFunction(StemFunction f) const;\n    EIGEN_MATRIX_FUNCTION(MatrixFunctionReturnValue, cosh, hyperbolic cosine)\n    EIGEN_MATRIX_FUNCTION(MatrixFunctionReturnValue, sinh, hyperbolic sine)\n#if EIGEN_HAS_CXX11_MATH\n    EIGEN_MATRIX_FUNCTION(MatrixFunctionReturnValue, atanh, inverse hyperbolic tangent)\n    EIGEN_MATRIX_FUNCTION(MatrixFunctionReturnValue, acosh, inverse hyperbolic cosine)\n    EIGEN_MATRIX_FUNCTION(MatrixFunctionReturnValue, asinh, inverse hyperbolic sine)\n#endif\n    EIGEN_MATRIX_FUNCTION(MatrixFunctionReturnValue, cos, cosine)\n    EIGEN_MATRIX_FUNCTION(MatrixFunctionReturnValue, sin, sine)\n    EIGEN_MATRIX_FUNCTION(MatrixSquareRootReturnValue, sqrt, square root)\n    EIGEN_MATRIX_FUNCTION(MatrixLogarithmReturnValue, log, logarithm)\n    EIGEN_MATRIX_FUNCTION_1(MatrixPowerReturnValue,        pow, power to \\c p, const RealScalar& p)\n    EIGEN_MATRIX_FUNCTION_1(MatrixComplexPowerReturnValue, pow, power to \\c p, const std::complex<RealScalar>& p)\n\n  protected:\n    EIGEN_DEFAULT_COPY_CONSTRUCTOR(MatrixBase)\n    EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(MatrixBase)\n\n  private:\n    EIGEN_DEVICE_FUNC explicit MatrixBase(int);\n    EIGEN_DEVICE_FUNC MatrixBase(int,int);\n    template<typename OtherDerived> EIGEN_DEVICE_FUNC explicit MatrixBase(const MatrixBase<OtherDerived>&);\n  protected:\n    // mixing arrays and matrices is not legal\n    template<typename OtherDerived> Derived& operator+=(const ArrayBase<OtherDerived>& )\n    {EIGEN_STATIC_ASSERT(std::ptrdiff_t(sizeof(typename OtherDerived::Scalar))==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES); return *this;}\n    // mixing arrays and matrices is not legal\n    template<typename OtherDerived> Derived& operator-=(const ArrayBase<OtherDerived>& )\n    {EIGEN_STATIC_ASSERT(std::ptrdiff_t(sizeof(typename OtherDerived::Scalar))==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES); return *this;}\n};\n\n\n/***************************************************************************\n* Implementation of matrix base methods\n***************************************************************************/\n\n/** replaces \\c *this by \\c *this * \\a other.\n  *\n  * \\returns a reference to \\c *this\n  *\n  * Example: \\include MatrixBase_applyOnTheRight.cpp\n  * Output: \\verbinclude MatrixBase_applyOnTheRight.out\n  */\ntemplate<typename Derived>\ntemplate<typename OtherDerived>\ninline Derived&\nMatrixBase<Derived>::operator*=(const EigenBase<OtherDerived> &other)\n{\n  other.derived().applyThisOnTheRight(derived());\n  return derived();\n}\n\n/** replaces \\c *this by \\c *this * \\a other. 
It is equivalent to MatrixBase::operator*=().\n  *\n  * Example: \\include MatrixBase_applyOnTheRight.cpp\n  * Output: \\verbinclude MatrixBase_applyOnTheRight.out\n  */\ntemplate<typename Derived>\ntemplate<typename OtherDerived>\ninline void MatrixBase<Derived>::applyOnTheRight(const EigenBase<OtherDerived> &other)\n{\n  other.derived().applyThisOnTheRight(derived());\n}\n\n/** replaces \\c *this by \\a other * \\c *this.\n  *\n  * Example: \\include MatrixBase_applyOnTheLeft.cpp\n  * Output: \\verbinclude MatrixBase_applyOnTheLeft.out\n  */\ntemplate<typename Derived>\ntemplate<typename OtherDerived>\ninline void MatrixBase<Derived>::applyOnTheLeft(const EigenBase<OtherDerived> &other)\n{\n  other.derived().applyThisOnTheLeft(derived());\n}\n\n} // end namespace Eigen\n\n#endif // EIGEN_MATRIXBASE_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/NestByValue.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>\n// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_NESTBYVALUE_H\n#define EIGEN_NESTBYVALUE_H\n\nnamespace Eigen {\n\nnamespace internal {\ntemplate<typename ExpressionType>\nstruct traits<NestByValue<ExpressionType> > : public traits<ExpressionType>\n{\n  enum {\n    Flags = traits<ExpressionType>::Flags & ~NestByRefBit\n  };\n};\n}\n\n/** \\class NestByValue\n  * \\ingroup Core_Module\n  *\n  * \\brief Expression which must be nested by value\n  *\n  * \\tparam ExpressionType the type of the object of which we are requiring nesting-by-value\n  *\n  * This class is the return type of MatrixBase::nestByValue()\n  * and most of the time this is the only way it is used.\n  *\n  * \\sa MatrixBase::nestByValue()\n  */\ntemplate<typename ExpressionType> class NestByValue\n  : public internal::dense_xpr_base< NestByValue<ExpressionType> >::type\n{\n  public:\n\n    typedef typename internal::dense_xpr_base<NestByValue>::type Base;\n    EIGEN_DENSE_PUBLIC_INTERFACE(NestByValue)\n\n    EIGEN_DEVICE_FUNC explicit inline NestByValue(const ExpressionType& matrix) : m_expression(matrix) {}\n\n    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index rows() const EIGEN_NOEXCEPT { return m_expression.rows(); }\n    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index cols() const EIGEN_NOEXCEPT { return m_expression.cols(); }\n\n    EIGEN_DEVICE_FUNC operator const ExpressionType&() const { return m_expression; }\n\n    EIGEN_DEVICE_FUNC const ExpressionType& nestedExpression() const { return m_expression; }\n\n  protected:\n    const ExpressionType m_expression;\n};\n\n/** \\returns an expression of the temporary version of *this.\n  */\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC inline const NestByValue<Derived>\nDenseBase<Derived>::nestByValue() const\n{\n  return NestByValue<Derived>(derived());\n}\n\nnamespace internal {\n\n// Evaluator of Solve -> eval into a temporary\ntemplate<typename ArgType>\nstruct evaluator<NestByValue<ArgType> >\n  : public evaluator<ArgType>\n{\n  typedef evaluator<ArgType> Base;\n\n  EIGEN_DEVICE_FUNC explicit evaluator(const NestByValue<ArgType>& xpr)\n    : Base(xpr.nestedExpression())\n  {}\n};\n}\n\n} // end namespace Eigen\n\n#endif // EIGEN_NESTBYVALUE_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/NoAlias.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_NOALIAS_H\n#define EIGEN_NOALIAS_H\n\nnamespace Eigen {\n\n/** \\class NoAlias\n  * \\ingroup Core_Module\n  *\n  * \\brief Pseudo expression providing an operator = assuming no aliasing\n  *\n  * \\tparam ExpressionType the type of the object on which to do the lazy assignment\n  *\n  * This class represents an expression with special assignment operators\n  * assuming no aliasing between the target expression and the source expression.\n  * More precisely it alloas to bypass the EvalBeforeAssignBit flag of the source expression.\n  * It is the return type of MatrixBase::noalias()\n  * and most of the time this is the only way it is used.\n  *\n  * \\sa MatrixBase::noalias()\n  */\ntemplate<typename ExpressionType, template <typename> class StorageBase>\nclass NoAlias\n{\n  public:\n    typedef typename ExpressionType::Scalar Scalar;\n    \n    EIGEN_DEVICE_FUNC\n    explicit NoAlias(ExpressionType& expression) : m_expression(expression) {}\n    \n    template<typename OtherDerived>\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE ExpressionType& operator=(const StorageBase<OtherDerived>& other)\n    {\n      call_assignment_no_alias(m_expression, other.derived(), internal::assign_op<Scalar,typename OtherDerived::Scalar>());\n      return m_expression;\n    }\n    \n    template<typename OtherDerived>\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE ExpressionType& operator+=(const StorageBase<OtherDerived>& other)\n    {\n      call_assignment_no_alias(m_expression, other.derived(), internal::add_assign_op<Scalar,typename OtherDerived::Scalar>());\n      return m_expression;\n    }\n    \n    template<typename OtherDerived>\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE ExpressionType& operator-=(const StorageBase<OtherDerived>& other)\n    {\n      call_assignment_no_alias(m_expression, other.derived(), internal::sub_assign_op<Scalar,typename OtherDerived::Scalar>());\n      return m_expression;\n    }\n\n    EIGEN_DEVICE_FUNC\n    ExpressionType& expression() const\n    {\n      return m_expression;\n    }\n\n  protected:\n    ExpressionType& m_expression;\n};\n\n/** \\returns a pseudo expression of \\c *this with an operator= assuming\n  * no aliasing between \\c *this and the source expression.\n  *\n  * More precisely, noalias() allows to bypass the EvalBeforeAssignBit flag.\n  * Currently, even though several expressions may alias, only product\n  * expressions have this flag. Therefore, noalias() is only useful when\n  * the source expression contains a matrix product.\n  *\n  * Here are some examples where noalias is useful:\n  * \\code\n  * D.noalias()  = A * B;\n  * D.noalias() += A.transpose() * B;\n  * D.noalias() -= 2 * A * B.adjoint();\n  * \\endcode\n  *\n  * On the other hand the following example will lead to a \\b wrong result:\n  * \\code\n  * A.noalias() = A * B;\n  * \\endcode\n  * because the result matrix A is also an operand of the matrix product. 
Therefore,\n  * there is no alternative but to evaluate A * B in a temporary, which is the default\n  * behavior when you write:\n  * \\code\n  * A = A * B;\n  * \\endcode\n  *\n  * \\sa class NoAlias\n  */\ntemplate<typename Derived>\nNoAlias<Derived,MatrixBase> EIGEN_DEVICE_FUNC MatrixBase<Derived>::noalias()\n{\n  return NoAlias<Derived, Eigen::MatrixBase >(derived());\n}\n\n} // end namespace Eigen\n\n#endif // EIGEN_NOALIAS_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/NumTraits.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2006-2010 Benoit Jacob <jacob.benoit.1@gmail.com>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_NUMTRAITS_H\n#define EIGEN_NUMTRAITS_H\n\nnamespace Eigen {\n\nnamespace internal {\n\n// default implementation of digits10(), based on numeric_limits if specialized,\n// 0 for integer types, and log10(epsilon()) otherwise.\ntemplate< typename T,\n          bool use_numeric_limits = std::numeric_limits<T>::is_specialized,\n          bool is_integer = NumTraits<T>::IsInteger>\nstruct default_digits10_impl\n{\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n  static int run() { return std::numeric_limits<T>::digits10; }\n};\n\ntemplate<typename T>\nstruct default_digits10_impl<T,false,false> // Floating point\n{\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n  static int run() {\n    using std::log10;\n    using std::ceil;\n    typedef typename NumTraits<T>::Real Real;\n    return int(ceil(-log10(NumTraits<Real>::epsilon())));\n  }\n};\n\ntemplate<typename T>\nstruct default_digits10_impl<T,false,true> // Integer\n{\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n  static int run() { return 0; }\n};\n\n\n// default implementation of digits(), based on numeric_limits if specialized,\n// 0 for integer types, and log2(epsilon()) otherwise.\ntemplate< typename T,\n          bool use_numeric_limits = std::numeric_limits<T>::is_specialized,\n          bool is_integer = NumTraits<T>::IsInteger>\nstruct default_digits_impl\n{\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n  static int run() { return std::numeric_limits<T>::digits; }\n};\n\ntemplate<typename T>\nstruct default_digits_impl<T,false,false> // Floating point\n{\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n  static int run() {\n    using std::log;\n    using std::ceil;\n    typedef typename NumTraits<T>::Real Real;\n    return int(ceil(-log(NumTraits<Real>::epsilon())/log(static_cast<Real>(2))));\n  }\n};\n\ntemplate<typename T>\nstruct default_digits_impl<T,false,true> // Integer\n{\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n  static int run() { return 0; }\n};\n\n} // end namespace internal\n\nnamespace numext {\n/** \\internal bit-wise cast without changing the underlying bit representation. */\n\n// TODO: Replace by std::bit_cast (available in C++20)\ntemplate <typename Tgt, typename Src>\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Tgt bit_cast(const Src& src) {\n#if EIGEN_HAS_TYPE_TRAITS\n  // The behaviour of memcpy is not specified for non-trivially copyable types\n  EIGEN_STATIC_ASSERT(std::is_trivially_copyable<Src>::value, THIS_TYPE_IS_NOT_SUPPORTED);\n  EIGEN_STATIC_ASSERT(std::is_trivially_copyable<Tgt>::value && std::is_default_constructible<Tgt>::value,\n                      THIS_TYPE_IS_NOT_SUPPORTED);\n#endif\n\n  EIGEN_STATIC_ASSERT(sizeof(Src) == sizeof(Tgt), THIS_TYPE_IS_NOT_SUPPORTED);\n  Tgt tgt;\n  EIGEN_USING_STD(memcpy)\n  memcpy(&tgt, &src, sizeof(Tgt));\n  return tgt;\n}\n}  // namespace numext\n\n/** \\class NumTraits\n  * \\ingroup Core_Module\n  *\n  * \\brief Holds information about the various numeric (i.e. 
scalar) types allowed by Eigen.\n  *\n  * \\tparam T the numeric type at hand\n  *\n  * This class stores enums, typedefs and static methods giving information about a numeric type.\n  *\n  * The provided data consists of:\n  * \\li A typedef \\c Real, giving the \"real part\" type of \\a T. If \\a T is already real,\n  *     then \\c Real is just a typedef to \\a T. If \\a T is \\c std::complex<U> then \\c Real\n  *     is a typedef to \\a U.\n  * \\li A typedef \\c NonInteger, giving the type that should be used for operations producing non-integral values,\n  *     such as quotients, square roots, etc. If \\a T is a floating-point type, then this typedef just gives\n  *     \\a T again. Note however that many Eigen functions such as internal::sqrt simply refuse to\n  *     take integers. Outside of a few cases, Eigen doesn't do automatic type promotion. Thus, this typedef is\n  *     only intended as a helper for code that needs to explicitly promote types.\n  * \\li A typedef \\c Literal giving the type to use for numeric literals such as \"2\" or \"0.5\". For instance, for \\c std::complex<U>, Literal is defined as \\c U.\n  *     Of course, this type must be fully compatible with \\a T. If in doubt, just use \\a T here.\n  * \\li A typedef \\a Nested giving the type to use to nest a value inside of the expression tree. If you don't know what\n  *     this means, just use \\a T here.\n  * \\li An enum value \\a IsComplex. It is equal to 1 if \\a T is a \\c std::complex\n  *     type, and to 0 otherwise.\n  * \\li An enum value \\a IsInteger. It is equal to \\c 1 if \\a T is an integer type such as \\c int,\n  *     and to \\c 0 otherwise.\n  * \\li Enum values ReadCost, AddCost and MulCost representing a rough estimate of the number of CPU cycles needed\n  *     by move / add / mul instructions respectively, assuming the data is already stored in CPU registers.\n  *     Stay vague here. No need to do architecture-specific stuff. If you don't know what this means, just use \\c Eigen::HugeCost.\n  * \\li An enum value \\a IsSigned. It is equal to \\c 1 if \\a T is a signed type and to 0 if \\a T is unsigned.\n  * \\li An enum value \\a RequireInitialization. It is equal to \\c 1 if the constructor of the numeric type \\a T must\n  *     be called, and to 0 if it is safe not to call it. Default is 0 if \\a T is an arithmetic type, and 1 otherwise.\n  * \\li An epsilon() function which, unlike <a href=\"http://en.cppreference.com/w/cpp/types/numeric_limits/epsilon\">std::numeric_limits::epsilon()</a>,\n  *     returns a \\a Real instead of a \\a T.\n  * \\li A dummy_precision() function returning a weak epsilon value. It is mainly used as a default\n  *     value by the fuzzy comparison operators.\n  * \\li highest() and lowest() functions returning the highest and lowest possible values respectively.\n  * \\li digits() function returning the number of radix digits (non-sign digits for integers, mantissa for floating-point). This is\n  *     the analogue of <a href=\"http://en.cppreference.com/w/cpp/types/numeric_limits/digits\">std::numeric_limits<T>::digits</a>\n  *     which is used as the default implementation if specialized.\n  * \\li digits10() function returning the number of decimal digits that can be represented without change. 
This is\n  *     the analogue of <a href=\"http://en.cppreference.com/w/cpp/types/numeric_limits/digits10\">std::numeric_limits<T>::digits10</a>\n  *     which is used as the default implementation if specialized.\n  * \\li min_exponent() and max_exponent() functions returning the lowest and highest possible values, respectively,\n  *     such that the radix raised to the power exponent-1 is a normalized floating-point number.  These are equivalent to\n  *     <a href=\"http://en.cppreference.com/w/cpp/types/numeric_limits/min_exponent\">std::numeric_limits<T>::min_exponent</a>/\n  *     <a href=\"http://en.cppreference.com/w/cpp/types/numeric_limits/max_exponent\">std::numeric_limits<T>::max_exponent</a>.\n  * \\li infinity() function returning a representation of positive infinity, if available.\n  * \\li quiet_NaN() function returning a non-signaling \"not-a-number\", if available.\n  */\n\ntemplate<typename T> struct GenericNumTraits\n{\n  enum {\n    IsInteger = std::numeric_limits<T>::is_integer,\n    IsSigned = std::numeric_limits<T>::is_signed,\n    IsComplex = 0,\n    RequireInitialization = internal::is_arithmetic<T>::value ? 0 : 1,\n    ReadCost = 1,\n    AddCost = 1,\n    MulCost = 1\n  };\n\n  typedef T Real;\n  typedef typename internal::conditional<\n                     IsInteger,\n                     typename internal::conditional<sizeof(T)<=2, float, double>::type,\n                     T\n                   >::type NonInteger;\n  typedef T Nested;\n  typedef T Literal;\n\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n  static inline Real epsilon()\n  {\n    return numext::numeric_limits<T>::epsilon();\n  }\n\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n  static inline int digits10()\n  {\n    return internal::default_digits10_impl<T>::run();\n  }\n\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n  static inline int digits()\n  {\n    return internal::default_digits_impl<T>::run();\n  }\n\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n  static inline int min_exponent()\n  {\n    return numext::numeric_limits<T>::min_exponent;\n  }\n\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n  static inline int max_exponent()\n  {\n    return numext::numeric_limits<T>::max_exponent;\n  }\n\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n  static inline Real dummy_precision()\n  {\n    // make sure to override this for floating-point types\n    return Real(0);\n  }\n\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n  static inline T highest() {\n    return (numext::numeric_limits<T>::max)();\n  }\n\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n  static inline T lowest()  {\n    return IsInteger ? 
(numext::numeric_limits<T>::min)()\n                     : static_cast<T>(-(numext::numeric_limits<T>::max)());\n  }\n\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n  static inline T infinity() {\n    return numext::numeric_limits<T>::infinity();\n  }\n\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n  static inline T quiet_NaN() {\n    return numext::numeric_limits<T>::quiet_NaN();\n  }\n};\n\ntemplate<typename T> struct NumTraits : GenericNumTraits<T>\n{};\n\ntemplate<> struct NumTraits<float>\n  : GenericNumTraits<float>\n{\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n  static inline float dummy_precision() { return 1e-5f; }\n};\n\ntemplate<> struct NumTraits<double> : GenericNumTraits<double>\n{\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n  static inline double dummy_precision() { return 1e-12; }\n};\n\ntemplate<> struct NumTraits<long double>\n  : GenericNumTraits<long double>\n{\n  EIGEN_CONSTEXPR\n  static inline long double dummy_precision() { return 1e-15l; }\n};\n\ntemplate<typename _Real> struct NumTraits<std::complex<_Real> >\n  : GenericNumTraits<std::complex<_Real> >\n{\n  typedef _Real Real;\n  typedef typename NumTraits<_Real>::Literal Literal;\n  enum {\n    IsComplex = 1,\n    RequireInitialization = NumTraits<_Real>::RequireInitialization,\n    ReadCost = 2 * NumTraits<_Real>::ReadCost,\n    AddCost = 2 * NumTraits<Real>::AddCost,\n    MulCost = 4 * NumTraits<Real>::MulCost + 2 * NumTraits<Real>::AddCost\n  };\n\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n  static inline Real epsilon() { return NumTraits<Real>::epsilon(); }\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n  static inline Real dummy_precision() { return NumTraits<Real>::dummy_precision(); }\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n  static inline int digits10() { return NumTraits<Real>::digits10(); }\n};\n\ntemplate<typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols>\nstruct NumTraits<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> >\n{\n  typedef Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> ArrayType;\n  typedef typename NumTraits<Scalar>::Real RealScalar;\n  typedef Array<RealScalar, Rows, Cols, Options, MaxRows, MaxCols> Real;\n  typedef typename NumTraits<Scalar>::NonInteger NonIntegerScalar;\n  typedef Array<NonIntegerScalar, Rows, Cols, Options, MaxRows, MaxCols> NonInteger;\n  typedef ArrayType & Nested;\n  typedef typename NumTraits<Scalar>::Literal Literal;\n\n  enum {\n    IsComplex = NumTraits<Scalar>::IsComplex,\n    IsInteger = NumTraits<Scalar>::IsInteger,\n    IsSigned  = NumTraits<Scalar>::IsSigned,\n    RequireInitialization = 1,\n    ReadCost = ArrayType::SizeAtCompileTime==Dynamic ? HugeCost : ArrayType::SizeAtCompileTime * int(NumTraits<Scalar>::ReadCost),\n    AddCost  = ArrayType::SizeAtCompileTime==Dynamic ? HugeCost : ArrayType::SizeAtCompileTime * int(NumTraits<Scalar>::AddCost),\n    MulCost  = ArrayType::SizeAtCompileTime==Dynamic ? 
HugeCost : ArrayType::SizeAtCompileTime * int(NumTraits<Scalar>::MulCost)\n  };\n\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n  static inline RealScalar epsilon() { return NumTraits<RealScalar>::epsilon(); }\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n  static inline RealScalar dummy_precision() { return NumTraits<RealScalar>::dummy_precision(); }\n\n  EIGEN_CONSTEXPR\n  static inline int digits10() { return NumTraits<Scalar>::digits10(); }\n};\n\ntemplate<> struct NumTraits<std::string>\n  : GenericNumTraits<std::string>\n{\n  enum {\n    RequireInitialization = 1,\n    ReadCost = HugeCost,\n    AddCost  = HugeCost,\n    MulCost  = HugeCost\n  };\n\n  EIGEN_CONSTEXPR\n  static inline int digits10() { return 0; }\n\nprivate:\n  static inline std::string epsilon();\n  static inline std::string dummy_precision();\n  static inline std::string lowest();\n  static inline std::string highest();\n  static inline std::string infinity();\n  static inline std::string quiet_NaN();\n};\n\n// Empty specialization for void to allow template specialization based on NumTraits<T>::Real with T==void and SFINAE.\ntemplate<> struct NumTraits<void> {};\n\ntemplate<> struct NumTraits<bool> : GenericNumTraits<bool> {};\n\n} // end namespace Eigen\n\n#endif // EIGEN_NUMTRAITS_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/PartialReduxEvaluator.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2011-2018 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_PARTIALREDUX_H\n#define EIGEN_PARTIALREDUX_H\n\nnamespace Eigen { \n\nnamespace internal {\n\n\n/***************************************************************************\n*\n* This file provides evaluators for partial reductions.\n* There are two modes:\n*\n*  - scalar path: simply calls the respective function on the column or row.\n*    -> nothing special here, all the tricky part is handled by the return\n*       types of VectorwiseOp's members. They embed the functor calling the\n*       respective DenseBase's member function.\n*\n*  - vectorized path: implements a packet-wise reductions followed by\n*    some (optional) processing of the outcome, e.g., division by n for mean.\n*\n* For the vectorized path let's observe that the packet-size and outer-unrolling\n* are both decided by the assignement logic. So all we have to do is to decide\n* on the inner unrolling.\n*\n* For the unrolling, we can reuse \"internal::redux_vec_unroller\" from Redux.h,\n* but be need to be careful to specify correct increment.\n*\n***************************************************************************/\n\n\n/* logic deciding a strategy for unrolling of vectorized paths */\ntemplate<typename Func, typename Evaluator>\nstruct packetwise_redux_traits\n{\n  enum {\n    OuterSize = int(Evaluator::IsRowMajor) ? Evaluator::RowsAtCompileTime : Evaluator::ColsAtCompileTime,\n    Cost = OuterSize == Dynamic ? HugeCost\n         : OuterSize * Evaluator::CoeffReadCost + (OuterSize-1) * functor_traits<Func>::Cost,\n    Unrolling = Cost <= EIGEN_UNROLLING_LIMIT ? 
CompleteUnrolling : NoUnrolling\n  };\n\n};\n\n/* Value to be returned when size==0 , by default let's return 0 */\ntemplate<typename PacketType,typename Func>\nEIGEN_DEVICE_FUNC\nPacketType packetwise_redux_empty_value(const Func& ) { return pset1<PacketType>(0); }\n\n/* For products the default is 1 */\ntemplate<typename PacketType,typename Scalar>\nEIGEN_DEVICE_FUNC\nPacketType packetwise_redux_empty_value(const scalar_product_op<Scalar,Scalar>& ) { return pset1<PacketType>(1); }\n\n/* Perform the actual reduction */\ntemplate<typename Func, typename Evaluator,\n         int Unrolling = packetwise_redux_traits<Func, Evaluator>::Unrolling\n>\nstruct packetwise_redux_impl;\n\n/* Perform the actual reduction with unrolling */\ntemplate<typename Func, typename Evaluator>\nstruct packetwise_redux_impl<Func, Evaluator, CompleteUnrolling>\n{\n  typedef redux_novec_unroller<Func,Evaluator, 0, Evaluator::SizeAtCompileTime> Base;\n  typedef typename Evaluator::Scalar Scalar;\n\n  template<typename PacketType>\n  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE\n  PacketType run(const Evaluator &eval, const Func& func, Index /*size*/)\n  {\n    return redux_vec_unroller<Func, Evaluator, 0, packetwise_redux_traits<Func, Evaluator>::OuterSize>::template run<PacketType>(eval,func);\n  }\n};\n\n/* Add a specialization of redux_vec_unroller for size==0 at compiletime.\n * This specialization is not required for general reductions, which is\n * why it is defined here.\n */\ntemplate<typename Func, typename Evaluator, int Start>\nstruct redux_vec_unroller<Func, Evaluator, Start, 0>\n{\n  template<typename PacketType>\n  EIGEN_DEVICE_FUNC\n  static EIGEN_STRONG_INLINE PacketType run(const Evaluator &, const Func& f)\n  {\n    return packetwise_redux_empty_value<PacketType>(f);\n  }\n};\n\n/* Perform the actual reduction for dynamic sizes */\ntemplate<typename Func, typename Evaluator>\nstruct packetwise_redux_impl<Func, Evaluator, NoUnrolling>\n{\n  typedef typename Evaluator::Scalar Scalar;\n  typedef typename redux_traits<Func, Evaluator>::PacketType PacketScalar;\n\n  template<typename PacketType>\n  EIGEN_DEVICE_FUNC\n  static PacketType run(const Evaluator &eval, const Func& func, Index size)\n  {\n    if(size==0)\n      return packetwise_redux_empty_value<PacketType>(func);\n    \n    const Index size4 = (size-1)&(~3);\n    PacketType p = eval.template packetByOuterInner<Unaligned,PacketType>(0,0);\n    Index i = 1;\n    // This loop is optimized for instruction pipelining:\n    // - each iteration generates two independent instructions\n    // - thanks to branch prediction and out-of-order execution we have independent instructions across loops\n    for(; i<size4; i+=4)\n      p = func.packetOp(p,\n            func.packetOp(\n              func.packetOp(eval.template packetByOuterInner<Unaligned,PacketType>(i+0,0),eval.template packetByOuterInner<Unaligned,PacketType>(i+1,0)),\n              func.packetOp(eval.template packetByOuterInner<Unaligned,PacketType>(i+2,0),eval.template packetByOuterInner<Unaligned,PacketType>(i+3,0))));\n    for(; i<size; ++i)\n      p = func.packetOp(p, eval.template packetByOuterInner<Unaligned,PacketType>(i,0));\n    return p;\n  }\n};\n\ntemplate< typename ArgType, typename MemberOp, int Direction>\nstruct evaluator<PartialReduxExpr<ArgType, MemberOp, Direction> >\n  : evaluator_base<PartialReduxExpr<ArgType, MemberOp, Direction> >\n{\n  typedef PartialReduxExpr<ArgType, MemberOp, Direction> XprType;\n  typedef typename internal::nested_eval<ArgType,1>::type 
ArgTypeNested;\n  typedef typename internal::add_const_on_value_type<ArgTypeNested>::type ConstArgTypeNested;\n  typedef typename internal::remove_all<ArgTypeNested>::type ArgTypeNestedCleaned;\n  typedef typename ArgType::Scalar InputScalar;\n  typedef typename XprType::Scalar Scalar;\n  enum {\n    TraversalSize = Direction==int(Vertical) ? int(ArgType::RowsAtCompileTime) :  int(ArgType::ColsAtCompileTime)\n  };\n  typedef typename MemberOp::template Cost<int(TraversalSize)> CostOpType;\n  enum {\n    CoeffReadCost = TraversalSize==Dynamic ? HugeCost\n                  : TraversalSize==0 ? 1\n                  : int(TraversalSize) * int(evaluator<ArgType>::CoeffReadCost) + int(CostOpType::value),\n    \n    _ArgFlags = evaluator<ArgType>::Flags,\n\n    _Vectorizable =  bool(int(_ArgFlags)&PacketAccessBit)\n                  && bool(MemberOp::Vectorizable)\n                  && (Direction==int(Vertical) ? bool(_ArgFlags&RowMajorBit) : (_ArgFlags&RowMajorBit)==0)\n                  && (TraversalSize!=0),\n                  \n    Flags = (traits<XprType>::Flags&RowMajorBit)\n          | (evaluator<ArgType>::Flags&(HereditaryBits&(~RowMajorBit)))\n          | (_Vectorizable ? PacketAccessBit : 0)\n          | LinearAccessBit,\n    \n    Alignment = 0 // FIXME this will need to be improved once PartialReduxExpr is vectorized\n  };\n\n  EIGEN_DEVICE_FUNC explicit evaluator(const XprType xpr)\n    : m_arg(xpr.nestedExpression()), m_functor(xpr.functor())\n  {\n    EIGEN_INTERNAL_CHECK_COST_VALUE(TraversalSize==Dynamic ? HugeCost : (TraversalSize==0 ? 1 : int(CostOpType::value)));\n    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);\n  }\n\n  typedef typename XprType::CoeffReturnType CoeffReturnType;\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  const Scalar coeff(Index i, Index j) const\n  {\n    return coeff(Direction==Vertical ? j : i);\n  }\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  const Scalar coeff(Index index) const\n  {\n    return m_functor(m_arg.template subVector<DirectionType(Direction)>(index));\n  }\n\n  template<int LoadMode,typename PacketType>\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  PacketType packet(Index i, Index j) const\n  {\n    return packet<LoadMode,PacketType>(Direction==Vertical ? j : i);\n  }\n  \n  template<int LoadMode,typename PacketType>\n  EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC\n  PacketType packet(Index idx) const\n  {\n    enum { PacketSize = internal::unpacket_traits<PacketType>::size };\n    typedef Block<const ArgTypeNestedCleaned,\n                  Direction==Vertical ? int(ArgType::RowsAtCompileTime) : int(PacketSize),\n                  Direction==Vertical ? int(PacketSize) : int(ArgType::ColsAtCompileTime),\n                  true /* InnerPanel */> PanelType;\n    \n    PanelType panel(m_arg,\n                    Direction==Vertical ? 0 : idx,\n                    Direction==Vertical ? idx : 0,\n                    Direction==Vertical ? m_arg.rows() : Index(PacketSize),\n                    Direction==Vertical ? Index(PacketSize) : m_arg.cols());\n\n    // FIXME\n    // See bug 1612, currently if PacketSize==1 (i.e. 
complex<double> with 128-bit registers) then the storage order of the panel gets reversed\n    // and methods like packetByOuterInner do not make sense anymore in this context.\n    // So let's just bypass \"vectorization\" in this case:\n    if(PacketSize==1)\n      return internal::pset1<PacketType>(coeff(idx));\n    \n    typedef typename internal::redux_evaluator<PanelType> PanelEvaluator;\n    PanelEvaluator panel_eval(panel);\n    typedef typename MemberOp::BinaryOp BinaryOp;\n    PacketType p = internal::packetwise_redux_impl<BinaryOp,PanelEvaluator>::template run<PacketType>(panel_eval,m_functor.binaryFunc(),m_arg.outerSize());\n    return p;\n  }\n\nprotected:\n  ConstArgTypeNested m_arg;\n  const MemberOp m_functor;\n};\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_PARTIALREDUX_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/PermutationMatrix.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com>\n// Copyright (C) 2009-2015 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_PERMUTATIONMATRIX_H\n#define EIGEN_PERMUTATIONMATRIX_H\n\nnamespace Eigen { \n\nnamespace internal {\n\nenum PermPermProduct_t {PermPermProduct};\n\n} // end namespace internal\n\n/** \\class PermutationBase\n  * \\ingroup Core_Module\n  *\n  * \\brief Base class for permutations\n  *\n  * \\tparam Derived the derived class\n  *\n  * This class is the base class for all expressions representing a permutation matrix,\n  * internally stored as a vector of integers.\n  * The convention followed here is that if \\f$ \\sigma \\f$ is a permutation, the corresponding permutation matrix\n  * \\f$ P_\\sigma \\f$ is such that if \\f$ (e_1,\\ldots,e_p) \\f$ is the canonical basis, we have:\n  *  \\f[ P_\\sigma(e_i) = e_{\\sigma(i)}. \\f]\n  * This convention ensures that for any two permutations \\f$ \\sigma, \\tau \\f$, we have:\n  *  \\f[ P_{\\sigma\\circ\\tau} = P_\\sigma P_\\tau. \\f]\n  *\n  * Permutation matrices are square and invertible.\n  *\n  * Notice that in addition to the member functions and operators listed here, there also are non-member\n  * operator* to multiply any kind of permutation object with any kind of matrix expression (MatrixBase)\n  * on either side.\n  *\n  * \\sa class PermutationMatrix, class PermutationWrapper\n  */\ntemplate<typename Derived>\nclass PermutationBase : public EigenBase<Derived>\n{\n    typedef internal::traits<Derived> Traits;\n    typedef EigenBase<Derived> Base;\n  public:\n\n    #ifndef EIGEN_PARSED_BY_DOXYGEN\n    typedef typename Traits::IndicesType IndicesType;\n    enum {\n      Flags = Traits::Flags,\n      RowsAtCompileTime = Traits::RowsAtCompileTime,\n      ColsAtCompileTime = Traits::ColsAtCompileTime,\n      MaxRowsAtCompileTime = Traits::MaxRowsAtCompileTime,\n      MaxColsAtCompileTime = Traits::MaxColsAtCompileTime\n    };\n    typedef typename Traits::StorageIndex StorageIndex;\n    typedef Matrix<StorageIndex,RowsAtCompileTime,ColsAtCompileTime,0,MaxRowsAtCompileTime,MaxColsAtCompileTime>\n            DenseMatrixType;\n    typedef PermutationMatrix<IndicesType::SizeAtCompileTime,IndicesType::MaxSizeAtCompileTime,StorageIndex>\n            PlainPermutationType;\n    typedef PlainPermutationType PlainObject;\n    using Base::derived;\n    typedef Inverse<Derived> InverseReturnType;\n    typedef void Scalar;\n    #endif\n\n    /** Copies the other permutation into *this */\n    template<typename OtherDerived>\n    Derived& operator=(const PermutationBase<OtherDerived>& other)\n    {\n      indices() = other.indices();\n      return derived();\n    }\n\n    /** Assignment from the Transpositions \\a tr */\n    template<typename OtherDerived>\n    Derived& operator=(const TranspositionsBase<OtherDerived>& tr)\n    {\n      setIdentity(tr.size());\n      for(Index k=size()-1; k>=0; --k)\n        applyTranspositionOnTheRight(k,tr.coeff(k));\n      return derived();\n    }\n\n    /** \\returns the number of rows */\n    inline EIGEN_DEVICE_FUNC Index rows() const { return Index(indices().size()); }\n\n    /** \\returns the number of columns */\n    inline EIGEN_DEVICE_FUNC Index 
cols() const { return Index(indices().size()); }\n\n    /** \\returns the size of a side of the respective square matrix, i.e., the number of indices */\n    inline EIGEN_DEVICE_FUNC Index size() const { return Index(indices().size()); }\n\n    #ifndef EIGEN_PARSED_BY_DOXYGEN\n    template<typename DenseDerived>\n    void evalTo(MatrixBase<DenseDerived>& other) const\n    {\n      other.setZero();\n      for (Index i=0; i<rows(); ++i)\n        other.coeffRef(indices().coeff(i),i) = typename DenseDerived::Scalar(1);\n    }\n    #endif\n\n    /** \\returns a Matrix object initialized from this permutation matrix. Notice that it\n      * is inefficient to return this Matrix object by value. For efficiency, favor using\n      * the Matrix constructor taking EigenBase objects.\n      */\n    DenseMatrixType toDenseMatrix() const\n    {\n      return derived();\n    }\n\n    /** const version of indices(). */\n    const IndicesType& indices() const { return derived().indices(); }\n    /** \\returns a reference to the stored array representing the permutation. */\n    IndicesType& indices() { return derived().indices(); }\n\n    /** Resizes to given size.\n      */\n    inline void resize(Index newSize)\n    {\n      indices().resize(newSize);\n    }\n\n    /** Sets *this to be the identity permutation matrix */\n    void setIdentity()\n    {\n      StorageIndex n = StorageIndex(size());\n      for(StorageIndex i = 0; i < n; ++i)\n        indices().coeffRef(i) = i;\n    }\n\n    /** Sets *this to be the identity permutation matrix of given size.\n      */\n    void setIdentity(Index newSize)\n    {\n      resize(newSize);\n      setIdentity();\n    }\n\n    /** Multiplies *this by the transposition \\f$(ij)\\f$ on the left.\n      *\n      * \\returns a reference to *this.\n      *\n      * \\warning This is much slower than applyTranspositionOnTheRight(Index,Index):\n      * this has linear complexity and requires a lot of branching.\n      *\n      * \\sa applyTranspositionOnTheRight(Index,Index)\n      */\n    Derived& applyTranspositionOnTheLeft(Index i, Index j)\n    {\n      eigen_assert(i>=0 && j>=0 && i<size() && j<size());\n      for(Index k = 0; k < size(); ++k)\n      {\n        if(indices().coeff(k) == i) indices().coeffRef(k) = StorageIndex(j);\n        else if(indices().coeff(k) == j) indices().coeffRef(k) = StorageIndex(i);\n      }\n      return derived();\n    }\n\n    /** Multiplies *this by the transposition \\f$(ij)\\f$ on the right.\n      *\n      * \\returns a reference to *this.\n      *\n      * This is a fast operation, it only consists of swapping two indices.\n      *\n      * \\sa applyTranspositionOnTheLeft(Index,Index)\n      */\n    Derived& applyTranspositionOnTheRight(Index i, Index j)\n    {\n      eigen_assert(i>=0 && j>=0 && i<size() && j<size());\n      std::swap(indices().coeffRef(i), indices().coeffRef(j));\n      return derived();\n    }\n\n    /** \\returns the inverse permutation matrix.\n      *\n      * \\note \\blank \\note_try_to_help_rvo\n      */\n    inline InverseReturnType inverse() const\n    { return InverseReturnType(derived()); }\n    /** \\returns the transpose permutation matrix.\n      *\n      * \\note \\blank \\note_try_to_help_rvo\n      */\n    inline InverseReturnType transpose() const\n    { return InverseReturnType(derived()); }\n\n    /**** multiplication helpers to hopefully get RVO ****/\n\n  \n#ifndef EIGEN_PARSED_BY_DOXYGEN\n  protected:\n    template<typename OtherDerived>\n    void assignTranspose(const 
PermutationBase<OtherDerived>& other)\n    {\n      for (Index i=0; i<rows();++i) indices().coeffRef(other.indices().coeff(i)) = i;\n    }\n    template<typename Lhs,typename Rhs>\n    void assignProduct(const Lhs& lhs, const Rhs& rhs)\n    {\n      eigen_assert(lhs.cols() == rhs.rows());\n      for (Index i=0; i<rows();++i) indices().coeffRef(i) = lhs.indices().coeff(rhs.indices().coeff(i));\n    }\n#endif\n\n  public:\n\n    /** \\returns the product permutation matrix.\n      *\n      * \\note \\blank \\note_try_to_help_rvo\n      */\n    template<typename Other>\n    inline PlainPermutationType operator*(const PermutationBase<Other>& other) const\n    { return PlainPermutationType(internal::PermPermProduct, derived(), other.derived()); }\n\n    /** \\returns the product of a permutation with another inverse permutation.\n      *\n      * \\note \\blank \\note_try_to_help_rvo\n      */\n    template<typename Other>\n    inline PlainPermutationType operator*(const InverseImpl<Other,PermutationStorage>& other) const\n    { return PlainPermutationType(internal::PermPermProduct, *this, other.eval()); }\n\n    /** \\returns the product of an inverse permutation with another permutation.\n      *\n      * \\note \\blank \\note_try_to_help_rvo\n      */\n    template<typename Other> friend\n    inline PlainPermutationType operator*(const InverseImpl<Other, PermutationStorage>& other, const PermutationBase& perm)\n    { return PlainPermutationType(internal::PermPermProduct, other.eval(), perm); }\n    \n    /** \\returns the determinant of the permutation matrix, which is either 1 or -1 depending on the parity of the permutation.\n      *\n      * This function is an O(\\c n) procedure allocating a buffer of \\c n booleans.\n      */\n    Index determinant() const\n    {\n      Index res = 1;\n      Index n = size();\n      Matrix<bool,RowsAtCompileTime,1,0,MaxRowsAtCompileTime> mask(n);\n      mask.fill(false);\n      Index r = 0;\n      while(r < n)\n      {\n        // search for the next seed\n        while(r<n && mask[r]) r++;\n        if(r>=n)\n          break;\n        // we got one, let's follow it until we are back to the seed\n        Index k0 = r++;\n        mask.coeffRef(k0) = true;\n        for(Index k=indices().coeff(k0); k!=k0; k=indices().coeff(k))\n        {\n          mask.coeffRef(k) = true;\n          res = -res;\n        }\n      }\n      return res;\n    }\n\n  protected:\n\n};\n\nnamespace internal {\ntemplate<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndex>\nstruct traits<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _StorageIndex> >\n : traits<Matrix<_StorageIndex,SizeAtCompileTime,SizeAtCompileTime,0,MaxSizeAtCompileTime,MaxSizeAtCompileTime> >\n{\n  typedef PermutationStorage StorageKind;\n  typedef Matrix<_StorageIndex, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1> IndicesType;\n  typedef _StorageIndex StorageIndex;\n  typedef void Scalar;\n};\n}\n\n/** \\class PermutationMatrix\n  * \\ingroup Core_Module\n  *\n  * \\brief Permutation matrix\n  *\n  * \\tparam SizeAtCompileTime the number of rows/cols, or Dynamic\n  * \\tparam MaxSizeAtCompileTime the maximum number of rows/cols, or Dynamic. This optional parameter defaults to SizeAtCompileTime. 
Most of the time, you should not have to specify it.\n  * \\tparam _StorageIndex the integer type of the indices\n  *\n  * This class represents a permutation matrix, internally stored as a vector of integers.\n  *\n  * \\sa class PermutationBase, class PermutationWrapper, class DiagonalMatrix\n  */\ntemplate<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndex>\nclass PermutationMatrix : public PermutationBase<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _StorageIndex> >\n{\n    typedef PermutationBase<PermutationMatrix> Base;\n    typedef internal::traits<PermutationMatrix> Traits;\n  public:\n\n    typedef const PermutationMatrix& Nested;\n\n    #ifndef EIGEN_PARSED_BY_DOXYGEN\n    typedef typename Traits::IndicesType IndicesType;\n    typedef typename Traits::StorageIndex StorageIndex;\n    #endif\n\n    inline PermutationMatrix()\n    {}\n\n    /** Constructs an uninitialized permutation matrix of given size.\n      */\n    explicit inline PermutationMatrix(Index size) : m_indices(size)\n    {\n      eigen_internal_assert(size <= NumTraits<StorageIndex>::highest());\n    }\n\n    /** Copy constructor. */\n    template<typename OtherDerived>\n    inline PermutationMatrix(const PermutationBase<OtherDerived>& other)\n      : m_indices(other.indices()) {}\n\n    /** Generic constructor from expression of the indices. The indices\n      * array has the meaning that the permutation sends each integer i to indices[i].\n      *\n      * \\warning It is your responsibility to check that the indices array that you pass actually\n      * describes a permutation, i.e., each value between 0 and n-1 occurs exactly once, where n is the\n      * array's size.\n      */\n    template<typename Other>\n    explicit inline PermutationMatrix(const MatrixBase<Other>& indices) : m_indices(indices)\n    {}\n\n    /** Convert the Transpositions \\a tr to a permutation matrix */\n    template<typename Other>\n    explicit PermutationMatrix(const TranspositionsBase<Other>& tr)\n      : m_indices(tr.size())\n    {\n      *this = tr;\n    }\n\n    /** Copies the other permutation into *this */\n    template<typename Other>\n    PermutationMatrix& operator=(const PermutationBase<Other>& other)\n    {\n      m_indices = other.indices();\n      return *this;\n    }\n\n    /** Assignment from the Transpositions \\a tr */\n    template<typename Other>\n    PermutationMatrix& operator=(const TranspositionsBase<Other>& tr)\n    {\n      return Base::operator=(tr.derived());\n    }\n\n    /** const version of indices(). */\n    const IndicesType& indices() const { return m_indices; }\n    /** \\returns a reference to the stored array representing the permutation. 
*/\n    IndicesType& indices() { return m_indices; }\n\n\n    /**** multiplication helpers to hopefully get RVO ****/\n\n#ifndef EIGEN_PARSED_BY_DOXYGEN\n    template<typename Other>\n    PermutationMatrix(const InverseImpl<Other,PermutationStorage>& other)\n      : m_indices(other.derived().nestedExpression().size())\n    {\n      eigen_internal_assert(m_indices.size() <= NumTraits<StorageIndex>::highest());\n      StorageIndex end = StorageIndex(m_indices.size());\n      for (StorageIndex i=0; i<end;++i)\n        m_indices.coeffRef(other.derived().nestedExpression().indices().coeff(i)) = i;\n    }\n    template<typename Lhs,typename Rhs>\n    PermutationMatrix(internal::PermPermProduct_t, const Lhs& lhs, const Rhs& rhs)\n      : m_indices(lhs.indices().size())\n    {\n      Base::assignProduct(lhs,rhs);\n    }\n#endif\n\n  protected:\n\n    IndicesType m_indices;\n};\n\n\nnamespace internal {\ntemplate<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndex, int _PacketAccess>\nstruct traits<Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _StorageIndex>,_PacketAccess> >\n : traits<Matrix<_StorageIndex,SizeAtCompileTime,SizeAtCompileTime,0,MaxSizeAtCompileTime,MaxSizeAtCompileTime> >\n{\n  typedef PermutationStorage StorageKind;\n  typedef Map<const Matrix<_StorageIndex, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1>, _PacketAccess> IndicesType;\n  typedef _StorageIndex StorageIndex;\n  typedef void Scalar;\n};\n}\n\ntemplate<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndex, int _PacketAccess>\nclass Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _StorageIndex>,_PacketAccess>\n  : public PermutationBase<Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _StorageIndex>,_PacketAccess> >\n{\n    typedef PermutationBase<Map> Base;\n    typedef internal::traits<Map> Traits;\n  public:\n\n    #ifndef EIGEN_PARSED_BY_DOXYGEN\n    typedef typename Traits::IndicesType IndicesType;\n    typedef typename IndicesType::Scalar StorageIndex;\n    #endif\n\n    inline Map(const StorageIndex* indicesPtr)\n      : m_indices(indicesPtr)\n    {}\n\n    inline Map(const StorageIndex* indicesPtr, Index size)\n      : m_indices(indicesPtr,size)\n    {}\n\n    /** Copies the other permutation into *this */\n    template<typename Other>\n    Map& operator=(const PermutationBase<Other>& other)\n    { return Base::operator=(other.derived()); }\n\n    /** Assignment from the Transpositions \\a tr */\n    template<typename Other>\n    Map& operator=(const TranspositionsBase<Other>& tr)\n    { return Base::operator=(tr.derived()); }\n\n    #ifndef EIGEN_PARSED_BY_DOXYGEN\n    /** This is a special case of the templated operator=. Its purpose is to\n      * prevent a default operator= from hiding the templated operator=.\n      */\n    Map& operator=(const Map& other)\n    {\n      m_indices = other.m_indices;\n      return *this;\n    }\n    #endif\n\n    /** const version of indices(). */\n    const IndicesType& indices() const { return m_indices; }\n    /** \\returns a reference to the stored array representing the permutation. 
*/\n    IndicesType& indices() { return m_indices; }\n\n  protected:\n\n    IndicesType m_indices;\n};\n\ntemplate<typename _IndicesType> class TranspositionsWrapper;\nnamespace internal {\ntemplate<typename _IndicesType>\nstruct traits<PermutationWrapper<_IndicesType> >\n{\n  typedef PermutationStorage StorageKind;\n  typedef void Scalar;\n  typedef typename _IndicesType::Scalar StorageIndex;\n  typedef _IndicesType IndicesType;\n  enum {\n    RowsAtCompileTime = _IndicesType::SizeAtCompileTime,\n    ColsAtCompileTime = _IndicesType::SizeAtCompileTime,\n    MaxRowsAtCompileTime = IndicesType::MaxSizeAtCompileTime,\n    MaxColsAtCompileTime = IndicesType::MaxSizeAtCompileTime,\n    Flags = 0\n  };\n};\n}\n\n/** \\class PermutationWrapper\n  * \\ingroup Core_Module\n  *\n  * \\brief Class to view a vector of integers as a permutation matrix\n  *\n  * \\tparam _IndicesType the type of the vector of integer (can be any compatible expression)\n  *\n  * This class allows to view any vector expression of integers as a permutation matrix.\n  *\n  * \\sa class PermutationBase, class PermutationMatrix\n  */\ntemplate<typename _IndicesType>\nclass PermutationWrapper : public PermutationBase<PermutationWrapper<_IndicesType> >\n{\n    typedef PermutationBase<PermutationWrapper> Base;\n    typedef internal::traits<PermutationWrapper> Traits;\n  public:\n\n    #ifndef EIGEN_PARSED_BY_DOXYGEN\n    typedef typename Traits::IndicesType IndicesType;\n    #endif\n\n    inline PermutationWrapper(const IndicesType& indices)\n      : m_indices(indices)\n    {}\n\n    /** const version of indices(). */\n    const typename internal::remove_all<typename IndicesType::Nested>::type&\n    indices() const { return m_indices; }\n\n  protected:\n\n    typename IndicesType::Nested m_indices;\n};\n\n\n/** \\returns the matrix with the permutation applied to the columns.\n  */\ntemplate<typename MatrixDerived, typename PermutationDerived>\nEIGEN_DEVICE_FUNC\nconst Product<MatrixDerived, PermutationDerived, AliasFreeProduct>\noperator*(const MatrixBase<MatrixDerived> &matrix,\n          const PermutationBase<PermutationDerived>& permutation)\n{\n  return Product<MatrixDerived, PermutationDerived, AliasFreeProduct>\n            (matrix.derived(), permutation.derived());\n}\n\n/** \\returns the matrix with the permutation applied to the rows.\n  */\ntemplate<typename PermutationDerived, typename MatrixDerived>\nEIGEN_DEVICE_FUNC\nconst Product<PermutationDerived, MatrixDerived, AliasFreeProduct>\noperator*(const PermutationBase<PermutationDerived> &permutation,\n          const MatrixBase<MatrixDerived>& matrix)\n{\n  return Product<PermutationDerived, MatrixDerived, AliasFreeProduct>\n            (permutation.derived(), matrix.derived());\n}\n\n\ntemplate<typename PermutationType>\nclass InverseImpl<PermutationType, PermutationStorage>\n  : public EigenBase<Inverse<PermutationType> >\n{\n    typedef typename PermutationType::PlainPermutationType PlainPermutationType;\n    typedef internal::traits<PermutationType> PermTraits;\n  protected:\n    InverseImpl() {}\n  public:\n    typedef Inverse<PermutationType> InverseType;\n    using EigenBase<Inverse<PermutationType> >::derived;\n\n    #ifndef EIGEN_PARSED_BY_DOXYGEN\n    typedef typename PermutationType::DenseMatrixType DenseMatrixType;\n    enum {\n      RowsAtCompileTime = PermTraits::RowsAtCompileTime,\n      ColsAtCompileTime = PermTraits::ColsAtCompileTime,\n      MaxRowsAtCompileTime = PermTraits::MaxRowsAtCompileTime,\n      MaxColsAtCompileTime = 
PermTraits::MaxColsAtCompileTime\n    };\n    #endif\n\n    #ifndef EIGEN_PARSED_BY_DOXYGEN\n    template<typename DenseDerived>\n    void evalTo(MatrixBase<DenseDerived>& other) const\n    {\n      other.setZero();\n      for (Index i=0; i<derived().rows();++i)\n        other.coeffRef(i, derived().nestedExpression().indices().coeff(i)) = typename DenseDerived::Scalar(1);\n    }\n    #endif\n\n    /** \\return the equivalent permutation matrix */\n    PlainPermutationType eval() const { return derived(); }\n\n    DenseMatrixType toDenseMatrix() const { return derived(); }\n\n    /** \\returns the matrix with the inverse permutation applied to the columns.\n      */\n    template<typename OtherDerived> friend\n    const Product<OtherDerived, InverseType, AliasFreeProduct>\n    operator*(const MatrixBase<OtherDerived>& matrix, const InverseType& trPerm)\n    {\n      return Product<OtherDerived, InverseType, AliasFreeProduct>(matrix.derived(), trPerm.derived());\n    }\n\n    /** \\returns the matrix with the inverse permutation applied to the rows.\n      */\n    template<typename OtherDerived>\n    const Product<InverseType, OtherDerived, AliasFreeProduct>\n    operator*(const MatrixBase<OtherDerived>& matrix) const\n    {\n      return Product<InverseType, OtherDerived, AliasFreeProduct>(derived(), matrix.derived());\n    }\n};\n\ntemplate<typename Derived>\nconst PermutationWrapper<const Derived> MatrixBase<Derived>::asPermutation() const\n{\n  return derived();\n}\n\nnamespace internal {\n\ntemplate<> struct AssignmentKind<DenseShape,PermutationShape> { typedef EigenBase2EigenBase Kind; };\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_PERMUTATIONMATRIX_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/PlainObjectBase.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>\n// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_DENSESTORAGEBASE_H\n#define EIGEN_DENSESTORAGEBASE_H\n\n#if defined(EIGEN_INITIALIZE_MATRICES_BY_ZERO)\n# define EIGEN_INITIALIZE_COEFFS\n# define EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED for(Index i=0;i<base().size();++i) coeffRef(i)=Scalar(0);\n#elif defined(EIGEN_INITIALIZE_MATRICES_BY_NAN)\n# define EIGEN_INITIALIZE_COEFFS\n# define EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED for(Index i=0;i<base().size();++i) coeffRef(i)=std::numeric_limits<Scalar>::quiet_NaN();\n#else\n# undef EIGEN_INITIALIZE_COEFFS\n# define EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED\n#endif\n\nnamespace Eigen {\n\nnamespace internal {\n\ntemplate<int MaxSizeAtCompileTime> struct check_rows_cols_for_overflow {\n  template<typename Index>\n  EIGEN_DEVICE_FUNC\n  static EIGEN_ALWAYS_INLINE void run(Index, Index)\n  {\n  }\n};\n\ntemplate<> struct check_rows_cols_for_overflow<Dynamic> {\n  template<typename Index>\n  EIGEN_DEVICE_FUNC\n  static EIGEN_ALWAYS_INLINE void run(Index rows, Index cols)\n  {\n    // http://hg.mozilla.org/mozilla-central/file/6c8a909977d3/xpcom/ds/CheckedInt.h#l242\n    // we assume Index is signed\n    Index max_index = (std::size_t(1) << (8 * sizeof(Index) - 1)) - 1; // assume Index is signed\n    bool error = (rows == 0 || cols == 0) ? false\n               : (rows > max_index / cols);\n    if (error)\n      throw_std_bad_alloc();\n  }\n};\n\ntemplate <typename Derived,\n          typename OtherDerived = Derived,\n          bool IsVector = bool(Derived::IsVectorAtCompileTime) && bool(OtherDerived::IsVectorAtCompileTime)>\nstruct conservative_resize_like_impl;\n\ntemplate<typename MatrixTypeA, typename MatrixTypeB, bool SwapPointers> struct matrix_swap_impl;\n\n} // end namespace internal\n\n#ifdef EIGEN_PARSED_BY_DOXYGEN\nnamespace doxygen {\n\n// This is a workaround to doxygen not being able to understand the inheritance logic\n// when it is hidden by the dense_xpr_base helper struct.\n// Moreover, doxygen fails to include members that are not documented in the declaration body of\n// MatrixBase if we inherits MatrixBase<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >,\n// this is why we simply inherits MatrixBase, though this does not make sense.\n\n/** This class is just a workaround for Doxygen and it does not not actually exist. */\ntemplate<typename Derived> struct dense_xpr_base_dispatcher;\n/** This class is just a workaround for Doxygen and it does not not actually exist. */\ntemplate<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>\nstruct dense_xpr_base_dispatcher<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >\n    : public MatrixBase {};\n/** This class is just a workaround for Doxygen and it does not not actually exist. 
*/\ntemplate<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>\nstruct dense_xpr_base_dispatcher<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >\n    : public ArrayBase {};\n\n} // namespace doxygen\n\n/** \\class PlainObjectBase\n  * \\ingroup Core_Module\n  * \\brief %Dense storage base class for matrices and arrays.\n  *\n  * This class can be extended with the help of the plugin mechanism described on the page\n  * \\ref TopicCustomizing_Plugins by defining the preprocessor symbol \\c EIGEN_PLAINOBJECTBASE_PLUGIN.\n  *\n  * \\tparam Derived is the derived type, e.g., a Matrix or Array\n  *\n  * \\sa \\ref TopicClassHierarchy\n  */\ntemplate<typename Derived>\nclass PlainObjectBase : public doxygen::dense_xpr_base_dispatcher<Derived>\n#else\ntemplate<typename Derived>\nclass PlainObjectBase : public internal::dense_xpr_base<Derived>::type\n#endif\n{\n  public:\n    enum { Options = internal::traits<Derived>::Options };\n    typedef typename internal::dense_xpr_base<Derived>::type Base;\n\n    typedef typename internal::traits<Derived>::StorageKind StorageKind;\n    typedef typename internal::traits<Derived>::Scalar Scalar;\n\n    typedef typename internal::packet_traits<Scalar>::type PacketScalar;\n    typedef typename NumTraits<Scalar>::Real RealScalar;\n    typedef Derived DenseType;\n\n    using Base::RowsAtCompileTime;\n    using Base::ColsAtCompileTime;\n    using Base::SizeAtCompileTime;\n    using Base::MaxRowsAtCompileTime;\n    using Base::MaxColsAtCompileTime;\n    using Base::MaxSizeAtCompileTime;\n    using Base::IsVectorAtCompileTime;\n    using Base::Flags;\n\n    typedef Eigen::Map<Derived, Unaligned>  MapType;\n    typedef const Eigen::Map<const Derived, Unaligned> ConstMapType;\n    typedef Eigen::Map<Derived, AlignedMax> AlignedMapType;\n    typedef const Eigen::Map<const Derived, AlignedMax> ConstAlignedMapType;\n    template<typename StrideType> struct StridedMapType { typedef Eigen::Map<Derived, Unaligned, StrideType> type; };\n    template<typename StrideType> struct StridedConstMapType { typedef Eigen::Map<const Derived, Unaligned, StrideType> type; };\n    template<typename StrideType> struct StridedAlignedMapType { typedef Eigen::Map<Derived, AlignedMax, StrideType> type; };\n    template<typename StrideType> struct StridedConstAlignedMapType { typedef Eigen::Map<const Derived, AlignedMax, StrideType> type; };\n\n  protected:\n    DenseStorage<Scalar, Base::MaxSizeAtCompileTime, Base::RowsAtCompileTime, Base::ColsAtCompileTime, Options> m_storage;\n\n  public:\n    enum { NeedsToAlign = (SizeAtCompileTime != Dynamic) && (internal::traits<Derived>::Alignment>0) };\n    EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign)\n\n    EIGEN_DEVICE_FUNC\n    Base& base() { return *static_cast<Base*>(this); }\n    EIGEN_DEVICE_FUNC\n    const Base& base() const { return *static_cast<const Base*>(this); }\n\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR\n    Index rows() const EIGEN_NOEXCEPT { return m_storage.rows(); }\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR\n    Index cols() const EIGEN_NOEXCEPT { return m_storage.cols(); }\n\n    /** This is an overloaded version of DenseCoeffsBase<Derived,ReadOnlyAccessors>::coeff(Index,Index) const\n      * provided to by-pass the creation of an evaluator of the expression, thus saving compilation efforts.\n      *\n      * See DenseCoeffsBase<Derived,ReadOnlyAccessors>::coeff(Index) const for details. 
*/\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE const Scalar& coeff(Index rowId, Index colId) const\n    {\n      if(Flags & RowMajorBit)\n        return m_storage.data()[colId + rowId * m_storage.cols()];\n      else // column-major\n        return m_storage.data()[rowId + colId * m_storage.rows()];\n    }\n\n    /** This is an overloaded version of DenseCoeffsBase<Derived,ReadOnlyAccessors>::coeff(Index) const\n      * provided to by-pass the creation of an evaluator of the expression, thus saving compilation efforts.\n      *\n      * See DenseCoeffsBase<Derived,ReadOnlyAccessors>::coeff(Index) const for details. */\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE const Scalar& coeff(Index index) const\n    {\n      return m_storage.data()[index];\n    }\n\n    /** This is an overloaded version of DenseCoeffsBase<Derived,WriteAccessors>::coeffRef(Index,Index) const\n      * provided to by-pass the creation of an evaluator of the expression, thus saving compilation efforts.\n      *\n      * See DenseCoeffsBase<Derived,WriteAccessors>::coeffRef(Index,Index) const for details. */\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE Scalar& coeffRef(Index rowId, Index colId)\n    {\n      if(Flags & RowMajorBit)\n        return m_storage.data()[colId + rowId * m_storage.cols()];\n      else // column-major\n        return m_storage.data()[rowId + colId * m_storage.rows()];\n    }\n\n    /** This is an overloaded version of DenseCoeffsBase<Derived,WriteAccessors>::coeffRef(Index) const\n      * provided to by-pass the creation of an evaluator of the expression, thus saving compilation efforts.\n      *\n      * See DenseCoeffsBase<Derived,WriteAccessors>::coeffRef(Index) const for details. */\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE Scalar& coeffRef(Index index)\n    {\n      return m_storage.data()[index];\n    }\n\n    /** This is the const version of coeffRef(Index,Index) which is thus a synonym of coeff(Index,Index).\n      * It is provided for convenience. */\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE const Scalar& coeffRef(Index rowId, Index colId) const\n    {\n      if(Flags & RowMajorBit)\n        return m_storage.data()[colId + rowId * m_storage.cols()];\n      else // column-major\n        return m_storage.data()[rowId + colId * m_storage.rows()];\n    }\n\n    /** This is the const version of coeffRef(Index) which is thus a synonym of coeff(Index).\n      * It is provided for convenience. */\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE const Scalar& coeffRef(Index index) const\n    {\n      return m_storage.data()[index];\n    }\n\n    /** \\internal */\n    template<int LoadMode>\n    EIGEN_STRONG_INLINE PacketScalar packet(Index rowId, Index colId) const\n    {\n      return internal::ploadt<PacketScalar, LoadMode>\n               (m_storage.data() + (Flags & RowMajorBit\n                                   ? colId + rowId * m_storage.cols()\n                                   : rowId + colId * m_storage.rows()));\n    }\n\n    /** \\internal */\n    template<int LoadMode>\n    EIGEN_STRONG_INLINE PacketScalar packet(Index index) const\n    {\n      return internal::ploadt<PacketScalar, LoadMode>(m_storage.data() + index);\n    }\n\n    /** \\internal */\n    template<int StoreMode>\n    EIGEN_STRONG_INLINE void writePacket(Index rowId, Index colId, const PacketScalar& val)\n    {\n      internal::pstoret<Scalar, PacketScalar, StoreMode>\n              (m_storage.data() + (Flags & RowMajorBit\n                                   ? 
colId + rowId * m_storage.cols()\n                                   : rowId + colId * m_storage.rows()), val);\n    }\n\n    /** \\internal */\n    template<int StoreMode>\n    EIGEN_STRONG_INLINE void writePacket(Index index, const PacketScalar& val)\n    {\n      internal::pstoret<Scalar, PacketScalar, StoreMode>(m_storage.data() + index, val);\n    }\n\n    /** \\returns a const pointer to the data array of this matrix */\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar *data() const\n    { return m_storage.data(); }\n\n    /** \\returns a pointer to the data array of this matrix */\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar *data()\n    { return m_storage.data(); }\n\n    /** Resizes \\c *this to a \\a rows x \\a cols matrix.\n      *\n      * This method is intended for dynamic-size matrices, although it is legal to call it on any\n      * matrix as long as fixed dimensions are left unchanged. If you only want to change the number\n      * of rows and/or of columns, you can use resize(NoChange_t, Index), resize(Index, NoChange_t).\n      *\n      * If the current number of coefficients of \\c *this exactly matches the\n      * product \\a rows * \\a cols, then no memory allocation is performed and\n      * the current values are left unchanged. In all other cases, including\n      * shrinking, the data is reallocated and all previous values are lost.\n      *\n      * Example: \\include Matrix_resize_int_int.cpp\n      * Output: \\verbinclude Matrix_resize_int_int.out\n      *\n      * \\sa resize(Index) for vectors, resize(NoChange_t, Index), resize(Index, NoChange_t)\n      */\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE void resize(Index rows, Index cols)\n    {\n      eigen_assert(   EIGEN_IMPLIES(RowsAtCompileTime!=Dynamic,rows==RowsAtCompileTime)\n                   && EIGEN_IMPLIES(ColsAtCompileTime!=Dynamic,cols==ColsAtCompileTime)\n                   && EIGEN_IMPLIES(RowsAtCompileTime==Dynamic && MaxRowsAtCompileTime!=Dynamic,rows<=MaxRowsAtCompileTime)\n                   && EIGEN_IMPLIES(ColsAtCompileTime==Dynamic && MaxColsAtCompileTime!=Dynamic,cols<=MaxColsAtCompileTime)\n                   && rows>=0 && cols>=0 && \"Invalid sizes when resizing a matrix or array.\");\n      internal::check_rows_cols_for_overflow<MaxSizeAtCompileTime>::run(rows, cols);\n      #ifdef EIGEN_INITIALIZE_COEFFS\n        Index size = rows*cols;\n        bool size_changed = size != this->size();\n        m_storage.resize(size, rows, cols);\n        if(size_changed) EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED\n      #else\n        m_storage.resize(rows*cols, rows, cols);\n      #endif\n    }\n\n    /** Resizes \\c *this to a vector of length \\a size\n      *\n      * \\only_for_vectors. This method does not work for\n      * partially dynamic matrices when the static dimension is anything other\n      * than 1. 
For example it will not work with Matrix<double, 2, Dynamic>.\n      *\n      * Example: \\include Matrix_resize_int.cpp\n      * Output: \\verbinclude Matrix_resize_int.out\n      *\n      * \\sa resize(Index,Index), resize(NoChange_t, Index), resize(Index, NoChange_t)\n      */\n    EIGEN_DEVICE_FUNC\n    inline void resize(Index size)\n    {\n      EIGEN_STATIC_ASSERT_VECTOR_ONLY(PlainObjectBase)\n      eigen_assert(((SizeAtCompileTime == Dynamic && (MaxSizeAtCompileTime==Dynamic || size<=MaxSizeAtCompileTime)) || SizeAtCompileTime == size) && size>=0);\n      #ifdef EIGEN_INITIALIZE_COEFFS\n        bool size_changed = size != this->size();\n      #endif\n      if(RowsAtCompileTime == 1)\n        m_storage.resize(size, 1, size);\n      else\n        m_storage.resize(size, size, 1);\n      #ifdef EIGEN_INITIALIZE_COEFFS\n        if(size_changed) EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED\n      #endif\n    }\n\n    /** Resizes the matrix, changing only the number of columns. For the parameter of type NoChange_t, just pass the special value \\c NoChange\n      * as in the example below.\n      *\n      * Example: \\include Matrix_resize_NoChange_int.cpp\n      * Output: \\verbinclude Matrix_resize_NoChange_int.out\n      *\n      * \\sa resize(Index,Index)\n      */\n    EIGEN_DEVICE_FUNC\n    inline void resize(NoChange_t, Index cols)\n    {\n      resize(rows(), cols);\n    }\n\n    /** Resizes the matrix, changing only the number of rows. For the parameter of type NoChange_t, just pass the special value \\c NoChange\n      * as in the example below.\n      *\n      * Example: \\include Matrix_resize_int_NoChange.cpp\n      * Output: \\verbinclude Matrix_resize_int_NoChange.out\n      *\n      * \\sa resize(Index,Index)\n      */\n    EIGEN_DEVICE_FUNC\n    inline void resize(Index rows, NoChange_t)\n    {\n      resize(rows, cols());\n    }\n\n    /** Resizes \\c *this to have the same dimensions as \\a other.\n      * Takes care of doing all the checking that's needed.\n      *\n      * Note that copying a row-vector into a vector (and conversely) is allowed.\n      * The resizing, if any, is then done in the appropriate way so that row-vectors\n      * remain row-vectors and vectors remain vectors.\n      */\n    template<typename OtherDerived>\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE void resizeLike(const EigenBase<OtherDerived>& _other)\n    {\n      const OtherDerived& other = _other.derived();\n      internal::check_rows_cols_for_overflow<MaxSizeAtCompileTime>::run(other.rows(), other.cols());\n      const Index othersize = other.rows()*other.cols();\n      if(RowsAtCompileTime == 1)\n      {\n        eigen_assert(other.rows() == 1 || other.cols() == 1);\n        resize(1, othersize);\n      }\n      else if(ColsAtCompileTime == 1)\n      {\n        eigen_assert(other.rows() == 1 || other.cols() == 1);\n        resize(othersize, 1);\n      }\n      else resize(other.rows(), other.cols());\n    }\n\n    /** Resizes the matrix to \\a rows x \\a cols while leaving old values untouched.\n      *\n      * The method is intended for matrices of dynamic size. If you only want to change the number\n      * of rows and/or of columns, you can use conservativeResize(NoChange_t, Index) or\n      * conservativeResize(Index, NoChange_t).\n      *\n      * Matrices are resized relative to the top-left element. 
In case values need to be\n      * appended to the matrix they will be uninitialized.\n      */\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE void conservativeResize(Index rows, Index cols)\n    {\n      internal::conservative_resize_like_impl<Derived>::run(*this, rows, cols);\n    }\n\n    /** Resizes the matrix to \\a rows x \\a cols while leaving old values untouched.\n      *\n      * As opposed to conservativeResize(Index rows, Index cols), this version leaves\n      * the number of columns unchanged.\n      *\n      * In case the matrix is growing, new rows will be uninitialized.\n      */\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE void conservativeResize(Index rows, NoChange_t)\n    {\n      // Note: see the comment in conservativeResize(Index,Index)\n      conservativeResize(rows, cols());\n    }\n\n    /** Resizes the matrix to \\a rows x \\a cols while leaving old values untouched.\n      *\n      * As opposed to conservativeResize(Index rows, Index cols), this version leaves\n      * the number of rows unchanged.\n      *\n      * In case the matrix is growing, new columns will be uninitialized.\n      */\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE void conservativeResize(NoChange_t, Index cols)\n    {\n      // Note: see the comment in conservativeResize(Index,Index)\n      conservativeResize(rows(), cols);\n    }\n\n    /** Resizes the vector to \\a size while retaining old values.\n      *\n      * \\only_for_vectors. This method does not work for\n      * partially dynamic matrices when the static dimension is anything other\n      * than 1. For example it will not work with Matrix<double, 2, Dynamic>.\n      *\n      * When values are appended, they will be uninitialized.\n      */\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE void conservativeResize(Index size)\n    {\n      internal::conservative_resize_like_impl<Derived>::run(*this, size);\n    }\n\n    /** Resizes the matrix to \\a rows x \\a cols of \\c other, while leaving old values untouched.\n      *\n      * The method is intended for matrices of dynamic size. If you only want to change the number\n      * of rows and/or of columns, you can use conservativeResize(NoChange_t, Index) or\n      * conservativeResize(Index, NoChange_t).\n      *\n      * Matrices are resized relative to the top-left element. In case values need to be\n      * appended to the matrix they will be copied from \\c other.\n      */\n    template<typename OtherDerived>\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE void conservativeResizeLike(const DenseBase<OtherDerived>& other)\n    {\n      internal::conservative_resize_like_impl<Derived,OtherDerived>::run(*this, other);\n    }\n\n    /** This is a special case of the templated operator=. 
Its purpose is to\n      * prevent a default operator= from hiding the templated operator=.\n      */\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE Derived& operator=(const PlainObjectBase& other)\n    {\n      return _set(other);\n    }\n\n    /** \\sa MatrixBase::lazyAssign() */\n    template<typename OtherDerived>\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE Derived& lazyAssign(const DenseBase<OtherDerived>& other)\n    {\n      _resize_to_match(other);\n      return Base::lazyAssign(other.derived());\n    }\n\n    template<typename OtherDerived>\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE Derived& operator=(const ReturnByValue<OtherDerived>& func)\n    {\n      resize(func.rows(), func.cols());\n      return Base::operator=(func);\n    }\n\n    // Prevent user from trying to instantiate PlainObjectBase objects\n    // by making all its constructors protected. See bug 1074.\n  protected:\n\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE PlainObjectBase() : m_storage()\n    {\n//       _check_template_params();\n//       EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED\n    }\n\n#ifndef EIGEN_PARSED_BY_DOXYGEN\n    // FIXME is it still needed ?\n    /** \\internal */\n    EIGEN_DEVICE_FUNC\n    explicit PlainObjectBase(internal::constructor_without_unaligned_array_assert)\n      : m_storage(internal::constructor_without_unaligned_array_assert())\n    {\n//       _check_template_params(); EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED\n    }\n#endif\n\n#if EIGEN_HAS_RVALUE_REFERENCES\n    EIGEN_DEVICE_FUNC\n    PlainObjectBase(PlainObjectBase&& other) EIGEN_NOEXCEPT\n      : m_storage( std::move(other.m_storage) )\n    {\n    }\n\n    EIGEN_DEVICE_FUNC\n    PlainObjectBase& operator=(PlainObjectBase&& other) EIGEN_NOEXCEPT\n    {\n      _check_template_params();\n      m_storage = std::move(other.m_storage);\n      return *this;\n    }\n#endif\n\n    /** Copy constructor */\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE PlainObjectBase(const PlainObjectBase& other)\n      : Base(), m_storage(other.m_storage) { }\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE PlainObjectBase(Index size, Index rows, Index cols)\n      : m_storage(size, rows, cols)\n    {\n//       _check_template_params();\n//       EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED\n    }\n\n    #if EIGEN_HAS_CXX11\n    /** \\brief Construct a row or column vector with fixed size from an arbitrary number of coefficients. \\cpp11\n      *\n      * \\only_for_vectors\n      *\n      * This constructor is for 1D arrays or vectors with more than 4 coefficients.\n      * There exist C++98 analogue constructors for fixed-size array/vector having 1, 2, 3, or 4 coefficients.\n      *\n      * \\warning To construct a column (resp. row) vector of fixed length, the number of values passed to this\n      * constructor must match the fixed number of rows (resp. columns) of \\c *this.\n      */\n    template <typename... ArgTypes>\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n    PlainObjectBase(const Scalar& a0, const Scalar& a1, const Scalar& a2,  const Scalar& a3, const ArgTypes&... 
args)\n      : m_storage()\n    {\n      _check_template_params();\n      EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, sizeof...(args) + 4);\n      m_storage.data()[0] = a0;\n      m_storage.data()[1] = a1;\n      m_storage.data()[2] = a2;\n      m_storage.data()[3] = a3;\n      Index i = 4;\n      auto x = {(m_storage.data()[i++] = args, 0)...};\n      static_cast<void>(x);\n    }\n\n    /** \\brief Constructs a Matrix or Array and initializes it by elements given by an initializer list of initializer\n      * lists \\cpp11\n      */\n    EIGEN_DEVICE_FUNC\n    explicit EIGEN_STRONG_INLINE PlainObjectBase(const std::initializer_list<std::initializer_list<Scalar>>& list)\n      : m_storage()\n    {\n      _check_template_params();\n\n      size_t list_size = 0;\n      if (list.begin() != list.end()) {\n        list_size = list.begin()->size();\n      }\n\n      // This is to allow syntax like VectorXi {{1, 2, 3, 4}}\n      if (ColsAtCompileTime == 1 && list.size() == 1) {\n        eigen_assert(list_size == static_cast<size_t>(RowsAtCompileTime) || RowsAtCompileTime == Dynamic);\n        resize(list_size, ColsAtCompileTime);\n        std::copy(list.begin()->begin(), list.begin()->end(), m_storage.data());\n      } else {\n        eigen_assert(list.size() == static_cast<size_t>(RowsAtCompileTime) || RowsAtCompileTime == Dynamic);\n        eigen_assert(list_size == static_cast<size_t>(ColsAtCompileTime) || ColsAtCompileTime == Dynamic);\n        resize(list.size(), list_size);\n\n        Index row_index = 0;\n        for (const std::initializer_list<Scalar>& row : list) {\n          eigen_assert(list_size == row.size());\n          Index col_index = 0;\n          for (const Scalar& e : row) {\n            coeffRef(row_index, col_index) = e;\n            ++col_index;\n          }\n          ++row_index;\n        }\n      }\n    }\n    #endif  // end EIGEN_HAS_CXX11\n\n    /** \\sa PlainObjectBase::operator=(const EigenBase<OtherDerived>&) */\n    template<typename OtherDerived>\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE PlainObjectBase(const DenseBase<OtherDerived> &other)\n      : m_storage()\n    {\n      _check_template_params();\n      resizeLike(other);\n      _set_noalias(other);\n    }\n\n    /** \\sa PlainObjectBase::operator=(const EigenBase<OtherDerived>&) */\n    template<typename OtherDerived>\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE PlainObjectBase(const EigenBase<OtherDerived> &other)\n      : m_storage()\n    {\n      _check_template_params();\n      resizeLike(other);\n      *this = other.derived();\n    }\n    /** \\brief Copy constructor with in-place evaluation */\n    template<typename OtherDerived>\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE PlainObjectBase(const ReturnByValue<OtherDerived>& other)\n    {\n      _check_template_params();\n      // FIXME this does not automatically transpose vectors if necessary\n      resize(other.rows(), other.cols());\n      other.evalTo(this->derived());\n    }\n\n  public:\n\n    /** \\brief Copies the generic expression \\a other into *this.\n      * \\copydetails DenseBase::operator=(const EigenBase<OtherDerived> &other)\n      */\n    template<typename OtherDerived>\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE Derived& operator=(const EigenBase<OtherDerived> &other)\n    {\n      _resize_to_match(other);\n      Base::operator=(other.derived());\n      return this->derived();\n    }\n\n    /** \\name Map\n      * These are convenience functions returning Map objects. 
The Map() static functions return unaligned Map objects,\n      * while the MapAligned() functions return aligned Map objects and thus should be called only with 16-byte-aligned\n      * \\a data pointers.\n      *\n      * Here is an example using strides:\n      * \\include Matrix_Map_stride.cpp\n      * Output: \\verbinclude Matrix_Map_stride.out\n      *\n      * \\see class Map\n      */\n    //@{\n    static inline ConstMapType Map(const Scalar* data)\n    { return ConstMapType(data); }\n    static inline MapType Map(Scalar* data)\n    { return MapType(data); }\n    static inline ConstMapType Map(const Scalar* data, Index size)\n    { return ConstMapType(data, size); }\n    static inline MapType Map(Scalar* data, Index size)\n    { return MapType(data, size); }\n    static inline ConstMapType Map(const Scalar* data, Index rows, Index cols)\n    { return ConstMapType(data, rows, cols); }\n    static inline MapType Map(Scalar* data, Index rows, Index cols)\n    { return MapType(data, rows, cols); }\n\n    static inline ConstAlignedMapType MapAligned(const Scalar* data)\n    { return ConstAlignedMapType(data); }\n    static inline AlignedMapType MapAligned(Scalar* data)\n    { return AlignedMapType(data); }\n    static inline ConstAlignedMapType MapAligned(const Scalar* data, Index size)\n    { return ConstAlignedMapType(data, size); }\n    static inline AlignedMapType MapAligned(Scalar* data, Index size)\n    { return AlignedMapType(data, size); }\n    static inline ConstAlignedMapType MapAligned(const Scalar* data, Index rows, Index cols)\n    { return ConstAlignedMapType(data, rows, cols); }\n    static inline AlignedMapType MapAligned(Scalar* data, Index rows, Index cols)\n    { return AlignedMapType(data, rows, cols); }\n\n    template<int Outer, int Inner>\n    static inline typename StridedConstMapType<Stride<Outer, Inner> >::type Map(const Scalar* data, const Stride<Outer, Inner>& stride)\n    { return typename StridedConstMapType<Stride<Outer, Inner> >::type(data, stride); }\n    template<int Outer, int Inner>\n    static inline typename StridedMapType<Stride<Outer, Inner> >::type Map(Scalar* data, const Stride<Outer, Inner>& stride)\n    { return typename StridedMapType<Stride<Outer, Inner> >::type(data, stride); }\n    template<int Outer, int Inner>\n    static inline typename StridedConstMapType<Stride<Outer, Inner> >::type Map(const Scalar* data, Index size, const Stride<Outer, Inner>& stride)\n    { return typename StridedConstMapType<Stride<Outer, Inner> >::type(data, size, stride); }\n    template<int Outer, int Inner>\n    static inline typename StridedMapType<Stride<Outer, Inner> >::type Map(Scalar* data, Index size, const Stride<Outer, Inner>& stride)\n    { return typename StridedMapType<Stride<Outer, Inner> >::type(data, size, stride); }\n    template<int Outer, int Inner>\n    static inline typename StridedConstMapType<Stride<Outer, Inner> >::type Map(const Scalar* data, Index rows, Index cols, const Stride<Outer, Inner>& stride)\n    { return typename StridedConstMapType<Stride<Outer, Inner> >::type(data, rows, cols, stride); }\n    template<int Outer, int Inner>\n    static inline typename StridedMapType<Stride<Outer, Inner> >::type Map(Scalar* data, Index rows, Index cols, const Stride<Outer, Inner>& stride)\n    { return typename StridedMapType<Stride<Outer, Inner> >::type(data, rows, cols, stride); }\n\n    template<int Outer, int Inner>\n    static inline typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type MapAligned(const Scalar* data, const 
Stride<Outer, Inner>& stride)\n    { return typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type(data, stride); }\n    template<int Outer, int Inner>\n    static inline typename StridedAlignedMapType<Stride<Outer, Inner> >::type MapAligned(Scalar* data, const Stride<Outer, Inner>& stride)\n    { return typename StridedAlignedMapType<Stride<Outer, Inner> >::type(data, stride); }\n    template<int Outer, int Inner>\n    static inline typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type MapAligned(const Scalar* data, Index size, const Stride<Outer, Inner>& stride)\n    { return typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type(data, size, stride); }\n    template<int Outer, int Inner>\n    static inline typename StridedAlignedMapType<Stride<Outer, Inner> >::type MapAligned(Scalar* data, Index size, const Stride<Outer, Inner>& stride)\n    { return typename StridedAlignedMapType<Stride<Outer, Inner> >::type(data, size, stride); }\n    template<int Outer, int Inner>\n    static inline typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type MapAligned(const Scalar* data, Index rows, Index cols, const Stride<Outer, Inner>& stride)\n    { return typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type(data, rows, cols, stride); }\n    template<int Outer, int Inner>\n    static inline typename StridedAlignedMapType<Stride<Outer, Inner> >::type MapAligned(Scalar* data, Index rows, Index cols, const Stride<Outer, Inner>& stride)\n    { return typename StridedAlignedMapType<Stride<Outer, Inner> >::type(data, rows, cols, stride); }\n    //@}\n\n    using Base::setConstant;\n    EIGEN_DEVICE_FUNC Derived& setConstant(Index size, const Scalar& val);\n    EIGEN_DEVICE_FUNC Derived& setConstant(Index rows, Index cols, const Scalar& val);\n    EIGEN_DEVICE_FUNC Derived& setConstant(NoChange_t, Index cols, const Scalar& val);\n    EIGEN_DEVICE_FUNC Derived& setConstant(Index rows, NoChange_t, const Scalar& val);\n\n    using Base::setZero;\n    EIGEN_DEVICE_FUNC Derived& setZero(Index size);\n    EIGEN_DEVICE_FUNC Derived& setZero(Index rows, Index cols);\n    EIGEN_DEVICE_FUNC Derived& setZero(NoChange_t, Index cols);\n    EIGEN_DEVICE_FUNC Derived& setZero(Index rows, NoChange_t);\n\n    using Base::setOnes;\n    EIGEN_DEVICE_FUNC Derived& setOnes(Index size);\n    EIGEN_DEVICE_FUNC Derived& setOnes(Index rows, Index cols);\n    EIGEN_DEVICE_FUNC Derived& setOnes(NoChange_t, Index cols);\n    EIGEN_DEVICE_FUNC Derived& setOnes(Index rows, NoChange_t);\n\n    using Base::setRandom;\n    Derived& setRandom(Index size);\n    Derived& setRandom(Index rows, Index cols);\n    Derived& setRandom(NoChange_t, Index cols);\n    Derived& setRandom(Index rows, NoChange_t);\n\n    #ifdef EIGEN_PLAINOBJECTBASE_PLUGIN\n    #include EIGEN_PLAINOBJECTBASE_PLUGIN\n    #endif\n\n  protected:\n    /** \\internal Resizes *this in preparation for assigning \\a other to it.\n      * Takes care of doing all the checking that's needed.\n      *\n      * Note that copying a row-vector into a vector (and conversely) is allowed.\n      * The resizing, if any, is then done in the appropriate way so that row-vectors\n      * remain row-vectors and vectors remain vectors.\n      */\n    template<typename OtherDerived>\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE void _resize_to_match(const EigenBase<OtherDerived>& other)\n    {\n      #ifdef EIGEN_NO_AUTOMATIC_RESIZING\n      eigen_assert((this->size()==0 || (IsVectorAtCompileTime ? 
(this->size() == other.size())\n                 : (rows() == other.rows() && cols() == other.cols())))\n        && \"Size mismatch. Automatic resizing is disabled because EIGEN_NO_AUTOMATIC_RESIZING is defined\");\n      EIGEN_ONLY_USED_FOR_DEBUG(other);\n      #else\n      resizeLike(other);\n      #endif\n    }\n\n    /**\n      * \\brief Copies the value of the expression \\a other into \\c *this with automatic resizing.\n      *\n      * *this might be resized to match the dimensions of \\a other. If *this was a null matrix (not already initialized),\n      * it will be initialized.\n      *\n      * Note that copying a row-vector into a vector (and conversely) is allowed.\n      * The resizing, if any, is then done in the appropriate way so that row-vectors\n      * remain row-vectors and vectors remain vectors.\n      *\n      * \\sa operator=(const MatrixBase<OtherDerived>&), _set_noalias()\n      *\n      * \\internal\n      */\n    // aliasing is dealt with once in internal::call_assignment\n    // so at this stage we have to assume aliasing... and resizing has to be done later.\n    template<typename OtherDerived>\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE Derived& _set(const DenseBase<OtherDerived>& other)\n    {\n      internal::call_assignment(this->derived(), other.derived());\n      return this->derived();\n    }\n\n    /** \\internal Like _set() but additionally makes the assumption that no aliasing effect can happen (which\n      * is the case when creating a new matrix) so one can enforce lazy evaluation.\n      *\n      * \\sa operator=(const MatrixBase<OtherDerived>&), _set()\n      */\n    template<typename OtherDerived>\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE Derived& _set_noalias(const DenseBase<OtherDerived>& other)\n    {\n      // I don't think we need this resize call since the lazyAssign will anyway resize\n      // and lazyAssign will be called by the assign selector.\n      //_resize_to_match(other);\n      // the 'false' below means to enforce lazy evaluation. 
We don't use lazyAssign() because\n      // it wouldn't allow copying a row-vector into a column-vector.\n      internal::call_assignment_no_alias(this->derived(), other.derived(), internal::assign_op<Scalar,typename OtherDerived::Scalar>());\n      return this->derived();\n    }\n\n    template<typename T0, typename T1>\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE void _init2(Index rows, Index cols, typename internal::enable_if<Base::SizeAtCompileTime!=2,T0>::type* = 0)\n    {\n      const bool t0_is_integer_alike = internal::is_valid_index_type<T0>::value;\n      const bool t1_is_integer_alike = internal::is_valid_index_type<T1>::value;\n      EIGEN_STATIC_ASSERT(t0_is_integer_alike &&\n                          t1_is_integer_alike,\n                          FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED)\n      resize(rows,cols);\n    }\n\n    template<typename T0, typename T1>\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE void _init2(const T0& val0, const T1& val1, typename internal::enable_if<Base::SizeAtCompileTime==2,T0>::type* = 0)\n    {\n      EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 2)\n      m_storage.data()[0] = Scalar(val0);\n      m_storage.data()[1] = Scalar(val1);\n    }\n\n    template<typename T0, typename T1>\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE void _init2(const Index& val0, const Index& val1,\n                                    typename internal::enable_if<    (!internal::is_same<Index,Scalar>::value)\n                                                                  && (internal::is_same<T0,Index>::value)\n                                                                  && (internal::is_same<T1,Index>::value)\n                                                                  && Base::SizeAtCompileTime==2,T1>::type* = 0)\n    {\n      EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 2)\n      m_storage.data()[0] = Scalar(val0);\n      m_storage.data()[1] = Scalar(val1);\n    }\n\n    // If the argument is convertible to the Index type and we either have a non 1x1 Matrix or a dynamic-sized Array,\n    // then the argument is meant to be the size of the object.\n    template<typename T>\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE void _init1(Index size, typename internal::enable_if<    (Base::SizeAtCompileTime!=1 || !internal::is_convertible<T, Scalar>::value)\n                                                                              && ((!internal::is_same<typename internal::traits<Derived>::XprKind,ArrayXpr>::value || Base::SizeAtCompileTime==Dynamic)),T>::type* = 0)\n    {\n      // NOTE MSVC 2008 complains if we directly put bool(NumTraits<T>::IsInteger) as the EIGEN_STATIC_ASSERT argument.\n      const bool is_integer_alike = internal::is_valid_index_type<T>::value;\n      EIGEN_UNUSED_VARIABLE(is_integer_alike);\n      EIGEN_STATIC_ASSERT(is_integer_alike,\n                          FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED)\n      resize(size);\n    }\n\n    // We have a 1x1 matrix/array => the argument is interpreted as the value of the unique coefficient (case where scalar type can be implicitly converted)\n    template<typename T>\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE void _init1(const Scalar& val0, typename internal::enable_if<Base::SizeAtCompileTime==1 && internal::is_convertible<T, Scalar>::value,T>::type* = 0)\n    {\n      EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 1)\n      m_storage.data()[0] = val0;\n    }\n\n    // We have a 1x1 matrix/array => the argument is 
interpreted as the value of the unique coefficient (case where scalar type matches the index type)\n    template<typename T>\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE void _init1(const Index& val0,\n                                    typename internal::enable_if<    (!internal::is_same<Index,Scalar>::value)\n                                                                  && (internal::is_same<Index,T>::value)\n                                                                  && Base::SizeAtCompileTime==1\n                                                                  && internal::is_convertible<T, Scalar>::value,T*>::type* = 0)\n    {\n      EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 1)\n      m_storage.data()[0] = Scalar(val0);\n    }\n\n    // Initialize a fixed size matrix from a pointer to raw data\n    template<typename T>\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE void _init1(const Scalar* data){\n      this->_set_noalias(ConstMapType(data));\n    }\n\n    // Initialize an arbitrary matrix from a dense expression\n    template<typename T, typename OtherDerived>\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE void _init1(const DenseBase<OtherDerived>& other){\n      this->_set_noalias(other);\n    }\n\n    // Initialize an arbitrary matrix from an object convertible to the Derived type.\n    template<typename T>\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE void _init1(const Derived& other){\n      this->_set_noalias(other);\n    }\n\n    // Initialize an arbitrary matrix from a generic Eigen expression\n    template<typename T, typename OtherDerived>\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE void _init1(const EigenBase<OtherDerived>& other){\n      this->derived() = other;\n    }\n\n    template<typename T, typename OtherDerived>\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE void _init1(const ReturnByValue<OtherDerived>& other)\n    {\n      resize(other.rows(), other.cols());\n      other.evalTo(this->derived());\n    }\n\n    template<typename T, typename OtherDerived, int ColsAtCompileTime>\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE void _init1(const RotationBase<OtherDerived,ColsAtCompileTime>& r)\n    {\n      this->derived() = r;\n    }\n\n    // For fixed-size Array<Scalar,...>\n    template<typename T>\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE void _init1(const Scalar& val0,\n                                    typename internal::enable_if<    Base::SizeAtCompileTime!=Dynamic\n                                                                  && Base::SizeAtCompileTime!=1\n                                                                  && internal::is_convertible<T, Scalar>::value\n                                                                  && internal::is_same<typename internal::traits<Derived>::XprKind,ArrayXpr>::value,T>::type* = 0)\n    {\n      Base::setConstant(val0);\n    }\n\n    // For fixed-size Array<Index,...>\n    template<typename T>\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE void _init1(const Index& val0,\n                                    typename internal::enable_if<    (!internal::is_same<Index,Scalar>::value)\n                                                                  && (internal::is_same<Index,T>::value)\n                                                                  && Base::SizeAtCompileTime!=Dynamic\n                                                                  && Base::SizeAtCompileTime!=1\n                                                                  && 
internal::is_convertible<T, Scalar>::value\n                                                                  && internal::is_same<typename internal::traits<Derived>::XprKind,ArrayXpr>::value,T*>::type* = 0)\n    {\n      Base::setConstant(val0);\n    }\n\n    template<typename MatrixTypeA, typename MatrixTypeB, bool SwapPointers>\n    friend struct internal::matrix_swap_impl;\n\n  public:\n\n#ifndef EIGEN_PARSED_BY_DOXYGEN\n    /** \\internal\n      * \\brief Override DenseBase::swap() since for dynamic-sized matrices\n      * of same type it is enough to swap the data pointers.\n      */\n    template<typename OtherDerived>\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n    void swap(DenseBase<OtherDerived> & other)\n    {\n      enum { SwapPointers = internal::is_same<Derived, OtherDerived>::value && Base::SizeAtCompileTime==Dynamic };\n      internal::matrix_swap_impl<Derived, OtherDerived, bool(SwapPointers)>::run(this->derived(), other.derived());\n    }\n\n    /** \\internal\n      * \\brief const version forwarded to DenseBase::swap\n      */\n    template<typename OtherDerived>\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n    void swap(DenseBase<OtherDerived> const & other)\n    { Base::swap(other.derived()); }\n\n    EIGEN_DEVICE_FUNC\n    static EIGEN_STRONG_INLINE void _check_template_params()\n    {\n      EIGEN_STATIC_ASSERT((EIGEN_IMPLIES(MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1, (int(Options)&RowMajor)==RowMajor)\n                        && EIGEN_IMPLIES(MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1, (int(Options)&RowMajor)==0)\n                        && ((RowsAtCompileTime == Dynamic) || (RowsAtCompileTime >= 0))\n                        && ((ColsAtCompileTime == Dynamic) || (ColsAtCompileTime >= 0))\n                        && ((MaxRowsAtCompileTime == Dynamic) || (MaxRowsAtCompileTime >= 0))\n                        && ((MaxColsAtCompileTime == Dynamic) || (MaxColsAtCompileTime >= 0))\n                        && (MaxRowsAtCompileTime == RowsAtCompileTime || RowsAtCompileTime==Dynamic)\n                        && (MaxColsAtCompileTime == ColsAtCompileTime || ColsAtCompileTime==Dynamic)\n                        && (Options & (DontAlign|RowMajor)) == Options),\n        INVALID_MATRIX_TEMPLATE_PARAMETERS)\n    }\n\n    enum { IsPlainObjectBase = 1 };\n#endif\n  public:\n    // These apparently need to be down here for nvcc+icc to prevent duplicate\n    // Map symbol.\n    template<typename PlainObjectType, int MapOptions, typename StrideType> friend class Eigen::Map;\n    friend class Eigen::Map<Derived, Unaligned>;\n    friend class Eigen::Map<const Derived, Unaligned>;\n#if EIGEN_MAX_ALIGN_BYTES>0\n    // for EIGEN_MAX_ALIGN_BYTES==0, AlignedMax==Unaligned, and many compilers generate warnings for friend-ing a class twice.\n    friend class Eigen::Map<Derived, AlignedMax>;\n    friend class Eigen::Map<const Derived, AlignedMax>;\n#endif\n};\n\nnamespace internal {\n\ntemplate <typename Derived, typename OtherDerived, bool IsVector>\nstruct conservative_resize_like_impl\n{\n  #if EIGEN_HAS_TYPE_TRAITS\n  static const bool IsRelocatable = std::is_trivially_copyable<typename Derived::Scalar>::value;\n  #else\n  static const bool IsRelocatable = !NumTraits<typename Derived::Scalar>::RequireInitialization;\n  #endif\n  static void run(DenseBase<Derived>& _this, Index rows, Index cols)\n  {\n    if (_this.rows() == rows && _this.cols() == cols) return;\n    EIGEN_STATIC_ASSERT_DYNAMIC_SIZE(Derived)\n\n    if ( IsRelocatable\n          && (( Derived::IsRowMajor && 
_this.cols() == cols) ||  // row-major and we change only the number of rows\n              (!Derived::IsRowMajor && _this.rows() == rows) ))  // column-major and we change only the number of columns\n    {\n      internal::check_rows_cols_for_overflow<Derived::MaxSizeAtCompileTime>::run(rows, cols);\n      _this.derived().m_storage.conservativeResize(rows*cols,rows,cols);\n    }\n    else\n    {\n      // The storage order does not allow us to use reallocation.\n      Derived tmp(rows,cols);\n      const Index common_rows = numext::mini(rows, _this.rows());\n      const Index common_cols = numext::mini(cols, _this.cols());\n      tmp.block(0,0,common_rows,common_cols) = _this.block(0,0,common_rows,common_cols);\n      _this.derived().swap(tmp);\n    }\n  }\n\n  static void run(DenseBase<Derived>& _this, const DenseBase<OtherDerived>& other)\n  {\n    if (_this.rows() == other.rows() && _this.cols() == other.cols()) return;\n\n    // Note: Here is space for improvement. Basically, for conservativeResize(Index,Index),\n    // neither RowsAtCompileTime nor ColsAtCompileTime must be Dynamic. If only one of the\n    // dimensions is dynamic, one could use either conservativeResize(Index rows, NoChange_t) or\n    // conservativeResize(NoChange_t, Index cols). For these methods new static asserts like\n    // EIGEN_STATIC_ASSERT_DYNAMIC_ROWS and EIGEN_STATIC_ASSERT_DYNAMIC_COLS would be good.\n    EIGEN_STATIC_ASSERT_DYNAMIC_SIZE(Derived)\n    EIGEN_STATIC_ASSERT_DYNAMIC_SIZE(OtherDerived)\n\n    if ( IsRelocatable &&\n          (( Derived::IsRowMajor && _this.cols() == other.cols()) ||  // row-major and we change only the number of rows\n           (!Derived::IsRowMajor && _this.rows() == other.rows()) ))  // column-major and we change only the number of columns\n    {\n      const Index new_rows = other.rows() - _this.rows();\n      const Index new_cols = other.cols() - _this.cols();\n      _this.derived().m_storage.conservativeResize(other.size(),other.rows(),other.cols());\n      if (new_rows>0)\n        _this.bottomRightCorner(new_rows, other.cols()) = other.bottomRows(new_rows);\n      else if (new_cols>0)\n        _this.bottomRightCorner(other.rows(), new_cols) = other.rightCols(new_cols);\n    }\n    else\n    {\n      // The storage order does not allow us to use reallocation.\n      Derived tmp(other);\n      const Index common_rows = numext::mini(tmp.rows(), _this.rows());\n      const Index common_cols = numext::mini(tmp.cols(), _this.cols());\n      tmp.block(0,0,common_rows,common_cols) = _this.block(0,0,common_rows,common_cols);\n      _this.derived().swap(tmp);\n    }\n  }\n};\n\n// Here, the specialization for vectors inherits from the general matrix case\n// to allow calling .conservativeResize(rows,cols) on vectors.\ntemplate <typename Derived, typename OtherDerived>\nstruct conservative_resize_like_impl<Derived,OtherDerived,true>\n  : conservative_resize_like_impl<Derived,OtherDerived,false>\n{\n  typedef conservative_resize_like_impl<Derived,OtherDerived,false> Base;\n  using Base::run;\n  using Base::IsRelocatable;\n\n  static void run(DenseBase<Derived>& _this, Index size)\n  {\n    const Index new_rows = Derived::RowsAtCompileTime==1 ? 1 : size;\n    const Index new_cols = Derived::RowsAtCompileTime==1 ? 
size : 1;\n    if(IsRelocatable)\n      _this.derived().m_storage.conservativeResize(size,new_rows,new_cols);\n    else\n      Base::run(_this.derived(), new_rows, new_cols);\n  }\n\n  static void run(DenseBase<Derived>& _this, const DenseBase<OtherDerived>& other)\n  {\n    if (_this.rows() == other.rows() && _this.cols() == other.cols()) return;\n\n    const Index num_new_elements = other.size() - _this.size();\n\n    const Index new_rows = Derived::RowsAtCompileTime==1 ? 1 : other.rows();\n    const Index new_cols = Derived::RowsAtCompileTime==1 ? other.cols() : 1;\n    if(IsRelocatable)\n      _this.derived().m_storage.conservativeResize(other.size(),new_rows,new_cols);\n    else\n      Base::run(_this.derived(), new_rows, new_cols);\n\n    if (num_new_elements > 0)\n      _this.tail(num_new_elements) = other.tail(num_new_elements);\n  }\n};\n\ntemplate<typename MatrixTypeA, typename MatrixTypeB, bool SwapPointers>\nstruct matrix_swap_impl\n{\n  EIGEN_DEVICE_FUNC\n  static EIGEN_STRONG_INLINE void run(MatrixTypeA& a, MatrixTypeB& b)\n  {\n    a.base().swap(b);\n  }\n};\n\ntemplate<typename MatrixTypeA, typename MatrixTypeB>\nstruct matrix_swap_impl<MatrixTypeA, MatrixTypeB, true>\n{\n  EIGEN_DEVICE_FUNC\n  static inline void run(MatrixTypeA& a, MatrixTypeB& b)\n  {\n    static_cast<typename MatrixTypeA::Base&>(a).m_storage.swap(static_cast<typename MatrixTypeB::Base&>(b).m_storage);\n  }\n};\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_DENSESTORAGEBASE_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/Product.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008-2011 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_PRODUCT_H\n#define EIGEN_PRODUCT_H\n\nnamespace Eigen {\n\ntemplate<typename Lhs, typename Rhs, int Option, typename StorageKind> class ProductImpl;\n\nnamespace internal {\n\ntemplate<typename Lhs, typename Rhs, int Option>\nstruct traits<Product<Lhs, Rhs, Option> >\n{\n  typedef typename remove_all<Lhs>::type LhsCleaned;\n  typedef typename remove_all<Rhs>::type RhsCleaned;\n  typedef traits<LhsCleaned> LhsTraits;\n  typedef traits<RhsCleaned> RhsTraits;\n\n  typedef MatrixXpr XprKind;\n\n  typedef typename ScalarBinaryOpTraits<typename traits<LhsCleaned>::Scalar, typename traits<RhsCleaned>::Scalar>::ReturnType Scalar;\n  typedef typename product_promote_storage_type<typename LhsTraits::StorageKind,\n                                                typename RhsTraits::StorageKind,\n                                                internal::product_type<Lhs,Rhs>::ret>::ret StorageKind;\n  typedef typename promote_index_type<typename LhsTraits::StorageIndex,\n                                      typename RhsTraits::StorageIndex>::type StorageIndex;\n\n  enum {\n    RowsAtCompileTime    = LhsTraits::RowsAtCompileTime,\n    ColsAtCompileTime    = RhsTraits::ColsAtCompileTime,\n    MaxRowsAtCompileTime = LhsTraits::MaxRowsAtCompileTime,\n    MaxColsAtCompileTime = RhsTraits::MaxColsAtCompileTime,\n\n    // FIXME: only needed by GeneralMatrixMatrixTriangular\n    InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(LhsTraits::ColsAtCompileTime, RhsTraits::RowsAtCompileTime),\n\n    // The storage order is somewhat arbitrary here. The correct one will be determined through the evaluator.\n    Flags = (MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1) ? RowMajorBit\n          : (MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1) ? 0\n          : (   ((LhsTraits::Flags&NoPreferredStorageOrderBit) && (RhsTraits::Flags&RowMajorBit))\n             || ((RhsTraits::Flags&NoPreferredStorageOrderBit) && (LhsTraits::Flags&RowMajorBit)) ) ? 
RowMajorBit\n          : NoPreferredStorageOrderBit\n  };\n};\n\n} // end namespace internal\n\n/** \\class Product\n  * \\ingroup Core_Module\n  *\n  * \\brief Expression of the product of two arbitrary matrices or vectors\n  *\n  * \\tparam _Lhs the type of the left-hand side expression\n  * \\tparam _Rhs the type of the right-hand side expression\n  *\n  * This class represents an expression of the product of two arbitrary matrices.\n  *\n  * The other template parameters are:\n  * \\tparam Option     can be DefaultProduct, AliasFreeProduct, or LazyProduct\n  *\n  */\ntemplate<typename _Lhs, typename _Rhs, int Option>\nclass Product : public ProductImpl<_Lhs,_Rhs,Option,\n                                   typename internal::product_promote_storage_type<typename internal::traits<_Lhs>::StorageKind,\n                                                                                   typename internal::traits<_Rhs>::StorageKind,\n                                                                                   internal::product_type<_Lhs,_Rhs>::ret>::ret>\n{\n  public:\n\n    typedef _Lhs Lhs;\n    typedef _Rhs Rhs;\n\n    typedef typename ProductImpl<\n        Lhs, Rhs, Option,\n        typename internal::product_promote_storage_type<typename internal::traits<Lhs>::StorageKind,\n                                                        typename internal::traits<Rhs>::StorageKind,\n                                                        internal::product_type<Lhs,Rhs>::ret>::ret>::Base Base;\n    EIGEN_GENERIC_PUBLIC_INTERFACE(Product)\n\n    typedef typename internal::ref_selector<Lhs>::type LhsNested;\n    typedef typename internal::ref_selector<Rhs>::type RhsNested;\n    typedef typename internal::remove_all<LhsNested>::type LhsNestedCleaned;\n    typedef typename internal::remove_all<RhsNested>::type RhsNestedCleaned;\n\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n    Product(const Lhs& lhs, const Rhs& rhs) : m_lhs(lhs), m_rhs(rhs)\n    {\n      eigen_assert(lhs.cols() == rhs.rows()\n        && \"invalid matrix product\"\n        && \"if you wanted a coeff-wise or a dot product use the respective explicit functions\");\n    }\n\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR\n    Index rows() const EIGEN_NOEXCEPT { return m_lhs.rows(); }\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR\n    Index cols() const EIGEN_NOEXCEPT { return m_rhs.cols(); }\n\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n    const LhsNestedCleaned& lhs() const { return m_lhs; }\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n    const RhsNestedCleaned& rhs() const { return m_rhs; }\n\n  protected:\n\n    LhsNested m_lhs;\n    RhsNested m_rhs;\n};\n\nnamespace internal {\n\ntemplate<typename Lhs, typename Rhs, int Option, int ProductTag = internal::product_type<Lhs,Rhs>::ret>\nclass dense_product_base\n : public internal::dense_xpr_base<Product<Lhs,Rhs,Option> >::type\n{};\n\n/** Conversion to scalar for inner-products */\ntemplate<typename Lhs, typename Rhs, int Option>\nclass dense_product_base<Lhs, Rhs, Option, InnerProduct>\n : public internal::dense_xpr_base<Product<Lhs,Rhs,Option> >::type\n{\n  typedef Product<Lhs,Rhs,Option> ProductXpr;\n  typedef typename internal::dense_xpr_base<ProductXpr>::type Base;\npublic:\n  using Base::derived;\n  typedef typename Base::Scalar Scalar;\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE operator const Scalar() const\n  {\n    return internal::evaluator<ProductXpr>(derived()).coeff(0,0);\n  }\n};\n\n} // namespace internal\n\n// Generic API 
dispatcher\ntemplate<typename Lhs, typename Rhs, int Option, typename StorageKind>\nclass ProductImpl : public internal::generic_xpr_base<Product<Lhs,Rhs,Option>, MatrixXpr, StorageKind>::type\n{\n  public:\n    typedef typename internal::generic_xpr_base<Product<Lhs,Rhs,Option>, MatrixXpr, StorageKind>::type Base;\n};\n\ntemplate<typename Lhs, typename Rhs, int Option>\nclass ProductImpl<Lhs,Rhs,Option,Dense>\n  : public internal::dense_product_base<Lhs,Rhs,Option>\n{\n    typedef Product<Lhs, Rhs, Option> Derived;\n\n  public:\n\n    typedef typename internal::dense_product_base<Lhs, Rhs, Option> Base;\n    EIGEN_DENSE_PUBLIC_INTERFACE(Derived)\n  protected:\n    enum {\n      IsOneByOne = (RowsAtCompileTime == 1 || RowsAtCompileTime == Dynamic) &&\n                   (ColsAtCompileTime == 1 || ColsAtCompileTime == Dynamic),\n      EnableCoeff = IsOneByOne || Option==LazyProduct\n    };\n\n  public:\n\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar coeff(Index row, Index col) const\n    {\n      EIGEN_STATIC_ASSERT(EnableCoeff, THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS);\n      eigen_assert( (Option==LazyProduct) || (this->rows() == 1 && this->cols() == 1) );\n\n      return internal::evaluator<Derived>(derived()).coeff(row,col);\n    }\n\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar coeff(Index i) const\n    {\n      EIGEN_STATIC_ASSERT(EnableCoeff, THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS);\n      eigen_assert( (Option==LazyProduct) || (this->rows() == 1 && this->cols() == 1) );\n\n      return internal::evaluator<Derived>(derived()).coeff(i);\n    }\n\n\n};\n\n} // end namespace Eigen\n\n#endif // EIGEN_PRODUCT_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/ProductEvaluators.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>\n// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>\n// Copyright (C) 2011 Jitse Niesen <jitse@maths.leeds.ac.uk>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n\n#ifndef EIGEN_PRODUCTEVALUATORS_H\n#define EIGEN_PRODUCTEVALUATORS_H\n\nnamespace Eigen {\n\nnamespace internal {\n\n/** \\internal\n  * Evaluator of a product expression.\n  * Since products require special treatments to handle all possible cases,\n  * we simply defer the evaluation logic to a product_evaluator class\n  * which offers more partial specialization possibilities.\n  *\n  * \\sa class product_evaluator\n  */\ntemplate<typename Lhs, typename Rhs, int Options>\nstruct evaluator<Product<Lhs, Rhs, Options> >\n : public product_evaluator<Product<Lhs, Rhs, Options> >\n{\n  typedef Product<Lhs, Rhs, Options> XprType;\n  typedef product_evaluator<XprType> Base;\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr) : Base(xpr) {}\n};\n\n// Catch \"scalar * ( A * B )\" and transform it to \"(A*scalar) * B\"\n// TODO we should apply that rule only if that's really helpful\ntemplate<typename Lhs, typename Rhs, typename Scalar1, typename Scalar2, typename Plain1>\nstruct evaluator_assume_aliasing<CwiseBinaryOp<internal::scalar_product_op<Scalar1,Scalar2>,\n                                               const CwiseNullaryOp<internal::scalar_constant_op<Scalar1>, Plain1>,\n                                               const Product<Lhs, Rhs, DefaultProduct> > >\n{\n  static const bool value = true;\n};\ntemplate<typename Lhs, typename Rhs, typename Scalar1, typename Scalar2, typename Plain1>\nstruct evaluator<CwiseBinaryOp<internal::scalar_product_op<Scalar1,Scalar2>,\n                               const CwiseNullaryOp<internal::scalar_constant_op<Scalar1>, Plain1>,\n                               const Product<Lhs, Rhs, DefaultProduct> > >\n : public evaluator<Product<EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar1,Lhs,product), Rhs, DefaultProduct> >\n{\n  typedef CwiseBinaryOp<internal::scalar_product_op<Scalar1,Scalar2>,\n                               const CwiseNullaryOp<internal::scalar_constant_op<Scalar1>, Plain1>,\n                               const Product<Lhs, Rhs, DefaultProduct> > XprType;\n  typedef evaluator<Product<EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar1,Lhs,product), Rhs, DefaultProduct> > Base;\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr)\n    : Base(xpr.lhs().functor().m_other * xpr.rhs().lhs() * xpr.rhs().rhs())\n  {}\n};\n\n\ntemplate<typename Lhs, typename Rhs, int DiagIndex>\nstruct evaluator<Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> >\n : public evaluator<Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex> >\n{\n  typedef Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> XprType;\n  typedef evaluator<Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex> > Base;\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr)\n    : Base(Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex>(\n        Product<Lhs, Rhs, LazyProduct>(xpr.nestedExpression().lhs(), xpr.nestedExpression().rhs()),\n        xpr.index() ))\n  
{}\n};\n\n\n// Helper class to perform a matrix product with the destination at hand.\n// Depending on the sizes of the factors, there are different evaluation strategies\n// as controlled by internal::product_type.\ntemplate< typename Lhs, typename Rhs,\n          typename LhsShape = typename evaluator_traits<Lhs>::Shape,\n          typename RhsShape = typename evaluator_traits<Rhs>::Shape,\n          int ProductType = internal::product_type<Lhs,Rhs>::value>\nstruct generic_product_impl;\n\ntemplate<typename Lhs, typename Rhs>\nstruct evaluator_assume_aliasing<Product<Lhs, Rhs, DefaultProduct> > {\n  static const bool value = true;\n};\n\n// This is the default evaluator implementation for products:\n// It creates a temporary and calls generic_product_impl\ntemplate<typename Lhs, typename Rhs, int Options, int ProductTag, typename LhsShape, typename RhsShape>\nstruct product_evaluator<Product<Lhs, Rhs, Options>, ProductTag, LhsShape, RhsShape>\n  : public evaluator<typename Product<Lhs, Rhs, Options>::PlainObject>\n{\n  typedef Product<Lhs, Rhs, Options> XprType;\n  typedef typename XprType::PlainObject PlainObject;\n  typedef evaluator<PlainObject> Base;\n  enum {\n    Flags = Base::Flags | EvalBeforeNestingBit\n  };\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  explicit product_evaluator(const XprType& xpr)\n    : m_result(xpr.rows(), xpr.cols())\n  {\n    ::new (static_cast<Base*>(this)) Base(m_result);\n\n// FIXME shall we handle nested_eval here?\n// if so, then we must take care to remove the call to nested_eval in the specializations (e.g., in permutation_matrix_product, transposition_matrix_product, etc.)\n//     typedef typename internal::nested_eval<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;\n//     typedef typename internal::nested_eval<Rhs,Lhs::RowsAtCompileTime>::type RhsNested;\n//     typedef typename internal::remove_all<LhsNested>::type LhsNestedCleaned;\n//     typedef typename internal::remove_all<RhsNested>::type RhsNestedCleaned;\n//\n//     const LhsNested lhs(xpr.lhs());\n//     const RhsNested rhs(xpr.rhs());\n//\n//     generic_product_impl<LhsNestedCleaned, RhsNestedCleaned>::evalTo(m_result, lhs, rhs);\n\n    generic_product_impl<Lhs, Rhs, LhsShape, RhsShape, ProductTag>::evalTo(m_result, xpr.lhs(), xpr.rhs());\n  }\n\nprotected:\n  PlainObject m_result;\n};\n\n// The following three shortcuts are enabled only if the scalar types match exactly.\n// TODO: we could enable them for different scalar types when the product is not vectorized.\n\n// Dense = Product\ntemplate< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar>\nstruct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::assign_op<Scalar,Scalar>, Dense2Dense,\n  typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type>\n{\n  typedef Product<Lhs,Rhs,Options> SrcXprType;\n  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)\n  {\n    Index dstRows = src.rows();\n    Index dstCols = src.cols();\n    if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))\n      dst.resize(dstRows, dstCols);\n    // FIXME shall we handle nested_eval here?\n    generic_product_impl<Lhs, Rhs>::evalTo(dst, src.lhs(), src.rhs());\n  }\n};\n\n// Dense += Product\ntemplate< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar>\nstruct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::add_assign_op<Scalar,Scalar>, Dense2Dense,\n  typename 
enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type>\n{\n  typedef Product<Lhs,Rhs,Options> SrcXprType;\n  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<Scalar,Scalar> &)\n  {\n    eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());\n    // FIXME shall we handle nested_eval here?\n    generic_product_impl<Lhs, Rhs>::addTo(dst, src.lhs(), src.rhs());\n  }\n};\n\n// Dense -= Product\ntemplate< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar>\nstruct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::sub_assign_op<Scalar,Scalar>, Dense2Dense,\n  typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type>\n{\n  typedef Product<Lhs,Rhs,Options> SrcXprType;\n  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<Scalar,Scalar> &)\n  {\n    eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());\n    // FIXME shall we handle nested_eval here?\n    generic_product_impl<Lhs, Rhs>::subTo(dst, src.lhs(), src.rhs());\n  }\n};\n\n\n// Dense ?= scalar * Product\n// TODO we should apply that rule if that's really helpful\n// for instance, this is not good for inner products\ntemplate< typename DstXprType, typename Lhs, typename Rhs, typename AssignFunc, typename Scalar, typename ScalarBis, typename Plain>\nstruct Assignment<DstXprType, CwiseBinaryOp<internal::scalar_product_op<ScalarBis,Scalar>, const CwiseNullaryOp<internal::scalar_constant_op<ScalarBis>,Plain>,\n                                           const Product<Lhs,Rhs,DefaultProduct> >, AssignFunc, Dense2Dense>\n{\n  typedef CwiseBinaryOp<internal::scalar_product_op<ScalarBis,Scalar>,\n                        const CwiseNullaryOp<internal::scalar_constant_op<ScalarBis>,Plain>,\n                        const Product<Lhs,Rhs,DefaultProduct> > SrcXprType;\n  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  void run(DstXprType &dst, const SrcXprType &src, const AssignFunc& func)\n  {\n    call_assignment_no_alias(dst, (src.lhs().functor().m_other * src.rhs().lhs())*src.rhs().rhs(), func);\n  }\n};\n\n//----------------------------------------\n// Catch \"Dense ?= xpr + Product<>\" expression to save one temporary\n// FIXME we could probably enable these rules for any product, i.e., not only Dense and DefaultProduct\n\ntemplate<typename OtherXpr, typename Lhs, typename Rhs>\nstruct evaluator_assume_aliasing<CwiseBinaryOp<internal::scalar_sum_op<typename OtherXpr::Scalar,typename Product<Lhs,Rhs,DefaultProduct>::Scalar>, const OtherXpr,\n                                               const Product<Lhs,Rhs,DefaultProduct> >, DenseShape > {\n  static const bool value = true;\n};\n\ntemplate<typename OtherXpr, typename Lhs, typename Rhs>\nstruct evaluator_assume_aliasing<CwiseBinaryOp<internal::scalar_difference_op<typename OtherXpr::Scalar,typename Product<Lhs,Rhs,DefaultProduct>::Scalar>, const OtherXpr,\n                                               const Product<Lhs,Rhs,DefaultProduct> >, DenseShape > {\n  static const bool value = true;\n};\n\ntemplate<typename DstXprType, typename OtherXpr, typename ProductType, typename Func1, typename Func2>\nstruct assignment_from_xpr_op_product\n{\n  template<typename SrcXprType, typename InitialFunc>\n  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  void run(DstXprType &dst, const SrcXprType &src, const InitialFunc& /*func*/)\n  {\n    
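// Editor's note (illustrative, not part of upstream Eigen): for dst = A + B*C this expands\n    // to call_assignment_no_alias(dst, A, Func1()) followed by call_assignment_no_alias(dst, B*C, Func2()),\n    // i.e. dst = A; dst += B*C; so no temporary is allocated to hold B*C before the sum.\n    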
call_assignment_no_alias(dst, src.lhs(), Func1());\n    call_assignment_no_alias(dst, src.rhs(), Func2());\n  }\n};\n\n#define EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(ASSIGN_OP,BINOP,ASSIGN_OP2) \\\n  template< typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename DstScalar, typename SrcScalar, typename OtherScalar,typename ProdScalar> \\\n  struct Assignment<DstXprType, CwiseBinaryOp<internal::BINOP<OtherScalar,ProdScalar>, const OtherXpr, \\\n                                            const Product<Lhs,Rhs,DefaultProduct> >, internal::ASSIGN_OP<DstScalar,SrcScalar>, Dense2Dense> \\\n    : assignment_from_xpr_op_product<DstXprType, OtherXpr, Product<Lhs,Rhs,DefaultProduct>, internal::ASSIGN_OP<DstScalar,OtherScalar>, internal::ASSIGN_OP2<DstScalar,ProdScalar> > \\\n  {}\n\nEIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(assign_op,    scalar_sum_op,add_assign_op);\nEIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(add_assign_op,scalar_sum_op,add_assign_op);\nEIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(sub_assign_op,scalar_sum_op,sub_assign_op);\n\nEIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(assign_op,    scalar_difference_op,sub_assign_op);\nEIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(add_assign_op,scalar_difference_op,sub_assign_op);\nEIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(sub_assign_op,scalar_difference_op,add_assign_op);\n\n//----------------------------------------\n\ntemplate<typename Lhs, typename Rhs>\nstruct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,InnerProduct>\n{\n  template<typename Dst>\n  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)\n  {\n    dst.coeffRef(0,0) = (lhs.transpose().cwiseProduct(rhs)).sum();\n  }\n\n  template<typename Dst>\n  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)\n  {\n    dst.coeffRef(0,0) += (lhs.transpose().cwiseProduct(rhs)).sum();\n  }\n\n  template<typename Dst>\n  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)\n  { dst.coeffRef(0,0) -= (lhs.transpose().cwiseProduct(rhs)).sum(); }\n};\n\n\n/***********************************************************************\n*  Implementation of outer dense * dense vector product\n***********************************************************************/\n\n// Column major result\ntemplate<typename Dst, typename Lhs, typename Rhs, typename Func>\nvoid EIGEN_DEVICE_FUNC outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const false_type&)\n{\n  evaluator<Rhs> rhsEval(rhs);\n  ei_declare_local_nested_eval(Lhs,lhs,Rhs::SizeAtCompileTime,actual_lhs);\n  // FIXME if cols is large enough, then it might be useful to make sure that lhs is sequentially stored\n  // FIXME not very good if rhs is real and lhs complex while alpha is real too\n  const Index cols = dst.cols();\n  for (Index j=0; j<cols; ++j)\n    func(dst.col(j), rhsEval.coeff(Index(0),j) * actual_lhs);\n}\n\n// Row major result\ntemplate<typename Dst, typename Lhs, typename Rhs, typename Func>\nvoid EIGEN_DEVICE_FUNC outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const true_type&)\n{\n  evaluator<Lhs> lhsEval(lhs);\n  ei_declare_local_nested_eval(Rhs,rhs,Lhs::SizeAtCompileTime,actual_rhs);\n  // FIXME if rows is large enough, then it might be useful to make sure that rhs is sequentially stored\n  // FIXME not very good if lhs is real and rhs complex while alpha is real too\n  const Index rows = dst.rows();\n  for (Index i=0; i<rows; ++i)\n    func(dst.row(i), 
lhsEval.coeff(i,Index(0)) * actual_rhs);\n}\n\ntemplate<typename Lhs, typename Rhs>\nstruct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,OuterProduct>\n{\n  template<typename T> struct is_row_major : internal::conditional<(int(T::Flags)&RowMajorBit), internal::true_type, internal::false_type>::type {};\n  typedef typename Product<Lhs,Rhs>::Scalar Scalar;\n\n  // TODO it would be nice to be able to exploit our *_assign_op functors for that purpose\n  struct set  { template<typename Dst, typename Src> EIGEN_DEVICE_FUNC void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived()  = src; } };\n  struct add  { template<typename Dst, typename Src> EIGEN_DEVICE_FUNC void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() += src; } };\n  struct sub  { template<typename Dst, typename Src> EIGEN_DEVICE_FUNC void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() -= src; } };\n  struct adds {\n    Scalar m_scale;\n    explicit adds(const Scalar& s) : m_scale(s) {}\n    template<typename Dst, typename Src> void EIGEN_DEVICE_FUNC operator()(const Dst& dst, const Src& src) const {\n      dst.const_cast_derived() += m_scale * src;\n    }\n  };\n\n  template<typename Dst>\n  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)\n  {\n    internal::outer_product_selector_run(dst, lhs, rhs, set(), is_row_major<Dst>());\n  }\n\n  template<typename Dst>\n  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)\n  {\n    internal::outer_product_selector_run(dst, lhs, rhs, add(), is_row_major<Dst>());\n  }\n\n  template<typename Dst>\n  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)\n  {\n    internal::outer_product_selector_run(dst, lhs, rhs, sub(), is_row_major<Dst>());\n  }\n\n  template<typename Dst>\n  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)\n  {\n    internal::outer_product_selector_run(dst, lhs, rhs, adds(alpha), is_row_major<Dst>());\n  }\n\n};\n\n\n// This base class provides default implementations for evalTo, addTo, subTo, in terms of scaleAndAddTo\ntemplate<typename Lhs, typename Rhs, typename Derived>\nstruct generic_product_impl_base\n{\n  typedef typename Product<Lhs,Rhs>::Scalar Scalar;\n\n  template<typename Dst>\n  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)\n  { dst.setZero(); scaleAndAddTo(dst, lhs, rhs, Scalar(1)); }\n\n  template<typename Dst>\n  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)\n  { scaleAndAddTo(dst,lhs, rhs, Scalar(1)); }\n\n  template<typename Dst>\n  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)\n  { scaleAndAddTo(dst, lhs, rhs, Scalar(-1)); }\n\n  template<typename Dst>\n  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)\n  { Derived::scaleAndAddTo(dst,lhs,rhs,alpha); }\n\n};\n\ntemplate<typename Lhs, typename Rhs>\nstruct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemvProduct>\n  : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemvProduct> >\n{\n  typedef typename nested_eval<Lhs,1>::type LhsNested;\n  typedef typename nested_eval<Rhs,1>::type RhsNested;\n  typedef typename 
Product<Lhs,Rhs>::Scalar Scalar;\n  enum { Side = Lhs::IsVectorAtCompileTime ? OnTheLeft : OnTheRight };\n  typedef typename internal::remove_all<typename internal::conditional<int(Side)==OnTheRight,LhsNested,RhsNested>::type>::type MatrixType;\n\n  template<typename Dest>\n  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)\n  {\n    // Fall back to an inner product if both the lhs and rhs are runtime vectors.\n    if (lhs.rows() == 1 && rhs.cols() == 1) {\n      dst.coeffRef(0,0) += alpha * lhs.row(0).conjugate().dot(rhs.col(0));\n      return;\n    }\n    LhsNested actual_lhs(lhs);\n    RhsNested actual_rhs(rhs);\n    internal::gemv_dense_selector<Side,\n                            (int(MatrixType::Flags)&RowMajorBit) ? RowMajor : ColMajor,\n                            bool(internal::blas_traits<MatrixType>::HasUsableDirectAccess)\n                           >::run(actual_lhs, actual_rhs, dst, alpha);\n  }\n};\n\ntemplate<typename Lhs, typename Rhs>\nstruct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode>\n{\n  typedef typename Product<Lhs,Rhs>::Scalar Scalar;\n\n  template<typename Dst>\n  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)\n  {\n    // Same as: dst.noalias() = lhs.lazyProduct(rhs);\n    // but easier on the compiler side\n    call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::assign_op<typename Dst::Scalar,Scalar>());\n  }\n\n  template<typename Dst>\n  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)\n  {\n    // dst.noalias() += lhs.lazyProduct(rhs);\n    call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::add_assign_op<typename Dst::Scalar,Scalar>());\n  }\n\n  template<typename Dst>\n  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)\n  {\n    // dst.noalias() -= lhs.lazyProduct(rhs);\n    call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::sub_assign_op<typename Dst::Scalar,Scalar>());\n  }\n\n  // This is a special evaluation path called from generic_product_impl<...,GemmProduct> in file GeneralMatrixMatrix.h\n  // This variant tries to extract scalar multiples from both the LHS and RHS and factor them out. For instance:\n  //   dst {,+,-}= (s1*A)*(B*s2)\n  // will be rewritten as:\n  //   dst {,+,-}= (s1*s2) * (A.lazyProduct(B))\n  // There are at least four benefits of doing so:\n  //  1 - huge performance gain for heap-allocated matrix types as it saves costly allocations.\n  //  2 - it is faster than simply by-passing the heap allocation through stack allocation.\n  //  3 - it makes this fallback consistent with the heavy GEMM routine.\n  //  4 - it fully by-passes huge stack allocation attempts when multiplying huge fixed-size matrices.\n  //      (see https://stackoverflow.com/questions/54738495)\n  // For small fixed-size matrices, however, the gains are less obvious: it is sometimes x2 faster, but sometimes x3 slower,\n  // and the behavior also depends a lot on the compiler... 
This is why this re-writing strategy is currently\n  // enabled only when falling back from the main GEMM.\n  template<typename Dst, typename Func>\n  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  void eval_dynamic(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Func &func)\n  {\n    enum {\n      HasScalarFactor = blas_traits<Lhs>::HasScalarFactor || blas_traits<Rhs>::HasScalarFactor,\n      ConjLhs = blas_traits<Lhs>::NeedToConjugate,\n      ConjRhs = blas_traits<Rhs>::NeedToConjugate\n    };\n    // FIXME: in C++11 this should be auto, and extractScalarFactor should also return auto\n    //        this is important for real*complex_mat\n    Scalar actualAlpha = combine_scalar_factors<Scalar>(lhs, rhs);\n\n    eval_dynamic_impl(dst,\n                      blas_traits<Lhs>::extract(lhs).template conjugateIf<ConjLhs>(),\n                      blas_traits<Rhs>::extract(rhs).template conjugateIf<ConjRhs>(),\n                      func,\n                      actualAlpha,\n                      typename conditional<HasScalarFactor,true_type,false_type>::type());\n  }\n\nprotected:\n\n  template<typename Dst, typename LhsT, typename RhsT, typename Func, typename Scalar>\n  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  void eval_dynamic_impl(Dst& dst, const LhsT& lhs, const RhsT& rhs, const Func &func, const Scalar&  s /* == 1 */, false_type)\n  {\n    EIGEN_UNUSED_VARIABLE(s);\n    eigen_internal_assert(s==Scalar(1));\n    call_restricted_packet_assignment_no_alias(dst, lhs.lazyProduct(rhs), func);\n  }\n\n  template<typename Dst, typename LhsT, typename RhsT, typename Func, typename Scalar>\n  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  void eval_dynamic_impl(Dst& dst, const LhsT& lhs, const RhsT& rhs, const Func &func, const Scalar& s, true_type)\n  {\n    call_restricted_packet_assignment_no_alias(dst, s * lhs.lazyProduct(rhs), func);\n  }\n};\n\n// This specialization enforces the use of a coefficient-based evaluation strategy\ntemplate<typename Lhs, typename Rhs>\nstruct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,LazyCoeffBasedProductMode>\n  : generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode> {};\n\n// Case 2: Evaluate coeff by coeff\n//\n// This is mostly taken from CoeffBasedProduct.h\n// The main difference is that we add an extra argument to the etor_product_*_impl::run() function\n// for the inner dimension of the product, because evaluator objects do not know their size.\n\ntemplate<int Traversal, int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>\nstruct etor_product_coeff_impl;\n\ntemplate<int StorageOrder, int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>\nstruct etor_product_packet_impl;\n\ntemplate<typename Lhs, typename Rhs, int ProductTag>\nstruct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape, DenseShape>\n    : evaluator_base<Product<Lhs, Rhs, LazyProduct> >\n{\n  typedef Product<Lhs, Rhs, LazyProduct> XprType;\n  typedef typename XprType::Scalar Scalar;\n  typedef typename XprType::CoeffReturnType CoeffReturnType;\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  explicit product_evaluator(const XprType& xpr)\n    : m_lhs(xpr.lhs()),\n      m_rhs(xpr.rhs()),\n      m_lhsImpl(m_lhs),     // FIXME the creation of the evaluator objects should result in a no-op, but check that!\n      m_rhsImpl(m_rhs),     //       Moreover, they are only useful for the packet path, so we could completely disable them when not needed,\n                            //       or perhaps 
declare them on the fly on the packet method... We have to experiment to check what's best.\n      m_innerDim(xpr.lhs().cols())\n  {\n    EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits<Scalar>::MulCost);\n    EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits<Scalar>::AddCost);\n    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);\n#if 0\n    std::cerr << \"LhsOuterStrideBytes=  \" << LhsOuterStrideBytes << \"\\n\";\n    std::cerr << \"RhsOuterStrideBytes=  \" << RhsOuterStrideBytes << \"\\n\";\n    std::cerr << \"LhsAlignment=         \" << LhsAlignment << \"\\n\";\n    std::cerr << \"RhsAlignment=         \" << RhsAlignment << \"\\n\";\n    std::cerr << \"CanVectorizeLhs=      \" << CanVectorizeLhs << \"\\n\";\n    std::cerr << \"CanVectorizeRhs=      \" << CanVectorizeRhs << \"\\n\";\n    std::cerr << \"CanVectorizeInner=    \" << CanVectorizeInner << \"\\n\";\n    std::cerr << \"EvalToRowMajor=       \" << EvalToRowMajor << \"\\n\";\n    std::cerr << \"Alignment=            \" << Alignment << \"\\n\";\n    std::cerr << \"Flags=                \" << Flags << \"\\n\";\n#endif\n  }\n\n  // Everything below here is taken from CoeffBasedProduct.h\n\n  typedef typename internal::nested_eval<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;\n  typedef typename internal::nested_eval<Rhs,Lhs::RowsAtCompileTime>::type RhsNested;\n\n  typedef typename internal::remove_all<LhsNested>::type LhsNestedCleaned;\n  typedef typename internal::remove_all<RhsNested>::type RhsNestedCleaned;\n\n  typedef evaluator<LhsNestedCleaned> LhsEtorType;\n  typedef evaluator<RhsNestedCleaned> RhsEtorType;\n\n  enum {\n    RowsAtCompileTime = LhsNestedCleaned::RowsAtCompileTime,\n    ColsAtCompileTime = RhsNestedCleaned::ColsAtCompileTime,\n    InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(LhsNestedCleaned::ColsAtCompileTime, RhsNestedCleaned::RowsAtCompileTime),\n    MaxRowsAtCompileTime = LhsNestedCleaned::MaxRowsAtCompileTime,\n    MaxColsAtCompileTime = RhsNestedCleaned::MaxColsAtCompileTime\n  };\n\n  typedef typename find_best_packet<Scalar,RowsAtCompileTime>::type LhsVecPacketType;\n  typedef typename find_best_packet<Scalar,ColsAtCompileTime>::type RhsVecPacketType;\n\n  enum {\n\n    LhsCoeffReadCost = LhsEtorType::CoeffReadCost,\n    RhsCoeffReadCost = RhsEtorType::CoeffReadCost,\n    CoeffReadCost = InnerSize==0 ? NumTraits<Scalar>::ReadCost\n                  : InnerSize == Dynamic ? 
HugeCost\n                    : InnerSize * (NumTraits<Scalar>::MulCost + int(LhsCoeffReadCost) + int(RhsCoeffReadCost))\n                    + (InnerSize - 1) * NumTraits<Scalar>::AddCost,\n\n    Unroll = CoeffReadCost <= EIGEN_UNROLLING_LIMIT,\n\n    LhsFlags = LhsEtorType::Flags,\n    RhsFlags = RhsEtorType::Flags,\n\n    LhsRowMajor = LhsFlags & RowMajorBit,\n    RhsRowMajor = RhsFlags & RowMajorBit,\n\n    LhsVecPacketSize = unpacket_traits<LhsVecPacketType>::size,\n    RhsVecPacketSize = unpacket_traits<RhsVecPacketType>::size,\n\n    // Here, we don't care about alignment larger than the usable packet size.\n    LhsAlignment = EIGEN_PLAIN_ENUM_MIN(LhsEtorType::Alignment,LhsVecPacketSize*int(sizeof(typename LhsNestedCleaned::Scalar))),\n    RhsAlignment = EIGEN_PLAIN_ENUM_MIN(RhsEtorType::Alignment,RhsVecPacketSize*int(sizeof(typename RhsNestedCleaned::Scalar))),\n\n    SameType = is_same<typename LhsNestedCleaned::Scalar,typename RhsNestedCleaned::Scalar>::value,\n\n    CanVectorizeRhs = bool(RhsRowMajor) && (RhsFlags & PacketAccessBit) && (ColsAtCompileTime!=1),\n    CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit) && (RowsAtCompileTime!=1),\n\n    EvalToRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1\n                    : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0\n                    : (bool(RhsRowMajor) && !CanVectorizeLhs),\n\n    Flags = ((int(LhsFlags) | int(RhsFlags)) & HereditaryBits & ~RowMajorBit)\n          | (EvalToRowMajor ? RowMajorBit : 0)\n          // TODO enable vectorization for mixed types\n          | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0)\n          | (XprType::IsVectorAtCompileTime ? LinearAccessBit : 0),\n\n    LhsOuterStrideBytes = int(LhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename LhsNestedCleaned::Scalar)),\n    RhsOuterStrideBytes = int(RhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename RhsNestedCleaned::Scalar)),\n\n    Alignment = bool(CanVectorizeLhs) ? (LhsOuterStrideBytes<=0 || (int(LhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,LhsAlignment))!=0 ? 0 : LhsAlignment)\n              : bool(CanVectorizeRhs) ? (RhsOuterStrideBytes<=0 || (int(RhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,RhsAlignment))!=0 ? 0 : RhsAlignment)\n              : 0,\n\n    /* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside\n     * of Product. If the Product itself is not a packet-access expression, there is still a chance that the inner\n     * loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect\n     * the Flags, it is safe to make this value depend on ActualPacketAccessBit, that doesn't affect the ABI.\n     */\n    CanVectorizeInner =    SameType\n                        && LhsRowMajor\n                        && (!RhsRowMajor)\n                        && (int(LhsFlags) & int(RhsFlags) & ActualPacketAccessBit)\n                        && (int(InnerSize) % packet_traits<Scalar>::size == 0)\n  };\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index row, Index col) const\n  {\n    return (m_lhs.row(row).transpose().cwiseProduct( m_rhs.col(col) )).sum();\n  }\n\n  /* Allow index-based non-packet access. 
It is impossible though to allow index-based packet access,\n   * which is why we don't set the LinearAccessBit.\n   * TODO: this seems possible when the result is a vector\n   */\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  const CoeffReturnType coeff(Index index) const\n  {\n    const Index row = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? 0 : index;\n    const Index col = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? index : 0;\n    return (m_lhs.row(row).transpose().cwiseProduct( m_rhs.col(col) )).sum();\n  }\n\n  template<int LoadMode, typename PacketType>\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  const PacketType packet(Index row, Index col) const\n  {\n    PacketType res;\n    typedef etor_product_packet_impl<bool(int(Flags)&RowMajorBit) ? RowMajor : ColMajor,\n                                     Unroll ? int(InnerSize) : Dynamic,\n                                     LhsEtorType, RhsEtorType, PacketType, LoadMode> PacketImpl;\n    PacketImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res);\n    return res;\n  }\n\n  template<int LoadMode, typename PacketType>\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  const PacketType packet(Index index) const\n  {\n    const Index row = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? 0 : index;\n    const Index col = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? index : 0;\n    return packet<LoadMode,PacketType>(row,col);\n  }\n\nprotected:\n  typename internal::add_const_on_value_type<LhsNested>::type m_lhs;\n  typename internal::add_const_on_value_type<RhsNested>::type m_rhs;\n\n  LhsEtorType m_lhsImpl;\n  RhsEtorType m_rhsImpl;\n\n  // TODO: Get rid of m_innerDim if known at compile time\n  Index m_innerDim;\n};\n\ntemplate<typename Lhs, typename Rhs>\nstruct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, LazyCoeffBasedProductMode, DenseShape, DenseShape>\n  : product_evaluator<Product<Lhs, Rhs, LazyProduct>, CoeffBasedProductMode, DenseShape, DenseShape>\n{\n  typedef Product<Lhs, Rhs, DefaultProduct> XprType;\n  typedef Product<Lhs, Rhs, LazyProduct> BaseProduct;\n  typedef product_evaluator<BaseProduct, CoeffBasedProductMode, DenseShape, DenseShape> Base;\n  enum {\n    Flags = Base::Flags | EvalBeforeNestingBit\n  };\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  explicit product_evaluator(const XprType& xpr)\n    : Base(BaseProduct(xpr.lhs(),xpr.rhs()))\n  {}\n};\n\n/****************************************\n*** Coeff based product, Packet path  ***\n****************************************/\n\ntemplate<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>\nstruct etor_product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>\n{\n  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)\n  {\n    etor_product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);\n    res =  pmadd(pset1<Packet>(lhs.coeff(row, Index(UnrollingIndex-1))), rhs.template packet<LoadMode,Packet>(Index(UnrollingIndex-1), col), res);\n  }\n};\n\ntemplate<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>\nstruct etor_product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>\n{\n  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)\n  {\n    etor_product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, 
LoadMode>::run(row, col, lhs, rhs, innerDim, res);\n    res =  pmadd(lhs.template packet<LoadMode,Packet>(row, Index(UnrollingIndex-1)), pset1<Packet>(rhs.coeff(Index(UnrollingIndex-1), col)), res);\n  }\n};\n\ntemplate<typename Lhs, typename Rhs, typename Packet, int LoadMode>\nstruct etor_product_packet_impl<RowMajor, 1, Lhs, Rhs, Packet, LoadMode>\n{\n  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)\n  {\n    res = pmul(pset1<Packet>(lhs.coeff(row, Index(0))),rhs.template packet<LoadMode,Packet>(Index(0), col));\n  }\n};\n\ntemplate<typename Lhs, typename Rhs, typename Packet, int LoadMode>\nstruct etor_product_packet_impl<ColMajor, 1, Lhs, Rhs, Packet, LoadMode>\n{\n  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)\n  {\n    res = pmul(lhs.template packet<LoadMode,Packet>(row, Index(0)), pset1<Packet>(rhs.coeff(Index(0), col)));\n  }\n};\n\ntemplate<typename Lhs, typename Rhs, typename Packet, int LoadMode>\nstruct etor_product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>\n{\n  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res)\n  {\n    res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));\n  }\n};\n\ntemplate<typename Lhs, typename Rhs, typename Packet, int LoadMode>\nstruct etor_product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>\n{\n  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res)\n  {\n    res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));\n  }\n};\n\ntemplate<typename Lhs, typename Rhs, typename Packet, int LoadMode>\nstruct etor_product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>\n{\n  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)\n  {\n    res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));\n    for(Index i = 0; i < innerDim; ++i)\n      res =  pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode,Packet>(i, col), res);\n  }\n};\n\ntemplate<typename Lhs, typename Rhs, typename Packet, int LoadMode>\nstruct etor_product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>\n{\n  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)\n  {\n    res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));\n    for(Index i = 0; i < innerDim; ++i)\n      res =  pmadd(lhs.template packet<LoadMode,Packet>(row, i), pset1<Packet>(rhs.coeff(i, col)), res);\n  }\n};\n\n\n/***************************************************************************\n* Triangular products\n***************************************************************************/\ntemplate<int Mode, bool LhsIsTriangular,\n         typename Lhs, bool LhsIsVector,\n         typename Rhs, bool RhsIsVector>\nstruct triangular_product_impl;\n\ntemplate<typename Lhs, typename Rhs, int ProductTag>\nstruct generic_product_impl<Lhs,Rhs,TriangularShape,DenseShape,ProductTag>\n  : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,TriangularShape,DenseShape,ProductTag> >\n{\n  typedef typename Product<Lhs,Rhs>::Scalar Scalar;\n\n  template<typename Dest>\n  static void 
scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)\n  {\n    triangular_product_impl<Lhs::Mode,true,typename Lhs::MatrixType,false,Rhs, Rhs::ColsAtCompileTime==1>\n        ::run(dst, lhs.nestedExpression(), rhs, alpha);\n  }\n};\n\ntemplate<typename Lhs, typename Rhs, int ProductTag>\nstruct generic_product_impl<Lhs,Rhs,DenseShape,TriangularShape,ProductTag>\n: generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,TriangularShape,ProductTag> >\n{\n  typedef typename Product<Lhs,Rhs>::Scalar Scalar;\n\n  template<typename Dest>\n  static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)\n  {\n    triangular_product_impl<Rhs::Mode,false,Lhs,Lhs::RowsAtCompileTime==1, typename Rhs::MatrixType, false>::run(dst, lhs, rhs.nestedExpression(), alpha);\n  }\n};\n\n\n/***************************************************************************\n* SelfAdjoint products\n***************************************************************************/\ntemplate <typename Lhs, int LhsMode, bool LhsIsVector,\n          typename Rhs, int RhsMode, bool RhsIsVector>\nstruct selfadjoint_product_impl;\n\ntemplate<typename Lhs, typename Rhs, int ProductTag>\nstruct generic_product_impl<Lhs,Rhs,SelfAdjointShape,DenseShape,ProductTag>\n  : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,SelfAdjointShape,DenseShape,ProductTag> >\n{\n  typedef typename Product<Lhs,Rhs>::Scalar Scalar;\n\n  template<typename Dest>\n  static EIGEN_DEVICE_FUNC\n  void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)\n  {\n    selfadjoint_product_impl<typename Lhs::MatrixType,Lhs::Mode,false,Rhs,0,Rhs::IsVectorAtCompileTime>::run(dst, lhs.nestedExpression(), rhs, alpha);\n  }\n};\n\ntemplate<typename Lhs, typename Rhs, int ProductTag>\nstruct generic_product_impl<Lhs,Rhs,DenseShape,SelfAdjointShape,ProductTag>\n: generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,SelfAdjointShape,ProductTag> >\n{\n  typedef typename Product<Lhs,Rhs>::Scalar Scalar;\n\n  template<typename Dest>\n  static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)\n  {\n    selfadjoint_product_impl<Lhs,0,Lhs::IsVectorAtCompileTime,typename Rhs::MatrixType,Rhs::Mode,false>::run(dst, lhs, rhs.nestedExpression(), alpha);\n  }\n};\n\n\n/***************************************************************************\n* Diagonal products\n***************************************************************************/\n\ntemplate<typename MatrixType, typename DiagonalType, typename Derived, int ProductOrder>\nstruct diagonal_product_evaluator_base\n  : evaluator_base<Derived>\n{\n   typedef typename ScalarBinaryOpTraits<typename MatrixType::Scalar, typename DiagonalType::Scalar>::ReturnType Scalar;\npublic:\n  enum {\n    CoeffReadCost = int(NumTraits<Scalar>::MulCost) + int(evaluator<MatrixType>::CoeffReadCost) + int(evaluator<DiagonalType>::CoeffReadCost),\n\n    MatrixFlags = evaluator<MatrixType>::Flags,\n    DiagFlags = evaluator<DiagonalType>::Flags,\n\n    _StorageOrder = (Derived::MaxRowsAtCompileTime==1 && Derived::MaxColsAtCompileTime!=1) ? RowMajor\n                  : (Derived::MaxColsAtCompileTime==1 && Derived::MaxRowsAtCompileTime!=1) ? ColMajor\n                  : MatrixFlags & RowMajorBit ? RowMajor : ColMajor,\n    _SameStorageOrder = _StorageOrder == (MatrixFlags & RowMajorBit ? 
RowMajor : ColMajor),\n\n    _ScalarAccessOnDiag =  !((int(_StorageOrder) == ColMajor && int(ProductOrder) == OnTheLeft)\n                           ||(int(_StorageOrder) == RowMajor && int(ProductOrder) == OnTheRight)),\n    _SameTypes = is_same<typename MatrixType::Scalar, typename DiagonalType::Scalar>::value,\n    // FIXME currently we need same types, but in the future the next rule should be the one\n    //_Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && ((!_PacketOnDiag) || (_SameTypes && bool(int(DiagFlags)&PacketAccessBit))),\n    _Vectorizable =   bool(int(MatrixFlags)&PacketAccessBit)\n                  &&  _SameTypes\n                  && (_SameStorageOrder || (MatrixFlags&LinearAccessBit)==LinearAccessBit)\n                  && (_ScalarAccessOnDiag || (bool(int(DiagFlags)&PacketAccessBit))),\n    _LinearAccessMask = (MatrixType::RowsAtCompileTime==1 || MatrixType::ColsAtCompileTime==1) ? LinearAccessBit : 0,\n    Flags = ((HereditaryBits|_LinearAccessMask) & (unsigned int)(MatrixFlags)) | (_Vectorizable ? PacketAccessBit : 0),\n    Alignment = evaluator<MatrixType>::Alignment,\n\n    AsScalarProduct =     (DiagonalType::SizeAtCompileTime==1)\n                      ||  (DiagonalType::SizeAtCompileTime==Dynamic && MatrixType::RowsAtCompileTime==1 && ProductOrder==OnTheLeft)\n                      ||  (DiagonalType::SizeAtCompileTime==Dynamic && MatrixType::ColsAtCompileTime==1 && ProductOrder==OnTheRight)\n  };\n\n  EIGEN_DEVICE_FUNC diagonal_product_evaluator_base(const MatrixType &mat, const DiagonalType &diag)\n    : m_diagImpl(diag), m_matImpl(mat)\n  {\n    EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits<Scalar>::MulCost);\n    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);\n  }\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index idx) const\n  {\n    if(AsScalarProduct)\n      return m_diagImpl.coeff(0) * m_matImpl.coeff(idx);\n    else\n      return m_diagImpl.coeff(idx) * m_matImpl.coeff(idx);\n  }\n\nprotected:\n  template<int LoadMode,typename PacketType>\n  EIGEN_STRONG_INLINE PacketType packet_impl(Index row, Index col, Index id, internal::true_type) const\n  {\n    return internal::pmul(m_matImpl.template packet<LoadMode,PacketType>(row, col),\n                          internal::pset1<PacketType>(m_diagImpl.coeff(id)));\n  }\n\n  template<int LoadMode,typename PacketType>\n  EIGEN_STRONG_INLINE PacketType packet_impl(Index row, Index col, Index id, internal::false_type) const\n  {\n    enum {\n      InnerSize = (MatrixType::Flags & RowMajorBit) ? MatrixType::ColsAtCompileTime : MatrixType::RowsAtCompileTime,\n      DiagonalPacketLoadMode = EIGEN_PLAIN_ENUM_MIN(LoadMode,((InnerSize%16) == 0) ? 
int(Aligned16) : int(evaluator<DiagonalType>::Alignment)) // FIXME hardcoded 16!!\n    };\n    return internal::pmul(m_matImpl.template packet<LoadMode,PacketType>(row, col),\n                          m_diagImpl.template packet<DiagonalPacketLoadMode,PacketType>(id));\n  }\n\n  evaluator<DiagonalType> m_diagImpl;\n  evaluator<MatrixType>   m_matImpl;\n};\n\n// diagonal * dense\ntemplate<typename Lhs, typename Rhs, int ProductKind, int ProductTag>\nstruct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DiagonalShape, DenseShape>\n  : diagonal_product_evaluator_base<Rhs, typename Lhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>, OnTheLeft>\n{\n  typedef diagonal_product_evaluator_base<Rhs, typename Lhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>, OnTheLeft> Base;\n  using Base::m_diagImpl;\n  using Base::m_matImpl;\n  using Base::coeff;\n  typedef typename Base::Scalar Scalar;\n\n  typedef Product<Lhs, Rhs, ProductKind> XprType;\n  typedef typename XprType::PlainObject PlainObject;\n  typedef typename Lhs::DiagonalVectorType DiagonalType;\n\n\n  enum { StorageOrder = Base::_StorageOrder };\n\n  EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr)\n    : Base(xpr.rhs(), xpr.lhs().diagonal())\n  {\n  }\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const\n  {\n    return m_diagImpl.coeff(row) * m_matImpl.coeff(row, col);\n  }\n\n#ifndef EIGEN_GPUCC\n  template<int LoadMode,typename PacketType>\n  EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const\n  {\n    // FIXME: NVCC used to complain about the template keyword, but we have to check whether this is still the case.\n    // See also similar calls below.\n    return this->template packet_impl<LoadMode,PacketType>(row,col, row,\n                                 typename internal::conditional<int(StorageOrder)==RowMajor, internal::true_type, internal::false_type>::type());\n  }\n\n  template<int LoadMode,typename PacketType>\n  EIGEN_STRONG_INLINE PacketType packet(Index idx) const\n  {\n    return packet<LoadMode,PacketType>(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx);\n  }\n#endif\n};\n\n// dense * diagonal\ntemplate<typename Lhs, typename Rhs, int ProductKind, int ProductTag>\nstruct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DenseShape, DiagonalShape>\n  : diagonal_product_evaluator_base<Lhs, typename Rhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>, OnTheRight>\n{\n  typedef diagonal_product_evaluator_base<Lhs, typename Rhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>, OnTheRight> Base;\n  using Base::m_diagImpl;\n  using Base::m_matImpl;\n  using Base::coeff;\n  typedef typename Base::Scalar Scalar;\n\n  typedef Product<Lhs, Rhs, ProductKind> XprType;\n  typedef typename XprType::PlainObject PlainObject;\n\n  enum { StorageOrder = Base::_StorageOrder };\n\n  EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr)\n    : Base(xpr.lhs(), xpr.rhs().diagonal())\n  {\n  }\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const\n  {\n    return m_matImpl.coeff(row, col) * m_diagImpl.coeff(col);\n  }\n\n#ifndef EIGEN_GPUCC\n  template<int LoadMode,typename PacketType>\n  EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const\n  {\n    return this->template packet_impl<LoadMode,PacketType>(row,col, col,\n                                 typename internal::conditional<int(StorageOrder)==ColMajor, internal::true_type, 
internal::false_type>::type());\n  }\n\n  template<int LoadMode,typename PacketType>\n  EIGEN_STRONG_INLINE PacketType packet(Index idx) const\n  {\n    return packet<LoadMode,PacketType>(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx);\n  }\n#endif\n};\n\n/***************************************************************************\n* Products with permutation matrices\n***************************************************************************/\n\n/** \\internal\n  * \\class permutation_matrix_product\n  * Internal helper class implementing the product between a permutation matrix and a matrix.\n  * This class is specialized for DenseShape below and for SparseShape in SparseCore/SparsePermutation.h\n  */\ntemplate<typename ExpressionType, int Side, bool Transposed, typename ExpressionShape>\nstruct permutation_matrix_product;\n\ntemplate<typename ExpressionType, int Side, bool Transposed>\nstruct permutation_matrix_product<ExpressionType, Side, Transposed, DenseShape>\n{\n    typedef typename nested_eval<ExpressionType, 1>::type MatrixType;\n    typedef typename remove_all<MatrixType>::type MatrixTypeCleaned;\n\n    template<typename Dest, typename PermutationType>\n    static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Dest& dst, const PermutationType& perm, const ExpressionType& xpr)\n    {\n      MatrixType mat(xpr);\n      const Index n = Side==OnTheLeft ? mat.rows() : mat.cols();\n      // FIXME we need an is_same for expression that is not sensitive to constness. For instance\n      // is_same_xpr<Block<const Matrix>, Block<Matrix> >::value should be true.\n      //if(is_same<MatrixTypeCleaned,Dest>::value && extract_data(dst) == extract_data(mat))\n      if(is_same_dense(dst, mat))\n      {\n        // apply the permutation inplace\n        Matrix<bool,PermutationType::RowsAtCompileTime,1,0,PermutationType::MaxRowsAtCompileTime> mask(perm.size());\n        mask.fill(false);\n        Index r = 0;\n        while(r < perm.size())\n        {\n          // search for the next seed\n          while(r<perm.size() && mask[r]) r++;\n          if(r>=perm.size())\n            break;\n          // we got one, let's follow it until we are back to the seed\n          Index k0 = r++;\n          Index kPrev = k0;\n          mask.coeffRef(k0) = true;\n          for(Index k=perm.indices().coeff(k0); k!=k0; k=perm.indices().coeff(k))\n          {\n                  Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime>(dst, k)\n            .swap(Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime>\n                       (dst,((Side==OnTheLeft) ^ Transposed) ? k0 : kPrev));\n\n            mask.coeffRef(k) = true;\n            kPrev = k;\n          }\n        }\n      }\n      else\n      {\n        for(Index i = 0; i < n; ++i)\n        {\n          Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime>\n               (dst, ((Side==OnTheLeft) ^ Transposed) ? perm.indices().coeff(i) : i)\n\n          =\n\n          Block<const MatrixTypeCleaned,Side==OnTheLeft ? 1 : MatrixTypeCleaned::RowsAtCompileTime,Side==OnTheRight ? 1 : MatrixTypeCleaned::ColsAtCompileTime>\n               (mat, ((Side==OnTheRight) ^ Transposed) ? 
perm.indices().coeff(i) : i);\n        }\n      }\n    }\n};\n\ntemplate<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>\nstruct generic_product_impl<Lhs, Rhs, PermutationShape, MatrixShape, ProductTag>\n{\n  template<typename Dest>\n  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)\n  {\n    permutation_matrix_product<Rhs, OnTheLeft, false, MatrixShape>::run(dst, lhs, rhs);\n  }\n};\n\ntemplate<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>\nstruct generic_product_impl<Lhs, Rhs, MatrixShape, PermutationShape, ProductTag>\n{\n  template<typename Dest>\n  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)\n  {\n    permutation_matrix_product<Lhs, OnTheRight, false, MatrixShape>::run(dst, rhs, lhs);\n  }\n};\n\ntemplate<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>\nstruct generic_product_impl<Inverse<Lhs>, Rhs, PermutationShape, MatrixShape, ProductTag>\n{\n  template<typename Dest>\n  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Inverse<Lhs>& lhs, const Rhs& rhs)\n  {\n    permutation_matrix_product<Rhs, OnTheLeft, true, MatrixShape>::run(dst, lhs.nestedExpression(), rhs);\n  }\n};\n\ntemplate<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>\nstruct generic_product_impl<Lhs, Inverse<Rhs>, MatrixShape, PermutationShape, ProductTag>\n{\n  template<typename Dest>\n  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Inverse<Rhs>& rhs)\n  {\n    permutation_matrix_product<Lhs, OnTheRight, true, MatrixShape>::run(dst, rhs.nestedExpression(), lhs);\n  }\n};\n\n\n/***************************************************************************\n* Products with transpositions matrices\n***************************************************************************/\n\n// FIXME could we unify Transpositions and Permutation into a single \"shape\"??\n\n/** \\internal\n  * \\class transposition_matrix_product\n  * Internal helper class implementing the product between a transpositions matrix and a matrix.\n  */\ntemplate<typename ExpressionType, int Side, bool Transposed, typename ExpressionShape>\nstruct transposition_matrix_product\n{\n  typedef typename nested_eval<ExpressionType, 1>::type MatrixType;\n  typedef typename remove_all<MatrixType>::type MatrixTypeCleaned;\n\n  template<typename Dest, typename TranspositionType>\n  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Dest& dst, const TranspositionType& tr, const ExpressionType& xpr)\n  {\n    MatrixType mat(xpr);\n    typedef typename TranspositionType::StorageIndex StorageIndex;\n    const Index size = tr.size();\n    StorageIndex j = 0;\n\n    if(!is_same_dense(dst,mat))\n      dst = mat;\n\n    for(Index k=(Transposed?size-1:0) ; Transposed?k>=0:k<size ; Transposed?--k:++k)\n      if(Index(j=tr.coeff(k))!=k)\n      {\n        if(Side==OnTheLeft)        dst.row(k).swap(dst.row(j));\n        else if(Side==OnTheRight)  dst.col(k).swap(dst.col(j));\n      }\n  }\n};\n\ntemplate<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>\nstruct generic_product_impl<Lhs, Rhs, TranspositionsShape, MatrixShape, ProductTag>\n{\n  template<typename Dest>\n  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)\n  {\n    transposition_matrix_product<Rhs, OnTheLeft, false, MatrixShape>::run(dst, lhs, rhs);\n  }\n};\n\ntemplate<typename Lhs, typename Rhs, int 
ProductTag, typename MatrixShape>\nstruct generic_product_impl<Lhs, Rhs, MatrixShape, TranspositionsShape, ProductTag>\n{\n  template<typename Dest>\n  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)\n  {\n    transposition_matrix_product<Lhs, OnTheRight, false, MatrixShape>::run(dst, rhs, lhs);\n  }\n};\n\n\ntemplate<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>\nstruct generic_product_impl<Transpose<Lhs>, Rhs, TranspositionsShape, MatrixShape, ProductTag>\n{\n  template<typename Dest>\n  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Transpose<Lhs>& lhs, const Rhs& rhs)\n  {\n    transposition_matrix_product<Rhs, OnTheLeft, true, MatrixShape>::run(dst, lhs.nestedExpression(), rhs);\n  }\n};\n\ntemplate<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>\nstruct generic_product_impl<Lhs, Transpose<Rhs>, MatrixShape, TranspositionsShape, ProductTag>\n{\n  template<typename Dest>\n  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Transpose<Rhs>& rhs)\n  {\n    transposition_matrix_product<Lhs, OnTheRight, true, MatrixShape>::run(dst, rhs.nestedExpression(), lhs);\n  }\n};\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_PRODUCT_EVALUATORS_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/Random.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_RANDOM_H\n#define EIGEN_RANDOM_H\n\nnamespace Eigen { \n\nnamespace internal {\n\ntemplate<typename Scalar> struct scalar_random_op {\n  EIGEN_EMPTY_STRUCT_CTOR(scalar_random_op)\n  inline const Scalar operator() () const { return random<Scalar>(); }\n};\n\ntemplate<typename Scalar>\nstruct functor_traits<scalar_random_op<Scalar> >\n{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = false, IsRepeatable = false }; };\n\n} // end namespace internal\n\n/** \\returns a random matrix expression\n  *\n  * Numbers are uniformly spread through their whole definition range for integer types,\n  * and in the [-1:1] range for floating point scalar types.\n  * \n  * The parameters \\a rows and \\a cols are the number of rows and of columns of\n  * the returned matrix. Must be compatible with this MatrixBase type.\n  *\n  * \\not_reentrant\n  * \n  * This variant is meant to be used for dynamic-size matrix types. For fixed-size types,\n  * it is redundant to pass \\a rows and \\a cols as arguments, so Random() should be used\n  * instead.\n  * \n  *\n  * Example: \\include MatrixBase_random_int_int.cpp\n  * Output: \\verbinclude MatrixBase_random_int_int.out\n  *\n  * This expression has the \"evaluate before nesting\" flag so that it will be evaluated into\n  * a temporary matrix whenever it is nested in a larger expression. This prevents unexpected\n  * behavior with expressions involving random matrices.\n  * \n  * See DenseBase::NullaryExpr(Index, const CustomNullaryOp&) for an example using C++11 random generators.\n  *\n  * \\sa DenseBase::setRandom(), DenseBase::Random(Index), DenseBase::Random()\n  */\ntemplate<typename Derived>\ninline const typename DenseBase<Derived>::RandomReturnType\nDenseBase<Derived>::Random(Index rows, Index cols)\n{\n  return NullaryExpr(rows, cols, internal::scalar_random_op<Scalar>());\n}\n\n/** \\returns a random vector expression\n  *\n  * Numbers are uniformly spread through their whole definition range for integer types,\n  * and in the [-1:1] range for floating point scalar types.\n  *\n  * The parameter \\a size is the size of the returned vector.\n  * Must be compatible with this MatrixBase type.\n  *\n  * \\only_for_vectors\n  * \\not_reentrant\n  *\n  * This variant is meant to be used for dynamic-size vector types. For fixed-size types,\n  * it is redundant to pass \\a size as argument, so Random() should be used\n  * instead.\n  *\n  * Example: \\include MatrixBase_random_int.cpp\n  * Output: \\verbinclude MatrixBase_random_int.out\n  *\n  * This expression has the \"evaluate before nesting\" flag so that it will be evaluated into\n  * a temporary vector whenever it is nested in a larger expression. 
This prevents unexpected\n  * behavior with expressions involving random matrices.\n  *\n  * \\sa DenseBase::setRandom(), DenseBase::Random(Index,Index), DenseBase::Random()\n  */\ntemplate<typename Derived>\ninline const typename DenseBase<Derived>::RandomReturnType\nDenseBase<Derived>::Random(Index size)\n{\n  return NullaryExpr(size, internal::scalar_random_op<Scalar>());\n}\n\n/** \\returns a fixed-size random matrix or vector expression\n  *\n  * Numbers are uniformly spread through their whole definition range for integer types,\n  * and in the [-1:1] range for floating point scalar types.\n  * \n  * This variant is only for fixed-size MatrixBase types. For dynamic-size types, you\n  * need to use the variants taking size arguments.\n  *\n  * Example: \\include MatrixBase_random.cpp\n  * Output: \\verbinclude MatrixBase_random.out\n  *\n  * This expression has the \"evaluate before nesting\" flag so that it will be evaluated into\n  * a temporary matrix whenever it is nested in a larger expression. This prevents unexpected\n  * behavior with expressions involving random matrices.\n  * \n  * \\not_reentrant\n  *\n  * \\sa DenseBase::setRandom(), DenseBase::Random(Index,Index), DenseBase::Random(Index)\n  */\ntemplate<typename Derived>\ninline const typename DenseBase<Derived>::RandomReturnType\nDenseBase<Derived>::Random()\n{\n  return NullaryExpr(RowsAtCompileTime, ColsAtCompileTime, internal::scalar_random_op<Scalar>());\n}\n\n/** Sets all coefficients in this expression to random values.\n  *\n  * Numbers are uniformly spread through their whole definition range for integer types,\n  * and in the [-1:1] range for floating point scalar types.\n  * \n  * \\not_reentrant\n  * \n  * Example: \\include MatrixBase_setRandom.cpp\n  * Output: \\verbinclude MatrixBase_setRandom.out\n  *\n  * \\sa class CwiseNullaryOp, setRandom(Index), setRandom(Index,Index)\n  */\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC inline Derived& DenseBase<Derived>::setRandom()\n{\n  return *this = Random(rows(), cols());\n}\n\n/** Resizes to the given \\a newSize, and sets all coefficients in this expression to random values.\n  *\n  * Numbers are uniformly spread through their whole definition range for integer types,\n  * and in the [-1:1] range for floating point scalar types.\n  * \n  * \\only_for_vectors\n  * \\not_reentrant\n  *\n  * Example: \\include Matrix_setRandom_int.cpp\n  * Output: \\verbinclude Matrix_setRandom_int.out\n  *\n  * \\sa DenseBase::setRandom(), setRandom(Index,Index), class CwiseNullaryOp, DenseBase::Random()\n  */\ntemplate<typename Derived>\nEIGEN_STRONG_INLINE Derived&\nPlainObjectBase<Derived>::setRandom(Index newSize)\n{\n  resize(newSize);\n  return setRandom();\n}\n\n/** Resizes to the given size, and sets all coefficients in this expression to random values.\n  *\n  * Numbers are uniformly spread through their whole definition range for integer types,\n  * and in the [-1:1] range for floating point scalar types.\n  *\n  * \\not_reentrant\n  * \n  * \\param rows the new number of rows\n  * \\param cols the new number of columns\n  *\n  * Example: \\include Matrix_setRandom_int_int.cpp\n  * Output: \\verbinclude Matrix_setRandom_int_int.out\n  *\n  * \\sa DenseBase::setRandom(), setRandom(Index), class CwiseNullaryOp, DenseBase::Random()\n  */\ntemplate<typename Derived>\nEIGEN_STRONG_INLINE Derived&\nPlainObjectBase<Derived>::setRandom(Index rows, Index cols)\n{\n  resize(rows, cols);\n  return setRandom();\n}\n\n/** Resizes to the given size, changing only the number of 
columns, and sets all\n  * coefficients in this expression to random values. For the parameter of type\n  * NoChange_t, just pass the special value \\c NoChange.\n  *\n  * Numbers are uniformly spread through their whole definition range for integer types,\n  * and in the [-1:1] range for floating point scalar types.\n  *\n  * \\not_reentrant\n  *\n  * \\sa DenseBase::setRandom(), setRandom(Index), setRandom(Index, NoChange_t), class CwiseNullaryOp, DenseBase::Random()\n  */\ntemplate<typename Derived>\nEIGEN_STRONG_INLINE Derived&\nPlainObjectBase<Derived>::setRandom(NoChange_t, Index cols)\n{\n  return setRandom(rows(), cols);\n}\n\n/** Resizes to the given size, changing only the number of rows, and sets all\n  * coefficients in this expression to random values. For the parameter of type\n  * NoChange_t, just pass the special value \\c NoChange.\n  *\n  * Numbers are uniformly spread through their whole definition range for integer types,\n  * and in the [-1:1] range for floating point scalar types.\n  *\n  * \\not_reentrant\n  *\n  * \\sa DenseBase::setRandom(), setRandom(Index), setRandom(NoChange_t, Index), class CwiseNullaryOp, DenseBase::Random()\n  */\ntemplate<typename Derived>\nEIGEN_STRONG_INLINE Derived&\nPlainObjectBase<Derived>::setRandom(Index rows, NoChange_t)\n{\n  return setRandom(rows, cols());\n}\n\n} // end namespace Eigen\n\n#endif // EIGEN_RANDOM_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/Redux.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>\n// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_REDUX_H\n#define EIGEN_REDUX_H\n\nnamespace Eigen { \n\nnamespace internal {\n\n// TODO\n//  * implement other kind of vectorization\n//  * factorize code\n\n/***************************************************************************\n* Part 1 : the logic deciding a strategy for vectorization and unrolling\n***************************************************************************/\n\ntemplate<typename Func, typename Evaluator>\nstruct redux_traits\n{\npublic:\n    typedef typename find_best_packet<typename Evaluator::Scalar,Evaluator::SizeAtCompileTime>::type PacketType;\n  enum {\n    PacketSize = unpacket_traits<PacketType>::size,\n    InnerMaxSize = int(Evaluator::IsRowMajor)\n                 ? Evaluator::MaxColsAtCompileTime\n                 : Evaluator::MaxRowsAtCompileTime,\n    OuterMaxSize = int(Evaluator::IsRowMajor)\n                 ? Evaluator::MaxRowsAtCompileTime\n                 : Evaluator::MaxColsAtCompileTime,\n    SliceVectorizedWork = int(InnerMaxSize)==Dynamic ? Dynamic\n                        : int(OuterMaxSize)==Dynamic ? (int(InnerMaxSize)>=int(PacketSize) ? Dynamic : 0)\n                        : (int(InnerMaxSize)/int(PacketSize)) * int(OuterMaxSize)\n  };\n\n  enum {\n    MightVectorize = (int(Evaluator::Flags)&ActualPacketAccessBit)\n                  && (functor_traits<Func>::PacketAccess),\n    MayLinearVectorize = bool(MightVectorize) && (int(Evaluator::Flags)&LinearAccessBit),\n    MaySliceVectorize  = bool(MightVectorize) && (int(SliceVectorizedWork)==Dynamic || int(SliceVectorizedWork)>=3)\n  };\n\npublic:\n  enum {\n    Traversal = int(MayLinearVectorize) ? int(LinearVectorizedTraversal)\n              : int(MaySliceVectorize)  ? int(SliceVectorizedTraversal)\n                                        : int(DefaultTraversal)\n  };\n\npublic:\n  enum {\n    Cost = Evaluator::SizeAtCompileTime == Dynamic ? HugeCost\n         : int(Evaluator::SizeAtCompileTime) * int(Evaluator::CoeffReadCost) + (Evaluator::SizeAtCompileTime-1) * functor_traits<Func>::Cost,\n    UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Traversal) == int(DefaultTraversal) ? 1 : int(PacketSize))\n  };\n\npublic:\n  enum {\n    Unrolling = Cost <= UnrollingLimit ? 
CompleteUnrolling : NoUnrolling\n  };\n  \n#ifdef EIGEN_DEBUG_ASSIGN\n  static void debug()\n  {\n    std::cerr << \"Xpr: \" << typeid(typename Evaluator::XprType).name() << std::endl;\n    std::cerr.setf(std::ios::hex, std::ios::basefield);\n    EIGEN_DEBUG_VAR(Evaluator::Flags)\n    std::cerr.unsetf(std::ios::hex);\n    EIGEN_DEBUG_VAR(InnerMaxSize)\n    EIGEN_DEBUG_VAR(OuterMaxSize)\n    EIGEN_DEBUG_VAR(SliceVectorizedWork)\n    EIGEN_DEBUG_VAR(PacketSize)\n    EIGEN_DEBUG_VAR(MightVectorize)\n    EIGEN_DEBUG_VAR(MayLinearVectorize)\n    EIGEN_DEBUG_VAR(MaySliceVectorize)\n    std::cerr << \"Traversal\" << \" = \" << Traversal << \" (\" << demangle_traversal(Traversal) << \")\" << std::endl;\n    EIGEN_DEBUG_VAR(UnrollingLimit)\n    std::cerr << \"Unrolling\" << \" = \" << Unrolling << \" (\" << demangle_unrolling(Unrolling) << \")\" << std::endl;\n    std::cerr << std::endl;\n  }\n#endif\n};\n\n/***************************************************************************\n* Part 2 : unrollers\n***************************************************************************/\n\n/*** no vectorization ***/\n\ntemplate<typename Func, typename Evaluator, int Start, int Length>\nstruct redux_novec_unroller\n{\n  enum {\n    HalfLength = Length/2\n  };\n\n  typedef typename Evaluator::Scalar Scalar;\n\n  EIGEN_DEVICE_FUNC\n  static EIGEN_STRONG_INLINE Scalar run(const Evaluator &eval, const Func& func)\n  {\n    return func(redux_novec_unroller<Func, Evaluator, Start, HalfLength>::run(eval,func),\n                redux_novec_unroller<Func, Evaluator, Start+HalfLength, Length-HalfLength>::run(eval,func));\n  }\n};\n\ntemplate<typename Func, typename Evaluator, int Start>\nstruct redux_novec_unroller<Func, Evaluator, Start, 1>\n{\n  enum {\n    outer = Start / Evaluator::InnerSizeAtCompileTime,\n    inner = Start % Evaluator::InnerSizeAtCompileTime\n  };\n\n  typedef typename Evaluator::Scalar Scalar;\n\n  EIGEN_DEVICE_FUNC\n  static EIGEN_STRONG_INLINE Scalar run(const Evaluator &eval, const Func&)\n  {\n    return eval.coeffByOuterInner(outer, inner);\n  }\n};\n\n// This is actually dead code and will never be called. 
It is required\n// to prevent false warnings regarding failed inlining though\n// for 0 length run() will never be called at all.\ntemplate<typename Func, typename Evaluator, int Start>\nstruct redux_novec_unroller<Func, Evaluator, Start, 0>\n{\n  typedef typename Evaluator::Scalar Scalar;\n  EIGEN_DEVICE_FUNC \n  static EIGEN_STRONG_INLINE Scalar run(const Evaluator&, const Func&) { return Scalar(); }\n};\n\n/*** vectorization ***/\n\ntemplate<typename Func, typename Evaluator, int Start, int Length>\nstruct redux_vec_unroller\n{\n  template<typename PacketType>\n  EIGEN_DEVICE_FUNC\n  static EIGEN_STRONG_INLINE PacketType run(const Evaluator &eval, const Func& func)\n  {\n    enum {\n      PacketSize = unpacket_traits<PacketType>::size,\n      HalfLength = Length/2\n    };\n\n    return func.packetOp(\n            redux_vec_unroller<Func, Evaluator, Start, HalfLength>::template run<PacketType>(eval,func),\n            redux_vec_unroller<Func, Evaluator, Start+HalfLength, Length-HalfLength>::template run<PacketType>(eval,func) );\n  }\n};\n\ntemplate<typename Func, typename Evaluator, int Start>\nstruct redux_vec_unroller<Func, Evaluator, Start, 1>\n{\n  template<typename PacketType>\n  EIGEN_DEVICE_FUNC\n  static EIGEN_STRONG_INLINE PacketType run(const Evaluator &eval, const Func&)\n  {\n    enum {\n      PacketSize = unpacket_traits<PacketType>::size,\n      index = Start * PacketSize,\n      outer = index / int(Evaluator::InnerSizeAtCompileTime),\n      inner = index % int(Evaluator::InnerSizeAtCompileTime),\n      alignment = Evaluator::Alignment\n    };\n    return eval.template packetByOuterInner<alignment,PacketType>(outer, inner);\n  }\n};\n\n/***************************************************************************\n* Part 3 : implementation of all cases\n***************************************************************************/\n\ntemplate<typename Func, typename Evaluator,\n         int Traversal = redux_traits<Func, Evaluator>::Traversal,\n         int Unrolling = redux_traits<Func, Evaluator>::Unrolling\n>\nstruct redux_impl;\n\ntemplate<typename Func, typename Evaluator>\nstruct redux_impl<Func, Evaluator, DefaultTraversal, NoUnrolling>\n{\n  typedef typename Evaluator::Scalar Scalar;\n\n  template<typename XprType>\n  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE\n  Scalar run(const Evaluator &eval, const Func& func, const XprType& xpr)\n  {\n    eigen_assert(xpr.rows()>0 && xpr.cols()>0 && \"you are using an empty matrix\");\n    Scalar res;\n    res = eval.coeffByOuterInner(0, 0);\n    for(Index i = 1; i < xpr.innerSize(); ++i)\n      res = func(res, eval.coeffByOuterInner(0, i));\n    for(Index i = 1; i < xpr.outerSize(); ++i)\n      for(Index j = 0; j < xpr.innerSize(); ++j)\n        res = func(res, eval.coeffByOuterInner(i, j));\n    return res;\n  }\n};\n\ntemplate<typename Func, typename Evaluator>\nstruct redux_impl<Func,Evaluator, DefaultTraversal, CompleteUnrolling>\n  : redux_novec_unroller<Func,Evaluator, 0, Evaluator::SizeAtCompileTime>\n{\n  typedef redux_novec_unroller<Func,Evaluator, 0, Evaluator::SizeAtCompileTime> Base;\n  typedef typename Evaluator::Scalar Scalar;\n  template<typename XprType>\n  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE\n  Scalar run(const Evaluator &eval, const Func& func, const XprType& /*xpr*/)\n  {\n    return Base::run(eval,func);\n  }\n};\n\ntemplate<typename Func, typename Evaluator>\nstruct redux_impl<Func, Evaluator, LinearVectorizedTraversal, NoUnrolling>\n{\n  typedef typename Evaluator::Scalar Scalar;\n  typedef 
typename redux_traits<Func, Evaluator>::PacketType PacketScalar;\n\n  template<typename XprType>\n  static Scalar run(const Evaluator &eval, const Func& func, const XprType& xpr)\n  {\n    const Index size = xpr.size();\n    \n    const Index packetSize = redux_traits<Func, Evaluator>::PacketSize;\n    const int packetAlignment = unpacket_traits<PacketScalar>::alignment;\n    enum {\n      alignment0 = (bool(Evaluator::Flags & DirectAccessBit) && bool(packet_traits<Scalar>::AlignedOnScalar)) ? int(packetAlignment) : int(Unaligned),\n      alignment = EIGEN_PLAIN_ENUM_MAX(alignment0, Evaluator::Alignment)\n    };\n    const Index alignedStart = internal::first_default_aligned(xpr);\n    const Index alignedSize2 = ((size-alignedStart)/(2*packetSize))*(2*packetSize);\n    const Index alignedSize = ((size-alignedStart)/(packetSize))*(packetSize);\n    const Index alignedEnd2 = alignedStart + alignedSize2;\n    const Index alignedEnd  = alignedStart + alignedSize;\n    Scalar res;\n    if(alignedSize)\n    {\n      PacketScalar packet_res0 = eval.template packet<alignment,PacketScalar>(alignedStart);\n      if(alignedSize>packetSize) // we have at least two packets to partly unroll the loop\n      {\n        PacketScalar packet_res1 = eval.template packet<alignment,PacketScalar>(alignedStart+packetSize);\n        for(Index index = alignedStart + 2*packetSize; index < alignedEnd2; index += 2*packetSize)\n        {\n          packet_res0 = func.packetOp(packet_res0, eval.template packet<alignment,PacketScalar>(index));\n          packet_res1 = func.packetOp(packet_res1, eval.template packet<alignment,PacketScalar>(index+packetSize));\n        }\n\n        packet_res0 = func.packetOp(packet_res0,packet_res1);\n        if(alignedEnd>alignedEnd2)\n          packet_res0 = func.packetOp(packet_res0, eval.template packet<alignment,PacketScalar>(alignedEnd2));\n      }\n      res = func.predux(packet_res0);\n\n      for(Index index = 0; index < alignedStart; ++index)\n        res = func(res,eval.coeff(index));\n\n      for(Index index = alignedEnd; index < size; ++index)\n        res = func(res,eval.coeff(index));\n    }\n    else // too small to vectorize anything.\n         // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.\n    {\n      res = eval.coeff(0);\n      for(Index index = 1; index < size; ++index)\n        res = func(res,eval.coeff(index));\n    }\n\n    return res;\n  }\n};\n\n// NOTE: for SliceVectorizedTraversal we simply bypass unrolling\ntemplate<typename Func, typename Evaluator, int Unrolling>\nstruct redux_impl<Func, Evaluator, SliceVectorizedTraversal, Unrolling>\n{\n  typedef typename Evaluator::Scalar Scalar;\n  typedef typename redux_traits<Func, Evaluator>::PacketType PacketType;\n\n  template<typename XprType>\n  EIGEN_DEVICE_FUNC static Scalar run(const Evaluator &eval, const Func& func, const XprType& xpr)\n  {\n    eigen_assert(xpr.rows()>0 && xpr.cols()>0 && \"you are using an empty matrix\");\n    const Index innerSize = xpr.innerSize();\n    const Index outerSize = xpr.outerSize();\n    enum {\n      packetSize = redux_traits<Func, Evaluator>::PacketSize\n    };\n    const Index packetedInnerSize = ((innerSize)/packetSize)*packetSize;\n    Scalar res;\n    if(packetedInnerSize)\n    {\n      PacketType packet_res = eval.template packet<Unaligned,PacketType>(0,0);\n      for(Index j=0; j<outerSize; ++j)\n        for(Index i=(j==0?packetSize:0); i<packetedInnerSize; i+=Index(packetSize))\n          packet_res = 
func.packetOp(packet_res, eval.template packetByOuterInner<Unaligned,PacketType>(j,i));\n\n      res = func.predux(packet_res);\n      for(Index j=0; j<outerSize; ++j)\n        for(Index i=packetedInnerSize; i<innerSize; ++i)\n          res = func(res, eval.coeffByOuterInner(j,i));\n    }\n    else // too small to vectorize anything.\n         // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.\n    {\n      res = redux_impl<Func, Evaluator, DefaultTraversal, NoUnrolling>::run(eval, func, xpr);\n    }\n\n    return res;\n  }\n};\n\ntemplate<typename Func, typename Evaluator>\nstruct redux_impl<Func, Evaluator, LinearVectorizedTraversal, CompleteUnrolling>\n{\n  typedef typename Evaluator::Scalar Scalar;\n\n  typedef typename redux_traits<Func, Evaluator>::PacketType PacketType;\n  enum {\n    PacketSize = redux_traits<Func, Evaluator>::PacketSize,\n    Size = Evaluator::SizeAtCompileTime,\n    VectorizedSize = (int(Size) / int(PacketSize)) * int(PacketSize)\n  };\n\n  template<typename XprType>\n  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE\n  Scalar run(const Evaluator &eval, const Func& func, const XprType &xpr)\n  {\n    EIGEN_ONLY_USED_FOR_DEBUG(xpr)\n    eigen_assert(xpr.rows()>0 && xpr.cols()>0 && \"you are using an empty matrix\");\n    if (VectorizedSize > 0) {\n      Scalar res = func.predux(redux_vec_unroller<Func, Evaluator, 0, Size / PacketSize>::template run<PacketType>(eval,func));\n      if (VectorizedSize != Size)\n        res = func(res,redux_novec_unroller<Func, Evaluator, VectorizedSize, Size-VectorizedSize>::run(eval,func));\n      return res;\n    }\n    else {\n      return redux_novec_unroller<Func, Evaluator, 0, Size>::run(eval,func);\n    }\n  }\n};\n\n// evaluator adaptor\ntemplate<typename _XprType>\nclass redux_evaluator : public internal::evaluator<_XprType>\n{\n  typedef internal::evaluator<_XprType> Base;\npublic:\n  typedef _XprType XprType;\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  explicit redux_evaluator(const XprType &xpr) : Base(xpr) {}\n  \n  typedef typename XprType::Scalar Scalar;\n  typedef typename XprType::CoeffReturnType CoeffReturnType;\n  typedef typename XprType::PacketScalar PacketScalar;\n  \n  enum {\n    MaxRowsAtCompileTime = XprType::MaxRowsAtCompileTime,\n    MaxColsAtCompileTime = XprType::MaxColsAtCompileTime,\n    // TODO we should not remove DirectAccessBit and rather find an elegant way to query the alignment offset at runtime from the evaluator\n    Flags = Base::Flags & ~DirectAccessBit,\n    IsRowMajor = XprType::IsRowMajor,\n    SizeAtCompileTime = XprType::SizeAtCompileTime,\n    InnerSizeAtCompileTime = XprType::InnerSizeAtCompileTime\n  };\n  \n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  CoeffReturnType coeffByOuterInner(Index outer, Index inner) const\n  { return Base::coeff(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }\n  \n  template<int LoadMode, typename PacketType>\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  PacketType packetByOuterInner(Index outer, Index inner) const\n  { return Base::template packet<LoadMode,PacketType>(IsRowMajor ? outer : inner, IsRowMajor ? 
inner : outer); }\n  \n};\n\n} // end namespace internal\n\n/***************************************************************************\n* Part 4 : public API\n***************************************************************************/\n\n\n/** \\returns the result of a full redux operation on the whole matrix or vector using \\a func\n  *\n  * The template parameter \\a BinaryOp is the type of the functor \\a func which must be\n  * an associative operator. Both current C++98 and C++11 functor styles are handled.\n  *\n  * \\warning the matrix must be not empty, otherwise an assertion is triggered.\n  *\n  * \\sa DenseBase::sum(), DenseBase::minCoeff(), DenseBase::maxCoeff(), MatrixBase::colwise(), MatrixBase::rowwise()\n  */\ntemplate<typename Derived>\ntemplate<typename Func>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar\nDenseBase<Derived>::redux(const Func& func) const\n{\n  eigen_assert(this->rows()>0 && this->cols()>0 && \"you are using an empty matrix\");\n\n  typedef typename internal::redux_evaluator<Derived> ThisEvaluator;\n  ThisEvaluator thisEval(derived());\n\n  // The initial expression is passed to the reducer as an additional argument instead of\n  // passing it as a member of redux_evaluator to help  \n  return internal::redux_impl<Func, ThisEvaluator>::run(thisEval, func, derived());\n}\n\n/** \\returns the minimum of all coefficients of \\c *this.\n  * In case \\c *this contains NaN, NaNPropagation determines the behavior:\n  *   NaNPropagation == PropagateFast : undefined\n  *   NaNPropagation == PropagateNaN : result is NaN\n  *   NaNPropagation == PropagateNumbers : result is minimum of elements that are not NaN\n  * \\warning the matrix must be not empty, otherwise an assertion is triggered.\n  */\ntemplate<typename Derived>\ntemplate<int NaNPropagation>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar\nDenseBase<Derived>::minCoeff() const\n{\n  return derived().redux(Eigen::internal::scalar_min_op<Scalar,Scalar, NaNPropagation>());\n}\n\n/** \\returns the maximum of all coefficients of \\c *this. 
\n  * In case \\c *this contains NaN, NaNPropagation determines the behavior:\n  *   NaNPropagation == PropagateFast : undefined\n  *   NaNPropagation == PropagateNaN : result is NaN\n  *   NaNPropagation == PropagateNumbers : result is maximum of elements that are not NaN\n  * \\warning the matrix must be not empty, otherwise an assertion is triggered.\n  */\ntemplate<typename Derived>\ntemplate<int NaNPropagation>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar\nDenseBase<Derived>::maxCoeff() const\n{\n  return derived().redux(Eigen::internal::scalar_max_op<Scalar,Scalar, NaNPropagation>());\n}\n\n/** \\returns the sum of all coefficients of \\c *this\n  *\n  * If \\c *this is empty, then the value 0 is returned.\n  *\n  * \\sa trace(), prod(), mean()\n  */\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar\nDenseBase<Derived>::sum() const\n{\n  if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))\n    return Scalar(0);\n  return derived().redux(Eigen::internal::scalar_sum_op<Scalar,Scalar>());\n}\n\n/** \\returns the mean of all coefficients of *this\n*\n* \\sa trace(), prod(), sum()\n*/\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar\nDenseBase<Derived>::mean() const\n{\n#ifdef __INTEL_COMPILER\n  #pragma warning push\n  #pragma warning ( disable : 2259 )\n#endif\n  return Scalar(derived().redux(Eigen::internal::scalar_sum_op<Scalar,Scalar>())) / Scalar(this->size());\n#ifdef __INTEL_COMPILER\n  #pragma warning pop\n#endif\n}\n\n/** \\returns the product of all coefficients of *this\n  *\n  * Example: \\include MatrixBase_prod.cpp\n  * Output: \\verbinclude MatrixBase_prod.out\n  *\n  * \\sa sum(), mean(), trace()\n  */\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar\nDenseBase<Derived>::prod() const\n{\n  if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))\n    return Scalar(1);\n  return derived().redux(Eigen::internal::scalar_product_op<Scalar>());\n}\n\n/** \\returns the trace of \\c *this, i.e. the sum of the coefficients on the main diagonal.\n  *\n  * \\c *this can be any matrix, not necessarily square.\n  *\n  * \\sa diagonal(), sum()\n  */\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar\nMatrixBase<Derived>::trace() const\n{\n  return derived().diagonal().sum();\n}\n\n} // end namespace Eigen\n\n#endif // EIGEN_REDUX_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/Ref.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2012 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_REF_H\n#define EIGEN_REF_H\n\nnamespace Eigen {\n\nnamespace internal {\n\ntemplate<typename _PlainObjectType, int _Options, typename _StrideType>\nstruct traits<Ref<_PlainObjectType, _Options, _StrideType> >\n  : public traits<Map<_PlainObjectType, _Options, _StrideType> >\n{\n  typedef _PlainObjectType PlainObjectType;\n  typedef _StrideType StrideType;\n  enum {\n    Options = _Options,\n    Flags = traits<Map<_PlainObjectType, _Options, _StrideType> >::Flags | NestByRefBit,\n    Alignment = traits<Map<_PlainObjectType, _Options, _StrideType> >::Alignment\n  };\n\n  template<typename Derived> struct match {\n    enum {\n      IsVectorAtCompileTime = PlainObjectType::IsVectorAtCompileTime || Derived::IsVectorAtCompileTime,\n      HasDirectAccess = internal::has_direct_access<Derived>::ret,\n      StorageOrderMatch = IsVectorAtCompileTime || ((PlainObjectType::Flags&RowMajorBit)==(Derived::Flags&RowMajorBit)),\n      InnerStrideMatch = int(StrideType::InnerStrideAtCompileTime)==int(Dynamic)\n                      || int(StrideType::InnerStrideAtCompileTime)==int(Derived::InnerStrideAtCompileTime)\n                      || (int(StrideType::InnerStrideAtCompileTime)==0 && int(Derived::InnerStrideAtCompileTime)==1),\n      OuterStrideMatch = IsVectorAtCompileTime\n                      || int(StrideType::OuterStrideAtCompileTime)==int(Dynamic) || int(StrideType::OuterStrideAtCompileTime)==int(Derived::OuterStrideAtCompileTime),\n      // NOTE, this indirection of evaluator<Derived>::Alignment is needed\n      // to workaround a very strange bug in MSVC related to the instantiation\n      // of has_*ary_operator in evaluator<CwiseNullaryOp>.\n      // This line is surprisingly very sensitive. For instance, simply adding parenthesis\n      // as \"DerivedAlignment = (int(evaluator<Derived>::Alignment)),\" will make MSVC fail...\n      DerivedAlignment = int(evaluator<Derived>::Alignment),\n      AlignmentMatch = (int(traits<PlainObjectType>::Alignment)==int(Unaligned)) || (DerivedAlignment >= int(Alignment)), // FIXME the first condition is not very clear, it should be replaced by the required alignment\n      ScalarTypeMatch = internal::is_same<typename PlainObjectType::Scalar, typename Derived::Scalar>::value,\n      MatchAtCompileTime = HasDirectAccess && StorageOrderMatch && InnerStrideMatch && OuterStrideMatch && AlignmentMatch && ScalarTypeMatch\n    };\n    typedef typename internal::conditional<MatchAtCompileTime,internal::true_type,internal::false_type>::type type;\n  };\n\n};\n\ntemplate<typename Derived>\nstruct traits<RefBase<Derived> > : public traits<Derived> {};\n\n}\n\ntemplate<typename Derived> class RefBase\n : public MapBase<Derived>\n{\n  typedef typename internal::traits<Derived>::PlainObjectType PlainObjectType;\n  typedef typename internal::traits<Derived>::StrideType StrideType;\n\npublic:\n\n  typedef MapBase<Derived> Base;\n  EIGEN_DENSE_PUBLIC_INTERFACE(RefBase)\n\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index innerStride() const\n  {\n    return StrideType::InnerStrideAtCompileTime != 0 ? 
m_stride.inner() : 1;\n  }\n\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index outerStride() const\n  {\n    return StrideType::OuterStrideAtCompileTime != 0 ? m_stride.outer()\n         : IsVectorAtCompileTime ? this->size()\n         : int(Flags)&RowMajorBit ? this->cols()\n         : this->rows();\n  }\n\n  EIGEN_DEVICE_FUNC RefBase()\n    : Base(0,RowsAtCompileTime==Dynamic?0:RowsAtCompileTime,ColsAtCompileTime==Dynamic?0:ColsAtCompileTime),\n      // Stride<> does not allow default ctor for Dynamic strides, so let's initialize it with dummy values:\n      m_stride(StrideType::OuterStrideAtCompileTime==Dynamic?0:StrideType::OuterStrideAtCompileTime,\n               StrideType::InnerStrideAtCompileTime==Dynamic?0:StrideType::InnerStrideAtCompileTime)\n  {}\n\n  EIGEN_INHERIT_ASSIGNMENT_OPERATORS(RefBase)\n\nprotected:\n\n  typedef Stride<StrideType::OuterStrideAtCompileTime,StrideType::InnerStrideAtCompileTime> StrideBase;\n\n  // Resolves inner stride if default 0.\n  static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index resolveInnerStride(Index inner) {\n    return inner == 0 ? 1 : inner;\n  }\n\n  // Resolves outer stride if default 0.\n  static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index resolveOuterStride(Index inner, Index outer, Index rows, Index cols, bool isVectorAtCompileTime, bool isRowMajor) {\n    return outer == 0 ? isVectorAtCompileTime ? inner * rows * cols : isRowMajor ? inner * cols : inner * rows : outer;\n  }\n\n  // Returns true if construction is valid, false if there is a stride mismatch,\n  // and fails if there is a size mismatch.\n  template<typename Expression>\n  EIGEN_DEVICE_FUNC bool construct(Expression& expr)\n  {\n    // Check matrix sizes.  If this is a compile-time vector, we do allow\n    // implicitly transposing.\n    EIGEN_STATIC_ASSERT(\n      EIGEN_PREDICATE_SAME_MATRIX_SIZE(PlainObjectType, Expression)\n      // If it is a vector, the transpose sizes might match.\n      || ( PlainObjectType::IsVectorAtCompileTime\n            && ((int(PlainObjectType::RowsAtCompileTime)==Eigen::Dynamic\n              || int(Expression::ColsAtCompileTime)==Eigen::Dynamic\n              || int(PlainObjectType::RowsAtCompileTime)==int(Expression::ColsAtCompileTime))\n            &&  (int(PlainObjectType::ColsAtCompileTime)==Eigen::Dynamic\n              || int(Expression::RowsAtCompileTime)==Eigen::Dynamic\n              || int(PlainObjectType::ColsAtCompileTime)==int(Expression::RowsAtCompileTime)))),\n      YOU_MIXED_MATRICES_OF_DIFFERENT_SIZES\n    )\n\n    // Determine runtime rows and columns.\n    Index rows = expr.rows();\n    Index cols = expr.cols();\n    if(PlainObjectType::RowsAtCompileTime==1)\n    {\n      eigen_assert(expr.rows()==1 || expr.cols()==1);\n      rows = 1;\n      cols = expr.size();\n    }\n    else if(PlainObjectType::ColsAtCompileTime==1)\n    {\n      eigen_assert(expr.rows()==1 || expr.cols()==1);\n      rows = expr.size();\n      cols = 1;\n    }\n    // Verify that the sizes are valid.\n    eigen_assert(\n      (PlainObjectType::RowsAtCompileTime == Dynamic) || (PlainObjectType::RowsAtCompileTime == rows));\n    eigen_assert(\n      (PlainObjectType::ColsAtCompileTime == Dynamic) || (PlainObjectType::ColsAtCompileTime == cols));\n\n\n    // If this is a vector, we might be transposing, which means that stride should swap.\n    const bool transpose = PlainObjectType::IsVectorAtCompileTime && (rows != expr.rows());\n    // If the storage format differs, we also need to swap the stride.\n    const bool row_major = 
((PlainObjectType::Flags)&RowMajorBit) != 0;\n    const bool expr_row_major = (Expression::Flags&RowMajorBit) != 0;\n    const bool storage_differs =  (row_major != expr_row_major);\n\n    const bool swap_stride = (transpose != storage_differs);\n\n    // Determine expr's actual strides, resolving any defaults if zero.\n    const Index expr_inner_actual = resolveInnerStride(expr.innerStride());\n    const Index expr_outer_actual = resolveOuterStride(expr_inner_actual,\n                                                       expr.outerStride(),\n                                                       expr.rows(),\n                                                       expr.cols(),\n                                                       Expression::IsVectorAtCompileTime != 0,\n                                                       expr_row_major);\n\n    // If this is a column-major row vector or row-major column vector, the inner-stride\n    // is arbitrary, so set it to either the compile-time inner stride or 1.\n    const bool row_vector = (rows == 1);\n    const bool col_vector = (cols == 1);\n    const Index inner_stride =\n        ( (!row_major && row_vector) || (row_major && col_vector) ) ?\n            ( StrideType::InnerStrideAtCompileTime > 0 ? Index(StrideType::InnerStrideAtCompileTime) : 1)\n            : swap_stride ? expr_outer_actual : expr_inner_actual;\n\n    // If this is a column-major column vector or row-major row vector, the outer-stride\n    // is arbitrary, so set it to either the compile-time outer stride or vector size.\n    const Index outer_stride =\n      ( (!row_major && col_vector) || (row_major && row_vector) ) ?\n          ( StrideType::OuterStrideAtCompileTime > 0 ? Index(StrideType::OuterStrideAtCompileTime) : rows * cols * inner_stride)\n          : swap_stride ? expr_inner_actual : expr_outer_actual;\n\n    // Check if given inner/outer strides are compatible with compile-time strides.\n    const bool inner_valid = (StrideType::InnerStrideAtCompileTime == Dynamic)\n        || (resolveInnerStride(Index(StrideType::InnerStrideAtCompileTime)) == inner_stride);\n    if (!inner_valid) {\n      return false;\n    }\n\n    const bool outer_valid = (StrideType::OuterStrideAtCompileTime == Dynamic)\n        || (resolveOuterStride(\n              inner_stride,\n              Index(StrideType::OuterStrideAtCompileTime),\n              rows, cols, PlainObjectType::IsVectorAtCompileTime != 0,\n              row_major)\n            == outer_stride);\n    if (!outer_valid) {\n      return false;\n    }\n\n    ::new (static_cast<Base*>(this)) Base(expr.data(), rows, cols);\n    ::new (&m_stride) StrideBase(\n      (StrideType::OuterStrideAtCompileTime == 0) ? 0 : outer_stride,\n      (StrideType::InnerStrideAtCompileTime == 0) ? 0 : inner_stride );\n    return true;\n  }\n\n  StrideBase m_stride;\n};\n\n/** \\class Ref\n  * \\ingroup Core_Module\n  *\n  * \\brief A matrix or vector expression mapping an existing expression\n  *\n  * \\tparam PlainObjectType the equivalent matrix type of the mapped data\n  * \\tparam Options specifies the pointer alignment in bytes. It can be: \\c #Aligned128, \\c #Aligned64, \\c #Aligned32, \\c #Aligned16, \\c #Aligned8 or \\c #Unaligned.\n  *                 The default is \\c #Unaligned.\n  * \\tparam StrideType optionally specifies strides. 
By default, Ref implies a contiguous storage along the inner dimension (inner stride==1),\n  *                   but accepts a variable outer stride (leading dimension).\n  *                   This can be overridden by specifying strides.\n  *                   The type passed here must be a specialization of the Stride template, see examples below.\n  *\n  * This class provides a way to write non-template functions taking Eigen objects as parameters while limiting the number of copies.\n  * A Ref<> object can represent either a const expression or a l-value:\n  * \\code\n  * // in-out argument:\n  * void foo1(Ref<VectorXf> x);\n  *\n  * // read-only const argument:\n  * void foo2(const Ref<const VectorXf>& x);\n  * \\endcode\n  *\n  * In the in-out case, the input argument must satisfy the constraints of the actual Ref<> type, otherwise a compilation issue will be triggered.\n  * By default, a Ref<VectorXf> can reference any dense vector expression of float having a contiguous memory layout.\n  * Likewise, a Ref<MatrixXf> can reference any column-major dense matrix expression of float whose column's elements are contiguously stored with\n  * the possibility to have a constant space in-between each column, i.e. the inner stride must be equal to 1, but the outer stride (or leading dimension)\n  * can be greater than the number of rows.\n  *\n  * In the const case, if the input expression does not match the above requirement, then it is evaluated into a temporary before being passed to the function.\n  * Here are some examples:\n  * \\code\n  * MatrixXf A;\n  * VectorXf a;\n  * foo1(a.head());             // OK\n  * foo1(A.col());              // OK\n  * foo1(A.row());              // Compilation error because here innerstride!=1\n  * foo2(A.row());              // Compilation error because A.row() is a 1xN object while foo2 is expecting a Nx1 object\n  * foo2(A.row().transpose());  // The row is copied into a contiguous temporary\n  * foo2(2*a);                  // The expression is evaluated into a temporary\n  * foo2(A.col().segment(2,4)); // No temporary\n  * \\endcode\n  *\n  * The range of inputs that can be referenced without temporary can be enlarged using the last two template parameters.\n  * Here is an example accepting an innerstride!=1:\n  * \\code\n  * // in-out argument:\n  * void foo3(Ref<VectorXf,0,InnerStride<> > x);\n  * foo3(A.row());              // OK\n  * \\endcode\n  * The downside here is that the function foo3 might be significantly slower than foo1 because it won't be able to exploit vectorization, and will involve more\n  * expensive address computations even if the input is contiguously stored in memory. To overcome this issue, one might propose to overload internally calling a\n  * template function, e.g.:\n  * \\code\n  * // in the .h:\n  * void foo(const Ref<MatrixXf>& A);\n  * void foo(const Ref<MatrixXf,0,Stride<> >& A);\n  *\n  * // in the .cpp:\n  * template<typename TypeOfA> void foo_impl(const TypeOfA& A) {\n  *     ... 
// crazy code goes here\n  * }\n  * void foo(const Ref<MatrixXf>& A) { foo_impl(A); }\n  * void foo(const Ref<MatrixXf,0,Stride<> >& A) { foo_impl(A); }\n  * \\endcode\n  *\n  * See also the following stackoverflow questions for further references:\n  *  - <a href=\"http://stackoverflow.com/questions/21132538/correct-usage-of-the-eigenref-class\">Correct usage of the Eigen::Ref<> class</a>\n  *\n  * \\sa PlainObjectBase::Map(), \\ref TopicStorageOrders\n  */\ntemplate<typename PlainObjectType, int Options, typename StrideType> class Ref\n  : public RefBase<Ref<PlainObjectType, Options, StrideType> >\n{\n  private:\n    typedef internal::traits<Ref> Traits;\n    template<typename Derived>\n    EIGEN_DEVICE_FUNC inline Ref(const PlainObjectBase<Derived>& expr,\n                                 typename internal::enable_if<bool(Traits::template match<Derived>::MatchAtCompileTime),Derived>::type* = 0);\n  public:\n\n    typedef RefBase<Ref> Base;\n    EIGEN_DENSE_PUBLIC_INTERFACE(Ref)\n\n\n    #ifndef EIGEN_PARSED_BY_DOXYGEN\n    template<typename Derived>\n    EIGEN_DEVICE_FUNC inline Ref(PlainObjectBase<Derived>& expr,\n                                 typename internal::enable_if<bool(Traits::template match<Derived>::MatchAtCompileTime),Derived>::type* = 0)\n    {\n      EIGEN_STATIC_ASSERT(bool(Traits::template match<Derived>::MatchAtCompileTime), STORAGE_LAYOUT_DOES_NOT_MATCH);\n      // Construction must pass since we will not create temporary storage in the non-const case.\n      const bool success = Base::construct(expr.derived());\n      EIGEN_UNUSED_VARIABLE(success)\n      eigen_assert(success);\n    }\n    template<typename Derived>\n    EIGEN_DEVICE_FUNC inline Ref(const DenseBase<Derived>& expr,\n                                 typename internal::enable_if<bool(Traits::template match<Derived>::MatchAtCompileTime),Derived>::type* = 0)\n    #else\n    /** Implicit constructor from any dense expression */\n    template<typename Derived>\n    inline Ref(DenseBase<Derived>& expr)\n    #endif\n    {\n      EIGEN_STATIC_ASSERT(bool(internal::is_lvalue<Derived>::value), THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY);\n      EIGEN_STATIC_ASSERT(bool(Traits::template match<Derived>::MatchAtCompileTime), STORAGE_LAYOUT_DOES_NOT_MATCH);\n      EIGEN_STATIC_ASSERT(!Derived::IsPlainObjectBase,THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY);\n      // Construction must pass since we will not create temporary storage in the non-const case.\n      const bool success = Base::construct(expr.const_cast_derived());\n      EIGEN_UNUSED_VARIABLE(success)\n      eigen_assert(success);\n    }\n\n    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Ref)\n\n};\n\n// this is the const ref version\ntemplate<typename TPlainObjectType, int Options, typename StrideType> class Ref<const TPlainObjectType, Options, StrideType>\n  : public RefBase<Ref<const TPlainObjectType, Options, StrideType> >\n{\n    typedef internal::traits<Ref> Traits;\n  public:\n\n    typedef RefBase<Ref> Base;\n    EIGEN_DENSE_PUBLIC_INTERFACE(Ref)\n\n    template<typename Derived>\n    EIGEN_DEVICE_FUNC inline Ref(const DenseBase<Derived>& expr,\n                                 typename internal::enable_if<bool(Traits::template match<Derived>::ScalarTypeMatch),Derived>::type* = 0)\n    {\n//      std::cout << match_helper<Derived>::HasDirectAccess << \",\" << match_helper<Derived>::OuterStrideMatch << \",\" << match_helper<Derived>::InnerStrideMatch << \"\\n\";\n//      std::cout << int(StrideType::OuterStrideAtCompileTime) << \" - \" << 
int(Derived::OuterStrideAtCompileTime) << \"\\n\";\n//      std::cout << int(StrideType::InnerStrideAtCompileTime) << \" - \" << int(Derived::InnerStrideAtCompileTime) << \"\\n\";\n      construct(expr.derived(), typename Traits::template match<Derived>::type());\n    }\n\n    EIGEN_DEVICE_FUNC inline Ref(const Ref& other) : Base(other) {\n      // copy constructor shall not copy the m_object, to avoid unnecessary malloc and copy\n    }\n\n    template<typename OtherRef>\n    EIGEN_DEVICE_FUNC inline Ref(const RefBase<OtherRef>& other) {\n      construct(other.derived(), typename Traits::template match<OtherRef>::type());\n    }\n\n  protected:\n\n    template<typename Expression>\n    EIGEN_DEVICE_FUNC void construct(const Expression& expr,internal::true_type)\n    {\n      // Check if we can use the underlying expr's storage directly, otherwise call the copy version.\n      if (!Base::construct(expr)) {\n        construct(expr, internal::false_type());\n      }\n    }\n\n    template<typename Expression>\n    EIGEN_DEVICE_FUNC void construct(const Expression& expr, internal::false_type)\n    {\n      internal::call_assignment_no_alias(m_object,expr,internal::assign_op<Scalar,Scalar>());\n      Base::construct(m_object);\n    }\n\n  protected:\n    TPlainObjectType m_object;\n};\n\n} // end namespace Eigen\n\n#endif // EIGEN_REF_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/Replicate.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_REPLICATE_H\n#define EIGEN_REPLICATE_H\n\nnamespace Eigen {\n\nnamespace internal {\ntemplate<typename MatrixType,int RowFactor,int ColFactor>\nstruct traits<Replicate<MatrixType,RowFactor,ColFactor> >\n : traits<MatrixType>\n{\n  typedef typename MatrixType::Scalar Scalar;\n  typedef typename traits<MatrixType>::StorageKind StorageKind;\n  typedef typename traits<MatrixType>::XprKind XprKind;\n  typedef typename ref_selector<MatrixType>::type MatrixTypeNested;\n  typedef typename remove_reference<MatrixTypeNested>::type _MatrixTypeNested;\n  enum {\n    RowsAtCompileTime = RowFactor==Dynamic || int(MatrixType::RowsAtCompileTime)==Dynamic\n                      ? Dynamic\n                      : RowFactor * MatrixType::RowsAtCompileTime,\n    ColsAtCompileTime = ColFactor==Dynamic || int(MatrixType::ColsAtCompileTime)==Dynamic\n                      ? Dynamic\n                      : ColFactor * MatrixType::ColsAtCompileTime,\n   //FIXME we don't propagate the max sizes !!!\n    MaxRowsAtCompileTime = RowsAtCompileTime,\n    MaxColsAtCompileTime = ColsAtCompileTime,\n    IsRowMajor = MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1 ? 1\n               : MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1 ? 0\n               : (MatrixType::Flags & RowMajorBit) ? 1 : 0,\n\n    // FIXME enable DirectAccess with negative strides?\n    Flags = IsRowMajor ? 
RowMajorBit : 0\n  };\n};\n}\n\n/**\n  * \\class Replicate\n  * \\ingroup Core_Module\n  *\n  * \\brief Expression of the multiple replication of a matrix or vector\n  *\n  * \\tparam MatrixType the type of the object we are replicating\n  * \\tparam RowFactor number of repetitions at compile time along the vertical direction, can be Dynamic.\n  * \\tparam ColFactor number of repetitions at compile time along the horizontal direction, can be Dynamic.\n  *\n  * This class represents an expression of the multiple replication of a matrix or vector.\n  * It is the return type of DenseBase::replicate() and most of the time\n  * this is the only way it is used.\n  *\n  * \\sa DenseBase::replicate()\n  */\ntemplate<typename MatrixType,int RowFactor,int ColFactor> class Replicate\n  : public internal::dense_xpr_base< Replicate<MatrixType,RowFactor,ColFactor> >::type\n{\n    typedef typename internal::traits<Replicate>::MatrixTypeNested MatrixTypeNested;\n    typedef typename internal::traits<Replicate>::_MatrixTypeNested _MatrixTypeNested;\n  public:\n\n    typedef typename internal::dense_xpr_base<Replicate>::type Base;\n    EIGEN_DENSE_PUBLIC_INTERFACE(Replicate)\n    typedef typename internal::remove_all<MatrixType>::type NestedExpression;\n\n    template<typename OriginalMatrixType>\n    EIGEN_DEVICE_FUNC\n    inline explicit Replicate(const OriginalMatrixType& matrix)\n      : m_matrix(matrix), m_rowFactor(RowFactor), m_colFactor(ColFactor)\n    {\n      EIGEN_STATIC_ASSERT((internal::is_same<typename internal::remove_const<MatrixType>::type,OriginalMatrixType>::value),\n                          THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE)\n      eigen_assert(RowFactor!=Dynamic && ColFactor!=Dynamic);\n    }\n\n    template<typename OriginalMatrixType>\n    EIGEN_DEVICE_FUNC\n    inline Replicate(const OriginalMatrixType& matrix, Index rowFactor, Index colFactor)\n      : m_matrix(matrix), m_rowFactor(rowFactor), m_colFactor(colFactor)\n    {\n      EIGEN_STATIC_ASSERT((internal::is_same<typename internal::remove_const<MatrixType>::type,OriginalMatrixType>::value),\n                          THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE)\n    }\n\n    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n    inline Index rows() const { return m_matrix.rows() * m_rowFactor.value(); }\n    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n    inline Index cols() const { return m_matrix.cols() * m_colFactor.value(); }\n\n    EIGEN_DEVICE_FUNC\n    const _MatrixTypeNested& nestedExpression() const\n    {\n      return m_matrix;\n    }\n\n  protected:\n    MatrixTypeNested m_matrix;\n    const internal::variable_if_dynamic<Index, RowFactor> m_rowFactor;\n    const internal::variable_if_dynamic<Index, ColFactor> m_colFactor;\n};\n\n/**\n  * \\return an expression of the replication of \\c *this\n  *\n  * Example: \\include MatrixBase_replicate.cpp\n  * Output: \\verbinclude MatrixBase_replicate.out\n  *\n  * \\sa VectorwiseOp::replicate(), DenseBase::replicate(Index,Index), class Replicate\n  */\ntemplate<typename Derived>\ntemplate<int RowFactor, int ColFactor>\nEIGEN_DEVICE_FUNC const Replicate<Derived,RowFactor,ColFactor>\nDenseBase<Derived>::replicate() const\n{\n  return Replicate<Derived,RowFactor,ColFactor>(derived());\n}\n\n/**\n  * \\return an expression of the replication of each column (or row) of \\c *this\n  *\n  * Example: \\include DirectionWise_replicate_int.cpp\n  * Output: \\verbinclude DirectionWise_replicate_int.out\n  *\n  * \\sa 
VectorwiseOp::replicate(), DenseBase::replicate(), class Replicate\n  */\ntemplate<typename ExpressionType, int Direction>\nEIGEN_DEVICE_FUNC const typename VectorwiseOp<ExpressionType,Direction>::ReplicateReturnType\nVectorwiseOp<ExpressionType,Direction>::replicate(Index factor) const\n{\n  return typename VectorwiseOp<ExpressionType,Direction>::ReplicateReturnType\n          (_expression(),Direction==Vertical?factor:1,Direction==Horizontal?factor:1);\n}\n\n} // end namespace Eigen\n\n#endif // EIGEN_REPLICATE_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/Reshaped.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008-2017 Gael Guennebaud <gael.guennebaud@inria.fr>\n// Copyright (C) 2014 yoco <peter.xiau@gmail.com>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_RESHAPED_H\n#define EIGEN_RESHAPED_H\n\nnamespace Eigen {\n\n/** \\class Reshaped\n  * \\ingroup Core_Module\n  *\n  * \\brief Expression of a fixed-size or dynamic-size reshape\n  *\n  * \\tparam XprType the type of the expression in which we are taking a reshape\n  * \\tparam Rows the number of rows of the reshape we are taking at compile time (optional)\n  * \\tparam Cols the number of columns of the reshape we are taking at compile time (optional)\n  * \\tparam Order can be ColMajor or RowMajor, default is ColMajor.\n  *\n  * This class represents an expression of either a fixed-size or dynamic-size reshape.\n  * It is the return type of DenseBase::reshaped(NRowsType,NColsType) and\n  * most of the time this is the only way it is used.\n  *\n  * However, in C++98, if you want to directly maniputate reshaped expressions,\n  * for instance if you want to write a function returning such an expression, you\n  * will need to use this class. In C++11, it is advised to use the \\em auto\n  * keyword for such use cases.\n  *\n  * Here is an example illustrating the dynamic case:\n  * \\include class_Reshaped.cpp\n  * Output: \\verbinclude class_Reshaped.out\n  *\n  * Here is an example illustrating the fixed-size case:\n  * \\include class_FixedReshaped.cpp\n  * Output: \\verbinclude class_FixedReshaped.out\n  *\n  * \\sa DenseBase::reshaped(NRowsType,NColsType)\n  */\n\nnamespace internal {\n\ntemplate<typename XprType, int Rows, int Cols, int Order>\nstruct traits<Reshaped<XprType, Rows, Cols, Order> > : traits<XprType>\n{\n  typedef typename traits<XprType>::Scalar Scalar;\n  typedef typename traits<XprType>::StorageKind StorageKind;\n  typedef typename traits<XprType>::XprKind XprKind;\n  enum{\n    MatrixRows = traits<XprType>::RowsAtCompileTime,\n    MatrixCols = traits<XprType>::ColsAtCompileTime,\n    RowsAtCompileTime = Rows,\n    ColsAtCompileTime = Cols,\n    MaxRowsAtCompileTime = Rows,\n    MaxColsAtCompileTime = Cols,\n    XpxStorageOrder = ((int(traits<XprType>::Flags) & RowMajorBit) == RowMajorBit) ? RowMajor : ColMajor,\n    ReshapedStorageOrder = (RowsAtCompileTime == 1 && ColsAtCompileTime != 1) ? RowMajor\n                         : (ColsAtCompileTime == 1 && RowsAtCompileTime != 1) ? ColMajor\n                         : XpxStorageOrder,\n    HasSameStorageOrderAsXprType = (ReshapedStorageOrder == XpxStorageOrder),\n    InnerSize = (ReshapedStorageOrder==int(RowMajor)) ? int(ColsAtCompileTime) : int(RowsAtCompileTime),\n    InnerStrideAtCompileTime = HasSameStorageOrderAsXprType\n                             ? 
int(inner_stride_at_compile_time<XprType>::ret)\n                             : Dynamic,\n    OuterStrideAtCompileTime = Dynamic,\n\n    HasDirectAccess = internal::has_direct_access<XprType>::ret\n                    && (Order==int(XpxStorageOrder))\n                    && ((evaluator<XprType>::Flags&LinearAccessBit)==LinearAccessBit),\n\n    MaskPacketAccessBit = (InnerSize == Dynamic || (InnerSize % packet_traits<Scalar>::size) == 0)\n                       && (InnerStrideAtCompileTime == 1)\n                        ? PacketAccessBit : 0,\n    //MaskAlignedBit = ((OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % 16) == 0)) ? AlignedBit : 0,\n    FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1) ? LinearAccessBit : 0,\n    FlagsLvalueBit = is_lvalue<XprType>::value ? LvalueBit : 0,\n    FlagsRowMajorBit = (ReshapedStorageOrder==int(RowMajor)) ? RowMajorBit : 0,\n    FlagsDirectAccessBit = HasDirectAccess ? DirectAccessBit : 0,\n    Flags0 = traits<XprType>::Flags & ( (HereditaryBits & ~RowMajorBit) | MaskPacketAccessBit),\n\n    Flags = (Flags0 | FlagsLinearAccessBit | FlagsLvalueBit | FlagsRowMajorBit | FlagsDirectAccessBit)\n  };\n};\n\ntemplate<typename XprType, int Rows, int Cols, int Order, bool HasDirectAccess> class ReshapedImpl_dense;\n\n} // end namespace internal\n\ntemplate<typename XprType, int Rows, int Cols, int Order, typename StorageKind> class ReshapedImpl;\n\ntemplate<typename XprType, int Rows, int Cols, int Order> class Reshaped\n  : public ReshapedImpl<XprType, Rows, Cols, Order, typename internal::traits<XprType>::StorageKind>\n{\n    typedef ReshapedImpl<XprType, Rows, Cols, Order, typename internal::traits<XprType>::StorageKind> Impl;\n  public:\n    //typedef typename Impl::Base Base;\n    typedef Impl Base;\n    EIGEN_GENERIC_PUBLIC_INTERFACE(Reshaped)\n    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Reshaped)\n\n    /** Fixed-size constructor\n      */\n    EIGEN_DEVICE_FUNC\n    inline Reshaped(XprType& xpr)\n      : Impl(xpr)\n    {\n      EIGEN_STATIC_ASSERT(RowsAtCompileTime!=Dynamic && ColsAtCompileTime!=Dynamic,THIS_METHOD_IS_ONLY_FOR_FIXED_SIZE)\n      eigen_assert(Rows * Cols == xpr.rows() * xpr.cols());\n    }\n\n    /** Dynamic-size constructor\n      */\n    EIGEN_DEVICE_FUNC\n    inline Reshaped(XprType& xpr,\n          Index reshapeRows, Index reshapeCols)\n      : Impl(xpr, reshapeRows, reshapeCols)\n    {\n      eigen_assert((RowsAtCompileTime==Dynamic || RowsAtCompileTime==reshapeRows)\n          && (ColsAtCompileTime==Dynamic || ColsAtCompileTime==reshapeCols));\n      eigen_assert(reshapeRows * reshapeCols == xpr.rows() * xpr.cols());\n    }\n};\n\n// The generic default implementation for dense reshape simply forward to the internal::ReshapedImpl_dense\n// that must be specialized for direct and non-direct access...\ntemplate<typename XprType, int Rows, int Cols, int Order>\nclass ReshapedImpl<XprType, Rows, Cols, Order, Dense>\n  : public internal::ReshapedImpl_dense<XprType, Rows, Cols, Order,internal::traits<Reshaped<XprType,Rows,Cols,Order> >::HasDirectAccess>\n{\n    typedef internal::ReshapedImpl_dense<XprType, Rows, Cols, Order,internal::traits<Reshaped<XprType,Rows,Cols,Order> >::HasDirectAccess> Impl;\n  public:\n    typedef Impl Base;\n    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(ReshapedImpl)\n    EIGEN_DEVICE_FUNC inline ReshapedImpl(XprType& xpr) : Impl(xpr) {}\n    EIGEN_DEVICE_FUNC inline ReshapedImpl(XprType& xpr, Index reshapeRows, Index reshapeCols)\n      : Impl(xpr, 
reshapeRows, reshapeCols) {}\n};\n\nnamespace internal {\n\n/** \\internal Internal implementation of dense Reshaped in the general case. */\ntemplate<typename XprType, int Rows, int Cols, int Order>\nclass ReshapedImpl_dense<XprType,Rows,Cols,Order,false>\n  : public internal::dense_xpr_base<Reshaped<XprType, Rows, Cols, Order> >::type\n{\n    typedef Reshaped<XprType, Rows, Cols, Order> ReshapedType;\n  public:\n\n    typedef typename internal::dense_xpr_base<ReshapedType>::type Base;\n    EIGEN_DENSE_PUBLIC_INTERFACE(ReshapedType)\n    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(ReshapedImpl_dense)\n\n    typedef typename internal::ref_selector<XprType>::non_const_type MatrixTypeNested;\n    typedef typename internal::remove_all<XprType>::type NestedExpression;\n\n    class InnerIterator;\n\n    /** Fixed-size constructor\n      */\n    EIGEN_DEVICE_FUNC\n    inline ReshapedImpl_dense(XprType& xpr)\n      : m_xpr(xpr), m_rows(Rows), m_cols(Cols)\n    {}\n\n    /** Dynamic-size constructor\n      */\n    EIGEN_DEVICE_FUNC\n    inline ReshapedImpl_dense(XprType& xpr, Index nRows, Index nCols)\n      : m_xpr(xpr), m_rows(nRows), m_cols(nCols)\n    {}\n\n    EIGEN_DEVICE_FUNC Index rows() const { return m_rows; }\n    EIGEN_DEVICE_FUNC Index cols() const { return m_cols; }\n\n    #ifdef EIGEN_PARSED_BY_DOXYGEN\n    /** \\sa MapBase::data() */\n    EIGEN_DEVICE_FUNC inline const Scalar* data() const;\n    EIGEN_DEVICE_FUNC inline Index innerStride() const;\n    EIGEN_DEVICE_FUNC inline Index outerStride() const;\n    #endif\n\n    /** \\returns the nested expression */\n    EIGEN_DEVICE_FUNC\n    const typename internal::remove_all<XprType>::type&\n    nestedExpression() const { return m_xpr; }\n\n    /** \\returns the nested expression */\n    EIGEN_DEVICE_FUNC\n    typename internal::remove_reference<XprType>::type&\n    nestedExpression() { return m_xpr; }\n\n  protected:\n\n    MatrixTypeNested m_xpr;\n    const internal::variable_if_dynamic<Index, Rows> m_rows;\n    const internal::variable_if_dynamic<Index, Cols> m_cols;\n};\n\n\n/** \\internal Internal implementation of dense Reshaped in the direct access case. 
*/\ntemplate<typename XprType, int Rows, int Cols, int Order>\nclass ReshapedImpl_dense<XprType, Rows, Cols, Order, true>\n  : public MapBase<Reshaped<XprType, Rows, Cols, Order> >\n{\n    typedef Reshaped<XprType, Rows, Cols, Order> ReshapedType;\n    typedef typename internal::ref_selector<XprType>::non_const_type XprTypeNested;\n  public:\n\n    typedef MapBase<ReshapedType> Base;\n    EIGEN_DENSE_PUBLIC_INTERFACE(ReshapedType)\n    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(ReshapedImpl_dense)\n\n    /** Fixed-size constructor\n      */\n    EIGEN_DEVICE_FUNC\n    inline ReshapedImpl_dense(XprType& xpr)\n      : Base(xpr.data()), m_xpr(xpr)\n    {}\n\n    /** Dynamic-size constructor\n      */\n    EIGEN_DEVICE_FUNC\n    inline ReshapedImpl_dense(XprType& xpr, Index nRows, Index nCols)\n      : Base(xpr.data(), nRows, nCols),\n        m_xpr(xpr)\n    {}\n\n    EIGEN_DEVICE_FUNC\n    const typename internal::remove_all<XprTypeNested>::type& nestedExpression() const\n    {\n      return m_xpr;\n    }\n\n    EIGEN_DEVICE_FUNC\n    XprType& nestedExpression() { return m_xpr; }\n\n    /** \\sa MapBase::innerStride() */\n    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n    inline Index innerStride() const\n    {\n      return m_xpr.innerStride();\n    }\n\n    /** \\sa MapBase::outerStride() */\n    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n    inline Index outerStride() const\n    {\n      return ((Flags&RowMajorBit)==RowMajorBit) ? this->cols() : this->rows();\n    }\n\n  protected:\n\n    XprTypeNested m_xpr;\n};\n\n// Evaluators\ntemplate<typename ArgType, int Rows, int Cols, int Order, bool HasDirectAccess> struct reshaped_evaluator;\n\ntemplate<typename ArgType, int Rows, int Cols, int Order>\nstruct evaluator<Reshaped<ArgType, Rows, Cols, Order> >\n  : reshaped_evaluator<ArgType, Rows, Cols, Order, traits<Reshaped<ArgType,Rows,Cols,Order> >::HasDirectAccess>\n{\n  typedef Reshaped<ArgType, Rows, Cols, Order> XprType;\n  typedef typename XprType::Scalar Scalar;\n  // TODO: should check for smaller packet types\n  typedef typename packet_traits<Scalar>::type PacketScalar;\n\n  enum {\n    CoeffReadCost = evaluator<ArgType>::CoeffReadCost,\n    HasDirectAccess = traits<XprType>::HasDirectAccess,\n\n//     RowsAtCompileTime = traits<XprType>::RowsAtCompileTime,\n//     ColsAtCompileTime = traits<XprType>::ColsAtCompileTime,\n//     MaxRowsAtCompileTime = traits<XprType>::MaxRowsAtCompileTime,\n//     MaxColsAtCompileTime = traits<XprType>::MaxColsAtCompileTime,\n//\n//     InnerStrideAtCompileTime = traits<XprType>::HasSameStorageOrderAsXprType\n//                              ? int(inner_stride_at_compile_time<ArgType>::ret)\n//                              : Dynamic,\n//     OuterStrideAtCompileTime = Dynamic,\n\n    FlagsLinearAccessBit = (traits<XprType>::RowsAtCompileTime == 1 || traits<XprType>::ColsAtCompileTime == 1 || HasDirectAccess) ? LinearAccessBit : 0,\n    FlagsRowMajorBit = (traits<XprType>::ReshapedStorageOrder==int(RowMajor)) ? RowMajorBit : 0,\n    FlagsDirectAccessBit =  HasDirectAccess ? 
DirectAccessBit : 0,\n    Flags0 = evaluator<ArgType>::Flags & (HereditaryBits & ~RowMajorBit),\n    Flags = Flags0 | FlagsLinearAccessBit | FlagsRowMajorBit | FlagsDirectAccessBit,\n\n    PacketAlignment = unpacket_traits<PacketScalar>::alignment,\n    Alignment = evaluator<ArgType>::Alignment\n  };\n  typedef reshaped_evaluator<ArgType, Rows, Cols, Order, HasDirectAccess> reshaped_evaluator_type;\n  EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : reshaped_evaluator_type(xpr)\n  {\n    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);\n  }\n};\n\ntemplate<typename ArgType, int Rows, int Cols, int Order>\nstruct reshaped_evaluator<ArgType, Rows, Cols, Order, /* HasDirectAccess */ false>\n  : evaluator_base<Reshaped<ArgType, Rows, Cols, Order> >\n{\n  typedef Reshaped<ArgType, Rows, Cols, Order> XprType;\n\n  enum {\n    CoeffReadCost = evaluator<ArgType>::CoeffReadCost /* TODO + cost of index computations */,\n\n    Flags = (evaluator<ArgType>::Flags & (HereditaryBits /*| LinearAccessBit | DirectAccessBit*/)),\n\n    Alignment = 0\n  };\n\n  EIGEN_DEVICE_FUNC explicit reshaped_evaluator(const XprType& xpr) : m_argImpl(xpr.nestedExpression()), m_xpr(xpr)\n  {\n    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);\n  }\n\n  typedef typename XprType::Scalar Scalar;\n  typedef typename XprType::CoeffReturnType CoeffReturnType;\n\n  typedef std::pair<Index, Index> RowCol;\n\n  inline RowCol index_remap(Index rowId, Index colId) const\n  {\n    if(Order==ColMajor)\n    {\n      const Index nth_elem_idx = colId * m_xpr.rows() + rowId;\n      return RowCol(nth_elem_idx % m_xpr.nestedExpression().rows(),\n                    nth_elem_idx / m_xpr.nestedExpression().rows());\n    }\n    else\n    {\n      const Index nth_elem_idx = colId + rowId * m_xpr.cols();\n      return RowCol(nth_elem_idx / m_xpr.nestedExpression().cols(),\n                    nth_elem_idx % m_xpr.nestedExpression().cols());\n    }\n  }\n\n  EIGEN_DEVICE_FUNC\n  inline Scalar& coeffRef(Index rowId, Index colId)\n  {\n    EIGEN_STATIC_ASSERT_LVALUE(XprType)\n    const RowCol row_col = index_remap(rowId, colId);\n    return m_argImpl.coeffRef(row_col.first, row_col.second);\n  }\n\n  EIGEN_DEVICE_FUNC\n  inline const Scalar& coeffRef(Index rowId, Index colId) const\n  {\n    const RowCol row_col = index_remap(rowId, colId);\n    return m_argImpl.coeffRef(row_col.first, row_col.second);\n  }\n\n  EIGEN_DEVICE_FUNC\n  EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index rowId, Index colId) const\n  {\n    const RowCol row_col = index_remap(rowId, colId);\n    return m_argImpl.coeff(row_col.first, row_col.second);\n  }\n\n  EIGEN_DEVICE_FUNC\n  inline Scalar& coeffRef(Index index)\n  {\n    EIGEN_STATIC_ASSERT_LVALUE(XprType)\n    const RowCol row_col = index_remap(Rows == 1 ? 0 : index,\n                                       Rows == 1 ? index : 0);\n    return m_argImpl.coeffRef(row_col.first, row_col.second);\n\n  }\n\n  EIGEN_DEVICE_FUNC\n  inline const Scalar& coeffRef(Index index) const\n  {\n    const RowCol row_col = index_remap(Rows == 1 ? 0 : index,\n                                       Rows == 1 ? index : 0);\n    return m_argImpl.coeffRef(row_col.first, row_col.second);\n  }\n\n  EIGEN_DEVICE_FUNC\n  inline const CoeffReturnType coeff(Index index) const\n  {\n    const RowCol row_col = index_remap(Rows == 1 ? 0 : index,\n                                       Rows == 1 ? 
index : 0);\n    return m_argImpl.coeff(row_col.first, row_col.second);\n  }\n#if 0\n  EIGEN_DEVICE_FUNC\n  template<int LoadMode>\n  inline PacketScalar packet(Index rowId, Index colId) const\n  {\n    const RowCol row_col = index_remap(rowId, colId);\n    return m_argImpl.template packet<Unaligned>(row_col.first, row_col.second);\n\n  }\n\n  template<int LoadMode>\n  EIGEN_DEVICE_FUNC\n  inline void writePacket(Index rowId, Index colId, const PacketScalar& val)\n  {\n    const RowCol row_col = index_remap(rowId, colId);\n    m_argImpl.const_cast_derived().template writePacket<Unaligned>\n            (row_col.first, row_col.second, val);\n  }\n\n  template<int LoadMode>\n  EIGEN_DEVICE_FUNC\n  inline PacketScalar packet(Index index) const\n  {\n    const RowCol row_col = index_remap(RowsAtCompileTime == 1 ? 0 : index,\n                                        RowsAtCompileTime == 1 ? index : 0);\n    return m_argImpl.template packet<Unaligned>(row_col.first, row_col.second);\n  }\n\n  template<int LoadMode>\n  EIGEN_DEVICE_FUNC\n  inline void writePacket(Index index, const PacketScalar& val)\n  {\n    const RowCol row_col = index_remap(RowsAtCompileTime == 1 ? 0 : index,\n                                        RowsAtCompileTime == 1 ? index : 0);\n    return m_argImpl.template packet<Unaligned>(row_col.first, row_col.second, val);\n  }\n#endif\nprotected:\n\n  evaluator<ArgType> m_argImpl;\n  const XprType& m_xpr;\n\n};\n\ntemplate<typename ArgType, int Rows, int Cols, int Order>\nstruct reshaped_evaluator<ArgType, Rows, Cols, Order, /* HasDirectAccess */ true>\n: mapbase_evaluator<Reshaped<ArgType, Rows, Cols, Order>,\n                      typename Reshaped<ArgType, Rows, Cols, Order>::PlainObject>\n{\n  typedef Reshaped<ArgType, Rows, Cols, Order> XprType;\n  typedef typename XprType::Scalar Scalar;\n\n  EIGEN_DEVICE_FUNC explicit reshaped_evaluator(const XprType& xpr)\n    : mapbase_evaluator<XprType, typename XprType::PlainObject>(xpr)\n  {\n    // TODO: for the 3.4 release, this should be turned to an internal assertion, but let's keep it as is for the beta lifetime\n    eigen_assert(((internal::UIntPtr(xpr.data()) % EIGEN_PLAIN_ENUM_MAX(1,evaluator<XprType>::Alignment)) == 0) && \"data is not aligned\");\n  }\n};\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_RESHAPED_H\n"
  },
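  {
    "path": "example/eigen_sketches/reshaped_demo.cpp",
    "content": "// Editor-added illustrative sketch: a hypothetical example file, not part of\n// this repository or of upstream Eigen 3.4.0. It only demonstrates the\n// Reshaped expression implemented in Eigen/src/Core/Reshaped.h, assuming the\n// vendored Eigen headers are on the include path.\n#include <Eigen/Dense>\n#include <iostream>\n\nint main()\n{\n  // 16 coefficients 0..15 in a column vector.\n  Eigen::VectorXd v = Eigen::VectorXd::LinSpaced(16, 0, 15);\n\n  // Dynamic-size reshape: the same coefficients viewed as a 4x4 matrix,\n  // filled in column-major order (the default ReshapedStorageOrder).\n  Eigen::MatrixXd m = v.reshaped(4, 4);\n  std::cout << m << std::endl;\n\n  // reshaped() with no arguments flattens any expression back to a vector.\n  std::cout << m.reshaped().transpose() << std::endl;\n  return 0;\n}\n"
  },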
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/ReturnByValue.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr>\n// Copyright (C) 2009-2010 Benoit Jacob <jacob.benoit.1@gmail.com>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_RETURNBYVALUE_H\n#define EIGEN_RETURNBYVALUE_H\n\nnamespace Eigen {\n\nnamespace internal {\n\ntemplate<typename Derived>\nstruct traits<ReturnByValue<Derived> >\n  : public traits<typename traits<Derived>::ReturnType>\n{\n  enum {\n    // We're disabling the DirectAccess because e.g. the constructor of\n    // the Block-with-DirectAccess expression requires to have a coeffRef method.\n    // Also, we don't want to have to implement the stride stuff.\n    Flags = (traits<typename traits<Derived>::ReturnType>::Flags\n             | EvalBeforeNestingBit) & ~DirectAccessBit\n  };\n};\n\n/* The ReturnByValue object doesn't even have a coeff() method.\n * So the only way that nesting it in an expression can work, is by evaluating it into a plain matrix.\n * So internal::nested always gives the plain return matrix type.\n *\n * FIXME: I don't understand why we need this specialization: isn't this taken care of by the EvalBeforeNestingBit ??\n * Answer: EvalBeforeNestingBit should be deprecated since we have the evaluators\n */\ntemplate<typename Derived,int n,typename PlainObject>\nstruct nested_eval<ReturnByValue<Derived>, n, PlainObject>\n{\n  typedef typename traits<Derived>::ReturnType type;\n};\n\n} // end namespace internal\n\n/** \\class ReturnByValue\n  * \\ingroup Core_Module\n  *\n  */\ntemplate<typename Derived> class ReturnByValue\n  : public internal::dense_xpr_base< ReturnByValue<Derived> >::type, internal::no_assignment_operator\n{\n  public:\n    typedef typename internal::traits<Derived>::ReturnType ReturnType;\n\n    typedef typename internal::dense_xpr_base<ReturnByValue>::type Base;\n    EIGEN_DENSE_PUBLIC_INTERFACE(ReturnByValue)\n\n    template<typename Dest>\n    EIGEN_DEVICE_FUNC\n    inline void evalTo(Dest& dst) const\n    { static_cast<const Derived*>(this)->evalTo(dst); }\n    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n    inline Index rows() const EIGEN_NOEXCEPT { return static_cast<const Derived*>(this)->rows(); }\n    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n    inline Index cols() const EIGEN_NOEXCEPT { return static_cast<const Derived*>(this)->cols(); }\n\n#ifndef EIGEN_PARSED_BY_DOXYGEN\n#define Unusable YOU_ARE_TRYING_TO_ACCESS_A_SINGLE_COEFFICIENT_IN_A_SPECIAL_EXPRESSION_WHERE_THAT_IS_NOT_ALLOWED_BECAUSE_THAT_WOULD_BE_INEFFICIENT\n    class Unusable{\n      Unusable(const Unusable&) {}\n      Unusable& operator=(const Unusable&) {return *this;}\n    };\n    const Unusable& coeff(Index) const { return *reinterpret_cast<const Unusable*>(this); }\n    const Unusable& coeff(Index,Index) const { return *reinterpret_cast<const Unusable*>(this); }\n    Unusable& coeffRef(Index) { return *reinterpret_cast<Unusable*>(this); }\n    Unusable& coeffRef(Index,Index) { return *reinterpret_cast<Unusable*>(this); }\n#undef Unusable\n#endif\n};\n\ntemplate<typename Derived>\ntemplate<typename OtherDerived>\nEIGEN_DEVICE_FUNC Derived& DenseBase<Derived>::operator=(const ReturnByValue<OtherDerived>& other)\n{\n  other.evalTo(derived());\n  return derived();\n}\n\nnamespace internal {\n\n// Expression is evaluated in a 
temporary; default implementation of Assignment is bypassed so that\n// when a ReturnByValue expression is assigned, the evaluator is not constructed.\n// TODO: Finalize port to new regime; ReturnByValue should not exist in the expression world\n\ntemplate<typename Derived>\nstruct evaluator<ReturnByValue<Derived> >\n  : public evaluator<typename internal::traits<Derived>::ReturnType>\n{\n  typedef ReturnByValue<Derived> XprType;\n  typedef typename internal::traits<Derived>::ReturnType PlainObject;\n  typedef evaluator<PlainObject> Base;\n\n  EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr)\n    : m_result(xpr.rows(), xpr.cols())\n  {\n    ::new (static_cast<Base*>(this)) Base(m_result);\n    xpr.evalTo(m_result);\n  }\n\nprotected:\n  PlainObject m_result;\n};\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_RETURNBYVALUE_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/Reverse.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>\n// Copyright (C) 2009 Ricard Marxer <email@ricardmarxer.com>\n// Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_REVERSE_H\n#define EIGEN_REVERSE_H\n\nnamespace Eigen {\n\nnamespace internal {\n\ntemplate<typename MatrixType, int Direction>\nstruct traits<Reverse<MatrixType, Direction> >\n : traits<MatrixType>\n{\n  typedef typename MatrixType::Scalar Scalar;\n  typedef typename traits<MatrixType>::StorageKind StorageKind;\n  typedef typename traits<MatrixType>::XprKind XprKind;\n  typedef typename ref_selector<MatrixType>::type MatrixTypeNested;\n  typedef typename remove_reference<MatrixTypeNested>::type _MatrixTypeNested;\n  enum {\n    RowsAtCompileTime = MatrixType::RowsAtCompileTime,\n    ColsAtCompileTime = MatrixType::ColsAtCompileTime,\n    MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,\n    MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,\n    Flags = _MatrixTypeNested::Flags & (RowMajorBit | LvalueBit)\n  };\n};\n\ntemplate<typename PacketType, bool ReversePacket> struct reverse_packet_cond\n{\n  static inline PacketType run(const PacketType& x) { return preverse(x); }\n};\n\ntemplate<typename PacketType> struct reverse_packet_cond<PacketType,false>\n{\n  static inline PacketType run(const PacketType& x) { return x; }\n};\n\n} // end namespace internal\n\n/** \\class Reverse\n  * \\ingroup Core_Module\n  *\n  * \\brief Expression of the reverse of a vector or matrix\n  *\n  * \\tparam MatrixType the type of the object of which we are taking the reverse\n  * \\tparam Direction defines the direction of the reverse operation, can be Vertical, Horizontal, or BothDirections\n  *\n  * This class represents an expression of the reverse of a vector.\n  * It is the return type of MatrixBase::reverse() and VectorwiseOp::reverse()\n  * and most of the time this is the only way it is used.\n  *\n  * \\sa MatrixBase::reverse(), VectorwiseOp::reverse()\n  */\ntemplate<typename MatrixType, int Direction> class Reverse\n  : public internal::dense_xpr_base< Reverse<MatrixType, Direction> >::type\n{\n  public:\n\n    typedef typename internal::dense_xpr_base<Reverse>::type Base;\n    EIGEN_DENSE_PUBLIC_INTERFACE(Reverse)\n    typedef typename internal::remove_all<MatrixType>::type NestedExpression;\n    using Base::IsRowMajor;\n\n  protected:\n    enum {\n      PacketSize = internal::packet_traits<Scalar>::size,\n      IsColMajor = !IsRowMajor,\n      ReverseRow = (Direction == Vertical)   || (Direction == BothDirections),\n      ReverseCol = (Direction == Horizontal) || (Direction == BothDirections),\n      OffsetRow  = ReverseRow && IsColMajor ? PacketSize : 1,\n      OffsetCol  = ReverseCol && IsRowMajor ? 
PacketSize : 1,\n      ReversePacket = (Direction == BothDirections)\n                    || ((Direction == Vertical)   && IsColMajor)\n                    || ((Direction == Horizontal) && IsRowMajor)\n    };\n    typedef internal::reverse_packet_cond<PacketScalar,ReversePacket> reverse_packet;\n  public:\n\n    EIGEN_DEVICE_FUNC explicit inline Reverse(const MatrixType& matrix) : m_matrix(matrix) { }\n\n    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Reverse)\n\n    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n    inline Index rows() const EIGEN_NOEXCEPT { return m_matrix.rows(); }\n    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n    inline Index cols() const EIGEN_NOEXCEPT { return m_matrix.cols(); }\n\n    EIGEN_DEVICE_FUNC inline Index innerStride() const\n    {\n      return -m_matrix.innerStride();\n    }\n\n    EIGEN_DEVICE_FUNC const typename internal::remove_all<typename MatrixType::Nested>::type&\n    nestedExpression() const\n    {\n      return m_matrix;\n    }\n\n  protected:\n    typename MatrixType::Nested m_matrix;\n};\n\n/** \\returns an expression of the reverse of *this.\n  *\n  * Example: \\include MatrixBase_reverse.cpp\n  * Output: \\verbinclude MatrixBase_reverse.out\n  *\n  */\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC inline typename DenseBase<Derived>::ReverseReturnType\nDenseBase<Derived>::reverse()\n{\n  return ReverseReturnType(derived());\n}\n\n\n//reverse const overload moved to DenseBase.h due to a CUDA compiler bug\n\n/** This is the \"in place\" version of reverse: it reverses \\c *this.\n  *\n  * In most cases it is probably better to simply use the reversed expression\n  * of a matrix. However, when reversing the matrix data itself is really needed,\n  * then this \"in-place\" version is probably the right choice because it provides\n  * the following additional benefits:\n  *  - less error prone: doing the same operation with .reverse() requires special care:\n  *    \\code m = m.reverse().eval(); \\endcode\n  *  - this API enables reverse operations without the need for a temporary\n  *  - it allows future optimizations (cache friendliness, etc.)\n  *\n  * \\sa VectorwiseOp::reverseInPlace(), reverse() */\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC inline void DenseBase<Derived>::reverseInPlace()\n{\n  if(cols()>rows())\n  {\n    Index half = cols()/2;\n    leftCols(half).swap(rightCols(half).reverse());\n    if((cols()%2)==1)\n    {\n      Index half2 = rows()/2;\n      col(half).head(half2).swap(col(half).tail(half2).reverse());\n    }\n  }\n  else\n  {\n    Index half = rows()/2;\n    topRows(half).swap(bottomRows(half).reverse());\n    if((rows()%2)==1)\n    {\n      Index half2 = cols()/2;\n      row(half).head(half2).swap(row(half).tail(half2).reverse());\n    }\n  }\n}\n\nnamespace internal {\n\ntemplate<int Direction>\nstruct vectorwise_reverse_inplace_impl;\n\ntemplate<>\nstruct vectorwise_reverse_inplace_impl<Vertical>\n{\n  template<typename ExpressionType>\n  static void run(ExpressionType &xpr)\n  {\n    const int HalfAtCompileTime = ExpressionType::RowsAtCompileTime==Dynamic?Dynamic:ExpressionType::RowsAtCompileTime/2;\n    Index half = xpr.rows()/2;\n    xpr.topRows(fix<HalfAtCompileTime>(half))\n       .swap(xpr.bottomRows(fix<HalfAtCompileTime>(half)).colwise().reverse());\n  }\n};\n\ntemplate<>\nstruct vectorwise_reverse_inplace_impl<Horizontal>\n{\n  template<typename ExpressionType>\n  static void run(ExpressionType &xpr)\n  {\n    const int HalfAtCompileTime = ExpressionType::ColsAtCompileTime==Dynamic?Dynamic:ExpressionType::ColsAtCompileTime/2;\n    
Index half = xpr.cols()/2;\n    xpr.leftCols(fix<HalfAtCompileTime>(half))\n       .swap(xpr.rightCols(fix<HalfAtCompileTime>(half)).rowwise().reverse());\n  }\n};\n\n} // end namespace internal\n\n/** This is the \"in place\" version of VectorwiseOp::reverse: it reverses each column or row of \\c *this.\n  *\n  * In most cases it is probably better to simply use the reversed expression\n  * of a matrix. However, when reversing the matrix data itself is really needed,\n  * then this \"in-place\" version is probably the right choice because it provides\n  * the following additional benefits:\n  *  - less error prone: doing the same operation with .reverse() requires special care:\n  *    \\code m = m.reverse().eval(); \\endcode\n  *  - this API enables reverse operations without the need for a temporary\n  *\n  * \\sa DenseBase::reverseInPlace(), reverse() */\ntemplate<typename ExpressionType, int Direction>\nEIGEN_DEVICE_FUNC void VectorwiseOp<ExpressionType,Direction>::reverseInPlace()\n{\n  internal::vectorwise_reverse_inplace_impl<Direction>::run(m_matrix);\n}\n\n} // end namespace Eigen\n\n#endif // EIGEN_REVERSE_H\n"
  },
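  {
    "path": "example/eigen_sketches/reverse_demo.cpp",
    "content": "// Editor-added illustrative sketch: a hypothetical example file, not part of\n// this repository or of upstream Eigen 3.4.0. It demonstrates the Reverse\n// expression and the in-place variants implemented in Eigen/src/Core/Reverse.h.\n#include <Eigen/Dense>\n#include <iostream>\n\nint main()\n{\n  Eigen::MatrixXd m(2, 3);\n  m << 1, 2, 3,\n       4, 5, 6;\n\n  // Lazy expression: both directions reversed, no temporary is created.\n  std::cout << m.reverse() << std::endl;\n\n  // Reverse each row only (VectorwiseOp::reverse).\n  std::cout << m.rowwise().reverse() << std::endl;\n\n  // In-place version; avoids the aliasing pitfall of m = m.reverse().\n  m.reverseInPlace();\n  std::cout << m << std::endl;\n  return 0;\n}\n"
  },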
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/Select.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_SELECT_H\n#define EIGEN_SELECT_H\n\nnamespace Eigen {\n\n/** \\class Select\n  * \\ingroup Core_Module\n  *\n  * \\brief Expression of a coefficient wise version of the C++ ternary operator ?:\n  *\n  * \\param ConditionMatrixType the type of the \\em condition expression which must be a boolean matrix\n  * \\param ThenMatrixType the type of the \\em then expression\n  * \\param ElseMatrixType the type of the \\em else expression\n  *\n  * This class represents an expression of a coefficient wise version of the C++ ternary operator ?:.\n  * It is the return type of DenseBase::select() and most of the time this is the only way it is used.\n  *\n  * \\sa DenseBase::select(const DenseBase<ThenDerived>&, const DenseBase<ElseDerived>&) const\n  */\n\nnamespace internal {\ntemplate<typename ConditionMatrixType, typename ThenMatrixType, typename ElseMatrixType>\nstruct traits<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >\n : traits<ThenMatrixType>\n{\n  typedef typename traits<ThenMatrixType>::Scalar Scalar;\n  typedef Dense StorageKind;\n  typedef typename traits<ThenMatrixType>::XprKind XprKind;\n  typedef typename ConditionMatrixType::Nested ConditionMatrixNested;\n  typedef typename ThenMatrixType::Nested ThenMatrixNested;\n  typedef typename ElseMatrixType::Nested ElseMatrixNested;\n  enum {\n    RowsAtCompileTime = ConditionMatrixType::RowsAtCompileTime,\n    ColsAtCompileTime = ConditionMatrixType::ColsAtCompileTime,\n    MaxRowsAtCompileTime = ConditionMatrixType::MaxRowsAtCompileTime,\n    MaxColsAtCompileTime = ConditionMatrixType::MaxColsAtCompileTime,\n    Flags = (unsigned int)ThenMatrixType::Flags & ElseMatrixType::Flags & RowMajorBit\n  };\n};\n}\n\ntemplate<typename ConditionMatrixType, typename ThenMatrixType, typename ElseMatrixType>\nclass Select : public internal::dense_xpr_base< Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >::type,\n               internal::no_assignment_operator\n{\n  public:\n\n    typedef typename internal::dense_xpr_base<Select>::type Base;\n    EIGEN_DENSE_PUBLIC_INTERFACE(Select)\n\n    inline EIGEN_DEVICE_FUNC\n    Select(const ConditionMatrixType& a_conditionMatrix,\n           const ThenMatrixType& a_thenMatrix,\n           const ElseMatrixType& a_elseMatrix)\n      : m_condition(a_conditionMatrix), m_then(a_thenMatrix), m_else(a_elseMatrix)\n    {\n      eigen_assert(m_condition.rows() == m_then.rows() && m_condition.rows() == m_else.rows());\n      eigen_assert(m_condition.cols() == m_then.cols() && m_condition.cols() == m_else.cols());\n    }\n\n    inline EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n    Index rows() const EIGEN_NOEXCEPT { return m_condition.rows(); }\n    inline EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n    Index cols() const EIGEN_NOEXCEPT { return m_condition.cols(); }\n\n    inline EIGEN_DEVICE_FUNC\n    const Scalar coeff(Index i, Index j) const\n    {\n      if (m_condition.coeff(i,j))\n        return m_then.coeff(i,j);\n      else\n        return m_else.coeff(i,j);\n    }\n\n    inline EIGEN_DEVICE_FUNC\n    const Scalar coeff(Index i) const\n    {\n      if (m_condition.coeff(i))\n        return 
m_then.coeff(i);\n      else\n        return m_else.coeff(i);\n    }\n\n    inline EIGEN_DEVICE_FUNC const ConditionMatrixType& conditionMatrix() const\n    {\n      return m_condition;\n    }\n\n    inline EIGEN_DEVICE_FUNC const ThenMatrixType& thenMatrix() const\n    {\n      return m_then;\n    }\n\n    inline EIGEN_DEVICE_FUNC const ElseMatrixType& elseMatrix() const\n    {\n      return m_else;\n    }\n\n  protected:\n    typename ConditionMatrixType::Nested m_condition;\n    typename ThenMatrixType::Nested m_then;\n    typename ElseMatrixType::Nested m_else;\n};\n\n\n/** \\returns a matrix where each coefficient (i,j) is equal to \\a thenMatrix(i,j)\n  * if \\c *this(i,j), and \\a elseMatrix(i,j) otherwise.\n  *\n  * Example: \\include MatrixBase_select.cpp\n  * Output: \\verbinclude MatrixBase_select.out\n  *\n  * \\sa class Select\n  */\ntemplate<typename Derived>\ntemplate<typename ThenDerived,typename ElseDerived>\ninline EIGEN_DEVICE_FUNC const Select<Derived,ThenDerived,ElseDerived>\nDenseBase<Derived>::select(const DenseBase<ThenDerived>& thenMatrix,\n                            const DenseBase<ElseDerived>& elseMatrix) const\n{\n  return Select<Derived,ThenDerived,ElseDerived>(derived(), thenMatrix.derived(), elseMatrix.derived());\n}\n\n/** Version of DenseBase::select(const DenseBase&, const DenseBase&) with\n  * the \\em else expression being a scalar value.\n  *\n  * \\sa DenseBase::select(const DenseBase<ThenDerived>&, const DenseBase<ElseDerived>&) const, class Select\n  */\ntemplate<typename Derived>\ntemplate<typename ThenDerived>\ninline EIGEN_DEVICE_FUNC const Select<Derived,ThenDerived, typename ThenDerived::ConstantReturnType>\nDenseBase<Derived>::select(const DenseBase<ThenDerived>& thenMatrix,\n                           const typename ThenDerived::Scalar& elseScalar) const\n{\n  return Select<Derived,ThenDerived,typename ThenDerived::ConstantReturnType>(\n    derived(), thenMatrix.derived(), ThenDerived::Constant(rows(),cols(),elseScalar));\n}\n\n/** Version of DenseBase::select(const DenseBase&, const DenseBase&) with\n  * the \\em then expression being a scalar value.\n  *\n  * \\sa DenseBase::select(const DenseBase<ThenDerived>&, const DenseBase<ElseDerived>&) const, class Select\n  */\ntemplate<typename Derived>\ntemplate<typename ElseDerived>\ninline EIGEN_DEVICE_FUNC const Select<Derived, typename ElseDerived::ConstantReturnType, ElseDerived >\nDenseBase<Derived>::select(const typename ElseDerived::Scalar& thenScalar,\n                           const DenseBase<ElseDerived>& elseMatrix) const\n{\n  return Select<Derived,typename ElseDerived::ConstantReturnType,ElseDerived>(\n    derived(), ElseDerived::Constant(rows(),cols(),thenScalar), elseMatrix.derived());\n}\n\n} // end namespace Eigen\n\n#endif // EIGEN_SELECT_H\n"
  },
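  {
    "path": "example/eigen_sketches/select_demo.cpp",
    "content": "// Editor-added illustrative sketch: a hypothetical example file, not part of\n// this repository or of upstream Eigen 3.4.0. It demonstrates the\n// coefficient-wise ternary Select expression from Eigen/src/Core/Select.h.\n#include <Eigen/Dense>\n#include <iostream>\n\nint main()\n{\n  Eigen::ArrayXXd a(2, 2);\n  a << 1, -2,\n       3, -4;\n\n  // Matrix/matrix form: where (a > 0) holds take a, otherwise take -a,\n  // i.e. a coefficient-wise absolute value.\n  Eigen::ArrayXXd abs_a = (a > 0).select(a, -a);\n  std::cout << abs_a << std::endl;\n\n  // Scalar 'else' overload: clamp negative entries to zero.\n  Eigen::ArrayXXd relu = (a > 0).select(a, 0.0);\n  std::cout << relu << std::endl;\n  return 0;\n}\n"
  },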
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/SelfAdjointView.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_SELFADJOINTMATRIX_H\n#define EIGEN_SELFADJOINTMATRIX_H\n\nnamespace Eigen {\n\n/** \\class SelfAdjointView\n  * \\ingroup Core_Module\n  *\n  *\n  * \\brief Expression of a selfadjoint matrix from a triangular part of a dense matrix\n  *\n  * \\param MatrixType the type of the dense matrix storing the coefficients\n  * \\param TriangularPart can be either \\c #Lower or \\c #Upper\n  *\n  * This class is an expression of a sefladjoint matrix from a triangular part of a matrix\n  * with given dense storage of the coefficients. It is the return type of MatrixBase::selfadjointView()\n  * and most of the time this is the only way that it is used.\n  *\n  * \\sa class TriangularBase, MatrixBase::selfadjointView()\n  */\n\nnamespace internal {\ntemplate<typename MatrixType, unsigned int UpLo>\nstruct traits<SelfAdjointView<MatrixType, UpLo> > : traits<MatrixType>\n{\n  typedef typename ref_selector<MatrixType>::non_const_type MatrixTypeNested;\n  typedef typename remove_all<MatrixTypeNested>::type MatrixTypeNestedCleaned;\n  typedef MatrixType ExpressionType;\n  typedef typename MatrixType::PlainObject FullMatrixType;\n  enum {\n    Mode = UpLo | SelfAdjoint,\n    FlagsLvalueBit = is_lvalue<MatrixType>::value ? LvalueBit : 0,\n    Flags =  MatrixTypeNestedCleaned::Flags & (HereditaryBits|FlagsLvalueBit)\n           & (~(PacketAccessBit | DirectAccessBit | LinearAccessBit)) // FIXME these flags should be preserved\n  };\n};\n}\n\n\ntemplate<typename _MatrixType, unsigned int UpLo> class SelfAdjointView\n  : public TriangularBase<SelfAdjointView<_MatrixType, UpLo> >\n{\n  public:\n\n    typedef _MatrixType MatrixType;\n    typedef TriangularBase<SelfAdjointView> Base;\n    typedef typename internal::traits<SelfAdjointView>::MatrixTypeNested MatrixTypeNested;\n    typedef typename internal::traits<SelfAdjointView>::MatrixTypeNestedCleaned MatrixTypeNestedCleaned;\n    typedef MatrixTypeNestedCleaned NestedExpression;\n\n    /** \\brief The type of coefficients in this matrix */\n    typedef typename internal::traits<SelfAdjointView>::Scalar Scalar;\n    typedef typename MatrixType::StorageIndex StorageIndex;\n    typedef typename internal::remove_all<typename MatrixType::ConjugateReturnType>::type MatrixConjugateReturnType;\n    typedef SelfAdjointView<typename internal::add_const<MatrixType>::type, UpLo> ConstSelfAdjointView;\n\n    enum {\n      Mode = internal::traits<SelfAdjointView>::Mode,\n      Flags = internal::traits<SelfAdjointView>::Flags,\n      TransposeMode = ((int(Mode) & int(Upper)) ? Lower : 0) | ((int(Mode) & int(Lower)) ? 
Upper : 0)\n    };\n    typedef typename MatrixType::PlainObject PlainObject;\n\n    EIGEN_DEVICE_FUNC\n    explicit inline SelfAdjointView(MatrixType& matrix) : m_matrix(matrix)\n    {\n      EIGEN_STATIC_ASSERT(UpLo==Lower || UpLo==Upper,SELFADJOINTVIEW_ACCEPTS_UPPER_AND_LOWER_MODE_ONLY);\n    }\n\n    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n    inline Index rows() const EIGEN_NOEXCEPT { return m_matrix.rows(); }\n    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n    inline Index cols() const EIGEN_NOEXCEPT { return m_matrix.cols(); }\n    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n    inline Index outerStride() const EIGEN_NOEXCEPT { return m_matrix.outerStride(); }\n    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n    inline Index innerStride() const EIGEN_NOEXCEPT { return m_matrix.innerStride(); }\n\n    /** \\sa MatrixBase::coeff()\n      * \\warning the coordinates must fit into the referenced triangular part\n      */\n    EIGEN_DEVICE_FUNC\n    inline Scalar coeff(Index row, Index col) const\n    {\n      Base::check_coordinates_internal(row, col);\n      return m_matrix.coeff(row, col);\n    }\n\n    /** \\sa MatrixBase::coeffRef()\n      * \\warning the coordinates must fit into the referenced triangular part\n      */\n    EIGEN_DEVICE_FUNC\n    inline Scalar& coeffRef(Index row, Index col)\n    {\n      EIGEN_STATIC_ASSERT_LVALUE(SelfAdjointView);\n      Base::check_coordinates_internal(row, col);\n      return m_matrix.coeffRef(row, col);\n    }\n\n    /** \\internal */\n    EIGEN_DEVICE_FUNC\n    const MatrixTypeNestedCleaned& _expression() const { return m_matrix; }\n\n    EIGEN_DEVICE_FUNC\n    const MatrixTypeNestedCleaned& nestedExpression() const { return m_matrix; }\n    EIGEN_DEVICE_FUNC\n    MatrixTypeNestedCleaned& nestedExpression() { return m_matrix; }\n\n    /** Efficient triangular matrix times vector/matrix product */\n    template<typename OtherDerived>\n    EIGEN_DEVICE_FUNC\n    const Product<SelfAdjointView,OtherDerived>\n    operator*(const MatrixBase<OtherDerived>& rhs) const\n    {\n      return Product<SelfAdjointView,OtherDerived>(*this, rhs.derived());\n    }\n\n    /** Efficient vector/matrix times triangular matrix product */\n    template<typename OtherDerived> friend\n    EIGEN_DEVICE_FUNC\n    const Product<OtherDerived,SelfAdjointView>\n    operator*(const MatrixBase<OtherDerived>& lhs, const SelfAdjointView& rhs)\n    {\n      return Product<OtherDerived,SelfAdjointView>(lhs.derived(),rhs);\n    }\n\n    friend EIGEN_DEVICE_FUNC\n    const SelfAdjointView<const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar,MatrixType,product),UpLo>\n    operator*(const Scalar& s, const SelfAdjointView& mat)\n    {\n      return (s*mat.nestedExpression()).template selfadjointView<UpLo>();\n    }\n\n    /** Perform a symmetric rank 2 update of the selfadjoint matrix \\c *this:\n      * \\f$ this = this + \\alpha u v^* + conj(\\alpha) v u^* \\f$\n      * \\returns a reference to \\c *this\n      *\n      * The vectors \\a u and \\c v \\b must be column vectors, however they can be\n      * an adjoint expression without any overhead. 
Only the meaningful triangular\n      * part of the matrix is updated, the rest is left unchanged.\n      *\n      * \\sa rankUpdate(const MatrixBase<DerivedU>&, Scalar)\n      */\n    template<typename DerivedU, typename DerivedV>\n    EIGEN_DEVICE_FUNC\n    SelfAdjointView& rankUpdate(const MatrixBase<DerivedU>& u, const MatrixBase<DerivedV>& v, const Scalar& alpha = Scalar(1));\n\n    /** Perform a symmetric rank K update of the selfadjoint matrix \\c *this:\n      * \\f$ this = this + \\alpha ( u u^* ) \\f$ where \\a u is a vector or matrix.\n      *\n      * \\returns a reference to \\c *this\n      *\n      * Note that to perform \\f$ this = this + \\alpha ( u^* u ) \\f$ you can simply\n      * call this function with u.adjoint().\n      *\n      * \\sa rankUpdate(const MatrixBase<DerivedU>&, const MatrixBase<DerivedV>&, Scalar)\n      */\n    template<typename DerivedU>\n    EIGEN_DEVICE_FUNC\n    SelfAdjointView& rankUpdate(const MatrixBase<DerivedU>& u, const Scalar& alpha = Scalar(1));\n\n    /** \\returns an expression of a triangular view extracted from the current selfadjoint view of a given triangular part\n      *\n      * The parameter \\a TriMode can have the following values: \\c #Upper, \\c #StrictlyUpper, \\c #UnitUpper,\n      * \\c #Lower, \\c #StrictlyLower, \\c #UnitLower.\n      *\n      * If \\c TriMode references the same triangular part as \\c *this, then this method simply returns a \\c TriangularView of the nested expression,\n      * otherwise, the nested expression is first transposed, thus returning a \\c TriangularView<Transpose<MatrixType>> object.\n      *\n      * \\sa MatrixBase::triangularView(), class TriangularView\n      */\n    template<unsigned int TriMode>\n    EIGEN_DEVICE_FUNC\n    typename internal::conditional<(TriMode&(Upper|Lower))==(UpLo&(Upper|Lower)),\n                                   TriangularView<MatrixType,TriMode>,\n                                   TriangularView<typename MatrixType::AdjointReturnType,TriMode> >::type\n    triangularView() const\n    {\n      typename internal::conditional<(TriMode&(Upper|Lower))==(UpLo&(Upper|Lower)), MatrixType&, typename MatrixType::ConstTransposeReturnType>::type tmp1(m_matrix);\n      typename internal::conditional<(TriMode&(Upper|Lower))==(UpLo&(Upper|Lower)), MatrixType&, typename MatrixType::AdjointReturnType>::type tmp2(tmp1);\n      return typename internal::conditional<(TriMode&(Upper|Lower))==(UpLo&(Upper|Lower)),\n                                   TriangularView<MatrixType,TriMode>,\n                                   TriangularView<typename MatrixType::AdjointReturnType,TriMode> >::type(tmp2);\n    }\n\n    typedef SelfAdjointView<const MatrixConjugateReturnType,UpLo> ConjugateReturnType;\n    /** \\sa MatrixBase::conjugate() const */\n    EIGEN_DEVICE_FUNC\n    inline const ConjugateReturnType conjugate() const\n    { return ConjugateReturnType(m_matrix.conjugate()); }\n\n    /** \\returns an expression of the complex conjugate of \\c *this if Cond==true,\n     *           returns \\c *this otherwise.\n     */\n    template<bool Cond>\n    EIGEN_DEVICE_FUNC\n    inline typename internal::conditional<Cond,ConjugateReturnType,ConstSelfAdjointView>::type\n    conjugateIf() const\n    {\n      typedef typename internal::conditional<Cond,ConjugateReturnType,ConstSelfAdjointView>::type ReturnType;\n      return ReturnType(m_matrix.template conjugateIf<Cond>());\n    }\n\n    typedef SelfAdjointView<const typename MatrixType::AdjointReturnType,TransposeMode> AdjointReturnType;\n    
/** \\sa MatrixBase::adjoint() const */\n    EIGEN_DEVICE_FUNC\n    inline const AdjointReturnType adjoint() const\n    { return AdjointReturnType(m_matrix.adjoint()); }\n\n    typedef SelfAdjointView<typename MatrixType::TransposeReturnType,TransposeMode> TransposeReturnType;\n     /** \\sa MatrixBase::transpose() */\n    EIGEN_DEVICE_FUNC\n    inline TransposeReturnType transpose()\n    {\n      EIGEN_STATIC_ASSERT_LVALUE(MatrixType)\n      typename MatrixType::TransposeReturnType tmp(m_matrix);\n      return TransposeReturnType(tmp);\n    }\n\n    typedef SelfAdjointView<const typename MatrixType::ConstTransposeReturnType,TransposeMode> ConstTransposeReturnType;\n    /** \\sa MatrixBase::transpose() const */\n    EIGEN_DEVICE_FUNC\n    inline const ConstTransposeReturnType transpose() const\n    {\n      return ConstTransposeReturnType(m_matrix.transpose());\n    }\n\n    /** \\returns a const expression of the main diagonal of the matrix \\c *this\n      *\n      * This method simply returns the diagonal of the nested expression, thus by-passing the SelfAdjointView decorator.\n      *\n      * \\sa MatrixBase::diagonal(), class Diagonal */\n    EIGEN_DEVICE_FUNC\n    typename MatrixType::ConstDiagonalReturnType diagonal() const\n    {\n      return typename MatrixType::ConstDiagonalReturnType(m_matrix);\n    }\n\n/////////// Cholesky module ///////////\n\n    const LLT<PlainObject, UpLo> llt() const;\n    const LDLT<PlainObject, UpLo> ldlt() const;\n\n/////////// Eigenvalue module ///////////\n\n    /** Real part of #Scalar */\n    typedef typename NumTraits<Scalar>::Real RealScalar;\n    /** Return type of eigenvalues() */\n    typedef Matrix<RealScalar, internal::traits<MatrixType>::ColsAtCompileTime, 1> EigenvaluesReturnType;\n\n    EIGEN_DEVICE_FUNC\n    EigenvaluesReturnType eigenvalues() const;\n    EIGEN_DEVICE_FUNC\n    RealScalar operatorNorm() const;\n\n  protected:\n    MatrixTypeNested m_matrix;\n};\n\n\n// template<typename OtherDerived, typename MatrixType, unsigned int UpLo>\n// internal::selfadjoint_matrix_product_returntype<OtherDerived,SelfAdjointView<MatrixType,UpLo> >\n// operator*(const MatrixBase<OtherDerived>& lhs, const SelfAdjointView<MatrixType,UpLo>& rhs)\n// {\n//   return internal::matrix_selfadjoint_product_returntype<OtherDerived,SelfAdjointView<MatrixType,UpLo> >(lhs.derived(),rhs);\n// }\n\n// selfadjoint to dense matrix\n\nnamespace internal {\n\n// TODO currently a selfadjoint expression has the form SelfAdjointView<.,.>\n//      in the future selfadjoint-ness should be defined by the expression traits\n//      such that Transpose<SelfAdjointView<.,.> > is valid. 
(currently TriangularBase::transpose() is overloaded to make it work)\ntemplate<typename MatrixType, unsigned int Mode>\nstruct evaluator_traits<SelfAdjointView<MatrixType,Mode> >\n{\n  typedef typename storage_kind_to_evaluator_kind<typename MatrixType::StorageKind>::Kind Kind;\n  typedef SelfAdjointShape Shape;\n};\n\ntemplate<int UpLo, int SetOpposite, typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor, int Version>\nclass triangular_dense_assignment_kernel<UpLo,SelfAdjoint,SetOpposite,DstEvaluatorTypeT,SrcEvaluatorTypeT,Functor,Version>\n  : public generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, Version>\n{\nprotected:\n  typedef generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, Version> Base;\n  typedef typename Base::DstXprType DstXprType;\n  typedef typename Base::SrcXprType SrcXprType;\n  using Base::m_dst;\n  using Base::m_src;\n  using Base::m_functor;\npublic:\n\n  typedef typename Base::DstEvaluatorType DstEvaluatorType;\n  typedef typename Base::SrcEvaluatorType SrcEvaluatorType;\n  typedef typename Base::Scalar Scalar;\n  typedef typename Base::AssignmentTraits AssignmentTraits;\n\n\n  EIGEN_DEVICE_FUNC triangular_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr)\n    : Base(dst, src, func, dstExpr)\n  {}\n\n  EIGEN_DEVICE_FUNC void assignCoeff(Index row, Index col)\n  {\n    eigen_internal_assert(row!=col);\n    Scalar tmp = m_src.coeff(row,col);\n    m_functor.assignCoeff(m_dst.coeffRef(row,col), tmp);\n    m_functor.assignCoeff(m_dst.coeffRef(col,row), numext::conj(tmp));\n  }\n\n  EIGEN_DEVICE_FUNC void assignDiagonalCoeff(Index id)\n  {\n    Base::assignCoeff(id,id);\n  }\n\n  EIGEN_DEVICE_FUNC void assignOppositeCoeff(Index, Index)\n  { eigen_internal_assert(false && \"should never be called\"); }\n};\n\n} // end namespace internal\n\n/***************************************************************************\n* Implementation of MatrixBase methods\n***************************************************************************/\n\n/** This is the const version of MatrixBase::selfadjointView() */\ntemplate<typename Derived>\ntemplate<unsigned int UpLo>\nEIGEN_DEVICE_FUNC typename MatrixBase<Derived>::template ConstSelfAdjointViewReturnType<UpLo>::Type\nMatrixBase<Derived>::selfadjointView() const\n{\n  return typename ConstSelfAdjointViewReturnType<UpLo>::Type(derived());\n}\n\n/** \\returns an expression of a symmetric/self-adjoint view extracted from the upper or lower triangular part of the current matrix\n  *\n  * The parameter \\a UpLo can be either \\c #Upper or \\c #Lower\n  *\n  * Example: \\include MatrixBase_selfadjointView.cpp\n  * Output: \\verbinclude MatrixBase_selfadjointView.out\n  *\n  * \\sa class SelfAdjointView\n  */\ntemplate<typename Derived>\ntemplate<unsigned int UpLo>\nEIGEN_DEVICE_FUNC typename MatrixBase<Derived>::template SelfAdjointViewReturnType<UpLo>::Type\nMatrixBase<Derived>::selfadjointView()\n{\n  return typename SelfAdjointViewReturnType<UpLo>::Type(derived());\n}\n\n} // end namespace Eigen\n\n#endif // EIGEN_SELFADJOINTMATRIX_H\n"
  },
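  {
    "path": "example/eigen_sketches/selfadjoint_view_demo.cpp",
    "content": "// Editor-added illustrative sketch: a hypothetical example file, not part of\n// this repository or of upstream Eigen 3.4.0. It demonstrates SelfAdjointView\n// and rankUpdate() from Eigen/src/Core/SelfAdjointView.h.\n#include <Eigen/Dense>\n#include <iostream>\n\nint main()\n{\n  Eigen::MatrixXd m = Eigen::MatrixXd::Random(3, 3);\n\n  // Interpret only the lower triangle of m as a symmetric matrix; the\n  // strictly upper part of m is never read.\n  Eigen::MatrixXd s = m.selfadjointView<Eigen::Lower>();\n\n  // Rank-1 update s += 2 * u * u^T; only the stored (lower) triangle of s\n  // is written, so extract the view again to obtain the full dense result.\n  Eigen::VectorXd u = Eigen::VectorXd::Random(3);\n  s.selfadjointView<Eigen::Lower>().rankUpdate(u, 2.0);\n  Eigen::MatrixXd full = s.selfadjointView<Eigen::Lower>();\n\n  std::cout << full << std::endl;\n  return 0;\n}\n"
  },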
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/SelfCwiseBinaryOp.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_SELFCWISEBINARYOP_H\n#define EIGEN_SELFCWISEBINARYOP_H\n\nnamespace Eigen { \n\n// TODO generalize the scalar type of 'other'\n\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator*=(const Scalar& other)\n{\n  internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::mul_assign_op<Scalar,Scalar>());\n  return derived();\n}\n\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator+=(const Scalar& other)\n{\n  internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::add_assign_op<Scalar,Scalar>());\n  return derived();\n}\n\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator-=(const Scalar& other)\n{\n  internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::sub_assign_op<Scalar,Scalar>());\n  return derived();\n}\n\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator/=(const Scalar& other)\n{\n  internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::div_assign_op<Scalar,Scalar>());\n  return derived();\n}\n\n} // end namespace Eigen\n\n#endif // EIGEN_SELFCWISEBINARYOP_H\n"
  },
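  {
    "path": "example/eigen_sketches/self_cwise_ops_demo.cpp",
    "content": "// Editor-added illustrative sketch: a hypothetical example file, not part of\n// this repository or of upstream Eigen 3.4.0. It exercises the scalar compound\n// assignment operators defined in Eigen/src/Core/SelfCwiseBinaryOp.h.\n#include <Eigen/Dense>\n#include <iostream>\n\nint main()\n{\n  Eigen::MatrixXd m = Eigen::MatrixXd::Ones(2, 2);\n  m *= 3.0;  // DenseBase::operator*=(Scalar)\n  m /= 2.0;  // DenseBase::operator/=(Scalar)\n\n  // operator+= / operator-= with a scalar are only defined for arrays.\n  Eigen::ArrayXXd a = m.array();\n  a += 1.0;  // ArrayBase::operator+=(Scalar)\n  a -= 0.5;  // ArrayBase::operator-=(Scalar)\n\n  std::cout << a << std::endl;\n  return 0;\n}\n"
  },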
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/Solve.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2014 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_SOLVE_H\n#define EIGEN_SOLVE_H\n\nnamespace Eigen {\n\ntemplate<typename Decomposition, typename RhsType, typename StorageKind> class SolveImpl;\n\n/** \\class Solve\n  * \\ingroup Core_Module\n  *\n  * \\brief Pseudo expression representing a solving operation\n  *\n  * \\tparam Decomposition the type of the matrix or decomposition object\n  * \\tparam Rhstype the type of the right-hand side\n  *\n  * This class represents an expression of A.solve(B)\n  * and most of the time this is the only way it is used.\n  *\n  */\nnamespace internal {\n\n// this solve_traits class permits to determine the evaluation type with respect to storage kind (Dense vs Sparse)\ntemplate<typename Decomposition, typename RhsType,typename StorageKind> struct solve_traits;\n\ntemplate<typename Decomposition, typename RhsType>\nstruct solve_traits<Decomposition,RhsType,Dense>\n{\n  typedef typename make_proper_matrix_type<typename RhsType::Scalar,\n                 Decomposition::ColsAtCompileTime,\n                 RhsType::ColsAtCompileTime,\n                 RhsType::PlainObject::Options,\n                 Decomposition::MaxColsAtCompileTime,\n                 RhsType::MaxColsAtCompileTime>::type PlainObject;\n};\n\ntemplate<typename Decomposition, typename RhsType>\nstruct traits<Solve<Decomposition, RhsType> >\n  : traits<typename solve_traits<Decomposition,RhsType,typename internal::traits<RhsType>::StorageKind>::PlainObject>\n{\n  typedef typename solve_traits<Decomposition,RhsType,typename internal::traits<RhsType>::StorageKind>::PlainObject PlainObject;\n  typedef typename promote_index_type<typename Decomposition::StorageIndex, typename RhsType::StorageIndex>::type StorageIndex;\n  typedef traits<PlainObject> BaseTraits;\n  enum {\n    Flags = BaseTraits::Flags & RowMajorBit,\n    CoeffReadCost = HugeCost\n  };\n};\n\n}\n\n\ntemplate<typename Decomposition, typename RhsType>\nclass Solve : public SolveImpl<Decomposition,RhsType,typename internal::traits<RhsType>::StorageKind>\n{\npublic:\n  typedef typename internal::traits<Solve>::PlainObject PlainObject;\n  typedef typename internal::traits<Solve>::StorageIndex StorageIndex;\n\n  Solve(const Decomposition &dec, const RhsType &rhs)\n    : m_dec(dec), m_rhs(rhs)\n  {}\n\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index rows() const EIGEN_NOEXCEPT { return m_dec.cols(); }\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index cols() const EIGEN_NOEXCEPT { return m_rhs.cols(); }\n\n  EIGEN_DEVICE_FUNC const Decomposition& dec() const { return m_dec; }\n  EIGEN_DEVICE_FUNC const RhsType&       rhs() const { return m_rhs; }\n\nprotected:\n  const Decomposition &m_dec;\n  const RhsType       &m_rhs;\n};\n\n\n// Specialization of the Solve expression for dense results\ntemplate<typename Decomposition, typename RhsType>\nclass SolveImpl<Decomposition,RhsType,Dense>\n  : public MatrixBase<Solve<Decomposition,RhsType> >\n{\n  typedef Solve<Decomposition,RhsType> Derived;\n\npublic:\n\n  typedef MatrixBase<Solve<Decomposition,RhsType> > Base;\n  EIGEN_DENSE_PUBLIC_INTERFACE(Derived)\n\nprivate:\n\n  Scalar coeff(Index row, Index col) const;\n  Scalar coeff(Index i) const;\n};\n\n// Generic 
API dispatcher\ntemplate<typename Decomposition, typename RhsType, typename StorageKind>\nclass SolveImpl : public internal::generic_xpr_base<Solve<Decomposition,RhsType>, MatrixXpr, StorageKind>::type\n{\n  public:\n    typedef typename internal::generic_xpr_base<Solve<Decomposition,RhsType>, MatrixXpr, StorageKind>::type Base;\n};\n\nnamespace internal {\n\n// Evaluator of Solve -> eval into a temporary\ntemplate<typename Decomposition, typename RhsType>\nstruct evaluator<Solve<Decomposition,RhsType> >\n  : public evaluator<typename Solve<Decomposition,RhsType>::PlainObject>\n{\n  typedef Solve<Decomposition,RhsType> SolveType;\n  typedef typename SolveType::PlainObject PlainObject;\n  typedef evaluator<PlainObject> Base;\n\n  enum { Flags = Base::Flags | EvalBeforeNestingBit };\n\n  EIGEN_DEVICE_FUNC explicit evaluator(const SolveType& solve)\n    : m_result(solve.rows(), solve.cols())\n  {\n    ::new (static_cast<Base*>(this)) Base(m_result);\n    solve.dec()._solve_impl(solve.rhs(), m_result);\n  }\n\nprotected:\n  PlainObject m_result;\n};\n\n// Specialization for \"dst = dec.solve(rhs)\"\n// NOTE we need to specialize it for Dense2Dense to avoid ambiguous specialization error and a Sparse2Sparse specialization must exist somewhere\ntemplate<typename DstXprType, typename DecType, typename RhsType, typename Scalar>\nstruct Assignment<DstXprType, Solve<DecType,RhsType>, internal::assign_op<Scalar,Scalar>, Dense2Dense>\n{\n  typedef Solve<DecType,RhsType> SrcXprType;\n  static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)\n  {\n    Index dstRows = src.rows();\n    Index dstCols = src.cols();\n    if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))\n      dst.resize(dstRows, dstCols);\n\n    src.dec()._solve_impl(src.rhs(), dst);\n  }\n};\n\n// Specialization for \"dst = dec.transpose().solve(rhs)\"\ntemplate<typename DstXprType, typename DecType, typename RhsType, typename Scalar>\nstruct Assignment<DstXprType, Solve<Transpose<const DecType>,RhsType>, internal::assign_op<Scalar,Scalar>, Dense2Dense>\n{\n  typedef Solve<Transpose<const DecType>,RhsType> SrcXprType;\n  static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)\n  {\n    Index dstRows = src.rows();\n    Index dstCols = src.cols();\n    if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))\n      dst.resize(dstRows, dstCols);\n\n    src.dec().nestedExpression().template _solve_impl_transposed<false>(src.rhs(), dst);\n  }\n};\n\n// Specialization for \"dst = dec.adjoint().solve(rhs)\"\ntemplate<typename DstXprType, typename DecType, typename RhsType, typename Scalar>\nstruct Assignment<DstXprType, Solve<CwiseUnaryOp<internal::scalar_conjugate_op<typename DecType::Scalar>, const Transpose<const DecType> >,RhsType>,\n                  internal::assign_op<Scalar,Scalar>, Dense2Dense>\n{\n  typedef Solve<CwiseUnaryOp<internal::scalar_conjugate_op<typename DecType::Scalar>, const Transpose<const DecType> >,RhsType> SrcXprType;\n  static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)\n  {\n    Index dstRows = src.rows();\n    Index dstCols = src.cols();\n    if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))\n      dst.resize(dstRows, dstCols);\n\n    src.dec().nestedExpression().nestedExpression().template _solve_impl_transposed<true>(src.rhs(), dst);\n  }\n};\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_SOLVE_H\n"
  },
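  {
    "path": "example/eigen_sketches/solve_demo.cpp",
    "content": "// Editor-added illustrative sketch: a hypothetical example file, not part of\n// this repository or of upstream Eigen 3.4.0. It shows the Solve pseudo\n// expression from Eigen/src/Core/Solve.h being created by a decomposition and\n// evaluated on assignment.\n#include <Eigen/Dense>\n#include <iostream>\n\nint main()\n{\n  Eigen::MatrixXd A = Eigen::MatrixXd::Random(3, 3);\n  Eigen::VectorXd b = Eigen::VectorXd::Random(3);\n\n  Eigen::PartialPivLU<Eigen::MatrixXd> lu(A);\n\n  // lu.solve(b) returns a Solve<...> expression; the evaluator defined in\n  // Solve.h runs _solve_impl() into the destination when x is assigned.\n  Eigen::VectorXd x = lu.solve(b);              // solves A   * x = b\n  Eigen::VectorXd y = lu.transpose().solve(b);  // solves A^T * y = b\n\n  std::cout << (A * x - b).norm() << std::endl;\n  std::cout << (A.transpose() * y - b).norm() << std::endl;\n  return 0;\n}\n"
  },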
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/SolveTriangular.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_SOLVETRIANGULAR_H\n#define EIGEN_SOLVETRIANGULAR_H\n\nnamespace Eigen {\n\nnamespace internal {\n\n// Forward declarations:\n// The following two routines are implemented in the products/TriangularSolver*.h files\ntemplate<typename LhsScalar, typename RhsScalar, typename Index, int Side, int Mode, bool Conjugate, int StorageOrder>\nstruct triangular_solve_vector;\n\ntemplate <typename Scalar, typename Index, int Side, int Mode, bool Conjugate, int TriStorageOrder, int OtherStorageOrder, int OtherInnerStride>\nstruct triangular_solve_matrix;\n\n// small helper struct extracting some traits on the underlying solver operation\ntemplate<typename Lhs, typename Rhs, int Side>\nclass trsolve_traits\n{\n  private:\n    enum {\n      RhsIsVectorAtCompileTime = (Side==OnTheLeft ? Rhs::ColsAtCompileTime : Rhs::RowsAtCompileTime)==1\n    };\n  public:\n    enum {\n      Unrolling   = (RhsIsVectorAtCompileTime && Rhs::SizeAtCompileTime != Dynamic && Rhs::SizeAtCompileTime <= 8)\n                  ? CompleteUnrolling : NoUnrolling,\n      RhsVectors  = RhsIsVectorAtCompileTime ? 1 : Dynamic\n    };\n};\n\ntemplate<typename Lhs, typename Rhs,\n  int Side, // can be OnTheLeft/OnTheRight\n  int Mode, // can be Upper/Lower | UnitDiag\n  int Unrolling = trsolve_traits<Lhs,Rhs,Side>::Unrolling,\n  int RhsVectors = trsolve_traits<Lhs,Rhs,Side>::RhsVectors\n  >\nstruct triangular_solver_selector;\n\ntemplate<typename Lhs, typename Rhs, int Side, int Mode>\nstruct triangular_solver_selector<Lhs,Rhs,Side,Mode,NoUnrolling,1>\n{\n  typedef typename Lhs::Scalar LhsScalar;\n  typedef typename Rhs::Scalar RhsScalar;\n  typedef blas_traits<Lhs> LhsProductTraits;\n  typedef typename LhsProductTraits::ExtractType ActualLhsType;\n  typedef Map<Matrix<RhsScalar,Dynamic,1>, Aligned> MappedRhs;\n  static EIGEN_DEVICE_FUNC void run(const Lhs& lhs, Rhs& rhs)\n  {\n    ActualLhsType actualLhs = LhsProductTraits::extract(lhs);\n\n    // FIXME find a way to allow an inner stride if packet_traits<Scalar>::size==1\n\n    bool useRhsDirectly = Rhs::InnerStrideAtCompileTime==1 || rhs.innerStride()==1;\n\n    ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhs,rhs.size(),\n                                                  (useRhsDirectly ? rhs.data() : 0));\n\n    if(!useRhsDirectly)\n      MappedRhs(actualRhs,rhs.size()) = rhs;\n\n    triangular_solve_vector<LhsScalar, RhsScalar, Index, Side, Mode, LhsProductTraits::NeedToConjugate,\n                            (int(Lhs::Flags) & RowMajorBit) ? 
RowMajor : ColMajor>\n      ::run(actualLhs.cols(), actualLhs.data(), actualLhs.outerStride(), actualRhs);\n\n    if(!useRhsDirectly)\n      rhs = MappedRhs(actualRhs, rhs.size());\n  }\n};\n\n// the rhs is a matrix\ntemplate<typename Lhs, typename Rhs, int Side, int Mode>\nstruct triangular_solver_selector<Lhs,Rhs,Side,Mode,NoUnrolling,Dynamic>\n{\n  typedef typename Rhs::Scalar Scalar;\n  typedef blas_traits<Lhs> LhsProductTraits;\n  typedef typename LhsProductTraits::DirectLinearAccessType ActualLhsType;\n\n  static EIGEN_DEVICE_FUNC void run(const Lhs& lhs, Rhs& rhs)\n  {\n    typename internal::add_const_on_value_type<ActualLhsType>::type actualLhs = LhsProductTraits::extract(lhs);\n\n    const Index size = lhs.rows();\n    const Index othersize = Side==OnTheLeft? rhs.cols() : rhs.rows();\n\n    typedef internal::gemm_blocking_space<(Rhs::Flags&RowMajorBit) ? RowMajor : ColMajor,Scalar,Scalar,\n              Rhs::MaxRowsAtCompileTime, Rhs::MaxColsAtCompileTime, Lhs::MaxRowsAtCompileTime,4> BlockingType;\n\n    BlockingType blocking(rhs.rows(), rhs.cols(), size, 1, false);\n\n    triangular_solve_matrix<Scalar,Index,Side,Mode,LhsProductTraits::NeedToConjugate,(int(Lhs::Flags) & RowMajorBit) ? RowMajor : ColMajor,\n                               (Rhs::Flags&RowMajorBit) ? RowMajor : ColMajor, Rhs::InnerStrideAtCompileTime>\n      ::run(size, othersize, &actualLhs.coeffRef(0,0), actualLhs.outerStride(), &rhs.coeffRef(0,0), rhs.innerStride(), rhs.outerStride(), blocking);\n  }\n};\n\n/***************************************************************************\n* meta-unrolling implementation\n***************************************************************************/\n\ntemplate<typename Lhs, typename Rhs, int Mode, int LoopIndex, int Size,\n         bool Stop = LoopIndex==Size>\nstruct triangular_solver_unroller;\n\ntemplate<typename Lhs, typename Rhs, int Mode, int LoopIndex, int Size>\nstruct triangular_solver_unroller<Lhs,Rhs,Mode,LoopIndex,Size,false> {\n  enum {\n    IsLower = ((Mode&Lower)==Lower),\n    DiagIndex  = IsLower ? LoopIndex : Size - LoopIndex - 1,\n    StartIndex = IsLower ? 0         : DiagIndex+1\n  };\n  static EIGEN_DEVICE_FUNC void run(const Lhs& lhs, Rhs& rhs)\n  {\n    if (LoopIndex>0)\n      rhs.coeffRef(DiagIndex) -= lhs.row(DiagIndex).template segment<LoopIndex>(StartIndex).transpose()\n                                .cwiseProduct(rhs.template segment<LoopIndex>(StartIndex)).sum();\n\n    if(!(Mode & UnitDiag))\n      rhs.coeffRef(DiagIndex) /= lhs.coeff(DiagIndex,DiagIndex);\n\n    triangular_solver_unroller<Lhs,Rhs,Mode,LoopIndex+1,Size>::run(lhs,rhs);\n  }\n};\n\ntemplate<typename Lhs, typename Rhs, int Mode, int LoopIndex, int Size>\nstruct triangular_solver_unroller<Lhs,Rhs,Mode,LoopIndex,Size,true> {\n  static EIGEN_DEVICE_FUNC void run(const Lhs&, Rhs&) {}\n};\n\ntemplate<typename Lhs, typename Rhs, int Mode>\nstruct triangular_solver_selector<Lhs,Rhs,OnTheLeft,Mode,CompleteUnrolling,1> {\n  static EIGEN_DEVICE_FUNC void run(const Lhs& lhs, Rhs& rhs)\n  { triangular_solver_unroller<Lhs,Rhs,Mode,0,Rhs::SizeAtCompileTime>::run(lhs,rhs); }\n};\n\ntemplate<typename Lhs, typename Rhs, int Mode>\nstruct triangular_solver_selector<Lhs,Rhs,OnTheRight,Mode,CompleteUnrolling,1> {\n  static EIGEN_DEVICE_FUNC void run(const Lhs& lhs, Rhs& rhs)\n  {\n    Transpose<const Lhs> trLhs(lhs);\n    Transpose<Rhs> trRhs(rhs);\n\n    triangular_solver_unroller<Transpose<const Lhs>,Transpose<Rhs>,\n                              ((Mode&Upper)==Upper ? 
Lower : Upper) | (Mode&UnitDiag),\n                              0,Rhs::SizeAtCompileTime>::run(trLhs,trRhs);\n  }\n};\n\n} // end namespace internal\n\n/***************************************************************************\n* TriangularView methods\n***************************************************************************/\n\n#ifndef EIGEN_PARSED_BY_DOXYGEN\ntemplate<typename MatrixType, unsigned int Mode>\ntemplate<int Side, typename OtherDerived>\nEIGEN_DEVICE_FUNC void TriangularViewImpl<MatrixType,Mode,Dense>::solveInPlace(const MatrixBase<OtherDerived>& _other) const\n{\n  OtherDerived& other = _other.const_cast_derived();\n  eigen_assert( derived().cols() == derived().rows() && ((Side==OnTheLeft && derived().cols() == other.rows()) || (Side==OnTheRight && derived().cols() == other.cols())) );\n  eigen_assert((!(int(Mode) & int(ZeroDiag))) && bool(int(Mode) & (int(Upper) | int(Lower))));\n  // If solving for a 0x0 matrix, nothing to do, simply return.\n  if (derived().cols() == 0)\n    return;\n\n  enum { copy = (internal::traits<OtherDerived>::Flags & RowMajorBit)  && OtherDerived::IsVectorAtCompileTime && OtherDerived::SizeAtCompileTime!=1};\n  typedef typename internal::conditional<copy,\n    typename internal::plain_matrix_type_column_major<OtherDerived>::type, OtherDerived&>::type OtherCopy;\n  OtherCopy otherCopy(other);\n\n  internal::triangular_solver_selector<MatrixType, typename internal::remove_reference<OtherCopy>::type,\n    Side, Mode>::run(derived().nestedExpression(), otherCopy);\n\n  if (copy)\n    other = otherCopy;\n}\n\ntemplate<typename Derived, unsigned int Mode>\ntemplate<int Side, typename Other>\nconst internal::triangular_solve_retval<Side,TriangularView<Derived,Mode>,Other>\nTriangularViewImpl<Derived,Mode,Dense>::solve(const MatrixBase<Other>& other) const\n{\n  return internal::triangular_solve_retval<Side,TriangularViewType,Other>(derived(), other.derived());\n}\n#endif\n\nnamespace internal {\n\n\ntemplate<int Side, typename TriangularType, typename Rhs>\nstruct traits<triangular_solve_retval<Side, TriangularType, Rhs> >\n{\n  typedef typename internal::plain_matrix_type_column_major<Rhs>::type ReturnType;\n};\n\ntemplate<int Side, typename TriangularType, typename Rhs> struct triangular_solve_retval\n : public ReturnByValue<triangular_solve_retval<Side, TriangularType, Rhs> >\n{\n  typedef typename remove_all<typename Rhs::Nested>::type RhsNestedCleaned;\n  typedef ReturnByValue<triangular_solve_retval> Base;\n\n  triangular_solve_retval(const TriangularType& tri, const Rhs& rhs)\n    : m_triangularMatrix(tri), m_rhs(rhs)\n  {}\n\n  inline EIGEN_CONSTEXPR Index rows() const EIGEN_NOEXCEPT { return m_rhs.rows(); }\n  inline EIGEN_CONSTEXPR Index cols() const EIGEN_NOEXCEPT { return m_rhs.cols(); }\n\n  template<typename Dest> inline void evalTo(Dest& dst) const\n  {\n    if(!is_same_dense(dst,m_rhs))\n      dst = m_rhs;\n    m_triangularMatrix.template solveInPlace<Side>(dst);\n  }\n\n  protected:\n    const TriangularType& m_triangularMatrix;\n    typename Rhs::Nested m_rhs;\n};\n\n} // namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_SOLVETRIANGULAR_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/SolverBase.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2015 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_SOLVERBASE_H\n#define EIGEN_SOLVERBASE_H\n\nnamespace Eigen {\n\nnamespace internal {\n\ntemplate<typename Derived>\nstruct solve_assertion {\n    template<bool Transpose_, typename Rhs>\n    static void run(const Derived& solver, const Rhs& b) { solver.template _check_solve_assertion<Transpose_>(b); }\n};\n\ntemplate<typename Derived>\nstruct solve_assertion<Transpose<Derived> >\n{\n    typedef Transpose<Derived> type;\n\n    template<bool Transpose_, typename Rhs>\n    static void run(const type& transpose, const Rhs& b)\n    {\n        internal::solve_assertion<typename internal::remove_all<Derived>::type>::template run<true>(transpose.nestedExpression(), b);\n    }\n};\n\ntemplate<typename Scalar, typename Derived>\nstruct solve_assertion<CwiseUnaryOp<Eigen::internal::scalar_conjugate_op<Scalar>, const Transpose<Derived> > >\n{\n    typedef CwiseUnaryOp<Eigen::internal::scalar_conjugate_op<Scalar>, const Transpose<Derived> > type;\n\n    template<bool Transpose_, typename Rhs>\n    static void run(const type& adjoint, const Rhs& b)\n    {\n        internal::solve_assertion<typename internal::remove_all<Transpose<Derived> >::type>::template run<true>(adjoint.nestedExpression(), b);\n    }\n};\n} // end namespace internal\n\n/** \\class SolverBase\n  * \\brief A base class for matrix decomposition and solvers\n  *\n  * \\tparam Derived the actual type of the decomposition/solver.\n  *\n  * Any matrix decomposition inheriting this base class provide the following API:\n  *\n  * \\code\n  * MatrixType A, b, x;\n  * DecompositionType dec(A);\n  * x = dec.solve(b);             // solve A   * x = b\n  * x = dec.transpose().solve(b); // solve A^T * x = b\n  * x = dec.adjoint().solve(b);   // solve A'  * x = b\n  * \\endcode\n  *\n  * \\warning Currently, any other usage of transpose() and adjoint() are not supported and will produce compilation errors.\n  *\n  * \\sa class PartialPivLU, class FullPivLU, class HouseholderQR, class ColPivHouseholderQR, class FullPivHouseholderQR, class CompleteOrthogonalDecomposition, class LLT, class LDLT, class SVDBase\n  */\ntemplate<typename Derived>\nclass SolverBase : public EigenBase<Derived>\n{\n  public:\n\n    typedef EigenBase<Derived> Base;\n    typedef typename internal::traits<Derived>::Scalar Scalar;\n    typedef Scalar CoeffReturnType;\n\n    template<typename Derived_>\n    friend struct internal::solve_assertion;\n\n    enum {\n      RowsAtCompileTime = internal::traits<Derived>::RowsAtCompileTime,\n      ColsAtCompileTime = internal::traits<Derived>::ColsAtCompileTime,\n      SizeAtCompileTime = (internal::size_at_compile_time<internal::traits<Derived>::RowsAtCompileTime,\n                                                          internal::traits<Derived>::ColsAtCompileTime>::ret),\n      MaxRowsAtCompileTime = internal::traits<Derived>::MaxRowsAtCompileTime,\n      MaxColsAtCompileTime = internal::traits<Derived>::MaxColsAtCompileTime,\n      MaxSizeAtCompileTime = (internal::size_at_compile_time<internal::traits<Derived>::MaxRowsAtCompileTime,\n                                                             
internal::traits<Derived>::MaxColsAtCompileTime>::ret),\n      IsVectorAtCompileTime = internal::traits<Derived>::MaxRowsAtCompileTime == 1\n                           || internal::traits<Derived>::MaxColsAtCompileTime == 1,\n      NumDimensions = int(MaxSizeAtCompileTime) == 1 ? 0 : bool(IsVectorAtCompileTime) ? 1 : 2\n    };\n\n    /** Default constructor */\n    SolverBase()\n    {}\n\n    ~SolverBase()\n    {}\n\n    using Base::derived;\n\n    /** \\returns an expression of the solution x of \\f$ A x = b \\f$ using the current decomposition of A.\n      */\n    template<typename Rhs>\n    inline const Solve<Derived, Rhs>\n    solve(const MatrixBase<Rhs>& b) const\n    {\n      internal::solve_assertion<typename internal::remove_all<Derived>::type>::template run<false>(derived(), b);\n      return Solve<Derived, Rhs>(derived(), b.derived());\n    }\n\n    /** \\internal the return type of transpose() */\n    typedef typename internal::add_const<Transpose<const Derived> >::type ConstTransposeReturnType;\n    /** \\returns an expression of the transposed of the factored matrix.\n      *\n      * A typical usage is to solve for the transposed problem A^T x = b:\n      * \\code x = dec.transpose().solve(b); \\endcode\n      *\n      * \\sa adjoint(), solve()\n      */\n    inline ConstTransposeReturnType transpose() const\n    {\n      return ConstTransposeReturnType(derived());\n    }\n\n    /** \\internal the return type of adjoint() */\n    typedef typename internal::conditional<NumTraits<Scalar>::IsComplex,\n                        CwiseUnaryOp<internal::scalar_conjugate_op<Scalar>, ConstTransposeReturnType>,\n                        ConstTransposeReturnType\n                     >::type AdjointReturnType;\n    /** \\returns an expression of the adjoint of the factored matrix\n      *\n      * A typical usage is to solve for the adjoint problem A' x = b:\n      * \\code x = dec.adjoint().solve(b); \\endcode\n      *\n      * For real scalar types, this function is equivalent to transpose().\n      *\n      * \\sa transpose(), solve()\n      */\n    inline AdjointReturnType adjoint() const\n    {\n      return AdjointReturnType(derived().transpose());\n    }\n\n  protected:\n\n    template<bool Transpose_, typename Rhs>\n    void _check_solve_assertion(const Rhs& b) const {\n        EIGEN_ONLY_USED_FOR_DEBUG(b);\n        eigen_assert(derived().m_isInitialized && \"Solver is not initialized.\");\n        eigen_assert((Transpose_?derived().cols():derived().rows())==b.rows() && \"SolverBase::solve(): invalid number of rows of the right hand side matrix b\");\n    }\n};\n\nnamespace internal {\n\ntemplate<typename Derived>\nstruct generic_xpr_base<Derived, MatrixXpr, SolverStorage>\n{\n  typedef SolverBase<Derived> type;\n\n};\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_SOLVERBASE_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/StableNorm.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_STABLENORM_H\n#define EIGEN_STABLENORM_H\n\nnamespace Eigen { \n\nnamespace internal {\n\ntemplate<typename ExpressionType, typename Scalar>\ninline void stable_norm_kernel(const ExpressionType& bl, Scalar& ssq, Scalar& scale, Scalar& invScale)\n{\n  Scalar maxCoeff = bl.cwiseAbs().maxCoeff();\n  \n  if(maxCoeff>scale)\n  {\n    ssq = ssq * numext::abs2(scale/maxCoeff);\n    Scalar tmp = Scalar(1)/maxCoeff;\n    if(tmp > NumTraits<Scalar>::highest())\n    {\n      invScale = NumTraits<Scalar>::highest();\n      scale = Scalar(1)/invScale;\n    }\n    else if(maxCoeff>NumTraits<Scalar>::highest()) // we got a INF\n    {\n      invScale = Scalar(1);\n      scale = maxCoeff;\n    }\n    else\n    {\n      scale = maxCoeff;\n      invScale = tmp;\n    }\n  }\n  else if(maxCoeff!=maxCoeff) // we got a NaN\n  {\n    scale = maxCoeff;\n  }\n  \n  // TODO if the maxCoeff is much much smaller than the current scale,\n  // then we can neglect this sub vector\n  if(scale>Scalar(0)) // if scale==0, then bl is 0 \n    ssq += (bl*invScale).squaredNorm();\n}\n\ntemplate<typename VectorType, typename RealScalar>\nvoid stable_norm_impl_inner_step(const VectorType &vec, RealScalar& ssq, RealScalar& scale, RealScalar& invScale)\n{\n  typedef typename VectorType::Scalar Scalar;\n  const Index blockSize = 4096;\n  \n  typedef typename internal::nested_eval<VectorType,2>::type VectorTypeCopy;\n  typedef typename internal::remove_all<VectorTypeCopy>::type VectorTypeCopyClean;\n  const VectorTypeCopy copy(vec);\n  \n  enum {\n    CanAlign = (   (int(VectorTypeCopyClean::Flags)&DirectAccessBit)\n                || (int(internal::evaluator<VectorTypeCopyClean>::Alignment)>0) // FIXME Alignment)>0 might not be enough\n               ) && (blockSize*sizeof(Scalar)*2<EIGEN_STACK_ALLOCATION_LIMIT)\n                 && (EIGEN_MAX_STATIC_ALIGN_BYTES>0) // if we cannot allocate on the stack, then let's not bother about this optimization\n  };\n  typedef typename internal::conditional<CanAlign, Ref<const Matrix<Scalar,Dynamic,1,0,blockSize,1>, internal::evaluator<VectorTypeCopyClean>::Alignment>,\n                                                   typename VectorTypeCopyClean::ConstSegmentReturnType>::type SegmentWrapper;\n  Index n = vec.size();\n  \n  Index bi = internal::first_default_aligned(copy);\n  if (bi>0)\n    internal::stable_norm_kernel(copy.head(bi), ssq, scale, invScale);\n  for (; bi<n; bi+=blockSize)\n    internal::stable_norm_kernel(SegmentWrapper(copy.segment(bi,numext::mini(blockSize, n - bi))), ssq, scale, invScale);\n}\n\ntemplate<typename VectorType>\ntypename VectorType::RealScalar\nstable_norm_impl(const VectorType &vec, typename enable_if<VectorType::IsVectorAtCompileTime>::type* = 0 )\n{\n  using std::sqrt;\n  using std::abs;\n\n  Index n = vec.size();\n\n  if(n==1)\n    return abs(vec.coeff(0));\n\n  typedef typename VectorType::RealScalar RealScalar;\n  RealScalar scale(0);\n  RealScalar invScale(1);\n  RealScalar ssq(0); // sum of squares\n\n  stable_norm_impl_inner_step(vec, ssq, scale, invScale);\n  \n  return scale * sqrt(ssq);\n}\n\ntemplate<typename MatrixType>\ntypename 
MatrixType::RealScalar\nstable_norm_impl(const MatrixType &mat, typename enable_if<!MatrixType::IsVectorAtCompileTime>::type* = 0 )\n{\n  using std::sqrt;\n\n  typedef typename MatrixType::RealScalar RealScalar;\n  RealScalar scale(0);\n  RealScalar invScale(1);\n  RealScalar ssq(0); // sum of squares\n\n  for(Index j=0; j<mat.outerSize(); ++j)\n    stable_norm_impl_inner_step(mat.innerVector(j), ssq, scale, invScale);\n  return scale * sqrt(ssq);\n}\n\ntemplate<typename Derived>\ninline typename NumTraits<typename traits<Derived>::Scalar>::Real\nblueNorm_impl(const EigenBase<Derived>& _vec)\n{\n  typedef typename Derived::RealScalar RealScalar;  \n  using std::pow;\n  using std::sqrt;\n  using std::abs;\n\n  // This program calculates the machine-dependent constants\n  // b1, b2, s1m, s2m, relerr, overfl\n  // from the \"basic\" machine-dependent numbers\n  // nbig, ibeta, it, iemin, iemax, rbig.\n  // The following define the basic machine-dependent constants.\n  // For portability, the PORT subprograms \"i1mach\" and \"r1mach\"\n  // are used. For any specific computer, each of the assignment\n  // statements can be replaced\n  static const int ibeta = std::numeric_limits<RealScalar>::radix;  // base for floating-point numbers\n  static const int it    = NumTraits<RealScalar>::digits();  // number of base-beta digits in mantissa\n  static const int iemin = NumTraits<RealScalar>::min_exponent();  // minimum exponent\n  static const int iemax = NumTraits<RealScalar>::max_exponent();  // maximum exponent\n  static const RealScalar rbig   = NumTraits<RealScalar>::highest();  // largest floating-point number\n  static const RealScalar b1     = RealScalar(pow(RealScalar(ibeta),RealScalar(-((1-iemin)/2))));  // lower boundary of midrange\n  static const RealScalar b2     = RealScalar(pow(RealScalar(ibeta),RealScalar((iemax + 1 - it)/2)));  // upper boundary of midrange\n  static const RealScalar s1m    = RealScalar(pow(RealScalar(ibeta),RealScalar((2-iemin)/2)));  // scaling factor for lower range\n  static const RealScalar s2m    = RealScalar(pow(RealScalar(ibeta),RealScalar(- ((iemax+it)/2))));  // scaling factor for upper range\n  static const RealScalar eps    = RealScalar(pow(double(ibeta), 1-it));\n  static const RealScalar relerr = sqrt(eps);  // tolerance for neglecting asml\n\n  const Derived& vec(_vec.derived());\n  Index n = vec.size();\n  RealScalar ab2 = b2 / RealScalar(n);\n  RealScalar asml = RealScalar(0);\n  RealScalar amed = RealScalar(0);\n  RealScalar abig = RealScalar(0);\n\n  for(Index j=0; j<vec.outerSize(); ++j)\n  {\n    for(typename Derived::InnerIterator iter(vec, j); iter; ++iter)\n    {\n      RealScalar ax = abs(iter.value());\n      if(ax > ab2)     abig += numext::abs2(ax*s2m);\n      else if(ax < b1) asml += numext::abs2(ax*s1m);\n      else             amed += numext::abs2(ax);\n    }\n  }\n  if(amed!=amed)\n    return amed;  // we got a NaN\n  if(abig > RealScalar(0))\n  {\n    abig = sqrt(abig);\n    if(abig > rbig) // overflow, or *this contains INF values\n      return abig;  // return INF\n    if(amed > RealScalar(0))\n    {\n      abig = abig/s2m;\n      amed = sqrt(amed);\n    }\n    else\n      return abig/s2m;\n  }\n  else if(asml > RealScalar(0))\n  {\n    if (amed > RealScalar(0))\n    {\n      abig = sqrt(amed);\n      amed = sqrt(asml) / s1m;\n    }\n    else\n      return sqrt(asml)/s1m;\n  }\n  else\n    return sqrt(amed);\n  asml = numext::mini(abig, amed);\n  abig = numext::maxi(abig, amed);\n  if(asml <= abig*relerr)\n    return abig;\n  
else\n    return abig * sqrt(RealScalar(1) + numext::abs2(asml/abig));\n}\n\n} // end namespace internal\n\n/** \\returns the \\em l2 norm of \\c *this avoiding underflow and overflow.\n  * This version uses a blockwise two-pass algorithm:\n  *  1 - find the absolute largest coefficient \\c s\n  *  2 - compute \\f$ s \\Vert \\frac{*this}{s} \\Vert \\f$ in a standard way\n  *\n  * For architecture/scalar types supporting vectorization, this version\n  * is faster than blueNorm(). Otherwise blueNorm() is much faster.\n  *\n  * \\sa norm(), blueNorm(), hypotNorm()\n  */\ntemplate<typename Derived>\ninline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real\nMatrixBase<Derived>::stableNorm() const\n{\n  return internal::stable_norm_impl(derived());\n}\n\n/** \\returns the \\em l2 norm of \\c *this using Blue's algorithm:\n  * \"A Portable Fortran Program to Find the Euclidean Norm of a Vector\",\n  * ACM TOMS, Vol 4, Issue 1, 1978.\n  *\n  * For architecture/scalar types without vectorization, this version\n  * is much faster than stableNorm(). Otherwise stableNorm() is faster.\n  *\n  * \\sa norm(), stableNorm(), hypotNorm()\n  */\ntemplate<typename Derived>\ninline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real\nMatrixBase<Derived>::blueNorm() const\n{\n  return internal::blueNorm_impl(*this);\n}\n\n/** \\returns the \\em l2 norm of \\c *this avoiding underflow and overflow.\n  * This version uses a concatenation of hypot() calls, and it is very slow.\n  *\n  * \\sa norm(), stableNorm()\n  */\ntemplate<typename Derived>\ninline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real\nMatrixBase<Derived>::hypotNorm() const\n{\n  if(size()==1)\n    return numext::abs(coeff(0,0));\n  else\n    return this->cwiseAbs().redux(internal::scalar_hypot_op<RealScalar>());\n}\n\n} // end namespace Eigen\n\n#endif // EIGEN_STABLENORM_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/StlIterators.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2018 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_STLITERATORS_H\n#define EIGEN_STLITERATORS_H\n\nnamespace Eigen {\n\nnamespace internal {\n\ntemplate<typename IteratorType>\nstruct indexed_based_stl_iterator_traits;\n\ntemplate<typename  Derived>\nclass indexed_based_stl_iterator_base\n{\nprotected:\n  typedef indexed_based_stl_iterator_traits<Derived> traits;\n  typedef typename traits::XprType XprType;\n  typedef indexed_based_stl_iterator_base<typename traits::non_const_iterator> non_const_iterator;\n  typedef indexed_based_stl_iterator_base<typename traits::const_iterator> const_iterator;\n  typedef typename internal::conditional<internal::is_const<XprType>::value,non_const_iterator,const_iterator>::type other_iterator;\n  // NOTE: in C++03 we cannot declare friend classes through typedefs because we need to write friend class:\n  friend class indexed_based_stl_iterator_base<typename traits::const_iterator>;\n  friend class indexed_based_stl_iterator_base<typename traits::non_const_iterator>;\npublic:\n  typedef Index difference_type;\n  typedef std::random_access_iterator_tag iterator_category;\n\n  indexed_based_stl_iterator_base() EIGEN_NO_THROW : mp_xpr(0), m_index(0) {}\n  indexed_based_stl_iterator_base(XprType& xpr, Index index) EIGEN_NO_THROW : mp_xpr(&xpr), m_index(index) {}\n\n  indexed_based_stl_iterator_base(const non_const_iterator& other) EIGEN_NO_THROW\n    : mp_xpr(other.mp_xpr), m_index(other.m_index)\n  {}\n\n  indexed_based_stl_iterator_base& operator=(const non_const_iterator& other)\n  {\n    mp_xpr = other.mp_xpr;\n    m_index = other.m_index;\n    return *this;\n  }\n\n  Derived& operator++() { ++m_index; return derived(); }\n  Derived& operator--() { --m_index; return derived(); }\n\n  Derived operator++(int) { Derived prev(derived()); operator++(); return prev;}\n  Derived operator--(int) { Derived prev(derived()); operator--(); return prev;}\n\n  friend Derived operator+(const indexed_based_stl_iterator_base& a, Index b) { Derived ret(a.derived()); ret += b; return ret; }\n  friend Derived operator-(const indexed_based_stl_iterator_base& a, Index b) { Derived ret(a.derived()); ret -= b; return ret; }\n  friend Derived operator+(Index a, const indexed_based_stl_iterator_base& b) { Derived ret(b.derived()); ret += a; return ret; }\n  friend Derived operator-(Index a, const indexed_based_stl_iterator_base& b) { Derived ret(b.derived()); ret -= a; return ret; }\n  \n  Derived& operator+=(Index b) { m_index += b; return derived(); }\n  Derived& operator-=(Index b) { m_index -= b; return derived(); }\n\n  difference_type operator-(const indexed_based_stl_iterator_base& other) const\n  {\n    eigen_assert(mp_xpr == other.mp_xpr);\n    return m_index - other.m_index;\n  }\n\n  difference_type operator-(const other_iterator& other) const\n  {\n    eigen_assert(mp_xpr == other.mp_xpr);\n    return m_index - other.m_index;\n  }\n\n  bool operator==(const indexed_based_stl_iterator_base& other) const { eigen_assert(mp_xpr == other.mp_xpr); return m_index == other.m_index; }\n  bool operator!=(const indexed_based_stl_iterator_base& other) const { eigen_assert(mp_xpr == other.mp_xpr); return m_index != other.m_index; }\n  bool 
operator< (const indexed_based_stl_iterator_base& other) const { eigen_assert(mp_xpr == other.mp_xpr); return m_index <  other.m_index; }\n  bool operator<=(const indexed_based_stl_iterator_base& other) const { eigen_assert(mp_xpr == other.mp_xpr); return m_index <= other.m_index; }\n  bool operator> (const indexed_based_stl_iterator_base& other) const { eigen_assert(mp_xpr == other.mp_xpr); return m_index >  other.m_index; }\n  bool operator>=(const indexed_based_stl_iterator_base& other) const { eigen_assert(mp_xpr == other.mp_xpr); return m_index >= other.m_index; }\n\n  bool operator==(const other_iterator& other) const { eigen_assert(mp_xpr == other.mp_xpr); return m_index == other.m_index; }\n  bool operator!=(const other_iterator& other) const { eigen_assert(mp_xpr == other.mp_xpr); return m_index != other.m_index; }\n  bool operator< (const other_iterator& other) const { eigen_assert(mp_xpr == other.mp_xpr); return m_index <  other.m_index; }\n  bool operator<=(const other_iterator& other) const { eigen_assert(mp_xpr == other.mp_xpr); return m_index <= other.m_index; }\n  bool operator> (const other_iterator& other) const { eigen_assert(mp_xpr == other.mp_xpr); return m_index >  other.m_index; }\n  bool operator>=(const other_iterator& other) const { eigen_assert(mp_xpr == other.mp_xpr); return m_index >= other.m_index; }\n\nprotected:\n\n  Derived& derived() { return static_cast<Derived&>(*this); }\n  const Derived& derived() const { return static_cast<const Derived&>(*this); }\n\n  XprType *mp_xpr;\n  Index m_index;\n};\n\ntemplate<typename  Derived>\nclass indexed_based_stl_reverse_iterator_base\n{\nprotected:\n  typedef indexed_based_stl_iterator_traits<Derived> traits;\n  typedef typename traits::XprType XprType;\n  typedef indexed_based_stl_reverse_iterator_base<typename traits::non_const_iterator> non_const_iterator;\n  typedef indexed_based_stl_reverse_iterator_base<typename traits::const_iterator> const_iterator;\n  typedef typename internal::conditional<internal::is_const<XprType>::value,non_const_iterator,const_iterator>::type other_iterator;\n  // NOTE: in C++03 we cannot declare friend classes through typedefs because we need to write friend class:\n  friend class indexed_based_stl_reverse_iterator_base<typename traits::const_iterator>;\n  friend class indexed_based_stl_reverse_iterator_base<typename traits::non_const_iterator>;\npublic:\n  typedef Index difference_type;\n  typedef std::random_access_iterator_tag iterator_category;\n\n  indexed_based_stl_reverse_iterator_base() : mp_xpr(0), m_index(0) {}\n  indexed_based_stl_reverse_iterator_base(XprType& xpr, Index index) : mp_xpr(&xpr), m_index(index) {}\n\n  indexed_based_stl_reverse_iterator_base(const non_const_iterator& other)\n    : mp_xpr(other.mp_xpr), m_index(other.m_index)\n  {}\n\n  indexed_based_stl_reverse_iterator_base& operator=(const non_const_iterator& other)\n  {\n    mp_xpr = other.mp_xpr;\n    m_index = other.m_index;\n    return *this;\n  }\n\n  Derived& operator++() { --m_index; return derived(); }\n  Derived& operator--() { ++m_index; return derived(); }\n\n  Derived operator++(int) { Derived prev(derived()); operator++(); return prev;}\n  Derived operator--(int) { Derived prev(derived()); operator--(); return prev;}\n\n  friend Derived operator+(const indexed_based_stl_reverse_iterator_base& a, Index b) { Derived ret(a.derived()); ret += b; return ret; }\n  friend Derived operator-(const indexed_based_stl_reverse_iterator_base& a, Index b) { Derived ret(a.derived()); ret -= b; return ret; }\n  
friend Derived operator+(Index a, const indexed_based_stl_reverse_iterator_base& b) { Derived ret(b.derived()); ret += a; return ret; }\n  friend Derived operator-(Index a, const indexed_based_stl_reverse_iterator_base& b) { Derived ret(b.derived()); ret -= a; return ret; }\n  \n  Derived& operator+=(Index b) { m_index -= b; return derived(); }\n  Derived& operator-=(Index b) { m_index += b; return derived(); }\n\n  difference_type operator-(const indexed_based_stl_reverse_iterator_base& other) const\n  {\n    eigen_assert(mp_xpr == other.mp_xpr);\n    return other.m_index - m_index;\n  }\n\n  difference_type operator-(const other_iterator& other) const\n  {\n    eigen_assert(mp_xpr == other.mp_xpr);\n    return other.m_index - m_index;\n  }\n\n  bool operator==(const indexed_based_stl_reverse_iterator_base& other) const { eigen_assert(mp_xpr == other.mp_xpr); return m_index == other.m_index; }\n  bool operator!=(const indexed_based_stl_reverse_iterator_base& other) const { eigen_assert(mp_xpr == other.mp_xpr); return m_index != other.m_index; }\n  bool operator< (const indexed_based_stl_reverse_iterator_base& other) const { eigen_assert(mp_xpr == other.mp_xpr); return m_index >  other.m_index; }\n  bool operator<=(const indexed_based_stl_reverse_iterator_base& other) const { eigen_assert(mp_xpr == other.mp_xpr); return m_index >= other.m_index; }\n  bool operator> (const indexed_based_stl_reverse_iterator_base& other) const { eigen_assert(mp_xpr == other.mp_xpr); return m_index <  other.m_index; }\n  bool operator>=(const indexed_based_stl_reverse_iterator_base& other) const { eigen_assert(mp_xpr == other.mp_xpr); return m_index <= other.m_index; }\n\n  bool operator==(const other_iterator& other) const { eigen_assert(mp_xpr == other.mp_xpr); return m_index == other.m_index; }\n  bool operator!=(const other_iterator& other) const { eigen_assert(mp_xpr == other.mp_xpr); return m_index != other.m_index; }\n  bool operator< (const other_iterator& other) const { eigen_assert(mp_xpr == other.mp_xpr); return m_index >  other.m_index; }\n  bool operator<=(const other_iterator& other) const { eigen_assert(mp_xpr == other.mp_xpr); return m_index >= other.m_index; }\n  bool operator> (const other_iterator& other) const { eigen_assert(mp_xpr == other.mp_xpr); return m_index <  other.m_index; }\n  bool operator>=(const other_iterator& other) const { eigen_assert(mp_xpr == other.mp_xpr); return m_index <= other.m_index; }\n\nprotected:\n\n  Derived& derived() { return static_cast<Derived&>(*this); }\n  const Derived& derived() const { return static_cast<const Derived&>(*this); }\n\n  XprType *mp_xpr;\n  Index m_index;\n};\n\ntemplate<typename XprType>\nclass pointer_based_stl_iterator\n{\n  enum { is_lvalue  = internal::is_lvalue<XprType>::value };\n  typedef pointer_based_stl_iterator<typename internal::remove_const<XprType>::type> non_const_iterator;\n  typedef pointer_based_stl_iterator<typename internal::add_const<XprType>::type> const_iterator;\n  typedef typename internal::conditional<internal::is_const<XprType>::value,non_const_iterator,const_iterator>::type other_iterator;\n  // NOTE: in C++03 we cannot declare friend classes through typedefs because we need to write friend class:\n  friend class pointer_based_stl_iterator<typename internal::add_const<XprType>::type>;\n  friend class pointer_based_stl_iterator<typename internal::remove_const<XprType>::type>;\npublic:\n  typedef Index difference_type;\n  typedef typename XprType::Scalar value_type;\n  typedef std::random_access_iterator_tag 
iterator_category;\n  typedef typename internal::conditional<bool(is_lvalue), value_type*, const value_type*>::type pointer;\n  typedef typename internal::conditional<bool(is_lvalue), value_type&, const value_type&>::type reference;\n\n\n  pointer_based_stl_iterator() EIGEN_NO_THROW : m_ptr(0) {}\n  pointer_based_stl_iterator(XprType& xpr, Index index) EIGEN_NO_THROW : m_incr(xpr.innerStride())\n  {\n    m_ptr = xpr.data() + index * m_incr.value();\n  }\n\n  pointer_based_stl_iterator(const non_const_iterator& other) EIGEN_NO_THROW\n    : m_ptr(other.m_ptr), m_incr(other.m_incr)\n  {}\n\n  pointer_based_stl_iterator& operator=(const non_const_iterator& other) EIGEN_NO_THROW\n  {\n    m_ptr = other.m_ptr;\n    m_incr.setValue(other.m_incr.value());\n    return *this;\n  }\n\n  reference operator*()         const { return *m_ptr;   }\n  reference operator[](Index i) const { return *(m_ptr+i*m_incr.value()); }\n  pointer   operator->()        const { return m_ptr;    }\n\n  pointer_based_stl_iterator& operator++() { m_ptr += m_incr.value(); return *this; }\n  pointer_based_stl_iterator& operator--() { m_ptr -= m_incr.value(); return *this; }\n\n  pointer_based_stl_iterator operator++(int) { pointer_based_stl_iterator prev(*this); operator++(); return prev;}\n  pointer_based_stl_iterator operator--(int) { pointer_based_stl_iterator prev(*this); operator--(); return prev;}\n\n  friend pointer_based_stl_iterator operator+(const pointer_based_stl_iterator& a, Index b) { pointer_based_stl_iterator ret(a); ret += b; return ret; }\n  friend pointer_based_stl_iterator operator-(const pointer_based_stl_iterator& a, Index b) { pointer_based_stl_iterator ret(a); ret -= b; return ret; }\n  friend pointer_based_stl_iterator operator+(Index a, const pointer_based_stl_iterator& b) { pointer_based_stl_iterator ret(b); ret += a; return ret; }\n  friend pointer_based_stl_iterator operator-(Index a, const pointer_based_stl_iterator& b) { pointer_based_stl_iterator ret(b); ret -= a; return ret; }\n  \n  pointer_based_stl_iterator& operator+=(Index b) { m_ptr += b*m_incr.value(); return *this; }\n  pointer_based_stl_iterator& operator-=(Index b) { m_ptr -= b*m_incr.value(); return *this; }\n\n  difference_type operator-(const pointer_based_stl_iterator& other) const {\n    return (m_ptr - other.m_ptr)/m_incr.value();\n  }\n\n  difference_type operator-(const other_iterator& other) const {\n    return (m_ptr - other.m_ptr)/m_incr.value();\n  }\n\n  bool operator==(const pointer_based_stl_iterator& other) const { return m_ptr == other.m_ptr; }\n  bool operator!=(const pointer_based_stl_iterator& other) const { return m_ptr != other.m_ptr; }\n  bool operator< (const pointer_based_stl_iterator& other) const { return m_ptr <  other.m_ptr; }\n  bool operator<=(const pointer_based_stl_iterator& other) const { return m_ptr <= other.m_ptr; }\n  bool operator> (const pointer_based_stl_iterator& other) const { return m_ptr >  other.m_ptr; }\n  bool operator>=(const pointer_based_stl_iterator& other) const { return m_ptr >= other.m_ptr; }\n\n  bool operator==(const other_iterator& other) const { return m_ptr == other.m_ptr; }\n  bool operator!=(const other_iterator& other) const { return m_ptr != other.m_ptr; }\n  bool operator< (const other_iterator& other) const { return m_ptr <  other.m_ptr; }\n  bool operator<=(const other_iterator& other) const { return m_ptr <= other.m_ptr; }\n  bool operator> (const other_iterator& other) const { return m_ptr >  other.m_ptr; }\n  bool operator>=(const other_iterator& other) const { return 
m_ptr >= other.m_ptr; }\n\nprotected:\n\n  pointer m_ptr;\n  internal::variable_if_dynamic<Index, XprType::InnerStrideAtCompileTime> m_incr;\n};\n\ntemplate<typename _XprType>\nstruct indexed_based_stl_iterator_traits<generic_randaccess_stl_iterator<_XprType> >\n{\n  typedef _XprType XprType;\n  typedef generic_randaccess_stl_iterator<typename internal::remove_const<XprType>::type> non_const_iterator;\n  typedef generic_randaccess_stl_iterator<typename internal::add_const<XprType>::type> const_iterator;\n};\n\ntemplate<typename XprType>\nclass generic_randaccess_stl_iterator : public indexed_based_stl_iterator_base<generic_randaccess_stl_iterator<XprType> >\n{\npublic:\n  typedef typename XprType::Scalar value_type;\n\nprotected:\n\n  enum {\n    has_direct_access = (internal::traits<XprType>::Flags & DirectAccessBit) ? 1 : 0,\n    is_lvalue  = internal::is_lvalue<XprType>::value\n  };\n\n  typedef indexed_based_stl_iterator_base<generic_randaccess_stl_iterator> Base;\n  using Base::m_index;\n  using Base::mp_xpr;\n\n  // TODO currently const Transpose/Reshape expressions never returns const references,\n  // so lets return by value too.\n  //typedef typename internal::conditional<bool(has_direct_access), const value_type&, const value_type>::type read_only_ref_t;\n  typedef const value_type read_only_ref_t;\n\npublic:\n  \n  typedef typename internal::conditional<bool(is_lvalue), value_type *, const value_type *>::type pointer;\n  typedef typename internal::conditional<bool(is_lvalue), value_type&, read_only_ref_t>::type reference;\n  \n  generic_randaccess_stl_iterator() : Base() {}\n  generic_randaccess_stl_iterator(XprType& xpr, Index index) : Base(xpr,index) {}\n  generic_randaccess_stl_iterator(const typename Base::non_const_iterator& other) : Base(other) {}\n  using Base::operator=;\n\n  reference operator*()         const { return   (*mp_xpr)(m_index);   }\n  reference operator[](Index i) const { return   (*mp_xpr)(m_index+i); }\n  pointer   operator->()        const { return &((*mp_xpr)(m_index)); }\n};\n\ntemplate<typename _XprType, DirectionType Direction>\nstruct indexed_based_stl_iterator_traits<subvector_stl_iterator<_XprType,Direction> >\n{\n  typedef _XprType XprType;\n  typedef subvector_stl_iterator<typename internal::remove_const<XprType>::type, Direction> non_const_iterator;\n  typedef subvector_stl_iterator<typename internal::add_const<XprType>::type, Direction> const_iterator;\n};\n\ntemplate<typename XprType, DirectionType Direction>\nclass subvector_stl_iterator : public indexed_based_stl_iterator_base<subvector_stl_iterator<XprType,Direction> >\n{\nprotected:\n\n  enum { is_lvalue  = internal::is_lvalue<XprType>::value };\n\n  typedef indexed_based_stl_iterator_base<subvector_stl_iterator> Base;\n  using Base::m_index;\n  using Base::mp_xpr;\n\n  typedef typename internal::conditional<Direction==Vertical,typename XprType::ColXpr,typename XprType::RowXpr>::type SubVectorType;\n  typedef typename internal::conditional<Direction==Vertical,typename XprType::ConstColXpr,typename XprType::ConstRowXpr>::type ConstSubVectorType;\n\n\npublic:\n  typedef typename internal::conditional<bool(is_lvalue), SubVectorType, ConstSubVectorType>::type reference;\n  typedef typename reference::PlainObject value_type;\n\nprivate:\n  class subvector_stl_iterator_ptr\n  {\n  public:\n      subvector_stl_iterator_ptr(const reference &subvector) : m_subvector(subvector) {}\n      reference* operator->() { return &m_subvector; }\n  private:\n      reference m_subvector;\n  };\npublic:\n\n  
typedef subvector_stl_iterator_ptr pointer;\n  \n  subvector_stl_iterator() : Base() {}\n  subvector_stl_iterator(XprType& xpr, Index index) : Base(xpr,index) {}\n\n  reference operator*()         const { return (*mp_xpr).template subVector<Direction>(m_index); }\n  reference operator[](Index i) const { return (*mp_xpr).template subVector<Direction>(m_index+i); }\n  pointer   operator->()        const { return (*mp_xpr).template subVector<Direction>(m_index); }\n};\n\ntemplate<typename _XprType, DirectionType Direction>\nstruct indexed_based_stl_iterator_traits<subvector_stl_reverse_iterator<_XprType,Direction> >\n{\n  typedef _XprType XprType;\n  typedef subvector_stl_reverse_iterator<typename internal::remove_const<XprType>::type, Direction> non_const_iterator;\n  typedef subvector_stl_reverse_iterator<typename internal::add_const<XprType>::type, Direction> const_iterator;\n};\n\ntemplate<typename XprType, DirectionType Direction>\nclass subvector_stl_reverse_iterator : public indexed_based_stl_reverse_iterator_base<subvector_stl_reverse_iterator<XprType,Direction> >\n{\nprotected:\n\n  enum { is_lvalue  = internal::is_lvalue<XprType>::value };\n\n  typedef indexed_based_stl_reverse_iterator_base<subvector_stl_reverse_iterator> Base;\n  using Base::m_index;\n  using Base::mp_xpr;\n\n  typedef typename internal::conditional<Direction==Vertical,typename XprType::ColXpr,typename XprType::RowXpr>::type SubVectorType;\n  typedef typename internal::conditional<Direction==Vertical,typename XprType::ConstColXpr,typename XprType::ConstRowXpr>::type ConstSubVectorType;\n\n\npublic:\n  typedef typename internal::conditional<bool(is_lvalue), SubVectorType, ConstSubVectorType>::type reference;\n  typedef typename reference::PlainObject value_type;\n\nprivate:\n  class subvector_stl_reverse_iterator_ptr\n  {\n  public:\n      subvector_stl_reverse_iterator_ptr(const reference &subvector) : m_subvector(subvector) {}\n      reference* operator->() { return &m_subvector; }\n  private:\n      reference m_subvector;\n  };\npublic:\n\n  typedef subvector_stl_reverse_iterator_ptr pointer;\n  \n  subvector_stl_reverse_iterator() : Base() {}\n  subvector_stl_reverse_iterator(XprType& xpr, Index index) : Base(xpr,index) {}\n\n  reference operator*()         const { return (*mp_xpr).template subVector<Direction>(m_index); }\n  reference operator[](Index i) const { return (*mp_xpr).template subVector<Direction>(m_index+i); }\n  pointer   operator->()        const { return (*mp_xpr).template subVector<Direction>(m_index); }\n};\n\n} // namespace internal\n\n\n/** returns an iterator to the first element of the 1D vector or array\n  * \\only_for_vectors\n  * \\sa end(), cbegin()\n  */\ntemplate<typename Derived>\ninline typename DenseBase<Derived>::iterator DenseBase<Derived>::begin()\n{\n  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived);\n  return iterator(derived(), 0);\n}\n\n/** const version of begin() */\ntemplate<typename Derived>\ninline typename DenseBase<Derived>::const_iterator DenseBase<Derived>::begin() const\n{\n  return cbegin();\n}\n\n/** returns a read-only const_iterator to the first element of the 1D vector or array\n  * \\only_for_vectors\n  * \\sa cend(), begin()\n  */\ntemplate<typename Derived>\ninline typename DenseBase<Derived>::const_iterator DenseBase<Derived>::cbegin() const\n{\n  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived);\n  return const_iterator(derived(), 0);\n}\n\n/** returns an iterator to the element following the last element of the 1D vector or array\n  * \\only_for_vectors\n  * \\sa 
begin(), cend()\n  */\ntemplate<typename Derived>\ninline typename DenseBase<Derived>::iterator DenseBase<Derived>::end()\n{\n  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived);\n  return iterator(derived(), size());\n}\n\n/** const version of end() */\ntemplate<typename Derived>\ninline typename DenseBase<Derived>::const_iterator DenseBase<Derived>::end() const\n{\n  return cend();\n}\n\n/** returns a read-only const_iterator to the element following the last element of the 1D vector or array\n  * \\only_for_vectors\n  * \\sa begin(), cend()\n  */\ntemplate<typename Derived>\ninline typename DenseBase<Derived>::const_iterator DenseBase<Derived>::cend() const\n{\n  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived);\n  return const_iterator(derived(), size());\n}\n\n} // namespace Eigen\n\n#endif // EIGEN_STLITERATORS_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/Stride.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2010 Benoit Jacob <jacob.benoit.1@gmail.com>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_STRIDE_H\n#define EIGEN_STRIDE_H\n\nnamespace Eigen {\n\n/** \\class Stride\n  * \\ingroup Core_Module\n  *\n  * \\brief Holds strides information for Map\n  *\n  * This class holds the strides information for mapping arrays with strides with class Map.\n  *\n  * It holds two values: the inner stride and the outer stride.\n  *\n  * The inner stride is the pointer increment between two consecutive entries within a given row of a\n  * row-major matrix or within a given column of a column-major matrix.\n  *\n  * The outer stride is the pointer increment between two consecutive rows of a row-major matrix or\n  * between two consecutive columns of a column-major matrix.\n  *\n  * These two values can be passed either at compile-time as template parameters, or at runtime as\n  * arguments to the constructor.\n  *\n  * Indeed, this class takes two template parameters:\n  *  \\tparam _OuterStrideAtCompileTime the outer stride, or Dynamic if you want to specify it at runtime.\n  *  \\tparam _InnerStrideAtCompileTime the inner stride, or Dynamic if you want to specify it at runtime.\n  *\n  * Here is an example:\n  * \\include Map_general_stride.cpp\n  * Output: \\verbinclude Map_general_stride.out\n  *\n  * Both strides can be negative, however, a negative stride of -1 cannot be specified at compiletime\n  * because of the ambiguity with Dynamic which is defined to -1 (historically, negative strides were\n  * not allowed).\n  *\n  * \\sa class InnerStride, class OuterStride, \\ref TopicStorageOrders\n  */\ntemplate<int _OuterStrideAtCompileTime, int _InnerStrideAtCompileTime>\nclass Stride\n{\n  public:\n    typedef Eigen::Index Index; ///< \\deprecated since Eigen 3.3\n    enum {\n      InnerStrideAtCompileTime = _InnerStrideAtCompileTime,\n      OuterStrideAtCompileTime = _OuterStrideAtCompileTime\n    };\n\n    /** Default constructor, for use when strides are fixed at compile time */\n    EIGEN_DEVICE_FUNC\n    Stride()\n      : m_outer(OuterStrideAtCompileTime), m_inner(InnerStrideAtCompileTime)\n    {\n      // FIXME: for Eigen 4 we should use DynamicIndex instead of Dynamic.\n      // FIXME: for Eigen 4 we should also unify this API with fix<>\n      eigen_assert(InnerStrideAtCompileTime != Dynamic && OuterStrideAtCompileTime != Dynamic);\n    }\n\n    /** Constructor allowing to pass the strides at runtime */\n    EIGEN_DEVICE_FUNC\n    Stride(Index outerStride, Index innerStride)\n      : m_outer(outerStride), m_inner(innerStride)\n    {\n    }\n\n    /** Copy constructor */\n    EIGEN_DEVICE_FUNC\n    Stride(const Stride& other)\n      : m_outer(other.outer()), m_inner(other.inner())\n    {}\n\n    /** \\returns the outer stride */\n    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n    inline Index outer() const { return m_outer.value(); }\n    /** \\returns the inner stride */\n    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n    inline Index inner() const { return m_inner.value(); }\n\n  protected:\n    internal::variable_if_dynamic<Index, OuterStrideAtCompileTime> m_outer;\n    internal::variable_if_dynamic<Index, InnerStrideAtCompileTime> m_inner;\n};\n\n/** \\brief Convenience specialization of Stride to specify 
only an inner stride\n  * See class Map for some examples */\ntemplate<int Value>\nclass InnerStride : public Stride<0, Value>\n{\n    typedef Stride<0, Value> Base;\n  public:\n    EIGEN_DEVICE_FUNC InnerStride() : Base() {}\n    EIGEN_DEVICE_FUNC InnerStride(Index v) : Base(0, v) {} // FIXME making this explicit could break valid code\n};\n\n/** \\brief Convenience specialization of Stride to specify only an outer stride\n  * See class Map for some examples */\ntemplate<int Value>\nclass OuterStride : public Stride<Value, 0>\n{\n    typedef Stride<Value, 0> Base;\n  public:\n    EIGEN_DEVICE_FUNC OuterStride() : Base() {}\n    EIGEN_DEVICE_FUNC OuterStride(Index v) : Base(v,0) {} // FIXME making this explicit could break valid code\n};\n\n} // end namespace Eigen\n\n#endif // EIGEN_STRIDE_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/Swap.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_SWAP_H\n#define EIGEN_SWAP_H\n\nnamespace Eigen { \n\nnamespace internal {\n\n// Overload default assignPacket behavior for swapping them\ntemplate<typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT>\nclass generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, swap_assign_op<typename DstEvaluatorTypeT::Scalar>, Specialized>\n : public generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, swap_assign_op<typename DstEvaluatorTypeT::Scalar>, BuiltIn>\n{\nprotected:\n  typedef generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, swap_assign_op<typename DstEvaluatorTypeT::Scalar>, BuiltIn> Base;\n  using Base::m_dst;\n  using Base::m_src;\n  using Base::m_functor;\n  \npublic:\n  typedef typename Base::Scalar Scalar;\n  typedef typename Base::DstXprType DstXprType;\n  typedef swap_assign_op<Scalar> Functor;\n  \n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  generic_dense_assignment_kernel(DstEvaluatorTypeT &dst, const SrcEvaluatorTypeT &src, const Functor &func, DstXprType& dstExpr)\n    : Base(dst, src, func, dstExpr)\n  {}\n  \n  template<int StoreMode, int LoadMode, typename PacketType>\n  EIGEN_STRONG_INLINE void assignPacket(Index row, Index col)\n  {\n    PacketType tmp = m_src.template packet<LoadMode,PacketType>(row,col);\n    const_cast<SrcEvaluatorTypeT&>(m_src).template writePacket<LoadMode>(row,col, m_dst.template packet<StoreMode,PacketType>(row,col));\n    m_dst.template writePacket<StoreMode>(row,col,tmp);\n  }\n  \n  template<int StoreMode, int LoadMode, typename PacketType>\n  EIGEN_STRONG_INLINE void assignPacket(Index index)\n  {\n    PacketType tmp = m_src.template packet<LoadMode,PacketType>(index);\n    const_cast<SrcEvaluatorTypeT&>(m_src).template writePacket<LoadMode>(index, m_dst.template packet<StoreMode,PacketType>(index));\n    m_dst.template writePacket<StoreMode>(index,tmp);\n  }\n  \n  // TODO find a simple way not to have to copy/paste this function from generic_dense_assignment_kernel, by simple I mean no CRTP (Gael)\n  template<int StoreMode, int LoadMode, typename PacketType>\n  EIGEN_STRONG_INLINE void assignPacketByOuterInner(Index outer, Index inner)\n  {\n    Index row = Base::rowIndexByOuterInner(outer, inner); \n    Index col = Base::colIndexByOuterInner(outer, inner);\n    assignPacket<StoreMode,LoadMode,PacketType>(row, col);\n  }\n};\n\n} // namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_SWAP_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/Transpose.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>\n// Copyright (C) 2009-2014 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_TRANSPOSE_H\n#define EIGEN_TRANSPOSE_H\n\nnamespace Eigen {\n\nnamespace internal {\ntemplate<typename MatrixType>\nstruct traits<Transpose<MatrixType> > : public traits<MatrixType>\n{\n  typedef typename ref_selector<MatrixType>::type MatrixTypeNested;\n  typedef typename remove_reference<MatrixTypeNested>::type MatrixTypeNestedPlain;\n  enum {\n    RowsAtCompileTime = MatrixType::ColsAtCompileTime,\n    ColsAtCompileTime = MatrixType::RowsAtCompileTime,\n    MaxRowsAtCompileTime = MatrixType::MaxColsAtCompileTime,\n    MaxColsAtCompileTime = MatrixType::MaxRowsAtCompileTime,\n    FlagsLvalueBit = is_lvalue<MatrixType>::value ? LvalueBit : 0,\n    Flags0 = traits<MatrixTypeNestedPlain>::Flags & ~(LvalueBit | NestByRefBit),\n    Flags1 = Flags0 | FlagsLvalueBit,\n    Flags = Flags1 ^ RowMajorBit,\n    InnerStrideAtCompileTime = inner_stride_at_compile_time<MatrixType>::ret,\n    OuterStrideAtCompileTime = outer_stride_at_compile_time<MatrixType>::ret\n  };\n};\n}\n\ntemplate<typename MatrixType, typename StorageKind> class TransposeImpl;\n\n/** \\class Transpose\n  * \\ingroup Core_Module\n  *\n  * \\brief Expression of the transpose of a matrix\n  *\n  * \\tparam MatrixType the type of the object of which we are taking the transpose\n  *\n  * This class represents an expression of the transpose of a matrix.\n  * It is the return type of MatrixBase::transpose() and MatrixBase::adjoint()\n  * and most of the time this is the only way it is used.\n  *\n  * \\sa MatrixBase::transpose(), MatrixBase::adjoint()\n  */\ntemplate<typename MatrixType> class Transpose\n  : public TransposeImpl<MatrixType,typename internal::traits<MatrixType>::StorageKind>\n{\n  public:\n\n    typedef typename internal::ref_selector<MatrixType>::non_const_type MatrixTypeNested;\n\n    typedef typename TransposeImpl<MatrixType,typename internal::traits<MatrixType>::StorageKind>::Base Base;\n    EIGEN_GENERIC_PUBLIC_INTERFACE(Transpose)\n    typedef typename internal::remove_all<MatrixType>::type NestedExpression;\n\n    EIGEN_DEVICE_FUNC\n    explicit EIGEN_STRONG_INLINE Transpose(MatrixType& matrix) : m_matrix(matrix) {}\n\n    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Transpose)\n\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR\n    Index rows() const EIGEN_NOEXCEPT { return m_matrix.cols(); }\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR\n    Index cols() const EIGEN_NOEXCEPT { return m_matrix.rows(); }\n\n    /** \\returns the nested expression */\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n    const typename internal::remove_all<MatrixTypeNested>::type&\n    nestedExpression() const { return m_matrix; }\n\n    /** \\returns the nested expression */\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n    typename internal::remove_reference<MatrixTypeNested>::type&\n    nestedExpression() { return m_matrix; }\n\n    /** \\internal */\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n    void resize(Index nrows, Index ncols) {\n      m_matrix.resize(ncols,nrows);\n    }\n\n  protected:\n    typename internal::ref_selector<MatrixType>::non_const_type 
m_matrix;\n};\n\nnamespace internal {\n\ntemplate<typename MatrixType, bool HasDirectAccess = has_direct_access<MatrixType>::ret>\nstruct TransposeImpl_base\n{\n  typedef typename dense_xpr_base<Transpose<MatrixType> >::type type;\n};\n\ntemplate<typename MatrixType>\nstruct TransposeImpl_base<MatrixType, false>\n{\n  typedef typename dense_xpr_base<Transpose<MatrixType> >::type type;\n};\n\n} // end namespace internal\n\n// Generic API dispatcher\ntemplate<typename XprType, typename StorageKind>\nclass TransposeImpl\n  : public internal::generic_xpr_base<Transpose<XprType> >::type\n{\npublic:\n  typedef typename internal::generic_xpr_base<Transpose<XprType> >::type Base;\n};\n\ntemplate<typename MatrixType> class TransposeImpl<MatrixType,Dense>\n  : public internal::TransposeImpl_base<MatrixType>::type\n{\n  public:\n\n    typedef typename internal::TransposeImpl_base<MatrixType>::type Base;\n    using Base::coeffRef;\n    EIGEN_DENSE_PUBLIC_INTERFACE(Transpose<MatrixType>)\n    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(TransposeImpl)\n\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n    Index innerStride() const { return derived().nestedExpression().innerStride(); }\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n    Index outerStride() const { return derived().nestedExpression().outerStride(); }\n\n    typedef typename internal::conditional<\n                       internal::is_lvalue<MatrixType>::value,\n                       Scalar,\n                       const Scalar\n                     >::type ScalarWithConstIfNotLvalue;\n\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n    ScalarWithConstIfNotLvalue* data() { return derived().nestedExpression().data(); }\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n    const Scalar* data() const { return derived().nestedExpression().data(); }\n\n    // FIXME: shall we keep the const version of coeffRef?\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n    const Scalar& coeffRef(Index rowId, Index colId) const\n    {\n      return derived().nestedExpression().coeffRef(colId, rowId);\n    }\n\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n    const Scalar& coeffRef(Index index) const\n    {\n      return derived().nestedExpression().coeffRef(index);\n    }\n  protected:\n    EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(TransposeImpl)\n};\n\n/** \\returns an expression of the transpose of *this.\n  *\n  * Example: \\include MatrixBase_transpose.cpp\n  * Output: \\verbinclude MatrixBase_transpose.out\n  *\n  * \\warning If you want to replace a matrix by its own transpose, do \\b NOT do this:\n  * \\code\n  * m = m.transpose(); // bug!!! caused by aliasing effect\n  * \\endcode\n  * Instead, use the transposeInPlace() method:\n  * \\code\n  * m.transposeInPlace();\n  * \\endcode\n  * which gives Eigen good opportunities for optimization, or alternatively you can also do:\n  * \\code\n  * m = m.transpose().eval();\n  * \\endcode\n  *\n  * \\sa transposeInPlace(), adjoint() */\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\nTranspose<Derived>\nDenseBase<Derived>::transpose()\n{\n  return TransposeReturnType(derived());\n}\n\n/** This is the const version of transpose().\n  *\n  * Make sure you read the warning for transpose() !\n  *\n  * \\sa transposeInPlace(), adjoint() */\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\ntypename DenseBase<Derived>::ConstTransposeReturnType\nDenseBase<Derived>::transpose() const\n{\n  return ConstTransposeReturnType(derived());\n}\n\n/** \\returns an expression of the adjoint (i.e. 
conjugate transpose) of *this.\n  *\n  * Example: \\include MatrixBase_adjoint.cpp\n  * Output: \\verbinclude MatrixBase_adjoint.out\n  *\n  * \\warning If you want to replace a matrix by its own adjoint, do \\b NOT do this:\n  * \\code\n  * m = m.adjoint(); // bug!!! caused by aliasing effect\n  * \\endcode\n  * Instead, use the adjointInPlace() method:\n  * \\code\n  * m.adjointInPlace();\n  * \\endcode\n  * which gives Eigen good opportunities for optimization, or alternatively you can also do:\n  * \\code\n  * m = m.adjoint().eval();\n  * \\endcode\n  *\n  * \\sa adjointInPlace(), transpose(), conjugate(), class Transpose, class internal::scalar_conjugate_op */\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC inline const typename MatrixBase<Derived>::AdjointReturnType\nMatrixBase<Derived>::adjoint() const\n{\n  return AdjointReturnType(this->transpose());\n}\n\n/***************************************************************************\n* \"in place\" transpose implementation\n***************************************************************************/\n\nnamespace internal {\n\ntemplate<typename MatrixType,\n  bool IsSquare = (MatrixType::RowsAtCompileTime == MatrixType::ColsAtCompileTime) && MatrixType::RowsAtCompileTime!=Dynamic,\n  bool MatchPacketSize =\n        (int(MatrixType::RowsAtCompileTime) == int(internal::packet_traits<typename MatrixType::Scalar>::size))\n    &&  (internal::evaluator<MatrixType>::Flags&PacketAccessBit) >\nstruct inplace_transpose_selector;\n\ntemplate<typename MatrixType>\nstruct inplace_transpose_selector<MatrixType,true,false> { // square matrix\n  static void run(MatrixType& m) {\n    m.matrix().template triangularView<StrictlyUpper>().swap(m.matrix().transpose().template triangularView<StrictlyUpper>());\n  }\n};\n\ntemplate<typename MatrixType>\nstruct inplace_transpose_selector<MatrixType,true,true> { // PacketSize x PacketSize\n  static void run(MatrixType& m) {\n    typedef typename MatrixType::Scalar Scalar;\n    typedef typename internal::packet_traits<typename MatrixType::Scalar>::type Packet;\n    const Index PacketSize = internal::packet_traits<Scalar>::size;\n    const Index Alignment = internal::evaluator<MatrixType>::Alignment;\n    PacketBlock<Packet> A;\n    for (Index i=0; i<PacketSize; ++i)\n      A.packet[i] = m.template packetByOuterInner<Alignment>(i,0);\n    internal::ptranspose(A);\n    for (Index i=0; i<PacketSize; ++i)\n      m.template writePacket<Alignment>(m.rowIndexByOuterInner(i,0), m.colIndexByOuterInner(i,0), A.packet[i]);\n  }\n};\n\n\ntemplate <typename MatrixType, Index Alignment>\nvoid BlockedInPlaceTranspose(MatrixType& m) {\n  typedef typename MatrixType::Scalar Scalar;\n  typedef typename internal::packet_traits<typename MatrixType::Scalar>::type Packet;\n  const Index PacketSize = internal::packet_traits<Scalar>::size;\n  eigen_assert(m.rows() == m.cols());\n  int row_start = 0;\n  for (; row_start + PacketSize <= m.rows(); row_start += PacketSize) {\n    for (int col_start = row_start; col_start + PacketSize <= m.cols(); col_start += PacketSize) {\n      PacketBlock<Packet> A;\n      if (row_start == col_start) {\n        for (Index i=0; i<PacketSize; ++i)\n          A.packet[i] = m.template packetByOuterInner<Alignment>(row_start + i,col_start);\n        internal::ptranspose(A);\n        for (Index i=0; i<PacketSize; ++i)\n          m.template writePacket<Alignment>(m.rowIndexByOuterInner(row_start + i, col_start), m.colIndexByOuterInner(row_start + i,col_start), A.packet[i]);\n      } else {\n        
PacketBlock<Packet> B;\n        for (Index i=0; i<PacketSize; ++i) {\n          A.packet[i] = m.template packetByOuterInner<Alignment>(row_start + i,col_start);\n          B.packet[i] = m.template packetByOuterInner<Alignment>(col_start + i, row_start);\n        }\n        internal::ptranspose(A);\n        internal::ptranspose(B);\n        for (Index i=0; i<PacketSize; ++i) {\n          m.template writePacket<Alignment>(m.rowIndexByOuterInner(row_start + i, col_start), m.colIndexByOuterInner(row_start + i,col_start), B.packet[i]);\n          m.template writePacket<Alignment>(m.rowIndexByOuterInner(col_start + i, row_start), m.colIndexByOuterInner(col_start + i,row_start), A.packet[i]);\n        }\n      }\n    }\n  }\n  for (Index row = row_start; row < m.rows(); ++row) {\n    m.matrix().row(row).head(row).swap(\n        m.matrix().col(row).head(row).transpose());\n  }\n}\n\ntemplate<typename MatrixType,bool MatchPacketSize>\nstruct inplace_transpose_selector<MatrixType,false,MatchPacketSize> { // non square or dynamic matrix\n  static void run(MatrixType& m) {\n    typedef typename MatrixType::Scalar Scalar;\n    if (m.rows() == m.cols()) {\n      const Index PacketSize = internal::packet_traits<Scalar>::size;\n      if (!NumTraits<Scalar>::IsComplex && m.rows() >= PacketSize) {\n        if ((m.rows() % PacketSize) == 0)\n          BlockedInPlaceTranspose<MatrixType,internal::evaluator<MatrixType>::Alignment>(m);\n        else\n          BlockedInPlaceTranspose<MatrixType,Unaligned>(m);\n      }\n      else {\n        m.matrix().template triangularView<StrictlyUpper>().swap(m.matrix().transpose().template triangularView<StrictlyUpper>());\n      }\n    } else {\n      m = m.transpose().eval();\n    }\n  }\n};\n\n\n} // end namespace internal\n\n/** This is the \"in place\" version of transpose(): it replaces \\c *this by its own transpose.\n  * Thus, doing\n  * \\code\n  * m.transposeInPlace();\n  * \\endcode\n  * has the same effect on m as doing\n  * \\code\n  * m = m.transpose().eval();\n  * \\endcode\n  * and is faster and also safer because in the latter line of code, forgetting the eval() results\n  * in a bug caused by \\ref TopicAliasing \"aliasing\".\n  *\n  * Notice however that this method is only useful if you want to replace a matrix by its own transpose.\n  * If you just need the transpose of a matrix, use transpose().\n  *\n  * \\note if the matrix is not square, then \\c *this must be a resizable matrix.\n  * This excludes (non-square) fixed-size matrices, block-expressions and maps.\n  *\n  * \\sa transpose(), adjoint(), adjointInPlace() */\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC inline void DenseBase<Derived>::transposeInPlace()\n{\n  eigen_assert((rows() == cols() || (RowsAtCompileTime == Dynamic && ColsAtCompileTime == Dynamic))\n               && \"transposeInPlace() called on a non-square non-resizable matrix\");\n  internal::inplace_transpose_selector<Derived>::run(derived());\n}\n\n/***************************************************************************\n* \"in place\" adjoint implementation\n***************************************************************************/\n\n/** This is the \"in place\" version of adjoint(): it replaces \\c *this by its own adjoint.\n  * Thus, doing\n  * \\code\n  * m.adjointInPlace();\n  * \\endcode\n  * has the same effect on m as doing\n  * \\code\n  * m = m.adjoint().eval();\n  * \\endcode\n  * and is faster and also safer because in the latter line of code, forgetting the eval() results\n  * in a bug caused by 
aliasing.\n  *\n  * Notice however that this method is only useful if you want to replace a matrix by its own adjoint.\n  * If you just need the adjoint of a matrix, use adjoint().\n  *\n  * \\note if the matrix is not square, then \\c *this must be a resizable matrix.\n  * This excludes (non-square) fixed-size matrices, block-expressions and maps.\n  *\n  * \\sa transpose(), adjoint(), transposeInPlace() */\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC inline void MatrixBase<Derived>::adjointInPlace()\n{\n  derived() = adjoint().eval();\n}\n\n#ifndef EIGEN_NO_DEBUG\n\n// The following is to detect aliasing problems in most common cases.\n\nnamespace internal {\n\ntemplate<bool DestIsTransposed, typename OtherDerived>\nstruct check_transpose_aliasing_compile_time_selector\n{\n  enum { ret = bool(blas_traits<OtherDerived>::IsTransposed) != DestIsTransposed };\n};\n\ntemplate<bool DestIsTransposed, typename BinOp, typename DerivedA, typename DerivedB>\nstruct check_transpose_aliasing_compile_time_selector<DestIsTransposed,CwiseBinaryOp<BinOp,DerivedA,DerivedB> >\n{\n  enum { ret =    bool(blas_traits<DerivedA>::IsTransposed) != DestIsTransposed\n               || bool(blas_traits<DerivedB>::IsTransposed) != DestIsTransposed\n  };\n};\n\ntemplate<typename Scalar, bool DestIsTransposed, typename OtherDerived>\nstruct check_transpose_aliasing_run_time_selector\n{\n  static bool run(const Scalar* dest, const OtherDerived& src)\n  {\n    return (bool(blas_traits<OtherDerived>::IsTransposed) != DestIsTransposed) && (dest!=0 && dest==(const Scalar*)extract_data(src));\n  }\n};\n\ntemplate<typename Scalar, bool DestIsTransposed, typename BinOp, typename DerivedA, typename DerivedB>\nstruct check_transpose_aliasing_run_time_selector<Scalar,DestIsTransposed,CwiseBinaryOp<BinOp,DerivedA,DerivedB> >\n{\n  static bool run(const Scalar* dest, const CwiseBinaryOp<BinOp,DerivedA,DerivedB>& src)\n  {\n    return ((blas_traits<DerivedA>::IsTransposed != DestIsTransposed) && (dest!=0 && dest==(const Scalar*)extract_data(src.lhs())))\n        || ((blas_traits<DerivedB>::IsTransposed != DestIsTransposed) && (dest!=0 && dest==(const Scalar*)extract_data(src.rhs())));\n  }\n};\n\n// the following selector, checkTransposeAliasing_impl, based on MightHaveTransposeAliasing,\n// is because when the condition controlling the assert is known at compile time, ICC emits a warning.\n// This is actually a good warning: in expressions that don't have any transposing, the condition is\n// known at compile time to be false, and using that, we can avoid generating the code of the assert again\n// and again for all these expressions that don't need it.\n\ntemplate<typename Derived, typename OtherDerived,\n         bool MightHaveTransposeAliasing\n                 = check_transpose_aliasing_compile_time_selector\n                     <blas_traits<Derived>::IsTransposed,OtherDerived>::ret\n        >\nstruct checkTransposeAliasing_impl\n{\n    static void run(const Derived& dst, const OtherDerived& other)\n    {\n        eigen_assert((!check_transpose_aliasing_run_time_selector\n                      <typename Derived::Scalar,blas_traits<Derived>::IsTransposed,OtherDerived>\n                      ::run(extract_data(dst), other))\n          && \"aliasing detected during transposition, use transposeInPlace() \"\n             \"or evaluate the rhs into a temporary using .eval()\");\n\n    }\n};\n\ntemplate<typename Derived, typename OtherDerived>\nstruct checkTransposeAliasing_impl<Derived, OtherDerived, false>\n{\n    static void 
run(const Derived&, const OtherDerived&)\n    {\n    }\n};\n\ntemplate<typename Dst, typename Src>\nvoid check_for_aliasing(const Dst &dst, const Src &src)\n{\n  if((!Dst::IsVectorAtCompileTime) && dst.rows()>1 && dst.cols()>1)\n    internal::checkTransposeAliasing_impl<Dst, Src>::run(dst, src);\n}\n\n} // end namespace internal\n\n#endif // EIGEN_NO_DEBUG\n\n} // end namespace Eigen\n\n#endif // EIGEN_TRANSPOSE_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/Transpositions.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2010-2011 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_TRANSPOSITIONS_H\n#define EIGEN_TRANSPOSITIONS_H\n\nnamespace Eigen {\n\ntemplate<typename Derived>\nclass TranspositionsBase\n{\n    typedef internal::traits<Derived> Traits;\n\n  public:\n\n    typedef typename Traits::IndicesType IndicesType;\n    typedef typename IndicesType::Scalar StorageIndex;\n    typedef Eigen::Index Index; ///< \\deprecated since Eigen 3.3\n\n    EIGEN_DEVICE_FUNC\n    Derived& derived() { return *static_cast<Derived*>(this); }\n    EIGEN_DEVICE_FUNC\n    const Derived& derived() const { return *static_cast<const Derived*>(this); }\n\n    /** Copies the \\a other transpositions into \\c *this */\n    template<typename OtherDerived>\n    Derived& operator=(const TranspositionsBase<OtherDerived>& other)\n    {\n      indices() = other.indices();\n      return derived();\n    }\n\n    /** \\returns the number of transpositions */\n    EIGEN_DEVICE_FUNC\n    Index size() const { return indices().size(); }\n    /** \\returns the number of rows of the equivalent permutation matrix */\n    EIGEN_DEVICE_FUNC\n    Index rows() const { return indices().size(); }\n    /** \\returns the number of columns of the equivalent permutation matrix */\n    EIGEN_DEVICE_FUNC\n    Index cols() const { return indices().size(); }\n\n    /** Direct access to the underlying index vector */\n    EIGEN_DEVICE_FUNC\n    inline const StorageIndex& coeff(Index i) const { return indices().coeff(i); }\n    /** Direct access to the underlying index vector */\n    inline StorageIndex& coeffRef(Index i) { return indices().coeffRef(i); }\n    /** Direct access to the underlying index vector */\n    inline const StorageIndex& operator()(Index i) const { return indices()(i); }\n    /** Direct access to the underlying index vector */\n    inline StorageIndex& operator()(Index i) { return indices()(i); }\n    /** Direct access to the underlying index vector */\n    inline const StorageIndex& operator[](Index i) const { return indices()(i); }\n    /** Direct access to the underlying index vector */\n    inline StorageIndex& operator[](Index i) { return indices()(i); }\n\n    /** const version of indices(). */\n    EIGEN_DEVICE_FUNC\n    const IndicesType& indices() const { return derived().indices(); }\n    /** \\returns a reference to the stored array representing the transpositions. */\n    EIGEN_DEVICE_FUNC\n    IndicesType& indices() { return derived().indices(); }\n\n    /** Resizes to given size. */\n    inline void resize(Index newSize)\n    {\n      indices().resize(newSize);\n    }\n\n    /** Sets \\c *this to represents an identity transformation */\n    void setIdentity()\n    {\n      for(StorageIndex i = 0; i < indices().size(); ++i)\n        coeffRef(i) = i;\n    }\n\n    // FIXME: do we want such methods ?\n    // might be useful when the target matrix expression is complex, e.g.:\n    // object.matrix().block(..,..,..,..) 
= trans * object.matrix().block(..,..,..,..);\n    /*\n    template<typename MatrixType>\n    void applyForwardToRows(MatrixType& mat) const\n    {\n      for(Index k=0 ; k<size() ; ++k)\n        if(m_indices(k)!=k)\n          mat.row(k).swap(mat.row(m_indices(k)));\n    }\n\n    template<typename MatrixType>\n    void applyBackwardToRows(MatrixType& mat) const\n    {\n      for(Index k=size()-1 ; k>=0 ; --k)\n        if(m_indices(k)!=k)\n          mat.row(k).swap(mat.row(m_indices(k)));\n    }\n    */\n\n    /** \\returns the inverse transformation */\n    inline Transpose<TranspositionsBase> inverse() const\n    { return Transpose<TranspositionsBase>(derived()); }\n\n    /** \\returns the transpose transformation */\n    inline Transpose<TranspositionsBase> transpose() const\n    { return Transpose<TranspositionsBase>(derived()); }\n\n  protected:\n};\n\nnamespace internal {\ntemplate<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndex>\nstruct traits<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageIndex> >\n : traits<PermutationMatrix<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageIndex> >\n{\n  typedef Matrix<_StorageIndex, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1> IndicesType;\n  typedef TranspositionsStorage StorageKind;\n};\n}\n\n/** \\class Transpositions\n  * \\ingroup Core_Module\n  *\n  * \\brief Represents a sequence of transpositions (row/column interchange)\n  *\n  * \\tparam SizeAtCompileTime the number of transpositions, or Dynamic\n  * \\tparam MaxSizeAtCompileTime the maximum number of transpositions, or Dynamic. This optional parameter defaults to SizeAtCompileTime. Most of the time, you should not have to specify it.\n  *\n  * This class represents a permutation transformation as a sequence of \\em n transpositions\n  * \\f$[T_{n-1} \\ldots T_{i} \\ldots T_{0}]\\f$. It is internally stored as a vector of integers \\c indices.\n  * Each transposition \\f$ T_{i} \\f$ applied on the left of a matrix (\\f$ T_{i} M\\f$) interchanges\n  * the rows \\c i and \\c indices[i] of the matrix \\c M.\n  * A transposition applied on the right (e.g., \\f$ M T_{i}\\f$) yields a column interchange.\n  *\n  * Compared to the class PermutationMatrix, such a sequence of transpositions is what is\n  * computed during a decomposition with pivoting, and it is faster when applying the permutation in-place.\n  *\n  * To apply a sequence of transpositions to a matrix, simply use the operator * as in the following example:\n  * \\code\n  * Transpositions tr;\n  * MatrixXf mat;\n  * mat = tr * mat;\n  * \\endcode\n  * In this example, we detect that the matrix appears on both sides, and so the transpositions\n  * are applied in-place without any temporary or extra copy.\n  *\n  * \\sa class PermutationMatrix\n  */\n\ntemplate<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndex>\nclass Transpositions : public TranspositionsBase<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageIndex> >\n{\n    typedef internal::traits<Transpositions> Traits;\n  public:\n\n    typedef TranspositionsBase<Transpositions> Base;\n    typedef typename Traits::IndicesType IndicesType;\n    typedef typename IndicesType::Scalar StorageIndex;\n\n    inline Transpositions() {}\n\n    /** Copy constructor. */\n    template<typename OtherDerived>\n    inline Transpositions(const TranspositionsBase<OtherDerived>& other)\n      : m_indices(other.indices()) {}\n\n    /** Generic constructor from expression of the transposition indices. 
*/\n    template<typename Other>\n    explicit inline Transpositions(const MatrixBase<Other>& indices) : m_indices(indices)\n    {}\n\n    /** Copies the \\a other transpositions into \\c *this */\n    template<typename OtherDerived>\n    Transpositions& operator=(const TranspositionsBase<OtherDerived>& other)\n    {\n      return Base::operator=(other);\n    }\n\n    /** Constructs an uninitialized permutation matrix of given size.\n      */\n    inline Transpositions(Index size) : m_indices(size)\n    {}\n\n    /** const version of indices(). */\n    EIGEN_DEVICE_FUNC\n    const IndicesType& indices() const { return m_indices; }\n    /** \\returns a reference to the stored array representing the transpositions. */\n    EIGEN_DEVICE_FUNC\n    IndicesType& indices() { return m_indices; }\n\n  protected:\n\n    IndicesType m_indices;\n};\n\n\nnamespace internal {\ntemplate<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndex, int _PacketAccess>\nstruct traits<Map<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageIndex>,_PacketAccess> >\n : traits<PermutationMatrix<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageIndex> >\n{\n  typedef Map<const Matrix<_StorageIndex,SizeAtCompileTime,1,0,MaxSizeAtCompileTime,1>, _PacketAccess> IndicesType;\n  typedef _StorageIndex StorageIndex;\n  typedef TranspositionsStorage StorageKind;\n};\n}\n\ntemplate<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndex, int PacketAccess>\nclass Map<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageIndex>,PacketAccess>\n : public TranspositionsBase<Map<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageIndex>,PacketAccess> >\n{\n    typedef internal::traits<Map> Traits;\n  public:\n\n    typedef TranspositionsBase<Map> Base;\n    typedef typename Traits::IndicesType IndicesType;\n    typedef typename IndicesType::Scalar StorageIndex;\n\n    explicit inline Map(const StorageIndex* indicesPtr)\n      : m_indices(indicesPtr)\n    {}\n\n    inline Map(const StorageIndex* indicesPtr, Index size)\n      : m_indices(indicesPtr,size)\n    {}\n\n    /** Copies the \\a other transpositions into \\c *this */\n    template<typename OtherDerived>\n    Map& operator=(const TranspositionsBase<OtherDerived>& other)\n    {\n      return Base::operator=(other);\n    }\n\n    #ifndef EIGEN_PARSED_BY_DOXYGEN\n    /** This is a special case of the templated operator=. Its purpose is to\n      * prevent a default operator= from hiding the templated operator=.\n      */\n    Map& operator=(const Map& other)\n    {\n      m_indices = other.m_indices;\n      return *this;\n    }\n    #endif\n\n    /** const version of indices(). */\n    EIGEN_DEVICE_FUNC\n    const IndicesType& indices() const { return m_indices; }\n\n    /** \\returns a reference to the stored array representing the transpositions. 
*/\n    EIGEN_DEVICE_FUNC\n    IndicesType& indices() { return m_indices; }\n\n  protected:\n\n    IndicesType m_indices;\n};\n\nnamespace internal {\ntemplate<typename _IndicesType>\nstruct traits<TranspositionsWrapper<_IndicesType> >\n : traits<PermutationWrapper<_IndicesType> >\n{\n  typedef TranspositionsStorage StorageKind;\n};\n}\n\ntemplate<typename _IndicesType>\nclass TranspositionsWrapper\n : public TranspositionsBase<TranspositionsWrapper<_IndicesType> >\n{\n    typedef internal::traits<TranspositionsWrapper> Traits;\n  public:\n\n    typedef TranspositionsBase<TranspositionsWrapper> Base;\n    typedef typename Traits::IndicesType IndicesType;\n    typedef typename IndicesType::Scalar StorageIndex;\n\n    explicit inline TranspositionsWrapper(IndicesType& indices)\n      : m_indices(indices)\n    {}\n\n    /** Copies the \\a other transpositions into \\c *this */\n    template<typename OtherDerived>\n    TranspositionsWrapper& operator=(const TranspositionsBase<OtherDerived>& other)\n    {\n      return Base::operator=(other);\n    }\n\n    /** const version of indices(). */\n    EIGEN_DEVICE_FUNC\n    const IndicesType& indices() const { return m_indices; }\n\n    /** \\returns a reference to the stored array representing the transpositions. */\n    EIGEN_DEVICE_FUNC\n    IndicesType& indices() { return m_indices; }\n\n  protected:\n\n    typename IndicesType::Nested m_indices;\n};\n\n\n\n/** \\returns the \\a matrix with the \\a transpositions applied to the columns.\n  */\ntemplate<typename MatrixDerived, typename TranspositionsDerived>\nEIGEN_DEVICE_FUNC\nconst Product<MatrixDerived, TranspositionsDerived, AliasFreeProduct>\noperator*(const MatrixBase<MatrixDerived> &matrix,\n          const TranspositionsBase<TranspositionsDerived>& transpositions)\n{\n  return Product<MatrixDerived, TranspositionsDerived, AliasFreeProduct>\n            (matrix.derived(), transpositions.derived());\n}\n\n/** \\returns the \\a matrix with the \\a transpositions applied to the rows.\n  */\ntemplate<typename TranspositionsDerived, typename MatrixDerived>\nEIGEN_DEVICE_FUNC\nconst Product<TranspositionsDerived, MatrixDerived, AliasFreeProduct>\noperator*(const TranspositionsBase<TranspositionsDerived> &transpositions,\n          const MatrixBase<MatrixDerived>& matrix)\n{\n  return Product<TranspositionsDerived, MatrixDerived, AliasFreeProduct>\n            (transpositions.derived(), matrix.derived());\n}\n\n// Template partial specialization for transposed/inverse transpositions\n\nnamespace internal {\n\ntemplate<typename Derived>\nstruct traits<Transpose<TranspositionsBase<Derived> > >\n : traits<Derived>\n{};\n\n} // end namespace internal\n\ntemplate<typename TranspositionsDerived>\nclass Transpose<TranspositionsBase<TranspositionsDerived> >\n{\n    typedef TranspositionsDerived TranspositionType;\n    typedef typename TranspositionType::IndicesType IndicesType;\n  public:\n\n    explicit Transpose(const TranspositionType& t) : m_transpositions(t) {}\n\n    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n    Index size() const EIGEN_NOEXCEPT { return m_transpositions.size(); }\n    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n    Index rows() const EIGEN_NOEXCEPT { return m_transpositions.size(); }\n    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n    Index cols() const EIGEN_NOEXCEPT { return m_transpositions.size(); }\n\n    /** \\returns the \\a matrix with the inverse transpositions applied to the columns.\n      */\n    template<typename OtherDerived> friend\n    const Product<OtherDerived, Transpose, 
AliasFreeProduct>\n    operator*(const MatrixBase<OtherDerived>& matrix, const Transpose& trt)\n    {\n      return Product<OtherDerived, Transpose, AliasFreeProduct>(matrix.derived(), trt);\n    }\n\n    /** \\returns the \\a matrix with the inverse transpositions applied to the rows.\n      */\n    template<typename OtherDerived>\n    const Product<Transpose, OtherDerived, AliasFreeProduct>\n    operator*(const MatrixBase<OtherDerived>& matrix) const\n    {\n      return Product<Transpose, OtherDerived, AliasFreeProduct>(*this, matrix.derived());\n    }\n\n    EIGEN_DEVICE_FUNC\n    const TranspositionType& nestedExpression() const { return m_transpositions; }\n\n  protected:\n    const TranspositionType& m_transpositions;\n};\n\n} // end namespace Eigen\n\n#endif // EIGEN_TRANSPOSITIONS_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/TriangularMatrix.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008 Benoit Jacob <jacob.benoit.1@gmail.com>\n// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_TRIANGULARMATRIX_H\n#define EIGEN_TRIANGULARMATRIX_H\n\nnamespace Eigen {\n\nnamespace internal {\n\ntemplate<int Side, typename TriangularType, typename Rhs> struct triangular_solve_retval;\n\n}\n\n/** \\class TriangularBase\n  * \\ingroup Core_Module\n  *\n  * \\brief Base class for triangular part in a matrix\n  */\ntemplate<typename Derived> class TriangularBase : public EigenBase<Derived>\n{\n  public:\n\n    enum {\n      Mode = internal::traits<Derived>::Mode,\n      RowsAtCompileTime = internal::traits<Derived>::RowsAtCompileTime,\n      ColsAtCompileTime = internal::traits<Derived>::ColsAtCompileTime,\n      MaxRowsAtCompileTime = internal::traits<Derived>::MaxRowsAtCompileTime,\n      MaxColsAtCompileTime = internal::traits<Derived>::MaxColsAtCompileTime,\n\n      SizeAtCompileTime = (internal::size_at_compile_time<internal::traits<Derived>::RowsAtCompileTime,\n                                                   internal::traits<Derived>::ColsAtCompileTime>::ret),\n      /**< This is equal to the number of coefficients, i.e. the number of\n          * rows times the number of columns, or to \\a Dynamic if this is not\n          * known at compile-time. \\sa RowsAtCompileTime, ColsAtCompileTime */\n\n      MaxSizeAtCompileTime = (internal::size_at_compile_time<internal::traits<Derived>::MaxRowsAtCompileTime,\n                                                   internal::traits<Derived>::MaxColsAtCompileTime>::ret)\n\n    };\n    typedef typename internal::traits<Derived>::Scalar Scalar;\n    typedef typename internal::traits<Derived>::StorageKind StorageKind;\n    typedef typename internal::traits<Derived>::StorageIndex StorageIndex;\n    typedef typename internal::traits<Derived>::FullMatrixType DenseMatrixType;\n    typedef DenseMatrixType DenseType;\n    typedef Derived const& Nested;\n\n    EIGEN_DEVICE_FUNC\n    inline TriangularBase() { eigen_assert(!((int(Mode) & int(UnitDiag)) && (int(Mode) & int(ZeroDiag)))); }\n\n    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n    inline Index rows() const EIGEN_NOEXCEPT { return derived().rows(); }\n    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n    inline Index cols() const EIGEN_NOEXCEPT { return derived().cols(); }\n    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n    inline Index outerStride() const EIGEN_NOEXCEPT { return derived().outerStride(); }\n    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n    inline Index innerStride() const EIGEN_NOEXCEPT { return derived().innerStride(); }\n\n    // dummy resize function\n    EIGEN_DEVICE_FUNC\n    void resize(Index rows, Index cols)\n    {\n      EIGEN_UNUSED_VARIABLE(rows);\n      EIGEN_UNUSED_VARIABLE(cols);\n      eigen_assert(rows==this->rows() && cols==this->cols());\n    }\n\n    EIGEN_DEVICE_FUNC\n    inline Scalar coeff(Index row, Index col) const  { return derived().coeff(row,col); }\n    EIGEN_DEVICE_FUNC\n    inline Scalar& coeffRef(Index row, Index col) { return derived().coeffRef(row,col); }\n\n    /** \\see MatrixBase::copyCoeff(row,col)\n      */\n    template<typename Other>\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE void copyCoeff(Index row, 
Index col, Other& other)\n    {\n      derived().coeffRef(row, col) = other.coeff(row, col);\n    }\n\n    EIGEN_DEVICE_FUNC\n    inline Scalar operator()(Index row, Index col) const\n    {\n      check_coordinates(row, col);\n      return coeff(row,col);\n    }\n    EIGEN_DEVICE_FUNC\n    inline Scalar& operator()(Index row, Index col)\n    {\n      check_coordinates(row, col);\n      return coeffRef(row,col);\n    }\n\n    #ifndef EIGEN_PARSED_BY_DOXYGEN\n    EIGEN_DEVICE_FUNC\n    inline const Derived& derived() const { return *static_cast<const Derived*>(this); }\n    EIGEN_DEVICE_FUNC\n    inline Derived& derived() { return *static_cast<Derived*>(this); }\n    #endif // not EIGEN_PARSED_BY_DOXYGEN\n\n    template<typename DenseDerived>\n    EIGEN_DEVICE_FUNC\n    void evalTo(MatrixBase<DenseDerived> &other) const;\n    template<typename DenseDerived>\n    EIGEN_DEVICE_FUNC\n    void evalToLazy(MatrixBase<DenseDerived> &other) const;\n\n    EIGEN_DEVICE_FUNC\n    DenseMatrixType toDenseMatrix() const\n    {\n      DenseMatrixType res(rows(), cols());\n      evalToLazy(res);\n      return res;\n    }\n\n  protected:\n\n    void check_coordinates(Index row, Index col) const\n    {\n      EIGEN_ONLY_USED_FOR_DEBUG(row);\n      EIGEN_ONLY_USED_FOR_DEBUG(col);\n      eigen_assert(col>=0 && col<cols() && row>=0 && row<rows());\n      const int mode = int(Mode) & ~SelfAdjoint;\n      EIGEN_ONLY_USED_FOR_DEBUG(mode);\n      eigen_assert((mode==Upper && col>=row)\n                || (mode==Lower && col<=row)\n                || ((mode==StrictlyUpper || mode==UnitUpper) && col>row)\n                || ((mode==StrictlyLower || mode==UnitLower) && col<row));\n    }\n\n    #ifdef EIGEN_INTERNAL_DEBUGGING\n    void check_coordinates_internal(Index row, Index col) const\n    {\n      check_coordinates(row, col);\n    }\n    #else\n    void check_coordinates_internal(Index , Index ) const {}\n    #endif\n\n};\n\n/** \\class TriangularView\n  * \\ingroup Core_Module\n  *\n  * \\brief Expression of a triangular part in a matrix\n  *\n  * \\param MatrixType the type of the object in which we are taking the triangular part\n  * \\param Mode the kind of triangular matrix expression to construct. Can be #Upper,\n  *             #Lower, #UnitUpper, #UnitLower, #StrictlyUpper, or #StrictlyLower.\n  *             This is in fact a bit field; it must have either #Upper or #Lower,\n  *             and additionally it may have #UnitDiag or #ZeroDiag or neither.\n  *\n  * This class represents a triangular part of a matrix, not necessarily square. Strictly speaking, for rectangular\n  * matrices one should speak of \"trapezoid\" parts. This class is the return type\n  * of MatrixBase::triangularView() and SparseMatrixBase::triangularView(), and most of the time this is the only way it is used.\n  *\n  * \\sa MatrixBase::triangularView()\n  */\nnamespace internal {\ntemplate<typename MatrixType, unsigned int _Mode>\nstruct traits<TriangularView<MatrixType, _Mode> > : traits<MatrixType>\n{\n  typedef typename ref_selector<MatrixType>::non_const_type MatrixTypeNested;\n  typedef typename remove_reference<MatrixTypeNested>::type MatrixTypeNestedNonRef;\n  typedef typename remove_all<MatrixTypeNested>::type MatrixTypeNestedCleaned;\n  typedef typename MatrixType::PlainObject FullMatrixType;\n  typedef MatrixType ExpressionType;\n  enum {\n    Mode = _Mode,\n    FlagsLvalueBit = is_lvalue<MatrixType>::value ? 
LvalueBit : 0,\n    Flags = (MatrixTypeNestedCleaned::Flags & (HereditaryBits | FlagsLvalueBit) & (~(PacketAccessBit | DirectAccessBit | LinearAccessBit)))\n  };\n};\n}\n\ntemplate<typename _MatrixType, unsigned int _Mode, typename StorageKind> class TriangularViewImpl;\n\ntemplate<typename _MatrixType, unsigned int _Mode> class TriangularView\n  : public TriangularViewImpl<_MatrixType, _Mode, typename internal::traits<_MatrixType>::StorageKind >\n{\n  public:\n\n    typedef TriangularViewImpl<_MatrixType, _Mode, typename internal::traits<_MatrixType>::StorageKind > Base;\n    typedef typename internal::traits<TriangularView>::Scalar Scalar;\n    typedef _MatrixType MatrixType;\n\n  protected:\n    typedef typename internal::traits<TriangularView>::MatrixTypeNested MatrixTypeNested;\n    typedef typename internal::traits<TriangularView>::MatrixTypeNestedNonRef MatrixTypeNestedNonRef;\n\n    typedef typename internal::remove_all<typename MatrixType::ConjugateReturnType>::type MatrixConjugateReturnType;\n    typedef TriangularView<typename internal::add_const<MatrixType>::type, _Mode> ConstTriangularView;\n\n  public:\n\n    typedef typename internal::traits<TriangularView>::StorageKind StorageKind;\n    typedef typename internal::traits<TriangularView>::MatrixTypeNestedCleaned NestedExpression;\n\n    enum {\n      Mode = _Mode,\n      Flags = internal::traits<TriangularView>::Flags,\n      TransposeMode = (Mode & Upper ? Lower : 0)\n                    | (Mode & Lower ? Upper : 0)\n                    | (Mode & (UnitDiag))\n                    | (Mode & (ZeroDiag)),\n      IsVectorAtCompileTime = false\n    };\n\n    EIGEN_DEVICE_FUNC\n    explicit inline TriangularView(MatrixType& matrix) : m_matrix(matrix)\n    {}\n\n    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(TriangularView)\n\n    /** \\copydoc EigenBase::rows() */\n    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n    inline Index rows() const EIGEN_NOEXCEPT { return m_matrix.rows(); }\n    /** \\copydoc EigenBase::cols() */\n    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n    inline Index cols() const EIGEN_NOEXCEPT { return m_matrix.cols(); }\n\n    /** \\returns a const reference to the nested expression */\n    EIGEN_DEVICE_FUNC\n    const NestedExpression& nestedExpression() const { return m_matrix; }\n\n    /** \\returns a reference to the nested expression */\n    EIGEN_DEVICE_FUNC\n    NestedExpression& nestedExpression() { return m_matrix; }\n\n    typedef TriangularView<const MatrixConjugateReturnType,Mode> ConjugateReturnType;\n    /** \\sa MatrixBase::conjugate() const */\n    EIGEN_DEVICE_FUNC\n    inline const ConjugateReturnType conjugate() const\n    { return ConjugateReturnType(m_matrix.conjugate()); }\n\n    /** \\returns an expression of the complex conjugate of \\c *this if Cond==true,\n     *           returns \\c *this otherwise.\n     */\n    template<bool Cond>\n    EIGEN_DEVICE_FUNC\n    inline typename internal::conditional<Cond,ConjugateReturnType,ConstTriangularView>::type\n    conjugateIf() const\n    {\n      typedef typename internal::conditional<Cond,ConjugateReturnType,ConstTriangularView>::type ReturnType;\n      return ReturnType(m_matrix.template conjugateIf<Cond>());\n    }\n\n    typedef TriangularView<const typename MatrixType::AdjointReturnType,TransposeMode> AdjointReturnType;\n    /** \\sa MatrixBase::adjoint() const */\n    EIGEN_DEVICE_FUNC\n    inline const AdjointReturnType adjoint() const\n    { return AdjointReturnType(m_matrix.adjoint()); }\n\n    typedef TriangularView<typename 
MatrixType::TransposeReturnType,TransposeMode> TransposeReturnType;\n     /** \\sa MatrixBase::transpose() */\n    EIGEN_DEVICE_FUNC\n    inline TransposeReturnType transpose()\n    {\n      EIGEN_STATIC_ASSERT_LVALUE(MatrixType)\n      typename MatrixType::TransposeReturnType tmp(m_matrix);\n      return TransposeReturnType(tmp);\n    }\n\n    typedef TriangularView<const typename MatrixType::ConstTransposeReturnType,TransposeMode> ConstTransposeReturnType;\n    /** \\sa MatrixBase::transpose() const */\n    EIGEN_DEVICE_FUNC\n    inline const ConstTransposeReturnType transpose() const\n    {\n      return ConstTransposeReturnType(m_matrix.transpose());\n    }\n\n    template<typename Other>\n    EIGEN_DEVICE_FUNC\n    inline const Solve<TriangularView, Other>\n    solve(const MatrixBase<Other>& other) const\n    { return Solve<TriangularView, Other>(*this, other.derived()); }\n\n  // workaround MSVC ICE\n  #if EIGEN_COMP_MSVC\n    template<int Side, typename Other>\n    EIGEN_DEVICE_FUNC\n    inline const internal::triangular_solve_retval<Side,TriangularView, Other>\n    solve(const MatrixBase<Other>& other) const\n    { return Base::template solve<Side>(other); }\n  #else\n    using Base::solve;\n  #endif\n\n    /** \\returns a selfadjoint view of the referenced triangular part which must be either \\c #Upper or \\c #Lower.\n      *\n      * This is a shortcut for \\code this->nestedExpression().selfadjointView<(*this)::Mode>() \\endcode\n      * \\sa MatrixBase::selfadjointView() */\n    EIGEN_DEVICE_FUNC\n    SelfAdjointView<MatrixTypeNestedNonRef,Mode> selfadjointView()\n    {\n      EIGEN_STATIC_ASSERT((Mode&(UnitDiag|ZeroDiag))==0,PROGRAMMING_ERROR);\n      return SelfAdjointView<MatrixTypeNestedNonRef,Mode>(m_matrix);\n    }\n\n    /** This is the const version of selfadjointView() */\n    EIGEN_DEVICE_FUNC\n    const SelfAdjointView<MatrixTypeNestedNonRef,Mode> selfadjointView() const\n    {\n      EIGEN_STATIC_ASSERT((Mode&(UnitDiag|ZeroDiag))==0,PROGRAMMING_ERROR);\n      return SelfAdjointView<MatrixTypeNestedNonRef,Mode>(m_matrix);\n    }\n\n\n    /** \\returns the determinant of the triangular matrix\n      * \\sa MatrixBase::determinant() */\n    EIGEN_DEVICE_FUNC\n    Scalar determinant() const\n    {\n      if (Mode & UnitDiag)\n        return 1;\n      else if (Mode & ZeroDiag)\n        return 0;\n      else\n        return m_matrix.diagonal().prod();\n    }\n\n  protected:\n\n    MatrixTypeNested m_matrix;\n};\n\n/** \\ingroup Core_Module\n  *\n  * \\brief Base class for a triangular part in a \\b dense matrix\n  *\n  * This class is an abstract base class of class TriangularView, and objects of type TriangularViewImpl cannot be instantiated.\n  * It extends class TriangularView with additional methods which are available for dense expressions only.\n  *\n  * \\sa class TriangularView, MatrixBase::triangularView()\n  */\ntemplate<typename _MatrixType, unsigned int _Mode> class TriangularViewImpl<_MatrixType,_Mode,Dense>\n  : public TriangularBase<TriangularView<_MatrixType, _Mode> >\n{\n  public:\n\n    typedef TriangularView<_MatrixType, _Mode> TriangularViewType;\n    typedef TriangularBase<TriangularViewType> Base;\n    typedef typename internal::traits<TriangularViewType>::Scalar Scalar;\n\n    typedef _MatrixType MatrixType;\n    typedef typename MatrixType::PlainObject DenseMatrixType;\n    typedef DenseMatrixType PlainObject;\n\n  public:\n    using Base::evalToLazy;\n    using Base::derived;\n\n    typedef typename internal::traits<TriangularViewType>::StorageKind 
StorageKind;\n\n    enum {\n      Mode = _Mode,\n      Flags = internal::traits<TriangularViewType>::Flags\n    };\n\n    /** \\returns the outer-stride of the underlying dense matrix\n      * \\sa DenseCoeffsBase::outerStride() */\n    EIGEN_DEVICE_FUNC\n    inline Index outerStride() const { return derived().nestedExpression().outerStride(); }\n    /** \\returns the inner-stride of the underlying dense matrix\n      * \\sa DenseCoeffsBase::innerStride() */\n    EIGEN_DEVICE_FUNC\n    inline Index innerStride() const { return derived().nestedExpression().innerStride(); }\n\n    /** \\sa MatrixBase::operator+=() */\n    template<typename Other>\n    EIGEN_DEVICE_FUNC\n    TriangularViewType&  operator+=(const DenseBase<Other>& other) {\n      internal::call_assignment_no_alias(derived(), other.derived(), internal::add_assign_op<Scalar,typename Other::Scalar>());\n      return derived();\n    }\n    /** \\sa MatrixBase::operator-=() */\n    template<typename Other>\n    EIGEN_DEVICE_FUNC\n    TriangularViewType&  operator-=(const DenseBase<Other>& other) {\n      internal::call_assignment_no_alias(derived(), other.derived(), internal::sub_assign_op<Scalar,typename Other::Scalar>());\n      return derived();\n    }\n\n    /** \\sa MatrixBase::operator*=() */\n    EIGEN_DEVICE_FUNC\n    TriangularViewType&  operator*=(const typename internal::traits<MatrixType>::Scalar& other) { return *this = derived().nestedExpression() * other; }\n    /** \\sa DenseBase::operator/=() */\n    EIGEN_DEVICE_FUNC\n    TriangularViewType&  operator/=(const typename internal::traits<MatrixType>::Scalar& other) { return *this = derived().nestedExpression() / other; }\n\n    /** \\sa MatrixBase::fill() */\n    EIGEN_DEVICE_FUNC\n    void fill(const Scalar& value) { setConstant(value); }\n    /** \\sa MatrixBase::setConstant() */\n    EIGEN_DEVICE_FUNC\n    TriangularViewType& setConstant(const Scalar& value)\n    { return *this = MatrixType::Constant(derived().rows(), derived().cols(), value); }\n    /** \\sa MatrixBase::setZero() */\n    EIGEN_DEVICE_FUNC\n    TriangularViewType& setZero() { return setConstant(Scalar(0)); }\n    /** \\sa MatrixBase::setOnes() */\n    EIGEN_DEVICE_FUNC\n    TriangularViewType& setOnes() { return setConstant(Scalar(1)); }\n\n    /** \\sa MatrixBase::coeff()\n      * \\warning the coordinates must fit into the referenced triangular part\n      */\n    EIGEN_DEVICE_FUNC\n    inline Scalar coeff(Index row, Index col) const\n    {\n      Base::check_coordinates_internal(row, col);\n      return derived().nestedExpression().coeff(row, col);\n    }\n\n    /** \\sa MatrixBase::coeffRef()\n      * \\warning the coordinates must fit into the referenced triangular part\n      */\n    EIGEN_DEVICE_FUNC\n    inline Scalar& coeffRef(Index row, Index col)\n    {\n      EIGEN_STATIC_ASSERT_LVALUE(TriangularViewType);\n      Base::check_coordinates_internal(row, col);\n      return derived().nestedExpression().coeffRef(row, col);\n    }\n\n    /** Assigns a triangular matrix to a triangular part of a dense matrix */\n    template<typename OtherDerived>\n    EIGEN_DEVICE_FUNC\n    TriangularViewType& operator=(const TriangularBase<OtherDerived>& other);\n\n    /** Shortcut for \\code *this = other.triangularView<(*this)::Mode>() \\endcode */\n    template<typename OtherDerived>\n    EIGEN_DEVICE_FUNC\n    TriangularViewType& operator=(const MatrixBase<OtherDerived>& other);\n\n#ifndef EIGEN_PARSED_BY_DOXYGEN\n    EIGEN_DEVICE_FUNC\n    TriangularViewType& operator=(const TriangularViewImpl& 
other)\n    { return *this = other.derived().nestedExpression(); }\n\n    template<typename OtherDerived>\n    /** \\deprecated */\n    EIGEN_DEPRECATED EIGEN_DEVICE_FUNC\n    void lazyAssign(const TriangularBase<OtherDerived>& other);\n\n    template<typename OtherDerived>\n    /** \\deprecated */\n    EIGEN_DEPRECATED EIGEN_DEVICE_FUNC\n    void lazyAssign(const MatrixBase<OtherDerived>& other);\n#endif\n\n    /** Efficient triangular matrix times vector/matrix product */\n    template<typename OtherDerived>\n    EIGEN_DEVICE_FUNC\n    const Product<TriangularViewType,OtherDerived>\n    operator*(const MatrixBase<OtherDerived>& rhs) const\n    {\n      return Product<TriangularViewType,OtherDerived>(derived(), rhs.derived());\n    }\n\n    /** Efficient vector/matrix times triangular matrix product */\n    template<typename OtherDerived> friend\n    EIGEN_DEVICE_FUNC\n    const Product<OtherDerived,TriangularViewType>\n    operator*(const MatrixBase<OtherDerived>& lhs, const TriangularViewImpl& rhs)\n    {\n      return Product<OtherDerived,TriangularViewType>(lhs.derived(),rhs.derived());\n    }\n\n    /** \\returns the product of the inverse of \\c *this with \\a other, \\a *this being triangular.\n      *\n      * This function computes the inverse-matrix matrix product inverse(\\c *this) * \\a other if\n      * \\a Side==OnTheLeft (the default), or the right-inverse-multiply  \\a other * inverse(\\c *this) if\n      * \\a Side==OnTheRight.\n      *\n      * Note that the template parameter \\c Side can be omitted, in which case \\c Side==OnTheLeft\n      *\n      * The matrix \\c *this must be triangular and invertible (i.e., all the coefficients of the\n      * diagonal must be non zero). It works as a forward (resp. backward) substitution if \\c *this\n      * is an upper (resp. 
lower) triangular matrix.\n      *\n      * Example: \\include Triangular_solve.cpp\n      * Output: \\verbinclude Triangular_solve.out\n      *\n      * This function returns an expression of the inverse-multiply and can work in-place if it is assigned\n      * to the same matrix or vector \\a other.\n      *\n      * For users coming from BLAS, this function (and more specifically solveInPlace()) offers\n      * all the operations supported by the \\c *TRSV and \\c *TRSM BLAS routines.\n      *\n      * \\sa TriangularView::solveInPlace()\n      */\n    template<int Side, typename Other>\n    inline const internal::triangular_solve_retval<Side,TriangularViewType, Other>\n    solve(const MatrixBase<Other>& other) const;\n\n    /** \"in-place\" version of TriangularView::solve() where the result is written in \\a other\n      *\n      * \\warning The parameter is only marked 'const' to make the C++ compiler accept a temporary expression here.\n      * This function will const_cast it, so constness isn't honored here.\n      *\n      * Note that the template parameter \\c Side can be omitted, in which case \\c Side==OnTheLeft\n      *\n      * See TriangularView::solve() for the details.\n      */\n    template<int Side, typename OtherDerived>\n    EIGEN_DEVICE_FUNC\n    void solveInPlace(const MatrixBase<OtherDerived>& other) const;\n\n    template<typename OtherDerived>\n    EIGEN_DEVICE_FUNC\n    void solveInPlace(const MatrixBase<OtherDerived>& other) const\n    { return solveInPlace<OnTheLeft>(other); }\n\n    /** Swaps the coefficients of the common triangular parts of two matrices */\n    template<typename OtherDerived>\n    EIGEN_DEVICE_FUNC\n#ifdef EIGEN_PARSED_BY_DOXYGEN\n    void swap(TriangularBase<OtherDerived> &other)\n#else\n    void swap(TriangularBase<OtherDerived> const & other)\n#endif\n    {\n      EIGEN_STATIC_ASSERT_LVALUE(OtherDerived);\n      call_assignment(derived(), other.const_cast_derived(), internal::swap_assign_op<Scalar>());\n    }\n\n    /** Shortcut for \\code (*this).swap(other.triangularView<(*this)::Mode>()) \\endcode */\n    template<typename OtherDerived>\n    /** \\deprecated */\n    EIGEN_DEPRECATED EIGEN_DEVICE_FUNC\n    void swap(MatrixBase<OtherDerived> const & other)\n    {\n      EIGEN_STATIC_ASSERT_LVALUE(OtherDerived);\n      call_assignment(derived(), other.const_cast_derived(), internal::swap_assign_op<Scalar>());\n    }\n\n    template<typename RhsType, typename DstType>\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE void _solve_impl(const RhsType &rhs, DstType &dst) const {\n      if(!internal::is_same_dense(dst,rhs))\n        dst = rhs;\n      this->solveInPlace(dst);\n    }\n\n    template<typename ProductType>\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE TriangularViewType& _assignProduct(const ProductType& prod, const Scalar& alpha, bool beta);\n  protected:\n    EIGEN_DEFAULT_COPY_CONSTRUCTOR(TriangularViewImpl)\n    EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(TriangularViewImpl)\n\n};\n\n/***************************************************************************\n* Implementation of triangular evaluation/assignment\n***************************************************************************/\n\n#ifndef EIGEN_PARSED_BY_DOXYGEN\n// FIXME should we keep that possibility\ntemplate<typename MatrixType, unsigned int Mode>\ntemplate<typename OtherDerived>\nEIGEN_DEVICE_FUNC inline TriangularView<MatrixType, Mode>&\nTriangularViewImpl<MatrixType, Mode, Dense>::operator=(const MatrixBase<OtherDerived>& other)\n{\n  
internal::call_assignment_no_alias(derived(), other.derived(), internal::assign_op<Scalar,typename OtherDerived::Scalar>());\n  return derived();\n}\n\n// FIXME should we keep that possibility\ntemplate<typename MatrixType, unsigned int Mode>\ntemplate<typename OtherDerived>\nEIGEN_DEVICE_FUNC void TriangularViewImpl<MatrixType, Mode, Dense>::lazyAssign(const MatrixBase<OtherDerived>& other)\n{\n  internal::call_assignment_no_alias(derived(), other.template triangularView<Mode>());\n}\n\n\n\ntemplate<typename MatrixType, unsigned int Mode>\ntemplate<typename OtherDerived>\nEIGEN_DEVICE_FUNC inline TriangularView<MatrixType, Mode>&\nTriangularViewImpl<MatrixType, Mode, Dense>::operator=(const TriangularBase<OtherDerived>& other)\n{\n  eigen_assert(Mode == int(OtherDerived::Mode));\n  internal::call_assignment(derived(), other.derived());\n  return derived();\n}\n\ntemplate<typename MatrixType, unsigned int Mode>\ntemplate<typename OtherDerived>\nEIGEN_DEVICE_FUNC void TriangularViewImpl<MatrixType, Mode, Dense>::lazyAssign(const TriangularBase<OtherDerived>& other)\n{\n  eigen_assert(Mode == int(OtherDerived::Mode));\n  internal::call_assignment_no_alias(derived(), other.derived());\n}\n#endif\n\n/***************************************************************************\n* Implementation of TriangularBase methods\n***************************************************************************/\n\n/** Assigns a triangular or selfadjoint matrix to a dense matrix.\n  * If the matrix is triangular, the opposite part is set to zero. */\ntemplate<typename Derived>\ntemplate<typename DenseDerived>\nEIGEN_DEVICE_FUNC void TriangularBase<Derived>::evalTo(MatrixBase<DenseDerived> &other) const\n{\n  evalToLazy(other.derived());\n}\n\n/***************************************************************************\n* Implementation of TriangularView methods\n***************************************************************************/\n\n/***************************************************************************\n* Implementation of MatrixBase methods\n***************************************************************************/\n\n/**\n  * \\returns an expression of a triangular view extracted from the current matrix\n  *\n  * The parameter \\a Mode can have the following values: \\c #Upper, \\c #StrictlyUpper, \\c #UnitUpper,\n  * \\c #Lower, \\c #StrictlyLower, \\c #UnitLower.\n  *\n  * Example: \\include MatrixBase_triangularView.cpp\n  * Output: \\verbinclude MatrixBase_triangularView.out\n  *\n  * \\sa class TriangularView\n  */\ntemplate<typename Derived>\ntemplate<unsigned int Mode>\nEIGEN_DEVICE_FUNC\ntypename MatrixBase<Derived>::template TriangularViewReturnType<Mode>::Type\nMatrixBase<Derived>::triangularView()\n{\n  return typename TriangularViewReturnType<Mode>::Type(derived());\n}\n\n/** This is the const version of MatrixBase::triangularView() */\ntemplate<typename Derived>\ntemplate<unsigned int Mode>\nEIGEN_DEVICE_FUNC\ntypename MatrixBase<Derived>::template ConstTriangularViewReturnType<Mode>::Type\nMatrixBase<Derived>::triangularView() const\n{\n  return typename ConstTriangularViewReturnType<Mode>::Type(derived());\n}\n\n/** \\returns true if *this is approximately equal to an upper triangular matrix,\n  *          within the precision given by \\a prec.\n  *\n  * \\sa isLowerTriangular()\n  */\ntemplate<typename Derived>\nbool MatrixBase<Derived>::isUpperTriangular(const RealScalar& prec) const\n{\n  RealScalar maxAbsOnUpperPart = static_cast<RealScalar>(-1);\n  for(Index j = 0; j < 
cols(); ++j)\n  {\n    Index maxi = numext::mini(j, rows()-1);\n    for(Index i = 0; i <= maxi; ++i)\n    {\n      RealScalar absValue = numext::abs(coeff(i,j));\n      if(absValue > maxAbsOnUpperPart) maxAbsOnUpperPart = absValue;\n    }\n  }\n  RealScalar threshold = maxAbsOnUpperPart * prec;\n  for(Index j = 0; j < cols(); ++j)\n    for(Index i = j+1; i < rows(); ++i)\n      if(numext::abs(coeff(i, j)) > threshold) return false;\n  return true;\n}\n\n/** \\returns true if *this is approximately equal to a lower triangular matrix,\n  *          within the precision given by \\a prec.\n  *\n  * \\sa isUpperTriangular()\n  */\ntemplate<typename Derived>\nbool MatrixBase<Derived>::isLowerTriangular(const RealScalar& prec) const\n{\n  RealScalar maxAbsOnLowerPart = static_cast<RealScalar>(-1);\n  for(Index j = 0; j < cols(); ++j)\n    for(Index i = j; i < rows(); ++i)\n    {\n      RealScalar absValue = numext::abs(coeff(i,j));\n      if(absValue > maxAbsOnLowerPart) maxAbsOnLowerPart = absValue;\n    }\n  RealScalar threshold = maxAbsOnLowerPart * prec;\n  for(Index j = 1; j < cols(); ++j)\n  {\n    Index maxi = numext::mini(j, rows()-1);\n    for(Index i = 0; i < maxi; ++i)\n      if(numext::abs(coeff(i, j)) > threshold) return false;\n  }\n  return true;\n}\n\n\n/***************************************************************************\n****************************************************************************\n* Evaluators and Assignment of triangular expressions\n***************************************************************************\n***************************************************************************/\n\nnamespace internal {\n\n\n// TODO currently a triangular expression has the form TriangularView<.,.>\n//      in the future triangular-ness should be defined by the expression traits\n//      such that Transpose<TriangularView<.,.> > is valid. 
(currently TriangularBase::transpose() is overloaded to make it work)\ntemplate<typename MatrixType, unsigned int Mode>\nstruct evaluator_traits<TriangularView<MatrixType,Mode> >\n{\n  typedef typename storage_kind_to_evaluator_kind<typename MatrixType::StorageKind>::Kind Kind;\n  typedef typename glue_shapes<typename evaluator_traits<MatrixType>::Shape, TriangularShape>::type Shape;\n};\n\ntemplate<typename MatrixType, unsigned int Mode>\nstruct unary_evaluator<TriangularView<MatrixType,Mode>, IndexBased>\n : evaluator<typename internal::remove_all<MatrixType>::type>\n{\n  typedef TriangularView<MatrixType,Mode> XprType;\n  typedef evaluator<typename internal::remove_all<MatrixType>::type> Base;\n  EIGEN_DEVICE_FUNC\n  unary_evaluator(const XprType &xpr) : Base(xpr.nestedExpression()) {}\n};\n\n// Additional assignment kinds:\nstruct Triangular2Triangular    {};\nstruct Triangular2Dense         {};\nstruct Dense2Triangular         {};\n\n\ntemplate<typename Kernel, unsigned int Mode, int UnrollCount, bool ClearOpposite> struct triangular_assignment_loop;\n\n\n/** \\internal Specialization of the dense assignment kernel for triangular matrices.\n  * The main difference is that the triangular, diagonal, and opposite parts are processed through three different functions.\n  * \\tparam UpLo must be either Lower or Upper\n  * \\tparam Mode must be either 0, UnitDiag, ZeroDiag, or SelfAdjoint\n  */\ntemplate<int UpLo, int Mode, int SetOpposite, typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor, int Version = Specialized>\nclass triangular_dense_assignment_kernel : public generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, Version>\n{\nprotected:\n  typedef generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, Version> Base;\n  typedef typename Base::DstXprType DstXprType;\n  typedef typename Base::SrcXprType SrcXprType;\n  using Base::m_dst;\n  using Base::m_src;\n  using Base::m_functor;\npublic:\n\n  typedef typename Base::DstEvaluatorType DstEvaluatorType;\n  typedef typename Base::SrcEvaluatorType SrcEvaluatorType;\n  typedef typename Base::Scalar Scalar;\n  typedef typename Base::AssignmentTraits AssignmentTraits;\n\n\n  EIGEN_DEVICE_FUNC triangular_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr)\n    : Base(dst, src, func, dstExpr)\n  {}\n\n#ifdef EIGEN_INTERNAL_DEBUGGING\n  EIGEN_DEVICE_FUNC void assignCoeff(Index row, Index col)\n  {\n    eigen_internal_assert(row!=col);\n    Base::assignCoeff(row,col);\n  }\n#else\n  using Base::assignCoeff;\n#endif\n\n  EIGEN_DEVICE_FUNC void assignDiagonalCoeff(Index id)\n  {\n         if(Mode==UnitDiag && SetOpposite) m_functor.assignCoeff(m_dst.coeffRef(id,id), Scalar(1));\n    else if(Mode==ZeroDiag && SetOpposite) m_functor.assignCoeff(m_dst.coeffRef(id,id), Scalar(0));\n    else if(Mode==0)                       Base::assignCoeff(id,id);\n  }\n\n  EIGEN_DEVICE_FUNC void assignOppositeCoeff(Index row, Index col)\n  {\n    eigen_internal_assert(row!=col);\n    if(SetOpposite)\n      m_functor.assignCoeff(m_dst.coeffRef(row,col), Scalar(0));\n  }\n};\n\ntemplate<int Mode, bool SetOpposite, typename DstXprType, typename SrcXprType, typename Functor>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\nvoid call_triangular_assignment_loop(DstXprType& dst, const SrcXprType& src, const Functor &func)\n{\n  typedef evaluator<DstXprType> DstEvaluatorType;\n  typedef evaluator<SrcXprType> SrcEvaluatorType;\n\n  
SrcEvaluatorType srcEvaluator(src);\n\n  Index dstRows = src.rows();\n  Index dstCols = src.cols();\n  if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))\n    dst.resize(dstRows, dstCols);\n  DstEvaluatorType dstEvaluator(dst);\n\n  typedef triangular_dense_assignment_kernel< Mode&(Lower|Upper),Mode&(UnitDiag|ZeroDiag|SelfAdjoint),SetOpposite,\n                                              DstEvaluatorType,SrcEvaluatorType,Functor> Kernel;\n  Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived());\n\n  enum {\n      unroll = DstXprType::SizeAtCompileTime != Dynamic\n            && SrcEvaluatorType::CoeffReadCost < HugeCost\n            && DstXprType::SizeAtCompileTime * (int(DstEvaluatorType::CoeffReadCost) + int(SrcEvaluatorType::CoeffReadCost)) / 2 <= EIGEN_UNROLLING_LIMIT\n    };\n\n  triangular_assignment_loop<Kernel, Mode, unroll ? int(DstXprType::SizeAtCompileTime) : Dynamic, SetOpposite>::run(kernel);\n}\n\ntemplate<int Mode, bool SetOpposite, typename DstXprType, typename SrcXprType>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\nvoid call_triangular_assignment_loop(DstXprType& dst, const SrcXprType& src)\n{\n  call_triangular_assignment_loop<Mode,SetOpposite>(dst, src, internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar>());\n}\n\ntemplate<> struct AssignmentKind<TriangularShape,TriangularShape> { typedef Triangular2Triangular Kind; };\ntemplate<> struct AssignmentKind<DenseShape,TriangularShape>      { typedef Triangular2Dense      Kind; };\ntemplate<> struct AssignmentKind<TriangularShape,DenseShape>      { typedef Dense2Triangular      Kind; };\n\n\ntemplate< typename DstXprType, typename SrcXprType, typename Functor>\nstruct Assignment<DstXprType, SrcXprType, Functor, Triangular2Triangular>\n{\n  EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const Functor &func)\n  {\n    eigen_assert(int(DstXprType::Mode) == int(SrcXprType::Mode));\n\n    call_triangular_assignment_loop<DstXprType::Mode, false>(dst, src, func);\n  }\n};\n\ntemplate< typename DstXprType, typename SrcXprType, typename Functor>\nstruct Assignment<DstXprType, SrcXprType, Functor, Triangular2Dense>\n{\n  EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const Functor &func)\n  {\n    call_triangular_assignment_loop<SrcXprType::Mode, (int(SrcXprType::Mode) & int(SelfAdjoint)) == 0>(dst, src, func);\n  }\n};\n\ntemplate< typename DstXprType, typename SrcXprType, typename Functor>\nstruct Assignment<DstXprType, SrcXprType, Functor, Dense2Triangular>\n{\n  EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const Functor &func)\n  {\n    call_triangular_assignment_loop<DstXprType::Mode, false>(dst, src, func);\n  }\n};\n\n\ntemplate<typename Kernel, unsigned int Mode, int UnrollCount, bool SetOpposite>\nstruct triangular_assignment_loop\n{\n  // FIXME: this is not very clean, perhaps this information should be provided by the kernel?\n  typedef typename Kernel::DstEvaluatorType DstEvaluatorType;\n  typedef typename DstEvaluatorType::XprType DstXprType;\n\n  enum {\n    col = (UnrollCount-1) / DstXprType::RowsAtCompileTime,\n    row = (UnrollCount-1) % DstXprType::RowsAtCompileTime\n  };\n\n  typedef typename Kernel::Scalar Scalar;\n\n  EIGEN_DEVICE_FUNC\n  static inline void run(Kernel &kernel)\n  {\n    triangular_assignment_loop<Kernel, Mode, UnrollCount-1, SetOpposite>::run(kernel);\n\n    if(row==col)\n      kernel.assignDiagonalCoeff(row);\n    else if( ((Mode&Lower) && row>col) || ((Mode&Upper) && 
row<col) )\n      kernel.assignCoeff(row,col);\n    else if(SetOpposite)\n      kernel.assignOppositeCoeff(row,col);\n  }\n};\n\n// prevent buggy user code from causing an infinite recursion\ntemplate<typename Kernel, unsigned int Mode, bool SetOpposite>\nstruct triangular_assignment_loop<Kernel, Mode, 0, SetOpposite>\n{\n  EIGEN_DEVICE_FUNC\n  static inline void run(Kernel &) {}\n};\n\n\n\n// TODO: experiment with a recursive assignment procedure splitting the current\n//       triangular part into one rectangular and two triangular parts.\n\n\ntemplate<typename Kernel, unsigned int Mode, bool SetOpposite>\nstruct triangular_assignment_loop<Kernel, Mode, Dynamic, SetOpposite>\n{\n  typedef typename Kernel::Scalar Scalar;\n  EIGEN_DEVICE_FUNC\n  static inline void run(Kernel &kernel)\n  {\n    for(Index j = 0; j < kernel.cols(); ++j)\n    {\n      Index maxi = numext::mini(j, kernel.rows());\n      Index i = 0;\n      if (((Mode&Lower) && SetOpposite) || (Mode&Upper))\n      {\n        for(; i < maxi; ++i)\n          if(Mode&Upper) kernel.assignCoeff(i, j);\n          else           kernel.assignOppositeCoeff(i, j);\n      }\n      else\n        i = maxi;\n\n      if(i<kernel.rows()) // then i==j\n        kernel.assignDiagonalCoeff(i++);\n\n      if (((Mode&Upper) && SetOpposite) || (Mode&Lower))\n      {\n        for(; i < kernel.rows(); ++i)\n          if(Mode&Lower) kernel.assignCoeff(i, j);\n          else           kernel.assignOppositeCoeff(i, j);\n      }\n    }\n  }\n};\n\n} // end namespace internal\n\n/** Assigns a triangular or selfadjoint matrix to a dense matrix.\n  * If the matrix is triangular, the opposite part is set to zero. */\ntemplate<typename Derived>\ntemplate<typename DenseDerived>\nEIGEN_DEVICE_FUNC void TriangularBase<Derived>::evalToLazy(MatrixBase<DenseDerived> &other) const\n{\n  other.derived().resize(this->rows(), this->cols());\n  internal::call_triangular_assignment_loop<Derived::Mode, (int(Derived::Mode) & int(SelfAdjoint)) == 0 /* SetOpposite */>(other.derived(), derived().nestedExpression());\n}\n\nnamespace internal {\n\n// Triangular = Product\ntemplate< typename DstXprType, typename Lhs, typename Rhs, typename Scalar>\nstruct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::assign_op<Scalar,typename Product<Lhs,Rhs,DefaultProduct>::Scalar>, Dense2Triangular>\n{\n  typedef Product<Lhs,Rhs,DefaultProduct> SrcXprType;\n  static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,typename SrcXprType::Scalar> &)\n  {\n    Index dstRows = src.rows();\n    Index dstCols = src.cols();\n    if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))\n      dst.resize(dstRows, dstCols);\n\n    dst._assignProduct(src, Scalar(1), false);\n  }\n};\n\n// Triangular += Product\ntemplate< typename DstXprType, typename Lhs, typename Rhs, typename Scalar>\nstruct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::add_assign_op<Scalar,typename Product<Lhs,Rhs,DefaultProduct>::Scalar>, Dense2Triangular>\n{\n  typedef Product<Lhs,Rhs,DefaultProduct> SrcXprType;\n  static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<Scalar,typename SrcXprType::Scalar> &)\n  {\n    dst._assignProduct(src, Scalar(1), true);\n  }\n};\n\n// Triangular -= Product\ntemplate< typename DstXprType, typename Lhs, typename Rhs, typename Scalar>\nstruct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::sub_assign_op<Scalar,typename Product<Lhs,Rhs,DefaultProduct>::Scalar>, Dense2Triangular>\n{\n  
typedef Product<Lhs,Rhs,DefaultProduct> SrcXprType;\n  static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<Scalar,typename SrcXprType::Scalar> &)\n  {\n    dst._assignProduct(src, Scalar(-1), true);\n  }\n};\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_TRIANGULARMATRIX_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/VectorBlock.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>\n// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_VECTORBLOCK_H\n#define EIGEN_VECTORBLOCK_H\n\nnamespace Eigen { \n\nnamespace internal {\ntemplate<typename VectorType, int Size>\nstruct traits<VectorBlock<VectorType, Size> >\n  : public traits<Block<VectorType,\n                     traits<VectorType>::Flags & RowMajorBit ? 1 : Size,\n                     traits<VectorType>::Flags & RowMajorBit ? Size : 1> >\n{\n};\n}\n\n/** \\class VectorBlock\n  * \\ingroup Core_Module\n  *\n  * \\brief Expression of a fixed-size or dynamic-size sub-vector\n  *\n  * \\tparam VectorType the type of the object in which we are taking a sub-vector\n  * \\tparam Size size of the sub-vector we are taking at compile time (optional)\n  *\n  * This class represents an expression of either a fixed-size or dynamic-size sub-vector.\n  * It is the return type of DenseBase::segment(Index,Index) and DenseBase::segment<int>(Index) and\n  * most of the time this is the only way it is used.\n  *\n  * However, if you want to directly manipulate sub-vector expressions,\n  * for instance if you want to write a function returning such an expression, you\n  * will need to use this class.\n  *\n  * Here is an example illustrating the dynamic case:\n  * \\include class_VectorBlock.cpp\n  * Output: \\verbinclude class_VectorBlock.out\n  *\n  * \\note Even though this expression has dynamic size, in the case where \\a VectorType\n  * has fixed size, this expression inherits a fixed maximal size which means that evaluating\n  * it does not cause a dynamic memory allocation.\n  *\n  * Here is an example illustrating the fixed-size case:\n  * \\include class_FixedVectorBlock.cpp\n  * Output: \\verbinclude class_FixedVectorBlock.out\n  *\n  * \\sa class Block, DenseBase::segment(Index,Index,Index,Index), DenseBase::segment(Index,Index)\n  */\ntemplate<typename VectorType, int Size> class VectorBlock\n  : public Block<VectorType,\n                     internal::traits<VectorType>::Flags & RowMajorBit ? 1 : Size,\n                     internal::traits<VectorType>::Flags & RowMajorBit ? Size : 1>\n{\n    typedef Block<VectorType,\n                     internal::traits<VectorType>::Flags & RowMajorBit ? 1 : Size,\n                     internal::traits<VectorType>::Flags & RowMajorBit ? Size : 1> Base;\n    enum {\n      IsColVector = !(internal::traits<VectorType>::Flags & RowMajorBit)\n    };\n  public:\n    EIGEN_DENSE_PUBLIC_INTERFACE(VectorBlock)\n\n    using Base::operator=;\n\n    /** Dynamic-size constructor\n      */\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n    VectorBlock(VectorType& vector, Index start, Index size)\n      : Base(vector,\n             IsColVector ? start : 0, IsColVector ? 0 : start,\n             IsColVector ? size  : 1, IsColVector ? 1 : size)\n    {\n      EIGEN_STATIC_ASSERT_VECTOR_ONLY(VectorBlock);\n    }\n\n    /** Fixed-size constructor\n      */\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n    VectorBlock(VectorType& vector, Index start)\n      : Base(vector, IsColVector ? start : 0, IsColVector ? 
0 : start)\n    {\n      EIGEN_STATIC_ASSERT_VECTOR_ONLY(VectorBlock);\n    }\n};\n\n\n} // end namespace Eigen\n\n#endif // EIGEN_VECTORBLOCK_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/VectorwiseOp.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008-2019 Gael Guennebaud <gael.guennebaud@inria.fr>\n// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_PARTIAL_REDUX_H\n#define EIGEN_PARTIAL_REDUX_H\n\nnamespace Eigen {\n\n/** \\class PartialReduxExpr\n  * \\ingroup Core_Module\n  *\n  * \\brief Generic expression of a partially reduxed matrix\n  *\n  * \\tparam MatrixType the type of the matrix we are applying the redux operation\n  * \\tparam MemberOp type of the member functor\n  * \\tparam Direction indicates the direction of the redux (#Vertical or #Horizontal)\n  *\n  * This class represents an expression of a partial redux operator of a matrix.\n  * It is the return type of some VectorwiseOp functions,\n  * and most of the time this is the only way it is used.\n  *\n  * \\sa class VectorwiseOp\n  */\n\ntemplate< typename MatrixType, typename MemberOp, int Direction>\nclass PartialReduxExpr;\n\nnamespace internal {\ntemplate<typename MatrixType, typename MemberOp, int Direction>\nstruct traits<PartialReduxExpr<MatrixType, MemberOp, Direction> >\n : traits<MatrixType>\n{\n  typedef typename MemberOp::result_type Scalar;\n  typedef typename traits<MatrixType>::StorageKind StorageKind;\n  typedef typename traits<MatrixType>::XprKind XprKind;\n  typedef typename MatrixType::Scalar InputScalar;\n  enum {\n    RowsAtCompileTime = Direction==Vertical   ? 1 : MatrixType::RowsAtCompileTime,\n    ColsAtCompileTime = Direction==Horizontal ? 1 : MatrixType::ColsAtCompileTime,\n    MaxRowsAtCompileTime = Direction==Vertical   ? 1 : MatrixType::MaxRowsAtCompileTime,\n    MaxColsAtCompileTime = Direction==Horizontal ? 1 : MatrixType::MaxColsAtCompileTime,\n    Flags = RowsAtCompileTime == 1 ? RowMajorBit : 0,\n    TraversalSize = Direction==Vertical ? MatrixType::RowsAtCompileTime :  MatrixType::ColsAtCompileTime\n  };\n};\n}\n\ntemplate< typename MatrixType, typename MemberOp, int Direction>\nclass PartialReduxExpr : public internal::dense_xpr_base< PartialReduxExpr<MatrixType, MemberOp, Direction> >::type,\n                         internal::no_assignment_operator\n{\n  public:\n\n    typedef typename internal::dense_xpr_base<PartialReduxExpr>::type Base;\n    EIGEN_DENSE_PUBLIC_INTERFACE(PartialReduxExpr)\n\n    EIGEN_DEVICE_FUNC\n    explicit PartialReduxExpr(const MatrixType& mat, const MemberOp& func = MemberOp())\n      : m_matrix(mat), m_functor(func) {}\n\n    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n    Index rows() const EIGEN_NOEXCEPT { return (Direction==Vertical   ? 1 : m_matrix.rows()); }\n    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n    Index cols() const EIGEN_NOEXCEPT { return (Direction==Horizontal ? 
1 : m_matrix.cols()); }\n\n    EIGEN_DEVICE_FUNC\n    typename MatrixType::Nested nestedExpression() const { return m_matrix; }\n\n    EIGEN_DEVICE_FUNC\n    const MemberOp& functor() const { return m_functor; }\n\n  protected:\n    typename MatrixType::Nested m_matrix;\n    const MemberOp m_functor;\n};\n\ntemplate<typename A,typename B> struct partial_redux_dummy_func;\n\n#define EIGEN_MAKE_PARTIAL_REDUX_FUNCTOR(MEMBER,COST,VECTORIZABLE,BINARYOP)                \\\n  template <typename ResultType,typename Scalar>                                                            \\\n  struct member_##MEMBER {                                                                  \\\n    EIGEN_EMPTY_STRUCT_CTOR(member_##MEMBER)                                                \\\n    typedef ResultType result_type;                                                         \\\n    typedef BINARYOP<Scalar,Scalar> BinaryOp;   \\\n    template<int Size> struct Cost { enum { value = COST }; };             \\\n    enum { Vectorizable = VECTORIZABLE };                                                   \\\n    template<typename XprType>                                                              \\\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE                                                   \\\n    ResultType operator()(const XprType& mat) const                                         \\\n    { return mat.MEMBER(); }                                                                \\\n    BinaryOp binaryFunc() const { return BinaryOp(); }                                      \\\n  }\n\n#define EIGEN_MEMBER_FUNCTOR(MEMBER,COST) \\\n  EIGEN_MAKE_PARTIAL_REDUX_FUNCTOR(MEMBER,COST,0,partial_redux_dummy_func)\n\nnamespace internal {\n\nEIGEN_MEMBER_FUNCTOR(norm, (Size+5) * NumTraits<Scalar>::MulCost + (Size-1)*NumTraits<Scalar>::AddCost);\nEIGEN_MEMBER_FUNCTOR(stableNorm, (Size+5) * NumTraits<Scalar>::MulCost + (Size-1)*NumTraits<Scalar>::AddCost);\nEIGEN_MEMBER_FUNCTOR(blueNorm, (Size+5) * NumTraits<Scalar>::MulCost + (Size-1)*NumTraits<Scalar>::AddCost);\nEIGEN_MEMBER_FUNCTOR(hypotNorm, (Size-1) * functor_traits<scalar_hypot_op<Scalar> >::Cost );\nEIGEN_MEMBER_FUNCTOR(all, (Size-1)*NumTraits<Scalar>::AddCost);\nEIGEN_MEMBER_FUNCTOR(any, (Size-1)*NumTraits<Scalar>::AddCost);\nEIGEN_MEMBER_FUNCTOR(count, (Size-1)*NumTraits<Scalar>::AddCost);\n\nEIGEN_MAKE_PARTIAL_REDUX_FUNCTOR(sum, (Size-1)*NumTraits<Scalar>::AddCost, 1, internal::scalar_sum_op);\nEIGEN_MAKE_PARTIAL_REDUX_FUNCTOR(minCoeff, (Size-1)*NumTraits<Scalar>::AddCost, 1, internal::scalar_min_op);\nEIGEN_MAKE_PARTIAL_REDUX_FUNCTOR(maxCoeff, (Size-1)*NumTraits<Scalar>::AddCost, 1, internal::scalar_max_op);\nEIGEN_MAKE_PARTIAL_REDUX_FUNCTOR(prod, (Size-1)*NumTraits<Scalar>::MulCost, 1, internal::scalar_product_op);\n\ntemplate <int p, typename ResultType,typename Scalar>\nstruct member_lpnorm {\n  typedef ResultType result_type;\n  enum { Vectorizable = 0 };\n  template<int Size> struct Cost\n  { enum { value = (Size+5) * NumTraits<Scalar>::MulCost + (Size-1)*NumTraits<Scalar>::AddCost }; };\n  EIGEN_DEVICE_FUNC member_lpnorm() {}\n  template<typename XprType>\n  EIGEN_DEVICE_FUNC inline ResultType operator()(const XprType& mat) const\n  { return mat.template lpNorm<p>(); }\n};\n\ntemplate <typename BinaryOpT, typename Scalar>\nstruct member_redux {\n  typedef BinaryOpT BinaryOp;\n  typedef typename result_of<\n                     BinaryOp(const Scalar&,const Scalar&)\n                   >::type  result_type;\n\n  enum { Vectorizable = 
functor_traits<BinaryOp>::PacketAccess };\n  template<int Size> struct Cost { enum { value = (Size-1) * functor_traits<BinaryOp>::Cost }; };\n  EIGEN_DEVICE_FUNC explicit member_redux(const BinaryOp func) : m_functor(func) {}\n  template<typename Derived>\n  EIGEN_DEVICE_FUNC inline result_type operator()(const DenseBase<Derived>& mat) const\n  { return mat.redux(m_functor); }\n  const BinaryOp& binaryFunc() const { return m_functor; }\n  const BinaryOp m_functor;\n};\n}\n\n/** \\class VectorwiseOp\n  * \\ingroup Core_Module\n  *\n  * \\brief Pseudo expression providing broadcasting and partial reduction operations\n  *\n  * \\tparam ExpressionType the type of the object on which to do partial reductions\n  * \\tparam Direction indicates whether to operate on columns (#Vertical) or rows (#Horizontal)\n  *\n  * This class represents a pseudo expression with broadcasting and partial reduction features.\n  * It is the return type of DenseBase::colwise() and DenseBase::rowwise()\n  * and most of the time this is the only way it is explicitly used.\n  *\n  * To understand the logic of rowwise/colwise expression, let's consider a generic case `A.colwise().foo()`\n  * where `foo` is any method of `VectorwiseOp`. This expression is equivalent to applying `foo()` to each\n  * column of `A` and then re-assemble the outputs in a matrix expression:\n  * \\code [A.col(0).foo(), A.col(1).foo(), ..., A.col(A.cols()-1).foo()] \\endcode\n  *\n  * Example: \\include MatrixBase_colwise.cpp\n  * Output: \\verbinclude MatrixBase_colwise.out\n  *\n  * The begin() and end() methods are obviously exceptions to the previous rule as they\n  * return STL-compatible begin/end iterators to the rows or columns of the nested expression.\n  * Typical use cases include for-range-loop and calls to STL algorithms:\n  *\n  * Example: \\include MatrixBase_colwise_iterator_cxx11.cpp\n  * Output: \\verbinclude MatrixBase_colwise_iterator_cxx11.out\n  *\n  * For a partial reduction on an empty input, some rules apply.\n  * For the sake of clarity, let's consider a vertical reduction:\n  *   - If the number of columns is zero, then a 1x0 row-major vector expression is returned.\n  *   - Otherwise, if the number of rows is zero, then\n  *       - a row vector of zeros is returned for sum-like reductions (sum, squaredNorm, norm, etc.)\n  *       - a row vector of ones is returned for a product reduction (e.g., <code>MatrixXd(n,0).colwise().prod()</code>)\n  *       - an assert is triggered for all other reductions (minCoeff,maxCoeff,redux(bin_op))\n  *\n  * \\sa DenseBase::colwise(), DenseBase::rowwise(), class PartialReduxExpr\n  */\ntemplate<typename ExpressionType, int Direction> class VectorwiseOp\n{\n  public:\n\n    typedef typename ExpressionType::Scalar Scalar;\n    typedef typename ExpressionType::RealScalar RealScalar;\n    typedef Eigen::Index Index; ///< \\deprecated since Eigen 3.3\n    typedef typename internal::ref_selector<ExpressionType>::non_const_type ExpressionTypeNested;\n    typedef typename internal::remove_all<ExpressionTypeNested>::type ExpressionTypeNestedCleaned;\n\n    template<template<typename OutScalar,typename InputScalar> class Functor,\n                      typename ReturnScalar=Scalar> struct ReturnType\n    {\n      typedef PartialReduxExpr<ExpressionType,\n                               Functor<ReturnScalar,Scalar>,\n                               Direction\n                              > Type;\n    };\n\n    template<typename BinaryOp> struct ReduxReturnType\n    {\n      typedef 
PartialReduxExpr<ExpressionType,\n                               internal::member_redux<BinaryOp,Scalar>,\n                               Direction\n                              > Type;\n    };\n\n    enum {\n      isVertical   = (Direction==Vertical) ? 1 : 0,\n      isHorizontal = (Direction==Horizontal) ? 1 : 0\n    };\n\n  protected:\n\n    template<typename OtherDerived> struct ExtendedType {\n      typedef Replicate<OtherDerived,\n                        isVertical   ? 1 : ExpressionType::RowsAtCompileTime,\n                        isHorizontal ? 1 : ExpressionType::ColsAtCompileTime> Type;\n    };\n\n    /** \\internal\n      * Replicates a vector to match the size of \\c *this */\n    template<typename OtherDerived>\n    EIGEN_DEVICE_FUNC\n    typename ExtendedType<OtherDerived>::Type\n    extendedTo(const DenseBase<OtherDerived>& other) const\n    {\n      EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(isVertical, OtherDerived::MaxColsAtCompileTime==1),\n                          YOU_PASSED_A_ROW_VECTOR_BUT_A_COLUMN_VECTOR_WAS_EXPECTED)\n      EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(isHorizontal, OtherDerived::MaxRowsAtCompileTime==1),\n                          YOU_PASSED_A_COLUMN_VECTOR_BUT_A_ROW_VECTOR_WAS_EXPECTED)\n      return typename ExtendedType<OtherDerived>::Type\n                      (other.derived(),\n                       isVertical   ? 1 : m_matrix.rows(),\n                       isHorizontal ? 1 : m_matrix.cols());\n    }\n\n    template<typename OtherDerived> struct OppositeExtendedType {\n      typedef Replicate<OtherDerived,\n                        isHorizontal ? 1 : ExpressionType::RowsAtCompileTime,\n                        isVertical   ? 1 : ExpressionType::ColsAtCompileTime> Type;\n    };\n\n    /** \\internal\n      * Replicates a vector in the opposite direction to match the size of \\c *this */\n    template<typename OtherDerived>\n    EIGEN_DEVICE_FUNC\n    typename OppositeExtendedType<OtherDerived>::Type\n    extendedToOpposite(const DenseBase<OtherDerived>& other) const\n    {\n      EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(isHorizontal, OtherDerived::MaxColsAtCompileTime==1),\n                          YOU_PASSED_A_ROW_VECTOR_BUT_A_COLUMN_VECTOR_WAS_EXPECTED)\n      EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(isVertical, OtherDerived::MaxRowsAtCompileTime==1),\n                          YOU_PASSED_A_COLUMN_VECTOR_BUT_A_ROW_VECTOR_WAS_EXPECTED)\n      return typename OppositeExtendedType<OtherDerived>::Type\n                      (other.derived(),\n                       isHorizontal  ? 1 : m_matrix.rows(),\n                       isVertical    ? 
1 : m_matrix.cols());\n    }\n\n  public:\n    EIGEN_DEVICE_FUNC\n    explicit inline VectorwiseOp(ExpressionType& matrix) : m_matrix(matrix) {}\n\n    /** \\internal */\n    EIGEN_DEVICE_FUNC\n    inline const ExpressionType& _expression() const { return m_matrix; }\n\n    #ifdef EIGEN_PARSED_BY_DOXYGEN\n    /** STL-like <a href=\"https://en.cppreference.com/w/cpp/named_req/RandomAccessIterator\">RandomAccessIterator</a>\n      * iterator type over the columns or rows as returned by the begin() and end() methods.\n      */\n    random_access_iterator_type iterator;\n    /** This is the const version of iterator (aka read-only) */\n    random_access_iterator_type const_iterator;\n    #else\n    typedef internal::subvector_stl_iterator<ExpressionType,               DirectionType(Direction)> iterator;\n    typedef internal::subvector_stl_iterator<const ExpressionType,         DirectionType(Direction)> const_iterator;\n    typedef internal::subvector_stl_reverse_iterator<ExpressionType,       DirectionType(Direction)> reverse_iterator;\n    typedef internal::subvector_stl_reverse_iterator<const ExpressionType, DirectionType(Direction)> const_reverse_iterator;\n    #endif\n\n    /** returns an iterator to the first row (rowwise) or column (colwise) of the nested expression.\n      * \\sa end(), cbegin()\n      */\n    iterator                 begin()       { return iterator      (m_matrix, 0); }\n    /** const version of begin() */\n    const_iterator           begin() const { return const_iterator(m_matrix, 0); }\n    /** const version of begin() */\n    const_iterator          cbegin() const { return const_iterator(m_matrix, 0); }\n\n    /** returns a reverse iterator to the last row (rowwise) or column (colwise) of the nested expression.\n      * \\sa rend(), crbegin()\n      */\n    reverse_iterator        rbegin()       { return reverse_iterator       (m_matrix, m_matrix.template subVectors<DirectionType(Direction)>()-1); }\n\t/** const version of rbegin() */\n    const_reverse_iterator  rbegin() const { return const_reverse_iterator (m_matrix, m_matrix.template subVectors<DirectionType(Direction)>()-1); }\n\t/** const version of rbegin() */\n\tconst_reverse_iterator crbegin() const { return const_reverse_iterator (m_matrix, m_matrix.template subVectors<DirectionType(Direction)>()-1); }\n\n    /** returns an iterator to the row (resp. column) following the last row (resp. column) of the nested expression\n      * \\sa begin(), cend()\n      */\n    iterator                 end()         { return iterator      (m_matrix, m_matrix.template subVectors<DirectionType(Direction)>()); }\n    /** const version of end() */\n    const_iterator           end()  const  { return const_iterator(m_matrix, m_matrix.template subVectors<DirectionType(Direction)>()); }\n    /** const version of end() */\n    const_iterator          cend()  const  { return const_iterator(m_matrix, m_matrix.template subVectors<DirectionType(Direction)>()); }\n\n    /** returns a reverse iterator to the row (resp. column) before the first row (resp. 
column) of the nested expression\n      * \\sa begin(), cend()\n      */\n    reverse_iterator        rend()         { return reverse_iterator       (m_matrix, -1); }\n    /** const version of rend() */\n    const_reverse_iterator  rend()  const  { return const_reverse_iterator (m_matrix, -1); }\n    /** const version of rend() */\n    const_reverse_iterator crend()  const  { return const_reverse_iterator (m_matrix, -1); }\n\n    /** \\returns a row or column vector expression of \\c *this reduxed by \\a func\n      *\n      * The template parameter \\a BinaryOp is the type of the functor\n      * of the custom redux operator. Note that func must be an associative operator.\n      *\n      * \\warning the size along the reduction direction must be strictly positive,\n      *          otherwise an assertion is triggered.\n      *\n      * \\sa class VectorwiseOp, DenseBase::colwise(), DenseBase::rowwise()\n      */\n    template<typename BinaryOp>\n    EIGEN_DEVICE_FUNC\n    const typename ReduxReturnType<BinaryOp>::Type\n    redux(const BinaryOp& func = BinaryOp()) const\n    {\n      eigen_assert(redux_length()>0 && \"you are using an empty matrix\");\n      return typename ReduxReturnType<BinaryOp>::Type(_expression(), internal::member_redux<BinaryOp,Scalar>(func));\n    }\n\n    typedef typename ReturnType<internal::member_minCoeff>::Type MinCoeffReturnType;\n    typedef typename ReturnType<internal::member_maxCoeff>::Type MaxCoeffReturnType;\n    typedef PartialReduxExpr<const CwiseUnaryOp<internal::scalar_abs2_op<Scalar>, const ExpressionTypeNestedCleaned>,internal::member_sum<RealScalar,RealScalar>,Direction> SquaredNormReturnType;\n    typedef CwiseUnaryOp<internal::scalar_sqrt_op<RealScalar>, const SquaredNormReturnType> NormReturnType;\n    typedef typename ReturnType<internal::member_blueNorm,RealScalar>::Type BlueNormReturnType;\n    typedef typename ReturnType<internal::member_stableNorm,RealScalar>::Type StableNormReturnType;\n    typedef typename ReturnType<internal::member_hypotNorm,RealScalar>::Type HypotNormReturnType;\n    typedef typename ReturnType<internal::member_sum>::Type SumReturnType;\n    typedef EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(SumReturnType,Scalar,quotient) MeanReturnType;\n    typedef typename ReturnType<internal::member_all>::Type AllReturnType;\n    typedef typename ReturnType<internal::member_any>::Type AnyReturnType;\n    typedef PartialReduxExpr<ExpressionType, internal::member_count<Index,Scalar>, Direction> CountReturnType;\n    typedef typename ReturnType<internal::member_prod>::Type ProdReturnType;\n    typedef Reverse<const ExpressionType, Direction> ConstReverseReturnType;\n    typedef Reverse<ExpressionType, Direction> ReverseReturnType;\n\n    template<int p> struct LpNormReturnType {\n      typedef PartialReduxExpr<ExpressionType, internal::member_lpnorm<p,RealScalar,Scalar>,Direction> Type;\n    };\n\n    /** \\returns a row (or column) vector expression of the smallest coefficient\n      * of each column (or row) of the referenced expression.\n      *\n      * \\warning the size along the reduction direction must be strictly positive,\n      *          otherwise an assertion is triggered.\n      *\n      * \\warning the result is undefined if \\c *this contains NaN.\n      *\n      * Example: \\include PartialRedux_minCoeff.cpp\n      * Output: \\verbinclude PartialRedux_minCoeff.out\n      *\n      * \\sa DenseBase::minCoeff() */\n    EIGEN_DEVICE_FUNC\n    const MinCoeffReturnType minCoeff() const\n    {\n      
eigen_assert(redux_length()>0 && \"you are using an empty matrix\");\n      return MinCoeffReturnType(_expression());\n    }\n\n    /** \\returns a row (or column) vector expression of the largest coefficient\n      * of each column (or row) of the referenced expression.\n      *\n      * \\warning the size along the reduction direction must be strictly positive,\n      *          otherwise an assertion is triggered.\n      *\n      * \\warning the result is undefined if \\c *this contains NaN.\n      *\n      * Example: \\include PartialRedux_maxCoeff.cpp\n      * Output: \\verbinclude PartialRedux_maxCoeff.out\n      *\n      * \\sa DenseBase::maxCoeff() */\n    EIGEN_DEVICE_FUNC\n    const MaxCoeffReturnType maxCoeff() const\n    {\n      eigen_assert(redux_length()>0 && \"you are using an empty matrix\");\n      return MaxCoeffReturnType(_expression());\n    }\n\n    /** \\returns a row (or column) vector expression of the squared norm\n      * of each column (or row) of the referenced expression.\n      * This is a vector with real entries, even if the original matrix has complex entries.\n      *\n      * Example: \\include PartialRedux_squaredNorm.cpp\n      * Output: \\verbinclude PartialRedux_squaredNorm.out\n      *\n      * \\sa DenseBase::squaredNorm() */\n    EIGEN_DEVICE_FUNC\n    const SquaredNormReturnType squaredNorm() const\n    { return SquaredNormReturnType(m_matrix.cwiseAbs2()); }\n\n    /** \\returns a row (or column) vector expression of the norm\n      * of each column (or row) of the referenced expression.\n      * This is a vector with real entries, even if the original matrix has complex entries.\n      *\n      * Example: \\include PartialRedux_norm.cpp\n      * Output: \\verbinclude PartialRedux_norm.out\n      *\n      * \\sa DenseBase::norm() */\n    EIGEN_DEVICE_FUNC\n    const NormReturnType norm() const\n    { return NormReturnType(squaredNorm()); }\n\n    /** \\returns a row (or column) vector expression of the norm\n      * of each column (or row) of the referenced expression.\n      * This is a vector with real entries, even if the original matrix has complex entries.\n      *\n      * Example: \\include PartialRedux_norm.cpp\n      * Output: \\verbinclude PartialRedux_norm.out\n      *\n      * \\sa DenseBase::norm() */\n    template<int p>\n    EIGEN_DEVICE_FUNC\n    const typename LpNormReturnType<p>::Type lpNorm() const\n    { return typename LpNormReturnType<p>::Type(_expression()); }\n\n\n    /** \\returns a row (or column) vector expression of the norm\n      * of each column (or row) of the referenced expression, using\n      * Blue's algorithm.\n      * This is a vector with real entries, even if the original matrix has complex entries.\n      *\n      * \\sa DenseBase::blueNorm() */\n    EIGEN_DEVICE_FUNC\n    const BlueNormReturnType blueNorm() const\n    { return BlueNormReturnType(_expression()); }\n\n\n    /** \\returns a row (or column) vector expression of the norm\n      * of each column (or row) of the referenced expression, avoiding\n      * underflow and overflow.\n      * This is a vector with real entries, even if the original matrix has complex entries.\n      *\n      * \\sa DenseBase::stableNorm() */\n    EIGEN_DEVICE_FUNC\n    const StableNormReturnType stableNorm() const\n    { return StableNormReturnType(_expression()); }\n\n\n    /** \\returns a row (or column) vector expression of the norm\n      * of each column (or row) of the referenced expression, avoiding\n      * underflow and overflow using a concatenation of hypot() 
calls.\n      * This is a vector with real entries, even if the original matrix has complex entries.\n      *\n      * \\sa DenseBase::hypotNorm() */\n    EIGEN_DEVICE_FUNC\n    const HypotNormReturnType hypotNorm() const\n    { return HypotNormReturnType(_expression()); }\n\n    /** \\returns a row (or column) vector expression of the sum\n      * of each column (or row) of the referenced expression.\n      *\n      * Example: \\include PartialRedux_sum.cpp\n      * Output: \\verbinclude PartialRedux_sum.out\n      *\n      * \\sa DenseBase::sum() */\n    EIGEN_DEVICE_FUNC\n    const SumReturnType sum() const\n    { return SumReturnType(_expression()); }\n\n    /** \\returns a row (or column) vector expression of the mean\n    * of each column (or row) of the referenced expression.\n    *\n    * \\sa DenseBase::mean() */\n    EIGEN_DEVICE_FUNC\n    const MeanReturnType mean() const\n    { return sum() / Scalar(Direction==Vertical?m_matrix.rows():m_matrix.cols()); }\n\n    /** \\returns a row (or column) vector expression representing\n      * whether \\b all coefficients of each respective column (or row) are \\c true.\n      * This expression can be assigned to a vector with entries of type \\c bool.\n      *\n      * \\sa DenseBase::all() */\n    EIGEN_DEVICE_FUNC\n    const AllReturnType all() const\n    { return AllReturnType(_expression()); }\n\n    /** \\returns a row (or column) vector expression representing\n      * whether \\b at \\b least one coefficient of each respective column (or row) is \\c true.\n      * This expression can be assigned to a vector with entries of type \\c bool.\n      *\n      * \\sa DenseBase::any() */\n    EIGEN_DEVICE_FUNC\n    const AnyReturnType any() const\n    { return AnyReturnType(_expression()); }\n\n    /** \\returns a row (or column) vector expression representing\n      * the number of \\c true coefficients of each respective column (or row).\n      * This expression can be assigned to a vector whose entries have the same type as is used to\n      * index entries of the original matrix; for dense matrices, this is \\c std::ptrdiff_t .\n      *\n      * Example: \\include PartialRedux_count.cpp\n      * Output: \\verbinclude PartialRedux_count.out\n      *\n      * \\sa DenseBase::count() */\n    EIGEN_DEVICE_FUNC\n    const CountReturnType count() const\n    { return CountReturnType(_expression()); }\n\n    /** \\returns a row (or column) vector expression of the product\n      * of each column (or row) of the referenced expression.\n      *\n      * Example: \\include PartialRedux_prod.cpp\n      * Output: \\verbinclude PartialRedux_prod.out\n      *\n      * \\sa DenseBase::prod() */\n    EIGEN_DEVICE_FUNC\n    const ProdReturnType prod() const\n    { return ProdReturnType(_expression()); }\n\n\n    /** \\returns a matrix expression\n      * where each column (or row) are reversed.\n      *\n      * Example: \\include Vectorwise_reverse.cpp\n      * Output: \\verbinclude Vectorwise_reverse.out\n      *\n      * \\sa DenseBase::reverse() */\n    EIGEN_DEVICE_FUNC\n    const ConstReverseReturnType reverse() const\n    { return ConstReverseReturnType( _expression() ); }\n\n    /** \\returns a writable matrix expression\n      * where each column (or row) are reversed.\n      *\n      * \\sa reverse() const */\n    EIGEN_DEVICE_FUNC\n    ReverseReturnType reverse()\n    { return ReverseReturnType( _expression() ); }\n\n    typedef Replicate<ExpressionType,(isVertical?Dynamic:1),(isHorizontal?Dynamic:1)> ReplicateReturnType;\n    
EIGEN_DEVICE_FUNC\n    const ReplicateReturnType replicate(Index factor) const;\n\n    /**\n      * \\return an expression of the replication of each column (or row) of \\c *this\n      *\n      * Example: \\include DirectionWise_replicate.cpp\n      * Output: \\verbinclude DirectionWise_replicate.out\n      *\n      * \\sa VectorwiseOp::replicate(Index), DenseBase::replicate(), class Replicate\n      */\n    // NOTE implemented here because of sunstudio's compilation errors\n    // isVertical*Factor+isHorizontal instead of (isVertical?Factor:1) to handle CUDA bug with ternary operator\n    template<int Factor> const Replicate<ExpressionType,isVertical*Factor+isHorizontal,isHorizontal*Factor+isVertical>\n    EIGEN_DEVICE_FUNC\n    replicate(Index factor = Factor) const\n    {\n      return Replicate<ExpressionType,(isVertical?Factor:1),(isHorizontal?Factor:1)>\n          (_expression(),isVertical?factor:1,isHorizontal?factor:1);\n    }\n\n/////////// Artithmetic operators ///////////\n\n    /** Copies the vector \\a other to each subvector of \\c *this */\n    template<typename OtherDerived>\n    EIGEN_DEVICE_FUNC\n    ExpressionType& operator=(const DenseBase<OtherDerived>& other)\n    {\n      EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)\n      EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)\n      //eigen_assert((m_matrix.isNull()) == (other.isNull())); FIXME\n      return m_matrix = extendedTo(other.derived());\n    }\n\n    /** Adds the vector \\a other to each subvector of \\c *this */\n    template<typename OtherDerived>\n    EIGEN_DEVICE_FUNC\n    ExpressionType& operator+=(const DenseBase<OtherDerived>& other)\n    {\n      EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)\n      EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)\n      return m_matrix += extendedTo(other.derived());\n    }\n\n    /** Substracts the vector \\a other to each subvector of \\c *this */\n    template<typename OtherDerived>\n    EIGEN_DEVICE_FUNC\n    ExpressionType& operator-=(const DenseBase<OtherDerived>& other)\n    {\n      EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)\n      EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)\n      return m_matrix -= extendedTo(other.derived());\n    }\n\n    /** Multiples each subvector of \\c *this by the vector \\a other */\n    template<typename OtherDerived>\n    EIGEN_DEVICE_FUNC\n    ExpressionType& operator*=(const DenseBase<OtherDerived>& other)\n    {\n      EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)\n      EIGEN_STATIC_ASSERT_ARRAYXPR(ExpressionType)\n      EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)\n      m_matrix *= extendedTo(other.derived());\n      return m_matrix;\n    }\n\n    /** Divides each subvector of \\c *this by the vector \\a other */\n    template<typename OtherDerived>\n    EIGEN_DEVICE_FUNC\n    ExpressionType& operator/=(const DenseBase<OtherDerived>& other)\n    {\n      EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)\n      EIGEN_STATIC_ASSERT_ARRAYXPR(ExpressionType)\n      EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)\n      m_matrix /= extendedTo(other.derived());\n      return m_matrix;\n    }\n\n    /** Returns the expression of the sum of the vector \\a other to each subvector of \\c *this */\n    template<typename OtherDerived> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC\n    CwiseBinaryOp<internal::scalar_sum_op<Scalar,typename OtherDerived::Scalar>,\n                  const ExpressionTypeNestedCleaned,\n                  const typename 
ExtendedType<OtherDerived>::Type>\n    operator+(const DenseBase<OtherDerived>& other) const\n    {\n      EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)\n      EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)\n      return m_matrix + extendedTo(other.derived());\n    }\n\n    /** Returns the expression of the difference between each subvector of \\c *this and the vector \\a other */\n    template<typename OtherDerived>\n    EIGEN_DEVICE_FUNC\n    CwiseBinaryOp<internal::scalar_difference_op<Scalar,typename OtherDerived::Scalar>,\n                  const ExpressionTypeNestedCleaned,\n                  const typename ExtendedType<OtherDerived>::Type>\n    operator-(const DenseBase<OtherDerived>& other) const\n    {\n      EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)\n      EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)\n      return m_matrix - extendedTo(other.derived());\n    }\n\n    /** Returns the expression where each subvector is the product of the vector \\a other\n      * by the corresponding subvector of \\c *this */\n    template<typename OtherDerived> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC\n    CwiseBinaryOp<internal::scalar_product_op<Scalar>,\n                  const ExpressionTypeNestedCleaned,\n                  const typename ExtendedType<OtherDerived>::Type>\n    EIGEN_DEVICE_FUNC\n    operator*(const DenseBase<OtherDerived>& other) const\n    {\n      EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)\n      EIGEN_STATIC_ASSERT_ARRAYXPR(ExpressionType)\n      EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)\n      return m_matrix * extendedTo(other.derived());\n    }\n\n    /** Returns the expression where each subvector is the quotient of the corresponding\n      * subvector of \\c *this by the vector \\a other */\n    template<typename OtherDerived>\n    EIGEN_DEVICE_FUNC\n    CwiseBinaryOp<internal::scalar_quotient_op<Scalar>,\n                  const ExpressionTypeNestedCleaned,\n                  const typename ExtendedType<OtherDerived>::Type>\n    operator/(const DenseBase<OtherDerived>& other) const\n    {\n      EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)\n      EIGEN_STATIC_ASSERT_ARRAYXPR(ExpressionType)\n      EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)\n      return m_matrix / extendedTo(other.derived());\n    }\n\n    /** \\returns an expression where each column (or row) of the referenced matrix are normalized.\n      * The referenced matrix is \\b not modified.\n      * \\sa MatrixBase::normalized(), normalize()\n      */\n    EIGEN_DEVICE_FUNC\n    CwiseBinaryOp<internal::scalar_quotient_op<Scalar>,\n                  const ExpressionTypeNestedCleaned,\n                  const typename OppositeExtendedType<NormReturnType>::Type>\n    normalized() const { return m_matrix.cwiseQuotient(extendedToOpposite(this->norm())); }\n\n\n    /** Normalize in-place each row or columns of the referenced matrix.\n      * \\sa MatrixBase::normalize(), normalized()\n      */\n    EIGEN_DEVICE_FUNC void normalize() {\n      m_matrix = this->normalized();\n    }\n\n    EIGEN_DEVICE_FUNC inline void reverseInPlace();\n\n/////////// Geometry module ///////////\n\n    typedef Homogeneous<ExpressionType,Direction> HomogeneousReturnType;\n    EIGEN_DEVICE_FUNC\n    HomogeneousReturnType homogeneous() const;\n\n    typedef typename ExpressionType::PlainObject CrossReturnType;\n    template<typename OtherDerived>\n    EIGEN_DEVICE_FUNC\n    const CrossReturnType cross(const MatrixBase<OtherDerived>& other) const;\n\n    
enum {\n      HNormalized_Size = Direction==Vertical ? internal::traits<ExpressionType>::RowsAtCompileTime\n                                             : internal::traits<ExpressionType>::ColsAtCompileTime,\n      HNormalized_SizeMinusOne = HNormalized_Size==Dynamic ? Dynamic : HNormalized_Size-1\n    };\n    typedef Block<const ExpressionType,\n                  Direction==Vertical   ? int(HNormalized_SizeMinusOne)\n                                        : int(internal::traits<ExpressionType>::RowsAtCompileTime),\n                  Direction==Horizontal ? int(HNormalized_SizeMinusOne)\n                                        : int(internal::traits<ExpressionType>::ColsAtCompileTime)>\n            HNormalized_Block;\n    typedef Block<const ExpressionType,\n                  Direction==Vertical   ? 1 : int(internal::traits<ExpressionType>::RowsAtCompileTime),\n                  Direction==Horizontal ? 1 : int(internal::traits<ExpressionType>::ColsAtCompileTime)>\n            HNormalized_Factors;\n    typedef CwiseBinaryOp<internal::scalar_quotient_op<typename internal::traits<ExpressionType>::Scalar>,\n                const HNormalized_Block,\n                const Replicate<HNormalized_Factors,\n                  Direction==Vertical   ? HNormalized_SizeMinusOne : 1,\n                  Direction==Horizontal ? HNormalized_SizeMinusOne : 1> >\n            HNormalizedReturnType;\n\n    EIGEN_DEVICE_FUNC\n    const HNormalizedReturnType hnormalized() const;\n\n#   ifdef EIGEN_VECTORWISEOP_PLUGIN\n#     include EIGEN_VECTORWISEOP_PLUGIN\n#   endif\n\n  protected:\n    Index redux_length() const\n    {\n      return Direction==Vertical ? m_matrix.rows() : m_matrix.cols();\n    }\n    ExpressionTypeNested m_matrix;\n};\n\n//const colwise moved to DenseBase.h due to CUDA compiler bug\n\n\n/** \\returns a writable VectorwiseOp wrapper of *this providing additional partial reduction operations\n  *\n  * \\sa rowwise(), class VectorwiseOp, \\ref TutorialReductionsVisitorsBroadcasting\n  */\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC inline typename DenseBase<Derived>::ColwiseReturnType\nDenseBase<Derived>::colwise()\n{\n  return ColwiseReturnType(derived());\n}\n\n//const rowwise moved to DenseBase.h due to CUDA compiler bug\n\n\n/** \\returns a writable VectorwiseOp wrapper of *this providing additional partial reduction operations\n  *\n  * \\sa colwise(), class VectorwiseOp, \\ref TutorialReductionsVisitorsBroadcasting\n  */\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC inline typename DenseBase<Derived>::RowwiseReturnType\nDenseBase<Derived>::rowwise()\n{\n  return RowwiseReturnType(derived());\n}\n\n} // end namespace Eigen\n\n#endif // EIGEN_PARTIAL_REDUX_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/Visitor.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_VISITOR_H\n#define EIGEN_VISITOR_H\n\nnamespace Eigen {\n\nnamespace internal {\n\ntemplate<typename Visitor, typename Derived, int UnrollCount>\nstruct visitor_impl\n{\n  enum {\n    col = (UnrollCount-1) / Derived::RowsAtCompileTime,\n    row = (UnrollCount-1) % Derived::RowsAtCompileTime\n  };\n\n  EIGEN_DEVICE_FUNC\n  static inline void run(const Derived &mat, Visitor& visitor)\n  {\n    visitor_impl<Visitor, Derived, UnrollCount-1>::run(mat, visitor);\n    visitor(mat.coeff(row, col), row, col);\n  }\n};\n\ntemplate<typename Visitor, typename Derived>\nstruct visitor_impl<Visitor, Derived, 1>\n{\n  EIGEN_DEVICE_FUNC\n  static inline void run(const Derived &mat, Visitor& visitor)\n  {\n    return visitor.init(mat.coeff(0, 0), 0, 0);\n  }\n};\n\n// This specialization enables visitors on empty matrices at compile-time\ntemplate<typename Visitor, typename Derived>\nstruct visitor_impl<Visitor, Derived, 0> {\n  EIGEN_DEVICE_FUNC\n  static inline void run(const Derived &/*mat*/, Visitor& /*visitor*/)\n  {}\n};\n\ntemplate<typename Visitor, typename Derived>\nstruct visitor_impl<Visitor, Derived, Dynamic>\n{\n  EIGEN_DEVICE_FUNC\n  static inline void run(const Derived& mat, Visitor& visitor)\n  {\n    visitor.init(mat.coeff(0,0), 0, 0);\n    for(Index i = 1; i < mat.rows(); ++i)\n      visitor(mat.coeff(i, 0), i, 0);\n    for(Index j = 1; j < mat.cols(); ++j)\n      for(Index i = 0; i < mat.rows(); ++i)\n        visitor(mat.coeff(i, j), i, j);\n  }\n};\n\n// evaluator adaptor\ntemplate<typename XprType>\nclass visitor_evaluator\n{\npublic:\n  EIGEN_DEVICE_FUNC\n  explicit visitor_evaluator(const XprType &xpr) : m_evaluator(xpr), m_xpr(xpr) {}\n\n  typedef typename XprType::Scalar Scalar;\n  typedef typename XprType::CoeffReturnType CoeffReturnType;\n\n  enum {\n    RowsAtCompileTime = XprType::RowsAtCompileTime,\n    CoeffReadCost = internal::evaluator<XprType>::CoeffReadCost\n  };\n\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index rows() const EIGEN_NOEXCEPT { return m_xpr.rows(); }\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index cols() const EIGEN_NOEXCEPT { return m_xpr.cols(); }\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index size() const EIGEN_NOEXCEPT { return m_xpr.size(); }\n\n  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const\n  { return m_evaluator.coeff(row, col); }\n\nprotected:\n  internal::evaluator<XprType> m_evaluator;\n  const XprType &m_xpr;\n};\n} // end namespace internal\n\n/** Applies the visitor \\a visitor to the whole coefficients of the matrix or vector.\n  *\n  * The template parameter \\a Visitor is the type of the visitor and provides the following interface:\n  * \\code\n  * struct MyVisitor {\n  *   // called for the first coefficient\n  *   void init(const Scalar& value, Index i, Index j);\n  *   // called for all other coefficients\n  *   void operator() (const Scalar& value, Index i, Index j);\n  * };\n  * \\endcode\n  *\n  * \\note compared to one or two \\em for \\em loops, visitors offer automatic\n  * unrolling for small fixed size matrix.\n  *\n  * \\note if the matrix is empty, then the visitor is left unchanged.\n  *\n  * \\sa minCoeff(Index*,Index*), 
maxCoeff(Index*,Index*), DenseBase::redux()\n  */\ntemplate<typename Derived>\ntemplate<typename Visitor>\nEIGEN_DEVICE_FUNC\nvoid DenseBase<Derived>::visit(Visitor& visitor) const\n{\n  if(size()==0)\n    return;\n\n  typedef typename internal::visitor_evaluator<Derived> ThisEvaluator;\n  ThisEvaluator thisEval(derived());\n\n  enum {\n    unroll =  SizeAtCompileTime != Dynamic\n           && SizeAtCompileTime * int(ThisEvaluator::CoeffReadCost) + (SizeAtCompileTime-1) * int(internal::functor_traits<Visitor>::Cost) <= EIGEN_UNROLLING_LIMIT\n  };\n  return internal::visitor_impl<Visitor, ThisEvaluator, unroll ? int(SizeAtCompileTime) : Dynamic>::run(thisEval, visitor);\n}\n\nnamespace internal {\n\n/** \\internal\n  * \\brief Base class to implement min and max visitors\n  */\ntemplate <typename Derived>\nstruct coeff_visitor\n{\n  // default initialization to avoid countless invalid maybe-uninitialized warnings by gcc\n  EIGEN_DEVICE_FUNC\n  coeff_visitor() : row(-1), col(-1), res(0) {}\n  typedef typename Derived::Scalar Scalar;\n  Index row, col;\n  Scalar res;\n  EIGEN_DEVICE_FUNC\n  inline void init(const Scalar& value, Index i, Index j)\n  {\n    res = value;\n    row = i;\n    col = j;\n  }\n};\n\n/** \\internal\n  * \\brief Visitor computing the min coefficient with its value and coordinates\n  *\n  * \\sa DenseBase::minCoeff(Index*, Index*)\n  */\ntemplate <typename Derived, int NaNPropagation>\nstruct min_coeff_visitor : coeff_visitor<Derived>\n{\n  typedef typename Derived::Scalar Scalar;\n  EIGEN_DEVICE_FUNC\n  void operator() (const Scalar& value, Index i, Index j)\n  {\n    if(value < this->res)\n    {\n      this->res = value;\n      this->row = i;\n      this->col = j;\n    }\n  }\n};\n\ntemplate <typename Derived>\nstruct min_coeff_visitor<Derived, PropagateNumbers> : coeff_visitor<Derived>\n{\n  typedef typename Derived::Scalar Scalar;\n  EIGEN_DEVICE_FUNC\n  void operator() (const Scalar& value, Index i, Index j)\n  {\n    if((numext::isnan)(this->res) || (!(numext::isnan)(value) && value < this->res))\n    {\n      this->res = value;\n      this->row = i;\n      this->col = j;\n    }\n  }\n};\n\ntemplate <typename Derived>\nstruct min_coeff_visitor<Derived, PropagateNaN> : coeff_visitor<Derived>\n{\n  typedef typename Derived::Scalar Scalar;\n  EIGEN_DEVICE_FUNC\n  void operator() (const Scalar& value, Index i, Index j)\n  {\n    if((numext::isnan)(value) || value < this->res)\n    {\n      this->res = value;\n      this->row = i;\n      this->col = j;\n    }\n  }\n};\n\ntemplate<typename Scalar, int NaNPropagation>\n    struct functor_traits<min_coeff_visitor<Scalar, NaNPropagation> > {\n  enum {\n    Cost = NumTraits<Scalar>::AddCost\n  };\n};\n\n/** \\internal\n  * \\brief Visitor computing the max coefficient with its value and coordinates\n  *\n  * \\sa DenseBase::maxCoeff(Index*, Index*)\n  */\ntemplate <typename Derived, int NaNPropagation>\nstruct max_coeff_visitor : coeff_visitor<Derived>\n{\n  typedef typename Derived::Scalar Scalar;\n  EIGEN_DEVICE_FUNC\n  void operator() (const Scalar& value, Index i, Index j)\n  {\n    if(value > this->res)\n    {\n      this->res = value;\n      this->row = i;\n      this->col = j;\n    }\n  }\n};\n\ntemplate <typename Derived>\nstruct max_coeff_visitor<Derived, PropagateNumbers> : coeff_visitor<Derived>\n{\n  typedef typename Derived::Scalar Scalar;\n  EIGEN_DEVICE_FUNC\n  void operator() (const Scalar& value, Index i, Index j)\n  {\n    if((numext::isnan)(this->res) || (!(numext::isnan)(value) && value > this->res))\n    
{\n      this->res = value;\n      this->row = i;\n      this->col = j;\n    }\n  }\n};\n\ntemplate <typename Derived>\nstruct max_coeff_visitor<Derived, PropagateNaN> : coeff_visitor<Derived>\n{\n  typedef typename Derived::Scalar Scalar;\n  EIGEN_DEVICE_FUNC\n  void operator() (const Scalar& value, Index i, Index j)\n  {\n    if((numext::isnan)(value) || value > this->res)\n    {\n      this->res = value;\n      this->row = i;\n      this->col = j;\n    }\n  }\n};\n\ntemplate<typename Scalar, int NaNPropagation>\nstruct functor_traits<max_coeff_visitor<Scalar, NaNPropagation> > {\n  enum {\n    Cost = NumTraits<Scalar>::AddCost\n  };\n};\n\n} // end namespace internal\n\n/** \\fn DenseBase<Derived>::minCoeff(IndexType* rowId, IndexType* colId) const\n  * \\returns the minimum of all coefficients of *this and puts in *row and *col its location.\n  *\n  * In case \\c *this contains NaN, NaNPropagation determines the behavior:\n  *   NaNPropagation == PropagateFast : undefined\n  *   NaNPropagation == PropagateNaN : result is NaN\n  *   NaNPropagation == PropagateNumbers : result is maximum of elements that are not NaN\n  * \\warning the matrix must be not empty, otherwise an assertion is triggered.\n  *\n  * \\sa DenseBase::minCoeff(Index*), DenseBase::maxCoeff(Index*,Index*), DenseBase::visit(), DenseBase::minCoeff()\n  */\ntemplate<typename Derived>\ntemplate<int NaNPropagation, typename IndexType>\nEIGEN_DEVICE_FUNC\ntypename internal::traits<Derived>::Scalar\nDenseBase<Derived>::minCoeff(IndexType* rowId, IndexType* colId) const\n{\n  eigen_assert(this->rows()>0 && this->cols()>0 && \"you are using an empty matrix\");\n\n  internal::min_coeff_visitor<Derived, NaNPropagation> minVisitor;\n  this->visit(minVisitor);\n  *rowId = minVisitor.row;\n  if (colId) *colId = minVisitor.col;\n  return minVisitor.res;\n}\n\n/** \\returns the minimum of all coefficients of *this and puts in *index its location.\n  *\n  * In case \\c *this contains NaN, NaNPropagation determines the behavior:\n  *   NaNPropagation == PropagateFast : undefined\n  *   NaNPropagation == PropagateNaN : result is NaN\n  *   NaNPropagation == PropagateNumbers : result is maximum of elements that are not NaN\n  * \\warning the matrix must be not empty, otherwise an assertion is triggered.\n  *\n  * \\sa DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::maxCoeff(IndexType*,IndexType*), DenseBase::visit(), DenseBase::minCoeff()\n  */\ntemplate<typename Derived>\ntemplate<int NaNPropagation, typename IndexType>\nEIGEN_DEVICE_FUNC\ntypename internal::traits<Derived>::Scalar\nDenseBase<Derived>::minCoeff(IndexType* index) const\n{\n  eigen_assert(this->rows()>0 && this->cols()>0 && \"you are using an empty matrix\");\n\n  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)\n      internal::min_coeff_visitor<Derived, NaNPropagation> minVisitor;\n  this->visit(minVisitor);\n  *index = IndexType((RowsAtCompileTime==1) ? 
minVisitor.col : minVisitor.row);\n  return minVisitor.res;\n}\n\n/** \\fn DenseBase<Derived>::maxCoeff(IndexType* rowId, IndexType* colId) const\n  * \\returns the maximum of all coefficients of *this and puts in *row and *col its location.\n  *\n  * In case \\c *this contains NaN, NaNPropagation determines the behavior:\n  *   NaNPropagation == PropagateFast : undefined\n  *   NaNPropagation == PropagateNaN : result is NaN\n  *   NaNPropagation == PropagateNumbers : result is maximum of elements that are not NaN\n  * \\warning the matrix must be not empty, otherwise an assertion is triggered.\n  *\n  * \\sa DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::visit(), DenseBase::maxCoeff()\n  */\ntemplate<typename Derived>\ntemplate<int NaNPropagation, typename IndexType>\nEIGEN_DEVICE_FUNC\ntypename internal::traits<Derived>::Scalar\nDenseBase<Derived>::maxCoeff(IndexType* rowPtr, IndexType* colPtr) const\n{\n  eigen_assert(this->rows()>0 && this->cols()>0 && \"you are using an empty matrix\");\n\n  internal::max_coeff_visitor<Derived, NaNPropagation> maxVisitor;\n  this->visit(maxVisitor);\n  *rowPtr = maxVisitor.row;\n  if (colPtr) *colPtr = maxVisitor.col;\n  return maxVisitor.res;\n}\n\n/** \\returns the maximum of all coefficients of *this and puts in *index its location.\n  *\n  * In case \\c *this contains NaN, NaNPropagation determines the behavior:\n  *   NaNPropagation == PropagateFast : undefined\n  *   NaNPropagation == PropagateNaN : result is NaN\n  *   NaNPropagation == PropagateNumbers : result is maximum of elements that are not NaN\n  * \\warning the matrix must be not empty, otherwise an assertion is triggered.\n  *\n  * \\sa DenseBase::maxCoeff(IndexType*,IndexType*), DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::visitor(), DenseBase::maxCoeff()\n  */\ntemplate<typename Derived>\ntemplate<int NaNPropagation, typename IndexType>\nEIGEN_DEVICE_FUNC\ntypename internal::traits<Derived>::Scalar\nDenseBase<Derived>::maxCoeff(IndexType* index) const\n{\n  eigen_assert(this->rows()>0 && this->cols()>0 && \"you are using an empty matrix\");\n\n  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)\n      internal::max_coeff_visitor<Derived, NaNPropagation> maxVisitor;\n  this->visit(maxVisitor);\n  *index = (RowsAtCompileTime==1) ? maxVisitor.col : maxVisitor.row;\n  return maxVisitor.res;\n}\n\n} // end namespace Eigen\n\n#endif // EIGEN_VISITOR_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/arch/AVX/Complex.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2014 Benoit Steiner (benoit.steiner.goog@gmail.com)\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_COMPLEX_AVX_H\n#define EIGEN_COMPLEX_AVX_H\n\nnamespace Eigen {\n\nnamespace internal {\n\n//---------- float ----------\nstruct Packet4cf\n{\n  EIGEN_STRONG_INLINE Packet4cf() {}\n  EIGEN_STRONG_INLINE explicit Packet4cf(const __m256& a) : v(a) {}\n  __m256  v;\n};\n\n#ifndef EIGEN_VECTORIZE_AVX512\ntemplate<> struct packet_traits<std::complex<float> >  : default_packet_traits\n{\n  typedef Packet4cf type;\n  typedef Packet2cf half;\n  enum {\n    Vectorizable = 1,\n    AlignedOnScalar = 1,\n    size = 4,\n    HasHalfPacket = 1,\n\n    HasAdd    = 1,\n    HasSub    = 1,\n    HasMul    = 1,\n    HasDiv    = 1,\n    HasNegate = 1,\n    HasSqrt   = 1,\n    HasAbs    = 0,\n    HasAbs2   = 0,\n    HasMin    = 0,\n    HasMax    = 0,\n    HasSetLinear = 0\n  };\n};\n#endif\n\ntemplate<> struct unpacket_traits<Packet4cf> {\n  typedef std::complex<float> type;\n  typedef Packet2cf half;\n  typedef Packet8f as_real;\n  enum {\n    size=4,\n    alignment=Aligned32,\n    vectorizable=true,\n    masked_load_available=false,\n    masked_store_available=false\n  };\n};\n\ntemplate<> EIGEN_STRONG_INLINE Packet4cf padd<Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_add_ps(a.v,b.v)); }\ntemplate<> EIGEN_STRONG_INLINE Packet4cf psub<Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_sub_ps(a.v,b.v)); }\ntemplate<> EIGEN_STRONG_INLINE Packet4cf pnegate(const Packet4cf& a)\n{\n  return Packet4cf(pnegate(a.v));\n}\ntemplate<> EIGEN_STRONG_INLINE Packet4cf pconj(const Packet4cf& a)\n{\n  const __m256 mask = _mm256_castsi256_ps(_mm256_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000,0x00000000,0x80000000,0x00000000,0x80000000));\n  return Packet4cf(_mm256_xor_ps(a.v,mask));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4cf pmul<Packet4cf>(const Packet4cf& a, const Packet4cf& b)\n{\n  __m256 tmp1 = _mm256_mul_ps(_mm256_moveldup_ps(a.v), b.v);\n  __m256 tmp2 = _mm256_mul_ps(_mm256_movehdup_ps(a.v), _mm256_permute_ps(b.v, _MM_SHUFFLE(2,3,0,1)));\n  __m256 result = _mm256_addsub_ps(tmp1, tmp2);\n  return Packet4cf(result);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4cf pcmp_eq(const Packet4cf& a, const Packet4cf& b) {\n  __m256 eq = _mm256_cmp_ps(a.v, b.v, _CMP_EQ_OQ);\n  return Packet4cf(_mm256_and_ps(eq, _mm256_permute_ps(eq, 0xb1)));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4cf ptrue<Packet4cf>(const Packet4cf& a) { return Packet4cf(ptrue(Packet8f(a.v))); }\ntemplate<> EIGEN_STRONG_INLINE Packet4cf pand   <Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_and_ps(a.v,b.v)); }\ntemplate<> EIGEN_STRONG_INLINE Packet4cf por    <Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_or_ps(a.v,b.v)); }\ntemplate<> EIGEN_STRONG_INLINE Packet4cf pxor   <Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_xor_ps(a.v,b.v)); }\ntemplate<> EIGEN_STRONG_INLINE Packet4cf pandnot<Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_andnot_ps(b.v,a.v)); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet4cf pload <Packet4cf>(const std::complex<float>* from) { 
EIGEN_DEBUG_ALIGNED_LOAD return Packet4cf(pload<Packet8f>(&numext::real_ref(*from))); }\ntemplate<> EIGEN_STRONG_INLINE Packet4cf ploadu<Packet4cf>(const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet4cf(ploadu<Packet8f>(&numext::real_ref(*from))); }\n\n\ntemplate<> EIGEN_STRONG_INLINE Packet4cf pset1<Packet4cf>(const std::complex<float>& from)\n{\n  return Packet4cf(_mm256_castpd_ps(_mm256_broadcast_sd((const double*)(const void*)&from)));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4cf ploaddup<Packet4cf>(const std::complex<float>* from)\n{\n  // FIXME The following might be optimized using _mm256_movedup_pd\n  Packet2cf a = ploaddup<Packet2cf>(from);\n  Packet2cf b = ploaddup<Packet2cf>(from+1);\n  return  Packet4cf(_mm256_insertf128_ps(_mm256_castps128_ps256(a.v), b.v, 1));\n}\n\ntemplate<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float>* to, const Packet4cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore(&numext::real_ref(*to), from.v); }\ntemplate<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float>* to, const Packet4cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(&numext::real_ref(*to), from.v); }\n\ntemplate<> EIGEN_DEVICE_FUNC inline Packet4cf pgather<std::complex<float>, Packet4cf>(const std::complex<float>* from, Index stride)\n{\n  return Packet4cf(_mm256_set_ps(std::imag(from[3*stride]), std::real(from[3*stride]),\n                                 std::imag(from[2*stride]), std::real(from[2*stride]),\n                                 std::imag(from[1*stride]), std::real(from[1*stride]),\n                                 std::imag(from[0*stride]), std::real(from[0*stride])));\n}\n\ntemplate<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet4cf>(std::complex<float>* to, const Packet4cf& from, Index stride)\n{\n  __m128 low = _mm256_extractf128_ps(from.v, 0);\n  to[stride*0] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(low, low, 0)),\n                                     _mm_cvtss_f32(_mm_shuffle_ps(low, low, 1)));\n  to[stride*1] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(low, low, 2)),\n                                     _mm_cvtss_f32(_mm_shuffle_ps(low, low, 3)));\n\n  __m128 high = _mm256_extractf128_ps(from.v, 1);\n  to[stride*2] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(high, high, 0)),\n                                     _mm_cvtss_f32(_mm_shuffle_ps(high, high, 1)));\n  to[stride*3] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(high, high, 2)),\n                                     _mm_cvtss_f32(_mm_shuffle_ps(high, high, 3)));\n\n}\n\ntemplate<> EIGEN_STRONG_INLINE std::complex<float>  pfirst<Packet4cf>(const Packet4cf& a)\n{\n  return pfirst(Packet2cf(_mm256_castps256_ps128(a.v)));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4cf preverse(const Packet4cf& a) {\n  __m128 low  = _mm256_extractf128_ps(a.v, 0);\n  __m128 high = _mm256_extractf128_ps(a.v, 1);\n  __m128d lowd  = _mm_castps_pd(low);\n  __m128d highd = _mm_castps_pd(high);\n  low  = _mm_castpd_ps(_mm_shuffle_pd(lowd,lowd,0x1));\n  high = _mm_castpd_ps(_mm_shuffle_pd(highd,highd,0x1));\n  __m256 result = _mm256_setzero_ps();\n  result = _mm256_insertf128_ps(result, low, 1);\n  result = _mm256_insertf128_ps(result, high, 0);\n  return Packet4cf(result);\n}\n\ntemplate<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet4cf>(const Packet4cf& a)\n{\n  return predux(padd(Packet2cf(_mm256_extractf128_ps(a.v,0)),\n                     Packet2cf(_mm256_extractf128_ps(a.v,1))));\n}\n\ntemplate<> 
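/* horizontal product: multiply the two 128-bit halves together, then reduce the resulting Packet2cf */ 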
EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet4cf>(const Packet4cf& a)\n{\n  return predux_mul(pmul(Packet2cf(_mm256_extractf128_ps(a.v, 0)),\n                         Packet2cf(_mm256_extractf128_ps(a.v, 1))));\n}\n\nEIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet4cf,Packet8f)\n\ntemplate<> EIGEN_STRONG_INLINE Packet4cf pdiv<Packet4cf>(const Packet4cf& a, const Packet4cf& b)\n{\n  Packet4cf num = pmul(a, pconj(b));\n  __m256 tmp = _mm256_mul_ps(b.v, b.v);\n  __m256 tmp2    = _mm256_shuffle_ps(tmp,tmp,0xB1);\n  __m256 denom = _mm256_add_ps(tmp, tmp2);\n  return Packet4cf(_mm256_div_ps(num.v, denom));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4cf pcplxflip<Packet4cf>(const Packet4cf& x)\n{\n  return Packet4cf(_mm256_shuffle_ps(x.v, x.v, _MM_SHUFFLE(2, 3, 0 ,1)));\n}\n\n//---------- double ----------\nstruct Packet2cd\n{\n  EIGEN_STRONG_INLINE Packet2cd() {}\n  EIGEN_STRONG_INLINE explicit Packet2cd(const __m256d& a) : v(a) {}\n  __m256d  v;\n};\n\n#ifndef EIGEN_VECTORIZE_AVX512\ntemplate<> struct packet_traits<std::complex<double> >  : default_packet_traits\n{\n  typedef Packet2cd type;\n  typedef Packet1cd half;\n  enum {\n    Vectorizable = 1,\n    AlignedOnScalar = 0,\n    size = 2,\n    HasHalfPacket = 1,\n\n    HasAdd    = 1,\n    HasSub    = 1,\n    HasMul    = 1,\n    HasDiv    = 1,\n    HasNegate = 1,\n    HasSqrt   = 1,\n    HasAbs    = 0,\n    HasAbs2   = 0,\n    HasMin    = 0,\n    HasMax    = 0,\n    HasSetLinear = 0\n  };\n};\n#endif\n\ntemplate<> struct unpacket_traits<Packet2cd> {\n  typedef std::complex<double> type;\n  typedef Packet1cd half;\n  typedef Packet4d as_real;\n  enum {\n    size=2,\n    alignment=Aligned32,\n    vectorizable=true,\n    masked_load_available=false,\n    masked_store_available=false\n  };\n};\n\ntemplate<> EIGEN_STRONG_INLINE Packet2cd padd<Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_add_pd(a.v,b.v)); }\ntemplate<> EIGEN_STRONG_INLINE Packet2cd psub<Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_sub_pd(a.v,b.v)); }\ntemplate<> EIGEN_STRONG_INLINE Packet2cd pnegate(const Packet2cd& a) { return Packet2cd(pnegate(a.v)); }\ntemplate<> EIGEN_STRONG_INLINE Packet2cd pconj(const Packet2cd& a)\n{\n  const __m256d mask = _mm256_castsi256_pd(_mm256_set_epi32(0x80000000,0x0,0x0,0x0,0x80000000,0x0,0x0,0x0));\n  return Packet2cd(_mm256_xor_pd(a.v,mask));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet2cd pmul<Packet2cd>(const Packet2cd& a, const Packet2cd& b)\n{\n  __m256d tmp1 = _mm256_shuffle_pd(a.v,a.v,0x0);\n  __m256d even = _mm256_mul_pd(tmp1, b.v);\n  __m256d tmp2 = _mm256_shuffle_pd(a.v,a.v,0xF);\n  __m256d tmp3 = _mm256_shuffle_pd(b.v,b.v,0x5);\n  __m256d odd  = _mm256_mul_pd(tmp2, tmp3);\n  return Packet2cd(_mm256_addsub_pd(even, odd));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet2cd pcmp_eq(const Packet2cd& a, const Packet2cd& b) {\n  __m256d eq = _mm256_cmp_pd(a.v, b.v, _CMP_EQ_OQ);\n  return Packet2cd(pand(eq, _mm256_permute_pd(eq, 0x5)));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet2cd ptrue<Packet2cd>(const Packet2cd& a) { return Packet2cd(ptrue(Packet4d(a.v))); }\ntemplate<> EIGEN_STRONG_INLINE Packet2cd pand   <Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_and_pd(a.v,b.v)); }\ntemplate<> EIGEN_STRONG_INLINE Packet2cd por    <Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_or_pd(a.v,b.v)); }\ntemplate<> EIGEN_STRONG_INLINE Packet2cd pxor   <Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return 
Packet2cd(_mm256_xor_pd(a.v,b.v)); }\ntemplate<> EIGEN_STRONG_INLINE Packet2cd pandnot<Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_andnot_pd(b.v,a.v)); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet2cd pload <Packet2cd>(const std::complex<double>* from)\n{ EIGEN_DEBUG_ALIGNED_LOAD return Packet2cd(pload<Packet4d>((const double*)from)); }\ntemplate<> EIGEN_STRONG_INLINE Packet2cd ploadu<Packet2cd>(const std::complex<double>* from)\n{ EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cd(ploadu<Packet4d>((const double*)from)); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet2cd pset1<Packet2cd>(const std::complex<double>& from)\n{\n  // In case casting to a __m128d* is really not safe, we can still fall back to this (much slower) version:\n//   return Packet2cd(_mm256_loadu2_m128d((const double*)&from,(const double*)&from));\n    return Packet2cd(_mm256_broadcast_pd((const __m128d*)(const void*)&from));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet2cd ploaddup<Packet2cd>(const std::complex<double>* from) { return pset1<Packet2cd>(*from); }\n\ntemplate<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> *   to, const Packet2cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, from.v); }\ntemplate<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> *   to, const Packet2cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, from.v); }\n\ntemplate<> EIGEN_DEVICE_FUNC inline Packet2cd pgather<std::complex<double>, Packet2cd>(const std::complex<double>* from, Index stride)\n{\n  return Packet2cd(_mm256_set_pd(std::imag(from[1*stride]), std::real(from[1*stride]),\n\t\t\t\t std::imag(from[0*stride]), std::real(from[0*stride])));\n}\n\ntemplate<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet2cd>(std::complex<double>* to, const Packet2cd& from, Index stride)\n{\n  __m128d low = _mm256_extractf128_pd(from.v, 0);\n  to[stride*0] = std::complex<double>(_mm_cvtsd_f64(low), _mm_cvtsd_f64(_mm_shuffle_pd(low, low, 1)));\n  __m128d high = _mm256_extractf128_pd(from.v, 1);\n  to[stride*1] = std::complex<double>(_mm_cvtsd_f64(high), _mm_cvtsd_f64(_mm_shuffle_pd(high, high, 1)));\n}\n\ntemplate<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet2cd>(const Packet2cd& a)\n{\n  __m128d low = _mm256_extractf128_pd(a.v, 0);\n  EIGEN_ALIGN16 double res[2];\n  _mm_store_pd(res, low);\n  return std::complex<double>(res[0],res[1]);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet2cd preverse(const Packet2cd& a) {\n  __m256d result = _mm256_permute2f128_pd(a.v, a.v, 1);\n  return Packet2cd(result);\n}\n\ntemplate<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet2cd>(const Packet2cd& a)\n{\n  return predux(padd(Packet1cd(_mm256_extractf128_pd(a.v,0)),\n                     Packet1cd(_mm256_extractf128_pd(a.v,1))));\n}\n\ntemplate<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet2cd>(const Packet2cd& a)\n{\n  return predux(pmul(Packet1cd(_mm256_extractf128_pd(a.v,0)),\n                     Packet1cd(_mm256_extractf128_pd(a.v,1))));\n}\n\nEIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cd,Packet4d)\n\ntemplate<> EIGEN_STRONG_INLINE Packet2cd pdiv<Packet2cd>(const Packet2cd& a, const Packet2cd& b)\n{\n  Packet2cd num = pmul(a, pconj(b));\n  __m256d tmp = _mm256_mul_pd(b.v, b.v);\n  __m256d denom = _mm256_hadd_pd(tmp, tmp);\n  return Packet2cd(_mm256_div_pd(num.v, denom));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet2cd pcplxflip<Packet2cd>(const Packet2cd& x)\n{\n  return Packet2cd(_mm256_shuffle_pd(x.v, x.v, 
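/* swap the real and imaginary parts of each complex number */ 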
0x5));\n}\n\nEIGEN_DEVICE_FUNC inline void\nptranspose(PacketBlock<Packet4cf,4>& kernel) {\n  __m256d P0 = _mm256_castps_pd(kernel.packet[0].v);\n  __m256d P1 = _mm256_castps_pd(kernel.packet[1].v);\n  __m256d P2 = _mm256_castps_pd(kernel.packet[2].v);\n  __m256d P3 = _mm256_castps_pd(kernel.packet[3].v);\n\n  __m256d T0 = _mm256_shuffle_pd(P0, P1, 15);\n  __m256d T1 = _mm256_shuffle_pd(P0, P1, 0);\n  __m256d T2 = _mm256_shuffle_pd(P2, P3, 15);\n  __m256d T3 = _mm256_shuffle_pd(P2, P3, 0);\n\n  kernel.packet[1].v = _mm256_castpd_ps(_mm256_permute2f128_pd(T0, T2, 32));\n  kernel.packet[3].v = _mm256_castpd_ps(_mm256_permute2f128_pd(T0, T2, 49));\n  kernel.packet[0].v = _mm256_castpd_ps(_mm256_permute2f128_pd(T1, T3, 32));\n  kernel.packet[2].v = _mm256_castpd_ps(_mm256_permute2f128_pd(T1, T3, 49));\n}\n\nEIGEN_DEVICE_FUNC inline void\nptranspose(PacketBlock<Packet2cd,2>& kernel) {\n  __m256d tmp = _mm256_permute2f128_pd(kernel.packet[0].v, kernel.packet[1].v, 0+(2<<4));\n  kernel.packet[1].v = _mm256_permute2f128_pd(kernel.packet[0].v, kernel.packet[1].v, 1+(3<<4));\n kernel.packet[0].v = tmp;\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet2cd psqrt<Packet2cd>(const Packet2cd& a) {\n  return psqrt_complex<Packet2cd>(a);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4cf psqrt<Packet4cf>(const Packet4cf& a) {\n  return psqrt_complex<Packet4cf>(a);\n}\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_COMPLEX_AVX_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/arch/AVX/MathFunctions.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2014 Pedro Gonnet (pedro.gonnet@gmail.com)\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_MATH_FUNCTIONS_AVX_H\n#define EIGEN_MATH_FUNCTIONS_AVX_H\n\n/* The sin and cos functions of this file are loosely derived from\n * Julien Pommier's sse math library: http://gruntthepeon.free.fr/ssemath/\n */\n\nnamespace Eigen {\n\nnamespace internal {\n\ntemplate <>\nEIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f\npsin<Packet8f>(const Packet8f& _x) {\n  return psin_float(_x);\n}\n\ntemplate <>\nEIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f\npcos<Packet8f>(const Packet8f& _x) {\n  return pcos_float(_x);\n}\n\ntemplate <>\nEIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f\nplog<Packet8f>(const Packet8f& _x) {\n  return plog_float(_x);\n}\n\ntemplate <>\nEIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4d\nplog<Packet4d>(const Packet4d& _x) {\n  return plog_double(_x);\n}\n\ntemplate <>\nEIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f\nplog2<Packet8f>(const Packet8f& _x) {\n  return plog2_float(_x);\n}\n\ntemplate <>\nEIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4d\nplog2<Packet4d>(const Packet4d& _x) {\n  return plog2_double(_x);\n}\n\ntemplate<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED\nPacket8f plog1p<Packet8f>(const Packet8f& _x) {\n  return generic_plog1p(_x);\n}\n\ntemplate<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED\nPacket8f pexpm1<Packet8f>(const Packet8f& _x) {\n  return generic_expm1(_x);\n}\n\n// Exponential function. Works by writing \"x = m*log(2) + r\" where\n// \"m = floor(x/log(2)+1/2)\" and \"r\" is the remainder. The result is then\n// \"exp(x) = 2^m*exp(r)\" where exp(r) is in the range [-1,1).\ntemplate <>\nEIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f\npexp<Packet8f>(const Packet8f& _x) {\n  return pexp_float(_x);\n}\n\n// Hyperbolic Tangent function.\ntemplate <>\nEIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f\nptanh<Packet8f>(const Packet8f& _x) {\n  return internal::generic_fast_tanh_float(_x);\n}\n\n// Exponential function for doubles.\ntemplate <>\nEIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4d\npexp<Packet4d>(const Packet4d& _x) {\n  return pexp_double(_x);\n}\n\n// Functions for sqrt.\n// The EIGEN_FAST_MATH version uses the _mm_rsqrt_ps approximation and one step\n// of Newton's method, at a cost of 1-2 bits of precision as opposed to the\n// exact solution. It does not handle +inf, or denormalized numbers correctly.\n// The main advantage of this approach is not just speed, but also the fact that\n// it can be inlined and pipelined with other computations, further reducing its\n// effective latency. 
This is similar to Quake3's fast inverse square root.\n// For detail see here: http://www.beyond3d.com/content/articles/8/\n#if EIGEN_FAST_MATH\ntemplate <>\nEIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED\nPacket8f psqrt<Packet8f>(const Packet8f& _x) {\n  Packet8f minus_half_x = pmul(_x, pset1<Packet8f>(-0.5f));\n  Packet8f denormal_mask = pandnot(\n      pcmp_lt(_x, pset1<Packet8f>((std::numeric_limits<float>::min)())),\n      pcmp_lt(_x, pzero(_x)));\n\n  // Compute approximate reciprocal sqrt.\n  Packet8f x = _mm256_rsqrt_ps(_x);\n  // Do a single step of Newton's iteration.\n  x = pmul(x, pmadd(minus_half_x, pmul(x,x), pset1<Packet8f>(1.5f)));\n  // Flush results for denormals to zero.\n  return pandnot(pmul(_x,x), denormal_mask);\n}\n\n#else\n\ntemplate <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED\nPacket8f psqrt<Packet8f>(const Packet8f& _x) {\n  return _mm256_sqrt_ps(_x);\n}\n\n#endif\n\ntemplate <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED\nPacket4d psqrt<Packet4d>(const Packet4d& _x) {\n  return _mm256_sqrt_pd(_x);\n}\n\n#if EIGEN_FAST_MATH\ntemplate<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED\nPacket8f prsqrt<Packet8f>(const Packet8f& _x) {\n  _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(inf, 0x7f800000);\n  _EIGEN_DECLARE_CONST_Packet8f(one_point_five, 1.5f);\n  _EIGEN_DECLARE_CONST_Packet8f(minus_half, -0.5f);\n  _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(flt_min, 0x00800000);\n\n  Packet8f neg_half = pmul(_x, p8f_minus_half);\n\n  // select only the inverse sqrt of positive normal inputs (denormals are\n  // flushed to zero and cause infs as well).\n  Packet8f lt_min_mask = _mm256_cmp_ps(_x, p8f_flt_min, _CMP_LT_OQ);\n  Packet8f inf_mask =  _mm256_cmp_ps(_x, p8f_inf, _CMP_EQ_OQ);\n  Packet8f not_normal_finite_mask = _mm256_or_ps(lt_min_mask, inf_mask);\n\n  // Compute an approximate result using the rsqrt intrinsic.\n  Packet8f y_approx = _mm256_rsqrt_ps(_x);\n\n  // Do a single step of Newton-Raphson iteration to improve the approximation.\n  // This uses the formula y_{n+1} = y_n * (1.5 - y_n * (0.5 * x) * y_n).\n  // It is essential to evaluate the inner term like this because forming\n  // y_n^2 may over- or underflow.\n  Packet8f y_newton = pmul(y_approx, pmadd(y_approx, pmul(neg_half, y_approx), p8f_one_point_five));\n\n  // Select the result of the Newton-Raphson step for positive normal arguments.\n  // For other arguments, choose the output of the intrinsic. 
This will\n  // return rsqrt(+inf) = 0, rsqrt(x) = NaN if x < 0, and rsqrt(x) = +inf if\n  // x is zero or a positive denormalized float (equivalent to flushing positive\n  // denormalized inputs to zero).\n  return pselect<Packet8f>(not_normal_finite_mask, y_approx, y_newton);\n}\n\n#else\ntemplate <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED\nPacket8f prsqrt<Packet8f>(const Packet8f& _x) {\n  _EIGEN_DECLARE_CONST_Packet8f(one, 1.0f);\n  return _mm256_div_ps(p8f_one, _mm256_sqrt_ps(_x));\n}\n#endif\n\ntemplate <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED\nPacket4d prsqrt<Packet4d>(const Packet4d& _x) {\n  _EIGEN_DECLARE_CONST_Packet4d(one, 1.0);\n  return _mm256_div_pd(p4d_one, _mm256_sqrt_pd(_x));\n}\n\nF16_PACKET_FUNCTION(Packet8f, Packet8h, psin)\nF16_PACKET_FUNCTION(Packet8f, Packet8h, pcos)\nF16_PACKET_FUNCTION(Packet8f, Packet8h, plog)\nF16_PACKET_FUNCTION(Packet8f, Packet8h, plog2)\nF16_PACKET_FUNCTION(Packet8f, Packet8h, plog1p)\nF16_PACKET_FUNCTION(Packet8f, Packet8h, pexpm1)\nF16_PACKET_FUNCTION(Packet8f, Packet8h, pexp)\nF16_PACKET_FUNCTION(Packet8f, Packet8h, ptanh)\nF16_PACKET_FUNCTION(Packet8f, Packet8h, psqrt)\nF16_PACKET_FUNCTION(Packet8f, Packet8h, prsqrt)\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet8h pfrexp(const Packet8h& a, Packet8h& exponent) {\n  Packet8f fexponent;\n  const Packet8h out = float2half(pfrexp<Packet8f>(half2float(a), fexponent));\n  exponent = float2half(fexponent);\n  return out;\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet8h pldexp(const Packet8h& a, const Packet8h& exponent) {\n  return float2half(pldexp<Packet8f>(half2float(a), half2float(exponent)));\n}\n\nBF16_PACKET_FUNCTION(Packet8f, Packet8bf, psin)\nBF16_PACKET_FUNCTION(Packet8f, Packet8bf, pcos)\nBF16_PACKET_FUNCTION(Packet8f, Packet8bf, plog)\nBF16_PACKET_FUNCTION(Packet8f, Packet8bf, plog2)\nBF16_PACKET_FUNCTION(Packet8f, Packet8bf, plog1p)\nBF16_PACKET_FUNCTION(Packet8f, Packet8bf, pexpm1)\nBF16_PACKET_FUNCTION(Packet8f, Packet8bf, pexp)\nBF16_PACKET_FUNCTION(Packet8f, Packet8bf, ptanh)\nBF16_PACKET_FUNCTION(Packet8f, Packet8bf, psqrt)\nBF16_PACKET_FUNCTION(Packet8f, Packet8bf, prsqrt)\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet8bf pfrexp(const Packet8bf& a, Packet8bf& exponent) {\n  Packet8f fexponent;\n  const Packet8bf out = F32ToBf16(pfrexp<Packet8f>(Bf16ToF32(a), fexponent));\n  exponent = F32ToBf16(fexponent);\n  return out;\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet8bf pldexp(const Packet8bf& a, const Packet8bf& exponent) {\n  return F32ToBf16(pldexp<Packet8f>(Bf16ToF32(a), Bf16ToF32(exponent)));\n}\n\n}  // end namespace internal\n\n}  // end namespace Eigen\n\n#endif  // EIGEN_MATH_FUNCTIONS_AVX_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/arch/AVX/PacketMath.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2014 Benoit Steiner (benoit.steiner.goog@gmail.com)\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_PACKET_MATH_AVX_H\n#define EIGEN_PACKET_MATH_AVX_H\n\nnamespace Eigen {\n\nnamespace internal {\n\n#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD\n#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8\n#endif\n\n#if !defined(EIGEN_VECTORIZE_AVX512) && !defined(EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS)\n#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 16\n#endif\n\n#ifdef EIGEN_VECTORIZE_FMA\n#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD\n#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD\n#endif\n#endif\n\ntypedef __m256  Packet8f;\ntypedef __m256i Packet8i;\ntypedef __m256d Packet4d;\ntypedef eigen_packet_wrapper<__m128i, 2> Packet8h;\ntypedef eigen_packet_wrapper<__m128i, 3> Packet8bf;\n\ntemplate<> struct is_arithmetic<__m256>  { enum { value = true }; };\ntemplate<> struct is_arithmetic<__m256i> { enum { value = true }; };\ntemplate<> struct is_arithmetic<__m256d> { enum { value = true }; };\ntemplate<> struct is_arithmetic<Packet8h> { enum { value = true }; };\ntemplate<> struct is_arithmetic<Packet8bf> { enum { value = true }; };\n\n#define _EIGEN_DECLARE_CONST_Packet8f(NAME,X) \\\n  const Packet8f p8f_##NAME = pset1<Packet8f>(X)\n\n#define _EIGEN_DECLARE_CONST_Packet4d(NAME,X) \\\n  const Packet4d p4d_##NAME = pset1<Packet4d>(X)\n\n#define _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(NAME,X) \\\n  const Packet8f p8f_##NAME = _mm256_castsi256_ps(pset1<Packet8i>(X))\n\n#define _EIGEN_DECLARE_CONST_Packet8i(NAME,X) \\\n  const Packet8i p8i_##NAME = pset1<Packet8i>(X)\n\n// Use the packet_traits defined in AVX512/PacketMath.h instead if we're going\n// to leverage AVX512 instructions.\n#ifndef EIGEN_VECTORIZE_AVX512\ntemplate<> struct packet_traits<float>  : default_packet_traits\n{\n  typedef Packet8f type;\n  typedef Packet4f half;\n  enum {\n    Vectorizable = 1,\n    AlignedOnScalar = 1,\n    size = 8,\n    HasHalfPacket = 1,\n\n    HasCmp  = 1,\n    HasDiv = 1,\n    HasSin = EIGEN_FAST_MATH,\n    HasCos = EIGEN_FAST_MATH,\n    HasLog = 1,\n    HasLog1p = 1,\n    HasExpm1 = 1,\n    HasExp = 1,\n    HasNdtri = 1,\n    HasBessel = 1,\n    HasSqrt = 1,\n    HasRsqrt = 1,\n    HasTanh = EIGEN_FAST_MATH,\n    HasErf = EIGEN_FAST_MATH,\n    HasBlend = 1,\n    HasRound = 1,\n    HasFloor = 1,\n    HasCeil = 1,\n    HasRint = 1\n  };\n};\ntemplate<> struct packet_traits<double> : default_packet_traits\n{\n  typedef Packet4d type;\n  typedef Packet2d half;\n  enum {\n    Vectorizable = 1,\n    AlignedOnScalar = 1,\n    size=4,\n    HasHalfPacket = 1,\n\n    HasCmp  = 1,\n    HasDiv  = 1,\n    HasLog  = 1,\n    HasExp  = 1,\n    HasSqrt = 1,\n    HasRsqrt = 1,\n    HasBlend = 1,\n    HasRound = 1,\n    HasFloor = 1,\n    HasCeil = 1,\n    HasRint = 1\n  };\n};\n\ntemplate <>\nstruct packet_traits<Eigen::half> : default_packet_traits {\n  typedef Packet8h type;\n  // There is no half-size packet for Packet8h.\n  typedef Packet8h half;\n  enum {\n    Vectorizable = 1,\n    AlignedOnScalar = 1,\n    size = 8,\n    HasHalfPacket = 0,\n\n    HasCmp    = 1,\n    HasAdd    = 1,\n    HasSub    = 1,\n    HasMul    = 1,\n    HasDiv    = 1,\n    HasSin    = EIGEN_FAST_MATH,\n    HasCos    = EIGEN_FAST_MATH,\n    HasNegate = 1,\n    
HasAbs    = 1,\n    HasAbs2   = 0,\n    HasMin    = 1,\n    HasMax    = 1,\n    HasConj   = 1,\n    HasSetLinear = 0,\n    HasLog    = 1,\n    HasLog1p  = 1,\n    HasExpm1  = 1,\n    HasExp    = 1,\n    HasSqrt   = 1,\n    HasRsqrt  = 1,\n    HasTanh   = EIGEN_FAST_MATH,\n    HasErf    = EIGEN_FAST_MATH,\n    HasBlend  = 0,\n    HasRound  = 1,\n    HasFloor  = 1,\n    HasCeil   = 1,\n    HasRint   = 1,\n    HasBessel = 1,\n    HasNdtri  = 1\n  };\n};\n\ntemplate <>\nstruct packet_traits<bfloat16> : default_packet_traits {\n  typedef Packet8bf type;\n  // There is no half-size packet for current Packet8bf.\n  // TODO: support as SSE path.\n  typedef Packet8bf half;\n  enum {\n    Vectorizable = 1,\n    AlignedOnScalar = 1,\n    size = 8,\n    HasHalfPacket = 0,\n\n    HasCmp = 1,\n    HasAdd = 1,\n    HasSub = 1,\n    HasMul = 1,\n    HasDiv = 1,\n    HasSin = EIGEN_FAST_MATH,\n    HasCos = EIGEN_FAST_MATH,\n    HasNegate = 1,\n    HasAbs    = 1,\n    HasAbs2   = 0,\n    HasMin    = 1,\n    HasMax    = 1,\n    HasConj   = 1,\n    HasSetLinear = 0,\n    HasLog = 1,\n    HasLog1p  = 1,\n    HasExpm1  = 1,\n    HasExp = 1,\n    HasSqrt = 1,\n    HasRsqrt = 1,\n    HasTanh = EIGEN_FAST_MATH,\n    HasErf = EIGEN_FAST_MATH,\n    HasBlend = 0,\n    HasRound = 1,\n    HasFloor = 1,\n    HasCeil = 1,\n    HasRint = 1,\n    HasBessel = 1,\n    HasNdtri  = 1\n  };\n};\n#endif\n\ntemplate<> struct scalar_div_cost<float,true> { enum { value = 14 }; };\ntemplate<> struct scalar_div_cost<double,true> { enum { value = 16 }; };\n\n/* Proper support for integers is only provided by AVX2. In the meantime, we'll\n   use SSE instructions and packets to deal with integers.\ntemplate<> struct packet_traits<int>    : default_packet_traits\n{\n  typedef Packet8i type;\n  enum {\n    Vectorizable = 1,\n    AlignedOnScalar = 1,\n    size=8\n  };\n};\n*/\n\ntemplate<> struct unpacket_traits<Packet8f> {\n  typedef float     type;\n  typedef Packet4f  half;\n  typedef Packet8i  integer_packet;\n  typedef uint8_t   mask_t;\n  enum {size=8, alignment=Aligned32, vectorizable=true, masked_load_available=true, masked_store_available=true};\n};\ntemplate<> struct unpacket_traits<Packet4d> {\n  typedef double type;\n  typedef Packet2d half;\n  enum {size=4, alignment=Aligned32, vectorizable=true, masked_load_available=false, masked_store_available=false};\n};\ntemplate<> struct unpacket_traits<Packet8i> { typedef int    type; typedef Packet4i half; enum {size=8, alignment=Aligned32, vectorizable=false, masked_load_available=false, masked_store_available=false}; };\ntemplate<> struct unpacket_traits<Packet8bf> { typedef bfloat16 type; typedef Packet8bf half; enum {size=8, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; };\n\n// Helper function for bit packing snippet of low precision comparison.\n// It packs the flags from 16x16 to 8x16.\nEIGEN_STRONG_INLINE __m128i Pack16To8(Packet8f rf) {\n  return _mm_packs_epi32(_mm256_extractf128_si256(_mm256_castps_si256(rf), 0),\n                         _mm256_extractf128_si256(_mm256_castps_si256(rf), 1));\n}\n\n\ntemplate<> EIGEN_STRONG_INLINE Packet8f pset1<Packet8f>(const float&  from) { return _mm256_set1_ps(from); }\ntemplate<> EIGEN_STRONG_INLINE Packet4d pset1<Packet4d>(const double& from) { return _mm256_set1_pd(from); }\ntemplate<> EIGEN_STRONG_INLINE Packet8i pset1<Packet8i>(const int&    from) { return _mm256_set1_epi32(from); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet8f pset1frombits<Packet8f>(unsigned int from) { return 
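/* broadcast the raw 32-bit pattern and reinterpret it as floats */ 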
_mm256_castsi256_ps(pset1<Packet8i>(from)); }\ntemplate<> EIGEN_STRONG_INLINE Packet4d pset1frombits<Packet4d>(uint64_t from) { return _mm256_castsi256_pd(_mm256_set1_epi64x(from)); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet8f pzero(const Packet8f& /*a*/) { return _mm256_setzero_ps(); }\ntemplate<> EIGEN_STRONG_INLINE Packet4d pzero(const Packet4d& /*a*/) { return _mm256_setzero_pd(); }\ntemplate<> EIGEN_STRONG_INLINE Packet8i pzero(const Packet8i& /*a*/) { return _mm256_setzero_si256(); }\n\n\ntemplate<> EIGEN_STRONG_INLINE Packet8f peven_mask(const Packet8f& /*a*/) { return _mm256_castsi256_ps(_mm256_set_epi32(0, -1, 0, -1, 0, -1, 0, -1)); }\ntemplate<> EIGEN_STRONG_INLINE Packet8i peven_mask(const Packet8i& /*a*/) { return _mm256_set_epi32(0, -1, 0, -1, 0, -1, 0, -1); }\ntemplate<> EIGEN_STRONG_INLINE Packet4d peven_mask(const Packet4d& /*a*/) { return _mm256_castsi256_pd(_mm256_set_epi32(0, 0, -1, -1, 0, 0, -1, -1)); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet8f pload1<Packet8f>(const float*  from) { return _mm256_broadcast_ss(from); }\ntemplate<> EIGEN_STRONG_INLINE Packet4d pload1<Packet4d>(const double* from) { return _mm256_broadcast_sd(from); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet8f plset<Packet8f>(const float& a) { return _mm256_add_ps(_mm256_set1_ps(a), _mm256_set_ps(7.0,6.0,5.0,4.0,3.0,2.0,1.0,0.0)); }\ntemplate<> EIGEN_STRONG_INLINE Packet4d plset<Packet4d>(const double& a) { return _mm256_add_pd(_mm256_set1_pd(a), _mm256_set_pd(3.0,2.0,1.0,0.0)); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet8f padd<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_add_ps(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4d padd<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_add_pd(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet8i padd<Packet8i>(const Packet8i& a, const Packet8i& b) {\n#ifdef EIGEN_VECTORIZE_AVX2\n  return _mm256_add_epi32(a,b);\n#else\n  __m128i lo = _mm_add_epi32(_mm256_extractf128_si256(a, 0), _mm256_extractf128_si256(b, 0));\n  __m128i hi = _mm_add_epi32(_mm256_extractf128_si256(a, 1), _mm256_extractf128_si256(b, 1));\n  return _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1);\n#endif\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8f psub<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_sub_ps(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4d psub<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_sub_pd(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet8i psub<Packet8i>(const Packet8i& a, const Packet8i& b) {\n#ifdef EIGEN_VECTORIZE_AVX2\n  return _mm256_sub_epi32(a,b);\n#else\n  __m128i lo = _mm_sub_epi32(_mm256_extractf128_si256(a, 0), _mm256_extractf128_si256(b, 0));\n  __m128i hi = _mm_sub_epi32(_mm256_extractf128_si256(a, 1), _mm256_extractf128_si256(b, 1));\n  return _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1);\n#endif\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8f pnegate(const Packet8f& a)\n{\n  return _mm256_sub_ps(_mm256_set1_ps(0.0),a);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet4d pnegate(const Packet4d& a)\n{\n  return _mm256_sub_pd(_mm256_set1_pd(0.0),a);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8f pconj(const Packet8f& a) { return a; }\ntemplate<> EIGEN_STRONG_INLINE Packet4d pconj(const Packet4d& a) { return a; }\ntemplate<> EIGEN_STRONG_INLINE Packet8i pconj(const Packet8i& a) { return a; }\n\ntemplate<> EIGEN_STRONG_INLINE Packet8f pmul<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_mul_ps(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4d pmul<Packet4d>(const 
Packet4d& a, const Packet4d& b) { return _mm256_mul_pd(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet8i pmul<Packet8i>(const Packet8i& a, const Packet8i& b) {\n#ifdef EIGEN_VECTORIZE_AVX2\n  return _mm256_mullo_epi32(a,b);\n#else\n  const __m128i lo = _mm_mullo_epi32(_mm256_extractf128_si256(a, 0), _mm256_extractf128_si256(b, 0));\n  const __m128i hi = _mm_mullo_epi32(_mm256_extractf128_si256(a, 1), _mm256_extractf128_si256(b, 1));\n  return _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1);\n#endif\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8f pdiv<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_div_ps(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4d pdiv<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_div_pd(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet8i pdiv<Packet8i>(const Packet8i& /*a*/, const Packet8i& /*b*/)\n{ eigen_assert(false && \"packet integer division is not supported by AVX\");\n  return pset1<Packet8i>(0);\n}\n\n#ifdef EIGEN_VECTORIZE_FMA\ntemplate<> EIGEN_STRONG_INLINE Packet8f pmadd(const Packet8f& a, const Packet8f& b, const Packet8f& c) {\n#if ( (EIGEN_COMP_GNUC_STRICT && EIGEN_COMP_GNUC<80) || (EIGEN_COMP_CLANG) )\n  // Clang stupidly generates a vfmadd213ps instruction plus some vmovaps on registers,\n  //  and even register spilling with clang>=6.0 (bug 1637).\n  // Gcc stupidly generates a vfmadd132ps instruction.\n  // So let's force it to generate a vfmadd231ps instruction since the most common use\n  //  case is to accumulate the result of the product.\n  Packet8f res = c;\n  __asm__(\"vfmadd231ps %[a], %[b], %[c]\" : [c] \"+x\" (res) : [a] \"x\" (a), [b] \"x\" (b));\n  return res;\n#else\n  return _mm256_fmadd_ps(a,b,c);\n#endif\n}\ntemplate<> EIGEN_STRONG_INLINE Packet4d pmadd(const Packet4d& a, const Packet4d& b, const Packet4d& c) {\n#if ( (EIGEN_COMP_GNUC_STRICT && EIGEN_COMP_GNUC<80) || (EIGEN_COMP_CLANG) )\n  // see above\n  Packet4d res = c;\n  __asm__(\"vfmadd231pd %[a], %[b], %[c]\" : [c] \"+x\" (res) : [a] \"x\" (a), [b] \"x\" (b));\n  return res;\n#else\n  return _mm256_fmadd_pd(a,b,c);\n#endif\n}\n#endif\n\ntemplate<> EIGEN_STRONG_INLINE Packet8f pcmp_le(const Packet8f& a, const Packet8f& b) { return _mm256_cmp_ps(a,b,_CMP_LE_OQ); }\ntemplate<> EIGEN_STRONG_INLINE Packet8f pcmp_lt(const Packet8f& a, const Packet8f& b) { return _mm256_cmp_ps(a,b,_CMP_LT_OQ); }\ntemplate<> EIGEN_STRONG_INLINE Packet8f pcmp_lt_or_nan(const Packet8f& a, const Packet8f& b) { return _mm256_cmp_ps(a, b, _CMP_NGE_UQ); }\ntemplate<> EIGEN_STRONG_INLINE Packet8f pcmp_eq(const Packet8f& a, const Packet8f& b) { return _mm256_cmp_ps(a,b,_CMP_EQ_OQ); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet4d pcmp_le(const Packet4d& a, const Packet4d& b) { return _mm256_cmp_pd(a,b,_CMP_LE_OQ); }\ntemplate<> EIGEN_STRONG_INLINE Packet4d pcmp_lt(const Packet4d& a, const Packet4d& b) { return _mm256_cmp_pd(a,b,_CMP_LT_OQ); }\ntemplate<> EIGEN_STRONG_INLINE Packet4d pcmp_lt_or_nan(const Packet4d& a, const Packet4d& b) { return _mm256_cmp_pd(a, b, _CMP_NGE_UQ); }\ntemplate<> EIGEN_STRONG_INLINE Packet4d pcmp_eq(const Packet4d& a, const Packet4d& b) { return _mm256_cmp_pd(a,b,_CMP_EQ_OQ); }\n\n\ntemplate<> EIGEN_STRONG_INLINE Packet8i pcmp_eq(const Packet8i& a, const Packet8i& b) {\n#ifdef EIGEN_VECTORIZE_AVX2\n  return _mm256_cmpeq_epi32(a,b);\n#else\n  __m128i lo = _mm_cmpeq_epi32(_mm256_extractf128_si256(a, 0), _mm256_extractf128_si256(b, 0));\n  __m128i hi = _mm_cmpeq_epi32(_mm256_extractf128_si256(a, 1), _mm256_extractf128_si256(b, 1));\n  return 
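/* recombine the two 128-bit halves into a single 256-bit result */ 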
_mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1);\n#endif\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8f pmin<Packet8f>(const Packet8f& a, const Packet8f& b) {\n#if EIGEN_COMP_GNUC && EIGEN_COMP_GNUC < 63\n  // There appears to be a bug in GCC, by which the optimizer may flip\n  // the argument order in calls to _mm_min_ps/_mm_max_ps, so we have to\n  // resort to inline ASM here. This is supposed to be fixed in gcc6.3,\n  // see also: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=72867\n  Packet8f res;\n  asm(\"vminps %[a], %[b], %[res]\" : [res] \"=x\" (res) : [a] \"x\" (a), [b] \"x\" (b));\n  return res;\n#else\n  // Arguments are swapped to match NaN propagation behavior of std::min.\n  return _mm256_min_ps(b,a);\n#endif\n}\ntemplate<> EIGEN_STRONG_INLINE Packet4d pmin<Packet4d>(const Packet4d& a, const Packet4d& b) {\n#if EIGEN_COMP_GNUC && EIGEN_COMP_GNUC < 63\n  // See pmin above\n  Packet4d res;\n  asm(\"vminpd %[a], %[b], %[res]\" : [res] \"=x\" (res) : [a] \"x\" (a), [b] \"x\" (b));\n  return res;\n#else\n  // Arguments are swapped to match NaN propagation behavior of std::min.\n  return _mm256_min_pd(b,a);\n#endif\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8f pmax<Packet8f>(const Packet8f& a, const Packet8f& b) {\n#if EIGEN_COMP_GNUC && EIGEN_COMP_GNUC < 63\n  // See pmin above\n  Packet8f res;\n  asm(\"vmaxps %[a], %[b], %[res]\" : [res] \"=x\" (res) : [a] \"x\" (a), [b] \"x\" (b));\n  return res;\n#else\n  // Arguments are swapped to match NaN propagation behavior of std::max.\n  return _mm256_max_ps(b,a);\n#endif\n}\ntemplate<> EIGEN_STRONG_INLINE Packet4d pmax<Packet4d>(const Packet4d& a, const Packet4d& b) {\n#if EIGEN_COMP_GNUC && EIGEN_COMP_GNUC < 63\n  // See pmin above\n  Packet4d res;\n  asm(\"vmaxpd %[a], %[b], %[res]\" : [res] \"=x\" (res) : [a] \"x\" (a), [b] \"x\" (b));\n  return res;\n#else\n  // Arguments are swapped to match NaN propagation behavior of std::max.\n  return _mm256_max_pd(b,a);\n#endif\n}\n\n// Add specializations for min/max with prescribed NaN propagation.\ntemplate<>\nEIGEN_STRONG_INLINE Packet8f pmin<PropagateNumbers, Packet8f>(const Packet8f& a, const Packet8f& b) {\n  return pminmax_propagate_numbers(a, b, pmin<Packet8f>);\n}\ntemplate<>\nEIGEN_STRONG_INLINE Packet4d pmin<PropagateNumbers, Packet4d>(const Packet4d& a, const Packet4d& b) {\n  return pminmax_propagate_numbers(a, b, pmin<Packet4d>);\n}\ntemplate<>\nEIGEN_STRONG_INLINE Packet8f pmax<PropagateNumbers, Packet8f>(const Packet8f& a, const Packet8f& b) {\n  return pminmax_propagate_numbers(a, b, pmax<Packet8f>);\n}\ntemplate<>\nEIGEN_STRONG_INLINE Packet4d pmax<PropagateNumbers, Packet4d>(const Packet4d& a, const Packet4d& b) {\n  return pminmax_propagate_numbers(a, b, pmax<Packet4d>);\n}\ntemplate<>\nEIGEN_STRONG_INLINE Packet8f pmin<PropagateNaN, Packet8f>(const Packet8f& a, const Packet8f& b) {\n  return pminmax_propagate_nan(a, b, pmin<Packet8f>);\n}\ntemplate<>\nEIGEN_STRONG_INLINE Packet4d pmin<PropagateNaN, Packet4d>(const Packet4d& a, const Packet4d& b) {\n  return pminmax_propagate_nan(a, b, pmin<Packet4d>);\n}\ntemplate<>\nEIGEN_STRONG_INLINE Packet8f pmax<PropagateNaN, Packet8f>(const Packet8f& a, const Packet8f& b) {\n  return pminmax_propagate_nan(a, b, pmax<Packet8f>);\n}\ntemplate<>\nEIGEN_STRONG_INLINE Packet4d pmax<PropagateNaN, Packet4d>(const Packet4d& a, const Packet4d& b) {\n  return pminmax_propagate_nan(a, b, pmax<Packet4d>);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8f print<Packet8f>(const Packet8f& a) { return _mm256_round_ps(a, 
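/* round to an integral value using the current rounding mode */ 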
_MM_FROUND_CUR_DIRECTION); }\ntemplate<> EIGEN_STRONG_INLINE Packet4d print<Packet4d>(const Packet4d& a) { return _mm256_round_pd(a, _MM_FROUND_CUR_DIRECTION); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet8f pceil<Packet8f>(const Packet8f& a) { return _mm256_ceil_ps(a); }\ntemplate<> EIGEN_STRONG_INLINE Packet4d pceil<Packet4d>(const Packet4d& a) { return _mm256_ceil_pd(a); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet8f pfloor<Packet8f>(const Packet8f& a) { return _mm256_floor_ps(a); }\ntemplate<> EIGEN_STRONG_INLINE Packet4d pfloor<Packet4d>(const Packet4d& a) { return _mm256_floor_pd(a); }\n\n\ntemplate<> EIGEN_STRONG_INLINE Packet8i ptrue<Packet8i>(const Packet8i& a) {\n#ifdef EIGEN_VECTORIZE_AVX2\n  // vpcmpeqd has lower latency than the more general vcmpps\n  return _mm256_cmpeq_epi32(a,a);\n#else\n  const __m256 b = _mm256_castsi256_ps(a);\n  return _mm256_castps_si256(_mm256_cmp_ps(b,b,_CMP_TRUE_UQ));\n#endif\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8f ptrue<Packet8f>(const Packet8f& a) {\n#ifdef EIGEN_VECTORIZE_AVX2\n  // vpcmpeqd has lower latency than the more general vcmpps\n  const __m256i b = _mm256_castps_si256(a);\n  return _mm256_castsi256_ps(_mm256_cmpeq_epi32(b,b));\n#else\n  return _mm256_cmp_ps(a,a,_CMP_TRUE_UQ);\n#endif\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4d ptrue<Packet4d>(const Packet4d& a) {\n#ifdef EIGEN_VECTORIZE_AVX2\n  // vpcmpeqq has lower latency than the more general vcmppd\n  const __m256i b = _mm256_castpd_si256(a);\n  return _mm256_castsi256_pd(_mm256_cmpeq_epi64(b,b));\n#else\n  return _mm256_cmp_pd(a,a,_CMP_TRUE_UQ);\n#endif\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8f pand<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_and_ps(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4d pand<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_and_pd(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet8i pand<Packet8i>(const Packet8i& a, const Packet8i& b) {\n#ifdef EIGEN_VECTORIZE_AVX2\n  return _mm256_and_si256(a,b);\n#else\n  return _mm256_castps_si256(_mm256_and_ps(_mm256_castsi256_ps(a),_mm256_castsi256_ps(b)));\n#endif\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8f por<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_or_ps(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4d por<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_or_pd(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet8i por<Packet8i>(const Packet8i& a, const Packet8i& b) {\n#ifdef EIGEN_VECTORIZE_AVX2\n  return _mm256_or_si256(a,b);\n#else\n  return _mm256_castps_si256(_mm256_or_ps(_mm256_castsi256_ps(a),_mm256_castsi256_ps(b)));\n#endif\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8f pxor<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_xor_ps(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4d pxor<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_xor_pd(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet8i pxor<Packet8i>(const Packet8i& a, const Packet8i& b) {\n#ifdef EIGEN_VECTORIZE_AVX2\n  return _mm256_xor_si256(a,b);\n#else\n  return _mm256_castps_si256(_mm256_xor_ps(_mm256_castsi256_ps(a),_mm256_castsi256_ps(b)));\n#endif\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8f pandnot<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_andnot_ps(b,a); }\ntemplate<> EIGEN_STRONG_INLINE Packet4d pandnot<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_andnot_pd(b,a); }\ntemplate<> EIGEN_STRONG_INLINE Packet8i pandnot<Packet8i>(const Packet8i& a, const Packet8i& b) {\n#ifdef EIGEN_VECTORIZE_AVX2\n  return 
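/* the andnot intrinsics negate their first operand, hence the swapped arguments */ 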
_mm256_andnot_si256(b,a);\n#else\n  return _mm256_castps_si256(_mm256_andnot_ps(_mm256_castsi256_ps(b),_mm256_castsi256_ps(a)));\n#endif\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8f pround<Packet8f>(const Packet8f& a)\n{\n  const Packet8f mask = pset1frombits<Packet8f>(static_cast<numext::uint32_t>(0x80000000u));\n  const Packet8f prev0dot5 = pset1frombits<Packet8f>(static_cast<numext::uint32_t>(0x3EFFFFFFu));\n  return _mm256_round_ps(padd(por(pand(a, mask), prev0dot5), a), _MM_FROUND_TO_ZERO);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet4d pround<Packet4d>(const Packet4d& a)\n{\n  const Packet4d mask = pset1frombits<Packet4d>(static_cast<numext::uint64_t>(0x8000000000000000ull));\n  const Packet4d prev0dot5 = pset1frombits<Packet4d>(static_cast<numext::uint64_t>(0x3FDFFFFFFFFFFFFFull));\n  return _mm256_round_pd(padd(por(pand(a, mask), prev0dot5), a), _MM_FROUND_TO_ZERO);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8f pselect<Packet8f>(const Packet8f& mask, const Packet8f& a, const Packet8f& b)\n{ return _mm256_blendv_ps(b,a,mask); }\ntemplate<> EIGEN_STRONG_INLINE Packet4d pselect<Packet4d>(const Packet4d& mask, const Packet4d& a, const Packet4d& b)\n{ return _mm256_blendv_pd(b,a,mask); }\n\ntemplate<int N> EIGEN_STRONG_INLINE Packet8i parithmetic_shift_right(Packet8i a) {\n#ifdef EIGEN_VECTORIZE_AVX2\n  return _mm256_srai_epi32(a, N);\n#else\n  __m128i lo = _mm_srai_epi32(_mm256_extractf128_si256(a, 0), N);\n  __m128i hi = _mm_srai_epi32(_mm256_extractf128_si256(a, 1), N);\n  return _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1);\n#endif\n}\n\ntemplate<int N> EIGEN_STRONG_INLINE Packet8i plogical_shift_right(Packet8i a) {\n#ifdef EIGEN_VECTORIZE_AVX2\n  return _mm256_srli_epi32(a, N);\n#else\n  __m128i lo = _mm_srli_epi32(_mm256_extractf128_si256(a, 0), N);\n  __m128i hi = _mm_srli_epi32(_mm256_extractf128_si256(a, 1), N);\n  return _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1);\n#endif\n}\n\ntemplate<int N> EIGEN_STRONG_INLINE Packet8i plogical_shift_left(Packet8i a) {\n#ifdef EIGEN_VECTORIZE_AVX2\n  return _mm256_slli_epi32(a, N);\n#else\n  __m128i lo = _mm_slli_epi32(_mm256_extractf128_si256(a, 0), N);\n  __m128i hi = _mm_slli_epi32(_mm256_extractf128_si256(a, 1), N);\n  return _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1);\n#endif\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8f pload<Packet8f>(const float*   from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm256_load_ps(from); }\ntemplate<> EIGEN_STRONG_INLINE Packet4d pload<Packet4d>(const double*  from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm256_load_pd(from); }\ntemplate<> EIGEN_STRONG_INLINE Packet8i pload<Packet8i>(const int*     from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm256_load_si256(reinterpret_cast<const __m256i*>(from)); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet8f ploadu<Packet8f>(const float* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm256_loadu_ps(from); }\ntemplate<> EIGEN_STRONG_INLINE Packet4d ploadu<Packet4d>(const double* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm256_loadu_pd(from); }\ntemplate<> EIGEN_STRONG_INLINE Packet8i ploadu<Packet8i>(const int* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm256_loadu_si256(reinterpret_cast<const __m256i*>(from)); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet8f ploadu<Packet8f>(const float* from, uint8_t umask) {\n  Packet8i mask = _mm256_set1_epi8(static_cast<char>(umask));\n  const Packet8i bit_mask = _mm256_set_epi32(0xffffff7f, 0xffffffbf, 0xffffffdf, 0xffffffef, 0xfffffff7, 0xfffffffb, 0xfffffffd, 0xfffffffe);\n  mask = por<Packet8i>(mask, 
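/* after the OR, lane i is all-ones except possibly bit i, which mirrors bit i of umask */ 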
bit_mask);\n  mask = pcmp_eq<Packet8i>(mask, _mm256_set1_epi32(0xffffffff));\n  EIGEN_DEBUG_UNALIGNED_LOAD return _mm256_maskload_ps(from, mask);\n}\n\n// Loads 4 floats from memory and returns the packet {a0, a0, a1, a1, a2, a2, a3, a3}\ntemplate<> EIGEN_STRONG_INLINE Packet8f ploaddup<Packet8f>(const float* from)\n{\n  // TODO try to find a way to avoid the need of a temporary register\n//   Packet8f tmp  = _mm256_castps128_ps256(_mm_loadu_ps(from));\n//   tmp = _mm256_insertf128_ps(tmp, _mm_movehl_ps(_mm256_castps256_ps128(tmp),_mm256_castps256_ps128(tmp)), 1);\n//   return _mm256_unpacklo_ps(tmp,tmp);\n\n  // _mm256_insertf128_ps is very slow on Haswell, thus:\n  Packet8f tmp = _mm256_broadcast_ps((const __m128*)(const void*)from);\n  // mimic an \"in-place\" permutation of the lower 128 bits using a blend\n  tmp = _mm256_blend_ps(tmp,_mm256_castps128_ps256(_mm_permute_ps( _mm256_castps256_ps128(tmp), _MM_SHUFFLE(1,0,1,0))), 15);\n  // then we can perform a consistent permutation on the global register to get everything in shape:\n  return  _mm256_permute_ps(tmp, _MM_SHUFFLE(3,3,2,2));\n}\n// Loads 2 doubles from memory and returns the packet {a0, a0, a1, a1}\ntemplate<> EIGEN_STRONG_INLINE Packet4d ploaddup<Packet4d>(const double* from)\n{\n  Packet4d tmp = _mm256_broadcast_pd((const __m128d*)(const void*)from);\n  return  _mm256_permute_pd(tmp, 3<<2);\n}\n\n// Loads 2 floats from memory and returns the packet {a0, a0, a0, a0, a1, a1, a1, a1}\ntemplate<> EIGEN_STRONG_INLINE Packet8f ploadquad<Packet8f>(const float* from)\n{\n  Packet8f tmp = _mm256_castps128_ps256(_mm_broadcast_ss(from));\n  return _mm256_insertf128_ps(tmp, _mm_broadcast_ss(from+1), 1);\n}\n\ntemplate<> EIGEN_STRONG_INLINE void pstore<float>(float*   to, const Packet8f& from) { EIGEN_DEBUG_ALIGNED_STORE _mm256_store_ps(to, from); }\ntemplate<> EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet4d& from) { EIGEN_DEBUG_ALIGNED_STORE _mm256_store_pd(to, from); }\ntemplate<> EIGEN_STRONG_INLINE void pstore<int>(int*       to, const Packet8i& from) { EIGEN_DEBUG_ALIGNED_STORE _mm256_storeu_si256(reinterpret_cast<__m256i*>(to), from); }\n\ntemplate<> EIGEN_STRONG_INLINE void pstoreu<float>(float*   to, const Packet8f& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm256_storeu_ps(to, from); }\ntemplate<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet4d& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm256_storeu_pd(to, from); }\ntemplate<> EIGEN_STRONG_INLINE void pstoreu<int>(int*       to, const Packet8i& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm256_storeu_si256(reinterpret_cast<__m256i*>(to), from); }\n\ntemplate<> EIGEN_STRONG_INLINE void pstoreu<float>(float*   to, const Packet8f& from, uint8_t umask) {\n  Packet8i mask = _mm256_set1_epi8(static_cast<char>(umask));\n  const Packet8i bit_mask = _mm256_set_epi32(0xffffff7f, 0xffffffbf, 0xffffffdf, 0xffffffef, 0xfffffff7, 0xfffffffb, 0xfffffffd, 0xfffffffe);\n  mask = por<Packet8i>(mask, bit_mask);\n  mask = pcmp_eq<Packet8i>(mask, _mm256_set1_epi32(0xffffffff));\n  EIGEN_DEBUG_UNALIGNED_STORE return _mm256_maskstore_ps(to, mask, from);\n}\n\n// NOTE: leverage _mm256_i32gather_ps and _mm256_i32gather_pd if AVX2 instructions are available\n// NOTE: for the record the following seems to be slower: return _mm256_i32gather_ps(from, _mm256_set1_epi32(stride), 4);\ntemplate<> EIGEN_DEVICE_FUNC inline Packet8f pgather<float, Packet8f>(const float* from, Index stride)\n{\n  return _mm256_set_ps(from[7*stride], from[6*stride], from[5*stride], from[4*stride],\n                    
   from[3*stride], from[2*stride], from[1*stride], from[0*stride]);\n}\ntemplate<> EIGEN_DEVICE_FUNC inline Packet4d pgather<double, Packet4d>(const double* from, Index stride)\n{\n  return _mm256_set_pd(from[3*stride], from[2*stride], from[1*stride], from[0*stride]);\n}\n\ntemplate<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet8f>(float* to, const Packet8f& from, Index stride)\n{\n  __m128 low = _mm256_extractf128_ps(from, 0);\n  to[stride*0] = _mm_cvtss_f32(low);\n  to[stride*1] = _mm_cvtss_f32(_mm_shuffle_ps(low, low, 1));\n  to[stride*2] = _mm_cvtss_f32(_mm_shuffle_ps(low, low, 2));\n  to[stride*3] = _mm_cvtss_f32(_mm_shuffle_ps(low, low, 3));\n\n  __m128 high = _mm256_extractf128_ps(from, 1);\n  to[stride*4] = _mm_cvtss_f32(high);\n  to[stride*5] = _mm_cvtss_f32(_mm_shuffle_ps(high, high, 1));\n  to[stride*6] = _mm_cvtss_f32(_mm_shuffle_ps(high, high, 2));\n  to[stride*7] = _mm_cvtss_f32(_mm_shuffle_ps(high, high, 3));\n}\ntemplate<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet4d>(double* to, const Packet4d& from, Index stride)\n{\n  __m128d low = _mm256_extractf128_pd(from, 0);\n  to[stride*0] = _mm_cvtsd_f64(low);\n  to[stride*1] = _mm_cvtsd_f64(_mm_shuffle_pd(low, low, 1));\n  __m128d high = _mm256_extractf128_pd(from, 1);\n  to[stride*2] = _mm_cvtsd_f64(high);\n  to[stride*3] = _mm_cvtsd_f64(_mm_shuffle_pd(high, high, 1));\n}\n\ntemplate<> EIGEN_STRONG_INLINE void pstore1<Packet8f>(float* to, const float& a)\n{\n  Packet8f pa = pset1<Packet8f>(a);\n  pstore(to, pa);\n}\ntemplate<> EIGEN_STRONG_INLINE void pstore1<Packet4d>(double* to, const double& a)\n{\n  Packet4d pa = pset1<Packet4d>(a);\n  pstore(to, pa);\n}\ntemplate<> EIGEN_STRONG_INLINE void pstore1<Packet8i>(int* to, const int& a)\n{\n  Packet8i pa = pset1<Packet8i>(a);\n  pstore(to, pa);\n}\n\n#ifndef EIGEN_VECTORIZE_AVX512\ntemplate<> EIGEN_STRONG_INLINE void prefetch<float>(const float*   addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }\ntemplate<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }\ntemplate<> EIGEN_STRONG_INLINE void prefetch<int>(const int*       addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }\n#endif\n\ntemplate<> EIGEN_STRONG_INLINE float  pfirst<Packet8f>(const Packet8f& a) {\n  return _mm_cvtss_f32(_mm256_castps256_ps128(a));\n}\ntemplate<> EIGEN_STRONG_INLINE double pfirst<Packet4d>(const Packet4d& a) {\n  return _mm_cvtsd_f64(_mm256_castpd256_pd128(a));\n}\ntemplate<> EIGEN_STRONG_INLINE int    pfirst<Packet8i>(const Packet8i& a) {\n  return _mm_cvtsi128_si32(_mm256_castsi256_si128(a));\n}\n\n\ntemplate<> EIGEN_STRONG_INLINE Packet8f preverse(const Packet8f& a)\n{\n  __m256 tmp = _mm256_shuffle_ps(a,a,0x1b);\n  return _mm256_permute2f128_ps(tmp, tmp, 1);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet4d preverse(const Packet4d& a)\n{\n   __m256d tmp = _mm256_shuffle_pd(a,a,5);\n  return _mm256_permute2f128_pd(tmp, tmp, 1);\n  #if 0\n  // This version is unlikely to be faster as _mm256_shuffle_ps and _mm256_permute_pd\n  // exhibit the same latency/throughput, but it is here for future reference/benchmarking...\n  __m256d swap_halves = _mm256_permute2f128_pd(a,a,1);\n    return _mm256_permute_pd(swap_halves,5);\n  #endif\n}\n\n// pabs should be ok\ntemplate<> EIGEN_STRONG_INLINE Packet8f pabs(const Packet8f& a)\n{\n  const Packet8f mask = _mm256_castsi256_ps(_mm256_setr_epi32(0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF));\n  return 
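/* clear the sign bit of every element */ 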
_mm256_and_ps(a,mask);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet4d pabs(const Packet4d& a)\n{\n  const Packet4d mask = _mm256_castsi256_pd(_mm256_setr_epi32(0xFFFFFFFF,0x7FFFFFFF,0xFFFFFFFF,0x7FFFFFFF,0xFFFFFFFF,0x7FFFFFFF,0xFFFFFFFF,0x7FFFFFFF));\n  return _mm256_and_pd(a,mask);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8f pfrexp<Packet8f>(const Packet8f& a, Packet8f& exponent) {\n  return pfrexp_generic(a,exponent);\n}\n\n// Extract exponent without existence of Packet4l.\ntemplate<>\nEIGEN_STRONG_INLINE  \nPacket4d pfrexp_generic_get_biased_exponent(const Packet4d& a) {\n  const Packet4d cst_exp_mask  = pset1frombits<Packet4d>(static_cast<uint64_t>(0x7ff0000000000000ull));\n  __m256i a_expo = _mm256_castpd_si256(pand(a, cst_exp_mask));\n#ifdef EIGEN_VECTORIZE_AVX2\n  a_expo = _mm256_srli_epi64(a_expo, 52);\n  __m128i lo = _mm256_extractf128_si256(a_expo, 0);\n  __m128i hi = _mm256_extractf128_si256(a_expo, 1);\n#else\n  __m128i lo = _mm256_extractf128_si256(a_expo, 0);\n  __m128i hi = _mm256_extractf128_si256(a_expo, 1);\n  lo = _mm_srli_epi64(lo, 52);\n  hi = _mm_srli_epi64(hi, 52);\n#endif\n  Packet2d exponent_lo = _mm_cvtepi32_pd(vec4i_swizzle1(lo, 0, 2, 1, 3));\n  Packet2d exponent_hi = _mm_cvtepi32_pd(vec4i_swizzle1(hi, 0, 2, 1, 3));\n  Packet4d exponent = _mm256_insertf128_pd(_mm256_setzero_pd(), exponent_lo, 0);\n  exponent = _mm256_insertf128_pd(exponent, exponent_hi, 1);\n  return exponent;\n}\n\n\ntemplate<> EIGEN_STRONG_INLINE Packet4d pfrexp<Packet4d>(const Packet4d& a, Packet4d& exponent) {\n  return pfrexp_generic(a, exponent);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8f pldexp<Packet8f>(const Packet8f& a, const Packet8f& exponent) {\n  return pldexp_generic(a, exponent);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4d pldexp<Packet4d>(const Packet4d& a, const Packet4d& exponent) {\n  // Clamp exponent to [-2099, 2099]\n  const Packet4d max_exponent = pset1<Packet4d>(2099.0);\n  const Packet4i e = _mm256_cvtpd_epi32(pmin(pmax(exponent, pnegate(max_exponent)), max_exponent));\n  \n  // Split 2^e into four factors and multiply.\n  const Packet4i bias = pset1<Packet4i>(1023);\n  Packet4i b = parithmetic_shift_right<2>(e);  // floor(e/4)\n  \n  // 2^b\n  Packet4i hi = vec4i_swizzle1(padd(b, bias), 0, 2, 1, 3);\n  Packet4i lo = _mm_slli_epi64(hi, 52);\n  hi = _mm_slli_epi64(_mm_srli_epi64(hi, 32), 52);\n  Packet4d c = _mm256_castsi256_pd(_mm256_insertf128_si256(_mm256_castsi128_si256(lo), hi, 1));\n  Packet4d out = pmul(pmul(pmul(a, c), c), c);  // a * 2^(3b)\n  \n  // 2^(e - 3b)\n  b = psub(psub(psub(e, b), b), b);  // e - 3b\n  hi = vec4i_swizzle1(padd(b, bias), 0, 2, 1, 3);\n  lo = _mm_slli_epi64(hi, 52);\n  hi = _mm_slli_epi64(_mm_srli_epi64(hi, 32), 52);\n  c = _mm256_castsi256_pd(_mm256_insertf128_si256(_mm256_castsi128_si256(lo), hi, 1));\n  out = pmul(out, c); // a * 2^e\n  return out;\n}\n\ntemplate<> EIGEN_STRONG_INLINE float predux<Packet8f>(const Packet8f& a)\n{\n  return predux(Packet4f(_mm_add_ps(_mm256_castps256_ps128(a),_mm256_extractf128_ps(a,1))));\n}\ntemplate<> EIGEN_STRONG_INLINE double predux<Packet4d>(const Packet4d& a)\n{\n  return predux(Packet2d(_mm_add_pd(_mm256_castpd256_pd128(a),_mm256_extractf128_pd(a,1))));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f predux_half_dowto4<Packet8f>(const Packet8f& a)\n{\n  return _mm_add_ps(_mm256_castps256_ps128(a),_mm256_extractf128_ps(a,1));\n}\n\ntemplate<> EIGEN_STRONG_INLINE float predux_mul<Packet8f>(const Packet8f& a)\n{\n  Packet8f tmp;\n  tmp = _mm256_mul_ps(a, _mm256_permute2f128_ps(a,a,1));\n  tmp = 
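/* fold the remaining four partial products pairwise */ 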
_mm256_mul_ps(tmp, _mm256_shuffle_ps(tmp,tmp,_MM_SHUFFLE(1,0,3,2)));\n  return pfirst(_mm256_mul_ps(tmp, _mm256_shuffle_ps(tmp,tmp,1)));\n}\ntemplate<> EIGEN_STRONG_INLINE double predux_mul<Packet4d>(const Packet4d& a)\n{\n  Packet4d tmp;\n  tmp = _mm256_mul_pd(a, _mm256_permute2f128_pd(a,a,1));\n  return pfirst(_mm256_mul_pd(tmp, _mm256_shuffle_pd(tmp,tmp,1)));\n}\n\ntemplate<> EIGEN_STRONG_INLINE float predux_min<Packet8f>(const Packet8f& a)\n{\n  Packet8f tmp = _mm256_min_ps(a, _mm256_permute2f128_ps(a,a,1));\n  tmp = _mm256_min_ps(tmp, _mm256_shuffle_ps(tmp,tmp,_MM_SHUFFLE(1,0,3,2)));\n  return pfirst(_mm256_min_ps(tmp, _mm256_shuffle_ps(tmp,tmp,1)));\n}\ntemplate<> EIGEN_STRONG_INLINE double predux_min<Packet4d>(const Packet4d& a)\n{\n  Packet4d tmp = _mm256_min_pd(a, _mm256_permute2f128_pd(a,a,1));\n  return pfirst(_mm256_min_pd(tmp, _mm256_shuffle_pd(tmp, tmp, 1)));\n}\n\ntemplate<> EIGEN_STRONG_INLINE float predux_max<Packet8f>(const Packet8f& a)\n{\n  Packet8f tmp = _mm256_max_ps(a, _mm256_permute2f128_ps(a,a,1));\n  tmp = _mm256_max_ps(tmp, _mm256_shuffle_ps(tmp,tmp,_MM_SHUFFLE(1,0,3,2)));\n  return pfirst(_mm256_max_ps(tmp, _mm256_shuffle_ps(tmp,tmp,1)));\n}\n\ntemplate<> EIGEN_STRONG_INLINE double predux_max<Packet4d>(const Packet4d& a)\n{\n  Packet4d tmp = _mm256_max_pd(a, _mm256_permute2f128_pd(a,a,1));\n  return pfirst(_mm256_max_pd(tmp, _mm256_shuffle_pd(tmp, tmp, 1)));\n}\n\n// not needed yet\n// template<> EIGEN_STRONG_INLINE bool predux_all(const Packet8f& x)\n// {\n//   return _mm256_movemask_ps(x)==0xFF;\n// }\n\ntemplate<> EIGEN_STRONG_INLINE bool predux_any(const Packet8f& x)\n{\n  return _mm256_movemask_ps(x)!=0;\n}\n\nEIGEN_DEVICE_FUNC inline void\nptranspose(PacketBlock<Packet8f,8>& kernel) {\n  __m256 T0 = _mm256_unpacklo_ps(kernel.packet[0], kernel.packet[1]);\n  __m256 T1 = _mm256_unpackhi_ps(kernel.packet[0], kernel.packet[1]);\n  __m256 T2 = _mm256_unpacklo_ps(kernel.packet[2], kernel.packet[3]);\n  __m256 T3 = _mm256_unpackhi_ps(kernel.packet[2], kernel.packet[3]);\n  __m256 T4 = _mm256_unpacklo_ps(kernel.packet[4], kernel.packet[5]);\n  __m256 T5 = _mm256_unpackhi_ps(kernel.packet[4], kernel.packet[5]);\n  __m256 T6 = _mm256_unpacklo_ps(kernel.packet[6], kernel.packet[7]);\n  __m256 T7 = _mm256_unpackhi_ps(kernel.packet[6], kernel.packet[7]);\n  __m256 S0 = _mm256_shuffle_ps(T0,T2,_MM_SHUFFLE(1,0,1,0));\n  __m256 S1 = _mm256_shuffle_ps(T0,T2,_MM_SHUFFLE(3,2,3,2));\n  __m256 S2 = _mm256_shuffle_ps(T1,T3,_MM_SHUFFLE(1,0,1,0));\n  __m256 S3 = _mm256_shuffle_ps(T1,T3,_MM_SHUFFLE(3,2,3,2));\n  __m256 S4 = _mm256_shuffle_ps(T4,T6,_MM_SHUFFLE(1,0,1,0));\n  __m256 S5 = _mm256_shuffle_ps(T4,T6,_MM_SHUFFLE(3,2,3,2));\n  __m256 S6 = _mm256_shuffle_ps(T5,T7,_MM_SHUFFLE(1,0,1,0));\n  __m256 S7 = _mm256_shuffle_ps(T5,T7,_MM_SHUFFLE(3,2,3,2));\n  kernel.packet[0] = _mm256_permute2f128_ps(S0, S4, 0x20);\n  kernel.packet[1] = _mm256_permute2f128_ps(S1, S5, 0x20);\n  kernel.packet[2] = _mm256_permute2f128_ps(S2, S6, 0x20);\n  kernel.packet[3] = _mm256_permute2f128_ps(S3, S7, 0x20);\n  kernel.packet[4] = _mm256_permute2f128_ps(S0, S4, 0x31);\n  kernel.packet[5] = _mm256_permute2f128_ps(S1, S5, 0x31);\n  kernel.packet[6] = _mm256_permute2f128_ps(S2, S6, 0x31);\n  kernel.packet[7] = _mm256_permute2f128_ps(S3, S7, 0x31);\n}\n\nEIGEN_DEVICE_FUNC inline void\nptranspose(PacketBlock<Packet8f,4>& kernel) {\n  __m256 T0 = _mm256_unpacklo_ps(kernel.packet[0], kernel.packet[1]);\n  __m256 T1 = _mm256_unpackhi_ps(kernel.packet[0], kernel.packet[1]);\n  __m256 T2 = 
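/* interleave the low elements of rows 2 and 3 within each 128-bit lane */ 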
_mm256_unpacklo_ps(kernel.packet[2], kernel.packet[3]);\n  __m256 T3 = _mm256_unpackhi_ps(kernel.packet[2], kernel.packet[3]);\n\n  __m256 S0 = _mm256_shuffle_ps(T0,T2,_MM_SHUFFLE(1,0,1,0));\n  __m256 S1 = _mm256_shuffle_ps(T0,T2,_MM_SHUFFLE(3,2,3,2));\n  __m256 S2 = _mm256_shuffle_ps(T1,T3,_MM_SHUFFLE(1,0,1,0));\n  __m256 S3 = _mm256_shuffle_ps(T1,T3,_MM_SHUFFLE(3,2,3,2));\n\n  kernel.packet[0] = _mm256_permute2f128_ps(S0, S1, 0x20);\n  kernel.packet[1] = _mm256_permute2f128_ps(S2, S3, 0x20);\n  kernel.packet[2] = _mm256_permute2f128_ps(S0, S1, 0x31);\n  kernel.packet[3] = _mm256_permute2f128_ps(S2, S3, 0x31);\n}\n\nEIGEN_DEVICE_FUNC inline void\nptranspose(PacketBlock<Packet4d,4>& kernel) {\n  __m256d T0 = _mm256_shuffle_pd(kernel.packet[0], kernel.packet[1], 15);\n  __m256d T1 = _mm256_shuffle_pd(kernel.packet[0], kernel.packet[1], 0);\n  __m256d T2 = _mm256_shuffle_pd(kernel.packet[2], kernel.packet[3], 15);\n  __m256d T3 = _mm256_shuffle_pd(kernel.packet[2], kernel.packet[3], 0);\n\n  kernel.packet[1] = _mm256_permute2f128_pd(T0, T2, 32);\n  kernel.packet[3] = _mm256_permute2f128_pd(T0, T2, 49);\n  kernel.packet[0] = _mm256_permute2f128_pd(T1, T3, 32);\n  kernel.packet[2] = _mm256_permute2f128_pd(T1, T3, 49);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8f pblend(const Selector<8>& ifPacket, const Packet8f& thenPacket, const Packet8f& elsePacket) {\n  const __m256 zero = _mm256_setzero_ps();\n  const __m256 select = _mm256_set_ps(ifPacket.select[7], ifPacket.select[6], ifPacket.select[5], ifPacket.select[4], ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);\n  __m256 false_mask = _mm256_cmp_ps(select, zero, _CMP_EQ_UQ);\n  return _mm256_blendv_ps(thenPacket, elsePacket, false_mask);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet4d pblend(const Selector<4>& ifPacket, const Packet4d& thenPacket, const Packet4d& elsePacket) {\n  const __m256d zero = _mm256_setzero_pd();\n  const __m256d select = _mm256_set_pd(ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);\n  __m256d false_mask = _mm256_cmp_pd(select, zero, _CMP_EQ_UQ);\n  return _mm256_blendv_pd(thenPacket, elsePacket, false_mask);\n}\n\n// Packet math for Eigen::half\n\ntemplate<> struct unpacket_traits<Packet8h> { typedef Eigen::half type; enum {size=8, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet8h half; };\n\ntemplate<> EIGEN_STRONG_INLINE Packet8h pset1<Packet8h>(const Eigen::half& from) {\n  return _mm_set1_epi16(numext::bit_cast<numext::uint16_t>(from));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Eigen::half pfirst<Packet8h>(const Packet8h& from) {\n  return numext::bit_cast<Eigen::half>(static_cast<numext::uint16_t>(_mm_extract_epi16(from, 0)));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8h pload<Packet8h>(const Eigen::half* from) {\n  return _mm_load_si128(reinterpret_cast<const __m128i*>(from));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8h ploadu<Packet8h>(const Eigen::half* from) {\n  return _mm_loadu_si128(reinterpret_cast<const __m128i*>(from));\n}\n\ntemplate<> EIGEN_STRONG_INLINE void pstore<Eigen::half>(Eigen::half* to, const Packet8h& from) {\n  _mm_store_si128(reinterpret_cast<__m128i*>(to), from);\n}\n\ntemplate<> EIGEN_STRONG_INLINE void pstoreu<Eigen::half>(Eigen::half* to, const Packet8h& from) {\n  _mm_storeu_si128(reinterpret_cast<__m128i*>(to), from);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8h\nploaddup<Packet8h>(const Eigen::half*  from) {\n  const numext::uint16_t a = 
numext::bit_cast<numext::uint16_t>(from[0]);\n  const numext::uint16_t b = numext::bit_cast<numext::uint16_t>(from[1]);\n  const numext::uint16_t c = numext::bit_cast<numext::uint16_t>(from[2]);\n  const numext::uint16_t d = numext::bit_cast<numext::uint16_t>(from[3]);\n  return _mm_set_epi16(d, d, c, c, b, b, a, a);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8h\nploadquad<Packet8h>(const Eigen::half* from) {\n  const numext::uint16_t a = numext::bit_cast<numext::uint16_t>(from[0]);\n  const numext::uint16_t b = numext::bit_cast<numext::uint16_t>(from[1]);\n  return _mm_set_epi16(b, b, b, b, a, a, a, a);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8h ptrue(const Packet8h& a) {\n return _mm_cmpeq_epi32(a, a);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet8h pabs(const Packet8h& a) {\n  const __m128i sign_mask = _mm_set1_epi16(static_cast<numext::uint16_t>(0x8000));\n  return _mm_andnot_si128(sign_mask, a);\n}\n\nEIGEN_STRONG_INLINE Packet8f half2float(const Packet8h& a) {\n#ifdef EIGEN_HAS_FP16_C\n  return _mm256_cvtph_ps(a);\n#else\n  EIGEN_ALIGN32 Eigen::half aux[8];\n  pstore(aux, a);\n  float f0(aux[0]);\n  float f1(aux[1]);\n  float f2(aux[2]);\n  float f3(aux[3]);\n  float f4(aux[4]);\n  float f5(aux[5]);\n  float f6(aux[6]);\n  float f7(aux[7]);\n\n  return _mm256_set_ps(f7, f6, f5, f4, f3, f2, f1, f0);\n#endif\n}\n\nEIGEN_STRONG_INLINE Packet8h float2half(const Packet8f& a) {\n#ifdef EIGEN_HAS_FP16_C\n  return _mm256_cvtps_ph(a, _MM_FROUND_TO_NEAREST_INT|_MM_FROUND_NO_EXC);\n#else\n  EIGEN_ALIGN32 float aux[8];\n  pstore(aux, a);\n  const numext::uint16_t s0 = numext::bit_cast<numext::uint16_t>(Eigen::half(aux[0]));\n  const numext::uint16_t s1 = numext::bit_cast<numext::uint16_t>(Eigen::half(aux[1]));\n  const numext::uint16_t s2 = numext::bit_cast<numext::uint16_t>(Eigen::half(aux[2]));\n  const numext::uint16_t s3 = numext::bit_cast<numext::uint16_t>(Eigen::half(aux[3]));\n  const numext::uint16_t s4 = numext::bit_cast<numext::uint16_t>(Eigen::half(aux[4]));\n  const numext::uint16_t s5 = numext::bit_cast<numext::uint16_t>(Eigen::half(aux[5]));\n  const numext::uint16_t s6 = numext::bit_cast<numext::uint16_t>(Eigen::half(aux[6]));\n  const numext::uint16_t s7 = numext::bit_cast<numext::uint16_t>(Eigen::half(aux[7]));\n  return _mm_set_epi16(s7, s6, s5, s4, s3, s2, s1, s0);\n#endif\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet8h pmin<Packet8h>(const Packet8h& a,\n                                            const Packet8h& b) {\n  return float2half(pmin<Packet8f>(half2float(a), half2float(b)));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet8h pmax<Packet8h>(const Packet8h& a,\n                                            const Packet8h& b) {\n  return float2half(pmax<Packet8f>(half2float(a), half2float(b)));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet8h plset<Packet8h>(const half& a) {\n  return float2half(plset<Packet8f>(static_cast<float>(a)));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8h por(const Packet8h& a,const Packet8h& b) {\n  // Packet8h is a wrapper around __m128i that converts to it implicitly, so we\n  // can call the integer intrinsics directly:\n  return _mm_or_si128(a,b);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet8h pxor(const Packet8h& a,const Packet8h& b) {\n  return _mm_xor_si128(a,b);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet8h pand(const Packet8h& a,const Packet8h& b) {\n  return _mm_and_si128(a,b);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet8h pandnot(const Packet8h& a,const Packet8h& b) {\n  return _mm_andnot_si128(b,a);\n}\n\ntemplate<> 
EIGEN_STRONG_INLINE Packet8h pselect(const Packet8h& mask, const Packet8h& a, const Packet8h& b) {\n  return _mm_blendv_epi8(b, a, mask);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8h pround<Packet8h>(const Packet8h& a) {\n  return float2half(pround<Packet8f>(half2float(a)));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8h print<Packet8h>(const Packet8h& a) {\n  return float2half(print<Packet8f>(half2float(a)));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8h pceil<Packet8h>(const Packet8h& a) {\n  return float2half(pceil<Packet8f>(half2float(a)));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8h pfloor<Packet8h>(const Packet8h& a) {\n  return float2half(pfloor<Packet8f>(half2float(a)));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8h pcmp_eq(const Packet8h& a,const Packet8h& b) {\n  return Pack16To8(pcmp_eq(half2float(a), half2float(b)));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8h pcmp_le(const Packet8h& a,const Packet8h& b) {\n  return Pack16To8(pcmp_le(half2float(a), half2float(b)));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8h pcmp_lt(const Packet8h& a,const Packet8h& b) {\n  return Pack16To8(pcmp_lt(half2float(a), half2float(b)));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8h pcmp_lt_or_nan(const Packet8h& a,const Packet8h& b) {\n  return Pack16To8(pcmp_lt_or_nan(half2float(a), half2float(b)));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8h pconj(const Packet8h& a) { return a; }\n\ntemplate<> EIGEN_STRONG_INLINE Packet8h pnegate(const Packet8h& a) {\n  Packet8h sign_mask = _mm_set1_epi16(static_cast<numext::uint16_t>(0x8000));\n  return _mm_xor_si128(a, sign_mask);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8h padd<Packet8h>(const Packet8h& a, const Packet8h& b) {\n  Packet8f af = half2float(a);\n  Packet8f bf = half2float(b);\n  Packet8f rf = padd(af, bf);\n  return float2half(rf);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8h psub<Packet8h>(const Packet8h& a, const Packet8h& b) {\n  Packet8f af = half2float(a);\n  Packet8f bf = half2float(b);\n  Packet8f rf = psub(af, bf);\n  return float2half(rf);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8h pmul<Packet8h>(const Packet8h& a, const Packet8h& b) {\n  Packet8f af = half2float(a);\n  Packet8f bf = half2float(b);\n  Packet8f rf = pmul(af, bf);\n  return float2half(rf);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8h pdiv<Packet8h>(const Packet8h& a, const Packet8h& b) {\n  Packet8f af = half2float(a);\n  Packet8f bf = half2float(b);\n  Packet8f rf = pdiv(af, bf);\n  return float2half(rf);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8h pgather<Eigen::half, Packet8h>(const Eigen::half* from, Index stride)\n{\n  const numext::uint16_t s0 = numext::bit_cast<numext::uint16_t>(from[0*stride]);\n  const numext::uint16_t s1 = numext::bit_cast<numext::uint16_t>(from[1*stride]);\n  const numext::uint16_t s2 = numext::bit_cast<numext::uint16_t>(from[2*stride]);\n  const numext::uint16_t s3 = numext::bit_cast<numext::uint16_t>(from[3*stride]);\n  const numext::uint16_t s4 = numext::bit_cast<numext::uint16_t>(from[4*stride]);\n  const numext::uint16_t s5 = numext::bit_cast<numext::uint16_t>(from[5*stride]);\n  const numext::uint16_t s6 = numext::bit_cast<numext::uint16_t>(from[6*stride]);\n  const numext::uint16_t s7 = numext::bit_cast<numext::uint16_t>(from[7*stride]);\n  return _mm_set_epi16(s7, s6, s5, s4, s3, s2, s1, s0);\n}\n\ntemplate<> EIGEN_STRONG_INLINE void pscatter<Eigen::half, Packet8h>(Eigen::half* to, const Packet8h& from, Index stride)\n{\n  EIGEN_ALIGN32 Eigen::half aux[8];\n  pstore(aux, from);\n  to[stride*0] = aux[0];\n  to[stride*1] = 
aux[1];\n  to[stride*2] = aux[2];\n  to[stride*3] = aux[3];\n  to[stride*4] = aux[4];\n  to[stride*5] = aux[5];\n  to[stride*6] = aux[6];\n  to[stride*7] = aux[7];\n}\n\ntemplate<> EIGEN_STRONG_INLINE Eigen::half predux<Packet8h>(const Packet8h& a) {\n  Packet8f af = half2float(a);\n  float reduced = predux<Packet8f>(af);\n  return Eigen::half(reduced);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Eigen::half predux_max<Packet8h>(const Packet8h& a) {\n  Packet8f af = half2float(a);\n  float reduced = predux_max<Packet8f>(af);\n  return Eigen::half(reduced);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Eigen::half predux_min<Packet8h>(const Packet8h& a) {\n  Packet8f af = half2float(a);\n  float reduced = predux_min<Packet8f>(af);\n  return Eigen::half(reduced);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Eigen::half predux_mul<Packet8h>(const Packet8h& a) {\n  Packet8f af = half2float(a);\n  float reduced = predux_mul<Packet8f>(af);\n  return Eigen::half(reduced);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8h preverse(const Packet8h& a)\n{\n  __m128i m = _mm_setr_epi8(14,15,12,13,10,11,8,9,6,7,4,5,2,3,0,1);\n  return _mm_shuffle_epi8(a,m);\n}\n\nEIGEN_STRONG_INLINE void\nptranspose(PacketBlock<Packet8h,8>& kernel) {\n  __m128i a = kernel.packet[0];\n  __m128i b = kernel.packet[1];\n  __m128i c = kernel.packet[2];\n  __m128i d = kernel.packet[3];\n  __m128i e = kernel.packet[4];\n  __m128i f = kernel.packet[5];\n  __m128i g = kernel.packet[6];\n  __m128i h = kernel.packet[7];\n\n  __m128i a03b03 = _mm_unpacklo_epi16(a, b);\n  __m128i c03d03 = _mm_unpacklo_epi16(c, d);\n  __m128i e03f03 = _mm_unpacklo_epi16(e, f);\n  __m128i g03h03 = _mm_unpacklo_epi16(g, h);\n  __m128i a47b47 = _mm_unpackhi_epi16(a, b);\n  __m128i c47d47 = _mm_unpackhi_epi16(c, d);\n  __m128i e47f47 = _mm_unpackhi_epi16(e, f);\n  __m128i g47h47 = _mm_unpackhi_epi16(g, h);\n\n  __m128i a01b01c01d01 = _mm_unpacklo_epi32(a03b03, c03d03);\n  __m128i a23b23c23d23 = _mm_unpackhi_epi32(a03b03, c03d03);\n  __m128i e01f01g01h01 = _mm_unpacklo_epi32(e03f03, g03h03);\n  __m128i e23f23g23h23 = _mm_unpackhi_epi32(e03f03, g03h03);\n  __m128i a45b45c45d45 = _mm_unpacklo_epi32(a47b47, c47d47);\n  __m128i a67b67c67d67 = _mm_unpackhi_epi32(a47b47, c47d47);\n  __m128i e45f45g45h45 = _mm_unpacklo_epi32(e47f47, g47h47);\n  __m128i e67f67g67h67 = _mm_unpackhi_epi32(e47f47, g47h47);\n\n  __m128i a0b0c0d0e0f0g0h0 = _mm_unpacklo_epi64(a01b01c01d01, e01f01g01h01);\n  __m128i a1b1c1d1e1f1g1h1 = _mm_unpackhi_epi64(a01b01c01d01, e01f01g01h01);\n  __m128i a2b2c2d2e2f2g2h2 = _mm_unpacklo_epi64(a23b23c23d23, e23f23g23h23);\n  __m128i a3b3c3d3e3f3g3h3 = _mm_unpackhi_epi64(a23b23c23d23, e23f23g23h23);\n  __m128i a4b4c4d4e4f4g4h4 = _mm_unpacklo_epi64(a45b45c45d45, e45f45g45h45);\n  __m128i a5b5c5d5e5f5g5h5 = _mm_unpackhi_epi64(a45b45c45d45, e45f45g45h45);\n  __m128i a6b6c6d6e6f6g6h6 = _mm_unpacklo_epi64(a67b67c67d67, e67f67g67h67);\n  __m128i a7b7c7d7e7f7g7h7 = _mm_unpackhi_epi64(a67b67c67d67, e67f67g67h67);\n\n  kernel.packet[0] = a0b0c0d0e0f0g0h0;\n  kernel.packet[1] = a1b1c1d1e1f1g1h1;\n  kernel.packet[2] = a2b2c2d2e2f2g2h2;\n  kernel.packet[3] = a3b3c3d3e3f3g3h3;\n  kernel.packet[4] = a4b4c4d4e4f4g4h4;\n  kernel.packet[5] = a5b5c5d5e5f5g5h5;\n  kernel.packet[6] = a6b6c6d6e6f6g6h6;\n  kernel.packet[7] = a7b7c7d7e7f7g7h7;\n}\n\nEIGEN_STRONG_INLINE void\nptranspose(PacketBlock<Packet8h,4>& kernel) {\n  EIGEN_ALIGN32 Eigen::half in[4][8];\n  pstore<Eigen::half>(in[0], kernel.packet[0]);\n  pstore<Eigen::half>(in[1], kernel.packet[1]);\n  pstore<Eigen::half>(in[2], 
kernel.packet[2]);\n  pstore<Eigen::half>(in[3], kernel.packet[3]);\n\n  EIGEN_ALIGN32 Eigen::half out[4][8];\n\n  for (int i = 0; i < 4; ++i) {\n    for (int j = 0; j < 4; ++j) {\n      out[i][j] = in[j][2*i];\n    }\n    for (int j = 0; j < 4; ++j) {\n      out[i][j+4] = in[j][2*i+1];\n    }\n  }\n\n  kernel.packet[0] = pload<Packet8h>(out[0]);\n  kernel.packet[1] = pload<Packet8h>(out[1]);\n  kernel.packet[2] = pload<Packet8h>(out[2]);\n  kernel.packet[3] = pload<Packet8h>(out[3]);\n}\n\n// BFloat16 implementation.\n\nEIGEN_STRONG_INLINE Packet8f Bf16ToF32(const Packet8bf& a) {\n#ifdef EIGEN_VECTORIZE_AVX2\n  __m256i extend = _mm256_cvtepu16_epi32(a);\n  return _mm256_castsi256_ps(_mm256_slli_epi32(extend, 16));\n#else\n  __m128i lo = _mm_cvtepu16_epi32(a);\n  __m128i hi = _mm_cvtepu16_epi32(_mm_srli_si128(a, 8));\n  __m128i lo_shift = _mm_slli_epi32(lo, 16);\n  __m128i hi_shift = _mm_slli_epi32(hi, 16);\n  return _mm256_castsi256_ps(_mm256_insertf128_si256(_mm256_castsi128_si256(lo_shift), hi_shift, 1));\n#endif\n}\n\n// Convert float to bfloat16 according to round-to-nearest-even/denormals algorithm.\nEIGEN_STRONG_INLINE Packet8bf F32ToBf16(const Packet8f& a) {\n  __m256i input = _mm256_castps_si256(a);\n\n#ifdef EIGEN_VECTORIZE_AVX2\n  // uint32_t lsb = (input >> 16);\n  __m256i t = _mm256_srli_epi32(input, 16);\n  // uint32_t lsb = lsb & 1;\n  t = _mm256_and_si256(t, _mm256_set1_epi32(1));\n  // uint32_t rounding_bias = 0x7fff + lsb;\n  t = _mm256_add_epi32(t, _mm256_set1_epi32(0x7fff));\n  // input += rounding_bias;\n  t = _mm256_add_epi32(t, input);\n  // input = input >> 16;\n  t = _mm256_srli_epi32(t, 16);\n  // Check NaN before converting back to bf16\n  __m256 mask = _mm256_cmp_ps(a, a, _CMP_ORD_Q);\n  __m256i nan = _mm256_set1_epi32(0x7fc0);\n  t = _mm256_blendv_epi8(nan, t, _mm256_castps_si256(mask));\n  // output = numext::bit_cast<uint16_t>(input);\n  return _mm_packus_epi32(_mm256_extractf128_si256(t, 0),\n                          _mm256_extractf128_si256(t, 1));\n#else\n  // uint32_t lsb = (input >> 16);\n  __m128i lo = _mm_srli_epi32(_mm256_extractf128_si256(input, 0), 16);\n  __m128i hi = _mm_srli_epi32(_mm256_extractf128_si256(input, 1), 16);\n  // uint32_t lsb = lsb & 1;\n  lo = _mm_and_si128(lo, _mm_set1_epi32(1));\n  hi = _mm_and_si128(hi, _mm_set1_epi32(1));\n  // uint32_t rounding_bias = 0x7fff + lsb;\n  lo = _mm_add_epi32(lo, _mm_set1_epi32(0x7fff));\n  hi = _mm_add_epi32(hi, _mm_set1_epi32(0x7fff));\n  // input += rounding_bias;\n  lo = _mm_add_epi32(lo, _mm256_extractf128_si256(input, 0));\n  hi = _mm_add_epi32(hi, _mm256_extractf128_si256(input, 1));\n  // input = input >> 16;\n  lo = _mm_srli_epi32(lo, 16);\n  hi = _mm_srli_epi32(hi, 16);\n  // Check NaN before converting back to bf16\n  __m256 mask = _mm256_cmp_ps(a, a, _CMP_ORD_Q);\n  __m128i nan = _mm_set1_epi32(0x7fc0);\n  lo = _mm_blendv_epi8(nan, lo, _mm_castps_si128(_mm256_castps256_ps128(mask)));\n  hi = _mm_blendv_epi8(nan, hi, _mm_castps_si128(_mm256_extractf128_ps(mask, 1)));\n  // output = numext::bit_cast<uint16_t>(input);\n  return _mm_packus_epi32(lo, hi);\n#endif\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8bf pset1<Packet8bf>(const bfloat16& from) {\n  return _mm_set1_epi16(numext::bit_cast<numext::uint16_t>(from));\n}\n\ntemplate<> EIGEN_STRONG_INLINE bfloat16 pfirst<Packet8bf>(const Packet8bf& from) {\n  return numext::bit_cast<bfloat16>(static_cast<numext::uint16_t>(_mm_extract_epi16(from, 0)));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8bf pload<Packet8bf>(const 
bfloat16* from) {\n  return _mm_load_si128(reinterpret_cast<const __m128i*>(from));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8bf ploadu<Packet8bf>(const bfloat16* from) {\n  return _mm_loadu_si128(reinterpret_cast<const __m128i*>(from));\n}\n\ntemplate<> EIGEN_STRONG_INLINE void pstore<bfloat16>(bfloat16* to, const Packet8bf& from) {\n  _mm_store_si128(reinterpret_cast<__m128i*>(to), from);\n}\n\ntemplate<> EIGEN_STRONG_INLINE void pstoreu<bfloat16>(bfloat16* to, const Packet8bf& from) {\n  _mm_storeu_si128(reinterpret_cast<__m128i*>(to), from);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8bf\nploaddup<Packet8bf>(const bfloat16* from) {\n  const numext::uint16_t a = numext::bit_cast<numext::uint16_t>(from[0]);\n  const numext::uint16_t b = numext::bit_cast<numext::uint16_t>(from[1]);\n  const numext::uint16_t c = numext::bit_cast<numext::uint16_t>(from[2]);\n  const numext::uint16_t d = numext::bit_cast<numext::uint16_t>(from[3]);\n  return _mm_set_epi16(d, d, c, c, b, b, a, a);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8bf\nploadquad<Packet8bf>(const bfloat16* from) {\n  const numext::uint16_t a = numext::bit_cast<numext::uint16_t>(from[0]);\n  const numext::uint16_t b = numext::bit_cast<numext::uint16_t>(from[1]);\n  return _mm_set_epi16(b, b, b, b, a, a, a, a);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8bf ptrue(const Packet8bf& a) {\n return _mm_cmpeq_epi32(a, a);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet8bf pabs(const Packet8bf& a) {\n  const __m128i sign_mask = _mm_set1_epi16(static_cast<numext::uint16_t>(0x8000));\n  return _mm_andnot_si128(sign_mask, a);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet8bf pmin<Packet8bf>(const Packet8bf& a,\n                                                const Packet8bf& b) {\n  return F32ToBf16(pmin<Packet8f>(Bf16ToF32(a), Bf16ToF32(b)));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet8bf pmax<Packet8bf>(const Packet8bf& a,\n                                                const Packet8bf& b) {\n  return F32ToBf16(pmax<Packet8f>(Bf16ToF32(a), Bf16ToF32(b)));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet8bf plset<Packet8bf>(const bfloat16& a) {\n  return F32ToBf16(plset<Packet8f>(static_cast<float>(a)));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8bf por(const Packet8bf& a,const Packet8bf& b) {\n  return _mm_or_si128(a,b);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet8bf pxor(const Packet8bf& a,const Packet8bf& b) {\n  return _mm_xor_si128(a,b);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet8bf pand(const Packet8bf& a,const Packet8bf& b) {\n  return _mm_and_si128(a,b);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet8bf pandnot(const Packet8bf& a,const Packet8bf& b) {\n  return _mm_andnot_si128(b,a);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8bf pselect(const Packet8bf& mask, const Packet8bf& a, const Packet8bf& b) {\n  return _mm_blendv_epi8(b, a, mask);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8bf pround<Packet8bf>(const Packet8bf& a)\n{\n  return F32ToBf16(pround<Packet8f>(Bf16ToF32(a)));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8bf print<Packet8bf>(const Packet8bf& a) {\n  return F32ToBf16(print<Packet8f>(Bf16ToF32(a)));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8bf pceil<Packet8bf>(const Packet8bf& a) {\n  return F32ToBf16(pceil<Packet8f>(Bf16ToF32(a)));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8bf pfloor<Packet8bf>(const Packet8bf& a) {\n  return F32ToBf16(pfloor<Packet8f>(Bf16ToF32(a)));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8bf pcmp_eq(const Packet8bf& a,const Packet8bf& b) {\n  return Pack16To8(pcmp_eq(Bf16ToF32(a), 
Bf16ToF32(b)));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8bf pcmp_le(const Packet8bf& a,const Packet8bf& b) {\n  return Pack16To8(pcmp_le(Bf16ToF32(a), Bf16ToF32(b)));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8bf pcmp_lt(const Packet8bf& a,const Packet8bf& b) {\n  return Pack16To8(pcmp_lt(Bf16ToF32(a), Bf16ToF32(b)));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8bf pcmp_lt_or_nan(const Packet8bf& a,const Packet8bf& b) {\n  return Pack16To8(pcmp_lt_or_nan(Bf16ToF32(a), Bf16ToF32(b)));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8bf pconj(const Packet8bf& a) { return a; }\n\ntemplate<> EIGEN_STRONG_INLINE Packet8bf pnegate(const Packet8bf& a) {\n  Packet8bf sign_mask = _mm_set1_epi16(static_cast<numext::uint16_t>(0x8000));\n  return _mm_xor_si128(a, sign_mask);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8bf padd<Packet8bf>(const Packet8bf& a, const Packet8bf& b) {\n  return F32ToBf16(padd<Packet8f>(Bf16ToF32(a), Bf16ToF32(b)));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8bf psub<Packet8bf>(const Packet8bf& a, const Packet8bf& b) {\n  return F32ToBf16(psub<Packet8f>(Bf16ToF32(a), Bf16ToF32(b)));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8bf pmul<Packet8bf>(const Packet8bf& a, const Packet8bf& b) {\n  return F32ToBf16(pmul<Packet8f>(Bf16ToF32(a), Bf16ToF32(b)));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8bf pdiv<Packet8bf>(const Packet8bf& a, const Packet8bf& b) {\n  return F32ToBf16(pdiv<Packet8f>(Bf16ToF32(a), Bf16ToF32(b)));\n}\n\n\ntemplate<> EIGEN_STRONG_INLINE Packet8bf pgather<bfloat16, Packet8bf>(const bfloat16* from, Index stride)\n{\n  const numext::uint16_t s0 = numext::bit_cast<numext::uint16_t>(from[0*stride]);\n  const numext::uint16_t s1 = numext::bit_cast<numext::uint16_t>(from[1*stride]);\n  const numext::uint16_t s2 = numext::bit_cast<numext::uint16_t>(from[2*stride]);\n  const numext::uint16_t s3 = numext::bit_cast<numext::uint16_t>(from[3*stride]);\n  const numext::uint16_t s4 = numext::bit_cast<numext::uint16_t>(from[4*stride]);\n  const numext::uint16_t s5 = numext::bit_cast<numext::uint16_t>(from[5*stride]);\n  const numext::uint16_t s6 = numext::bit_cast<numext::uint16_t>(from[6*stride]);\n  const numext::uint16_t s7 = numext::bit_cast<numext::uint16_t>(from[7*stride]);\n  return _mm_set_epi16(s7, s6, s5, s4, s3, s2, s1, s0);\n}\n\ntemplate<> EIGEN_STRONG_INLINE void pscatter<bfloat16, Packet8bf>(bfloat16* to, const Packet8bf& from, Index stride)\n{\n  EIGEN_ALIGN32 bfloat16 aux[8];\n  pstore(aux, from);\n  to[stride*0] = aux[0];\n  to[stride*1] = aux[1];\n  to[stride*2] = aux[2];\n  to[stride*3] = aux[3];\n  to[stride*4] = aux[4];\n  to[stride*5] = aux[5];\n  to[stride*6] = aux[6];\n  to[stride*7] = aux[7];\n}\n\ntemplate<> EIGEN_STRONG_INLINE bfloat16 predux<Packet8bf>(const Packet8bf& a) {\n  return static_cast<bfloat16>(predux<Packet8f>(Bf16ToF32(a)));\n}\n\ntemplate<> EIGEN_STRONG_INLINE bfloat16 predux_max<Packet8bf>(const Packet8bf& a) {\n  return static_cast<bfloat16>(predux_max<Packet8f>(Bf16ToF32(a)));\n}\n\ntemplate<> EIGEN_STRONG_INLINE bfloat16 predux_min<Packet8bf>(const Packet8bf& a) {\n  return static_cast<bfloat16>(predux_min<Packet8f>(Bf16ToF32(a)));\n}\n\ntemplate<> EIGEN_STRONG_INLINE bfloat16 predux_mul<Packet8bf>(const Packet8bf& a) {\n  return static_cast<bfloat16>(predux_mul<Packet8f>(Bf16ToF32(a)));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8bf preverse(const Packet8bf& a)\n{\n  __m128i m = _mm_setr_epi8(14,15,12,13,10,11,8,9,6,7,4,5,2,3,0,1);\n  return _mm_shuffle_epi8(a,m);\n}\n\nEIGEN_STRONG_INLINE 
void\nptranspose(PacketBlock<Packet8bf,8>& kernel) {\n  __m128i a = kernel.packet[0];\n  __m128i b = kernel.packet[1];\n  __m128i c = kernel.packet[2];\n  __m128i d = kernel.packet[3];\n  __m128i e = kernel.packet[4];\n  __m128i f = kernel.packet[5];\n  __m128i g = kernel.packet[6];\n  __m128i h = kernel.packet[7];\n\n  __m128i a03b03 = _mm_unpacklo_epi16(a, b);\n  __m128i c03d03 = _mm_unpacklo_epi16(c, d);\n  __m128i e03f03 = _mm_unpacklo_epi16(e, f);\n  __m128i g03h03 = _mm_unpacklo_epi16(g, h);\n  __m128i a47b47 = _mm_unpackhi_epi16(a, b);\n  __m128i c47d47 = _mm_unpackhi_epi16(c, d);\n  __m128i e47f47 = _mm_unpackhi_epi16(e, f);\n  __m128i g47h47 = _mm_unpackhi_epi16(g, h);\n\n  __m128i a01b01c01d01 = _mm_unpacklo_epi32(a03b03, c03d03);\n  __m128i a23b23c23d23 = _mm_unpackhi_epi32(a03b03, c03d03);\n  __m128i e01f01g01h01 = _mm_unpacklo_epi32(e03f03, g03h03);\n  __m128i e23f23g23h23 = _mm_unpackhi_epi32(e03f03, g03h03);\n  __m128i a45b45c45d45 = _mm_unpacklo_epi32(a47b47, c47d47);\n  __m128i a67b67c67d67 = _mm_unpackhi_epi32(a47b47, c47d47);\n  __m128i e45f45g45h45 = _mm_unpacklo_epi32(e47f47, g47h47);\n  __m128i e67f67g67h67 = _mm_unpackhi_epi32(e47f47, g47h47);\n\n  kernel.packet[0] = _mm_unpacklo_epi64(a01b01c01d01, e01f01g01h01);\n  kernel.packet[1] = _mm_unpackhi_epi64(a01b01c01d01, e01f01g01h01);\n  kernel.packet[2] = _mm_unpacklo_epi64(a23b23c23d23, e23f23g23h23);\n  kernel.packet[3] = _mm_unpackhi_epi64(a23b23c23d23, e23f23g23h23);\n  kernel.packet[4] = _mm_unpacklo_epi64(a45b45c45d45, e45f45g45h45);\n  kernel.packet[5] = _mm_unpackhi_epi64(a45b45c45d45, e45f45g45h45);\n  kernel.packet[6] = _mm_unpacklo_epi64(a67b67c67d67, e67f67g67h67);\n  kernel.packet[7] = _mm_unpackhi_epi64(a67b67c67d67, e67f67g67h67);\n}\n\nEIGEN_STRONG_INLINE void\nptranspose(PacketBlock<Packet8bf,4>& kernel) {\n  __m128i a = kernel.packet[0];\n  __m128i b = kernel.packet[1];\n  __m128i c = kernel.packet[2];\n  __m128i d = kernel.packet[3];\n\n  __m128i ab_03 = _mm_unpacklo_epi16(a, b);\n  __m128i cd_03 = _mm_unpacklo_epi16(c, d);\n  __m128i ab_47 = _mm_unpackhi_epi16(a, b);\n  __m128i cd_47 = _mm_unpackhi_epi16(c, d);\n\n  kernel.packet[0] = _mm_unpacklo_epi32(ab_03, cd_03);\n  kernel.packet[1] = _mm_unpackhi_epi32(ab_03, cd_03);\n  kernel.packet[2] = _mm_unpacklo_epi32(ab_47, cd_47);\n  kernel.packet[3] = _mm_unpackhi_epi32(ab_47, cd_47);\n}\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_PACKET_MATH_AVX_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/arch/AVX/TypeCasting.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_TYPE_CASTING_AVX_H\n#define EIGEN_TYPE_CASTING_AVX_H\n\nnamespace Eigen {\n\nnamespace internal {\n\n// For now we use SSE to handle integers, so we can't use AVX instructions to cast\n// from int to float\ntemplate <>\nstruct type_casting_traits<float, int> {\n  enum {\n    VectorizedCast = 0,\n    SrcCoeffRatio = 1,\n    TgtCoeffRatio = 1\n  };\n};\n\ntemplate <>\nstruct type_casting_traits<int, float> {\n  enum {\n    VectorizedCast = 0,\n    SrcCoeffRatio = 1,\n    TgtCoeffRatio = 1\n  };\n};\n\n\n#ifndef EIGEN_VECTORIZE_AVX512\n\ntemplate <>\nstruct type_casting_traits<Eigen::half, float> {\n  enum {\n    VectorizedCast = 1,\n    SrcCoeffRatio = 1,\n    TgtCoeffRatio = 1\n  };\n};\n\n\ntemplate <>\nstruct type_casting_traits<float, Eigen::half> {\n  enum {\n    VectorizedCast = 1,\n    SrcCoeffRatio = 1,\n    TgtCoeffRatio = 1\n  };\n};\n\ntemplate <>\nstruct type_casting_traits<bfloat16, float> {\n  enum {\n    VectorizedCast = 1,\n    SrcCoeffRatio = 1,\n    TgtCoeffRatio = 1\n  };\n};\n\ntemplate <>\nstruct type_casting_traits<float, bfloat16> {\n  enum {\n    VectorizedCast = 1,\n    SrcCoeffRatio = 1,\n    TgtCoeffRatio = 1\n  };\n};\n\n#endif  // EIGEN_VECTORIZE_AVX512\n\ntemplate<> EIGEN_STRONG_INLINE Packet8i pcast<Packet8f, Packet8i>(const Packet8f& a) {\n  return _mm256_cvttps_epi32(a);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8f pcast<Packet8i, Packet8f>(const Packet8i& a) {\n  return _mm256_cvtepi32_ps(a);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8i preinterpret<Packet8i,Packet8f>(const Packet8f& a) {\n  return _mm256_castps_si256(a);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8f preinterpret<Packet8f,Packet8i>(const Packet8i& a) {\n  return _mm256_castsi256_ps(a);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8f pcast<Packet8h, Packet8f>(const Packet8h& a) {\n  return half2float(a);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8f pcast<Packet8bf, Packet8f>(const Packet8bf& a) {\n  return Bf16ToF32(a);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8h pcast<Packet8f, Packet8h>(const Packet8f& a) {\n  return float2half(a);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8bf pcast<Packet8f, Packet8bf>(const Packet8f& a) {\n  return F32ToBf16(a);\n}\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_TYPE_CASTING_AVX_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/arch/AVX512/Complex.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2018 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_COMPLEX_AVX512_H\n#define EIGEN_COMPLEX_AVX512_H\n\nnamespace Eigen {\n\nnamespace internal {\n\n//---------- float ----------\nstruct Packet8cf\n{\n  EIGEN_STRONG_INLINE Packet8cf() {}\n  EIGEN_STRONG_INLINE explicit Packet8cf(const __m512& a) : v(a) {}\n  __m512  v;\n};\n\ntemplate<> struct packet_traits<std::complex<float> >  : default_packet_traits\n{\n  typedef Packet8cf type;\n  typedef Packet4cf half;\n  enum {\n    Vectorizable = 1,\n    AlignedOnScalar = 1,\n    size = 8,\n    HasHalfPacket = 1,\n\n    HasAdd    = 1,\n    HasSub    = 1,\n    HasMul    = 1,\n    HasDiv    = 1,\n    HasNegate = 1,\n    HasSqrt   = 1,\n    HasAbs    = 0,\n    HasAbs2   = 0,\n    HasMin    = 0,\n    HasMax    = 0,\n    HasSetLinear = 0\n  };\n};\n\ntemplate<> struct unpacket_traits<Packet8cf> {\n  typedef std::complex<float> type;\n  typedef Packet4cf half;\n  typedef Packet16f as_real;\n  enum {\n    size = 8,\n    alignment=unpacket_traits<Packet16f>::alignment,\n    vectorizable=true,\n    masked_load_available=false,\n    masked_store_available=false\n  };\n};\n\ntemplate<> EIGEN_STRONG_INLINE Packet8cf ptrue<Packet8cf>(const Packet8cf& a) { return Packet8cf(ptrue(Packet16f(a.v))); }\ntemplate<> EIGEN_STRONG_INLINE Packet8cf padd<Packet8cf>(const Packet8cf& a, const Packet8cf& b) { return Packet8cf(_mm512_add_ps(a.v,b.v)); }\ntemplate<> EIGEN_STRONG_INLINE Packet8cf psub<Packet8cf>(const Packet8cf& a, const Packet8cf& b) { return Packet8cf(_mm512_sub_ps(a.v,b.v)); }\ntemplate<> EIGEN_STRONG_INLINE Packet8cf pnegate(const Packet8cf& a)\n{\n  return Packet8cf(pnegate(a.v));\n}\ntemplate<> EIGEN_STRONG_INLINE Packet8cf pconj(const Packet8cf& a)\n{\n  const __m512 mask = _mm512_castsi512_ps(_mm512_setr_epi32(\n    0x00000000,0x80000000,0x00000000,0x80000000,0x00000000,0x80000000,0x00000000,0x80000000,\n    0x00000000,0x80000000,0x00000000,0x80000000,0x00000000,0x80000000,0x00000000,0x80000000));\n  return Packet8cf(pxor(a.v,mask));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8cf pmul<Packet8cf>(const Packet8cf& a, const Packet8cf& b)\n{\n  __m512 tmp2 = _mm512_mul_ps(_mm512_movehdup_ps(a.v), _mm512_permute_ps(b.v, _MM_SHUFFLE(2,3,0,1)));\n  return Packet8cf(_mm512_fmaddsub_ps(_mm512_moveldup_ps(a.v), b.v, tmp2));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8cf pand   <Packet8cf>(const Packet8cf& a, const Packet8cf& b) { return Packet8cf(pand(a.v,b.v)); }\ntemplate<> EIGEN_STRONG_INLINE Packet8cf por    <Packet8cf>(const Packet8cf& a, const Packet8cf& b) { return Packet8cf(por(a.v,b.v)); }\ntemplate<> EIGEN_STRONG_INLINE Packet8cf pxor   <Packet8cf>(const Packet8cf& a, const Packet8cf& b) { return Packet8cf(pxor(a.v,b.v)); }\ntemplate<> EIGEN_STRONG_INLINE Packet8cf pandnot<Packet8cf>(const Packet8cf& a, const Packet8cf& b) { return Packet8cf(pandnot(a.v,b.v)); }\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet8cf pcmp_eq(const Packet8cf& a, const Packet8cf& b) {\n  __m512 eq = pcmp_eq<Packet16f>(a.v, b.v);\n  return Packet8cf(pand(eq, _mm512_permute_ps(eq, 0xB1)));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8cf pload <Packet8cf>(const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD return 
Packet8cf(pload<Packet16f>(&numext::real_ref(*from))); }\ntemplate<> EIGEN_STRONG_INLINE Packet8cf ploadu<Packet8cf>(const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet8cf(ploadu<Packet16f>(&numext::real_ref(*from))); }\n\n\ntemplate<> EIGEN_STRONG_INLINE Packet8cf pset1<Packet8cf>(const std::complex<float>& from)\n{\n  return Packet8cf(_mm512_castpd_ps(pload1<Packet8d>((const double*)(const void*)&from)));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8cf ploaddup<Packet8cf>(const std::complex<float>* from)\n{\n  return Packet8cf( _mm512_castpd_ps( ploaddup<Packet8d>((const double*)(const void*)from )) );\n}\ntemplate<> EIGEN_STRONG_INLINE Packet8cf ploadquad<Packet8cf>(const std::complex<float>* from)\n{\n  return Packet8cf( _mm512_castpd_ps( ploadquad<Packet8d>((const double*)(const void*)from )) );\n}\n\ntemplate<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float>* to, const Packet8cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore(&numext::real_ref(*to), from.v); }\ntemplate<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float>* to, const Packet8cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(&numext::real_ref(*to), from.v); }\n\ntemplate<> EIGEN_DEVICE_FUNC inline Packet8cf pgather<std::complex<float>, Packet8cf>(const std::complex<float>* from, Index stride)\n{\n  return Packet8cf(_mm512_castpd_ps(pgather<double,Packet8d>((const double*)(const void*)from, stride)));\n}\n\ntemplate<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet8cf>(std::complex<float>* to, const Packet8cf& from, Index stride)\n{\n  pscatter((double*)(void*)to, _mm512_castps_pd(from.v), stride);\n}\n\ntemplate<> EIGEN_STRONG_INLINE std::complex<float>  pfirst<Packet8cf>(const Packet8cf& a)\n{\n  return pfirst(Packet2cf(_mm512_castps512_ps128(a.v)));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8cf preverse(const Packet8cf& a) {\n  return Packet8cf(_mm512_castsi512_ps(\n            _mm512_permutexvar_epi64( _mm512_set_epi32(0, 0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7),\n                                      _mm512_castps_si512(a.v))));\n}\n\ntemplate<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet8cf>(const Packet8cf& a)\n{\n  return predux(padd(Packet4cf(extract256<0>(a.v)),\n                     Packet4cf(extract256<1>(a.v))));\n}\n\ntemplate<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet8cf>(const Packet8cf& a)\n{\n  return predux_mul(pmul(Packet4cf(extract256<0>(a.v)),\n                         Packet4cf(extract256<1>(a.v))));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4cf predux_half_dowto4<Packet8cf>(const Packet8cf& a) {\n  __m256 lane0 = extract256<0>(a.v);\n  __m256 lane1 = extract256<1>(a.v);\n  __m256 res = _mm256_add_ps(lane0, lane1);\n  return Packet4cf(res);\n}\n\nEIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet8cf,Packet16f)\n\ntemplate<> EIGEN_STRONG_INLINE Packet8cf pdiv<Packet8cf>(const Packet8cf& a, const Packet8cf& b)\n{\n  Packet8cf num = pmul(a, pconj(b));\n  __m512 tmp = _mm512_mul_ps(b.v, b.v);\n  __m512 tmp2    = _mm512_shuffle_ps(tmp,tmp,0xB1);\n  __m512 denom = _mm512_add_ps(tmp, tmp2);\n  return Packet8cf(_mm512_div_ps(num.v, denom));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8cf pcplxflip<Packet8cf>(const Packet8cf& x)\n{\n  return Packet8cf(_mm512_shuffle_ps(x.v, x.v, _MM_SHUFFLE(2, 3, 0 ,1)));\n}\n\n//---------- double ----------\nstruct Packet4cd\n{\n  EIGEN_STRONG_INLINE Packet4cd() {}\n  EIGEN_STRONG_INLINE explicit Packet4cd(const __m512d& a) : v(a) {}\n  __m512d  v;\n};\n\ntemplate<> 
struct packet_traits<std::complex<double> >  : default_packet_traits\n{\n  typedef Packet4cd type;\n  typedef Packet2cd half;\n  enum {\n    Vectorizable = 1,\n    AlignedOnScalar = 0,\n    size = 4,\n    HasHalfPacket = 1,\n\n    HasAdd    = 1,\n    HasSub    = 1,\n    HasMul    = 1,\n    HasDiv    = 1,\n    HasNegate = 1,\n    HasSqrt   = 1,\n    HasAbs    = 0,\n    HasAbs2   = 0,\n    HasMin    = 0,\n    HasMax    = 0,\n    HasSetLinear = 0\n  };\n};\n\ntemplate<> struct unpacket_traits<Packet4cd> {\n  typedef std::complex<double> type;\n  typedef Packet2cd half;\n  typedef Packet8d as_real;\n  enum {\n    size = 4,\n    alignment = unpacket_traits<Packet8d>::alignment,\n    vectorizable=true,\n    masked_load_available=false,\n    masked_store_available=false\n  };\n};\n\ntemplate<> EIGEN_STRONG_INLINE Packet4cd padd<Packet4cd>(const Packet4cd& a, const Packet4cd& b) { return Packet4cd(_mm512_add_pd(a.v,b.v)); }\ntemplate<> EIGEN_STRONG_INLINE Packet4cd psub<Packet4cd>(const Packet4cd& a, const Packet4cd& b) { return Packet4cd(_mm512_sub_pd(a.v,b.v)); }\ntemplate<> EIGEN_STRONG_INLINE Packet4cd pnegate(const Packet4cd& a) { return Packet4cd(pnegate(a.v)); }\ntemplate<> EIGEN_STRONG_INLINE Packet4cd pconj(const Packet4cd& a)\n{\n  const __m512d mask = _mm512_castsi512_pd(\n          _mm512_set_epi32(0x80000000,0x0,0x0,0x0,0x80000000,0x0,0x0,0x0,\n                           0x80000000,0x0,0x0,0x0,0x80000000,0x0,0x0,0x0));\n  return Packet4cd(pxor(a.v,mask));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4cd pmul<Packet4cd>(const Packet4cd& a, const Packet4cd& b)\n{\n  __m512d tmp1 = _mm512_shuffle_pd(a.v,a.v,0x0);\n  __m512d tmp2 = _mm512_shuffle_pd(a.v,a.v,0xFF);\n  __m512d tmp3 = _mm512_shuffle_pd(b.v,b.v,0x55);\n  __m512d odd  = _mm512_mul_pd(tmp2, tmp3);\n  return Packet4cd(_mm512_fmaddsub_pd(tmp1, b.v, odd));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4cd ptrue<Packet4cd>(const Packet4cd& a) { return Packet4cd(ptrue(Packet8d(a.v))); }\ntemplate<> EIGEN_STRONG_INLINE Packet4cd pand   <Packet4cd>(const Packet4cd& a, const Packet4cd& b) { return Packet4cd(pand(a.v,b.v)); }\ntemplate<> EIGEN_STRONG_INLINE Packet4cd por    <Packet4cd>(const Packet4cd& a, const Packet4cd& b) { return Packet4cd(por(a.v,b.v)); }\ntemplate<> EIGEN_STRONG_INLINE Packet4cd pxor   <Packet4cd>(const Packet4cd& a, const Packet4cd& b) { return Packet4cd(pxor(a.v,b.v)); }\ntemplate<> EIGEN_STRONG_INLINE Packet4cd pandnot<Packet4cd>(const Packet4cd& a, const Packet4cd& b) { return Packet4cd(pandnot(a.v,b.v)); }\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4cd pcmp_eq(const Packet4cd& a, const Packet4cd& b) {\n  __m512d eq = pcmp_eq<Packet8d>(a.v, b.v);\n  return Packet4cd(pand(eq, _mm512_permute_pd(eq, 0x55)));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4cd pload <Packet4cd>(const std::complex<double>* from)\n{ EIGEN_DEBUG_ALIGNED_LOAD return Packet4cd(pload<Packet8d>((const double*)from)); }\ntemplate<> EIGEN_STRONG_INLINE Packet4cd ploadu<Packet4cd>(const std::complex<double>* from)\n{ EIGEN_DEBUG_UNALIGNED_LOAD return Packet4cd(ploadu<Packet8d>((const double*)from)); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet4cd pset1<Packet4cd>(const std::complex<double>& from)\n{\n  #ifdef EIGEN_VECTORIZE_AVX512DQ\n  return Packet4cd(_mm512_broadcast_f64x2(pset1<Packet1cd>(from).v));\n  #else\n  return Packet4cd(_mm512_castps_pd(_mm512_broadcast_f32x4( _mm_castpd_ps(pset1<Packet1cd>(from).v))));\n  #endif\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4cd ploaddup<Packet4cd>(const std::complex<double>* from) {\n  return 
Packet4cd(_mm512_insertf64x4(\n          _mm512_castpd256_pd512(ploaddup<Packet2cd>(from).v), ploaddup<Packet2cd>(from+1).v, 1));\n}\n\ntemplate<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> *   to, const Packet4cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, from.v); }\ntemplate<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> *   to, const Packet4cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, from.v); }\n\ntemplate<> EIGEN_DEVICE_FUNC inline Packet4cd pgather<std::complex<double>, Packet4cd>(const std::complex<double>* from, Index stride)\n{\n  return Packet4cd(_mm512_insertf64x4(_mm512_castpd256_pd512(\n            _mm256_insertf128_pd(_mm256_castpd128_pd256(ploadu<Packet1cd>(from+0*stride).v), ploadu<Packet1cd>(from+1*stride).v,1)),\n            _mm256_insertf128_pd(_mm256_castpd128_pd256(ploadu<Packet1cd>(from+2*stride).v), ploadu<Packet1cd>(from+3*stride).v,1), 1));\n}\n\ntemplate<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet4cd>(std::complex<double>* to, const Packet4cd& from, Index stride)\n{\n  __m512i fromi = _mm512_castpd_si512(from.v);\n  double* tod = (double*)(void*)to;\n  _mm_storeu_pd(tod+0*stride, _mm_castsi128_pd(_mm512_extracti32x4_epi32(fromi,0)) );\n  _mm_storeu_pd(tod+2*stride, _mm_castsi128_pd(_mm512_extracti32x4_epi32(fromi,1)) );\n  _mm_storeu_pd(tod+4*stride, _mm_castsi128_pd(_mm512_extracti32x4_epi32(fromi,2)) );\n  _mm_storeu_pd(tod+6*stride, _mm_castsi128_pd(_mm512_extracti32x4_epi32(fromi,3)) );\n}\n\ntemplate<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet4cd>(const Packet4cd& a)\n{\n  __m128d low = extract128<0>(a.v);\n  EIGEN_ALIGN16 double res[2];\n  _mm_store_pd(res, low);\n  return std::complex<double>(res[0],res[1]);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4cd preverse(const Packet4cd& a) {\n  return Packet4cd(_mm512_shuffle_f64x2(a.v, a.v, (shuffle_mask<3,2,1,0>::mask)));\n}\n\ntemplate<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet4cd>(const Packet4cd& a)\n{\n  return predux(padd(Packet2cd(_mm512_extractf64x4_pd(a.v,0)),\n                     Packet2cd(_mm512_extractf64x4_pd(a.v,1))));\n}\n\ntemplate<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet4cd>(const Packet4cd& a)\n{\n  return predux_mul(pmul(Packet2cd(_mm512_extractf64x4_pd(a.v,0)),\n                         Packet2cd(_mm512_extractf64x4_pd(a.v,1))));\n}\n\ntemplate<> struct conj_helper<Packet4cd, Packet4cd, false,true>\n{\n  EIGEN_STRONG_INLINE Packet4cd pmadd(const Packet4cd& x, const Packet4cd& y, const Packet4cd& c) const\n  { return padd(pmul(x,y),c); }\n\n  EIGEN_STRONG_INLINE Packet4cd pmul(const Packet4cd& a, const Packet4cd& b) const\n  {\n    return internal::pmul(a, pconj(b));\n  }\n};\n\ntemplate<> struct conj_helper<Packet4cd, Packet4cd, true,false>\n{\n  EIGEN_STRONG_INLINE Packet4cd pmadd(const Packet4cd& x, const Packet4cd& y, const Packet4cd& c) const\n  { return padd(pmul(x,y),c); }\n\n  EIGEN_STRONG_INLINE Packet4cd pmul(const Packet4cd& a, const Packet4cd& b) const\n  {\n    return internal::pmul(pconj(a), b);\n  }\n};\n\ntemplate<> struct conj_helper<Packet4cd, Packet4cd, true,true>\n{\n  EIGEN_STRONG_INLINE Packet4cd pmadd(const Packet4cd& x, const Packet4cd& y, const Packet4cd& c) const\n  { return padd(pmul(x,y),c); }\n\n  EIGEN_STRONG_INLINE Packet4cd pmul(const Packet4cd& a, const Packet4cd& b) const\n  {\n    return pconj(internal::pmul(a, b));\n  }\n};\n\nEIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet4cd,Packet8d)\n\ntemplate<> 
EIGEN_STRONG_INLINE Packet4cd pdiv<Packet4cd>(const Packet4cd& a, const Packet4cd& b)\n{\n  Packet4cd num = pmul(a, pconj(b));\n  __m512d tmp = _mm512_mul_pd(b.v, b.v);\n  __m512d denom =  padd(_mm512_permute_pd(tmp,0x55), tmp);\n  return Packet4cd(_mm512_div_pd(num.v, denom));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4cd pcplxflip<Packet4cd>(const Packet4cd& x)\n{\n  return Packet4cd(_mm512_permute_pd(x.v,0x55));\n}\n\nEIGEN_DEVICE_FUNC inline void\nptranspose(PacketBlock<Packet8cf,4>& kernel) {\n  PacketBlock<Packet8d,4> pb;\n  \n  pb.packet[0] = _mm512_castps_pd(kernel.packet[0].v);\n  pb.packet[1] = _mm512_castps_pd(kernel.packet[1].v);\n  pb.packet[2] = _mm512_castps_pd(kernel.packet[2].v);\n  pb.packet[3] = _mm512_castps_pd(kernel.packet[3].v);\n  ptranspose(pb);\n  kernel.packet[0].v = _mm512_castpd_ps(pb.packet[0]);\n  kernel.packet[1].v = _mm512_castpd_ps(pb.packet[1]);\n  kernel.packet[2].v = _mm512_castpd_ps(pb.packet[2]);\n  kernel.packet[3].v = _mm512_castpd_ps(pb.packet[3]);\n}\n\nEIGEN_DEVICE_FUNC inline void\nptranspose(PacketBlock<Packet8cf,8>& kernel) {\n  PacketBlock<Packet8d,8> pb;\n  \n  pb.packet[0] = _mm512_castps_pd(kernel.packet[0].v);\n  pb.packet[1] = _mm512_castps_pd(kernel.packet[1].v);\n  pb.packet[2] = _mm512_castps_pd(kernel.packet[2].v);\n  pb.packet[3] = _mm512_castps_pd(kernel.packet[3].v);\n  pb.packet[4] = _mm512_castps_pd(kernel.packet[4].v);\n  pb.packet[5] = _mm512_castps_pd(kernel.packet[5].v);\n  pb.packet[6] = _mm512_castps_pd(kernel.packet[6].v);\n  pb.packet[7] = _mm512_castps_pd(kernel.packet[7].v);\n  ptranspose(pb);\n  kernel.packet[0].v = _mm512_castpd_ps(pb.packet[0]);\n  kernel.packet[1].v = _mm512_castpd_ps(pb.packet[1]);\n  kernel.packet[2].v = _mm512_castpd_ps(pb.packet[2]);\n  kernel.packet[3].v = _mm512_castpd_ps(pb.packet[3]);\n  kernel.packet[4].v = _mm512_castpd_ps(pb.packet[4]);\n  kernel.packet[5].v = _mm512_castpd_ps(pb.packet[5]);\n  kernel.packet[6].v = _mm512_castpd_ps(pb.packet[6]);\n  kernel.packet[7].v = _mm512_castpd_ps(pb.packet[7]);\n}\n\nEIGEN_DEVICE_FUNC inline void\nptranspose(PacketBlock<Packet4cd,4>& kernel) {\n  __m512d T0 = _mm512_shuffle_f64x2(kernel.packet[0].v, kernel.packet[1].v, (shuffle_mask<0,1,0,1>::mask)); // [a0 a1 b0 b1]\n  __m512d T1 = _mm512_shuffle_f64x2(kernel.packet[0].v, kernel.packet[1].v, (shuffle_mask<2,3,2,3>::mask)); // [a2 a3 b2 b3]\n  __m512d T2 = _mm512_shuffle_f64x2(kernel.packet[2].v, kernel.packet[3].v, (shuffle_mask<0,1,0,1>::mask)); // [c0 c1 d0 d1]\n  __m512d T3 = _mm512_shuffle_f64x2(kernel.packet[2].v, kernel.packet[3].v, (shuffle_mask<2,3,2,3>::mask)); // [c2 c3 d2 d3]\n\n  kernel.packet[3] = Packet4cd(_mm512_shuffle_f64x2(T1, T3, (shuffle_mask<1,3,1,3>::mask))); // [a3 b3 c3 d3]\n  kernel.packet[2] = Packet4cd(_mm512_shuffle_f64x2(T1, T3, (shuffle_mask<0,2,0,2>::mask))); // [a2 b2 c2 d2]\n  kernel.packet[1] = Packet4cd(_mm512_shuffle_f64x2(T0, T2, (shuffle_mask<1,3,1,3>::mask))); // [a1 b1 c1 d1]\n  kernel.packet[0] = Packet4cd(_mm512_shuffle_f64x2(T0, T2, (shuffle_mask<0,2,0,2>::mask))); // [a0 b0 c0 d0]\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4cd psqrt<Packet4cd>(const Packet4cd& a) {\n  return psqrt_complex<Packet4cd>(a);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8cf psqrt<Packet8cf>(const Packet8cf& a) {\n  return psqrt_complex<Packet8cf>(a);\n}\n\n} // end namespace internal\n} // end namespace Eigen\n\n#endif // EIGEN_COMPLEX_AVX512_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/arch/AVX512/MathFunctions.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2016 Pedro Gonnet (pedro.gonnet@gmail.com)\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef THIRD_PARTY_EIGEN3_EIGEN_SRC_CORE_ARCH_AVX512_MATHFUNCTIONS_H_\n#define THIRD_PARTY_EIGEN3_EIGEN_SRC_CORE_ARCH_AVX512_MATHFUNCTIONS_H_\n\nnamespace Eigen {\n\nnamespace internal {\n\n// Disable the code for older versions of gcc that don't support many of the required avx512 instrinsics.\n#if EIGEN_GNUC_AT_LEAST(5, 3) || EIGEN_COMP_CLANG  || EIGEN_COMP_MSVC >= 1923\n\n#define _EIGEN_DECLARE_CONST_Packet16f(NAME, X) \\\n  const Packet16f p16f_##NAME = pset1<Packet16f>(X)\n\n#define _EIGEN_DECLARE_CONST_Packet16f_FROM_INT(NAME, X) \\\n  const Packet16f p16f_##NAME =  preinterpret<Packet16f,Packet16i>(pset1<Packet16i>(X))\n\n#define _EIGEN_DECLARE_CONST_Packet8d(NAME, X) \\\n  const Packet8d p8d_##NAME = pset1<Packet8d>(X)\n\n#define _EIGEN_DECLARE_CONST_Packet8d_FROM_INT64(NAME, X) \\\n  const Packet8d p8d_##NAME = _mm512_castsi512_pd(_mm512_set1_epi64(X))\n\n#define _EIGEN_DECLARE_CONST_Packet16bf(NAME, X) \\\n  const Packet16bf p16bf_##NAME = pset1<Packet16bf>(X)\n\n#define _EIGEN_DECLARE_CONST_Packet16bf_FROM_INT(NAME, X) \\\n  const Packet16bf p16bf_##NAME =  preinterpret<Packet16bf,Packet16i>(pset1<Packet16i>(X))\n\ntemplate <>\nEIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f\nplog<Packet16f>(const Packet16f& _x) {\n  return plog_float(_x);\n}\n\ntemplate <>\nEIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8d\nplog<Packet8d>(const Packet8d& _x) {\n  return plog_double(_x);\n}\n\nF16_PACKET_FUNCTION(Packet16f, Packet16h, plog)\nBF16_PACKET_FUNCTION(Packet16f, Packet16bf, plog)\n\ntemplate <>\nEIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f\nplog2<Packet16f>(const Packet16f& _x) {\n  return plog2_float(_x);\n}\n\ntemplate <>\nEIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8d\nplog2<Packet8d>(const Packet8d& _x) {\n  return plog2_double(_x);\n}\n\nF16_PACKET_FUNCTION(Packet16f, Packet16h, plog2)\nBF16_PACKET_FUNCTION(Packet16f, Packet16bf, plog2)\n\n// Exponential function. Works by writing \"x = m*log(2) + r\" where\n// \"m = floor(x/log(2)+1/2)\" and \"r\" is the remainder. 
The result is then\n// \"exp(x) = 2^m*exp(r)\", where r is in the range [-log(2)/2, log(2)/2].\ntemplate <>\nEIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f\npexp<Packet16f>(const Packet16f& _x) {\n  _EIGEN_DECLARE_CONST_Packet16f(1, 1.0f);\n  _EIGEN_DECLARE_CONST_Packet16f(half, 0.5f);\n  _EIGEN_DECLARE_CONST_Packet16f(127, 127.0f);\n\n  _EIGEN_DECLARE_CONST_Packet16f(exp_hi, 88.3762626647950f);\n  _EIGEN_DECLARE_CONST_Packet16f(exp_lo, -88.3762626647949f);\n\n  _EIGEN_DECLARE_CONST_Packet16f(cephes_LOG2EF, 1.44269504088896341f);\n\n  _EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p0, 1.9875691500E-4f);\n  _EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p1, 1.3981999507E-3f);\n  _EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p2, 8.3334519073E-3f);\n  _EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p3, 4.1665795894E-2f);\n  _EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p4, 1.6666665459E-1f);\n  _EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p5, 5.0000001201E-1f);\n\n  // Clamp x.\n  Packet16f x = pmax(pmin(_x, p16f_exp_hi), p16f_exp_lo);\n\n  // Express exp(x) as exp(m*ln(2) + r), start by extracting\n  // m = floor(x/ln(2) + 0.5).\n  Packet16f m = _mm512_floor_ps(pmadd(x, p16f_cephes_LOG2EF, p16f_half));\n\n  // Get r = x - m*ln(2). Note that we can do this without losing more than one\n  // ulp of precision due to the FMA instruction.\n  _EIGEN_DECLARE_CONST_Packet16f(nln2, -0.6931471805599453f);\n  Packet16f r = _mm512_fmadd_ps(m, p16f_nln2, x);\n  Packet16f r2 = pmul(r, r);\n  Packet16f r3 = pmul(r2, r);\n\n  // Evaluate the polynomial approximant, improved by instruction-level parallelism.\n  Packet16f y, y1, y2;\n  y  = pmadd(p16f_cephes_exp_p0, r, p16f_cephes_exp_p1);\n  y1 = pmadd(p16f_cephes_exp_p3, r, p16f_cephes_exp_p4);\n  y2 = padd(r, p16f_1);\n  y  = pmadd(y, r, p16f_cephes_exp_p2);\n  y1 = pmadd(y1, r, p16f_cephes_exp_p5);\n  y  = pmadd(y, r3, y1);\n  y  = pmadd(y, r2, y2);\n\n  // Build emm0 = 2^m.\n  Packet16i emm0 = _mm512_cvttps_epi32(padd(m, p16f_127));\n  emm0 = _mm512_slli_epi32(emm0, 23);\n\n  // Return 2^m * exp(r).\n  return pmax(pmul(y, _mm512_castsi512_ps(emm0)), _x);\n}\n\ntemplate <>\nEIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8d\npexp<Packet8d>(const Packet8d& _x) {\n  return pexp_double(_x);\n}\n\nF16_PACKET_FUNCTION(Packet16f, Packet16h, pexp)\nBF16_PACKET_FUNCTION(Packet16f, Packet16bf, pexp)\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet16h pfrexp(const Packet16h& a, Packet16h& exponent) {\n  Packet16f fexponent;\n  const Packet16h out = float2half(pfrexp<Packet16f>(half2float(a), fexponent));\n  exponent = float2half(fexponent);\n  return out;\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet16h pldexp(const Packet16h& a, const Packet16h& exponent) {\n  return float2half(pldexp<Packet16f>(half2float(a), half2float(exponent)));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet16bf pfrexp(const Packet16bf& a, Packet16bf& exponent) {\n  Packet16f fexponent;\n  const Packet16bf out = F32ToBf16(pfrexp<Packet16f>(Bf16ToF32(a), fexponent));\n  exponent = F32ToBf16(fexponent);\n  return out;\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet16bf pldexp(const Packet16bf& a, const Packet16bf& exponent) {\n  return F32ToBf16(pldexp<Packet16f>(Bf16ToF32(a), Bf16ToF32(exponent)));\n}\n\n// Functions for sqrt.\n// The EIGEN_FAST_MATH version uses the _mm_rsqrt_ps approximation and one step\n// of Newton's method, at a cost of 1-2 bits of precision as opposed to the\n// exact solution. 
The main advantage of this approach is not just speed, but\n// also the fact that it can be inlined and pipelined with other computations,\n// further reducing its effective latency.\n#if EIGEN_FAST_MATH\ntemplate <>\nEIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f\npsqrt<Packet16f>(const Packet16f& _x) {\n  Packet16f neg_half = pmul(_x, pset1<Packet16f>(-.5f));\n  __mmask16 denormal_mask = _mm512_kand(\n      _mm512_cmp_ps_mask(_x, pset1<Packet16f>((std::numeric_limits<float>::min)()),\n                        _CMP_LT_OQ),\n      _mm512_cmp_ps_mask(_x, _mm512_setzero_ps(), _CMP_GE_OQ));\n\n  Packet16f x = _mm512_rsqrt14_ps(_x);\n\n  // Do a single step of Newton's iteration.\n  x = pmul(x, pmadd(neg_half, pmul(x, x), pset1<Packet16f>(1.5f)));\n\n  // Flush results for denormals to zero.\n  return _mm512_mask_blend_ps(denormal_mask, pmul(_x,x), _mm512_setzero_ps());\n}\n\ntemplate <>\nEIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8d\npsqrt<Packet8d>(const Packet8d& _x) {\n  Packet8d neg_half = pmul(_x, pset1<Packet8d>(-.5));\n  __mmask16 denormal_mask = _mm512_kand(\n      _mm512_cmp_pd_mask(_x, pset1<Packet8d>((std::numeric_limits<double>::min)()),\n                        _CMP_LT_OQ),\n      _mm512_cmp_pd_mask(_x, _mm512_setzero_pd(), _CMP_GE_OQ));\n\n  Packet8d x = _mm512_rsqrt14_pd(_x);\n\n  // Do a single step of Newton's iteration.\n  x = pmul(x, pmadd(neg_half, pmul(x, x), pset1<Packet8d>(1.5)));\n\n  // Do a second step of Newton's iteration.\n  x = pmul(x, pmadd(neg_half, pmul(x, x), pset1<Packet8d>(1.5)));\n\n  return _mm512_mask_blend_pd(denormal_mask, pmul(_x,x), _mm512_setzero_pd());\n}\n#else\ntemplate <>\nEIGEN_STRONG_INLINE Packet16f psqrt<Packet16f>(const Packet16f& x) {\n  return _mm512_sqrt_ps(x);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet8d psqrt<Packet8d>(const Packet8d& x) {\n  return _mm512_sqrt_pd(x);\n}\n#endif\n\nF16_PACKET_FUNCTION(Packet16f, Packet16h, psqrt)\nBF16_PACKET_FUNCTION(Packet16f, Packet16bf, psqrt)\n\n// prsqrt for float.\n#if defined(EIGEN_VECTORIZE_AVX512ER)\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet16f prsqrt<Packet16f>(const Packet16f& x) {\n  return _mm512_rsqrt28_ps(x);\n}\n#elif EIGEN_FAST_MATH\n\ntemplate <>\nEIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f\nprsqrt<Packet16f>(const Packet16f& _x) {\n  _EIGEN_DECLARE_CONST_Packet16f_FROM_INT(inf, 0x7f800000);\n  _EIGEN_DECLARE_CONST_Packet16f(one_point_five, 1.5f);\n  _EIGEN_DECLARE_CONST_Packet16f(minus_half, -0.5f);\n\n  Packet16f neg_half = pmul(_x, p16f_minus_half);\n\n  // Identify infinite, negative and denormal arguments.\n  __mmask16 inf_mask = _mm512_cmp_ps_mask(_x, p16f_inf, _CMP_EQ_OQ);\n  __mmask16 not_pos_mask = _mm512_cmp_ps_mask(_x, _mm512_setzero_ps(), _CMP_LE_OQ);\n  __mmask16 not_finite_pos_mask = not_pos_mask | inf_mask;\n\n  // Compute an approximate result using the rsqrt intrinsic, forcing +inf\n  // for denormals for consistency with AVX and SSE implementations.\n  Packet16f y_approx = _mm512_rsqrt14_ps(_x);\n\n  // Do a single step of Newton-Raphson iteration to improve the approximation.\n  // This uses the formula y_{n+1} = y_n * (1.5 - y_n * (0.5 * x) * y_n).\n  // It is essential to evaluate the inner term like this because forming\n  // y_n^2 may over- or underflow.\n  Packet16f y_newton = pmul(y_approx, pmadd(y_approx, pmul(neg_half, y_approx), p16f_one_point_five));\n\n  // Select the result of the Newton-Raphson step for positive finite arguments.\n  // For other arguments, choose 
\n// prsqrt for float.\n#if defined(EIGEN_VECTORIZE_AVX512ER)\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet16f prsqrt<Packet16f>(const Packet16f& x) {\n  return _mm512_rsqrt28_ps(x);\n}\n#elif EIGEN_FAST_MATH\n\ntemplate <>\nEIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f\nprsqrt<Packet16f>(const Packet16f& _x) {\n  _EIGEN_DECLARE_CONST_Packet16f_FROM_INT(inf, 0x7f800000);\n  _EIGEN_DECLARE_CONST_Packet16f(one_point_five, 1.5f);\n  _EIGEN_DECLARE_CONST_Packet16f(minus_half, -0.5f);\n\n  Packet16f neg_half = pmul(_x, p16f_minus_half);\n\n  // Identify infinite, negative and denormal arguments.\n  __mmask16 inf_mask = _mm512_cmp_ps_mask(_x, p16f_inf, _CMP_EQ_OQ);\n  __mmask16 not_pos_mask = _mm512_cmp_ps_mask(_x, _mm512_setzero_ps(), _CMP_LE_OQ);\n  __mmask16 not_finite_pos_mask = not_pos_mask | inf_mask;\n\n  // Compute an approximate result using the rsqrt intrinsic, forcing +inf\n  // for denormals for consistency with AVX and SSE implementations.\n  Packet16f y_approx = _mm512_rsqrt14_ps(_x);\n\n  // Do a single step of Newton-Raphson iteration to improve the approximation.\n  // This uses the formula y_{n+1} = y_n * (1.5 - y_n * (0.5 * x) * y_n).\n  // It is essential to evaluate the inner term like this because forming\n  // y_n^2 may over- or underflow.\n  Packet16f y_newton = pmul(y_approx, pmadd(y_approx, pmul(neg_half, y_approx), p16f_one_point_five));\n\n  // Select the result of the Newton-Raphson step for positive finite arguments.\n  // For other arguments, choose the output of the intrinsic. This will\n  // return rsqrt(+inf) = 0, rsqrt(x) = NaN if x < 0, and rsqrt(0) = +inf.\n  return _mm512_mask_blend_ps(not_finite_pos_mask, y_newton, y_approx);\n}\n#else\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet16f prsqrt<Packet16f>(const Packet16f& x) {\n  _EIGEN_DECLARE_CONST_Packet16f(one, 1.0f);\n  return _mm512_div_ps(p16f_one, _mm512_sqrt_ps(x));\n}\n#endif\n\nF16_PACKET_FUNCTION(Packet16f, Packet16h, prsqrt)\nBF16_PACKET_FUNCTION(Packet16f, Packet16bf, prsqrt)\n\n// prsqrt for double.\n#if EIGEN_FAST_MATH\ntemplate <>\nEIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8d\nprsqrt<Packet8d>(const Packet8d& _x) {\n  _EIGEN_DECLARE_CONST_Packet8d(one_point_five, 1.5);\n  _EIGEN_DECLARE_CONST_Packet8d(minus_half, -0.5);\n  _EIGEN_DECLARE_CONST_Packet8d_FROM_INT64(inf, 0x7ff0000000000000LL);\n\n  Packet8d neg_half = pmul(_x, p8d_minus_half);\n\n  // Identify infinite, negative and denormal arguments.\n  __mmask8 inf_mask = _mm512_cmp_pd_mask(_x, p8d_inf, _CMP_EQ_OQ);\n  __mmask8 not_pos_mask = _mm512_cmp_pd_mask(_x, _mm512_setzero_pd(), _CMP_LE_OQ);\n  __mmask8 not_finite_pos_mask = not_pos_mask | inf_mask;\n\n  // Compute an approximate result using the rsqrt intrinsic, forcing +inf\n  // for denormals for consistency with AVX and SSE implementations.\n#if defined(EIGEN_VECTORIZE_AVX512ER)\n  Packet8d y_approx = _mm512_rsqrt28_pd(_x);\n#else\n  Packet8d y_approx = _mm512_rsqrt14_pd(_x);\n#endif\n  // Do one or two steps of Newton-Raphson to improve the approximation, depending on the\n  // starting accuracy (either 2^-14 or 2^-28, depending on whether AVX512ER is available).\n  // The Newton-Raphson algorithm has quadratic convergence and roughly doubles the number\n  // of correct digits for each step.\n  // This uses the formula y_{n+1} = y_n * (1.5 - y_n * (0.5 * x) * y_n).\n  // It is essential to evaluate the inner term like this because forming\n  // y_n^2 may over- or underflow.\n  Packet8d y_newton = pmul(y_approx, pmadd(neg_half, pmul(y_approx, y_approx), p8d_one_point_five));\n#if !defined(EIGEN_VECTORIZE_AVX512ER)\n  y_newton = pmul(y_newton, pmadd(y_newton, pmul(neg_half, y_newton), p8d_one_point_five));\n#endif\n  // Select the result of the Newton-Raphson step for positive finite arguments.\n  // For other arguments, choose the output of the intrinsic. 
This will\n  // return rsqrt(+inf) = 0, rsqrt(x) = NaN if x < 0, and rsqrt(0) = +inf.\n  return _mm512_mask_blend_pd(not_finite_pos_mask, y_newton, y_approx);\n}\n#else\ntemplate <>\nEIGEN_STRONG_INLINE Packet8d prsqrt<Packet8d>(const Packet8d& x) {\n  _EIGEN_DECLARE_CONST_Packet8d(one, 1.0);\n  return _mm512_div_pd(p8d_one, _mm512_sqrt_pd(x));\n}\n#endif\n\ntemplate<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED\nPacket16f plog1p<Packet16f>(const Packet16f& _x) {\n  return generic_plog1p(_x);\n}\n\nF16_PACKET_FUNCTION(Packet16f, Packet16h, plog1p)\nBF16_PACKET_FUNCTION(Packet16f, Packet16bf, plog1p)\n\ntemplate<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED\nPacket16f pexpm1<Packet16f>(const Packet16f& _x) {\n  return generic_expm1(_x);\n}\n\nF16_PACKET_FUNCTION(Packet16f, Packet16h, pexpm1)\nBF16_PACKET_FUNCTION(Packet16f, Packet16bf, pexpm1)\n\n#endif\n\n\ntemplate <>\nEIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f\npsin<Packet16f>(const Packet16f& _x) {\n  return psin_float(_x);\n}\n\ntemplate <>\nEIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f\npcos<Packet16f>(const Packet16f& _x) {\n  return pcos_float(_x);\n}\n\ntemplate <>\nEIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f\nptanh<Packet16f>(const Packet16f& _x) {\n  return internal::generic_fast_tanh_float(_x);\n}\n\nF16_PACKET_FUNCTION(Packet16f, Packet16h, psin)\nF16_PACKET_FUNCTION(Packet16f, Packet16h, pcos)\nF16_PACKET_FUNCTION(Packet16f, Packet16h, ptanh)\n\nBF16_PACKET_FUNCTION(Packet16f, Packet16bf, psin)\nBF16_PACKET_FUNCTION(Packet16f, Packet16bf, pcos)\nBF16_PACKET_FUNCTION(Packet16f, Packet16bf, ptanh)\n\n}  // end namespace internal\n\n}  // end namespace Eigen\n\n#endif  // THIRD_PARTY_EIGEN3_EIGEN_SRC_CORE_ARCH_AVX512_MATHFUNCTIONS_H_\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/arch/AVX512/PacketMath.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2016 Benoit Steiner (benoit.steiner.goog@gmail.com)\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_PACKET_MATH_AVX512_H\n#define EIGEN_PACKET_MATH_AVX512_H\n\nnamespace Eigen {\n\nnamespace internal {\n\n#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD\n#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8\n#endif\n\n#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS\n#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 32\n#endif\n\n#ifdef EIGEN_VECTORIZE_FMA\n#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD\n#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD\n#endif\n#endif\n\ntypedef __m512 Packet16f;\ntypedef __m512i Packet16i;\ntypedef __m512d Packet8d;\ntypedef eigen_packet_wrapper<__m256i, 1> Packet16h;\ntypedef eigen_packet_wrapper<__m256i, 2> Packet16bf;\n\ntemplate <>\nstruct is_arithmetic<__m512> {\n  enum { value = true };\n};\ntemplate <>\nstruct is_arithmetic<__m512i> {\n  enum { value = true };\n};\ntemplate <>\nstruct is_arithmetic<__m512d> {\n  enum { value = true };\n};\n\ntemplate<> struct is_arithmetic<Packet16h> { enum { value = true }; };\n\ntemplate <>\nstruct packet_traits<half> : default_packet_traits {\n  typedef Packet16h type;\n  // There is no half-size packet for Packet16h.\n  typedef Packet16h half;\n  enum {\n    Vectorizable = 1,\n    AlignedOnScalar = 1,\n    size = 16,\n    HasHalfPacket = 1,\n\n    HasCmp    = 1,\n    HasAdd    = 1,\n    HasSub    = 1,\n    HasMul    = 1,\n    HasDiv    = 1,\n    HasNegate = 1,\n    HasAbs    = 1,\n    HasAbs2   = 0,\n    HasMin    = 1,\n    HasMax    = 1,\n    HasConj   = 1,\n    HasSetLinear = 0,\n    HasLog    = 1,\n    HasLog1p  = 1,\n    HasExpm1  = 1,\n    HasExp    = 1,\n    HasSqrt   = 1,\n    HasRsqrt  = 1,\n    HasSin    = EIGEN_FAST_MATH,\n    HasCos    = EIGEN_FAST_MATH,\n    HasTanh   = EIGEN_FAST_MATH,\n    HasErf    = EIGEN_FAST_MATH,\n    HasBlend = 0,\n    HasRound  = 1,\n    HasFloor  = 1,\n    HasCeil   = 1,\n    HasRint   = 1,\n    HasBessel = 1,\n    HasNdtri  = 1\n  };\n};\n\ntemplate<> struct packet_traits<float>  : default_packet_traits\n{\n  typedef Packet16f type;\n  typedef Packet8f half;\n  enum {\n    Vectorizable = 1,\n    AlignedOnScalar = 1,\n    size = 16,\n    HasHalfPacket = 1,\n\n    HasAbs = 1,\n    HasMin    = 1,\n    HasMax    = 1,\n    HasConj   = 1,\n    HasBlend = 0,\n    HasSin = EIGEN_FAST_MATH,\n    HasCos = EIGEN_FAST_MATH,\n#if EIGEN_GNUC_AT_LEAST(5, 3) || (!EIGEN_COMP_GNUC_STRICT)\n    HasLog = 1,\n    HasLog1p  = 1,\n    HasExpm1  = 1,\n    HasNdtri = 1,\n    HasBessel  = 1,\n    HasExp = 1,\n    HasSqrt = EIGEN_FAST_MATH,\n    HasRsqrt = EIGEN_FAST_MATH,\n    HasTanh = EIGEN_FAST_MATH,\n    HasErf = EIGEN_FAST_MATH,\n#endif\n    HasCmp  = 1,\n    HasDiv = 1,\n    HasRound = 1,\n    HasFloor = 1,\n    HasCeil = 1,\n    HasRint = 1\n  };\n };\ntemplate<> struct packet_traits<double> : default_packet_traits\n{\n  typedef Packet8d type;\n  typedef Packet4d half;\n  enum {\n    Vectorizable = 1,\n    AlignedOnScalar = 1,\n    size = 8,\n    HasHalfPacket = 1,\n#if EIGEN_GNUC_AT_LEAST(5, 3) || (!EIGEN_COMP_GNUC_STRICT)\n    HasLog  = 1,\n    HasExp = 1,\n    HasSqrt = EIGEN_FAST_MATH,\n    HasRsqrt = EIGEN_FAST_MATH,\n#endif\n    HasCmp  = 1,\n    HasDiv = 1,\n    HasRound = 1,\n    HasFloor = 1,\n    HasCeil = 
1,\n    HasRint = 1\n  };\n};\n\n/* TODO Implement AVX512 for integers\ntemplate<> struct packet_traits<int>    : default_packet_traits\n{\n  typedef Packet16i type;\n  enum {\n    Vectorizable = 1,\n    AlignedOnScalar = 1,\n    size=8\n  };\n};\n*/\n\ntemplate <>\nstruct unpacket_traits<Packet16f> {\n  typedef float type;\n  typedef Packet8f half;\n  typedef Packet16i integer_packet;\n  typedef uint16_t mask_t;\n  enum { size = 16, alignment=Aligned64, vectorizable=true, masked_load_available=true, masked_store_available=true };\n};\ntemplate <>\nstruct unpacket_traits<Packet8d> {\n  typedef double type;\n  typedef Packet4d half;\n  enum { size = 8, alignment=Aligned64, vectorizable=true, masked_load_available=false, masked_store_available=false };\n};\ntemplate <>\nstruct unpacket_traits<Packet16i> {\n  typedef int type;\n  typedef Packet8i half;\n  enum { size = 16, alignment=Aligned64, vectorizable=false, masked_load_available=false, masked_store_available=false };\n};\n\ntemplate<>\nstruct unpacket_traits<Packet16h> {\n  typedef Eigen::half type;\n  typedef Packet8h half;\n  enum {size=16, alignment=Aligned32, vectorizable=true, masked_load_available=false, masked_store_available=false};\n};\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet16f pset1<Packet16f>(const float& from) {\n  return _mm512_set1_ps(from);\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet8d pset1<Packet8d>(const double& from) {\n  return _mm512_set1_pd(from);\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet16i pset1<Packet16i>(const int& from) {\n  return _mm512_set1_epi32(from);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet16f pset1frombits<Packet16f>(unsigned int from) {\n  return _mm512_castsi512_ps(_mm512_set1_epi32(from));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet8d pset1frombits<Packet8d>(const numext::uint64_t from) {\n  return _mm512_castsi512_pd(_mm512_set1_epi64(from));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet16f pzero(const Packet16f& /*a*/) { return _mm512_setzero_ps(); }\ntemplate<> EIGEN_STRONG_INLINE Packet8d pzero(const Packet8d& /*a*/) { return _mm512_setzero_pd(); }\ntemplate<> EIGEN_STRONG_INLINE Packet16i pzero(const Packet16i& /*a*/) { return _mm512_setzero_si512(); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet16f peven_mask(const Packet16f& /*a*/) {\n  return _mm512_castsi512_ps(_mm512_set_epi32(0, -1, 0, -1, 0, -1, 0, -1,\n                                              0, -1, 0, -1, 0, -1, 0, -1));\n}\ntemplate<> EIGEN_STRONG_INLINE Packet16i peven_mask(const Packet16i& /*a*/) {\n  return _mm512_set_epi32(0, -1, 0, -1, 0, -1, 0, -1,\n                          0, -1, 0, -1, 0, -1, 0, -1);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet8d peven_mask(const Packet8d& /*a*/) {\n  return _mm512_castsi512_pd(_mm512_set_epi32(0, 0, -1, -1, 0, 0, -1, -1,\n                                              0, 0, -1, -1, 0, 0, -1, -1));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet16f pload1<Packet16f>(const float* from) {\n  return _mm512_broadcastss_ps(_mm_load_ps1(from));\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet8d pload1<Packet8d>(const double* from) {\n  return _mm512_set1_pd(*from);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet16f plset<Packet16f>(const float& a) {\n  return _mm512_add_ps(\n      _mm512_set1_ps(a),\n      _mm512_set_ps(15.0f, 14.0f, 13.0f, 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f,\n                    4.0f, 3.0f, 2.0f, 1.0f, 0.0f));\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet8d plset<Packet8d>(const double& a) {\n  return _mm512_add_pd(_mm512_set1_pd(a),\n                       
_mm512_set_pd(7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, 0.0));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet16f padd<Packet16f>(const Packet16f& a,\n                                              const Packet16f& b) {\n  return _mm512_add_ps(a, b);\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet8d padd<Packet8d>(const Packet8d& a,\n                                            const Packet8d& b) {\n  return _mm512_add_pd(a, b);\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet16i padd<Packet16i>(const Packet16i& a,\n                                              const Packet16i& b) {\n  return _mm512_add_epi32(a, b);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet16f psub<Packet16f>(const Packet16f& a,\n                                              const Packet16f& b) {\n  return _mm512_sub_ps(a, b);\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet8d psub<Packet8d>(const Packet8d& a,\n                                            const Packet8d& b) {\n  return _mm512_sub_pd(a, b);\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet16i psub<Packet16i>(const Packet16i& a,\n                                              const Packet16i& b) {\n  return _mm512_sub_epi32(a, b);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet16f pnegate(const Packet16f& a) {\n  return _mm512_sub_ps(_mm512_set1_ps(0.0), a);\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet8d pnegate(const Packet8d& a) {\n  return _mm512_sub_pd(_mm512_set1_pd(0.0), a);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet16f pconj(const Packet16f& a) {\n  return a;\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet8d pconj(const Packet8d& a) {\n  return a;\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet16i pconj(const Packet16i& a) {\n  return a;\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet16f pmul<Packet16f>(const Packet16f& a,\n                                              const Packet16f& b) {\n  return _mm512_mul_ps(a, b);\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet8d pmul<Packet8d>(const Packet8d& a,\n                                            const Packet8d& b) {\n  return _mm512_mul_pd(a, b);\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet16i pmul<Packet16i>(const Packet16i& a,\n                                              const Packet16i& b) {\n  return _mm512_mullo_epi32(a, b);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet16f pdiv<Packet16f>(const Packet16f& a,\n                                              const Packet16f& b) {\n  return _mm512_div_ps(a, b);\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet8d pdiv<Packet8d>(const Packet8d& a,\n                                            const Packet8d& b) {\n  return _mm512_div_pd(a, b);\n}\n\n#ifdef EIGEN_VECTORIZE_FMA\ntemplate <>\nEIGEN_STRONG_INLINE Packet16f pmadd(const Packet16f& a, const Packet16f& b,\n                                    const Packet16f& c) {\n  return _mm512_fmadd_ps(a, b, c);\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet8d pmadd(const Packet8d& a, const Packet8d& b,\n                                   const Packet8d& c) {\n  return _mm512_fmadd_pd(a, b, c);\n}\n#endif\n\ntemplate <>\nEIGEN_DEVICE_FUNC inline Packet16f pselect(const Packet16f& mask,\n                                           const Packet16f& a,\n                                           const Packet16f& b) {\n  __mmask16 mask16 = _mm512_cmp_epi32_mask(\n      _mm512_castps_si512(mask), _mm512_setzero_epi32(), _MM_CMPINT_EQ);\n  return _mm512_mask_blend_ps(mask16, a, b);\n}\n\ntemplate <>\nEIGEN_DEVICE_FUNC inline Packet8d pselect(const Packet8d& mask,\n                                          const Packet8d& a,\n                                        
  const Packet8d& b) {\n  __mmask8 mask8 = _mm512_cmp_epi64_mask(_mm512_castpd_si512(mask),\n                                         _mm512_setzero_epi32(), _MM_CMPINT_EQ);\n  return _mm512_mask_blend_pd(mask8, a, b);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet16f pmin<Packet16f>(const Packet16f& a,\n                                              const Packet16f& b) {\n  // Arguments are reversed to match NaN propagation behavior of std::min.\n  return _mm512_min_ps(b, a);\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet8d pmin<Packet8d>(const Packet8d& a,\n                                            const Packet8d& b) {\n  // Arguments are reversed to match NaN propagation behavior of std::min.\n  return _mm512_min_pd(b, a);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet16f pmax<Packet16f>(const Packet16f& a,\n                                              const Packet16f& b) {\n  // Arguments are reversed to match NaN propagation behavior of std::max.\n  return _mm512_max_ps(b, a);\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet8d pmax<Packet8d>(const Packet8d& a,\n                                            const Packet8d& b) {\n  // Arguments are reversed to match NaN propagation behavior of std::max.\n  return _mm512_max_pd(b, a);\n}\n\n// Add specializations for min/max with prescribed NaN propagation.\ntemplate<>\nEIGEN_STRONG_INLINE Packet16f pmin<PropagateNumbers, Packet16f>(const Packet16f& a, const Packet16f& b) {\n  return pminmax_propagate_numbers(a, b, pmin<Packet16f>);\n}\ntemplate<>\nEIGEN_STRONG_INLINE Packet8d pmin<PropagateNumbers, Packet8d>(const Packet8d& a, const Packet8d& b) {\n  return pminmax_propagate_numbers(a, b, pmin<Packet8d>);\n}\ntemplate<>\nEIGEN_STRONG_INLINE Packet16f pmax<PropagateNumbers, Packet16f>(const Packet16f& a, const Packet16f& b) {\n  return pminmax_propagate_numbers(a, b, pmax<Packet16f>);\n}\ntemplate<>\nEIGEN_STRONG_INLINE Packet8d pmax<PropagateNumbers, Packet8d>(const Packet8d& a, const Packet8d& b) {\n  return pminmax_propagate_numbers(a, b, pmax<Packet8d>);\n}\ntemplate<>\nEIGEN_STRONG_INLINE Packet16f pmin<PropagateNaN, Packet16f>(const Packet16f& a, const Packet16f& b) {\n  return pminmax_propagate_nan(a, b, pmin<Packet16f>);\n}\ntemplate<>\nEIGEN_STRONG_INLINE Packet8d pmin<PropagateNaN, Packet8d>(const Packet8d& a, const Packet8d& b) {\n  return pminmax_propagate_nan(a, b, pmin<Packet8d>);\n}\ntemplate<>\nEIGEN_STRONG_INLINE Packet16f pmax<PropagateNaN, Packet16f>(const Packet16f& a, const Packet16f& b) {\n  return pminmax_propagate_nan(a, b, pmax<Packet16f>);\n}\ntemplate<>\nEIGEN_STRONG_INLINE Packet8d pmax<PropagateNaN, Packet8d>(const Packet8d& a, const Packet8d& b) {\n  return pminmax_propagate_nan(a, b, pmax<Packet8d>);\n}\n
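\n// Example of the distinction: with lanes a = {NaN, 1.0} and b = {2.0, NaN},\n// pmin<PropagateNumbers> yields {2.0, 1.0} (a NaN lane is replaced by the\n// number), while pmin<PropagateNaN> yields {NaN, NaN} (any NaN wins).\n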
\n\n#ifdef EIGEN_VECTORIZE_AVX512DQ\ntemplate<int I_> EIGEN_STRONG_INLINE Packet8f extract256(Packet16f x) { return _mm512_extractf32x8_ps(x,I_); }\ntemplate<int I_> EIGEN_STRONG_INLINE Packet2d extract128(Packet8d x) { return _mm512_extractf64x2_pd(x,I_); }\nEIGEN_STRONG_INLINE Packet16f cat256(Packet8f a, Packet8f b) { return _mm512_insertf32x8(_mm512_castps256_ps512(a),b,1); }\n#else\n// AVX512F does not define _mm512_extractf32x8_ps to extract _m256 from _m512\ntemplate<int I_> EIGEN_STRONG_INLINE Packet8f extract256(Packet16f x) {\n  return _mm256_castsi256_ps(_mm512_extracti64x4_epi64( _mm512_castps_si512(x),I_));\n}\n\n// AVX512F does not define _mm512_extractf64x2_pd to extract _m128 from _m512\ntemplate<int I_> EIGEN_STRONG_INLINE Packet2d extract128(Packet8d x) {\n  return _mm_castsi128_pd(_mm512_extracti32x4_epi32( _mm512_castpd_si512(x),I_));\n}\n\nEIGEN_STRONG_INLINE Packet16f cat256(Packet8f a, Packet8f b) {\n  return _mm512_castsi512_ps(_mm512_inserti64x4(_mm512_castsi256_si512(_mm256_castps_si256(a)),\n                                                _mm256_castps_si256(b),1));\n}\n#endif\n\n// Helper function for the bit-packing snippet of the low-precision comparisons.\n// It packs the flags from 16x32 bits down to 16x16 bits.\nEIGEN_STRONG_INLINE __m256i Pack32To16(Packet16f rf) {\n  // Split data into small pieces and handle with AVX instructions\n  // to guarantee internal order of vector.\n  // Operation:\n  //   dst[15:0]    := Saturate16(rf[31:0])\n  //   dst[31:16]   := Saturate16(rf[63:32])\n  //   ...\n  //   dst[255:240] := Saturate16(rf[255:224])\n  __m256i lo = _mm256_castps_si256(extract256<0>(rf));\n  __m256i hi = _mm256_castps_si256(extract256<1>(rf));\n  __m128i result_lo = _mm_packs_epi32(_mm256_extractf128_si256(lo, 0),\n                                      _mm256_extractf128_si256(lo, 1));\n  __m128i result_hi = _mm_packs_epi32(_mm256_extractf128_si256(hi, 0),\n                                      _mm256_extractf128_si256(hi, 1));\n  return _mm256_insertf128_si256(_mm256_castsi128_si256(result_lo), result_hi, 1);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet16f pcmp_eq(const Packet16f& a, const Packet16f& b) {\n  __mmask16 mask = _mm512_cmp_ps_mask(a, b, _CMP_EQ_OQ);\n  return _mm512_castsi512_ps(\n      _mm512_mask_set1_epi32(_mm512_set1_epi32(0), mask, 0xffffffffu));\n}\ntemplate<> EIGEN_STRONG_INLINE Packet16f pcmp_le(const Packet16f& a, const Packet16f& b) {\n  __mmask16 mask = _mm512_cmp_ps_mask(a, b, _CMP_LE_OQ);\n  return _mm512_castsi512_ps(\n      _mm512_mask_set1_epi32(_mm512_set1_epi32(0), mask, 0xffffffffu));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet16f pcmp_lt(const Packet16f& a, const Packet16f& b) {\n  __mmask16 mask = _mm512_cmp_ps_mask(a, b, _CMP_LT_OQ);\n  return _mm512_castsi512_ps(\n      _mm512_mask_set1_epi32(_mm512_set1_epi32(0), mask, 0xffffffffu));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet16f pcmp_lt_or_nan(const Packet16f& a, const Packet16f& b) {\n  __mmask16 mask = _mm512_cmp_ps_mask(a, b, _CMP_NGE_UQ);\n  return _mm512_castsi512_ps(\n      _mm512_mask_set1_epi32(_mm512_set1_epi32(0), mask, 0xffffffffu));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet16i pcmp_eq(const Packet16i& a, const Packet16i& b) {\n  // Integer comparisons take an _MM_CMPINT_* predicate.\n  __mmask16 mask = _mm512_cmp_epi32_mask(a, b, _MM_CMPINT_EQ);\n  return _mm512_mask_set1_epi32(_mm512_set1_epi32(0), mask, 0xffffffffu);\n}\n\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet8d pcmp_eq(const Packet8d& a, const Packet8d& b) {\n  __mmask8 mask = _mm512_cmp_pd_mask(a, b, _CMP_EQ_OQ);\n  return _mm512_castsi512_pd(\n      _mm512_mask_set1_epi64(_mm512_set1_epi64(0), mask, 0xffffffffffffffffu));\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet8d pcmp_le(const Packet8d& a, const Packet8d& b) {\n  __mmask8 mask = _mm512_cmp_pd_mask(a, b, _CMP_LE_OQ);\n  return _mm512_castsi512_pd(\n      _mm512_mask_set1_epi64(_mm512_set1_epi64(0), mask, 0xffffffffffffffffu));\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet8d pcmp_lt(const Packet8d& a, const Packet8d& b) {\n  __mmask8 mask = _mm512_cmp_pd_mask(a, b, _CMP_LT_OQ);\n  return _mm512_castsi512_pd(\n      _mm512_mask_set1_epi64(_mm512_set1_epi64(0), mask, 0xffffffffffffffffu));\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet8d pcmp_lt_or_nan(const Packet8d& a, const Packet8d& b) {\n  __mmask8 mask = _mm512_cmp_pd_mask(a, b, _CMP_NGE_UQ);\n  return _mm512_castsi512_pd(\n      
_mm512_mask_set1_epi64(_mm512_set1_epi64(0), mask, 0xffffffffffffffffu));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet16f print<Packet16f>(const Packet16f& a) { return _mm512_roundscale_ps(a, _MM_FROUND_CUR_DIRECTION); }\ntemplate<> EIGEN_STRONG_INLINE Packet8d print<Packet8d>(const Packet8d& a) { return _mm512_roundscale_pd(a, _MM_FROUND_CUR_DIRECTION); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet16f pceil<Packet16f>(const Packet16f& a) { return _mm512_roundscale_ps(a, _MM_FROUND_TO_POS_INF); }\ntemplate<> EIGEN_STRONG_INLINE Packet8d pceil<Packet8d>(const Packet8d& a) { return _mm512_roundscale_pd(a, _MM_FROUND_TO_POS_INF); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet16f pfloor<Packet16f>(const Packet16f& a) { return _mm512_roundscale_ps(a, _MM_FROUND_TO_NEG_INF); }\ntemplate<> EIGEN_STRONG_INLINE Packet8d pfloor<Packet8d>(const Packet8d& a) { return _mm512_roundscale_pd(a, _MM_FROUND_TO_NEG_INF); }\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet16i ptrue<Packet16i>(const Packet16i& /*a*/) {\n  return _mm512_set1_epi32(0xffffffffu);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet16f ptrue<Packet16f>(const Packet16f& a) {\n  return _mm512_castsi512_ps(ptrue<Packet16i>(_mm512_castps_si512(a)));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet8d ptrue<Packet8d>(const Packet8d& a) {\n  return _mm512_castsi512_pd(ptrue<Packet16i>(_mm512_castpd_si512(a)));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet16i pand<Packet16i>(const Packet16i& a,\n                                              const Packet16i& b) {\n  return _mm512_and_si512(a,b);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet16f pand<Packet16f>(const Packet16f& a,\n                                              const Packet16f& b) {\n#ifdef EIGEN_VECTORIZE_AVX512DQ\n  return _mm512_and_ps(a, b);\n#else\n  return _mm512_castsi512_ps(pand(_mm512_castps_si512(a),_mm512_castps_si512(b)));\n#endif\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet8d pand<Packet8d>(const Packet8d& a,\n                                            const Packet8d& b) {\n#ifdef EIGEN_VECTORIZE_AVX512DQ\n  return _mm512_and_pd(a, b);\n#else\n  Packet8d res = _mm512_undefined_pd();\n  Packet4d lane0_a = _mm512_extractf64x4_pd(a, 0);\n  Packet4d lane0_b = _mm512_extractf64x4_pd(b, 0);\n  res = _mm512_insertf64x4(res, _mm256_and_pd(lane0_a, lane0_b), 0);\n\n  Packet4d lane1_a = _mm512_extractf64x4_pd(a, 1);\n  Packet4d lane1_b = _mm512_extractf64x4_pd(b, 1);\n  return _mm512_insertf64x4(res, _mm256_and_pd(lane1_a, lane1_b), 1);\n#endif\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet16i por<Packet16i>(const Packet16i& a, const Packet16i& b) {\n  return _mm512_or_si512(a, b);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet16f por<Packet16f>(const Packet16f& a, const Packet16f& b) {\n#ifdef EIGEN_VECTORIZE_AVX512DQ\n  return _mm512_or_ps(a, b);\n#else\n  return _mm512_castsi512_ps(por(_mm512_castps_si512(a),_mm512_castps_si512(b)));\n#endif\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet8d por<Packet8d>(const Packet8d& a,\n                                           const Packet8d& b) {\n#ifdef EIGEN_VECTORIZE_AVX512DQ\n  return _mm512_or_pd(a, b);\n#else\n  return _mm512_castsi512_pd(por(_mm512_castpd_si512(a),_mm512_castpd_si512(b)));\n#endif\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet16i pxor<Packet16i>(const Packet16i& a, const Packet16i& b) {\n  return _mm512_xor_si512(a, b);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet16f pxor<Packet16f>(const Packet16f& a, const Packet16f& b) {\n#ifdef EIGEN_VECTORIZE_AVX512DQ\n  return _mm512_xor_ps(a, b);\n#else\n  return 
_mm512_castsi512_ps(pxor(_mm512_castps_si512(a),_mm512_castps_si512(b)));\n#endif\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet8d pxor<Packet8d>(const Packet8d& a, const Packet8d& b) {\n#ifdef EIGEN_VECTORIZE_AVX512DQ\n  return _mm512_xor_pd(a, b);\n#else\n  return _mm512_castsi512_pd(pxor(_mm512_castpd_si512(a),_mm512_castpd_si512(b)));\n#endif\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet16i pandnot<Packet16i>(const Packet16i& a, const Packet16i& b) {\n  return _mm512_andnot_si512(b, a);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet16f pandnot<Packet16f>(const Packet16f& a, const Packet16f& b) {\n#ifdef EIGEN_VECTORIZE_AVX512DQ\n  return _mm512_andnot_ps(b, a);\n#else\n  return _mm512_castsi512_ps(pandnot(_mm512_castps_si512(a),_mm512_castps_si512(b)));\n#endif\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet8d pandnot<Packet8d>(const Packet8d& a,const Packet8d& b) {\n#ifdef EIGEN_VECTORIZE_AVX512DQ\n  return _mm512_andnot_pd(b, a);\n#else\n  return _mm512_castsi512_pd(pandnot(_mm512_castpd_si512(a),_mm512_castpd_si512(b)));\n#endif\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet16f pround<Packet16f>(const Packet16f& a)\n{\n  // Work-around for default std::round rounding mode.\n  const Packet16f mask = pset1frombits<Packet16f>(static_cast<numext::uint32_t>(0x80000000u));\n  const Packet16f prev0dot5 = pset1frombits<Packet16f>(static_cast<numext::uint32_t>(0x3EFFFFFFu));\n  return _mm512_roundscale_ps(padd(por(pand(a, mask), prev0dot5), a), _MM_FROUND_TO_ZERO);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet8d pround<Packet8d>(const Packet8d& a)\n{\n  // Work-around for default std::round rounding mode.\n  const Packet8d mask = pset1frombits<Packet8d>(static_cast<numext::uint64_t>(0x8000000000000000ull));\n  const Packet8d prev0dot5 = pset1frombits<Packet8d>(static_cast<numext::uint64_t>(0x3FDFFFFFFFFFFFFFull));\n  return _mm512_roundscale_pd(padd(por(pand(a, mask), prev0dot5), a), _MM_FROUND_TO_ZERO);\n}\n\ntemplate<int N> EIGEN_STRONG_INLINE Packet16i parithmetic_shift_right(Packet16i a) {\n  return _mm512_srai_epi32(a, N);\n}\n\ntemplate<int N> EIGEN_STRONG_INLINE Packet16i plogical_shift_right(Packet16i a) {\n  return _mm512_srli_epi32(a, N);\n}\n\ntemplate<int N> EIGEN_STRONG_INLINE Packet16i plogical_shift_left(Packet16i a) {\n  return _mm512_slli_epi32(a, N);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet16f pload<Packet16f>(const float* from) {\n  EIGEN_DEBUG_ALIGNED_LOAD return _mm512_load_ps(from);\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet8d pload<Packet8d>(const double* from) {\n  EIGEN_DEBUG_ALIGNED_LOAD return _mm512_load_pd(from);\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet16i pload<Packet16i>(const int* from) {\n  EIGEN_DEBUG_ALIGNED_LOAD return _mm512_load_si512(\n      reinterpret_cast<const __m512i*>(from));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet16f ploadu<Packet16f>(const float* from) {\n  EIGEN_DEBUG_UNALIGNED_LOAD return _mm512_loadu_ps(from);\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet8d ploadu<Packet8d>(const double* from) {\n  EIGEN_DEBUG_UNALIGNED_LOAD return _mm512_loadu_pd(from);\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet16i ploadu<Packet16i>(const int* from) {\n  EIGEN_DEBUG_UNALIGNED_LOAD return _mm512_loadu_si512(\n      reinterpret_cast<const __m512i*>(from));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet16f ploadu<Packet16f>(const float* from, uint16_t umask) {\n  __mmask16 mask = static_cast<__mmask16>(umask);\n  EIGEN_DEBUG_UNALIGNED_LOAD return _mm512_maskz_loadu_ps(mask, from);\n}\n
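\n// Masked-load example: ploadu<Packet16f>(from, 0x00FF) reads only the low\n// eight floats from memory; the zero-masking (maskz) load leaves the\n// masked-off upper lanes at 0.0f instead of touching memory beyond them.\n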
\n// Loads 8 floats from memory and returns the packet\n// {a0, a0, a1, a1, a2, a2, a3, a3, a4, a4, a5, a5, a6, a6, a7, a7}\ntemplate <>\nEIGEN_STRONG_INLINE Packet16f ploaddup<Packet16f>(const float* from) {\n  // an unaligned load is required here as there is no requirement\n  // on the alignment of input pointer 'from'\n  __m256i low_half = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(from));\n  __m512 even_elements = _mm512_castsi512_ps(_mm512_cvtepu32_epi64(low_half));\n  __m512 pairs = _mm512_permute_ps(even_elements, _MM_SHUFFLE(2, 2, 0, 0));\n  return pairs;\n}\n\n#ifdef EIGEN_VECTORIZE_AVX512DQ\n// FIXME: this does not look optimal, better load a Packet4d and shuffle...\n// Loads 4 doubles from memory and returns the packet {a0, a0, a1, a1, a2, a2,\n// a3, a3}\ntemplate <>\nEIGEN_STRONG_INLINE Packet8d ploaddup<Packet8d>(const double* from) {\n  __m512d x = _mm512_setzero_pd();\n  x = _mm512_insertf64x2(x, _mm_loaddup_pd(&from[0]), 0);\n  x = _mm512_insertf64x2(x, _mm_loaddup_pd(&from[1]), 1);\n  x = _mm512_insertf64x2(x, _mm_loaddup_pd(&from[2]), 2);\n  x = _mm512_insertf64x2(x, _mm_loaddup_pd(&from[3]), 3);\n  return x;\n}\n#else\ntemplate <>\nEIGEN_STRONG_INLINE Packet8d ploaddup<Packet8d>(const double* from) {\n  __m512d x = _mm512_setzero_pd();\n  x = _mm512_mask_broadcastsd_pd(x, 0x3<<0, _mm_load_sd(from+0));\n  x = _mm512_mask_broadcastsd_pd(x, 0x3<<2, _mm_load_sd(from+1));\n  x = _mm512_mask_broadcastsd_pd(x, 0x3<<4, _mm_load_sd(from+2));\n  x = _mm512_mask_broadcastsd_pd(x, 0x3<<6, _mm_load_sd(from+3));\n  return x;\n}\n#endif\n\n// Loads 4 floats from memory and returns the packet\n// {a0, a0, a0, a0, a1, a1, a1, a1, a2, a2, a2, a2, a3, a3, a3, a3}\ntemplate <>\nEIGEN_STRONG_INLINE Packet16f ploadquad<Packet16f>(const float* from) {\n  Packet16f tmp = _mm512_castps128_ps512(ploadu<Packet4f>(from));\n  const Packet16i scatter_mask = _mm512_set_epi32(3,3,3,3, 2,2,2,2, 1,1,1,1, 0,0,0,0);\n  return _mm512_permutexvar_ps(scatter_mask, tmp);\n}\n\n// Loads 2 doubles from memory and returns the packet\n// {a0, a0, a0, a0, a1, a1, a1, a1}\ntemplate <>\nEIGEN_STRONG_INLINE Packet8d ploadquad<Packet8d>(const double* from) {\n  __m256d lane0 = _mm256_set1_pd(*from);\n  __m256d lane1 = _mm256_set1_pd(*(from+1));\n  __m512d tmp = _mm512_undefined_pd();\n  tmp = _mm512_insertf64x4(tmp, lane0, 0);\n  return _mm512_insertf64x4(tmp, lane1, 1);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet16f& from) {\n  EIGEN_DEBUG_ALIGNED_STORE _mm512_store_ps(to, from);\n}\ntemplate <>\nEIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet8d& from) {\n  EIGEN_DEBUG_ALIGNED_STORE _mm512_store_pd(to, from);\n}\ntemplate <>\nEIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet16i& from) {\n  EIGEN_DEBUG_ALIGNED_STORE _mm512_storeu_si512(reinterpret_cast<__m512i*>(to),\n                                                from);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet16f& from) {\n  EIGEN_DEBUG_UNALIGNED_STORE _mm512_storeu_ps(to, from);\n}\ntemplate <>\nEIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet8d& from) {\n  EIGEN_DEBUG_UNALIGNED_STORE _mm512_storeu_pd(to, from);\n}\ntemplate <>\nEIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet16i& from) {\n  EIGEN_DEBUG_UNALIGNED_STORE _mm512_storeu_si512(\n      reinterpret_cast<__m512i*>(to), from);\n}\ntemplate <>\nEIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet16f& from, uint16_t umask) {\n  __mmask16 mask = static_cast<__mmask16>(umask);\n  EIGEN_DEBUG_UNALIGNED_STORE return _mm512_mask_storeu_ps(to, 
mask, from);\n}\n\ntemplate <>\nEIGEN_DEVICE_FUNC inline Packet16f pgather<float, Packet16f>(const float* from,\n                                                             Index stride) {\n  Packet16i stride_vector = _mm512_set1_epi32(convert_index<int>(stride));\n  Packet16i stride_multiplier =\n      _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);\n  Packet16i indices = _mm512_mullo_epi32(stride_vector, stride_multiplier);\n\n  return _mm512_i32gather_ps(indices, from, 4);\n}\ntemplate <>\nEIGEN_DEVICE_FUNC inline Packet8d pgather<double, Packet8d>(const double* from,\n                                                            Index stride) {\n  Packet8i stride_vector = _mm256_set1_epi32(convert_index<int>(stride));\n  Packet8i stride_multiplier = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);\n  Packet8i indices = _mm256_mullo_epi32(stride_vector, stride_multiplier);\n\n  return _mm512_i32gather_pd(indices, from, 8);\n}\n\ntemplate <>\nEIGEN_DEVICE_FUNC inline void pscatter<float, Packet16f>(float* to,\n                                                         const Packet16f& from,\n                                                         Index stride) {\n  Packet16i stride_vector = _mm512_set1_epi32(convert_index<int>(stride));\n  Packet16i stride_multiplier =\n      _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);\n  Packet16i indices = _mm512_mullo_epi32(stride_vector, stride_multiplier);\n  _mm512_i32scatter_ps(to, indices, from, 4);\n}\ntemplate <>\nEIGEN_DEVICE_FUNC inline void pscatter<double, Packet8d>(double* to,\n                                                         const Packet8d& from,\n                                                         Index stride) {\n  Packet8i stride_vector = _mm256_set1_epi32(convert_index<int>(stride));\n  Packet8i stride_multiplier = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);\n  Packet8i indices = _mm256_mullo_epi32(stride_vector, stride_multiplier);\n  _mm512_i32scatter_pd(to, indices, from, 8);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE void pstore1<Packet16f>(float* to, const float& a) {\n  Packet16f pa = pset1<Packet16f>(a);\n  pstore(to, pa);\n}\ntemplate <>\nEIGEN_STRONG_INLINE void pstore1<Packet8d>(double* to, const double& a) {\n  Packet8d pa = pset1<Packet8d>(a);\n  pstore(to, pa);\n}\ntemplate <>\nEIGEN_STRONG_INLINE void pstore1<Packet16i>(int* to, const int& a) {\n  Packet16i pa = pset1<Packet16i>(a);\n  pstore(to, pa);\n}\n\ntemplate<> EIGEN_STRONG_INLINE void prefetch<float>(const float*   addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }\ntemplate<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }\ntemplate<> EIGEN_STRONG_INLINE void prefetch<int>(const int*       addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }\n\ntemplate <>\nEIGEN_STRONG_INLINE float pfirst<Packet16f>(const Packet16f& a) {\n  return _mm_cvtss_f32(_mm512_extractf32x4_ps(a, 0));\n}\ntemplate <>\nEIGEN_STRONG_INLINE double pfirst<Packet8d>(const Packet8d& a) {\n  return _mm_cvtsd_f64(_mm256_extractf128_pd(_mm512_extractf64x4_pd(a, 0), 0));\n}\ntemplate <>\nEIGEN_STRONG_INLINE int pfirst<Packet16i>(const Packet16i& a) {\n  return _mm_extract_epi32(_mm512_extracti32x4_epi32(a, 0), 0);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet16f preverse(const Packet16f& a)\n{\n  return _mm512_permutexvar_ps(_mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15), a);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8d 
preverse(const Packet8d& a)\n{\n  return _mm512_permutexvar_pd(_mm512_set_epi32(0, 0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7), a);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet16f pabs(const Packet16f& a)\n{\n  // _mm512_abs_ps intrinsic not found, so hack around it\n  return _mm512_castsi512_ps(_mm512_and_si512(_mm512_castps_si512(a), _mm512_set1_epi32(0x7fffffff)));\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet8d pabs(const Packet8d& a) {\n  // _mm512_abs_pd intrinsic not found, so hack around it\n  return _mm512_castsi512_pd(_mm512_and_si512(_mm512_castpd_si512(a),\n                                   _mm512_set1_epi64(0x7fffffffffffffff)));\n}\n\ntemplate<>\nEIGEN_STRONG_INLINE Packet16f pfrexp<Packet16f>(const Packet16f& a, Packet16f& exponent){\n  return pfrexp_generic(a, exponent);\n}\n\n// Extract the biased exponent, working around the absence of Packet8l.\ntemplate<>\nEIGEN_STRONG_INLINE\nPacket8d pfrexp_generic_get_biased_exponent(const Packet8d& a) {\n  const Packet8d cst_exp_mask  = pset1frombits<Packet8d>(static_cast<uint64_t>(0x7ff0000000000000ull));\n  #ifdef EIGEN_VECTORIZE_AVX512DQ\n  return _mm512_cvtepi64_pd(_mm512_srli_epi64(_mm512_castpd_si512(pand(a, cst_exp_mask)), 52));\n  #else\n  return _mm512_cvtepi32_pd(_mm512_cvtepi64_epi32(_mm512_srli_epi64(_mm512_castpd_si512(pand(a, cst_exp_mask)), 52)));\n  #endif\n}\n\ntemplate<>\nEIGEN_STRONG_INLINE Packet8d pfrexp<Packet8d>(const Packet8d& a, Packet8d& exponent) {\n  return pfrexp_generic(a, exponent);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet16f pldexp<Packet16f>(const Packet16f& a, const Packet16f& exponent) {\n  return pldexp_generic(a, exponent);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8d pldexp<Packet8d>(const Packet8d& a, const Packet8d& exponent) {\n  // Clamp exponent to [-2099, 2099]\n  const Packet8d max_exponent = pset1<Packet8d>(2099.0);\n  const Packet8i e = _mm512_cvtpd_epi32(pmin(pmax(exponent, pnegate(max_exponent)), max_exponent));\n  \n  // Split 2^e into four factors and multiply: with |e| <= 2099 each factor's\n  // exponent stays well inside the range of a normal double, even though\n  // 2^e itself may not be representable.\n  const Packet8i bias = pset1<Packet8i>(1023);\n  Packet8i b = parithmetic_shift_right<2>(e);  // floor(e/4)\n  \n  // 2^b\n  const Packet8i permute_idx = _mm256_setr_epi32(0, 4, 1, 5, 2, 6, 3, 7);\n  Packet8i hi = _mm256_permutevar8x32_epi32(padd(b, bias), permute_idx);\n  Packet8i lo = _mm256_slli_epi64(hi, 52);\n  hi = _mm256_slli_epi64(_mm256_srli_epi64(hi, 32), 52);\n  Packet8d c = _mm512_castsi512_pd(_mm512_inserti64x4(_mm512_castsi256_si512(lo), hi, 1));\n  Packet8d out = pmul(pmul(pmul(a, c), c), c);  // a * 2^(3b)\n  \n  // 2^(e - 3b)\n  b = psub(psub(psub(e, b), b), b);  // e - 3b\n  hi = _mm256_permutevar8x32_epi32(padd(b, bias), permute_idx);\n  lo = _mm256_slli_epi64(hi, 52);\n  hi = _mm256_slli_epi64(_mm256_srli_epi64(hi, 32), 52);\n  c = _mm512_castsi512_pd(_mm512_inserti64x4(_mm512_castsi256_si512(lo), hi, 1));\n  out = pmul(out, c);  // a * 2^e\n  return out;\n}\n\n#ifdef EIGEN_VECTORIZE_AVX512DQ\n// AVX512F does not define _mm512_extractf32x8_ps to extract _m256 from _m512\n#define EIGEN_EXTRACT_8f_FROM_16f(INPUT, OUTPUT)                           \\\n  __m256 OUTPUT##_0 = _mm512_extractf32x8_ps(INPUT, 0);                    \\\n  __m256 OUTPUT##_1 = _mm512_extractf32x8_ps(INPUT, 1)\n#else\n#define EIGEN_EXTRACT_8f_FROM_16f(INPUT, OUTPUT)                \\\n  __m256 OUTPUT##_0 = _mm256_insertf128_ps(                     \\\n      _mm256_castps128_ps256(_mm512_extractf32x4_ps(INPUT, 0)), \\\n      _mm512_extractf32x4_ps(INPUT, 1), 1);                     \\\n  __m256 OUTPUT##_1 = _mm256_insertf128_ps(                     
\\\n      _mm256_castps128_ps256(_mm512_extractf32x4_ps(INPUT, 2)), \\\n      _mm512_extractf32x4_ps(INPUT, 3), 1);\n#endif\n\n#ifdef EIGEN_VECTORIZE_AVX512DQ\n#define EIGEN_INSERT_8f_INTO_16f(OUTPUT, INPUTA, INPUTB) \\\n  OUTPUT = _mm512_insertf32x8(_mm512_castps256_ps512(INPUTA), INPUTB, 1);\n#else\n#define EIGEN_INSERT_8f_INTO_16f(OUTPUT, INPUTA, INPUTB)                    \\\n  OUTPUT = _mm512_undefined_ps();                                           \\\n  OUTPUT = _mm512_insertf32x4(OUTPUT, _mm256_extractf128_ps(INPUTA, 0), 0); \\\n  OUTPUT = _mm512_insertf32x4(OUTPUT, _mm256_extractf128_ps(INPUTA, 1), 1); \\\n  OUTPUT = _mm512_insertf32x4(OUTPUT, _mm256_extractf128_ps(INPUTB, 0), 2); \\\n  OUTPUT = _mm512_insertf32x4(OUTPUT, _mm256_extractf128_ps(INPUTB, 1), 3);\n#endif\n\ntemplate <>\nEIGEN_STRONG_INLINE float predux<Packet16f>(const Packet16f& a) {\n#ifdef EIGEN_VECTORIZE_AVX512DQ\n  __m256 lane0 = _mm512_extractf32x8_ps(a, 0);\n  __m256 lane1 = _mm512_extractf32x8_ps(a, 1);\n  Packet8f x = _mm256_add_ps(lane0, lane1);\n  return predux<Packet8f>(x);\n#else\n  __m128 lane0 = _mm512_extractf32x4_ps(a, 0);\n  __m128 lane1 = _mm512_extractf32x4_ps(a, 1);\n  __m128 lane2 = _mm512_extractf32x4_ps(a, 2);\n  __m128 lane3 = _mm512_extractf32x4_ps(a, 3);\n  __m128 sum = _mm_add_ps(_mm_add_ps(lane0, lane1), _mm_add_ps(lane2, lane3));\n  sum = _mm_hadd_ps(sum, sum);\n  sum = _mm_hadd_ps(sum, _mm_permute_ps(sum, 1));\n  return _mm_cvtss_f32(sum);\n#endif\n}\ntemplate <>\nEIGEN_STRONG_INLINE double predux<Packet8d>(const Packet8d& a) {\n  __m256d lane0 = _mm512_extractf64x4_pd(a, 0);\n  __m256d lane1 = _mm512_extractf64x4_pd(a, 1);\n  __m256d sum = _mm256_add_pd(lane0, lane1);\n  __m256d tmp0 = _mm256_hadd_pd(sum, _mm256_permute2f128_pd(sum, sum, 1));\n  return _mm_cvtsd_f64(_mm256_castpd256_pd128(_mm256_hadd_pd(tmp0, tmp0)));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet8f predux_half_dowto4<Packet16f>(const Packet16f& a) {\n#ifdef EIGEN_VECTORIZE_AVX512DQ\n  __m256 lane0 = _mm512_extractf32x8_ps(a, 0);\n  __m256 lane1 = _mm512_extractf32x8_ps(a, 1);\n  return _mm256_add_ps(lane0, lane1);\n#else\n  __m128 lane0 = _mm512_extractf32x4_ps(a, 0);\n  __m128 lane1 = _mm512_extractf32x4_ps(a, 1);\n  __m128 lane2 = _mm512_extractf32x4_ps(a, 2);\n  __m128 lane3 = _mm512_extractf32x4_ps(a, 3);\n  __m128 sum0 = _mm_add_ps(lane0, lane2);\n  __m128 sum1 = _mm_add_ps(lane1, lane3);\n  return _mm256_insertf128_ps(_mm256_castps128_ps256(sum0), sum1, 1);\n#endif\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet4d predux_half_dowto4<Packet8d>(const Packet8d& a) {\n  __m256d lane0 = _mm512_extractf64x4_pd(a, 0);\n  __m256d lane1 = _mm512_extractf64x4_pd(a, 1);\n  return _mm256_add_pd(lane0, lane1);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE float predux_mul<Packet16f>(const Packet16f& a) {\n//#ifdef EIGEN_VECTORIZE_AVX512DQ\n#if 0\n  Packet8f lane0 = _mm512_extractf32x8_ps(a, 0);\n  Packet8f lane1 = _mm512_extractf32x8_ps(a, 1);\n  Packet8f res = pmul(lane0, lane1);\n  res = pmul(res, _mm256_permute2f128_ps(res, res, 1));\n  res = pmul(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 3, 2)));\n  return pfirst(pmul(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 0, 1))));\n#else\n  __m128 lane0 = _mm512_extractf32x4_ps(a, 0);\n  __m128 lane1 = _mm512_extractf32x4_ps(a, 1);\n  __m128 lane2 = _mm512_extractf32x4_ps(a, 2);\n  __m128 lane3 = _mm512_extractf32x4_ps(a, 3);\n  __m128 res = pmul(pmul(lane0, lane1), pmul(lane2, lane3));\n  res = pmul(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 3, 2)));\n  return pfirst(pmul(res, 
_mm_permute_ps(res, _MM_SHUFFLE(0, 0, 0, 1))));\n#endif\n}\ntemplate <>\nEIGEN_STRONG_INLINE double predux_mul<Packet8d>(const Packet8d& a) {\n  __m256d lane0 = _mm512_extractf64x4_pd(a, 0);\n  __m256d lane1 = _mm512_extractf64x4_pd(a, 1);\n  __m256d res = pmul(lane0, lane1);\n  res = pmul(res, _mm256_permute2f128_pd(res, res, 1));\n  return pfirst(pmul(res, _mm256_shuffle_pd(res, res, 1)));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE float predux_min<Packet16f>(const Packet16f& a) {\n  __m128 lane0 = _mm512_extractf32x4_ps(a, 0);\n  __m128 lane1 = _mm512_extractf32x4_ps(a, 1);\n  __m128 lane2 = _mm512_extractf32x4_ps(a, 2);\n  __m128 lane3 = _mm512_extractf32x4_ps(a, 3);\n  __m128 res = _mm_min_ps(_mm_min_ps(lane0, lane1), _mm_min_ps(lane2, lane3));\n  res = _mm_min_ps(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 3, 2)));\n  return pfirst(_mm_min_ps(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 0, 1))));\n}\ntemplate <>\nEIGEN_STRONG_INLINE double predux_min<Packet8d>(const Packet8d& a) {\n  __m256d lane0 = _mm512_extractf64x4_pd(a, 0);\n  __m256d lane1 = _mm512_extractf64x4_pd(a, 1);\n  __m256d res = _mm256_min_pd(lane0, lane1);\n  res = _mm256_min_pd(res, _mm256_permute2f128_pd(res, res, 1));\n  return pfirst(_mm256_min_pd(res, _mm256_shuffle_pd(res, res, 1)));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE float predux_max<Packet16f>(const Packet16f& a) {\n  __m128 lane0 = _mm512_extractf32x4_ps(a, 0);\n  __m128 lane1 = _mm512_extractf32x4_ps(a, 1);\n  __m128 lane2 = _mm512_extractf32x4_ps(a, 2);\n  __m128 lane3 = _mm512_extractf32x4_ps(a, 3);\n  __m128 res = _mm_max_ps(_mm_max_ps(lane0, lane1), _mm_max_ps(lane2, lane3));\n  res = _mm_max_ps(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 3, 2)));\n  return pfirst(_mm_max_ps(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 0, 1))));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE double predux_max<Packet8d>(const Packet8d& a) {\n  __m256d lane0 = _mm512_extractf64x4_pd(a, 0);\n  __m256d lane1 = _mm512_extractf64x4_pd(a, 1);\n  __m256d res = _mm256_max_pd(lane0, lane1);\n  res = _mm256_max_pd(res, _mm256_permute2f128_pd(res, res, 1));\n  return pfirst(_mm256_max_pd(res, _mm256_shuffle_pd(res, res, 1)));\n}\n\ntemplate<> EIGEN_STRONG_INLINE bool predux_any(const Packet16f& x)\n{\n  Packet16i xi = _mm512_castps_si512(x);\n  __mmask16 tmp = _mm512_test_epi32_mask(xi,xi);\n  return !_mm512_kortestz(tmp,tmp);\n}\n\n\n\n#define PACK_OUTPUT(OUTPUT, INPUT, INDEX, STRIDE) \\\n  EIGEN_INSERT_8f_INTO_16f(OUTPUT[INDEX], INPUT[INDEX], INPUT[INDEX + STRIDE]);\n\nEIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet16f, 16>& kernel) {\n  __m512 T0 = _mm512_unpacklo_ps(kernel.packet[0], kernel.packet[1]);\n  __m512 T1 = _mm512_unpackhi_ps(kernel.packet[0], kernel.packet[1]);\n  __m512 T2 = _mm512_unpacklo_ps(kernel.packet[2], kernel.packet[3]);\n  __m512 T3 = _mm512_unpackhi_ps(kernel.packet[2], kernel.packet[3]);\n  __m512 T4 = _mm512_unpacklo_ps(kernel.packet[4], kernel.packet[5]);\n  __m512 T5 = _mm512_unpackhi_ps(kernel.packet[4], kernel.packet[5]);\n  __m512 T6 = _mm512_unpacklo_ps(kernel.packet[6], kernel.packet[7]);\n  __m512 T7 = _mm512_unpackhi_ps(kernel.packet[6], kernel.packet[7]);\n  __m512 T8 = _mm512_unpacklo_ps(kernel.packet[8], kernel.packet[9]);\n  __m512 T9 = _mm512_unpackhi_ps(kernel.packet[8], kernel.packet[9]);\n  __m512 T10 = _mm512_unpacklo_ps(kernel.packet[10], kernel.packet[11]);\n  __m512 T11 = _mm512_unpackhi_ps(kernel.packet[10], kernel.packet[11]);\n  __m512 T12 = _mm512_unpacklo_ps(kernel.packet[12], kernel.packet[13]);\n  __m512 T13 = 
_mm512_unpackhi_ps(kernel.packet[12], kernel.packet[13]);\n  __m512 T14 = _mm512_unpacklo_ps(kernel.packet[14], kernel.packet[15]);\n  __m512 T15 = _mm512_unpackhi_ps(kernel.packet[14], kernel.packet[15]);\n  __m512 S0 = _mm512_shuffle_ps(T0, T2, _MM_SHUFFLE(1, 0, 1, 0));\n  __m512 S1 = _mm512_shuffle_ps(T0, T2, _MM_SHUFFLE(3, 2, 3, 2));\n  __m512 S2 = _mm512_shuffle_ps(T1, T3, _MM_SHUFFLE(1, 0, 1, 0));\n  __m512 S3 = _mm512_shuffle_ps(T1, T3, _MM_SHUFFLE(3, 2, 3, 2));\n  __m512 S4 = _mm512_shuffle_ps(T4, T6, _MM_SHUFFLE(1, 0, 1, 0));\n  __m512 S5 = _mm512_shuffle_ps(T4, T6, _MM_SHUFFLE(3, 2, 3, 2));\n  __m512 S6 = _mm512_shuffle_ps(T5, T7, _MM_SHUFFLE(1, 0, 1, 0));\n  __m512 S7 = _mm512_shuffle_ps(T5, T7, _MM_SHUFFLE(3, 2, 3, 2));\n  __m512 S8 = _mm512_shuffle_ps(T8, T10, _MM_SHUFFLE(1, 0, 1, 0));\n  __m512 S9 = _mm512_shuffle_ps(T8, T10, _MM_SHUFFLE(3, 2, 3, 2));\n  __m512 S10 = _mm512_shuffle_ps(T9, T11, _MM_SHUFFLE(1, 0, 1, 0));\n  __m512 S11 = _mm512_shuffle_ps(T9, T11, _MM_SHUFFLE(3, 2, 3, 2));\n  __m512 S12 = _mm512_shuffle_ps(T12, T14, _MM_SHUFFLE(1, 0, 1, 0));\n  __m512 S13 = _mm512_shuffle_ps(T12, T14, _MM_SHUFFLE(3, 2, 3, 2));\n  __m512 S14 = _mm512_shuffle_ps(T13, T15, _MM_SHUFFLE(1, 0, 1, 0));\n  __m512 S15 = _mm512_shuffle_ps(T13, T15, _MM_SHUFFLE(3, 2, 3, 2));\n\n  EIGEN_EXTRACT_8f_FROM_16f(S0, S0);\n  EIGEN_EXTRACT_8f_FROM_16f(S1, S1);\n  EIGEN_EXTRACT_8f_FROM_16f(S2, S2);\n  EIGEN_EXTRACT_8f_FROM_16f(S3, S3);\n  EIGEN_EXTRACT_8f_FROM_16f(S4, S4);\n  EIGEN_EXTRACT_8f_FROM_16f(S5, S5);\n  EIGEN_EXTRACT_8f_FROM_16f(S6, S6);\n  EIGEN_EXTRACT_8f_FROM_16f(S7, S7);\n  EIGEN_EXTRACT_8f_FROM_16f(S8, S8);\n  EIGEN_EXTRACT_8f_FROM_16f(S9, S9);\n  EIGEN_EXTRACT_8f_FROM_16f(S10, S10);\n  EIGEN_EXTRACT_8f_FROM_16f(S11, S11);\n  EIGEN_EXTRACT_8f_FROM_16f(S12, S12);\n  EIGEN_EXTRACT_8f_FROM_16f(S13, S13);\n  EIGEN_EXTRACT_8f_FROM_16f(S14, S14);\n  EIGEN_EXTRACT_8f_FROM_16f(S15, S15);\n\n  PacketBlock<Packet8f, 32> tmp;\n\n  tmp.packet[0] = _mm256_permute2f128_ps(S0_0, S4_0, 0x20);\n  tmp.packet[1] = _mm256_permute2f128_ps(S1_0, S5_0, 0x20);\n  tmp.packet[2] = _mm256_permute2f128_ps(S2_0, S6_0, 0x20);\n  tmp.packet[3] = _mm256_permute2f128_ps(S3_0, S7_0, 0x20);\n  tmp.packet[4] = _mm256_permute2f128_ps(S0_0, S4_0, 0x31);\n  tmp.packet[5] = _mm256_permute2f128_ps(S1_0, S5_0, 0x31);\n  tmp.packet[6] = _mm256_permute2f128_ps(S2_0, S6_0, 0x31);\n  tmp.packet[7] = _mm256_permute2f128_ps(S3_0, S7_0, 0x31);\n\n  tmp.packet[8] = _mm256_permute2f128_ps(S0_1, S4_1, 0x20);\n  tmp.packet[9] = _mm256_permute2f128_ps(S1_1, S5_1, 0x20);\n  tmp.packet[10] = _mm256_permute2f128_ps(S2_1, S6_1, 0x20);\n  tmp.packet[11] = _mm256_permute2f128_ps(S3_1, S7_1, 0x20);\n  tmp.packet[12] = _mm256_permute2f128_ps(S0_1, S4_1, 0x31);\n  tmp.packet[13] = _mm256_permute2f128_ps(S1_1, S5_1, 0x31);\n  tmp.packet[14] = _mm256_permute2f128_ps(S2_1, S6_1, 0x31);\n  tmp.packet[15] = _mm256_permute2f128_ps(S3_1, S7_1, 0x31);\n\n  // Second set of _m256 outputs\n  tmp.packet[16] = _mm256_permute2f128_ps(S8_0, S12_0, 0x20);\n  tmp.packet[17] = _mm256_permute2f128_ps(S9_0, S13_0, 0x20);\n  tmp.packet[18] = _mm256_permute2f128_ps(S10_0, S14_0, 0x20);\n  tmp.packet[19] = _mm256_permute2f128_ps(S11_0, S15_0, 0x20);\n  tmp.packet[20] = _mm256_permute2f128_ps(S8_0, S12_0, 0x31);\n  tmp.packet[21] = _mm256_permute2f128_ps(S9_0, S13_0, 0x31);\n  tmp.packet[22] = _mm256_permute2f128_ps(S10_0, S14_0, 0x31);\n  tmp.packet[23] = _mm256_permute2f128_ps(S11_0, S15_0, 0x31);\n\n  tmp.packet[24] = _mm256_permute2f128_ps(S8_1, S12_1, 0x20);\n  
tmp.packet[25] = _mm256_permute2f128_ps(S9_1, S13_1, 0x20);\n  tmp.packet[26] = _mm256_permute2f128_ps(S10_1, S14_1, 0x20);\n  tmp.packet[27] = _mm256_permute2f128_ps(S11_1, S15_1, 0x20);\n  tmp.packet[28] = _mm256_permute2f128_ps(S8_1, S12_1, 0x31);\n  tmp.packet[29] = _mm256_permute2f128_ps(S9_1, S13_1, 0x31);\n  tmp.packet[30] = _mm256_permute2f128_ps(S10_1, S14_1, 0x31);\n  tmp.packet[31] = _mm256_permute2f128_ps(S11_1, S15_1, 0x31);\n\n  // Pack them into the output\n  PACK_OUTPUT(kernel.packet, tmp.packet, 0, 16);\n  PACK_OUTPUT(kernel.packet, tmp.packet, 1, 16);\n  PACK_OUTPUT(kernel.packet, tmp.packet, 2, 16);\n  PACK_OUTPUT(kernel.packet, tmp.packet, 3, 16);\n\n  PACK_OUTPUT(kernel.packet, tmp.packet, 4, 16);\n  PACK_OUTPUT(kernel.packet, tmp.packet, 5, 16);\n  PACK_OUTPUT(kernel.packet, tmp.packet, 6, 16);\n  PACK_OUTPUT(kernel.packet, tmp.packet, 7, 16);\n\n  PACK_OUTPUT(kernel.packet, tmp.packet, 8, 16);\n  PACK_OUTPUT(kernel.packet, tmp.packet, 9, 16);\n  PACK_OUTPUT(kernel.packet, tmp.packet, 10, 16);\n  PACK_OUTPUT(kernel.packet, tmp.packet, 11, 16);\n\n  PACK_OUTPUT(kernel.packet, tmp.packet, 12, 16);\n  PACK_OUTPUT(kernel.packet, tmp.packet, 13, 16);\n  PACK_OUTPUT(kernel.packet, tmp.packet, 14, 16);\n  PACK_OUTPUT(kernel.packet, tmp.packet, 15, 16);\n}\n#define PACK_OUTPUT_2(OUTPUT, INPUT, INDEX, STRIDE)         \\\n  EIGEN_INSERT_8f_INTO_16f(OUTPUT[INDEX], INPUT[2 * INDEX], \\\n                           INPUT[2 * INDEX + STRIDE]);\n\nEIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet16f, 4>& kernel) {\n  __m512 T0 = _mm512_unpacklo_ps(kernel.packet[0], kernel.packet[1]);\n  __m512 T1 = _mm512_unpackhi_ps(kernel.packet[0], kernel.packet[1]);\n  __m512 T2 = _mm512_unpacklo_ps(kernel.packet[2], kernel.packet[3]);\n  __m512 T3 = _mm512_unpackhi_ps(kernel.packet[2], kernel.packet[3]);\n\n  __m512 S0 = _mm512_shuffle_ps(T0, T2, _MM_SHUFFLE(1, 0, 1, 0));\n  __m512 S1 = _mm512_shuffle_ps(T0, T2, _MM_SHUFFLE(3, 2, 3, 2));\n  __m512 S2 = _mm512_shuffle_ps(T1, T3, _MM_SHUFFLE(1, 0, 1, 0));\n  __m512 S3 = _mm512_shuffle_ps(T1, T3, _MM_SHUFFLE(3, 2, 3, 2));\n\n  EIGEN_EXTRACT_8f_FROM_16f(S0, S0);\n  EIGEN_EXTRACT_8f_FROM_16f(S1, S1);\n  EIGEN_EXTRACT_8f_FROM_16f(S2, S2);\n  EIGEN_EXTRACT_8f_FROM_16f(S3, S3);\n\n  PacketBlock<Packet8f, 8> tmp;\n\n  tmp.packet[0] = _mm256_permute2f128_ps(S0_0, S1_0, 0x20);\n  tmp.packet[1] = _mm256_permute2f128_ps(S2_0, S3_0, 0x20);\n  tmp.packet[2] = _mm256_permute2f128_ps(S0_0, S1_0, 0x31);\n  tmp.packet[3] = _mm256_permute2f128_ps(S2_0, S3_0, 0x31);\n\n  tmp.packet[4] = _mm256_permute2f128_ps(S0_1, S1_1, 0x20);\n  tmp.packet[5] = _mm256_permute2f128_ps(S2_1, S3_1, 0x20);\n  tmp.packet[6] = _mm256_permute2f128_ps(S0_1, S1_1, 0x31);\n  tmp.packet[7] = _mm256_permute2f128_ps(S2_1, S3_1, 0x31);\n\n  PACK_OUTPUT_2(kernel.packet, tmp.packet, 0, 1);\n  PACK_OUTPUT_2(kernel.packet, tmp.packet, 1, 1);\n  PACK_OUTPUT_2(kernel.packet, tmp.packet, 2, 1);\n  PACK_OUTPUT_2(kernel.packet, tmp.packet, 3, 1);\n}\n\n#define PACK_OUTPUT_SQ_D(OUTPUT, INPUT, INDEX, STRIDE)                \\\n  OUTPUT[INDEX] = _mm512_insertf64x4(OUTPUT[INDEX], INPUT[INDEX], 0); \\\n  OUTPUT[INDEX] = _mm512_insertf64x4(OUTPUT[INDEX], INPUT[INDEX + STRIDE], 1);\n\n#define PACK_OUTPUT_D(OUTPUT, INPUT, INDEX, STRIDE)                         \\\n  OUTPUT[INDEX] = _mm512_insertf64x4(OUTPUT[INDEX], INPUT[(2 * INDEX)], 0); \\\n  OUTPUT[INDEX] =                                                           \\\n      _mm512_insertf64x4(OUTPUT[INDEX], INPUT[(2 * INDEX) + STRIDE], 
1);\n\nEIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet8d, 4>& kernel) {\n  __m512d T0 = _mm512_shuffle_pd(kernel.packet[0], kernel.packet[1], 0);\n  __m512d T1 = _mm512_shuffle_pd(kernel.packet[0], kernel.packet[1], 0xff);\n  __m512d T2 = _mm512_shuffle_pd(kernel.packet[2], kernel.packet[3], 0);\n  __m512d T3 = _mm512_shuffle_pd(kernel.packet[2], kernel.packet[3], 0xff);\n\n  PacketBlock<Packet4d, 8> tmp;\n\n  tmp.packet[0] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T0, 0),\n                                         _mm512_extractf64x4_pd(T2, 0), 0x20);\n  tmp.packet[1] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T1, 0),\n                                         _mm512_extractf64x4_pd(T3, 0), 0x20);\n  tmp.packet[2] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T0, 0),\n                                         _mm512_extractf64x4_pd(T2, 0), 0x31);\n  tmp.packet[3] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T1, 0),\n                                         _mm512_extractf64x4_pd(T3, 0), 0x31);\n\n  tmp.packet[4] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T0, 1),\n                                         _mm512_extractf64x4_pd(T2, 1), 0x20);\n  tmp.packet[5] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T1, 1),\n                                         _mm512_extractf64x4_pd(T3, 1), 0x20);\n  tmp.packet[6] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T0, 1),\n                                         _mm512_extractf64x4_pd(T2, 1), 0x31);\n  tmp.packet[7] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T1, 1),\n                                         _mm512_extractf64x4_pd(T3, 1), 0x31);\n\n  PACK_OUTPUT_D(kernel.packet, tmp.packet, 0, 1);\n  PACK_OUTPUT_D(kernel.packet, tmp.packet, 1, 1);\n  PACK_OUTPUT_D(kernel.packet, tmp.packet, 2, 1);\n  PACK_OUTPUT_D(kernel.packet, tmp.packet, 3, 1);\n}\n\nEIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet8d, 8>& kernel) {\n  __m512d T0 = _mm512_unpacklo_pd(kernel.packet[0], kernel.packet[1]);\n  __m512d T1 = _mm512_unpackhi_pd(kernel.packet[0], kernel.packet[1]);\n  __m512d T2 = _mm512_unpacklo_pd(kernel.packet[2], kernel.packet[3]);\n  __m512d T3 = _mm512_unpackhi_pd(kernel.packet[2], kernel.packet[3]);\n  __m512d T4 = _mm512_unpacklo_pd(kernel.packet[4], kernel.packet[5]);\n  __m512d T5 = _mm512_unpackhi_pd(kernel.packet[4], kernel.packet[5]);\n  __m512d T6 = _mm512_unpacklo_pd(kernel.packet[6], kernel.packet[7]);\n  __m512d T7 = _mm512_unpackhi_pd(kernel.packet[6], kernel.packet[7]);\n\n  PacketBlock<Packet4d, 16> tmp;\n\n  tmp.packet[0] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T0, 0),\n                                         _mm512_extractf64x4_pd(T2, 0), 0x20);\n  tmp.packet[1] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T1, 0),\n                                         _mm512_extractf64x4_pd(T3, 0), 0x20);\n  tmp.packet[2] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T0, 0),\n                                         _mm512_extractf64x4_pd(T2, 0), 0x31);\n  tmp.packet[3] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T1, 0),\n                                         _mm512_extractf64x4_pd(T3, 0), 0x31);\n\n  tmp.packet[4] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T0, 1),\n                                         _mm512_extractf64x4_pd(T2, 1), 0x20);\n  tmp.packet[5] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T1, 1),\n                                         _mm512_extractf64x4_pd(T3, 1), 0x20);\n  tmp.packet[6] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T0, 1),\n         
                                _mm512_extractf64x4_pd(T2, 1), 0x31);\n  tmp.packet[7] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T1, 1),\n                                         _mm512_extractf64x4_pd(T3, 1), 0x31);\n\n  tmp.packet[8] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T4, 0),\n                                         _mm512_extractf64x4_pd(T6, 0), 0x20);\n  tmp.packet[9] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T5, 0),\n                                         _mm512_extractf64x4_pd(T7, 0), 0x20);\n  tmp.packet[10] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T4, 0),\n                                          _mm512_extractf64x4_pd(T6, 0), 0x31);\n  tmp.packet[11] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T5, 0),\n                                          _mm512_extractf64x4_pd(T7, 0), 0x31);\n\n  tmp.packet[12] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T4, 1),\n                                          _mm512_extractf64x4_pd(T6, 1), 0x20);\n  tmp.packet[13] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T5, 1),\n                                          _mm512_extractf64x4_pd(T7, 1), 0x20);\n  tmp.packet[14] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T4, 1),\n                                          _mm512_extractf64x4_pd(T6, 1), 0x31);\n  tmp.packet[15] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T5, 1),\n                                          _mm512_extractf64x4_pd(T7, 1), 0x31);\n\n  PACK_OUTPUT_SQ_D(kernel.packet, tmp.packet, 0, 8);\n  PACK_OUTPUT_SQ_D(kernel.packet, tmp.packet, 1, 8);\n  PACK_OUTPUT_SQ_D(kernel.packet, tmp.packet, 2, 8);\n  PACK_OUTPUT_SQ_D(kernel.packet, tmp.packet, 3, 8);\n\n  PACK_OUTPUT_SQ_D(kernel.packet, tmp.packet, 4, 8);\n  PACK_OUTPUT_SQ_D(kernel.packet, tmp.packet, 5, 8);\n  PACK_OUTPUT_SQ_D(kernel.packet, tmp.packet, 6, 8);\n  PACK_OUTPUT_SQ_D(kernel.packet, tmp.packet, 7, 8);\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet16f pblend(const Selector<16>& /*ifPacket*/,\n                                     const Packet16f& /*thenPacket*/,\n                                     const Packet16f& /*elsePacket*/) {\n  assert(false && \"To be implemented\");\n  return Packet16f();\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet8d pblend(const Selector<8>& ifPacket,\n                                    const Packet8d& thenPacket,\n                                    const Packet8d& elsePacket) {\n  __mmask8 m = (ifPacket.select[0]   )\n             | (ifPacket.select[1]<<1)\n             | (ifPacket.select[2]<<2)\n             | (ifPacket.select[3]<<3)\n             | (ifPacket.select[4]<<4)\n             | (ifPacket.select[5]<<5)\n             | (ifPacket.select[6]<<6)\n             | (ifPacket.select[7]<<7);\n  return _mm512_mask_blend_pd(m, elsePacket, thenPacket);\n}\n\n// Packet math for Eigen::half\ntemplate<> EIGEN_STRONG_INLINE Packet16h pset1<Packet16h>(const Eigen::half& from) {\n  return _mm256_set1_epi16(from.x);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Eigen::half pfirst<Packet16h>(const Packet16h& from) {\n  return half_impl::raw_uint16_to_half(static_cast<unsigned short>(_mm256_extract_epi16(from, 0)));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet16h pload<Packet16h>(const Eigen::half* from) {\n  return _mm256_load_si256(reinterpret_cast<const __m256i*>(from));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet16h ploadu<Packet16h>(const Eigen::half* from) {\n  return _mm256_loadu_si256(reinterpret_cast<const __m256i*>(from));\n}\n\ntemplate<> EIGEN_STRONG_INLINE void pstore<half>(Eigen::half* to, const Packet16h& 
from) {\n  // (void*) -> workaround clang warning:\n  // cast from 'Eigen::half *' to '__m256i *' increases required alignment from 2 to 32\n  _mm256_store_si256((__m256i*)(void*)to, from);\n}\n\ntemplate<> EIGEN_STRONG_INLINE void pstoreu<half>(Eigen::half* to, const Packet16h& from) {\n  // (void*) -> workaround clang warning:\n  // cast from 'Eigen::half *' to '__m256i *' increases required alignment from 2 to 32\n  _mm256_storeu_si256((__m256i*)(void*)to, from);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet16h\nploaddup<Packet16h>(const Eigen::half*  from) {\n  unsigned short a = from[0].x;\n  unsigned short b = from[1].x;\n  unsigned short c = from[2].x;\n  unsigned short d = from[3].x;\n  unsigned short e = from[4].x;\n  unsigned short f = from[5].x;\n  unsigned short g = from[6].x;\n  unsigned short h = from[7].x;\n  return _mm256_set_epi16(h, h, g, g, f, f, e, e, d, d, c, c, b, b, a, a);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet16h\nploadquad(const Eigen::half* from) {\n  unsigned short a = from[0].x;\n  unsigned short b = from[1].x;\n  unsigned short c = from[2].x;\n  unsigned short d = from[3].x;\n  return _mm256_set_epi16(d, d, d, d, c, c, c, c, b, b, b, b, a, a, a, a);\n}\n\nEIGEN_STRONG_INLINE Packet16f half2float(const Packet16h& a) {\n#ifdef EIGEN_HAS_FP16_C\n  return _mm512_cvtph_ps(a);\n#else\n  EIGEN_ALIGN64 half aux[16];\n  pstore(aux, a);\n  float f0(aux[0]);\n  float f1(aux[1]);\n  float f2(aux[2]);\n  float f3(aux[3]);\n  float f4(aux[4]);\n  float f5(aux[5]);\n  float f6(aux[6]);\n  float f7(aux[7]);\n  float f8(aux[8]);\n  float f9(aux[9]);\n  float fa(aux[10]);\n  float fb(aux[11]);\n  float fc(aux[12]);\n  float fd(aux[13]);\n  float fe(aux[14]);\n  float ff(aux[15]);\n\n  return _mm512_set_ps(\n      ff, fe, fd, fc, fb, fa, f9, f8, f7, f6, f5, f4, f3, f2, f1, f0);\n#endif\n}\n\nEIGEN_STRONG_INLINE Packet16h float2half(const Packet16f& a) {\n#ifdef EIGEN_HAS_FP16_C\n  return _mm512_cvtps_ph(a, _MM_FROUND_TO_NEAREST_INT|_MM_FROUND_NO_EXC);\n#else\n  EIGEN_ALIGN64 float aux[16];\n  pstore(aux, a);\n  half h0(aux[0]);\n  half h1(aux[1]);\n  half h2(aux[2]);\n  half h3(aux[3]);\n  half h4(aux[4]);\n  half h5(aux[5]);\n  half h6(aux[6]);\n  half h7(aux[7]);\n  half h8(aux[8]);\n  half h9(aux[9]);\n  half ha(aux[10]);\n  half hb(aux[11]);\n  half hc(aux[12]);\n  half hd(aux[13]);\n  half he(aux[14]);\n  half hf(aux[15]);\n\n  return _mm256_set_epi16(\n      hf.x, he.x, hd.x, hc.x, hb.x, ha.x, h9.x, h8.x,\n      h7.x, h6.x, h5.x, h4.x, h3.x, h2.x, h1.x, h0.x);\n#endif\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet16h ptrue(const Packet16h& a) {\n  return ptrue(Packet8i(a));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet16h pabs(const Packet16h& a) {\n  const __m256i sign_mask = _mm256_set1_epi16(static_cast<numext::uint16_t>(0x8000));\n  return _mm256_andnot_si256(sign_mask, a);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet16h pmin<Packet16h>(const Packet16h& a,\n                                              const Packet16h& b) {\n  return float2half(pmin<Packet16f>(half2float(a), half2float(b)));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet16h pmax<Packet16h>(const Packet16h& a,\n                                              const Packet16h& b) {\n  return float2half(pmax<Packet16f>(half2float(a), half2float(b)));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet16h plset<Packet16h>(const half& a) {\n  return float2half(plset<Packet16f>(static_cast<float>(a)));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet16h por(const Packet16h& a,const Packet16h& b) {\n  // in some cases 
Packet8i is a wrapper around __m256i, so we need to\n  // cast to Packet8i to call the correct overload.\n  return por(Packet8i(a),Packet8i(b));\n}\ntemplate<> EIGEN_STRONG_INLINE Packet16h pxor(const Packet16h& a,const Packet16h& b) {\n  return pxor(Packet8i(a),Packet8i(b));\n}\ntemplate<> EIGEN_STRONG_INLINE Packet16h pand(const Packet16h& a,const Packet16h& b) {\n  return pand(Packet8i(a),Packet8i(b));\n}\ntemplate<> EIGEN_STRONG_INLINE Packet16h pandnot(const Packet16h& a,const Packet16h& b) {\n  return pandnot(Packet8i(a),Packet8i(b));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet16h pselect(const Packet16h& mask, const Packet16h& a, const Packet16h& b) {\n  return _mm256_blendv_epi8(b, a, mask);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet16h pround<Packet16h>(const Packet16h& a) {\n  return float2half(pround<Packet16f>(half2float(a)));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet16h print<Packet16h>(const Packet16h& a) {\n  return float2half(print<Packet16f>(half2float(a)));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet16h pceil<Packet16h>(const Packet16h& a) {\n  return float2half(pceil<Packet16f>(half2float(a)));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet16h pfloor<Packet16h>(const Packet16h& a) {\n  return float2half(pfloor<Packet16f>(half2float(a)));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet16h pcmp_eq(const Packet16h& a,const Packet16h& b) {\n  Packet16f af = half2float(a);\n  Packet16f bf = half2float(b);\n  return Pack32To16(pcmp_eq(af, bf));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet16h pcmp_le(const Packet16h& a,const Packet16h& b) {\n  return Pack32To16(pcmp_le(half2float(a), half2float(b)));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet16h pcmp_lt(const Packet16h& a,const Packet16h& b) {\n  return Pack32To16(pcmp_lt(half2float(a), half2float(b)));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet16h pcmp_lt_or_nan(const Packet16h& a,const Packet16h& b) {\n  return Pack32To16(pcmp_lt_or_nan(half2float(a), half2float(b)));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet16h pconj(const Packet16h& a) { return a; }\n\ntemplate<> EIGEN_STRONG_INLINE Packet16h pnegate(const Packet16h& a) {\n  Packet16h sign_mask = _mm256_set1_epi16(static_cast<unsigned short>(0x8000));\n  return _mm256_xor_si256(a, sign_mask);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet16h padd<Packet16h>(const Packet16h& a, const Packet16h& b) {\n  Packet16f af = half2float(a);\n  Packet16f bf = half2float(b);\n  Packet16f rf = padd(af, bf);\n  return float2half(rf);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet16h psub<Packet16h>(const Packet16h& a, const Packet16h& b) {\n  Packet16f af = half2float(a);\n  Packet16f bf = half2float(b);\n  Packet16f rf = psub(af, bf);\n  return float2half(rf);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet16h pmul<Packet16h>(const Packet16h& a, const Packet16h& b) {\n  Packet16f af = half2float(a);\n  Packet16f bf = half2float(b);\n  Packet16f rf = pmul(af, bf);\n  return float2half(rf);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet16h pdiv<Packet16h>(const Packet16h& a, const Packet16h& b) {\n  Packet16f af = half2float(a);\n  Packet16f bf = half2float(b);\n  Packet16f rf = pdiv(af, bf);\n  return float2half(rf);\n}\n\ntemplate<> EIGEN_STRONG_INLINE half predux<Packet16h>(const Packet16h& from) {\n  Packet16f from_float = half2float(from);\n  return half(predux(from_float));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet8h predux_half_dowto4<Packet16h>(const Packet16h& a) {\n  Packet8h lane0 = _mm256_extractf128_si256(a, 0);\n  Packet8h lane1 = _mm256_extractf128_si256(a, 1);\n  return 
padd<Packet8h>(lane0, lane1);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Eigen::half predux_max<Packet16h>(const Packet16h& a) {\n  Packet16f af = half2float(a);\n  float reduced = predux_max<Packet16f>(af);\n  return Eigen::half(reduced);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Eigen::half predux_min<Packet16h>(const Packet16h& a) {\n  Packet16f af = half2float(a);\n  float reduced = predux_min<Packet16f>(af);\n  return Eigen::half(reduced);\n}\n\ntemplate<> EIGEN_STRONG_INLINE half predux_mul<Packet16h>(const Packet16h& from) {\n  Packet16f from_float = half2float(from);\n  return half(predux_mul(from_float));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet16h preverse(const Packet16h& a)\n{\n  __m128i m = _mm_setr_epi8(14,15,12,13,10,11,8,9,6,7,4,5,2,3,0,1);\n  return _mm256_insertf128_si256(\n                    _mm256_castsi128_si256(_mm_shuffle_epi8(_mm256_extractf128_si256(a,1),m)),\n                                           _mm_shuffle_epi8(_mm256_extractf128_si256(a,0),m), 1);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet16h pgather<Eigen::half, Packet16h>(const Eigen::half* from, Index stride)\n{\n  return _mm256_set_epi16(\n      from[15*stride].x, from[14*stride].x, from[13*stride].x, from[12*stride].x,\n      from[11*stride].x, from[10*stride].x, from[9*stride].x, from[8*stride].x,\n      from[7*stride].x, from[6*stride].x, from[5*stride].x, from[4*stride].x,\n      from[3*stride].x, from[2*stride].x, from[1*stride].x, from[0*stride].x);\n}\n\ntemplate<> EIGEN_STRONG_INLINE void pscatter<half, Packet16h>(half* to, const Packet16h& from, Index stride)\n{\n  EIGEN_ALIGN64 half aux[16];\n  pstore(aux, from);\n  to[stride*0] = aux[0];\n  to[stride*1] = aux[1];\n  to[stride*2] = aux[2];\n  to[stride*3] = aux[3];\n  to[stride*4] = aux[4];\n  to[stride*5] = aux[5];\n  to[stride*6] = aux[6];\n  to[stride*7] = aux[7];\n  to[stride*8] = aux[8];\n  to[stride*9] = aux[9];\n  to[stride*10] = aux[10];\n  to[stride*11] = aux[11];\n  to[stride*12] = aux[12];\n  to[stride*13] = aux[13];\n  to[stride*14] = aux[14];\n  to[stride*15] = aux[15];\n}\n\nEIGEN_STRONG_INLINE void\nptranspose(PacketBlock<Packet16h,16>& kernel) {\n  __m256i a = kernel.packet[0];\n  __m256i b = kernel.packet[1];\n  __m256i c = kernel.packet[2];\n  __m256i d = kernel.packet[3];\n  __m256i e = kernel.packet[4];\n  __m256i f = kernel.packet[5];\n  __m256i g = kernel.packet[6];\n  __m256i h = kernel.packet[7];\n  __m256i i = kernel.packet[8];\n  __m256i j = kernel.packet[9];\n  __m256i k = kernel.packet[10];\n  __m256i l = kernel.packet[11];\n  __m256i m = kernel.packet[12];\n  __m256i n = kernel.packet[13];\n  __m256i o = kernel.packet[14];\n  __m256i p = kernel.packet[15];\n\n  __m256i ab_07 = _mm256_unpacklo_epi16(a, b);\n  __m256i cd_07 = _mm256_unpacklo_epi16(c, d);\n  __m256i ef_07 = _mm256_unpacklo_epi16(e, f);\n  __m256i gh_07 = _mm256_unpacklo_epi16(g, h);\n  __m256i ij_07 = _mm256_unpacklo_epi16(i, j);\n  __m256i kl_07 = _mm256_unpacklo_epi16(k, l);\n  __m256i mn_07 = _mm256_unpacklo_epi16(m, n);\n  __m256i op_07 = _mm256_unpacklo_epi16(o, p);\n\n  __m256i ab_8f = _mm256_unpackhi_epi16(a, b);\n  __m256i cd_8f = _mm256_unpackhi_epi16(c, d);\n  __m256i ef_8f = _mm256_unpackhi_epi16(e, f);\n  __m256i gh_8f = _mm256_unpackhi_epi16(g, h);\n  __m256i ij_8f = _mm256_unpackhi_epi16(i, j);\n  __m256i kl_8f = _mm256_unpackhi_epi16(k, l);\n  __m256i mn_8f = _mm256_unpackhi_epi16(m, n);\n  __m256i op_8f = _mm256_unpackhi_epi16(o, p);\n\n  __m256i abcd_03 = _mm256_unpacklo_epi32(ab_07, cd_07);\n  __m256i abcd_47 = 
_mm256_unpackhi_epi32(ab_07, cd_07);\n  __m256i efgh_03 = _mm256_unpacklo_epi32(ef_07, gh_07);\n  __m256i efgh_47 = _mm256_unpackhi_epi32(ef_07, gh_07);\n  __m256i ijkl_03 = _mm256_unpacklo_epi32(ij_07, kl_07);\n  __m256i ijkl_47 = _mm256_unpackhi_epi32(ij_07, kl_07);\n  __m256i mnop_03 = _mm256_unpacklo_epi32(mn_07, op_07);\n  __m256i mnop_47 = _mm256_unpackhi_epi32(mn_07, op_07);\n\n  __m256i abcd_8b = _mm256_unpacklo_epi32(ab_8f, cd_8f);\n  __m256i abcd_cf = _mm256_unpackhi_epi32(ab_8f, cd_8f);\n  __m256i efgh_8b = _mm256_unpacklo_epi32(ef_8f, gh_8f);\n  __m256i efgh_cf = _mm256_unpackhi_epi32(ef_8f, gh_8f);\n  __m256i ijkl_8b = _mm256_unpacklo_epi32(ij_8f, kl_8f);\n  __m256i ijkl_cf = _mm256_unpackhi_epi32(ij_8f, kl_8f);\n  __m256i mnop_8b = _mm256_unpacklo_epi32(mn_8f, op_8f);\n  __m256i mnop_cf = _mm256_unpackhi_epi32(mn_8f, op_8f);\n\n  __m256i abcdefgh_01 = _mm256_unpacklo_epi64(abcd_03, efgh_03);\n  __m256i abcdefgh_23 = _mm256_unpackhi_epi64(abcd_03, efgh_03);\n  __m256i ijklmnop_01 = _mm256_unpacklo_epi64(ijkl_03, mnop_03);\n  __m256i ijklmnop_23 = _mm256_unpackhi_epi64(ijkl_03, mnop_03);\n  __m256i abcdefgh_45 = _mm256_unpacklo_epi64(abcd_47, efgh_47);\n  __m256i abcdefgh_67 = _mm256_unpackhi_epi64(abcd_47, efgh_47);\n  __m256i ijklmnop_45 = _mm256_unpacklo_epi64(ijkl_47, mnop_47);\n  __m256i ijklmnop_67 = _mm256_unpackhi_epi64(ijkl_47, mnop_47);\n  __m256i abcdefgh_89 = _mm256_unpacklo_epi64(abcd_8b, efgh_8b);\n  __m256i abcdefgh_ab = _mm256_unpackhi_epi64(abcd_8b, efgh_8b);\n  __m256i ijklmnop_89 = _mm256_unpacklo_epi64(ijkl_8b, mnop_8b);\n  __m256i ijklmnop_ab = _mm256_unpackhi_epi64(ijkl_8b, mnop_8b);\n  __m256i abcdefgh_cd = _mm256_unpacklo_epi64(abcd_cf, efgh_cf);\n  __m256i abcdefgh_ef = _mm256_unpackhi_epi64(abcd_cf, efgh_cf);\n  __m256i ijklmnop_cd = _mm256_unpacklo_epi64(ijkl_cf, mnop_cf);\n  __m256i ijklmnop_ef = _mm256_unpackhi_epi64(ijkl_cf, mnop_cf);\n\n  // NOTE: no unpacklo/hi instr in this case, so using permute instr.\n  __m256i a_p_0 = _mm256_permute2x128_si256(abcdefgh_01, ijklmnop_01, 0x20);\n  __m256i a_p_1 = _mm256_permute2x128_si256(abcdefgh_23, ijklmnop_23, 0x20);\n  __m256i a_p_2 = _mm256_permute2x128_si256(abcdefgh_45, ijklmnop_45, 0x20);\n  __m256i a_p_3 = _mm256_permute2x128_si256(abcdefgh_67, ijklmnop_67, 0x20);\n  __m256i a_p_4 = _mm256_permute2x128_si256(abcdefgh_89, ijklmnop_89, 0x20);\n  __m256i a_p_5 = _mm256_permute2x128_si256(abcdefgh_ab, ijklmnop_ab, 0x20);\n  __m256i a_p_6 = _mm256_permute2x128_si256(abcdefgh_cd, ijklmnop_cd, 0x20);\n  __m256i a_p_7 = _mm256_permute2x128_si256(abcdefgh_ef, ijklmnop_ef, 0x20);\n  __m256i a_p_8 = _mm256_permute2x128_si256(abcdefgh_01, ijklmnop_01, 0x31);\n  __m256i a_p_9 = _mm256_permute2x128_si256(abcdefgh_23, ijklmnop_23, 0x31);\n  __m256i a_p_a = _mm256_permute2x128_si256(abcdefgh_45, ijklmnop_45, 0x31);\n  __m256i a_p_b = _mm256_permute2x128_si256(abcdefgh_67, ijklmnop_67, 0x31);\n  __m256i a_p_c = _mm256_permute2x128_si256(abcdefgh_89, ijklmnop_89, 0x31);\n  __m256i a_p_d = _mm256_permute2x128_si256(abcdefgh_ab, ijklmnop_ab, 0x31);\n  __m256i a_p_e = _mm256_permute2x128_si256(abcdefgh_cd, ijklmnop_cd, 0x31);\n  __m256i a_p_f = _mm256_permute2x128_si256(abcdefgh_ef, ijklmnop_ef, 0x31);\n\n  kernel.packet[0] = a_p_0;\n  kernel.packet[1] = a_p_1;\n  kernel.packet[2] = a_p_2;\n  kernel.packet[3] = a_p_3;\n  kernel.packet[4] = a_p_4;\n  kernel.packet[5] = a_p_5;\n  kernel.packet[6] = a_p_6;\n  kernel.packet[7] = a_p_7;\n  kernel.packet[8] = a_p_8;\n  kernel.packet[9] = a_p_9;\n  kernel.packet[10] = a_p_a;\n  
kernel.packet[11] = a_p_b;\n  kernel.packet[12] = a_p_c;\n  kernel.packet[13] = a_p_d;\n  kernel.packet[14] = a_p_e;\n  kernel.packet[15] = a_p_f;\n}\n\nEIGEN_STRONG_INLINE void\nptranspose(PacketBlock<Packet16h,8>& kernel) {\n  EIGEN_ALIGN64 half in[8][16];\n  pstore<half>(in[0], kernel.packet[0]);\n  pstore<half>(in[1], kernel.packet[1]);\n  pstore<half>(in[2], kernel.packet[2]);\n  pstore<half>(in[3], kernel.packet[3]);\n  pstore<half>(in[4], kernel.packet[4]);\n  pstore<half>(in[5], kernel.packet[5]);\n  pstore<half>(in[6], kernel.packet[6]);\n  pstore<half>(in[7], kernel.packet[7]);\n\n  EIGEN_ALIGN64 half out[8][16];\n\n  for (int i = 0; i < 8; ++i) {\n    for (int j = 0; j < 8; ++j) {\n      out[i][j] = in[j][2*i];\n    }\n    for (int j = 0; j < 8; ++j) {\n      out[i][j+8] = in[j][2*i+1];\n    }\n  }\n\n  kernel.packet[0] = pload<Packet16h>(out[0]);\n  kernel.packet[1] = pload<Packet16h>(out[1]);\n  kernel.packet[2] = pload<Packet16h>(out[2]);\n  kernel.packet[3] = pload<Packet16h>(out[3]);\n  kernel.packet[4] = pload<Packet16h>(out[4]);\n  kernel.packet[5] = pload<Packet16h>(out[5]);\n  kernel.packet[6] = pload<Packet16h>(out[6]);\n  kernel.packet[7] = pload<Packet16h>(out[7]);\n}\n\nEIGEN_STRONG_INLINE void\nptranspose(PacketBlock<Packet16h,4>& kernel) {\n  EIGEN_ALIGN64 half in[4][16];\n  pstore<half>(in[0], kernel.packet[0]);\n  pstore<half>(in[1], kernel.packet[1]);\n  pstore<half>(in[2], kernel.packet[2]);\n  pstore<half>(in[3], kernel.packet[3]);\n\n  EIGEN_ALIGN64 half out[4][16];\n\n  for (int i = 0; i < 4; ++i) {\n    for (int j = 0; j < 4; ++j) {\n      out[i][j] = in[j][4*i];\n    }\n    for (int j = 0; j < 4; ++j) {\n      out[i][j+4] = in[j][4*i+1];\n    }\n    for (int j = 0; j < 4; ++j) {\n      out[i][j+8] = in[j][4*i+2];\n    }\n    for (int j = 0; j < 4; ++j) {\n      out[i][j+12] = in[j][4*i+3];\n    }\n  }\n\n  kernel.packet[0] = pload<Packet16h>(out[0]);\n  kernel.packet[1] = pload<Packet16h>(out[1]);\n  kernel.packet[2] = pload<Packet16h>(out[2]);\n  kernel.packet[3] = pload<Packet16h>(out[3]);\n}\n\ntemplate <> struct is_arithmetic<Packet16bf> { enum { value = true }; };\n\ntemplate <>\nstruct packet_traits<bfloat16> : default_packet_traits {\n  typedef Packet16bf type;\n  typedef Packet8bf half;\n  enum {\n    Vectorizable = 1,\n    AlignedOnScalar = 1,\n    size = 16,\n    HasHalfPacket = 1,\n    HasBlend = 0,\n    HasInsert = 1,\n    HasSin = EIGEN_FAST_MATH,\n    HasCos = EIGEN_FAST_MATH,\n#if EIGEN_GNUC_AT_LEAST(5, 3) || (!EIGEN_COMP_GNUC_STRICT)\n#ifdef EIGEN_VECTORIZE_AVX512DQ\n    HasLog = 1,  // Currently fails test with bad accuracy.\n    HasLog1p  = 1,\n    HasExpm1  = 1,\n    HasNdtri = 1,\n    HasBessel  = 1,\n#endif\n    HasExp = 1,\n    HasSqrt = EIGEN_FAST_MATH,\n    HasRsqrt = EIGEN_FAST_MATH,\n    HasTanh = EIGEN_FAST_MATH,\n    HasErf = EIGEN_FAST_MATH,\n#endif\n    HasCmp  = 1,\n    HasDiv = 1\n  };\n};\n\ntemplate <>\nstruct unpacket_traits<Packet16bf>\n{\n  typedef bfloat16 type;\n  enum {size=16, alignment=Aligned32, vectorizable=true, masked_load_available=false, masked_store_available=false};\n  typedef Packet8bf half;\n};\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet16bf pset1<Packet16bf>(const bfloat16& from) {\n  return _mm256_set1_epi16(from.value);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE bfloat16 pfirst<Packet16bf>(const Packet16bf& from) {\n  bfloat16 t;\n  t.value = static_cast<unsigned short>(_mm256_extract_epi16(from, 0));\n  return t;\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet16bf pload<Packet16bf>(const bfloat16* from) {\n 
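 // NOTE: pload assumes 32-byte-aligned memory (alignment=Aligned32 in unpacket_traits<Packet16bf>); ploadu below is the unaligned variant.\n 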
 return _mm256_load_si256(reinterpret_cast<const __m256i*>(from));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet16bf ploadu<Packet16bf>(const bfloat16* from) {\n  return _mm256_loadu_si256(reinterpret_cast<const __m256i*>(from));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE void pstore<bfloat16>(bfloat16* to,\n                                          const Packet16bf& from) {\n  _mm256_store_si256(reinterpret_cast<__m256i*>(to), from);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE void pstoreu<bfloat16>(bfloat16* to,\n                                           const Packet16bf& from) {\n  _mm256_storeu_si256(reinterpret_cast<__m256i*>(to), from);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet16bf\nploaddup<Packet16bf>(const bfloat16* from) {\n  unsigned short a = from[0].value;\n  unsigned short b = from[1].value;\n  unsigned short c = from[2].value;\n  unsigned short d = from[3].value;\n  unsigned short e = from[4].value;\n  unsigned short f = from[5].value;\n  unsigned short g = from[6].value;\n  unsigned short h = from[7].value;\n  return _mm256_set_epi16(h, h, g, g, f, f, e, e, d, d, c, c, b, b, a, a);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet16bf\nploadquad(const bfloat16* from) {\n  unsigned short a = from[0].value;\n  unsigned short b = from[1].value;\n  unsigned short c = from[2].value;\n  unsigned short d = from[3].value;\n  return _mm256_set_epi16(d, d, d, d, c, c, c, c, b, b, b, b, a, a, a, a);\n}\n\nEIGEN_STRONG_INLINE Packet16f Bf16ToF32(const Packet16bf& a) {\n  return _mm512_castsi512_ps(_mm512_slli_epi32(_mm512_cvtepu16_epi32(a), 16));\n}\n\n// Convert float to bfloat16 according to the round-to-nearest-even/denormals algorithm.\nEIGEN_STRONG_INLINE Packet16bf F32ToBf16(const Packet16f& a) {\n  Packet16bf r;\n\n#if defined(EIGEN_VECTORIZE_AVX512BF16) && EIGEN_GNUC_AT_LEAST(10, 1)\n  // Since GCC 10.1 supports avx512bf16 and C-style explicit casts\n  // (C++ static_cast is not supported yet), do the conversion via the\n  // intrinsic and register path for performance.\n  r = (__m256i)(_mm512_cvtneps_pbh(a));\n\n#else\n  __m512i t;\n  __m512i input = _mm512_castps_si512(a);\n  __m512i nan = _mm512_set1_epi32(0x7fc0);\n\n  // uint32_t lsb = (input >> 16) & 1;\n  t = _mm512_and_si512(_mm512_srli_epi32(input, 16), _mm512_set1_epi32(1));\n  // uint32_t rounding_bias = 0x7fff + lsb;\n  t = _mm512_add_epi32(t, _mm512_set1_epi32(0x7fff));\n  // input += rounding_bias;\n  t = _mm512_add_epi32(t, input);\n  // input = input >> 16;\n  t = _mm512_srli_epi32(t, 16);\n\n  // Check for NaN before converting back to bf16\n  __mmask16 mask = _mm512_cmp_ps_mask(a, a, _CMP_ORD_Q);\n\n  t = _mm512_mask_blend_epi32(mask, nan, t);\n  // output.value = static_cast<uint16_t>(input);\n  r = _mm512_cvtepi32_epi16(t);\n#endif // EIGEN_VECTORIZE_AVX512BF16\n\n  return r;\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet16bf ptrue(const Packet16bf& a) {\n  return ptrue<Packet8i>(a);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet16bf por(const Packet16bf& a, const Packet16bf& b) {\n  return por<Packet8i>(a, b);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet16bf pxor(const Packet16bf& a, const Packet16bf& b) {\n  return pxor<Packet8i>(a, b);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet16bf pand(const Packet16bf& a, const Packet16bf& b) {\n  return pand<Packet8i>(a, b);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet16bf pandnot(const Packet16bf& a,\n                                       const Packet16bf& b) {\n  return pandnot<Packet8i>(a, b);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet16bf pselect(const 
Packet16bf& mask,\n                                       const Packet16bf& a,\n                                       const Packet16bf& b) {\n  // Input mask is expected to be all 0/1, handle it with 8-bit\n  // intrinsic for performance.\n  return _mm256_blendv_epi8(b, a, mask);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet16bf pround<Packet16bf>(const Packet16bf& a)\n{\n  return F32ToBf16(pround<Packet16f>(Bf16ToF32(a)));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet16bf print<Packet16bf>(const Packet16bf& a) {\n  return F32ToBf16(print<Packet16f>(Bf16ToF32(a)));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet16bf pceil<Packet16bf>(const Packet16bf& a) {\n  return F32ToBf16(pceil<Packet16f>(Bf16ToF32(a)));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet16bf pfloor<Packet16bf>(const Packet16bf& a) {\n  return F32ToBf16(pfloor<Packet16f>(Bf16ToF32(a)));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet16bf pcmp_eq(const Packet16bf& a,\n                                       const Packet16bf& b) {\n  return Pack32To16(pcmp_eq(Bf16ToF32(a), Bf16ToF32(b)));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet16bf pcmp_le(const Packet16bf& a,\n                                       const Packet16bf& b) {\n  return Pack32To16(pcmp_le(Bf16ToF32(a), Bf16ToF32(b)));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet16bf pcmp_lt(const Packet16bf& a,\n                                       const Packet16bf& b) {\n  return Pack32To16(pcmp_lt(Bf16ToF32(a), Bf16ToF32(b)));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet16bf pcmp_lt_or_nan(const Packet16bf& a,\n                                              const Packet16bf& b) {\n  return Pack32To16(pcmp_lt_or_nan(Bf16ToF32(a), Bf16ToF32(b)));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet16bf pnegate(const Packet16bf& a) {\n  Packet16bf sign_mask = _mm256_set1_epi16(static_cast<unsigned short>(0x8000));\n  return _mm256_xor_si256(a, sign_mask);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet16bf pconj(const Packet16bf& a) {\n  return a;\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet16bf pabs(const Packet16bf& a) {\n  const __m256i sign_mask = _mm256_set1_epi16(static_cast<numext::uint16_t>(0x8000));\n  return _mm256_andnot_si256(sign_mask, a);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet16bf padd<Packet16bf>(const Packet16bf& a,\n                                                const Packet16bf& b) {\n  return F32ToBf16(padd<Packet16f>(Bf16ToF32(a), Bf16ToF32(b)));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet16bf psub<Packet16bf>(const Packet16bf& a,\n                                                const Packet16bf& b) {\n  return F32ToBf16(psub<Packet16f>(Bf16ToF32(a), Bf16ToF32(b)));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet16bf pmul<Packet16bf>(const Packet16bf& a,\n                                                const Packet16bf& b) {\n  return F32ToBf16(pmul<Packet16f>(Bf16ToF32(a), Bf16ToF32(b)));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet16bf pdiv<Packet16bf>(const Packet16bf& a,\n                                                const Packet16bf& b) {\n  return F32ToBf16(pdiv<Packet16f>(Bf16ToF32(a), Bf16ToF32(b)));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet16bf pmin<Packet16bf>(const Packet16bf& a,\n                                                const Packet16bf& b) {\n  return F32ToBf16(pmin<Packet16f>(Bf16ToF32(a), Bf16ToF32(b)));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet16bf pmax<Packet16bf>(const Packet16bf& a,\n                                                const Packet16bf& b) {\n  return F32ToBf16(pmax<Packet16f>(Bf16ToF32(a), 
Bf16ToF32(b)));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet16bf plset<Packet16bf>(const bfloat16& a) {\n  return F32ToBf16(plset<Packet16f>(static_cast<float>(a)));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet8bf predux_half_dowto4<Packet16bf>(const Packet16bf& a) {\n  Packet8bf lane0 = _mm256_extractf128_si256(a, 0);\n  Packet8bf lane1 = _mm256_extractf128_si256(a, 1);\n  return padd<Packet8bf>(lane0, lane1);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE bfloat16 predux<Packet16bf>(const Packet16bf& p) {\n  return static_cast<bfloat16>(predux<Packet16f>(Bf16ToF32(p)));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE bfloat16 predux_mul<Packet16bf>(const Packet16bf& from) {\n  return static_cast<bfloat16>(predux_mul<Packet16f>(Bf16ToF32(from)));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE bfloat16 predux_min<Packet16bf>(const Packet16bf& from) {\n  return static_cast<bfloat16>(predux_min<Packet16f>(Bf16ToF32(from)));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE bfloat16 predux_max<Packet16bf>(const Packet16bf& from) {\n  return static_cast<bfloat16>(predux_max<Packet16f>(Bf16ToF32(from)));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet16bf preverse(const Packet16bf& a) {\n  __m256i m = _mm256_setr_epi8(14,15,12,13,10,11,8,9,6,7,4,5,2,3,0,1,\n                               14,15,12,13,10,11,8,9,6,7,4,5,2,3,0,1);\n\n  Packet16bf res;\n  // Swap hi and lo first because shuffle is in 128-bit lanes.\n  res = _mm256_permute2x128_si256(a, a, 1);\n  // Shuffle 8-bit values in src within 2*128-bit lanes.\n  return _mm256_shuffle_epi8(res, m);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet16bf pgather<bfloat16, Packet16bf>(const bfloat16* from,\n                                                             Index stride) {\n  return _mm256_set_epi16(\n      from[15*stride].value, from[14*stride].value, from[13*stride].value, from[12*stride].value,\n      from[11*stride].value, from[10*stride].value, from[9*stride].value, from[8*stride].value,\n      from[7*stride].value, from[6*stride].value, from[5*stride].value, from[4*stride].value,\n      from[3*stride].value, from[2*stride].value, from[1*stride].value, from[0*stride].value);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE void pscatter<bfloat16, Packet16bf>(bfloat16* to,\n                                                        const Packet16bf& from,\n                                                        Index stride) {\n  EIGEN_ALIGN64 bfloat16 aux[16];\n  pstore(aux, from);\n  to[stride*0] = aux[0];\n  to[stride*1] = aux[1];\n  to[stride*2] = aux[2];\n  to[stride*3] = aux[3];\n  to[stride*4] = aux[4];\n  to[stride*5] = aux[5];\n  to[stride*6] = aux[6];\n  to[stride*7] = aux[7];\n  to[stride*8] = aux[8];\n  to[stride*9] = aux[9];\n  to[stride*10] = aux[10];\n  to[stride*11] = aux[11];\n  to[stride*12] = aux[12];\n  to[stride*13] = aux[13];\n  to[stride*14] = aux[14];\n  to[stride*15] = aux[15];\n}\n\nEIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet16bf,16>& kernel) {\n  __m256i a = kernel.packet[0];\n  __m256i b = kernel.packet[1];\n  __m256i c = kernel.packet[2];\n  __m256i d = kernel.packet[3];\n  __m256i e = kernel.packet[4];\n  __m256i f = kernel.packet[5];\n  __m256i g = kernel.packet[6];\n  __m256i h = kernel.packet[7];\n  __m256i i = kernel.packet[8];\n  __m256i j = kernel.packet[9];\n  __m256i k = kernel.packet[10];\n  __m256i l = kernel.packet[11];\n  __m256i m = kernel.packet[12];\n  __m256i n = kernel.packet[13];\n  __m256i o = kernel.packet[14];\n  __m256i p = kernel.packet[15];\n\n  __m256i ab_07 = _mm256_unpacklo_epi16(a, b);\n  __m256i cd_07 = 
_mm256_unpacklo_epi16(c, d);\n  __m256i ef_07 = _mm256_unpacklo_epi16(e, f);\n  __m256i gh_07 = _mm256_unpacklo_epi16(g, h);\n  __m256i ij_07 = _mm256_unpacklo_epi16(i, j);\n  __m256i kl_07 = _mm256_unpacklo_epi16(k, l);\n  __m256i mn_07 = _mm256_unpacklo_epi16(m, n);\n  __m256i op_07 = _mm256_unpacklo_epi16(o, p);\n\n  __m256i ab_8f = _mm256_unpackhi_epi16(a, b);\n  __m256i cd_8f = _mm256_unpackhi_epi16(c, d);\n  __m256i ef_8f = _mm256_unpackhi_epi16(e, f);\n  __m256i gh_8f = _mm256_unpackhi_epi16(g, h);\n  __m256i ij_8f = _mm256_unpackhi_epi16(i, j);\n  __m256i kl_8f = _mm256_unpackhi_epi16(k, l);\n  __m256i mn_8f = _mm256_unpackhi_epi16(m, n);\n  __m256i op_8f = _mm256_unpackhi_epi16(o, p);\n\n  __m256i abcd_03 = _mm256_unpacklo_epi32(ab_07, cd_07);\n  __m256i abcd_47 = _mm256_unpackhi_epi32(ab_07, cd_07);\n  __m256i efgh_03 = _mm256_unpacklo_epi32(ef_07, gh_07);\n  __m256i efgh_47 = _mm256_unpackhi_epi32(ef_07, gh_07);\n  __m256i ijkl_03 = _mm256_unpacklo_epi32(ij_07, kl_07);\n  __m256i ijkl_47 = _mm256_unpackhi_epi32(ij_07, kl_07);\n  __m256i mnop_03 = _mm256_unpacklo_epi32(mn_07, op_07);\n  __m256i mnop_47 = _mm256_unpackhi_epi32(mn_07, op_07);\n\n  __m256i abcd_8b = _mm256_unpacklo_epi32(ab_8f, cd_8f);\n  __m256i abcd_cf = _mm256_unpackhi_epi32(ab_8f, cd_8f);\n  __m256i efgh_8b = _mm256_unpacklo_epi32(ef_8f, gh_8f);\n  __m256i efgh_cf = _mm256_unpackhi_epi32(ef_8f, gh_8f);\n  __m256i ijkl_8b = _mm256_unpacklo_epi32(ij_8f, kl_8f);\n  __m256i ijkl_cf = _mm256_unpackhi_epi32(ij_8f, kl_8f);\n  __m256i mnop_8b = _mm256_unpacklo_epi32(mn_8f, op_8f);\n  __m256i mnop_cf = _mm256_unpackhi_epi32(mn_8f, op_8f);\n\n  __m256i abcdefgh_01 = _mm256_unpacklo_epi64(abcd_03, efgh_03);\n  __m256i abcdefgh_23 = _mm256_unpackhi_epi64(abcd_03, efgh_03);\n  __m256i ijklmnop_01 = _mm256_unpacklo_epi64(ijkl_03, mnop_03);\n  __m256i ijklmnop_23 = _mm256_unpackhi_epi64(ijkl_03, mnop_03);\n  __m256i abcdefgh_45 = _mm256_unpacklo_epi64(abcd_47, efgh_47);\n  __m256i abcdefgh_67 = _mm256_unpackhi_epi64(abcd_47, efgh_47);\n  __m256i ijklmnop_45 = _mm256_unpacklo_epi64(ijkl_47, mnop_47);\n  __m256i ijklmnop_67 = _mm256_unpackhi_epi64(ijkl_47, mnop_47);\n  __m256i abcdefgh_89 = _mm256_unpacklo_epi64(abcd_8b, efgh_8b);\n  __m256i abcdefgh_ab = _mm256_unpackhi_epi64(abcd_8b, efgh_8b);\n  __m256i ijklmnop_89 = _mm256_unpacklo_epi64(ijkl_8b, mnop_8b);\n  __m256i ijklmnop_ab = _mm256_unpackhi_epi64(ijkl_8b, mnop_8b);\n  __m256i abcdefgh_cd = _mm256_unpacklo_epi64(abcd_cf, efgh_cf);\n  __m256i abcdefgh_ef = _mm256_unpackhi_epi64(abcd_cf, efgh_cf);\n  __m256i ijklmnop_cd = _mm256_unpacklo_epi64(ijkl_cf, mnop_cf);\n  __m256i ijklmnop_ef = _mm256_unpackhi_epi64(ijkl_cf, mnop_cf);\n\n  // NOTE: no unpacklo/hi instr in this case, so using permute instr.\n  kernel.packet[0] = _mm256_permute2x128_si256(abcdefgh_01, ijklmnop_01, 0x20);\n  kernel.packet[1] = _mm256_permute2x128_si256(abcdefgh_23, ijklmnop_23, 0x20);\n  kernel.packet[2] = _mm256_permute2x128_si256(abcdefgh_45, ijklmnop_45, 0x20);\n  kernel.packet[3] = _mm256_permute2x128_si256(abcdefgh_67, ijklmnop_67, 0x20);\n  kernel.packet[4] = _mm256_permute2x128_si256(abcdefgh_89, ijklmnop_89, 0x20);\n  kernel.packet[5] = _mm256_permute2x128_si256(abcdefgh_ab, ijklmnop_ab, 0x20);\n  kernel.packet[6] = _mm256_permute2x128_si256(abcdefgh_cd, ijklmnop_cd, 0x20);\n  kernel.packet[7] = _mm256_permute2x128_si256(abcdefgh_ef, ijklmnop_ef, 0x20);\n  kernel.packet[8] = _mm256_permute2x128_si256(abcdefgh_01, ijklmnop_01, 0x31);\n  kernel.packet[9] = 
_mm256_permute2x128_si256(abcdefgh_23, ijklmnop_23, 0x31);\n  kernel.packet[10] = _mm256_permute2x128_si256(abcdefgh_45, ijklmnop_45, 0x31);\n  kernel.packet[11] = _mm256_permute2x128_si256(abcdefgh_67, ijklmnop_67, 0x31);\n  kernel.packet[12] = _mm256_permute2x128_si256(abcdefgh_89, ijklmnop_89, 0x31);\n  kernel.packet[13] = _mm256_permute2x128_si256(abcdefgh_ab, ijklmnop_ab, 0x31);\n  kernel.packet[14] = _mm256_permute2x128_si256(abcdefgh_cd, ijklmnop_cd, 0x31);\n  kernel.packet[15] = _mm256_permute2x128_si256(abcdefgh_ef, ijklmnop_ef, 0x31);\n}\n\nEIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet16bf,4>& kernel) {\n  __m256i a = kernel.packet[0];\n  __m256i b = kernel.packet[1];\n  __m256i c = kernel.packet[2];\n  __m256i d = kernel.packet[3];\n\n  __m256i ab_07 = _mm256_unpacklo_epi16(a, b);\n  __m256i cd_07 = _mm256_unpacklo_epi16(c, d);\n  __m256i ab_8f = _mm256_unpackhi_epi16(a, b);\n  __m256i cd_8f = _mm256_unpackhi_epi16(c, d);\n\n  __m256i abcd_03 = _mm256_unpacklo_epi32(ab_07, cd_07);\n  __m256i abcd_47 = _mm256_unpackhi_epi32(ab_07, cd_07);\n  __m256i abcd_8b = _mm256_unpacklo_epi32(ab_8f, cd_8f);\n  __m256i abcd_cf = _mm256_unpackhi_epi32(ab_8f, cd_8f);\n\n  // NOTE: no unpacklo/hi instr in this case, so using permute instr.\n  kernel.packet[0] = _mm256_permute2x128_si256(abcd_03, abcd_47, 0x20);\n  kernel.packet[1] = _mm256_permute2x128_si256(abcd_8b, abcd_cf, 0x20);\n  kernel.packet[2] = _mm256_permute2x128_si256(abcd_03, abcd_47, 0x31);\n  kernel.packet[3] = _mm256_permute2x128_si256(abcd_8b, abcd_cf, 0x31);\n}\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_PACKET_MATH_AVX512_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/arch/AVX512/TypeCasting.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2019 Rasmus Munk Larsen <rmlarsen@google.com>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_TYPE_CASTING_AVX512_H\n#define EIGEN_TYPE_CASTING_AVX512_H\n\nnamespace Eigen {\n\nnamespace internal {\n\ntemplate<> EIGEN_STRONG_INLINE Packet16i pcast<Packet16f, Packet16i>(const Packet16f& a) {\n  return _mm512_cvttps_epi32(a);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet16f pcast<Packet16i, Packet16f>(const Packet16i& a) {\n  return _mm512_cvtepi32_ps(a);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet16i preinterpret<Packet16i, Packet16f>(const Packet16f& a) {\n  return _mm512_castps_si512(a);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet16f preinterpret<Packet16f, Packet16i>(const Packet16i& a) {\n  return _mm512_castsi512_ps(a);\n}\n\ntemplate <>\nstruct type_casting_traits<half, float> {\n  enum {\n    VectorizedCast = 1,\n    SrcCoeffRatio = 1,\n    TgtCoeffRatio = 1\n  };\n};\n\ntemplate<> EIGEN_STRONG_INLINE Packet16f pcast<Packet16h, Packet16f>(const Packet16h& a) {\n  return half2float(a);\n}\n\ntemplate <>\nstruct type_casting_traits<float, half> {\n  enum {\n    VectorizedCast = 1,\n    SrcCoeffRatio = 1,\n    TgtCoeffRatio = 1\n  };\n};\n\ntemplate<> EIGEN_STRONG_INLINE Packet16h pcast<Packet16f, Packet16h>(const Packet16f& a) {\n  return float2half(a);\n}\n\ntemplate <>\nstruct type_casting_traits<bfloat16, float> {\n  enum {\n    VectorizedCast = 1,\n    SrcCoeffRatio = 1,\n    TgtCoeffRatio = 1\n  };\n};\n\ntemplate<> EIGEN_STRONG_INLINE Packet16f pcast<Packet16bf, Packet16f>(const Packet16bf& a) {\n  return Bf16ToF32(a);\n}\n\ntemplate <>\nstruct type_casting_traits<float, bfloat16> {\n  enum {\n    VectorizedCast = 1,\n    SrcCoeffRatio = 1,\n    TgtCoeffRatio = 1\n  };\n};\n\ntemplate<> EIGEN_STRONG_INLINE Packet16bf pcast<Packet16f, Packet16bf>(const Packet16f& a) {\n  return F32ToBf16(a);\n}\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_TYPE_CASTING_AVX512_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/arch/AltiVec/Complex.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr>\n// Copyright (C) 2010-2016 Konstantinos Margaritis <markos@freevec.org>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_COMPLEX32_ALTIVEC_H\n#define EIGEN_COMPLEX32_ALTIVEC_H\n\nnamespace Eigen {\n\nnamespace internal {\n\nstatic Packet4ui  p4ui_CONJ_XOR = vec_mergeh((Packet4ui)p4i_ZERO, (Packet4ui)p4f_MZERO);//{ 0x00000000, 0x80000000, 0x00000000, 0x80000000 };\n#ifdef __VSX__\n#if defined(_BIG_ENDIAN)\nstatic Packet2ul  p2ul_CONJ_XOR1 = (Packet2ul) vec_sld((Packet4ui) p2d_MZERO, (Packet4ui) p2l_ZERO, 8);//{ 0x8000000000000000, 0x0000000000000000 };\nstatic Packet2ul  p2ul_CONJ_XOR2 = (Packet2ul) vec_sld((Packet4ui) p2l_ZERO,  (Packet4ui) p2d_MZERO, 8);//{ 0x8000000000000000, 0x0000000000000000 };\n#else\nstatic Packet2ul  p2ul_CONJ_XOR1 = (Packet2ul) vec_sld((Packet4ui) p2l_ZERO,  (Packet4ui) p2d_MZERO, 8);//{ 0x8000000000000000, 0x0000000000000000 };\nstatic Packet2ul  p2ul_CONJ_XOR2 = (Packet2ul) vec_sld((Packet4ui) p2d_MZERO, (Packet4ui) p2l_ZERO, 8);//{ 0x8000000000000000, 0x0000000000000000 };\n#endif\n#endif\n\n//---------- float ----------\nstruct Packet2cf\n{\n  EIGEN_STRONG_INLINE explicit Packet2cf() {}\n  EIGEN_STRONG_INLINE explicit Packet2cf(const Packet4f& a) : v(a) {}\n\n  EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b)\n  {\n    Packet4f v1, v2;\n\n    // Permute and multiply the real parts of a and b\n    v1 = vec_perm(a.v, a.v, p16uc_PSET32_WODD);\n    // Get the imaginary parts of a\n    v2 = vec_perm(a.v, a.v, p16uc_PSET32_WEVEN);\n    // multiply a_re * b\n    v1 = vec_madd(v1, b.v, p4f_ZERO);\n    // multiply a_im * b and get the conjugate result\n    v2 = vec_madd(v2, b.v, p4f_ZERO);\n    v2 = reinterpret_cast<Packet4f>(pxor(v2, reinterpret_cast<Packet4f>(p4ui_CONJ_XOR)));\n    // permute back to a proper order\n    v2 = vec_perm(v2, v2, p16uc_COMPLEX32_REV);\n\n    return Packet2cf(padd<Packet4f>(v1, v2));\n  }\n\n  EIGEN_STRONG_INLINE Packet2cf& operator*=(const Packet2cf& b) {\n    v = pmul(Packet2cf(*this), b).v;\n    return *this;\n  }\n  EIGEN_STRONG_INLINE Packet2cf operator*(const Packet2cf& b) const {\n    return Packet2cf(*this) *= b;\n  }\n\n  EIGEN_STRONG_INLINE Packet2cf& operator+=(const Packet2cf& b) {\n    v = padd(v, b.v);\n    return *this;\n  }\n  EIGEN_STRONG_INLINE Packet2cf operator+(const Packet2cf& b) const {\n    return Packet2cf(*this) += b;\n  }\n  EIGEN_STRONG_INLINE Packet2cf& operator-=(const Packet2cf& b) {\n    v = psub(v, b.v);\n    return *this;\n  }\n  EIGEN_STRONG_INLINE Packet2cf operator-(const Packet2cf& b) const {\n    return Packet2cf(*this) -= b;\n  }\n  EIGEN_STRONG_INLINE Packet2cf operator-(void) const {\n    return Packet2cf(-v);\n  }\n\n  Packet4f  v;\n};\n\ntemplate<> struct packet_traits<std::complex<float> >  : default_packet_traits\n{\n  typedef Packet2cf type;\n  typedef Packet2cf half;\n  typedef Packet4f as_real;\n  enum {\n    Vectorizable = 1,\n    AlignedOnScalar = 1,\n    size = 2,\n    HasHalfPacket = 0,\n\n    HasAdd    = 1,\n    HasSub    = 1,\n    HasMul    = 1,\n    HasDiv    = 1,\n    HasNegate = 1,\n    HasAbs    = 0,\n    HasAbs2   = 0,\n    HasMin    = 0,\n    HasMax    = 0,\n#ifdef __VSX__\n    HasBlend  = 1,\n#endif\n    
HasSetLinear = 0\n  };\n};\n\ntemplate<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet2cf half; typedef Packet4f as_real; };\n\ntemplate<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>&  from)\n{\n  Packet2cf res;\n  if((std::ptrdiff_t(&from) % 16) == 0)\n    res.v = pload<Packet4f>((const float *)&from);\n  else\n    res.v = ploadu<Packet4f>((const float *)&from);\n  res.v = vec_perm(res.v, res.v, p16uc_PSET64_HI);\n  return res;\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet2cf pload<Packet2cf>(const std::complex<float>*        from) { return Packet2cf(pload<Packet4f>((const float *) from)); }\ntemplate<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>*       from) { return Packet2cf(ploadu<Packet4f>((const float*) from)); }\ntemplate<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>*     from) { return pset1<Packet2cf>(*from); }\n\ntemplate<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> *   to, const Packet2cf& from) { pstore((float*)to, from.v); }\ntemplate<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> *   to, const Packet2cf& from) { pstoreu((float*)to, from.v); }\n\nEIGEN_STRONG_INLINE Packet2cf pload2(const std::complex<float>* from0, const std::complex<float>* from1)\n{\n  Packet4f res0, res1;\n#ifdef __VSX__\n  __asm__ (\"lxsdx %x0,%y1\" : \"=wa\" (res0) : \"Z\" (*from0));\n  __asm__ (\"lxsdx %x0,%y1\" : \"=wa\" (res1) : \"Z\" (*from1));\n#ifdef _BIG_ENDIAN\n  __asm__ (\"xxpermdi %x0, %x1, %x2, 0\" : \"=wa\" (res0) : \"wa\" (res0), \"wa\" (res1));\n#else\n  __asm__ (\"xxpermdi %x0, %x2, %x1, 0\" : \"=wa\" (res0) : \"wa\" (res0), \"wa\" (res1));\n#endif\n#else\n  *reinterpret_cast<std::complex<float> *>(&res0) = *from0;\n  *reinterpret_cast<std::complex<float> *>(&res1) = *from1;\n  res0 = vec_perm(res0, res1, p16uc_TRANSPOSE64_HI);\n#endif\n  return Packet2cf(res0);\n}\n\ntemplate<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, Index stride)\n{\n  EIGEN_ALIGN16 std::complex<float> af[2];\n  af[0] = from[0*stride];\n  af[1] = from[1*stride];\n  return pload<Packet2cf>(af);\n}\ntemplate<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, Index stride)\n{\n  EIGEN_ALIGN16 std::complex<float> af[2];\n  pstore<std::complex<float> >((std::complex<float> *) af, from);\n  to[0*stride] = af[0];\n  to[1*stride] = af[1];\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(a.v + b.v); }\ntemplate<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(a.v - b.v); }\ntemplate<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a) { return Packet2cf(pnegate(a.v)); }\ntemplate<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a) { return Packet2cf(pxor<Packet4f>(a.v, reinterpret_cast<Packet4f>(p4ui_CONJ_XOR))); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet2cf pand   <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pand<Packet4f>(a.v, b.v)); }\ntemplate<> EIGEN_STRONG_INLINE Packet2cf por    <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(por<Packet4f>(a.v, b.v)); }\ntemplate<> EIGEN_STRONG_INLINE Packet2cf pxor 
  <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pxor<Packet4f>(a.v, b.v)); }\ntemplate<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pandnot<Packet4f>(a.v, b.v)); }\n\ntemplate<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr)    { EIGEN_PPC_PREFETCH(addr); }\n\ntemplate<> EIGEN_STRONG_INLINE std::complex<float>  pfirst<Packet2cf>(const Packet2cf& a)\n{\n  EIGEN_ALIGN16 std::complex<float> res[2];\n  pstore((float *)&res, a.v);\n\n  return res[0];\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a)\n{\n  Packet4f rev_a;\n  rev_a = vec_perm(a.v, a.v, p16uc_COMPLEX32_REV2);\n  return Packet2cf(rev_a);\n}\n\ntemplate<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a)\n{\n  Packet4f b;\n  b = vec_sld(a.v, a.v, 8);\n  b = padd<Packet4f>(a.v, b);\n  return pfirst<Packet2cf>(Packet2cf(b));\n}\n\ntemplate<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a)\n{\n  Packet4f b;\n  Packet2cf prod;\n  b = vec_sld(a.v, a.v, 8);\n  prod = pmul<Packet2cf>(a, Packet2cf(b));\n\n  return pfirst<Packet2cf>(prod);\n}\n\nEIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f)\n\ntemplate<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)\n{\n  // TODO optimize it for AltiVec\n  Packet2cf res = pmul(a, pconj(b));\n  Packet4f s = pmul<Packet4f>(b.v, b.v);\n  return Packet2cf(pdiv(res.v, padd<Packet4f>(s, vec_perm(s, s, p16uc_COMPLEX32_REV))));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet2cf pcplxflip<Packet2cf>(const Packet2cf& x)\n{\n  return Packet2cf(vec_perm(x.v, x.v, p16uc_COMPLEX32_REV));\n}\n\nEIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet2cf,2>& kernel)\n{\n  Packet4f tmp = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_HI);\n  kernel.packet[1].v = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_LO);\n  kernel.packet[0].v = tmp;\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet2cf pcmp_eq(const Packet2cf& a, const Packet2cf& b) {\n  Packet4f eq = reinterpret_cast<Packet4f>(vec_cmpeq(a.v,b.v));\n  return Packet2cf(vec_and(eq, vec_perm(eq, eq, p16uc_COMPLEX32_REV)));\n}\n\n#ifdef __VSX__\ntemplate<> EIGEN_STRONG_INLINE Packet2cf pblend(const Selector<2>& ifPacket, const Packet2cf& thenPacket, const Packet2cf& elsePacket) {\n  Packet2cf result;\n  result.v = reinterpret_cast<Packet4f>(pblend<Packet2d>(ifPacket, reinterpret_cast<Packet2d>(thenPacket.v), reinterpret_cast<Packet2d>(elsePacket.v)));\n  return result;\n}\n#endif\n\ntemplate<> EIGEN_STRONG_INLINE Packet2cf psqrt<Packet2cf>(const Packet2cf& a)\n{\n  return psqrt_complex<Packet2cf>(a);\n}\n\n//---------- double ----------\n#ifdef __VSX__\nstruct Packet1cd\n{\n  EIGEN_STRONG_INLINE Packet1cd() {}\n  EIGEN_STRONG_INLINE explicit Packet1cd(const Packet2d& a) : v(a) {}\n\n  EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b)\n  {\n    Packet2d a_re, a_im, v1, v2;\n\n    // Permute and multiply the real parts of a and b\n    a_re = vec_perm(a.v, a.v, p16uc_PSET64_HI);\n    // Get the imaginary parts of a\n    a_im = vec_perm(a.v, a.v, p16uc_PSET64_LO);\n    // multiply a_re * b\n    v1 = vec_madd(a_re, b.v, p2d_ZERO);\n    // multiply a_im * b and get the conjugate result\n    v2 = vec_madd(a_im, b.v, p2d_ZERO);\n    v2 = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(v2), reinterpret_cast<Packet4ui>(v2), 8));\n    v2 = pxor(v2, 
reinterpret_cast<Packet2d>(p2ul_CONJ_XOR1));\n\n    return Packet1cd(padd<Packet2d>(v1, v2));\n  }\n\n  EIGEN_STRONG_INLINE Packet1cd& operator*=(const Packet1cd& b) {\n    v = pmul(Packet1cd(*this), b).v;\n    return *this;\n  }\n  EIGEN_STRONG_INLINE Packet1cd operator*(const Packet1cd& b) const {\n    return Packet1cd(*this) *= b;\n  }\n\n  EIGEN_STRONG_INLINE Packet1cd& operator+=(const Packet1cd& b) {\n    v = padd(v, b.v);\n    return *this;\n  }\n  EIGEN_STRONG_INLINE Packet1cd operator+(const Packet1cd& b) const {\n    return Packet1cd(*this) += b;\n  }\n  EIGEN_STRONG_INLINE Packet1cd& operator-=(const Packet1cd& b) {\n    v = psub(v, b.v);\n    return *this;\n  }\n  EIGEN_STRONG_INLINE Packet1cd operator-(const Packet1cd& b) const {\n    return Packet1cd(*this) -= b;\n  }\n  EIGEN_STRONG_INLINE Packet1cd operator-(void) const {\n    return Packet1cd(-v);\n  }\n\n  Packet2d v;\n};\n\ntemplate<> struct packet_traits<std::complex<double> >  : default_packet_traits\n{\n  typedef Packet1cd type;\n  typedef Packet1cd half;\n  typedef Packet2d as_real;\n  enum {\n    Vectorizable = 1,\n    AlignedOnScalar = 0,\n    size = 1,\n    HasHalfPacket = 0,\n\n    HasAdd    = 1,\n    HasSub    = 1,\n    HasMul    = 1,\n    HasDiv    = 1,\n    HasNegate = 1,\n    HasAbs    = 0,\n    HasAbs2   = 0,\n    HasMin    = 0,\n    HasMax    = 0,\n    HasSetLinear = 0\n  };\n};\n\ntemplate<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet1cd half; typedef Packet2d as_real; };\n\ntemplate<> EIGEN_STRONG_INLINE Packet1cd pload <Packet1cd>(const std::complex<double>* from) { return Packet1cd(pload<Packet2d>((const double*)from)); }\ntemplate<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from) { return Packet1cd(ploadu<Packet2d>((const double*)from)); }\ntemplate<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> *   to, const Packet1cd& from) { pstore((double*)to, from.v); }\ntemplate<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> *   to, const Packet1cd& from) { pstoreu((double*)to, from.v); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>&  from)\n{ /* here we really have to use unaligned loads :( */ return ploadu<Packet1cd>(&from); }\n\ntemplate<> EIGEN_DEVICE_FUNC inline Packet1cd pgather<std::complex<double>, Packet1cd>(const std::complex<double>* from, Index)\n{\n  return pload<Packet1cd>(from);\n}\ntemplate<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet1cd>(std::complex<double>* to, const Packet1cd& from, Index)\n{\n  pstore<std::complex<double> >(to, from);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(a.v + b.v); }\ntemplate<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(a.v - b.v); }\ntemplate<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) { return Packet1cd(pnegate(Packet2d(a.v))); }\ntemplate<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a) { return Packet1cd(pxor(a.v, reinterpret_cast<Packet2d>(p2ul_CONJ_XOR2))); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet1cd pand   <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(pand(a.v,b.v)); }\ntemplate<> EIGEN_STRONG_INLINE Packet1cd por    <Packet1cd>(const Packet1cd& a, const 
Packet1cd& b) { return Packet1cd(por(a.v,b.v)); }\ntemplate<> EIGEN_STRONG_INLINE Packet1cd pxor   <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(pxor(a.v,b.v)); }\ntemplate<> EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(pandnot(a.v, b.v)); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>*     from)  { return pset1<Packet1cd>(*from); }\n\ntemplate<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr)    { EIGEN_PPC_PREFETCH(addr); }\n\ntemplate<> EIGEN_STRONG_INLINE std::complex<double>  pfirst<Packet1cd>(const Packet1cd& a)\n{\n  EIGEN_ALIGN16 std::complex<double> res[2];\n  pstore<std::complex<double> >(res, a);\n\n  return res[0];\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a) { return a; }\n\ntemplate<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Packet1cd& a) { return pfirst(a); }\n\ntemplate<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a) { return pfirst(a); }\n\nEIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d)\n\ntemplate<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)\n{\n  // TODO optimize it for AltiVec\n  Packet1cd res = pmul(a,pconj(b));\n  Packet2d s = pmul<Packet2d>(b.v, b.v);\n  return Packet1cd(pdiv(res.v, padd<Packet2d>(s, vec_perm(s, s, p16uc_REVERSE64))));\n}\n\nEIGEN_STRONG_INLINE Packet1cd pcplxflip/*<Packet1cd>*/(const Packet1cd& x)\n{\n  return Packet1cd(preverse(Packet2d(x.v)));\n}\n\nEIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet1cd,2>& kernel)\n{\n  Packet2d tmp = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_HI);\n  kernel.packet[1].v = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_LO);\n  kernel.packet[0].v = tmp;\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet1cd pcmp_eq(const Packet1cd& a, const Packet1cd& b) {\n  // Compare real and imaginary parts of a and b to get the mask vector:\n  // [re(a)==re(b), im(a)==im(b)]\n  Packet2d eq = reinterpret_cast<Packet2d>(vec_cmpeq(a.v,b.v));\n  // Swap real/imag elements in the mask in to get:\n  // [im(a)==im(b), re(a)==re(b)]\n  Packet2d eq_swapped = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(eq), reinterpret_cast<Packet4ui>(eq), 8));\n  // Return re(a)==re(b) & im(a)==im(b) by computing bitwise AND of eq and eq_swapped\n  return Packet1cd(vec_and(eq, eq_swapped));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet1cd psqrt<Packet1cd>(const Packet1cd& a)\n{\n  return psqrt_complex<Packet1cd>(a);\n}\n\n#endif // __VSX__\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_COMPLEX32_ALTIVEC_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/arch/AltiVec/MathFunctions.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2007 Julien Pommier\n// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>\n// Copyright (C) 2016 Konstantinos Margaritis <markos@freevec.org>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_MATH_FUNCTIONS_ALTIVEC_H\n#define EIGEN_MATH_FUNCTIONS_ALTIVEC_H\n\nnamespace Eigen {\n\nnamespace internal {\n\ntemplate<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED\nPacket4f plog<Packet4f>(const Packet4f& _x)\n{\n  return plog_float(_x);\n}\n\ntemplate<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED\nPacket4f pexp<Packet4f>(const Packet4f& _x)\n{\n  return pexp_float(_x);\n}\n\ntemplate<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED\nPacket4f psin<Packet4f>(const Packet4f& _x)\n{\n  return psin_float(_x);\n}\n\ntemplate<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED\nPacket4f pcos<Packet4f>(const Packet4f& _x)\n{\n  return pcos_float(_x);\n}\n\n#ifndef EIGEN_COMP_CLANG\ntemplate<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED\nPacket4f prsqrt<Packet4f>(const Packet4f& x)\n{\n  return  vec_rsqrt(x);\n}\n#endif\n\n#ifdef __VSX__\n#ifndef EIGEN_COMP_CLANG\ntemplate<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED\nPacket2d prsqrt<Packet2d>(const Packet2d& x)\n{\n  return  vec_rsqrt(x);\n}\n#endif\n\ntemplate<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED\nPacket4f psqrt<Packet4f>(const Packet4f& x)\n{\n  return  vec_sqrt(x);\n}\n\ntemplate<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED\nPacket2d psqrt<Packet2d>(const Packet2d& x)\n{\n  return  vec_sqrt(x);\n}\n\ntemplate<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED\nPacket2d pexp<Packet2d>(const Packet2d& _x)\n{\n  return pexp_double(_x);\n}\n#endif\n\n// Hyperbolic Tangent function.\ntemplate <>\nEIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f\nptanh<Packet4f>(const Packet4f& x) {\n  return internal::generic_fast_tanh_float(x);\n}\n\n}  // end namespace internal\n\n}  // end namespace Eigen\n\n#endif  // EIGEN_MATH_FUNCTIONS_ALTIVEC_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/arch/AltiVec/MatrixProduct.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2020 Everton Constantino (everton.constantino@ibm.com)\n// Copyright (C) 2021 Chip Kerchner (chip.kerchner@ibm.com)\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_MATRIX_PRODUCT_ALTIVEC_H\n#define EIGEN_MATRIX_PRODUCT_ALTIVEC_H\n\n#ifndef EIGEN_ALTIVEC_USE_CUSTOM_PACK\n#define EIGEN_ALTIVEC_USE_CUSTOM_PACK    1\n#endif\n\n#include \"MatrixProductCommon.h\"\n\n// Since LLVM doesn't support dynamic dispatching, force either always MMA or VSX\n#if EIGEN_COMP_LLVM\n#if !defined(EIGEN_ALTIVEC_DISABLE_MMA) && !defined(EIGEN_ALTIVEC_MMA_ONLY)\n#ifdef __MMA__\n#define EIGEN_ALTIVEC_MMA_ONLY\n#else\n#define EIGEN_ALTIVEC_DISABLE_MMA\n#endif\n#endif\n#endif\n\n#ifdef __has_builtin\n#if __has_builtin(__builtin_mma_assemble_acc)\n  #define ALTIVEC_MMA_SUPPORT\n#endif\n#endif\n\n#if defined(ALTIVEC_MMA_SUPPORT) && !defined(EIGEN_ALTIVEC_DISABLE_MMA)\n  #include \"MatrixProductMMA.h\"\n#endif\n\n/**************************************************************************************************\n * TODO                                                                                           *\n * - Check StorageOrder on dhs_pack (the innermost second loop seems unvectorized when it could). *\n * - Check the possibility of transposing as GETREAL and GETIMAG when needed.                     *\n **************************************************************************************************/\nnamespace Eigen {\n\nnamespace internal {\n\n/**************************\n * Constants and typedefs *\n **************************/\ntemplate<typename Scalar>\nstruct quad_traits\n{\n  typedef typename packet_traits<Scalar>::type    vectortype;\n  typedef PacketBlock<vectortype,4>                     type;\n  typedef vectortype                                 rhstype;\n  enum\n  {\n    vectorsize = packet_traits<Scalar>::size,\n    size = 4,\n    rows = 4\n  };\n};\n\ntemplate<>\nstruct quad_traits<double>\n{\n  typedef Packet2d                        vectortype;\n  typedef PacketBlock<vectortype,4>             type;\n  typedef PacketBlock<Packet2d,2>            rhstype;\n  enum\n  {\n    vectorsize = packet_traits<double>::size,\n    size = 2,\n    rows = 4\n  };\n};\n\n// MatrixProduct decomposes real/imaginary vectors into a real vector and an imaginary vector, this turned out\n// to be faster than Eigen's usual approach of having real/imaginary pairs on a single vector. 
These constants\n// are responsible for converting between Eigen's layout and the MatrixProduct layout.\n\nconst static Packet16uc p16uc_GETREAL32 = {  0,  1,  2,  3,\n                                             8,  9, 10, 11,\n                                            16, 17, 18, 19,\n                                            24, 25, 26, 27};\n\nconst static Packet16uc p16uc_GETIMAG32 = {  4,  5,  6,  7,\n                                            12, 13, 14, 15,\n                                            20, 21, 22, 23,\n                                            28, 29, 30, 31};\nconst static Packet16uc p16uc_GETREAL64 = {  0,  1,  2,  3,  4,  5,  6,  7,\n                                            16, 17, 18, 19, 20, 21, 22, 23};\n\n//[a,ai],[b,bi] = [ai,bi]\nconst static Packet16uc p16uc_GETIMAG64 = {  8,  9, 10, 11, 12, 13, 14, 15,\n                                            24, 25, 26, 27, 28, 29, 30, 31};\n\n/*********************************************\n * Single precision real and complex packing *\n * *******************************************/\n\n/**\n * Symm packing is related to packing of symmetric adjoint blocks: as expected, the packing leaves\n * the diagonal real, and whatever is below it is copied from the respective upper-diagonal element and\n * conjugated. There's no PanelMode available for symm packing.\n *\n * Packing in general is supposed to leave the lhs block and the rhs block easy for gemm to read using\n * its respective rank-update instructions. The float32/64 versions are different because at this moment\n * the size of the accumulator is fixed at 512 bits, so you can't have a 4x4 accumulator of 64-bit elements.\n *\n * As mentioned earlier, MatrixProduct breaks complex numbers into a real vector and an imaginary vector, so\n * packing has to take that into account: at the moment, we pack the real part first and then the imaginary part.\n * This is the main reason why packing for complex is broken down into several different parts, and also the\n * reason why we end up having float32/64 and complex float32/64 versions.\n **/\ntemplate<typename Scalar, typename Index, int StorageOrder>\nEIGEN_ALWAYS_INLINE std::complex<Scalar> getAdjointVal(Index i, Index j, const_blas_data_mapper<std::complex<Scalar>, Index, StorageOrder>& dt)\n{\n  std::complex<Scalar> v;\n  if(i < j)\n  {\n    v.real( dt(j,i).real());\n    v.imag(-dt(j,i).imag());\n  } else if(i > j)\n  {\n    v.real( dt(i,j).real());\n    v.imag( dt(i,j).imag());\n  } else {\n    v.real( dt(i,j).real());\n    v.imag((Scalar)0.0);\n  }\n  return v;\n}\n\ntemplate<typename Scalar, typename Index, int StorageOrder, int N>\nEIGEN_STRONG_INLINE void symm_pack_complex_rhs_helper(std::complex<Scalar>* blockB, const std::complex<Scalar>* _rhs, Index rhsStride, Index rows, Index cols, Index k2)\n{\n  const Index depth = k2 + rows;\n  const_blas_data_mapper<std::complex<Scalar>, Index, StorageOrder> rhs(_rhs, rhsStride);\n  const Index vectorSize = N*quad_traits<Scalar>::vectorsize;\n  const Index vectorDelta = vectorSize * rows;\n  Scalar* blockBf = reinterpret_cast<Scalar *>(blockB);\n\n  Index rir = 0, rii, j = 0;\n  for(; j + vectorSize <= cols; j+=vectorSize)\n  {\n    rii = rir + vectorDelta;\n\n    for(Index i = k2; i < depth; i++)\n    {\n      for(Index k = 0; k < vectorSize; k++)\n      {\n        std::complex<Scalar> v = getAdjointVal<Scalar, Index, StorageOrder>(i, j + k, rhs);\n\n        blockBf[rir + k] = v.real();\n        blockBf[rii + k] = v.imag();\n      }\n      rir += vectorSize;\n      
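// keep the imaginary write offset advancing in lockstep with the real one\n      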
rii += vectorSize;\n    }\n\n    rir += vectorDelta;\n  }\n  if (j < cols)\n  {\n    rii = rir + ((cols - j) * rows);\n\n    for(Index i = k2; i < depth; i++)\n    {\n      Index k = j;\n      for(; k < cols; k++)\n      {\n        std::complex<Scalar> v = getAdjointVal<Scalar, Index, StorageOrder>(i, k, rhs);\n\n        blockBf[rir] = v.real();\n        blockBf[rii] = v.imag();\n\n        rir += 1;\n        rii += 1;\n      }\n    }\n  }\n}\n\ntemplate<typename Scalar, typename Index, int StorageOrder>\nEIGEN_STRONG_INLINE void symm_pack_complex_lhs_helper(std::complex<Scalar>* blockA, const std::complex<Scalar>* _lhs, Index lhsStride, Index cols, Index rows)\n{\n  const Index depth = cols;\n  const_blas_data_mapper<std::complex<Scalar>, Index, StorageOrder> lhs(_lhs, lhsStride);\n  const Index vectorSize = quad_traits<Scalar>::vectorsize;\n  const Index vectorDelta = vectorSize * depth;\n  Scalar* blockAf = (Scalar *)(blockA);\n\n  Index rir = 0, rii, j = 0;\n  for(; j + vectorSize <= rows; j+=vectorSize)\n  {\n    rii = rir + vectorDelta;\n\n    for(Index i = 0; i < depth; i++)\n    {\n      for(Index k = 0; k < vectorSize; k++)\n      {\n        std::complex<Scalar> v = getAdjointVal<Scalar, Index, StorageOrder>(j+k, i, lhs);\n\n        blockAf[rir + k] = v.real();\n        blockAf[rii + k] = v.imag();\n      }\n      rir += vectorSize;\n      rii += vectorSize;\n    }\n\n    rir += vectorDelta;\n  }\n\n  if (j < rows)\n  {\n    rii = rir + ((rows - j) * depth);\n\n    for(Index i = 0; i < depth; i++)\n    {\n      Index k = j;\n      for(; k < rows; k++)\n      {\n        std::complex<Scalar> v = getAdjointVal<Scalar, Index, StorageOrder>(k, i, lhs);\n\n        blockAf[rir] = v.real();\n        blockAf[rii] = v.imag();\n\n        rir += 1;\n        rii += 1;\n      }\n    }\n  }\n}\n\ntemplate<typename Scalar, typename Index, int StorageOrder, int N>\nEIGEN_STRONG_INLINE void symm_pack_rhs_helper(Scalar* blockB, const Scalar* _rhs, Index rhsStride, Index rows, Index cols, Index k2)\n{\n  const Index depth = k2 + rows;\n  const_blas_data_mapper<Scalar, Index, StorageOrder> rhs(_rhs, rhsStride);\n  const Index vectorSize = quad_traits<Scalar>::vectorsize;\n\n  Index ri = 0, j = 0;\n  for(; j + N*vectorSize <= cols; j+=N*vectorSize)\n  {\n    Index i = k2;\n    for(; i < depth; i++)\n    {\n      for(Index k = 0; k < N*vectorSize; k++)\n      {\n        if(i <= j+k)\n          blockB[ri + k] = rhs(j+k, i);\n        else\n          blockB[ri + k] = rhs(i, j+k);\n      }\n      ri += N*vectorSize;\n    }\n  }\n\n  if (j < cols)\n  {\n    for(Index i = k2; i < depth; i++)\n    {\n      Index k = j;\n      for(; k < cols; k++)\n      {\n        if(k <= i)\n          blockB[ri] = rhs(i, k);\n        else\n          blockB[ri] = rhs(k, i);\n        ri += 1;\n      }\n    }\n  }\n}\n\ntemplate<typename Scalar, typename Index, int StorageOrder>\nEIGEN_STRONG_INLINE void symm_pack_lhs_helper(Scalar* blockA, const Scalar* _lhs, Index lhsStride, Index cols, Index rows)\n{\n  const Index depth = cols;\n  const_blas_data_mapper<Scalar, Index, StorageOrder> lhs(_lhs, lhsStride);\n  const Index vectorSize = quad_traits<Scalar>::vectorsize;\n\n  Index ri = 0, j = 0;\n  for(; j + vectorSize <= rows; j+=vectorSize)\n  {\n    Index i = 0;\n\n    for(; i < depth; i++)\n    {\n      for(Index k = 0; k < vectorSize; k++)\n      {\n        if(i <= j+k)\n          blockA[ri + k] = lhs(j+k, i);\n        else\n          blockA[ri + k] = lhs(i, j+k);\n      }\n      ri += vectorSize;\n    }\n  }\n\n  if (j < 
rows)\n  {\n    for(Index i = 0; i < depth; i++)\n    {\n      Index k = j;\n      for(; k < rows; k++)\n      {\n        if(i <= k)\n          blockA[ri] = lhs(k, i);\n        else\n          blockA[ri] = lhs(i, k);\n        ri += 1;\n      }\n    }\n  }\n}\n\ntemplate<typename Index, int nr, int StorageOrder>\nstruct symm_pack_rhs<std::complex<float>, Index, nr, StorageOrder>\n{\n  void operator()(std::complex<float>* blockB, const std::complex<float>* _rhs, Index rhsStride, Index rows, Index cols, Index k2)\n  {\n    symm_pack_complex_rhs_helper<float, Index, StorageOrder, 1>(blockB, _rhs, rhsStride, rows, cols, k2);\n  }\n};\n\ntemplate<typename Index, int Pack1, int Pack2_dummy, int StorageOrder>\nstruct symm_pack_lhs<std::complex<float>, Index, Pack1, Pack2_dummy, StorageOrder>\n{\n  void operator()(std::complex<float>* blockA, const std::complex<float>* _lhs, Index lhsStride, Index cols, Index rows)\n  {\n    symm_pack_complex_lhs_helper<float, Index, StorageOrder>(blockA, _lhs, lhsStride, cols, rows);\n  }\n};\n\n// *********** symm_pack std::complex<float64> ***********\n\ntemplate<typename Index, int nr, int StorageOrder>\nstruct symm_pack_rhs<std::complex<double>, Index, nr, StorageOrder>\n{\n  void operator()(std::complex<double>* blockB, const std::complex<double>* _rhs, Index rhsStride, Index rows, Index cols, Index k2)\n  {\n    symm_pack_complex_rhs_helper<double, Index, StorageOrder, 2>(blockB, _rhs, rhsStride, rows, cols, k2);\n  }\n};\n\ntemplate<typename Index, int Pack1, int Pack2_dummy, int StorageOrder>\nstruct symm_pack_lhs<std::complex<double>, Index, Pack1, Pack2_dummy, StorageOrder>\n{\n  void operator()(std::complex<double>* blockA, const std::complex<double>* _lhs, Index lhsStride, Index cols, Index rows)\n  {\n    symm_pack_complex_lhs_helper<double, Index, StorageOrder>(blockA, _lhs, lhsStride, cols, rows);\n  }\n};\n\n// *********** symm_pack float32 ***********\ntemplate<typename Index, int nr, int StorageOrder>\nstruct symm_pack_rhs<float, Index, nr, StorageOrder>\n{\n  void operator()(float* blockB, const float* _rhs, Index rhsStride, Index rows, Index cols, Index k2)\n  {\n    symm_pack_rhs_helper<float, Index, StorageOrder, 1>(blockB, _rhs, rhsStride, rows, cols, k2);\n  }\n};\n\ntemplate<typename Index, int Pack1, int Pack2_dummy, int StorageOrder>\nstruct symm_pack_lhs<float, Index, Pack1, Pack2_dummy, StorageOrder>\n{\n  void operator()(float* blockA, const float* _lhs, Index lhsStride, Index cols, Index rows)\n  {\n    symm_pack_lhs_helper<float, Index, StorageOrder>(blockA, _lhs, lhsStride, cols, rows);\n  }\n};\n\n// *********** symm_pack float64 ***********\ntemplate<typename Index, int nr, int StorageOrder>\nstruct symm_pack_rhs<double, Index, nr, StorageOrder>\n{\n  void operator()(double* blockB, const double* _rhs, Index rhsStride, Index rows, Index cols, Index k2)\n  {\n    symm_pack_rhs_helper<double, Index, StorageOrder, 2>(blockB, _rhs, rhsStride, rows, cols, k2);\n  }\n};\n\ntemplate<typename Index, int Pack1, int Pack2_dummy, int StorageOrder>\nstruct symm_pack_lhs<double, Index, Pack1, Pack2_dummy, StorageOrder>\n{\n  void operator()(double* blockA, const double* _lhs, Index lhsStride, Index cols, Index rows)\n  {\n    symm_pack_lhs_helper<double, Index, StorageOrder>(blockA, _lhs, lhsStride, cols, rows);\n  }\n};\n\n/**\n * PanelMode\n * Packing might be called several times before being multiplied by gebp_kernel, this happens because \n * on special occasions it fills part of block with other parts of the matrix. 
Two variables control\n * how PanelMode should behave: offset and stride. The idea is that these variables represent the\n * offset and stride the block will eventually have, and the packing must respect them. The process\n * is to behave as you would with normal packing, but leave the start of each part at the correct offset,\n * and likewise end it respecting the real stride the block will have. Gebp is aware of both blocks' stride\n * and offset and behaves accordingly.\n **/\n\ntemplate<typename Scalar, typename Packet, typename Index>\nEIGEN_ALWAYS_INLINE void storeBlock(Scalar* to, PacketBlock<Packet,4>& block)\n{\n  const Index size = 16 / sizeof(Scalar);\n  pstore<Scalar>(to + (0 * size), block.packet[0]);\n  pstore<Scalar>(to + (1 * size), block.packet[1]);\n  pstore<Scalar>(to + (2 * size), block.packet[2]);\n  pstore<Scalar>(to + (3 * size), block.packet[3]);\n}\n\ntemplate<typename Scalar, typename Packet, typename Index>\nEIGEN_ALWAYS_INLINE void storeBlock(Scalar* to, PacketBlock<Packet,2>& block)\n{\n  const Index size = 16 / sizeof(Scalar);\n  pstore<Scalar>(to + (0 * size), block.packet[0]);\n  pstore<Scalar>(to + (1 * size), block.packet[1]);\n}\n\n// General template for lhs & rhs complex packing.\ntemplate<typename Scalar, typename Index, typename DataMapper, typename Packet, typename PacketC, int StorageOrder, bool Conjugate, bool PanelMode, bool UseLhs>\nstruct dhs_cpack {\n  EIGEN_STRONG_INLINE void operator()(std::complex<Scalar>* blockA, const DataMapper& lhs, Index depth, Index rows, Index stride, Index offset)\n  {\n    const Index vectorSize = quad_traits<Scalar>::vectorsize;\n    const Index vectorDelta = vectorSize * ((PanelMode) ? stride : depth);\n    Index rir = ((PanelMode) ? (vectorSize*offset) : 0), rii;\n    Scalar* blockAt = reinterpret_cast<Scalar *>(blockA);\n    Index j = 0;\n\n    for(; j + vectorSize <= rows; j+=vectorSize)\n    {\n      Index i = 0;\n\n      rii = rir + vectorDelta;\n\n      for(; i + vectorSize <= depth; i+=vectorSize)\n      {\n        PacketBlock<Packet,4> blockr, blocki;\n        PacketBlock<PacketC,8> cblock;\n\n        if (UseLhs) {\n          bload<DataMapper, PacketC, Index, 2, 0, StorageOrder>(cblock, lhs, j, i);\n        } else {\n          bload<DataMapper, PacketC, Index, 2, 0, StorageOrder>(cblock, lhs, i, j);\n        }\n\n        blockr.packet[0] = vec_perm(cblock.packet[0].v, cblock.packet[4].v, p16uc_GETREAL32);\n        blockr.packet[1] = vec_perm(cblock.packet[1].v, cblock.packet[5].v, p16uc_GETREAL32);\n        blockr.packet[2] = vec_perm(cblock.packet[2].v, cblock.packet[6].v, p16uc_GETREAL32);\n        blockr.packet[3] = vec_perm(cblock.packet[3].v, cblock.packet[7].v, p16uc_GETREAL32);\n\n        blocki.packet[0] = vec_perm(cblock.packet[0].v, cblock.packet[4].v, p16uc_GETIMAG32);\n        blocki.packet[1] = vec_perm(cblock.packet[1].v, cblock.packet[5].v, p16uc_GETIMAG32);\n        blocki.packet[2] = vec_perm(cblock.packet[2].v, cblock.packet[6].v, p16uc_GETIMAG32);\n        blocki.packet[3] = vec_perm(cblock.packet[3].v, cblock.packet[7].v, p16uc_GETIMAG32);\n\n        if(Conjugate)\n        {\n          blocki.packet[0] = -blocki.packet[0];\n          blocki.packet[1] = -blocki.packet[1];\n          blocki.packet[2] = -blocki.packet[2];\n          blocki.packet[3] = -blocki.packet[3];\n        }\n\n        if(((StorageOrder == RowMajor) && UseLhs) || (((StorageOrder == ColMajor) && !UseLhs)))\n        {\n          ptranspose(blockr);\n          ptranspose(blocki);\n        }\n\n        
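// store the decoupled halves: real parts at rir, (possibly conjugated) imaginary parts at rii\n        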
storeBlock<Scalar, Packet, Index>(blockAt + rir, blockr);\n        storeBlock<Scalar, Packet, Index>(blockAt + rii, blocki);\n\n        rir += 4*vectorSize;\n        rii += 4*vectorSize;\n      }\n      for(; i < depth; i++)\n      {\n        PacketBlock<Packet,1> blockr, blocki;\n        PacketBlock<PacketC,2> cblock;\n\n        if(((StorageOrder == ColMajor) && UseLhs) || (((StorageOrder == RowMajor) && !UseLhs)))\n        {\n          if (UseLhs) {\n            cblock.packet[0] = lhs.template loadPacket<PacketC>(j + 0, i);\n            cblock.packet[1] = lhs.template loadPacket<PacketC>(j + 2, i);\n          } else {\n            cblock.packet[0] = lhs.template loadPacket<PacketC>(i, j + 0);\n            cblock.packet[1] = lhs.template loadPacket<PacketC>(i, j + 2);\n          }\n        } else {\n          std::complex<Scalar> lhs0, lhs1;\n          if (UseLhs) {\n            lhs0 = lhs(j + 0, i);\n            lhs1 = lhs(j + 1, i);\n            cblock.packet[0] = pload2(&lhs0, &lhs1);\n            lhs0 = lhs(j + 2, i);\n            lhs1 = lhs(j + 3, i);\n            cblock.packet[1] = pload2(&lhs0, &lhs1);\n          } else {\n            lhs0 = lhs(i, j + 0);\n            lhs1 = lhs(i, j + 1);\n            cblock.packet[0] = pload2(&lhs0, &lhs1);\n            lhs0 = lhs(i, j + 2);\n            lhs1 = lhs(i, j + 3);\n            cblock.packet[1] = pload2(&lhs0, &lhs1);\n          }\n        }\n\n        blockr.packet[0] = vec_perm(cblock.packet[0].v, cblock.packet[1].v, p16uc_GETREAL32);\n        blocki.packet[0] = vec_perm(cblock.packet[0].v, cblock.packet[1].v, p16uc_GETIMAG32);\n\n        if(Conjugate)\n        {\n          blocki.packet[0] = -blocki.packet[0];\n        }\n\n        pstore<Scalar>(blockAt + rir, blockr.packet[0]);\n        pstore<Scalar>(blockAt + rii, blocki.packet[0]);\n\n        rir += vectorSize;\n        rii += vectorSize;\n      }\n\n      rir += ((PanelMode) ? (vectorSize*(2*stride - depth)) : vectorDelta);\n    }\n\n    if (j < rows)\n    {\n      if(PanelMode) rir += (offset*(rows - j - vectorSize));\n      rii = rir + (((PanelMode) ? 
stride : depth) * (rows - j));\n\n      for(Index i = 0; i < depth; i++)\n      {\n        Index k = j;\n        for(; k < rows; k++)\n        {\n          if (UseLhs) {\n            blockAt[rir] = lhs(k, i).real();\n\n            if(Conjugate)\n              blockAt[rii] = -lhs(k, i).imag();\n            else\n              blockAt[rii] =  lhs(k, i).imag();\n          } else {\n            blockAt[rir] = lhs(i, k).real();\n\n            if(Conjugate)\n              blockAt[rii] = -lhs(i, k).imag();\n            else\n              blockAt[rii] =  lhs(i, k).imag();\n          }\n\n          rir += 1;\n          rii += 1;\n        }\n      }\n    }\n  }\n};\n\n// General template for lhs & rhs packing.\ntemplate<typename Scalar, typename Index, typename DataMapper, typename Packet, int StorageOrder, bool PanelMode, bool UseLhs>\nstruct dhs_pack{\n  EIGEN_STRONG_INLINE void operator()(Scalar* blockA, const DataMapper& lhs, Index depth, Index rows, Index stride, Index offset)\n  {\n    const Index vectorSize = quad_traits<Scalar>::vectorsize;\n    Index ri = 0, j = 0;\n\n    for(; j + vectorSize <= rows; j+=vectorSize)\n    {\n      Index i = 0;\n\n      if(PanelMode) ri += vectorSize*offset;\n\n      for(; i + vectorSize <= depth; i+=vectorSize)\n      {\n        PacketBlock<Packet,4> block;\n\n        if (UseLhs) {\n          bload<DataMapper, Packet, Index, 4, 0, StorageOrder>(block, lhs, j, i);\n        } else {\n          bload<DataMapper, Packet, Index, 4, 0, StorageOrder>(block, lhs, i, j);\n        }\n        if(((StorageOrder == RowMajor) && UseLhs) || ((StorageOrder == ColMajor) && !UseLhs))\n        {\n          ptranspose(block);\n        }\n\n        storeBlock<Scalar, Packet, Index>(blockA + ri, block);\n\n        ri += 4*vectorSize;\n      }\n      for(; i < depth; i++)\n      {\n        if(((StorageOrder == RowMajor) && UseLhs) || ((StorageOrder == ColMajor) && !UseLhs))\n        {\n          if (UseLhs) {\n            blockA[ri+0] = lhs(j+0, i);\n            blockA[ri+1] = lhs(j+1, i);\n            blockA[ri+2] = lhs(j+2, i);\n            blockA[ri+3] = lhs(j+3, i);\n          } else {\n            blockA[ri+0] = lhs(i, j+0);\n            blockA[ri+1] = lhs(i, j+1);\n            blockA[ri+2] = lhs(i, j+2);\n            blockA[ri+3] = lhs(i, j+3);\n          }\n        } else {\n          Packet lhsV;\n          if (UseLhs) {\n            lhsV = lhs.template loadPacket<Packet>(j, i);\n          } else {\n            lhsV = lhs.template loadPacket<Packet>(i, j);\n          }\n          pstore<Scalar>(blockA + ri, lhsV);\n        }\n\n        ri += vectorSize;\n      }\n\n      if(PanelMode) ri += vectorSize*(stride - offset - depth);\n    }\n\n    if (j < rows)\n    {\n      if(PanelMode) ri += offset*(rows - j);\n\n      for(Index i = 0; i < depth; i++)\n      {\n        Index k = j;\n        for(; k < rows; k++)\n        {\n          if (UseLhs) {\n            blockA[ri] = lhs(k, i);\n          } else {\n            blockA[ri] = lhs(i, k);\n          }\n          ri += 1;\n        }\n      }\n    }\n  }\n};\n\n// General template for lhs packing, float64 specialization.\ntemplate<typename Index, typename DataMapper, int StorageOrder, bool PanelMode>\nstruct dhs_pack<double, Index, DataMapper, Packet2d, StorageOrder, PanelMode, true>\n{\n  EIGEN_STRONG_INLINE void operator()(double* blockA, const DataMapper& lhs, Index depth, Index rows, Index stride, Index offset)\n  {\n    const Index vectorSize = quad_traits<double>::vectorsize;\n    Index ri = 0, j = 0;\n\n    for(; j + 
vectorSize <= rows; j+=vectorSize)\n    {\n      Index i = 0;\n\n      if(PanelMode) ri += vectorSize*offset;\n\n      for(; i + vectorSize <= depth; i+=vectorSize)\n      {\n        PacketBlock<Packet2d,2> block;\n        if(StorageOrder == RowMajor)\n        {\n          block.packet[0] = lhs.template loadPacket<Packet2d>(j + 0, i);\n          block.packet[1] = lhs.template loadPacket<Packet2d>(j + 1, i);\n\n          ptranspose(block);\n        } else {\n          block.packet[0] = lhs.template loadPacket<Packet2d>(j, i + 0);\n          block.packet[1] = lhs.template loadPacket<Packet2d>(j, i + 1);\n        }\n\n        storeBlock<double, Packet2d, Index>(blockA + ri, block);\n\n        ri += 2*vectorSize;\n      }\n      for(; i < depth; i++)\n      {\n        if(StorageOrder == RowMajor)\n        {\n          blockA[ri+0] = lhs(j+0, i);\n          blockA[ri+1] = lhs(j+1, i);\n        } else {\n          Packet2d lhsV = lhs.template loadPacket<Packet2d>(j, i);\n          pstore<double>(blockA + ri, lhsV);\n        }\n\n        ri += vectorSize;\n      }\n\n      if(PanelMode) ri += vectorSize*(stride - offset - depth);\n    }\n\n    if (j < rows)\n    {\n      if(PanelMode) ri += offset*(rows - j);\n\n      for(Index i = 0; i < depth; i++)\n      {\n        Index k = j;\n        for(; k < rows; k++)\n        {\n          blockA[ri] = lhs(k, i);\n          ri += 1;\n        }\n      }\n    }\n  }\n};\n\n// General template for rhs packing, float64 specialization.\ntemplate<typename Index, typename DataMapper, int StorageOrder, bool PanelMode>\nstruct dhs_pack<double, Index, DataMapper, Packet2d, StorageOrder, PanelMode, false>\n{\n  EIGEN_STRONG_INLINE void operator()(double* blockB, const DataMapper& rhs, Index depth, Index cols, Index stride, Index offset)\n  {\n    const Index vectorSize = quad_traits<double>::vectorsize;\n    Index ri = 0, j = 0;\n\n    for(; j + 2*vectorSize <= cols; j+=2*vectorSize)\n    {\n      Index i = 0;\n\n      if(PanelMode) ri += offset*(2*vectorSize);\n\n      for(; i + vectorSize <= depth; i+=vectorSize)\n      {\n        PacketBlock<Packet2d,4> block;\n        if(StorageOrder == ColMajor)\n        {\n          PacketBlock<Packet2d,2> block1, block2;\n          block1.packet[0] = rhs.template loadPacket<Packet2d>(i, j + 0);\n          block1.packet[1] = rhs.template loadPacket<Packet2d>(i, j + 1);\n          block2.packet[0] = rhs.template loadPacket<Packet2d>(i, j + 2);\n          block2.packet[1] = rhs.template loadPacket<Packet2d>(i, j + 3);\n\n          ptranspose(block1);\n          ptranspose(block2);\n\n          pstore<double>(blockB + ri    , block1.packet[0]);\n          pstore<double>(blockB + ri + 2, block2.packet[0]);\n          pstore<double>(blockB + ri + 4, block1.packet[1]);\n          pstore<double>(blockB + ri + 6, block2.packet[1]);\n        } else {\n          block.packet[0] = rhs.template loadPacket<Packet2d>(i + 0, j + 0); //[a1 a2]\n          block.packet[1] = rhs.template loadPacket<Packet2d>(i + 0, j + 2); //[a3 a4]\n          block.packet[2] = rhs.template loadPacket<Packet2d>(i + 1, j + 0); //[b1 b2]\n          block.packet[3] = rhs.template loadPacket<Packet2d>(i + 1, j + 2); //[b3 b4]\n\n          storeBlock<double, Packet2d, Index>(blockB + ri, block);\n        }\n\n        ri += 4*vectorSize;\n      }\n      for(; i < depth; i++)\n      {\n        if(StorageOrder == ColMajor)\n        {\n          blockB[ri+0] = rhs(i, j+0);\n          blockB[ri+1] = rhs(i, j+1);\n\n          ri += vectorSize;\n\n          blockB[ri+0] = 
rhs(i, j+2);\n          blockB[ri+1] = rhs(i, j+3);\n        } else {\n          Packet2d rhsV = rhs.template loadPacket<Packet2d>(i, j);\n          pstore<double>(blockB + ri, rhsV);\n\n          ri += vectorSize;\n\n          rhsV = rhs.template loadPacket<Packet2d>(i, j + 2);\n          pstore<double>(blockB + ri, rhsV);\n        }\n        ri += vectorSize;\n      }\n\n      if(PanelMode) ri += (2*vectorSize)*(stride - offset - depth);\n    }\n\n    if (j < cols)\n    {\n      if(PanelMode) ri += offset*(cols - j);\n\n      for(Index i = 0; i < depth; i++)\n      {\n        Index k = j;\n        for(; k < cols; k++)\n        {\n          blockB[ri] = rhs(i, k);\n          ri += 1;\n        }\n      }\n    }\n  }\n};\n\n// General template for lhs complex packing, float64 specialization.\ntemplate<typename Index, typename DataMapper, typename Packet, typename PacketC, int StorageOrder, bool Conjugate, bool PanelMode>\nstruct dhs_cpack<double, Index, DataMapper, Packet, PacketC, StorageOrder, Conjugate, PanelMode, true>\n{\n  EIGEN_STRONG_INLINE void operator()(std::complex<double>* blockA, const DataMapper& lhs, Index depth, Index rows, Index stride, Index offset)\n  {\n    const Index vectorSize = quad_traits<double>::vectorsize;\n    const Index vectorDelta = vectorSize * ((PanelMode) ? stride : depth);\n    Index rir = ((PanelMode) ? (vectorSize*offset) : 0), rii;\n    double* blockAt = reinterpret_cast<double *>(blockA);\n    Index j = 0;\n\n    for(; j + vectorSize <= rows; j+=vectorSize)\n    {\n      Index i = 0;\n\n      rii = rir + vectorDelta;\n\n      for(; i + vectorSize <= depth; i+=vectorSize)\n      {\n        PacketBlock<Packet,2> blockr, blocki;\n        PacketBlock<PacketC,4> cblock;\n\n        if(StorageOrder == ColMajor)\n        {\n          cblock.packet[0] = lhs.template loadPacket<PacketC>(j, i + 0); //[a1 a1i]\n          cblock.packet[1] = lhs.template loadPacket<PacketC>(j, i + 1); //[b1 b1i]\n\n          cblock.packet[2] = lhs.template loadPacket<PacketC>(j + 1, i + 0); //[a2 a2i]\n          cblock.packet[3] = lhs.template loadPacket<PacketC>(j + 1, i + 1); //[b2 b2i]\n\n          blockr.packet[0] = vec_perm(cblock.packet[0].v, cblock.packet[2].v, p16uc_GETREAL64); //[a1 a2]\n          blockr.packet[1] = vec_perm(cblock.packet[1].v, cblock.packet[3].v, p16uc_GETREAL64); //[b1 b2]\n\n          blocki.packet[0] = vec_perm(cblock.packet[0].v, cblock.packet[2].v, p16uc_GETIMAG64);\n          blocki.packet[1] = vec_perm(cblock.packet[1].v, cblock.packet[3].v, p16uc_GETIMAG64);\n        } else {\n          cblock.packet[0] = lhs.template loadPacket<PacketC>(j + 0, i); //[a1 a1i]\n          cblock.packet[1] = lhs.template loadPacket<PacketC>(j + 1, i); //[a2 a2i]\n\n          cblock.packet[2] = lhs.template loadPacket<PacketC>(j + 0, i + 1); //[b1 b1i]\n          cblock.packet[3] = lhs.template loadPacket<PacketC>(j + 1, i + 1); //[b2 b2i\n\n          blockr.packet[0] = vec_perm(cblock.packet[0].v, cblock.packet[1].v, p16uc_GETREAL64); //[a1 a2]\n          blockr.packet[1] = vec_perm(cblock.packet[2].v, cblock.packet[3].v, p16uc_GETREAL64); //[b1 b2]\n\n          blocki.packet[0] = vec_perm(cblock.packet[0].v, cblock.packet[1].v, p16uc_GETIMAG64);\n          blocki.packet[1] = vec_perm(cblock.packet[2].v, cblock.packet[3].v, p16uc_GETIMAG64);\n        }\n\n        if(Conjugate)\n        {\n          blocki.packet[0] = -blocki.packet[0];\n          blocki.packet[1] = -blocki.packet[1];\n        }\n\n        storeBlock<double, Packet, Index>(blockAt + rir, blockr);\n   
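     // the imaginary halves land vectorDelta scalars after their real counterparts\n   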
     storeBlock<double, Packet, Index>(blockAt + rii, blocki);\n\n        rir += 2*vectorSize;\n        rii += 2*vectorSize;\n      }\n      for(; i < depth; i++)\n      {\n        PacketBlock<Packet,1> blockr, blocki;\n        PacketBlock<PacketC,2> cblock;\n\n        cblock.packet[0] = lhs.template loadPacket<PacketC>(j + 0, i);\n        cblock.packet[1] = lhs.template loadPacket<PacketC>(j + 1, i);\n\n        blockr.packet[0] = vec_perm(cblock.packet[0].v, cblock.packet[1].v, p16uc_GETREAL64);\n        blocki.packet[0] = vec_perm(cblock.packet[0].v, cblock.packet[1].v, p16uc_GETIMAG64);\n\n        if(Conjugate)\n        {\n          blocki.packet[0] = -blocki.packet[0];\n        }\n\n        pstore<double>(blockAt + rir, blockr.packet[0]);\n        pstore<double>(blockAt + rii, blocki.packet[0]);\n\n        rir += vectorSize;\n        rii += vectorSize;\n      }\n\n      rir += ((PanelMode) ? (vectorSize*(2*stride - depth)) : vectorDelta);\n    }\n\n    if (j < rows)\n    {\n      if(PanelMode) rir += (offset*(rows - j - vectorSize));\n      rii = rir + (((PanelMode) ? stride : depth) * (rows - j));\n\n      for(Index i = 0; i < depth; i++)\n      {\n        Index k = j;\n        for(; k < rows; k++)\n        {\n          blockAt[rir] = lhs(k, i).real();\n\n          if(Conjugate)\n            blockAt[rii] = -lhs(k, i).imag();\n          else\n            blockAt[rii] =  lhs(k, i).imag();\n\n          rir += 1;\n          rii += 1;\n        }\n      }\n    }\n  }\n};\n\n// General template for rhs complex packing, float64 specialization.\ntemplate<typename Index, typename DataMapper, typename Packet, typename PacketC, int StorageOrder, bool Conjugate, bool PanelMode>\nstruct dhs_cpack<double, Index, DataMapper, Packet, PacketC, StorageOrder, Conjugate, PanelMode, false>\n{\n  EIGEN_STRONG_INLINE void operator()(std::complex<double>* blockB, const DataMapper& rhs, Index depth, Index cols, Index stride, Index offset)\n  {\n    const Index vectorSize = quad_traits<double>::vectorsize;\n    const Index vectorDelta = 2*vectorSize * ((PanelMode) ? stride : depth);\n    Index rir = ((PanelMode) ? (2*vectorSize*offset) : 0), rii;\n    double* blockBt = reinterpret_cast<double *>(blockB);\n    Index j = 0;\n\n    for(; j + 2*vectorSize <= cols; j+=2*vectorSize)\n    {\n      Index i = 0;\n\n      rii = rir + vectorDelta;\n\n      for(; i < depth; i++)\n      {\n        PacketBlock<PacketC,4> cblock;\n        PacketBlock<Packet,2> blockr, blocki;\n\n        bload<DataMapper, PacketC, Index, 2, 0, ColMajor>(cblock, rhs, i, j);\n\n        blockr.packet[0] = vec_perm(cblock.packet[0].v, cblock.packet[1].v, p16uc_GETREAL64);\n        blockr.packet[1] = vec_perm(cblock.packet[2].v, cblock.packet[3].v, p16uc_GETREAL64);\n\n        blocki.packet[0] = vec_perm(cblock.packet[0].v, cblock.packet[1].v, p16uc_GETIMAG64);\n        blocki.packet[1] = vec_perm(cblock.packet[2].v, cblock.packet[3].v, p16uc_GETIMAG64);\n\n        if(Conjugate)\n        {\n          blocki.packet[0] = -blocki.packet[0];\n          blocki.packet[1] = -blocki.packet[1];\n        }\n\n        storeBlock<double, Packet, Index>(blockBt + rir, blockr);\n        storeBlock<double, Packet, Index>(blockBt + rii, blocki);\n\n        rir += 2*vectorSize;\n        rii += 2*vectorSize;\n      }\n\n      rir += ((PanelMode) ? (2*vectorSize*(2*stride - depth)) : vectorDelta);\n    }\n\n    if (j < cols)\n    {\n      if(PanelMode) rir += (offset*(cols - j - 2*vectorSize));\n      rii = rir + (((PanelMode) ? 
stride : depth) * (cols - j));\n\n      for(Index i = 0; i < depth; i++)\n      {\n        Index k = j;\n        for(; k < cols; k++)\n        {\n          blockBt[rir] = rhs(i, k).real();\n\n          if(Conjugate)\n            blockBt[rii] = -rhs(i, k).imag();\n          else\n            blockBt[rii] =  rhs(i, k).imag();\n\n          rir += 1;\n          rii += 1;\n        }\n      }\n    }\n  }\n};\n\n/**************\n * GEMM utils *\n **************/\n\n// 512-bits rank1-update of acc. It can either positive or negative accumulate (useful for complex gemm).\ntemplate<typename Packet, bool NegativeAccumulate>\nEIGEN_ALWAYS_INLINE void pger_common(PacketBlock<Packet,4>* acc, const Packet& lhsV, const Packet* rhsV)\n{\n  if(NegativeAccumulate)\n  {\n    acc->packet[0] = vec_nmsub(lhsV, rhsV[0], acc->packet[0]);\n    acc->packet[1] = vec_nmsub(lhsV, rhsV[1], acc->packet[1]);\n    acc->packet[2] = vec_nmsub(lhsV, rhsV[2], acc->packet[2]);\n    acc->packet[3] = vec_nmsub(lhsV, rhsV[3], acc->packet[3]);\n  } else {\n    acc->packet[0] = vec_madd(lhsV, rhsV[0], acc->packet[0]);\n    acc->packet[1] = vec_madd(lhsV, rhsV[1], acc->packet[1]);\n    acc->packet[2] = vec_madd(lhsV, rhsV[2], acc->packet[2]);\n    acc->packet[3] = vec_madd(lhsV, rhsV[3], acc->packet[3]);\n  }\n}\n\ntemplate<typename Packet, bool NegativeAccumulate>\nEIGEN_ALWAYS_INLINE void pger_common(PacketBlock<Packet,1>* acc, const Packet& lhsV, const Packet* rhsV)\n{\n  if(NegativeAccumulate)\n  {\n    acc->packet[0] = vec_nmsub(lhsV, rhsV[0], acc->packet[0]);\n  } else {\n    acc->packet[0] = vec_madd(lhsV, rhsV[0], acc->packet[0]);\n  }\n}\n\ntemplate<int N, typename Scalar, typename Packet, bool NegativeAccumulate>\nEIGEN_ALWAYS_INLINE void pger(PacketBlock<Packet,N>* acc, const Scalar* lhs, const Packet* rhsV)\n{\n  Packet lhsV = pload<Packet>(lhs);\n\n  pger_common<Packet, NegativeAccumulate>(acc, lhsV, rhsV);\n}\n\ntemplate<typename Scalar, typename Packet, typename Index>\nEIGEN_ALWAYS_INLINE void loadPacketRemaining(const Scalar* lhs, Packet &lhsV, Index remaining_rows)\n{\n#ifdef _ARCH_PWR9\n  lhsV = vec_xl_len((Scalar *)lhs, remaining_rows * sizeof(Scalar));\n#else\n  Index i = 0;\n  do {\n    lhsV[i] = lhs[i];\n  } while (++i < remaining_rows);\n#endif\n}\n\ntemplate<int N, typename Scalar, typename Packet, typename Index, bool NegativeAccumulate>\nEIGEN_ALWAYS_INLINE void pger(PacketBlock<Packet,N>* acc, const Scalar* lhs, const Packet* rhsV, Index remaining_rows)\n{\n  Packet lhsV;\n  loadPacketRemaining<Scalar, Packet, Index>(lhs, lhsV, remaining_rows);\n\n  pger_common<Packet, NegativeAccumulate>(acc, lhsV, rhsV);\n}\n\n// 512-bits rank1-update of complex acc. It takes decoupled accumulators as entries. 
It also takes cares of mixed types real * complex and complex * real.\ntemplate<int N, typename Packet, bool ConjugateLhs, bool ConjugateRhs, bool LhsIsReal, bool RhsIsReal>\nEIGEN_ALWAYS_INLINE void pgerc_common(PacketBlock<Packet,N>* accReal, PacketBlock<Packet,N>* accImag, const Packet &lhsV, const Packet &lhsVi, const Packet* rhsV, const Packet* rhsVi)\n{\n  pger_common<Packet, false>(accReal, lhsV, rhsV);\n  if(LhsIsReal)\n  {\n    pger_common<Packet, ConjugateRhs>(accImag, lhsV, rhsVi);\n    EIGEN_UNUSED_VARIABLE(lhsVi);\n  } else {\n    if (!RhsIsReal) {\n      pger_common<Packet, ConjugateLhs == ConjugateRhs>(accReal, lhsVi, rhsVi);\n      pger_common<Packet, ConjugateRhs>(accImag, lhsV, rhsVi);\n    } else {\n      EIGEN_UNUSED_VARIABLE(rhsVi);\n    }\n    pger_common<Packet, ConjugateLhs>(accImag, lhsVi, rhsV);\n  }\n}\n\ntemplate<int N, typename Scalar, typename Packet, bool ConjugateLhs, bool ConjugateRhs, bool LhsIsReal, bool RhsIsReal>\nEIGEN_ALWAYS_INLINE void pgerc(PacketBlock<Packet,N>* accReal, PacketBlock<Packet,N>* accImag, const Scalar* lhs_ptr, const Scalar* lhs_ptr_imag, const Packet* rhsV, const Packet* rhsVi)\n{\n  Packet lhsV = ploadLhs<Scalar, Packet>(lhs_ptr);\n  Packet lhsVi;\n  if(!LhsIsReal) lhsVi = ploadLhs<Scalar, Packet>(lhs_ptr_imag);\n  else EIGEN_UNUSED_VARIABLE(lhs_ptr_imag);\n\n  pgerc_common<N, Packet, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(accReal, accImag, lhsV, lhsVi, rhsV, rhsVi);\n}\n\ntemplate<typename Scalar, typename Packet, typename Index, bool LhsIsReal>\nEIGEN_ALWAYS_INLINE void loadPacketRemaining(const Scalar* lhs_ptr, const Scalar* lhs_ptr_imag, Packet &lhsV, Packet &lhsVi, Index remaining_rows)\n{\n#ifdef _ARCH_PWR9\n  lhsV = vec_xl_len((Scalar *)lhs_ptr, remaining_rows * sizeof(Scalar));\n  if(!LhsIsReal) lhsVi = vec_xl_len((Scalar *)lhs_ptr_imag, remaining_rows * sizeof(Scalar));\n  else EIGEN_UNUSED_VARIABLE(lhs_ptr_imag);\n#else\n  Index i = 0;\n  do {\n    lhsV[i] = lhs_ptr[i];\n    if(!LhsIsReal) lhsVi[i] = lhs_ptr_imag[i];\n  } while (++i < remaining_rows);\n  if(LhsIsReal) EIGEN_UNUSED_VARIABLE(lhs_ptr_imag);\n#endif\n}\n\ntemplate<int N, typename Scalar, typename Packet, typename Index, bool ConjugateLhs, bool ConjugateRhs, bool LhsIsReal, bool RhsIsReal>\nEIGEN_ALWAYS_INLINE void pgerc(PacketBlock<Packet,N>* accReal, PacketBlock<Packet,N>* accImag, const Scalar* lhs_ptr, const Scalar* lhs_ptr_imag, const Packet* rhsV, const Packet* rhsVi, Index remaining_rows)\n{\n  Packet lhsV, lhsVi;\n  loadPacketRemaining<Scalar, Packet, Index, LhsIsReal>(lhs_ptr, lhs_ptr_imag, lhsV, lhsVi, remaining_rows);\n\n  pgerc_common<N, Packet, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(accReal, accImag, lhsV, lhsVi, rhsV, rhsVi);\n}\n\ntemplate<typename Scalar, typename Packet>\nEIGEN_ALWAYS_INLINE Packet ploadLhs(const Scalar* lhs)\n{\n  return ploadu<Packet>(lhs);\n}\n\n// Zero the accumulator on PacketBlock.\ntemplate<typename Scalar, typename Packet>\nEIGEN_ALWAYS_INLINE void bsetzero(PacketBlock<Packet,4>& acc)\n{\n  acc.packet[0] = pset1<Packet>((Scalar)0);\n  acc.packet[1] = pset1<Packet>((Scalar)0);\n  acc.packet[2] = pset1<Packet>((Scalar)0);\n  acc.packet[3] = pset1<Packet>((Scalar)0);\n}\n\ntemplate<typename Scalar, typename Packet>\nEIGEN_ALWAYS_INLINE void bsetzero(PacketBlock<Packet,1>& acc)\n{\n  acc.packet[0] = pset1<Packet>((Scalar)0);\n}\n\n// Scale the PacketBlock vectors by alpha.\ntemplate<typename Packet>\nEIGEN_ALWAYS_INLINE void bscale(PacketBlock<Packet,4>& acc, PacketBlock<Packet,4>& accZ, const Packet& 
pAlpha)\n{\n  acc.packet[0] = pmadd(pAlpha, accZ.packet[0], acc.packet[0]);\n  acc.packet[1] = pmadd(pAlpha, accZ.packet[1], acc.packet[1]);\n  acc.packet[2] = pmadd(pAlpha, accZ.packet[2], acc.packet[2]);\n  acc.packet[3] = pmadd(pAlpha, accZ.packet[3], acc.packet[3]);\n}\n\ntemplate<typename Packet>\nEIGEN_ALWAYS_INLINE void bscale(PacketBlock<Packet,1>& acc, PacketBlock<Packet,1>& accZ, const Packet& pAlpha)\n{\n  acc.packet[0] = pmadd(pAlpha, accZ.packet[0], acc.packet[0]);\n}\n\ntemplate<typename Packet>\nEIGEN_ALWAYS_INLINE void bscalec_common(PacketBlock<Packet,4>& acc, PacketBlock<Packet,4>& accZ, const Packet& pAlpha)\n{\n  acc.packet[0] = pmul<Packet>(accZ.packet[0], pAlpha);\n  acc.packet[1] = pmul<Packet>(accZ.packet[1], pAlpha);\n  acc.packet[2] = pmul<Packet>(accZ.packet[2], pAlpha);\n  acc.packet[3] = pmul<Packet>(accZ.packet[3], pAlpha);\n}\n\ntemplate<typename Packet>\nEIGEN_ALWAYS_INLINE void bscalec_common(PacketBlock<Packet,1>& acc, PacketBlock<Packet,1>& accZ, const Packet& pAlpha)\n{\n  acc.packet[0] = pmul<Packet>(accZ.packet[0], pAlpha);\n}\n\n// Complex version of PacketBlock scaling.\ntemplate<typename Packet, int N>\nEIGEN_ALWAYS_INLINE void bscalec(PacketBlock<Packet,N>& aReal, PacketBlock<Packet,N>& aImag, const Packet& bReal, const Packet& bImag, PacketBlock<Packet,N>& cReal, PacketBlock<Packet,N>& cImag)\n{\n  bscalec_common<Packet>(cReal, aReal, bReal);\n\n  bscalec_common<Packet>(cImag, aImag, bReal);\n\n  pger_common<Packet, true>(&cReal, bImag, aImag.packet);\n\n  pger_common<Packet, false>(&cImag, bImag, aReal.packet);\n}\n\ntemplate<typename Packet>\nEIGEN_ALWAYS_INLINE void band(PacketBlock<Packet,4>& acc, const Packet& pMask)\n{\n  acc.packet[0] = pand(acc.packet[0], pMask);\n  acc.packet[1] = pand(acc.packet[1], pMask);\n  acc.packet[2] = pand(acc.packet[2], pMask);\n  acc.packet[3] = pand(acc.packet[3], pMask);\n}\n\ntemplate<typename Packet>\nEIGEN_ALWAYS_INLINE void bscalec(PacketBlock<Packet,4>& aReal, PacketBlock<Packet,4>& aImag, const Packet& bReal, const Packet& bImag, PacketBlock<Packet,4>& cReal, PacketBlock<Packet,4>& cImag, const Packet& pMask)\n{\n  band<Packet>(aReal, pMask);\n  band<Packet>(aImag, pMask);\n\n  bscalec<Packet,4>(aReal, aImag, bReal, bImag, cReal, cImag);\n}\n\n// Load a PacketBlock, the N parameters make tunning gemm easier so we can add more accumulators as needed.\ntemplate<typename DataMapper, typename Packet, typename Index, const Index accCols, int N, int StorageOrder>\nEIGEN_ALWAYS_INLINE void bload(PacketBlock<Packet,4>& acc, const DataMapper& res, Index row, Index col)\n{\n  if (StorageOrder == RowMajor) {\n    acc.packet[0] = res.template loadPacket<Packet>(row + 0, col + N*accCols);\n    acc.packet[1] = res.template loadPacket<Packet>(row + 1, col + N*accCols);\n    acc.packet[2] = res.template loadPacket<Packet>(row + 2, col + N*accCols);\n    acc.packet[3] = res.template loadPacket<Packet>(row + 3, col + N*accCols);\n  } else {\n    acc.packet[0] = res.template loadPacket<Packet>(row + N*accCols, col + 0);\n    acc.packet[1] = res.template loadPacket<Packet>(row + N*accCols, col + 1);\n    acc.packet[2] = res.template loadPacket<Packet>(row + N*accCols, col + 2);\n    acc.packet[3] = res.template loadPacket<Packet>(row + N*accCols, col + 3);\n  }\n}\n\n// An overload of bload when you have a PacketBLock with 8 vectors.\ntemplate<typename DataMapper, typename Packet, typename Index, const Index accCols, int N, int StorageOrder>\nEIGEN_ALWAYS_INLINE void bload(PacketBlock<Packet,8>& acc, const DataMapper& res, 
Index row, Index col)\n{\n  if (StorageOrder == RowMajor) {\n    acc.packet[0] = res.template loadPacket<Packet>(row + 0, col + N*accCols);\n    acc.packet[1] = res.template loadPacket<Packet>(row + 1, col + N*accCols);\n    acc.packet[2] = res.template loadPacket<Packet>(row + 2, col + N*accCols);\n    acc.packet[3] = res.template loadPacket<Packet>(row + 3, col + N*accCols);\n    acc.packet[4] = res.template loadPacket<Packet>(row + 0, col + (N+1)*accCols);\n    acc.packet[5] = res.template loadPacket<Packet>(row + 1, col + (N+1)*accCols);\n    acc.packet[6] = res.template loadPacket<Packet>(row + 2, col + (N+1)*accCols);\n    acc.packet[7] = res.template loadPacket<Packet>(row + 3, col + (N+1)*accCols);\n  } else {\n    acc.packet[0] = res.template loadPacket<Packet>(row + N*accCols, col + 0);\n    acc.packet[1] = res.template loadPacket<Packet>(row + N*accCols, col + 1);\n    acc.packet[2] = res.template loadPacket<Packet>(row + N*accCols, col + 2);\n    acc.packet[3] = res.template loadPacket<Packet>(row + N*accCols, col + 3);\n    acc.packet[4] = res.template loadPacket<Packet>(row + (N+1)*accCols, col + 0);\n    acc.packet[5] = res.template loadPacket<Packet>(row + (N+1)*accCols, col + 1);\n    acc.packet[6] = res.template loadPacket<Packet>(row + (N+1)*accCols, col + 2);\n    acc.packet[7] = res.template loadPacket<Packet>(row + (N+1)*accCols, col + 3);\n  }\n}\n\ntemplate<typename DataMapper, typename Packet, typename Index, const Index accCols, int N, int StorageOrder>\nEIGEN_ALWAYS_INLINE void bload(PacketBlock<Packet,2>& acc, const DataMapper& res, Index row, Index col)\n{\n  acc.packet[0] = res.template loadPacket<Packet>(row + N*accCols, col + 0);\n  acc.packet[1] = res.template loadPacket<Packet>(row + (N+1)*accCols, col + 0);\n}\n\nconst static Packet4i mask41 = { -1,  0,  0,  0 };\nconst static Packet4i mask42 = { -1, -1,  0,  0 };\nconst static Packet4i mask43 = { -1, -1, -1,  0 };\n\nconst static Packet2l mask21 = { -1, 0 };\n\ntemplate<typename Packet>\nEIGEN_ALWAYS_INLINE Packet bmask(const int remaining_rows)\n{\n  if (remaining_rows == 0) {\n    return pset1<Packet>(float(0.0));  // Not used\n  } else {\n    switch (remaining_rows) {\n      case 1:  return Packet(mask41);\n      case 2:  return Packet(mask42);\n      default: return Packet(mask43);\n    }\n  }\n}\n\ntemplate<>\nEIGEN_ALWAYS_INLINE Packet2d bmask<Packet2d>(const int remaining_rows)\n{\n  if (remaining_rows == 0) {\n    return pset1<Packet2d>(double(0.0));  // Not used\n  } else {\n    return Packet2d(mask21);\n  }\n}\n\ntemplate<typename Packet>\nEIGEN_ALWAYS_INLINE void bscale(PacketBlock<Packet,4>& acc, PacketBlock<Packet,4>& accZ, const Packet& pAlpha, const Packet& pMask)\n{\n  band<Packet>(accZ, pMask);\n\n  bscale<Packet>(acc, accZ, pAlpha);\n}\n\ntemplate<typename Packet>\nEIGEN_ALWAYS_INLINE void pbroadcast4_old(const __UNPACK_TYPE__(Packet)* a, Packet& a0, Packet& a1, Packet& a2, Packet& a3)\n{\n  pbroadcast4<Packet>(a, a0, a1, a2, a3);\n}\n\ntemplate<>\nEIGEN_ALWAYS_INLINE void pbroadcast4_old<Packet2d>(const double* a, Packet2d& a0, Packet2d& a1, Packet2d& a2, Packet2d& a3)\n{\n  a1 = pload<Packet2d>(a);\n  a3 = pload<Packet2d>(a + 2);\n  a0 = vec_splat(a1, 0);\n  a1 = vec_splat(a1, 1);\n  a2 = vec_splat(a3, 0);\n  a3 = vec_splat(a3, 1);\n}\n\n// PEEL loop factor.\n#define PEEL 7\n\ntemplate<typename Scalar, typename Packet, typename Index>\nEIGEN_ALWAYS_INLINE void MICRO_EXTRA_COL(\n  const Scalar* &lhs_ptr,\n  const Scalar* &rhs_ptr,\n  PacketBlock<Packet,1> &accZero,\n  Index 
remaining_rows,\n  Index remaining_cols)\n{\n  Packet rhsV[1];\n  rhsV[0] = pset1<Packet>(rhs_ptr[0]);\n  pger<1,Scalar, Packet, false>(&accZero, lhs_ptr, rhsV);\n  lhs_ptr += remaining_rows;\n  rhs_ptr += remaining_cols;\n}\n\ntemplate<typename Scalar, typename Packet, typename DataMapper, typename Index, const Index accRows>\nEIGEN_STRONG_INLINE void gemm_extra_col(\n  const DataMapper& res,\n  const Scalar* lhs_base,\n  const Scalar* rhs_base,\n  Index depth,\n  Index strideA,\n  Index offsetA,\n  Index row,\n  Index col,\n  Index remaining_rows,\n  Index remaining_cols,\n  const Packet& pAlpha)\n{\n  const Scalar* rhs_ptr = rhs_base;\n  const Scalar* lhs_ptr = lhs_base + row*strideA + remaining_rows*offsetA;\n  PacketBlock<Packet,1> accZero;\n\n  bsetzero<Scalar, Packet>(accZero);\n\n  Index remaining_depth = (depth & -accRows);\n  Index k = 0;\n  for(; k + PEEL <= remaining_depth; k+= PEEL)\n  {\n    EIGEN_POWER_PREFETCH(rhs_ptr);\n    EIGEN_POWER_PREFETCH(lhs_ptr);\n    for (int l = 0; l < PEEL; l++) {\n      MICRO_EXTRA_COL<Scalar, Packet, Index>(lhs_ptr, rhs_ptr, accZero, remaining_rows, remaining_cols);\n    }\n  }\n  for(; k < remaining_depth; k++)\n  {\n    MICRO_EXTRA_COL<Scalar, Packet, Index>(lhs_ptr, rhs_ptr, accZero, remaining_rows, remaining_cols);\n  }\n  for(; k < depth; k++)\n  {\n    Packet rhsV[1];\n    rhsV[0] = pset1<Packet>(rhs_ptr[0]);\n    pger<1, Scalar, Packet, Index, false>(&accZero, lhs_ptr, rhsV, remaining_rows);\n    lhs_ptr += remaining_rows;\n    rhs_ptr += remaining_cols;\n  }\n\n  accZero.packet[0] = vec_mul(pAlpha, accZero.packet[0]);\n  for(Index i = 0; i < remaining_rows; i++) {\n    res(row + i, col) += accZero.packet[0][i];\n  }\n}\n\ntemplate<typename Scalar, typename Packet, typename Index, const Index accRows>\nEIGEN_ALWAYS_INLINE void MICRO_EXTRA_ROW(\n  const Scalar* &lhs_ptr,\n  const Scalar* &rhs_ptr,\n  PacketBlock<Packet,4> &accZero,\n  Index remaining_rows)\n{\n  Packet rhsV[4];\n  pbroadcast4<Packet>(rhs_ptr, rhsV[0], rhsV[1], rhsV[2], rhsV[3]);\n  pger<4, Scalar, Packet, false>(&accZero, lhs_ptr, rhsV);\n  lhs_ptr += remaining_rows;\n  rhs_ptr += accRows;\n}\n\ntemplate<typename Scalar, typename Packet, typename DataMapper, typename Index, const Index accRows, const Index accCols>\nEIGEN_STRONG_INLINE void gemm_extra_row(\n  const DataMapper& res,\n  const Scalar* lhs_base,\n  const Scalar* rhs_base,\n  Index depth,\n  Index strideA,\n  Index offsetA,\n  Index row,\n  Index col,\n  Index rows,\n  Index cols,\n  Index remaining_rows,\n  const Packet& pAlpha,\n  const Packet& pMask)\n{\n  const Scalar* rhs_ptr = rhs_base;\n  const Scalar* lhs_ptr = lhs_base + row*strideA + remaining_rows*offsetA;\n  PacketBlock<Packet,4> accZero, acc;\n\n  bsetzero<Scalar, Packet>(accZero);\n\n  Index remaining_depth = (col + accRows < cols) ? 
depth : (depth & -accRows);\n  Index k = 0;\n  for(; k + PEEL <= remaining_depth; k+= PEEL)\n  {\n    EIGEN_POWER_PREFETCH(rhs_ptr);\n    EIGEN_POWER_PREFETCH(lhs_ptr);\n    for (int l = 0; l < PEEL; l++) {\n      MICRO_EXTRA_ROW<Scalar, Packet, Index, accRows>(lhs_ptr, rhs_ptr, accZero, remaining_rows);\n    }\n  }\n  for(; k < remaining_depth; k++)\n  {\n    MICRO_EXTRA_ROW<Scalar, Packet, Index, accRows>(lhs_ptr, rhs_ptr, accZero, remaining_rows);\n  }\n\n  if ((remaining_depth == depth) && (rows >= accCols))\n  {\n    for(Index j = 0; j < 4; j++) {\n      acc.packet[j] = res.template loadPacket<Packet>(row, col + j);\n    }\n    bscale<Packet>(acc, accZero, pAlpha, pMask);\n    res.template storePacketBlock<Packet,4>(row, col, acc);\n  } else {\n    for(; k < depth; k++)\n    {\n      Packet rhsV[4];\n      pbroadcast4<Packet>(rhs_ptr, rhsV[0], rhsV[1], rhsV[2], rhsV[3]);\n      pger<4, Scalar, Packet, Index, false>(&accZero, lhs_ptr, rhsV, remaining_rows);\n      lhs_ptr += remaining_rows;\n      rhs_ptr += accRows;\n    }\n\n    for(Index j = 0; j < 4; j++) {\n      accZero.packet[j] = vec_mul(pAlpha, accZero.packet[j]);\n    }\n    for(Index j = 0; j < 4; j++) {\n      for(Index i = 0; i < remaining_rows; i++) {\n        res(row + i, col + j) += accZero.packet[j][i];\n      }\n    }\n  }\n}\n\n#define MICRO_UNROLL(func) \\\n  func(0) func(1) func(2) func(3) func(4) func(5) func(6) func(7)\n\n#define MICRO_UNROLL_WORK(func, func2, peel) \\\n    MICRO_UNROLL(func2); \\\n    func(0,peel) func(1,peel) func(2,peel) func(3,peel) \\\n    func(4,peel) func(5,peel) func(6,peel) func(7,peel)\n\n#define MICRO_LOAD_ONE(iter) \\\n  if (unroll_factor > iter) { \\\n    lhsV##iter = ploadLhs<Scalar, Packet>(lhs_ptr##iter); \\\n    lhs_ptr##iter += accCols; \\\n  } else { \\\n    EIGEN_UNUSED_VARIABLE(lhsV##iter); \\\n  }\n\n#define MICRO_WORK_ONE(iter, peel) \\\n  if (unroll_factor > iter) { \\\n    pger_common<Packet, false>(&accZero##iter, lhsV##iter, rhsV##peel); \\\n  }\n\n#define MICRO_TYPE_PEEL4(func, func2, peel) \\\n  if (PEEL > peel) { \\\n    Packet lhsV0, lhsV1, lhsV2, lhsV3, lhsV4, lhsV5, lhsV6, lhsV7; \\\n    pbroadcast4<Packet>(rhs_ptr + (accRows * peel), rhsV##peel[0], rhsV##peel[1], rhsV##peel[2], rhsV##peel[3]); \\\n    MICRO_UNROLL_WORK(func, func2, peel) \\\n  } else { \\\n    EIGEN_UNUSED_VARIABLE(rhsV##peel); \\\n  }\n\n#define MICRO_TYPE_PEEL1(func, func2, peel) \\\n  if (PEEL > peel) { \\\n    Packet lhsV0, lhsV1, lhsV2, lhsV3, lhsV4, lhsV5, lhsV6, lhsV7; \\\n    rhsV##peel[0] = pset1<Packet>(rhs_ptr[remaining_cols * peel]); \\\n    MICRO_UNROLL_WORK(func, func2, peel) \\\n  } else { \\\n    EIGEN_UNUSED_VARIABLE(rhsV##peel); \\\n  }\n\n#define MICRO_UNROLL_TYPE_PEEL(M, func, func1, func2) \\\n  Packet rhsV0[M], rhsV1[M], rhsV2[M], rhsV3[M], rhsV4[M], rhsV5[M], rhsV6[M], rhsV7[M], rhsV8[M], rhsV9[M]; \\\n  func(func1,func2,0); func(func1,func2,1); \\\n  func(func1,func2,2); func(func1,func2,3); \\\n  func(func1,func2,4); func(func1,func2,5); \\\n  func(func1,func2,6); func(func1,func2,7); \\\n  func(func1,func2,8); func(func1,func2,9);\n\n#define MICRO_UNROLL_TYPE_ONE(M, func, func1, func2) \\\n  Packet rhsV0[M]; \\\n  func(func1,func2,0);\n\n#define MICRO_ONE_PEEL4 \\\n  MICRO_UNROLL_TYPE_PEEL(4, MICRO_TYPE_PEEL4, MICRO_WORK_ONE, MICRO_LOAD_ONE); \\\n  rhs_ptr += (accRows * PEEL);\n\n#define MICRO_ONE4 \\\n  MICRO_UNROLL_TYPE_ONE(4, MICRO_TYPE_PEEL4, MICRO_WORK_ONE, MICRO_LOAD_ONE); \\\n  rhs_ptr += accRows;\n\n#define MICRO_ONE_PEEL1 \\\n  MICRO_UNROLL_TYPE_PEEL(1, 
MICRO_TYPE_PEEL1, MICRO_WORK_ONE, MICRO_LOAD_ONE); \\\n  rhs_ptr += (remaining_cols * PEEL);\n\n#define MICRO_ONE1 \\\n  MICRO_UNROLL_TYPE_ONE(1, MICRO_TYPE_PEEL1, MICRO_WORK_ONE, MICRO_LOAD_ONE); \\\n  rhs_ptr += remaining_cols;\n\n#define MICRO_DST_PTR_ONE(iter) \\\n  if (unroll_factor > iter) { \\\n    bsetzero<Scalar, Packet>(accZero##iter); \\\n  } else { \\\n    EIGEN_UNUSED_VARIABLE(accZero##iter); \\\n  }\n\n#define MICRO_DST_PTR MICRO_UNROLL(MICRO_DST_PTR_ONE)\n\n#define MICRO_SRC_PTR_ONE(iter) \\\n  if (unroll_factor > iter) { \\\n    lhs_ptr##iter = lhs_base + ( (row/accCols) + iter )*strideA*accCols + accCols*offsetA; \\\n  } else { \\\n    EIGEN_UNUSED_VARIABLE(lhs_ptr##iter); \\\n  }\n\n#define MICRO_SRC_PTR MICRO_UNROLL(MICRO_SRC_PTR_ONE)\n\n#define MICRO_PREFETCH_ONE(iter) \\\n  if (unroll_factor > iter) { \\\n    EIGEN_POWER_PREFETCH(lhs_ptr##iter); \\\n  }\n\n#define MICRO_PREFETCH MICRO_UNROLL(MICRO_PREFETCH_ONE)\n\n#define MICRO_STORE_ONE(iter) \\\n  if (unroll_factor > iter) { \\\n    acc.packet[0] = res.template loadPacket<Packet>(row + iter*accCols, col + 0); \\\n    acc.packet[1] = res.template loadPacket<Packet>(row + iter*accCols, col + 1); \\\n    acc.packet[2] = res.template loadPacket<Packet>(row + iter*accCols, col + 2); \\\n    acc.packet[3] = res.template loadPacket<Packet>(row + iter*accCols, col + 3); \\\n    bscale<Packet>(acc, accZero##iter, pAlpha); \\\n    res.template storePacketBlock<Packet,4>(row + iter*accCols, col, acc); \\\n  }\n\n#define MICRO_STORE MICRO_UNROLL(MICRO_STORE_ONE)\n\n#define MICRO_COL_STORE_ONE(iter) \\\n  if (unroll_factor > iter) { \\\n    acc.packet[0] = res.template loadPacket<Packet>(row + iter*accCols, col + 0); \\\n    bscale<Packet>(acc, accZero##iter, pAlpha); \\\n    res.template storePacketBlock<Packet,1>(row + iter*accCols, col, acc); \\\n  }\n\n#define MICRO_COL_STORE MICRO_UNROLL(MICRO_COL_STORE_ONE)\n\ntemplate<int unroll_factor, typename Scalar, typename Packet, typename DataMapper, typename Index, const Index accRows, const Index accCols>\nEIGEN_STRONG_INLINE void gemm_unrolled_iteration(\n  const DataMapper& res,\n  const Scalar* lhs_base,\n  const Scalar* rhs_base,\n  Index depth,\n  Index strideA,\n  Index offsetA,\n  Index& row,\n  Index col,\n  const Packet& pAlpha)\n{\n  const Scalar* rhs_ptr = rhs_base;\n  const Scalar* lhs_ptr0 = NULL, *  lhs_ptr1 = NULL, * lhs_ptr2 = NULL, * lhs_ptr3 = NULL, * lhs_ptr4 = NULL, * lhs_ptr5 = NULL, * lhs_ptr6 = NULL, * lhs_ptr7 = NULL;\n  PacketBlock<Packet,4> accZero0, accZero1, accZero2, accZero3, accZero4, accZero5, accZero6, accZero7;\n  PacketBlock<Packet,4> acc;\n\n  MICRO_SRC_PTR\n  MICRO_DST_PTR\n\n  Index k = 0;\n  for(; k + PEEL <= depth; k+= PEEL)\n  {\n    EIGEN_POWER_PREFETCH(rhs_ptr);\n    MICRO_PREFETCH\n    MICRO_ONE_PEEL4\n  }\n  for(; k < depth; k++)\n  {\n    MICRO_ONE4\n  }\n  MICRO_STORE\n\n  row += unroll_factor*accCols;\n}\n\ntemplate<int unroll_factor, typename Scalar, typename Packet, typename DataMapper, typename Index, const Index accCols>\nEIGEN_STRONG_INLINE void gemm_unrolled_col_iteration(\n  const DataMapper& res,\n  const Scalar* lhs_base,\n  const Scalar* rhs_base,\n  Index depth,\n  Index strideA,\n  Index offsetA,\n  Index& row,\n  Index col,\n  Index remaining_cols,\n  const Packet& pAlpha)\n{\n  const Scalar* rhs_ptr = rhs_base;\n  const Scalar* lhs_ptr0 = NULL, * lhs_ptr1 = NULL, * lhs_ptr2 = NULL, * lhs_ptr3 = NULL, * lhs_ptr4 = NULL, * lhs_ptr5 = NULL, * lhs_ptr6 = NULL, *lhs_ptr7 = NULL;\n  PacketBlock<Packet,1> accZero0, accZero1, 
accZero2, accZero3, accZero4, accZero5, accZero6, accZero7;\n  PacketBlock<Packet,1> acc;\n\n  MICRO_SRC_PTR\n  MICRO_DST_PTR\n\n  Index k = 0;\n  for(; k + PEEL <= depth; k+= PEEL)\n  {\n    EIGEN_POWER_PREFETCH(rhs_ptr);\n    MICRO_PREFETCH\n    MICRO_ONE_PEEL1\n  }\n  for(; k < depth; k++)\n  {\n    MICRO_ONE1\n  }\n  MICRO_COL_STORE\n\n  row += unroll_factor*accCols;\n}\n\ntemplate<typename Scalar, typename Packet, typename DataMapper, typename Index, const Index accCols>\nEIGEN_STRONG_INLINE void gemm_unrolled_col(\n  const DataMapper& res,\n  const Scalar* lhs_base,\n  const Scalar* rhs_base,\n  Index depth,\n  Index strideA,\n  Index offsetA,\n  Index& row,\n  Index rows,\n  Index col,\n  Index remaining_cols,\n  const Packet& pAlpha)\n{\n#define MAX_UNROLL 6\n  while(row + MAX_UNROLL*accCols <= rows) {\n    gemm_unrolled_col_iteration<MAX_UNROLL, Scalar, Packet, DataMapper, Index, accCols>(res, lhs_base, rhs_base, depth, strideA, offsetA, row, col, remaining_cols, pAlpha);\n  }\n  switch( (rows-row)/accCols ) {\n#if MAX_UNROLL > 7\n    case 7:\n      gemm_unrolled_col_iteration<7, Scalar, Packet, DataMapper, Index, accCols>(res, lhs_base, rhs_base, depth, strideA, offsetA, row, col, remaining_cols, pAlpha);\n      break;\n#endif\n#if MAX_UNROLL > 6\n    case 6:\n      gemm_unrolled_col_iteration<6, Scalar, Packet, DataMapper, Index, accCols>(res, lhs_base, rhs_base, depth, strideA, offsetA, row, col, remaining_cols, pAlpha);\n      break;\n#endif\n#if MAX_UNROLL > 5\n   case 5:\n      gemm_unrolled_col_iteration<5, Scalar, Packet, DataMapper, Index, accCols>(res, lhs_base, rhs_base, depth, strideA, offsetA, row, col, remaining_cols, pAlpha);\n      break;\n#endif\n#if MAX_UNROLL > 4\n   case 4:\n      gemm_unrolled_col_iteration<4, Scalar, Packet, DataMapper, Index, accCols>(res, lhs_base, rhs_base, depth, strideA, offsetA, row, col, remaining_cols, pAlpha);\n      break;\n#endif\n#if MAX_UNROLL > 3\n   case 3:\n     gemm_unrolled_col_iteration<3, Scalar, Packet, DataMapper, Index, accCols>(res, lhs_base, rhs_base, depth, strideA, offsetA, row, col, remaining_cols, pAlpha);\n     break;\n#endif\n#if MAX_UNROLL > 2\n   case 2:\n     gemm_unrolled_col_iteration<2, Scalar, Packet, DataMapper, Index, accCols>(res, lhs_base, rhs_base, depth, strideA, offsetA, row, col, remaining_cols, pAlpha);\n     break;\n#endif\n#if MAX_UNROLL > 1\n   case 1:\n     gemm_unrolled_col_iteration<1, Scalar, Packet, DataMapper, Index, accCols>(res, lhs_base, rhs_base, depth, strideA, offsetA, row, col, remaining_cols, pAlpha);\n     break;\n#endif\n   default:\n     break;\n  }\n#undef MAX_UNROLL\n}\n\n/****************\n * GEMM kernels *\n * **************/\ntemplate<typename Scalar, typename Index, typename Packet, typename RhsPacket, typename DataMapper, const Index accRows, const Index accCols>\nEIGEN_STRONG_INLINE void gemm(const DataMapper& res, const Scalar* blockA, const Scalar* blockB, Index rows, Index depth, Index cols, Scalar alpha, Index strideA, Index strideB, Index offsetA, Index offsetB)\n{\n      const Index remaining_rows = rows % accCols;\n      const Index remaining_cols = cols % accRows;\n\n      if( strideA == -1 ) strideA = depth;\n      if( strideB == -1 ) strideB = depth;\n\n      const Packet pAlpha = pset1<Packet>(alpha);\n      const Packet pMask  = bmask<Packet>((const int)(remaining_rows));\n\n      Index col = 0;\n      for(; col + accRows <= cols; col += accRows)\n      {\n        const Scalar* rhs_base = blockB + col*strideB + accRows*offsetB;\n        const Scalar* 
lhs_base = blockA;\n        Index row = 0;\n\n#define MAX_UNROLL 6\n        while(row + MAX_UNROLL*accCols <= rows) {\n          gemm_unrolled_iteration<MAX_UNROLL, Scalar, Packet, DataMapper, Index, accRows, accCols>(res, lhs_base, rhs_base, depth, strideA, offsetA, row, col, pAlpha);\n        }\n        switch( (rows-row)/accCols ) {\n#if MAX_UNROLL > 7\n          case 7:\n            gemm_unrolled_iteration<7, Scalar, Packet, DataMapper, Index, accRows, accCols>(res, lhs_base, rhs_base, depth, strideA, offsetA, row, col, pAlpha);\n            break;\n#endif\n#if MAX_UNROLL > 6\n          case 6:\n            gemm_unrolled_iteration<6, Scalar, Packet, DataMapper, Index, accRows, accCols>(res, lhs_base, rhs_base, depth, strideA, offsetA, row, col, pAlpha);\n            break;\n#endif\n#if MAX_UNROLL > 5\n          case 5:\n            gemm_unrolled_iteration<5, Scalar, Packet, DataMapper, Index, accRows, accCols>(res, lhs_base, rhs_base, depth, strideA, offsetA, row, col, pAlpha);\n            break;\n#endif\n#if MAX_UNROLL > 4\n          case 4:\n            gemm_unrolled_iteration<4, Scalar, Packet, DataMapper, Index, accRows, accCols>(res, lhs_base, rhs_base, depth, strideA, offsetA, row, col, pAlpha);\n            break;\n#endif\n#if MAX_UNROLL > 3\n          case 3:\n            gemm_unrolled_iteration<3, Scalar, Packet, DataMapper, Index, accRows, accCols>(res, lhs_base, rhs_base, depth, strideA, offsetA, row, col, pAlpha);\n            break;\n#endif\n#if MAX_UNROLL > 2\n          case 2:\n            gemm_unrolled_iteration<2, Scalar, Packet, DataMapper, Index, accRows, accCols>(res, lhs_base, rhs_base, depth, strideA, offsetA, row, col, pAlpha);\n            break;\n#endif\n#if MAX_UNROLL > 1\n          case 1:\n            gemm_unrolled_iteration<1, Scalar, Packet, DataMapper, Index, accRows, accCols>(res, lhs_base, rhs_base, depth, strideA, offsetA, row, col, pAlpha);\n            break;\n#endif\n          default:\n            break;\n        }\n#undef MAX_UNROLL\n\n        if(remaining_rows > 0)\n        {\n          gemm_extra_row<Scalar, Packet, DataMapper, Index, accRows, accCols>(res, lhs_base, rhs_base, depth, strideA, offsetA, row, col, rows, cols, remaining_rows, pAlpha, pMask);\n        }\n    }\n\n    if(remaining_cols > 0)\n    {\n      const Scalar* rhs_base = blockB + col*strideB + remaining_cols*offsetB;\n      const Scalar* lhs_base = blockA;\n\n      for(; col < cols; col++)\n      {\n        Index row = 0;\n\n        gemm_unrolled_col<Scalar, Packet, DataMapper, Index, accCols>(res, lhs_base, rhs_base, depth, strideA, offsetA, row, rows, col, remaining_cols, pAlpha);\n\n        if (remaining_rows > 0)\n        {\n          gemm_extra_col<Scalar, Packet, DataMapper, Index, accRows>(res, lhs_base, rhs_base, depth, strideA, offsetA, row, col, remaining_rows, remaining_cols, pAlpha);\n        }\n        rhs_base++;\n      }\n    }\n}\n\n#define accColsC (accCols / 2)\n#define advanceRows ((LhsIsReal) ? 1 : 2)\n#define advanceCols ((RhsIsReal) ? 
1 : 2)\n\n// PEEL_COMPLEX loop factor.\n#define PEEL_COMPLEX 3\n\ntemplate<typename Scalar, typename Packet, typename Index, const Index accRows, bool ConjugateLhs, bool ConjugateRhs, bool LhsIsReal, bool RhsIsReal>\nEIGEN_ALWAYS_INLINE void MICRO_COMPLEX_EXTRA_COL(\n  const Scalar* &lhs_ptr_real, const Scalar* &lhs_ptr_imag,\n  const Scalar* &rhs_ptr_real, const Scalar* &rhs_ptr_imag,\n  PacketBlock<Packet,1> &accReal, PacketBlock<Packet,1> &accImag,\n  Index remaining_rows,\n  Index remaining_cols)\n{\n  Packet rhsV[1], rhsVi[1];\n  rhsV[0] = pset1<Packet>(rhs_ptr_real[0]);\n  if(!RhsIsReal) rhsVi[0] = pset1<Packet>(rhs_ptr_imag[0]);\n  pgerc<1, Scalar, Packet, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(&accReal, &accImag, lhs_ptr_real, lhs_ptr_imag, rhsV, rhsVi);\n  lhs_ptr_real += remaining_rows;\n  if(!LhsIsReal) lhs_ptr_imag += remaining_rows;\n  else EIGEN_UNUSED_VARIABLE(lhs_ptr_imag);\n  rhs_ptr_real += remaining_cols;\n  if(!RhsIsReal) rhs_ptr_imag += remaining_cols;\n  else EIGEN_UNUSED_VARIABLE(rhs_ptr_imag);\n}\n\ntemplate<typename Scalar, typename Packet, typename Packetc, typename DataMapper, typename Index, const Index accRows, const Index accCols, bool ConjugateLhs, bool ConjugateRhs, bool LhsIsReal, bool RhsIsReal>\nEIGEN_STRONG_INLINE void gemm_complex_extra_col(\n  const DataMapper& res,\n  const Scalar* lhs_base,\n  const Scalar* rhs_base,\n  Index depth,\n  Index strideA,\n  Index offsetA,\n  Index strideB,\n  Index row,\n  Index col,\n  Index remaining_rows,\n  Index remaining_cols,\n  const Packet& pAlphaReal,\n  const Packet& pAlphaImag)\n{\n  const Scalar* rhs_ptr_real = rhs_base;\n  const Scalar* rhs_ptr_imag;\n  if(!RhsIsReal) rhs_ptr_imag = rhs_base + remaining_cols*strideB;\n  else EIGEN_UNUSED_VARIABLE(rhs_ptr_imag);\n  const Scalar* lhs_ptr_real = lhs_base + advanceRows*row*strideA + remaining_rows*offsetA;\n  const Scalar* lhs_ptr_imag;\n  if(!LhsIsReal) lhs_ptr_imag = lhs_ptr_real + remaining_rows*strideA;\n  else EIGEN_UNUSED_VARIABLE(lhs_ptr_imag);\n  PacketBlock<Packet,1> accReal, accImag;\n  PacketBlock<Packet,1> taccReal, taccImag;\n  PacketBlock<Packetc,1> acc0, acc1;\n\n  bsetzero<Scalar, Packet>(accReal);\n  bsetzero<Scalar, Packet>(accImag);\n\n  Index remaining_depth = (depth & -accRows);\n  Index k = 0;\n  for(; k + PEEL_COMPLEX <= remaining_depth; k+= PEEL_COMPLEX)\n  {\n    EIGEN_POWER_PREFETCH(rhs_ptr_real);\n    if(!RhsIsReal) {\n      EIGEN_POWER_PREFETCH(rhs_ptr_imag);\n    }\n    EIGEN_POWER_PREFETCH(lhs_ptr_real);\n    if(!LhsIsReal) {\n      EIGEN_POWER_PREFETCH(lhs_ptr_imag);\n    }\n    for (int l = 0; l < PEEL_COMPLEX; l++) {\n      MICRO_COMPLEX_EXTRA_COL<Scalar, Packet, Index, accRows, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(lhs_ptr_real, lhs_ptr_imag, rhs_ptr_real, rhs_ptr_imag, accReal, accImag, remaining_rows, remaining_cols);\n    }\n  }\n  for(; k < remaining_depth; k++)\n  {\n    MICRO_COMPLEX_EXTRA_COL<Scalar, Packet, Index, accRows, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(lhs_ptr_real, lhs_ptr_imag, rhs_ptr_real, rhs_ptr_imag, accReal, accImag, remaining_rows, remaining_cols);\n  }\n\n  for(; k < depth; k++)\n  {\n    Packet rhsV[1], rhsVi[1];\n    rhsV[0] = pset1<Packet>(rhs_ptr_real[0]);\n    if(!RhsIsReal) rhsVi[0] = pset1<Packet>(rhs_ptr_imag[0]);\n    pgerc<1, Scalar, Packet, Index, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(&accReal, &accImag, lhs_ptr_real, lhs_ptr_imag, rhsV, rhsVi, remaining_rows);\n    lhs_ptr_real += remaining_rows;\n    if(!LhsIsReal) lhs_ptr_imag += 
remaining_rows;\n    rhs_ptr_real += remaining_cols;\n    if(!RhsIsReal) rhs_ptr_imag += remaining_cols;\n  }\n\n  bscalec<Packet,1>(accReal, accImag, pAlphaReal, pAlphaImag, taccReal, taccImag);\n  bcouple_common<Packet, Packetc>(taccReal, taccImag, acc0, acc1);\n\n  if ((sizeof(Scalar) == sizeof(float)) && (remaining_rows == 1))\n  {\n    res(row + 0, col + 0) += pfirst<Packetc>(acc0.packet[0]);\n  } else {\n    acc0.packet[0] += res.template loadPacket<Packetc>(row + 0, col + 0);\n    res.template storePacketBlock<Packetc,1>(row + 0, col + 0, acc0);\n    if(remaining_rows > accColsC) {\n      res(row + accColsC, col + 0) += pfirst<Packetc>(acc1.packet[0]);\n    }\n  }\n}\n\ntemplate<typename Scalar, typename Packet, typename Index, const Index accRows, bool ConjugateLhs, bool ConjugateRhs, bool LhsIsReal, bool RhsIsReal>\nEIGEN_ALWAYS_INLINE void MICRO_COMPLEX_EXTRA_ROW(\n  const Scalar* &lhs_ptr_real, const Scalar* &lhs_ptr_imag,\n  const Scalar* &rhs_ptr_real, const Scalar* &rhs_ptr_imag,\n  PacketBlock<Packet,4> &accReal, PacketBlock<Packet,4> &accImag,\n  Index remaining_rows)\n{\n  Packet rhsV[4], rhsVi[4];\n  pbroadcast4_old<Packet>(rhs_ptr_real, rhsV[0], rhsV[1], rhsV[2], rhsV[3]);\n  if(!RhsIsReal) pbroadcast4_old<Packet>(rhs_ptr_imag, rhsVi[0], rhsVi[1], rhsVi[2], rhsVi[3]);\n  pgerc<4, Scalar, Packet, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(&accReal, &accImag, lhs_ptr_real, lhs_ptr_imag, rhsV, rhsVi);\n  lhs_ptr_real += remaining_rows;\n  if(!LhsIsReal) lhs_ptr_imag += remaining_rows;\n  else EIGEN_UNUSED_VARIABLE(lhs_ptr_imag);\n  rhs_ptr_real += accRows;\n  if(!RhsIsReal) rhs_ptr_imag += accRows;\n  else EIGEN_UNUSED_VARIABLE(rhs_ptr_imag);\n}\n\ntemplate<typename Scalar, typename Packet, typename Packetc, typename DataMapper, typename Index, const Index accRows, const Index accCols, bool ConjugateLhs, bool ConjugateRhs, bool LhsIsReal, bool RhsIsReal>\nEIGEN_STRONG_INLINE void gemm_complex_extra_row(\n  const DataMapper& res,\n  const Scalar* lhs_base,\n  const Scalar* rhs_base,\n  Index depth,\n  Index strideA,\n  Index offsetA,\n  Index strideB,\n  Index row,\n  Index col,\n  Index rows,\n  Index cols,\n  Index remaining_rows,\n  const Packet& pAlphaReal,\n  const Packet& pAlphaImag,\n  const Packet& pMask)\n{\n  const Scalar* rhs_ptr_real = rhs_base;\n  const Scalar* rhs_ptr_imag;\n  if(!RhsIsReal) rhs_ptr_imag = rhs_base + accRows*strideB;\n  else EIGEN_UNUSED_VARIABLE(rhs_ptr_imag);\n  const Scalar* lhs_ptr_real = lhs_base + advanceRows*row*strideA + remaining_rows*offsetA;\n  const Scalar* lhs_ptr_imag;\n  if(!LhsIsReal) lhs_ptr_imag = lhs_ptr_real + remaining_rows*strideA;\n  else EIGEN_UNUSED_VARIABLE(lhs_ptr_imag);\n  PacketBlock<Packet,4> accReal, accImag;\n  PacketBlock<Packet,4> taccReal, taccImag;\n  PacketBlock<Packetc,4> acc0, acc1;\n  PacketBlock<Packetc,8> tRes;\n\n  bsetzero<Scalar, Packet>(accReal);\n  bsetzero<Scalar, Packet>(accImag);\n\n  Index remaining_depth = (col + accRows < cols) ? 
depth : (depth & -accRows);\n  Index k = 0;\n  for(; k + PEEL_COMPLEX <= remaining_depth; k+= PEEL_COMPLEX)\n  {\n    EIGEN_POWER_PREFETCH(rhs_ptr_real);\n    if(!RhsIsReal) {\n      EIGEN_POWER_PREFETCH(rhs_ptr_imag);\n    }\n    EIGEN_POWER_PREFETCH(lhs_ptr_real);\n    if(!LhsIsReal) {\n      EIGEN_POWER_PREFETCH(lhs_ptr_imag);\n    }\n    for (int l = 0; l < PEEL_COMPLEX; l++) {\n      MICRO_COMPLEX_EXTRA_ROW<Scalar, Packet, Index, accRows, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(lhs_ptr_real, lhs_ptr_imag, rhs_ptr_real, rhs_ptr_imag, accReal, accImag, remaining_rows);\n    }\n  }\n  for(; k < remaining_depth; k++)\n  {\n    MICRO_COMPLEX_EXTRA_ROW<Scalar, Packet, Index, accRows, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(lhs_ptr_real, lhs_ptr_imag, rhs_ptr_real, rhs_ptr_imag, accReal, accImag, remaining_rows);\n  }\n\n  if ((remaining_depth == depth) && (rows >= accCols))\n  {\n    bload<DataMapper, Packetc, Index, accColsC, 0, ColMajor>(tRes, res, row, col);\n    bscalec<Packet>(accReal, accImag, pAlphaReal, pAlphaImag, taccReal, taccImag, pMask);\n    bcouple<Packet, Packetc>(taccReal, taccImag, tRes, acc0, acc1);\n    res.template storePacketBlock<Packetc,4>(row + 0, col, acc0);\n    res.template storePacketBlock<Packetc,4>(row + accColsC, col, acc1);\n  } else {\n    for(; k < depth; k++)\n    {\n      Packet rhsV[4], rhsVi[4];\n      pbroadcast4_old<Packet>(rhs_ptr_real, rhsV[0], rhsV[1], rhsV[2], rhsV[3]);\n      if(!RhsIsReal) pbroadcast4_old<Packet>(rhs_ptr_imag, rhsVi[0], rhsVi[1], rhsVi[2], rhsVi[3]);\n      pgerc<4, Scalar, Packet, Index, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(&accReal, &accImag, lhs_ptr_real, lhs_ptr_imag, rhsV, rhsVi, remaining_rows);\n      lhs_ptr_real += remaining_rows;\n      if(!LhsIsReal) lhs_ptr_imag += remaining_rows;\n      rhs_ptr_real += accRows;\n      if(!RhsIsReal) rhs_ptr_imag += accRows;\n    }\n\n    bscalec<Packet,4>(accReal, accImag, pAlphaReal, pAlphaImag, taccReal, taccImag);\n    bcouple_common<Packet, Packetc>(taccReal, taccImag, acc0, acc1);\n\n    if ((sizeof(Scalar) == sizeof(float)) && (remaining_rows == 1))\n    {\n      for(Index j = 0; j < 4; j++) {\n        res(row + 0, col + j) += pfirst<Packetc>(acc0.packet[j]);\n      }\n    } else {\n      for(Index j = 0; j < 4; j++) {\n        PacketBlock<Packetc,1> acc2;\n        acc2.packet[0] = res.template loadPacket<Packetc>(row + 0, col + j) + acc0.packet[j];\n        res.template storePacketBlock<Packetc,1>(row + 0, col + j, acc2);\n        if(remaining_rows > accColsC) {\n          res(row + accColsC, col + j) += pfirst<Packetc>(acc1.packet[j]);\n        }\n      }\n    }\n  }\n}\n\n#define MICRO_COMPLEX_UNROLL(func) \\\n  func(0) func(1) func(2) func(3) func(4)\n\n#define MICRO_COMPLEX_UNROLL_WORK(func, func2, peel) \\\n    MICRO_COMPLEX_UNROLL(func2); \\\n    func(0,peel) func(1,peel) func(2,peel) func(3,peel) func(4,peel)\n\n#define MICRO_COMPLEX_LOAD_ONE(iter) \\\n  if (unroll_factor > iter) { \\\n    lhsV##iter = ploadLhs<Scalar, Packet>(lhs_ptr_real##iter); \\\n    lhs_ptr_real##iter += accCols; \\\n    if(!LhsIsReal) { \\\n      lhsVi##iter = ploadLhs<Scalar, Packet>(lhs_ptr_imag##iter); \\\n      lhs_ptr_imag##iter += accCols; \\\n    } else { \\\n      EIGEN_UNUSED_VARIABLE(lhsVi##iter); \\\n    } \\\n  } else { \\\n    EIGEN_UNUSED_VARIABLE(lhsV##iter); \\\n    EIGEN_UNUSED_VARIABLE(lhsVi##iter); \\\n  }\n\n#define MICRO_COMPLEX_WORK_ONE4(iter, peel) \\\n  if (unroll_factor > iter) { \\\n    pgerc_common<4, Packet, ConjugateLhs, 
ConjugateRhs, LhsIsReal, RhsIsReal>(&accReal##iter, &accImag##iter, lhsV##iter, lhsVi##iter, rhsV##peel, rhsVi##peel); \\\n  }\n\n#define MICRO_COMPLEX_WORK_ONE1(iter, peel) \\\n  if (unroll_factor > iter) { \\\n    pgerc_common<1, Packet, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(&accReal##iter, &accImag##iter, lhsV##iter, lhsVi##iter, rhsV##peel, rhsVi##peel); \\\n  }\n\n#define MICRO_COMPLEX_TYPE_PEEL4(func, func2, peel) \\\n  if (PEEL_COMPLEX > peel) { \\\n    Packet lhsV0, lhsV1, lhsV2, lhsV3, lhsV4; \\\n    Packet lhsVi0, lhsVi1, lhsVi2, lhsVi3, lhsVi4; \\\n    pbroadcast4_old<Packet>(rhs_ptr_real + (accRows * peel), rhsV##peel[0], rhsV##peel[1], rhsV##peel[2], rhsV##peel[3]); \\\n    if(!RhsIsReal) { \\\n      pbroadcast4_old<Packet>(rhs_ptr_imag + (accRows * peel), rhsVi##peel[0], rhsVi##peel[1], rhsVi##peel[2], rhsVi##peel[3]); \\\n    } else { \\\n      EIGEN_UNUSED_VARIABLE(rhsVi##peel); \\\n    } \\\n    MICRO_COMPLEX_UNROLL_WORK(func, func2, peel) \\\n  } else { \\\n    EIGEN_UNUSED_VARIABLE(rhsV##peel); \\\n    EIGEN_UNUSED_VARIABLE(rhsVi##peel); \\\n  }\n\n#define MICRO_COMPLEX_TYPE_PEEL1(func, func2, peel) \\\n  if (PEEL_COMPLEX > peel) { \\\n    Packet lhsV0, lhsV1, lhsV2, lhsV3, lhsV4; \\\n    Packet lhsVi0, lhsVi1, lhsVi2, lhsVi3, lhsVi4; \\\n    rhsV##peel[0] = pset1<Packet>(rhs_ptr_real[remaining_cols * peel]); \\\n    if(!RhsIsReal) { \\\n      rhsVi##peel[0] = pset1<Packet>(rhs_ptr_imag[remaining_cols * peel]); \\\n    } else { \\\n      EIGEN_UNUSED_VARIABLE(rhsVi##peel); \\\n    } \\\n    MICRO_COMPLEX_UNROLL_WORK(func, func2, peel) \\\n  } else { \\\n    EIGEN_UNUSED_VARIABLE(rhsV##peel); \\\n    EIGEN_UNUSED_VARIABLE(rhsVi##peel); \\\n  }\n\n#define MICRO_COMPLEX_UNROLL_TYPE_PEEL(M, func, func1, func2) \\\n  Packet rhsV0[M], rhsV1[M], rhsV2[M], rhsV3[M], rhsV4[M], rhsV5[M], rhsV6[M], rhsV7[M], rhsV8[M], rhsV9[M]; \\\n  Packet rhsVi0[M], rhsVi1[M], rhsVi2[M], rhsVi3[M], rhsVi4[M], rhsVi5[M], rhsVi6[M], rhsVi7[M], rhsVi8[M], rhsVi9[M]; \\\n  func(func1,func2,0); func(func1,func2,1); \\\n  func(func1,func2,2); func(func1,func2,3); \\\n  func(func1,func2,4); func(func1,func2,5); \\\n  func(func1,func2,6); func(func1,func2,7); \\\n  func(func1,func2,8); func(func1,func2,9);\n\n#define MICRO_COMPLEX_UNROLL_TYPE_ONE(M, func, func1, func2) \\\n  Packet rhsV0[M], rhsVi0[M];\\\n  func(func1,func2,0);\n\n#define MICRO_COMPLEX_ONE_PEEL4 \\\n  MICRO_COMPLEX_UNROLL_TYPE_PEEL(4, MICRO_COMPLEX_TYPE_PEEL4, MICRO_COMPLEX_WORK_ONE4, MICRO_COMPLEX_LOAD_ONE); \\\n  rhs_ptr_real += (accRows * PEEL_COMPLEX); \\\n  if(!RhsIsReal) rhs_ptr_imag += (accRows * PEEL_COMPLEX);\n\n#define MICRO_COMPLEX_ONE4 \\\n  MICRO_COMPLEX_UNROLL_TYPE_ONE(4, MICRO_COMPLEX_TYPE_PEEL4, MICRO_COMPLEX_WORK_ONE4, MICRO_COMPLEX_LOAD_ONE); \\\n  rhs_ptr_real += accRows; \\\n  if(!RhsIsReal) rhs_ptr_imag += accRows;\n\n#define MICRO_COMPLEX_ONE_PEEL1 \\\n  MICRO_COMPLEX_UNROLL_TYPE_PEEL(1, MICRO_COMPLEX_TYPE_PEEL1, MICRO_COMPLEX_WORK_ONE1, MICRO_COMPLEX_LOAD_ONE); \\\n  rhs_ptr_real += (remaining_cols * PEEL_COMPLEX); \\\n  if(!RhsIsReal) rhs_ptr_imag += (remaining_cols * PEEL_COMPLEX);\n\n#define MICRO_COMPLEX_ONE1 \\\n  MICRO_COMPLEX_UNROLL_TYPE_ONE(1, MICRO_COMPLEX_TYPE_PEEL1, MICRO_COMPLEX_WORK_ONE1, MICRO_COMPLEX_LOAD_ONE); \\\n  rhs_ptr_real += remaining_cols; \\\n  if(!RhsIsReal) rhs_ptr_imag += remaining_cols;\n\n#define MICRO_COMPLEX_DST_PTR_ONE(iter) \\\n  if (unroll_factor > iter) { \\\n    bsetzero<Scalar, Packet>(accReal##iter); \\\n    bsetzero<Scalar, Packet>(accImag##iter); \\\n  } else { 
\\\n    EIGEN_UNUSED_VARIABLE(accReal##iter); \\\n    EIGEN_UNUSED_VARIABLE(accImag##iter); \\\n  }\n\n#define MICRO_COMPLEX_DST_PTR MICRO_COMPLEX_UNROLL(MICRO_COMPLEX_DST_PTR_ONE)\n\n#define MICRO_COMPLEX_SRC_PTR_ONE(iter) \\\n  if (unroll_factor > iter) { \\\n    lhs_ptr_real##iter = lhs_base + ( ((advanceRows*row)/accCols) + iter*advanceRows )*strideA*accCols + accCols*offsetA; \\\n    if(!LhsIsReal) { \\\n      lhs_ptr_imag##iter = lhs_ptr_real##iter + accCols*strideA; \\\n    } else { \\\n      EIGEN_UNUSED_VARIABLE(lhs_ptr_imag##iter); \\\n    } \\\n  } else { \\\n    EIGEN_UNUSED_VARIABLE(lhs_ptr_real##iter); \\\n    EIGEN_UNUSED_VARIABLE(lhs_ptr_imag##iter); \\\n  }\n\n#define MICRO_COMPLEX_SRC_PTR MICRO_COMPLEX_UNROLL(MICRO_COMPLEX_SRC_PTR_ONE)\n\n#define MICRO_COMPLEX_PREFETCH_ONE(iter) \\\n  if (unroll_factor > iter) { \\\n    EIGEN_POWER_PREFETCH(lhs_ptr_real##iter); \\\n    if(!LhsIsReal) { \\\n      EIGEN_POWER_PREFETCH(lhs_ptr_imag##iter); \\\n    } \\\n  }\n\n#define MICRO_COMPLEX_PREFETCH MICRO_COMPLEX_UNROLL(MICRO_COMPLEX_PREFETCH_ONE)\n\n#define MICRO_COMPLEX_STORE_ONE(iter) \\\n  if (unroll_factor > iter) { \\\n    bload<DataMapper, Packetc, Index, accColsC, 0, ColMajor>(tRes, res, row + iter*accCols, col); \\\n    bscalec<Packet,4>(accReal##iter, accImag##iter, pAlphaReal, pAlphaImag, taccReal, taccImag); \\\n    bcouple<Packet, Packetc>(taccReal, taccImag, tRes, acc0, acc1); \\\n    res.template storePacketBlock<Packetc,4>(row + iter*accCols + 0, col, acc0); \\\n    res.template storePacketBlock<Packetc,4>(row + iter*accCols + accColsC, col, acc1); \\\n  }\n\n#define MICRO_COMPLEX_STORE MICRO_COMPLEX_UNROLL(MICRO_COMPLEX_STORE_ONE)\n\n#define MICRO_COMPLEX_COL_STORE_ONE(iter) \\\n  if (unroll_factor > iter) { \\\n    bload<DataMapper, Packetc, Index, accColsC, 0, ColMajor>(tRes, res, row + iter*accCols, col); \\\n    bscalec<Packet,1>(accReal##iter, accImag##iter, pAlphaReal, pAlphaImag, taccReal, taccImag); \\\n    bcouple<Packet, Packetc>(taccReal, taccImag, tRes, acc0, acc1); \\\n    res.template storePacketBlock<Packetc,1>(row + iter*accCols + 0, col, acc0); \\\n    res.template storePacketBlock<Packetc,1>(row + iter*accCols + accColsC, col, acc1); \\\n  }\n\n#define MICRO_COMPLEX_COL_STORE MICRO_COMPLEX_UNROLL(MICRO_COMPLEX_COL_STORE_ONE)\n\ntemplate<int unroll_factor, typename Scalar, typename Packet, typename Packetc, typename DataMapper, typename Index, const Index accRows, const Index accCols, bool ConjugateLhs, bool ConjugateRhs, bool LhsIsReal, bool RhsIsReal>\nEIGEN_STRONG_INLINE void gemm_complex_unrolled_iteration(\n  const DataMapper& res,\n  const Scalar* lhs_base,\n  const Scalar* rhs_base,\n  Index depth,\n  Index strideA,\n  Index offsetA,\n  Index strideB,\n  Index& row,\n  Index col,\n  const Packet& pAlphaReal,\n  const Packet& pAlphaImag)\n{\n  const Scalar* rhs_ptr_real = rhs_base;\n  const Scalar* rhs_ptr_imag;\n  if(!RhsIsReal) {\n    rhs_ptr_imag = rhs_base + accRows*strideB;\n  } else {\n    EIGEN_UNUSED_VARIABLE(rhs_ptr_imag);\n  }\n  const Scalar* lhs_ptr_real0 = NULL, * lhs_ptr_imag0 = NULL, * lhs_ptr_real1 = NULL, * lhs_ptr_imag1 = NULL;\n  const Scalar* lhs_ptr_real2 = NULL, * lhs_ptr_imag2 = NULL, * lhs_ptr_real3 = NULL, * lhs_ptr_imag3 = NULL;\n  const Scalar* lhs_ptr_real4 = NULL, * lhs_ptr_imag4 = NULL;\n  PacketBlock<Packet,4> accReal0, accImag0, accReal1, accImag1;\n  PacketBlock<Packet,4> accReal2, accImag2, accReal3, accImag3;\n  PacketBlock<Packet,4> accReal4, accImag4;\n  PacketBlock<Packet,4> taccReal, taccImag;\n  
PacketBlock<Packetc,4> acc0, acc1;\n  PacketBlock<Packetc,8> tRes;\n\n  MICRO_COMPLEX_SRC_PTR\n  MICRO_COMPLEX_DST_PTR\n\n  Index k = 0;\n  for(; k + PEEL_COMPLEX <= depth; k+= PEEL_COMPLEX)\n  {\n    EIGEN_POWER_PREFETCH(rhs_ptr_real);\n    if(!RhsIsReal) {\n      EIGEN_POWER_PREFETCH(rhs_ptr_imag);\n    }\n    MICRO_COMPLEX_PREFETCH\n    MICRO_COMPLEX_ONE_PEEL4\n  }\n  for(; k < depth; k++)\n  {\n    MICRO_COMPLEX_ONE4\n  }\n  MICRO_COMPLEX_STORE\n\n  row += unroll_factor*accCols;\n}\n\ntemplate<int unroll_factor, typename Scalar, typename Packet, typename Packetc, typename DataMapper, typename Index, const Index accCols, bool ConjugateLhs, bool ConjugateRhs, bool LhsIsReal, bool RhsIsReal>\nEIGEN_STRONG_INLINE void gemm_complex_unrolled_col_iteration(\n  const DataMapper& res,\n  const Scalar* lhs_base,\n  const Scalar* rhs_base,\n  Index depth,\n  Index strideA,\n  Index offsetA,\n  Index strideB,\n  Index& row,\n  Index col,\n  Index remaining_cols,\n  const Packet& pAlphaReal,\n  const Packet& pAlphaImag)\n{\n  const Scalar* rhs_ptr_real = rhs_base;\n  const Scalar* rhs_ptr_imag;\n  if(!RhsIsReal) {\n    rhs_ptr_imag = rhs_base + remaining_cols*strideB;\n  } else {\n    EIGEN_UNUSED_VARIABLE(rhs_ptr_imag);\n  }\n  const Scalar* lhs_ptr_real0 = NULL, * lhs_ptr_imag0 = NULL, * lhs_ptr_real1 = NULL, * lhs_ptr_imag1 = NULL;\n  const Scalar* lhs_ptr_real2 = NULL, * lhs_ptr_imag2 = NULL, * lhs_ptr_real3 = NULL, * lhs_ptr_imag3 = NULL;\n  const Scalar* lhs_ptr_real4 = NULL, * lhs_ptr_imag4 = NULL;\n  PacketBlock<Packet,1> accReal0, accImag0, accReal1, accImag1;\n  PacketBlock<Packet,1> accReal2, accImag2, accReal3, accImag3;\n  PacketBlock<Packet,1> accReal4, accImag4;\n  PacketBlock<Packet,1> taccReal, taccImag;\n  PacketBlock<Packetc,1> acc0, acc1;\n  PacketBlock<Packetc,2> tRes;\n\n  MICRO_COMPLEX_SRC_PTR\n  MICRO_COMPLEX_DST_PTR\n\n  Index k = 0;\n  for(; k + PEEL_COMPLEX <= depth; k+= PEEL_COMPLEX)\n  {\n    EIGEN_POWER_PREFETCH(rhs_ptr_real);\n    if(!RhsIsReal) {\n      EIGEN_POWER_PREFETCH(rhs_ptr_imag);\n    }\n    MICRO_COMPLEX_PREFETCH\n    MICRO_COMPLEX_ONE_PEEL1\n  }\n  for(; k < depth; k++)\n  {\n    MICRO_COMPLEX_ONE1\n  }\n  MICRO_COMPLEX_COL_STORE\n\n  row += unroll_factor*accCols;\n}\n\ntemplate<typename Scalar, typename Packet, typename Packetc, typename DataMapper, typename Index, const Index accCols, bool ConjugateLhs, bool ConjugateRhs, bool LhsIsReal, bool RhsIsReal>\nEIGEN_STRONG_INLINE void gemm_complex_unrolled_col(\n  const DataMapper& res,\n  const Scalar* lhs_base,\n  const Scalar* rhs_base,\n  Index depth,\n  Index strideA,\n  Index offsetA,\n  Index strideB,\n  Index& row,\n  Index rows,\n  Index col,\n  Index remaining_cols,\n  const Packet& pAlphaReal,\n  const Packet& pAlphaImag)\n{\n#define MAX_COMPLEX_UNROLL 3\n  while(row + MAX_COMPLEX_UNROLL*accCols <= rows) {\n    gemm_complex_unrolled_col_iteration<MAX_COMPLEX_UNROLL, Scalar, Packet, Packetc, DataMapper, Index, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(res, lhs_base, rhs_base, depth, strideA, offsetA, strideB, row, col, remaining_cols, pAlphaReal, pAlphaImag);\n  }\n  switch( (rows-row)/accCols ) {\n#if MAX_COMPLEX_UNROLL > 4\n   case 4:\n     gemm_complex_unrolled_col_iteration<4, Scalar, Packet, Packetc, DataMapper, Index, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(res, lhs_base, rhs_base, depth, strideA, offsetA, strideB, row, col, remaining_cols, pAlphaReal, pAlphaImag);\n     break;\n#endif\n#if MAX_COMPLEX_UNROLL > 3\n   case 3:\n     
gemm_complex_unrolled_col_iteration<3, Scalar, Packet, Packetc, DataMapper, Index, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(res, lhs_base, rhs_base, depth, strideA, offsetA, strideB, row, col, remaining_cols, pAlphaReal, pAlphaImag);\n     break;\n#endif\n#if MAX_COMPLEX_UNROLL > 2\n   case 2:\n     gemm_complex_unrolled_col_iteration<2, Scalar, Packet, Packetc, DataMapper, Index, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(res, lhs_base, rhs_base, depth, strideA, offsetA, strideB, row, col, remaining_cols, pAlphaReal, pAlphaImag);\n     break;\n#endif\n#if MAX_COMPLEX_UNROLL > 1\n   case 1:\n     gemm_complex_unrolled_col_iteration<1, Scalar, Packet, Packetc, DataMapper, Index, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(res, lhs_base, rhs_base, depth, strideA, offsetA, strideB, row, col, remaining_cols, pAlphaReal, pAlphaImag);\n     break;\n#endif\n   default:\n     break;\n  }\n#undef MAX_COMPLEX_UNROLL\n}\n\ntemplate<typename LhsScalar, typename RhsScalar, typename Scalarc, typename Scalar, typename Index, typename Packet, typename Packetc, typename RhsPacket, typename DataMapper, const Index accRows, const Index accCols, bool ConjugateLhs, bool ConjugateRhs, bool LhsIsReal, bool RhsIsReal>\nEIGEN_STRONG_INLINE void gemm_complex(const DataMapper& res, const LhsScalar* blockAc, const RhsScalar* blockBc, Index rows, Index depth, Index cols, Scalarc alpha, Index strideA, Index strideB, Index offsetA, Index offsetB)\n{\n      const Index remaining_rows = rows % accCols;\n      const Index remaining_cols = cols % accRows;\n\n      if( strideA == -1 ) strideA = depth;\n      if( strideB == -1 ) strideB = depth;\n\n      const Packet pAlphaReal = pset1<Packet>(alpha.real());\n      const Packet pAlphaImag = pset1<Packet>(alpha.imag());\n      const Packet pMask = bmask<Packet>((const int)(remaining_rows));\n\n      const Scalar* blockA = (Scalar *) blockAc;\n      const Scalar* blockB = (Scalar *) blockBc;\n\n      Index col = 0;\n      for(; col + accRows <= cols; col += accRows)\n      {\n        const Scalar* rhs_base = blockB + advanceCols*col*strideB + accRows*offsetB;\n        const Scalar* lhs_base = blockA;\n        Index row = 0;\n\n#define MAX_COMPLEX_UNROLL 3\n        while(row + MAX_COMPLEX_UNROLL*accCols <= rows) {\n          gemm_complex_unrolled_iteration<MAX_COMPLEX_UNROLL, Scalar, Packet, Packetc, DataMapper, Index, accRows, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(res, lhs_base, rhs_base, depth, strideA, offsetA, strideB, row, col, pAlphaReal, pAlphaImag);\n        }\n        switch( (rows-row)/accCols ) {\n#if MAX_COMPLEX_UNROLL > 4\n          case 4:\n            gemm_complex_unrolled_iteration<4, Scalar, Packet, Packetc, DataMapper, Index, accRows, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(res, lhs_base, rhs_base, depth, strideA, offsetA, strideB, row, col, pAlphaReal, pAlphaImag);\n            break;\n#endif\n#if MAX_COMPLEX_UNROLL > 3\n          case 3:\n            gemm_complex_unrolled_iteration<3, Scalar, Packet, Packetc, DataMapper, Index, accRows, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(res, lhs_base, rhs_base, depth, strideA, offsetA, strideB, row, col, pAlphaReal, pAlphaImag);\n            break;\n#endif\n#if MAX_COMPLEX_UNROLL > 2\n          case 2:\n            gemm_complex_unrolled_iteration<2, Scalar, Packet, Packetc, DataMapper, Index, accRows, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(res, lhs_base, rhs_base, depth, strideA, offsetA, 
strideB, row, col, pAlphaReal, pAlphaImag);\n            break;\n#endif\n#if MAX_COMPLEX_UNROLL > 1\n          case 1:\n            gemm_complex_unrolled_iteration<1, Scalar, Packet, Packetc, DataMapper, Index, accRows, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(res, lhs_base, rhs_base, depth, strideA, offsetA, strideB, row, col, pAlphaReal, pAlphaImag);\n            break;\n#endif\n          default:\n            break;\n        }\n#undef MAX_COMPLEX_UNROLL\n\n        if(remaining_rows > 0)\n        {\n          gemm_complex_extra_row<Scalar, Packet, Packetc, DataMapper, Index, accRows, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(res, lhs_base, rhs_base, depth, strideA, offsetA, strideB, row, col, rows, cols, remaining_rows, pAlphaReal, pAlphaImag, pMask);\n        }\n      }\n\n      if(remaining_cols > 0)\n      {\n        const Scalar* rhs_base = blockB + advanceCols*col*strideB + remaining_cols*offsetB;\n        const Scalar* lhs_base = blockA;\n\n        for(; col < cols; col++)\n        {\n          Index row = 0;\n\n          gemm_complex_unrolled_col<Scalar, Packet, Packetc, DataMapper, Index, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(res, lhs_base, rhs_base, depth, strideA, offsetA, strideB, row, rows, col, remaining_cols, pAlphaReal, pAlphaImag);\n\n          if (remaining_rows > 0)\n          {\n            gemm_complex_extra_col<Scalar, Packet, Packetc, DataMapper, Index, accRows, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(res, lhs_base, rhs_base, depth, strideA, offsetA, strideB, row, col, remaining_rows, remaining_cols, pAlphaReal, pAlphaImag);\n          }\n          rhs_base++;\n        }\n      }\n}\n\n#undef accColsC\n#undef advanceCols\n#undef advanceRows\n\n/************************************\n * ppc64le template specializations *\n * **********************************/\ntemplate<typename Index, typename DataMapper, int Pack1, int Pack2, typename Packet, bool Conjugate, bool PanelMode>\nstruct gemm_pack_lhs<double, Index, DataMapper, Pack1, Pack2, Packet, ColMajor, Conjugate, PanelMode>\n{\n  void operator()(double* blockA, const DataMapper& lhs, Index depth, Index rows, Index stride=0, Index offset=0);\n};\n\ntemplate<typename Index, typename DataMapper, int Pack1, int Pack2, typename Packet, bool Conjugate, bool PanelMode>\nvoid gemm_pack_lhs<double, Index, DataMapper, Pack1, Pack2, Packet, ColMajor, Conjugate, PanelMode>\n  ::operator()(double* blockA, const DataMapper& lhs, Index depth, Index rows, Index stride, Index offset)\n{\n    dhs_pack<double, Index, DataMapper, Packet2d, ColMajor, PanelMode, true> pack;\n    pack(blockA, lhs, depth, rows, stride, offset);\n}\n\ntemplate<typename Index, typename DataMapper, int Pack1, int Pack2, typename Packet, bool Conjugate, bool PanelMode>\nstruct gemm_pack_lhs<double, Index, DataMapper, Pack1, Pack2, Packet, RowMajor, Conjugate, PanelMode>\n{\n  void operator()(double* blockA, const DataMapper& lhs, Index depth, Index rows, Index stride=0, Index offset=0);\n};\n\ntemplate<typename Index, typename DataMapper, int Pack1, int Pack2, typename Packet, bool Conjugate, bool PanelMode>\nvoid gemm_pack_lhs<double, Index, DataMapper, Pack1, Pack2, Packet, RowMajor, Conjugate, PanelMode>\n  ::operator()(double* blockA, const DataMapper& lhs, Index depth, Index rows, Index stride, Index offset)\n{\n    dhs_pack<double, Index, DataMapper, Packet2d, RowMajor, PanelMode, true> pack;\n    pack(blockA, lhs, depth, rows, stride, offset);\n}\n\n#if 
EIGEN_ALTIVEC_USE_CUSTOM_PACK\ntemplate<typename Index, typename DataMapper, int nr, bool Conjugate, bool PanelMode>\nstruct gemm_pack_rhs<double, Index, DataMapper, nr, ColMajor, Conjugate, PanelMode>\n{\n  void operator()(double* blockB, const DataMapper& rhs, Index depth, Index cols, Index stride=0, Index offset=0);\n};\n\ntemplate<typename Index, typename DataMapper, int nr, bool Conjugate, bool PanelMode>\nvoid gemm_pack_rhs<double, Index, DataMapper, nr, ColMajor, Conjugate, PanelMode>\n  ::operator()(double* blockB, const DataMapper& rhs, Index depth, Index cols, Index stride, Index offset)\n{\n  dhs_pack<double, Index, DataMapper, Packet2d, ColMajor, PanelMode, false> pack;\n  pack(blockB, rhs, depth, cols, stride, offset);\n}\n\ntemplate<typename Index, typename DataMapper, int nr, bool Conjugate, bool PanelMode>\nstruct gemm_pack_rhs<double, Index, DataMapper, nr, RowMajor, Conjugate, PanelMode>\n{\n  void operator()(double* blockB, const DataMapper& rhs, Index depth, Index cols, Index stride=0, Index offset=0);\n};\n\ntemplate<typename Index, typename DataMapper, int nr, bool Conjugate, bool PanelMode>\nvoid gemm_pack_rhs<double, Index, DataMapper, nr, RowMajor, Conjugate, PanelMode>\n  ::operator()(double* blockB, const DataMapper& rhs, Index depth, Index cols, Index stride, Index offset)\n{\n  dhs_pack<double, Index, DataMapper, Packet2d, RowMajor, PanelMode, false> pack;\n  pack(blockB, rhs, depth, cols, stride, offset);\n}\n#endif\n\ntemplate<typename Index, typename DataMapper, int Pack1, int Pack2, typename Packet, bool Conjugate, bool PanelMode>\nstruct gemm_pack_lhs<float, Index, DataMapper, Pack1, Pack2, Packet, RowMajor, Conjugate, PanelMode>\n{\n  void operator()(float* blockA, const DataMapper& lhs, Index depth, Index rows, Index stride=0, Index offset=0);\n};\n\ntemplate<typename Index, typename DataMapper, int Pack1, int Pack2, typename Packet, bool Conjugate, bool PanelMode>\nvoid gemm_pack_lhs<float, Index, DataMapper, Pack1, Pack2, Packet, RowMajor, Conjugate, PanelMode>\n  ::operator()(float* blockA, const DataMapper& lhs, Index depth, Index rows, Index stride, Index offset)\n{\n  dhs_pack<float, Index, DataMapper, Packet4f, RowMajor, PanelMode, true> pack;\n  pack(blockA, lhs, depth, rows, stride, offset);\n}\n\ntemplate<typename Index, typename DataMapper, int Pack1, int Pack2, typename Packet, bool Conjugate, bool PanelMode>\nstruct gemm_pack_lhs<float, Index, DataMapper, Pack1, Pack2, Packet, ColMajor, Conjugate, PanelMode>\n{\n  void operator()(float* blockA, const DataMapper& lhs, Index depth, Index rows, Index stride=0, Index offset=0);\n};\n\ntemplate<typename Index, typename DataMapper, int Pack1, int Pack2, typename Packet, bool Conjugate, bool PanelMode>\nvoid gemm_pack_lhs<float, Index, DataMapper, Pack1, Pack2, Packet, ColMajor, Conjugate, PanelMode>\n  ::operator()(float* blockA, const DataMapper& lhs, Index depth, Index rows, Index stride, Index offset)\n{\n  dhs_pack<float, Index, DataMapper, Packet4f, ColMajor, PanelMode, true> pack;\n  pack(blockA, lhs, depth, rows, stride, offset);\n}\n\ntemplate<typename Index, typename DataMapper, int Pack1, int Pack2, typename Packet, bool Conjugate, bool PanelMode>\nstruct gemm_pack_lhs<std::complex<float>, Index, DataMapper, Pack1, Pack2, Packet, RowMajor, Conjugate, PanelMode>\n{\n  void operator()(std::complex<float>* blockA, const DataMapper& lhs, Index depth, Index rows, Index stride=0, Index offset=0);\n};\n\ntemplate<typename Index, typename DataMapper, int Pack1, int Pack2, typename Packet, bool 
Conjugate, bool PanelMode>\nvoid gemm_pack_lhs<std::complex<float>, Index, DataMapper, Pack1, Pack2, Packet, RowMajor, Conjugate, PanelMode>\n  ::operator()(std::complex<float>* blockA, const DataMapper& lhs, Index depth, Index rows, Index stride, Index offset)\n{\n  dhs_cpack<float, Index, DataMapper, Packet4f, Packet2cf, RowMajor, Conjugate, PanelMode, true> pack;\n  pack(blockA, lhs, depth, rows, stride, offset);\n}\n\ntemplate<typename Index, typename DataMapper, int Pack1, int Pack2, typename Packet, bool Conjugate, bool PanelMode>\nstruct gemm_pack_lhs<std::complex<float>, Index, DataMapper, Pack1, Pack2, Packet, ColMajor, Conjugate, PanelMode>\n{\n  void operator()(std::complex<float>* blockA, const DataMapper& lhs, Index depth, Index rows, Index stride=0, Index offset=0);\n};\n\ntemplate<typename Index, typename DataMapper, int Pack1, int Pack2, typename Packet, bool Conjugate, bool PanelMode>\nvoid gemm_pack_lhs<std::complex<float>, Index, DataMapper, Pack1, Pack2, Packet, ColMajor, Conjugate, PanelMode>\n  ::operator()(std::complex<float>* blockA, const DataMapper& lhs, Index depth, Index rows, Index stride, Index offset)\n{\n  dhs_cpack<float, Index, DataMapper, Packet4f, Packet2cf, ColMajor, Conjugate, PanelMode, true> pack;\n  pack(blockA, lhs, depth, rows, stride, offset);\n}\n\n#if EIGEN_ALTIVEC_USE_CUSTOM_PACK\ntemplate<typename Index, typename DataMapper, int nr, bool Conjugate, bool PanelMode>\nstruct gemm_pack_rhs<float, Index, DataMapper, nr, ColMajor, Conjugate, PanelMode>\n{\n  void operator()(float* blockB, const DataMapper& rhs, Index depth, Index cols, Index stride=0, Index offset=0);\n};\n\ntemplate<typename Index, typename DataMapper, int nr, bool Conjugate, bool PanelMode>\nvoid gemm_pack_rhs<float, Index, DataMapper, nr, ColMajor, Conjugate, PanelMode>\n  ::operator()(float* blockB, const DataMapper& rhs, Index depth, Index cols, Index stride, Index offset)\n{\n  dhs_pack<float, Index, DataMapper, Packet4f, ColMajor, PanelMode, false> pack;\n  pack(blockB, rhs, depth, cols, stride, offset);\n}\n\ntemplate<typename Index, typename DataMapper, int nr, bool Conjugate, bool PanelMode>\nstruct gemm_pack_rhs<float, Index, DataMapper, nr, RowMajor, Conjugate, PanelMode>\n{\n  void operator()(float* blockB, const DataMapper& rhs, Index depth, Index cols, Index stride=0, Index offset=0);\n};\n\ntemplate<typename Index, typename DataMapper, int nr, bool Conjugate, bool PanelMode>\nvoid gemm_pack_rhs<float, Index, DataMapper, nr, RowMajor, Conjugate, PanelMode>\n  ::operator()(float* blockB, const DataMapper& rhs, Index depth, Index cols, Index stride, Index offset)\n{\n  dhs_pack<float, Index, DataMapper, Packet4f, RowMajor, PanelMode, false> pack;\n  pack(blockB, rhs, depth, cols, stride, offset);\n}\n#endif\n\ntemplate<typename Index, typename DataMapper, int nr, bool Conjugate, bool PanelMode>\nstruct gemm_pack_rhs<std::complex<float>, Index, DataMapper, nr, ColMajor, Conjugate, PanelMode>\n{\n  void operator()(std::complex<float>* blockB, const DataMapper& rhs, Index depth, Index cols, Index stride=0, Index offset=0);\n};\n\ntemplate<typename Index, typename DataMapper, int nr, bool Conjugate, bool PanelMode>\nvoid gemm_pack_rhs<std::complex<float>, Index, DataMapper, nr, ColMajor, Conjugate, PanelMode>\n  ::operator()(std::complex<float>* blockB, const DataMapper& rhs, Index depth, Index cols, Index stride, Index offset)\n{\n  dhs_cpack<float, Index, DataMapper, Packet4f, Packet2cf, ColMajor, Conjugate, PanelMode, false> pack;\n  pack(blockB, rhs, depth, cols, 
stride, offset);\n}\n\ntemplate<typename Index, typename DataMapper, int nr, bool Conjugate, bool PanelMode>\nstruct gemm_pack_rhs<std::complex<float>, Index, DataMapper, nr, RowMajor, Conjugate, PanelMode>\n{\n  void operator()(std::complex<float>* blockB, const DataMapper& rhs, Index depth, Index cols, Index stride=0, Index offset=0);\n};\n\ntemplate<typename Index, typename DataMapper, int nr, bool Conjugate, bool PanelMode>\nvoid gemm_pack_rhs<std::complex<float>, Index, DataMapper, nr, RowMajor, Conjugate, PanelMode>\n  ::operator()(std::complex<float>* blockB, const DataMapper& rhs, Index depth, Index cols, Index stride, Index offset)\n{\n  dhs_cpack<float, Index, DataMapper, Packet4f, Packet2cf, RowMajor, Conjugate, PanelMode, false> pack;\n  pack(blockB, rhs, depth, cols, stride, offset);\n}\n\ntemplate<typename Index, typename DataMapper, int Pack1, int Pack2, typename Packet, bool Conjugate, bool PanelMode>\nstruct gemm_pack_lhs<std::complex<double>, Index, DataMapper, Pack1, Pack2, Packet, RowMajor, Conjugate, PanelMode>\n{\n  void operator()(std::complex<double>* blockA, const DataMapper& lhs, Index depth, Index rows, Index stride=0, Index offset=0);\n};\n\ntemplate<typename Index, typename DataMapper, int Pack1, int Pack2, typename Packet, bool Conjugate, bool PanelMode>\nvoid gemm_pack_lhs<std::complex<double>, Index, DataMapper, Pack1, Pack2, Packet, RowMajor, Conjugate, PanelMode>\n  ::operator()(std::complex<double>* blockA, const DataMapper& lhs, Index depth, Index rows, Index stride, Index offset)\n{\n  dhs_cpack<double, Index, DataMapper, Packet2d, Packet1cd, RowMajor, Conjugate, PanelMode, true> pack;\n  pack(blockA, lhs, depth, rows, stride, offset);\n}\n\ntemplate<typename Index, typename DataMapper, int Pack1, int Pack2, typename Packet, bool Conjugate, bool PanelMode>\nstruct gemm_pack_lhs<std::complex<double>, Index, DataMapper, Pack1, Pack2, Packet, ColMajor, Conjugate, PanelMode>\n{\n  void operator()(std::complex<double>* blockA, const DataMapper& lhs, Index depth, Index rows, Index stride=0, Index offset=0);\n};\n\ntemplate<typename Index, typename DataMapper, int Pack1, int Pack2, typename Packet, bool Conjugate, bool PanelMode>\nvoid gemm_pack_lhs<std::complex<double>, Index, DataMapper, Pack1, Pack2, Packet, ColMajor, Conjugate, PanelMode>\n  ::operator()(std::complex<double>* blockA, const DataMapper& lhs, Index depth, Index rows, Index stride, Index offset)\n{\n  dhs_cpack<double, Index, DataMapper, Packet2d, Packet1cd, ColMajor, Conjugate, PanelMode, true> pack;\n  pack(blockA, lhs, depth, rows, stride, offset);\n}\n\ntemplate<typename Index, typename DataMapper, int nr, bool Conjugate, bool PanelMode>\nstruct gemm_pack_rhs<std::complex<double>, Index, DataMapper, nr, ColMajor, Conjugate, PanelMode>\n{\n  void operator()(std::complex<double>* blockB, const DataMapper& rhs, Index depth, Index cols, Index stride=0, Index offset=0);\n};\n\ntemplate<typename Index, typename DataMapper, int nr, bool Conjugate, bool PanelMode>\nvoid gemm_pack_rhs<std::complex<double>, Index, DataMapper, nr, ColMajor, Conjugate, PanelMode>\n  ::operator()(std::complex<double>* blockB, const DataMapper& rhs, Index depth, Index cols, Index stride, Index offset)\n{\n  dhs_cpack<double, Index, DataMapper, Packet2d, Packet1cd, ColMajor, Conjugate, PanelMode, false> pack;\n  pack(blockB, rhs, depth, cols, stride, offset);\n}\n\ntemplate<typename Index, typename DataMapper, int nr, bool Conjugate, bool PanelMode>\nstruct gemm_pack_rhs<std::complex<double>, Index, DataMapper, nr, 
RowMajor, Conjugate, PanelMode>\n{\n  void operator()(std::complex<double>* blockB, const DataMapper& rhs, Index depth, Index cols, Index stride=0, Index offset=0);\n};\n\ntemplate<typename Index, typename DataMapper, int nr, bool Conjugate, bool PanelMode>\nvoid gemm_pack_rhs<std::complex<double>, Index, DataMapper, nr, RowMajor, Conjugate, PanelMode>\n  ::operator()(std::complex<double>* blockB, const DataMapper& rhs, Index depth, Index cols, Index stride, Index offset)\n{\n  dhs_cpack<double, Index, DataMapper, Packet2d, Packet1cd, RowMajor, Conjugate, PanelMode, false> pack;\n  pack(blockB, rhs, depth, cols, stride, offset);\n}\n\n// ********* gebp specializations *********\ntemplate<typename Index, typename DataMapper, int mr, int nr, bool ConjugateLhs, bool ConjugateRhs>\nstruct gebp_kernel<float, float, Index, DataMapper, mr, nr, ConjugateLhs, ConjugateRhs>\n{\n  typedef typename quad_traits<float>::vectortype   Packet;\n  typedef typename quad_traits<float>::rhstype      RhsPacket;\n\n  void operator()(const DataMapper& res, const float* blockA, const float* blockB,\n                  Index rows, Index depth, Index cols, float alpha,\n                  Index strideA=-1, Index strideB=-1, Index offsetA=0, Index offsetB=0);\n};\n\ntemplate<typename Index, typename DataMapper, int mr, int nr, bool ConjugateLhs, bool ConjugateRhs>\nvoid gebp_kernel<float, float, Index, DataMapper, mr, nr, ConjugateLhs, ConjugateRhs>\n  ::operator()(const DataMapper& res, const float* blockA, const float* blockB,\n               Index rows, Index depth, Index cols, float alpha,\n               Index strideA, Index strideB, Index offsetA, Index offsetB)\n  {\n    const Index accRows = quad_traits<float>::rows;\n    const Index accCols = quad_traits<float>::size;\n    void (*gemm_function)(const DataMapper&, const float*, const float*, Index, Index, Index, float, Index, Index, Index, Index);\n\n    #ifdef EIGEN_ALTIVEC_MMA_ONLY\n      //generate with MMA only\n      gemm_function = &Eigen::internal::gemmMMA<float, Index, Packet, RhsPacket, DataMapper, accRows, accCols>;\n    #elif defined(ALTIVEC_MMA_SUPPORT) && !defined(EIGEN_ALTIVEC_DISABLE_MMA)\n      if (__builtin_cpu_supports (\"arch_3_1\") && __builtin_cpu_supports (\"mma\")){\n        gemm_function = &Eigen::internal::gemmMMA<float, Index, Packet, RhsPacket, DataMapper, accRows, accCols>;\n      }\n      else{\n        gemm_function = &Eigen::internal::gemm<float, Index, Packet, RhsPacket, DataMapper, accRows, accCols>;\n      }\n    #else\n      gemm_function = &Eigen::internal::gemm<float, Index, Packet, RhsPacket, DataMapper, accRows, accCols>;\n    #endif\n      gemm_function(res, blockA, blockB, rows, depth, cols, alpha, strideA, strideB, offsetA, offsetB);\n  }\n\ntemplate<typename Index, typename DataMapper, int mr, int nr, bool ConjugateLhs, bool ConjugateRhs>\nstruct gebp_kernel<std::complex<float>, std::complex<float>, Index, DataMapper, mr, nr, ConjugateLhs, ConjugateRhs>\n{\n  typedef Packet4f   Packet;\n  typedef Packet2cf  Packetc;\n  typedef Packet4f   RhsPacket;\n\n  void operator()(const DataMapper& res, const std::complex<float>* blockA, const std::complex<float>* blockB,\n                  Index rows, Index depth, Index cols, std::complex<float> alpha,\n                  Index strideA=-1, Index strideB=-1, Index offsetA=0, Index offsetB=0);\n};\n\ntemplate<typename Index, typename DataMapper, int mr, int nr, bool ConjugateLhs, bool ConjugateRhs>\nvoid gebp_kernel<std::complex<float>, std::complex<float>, Index, DataMapper, mr, 
nr, ConjugateLhs, ConjugateRhs>\n  ::operator()(const DataMapper& res, const std::complex<float>* blockA, const std::complex<float>* blockB,\n               Index rows, Index depth, Index cols, std::complex<float> alpha,\n               Index strideA, Index strideB, Index offsetA, Index offsetB)\n  {\n    const Index accRows = quad_traits<float>::rows;\n    const Index accCols = quad_traits<float>::size;\n    void (*gemm_function)(const DataMapper&, const std::complex<float>*, const std::complex<float>*,\n          Index, Index, Index, std::complex<float>, Index, Index, Index, Index);\n\n    #ifdef EIGEN_ALTIVEC_MMA_ONLY\n       //generate with MMA only\n       gemm_function = &Eigen::internal::gemm_complexMMA<std::complex<float>, std::complex<float>, std::complex<float>, float, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, false, false>;\n     #elif defined(ALTIVEC_MMA_SUPPORT) && !defined(EIGEN_ALTIVEC_DISABLE_MMA)\n       if (__builtin_cpu_supports (\"arch_3_1\") && __builtin_cpu_supports (\"mma\")){\n         gemm_function = &Eigen::internal::gemm_complexMMA<std::complex<float>, std::complex<float>, std::complex<float>, float, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, false, false>;\n       }\n       else{\n         gemm_function = &Eigen::internal::gemm_complex<std::complex<float>, std::complex<float>, std::complex<float>, float, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, false, false>;\n       }\n     #else\n       gemm_function = &Eigen::internal::gemm_complex<std::complex<float>, std::complex<float>, std::complex<float>, float, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, false, false>;\n     #endif\n      gemm_function(res, blockA, blockB, rows, depth, cols, alpha, strideA, strideB, offsetA, offsetB);\n  }\n\ntemplate<typename Index, typename DataMapper, int mr, int nr, bool ConjugateLhs, bool ConjugateRhs>\nstruct gebp_kernel<float, std::complex<float>, Index, DataMapper, mr, nr, ConjugateLhs, ConjugateRhs>\n{\n  typedef Packet4f   Packet;\n  typedef Packet2cf  Packetc;\n  typedef Packet4f   RhsPacket;\n\n  void operator()(const DataMapper& res, const float* blockA, const std::complex<float>* blockB,\n                  Index rows, Index depth, Index cols, std::complex<float> alpha,\n                  Index strideA=-1, Index strideB=-1, Index offsetA=0, Index offsetB=0);\n};\n\ntemplate<typename Index, typename DataMapper, int mr, int nr, bool ConjugateLhs, bool ConjugateRhs>\nvoid gebp_kernel<float, std::complex<float>, Index, DataMapper, mr, nr, ConjugateLhs, ConjugateRhs>\n  ::operator()(const DataMapper& res, const float* blockA, const std::complex<float>* blockB,\n               Index rows, Index depth, Index cols, std::complex<float> alpha,\n               Index strideA, Index strideB, Index offsetA, Index offsetB)\n  {\n    const Index accRows = quad_traits<float>::rows;\n    const Index accCols = quad_traits<float>::size;\n    void (*gemm_function)(const DataMapper&, const float*, const std::complex<float>*,\n          Index, Index, Index, std::complex<float>, Index, Index, Index, Index);\n    #ifdef EIGEN_ALTIVEC_MMA_ONLY\n       //generate with MMA only\n       gemm_function = &Eigen::internal::gemm_complexMMA<float, std::complex<float>, std::complex<float>, float, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, true, false>;\n     
#elif defined(ALTIVEC_MMA_SUPPORT) && !defined(EIGEN_ALTIVEC_DISABLE_MMA)\n       if (__builtin_cpu_supports (\"arch_3_1\") && __builtin_cpu_supports (\"mma\")){\n         gemm_function = &Eigen::internal::gemm_complexMMA<float, std::complex<float>, std::complex<float>, float, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, true, false>;\n       }\n       else{\n         gemm_function = &Eigen::internal::gemm_complex<float, std::complex<float>, std::complex<float>, float, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, true, false>;\n       }\n     #else\n       gemm_function = &Eigen::internal::gemm_complex<float, std::complex<float>, std::complex<float>, float, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, true, false>;\n     #endif\n       gemm_function(res, blockA, blockB, rows, depth, cols, alpha, strideA, strideB, offsetA, offsetB);\n  }\n\ntemplate<typename Index, typename DataMapper, int mr, int nr, bool ConjugateLhs, bool ConjugateRhs>\nstruct gebp_kernel<std::complex<float>, float, Index, DataMapper, mr, nr, ConjugateLhs, ConjugateRhs>\n{\n  typedef Packet4f   Packet;\n  typedef Packet2cf  Packetc;\n  typedef Packet4f   RhsPacket;\n\n  void operator()(const DataMapper& res, const std::complex<float>* blockA, const float* blockB,\n                  Index rows, Index depth, Index cols, std::complex<float> alpha,\n                  Index strideA=-1, Index strideB=-1, Index offsetA=0, Index offsetB=0);\n};\n\ntemplate<typename Index, typename DataMapper, int mr, int nr, bool ConjugateLhs, bool ConjugateRhs>\nvoid gebp_kernel<std::complex<float>, float, Index, DataMapper, mr, nr, ConjugateLhs, ConjugateRhs>\n  ::operator()(const DataMapper& res, const std::complex<float>* blockA, const float* blockB,\n               Index rows, Index depth, Index cols, std::complex<float> alpha,\n               Index strideA, Index strideB, Index offsetA, Index offsetB)\n  {\n    const Index accRows = quad_traits<float>::rows;\n    const Index accCols = quad_traits<float>::size;\n    void (*gemm_function)(const DataMapper&, const std::complex<float>*, const float*,\n          Index, Index, Index, std::complex<float>, Index, Index, Index, Index);\n    #ifdef EIGEN_ALTIVEC_MMA_ONLY\n       //generate with MMA only\n       gemm_function = &Eigen::internal::gemm_complexMMA<std::complex<float>, float, std::complex<float>, float, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, false, true>;\n     #elif defined(ALTIVEC_MMA_SUPPORT) && !defined(EIGEN_ALTIVEC_DISABLE_MMA)\n       if (__builtin_cpu_supports (\"arch_3_1\") && __builtin_cpu_supports (\"mma\")){\n         gemm_function = &Eigen::internal::gemm_complexMMA<std::complex<float>, float, std::complex<float>, float, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, false, true>;\n       }\n       else{\n         gemm_function = &Eigen::internal::gemm_complex<std::complex<float>, float, std::complex<float>, float, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, false, true>;\n       }\n     #else\n       gemm_function = &Eigen::internal::gemm_complex<std::complex<float>, float, std::complex<float>, float, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, false, true>;\n     #endif\n       gemm_function(res, blockA, blockB, rows, depth, cols, alpha, 
strideA, strideB, offsetA, offsetB);\n  }\n\ntemplate<typename Index, typename DataMapper, int mr, int nr, bool ConjugateLhs, bool ConjugateRhs>\nstruct gebp_kernel<double, double, Index, DataMapper, mr, nr, ConjugateLhs, ConjugateRhs>\n{\n  typedef typename quad_traits<double>::vectortype  Packet;\n  typedef typename quad_traits<double>::rhstype     RhsPacket;\n\n  void operator()(const DataMapper& res, const double* blockA, const double* blockB,\n                  Index rows, Index depth, Index cols, double alpha,\n                  Index strideA=-1, Index strideB=-1, Index offsetA=0, Index offsetB=0);\n};\n\ntemplate<typename Index, typename DataMapper, int mr, int nr, bool ConjugateLhs, bool ConjugateRhs>\nvoid gebp_kernel<double, double, Index, DataMapper, mr, nr, ConjugateLhs, ConjugateRhs>\n  ::operator()(const DataMapper& res, const double* blockA, const double* blockB,\n               Index rows, Index depth, Index cols, double alpha,\n               Index strideA, Index strideB, Index offsetA, Index offsetB)\n  {\n    const Index accRows = quad_traits<double>::rows;\n    const Index accCols = quad_traits<double>::size;\n    void (*gemm_function)(const DataMapper&, const double*, const double*, Index, Index, Index, double, Index, Index, Index, Index);\n\n    #ifdef EIGEN_ALTIVEC_MMA_ONLY\n      //generate with MMA only\n      gemm_function = &Eigen::internal::gemmMMA<double, Index, Packet, RhsPacket, DataMapper, accRows, accCols>;\n    #elif defined(ALTIVEC_MMA_SUPPORT) && !defined(EIGEN_ALTIVEC_DISABLE_MMA)\n      if (__builtin_cpu_supports (\"arch_3_1\") && __builtin_cpu_supports (\"mma\")){\n        gemm_function = &Eigen::internal::gemmMMA<double, Index, Packet, RhsPacket, DataMapper, accRows, accCols>;\n      }\n      else{\n        gemm_function = &Eigen::internal::gemm<double, Index, Packet, RhsPacket, DataMapper, accRows, accCols>;\n      }\n    #else\n      gemm_function = &Eigen::internal::gemm<double, Index, Packet, RhsPacket, DataMapper, accRows, accCols>;\n    #endif\n      gemm_function(res, blockA, blockB, rows, depth, cols, alpha, strideA, strideB, offsetA, offsetB);\n  }\n\ntemplate<typename Index, typename DataMapper, int mr, int nr, bool ConjugateLhs, bool ConjugateRhs>\nstruct gebp_kernel<std::complex<double>, std::complex<double>, Index, DataMapper, mr, nr, ConjugateLhs, ConjugateRhs>\n{\n  typedef quad_traits<double>::vectortype   Packet;\n  typedef Packet1cd  Packetc;\n  typedef quad_traits<double>::rhstype   RhsPacket;\n\n  void operator()(const DataMapper& res, const std::complex<double>* blockA, const std::complex<double>* blockB,\n                  Index rows, Index depth, Index cols, std::complex<double> alpha,\n                  Index strideA=-1, Index strideB=-1, Index offsetA=0, Index offsetB=0);\n};\n\ntemplate<typename Index, typename DataMapper, int mr, int nr, bool ConjugateLhs, bool ConjugateRhs>\nvoid gebp_kernel<std::complex<double>, std::complex<double>, Index, DataMapper, mr, nr, ConjugateLhs, ConjugateRhs>\n  ::operator()(const DataMapper& res, const std::complex<double>* blockA, const std::complex<double>* blockB,\n               Index rows, Index depth, Index cols, std::complex<double> alpha,\n               Index strideA, Index strideB, Index offsetA, Index offsetB)\n  {\n    const Index accRows = quad_traits<double>::rows;\n    const Index accCols = quad_traits<double>::size;\n    void (*gemm_function)(const DataMapper&, const std::complex<double>*, const std::complex<double>*,\n          Index, Index, Index, std::complex<double>, 
Index, Index, Index, Index);\n    #ifdef EIGEN_ALTIVEC_MMA_ONLY\n       //generate with MMA only\n       gemm_function = &Eigen::internal::gemm_complexMMA<std::complex<double>, std::complex<double>, std::complex<double>, double, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, false, false>;\n     #elif defined(ALTIVEC_MMA_SUPPORT) && !defined(EIGEN_ALTIVEC_DISABLE_MMA)\n       if (__builtin_cpu_supports (\"arch_3_1\") && __builtin_cpu_supports (\"mma\")){\n         gemm_function = &Eigen::internal::gemm_complexMMA<std::complex<double>, std::complex<double>, std::complex<double>, double, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, false, false>;\n       }\n       else{\n         gemm_function = &Eigen::internal::gemm_complex<std::complex<double>, std::complex<double>, std::complex<double>, double, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, false, false>;\n       }\n     #else\n       gemm_function = &Eigen::internal::gemm_complex<std::complex<double>, std::complex<double>, std::complex<double>, double, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, false, false>;\n     #endif\n       gemm_function(res, blockA, blockB, rows, depth, cols, alpha, strideA, strideB, offsetA, offsetB);\n  }\n\ntemplate<typename Index, typename DataMapper, int mr, int nr, bool ConjugateLhs, bool ConjugateRhs>\nstruct gebp_kernel<std::complex<double>, double, Index, DataMapper, mr, nr, ConjugateLhs, ConjugateRhs>\n{\n  typedef quad_traits<double>::vectortype   Packet;\n  typedef Packet1cd  Packetc;\n  typedef quad_traits<double>::rhstype   RhsPacket;\n\n  void operator()(const DataMapper& res, const std::complex<double>* blockA, const double* blockB,\n                  Index rows, Index depth, Index cols, std::complex<double> alpha,\n                  Index strideA=-1, Index strideB=-1, Index offsetA=0, Index offsetB=0);\n};\n\ntemplate<typename Index, typename DataMapper, int mr, int nr, bool ConjugateLhs, bool ConjugateRhs>\nvoid gebp_kernel<std::complex<double>, double, Index, DataMapper, mr, nr, ConjugateLhs, ConjugateRhs>\n  ::operator()(const DataMapper& res, const std::complex<double>* blockA, const double* blockB,\n               Index rows, Index depth, Index cols, std::complex<double> alpha,\n               Index strideA, Index strideB, Index offsetA, Index offsetB)\n  {\n    const Index accRows = quad_traits<double>::rows;\n    const Index accCols = quad_traits<double>::size;\n    void (*gemm_function)(const DataMapper&, const std::complex<double>*, const double*,\n          Index, Index, Index, std::complex<double>, Index, Index, Index, Index);\n    #ifdef EIGEN_ALTIVEC_MMA_ONLY\n       //generate with MMA only\n       gemm_function = &Eigen::internal::gemm_complexMMA<std::complex<double>, double, std::complex<double>, double, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, false, true>;\n     #elif defined(ALTIVEC_MMA_SUPPORT) && !defined(EIGEN_ALTIVEC_DISABLE_MMA)\n       if (__builtin_cpu_supports (\"arch_3_1\") && __builtin_cpu_supports (\"mma\")){\n         gemm_function = &Eigen::internal::gemm_complexMMA<std::complex<double>, double, std::complex<double>, double, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, false, true>;\n       }\n       else{\n         gemm_function = 
&Eigen::internal::gemm_complex<std::complex<double>, double, std::complex<double>, double, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, false, true>;\n       }\n     #else\n       gemm_function = &Eigen::internal::gemm_complex<std::complex<double>, double, std::complex<double>, double, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, false, true>;\n     #endif\n       gemm_function(res, blockA, blockB, rows, depth, cols, alpha, strideA, strideB, offsetA, offsetB);\n  }\n\ntemplate<typename Index, typename DataMapper, int mr, int nr, bool ConjugateLhs, bool ConjugateRhs>\nstruct gebp_kernel<double, std::complex<double>, Index, DataMapper, mr, nr, ConjugateLhs, ConjugateRhs>\n{\n  typedef quad_traits<double>::vectortype   Packet;\n  typedef Packet1cd  Packetc;\n  typedef quad_traits<double>::rhstype   RhsPacket;\n\n  void operator()(const DataMapper& res, const double* blockA, const std::complex<double>* blockB,\n                  Index rows, Index depth, Index cols, std::complex<double> alpha,\n                  Index strideA=-1, Index strideB=-1, Index offsetA=0, Index offsetB=0);\n};\n\ntemplate<typename Index, typename DataMapper, int mr, int nr, bool ConjugateLhs, bool ConjugateRhs>\nvoid gebp_kernel<double, std::complex<double>, Index, DataMapper, mr, nr, ConjugateLhs, ConjugateRhs>\n  ::operator()(const DataMapper& res, const double* blockA, const std::complex<double>* blockB,\n               Index rows, Index depth, Index cols, std::complex<double> alpha,\n               Index strideA, Index strideB, Index offsetA, Index offsetB)\n  {\n    const Index accRows = quad_traits<double>::rows;\n    const Index accCols = quad_traits<double>::size;\n    void (*gemm_function)(const DataMapper&, const double*, const std::complex<double>*,\n          Index, Index, Index, std::complex<double>, Index, Index, Index, Index);\n    #ifdef EIGEN_ALTIVEC_MMA_ONLY\n       //generate with MMA only\n       gemm_function = &Eigen::internal::gemm_complexMMA<double, std::complex<double>, std::complex<double>, double, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, true, false>;\n     #elif defined(ALTIVEC_MMA_SUPPORT) && !defined(EIGEN_ALTIVEC_DISABLE_MMA)\n       if (__builtin_cpu_supports (\"arch_3_1\") && __builtin_cpu_supports (\"mma\")){\n         gemm_function = &Eigen::internal::gemm_complexMMA<double, std::complex<double>, std::complex<double>, double, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, true, false>;\n       }\n       else{\n         gemm_function = &Eigen::internal::gemm_complex<double, std::complex<double>, std::complex<double>, double, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, true, false>;\n       }\n     #else\n       gemm_function = &Eigen::internal::gemm_complex<double, std::complex<double>, std::complex<double>, double, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, true, false>;\n     #endif\n       gemm_function(res, blockA, blockB, rows, depth, cols, alpha, strideA, strideB, offsetA, offsetB);\n  }\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_MATRIX_PRODUCT_ALTIVEC_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h",
    "content": "//#define EIGEN_POWER_USE_PREFETCH  // Use prefetching in gemm routines\n#ifdef EIGEN_POWER_USE_PREFETCH\n#define EIGEN_POWER_PREFETCH(p)  prefetch(p)\n#else\n#define EIGEN_POWER_PREFETCH(p)\n#endif\n\nnamespace Eigen {\n\nnamespace internal {\n\ntemplate<typename Scalar, typename Packet, typename DataMapper, typename Index, const Index accRows>\nEIGEN_STRONG_INLINE void gemm_extra_col(\n  const DataMapper& res,\n  const Scalar* lhs_base,\n  const Scalar* rhs_base,\n  Index depth,\n  Index strideA,\n  Index offsetA,\n  Index row,\n  Index col,\n  Index remaining_rows,\n  Index remaining_cols,\n  const Packet& pAlpha);\n\ntemplate<typename Scalar, typename Packet, typename DataMapper, typename Index, const Index accRows, const Index accCols>\nEIGEN_STRONG_INLINE void gemm_extra_row(\n  const DataMapper& res,\n  const Scalar* lhs_base,\n  const Scalar* rhs_base,\n  Index depth,\n  Index strideA,\n  Index offsetA,\n  Index row,\n  Index col,\n  Index rows,\n  Index cols,\n  Index remaining_rows,\n  const Packet& pAlpha,\n  const Packet& pMask);\n\ntemplate<typename Scalar, typename Packet, typename DataMapper, typename Index, const Index accCols>\nEIGEN_STRONG_INLINE void gemm_unrolled_col(\n  const DataMapper& res,\n  const Scalar* lhs_base,\n  const Scalar* rhs_base,\n  Index depth,\n  Index strideA,\n  Index offsetA,\n  Index& row,\n  Index rows,\n  Index col,\n  Index remaining_cols,\n  const Packet& pAlpha);\n\ntemplate<typename Packet>\nEIGEN_ALWAYS_INLINE Packet bmask(const int remaining_rows);\n\ntemplate<typename Scalar, typename Packet, typename Packetc, typename DataMapper, typename Index, const Index accRows, const Index accCols, bool ConjugateLhs, bool ConjugateRhs, bool LhsIsReal, bool RhsIsReal>\nEIGEN_STRONG_INLINE void gemm_complex_extra_col(\n  const DataMapper& res,\n  const Scalar* lhs_base,\n  const Scalar* rhs_base,\n  Index depth,\n  Index strideA,\n  Index offsetA,\n  Index strideB,\n  Index row,\n  Index col,\n  Index remaining_rows,\n  Index remaining_cols,\n  const Packet& pAlphaReal,\n  const Packet& pAlphaImag);\n\ntemplate<typename Scalar, typename Packet, typename Packetc, typename DataMapper, typename Index, const Index accRows, const Index accCols, bool ConjugateLhs, bool ConjugateRhs, bool LhsIsReal, bool RhsIsReal>\nEIGEN_STRONG_INLINE void gemm_complex_extra_row(\n  const DataMapper& res,\n  const Scalar* lhs_base,\n  const Scalar* rhs_base,\n  Index depth,\n  Index strideA,\n  Index offsetA,\n  Index strideB,\n  Index row,\n  Index col,\n  Index rows,\n  Index cols,\n  Index remaining_rows,\n  const Packet& pAlphaReal,\n  const Packet& pAlphaImag,\n  const Packet& pMask);\n\ntemplate<typename Scalar, typename Packet, typename Packetc, typename DataMapper, typename Index, const Index accCols, bool ConjugateLhs, bool ConjugateRhs, bool LhsIsReal, bool RhsIsReal>\nEIGEN_STRONG_INLINE void gemm_complex_unrolled_col(\n  const DataMapper& res,\n  const Scalar* lhs_base,\n  const Scalar* rhs_base,\n  Index depth,\n  Index strideA,\n  Index offsetA,\n  Index strideB,\n  Index& row,\n  Index rows,\n  Index col,\n  Index remaining_cols,\n  const Packet& pAlphaReal,\n  const Packet& pAlphaImag);\n\ntemplate<typename Scalar, typename Packet>\nEIGEN_ALWAYS_INLINE Packet ploadLhs(const Scalar* lhs);\n\ntemplate<typename DataMapper, typename Packet, typename Index, const Index accCols, int N, int StorageOrder>\nEIGEN_ALWAYS_INLINE void bload(PacketBlock<Packet,4>& acc, const DataMapper& res, Index row, Index col);\n\ntemplate<typename DataMapper, typename 
Packet, typename Index, const Index accCols, int N, int StorageOrder>\nEIGEN_ALWAYS_INLINE void bload(PacketBlock<Packet,8>& acc, const DataMapper& res, Index row, Index col);\n\ntemplate<typename Packet>\nEIGEN_ALWAYS_INLINE void bscale(PacketBlock<Packet,4>& acc, PacketBlock<Packet,4>& accZ, const Packet& pAlpha);\n\ntemplate<typename Packet, int N>\nEIGEN_ALWAYS_INLINE void bscalec(PacketBlock<Packet,N>& aReal, PacketBlock<Packet,N>& aImag, const Packet& bReal, const Packet& bImag, PacketBlock<Packet,N>& cReal, PacketBlock<Packet,N>& cImag);\n\nconst static Packet16uc p16uc_SETCOMPLEX32_FIRST = {  0,  1,  2,  3,\n                                                     16, 17, 18, 19,\n                                                      4,  5,  6,  7,\n                                                     20, 21, 22, 23};\n\nconst static Packet16uc p16uc_SETCOMPLEX32_SECOND = {  8,  9, 10, 11,\n                                                      24, 25, 26, 27,\n                                                      12, 13, 14, 15,\n                                                      28, 29, 30, 31};\n//[a,b],[ai,bi] = [a,ai] - This is equivalent to p16uc_GETREAL64\nconst static Packet16uc p16uc_SETCOMPLEX64_FIRST = {  0,  1,  2,  3,  4,  5,  6,  7,\n                                                     16, 17, 18, 19, 20, 21, 22, 23};\n\n//[a,b],[ai,bi] = [b,bi] - This is equivalent to p16uc_GETIMAG64\nconst static Packet16uc p16uc_SETCOMPLEX64_SECOND = {  8,  9, 10, 11, 12, 13, 14, 15,\n                                                      24, 25, 26, 27, 28, 29, 30, 31};\n\n\n// Grab two decoupled real/imaginary PacketBlocks and return two coupled (real/imaginary pairs) PacketBlocks.\ntemplate<typename Packet, typename Packetc>\nEIGEN_ALWAYS_INLINE void bcouple_common(PacketBlock<Packet,4>& taccReal, PacketBlock<Packet,4>& taccImag, PacketBlock<Packetc, 4>& acc1, PacketBlock<Packetc, 4>& acc2)\n{\n  acc1.packet[0].v = vec_perm(taccReal.packet[0], taccImag.packet[0], p16uc_SETCOMPLEX32_FIRST);\n  acc1.packet[1].v = vec_perm(taccReal.packet[1], taccImag.packet[1], p16uc_SETCOMPLEX32_FIRST);\n  acc1.packet[2].v = vec_perm(taccReal.packet[2], taccImag.packet[2], p16uc_SETCOMPLEX32_FIRST);\n  acc1.packet[3].v = vec_perm(taccReal.packet[3], taccImag.packet[3], p16uc_SETCOMPLEX32_FIRST);\n\n  acc2.packet[0].v = vec_perm(taccReal.packet[0], taccImag.packet[0], p16uc_SETCOMPLEX32_SECOND);\n  acc2.packet[1].v = vec_perm(taccReal.packet[1], taccImag.packet[1], p16uc_SETCOMPLEX32_SECOND);\n  acc2.packet[2].v = vec_perm(taccReal.packet[2], taccImag.packet[2], p16uc_SETCOMPLEX32_SECOND);\n  acc2.packet[3].v = vec_perm(taccReal.packet[3], taccImag.packet[3], p16uc_SETCOMPLEX32_SECOND);\n}\n\ntemplate<typename Packet, typename Packetc>\nEIGEN_ALWAYS_INLINE void bcouple(PacketBlock<Packet,4>& taccReal, PacketBlock<Packet,4>& taccImag, PacketBlock<Packetc,8>& tRes, PacketBlock<Packetc, 4>& acc1, PacketBlock<Packetc, 4>& acc2)\n{\n  bcouple_common<Packet, Packetc>(taccReal, taccImag, acc1, acc2);\n\n  acc1.packet[0] = padd<Packetc>(tRes.packet[0], acc1.packet[0]);\n  acc1.packet[1] = padd<Packetc>(tRes.packet[1], acc1.packet[1]);\n  acc1.packet[2] = padd<Packetc>(tRes.packet[2], acc1.packet[2]);\n  acc1.packet[3] = padd<Packetc>(tRes.packet[3], acc1.packet[3]);\n\n  acc2.packet[0] = padd<Packetc>(tRes.packet[4], acc2.packet[0]);\n  acc2.packet[1] = padd<Packetc>(tRes.packet[5], acc2.packet[1]);\n  acc2.packet[2] = padd<Packetc>(tRes.packet[6], acc2.packet[2]);\n  acc2.packet[3] = 
padd<Packetc>(tRes.packet[7], acc2.packet[3]);\n}\n\ntemplate<typename Packet, typename Packetc>\nEIGEN_ALWAYS_INLINE void bcouple_common(PacketBlock<Packet,1>& taccReal, PacketBlock<Packet,1>& taccImag, PacketBlock<Packetc, 1>& acc1, PacketBlock<Packetc, 1>& acc2)\n{\n  acc1.packet[0].v = vec_perm(taccReal.packet[0], taccImag.packet[0], p16uc_SETCOMPLEX32_FIRST);\n\n  acc2.packet[0].v = vec_perm(taccReal.packet[0], taccImag.packet[0], p16uc_SETCOMPLEX32_SECOND);\n}\n\ntemplate<typename Packet, typename Packetc>\nEIGEN_ALWAYS_INLINE void bcouple(PacketBlock<Packet,1>& taccReal, PacketBlock<Packet,1>& taccImag, PacketBlock<Packetc,2>& tRes, PacketBlock<Packetc, 1>& acc1, PacketBlock<Packetc, 1>& acc2)\n{\n  bcouple_common<Packet, Packetc>(taccReal, taccImag, acc1, acc2);\n\n  acc1.packet[0] = padd<Packetc>(tRes.packet[0], acc1.packet[0]);\n\n  acc2.packet[0] = padd<Packetc>(tRes.packet[1], acc2.packet[0]);\n}\n\ntemplate<>\nEIGEN_ALWAYS_INLINE void bcouple_common<Packet2d, Packet1cd>(PacketBlock<Packet2d,4>& taccReal, PacketBlock<Packet2d,4>& taccImag, PacketBlock<Packet1cd, 4>& acc1, PacketBlock<Packet1cd, 4>& acc2)\n{\n  acc1.packet[0].v = vec_perm(taccReal.packet[0], taccImag.packet[0], p16uc_SETCOMPLEX64_FIRST);\n  acc1.packet[1].v = vec_perm(taccReal.packet[1], taccImag.packet[1], p16uc_SETCOMPLEX64_FIRST);\n  acc1.packet[2].v = vec_perm(taccReal.packet[2], taccImag.packet[2], p16uc_SETCOMPLEX64_FIRST);\n  acc1.packet[3].v = vec_perm(taccReal.packet[3], taccImag.packet[3], p16uc_SETCOMPLEX64_FIRST);\n\n  acc2.packet[0].v = vec_perm(taccReal.packet[0], taccImag.packet[0], p16uc_SETCOMPLEX64_SECOND);\n  acc2.packet[1].v = vec_perm(taccReal.packet[1], taccImag.packet[1], p16uc_SETCOMPLEX64_SECOND);\n  acc2.packet[2].v = vec_perm(taccReal.packet[2], taccImag.packet[2], p16uc_SETCOMPLEX64_SECOND);\n  acc2.packet[3].v = vec_perm(taccReal.packet[3], taccImag.packet[3], p16uc_SETCOMPLEX64_SECOND);\n}\n\ntemplate<>\nEIGEN_ALWAYS_INLINE void bcouple_common<Packet2d, Packet1cd>(PacketBlock<Packet2d,1>& taccReal, PacketBlock<Packet2d,1>& taccImag, PacketBlock<Packet1cd, 1>& acc1, PacketBlock<Packet1cd, 1>& acc2)\n{\n  acc1.packet[0].v = vec_perm(taccReal.packet[0], taccImag.packet[0], p16uc_SETCOMPLEX64_FIRST);\n\n  acc2.packet[0].v = vec_perm(taccReal.packet[0], taccImag.packet[0], p16uc_SETCOMPLEX64_SECOND);\n}\n\n// This is necessary because ploadRhs for double returns a pair of vectors when MMA is enabled.\ntemplate<typename Scalar, typename Packet>\nEIGEN_ALWAYS_INLINE Packet ploadRhs(const Scalar* rhs)\n{\n  return ploadu<Packet>(rhs);\n}\n\n} // end namespace internal\n} // end namespace Eigen\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2020 Everton Constantino (everton.constantino@ibm.com)\n// Copyright (C) 2021 Chip Kerchner (chip.kerchner@ibm.com)\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_MATRIX_PRODUCT_MMA_ALTIVEC_H\n#define EIGEN_MATRIX_PRODUCT_MMA_ALTIVEC_H\n\n#pragma GCC target(\"cpu=power10\")\n\n#ifdef __has_builtin\n#if !__has_builtin(__builtin_vsx_assemble_pair)\n#define __builtin_vsx_assemble_pair __builtin_mma_assemble_pair\n#endif\n#endif\n\nnamespace Eigen {\n\nnamespace internal {\n\ntemplate<typename Scalar, typename Packet>\nEIGEN_ALWAYS_INLINE void bsetzeroMMA(__vector_quad* acc)\n{\n  __builtin_mma_xxsetaccz(acc);\n}\n\ntemplate<typename DataMapper, typename Index, typename Packet, const Index accCols>\nEIGEN_ALWAYS_INLINE void storeAccumulator(Index i, Index j, const DataMapper& data, const Packet& alpha, __vector_quad* acc)\n{\n  PacketBlock<Packet, 4> result;\n  __builtin_mma_disassemble_acc(&result.packet, acc);\n\n  PacketBlock<Packet, 4> tRes;\n  bload<DataMapper, Packet, Index, accCols, 0, ColMajor>(tRes, data, i, j);\n\n  bscale<Packet>(tRes, result, alpha);\n\n  data.template storePacketBlock<Packet, 4>(i, j, tRes);\n}\n\ntemplate<typename DataMapper, typename Index, typename Packet, typename Packetc, const Index accColsC, int N>\nEIGEN_ALWAYS_INLINE void storeComplexAccumulator(Index i, Index j, const DataMapper& data, const Packet& alphaReal, const Packet& alphaImag, __vector_quad* accReal, __vector_quad* accImag)\n{\n  PacketBlock<Packet, 4> resultReal, resultImag;\n  __builtin_mma_disassemble_acc(&resultReal.packet, accReal);\n  __builtin_mma_disassemble_acc(&resultImag.packet, accImag);\n\n  PacketBlock<Packetc, 8> tRes;\n  bload<DataMapper, Packetc, Index, accColsC, N, ColMajor>(tRes, data, i, j);\n\n  PacketBlock<Packet,4> taccReal, taccImag;\n  bscalec<Packet,4>(resultReal, resultImag, alphaReal, alphaImag, taccReal, taccImag);\n\n  PacketBlock<Packetc, 4> acc1, acc2;\n  bcouple<Packet, Packetc>(taccReal, taccImag, tRes, acc1, acc2);\n\n  data.template storePacketBlock<Packetc, 4>(i + N*accColsC, j, acc1);\n  data.template storePacketBlock<Packetc, 4>(i + (N+1)*accColsC, j, acc2);\n}\n\n// Defaults to float32, since Eigen still supports C++03 we can't use default template arguments\ntemplate<typename LhsPacket, typename RhsPacket, bool NegativeAccumulate>\nEIGEN_ALWAYS_INLINE void pgerMMA(__vector_quad* acc, const RhsPacket& a, const LhsPacket& b)\n{\n  if(NegativeAccumulate)\n  {\n    __builtin_mma_xvf32gernp(acc, (__vector unsigned char)a, (__vector unsigned char)b);\n  } else {\n    __builtin_mma_xvf32gerpp(acc, (__vector unsigned char)a, (__vector unsigned char)b);\n  }\n}\n\ntemplate<typename LhsPacket, typename RhsPacket, bool NegativeAccumulate>\nEIGEN_ALWAYS_INLINE void pgerMMA(__vector_quad* acc, const PacketBlock<Packet2d,2>& a, const Packet2d& b)\n{\n  __vector_pair* a0 = (__vector_pair *)(&a.packet[0]);\n  if(NegativeAccumulate)\n  {\n    __builtin_mma_xvf64gernp(acc, *a0, (__vector unsigned char)b);\n  } else {\n    __builtin_mma_xvf64gerpp(acc, *a0, (__vector unsigned char)b);\n  }\n}\n\ntemplate<typename LhsPacket, typename RhsPacket, bool NegativeAccumulate>\nEIGEN_ALWAYS_INLINE void pgerMMA(__vector_quad* acc, const __vector_pair& a, const Packet2d& b)\n{\n  
if(NegativeAccumulate)\n  {\n    __builtin_mma_xvf64gernp(acc, (__vector_pair)a, (__vector unsigned char)b);\n  } else {\n    __builtin_mma_xvf64gerpp(acc, (__vector_pair)a, (__vector unsigned char)b);\n  }\n}\n\ntemplate<typename LhsPacket, typename RhsPacket, bool NegativeAccumulate>\nEIGEN_ALWAYS_INLINE void pgerMMA(__vector_quad*, const __vector_pair&, const Packet4f&)\n{\n  // Just for compilation\n}\n\ntemplate<typename Scalar, typename Packet, typename RhsPacket, bool ConjugateLhs, bool ConjugateRhs, bool LhsIsReal, bool RhsIsReal>\nEIGEN_ALWAYS_INLINE void pgercMMA(__vector_quad* accReal, __vector_quad* accImag, const Packet& lhsV, const Packet& lhsVi, const RhsPacket& rhsV, const RhsPacket& rhsVi)\n{\n  pgerMMA<Packet, RhsPacket, false>(accReal,  rhsV,  lhsV);\n  if(LhsIsReal) {\n    pgerMMA<Packet, RhsPacket, ConjugateRhs>(accImag, rhsVi,  lhsV);\n  } else {\n    if(!RhsIsReal) {\n      pgerMMA<Packet, RhsPacket, ConjugateLhs == ConjugateRhs>(accReal, rhsVi, lhsVi);\n      pgerMMA<Packet, RhsPacket, ConjugateRhs>(accImag, rhsVi,  lhsV);\n    } else {\n      EIGEN_UNUSED_VARIABLE(rhsVi);\n    }\n    pgerMMA<Packet, RhsPacket, ConjugateLhs>(accImag,  rhsV, lhsVi);\n  }\n}\n\n// This is necessary because ploadRhs for double returns a pair of vectors when MMA is enabled.\ntemplate<typename Scalar, typename Packet>\nEIGEN_ALWAYS_INLINE void ploadRhsMMA(const Scalar* rhs, Packet& rhsV)\n{\n  rhsV = ploadRhs<Scalar, Packet>((const Scalar*)(rhs));\n} \n\ntemplate<>\nEIGEN_ALWAYS_INLINE void ploadRhsMMA<double, PacketBlock<Packet2d, 2> >(const double* rhs, PacketBlock<Packet2d, 2>& rhsV)\n{\n  rhsV.packet[0] = ploadRhs<double, Packet2d>((const double *)((Packet2d *)rhs      ));\n  rhsV.packet[1] = ploadRhs<double, Packet2d>((const double *)(((Packet2d *)rhs) + 1));\n}\n\ntemplate<>\nEIGEN_ALWAYS_INLINE void ploadRhsMMA<double, __vector_pair>(const double* rhs, __vector_pair& rhsV)\n{\n#if EIGEN_COMP_LLVM\n  __builtin_vsx_assemble_pair(&rhsV,\n    (__vector unsigned char)(ploadRhs<double, Packet2d>((const double *)(((Packet2d *)rhs) + 1))),\n    (__vector unsigned char)(ploadRhs<double, Packet2d>((const double *)((Packet2d *)rhs      ))));\n#else\n  __asm__ (\"lxvp %x0,%1\" : \"=wa\" (rhsV) : \"Y\" (*rhs));\n#endif\n}\n\ntemplate<>\nEIGEN_ALWAYS_INLINE void ploadRhsMMA(const float*, __vector_pair&)\n{\n  // Just for compilation\n}\n\n// PEEL_MMA loop factor.\n#define PEEL_MMA 7\n\n#define MICRO_MMA_UNROLL(func) \\\n  func(0) func(1) func(2) func(3) func(4) func(5) func(6) func(7)\n\n#define MICRO_MMA_LOAD_ONE(iter) \\\n  if (unroll_factor > iter) { \\\n    lhsV##iter = ploadLhs<Scalar, Packet>(lhs_ptr##iter); \\\n    lhs_ptr##iter += accCols; \\\n  } else { \\\n    EIGEN_UNUSED_VARIABLE(lhsV##iter); \\\n  }\n\n#define MICRO_MMA_WORK_ONE(iter, type, peel) \\\n  if (unroll_factor > iter) { \\\n    pgerMMA<Packet, type, false>(&accZero##iter, rhsV##peel, lhsV##iter); \\\n  }\n\n#define MICRO_MMA_TYPE_PEEL(func, func2, type, peel) \\\n  if (PEEL_MMA > peel) { \\\n    Packet lhsV0, lhsV1, lhsV2, lhsV3, lhsV4, lhsV5, lhsV6, lhsV7; \\\n    ploadRhsMMA<Scalar, type>(rhs_ptr + (accRows * peel), rhsV##peel); \\\n    MICRO_MMA_UNROLL(func2); \\\n    func(0,type,peel) func(1,type,peel) func(2,type,peel) func(3,type,peel) \\\n    func(4,type,peel) func(5,type,peel) func(6,type,peel) func(7,type,peel) \\\n  } else { \\\n    EIGEN_UNUSED_VARIABLE(rhsV##peel); \\\n  }\n\n#define MICRO_MMA_UNROLL_TYPE_PEEL(func, func2, type) \\\n  type rhsV0, rhsV1, rhsV2, rhsV3, rhsV4, rhsV5, rhsV6, rhsV7, rhsV8, rhsV9; 
\\\n  MICRO_MMA_TYPE_PEEL(func,func2,type,0); MICRO_MMA_TYPE_PEEL(func,func2,type,1); \\\n  MICRO_MMA_TYPE_PEEL(func,func2,type,2); MICRO_MMA_TYPE_PEEL(func,func2,type,3); \\\n  MICRO_MMA_TYPE_PEEL(func,func2,type,4); MICRO_MMA_TYPE_PEEL(func,func2,type,5); \\\n  MICRO_MMA_TYPE_PEEL(func,func2,type,6); MICRO_MMA_TYPE_PEEL(func,func2,type,7); \\\n  MICRO_MMA_TYPE_PEEL(func,func2,type,8); MICRO_MMA_TYPE_PEEL(func,func2,type,9);\n\n#define MICRO_MMA_UNROLL_TYPE_ONE(func, func2, type) \\\n  type rhsV0; \\\n  MICRO_MMA_TYPE_PEEL(func,func2,type,0);\n\n#define MICRO_MMA_ONE_PEEL \\\n  if (sizeof(Scalar) == sizeof(float)) { \\\n    MICRO_MMA_UNROLL_TYPE_PEEL(MICRO_MMA_WORK_ONE, MICRO_MMA_LOAD_ONE, RhsPacket); \\\n  } else { \\\n    MICRO_MMA_UNROLL_TYPE_PEEL(MICRO_MMA_WORK_ONE, MICRO_MMA_LOAD_ONE, __vector_pair); \\\n  } \\\n  rhs_ptr += (accRows * PEEL_MMA);\n\n#define MICRO_MMA_ONE \\\n  if (sizeof(Scalar) == sizeof(float)) { \\\n    MICRO_MMA_UNROLL_TYPE_ONE(MICRO_MMA_WORK_ONE, MICRO_MMA_LOAD_ONE, RhsPacket); \\\n  } else { \\\n    MICRO_MMA_UNROLL_TYPE_ONE(MICRO_MMA_WORK_ONE, MICRO_MMA_LOAD_ONE, __vector_pair); \\\n  } \\\n  rhs_ptr += accRows;\n\n#define MICRO_MMA_DST_PTR_ONE(iter) \\\n  if (unroll_factor > iter) { \\\n    bsetzeroMMA<Scalar, Packet>(&accZero##iter); \\\n  } else { \\\n    EIGEN_UNUSED_VARIABLE(accZero##iter); \\\n  }\n\n#define MICRO_MMA_DST_PTR MICRO_MMA_UNROLL(MICRO_MMA_DST_PTR_ONE)\n\n#define MICRO_MMA_SRC_PTR_ONE(iter) \\\n  if (unroll_factor > iter) { \\\n    lhs_ptr##iter = lhs_base + ( (row/accCols) + iter )*strideA*accCols + accCols*offsetA; \\\n  } else { \\\n    EIGEN_UNUSED_VARIABLE(lhs_ptr##iter); \\\n  }\n\n#define MICRO_MMA_SRC_PTR MICRO_MMA_UNROLL(MICRO_MMA_SRC_PTR_ONE)\n\n#define MICRO_MMA_PREFETCH_ONE(iter) \\\n  if (unroll_factor > iter) { \\\n    EIGEN_POWER_PREFETCH(lhs_ptr##iter); \\\n  }\n\n#define MICRO_MMA_PREFETCH MICRO_MMA_UNROLL(MICRO_MMA_PREFETCH_ONE)\n\n#define MICRO_MMA_STORE_ONE(iter) \\\n  if (unroll_factor > iter) { \\\n    storeAccumulator<DataMapper, Index, Packet, accCols>(row + iter*accCols, col, res, pAlpha, &accZero##iter); \\\n  }\n\n#define MICRO_MMA_STORE MICRO_MMA_UNROLL(MICRO_MMA_STORE_ONE)\n\ntemplate<int unroll_factor, typename Scalar, typename Packet, typename RhsPacket, typename DataMapper, typename Index, const Index accRows, const Index accCols>\nEIGEN_STRONG_INLINE void gemm_unrolled_MMA_iteration(\n  const DataMapper& res,\n  const Scalar* lhs_base,\n  const Scalar* rhs_base,\n  Index depth,\n  Index strideA,\n  Index offsetA,\n  Index& row,\n  Index col,\n  const Packet& pAlpha)\n{\n  const Scalar* rhs_ptr = rhs_base;\n  const Scalar* lhs_ptr0 = NULL, * lhs_ptr1 = NULL, * lhs_ptr2 = NULL, * lhs_ptr3 = NULL, * lhs_ptr4 = NULL, * lhs_ptr5 = NULL, * lhs_ptr6 = NULL, * lhs_ptr7 = NULL;\n  __vector_quad accZero0, accZero1, accZero2, accZero3, accZero4, accZero5, accZero6, accZero7;\n\n  MICRO_MMA_SRC_PTR\n  MICRO_MMA_DST_PTR\n\n  Index k = 0;\n  for(; k + PEEL_MMA <= depth; k+= PEEL_MMA)\n  {\n    EIGEN_POWER_PREFETCH(rhs_ptr);\n    MICRO_MMA_PREFETCH\n    MICRO_MMA_ONE_PEEL\n  }\n  for(; k < depth; k++)\n  {\n    MICRO_MMA_ONE\n  }\n  MICRO_MMA_STORE\n\n  row += unroll_factor*accCols;\n}\n\ntemplate<typename Scalar, typename Index, typename Packet, typename RhsPacket, typename DataMapper, const Index accRows, const Index accCols>\nvoid gemmMMA(const DataMapper& res, const Scalar* blockA, const Scalar* blockB, Index rows, Index depth, Index cols, Scalar alpha, Index strideA, Index strideB, Index offsetA, Index offsetB)\n{\n    
  const Index remaining_rows = rows % accCols;\n      const Index remaining_cols = cols % accRows;\n\n      if( strideA == -1 ) strideA = depth;\n      if( strideB == -1 ) strideB = depth;\n\n      const Packet pAlpha = pset1<Packet>(alpha);\n      const Packet pMask  = bmask<Packet>((const int)(remaining_rows));\n\n      Index col = 0;\n      for(; col + accRows <= cols; col += accRows)\n      {\n        const Scalar* rhs_base = blockB + col*strideB + accRows*offsetB;\n        const Scalar* lhs_base = blockA;\n\n        Index row = 0;\n#define MAX_MMA_UNROLL 7\n        while(row + MAX_MMA_UNROLL*accCols <= rows) {\n          gemm_unrolled_MMA_iteration<MAX_MMA_UNROLL, Scalar, Packet, RhsPacket, DataMapper, Index, accRows, accCols>(res, lhs_base, rhs_base, depth, strideA, offsetA, row, col, pAlpha);\n        }\n        switch( (rows-row)/accCols ) {\n#if MAX_MMA_UNROLL > 7\n          case 7:\n            gemm_unrolled_MMA_iteration<7, Scalar, Packet, RhsPacket, DataMapper, Index, accRows, accCols>(res, lhs_base, rhs_base, depth, strideA, offsetA, row, col, pAlpha);\n            break;\n#endif\n#if MAX_MMA_UNROLL > 6\n          case 6:\n            gemm_unrolled_MMA_iteration<6, Scalar, Packet, RhsPacket, DataMapper, Index, accRows, accCols>(res, lhs_base, rhs_base, depth, strideA, offsetA, row, col, pAlpha);\n            break;\n#endif\n#if MAX_MMA_UNROLL > 5\n          case 5:\n            gemm_unrolled_MMA_iteration<5, Scalar, Packet, RhsPacket, DataMapper, Index, accRows, accCols>(res, lhs_base, rhs_base, depth, strideA, offsetA, row, col, pAlpha);\n            break;\n#endif\n#if MAX_MMA_UNROLL > 4\n          case 4:\n            gemm_unrolled_MMA_iteration<4, Scalar, Packet, RhsPacket, DataMapper, Index, accRows, accCols>(res, lhs_base, rhs_base, depth, strideA, offsetA, row, col, pAlpha);\n            break;\n#endif\n#if MAX_MMA_UNROLL > 3\n          case 3:\n            gemm_unrolled_MMA_iteration<3, Scalar, Packet, RhsPacket, DataMapper, Index, accRows, accCols>(res, lhs_base, rhs_base, depth, strideA, offsetA, row, col, pAlpha);\n            break;\n#endif\n#if MAX_MMA_UNROLL > 2\n          case 2:\n            gemm_unrolled_MMA_iteration<2, Scalar, Packet, RhsPacket, DataMapper, Index, accRows, accCols>(res, lhs_base, rhs_base, depth, strideA, offsetA, row, col, pAlpha);\n            break;\n#endif\n#if MAX_MMA_UNROLL > 1\n          case 1:\n            gemm_unrolled_MMA_iteration<1, Scalar, Packet, RhsPacket, DataMapper, Index, accRows, accCols>(res, lhs_base, rhs_base, depth, strideA, offsetA, row, col, pAlpha);\n            break;\n#endif\n          default:\n            break;\n        }\n#undef MAX_MMA_UNROLL\n\n        if(remaining_rows > 0)\n        {\n          gemm_extra_row<Scalar, Packet, DataMapper, Index, accRows, accCols>(res, lhs_base, rhs_base, depth, strideA, offsetA, row, col, rows, cols, remaining_rows, pAlpha, pMask);\n        }\n      }\n\n      if(remaining_cols > 0)\n      {\n        const Scalar* rhs_base = blockB + col*strideB + remaining_cols*offsetB;\n        const Scalar* lhs_base = blockA;\n\n        for(; col < cols; col++)\n        {\n          Index row = 0;\n\n          gemm_unrolled_col<Scalar, Packet, DataMapper, Index, accCols>(res, lhs_base, rhs_base, depth, strideA, offsetA, row, rows, col, remaining_cols, pAlpha);\n\n          if (remaining_rows > 0)\n          {\n            gemm_extra_col<Scalar, Packet, DataMapper, Index, accRows>(res, lhs_base, rhs_base, depth, strideA, offsetA, row, col, remaining_rows, remaining_cols, pAlpha);\n        
  }\n          rhs_base++;\n        }\n      }\n}\n\n#define accColsC (accCols / 2)\n#define advanceRows ((LhsIsReal) ? 1 : 2)\n#define advanceCols ((RhsIsReal) ? 1 : 2)\n\n// PEEL_COMPLEX_MMA loop factor.\n#define PEEL_COMPLEX_MMA 7\n\n#define MICRO_COMPLEX_MMA_UNROLL(func) \\\n  func(0) func(1) func(2) func(3) func(4)\n\n#define MICRO_COMPLEX_MMA_LOAD_ONE(iter) \\\n  if (unroll_factor > iter) { \\\n    lhsV##iter = ploadLhs<Scalar, Packet>(lhs_ptr_real##iter); \\\n    lhs_ptr_real##iter += accCols; \\\n    if(!LhsIsReal) { \\\n      lhsVi##iter = ploadLhs<Scalar, Packet>(lhs_ptr_imag##iter); \\\n      lhs_ptr_imag##iter += accCols; \\\n    } else { \\\n      EIGEN_UNUSED_VARIABLE(lhsVi##iter); \\\n    } \\\n  } else { \\\n    EIGEN_UNUSED_VARIABLE(lhsV##iter); \\\n    EIGEN_UNUSED_VARIABLE(lhsVi##iter); \\\n  }\n\n#define MICRO_COMPLEX_MMA_WORK_ONE(iter, type, peel) \\\n  if (unroll_factor > iter) { \\\n    pgercMMA<Scalar, Packet, type, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(&accReal##iter, &accImag##iter, lhsV##iter, lhsVi##iter, rhsV##peel, rhsVi##peel); \\\n  }\n\n#define MICRO_COMPLEX_MMA_TYPE_PEEL(func, func2, type, peel) \\\n  if (PEEL_COMPLEX_MMA > peel) { \\\n    Packet lhsV0, lhsV1, lhsV2, lhsV3, lhsV4; \\\n    Packet lhsVi0, lhsVi1, lhsVi2, lhsVi3, lhsVi4; \\\n    ploadRhsMMA<Scalar, type>(rhs_ptr_real + (accRows * peel), rhsV##peel); \\\n    if(!RhsIsReal) { \\\n      ploadRhsMMA<Scalar, type>(rhs_ptr_imag + (accRows * peel), rhsVi##peel); \\\n    } else { \\\n      EIGEN_UNUSED_VARIABLE(rhsVi##peel); \\\n    } \\\n    MICRO_COMPLEX_MMA_UNROLL(func2); \\\n    func(0,type,peel) func(1,type,peel) func(2,type,peel) func(3,type,peel) func(4,type,peel) \\\n  } else { \\\n    EIGEN_UNUSED_VARIABLE(rhsV##peel); \\\n    EIGEN_UNUSED_VARIABLE(rhsVi##peel); \\\n  }\n\n#define MICRO_COMPLEX_MMA_UNROLL_TYPE_PEEL(func, func2, type) \\\n  type rhsV0, rhsV1, rhsV2, rhsV3, rhsV4, rhsV5, rhsV6, rhsV7, rhsV8, rhsV9; \\\n  type rhsVi0, rhsVi1, rhsVi2, rhsVi3, rhsVi4, rhsVi5, rhsVi6, rhsVi7, rhsVi8, rhsVi9; \\\n  MICRO_COMPLEX_MMA_TYPE_PEEL(func,func2,type,0); MICRO_COMPLEX_MMA_TYPE_PEEL(func,func2,type,1); \\\n  MICRO_COMPLEX_MMA_TYPE_PEEL(func,func2,type,2); MICRO_COMPLEX_MMA_TYPE_PEEL(func,func2,type,3); \\\n  MICRO_COMPLEX_MMA_TYPE_PEEL(func,func2,type,4); MICRO_COMPLEX_MMA_TYPE_PEEL(func,func2,type,5); \\\n  MICRO_COMPLEX_MMA_TYPE_PEEL(func,func2,type,6); MICRO_COMPLEX_MMA_TYPE_PEEL(func,func2,type,7); \\\n  MICRO_COMPLEX_MMA_TYPE_PEEL(func,func2,type,8); MICRO_COMPLEX_MMA_TYPE_PEEL(func,func2,type,9);\n\n#define MICRO_COMPLEX_MMA_UNROLL_TYPE_ONE(func, func2, type) \\\n  type rhsV0, rhsVi0; \\\n  MICRO_COMPLEX_MMA_TYPE_PEEL(func,func2,type,0);\n\n#define MICRO_COMPLEX_MMA_ONE_PEEL \\\n  if (sizeof(Scalar) == sizeof(float)) { \\\n    MICRO_COMPLEX_MMA_UNROLL_TYPE_PEEL(MICRO_COMPLEX_MMA_WORK_ONE, MICRO_COMPLEX_MMA_LOAD_ONE, RhsPacket); \\\n  } else { \\\n    MICRO_COMPLEX_MMA_UNROLL_TYPE_PEEL(MICRO_COMPLEX_MMA_WORK_ONE, MICRO_COMPLEX_MMA_LOAD_ONE, __vector_pair); \\\n  } \\\n  rhs_ptr_real += (accRows * PEEL_COMPLEX_MMA); \\\n  if(!RhsIsReal) rhs_ptr_imag += (accRows * PEEL_COMPLEX_MMA);\n\n#define MICRO_COMPLEX_MMA_ONE \\\n  if (sizeof(Scalar) == sizeof(float)) { \\\n    MICRO_COMPLEX_MMA_UNROLL_TYPE_ONE(MICRO_COMPLEX_MMA_WORK_ONE, MICRO_COMPLEX_MMA_LOAD_ONE, RhsPacket); \\\n  } else { \\\n    MICRO_COMPLEX_MMA_UNROLL_TYPE_ONE(MICRO_COMPLEX_MMA_WORK_ONE, MICRO_COMPLEX_MMA_LOAD_ONE, __vector_pair); \\\n  } \\\n  rhs_ptr_real += accRows; \\\n  if(!RhsIsReal) rhs_ptr_imag += 
accRows;\n\n#define MICRO_COMPLEX_MMA_DST_PTR_ONE(iter) \\\n  if (unroll_factor > iter) { \\\n    bsetzeroMMA<Scalar, Packet>(&accReal##iter); \\\n    bsetzeroMMA<Scalar, Packet>(&accImag##iter); \\\n  } else { \\\n    EIGEN_UNUSED_VARIABLE(accReal##iter); \\\n    EIGEN_UNUSED_VARIABLE(accImag##iter); \\\n  }\n\n#define MICRO_COMPLEX_MMA_DST_PTR MICRO_COMPLEX_MMA_UNROLL(MICRO_COMPLEX_MMA_DST_PTR_ONE)\n\n#define MICRO_COMPLEX_MMA_SRC_PTR_ONE(iter) \\\n  if (unroll_factor > iter) { \\\n    lhs_ptr_real##iter = lhs_base + ( ((advanceRows*row)/accCols) + iter*advanceRows )*strideA*accCols + accCols*offsetA; \\\n    if(!LhsIsReal) { \\\n      lhs_ptr_imag##iter = lhs_ptr_real##iter + accCols*strideA; \\\n    } else { \\\n      EIGEN_UNUSED_VARIABLE(lhs_ptr_imag##iter); \\\n    } \\\n  } else { \\\n    EIGEN_UNUSED_VARIABLE(lhs_ptr_real##iter); \\\n    EIGEN_UNUSED_VARIABLE(lhs_ptr_imag##iter); \\\n  }\n\n#define MICRO_COMPLEX_MMA_SRC_PTR MICRO_COMPLEX_MMA_UNROLL(MICRO_COMPLEX_MMA_SRC_PTR_ONE)\n\n#define MICRO_COMPLEX_MMA_PREFETCH_ONE(iter) \\\n  if (unroll_factor > iter) { \\\n    EIGEN_POWER_PREFETCH(lhs_ptr_real##iter); \\\n    if(!LhsIsReal) { \\\n      EIGEN_POWER_PREFETCH(lhs_ptr_imag##iter); \\\n    } \\\n  }\n\n#define MICRO_COMPLEX_MMA_PREFETCH MICRO_COMPLEX_MMA_UNROLL(MICRO_COMPLEX_MMA_PREFETCH_ONE)\n\n#define MICRO_COMPLEX_MMA_STORE_ONE(iter) \\\n  if (unroll_factor > iter) { \\\n    storeComplexAccumulator<DataMapper, Index, Packet, Packetc, accColsC, 0>(row + iter*accCols, col, res, pAlphaReal, pAlphaImag, &accReal##iter, &accImag##iter); \\\n  }\n\n#define MICRO_COMPLEX_MMA_STORE MICRO_COMPLEX_MMA_UNROLL(MICRO_COMPLEX_MMA_STORE_ONE)\n\ntemplate<int unroll_factor, typename Scalar, typename Packet, typename Packetc, typename RhsPacket, typename DataMapper, typename Index, const Index accRows, const Index accCols, bool ConjugateLhs, bool ConjugateRhs, bool LhsIsReal, bool RhsIsReal>\nEIGEN_STRONG_INLINE void gemm_complex_unrolled_MMA_iteration(\n  const DataMapper& res,\n  const Scalar* lhs_base,\n  const Scalar* rhs_base,\n  Index depth,\n  Index strideA,\n  Index offsetA,\n  Index strideB,\n  Index& row,\n  Index col,\n  const Packet& pAlphaReal,\n  const Packet& pAlphaImag)\n{\n  const Scalar* rhs_ptr_real = rhs_base;\n  const Scalar* rhs_ptr_imag;\n  if(!RhsIsReal) {\n    rhs_ptr_imag = rhs_base + accRows*strideB;\n  } else {\n    EIGEN_UNUSED_VARIABLE(rhs_ptr_imag);\n  }\n  const Scalar* lhs_ptr_real0 = NULL, * lhs_ptr_imag0 = NULL, * lhs_ptr_real1 = NULL, * lhs_ptr_imag1 = NULL;\n  const Scalar* lhs_ptr_real2 = NULL, * lhs_ptr_imag2 = NULL, * lhs_ptr_real3 = NULL, * lhs_ptr_imag3 = NULL;\n  const Scalar* lhs_ptr_real4 = NULL, * lhs_ptr_imag4 = NULL;\n  __vector_quad accReal0, accImag0, accReal1, accImag1, accReal2, accImag2, accReal3, accImag3, accReal4, accImag4;\n\n  MICRO_COMPLEX_MMA_SRC_PTR\n  MICRO_COMPLEX_MMA_DST_PTR\n\n  Index k = 0;\n  for(; k + PEEL_COMPLEX_MMA <= depth; k+= PEEL_COMPLEX_MMA)\n  {\n    EIGEN_POWER_PREFETCH(rhs_ptr_real);\n    if(!RhsIsReal) {\n      EIGEN_POWER_PREFETCH(rhs_ptr_imag);\n    }\n    MICRO_COMPLEX_MMA_PREFETCH\n    MICRO_COMPLEX_MMA_ONE_PEEL\n  }\n  for(; k < depth; k++)\n  {\n    MICRO_COMPLEX_MMA_ONE\n  }\n  MICRO_COMPLEX_MMA_STORE\n\n  row += unroll_factor*accCols;\n}\n\ntemplate<typename LhsScalar, typename RhsScalar, typename Scalarc, typename Scalar, typename Index, typename Packet, typename Packetc, typename RhsPacket, typename DataMapper, const Index accRows, const Index accCols, bool ConjugateLhs, bool ConjugateRhs, bool LhsIsReal, 
bool RhsIsReal>\nvoid gemm_complexMMA(const DataMapper& res, const LhsScalar* blockAc, const RhsScalar* blockBc, Index rows, Index depth, Index cols, Scalarc alpha, Index strideA, Index strideB, Index offsetA, Index offsetB)\n{\n      const Index remaining_rows = rows % accCols;\n      const Index remaining_cols = cols % accRows;\n\n      if( strideA == -1 ) strideA = depth;\n      if( strideB == -1 ) strideB = depth;\n\n      const Packet pAlphaReal = pset1<Packet>(alpha.real());\n      const Packet pAlphaImag = pset1<Packet>(alpha.imag());\n      const Packet pMask = bmask<Packet>((const int)(remaining_rows));\n\n      const Scalar* blockA = (Scalar *) blockAc;\n      const Scalar* blockB = (Scalar *) blockBc;\n\n      Index col = 0;\n      for(; col + accRows <= cols; col += accRows)\n      {\n        const Scalar* rhs_base = blockB + advanceCols*col*strideB + accRows*offsetB;\n        const Scalar* lhs_base = blockA;\n        Index row = 0;\n\n#define MAX_COMPLEX_MMA_UNROLL 4\n        while(row + MAX_COMPLEX_MMA_UNROLL*accCols <= rows) {\n          gemm_complex_unrolled_MMA_iteration<MAX_COMPLEX_MMA_UNROLL, Scalar, Packet, Packetc, RhsPacket, DataMapper, Index, accRows, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(res, lhs_base, rhs_base, depth, strideA, offsetA, strideB, row, col, pAlphaReal, pAlphaImag);\n        }\n        switch( (rows-row)/accCols ) {\n#if MAX_COMPLEX_MMA_UNROLL > 4\n          case 4:\n            gemm_complex_unrolled_MMA_iteration<4, Scalar, Packet, Packetc, RhsPacket, DataMapper, Index, accRows, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(res, lhs_base, rhs_base, depth, strideA, offsetA, strideB, row, col, pAlphaReal, pAlphaImag);\n            break;\n#endif\n#if MAX_COMPLEX_MMA_UNROLL > 3\n          case 3:\n            gemm_complex_unrolled_MMA_iteration<3, Scalar, Packet, Packetc, RhsPacket, DataMapper, Index, accRows, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(res, lhs_base, rhs_base, depth, strideA, offsetA, strideB, row, col, pAlphaReal, pAlphaImag);\n            break;\n#endif\n#if MAX_COMPLEX_MMA_UNROLL > 2\n          case 2:\n            gemm_complex_unrolled_MMA_iteration<2, Scalar, Packet, Packetc, RhsPacket, DataMapper, Index, accRows, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(res, lhs_base, rhs_base, depth, strideA, offsetA, strideB, row, col, pAlphaReal, pAlphaImag);\n            break;\n#endif\n#if MAX_COMPLEX_MMA_UNROLL > 1\n          case 1:\n            gemm_complex_unrolled_MMA_iteration<1, Scalar, Packet, Packetc, RhsPacket, DataMapper, Index, accRows, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(res, lhs_base, rhs_base, depth, strideA, offsetA, strideB, row, col, pAlphaReal, pAlphaImag);\n            break;\n#endif\n          default:\n            break;\n        }\n#undef MAX_COMPLEX_MMA_UNROLL\n\n        if(remaining_rows > 0)\n        {\n          gemm_complex_extra_row<Scalar, Packet, Packetc, DataMapper, Index, accRows, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(res, lhs_base, rhs_base, depth, strideA, offsetA, strideB, row, col, rows, cols, remaining_rows, pAlphaReal, pAlphaImag, pMask);\n        }\n      }\n\n      if(remaining_cols > 0)\n      {\n        const Scalar* rhs_base = blockB + advanceCols*col*strideB + remaining_cols*offsetB;\n        const Scalar* lhs_base = blockA;\n\n        for(; col < cols; col++)\n        {\n          Index row = 0;\n\n          gemm_complex_unrolled_col<Scalar, Packet, Packetc, DataMapper, Index, accCols, 
ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(res, lhs_base, rhs_base, depth, strideA, offsetA, strideB, row, rows, col, remaining_cols, pAlphaReal, pAlphaImag);\n\n          if (remaining_rows > 0)\n          {\n            gemm_complex_extra_col<Scalar, Packet, Packetc, DataMapper, Index, accRows, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(res, lhs_base, rhs_base, depth, strideA, offsetA, strideB, row, col, remaining_rows, remaining_cols, pAlphaReal, pAlphaImag);\n          }\n          rhs_base++;\n        }\n      }\n}\n\n#undef accColsC\n#undef advanceRows\n#undef advanceCols\n\n#pragma GCC reset_options\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_MATRIX_PRODUCT_MMA_ALTIVEC_H\n\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/arch/AltiVec/PacketMath.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008-2016 Konstantinos Margaritis <markos@freevec.org>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_PACKET_MATH_ALTIVEC_H\n#define EIGEN_PACKET_MATH_ALTIVEC_H\n\nnamespace Eigen {\n\nnamespace internal {\n\n#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD\n#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 4\n#endif\n\n#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD\n#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD\n#endif\n\n// NOTE Altivec has 32 registers, but Eigen only accepts a value of 8 or 16\n#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS\n#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS  32\n#endif\n\ntypedef __vector float                   Packet4f;\ntypedef __vector int                     Packet4i;\ntypedef __vector unsigned int            Packet4ui;\ntypedef __vector __bool int              Packet4bi;\ntypedef __vector short int               Packet8s;\ntypedef __vector unsigned short int      Packet8us;\ntypedef __vector signed char             Packet16c;\ntypedef __vector unsigned char           Packet16uc;\ntypedef eigen_packet_wrapper<__vector unsigned short int,0> Packet8bf;\n\n// We don't want to write the same code all the time, but we need to reuse the constants\n// and it doesn't really work to declare them global, so we define macros instead\n#define _EIGEN_DECLARE_CONST_FAST_Packet4f(NAME,X) \\\n  Packet4f p4f_##NAME = {X, X, X, X}\n\n#define _EIGEN_DECLARE_CONST_FAST_Packet4i(NAME,X) \\\n  Packet4i p4i_##NAME = vec_splat_s32(X)\n\n#define _EIGEN_DECLARE_CONST_FAST_Packet4ui(NAME,X) \\\n  Packet4ui p4ui_##NAME = {X, X, X, X}\n\n#define _EIGEN_DECLARE_CONST_FAST_Packet8us(NAME,X) \\\n  Packet8us p8us_##NAME = {X, X, X, X, X, X, X, X}\n\n#define _EIGEN_DECLARE_CONST_FAST_Packet16uc(NAME,X) \\\n  Packet16uc p16uc_##NAME = {X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X}\n\n#define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \\\n  Packet4f p4f_##NAME = pset1<Packet4f>(X)\n\n#define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \\\n  Packet4i p4i_##NAME = pset1<Packet4i>(X)\n\n#define _EIGEN_DECLARE_CONST_Packet2d(NAME,X) \\\n  Packet2d p2d_##NAME = pset1<Packet2d>(X)\n\n#define _EIGEN_DECLARE_CONST_Packet2l(NAME,X) \\\n  Packet2l p2l_##NAME = pset1<Packet2l>(X)\n\n#define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \\\n  const Packet4f p4f_##NAME = reinterpret_cast<Packet4f>(pset1<Packet4i>(X))\n\n#define DST_CHAN 1\n#define DST_CTRL(size, count, stride) (((size) << 24) | ((count) << 16) | (stride))\n#define __UNPACK_TYPE__(PACKETNAME) typename unpacket_traits<PACKETNAME>::type \n\n// These constants are endian-agnostic\nstatic _EIGEN_DECLARE_CONST_FAST_Packet4f(ZERO, 0); //{ 0.0, 0.0, 0.0, 0.0}\nstatic _EIGEN_DECLARE_CONST_FAST_Packet4i(ZERO, 0); //{ 0, 0, 0, 0,}\nstatic _EIGEN_DECLARE_CONST_FAST_Packet4i(ONE,1); //{ 1, 1, 1, 1}\nstatic _EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS16,-16); //{ -16, -16, -16, -16}\nstatic _EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS1,-1); //{ -1, -1, -1, -1}\nstatic _EIGEN_DECLARE_CONST_FAST_Packet4ui(SIGN, 0x80000000u);\nstatic _EIGEN_DECLARE_CONST_FAST_Packet4ui(PREV0DOT5, 0x3EFFFFFFu);\nstatic _EIGEN_DECLARE_CONST_FAST_Packet8us(ONE,1); //{ 1, 1, 1, 1, 1, 1, 1, 1}\nstatic _EIGEN_DECLARE_CONST_FAST_Packet16uc(ONE,1);\nstatic Packet4f p4f_MZERO = (Packet4f) 
vec_sl((Packet4ui)p4i_MINUS1, (Packet4ui)p4i_MINUS1); //{ 0x80000000, 0x80000000, 0x80000000, 0x80000000}\n#ifndef __VSX__\nstatic Packet4f p4f_ONE = vec_ctf(p4i_ONE, 0); //{ 1.0, 1.0, 1.0, 1.0}\n#endif\n\nstatic Packet4f  p4f_COUNTDOWN  = { 0.0, 1.0, 2.0, 3.0 };\nstatic Packet4i  p4i_COUNTDOWN  = { 0, 1, 2, 3 };\nstatic Packet8s  p8s_COUNTDOWN  = { 0, 1, 2, 3, 4, 5, 6, 7 };\nstatic Packet8us p8us_COUNTDOWN = { 0, 1, 2, 3, 4, 5, 6, 7 };\n\nstatic Packet16c  p16c_COUNTDOWN = { 0, 1, 2, 3, 4, 5, 6, 7,\n                                    8, 9, 10, 11, 12, 13, 14, 15};\nstatic Packet16uc p16uc_COUNTDOWN = { 0, 1, 2, 3, 4, 5, 6, 7, \n                                    8, 9, 10, 11, 12, 13, 14, 15};\n\nstatic Packet16uc p16uc_REVERSE32 = { 12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3 };\nstatic Packet16uc p16uc_REVERSE16 = { 14,15, 12,13, 10,11, 8,9, 6,7, 4,5, 2,3, 0,1 };\nstatic Packet16uc p16uc_REVERSE8 = { 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 };\n\nstatic Packet16uc p16uc_DUPLICATE32_HI = { 0,1,2,3, 0,1,2,3, 4,5,6,7, 4,5,6,7 };\nstatic Packet16uc p16uc_DUPLICATE16_HI = { 0,1,0,1, 2,3,2,3, 4,5,4,5, 6,7,6,7 };\nstatic Packet16uc p16uc_DUPLICATE8_HI = { 0,0, 1,1, 2,2, 3,3, 4,4, 5,5, 6,6, 7,7 };\nstatic const Packet16uc p16uc_DUPLICATE16_EVEN= { 0,1 ,0,1, 4,5, 4,5, 8,9, 8,9, 12,13, 12,13 };\nstatic const Packet16uc p16uc_DUPLICATE16_ODD = { 2,3 ,2,3, 6,7, 6,7, 10,11, 10,11, 14,15, 14,15 };\n\nstatic Packet16uc p16uc_QUADRUPLICATE16_HI = { 0,1,0,1,0,1,0,1, 2,3,2,3,2,3,2,3 };\n\n// Handle endianness properly while loading constants\n// Define global static constants:\n#ifdef _BIG_ENDIAN\nstatic Packet16uc p16uc_FORWARD = vec_lvsl(0, (float*)0);\n#ifdef __VSX__\nstatic Packet16uc p16uc_REVERSE64 = { 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 };\n#endif\nstatic Packet16uc p16uc_PSET32_WODD   = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 0), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 2), 8);//{ 0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11 };\nstatic Packet16uc p16uc_PSET32_WEVEN  = vec_sld(p16uc_DUPLICATE32_HI, (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 3), 8);//{ 4,5,6,7, 4,5,6,7, 12,13,14,15, 12,13,14,15 };\nstatic Packet16uc p16uc_HALF64_0_16 = vec_sld((Packet16uc)p4i_ZERO, vec_splat((Packet16uc) vec_abs(p4i_MINUS16), 3), 8);      //{ 0,0,0,0, 0,0,0,0, 16,16,16,16, 16,16,16,16};\n#else\nstatic Packet16uc p16uc_FORWARD = p16uc_REVERSE32;\nstatic Packet16uc p16uc_REVERSE64 = { 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 };\nstatic Packet16uc p16uc_PSET32_WODD = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 1), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 3), 8);//{ 0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11 };\nstatic Packet16uc p16uc_PSET32_WEVEN = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 0), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 2), 8);//{ 4,5,6,7, 4,5,6,7, 12,13,14,15, 12,13,14,15 };\nstatic Packet16uc p16uc_HALF64_0_16 = vec_sld(vec_splat((Packet16uc) vec_abs(p4i_MINUS16), 0), (Packet16uc)p4i_ZERO, 8);      //{ 0,0,0,0, 0,0,0,0, 16,16,16,16, 16,16,16,16};\n#endif // _BIG_ENDIAN\n\nstatic Packet16uc p16uc_PSET64_HI = (Packet16uc) vec_mergeh((Packet4ui)p16uc_PSET32_WODD, (Packet4ui)p16uc_PSET32_WEVEN);     //{ 0,1,2,3, 4,5,6,7, 0,1,2,3, 4,5,6,7 };\nstatic Packet16uc p16uc_PSET64_LO = (Packet16uc) vec_mergel((Packet4ui)p16uc_PSET32_WODD, (Packet4ui)p16uc_PSET32_WEVEN);     //{ 8,9,10,11, 12,13,14,15, 8,9,10,11, 12,13,14,15 };\nstatic Packet16uc p16uc_TRANSPOSE64_HI = p16uc_PSET64_HI + p16uc_HALF64_0_16;                                         //{ 
0,1,2,3, 4,5,6,7, 16,17,18,19, 20,21,22,23};\nstatic Packet16uc p16uc_TRANSPOSE64_LO = p16uc_PSET64_LO + p16uc_HALF64_0_16;                                         //{ 8,9,10,11, 12,13,14,15, 24,25,26,27, 28,29,30,31};\n\nstatic Packet16uc p16uc_COMPLEX32_REV = vec_sld(p16uc_REVERSE32, p16uc_REVERSE32, 8);                                         //{ 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11 };\n\n#ifdef _BIG_ENDIAN\nstatic Packet16uc p16uc_COMPLEX32_REV2 = vec_sld(p16uc_FORWARD, p16uc_FORWARD, 8);                                            //{ 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 };\n#else\nstatic Packet16uc p16uc_COMPLEX32_REV2 = vec_sld(p16uc_PSET64_HI, p16uc_PSET64_LO, 8);                                            //{ 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 };\n#endif // _BIG_ENDIAN\n\n#if EIGEN_HAS_BUILTIN(__builtin_prefetch) || EIGEN_COMP_GNUC\n  #define EIGEN_PPC_PREFETCH(ADDR) __builtin_prefetch(ADDR);\n#else\n  #define EIGEN_PPC_PREFETCH(ADDR) asm( \"   dcbt [%[addr]]\\n\" :: [addr] \"r\" (ADDR) : \"cc\" );\n#endif\n\ntemplate <>\nstruct packet_traits<float> : default_packet_traits {\n  typedef Packet4f type;\n  typedef Packet4f half;\n  enum {\n    Vectorizable = 1,\n    AlignedOnScalar = 1,\n    size = 4,\n    HasHalfPacket = 1,\n\n    HasAdd = 1,\n    HasSub = 1,\n    HasMul = 1,\n    HasDiv = 1,\n    HasMin = 1,\n    HasMax = 1,\n    HasAbs = 1,\n    HasSin = EIGEN_FAST_MATH,\n    HasCos = EIGEN_FAST_MATH,\n    HasLog = 1,\n    HasExp = 1,\n#ifdef __VSX__\n    HasSqrt = 1,\n#if !EIGEN_COMP_CLANG\n    HasRsqrt = 1,\n#else\n    HasRsqrt = 0,\n#endif\n#else\n    HasSqrt = 0,\n    HasRsqrt = 0,\n    HasTanh = EIGEN_FAST_MATH,\n    HasErf = EIGEN_FAST_MATH,\n#endif\n    HasRound = 1,\n    HasFloor = 1,\n    HasCeil = 1,\n    HasRint = 1,\n    HasNegate = 1,\n    HasBlend = 1\n  };\n};\ntemplate <>\nstruct packet_traits<bfloat16> : default_packet_traits {\n  typedef Packet8bf type;\n  typedef Packet8bf half;\n  enum {\n    Vectorizable = 1,\n    AlignedOnScalar = 1,\n    size = 8,\n    HasHalfPacket = 0,\n\n    HasAdd = 1,\n    HasSub = 1,\n    HasMul = 1,\n    HasDiv = 1,\n    HasMin = 1,\n    HasMax = 1,\n    HasAbs = 1,\n    HasSin = EIGEN_FAST_MATH,\n    HasCos = EIGEN_FAST_MATH,\n    HasLog = 1,\n    HasExp = 1,\n#ifdef __VSX__\n    HasSqrt = 1,\n#if !EIGEN_COMP_CLANG\n    HasRsqrt = 1,\n#else\n    HasRsqrt = 0,\n#endif\n#else\n    HasSqrt = 0,\n    HasRsqrt = 0,\n    HasTanh = EIGEN_FAST_MATH,\n    HasErf = EIGEN_FAST_MATH,\n#endif\n    HasRound = 1,\n    HasFloor = 1,\n    HasCeil = 1,\n    HasRint = 1,\n    HasNegate = 1,\n    HasBlend = 1\n  };\n};\n\ntemplate <>\nstruct packet_traits<int> : default_packet_traits {\n  typedef Packet4i type;\n  typedef Packet4i half;\n  enum {\n    Vectorizable = 1,\n    AlignedOnScalar = 1,\n    size = 4,\n    HasHalfPacket = 0,\n\n    HasAdd   = 1,\n    HasSub   = 1,\n    HasShift = 1,\n    HasMul   = 1,\n    HasDiv   = 0,\n    HasBlend = 1\n  };\n};\n\ntemplate <>\nstruct packet_traits<short int> : default_packet_traits {\n  typedef Packet8s type;\n  typedef Packet8s half;\n  enum {\n    Vectorizable = 1,\n    AlignedOnScalar = 1,\n    size = 8,\n    HasHalfPacket = 0,\n\n    HasAdd  = 1,\n    HasSub  = 1,\n    HasMul  = 1,\n    HasDiv  = 0,\n    HasBlend = 1\n  };\n};\n\ntemplate <>\nstruct packet_traits<unsigned short int> : default_packet_traits {\n  typedef Packet8us type;\n  typedef Packet8us half;\n  enum {\n    Vectorizable = 1,\n    AlignedOnScalar = 1,\n    size = 8,\n    HasHalfPacket = 0,\n\n    HasAdd  = 1,\n    HasSub  = 
1,\n    HasMul  = 1,\n    HasDiv  = 0,\n    HasBlend = 1\n  };\n};\n\ntemplate <>\nstruct packet_traits<signed char> : default_packet_traits {\n  typedef Packet16c type;\n  typedef Packet16c half;\n  enum {\n    Vectorizable = 1,\n    AlignedOnScalar = 1,\n    size = 16,\n    HasHalfPacket = 0,\n\n    HasAdd  = 1,\n    HasSub  = 1,\n    HasMul  = 1,\n    HasDiv  = 0,\n    HasBlend = 1\n  };\n};\n\ntemplate <>\nstruct packet_traits<unsigned char> : default_packet_traits {\n  typedef Packet16uc type;\n  typedef Packet16uc half;\n  enum {\n    Vectorizable = 1,\n    AlignedOnScalar = 1,\n    size = 16,\n    HasHalfPacket = 0,\n\n    HasAdd  = 1,\n    HasSub  = 1,\n    HasMul  = 1,\n    HasDiv  = 0,\n    HasBlend = 1\n  };\n};\n\ntemplate<> struct unpacket_traits<Packet4f>\n{\n  typedef float     type;\n  typedef Packet4f  half;\n  typedef Packet4i  integer_packet;\n  enum {size=4, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false};\n};\ntemplate<> struct unpacket_traits<Packet4i>\n{\n  typedef int       type;\n  typedef Packet4i  half;\n  enum {size=4, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false};\n};\ntemplate<> struct unpacket_traits<Packet8s>\n{\n  typedef short int type;\n  typedef Packet8s  half;\n  enum {size=8, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false};\n};\ntemplate<> struct unpacket_traits<Packet8us>\n{\n  typedef unsigned short int type;\n  typedef Packet8us          half;\n  enum {size=8, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false};\n};\n\ntemplate<> struct unpacket_traits<Packet16c>\n{\n  typedef signed char type;\n  typedef Packet16c  half;\n  enum {size=16, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false};\n};\ntemplate<> struct unpacket_traits<Packet16uc>\n{\n  typedef unsigned char type;\n  typedef Packet16uc  half;\n  enum {size=16, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false};\n};\n\ntemplate<> struct unpacket_traits<Packet8bf>\n{\n  typedef bfloat16 type;\n  typedef Packet8bf          half;\n  enum {size=8, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false};\n};\ninline std::ostream & operator <<(std::ostream & s, const Packet16c & v)\n{\n  union {\n    Packet16c   v;\n    signed char n[16];\n  } vt;\n  vt.v = v;\n  for (int i=0; i< 16; i++)\n    s << vt.n[i] << \", \";\n  return s;\n}\n\ninline std::ostream & operator <<(std::ostream & s, const Packet16uc & v)\n{\n  union {\n    Packet16uc   v;\n    unsigned char n[16];\n  } vt;\n  vt.v = v;\n  for (int i=0; i< 16; i++)\n    s << vt.n[i] << \", \";\n  return s;\n}\n\ninline std::ostream & operator <<(std::ostream & s, const Packet4f & v)\n{\n  union {\n    Packet4f   v;\n    float n[4];\n  } vt;\n  vt.v = v;\n  s << vt.n[0] << \", \" << vt.n[1] << \", \" << vt.n[2] << \", \" << vt.n[3];\n  return s;\n}\n\ninline std::ostream & operator <<(std::ostream & s, const Packet4i & v)\n{\n  union {\n    Packet4i   v;\n    int n[4];\n  } vt;\n  vt.v = v;\n  s << vt.n[0] << \", \" << vt.n[1] << \", \" << vt.n[2] << \", \" << vt.n[3];\n  return s;\n}\n\ninline std::ostream & operator <<(std::ostream & s, const Packet4ui & v)\n{\n  union {\n    Packet4ui   v;\n    unsigned int n[4];\n  } vt;\n  vt.v = v;\n  s << vt.n[0] << \", \" << vt.n[1] << \", \" << vt.n[2] << \", \" << 
vt.n[3];\n  return s;\n}\n\ntemplate <typename Packet>\nEIGEN_STRONG_INLINE Packet pload_common(const __UNPACK_TYPE__(Packet)* from)\n{\n  // some versions of GCC throw \"unused-but-set-parameter\".\n  // ignoring these warnings for now.\n  EIGEN_UNUSED_VARIABLE(from);\n  EIGEN_DEBUG_ALIGNED_LOAD\n#ifdef __VSX__\n  return vec_xl(0, const_cast<__UNPACK_TYPE__(Packet)*>(from));\n#else\n  return vec_ld(0, from);\n#endif\n}\n\n// Need to define them first or we get specialization after instantiation errors\ntemplate<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from)\n{\n  return pload_common<Packet4f>(from);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int*     from)\n{\n  return pload_common<Packet4i>(from);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8s pload<Packet8s>(const short int* from)\n{\n  return pload_common<Packet8s>(from);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8us pload<Packet8us>(const unsigned short int* from)\n{\n  return pload_common<Packet8us>(from);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet16c pload<Packet16c>(const signed char*     from)\n{\n  return pload_common<Packet16c>(from);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet16uc pload<Packet16uc>(const unsigned char*     from)\n{\n  return pload_common<Packet16uc>(from);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8bf pload<Packet8bf>(const bfloat16*     from)\n{\n  return pload_common<Packet8us>(reinterpret_cast<const unsigned short int*>(from));\n}\n\ntemplate <typename Packet>\nEIGEN_STRONG_INLINE void pstore_common(__UNPACK_TYPE__(Packet)* to, const Packet& from){\n  // some versions of GCC throw \"unused-but-set-parameter\" (float *to).\n  // ignoring these warnings for now.\n  EIGEN_UNUSED_VARIABLE(to);\n  EIGEN_DEBUG_ALIGNED_STORE\n#ifdef __VSX__\n  vec_xst(from, 0, to);\n#else\n  vec_st(from, 0, to);\n#endif\n}\n\ntemplate<> EIGEN_STRONG_INLINE void pstore<float>(float*   to, const Packet4f& from)\n{\n  pstore_common<Packet4f>(to, from);\n}\n\ntemplate<> EIGEN_STRONG_INLINE void pstore<int>(int*       to, const Packet4i& from)\n{\n  pstore_common<Packet4i>(to, from);\n}\n\ntemplate<> EIGEN_STRONG_INLINE void pstore<short int>(short int*       to, const Packet8s& from)\n{\n  pstore_common<Packet8s>(to, from);\n}\n\ntemplate<> EIGEN_STRONG_INLINE void pstore<unsigned short int>(unsigned short int*       to, const Packet8us& from)\n{\n  pstore_common<Packet8us>(to, from);\n}\n\ntemplate<> EIGEN_STRONG_INLINE void pstore<bfloat16>(bfloat16*       to, const Packet8bf& from)\n{\n  pstore_common<Packet8us>(reinterpret_cast<unsigned short int*>(to), from);\n}\n\ntemplate<> EIGEN_STRONG_INLINE void pstore<signed char>(signed char*       to, const Packet16c& from)\n{\n  pstore_common<Packet16c>(to, from);\n}\n\ntemplate<> EIGEN_STRONG_INLINE void pstore<unsigned char>(unsigned char*       to, const Packet16uc& from)\n{\n  pstore_common<Packet16uc>(to, from);\n}\n\ntemplate<typename Packet>\nEIGEN_STRONG_INLINE Packet pset1_size4(const __UNPACK_TYPE__(Packet)& from)\n{\n  Packet v = {from, from, from, from};\n  return v;\n}\n\ntemplate<typename Packet>\nEIGEN_STRONG_INLINE Packet pset1_size8(const __UNPACK_TYPE__(Packet)& from)\n{\n  Packet v = {from, from, from, from, from, from, from, from};\n  return v;\n}\n\ntemplate<typename Packet>\nEIGEN_STRONG_INLINE Packet pset1_size16(const __UNPACK_TYPE__(Packet)& from)\n{\n  Packet v = {from, from, from, from, from, from, from, from, from, from, from, from, from, from, from, from};\n  return v;\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f 
pset1<Packet4f>(const float&  from) {\n  return pset1_size4<Packet4f>(from);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int&    from)   {\n  return pset1_size4<Packet4i>(from);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8s pset1<Packet8s>(const short int&    from)   {\n  return pset1_size8<Packet8s>(from);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8us pset1<Packet8us>(const unsigned short int&    from)   {\n  return pset1_size8<Packet8us>(from);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet16c pset1<Packet16c>(const signed char&    from)   {\n  return pset1_size16<Packet16c>(from);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet16uc pset1<Packet16uc>(const unsigned char&    from)   {\n  return pset1_size16<Packet16uc>(from);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f pset1frombits<Packet4f>(unsigned int from) {\n  return reinterpret_cast<Packet4f>(pset1<Packet4i>(from));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8bf pset1<Packet8bf>(const bfloat16&    from)   {\n  return pset1_size8<Packet8us>(reinterpret_cast<const unsigned short int&>(from));\n}\n\ntemplate<typename Packet> EIGEN_STRONG_INLINE void\npbroadcast4_common(const __UNPACK_TYPE__(Packet) *a,\n                      Packet& a0, Packet& a1, Packet& a2, Packet& a3)\n{\n  a3 = pload<Packet>(a);\n  a0 = vec_splat(a3, 0);\n  a1 = vec_splat(a3, 1);\n  a2 = vec_splat(a3, 2);\n  a3 = vec_splat(a3, 3);\n}\n\ntemplate<> EIGEN_STRONG_INLINE void\npbroadcast4<Packet4f>(const float *a,\n                      Packet4f& a0, Packet4f& a1, Packet4f& a2, Packet4f& a3)\n{\n  pbroadcast4_common<Packet4f>(a, a0, a1, a2, a3);\n}\ntemplate<> EIGEN_STRONG_INLINE void\npbroadcast4<Packet4i>(const int *a,\n                      Packet4i& a0, Packet4i& a1, Packet4i& a2, Packet4i& a3)\n{\n  pbroadcast4_common<Packet4i>(a, a0, a1, a2, a3);\n}\n\ntemplate<typename Packet> EIGEN_DEVICE_FUNC inline Packet pgather_common(const __UNPACK_TYPE__(Packet)* from, Index stride)\n{\n  EIGEN_ALIGN16 __UNPACK_TYPE__(Packet) a[4];\n  a[0] = from[0*stride];\n  a[1] = from[1*stride];\n  a[2] = from[2*stride];\n  a[3] = from[3*stride];\n  return pload<Packet>(a);\n}\n\ntemplate<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, Index stride)\n{\n  return pgather_common<Packet4f>(from, stride);\n}\n\ntemplate<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* from, Index stride)\n{\n  return pgather_common<Packet4i>(from, stride);\n}\n\ntemplate<typename Packet> EIGEN_DEVICE_FUNC inline Packet pgather_size8(const __UNPACK_TYPE__(Packet)* from, Index stride)\n{\n  EIGEN_ALIGN16 __UNPACK_TYPE__(Packet) a[8];\n  a[0] = from[0*stride];\n  a[1] = from[1*stride];\n  a[2] = from[2*stride];\n  a[3] = from[3*stride];\n  a[4] = from[4*stride];\n  a[5] = from[5*stride];\n  a[6] = from[6*stride];\n  a[7] = from[7*stride];\n  return pload<Packet>(a);\n}\n\ntemplate<> EIGEN_DEVICE_FUNC inline Packet8s pgather<short int, Packet8s>(const short int* from, Index stride)\n{\n  return pgather_size8<Packet8s>(from, stride);\n}\n\ntemplate<> EIGEN_DEVICE_FUNC inline Packet8us pgather<unsigned short int, Packet8us>(const unsigned short int* from, Index stride)\n{\n  return pgather_size8<Packet8us>(from, stride);\n}\n\ntemplate<> EIGEN_DEVICE_FUNC inline Packet8bf pgather<bfloat16, Packet8bf>(const bfloat16* from, Index stride)\n{\n  return pgather_size8<Packet8bf>(from, stride);\n}\n\ntemplate<typename Packet> EIGEN_DEVICE_FUNC inline Packet pgather_size16(const __UNPACK_TYPE__(Packet)* from, Index stride)\n{\n  EIGEN_ALIGN16 
__UNPACK_TYPE__(Packet) a[16];\n  a[0] = from[0*stride];\n  a[1] = from[1*stride];\n  a[2] = from[2*stride];\n  a[3] = from[3*stride];\n  a[4] = from[4*stride];\n  a[5] = from[5*stride];\n  a[6] = from[6*stride];\n  a[7] = from[7*stride];\n  a[8] = from[8*stride];\n  a[9] = from[9*stride];\n  a[10] = from[10*stride];\n  a[11] = from[11*stride];\n  a[12] = from[12*stride];\n  a[13] = from[13*stride];\n  a[14] = from[14*stride];\n  a[15] = from[15*stride];\n  return pload<Packet>(a);\n}\n\n\ntemplate<> EIGEN_DEVICE_FUNC inline Packet16c pgather<signed char, Packet16c>(const signed char* from, Index stride)\n{\n  return pgather_size16<Packet16c>(from, stride);\n}\n\ntemplate<> EIGEN_DEVICE_FUNC inline Packet16uc pgather<unsigned char, Packet16uc>(const unsigned char* from, Index stride)\n{\n  return pgather_size16<Packet16uc>(from, stride);\n}\n\ntemplate<typename Packet> EIGEN_DEVICE_FUNC inline void pscatter_size4(__UNPACK_TYPE__(Packet)* to, const Packet& from, Index stride)\n{\n  EIGEN_ALIGN16 __UNPACK_TYPE__(Packet) a[4];\n  pstore<__UNPACK_TYPE__(Packet)>(a, from);\n  to[0*stride] = a[0];\n  to[1*stride] = a[1];\n  to[2*stride] = a[2];\n  to[3*stride] = a[3];\n}\n\ntemplate<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, Index stride)\n{\n  pscatter_size4<Packet4f>(to, from, stride);\n}\n\ntemplate<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const Packet4i& from, Index stride)\n{\n  pscatter_size4<Packet4i>(to, from, stride);\n}\n\ntemplate<typename Packet> EIGEN_DEVICE_FUNC inline void pscatter_size8(__UNPACK_TYPE__(Packet)* to, const Packet& from, Index stride)\n{\n  EIGEN_ALIGN16 __UNPACK_TYPE__(Packet) a[8];\n  pstore<__UNPACK_TYPE__(Packet)>(a, from);\n  to[0*stride] = a[0];\n  to[1*stride] = a[1];\n  to[2*stride] = a[2];\n  to[3*stride] = a[3];\n  to[4*stride] = a[4];\n  to[5*stride] = a[5];\n  to[6*stride] = a[6];\n  to[7*stride] = a[7];\n}\n\n\ntemplate<> EIGEN_DEVICE_FUNC inline void pscatter<short int, Packet8s>(short int* to, const Packet8s& from, Index stride)\n{\n  pscatter_size8<Packet8s>(to, from, stride);\n}\n\ntemplate<> EIGEN_DEVICE_FUNC inline void pscatter<unsigned short int, Packet8us>(unsigned short int* to, const Packet8us& from, Index stride)\n{\n  pscatter_size8<Packet8us>(to, from, stride);\n}\n\ntemplate<> EIGEN_DEVICE_FUNC inline void pscatter<bfloat16, Packet8bf>(bfloat16* to, const Packet8bf& from, Index stride)\n{\n  pscatter_size8<Packet8bf>(to, from, stride);\n}\n\ntemplate<typename Packet> EIGEN_DEVICE_FUNC inline void pscatter_size16(__UNPACK_TYPE__(Packet)* to, const Packet& from, Index stride)\n{\n  EIGEN_ALIGN16 __UNPACK_TYPE__(Packet) a[16];\n  pstore<__UNPACK_TYPE__(Packet)>(a, from);\n  to[0*stride] = a[0];\n  to[1*stride] = a[1];\n  to[2*stride] = a[2];\n  to[3*stride] = a[3];\n  to[4*stride] = a[4];\n  to[5*stride] = a[5];\n  to[6*stride] = a[6];\n  to[7*stride] = a[7];\n  to[8*stride] = a[8];\n  to[9*stride] = a[9];\n  to[10*stride] = a[10];\n  to[11*stride] = a[11];\n  to[12*stride] = a[12];\n  to[13*stride] = a[13];\n  to[14*stride] = a[14];\n  to[15*stride] = a[15];\n}\n\ntemplate<> EIGEN_DEVICE_FUNC inline void pscatter<signed char, Packet16c>(signed char* to, const Packet16c& from, Index stride)\n{\n  pscatter_size16<Packet16c>(to, from, stride);\n}\n\ntemplate<> EIGEN_DEVICE_FUNC inline void pscatter<unsigned char, Packet16uc>(unsigned char* to, const Packet16uc& from, Index stride)\n{\n  pscatter_size16<Packet16uc>(to, from, stride);\n}\n\ntemplate<> EIGEN_STRONG_INLINE 
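\n// Usage sketch (illustrative): plset builds an arithmetic ramp by adding the\n// per-type COUNTDOWN constant {0, 1, 2, ...} to a broadcast base value, e.g.\n//   plset<Packet4f>(10.0f);  // yields {10, 11, 12, 13}\n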
Packet4f   plset<Packet4f>(const float&     a) { return pset1<Packet4f>(a) + p4f_COUNTDOWN;  }\ntemplate<> EIGEN_STRONG_INLINE Packet4i   plset<Packet4i>(const int&       a) { return pset1<Packet4i>(a) + p4i_COUNTDOWN;  }\ntemplate<> EIGEN_STRONG_INLINE Packet8s   plset<Packet8s>(const short int& a) { return pset1<Packet8s>(a) + p8s_COUNTDOWN; }\ntemplate<> EIGEN_STRONG_INLINE Packet8us  plset<Packet8us>(const unsigned short int& a) { return pset1<Packet8us>(a) + p8us_COUNTDOWN; }\ntemplate<> EIGEN_STRONG_INLINE Packet16c  plset<Packet16c>(const signed char& a)   { return pset1<Packet16c>(a) + p16c_COUNTDOWN; }\ntemplate<> EIGEN_STRONG_INLINE Packet16uc plset<Packet16uc>(const unsigned char& a)   { return pset1<Packet16uc>(a) + p16uc_COUNTDOWN; }\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f   padd<Packet4f>  (const Packet4f&   a, const Packet4f&   b) { return a + b; }\ntemplate<> EIGEN_STRONG_INLINE Packet4i   padd<Packet4i>  (const Packet4i&   a, const Packet4i&   b) { return a + b; }\ntemplate<> EIGEN_STRONG_INLINE Packet4ui   padd<Packet4ui>  (const Packet4ui&   a, const Packet4ui&   b) { return a + b; }\ntemplate<> EIGEN_STRONG_INLINE Packet8s   padd<Packet8s>  (const Packet8s&   a, const Packet8s&   b) { return a + b; }\ntemplate<> EIGEN_STRONG_INLINE Packet8us  padd<Packet8us> (const Packet8us&  a, const Packet8us&  b) { return a + b; }\ntemplate<> EIGEN_STRONG_INLINE Packet16c  padd<Packet16c> (const Packet16c&  a, const Packet16c&  b) { return a + b; }\ntemplate<> EIGEN_STRONG_INLINE Packet16uc padd<Packet16uc>(const Packet16uc& a, const Packet16uc& b) { return a + b; }\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f   psub<Packet4f>  (const Packet4f&   a, const Packet4f&   b) { return a - b; }\ntemplate<> EIGEN_STRONG_INLINE Packet4i   psub<Packet4i>  (const Packet4i&   a, const Packet4i&   b) { return a - b; }\ntemplate<> EIGEN_STRONG_INLINE Packet8s   psub<Packet8s>  (const Packet8s&   a, const Packet8s&   b) { return a - b; }\ntemplate<> EIGEN_STRONG_INLINE Packet8us  psub<Packet8us> (const Packet8us&  a, const Packet8us&  b) { return a - b; }\ntemplate<> EIGEN_STRONG_INLINE Packet16c  psub<Packet16c> (const Packet16c&  a, const Packet16c&  b) { return a - b; }\ntemplate<> EIGEN_STRONG_INLINE Packet16uc psub<Packet16uc>(const Packet16uc& a, const Packet16uc& b) { return a - b; }\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a) { return p4f_ZERO - a; }\ntemplate<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a) { return p4i_ZERO - a; }\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f pconj(const Packet4f& a) { return a; }\ntemplate<> EIGEN_STRONG_INLINE Packet4i pconj(const Packet4i& a) { return a; }\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f   pmul<Packet4f>  (const Packet4f&   a, const Packet4f&   b) { return vec_madd(a,b, p4f_MZERO); }\ntemplate<> EIGEN_STRONG_INLINE Packet4i   pmul<Packet4i>  (const Packet4i&   a, const Packet4i&   b) { return a * b; }\ntemplate<> EIGEN_STRONG_INLINE Packet8s   pmul<Packet8s>  (const Packet8s&   a, const Packet8s&   b) { return vec_mul(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet8us  pmul<Packet8us> (const Packet8us&  a, const Packet8us&  b) { return vec_mul(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet16c  pmul<Packet16c> (const Packet16c&  a, const Packet16c&  b) { return vec_mul(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet16uc pmul<Packet16uc>(const Packet16uc& a, const Packet16uc& b) { return vec_mul(a,b); }\n\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b)\n{\n#ifndef 
__VSX__  // VSX actually provides a div instruction\n  Packet4f t, y_0, y_1;\n\n  // AltiVec does not offer a divide instruction, so we compute a reciprocal approximation instead\n  y_0 = vec_re(b);\n\n  // Do one Newton-Raphson iteration to get the needed accuracy: y_1 = y_0*(2 - b*y_0)\n  t   = vec_nmsub(y_0, b, p4f_ONE);\n  y_1 = vec_madd(y_0, t, y_0);\n\n  return vec_madd(a, y_1, p4f_MZERO);\n#else\n  return vec_div(a, b);\n#endif\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& /*a*/, const Packet4i& /*b*/)\n{ eigen_assert(false && \"packet integer division is not supported by AltiVec\");\n  return pset1<Packet4i>(0);\n}\n\n// For some weird reason, pmadd has to be overloaded for packets of integers\ntemplate<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vec_madd(a,b,c); }\ntemplate<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return a*b + c; }\ntemplate<> EIGEN_STRONG_INLINE Packet8s pmadd(const Packet8s& a, const Packet8s& b, const Packet8s& c) { return vec_madd(a,b,c); }\ntemplate<> EIGEN_STRONG_INLINE Packet8us pmadd(const Packet8us& a, const Packet8us& b, const Packet8us& c) { return vec_madd(a,b,c); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b)\n{\n  #ifdef __VSX__\n  // NOTE: about 10% slower than vec_min, but consistent with std::min and SSE regarding NaN\n  Packet4f ret;\n  __asm__ (\"xvcmpgesp %x0,%x1,%x2\\n\\txxsel %x0,%x1,%x2,%x0\" : \"=&wa\" (ret) : \"wa\" (a), \"wa\" (b));\n  return ret;\n  #else\n  return vec_min(a, b);\n  #endif\n}\ntemplate<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_min(a, b); }\ntemplate<> EIGEN_STRONG_INLINE Packet8s pmin<Packet8s>(const Packet8s& a, const Packet8s& b) { return vec_min(a, b); }\ntemplate<> EIGEN_STRONG_INLINE Packet8us pmin<Packet8us>(const Packet8us& a, const Packet8us& b) { return vec_min(a, b); }\ntemplate<> EIGEN_STRONG_INLINE Packet16c pmin<Packet16c>(const Packet16c& a, const Packet16c& b) { return vec_min(a, b); }\ntemplate<> EIGEN_STRONG_INLINE Packet16uc pmin<Packet16uc>(const Packet16uc& a, const Packet16uc& b) { return vec_min(a, b); }\n\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b)\n{\n  #ifdef __VSX__\n  // NOTE: about 10% slower than vec_max, but consistent with std::max and SSE regarding NaN\n  Packet4f ret;\n  __asm__ (\"xvcmpgtsp %x0,%x2,%x1\\n\\txxsel %x0,%x1,%x2,%x0\" : \"=&wa\" (ret) : \"wa\" (a), \"wa\" (b));\n  return ret;\n  #else\n  return vec_max(a, b);\n  #endif\n}\ntemplate<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_max(a, b); }\ntemplate<> EIGEN_STRONG_INLINE Packet8s pmax<Packet8s>(const Packet8s& a, const Packet8s& b) { return vec_max(a, b); }\ntemplate<> EIGEN_STRONG_INLINE Packet8us pmax<Packet8us>(const Packet8us& a, const Packet8us& b) { return vec_max(a, b); }\ntemplate<> EIGEN_STRONG_INLINE Packet16c pmax<Packet16c>(const Packet16c& a, const Packet16c& b) { return vec_max(a, b); }\ntemplate<> EIGEN_STRONG_INLINE Packet16uc pmax<Packet16uc>(const Packet16uc& a, const Packet16uc& b) { return vec_max(a, b); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f pcmp_le(const Packet4f& a, const Packet4f& b) { return reinterpret_cast<Packet4f>(vec_cmple(a,b)); }\ntemplate<> EIGEN_STRONG_INLINE Packet4f pcmp_lt(const Packet4f& a, const Packet4f& b) { return reinterpret_cast<Packet4f>(vec_cmplt(a,b)); 
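\n// Note on the pcmp_* family defined here and below: the vec_cmp* intrinsics\n// return per-lane masks (all-one bits for true, all-zero bits for false),\n// which are reinterpreted back into the packet type. E.g. a true float lane\n// reads back as the bit pattern 0xFFFFFFFF, ready for pand/pselect blending.\n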
}\ntemplate<> EIGEN_STRONG_INLINE Packet4f pcmp_eq(const Packet4f& a, const Packet4f& b) { return reinterpret_cast<Packet4f>(vec_cmpeq(a,b)); }\ntemplate<> EIGEN_STRONG_INLINE Packet4f pcmp_lt_or_nan(const Packet4f& a, const Packet4f& b) {\n  Packet4f c = reinterpret_cast<Packet4f>(vec_cmpge(a,b));\n  return vec_nor(c,c);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4i pcmp_le(const Packet4i& a, const Packet4i& b) { return reinterpret_cast<Packet4i>(vec_cmple(a,b)); }\ntemplate<> EIGEN_STRONG_INLINE Packet4i pcmp_lt(const Packet4i& a, const Packet4i& b) { return reinterpret_cast<Packet4i>(vec_cmplt(a,b)); }\ntemplate<> EIGEN_STRONG_INLINE Packet4i pcmp_eq(const Packet4i& a, const Packet4i& b) { return reinterpret_cast<Packet4i>(vec_cmpeq(a,b)); }\ntemplate<> EIGEN_STRONG_INLINE Packet8s pcmp_le(const Packet8s& a, const Packet8s& b) { return reinterpret_cast<Packet8s>(vec_cmple(a,b)); }\ntemplate<> EIGEN_STRONG_INLINE Packet8s pcmp_lt(const Packet8s& a, const Packet8s& b) { return reinterpret_cast<Packet8s>(vec_cmplt(a,b)); }\ntemplate<> EIGEN_STRONG_INLINE Packet8s pcmp_eq(const Packet8s& a, const Packet8s& b) { return reinterpret_cast<Packet8s>(vec_cmpeq(a,b)); }\ntemplate<> EIGEN_STRONG_INLINE Packet8us pcmp_le(const Packet8us& a, const Packet8us& b) { return reinterpret_cast<Packet8us>(vec_cmple(a,b)); }\ntemplate<> EIGEN_STRONG_INLINE Packet8us pcmp_lt(const Packet8us& a, const Packet8us& b) { return reinterpret_cast<Packet8us>(vec_cmplt(a,b)); }\ntemplate<> EIGEN_STRONG_INLINE Packet8us pcmp_eq(const Packet8us& a, const Packet8us& b) { return reinterpret_cast<Packet8us>(vec_cmpeq(a,b)); }\ntemplate<> EIGEN_STRONG_INLINE Packet16c pcmp_le(const Packet16c& a, const Packet16c& b) { return reinterpret_cast<Packet16c>(vec_cmple(a,b)); }\ntemplate<> EIGEN_STRONG_INLINE Packet16c pcmp_lt(const Packet16c& a, const Packet16c& b) { return reinterpret_cast<Packet16c>(vec_cmplt(a,b)); }\ntemplate<> EIGEN_STRONG_INLINE Packet16c pcmp_eq(const Packet16c& a, const Packet16c& b) { return reinterpret_cast<Packet16c>(vec_cmpeq(a,b)); }\ntemplate<> EIGEN_STRONG_INLINE Packet16uc pcmp_le(const Packet16uc& a, const Packet16uc& b) { return reinterpret_cast<Packet16uc>(vec_cmple(a,b)); }\ntemplate<> EIGEN_STRONG_INLINE Packet16uc pcmp_lt(const Packet16uc& a, const Packet16uc& b) { return reinterpret_cast<Packet16uc>(vec_cmplt(a,b)); }\ntemplate<> EIGEN_STRONG_INLINE Packet16uc pcmp_eq(const Packet16uc& a, const Packet16uc& b) { return reinterpret_cast<Packet16uc>(vec_cmpeq(a,b)); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_and(a, b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4i pand<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_and(a, b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4ui pand<Packet4ui>(const Packet4ui& a, const Packet4ui& b) { return vec_and(a, b); }\ntemplate<> EIGEN_STRONG_INLINE Packet8us pand<Packet8us>(const Packet8us& a, const Packet8us& b) { return vec_and(a, b); }\ntemplate<> EIGEN_STRONG_INLINE Packet8bf pand<Packet8bf>(const Packet8bf& a, const Packet8bf& b) {\n  return pand<Packet8us>(a, b);\n}\n\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f por<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_or(a, b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4i por<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_or(a, b); }\ntemplate<> EIGEN_STRONG_INLINE Packet8s por<Packet8s>(const Packet8s& a, const Packet8s& b) { return vec_or(a, b); }\ntemplate<> EIGEN_STRONG_INLINE Packet8us por<Packet8us>(const 
Packet8us& a, const Packet8us& b) { return vec_or(a, b); }\ntemplate<> EIGEN_STRONG_INLINE Packet8bf por<Packet8bf>(const Packet8bf& a, const Packet8bf& b) {\n  return por<Packet8us>(a, b);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f pxor<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_xor(a, b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4i pxor<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_xor(a, b); }\ntemplate<> EIGEN_STRONG_INLINE Packet8bf pxor<Packet8bf>(const Packet8bf& a, const Packet8bf& b) { \n  return pxor<Packet8us>(a, b);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_andc(a, b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_andc(a, b); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f pselect(const Packet4f& mask, const Packet4f& a, const Packet4f& b) {\n  return vec_sel(b, a, reinterpret_cast<Packet4ui>(mask));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f pround<Packet4f>(const Packet4f& a)\n{\n    Packet4f t = vec_add(reinterpret_cast<Packet4f>(vec_or(vec_and(reinterpret_cast<Packet4ui>(a), p4ui_SIGN), p4ui_PREV0DOT5)), a);\n    Packet4f res;\n\n#ifdef __VSX__\n    __asm__(\"xvrspiz %x0, %x1\\n\\t\"\n        : \"=&wa\" (res)\n        : \"wa\" (t));\n#else\n    __asm__(\"vrfiz %0, %1\\n\\t\"\n        : \"=v\" (res)\n        : \"v\" (t));\n#endif\n\n    return res;\n}\ntemplate<> EIGEN_STRONG_INLINE Packet4f pceil<Packet4f>(const  Packet4f& a) { return vec_ceil(a); }\ntemplate<> EIGEN_STRONG_INLINE Packet4f pfloor<Packet4f>(const Packet4f& a) { return vec_floor(a); }\ntemplate<> EIGEN_STRONG_INLINE Packet4f print<Packet4f>(const Packet4f& a)\n{\n    Packet4f res;\n\n    __asm__(\"xvrspic %x0, %x1\\n\\t\"\n        : \"=&wa\" (res)\n        : \"wa\" (a));\n\n    return res;\n}\n\ntemplate<typename Packet> EIGEN_STRONG_INLINE Packet ploadu_common(const __UNPACK_TYPE__(Packet)* from)\n{\n  EIGEN_DEBUG_ALIGNED_LOAD\n#ifdef _BIG_ENDIAN\n  Packet16uc MSQ, LSQ;\n  Packet16uc mask;\n  MSQ = vec_ld(0, (unsigned char *)from);          // most significant quadword\n  LSQ = vec_ld(15, (unsigned char *)from);         // least significant quadword\n  mask = vec_lvsl(0, from);                        // create the permute mask\n  //TODO: Add static_cast here\n  return (Packet) vec_perm(MSQ, LSQ, mask);           // align the data\n#else\n  EIGEN_DEBUG_UNALIGNED_LOAD\n  return vec_xl(0, const_cast<__UNPACK_TYPE__(Packet)*>(from));\n#endif\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from)\n{\n  return ploadu_common<Packet4f>(from);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from)\n{\n  return ploadu_common<Packet4i>(from);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet8s ploadu<Packet8s>(const short int* from)\n{\n  return ploadu_common<Packet8s>(from);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet8us ploadu<Packet8us>(const unsigned short int* from)\n{\n  return ploadu_common<Packet8us>(from);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet8bf ploadu<Packet8bf>(const bfloat16* from)\n{\n  return ploadu_common<Packet8us>(reinterpret_cast<const unsigned short int*>(from));\n}\ntemplate<> EIGEN_STRONG_INLINE Packet16c ploadu<Packet16c>(const signed char* from)\n{\n  return ploadu_common<Packet16c>(from);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet16uc ploadu<Packet16uc>(const unsigned char* from)\n{\n  return ploadu_common<Packet16uc>(from);\n}\n\ntemplate<typename Packet> EIGEN_STRONG_INLINE 
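\n// Usage sketch (illustrative): ploaddup reads half a packet's worth of\n// scalars and duplicates each into adjacent lanes, e.g. for floats {x, y}:\n//   ploaddup<Packet4f>(p);  // yields {x, x, y, y}\n// The p16uc_DUPLICATE*_HI permute masks below implement the lane doubling.\n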
Packet ploaddup_common(const __UNPACK_TYPE__(Packet)*   from)\n{\n  Packet p;\n  if((std::ptrdiff_t(from) % 16) == 0)  p = pload<Packet>(from);\n  else                                  p = ploadu<Packet>(from);\n  return vec_perm(p, p, p16uc_DUPLICATE32_HI);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float*   from)\n{\n  return ploaddup_common<Packet4f>(from);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int*     from)\n{\n  return ploaddup_common<Packet4i>(from);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8s ploaddup<Packet8s>(const short int*     from)\n{\n  Packet8s p;\n  if((std::ptrdiff_t(from) % 16) == 0)  p = pload<Packet8s>(from);\n  else                                  p = ploadu<Packet8s>(from);\n  return vec_perm(p, p, p16uc_DUPLICATE16_HI);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8us ploaddup<Packet8us>(const unsigned short int*     from)\n{\n  Packet8us p;\n  if((std::ptrdiff_t(from) % 16) == 0)  p = pload<Packet8us>(from);\n  else                                  p = ploadu<Packet8us>(from);\n  return vec_perm(p, p, p16uc_DUPLICATE16_HI);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8s ploadquad<Packet8s>(const short int*     from)\n{\n  Packet8s p;\n  if((std::ptrdiff_t(from) % 16) == 0)  p = pload<Packet8s>(from);\n  else                                  p = ploadu<Packet8s>(from);\n  return vec_perm(p, p, p16uc_QUADRUPLICATE16_HI);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8us ploadquad<Packet8us>(const unsigned short int*     from)\n{\n  Packet8us p;\n  if((std::ptrdiff_t(from) % 16) == 0)  p = pload<Packet8us>(from);\n  else                                  p = ploadu<Packet8us>(from);\n  return vec_perm(p, p, p16uc_QUADRUPLICATE16_HI);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8bf ploadquad<Packet8bf>(const bfloat16*     from)\n{\n  return ploadquad<Packet8us>(reinterpret_cast<const unsigned short int*>(from));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet16c ploaddup<Packet16c>(const signed char*     from)\n{\n  Packet16c p;\n  if((std::ptrdiff_t(from) % 16) == 0)  p = pload<Packet16c>(from);\n  else                                  p = ploadu<Packet16c>(from);\n  return vec_perm(p, p, p16uc_DUPLICATE8_HI);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet16uc ploaddup<Packet16uc>(const unsigned char*     from)\n{\n  Packet16uc p;\n  if((std::ptrdiff_t(from) % 16) == 0)  p = pload<Packet16uc>(from);\n  else                                  p = ploadu<Packet16uc>(from);\n  return vec_perm(p, p, p16uc_DUPLICATE8_HI);\n}\n\ntemplate<typename Packet> EIGEN_STRONG_INLINE void pstoreu_common(__UNPACK_TYPE__(Packet)*  to, const Packet& from)\n{\n  EIGEN_DEBUG_UNALIGNED_STORE\n#ifdef _BIG_ENDIAN\n  // Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html\n  // Warning: not thread safe!\n  Packet16uc MSQ, LSQ, edges;\n  Packet16uc edgeAlign, align;\n\n  MSQ = vec_ld(0, (unsigned char *)to);                     // most significant quadword\n  LSQ = vec_ld(15, (unsigned char *)to);                    // least significant quadword\n  edgeAlign = vec_lvsl(0, to);                              // permute map to extract edges\n  edges=vec_perm(LSQ,MSQ,edgeAlign);                        // extract the edges\n  align = vec_lvsr( 0, to );                                // permute map to misalign data\n  MSQ = vec_perm(edges,(Packet16uc)from,align);             // misalign the data (MSQ)\n  LSQ = vec_perm((Packet16uc)from,edges,align);             // misalign the data (LSQ)\n  vec_st( LSQ, 15, (unsigned char *)to );           
        // Store the LSQ part first\n  vec_st( MSQ, 0, (unsigned char *)to );                   // Store the MSQ part second\n#else\n  vec_xst(from, 0, to);\n#endif\n}\ntemplate<> EIGEN_STRONG_INLINE void pstoreu<float>(float*  to, const Packet4f& from)\n{\n  pstoreu_common<Packet4f>(to, from);\n}\ntemplate<> EIGEN_STRONG_INLINE void pstoreu<int>(int*      to, const Packet4i& from)\n{\n  pstoreu_common<Packet4i>(to, from);\n}\ntemplate<> EIGEN_STRONG_INLINE void pstoreu<short int>(short int*      to, const Packet8s& from)\n{\n  pstoreu_common<Packet8s>(to, from);\n}\ntemplate<> EIGEN_STRONG_INLINE void pstoreu<unsigned short int>(unsigned short int*      to, const Packet8us& from)\n{\n  pstoreu_common<Packet8us>(to, from);\n}\ntemplate<> EIGEN_STRONG_INLINE void pstoreu<bfloat16>(bfloat16*      to, const Packet8bf& from)\n{\n  pstoreu_common<Packet8us>(reinterpret_cast<unsigned short int*>(to), from);\n}\ntemplate<> EIGEN_STRONG_INLINE void pstoreu<signed char>(signed char*      to, const Packet16c& from)\n{\n  pstoreu_common<Packet16c>(to, from);\n}\ntemplate<> EIGEN_STRONG_INLINE void pstoreu<unsigned char>(unsigned char*      to, const Packet16uc& from)\n{\n  pstoreu_common<Packet16uc>(to, from);\n}\n\ntemplate<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr)    { EIGEN_PPC_PREFETCH(addr); }\ntemplate<> EIGEN_STRONG_INLINE void prefetch<int>(const int*     addr)    { EIGEN_PPC_PREFETCH(addr); }\n\ntemplate<> EIGEN_STRONG_INLINE float  pfirst<Packet4f>(const Packet4f& a) { EIGEN_ALIGN16 float x; vec_ste(a, 0, &x); return x; }\ntemplate<> EIGEN_STRONG_INLINE int    pfirst<Packet4i>(const Packet4i& a) { EIGEN_ALIGN16 int   x; vec_ste(a, 0, &x); return x; }\n\ntemplate<typename Packet> EIGEN_STRONG_INLINE __UNPACK_TYPE__(Packet) pfirst_common(const Packet& a) {\n  EIGEN_ALIGN16 __UNPACK_TYPE__(Packet) x;\n  vec_ste(a, 0, &x);\n  return x;\n}\n\ntemplate<> EIGEN_STRONG_INLINE short int pfirst<Packet8s>(const Packet8s& a) {\n  return pfirst_common<Packet8s>(a);\n}\n\ntemplate<> EIGEN_STRONG_INLINE unsigned short int pfirst<Packet8us>(const Packet8us& a) {\n  return pfirst_common<Packet8us>(a);\n}\n\ntemplate<> EIGEN_STRONG_INLINE signed char pfirst<Packet16c>(const Packet16c& a)\n{\n  return pfirst_common<Packet16c>(a);\n}\n\ntemplate<> EIGEN_STRONG_INLINE unsigned char pfirst<Packet16uc>(const Packet16uc& a)\n{\n  return pfirst_common<Packet16uc>(a);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a)\n{\n  return reinterpret_cast<Packet4f>(vec_perm(reinterpret_cast<Packet16uc>(a), reinterpret_cast<Packet16uc>(a), p16uc_REVERSE32));\n}\ntemplate<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a)\n{\n  return reinterpret_cast<Packet4i>(vec_perm(reinterpret_cast<Packet16uc>(a), reinterpret_cast<Packet16uc>(a), p16uc_REVERSE32));\n}\ntemplate<> EIGEN_STRONG_INLINE Packet8s preverse(const Packet8s& a)\n{\n  return reinterpret_cast<Packet8s>(vec_perm(reinterpret_cast<Packet16uc>(a), reinterpret_cast<Packet16uc>(a), p16uc_REVERSE16));\n}\ntemplate<> EIGEN_STRONG_INLINE Packet8us preverse(const Packet8us& a)\n{\n  return reinterpret_cast<Packet8us>(vec_perm(reinterpret_cast<Packet16uc>(a), reinterpret_cast<Packet16uc>(a), p16uc_REVERSE16));\n}\ntemplate<> EIGEN_STRONG_INLINE Packet16c preverse(const Packet16c& a)\n{\n  return vec_perm(a, a, p16uc_REVERSE8);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet16uc preverse(const Packet16uc& a)\n{\n  return vec_perm(a, a, p16uc_REVERSE8);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet8bf preverse(const Packet8bf& a)\n{\n  
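// bfloat16 lanes are opaque 16-bit payloads here, so reversing them is the\n  // same shuffle as for unsigned shorts; delegate to the Packet8us overload.\n  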
return preverse<Packet8us>(a);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a) { return vec_abs(a); }\ntemplate<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a) { return vec_abs(a); }\ntemplate<> EIGEN_STRONG_INLINE Packet8s pabs(const Packet8s& a) { return vec_abs(a); }\ntemplate<> EIGEN_STRONG_INLINE Packet8us pabs(const Packet8us& a) { return a; }\ntemplate<> EIGEN_STRONG_INLINE Packet16c pabs(const Packet16c& a) { return vec_abs(a); }\ntemplate<> EIGEN_STRONG_INLINE Packet16uc pabs(const Packet16uc& a) { return a; }\ntemplate<> EIGEN_STRONG_INLINE Packet8bf  pabs(const Packet8bf& a) {\n  _EIGEN_DECLARE_CONST_FAST_Packet8us(abs_mask,0x7FFF);\n  return pand<Packet8us>(p8us_abs_mask, a);\n}\n\ntemplate<int N> EIGEN_STRONG_INLINE Packet4i parithmetic_shift_right(const Packet4i& a)\n{ return vec_sra(a,reinterpret_cast<Packet4ui>(pset1<Packet4i>(N))); }\ntemplate<int N> EIGEN_STRONG_INLINE Packet4i plogical_shift_right(const Packet4i& a)\n{ return vec_sr(a,reinterpret_cast<Packet4ui>(pset1<Packet4i>(N))); }\ntemplate<int N> EIGEN_STRONG_INLINE Packet4i plogical_shift_left(const Packet4i& a)\n{ return vec_sl(a,reinterpret_cast<Packet4ui>(pset1<Packet4i>(N))); }\ntemplate<int N> EIGEN_STRONG_INLINE Packet4f plogical_shift_left(const Packet4f& a)\n{\n  const _EIGEN_DECLARE_CONST_FAST_Packet4ui(mask, N);\n  Packet4ui r = vec_sl(reinterpret_cast<Packet4ui>(a), p4ui_mask);\n  return reinterpret_cast<Packet4f>(r);\n}\n\ntemplate<int N> EIGEN_STRONG_INLINE Packet4f plogical_shift_right(const Packet4f& a)\n{\n  const _EIGEN_DECLARE_CONST_FAST_Packet4ui(mask, N);\n  Packet4ui r = vec_sr(reinterpret_cast<Packet4ui>(a), p4ui_mask);\n  return reinterpret_cast<Packet4f>(r);\n}\n\ntemplate<int N> EIGEN_STRONG_INLINE Packet4ui plogical_shift_right(const Packet4ui& a)\n{\n  const _EIGEN_DECLARE_CONST_FAST_Packet4ui(mask, N);\n  return vec_sr(a, p4ui_mask);\n}\n\ntemplate<int N> EIGEN_STRONG_INLINE Packet4ui plogical_shift_left(const Packet4ui& a)\n{\n  const _EIGEN_DECLARE_CONST_FAST_Packet4ui(mask, N);\n  return vec_sl(a, p4ui_mask);\n}\n\ntemplate<int N> EIGEN_STRONG_INLINE Packet8us plogical_shift_left(const Packet8us& a)\n{\n  const _EIGEN_DECLARE_CONST_FAST_Packet8us(mask, N);\n  return vec_sl(a, p8us_mask);\n}\ntemplate<int N> EIGEN_STRONG_INLINE Packet8us plogical_shift_right(const Packet8us& a)\n{\n  const _EIGEN_DECLARE_CONST_FAST_Packet8us(mask, N);\n  return vec_sr(a, p8us_mask);\n}\n\nEIGEN_STRONG_INLINE Packet4f Bf16ToF32Even(const Packet8bf& bf){\n  return plogical_shift_left<16>(reinterpret_cast<Packet4f>(bf.m_val));\n}\n\nEIGEN_STRONG_INLINE Packet4f Bf16ToF32Odd(const Packet8bf& bf){\n  const _EIGEN_DECLARE_CONST_FAST_Packet4ui(high_mask, 0xFFFF0000);\n  return pand<Packet4f>(\n    reinterpret_cast<Packet4f>(bf.m_val),\n    reinterpret_cast<Packet4f>(p4ui_high_mask)\n  );\n}\n\n// Simple interleaving of bool masks, prevents true values from being\n// converted to NaNs.\nEIGEN_STRONG_INLINE Packet8bf F32ToBf16Bool(Packet4f even, Packet4f odd) {\n  const _EIGEN_DECLARE_CONST_FAST_Packet4ui(high_mask, 0xFFFF0000);\n  Packet4f bf_odd, bf_even;\n  bf_odd = pand(reinterpret_cast<Packet4f>(p4ui_high_mask), odd);\n  bf_even = plogical_shift_right<16>(even);\n  return reinterpret_cast<Packet8us>(por<Packet4f>(bf_even, bf_odd));\n}\n\nEIGEN_STRONG_INLINE Packet8bf F32ToBf16(Packet4f p4f){\n  Packet4ui input = reinterpret_cast<Packet4ui>(p4f);\n  Packet4ui lsb = plogical_shift_right<16>(input);\n  lsb = pand<Packet4ui>(lsb, reinterpret_cast<Packet4ui>(p4i_ONE));\n\n  
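// Round-to-nearest-even: add 0x7FFF plus the lsb of the would-be bf16\n  // result, then truncate to the high 16 bits. Worked example (assumed\n  // values): 0x3F808000 is an exact tie with an even target 0x3F80, the bias\n  // is 0 + 0x7FFF, and the sum 0x3F80FFFF still truncates to 0x3F80; for\n  // 0x3F818000 the lsb is 1, so adding 0x8000 rounds up to 0x3F82.\n  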
_EIGEN_DECLARE_CONST_FAST_Packet4ui(BIAS,0x7FFFu);\n  Packet4ui rounding_bias = padd<Packet4ui>(lsb, p4ui_BIAS);\n  input = padd<Packet4ui>(input, rounding_bias);\n\n  //Test NaN and Subnormal - Begin\n  const _EIGEN_DECLARE_CONST_FAST_Packet4ui(exp_mask, 0x7F800000);\n  Packet4ui exp = pand<Packet4ui>(p4ui_exp_mask, reinterpret_cast<Packet4ui>(p4f));\n\n  const _EIGEN_DECLARE_CONST_FAST_Packet4ui(mantissa_mask, 0x7FFFFF);\n  Packet4ui mantissa = pand<Packet4ui>(p4ui_mantissa_mask, reinterpret_cast<Packet4ui>(p4f));\n\n  const _EIGEN_DECLARE_CONST_FAST_Packet4ui(max_exp, 0x7F800000);\n  Packet4bi is_max_exp = vec_cmpeq(exp, p4ui_max_exp);\n  Packet4bi is_zero_exp = vec_cmpeq(exp, reinterpret_cast<Packet4ui>(p4i_ZERO));\n\n  Packet4bi is_mant_zero = vec_cmpeq(mantissa, reinterpret_cast<Packet4ui>(p4i_ZERO));\n  Packet4ui nan_selector = pandnot<Packet4ui>(\n      reinterpret_cast<Packet4ui>(is_max_exp),\n      reinterpret_cast<Packet4ui>(is_mant_zero)\n  );\n\n  Packet4ui subnormal_selector = pandnot<Packet4ui>(\n      reinterpret_cast<Packet4ui>(is_zero_exp),\n      reinterpret_cast<Packet4ui>(is_mant_zero)\n  );\n\n  const _EIGEN_DECLARE_CONST_FAST_Packet4ui(nan, 0x7FC00000);\n  input = vec_sel(input, p4ui_nan, nan_selector);\n  input = vec_sel(input, reinterpret_cast<Packet4ui>(p4f), subnormal_selector);\n  //Test NaN and Subnormal - End\n\n  input = plogical_shift_right<16>(input);\n  return reinterpret_cast<Packet8us>(input);\n}\n\nEIGEN_STRONG_INLINE Packet8bf F32ToBf16(Packet4f even, Packet4f odd){\n  Packet4f bf_odd, bf_even;\n  bf_odd = reinterpret_cast<Packet4f>(F32ToBf16(odd).m_val);\n  bf_odd = plogical_shift_left<16>(bf_odd);\n  bf_even = reinterpret_cast<Packet4f>(F32ToBf16(even).m_val);\n  return reinterpret_cast<Packet8us>(por<Packet4f>(bf_even, bf_odd));\n}\n#define BF16_TO_F32_UNARY_OP_WRAPPER(OP, A) \\\n  Packet4f a_even = Bf16ToF32Even(A);\\\n  Packet4f a_odd = Bf16ToF32Odd(A);\\\n  Packet4f op_even = OP(a_even);\\\n  Packet4f op_odd = OP(a_odd);\\\n  return F32ToBf16(op_even, op_odd);\\\n\n#define BF16_TO_F32_BINARY_OP_WRAPPER(OP, A, B) \\\n  Packet4f a_even = Bf16ToF32Even(A);\\\n  Packet4f a_odd = Bf16ToF32Odd(A);\\\n  Packet4f b_even = Bf16ToF32Even(B);\\\n  Packet4f b_odd = Bf16ToF32Odd(B);\\\n  Packet4f op_even = OP(a_even, b_even);\\\n  Packet4f op_odd = OP(a_odd, b_odd);\\\n  return F32ToBf16(op_even, op_odd);\\\n\n#define BF16_TO_F32_BINARY_OP_WRAPPER_BOOL(OP, A, B) \\\n  Packet4f a_even = Bf16ToF32Even(A);\\\n  Packet4f a_odd = Bf16ToF32Odd(A);\\\n  Packet4f b_even = Bf16ToF32Even(B);\\\n  Packet4f b_odd = Bf16ToF32Odd(B);\\\n  Packet4f op_even = OP(a_even, b_even);\\\n  Packet4f op_odd = OP(a_odd, b_odd);\\\n  return F32ToBf16Bool(op_even, op_odd);\\\n\ntemplate<> EIGEN_STRONG_INLINE Packet8bf padd<Packet8bf>(const Packet8bf& a, const Packet8bf& b) {\n  BF16_TO_F32_BINARY_OP_WRAPPER(padd<Packet4f>, a, b);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8bf pmul<Packet8bf>(const Packet8bf& a, const Packet8bf& b) {\n  BF16_TO_F32_BINARY_OP_WRAPPER(pmul<Packet4f>, a, b);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8bf pdiv<Packet8bf>(const Packet8bf& a, const Packet8bf& b) {\n  BF16_TO_F32_BINARY_OP_WRAPPER(pdiv<Packet4f>, a, b);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8bf pnegate<Packet8bf>(const Packet8bf& a) {\n  BF16_TO_F32_UNARY_OP_WRAPPER(pnegate<Packet4f>, a);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8bf psub<Packet8bf>(const Packet8bf& a, const Packet8bf& b) {\n  BF16_TO_F32_BINARY_OP_WRAPPER(psub<Packet4f>, a, b);\n}\n\ntemplate<> EIGEN_STRONG_INLINE 
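\n// The BF16_TO_F32_*_WRAPPER macros above capture the bf16 strategy used\n// throughout this section: widen the even and odd lanes to two float\n// packets, run the float kernel on each, then repack (and re-round) with\n// F32ToBf16. The specializations that follow are thin instantiations of\n// that pattern.\n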
Packet8bf psqrt<Packet8bf> (const Packet8bf& a){\n  BF16_TO_F32_UNARY_OP_WRAPPER(vec_sqrt, a);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet8bf prsqrt<Packet8bf> (const Packet8bf& a){\n  BF16_TO_F32_UNARY_OP_WRAPPER(prsqrt<Packet4f>, a);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet8bf pexp<Packet8bf> (const Packet8bf& a){\n  BF16_TO_F32_UNARY_OP_WRAPPER(pexp_float, a);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f pldexp<Packet4f>(const Packet4f& a, const Packet4f& exponent) {\n  return pldexp_generic(a,exponent);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet8bf pldexp<Packet8bf> (const Packet8bf& a, const Packet8bf& exponent){\n  BF16_TO_F32_BINARY_OP_WRAPPER(pldexp<Packet4f>, a, exponent);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f pfrexp<Packet4f>(const Packet4f& a, Packet4f& exponent) {\n  return pfrexp_generic(a,exponent);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet8bf pfrexp<Packet8bf> (const Packet8bf& a, Packet8bf& e){\n  Packet4f a_even = Bf16ToF32Even(a);\n  Packet4f a_odd = Bf16ToF32Odd(a);\n  Packet4f e_even;\n  Packet4f e_odd;\n  Packet4f op_even = pfrexp<Packet4f>(a_even, e_even);\n  Packet4f op_odd = pfrexp<Packet4f>(a_odd, e_odd);\n  e = F32ToBf16(e_even, e_odd);\n  return F32ToBf16(op_even, op_odd);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8bf psin<Packet8bf> (const Packet8bf& a){\n  BF16_TO_F32_UNARY_OP_WRAPPER(psin_float, a);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet8bf pcos<Packet8bf> (const Packet8bf& a){\n  BF16_TO_F32_UNARY_OP_WRAPPER(pcos_float, a);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet8bf plog<Packet8bf> (const Packet8bf& a){\n  BF16_TO_F32_UNARY_OP_WRAPPER(plog_float, a);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet8bf pfloor<Packet8bf> (const Packet8bf& a){\n  BF16_TO_F32_UNARY_OP_WRAPPER(pfloor<Packet4f>, a);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet8bf pceil<Packet8bf> (const Packet8bf& a){\n  BF16_TO_F32_UNARY_OP_WRAPPER(pceil<Packet4f>, a);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet8bf pround<Packet8bf> (const Packet8bf& a){\n  BF16_TO_F32_UNARY_OP_WRAPPER(pround<Packet4f>, a);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet8bf print<Packet8bf> (const Packet8bf& a){\n  BF16_TO_F32_UNARY_OP_WRAPPER(print<Packet4f>, a);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet8bf pmadd(const Packet8bf& a, const Packet8bf& b, const Packet8bf& c) {\n  Packet4f a_even = Bf16ToF32Even(a);\n  Packet4f a_odd = Bf16ToF32Odd(a);\n  Packet4f b_even = Bf16ToF32Even(b);\n  Packet4f b_odd = Bf16ToF32Odd(b);\n  Packet4f c_even = Bf16ToF32Even(c);\n  Packet4f c_odd = Bf16ToF32Odd(c);\n  Packet4f pmadd_even = pmadd<Packet4f>(a_even, b_even, c_even);\n  Packet4f pmadd_odd = pmadd<Packet4f>(a_odd, b_odd, c_odd);\n  return F32ToBf16(pmadd_even, pmadd_odd);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8bf pmin<Packet8bf>(const Packet8bf& a, const Packet8bf& b) {\n  BF16_TO_F32_BINARY_OP_WRAPPER(pmin<Packet4f>, a, b);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8bf pmax<Packet8bf>(const Packet8bf& a, const Packet8bf& b) {\n  BF16_TO_F32_BINARY_OP_WRAPPER(pmax<Packet4f>, a, b);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8bf pcmp_lt(const Packet8bf& a, const Packet8bf& b) {\n  BF16_TO_F32_BINARY_OP_WRAPPER_BOOL(pcmp_lt<Packet4f>, a, b);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet8bf pcmp_lt_or_nan(const Packet8bf& a, const Packet8bf& b) {\n  BF16_TO_F32_BINARY_OP_WRAPPER_BOOL(pcmp_lt_or_nan<Packet4f>, a, b);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet8bf pcmp_le(const Packet8bf& a, const Packet8bf& b) {\n  BF16_TO_F32_BINARY_OP_WRAPPER_BOOL(pcmp_le<Packet4f>, a, b);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet8bf 
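\n// These bf16 comparisons go through F32ToBf16Bool rather than F32ToBf16: the\n// float kernels return all-ones/all-zeros lane masks, and re-rounding an\n// all-ones pattern through F32ToBf16 would hit its NaN handling and corrupt\n// the mask.\n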
pcmp_eq(const Packet8bf& a, const Packet8bf& b) {\n  BF16_TO_F32_BINARY_OP_WRAPPER_BOOL(pcmp_eq<Packet4f>, a, b);\n}\n\ntemplate<> EIGEN_STRONG_INLINE bfloat16 pfirst(const Packet8bf& a) {\n  return Eigen::bfloat16_impl::raw_uint16_to_bfloat16((pfirst<Packet8us>(a)));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8bf ploaddup<Packet8bf>(const  bfloat16*     from)\n{\n  return ploaddup<Packet8us>(reinterpret_cast<const unsigned short int*>(from));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8bf plset<Packet8bf>(const bfloat16& a) {\n  bfloat16 countdown[8] = { bfloat16(0), bfloat16(1), bfloat16(2), bfloat16(3),\n                            bfloat16(4), bfloat16(5), bfloat16(6), bfloat16(7) };\n  return padd<Packet8bf>(pset1<Packet8bf>(a), pload<Packet8bf>(countdown));\n}\n\ntemplate<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)\n{\n  Packet4f b, sum;\n  b   = vec_sld(a, a, 8);\n  sum = a + b;\n  b   = vec_sld(sum, sum, 4);\n  sum += b;\n  return pfirst(sum);\n}\n\ntemplate<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a)\n{\n  Packet4i sum;\n  sum = vec_sums(a, p4i_ZERO);\n#ifdef _BIG_ENDIAN\n  sum = vec_sld(sum, p4i_ZERO, 12);\n#else\n  sum = vec_sld(p4i_ZERO, sum, 4);\n#endif\n  return pfirst(sum);\n}\n\ntemplate<> EIGEN_STRONG_INLINE bfloat16 predux<Packet8bf>(const Packet8bf& a)\n{\n  float redux_even = predux<Packet4f>(Bf16ToF32Even(a));\n  float redux_odd  = predux<Packet4f>(Bf16ToF32Odd(a));\n  float f32_result = redux_even + redux_odd;\n  return bfloat16(f32_result);\n}\ntemplate<typename Packet> EIGEN_STRONG_INLINE __UNPACK_TYPE__(Packet) predux_size8(const Packet& a)\n{\n  union{\n    Packet v;\n    __UNPACK_TYPE__(Packet) n[8];\n  } vt;\n  vt.v = a;\n\n  EIGEN_ALIGN16 int first_loader[4] = { vt.n[0], vt.n[1], vt.n[2], vt.n[3] };\n  EIGEN_ALIGN16 int second_loader[4] = { vt.n[4], vt.n[5], vt.n[6], vt.n[7] };\n  Packet4i first_half  = pload<Packet4i>(first_loader);\n  Packet4i second_half = pload<Packet4i>(second_loader);\n\n  return static_cast<__UNPACK_TYPE__(Packet)>(predux(first_half) + predux(second_half));\n}\n\ntemplate<> EIGEN_STRONG_INLINE short int predux<Packet8s>(const Packet8s& a)\n{\n  return predux_size8<Packet8s>(a);\n}\n\ntemplate<> EIGEN_STRONG_INLINE unsigned short int predux<Packet8us>(const Packet8us& a)\n{\n  return predux_size8<Packet8us>(a);\n}\n\ntemplate<typename Packet> EIGEN_STRONG_INLINE __UNPACK_TYPE__(Packet) predux_size16(const Packet& a)\n{\n  union{\n    Packet v;\n    __UNPACK_TYPE__(Packet) n[16];\n  } vt;\n  vt.v = a;\n\n  EIGEN_ALIGN16 int first_loader[4] = { vt.n[0], vt.n[1], vt.n[2], vt.n[3] };\n  EIGEN_ALIGN16 int second_loader[4] = { vt.n[4], vt.n[5], vt.n[6], vt.n[7] };\n  EIGEN_ALIGN16 int third_loader[4] = { vt.n[8], vt.n[9], vt.n[10], vt.n[11] };\n  EIGEN_ALIGN16 int fourth_loader[4] = { vt.n[12], vt.n[13], vt.n[14], vt.n[15] };\n\n  Packet4i first_quarter = pload<Packet4i>(first_loader);\n  Packet4i second_quarter = pload<Packet4i>(second_loader);\n  Packet4i third_quarter = pload<Packet4i>(third_loader);\n  Packet4i fourth_quarter = pload<Packet4i>(fourth_loader);\n\n  return static_cast<__UNPACK_TYPE__(Packet)>(predux(first_quarter) + predux(second_quarter)\n\t\t                  + predux(third_quarter) + predux(fourth_quarter));\n}\n\ntemplate<> EIGEN_STRONG_INLINE signed char predux<Packet16c>(const Packet16c& a)\n{\n  return predux_size16<Packet16c>(a);\n}\n\ntemplate<> EIGEN_STRONG_INLINE unsigned char predux<Packet16uc>(const Packet16uc& a)\n{\n  return predux_size16<Packet16uc>(a);\n}\n\n// Other 
reduction functions:\n// mul\ntemplate<> EIGEN_STRONG_INLINE float predux_mul<Packet4f>(const Packet4f& a)\n{\n  Packet4f prod;\n  prod = pmul(a, vec_sld(a, a, 8));\n  return pfirst(pmul(prod, vec_sld(prod, prod, 4)));\n}\n\ntemplate<> EIGEN_STRONG_INLINE int predux_mul<Packet4i>(const Packet4i& a)\n{\n  EIGEN_ALIGN16 int aux[4];\n  pstore(aux, a);\n  return aux[0] * aux[1] * aux[2] * aux[3];\n}\n\ntemplate<> EIGEN_STRONG_INLINE short int predux_mul<Packet8s>(const Packet8s& a)\n{\n  Packet8s pair, quad, octo;\n\n  pair = vec_mul(a, vec_sld(a, a, 8));\n  quad = vec_mul(pair, vec_sld(pair, pair, 4));\n  octo = vec_mul(quad, vec_sld(quad, quad, 2));\n\n  return pfirst(octo);\n}\n\ntemplate<> EIGEN_STRONG_INLINE unsigned short int predux_mul<Packet8us>(const Packet8us& a)\n{\n  Packet8us pair, quad, octo;\n\n  pair = vec_mul(a, vec_sld(a, a, 8));\n  quad = vec_mul(pair, vec_sld(pair, pair, 4));\n  octo = vec_mul(quad, vec_sld(quad, quad, 2));\n\n  return pfirst(octo);\n}\n\ntemplate<> EIGEN_STRONG_INLINE bfloat16 predux_mul<Packet8bf>(const Packet8bf& a)\n{\n  float redux_even = predux_mul<Packet4f>(Bf16ToF32Even(a));\n  float redux_odd  = predux_mul<Packet4f>(Bf16ToF32Odd(a));\n  float f32_result = redux_even * redux_odd;\n  return bfloat16(f32_result);\n}\n\n\ntemplate<> EIGEN_STRONG_INLINE signed char predux_mul<Packet16c>(const Packet16c& a)\n{\n  Packet16c pair, quad, octo, result;\n\n  pair = vec_mul(a, vec_sld(a, a, 8));\n  quad = vec_mul(pair, vec_sld(pair, pair, 4));\n  octo = vec_mul(quad, vec_sld(quad, quad, 2));\n  result = vec_mul(octo, vec_sld(octo, octo, 1));\n\n  return pfirst(result);\n}\n\ntemplate<> EIGEN_STRONG_INLINE unsigned char predux_mul<Packet16uc>(const Packet16uc& a)\n{\n  Packet16uc pair, quad, octo, result;\n\n  pair = vec_mul(a, vec_sld(a, a, 8));\n  quad = vec_mul(pair, vec_sld(pair, pair, 4));\n  octo = vec_mul(quad, vec_sld(quad, quad, 2));\n  result = vec_mul(octo, vec_sld(octo, octo, 1));\n\n  return pfirst(result);\n}\n\n// min\ntemplate<typename Packet> EIGEN_STRONG_INLINE\n__UNPACK_TYPE__(Packet) predux_min4(const Packet& a)\n{\n  Packet b, res;\n  b = vec_min(a, vec_sld(a, a, 8));\n  res = vec_min(b, vec_sld(b, b, 4));\n  return pfirst(res);\n}\n\n\ntemplate<> EIGEN_STRONG_INLINE float predux_min<Packet4f>(const Packet4f& a)\n{\n  return predux_min4<Packet4f>(a);\n}\n\ntemplate<> EIGEN_STRONG_INLINE int predux_min<Packet4i>(const Packet4i& a)\n{\n  return predux_min4<Packet4i>(a);\n}\n\ntemplate<> EIGEN_STRONG_INLINE bfloat16 predux_min<Packet8bf>(const Packet8bf& a)\n{\n  float redux_even = predux_min<Packet4f>(Bf16ToF32Even(a));\n  float redux_odd  = predux_min<Packet4f>(Bf16ToF32Odd(a));\n  float f32_result = (std::min)(redux_even, redux_odd);\n  return bfloat16(f32_result);\n}\n\ntemplate<> EIGEN_STRONG_INLINE short int predux_min<Packet8s>(const Packet8s& a)\n{\n  Packet8s pair, quad, octo;\n  \n  //pair = { Min(a0,a4), Min(a1,a5), Min(a2,a6), Min(a3,a7) }\n  pair = vec_min(a, vec_sld(a, a, 8)); \n\n  //quad = { Min(a0, a4, a2, a6), Min(a1, a5, a3, a7) }\n  quad = vec_min(pair, vec_sld(pair, pair, 4));\n\n  //octo = { Min(a0, a4, a2, a6, a1, a5, a3, a7) }\n  octo = vec_min(quad, vec_sld(quad, quad, 2));\n  return pfirst(octo);\n}\n\ntemplate<> EIGEN_STRONG_INLINE unsigned short int predux_min<Packet8us>(const Packet8us& a)\n{\n  Packet8us pair, quad, octo;\n  \n  //pair = { Min(a0,a4), Min(a1,a5), Min(a2,a6), Min(a3,a7) }\n  pair = vec_min(a, vec_sld(a, a, 8)); \n\n  //quad = { Min(a0, a4, a2, a6), Min(a1, a5, a3, a7) }\n  quad = vec_min(pair, 
vec_sld(pair, pair, 4));\n\n  //octo = { Min(a0, a4, a2, a6, a1, a5, a3, a7) }\n  octo = vec_min(quad, vec_sld(quad, quad, 2));\n  return pfirst(octo);\n}\n\ntemplate<> EIGEN_STRONG_INLINE signed char predux_min<Packet16c>(const Packet16c& a)\n{\n  Packet16c pair, quad, octo, result;\n\n  pair = vec_min(a, vec_sld(a, a, 8));\n  quad = vec_min(pair, vec_sld(pair, pair, 4));\n  octo = vec_min(quad, vec_sld(quad, quad, 2));\n  result = vec_min(octo, vec_sld(octo, octo, 1));\n\n  return pfirst(result);\n}\n\ntemplate<> EIGEN_STRONG_INLINE unsigned char predux_min<Packet16uc>(const Packet16uc& a)\n{\n  Packet16uc pair, quad, octo, result;\n\n  pair = vec_min(a, vec_sld(a, a, 8));\n  quad = vec_min(pair, vec_sld(pair, pair, 4));\n  octo = vec_min(quad, vec_sld(quad, quad, 2));\n  result = vec_min(octo, vec_sld(octo, octo, 1));\n\n  return pfirst(result);\n}\n// max\ntemplate<typename Packet> EIGEN_STRONG_INLINE __UNPACK_TYPE__(Packet) predux_max4(const Packet& a)\n{\n  Packet b, res;\n  b = vec_max(a, vec_sld(a, a, 8));\n  res = vec_max(b, vec_sld(b, b, 4));\n  return pfirst(res);\n}\n\ntemplate<> EIGEN_STRONG_INLINE float predux_max<Packet4f>(const Packet4f& a)\n{\n  return predux_max4<Packet4f>(a);\n}\n\ntemplate<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a)\n{\n  return predux_max4<Packet4i>(a);\n}\n\ntemplate<> EIGEN_STRONG_INLINE bfloat16 predux_max<Packet8bf>(const Packet8bf& a)\n{\n  float redux_even = predux_max<Packet4f>(Bf16ToF32Even(a));\n  float redux_odd  = predux_max<Packet4f>(Bf16ToF32Odd(a));\n  float f32_result = (std::max)(redux_even, redux_odd);\n  return bfloat16(f32_result);\n}\n\ntemplate<> EIGEN_STRONG_INLINE short int predux_max<Packet8s>(const Packet8s& a)\n{\n  Packet8s pair, quad, octo;\n  \n  //pair = { Max(a0,a4), Max(a1,a5), Max(a2,a6), Max(a3,a7) }\n  pair = vec_max(a, vec_sld(a, a, 8)); \n\n  //quad = { Max(a0, a4, a2, a6), Max(a1, a5, a3, a7) }\n  quad = vec_max(pair, vec_sld(pair, pair, 4));\n\n  //octo = { Max(a0, a4, a2, a6, a1, a5, a3, a7) }\n  octo = vec_max(quad, vec_sld(quad, quad, 2));\n  return pfirst(octo);\n}\n\ntemplate<> EIGEN_STRONG_INLINE unsigned short int predux_max<Packet8us>(const Packet8us& a)\n{\n  Packet8us pair, quad, octo;\n  \n  //pair = { Max(a0,a4), Max(a1,a5), Max(a2,a6), Max(a3,a7) }\n  pair = vec_max(a, vec_sld(a, a, 8)); \n\n  //quad = { Max(a0, a4, a2, a6), Max(a1, a5, a3, a7) }\n  quad = vec_max(pair, vec_sld(pair, pair, 4));\n\n  //octo = { Max(a0, a4, a2, a6, a1, a5, a3, a7) }\n  octo = vec_max(quad, vec_sld(quad, quad, 2));\n  return pfirst(octo);\n}\n\ntemplate<> EIGEN_STRONG_INLINE signed char predux_max<Packet16c>(const Packet16c& a)\n{\n  Packet16c pair, quad, octo, result;\n\n  pair = vec_max(a, vec_sld(a, a, 8));\n  quad = vec_max(pair, vec_sld(pair, pair, 4));\n  octo = vec_max(quad, vec_sld(quad, quad, 2));\n  result = vec_max(octo, vec_sld(octo, octo, 1));\n\n  return pfirst(result);\n}\n\ntemplate<> EIGEN_STRONG_INLINE unsigned char predux_max<Packet16uc>(const Packet16uc& a)\n{\n  Packet16uc pair, quad, octo, result;\n\n  pair = vec_max(a, vec_sld(a, a, 8));\n  quad = vec_max(pair, vec_sld(pair, pair, 4));\n  octo = vec_max(quad, vec_sld(quad, quad, 2));\n  result = vec_max(octo, vec_sld(octo, octo, 1));\n\n  return pfirst(result);\n}\n\ntemplate<> EIGEN_STRONG_INLINE bool predux_any(const Packet4f& x)\n{\n  return vec_any_ne(x, pzero(x));\n}\n\ntemplate <typename T> EIGEN_DEVICE_FUNC inline void\nptranpose_common(PacketBlock<T,4>& kernel){\n  T t0, t1, t2, t3;\n  t0 = vec_mergeh(kernel.packet[0], 
kernel.packet[2]);\n  t1 = vec_mergel(kernel.packet[0], kernel.packet[2]);\n  t2 = vec_mergeh(kernel.packet[1], kernel.packet[3]);\n  t3 = vec_mergel(kernel.packet[1], kernel.packet[3]);\n  kernel.packet[0] = vec_mergeh(t0, t2);\n  kernel.packet[1] = vec_mergel(t0, t2);\n  kernel.packet[2] = vec_mergeh(t1, t3);\n  kernel.packet[3] = vec_mergel(t1, t3);\n}\n\nEIGEN_DEVICE_FUNC inline void\nptranspose(PacketBlock<Packet4f,4>& kernel) {\n  ptranpose_common<Packet4f>(kernel);\n}\n\nEIGEN_DEVICE_FUNC inline void\nptranspose(PacketBlock<Packet4i,4>& kernel) {\n  ptranpose_common<Packet4i>(kernel);\n}\n\nEIGEN_DEVICE_FUNC inline void\nptranspose(PacketBlock<Packet8s,4>& kernel) {\n  Packet8s t0, t1, t2, t3;\n  t0 = vec_mergeh(kernel.packet[0], kernel.packet[2]);\n  t1 = vec_mergel(kernel.packet[0], kernel.packet[2]);\n  t2 = vec_mergeh(kernel.packet[1], kernel.packet[3]);\n  t3 = vec_mergel(kernel.packet[1], kernel.packet[3]);\n  kernel.packet[0] = vec_mergeh(t0, t2);\n  kernel.packet[1] = vec_mergel(t0, t2);\n  kernel.packet[2] = vec_mergeh(t1, t3);\n  kernel.packet[3] = vec_mergel(t1, t3);\n}\n\nEIGEN_DEVICE_FUNC inline void\nptranspose(PacketBlock<Packet8us,4>& kernel) {\n  Packet8us t0, t1, t2, t3;\n  t0 = vec_mergeh(kernel.packet[0], kernel.packet[2]);\n  t1 = vec_mergel(kernel.packet[0], kernel.packet[2]);\n  t2 = vec_mergeh(kernel.packet[1], kernel.packet[3]);\n  t3 = vec_mergel(kernel.packet[1], kernel.packet[3]);\n  kernel.packet[0] = vec_mergeh(t0, t2);\n  kernel.packet[1] = vec_mergel(t0, t2);\n  kernel.packet[2] = vec_mergeh(t1, t3);\n  kernel.packet[3] = vec_mergel(t1, t3);\n}\n\n\nEIGEN_DEVICE_FUNC inline void\nptranspose(PacketBlock<Packet8bf,4>& kernel) {\n  Packet8us t0, t1, t2, t3;\n\n  t0 = vec_mergeh(kernel.packet[0].m_val, kernel.packet[2].m_val);\n  t1 = vec_mergel(kernel.packet[0].m_val, kernel.packet[2].m_val);\n  t2 = vec_mergeh(kernel.packet[1].m_val, kernel.packet[3].m_val);\n  t3 = vec_mergel(kernel.packet[1].m_val, kernel.packet[3].m_val);\n  kernel.packet[0] = vec_mergeh(t0, t2);\n  kernel.packet[1] = vec_mergel(t0, t2);\n  kernel.packet[2] = vec_mergeh(t1, t3);\n  kernel.packet[3] = vec_mergel(t1, t3);\n}\n\nEIGEN_DEVICE_FUNC inline void\nptranspose(PacketBlock<Packet16c,4>& kernel) {\n  Packet16c t0, t1, t2, t3;\n  t0 = vec_mergeh(kernel.packet[0], kernel.packet[2]);\n  t1 = vec_mergel(kernel.packet[0], kernel.packet[2]);\n  t2 = vec_mergeh(kernel.packet[1], kernel.packet[3]);\n  t3 = vec_mergel(kernel.packet[1], kernel.packet[3]);\n  kernel.packet[0] = vec_mergeh(t0, t2);\n  kernel.packet[1] = vec_mergel(t0, t2);\n  kernel.packet[2] = vec_mergeh(t1, t3);\n  kernel.packet[3] = vec_mergel(t1, t3);\n}\n\n\nEIGEN_DEVICE_FUNC inline void\nptranspose(PacketBlock<Packet16uc,4>& kernel) {\n  Packet16uc t0, t1, t2, t3;\n  t0 = vec_mergeh(kernel.packet[0], kernel.packet[2]);\n  t1 = vec_mergel(kernel.packet[0], kernel.packet[2]);\n  t2 = vec_mergeh(kernel.packet[1], kernel.packet[3]);\n  t3 = vec_mergel(kernel.packet[1], kernel.packet[3]);\n  kernel.packet[0] = vec_mergeh(t0, t2);\n  kernel.packet[1] = vec_mergel(t0, t2);\n  kernel.packet[2] = vec_mergeh(t1, t3);\n  kernel.packet[3] = vec_mergel(t1, t3);\n}\n\nEIGEN_DEVICE_FUNC inline void\nptranspose(PacketBlock<Packet8s,8>& kernel) {\n  Packet8s v[8], sum[8];\n\n  v[0] = vec_mergeh(kernel.packet[0], kernel.packet[4]);\n  v[1] = vec_mergel(kernel.packet[0], kernel.packet[4]);\n  v[2] = vec_mergeh(kernel.packet[1], kernel.packet[5]);\n  v[3] = vec_mergel(kernel.packet[1], kernel.packet[5]);\n  v[4] = 
vec_mergeh(kernel.packet[2], kernel.packet[6]);\n  v[5] = vec_mergel(kernel.packet[2], kernel.packet[6]);\n  v[6] = vec_mergeh(kernel.packet[3], kernel.packet[7]);\n  v[7] = vec_mergel(kernel.packet[3], kernel.packet[7]);\n  sum[0] = vec_mergeh(v[0], v[4]);\n  sum[1] = vec_mergel(v[0], v[4]);\n  sum[2] = vec_mergeh(v[1], v[5]);\n  sum[3] = vec_mergel(v[1], v[5]);\n  sum[4] = vec_mergeh(v[2], v[6]);\n  sum[5] = vec_mergel(v[2], v[6]);\n  sum[6] = vec_mergeh(v[3], v[7]);\n  sum[7] = vec_mergel(v[3], v[7]);\n\n  kernel.packet[0] = vec_mergeh(sum[0], sum[4]);\n  kernel.packet[1] = vec_mergel(sum[0], sum[4]);\n  kernel.packet[2] = vec_mergeh(sum[1], sum[5]);\n  kernel.packet[3] = vec_mergel(sum[1], sum[5]);\n  kernel.packet[4] = vec_mergeh(sum[2], sum[6]);\n  kernel.packet[5] = vec_mergel(sum[2], sum[6]);\n  kernel.packet[6] = vec_mergeh(sum[3], sum[7]);\n  kernel.packet[7] = vec_mergel(sum[3], sum[7]);\n}\n\nEIGEN_DEVICE_FUNC inline void\nptranspose(PacketBlock<Packet8us,8>& kernel) {\n  Packet8us v[8], sum[8];\n\n  v[0] = vec_mergeh(kernel.packet[0], kernel.packet[4]);\n  v[1] = vec_mergel(kernel.packet[0], kernel.packet[4]);\n  v[2] = vec_mergeh(kernel.packet[1], kernel.packet[5]);\n  v[3] = vec_mergel(kernel.packet[1], kernel.packet[5]);\n  v[4] = vec_mergeh(kernel.packet[2], kernel.packet[6]);\n  v[5] = vec_mergel(kernel.packet[2], kernel.packet[6]);\n  v[6] = vec_mergeh(kernel.packet[3], kernel.packet[7]);\n  v[7] = vec_mergel(kernel.packet[3], kernel.packet[7]);\n  sum[0] = vec_mergeh(v[0], v[4]);\n  sum[1] = vec_mergel(v[0], v[4]);\n  sum[2] = vec_mergeh(v[1], v[5]);\n  sum[3] = vec_mergel(v[1], v[5]);\n  sum[4] = vec_mergeh(v[2], v[6]);\n  sum[5] = vec_mergel(v[2], v[6]);\n  sum[6] = vec_mergeh(v[3], v[7]);\n  sum[7] = vec_mergel(v[3], v[7]);\n\n  kernel.packet[0] = vec_mergeh(sum[0], sum[4]);\n  kernel.packet[1] = vec_mergel(sum[0], sum[4]);\n  kernel.packet[2] = vec_mergeh(sum[1], sum[5]);\n  kernel.packet[3] = vec_mergel(sum[1], sum[5]);\n  kernel.packet[4] = vec_mergeh(sum[2], sum[6]);\n  kernel.packet[5] = vec_mergel(sum[2], sum[6]);\n  kernel.packet[6] = vec_mergeh(sum[3], sum[7]);\n  kernel.packet[7] = vec_mergel(sum[3], sum[7]);\n}\n\nEIGEN_DEVICE_FUNC inline void\nptranspose(PacketBlock<Packet8bf,8>& kernel) {\n  Packet8bf v[8], sum[8];\n\n  v[0] = vec_mergeh(kernel.packet[0].m_val, kernel.packet[4].m_val);\n  v[1] = vec_mergel(kernel.packet[0].m_val, kernel.packet[4].m_val);\n  v[2] = vec_mergeh(kernel.packet[1].m_val, kernel.packet[5].m_val);\n  v[3] = vec_mergel(kernel.packet[1].m_val, kernel.packet[5].m_val);\n  v[4] = vec_mergeh(kernel.packet[2].m_val, kernel.packet[6].m_val);\n  v[5] = vec_mergel(kernel.packet[2].m_val, kernel.packet[6].m_val);\n  v[6] = vec_mergeh(kernel.packet[3].m_val, kernel.packet[7].m_val);\n  v[7] = vec_mergel(kernel.packet[3].m_val, kernel.packet[7].m_val);\n  sum[0] = vec_mergeh(v[0].m_val, v[4].m_val);\n  sum[1] = vec_mergel(v[0].m_val, v[4].m_val);\n  sum[2] = vec_mergeh(v[1].m_val, v[5].m_val);\n  sum[3] = vec_mergel(v[1].m_val, v[5].m_val);\n  sum[4] = vec_mergeh(v[2].m_val, v[6].m_val);\n  sum[5] = vec_mergel(v[2].m_val, v[6].m_val);\n  sum[6] = vec_mergeh(v[3].m_val, v[7].m_val);\n  sum[7] = vec_mergel(v[3].m_val, v[7].m_val);\n\n  kernel.packet[0] = vec_mergeh(sum[0].m_val, sum[4].m_val);\n  kernel.packet[1] = vec_mergel(sum[0].m_val, sum[4].m_val);\n  kernel.packet[2] = vec_mergeh(sum[1].m_val, sum[5].m_val);\n  kernel.packet[3] = vec_mergel(sum[1].m_val, sum[5].m_val);\n  kernel.packet[4] = vec_mergeh(sum[2].m_val, sum[6].m_val);\n  
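// (.m_val unwraps Packet8bf to its raw Packet8us payload, since vec_mergeh\n  // and vec_mergel only accept the native vector types.)\n  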
kernel.packet[5] = vec_mergel(sum[2].m_val, sum[6].m_val);\n  kernel.packet[6] = vec_mergeh(sum[3].m_val, sum[7].m_val);\n  kernel.packet[7] = vec_mergel(sum[3].m_val, sum[7].m_val);\n}\n\nEIGEN_DEVICE_FUNC inline void\nptranspose(PacketBlock<Packet16c,16>& kernel) {\n  Packet16c step1[16], step2[16], step3[16];\n\n  step1[0] = vec_mergeh(kernel.packet[0], kernel.packet[8]);\n  step1[1] = vec_mergel(kernel.packet[0], kernel.packet[8]);\n  step1[2] = vec_mergeh(kernel.packet[1], kernel.packet[9]);\n  step1[3] = vec_mergel(kernel.packet[1], kernel.packet[9]);\n  step1[4] = vec_mergeh(kernel.packet[2], kernel.packet[10]);\n  step1[5] = vec_mergel(kernel.packet[2], kernel.packet[10]);\n  step1[6] = vec_mergeh(kernel.packet[3], kernel.packet[11]);\n  step1[7] = vec_mergel(kernel.packet[3], kernel.packet[11]);\n  step1[8] = vec_mergeh(kernel.packet[4], kernel.packet[12]);\n  step1[9] = vec_mergel(kernel.packet[4], kernel.packet[12]);\n  step1[10] = vec_mergeh(kernel.packet[5], kernel.packet[13]);\n  step1[11] = vec_mergel(kernel.packet[5], kernel.packet[13]);\n  step1[12] = vec_mergeh(kernel.packet[6], kernel.packet[14]);\n  step1[13] = vec_mergel(kernel.packet[6], kernel.packet[14]);\n  step1[14] = vec_mergeh(kernel.packet[7], kernel.packet[15]);\n  step1[15] = vec_mergel(kernel.packet[7], kernel.packet[15]);\n\n  step2[0]  = vec_mergeh(step1[0], step1[8]);\n  step2[1]  = vec_mergel(step1[0], step1[8]);\n  step2[2]  = vec_mergeh(step1[1], step1[9]);\n  step2[3]  = vec_mergel(step1[1], step1[9]);\n  step2[4]  = vec_mergeh(step1[2], step1[10]);\n  step2[5]  = vec_mergel(step1[2], step1[10]);\n  step2[6]  = vec_mergeh(step1[3], step1[11]);\n  step2[7]  = vec_mergel(step1[3], step1[11]);\n  step2[8]  = vec_mergeh(step1[4], step1[12]);\n  step2[9]  = vec_mergel(step1[4], step1[12]);\n  step2[10] = vec_mergeh(step1[5], step1[13]);\n  step2[11] = vec_mergel(step1[5], step1[13]);\n  step2[12] = vec_mergeh(step1[6], step1[14]);\n  step2[13] = vec_mergel(step1[6], step1[14]);\n  step2[14] = vec_mergeh(step1[7], step1[15]);\n  step2[15] = vec_mergel(step1[7], step1[15]);\n\n  step3[0]  = vec_mergeh(step2[0], step2[8]);\n  step3[1]  = vec_mergel(step2[0], step2[8]);\n  step3[2]  = vec_mergeh(step2[1], step2[9]);\n  step3[3]  = vec_mergel(step2[1], step2[9]);\n  step3[4]  = vec_mergeh(step2[2], step2[10]);\n  step3[5]  = vec_mergel(step2[2], step2[10]);\n  step3[6]  = vec_mergeh(step2[3], step2[11]);\n  step3[7]  = vec_mergel(step2[3], step2[11]);\n  step3[8]  = vec_mergeh(step2[4], step2[12]);\n  step3[9]  = vec_mergel(step2[4], step2[12]);\n  step3[10] = vec_mergeh(step2[5], step2[13]);\n  step3[11] = vec_mergel(step2[5], step2[13]);\n  step3[12] = vec_mergeh(step2[6], step2[14]);\n  step3[13] = vec_mergel(step2[6], step2[14]);\n  step3[14] = vec_mergeh(step2[7], step2[15]);\n  step3[15] = vec_mergel(step2[7], step2[15]);\n\n  kernel.packet[0]  = vec_mergeh(step3[0], step3[8]);\n  kernel.packet[1]  = vec_mergel(step3[0], step3[8]);\n  kernel.packet[2]  = vec_mergeh(step3[1], step3[9]);\n  kernel.packet[3]  = vec_mergel(step3[1], step3[9]);\n  kernel.packet[4]  = vec_mergeh(step3[2], step3[10]);\n  kernel.packet[5]  = vec_mergel(step3[2], step3[10]);\n  kernel.packet[6]  = vec_mergeh(step3[3], step3[11]);\n  kernel.packet[7]  = vec_mergel(step3[3], step3[11]);\n  kernel.packet[8]  = vec_mergeh(step3[4], step3[12]);\n  kernel.packet[9]  = vec_mergel(step3[4], step3[12]);\n  kernel.packet[10] = vec_mergeh(step3[5], step3[13]);\n  kernel.packet[11] = vec_mergel(step3[5], step3[13]);\n  kernel.packet[12] = 
vec_mergeh(step3[6], step3[14]);\n  kernel.packet[13] = vec_mergel(step3[6], step3[14]);\n  kernel.packet[14] = vec_mergeh(step3[7], step3[15]);\n  kernel.packet[15] = vec_mergel(step3[7], step3[15]);\n}\n\nEIGEN_DEVICE_FUNC inline void\nptranspose(PacketBlock<Packet16uc,16>& kernel) {\n  Packet16uc step1[16], step2[16], step3[16];\n\n  step1[0] = vec_mergeh(kernel.packet[0], kernel.packet[8]);\n  step1[1] = vec_mergel(kernel.packet[0], kernel.packet[8]);\n  step1[2] = vec_mergeh(kernel.packet[1], kernel.packet[9]);\n  step1[3] = vec_mergel(kernel.packet[1], kernel.packet[9]);\n  step1[4] = vec_mergeh(kernel.packet[2], kernel.packet[10]);\n  step1[5] = vec_mergel(kernel.packet[2], kernel.packet[10]);\n  step1[6] = vec_mergeh(kernel.packet[3], kernel.packet[11]);\n  step1[7] = vec_mergel(kernel.packet[3], kernel.packet[11]);\n  step1[8] = vec_mergeh(kernel.packet[4], kernel.packet[12]);\n  step1[9] = vec_mergel(kernel.packet[4], kernel.packet[12]);\n  step1[10] = vec_mergeh(kernel.packet[5], kernel.packet[13]);\n  step1[11] = vec_mergel(kernel.packet[5], kernel.packet[13]);\n  step1[12] = vec_mergeh(kernel.packet[6], kernel.packet[14]);\n  step1[13] = vec_mergel(kernel.packet[6], kernel.packet[14]);\n  step1[14] = vec_mergeh(kernel.packet[7], kernel.packet[15]);\n  step1[15] = vec_mergel(kernel.packet[7], kernel.packet[15]);\n\n  step2[0]  = vec_mergeh(step1[0], step1[8]);\n  step2[1]  = vec_mergel(step1[0], step1[8]);\n  step2[2]  = vec_mergeh(step1[1], step1[9]);\n  step2[3]  = vec_mergel(step1[1], step1[9]);\n  step2[4]  = vec_mergeh(step1[2], step1[10]);\n  step2[5]  = vec_mergel(step1[2], step1[10]);\n  step2[6]  = vec_mergeh(step1[3], step1[11]);\n  step2[7]  = vec_mergel(step1[3], step1[11]);\n  step2[8]  = vec_mergeh(step1[4], step1[12]);\n  step2[9]  = vec_mergel(step1[4], step1[12]);\n  step2[10] = vec_mergeh(step1[5], step1[13]);\n  step2[11] = vec_mergel(step1[5], step1[13]);\n  step2[12] = vec_mergeh(step1[6], step1[14]);\n  step2[13] = vec_mergel(step1[6], step1[14]);\n  step2[14] = vec_mergeh(step1[7], step1[15]);\n  step2[15] = vec_mergel(step1[7], step1[15]);\n\n  step3[0]  = vec_mergeh(step2[0], step2[8]);\n  step3[1]  = vec_mergel(step2[0], step2[8]);\n  step3[2]  = vec_mergeh(step2[1], step2[9]);\n  step3[3]  = vec_mergel(step2[1], step2[9]);\n  step3[4]  = vec_mergeh(step2[2], step2[10]);\n  step3[5]  = vec_mergel(step2[2], step2[10]);\n  step3[6]  = vec_mergeh(step2[3], step2[11]);\n  step3[7]  = vec_mergel(step2[3], step2[11]);\n  step3[8]  = vec_mergeh(step2[4], step2[12]);\n  step3[9]  = vec_mergel(step2[4], step2[12]);\n  step3[10] = vec_mergeh(step2[5], step2[13]);\n  step3[11] = vec_mergel(step2[5], step2[13]);\n  step3[12] = vec_mergeh(step2[6], step2[14]);\n  step3[13] = vec_mergel(step2[6], step2[14]);\n  step3[14] = vec_mergeh(step2[7], step2[15]);\n  step3[15] = vec_mergel(step2[7], step2[15]);\n\n  kernel.packet[0]  = vec_mergeh(step3[0], step3[8]);\n  kernel.packet[1]  = vec_mergel(step3[0], step3[8]);\n  kernel.packet[2]  = vec_mergeh(step3[1], step3[9]);\n  kernel.packet[3]  = vec_mergel(step3[1], step3[9]);\n  kernel.packet[4]  = vec_mergeh(step3[2], step3[10]);\n  kernel.packet[5]  = vec_mergel(step3[2], step3[10]);\n  kernel.packet[6]  = vec_mergeh(step3[3], step3[11]);\n  kernel.packet[7]  = vec_mergel(step3[3], step3[11]);\n  kernel.packet[8]  = vec_mergeh(step3[4], step3[12]);\n  kernel.packet[9]  = vec_mergel(step3[4], step3[12]);\n  kernel.packet[10] = vec_mergeh(step3[5], step3[13]);\n  kernel.packet[11] = vec_mergel(step3[5], step3[13]);\n  
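// (The 16x16 byte transpose needs log2(16) = 4 interleave rounds: step1,\n  // step2, step3, and this final merge back into kernel.packet.)\n  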
kernel.packet[12] = vec_mergeh(step3[6], step3[14]);\n  kernel.packet[13] = vec_mergel(step3[6], step3[14]);\n  kernel.packet[14] = vec_mergeh(step3[7], step3[15]);\n  kernel.packet[15] = vec_mergel(step3[7], step3[15]);\n}\n\ntemplate<typename Packet> EIGEN_STRONG_INLINE\nPacket pblend4(const Selector<4>& ifPacket, const Packet& thenPacket, const Packet& elsePacket) {\n  Packet4ui select = { ifPacket.select[0], ifPacket.select[1], ifPacket.select[2], ifPacket.select[3] };\n  Packet4ui mask = reinterpret_cast<Packet4ui>(vec_cmpeq(reinterpret_cast<Packet4ui>(select), reinterpret_cast<Packet4ui>(p4i_ONE)));\n  return vec_sel(elsePacket, thenPacket, mask);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4i pblend(const Selector<4>& ifPacket, const Packet4i& thenPacket, const Packet4i& elsePacket) {\n  return pblend4<Packet4i>(ifPacket, thenPacket, elsePacket);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f pblend(const Selector<4>& ifPacket, const Packet4f& thenPacket, const Packet4f& elsePacket) {\n  return pblend4<Packet4f>(ifPacket, thenPacket, elsePacket);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8s pblend(const Selector<8>& ifPacket, const Packet8s& thenPacket, const Packet8s& elsePacket) {\n  Packet8us select = { ifPacket.select[0], ifPacket.select[1], ifPacket.select[2], ifPacket.select[3],\n                       ifPacket.select[4], ifPacket.select[5], ifPacket.select[6], ifPacket.select[7] };\n  Packet8us mask = reinterpret_cast<Packet8us>(vec_cmpeq(select, p8us_ONE));\n  Packet8s result = vec_sel(elsePacket, thenPacket, mask);\n  return result;\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8us pblend(const Selector<8>& ifPacket, const Packet8us& thenPacket, const Packet8us& elsePacket) {\n  Packet8us select = { ifPacket.select[0], ifPacket.select[1], ifPacket.select[2], ifPacket.select[3],\n                       ifPacket.select[4], ifPacket.select[5], ifPacket.select[6], ifPacket.select[7] };\n  Packet8us mask = reinterpret_cast<Packet8us>(vec_cmpeq(reinterpret_cast<Packet8us>(select), p8us_ONE));\n  return vec_sel(elsePacket, thenPacket, mask);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8bf pblend(const Selector<8>& ifPacket, const Packet8bf& thenPacket, const Packet8bf& elsePacket) {\n  return pblend<Packet8us>(ifPacket, thenPacket, elsePacket);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet16c pblend(const Selector<16>& ifPacket, const Packet16c& thenPacket, const Packet16c& elsePacket) {\n  Packet16uc select = { ifPacket.select[0], ifPacket.select[1], ifPacket.select[2], ifPacket.select[3],\n                       ifPacket.select[4], ifPacket.select[5], ifPacket.select[6], ifPacket.select[7],\n                       ifPacket.select[8], ifPacket.select[9], ifPacket.select[10], ifPacket.select[11],\n                       ifPacket.select[12], ifPacket.select[13], ifPacket.select[14], ifPacket.select[15] };\n\n  Packet16uc mask = reinterpret_cast<Packet16uc>(vec_cmpeq(reinterpret_cast<Packet16uc>(select), p16uc_ONE));\n  return vec_sel(elsePacket, thenPacket, mask);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet16uc pblend(const Selector<16>& ifPacket, const Packet16uc& thenPacket, const Packet16uc& elsePacket) {\n  Packet16uc select = { ifPacket.select[0], ifPacket.select[1], ifPacket.select[2], ifPacket.select[3],\n                       ifPacket.select[4], ifPacket.select[5], ifPacket.select[6], ifPacket.select[7],\n                       ifPacket.select[8], ifPacket.select[9], ifPacket.select[10], ifPacket.select[11],\n                       ifPacket.select[12], ifPacket.select[13], 
ifPacket.select[14], ifPacket.select[15] };\n\n  Packet16uc mask = reinterpret_cast<Packet16uc>(vec_cmpeq(reinterpret_cast<Packet16uc>(select), p16uc_ONE));\n  return vec_sel(elsePacket, thenPacket, mask);\n}\n\ntemplate <>\nstruct type_casting_traits<float, int> {\n  enum {\n    VectorizedCast = 1,\n    SrcCoeffRatio = 1,\n    TgtCoeffRatio = 1\n  };\n};\n\ntemplate <>\nstruct type_casting_traits<int, float> {\n  enum {\n    VectorizedCast = 1,\n    SrcCoeffRatio = 1,\n    TgtCoeffRatio = 1\n  };\n};\n\ntemplate <>\nstruct type_casting_traits<bfloat16, unsigned short int> {\n  enum {\n    VectorizedCast = 1,\n    SrcCoeffRatio = 1,\n    TgtCoeffRatio = 1\n  };\n};\n\ntemplate <>\nstruct type_casting_traits<unsigned short int, bfloat16> {\n  enum {\n    VectorizedCast = 1,\n    SrcCoeffRatio = 1,\n    TgtCoeffRatio = 1\n  };\n};\n\ntemplate<> EIGEN_STRONG_INLINE Packet4i pcast<Packet4f, Packet4i>(const Packet4f& a) {\n  return vec_cts(a,0);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4ui pcast<Packet4f, Packet4ui>(const Packet4f& a) {\n  return vec_ctu(a,0);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f pcast<Packet4i, Packet4f>(const Packet4i& a) {\n  return vec_ctf(a,0);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f pcast<Packet4ui, Packet4f>(const Packet4ui& a) {\n  return vec_ctf(a,0);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8us pcast<Packet8bf, Packet8us>(const Packet8bf& a) {\n  Packet4f float_even = Bf16ToF32Even(a);\n  Packet4f float_odd = Bf16ToF32Odd(a);\n  Packet4ui int_even = pcast<Packet4f, Packet4ui>(float_even);\n  Packet4ui int_odd = pcast<Packet4f, Packet4ui>(float_odd);\n  const _EIGEN_DECLARE_CONST_FAST_Packet4ui(low_mask, 0x0000FFFF);\n  Packet4ui low_even = pand<Packet4ui>(int_even, p4ui_low_mask);\n  Packet4ui low_odd = pand<Packet4ui>(int_odd, p4ui_low_mask);\n\n  //Check values that are bigger than USHRT_MAX (0xFFFF)\n  Packet4bi overflow_selector;\n  if(vec_any_gt(int_even, p4ui_low_mask)){\n    overflow_selector = vec_cmpgt(int_even, p4ui_low_mask);\n    low_even = vec_sel(low_even, p4ui_low_mask, overflow_selector);\n  }\n  if(vec_any_gt(int_odd, p4ui_low_mask)){\n    overflow_selector = vec_cmpgt(int_odd, p4ui_low_mask);\n    low_odd = vec_sel(low_odd, p4ui_low_mask, overflow_selector);\n  }\n\n  low_odd = plogical_shift_left<16>(low_odd);\n\n  Packet4ui int_final = por<Packet4ui>(low_even, low_odd);\n  return reinterpret_cast<Packet8us>(int_final);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8bf pcast<Packet8us, Packet8bf>(const Packet8us& a) {\n  //short -> int -> float -> bfloat16\n  const _EIGEN_DECLARE_CONST_FAST_Packet4ui(low_mask, 0x0000FFFF);\n  Packet4ui int_cast = reinterpret_cast<Packet4ui>(a);\n  Packet4ui int_even = pand<Packet4ui>(int_cast, p4ui_low_mask);\n  Packet4ui int_odd = plogical_shift_right<16>(int_cast);\n  Packet4f float_even = pcast<Packet4ui, Packet4f>(int_even);\n  Packet4f float_odd = pcast<Packet4ui, Packet4f>(int_odd);\n  return F32ToBf16(float_even, float_odd);\n}\n\n\ntemplate<> EIGEN_STRONG_INLINE Packet4i preinterpret<Packet4i,Packet4f>(const Packet4f& a) {\n  return reinterpret_cast<Packet4i>(a);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f preinterpret<Packet4f,Packet4i>(const Packet4i& a) {\n  return reinterpret_cast<Packet4f>(a);\n}\n\n\n\n//---------- double ----------\n#ifdef __VSX__\ntypedef __vector double              Packet2d;\ntypedef __vector unsigned long long  Packet2ul;\ntypedef __vector long long           Packet2l;\n#if EIGEN_COMP_CLANG\ntypedef Packet2ul                    Packet2bl;\n#else\ntypedef 
__vector __bool long         Packet2bl;\n#endif\n\nstatic Packet2l  p2l_ONE  = { 1, 1 };\nstatic Packet2l  p2l_ZERO = reinterpret_cast<Packet2l>(p4i_ZERO);\nstatic Packet2ul p2ul_SIGN = { 0x8000000000000000ull, 0x8000000000000000ull };\nstatic Packet2ul p2ul_PREV0DOT5 = { 0x3FDFFFFFFFFFFFFFull, 0x3FDFFFFFFFFFFFFFull };\nstatic Packet2d  p2d_ONE  = { 1.0, 1.0 };\nstatic Packet2d  p2d_ZERO = reinterpret_cast<Packet2d>(p4f_ZERO);\nstatic Packet2d  p2d_MZERO = { numext::bit_cast<double>(0x8000000000000000ull),\n                               numext::bit_cast<double>(0x8000000000000000ull) };\n\n#ifdef _BIG_ENDIAN\nstatic Packet2d p2d_COUNTDOWN = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4f>(p2d_ZERO), reinterpret_cast<Packet4f>(p2d_ONE), 8));\n#else\nstatic Packet2d p2d_COUNTDOWN = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4f>(p2d_ONE), reinterpret_cast<Packet4f>(p2d_ZERO), 8));\n#endif\n\ntemplate<int index> Packet2d vec_splat_dbl(Packet2d& a)\n{\n  return vec_splat(a, index);\n}\n\ntemplate<> struct packet_traits<double> : default_packet_traits\n{\n  typedef Packet2d type;\n  typedef Packet2d half;\n  enum {\n    Vectorizable = 1,\n    AlignedOnScalar = 1,\n    size=2,\n    HasHalfPacket = 1,\n\n    HasAdd  = 1,\n    HasSub  = 1,\n    HasMul  = 1,\n    HasDiv  = 1,\n    HasMin  = 1,\n    HasMax  = 1,\n    HasAbs  = 1,\n    HasSin  = 0,\n    HasCos  = 0,\n    HasLog  = 0,\n    HasExp  = 1,\n    HasSqrt = 1,\n    HasRsqrt = 1,\n    HasRound = 1,\n    HasFloor = 1,\n    HasCeil = 1,\n    HasRint = 1,\n    HasNegate = 1,\n    HasBlend = 1\n  };\n};\n\ntemplate<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet2d half; };\n\ninline std::ostream & operator <<(std::ostream & s, const Packet2l & v)\n{\n  union {\n    Packet2l   v;\n    int64_t n[2];\n  } vt;\n  vt.v = v;\n  s << vt.n[0] << \", \" << vt.n[1];\n  return s;\n}\n\ninline std::ostream & operator <<(std::ostream & s, const Packet2d & v)\n{\n  union {\n    Packet2d   v;\n    double n[2];\n  } vt;\n  vt.v = v;\n  s << vt.n[0] << \", \" << vt.n[1];\n  return s;\n}\n\n// Need to define them first or we get specialization after instantiation errors\ntemplate<> EIGEN_STRONG_INLINE Packet2d pload<Packet2d>(const double* from)\n{\n  EIGEN_DEBUG_ALIGNED_LOAD\n  return vec_xl(0, const_cast<double *>(from)); // cast needed by Clang\n}\n\ntemplate<> EIGEN_STRONG_INLINE void pstore<double>(double*   to, const Packet2d& from)\n{\n  EIGEN_DEBUG_ALIGNED_STORE\n  vec_xst(from, 0, to);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double&  from) {\n  Packet2d v = {from, from};\n  return v;\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet2d pset1frombits<Packet2d>(unsigned long from) {\n  Packet2l v = {static_cast<long long>(from), static_cast<long long>(from)};\n  return reinterpret_cast<Packet2d>(v);\n}\n\ntemplate<> EIGEN_STRONG_INLINE void\npbroadcast4<Packet2d>(const double *a,\n                      Packet2d& a0, Packet2d& a1, Packet2d& a2, Packet2d& a3)\n{\n  //This way is faster than vec_splat (at least for doubles in Power 9)\n  a0 = pset1<Packet2d>(a[0]);\n  a1 = pset1<Packet2d>(a[1]);\n  a2 = pset1<Packet2d>(a[2]);\n  a3 = pset1<Packet2d>(a[3]);\n}\n\ntemplate<> EIGEN_DEVICE_FUNC inline Packet2d pgather<double, Packet2d>(const double* from, Index stride)\n{\n  EIGEN_ALIGN16 double af[2];\n  af[0] = from[0*stride];\n  af[1] = from[1*stride];\n return 
pload<Packet2d>(af);\n}\ntemplate<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet2d>(double* to, const Packet2d& from, Index stride)\n{\n  EIGEN_ALIGN16 double af[2];\n  pstore<double>(af, from);\n  to[0*stride] = af[0];\n  to[1*stride] = af[1];\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet2d plset<Packet2d>(const double& a) { return pset1<Packet2d>(a) + p2d_COUNTDOWN; }\n\ntemplate<> EIGEN_STRONG_INLINE Packet2d padd<Packet2d>(const Packet2d& a, const Packet2d& b) { return a + b; }\n\ntemplate<> EIGEN_STRONG_INLINE Packet2d psub<Packet2d>(const Packet2d& a, const Packet2d& b) { return a - b; }\n\ntemplate<> EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a) { return p2d_ZERO - a; }\n\ntemplate<> EIGEN_STRONG_INLINE Packet2d pconj(const Packet2d& a) { return a; }\n\ntemplate<> EIGEN_STRONG_INLINE Packet2d pmul<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_madd(a,b,p2d_MZERO); }\ntemplate<> EIGEN_STRONG_INLINE Packet2d pdiv<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_div(a,b); }\n\n// for some weird reasons, it has to be overloaded for packets of integers\ntemplate<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return vec_madd(a, b, c); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b)\n{\n  // NOTE: about 10% slower than vec_min, but consistent with std::min and SSE regarding NaN\n  Packet2d ret;\n  __asm__ (\"xvcmpgedp %x0,%x1,%x2\\n\\txxsel %x0,%x1,%x2,%x0\" : \"=&wa\" (ret) : \"wa\" (a), \"wa\" (b));\n  return ret;\n }\n\ntemplate<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b)\n{\n  // NOTE: about 10% slower than vec_max, but consistent with std::max and SSE regarding NaN\n  Packet2d ret;\n  __asm__ (\"xvcmpgtdp %x0,%x2,%x1\\n\\txxsel %x0,%x1,%x2,%x0\" : \"=&wa\" (ret) : \"wa\" (a), \"wa\" (b));\n  return ret;\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet2d pcmp_le(const Packet2d& a, const Packet2d& b) { return reinterpret_cast<Packet2d>(vec_cmple(a,b)); }\ntemplate<> EIGEN_STRONG_INLINE Packet2d pcmp_lt(const Packet2d& a, const Packet2d& b) { return reinterpret_cast<Packet2d>(vec_cmplt(a,b)); }\ntemplate<> EIGEN_STRONG_INLINE Packet2d pcmp_eq(const Packet2d& a, const Packet2d& b) { return reinterpret_cast<Packet2d>(vec_cmpeq(a,b)); }\ntemplate<> EIGEN_STRONG_INLINE Packet2d pcmp_lt_or_nan(const Packet2d& a, const Packet2d& b) {\n  Packet2d c = reinterpret_cast<Packet2d>(vec_cmpge(a,b));\n  return vec_nor(c,c);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet2d pand<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_and(a, b); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet2d por<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_or(a, b); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet2d pxor<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_xor(a, b); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet2d pandnot<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_and(a, vec_nor(b, b)); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet2d pround<Packet2d>(const Packet2d& a)\n{\n    Packet2d t = vec_add(reinterpret_cast<Packet2d>(vec_or(vec_and(reinterpret_cast<Packet2ul>(a), p2ul_SIGN), p2ul_PREV0DOT5)), a);\n    Packet2d res;\n\n    __asm__(\"xvrdpiz %x0, %x1\\n\\t\"\n        : \"=&wa\" (res)\n        : \"wa\" (t));\n\n    return res;\n}\ntemplate<> EIGEN_STRONG_INLINE Packet2d pceil<Packet2d>(const  Packet2d& a) { return vec_ceil(a); }\ntemplate<> EIGEN_STRONG_INLINE Packet2d pfloor<Packet2d>(const 
Packet2d& a) { return vec_floor(a); }\ntemplate<> EIGEN_STRONG_INLINE Packet2d print<Packet2d>(const Packet2d& a)\n{\n    Packet2d res;\n\n    __asm__(\"xvrdpic %x0, %x1\\n\\t\"\n        : \"=&wa\" (res)\n        : \"wa\" (a));\n\n    return res;\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from)\n{\n  EIGEN_DEBUG_UNALIGNED_LOAD\n  return vec_xl(0, const_cast<double*>(from));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet2d ploaddup<Packet2d>(const double*   from)\n{\n  Packet2d p;\n  if((std::ptrdiff_t(from) % 16) == 0)  p = pload<Packet2d>(from);\n  else                                  p = ploadu<Packet2d>(from);\n  return vec_splat_dbl<0>(p);\n}\n\ntemplate<> EIGEN_STRONG_INLINE void pstoreu<double>(double*  to, const Packet2d& from)\n{\n  EIGEN_DEBUG_UNALIGNED_STORE\n  vec_xst(from, 0, to);\n}\n\ntemplate<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { EIGEN_PPC_PREFETCH(addr); }\n\ntemplate<> EIGEN_STRONG_INLINE double  pfirst<Packet2d>(const Packet2d& a) { EIGEN_ALIGN16 double x[2]; pstore<double>(x, a); return x[0]; }\n\ntemplate<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a)\n{\n  return reinterpret_cast<Packet2d>(vec_perm(reinterpret_cast<Packet16uc>(a), reinterpret_cast<Packet16uc>(a), p16uc_REVERSE64));\n}\ntemplate<> EIGEN_STRONG_INLINE Packet2d pabs(const Packet2d& a) { return vec_abs(a); }\n\n// VSX support varies between different compilers and even different\n// versions of the same compiler.  For gcc version >= 4.9.3, we can use\n// vec_cts to efficiently convert Packet2d to Packet2l.  Otherwise, use\n// a slow version that works with older compilers. \n// Update: apparently vec_cts/vec_ctf intrinsics for 64-bit doubles\n// are buggy, https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70963\ntemplate<>\ninline Packet2l pcast<Packet2d, Packet2l>(const Packet2d& x) {\n#if EIGEN_GNUC_AT_LEAST(5, 4) || \\\n    (EIGEN_GNUC_AT(6, 1) && __GNUC_PATCHLEVEL__ >= 1)\n  return vec_cts(x, 0);    // TODO: check clang version.\n#else\n  double tmp[2];\n  memcpy(tmp, &x, sizeof(tmp));\n  Packet2l l = { static_cast<long long>(tmp[0]),\n                 static_cast<long long>(tmp[1]) };\n  return l;\n#endif\n}\n\ntemplate<>\ninline Packet2d pcast<Packet2l, Packet2d>(const Packet2l& x) {\n  unsigned long long tmp[2];\n  memcpy(tmp, &x, sizeof(tmp));\n  Packet2d d = { static_cast<double>(tmp[0]),\n                 static_cast<double>(tmp[1]) };\n  return d;\n}\n\n\n// Packet2l shifts.\n// For POWER8 we simply use vec_sr/l. \n//\n// Things are more complicated for POWER7. 
There is actually a\n// vec_xxsxdi intrinsic but it is not supported by some gcc versions.\n// So we need to shift by N % 32 and rearrange bytes.\n#ifdef __POWER8_VECTOR__\n\ntemplate<int N>\nEIGEN_STRONG_INLINE Packet2l plogical_shift_left(const Packet2l& a) {\n  const Packet2ul shift = { N, N };\n  return vec_sl(a, shift); \n}\n\ntemplate<int N>\nEIGEN_STRONG_INLINE Packet2l plogical_shift_right(const Packet2l& a) {\n  const Packet2ul shift = { N, N };\n  return vec_sr(a, shift); \n}\n\n#else\n\n// Shifts [A, B, C, D] to [B, 0, D, 0].\n// Used to implement left shifts for Packet2l.\nEIGEN_ALWAYS_INLINE Packet4i shift_even_left(const Packet4i& a) {\n  static const Packet16uc perm = {\n      0x14, 0x15, 0x16, 0x17, 0x00, 0x01, 0x02, 0x03, \n      0x1c, 0x1d, 0x1e, 0x1f, 0x08, 0x09, 0x0a, 0x0b };\n  #ifdef  _BIG_ENDIAN\n    return vec_perm(p4i_ZERO, a, perm);\n  #else\n    return vec_perm(a, p4i_ZERO, perm);\n  #endif\n}\n\n// Shifts [A, B, C, D] to [0, A, 0, C].\n// Used to implement right shifts for Packet2l.\nEIGEN_ALWAYS_INLINE Packet4i shift_odd_right(const Packet4i& a) {\n  static const Packet16uc perm = {\n      0x04, 0x05, 0x06, 0x07, 0x10, 0x11, 0x12, 0x13, \n      0x0c, 0x0d, 0x0e, 0x0f, 0x18, 0x19, 0x1a, 0x1b };\n  #ifdef  _BIG_ENDIAN\n    return vec_perm(p4i_ZERO, a, perm);\n  #else\n    return vec_perm(a, p4i_ZERO, perm);\n  #endif\n}\n\ntemplate<int N, typename EnableIf = void>\nstruct plogical_shift_left_impl;\n\ntemplate<int N>\nstruct plogical_shift_left_impl<N, typename enable_if<(N < 32) && (N >= 0)>::type> {\n  static EIGEN_STRONG_INLINE Packet2l run(const Packet2l& a) {\n    static const unsigned n = static_cast<unsigned>(N);\n    const Packet4ui shift = {n, n, n, n};\n    const Packet4i ai = reinterpret_cast<Packet4i>(a);\n    static const unsigned m = static_cast<unsigned>(32 - N);\n    const Packet4ui shift_right = {m, m, m, m};\n    const Packet4i out_hi = vec_sl(ai, shift);\n    const Packet4i out_lo = shift_even_left(vec_sr(ai, shift_right));\n    return reinterpret_cast<Packet2l>(por<Packet4i>(out_hi, out_lo));\n  }\n};\n\ntemplate<int N>\nstruct plogical_shift_left_impl<N, typename enable_if<(N >= 32)>::type> {\n  static EIGEN_STRONG_INLINE Packet2l run(const Packet2l& a) {\n    static const unsigned m = static_cast<unsigned>(N - 32);\n    const Packet4ui shift = {m, m, m, m};\n    const Packet4i ai = reinterpret_cast<Packet4i>(a);\n    return reinterpret_cast<Packet2l>(shift_even_left(vec_sl(ai, shift)));\n  }\n};\n\ntemplate<int N>\nEIGEN_STRONG_INLINE Packet2l plogical_shift_left(const Packet2l& a) {\n  return plogical_shift_left_impl<N>::run(a); \n}\n\ntemplate<int N, typename EnableIf = void>\nstruct plogical_shift_right_impl;\n\ntemplate<int N>\nstruct plogical_shift_right_impl<N, typename enable_if<(N < 32) && (N >= 0)>::type> {\n  static EIGEN_STRONG_INLINE Packet2l run(const Packet2l& a) {\n    static const unsigned n = static_cast<unsigned>(N);\n    const Packet4ui shift = {n, n, n, n};\n    const Packet4i ai = reinterpret_cast<Packet4i>(a);\n    static const unsigned m = static_cast<unsigned>(32 - N);\n    const Packet4ui shift_left = {m, m, m, m};\n    const Packet4i out_lo = vec_sr(ai, shift);\n    const Packet4i out_hi = shift_odd_right(vec_sl(ai, shift_left));\n    return reinterpret_cast<Packet2l>(por<Packet4i>(out_hi, out_lo));\n  }\n};\n\ntemplate<int N>\nstruct plogical_shift_right_impl<N, typename enable_if<(N >= 32)>::type> {\n  static EIGEN_STRONG_INLINE Packet2l run(const Packet2l& a) {\n    static const unsigned m = static_cast<unsigned>(N 
- 32);\n    const Packet4ui shift = {m, m, m, m};\n    const Packet4i ai = reinterpret_cast<Packet4i>(a);\n    return reinterpret_cast<Packet2l>(shift_odd_right(vec_sr(ai, shift)));\n  }\n};\n\ntemplate<int N>\nEIGEN_STRONG_INLINE Packet2l plogical_shift_right(const Packet2l& a) {\n  return plogical_shift_right_impl<N>::run(a); \n}\n#endif\n\ntemplate<> EIGEN_STRONG_INLINE Packet2d pldexp<Packet2d>(const Packet2d& a, const Packet2d& exponent) {\n  // Clamp exponent to [-2099, 2099]\n  const Packet2d max_exponent = pset1<Packet2d>(2099.0);\n  const Packet2l e = pcast<Packet2d, Packet2l>(pmin(pmax(exponent, pnegate(max_exponent)), max_exponent));\n\n  // Split 2^e into four factors and multiply:\n  const Packet2l  bias = { 1023, 1023 };\n  Packet2l b = plogical_shift_right<2>(e);  // floor(e/4)\n  Packet2d c = reinterpret_cast<Packet2d>(plogical_shift_left<52>(b + bias));\n  Packet2d out = pmul(pmul(pmul(a, c), c), c); // a * 2^(3b)\n  b = psub(psub(psub(e, b), b), b);  // e - 3b\n  c = reinterpret_cast<Packet2d>(plogical_shift_left<52>(b + bias)); // 2^(e - 3b)\n  out = pmul(out, c); // a * 2^e\n  return out;\n}\n\n\n// Extract exponent without existence of Packet2l.\ntemplate<>\nEIGEN_STRONG_INLINE  \nPacket2d pfrexp_generic_get_biased_exponent(const Packet2d& a) {\n  return pcast<Packet2l, Packet2d>(plogical_shift_right<52>(reinterpret_cast<Packet2l>(pabs(a))));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet2d pfrexp<Packet2d> (const Packet2d& a, Packet2d& exponent) {\n  return pfrexp_generic(a, exponent);\n}\n\ntemplate<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a)\n{\n  Packet2d b, sum;\n  b   = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4f>(a), reinterpret_cast<Packet4f>(a), 8));\n  sum = a + b;\n  return pfirst<Packet2d>(sum);\n}\n\n// Other reduction functions:\n// mul\ntemplate<> EIGEN_STRONG_INLINE double predux_mul<Packet2d>(const Packet2d& a)\n{\n  return pfirst(pmul(a, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(a), reinterpret_cast<Packet4ui>(a), 8))));\n}\n\n// min\ntemplate<> EIGEN_STRONG_INLINE double predux_min<Packet2d>(const Packet2d& a)\n{\n  return pfirst(pmin(a, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(a), reinterpret_cast<Packet4ui>(a), 8))));\n}\n\n// max\ntemplate<> EIGEN_STRONG_INLINE double predux_max<Packet2d>(const Packet2d& a)\n{\n  return pfirst(pmax(a, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(a), reinterpret_cast<Packet4ui>(a), 8))));\n}\n\nEIGEN_DEVICE_FUNC inline void\nptranspose(PacketBlock<Packet2d,2>& kernel) {\n  Packet2d t0, t1;\n  t0 = vec_perm(kernel.packet[0], kernel.packet[1], p16uc_TRANSPOSE64_HI);\n  t1 = vec_perm(kernel.packet[0], kernel.packet[1], p16uc_TRANSPOSE64_LO);\n  kernel.packet[0] = t0;\n  kernel.packet[1] = t1;\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, const Packet2d& thenPacket, const Packet2d& elsePacket) {\n  Packet2l select = { ifPacket.select[0], ifPacket.select[1] };\n  Packet2bl mask = reinterpret_cast<Packet2bl>( vec_cmpeq(reinterpret_cast<Packet2d>(select), reinterpret_cast<Packet2d>(p2l_ONE)) );\n  return vec_sel(elsePacket, thenPacket, mask);\n}\n\n\n#endif // __VSX__\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_PACKET_MATH_ALTIVEC_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/arch/CUDA/Complex.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>\n// Copyright (C) 2021 C. Antonio Sanchez <cantonios@google.com>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_COMPLEX_CUDA_H\n#define EIGEN_COMPLEX_CUDA_H\n\n// clang-format off\n// Many std::complex methods such as operator+, operator-, operator* and\n// operator/ are not constexpr. Due to this, GCC and older versions of clang do\n// not treat them as device functions and thus Eigen functors making use of\n// these operators fail to compile. Here, we manually specialize these\n// operators and functors for complex types when building for CUDA to enable\n// their use on-device.\n\n#if defined(EIGEN_CUDACC) && defined(EIGEN_GPU_COMPILE_PHASE)\n    \n// ICC already specializes std::complex<float> and std::complex<double>\n// operators, preventing us from making them device functions here.\n// This will lead to silent runtime errors if the operators are used on device.\n//\n// To allow std::complex operator use on device, define _OVERRIDE_COMPLEX_SPECIALIZATION_\n// prior to first inclusion of <complex>.  This prevents ICC from adding\n// its own specializations, so our custom ones below can be used instead.\n#if !(defined(EIGEN_COMP_ICC) && defined(_USE_COMPLEX_SPECIALIZATION_))\n\n// Import Eigen's internal operator specializations.\n#define EIGEN_USING_STD_COMPLEX_OPERATORS           \\\n  using Eigen::complex_operator_detail::operator+;  \\\n  using Eigen::complex_operator_detail::operator-;  \\\n  using Eigen::complex_operator_detail::operator*;  \\\n  using Eigen::complex_operator_detail::operator/;  \\\n  using Eigen::complex_operator_detail::operator+=; \\\n  using Eigen::complex_operator_detail::operator-=; \\\n  using Eigen::complex_operator_detail::operator*=; \\\n  using Eigen::complex_operator_detail::operator/=; \\\n  using Eigen::complex_operator_detail::operator==; \\\n  using Eigen::complex_operator_detail::operator!=;\n\nnamespace Eigen {\n\n// Specialized std::complex overloads.\nnamespace complex_operator_detail {\n\ntemplate<typename T>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\nstd::complex<T> complex_multiply(const std::complex<T>& a, const std::complex<T>& b) {\n  const T a_real = numext::real(a);\n  const T a_imag = numext::imag(a);\n  const T b_real = numext::real(b);\n  const T b_imag = numext::imag(b);\n  return std::complex<T>(\n      a_real * b_real - a_imag * b_imag,\n      a_imag * b_real + a_real * b_imag);\n}\n\ntemplate<typename T>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\nstd::complex<T> complex_divide_fast(const std::complex<T>& a, const std::complex<T>& b) {\n  const T a_real = numext::real(a);\n  const T a_imag = numext::imag(a);\n  const T b_real = numext::real(b);\n  const T b_imag = numext::imag(b);\n  const T norm = (b_real * b_real + b_imag * b_imag);\n  return std::complex<T>((a_real * b_real + a_imag * b_imag) / norm,\n                          (a_imag * b_real - a_real * b_imag) / norm);\n}\n\ntemplate<typename T>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\nstd::complex<T> complex_divide_stable(const std::complex<T>& a, const std::complex<T>& b) {\n  const T a_real = numext::real(a);\n  const T a_imag = numext::imag(a);\n  const T b_real = numext::real(b);\n  const T b_imag = numext::imag(b);\n  // 
Smith's complex division (https://arxiv.org/pdf/1210.4539.pdf),\n  // guards against over/under-flow.\n  const bool scale_imag = numext::abs(b_imag) <= numext::abs(b_real);\n  const T rscale = scale_imag ? T(1) : b_real / b_imag;\n  const T iscale = scale_imag ? b_imag / b_real : T(1);\n  const T denominator = b_real * rscale + b_imag * iscale;\n  return std::complex<T>((a_real * rscale + a_imag * iscale) / denominator, \n                         (a_imag * rscale - a_real * iscale) / denominator);\n}\n\ntemplate<typename T>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\nstd::complex<T> complex_divide(const std::complex<T>& a, const std::complex<T>& b) {\n#if EIGEN_FAST_MATH\n  return complex_divide_fast(a, b);\n#else\n  return complex_divide_stable(a, b);\n#endif\n}\n\n// NOTE: We cannot specialize compound assignment operators with Scalar T,\n//         (i.e.  operator@=(const T&), for @=+,-,*,/)\n//       since they are already specialized for float/double/long double within\n//       the standard <complex> header. We also do not specialize the stream\n//       operators.\n#define EIGEN_CREATE_STD_COMPLEX_OPERATOR_SPECIALIZATIONS(T)                                    \\\n                                                                                                \\\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE                                                           \\\nstd::complex<T> operator+(const std::complex<T>& a) { return a; }                               \\\n                                                                                                \\\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE                                                           \\\nstd::complex<T> operator-(const std::complex<T>& a) {                                           \\\n  return std::complex<T>(-numext::real(a), -numext::imag(a));                                   \\\n}                                                                                               \\\n                                                                                                \\\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE                                                           \\\nstd::complex<T> operator+(const std::complex<T>& a, const std::complex<T>& b) {                 \\\n  return std::complex<T>(numext::real(a) + numext::real(b), numext::imag(a) + numext::imag(b)); \\\n}                                                                                               \\\n                                                                                                \\\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE                                                           \\\nstd::complex<T> operator+(const std::complex<T>& a, const T& b) {                               \\\n  return std::complex<T>(numext::real(a) + b, numext::imag(a));                                 \\\n}                                                                                               \\\n                                                                                                \\\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE                                                           \\\nstd::complex<T> operator+(const T& a, const std::complex<T>& b) {                               \\\n  return std::complex<T>(a + numext::real(b), numext::imag(b));                                 \\\n}                                                                                               \\\n                                                                      
                          \\\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE                                                           \\\nstd::complex<T> operator-(const std::complex<T>& a, const std::complex<T>& b) {                 \\\n  return std::complex<T>(numext::real(a) - numext::real(b), numext::imag(a) - numext::imag(b)); \\\n}                                                                                               \\\n                                                                                                \\\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE                                                           \\\nstd::complex<T> operator-(const std::complex<T>& a, const T& b) {                               \\\n  return std::complex<T>(numext::real(a) - b, numext::imag(a));                                 \\\n}                                                                                               \\\n                                                                                                \\\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE                                                           \\\nstd::complex<T> operator-(const T& a, const std::complex<T>& b) {                               \\\n  return std::complex<T>(a - numext::real(b), -numext::imag(b));                                \\\n}                                                                                               \\\n                                                                                                \\\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE                                                           \\\nstd::complex<T> operator*(const std::complex<T>& a, const std::complex<T>& b) {                 \\\n  return complex_multiply(a, b);                                                                \\\n}                                                                                               \\\n                                                                                                \\\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE                                                           \\\nstd::complex<T> operator*(const std::complex<T>& a, const T& b) {                               \\\n  return std::complex<T>(numext::real(a) * b, numext::imag(a) * b);                             \\\n}                                                                                               \\\n                                                                                                \\\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE                                                           \\\nstd::complex<T> operator*(const T& a, const std::complex<T>& b) {                               \\\n  return std::complex<T>(a * numext::real(b), a * numext::imag(b));                             \\\n}                                                                                               \\\n                                                                                                \\\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE                                                           \\\nstd::complex<T> operator/(const std::complex<T>& a, const std::complex<T>& b) {                 \\\n  return complex_divide(a, b);                                                                  \\\n}                                                                                               \\\n                                                                                                \\\nEIGEN_DEVICE_FUNC 
EIGEN_STRONG_INLINE                                                           \\\nstd::complex<T> operator/(const std::complex<T>& a, const T& b) {                               \\\n  return std::complex<T>(numext::real(a) / b, numext::imag(a) / b);                             \\\n}                                                                                               \\\n                                                                                                \\\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE                                                           \\\nstd::complex<T> operator/(const T& a, const std::complex<T>& b) {                               \\\n  return complex_divide(std::complex<T>(a, 0), b);                                              \\\n}                                                                                               \\\n                                                                                                \\\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE                                                           \\\nstd::complex<T>& operator+=(std::complex<T>& a, const std::complex<T>& b) {                     \\\n  numext::real_ref(a) += numext::real(b);                                                       \\\n  numext::imag_ref(a) += numext::imag(b);                                                       \\\n  return a;                                                                                     \\\n}                                                                                               \\\n                                                                                                \\\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE                                                           \\\nstd::complex<T>& operator-=(std::complex<T>& a, const std::complex<T>& b) {                     \\\n  numext::real_ref(a) -= numext::real(b);                                                       \\\n  numext::imag_ref(a) -= numext::imag(b);                                                       \\\n  return a;                                                                                     \\\n}                                                                                               \\\n                                                                                                \\\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE                                                           \\\nstd::complex<T>& operator*=(std::complex<T>& a, const std::complex<T>& b) {                     \\\n  a = complex_multiply(a, b);                                                                   \\\n  return a;                                                                                     \\\n}                                                                                               \\\n                                                                                                \\\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE                                                           \\\nstd::complex<T>& operator/=(std::complex<T>& a, const std::complex<T>& b) {                     \\\n  a = complex_divide(a, b);                                                                     \\\n  return  a;                                                                                    \\\n}                                                                                               \\\n                                                                         
                       \\\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE                                                           \\\nbool operator==(const std::complex<T>& a, const std::complex<T>& b) {                           \\\n  return numext::real(a) == numext::real(b) && numext::imag(a) == numext::imag(b);              \\\n}                                                                                               \\\n                                                                                                \\\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE                                                           \\\nbool operator==(const std::complex<T>& a, const T& b) {                                         \\\n  return numext::real(a) == b && numext::imag(a) == 0;                                          \\\n}                                                                                               \\\n                                                                                                \\\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE                                                           \\\nbool operator==(const T& a, const std::complex<T>& b) {                                         \\\n  return a  == numext::real(b) && 0 == numext::imag(b);                                         \\\n}                                                                                               \\\n                                                                                                \\\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE                                                           \\\nbool operator!=(const std::complex<T>& a, const std::complex<T>& b) {                           \\\n  return !(a == b);                                                                             \\\n}                                                                                               \\\n                                                                                                \\\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE                                                           \\\nbool operator!=(const std::complex<T>& a, const T& b) {                                         \\\n  return !(a == b);                                                                             \\\n}                                                                                               \\\n                                                                                                \\\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE                                                           \\\nbool operator!=(const T& a, const std::complex<T>& b) {                                         \\\n  return !(a == b);                                                                             \\\n}\n\n// Do not specialize for long double, since that reduces to double on device.\nEIGEN_CREATE_STD_COMPLEX_OPERATOR_SPECIALIZATIONS(float)\nEIGEN_CREATE_STD_COMPLEX_OPERATOR_SPECIALIZATIONS(double)\n\n#undef EIGEN_CREATE_STD_COMPLEX_OPERATOR_SPECIALIZATIONS\n\n  \n}  // namespace complex_operator_detail\n\nEIGEN_USING_STD_COMPLEX_OPERATORS\n\nnamespace numext {\nEIGEN_USING_STD_COMPLEX_OPERATORS\n}  // namespace numext\n\nnamespace internal {\nEIGEN_USING_STD_COMPLEX_OPERATORS\n\n}  // namespace internal\n}  // namespace Eigen\n\n#endif  // !(EIGEN_COMP_ICC && _USE_COMPLEX_SPECIALIZATION_)\n\n#endif  // EIGEN_CUDACC && EIGEN_GPU_COMPILE_PHASE\n\n#endif  // EIGEN_COMPLEX_CUDA_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/arch/Default/BFloat16.h",
    "content": "/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"License\");\nyou may not use this file except in compliance with the License.\nYou may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n==============================================================================*/\n\n#ifndef EIGEN_BFLOAT16_H\n#define EIGEN_BFLOAT16_H\n\n#define BF16_PACKET_FUNCTION(PACKET_F, PACKET_BF16, METHOD)         \\\n  template <>                                                       \\\n  EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED  \\\n  PACKET_BF16 METHOD<PACKET_BF16>(const PACKET_BF16& _x) {          \\\n    return F32ToBf16(METHOD<PACKET_F>(Bf16ToF32(_x)));              \\\n  }\n\nnamespace Eigen {\n\nstruct bfloat16;\n\nnamespace bfloat16_impl {\n\n// Make our own __bfloat16_raw definition.\nstruct __bfloat16_raw {\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __bfloat16_raw() : value(0) {}\n  explicit EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __bfloat16_raw(unsigned short raw) : value(raw) {}\n  unsigned short value;\n};\n\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __bfloat16_raw raw_uint16_to_bfloat16(unsigned short value);\ntemplate <bool AssumeArgumentIsNormalOrInfinityOrZero>\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __bfloat16_raw float_to_bfloat16_rtne(float ff);\n// Forward declarations of template specializations, to avoid Visual C++ 2019 errors, saying:\n// > error C2908: explicit specialization; 'float_to_bfloat16_rtne' has already been instantiated\ntemplate <>\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __bfloat16_raw float_to_bfloat16_rtne<false>(float ff);\ntemplate <>\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __bfloat16_raw float_to_bfloat16_rtne<true>(float ff);\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float bfloat16_to_float(__bfloat16_raw h);\n\nstruct bfloat16_base : public __bfloat16_raw {\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bfloat16_base() {}\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bfloat16_base(const __bfloat16_raw& h) : __bfloat16_raw(h) {}\n};\n\n} // namespace bfloat16_impl\n\n// Class definition.\nstruct bfloat16 : public bfloat16_impl::bfloat16_base {\n\n  typedef bfloat16_impl::__bfloat16_raw __bfloat16_raw;\n\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bfloat16() {}\n\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bfloat16(const __bfloat16_raw& h) : bfloat16_impl::bfloat16_base(h) {}\n\n  explicit EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bfloat16(bool b)\n      : bfloat16_impl::bfloat16_base(bfloat16_impl::raw_uint16_to_bfloat16(b ? 
0x3f80 : 0)) {}\n\n  template<class T>\n  explicit EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bfloat16(T val)\n      : bfloat16_impl::bfloat16_base(bfloat16_impl::float_to_bfloat16_rtne<internal::is_integral<T>::value>(static_cast<float>(val))) {}\n\n  explicit EIGEN_DEVICE_FUNC bfloat16(float f)\n      : bfloat16_impl::bfloat16_base(bfloat16_impl::float_to_bfloat16_rtne<false>(f)) {}\n\n  // Following the convention of numpy, converting between complex and\n  // float will lead to loss of imag value.\n  template<typename RealScalar>\n  explicit EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bfloat16(const std::complex<RealScalar>& val)\n      : bfloat16_impl::bfloat16_base(bfloat16_impl::float_to_bfloat16_rtne<false>(static_cast<float>(val.real()))) {}\n\n  EIGEN_DEVICE_FUNC operator float() const {  // NOLINT: Allow implicit conversion to float, because it is lossless.\n    return bfloat16_impl::bfloat16_to_float(*this);\n  }\n};\n} // namespace Eigen\n\nnamespace std {\ntemplate<>\nstruct numeric_limits<Eigen::bfloat16> {\n  static const bool is_specialized = true;\n  static const bool is_signed = true;\n  static const bool is_integer = false;\n  static const bool is_exact = false;\n  static const bool has_infinity = true;\n  static const bool has_quiet_NaN = true;\n  static const bool has_signaling_NaN = true;\n  static const float_denorm_style has_denorm = std::denorm_absent;\n  static const bool has_denorm_loss = false;\n  static const std::float_round_style round_style = numeric_limits<float>::round_style;\n  static const bool is_iec559 = false;\n  static const bool is_bounded = true;\n  static const bool is_modulo = false;\n  static const int digits = 8;\n  static const int digits10 = 2;\n  static const int max_digits10 = 4;\n  static const int radix = 2;\n  static const int min_exponent = numeric_limits<float>::min_exponent;\n  static const int min_exponent10 = numeric_limits<float>::min_exponent10;\n  static const int max_exponent = numeric_limits<float>::max_exponent;\n  static const int max_exponent10 = numeric_limits<float>::max_exponent10;\n  static const bool traps = numeric_limits<float>::traps;\n  static const bool tinyness_before = numeric_limits<float>::tinyness_before;\n\n  static Eigen::bfloat16 (min)() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x0080); }\n  static Eigen::bfloat16 lowest() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0xff7f); }\n  static Eigen::bfloat16 (max)() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x7f7f); }\n  static Eigen::bfloat16 epsilon() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x3c00); }\n  static Eigen::bfloat16 round_error() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x3f00); }\n  static Eigen::bfloat16 infinity() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x7f80); }\n  static Eigen::bfloat16 quiet_NaN() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x7fc0); }\n  static Eigen::bfloat16 signaling_NaN() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x7f81); }\n  static Eigen::bfloat16 denorm_min() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x0001); }\n};\n\n// If std::numeric_limits<T> is specialized, should also specialize\n// std::numeric_limits<const T>, std::numeric_limits<volatile T>, and\n// std::numeric_limits<const volatile T>\n// https://stackoverflow.com/a/16519653/\ntemplate<>\nstruct numeric_limits<const Eigen::bfloat16> : numeric_limits<Eigen::bfloat16> {};\ntemplate<>\nstruct numeric_limits<volatile Eigen::bfloat16> : numeric_limits<Eigen::bfloat16> {};\ntemplate<>\nstruct 
numeric_limits<const volatile Eigen::bfloat16> : numeric_limits<Eigen::bfloat16> {};\n} // namespace std\n\nnamespace Eigen {\n\nnamespace bfloat16_impl {\n\n// We need to distinguish ‘clang as the CUDA compiler’ from ‘clang as the host compiler,\n// invoked by NVCC’ (e.g. on MacOS). The former needs to see both host and device implementation\n// of the functions, while the latter can only deal with one of them.\n#if !defined(EIGEN_HAS_NATIVE_BF16) || (EIGEN_COMP_CLANG && !EIGEN_COMP_NVCC) // Emulate support for bfloat16 floats\n\n#if EIGEN_COMP_CLANG && defined(EIGEN_CUDACC)\n// We need to provide emulated *host-side* BF16 operators for clang.\n#pragma push_macro(\"EIGEN_DEVICE_FUNC\")\n#undef EIGEN_DEVICE_FUNC\n#if defined(EIGEN_HAS_CUDA_BF16) && defined(EIGEN_HAS_NATIVE_BF16)\n#define EIGEN_DEVICE_FUNC __host__\n#else // both host and device need emulated ops.\n#define EIGEN_DEVICE_FUNC __host__ __device__\n#endif\n#endif\n\n// Definitions for CPUs, mostly working through conversion\n// to/from fp32.\n\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator + (const bfloat16& a, const bfloat16& b) {\n  return bfloat16(float(a) + float(b));\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator + (const bfloat16& a, const int& b) {\n  return bfloat16(float(a) + static_cast<float>(b));\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator + (const int& a, const bfloat16& b) {\n  return bfloat16(static_cast<float>(a) + float(b));\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator * (const bfloat16& a, const bfloat16& b) {\n  return bfloat16(float(a) * float(b));\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator - (const bfloat16& a, const bfloat16& b) {\n  return bfloat16(float(a) - float(b));\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator / (const bfloat16& a, const bfloat16& b) {\n  return bfloat16(float(a) / float(b));\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator - (const bfloat16& a) {\n  bfloat16 result;\n  result.value = a.value ^ 0x8000;\n  return result;\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16& operator += (bfloat16& a, const bfloat16& b) {\n  a = bfloat16(float(a) + float(b));\n  return a;\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16& operator *= (bfloat16& a, const bfloat16& b) {\n  a = bfloat16(float(a) * float(b));\n  return a;\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16& operator -= (bfloat16& a, const bfloat16& b) {\n  a = bfloat16(float(a) - float(b));\n  return a;\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16& operator /= (bfloat16& a, const bfloat16& b) {\n  a = bfloat16(float(a) / float(b));\n  return a;\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator++(bfloat16& a) {\n  a += bfloat16(1);\n  return a;\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator--(bfloat16& a) {\n  a -= bfloat16(1);\n  return a;\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator++(bfloat16& a, int) {\n  bfloat16 original_value = a;\n  ++a;\n  return original_value;\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator--(bfloat16& a, int) {\n  bfloat16 original_value = a;\n  --a;\n  return original_value;\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator == (const bfloat16& a, const bfloat16& b) {\n  return numext::equal_strict(float(a),float(b));\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator != (const bfloat16& a, const bfloat16& b) {\n  return numext::not_equal_strict(float(a), float(b));\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator < (const 
bfloat16& a, const bfloat16& b) {\n  return float(a) < float(b);\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator <= (const bfloat16& a, const bfloat16& b) {\n  return float(a) <= float(b);\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator > (const bfloat16& a, const bfloat16& b) {\n  return float(a) > float(b);\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator >= (const bfloat16& a, const bfloat16& b) {\n  return float(a) >= float(b);\n}\n\n#if EIGEN_COMP_CLANG && defined(EIGEN_CUDACC)\n#pragma pop_macro(\"EIGEN_DEVICE_FUNC\")\n#endif\n#endif  // Emulate support for bfloat16 floats\n\n// Division by an index. Do it in full float precision to avoid accuracy\n// issues in converting the denominator to bfloat16.\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator / (const bfloat16& a, Index b) {\n  return bfloat16(static_cast<float>(a) / static_cast<float>(b));\n}\n\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __bfloat16_raw truncate_to_bfloat16(const float v) {\n  __bfloat16_raw output;\n  if (Eigen::numext::isnan EIGEN_NOT_A_MACRO(v)) {\n    output.value = std::signbit(v) ? 0xFFC0: 0x7FC0;\n    return output;\n  }\n  const uint16_t* p = reinterpret_cast<const uint16_t*>(&v);\n#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__\n  output.value = p[0];\n#else\n  output.value = p[1];\n#endif\n  return output;\n}\n\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __bfloat16_raw raw_uint16_to_bfloat16(numext::uint16_t value) {\n  return __bfloat16_raw(value);\n}\n\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR numext::uint16_t raw_bfloat16_as_uint16(const __bfloat16_raw& bf) {\n  return bf.value;\n}\n\n// float_to_bfloat16_rtne template specialization that does not make any\n// assumption about the value of its function argument (ff).\ntemplate <>\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __bfloat16_raw float_to_bfloat16_rtne<false>(float ff) {\n#if (defined(EIGEN_HAS_CUDA_BF16) && defined(EIGEN_HAS_HIP_BF16))\n  // Nothing to do here\n#else\n  __bfloat16_raw output;\n\n  if (Eigen::numext::isnan EIGEN_NOT_A_MACRO(ff)) {\n    // If the value is a NaN, squash it to a qNaN with msb of fraction set,\n    // this makes sure after truncation we don't end up with an inf.\n    //\n    // qNaN magic: All exponent bits set + most significant bit of fraction\n    // set.\n    output.value = std::signbit(ff) ? 0xFFC0: 0x7FC0;\n  } else {\n    // Fast rounding algorithm that rounds a half value to nearest even. This\n    // reduces expected error when we convert a large number of floats. Here\n    // is how it works:\n    //\n    // Definitions:\n    // To convert a float 32 to bfloat16, a float 32 can be viewed as 32 bits\n    // with the following tags:\n    //\n    // Sign |  Exp (8 bits) | Frac (23 bits)\n    //  S     EEEEEEEE         FFFFFFLRTTTTTTTTTTTTTTT\n    //\n    //  S: Sign bit.\n    //  E: Exponent bits.\n    //  F: First 6 bits of fraction.\n    //  L: Least significant bit of resulting bfloat16 if we truncate away the\n    //  rest of the float32. 
This is also the 7th bit of fraction\n    //  R: Rounding bit, 8th bit of fraction.\n    //  T: Sticky bits, rest of fraction, 15 bits.\n    //\n    // To round half to nearest even, there are 3 cases where we want to round\n    // down (simply truncate the result of the bits away, which consists of\n    // rounding bit and sticky bits) and two cases where we want to round up\n    // (truncate then add one to the result).\n    //\n    // The fast converting algorithm simply adds lsb (L) to 0x7fff (15 bits of\n    // 1s) as the rounding bias, adds the rounding bias to the input, then\n    // truncates the last 16 bits away.\n    //\n    // To understand how it works, we can analyze this algorithm case by case:\n    //\n    // 1. L = 0, R = 0:\n    //   Expect: round down, this is less than half value.\n    //\n    //   Algorithm:\n    //   - Rounding bias: 0x7fff + 0 = 0x7fff\n    //   - Adding rounding bias to input may create any carry, depending on\n    //   whether there is any value set to 1 in T bits.\n    //   - R may be set to 1 if there is a carry.\n    //   - L remains 0.\n    //   - Note that this case also handles Inf and -Inf, where all fraction\n    //   bits, including L, R and Ts are all 0. The output remains Inf after\n    //   this algorithm.\n    //\n    // 2. L = 1, R = 0:\n    //   Expect: round down, this is less than half value.\n    //\n    //   Algorithm:\n    //   - Rounding bias: 0x7fff + 1 = 0x8000\n    //   - Adding rounding bias to input doesn't change sticky bits but\n    //   adds 1 to rounding bit.\n    //   - L remains 1.\n    //\n    // 3. L = 0, R = 1, all of T are 0:\n    //   Expect: round down, this is exactly at half, the result is already\n    //   even (L=0).\n    //\n    //   Algorithm:\n    //   - Rounding bias: 0x7fff + 0 = 0x7fff\n    //   - Adding rounding bias to input sets all sticky bits to 1, but\n    //   doesn't create a carry.\n    //   - R remains 1.\n    //   - L remains 0.\n    //\n    // 4. L = 1, R = 1:\n    //   Expect: round up, this is exactly at half, the result needs to be\n    //   round to the next even number.\n    //\n    //   Algorithm:\n    //   - Rounding bias: 0x7fff + 1 = 0x8000\n    //   - Adding rounding bias to input doesn't change sticky bits, but\n    //   creates a carry from rounding bit.\n    //   - The carry sets L to 0, creates another carry bit and propagate\n    //   forward to F bits.\n    //   - If all the F bits are 1, a carry then propagates to the exponent\n    //   bits, which then creates the minimum value with the next exponent\n    //   value. Note that we won't have the case where exponents are all 1,\n    //   since that's either a NaN (handled in the other if condition) or inf\n    //   (handled in case 1).\n    //\n    // 5. L = 0, R = 1, any of T is 1:\n    //   Expect: round up, this is greater than half.\n    //\n    //   Algorithm:\n    //   - Rounding bias: 0x7fff + 0 = 0x7fff\n    //   - Adding rounding bias to input creates a carry from sticky bits,\n    //   sets rounding bit to 0, then create another carry.\n    //   - The second carry sets L to 1.\n    //\n    // Examples:\n    //\n    //  Exact half value that is already even:\n    //    Input:\n    //    Sign |  Exp (8 bit)     | Frac (first 7 bit) | Frac (last 16 bit)\n    //     S     E E E E E E E E      F F F F F F L     RTTTTTTTTTTTTTTT\n    //     0     0 0 0 0 0 0 0 0      0 0 0 0 0 1 0     1000000000000000\n    //\n    //     This falls into case 3. 
We truncate the rest of 16 bits and no\n    //     carry is created into F and L:\n    //\n    //    Output:\n    //    Sign |  Exp (8 bit)     | Frac (first 7 bit)\n    //     S     E E E E E E E E      F F F F F F L\n    //     0     0 0 0 0 0 0 0 0      0 0 0 0 0 1 0\n    //\n    //  Exact half value, round to next even number:\n    //    Input:\n    //    Sign |  Exp (8 bit)     | Frac (first 7 bit) | Frac (last 16 bit)\n    //     S     E E E E E E E E      F F F F F F L     RTTTTTTTTTTTTTTT\n    //     0     0 0 0 0 0 0 0 0      0 0 0 0 0 0 1     1000000000000000\n    //\n    //     This falls into case 4. We create a carry from R and T,\n    //     which then propagates into L and F:\n    //\n    //    Output:\n    //    Sign |  Exp (8 bit)     | Frac (first 7 bit)\n    //     S     E E E E E E E E      F F F F F F L\n    //     0     0 0 0 0 0 0 0 0      0 0 0 0 0 1 0\n    //\n    //\n    //  Max denormal value round to min normal value:\n    //    Input:\n    //    Sign |  Exp (8 bit)     | Frac (first 7 bit) | Frac (last 16 bit)\n    //     S     E E E E E E E E      F F F F F F L     RTTTTTTTTTTTTTTT\n    //     0     0 0 0 0 0 0 0 0      1 1 1 1 1 1 1     1111111111111111\n    //\n    //     This falls into case 4. We create a carry from R and T,\n    //     propagate into L and F, which then propagates into exponent\n    //     bits:\n    //\n    //    Output:\n    //    Sign |  Exp (8 bit)     | Frac (first 7 bit)\n    //     S     E E E E E E E E      F F F F F F L\n    //     0     0 0 0 0 0 0 0 1      0 0 0 0 0 0 0\n    //\n    //  Max normal value round to Inf:\n    //    Input:\n    //    Sign |  Exp (8 bit)     | Frac (first 7 bit) | Frac (last 16 bit)\n    //     S     E E E E E E E E      F F F F F F L     RTTTTTTTTTTTTTTT\n    //     0     1 1 1 1 1 1 1 0      1 1 1 1 1 1 1     1111111111111111\n    //\n    //     This falls into case 4. We create a carry from R and T,\n    //     propagate into L and F, which then propagates into exponent\n    //     bits:\n    //\n    //    Sign |  Exp (8 bit)     | Frac (first 7 bit)\n    //     S     E E E E E E E E      F F F F F F L\n    //     0     1 1 1 1 1 1 1 1      0 0 0 0 0 0 0\n\n    // At this point, ff must be either a normal float, or +/-infinity.\n    output = float_to_bfloat16_rtne<true>(ff);\n  }\n  return output;\n#endif\n}\n\n// float_to_bfloat16_rtne template specialization that assumes that its function\n// argument (ff) is either a normal floating point number, or +/-infinity, or\n// zero. 
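(Unlike the generic specialization above, no NaN squashing is needed here.) 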
Used to improve the runtime performance of conversion from an integer\n// type to bfloat16.\ntemplate <>\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __bfloat16_raw float_to_bfloat16_rtne<true>(float ff) {\n#if (defined(EIGEN_HAS_CUDA_BF16) && defined(EIGEN_HAS_HIP_BF16))\n    // Nothing to do here\n#else\n    numext::uint32_t input = numext::bit_cast<numext::uint32_t>(ff);\n    __bfloat16_raw output;\n\n    // Least significant bit of resulting bfloat.\n    numext::uint32_t lsb = (input >> 16) & 1;\n    numext::uint32_t rounding_bias = 0x7fff + lsb;\n    input += rounding_bias;\n    output.value = static_cast<numext::uint16_t>(input >> 16);\n    return output;\n#endif\n}\n\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float bfloat16_to_float(__bfloat16_raw h) {\n    float result = 0;\n    unsigned short* q = reinterpret_cast<unsigned short*>(&result);\n#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__\n    q[0] = h.value;\n#else\n    q[1] = h.value;\n#endif\n    return result;\n}\n// --- standard functions ---\n\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isinf)(const bfloat16& a) {\n  EIGEN_USING_STD(isinf);\n  return (isinf)(float(a));\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isnan)(const bfloat16& a) {\n  EIGEN_USING_STD(isnan);\n  return (isnan)(float(a));\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isfinite)(const bfloat16& a) {\n  return !(isinf EIGEN_NOT_A_MACRO (a)) && !(isnan EIGEN_NOT_A_MACRO (a));\n}\n\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 abs(const bfloat16& a) {\n  bfloat16 result;\n  result.value = a.value & 0x7FFF;\n  return result;\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 exp(const bfloat16& a) {\n   return bfloat16(::expf(float(a)));\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 expm1(const bfloat16& a) {\n  return bfloat16(numext::expm1(float(a)));\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 log(const bfloat16& a) {\n  return bfloat16(::logf(float(a)));\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 log1p(const bfloat16& a) {\n  return bfloat16(numext::log1p(float(a)));\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 log10(const bfloat16& a) {\n  return bfloat16(::log10f(float(a)));\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 log2(const bfloat16& a) {\n  return bfloat16(static_cast<float>(EIGEN_LOG2E) * ::logf(float(a)));\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 sqrt(const bfloat16& a) {\n    return bfloat16(::sqrtf(float(a)));\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 pow(const bfloat16& a, const bfloat16& b) {\n  return bfloat16(::powf(float(a), float(b)));\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 sin(const bfloat16& a) {\n  return bfloat16(::sinf(float(a)));\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 cos(const bfloat16& a) {\n  return bfloat16(::cosf(float(a)));\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 tan(const bfloat16& a) {\n  return bfloat16(::tanf(float(a)));\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 asin(const bfloat16& a) {\n  return bfloat16(::asinf(float(a)));\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 acos(const bfloat16& a) {\n  return bfloat16(::acosf(float(a)));\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 atan(const bfloat16& a) {\n  return bfloat16(::atanf(float(a)));\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 sinh(const bfloat16& a) {\n  return bfloat16(::sinhf(float(a)));\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 cosh(const bfloat16& a) {\n  return bfloat16(::coshf(float(a)));\n}\nEIGEN_STRONG_INLINE 
EIGEN_DEVICE_FUNC bfloat16 tanh(const bfloat16& a) {\n  return bfloat16(::tanhf(float(a)));\n}\n#if EIGEN_HAS_CXX11_MATH\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 asinh(const bfloat16& a) {\n  return bfloat16(::asinhf(float(a)));\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 acosh(const bfloat16& a) {\n  return bfloat16(::acoshf(float(a)));\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 atanh(const bfloat16& a) {\n  return bfloat16(::atanhf(float(a)));\n}\n#endif\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 floor(const bfloat16& a) {\n  return bfloat16(::floorf(float(a)));\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 ceil(const bfloat16& a) {\n  return bfloat16(::ceilf(float(a)));\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 rint(const bfloat16& a) {\n  return bfloat16(::rintf(float(a)));\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 round(const bfloat16& a) {\n  return bfloat16(::roundf(float(a)));\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 fmod(const bfloat16& a, const bfloat16& b) {\n  return bfloat16(::fmodf(float(a), float(b)));\n}\n\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 (min)(const bfloat16& a, const bfloat16& b) {\n  const float f1 = static_cast<float>(a);\n  const float f2 = static_cast<float>(b);\n  return f2 < f1 ? b : a;\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 (max)(const bfloat16& a, const bfloat16& b) {\n  const float f1 = static_cast<float>(a);\n  const float f2 = static_cast<float>(b);\n  return f1 < f2 ? b : a;\n}\n\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 fmin(const bfloat16& a, const bfloat16& b) {\n  const float f1 = static_cast<float>(a);\n  const float f2 = static_cast<float>(b);\n  return bfloat16(::fminf(f1, f2));\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 fmax(const bfloat16& a, const bfloat16& b) {\n  const float f1 = static_cast<float>(a);\n  const float f2 = static_cast<float>(b);\n  return bfloat16(::fmaxf(f1, f2));\n}\n\n#ifndef EIGEN_NO_IO\nEIGEN_ALWAYS_INLINE std::ostream& operator << (std::ostream& os, const bfloat16& v) {\n  os << static_cast<float>(v);\n  return os;\n}\n#endif\n\n} // namespace bfloat16_impl\n\nnamespace internal {\n\ntemplate<>\nstruct random_default_impl<bfloat16, false, false>\n{\n  static inline bfloat16 run(const bfloat16& x, const bfloat16& y)\n  {\n    return x + (y-x) * bfloat16(float(std::rand()) / float(RAND_MAX));\n  }\n  static inline bfloat16 run()\n  {\n    return run(bfloat16(-1.f), bfloat16(1.f));\n  }\n};\n\ntemplate<> struct is_arithmetic<bfloat16> { enum { value = true }; };\n\n} // namespace internal\n\ntemplate<> struct NumTraits<Eigen::bfloat16>\n    : GenericNumTraits<Eigen::bfloat16>\n{\n  enum {\n    IsSigned = true,\n    IsInteger = false,\n    IsComplex = false,\n    RequireInitialization = false\n  };\n\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::bfloat16 epsilon() {\n    return bfloat16_impl::raw_uint16_to_bfloat16(0x3c00);\n  }\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::bfloat16 dummy_precision() {\n    return bfloat16_impl::raw_uint16_to_bfloat16(0x3D4D);  // bfloat16(5e-2f);\n\n  }\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::bfloat16 highest() {\n    return bfloat16_impl::raw_uint16_to_bfloat16(0x7F7F);\n  }\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::bfloat16 lowest() {\n    return bfloat16_impl::raw_uint16_to_bfloat16(0xFF7F);\n  }\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::bfloat16 infinity() 
{\n    return bfloat16_impl::raw_uint16_to_bfloat16(0x7f80);\n  }\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::bfloat16 quiet_NaN() {\n    return bfloat16_impl::raw_uint16_to_bfloat16(0x7fc0);\n  }\n};\n\n} // namespace Eigen\n\nnamespace Eigen {\nnamespace numext {\n\ntemplate<>\nEIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE\nbool (isnan)(const Eigen::bfloat16& h) {\n  return (bfloat16_impl::isnan)(h);\n}\n\ntemplate<>\nEIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE\nbool (isinf)(const Eigen::bfloat16& h) {\n  return (bfloat16_impl::isinf)(h);\n}\n\ntemplate<>\nEIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE\nbool (isfinite)(const Eigen::bfloat16& h) {\n  return (bfloat16_impl::isfinite)(h);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::bfloat16 bit_cast<Eigen::bfloat16, uint16_t>(const uint16_t& src) {\n  return Eigen::bfloat16(Eigen::bfloat16_impl::raw_uint16_to_bfloat16(src));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC uint16_t bit_cast<uint16_t, Eigen::bfloat16>(const Eigen::bfloat16& src) {\n  return Eigen::bfloat16_impl::raw_bfloat16_as_uint16(src);\n}\n\n}  // namespace numext\n}  // namespace Eigen\n\n#if EIGEN_HAS_STD_HASH\nnamespace std {\ntemplate <>\nstruct hash<Eigen::bfloat16> {\n  EIGEN_STRONG_INLINE std::size_t operator()(const Eigen::bfloat16& a) const {\n    return static_cast<std::size_t>(Eigen::numext::bit_cast<Eigen::numext::uint16_t>(a));\n  }\n};\n} // namespace std\n#endif\n\n\n#endif // EIGEN_BFLOAT16_H\n"
  },
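The fast rounding trick documented in BFloat16.h above is easy to sanity-check with a scalar model. The following is an editor's sketch, not part of the vendored Eigen sources: the helpers `float_to_bf16_rtne` and `bf16_to_float` are hypothetical names, NaN inputs are not handled (Eigen squashes them to a quiet NaN before applying the bias), and IEEE-754 binary32 floats with `memcpy`-based bit copies are assumed.

```cpp
// Editor's sketch: scalar model of the lsb + 0x7fff round-to-nearest-even trick.
#include <cstdint>
#include <cstdio>
#include <cstring>

static std::uint16_t float_to_bf16_rtne(float f) {  // hypothetical helper; no NaN handling
  std::uint32_t bits;
  std::memcpy(&bits, &f, sizeof bits);          // bit-cast the float
  const std::uint32_t lsb = (bits >> 16) & 1u;  // L: lsb of the future bfloat16
  bits += 0x7FFFu + lsb;                        // rounding bias; may carry into L, F, E
  return static_cast<std::uint16_t>(bits >> 16);
}

static float bf16_to_float(std::uint16_t h) {   // widen by placing bits in the high half
  const std::uint32_t bits = static_cast<std::uint32_t>(h) << 16;
  float f;
  std::memcpy(&f, &bits, sizeof f);
  return f;
}

int main() {
  // 1 + 2^-8 is exactly halfway between bfloat16 1.0 and 1 + 2^-7; the even
  // neighbour 1.0 wins (case 3 above).
  std::printf("%.10g\n", bf16_to_float(float_to_bf16_rtne(1.00390625f)));  // prints 1
  // 1 + 3*2^-8 is halfway with L = 1 and rounds up to 1 + 2^-6 (case 4 above).
  std::printf("%.10g\n", bf16_to_float(float_to_bf16_rtne(1.01171875f)));  // prints 1.015625
}
```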
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/arch/Default/ConjHelper.h",
    "content": "\n// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2017 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_ARCH_CONJ_HELPER_H\n#define EIGEN_ARCH_CONJ_HELPER_H\n\n#define EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(PACKET_CPLX, PACKET_REAL)      \\\n  template <>                                                           \\\n  struct conj_helper<PACKET_REAL, PACKET_CPLX, false, false> {          \\\n    EIGEN_STRONG_INLINE PACKET_CPLX pmadd(const PACKET_REAL& x,         \\\n                                          const PACKET_CPLX& y,         \\\n                                          const PACKET_CPLX& c) const { \\\n      return padd(c, this->pmul(x, y));                                 \\\n    }                                                                   \\\n    EIGEN_STRONG_INLINE PACKET_CPLX pmul(const PACKET_REAL& x,          \\\n                                         const PACKET_CPLX& y) const {  \\\n      return PACKET_CPLX(Eigen::internal::pmul<PACKET_REAL>(x, y.v));   \\\n    }                                                                   \\\n  };                                                                    \\\n                                                                        \\\n  template <>                                                           \\\n  struct conj_helper<PACKET_CPLX, PACKET_REAL, false, false> {          \\\n    EIGEN_STRONG_INLINE PACKET_CPLX pmadd(const PACKET_CPLX& x,         \\\n                                          const PACKET_REAL& y,         \\\n                                          const PACKET_CPLX& c) const { \\\n      return padd(c, this->pmul(x, y));                                 \\\n    }                                                                   \\\n    EIGEN_STRONG_INLINE PACKET_CPLX pmul(const PACKET_CPLX& x,          \\\n                                         const PACKET_REAL& y) const {  \\\n      return PACKET_CPLX(Eigen::internal::pmul<PACKET_REAL>(x.v, y));   \\\n    }                                                                   \\\n  };\n\nnamespace Eigen {\nnamespace internal {\n\ntemplate<bool Conjugate> struct conj_if;\n\ntemplate<> struct conj_if<true> {\n  template<typename T>\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator()(const T& x) const { return numext::conj(x); }\n  template<typename T>\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T pconj(const T& x) const { return internal::pconj(x); }\n};\n\ntemplate<> struct conj_if<false> {\n  template<typename T>\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T& operator()(const T& x) const { return x; }\n  template<typename T>\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T& pconj(const T& x) const { return x; }\n};\n\n// Generic Implementation, assume scalars since the packet-version is\n// specialized below.\ntemplate<typename LhsType, typename RhsType, bool ConjLhs, bool ConjRhs>\nstruct conj_helper {\n  typedef typename ScalarBinaryOpTraits<LhsType, RhsType>::ReturnType ResultType;\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ResultType\n  pmadd(const LhsType& x, const RhsType& y, const ResultType& c) const\n  { return this->pmul(x, y) + c; }\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ResultType\n  pmul(const LhsType& x, const RhsType& y) const\n  { 
return conj_if<ConjLhs>()(x) * conj_if<ConjRhs>()(y); }\n};\n\ntemplate<typename LhsScalar, typename RhsScalar>\nstruct conj_helper<LhsScalar, RhsScalar, true, true> {\n  typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar>::ReturnType ResultType;\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ResultType\n  pmadd(const LhsScalar& x, const RhsScalar& y, const ResultType& c) const\n  { return this->pmul(x, y) + c; }\n\n  // We save a conjugation by using the identity conj(a)*conj(b) = conj(a*b).\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ResultType\n  pmul(const LhsScalar& x, const RhsScalar& y) const\n  { return numext::conj(x * y); }\n};\n\n// Implementation with equal type, use packet operations.\ntemplate<typename Packet, bool ConjLhs, bool ConjRhs>\nstruct conj_helper<Packet, Packet, ConjLhs, ConjRhs>\n{\n  typedef Packet ResultType;\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet pmadd(const Packet& x, const Packet& y, const Packet& c) const\n  { return Eigen::internal::pmadd(conj_if<ConjLhs>().pconj(x), conj_if<ConjRhs>().pconj(y), c); }\n\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet pmul(const Packet& x, const Packet& y) const\n  { return Eigen::internal::pmul(conj_if<ConjLhs>().pconj(x), conj_if<ConjRhs>().pconj(y)); }\n};\n\ntemplate<typename Packet>\nstruct conj_helper<Packet, Packet, true, true>\n{\n  typedef Packet ResultType;\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet pmadd(const Packet& x, const Packet& y, const Packet& c) const\n  { return Eigen::internal::pmadd(pconj(x), pconj(y), c); }\n  // We save a conjugation by using the identity conj(a)*conj(b) = conj(a*b).\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet pmul(const Packet& x, const Packet& y) const\n  { return pconj(Eigen::internal::pmul(x, y)); }\n};\n\n}  // namespace internal\n}  // namespace Eigen\n\n#endif  // EIGEN_ARCH_CONJ_HELPER_H\n"
  },
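Both `true, true` specializations above lean on the identity conj(a)*conj(b) = conj(a*b) to spend a single conjugation per multiply instead of two. A quick editor's check with `std::complex` (illustrative only, not part of the vendored sources):

```cpp
// Editor's check of the identity exploited by conj_helper<..., true, true>.
#include <cassert>
#include <complex>

int main() {
  const std::complex<float> a(1.0f, 2.0f), b(3.0f, -4.0f);
  // conj(a) * conj(b) and conj(a * b) both equal 11 - 2i (exact in float here).
  assert(std::conj(a) * std::conj(b) == std::conj(a * b));
  return 0;
}
```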
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2007 Julien Pommier\n// Copyright (C) 2014 Pedro Gonnet (pedro.gonnet@gmail.com)\n// Copyright (C) 2009-2019 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n/* The exp and log functions of this file initially come from\n * Julien Pommier's sse math library: http://gruntthepeon.free.fr/ssemath/\n */\n\n#ifndef EIGEN_ARCH_GENERIC_PACKET_MATH_FUNCTIONS_H\n#define EIGEN_ARCH_GENERIC_PACKET_MATH_FUNCTIONS_H\n\nnamespace Eigen {\nnamespace internal {\n\n// Creates a Scalar integer type with same bit-width.\ntemplate<typename T> struct make_integer;\ntemplate<> struct make_integer<float>    { typedef numext::int32_t type; };\ntemplate<> struct make_integer<double>   { typedef numext::int64_t type; };\ntemplate<> struct make_integer<half>     { typedef numext::int16_t type; };\ntemplate<> struct make_integer<bfloat16> { typedef numext::int16_t type; };\n\ntemplate<typename Packet> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC  \nPacket pfrexp_generic_get_biased_exponent(const Packet& a) {\n  typedef typename unpacket_traits<Packet>::type Scalar;\n  typedef typename unpacket_traits<Packet>::integer_packet PacketI;\n  enum { mantissa_bits = numext::numeric_limits<Scalar>::digits - 1};\n  return pcast<PacketI, Packet>(plogical_shift_right<mantissa_bits>(preinterpret<PacketI>(pabs(a))));\n}\n\n// Safely applies frexp, correctly handles denormals.\n// Assumes IEEE floating point format.\ntemplate<typename Packet> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC\nPacket pfrexp_generic(const Packet& a, Packet& exponent) {\n  typedef typename unpacket_traits<Packet>::type Scalar;\n  typedef typename make_unsigned<typename make_integer<Scalar>::type>::type ScalarUI;\n  enum {\n    TotalBits = sizeof(Scalar) * CHAR_BIT,\n    MantissaBits = numext::numeric_limits<Scalar>::digits - 1,\n    ExponentBits = int(TotalBits) - int(MantissaBits) - 1\n  };\n\n  EIGEN_CONSTEXPR ScalarUI scalar_sign_mantissa_mask = \n      ~(((ScalarUI(1) << int(ExponentBits)) - ScalarUI(1)) << int(MantissaBits)); // ~0x7f800000\n  const Packet sign_mantissa_mask = pset1frombits<Packet>(static_cast<ScalarUI>(scalar_sign_mantissa_mask)); \n  const Packet half = pset1<Packet>(Scalar(0.5));\n  const Packet zero = pzero(a);\n  const Packet normal_min = pset1<Packet>((numext::numeric_limits<Scalar>::min)()); // Minimum normal value, 2^-126\n  \n  // To handle denormals, normalize by multiplying by 2^(int(MantissaBits)+1).\n  const Packet is_denormal = pcmp_lt(pabs(a), normal_min);\n  EIGEN_CONSTEXPR ScalarUI scalar_normalization_offset = ScalarUI(int(MantissaBits) + 1); // 24\n  // The following cannot be constexpr because bfloat16(uint16_t) is not constexpr.\n  const Scalar scalar_normalization_factor = Scalar(ScalarUI(1) << int(scalar_normalization_offset)); // 2^24\n  const Packet normalization_factor = pset1<Packet>(scalar_normalization_factor);  \n  const Packet normalized_a = pselect(is_denormal, pmul(a, normalization_factor), a);\n  \n  // Determine exponent offset: -126 if normal, -126-24 if denormal\n  const Scalar scalar_exponent_offset = -Scalar((ScalarUI(1)<<(int(ExponentBits)-1)) - ScalarUI(2)); // -126\n  Packet exponent_offset = pset1<Packet>(scalar_exponent_offset);\n  const Packet normalization_offset = 
pset1<Packet>(-Scalar(scalar_normalization_offset)); // -24\n  exponent_offset = pselect(is_denormal, padd(exponent_offset, normalization_offset), exponent_offset);\n  \n  // Determine exponent and mantissa from normalized_a.\n  exponent = pfrexp_generic_get_biased_exponent(normalized_a);\n  // Zero, Inf and NaN return 'a' unmodified, exponent is zero\n  // (technically the exponent is unspecified for inf/NaN, but GCC/Clang set it to zero)\n  const Scalar scalar_non_finite_exponent = Scalar((ScalarUI(1) << int(ExponentBits)) - ScalarUI(1));  // 255\n  const Packet non_finite_exponent = pset1<Packet>(scalar_non_finite_exponent);\n  const Packet is_zero_or_not_finite = por(pcmp_eq(a, zero), pcmp_eq(exponent, non_finite_exponent));\n  const Packet m = pselect(is_zero_or_not_finite, a, por(pand(normalized_a, sign_mantissa_mask), half));\n  exponent = pselect(is_zero_or_not_finite, zero, padd(exponent, exponent_offset));  \n  return m;\n}\n\n// Safely applies ldexp, correctly handles overflows, underflows and denormals.\n// Assumes IEEE floating point format.\ntemplate<typename Packet> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC\nPacket pldexp_generic(const Packet& a, const Packet& exponent) {\n  // We want to return a * 2^exponent, allowing for all possible integer\n  // exponents without overflowing or underflowing in intermediate\n  // computations.\n  //\n  // Since 'a' and the output can be denormal, the maximum range of 'exponent'\n  // to consider for a float is:\n  //   -255-23 -> 255+23\n  // Below -278 any finite float 'a' will become zero, and above +278 any\n  // finite float will become inf, including when 'a' is the smallest possible \n  // denormal.\n  //\n  // Unfortunately, 2^(278) cannot be represented using either one or two\n  // finite normal floats, so we must split the scale factor into at least\n  // three parts. 
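(For float, e.g., 2^278 = 2^93 * 2^93 * 2^92, where every factor is a finite normal float, whereas no one- or two-factor split can keep each factor below the 2^128 overflow threshold.) 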
It turns out to be faster to split 'exponent' into four\n  // factors, since [exponent>>2] is much faster to compute than [exponent/3].\n  //\n  // Set e = min(max(exponent, -278), 278);\n  //     b = floor(e/4);\n  //   out = ((((a * 2^(b)) * 2^(b)) * 2^(b)) * 2^(e-3*b))\n  //\n  // This will avoid any intermediate overflows and correctly handle 0, inf,\n  // NaN cases.\n  typedef typename unpacket_traits<Packet>::integer_packet PacketI;\n  typedef typename unpacket_traits<Packet>::type Scalar;\n  typedef typename unpacket_traits<PacketI>::type ScalarI;\n  enum {\n    TotalBits = sizeof(Scalar) * CHAR_BIT,\n    MantissaBits = numext::numeric_limits<Scalar>::digits - 1,\n    ExponentBits = int(TotalBits) - int(MantissaBits) - 1\n  };\n\n  const Packet max_exponent = pset1<Packet>(Scalar((ScalarI(1)<<int(ExponentBits)) + ScalarI(int(MantissaBits) - 1)));  // 278\n  const PacketI bias = pset1<PacketI>((ScalarI(1)<<(int(ExponentBits)-1)) - ScalarI(1));  // 127\n  const PacketI e = pcast<Packet, PacketI>(pmin(pmax(exponent, pnegate(max_exponent)), max_exponent));\n  PacketI b = parithmetic_shift_right<2>(e); // floor(e/4);\n  Packet c = preinterpret<Packet>(plogical_shift_left<int(MantissaBits)>(padd(b, bias)));  // 2^b\n  Packet out = pmul(pmul(pmul(a, c), c), c);  // a * 2^(3b)\n  b = psub(psub(psub(e, b), b), b); // e - 3b\n  c = preinterpret<Packet>(plogical_shift_left<int(MantissaBits)>(padd(b, bias)));  // 2^(e-3*b)\n  out = pmul(out, c);\n  return out;\n}\n\n// Explicitly multiplies \n//    a * (2^e)\n// clamping e to the range\n// [NumTraits<Scalar>::min_exponent()-2, NumTraits<Scalar>::max_exponent()]\n//\n// This is approx 7x faster than pldexp_impl, but will prematurely over/underflow\n// if 2^e doesn't fit into a normal floating-point Scalar.\n//\n// Assumes IEEE floating point format\ntemplate<typename Packet>\nstruct pldexp_fast_impl {\n  typedef typename unpacket_traits<Packet>::integer_packet PacketI;\n  typedef typename unpacket_traits<Packet>::type Scalar;\n  typedef typename unpacket_traits<PacketI>::type ScalarI;\n  enum {\n    TotalBits = sizeof(Scalar) * CHAR_BIT,\n    MantissaBits = numext::numeric_limits<Scalar>::digits - 1,\n    ExponentBits = int(TotalBits) - int(MantissaBits) - 1\n  };\n  \n  static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC\n  Packet run(const Packet& a, const Packet& exponent) {\n    const Packet bias = pset1<Packet>(Scalar((ScalarI(1)<<(int(ExponentBits)-1)) - ScalarI(1)));  // 127\n    const Packet limit = pset1<Packet>(Scalar((ScalarI(1)<<int(ExponentBits)) - ScalarI(1)));     // 255\n    // restrict biased exponent between 0 and 255 for float.\n    const PacketI e = pcast<Packet, PacketI>(pmin(pmax(padd(exponent, bias), pzero(limit)), limit)); // exponent + 127\n    // return a * (2^e)\n    return pmul(a, preinterpret<Packet>(plogical_shift_left<int(MantissaBits)>(e)));\n  }\n};\n\n// Natural or base 2 logarithm.\n// Computes log(x) as log(2^e * m) = C*e + log(m), where the constant C = log(2)\n// and m is in the range [sqrt(1/2),sqrt(2)). In this range, the logarithm can\n// be easily approximated by a polynomial centered on m=1 for stability.\n// TODO(gonnet): Further reduce the interval allowing for lower-degree\n//               polynomial interpolants -> ... 
-> profit!\ntemplate <typename Packet, bool base2>\nEIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS\nEIGEN_UNUSED\nPacket plog_impl_float(const Packet _x)\n{\n  Packet x = _x;\n\n  const Packet cst_1              = pset1<Packet>(1.0f);\n  const Packet cst_neg_half       = pset1<Packet>(-0.5f);\n  // The smallest non-denormalized float number.\n  const Packet cst_min_norm_pos   = pset1frombits<Packet>( 0x00800000u);\n  const Packet cst_minus_inf      = pset1frombits<Packet>( 0xff800000u);\n  const Packet cst_pos_inf        = pset1frombits<Packet>( 0x7f800000u);\n\n  // Polynomial coefficients.\n  const Packet cst_cephes_SQRTHF = pset1<Packet>(0.707106781186547524f);\n  const Packet cst_cephes_log_p0 = pset1<Packet>(7.0376836292E-2f);\n  const Packet cst_cephes_log_p1 = pset1<Packet>(-1.1514610310E-1f);\n  const Packet cst_cephes_log_p2 = pset1<Packet>(1.1676998740E-1f);\n  const Packet cst_cephes_log_p3 = pset1<Packet>(-1.2420140846E-1f);\n  const Packet cst_cephes_log_p4 = pset1<Packet>(+1.4249322787E-1f);\n  const Packet cst_cephes_log_p5 = pset1<Packet>(-1.6668057665E-1f);\n  const Packet cst_cephes_log_p6 = pset1<Packet>(+2.0000714765E-1f);\n  const Packet cst_cephes_log_p7 = pset1<Packet>(-2.4999993993E-1f);\n  const Packet cst_cephes_log_p8 = pset1<Packet>(+3.3333331174E-1f);\n\n  // Truncate input values to the minimum positive normal.\n  x = pmax(x, cst_min_norm_pos);\n\n  Packet e;\n  // extract the significand in the range [0.5,1) and the exponent\n  x = pfrexp(x,e);\n\n  // part 2: Shift the inputs from the range [0.5,1) to [sqrt(1/2),sqrt(2))\n  // and shift by -1. The values are then centered around 0, which improves\n  // the stability of the polynomial evaluation.\n  //   if( x < SQRTHF ) {\n  //     e -= 1;\n  //     x = x + x - 1.0;\n  //   } else { x = x - 1.0; }\n  Packet mask = pcmp_lt(x, cst_cephes_SQRTHF);\n  Packet tmp = pand(x, mask);\n  x = psub(x, cst_1);\n  e = psub(e, pand(cst_1, mask));\n  x = padd(x, tmp);\n\n  Packet x2 = pmul(x, x);\n  Packet x3 = pmul(x2, x);\n\n  // Evaluate the polynomial approximant of degree 8 in three parts, probably\n  // to improve instruction-level parallelism.\n  Packet y, y1, y2;\n  y  = pmadd(cst_cephes_log_p0, x, cst_cephes_log_p1);\n  y1 = pmadd(cst_cephes_log_p3, x, cst_cephes_log_p4);\n  y2 = pmadd(cst_cephes_log_p6, x, cst_cephes_log_p7);\n  y  = pmadd(y, x, cst_cephes_log_p2);\n  y1 = pmadd(y1, x, cst_cephes_log_p5);\n  y2 = pmadd(y2, x, cst_cephes_log_p8);\n  y  = pmadd(y, x3, y1);\n  y  = pmadd(y, x3, y2);\n  y  = pmul(y, x3);\n\n  y = pmadd(cst_neg_half, x2, y);\n  x = padd(x, y);\n\n  // Add the logarithm of the exponent back to the result of the interpolation.\n  if (base2) {\n    const Packet cst_log2e = pset1<Packet>(static_cast<float>(EIGEN_LOG2E));\n    x = pmadd(x, cst_log2e, e);\n  } else {\n    const Packet cst_ln2 = pset1<Packet>(static_cast<float>(EIGEN_LN2));\n    x = pmadd(e, cst_ln2, x);\n  }\n\n  Packet invalid_mask = pcmp_lt_or_nan(_x, pzero(_x));\n  Packet iszero_mask  = pcmp_eq(_x,pzero(_x));\n  Packet pos_inf_mask = pcmp_eq(_x,cst_pos_inf);\n  // Filter out invalid inputs, i.e.:\n  //  - negative arg will be NAN\n  //  - 0 will be -INF\n  //  - +INF will be +INF\n  return pselect(iszero_mask, cst_minus_inf,\n                              por(pselect(pos_inf_mask,cst_pos_inf,x), invalid_mask));\n}\n\ntemplate <typename Packet>\nEIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS\nEIGEN_UNUSED\nPacket plog_float(const Packet _x)\n{\n  return plog_impl_float<Packet, /* base2 */ false>(_x);\n}\n\ntemplate 
<typename Packet>\nEIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS\nEIGEN_UNUSED\nPacket plog2_float(const Packet _x)\n{\n  return plog_impl_float<Packet, /* base2 */ true>(_x);\n}\n\n/* Returns the base e (2.718...) or base 2 logarithm of x.\n * The argument is separated into its exponent and fractional parts.\n * The logarithm of the fraction in the interval [sqrt(1/2), sqrt(2)]\n * is approximated by\n *\n *     log(1+x) = x - 0.5 x**2 + x**3 P(x)/Q(x).\n *\n * for more detail see: http://www.netlib.org/cephes/\n */\ntemplate <typename Packet, bool base2>\nEIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS\nEIGEN_UNUSED\nPacket plog_impl_double(const Packet _x)\n{\n  Packet x = _x;\n\n  const Packet cst_1              = pset1<Packet>(1.0);\n  const Packet cst_neg_half       = pset1<Packet>(-0.5);\n  // The smallest non-denormalized double.\n  const Packet cst_min_norm_pos   = pset1frombits<Packet>( static_cast<uint64_t>(0x0010000000000000ull));\n  const Packet cst_minus_inf      = pset1frombits<Packet>( static_cast<uint64_t>(0xfff0000000000000ull));\n  const Packet cst_pos_inf        = pset1frombits<Packet>( static_cast<uint64_t>(0x7ff0000000000000ull));\n\n\n // Polynomial Coefficients for log(1+x) = x - x**2/2 + x**3 P(x)/Q(x)\n //                             1/sqrt(2) <= x < sqrt(2)\n  const Packet cst_cephes_SQRTHF = pset1<Packet>(0.70710678118654752440E0);\n  const Packet cst_cephes_log_p0 = pset1<Packet>(1.01875663804580931796E-4);\n  const Packet cst_cephes_log_p1 = pset1<Packet>(4.97494994976747001425E-1);\n  const Packet cst_cephes_log_p2 = pset1<Packet>(4.70579119878881725854E0);\n  const Packet cst_cephes_log_p3 = pset1<Packet>(1.44989225341610930846E1);\n  const Packet cst_cephes_log_p4 = pset1<Packet>(1.79368678507819816313E1);\n  const Packet cst_cephes_log_p5 = pset1<Packet>(7.70838733755885391666E0);\n\n  const Packet cst_cephes_log_q0 = pset1<Packet>(1.0);\n  const Packet cst_cephes_log_q1 = pset1<Packet>(1.12873587189167450590E1);\n  const Packet cst_cephes_log_q2 = pset1<Packet>(4.52279145837532221105E1);\n  const Packet cst_cephes_log_q3 = pset1<Packet>(8.29875266912776603211E1);\n  const Packet cst_cephes_log_q4 = pset1<Packet>(7.11544750618563894466E1);\n  const Packet cst_cephes_log_q5 = pset1<Packet>(2.31251620126765340583E1);\n\n  // Truncate input values to the minimum positive normal.\n  x = pmax(x, cst_min_norm_pos);\n\n  Packet e;\n  // extract the significand in the range [0.5,1) and the exponent\n  x = pfrexp(x,e);\n  \n  // Shift the inputs from the range [0.5,1) to [sqrt(1/2),sqrt(2))\n  // and shift by -1. 
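(For example, x = 0.6 < SQRTHF becomes 0.6 + 0.6 - 1.0 = 0.2 with e decremented, which preserves the value since 0.6 * 2^e = 1.2 * 2^(e-1).) 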
The values are then centered around 0, which improves\n  // the stability of the polynomial evaluation.\n  //   if( x < SQRTHF ) {\n  //     e -= 1;\n  //     x = x + x - 1.0;\n  //   } else { x = x - 1.0; }\n  Packet mask = pcmp_lt(x, cst_cephes_SQRTHF);\n  Packet tmp = pand(x, mask);\n  x = psub(x, cst_1);\n  e = psub(e, pand(cst_1, mask));\n  x = padd(x, tmp);\n\n  Packet x2 = pmul(x, x);\n  Packet x3 = pmul(x2, x);\n\n  // Evaluate the polynomial approximant, probably to improve instruction-level parallelism.\n  // y = x - 0.5*x^2 + x^3 * polevl( x, P, 5 ) / p1evl( x, Q, 5 );\n  Packet y, y1, y_;\n  y  = pmadd(cst_cephes_log_p0, x, cst_cephes_log_p1);\n  y1 = pmadd(cst_cephes_log_p3, x, cst_cephes_log_p4);\n  y  = pmadd(y, x, cst_cephes_log_p2);\n  y1 = pmadd(y1, x, cst_cephes_log_p5);\n  y_ = pmadd(y, x3, y1);\n\n  y  = pmadd(cst_cephes_log_q0, x, cst_cephes_log_q1);\n  y1 = pmadd(cst_cephes_log_q3, x, cst_cephes_log_q4);\n  y  = pmadd(y, x, cst_cephes_log_q2);\n  y1 = pmadd(y1, x, cst_cephes_log_q5);\n  y  = pmadd(y, x3, y1);\n\n  y_ = pmul(y_, x3);\n  y  = pdiv(y_, y);\n\n  y = pmadd(cst_neg_half, x2, y);\n  x = padd(x, y);\n\n  // Add the logarithm of the exponent back to the result of the interpolation.\n  if (base2) {\n    const Packet cst_log2e = pset1<Packet>(static_cast<double>(EIGEN_LOG2E));\n    x = pmadd(x, cst_log2e, e);\n  } else {\n    const Packet cst_ln2 = pset1<Packet>(static_cast<double>(EIGEN_LN2));\n    x = pmadd(e, cst_ln2, x);\n  }\n\n  Packet invalid_mask = pcmp_lt_or_nan(_x, pzero(_x));\n  Packet iszero_mask  = pcmp_eq(_x,pzero(_x));\n  Packet pos_inf_mask = pcmp_eq(_x,cst_pos_inf);\n  // Filter out invalid inputs, i.e.:\n  //  - negative arg will be NAN\n  //  - 0 will be -INF\n  //  - +INF will be +INF\n  return pselect(iszero_mask, cst_minus_inf,\n                              por(pselect(pos_inf_mask,cst_pos_inf,x), invalid_mask));\n}\n\ntemplate <typename Packet>\nEIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS\nEIGEN_UNUSED\nPacket plog_double(const Packet _x)\n{\n  return plog_impl_double<Packet, /* base2 */ false>(_x);\n}\n\ntemplate <typename Packet>\nEIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS\nEIGEN_UNUSED\nPacket plog2_double(const Packet _x)\n{\n  return plog_impl_double<Packet, /* base2 */ true>(_x);\n}\n\n/** \\internal \\returns log(1 + x) computed using W. Kahan's formula.\n    See: http://www.plunk.org/~hatch/rightway.php\n */\ntemplate<typename Packet>\nPacket generic_plog1p(const Packet& x)\n{\n  typedef typename unpacket_traits<Packet>::type ScalarType;\n  const Packet one = pset1<Packet>(ScalarType(1));\n  Packet xp1 = padd(x, one);\n  Packet small_mask = pcmp_eq(xp1, one);\n  Packet log1 = plog(xp1);\n  Packet inf_mask = pcmp_eq(xp1, log1);\n  Packet log_large = pmul(x, pdiv(log1, psub(xp1, one)));\n  return pselect(por(small_mask, inf_mask), x, log_large);\n}\n\n/** \\internal \\returns exp(x)-1 computed using W. Kahan's formula.\n    See: http://www.plunk.org/~hatch/rightway.php\n */\ntemplate<typename Packet>\nPacket generic_expm1(const Packet& x)\n{\n  typedef typename unpacket_traits<Packet>::type ScalarType;\n  const Packet one = pset1<Packet>(ScalarType(1));\n  const Packet neg_one = pset1<Packet>(ScalarType(-1));\n  Packet u = pexp(x);\n  Packet one_mask = pcmp_eq(u, one);\n  Packet u_minus_one = psub(u, one);\n  Packet neg_one_mask = pcmp_eq(u_minus_one, neg_one);\n  Packet logu = plog(u);\n  // The following comparison is to catch the case where\n  // exp(x) = +inf. 
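(u = +inf is the only value satisfying log(u) = u, because log(u) < u for every finite positive u.) 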
It is written in this way to avoid having\n  // to form the constant +inf, which depends on the packet\n  // type.\n  Packet pos_inf_mask = pcmp_eq(logu, u);\n  Packet expm1 = pmul(u_minus_one, pdiv(x, logu));\n  expm1 = pselect(pos_inf_mask, u, expm1);\n  return pselect(one_mask,\n                 x,\n                 pselect(neg_one_mask,\n                         neg_one,\n                         expm1));\n}\n\n\n// Exponential function. Works by writing \"x = m*log(2) + r\" where\n// \"m = floor(x/log(2)+1/2)\" and \"r\" is the remainder. The result is then\n// \"exp(x) = 2^m*exp(r)\" where exp(r) is in the range [-1,1).\ntemplate <typename Packet>\nEIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS\nEIGEN_UNUSED\nPacket pexp_float(const Packet _x)\n{\n  const Packet cst_1      = pset1<Packet>(1.0f);\n  const Packet cst_half   = pset1<Packet>(0.5f);\n  const Packet cst_exp_hi = pset1<Packet>( 88.723f);\n  const Packet cst_exp_lo = pset1<Packet>(-88.723f);\n\n  const Packet cst_cephes_LOG2EF = pset1<Packet>(1.44269504088896341f);\n  const Packet cst_cephes_exp_p0 = pset1<Packet>(1.9875691500E-4f);\n  const Packet cst_cephes_exp_p1 = pset1<Packet>(1.3981999507E-3f);\n  const Packet cst_cephes_exp_p2 = pset1<Packet>(8.3334519073E-3f);\n  const Packet cst_cephes_exp_p3 = pset1<Packet>(4.1665795894E-2f);\n  const Packet cst_cephes_exp_p4 = pset1<Packet>(1.6666665459E-1f);\n  const Packet cst_cephes_exp_p5 = pset1<Packet>(5.0000001201E-1f);\n\n  // Clamp x.\n  Packet x = pmax(pmin(_x, cst_exp_hi), cst_exp_lo);\n\n  // Express exp(x) as exp(m*ln(2) + r), start by extracting\n  // m = floor(x/ln(2) + 0.5).\n  Packet m = pfloor(pmadd(x, cst_cephes_LOG2EF, cst_half));\n\n  // Get r = x - m*ln(2). If no FMA instructions are available, m*ln(2) is\n  // subtracted out in two parts, m*C1+m*C2 = m*ln(2), to avoid accumulating\n  // truncation errors.\n  const Packet cst_cephes_exp_C1 = pset1<Packet>(-0.693359375f);\n  const Packet cst_cephes_exp_C2 = pset1<Packet>(2.12194440e-4f);\n  Packet r = pmadd(m, cst_cephes_exp_C1, x);\n  r = pmadd(m, cst_cephes_exp_C2, r);\n\n  Packet r2 = pmul(r, r);\n  Packet r3 = pmul(r2, r);\n\n  // Evaluate the polynomial approximant, improved by instruction-level parallelism.\n  Packet y, y1, y2;\n  y  = pmadd(cst_cephes_exp_p0, r, cst_cephes_exp_p1);\n  y1 = pmadd(cst_cephes_exp_p3, r, cst_cephes_exp_p4);\n  y2 = padd(r, cst_1);\n  y  = pmadd(y, r, cst_cephes_exp_p2);\n  y1 = pmadd(y1, r, cst_cephes_exp_p5);\n  y  = pmadd(y, r3, y1);\n  y  = pmadd(y, r2, y2);\n\n  // Return 2^m * exp(r).\n  // TODO: replace pldexp with faster implementation since y in [-1, 1).\n  return pmax(pldexp(y,m), _x);\n}\n\ntemplate <typename Packet>\nEIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS\nEIGEN_UNUSED\nPacket pexp_double(const Packet _x)\n{\n  Packet x = _x;\n\n  const Packet cst_1 = pset1<Packet>(1.0);\n  const Packet cst_2 = pset1<Packet>(2.0);\n  const Packet cst_half = pset1<Packet>(0.5);\n\n  const Packet cst_exp_hi = pset1<Packet>(709.784);\n  const Packet cst_exp_lo = pset1<Packet>(-709.784);\n\n  const Packet cst_cephes_LOG2EF = pset1<Packet>(1.4426950408889634073599);\n  const Packet cst_cephes_exp_p0 = pset1<Packet>(1.26177193074810590878e-4);\n  const Packet cst_cephes_exp_p1 = pset1<Packet>(3.02994407707441961300e-2);\n  const Packet cst_cephes_exp_p2 = pset1<Packet>(9.99999999999999999910e-1);\n  const Packet cst_cephes_exp_q0 = pset1<Packet>(3.00198505138664455042e-6);\n  const Packet cst_cephes_exp_q1 = pset1<Packet>(2.52448340349684104192e-3);\n  const Packet 
cst_cephes_exp_q2 = pset1<Packet>(2.27265548208155028766e-1);\n  const Packet cst_cephes_exp_q3 = pset1<Packet>(2.00000000000000000009e0);\n  const Packet cst_cephes_exp_C1 = pset1<Packet>(0.693145751953125);\n  const Packet cst_cephes_exp_C2 = pset1<Packet>(1.42860682030941723212e-6);\n\n  Packet tmp, fx;\n\n  // clamp x\n  x = pmax(pmin(x, cst_exp_hi), cst_exp_lo);\n  // Express exp(x) as exp(g + n*log(2)).\n  fx = pmadd(cst_cephes_LOG2EF, x, cst_half);\n\n  // Get the integer modulus of log(2), i.e. the \"n\" described above.\n  fx = pfloor(fx);\n\n  // Get the remainder modulo log(2), i.e. the \"g\" described above. Subtract\n  // n*log(2) out in two steps, i.e. n*C1 + n*C2, C1+C2=log2 to get the last\n  // digits right.\n  tmp = pmul(fx, cst_cephes_exp_C1);\n  Packet z = pmul(fx, cst_cephes_exp_C2);\n  x = psub(x, tmp);\n  x = psub(x, z);\n\n  Packet x2 = pmul(x, x);\n\n  // Evaluate the numerator polynomial of the rational interpolant.\n  Packet px = cst_cephes_exp_p0;\n  px = pmadd(px, x2, cst_cephes_exp_p1);\n  px = pmadd(px, x2, cst_cephes_exp_p2);\n  px = pmul(px, x);\n\n  // Evaluate the denominator polynomial of the rational interpolant.\n  Packet qx = cst_cephes_exp_q0;\n  qx = pmadd(qx, x2, cst_cephes_exp_q1);\n  qx = pmadd(qx, x2, cst_cephes_exp_q2);\n  qx = pmadd(qx, x2, cst_cephes_exp_q3);\n\n  // I don't really get this bit, copied from the SSE2 routines, so...\n  // TODO(gonnet): Figure out what is going on here, perhaps find a better\n  // rational interpolant?\n  x = pdiv(px, psub(qx, px));\n  x = pmadd(cst_2, x, cst_1);\n\n  // Construct the result 2^n * exp(g) = e * x. The max is used to catch\n  // non-finite values in the input.\n  // TODO: replace pldexp with faster implementation since x in [-1, 1).\n  return pmax(pldexp(x,fx), _x);\n}\n\n// The following code is inspired by the following stack-overflow answer:\n//   https://stackoverflow.com/questions/30463616/payne-hanek-algorithm-implementation-in-c/30465751#30465751\n// It has been largely optimized:\n//  - By-pass calls to frexp.\n//  - Aligned loads of required 96 bits of 2/pi. 
This is accomplished by\n//    (1) balancing the mantissa and exponent so that the required bits of 2/pi are\n//    aligned on 8 bits, and (2) replicating the storage of the bits of 2/pi.\n//  - Avoid a branch in rounding and extraction of the remaining fractional part.\n// Overall, I measured a speed up higher than x2 on x86-64.\ninline float trig_reduce_huge (float xf, int *quadrant)\n{\n  using Eigen::numext::int32_t;\n  using Eigen::numext::uint32_t;\n  using Eigen::numext::int64_t;\n  using Eigen::numext::uint64_t;\n\n  const double pio2_62 = 3.4061215800865545e-19;    // pi/2 * 2^-62\n  const uint64_t zero_dot_five = uint64_t(1) << 61; // 0.5 in 2.62-bit fixed-point format\n\n  // 192 bits of 2/pi for Payne-Hanek reduction\n  // Bits are introduced by packet of 8 to enable aligned reads.\n  static const uint32_t two_over_pi [] = \n  {\n    0x00000028, 0x000028be, 0x0028be60, 0x28be60db,\n    0xbe60db93, 0x60db9391, 0xdb939105, 0x9391054a,\n    0x91054a7f, 0x054a7f09, 0x4a7f09d5, 0x7f09d5f4,\n    0x09d5f47d, 0xd5f47d4d, 0xf47d4d37, 0x7d4d3770,\n    0x4d377036, 0x377036d8, 0x7036d8a5, 0x36d8a566,\n    0xd8a5664f, 0xa5664f10, 0x664f10e4, 0x4f10e410,\n    0x10e41000, 0xe4100000\n  };\n  \n  uint32_t xi = numext::bit_cast<uint32_t>(xf);\n  // Below, -118 = -126 + 8.\n  //   -126 is to get the exponent,\n  //   +8 is to enable alignment of 2/pi's bits on 8 bits.\n  // This is possible because the fractional part of x has only 24 meaningful bits.\n  uint32_t e = (xi >> 23) - 118;\n  // Extract the mantissa and shift it to align it wrt the exponent\n  xi = ((xi & 0x007fffffu)| 0x00800000u) << (e & 0x7);\n\n  uint32_t i = e >> 3;\n  uint32_t twoopi_1  = two_over_pi[i-1];\n  uint32_t twoopi_2  = two_over_pi[i+3];\n  uint32_t twoopi_3  = two_over_pi[i+7];\n\n  // Compute x * 2/pi in 2.62-bit fixed-point format.\n  uint64_t p;\n  p = uint64_t(xi) * twoopi_3;\n  p = uint64_t(xi) * twoopi_2 + (p >> 32);\n  p = (uint64_t(xi * twoopi_1) << 32) + p;\n\n  // Round to nearest: add 0.5 and extract integral part.\n  uint64_t q = (p + zero_dot_five) >> 62;\n  *quadrant = int(q);\n  // Now it remains to compute \"r = x - q*pi/2\" with high accuracy,\n  // since we have p=x/(pi/2) with high accuracy, we can more efficiently compute r as:\n  //   r = (p-q)*pi/2,\n  // where the product can be carried out with sufficient accuracy using double precision.\n  p -= q<<62;\n  return float(double(int64_t(p)) * pio2_62);\n}\n\ntemplate<bool ComputeSine,typename Packet>\nEIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS\nEIGEN_UNUSED\n#if EIGEN_GNUC_AT_LEAST(4,4) && EIGEN_COMP_GNUC_STRICT\n__attribute__((optimize(\"-fno-unsafe-math-optimizations\")))\n#endif\nPacket psincos_float(const Packet& _x)\n{\n  typedef typename unpacket_traits<Packet>::integer_packet PacketI;\n\n  const Packet  cst_2oPI            = pset1<Packet>(0.636619746685028076171875f); // 2/PI\n  const Packet  cst_rounding_magic  = pset1<Packet>(12582912); // 2^23 for rounding\n  const PacketI csti_1              = pset1<PacketI>(1);\n  const Packet  cst_sign_mask       = pset1frombits<Packet>(0x80000000u);\n\n  Packet x = pabs(_x);\n\n  // Scale x by 2/Pi to find x's octant.\n  Packet y = pmul(x, cst_2oPI);\n\n  // Rounding trick:\n  Packet y_round = padd(y, cst_rounding_magic);\n  EIGEN_OPTIMIZATION_BARRIER(y_round)\n  PacketI y_int = preinterpret<PacketI>(y_round); // last 23 digits represent integer (if abs(x)<2^24)\n  y = psub(y_round, cst_rounding_magic); // nearest integer to x*2/pi\n\n  // Reduce x by y octants to get: -Pi/4 <= x <= +Pi/4\n  // 
using \"Extended precision modular arithmetic\"\n  #if defined(EIGEN_HAS_SINGLE_INSTRUCTION_MADD)\n  // This version requires true FMA for high accuracy\n  // It provides a max error of 1ULP up to (with absolute_error < 5.9605e-08):\n  const float huge_th = ComputeSine ? 117435.992f : 71476.0625f;\n  x = pmadd(y, pset1<Packet>(-1.57079601287841796875f), x);\n  x = pmadd(y, pset1<Packet>(-3.1391647326017846353352069854736328125e-07f), x);\n  x = pmadd(y, pset1<Packet>(-5.390302529957764765544681040410068817436695098876953125e-15f), x);\n  #else\n  // Without true FMA, the previous set of coefficients maintain 1ULP accuracy\n  // up to x<15.7 (for sin), but accuracy is immediately lost for x>15.7.\n  // We thus use one more iteration to maintain 2ULPs up to reasonably large inputs.\n\n  // The following set of coefficients maintain 1ULP up to 9.43 and 14.16 for sin and cos respectively.\n  // and 2 ULP up to:\n  const float huge_th = ComputeSine ? 25966.f : 18838.f;\n  x = pmadd(y, pset1<Packet>(-1.5703125), x); // = 0xbfc90000\n  EIGEN_OPTIMIZATION_BARRIER(x)\n  x = pmadd(y, pset1<Packet>(-0.000483989715576171875), x); // = 0xb9fdc000\n  EIGEN_OPTIMIZATION_BARRIER(x)\n  x = pmadd(y, pset1<Packet>(1.62865035235881805419921875e-07), x); // = 0x342ee000\n  x = pmadd(y, pset1<Packet>(5.5644315544167710640977020375430583953857421875e-11), x); // = 0x2e74b9ee\n\n  // For the record, the following set of coefficients maintain 2ULP up\n  // to a slightly larger range:\n  // const float huge_th = ComputeSine ? 51981.f : 39086.125f;\n  // but it slightly fails to maintain 1ULP for two values of sin below pi.\n  // x = pmadd(y, pset1<Packet>(-3.140625/2.), x);\n  // x = pmadd(y, pset1<Packet>(-0.00048351287841796875), x);\n  // x = pmadd(y, pset1<Packet>(-3.13855707645416259765625e-07), x);\n  // x = pmadd(y, pset1<Packet>(-6.0771006282767103812147979624569416046142578125e-11), x);\n\n  // For the record, with only 3 iterations it is possible to maintain\n  // 1 ULP up to 3PI (maybe more) and 2ULP up to 255.\n  // The coefficients are: 0xbfc90f80, 0xb7354480, 0x2e74b9ee\n  #endif\n\n  if(predux_any(pcmp_le(pset1<Packet>(huge_th),pabs(_x))))\n  {\n    const int PacketSize = unpacket_traits<Packet>::size;\n    EIGEN_ALIGN_TO_BOUNDARY(sizeof(Packet)) float vals[PacketSize];\n    EIGEN_ALIGN_TO_BOUNDARY(sizeof(Packet)) float x_cpy[PacketSize];\n    EIGEN_ALIGN_TO_BOUNDARY(sizeof(Packet)) int y_int2[PacketSize];\n    pstoreu(vals, pabs(_x));\n    pstoreu(x_cpy, x);\n    pstoreu(y_int2, y_int);\n    for(int k=0; k<PacketSize;++k)\n    {\n      float val = vals[k];\n      if(val>=huge_th && (numext::isfinite)(val))\n        x_cpy[k] = trig_reduce_huge(val,&y_int2[k]);\n    }\n    x = ploadu<Packet>(x_cpy);\n    y_int = ploadu<PacketI>(y_int2);\n  }\n\n  // Compute the sign to apply to the polynomial.\n  // sin: sign = second_bit(y_int) xor signbit(_x)\n  // cos: sign = second_bit(y_int+1)\n  Packet sign_bit = ComputeSine ? pxor(_x, preinterpret<Packet>(plogical_shift_left<30>(y_int)))\n                                : preinterpret<Packet>(plogical_shift_left<30>(padd(y_int,csti_1)));\n  sign_bit = pand(sign_bit, cst_sign_mask); // clear all but left most bit\n\n  // Get the polynomial selection mask from the second bit of y_int\n  // We'll calculate both (sin and cos) polynomials and then select from the two.\n  Packet poly_mask = preinterpret<Packet>(pcmp_eq(pand(y_int, csti_1), pzero(y_int)));\n\n  Packet x2 = pmul(x,x);\n\n  // Evaluate the cos(x) polynomial. 
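The coefficients below stay close to the truncated Taylor series of cos. 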
(-Pi/4 <= x <= Pi/4)\n  Packet y1 =        pset1<Packet>(2.4372266125283204019069671630859375e-05f);\n  y1 = pmadd(y1, x2, pset1<Packet>(-0.00138865201734006404876708984375f     ));\n  y1 = pmadd(y1, x2, pset1<Packet>(0.041666619479656219482421875f           ));\n  y1 = pmadd(y1, x2, pset1<Packet>(-0.5f));\n  y1 = pmadd(y1, x2, pset1<Packet>(1.f));\n\n  // Evaluate the sin(x) polynomial. (-Pi/4 <= x <= Pi/4)\n  // octave/matlab code to compute those coefficients:\n  //    x = (0:0.0001:pi/4)';\n  //    A = [x.^3 x.^5 x.^7];\n  //    w = ((1.-(x/(pi/4)).^2).^5)*2000+1;         # weights trading relative accuracy\n  //    c = (A'*diag(w)*A)\\(A'*diag(w)*(sin(x)-x)); # weighted LS, linear coeff forced to 1\n  //    printf('%.64f\\n %.64f\\n%.64f\\n', c(3), c(2), c(1))\n  //\n  Packet y2 =        pset1<Packet>(-0.0001959234114083702898469196984621021329076029360294342041015625f);\n  y2 = pmadd(y2, x2, pset1<Packet>( 0.0083326873655616851693794799871284340042620897293090820312500000f));\n  y2 = pmadd(y2, x2, pset1<Packet>(-0.1666666203982298255503735617821803316473960876464843750000000000f));\n  y2 = pmul(y2, x2);\n  y2 = pmadd(y2, x, x);\n\n  // Select the correct result from the two polynomials.\n  y = ComputeSine ? pselect(poly_mask,y2,y1)\n                  : pselect(poly_mask,y1,y2);\n\n  // Update the sign and filter huge inputs\n  return pxor(y, sign_bit);\n}\n\ntemplate<typename Packet>\nEIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS\nEIGEN_UNUSED\nPacket psin_float(const Packet& x)\n{\n  return psincos_float<true>(x);\n}\n\ntemplate<typename Packet>\nEIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS\nEIGEN_UNUSED\nPacket pcos_float(const Packet& x)\n{\n  return psincos_float<false>(x);\n}\n\n\ntemplate<typename Packet>\nEIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS\nEIGEN_UNUSED\nPacket psqrt_complex(const Packet& a) {\n  typedef typename unpacket_traits<Packet>::type Scalar;\n  typedef typename Scalar::value_type RealScalar;\n  typedef typename unpacket_traits<Packet>::as_real RealPacket;\n\n  // Computes the principal sqrt of the complex numbers in the input.\n  //\n  // For example, for packets containing 2 complex numbers stored in interleaved format\n  //    a = [a0, a1] = [x0, y0, x1, y1],\n  // where x0 = real(a0), y0 = imag(a0) etc., this function returns\n  //    b = [b0, b1] = [u0, v0, u1, v1],\n  // such that b0^2 = a0, b1^2 = a1.\n  //\n  // To derive the formula for the complex square roots, let's consider the equation for\n  // a single complex square root of the number x + i*y. We want to find real numbers\n  // u and v such that\n  //    (u + i*v)^2 = x + i*y  <=>\n  //    u^2 - v^2 + i*2*u*v = x + i*y.\n  // By equating the real and imaginary parts we get:\n  //    u^2 - v^2 = x\n  //    2*u*v = y.\n  //\n  // For x >= 0, this has the numerically stable solution\n  //    u = sqrt(0.5 * (x + sqrt(x^2 + y^2)))\n  //    v = 0.5 * (y / u)\n  // and for x < 0,\n  //    v = sign(y) * sqrt(0.5 * (-x + sqrt(x^2 + y^2)))\n  //    u = 0.5 * (y / v)\n  //\n  //  To avoid unnecessary over- and underflow, we compute sqrt(x^2 + y^2) as\n  //     l = max(|x|, |y|) * sqrt(1 + (min(|x|, |y|) / max(|x|, |y|))^2) ,\n\n  // In the following, without loss of generality, we have annotated the code, assuming\n  // that the input is a packet of 2 complex numbers.\n  //\n  // Step 1. Compute l = [l0, l0, l1, l1], where\n  //    l0 = sqrt(x0^2 + y0^2),  l1 = sqrt(x1^2 + y1^2)\n  // To avoid over- and underflow, we use the stable formula for each hypotenuse\n  //    l0 = (min0 == 0 ? 
max0 : max0 * sqrt(1 + (min0/max0)**2)),\n  // where max0 = max(|x0|, |y0|), min0 = min(|x0|, |y0|), and similarly for l1.\n\n  RealPacket a_abs = pabs(a.v);           // [|x0|, |y0|, |x1|, |y1|]\n  RealPacket a_abs_flip = pcplxflip(Packet(a_abs)).v; // [|y0|, |x0|, |y1|, |x1|]\n  RealPacket a_max = pmax(a_abs, a_abs_flip);\n  RealPacket a_min = pmin(a_abs, a_abs_flip);\n  RealPacket a_min_zero_mask = pcmp_eq(a_min, pzero(a_min));\n  RealPacket a_max_zero_mask = pcmp_eq(a_max, pzero(a_max));\n  RealPacket r = pdiv(a_min, a_max);\n  const RealPacket cst_one  = pset1<RealPacket>(RealScalar(1));\n  RealPacket l = pmul(a_max, psqrt(padd(cst_one, pmul(r, r))));  // [l0, l0, l1, l1]\n  // Set l to a_max if a_min is zero.\n  l = pselect(a_min_zero_mask, a_max, l);\n\n  // Step 2. Compute [rho0, *, rho1, *], where\n  // rho0 = sqrt(0.5 * (l0 + |x0|)), rho1 =  sqrt(0.5 * (l1 + |x1|))\n  // We don't care about the imaginary parts computed here. They will be overwritten later.\n  const RealPacket cst_half = pset1<RealPacket>(RealScalar(0.5));\n  Packet rho;\n  rho.v = psqrt(pmul(cst_half, padd(a_abs, l)));\n\n  // Step 3. Compute [rho0, eta0, rho1, eta1], where\n  // eta0 = (y0 / l0) / 2, and eta1 = (y1 / l1) / 2.\n  // Set eta = 0 if the input is 0 + i0.\n  RealPacket eta = pandnot(pmul(cst_half, pdiv(a.v, pcplxflip(rho).v)), a_max_zero_mask);\n  RealPacket real_mask = peven_mask(a.v);\n  Packet positive_real_result;\n  // Compute result for inputs with positive real part.\n  positive_real_result.v = pselect(real_mask, rho.v, eta);\n\n  // Step 4. Compute solution for inputs with negative real part:\n  //         [|eta0|, sign(y0)*rho0, |eta1|, sign(y1)*rho1]\n  const RealScalar neg_zero = RealScalar(numext::bit_cast<float>(0x80000000u));\n  const RealPacket cst_imag_sign_mask = pset1<Packet>(Scalar(RealScalar(0.0), neg_zero)).v;\n  RealPacket imag_signs = pand(a.v, cst_imag_sign_mask);\n  Packet negative_real_result;\n  // Notice that rho is positive, so taking its absolute value is a no-op.\n  negative_real_result.v = por(pabs(pcplxflip(positive_real_result).v), imag_signs);\n\n  // Step 5. Select solution branch based on the sign of the real parts.\n  Packet negative_real_mask;\n  negative_real_mask.v = pcmp_lt(pand(real_mask, a.v), pzero(a.v));\n  negative_real_mask.v = por(negative_real_mask.v, pcplxflip(negative_real_mask).v);\n  Packet result = pselect(negative_real_mask, negative_real_result, positive_real_result);\n\n  // Step 6. 
Handle special cases for infinities:\n  // * If z is (x,+∞), the result is (+∞,+∞) even if x is NaN\n  // * If z is (x,-∞), the result is (+∞,-∞) even if x is NaN\n  // * If z is (-∞,y), the result is (0*|y|,+∞) for finite or NaN y\n  // * If z is (+∞,y), the result is (+∞,0*|y|) for finite or NaN y\n  const RealPacket cst_pos_inf = pset1<RealPacket>(NumTraits<RealScalar>::infinity());\n  Packet is_inf;\n  is_inf.v = pcmp_eq(a_abs, cst_pos_inf);\n  Packet is_real_inf;\n  is_real_inf.v = pand(is_inf.v, real_mask);\n  is_real_inf = por(is_real_inf, pcplxflip(is_real_inf));\n  // prepare packet of (+∞,0*|y|) or (0*|y|,+∞), depending on the sign of the infinite real part.\n  Packet real_inf_result;\n  real_inf_result.v = pmul(a_abs, pset1<Packet>(Scalar(RealScalar(1.0), RealScalar(0.0))).v);\n  real_inf_result.v = pselect(negative_real_mask.v, pcplxflip(real_inf_result).v, real_inf_result.v);\n  // prepare packet of (+∞,+∞) or (+∞,-∞), depending on the sign of the infinite imaginary part.\n  Packet is_imag_inf;\n  is_imag_inf.v = pandnot(is_inf.v, real_mask);\n  is_imag_inf = por(is_imag_inf, pcplxflip(is_imag_inf));\n  Packet imag_inf_result;\n  imag_inf_result.v = por(pand(cst_pos_inf, real_mask), pandnot(a.v, real_mask));\n\n  return  pselect(is_imag_inf, imag_inf_result,\n                  pselect(is_real_inf, real_inf_result,result));\n}\n\n// TODO(rmlarsen): The following set of utilities for double word arithmetic\n// should perhaps be refactored as a separate file, since it would be generally\n// useful for special function implementation etc. Writing the algorithms in\n// terms of a double word type would also make the code more readable.\n\n// This function splits x into the nearest integer n and fractional part r,\n// such that x = n + r holds exactly.\ntemplate<typename Packet>\nEIGEN_STRONG_INLINE\nvoid absolute_split(const Packet& x, Packet& n, Packet& r) {\n  n = pround(x);\n  r = psub(x, n);\n}\n\n// This function computes the sum {s_hi, s_lo}, such that x + y = s_hi + s_lo\n// holds exactly, and s_hi = fl(x+y), if |x| >= |y|.\ntemplate<typename Packet>\nEIGEN_STRONG_INLINE\nvoid fast_twosum(const Packet& x, const Packet& y, Packet& s_hi, Packet& s_lo) {\n  s_hi = padd(x, y);\n  const Packet t = psub(s_hi, x);\n  s_lo = psub(y, t);\n}\n\n#ifdef EIGEN_HAS_SINGLE_INSTRUCTION_MADD\n// This function implements the extended precision product of\n// a pair of floating point numbers. Given {x, y}, it computes the pair\n// {p_hi, p_lo} such that x * y = p_hi + p_lo holds exactly and\n// p_hi = fl(x * y).\ntemplate<typename Packet>\nEIGEN_STRONG_INLINE\nvoid twoprod(const Packet& x, const Packet& y,\n             Packet& p_hi, Packet& p_lo) {\n  p_hi = pmul(x, y);\n  p_lo = pmadd(x, y, pnegate(p_hi));\n}\n\n#else\n\n// This function implements the Veltkamp splitting. 
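(For binary32 the significand has p = 24 bits, so the multiplier below is 2^12 + 1.) 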
Given a floating point\n// number x it returns the pair {x_hi, x_lo} such that x_hi + x_lo = x holds\n// exactly and that half of the significand of x fits in x_hi.\n// This is Algorithm 3 from Jean-Michel Muller, \"Elementary Functions\",\n// 3rd edition, Birkh\\\"auser, 2016.\ntemplate<typename Packet>\nEIGEN_STRONG_INLINE\nvoid veltkamp_splitting(const Packet& x, Packet& x_hi, Packet& x_lo) {\n  typedef typename unpacket_traits<Packet>::type Scalar;\n  EIGEN_CONSTEXPR int shift = (NumTraits<Scalar>::digits() + 1) / 2;\n  const Scalar shift_scale = Scalar(uint64_t(1) << shift);  // Scalar constructor not necessarily constexpr.\n  const Packet gamma = pmul(pset1<Packet>(shift_scale + Scalar(1)), x);\n  Packet rho = psub(x, gamma);\n  x_hi = padd(rho, gamma);\n  x_lo = psub(x, x_hi);\n}\n\n// This function implements Dekker's algorithm for products x * y.\n// Given floating point numbers {x, y} computes the pair\n// {p_hi, p_lo} such that x * y = p_hi + p_lo holds exactly and\n// p_hi = fl(x * y).\ntemplate<typename Packet>\nEIGEN_STRONG_INLINE\nvoid twoprod(const Packet& x, const Packet& y,\n             Packet& p_hi, Packet& p_lo) {\n  Packet x_hi, x_lo, y_hi, y_lo;\n  veltkamp_splitting(x, x_hi, x_lo);\n  veltkamp_splitting(y, y_hi, y_lo);\n\n  p_hi = pmul(x, y);\n  p_lo = pmadd(x_hi, y_hi, pnegate(p_hi));\n  p_lo = pmadd(x_hi, y_lo, p_lo);\n  p_lo = pmadd(x_lo, y_hi, p_lo);\n  p_lo = pmadd(x_lo, y_lo, p_lo);\n}\n\n#endif  // EIGEN_HAS_SINGLE_INSTRUCTION_MADD\n\n\n// This function implements Dekker's algorithm for the addition\n// of two double word numbers represented by {x_hi, x_lo} and {y_hi, y_lo}.\n// It returns the result as a pair {s_hi, s_lo} such that\n// x_hi + x_lo + y_hi + y_lo = s_hi + s_lo holds exactly.\n// This is Algorithm 5 from Jean-Michel Muller, \"Elementary Functions\",\n// 3rd edition, Birkh\\\"auser, 2016.\ntemplate<typename Packet>\nEIGEN_STRONG_INLINE\n  void twosum(const Packet& x_hi, const Packet& x_lo,\n              const Packet& y_hi, const Packet& y_lo,\n              Packet& s_hi, Packet& s_lo) {\n  const Packet x_greater_mask = pcmp_lt(pabs(y_hi), pabs(x_hi));\n  Packet r_hi_1, r_lo_1;\n  fast_twosum(x_hi, y_hi,r_hi_1, r_lo_1);\n  Packet r_hi_2, r_lo_2;\n  fast_twosum(y_hi, x_hi,r_hi_2, r_lo_2);\n  const Packet r_hi = pselect(x_greater_mask, r_hi_1, r_hi_2);\n\n  const Packet s1 = padd(padd(y_lo, r_lo_1), x_lo);\n  const Packet s2 = padd(padd(x_lo, r_lo_2), y_lo);\n  const Packet s = pselect(x_greater_mask, s1, s2);\n\n  fast_twosum(r_hi, s, s_hi, s_lo);\n}\n\n// This is a version of twosum for double word numbers,\n// which assumes that |x_hi| >= |y_hi|.\ntemplate<typename Packet>\nEIGEN_STRONG_INLINE\n  void fast_twosum(const Packet& x_hi, const Packet& x_lo,\n              const Packet& y_hi, const Packet& y_lo,\n              Packet& s_hi, Packet& s_lo) {\n  Packet r_hi, r_lo;\n  fast_twosum(x_hi, y_hi, r_hi, r_lo);\n  const Packet s = padd(padd(y_lo, r_lo), x_lo);\n  fast_twosum(r_hi, s, s_hi, s_lo);\n}\n\n// This is a version of twosum for adding a floating point number x to\n// the double word number {y_hi, y_lo}, with the assumption\n// that |x| >= |y_hi|.\ntemplate<typename Packet>\nEIGEN_STRONG_INLINE\nvoid fast_twosum(const Packet& x,\n                 const Packet& y_hi, const Packet& y_lo,\n                 Packet& s_hi, Packet& s_lo) {\n  Packet r_hi, r_lo;\n  fast_twosum(x, y_hi, r_hi, r_lo);\n  const Packet s = padd(y_lo, r_lo);\n  fast_twosum(r_hi, s, s_hi, s_lo);\n}\n\n// This function implements the multiplication of a double 
word\n// number represented by {x_hi, x_lo} by a floating point number y.\n// It returns the result as a pair {p_hi, p_lo} such that\n// (x_hi + x_lo) * y = p_hi + p_lo holds with a relative error\n// of less than 2*2^{-2p}, where p is the number of significand bits\n// in the floating point type.\n// This is Algorithm 7 from Jean-Michel Muller, \"Elementary Functions\",\n// 3rd edition, Birkh\\\"auser, 2016.\ntemplate<typename Packet>\nEIGEN_STRONG_INLINE\nvoid twoprod(const Packet& x_hi, const Packet& x_lo, const Packet& y,\n             Packet& p_hi, Packet& p_lo) {\n  Packet c_hi, c_lo1;\n  twoprod(x_hi, y, c_hi, c_lo1);\n  const Packet c_lo2 = pmul(x_lo, y);\n  Packet t_hi, t_lo1;\n  fast_twosum(c_hi, c_lo2, t_hi, t_lo1);\n  const Packet t_lo2 = padd(t_lo1, c_lo1);\n  fast_twosum(t_hi, t_lo2, p_hi, p_lo);\n}\n\n// This function implements the multiplication of two double word\n// numbers represented by {x_hi, x_lo} and {y_hi, y_lo}.\n// It returns the result as a pair {p_hi, p_lo} such that\n// (x_hi + x_lo) * (y_hi + y_lo) = p_hi + p_lo holds with a relative error\n// of less than 2*2^{-2p}, where p is the number of significand bits\n// in the floating point type.\ntemplate<typename Packet>\nEIGEN_STRONG_INLINE\nvoid twoprod(const Packet& x_hi, const Packet& x_lo,\n             const Packet& y_hi, const Packet& y_lo,\n             Packet& p_hi, Packet& p_lo) {\n  Packet p_hi_hi, p_hi_lo;\n  twoprod(x_hi, x_lo, y_hi, p_hi_hi, p_hi_lo);\n  Packet p_lo_hi, p_lo_lo;\n  twoprod(x_hi, x_lo, y_lo, p_lo_hi, p_lo_lo);\n  fast_twosum(p_hi_hi, p_hi_lo, p_lo_hi, p_lo_lo, p_hi, p_lo);\n}\n\n// This function computes the reciprocal of a floating point number\n// with extra precision and returns the result as a double word.\ntemplate <typename Packet>\nvoid doubleword_reciprocal(const Packet& x, Packet& recip_hi, Packet& recip_lo) {\n  typedef typename unpacket_traits<Packet>::type Scalar;\n  // 1. Approximate the reciprocal as the reciprocal of the high order element.\n  Packet approx_recip = prsqrt(x);\n  approx_recip = pmul(approx_recip, approx_recip);\n\n  // 2. Run one step of Newton-Raphson iteration in double word arithmetic\n  // to get the bottom half. 
The NR iteration for reciprocal of 'a' is\n  //    x_{i+1} = x_i * (2 - a * x_i)\n\n  // -a*x_i\n  Packet t1_hi, t1_lo;\n  twoprod(pnegate(x), approx_recip, t1_hi, t1_lo);\n  // 2 - a*x_i\n  Packet t2_hi, t2_lo;\n  fast_twosum(pset1<Packet>(Scalar(2)), t1_hi, t2_hi, t2_lo);\n  Packet t3_hi, t3_lo;\n  fast_twosum(t2_hi, padd(t2_lo, t1_lo), t3_hi, t3_lo);\n  // x_i * (2 - a * x_i)\n  twoprod(t3_hi, t3_lo, approx_recip, recip_hi, recip_lo);\n}\n\n\n// This function computes log2(x) and returns the result as a double word.\ntemplate <typename Scalar>\nstruct accurate_log2 {\n  template <typename Packet>\n  EIGEN_STRONG_INLINE\n  void operator()(const Packet& x, Packet& log2_x_hi, Packet& log2_x_lo) {\n    log2_x_hi = plog2(x);\n    log2_x_lo = pzero(x);\n  }\n};\n\n// This specialization uses a more accurate algorithm to compute log2(x) for\n// floats in [1/sqrt(2);sqrt(2)] with a relative accuracy of ~6.42e-10.\n// This additional accuracy is needed to counter the error-magnification\n// inherent in multiplying by a potentially large exponent in pow(x,y).\n// The minimax polynomial used was calculated using the Sollya tool.\n// See sollya.org.\ntemplate <>\nstruct accurate_log2<float> {\n  template <typename Packet>\n  EIGEN_STRONG_INLINE\n  void operator()(const Packet& z, Packet& log2_x_hi, Packet& log2_x_lo) {\n    // The function log2(1+x)/x is approximated in the interval\n    // [1/sqrt(2)-1;sqrt(2)-1] by a degree 10 polynomial of the form\n    //  Q(x) = (C0 + x * (C1 + x * (C2 + x * (C3 + x * P(x))))),\n    // where the degree 6 polynomial P(x) is evaluated in single precision,\n    // while the remaining 4 terms of Q(x), as well as the final multiplication by x\n    // to reconstruct log2(1+x) are evaluated in extra precision using\n    // double word arithmetic. 
C0 through C3 are extra precise constants\n    // stored as double words.\n    //\n    // The polynomial coefficients were calculated using Sollya commands:\n    // > n = 10;\n    // > f = log2(1+x)/x;\n    // > interval = [sqrt(0.5)-1;sqrt(2)-1];\n    // > p = fpminimax(f,n,[|double,double,double,double,single...|],interval,relative,floating);\n    \n    const Packet p6 = pset1<Packet>( 9.703654795885e-2f);\n    const Packet p5 = pset1<Packet>(-0.1690667718648f);\n    const Packet p4 = pset1<Packet>( 0.1720575392246f);\n    const Packet p3 = pset1<Packet>(-0.1789081543684f);\n    const Packet p2 = pset1<Packet>( 0.2050433009862f);\n    const Packet p1 = pset1<Packet>(-0.2404672354459f);\n    const Packet p0 = pset1<Packet>( 0.2885761857032f);\n\n    const Packet C3_hi = pset1<Packet>(-0.360674142838f);\n    const Packet C3_lo = pset1<Packet>(-6.13283912543e-09f);\n    const Packet C2_hi = pset1<Packet>(0.480897903442f);\n    const Packet C2_lo = pset1<Packet>(-1.44861207474e-08f);\n    const Packet C1_hi = pset1<Packet>(-0.721347510815f);\n    const Packet C1_lo = pset1<Packet>(-4.84483164698e-09f);\n    const Packet C0_hi = pset1<Packet>(1.44269502163f);\n    const Packet C0_lo = pset1<Packet>(2.01711713999e-08f);\n    const Packet one = pset1<Packet>(1.0f);\n\n    const Packet x = psub(z, one);\n    // Evaluate P(x) in working precision.\n    // We evaluate it in multiple parts to improve instruction level\n    // parallelism.\n    Packet x2 = pmul(x,x);\n    Packet p_even = pmadd(p6, x2, p4);\n    p_even = pmadd(p_even, x2, p2);\n    p_even = pmadd(p_even, x2, p0);\n    Packet p_odd = pmadd(p5, x2, p3);\n    p_odd = pmadd(p_odd, x2, p1);\n    Packet p = pmadd(p_odd, x, p_even);\n\n    // Now evaluate the low-order terms of Q(x) in double word precision.\n    // In the following, due to the alternating signs and the fact that\n    // |x| < sqrt(2)-1, we can assume that |C*_hi| >= q_i, and use\n    // fast_twosum instead of the slower twosum.\n    Packet q_hi, q_lo;\n    Packet t_hi, t_lo;\n    // C3 + x * p(x)\n    twoprod(p, x, t_hi, t_lo);\n    fast_twosum(C3_hi, C3_lo, t_hi, t_lo, q_hi, q_lo);\n    // C2 + x * q(x)\n    twoprod(q_hi, q_lo, x, t_hi, t_lo);\n    fast_twosum(C2_hi, C2_lo, t_hi, t_lo, q_hi, q_lo);\n    // C1 + x * q(x)\n    twoprod(q_hi, q_lo, x, t_hi, t_lo);\n    fast_twosum(C1_hi, C1_lo, t_hi, t_lo, q_hi, q_lo);\n    // C0 + x * q(x)\n    twoprod(q_hi, q_lo, x, t_hi, t_lo);\n    fast_twosum(C0_hi, C0_lo, t_hi, t_lo, q_hi, q_lo);\n\n    // log2(z) ~= x * Q(x)\n    twoprod(q_hi, q_lo, x, log2_x_hi, log2_x_lo);\n  }\n};\n\n// This specialization uses a more accurate algorithm to compute log2(x) for\n// doubles in [1/sqrt(2);sqrt(2)] with a relative accuracy of ~1.27e-18.\n// This additional accuracy is needed to counter the error-magnification\n// inherent in multiplying by a potentially large exponent in pow(x,y).\n// The minimax polynomial used was calculated using the Sollya tool.\n// See sollya.org.\n\ntemplate <>\nstruct accurate_log2<double> {\n  template <typename Packet>\n  EIGEN_STRONG_INLINE\n  void operator()(const Packet& x, Packet& log2_x_hi, Packet& log2_x_lo) {\n    // We use a transformation of variables:\n    //    r = c * (x-1) / (x+1),\n    // such that\n    //    log2(x) = log2((1 + r/c) / (1 - r/c)) = f(r).\n    // The function f(r) can be approximated well using an odd polynomial\n    // of the form\n    //   P(r) = ((Q(r^2) * r^2 + C) * r^2 + 1) * r.\n    // For the implementation of log2<double> here, Q is of degree 6 with\n    // coefficients 
represented in working precision (double), while C is a\n    // constant represented in extra precision as a double word to achieve\n    // full accuracy.\n    //\n    // The polynomial coefficients were computed by the Sollya script:\n    //\n    // c = 2 / log(2);\n    // trans = c * (x-1)/(x+1);\n    // itrans = (1+x/c)/(1-x/c);\n    // interval=[trans(sqrt(0.5)); trans(sqrt(2))];\n    // print(interval);\n    // f = log2(itrans(x));\n    // p=fpminimax(f,[|1,3,5,7,9,11,13,15,17|],[|1,DD,double...|],interval,relative,floating);\n    const Packet q12 = pset1<Packet>(2.87074255468000586e-9);\n    const Packet q10 = pset1<Packet>(2.38957980901884082e-8);\n    const Packet q8 = pset1<Packet>(2.31032094540014656e-7);\n    const Packet q6 = pset1<Packet>(2.27279857398537278e-6);\n    const Packet q4 = pset1<Packet>(2.31271023278625638e-5);\n    const Packet q2 = pset1<Packet>(2.47556738444535513e-4);\n    const Packet q0 = pset1<Packet>(2.88543873228900172e-3);\n    const Packet C_hi = pset1<Packet>(0.0400377511598501157);\n    const Packet C_lo = pset1<Packet>(-4.77726582251425391e-19);\n    const Packet one = pset1<Packet>(1.0);\n\n    const Packet cst_2_log2e_hi = pset1<Packet>(2.88539008177792677);\n    const Packet cst_2_log2e_lo = pset1<Packet>(4.07660016854549667e-17);\n    // c * (x - 1)\n    Packet num_hi, num_lo;\n    twoprod(cst_2_log2e_hi, cst_2_log2e_lo, psub(x, one), num_hi, num_lo);\n    // TODO(rmlarsen): Investigate if using the division algorithm by\n    // Muller et al. is faster/more accurate.\n    // 1 / (x + 1)\n    Packet denom_hi, denom_lo;\n    doubleword_reciprocal(padd(x, one), denom_hi, denom_lo);\n    // r = c * (x-1) / (x+1)\n    Packet r_hi, r_lo;\n    twoprod(num_hi, num_lo, denom_hi, denom_lo, r_hi, r_lo);\n    // r2 = r * r\n    Packet r2_hi, r2_lo;\n    twoprod(r_hi, r_lo, r_hi, r_lo, r2_hi, r2_lo);\n    // r4 = r2 * r2\n    Packet r4_hi, r4_lo;\n    twoprod(r2_hi, r2_lo, r2_hi, r2_lo, r4_hi, r4_lo);\n\n    // Evaluate Q(r^2) in working precision. We evaluate it in two parts\n    // (even and odd in r^2) to improve instruction level parallelism.\n    Packet q_even = pmadd(q12, r4_hi, q8);\n    Packet q_odd = pmadd(q10, r4_hi, q6);\n    q_even = pmadd(q_even, r4_hi, q4);\n    q_odd = pmadd(q_odd, r4_hi, q2);\n    q_even = pmadd(q_even, r4_hi, q0);\n    Packet q = pmadd(q_odd, r2_hi, q_even);\n\n    // Now evaluate the low order terms of P(x) in double word precision.\n    // In the following, due to the increasing magnitude of the coefficients\n    // and r being constrained to [-0.5, 0.5] we can use fast_twosum instead\n    // of the slower twosum.\n    // Q(r^2) * r^2\n    Packet p_hi, p_lo;\n    twoprod(r2_hi, r2_lo, q, p_hi, p_lo);\n    // Q(r^2) * r^2 + C\n    Packet p1_hi, p1_lo;\n    fast_twosum(C_hi, C_lo, p_hi, p_lo, p1_hi, p1_lo);\n    // (Q(r^2) * r^2 + C) * r^2\n    Packet p2_hi, p2_lo;\n    twoprod(r2_hi, r2_lo, p1_hi, p1_lo, p2_hi, p2_lo);\n    // ((Q(r^2) * r^2 + C) * r^2 + 1)\n    Packet p3_hi, p3_lo;\n    fast_twosum(one, p2_hi, p2_lo, p3_hi, p3_lo);\n\n    // log2(x) ~= ((Q(r^2) * r^2 + C) * r^2 + 1) * r\n    twoprod(p3_hi, p3_lo, r_hi, r_lo, log2_x_hi, log2_x_lo);\n  }\n};\n\n// This function computes exp2(x) (i.e. 
2**x).\ntemplate <typename Scalar>\nstruct fast_accurate_exp2 {\n  template <typename Packet>\n  EIGEN_STRONG_INLINE\n  Packet operator()(const Packet& x) {\n    // TODO(rmlarsen): Add a pexp2 packet op.\n    return pexp(pmul(pset1<Packet>(Scalar(EIGEN_LN2)), x));\n  }\n};\n\n// This specialization uses a faster algorithm to compute exp2(x) for floats\n// in [-0.5;0.5] with a relative accuracy of 1 ulp.\n// The minimax polynomial used was calculated using the Sollya tool.\n// See sollya.org.\ntemplate <>\nstruct fast_accurate_exp2<float> {\n  template <typename Packet>\n  EIGEN_STRONG_INLINE\n  Packet operator()(const Packet& x) {\n    // This function approximates exp2(x) by a degree 6 polynomial of the form\n    // Q(x) = 1 + x * (C + x * P(x)), where the degree 4 polynomial P(x) is evaluated in\n    // single precision, and the remaining steps are evaluated with extra precision using\n    // double word arithmetic. C is an extra precise constant stored as a double word.\n    //\n    // The polynomial coefficients were calculated using Sollya commands:\n    // > n = 6;\n    // > f = 2^x;\n    // > interval = [-0.5;0.5];\n    // > p = fpminimax(f,n,[|1,double,single...|],interval,relative,floating);\n\n    const Packet p4 = pset1<Packet>(1.539513905e-4f);\n    const Packet p3 = pset1<Packet>(1.340007293e-3f);\n    const Packet p2 = pset1<Packet>(9.618283249e-3f);\n    const Packet p1 = pset1<Packet>(5.550328270e-2f);\n    const Packet p0 = pset1<Packet>(0.2402264923f);\n\n    const Packet C_hi = pset1<Packet>(0.6931471825f);\n    const Packet C_lo = pset1<Packet>(2.36836577e-08f);\n    const Packet one = pset1<Packet>(1.0f);\n\n    // Evaluate P(x) in working precision.\n    // We evaluate even and odd parts of the polynomial separately\n    // to gain some instruction level parallelism.\n    Packet x2 = pmul(x,x);\n    Packet p_even = pmadd(p4, x2, p2);\n    Packet p_odd = pmadd(p3, x2, p1);\n    p_even = pmadd(p_even, x2, p0);\n    Packet p = pmadd(p_odd, x, p_even);\n\n    // Evaluate the remaining terms of Q(x) with extra precision using\n    // double word arithmetic.\n    Packet p_hi, p_lo;\n    // x * p(x)\n    twoprod(p, x, p_hi, p_lo);\n    // C + x * p(x)\n    Packet q1_hi, q1_lo;\n    twosum(p_hi, p_lo, C_hi, C_lo, q1_hi, q1_lo);\n    // x * (C + x * p(x))\n    Packet q2_hi, q2_lo;\n    twoprod(q1_hi, q1_lo, x, q2_hi, q2_lo);\n    // 1 + x * (C + x * p(x))\n    Packet q3_hi, q3_lo;\n    // Since |q2_hi| <= sqrt(2)-1 < 1, we can use fast_twosum\n    // for adding it to unity here.\n    fast_twosum(one, q2_hi, q3_hi, q3_lo);\n    return padd(q3_hi, padd(q2_lo, q3_lo));\n  }\n};\n\n// This specialization uses a faster algorithm to compute exp2(x) for doubles\n// in [-0.5;0.5] with a relative accuracy of 1 ulp.\n// The minimax polynomial used was calculated using the Sollya tool.\n// See sollya.org.\ntemplate <>\nstruct fast_accurate_exp2<double> {\n  template <typename Packet>\n  EIGEN_STRONG_INLINE\n  Packet operator()(const Packet& x) {\n    // This function approximates exp2(x) by a degree 11 polynomial of the form\n    // Q(x) = 1 + x * (C + x * P(x)), where the degree 9 polynomial P(x) is evaluated in\n    // double precision, and the remaining steps are evaluated with extra precision using\n    // double word arithmetic. 
C is an extra precise constant stored as a double word.\n    //\n    // The polynomial coefficients were calculated using Sollya commands:\n    // > n = 11;\n    // > f = 2^x;\n    // > interval = [-0.5;0.5];\n    // > p = fpminimax(f,n,[|1,DD,double...|],interval,relative,floating);\n\n    const Packet p9 = pset1<Packet>(4.431642109085495276e-10);\n    const Packet p8 = pset1<Packet>(7.073829923303358410e-9);\n    const Packet p7 = pset1<Packet>(1.017822306737031311e-7);\n    const Packet p6 = pset1<Packet>(1.321543498017646657e-6);\n    const Packet p5 = pset1<Packet>(1.525273342728892877e-5);\n    const Packet p4 = pset1<Packet>(1.540353045780084423e-4);\n    const Packet p3 = pset1<Packet>(1.333355814685869807e-3);\n    const Packet p2 = pset1<Packet>(9.618129107593478832e-3);\n    const Packet p1 = pset1<Packet>(5.550410866481961247e-2);\n    const Packet p0 = pset1<Packet>(0.240226506959101332);\n    const Packet C_hi = pset1<Packet>(0.693147180559945286);\n    const Packet C_lo = pset1<Packet>(4.81927865669806721e-17);\n    const Packet one = pset1<Packet>(1.0);\n\n    // Evaluate P(x) in working precision.\n    // We evaluate even and odd parts of the polynomial separately\n    // to gain some instruction level parallelism.\n    Packet x2 = pmul(x,x);\n    Packet p_even = pmadd(p8, x2, p6);\n    Packet p_odd = pmadd(p9, x2, p7);\n    p_even = pmadd(p_even, x2, p4);\n    p_odd = pmadd(p_odd, x2, p5);\n    p_even = pmadd(p_even, x2, p2);\n    p_odd = pmadd(p_odd, x2, p3);\n    p_even = pmadd(p_even, x2, p0);\n    p_odd = pmadd(p_odd, x2, p1);\n    Packet p = pmadd(p_odd, x, p_even);\n\n    // Evaluate the remaining terms of Q(x) with extra precision using\n    // double word arithmetic.\n    Packet p_hi, p_lo;\n    // x * p(x)\n    twoprod(p, x, p_hi, p_lo);\n    // C + x * p(x)\n    Packet q1_hi, q1_lo;\n    twosum(p_hi, p_lo, C_hi, C_lo, q1_hi, q1_lo);\n    // x * (C + x * p(x))\n    Packet q2_hi, q2_lo;\n    twoprod(q1_hi, q1_lo, x, q2_hi, q2_lo);\n    // 1 + x * (C + x * p(x))\n    Packet q3_hi, q3_lo;\n    // Since |q2_hi| <= sqrt(2)-1 < 1, we can use fast_twosum\n    // for adding it to unity here.\n    fast_twosum(one, q2_hi, q3_hi, q3_lo);\n    return padd(q3_hi, padd(q2_lo, q3_lo));\n  }\n};\n\n// This function implements the non-trivial case of pow(x,y) where x is\n// positive and y is (possibly) non-integer.\n// Formally, pow(x,y) = exp2(y * log2(x)), where exp2(x) is shorthand for 2^x.\n// TODO(rmlarsen): We should probably add this as a packet op 'ppow', to make it\n// easier to specialize or turn off for specific types and/or backends.\ntemplate <typename Packet>\nEIGEN_STRONG_INLINE Packet generic_pow_impl(const Packet& x, const Packet& y) {\n  typedef typename unpacket_traits<Packet>::type Scalar;\n  // Split x into exponent e_x and mantissa m_x.\n  Packet e_x;\n  Packet m_x = pfrexp(x, e_x);\n\n  // Adjust m_x to lie in [1/sqrt(2):sqrt(2)] to minimize absolute error in log2(m_x).\n  EIGEN_CONSTEXPR Scalar sqrt_half = Scalar(0.70710678118654752440);\n  const Packet m_x_scale_mask = pcmp_lt(m_x, pset1<Packet>(sqrt_half));\n  m_x = pselect(m_x_scale_mask, pmul(pset1<Packet>(Scalar(2)), m_x), m_x);\n  e_x = pselect(m_x_scale_mask, psub(e_x, pset1<Packet>(Scalar(1))), e_x);\n\n  // Compute log2(m_x) with 6 extra bits of accuracy.\n  Packet rx_hi, rx_lo;\n  accurate_log2<Scalar>()(m_x, rx_hi, rx_lo);\n\n  // Compute the two terms {y * e_x, y * r_x} in f = y * log2(x) with doubled\n  // precision using double word arithmetic.\n  Packet f1_hi, f1_lo, f2_hi, f2_lo;\n  
twoprod(e_x, y, f1_hi, f1_lo);\n  twoprod(rx_hi, rx_lo, y, f2_hi, f2_lo);\n  // Sum the two terms in f using double word arithmetic. We know\n  // that |e_x| > |log2(m_x)|, except for the case where e_x==0.\n  // This means that we can use fast_twosum(f1,f2).\n  // In the case e_x == 0, e_x * y = f1 = 0, so we don't lose any\n  // accuracy by violating the assumption of fast_twosum, because\n  // it's a no-op.\n  Packet f_hi, f_lo;\n  fast_twosum(f1_hi, f1_lo, f2_hi, f2_lo, f_hi, f_lo);\n\n  // Split f into integer and fractional parts.\n  Packet n_z, r_z;\n  absolute_split(f_hi, n_z, r_z);\n  r_z = padd(r_z, f_lo);\n  Packet n_r;\n  absolute_split(r_z, n_r, r_z);\n  n_z = padd(n_z, n_r);\n\n  // We now have an accurate split of f = n_z + r_z and can compute\n  //   x^y = 2**{n_z + r_z} = exp2(r_z) * 2**{n_z}.\n  // Since r_z is in [-0.5;0.5], we compute the first factor to high accuracy\n  // using a specialized algorithm. Multiplication by the second factor can\n  // be done exactly using pldexp(), since it is an integer power of 2.\n  const Packet e_r = fast_accurate_exp2<Scalar>()(r_z);\n  return pldexp(e_r, n_z);\n}\n\n// Generic implementation of pow(x,y).\ntemplate<typename Packet>\nEIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS\nEIGEN_UNUSED\nPacket generic_pow(const Packet& x, const Packet& y) {\n  typedef typename unpacket_traits<Packet>::type Scalar;\n\n  const Packet cst_pos_inf = pset1<Packet>(NumTraits<Scalar>::infinity());\n  const Packet cst_zero = pset1<Packet>(Scalar(0));\n  const Packet cst_one = pset1<Packet>(Scalar(1));\n  const Packet cst_nan = pset1<Packet>(NumTraits<Scalar>::quiet_NaN());\n\n  const Packet abs_x = pabs(x);\n  // Predicates for sign and magnitude of x.\n  const Packet x_is_zero = pcmp_eq(x, cst_zero);\n  const Packet x_is_neg = pcmp_lt(x, cst_zero);\n  const Packet abs_x_is_inf = pcmp_eq(abs_x, cst_pos_inf);\n  const Packet abs_x_is_one =  pcmp_eq(abs_x, cst_one);\n  const Packet abs_x_is_gt_one = pcmp_lt(cst_one, abs_x);\n  const Packet abs_x_is_lt_one = pcmp_lt(abs_x, cst_one);\n  const Packet x_is_one =  pandnot(abs_x_is_one, x_is_neg);\n  const Packet x_is_neg_one =  pand(abs_x_is_one, x_is_neg);\n  const Packet x_is_nan = pandnot(ptrue(x), pcmp_eq(x, x));\n\n  // Predicates for sign and magnitude of y.\n  const Packet y_is_one = pcmp_eq(y, cst_one);\n  const Packet y_is_zero = pcmp_eq(y, cst_zero);\n  const Packet y_is_neg = pcmp_lt(y, cst_zero);\n  const Packet y_is_pos = pandnot(ptrue(y), por(y_is_zero, y_is_neg));\n  const Packet y_is_nan = pandnot(ptrue(y), pcmp_eq(y, y));\n  const Packet abs_y_is_inf = pcmp_eq(pabs(y), cst_pos_inf);\n  EIGEN_CONSTEXPR Scalar huge_exponent =\n      (NumTraits<Scalar>::max_exponent() * Scalar(EIGEN_LN2)) /\n       NumTraits<Scalar>::epsilon();\n  const Packet abs_y_is_huge = pcmp_le(pset1<Packet>(huge_exponent), pabs(y));\n\n  // Predicates for whether y is integer and/or even.\n  const Packet y_is_int = pcmp_eq(pfloor(y), y);\n  const Packet y_div_2 = pmul(y, pset1<Packet>(Scalar(0.5)));\n  const Packet y_is_even = pcmp_eq(pround(y_div_2), y_div_2);\n\n  // Predicates encoding special cases for the value of pow(x,y)\n  const Packet invalid_negative_x = pandnot(pandnot(pandnot(x_is_neg, abs_x_is_inf),\n                                                    y_is_int),\n                                            abs_y_is_inf);\n  const Packet pow_is_one = por(por(x_is_one, y_is_zero),\n                                pand(x_is_neg_one,\n                                     por(abs_y_is_inf, 
pandnot(y_is_even, invalid_negative_x))));\n  const Packet pow_is_nan = por(invalid_negative_x, por(x_is_nan, y_is_nan));\n  const Packet pow_is_zero = por(por(por(pand(x_is_zero, y_is_pos),\n                                         pand(abs_x_is_inf, y_is_neg)),\n                                     pand(pand(abs_x_is_lt_one, abs_y_is_huge),\n                                          y_is_pos)),\n                                 pand(pand(abs_x_is_gt_one, abs_y_is_huge),\n                                      y_is_neg));\n  const Packet pow_is_inf = por(por(por(pand(x_is_zero, y_is_neg),\n                                        pand(abs_x_is_inf, y_is_pos)),\n                                    pand(pand(abs_x_is_lt_one, abs_y_is_huge),\n                                         y_is_neg)),\n                                pand(pand(abs_x_is_gt_one, abs_y_is_huge),\n                                     y_is_pos));\n\n  // General computation of pow(x,y) for positive x or negative x and integer y.\n  const Packet negate_pow_abs = pandnot(x_is_neg, y_is_even);\n  const Packet pow_abs = generic_pow_impl(abs_x, y);\n  return pselect(y_is_one, x,\n                 pselect(pow_is_one, cst_one,\n                         pselect(pow_is_nan, cst_nan,\n                                 pselect(pow_is_inf, cst_pos_inf,\n                                         pselect(pow_is_zero, cst_zero,\n                                                 pselect(negate_pow_abs, pnegate(pow_abs), pow_abs))))));\n}\n\n\n\n/* polevl (modified for Eigen)\n *\n *      Evaluate polynomial\n *\n *\n *\n * SYNOPSIS:\n *\n * int N;\n * Scalar x, y, coef[N+1];\n *\n * y = polevl<decltype(x), N>( x, coef);\n *\n *\n *\n * DESCRIPTION:\n *\n * Evaluates polynomial of degree N:\n *\n *                     2          N\n * y  =  C  + C x + C x  +...+ C x\n *        0    1     2          N\n *\n * Coefficients are stored in reverse order:\n *\n * coef[0] = C  , ..., coef[N] = C  .\n *            N                   0\n *\n *  The function p1evl() assumes that coef[N] = 1.0 and is\n * omitted from the array.  Its calling arguments are\n * otherwise the same as polevl().\n *\n *\n * The Eigen implementation is templatized.  For best speed, store\n * coef as a const array (constexpr), e.g.\n *\n * const double coef[] = {1.0, 2.0, 3.0, ...};\n *\n */\ntemplate <typename Packet, int N>\nstruct ppolevl {\n  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run(const Packet& x, const typename unpacket_traits<Packet>::type coeff[]) {\n    EIGEN_STATIC_ASSERT((N > 0), YOU_MADE_A_PROGRAMMING_MISTAKE);\n    return pmadd(ppolevl<Packet, N-1>::run(x, coeff), x, pset1<Packet>(coeff[N]));\n  }\n};\n\ntemplate <typename Packet>\nstruct ppolevl<Packet, 0> {\n  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run(const Packet& x, const typename unpacket_traits<Packet>::type coeff[]) {\n    EIGEN_UNUSED_VARIABLE(x);\n    return pset1<Packet>(coeff[0]);\n  }\n};\n\n/* chbevl (modified for Eigen)\n *\n *     Evaluate Chebyshev series\n *\n *\n *\n * SYNOPSIS:\n *\n * int N;\n * Scalar x, y, coef[N], chebevl();\n *\n * y = chbevl( x, coef, N );\n *\n *\n *\n * DESCRIPTION:\n *\n * Evaluates the series\n *\n *        N-1\n *         - '\n *  y  =   >   coef[i] T (x/2)\n *         -            i\n *        i=0\n *\n * of Chebyshev polynomials Ti at argument x/2.\n *\n * Coefficients are stored in reverse order, i.e. the zero\n * order term is last in the array.  
Note N is the number of\n * coefficients, not the order.\n *\n * If coefficients are for the interval a to b, x must\n * have been transformed to x -> 2(2x - b - a)/(b-a) before\n * entering the routine.  This maps x from (a, b) to (-1, 1),\n * over which the Chebyshev polynomials are defined.\n *\n * If the coefficients are for the inverted interval, in\n * which (a, b) is mapped to (1/b, 1/a), the transformation\n * required is x -> 2(2ab/x - b - a)/(b-a).  If b is infinity,\n * this becomes x -> 4a/x - 1.\n *\n *\n *\n * SPEED:\n *\n * Taking advantage of the recurrence properties of the\n * Chebyshev polynomials, the routine requires one more\n * addition per loop than evaluating a nested polynomial of\n * the same degree.\n *\n */\n\ntemplate <typename Packet, int N>\nstruct pchebevl {\n  EIGEN_DEVICE_FUNC\n  static EIGEN_STRONG_INLINE Packet run(Packet x, const typename unpacket_traits<Packet>::type coef[]) {\n    typedef typename unpacket_traits<Packet>::type Scalar;\n    Packet b0 = pset1<Packet>(coef[0]);\n    Packet b1 = pset1<Packet>(static_cast<Scalar>(0.f));\n    Packet b2;\n\n    for (int i = 1; i < N; i++) {\n      b2 = b1;\n      b1 = b0;\n      b0 = psub(pmadd(x, b1, pset1<Packet>(coef[i])), b2);\n    }\n\n    return pmul(pset1<Packet>(static_cast<Scalar>(0.5f)), psub(b0, b2));\n  }\n};\n\n} // end namespace internal\n} // end namespace Eigen\n\n#endif // EIGEN_ARCH_GENERIC_PACKET_MATH_FUNCTIONS_H\n"
  },
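  {
    "path": "example/editorial/double_word_arithmetic_sketch.cpp",
    "content": "// EDITORIAL SKETCH -- hypothetical file, NOT part of Eigen or regenie.\n// A minimal scalar illustration of the error-free transformations\n// (fast_twosum / twoprod) that GenericPacketMathFunctions.h applies to\n// whole SIMD packets; it assumes an FMA-capable libm (std::fma).\n#include <cmath>\n#include <cstdio>\n\n// fast_twosum: s_hi + s_lo == x + y exactly, provided |x| >= |y|.\nvoid fast_twosum(double x, double y, double& s_hi, double& s_lo) {\n  s_hi = x + y;\n  const double t = s_hi - x;  // portion of y actually absorbed into s_hi\n  s_lo = y - t;               // the rounding error of the addition\n}\n\n// twoprod (FMA form): p_hi + p_lo == x * y exactly.\nvoid twoprod(double x, double y, double& p_hi, double& p_lo) {\n  p_hi = x * y;\n  p_lo = std::fma(x, y, -p_hi);  // exact residual of the rounded product\n}\n\nint main() {\n  // (1/3)*(1/3) is not representable; the residual captures the lost bits.\n  double p_hi, p_lo;\n  twoprod(1.0 / 3.0, 1.0 / 3.0, p_hi, p_lo);\n  std::printf(\"x*y = %.17g + %.17g\\n\", p_hi, p_lo);\n\n  // 1.0 + 1e-20 rounds to 1.0; the low word keeps the 1e-20 exactly.\n  double s_hi, s_lo;\n  fast_twosum(1.0, 1e-20, s_hi, s_lo);\n  std::printf(\"x+y = %.17g + %.17g\\n\", s_hi, s_lo);\n  return 0;\n}\n"
  },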
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2019 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_ARCH_GENERIC_PACKET_MATH_FUNCTIONS_FWD_H\n#define EIGEN_ARCH_GENERIC_PACKET_MATH_FUNCTIONS_FWD_H\n\nnamespace Eigen {\nnamespace internal {\n\n// Forward declarations of the generic math functions\n// implemented in GenericPacketMathFunctions.h\n// This is needed to workaround a circular dependency.\n\n/***************************************************************************\n * Some generic implementations to be used by implementors\n***************************************************************************/\n\n/** Default implementation of pfrexp.\n  * It is expected to be called by implementers of template<> pfrexp.\n  */\ntemplate<typename Packet> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC\nPacket pfrexp_generic(const Packet& a, Packet& exponent);\n\n// Extracts the biased exponent value from Packet p, and casts the results to\n// a floating-point Packet type. Used by pfrexp_generic. Override this if\n// there is no unpacket_traits<Packet>::integer_packet.\ntemplate<typename Packet> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC\nPacket pfrexp_generic_get_biased_exponent(const Packet& p);\n\n/** Default implementation of pldexp.\n  * It is expected to be called by implementers of template<> pldexp.\n  */\ntemplate<typename Packet> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC\nPacket pldexp_generic(const Packet& a, const Packet& exponent);\n\n/** \\internal \\returns log(x) for single precision float */\ntemplate <typename Packet>\nEIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS\nEIGEN_UNUSED\nPacket plog_float(const Packet _x);\n\n/** \\internal \\returns log2(x) for single precision float */\ntemplate <typename Packet>\nEIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS\nEIGEN_UNUSED\nPacket plog2_float(const Packet _x);\n\n/** \\internal \\returns log(x) for single precision float */\ntemplate <typename Packet>\nEIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS\nEIGEN_UNUSED\nPacket plog_double(const Packet _x);\n\n/** \\internal \\returns log2(x) for single precision float */\ntemplate <typename Packet>\nEIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS\nEIGEN_UNUSED\nPacket plog2_double(const Packet _x);\n\n/** \\internal \\returns log(1 + x) */\ntemplate<typename Packet>\nPacket generic_plog1p(const Packet& x);\n\n/** \\internal \\returns exp(x)-1 */\ntemplate<typename Packet>\nPacket generic_expm1(const Packet& x);\n\n/** \\internal \\returns exp(x) for single precision float */\ntemplate <typename Packet>\nEIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS\nEIGEN_UNUSED\nPacket pexp_float(const Packet _x);\n\n/** \\internal \\returns exp(x) for double precision real numbers */\ntemplate <typename Packet>\nEIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS\nEIGEN_UNUSED\nPacket pexp_double(const Packet _x);\n\n/** \\internal \\returns sin(x) for single precision float */\ntemplate<typename Packet>\nEIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS\nEIGEN_UNUSED\nPacket psin_float(const Packet& x);\n\n/** \\internal \\returns cos(x) for single precision float */\ntemplate<typename Packet>\nEIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS\nEIGEN_UNUSED\nPacket pcos_float(const Packet& 
x);\n\n/** \\internal \\returns sqrt(x) for complex types */\ntemplate<typename Packet>\nEIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS\nEIGEN_UNUSED\nPacket psqrt_complex(const Packet& a);\n\ntemplate <typename Packet, int N> struct ppolevl;\n\n\n} // end namespace internal\n} // end namespace Eigen\n\n#endif // EIGEN_ARCH_GENERIC_PACKET_MATH_FUNCTIONS_FWD_H\n"
  },
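  {
    "path": "example/editorial/pow_decomposition_sketch.cpp",
    "content": "// EDITORIAL SKETCH -- hypothetical file, NOT part of Eigen or regenie.\n// A plain scalar model of the pow(x,y) decomposition used by\n// generic_pow_impl: split x into mantissa and exponent (cf. pfrexp above),\n// form f = y * log2(x), split f into integer part n and fraction r in\n// [-0.5, 0.5], then combine exp2(r) with an exact power-of-two scaling\n// (cf. pldexp). Unlike the library, log2 is evaluated here in ordinary\n// double precision, so this sketch lacks the double word accuracy of the\n// real implementation.\n#include <cmath>\n#include <cstdio>\n\ndouble pow_via_exp2(double x, double y) {\n  // x is assumed positive and finite; generic_pow handles the special\n  // cases (zeros, negative x, infinities, NaN, huge y) separately.\n  int e;\n  const double m = std::frexp(x, &e);       // x = m * 2^e, m in [0.5, 1)\n  const double f = y * (std::log2(m) + e);  // f = y * log2(x)\n  const double n = std::nearbyint(f);       // integer part of f\n  const double r = f - n;                   // fraction in [-0.5, 0.5]\n  return std::ldexp(std::exp2(r), static_cast<int>(n));  // exp2(r) * 2^n\n}\n\nint main() {\n  std::printf(\"%.17g vs %.17g\\n\", pow_via_exp2(3.1, 2.5), std::pow(3.1, 2.5));\n  return 0;\n}\n"
  },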
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/arch/Default/Half.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n//\n// The conversion routines are Copyright (c) Fabian Giesen, 2016.\n// The original license follows:\n//\n// Copyright (c) Fabian Giesen, 2016\n// All rights reserved.\n// Redistribution and use in source and binary forms, with or without\n// modification, are permitted.\n// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\n// \"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\n// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\n// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT\n// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\n// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT\n// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\n// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY\n// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\n// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\n\n// Standard 16-bit float type, mostly useful for GPUs. Defines a new\n// type Eigen::half (inheriting either from CUDA's or HIP's __half struct) with\n// operator overloads such that it behaves basically as an arithmetic\n// type. It will be quite slow on CPUs (so it is recommended to stay\n// in fp32 for CPUs, except for simple parameter conversions, I/O\n// to disk and the likes), but fast on GPUs.\n\n\n#ifndef EIGEN_HALF_H\n#define EIGEN_HALF_H\n\n#include <sstream>\n\n#if defined(EIGEN_HAS_GPU_FP16) || defined(EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC)\n// When compiling with GPU support, the \"__half_raw\" base class as well as\n// some other routines are defined in the GPU compiler header files\n// (cuda_fp16.h, hip_fp16.h), and they are not tagged constexpr\n// As a consequence, we get compile failures when compiling Eigen with\n// GPU support. Hence the need to disable EIGEN_CONSTEXPR when building\n// Eigen with GPU support\n  #pragma push_macro(\"EIGEN_CONSTEXPR\")\n  #undef EIGEN_CONSTEXPR\n  #define EIGEN_CONSTEXPR\n#endif\n\n#define F16_PACKET_FUNCTION(PACKET_F, PACKET_F16, METHOD)           \\\n  template <>                                                       \\\n  EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_UNUSED                \\\n  PACKET_F16 METHOD<PACKET_F16>(const PACKET_F16& _x) {             \\\n    return float2half(METHOD<PACKET_F>(half2float(_x)));            \\\n  }\n\nnamespace Eigen {\n\nstruct half;\n\nnamespace half_impl {\n\n// We want to use the __half_raw struct from the HIP header file only during the device compile phase.\n// This is required because of a quirk in the way TensorFlow GPU builds are done.\n// When compiling TensorFlow source code with GPU support, files that\n//  * contain GPU kernels (i.e. *.cu.cc files) are compiled via hipcc\n//  * do not contain GPU kernels ( i.e. 
*.cc files) are compiled via gcc (typically)\n//\n// TensorFlow uses the Eigen::half type as its FP16 type, and there are functions that\n//  * are defined in a file that gets compiled via hipcc AND\n//  * have Eigen::half as a pass-by-value argument AND\n//  * are called in a file that gets compiled via gcc\n//\n// In the scenario described above, the caller and callee will see different versions\n// of the Eigen::half base class __half_raw, and they will be compiled by different compilers\n//\n// There appears to be an ABI mismatch between gcc and clang (which is called by hipcc) that results in\n// the callee getting corrupted values for the Eigen::half argument.\n//\n// Making the host side compile phase of hipcc use the same Eigen::half impl, as the gcc compile, resolves\n// this error, and hence the following convoluted #if condition.\n#if !defined(EIGEN_HAS_GPU_FP16) || !defined(EIGEN_GPU_COMPILE_PHASE)\n// Make our own __half_raw definition that is similar to CUDA's.\nstruct __half_raw {\n#if (defined(EIGEN_HAS_GPU_FP16) && !defined(EIGEN_GPU_COMPILE_PHASE))\n  // Eigen::half can be used as the datatype for shared memory declarations (in Eigen and TF)\n  // The element type for shared memory cannot have non-trivial constructors\n  // and hence the following special casing (which skips the zero-initialization).\n  // Note that this check gets done even in the host compilation phase, and\n  // hence the need for this\n  EIGEN_DEVICE_FUNC __half_raw() {}\n#else\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __half_raw() : x(0) {}\n#endif\n#if defined(EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC)\n  explicit EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __half_raw(numext::uint16_t raw) : x(numext::bit_cast<__fp16>(raw)) {\n  }\n  __fp16 x;\n#else\n  explicit EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __half_raw(numext::uint16_t raw) : x(raw) {}\n  numext::uint16_t x;\n#endif\n};\n\n#elif defined(EIGEN_HAS_HIP_FP16)\n  // Nothing to do here\n  // HIP fp16 header file has a definition for __half_raw\n#elif defined(EIGEN_HAS_CUDA_FP16)\n  #if EIGEN_CUDA_SDK_VER < 90000\n    // In CUDA < 9.0, __half is the equivalent of CUDA 9's __half_raw\n    typedef __half __half_raw;\n  #endif // defined(EIGEN_HAS_CUDA_FP16)\n#elif defined(SYCL_DEVICE_ONLY)\n  typedef cl::sycl::half __half_raw;\n#endif\n\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __half_raw raw_uint16_to_half(numext::uint16_t x);\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half_raw float_to_half_rtne(float ff);\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float half_to_float(__half_raw h);\n\nstruct half_base : public __half_raw {\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR half_base() {}\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR half_base(const __half_raw& h) : __half_raw(h) {}\n\n#if defined(EIGEN_HAS_GPU_FP16)\n #if defined(EIGEN_HAS_HIP_FP16)\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR half_base(const __half& h) { x = __half_as_ushort(h); }\n #elif defined(EIGEN_HAS_CUDA_FP16)\n  #if EIGEN_CUDA_SDK_VER >= 90000\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR half_base(const __half& h) : __half_raw(*(__half_raw*)&h) {}\n  #endif\n #endif\n#endif\n};\n\n} // namespace half_impl\n\n// Class definition.\nstruct half : public half_impl::half_base {\n\n  // Writing this out as separate #if-else blocks to make the code easier to follow\n  // The same applies to most #if-else blocks in this file\n#if !defined(EIGEN_HAS_GPU_FP16) || !defined(EIGEN_GPU_COMPILE_PHASE)\n  // Use the same base class for the following two scenarios\n  // * when compiling without GPU support enabled\n  // * during host compile 
phase when compiling with GPU support enabled\n  typedef half_impl::__half_raw __half_raw;\n#elif defined(EIGEN_HAS_HIP_FP16)\n  // Nothing to do here\n  // HIP fp16 header file has a definition for __half_raw\n#elif defined(EIGEN_HAS_CUDA_FP16)\n  // Note that EIGEN_CUDA_SDK_VER is set to 0 even when compiling with HIP, so\n  // (EIGEN_CUDA_SDK_VER < 90000) is true even for HIP!  So keeping this within\n  // #if defined(EIGEN_HAS_CUDA_FP16) is needed\n  #if defined(EIGEN_CUDA_SDK_VER) && EIGEN_CUDA_SDK_VER < 90000\n    typedef half_impl::__half_raw __half_raw;\n  #endif\n#endif\n\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR half() {}\n\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR half(const __half_raw& h) : half_impl::half_base(h) {}\n\n#if defined(EIGEN_HAS_GPU_FP16)\n #if defined(EIGEN_HAS_HIP_FP16)\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR half(const __half& h) : half_impl::half_base(h) {}\n #elif defined(EIGEN_HAS_CUDA_FP16)\n  #if defined(EIGEN_CUDA_SDK_VER) && EIGEN_CUDA_SDK_VER >= 90000\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR half(const __half& h) : half_impl::half_base(h) {}\n  #endif\n #endif\n#endif\n\n\n  explicit EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR half(bool b)\n      : half_impl::half_base(half_impl::raw_uint16_to_half(b ? 0x3c00 : 0)) {}\n  template<class T>\n  explicit EIGEN_DEVICE_FUNC half(T val)\n      : half_impl::half_base(half_impl::float_to_half_rtne(static_cast<float>(val))) {}\n  explicit EIGEN_DEVICE_FUNC half(float f)\n      : half_impl::half_base(half_impl::float_to_half_rtne(f)) {}\n\n  // Following the convention of numpy, converting between complex and\n  // float will lead to loss of imag value.\n  template<typename RealScalar>\n  explicit EIGEN_DEVICE_FUNC half(std::complex<RealScalar> c)\n      : half_impl::half_base(half_impl::float_to_half_rtne(static_cast<float>(c.real()))) {}\n\n   EIGEN_DEVICE_FUNC operator float() const {  // NOLINT: Allow implicit conversion to float, because it is lossless.\n    return half_impl::half_to_float(*this);\n  }\n\n#if defined(EIGEN_HAS_GPU_FP16) && !defined(EIGEN_GPU_COMPILE_PHASE)\n  EIGEN_DEVICE_FUNC operator __half() const {\n    ::__half_raw hr;\n    hr.x = x;\n    return __half(hr);\n  }\n#endif\n};\n\n} // end namespace Eigen\n\nnamespace std {\ntemplate<>\nstruct numeric_limits<Eigen::half> {\n  static const bool is_specialized = true;\n  static const bool is_signed = true;\n  static const bool is_integer = false;\n  static const bool is_exact = false;\n  static const bool has_infinity = true;\n  static const bool has_quiet_NaN = true;\n  static const bool has_signaling_NaN = true;\n  static const float_denorm_style has_denorm = denorm_present;\n  static const bool has_denorm_loss = false;\n  static const std::float_round_style round_style = std::round_to_nearest;\n  static const bool is_iec559 = false;\n  static const bool is_bounded = false;\n  static const bool is_modulo = false;\n  static const int digits = 11;\n  static const int digits10 = 3;      // according to http://half.sourceforge.net/structstd_1_1numeric__limits_3_01half__float_1_1half_01_4.html\n  static const int max_digits10 = 5;  // according to http://half.sourceforge.net/structstd_1_1numeric__limits_3_01half__float_1_1half_01_4.html\n  static const int radix = 2;\n  static const int min_exponent = -13;\n  static const int min_exponent10 = -4;\n  static const int max_exponent = 16;\n  static const int max_exponent10 = 4;\n  static const bool traps = true;\n  static const bool tinyness_before = false;\n\n  static Eigen::half (min)() { return 
Eigen::half_impl::raw_uint16_to_half(0x400); }\n  static Eigen::half lowest() { return Eigen::half_impl::raw_uint16_to_half(0xfbff); }\n  static Eigen::half (max)() { return Eigen::half_impl::raw_uint16_to_half(0x7bff); }\n  static Eigen::half epsilon() { return Eigen::half_impl::raw_uint16_to_half(0x0800); }\n  static Eigen::half round_error() { return Eigen::half(0.5); }\n  static Eigen::half infinity() { return Eigen::half_impl::raw_uint16_to_half(0x7c00); }\n  static Eigen::half quiet_NaN() { return Eigen::half_impl::raw_uint16_to_half(0x7e00); }\n  static Eigen::half signaling_NaN() { return Eigen::half_impl::raw_uint16_to_half(0x7d00); }\n  static Eigen::half denorm_min() { return Eigen::half_impl::raw_uint16_to_half(0x1); }\n};\n\n// If std::numeric_limits<T> is specialized, should also specialize\n// std::numeric_limits<const T>, std::numeric_limits<volatile T>, and\n// std::numeric_limits<const volatile T>\n// https://stackoverflow.com/a/16519653/\ntemplate<>\nstruct numeric_limits<const Eigen::half> : numeric_limits<Eigen::half> {};\ntemplate<>\nstruct numeric_limits<volatile Eigen::half> : numeric_limits<Eigen::half> {};\ntemplate<>\nstruct numeric_limits<const volatile Eigen::half> : numeric_limits<Eigen::half> {};\n} // end namespace std\n\nnamespace Eigen {\n\nnamespace half_impl {\n\n#if (defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && \\\n     EIGEN_CUDA_ARCH >= 530) ||                                  \\\n    (defined(EIGEN_HAS_HIP_FP16) && defined(HIP_DEVICE_COMPILE))\n// Note: We deliberately do *not* define this to 1 even if we have Arm's native\n// fp16 type since GPU halves are rather different from native CPU halves.\n// TODO: Rename to something like EIGEN_HAS_NATIVE_GPU_FP16\n#define EIGEN_HAS_NATIVE_FP16\n#endif\n\n// Intrinsics for native fp16 support. 
Note that on current hardware,\n// these are no faster than fp32 arithmetic (you need to use the half2\n// versions to get the ALU speed increased), but you do save the\n// conversion steps back and forth.\n\n#if defined(EIGEN_HAS_NATIVE_FP16)\nEIGEN_STRONG_INLINE __device__ half operator + (const half& a, const half& b) {\n#if defined(EIGEN_CUDA_SDK_VER) && EIGEN_CUDA_SDK_VER >= 90000\n  return __hadd(::__half(a), ::__half(b));\n#else\n  return __hadd(a, b);\n#endif\n}\nEIGEN_STRONG_INLINE __device__ half operator * (const half& a, const half& b) {\n  return __hmul(a, b);\n}\nEIGEN_STRONG_INLINE __device__ half operator - (const half& a, const half& b) {\n  return __hsub(a, b);\n}\nEIGEN_STRONG_INLINE __device__ half operator / (const half& a, const half& b) {\n#if defined(EIGEN_CUDA_SDK_VER) && EIGEN_CUDA_SDK_VER >= 90000\n  return __hdiv(a, b);\n#else\n  float num = __half2float(a);\n  float denom = __half2float(b);\n  return __float2half(num / denom);\n#endif\n}\nEIGEN_STRONG_INLINE __device__ half operator - (const half& a) {\n  return __hneg(a);\n}\nEIGEN_STRONG_INLINE __device__ half& operator += (half& a, const half& b) {\n  a = a + b;\n  return a;\n}\nEIGEN_STRONG_INLINE __device__ half& operator *= (half& a, const half& b) {\n  a = a * b;\n  return a;\n}\nEIGEN_STRONG_INLINE __device__ half& operator -= (half& a, const half& b) {\n  a = a - b;\n  return a;\n}\nEIGEN_STRONG_INLINE __device__ half& operator /= (half& a, const half& b) {\n  a = a / b;\n  return a;\n}\nEIGEN_STRONG_INLINE __device__ bool operator == (const half& a, const half& b) {\n  return __heq(a, b);\n}\nEIGEN_STRONG_INLINE __device__ bool operator != (const half& a, const half& b) {\n  return __hne(a, b);\n}\nEIGEN_STRONG_INLINE __device__ bool operator < (const half& a, const half& b) {\n  return __hlt(a, b);\n}\nEIGEN_STRONG_INLINE __device__ bool operator <= (const half& a, const half& b) {\n  return __hle(a, b);\n}\nEIGEN_STRONG_INLINE __device__ bool operator > (const half& a, const half& b) {\n  return __hgt(a, b);\n}\nEIGEN_STRONG_INLINE __device__ bool operator >= (const half& a, const half& b) {\n  return __hge(a, b);\n}\n#endif\n\n#if defined(EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC)\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator + (const half& a, const half& b) {\n  return half(vaddh_f16(a.x, b.x));\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator * (const half& a, const half& b) {\n  return half(vmulh_f16(a.x, b.x));\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator - (const half& a, const half& b) {\n  return half(vsubh_f16(a.x, b.x));\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator / (const half& a, const half& b) {\n  return half(vdivh_f16(a.x, b.x));\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator - (const half& a) {\n  return half(vnegh_f16(a.x));\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator += (half& a, const half& b) {\n  a = half(vaddh_f16(a.x, b.x));\n  return a;\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator *= (half& a, const half& b) {\n  a = half(vmulh_f16(a.x, b.x));\n  return a;\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator -= (half& a, const half& b) {\n  a = half(vsubh_f16(a.x, b.x));\n  return a;\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator /= (half& a, const half& b) {\n  a = half(vdivh_f16(a.x, b.x));\n  return a;\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator == (const half& a, const half& b) {\n  return vceqh_f16(a.x, b.x);\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator != (const 
half& a, const half& b) {\n  return !vceqh_f16(a.x, b.x);\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator < (const half& a, const half& b) {\n  return vclth_f16(a.x, b.x);\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator <= (const half& a, const half& b) {\n  return vcleh_f16(a.x, b.x);\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator > (const half& a, const half& b) {\n  return vcgth_f16(a.x, b.x);\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator >= (const half& a, const half& b) {\n  return vcgeh_f16(a.x, b.x);\n}\n// We need to distinguish ‘clang as the CUDA compiler’ from ‘clang as the host compiler,\n// invoked by NVCC’ (e.g. on MacOS). The former needs to see both host and device implementation\n// of the functions, while the latter can only deal with one of them.\n#elif !defined(EIGEN_HAS_NATIVE_FP16) || (EIGEN_COMP_CLANG && !EIGEN_COMP_NVCC) // Emulate support for half floats\n\n#if EIGEN_COMP_CLANG && defined(EIGEN_CUDACC)\n// We need to provide emulated *host-side* FP16 operators for clang.\n#pragma push_macro(\"EIGEN_DEVICE_FUNC\")\n#undef EIGEN_DEVICE_FUNC\n#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_HAS_NATIVE_FP16)\n#define EIGEN_DEVICE_FUNC __host__\n#else // both host and device need emulated ops.\n#define EIGEN_DEVICE_FUNC __host__ __device__\n#endif\n#endif\n\n// Definitions for CPUs and older HIP+CUDA, mostly working through conversion\n// to/from fp32.\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator + (const half& a, const half& b) {\n  return half(float(a) + float(b));\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator * (const half& a, const half& b) {\n  return half(float(a) * float(b));\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator - (const half& a, const half& b) {\n  return half(float(a) - float(b));\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator / (const half& a, const half& b) {\n  return half(float(a) / float(b));\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator - (const half& a) {\n  half result;\n  result.x = a.x ^ 0x8000;\n  return result;\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator += (half& a, const half& b) {\n  a = half(float(a) + float(b));\n  return a;\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator *= (half& a, const half& b) {\n  a = half(float(a) * float(b));\n  return a;\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator -= (half& a, const half& b) {\n  a = half(float(a) - float(b));\n  return a;\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator /= (half& a, const half& b) {\n  a = half(float(a) / float(b));\n  return a;\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator == (const half& a, const half& b) {\n  return numext::equal_strict(float(a),float(b));\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator != (const half& a, const half& b) {\n  return numext::not_equal_strict(float(a), float(b));\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator < (const half& a, const half& b) {\n  return float(a) < float(b);\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator <= (const half& a, const half& b) {\n  return float(a) <= float(b);\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator > (const half& a, const half& b) {\n  return float(a) > float(b);\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator >= (const half& a, const half& b) {\n  return float(a) >= float(b);\n}\n\n#if defined(__clang__) && defined(__CUDA__)\n#pragma pop_macro(\"EIGEN_DEVICE_FUNC\")\n#endif\n#endif  // Emulate support for half floats\n\n// Division 
by an index. Do it in full float precision to avoid accuracy\n// issues in converting the denominator to half.\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator / (const half& a, Index b) {\n  return half(static_cast<float>(a) / static_cast<float>(b));\n}\n\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator++(half& a) {\n  a += half(1);\n  return a;\n}\n\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator--(half& a) {\n  a -= half(1);\n  return a;\n}\n\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator++(half& a, int) {\n  half original_value = a;\n  ++a;\n  return original_value;\n}\n\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator--(half& a, int) {\n  half original_value = a;\n  --a;\n  return original_value;\n}\n\n// Conversion routines, including fallbacks for the host or older CUDA.\n// Note that newer Intel CPUs (Haswell or newer) have vectorized versions of\n// these in hardware. If we need more performance on older/other CPUs, they are\n// also possible to vectorize directly.\n\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __half_raw raw_uint16_to_half(numext::uint16_t x) {\n  // We cannot simply do a \"return __half_raw(x)\" here, because __half_raw is a union type\n  // in the hip_fp16 header file, and that will trigger a compile error.\n  // On the other hand, having anything but a return statement also triggers a compile error\n  // because this is a constexpr function.\n  // Fortunately, since we need to disable EIGEN_CONSTEXPR for GPU anyway, we can get out\n  // of this catch-22 by having separate bodies for GPU / non GPU\n#if defined(EIGEN_HAS_GPU_FP16)\n   __half_raw h;\n   h.x = x;\n  return h;\n#else\n  return __half_raw(x);\n#endif\n}\n\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC numext::uint16_t raw_half_as_uint16(const __half_raw& h) {\n  // HIP/CUDA/Default have a member 'x' of type uint16_t.\n  // For ARM64 native half, the member 'x' is of type __fp16, so we need to bit-cast.\n  // For SYCL, cl::sycl::half is _Float16, so cast directly.\n#if defined(EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC)\n  return numext::bit_cast<numext::uint16_t>(h.x);\n#elif defined(SYCL_DEVICE_ONLY)\n  return numext::bit_cast<numext::uint16_t>(h);\n#else\n  return h.x;\n#endif\n}\n\nunion float32_bits {\n  unsigned int u;\n  float f;\n};\n\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half_raw float_to_half_rtne(float ff) {\n#if (defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300) || \\\n  (defined(EIGEN_HAS_HIP_FP16) && defined(EIGEN_HIP_DEVICE_COMPILE))\n  __half tmp_ff = __float2half(ff);\n  return *(__half_raw*)&tmp_ff;\n\n#elif defined(EIGEN_HAS_FP16_C)\n  __half_raw h;\n  h.x = _cvtss_sh(ff, 0);\n  return h;\n\n#elif defined(EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC)\n  __half_raw h;\n  h.x = static_cast<__fp16>(ff);\n  return h;\n\n#else\n  float32_bits f; f.f = ff;\n\n  const float32_bits f32infty = { 255 << 23 };\n  const float32_bits f16max = { (127 + 16) << 23 };\n  const float32_bits denorm_magic = { ((127 - 15) + (23 - 10) + 1) << 23 };\n  unsigned int sign_mask = 0x80000000u;\n  __half_raw o;\n  o.x = static_cast<numext::uint16_t>(0x0u);\n\n  unsigned int sign = f.u & sign_mask;\n  f.u ^= sign;\n\n  // NOTE all the integer compares in this function can be safely\n  // compiled into signed compares since all operands are below\n  // 0x80000000. Important if you want fast straight SSE2 code\n  // (since there's no unsigned PCMPGTD).\n\n  if (f.u >= f16max.u) {  // result is Inf or NaN (all exponent bits set)\n    o.x = (f.u > f32infty.u) ? 
0x7e00 : 0x7c00; // NaN->qNaN and Inf->Inf\n  } else {  // (De)normalized number or zero\n    if (f.u < (113 << 23)) {  // resulting FP16 is subnormal or zero\n      // use a magic value to align our 10 mantissa bits at the bottom of\n      // the float. as long as FP addition is round-to-nearest-even this\n      // just works.\n      f.f += denorm_magic.f;\n\n      // and one integer subtract of the bias later, we have our final float!\n      o.x = static_cast<numext::uint16_t>(f.u - denorm_magic.u);\n    } else {\n      unsigned int mant_odd = (f.u >> 13) & 1; // resulting mantissa is odd\n\n      // update exponent, rounding bias part 1\n      // Equivalent to `f.u += ((unsigned int)(15 - 127) << 23) + 0xfff`, but\n      // without arithmetic overflow.\n      f.u += 0xc8000fffU;\n      // rounding bias part 2\n      f.u += mant_odd;\n      // take the bits!\n      o.x = static_cast<numext::uint16_t>(f.u >> 13);\n    }\n  }\n\n  o.x |= static_cast<numext::uint16_t>(sign >> 16);\n  return o;\n#endif\n}\n\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float half_to_float(__half_raw h) {\n#if (defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300) || \\\n  (defined(EIGEN_HAS_HIP_FP16) && defined(EIGEN_HIP_DEVICE_COMPILE))\n  return __half2float(h);\n#elif defined(EIGEN_HAS_FP16_C)\n  return _cvtsh_ss(h.x);\n#elif defined(EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC)\n  return static_cast<float>(h.x);\n#else\n  const float32_bits magic = { 113 << 23 };\n  const unsigned int shifted_exp = 0x7c00 << 13; // exponent mask after shift\n  float32_bits o;\n\n  o.u = (h.x & 0x7fff) << 13;             // exponent/mantissa bits\n  unsigned int exp = shifted_exp & o.u;   // just the exponent\n  o.u += (127 - 15) << 23;                // exponent adjust\n\n  // handle exponent special cases\n  if (exp == shifted_exp) {     // Inf/NaN?\n    o.u += (128 - 16) << 23;    // extra exp adjust\n  } else if (exp == 0) {        // Zero/Denormal?\n    o.u += 1 << 23;             // extra exp adjust\n    o.f -= magic.f;             // renormalize\n  }\n\n  o.u |= (h.x & 0x8000) << 16;    // sign bit\n  return o.f;\n#endif\n}\n\n// --- standard functions ---\n\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isinf)(const half& a) {\n#ifdef EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC\n  return (numext::bit_cast<numext::uint16_t>(a.x) & 0x7fff) == 0x7c00;\n#else\n  return (a.x & 0x7fff) == 0x7c00;\n#endif\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isnan)(const half& a) {\n#if (defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530) || \\\n  (defined(EIGEN_HAS_HIP_FP16) && defined(EIGEN_HIP_DEVICE_COMPILE))\n  return __hisnan(a);\n#elif defined(EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC)\n  return (numext::bit_cast<numext::uint16_t>(a.x) & 0x7fff) > 0x7c00;\n#else\n  return (a.x & 0x7fff) > 0x7c00;\n#endif\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isfinite)(const half& a) {\n  return !(isinf EIGEN_NOT_A_MACRO (a)) && !(isnan EIGEN_NOT_A_MACRO (a));\n}\n\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half abs(const half& a) {\n#if defined(EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC)\n  return half(vabsh_f16(a.x));\n#else\n  half result;\n  result.x = a.x & 0x7FFF;\n  return result;\n#endif\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half exp(const half& a) {\n#if (EIGEN_CUDA_SDK_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 530) || \\\n  defined(EIGEN_HIP_DEVICE_COMPILE)\n  return half(hexp(a));\n#else\n   return half(::expf(float(a)));\n#endif\n}\nEIGEN_STRONG_INLINE 
EIGEN_DEVICE_FUNC half expm1(const half& a) {\n  return half(numext::expm1(float(a)));\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log(const half& a) {\n#if (defined(EIGEN_HAS_CUDA_FP16) && EIGEN_CUDA_SDK_VER >= 80000 && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530) || \\\n  (defined(EIGEN_HAS_HIP_FP16) && defined(EIGEN_HIP_DEVICE_COMPILE))\n  return half(::hlog(a));\n#else\n  return half(::logf(float(a)));\n#endif\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log1p(const half& a) {\n  return half(numext::log1p(float(a)));\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log10(const half& a) {\n  return half(::log10f(float(a)));\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log2(const half& a) {\n  return half(static_cast<float>(EIGEN_LOG2E) * ::logf(float(a)));\n}\n\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half sqrt(const half& a) {\n#if (EIGEN_CUDA_SDK_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 530) || \\\n  defined(EIGEN_HIP_DEVICE_COMPILE)\n  return half(hsqrt(a));\n#else\n    return half(::sqrtf(float(a)));\n#endif\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half pow(const half& a, const half& b) {\n  return half(::powf(float(a), float(b)));\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half sin(const half& a) {\n  return half(::sinf(float(a)));\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half cos(const half& a) {\n  return half(::cosf(float(a)));\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half tan(const half& a) {\n  return half(::tanf(float(a)));\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half tanh(const half& a) {\n  return half(::tanhf(float(a)));\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half asin(const half& a) {\n  return half(::asinf(float(a)));\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half acos(const half& a) {\n  return half(::acosf(float(a)));\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half floor(const half& a) {\n#if (EIGEN_CUDA_SDK_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 300) || \\\n  defined(EIGEN_HIP_DEVICE_COMPILE)\n  return half(hfloor(a));\n#else\n  return half(::floorf(float(a)));\n#endif\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half ceil(const half& a) {\n#if (EIGEN_CUDA_SDK_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 300) || \\\n  defined(EIGEN_HIP_DEVICE_COMPILE)\n  return half(hceil(a));\n#else\n  return half(::ceilf(float(a)));\n#endif\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half rint(const half& a) {\n  return half(::rintf(float(a)));\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half round(const half& a) {\n  return half(::roundf(float(a)));\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half fmod(const half& a, const half& b) {\n  return half(::fmodf(float(a), float(b)));\n}\n\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half (min)(const half& a, const half& b) {\n#if (defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530) || \\\n  (defined(EIGEN_HAS_HIP_FP16) && defined(EIGEN_HIP_DEVICE_COMPILE))\n  return __hlt(b, a) ? b : a;\n#else\n  const float f1 = static_cast<float>(a);\n  const float f2 = static_cast<float>(b);\n  return f2 < f1 ? b : a;\n#endif\n}\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half (max)(const half& a, const half& b) {\n#if (defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530) || \\\n  (defined(EIGEN_HAS_HIP_FP16) && defined(EIGEN_HIP_DEVICE_COMPILE))\n  return __hlt(a, b) ? b : a;\n#else\n  const float f1 = static_cast<float>(a);\n  const float f2 = static_cast<float>(b);\n  return f1 < f2 ? 
b : a;\n#endif\n}\n\n#ifndef EIGEN_NO_IO\nEIGEN_ALWAYS_INLINE std::ostream& operator << (std::ostream& os, const half& v) {\n  os << static_cast<float>(v);\n  return os;\n}\n#endif\n\n} // end namespace half_impl\n\n// import Eigen::half_impl::half into Eigen namespace\n// using half_impl::half;\n\nnamespace internal {\n\ntemplate<>\nstruct random_default_impl<half, false, false>\n{\n  static inline half run(const half& x, const half& y)\n  {\n    return x + (y-x) * half(float(std::rand()) / float(RAND_MAX));\n  }\n  static inline half run()\n  {\n    return run(half(-1.f), half(1.f));\n  }\n};\n\ntemplate<> struct is_arithmetic<half> { enum { value = true }; };\n\n} // end namespace internal\n\ntemplate<> struct NumTraits<Eigen::half>\n    : GenericNumTraits<Eigen::half>\n{\n  enum {\n    IsSigned = true,\n    IsInteger = false,\n    IsComplex = false,\n    RequireInitialization = false\n  };\n\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::half epsilon() {\n    return half_impl::raw_uint16_to_half(0x0800);\n  }\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::half dummy_precision() {\n    return half_impl::raw_uint16_to_half(0x211f); //  Eigen::half(1e-2f);\n  }\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::half highest() {\n    return half_impl::raw_uint16_to_half(0x7bff);\n  }\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::half lowest() {\n    return half_impl::raw_uint16_to_half(0xfbff);\n  }\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::half infinity() {\n    return half_impl::raw_uint16_to_half(0x7c00);\n  }\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::half quiet_NaN() {\n    return half_impl::raw_uint16_to_half(0x7e00);\n  }\n};\n\n} // end namespace Eigen\n\n#if defined(EIGEN_HAS_GPU_FP16) || defined(EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC)\n  #pragma pop_macro(\"EIGEN_CONSTEXPR\")\n#endif\n\nnamespace Eigen {\nnamespace numext {\n\n#if defined(EIGEN_GPU_COMPILE_PHASE)\n\ntemplate <>\nEIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool(isnan)(const Eigen::half& h) {\n  return (half_impl::isnan)(h);\n}\n\ntemplate <>\nEIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool(isinf)(const Eigen::half& h) {\n  return (half_impl::isinf)(h);\n}\n\ntemplate <>\nEIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool(isfinite)(const Eigen::half& h) {\n  return (half_impl::isfinite)(h);\n}\n\n#endif\n\ntemplate <>\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half bit_cast<Eigen::half, uint16_t>(const uint16_t& src) {\n  return Eigen::half(Eigen::half_impl::raw_uint16_to_half(src));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC uint16_t bit_cast<uint16_t, Eigen::half>(const Eigen::half& src) {\n  return Eigen::half_impl::raw_half_as_uint16(src);\n}\n\n}  // namespace numext\n}  // namespace Eigen\n\n// Add the missing shfl* intrinsics.\n// The __shfl* functions are only valid on HIP or _CUDA_ARCH_ >= 300.\n//   CUDA defines them for (__CUDA_ARCH__ >= 300 || !defined(__CUDA_ARCH__))\n//\n// HIP and CUDA prior to SDK 9.0 define\n//    __shfl, __shfl_up, __shfl_down, __shfl_xor for int and float\n// CUDA since 9.0 deprecates those and instead defines\n//    __shfl_sync, __shfl_up_sync, __shfl_down_sync, __shfl_xor_sync,\n//    with native support for __half and __nv_bfloat16\n//\n// Note that the following are __device__ - only functions.\n#if (defined(EIGEN_CUDACC) && (!defined(EIGEN_CUDA_ARCH) || EIGEN_CUDA_ARCH >= 300)) \\\n    || defined(EIGEN_HIPCC)\n\n#if 
defined(EIGEN_HAS_CUDA_FP16) && EIGEN_CUDA_SDK_VER >= 90000\n\n__device__ EIGEN_STRONG_INLINE Eigen::half __shfl_sync(unsigned mask, Eigen::half var, int srcLane, int width=warpSize) {\n  const __half h = var;\n  return static_cast<Eigen::half>(__shfl_sync(mask, h, srcLane, width));\n}\n\n__device__ EIGEN_STRONG_INLINE Eigen::half __shfl_up_sync(unsigned mask, Eigen::half var, unsigned int delta, int width=warpSize) {\n  const __half h = var;\n  return static_cast<Eigen::half>(__shfl_up_sync(mask, h, delta, width));\n}\n\n__device__ EIGEN_STRONG_INLINE Eigen::half __shfl_down_sync(unsigned mask, Eigen::half var, unsigned int delta, int width=warpSize) {\n  const __half h = var;\n  return static_cast<Eigen::half>(__shfl_down_sync(mask, h, delta, width));\n}\n\n__device__ EIGEN_STRONG_INLINE Eigen::half __shfl_xor_sync(unsigned mask, Eigen::half var, int laneMask, int width=warpSize) {\n  const __half h = var;\n  return static_cast<Eigen::half>(__shfl_xor_sync(mask, h, laneMask, width));\n}\n\n#else // HIP or CUDA SDK < 9.0\n\n__device__ EIGEN_STRONG_INLINE Eigen::half __shfl(Eigen::half var, int srcLane, int width=warpSize) {\n  const int ivar = static_cast<int>(Eigen::numext::bit_cast<Eigen::numext::uint16_t>(var));\n  return Eigen::numext::bit_cast<Eigen::half>(static_cast<Eigen::numext::uint16_t>(__shfl(ivar, srcLane, width)));\n}\n\n__device__ EIGEN_STRONG_INLINE Eigen::half __shfl_up(Eigen::half var, unsigned int delta, int width=warpSize) {\n  const int ivar = static_cast<int>(Eigen::numext::bit_cast<Eigen::numext::uint16_t>(var));\n  return Eigen::numext::bit_cast<Eigen::half>(static_cast<Eigen::numext::uint16_t>(__shfl_up(ivar, delta, width)));\n}\n\n__device__ EIGEN_STRONG_INLINE Eigen::half __shfl_down(Eigen::half var, unsigned int delta, int width=warpSize) {\n  const int ivar = static_cast<int>(Eigen::numext::bit_cast<Eigen::numext::uint16_t>(var));\n  return Eigen::numext::bit_cast<Eigen::half>(static_cast<Eigen::numext::uint16_t>(__shfl_down(ivar, delta, width)));\n}\n\n__device__ EIGEN_STRONG_INLINE Eigen::half __shfl_xor(Eigen::half var, int laneMask, int width=warpSize) {\n  const int ivar = static_cast<int>(Eigen::numext::bit_cast<Eigen::numext::uint16_t>(var));\n  return Eigen::numext::bit_cast<Eigen::half>(static_cast<Eigen::numext::uint16_t>(__shfl_xor(ivar, laneMask, width)));\n}\n\n#endif // HIP vs CUDA\n#endif // __shfl*\n\n// ldg() has an overload for __half_raw, but we also need one for Eigen::half.\n#if (defined(EIGEN_CUDACC) && (!defined(EIGEN_CUDA_ARCH) || EIGEN_CUDA_ARCH >= 350)) \\\n    || defined(EIGEN_HIPCC)\nEIGEN_STRONG_INLINE __device__ Eigen::half __ldg(const Eigen::half* ptr) {\n  return Eigen::half_impl::raw_uint16_to_half(__ldg(reinterpret_cast<const Eigen::numext::uint16_t*>(ptr)));\n}\n#endif // __ldg\n\n#if EIGEN_HAS_STD_HASH\nnamespace std {\ntemplate <>\nstruct hash<Eigen::half> {\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::size_t operator()(const Eigen::half& a) const {\n    return static_cast<std::size_t>(Eigen::numext::bit_cast<Eigen::numext::uint16_t>(a));\n  }\n};\n} // end namespace std\n#endif\n\n#endif // EIGEN_HALF_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/arch/Default/Settings.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>\n// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n\n/* All the parameters defined in this file can be specialized in the\n * architecture specific files, and/or by the user.\n * More to come... */\n\n#ifndef EIGEN_DEFAULT_SETTINGS_H\n#define EIGEN_DEFAULT_SETTINGS_H\n\n/** Defines the maximal loop size to enable meta unrolling of loops.\n  * Note that the value here is expressed in Eigen's own notion of \"number of FLOPS\",\n  * it does not correspond to the number of iterations or the number of instructions\n  */\n#ifndef EIGEN_UNROLLING_LIMIT\n#define EIGEN_UNROLLING_LIMIT 110\n#endif\n\n/** Defines the threshold between a \"small\" and a \"large\" matrix.\n  * This threshold is mainly used to select the proper product implementation.\n  */\n#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD\n#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8\n#endif\n\n/** Defines the maximal width of the blocks used in the triangular product and solver\n  * for vectors (level 2 blas xTRMV and xTRSV). The default is 8.\n  */\n#ifndef EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH\n#define EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH 8\n#endif\n\n\n/** Defines the default number of registers available for that architecture.\n  * Currently it must be 8 or 16. Other values will fail.\n  */\n#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS\n#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 8\n#endif\n\n#endif // EIGEN_DEFAULT_SETTINGS_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/arch/Default/TypeCasting.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>\n// Copyright (C) 2019 Rasmus Munk Larsen <rmlarsen@google.com>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_GENERIC_TYPE_CASTING_H\n#define EIGEN_GENERIC_TYPE_CASTING_H\n\nnamespace Eigen {\n\nnamespace internal {\n\ntemplate<>\nstruct scalar_cast_op<float, Eigen::half> {\n  EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op)\n  typedef Eigen::half result_type;\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half operator() (const float& a) const {\n    #if (defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300) || \\\n      (defined(EIGEN_HAS_HIP_FP16) && defined(EIGEN_HIP_DEVICE_COMPILE))\n      return __float2half(a);\n    #else\n      return Eigen::half(a);\n    #endif\n  }\n};\n\ntemplate<>\nstruct functor_traits<scalar_cast_op<float, Eigen::half> >\n{ enum { Cost = NumTraits<float>::AddCost, PacketAccess = false }; };\n\n\ntemplate<>\nstruct scalar_cast_op<int, Eigen::half> {\n  EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op)\n  typedef Eigen::half result_type;\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half operator() (const int& a) const {\n    #if (defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300) || \\\n      (defined(EIGEN_HAS_HIP_FP16) && defined(EIGEN_HIP_DEVICE_COMPILE))\n      return __float2half(static_cast<float>(a));\n    #else\n      return Eigen::half(static_cast<float>(a));\n    #endif\n  }\n};\n\ntemplate<>\nstruct functor_traits<scalar_cast_op<int, Eigen::half> >\n{ enum { Cost = NumTraits<float>::AddCost, PacketAccess = false }; };\n\n\ntemplate<>\nstruct scalar_cast_op<Eigen::half, float> {\n  EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op)\n  typedef float result_type;\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float operator() (const Eigen::half& a) const {\n    #if (defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300) || \\\n      (defined(EIGEN_HAS_HIP_FP16) && defined(EIGEN_HIP_DEVICE_COMPILE))\n      return __half2float(a);\n    #else\n      return static_cast<float>(a);\n    #endif\n  }\n};\n\ntemplate<>\nstruct functor_traits<scalar_cast_op<Eigen::half, float> >\n{ enum { Cost = NumTraits<float>::AddCost, PacketAccess = false }; };\n\n\ntemplate<>\nstruct scalar_cast_op<float, Eigen::bfloat16> {\n  EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op)\n  typedef Eigen::bfloat16 result_type;\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::bfloat16 operator() (const float& a) const {\n    return Eigen::bfloat16(a);\n  }\n};\n\ntemplate<>\nstruct functor_traits<scalar_cast_op<float, Eigen::bfloat16> >\n{ enum { Cost = NumTraits<float>::AddCost, PacketAccess = false }; };\n\n\ntemplate<>\nstruct scalar_cast_op<int, Eigen::bfloat16> {\n  EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op)\n  typedef Eigen::bfloat16 result_type;\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::bfloat16 operator() (const int& a) const {\n    return Eigen::bfloat16(static_cast<float>(a));\n  }\n};\n\ntemplate<>\nstruct functor_traits<scalar_cast_op<int, Eigen::bfloat16> >\n{ enum { Cost = NumTraits<float>::AddCost, PacketAccess = false }; };\n\n\ntemplate<>\nstruct scalar_cast_op<Eigen::bfloat16, float> {\n  EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op)\n  typedef float result_type;\n  
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float operator() (const Eigen::bfloat16& a) const {\n    return static_cast<float>(a);\n  }\n};\n\ntemplate<>\nstruct functor_traits<scalar_cast_op<Eigen::bfloat16, float> >\n{ enum { Cost = NumTraits<float>::AddCost, PacketAccess = false }; };\n\n\n}\n}\n\n#endif  // EIGEN_GENERIC_TYPE_CASTING_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/arch/GPU/MathFunctions.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_MATH_FUNCTIONS_GPU_H\n#define EIGEN_MATH_FUNCTIONS_GPU_H\n\nnamespace Eigen {\n\nnamespace internal {\n\n// Make sure this is only available when targeting a GPU: we don't want to\n// introduce conflicts between these packet_traits definitions and the ones\n// we'll use on the host side (SSE, AVX, ...)\n#if defined(EIGEN_GPUCC) && defined(EIGEN_USE_GPU)\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\nfloat4 plog<float4>(const float4& a)\n{\n  return make_float4(logf(a.x), logf(a.y), logf(a.z), logf(a.w));\n}\n\ntemplate<>  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\ndouble2 plog<double2>(const double2& a)\n{\n  using ::log;\n  return make_double2(log(a.x), log(a.y));\n}\n\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\nfloat4 plog1p<float4>(const float4& a)\n{\n  return make_float4(log1pf(a.x), log1pf(a.y), log1pf(a.z), log1pf(a.w));\n}\n\ntemplate<>  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\ndouble2 plog1p<double2>(const double2& a)\n{\n  return make_double2(log1p(a.x), log1p(a.y));\n}\n\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\nfloat4 pexp<float4>(const float4& a)\n{\n  return make_float4(expf(a.x), expf(a.y), expf(a.z), expf(a.w));\n}\n\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\ndouble2 pexp<double2>(const double2& a)\n{\n  using ::exp;\n  return make_double2(exp(a.x), exp(a.y));\n}\n\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\nfloat4 pexpm1<float4>(const float4& a)\n{\n  return make_float4(expm1f(a.x), expm1f(a.y), expm1f(a.z), expm1f(a.w));\n}\n\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\ndouble2 pexpm1<double2>(const double2& a)\n{\n  return make_double2(expm1(a.x), expm1(a.y));\n}\n\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\nfloat4 psqrt<float4>(const float4& a)\n{\n  return make_float4(sqrtf(a.x), sqrtf(a.y), sqrtf(a.z), sqrtf(a.w));\n}\n\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\ndouble2 psqrt<double2>(const double2& a)\n{\n  using ::sqrt;\n  return make_double2(sqrt(a.x), sqrt(a.y));\n}\n\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\nfloat4 prsqrt<float4>(const float4& a)\n{\n  return make_float4(rsqrtf(a.x), rsqrtf(a.y), rsqrtf(a.z), rsqrtf(a.w));\n}\n\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\ndouble2 prsqrt<double2>(const double2& a)\n{\n  return make_double2(rsqrt(a.x), rsqrt(a.y));\n}\n\n\n#endif\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_MATH_FUNCTIONS_GPU_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/arch/GPU/PacketMath.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_PACKET_MATH_GPU_H\n#define EIGEN_PACKET_MATH_GPU_H\n\nnamespace Eigen {\n\nnamespace internal {\n\n// Read-only data cached load available.\n#if defined(EIGEN_HIP_DEVICE_COMPILE) || (defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 350)\n#define EIGEN_GPU_HAS_LDG 1\n#endif\n\n// FP16 math available.\n#if (defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530)\n#define EIGEN_CUDA_HAS_FP16_ARITHMETIC 1\n#endif\n\n#if defined(EIGEN_HIP_DEVICE_COMPILE) || defined(EIGEN_CUDA_HAS_FP16_ARITHMETIC)\n#define EIGEN_GPU_HAS_FP16_ARITHMETIC 1\n#endif\n\n// Make sure this is only available when targeting a GPU: we don't want to\n// introduce conflicts between these packet_traits definitions and the ones\n// we'll use on the host side (SSE, AVX, ...)\n#if defined(EIGEN_GPUCC) && defined(EIGEN_USE_GPU)\n\ntemplate<> struct is_arithmetic<float4>  { enum { value = true }; };\ntemplate<> struct is_arithmetic<double2> { enum { value = true }; };\n\ntemplate<> struct packet_traits<float> : default_packet_traits\n{\n  typedef float4 type;\n  typedef float4 half;\n  enum {\n    Vectorizable = 1,\n    AlignedOnScalar = 1,\n    size=4,\n    HasHalfPacket = 0,\n\n    HasDiv  = 1,\n    HasSin  = 0,\n    HasCos  = 0,\n    HasLog  = 1,\n    HasExp  = 1,\n    HasSqrt = 1,\n    HasRsqrt = 1,\n    HasLGamma = 1,\n    HasDiGamma = 1,\n    HasZeta = 1,\n    HasPolygamma = 1,\n    HasErf = 1,\n    HasErfc = 1,\n    HasNdtri = 1,\n    HasBessel = 1,\n    HasIGamma = 1,\n    HasIGammaDerA = 1,\n    HasGammaSampleDerAlpha = 1,\n    HasIGammac = 1,\n    HasBetaInc = 1,\n\n    HasBlend = 0,\n    HasFloor = 1,\n  };\n};\n\ntemplate<> struct packet_traits<double> : default_packet_traits\n{\n  typedef double2 type;\n  typedef double2 half;\n  enum {\n    Vectorizable = 1,\n    AlignedOnScalar = 1,\n    size=2,\n    HasHalfPacket = 0,\n\n    HasDiv  = 1,\n    HasLog  = 1,\n    HasExp  = 1,\n    HasSqrt = 1,\n    HasRsqrt = 1,\n    HasLGamma = 1,\n    HasDiGamma = 1,\n    HasZeta = 1,\n    HasPolygamma = 1,\n    HasErf = 1,\n    HasErfc = 1,\n    HasNdtri = 1,\n    HasBessel = 1,\n    HasIGamma = 1,\n    HasIGammaDerA = 1,\n    HasGammaSampleDerAlpha = 1,\n    HasIGammac = 1,\n    HasBetaInc = 1,\n\n    HasBlend = 0,\n    HasFloor = 1,\n  };\n};\n\n\ntemplate<> struct unpacket_traits<float4>  { typedef float  type; enum {size=4, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef float4 half; };\ntemplate<> struct unpacket_traits<double2> { typedef double type; enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef double2 half; };\n\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pset1<float4>(const float&  from) {\n  return make_float4(from, from, from, from);\n}\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pset1<double2>(const double& from) {\n  return make_double2(from, from);\n}\n\n// We need to distinguish ‘clang as the CUDA compiler’ from ‘clang as the host compiler,\n// invoked by NVCC’ (e.g. on MacOS). 
The former needs to see both host and device implementation\n// of the functions, while the latter can only deal with one of them.\n#if defined(EIGEN_CUDA_ARCH) || defined(EIGEN_HIPCC) || (defined(EIGEN_CUDACC) && EIGEN_COMP_CLANG && !EIGEN_COMP_NVCC)\nnamespace {\n\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float bitwise_and(const float& a,\n                                                        const float& b) {\n  return __int_as_float(__float_as_int(a) & __float_as_int(b));\n}\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double bitwise_and(const double& a,\n                                                         const double& b) {\n  return __longlong_as_double(__double_as_longlong(a) &\n                              __double_as_longlong(b));\n}\n\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float bitwise_or(const float& a,\n                                                       const float& b) {\n  return __int_as_float(__float_as_int(a) | __float_as_int(b));\n}\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double bitwise_or(const double& a,\n                                                        const double& b) {\n  return __longlong_as_double(__double_as_longlong(a) |\n                              __double_as_longlong(b));\n}\n\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float bitwise_xor(const float& a,\n                                                        const float& b) {\n  return __int_as_float(__float_as_int(a) ^ __float_as_int(b));\n}\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double bitwise_xor(const double& a,\n                                                         const double& b) {\n  return __longlong_as_double(__double_as_longlong(a) ^\n                              __double_as_longlong(b));\n}\n\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float bitwise_andnot(const float& a,\n                                                           const float& b) {\n  return __int_as_float(__float_as_int(a) & ~__float_as_int(b));\n}\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double bitwise_andnot(const double& a,\n                                                            const double& b) {\n  return __longlong_as_double(__double_as_longlong(a) &\n                              ~__double_as_longlong(b));\n}\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float eq_mask(const float& a,\n                                                    const float& b) {\n  return __int_as_float(a == b ? 0xffffffffu : 0u);\n}\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double eq_mask(const double& a,\n                                                     const double& b) {\n  return __longlong_as_double(a == b ? 0xffffffffffffffffull : 0ull);\n}\n\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float lt_mask(const float& a,\n                                                    const float& b) {\n  return __int_as_float(a < b ? 0xffffffffu : 0u);\n}\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double lt_mask(const double& a,\n                                                     const double& b) {\n  return __longlong_as_double(a < b ? 
0xffffffffffffffffull : 0ull);\n}\n\n}  // namespace\n\ntemplate <>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pand<float4>(const float4& a,\n                                                          const float4& b) {\n  return make_float4(bitwise_and(a.x, b.x), bitwise_and(a.y, b.y),\n                     bitwise_and(a.z, b.z), bitwise_and(a.w, b.w));\n}\ntemplate <>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pand<double2>(const double2& a,\n                                                            const double2& b) {\n  return make_double2(bitwise_and(a.x, b.x), bitwise_and(a.y, b.y));\n}\n\ntemplate <>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 por<float4>(const float4& a,\n                                                         const float4& b) {\n  return make_float4(bitwise_or(a.x, b.x), bitwise_or(a.y, b.y),\n                     bitwise_or(a.z, b.z), bitwise_or(a.w, b.w));\n}\ntemplate <>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 por<double2>(const double2& a,\n                                                           const double2& b) {\n  return make_double2(bitwise_or(a.x, b.x), bitwise_or(a.y, b.y));\n}\n\ntemplate <>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pxor<float4>(const float4& a,\n                                                          const float4& b) {\n  return make_float4(bitwise_xor(a.x, b.x), bitwise_xor(a.y, b.y),\n                     bitwise_xor(a.z, b.z), bitwise_xor(a.w, b.w));\n}\ntemplate <>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pxor<double2>(const double2& a,\n                                                            const double2& b) {\n  return make_double2(bitwise_xor(a.x, b.x), bitwise_xor(a.y, b.y));\n}\n\ntemplate <>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pandnot<float4>(const float4& a,\n                                                             const float4& b) {\n  return make_float4(bitwise_andnot(a.x, b.x), bitwise_andnot(a.y, b.y),\n                     bitwise_andnot(a.z, b.z), bitwise_andnot(a.w, b.w));\n}\ntemplate <>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2\npandnot<double2>(const double2& a, const double2& b) {\n  return make_double2(bitwise_andnot(a.x, b.x), bitwise_andnot(a.y, b.y));\n}\n\ntemplate <>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pcmp_eq<float4>(const float4& a,\n                                                             const float4& b) {\n  return make_float4(eq_mask(a.x, b.x), eq_mask(a.y, b.y), eq_mask(a.z, b.z),\n                     eq_mask(a.w, b.w));\n}\ntemplate <>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pcmp_lt<float4>(const float4& a,\n                                                             const float4& b) {\n  return make_float4(lt_mask(a.x, b.x), lt_mask(a.y, b.y), lt_mask(a.z, b.z),\n                     lt_mask(a.w, b.w));\n}\ntemplate <>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2\npcmp_eq<double2>(const double2& a, const double2& b) {\n  return make_double2(eq_mask(a.x, b.x), eq_mask(a.y, b.y));\n}\ntemplate <>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2\npcmp_lt<double2>(const double2& a, const double2& b) {\n  return make_double2(lt_mask(a.x, b.x), lt_mask(a.y, b.y));\n}\n#endif // defined(EIGEN_CUDA_ARCH) || defined(EIGEN_HIPCC) || (defined(EIGEN_CUDACC) && EIGEN_COMP_CLANG && !EIGEN_COMP_NVCC)\n\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 plset<float4>(const float& a) {\n  return make_float4(a, a+1, a+2, a+3);\n}\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 plset<double2>(const double& a) {\n  return 
make_double2(a, a+1);\n}\n\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 padd<float4>(const float4& a, const float4& b) {\n  return make_float4(a.x+b.x, a.y+b.y, a.z+b.z, a.w+b.w);\n}\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 padd<double2>(const double2& a, const double2& b) {\n  return make_double2(a.x+b.x, a.y+b.y);\n}\n\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 psub<float4>(const float4& a, const float4& b) {\n  return make_float4(a.x-b.x, a.y-b.y, a.z-b.z, a.w-b.w);\n}\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 psub<double2>(const double2& a, const double2& b) {\n  return make_double2(a.x-b.x, a.y-b.y);\n}\n\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pnegate(const float4& a) {\n  return make_float4(-a.x, -a.y, -a.z, -a.w);\n}\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pnegate(const double2& a) {\n  return make_double2(-a.x, -a.y);\n}\n\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pconj(const float4& a) { return a; }\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pconj(const double2& a) { return a; }\n\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pmul<float4>(const float4& a, const float4& b) {\n  return make_float4(a.x*b.x, a.y*b.y, a.z*b.z, a.w*b.w);\n}\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pmul<double2>(const double2& a, const double2& b) {\n  return make_double2(a.x*b.x, a.y*b.y);\n}\n\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pdiv<float4>(const float4& a, const float4& b) {\n  return make_float4(a.x/b.x, a.y/b.y, a.z/b.z, a.w/b.w);\n}\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pdiv<double2>(const double2& a, const double2& b) {\n  return make_double2(a.x/b.x, a.y/b.y);\n}\n\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pmin<float4>(const float4& a, const float4& b) {\n  return make_float4(fminf(a.x, b.x), fminf(a.y, b.y), fminf(a.z, b.z), fminf(a.w, b.w));\n}\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pmin<double2>(const double2& a, const double2& b) {\n  return make_double2(fmin(a.x, b.x), fmin(a.y, b.y));\n}\n\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pmax<float4>(const float4& a, const float4& b) {\n  return make_float4(fmaxf(a.x, b.x), fmaxf(a.y, b.y), fmaxf(a.z, b.z), fmaxf(a.w, b.w));\n}\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pmax<double2>(const double2& a, const double2& b) {\n  return make_double2(fmax(a.x, b.x), fmax(a.y, b.y));\n}\n\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pload<float4>(const float* from) {\n  return *reinterpret_cast<const float4*>(from);\n}\n\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pload<double2>(const double* from) {\n  return *reinterpret_cast<const double2*>(from);\n}\n\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 ploadu<float4>(const float* from) {\n  return make_float4(from[0], from[1], from[2], from[3]);\n}\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 ploadu<double2>(const double* from) {\n  return make_double2(from[0], from[1]);\n}\n\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 ploaddup<float4>(const float*   from) {\n  return make_float4(from[0], from[0], from[1], from[1]);\n}\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 ploaddup<double2>(const double*  from) {\n  return make_double2(from[0], from[0]);\n}\n\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstore<float>(float*   to, const float4& from) {\n  
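// aligned store: 'to' is assumed 16-byte aligned, so the packet is written as one float4.\n  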
*reinterpret_cast<float4*>(to) = from;\n}\n\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstore<double>(double* to, const double2& from) {\n  *reinterpret_cast<double2*>(to) = from;\n}\n\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu<float>(float*  to, const float4& from) {\n  to[0] = from.x;\n  to[1] = from.y;\n  to[2] = from.z;\n  to[3] = from.w;\n}\n\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const double2& from) {\n  to[0] = from.x;\n  to[1] = from.y;\n}\n\ntemplate<>\nEIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float4 ploadt_ro<float4, Aligned>(const float* from) {\n#if defined(EIGEN_GPU_HAS_LDG)\n  return __ldg((const float4*)from);\n#else\n  return make_float4(from[0], from[1], from[2], from[3]);\n#endif\n}\ntemplate<>\nEIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double2 ploadt_ro<double2, Aligned>(const double* from) {\n#if defined(EIGEN_GPU_HAS_LDG)\n  return __ldg((const double2*)from);\n#else\n  return make_double2(from[0], from[1]);\n#endif\n}\n\ntemplate<>\nEIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float4 ploadt_ro<float4, Unaligned>(const float* from) {\n#if defined(EIGEN_GPU_HAS_LDG)\n  return make_float4(__ldg(from+0), __ldg(from+1), __ldg(from+2), __ldg(from+3));\n#else\n  return make_float4(from[0], from[1], from[2], from[3]);\n#endif\n}\ntemplate<>\nEIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double2 ploadt_ro<double2, Unaligned>(const double* from) {\n#if defined(EIGEN_GPU_HAS_LDG)\n  return make_double2(__ldg(from+0), __ldg(from+1));\n#else\n  return make_double2(from[0], from[1]);\n#endif\n}\n\ntemplate<> EIGEN_DEVICE_FUNC inline float4 pgather<float, float4>(const float* from, Index stride) {\n  return make_float4(from[0*stride], from[1*stride], from[2*stride], from[3*stride]);\n}\n\ntemplate<> EIGEN_DEVICE_FUNC inline double2 pgather<double, double2>(const double* from, Index stride) {\n  return make_double2(from[0*stride], from[1*stride]);\n}\n\ntemplate<> EIGEN_DEVICE_FUNC inline void pscatter<float, float4>(float* to, const float4& from, Index stride) {\n  to[stride*0] = from.x;\n  to[stride*1] = from.y;\n  to[stride*2] = from.z;\n  to[stride*3] = from.w;\n}\ntemplate<> EIGEN_DEVICE_FUNC inline void pscatter<double, double2>(double* to, const double2& from, Index stride) {\n  to[stride*0] = from.x;\n  to[stride*1] = from.y;\n}\n\ntemplate<> EIGEN_DEVICE_FUNC inline float  pfirst<float4>(const float4& a) {\n  return a.x;\n}\ntemplate<> EIGEN_DEVICE_FUNC inline double pfirst<double2>(const double2& a) {\n  return a.x;\n}\n\ntemplate<> EIGEN_DEVICE_FUNC inline float  predux<float4>(const float4& a) {\n  return a.x + a.y + a.z + a.w;\n}\ntemplate<> EIGEN_DEVICE_FUNC inline double predux<double2>(const double2& a) {\n  return a.x + a.y;\n}\n\ntemplate<> EIGEN_DEVICE_FUNC inline float  predux_max<float4>(const float4& a) {\n  return fmaxf(fmaxf(a.x, a.y), fmaxf(a.z, a.w));\n}\ntemplate<> EIGEN_DEVICE_FUNC inline double predux_max<double2>(const double2& a) {\n  return fmax(a.x, a.y);\n}\n\ntemplate<> EIGEN_DEVICE_FUNC inline float  predux_min<float4>(const float4& a) {\n  return fminf(fminf(a.x, a.y), fminf(a.z, a.w));\n}\ntemplate<> EIGEN_DEVICE_FUNC inline double predux_min<double2>(const double2& a) {\n  return fmin(a.x, a.y);\n}\n\ntemplate<> EIGEN_DEVICE_FUNC inline float  predux_mul<float4>(const float4& a) {\n  return a.x * a.y * a.z * a.w;\n}\ntemplate<> EIGEN_DEVICE_FUNC inline double predux_mul<double2>(const double2& a) {\n  return a.x * a.y;\n}\n\ntemplate<> EIGEN_DEVICE_FUNC inline float4  pabs<float4>(const 
float4& a) {\n  return make_float4(fabsf(a.x), fabsf(a.y), fabsf(a.z), fabsf(a.w));\n}\ntemplate<> EIGEN_DEVICE_FUNC inline double2 pabs<double2>(const double2& a) {\n  return make_double2(fabs(a.x), fabs(a.y));\n}\n\ntemplate<> EIGEN_DEVICE_FUNC inline float4  pfloor<float4>(const float4& a) {\n  return make_float4(floorf(a.x), floorf(a.y), floorf(a.z), floorf(a.w));\n}\ntemplate<> EIGEN_DEVICE_FUNC inline double2 pfloor<double2>(const double2& a) {\n  return make_double2(floor(a.x), floor(a.y));\n}\n\nEIGEN_DEVICE_FUNC inline void\nptranspose(PacketBlock<float4,4>& kernel) {\n  float tmp = kernel.packet[0].y;\n  kernel.packet[0].y = kernel.packet[1].x;\n  kernel.packet[1].x = tmp;\n\n  tmp = kernel.packet[0].z;\n  kernel.packet[0].z = kernel.packet[2].x;\n  kernel.packet[2].x = tmp;\n\n  tmp = kernel.packet[0].w;\n  kernel.packet[0].w = kernel.packet[3].x;\n  kernel.packet[3].x = tmp;\n\n  tmp = kernel.packet[1].z;\n  kernel.packet[1].z = kernel.packet[2].y;\n  kernel.packet[2].y = tmp;\n\n  tmp = kernel.packet[1].w;\n  kernel.packet[1].w = kernel.packet[3].y;\n  kernel.packet[3].y = tmp;\n\n  tmp = kernel.packet[2].w;\n  kernel.packet[2].w = kernel.packet[3].z;\n  kernel.packet[3].z = tmp;\n}\n\nEIGEN_DEVICE_FUNC inline void\nptranspose(PacketBlock<double2,2>& kernel) {\n  double tmp = kernel.packet[0].y;\n  kernel.packet[0].y = kernel.packet[1].x;\n  kernel.packet[1].x = tmp;\n}\n\n#endif // defined(EIGEN_GPUCC) && defined(EIGEN_USE_GPU)\n\n// Packet4h2 must be defined in the macro without EIGEN_CUDA_ARCH, meaning\n// its corresponding packet_traits<Eigen::half> must be visible on host.\n#if defined(EIGEN_HAS_CUDA_FP16) || defined(EIGEN_HAS_HIP_FP16)\n\ntypedef ulonglong2 Packet4h2;\ntemplate<> struct unpacket_traits<Packet4h2> { typedef Eigen::half type; enum {size=8, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet4h2 half; };\ntemplate<> struct is_arithmetic<Packet4h2> { enum { value = true }; };\n\ntemplate<> struct unpacket_traits<half2> { typedef Eigen::half type; enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef half2 half; };\ntemplate<> struct is_arithmetic<half2> { enum { value = true }; };\n\ntemplate<> struct packet_traits<Eigen::half> : default_packet_traits\n{\n  typedef Packet4h2 type;\n  typedef Packet4h2 half;\n  enum {\n    Vectorizable = 1,\n    AlignedOnScalar = 1,\n    size=8,\n    HasHalfPacket = 0,\n    HasAdd    = 1,\n    HasSub    = 1,\n    HasMul    = 1,\n    HasDiv    = 1,\n    HasSqrt   = 1,\n    HasRsqrt  = 1,\n    HasExp    = 1,\n    HasExpm1  = 1,\n    HasLog    = 1,\n    HasLog1p  = 1\n  };\n};\n\nnamespace {\n// This is equivalent to make_half2, which is undocumented and doesn't seem to always exist.\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 combine_half(const __half& a, const __half& b) {\n#if defined(EIGEN_GPU_COMPILE_PHASE)\n  return __halves2half2(a, b);\n#else\n  // Round-about way since __halves2half2 is a __device__ function.\n  return __floats2half2_rn(__half2float(a), __half2float(b));\n#endif\n}\n\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE __half get_half2_low(const half2& a) {\n#if defined(EIGEN_GPU_COMPILE_PHASE)\n  return __low2half(a);\n#else\n  return __float2half(__low2float(a));\n#endif\n}\n\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE __half get_half2_high(const half2& a) {\n#if defined(EIGEN_GPU_COMPILE_PHASE)\n  return __high2half(a);\n#else\n  return __float2half(__high2float(a));\n#endif\n}\n} // 
namespace\n\ntemplate<>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pset1<half2>(const Eigen::half& from) {\n#if defined(EIGEN_GPU_COMPILE_PHASE)\n  return __half2half2(from);\n#else\n  const float f = __half2float(from);\n  return __floats2half2_rn(f, f);\n#endif\n}\n\ntemplate <>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2\npset1<Packet4h2>(const Eigen::half& from) {\n  Packet4h2 r;\n  half2* p_alias = reinterpret_cast<half2*>(&r);\n  p_alias[0] = pset1<half2>(from);\n  p_alias[1] = pset1<half2>(from);\n  p_alias[2] = pset1<half2>(from);\n  p_alias[3] = pset1<half2>(from);\n  return r;\n}\n\n// We now need this visible on both host and device.\n// #if defined(EIGEN_CUDA_ARCH) || defined(EIGEN_HIPCC) || (defined(EIGEN_CUDACC) && EIGEN_COMP_CLANG && !EIGEN_COMP_NVCC)\nnamespace {\n\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pload(const Eigen::half* from) {\n  return *reinterpret_cast<const half2*>(from);\n}\n\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 ploadu(const Eigen::half* from) {\n  return combine_half(from[0], from[1]);\n}\n\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 ploaddup(const Eigen::half*  from) {\n  return combine_half(from[0], from[0]);\n}\n\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstore(Eigen::half* to,\n                                                  const half2& from) {\n  *reinterpret_cast<half2*>(to) = from;\n}\n\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu(Eigen::half* to,\n                                                   const half2& from) {\n  to[0] = get_half2_low(from);\n  to[1] = get_half2_high(from);\n}\n\n\nEIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE half2 ploadt_ro_aligned(\n    const Eigen::half* from) {\n#if defined(EIGEN_GPU_HAS_LDG)\n  // Input is guaranteed to be properly aligned.\n  return __ldg(reinterpret_cast<const half2*>(from));\n#else\n  return combine_half(*(from+0), *(from+1));\n#endif\n}\n\nEIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE half2 ploadt_ro_unaligned(\n    const Eigen::half* from) {\n#if defined(EIGEN_GPU_HAS_LDG)\n  return __halves2half2(__ldg(from+0), __ldg(from+1));\n#else\n  return combine_half(*(from+0), *(from+1));\n#endif\n}\n\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pgather(const Eigen::half* from,\n                                                    Index stride) {\n  return combine_half(from[0*stride], from[1*stride]);\n}\n\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter(\n    Eigen::half* to, const half2& from, Index stride) {\n  to[stride*0] = get_half2_low(from);\n  to[stride*1] = get_half2_high(from);\n}\n\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half pfirst(const half2& a) {\n  return get_half2_low(a);\n}\n\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pabs(const half2& a) {\n  half a1 = get_half2_low(a);\n  half a2 = get_half2_high(a);\n  half result1 = half_impl::raw_uint16_to_half(a1.x & 0x7FFF);\n  half result2 = half_impl::raw_uint16_to_half(a2.x & 0x7FFF);\n  return combine_half(result1, result2);\n}\n\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 ptrue(const half2& /*a*/) {\n  half true_half = half_impl::raw_uint16_to_half(0xffffu);\n  return pset1<half2>(true_half);\n}\n\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pzero(const half2& /*a*/) {\n  half false_half = half_impl::raw_uint16_to_half(0x0000u);\n  return pset1<half2>(false_half);\n}\n\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void\nptranspose(PacketBlock<half2,2>& kernel) {\n  __half a1 = get_half2_low(kernel.packet[0]);\n  __half a2 = get_half2_high(kernel.packet[0]);\n  __half b1 = get_half2_low(kernel.packet[1]);\n  __half b2 = 
get_half2_high(kernel.packet[1]);\n  kernel.packet[0] = combine_half(a1, b1);\n  kernel.packet[1] = combine_half(a2, b2);\n}\n\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 plset(const Eigen::half& a) {\n#if defined(EIGEN_GPU_HAS_FP16_ARITHMETIC)\n  return __halves2half2(a, __hadd(a, __float2half(1.0f)));\n#else\n  float f = __half2float(a) + 1.0f;\n  return combine_half(a, __float2half(f));\n#endif\n}\n\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pselect(const half2& mask,\n                                                    const half2& a,\n                                                    const half2& b) {\n  half mask_low = get_half2_low(mask);\n  half mask_high = get_half2_high(mask);\n  half result_low = mask_low == half(0) ? get_half2_low(b) : get_half2_low(a);\n  half result_high = mask_high == half(0) ? get_half2_high(b) : get_half2_high(a);\n  return combine_half(result_low, result_high);\n}\n\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pcmp_eq(const half2& a,\n                                                    const half2& b) {\n  half true_half = half_impl::raw_uint16_to_half(0xffffu);\n  half false_half = half_impl::raw_uint16_to_half(0x0000u);\n  half a1 = get_half2_low(a);\n  half a2 = get_half2_high(a);\n  half b1 = get_half2_low(b);\n  half b2 = get_half2_high(b);\n  half eq1 = __half2float(a1) == __half2float(b1) ? true_half : false_half;\n  half eq2 = __half2float(a2) == __half2float(b2) ? true_half : false_half;\n  return combine_half(eq1, eq2);\n}\n\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pcmp_lt(const half2& a,\n                                                    const half2& b) {\n  half true_half = half_impl::raw_uint16_to_half(0xffffu);\n  half false_half = half_impl::raw_uint16_to_half(0x0000u);\n  half a1 = get_half2_low(a);\n  half a2 = get_half2_high(a);\n  half b1 = get_half2_low(b);\n  half b2 = get_half2_high(b);\n  half eq1 = __half2float(a1) < __half2float(b1) ? true_half : false_half;\n  half eq2 = __half2float(a2) < __half2float(b2) ? 
true_half : false_half;\n  return combine_half(eq1, eq2);\n}\n\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pand(const half2& a,\n                                                 const half2& b) {\n  half a1 = get_half2_low(a);\n  half a2 = get_half2_high(a);\n  half b1 = get_half2_low(b);\n  half b2 = get_half2_high(b);\n  half result1 = half_impl::raw_uint16_to_half(a1.x & b1.x);\n  half result2 = half_impl::raw_uint16_to_half(a2.x & b2.x);\n  return combine_half(result1, result2);\n}\n\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 por(const half2& a,\n                                                const half2& b) {\n  half a1 = get_half2_low(a);\n  half a2 = get_half2_high(a);\n  half b1 = get_half2_low(b);\n  half b2 = get_half2_high(b);\n  half result1 = half_impl::raw_uint16_to_half(a1.x | b1.x);\n  half result2 = half_impl::raw_uint16_to_half(a2.x | b2.x);\n  return combine_half(result1, result2);\n}\n\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pxor(const half2& a,\n                                                 const half2& b) {\n  half a1 = get_half2_low(a);\n  half a2 = get_half2_high(a);\n  half b1 = get_half2_low(b);\n  half b2 = get_half2_high(b);\n  half result1 = half_impl::raw_uint16_to_half(a1.x ^ b1.x);\n  half result2 = half_impl::raw_uint16_to_half(a2.x ^ b2.x);\n  return combine_half(result1, result2);\n}\n\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pandnot(const half2& a,\n                                                    const half2& b) {\n  half a1 = get_half2_low(a);\n  half a2 = get_half2_high(a);\n  half b1 = get_half2_low(b);\n  half b2 = get_half2_high(b);\n  half result1 = half_impl::raw_uint16_to_half(a1.x & ~b1.x);\n  half result2 = half_impl::raw_uint16_to_half(a2.x & ~b2.x);\n  return combine_half(result1, result2);\n}\n\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 padd(const half2& a,\n                                                 const half2& b) {\n#if defined(EIGEN_GPU_HAS_FP16_ARITHMETIC)\n  return __hadd2(a, b);\n#else\n  float a1 = __low2float(a);\n  float a2 = __high2float(a);\n  float b1 = __low2float(b);\n  float b2 = __high2float(b);\n  float r1 = a1 + b1;\n  float r2 = a2 + b2;\n  return __floats2half2_rn(r1, r2);\n#endif\n}\n\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 psub(const half2& a,\n                                                 const half2& b) {\n#if defined(EIGEN_GPU_HAS_FP16_ARITHMETIC)\n  return __hsub2(a, b);\n#else\n  float a1 = __low2float(a);\n  float a2 = __high2float(a);\n  float b1 = __low2float(b);\n  float b2 = __high2float(b);\n  float r1 = a1 - b1;\n  float r2 = a2 - b2;\n  return __floats2half2_rn(r1, r2);\n#endif\n}\n\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pnegate(const half2& a) {\n#if defined(EIGEN_GPU_HAS_FP16_ARITHMETIC)\n  return __hneg2(a);\n#else\n  float a1 = __low2float(a);\n  float a2 = __high2float(a);\n  return __floats2half2_rn(-a1, -a2);\n#endif\n}\n\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pconj(const half2& a) { return a; }\n\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmul(const half2& a,\n                                                 const half2& b) {\n#if defined(EIGEN_GPU_HAS_FP16_ARITHMETIC)\n  return __hmul2(a, b);\n#else\n  float a1 = __low2float(a);\n  float a2 = __high2float(a);\n  float b1 = __low2float(b);\n  float b2 = __high2float(b);\n  float r1 = a1 * b1;\n  float r2 = a2 * b2;\n  return __floats2half2_rn(r1, r2);\n#endif\n}\n\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmadd(const half2& a,\n                                                  const half2& b,\n   
                                               const half2& c) {\n#if defined(EIGEN_GPU_HAS_FP16_ARITHMETIC)\n   return __hfma2(a, b, c);\n#else\n  float a1 = __low2float(a);\n  float a2 = __high2float(a);\n  float b1 = __low2float(b);\n  float b2 = __high2float(b);\n  float c1 = __low2float(c);\n  float c2 = __high2float(c);\n  float r1 = a1 * b1 + c1;\n  float r2 = a2 * b2 + c2;\n  return __floats2half2_rn(r1, r2);\n#endif\n}\n\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pdiv(const half2& a,\n                                                 const half2& b) {\n#if defined(EIGEN_GPU_HAS_FP16_ARITHMETIC)\n  return __h2div(a, b);\n#else\n  float a1 = __low2float(a);\n  float a2 = __high2float(a);\n  float b1 = __low2float(b);\n  float b2 = __high2float(b);\n  float r1 = a1 / b1;\n  float r2 = a2 / b2;\n  return __floats2half2_rn(r1, r2);\n#endif\n}\n\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmin(const half2& a,\n                                                 const half2& b) {\n  float a1 = __low2float(a);\n  float a2 = __high2float(a);\n  float b1 = __low2float(b);\n  float b2 = __high2float(b);\n  __half r1 = a1 < b1 ? get_half2_low(a) : get_half2_low(b);\n  __half r2 = a2 < b2 ? get_half2_high(a) : get_half2_high(b);\n  return combine_half(r1, r2);\n}\n\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmax(const half2& a,\n                                                 const half2& b) {\n  float a1 = __low2float(a);\n  float a2 = __high2float(a);\n  float b1 = __low2float(b);\n  float b2 = __high2float(b);\n  __half r1 = a1 > b1 ? get_half2_low(a) : get_half2_low(b);\n  __half r2 = a2 > b2 ? get_half2_high(a) : get_half2_high(b);\n  return combine_half(r1, r2);\n}\n\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux(const half2& a) {\n#if defined(EIGEN_GPU_HAS_FP16_ARITHMETIC)\n  return __hadd(__low2half(a), __high2half(a));\n#else\n  float a1 = __low2float(a);\n  float a2 = __high2float(a);\n  return Eigen::half(__float2half(a1 + a2));\n#endif\n}\n\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux_max(const half2& a) {\n#if defined(EIGEN_GPU_HAS_FP16_ARITHMETIC)\n  __half first = __low2half(a);\n  __half second = __high2half(a);\n  return __hgt(first, second) ? first : second;\n#else\n  float a1 = __low2float(a);\n  float a2 = __high2float(a);\n  return a1 > a2 ? get_half2_low(a) : get_half2_high(a);\n#endif\n}\n\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux_min(const half2& a) {\n#if defined(EIGEN_GPU_HAS_FP16_ARITHMETIC)\n  __half first = __low2half(a);\n  __half second = __high2half(a);\n  return __hlt(first, second) ? first : second;\n#else\n  float a1 = __low2float(a);\n  float a2 = __high2float(a);\n  return a1 < a2 ? 
get_half2_low(a) : get_half2_high(a);\n#endif\n}\n\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux_mul(const half2& a) {\n#if defined(EIGEN_GPU_HAS_FP16_ARITHMETIC)\n  return __hmul(__low2half(a), __high2half(a));\n#else\n  float a1 = __low2float(a);\n  float a2 = __high2float(a);\n  return Eigen::half(__float2half(a1 * a2));\n#endif\n}\n\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 plog1p(const half2& a) {\n  float a1 = __low2float(a);\n  float a2 = __high2float(a);\n  float r1 = log1pf(a1);\n  float r2 = log1pf(a2);\n  return __floats2half2_rn(r1, r2);\n}\n\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pexpm1(const half2& a) {\n  float a1 = __low2float(a);\n  float a2 = __high2float(a);\n  float r1 = expm1f(a1);\n  float r2 = expm1f(a2);\n  return __floats2half2_rn(r1, r2);\n}\n\n#if (EIGEN_CUDA_SDK_VER >= 80000 && defined(EIGEN_CUDA_HAS_FP16_ARITHMETIC)) || \\\n  defined(EIGEN_HIP_DEVICE_COMPILE)\n\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\nhalf2 plog(const half2& a) {\n  return h2log(a);\n}\n\n EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\nhalf2 pexp(const half2& a) {\n  return h2exp(a);\n}\n\n EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\nhalf2 psqrt(const half2& a) {\n  return h2sqrt(a);\n}\n\n EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\nhalf2 prsqrt(const half2& a) {\n  return h2rsqrt(a);\n}\n\n#else\n\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 plog(const half2& a) {\n  float a1 = __low2float(a);\n  float a2 = __high2float(a);\n  float r1 = logf(a1);\n  float r2 = logf(a2);\n  return __floats2half2_rn(r1, r2);\n}\n\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pexp(const half2& a) {\n  float a1 = __low2float(a);\n  float a2 = __high2float(a);\n  float r1 = expf(a1);\n  float r2 = expf(a2);\n  return __floats2half2_rn(r1, r2);\n}\n\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 psqrt(const half2& a) {\n  float a1 = __low2float(a);\n  float a2 = __high2float(a);\n  float r1 = sqrtf(a1);\n  float r2 = sqrtf(a2);\n  return __floats2half2_rn(r1, r2);\n}\n\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 prsqrt(const half2& a) {\n  float a1 = __low2float(a);\n  float a2 = __high2float(a);\n  float r1 = rsqrtf(a1);\n  float r2 = rsqrtf(a2);\n  return __floats2half2_rn(r1, r2);\n}\n#endif\n} // namespace\n\ntemplate <>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2\npload<Packet4h2>(const Eigen::half* from) {\n  return *reinterpret_cast<const Packet4h2*>(from);\n}\n\n// unaligned load;\ntemplate <>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2\nploadu<Packet4h2>(const Eigen::half* from) {\n  Packet4h2 r;\n  half2* p_alias = reinterpret_cast<half2*>(&r);\n  p_alias[0] = ploadu(from + 0);\n  p_alias[1] = ploadu(from + 2);\n  p_alias[2] = ploadu(from + 4);\n  p_alias[3] = ploadu(from + 6);\n  return r;\n}\n\ntemplate <>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2\nploaddup<Packet4h2>(const Eigen::half* from) {\n  Packet4h2 r;\n  half2* p_alias = reinterpret_cast<half2*>(&r);\n  p_alias[0] = ploaddup(from + 0);\n  p_alias[1] = ploaddup(from + 1);\n  p_alias[2] = ploaddup(from + 2);\n  p_alias[3] = ploaddup(from + 3);\n  return r;\n}\n\ntemplate <>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstore<Eigen::half>(\n    Eigen::half* to, const Packet4h2& from) {\n  *reinterpret_cast<Packet4h2*>(to) = from;\n}\n\ntemplate <>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu<Eigen::half>(\n    Eigen::half* to, const Packet4h2& from) {\n  const half2* from_alias = reinterpret_cast<const half2*>(&from);\n  pstoreu(to + 0,from_alias[0]);\n  pstoreu(to + 2,from_alias[1]);\n  pstoreu(to + 4,from_alias[2]);\n  
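// fourth and final half2 pair (elements 6 and 7)\n  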
pstoreu(to + 6,from_alias[3]);\n}\n\ntemplate <>\nEIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet4h2\nploadt_ro<Packet4h2, Aligned>(const Eigen::half* from) {\n#if defined(EIGEN_GPU_HAS_LDG)\n  Packet4h2 r;\n  r = __ldg(reinterpret_cast<const Packet4h2*>(from));\n  return r;\n#else\n  Packet4h2 r;\n  half2* r_alias = reinterpret_cast<half2*>(&r);\n  r_alias[0] = ploadt_ro_aligned(from + 0);\n  r_alias[1] = ploadt_ro_aligned(from + 2);\n  r_alias[2] = ploadt_ro_aligned(from + 4);\n  r_alias[3] = ploadt_ro_aligned(from + 6);\n  return r;\n#endif\n}\n\ntemplate <>\nEIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet4h2\nploadt_ro<Packet4h2, Unaligned>(const Eigen::half* from) {\n  Packet4h2 r;\n  half2* r_alias = reinterpret_cast<half2*>(&r);\n  r_alias[0] = ploadt_ro_unaligned(from + 0);\n  r_alias[1] = ploadt_ro_unaligned(from + 2);\n  r_alias[2] = ploadt_ro_unaligned(from + 4);\n  r_alias[3] = ploadt_ro_unaligned(from + 6);\n  return r;\n}\n\ntemplate <>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2\npgather<Eigen::half, Packet4h2>(const Eigen::half* from, Index stride) {\n  Packet4h2 r;\n  half2* p_alias = reinterpret_cast<half2*>(&r);\n  p_alias[0] = combine_half(from[0 * stride], from[1 * stride]);\n  p_alias[1] = combine_half(from[2 * stride], from[3 * stride]);\n  p_alias[2] = combine_half(from[4 * stride], from[5 * stride]);\n  p_alias[3] = combine_half(from[6 * stride], from[7 * stride]);\n  return r;\n}\n\ntemplate <>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter<Eigen::half, Packet4h2>(\n    Eigen::half* to, const Packet4h2& from, Index stride) {\n  const half2* from_alias = reinterpret_cast<const half2*>(&from);\n  pscatter(to + stride * 0, from_alias[0], stride);\n  pscatter(to + stride * 2, from_alias[1], stride);\n  pscatter(to + stride * 4, from_alias[2], stride);\n  pscatter(to + stride * 6, from_alias[3], stride);\n}\n\ntemplate <>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half pfirst<Packet4h2>(\n    const Packet4h2& a) {\n  return pfirst(*(reinterpret_cast<const half2*>(&a)));\n}\n\ntemplate <>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pabs<Packet4h2>(\n    const Packet4h2& a) {\n  Packet4h2 r;\n  half2* p_alias = reinterpret_cast<half2*>(&r);\n  const half2* a_alias = reinterpret_cast<const half2*>(&a);\n  p_alias[0] = pabs(a_alias[0]);\n  p_alias[1] = pabs(a_alias[1]);\n  p_alias[2] = pabs(a_alias[2]);\n  p_alias[3] = pabs(a_alias[3]);\n  return r;\n}\n\ntemplate <>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 ptrue<Packet4h2>(\n    const Packet4h2& /*a*/) {\n  half true_half = half_impl::raw_uint16_to_half(0xffffu);\n  return pset1<Packet4h2>(true_half);\n}\n\ntemplate <>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pzero<Packet4h2>(const Packet4h2& /*a*/) {\n  half false_half = half_impl::raw_uint16_to_half(0x0000u);\n  return pset1<Packet4h2>(false_half);\n}\n\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose_double(\n    double* d_row0, double* d_row1, double* d_row2, double* d_row3,\n    double* d_row4, double* d_row5, double* d_row6, double* d_row7) {\n  double d_tmp;\n  d_tmp = d_row0[1];\n  d_row0[1] = d_row4[0];\n  d_row4[0] = d_tmp;\n\n  d_tmp = d_row1[1];\n  d_row1[1] = d_row5[0];\n  d_row5[0] = d_tmp;\n\n  d_tmp = d_row2[1];\n  d_row2[1] = d_row6[0];\n  d_row6[0] = d_tmp;\n\n  d_tmp = d_row3[1];\n  d_row3[1] = d_row7[0];\n  d_row7[0] = d_tmp;\n}\n\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose_half2(\n    half2* f_row0, half2* f_row1, half2* f_row2, half2* f_row3) {\n  half2 f_tmp;\n  f_tmp = f_row0[1];\n  f_row0[1] = 
f_row2[0];\n  f_row2[0] = f_tmp;\n\n  f_tmp = f_row1[1];\n  f_row1[1] = f_row3[0];\n  f_row3[0] = f_tmp;\n}\n\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void\nptranspose_half(half2& f0, half2& f1) {\n  __half a1 = get_half2_low(f0);\n  __half a2 = get_half2_high(f0);\n  __half b1 = get_half2_low(f1);\n  __half b2 = get_half2_high(f1);\n  f0 = combine_half(a1, b1);\n  f1 = combine_half(a2, b2);\n}\n\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void\nptranspose(PacketBlock<Packet4h2,8>& kernel) {\n  double* d_row0 = reinterpret_cast<double*>(&kernel.packet[0]);\n  double* d_row1 = reinterpret_cast<double*>(&kernel.packet[1]);\n  double* d_row2 = reinterpret_cast<double*>(&kernel.packet[2]);\n  double* d_row3 = reinterpret_cast<double*>(&kernel.packet[3]);\n  double* d_row4 = reinterpret_cast<double*>(&kernel.packet[4]);\n  double* d_row5 = reinterpret_cast<double*>(&kernel.packet[5]);\n  double* d_row6 = reinterpret_cast<double*>(&kernel.packet[6]);\n  double* d_row7 = reinterpret_cast<double*>(&kernel.packet[7]);\n  ptranspose_double(d_row0, d_row1, d_row2, d_row3,\n                    d_row4, d_row5, d_row6, d_row7);\n\n\n  half2* f_row0 = reinterpret_cast<half2*>(d_row0);\n  half2* f_row1 = reinterpret_cast<half2*>(d_row1);\n  half2* f_row2 = reinterpret_cast<half2*>(d_row2);\n  half2* f_row3 = reinterpret_cast<half2*>(d_row3);\n  ptranspose_half2(f_row0, f_row1, f_row2, f_row3);\n  ptranspose_half(f_row0[0], f_row1[0]);\n  ptranspose_half(f_row0[1], f_row1[1]);\n  ptranspose_half(f_row2[0], f_row3[0]);\n  ptranspose_half(f_row2[1], f_row3[1]);\n\n  f_row0 = reinterpret_cast<half2*>(d_row0 + 1);\n  f_row1 = reinterpret_cast<half2*>(d_row1 + 1);\n  f_row2 = reinterpret_cast<half2*>(d_row2 + 1);\n  f_row3 = reinterpret_cast<half2*>(d_row3 + 1);\n  ptranspose_half2(f_row0, f_row1, f_row2, f_row3);\n  ptranspose_half(f_row0[0], f_row1[0]);\n  ptranspose_half(f_row0[1], f_row1[1]);\n  ptranspose_half(f_row2[0], f_row3[0]);\n  ptranspose_half(f_row2[1], f_row3[1]);\n\n  f_row0 = reinterpret_cast<half2*>(d_row4);\n  f_row1 = reinterpret_cast<half2*>(d_row5);\n  f_row2 = reinterpret_cast<half2*>(d_row6);\n  f_row3 = reinterpret_cast<half2*>(d_row7);\n  ptranspose_half2(f_row0, f_row1, f_row2, f_row3);\n  ptranspose_half(f_row0[0], f_row1[0]);\n  ptranspose_half(f_row0[1], f_row1[1]);\n  ptranspose_half(f_row2[0], f_row3[0]);\n  ptranspose_half(f_row2[1], f_row3[1]);\n\n  f_row0 = reinterpret_cast<half2*>(d_row4 + 1);\n  f_row1 = reinterpret_cast<half2*>(d_row5 + 1);\n  f_row2 = reinterpret_cast<half2*>(d_row6 + 1);\n  f_row3 = reinterpret_cast<half2*>(d_row7 + 1);\n  ptranspose_half2(f_row0, f_row1, f_row2, f_row3);\n  ptranspose_half(f_row0[0], f_row1[0]);\n  ptranspose_half(f_row0[1], f_row1[1]);\n  ptranspose_half(f_row2[0], f_row3[0]);\n  ptranspose_half(f_row2[1], f_row3[1]);\n\n}\n\ntemplate <>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2\nplset<Packet4h2>(const Eigen::half& a) {\n#if defined(EIGEN_HIP_DEVICE_COMPILE)\n\n  Packet4h2 r;\n  half2* p_alias = reinterpret_cast<half2*>(&r);\n  p_alias[0] = __halves2half2(a, __hadd(a, __float2half(1.0f)));\n  p_alias[1] = __halves2half2(__hadd(a, __float2half(2.0f)),\n                              __hadd(a, __float2half(3.0f)));\n  p_alias[2] = __halves2half2(__hadd(a, __float2half(4.0f)),\n                              __hadd(a, __float2half(5.0f)));\n  p_alias[3] = __halves2half2(__hadd(a, __float2half(6.0f)),\n                              __hadd(a, __float2half(7.0f)));\n  return r;\n#elif defined(EIGEN_CUDA_HAS_FP16_ARITHMETIC)\n  Packet4h2 r;\n  
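// build {a, a+1, ..., a+7}: offset a by {0,2} and {4,6} with packed fp16 adds,\n  // then expand each resulting lane via the two-element plset, i.e. {lane, lane+1}.\n  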
half2* r_alias = reinterpret_cast<half2*>(&r);\n\n  half2 b = pset1<half2>(a);\n  half2 c;\n  half2 half_offset0 = __halves2half2(__float2half(0.0f),__float2half(2.0f));\n  half2 half_offset1 = __halves2half2(__float2half(4.0f),__float2half(6.0f));\n\n  c = __hadd2(b, half_offset0);\n  r_alias[0] = plset(__low2half(c));\n  r_alias[1] = plset(__high2half(c));\n\n  c = __hadd2(b, half_offset1);\n  r_alias[2] = plset(__low2half(c));\n  r_alias[3] = plset(__high2half(c));\n\n  return r;\n\n#else\n  float f = __half2float(a);\n  Packet4h2 r;\n  half2* p_alias = reinterpret_cast<half2*>(&r);\n  p_alias[0] = combine_half(a, __float2half(f + 1.0f));\n  p_alias[1] = combine_half(__float2half(f + 2.0f), __float2half(f + 3.0f));\n  p_alias[2] = combine_half(__float2half(f + 4.0f), __float2half(f + 5.0f));\n  p_alias[3] = combine_half(__float2half(f + 6.0f), __float2half(f + 7.0f));\n  return r;\n#endif\n}\n\ntemplate <>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2\npselect<Packet4h2>(const Packet4h2& mask, const Packet4h2& a,\n                   const Packet4h2& b) {\n  Packet4h2 r;\n  half2* r_alias = reinterpret_cast<half2*>(&r);\n  const half2* mask_alias = reinterpret_cast<const half2*>(&mask);\n  const half2* a_alias = reinterpret_cast<const half2*>(&a);\n  const half2* b_alias = reinterpret_cast<const half2*>(&b);\n  r_alias[0] = pselect(mask_alias[0], a_alias[0], b_alias[0]);\n  r_alias[1] = pselect(mask_alias[1], a_alias[1], b_alias[1]);\n  r_alias[2] = pselect(mask_alias[2], a_alias[2], b_alias[2]);\n  r_alias[3] = pselect(mask_alias[3], a_alias[3], b_alias[3]);\n  return r;\n}\n\ntemplate <>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2\npcmp_eq<Packet4h2>(const Packet4h2& a, const Packet4h2& b) {\n  Packet4h2 r;\n  half2* r_alias = reinterpret_cast<half2*>(&r);\n  const half2* a_alias = reinterpret_cast<const half2*>(&a);\n  const half2* b_alias = reinterpret_cast<const half2*>(&b);\n  r_alias[0] = pcmp_eq(a_alias[0], b_alias[0]);\n  r_alias[1] = pcmp_eq(a_alias[1], b_alias[1]);\n  r_alias[2] = pcmp_eq(a_alias[2], b_alias[2]);\n  r_alias[3] = pcmp_eq(a_alias[3], b_alias[3]);\n  return r;\n}\n\ntemplate <>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pand<Packet4h2>(\n    const Packet4h2& a, const Packet4h2& b) {\n  Packet4h2 r;\n  half2* r_alias = reinterpret_cast<half2*>(&r);\n  const half2* a_alias = reinterpret_cast<const half2*>(&a);\n  const half2* b_alias = reinterpret_cast<const half2*>(&b);\n  r_alias[0] = pand(a_alias[0], b_alias[0]);\n  r_alias[1] = pand(a_alias[1], b_alias[1]);\n  r_alias[2] = pand(a_alias[2], b_alias[2]);\n  r_alias[3] = pand(a_alias[3], b_alias[3]);\n  return r;\n}\n\ntemplate <>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 por<Packet4h2>(\n    const Packet4h2& a, const Packet4h2& b) {\n  Packet4h2 r;\n  half2* r_alias = reinterpret_cast<half2*>(&r);\n  const half2* a_alias = reinterpret_cast<const half2*>(&a);\n  const half2* b_alias = reinterpret_cast<const half2*>(&b);\n  r_alias[0] = por(a_alias[0], b_alias[0]);\n  r_alias[1] = por(a_alias[1], b_alias[1]);\n  r_alias[2] = por(a_alias[2], b_alias[2]);\n  r_alias[3] = por(a_alias[3], b_alias[3]);\n  return r;\n}\n\ntemplate <>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pxor<Packet4h2>(\n    const Packet4h2& a, const Packet4h2& b) {\n  Packet4h2 r;\n  half2* r_alias = reinterpret_cast<half2*>(&r);\n  const half2* a_alias = reinterpret_cast<const half2*>(&a);\n  const half2* b_alias = reinterpret_cast<const half2*>(&b);\n  r_alias[0] = pxor(a_alias[0], b_alias[0]);\n  r_alias[1] = 
pxor(a_alias[1], b_alias[1]);\n  r_alias[2] = pxor(a_alias[2], b_alias[2]);\n  r_alias[3] = pxor(a_alias[3], b_alias[3]);\n  return r;\n}\n\ntemplate <>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2\npandnot<Packet4h2>(const Packet4h2& a, const Packet4h2& b) {\n  Packet4h2 r;\n  half2* r_alias = reinterpret_cast<half2*>(&r);\n  const half2* a_alias = reinterpret_cast<const half2*>(&a);\n  const half2* b_alias = reinterpret_cast<const half2*>(&b);\n  r_alias[0] = pandnot(a_alias[0], b_alias[0]);\n  r_alias[1] = pandnot(a_alias[1], b_alias[1]);\n  r_alias[2] = pandnot(a_alias[2], b_alias[2]);\n  r_alias[3] = pandnot(a_alias[3], b_alias[3]);\n  return r;\n}\n\ntemplate <>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 padd<Packet4h2>(\n    const Packet4h2& a, const Packet4h2& b) {\n  Packet4h2 r;\n  half2* r_alias = reinterpret_cast<half2*>(&r);\n  const half2* a_alias = reinterpret_cast<const half2*>(&a);\n  const half2* b_alias = reinterpret_cast<const half2*>(&b);\n  r_alias[0] = padd(a_alias[0], b_alias[0]);\n  r_alias[1] = padd(a_alias[1], b_alias[1]);\n  r_alias[2] = padd(a_alias[2], b_alias[2]);\n  r_alias[3] = padd(a_alias[3], b_alias[3]);\n  return r;\n}\n\ntemplate <>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 psub<Packet4h2>(\n    const Packet4h2& a, const Packet4h2& b) {\n  Packet4h2 r;\n  half2* r_alias = reinterpret_cast<half2*>(&r);\n  const half2* a_alias = reinterpret_cast<const half2*>(&a);\n  const half2* b_alias = reinterpret_cast<const half2*>(&b);\n  r_alias[0] = psub(a_alias[0], b_alias[0]);\n  r_alias[1] = psub(a_alias[1], b_alias[1]);\n  r_alias[2] = psub(a_alias[2], b_alias[2]);\n  r_alias[3] = psub(a_alias[3], b_alias[3]);\n  return r;\n}\n\ntemplate <>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pnegate(const Packet4h2& a) {\n  Packet4h2 r;\n  half2* r_alias = reinterpret_cast<half2*>(&r);\n  const half2* a_alias = reinterpret_cast<const half2*>(&a);\n  r_alias[0] = pnegate(a_alias[0]);\n  r_alias[1] = pnegate(a_alias[1]);\n  r_alias[2] = pnegate(a_alias[2]);\n  r_alias[3] = pnegate(a_alias[3]);\n  return r;\n}\n\ntemplate <>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pconj(const Packet4h2& a) {\n  return a;\n}\n\ntemplate <>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pmul<Packet4h2>(\n    const Packet4h2& a, const Packet4h2& b) {\n  Packet4h2 r;\n  half2* r_alias = reinterpret_cast<half2*>(&r);\n  const half2* a_alias = reinterpret_cast<const half2*>(&a);\n  const half2* b_alias = reinterpret_cast<const half2*>(&b);\n  r_alias[0] = pmul(a_alias[0], b_alias[0]);\n  r_alias[1] = pmul(a_alias[1], b_alias[1]);\n  r_alias[2] = pmul(a_alias[2], b_alias[2]);\n  r_alias[3] = pmul(a_alias[3], b_alias[3]);\n  return r;\n}\n\ntemplate <>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pmadd<Packet4h2>(\n    const Packet4h2& a, const Packet4h2& b, const Packet4h2& c) {\n  Packet4h2 r;\n  half2* r_alias = reinterpret_cast<half2*>(&r);\n  const half2* a_alias = reinterpret_cast<const half2*>(&a);\n  const half2* b_alias = reinterpret_cast<const half2*>(&b);\n  const half2* c_alias = reinterpret_cast<const half2*>(&c);\n  r_alias[0] = pmadd(a_alias[0], b_alias[0], c_alias[0]);\n  r_alias[1] = pmadd(a_alias[1], b_alias[1], c_alias[1]);\n  r_alias[2] = pmadd(a_alias[2], b_alias[2], c_alias[2]);\n  r_alias[3] = pmadd(a_alias[3], b_alias[3], c_alias[3]);\n  return r;\n}\n\ntemplate <>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pdiv<Packet4h2>(\n    const Packet4h2& a, const Packet4h2& b) {\n  Packet4h2 r;\n  half2* r_alias = 
reinterpret_cast<half2*>(&r);\n  const half2* a_alias = reinterpret_cast<const half2*>(&a);\n  const half2* b_alias = reinterpret_cast<const half2*>(&b);\n  r_alias[0] = pdiv(a_alias[0], b_alias[0]);\n  r_alias[1] = pdiv(a_alias[1], b_alias[1]);\n  r_alias[2] = pdiv(a_alias[2], b_alias[2]);\n  r_alias[3] = pdiv(a_alias[3], b_alias[3]);\n  return r;\n}\n\ntemplate <>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pmin<Packet4h2>(\n    const Packet4h2& a, const Packet4h2& b) {\n  Packet4h2 r;\n  half2* r_alias = reinterpret_cast<half2*>(&r);\n  const half2* a_alias = reinterpret_cast<const half2*>(&a);\n  const half2* b_alias = reinterpret_cast<const half2*>(&b);\n  r_alias[0] = pmin(a_alias[0], b_alias[0]);\n  r_alias[1] = pmin(a_alias[1], b_alias[1]);\n  r_alias[2] = pmin(a_alias[2], b_alias[2]);\n  r_alias[3] = pmin(a_alias[3], b_alias[3]);\n  return r;\n}\n\ntemplate <>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pmax<Packet4h2>(\n    const Packet4h2& a, const Packet4h2& b) {\n  Packet4h2 r;\n  half2* r_alias = reinterpret_cast<half2*>(&r);\n  const half2* a_alias = reinterpret_cast<const half2*>(&a);\n  const half2* b_alias = reinterpret_cast<const half2*>(&b);\n  r_alias[0] = pmax(a_alias[0], b_alias[0]);\n  r_alias[1] = pmax(a_alias[1], b_alias[1]);\n  r_alias[2] = pmax(a_alias[2], b_alias[2]);\n  r_alias[3] = pmax(a_alias[3], b_alias[3]);\n  return r;\n}\n\ntemplate <>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux<Packet4h2>(\n    const Packet4h2& a) {\n  const half2* a_alias = reinterpret_cast<const half2*>(&a);\n\n  return predux(a_alias[0]) + predux(a_alias[1]) +\n         predux(a_alias[2]) + predux(a_alias[3]);\n}\n\ntemplate <>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux_max<Packet4h2>(\n    const Packet4h2& a) {\n  const half2* a_alias = reinterpret_cast<const half2*>(&a);\n  half2 m0 = combine_half(predux_max(a_alias[0]),\n                            predux_max(a_alias[1]));\n  half2 m1 = combine_half(predux_max(a_alias[2]),\n                            predux_max(a_alias[3]));\n  __half first  = predux_max(m0);\n  __half second = predux_max(m1);\n#if defined(EIGEN_CUDA_HAS_FP16_ARITHMETIC)\n  return (__hgt(first, second) ? first : second);\n#else\n  float ffirst  = __half2float(first);\n  float fsecond = __half2float(second);\n  return (ffirst > fsecond)? first: second;\n#endif\n}\n\ntemplate <>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux_min<Packet4h2>(\n    const Packet4h2& a) {\n  const half2* a_alias = reinterpret_cast<const half2*>(&a);\n  half2 m0 = combine_half(predux_min(a_alias[0]),\n                            predux_min(a_alias[1]));\n  half2 m1 = combine_half(predux_min(a_alias[2]),\n                            predux_min(a_alias[3]));\n  __half first  = predux_min(m0);\n  __half second = predux_min(m1);\n#if defined(EIGEN_CUDA_HAS_FP16_ARITHMETIC)\n  return (__hlt(first, second) ? first : second);\n#else\n  float ffirst  = __half2float(first);\n  float fsecond = __half2float(second);\n  return (ffirst < fsecond)? 
first: second;\n#endif\n}\n\n// likely overflow/underflow\ntemplate <>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux_mul<Packet4h2>(\n    const Packet4h2& a) {\n  const half2* a_alias = reinterpret_cast<const half2*>(&a);\n  return predux_mul(pmul(pmul(a_alias[0], a_alias[1]),\n                                       pmul(a_alias[2], a_alias[3])));\n}\n\ntemplate <>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2\nplog1p<Packet4h2>(const Packet4h2& a) {\n  Packet4h2 r;\n  half2* r_alias = reinterpret_cast<half2*>(&r);\n  const half2* a_alias = reinterpret_cast<const half2*>(&a);\n  r_alias[0] = plog1p(a_alias[0]);\n  r_alias[1] = plog1p(a_alias[1]);\n  r_alias[2] = plog1p(a_alias[2]);\n  r_alias[3] = plog1p(a_alias[3]);\n  return r;\n}\n\ntemplate <>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2\npexpm1<Packet4h2>(const Packet4h2& a) {\n  Packet4h2 r;\n  half2* r_alias = reinterpret_cast<half2*>(&r);\n  const half2* a_alias = reinterpret_cast<const half2*>(&a);\n  r_alias[0] = pexpm1(a_alias[0]);\n  r_alias[1] = pexpm1(a_alias[1]);\n  r_alias[2] = pexpm1(a_alias[2]);\n  r_alias[3] = pexpm1(a_alias[3]);\n  return r;\n}\n\ntemplate <>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 plog<Packet4h2>(const Packet4h2& a) {\n  Packet4h2 r;\n  half2* r_alias = reinterpret_cast<half2*>(&r);\n  const half2* a_alias = reinterpret_cast<const half2*>(&a);\n  r_alias[0] = plog(a_alias[0]);\n  r_alias[1] = plog(a_alias[1]);\n  r_alias[2] = plog(a_alias[2]);\n  r_alias[3] = plog(a_alias[3]);\n  return r;\n}\n\ntemplate <>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pexp<Packet4h2>(const Packet4h2& a) {\n  Packet4h2 r;\n  half2* r_alias = reinterpret_cast<half2*>(&r);\n  const half2* a_alias = reinterpret_cast<const half2*>(&a);\n  r_alias[0] = pexp(a_alias[0]);\n  r_alias[1] = pexp(a_alias[1]);\n  r_alias[2] = pexp(a_alias[2]);\n  r_alias[3] = pexp(a_alias[3]);\n  return r;\n}\n\ntemplate <>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 psqrt<Packet4h2>(const Packet4h2& a) {\n  Packet4h2 r;\n  half2* r_alias = reinterpret_cast<half2*>(&r);\n  const half2* a_alias = reinterpret_cast<const half2*>(&a);\n  r_alias[0] = psqrt(a_alias[0]);\n  r_alias[1] = psqrt(a_alias[1]);\n  r_alias[2] = psqrt(a_alias[2]);\n  r_alias[3] = psqrt(a_alias[3]);\n  return r;\n}\n\ntemplate <>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2\nprsqrt<Packet4h2>(const Packet4h2& a) {\n  Packet4h2 r;\n  half2* r_alias = reinterpret_cast<half2*>(&r);\n  const half2* a_alias = reinterpret_cast<const half2*>(&a);\n  r_alias[0] = prsqrt(a_alias[0]);\n  r_alias[1] = prsqrt(a_alias[1]);\n  r_alias[2] = prsqrt(a_alias[2]);\n  r_alias[3] = prsqrt(a_alias[3]);\n  return r;\n}\n\n// The following specialized padd, pmul, pdiv, pmin, pmax, pset1 are needed for\n// the implementation of GPU half reduction.\ntemplate<>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 padd<half2>(const half2& a,\n                                                        const half2& b) {\n#if defined(EIGEN_GPU_HAS_FP16_ARITHMETIC)\n  return __hadd2(a, b);\n#else\n  float a1 = __low2float(a);\n  float a2 = __high2float(a);\n  float b1 = __low2float(b);\n  float b2 = __high2float(b);\n  float r1 = a1 + b1;\n  float r2 = a2 + b2;\n  return __floats2half2_rn(r1, r2);\n#endif\n}\n\ntemplate<>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmul<half2>(const half2& a,\n                                                        const half2& b) {\n#if defined(EIGEN_GPU_HAS_FP16_ARITHMETIC)\n  return __hmul2(a, b);\n#else\n  float a1 = __low2float(a);\n  float 
a2 = __high2float(a);\n  float b1 = __low2float(b);\n  float b2 = __high2float(b);\n  float r1 = a1 * b1;\n  float r2 = a2 * b2;\n  return __floats2half2_rn(r1, r2);\n#endif\n}\n\ntemplate<>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pdiv<half2>(const half2& a,\n                                                        const half2& b) {\n#if defined(EIGEN_GPU_HAS_FP16_ARITHMETIC)\n  return __h2div(a, b);\n#else\n  float a1 = __low2float(a);\n  float a2 = __high2float(a);\n  float b1 = __low2float(b);\n  float b2 = __high2float(b);\n  float r1 = a1 / b1;\n  float r2 = a2 / b2;\n  return __floats2half2_rn(r1, r2);\n#endif\n}\n\ntemplate<>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmin<half2>(const half2& a,\n                                                        const half2& b) {\n  float a1 = __low2float(a);\n  float a2 = __high2float(a);\n  float b1 = __low2float(b);\n  float b2 = __high2float(b);\n  __half r1 = a1 < b1 ? get_half2_low(a) : get_half2_low(b);\n  __half r2 = a2 < b2 ? get_half2_high(a) : get_half2_high(b);\n  return combine_half(r1, r2);\n}\n\ntemplate<>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmax<half2>(const half2& a,\n                                                        const half2& b) {\n  float a1 = __low2float(a);\n  float a2 = __high2float(a);\n  float b1 = __low2float(b);\n  float b2 = __high2float(b);\n  __half r1 = a1 > b1 ? get_half2_low(a) : get_half2_low(b);\n  __half r2 = a2 > b2 ? get_half2_high(a) : get_half2_high(b);\n  return combine_half(r1, r2);\n}\n\n// #endif // defined(EIGEN_CUDA_ARCH) || defined(EIGEN_HIPCC) || (defined(EIGEN_CUDACC) && EIGEN_COMP_CLANG && !EIGEN_COMP_NVCC)\n\n#endif // defined(EIGEN_HAS_CUDA_FP16) || defined(EIGEN_HAS_HIP_FP16)\n\n#undef EIGEN_GPU_HAS_LDG\n#undef EIGEN_CUDA_HAS_FP16_ARITHMETIC\n#undef EIGEN_GPU_HAS_FP16_ARITHMETIC\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n\n#endif // EIGEN_PACKET_MATH_GPU_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/arch/GPU/TypeCasting.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_TYPE_CASTING_GPU_H\n#define EIGEN_TYPE_CASTING_GPU_H\n\nnamespace Eigen {\n\nnamespace internal {\n\n#if (defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300) || \\\n  (defined(EIGEN_HAS_HIP_FP16) && defined(EIGEN_HIP_DEVICE_COMPILE))\n\n\ntemplate <>\nstruct type_casting_traits<Eigen::half, float> {\n  enum {\n    VectorizedCast = 1,\n    SrcCoeffRatio = 1,\n    TgtCoeffRatio = 2\n  };\n};\n\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pcast<half2, float4>(const half2& a, const half2& b) {\n  float2 r1 = __half22float2(a);\n  float2 r2 = __half22float2(b);\n  return make_float4(r1.x, r1.y, r2.x, r2.y);\n}\n\n\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pcast<float4, Packet4h2>(const float4& a, const float4& b) {\n  Packet4h2 r;\n  half2* r_alias=reinterpret_cast<half2*>(&r);\n  r_alias[0]=__floats2half2_rn(a.x,a.y);\n  r_alias[1]=__floats2half2_rn(a.z,a.w);\n  r_alias[2]=__floats2half2_rn(b.x,b.y);\n  r_alias[3]=__floats2half2_rn(b.z,b.w);\n  return r;\n}\n\ntemplate <>\nstruct type_casting_traits<float, Eigen::half> {\n  enum {\n    VectorizedCast = 1,\n    SrcCoeffRatio = 2,\n    TgtCoeffRatio = 1\n  };\n};\n\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pcast<Packet4h2, float4>(const Packet4h2& a) {\n  // Simply discard the second half of the input\n  float4 r;\n  const half2* a_alias=reinterpret_cast<const half2*>(&a);\n  float2 r1 = __half22float2(a_alias[0]);\n  float2 r2 = __half22float2(a_alias[1]);\n  r.x=static_cast<float>(r1.x);\n  r.y=static_cast<float>(r1.y);\n  r.z=static_cast<float>(r2.x);\n  r.w=static_cast<float>(r2.y);\n  return r;\n}\n\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pcast<float4, half2>(const float4& a) {\n  // Simply discard the second half of the input\n  return __floats2half2_rn(a.x, a.y);\n}\n\n#endif\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_TYPE_CASTING_GPU_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/arch/HIP/hcc/math_constants.h",
    "content": "/*\n * math_constants.h - \n *  HIP equivalent of the CUDA header of the same name\n */\n\n#ifndef __MATH_CONSTANTS_H__\n#define __MATH_CONSTANTS_H__\n\n/* single precision constants */\n\n#define HIPRT_INF_F        __int_as_float(0x7f800000)\n#define HIPRT_NAN_F        __int_as_float(0x7fffffff)\n#define HIPRT_MIN_DENORM_F __int_as_float(0x00000001)\n#define HIPRT_MAX_NORMAL_F __int_as_float(0x7f7fffff)\n#define HIPRT_NEG_ZERO_F   __int_as_float(0x80000000)\n#define HIPRT_ZERO_F       0.0f\n#define HIPRT_ONE_F        1.0f\n\n/* double precision constants */\n#define HIPRT_INF          __hiloint2double(0x7ff00000, 0x00000000)\n#define HIPRT_NAN          __hiloint2double(0xfff80000, 0x00000000)\n\n#endif\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/arch/MSA/Complex.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2018 Wave Computing, Inc.\n// Written by:\n//   Chris Larsen\n//   Alexey Frunze (afrunze@wavecomp.com)\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_COMPLEX_MSA_H\n#define EIGEN_COMPLEX_MSA_H\n\n#include <iostream>\n\nnamespace Eigen {\n\nnamespace internal {\n\n//---------- float ----------\nstruct Packet2cf {\n  EIGEN_STRONG_INLINE Packet2cf() {\n  }\n  EIGEN_STRONG_INLINE explicit Packet2cf(const std::complex<float>& a,\n                                         const std::complex<float>& b) {\n    Packet4f t = { std::real(a), std::imag(a), std::real(b), std::imag(b) };\n    v = t;\n  }\n  EIGEN_STRONG_INLINE explicit Packet2cf(const Packet4f& a) : v(a) {\n  }\n  EIGEN_STRONG_INLINE Packet2cf(const Packet2cf& a) : v(a.v) {\n  }\n  EIGEN_STRONG_INLINE Packet2cf& operator=(const Packet2cf& b) {\n    v = b.v;\n    return *this;\n  }\n  EIGEN_STRONG_INLINE Packet2cf conjugate(void) const {\n    return Packet2cf((Packet4f)__builtin_msa_bnegi_d((v2u64)v, 63));\n  }\n  EIGEN_STRONG_INLINE Packet2cf& operator*=(const Packet2cf& b) {\n    Packet4f v1, v2;\n\n    // Get the real values of a | a1_re | a1_re | a2_re | a2_re |\n    v1 = (Packet4f)__builtin_msa_ilvev_w((v4i32)v, (v4i32)v);\n    // Get the imag values of a | a1_im | a1_im | a2_im | a2_im |\n    v2 = (Packet4f)__builtin_msa_ilvod_w((v4i32)v, (v4i32)v);\n    // Multiply the real a with b\n    v1 = pmul(v1, b.v);\n    // Multiply the imag a with b\n    v2 = pmul(v2, b.v);\n    // Conjugate v2\n    v2 = Packet2cf(v2).conjugate().v;\n    // Swap real/imag elements in v2.\n    v2 = (Packet4f)__builtin_msa_shf_w((v4i32)v2, EIGEN_MSA_SHF_I8(1, 0, 3, 2));\n    // Add and return the result\n    v = padd(v1, v2);\n    return *this;\n  }\n  EIGEN_STRONG_INLINE Packet2cf operator*(const Packet2cf& b) const {\n    return Packet2cf(*this) *= b;\n  }\n  EIGEN_STRONG_INLINE Packet2cf& operator+=(const Packet2cf& b) {\n    v = padd(v, b.v);\n    return *this;\n  }\n  EIGEN_STRONG_INLINE Packet2cf operator+(const Packet2cf& b) const {\n    return Packet2cf(*this) += b;\n  }\n  EIGEN_STRONG_INLINE Packet2cf& operator-=(const Packet2cf& b) {\n    v = psub(v, b.v);\n    return *this;\n  }\n  EIGEN_STRONG_INLINE Packet2cf operator-(const Packet2cf& b) const {\n    return Packet2cf(*this) -= b;\n  }\n  EIGEN_STRONG_INLINE Packet2cf& operator/=(const Packet2cf& b) {\n    *this *= b.conjugate();\n    Packet4f s = pmul<Packet4f>(b.v, b.v);\n    s = padd(s, (Packet4f)__builtin_msa_shf_w((v4i32)s, EIGEN_MSA_SHF_I8(1, 0, 3, 2)));\n    v = pdiv(v, s);\n    return *this;\n  }\n  EIGEN_STRONG_INLINE Packet2cf operator/(const Packet2cf& b) const {\n    return Packet2cf(*this) /= b;\n  }\n  EIGEN_STRONG_INLINE Packet2cf operator-(void) const {\n    return Packet2cf(pnegate(v));\n  }\n\n  Packet4f v;\n};\n\ninline std::ostream& operator<<(std::ostream& os, const Packet2cf& value) {\n  os << \"[ (\" << value.v[0] << \", \" << value.v[1]\n     << \"i),\"\n        \"  (\"\n     << value.v[2] << \", \" << value.v[3] << \"i) ]\";\n  return os;\n}\n\ntemplate <>\nstruct packet_traits<std::complex<float> > : default_packet_traits {\n  typedef Packet2cf type;\n  typedef Packet2cf half;\n  enum {\n    Vectorizable = 1,\n    AlignedOnScalar = 1,\n    size = 2,\n    HasHalfPacket 
= 0,\n\n    HasAdd = 1,\n    HasSub = 1,\n    HasMul = 1,\n    HasDiv = 1,\n    HasNegate = 1,\n    HasAbs = 0,\n    HasAbs2 = 0,\n    HasMin = 0,\n    HasMax = 0,\n    HasSetLinear = 0,\n    HasBlend = 1\n  };\n};\n\ntemplate <>\nstruct unpacket_traits<Packet2cf> {\n  typedef std::complex<float> type;\n  enum { size = 2, alignment = Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false };\n  typedef Packet2cf half;\n};\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from) {\n  EIGEN_MSA_DEBUG;\n\n  float f0 = from.real(), f1 = from.imag();\n  Packet4f v0 = { f0, f0, f0, f0 };\n  Packet4f v1 = { f1, f1, f1, f1 };\n  return Packet2cf((Packet4f)__builtin_msa_ilvr_w((Packet4i)v1, (Packet4i)v0));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) {\n  EIGEN_MSA_DEBUG;\n\n  return a + b;\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) {\n  EIGEN_MSA_DEBUG;\n\n  return a - b;\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a) {\n  EIGEN_MSA_DEBUG;\n\n  return -a;\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a) {\n  EIGEN_MSA_DEBUG;\n\n  return a.conjugate();\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b) {\n  EIGEN_MSA_DEBUG;\n\n  return a * b;\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet2cf pand<Packet2cf>(const Packet2cf& a, const Packet2cf& b) {\n  EIGEN_MSA_DEBUG;\n\n  return Packet2cf(pand(a.v, b.v));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet2cf por<Packet2cf>(const Packet2cf& a, const Packet2cf& b) {\n  EIGEN_MSA_DEBUG;\n\n  return Packet2cf(por(a.v, b.v));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet2cf pxor<Packet2cf>(const Packet2cf& a, const Packet2cf& b) {\n  EIGEN_MSA_DEBUG;\n\n  return Packet2cf(pxor(a.v, b.v));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b) {\n  EIGEN_MSA_DEBUG;\n\n  return Packet2cf(pandnot(a.v, b.v));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet2cf pload<Packet2cf>(const std::complex<float>* from) {\n  EIGEN_MSA_DEBUG;\n\n  EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload<Packet4f>((const float*)from));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from) {\n  EIGEN_MSA_DEBUG;\n\n  EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu<Packet4f>((const float*)from));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from) {\n  EIGEN_MSA_DEBUG;\n\n  return pset1<Packet2cf>(*from);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE void pstore<std::complex<float> >(std::complex<float>* to,\n                                                      const Packet2cf& from) {\n  EIGEN_MSA_DEBUG;\n\n  EIGEN_DEBUG_ALIGNED_STORE pstore<float>((float*)to, from.v);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float>* to,\n                                                       const Packet2cf& from) {\n  EIGEN_MSA_DEBUG;\n\n  EIGEN_DEBUG_UNALIGNED_STORE pstoreu<float>((float*)to, from.v);\n}\n\ntemplate <>\nEIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(\n    const std::complex<float>* from, Index stride) {\n  EIGEN_MSA_DEBUG;\n\n  return Packet2cf(from[0 * stride], from[1 * stride]);\n}\n\ntemplate <>\nEIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, 
Packet2cf>(std::complex<float>* to,\n                                                                       const Packet2cf& from,\n                                                                       Index stride) {\n  EIGEN_MSA_DEBUG;\n\n  *to = std::complex<float>(from.v[0], from.v[1]);\n  to += stride;\n  *to = std::complex<float>(from.v[2], from.v[3]);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float>* addr) {\n  EIGEN_MSA_DEBUG;\n\n  prefetch(reinterpret_cast<const float*>(addr));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a) {\n  EIGEN_MSA_DEBUG;\n\n  return std::complex<float>(a.v[0], a.v[1]);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a) {\n  EIGEN_MSA_DEBUG;\n\n  return Packet2cf((Packet4f)__builtin_msa_shf_w((v4i32)a.v, EIGEN_MSA_SHF_I8(2, 3, 0, 1)));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet2cf pcplxflip<Packet2cf>(const Packet2cf& a) {\n  EIGEN_MSA_DEBUG;\n\n  return Packet2cf((Packet4f)__builtin_msa_shf_w((v4i32)a.v, EIGEN_MSA_SHF_I8(1, 0, 3, 2)));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a) {\n  EIGEN_MSA_DEBUG;\n\n  Packet4f value = (Packet4f)preverse((Packet2d)a.v);\n  value += a.v;\n  return std::complex<float>(value[0], value[1]);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a) {\n  EIGEN_MSA_DEBUG;\n\n  return std::complex<float>((a.v[0] * a.v[2]) - (a.v[1] * a.v[3]),\n                             (a.v[0] * a.v[3]) + (a.v[1] * a.v[2]));\n}\n\nEIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf, Packet4f)\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b) {\n  EIGEN_MSA_DEBUG;\n\n  return a / b;\n}\n\ninline std::ostream& operator<<(std::ostream& os, const PacketBlock<Packet2cf, 2>& value) {\n  os << \"[ \" << value.packet[0] << \", \" << std::endl << \"  \" << value.packet[1] << \" ]\";\n  return os;\n}\n\nEIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet2cf, 2>& kernel) {\n  EIGEN_MSA_DEBUG;\n\n  Packet4f tmp =\n      (Packet4f)__builtin_msa_ilvl_d((v2i64)kernel.packet[1].v, (v2i64)kernel.packet[0].v);\n  kernel.packet[0].v =\n      (Packet4f)__builtin_msa_ilvr_d((v2i64)kernel.packet[1].v, (v2i64)kernel.packet[0].v);\n  kernel.packet[1].v = tmp;\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet2cf pblend(const Selector<2>& ifPacket, const Packet2cf& thenPacket,\n                                     const Packet2cf& elsePacket) {\n  return (Packet2cf)(Packet4f)pblend<Packet2d>(ifPacket, (Packet2d)thenPacket.v,\n                                               (Packet2d)elsePacket.v);\n}\n\n//---------- double ----------\n\nstruct Packet1cd {\n  EIGEN_STRONG_INLINE Packet1cd() {\n  }\n  EIGEN_STRONG_INLINE explicit Packet1cd(const std::complex<double>& a) {\n    v[0] = std::real(a);\n    v[1] = std::imag(a);\n  }\n  EIGEN_STRONG_INLINE explicit Packet1cd(const Packet2d& a) : v(a) {\n  }\n  EIGEN_STRONG_INLINE Packet1cd(const Packet1cd& a) : v(a.v) {\n  }\n  EIGEN_STRONG_INLINE Packet1cd& operator=(const Packet1cd& b) {\n    v = b.v;\n    return *this;\n  }\n  EIGEN_STRONG_INLINE Packet1cd conjugate(void) const {\n    static const v2u64 p2ul_CONJ_XOR = { 0x0, 0x8000000000000000 };\n    return (Packet1cd)pxor(v, (Packet2d)p2ul_CONJ_XOR);\n  }\n  EIGEN_STRONG_INLINE Packet1cd& operator*=(const Packet1cd& b) {\n    Packet2d v1, v2;\n\n    // Get the real values of a | a1_re | a1_re\n    
v1 = (Packet2d)__builtin_msa_ilvev_d((v2i64)v, (v2i64)v);\n    // Get the imag values of a | a1_im | a1_im\n    v2 = (Packet2d)__builtin_msa_ilvod_d((v2i64)v, (v2i64)v);\n    // Multiply the real a with b\n    v1 = pmul(v1, b.v);\n    // Multiply the imag a with b\n    v2 = pmul(v2, b.v);\n    // Conjugate v2\n    v2 = Packet1cd(v2).conjugate().v;\n    // Swap real/imag elements in v2.\n    v2 = (Packet2d)__builtin_msa_shf_w((v4i32)v2, EIGEN_MSA_SHF_I8(2, 3, 0, 1));\n    // Add and return the result\n    v = padd(v1, v2);\n    return *this;\n  }\n  EIGEN_STRONG_INLINE Packet1cd operator*(const Packet1cd& b) const {\n    return Packet1cd(*this) *= b;\n  }\n  EIGEN_STRONG_INLINE Packet1cd& operator+=(const Packet1cd& b) {\n    v = padd(v, b.v);\n    return *this;\n  }\n  EIGEN_STRONG_INLINE Packet1cd operator+(const Packet1cd& b) const {\n    return Packet1cd(*this) += b;\n  }\n  EIGEN_STRONG_INLINE Packet1cd& operator-=(const Packet1cd& b) {\n    v = psub(v, b.v);\n    return *this;\n  }\n  EIGEN_STRONG_INLINE Packet1cd operator-(const Packet1cd& b) const {\n    return Packet1cd(*this) -= b;\n  }\n  EIGEN_STRONG_INLINE Packet1cd& operator/=(const Packet1cd& b) {\n    *this *= b.conjugate();\n    Packet2d s = pmul<Packet2d>(b.v, b.v);\n    s = padd(s, preverse<Packet2d>(s));\n    v = pdiv(v, s);\n    return *this;\n  }\n  EIGEN_STRONG_INLINE Packet1cd operator/(const Packet1cd& b) const {\n    return Packet1cd(*this) /= b;\n  }\n  EIGEN_STRONG_INLINE Packet1cd operator-(void) const {\n    return Packet1cd(pnegate(v));\n  }\n\n  Packet2d v;\n};\n\ninline std::ostream& operator<<(std::ostream& os, const Packet1cd& value) {\n  os << \"[ (\" << value.v[0] << \", \" << value.v[1] << \"i) ]\";\n  return os;\n}\n\ntemplate <>\nstruct packet_traits<std::complex<double> > : default_packet_traits {\n  typedef Packet1cd type;\n  typedef Packet1cd half;\n  enum {\n    Vectorizable = 1,\n    AlignedOnScalar = 0,\n    size = 1,\n    HasHalfPacket = 0,\n\n    HasAdd = 1,\n    HasSub = 1,\n    HasMul = 1,\n    HasDiv = 1,\n    HasNegate = 1,\n    HasAbs = 0,\n    HasAbs2 = 0,\n    HasMin = 0,\n    HasMax = 0,\n    HasSetLinear = 0\n  };\n};\n\ntemplate <>\nstruct unpacket_traits<Packet1cd> {\n  typedef std::complex<double> type;\n  enum { size = 1, alignment = Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false };\n  typedef Packet1cd half;\n};\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet1cd pload<Packet1cd>(const std::complex<double>* from) {\n  EIGEN_MSA_DEBUG;\n\n  EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload<Packet2d>((const double*)from));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from) {\n  EIGEN_MSA_DEBUG;\n\n  EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu<Packet2d>((const double*)from));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>& from) {\n  EIGEN_MSA_DEBUG;\n\n  return Packet1cd(from);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) {\n  EIGEN_MSA_DEBUG;\n\n  return a + b;\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) {\n  EIGEN_MSA_DEBUG;\n\n  return a - b;\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) {\n  EIGEN_MSA_DEBUG;\n\n  return -a;\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a) {\n  EIGEN_MSA_DEBUG;\n\n  return a.conjugate();\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet1cd 
pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b) {\n  EIGEN_MSA_DEBUG;\n\n  return a * b;\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet1cd pand<Packet1cd>(const Packet1cd& a, const Packet1cd& b) {\n  EIGEN_MSA_DEBUG;\n\n  return Packet1cd(pand(a.v, b.v));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet1cd por<Packet1cd>(const Packet1cd& a, const Packet1cd& b) {\n  EIGEN_MSA_DEBUG;\n\n  return Packet1cd(por(a.v, b.v));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet1cd pxor<Packet1cd>(const Packet1cd& a, const Packet1cd& b) {\n  EIGEN_MSA_DEBUG;\n\n  return Packet1cd(pxor(a.v, b.v));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b) {\n  EIGEN_MSA_DEBUG;\n\n  return Packet1cd(pandnot(a.v, b.v));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>* from) {\n  EIGEN_MSA_DEBUG;\n\n  return pset1<Packet1cd>(*from);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE void pstore<std::complex<double> >(std::complex<double>* to,\n                                                       const Packet1cd& from) {\n  EIGEN_MSA_DEBUG;\n\n  EIGEN_DEBUG_ALIGNED_STORE pstore<double>((double*)to, from.v);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double>* to,\n                                                        const Packet1cd& from) {\n  EIGEN_MSA_DEBUG;\n\n  EIGEN_DEBUG_UNALIGNED_STORE pstoreu<double>((double*)to, from.v);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double>* addr) {\n  EIGEN_MSA_DEBUG;\n\n  prefetch(reinterpret_cast<const double*>(addr));\n}\n\ntemplate <>\nEIGEN_DEVICE_FUNC inline Packet1cd pgather<std::complex<double>, Packet1cd>(\n    const std::complex<double>* from, Index stride __attribute__((unused))) {\n  EIGEN_MSA_DEBUG;\n\n  Packet1cd res;\n  res.v[0] = std::real(from[0]);\n  res.v[1] = std::imag(from[0]);\n  return res;\n}\n\ntemplate <>\nEIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet1cd>(std::complex<double>* to,\n                                                                        const Packet1cd& from,\n                                                                        Index stride\n                                                                        __attribute__((unused))) {\n  EIGEN_MSA_DEBUG;\n\n  pstore(to, from);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Packet1cd& a) {\n  EIGEN_MSA_DEBUG;\n\n  return std::complex<double>(a.v[0], a.v[1]);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a) {\n  EIGEN_MSA_DEBUG;\n\n  return a;\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Packet1cd& a) {\n  EIGEN_MSA_DEBUG;\n\n  return pfirst(a);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a) {\n  EIGEN_MSA_DEBUG;\n\n  return pfirst(a);\n}\n\nEIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd, Packet2d)\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b) {\n  EIGEN_MSA_DEBUG;\n\n  return a / b;\n}\n\nEIGEN_STRONG_INLINE Packet1cd pcplxflip /*<Packet1cd>*/ (const Packet1cd& x) {\n  EIGEN_MSA_DEBUG;\n\n  return Packet1cd(preverse(Packet2d(x.v)));\n}\n\ninline std::ostream& operator<<(std::ostream& os, const PacketBlock<Packet1cd, 2>& value) {\n  os << \"[ \" << value.packet[0] << \", \" << std::endl << \"  \" << value.packet[1] << \" ]\";\n  return 
os;\n}\n\nEIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet1cd, 2>& kernel) {\n  EIGEN_MSA_DEBUG;\n\n  Packet2d v1, v2;\n\n  v1 = (Packet2d)__builtin_msa_ilvev_d((v2i64)kernel.packet[0].v, (v2i64)kernel.packet[1].v);\n  // Get the imag values of a\n  v2 = (Packet2d)__builtin_msa_ilvod_d((v2i64)kernel.packet[0].v, (v2i64)kernel.packet[1].v);\n\n  kernel.packet[0].v = v1;\n  kernel.packet[1].v = v2;\n}\n\n}  // end namespace internal\n\n}  // end namespace Eigen\n\n#endif  // EIGEN_COMPLEX_MSA_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/arch/MSA/MathFunctions.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2007 Julien Pommier\n// Copyright (C) 2014 Pedro Gonnet (pedro.gonnet@gmail.com)\n// Copyright (C) 2016 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// Copyright (C) 2018 Wave Computing, Inc.\n// Written by:\n//   Chris Larsen\n//   Alexey Frunze (afrunze@wavecomp.com)\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n/* The sin, cos, exp, and log functions of this file come from\n * Julien Pommier's sse math library: http://gruntthepeon.free.fr/ssemath/\n */\n\n/* The tanh function of this file is an adaptation of\n * template<typename T> T generic_fast_tanh_float(const T&)\n * from MathFunctionsImpl.h.\n */\n\n#ifndef EIGEN_MATH_FUNCTIONS_MSA_H\n#define EIGEN_MATH_FUNCTIONS_MSA_H\n\nnamespace Eigen {\n\nnamespace internal {\n\ntemplate <>\nEIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f\nplog<Packet4f>(const Packet4f& _x) {\n  static _EIGEN_DECLARE_CONST_Packet4f(cephes_SQRTHF, 0.707106781186547524f);\n  static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p0, 7.0376836292e-2f);\n  static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p1, -1.1514610310e-1f);\n  static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p2, 1.1676998740e-1f);\n  static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p3, -1.2420140846e-1f);\n  static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p4, +1.4249322787e-1f);\n  static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p5, -1.6668057665e-1f);\n  static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p6, +2.0000714765e-1f);\n  static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p7, -2.4999993993e-1f);\n  static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p8, +3.3333331174e-1f);\n  static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_q1, -2.12194440e-4f);\n  static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_q2, 0.693359375f);\n  static _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);\n  static _EIGEN_DECLARE_CONST_Packet4f(1, 1.0f);\n\n  // Convert negative argument into NAN (quiet negative, to be specific).\n  Packet4f zero = (Packet4f)__builtin_msa_ldi_w(0);\n  Packet4i neg_mask = __builtin_msa_fclt_w(_x, zero);\n  Packet4i zero_mask = __builtin_msa_fceq_w(_x, zero);\n  Packet4f non_neg_x_or_nan = padd(_x, (Packet4f)neg_mask);  // Add 0.0 or NAN.\n  Packet4f x = non_neg_x_or_nan;\n\n  // Extract exponent from x = mantissa * 2**exponent, where 1.0 <= mantissa < 2.0.\n  // N.B. 
the exponent is one less of what frexpf() would return.\n  Packet4i e_int = __builtin_msa_ftint_s_w(__builtin_msa_flog2_w(x));\n  // Multiply x by 2**(-exponent-1) to get 0.5 <= x < 1.0 as from frexpf().\n  x = __builtin_msa_fexp2_w(x, (Packet4i)__builtin_msa_nori_b((v16u8)e_int, 0));\n\n  /*\n     if (x < SQRTHF) {\n       x = x + x - 1.0;\n     } else {\n       e += 1;\n       x = x - 1.0;\n     }\n  */\n  Packet4f xx = padd(x, x);\n  Packet4i ge_mask = __builtin_msa_fcle_w(p4f_cephes_SQRTHF, x);\n  e_int = psub(e_int, ge_mask);\n  x = (Packet4f)__builtin_msa_bsel_v((v16u8)ge_mask, (v16u8)xx, (v16u8)x);\n  x = psub(x, p4f_1);\n  Packet4f e = __builtin_msa_ffint_s_w(e_int);\n\n  Packet4f x2 = pmul(x, x);\n  Packet4f x3 = pmul(x2, x);\n\n  Packet4f y, y1, y2;\n  y = pmadd(p4f_cephes_log_p0, x, p4f_cephes_log_p1);\n  y1 = pmadd(p4f_cephes_log_p3, x, p4f_cephes_log_p4);\n  y2 = pmadd(p4f_cephes_log_p6, x, p4f_cephes_log_p7);\n  y = pmadd(y, x, p4f_cephes_log_p2);\n  y1 = pmadd(y1, x, p4f_cephes_log_p5);\n  y2 = pmadd(y2, x, p4f_cephes_log_p8);\n  y = pmadd(y, x3, y1);\n  y = pmadd(y, x3, y2);\n  y = pmul(y, x3);\n\n  y = pmadd(e, p4f_cephes_log_q1, y);\n  x = __builtin_msa_fmsub_w(x, x2, p4f_half);\n  x = padd(x, y);\n  x = pmadd(e, p4f_cephes_log_q2, x);\n\n  // x is now the logarithm result candidate. We still need to handle the\n  // extreme arguments of zero and positive infinity, though.\n  // N.B. if the argument is +INFINITY, x is NAN because the polynomial terms\n  // contain infinities of both signs (see the coefficients and code above).\n  // INFINITY - INFINITY is NAN.\n\n  // If the argument is +INFINITY, make it the new result candidate.\n  // To achieve that we choose the smaller of the result candidate and the\n  // argument.\n  // This is correct for all finite pairs of values (the logarithm is smaller\n  // than the argument).\n  // This is also correct in the special case when the argument is +INFINITY\n  // and the result candidate is NAN. 
This is because the fmin.df instruction\n  // prefers non-NANs to NANs.\n  x = __builtin_msa_fmin_w(x, non_neg_x_or_nan);\n\n  // If the argument is zero (including -0.0), the result becomes -INFINITY.\n  Packet4i neg_infs = __builtin_msa_slli_w(zero_mask, 23);\n  x = (Packet4f)__builtin_msa_bsel_v((v16u8)zero_mask, (v16u8)x, (v16u8)neg_infs);\n\n  return x;\n}\n\ntemplate <>\nEIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f\npexp<Packet4f>(const Packet4f& _x) {\n  // Limiting single-precision pexp's argument to [-128, +128] lets pexp\n  // reach 0 and INFINITY naturally.\n  static _EIGEN_DECLARE_CONST_Packet4f(exp_lo, -128.0f);\n  static _EIGEN_DECLARE_CONST_Packet4f(exp_hi, +128.0f);\n  static _EIGEN_DECLARE_CONST_Packet4f(cephes_LOG2EF, 1.44269504088896341f);\n  static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C1, 0.693359375f);\n  static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C2, -2.12194440e-4f);\n  static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p0, 1.9875691500e-4f);\n  static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p1, 1.3981999507e-3f);\n  static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p2, 8.3334519073e-3f);\n  static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p3, 4.1665795894e-2f);\n  static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p4, 1.6666665459e-1f);\n  static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p5, 5.0000001201e-1f);\n  static _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);\n  static _EIGEN_DECLARE_CONST_Packet4f(1, 1.0f);\n\n  Packet4f x = _x;\n\n  // Clamp x.\n  x = (Packet4f)__builtin_msa_bsel_v((v16u8)__builtin_msa_fclt_w(x, p4f_exp_lo), (v16u8)x,\n                                     (v16u8)p4f_exp_lo);\n  x = (Packet4f)__builtin_msa_bsel_v((v16u8)__builtin_msa_fclt_w(p4f_exp_hi, x), (v16u8)x,\n                                     (v16u8)p4f_exp_hi);\n\n  // Round to nearest integer by adding 0.5 (with x's sign) and truncating.\n  Packet4f x2_add = (Packet4f)__builtin_msa_binsli_w((v4u32)p4f_half, (v4u32)x, 0);\n  Packet4f x2 = pmadd(x, p4f_cephes_LOG2EF, x2_add);\n  Packet4i x2_int = __builtin_msa_ftrunc_s_w(x2);\n  Packet4f x2_int_f = __builtin_msa_ffint_s_w(x2_int);\n\n  x = __builtin_msa_fmsub_w(x, x2_int_f, p4f_cephes_exp_C1);\n  x = __builtin_msa_fmsub_w(x, x2_int_f, p4f_cephes_exp_C2);\n\n  Packet4f z = pmul(x, x);\n\n  Packet4f y = p4f_cephes_exp_p0;\n  y = pmadd(y, x, p4f_cephes_exp_p1);\n  y = pmadd(y, x, p4f_cephes_exp_p2);\n  y = pmadd(y, x, p4f_cephes_exp_p3);\n  y = pmadd(y, x, p4f_cephes_exp_p4);\n  y = pmadd(y, x, p4f_cephes_exp_p5);\n  y = pmadd(y, z, x);\n  y = padd(y, p4f_1);\n\n  // y *= 2**exponent.\n  y = __builtin_msa_fexp2_w(y, x2_int);\n\n  return y;\n}\n\ntemplate <>\nEIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f\nptanh<Packet4f>(const Packet4f& _x) {\n  static _EIGEN_DECLARE_CONST_Packet4f(tanh_tiny, 1e-4f);\n  static _EIGEN_DECLARE_CONST_Packet4f(tanh_hi, 9.0f);\n  // The monomial coefficients of the numerator polynomial (odd).\n  static _EIGEN_DECLARE_CONST_Packet4f(alpha_1, 4.89352455891786e-3f);\n  static _EIGEN_DECLARE_CONST_Packet4f(alpha_3, 6.37261928875436e-4f);\n  static _EIGEN_DECLARE_CONST_Packet4f(alpha_5, 1.48572235717979e-5f);\n  static _EIGEN_DECLARE_CONST_Packet4f(alpha_7, 5.12229709037114e-8f);\n  static _EIGEN_DECLARE_CONST_Packet4f(alpha_9, -8.60467152213735e-11f);\n  static _EIGEN_DECLARE_CONST_Packet4f(alpha_11, 2.00018790482477e-13f);\n  static _EIGEN_DECLARE_CONST_Packet4f(alpha_13, -2.76076847742355e-16f);\n  // The monomial coefficients of the denominator polynomial 
(even).\n  static _EIGEN_DECLARE_CONST_Packet4f(beta_0, 4.89352518554385e-3f);\n  static _EIGEN_DECLARE_CONST_Packet4f(beta_2, 2.26843463243900e-3f);\n  static _EIGEN_DECLARE_CONST_Packet4f(beta_4, 1.18534705686654e-4f);\n  static _EIGEN_DECLARE_CONST_Packet4f(beta_6, 1.19825839466702e-6f);\n\n  Packet4f x = pabs(_x);\n  Packet4i tiny_mask = __builtin_msa_fclt_w(x, p4f_tanh_tiny);\n\n  // Clamp the inputs to the range [-9, 9] since anything outside\n  // this range is -/+1.0f in single-precision.\n  x = (Packet4f)__builtin_msa_bsel_v((v16u8)__builtin_msa_fclt_w(p4f_tanh_hi, x), (v16u8)x,\n                                     (v16u8)p4f_tanh_hi);\n\n  // Since the polynomials are odd/even, we need x**2.\n  Packet4f x2 = pmul(x, x);\n\n  // Evaluate the numerator polynomial p.\n  Packet4f p = pmadd(x2, p4f_alpha_13, p4f_alpha_11);\n  p = pmadd(x2, p, p4f_alpha_9);\n  p = pmadd(x2, p, p4f_alpha_7);\n  p = pmadd(x2, p, p4f_alpha_5);\n  p = pmadd(x2, p, p4f_alpha_3);\n  p = pmadd(x2, p, p4f_alpha_1);\n  p = pmul(x, p);\n\n  // Evaluate the denominator polynomial q.\n  Packet4f q = pmadd(x2, p4f_beta_6, p4f_beta_4);\n  q = pmadd(x2, q, p4f_beta_2);\n  q = pmadd(x2, q, p4f_beta_0);\n\n  // Divide the numerator by the denominator.\n  p = pdiv(p, q);\n\n  // Reinstate the sign.\n  p = (Packet4f)__builtin_msa_binsli_w((v4u32)p, (v4u32)_x, 0);\n\n  // When the argument is very small in magnitude it's more accurate to just return it.\n  p = (Packet4f)__builtin_msa_bsel_v((v16u8)tiny_mask, (v16u8)p, (v16u8)_x);\n\n  return p;\n}\n\ntemplate <bool sine>\nPacket4f psincos_inner_msa_float(const Packet4f& _x) {\n  static _EIGEN_DECLARE_CONST_Packet4f(sincos_max_arg, 13176795.0f);  // Approx. (2**24) / (4/Pi).\n  static _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP1, -0.78515625f);\n  static _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP2, -2.4187564849853515625e-4f);\n  static _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP3, -3.77489497744594108e-8f);\n  static _EIGEN_DECLARE_CONST_Packet4f(sincof_p0, -1.9515295891e-4f);\n  static _EIGEN_DECLARE_CONST_Packet4f(sincof_p1, 8.3321608736e-3f);\n  static _EIGEN_DECLARE_CONST_Packet4f(sincof_p2, -1.6666654611e-1f);\n  static _EIGEN_DECLARE_CONST_Packet4f(coscof_p0, 2.443315711809948e-5f);\n  static _EIGEN_DECLARE_CONST_Packet4f(coscof_p1, -1.388731625493765e-3f);\n  static _EIGEN_DECLARE_CONST_Packet4f(coscof_p2, 4.166664568298827e-2f);\n  static _EIGEN_DECLARE_CONST_Packet4f(cephes_FOPI, 1.27323954473516f);  // 4/Pi.\n  static _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);\n  static _EIGEN_DECLARE_CONST_Packet4f(1, 1.0f);\n\n  Packet4f x = pabs(_x);\n\n  // Translate infinite arguments into NANs.\n  Packet4f zero_or_nan_if_inf = psub(_x, _x);\n  x = padd(x, zero_or_nan_if_inf);\n  // Prevent sin/cos from generating values larger than 1.0 in magnitude\n  // for very large arguments by setting x to 0.0.\n  Packet4i small_or_nan_mask = __builtin_msa_fcult_w(x, p4f_sincos_max_arg);\n  x = pand(x, (Packet4f)small_or_nan_mask);\n\n  // Scale x by 4/Pi to find x's octant.\n  Packet4f y = pmul(x, p4f_cephes_FOPI);\n  // Get the octant. 
We'll reduce x by this number of octants or by one more than it.\n  Packet4i y_int = __builtin_msa_ftrunc_s_w(y);\n  // x's from even-numbered octants will translate to octant 0: [0, +Pi/4].\n  // x's from odd-numbered octants will translate to octant -1: [-Pi/4, 0].\n  // Adjustment for odd-numbered octants: octant = (octant + 1) & (~1).\n  Packet4i y_int1 = __builtin_msa_addvi_w(y_int, 1);\n  Packet4i y_int2 = (Packet4i)__builtin_msa_bclri_w((Packet4ui)y_int1, 0); // bclri = bit-clear\n  y = __builtin_msa_ffint_s_w(y_int2);\n\n  // Compute the sign to apply to the polynomial.\n  Packet4i sign_mask = sine ? pxor(__builtin_msa_slli_w(y_int1, 29), (Packet4i)_x)\n                            : __builtin_msa_slli_w(__builtin_msa_addvi_w(y_int, 3), 29);\n\n  // Get the polynomial selection mask.\n  // We'll calculate both (sin and cos) polynomials and then select from the two.\n  Packet4i poly_mask = __builtin_msa_ceqi_w(__builtin_msa_slli_w(y_int2, 30), 0);\n\n  // Reduce x by y octants to get: -Pi/4 <= x <= +Pi/4.\n  // The magic pass: \"Extended precision modular arithmetic\"\n  // x = ((x - y * DP1) - y * DP2) - y * DP3\n  Packet4f tmp1 = pmul(y, p4f_minus_cephes_DP1);\n  Packet4f tmp2 = pmul(y, p4f_minus_cephes_DP2);\n  Packet4f tmp3 = pmul(y, p4f_minus_cephes_DP3);\n  x = padd(x, tmp1);\n  x = padd(x, tmp2);\n  x = padd(x, tmp3);\n\n  // Evaluate the cos(x) polynomial.\n  y = p4f_coscof_p0;\n  Packet4f z = pmul(x, x);\n  y = pmadd(y, z, p4f_coscof_p1);\n  y = pmadd(y, z, p4f_coscof_p2);\n  y = pmul(y, z);\n  y = pmul(y, z);\n  y = __builtin_msa_fmsub_w(y, z, p4f_half);\n  y = padd(y, p4f_1);\n\n  // Evaluate the sin(x) polynomial.\n  Packet4f y2 = p4f_sincof_p0;\n  y2 = pmadd(y2, z, p4f_sincof_p1);\n  y2 = pmadd(y2, z, p4f_sincof_p2);\n  y2 = pmul(y2, z);\n  y2 = pmadd(y2, x, x);\n\n  // Select the correct result from the two polynomials.\n  y = sine ? 
(Packet4f)__builtin_msa_bsel_v((v16u8)poly_mask, (v16u8)y, (v16u8)y2)\n           : (Packet4f)__builtin_msa_bsel_v((v16u8)poly_mask, (v16u8)y2, (v16u8)y);\n\n  // Update the sign.\n  sign_mask = pxor(sign_mask, (Packet4i)y);\n  y = (Packet4f)__builtin_msa_binsli_w((v4u32)y, (v4u32)sign_mask, 0); // binsli = bit-insert-left\n  return y;\n}\n\ntemplate <>\nEIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f\npsin<Packet4f>(const Packet4f& x) {\n  return psincos_inner_msa_float</* sine */ true>(x);\n}\n\ntemplate <>\nEIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f\npcos<Packet4f>(const Packet4f& x) {\n  return psincos_inner_msa_float</* sine */ false>(x);\n}\n\ntemplate <>\nEIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet2d\npexp<Packet2d>(const Packet2d& _x) {\n  // Limiting double-precision pexp's argument to [-1024, +1024] lets pexp\n  // reach 0 and INFINITY naturally.\n  static _EIGEN_DECLARE_CONST_Packet2d(exp_lo, -1024.0);\n  static _EIGEN_DECLARE_CONST_Packet2d(exp_hi, +1024.0);\n  static _EIGEN_DECLARE_CONST_Packet2d(cephes_LOG2EF, 1.4426950408889634073599);\n  static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C1, 0.693145751953125);\n  static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C2, 1.42860682030941723212e-6);\n  static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p0, 1.26177193074810590878e-4);\n  static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p1, 3.02994407707441961300e-2);\n  static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p2, 9.99999999999999999910e-1);\n  static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q0, 3.00198505138664455042e-6);\n  static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q1, 2.52448340349684104192e-3);\n  static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q2, 2.27265548208155028766e-1);\n  static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q3, 2.00000000000000000009e0);\n  static _EIGEN_DECLARE_CONST_Packet2d(half, 0.5);\n  static _EIGEN_DECLARE_CONST_Packet2d(1, 1.0);\n  static _EIGEN_DECLARE_CONST_Packet2d(2, 2.0);\n\n  Packet2d x = _x;\n\n  // Clamp x.\n  x = (Packet2d)__builtin_msa_bsel_v((v16u8)__builtin_msa_fclt_d(x, p2d_exp_lo), (v16u8)x,\n                                     (v16u8)p2d_exp_lo);\n  x = (Packet2d)__builtin_msa_bsel_v((v16u8)__builtin_msa_fclt_d(p2d_exp_hi, x), (v16u8)x,\n                                     (v16u8)p2d_exp_hi);\n\n  // Round to nearest integer by adding 0.5 (with x's sign) and truncating.\n  Packet2d x2_add = (Packet2d)__builtin_msa_binsli_d((v2u64)p2d_half, (v2u64)x, 0);\n  Packet2d x2 = pmadd(x, p2d_cephes_LOG2EF, x2_add);\n  Packet2l x2_long = __builtin_msa_ftrunc_s_d(x2);\n  Packet2d x2_long_d = __builtin_msa_ffint_s_d(x2_long);\n\n  x = __builtin_msa_fmsub_d(x, x2_long_d, p2d_cephes_exp_C1);\n  x = __builtin_msa_fmsub_d(x, x2_long_d, p2d_cephes_exp_C2);\n\n  x2 = pmul(x, x);\n\n  Packet2d px = p2d_cephes_exp_p0;\n  px = pmadd(px, x2, p2d_cephes_exp_p1);\n  px = pmadd(px, x2, p2d_cephes_exp_p2);\n  px = pmul(px, x);\n\n  Packet2d qx = p2d_cephes_exp_q0;\n  qx = pmadd(qx, x2, p2d_cephes_exp_q1);\n  qx = pmadd(qx, x2, p2d_cephes_exp_q2);\n  qx = pmadd(qx, x2, p2d_cephes_exp_q3);\n\n  x = pdiv(px, psub(qx, px));\n  x = pmadd(p2d_2, x, p2d_1);\n\n  // x *= 2**exponent.\n  x = __builtin_msa_fexp2_d(x, x2_long);\n\n  return x;\n}\n\n}  // end namespace internal\n\n}  // end namespace Eigen\n\n#endif  // EIGEN_MATH_FUNCTIONS_MSA_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/arch/MSA/PacketMath.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2018 Wave Computing, Inc.\n// Written by:\n//   Chris Larsen\n//   Alexey Frunze (afrunze@wavecomp.com)\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_PACKET_MATH_MSA_H\n#define EIGEN_PACKET_MATH_MSA_H\n\n#include <iostream>\n#include <string>\n\nnamespace Eigen {\n\nnamespace internal {\n\n#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD\n#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8\n#endif\n\n#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD\n#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD\n#endif\n\n#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS\n#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 32\n#endif\n\n#if 0\n#define EIGEN_MSA_DEBUG                                                             \\\n  static bool firstTime = true;                                                     \\\n  do {                                                                              \\\n    if (firstTime) {                                                                \\\n      std::cout << __FILE__ << ':' << __LINE__ << ':' << __FUNCTION__ << std::endl; \\\n      firstTime = false;                                                            \\\n    }                                                                               \\\n  } while (0)\n#else\n#define EIGEN_MSA_DEBUG\n#endif\n\n#define EIGEN_MSA_SHF_I8(a, b, c, d) (((d) << 6) | ((c) << 4) | ((b) << 2) | (a))\n\ntypedef v4f32 Packet4f;\ntypedef v4i32 Packet4i;\ntypedef v4u32 Packet4ui;\n\n#define _EIGEN_DECLARE_CONST_Packet4f(NAME, X) const Packet4f p4f_##NAME = { X, X, X, X }\n#define _EIGEN_DECLARE_CONST_Packet4i(NAME, X) const Packet4i p4i_##NAME = { X, X, X, X }\n#define _EIGEN_DECLARE_CONST_Packet4ui(NAME, X) const Packet4ui p4ui_##NAME = { X, X, X, X }\n\ninline std::ostream& operator<<(std::ostream& os, const Packet4f& value) {\n  os << \"[ \" << value[0] << \", \" << value[1] << \", \" << value[2] << \", \" << value[3] << \" ]\";\n  return os;\n}\n\ninline std::ostream& operator<<(std::ostream& os, const Packet4i& value) {\n  os << \"[ \" << value[0] << \", \" << value[1] << \", \" << value[2] << \", \" << value[3] << \" ]\";\n  return os;\n}\n\ninline std::ostream& operator<<(std::ostream& os, const Packet4ui& value) {\n  os << \"[ \" << value[0] << \", \" << value[1] << \", \" << value[2] << \", \" << value[3] << \" ]\";\n  return os;\n}\n\ntemplate <>\nstruct packet_traits<float> : default_packet_traits {\n  typedef Packet4f type;\n  typedef Packet4f half;  // Packet2f intrinsics not implemented yet\n  enum {\n    Vectorizable = 1,\n    AlignedOnScalar = 1,\n    size = 4,\n    HasHalfPacket = 0,  // Packet2f intrinsics not implemented yet\n    // FIXME check the Has*\n    HasDiv = 1,\n    HasSin = EIGEN_FAST_MATH,\n    HasCos = EIGEN_FAST_MATH,\n    HasTanh = EIGEN_FAST_MATH,\n    HasErf = EIGEN_FAST_MATH,\n    HasLog = 1,\n    HasExp = 1,\n    HasSqrt = 1,\n    HasRsqrt = 1,\n    HasRound = 1,\n    HasFloor = 1,\n    HasCeil = 1,\n    HasBlend = 1\n  };\n};\n\ntemplate <>\nstruct packet_traits<int32_t> : default_packet_traits {\n  typedef Packet4i type;\n  typedef Packet4i half;  // Packet2i intrinsics not implemented yet\n  enum {\n    Vectorizable = 1,\n    AlignedOnScalar = 1,\n    size = 4,\n    HasHalfPacket = 0,  // Packet2i intrinsics 
not implemented yet\n    // FIXME check the Has*\n    HasDiv = 1,\n    HasBlend = 1\n  };\n};\n\ntemplate <>\nstruct unpacket_traits<Packet4f> {\n  typedef float type;\n  enum { size = 4, alignment = Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false };\n  typedef Packet4f half;\n};\n\ntemplate <>\nstruct unpacket_traits<Packet4i> {\n  typedef int32_t type;\n  enum { size = 4, alignment = Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false };\n  typedef Packet4i half;\n};\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) {\n  EIGEN_MSA_DEBUG;\n\n  Packet4f v = { from, from, from, from };\n  return v;\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int32_t& from) {\n  EIGEN_MSA_DEBUG;\n\n  return __builtin_msa_fill_w(from);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4f pload1<Packet4f>(const float* from) {\n  EIGEN_MSA_DEBUG;\n\n  float f = *from;\n  Packet4f v = { f, f, f, f };\n  return v;\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4i pload1<Packet4i>(const int32_t* from) {\n  EIGEN_MSA_DEBUG;\n\n  return __builtin_msa_fill_w(*from);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4f padd<Packet4f>(const Packet4f& a, const Packet4f& b) {\n  EIGEN_MSA_DEBUG;\n\n  return __builtin_msa_fadd_w(a, b);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4i padd<Packet4i>(const Packet4i& a, const Packet4i& b) {\n  EIGEN_MSA_DEBUG;\n\n  return __builtin_msa_addv_w(a, b);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4f plset<Packet4f>(const float& a) {\n  EIGEN_MSA_DEBUG;\n\n  static const Packet4f countdown = { 0.0f, 1.0f, 2.0f, 3.0f };\n  return padd(pset1<Packet4f>(a), countdown);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4i plset<Packet4i>(const int32_t& a) {\n  EIGEN_MSA_DEBUG;\n\n  static const Packet4i countdown = { 0, 1, 2, 3 };\n  return padd(pset1<Packet4i>(a), countdown);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4f psub<Packet4f>(const Packet4f& a, const Packet4f& b) {\n  EIGEN_MSA_DEBUG;\n\n  return __builtin_msa_fsub_w(a, b);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4i psub<Packet4i>(const Packet4i& a, const Packet4i& b) {\n  EIGEN_MSA_DEBUG;\n\n  return __builtin_msa_subv_w(a, b);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a) {\n  EIGEN_MSA_DEBUG;\n\n  return (Packet4f)__builtin_msa_bnegi_w((v4u32)a, 31);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a) {\n  EIGEN_MSA_DEBUG;\n\n  return __builtin_msa_addvi_w((v4i32)__builtin_msa_nori_b((v16u8)a, 0), 1);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4f pconj(const Packet4f& a) {\n  EIGEN_MSA_DEBUG;\n\n  return a;\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4i pconj(const Packet4i& a) {\n  EIGEN_MSA_DEBUG;\n\n  return a;\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4f pmul<Packet4f>(const Packet4f& a, const Packet4f& b) {\n  EIGEN_MSA_DEBUG;\n\n  return __builtin_msa_fmul_w(a, b);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4i pmul<Packet4i>(const Packet4i& a, const Packet4i& b) {\n  EIGEN_MSA_DEBUG;\n\n  return __builtin_msa_mulv_w(a, b);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b) {\n  EIGEN_MSA_DEBUG;\n\n  return __builtin_msa_fdiv_w(a, b);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& a, const Packet4i& b) {\n  EIGEN_MSA_DEBUG;\n\n  return __builtin_msa_div_s_w(a, b);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, 
const Packet4f& c) {\n  EIGEN_MSA_DEBUG;\n\n  return __builtin_msa_fmadd_w(c, a, b);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) {\n  EIGEN_MSA_DEBUG;\n\n  // Use \"asm\" construct to avoid __builtin_msa_maddv_w GNU C bug.\n  Packet4i value = c;\n  __asm__(\"maddv.w %w[value], %w[a], %w[b]\\n\"\n          // Outputs\n          : [value] \"+f\"(value)\n          // Inputs\n          : [a] \"f\"(a), [b] \"f\"(b));\n  return value;\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b) {\n  EIGEN_MSA_DEBUG;\n\n  return (Packet4f)__builtin_msa_and_v((v16u8)a, (v16u8)b);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4i pand<Packet4i>(const Packet4i& a, const Packet4i& b) {\n  EIGEN_MSA_DEBUG;\n\n  return (Packet4i)__builtin_msa_and_v((v16u8)a, (v16u8)b);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4f por<Packet4f>(const Packet4f& a, const Packet4f& b) {\n  EIGEN_MSA_DEBUG;\n\n  return (Packet4f)__builtin_msa_or_v((v16u8)a, (v16u8)b);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4i por<Packet4i>(const Packet4i& a, const Packet4i& b) {\n  EIGEN_MSA_DEBUG;\n\n  return (Packet4i)__builtin_msa_or_v((v16u8)a, (v16u8)b);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4f pxor<Packet4f>(const Packet4f& a, const Packet4f& b) {\n  EIGEN_MSA_DEBUG;\n\n  return (Packet4f)__builtin_msa_xor_v((v16u8)a, (v16u8)b);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4i pxor<Packet4i>(const Packet4i& a, const Packet4i& b) {\n  EIGEN_MSA_DEBUG;\n\n  return (Packet4i)__builtin_msa_xor_v((v16u8)a, (v16u8)b);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, const Packet4f& b) {\n  EIGEN_MSA_DEBUG;\n\n  return pand(a, (Packet4f)__builtin_msa_xori_b((v16u8)b, 255));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) {\n  EIGEN_MSA_DEBUG;\n\n  return pand(a, (Packet4i)__builtin_msa_xori_b((v16u8)b, 255));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b) {\n  EIGEN_MSA_DEBUG;\n\n#if EIGEN_FAST_MATH\n  // This prefers numbers to NaNs.\n  return __builtin_msa_fmin_w(a, b);\n#else\n  // This prefers NaNs to numbers.\n  Packet4i aNaN = __builtin_msa_fcun_w(a, a);\n  Packet4i aMinOrNaN = por(__builtin_msa_fclt_w(a, b), aNaN);\n  return (Packet4f)__builtin_msa_bsel_v((v16u8)aMinOrNaN, (v16u8)b, (v16u8)a);\n#endif\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b) {\n  EIGEN_MSA_DEBUG;\n\n  return __builtin_msa_min_s_w(a, b);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b) {\n  EIGEN_MSA_DEBUG;\n\n#if EIGEN_FAST_MATH\n  // This prefers numbers to NaNs.\n  return __builtin_msa_fmax_w(a, b);\n#else\n  // This prefers NaNs to numbers.\n  Packet4i aNaN = __builtin_msa_fcun_w(a, a);\n  Packet4i aMaxOrNaN = por(__builtin_msa_fclt_w(b, a), aNaN);\n  return (Packet4f)__builtin_msa_bsel_v((v16u8)aMaxOrNaN, (v16u8)b, (v16u8)a);\n#endif\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b) {\n  EIGEN_MSA_DEBUG;\n\n  return __builtin_msa_max_s_w(a, b);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from) {\n  EIGEN_MSA_DEBUG;\n\n  EIGEN_DEBUG_ALIGNED_LOAD return (Packet4f)__builtin_msa_ld_w(const_cast<float*>(from), 0);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int32_t* from) {\n  
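// Note: MSA vector loads tolerate unaligned addresses, which is why ploadu below\n  // reuses this same builtin; the split only affects Eigen's debug instrumentation.\n  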
EIGEN_MSA_DEBUG;\n\n  EIGEN_DEBUG_ALIGNED_LOAD return __builtin_msa_ld_w(const_cast<int32_t*>(from), 0);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from) {\n  EIGEN_MSA_DEBUG;\n\n  EIGEN_DEBUG_UNALIGNED_LOAD return (Packet4f)__builtin_msa_ld_w(const_cast<float*>(from), 0);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int32_t* from) {\n  EIGEN_MSA_DEBUG;\n\n  EIGEN_DEBUG_UNALIGNED_LOAD return (Packet4i)__builtin_msa_ld_w(const_cast<int32_t*>(from), 0);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from) {\n  EIGEN_MSA_DEBUG;\n\n  float f0 = from[0], f1 = from[1];\n  Packet4f v0 = { f0, f0, f0, f0 };\n  Packet4f v1 = { f1, f1, f1, f1 };\n  return (Packet4f)__builtin_msa_ilvr_d((v2i64)v1, (v2i64)v0);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int32_t* from) {\n  EIGEN_MSA_DEBUG;\n\n  int32_t i0 = from[0], i1 = from[1];\n  Packet4i v0 = { i0, i0, i0, i0 };\n  Packet4i v1 = { i1, i1, i1, i1 };\n  return (Packet4i)__builtin_msa_ilvr_d((v2i64)v1, (v2i64)v0);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from) {\n  EIGEN_MSA_DEBUG;\n\n  EIGEN_DEBUG_ALIGNED_STORE __builtin_msa_st_w((Packet4i)from, to, 0);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE void pstore<int32_t>(int32_t* to, const Packet4i& from) {\n  EIGEN_MSA_DEBUG;\n\n  EIGEN_DEBUG_ALIGNED_STORE __builtin_msa_st_w(from, to, 0);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from) {\n  EIGEN_MSA_DEBUG;\n\n  EIGEN_DEBUG_UNALIGNED_STORE __builtin_msa_st_w((Packet4i)from, to, 0);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE void pstoreu<int32_t>(int32_t* to, const Packet4i& from) {\n  EIGEN_MSA_DEBUG;\n\n  EIGEN_DEBUG_UNALIGNED_STORE __builtin_msa_st_w(from, to, 0);\n}\n\ntemplate <>\nEIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, Index stride) {\n  EIGEN_MSA_DEBUG;\n\n  float f = *from;\n  Packet4f v = { f, f, f, f };\n  v[1] = from[stride];\n  v[2] = from[2 * stride];\n  v[3] = from[3 * stride];\n  return v;\n}\n\ntemplate <>\nEIGEN_DEVICE_FUNC inline Packet4i pgather<int32_t, Packet4i>(const int32_t* from, Index stride) {\n  EIGEN_MSA_DEBUG;\n\n  int32_t i = *from;\n  Packet4i v = { i, i, i, i };\n  v[1] = from[stride];\n  v[2] = from[2 * stride];\n  v[3] = from[3 * stride];\n  return v;\n}\n\ntemplate <>\nEIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from,\n                                                        Index stride) {\n  EIGEN_MSA_DEBUG;\n\n  *to = from[0];\n  to += stride;\n  *to = from[1];\n  to += stride;\n  *to = from[2];\n  to += stride;\n  *to = from[3];\n}\n\ntemplate <>\nEIGEN_DEVICE_FUNC inline void pscatter<int32_t, Packet4i>(int32_t* to, const Packet4i& from,\n                                                          Index stride) {\n  EIGEN_MSA_DEBUG;\n\n  *to = from[0];\n  to += stride;\n  *to = from[1];\n  to += stride;\n  *to = from[2];\n  to += stride;\n  *to = from[3];\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE void prefetch<float>(const float* addr) {\n  EIGEN_MSA_DEBUG;\n\n  __builtin_prefetch(addr);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE void prefetch<int32_t>(const int32_t* addr) {\n  EIGEN_MSA_DEBUG;\n\n  __builtin_prefetch(addr);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) {\n  EIGEN_MSA_DEBUG;\n\n  return a[0];\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE int32_t pfirst<Packet4i>(const Packet4i& a) {\n  EIGEN_MSA_DEBUG;\n\n 
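 // Packet4i is a GCC-style vector type (v4i32), so plain subscripting reads lane 0.\n 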
 return a[0];\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a) {\n  EIGEN_MSA_DEBUG;\n\n  return (Packet4f)__builtin_msa_shf_w((v4i32)a, EIGEN_MSA_SHF_I8(3, 2, 1, 0));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a) {\n  EIGEN_MSA_DEBUG;\n\n  return __builtin_msa_shf_w(a, EIGEN_MSA_SHF_I8(3, 2, 1, 0));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a) {\n  EIGEN_MSA_DEBUG;\n\n  return (Packet4f)__builtin_msa_bclri_w((v4u32)a, 31);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a) {\n  EIGEN_MSA_DEBUG;\n\n  Packet4i zero = __builtin_msa_ldi_w(0);\n  return __builtin_msa_add_a_w(zero, a);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a) {\n  EIGEN_MSA_DEBUG;\n\n  Packet4f s = padd(a, (Packet4f)__builtin_msa_shf_w((v4i32)a, EIGEN_MSA_SHF_I8(2, 3, 0, 1)));\n  s = padd(s, (Packet4f)__builtin_msa_shf_w((v4i32)s, EIGEN_MSA_SHF_I8(1, 0, 3, 2)));\n  return s[0];\n}\n\n\ntemplate <>\nEIGEN_STRONG_INLINE int32_t predux<Packet4i>(const Packet4i& a) {\n  EIGEN_MSA_DEBUG;\n\n  Packet4i s = padd(a, __builtin_msa_shf_w(a, EIGEN_MSA_SHF_I8(2, 3, 0, 1)));\n  s = padd(s, __builtin_msa_shf_w(s, EIGEN_MSA_SHF_I8(1, 0, 3, 2)));\n  return s[0];\n}\n\n// Other reduction functions:\n// mul\ntemplate <>\nEIGEN_STRONG_INLINE float predux_mul<Packet4f>(const Packet4f& a) {\n  EIGEN_MSA_DEBUG;\n\n  Packet4f p = pmul(a, (Packet4f)__builtin_msa_shf_w((v4i32)a, EIGEN_MSA_SHF_I8(2, 3, 0, 1)));\n  p = pmul(p, (Packet4f)__builtin_msa_shf_w((v4i32)p, EIGEN_MSA_SHF_I8(1, 0, 3, 2)));\n  return p[0];\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE int32_t predux_mul<Packet4i>(const Packet4i& a) {\n  EIGEN_MSA_DEBUG;\n\n  Packet4i p = pmul(a, __builtin_msa_shf_w(a, EIGEN_MSA_SHF_I8(2, 3, 0, 1)));\n  p = pmul(p, __builtin_msa_shf_w(p, EIGEN_MSA_SHF_I8(1, 0, 3, 2)));\n  return p[0];\n}\n\n// min\ntemplate <>\nEIGEN_STRONG_INLINE float predux_min<Packet4f>(const Packet4f& a) {\n  EIGEN_MSA_DEBUG;\n\n  // Swap 64-bit halves of a.\n  Packet4f swapped = (Packet4f)__builtin_msa_shf_w((Packet4i)a, EIGEN_MSA_SHF_I8(2, 3, 0, 1));\n#if !EIGEN_FAST_MATH\n  // Detect presence of NaNs from pairs a[0]-a[2] and a[1]-a[3] as two 32-bit\n  // masks of all zeroes/ones in low 64 bits.\n  v16u8 unord = (v16u8)__builtin_msa_fcun_w(a, swapped);\n  // Combine the two masks into one: 64 ones if no NaNs, otherwise 64 zeroes.\n  unord = (v16u8)__builtin_msa_ceqi_d((v2i64)unord, 0);\n#endif\n  // Continue with min computation.\n  Packet4f v = __builtin_msa_fmin_w(a, swapped);\n  v = __builtin_msa_fmin_w(\n      v, (Packet4f)__builtin_msa_shf_w((Packet4i)v, EIGEN_MSA_SHF_I8(1, 0, 3, 2)));\n#if !EIGEN_FAST_MATH\n  // Based on the mask select between v and 4 qNaNs.\n  v16u8 qnans = (v16u8)__builtin_msa_fill_w(0x7FC00000);\n  v = (Packet4f)__builtin_msa_bsel_v(unord, qnans, (v16u8)v);\n#endif\n  return v[0];\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE int32_t predux_min<Packet4i>(const Packet4i& a) {\n  EIGEN_MSA_DEBUG;\n\n  Packet4i m = pmin(a, __builtin_msa_shf_w(a, EIGEN_MSA_SHF_I8(2, 3, 0, 1)));\n  m = pmin(m, __builtin_msa_shf_w(m, EIGEN_MSA_SHF_I8(1, 0, 3, 2)));\n  return m[0];\n}\n\n// max\ntemplate <>\nEIGEN_STRONG_INLINE float predux_max<Packet4f>(const Packet4f& a) {\n  EIGEN_MSA_DEBUG;\n\n  // Swap 64-bit halves of a.\n  Packet4f swapped = (Packet4f)__builtin_msa_shf_w((Packet4i)a, EIGEN_MSA_SHF_I8(2, 3, 0, 1));\n#if !EIGEN_FAST_MATH\n  // Detect presence of NaNs from pairs a[0]-a[2] and a[1]-a[3] as two 32-bit\n  // masks of all 
zeroes/ones in low 64 bits.\n  v16u8 unord = (v16u8)__builtin_msa_fcun_w(a, swapped);\n  // Combine the two masks into one: 64 ones if no NaNs, otherwise 64 zeroes.\n  unord = (v16u8)__builtin_msa_ceqi_d((v2i64)unord, 0);\n#endif\n  // Continue with max computation.\n  Packet4f v = __builtin_msa_fmax_w(a, swapped);\n  v = __builtin_msa_fmax_w(\n      v, (Packet4f)__builtin_msa_shf_w((Packet4i)v, EIGEN_MSA_SHF_I8(1, 0, 3, 2)));\n#if !EIGEN_FAST_MATH\n  // Based on the mask select between v and 4 qNaNs.\n  v16u8 qnans = (v16u8)__builtin_msa_fill_w(0x7FC00000);\n  v = (Packet4f)__builtin_msa_bsel_v(unord, qnans, (v16u8)v);\n#endif\n  return v[0];\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE int32_t predux_max<Packet4i>(const Packet4i& a) {\n  EIGEN_MSA_DEBUG;\n\n  Packet4i m = pmax(a, __builtin_msa_shf_w(a, EIGEN_MSA_SHF_I8(2, 3, 0, 1)));\n  m = pmax(m, __builtin_msa_shf_w(m, EIGEN_MSA_SHF_I8(1, 0, 3, 2)));\n  return m[0];\n}\n\ninline std::ostream& operator<<(std::ostream& os, const PacketBlock<Packet4f, 4>& value) {\n  os << \"[ \" << value.packet[0] << \",\" << std::endl\n     << \"  \" << value.packet[1] << \",\" << std::endl\n     << \"  \" << value.packet[2] << \",\" << std::endl\n     << \"  \" << value.packet[3] << \" ]\";\n  return os;\n}\n\nEIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet4f, 4>& kernel) {\n  EIGEN_MSA_DEBUG;\n\n  v4i32 tmp1, tmp2, tmp3, tmp4;\n\n  tmp1 = __builtin_msa_ilvr_w((v4i32)kernel.packet[1], (v4i32)kernel.packet[0]);\n  tmp2 = __builtin_msa_ilvr_w((v4i32)kernel.packet[3], (v4i32)kernel.packet[2]);\n  tmp3 = __builtin_msa_ilvl_w((v4i32)kernel.packet[1], (v4i32)kernel.packet[0]);\n  tmp4 = __builtin_msa_ilvl_w((v4i32)kernel.packet[3], (v4i32)kernel.packet[2]);\n\n  kernel.packet[0] = (Packet4f)__builtin_msa_ilvr_d((v2i64)tmp2, (v2i64)tmp1);\n  kernel.packet[1] = (Packet4f)__builtin_msa_ilvod_d((v2i64)tmp2, (v2i64)tmp1);\n  kernel.packet[2] = (Packet4f)__builtin_msa_ilvr_d((v2i64)tmp4, (v2i64)tmp3);\n  kernel.packet[3] = (Packet4f)__builtin_msa_ilvod_d((v2i64)tmp4, (v2i64)tmp3);\n}\n\ninline std::ostream& operator<<(std::ostream& os, const PacketBlock<Packet4i, 4>& value) {\n  os << \"[ \" << value.packet[0] << \",\" << std::endl\n     << \"  \" << value.packet[1] << \",\" << std::endl\n     << \"  \" << value.packet[2] << \",\" << std::endl\n     << \"  \" << value.packet[3] << \" ]\";\n  return os;\n}\n\nEIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet4i, 4>& kernel) {\n  EIGEN_MSA_DEBUG;\n\n  v4i32 tmp1, tmp2, tmp3, tmp4;\n\n  tmp1 = __builtin_msa_ilvr_w(kernel.packet[1], kernel.packet[0]);\n  tmp2 = __builtin_msa_ilvr_w(kernel.packet[3], kernel.packet[2]);\n  tmp3 = __builtin_msa_ilvl_w(kernel.packet[1], kernel.packet[0]);\n  tmp4 = __builtin_msa_ilvl_w(kernel.packet[3], kernel.packet[2]);\n\n  kernel.packet[0] = (Packet4i)__builtin_msa_ilvr_d((v2i64)tmp2, (v2i64)tmp1);\n  kernel.packet[1] = (Packet4i)__builtin_msa_ilvod_d((v2i64)tmp2, (v2i64)tmp1);\n  kernel.packet[2] = (Packet4i)__builtin_msa_ilvr_d((v2i64)tmp4, (v2i64)tmp3);\n  kernel.packet[3] = (Packet4i)__builtin_msa_ilvod_d((v2i64)tmp4, (v2i64)tmp3);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4f psqrt(const Packet4f& a) {\n  EIGEN_MSA_DEBUG;\n\n  return __builtin_msa_fsqrt_w(a);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4f prsqrt(const Packet4f& a) {\n  EIGEN_MSA_DEBUG;\n\n#if EIGEN_FAST_MATH\n  return __builtin_msa_frsqrt_w(a);\n#else\n  Packet4f ones = __builtin_msa_ffint_s_w(__builtin_msa_ldi_w(1));\n  return pdiv(ones, psqrt(a));\n#endif\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE 
Packet4f pfloor<Packet4f>(const Packet4f& a) {\n  Packet4f v = a;\n  int32_t old_mode, new_mode;\n  asm volatile(\n      \"cfcmsa  %[old_mode], $1\\n\"\n      \"ori     %[new_mode], %[old_mode], 3\\n\"  // 3 = round towards -INFINITY.\n      \"ctcmsa  $1, %[new_mode]\\n\"\n      \"frint.w %w[v], %w[v]\\n\"\n      \"ctcmsa  $1, %[old_mode]\\n\"\n      :  // outputs\n      [old_mode] \"=r\"(old_mode), [new_mode] \"=r\"(new_mode),\n      [v] \"+f\"(v)\n      :  // inputs\n      :  // clobbers\n  );\n  return v;\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4f pceil<Packet4f>(const Packet4f& a) {\n  Packet4f v = a;\n  int32_t old_mode, new_mode;\n  asm volatile(\n      \"cfcmsa  %[old_mode], $1\\n\"\n      \"ori     %[new_mode], %[old_mode], 3\\n\"\n      \"xori    %[new_mode], %[new_mode], 1\\n\"  // 2 = round towards +INFINITY.\n      \"ctcmsa  $1, %[new_mode]\\n\"\n      \"frint.w %w[v], %w[v]\\n\"\n      \"ctcmsa  $1, %[old_mode]\\n\"\n      :  // outputs\n      [old_mode] \"=r\"(old_mode), [new_mode] \"=r\"(new_mode),\n      [v] \"+f\"(v)\n      :  // inputs\n      :  // clobbers\n  );\n  return v;\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4f pround<Packet4f>(const Packet4f& a) {\n  Packet4f v = a;\n  int32_t old_mode, new_mode;\n  asm volatile(\n      \"cfcmsa  %[old_mode], $1\\n\"\n      \"ori     %[new_mode], %[old_mode], 3\\n\"\n      \"xori    %[new_mode], %[new_mode], 3\\n\"  // 0 = round to nearest, ties to even.\n      \"ctcmsa  $1, %[new_mode]\\n\"\n      \"frint.w %w[v], %w[v]\\n\"\n      \"ctcmsa  $1, %[old_mode]\\n\"\n      :  // outputs\n      [old_mode] \"=r\"(old_mode), [new_mode] \"=r\"(new_mode),\n      [v] \"+f\"(v)\n      :  // inputs\n      :  // clobbers\n  );\n  return v;\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4f pblend(const Selector<4>& ifPacket, const Packet4f& thenPacket,\n                                    const Packet4f& elsePacket) {\n  Packet4ui select = { ifPacket.select[0], ifPacket.select[1], ifPacket.select[2],\n                       ifPacket.select[3] };\n  Packet4i mask = __builtin_msa_ceqi_w((Packet4i)select, 0);\n  return (Packet4f)__builtin_msa_bsel_v((v16u8)mask, (v16u8)thenPacket, (v16u8)elsePacket);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4i pblend(const Selector<4>& ifPacket, const Packet4i& thenPacket,\n                                    const Packet4i& elsePacket) {\n  Packet4ui select = { ifPacket.select[0], ifPacket.select[1], ifPacket.select[2],\n                       ifPacket.select[3] };\n  Packet4i mask = __builtin_msa_ceqi_w((Packet4i)select, 0);\n  return (Packet4i)__builtin_msa_bsel_v((v16u8)mask, (v16u8)thenPacket, (v16u8)elsePacket);\n}\n\n//---------- double ----------\n\ntypedef v2f64 Packet2d;\ntypedef v2i64 Packet2l;\ntypedef v2u64 Packet2ul;\n\n#define _EIGEN_DECLARE_CONST_Packet2d(NAME, X) const Packet2d p2d_##NAME = { X, X }\n#define _EIGEN_DECLARE_CONST_Packet2l(NAME, X) const Packet2l p2l_##NAME = { X, X }\n#define _EIGEN_DECLARE_CONST_Packet2ul(NAME, X) const Packet2ul p2ul_##NAME = { X, X }\n\ninline std::ostream& operator<<(std::ostream& os, const Packet2d& value) {\n  os << \"[ \" << value[0] << \", \" << value[1] << \" ]\";\n  return os;\n}\n\ninline std::ostream& operator<<(std::ostream& os, const Packet2l& value) {\n  os << \"[ \" << value[0] << \", \" << value[1] << \" ]\";\n  return os;\n}\n\ninline std::ostream& operator<<(std::ostream& os, const Packet2ul& value) {\n  os << \"[ \" << value[0] << \", \" << value[1] << \" ]\";\n  return os;\n}\n\ntemplate <>\nstruct packet_traits<double> : 
default_packet_traits {\n  typedef Packet2d type;\n  typedef Packet2d half;\n  enum {\n    Vectorizable = 1,\n    AlignedOnScalar = 1,\n    size = 2,\n    HasHalfPacket = 0,\n    // FIXME check the Has*\n    HasDiv = 1,\n    HasExp = 1,\n    HasSqrt = 1,\n    HasRsqrt = 1,\n    HasRound = 1,\n    HasFloor = 1,\n    HasCeil = 1,\n    HasBlend = 1\n  };\n};\n\ntemplate <>\nstruct unpacket_traits<Packet2d> {\n  typedef double type;\n  enum { size = 2, alignment = Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false };\n  typedef Packet2d half;\n};\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) {\n  EIGEN_MSA_DEBUG;\n\n  Packet2d value = { from, from };\n  return value;\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet2d padd<Packet2d>(const Packet2d& a, const Packet2d& b) {\n  EIGEN_MSA_DEBUG;\n\n  return __builtin_msa_fadd_d(a, b);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet2d plset<Packet2d>(const double& a) {\n  EIGEN_MSA_DEBUG;\n\n  static const Packet2d countdown = { 0.0, 1.0 };\n  return padd(pset1<Packet2d>(a), countdown);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet2d psub<Packet2d>(const Packet2d& a, const Packet2d& b) {\n  EIGEN_MSA_DEBUG;\n\n  return __builtin_msa_fsub_d(a, b);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a) {\n  EIGEN_MSA_DEBUG;\n\n  return (Packet2d)__builtin_msa_bnegi_d((v2u64)a, 63);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet2d pconj(const Packet2d& a) {\n  EIGEN_MSA_DEBUG;\n\n  return a;\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet2d pmul<Packet2d>(const Packet2d& a, const Packet2d& b) {\n  EIGEN_MSA_DEBUG;\n\n  return __builtin_msa_fmul_d(a, b);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet2d pdiv<Packet2d>(const Packet2d& a, const Packet2d& b) {\n  EIGEN_MSA_DEBUG;\n\n  return __builtin_msa_fdiv_d(a, b);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) {\n  EIGEN_MSA_DEBUG;\n\n  return __builtin_msa_fmadd_d(c, a, b);\n}\n\n// Logical operations are not supported on floating-point types, so we have to\n// reinterpret-cast to integer vectors and use the MSA bitwise intrinsics\ntemplate <>\nEIGEN_STRONG_INLINE Packet2d pand<Packet2d>(const Packet2d& a, const Packet2d& b) {\n  EIGEN_MSA_DEBUG;\n\n  return (Packet2d)__builtin_msa_and_v((v16u8)a, (v16u8)b);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet2d por<Packet2d>(const Packet2d& a, const Packet2d& b) {\n  EIGEN_MSA_DEBUG;\n\n  return (Packet2d)__builtin_msa_or_v((v16u8)a, (v16u8)b);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet2d pxor<Packet2d>(const Packet2d& a, const Packet2d& b) {\n  EIGEN_MSA_DEBUG;\n\n  return (Packet2d)__builtin_msa_xor_v((v16u8)a, (v16u8)b);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet2d pandnot<Packet2d>(const Packet2d& a, const Packet2d& b) {\n  EIGEN_MSA_DEBUG;\n\n  return pand(a, (Packet2d)__builtin_msa_xori_b((v16u8)b, 255));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet2d pload<Packet2d>(const double* from) {\n  EIGEN_MSA_DEBUG;\n\n  EIGEN_DEBUG_ALIGNED_LOAD return (Packet2d)__builtin_msa_ld_d(const_cast<double*>(from), 0);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) {\n  EIGEN_MSA_DEBUG;\n\n#if EIGEN_FAST_MATH\n  // This prefers numbers to NaNs.\n  return __builtin_msa_fmin_d(a, b);\n#else\n  // This prefers NaNs to numbers.\n  v2i64 aNaN = __builtin_msa_fcun_d(a, a);\n  v2i64 aMinOrNaN = por(__builtin_msa_fclt_d(a, b), aNaN);\n  return (Packet2d)__builtin_msa_bsel_v((v16u8)aMinOrNaN, (v16u8)b, 
(v16u8)a);\n#endif\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) {\n  EIGEN_MSA_DEBUG;\n\n#if EIGEN_FAST_MATH\n  // This prefers numbers to NaNs.\n  return __builtin_msa_fmax_d(a, b);\n#else\n  // This prefers NaNs to numbers.\n  v2i64 aNaN = __builtin_msa_fcun_d(a, a);\n  v2i64 aMaxOrNaN = por(__builtin_msa_fclt_d(b, a), aNaN);\n  return (Packet2d)__builtin_msa_bsel_v((v16u8)aMaxOrNaN, (v16u8)b, (v16u8)a);\n#endif\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from) {\n  EIGEN_MSA_DEBUG;\n\n  EIGEN_DEBUG_UNALIGNED_LOAD return (Packet2d)__builtin_msa_ld_d(const_cast<double*>(from), 0);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet2d ploaddup<Packet2d>(const double* from) {\n  EIGEN_MSA_DEBUG;\n\n  Packet2d value = { *from, *from };\n  return value;\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet2d& from) {\n  EIGEN_MSA_DEBUG;\n\n  EIGEN_DEBUG_ALIGNED_STORE __builtin_msa_st_d((v2i64)from, to, 0);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet2d& from) {\n  EIGEN_MSA_DEBUG;\n\n  EIGEN_DEBUG_UNALIGNED_STORE __builtin_msa_st_d((v2i64)from, to, 0);\n}\n\ntemplate <>\nEIGEN_DEVICE_FUNC inline Packet2d pgather<double, Packet2d>(const double* from, Index stride) {\n  EIGEN_MSA_DEBUG;\n\n  Packet2d value;\n  value[0] = *from;\n  from += stride;\n  value[1] = *from;\n  return value;\n}\n\ntemplate <>\nEIGEN_DEVICE_FUNC inline void pscatter<double, Packet2d>(double* to, const Packet2d& from,\n                                                         Index stride) {\n  EIGEN_MSA_DEBUG;\n\n  *to = from[0];\n  to += stride;\n  *to = from[1];\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE void prefetch<double>(const double* addr) {\n  EIGEN_MSA_DEBUG;\n\n  __builtin_prefetch(addr);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) {\n  EIGEN_MSA_DEBUG;\n\n  return a[0];\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a) {\n  EIGEN_MSA_DEBUG;\n\n  return (Packet2d)__builtin_msa_shf_w((v4i32)a, EIGEN_MSA_SHF_I8(2, 3, 0, 1));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet2d pabs(const Packet2d& a) {\n  EIGEN_MSA_DEBUG;\n\n  return (Packet2d)__builtin_msa_bclri_d((v2u64)a, 63);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a) {\n  EIGEN_MSA_DEBUG;\n\n  Packet2d s = padd(a, preverse(a));\n  return s[0];\n}\n\n// Other reduction functions:\n// mul\ntemplate <>\nEIGEN_STRONG_INLINE double predux_mul<Packet2d>(const Packet2d& a) {\n  EIGEN_MSA_DEBUG;\n\n  Packet2d p = pmul(a, preverse(a));\n  return p[0];\n}\n\n// min\ntemplate <>\nEIGEN_STRONG_INLINE double predux_min<Packet2d>(const Packet2d& a) {\n  EIGEN_MSA_DEBUG;\n\n#if EIGEN_FAST_MATH\n  Packet2d swapped = (Packet2d)__builtin_msa_shf_w((Packet4i)a, EIGEN_MSA_SHF_I8(2, 3, 0, 1));\n  Packet2d v = __builtin_msa_fmin_d(a, swapped);\n  return v[0];\n#else\n  double a0 = a[0], a1 = a[1];\n  return ((numext::isnan)(a0) || a0 < a1) ? a0 : a1;\n#endif\n}\n\n// max\ntemplate <>\nEIGEN_STRONG_INLINE double predux_max<Packet2d>(const Packet2d& a) {\n  EIGEN_MSA_DEBUG;\n\n#if EIGEN_FAST_MATH\n  Packet2d swapped = (Packet2d)__builtin_msa_shf_w((Packet4i)a, EIGEN_MSA_SHF_I8(2, 3, 0, 1));\n  Packet2d v = __builtin_msa_fmax_d(a, swapped);\n  return v[0];\n#else\n  double a0 = a[0], a1 = a[1];\n  return ((numext::isnan)(a0) || a0 > a1) ? 
a0 : a1;\n#endif\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet2d psqrt(const Packet2d& a) {\n  EIGEN_MSA_DEBUG;\n\n  return __builtin_msa_fsqrt_d(a);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet2d prsqrt(const Packet2d& a) {\n  EIGEN_MSA_DEBUG;\n\n#if EIGEN_FAST_MATH\n  return __builtin_msa_frsqrt_d(a);\n#else\n  Packet2d ones = __builtin_msa_ffint_s_d(__builtin_msa_ldi_d(1));\n  return pdiv(ones, psqrt(a));\n#endif\n}\n\ninline std::ostream& operator<<(std::ostream& os, const PacketBlock<Packet2d, 2>& value) {\n  os << \"[ \" << value.packet[0] << \",\" << std::endl << \"  \" << value.packet[1] << \" ]\";\n  return os;\n}\n\nEIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet2d, 2>& kernel) {\n  EIGEN_MSA_DEBUG;\n\n  Packet2d trn1 = (Packet2d)__builtin_msa_ilvev_d((v2i64)kernel.packet[1], (v2i64)kernel.packet[0]);\n  Packet2d trn2 = (Packet2d)__builtin_msa_ilvod_d((v2i64)kernel.packet[1], (v2i64)kernel.packet[0]);\n  kernel.packet[0] = trn1;\n  kernel.packet[1] = trn2;\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet2d pfloor<Packet2d>(const Packet2d& a) {\n  Packet2d v = a;\n  int32_t old_mode, new_mode;\n  asm volatile(\n      \"cfcmsa  %[old_mode], $1\\n\"\n      \"ori     %[new_mode], %[old_mode], 3\\n\"  // 3 = round towards -INFINITY.\n      \"ctcmsa  $1, %[new_mode]\\n\"\n      \"frint.d %w[v], %w[v]\\n\"\n      \"ctcmsa  $1, %[old_mode]\\n\"\n      :  // outputs\n      [old_mode] \"=r\"(old_mode), [new_mode] \"=r\"(new_mode),\n      [v] \"+f\"(v)\n      :  // inputs\n      :  // clobbers\n  );\n  return v;\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet2d pceil<Packet2d>(const Packet2d& a) {\n  Packet2d v = a;\n  int32_t old_mode, new_mode;\n  asm volatile(\n      \"cfcmsa  %[old_mode], $1\\n\"\n      \"ori     %[new_mode], %[old_mode], 3\\n\"\n      \"xori    %[new_mode], %[new_mode], 1\\n\"  // 2 = round towards +INFINITY.\n      \"ctcmsa  $1, %[new_mode]\\n\"\n      \"frint.d %w[v], %w[v]\\n\"\n      \"ctcmsa  $1, %[old_mode]\\n\"\n      :  // outputs\n      [old_mode] \"=r\"(old_mode), [new_mode] \"=r\"(new_mode),\n      [v] \"+f\"(v)\n      :  // inputs\n      :  // clobbers\n  );\n  return v;\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet2d pround<Packet2d>(const Packet2d& a) {\n  Packet2d v = a;\n  int32_t old_mode, new_mode;\n  asm volatile(\n      \"cfcmsa  %[old_mode], $1\\n\"\n      \"ori     %[new_mode], %[old_mode], 3\\n\"\n      \"xori    %[new_mode], %[new_mode], 3\\n\"  // 0 = round to nearest, ties to even.\n      \"ctcmsa  $1, %[new_mode]\\n\"\n      \"frint.d %w[v], %w[v]\\n\"\n      \"ctcmsa  $1, %[old_mode]\\n\"\n      :  // outputs\n      [old_mode] \"=r\"(old_mode), [new_mode] \"=r\"(new_mode),\n      [v] \"+f\"(v)\n      :  // inputs\n      :  // clobbers\n  );\n  return v;\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, const Packet2d& thenPacket,\n                                    const Packet2d& elsePacket) {\n  Packet2ul select = { ifPacket.select[0], ifPacket.select[1] };\n  Packet2l mask = __builtin_msa_ceqi_d((Packet2l)select, 0);\n  return (Packet2d)__builtin_msa_bsel_v((v16u8)mask, (v16u8)thenPacket, (v16u8)elsePacket);\n}\n\n}  // end namespace internal\n\n}  // end namespace Eigen\n\n#endif  // EIGEN_PACKET_MATH_MSA_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/arch/NEON/Complex.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr>\n// Copyright (C) 2010 Konstantinos Margaritis <markos@freevec.org>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_COMPLEX_NEON_H\n#define EIGEN_COMPLEX_NEON_H\n\nnamespace Eigen {\n\nnamespace internal {\n\ninline uint32x4_t p4ui_CONJ_XOR()\n{\n// See bug 1325, clang fails to call vld1q_u64.\n#if EIGEN_COMP_CLANG || EIGEN_COMP_CASTXML\n  uint32x4_t ret = { 0x00000000, 0x80000000, 0x00000000, 0x80000000 };\n  return ret;\n#else\n  static const uint32_t conj_XOR_DATA[] = { 0x00000000, 0x80000000, 0x00000000, 0x80000000 };\n  return vld1q_u32( conj_XOR_DATA );\n#endif\n}\n\ninline uint32x2_t p2ui_CONJ_XOR()\n{\n  static const uint32_t conj_XOR_DATA[] = { 0x00000000, 0x80000000 };\n  return vld1_u32( conj_XOR_DATA );\n}\n\n//---------- float ----------\n\nstruct Packet1cf\n{\n  EIGEN_STRONG_INLINE Packet1cf() {}\n  EIGEN_STRONG_INLINE explicit Packet1cf(const Packet2f& a) : v(a) {}\n  Packet2f v;\n};\nstruct Packet2cf\n{\n  EIGEN_STRONG_INLINE Packet2cf() {}\n  EIGEN_STRONG_INLINE explicit Packet2cf(const Packet4f& a) : v(a) {}\n  Packet4f v;\n};\n\ntemplate<> struct packet_traits<std::complex<float> > : default_packet_traits\n{\n  typedef Packet2cf type;\n  typedef Packet1cf half;\n  enum\n  {\n    Vectorizable = 1,\n    AlignedOnScalar = 1,\n    size = 2,\n    HasHalfPacket = 1,\n\n    HasAdd       = 1,\n    HasSub       = 1,\n    HasMul       = 1,\n    HasDiv       = 1,\n    HasNegate    = 1,\n    HasAbs       = 0,\n    HasAbs2      = 0,\n    HasMin       = 0,\n    HasMax       = 0,\n    HasSetLinear = 0\n  };\n};\n\ntemplate<> struct unpacket_traits<Packet1cf>\n{\n  typedef std::complex<float> type;\n  typedef Packet1cf half;\n  typedef Packet2f as_real;\n  enum\n  {\n    size = 1,\n    alignment = Aligned16,\n    vectorizable = true,\n    masked_load_available = false,\n    masked_store_available = false\n  };\n};\ntemplate<> struct unpacket_traits<Packet2cf>\n{\n  typedef std::complex<float> type;\n  typedef Packet1cf half;\n  typedef Packet4f as_real;\n  enum\n  {\n    size = 2,\n    alignment = Aligned16,\n    vectorizable = true,\n    masked_load_available = false,\n    masked_store_available = false\n  };\n};\n\ntemplate<> EIGEN_STRONG_INLINE Packet1cf pcast<float,Packet1cf>(const float& a)\n{ return Packet1cf(vset_lane_f32(a, vdup_n_f32(0.f), 0)); }\ntemplate<> EIGEN_STRONG_INLINE Packet2cf pcast<Packet2f,Packet2cf>(const Packet2f& a)\n{ return Packet2cf(vreinterpretq_f32_u64(vmovl_u32(vreinterpret_u32_f32(a)))); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet1cf pset1<Packet1cf>(const std::complex<float>& from)\n{ return Packet1cf(vld1_f32(reinterpret_cast<const float*>(&from))); }\ntemplate<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)\n{\n  const float32x2_t r64 = vld1_f32(reinterpret_cast<const float*>(&from));\n  return Packet2cf(vcombine_f32(r64, r64));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet1cf padd<Packet1cf>(const Packet1cf& a, const Packet1cf& b)\n{ return Packet1cf(padd<Packet2f>(a.v, b.v)); }\ntemplate<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b)\n{ return Packet2cf(padd<Packet4f>(a.v, b.v)); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet1cf 
psub<Packet1cf>(const Packet1cf& a, const Packet1cf& b)\n{ return Packet1cf(psub<Packet2f>(a.v, b.v)); }\ntemplate<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b)\n{ return Packet2cf(psub<Packet4f>(a.v, b.v)); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet1cf pnegate(const Packet1cf& a) { return Packet1cf(pnegate<Packet2f>(a.v)); }\ntemplate<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a) { return Packet2cf(pnegate<Packet4f>(a.v)); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet1cf pconj(const Packet1cf& a)\n{\n  const Packet2ui b = vreinterpret_u32_f32(a.v);\n  return Packet1cf(vreinterpret_f32_u32(veor_u32(b, p2ui_CONJ_XOR())));\n}\ntemplate<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a)\n{\n  const Packet4ui b = vreinterpretq_u32_f32(a.v);\n  return Packet2cf(vreinterpretq_f32_u32(veorq_u32(b, p4ui_CONJ_XOR())));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet1cf pmul<Packet1cf>(const Packet1cf& a, const Packet1cf& b)\n{\n  Packet2f v1, v2;\n\n  // Get the real values of a | a1_re | a1_re |\n  v1 = vdup_lane_f32(a.v, 0);\n  // Get the imag values of a | a1_im | a1_im |\n  v2 = vdup_lane_f32(a.v, 1);\n  // Multiply the real a with b\n  v1 = vmul_f32(v1, b.v);\n  // Multiply the imag a with b\n  v2 = vmul_f32(v2, b.v);\n  // Conjugate v2\n  v2 = vreinterpret_f32_u32(veor_u32(vreinterpret_u32_f32(v2), p2ui_CONJ_XOR()));\n  // Swap real/imag elements in v2.\n  v2 = vrev64_f32(v2);\n  // Add and return the result\n  return Packet1cf(vadd_f32(v1, v2));\n}\ntemplate<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)\n{\n  Packet4f v1, v2;\n\n  // Get the real values of a | a1_re | a1_re | a2_re | a2_re |\n  v1 = vcombine_f32(vdup_lane_f32(vget_low_f32(a.v), 0), vdup_lane_f32(vget_high_f32(a.v), 0));\n  // Get the imag values of a | a1_im | a1_im | a2_im | a2_im |\n  v2 = vcombine_f32(vdup_lane_f32(vget_low_f32(a.v), 1), vdup_lane_f32(vget_high_f32(a.v), 1));\n  // Multiply the real a with b\n  v1 = vmulq_f32(v1, b.v);\n  // Multiply the imag a with b\n  v2 = vmulq_f32(v2, b.v);\n  // Conjugate v2\n  v2 = vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(v2), p4ui_CONJ_XOR()));\n  // Swap real/imag elements in v2.\n  v2 = vrev64q_f32(v2);\n  // Add and return the result\n  return Packet2cf(vaddq_f32(v1, v2));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet1cf pcmp_eq(const Packet1cf& a, const Packet1cf& b)\n{\n  // Compare real and imaginary parts of a and b to get the mask vector:\n  // [re(a[0])==re(b[0]), im(a[0])==im(b[0])]\n  Packet2f eq = pcmp_eq<Packet2f>(a.v, b.v);\n  // Swap real/imag elements in the mask in to get:\n  // [im(a[0])==im(b[0]), re(a[0])==re(b[0])]\n  Packet2f eq_swapped = vrev64_f32(eq);\n  // Return re(a)==re(b) && im(a)==im(b) by computing bitwise AND of eq and eq_swapped\n  return Packet1cf(pand<Packet2f>(eq, eq_swapped));\n}\ntemplate<> EIGEN_STRONG_INLINE Packet2cf pcmp_eq(const Packet2cf& a, const Packet2cf& b)\n{\n  // Compare real and imaginary parts of a and b to get the mask vector:\n  // [re(a[0])==re(b[0]), im(a[0])==im(b[0]), re(a[1])==re(b[1]), im(a[1])==im(b[1])]\n  Packet4f eq = pcmp_eq<Packet4f>(a.v, b.v);\n  // Swap real/imag elements in the mask in to get:\n  // [im(a[0])==im(b[0]), re(a[0])==re(b[0]), im(a[1])==im(b[1]), re(a[1])==re(b[1])]\n  Packet4f eq_swapped = vrev64q_f32(eq);\n  // Return re(a)==re(b) && im(a)==im(b) by computing bitwise AND of eq and eq_swapped\n  return Packet2cf(pand<Packet4f>(eq, eq_swapped));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet1cf 
pand<Packet1cf>(const Packet1cf& a, const Packet1cf& b)\n{ return Packet1cf(vreinterpret_f32_u32(vand_u32(vreinterpret_u32_f32(a.v), vreinterpret_u32_f32(b.v)))); }\ntemplate<> EIGEN_STRONG_INLINE Packet2cf pand<Packet2cf>(const Packet2cf& a, const Packet2cf& b)\n{ return Packet2cf(vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(a.v), vreinterpretq_u32_f32(b.v)))); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet1cf por<Packet1cf>(const Packet1cf& a, const Packet1cf& b)\n{ return Packet1cf(vreinterpret_f32_u32(vorr_u32(vreinterpret_u32_f32(a.v), vreinterpret_u32_f32(b.v)))); }\ntemplate<> EIGEN_STRONG_INLINE Packet2cf por<Packet2cf>(const Packet2cf& a, const Packet2cf& b)\n{ return Packet2cf(vreinterpretq_f32_u32(vorrq_u32(vreinterpretq_u32_f32(a.v), vreinterpretq_u32_f32(b.v)))); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet1cf pxor<Packet1cf>(const Packet1cf& a, const Packet1cf& b)\n{ return Packet1cf(vreinterpret_f32_u32(veor_u32(vreinterpret_u32_f32(a.v), vreinterpret_u32_f32(b.v)))); }\ntemplate<> EIGEN_STRONG_INLINE Packet2cf pxor<Packet2cf>(const Packet2cf& a, const Packet2cf& b)\n{ return Packet2cf(vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(a.v), vreinterpretq_u32_f32(b.v)))); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet1cf pandnot<Packet1cf>(const Packet1cf& a, const Packet1cf& b)\n{ return Packet1cf(vreinterpret_f32_u32(vbic_u32(vreinterpret_u32_f32(a.v), vreinterpret_u32_f32(b.v)))); }\ntemplate<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b)\n{ return Packet2cf(vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(a.v), vreinterpretq_u32_f32(b.v)))); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet1cf pload<Packet1cf>(const std::complex<float>* from)\n{ EIGEN_DEBUG_ALIGNED_LOAD return Packet1cf(pload<Packet2f>((const float*)from)); }\ntemplate<> EIGEN_STRONG_INLINE Packet2cf pload<Packet2cf>(const std::complex<float>* from)\n{ EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload<Packet4f>(reinterpret_cast<const float*>(from))); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet1cf ploadu<Packet1cf>(const std::complex<float>* from)\n{ EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cf(ploadu<Packet2f>((const float*)from)); }\ntemplate<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from)\n{ EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu<Packet4f>(reinterpret_cast<const float*>(from))); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet1cf ploaddup<Packet1cf>(const std::complex<float>* from)\n{ return pset1<Packet1cf>(*from); }\ntemplate<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from)\n{ return pset1<Packet2cf>(*from); }\n\ntemplate<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> *to, const Packet1cf& from)\n{ EIGEN_DEBUG_ALIGNED_STORE pstore((float*)to, from.v); }\ntemplate<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> *to, const Packet2cf& from)\n{ EIGEN_DEBUG_ALIGNED_STORE pstore(reinterpret_cast<float*>(to), from.v); }\n\ntemplate<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> *to, const Packet1cf& from)\n{ EIGEN_DEBUG_UNALIGNED_STORE pstoreu((float*)to, from.v); }\ntemplate<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> *to, const Packet2cf& from)\n{ EIGEN_DEBUG_UNALIGNED_STORE pstoreu(reinterpret_cast<float*>(to), from.v); }\n\ntemplate<> EIGEN_DEVICE_FUNC inline Packet1cf pgather<std::complex<float>, Packet1cf>(\n    const std::complex<float>* from, Index stride)\n{\n  
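// Gather a single complex<float>: broadcast the real part to both lanes, then\n  // overwrite lane 1 with the imaginary part, yielding [re, im].\n  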
const Packet2f tmp = vdup_n_f32(std::real(from[0*stride]));\n  return Packet1cf(vset_lane_f32(std::imag(from[0*stride]), tmp, 1));\n}\ntemplate<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(\n    const std::complex<float>* from, Index stride)\n{\n  Packet4f res = vdupq_n_f32(std::real(from[0*stride]));\n  res = vsetq_lane_f32(std::imag(from[0*stride]), res, 1);\n  res = vsetq_lane_f32(std::real(from[1*stride]), res, 2);\n  res = vsetq_lane_f32(std::imag(from[1*stride]), res, 3);\n  return Packet2cf(res);\n}\n\ntemplate<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet1cf>(\n    std::complex<float>* to, const Packet1cf& from, Index stride)\n{ to[stride*0] = std::complex<float>(vget_lane_f32(from.v, 0), vget_lane_f32(from.v, 1)); }\ntemplate<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(\n    std::complex<float>* to, const Packet2cf& from, Index stride)\n{\n  to[stride*0] = std::complex<float>(vgetq_lane_f32(from.v, 0), vgetq_lane_f32(from.v, 1));\n  to[stride*1] = std::complex<float>(vgetq_lane_f32(from.v, 2), vgetq_lane_f32(from.v, 3));\n}\n\ntemplate<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> *addr)\n{ EIGEN_ARM_PREFETCH(reinterpret_cast<const float*>(addr)); }\n\ntemplate<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet1cf>(const Packet1cf& a)\n{\n  EIGEN_ALIGN16 std::complex<float> x;\n  vst1_f32(reinterpret_cast<float*>(&x), a.v);\n  return x;\n}\ntemplate<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a)\n{\n  EIGEN_ALIGN16 std::complex<float> x[2];\n  vst1q_f32(reinterpret_cast<float*>(x), a.v);\n  return x[0];\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet1cf preverse(const Packet1cf& a) { return a; }\ntemplate<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a)\n{ return Packet2cf(vcombine_f32(vget_high_f32(a.v), vget_low_f32(a.v))); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet1cf pcplxflip<Packet1cf>(const Packet1cf& a)\n{ return Packet1cf(vrev64_f32(a.v)); }\ntemplate<> EIGEN_STRONG_INLINE Packet2cf pcplxflip<Packet2cf>(const Packet2cf& a)\n{ return Packet2cf(vrev64q_f32(a.v)); }\n\ntemplate<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet1cf>(const Packet1cf& a)\n{\n  std::complex<float> s;\n  vst1_f32((float *)&s, a.v);\n  return s;\n}\ntemplate<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a)\n{\n  std::complex<float> s;\n  vst1_f32(reinterpret_cast<float*>(&s), vadd_f32(vget_low_f32(a.v), vget_high_f32(a.v)));\n  return s;\n}\n\ntemplate<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet1cf>(const Packet1cf& a)\n{\n  std::complex<float> s;\n  vst1_f32((float *)&s, a.v);\n  return s;\n}\ntemplate<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a)\n{\n  float32x2_t a1, a2, v1, v2, prod;\n  std::complex<float> s;\n\n  a1 = vget_low_f32(a.v);\n  a2 = vget_high_f32(a.v);\n  // Get the real value of a[0]: | a1_re | a1_re |\n  v1 = vdup_lane_f32(a1, 0);\n  // Get the imag value of a[0]: | a1_im | a1_im |\n  v2 = vdup_lane_f32(a1, 1);\n  // Multiply the real part of a[0] with a[1]\n  v1 = vmul_f32(v1, a2);\n  // Multiply the imag part of a[0] with a[1]\n  v2 = vmul_f32(v2, a2);\n  // Conjugate v2\n  v2 = vreinterpret_f32_u32(veor_u32(vreinterpret_u32_f32(v2), p2ui_CONJ_XOR()));\n  // Swap real/imag elements in v2.\n  v2 = vrev64_f32(v2);\n  // Add v1, v2\n  prod = vadd_f32(v1, v2);\n\n  vst1_f32(reinterpret_cast<float*>(&s), prod);\n\n  return 
s;\n}\n\nEIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cf,Packet2f)\nEIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f)\n\ntemplate<> EIGEN_STRONG_INLINE Packet1cf pdiv<Packet1cf>(const Packet1cf& a, const Packet1cf& b)\n{\n  // TODO optimize it for NEON\n  Packet1cf res = pmul(a, pconj(b));\n  Packet2f s, rev_s;\n\n  // this computes the norm\n  s = vmul_f32(b.v, b.v);\n  rev_s = vrev64_f32(s);\n\n  return Packet1cf(pdiv<Packet2f>(res.v, vadd_f32(s, rev_s)));\n}\ntemplate<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)\n{\n  // TODO optimize it for NEON\n  Packet2cf res = pmul(a,pconj(b));\n  Packet4f s, rev_s;\n\n  // this computes the norm\n  s = vmulq_f32(b.v, b.v);\n  rev_s = vrev64q_f32(s);\n\n  return Packet2cf(pdiv<Packet4f>(res.v, vaddq_f32(s, rev_s)));\n}\n\nEIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet1cf, 1>& /*kernel*/) {}\nEIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet2cf, 2>& kernel)\n{\n  Packet4f tmp = vcombine_f32(vget_high_f32(kernel.packet[0].v), vget_high_f32(kernel.packet[1].v));\n  kernel.packet[0].v = vcombine_f32(vget_low_f32(kernel.packet[0].v), vget_low_f32(kernel.packet[1].v));\n  kernel.packet[1].v = tmp;\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet1cf psqrt<Packet1cf>(const Packet1cf& a) {\n  return psqrt_complex<Packet1cf>(a);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet2cf psqrt<Packet2cf>(const Packet2cf& a) {\n  return psqrt_complex<Packet2cf>(a);\n}\n\n//---------- double ----------\n#if EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG\n\n// See bug 1325, clang fails to call vld1q_u64.\n#if EIGEN_COMP_CLANG || EIGEN_COMP_CASTXML\n  static uint64x2_t p2ul_CONJ_XOR = {0x0, 0x8000000000000000};\n#else\n  const uint64_t  p2ul_conj_XOR_DATA[] = { 0x0, 0x8000000000000000 };\n  static uint64x2_t p2ul_CONJ_XOR = vld1q_u64( p2ul_conj_XOR_DATA );\n#endif\n\nstruct Packet1cd\n{\n  EIGEN_STRONG_INLINE Packet1cd() {}\n  EIGEN_STRONG_INLINE explicit Packet1cd(const Packet2d& a) : v(a) {}\n  Packet2d v;\n};\n\ntemplate<> struct packet_traits<std::complex<double> >  : default_packet_traits\n{\n  typedef Packet1cd type;\n  typedef Packet1cd half;\n  enum\n  {\n    Vectorizable = 1,\n    AlignedOnScalar = 0,\n    size = 1,\n    HasHalfPacket = 0,\n\n    HasAdd    = 1,\n    HasSub    = 1,\n    HasMul    = 1,\n    HasDiv    = 1,\n    HasNegate = 1,\n    HasAbs    = 0,\n    HasAbs2   = 0,\n    HasMin    = 0,\n    HasMax    = 0,\n    HasSetLinear = 0\n  };\n};\n\ntemplate<> struct unpacket_traits<Packet1cd>\n{\n  typedef std::complex<double> type;\n  typedef Packet1cd half;\n  typedef Packet2d as_real;\n  enum\n  {\n    size=1,\n    alignment=Aligned16,\n    vectorizable=true,\n    masked_load_available=false,\n    masked_store_available=false\n  };\n};\n\ntemplate<> EIGEN_STRONG_INLINE Packet1cd pload<Packet1cd>(const std::complex<double>* from)\n{ EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload<Packet2d>(reinterpret_cast<const double*>(from))); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from)\n{ EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu<Packet2d>(reinterpret_cast<const double*>(from))); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>& from)\n{\n  /* here we really have to use unaligned loads :( */\n  return ploadu<Packet1cd>(&from);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b)\n{ return Packet1cd(padd<Packet2d>(a.v, b.v)); }\n\ntemplate<> EIGEN_STRONG_INLINE 
Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b)\n{ return Packet1cd(psub<Packet2d>(a.v, b.v)); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a)\n{ return Packet1cd(pnegate<Packet2d>(a.v)); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a)\n{ return Packet1cd(vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(a.v), p2ul_CONJ_XOR))); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b)\n{\n  Packet2d v1, v2;\n\n  // Get the real values of a\n  v1 = vdupq_lane_f64(vget_low_f64(a.v), 0);\n  // Get the imag values of a\n  v2 = vdupq_lane_f64(vget_high_f64(a.v), 0);\n  // Multiply the real a with b\n  v1 = vmulq_f64(v1, b.v);\n  // Multiply the imag a with b\n  v2 = vmulq_f64(v2, b.v);\n  // Conjugate v2\n  v2 = vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(v2), p2ul_CONJ_XOR));\n  // Swap real/imag elements in v2.\n  v2 = preverse<Packet2d>(v2);\n  // Add and return the result\n  return Packet1cd(vaddq_f64(v1, v2));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet1cd pcmp_eq(const Packet1cd& a, const Packet1cd& b)\n{\n  // Compare real and imaginary parts of a and b to get the mask vector:\n  // [re(a)==re(b), im(a)==im(b)]\n  Packet2d eq = pcmp_eq<Packet2d>(a.v, b.v);\n  // Swap real/imag elements in the mask in to get:\n  // [im(a)==im(b), re(a)==re(b)]\n  Packet2d eq_swapped = vreinterpretq_f64_u32(vrev64q_u32(vreinterpretq_u32_f64(eq)));\n  // Return re(a)==re(b) & im(a)==im(b) by computing bitwise AND of eq and eq_swapped\n  return Packet1cd(pand<Packet2d>(eq, eq_swapped));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet1cd pand<Packet1cd>(const Packet1cd& a, const Packet1cd& b)\n{ return Packet1cd(vreinterpretq_f64_u64(vandq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v)))); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet1cd por<Packet1cd>(const Packet1cd& a, const Packet1cd& b)\n{ return Packet1cd(vreinterpretq_f64_u64(vorrq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v)))); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet1cd pxor<Packet1cd>(const Packet1cd& a, const Packet1cd& b)\n{ return Packet1cd(vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v)))); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b)\n{ return Packet1cd(vreinterpretq_f64_u64(vbicq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v)))); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>* from)\n{ return pset1<Packet1cd>(*from); }\n\ntemplate<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> *to, const Packet1cd& from)\n{ EIGEN_DEBUG_ALIGNED_STORE pstore(reinterpret_cast<double*>(to), from.v); }\n\ntemplate<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> *to, const Packet1cd& from)\n{ EIGEN_DEBUG_UNALIGNED_STORE pstoreu(reinterpret_cast<double*>(to), from.v); }\n\ntemplate<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> *addr)\n{ EIGEN_ARM_PREFETCH(reinterpret_cast<const double*>(addr)); }\n\ntemplate<> EIGEN_DEVICE_FUNC inline Packet1cd pgather<std::complex<double>, Packet1cd>(\n    const std::complex<double>* from, Index stride)\n{\n  Packet2d res = pset1<Packet2d>(0.0);\n  res = vsetq_lane_f64(std::real(from[0*stride]), res, 0);\n  res = vsetq_lane_f64(std::imag(from[0*stride]), res, 1);\n  return Packet1cd(res);\n}\n\ntemplate<> EIGEN_DEVICE_FUNC inline void 
pscatter<std::complex<double>, Packet1cd>(\n    std::complex<double>* to, const Packet1cd& from, Index stride)\n{ to[stride*0] = std::complex<double>(vgetq_lane_f64(from.v, 0), vgetq_lane_f64(from.v, 1)); }\n\ntemplate<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Packet1cd& a)\n{\n  EIGEN_ALIGN16 std::complex<double> res;\n  pstore<std::complex<double> >(&res, a);\n  return res;\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a) { return a; }\n\ntemplate<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Packet1cd& a) { return pfirst(a); }\n\ntemplate<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a) { return pfirst(a); }\n\nEIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d)\n\ntemplate<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)\n{\n  // TODO optimize it for NEON\n  Packet1cd res = pmul(a,pconj(b));\n  Packet2d s = pmul<Packet2d>(b.v, b.v);\n  Packet2d rev_s = preverse<Packet2d>(s);\n\n  return Packet1cd(pdiv(res.v, padd<Packet2d>(s,rev_s)));\n}\n\nEIGEN_STRONG_INLINE Packet1cd pcplxflip/*<Packet1cd>*/(const Packet1cd& x)\n{ return Packet1cd(preverse(Packet2d(x.v))); }\n\nEIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet1cd,2>& kernel)\n{\n  Packet2d tmp = vcombine_f64(vget_high_f64(kernel.packet[0].v), vget_high_f64(kernel.packet[1].v));\n  kernel.packet[0].v = vcombine_f64(vget_low_f64(kernel.packet[0].v), vget_low_f64(kernel.packet[1].v));\n  kernel.packet[1].v = tmp;\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet1cd psqrt<Packet1cd>(const Packet1cd& a) {\n  return psqrt_complex<Packet1cd>(a);\n}\n\n#endif // EIGEN_ARCH_ARM64\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_COMPLEX_NEON_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h",
    "content": "namespace Eigen {\nnamespace internal {\n  \n#if EIGEN_ARCH_ARM && EIGEN_COMP_CLANG\n\n// Clang seems to excessively spill registers in the GEBP kernel on 32-bit arm.\n// Here we specialize gebp_traits to eliminate these register spills.\n// See #2138.\ntemplate<>\nstruct gebp_traits <float,float,false,false,Architecture::NEON,GEBPPacketFull>\n : gebp_traits<float,float,false,false,Architecture::Generic,GEBPPacketFull>\n{\n  EIGEN_STRONG_INLINE void acc(const AccPacket& c, const ResPacket& alpha, ResPacket& r) const\n  { \n    // This volatile inline ASM both acts as a barrier to prevent reordering,\n    // as well as enforces strict register use.\n    asm volatile(\n      \"vmla.f32 %q[r], %q[c], %q[alpha]\"\n      : [r] \"+w\" (r)\n      : [c] \"w\" (c),\n        [alpha] \"w\" (alpha)\n      : );\n  }\n\n  template <typename LaneIdType>\n  EIGEN_STRONG_INLINE void madd(const Packet4f& a, const Packet4f& b,\n                                Packet4f& c, Packet4f& tmp,\n                                const LaneIdType&) const {\n    acc(a, b, c);\n  }\n  \n  template <typename LaneIdType>\n  EIGEN_STRONG_INLINE void madd(const Packet4f& a, const QuadPacket<Packet4f>& b,\n                                Packet4f& c, Packet4f& tmp,\n                                const LaneIdType& lane) const {\n    madd(a, b.get(lane), c, tmp, lane);\n  }\n};\n\n#endif // EIGEN_ARCH_ARM && EIGEN_COMP_CLANG\n\n#if EIGEN_ARCH_ARM64\n\ntemplate<>\nstruct gebp_traits <float,float,false,false,Architecture::NEON,GEBPPacketFull>\n : gebp_traits<float,float,false,false,Architecture::Generic,GEBPPacketFull>\n{\n  typedef float RhsPacket;\n  typedef float32x4_t RhsPacketx4;\n\n  EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacket& dest) const\n  {\n    dest = *b;\n  }\n\n  EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacketx4& dest) const\n  {\n    dest = vld1q_f32(b);\n  }\n\n  EIGEN_STRONG_INLINE void updateRhs(const RhsScalar* b, RhsPacket& dest) const\n  {\n    dest = *b;\n  }\n\n  EIGEN_STRONG_INLINE void updateRhs(const RhsScalar*, RhsPacketx4&) const\n  {}\n\n  EIGEN_STRONG_INLINE void loadRhsQuad(const RhsScalar* b, RhsPacket& dest) const\n  {\n    loadRhs(b,dest);\n  }\n\n  EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& /*tmp*/, const FixedInt<0>&) const\n  {\n    c = vfmaq_n_f32(c, a, b);\n  }\n\n  // NOTE: Template parameter inference failed when compiled with Android NDK:\n  // \"candidate template ignored: could not match 'FixedInt<N>' against 'Eigen::internal::FixedInt<0>\".\n\n  EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacketx4& b, AccPacket& c, RhsPacket& /*tmp*/, const FixedInt<0>&) const\n  { madd_helper<0>(a, b, c); }\n  EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacketx4& b, AccPacket& c, RhsPacket& /*tmp*/, const FixedInt<1>&) const\n  { madd_helper<1>(a, b, c); }\n  EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacketx4& b, AccPacket& c, RhsPacket& /*tmp*/, const FixedInt<2>&) const\n  { madd_helper<2>(a, b, c); }\n  EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacketx4& b, AccPacket& c, RhsPacket& /*tmp*/, const FixedInt<3>&) const\n  { madd_helper<3>(a, b, c); }\n\n private:\n  template<int LaneID>\n  EIGEN_STRONG_INLINE void madd_helper(const LhsPacket& a, const RhsPacketx4& b, AccPacket& c) const\n  {\n    #if EIGEN_COMP_GNUC_STRICT && !(EIGEN_GNUC_AT_LEAST(9,0))\n    // workaround gcc issue https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89101\n    
// vfmaq_laneq_f32 is implemented through a costly dup\n         if(LaneID==0)  asm(\"fmla %0.4s, %1.4s, %2.s[0]\\n\" : \"+w\" (c) : \"w\" (a), \"w\" (b) :  );\n    else if(LaneID==1)  asm(\"fmla %0.4s, %1.4s, %2.s[1]\\n\" : \"+w\" (c) : \"w\" (a), \"w\" (b) :  );\n    else if(LaneID==2)  asm(\"fmla %0.4s, %1.4s, %2.s[2]\\n\" : \"+w\" (c) : \"w\" (a), \"w\" (b) :  );\n    else if(LaneID==3)  asm(\"fmla %0.4s, %1.4s, %2.s[3]\\n\" : \"+w\" (c) : \"w\" (a), \"w\" (b) :  );\n    #else\n    c = vfmaq_laneq_f32(c, a, b, LaneID);\n    #endif\n  }\n};\n\n\ntemplate<>\nstruct gebp_traits <double,double,false,false,Architecture::NEON>\n : gebp_traits<double,double,false,false,Architecture::Generic>\n{\n  typedef double RhsPacket;\n\n  struct RhsPacketx4 {\n    float64x2_t B_0, B_1;\n  };\n\n  EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacket& dest) const\n  {\n    dest = *b;\n  }\n\n  EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacketx4& dest) const\n  {\n    dest.B_0 = vld1q_f64(b);\n    dest.B_1 = vld1q_f64(b+2);\n  }\n\n  EIGEN_STRONG_INLINE void updateRhs(const RhsScalar* b, RhsPacket& dest) const\n  {\n    loadRhs(b,dest);\n  }\n\n  EIGEN_STRONG_INLINE void updateRhs(const RhsScalar*, RhsPacketx4&) const\n  {}\n\n  EIGEN_STRONG_INLINE void loadRhsQuad(const RhsScalar* b, RhsPacket& dest) const\n  {\n    loadRhs(b,dest);\n  }\n\n  EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& /*tmp*/, const FixedInt<0>&) const\n  {\n    c = vfmaq_n_f64(c, a, b);\n  }\n\n  // NOTE: Template parameter inference failed when compiled with Android NDK:\n  // \"candidate template ignored: could not match 'FixedInt<N>' against 'Eigen::internal::FixedInt<0>\".\n\n  EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacketx4& b, AccPacket& c, RhsPacket& /*tmp*/, const FixedInt<0>&) const\n  { madd_helper<0>(a, b, c); }\n  EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacketx4& b, AccPacket& c, RhsPacket& /*tmp*/, const FixedInt<1>&) const\n  { madd_helper<1>(a, b, c); }\n  EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacketx4& b, AccPacket& c, RhsPacket& /*tmp*/, const FixedInt<2>&) const\n  { madd_helper<2>(a, b, c); }\n  EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacketx4& b, AccPacket& c, RhsPacket& /*tmp*/, const FixedInt<3>&) const\n  { madd_helper<3>(a, b, c); }\n\n private:\n  template <int LaneID>\n  EIGEN_STRONG_INLINE void madd_helper(const LhsPacket& a, const RhsPacketx4& b, AccPacket& c) const\n  {\n    #if EIGEN_COMP_GNUC_STRICT && !(EIGEN_GNUC_AT_LEAST(9,0))\n    // workaround gcc issue https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89101\n    // vfmaq_laneq_f64 is implemented through a costly dup\n         if(LaneID==0)  asm(\"fmla %0.2d, %1.2d, %2.d[0]\\n\" : \"+w\" (c) : \"w\" (a), \"w\" (b.B_0) :  );\n    else if(LaneID==1)  asm(\"fmla %0.2d, %1.2d, %2.d[1]\\n\" : \"+w\" (c) : \"w\" (a), \"w\" (b.B_0) :  );\n    else if(LaneID==2)  asm(\"fmla %0.2d, %1.2d, %2.d[0]\\n\" : \"+w\" (c) : \"w\" (a), \"w\" (b.B_1) :  );\n    else if(LaneID==3)  asm(\"fmla %0.2d, %1.2d, %2.d[1]\\n\" : \"+w\" (c) : \"w\" (a), \"w\" (b.B_1) :  );\n    #else\n         if(LaneID==0) c = vfmaq_laneq_f64(c, a, b.B_0, 0);\n    else if(LaneID==1) c = vfmaq_laneq_f64(c, a, b.B_0, 1);\n    else if(LaneID==2) c = vfmaq_laneq_f64(c, a, b.B_1, 0);\n    else if(LaneID==3) c = vfmaq_laneq_f64(c, a, b.B_1, 1);\n    #endif\n  }\n};\n\n#endif // EIGEN_ARCH_ARM64\n\n}  // namespace internal\n}  // namespace Eigen\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/arch/NEON/MathFunctions.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_MATH_FUNCTIONS_NEON_H\n#define EIGEN_MATH_FUNCTIONS_NEON_H\n\nnamespace Eigen {\n\nnamespace internal {\n\ntemplate<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet2f pexp<Packet2f>(const Packet2f& x)\n{ return pexp_float(x); }\ntemplate<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f pexp<Packet4f>(const Packet4f& x)\n{ return pexp_float(x); }\n\ntemplate<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet2f plog<Packet2f>(const Packet2f& x)\n{ return plog_float(x); }\ntemplate<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f plog<Packet4f>(const Packet4f& x)\n{ return plog_float(x); }\n\ntemplate<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet2f psin<Packet2f>(const Packet2f& x)\n{ return psin_float(x); }\ntemplate<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f psin<Packet4f>(const Packet4f& x)\n{ return psin_float(x); }\n\ntemplate<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet2f pcos<Packet2f>(const Packet2f& x)\n{ return pcos_float(x); }\ntemplate<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f pcos<Packet4f>(const Packet4f& x)\n{ return pcos_float(x); }\n\n// Hyperbolic Tangent function.\ntemplate<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet2f ptanh<Packet2f>(const Packet2f& x)\n{ return internal::generic_fast_tanh_float(x); }\ntemplate<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f ptanh<Packet4f>(const Packet4f& x)\n{ return internal::generic_fast_tanh_float(x); }\n\nBF16_PACKET_FUNCTION(Packet4f, Packet4bf, psin)\nBF16_PACKET_FUNCTION(Packet4f, Packet4bf, pcos)\nBF16_PACKET_FUNCTION(Packet4f, Packet4bf, plog)\nBF16_PACKET_FUNCTION(Packet4f, Packet4bf, pexp)\nBF16_PACKET_FUNCTION(Packet4f, Packet4bf, ptanh)\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4bf pfrexp(const Packet4bf& a, Packet4bf& exponent) {\n  Packet4f fexponent;\n  const Packet4bf out = F32ToBf16(pfrexp<Packet4f>(Bf16ToF32(a), fexponent));\n  exponent = F32ToBf16(fexponent);\n  return out;\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4bf pldexp(const Packet4bf& a, const Packet4bf& exponent) {\n  return F32ToBf16(pldexp<Packet4f>(Bf16ToF32(a), Bf16ToF32(exponent)));\n}\n\n//---------- double ----------\n\n#if EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG\ntemplate<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet2d pexp<Packet2d>(const Packet2d& x)\n{ return pexp_double(x); }\n\ntemplate<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet2d plog<Packet2d>(const Packet2d& x)\n{ return plog_double(x); }\n\n#endif\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_MATH_FUNCTIONS_NEON_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/arch/NEON/PacketMath.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>\n// Copyright (C) 2010 Konstantinos Margaritis <markos@freevec.org>\n// Heavily based on Gael's SSE version.\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_PACKET_MATH_NEON_H\n#define EIGEN_PACKET_MATH_NEON_H\n\nnamespace Eigen {\n\nnamespace internal {\n\n#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD\n#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8\n#endif\n\n#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD\n#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD\n#endif\n\n#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS\n#if EIGEN_ARCH_ARM64\n#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 32\n#else\n#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 16\n#endif\n#endif\n\n#if EIGEN_COMP_MSVC_STRICT\n\n// In MSVC's arm_neon.h header file, all NEON vector types\n// are aliases to the same underlying type __n128.\n// We thus have to wrap them to make them different C++ types.\n// (See also bug 1428)\ntypedef eigen_packet_wrapper<float32x2_t,0>  Packet2f;\ntypedef eigen_packet_wrapper<float32x4_t,1>  Packet4f;\ntypedef eigen_packet_wrapper<int32_t    ,2>  Packet4c;\ntypedef eigen_packet_wrapper<int8x8_t   ,3>  Packet8c;\ntypedef eigen_packet_wrapper<int8x16_t  ,4>  Packet16c;\ntypedef eigen_packet_wrapper<uint32_t   ,5>  Packet4uc;\ntypedef eigen_packet_wrapper<uint8x8_t  ,6>  Packet8uc;\ntypedef eigen_packet_wrapper<uint8x16_t ,7>  Packet16uc;\ntypedef eigen_packet_wrapper<int16x4_t  ,8>  Packet4s;\ntypedef eigen_packet_wrapper<int16x8_t  ,9>  Packet8s;\ntypedef eigen_packet_wrapper<uint16x4_t ,10> Packet4us;\ntypedef eigen_packet_wrapper<uint16x8_t ,11> Packet8us;\ntypedef eigen_packet_wrapper<int32x2_t  ,12> Packet2i;\ntypedef eigen_packet_wrapper<int32x4_t  ,13> Packet4i;\ntypedef eigen_packet_wrapper<uint32x2_t ,14> Packet2ui;\ntypedef eigen_packet_wrapper<uint32x4_t ,15> Packet4ui;\ntypedef eigen_packet_wrapper<int64x2_t  ,16> Packet2l;\ntypedef eigen_packet_wrapper<uint64x2_t ,17> Packet2ul;\n\n#else\n\ntypedef float32x2_t                          Packet2f;\ntypedef float32x4_t                          Packet4f;\ntypedef eigen_packet_wrapper<int32_t    ,2>  Packet4c;\ntypedef int8x8_t                             Packet8c;\ntypedef int8x16_t                            Packet16c;\ntypedef eigen_packet_wrapper<uint32_t   ,5>  Packet4uc;\ntypedef uint8x8_t                            Packet8uc;\ntypedef uint8x16_t                           Packet16uc;\ntypedef int16x4_t                            Packet4s;\ntypedef int16x8_t                            Packet8s;\ntypedef uint16x4_t                           Packet4us;\ntypedef uint16x8_t                           Packet8us;\ntypedef int32x2_t                            Packet2i;\ntypedef int32x4_t                            Packet4i;\ntypedef uint32x2_t                           Packet2ui;\ntypedef uint32x4_t                           Packet4ui;\ntypedef int64x2_t                            Packet2l;\ntypedef uint64x2_t                           Packet2ul;\n\n#endif // EIGEN_COMP_MSVC_STRICT\n\nEIGEN_STRONG_INLINE Packet4f shuffle1(const Packet4f& m, int mask){\n  const float* a = reinterpret_cast<const float*>(&m);\n  Packet4f res = {*(a + (mask & 3)), *(a + ((mask >> 2) & 3)), *(a + ((mask >> 4) & 3 )), *(a + 
((mask >> 6) & 3))};\n  return res;\n}\n\n// functionally equivalent to _mm_shuffle_ps in SSE when interleave\n// == false (i.e. shuffle<false>(m, n, mask) equals _mm_shuffle_ps(m, n, mask)),\n// interleave m and n when interleave == true. Currently used in LU/arch/InverseSize4.h\n// to enable a shared implementation for fast inversion of matrices of size 4. \ntemplate<bool interleave> \nEIGEN_STRONG_INLINE Packet4f shuffle2(const Packet4f &m, const Packet4f &n, int mask)\n{\n  const float* a = reinterpret_cast<const float*>(&m);\n  const float* b = reinterpret_cast<const float*>(&n);\n  Packet4f res = {*(a + (mask & 3)), *(a + ((mask >> 2) & 3)), *(b + ((mask >> 4) & 3)), *(b + ((mask >> 6) & 3))};\n  return res;\n}\n\ntemplate<> \nEIGEN_STRONG_INLINE Packet4f shuffle2<true>(const Packet4f &m, const Packet4f &n, int mask) \n{\n  const float* a = reinterpret_cast<const float*>(&m);\n  const float* b = reinterpret_cast<const float*>(&n);\n  Packet4f res = {*(a + (mask & 3)), *(b + ((mask >> 2) & 3)), *(a + ((mask >> 4) & 3)), *(b + ((mask >> 6) & 3))};\n  return res;\n}\n\nEIGEN_STRONG_INLINE static int eigen_neon_shuffle_mask(int p, int q, int r, int s) {return ((s)<<6|(r)<<4|(q)<<2|(p));}\n\nEIGEN_STRONG_INLINE Packet4f vec4f_swizzle1(const Packet4f& a, int p, int q, int r, int s)\n{ \n  return shuffle1(a, eigen_neon_shuffle_mask(p, q, r, s));\n}\nEIGEN_STRONG_INLINE Packet4f vec4f_swizzle2(const Packet4f& a, const Packet4f& b, int p, int q, int r, int s)\n{ \n  return shuffle2<false>(a,b,eigen_neon_shuffle_mask(p, q, r, s));\n}\nEIGEN_STRONG_INLINE Packet4f vec4f_movelh(const Packet4f& a, const Packet4f& b)\n{\n  return shuffle2<false>(a,b,eigen_neon_shuffle_mask(0, 1, 0, 1));\n}\nEIGEN_STRONG_INLINE Packet4f vec4f_movehl(const Packet4f& a, const Packet4f& b)\n{\n  return shuffle2<false>(b,a,eigen_neon_shuffle_mask(2, 3, 2, 3));\n}\nEIGEN_STRONG_INLINE Packet4f vec4f_unpacklo(const Packet4f& a, const Packet4f& b)\n{\n  return shuffle2<true>(a,b,eigen_neon_shuffle_mask(0, 0, 1, 1));\n}\nEIGEN_STRONG_INLINE Packet4f vec4f_unpackhi(const Packet4f& a, const Packet4f& b)\n{\n  return shuffle2<true>(a,b,eigen_neon_shuffle_mask(2, 2, 3, 3));\n}\n#define vec4f_duplane(a, p) \\\n  vdupq_lane_f32(vget_low_f32(a), p)\n\n#define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \\\n  const Packet4f p4f_##NAME = pset1<Packet4f>(X)\n\n#define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \\\n  const Packet4f p4f_##NAME = vreinterpretq_f32_u32(pset1<int32_t>(X))\n\n#define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \\\n  const Packet4i p4i_##NAME = pset1<Packet4i>(X)\n\n#if EIGEN_ARCH_ARM64\n  // __builtin_prefetch tends to do nothing on ARM64 compilers because the\n  // prefetch instructions there are too detailed for __builtin_prefetch to map\n  // meaningfully to them.\n  #define EIGEN_ARM_PREFETCH(ADDR)  __asm__ __volatile__(\"prfm pldl1keep, [%[addr]]\\n\" ::[addr] \"r\"(ADDR) : );\n#elif EIGEN_HAS_BUILTIN(__builtin_prefetch) || EIGEN_COMP_GNUC\n  #define EIGEN_ARM_PREFETCH(ADDR) __builtin_prefetch(ADDR);\n#elif defined __pld\n  #define EIGEN_ARM_PREFETCH(ADDR) __pld(ADDR)\n#elif EIGEN_ARCH_ARM32\n  #define EIGEN_ARM_PREFETCH(ADDR) __asm__ __volatile__ (\"pld [%[addr]]\\n\" :: [addr] \"r\" (ADDR) : );\n#else\n  // by default no explicit prefetching\n  #define EIGEN_ARM_PREFETCH(ADDR)\n#endif\n\ntemplate <>\nstruct packet_traits<float> : default_packet_traits\n{\n  typedef Packet4f type;\n  typedef Packet2f half;\n  enum\n  {\n    Vectorizable = 1,\n    AlignedOnScalar = 1,\n    size = 4,\n    HasHalfPacket = 
1,\n\n    HasAdd       = 1,\n    HasSub       = 1,\n    HasShift     = 1,\n    HasMul       = 1,\n    HasNegate    = 1,\n    HasAbs       = 1,\n    HasArg       = 0,\n    HasAbs2      = 1,\n    HasAbsDiff   = 1,\n    HasMin       = 1,\n    HasMax       = 1,\n    HasConj      = 1,\n    HasSetLinear = 0,\n    HasBlend     = 0,\n\n    HasDiv   = 1,\n    HasFloor = 1,\n    HasCeil = 1,\n    HasRint = 1,\n\n    HasSin  = EIGEN_FAST_MATH,\n    HasCos  = EIGEN_FAST_MATH,\n    HasLog  = 1,\n    HasExp  = 1,\n    HasSqrt = 1,\n    HasRsqrt = 1,\n    HasTanh = EIGEN_FAST_MATH,\n    HasErf  = EIGEN_FAST_MATH,\n    HasBessel = 0,  // Issues with accuracy.\n    HasNdtri = 0\n  };\n};\n\ntemplate <>\nstruct packet_traits<int8_t> : default_packet_traits\n{\n  typedef Packet16c type;\n  typedef Packet8c half;\n  enum\n  {\n    Vectorizable = 1,\n    AlignedOnScalar = 1,\n    size = 16,\n    HasHalfPacket = 1,\n\n    HasAdd       = 1,\n    HasSub       = 1,\n    HasShift     = 1,\n    HasMul       = 1,\n    HasNegate    = 1,\n    HasAbs       = 1,\n    HasAbsDiff   = 1,\n    HasArg       = 0,\n    HasAbs2      = 1,\n    HasMin       = 1,\n    HasMax       = 1,\n    HasConj      = 1,\n    HasSetLinear = 0,\n    HasBlend     = 0\n  };\n};\n\ntemplate <>\nstruct packet_traits<uint8_t> : default_packet_traits\n{\n  typedef Packet16uc type;\n  typedef Packet8uc half;\n  enum\n  {\n    Vectorizable = 1,\n    AlignedOnScalar = 1,\n    size = 16,\n    HasHalfPacket = 1,\n\n    HasAdd       = 1,\n    HasSub       = 1,\n    HasShift     = 1,\n    HasMul       = 1,\n    HasNegate    = 0,\n    HasAbs       = 1,\n    HasAbsDiff   = 1,\n    HasArg       = 0,\n    HasAbs2      = 1,\n    HasMin       = 1,\n    HasMax       = 1,\n    HasConj      = 1,\n    HasSetLinear = 0,\n    HasBlend     = 0,\n\n    HasSqrt = 1\n  };\n};\n\ntemplate <>\nstruct packet_traits<int16_t> : default_packet_traits\n{\n  typedef Packet8s type;\n  typedef Packet4s half;\n  enum\n  {\n    Vectorizable = 1,\n    AlignedOnScalar = 1,\n    size = 8,\n    HasHalfPacket = 1,\n\n    HasAdd       = 1,\n    HasSub       = 1,\n    HasShift     = 1,\n    HasMul       = 1,\n    HasNegate    = 1,\n    HasAbs       = 1,\n    HasAbsDiff   = 1,\n    HasArg       = 0,\n    HasAbs2      = 1,\n    HasMin       = 1,\n    HasMax       = 1,\n    HasConj      = 1,\n    HasSetLinear = 0,\n    HasBlend     = 0\n  };\n};\n\ntemplate <>\nstruct packet_traits<uint16_t> : default_packet_traits\n{\n  typedef Packet8us type;\n  typedef Packet4us half;\n  enum\n  {\n    Vectorizable = 1,\n    AlignedOnScalar = 1,\n    size = 8,\n    HasHalfPacket = 1,\n\n    HasAdd       = 1,\n    HasSub       = 1,\n    HasShift     = 1,\n    HasMul       = 1,\n    HasNegate    = 0,\n    HasAbs       = 0,\n    HasAbsDiff   = 1,\n    HasArg       = 0,\n    HasAbs2      = 1,\n    HasMin       = 1,\n    HasMax       = 1,\n    HasConj      = 1,\n    HasSetLinear = 0,\n    HasBlend     = 0,\n    HasSqrt = 1\n  };\n};\n\ntemplate <>\nstruct packet_traits<int32_t> : default_packet_traits\n{\n  typedef Packet4i type;\n  typedef Packet2i half;\n  enum\n  {\n    Vectorizable = 1,\n    AlignedOnScalar = 1,\n    size = 4,\n    HasHalfPacket = 1,\n\n    HasAdd       = 1,\n    HasSub       = 1,\n    HasShift     = 1,\n    HasMul       = 1,\n    HasNegate    = 1,\n    HasAbs       = 1,\n    HasArg       = 0,\n    HasAbs2      = 1,\n    HasAbsDiff   = 1,\n    HasMin       = 1,\n    HasMax       = 1,\n    HasConj      = 1,\n    HasSetLinear = 0,\n    HasBlend     = 0\n  };\n};\n\ntemplate <>\nstruct 
packet_traits<uint32_t> : default_packet_traits\n{\n  typedef Packet4ui type;\n  typedef Packet2ui half;\n  enum\n  {\n    Vectorizable = 1,\n    AlignedOnScalar = 1,\n    size = 4,\n    HasHalfPacket = 1,\n\n    HasAdd       = 1,\n    HasSub       = 1,\n    HasShift     = 1,\n    HasMul       = 1,\n    HasNegate    = 0,\n    HasAbs       = 0,\n    HasArg       = 0,\n    HasAbs2      = 1,\n    HasAbsDiff   = 1,\n    HasMin       = 1,\n    HasMax       = 1,\n    HasConj      = 1,\n    HasSetLinear = 0,\n    HasBlend     = 0,\n\n    HasSqrt = 1\n  };\n};\n\ntemplate <>\nstruct packet_traits<int64_t> : default_packet_traits\n{\n  typedef Packet2l type;\n  typedef Packet2l half;\n  enum\n  {\n    Vectorizable = 1,\n    AlignedOnScalar = 1,\n    size = 2,\n    HasHalfPacket = 0,\n\n    HasCmp       = 1,\n    HasAdd       = 1,\n    HasSub       = 1,\n    HasShift     = 1,\n    HasMul       = 1,\n    HasNegate    = 1,\n    HasAbs       = 1,\n    HasArg       = 0,\n    HasAbs2      = 1,\n    HasAbsDiff   = 1,\n    HasMin       = 1,\n    HasMax       = 1,\n    HasConj      = 1,\n    HasSetLinear = 0,\n    HasBlend     = 0\n  };\n};\n\ntemplate <>\nstruct packet_traits<uint64_t> : default_packet_traits\n{\n  typedef Packet2ul type;\n  typedef Packet2ul half;\n  enum\n  {\n    Vectorizable = 1,\n    AlignedOnScalar = 1,\n    size = 2,\n    HasHalfPacket = 0,\n\n    HasCmp       = 1,\n    HasAdd       = 1,\n    HasSub       = 1,\n    HasShift     = 1,\n    HasMul       = 1,\n    HasNegate    = 0,\n    HasAbs       = 0,\n    HasArg       = 0,\n    HasAbs2      = 1,\n    HasAbsDiff   = 1,\n    HasMin       = 1,\n    HasMax       = 1,\n    HasConj      = 1,\n    HasSetLinear = 0,\n    HasBlend     = 0\n  };\n};\n\n#if EIGEN_GNUC_AT_MOST(4, 4) && !EIGEN_COMP_LLVM\n// workaround gcc 4.2, 4.3 and 4.4 compilation issue\nEIGEN_STRONG_INLINE float32x4_t vld1q_f32(const float* x) { return ::vld1q_f32((const float32_t*)x); }\nEIGEN_STRONG_INLINE float32x2_t vld1_f32(const float* x) { return ::vld1_f32 ((const float32_t*)x); }\nEIGEN_STRONG_INLINE float32x2_t vld1_dup_f32(const float* x) { return ::vld1_dup_f32 ((const float32_t*)x); }\nEIGEN_STRONG_INLINE void vst1q_f32(float* to, float32x4_t from) { ::vst1q_f32((float32_t*)to,from); }\nEIGEN_STRONG_INLINE void vst1_f32 (float* to, float32x2_t from) { ::vst1_f32 ((float32_t*)to,from); }\n#endif\n\ntemplate<> struct unpacket_traits<Packet2f>\n{\n  typedef float type;\n  typedef Packet2f half;\n  typedef Packet2i integer_packet;\n  enum\n  {\n    size = 2,\n    alignment = Aligned16,\n    vectorizable = true,\n    masked_load_available = false,\n    masked_store_available = false\n  };\n};\ntemplate<> struct unpacket_traits<Packet4f>\n{\n  typedef float type;\n  typedef Packet2f half;\n  typedef Packet4i integer_packet;\n  enum\n  {\n    size = 4,\n    alignment = Aligned16,\n    vectorizable = true,\n    masked_load_available = false,\n    masked_store_available = false\n  };\n};\ntemplate<> struct unpacket_traits<Packet4c>\n{\n  typedef int8_t type;\n  typedef Packet4c half;\n  enum\n  {\n    size = 4,\n    alignment = Unaligned,\n    vectorizable = true,\n    masked_load_available = false,\n    masked_store_available = false\n  };\n};\ntemplate<> struct unpacket_traits<Packet8c>\n{\n  typedef int8_t type;\n  typedef Packet4c half;\n  enum\n  {\n    size = 8,\n    alignment = Aligned16,\n    vectorizable = true,\n    masked_load_available = false,\n    masked_store_available = false\n  };\n};\ntemplate<> struct unpacket_traits<Packet16c>\n{\n  typedef int8_t 
type;\n  typedef Packet8c half;\n  enum\n  {\n    size = 16,\n    alignment = Aligned16,\n    vectorizable = true,\n    masked_load_available = false,\n    masked_store_available = false\n  };\n};\ntemplate<> struct unpacket_traits<Packet4uc>\n{\n  typedef uint8_t type;\n  typedef Packet4uc half;\n  enum\n  {\n    size = 4,\n    alignment = Unaligned,\n    vectorizable = true,\n    masked_load_available = false,\n    masked_store_available = false\n  };\n};\ntemplate<> struct unpacket_traits<Packet8uc>\n{\n  typedef uint8_t type;\n  typedef Packet4uc half;\n  enum\n  {\n    size = 8,\n    alignment = Aligned16,\n    vectorizable = true,\n    masked_load_available = false,\n    masked_store_available = false\n  };\n};\ntemplate<> struct unpacket_traits<Packet16uc>\n{\n  typedef uint8_t type;\n  typedef Packet8uc half;\n  enum\n  {\n    size = 16,\n    alignment = Aligned16,\n    vectorizable = true,\n    masked_load_available = false,\n    masked_store_available = false};\n};\ntemplate<> struct unpacket_traits<Packet4s>\n{\n  typedef int16_t type;\n  typedef Packet4s half;\n  enum\n  {\n    size = 4,\n    alignment = Aligned16,\n    vectorizable = true,\n    masked_load_available = false,\n    masked_store_available = false\n  };\n};\ntemplate<> struct unpacket_traits<Packet8s>\n{\n  typedef int16_t type;\n  typedef Packet4s half;\n  enum\n  {\n    size = 8,\n    alignment = Aligned16,\n    vectorizable = true,\n    masked_load_available = false,\n    masked_store_available = false\n  };\n};\ntemplate<> struct unpacket_traits<Packet4us>\n{\n  typedef uint16_t type;\n  typedef Packet4us half;\n  enum\n  {\n    size = 4,\n    alignment = Aligned16,\n    vectorizable = true,\n    masked_load_available = false,\n    masked_store_available = false\n  };\n};\ntemplate<> struct unpacket_traits<Packet8us>\n{\n  typedef uint16_t type;\n  typedef Packet4us half;\n  enum\n  {\n    size = 8,\n    alignment = Aligned16,\n    vectorizable = true,\n    masked_load_available = false,\n    masked_store_available = false\n  };\n};\ntemplate<> struct unpacket_traits<Packet2i>\n{\n  typedef int32_t type;\n  typedef Packet2i half;\n  enum\n  {\n    size = 2,\n    alignment = Aligned16,\n    vectorizable = true,\n    masked_load_available = false,\n    masked_store_available = false\n  };\n};\ntemplate<> struct unpacket_traits<Packet4i>\n{\n  typedef int32_t type;\n  typedef Packet2i half;\n  enum\n  {\n    size = 4,\n    alignment = Aligned16,\n    vectorizable = true,\n    masked_load_available = false,\n    masked_store_available = false\n  };\n};\ntemplate<> struct unpacket_traits<Packet2ui>\n{\n  typedef uint32_t type;\n  typedef Packet2ui half;\n  enum\n  {\n    size = 2,\n    alignment = Aligned16,\n    vectorizable = true,\n    masked_load_available = false,\n    masked_store_available = false\n  };\n};\ntemplate<> struct unpacket_traits<Packet4ui>\n{\n  typedef uint32_t type;\n  typedef Packet2ui half;\n  enum\n  {\n    size = 4,\n    alignment = Aligned16,\n    vectorizable = true,\n    masked_load_available = false,\n    masked_store_available = false\n  };\n};\ntemplate<> struct unpacket_traits<Packet2l>\n{\n  typedef int64_t type;\n  typedef Packet2l half;\n  enum\n  {\n    size = 2,\n    alignment = Aligned16,\n    vectorizable = true,\n    masked_load_available = false,\n    masked_store_available = false\n  };\n};\ntemplate<> struct unpacket_traits<Packet2ul>\n{\n  typedef uint64_t type;\n  typedef Packet2ul half;\n  enum\n  {\n    size = 2,\n    alignment = Aligned16,\n    vectorizable = true,\n 
   masked_load_available = false,\n    masked_store_available = false\n  };\n};\n\ntemplate<> EIGEN_STRONG_INLINE Packet2f pset1<Packet2f>(const float& from) { return vdup_n_f32(from); }\ntemplate<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { return vdupq_n_f32(from); }\ntemplate<> EIGEN_STRONG_INLINE Packet4c pset1<Packet4c>(const int8_t& from)\n{ return vget_lane_s32(vreinterpret_s32_s8(vdup_n_s8(from)), 0); }\ntemplate<> EIGEN_STRONG_INLINE Packet8c pset1<Packet8c>(const int8_t& from) { return vdup_n_s8(from); }\ntemplate<> EIGEN_STRONG_INLINE Packet16c pset1<Packet16c>(const int8_t& from) { return vdupq_n_s8(from); }\ntemplate<> EIGEN_STRONG_INLINE Packet4uc pset1<Packet4uc>(const uint8_t& from)\n{ return vget_lane_u32(vreinterpret_u32_u8(vdup_n_u8(from)), 0); }\ntemplate<> EIGEN_STRONG_INLINE Packet8uc pset1<Packet8uc>(const uint8_t& from) { return vdup_n_u8(from); }\ntemplate<> EIGEN_STRONG_INLINE Packet16uc pset1<Packet16uc>(const uint8_t& from) { return vdupq_n_u8(from); }\ntemplate<> EIGEN_STRONG_INLINE Packet4s pset1<Packet4s>(const int16_t& from) { return vdup_n_s16(from); }\ntemplate<> EIGEN_STRONG_INLINE Packet8s pset1<Packet8s>(const int16_t& from) { return vdupq_n_s16(from); }\ntemplate<> EIGEN_STRONG_INLINE Packet4us pset1<Packet4us>(const uint16_t& from) { return vdup_n_u16(from); }\ntemplate<> EIGEN_STRONG_INLINE Packet8us pset1<Packet8us>(const uint16_t& from) { return vdupq_n_u16(from); }\ntemplate<> EIGEN_STRONG_INLINE Packet2i pset1<Packet2i>(const int32_t& from) { return vdup_n_s32(from); }\ntemplate<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int32_t& from) { return vdupq_n_s32(from); }\ntemplate<> EIGEN_STRONG_INLINE Packet2ui pset1<Packet2ui>(const uint32_t& from) { return vdup_n_u32(from); }\ntemplate<> EIGEN_STRONG_INLINE Packet4ui pset1<Packet4ui>(const uint32_t& from) { return vdupq_n_u32(from); }\ntemplate<> EIGEN_STRONG_INLINE Packet2l pset1<Packet2l>(const int64_t& from) { return vdupq_n_s64(from); }\ntemplate<> EIGEN_STRONG_INLINE Packet2ul pset1<Packet2ul>(const uint64_t& from) { return vdupq_n_u64(from); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet2f pset1frombits<Packet2f>(unsigned int from)\n{ return vreinterpret_f32_u32(vdup_n_u32(from)); }\ntemplate<> EIGEN_STRONG_INLINE Packet4f pset1frombits<Packet4f>(unsigned int from)\n{ return vreinterpretq_f32_u32(vdupq_n_u32(from)); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet2f plset<Packet2f>(const float& a)\n{\n  const float c[] = {0.0f,1.0f};\n  return vadd_f32(pset1<Packet2f>(a), vld1_f32(c));\n}\ntemplate<> EIGEN_STRONG_INLINE Packet4f plset<Packet4f>(const float& a)\n{\n  const float c[] = {0.0f,1.0f,2.0f,3.0f};\n  return vaddq_f32(pset1<Packet4f>(a), vld1q_f32(c));\n}\ntemplate<> EIGEN_STRONG_INLINE Packet4c plset<Packet4c>(const int8_t& a)\n{ return vget_lane_s32(vreinterpret_s32_s8(vadd_s8(vreinterpret_s8_u32(vdup_n_u32(0x03020100)), vdup_n_s8(a))), 0); }\ntemplate<> EIGEN_STRONG_INLINE Packet8c plset<Packet8c>(const int8_t& a)\n{\n  const int8_t c[] = {0,1,2,3,4,5,6,7};\n  return vadd_s8(pset1<Packet8c>(a), vld1_s8(c));\n}\ntemplate<> EIGEN_STRONG_INLINE Packet16c plset<Packet16c>(const int8_t& a)\n{\n  const int8_t c[] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};\n  return vaddq_s8(pset1<Packet16c>(a), vld1q_s8(c));\n}\ntemplate<> EIGEN_STRONG_INLINE Packet4uc plset<Packet4uc>(const uint8_t& a)\n{ return vget_lane_u32(vreinterpret_u32_u8(vadd_u8(vreinterpret_u8_u32(vdup_n_u32(0x03020100)), vdup_n_u8(a))), 0); }\ntemplate<> EIGEN_STRONG_INLINE Packet8uc 
plset<Packet8uc>(const uint8_t& a)\n{\n  const uint8_t c[] = {0,1,2,3,4,5,6,7};\n  return vadd_u8(pset1<Packet8uc>(a), vld1_u8(c));\n}\ntemplate<> EIGEN_STRONG_INLINE Packet16uc plset<Packet16uc>(const uint8_t& a)\n{\n  const uint8_t c[] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};\n  return vaddq_u8(pset1<Packet16uc>(a), vld1q_u8(c));\n}\ntemplate<> EIGEN_STRONG_INLINE Packet4s plset<Packet4s>(const int16_t& a)\n{\n  const int16_t c[] = {0,1,2,3};\n  return vadd_s16(pset1<Packet4s>(a), vld1_s16(c));\n}\ntemplate<> EIGEN_STRONG_INLINE Packet4us plset<Packet4us>(const uint16_t& a)\n{\n  const uint16_t c[] = {0,1,2,3};\n  return vadd_u16(pset1<Packet4us>(a), vld1_u16(c));\n}\ntemplate<> EIGEN_STRONG_INLINE Packet8s plset<Packet8s>(const int16_t& a)\n{\n  const int16_t c[] = {0,1,2,3,4,5,6,7};\n  return vaddq_s16(pset1<Packet8s>(a), vld1q_s16(c));\n}\ntemplate<> EIGEN_STRONG_INLINE Packet8us plset<Packet8us>(const uint16_t& a)\n{\n  const uint16_t c[] = {0,1,2,3,4,5,6,7};\n  return vaddq_u16(pset1<Packet8us>(a), vld1q_u16(c));\n}\ntemplate<> EIGEN_STRONG_INLINE Packet2i plset<Packet2i>(const int32_t& a)\n{\n  const int32_t c[] = {0,1};\n  return vadd_s32(pset1<Packet2i>(a), vld1_s32(c));\n}\ntemplate<> EIGEN_STRONG_INLINE Packet4i plset<Packet4i>(const int32_t& a)\n{\n  const int32_t c[] = {0,1,2,3};\n  return vaddq_s32(pset1<Packet4i>(a), vld1q_s32(c));\n}\ntemplate<> EIGEN_STRONG_INLINE Packet2ui plset<Packet2ui>(const uint32_t& a)\n{\n  const uint32_t c[] = {0,1};\n  return vadd_u32(pset1<Packet2ui>(a), vld1_u32(c));\n}\ntemplate<> EIGEN_STRONG_INLINE Packet4ui plset<Packet4ui>(const uint32_t& a)\n{\n  const uint32_t c[] = {0,1,2,3};\n  return vaddq_u32(pset1<Packet4ui>(a), vld1q_u32(c));\n}\ntemplate<> EIGEN_STRONG_INLINE Packet2l plset<Packet2l>(const int64_t& a)\n{\n  const int64_t c[] = {0,1};\n  return vaddq_s64(pset1<Packet2l>(a), vld1q_s64(c));\n}\ntemplate<> EIGEN_STRONG_INLINE Packet2ul plset<Packet2ul>(const uint64_t& a)\n{\n  const uint64_t c[] = {0,1};\n  return vaddq_u64(pset1<Packet2ul>(a), vld1q_u64(c));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet2f padd<Packet2f>(const Packet2f& a, const Packet2f& b) { return vadd_f32(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4f padd<Packet4f>(const Packet4f& a, const Packet4f& b) { return vaddq_f32(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4c padd<Packet4c>(const Packet4c& a, const Packet4c& b)\n{\n  return vget_lane_s32(vreinterpret_s32_s8(vadd_s8(\n      vreinterpret_s8_s32(vdup_n_s32(a)),\n      vreinterpret_s8_s32(vdup_n_s32(b)))), 0);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet8c padd<Packet8c>(const Packet8c& a, const Packet8c& b) { return vadd_s8(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet16c padd<Packet16c>(const Packet16c& a, const Packet16c& b) { return vaddq_s8(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4uc padd<Packet4uc>(const Packet4uc& a, const Packet4uc& b)\n{\n  return vget_lane_u32(vreinterpret_u32_u8(vadd_u8(\n      vreinterpret_u8_u32(vdup_n_u32(a)),\n      vreinterpret_u8_u32(vdup_n_u32(b)))), 0);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet8uc padd<Packet8uc>(const Packet8uc& a, const Packet8uc& b) { return vadd_u8(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet16uc padd<Packet16uc>(const Packet16uc& a, const Packet16uc& b) { return vaddq_u8(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4s padd<Packet4s>(const Packet4s& a, const Packet4s& b) { return vadd_s16(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet8s padd<Packet8s>(const Packet8s& a, const Packet8s& b) { return vaddq_s16(a,b); }\ntemplate<> 
EIGEN_STRONG_INLINE Packet4us padd<Packet4us>(const Packet4us& a, const Packet4us& b) { return vadd_u16(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet8us padd<Packet8us>(const Packet8us& a, const Packet8us& b) { return vaddq_u16(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet2i padd<Packet2i>(const Packet2i& a, const Packet2i& b) { return vadd_s32(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4i padd<Packet4i>(const Packet4i& a, const Packet4i& b) { return vaddq_s32(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet2ui padd<Packet2ui>(const Packet2ui& a, const Packet2ui& b) { return vadd_u32(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4ui padd<Packet4ui>(const Packet4ui& a, const Packet4ui& b) { return vaddq_u32(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet2l padd<Packet2l>(const Packet2l& a, const Packet2l& b) { return vaddq_s64(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet2ul padd<Packet2ul>(const Packet2ul& a, const Packet2ul& b) { return vaddq_u64(a,b); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet2f psub<Packet2f>(const Packet2f& a, const Packet2f& b) { return vsub_f32(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4f psub<Packet4f>(const Packet4f& a, const Packet4f& b) { return vsubq_f32(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4c psub<Packet4c>(const Packet4c& a, const Packet4c& b)\n{\n  return vget_lane_s32(vreinterpret_s32_s8(vsub_s8(\n      vreinterpret_s8_s32(vdup_n_s32(a)),\n      vreinterpret_s8_s32(vdup_n_s32(b)))), 0);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet8c psub<Packet8c>(const Packet8c& a, const Packet8c& b) { return vsub_s8(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet16c psub<Packet16c>(const Packet16c& a, const Packet16c& b) { return vsubq_s8(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4uc psub<Packet4uc>(const Packet4uc& a, const Packet4uc& b)\n{\n  return vget_lane_u32(vreinterpret_u32_u8(vsub_u8(\n      vreinterpret_u8_u32(vdup_n_u32(a)),\n      vreinterpret_u8_u32(vdup_n_u32(b)))), 0);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet8uc psub<Packet8uc>(const Packet8uc& a, const Packet8uc& b) { return vsub_u8(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet16uc psub<Packet16uc>(const Packet16uc& a, const Packet16uc& b) { return vsubq_u8(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4s psub<Packet4s>(const Packet4s& a, const Packet4s& b) { return vsub_s16(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet8s psub<Packet8s>(const Packet8s& a, const Packet8s& b) { return vsubq_s16(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4us psub<Packet4us>(const Packet4us& a, const Packet4us& b) { return vsub_u16(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet8us psub<Packet8us>(const Packet8us& a, const Packet8us& b) { return vsubq_u16(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet2i psub<Packet2i>(const Packet2i& a, const Packet2i& b) { return vsub_s32(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4i psub<Packet4i>(const Packet4i& a, const Packet4i& b) { return vsubq_s32(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet2ui psub<Packet2ui>(const Packet2ui& a, const Packet2ui& b) { return vsub_u32(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4ui psub<Packet4ui>(const Packet4ui& a, const Packet4ui& b) { return vsubq_u32(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet2l psub<Packet2l>(const Packet2l& a, const Packet2l& b) { return vsubq_s64(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet2ul psub<Packet2ul>(const Packet2ul& a, const Packet2ul& b) { return vsubq_u64(a,b); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet2f pxor<Packet2f>(const Packet2f& a, const Packet2f& b);\ntemplate<> 
EIGEN_STRONG_INLINE Packet2f paddsub<Packet2f>(const Packet2f& a, const Packet2f & b) {\n  Packet2f mask = {numext::bit_cast<float>(0x80000000u), 0.0f};\n  return padd(a, pxor(mask, b));\n}\ntemplate<> EIGEN_STRONG_INLINE Packet4f pxor<Packet4f>(const Packet4f& a, const Packet4f& b);\ntemplate<> EIGEN_STRONG_INLINE Packet4f paddsub<Packet4f>(const Packet4f& a, const Packet4f& b) {\n  Packet4f mask = {numext::bit_cast<float>(0x80000000u), 0.0f, numext::bit_cast<float>(0x80000000u), 0.0f};\n  return padd(a, pxor(mask, b));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet2f pnegate(const Packet2f& a) { return vneg_f32(a); }\ntemplate<> EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a) { return vnegq_f32(a); }\ntemplate<> EIGEN_STRONG_INLINE Packet4c pnegate(const Packet4c& a)\n{ return vget_lane_s32(vreinterpret_s32_s8(vneg_s8(vreinterpret_s8_s32(vdup_n_s32(a)))), 0); }\ntemplate<> EIGEN_STRONG_INLINE Packet8c pnegate(const Packet8c& a) { return vneg_s8(a); }\ntemplate<> EIGEN_STRONG_INLINE Packet16c pnegate(const Packet16c& a) { return vnegq_s8(a); }\ntemplate<> EIGEN_STRONG_INLINE Packet4s pnegate(const Packet4s& a) { return vneg_s16(a); }\ntemplate<> EIGEN_STRONG_INLINE Packet8s pnegate(const Packet8s& a) { return vnegq_s16(a); }\ntemplate<> EIGEN_STRONG_INLINE Packet2i pnegate(const Packet2i& a) { return vneg_s32(a); }\ntemplate<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a) { return vnegq_s32(a); }\ntemplate<> EIGEN_STRONG_INLINE Packet2l pnegate(const Packet2l& a) {\n#if EIGEN_ARCH_ARM64\n  return vnegq_s64(a);\n#else\n  return vcombine_s64(\n      vdup_n_s64(-vgetq_lane_s64(a, 0)),\n      vdup_n_s64(-vgetq_lane_s64(a, 1)));\n#endif\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet2f pconj(const Packet2f& a) { return a; }\ntemplate<> EIGEN_STRONG_INLINE Packet4f pconj(const Packet4f& a) { return a; }\ntemplate<> EIGEN_STRONG_INLINE Packet4c pconj(const Packet4c& a) { return a; }\ntemplate<> EIGEN_STRONG_INLINE Packet8c pconj(const Packet8c& a) { return a; }\ntemplate<> EIGEN_STRONG_INLINE Packet16c pconj(const Packet16c& a) { return a; }\ntemplate<> EIGEN_STRONG_INLINE Packet4uc pconj(const Packet4uc& a) { return a; }\ntemplate<> EIGEN_STRONG_INLINE Packet8uc pconj(const Packet8uc& a) { return a; }\ntemplate<> EIGEN_STRONG_INLINE Packet16uc pconj(const Packet16uc& a) { return a; }\ntemplate<> EIGEN_STRONG_INLINE Packet4s pconj(const Packet4s& a) { return a; }\ntemplate<> EIGEN_STRONG_INLINE Packet8s pconj(const Packet8s& a) { return a; }\ntemplate<> EIGEN_STRONG_INLINE Packet4us pconj(const Packet4us& a) { return a; }\ntemplate<> EIGEN_STRONG_INLINE Packet8us pconj(const Packet8us& a) { return a; }\ntemplate<> EIGEN_STRONG_INLINE Packet2i pconj(const Packet2i& a) { return a; }\ntemplate<> EIGEN_STRONG_INLINE Packet4i pconj(const Packet4i& a) { return a; }\ntemplate<> EIGEN_STRONG_INLINE Packet2ui pconj(const Packet2ui& a) { return a; }\ntemplate<> EIGEN_STRONG_INLINE Packet4ui pconj(const Packet4ui& a) { return a; }\ntemplate<> EIGEN_STRONG_INLINE Packet2l pconj(const Packet2l& a) { return a; }\ntemplate<> EIGEN_STRONG_INLINE Packet2ul pconj(const Packet2ul& a) { return a; }\n\ntemplate<> EIGEN_STRONG_INLINE Packet2f pmul<Packet2f>(const Packet2f& a, const Packet2f& b) { return vmul_f32(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4f pmul<Packet4f>(const Packet4f& a, const Packet4f& b) { return vmulq_f32(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4c pmul<Packet4c>(const Packet4c& a, const Packet4c& b)\n{\n  return vget_lane_s32(vreinterpret_s32_s8(vmul_s8(\n      
vreinterpret_s8_s32(vdup_n_s32(a)),\n      vreinterpret_s8_s32(vdup_n_s32(b)))), 0);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet8c pmul<Packet8c>(const Packet8c& a, const Packet8c& b) { return vmul_s8(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet16c pmul<Packet16c>(const Packet16c& a, const Packet16c& b) { return vmulq_s8(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4uc pmul<Packet4uc>(const Packet4uc& a, const Packet4uc& b)\n{\n  return vget_lane_u32(vreinterpret_u32_u8(vmul_u8(\n      vreinterpret_u8_u32(vdup_n_u32(a)),\n      vreinterpret_u8_u32(vdup_n_u32(b)))), 0);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet8uc pmul<Packet8uc>(const Packet8uc& a, const Packet8uc& b) { return vmul_u8(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet16uc pmul<Packet16uc>(const Packet16uc& a, const Packet16uc& b) { return vmulq_u8(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4s pmul<Packet4s>(const Packet4s& a, const Packet4s& b) { return vmul_s16(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet8s pmul<Packet8s>(const Packet8s& a, const Packet8s& b) { return vmulq_s16(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4us pmul<Packet4us>(const Packet4us& a, const Packet4us& b) { return vmul_u16(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet8us pmul<Packet8us>(const Packet8us& a, const Packet8us& b) { return vmulq_u16(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet2i pmul<Packet2i>(const Packet2i& a, const Packet2i& b) { return vmul_s32(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4i pmul<Packet4i>(const Packet4i& a, const Packet4i& b) { return vmulq_s32(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet2ui pmul<Packet2ui>(const Packet2ui& a, const Packet2ui& b) { return vmul_u32(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4ui pmul<Packet4ui>(const Packet4ui& a, const Packet4ui& b) { return vmulq_u32(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet2l pmul<Packet2l>(const Packet2l& a, const Packet2l& b) {\n  return vcombine_s64(\n    vdup_n_s64(vgetq_lane_s64(a, 0)*vgetq_lane_s64(b, 0)),\n    vdup_n_s64(vgetq_lane_s64(a, 1)*vgetq_lane_s64(b, 1)));\n}\ntemplate<> EIGEN_STRONG_INLINE Packet2ul pmul<Packet2ul>(const Packet2ul& a, const Packet2ul& b) {\n  return vcombine_u64(\n    vdup_n_u64(vgetq_lane_u64(a, 0)*vgetq_lane_u64(b, 0)),\n    vdup_n_u64(vgetq_lane_u64(a, 1)*vgetq_lane_u64(b, 1)));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet2f pdiv<Packet2f>(const Packet2f& a, const Packet2f& b)\n{\n#if EIGEN_ARCH_ARM64\n  return vdiv_f32(a,b);\n#else\n  Packet2f inv, restep, div;\n\n  // NEON does not offer a divide instruction, we have to do a reciprocal approximation\n  // However NEON in contrast to other SIMD engines (AltiVec/SSE), offers\n  // a reciprocal estimate AND a reciprocal step -which saves a few instructions\n  // vrecpeq_f32() returns an estimate to 1/b, which we will finetune with\n  // Newton-Raphson and vrecpsq_f32()\n  inv = vrecpe_f32(b);\n\n  // This returns a differential, by which we will have to multiply inv to get a better\n  // approximation of 1/b.\n  restep = vrecps_f32(b, inv);\n  inv = vmul_f32(restep, inv);\n\n  // Finally, multiply a by 1/b and get the wanted result of the division.\n  div = vmul_f32(a, inv);\n\n  return div;\n#endif\n}\ntemplate<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b)\n{\n#if EIGEN_ARCH_ARM64\n  return vdivq_f32(a,b);\n#else\n  Packet4f inv, restep, div;\n\n  // NEON does not offer a divide instruction, we have to do a reciprocal approximation\n  // However NEON in contrast to other SIMD engines (AltiVec/SSE), offers\n  // a 
reciprocal estimate AND a reciprocal step -which saves a few instructions\n  // vrecpeq_f32() returns an estimate to 1/b, which we will finetune with\n  // Newton-Raphson and vrecpsq_f32()\n  inv = vrecpeq_f32(b);\n\n  // This returns a differential, by which we will have to multiply inv to get a better\n  // approximation of 1/b.\n  restep = vrecpsq_f32(b, inv);\n  inv = vmulq_f32(restep, inv);\n\n  // Finally, multiply a by 1/b and get the wanted result of the division.\n  div = vmulq_f32(a, inv);\n\n  return div;\n#endif\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4c pdiv<Packet4c>(const Packet4c& /*a*/, const Packet4c& /*b*/)\n{\n  eigen_assert(false && \"packet integer division are not supported by NEON\");\n  return pset1<Packet4c>(0);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet8c pdiv<Packet8c>(const Packet8c& /*a*/, const Packet8c& /*b*/)\n{\n  eigen_assert(false && \"packet integer division are not supported by NEON\");\n  return pset1<Packet8c>(0);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet16c pdiv<Packet16c>(const Packet16c& /*a*/, const Packet16c& /*b*/)\n{\n  eigen_assert(false && \"packet integer division are not supported by NEON\");\n  return pset1<Packet16c>(0);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet4uc pdiv<Packet4uc>(const Packet4uc& /*a*/, const Packet4uc& /*b*/)\n{\n  eigen_assert(false && \"packet integer division are not supported by NEON\");\n  return pset1<Packet4uc>(0);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet8uc pdiv<Packet8uc>(const Packet8uc& /*a*/, const Packet8uc& /*b*/)\n{\n  eigen_assert(false && \"packet integer division are not supported by NEON\");\n  return pset1<Packet8uc>(0);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet16uc pdiv<Packet16uc>(const Packet16uc& /*a*/, const Packet16uc& /*b*/)\n{\n  eigen_assert(false && \"packet integer division are not supported by NEON\");\n  return pset1<Packet16uc>(0);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet4s pdiv<Packet4s>(const Packet4s& /*a*/, const Packet4s& /*b*/)\n{\n  eigen_assert(false && \"packet integer division are not supported by NEON\");\n  return pset1<Packet4s>(0);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet8s pdiv<Packet8s>(const Packet8s& /*a*/, const Packet8s& /*b*/)\n{\n  eigen_assert(false && \"packet integer division are not supported by NEON\");\n  return pset1<Packet8s>(0);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet4us pdiv<Packet4us>(const Packet4us& /*a*/, const Packet4us& /*b*/)\n{\n  eigen_assert(false && \"packet integer division are not supported by NEON\");\n  return pset1<Packet4us>(0);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet8us pdiv<Packet8us>(const Packet8us& /*a*/, const Packet8us& /*b*/)\n{\n  eigen_assert(false && \"packet integer division are not supported by NEON\");\n  return pset1<Packet8us>(0);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet2i pdiv<Packet2i>(const Packet2i& /*a*/, const Packet2i& /*b*/)\n{\n  eigen_assert(false && \"packet integer division are not supported by NEON\");\n  return pset1<Packet2i>(0);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& /*a*/, const Packet4i& /*b*/)\n{\n  eigen_assert(false && \"packet integer division are not supported by NEON\");\n  return pset1<Packet4i>(0);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet2ui pdiv<Packet2ui>(const Packet2ui& /*a*/, const Packet2ui& /*b*/)\n{\n  eigen_assert(false && \"packet integer division are not supported by NEON\");\n  return pset1<Packet2ui>(0);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet4ui pdiv<Packet4ui>(const Packet4ui& /*a*/, const Packet4ui& /*b*/)\n{\n  eigen_assert(false 
&& \"packet integer division are not supported by NEON\");\n  return pset1<Packet4ui>(0);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet2l pdiv<Packet2l>(const Packet2l& /*a*/, const Packet2l& /*b*/)\n{\n  eigen_assert(false && \"packet integer division are not supported by NEON\");\n  return pset1<Packet2l>(0LL);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet2ul pdiv<Packet2ul>(const Packet2ul& /*a*/, const Packet2ul& /*b*/)\n{\n  eigen_assert(false && \"packet integer division are not supported by NEON\");\n  return pset1<Packet2ul>(0ULL);\n}\n\n\n#ifdef __ARM_FEATURE_FMA\ntemplate<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c)\n{ return vfmaq_f32(c,a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet2f pmadd(const Packet2f& a, const Packet2f& b, const Packet2f& c)\n{ return vfma_f32(c,a,b); }\n#else\ntemplate<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c)\n{\n  return vmlaq_f32(c,a,b);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet2f pmadd(const Packet2f& a, const Packet2f& b, const Packet2f& c)\n{\n  return vmla_f32(c,a,b);\n}\n#endif\n\n// No FMA instruction for int, so use MLA unconditionally.\ntemplate<> EIGEN_STRONG_INLINE Packet4c pmadd(const Packet4c& a, const Packet4c& b, const Packet4c& c)\n{\n  return vget_lane_s32(vreinterpret_s32_s8(vmla_s8(\n      vreinterpret_s8_s32(vdup_n_s32(c)),\n      vreinterpret_s8_s32(vdup_n_s32(a)),\n      vreinterpret_s8_s32(vdup_n_s32(b)))), 0);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet8c pmadd(const Packet8c& a, const Packet8c& b, const Packet8c& c)\n{ return vmla_s8(c,a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet16c pmadd(const Packet16c& a, const Packet16c& b, const Packet16c& c)\n{ return vmlaq_s8(c,a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4uc pmadd(const Packet4uc& a, const Packet4uc& b, const Packet4uc& c)\n{\n  return vget_lane_u32(vreinterpret_u32_u8(vmla_u8(\n      vreinterpret_u8_u32(vdup_n_u32(c)),\n      vreinterpret_u8_u32(vdup_n_u32(a)),\n      vreinterpret_u8_u32(vdup_n_u32(b)))), 0);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet8uc pmadd(const Packet8uc& a, const Packet8uc& b, const Packet8uc& c)\n{ return vmla_u8(c,a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet16uc pmadd(const Packet16uc& a, const Packet16uc& b, const Packet16uc& c)\n{ return vmlaq_u8(c,a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4s pmadd(const Packet4s& a, const Packet4s& b, const Packet4s& c)\n{ return vmla_s16(c,a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet8s pmadd(const Packet8s& a, const Packet8s& b, const Packet8s& c)\n{ return vmlaq_s16(c,a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4us pmadd(const Packet4us& a, const Packet4us& b, const Packet4us& c)\n{ return vmla_u16(c,a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet8us pmadd(const Packet8us& a, const Packet8us& b, const Packet8us& c)\n{ return vmlaq_u16(c,a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet2i pmadd(const Packet2i& a, const Packet2i& b, const Packet2i& c)\n{ return vmla_s32(c,a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c)\n{ return vmlaq_s32(c,a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet2ui pmadd(const Packet2ui& a, const Packet2ui& b, const Packet2ui& c)\n{ return vmla_u32(c,a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4ui pmadd(const Packet4ui& a, const Packet4ui& b, const Packet4ui& c)\n{ return vmlaq_u32(c,a,b); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet2f pabsdiff<Packet2f>(const Packet2f& a, const Packet2f& b)\n{ return vabd_f32(a,b); 
}\ntemplate<> EIGEN_STRONG_INLINE Packet4f pabsdiff<Packet4f>(const Packet4f& a, const Packet4f& b)\n{ return vabdq_f32(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4c pabsdiff<Packet4c>(const Packet4c& a, const Packet4c& b)\n{\n  return vget_lane_s32(vreinterpret_s32_s8(vabd_s8(\n      vreinterpret_s8_s32(vdup_n_s32(a)),\n      vreinterpret_s8_s32(vdup_n_s32(b)))), 0);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet8c pabsdiff<Packet8c>(const Packet8c& a, const Packet8c& b)\n{ return vabd_s8(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet16c pabsdiff<Packet16c>(const Packet16c& a, const Packet16c& b)\n{ return vabdq_s8(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4uc pabsdiff<Packet4uc>(const Packet4uc& a, const Packet4uc& b)\n{\n  return vget_lane_u32(vreinterpret_u32_u8(vabd_u8(\n      vreinterpret_u8_u32(vdup_n_u32(a)),\n      vreinterpret_u8_u32(vdup_n_u32(b)))), 0);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet8uc pabsdiff<Packet8uc>(const Packet8uc& a, const Packet8uc& b)\n{ return vabd_u8(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet16uc pabsdiff<Packet16uc>(const Packet16uc& a, const Packet16uc& b)\n{ return vabdq_u8(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4s pabsdiff<Packet4s>(const Packet4s& a, const Packet4s& b)\n{ return vabd_s16(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet8s pabsdiff<Packet8s>(const Packet8s& a, const Packet8s& b)\n{ return vabdq_s16(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4us pabsdiff<Packet4us>(const Packet4us& a, const Packet4us& b)\n{ return vabd_u16(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet8us pabsdiff<Packet8us>(const Packet8us& a, const Packet8us& b)\n{ return vabdq_u16(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet2i pabsdiff<Packet2i>(const Packet2i& a, const Packet2i& b)\n{ return vabd_s32(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4i pabsdiff<Packet4i>(const Packet4i& a, const Packet4i& b)\n{ return vabdq_s32(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet2ui pabsdiff<Packet2ui>(const Packet2ui& a, const Packet2ui& b)\n{ return vabd_u32(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4ui pabsdiff<Packet4ui>(const Packet4ui& a, const Packet4ui& b)\n{ return vabdq_u32(a,b); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet2f pmin<Packet2f>(const Packet2f& a, const Packet2f& b) { return vmin_f32(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b) { return vminq_f32(a,b); }\n\n#ifdef __ARM_FEATURE_NUMERIC_MAXMIN\n// numeric max and min are only available if ARM_FEATURE_NUMERIC_MAXMIN is defined (which can only be the case for Armv8 systems).\ntemplate<> EIGEN_STRONG_INLINE Packet4f pmin<PropagateNumbers, Packet4f>(const Packet4f& a, const Packet4f& b) { return vminnmq_f32(a, b); }\ntemplate<> EIGEN_STRONG_INLINE Packet2f pmin<PropagateNumbers, Packet2f>(const Packet2f& a, const Packet2f& b) { return vminnm_f32(a, b); }\n#endif\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f pmin<PropagateNaN, Packet4f>(const Packet4f& a, const Packet4f& b) { return pmin<Packet4f>(a, b); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet2f pmin<PropagateNaN, Packet2f>(const Packet2f& a, const Packet2f& b) { return pmin<Packet2f>(a, b); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet4c pmin<Packet4c>(const Packet4c& a, const Packet4c& b)\n{\n  return vget_lane_s32(vreinterpret_s32_s8(vmin_s8(\n      vreinterpret_s8_s32(vdup_n_s32(a)),\n      vreinterpret_s8_s32(vdup_n_s32(b)))), 0);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet8c pmin<Packet8c>(const Packet8c& a, const Packet8c& b) { return vmin_s8(a,b); }\ntemplate<> EIGEN_STRONG_INLINE 
Packet16c pmin<Packet16c>(const Packet16c& a, const Packet16c& b) { return vminq_s8(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4uc pmin<Packet4uc>(const Packet4uc& a, const Packet4uc& b)\n{\n  return vget_lane_u32(vreinterpret_u32_u8(vmin_u8(\n      vreinterpret_u8_u32(vdup_n_u32(a)),\n      vreinterpret_u8_u32(vdup_n_u32(b)))), 0);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet8uc pmin<Packet8uc>(const Packet8uc& a, const Packet8uc& b) { return vmin_u8(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet16uc pmin<Packet16uc>(const Packet16uc& a, const Packet16uc& b) { return vminq_u8(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4s pmin<Packet4s>(const Packet4s& a, const Packet4s& b) { return vmin_s16(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet8s pmin<Packet8s>(const Packet8s& a, const Packet8s& b) { return vminq_s16(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4us pmin<Packet4us>(const Packet4us& a, const Packet4us& b) { return vmin_u16(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet8us pmin<Packet8us>(const Packet8us& a, const Packet8us& b) { return vminq_u16(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet2i pmin<Packet2i>(const Packet2i& a, const Packet2i& b) { return vmin_s32(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b) { return vminq_s32(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet2ui pmin<Packet2ui>(const Packet2ui& a, const Packet2ui& b) { return vmin_u32(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4ui pmin<Packet4ui>(const Packet4ui& a, const Packet4ui& b) { return vminq_u32(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet2l pmin<Packet2l>(const Packet2l& a, const Packet2l& b) {\n  return vcombine_s64(\n      vdup_n_s64((std::min)(vgetq_lane_s64(a, 0), vgetq_lane_s64(b, 0))),\n      vdup_n_s64((std::min)(vgetq_lane_s64(a, 1), vgetq_lane_s64(b, 1))));\n}\ntemplate<> EIGEN_STRONG_INLINE Packet2ul pmin<Packet2ul>(const Packet2ul& a, const Packet2ul& b) {\n  return vcombine_u64(\n      vdup_n_u64((std::min)(vgetq_lane_u64(a, 0), vgetq_lane_u64(b, 0))),\n      vdup_n_u64((std::min)(vgetq_lane_u64(a, 1), vgetq_lane_u64(b, 1))));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet2f pmax<Packet2f>(const Packet2f& a, const Packet2f& b) { return vmax_f32(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b) { return vmaxq_f32(a,b); }\n\n#ifdef __ARM_FEATURE_NUMERIC_MAXMIN\n// numeric max and min are only available if ARM_FEATURE_NUMERIC_MAXMIN is defined (which can only be the case for Armv8 systems).\ntemplate<> EIGEN_STRONG_INLINE Packet4f pmax<PropagateNumbers, Packet4f>(const Packet4f& a, const Packet4f& b) { return vmaxnmq_f32(a, b); }\ntemplate<> EIGEN_STRONG_INLINE Packet2f pmax<PropagateNumbers, Packet2f>(const Packet2f& a, const Packet2f& b) { return vmaxnm_f32(a, b); }\n#endif\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f pmax<PropagateNaN, Packet4f>(const Packet4f& a, const Packet4f& b) { return pmax<Packet4f>(a, b); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet2f pmax<PropagateNaN, Packet2f>(const Packet2f& a, const Packet2f& b) { return pmax<Packet2f>(a, b); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet4c pmax<Packet4c>(const Packet4c& a, const Packet4c& b)\n{\n  return vget_lane_s32(vreinterpret_s32_s8(vmax_s8(\n      vreinterpret_s8_s32(vdup_n_s32(a)),\n      vreinterpret_s8_s32(vdup_n_s32(b)))), 0);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet8c pmax<Packet8c>(const Packet8c& a, const Packet8c& b) { return vmax_s8(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet16c pmax<Packet16c>(const 
Packet16c& a, const Packet16c& b) { return vmaxq_s8(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4uc pmax<Packet4uc>(const Packet4uc& a, const Packet4uc& b)\n{\n  return vget_lane_u32(vreinterpret_u32_u8(vmax_u8(\n      vreinterpret_u8_u32(vdup_n_u32(a)),\n      vreinterpret_u8_u32(vdup_n_u32(b)))), 0);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet8uc pmax<Packet8uc>(const Packet8uc& a, const Packet8uc& b) { return vmax_u8(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet16uc pmax<Packet16uc>(const Packet16uc& a, const Packet16uc& b) { return vmaxq_u8(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4s pmax<Packet4s>(const Packet4s& a, const Packet4s& b) { return vmax_s16(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet8s pmax<Packet8s>(const Packet8s& a, const Packet8s& b) { return vmaxq_s16(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4us pmax<Packet4us>(const Packet4us& a, const Packet4us& b) { return vmax_u16(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet8us pmax<Packet8us>(const Packet8us& a, const Packet8us& b) { return vmaxq_u16(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet2i pmax<Packet2i>(const Packet2i& a, const Packet2i& b) { return vmax_s32(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b) { return vmaxq_s32(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet2ui pmax<Packet2ui>(const Packet2ui& a, const Packet2ui& b) { return vmax_u32(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4ui pmax<Packet4ui>(const Packet4ui& a, const Packet4ui& b) { return vmaxq_u32(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet2l pmax<Packet2l>(const Packet2l& a, const Packet2l& b) {\n  return vcombine_s64(\n      vdup_n_s64((std::max)(vgetq_lane_s64(a, 0), vgetq_lane_s64(b, 0))),\n      vdup_n_s64((std::max)(vgetq_lane_s64(a, 1), vgetq_lane_s64(b, 1))));\n}\ntemplate<> EIGEN_STRONG_INLINE Packet2ul pmax<Packet2ul>(const Packet2ul& a, const Packet2ul& b) {\n  return vcombine_u64(\n      vdup_n_u64((std::max)(vgetq_lane_u64(a, 0), vgetq_lane_u64(b, 0))),\n      vdup_n_u64((std::max)(vgetq_lane_u64(a, 1), vgetq_lane_u64(b, 1))));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet2f pcmp_le<Packet2f>(const Packet2f& a, const Packet2f& b)\n{ return vreinterpret_f32_u32(vcle_f32(a,b)); }\ntemplate<> EIGEN_STRONG_INLINE Packet4f pcmp_le<Packet4f>(const Packet4f& a, const Packet4f& b)\n{ return vreinterpretq_f32_u32(vcleq_f32(a,b)); }\ntemplate<> EIGEN_STRONG_INLINE Packet4c pcmp_le<Packet4c>(const Packet4c& a, const Packet4c& b)\n{\n  return vget_lane_s32(vreinterpret_s32_u8(vcle_s8(\n      vreinterpret_s8_s32(vdup_n_s32(a)),\n      vreinterpret_s8_s32(vdup_n_s32(b)))), 0);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet8c pcmp_le<Packet8c>(const Packet8c& a, const Packet8c& b)\n{ return vreinterpret_s8_u8(vcle_s8(a,b)); }\ntemplate<> EIGEN_STRONG_INLINE Packet16c pcmp_le<Packet16c>(const Packet16c& a, const Packet16c& b)\n{ return vreinterpretq_s8_u8(vcleq_s8(a,b)); }\ntemplate<> EIGEN_STRONG_INLINE Packet4uc pcmp_le<Packet4uc>(const Packet4uc& a, const Packet4uc& b)\n{\n  return vget_lane_u32(vreinterpret_u32_u8(vcle_u8(\n      vreinterpret_u8_u32(vdup_n_u32(a)),\n      vreinterpret_u8_u32(vdup_n_u32(b)))), 0);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet8uc pcmp_le<Packet8uc>(const Packet8uc& a, const Packet8uc& b)\n{ return vcle_u8(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet16uc pcmp_le<Packet16uc>(const Packet16uc& a, const Packet16uc& b)\n{ return vcleq_u8(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4s pcmp_le<Packet4s>(const Packet4s& a, const Packet4s& b)\n{ return 
vreinterpret_s16_u16(vcle_s16(a,b)); }\ntemplate<> EIGEN_STRONG_INLINE Packet8s pcmp_le<Packet8s>(const Packet8s& a, const Packet8s& b)\n{ return vreinterpretq_s16_u16(vcleq_s16(a,b)); }\ntemplate<> EIGEN_STRONG_INLINE Packet4us pcmp_le<Packet4us>(const Packet4us& a, const Packet4us& b)\n{ return vcle_u16(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet8us pcmp_le<Packet8us>(const Packet8us& a, const Packet8us& b)\n{ return vcleq_u16(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet2i pcmp_le<Packet2i>(const Packet2i& a, const Packet2i& b)\n{ return vreinterpret_s32_u32(vcle_s32(a,b)); }\ntemplate<> EIGEN_STRONG_INLINE Packet4i pcmp_le<Packet4i>(const Packet4i& a, const Packet4i& b)\n{ return vreinterpretq_s32_u32(vcleq_s32(a,b)); }\ntemplate<> EIGEN_STRONG_INLINE Packet2ui pcmp_le<Packet2ui>(const Packet2ui& a, const Packet2ui& b)\n{ return vcle_u32(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4ui pcmp_le<Packet4ui>(const Packet4ui& a, const Packet4ui& b)\n{ return vcleq_u32(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet2l pcmp_le<Packet2l>(const Packet2l& a, const Packet2l& b)\n{\n#if EIGEN_ARCH_ARM64\n  return vreinterpretq_s64_u64(vcleq_s64(a,b));\n#else\n  return vcombine_s64(\n      vdup_n_s64(vgetq_lane_s64(a, 0) <= vgetq_lane_s64(b, 0) ? numext::int64_t(-1) : 0),\n      vdup_n_s64(vgetq_lane_s64(a, 1) <= vgetq_lane_s64(b, 1) ? numext::int64_t(-1) : 0));\n#endif\n}\ntemplate<> EIGEN_STRONG_INLINE Packet2ul pcmp_le<Packet2ul>(const Packet2ul& a, const Packet2ul& b)\n{\n#if EIGEN_ARCH_ARM64\n  return vcleq_u64(a,b);\n#else\n  return vcombine_u64(\n      vdup_n_u64(vgetq_lane_u64(a, 0) <= vgetq_lane_u64(b, 0) ? numext::uint64_t(-1) : 0),\n      vdup_n_u64(vgetq_lane_u64(a, 1) <= vgetq_lane_u64(b, 1) ? numext::uint64_t(-1) : 0));\n#endif\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet2f pcmp_lt<Packet2f>(const Packet2f& a, const Packet2f& b)\n{ return vreinterpret_f32_u32(vclt_f32(a,b)); }\ntemplate<> EIGEN_STRONG_INLINE Packet4f pcmp_lt<Packet4f>(const Packet4f& a, const Packet4f& b)\n{ return vreinterpretq_f32_u32(vcltq_f32(a,b)); }\ntemplate<> EIGEN_STRONG_INLINE Packet4c pcmp_lt<Packet4c>(const Packet4c& a, const Packet4c& b)\n{\n  return vget_lane_s32(vreinterpret_s32_u8(vclt_s8(\n      vreinterpret_s8_s32(vdup_n_s32(a)),\n      vreinterpret_s8_s32(vdup_n_s32(b)))), 0);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet8c pcmp_lt<Packet8c>(const Packet8c& a, const Packet8c& b)\n{ return vreinterpret_s8_u8(vclt_s8(a,b)); }\ntemplate<> EIGEN_STRONG_INLINE Packet16c pcmp_lt<Packet16c>(const Packet16c& a, const Packet16c& b)\n{ return vreinterpretq_s8_u8(vcltq_s8(a,b)); }\ntemplate<> EIGEN_STRONG_INLINE Packet4uc pcmp_lt<Packet4uc>(const Packet4uc& a, const Packet4uc& b)\n{\n  return vget_lane_u32(vreinterpret_u32_u8(vclt_u8(\n      vreinterpret_u8_u32(vdup_n_u32(a)),\n      vreinterpret_u8_u32(vdup_n_u32(b)))), 0);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet8uc pcmp_lt<Packet8uc>(const Packet8uc& a, const Packet8uc& b)\n{ return vclt_u8(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet16uc pcmp_lt<Packet16uc>(const Packet16uc& a, const Packet16uc& b)\n{ return vcltq_u8(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4s pcmp_lt<Packet4s>(const Packet4s& a, const Packet4s& b)\n{ return vreinterpret_s16_u16(vclt_s16(a,b)); }\ntemplate<> EIGEN_STRONG_INLINE Packet8s pcmp_lt<Packet8s>(const Packet8s& a, const Packet8s& b)\n{ return vreinterpretq_s16_u16(vcltq_s16(a,b)); }\ntemplate<> EIGEN_STRONG_INLINE Packet4us pcmp_lt<Packet4us>(const Packet4us& a, const Packet4us& b)\n{ return vclt_u16(a,b); }\ntemplate<> 
EIGEN_STRONG_INLINE Packet8us pcmp_lt<Packet8us>(const Packet8us& a, const Packet8us& b)\n{ return vcltq_u16(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet2i pcmp_lt<Packet2i>(const Packet2i& a, const Packet2i& b)\n{ return vreinterpret_s32_u32(vclt_s32(a,b)); }\ntemplate<> EIGEN_STRONG_INLINE Packet4i pcmp_lt<Packet4i>(const Packet4i& a, const Packet4i& b)\n{ return vreinterpretq_s32_u32(vcltq_s32(a,b)); }\ntemplate<> EIGEN_STRONG_INLINE Packet2ui pcmp_lt<Packet2ui>(const Packet2ui& a, const Packet2ui& b)\n{ return vclt_u32(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4ui pcmp_lt<Packet4ui>(const Packet4ui& a, const Packet4ui& b)\n{ return vcltq_u32(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet2l pcmp_lt<Packet2l>(const Packet2l& a, const Packet2l& b)\n{\n#if EIGEN_ARCH_ARM64\n  return vreinterpretq_s64_u64(vcltq_s64(a,b));\n#else\n  return vcombine_s64(\n      vdup_n_s64(vgetq_lane_s64(a, 0) < vgetq_lane_s64(b, 0) ? numext::int64_t(-1) : 0),\n      vdup_n_s64(vgetq_lane_s64(a, 1) < vgetq_lane_s64(b, 1) ? numext::int64_t(-1) : 0));\n#endif\n}\ntemplate<> EIGEN_STRONG_INLINE Packet2ul pcmp_lt<Packet2ul>(const Packet2ul& a, const Packet2ul& b)\n{\n#if EIGEN_ARCH_ARM64\n  return vcltq_u64(a,b);\n#else\n  return vcombine_u64(\n      vdup_n_u64(vgetq_lane_u64(a, 0) < vgetq_lane_u64(b, 0) ? numext::uint64_t(-1) : 0),\n      vdup_n_u64(vgetq_lane_u64(a, 1) < vgetq_lane_u64(b, 1) ? numext::uint64_t(-1) : 0));\n#endif\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet2f pcmp_eq<Packet2f>(const Packet2f& a, const Packet2f& b)\n{ return vreinterpret_f32_u32(vceq_f32(a,b)); }\ntemplate<> EIGEN_STRONG_INLINE Packet4f pcmp_eq<Packet4f>(const Packet4f& a, const Packet4f& b)\n{ return vreinterpretq_f32_u32(vceqq_f32(a,b)); }\ntemplate<> EIGEN_STRONG_INLINE Packet4c pcmp_eq<Packet4c>(const Packet4c& a, const Packet4c& b)\n{\n  return vget_lane_s32(vreinterpret_s32_u8(vceq_s8(\n      vreinterpret_s8_s32(vdup_n_s32(a)),\n      vreinterpret_s8_s32(vdup_n_s32(b)))), 0);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet8c pcmp_eq<Packet8c>(const Packet8c& a, const Packet8c& b)\n{ return vreinterpret_s8_u8(vceq_s8(a,b)); }\ntemplate<> EIGEN_STRONG_INLINE Packet16c pcmp_eq<Packet16c>(const Packet16c& a, const Packet16c& b)\n{ return vreinterpretq_s8_u8(vceqq_s8(a,b)); }\ntemplate<> EIGEN_STRONG_INLINE Packet4uc pcmp_eq<Packet4uc>(const Packet4uc& a, const Packet4uc& b)\n{\n  return vget_lane_u32(vreinterpret_u32_u8(vceq_u8(\n      vreinterpret_u8_u32(vdup_n_u32(a)),\n      vreinterpret_u8_u32(vdup_n_u32(b)))), 0);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet8uc pcmp_eq<Packet8uc>(const Packet8uc& a, const Packet8uc& b)\n{ return vceq_u8(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet16uc pcmp_eq<Packet16uc>(const Packet16uc& a, const Packet16uc& b)\n{ return vceqq_u8(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4s pcmp_eq<Packet4s>(const Packet4s& a, const Packet4s& b)\n{ return vreinterpret_s16_u16(vceq_s16(a,b)); }\ntemplate<> EIGEN_STRONG_INLINE Packet8s pcmp_eq<Packet8s>(const Packet8s& a, const Packet8s& b)\n{ return vreinterpretq_s16_u16(vceqq_s16(a,b)); }\ntemplate<> EIGEN_STRONG_INLINE Packet4us pcmp_eq<Packet4us>(const Packet4us& a, const Packet4us& b)\n{ return vceq_u16(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet8us pcmp_eq<Packet8us>(const Packet8us& a, const Packet8us& b)\n{ return vceqq_u16(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet2i pcmp_eq<Packet2i>(const Packet2i& a, const Packet2i& b)\n{ return vreinterpret_s32_u32(vceq_s32(a,b)); }\ntemplate<> EIGEN_STRONG_INLINE Packet4i pcmp_eq<Packet4i>(const 
Packet4i& a, const Packet4i& b)\n{ return vreinterpretq_s32_u32(vceqq_s32(a,b)); }\ntemplate<> EIGEN_STRONG_INLINE Packet2ui pcmp_eq<Packet2ui>(const Packet2ui& a, const Packet2ui& b)\n{ return vceq_u32(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4ui pcmp_eq<Packet4ui>(const Packet4ui& a, const Packet4ui& b)\n{ return vceqq_u32(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet2l pcmp_eq<Packet2l>(const Packet2l& a, const Packet2l& b)\n{\n#if EIGEN_ARCH_ARM64\n  return vreinterpretq_s64_u64(vceqq_s64(a,b));\n#else\n  return vcombine_s64(\n      vdup_n_s64(vgetq_lane_s64(a, 0) == vgetq_lane_s64(b, 0) ? numext::int64_t(-1) : 0),\n      vdup_n_s64(vgetq_lane_s64(a, 1) == vgetq_lane_s64(b, 1) ? numext::int64_t(-1) : 0));\n#endif\n}\ntemplate<> EIGEN_STRONG_INLINE Packet2ul pcmp_eq<Packet2ul>(const Packet2ul& a, const Packet2ul& b)\n{\n#if EIGEN_ARCH_ARM64\n  return vceqq_u64(a,b);\n#else\n  return vcombine_u64(\n      vdup_n_u64(vgetq_lane_u64(a, 0) == vgetq_lane_u64(b, 0) ? numext::uint64_t(-1) : 0),\n      vdup_n_u64(vgetq_lane_u64(a, 1) == vgetq_lane_u64(b, 1) ? numext::uint64_t(-1) : 0));\n#endif\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet2f pcmp_lt_or_nan<Packet2f>(const Packet2f& a, const Packet2f& b)\n{ return vreinterpret_f32_u32(vmvn_u32(vcge_f32(a,b))); }\ntemplate<> EIGEN_STRONG_INLINE Packet4f pcmp_lt_or_nan<Packet4f>(const Packet4f& a, const Packet4f& b)\n{ return vreinterpretq_f32_u32(vmvnq_u32(vcgeq_f32(a,b))); }\n\n// Logical operations are not supported for float, so we have to use reinterpret casts via NEON intrinsics\ntemplate<> EIGEN_STRONG_INLINE Packet2f pand<Packet2f>(const Packet2f& a, const Packet2f& b)\n{ return vreinterpret_f32_u32(vand_u32(vreinterpret_u32_f32(a),vreinterpret_u32_f32(b))); }\ntemplate<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b)\n{ return vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(a),vreinterpretq_u32_f32(b))); }\ntemplate<> EIGEN_STRONG_INLINE Packet4c pand<Packet4c>(const Packet4c& a, const Packet4c& b)\n{ return a & b; }\ntemplate<> EIGEN_STRONG_INLINE Packet8c pand<Packet8c>(const Packet8c& a, const Packet8c& b)\n{ return vand_s8(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet16c pand<Packet16c>(const Packet16c& a, const Packet16c& b)\n{ return vandq_s8(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4uc pand<Packet4uc>(const Packet4uc& a, const Packet4uc& b)\n{ return a & b; }\ntemplate<> EIGEN_STRONG_INLINE Packet8uc pand<Packet8uc>(const Packet8uc& a, const Packet8uc& b)\n{ return vand_u8(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet16uc pand<Packet16uc>(const Packet16uc& a, const Packet16uc& b)\n{ return vandq_u8(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4s pand<Packet4s>(const Packet4s& a, const Packet4s& b) { return vand_s16(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet8s pand<Packet8s>(const Packet8s& a, const Packet8s& b) { return vandq_s16(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4us pand<Packet4us>(const Packet4us& a, const Packet4us& b)\n{ return vand_u16(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet8us pand<Packet8us>(const Packet8us& a, const Packet8us& b)\n{ return vandq_u16(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet2i pand<Packet2i>(const Packet2i& a, const Packet2i& b) { return vand_s32(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4i pand<Packet4i>(const Packet4i& a, const Packet4i& b) { return vandq_s32(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet2ui pand<Packet2ui>(const Packet2ui& a, const Packet2ui& b)\n{ return vand_u32(a,b); }\ntemplate<> EIGEN_STRONG_INLINE 
Packet4ui pand<Packet4ui>(const Packet4ui& a, const Packet4ui& b)\n{ return vandq_u32(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet2l pand<Packet2l>(const Packet2l& a, const Packet2l& b) { return vandq_s64(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet2ul pand<Packet2ul>(const Packet2ul& a, const Packet2ul& b)\n{ return vandq_u64(a,b); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet2f por<Packet2f>(const Packet2f& a, const Packet2f& b)\n{ return vreinterpret_f32_u32(vorr_u32(vreinterpret_u32_f32(a),vreinterpret_u32_f32(b))); }\ntemplate<> EIGEN_STRONG_INLINE Packet4f por<Packet4f>(const Packet4f& a, const Packet4f& b)\n{ return vreinterpretq_f32_u32(vorrq_u32(vreinterpretq_u32_f32(a),vreinterpretq_u32_f32(b))); }\ntemplate<> EIGEN_STRONG_INLINE Packet4c por<Packet4c>(const Packet4c& a, const Packet4c& b)\n{ return a | b; }\ntemplate<> EIGEN_STRONG_INLINE Packet8c por<Packet8c>(const Packet8c& a, const Packet8c& b) { return vorr_s8(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet16c por<Packet16c>(const Packet16c& a, const Packet16c& b)\n{ return vorrq_s8(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4uc por<Packet4uc>(const Packet4uc& a, const Packet4uc& b)\n{ return a | b; }\ntemplate<> EIGEN_STRONG_INLINE Packet8uc por<Packet8uc>(const Packet8uc& a, const Packet8uc& b)\n{ return vorr_u8(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet16uc por<Packet16uc>(const Packet16uc& a, const Packet16uc& b)\n{ return vorrq_u8(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4s por<Packet4s>(const Packet4s& a, const Packet4s& b)\n{ return vorr_s16(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet8s por<Packet8s>(const Packet8s& a, const Packet8s& b)\n{ return vorrq_s16(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4us por<Packet4us>(const Packet4us& a, const Packet4us& b)\n{ return vorr_u16(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet8us por<Packet8us>(const Packet8us& a, const Packet8us& b)\n{ return vorrq_u16(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet2i por<Packet2i>(const Packet2i& a, const Packet2i& b) { return vorr_s32(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4i por<Packet4i>(const Packet4i& a, const Packet4i& b) { return vorrq_s32(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet2ui por<Packet2ui>(const Packet2ui& a, const Packet2ui& b)\n{ return vorr_u32(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4ui por<Packet4ui>(const Packet4ui& a, const Packet4ui& b)\n{ return vorrq_u32(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet2l por<Packet2l>(const Packet2l& a, const Packet2l& b)\n{ return vorrq_s64(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet2ul por<Packet2ul>(const Packet2ul& a, const Packet2ul& b)\n{ return vorrq_u64(a,b); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet2f pxor<Packet2f>(const Packet2f& a, const Packet2f& b)\n{ return vreinterpret_f32_u32(veor_u32(vreinterpret_u32_f32(a),vreinterpret_u32_f32(b))); }\ntemplate<> EIGEN_STRONG_INLINE Packet4f pxor<Packet4f>(const Packet4f& a, const Packet4f& b)\n{ return vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(a),vreinterpretq_u32_f32(b))); }\ntemplate<> EIGEN_STRONG_INLINE Packet4c pxor<Packet4c>(const Packet4c& a, const Packet4c& b)\n{ return a ^ b; }\ntemplate<> EIGEN_STRONG_INLINE Packet8c pxor<Packet8c>(const Packet8c& a, const Packet8c& b)\n{ return veor_s8(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet16c pxor<Packet16c>(const Packet16c& a, const Packet16c& b)\n{ return veorq_s8(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4uc pxor<Packet4uc>(const Packet4uc& a, const Packet4uc& b)\n{ return a ^ b; }\ntemplate<> EIGEN_STRONG_INLINE 
Packet8uc pxor<Packet8uc>(const Packet8uc& a, const Packet8uc& b)\n{ return veor_u8(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet16uc pxor<Packet16uc>(const Packet16uc& a, const Packet16uc& b)\n{ return veorq_u8(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4s pxor<Packet4s>(const Packet4s& a, const Packet4s& b) { return veor_s16(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet8s pxor<Packet8s>(const Packet8s& a, const Packet8s& b) { return veorq_s16(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4us pxor<Packet4us>(const Packet4us& a, const Packet4us& b)\n{ return veor_u16(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet8us pxor<Packet8us>(const Packet8us& a, const Packet8us& b)\n{ return veorq_u16(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet2i pxor<Packet2i>(const Packet2i& a, const Packet2i& b) { return veor_s32(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4i pxor<Packet4i>(const Packet4i& a, const Packet4i& b) { return veorq_s32(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet2ui pxor<Packet2ui>(const Packet2ui& a, const Packet2ui& b)\n{ return veor_u32(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4ui pxor<Packet4ui>(const Packet4ui& a, const Packet4ui& b)\n{ return veorq_u32(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet2l pxor<Packet2l>(const Packet2l& a, const Packet2l& b)\n{ return veorq_s64(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet2ul pxor<Packet2ul>(const Packet2ul& a, const Packet2ul& b)\n{ return veorq_u64(a,b); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet2f pandnot<Packet2f>(const Packet2f& a, const Packet2f& b)\n{ return vreinterpret_f32_u32(vbic_u32(vreinterpret_u32_f32(a),vreinterpret_u32_f32(b))); }\ntemplate<> EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, const Packet4f& b)\n{ return vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(a),vreinterpretq_u32_f32(b))); }\ntemplate<> EIGEN_STRONG_INLINE Packet4c pandnot<Packet4c>(const Packet4c& a, const Packet4c& b)\n{ return a & ~b; }\ntemplate<> EIGEN_STRONG_INLINE Packet8c pandnot<Packet8c>(const Packet8c& a, const Packet8c& b) { return vbic_s8(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet16c pandnot<Packet16c>(const Packet16c& a, const Packet16c& b) { return vbicq_s8(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4uc pandnot<Packet4uc>(const Packet4uc& a, const Packet4uc& b)\n{ return a & ~b; }\ntemplate<> EIGEN_STRONG_INLINE Packet8uc pandnot<Packet8uc>(const Packet8uc& a, const Packet8uc& b)\n{ return vbic_u8(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet16uc pandnot<Packet16uc>(const Packet16uc& a, const Packet16uc& b)\n{ return vbicq_u8(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4s pandnot<Packet4s>(const Packet4s& a, const Packet4s& b)\n{ return vbic_s16(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet8s pandnot<Packet8s>(const Packet8s& a, const Packet8s& b)\n{ return vbicq_s16(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4us pandnot<Packet4us>(const Packet4us& a, const Packet4us& b)\n{ return vbic_u16(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet8us pandnot<Packet8us>(const Packet8us& a, const Packet8us& b)\n{ return vbicq_u16(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet2i pandnot<Packet2i>(const Packet2i& a, const Packet2i& b)\n{ return vbic_s32(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b)\n{ return vbicq_s32(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet2ui pandnot<Packet2ui>(const Packet2ui& a, const Packet2ui& b)\n{ return vbic_u32(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4ui pandnot<Packet4ui>(const Packet4ui& a, const 
Packet4ui& b)\n{ return vbicq_u32(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet2l pandnot<Packet2l>(const Packet2l& a, const Packet2l& b)\n{ return vbicq_s64(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet2ul pandnot<Packet2ul>(const Packet2ul& a, const Packet2ul& b)\n{ return vbicq_u64(a,b); }\n\n\ntemplate<int N> EIGEN_STRONG_INLINE Packet4c parithmetic_shift_right(Packet4c& a)\n{ return vget_lane_s32(vreinterpret_s32_s8(vshr_n_s8(vreinterpret_s8_s32(vdup_n_s32(a)), N)), 0); }\ntemplate<int N> EIGEN_STRONG_INLINE Packet8c parithmetic_shift_right(Packet8c a) { return vshr_n_s8(a,N); }\ntemplate<int N> EIGEN_STRONG_INLINE Packet16c parithmetic_shift_right(Packet16c a) { return vshrq_n_s8(a,N); }\ntemplate<int N> EIGEN_STRONG_INLINE Packet4uc parithmetic_shift_right(Packet4uc& a)\n{ return vget_lane_u32(vreinterpret_u32_u8(vshr_n_u8(vreinterpret_u8_u32(vdup_n_u32(a)), N)), 0); }\ntemplate<int N> EIGEN_STRONG_INLINE Packet8uc parithmetic_shift_right(Packet8uc a) { return vshr_n_u8(a,N); }\ntemplate<int N> EIGEN_STRONG_INLINE Packet16uc parithmetic_shift_right(Packet16uc a) { return vshrq_n_u8(a,N); }\ntemplate<int N> EIGEN_STRONG_INLINE Packet4s parithmetic_shift_right(Packet4s a) { return vshr_n_s16(a,N); }\ntemplate<int N> EIGEN_STRONG_INLINE Packet8s parithmetic_shift_right(Packet8s a) { return vshrq_n_s16(a,N); }\ntemplate<int N> EIGEN_STRONG_INLINE Packet4us parithmetic_shift_right(Packet4us a) { return vshr_n_u16(a,N); }\ntemplate<int N> EIGEN_STRONG_INLINE Packet8us parithmetic_shift_right(Packet8us a) { return vshrq_n_u16(a,N); }\ntemplate<int N> EIGEN_STRONG_INLINE Packet2i parithmetic_shift_right(Packet2i a) { return vshr_n_s32(a,N); }\ntemplate<int N> EIGEN_STRONG_INLINE Packet4i parithmetic_shift_right(Packet4i a) { return vshrq_n_s32(a,N); }\ntemplate<int N> EIGEN_STRONG_INLINE Packet2ui parithmetic_shift_right(Packet2ui a) { return vshr_n_u32(a,N); }\ntemplate<int N> EIGEN_STRONG_INLINE Packet4ui parithmetic_shift_right(Packet4ui a) { return vshrq_n_u32(a,N); }\ntemplate<int N> EIGEN_STRONG_INLINE Packet2l parithmetic_shift_right(Packet2l a) { return vshrq_n_s64(a,N); }\ntemplate<int N> EIGEN_STRONG_INLINE Packet2ul parithmetic_shift_right(Packet2ul a) { return vshrq_n_u64(a,N); }\n\ntemplate<int N> EIGEN_STRONG_INLINE Packet4c plogical_shift_right(Packet4c& a)\n{ return vget_lane_s32(vreinterpret_s32_u8(vshr_n_u8(vreinterpret_u8_s32(vdup_n_s32(a)), N)), 0); }\ntemplate<int N> EIGEN_STRONG_INLINE Packet8c plogical_shift_right(Packet8c a)\n{ return vreinterpret_s8_u8(vshr_n_u8(vreinterpret_u8_s8(a),N)); }\ntemplate<int N> EIGEN_STRONG_INLINE Packet16c plogical_shift_right(Packet16c a)\n{ return vreinterpretq_s8_u8(vshrq_n_u8(vreinterpretq_u8_s8(a),N)); }\ntemplate<int N> EIGEN_STRONG_INLINE Packet4uc plogical_shift_right(Packet4uc& a)\n{ return vget_lane_u32(vreinterpret_u32_u8(vshr_n_u8(vreinterpret_u8_u32(vdup_n_u32(a)), N)), 0); }\ntemplate<int N> EIGEN_STRONG_INLINE Packet8uc plogical_shift_right(Packet8uc a) { return vshr_n_u8(a,N); }\ntemplate<int N> EIGEN_STRONG_INLINE Packet16uc plogical_shift_right(Packet16uc a) { return vshrq_n_u8(a,N); }\ntemplate<int N> EIGEN_STRONG_INLINE Packet4s plogical_shift_right(Packet4s a)\n{ return vreinterpret_s16_u16(vshr_n_u16(vreinterpret_u16_s16(a),N)); }\ntemplate<int N> EIGEN_STRONG_INLINE Packet8s plogical_shift_right(Packet8s a)\n{ return vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(a),N)); }\ntemplate<int N> EIGEN_STRONG_INLINE Packet4us plogical_shift_right(Packet4us a) { return vshr_n_u16(a,N); }\ntemplate<int N> 
EIGEN_STRONG_INLINE Packet8us plogical_shift_right(Packet8us a) { return vshrq_n_u16(a,N); }\ntemplate<int N> EIGEN_STRONG_INLINE Packet2i plogical_shift_right(Packet2i a)\n{ return vreinterpret_s32_u32(vshr_n_u32(vreinterpret_u32_s32(a),N)); }\ntemplate<int N> EIGEN_STRONG_INLINE Packet4i plogical_shift_right(Packet4i a)\n{ return vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_s32(a),N)); }\ntemplate<int N> EIGEN_STRONG_INLINE Packet2ui plogical_shift_right(Packet2ui a) { return vshr_n_u32(a,N); }\ntemplate<int N> EIGEN_STRONG_INLINE Packet4ui plogical_shift_right(Packet4ui a) { return vshrq_n_u32(a,N); }\ntemplate<int N> EIGEN_STRONG_INLINE Packet2l plogical_shift_right(Packet2l a)\n{ return vreinterpretq_s64_u64(vshrq_n_u64(vreinterpretq_u64_s64(a),N)); }\ntemplate<int N> EIGEN_STRONG_INLINE Packet2ul plogical_shift_right(Packet2ul a) { return vshrq_n_u64(a,N); }\n\ntemplate<int N> EIGEN_STRONG_INLINE Packet4c plogical_shift_left(Packet4c& a)\n{ return vget_lane_s32(vreinterpret_s32_s8(vshl_n_s8(vreinterpret_s8_s32(vdup_n_s32(a)), N)), 0); }\ntemplate<int N> EIGEN_STRONG_INLINE Packet8c plogical_shift_left(Packet8c a) { return vshl_n_s8(a,N); }\ntemplate<int N> EIGEN_STRONG_INLINE Packet16c plogical_shift_left(Packet16c a) { return vshlq_n_s8(a,N); }\ntemplate<int N> EIGEN_STRONG_INLINE Packet4uc plogical_shift_left(Packet4uc& a)\n{ return vget_lane_u32(vreinterpret_u32_u8(vshl_n_u8(vreinterpret_u8_u32(vdup_n_u32(a)), N)), 0); }\ntemplate<int N> EIGEN_STRONG_INLINE Packet8uc plogical_shift_left(Packet8uc a) { return vshl_n_u8(a,N); }\ntemplate<int N> EIGEN_STRONG_INLINE Packet16uc plogical_shift_left(Packet16uc a) { return vshlq_n_u8(a,N); }\ntemplate<int N> EIGEN_STRONG_INLINE Packet4s plogical_shift_left(Packet4s a) { return vshl_n_s16(a,N); }\ntemplate<int N> EIGEN_STRONG_INLINE Packet8s plogical_shift_left(Packet8s a) { return vshlq_n_s16(a,N); }\ntemplate<int N> EIGEN_STRONG_INLINE Packet4us plogical_shift_left(Packet4us a) { return vshl_n_u16(a,N); }\ntemplate<int N> EIGEN_STRONG_INLINE Packet8us plogical_shift_left(Packet8us a) { return vshlq_n_u16(a,N); }\ntemplate<int N> EIGEN_STRONG_INLINE Packet2i plogical_shift_left(Packet2i a) { return vshl_n_s32(a,N); }\ntemplate<int N> EIGEN_STRONG_INLINE Packet4i plogical_shift_left(Packet4i a) { return vshlq_n_s32(a,N); }\ntemplate<int N> EIGEN_STRONG_INLINE Packet2ui plogical_shift_left(Packet2ui a) { return vshl_n_u32(a,N); }\ntemplate<int N> EIGEN_STRONG_INLINE Packet4ui plogical_shift_left(Packet4ui a) { return vshlq_n_u32(a,N); }\ntemplate<int N> EIGEN_STRONG_INLINE Packet2l plogical_shift_left(Packet2l a) { return vshlq_n_s64(a,N); }\ntemplate<int N> EIGEN_STRONG_INLINE Packet2ul plogical_shift_left(Packet2ul a) { return vshlq_n_u64(a,N); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet2f pload<Packet2f>(const float* from)\n{ EIGEN_DEBUG_ALIGNED_LOAD return vld1_f32(from); }\ntemplate<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from)\n{ EIGEN_DEBUG_ALIGNED_LOAD return vld1q_f32(from); }\ntemplate<> EIGEN_STRONG_INLINE Packet4c pload<Packet4c>(const int8_t* from)\n{\n  Packet4c res;\n  memcpy(&res, from, sizeof(Packet4c));\n  return res;\n}\ntemplate<> EIGEN_STRONG_INLINE Packet8c pload<Packet8c>(const int8_t* from)\n{ EIGEN_DEBUG_ALIGNED_LOAD return vld1_s8(from); }\ntemplate<> EIGEN_STRONG_INLINE Packet16c pload<Packet16c>(const int8_t* from)\n{ EIGEN_DEBUG_ALIGNED_LOAD return vld1q_s8(from); }\ntemplate<> EIGEN_STRONG_INLINE Packet4uc pload<Packet4uc>(const uint8_t* from)\n{\n  Packet4uc res;\n  memcpy(&res, from, 
sizeof(Packet4uc));\n  return res;\n}\ntemplate<> EIGEN_STRONG_INLINE Packet8uc pload<Packet8uc>(const uint8_t* from)\n{ EIGEN_DEBUG_ALIGNED_LOAD return vld1_u8(from); }\ntemplate<> EIGEN_STRONG_INLINE Packet16uc pload<Packet16uc>(const uint8_t* from)\n{ EIGEN_DEBUG_ALIGNED_LOAD return vld1q_u8(from); }\ntemplate<> EIGEN_STRONG_INLINE Packet4s pload<Packet4s>(const int16_t* from)\n{ EIGEN_DEBUG_ALIGNED_LOAD return vld1_s16(from); }\ntemplate<> EIGEN_STRONG_INLINE Packet8s pload<Packet8s>(const int16_t* from)\n{ EIGEN_DEBUG_ALIGNED_LOAD return vld1q_s16(from); }\ntemplate<> EIGEN_STRONG_INLINE Packet4us pload<Packet4us>(const uint16_t* from)\n{ EIGEN_DEBUG_ALIGNED_LOAD return vld1_u16(from); }\ntemplate<> EIGEN_STRONG_INLINE Packet8us pload<Packet8us>(const uint16_t* from)\n{ EIGEN_DEBUG_ALIGNED_LOAD return vld1q_u16(from); }\ntemplate<> EIGEN_STRONG_INLINE Packet2i pload<Packet2i>(const int32_t* from)\n{ EIGEN_DEBUG_ALIGNED_LOAD return vld1_s32(from); }\ntemplate<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int32_t* from)\n{ EIGEN_DEBUG_ALIGNED_LOAD return vld1q_s32(from); }\ntemplate<> EIGEN_STRONG_INLINE Packet2ui pload<Packet2ui>(const uint32_t* from)\n{ EIGEN_DEBUG_ALIGNED_LOAD return vld1_u32(from); }\ntemplate<> EIGEN_STRONG_INLINE Packet4ui pload<Packet4ui>(const uint32_t* from)\n{ EIGEN_DEBUG_ALIGNED_LOAD return vld1q_u32(from); }\ntemplate<> EIGEN_STRONG_INLINE Packet2l pload<Packet2l>(const int64_t* from)\n{ EIGEN_DEBUG_ALIGNED_LOAD return vld1q_s64(from); }\ntemplate<> EIGEN_STRONG_INLINE Packet2ul pload<Packet2ul>(const uint64_t* from)\n{ EIGEN_DEBUG_ALIGNED_LOAD return vld1q_u64(from); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet2f ploadu<Packet2f>(const float* from)\n{ EIGEN_DEBUG_UNALIGNED_LOAD return vld1_f32(from); }\ntemplate<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from)\n{ EIGEN_DEBUG_UNALIGNED_LOAD return vld1q_f32(from); }\ntemplate<> EIGEN_STRONG_INLINE Packet4c ploadu<Packet4c>(const int8_t* from)\n{\n  Packet4c res;\n  memcpy(&res, from, sizeof(Packet4c));\n  return res;\n}\ntemplate<> EIGEN_STRONG_INLINE Packet8c ploadu<Packet8c>(const int8_t* from)\n{ EIGEN_DEBUG_UNALIGNED_LOAD return vld1_s8(from); }\ntemplate<> EIGEN_STRONG_INLINE Packet16c ploadu<Packet16c>(const int8_t* from)\n{ EIGEN_DEBUG_UNALIGNED_LOAD return vld1q_s8(from); }\ntemplate<> EIGEN_STRONG_INLINE Packet4uc ploadu<Packet4uc>(const uint8_t* from)\n{\n  Packet4uc res;\n  memcpy(&res, from, sizeof(Packet4uc));\n  return res;\n}\ntemplate<> EIGEN_STRONG_INLINE Packet8uc ploadu<Packet8uc>(const uint8_t* from)\n{ EIGEN_DEBUG_UNALIGNED_LOAD return vld1_u8(from); }\ntemplate<> EIGEN_STRONG_INLINE Packet16uc ploadu<Packet16uc>(const uint8_t* from)\n{ EIGEN_DEBUG_UNALIGNED_LOAD return vld1q_u8(from); }\ntemplate<> EIGEN_STRONG_INLINE Packet4s ploadu<Packet4s>(const int16_t* from)\n{ EIGEN_DEBUG_UNALIGNED_LOAD return vld1_s16(from); }\ntemplate<> EIGEN_STRONG_INLINE Packet8s ploadu<Packet8s>(const int16_t* from)\n{ EIGEN_DEBUG_UNALIGNED_LOAD return vld1q_s16(from); }\ntemplate<> EIGEN_STRONG_INLINE Packet4us ploadu<Packet4us>(const uint16_t* from)\n{ EIGEN_DEBUG_UNALIGNED_LOAD return vld1_u16(from); }\ntemplate<> EIGEN_STRONG_INLINE Packet8us ploadu<Packet8us>(const uint16_t* from)\n{ EIGEN_DEBUG_UNALIGNED_LOAD return vld1q_u16(from); }\ntemplate<> EIGEN_STRONG_INLINE Packet2i ploadu<Packet2i>(const int32_t* from)\n{ EIGEN_DEBUG_UNALIGNED_LOAD return vld1_s32(from); }\ntemplate<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int32_t* from)\n{ EIGEN_DEBUG_UNALIGNED_LOAD 
return vld1q_s32(from); }\ntemplate<> EIGEN_STRONG_INLINE Packet2ui ploadu<Packet2ui>(const uint32_t* from)\n{ EIGEN_DEBUG_UNALIGNED_LOAD return vld1_u32(from); }\ntemplate<> EIGEN_STRONG_INLINE Packet4ui ploadu<Packet4ui>(const uint32_t* from)\n{ EIGEN_DEBUG_UNALIGNED_LOAD return vld1q_u32(from); }\ntemplate<> EIGEN_STRONG_INLINE Packet2l ploadu<Packet2l>(const int64_t* from)\n{ EIGEN_DEBUG_UNALIGNED_LOAD return vld1q_s64(from); }\ntemplate<> EIGEN_STRONG_INLINE Packet2ul ploadu<Packet2ul>(const uint64_t* from)\n{ EIGEN_DEBUG_UNALIGNED_LOAD return vld1q_u64(from); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet2f ploaddup<Packet2f>(const float* from)\n{ return vld1_dup_f32(from); }\ntemplate<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from)\n{ return vcombine_f32(vld1_dup_f32(from), vld1_dup_f32(from+1)); }\ntemplate<> EIGEN_STRONG_INLINE Packet4c ploaddup<Packet4c>(const int8_t* from)\n{\n  const int8x8_t a = vreinterpret_s8_s32(vdup_n_s32(pload<Packet4c>(from)));\n  return vget_lane_s32(vreinterpret_s32_s8(vzip_s8(a,a).val[0]), 0);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet8c ploaddup<Packet8c>(const int8_t* from)\n{\n  const int8x8_t a = vld1_s8(from);\n  return vzip_s8(a,a).val[0];\n}\ntemplate<> EIGEN_STRONG_INLINE Packet16c ploaddup<Packet16c>(const int8_t* from)\n{\n  const int8x8_t a = vld1_s8(from);\n  const int8x8x2_t b = vzip_s8(a,a);\n  return vcombine_s8(b.val[0], b.val[1]);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet4uc ploaddup<Packet4uc>(const uint8_t* from)\n{\n  const uint8x8_t a = vreinterpret_u8_u32(vdup_n_u32(pload<Packet4uc>(from)));\n  return vget_lane_u32(vreinterpret_u32_u8(vzip_u8(a,a).val[0]), 0);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet8uc ploaddup<Packet8uc>(const uint8_t* from)\n{\n  const uint8x8_t a = vld1_u8(from);\n  return vzip_u8(a,a).val[0];\n}\ntemplate<> EIGEN_STRONG_INLINE Packet16uc ploaddup<Packet16uc>(const uint8_t* from)\n{\n  const uint8x8_t a = vld1_u8(from);\n  const uint8x8x2_t b = vzip_u8(a,a);\n  return vcombine_u8(b.val[0], b.val[1]);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet4s ploaddup<Packet4s>(const int16_t* from)\n{\n  return vreinterpret_s16_u32(vzip_u32(vreinterpret_u32_s16(vld1_dup_s16(from)),\n      vreinterpret_u32_s16(vld1_dup_s16(from+1))).val[0]);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet8s ploaddup<Packet8s>(const int16_t* from)\n{\n  const int16x4_t a = vld1_s16(from);\n  const int16x4x2_t b = vzip_s16(a,a);\n  return vcombine_s16(b.val[0], b.val[1]);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet4us ploaddup<Packet4us>(const uint16_t* from)\n{\n  return vreinterpret_u16_u32(vzip_u32(vreinterpret_u32_u16(vld1_dup_u16(from)),\n      vreinterpret_u32_u16(vld1_dup_u16(from+1))).val[0]);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet8us ploaddup<Packet8us>(const uint16_t* from)\n{\n  const uint16x4_t a = vld1_u16(from);\n  const uint16x4x2_t b = vzip_u16(a,a);\n  return vcombine_u16(b.val[0], b.val[1]);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet2i ploaddup<Packet2i>(const int32_t* from)\n{ return vld1_dup_s32(from); }\ntemplate<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int32_t* from)\n{ return vcombine_s32(vld1_dup_s32(from), vld1_dup_s32(from+1)); }\ntemplate<> EIGEN_STRONG_INLINE Packet2ui ploaddup<Packet2ui>(const uint32_t* from)\n{ return vld1_dup_u32(from); }\ntemplate<> EIGEN_STRONG_INLINE Packet4ui ploaddup<Packet4ui>(const uint32_t* from)\n{ return vcombine_u32(vld1_dup_u32(from), vld1_dup_u32(from+1)); }\ntemplate<> EIGEN_STRONG_INLINE Packet2l ploaddup<Packet2l>(const int64_t* from)\n{ return 
vld1q_dup_s64(from); }\ntemplate<> EIGEN_STRONG_INLINE Packet2ul ploaddup<Packet2ul>(const uint64_t* from)\n{ return vld1q_dup_u64(from); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f ploadquad<Packet4f>(const float* from) { return vld1q_dup_f32(from); }\ntemplate<> EIGEN_STRONG_INLINE Packet4c ploadquad<Packet4c>(const int8_t* from)\n{ return vget_lane_s32(vreinterpret_s32_s8(vld1_dup_s8(from)), 0); }\ntemplate<> EIGEN_STRONG_INLINE Packet8c ploadquad<Packet8c>(const int8_t* from)\n{\n  return vreinterpret_s8_u32(vzip_u32(\n      vreinterpret_u32_s8(vld1_dup_s8(from)),\n      vreinterpret_u32_s8(vld1_dup_s8(from+1))).val[0]);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet16c ploadquad<Packet16c>(const int8_t* from)\n{\n  const int8x8_t a = vreinterpret_s8_u32(vzip_u32(\n      vreinterpret_u32_s8(vld1_dup_s8(from)),\n      vreinterpret_u32_s8(vld1_dup_s8(from+1))).val[0]);\n  const int8x8_t b = vreinterpret_s8_u32(vzip_u32(\n      vreinterpret_u32_s8(vld1_dup_s8(from+2)),\n      vreinterpret_u32_s8(vld1_dup_s8(from+3))).val[0]);\n  return vcombine_s8(a,b);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet4uc ploadquad<Packet4uc>(const uint8_t* from)\n{ return vget_lane_u32(vreinterpret_u32_u8(vld1_dup_u8(from)), 0); }\ntemplate<> EIGEN_STRONG_INLINE Packet8uc ploadquad<Packet8uc>(const uint8_t* from)\n{\n  return vreinterpret_u8_u32(vzip_u32(\n      vreinterpret_u32_u8(vld1_dup_u8(from)),\n      vreinterpret_u32_u8(vld1_dup_u8(from+1))).val[0]);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet16uc ploadquad<Packet16uc>(const uint8_t* from)\n{\n  const uint8x8_t a = vreinterpret_u8_u32(vzip_u32(\n      vreinterpret_u32_u8(vld1_dup_u8(from)),\n      vreinterpret_u32_u8(vld1_dup_u8(from+1))).val[0]);\n  const uint8x8_t b = vreinterpret_u8_u32(vzip_u32(\n      vreinterpret_u32_u8(vld1_dup_u8(from+2)),\n      vreinterpret_u32_u8(vld1_dup_u8(from+3))).val[0]);\n  return vcombine_u8(a,b);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet8s ploadquad<Packet8s>(const int16_t* from)\n{ return vcombine_s16(vld1_dup_s16(from), vld1_dup_s16(from+1)); }\ntemplate<> EIGEN_STRONG_INLINE Packet8us ploadquad<Packet8us>(const uint16_t* from)\n{ return vcombine_u16(vld1_dup_u16(from), vld1_dup_u16(from+1)); }\ntemplate<> EIGEN_STRONG_INLINE Packet4i ploadquad<Packet4i>(const int32_t* from) { return vld1q_dup_s32(from); }\ntemplate<> EIGEN_STRONG_INLINE Packet4ui ploadquad<Packet4ui>(const uint32_t* from) { return vld1q_dup_u32(from); }\n\ntemplate<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet2f& from)\n{ EIGEN_DEBUG_ALIGNED_STORE vst1_f32(to,from); }\ntemplate<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from)\n{ EIGEN_DEBUG_ALIGNED_STORE vst1q_f32(to,from); }\ntemplate<> EIGEN_STRONG_INLINE void pstore<int8_t>(int8_t* to, const Packet4c& from)\n{ memcpy(to, &from, sizeof(from)); }\ntemplate<> EIGEN_STRONG_INLINE void pstore<int8_t>(int8_t* to, const Packet8c& from)\n{ EIGEN_DEBUG_ALIGNED_STORE vst1_s8(to,from); }\ntemplate<> EIGEN_STRONG_INLINE void pstore<int8_t>(int8_t* to, const Packet16c& from)\n{ EIGEN_DEBUG_ALIGNED_STORE vst1q_s8(to,from); }\ntemplate<> EIGEN_STRONG_INLINE void pstore<uint8_t>(uint8_t* to, const Packet4uc& from)\n{ memcpy(to, &from, sizeof(from)); }\ntemplate<> EIGEN_STRONG_INLINE void pstore<uint8_t>(uint8_t* to, const Packet8uc& from)\n{ EIGEN_DEBUG_ALIGNED_STORE vst1_u8(to,from); }\ntemplate<> EIGEN_STRONG_INLINE void pstore<uint8_t>(uint8_t* to, const Packet16uc& from)\n{ EIGEN_DEBUG_ALIGNED_STORE vst1q_u8(to,from); }\ntemplate<> EIGEN_STRONG_INLINE void 
pstore<int16_t>(int16_t* to, const Packet4s& from)\n{ EIGEN_DEBUG_ALIGNED_STORE vst1_s16(to,from); }\ntemplate<> EIGEN_STRONG_INLINE void pstore<int16_t>(int16_t* to, const Packet8s& from)\n{ EIGEN_DEBUG_ALIGNED_STORE vst1q_s16(to,from); }\ntemplate<> EIGEN_STRONG_INLINE void pstore<uint16_t>(uint16_t* to, const Packet4us& from)\n{ EIGEN_DEBUG_ALIGNED_STORE vst1_u16(to,from); }\ntemplate<> EIGEN_STRONG_INLINE void pstore<uint16_t>(uint16_t* to, const Packet8us& from)\n{ EIGEN_DEBUG_ALIGNED_STORE vst1q_u16(to,from); }\ntemplate<> EIGEN_STRONG_INLINE void pstore<int32_t>(int32_t* to, const Packet2i& from)\n{ EIGEN_DEBUG_ALIGNED_STORE vst1_s32(to,from); }\ntemplate<> EIGEN_STRONG_INLINE void pstore<int32_t>(int32_t* to, const Packet4i& from)\n{ EIGEN_DEBUG_ALIGNED_STORE vst1q_s32(to,from); }\ntemplate<> EIGEN_STRONG_INLINE void pstore<uint32_t>(uint32_t* to, const Packet2ui& from)\n{ EIGEN_DEBUG_ALIGNED_STORE vst1_u32(to,from); }\ntemplate<> EIGEN_STRONG_INLINE void pstore<uint32_t>(uint32_t* to, const Packet4ui& from)\n{ EIGEN_DEBUG_ALIGNED_STORE vst1q_u32(to,from); }\ntemplate<> EIGEN_STRONG_INLINE void pstore<int64_t>(int64_t* to, const Packet2l& from)\n{ EIGEN_DEBUG_ALIGNED_STORE vst1q_s64(to,from); }\ntemplate<> EIGEN_STRONG_INLINE void pstore<uint64_t>(uint64_t* to, const Packet2ul& from)\n{ EIGEN_DEBUG_ALIGNED_STORE vst1q_u64(to,from); }\n\ntemplate<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet2f& from)\n{ EIGEN_DEBUG_UNALIGNED_STORE vst1_f32(to,from); }\ntemplate<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from)\n{ EIGEN_DEBUG_UNALIGNED_STORE vst1q_f32(to,from); }\ntemplate<> EIGEN_STRONG_INLINE void pstoreu<int8_t>(int8_t* to, const Packet4c& from)\n{ memcpy(to, &from, sizeof(from)); }\ntemplate<> EIGEN_STRONG_INLINE void pstoreu<int8_t>(int8_t* to, const Packet8c& from)\n{ EIGEN_DEBUG_UNALIGNED_STORE vst1_s8(to,from); }\ntemplate<> EIGEN_STRONG_INLINE void pstoreu<int8_t>(int8_t* to, const Packet16c& from)\n{ EIGEN_DEBUG_UNALIGNED_STORE vst1q_s8(to,from); }\ntemplate<> EIGEN_STRONG_INLINE void pstoreu<uint8_t>(uint8_t* to, const Packet4uc& from)\n{ memcpy(to, &from, sizeof(from)); }\ntemplate<> EIGEN_STRONG_INLINE void pstoreu<uint8_t>(uint8_t* to, const Packet8uc& from)\n{ EIGEN_DEBUG_UNALIGNED_STORE vst1_u8(to,from); }\ntemplate<> EIGEN_STRONG_INLINE void pstoreu<uint8_t>(uint8_t* to, const Packet16uc& from)\n{ EIGEN_DEBUG_UNALIGNED_STORE vst1q_u8(to,from); }\ntemplate<> EIGEN_STRONG_INLINE void pstoreu<int16_t>(int16_t* to, const Packet4s& from)\n{ EIGEN_DEBUG_UNALIGNED_STORE vst1_s16(to,from); }\ntemplate<> EIGEN_STRONG_INLINE void pstoreu<int16_t>(int16_t* to, const Packet8s& from)\n{ EIGEN_DEBUG_UNALIGNED_STORE vst1q_s16(to,from); }\ntemplate<> EIGEN_STRONG_INLINE void pstoreu<uint16_t>(uint16_t* to, const Packet4us& from)\n{ EIGEN_DEBUG_UNALIGNED_STORE vst1_u16(to,from); }\ntemplate<> EIGEN_STRONG_INLINE void pstoreu<uint16_t>(uint16_t* to, const Packet8us& from)\n{ EIGEN_DEBUG_UNALIGNED_STORE vst1q_u16(to,from); }\ntemplate<> EIGEN_STRONG_INLINE void pstoreu<int32_t>(int32_t* to, const Packet2i& from)\n{ EIGEN_DEBUG_UNALIGNED_STORE vst1_s32(to,from); }\ntemplate<> EIGEN_STRONG_INLINE void pstoreu<int32_t>(int32_t* to, const Packet4i& from)\n{ EIGEN_DEBUG_UNALIGNED_STORE vst1q_s32(to,from); }\ntemplate<> EIGEN_STRONG_INLINE void pstoreu<uint32_t>(uint32_t* to, const Packet2ui& from)\n{ EIGEN_DEBUG_UNALIGNED_STORE vst1_u32(to,from); }\ntemplate<> EIGEN_STRONG_INLINE void pstoreu<uint32_t>(uint32_t* to, const Packet4ui& from)\n{ 
EIGEN_DEBUG_UNALIGNED_STORE vst1q_u32(to,from); }\ntemplate<> EIGEN_STRONG_INLINE void pstoreu<int64_t>(int64_t* to, const Packet2l& from)\n{ EIGEN_DEBUG_UNALIGNED_STORE vst1q_s64(to,from); }\ntemplate<> EIGEN_STRONG_INLINE void pstoreu<uint64_t>(uint64_t* to, const Packet2ul& from)\n{ EIGEN_DEBUG_UNALIGNED_STORE vst1q_u64(to,from); }\n\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet2f pgather<float, Packet2f>(const float* from, Index stride)\n{\n  Packet2f res = vld1_dup_f32(from);\n  res = vld1_lane_f32(from + 1*stride, res, 1);\n  return res;\n}\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4f pgather<float, Packet4f>(const float* from, Index stride)\n{\n  Packet4f res = vld1q_dup_f32(from);\n  res = vld1q_lane_f32(from + 1*stride, res, 1);\n  res = vld1q_lane_f32(from + 2*stride, res, 2);\n  res = vld1q_lane_f32(from + 3*stride, res, 3);\n  return res;\n}\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4c pgather<int8_t, Packet4c>(const int8_t* from, Index stride)\n{\n  Packet4c res;\n  for (int i = 0; i != 4; i++)\n    reinterpret_cast<int8_t*>(&res)[i] = *(from + i * stride);\n  return res;\n}\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet8c pgather<int8_t, Packet8c>(const int8_t* from, Index stride)\n{\n  Packet8c res = vld1_dup_s8(from);\n  res = vld1_lane_s8(from + 1*stride, res, 1);\n  res = vld1_lane_s8(from + 2*stride, res, 2);\n  res = vld1_lane_s8(from + 3*stride, res, 3);\n  res = vld1_lane_s8(from + 4*stride, res, 4);\n  res = vld1_lane_s8(from + 5*stride, res, 5);\n  res = vld1_lane_s8(from + 6*stride, res, 6);\n  res = vld1_lane_s8(from + 7*stride, res, 7);\n  return res;\n}\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet16c pgather<int8_t, Packet16c>(const int8_t* from, Index stride)\n{\n  Packet16c res = vld1q_dup_s8(from);\n  res = vld1q_lane_s8(from + 1*stride, res, 1);\n  res = vld1q_lane_s8(from + 2*stride, res, 2);\n  res = vld1q_lane_s8(from + 3*stride, res, 3);\n  res = vld1q_lane_s8(from + 4*stride, res, 4);\n  res = vld1q_lane_s8(from + 5*stride, res, 5);\n  res = vld1q_lane_s8(from + 6*stride, res, 6);\n  res = vld1q_lane_s8(from + 7*stride, res, 7);\n  res = vld1q_lane_s8(from + 8*stride, res, 8);\n  res = vld1q_lane_s8(from + 9*stride, res, 9);\n  res = vld1q_lane_s8(from + 10*stride, res, 10);\n  res = vld1q_lane_s8(from + 11*stride, res, 11);\n  res = vld1q_lane_s8(from + 12*stride, res, 12);\n  res = vld1q_lane_s8(from + 13*stride, res, 13);\n  res = vld1q_lane_s8(from + 14*stride, res, 14);\n  res = vld1q_lane_s8(from + 15*stride, res, 15);\n  return res;\n}\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4uc pgather<uint8_t, Packet4uc>(const uint8_t* from, Index stride)\n{\n  Packet4uc res;\n  for (int i = 0; i != 4; i++)\n    reinterpret_cast<uint8_t*>(&res)[i] = *(from + i * stride);\n  return res;\n}\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet8uc pgather<uint8_t, Packet8uc>(const uint8_t* from, Index stride)\n{\n  Packet8uc res = vld1_dup_u8(from);\n  res = vld1_lane_u8(from + 1*stride, res, 1);\n  res = vld1_lane_u8(from + 2*stride, res, 2);\n  res = vld1_lane_u8(from + 3*stride, res, 3);\n  res = vld1_lane_u8(from + 4*stride, res, 4);\n  res = vld1_lane_u8(from + 5*stride, res, 5);\n  res = vld1_lane_u8(from + 6*stride, res, 6);\n  res = vld1_lane_u8(from + 7*stride, res, 7);\n  return res;\n}\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet16uc pgather<uint8_t, Packet16uc>(const uint8_t* from, Index stride)\n{\n  Packet16uc res = vld1q_dup_u8(from);\n  res = 
vld1q_lane_u8(from + 1*stride, res, 1);\n  res = vld1q_lane_u8(from + 2*stride, res, 2);\n  res = vld1q_lane_u8(from + 3*stride, res, 3);\n  res = vld1q_lane_u8(from + 4*stride, res, 4);\n  res = vld1q_lane_u8(from + 5*stride, res, 5);\n  res = vld1q_lane_u8(from + 6*stride, res, 6);\n  res = vld1q_lane_u8(from + 7*stride, res, 7);\n  res = vld1q_lane_u8(from + 8*stride, res, 8);\n  res = vld1q_lane_u8(from + 9*stride, res, 9);\n  res = vld1q_lane_u8(from + 10*stride, res, 10);\n  res = vld1q_lane_u8(from + 11*stride, res, 11);\n  res = vld1q_lane_u8(from + 12*stride, res, 12);\n  res = vld1q_lane_u8(from + 13*stride, res, 13);\n  res = vld1q_lane_u8(from + 14*stride, res, 14);\n  res = vld1q_lane_u8(from + 15*stride, res, 15);\n  return res;\n}\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4s pgather<int16_t, Packet4s>(const int16_t* from, Index stride)\n{\n  Packet4s res = vld1_dup_s16(from);\n  res = vld1_lane_s16(from + 1*stride, res, 1);\n  res = vld1_lane_s16(from + 2*stride, res, 2);\n  res = vld1_lane_s16(from + 3*stride, res, 3);\n  return res;\n}\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet8s pgather<int16_t, Packet8s>(const int16_t* from, Index stride)\n{\n  Packet8s res = vld1q_dup_s16(from);\n  res = vld1q_lane_s16(from + 1*stride, res, 1);\n  res = vld1q_lane_s16(from + 2*stride, res, 2);\n  res = vld1q_lane_s16(from + 3*stride, res, 3);\n  res = vld1q_lane_s16(from + 4*stride, res, 4);\n  res = vld1q_lane_s16(from + 5*stride, res, 5);\n  res = vld1q_lane_s16(from + 6*stride, res, 6);\n  res = vld1q_lane_s16(from + 7*stride, res, 7);\n  return res;\n}\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4us pgather<uint16_t, Packet4us>(const uint16_t* from, Index stride)\n{\n  Packet4us res = vld1_dup_u16(from);\n  res = vld1_lane_u16(from + 1*stride, res, 1);\n  res = vld1_lane_u16(from + 2*stride, res, 2);\n  res = vld1_lane_u16(from + 3*stride, res, 3);\n  return res;\n}\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet8us pgather<uint16_t, Packet8us>(const uint16_t* from, Index stride)\n{\n  Packet8us res = vld1q_dup_u16(from);\n  res = vld1q_lane_u16(from + 1*stride, res, 1);\n  res = vld1q_lane_u16(from + 2*stride, res, 2);\n  res = vld1q_lane_u16(from + 3*stride, res, 3);\n  res = vld1q_lane_u16(from + 4*stride, res, 4);\n  res = vld1q_lane_u16(from + 5*stride, res, 5);\n  res = vld1q_lane_u16(from + 6*stride, res, 6);\n  res = vld1q_lane_u16(from + 7*stride, res, 7);\n  return res;\n}\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet2i pgather<int32_t, Packet2i>(const int32_t* from, Index stride)\n{\n  Packet2i res = vld1_dup_s32(from);\n  res = vld1_lane_s32(from + 1*stride, res, 1);\n  return res;\n}\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4i pgather<int32_t, Packet4i>(const int32_t* from, Index stride)\n{\n  Packet4i res = vld1q_dup_s32(from);\n  res = vld1q_lane_s32(from + 1*stride, res, 1);\n  res = vld1q_lane_s32(from + 2*stride, res, 2);\n  res = vld1q_lane_s32(from + 3*stride, res, 3);\n  return res;\n}\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet2ui pgather<uint32_t, Packet2ui>(const uint32_t* from, Index stride)\n{\n  Packet2ui res = vld1_dup_u32(from);\n  res = vld1_lane_u32(from + 1*stride, res, 1);\n  return res;\n}\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4ui pgather<uint32_t, Packet4ui>(const uint32_t* from, Index stride)\n{\n  Packet4ui res = vld1q_dup_u32(from);\n  res = vld1q_lane_u32(from + 1*stride, res, 1);\n  res = vld1q_lane_u32(from + 2*stride, res, 2);\n  res = 
vld1q_lane_u32(from + 3*stride, res, 3);\n  return res;\n}\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet2l pgather<int64_t, Packet2l>(const int64_t* from, Index stride)\n{\n  Packet2l res = vld1q_dup_s64(from);\n  res = vld1q_lane_s64(from + 1*stride, res, 1);\n  return res;\n}\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet2ul pgather<uint64_t, Packet2ul>(const uint64_t* from, Index stride)\n{\n  Packet2ul res = vld1q_dup_u64(from);\n  res = vld1q_lane_u64(from + 1*stride, res, 1);\n  return res;\n}\n\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter<float, Packet2f>(float* to, const Packet2f& from, Index stride)\n{\n  vst1_lane_f32(to + stride*0, from, 0);\n  vst1_lane_f32(to + stride*1, from, 1);\n}\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter<float, Packet4f>(float* to, const Packet4f& from, Index stride)\n{\n  vst1q_lane_f32(to + stride*0, from, 0);\n  vst1q_lane_f32(to + stride*1, from, 1);\n  vst1q_lane_f32(to + stride*2, from, 2);\n  vst1q_lane_f32(to + stride*3, from, 3);\n}\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter<int8_t, Packet4c>(int8_t* to, const Packet4c& from, Index stride)\n{\n  for (int i = 0; i != 4; i++)\n    *(to + i * stride) = reinterpret_cast<const int8_t*>(&from)[i];\n}\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter<int8_t, Packet8c>(int8_t* to, const Packet8c& from, Index stride)\n{\n  vst1_lane_s8(to + stride*0, from, 0);\n  vst1_lane_s8(to + stride*1, from, 1);\n  vst1_lane_s8(to + stride*2, from, 2);\n  vst1_lane_s8(to + stride*3, from, 3);\n  vst1_lane_s8(to + stride*4, from, 4);\n  vst1_lane_s8(to + stride*5, from, 5);\n  vst1_lane_s8(to + stride*6, from, 6);\n  vst1_lane_s8(to + stride*7, from, 7);\n}\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter<int8_t, Packet16c>(int8_t* to, const Packet16c& from, Index stride)\n{\n  vst1q_lane_s8(to + stride*0, from, 0);\n  vst1q_lane_s8(to + stride*1, from, 1);\n  vst1q_lane_s8(to + stride*2, from, 2);\n  vst1q_lane_s8(to + stride*3, from, 3);\n  vst1q_lane_s8(to + stride*4, from, 4);\n  vst1q_lane_s8(to + stride*5, from, 5);\n  vst1q_lane_s8(to + stride*6, from, 6);\n  vst1q_lane_s8(to + stride*7, from, 7);\n  vst1q_lane_s8(to + stride*8, from, 8);\n  vst1q_lane_s8(to + stride*9, from, 9);\n  vst1q_lane_s8(to + stride*10, from, 10);\n  vst1q_lane_s8(to + stride*11, from, 11);\n  vst1q_lane_s8(to + stride*12, from, 12);\n  vst1q_lane_s8(to + stride*13, from, 13);\n  vst1q_lane_s8(to + stride*14, from, 14);\n  vst1q_lane_s8(to + stride*15, from, 15);\n}\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter<uint8_t, Packet4uc>(uint8_t* to, const Packet4uc& from, Index stride)\n{\n  for (int i = 0; i != 4; i++)\n    *(to + i * stride) = reinterpret_cast<const uint8_t*>(&from)[i];\n}\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter<uint8_t, Packet8uc>(uint8_t* to, const Packet8uc& from, Index stride)\n{\n  vst1_lane_u8(to + stride*0, from, 0);\n  vst1_lane_u8(to + stride*1, from, 1);\n  vst1_lane_u8(to + stride*2, from, 2);\n  vst1_lane_u8(to + stride*3, from, 3);\n  vst1_lane_u8(to + stride*4, from, 4);\n  vst1_lane_u8(to + stride*5, from, 5);\n  vst1_lane_u8(to + stride*6, from, 6);\n  vst1_lane_u8(to + stride*7, from, 7);\n}\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter<uint8_t, Packet16uc>(uint8_t* to, const Packet16uc& from, Index stride)\n{\n  vst1q_lane_u8(to + stride*0, from, 0);\n  vst1q_lane_u8(to + stride*1, from, 1);\n  vst1q_lane_u8(to + stride*2, from, 2);\n  
vst1q_lane_u8(to + stride*3, from, 3);\n  vst1q_lane_u8(to + stride*4, from, 4);\n  vst1q_lane_u8(to + stride*5, from, 5);\n  vst1q_lane_u8(to + stride*6, from, 6);\n  vst1q_lane_u8(to + stride*7, from, 7);\n  vst1q_lane_u8(to + stride*8, from, 8);\n  vst1q_lane_u8(to + stride*9, from, 9);\n  vst1q_lane_u8(to + stride*10, from, 10);\n  vst1q_lane_u8(to + stride*11, from, 11);\n  vst1q_lane_u8(to + stride*12, from, 12);\n  vst1q_lane_u8(to + stride*13, from, 13);\n  vst1q_lane_u8(to + stride*14, from, 14);\n  vst1q_lane_u8(to + stride*15, from, 15);\n}\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter<int16_t, Packet4s>(int16_t* to, const Packet4s& from, Index stride)\n{\n  vst1_lane_s16(to + stride*0, from, 0);\n  vst1_lane_s16(to + stride*1, from, 1);\n  vst1_lane_s16(to + stride*2, from, 2);\n  vst1_lane_s16(to + stride*3, from, 3);\n}\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter<int16_t, Packet8s>(int16_t* to, const Packet8s& from, Index stride)\n{\n  vst1q_lane_s16(to + stride*0, from, 0);\n  vst1q_lane_s16(to + stride*1, from, 1);\n  vst1q_lane_s16(to + stride*2, from, 2);\n  vst1q_lane_s16(to + stride*3, from, 3);\n  vst1q_lane_s16(to + stride*4, from, 4);\n  vst1q_lane_s16(to + stride*5, from, 5);\n  vst1q_lane_s16(to + stride*6, from, 6);\n  vst1q_lane_s16(to + stride*7, from, 7);\n}\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter<uint16_t, Packet4us>(uint16_t* to, const Packet4us& from, Index stride)\n{\n  vst1_lane_u16(to + stride*0, from, 0);\n  vst1_lane_u16(to + stride*1, from, 1);\n  vst1_lane_u16(to + stride*2, from, 2);\n  vst1_lane_u16(to + stride*3, from, 3);\n}\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter<uint16_t, Packet8us>(uint16_t* to, const Packet8us& from, Index stride)\n{\n  vst1q_lane_u16(to + stride*0, from, 0);\n  vst1q_lane_u16(to + stride*1, from, 1);\n  vst1q_lane_u16(to + stride*2, from, 2);\n  vst1q_lane_u16(to + stride*3, from, 3);\n  vst1q_lane_u16(to + stride*4, from, 4);\n  vst1q_lane_u16(to + stride*5, from, 5);\n  vst1q_lane_u16(to + stride*6, from, 6);\n  vst1q_lane_u16(to + stride*7, from, 7);\n}\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter<int32_t, Packet2i>(int32_t* to, const Packet2i& from, Index stride)\n{\n  vst1_lane_s32(to + stride*0, from, 0);\n  vst1_lane_s32(to + stride*1, from, 1);\n}\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter<int32_t, Packet4i>(int32_t* to, const Packet4i& from, Index stride)\n{\n  vst1q_lane_s32(to + stride*0, from, 0);\n  vst1q_lane_s32(to + stride*1, from, 1);\n  vst1q_lane_s32(to + stride*2, from, 2);\n  vst1q_lane_s32(to + stride*3, from, 3);\n}\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter<uint32_t, Packet2ui>(uint32_t* to, const Packet2ui& from, Index stride)\n{\n  vst1_lane_u32(to + stride*0, from, 0);\n  vst1_lane_u32(to + stride*1, from, 1);\n}\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter<uint32_t, Packet4ui>(uint32_t* to, const Packet4ui& from, Index stride)\n{\n  vst1q_lane_u32(to + stride*0, from, 0);\n  vst1q_lane_u32(to + stride*1, from, 1);\n  vst1q_lane_u32(to + stride*2, from, 2);\n  vst1q_lane_u32(to + stride*3, from, 3);\n}\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter<int64_t, Packet2l>(int64_t* to, const Packet2l& from, Index stride)\n{\n  vst1q_lane_s64(to + stride*0, from, 0);\n  vst1q_lane_s64(to + stride*1, from, 1);\n}\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter<uint64_t, Packet2ul>(uint64_t* to, const Packet2ul& 
from, Index stride)\n{\n  vst1q_lane_u64(to + stride*0, from, 0);\n  vst1q_lane_u64(to + stride*1, from, 1);\n}\n\ntemplate<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { EIGEN_ARM_PREFETCH(addr); }\ntemplate<> EIGEN_STRONG_INLINE void prefetch<int8_t>(const int8_t* addr) { EIGEN_ARM_PREFETCH(addr); }\ntemplate<> EIGEN_STRONG_INLINE void prefetch<uint8_t>(const uint8_t* addr) { EIGEN_ARM_PREFETCH(addr); }\ntemplate<> EIGEN_STRONG_INLINE void prefetch<int16_t>(const int16_t* addr) { EIGEN_ARM_PREFETCH(addr); }\ntemplate<> EIGEN_STRONG_INLINE void prefetch<uint16_t>(const uint16_t* addr) { EIGEN_ARM_PREFETCH(addr); }\ntemplate<> EIGEN_STRONG_INLINE void prefetch<int32_t>(const int32_t* addr) { EIGEN_ARM_PREFETCH(addr); }\ntemplate<> EIGEN_STRONG_INLINE void prefetch<uint32_t>(const uint32_t* addr) { EIGEN_ARM_PREFETCH(addr); }\ntemplate<> EIGEN_STRONG_INLINE void prefetch<int64_t>(const int64_t* addr) { EIGEN_ARM_PREFETCH(addr); }\ntemplate<> EIGEN_STRONG_INLINE void prefetch<uint64_t>(const uint64_t* addr) { EIGEN_ARM_PREFETCH(addr); }\n\ntemplate<> EIGEN_STRONG_INLINE float pfirst<Packet2f>(const Packet2f& a) { return vget_lane_f32(a,0); }\ntemplate<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { return vgetq_lane_f32(a,0); }\ntemplate<> EIGEN_STRONG_INLINE int8_t pfirst<Packet4c>(const Packet4c& a) { return static_cast<int8_t>(a & 0xff); }\ntemplate<> EIGEN_STRONG_INLINE int8_t pfirst<Packet8c>(const Packet8c& a) { return vget_lane_s8(a,0); }\ntemplate<> EIGEN_STRONG_INLINE int8_t pfirst<Packet16c>(const Packet16c& a) { return vgetq_lane_s8(a,0); }\ntemplate<> EIGEN_STRONG_INLINE uint8_t pfirst<Packet4uc>(const Packet4uc& a) { return static_cast<uint8_t>(a & 0xff); }\ntemplate<> EIGEN_STRONG_INLINE uint8_t pfirst<Packet8uc>(const Packet8uc& a) { return vget_lane_u8(a,0); }\ntemplate<> EIGEN_STRONG_INLINE uint8_t pfirst<Packet16uc>(const Packet16uc& a) { return vgetq_lane_u8(a,0); }\ntemplate<> EIGEN_STRONG_INLINE int16_t pfirst<Packet4s>(const Packet4s& a) { return vget_lane_s16(a,0); }\ntemplate<> EIGEN_STRONG_INLINE int16_t pfirst<Packet8s>(const Packet8s& a) { return vgetq_lane_s16(a,0); }\ntemplate<> EIGEN_STRONG_INLINE uint16_t pfirst<Packet4us>(const Packet4us& a) { return vget_lane_u16(a,0); }\ntemplate<> EIGEN_STRONG_INLINE uint16_t pfirst<Packet8us>(const Packet8us& a) { return vgetq_lane_u16(a,0); }\ntemplate<> EIGEN_STRONG_INLINE int32_t pfirst<Packet2i>(const Packet2i& a) { return vget_lane_s32(a,0); }\ntemplate<> EIGEN_STRONG_INLINE int32_t pfirst<Packet4i>(const Packet4i& a) { return vgetq_lane_s32(a,0); }\ntemplate<> EIGEN_STRONG_INLINE uint32_t pfirst<Packet2ui>(const Packet2ui& a) { return vget_lane_u32(a,0); }\ntemplate<> EIGEN_STRONG_INLINE uint32_t pfirst<Packet4ui>(const Packet4ui& a) { return vgetq_lane_u32(a,0); }\ntemplate<> EIGEN_STRONG_INLINE int64_t pfirst<Packet2l>(const Packet2l& a) { return vgetq_lane_s64(a,0); }\ntemplate<> EIGEN_STRONG_INLINE uint64_t pfirst<Packet2ul>(const Packet2ul& a) { return vgetq_lane_u64(a,0); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet2f preverse(const Packet2f& a) { return vrev64_f32(a); }\ntemplate<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a)\n{\n  const float32x4_t a_r64 = vrev64q_f32(a);\n  return vcombine_f32(vget_high_f32(a_r64), vget_low_f32(a_r64));\n}\ntemplate<> EIGEN_STRONG_INLINE Packet4c preverse(const Packet4c& a)\n{ return vget_lane_s32(vreinterpret_s32_s8(vrev64_s8(vreinterpret_s8_s32(vdup_n_s32(a)))), 0); }\ntemplate<> EIGEN_STRONG_INLINE Packet8c preverse(const 
Packet8c& a) { return vrev64_s8(a); }\ntemplate<> EIGEN_STRONG_INLINE Packet16c preverse(const Packet16c& a)\n{\n  const int8x16_t a_r64 = vrev64q_s8(a);\n  return vcombine_s8(vget_high_s8(a_r64), vget_low_s8(a_r64));\n}\ntemplate<> EIGEN_STRONG_INLINE Packet4uc preverse(const Packet4uc& a)\n{ return vget_lane_u32(vreinterpret_u32_u8(vrev64_u8(vreinterpret_u8_u32(vdup_n_u32(a)))), 0); }\ntemplate<> EIGEN_STRONG_INLINE Packet8uc preverse(const Packet8uc& a) { return vrev64_u8(a); }\ntemplate<> EIGEN_STRONG_INLINE Packet16uc preverse(const Packet16uc& a)\n{\n  const uint8x16_t a_r64 = vrev64q_u8(a);\n  return vcombine_u8(vget_high_u8(a_r64), vget_low_u8(a_r64));\n}\ntemplate<> EIGEN_STRONG_INLINE Packet4s preverse(const Packet4s& a) { return vrev64_s16(a); }\ntemplate<> EIGEN_STRONG_INLINE Packet8s preverse(const Packet8s& a)\n{\n  const int16x8_t a_r64 = vrev64q_s16(a);\n  return vcombine_s16(vget_high_s16(a_r64), vget_low_s16(a_r64));\n}\ntemplate<> EIGEN_STRONG_INLINE Packet4us preverse(const Packet4us& a) { return vrev64_u16(a); }\ntemplate<> EIGEN_STRONG_INLINE Packet8us preverse(const Packet8us& a)\n{\n  const uint16x8_t a_r64 = vrev64q_u16(a);\n  return vcombine_u16(vget_high_u16(a_r64), vget_low_u16(a_r64));\n}\ntemplate<> EIGEN_STRONG_INLINE Packet2i preverse(const Packet2i& a) { return vrev64_s32(a); }\ntemplate<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a)\n{\n  const int32x4_t a_r64 = vrev64q_s32(a);\n  return vcombine_s32(vget_high_s32(a_r64), vget_low_s32(a_r64));\n}\ntemplate<> EIGEN_STRONG_INLINE Packet2ui preverse(const Packet2ui& a) { return vrev64_u32(a); }\ntemplate<> EIGEN_STRONG_INLINE Packet4ui preverse(const Packet4ui& a)\n{\n  const uint32x4_t a_r64 = vrev64q_u32(a);\n  return vcombine_u32(vget_high_u32(a_r64), vget_low_u32(a_r64));\n}\ntemplate<> EIGEN_STRONG_INLINE Packet2l preverse(const Packet2l& a)\n{ return vcombine_s64(vget_high_s64(a), vget_low_s64(a)); }\ntemplate<> EIGEN_STRONG_INLINE Packet2ul preverse(const Packet2ul& a)\n{ return vcombine_u64(vget_high_u64(a), vget_low_u64(a)); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet2f pabs(const Packet2f& a) { return vabs_f32(a); }\ntemplate<> EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a) { return vabsq_f32(a); }\ntemplate<> EIGEN_STRONG_INLINE Packet4c pabs<Packet4c>(const Packet4c& a)\n{ return vget_lane_s32(vreinterpret_s32_s8(vabs_s8(vreinterpret_s8_s32(vdup_n_s32(a)))), 0); }\ntemplate<> EIGEN_STRONG_INLINE Packet8c pabs(const Packet8c& a) { return vabs_s8(a); }\ntemplate<> EIGEN_STRONG_INLINE Packet16c pabs(const Packet16c& a) { return vabsq_s8(a); }\ntemplate<> EIGEN_STRONG_INLINE Packet4uc pabs(const Packet4uc& a) { return a; }\ntemplate<> EIGEN_STRONG_INLINE Packet8uc pabs(const Packet8uc& a) { return a; }\ntemplate<> EIGEN_STRONG_INLINE Packet16uc pabs(const Packet16uc& a) { return a; }\ntemplate<> EIGEN_STRONG_INLINE Packet4s pabs(const Packet4s& a) { return vabs_s16(a); }\ntemplate<> EIGEN_STRONG_INLINE Packet8s pabs(const Packet8s& a) { return vabsq_s16(a); }\ntemplate<> EIGEN_STRONG_INLINE Packet4us pabs(const Packet4us& a) { return a; }\ntemplate<> EIGEN_STRONG_INLINE Packet8us pabs(const Packet8us& a) { return a; }\ntemplate<> EIGEN_STRONG_INLINE Packet2i pabs(const Packet2i& a) { return vabs_s32(a); }\ntemplate<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a) { return vabsq_s32(a); }\ntemplate<> EIGEN_STRONG_INLINE Packet2ui pabs(const Packet2ui& a) { return a; }\ntemplate<> EIGEN_STRONG_INLINE Packet4ui pabs(const Packet4ui& a) { return a; }\ntemplate<> EIGEN_STRONG_INLINE 
Packet2l pabs(const Packet2l& a) {\n#if EIGEN_ARCH_ARM64\n  return vabsq_s64(a);\n#else\n  return vcombine_s64(\n      vdup_n_s64((std::abs)(vgetq_lane_s64(a, 0))),\n      vdup_n_s64((std::abs)(vgetq_lane_s64(a, 1))));\n#endif\n}\ntemplate<> EIGEN_STRONG_INLINE Packet2ul pabs(const Packet2ul& a) { return a; }\n\ntemplate<> EIGEN_STRONG_INLINE Packet2f pfrexp<Packet2f>(const Packet2f& a, Packet2f& exponent)\n{ return pfrexp_generic(a,exponent); }\ntemplate<> EIGEN_STRONG_INLINE Packet4f pfrexp<Packet4f>(const Packet4f& a, Packet4f& exponent)\n{ return pfrexp_generic(a,exponent); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet2f pldexp<Packet2f>(const Packet2f& a, const Packet2f& exponent)\n{ return pldexp_generic(a,exponent); }\ntemplate<> EIGEN_STRONG_INLINE Packet4f pldexp<Packet4f>(const Packet4f& a, const Packet4f& exponent)\n{ return pldexp_generic(a,exponent); }\n\ntemplate<> EIGEN_STRONG_INLINE float predux<Packet2f>(const Packet2f& a) { return vget_lane_f32(vpadd_f32(a,a), 0); }\ntemplate<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)\n{\n  const float32x2_t sum = vadd_f32(vget_low_f32(a), vget_high_f32(a));\n  return vget_lane_f32(vpadd_f32(sum, sum), 0);\n}\ntemplate<> EIGEN_STRONG_INLINE int8_t predux<Packet4c>(const Packet4c& a)\n{\n  const int8x8_t a_dup = vreinterpret_s8_s32(vdup_n_s32(a));\n  int8x8_t sum = vpadd_s8(a_dup, a_dup);\n  sum = vpadd_s8(sum, sum);\n  return vget_lane_s8(sum, 0);\n}\ntemplate<> EIGEN_STRONG_INLINE int8_t predux<Packet8c>(const Packet8c& a)\n{\n  int8x8_t sum = vpadd_s8(a,a);\n  sum = vpadd_s8(sum, sum);\n  sum = vpadd_s8(sum, sum);\n  return vget_lane_s8(sum, 0);\n}\ntemplate<> EIGEN_STRONG_INLINE int8_t predux<Packet16c>(const Packet16c& a)\n{\n  int8x8_t sum = vadd_s8(vget_low_s8(a), vget_high_s8(a));\n  sum = vpadd_s8(sum, sum);\n  sum = vpadd_s8(sum, sum);\n  sum = vpadd_s8(sum, sum);\n  return vget_lane_s8(sum, 0);\n}\ntemplate<> EIGEN_STRONG_INLINE uint8_t predux<Packet4uc>(const Packet4uc& a)\n{\n  const uint8x8_t a_dup = vreinterpret_u8_u32(vdup_n_u32(a));\n  uint8x8_t sum = vpadd_u8(a_dup, a_dup);\n  sum = vpadd_u8(sum, sum);\n  return vget_lane_u8(sum, 0);\n}\ntemplate<> EIGEN_STRONG_INLINE uint8_t predux<Packet8uc>(const Packet8uc& a)\n{\n  uint8x8_t sum = vpadd_u8(a,a);\n  sum = vpadd_u8(sum, sum);\n  sum = vpadd_u8(sum, sum);\n  return vget_lane_u8(sum, 0);\n}\ntemplate<> EIGEN_STRONG_INLINE uint8_t predux<Packet16uc>(const Packet16uc& a)\n{\n  uint8x8_t sum = vadd_u8(vget_low_u8(a), vget_high_u8(a));\n  sum = vpadd_u8(sum, sum);\n  sum = vpadd_u8(sum, sum);\n  sum = vpadd_u8(sum, sum);\n  return vget_lane_u8(sum, 0);\n}\ntemplate<> EIGEN_STRONG_INLINE int16_t predux<Packet4s>(const Packet4s& a)\n{\n  const int16x4_t sum = vpadd_s16(a,a);\n  return vget_lane_s16(vpadd_s16(sum, sum), 0);\n}\ntemplate<> EIGEN_STRONG_INLINE int16_t predux<Packet8s>(const Packet8s& a)\n{\n  int16x4_t sum = vadd_s16(vget_low_s16(a), vget_high_s16(a));\n  sum = vpadd_s16(sum, sum);\n  sum = vpadd_s16(sum, sum);\n  return vget_lane_s16(sum, 0);\n}\ntemplate<> EIGEN_STRONG_INLINE uint16_t predux<Packet4us>(const Packet4us& a)\n{\n  const uint16x4_t sum = vpadd_u16(a,a);\n  return vget_lane_u16(vpadd_u16(sum, sum), 0);\n}\ntemplate<> EIGEN_STRONG_INLINE uint16_t predux<Packet8us>(const Packet8us& a)\n{\n  uint16x4_t sum = vadd_u16(vget_low_u16(a), vget_high_u16(a));\n  sum = vpadd_u16(sum, sum);\n  sum = vpadd_u16(sum, sum);\n  return vget_lane_u16(sum, 0);\n}\ntemplate<> EIGEN_STRONG_INLINE int32_t predux<Packet2i>(const Packet2i& a) { return 
vget_lane_s32(vpadd_s32(a,a), 0); }\ntemplate<> EIGEN_STRONG_INLINE int32_t predux<Packet4i>(const Packet4i& a)\n{\n  const int32x2_t sum = vadd_s32(vget_low_s32(a), vget_high_s32(a));\n  return vget_lane_s32(vpadd_s32(sum, sum), 0);\n}\ntemplate<> EIGEN_STRONG_INLINE uint32_t predux<Packet2ui>(const Packet2ui& a) { return vget_lane_u32(vpadd_u32(a,a), 0); }\ntemplate<> EIGEN_STRONG_INLINE uint32_t predux<Packet4ui>(const Packet4ui& a)\n{\n  const uint32x2_t sum = vadd_u32(vget_low_u32(a), vget_high_u32(a));\n  return vget_lane_u32(vpadd_u32(sum, sum), 0);\n}\ntemplate<> EIGEN_STRONG_INLINE int64_t predux<Packet2l>(const Packet2l& a)\n{ return vgetq_lane_s64(a, 0) + vgetq_lane_s64(a, 1); }\ntemplate<> EIGEN_STRONG_INLINE uint64_t predux<Packet2ul>(const Packet2ul& a)\n{ return vgetq_lane_u64(a, 0) + vgetq_lane_u64(a, 1); }\n\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4c predux_half_dowto4(const Packet8c& a)\n{\n  return vget_lane_s32(vreinterpret_s32_s8(vadd_s8(a,\n      vreinterpret_s8_s32(vrev64_s32(vreinterpret_s32_s8(a))))), 0);\n}\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet8c predux_half_dowto4(const Packet16c& a)\n{ return vadd_s8(vget_high_s8(a), vget_low_s8(a)); }\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4uc predux_half_dowto4(const Packet8uc& a)\n{\n  return vget_lane_u32(vreinterpret_u32_u8(vadd_u8(a,\n      vreinterpret_u8_u32(vrev64_u32(vreinterpret_u32_u8(a))))), 0);\n}\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet8uc predux_half_dowto4(const Packet16uc& a)\n{ return vadd_u8(vget_high_u8(a), vget_low_u8(a)); }\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4s predux_half_dowto4(const Packet8s& a)\n{ return vadd_s16(vget_high_s16(a), vget_low_s16(a)); }\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4us predux_half_dowto4(const Packet8us& a)\n{ return vadd_u16(vget_high_u16(a), vget_low_u16(a)); }\n\n// Other reduction functions:\n// mul\ntemplate<> EIGEN_STRONG_INLINE float predux_mul<Packet2f>(const Packet2f& a)\n{ return vget_lane_f32(a, 0) * vget_lane_f32(a, 1); }\ntemplate<> EIGEN_STRONG_INLINE float predux_mul<Packet4f>(const Packet4f& a)\n{ return predux_mul(vmul_f32(vget_low_f32(a), vget_high_f32(a))); }\ntemplate<> EIGEN_STRONG_INLINE int8_t predux_mul<Packet4c>(const Packet4c& a)\n{\n  int8x8_t prod = vreinterpret_s8_s32(vdup_n_s32(a));\n  prod = vmul_s8(prod, vrev16_s8(prod));\n  return vget_lane_s8(prod, 0) * vget_lane_s8(prod, 2);\n}\ntemplate<> EIGEN_STRONG_INLINE int8_t predux_mul<Packet8c>(const Packet8c& a)\n{\n  int8x8_t prod = vmul_s8(a, vrev16_s8(a));\n  prod = vmul_s8(prod, vrev32_s8(prod));\n  return vget_lane_s8(prod, 0) * vget_lane_s8(prod, 4);\n}\ntemplate<> EIGEN_STRONG_INLINE int8_t predux_mul<Packet16c>(const Packet16c& a)\n{ return predux_mul(vmul_s8(vget_low_s8(a), vget_high_s8(a))); }\ntemplate<> EIGEN_STRONG_INLINE uint8_t predux_mul<Packet4uc>(const Packet4uc& a)\n{\n  uint8x8_t prod = vreinterpret_u8_u32(vdup_n_u32(a));\n  prod = vmul_u8(prod, vrev16_u8(prod));\n  return vget_lane_u8(prod, 0) * vget_lane_u8(prod, 2);\n}\ntemplate<> EIGEN_STRONG_INLINE uint8_t predux_mul<Packet8uc>(const Packet8uc& a)\n{\n  uint8x8_t prod = vmul_u8(a, vrev16_u8(a));\n  prod = vmul_u8(prod, vrev32_u8(prod));\n  return vget_lane_u8(prod, 0) * vget_lane_u8(prod, 4);\n}\ntemplate<> EIGEN_STRONG_INLINE uint8_t predux_mul<Packet16uc>(const Packet16uc& a)\n{ return predux_mul(vmul_u8(vget_low_u8(a), vget_high_u8(a))); }\ntemplate<> EIGEN_STRONG_INLINE int16_t predux_mul<Packet4s>(const Packet4s& 
a)\n{\n  const int16x4_t prod = vmul_s16(a, vrev32_s16(a));\n  return vget_lane_s16(prod, 0) * vget_lane_s16(prod, 2);\n}\ntemplate<> EIGEN_STRONG_INLINE int16_t predux_mul<Packet8s>(const Packet8s& a)\n{\n  int16x4_t prod;\n\n  // Get the product of a_lo * a_hi -> |a1*a5|a2*a6|a3*a7|a4*a8|\n  prod = vmul_s16(vget_low_s16(a), vget_high_s16(a));\n  // Swap and multiply |a1*a5*a2*a6|a3*a7*a4*a8|\n  prod = vmul_s16(prod, vrev32_s16(prod));\n  // Multiply |a1*a5*a2*a6*a3*a7*a4*a8|\n  return vget_lane_s16(prod, 0) * vget_lane_s16(prod, 2);\n}\ntemplate<> EIGEN_STRONG_INLINE uint16_t predux_mul<Packet4us>(const Packet4us& a)\n{\n  const uint16x4_t prod = vmul_u16(a, vrev32_u16(a));\n  return vget_lane_u16(prod, 0) * vget_lane_u16(prod, 2);\n}\ntemplate<> EIGEN_STRONG_INLINE uint16_t predux_mul<Packet8us>(const Packet8us& a)\n{\n  uint16x4_t prod;\n\n  // Get the product of a_lo * a_hi -> |a1*a5|a2*a6|a3*a7|a4*a8|\n  prod = vmul_u16(vget_low_u16(a), vget_high_u16(a));\n  // Swap and multiply |a1*a5*a2*a6|a3*a7*a4*a8|\n  prod = vmul_u16(prod, vrev32_u16(prod));\n  // Multiply |a1*a5*a2*a6*a3*a7*a4*a8|\n  return vget_lane_u16(prod, 0) * vget_lane_u16(prod, 2);\n}\ntemplate<> EIGEN_STRONG_INLINE int32_t predux_mul<Packet2i>(const Packet2i& a)\n{ return vget_lane_s32(a, 0) * vget_lane_s32(a, 1); }\ntemplate<> EIGEN_STRONG_INLINE int32_t predux_mul<Packet4i>(const Packet4i& a)\n{ return predux_mul(vmul_s32(vget_low_s32(a), vget_high_s32(a))); }\ntemplate<> EIGEN_STRONG_INLINE uint32_t predux_mul<Packet2ui>(const Packet2ui& a)\n{ return vget_lane_u32(a, 0) * vget_lane_u32(a, 1); }\ntemplate<> EIGEN_STRONG_INLINE uint32_t predux_mul<Packet4ui>(const Packet4ui& a)\n{ return predux_mul(vmul_u32(vget_low_u32(a), vget_high_u32(a))); }\ntemplate<> EIGEN_STRONG_INLINE int64_t predux_mul<Packet2l>(const Packet2l& a)\n{ return vgetq_lane_s64(a, 0) * vgetq_lane_s64(a, 1); }\ntemplate<> EIGEN_STRONG_INLINE uint64_t predux_mul<Packet2ul>(const Packet2ul& a)\n{ return vgetq_lane_u64(a, 0) * vgetq_lane_u64(a, 1); }\n\n// min\ntemplate<> EIGEN_STRONG_INLINE float predux_min<Packet2f>(const Packet2f& a)\n{ return vget_lane_f32(vpmin_f32(a,a), 0); }\ntemplate<> EIGEN_STRONG_INLINE float predux_min<Packet4f>(const Packet4f& a)\n{\n  const float32x2_t min = vmin_f32(vget_low_f32(a), vget_high_f32(a));\n  return vget_lane_f32(vpmin_f32(min, min), 0);\n}\ntemplate<> EIGEN_STRONG_INLINE int8_t predux_min<Packet4c>(const Packet4c& a)\n{\n  const int8x8_t a_dup = vreinterpret_s8_s32(vdup_n_s32(a));\n  int8x8_t min = vpmin_s8(a_dup, a_dup);\n  min = vpmin_s8(min, min);\n  return vget_lane_s8(min, 0);\n}\ntemplate<> EIGEN_STRONG_INLINE int8_t predux_min<Packet8c>(const Packet8c& a)\n{\n  int8x8_t min = vpmin_s8(a,a);\n  min = vpmin_s8(min, min);\n  min = vpmin_s8(min, min);\n  return vget_lane_s8(min, 0);\n}\ntemplate<> EIGEN_STRONG_INLINE int8_t predux_min<Packet16c>(const Packet16c& a)\n{\n  int8x8_t min = vmin_s8(vget_low_s8(a), vget_high_s8(a));\n  min = vpmin_s8(min, min);\n  min = vpmin_s8(min, min);\n  min = vpmin_s8(min, min);\n  return vget_lane_s8(min, 0);\n}\ntemplate<> EIGEN_STRONG_INLINE uint8_t predux_min<Packet4uc>(const Packet4uc& a)\n{\n  const uint8x8_t a_dup = vreinterpret_u8_u32(vdup_n_u32(a));\n  uint8x8_t min = vpmin_u8(a_dup, a_dup);\n  min = vpmin_u8(min, min);\n  return vget_lane_u8(min, 0);\n}\ntemplate<> EIGEN_STRONG_INLINE uint8_t predux_min<Packet8uc>(const Packet8uc& a)\n{\n  uint8x8_t min = vpmin_u8(a,a);\n  min = vpmin_u8(min, min);\n  min = vpmin_u8(min, min);\n  return vget_lane_u8(min, 
0);\n}\ntemplate<> EIGEN_STRONG_INLINE uint8_t predux_min<Packet16uc>(const Packet16uc& a)\n{\n  uint8x8_t min = vmin_u8(vget_low_u8(a), vget_high_u8(a));\n  min = vpmin_u8(min, min);\n  min = vpmin_u8(min, min);\n  min = vpmin_u8(min, min);\n  return vget_lane_u8(min, 0);\n}\ntemplate<> EIGEN_STRONG_INLINE int16_t predux_min<Packet4s>(const Packet4s& a)\n{\n  const int16x4_t min = vpmin_s16(a,a);\n  return vget_lane_s16(vpmin_s16(min, min), 0);\n}\ntemplate<> EIGEN_STRONG_INLINE int16_t predux_min<Packet8s>(const Packet8s& a)\n{\n  int16x4_t min = vmin_s16(vget_low_s16(a), vget_high_s16(a));\n  min = vpmin_s16(min, min);\n  min = vpmin_s16(min, min);\n  return vget_lane_s16(min, 0);\n}\ntemplate<> EIGEN_STRONG_INLINE uint16_t predux_min<Packet4us>(const Packet4us& a)\n{\n  const uint16x4_t min = vpmin_u16(a,a);\n  return vget_lane_u16(vpmin_u16(min, min), 0);\n}\ntemplate<> EIGEN_STRONG_INLINE uint16_t predux_min<Packet8us>(const Packet8us& a)\n{\n  uint16x4_t min = vmin_u16(vget_low_u16(a), vget_high_u16(a));\n  min = vpmin_u16(min, min);\n  min = vpmin_u16(min, min);\n  return vget_lane_u16(min, 0);\n}\ntemplate<> EIGEN_STRONG_INLINE int32_t predux_min<Packet2i>(const Packet2i& a)\n{ return vget_lane_s32(vpmin_s32(a,a), 0); }\ntemplate<> EIGEN_STRONG_INLINE int32_t predux_min<Packet4i>(const Packet4i& a)\n{\n  const int32x2_t min = vmin_s32(vget_low_s32(a), vget_high_s32(a));\n  return vget_lane_s32(vpmin_s32(min, min), 0);\n}\ntemplate<> EIGEN_STRONG_INLINE uint32_t predux_min<Packet2ui>(const Packet2ui& a)\n{ return vget_lane_u32(vpmin_u32(a,a), 0); }\ntemplate<> EIGEN_STRONG_INLINE uint32_t predux_min<Packet4ui>(const Packet4ui& a)\n{\n  const uint32x2_t min = vmin_u32(vget_low_u32(a), vget_high_u32(a));\n  return vget_lane_u32(vpmin_u32(min, min), 0);\n}\ntemplate<> EIGEN_STRONG_INLINE int64_t predux_min<Packet2l>(const Packet2l& a)\n{ return (std::min)(vgetq_lane_s64(a, 0), vgetq_lane_s64(a, 1)); }\ntemplate<> EIGEN_STRONG_INLINE uint64_t predux_min<Packet2ul>(const Packet2ul& a)\n{ return (std::min)(vgetq_lane_u64(a, 0), vgetq_lane_u64(a, 1)); }\n\n// max\ntemplate<> EIGEN_STRONG_INLINE float predux_max<Packet2f>(const Packet2f& a)\n{ return vget_lane_f32(vpmax_f32(a,a), 0); }\ntemplate<> EIGEN_STRONG_INLINE float predux_max<Packet4f>(const Packet4f& a)\n{\n  const float32x2_t max = vmax_f32(vget_low_f32(a), vget_high_f32(a));\n  return vget_lane_f32(vpmax_f32(max, max), 0);\n}\ntemplate<> EIGEN_STRONG_INLINE int8_t predux_max<Packet4c>(const Packet4c& a)\n{\n  const int8x8_t a_dup = vreinterpret_s8_s32(vdup_n_s32(a));\n  int8x8_t max = vpmax_s8(a_dup, a_dup);\n  max = vpmax_s8(max, max);\n  return vget_lane_s8(max, 0);\n}\ntemplate<> EIGEN_STRONG_INLINE int8_t predux_max<Packet8c>(const Packet8c& a)\n{\n  int8x8_t max = vpmax_s8(a,a);\n  max = vpmax_s8(max, max);\n  max = vpmax_s8(max, max);\n  return vget_lane_s8(max, 0);\n}\ntemplate<> EIGEN_STRONG_INLINE int8_t predux_max<Packet16c>(const Packet16c& a)\n{\n  int8x8_t max = vmax_s8(vget_low_s8(a), vget_high_s8(a));\n  max = vpmax_s8(max, max);\n  max = vpmax_s8(max, max);\n  max = vpmax_s8(max, max);\n  return vget_lane_s8(max, 0);\n}\ntemplate<> EIGEN_STRONG_INLINE uint8_t predux_max<Packet4uc>(const Packet4uc& a)\n{\n  const uint8x8_t a_dup = vreinterpret_u8_u32(vdup_n_u32(a));\n  uint8x8_t max = vpmax_u8(a_dup, a_dup);\n  max = vpmax_u8(max, max);\n  return vget_lane_u8(max, 0);\n}\ntemplate<> EIGEN_STRONG_INLINE uint8_t predux_max<Packet8uc>(const Packet8uc& a)\n{\n  uint8x8_t max = vpmax_u8(a,a);\n  max = vpmax_u8(max, 
max);\n  max = vpmax_u8(max, max);\n  return vget_lane_u8(max, 0);\n}\ntemplate<> EIGEN_STRONG_INLINE uint8_t predux_max<Packet16uc>(const Packet16uc& a)\n{\n  uint8x8_t max = vmax_u8(vget_low_u8(a), vget_high_u8(a));\n  max = vpmax_u8(max, max);\n  max = vpmax_u8(max, max);\n  max = vpmax_u8(max, max);\n  return vget_lane_u8(max, 0);\n}\ntemplate<> EIGEN_STRONG_INLINE int16_t predux_max<Packet4s>(const Packet4s& a)\n{\n  const int16x4_t max = vpmax_s16(a,a);\n  return vget_lane_s16(vpmax_s16(max, max), 0);\n}\ntemplate<> EIGEN_STRONG_INLINE int16_t predux_max<Packet8s>(const Packet8s& a)\n{\n  int16x4_t max = vmax_s16(vget_low_s16(a), vget_high_s16(a));\n  max = vpmax_s16(max, max);\n  max = vpmax_s16(max, max);\n  return vget_lane_s16(max, 0);\n}\ntemplate<> EIGEN_STRONG_INLINE uint16_t predux_max<Packet4us>(const Packet4us& a)\n{\n  const uint16x4_t max = vpmax_u16(a,a);\n  return vget_lane_u16(vpmax_u16(max, max), 0);\n}\ntemplate<> EIGEN_STRONG_INLINE uint16_t predux_max<Packet8us>(const Packet8us& a)\n{\n  uint16x4_t max = vmax_u16(vget_low_u16(a), vget_high_u16(a));\n  max = vpmax_u16(max, max);\n  max = vpmax_u16(max, max);\n  return vget_lane_u16(max, 0);\n}\ntemplate<> EIGEN_STRONG_INLINE int32_t predux_max<Packet2i>(const Packet2i& a)\n{ return vget_lane_s32(vpmax_s32(a,a), 0); }\ntemplate<> EIGEN_STRONG_INLINE int32_t predux_max<Packet4i>(const Packet4i& a)\n{\n  const int32x2_t max = vmax_s32(vget_low_s32(a), vget_high_s32(a));\n  return vget_lane_s32(vpmax_s32(max, max), 0);\n}\ntemplate<> EIGEN_STRONG_INLINE uint32_t predux_max<Packet2ui>(const Packet2ui& a)\n{ return vget_lane_u32(vpmax_u32(a,a), 0); }\ntemplate<> EIGEN_STRONG_INLINE uint32_t predux_max<Packet4ui>(const Packet4ui& a)\n{\n  const uint32x2_t max = vmax_u32(vget_low_u32(a), vget_high_u32(a));\n  return vget_lane_u32(vpmax_u32(max, max), 0);\n}\ntemplate<> EIGEN_STRONG_INLINE int64_t predux_max<Packet2l>(const Packet2l& a)\n{ return (std::max)(vgetq_lane_s64(a, 0), vgetq_lane_s64(a, 1)); }\ntemplate<> EIGEN_STRONG_INLINE uint64_t predux_max<Packet2ul>(const Packet2ul& a)\n{ return (std::max)(vgetq_lane_u64(a, 0), vgetq_lane_u64(a, 1)); }\n\ntemplate<> EIGEN_STRONG_INLINE bool predux_any(const Packet4f& x)\n{\n  uint32x2_t tmp = vorr_u32(vget_low_u32( vreinterpretq_u32_f32(x)),\n                            vget_high_u32(vreinterpretq_u32_f32(x)));\n  return vget_lane_u32(vpmax_u32(tmp, tmp), 0);\n}\n\n// Helpers for ptranspose.\nnamespace detail {\n  \ntemplate<typename Packet>\nvoid zip_in_place(Packet& p1, Packet& p2);\n\ntemplate<>\nEIGEN_ALWAYS_INLINE void zip_in_place<Packet2f>(Packet2f& p1, Packet2f& p2) {\n  const float32x2x2_t tmp = vzip_f32(p1, p2);\n  p1 = tmp.val[0];\n  p2 = tmp.val[1];\n}\n\ntemplate<>\nEIGEN_ALWAYS_INLINE void zip_in_place<Packet4f>(Packet4f& p1, Packet4f& p2) {\n  const float32x4x2_t tmp = vzipq_f32(p1, p2);\n  p1 = tmp.val[0];\n  p2 = tmp.val[1];\n}\n\ntemplate<>\nEIGEN_ALWAYS_INLINE void zip_in_place<Packet8c>(Packet8c& p1, Packet8c& p2) {\n  const int8x8x2_t tmp = vzip_s8(p1, p2);\n  p1 = tmp.val[0];\n  p2 = tmp.val[1];\n}\n\ntemplate<>\nEIGEN_ALWAYS_INLINE void zip_in_place<Packet16c>(Packet16c& p1, Packet16c& p2) {\n  const int8x16x2_t tmp = vzipq_s8(p1, p2);\n  p1 = tmp.val[0];\n  p2 = tmp.val[1];\n}\n\ntemplate<>\nEIGEN_ALWAYS_INLINE void zip_in_place<Packet8uc>(Packet8uc& p1, Packet8uc& p2) {\n  const uint8x8x2_t tmp = vzip_u8(p1, p2);\n  p1 = tmp.val[0];\n  p2 = tmp.val[1];\n}\n\ntemplate<>\nEIGEN_ALWAYS_INLINE void zip_in_place<Packet16uc>(Packet16uc& p1, Packet16uc& p2) 
{\n  const uint8x16x2_t tmp = vzipq_u8(p1, p2);\n  p1 = tmp.val[0];\n  p2 = tmp.val[1];\n}\n\ntemplate<>\nEIGEN_ALWAYS_INLINE void zip_in_place<Packet2i>(Packet2i& p1, Packet2i& p2) {\n  const int32x2x2_t tmp = vzip_s32(p1, p2);\n  p1 = tmp.val[0];\n  p2 = tmp.val[1];\n}\n\ntemplate<>\nEIGEN_ALWAYS_INLINE void zip_in_place<Packet4i>(Packet4i& p1, Packet4i& p2) {\n  const int32x4x2_t tmp = vzipq_s32(p1, p2);\n  p1 = tmp.val[0];\n  p2 = tmp.val[1];\n}\n\ntemplate<>\nEIGEN_ALWAYS_INLINE void zip_in_place<Packet2ui>(Packet2ui& p1, Packet2ui& p2) {\n  const uint32x2x2_t tmp = vzip_u32(p1, p2);\n  p1 = tmp.val[0];\n  p2 = tmp.val[1];\n}\n\ntemplate<>\nEIGEN_ALWAYS_INLINE void zip_in_place<Packet4ui>(Packet4ui& p1, Packet4ui& p2) {\n  const uint32x4x2_t tmp = vzipq_u32(p1, p2);\n  p1 = tmp.val[0];\n  p2 = tmp.val[1];\n}\n\ntemplate<>\nEIGEN_ALWAYS_INLINE void zip_in_place<Packet4s>(Packet4s& p1, Packet4s& p2) {\n  const int16x4x2_t tmp = vzip_s16(p1, p2);\n  p1 = tmp.val[0];\n  p2 = tmp.val[1];\n}\n\ntemplate<>\nEIGEN_ALWAYS_INLINE void zip_in_place<Packet8s>(Packet8s& p1, Packet8s& p2) {\n  const int16x8x2_t tmp = vzipq_s16(p1, p2);\n  p1 = tmp.val[0];\n  p2 = tmp.val[1];\n}\n\ntemplate<>\nEIGEN_ALWAYS_INLINE void zip_in_place<Packet4us>(Packet4us& p1, Packet4us& p2) {\n  const uint16x4x2_t tmp = vzip_u16(p1, p2);\n  p1 = tmp.val[0];\n  p2 = tmp.val[1];\n}\n\ntemplate<>\nEIGEN_ALWAYS_INLINE void zip_in_place<Packet8us>(Packet8us& p1, Packet8us& p2) {\n  const uint16x8x2_t tmp = vzipq_u16(p1, p2);\n  p1 = tmp.val[0];\n  p2 = tmp.val[1];\n}\n\ntemplate<typename Packet>\nEIGEN_ALWAYS_INLINE void ptranspose_impl(PacketBlock<Packet, 2>& kernel) {\n  zip_in_place(kernel.packet[0], kernel.packet[1]);\n}\n\ntemplate<typename Packet>\nEIGEN_ALWAYS_INLINE void ptranspose_impl(PacketBlock<Packet, 4>& kernel) {\n  zip_in_place(kernel.packet[0], kernel.packet[2]);\n  zip_in_place(kernel.packet[1], kernel.packet[3]);\n  zip_in_place(kernel.packet[0], kernel.packet[1]);\n  zip_in_place(kernel.packet[2], kernel.packet[3]);\n}\n\ntemplate<typename Packet>\nEIGEN_ALWAYS_INLINE void ptranspose_impl(PacketBlock<Packet, 8>& kernel) {\n  zip_in_place(kernel.packet[0], kernel.packet[4]);\n  zip_in_place(kernel.packet[1], kernel.packet[5]);\n  zip_in_place(kernel.packet[2], kernel.packet[6]);\n  zip_in_place(kernel.packet[3], kernel.packet[7]);\n\n  zip_in_place(kernel.packet[0], kernel.packet[2]);\n  zip_in_place(kernel.packet[1], kernel.packet[3]);\n  zip_in_place(kernel.packet[4], kernel.packet[6]);\n  zip_in_place(kernel.packet[5], kernel.packet[7]);\n  \n  zip_in_place(kernel.packet[0], kernel.packet[1]);\n  zip_in_place(kernel.packet[2], kernel.packet[3]);\n  zip_in_place(kernel.packet[4], kernel.packet[5]);\n  zip_in_place(kernel.packet[6], kernel.packet[7]);\n}\n\ntemplate<typename Packet>\nEIGEN_ALWAYS_INLINE void ptranspose_impl(PacketBlock<Packet, 16>& kernel) {\n  EIGEN_UNROLL_LOOP\n  for (int i=0; i<4; ++i) {\n    const int m = (1 << i);\n    EIGEN_UNROLL_LOOP\n    for (int j=0; j<m; ++j) {\n      const int n = (1 << (3-i));\n      EIGEN_UNROLL_LOOP\n      for (int k=0; k<n; ++k) {\n        const int idx = 2*j*n+k;\n        zip_in_place(kernel.packet[idx], kernel.packet[idx + n]);\n      }\n    }\n  }\n}\n\n} // namespace detail\n\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet2f, 2>& kernel) {\n  detail::ptranspose_impl(kernel);\n}\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet4f, 4>& kernel) {\n  detail::ptranspose_impl(kernel);\n}\n\nEIGEN_DEVICE_FUNC 
EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet4c, 4>& kernel)\n{\n  const int8x8_t a = vreinterpret_s8_s32(vset_lane_s32(kernel.packet[2], vdup_n_s32(kernel.packet[0]), 1));\n  const int8x8_t b = vreinterpret_s8_s32(vset_lane_s32(kernel.packet[3], vdup_n_s32(kernel.packet[1]), 1));\n\n  const int8x8x2_t zip8 = vzip_s8(a,b);\n  const int16x4x2_t zip16 = vzip_s16(vreinterpret_s16_s8(zip8.val[0]), vreinterpret_s16_s8(zip8.val[1]));\n\n  kernel.packet[0] = vget_lane_s32(vreinterpret_s32_s16(zip16.val[0]), 0);\n  kernel.packet[1] = vget_lane_s32(vreinterpret_s32_s16(zip16.val[0]), 1);\n  kernel.packet[2] = vget_lane_s32(vreinterpret_s32_s16(zip16.val[1]), 0);\n  kernel.packet[3] = vget_lane_s32(vreinterpret_s32_s16(zip16.val[1]), 1);\n}\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet8c, 8>& kernel) {\n  detail::ptranspose_impl(kernel);\n}\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet8c, 4>& kernel) {\n  detail::ptranspose_impl(kernel);\n}\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet16c, 16>& kernel) {\n  detail::ptranspose_impl(kernel);\n}\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet16c, 8>& kernel) {\n  detail::ptranspose_impl(kernel);\n}\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet16c, 4>& kernel) {\n  detail::ptranspose_impl(kernel);\n}\n\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet4uc, 4>& kernel)\n{\n  const uint8x8_t a = vreinterpret_u8_u32(vset_lane_u32(kernel.packet[2], vdup_n_u32(kernel.packet[0]), 1));\n  const uint8x8_t b = vreinterpret_u8_u32(vset_lane_u32(kernel.packet[3], vdup_n_u32(kernel.packet[1]), 1));\n\n  const uint8x8x2_t zip8 = vzip_u8(a,b);\n  const uint16x4x2_t zip16 = vzip_u16(vreinterpret_u16_u8(zip8.val[0]), vreinterpret_u16_u8(zip8.val[1]));\n\n  kernel.packet[0] = vget_lane_u32(vreinterpret_u32_u16(zip16.val[0]), 0);\n  kernel.packet[1] = vget_lane_u32(vreinterpret_u32_u16(zip16.val[0]), 1);\n  kernel.packet[2] = vget_lane_u32(vreinterpret_u32_u16(zip16.val[1]), 0);\n  kernel.packet[3] = vget_lane_u32(vreinterpret_u32_u16(zip16.val[1]), 1);\n}\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet8uc, 8>& kernel) {\n  detail::ptranspose_impl(kernel);\n}\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet8uc, 4>& kernel) {\n  detail::ptranspose_impl(kernel);\n}\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet16uc, 16>& kernel) {\n  detail::ptranspose_impl(kernel);\n}\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet16uc, 8>& kernel) {\n  detail::ptranspose_impl(kernel);\n}\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet16uc, 4>& kernel) {\n  detail::ptranspose_impl(kernel);\n}\n\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet4s, 4>& kernel) {\n  detail::ptranspose_impl(kernel);\n}\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet8s, 8>& kernel) {\n  detail::ptranspose_impl(kernel);\n}\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet8s, 4>& kernel) {\n  detail::ptranspose_impl(kernel);\n}\n\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet4us, 4>& kernel) {\n  detail::ptranspose_impl(kernel);\n}\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet8us, 8>& kernel) {\n  detail::ptranspose_impl(kernel);\n}\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void 
ptranspose(PacketBlock<Packet8us, 4>& kernel) {\n  detail::ptranspose_impl(kernel);\n}\n\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet2i, 2>& kernel) {\n  detail::ptranspose_impl(kernel);\n}\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet4i, 4>& kernel) {\n    detail::ptranspose_impl(kernel);\n}\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet2ui, 2>& kernel) {\n  detail::zip_in_place(kernel.packet[0], kernel.packet[1]);\n}\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet4ui, 4>& kernel) {\n  detail::ptranspose_impl(kernel);\n}\n\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void\nptranspose(PacketBlock<Packet2l, 2>& kernel)\n{\n#if EIGEN_ARCH_ARM64\n  const int64x2_t tmp1 = vzip1q_s64(kernel.packet[0], kernel.packet[1]);\n  kernel.packet[1] = vzip2q_s64(kernel.packet[0], kernel.packet[1]);\n  kernel.packet[0] = tmp1;\n#else\n  const int64x1_t tmp[2][2] = {\n    { vget_low_s64(kernel.packet[0]), vget_high_s64(kernel.packet[0]) },\n    { vget_low_s64(kernel.packet[1]), vget_high_s64(kernel.packet[1]) }\n  };\n\n  kernel.packet[0] = vcombine_s64(tmp[0][0], tmp[1][0]);\n  kernel.packet[1] = vcombine_s64(tmp[0][1], tmp[1][1]);\n#endif\n}\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void\nptranspose(PacketBlock<Packet2ul, 2>& kernel)\n{\n#if EIGEN_ARCH_ARM64\n  const uint64x2_t tmp1 = vzip1q_u64(kernel.packet[0], kernel.packet[1]);\n  kernel.packet[1] = vzip2q_u64(kernel.packet[0], kernel.packet[1]);\n  kernel.packet[0] = tmp1;\n#else\n  const uint64x1_t tmp[2][2] = {\n    { vget_low_u64(kernel.packet[0]), vget_high_u64(kernel.packet[0]) },\n    { vget_low_u64(kernel.packet[1]), vget_high_u64(kernel.packet[1]) }\n  };\n\n  kernel.packet[0] = vcombine_u64(tmp[0][0], tmp[1][0]);\n  kernel.packet[1] = vcombine_u64(tmp[0][1], tmp[1][1]);\n#endif\n}\n\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet2f pselect( const Packet2f& mask, const Packet2f& a, const Packet2f& b)\n{ return vbsl_f32(vreinterpret_u32_f32(mask), a, b); }\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4f pselect(const Packet4f& mask, const Packet4f& a, const Packet4f& b)\n{ return vbslq_f32(vreinterpretq_u32_f32(mask), a, b); }\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet8c pselect(const Packet8c& mask, const Packet8c& a, const Packet8c& b)\n{ return vbsl_s8(vreinterpret_u8_s8(mask), a, b); }\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet16c pselect(const Packet16c& mask, const Packet16c& a, const Packet16c& b)\n{ return vbslq_s8(vreinterpretq_u8_s8(mask), a, b); }\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet8uc pselect(const Packet8uc& mask, const Packet8uc& a, const Packet8uc& b)\n{ return vbsl_u8(mask, a, b); }\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet16uc pselect(const Packet16uc& mask, const Packet16uc& a, const Packet16uc& b)\n{ return vbslq_u8(mask, a, b); }\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4s pselect(const Packet4s& mask, const Packet4s& a, const Packet4s& b)\n{ return vbsl_s16(vreinterpret_u16_s16(mask), a, b); }\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet8s pselect(const Packet8s& mask, const Packet8s& a, const Packet8s& b)\n{ return vbslq_s16(vreinterpretq_u16_s16(mask), a, b); }\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4us pselect(const Packet4us& mask, const Packet4us& a, const Packet4us& b)\n{ return vbsl_u16(mask, a, b); }\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet8us pselect(const Packet8us& mask, 
const Packet8us& a, const Packet8us& b)\n{ return vbslq_u16(mask, a, b); }\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet2i pselect(const Packet2i& mask, const Packet2i& a, const Packet2i& b)\n{ return vbsl_s32(vreinterpret_u32_s32(mask), a, b); }\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4i pselect(const Packet4i& mask, const Packet4i& a, const Packet4i& b)\n{ return vbslq_s32(vreinterpretq_u32_s32(mask), a, b); }\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet2ui pselect(const Packet2ui& mask, const Packet2ui& a, const Packet2ui& b)\n{ return vbsl_u32(mask, a, b); }\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4ui pselect(const Packet4ui& mask, const Packet4ui& a, const Packet4ui& b)\n{ return vbslq_u32(mask, a, b); }\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet2l pselect(const Packet2l& mask, const Packet2l& a, const Packet2l& b)\n{ return vbslq_s64(vreinterpretq_u64_s64(mask), a, b); }\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet2ul pselect(const Packet2ul& mask, const Packet2ul& a, const Packet2ul& b)\n{ return vbslq_u64(mask, a, b); }\n\n// Use armv8 rounding intrinsics if available.\n#if EIGEN_ARCH_ARMV8\ntemplate<> EIGEN_STRONG_INLINE Packet2f print<Packet2f>(const Packet2f& a)\n{ return vrndn_f32(a); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f print<Packet4f>(const Packet4f& a)\n{ return vrndnq_f32(a); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet2f pfloor<Packet2f>(const Packet2f& a)\n{ return vrndm_f32(a); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f pfloor<Packet4f>(const Packet4f& a)\n{ return vrndmq_f32(a); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet2f pceil<Packet2f>(const Packet2f& a)\n{ return vrndp_f32(a); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f pceil<Packet4f>(const Packet4f& a)\n{ return vrndpq_f32(a); }\n\n#else\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f print(const Packet4f& a) {\n  // Adds and subtracts signum(a) * 2^23 to force rounding.\n  const Packet4f limit = pset1<Packet4f>(static_cast<float>(1<<23));\n  const Packet4f abs_a = pabs(a);\n  Packet4f r = padd(abs_a, limit);\n  // Don't compile away the addition and subtraction.\n  EIGEN_OPTIMIZATION_BARRIER(r);\n  r = psub(r, limit);\n  // If greater than limit, simply return a.  Otherwise, account for sign.\n  r = pselect(pcmp_lt(abs_a, limit),\n              pselect(pcmp_lt(a, pzero(a)), pnegate(r), r), a);\n  return r;\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet2f print(const Packet2f& a) {\n  // Adds and subtracts signum(a) * 2^23 to force rounding.\n  const Packet2f limit = pset1<Packet2f>(static_cast<float>(1<<23));\n  const Packet2f abs_a = pabs(a);\n  Packet2f r = padd(abs_a, limit);\n  // Don't compile away the addition and subtraction.\n  EIGEN_OPTIMIZATION_BARRIER(r);\n  r = psub(r, limit);\n  // If greater than limit, simply return a.  
Otherwise, account for sign.\n  r = pselect(pcmp_lt(abs_a, limit),\n              pselect(pcmp_lt(a, pzero(a)), pnegate(r), r), a);\n  return r;\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f pfloor<Packet4f>(const Packet4f& a)\n{\n  const Packet4f cst_1 = pset1<Packet4f>(1.0f);\n  Packet4f tmp  = print<Packet4f>(a);\n  // If greater, subtract one.\n  Packet4f mask = pcmp_lt(a, tmp);\n  mask = pand(mask, cst_1);\n  return psub(tmp, mask);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet2f pfloor<Packet2f>(const Packet2f& a)\n{\n  const Packet2f cst_1 = pset1<Packet2f>(1.0f);\n  Packet2f tmp  = print<Packet2f>(a);\n  // If greater, subtract one.\n  Packet2f mask = pcmp_lt(a, tmp);\n  mask = pand(mask, cst_1);\n  return psub(tmp, mask);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f pceil<Packet4f>(const Packet4f& a)\n{\n  const Packet4f cst_1 = pset1<Packet4f>(1.0f);\n  Packet4f tmp  = print<Packet4f>(a);\n  // If smaller, add one.\n  Packet4f mask = pcmp_lt(tmp, a);\n  mask = pand(mask, cst_1);\n  return padd(tmp, mask);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet2f pceil<Packet2f>(const Packet2f& a)\n{\n  const Packet2f cst_1 = pset1<Packet2f>(1.0f);\n  Packet2f tmp  = print<Packet2f>(a);\n  // If smaller, add one.\n  Packet2f mask = pcmp_lt(tmp, a);\n  mask = pand(mask, cst_1);\n  return padd(tmp, mask);\n}\n\n#endif\n
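\n// Editor's note: an illustrative scalar model (not upstream code) of the 2^23 trick\n// used by print() above. For |x| < 2^23 a float still has fractional mantissa bits,\n// so adding and then subtracting 2^23 makes the FPU's round-to-nearest mode drop the\n// fraction. Assumes the default rounding mode and <cmath>; round_via_bias is a\n// hypothetical name. E.g. round_via_bias(2.5f) == 2.0f (ties round to even).\n//\n//   inline float round_via_bias(float x) {\n//     const float limit = static_cast<float>(1 << 23);  // 2^23\n//     if (!(std::abs(x) < limit)) return x;     // already integral, or NaN\n//     volatile float r = std::abs(x) + limit;   // volatile stands in for\n//     r = r - limit;                            // EIGEN_OPTIMIZATION_BARRIER\n//     return x < 0.0f ? -r : r;                 // reapply the sign\n//   }\n\n/**\n * Computes the integer square root\n * @remarks The calculation is performed using an algorithm which iterates through each binary digit of the result\n *   and tests whether setting that digit to 1 would cause the square of the value to be greater than the argument\n *   value. The algorithm is described in detail here: http://ww1.microchip.com/downloads/en/AppNotes/91040a.pdf .\n */\ntemplate<> EIGEN_STRONG_INLINE Packet4uc psqrt(const Packet4uc& a) {\n  uint8x8_t x = vreinterpret_u8_u32(vdup_n_u32(a));\n  uint8x8_t res = vdup_n_u8(0);\n  uint8x8_t add = vdup_n_u8(0x8);\n  for (int i = 0; i < 4; i++)\n  {\n    const uint8x8_t temp = vorr_u8(res, add);\n    res = vbsl_u8(vcge_u8(x, vmul_u8(temp, temp)), temp, res);\n    add = vshr_n_u8(add, 1);\n  }\n  return vget_lane_u32(vreinterpret_u32_u8(res), 0);\n}\n/// @copydoc Eigen::internal::psqrt(const Packet4uc& a)\ntemplate<> EIGEN_STRONG_INLINE Packet8uc psqrt(const Packet8uc& a) {\n  uint8x8_t res = vdup_n_u8(0);\n  uint8x8_t add = vdup_n_u8(0x8);\n  for (int i = 0; i < 4; i++)\n  {\n    const uint8x8_t temp = vorr_u8(res, add);\n    res = vbsl_u8(vcge_u8(a, vmul_u8(temp, temp)), temp, res);\n    add = vshr_n_u8(add, 1);\n  }\n  return res;\n}\n/// @copydoc Eigen::internal::psqrt(const Packet4uc& a)\ntemplate<> EIGEN_STRONG_INLINE Packet16uc psqrt(const Packet16uc& a) {\n  uint8x16_t res = vdupq_n_u8(0);\n  uint8x16_t add = vdupq_n_u8(0x8);\n  for (int i = 0; i < 4; i++)\n  {\n    const uint8x16_t temp = vorrq_u8(res, add);\n    res = vbslq_u8(vcgeq_u8(a, vmulq_u8(temp, temp)), temp, res);\n    add = vshrq_n_u8(add, 1);\n  }\n  return res;\n}\n/// @copydoc Eigen::internal::psqrt(const Packet4uc& a)\ntemplate<> EIGEN_STRONG_INLINE Packet4us psqrt(const Packet4us& a) {\n  uint16x4_t res = vdup_n_u16(0);\n  uint16x4_t add = vdup_n_u16(0x80);\n  for (int i = 0; i < 8; i++)\n  {\n    const uint16x4_t temp = vorr_u16(res, add);\n    res = vbsl_u16(vcge_u16(a, vmul_u16(temp, temp)), temp, res);\n    add = vshr_n_u16(add, 1);\n  }\n  return res;\n}\n/// @copydoc Eigen::internal::psqrt(const Packet4uc& a)\ntemplate<> EIGEN_STRONG_INLINE Packet8us psqrt(const Packet8us& a) {\n  uint16x8_t res = vdupq_n_u16(0);\n  uint16x8_t add = 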
vdupq_n_u16(0x80);\n  for (int i = 0; i < 8; i++)\n  {\n    const uint16x8_t temp = vorrq_u16(res, add);\n    res = vbslq_u16(vcgeq_u16(a, vmulq_u16(temp, temp)), temp, res);\n    add = vshrq_n_u16(add, 1);\n  }\n  return res;\n}\n/// @copydoc Eigen::internal::psqrt(const Packet4uc& a)\ntemplate<> EIGEN_STRONG_INLINE Packet2ui psqrt(const Packet2ui& a) {\n  uint32x2_t res = vdup_n_u32(0);\n  uint32x2_t add = vdup_n_u32(0x8000);\n  for (int i = 0; i < 16; i++)\n  {\n    const uint32x2_t temp = vorr_u32(res, add);\n    res = vbsl_u32(vcge_u32(a, vmul_u32(temp, temp)), temp, res);\n    add = vshr_n_u32(add, 1);\n  }\n  return res;\n}\n/// @copydoc Eigen::internal::psqrt(const Packet4uc& a)\ntemplate<> EIGEN_STRONG_INLINE Packet4ui psqrt(const Packet4ui& a) {\n  uint32x4_t res = vdupq_n_u32(0);\n  uint32x4_t add = vdupq_n_u32(0x8000);\n  for (int i = 0; i < 16; i++)\n  {\n    const uint32x4_t temp = vorrq_u32(res, add);\n    res = vbslq_u32(vcgeq_u32(a, vmulq_u32(temp, temp)), temp, res);\n    add = vshrq_n_u32(add, 1);\n  }\n  return res;\n}\n
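\n// Editor's note: a scalar model (not upstream code) of the digit-by-digit method the\n// vectorized psqrt loops above implement. Starting from the highest candidate bit of\n// the result (0x8, 0x80 or 0x8000 for 8-, 16- and 32-bit lanes), each round keeps a\n// bit only if the squared candidate still fits under a. isqrt_u32 is a hypothetical\n// name; e.g. isqrt_u32(10) == 3.\n//\n//   inline uint32_t isqrt_u32(uint32_t a) {\n//     uint32_t res = 0;\n//     for (uint32_t add = 0x8000; add != 0; add >>= 1) {\n//       const uint32_t temp = res | add;   // tentatively set this result bit\n//       if (a >= temp * temp) res = temp;  // keep it while temp^2 <= a\n//     }\n//     return res;\n//   }\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f prsqrt(const Packet4f& a) {\n  // Compute approximate reciprocal sqrt.\n  Packet4f x = vrsqrteq_f32(a);\n  // Do Newton iterations for 1/sqrt(x).\n  x = vmulq_f32(vrsqrtsq_f32(vmulq_f32(a, x), x), x);\n  x = vmulq_f32(vrsqrtsq_f32(vmulq_f32(a, x), x), x);\n  const Packet4f infinity = pset1<Packet4f>(NumTraits<float>::infinity());\n  return pselect(pcmp_eq(a, pzero(a)), infinity, x);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet2f prsqrt(const Packet2f& a) {\n  // Compute approximate reciprocal sqrt.\n  Packet2f x = vrsqrte_f32(a);\n  // Do Newton iterations for 1/sqrt(x).\n  x = vmul_f32(vrsqrts_f32(vmul_f32(a, x), x), x);\n  x = vmul_f32(vrsqrts_f32(vmul_f32(a, x), x), x);\n  const Packet2f infinity = pset1<Packet2f>(NumTraits<float>::infinity());\n  return pselect(pcmp_eq(a, pzero(a)), infinity, x);\n}\n\n// Unfortunately vsqrt_f32 is only available for A64.\n#if EIGEN_ARCH_ARM64\ntemplate<> EIGEN_STRONG_INLINE Packet4f psqrt(const Packet4f& _x){return vsqrtq_f32(_x);}\ntemplate<> EIGEN_STRONG_INLINE Packet2f psqrt(const Packet2f& _x){return vsqrt_f32(_x); }\n#else\ntemplate<> EIGEN_STRONG_INLINE Packet4f psqrt(const Packet4f& a) {\n  const Packet4f infinity = pset1<Packet4f>(NumTraits<float>::infinity());\n  const Packet4f is_zero_or_inf = por(pcmp_eq(a, pzero(a)), pcmp_eq(a, infinity));\n  return pselect(is_zero_or_inf, a, pmul(a, prsqrt(a)));\n}\ntemplate<> EIGEN_STRONG_INLINE Packet2f psqrt(const Packet2f& a) {\n  const Packet2f infinity = pset1<Packet2f>(NumTraits<float>::infinity());\n  const Packet2f is_zero_or_inf = por(pcmp_eq(a, pzero(a)), pcmp_eq(a, infinity));\n  return pselect(is_zero_or_inf, a, pmul(a, prsqrt(a)));\n}\n#endif\n\n//---------- bfloat16 ----------\n// TODO: Add support for native armv8.6-a bfloat16_t\n\n// TODO: Guard if we have native bfloat16 support\ntypedef eigen_packet_wrapper<uint16x4_t, 19> Packet4bf;\n\ntemplate<> struct is_arithmetic<Packet4bf> { enum { value = true }; };\n\ntemplate<> struct packet_traits<bfloat16> : default_packet_traits\n{\n  typedef Packet4bf type;\n  typedef Packet4bf half;\n  enum\n  {\n    Vectorizable = 1,\n    AlignedOnScalar = 1,\n    size = 4,\n    HasHalfPacket = 0,\n\n    HasCmp       = 1,\n    HasAdd       = 1,\n    HasSub       = 1,\n    HasShift     = 1,\n    HasMul       = 1,\n    HasNegate    = 1,\n    HasAbs       = 1,\n    HasArg       = 0,\n    HasAbs2      = 1,\n    HasAbsDiff   = 1,\n    HasMin       = 1,\n    HasMax       = 1,\n    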
HasConj      = 1,\n    HasSetLinear = 0,\n    HasBlend     = 0,\n    HasDiv       = 1,\n    HasFloor     = 1,\n    HasCeil      = 1,\n    HasRint      = 1,\n\n    HasSin  = EIGEN_FAST_MATH,\n    HasCos  = EIGEN_FAST_MATH,\n    HasLog  = 1,\n    HasExp  = 1,\n    HasSqrt = 0,\n    HasTanh = EIGEN_FAST_MATH,\n    HasErf  = EIGEN_FAST_MATH,\n    HasBessel = 0,  // Issues with accuracy.\n    HasNdtri = 0\n  };\n};\n\ntemplate<> struct unpacket_traits<Packet4bf>\n{\n  typedef bfloat16 type;\n  typedef Packet4bf half;\n  enum\n  {\n    size = 4,\n    alignment = Aligned16,\n    vectorizable = true,\n    masked_load_available = false,\n    masked_store_available = false\n  };\n};\n\nnamespace detail {\ntemplate<>\nEIGEN_ALWAYS_INLINE void zip_in_place<Packet4bf>(Packet4bf& p1, Packet4bf& p2) {\n  const uint16x4x2_t tmp = vzip_u16(p1, p2);\n  p1 = tmp.val[0];\n  p2 = tmp.val[1];\n}\n} // namespace detail\n\nEIGEN_STRONG_INLINE Packet4bf F32ToBf16(const Packet4f& p)\n{\n  // See the scalar implementation in BFloat16.h for a comprehensible explanation\n  // of this fast rounding algorithm.\n  Packet4ui input = reinterpret_cast<Packet4ui>(p);\n\n  // lsb = (input >> 16) & 1\n  Packet4ui lsb = vandq_u32(vshrq_n_u32(input, 16), vdupq_n_u32(1));\n\n  // rounding_bias = 0x7fff + lsb\n  Packet4ui rounding_bias = vaddq_u32(lsb, vdupq_n_u32(0x7fff));\n\n  // input += rounding_bias\n  input = vaddq_u32(input, rounding_bias);\n\n  // input = input >> 16\n  input = vshrq_n_u32(input, 16);\n\n  // Replace float-nans by bfloat16-nans, that is 0x7fc0\n  const Packet4ui bf16_nan = vdupq_n_u32(0x7fc0);\n  const Packet4ui mask = vceqq_f32(p, p);\n  input = vbslq_u32(mask, input, bf16_nan);\n\n  // output = static_cast<uint16_t>(input)\n  return vmovn_u32(input);\n}\n
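\n// Editor's note: the same round-to-nearest-even narrowing written for one scalar\n// float (not upstream code; float_to_bf16_round is a hypothetical name). Adding\n// 0x7fff plus the lowest surviving bit before truncating the low 16 bits rounds\n// ties towards an even bfloat16 mantissa. Assumes <cstdint> and <cstring>.\n//\n//   inline uint16_t float_to_bf16_round(float f) {\n//     uint32_t input;\n//     std::memcpy(&input, &f, sizeof(input));     // bit-cast without aliasing UB\n//     if (f != f) return 0x7fc0;                  // canonical bfloat16 NaN\n//     const uint32_t lsb = (input >> 16) & 1;     // lowest bit that is kept\n//     input += 0x7fff + lsb;                      // bias so that ties go to even\n//     return static_cast<uint16_t>(input >> 16);  // sign + exponent + 7 mantissa bits\n//   }\n\nEIGEN_STRONG_INLINE Packet4f Bf16ToF32(const Packet4bf& p)\n{\n  return reinterpret_cast<Packet4f>(vshlq_n_u32(vmovl_u16(p), 16));\n}\n\nEIGEN_STRONG_INLINE Packet4bf F32MaskToBf16Mask(const Packet4f& p) {\n  return vmovn_u32(vreinterpretq_u32_f32(p));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4bf pset1<Packet4bf>(const bfloat16& from) {\n  return pset1<Packet4us>(from.value);\n}\n\ntemplate<> EIGEN_STRONG_INLINE bfloat16 pfirst<Packet4bf>(const Packet4bf& from) {\n  return bfloat16_impl::raw_uint16_to_bfloat16(static_cast<uint16_t>(pfirst<Packet4us>(from)));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4bf pload<Packet4bf>(const bfloat16* from)\n{\n  return pload<Packet4us>(reinterpret_cast<const uint16_t*>(from));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4bf ploadu<Packet4bf>(const bfloat16* from)\n{\n  return ploadu<Packet4us>(reinterpret_cast<const uint16_t*>(from));\n}\n\ntemplate<> EIGEN_STRONG_INLINE void pstore<bfloat16>(bfloat16* to, const Packet4bf& from)\n{\n  EIGEN_DEBUG_ALIGNED_STORE vst1_u16(reinterpret_cast<uint16_t*>(to), from);\n}\n\ntemplate<> EIGEN_STRONG_INLINE void pstoreu<bfloat16>(bfloat16* to, const Packet4bf& from)\n{\n  EIGEN_DEBUG_UNALIGNED_STORE vst1_u16(reinterpret_cast<uint16_t*>(to), from);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4bf ploaddup<Packet4bf>(const bfloat16* from)\n{\n  return ploaddup<Packet4us>(reinterpret_cast<const uint16_t*>(from));\n}\n\ntemplate <> EIGEN_STRONG_INLINE Packet4bf pabs(const Packet4bf& a) {\n  return F32ToBf16(pabs<Packet4f>(Bf16ToF32(a)));\n}\n\ntemplate <> EIGEN_STRONG_INLINE Packet4bf pmin<PropagateNumbers, Packet4bf>(const Packet4bf &a,\n                                                                            const Packet4bf &b)\n{\n  return F32ToBf16(pmin<PropagateNumbers, 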
Packet4f>(Bf16ToF32(a), Bf16ToF32(b)));\n}\ntemplate <> EIGEN_STRONG_INLINE Packet4bf pmin<PropagateNaN, Packet4bf>(const Packet4bf &a,\n                                                                        const Packet4bf &b)\n{\n  return F32ToBf16(pmin<PropagateNaN, Packet4f>(Bf16ToF32(a), Bf16ToF32(b)));\n}\n\ntemplate <> EIGEN_STRONG_INLINE Packet4bf pmin<Packet4bf>(const Packet4bf &a,\n                                                          const Packet4bf &b)\n{\n  return F32ToBf16(pmin<Packet4f>(Bf16ToF32(a), Bf16ToF32(b)));\n}\n\ntemplate <> EIGEN_STRONG_INLINE Packet4bf pmax<PropagateNumbers, Packet4bf>(const Packet4bf &a,\n                                                                            const Packet4bf &b)\n{\n  return F32ToBf16(pmax<PropagateNumbers, Packet4f>(Bf16ToF32(a), Bf16ToF32(b)));\n}\ntemplate <> EIGEN_STRONG_INLINE Packet4bf pmax<PropagateNaN, Packet4bf>(const Packet4bf &a,\n                                                                        const Packet4bf &b)\n{\n  return F32ToBf16(pmax<PropagateNaN, Packet4f>(Bf16ToF32(a), Bf16ToF32(b)));\n}\n\ntemplate <> EIGEN_STRONG_INLINE Packet4bf pmax<Packet4bf>(const Packet4bf &a,\n                                                          const Packet4bf &b)\n{\n  return F32ToBf16(pmax<Packet4f>(Bf16ToF32(a), Bf16ToF32(b)));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4bf plset<Packet4bf>(const bfloat16& a)\n{\n  return F32ToBf16(plset<Packet4f>(static_cast<float>(a)));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4bf por(const Packet4bf& a,const Packet4bf& b) {\n  return por<Packet4us>(a, b);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4bf pxor(const Packet4bf& a,const Packet4bf& b) {\n  return pxor<Packet4us>(a, b);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4bf pand(const Packet4bf& a,const Packet4bf& b) {\n  return pand<Packet4us>(a, b);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4bf pandnot(const Packet4bf& a,const Packet4bf& b) {\n  return pandnot<Packet4us>(a, b);\n}\n\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4bf pselect(const Packet4bf& mask, const Packet4bf& a,\n                                                      const Packet4bf& b)\n{\n  return pselect<Packet4us>(mask, a, b);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4bf print<Packet4bf>(const Packet4bf& a)\n{\n  return F32ToBf16(print<Packet4f>(Bf16ToF32(a)));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4bf pfloor<Packet4bf>(const Packet4bf& a)\n{\n  return F32ToBf16(pfloor<Packet4f>(Bf16ToF32(a)));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4bf pceil<Packet4bf>(const Packet4bf& a)\n{\n  return F32ToBf16(pceil<Packet4f>(Bf16ToF32(a)));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4bf pconj(const Packet4bf& a) { return a; }\n\ntemplate<> EIGEN_STRONG_INLINE Packet4bf padd<Packet4bf>(const Packet4bf& a, const Packet4bf& b) {\n  return F32ToBf16(padd<Packet4f>(Bf16ToF32(a), Bf16ToF32(b)));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4bf psub<Packet4bf>(const Packet4bf& a, const Packet4bf& b) {\n  return F32ToBf16(psub<Packet4f>(Bf16ToF32(a), Bf16ToF32(b)));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4bf pmul<Packet4bf>(const Packet4bf& a, const Packet4bf& b) {\n  return F32ToBf16(pmul<Packet4f>(Bf16ToF32(a), Bf16ToF32(b)));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4bf pdiv<Packet4bf>(const Packet4bf& a, const Packet4bf& b) {\n  return F32ToBf16(pdiv<Packet4f>(Bf16ToF32(a), Bf16ToF32(b)));\n}\n\ntemplate<>\nEIGEN_STRONG_INLINE Packet4bf pgather<bfloat16, Packet4bf>(const bfloat16* from, Index stride)\n{\n  return pgather<uint16_t, 
Packet4us>(reinterpret_cast<const uint16_t*>(from), stride);\n}\n\ntemplate<>\nEIGEN_STRONG_INLINE void pscatter<bfloat16, Packet4bf>(bfloat16* to, const Packet4bf& from, Index stride)\n{\n  pscatter<uint16_t, Packet4us>(reinterpret_cast<uint16_t*>(to), from, stride);\n}\n\ntemplate<> EIGEN_STRONG_INLINE bfloat16 predux<Packet4bf>(const Packet4bf& a)\n{\n  return static_cast<bfloat16>(predux<Packet4f>(Bf16ToF32(a)));\n}\n\ntemplate<> EIGEN_STRONG_INLINE bfloat16 predux_max<Packet4bf>(const Packet4bf& a)\n{\n  return static_cast<bfloat16>(predux_max<Packet4f>(Bf16ToF32(a)));\n}\n\ntemplate<> EIGEN_STRONG_INLINE bfloat16 predux_min<Packet4bf>(const Packet4bf& a)\n{\n  return static_cast<bfloat16>(predux_min<Packet4f>(Bf16ToF32(a)));\n}\n\ntemplate<> EIGEN_STRONG_INLINE bfloat16 predux_mul<Packet4bf>(const Packet4bf& a)\n{\n  return static_cast<bfloat16>(predux_mul<Packet4f>(Bf16ToF32(a)));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4bf preverse<Packet4bf>(const Packet4bf& a)\n{\n  return preverse<Packet4us>(a);\n}\n\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet4bf, 4>& kernel)\n{\n  detail::ptranspose_impl(kernel);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4bf pabsdiff<Packet4bf>(const Packet4bf& a, const Packet4bf& b)\n{\n  return F32ToBf16(pabsdiff<Packet4f>(Bf16ToF32(a), Bf16ToF32(b)));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4bf pcmp_eq<Packet4bf>(const Packet4bf& a, const Packet4bf& b)\n{\n  return F32MaskToBf16Mask(pcmp_eq<Packet4f>(Bf16ToF32(a), Bf16ToF32(b)));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4bf pcmp_lt<Packet4bf>(const Packet4bf& a, const Packet4bf& b)\n{\n  return F32MaskToBf16Mask(pcmp_lt<Packet4f>(Bf16ToF32(a), Bf16ToF32(b)));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4bf pcmp_lt_or_nan<Packet4bf>(const Packet4bf& a, const Packet4bf& b)\n{\n  return F32MaskToBf16Mask(pcmp_lt_or_nan<Packet4f>(Bf16ToF32(a), Bf16ToF32(b)));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4bf pcmp_le<Packet4bf>(const Packet4bf& a, const Packet4bf& b)\n{\n  return F32MaskToBf16Mask(pcmp_le<Packet4f>(Bf16ToF32(a), Bf16ToF32(b)));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4bf pnegate<Packet4bf>(const Packet4bf& a)\n{\n  return pxor<Packet4us>(a, pset1<Packet4us>(static_cast<uint16_t>(0x8000)));\n}\n\n//---------- double ----------\n\n// Clang 3.5 in the iOS toolchain has an ICE triggered by NEON intrinsics for double.\n// Confirmed at least with __apple_build_version__ = 6000054.\n#ifdef __apple_build_version__\n// Let's hope that by the time __apple_build_version__ hits the 601* range, the bug will be fixed.\n// https://gist.github.com/yamaya/2924292 suggests that the first 3 digits are only updated with\n// major toolchain updates.\n#define EIGEN_APPLE_DOUBLE_NEON_BUG (__apple_build_version__ < 6010000)\n#else\n#define EIGEN_APPLE_DOUBLE_NEON_BUG 0\n#endif\n\n#if EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG\n\n// Bug 907: workaround missing declarations of the following two functions in the ADK\n// Defining these functions as templates ensures that if these intrinsics are\n// already defined in arm_neon.h, then our workaround doesn't cause a conflict\n// and has lower priority in overload resolution.\ntemplate <typename T> uint64x2_t vreinterpretq_u64_f64(T a) { return (uint64x2_t) a; }\n\ntemplate <typename T> float64x2_t vreinterpretq_f64_u64(T a) { return (float64x2_t) a; }\n
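\n// Editor's note: a minimal model (hypothetical names, not upstream code) of why the\n// template trick above is safe: overload resolution prefers a plain function over a\n// function template for an exact match, so when arm_neon.h already declares the real\n// intrinsic, the intrinsic wins and the template is never instantiated.\n//\n//   int pick(int)                     { return 0; }  // plays the arm_neon.h intrinsic\n//   template <typename T> int pick(T) { return 1; }  // plays the workaround template\n//   // pick(42) == 0: the non-template overload is selected.\n\ntypedef float64x2_t Packet2d;\ntypedef float64x1_t Packet1d;\n\n// functionally equivalent to _mm_shuffle_pd in SSE (i.e. 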
shuffle(m, n, mask) equals _mm_shuffle_pd(m,n,mask))\n// Currently used in LU/arch/InverseSize4.h to enable a shared implementation\n// for fast inversion of matrices of size 4.\nEIGEN_STRONG_INLINE Packet2d shuffle(const Packet2d& m, const Packet2d& n, int mask)\n{\n  const double* a = reinterpret_cast<const double*>(&m);\n  const double* b = reinterpret_cast<const double*>(&n);\n  Packet2d res = {*(a + (mask & 1)), *(b + ((mask >> 1) & 1))};\n  return res;\n}\n\nEIGEN_STRONG_INLINE Packet2d vec2d_swizzle2(const Packet2d& a, const Packet2d& b, int mask)\n{\n  return shuffle(a, b, mask);\n}\nEIGEN_STRONG_INLINE Packet2d vec2d_unpacklo(const Packet2d& a,const Packet2d& b)\n{\n  return shuffle(a, b, 0);\n}\nEIGEN_STRONG_INLINE Packet2d vec2d_unpackhi(const Packet2d& a,const Packet2d& b)\n{\n  return shuffle(a, b, 3);\n}\n#define vec2d_duplane(a, p) \\\n  vdupq_laneq_f64(a, p)\n
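\n// Editor's note: the mask semantics spelled out (illustration, not upstream code).\n// Bit 0 of mask selects the lane taken from m, bit 1 the lane taken from n, exactly\n// as in SSE's _mm_shuffle_pd:\n//\n//   Packet2d m = {m0, m1}, n = {n0, n1};\n//   shuffle(m, n, 0) == {m0, n0}  // vec2d_unpacklo\n//   shuffle(m, n, 1) == {m1, n0}\n//   shuffle(m, n, 2) == {m0, n1}\n//   shuffle(m, n, 3) == {m1, n1}  // vec2d_unpackhi\n\ntemplate<> struct packet_traits<double>  : default_packet_traits\n{\n  typedef Packet2d type;\n  typedef Packet2d half;\n  enum\n  {\n    Vectorizable = 1,\n    AlignedOnScalar = 1,\n    size = 2,\n    HasHalfPacket = 0,\n\n    HasCmp       = 1,\n    HasAdd       = 1,\n    HasSub       = 1,\n    HasShift     = 1,\n    HasMul       = 1,\n    HasNegate    = 1,\n    HasAbs       = 1,\n    HasArg       = 0,\n    HasAbs2      = 1,\n    HasAbsDiff   = 1,\n    HasMin       = 1,\n    HasMax       = 1,\n    HasConj      = 1,\n    HasSetLinear = 0,\n    HasBlend     = 0,\n\n    HasDiv   = 1,\n    HasFloor = 1,\n    HasCeil = 1,\n    HasRint = 1,\n\n    HasSin  = 0,\n    HasCos  = 0,\n    HasLog  = 1,\n    HasExp  = 1,\n    HasSqrt = 1,\n    HasRsqrt = 1,\n    HasTanh = 0,\n    HasErf  = 0\n  };\n};\n\ntemplate<> struct unpacket_traits<Packet2d>\n{\n  typedef double type;\n  typedef Packet2d half;\n  typedef Packet2l integer_packet;\n  enum\n  {\n    size = 2,\n    alignment = Aligned16,\n    vectorizable = true,\n    masked_load_available = false,\n    masked_store_available = false\n  };\n};\n\ntemplate<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double&  from) { return vdupq_n_f64(from); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet2d plset<Packet2d>(const double& a)\n{\n  const double c[] = {0.0,1.0};\n  return vaddq_f64(pset1<Packet2d>(a), vld1q_f64(c));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet2d padd<Packet2d>(const Packet2d& a, const Packet2d& b) { return vaddq_f64(a,b); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet2d psub<Packet2d>(const Packet2d& a, const Packet2d& b) { return vsubq_f64(a,b); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet2d pxor<Packet2d>(const Packet2d& , const Packet2d& );\ntemplate<> EIGEN_STRONG_INLINE Packet2d paddsub<Packet2d>(const Packet2d& a, const Packet2d& b){\n  const Packet2d mask = {numext::bit_cast<double>(0x8000000000000000ull),0.0};\n  return padd(a, pxor(mask, b));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a) { return vnegq_f64(a); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet2d pconj(const Packet2d& a) { return a; }\n\ntemplate<> EIGEN_STRONG_INLINE Packet2d pmul<Packet2d>(const Packet2d& a, const Packet2d& b) { return vmulq_f64(a,b); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet2d pdiv<Packet2d>(const Packet2d& a, const Packet2d& b) { return vdivq_f64(a,b); }\n\n#ifdef __ARM_FEATURE_FMA\n// See bug 936. 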
See above comment about FMA for float.\ntemplate<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c)\n{ return vfmaq_f64(c,a,b); }\n#else\ntemplate<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c)\n{ return vmlaq_f64(c,a,b); }\n#endif\n\ntemplate<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) { return vminq_f64(a,b); }\n\n#ifdef __ARM_FEATURE_NUMERIC_MAXMIN\n// numeric max and min are only available if ARM_FEATURE_NUMERIC_MAXMIN is defined (which can only be the case for Armv8 systems).\ntemplate<> EIGEN_STRONG_INLINE Packet2d pmin<PropagateNumbers, Packet2d>(const Packet2d& a, const Packet2d& b) { return vminnmq_f64(a, b); }\ntemplate<> EIGEN_STRONG_INLINE Packet2d pmax<PropagateNumbers, Packet2d>(const Packet2d& a, const Packet2d& b) { return vmaxnmq_f64(a, b); }\n\n#endif\n\ntemplate<> EIGEN_STRONG_INLINE Packet2d pmin<PropagateNaN, Packet2d>(const Packet2d& a, const Packet2d& b) { return pmin<Packet2d>(a, b); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) { return vmaxq_f64(a,b); }\n\n\ntemplate<> EIGEN_STRONG_INLINE Packet2d pmax<PropagateNaN, Packet2d>(const Packet2d& a, const Packet2d& b) { return pmax<Packet2d>(a, b); }\n\n// Logical operations are not supported for double, so we have to use reinterpret casts via NEON intrinsics\ntemplate<> EIGEN_STRONG_INLINE Packet2d pand<Packet2d>(const Packet2d& a, const Packet2d& b)\n{ return vreinterpretq_f64_u64(vandq_u64(vreinterpretq_u64_f64(a),vreinterpretq_u64_f64(b))); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet2d por<Packet2d>(const Packet2d& a, const Packet2d& b)\n{ return vreinterpretq_f64_u64(vorrq_u64(vreinterpretq_u64_f64(a),vreinterpretq_u64_f64(b))); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet2d pxor<Packet2d>(const Packet2d& a, const Packet2d& b)\n{ return vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(a),vreinterpretq_u64_f64(b))); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet2d pandnot<Packet2d>(const Packet2d& a, const Packet2d& b)\n{ return vreinterpretq_f64_u64(vbicq_u64(vreinterpretq_u64_f64(a),vreinterpretq_u64_f64(b))); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet2d pcmp_le(const Packet2d& a, const Packet2d& b)\n{ return vreinterpretq_f64_u64(vcleq_f64(a,b)); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet2d pcmp_lt(const Packet2d& a, const Packet2d& b)\n{ return vreinterpretq_f64_u64(vcltq_f64(a,b)); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet2d pcmp_lt_or_nan(const Packet2d& a, const Packet2d& b)\n{ return vreinterpretq_f64_u32(vmvnq_u32(vreinterpretq_u32_u64(vcgeq_f64(a,b)))); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet2d pcmp_eq(const Packet2d& a, const Packet2d& b)\n{ return vreinterpretq_f64_u64(vceqq_f64(a,b)); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet2d pload<Packet2d>(const double* from)\n{ EIGEN_DEBUG_ALIGNED_LOAD return vld1q_f64(from); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from)\n{ EIGEN_DEBUG_UNALIGNED_LOAD return vld1q_f64(from); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet2d ploaddup<Packet2d>(const double* from) { return vld1q_dup_f64(from); }\ntemplate<> EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet2d& from)\n{ EIGEN_DEBUG_ALIGNED_STORE vst1q_f64(to,from); }\n\ntemplate<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet2d& from)\n{ EIGEN_DEBUG_UNALIGNED_STORE vst1q_f64(to,from); }\n\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet2d pgather<double, 
Packet2d>(const double* from, Index stride)\n{\n  Packet2d res = pset1<Packet2d>(0.0);\n  res = vld1q_lane_f64(from + 0*stride, res, 0);\n  res = vld1q_lane_f64(from + 1*stride, res, 1);\n  return res;\n}\n\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter<double, Packet2d>(double* to, const Packet2d& from, Index stride)\n{\n  vst1q_lane_f64(to + stride*0, from, 0);\n  vst1q_lane_f64(to + stride*1, from, 1);\n}\n\ntemplate<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { EIGEN_ARM_PREFETCH(addr); }\n\n// FIXME only store the first 2 elements?\ntemplate<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { return vgetq_lane_f64(a,0); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a)\n{ return vcombine_f64(vget_high_f64(a), vget_low_f64(a)); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet2d pabs(const Packet2d& a) { return vabsq_f64(a); }\n\n#if EIGEN_COMP_CLANG && defined(__apple_build_version__)\n// workaround ICE, see bug 907\ntemplate<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a)\n{ return (vget_low_f64(a) + vget_high_f64(a))[0]; }\n#else\ntemplate<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a)\n{ return vget_lane_f64(vget_low_f64(a) + vget_high_f64(a), 0); }\n#endif\n\n// Other reduction functions:\n// mul\n#if EIGEN_COMP_CLANG && defined(__apple_build_version__)\ntemplate<> EIGEN_STRONG_INLINE double predux_mul<Packet2d>(const Packet2d& a)\n{ return (vget_low_f64(a) * vget_high_f64(a))[0]; }\n#else\ntemplate<> EIGEN_STRONG_INLINE double predux_mul<Packet2d>(const Packet2d& a)\n{ return vget_lane_f64(vget_low_f64(a) * vget_high_f64(a), 0); }\n#endif\n\n// min\ntemplate<> EIGEN_STRONG_INLINE double predux_min<Packet2d>(const Packet2d& a)\n{ return vgetq_lane_f64(vpminq_f64(a,a), 0); }\n\n// max\ntemplate<> EIGEN_STRONG_INLINE double predux_max<Packet2d>(const Packet2d& a)\n{ return vgetq_lane_f64(vpmaxq_f64(a,a), 0); }\n\n\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void\nptranspose(PacketBlock<Packet2d, 2>& kernel)\n{\n  const float64x2_t tmp1 = vzip1q_f64(kernel.packet[0], kernel.packet[1]);\n  const float64x2_t tmp2 = vzip2q_f64(kernel.packet[0], kernel.packet[1]);\n\n  kernel.packet[0] = tmp1;\n  kernel.packet[1] = tmp2;\n}\n\ntemplate<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet2d pselect( const Packet2d& mask, const Packet2d& a, const Packet2d& b)\n{ return vbslq_f64(vreinterpretq_u64_f64(mask), a, b); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet2d print<Packet2d>(const Packet2d& a)\n{ return vrndnq_f64(a); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet2d pfloor<Packet2d>(const Packet2d& a)\n{ return vrndmq_f64(a); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet2d pceil<Packet2d>(const Packet2d& a)\n{ return vrndpq_f64(a); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet2d pldexp<Packet2d>(const Packet2d& a, const Packet2d& exponent)\n{ return pldexp_generic(a, exponent); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet2d pfrexp<Packet2d>(const Packet2d& a, Packet2d& exponent)\n{ return pfrexp_generic(a,exponent); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet2d pset1frombits<Packet2d>(uint64_t from)\n{ return vreinterpretq_f64_u64(vdupq_n_u64(from)); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet2d prsqrt(const Packet2d& a) {\n  // Compute approximate reciprocal sqrt.\n  Packet2d x = vrsqrteq_f64(a);\n  // Do Newton iterations for 1/sqrt(x).\n  x = vmulq_f64(vrsqrtsq_f64(vmulq_f64(a, x), x), x);\n  x = vmulq_f64(vrsqrtsq_f64(vmulq_f64(a, x), x), x);\n  x = vmulq_f64(vrsqrtsq_f64(vmulq_f64(a, x), x), x);\n  const 
Packet2d infinity = pset1<Packet2d>(NumTraits<double>::infinity());\n  return pselect(pcmp_eq(a, pzero(a)), infinity, x);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet2d psqrt(const Packet2d& _x){ return vsqrtq_f64(_x); }\n\n#endif // EIGEN_ARCH_ARM64\n\n// Do we have an fp16 types and supporting Neon intrinsics?\n#if EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC\ntypedef float16x4_t Packet4hf;\ntypedef float16x8_t Packet8hf;\n\ntemplate <>\nstruct packet_traits<Eigen::half> : default_packet_traits {\n  typedef Packet8hf type;\n  typedef Packet4hf half;\n  enum {\n    Vectorizable = 1,\n    AlignedOnScalar = 1,\n    size = 8,\n    HasHalfPacket = 1,\n\n    HasCmp = 1,\n    HasCast = 1,\n    HasAdd = 1,\n    HasSub = 1,\n    HasShift = 1,\n    HasMul = 1,\n    HasNegate = 1,\n    HasAbs = 1,\n    HasArg = 0,\n    HasAbs2 = 1,\n    HasAbsDiff = 0,\n    HasMin = 1,\n    HasMax = 1,\n    HasConj = 1,\n    HasSetLinear = 0,\n    HasBlend = 0,\n    HasInsert = 1,\n    HasReduxp = 1,\n    HasDiv = 1,\n    HasFloor = 1,\n    HasCeil = 1,\n    HasRint = 1,\n    HasSin = 0,\n    HasCos = 0,\n    HasLog = 0,\n    HasExp = 0,\n    HasSqrt = 1,\n    HasRsqrt = 1,\n    HasErf = EIGEN_FAST_MATH,\n    HasBessel = 0,  // Issues with accuracy.\n    HasNdtri = 0\n  };\n};\n\ntemplate <>\nstruct unpacket_traits<Packet4hf> {\n  typedef Eigen::half type;\n  typedef Packet4hf half;\n  enum {\n    size = 4,\n    alignment = Aligned16,\n    vectorizable = true,\n    masked_load_available = false,\n    masked_store_available = false\n  };\n};\n\ntemplate <>\nstruct unpacket_traits<Packet8hf> {\n  typedef Eigen::half type;\n  typedef Packet4hf half;\n  enum {\n    size = 8,\n    alignment = Aligned16,\n    vectorizable = true,\n    masked_load_available = false,\n    masked_store_available = false\n  };\n};\n\ntemplate<>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4hf predux_half_dowto4<Packet8hf>(const Packet8hf& a) {\n  return vadd_f16(vget_low_f16(a), vget_high_f16(a));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet8hf pset1<Packet8hf>(const Eigen::half& from) {\n  return vdupq_n_f16(from.x);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4hf pset1<Packet4hf>(const Eigen::half& from) {\n  return vdup_n_f16(from.x);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet8hf plset<Packet8hf>(const Eigen::half& a) {\n  const float16_t f[] = {0, 1, 2, 3, 4, 5, 6, 7};\n  Packet8hf countdown = vld1q_f16(f);\n  return vaddq_f16(pset1<Packet8hf>(a), countdown);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4hf plset<Packet4hf>(const Eigen::half& a) {\n  const float16_t f[] = {0, 1, 2, 3};\n  Packet4hf countdown = vld1_f16(f);\n  return vadd_f16(pset1<Packet4hf>(a), countdown);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet8hf padd<Packet8hf>(const Packet8hf& a, const Packet8hf& b) {\n  return vaddq_f16(a, b);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4hf padd<Packet4hf>(const Packet4hf& a, const Packet4hf& b) {\n  return vadd_f16(a, b);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet8hf psub<Packet8hf>(const Packet8hf& a, const Packet8hf& b) {\n  return vsubq_f16(a, b);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4hf psub<Packet4hf>(const Packet4hf& a, const Packet4hf& b) {\n  return vsub_f16(a, b);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet8hf pnegate(const Packet8hf& a) {\n  return vnegq_f16(a);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4hf pnegate(const Packet4hf& a) {\n  return vneg_f16(a);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet8hf pconj(const Packet8hf& a) {\n  return a;\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4hf 
pconj(const Packet4hf& a) {\n  return a;\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet8hf pmul<Packet8hf>(const Packet8hf& a, const Packet8hf& b) {\n  return vmulq_f16(a, b);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4hf pmul<Packet4hf>(const Packet4hf& a, const Packet4hf& b) {\n  return vmul_f16(a, b);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet8hf pdiv<Packet8hf>(const Packet8hf& a, const Packet8hf& b) {\n  return vdivq_f16(a, b);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4hf pdiv<Packet4hf>(const Packet4hf& a, const Packet4hf& b) {\n  return vdiv_f16(a, b);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet8hf pmadd(const Packet8hf& a, const Packet8hf& b, const Packet8hf& c) {\n  return vfmaq_f16(c, a, b);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4hf pmadd(const Packet4hf& a, const Packet4hf& b, const Packet4hf& c) {\n  return vfma_f16(c, a, b);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet8hf pmin<Packet8hf>(const Packet8hf& a, const Packet8hf& b) {\n  return vminq_f16(a, b);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4hf pmin<Packet4hf>(const Packet4hf& a, const Packet4hf& b) {\n  return vmin_f16(a, b);\n}\n\n#ifdef __ARM_FEATURE_NUMERIC_MAXMIN\n// numeric max and min are only available if ARM_FEATURE_NUMERIC_MAXMIN is defined (which can only be the case for Armv8 systems).\ntemplate<> EIGEN_STRONG_INLINE Packet4hf pmin<PropagateNumbers, Packet4hf>(const Packet4hf& a, const Packet4hf& b) { return vminnm_f16(a, b); }\ntemplate<> EIGEN_STRONG_INLINE Packet8hf pmin<PropagateNumbers, Packet8hf>(const Packet8hf& a, const Packet8hf& b) { return vminnmq_f16(a, b); }\n#endif\n\ntemplate<> EIGEN_STRONG_INLINE Packet4hf pmin<PropagateNaN, Packet4hf>(const Packet4hf& a, const Packet4hf& b) { return pmin<Packet4hf>(a, b); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet8hf pmin<PropagateNaN, Packet8hf>(const Packet8hf& a, const Packet8hf& b) { return pmin<Packet8hf>(a, b); }\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet8hf pmax<Packet8hf>(const Packet8hf& a, const Packet8hf& b) {\n  return vmaxq_f16(a, b);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4hf pmax<Packet4hf>(const Packet4hf& a, const Packet4hf& b) {\n  return vmax_f16(a, b);\n}\n\n#ifdef __ARM_FEATURE_NUMERIC_MAXMIN\n// numeric max and min are only available if ARM_FEATURE_NUMERIC_MAXMIN is defined (which can only be the case for Armv8 systems).\ntemplate<> EIGEN_STRONG_INLINE Packet4hf pmax<PropagateNumbers, Packet4hf>(const Packet4hf& a, const Packet4hf& b) { return vmaxnm_f16(a, b); }\ntemplate<> EIGEN_STRONG_INLINE Packet8hf pmax<PropagateNumbers, Packet8hf>(const Packet8hf& a, const Packet8hf& b) { return vmaxnmq_f16(a, b); }\n#endif\n\ntemplate<> EIGEN_STRONG_INLINE Packet4hf pmax<PropagateNaN, Packet4hf>(const Packet4hf& a, const Packet4hf& b) { return pmax<Packet4hf>(a, b); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet8hf pmax<PropagateNaN, Packet8hf>(const Packet8hf& a, const Packet8hf& b) { return pmax<Packet8hf>(a, b); }\n\n#define EIGEN_MAKE_ARM_FP16_CMP_8(name)                                               \\\n  template <>                                                                         \\\n  EIGEN_STRONG_INLINE Packet8hf pcmp_##name(const Packet8hf& a, const Packet8hf& b) { \\\n    return vreinterpretq_f16_u16(vc##name##q_f16(a, b));                              \\\n  }\n\n#define EIGEN_MAKE_ARM_FP16_CMP_4(name)                                               \\\n  template <>                                                                         \\\n  EIGEN_STRONG_INLINE Packet4hf pcmp_##name(const Packet4hf& a, 
const Packet4hf& b) { \\\n    return vreinterpret_f16_u16(vc##name##_f16(a, b));                                \\\n  }\n\nEIGEN_MAKE_ARM_FP16_CMP_8(eq)\nEIGEN_MAKE_ARM_FP16_CMP_8(lt)\nEIGEN_MAKE_ARM_FP16_CMP_8(le)\n\nEIGEN_MAKE_ARM_FP16_CMP_4(eq)\nEIGEN_MAKE_ARM_FP16_CMP_4(lt)\nEIGEN_MAKE_ARM_FP16_CMP_4(le)\n\n#undef EIGEN_MAKE_ARM_FP16_CMP_8\n#undef EIGEN_MAKE_ARM_FP16_CMP_4\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet8hf pcmp_lt_or_nan<Packet8hf>(const Packet8hf& a, const Packet8hf& b) {\n  return vreinterpretq_f16_u16(vmvnq_u16(vcgeq_f16(a, b)));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4hf pcmp_lt_or_nan<Packet4hf>(const Packet4hf& a, const Packet4hf& b) {\n  return vreinterpret_f16_u16(vmvn_u16(vcge_f16(a, b)));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet8hf print<Packet8hf>(const Packet8hf& a)\n{ return vrndnq_f16(a); }\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4hf print<Packet4hf>(const Packet4hf& a)\n{ return vrndn_f16(a); }\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet8hf pfloor<Packet8hf>(const Packet8hf& a)\n{ return vrndmq_f16(a); }\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4hf pfloor<Packet4hf>(const Packet4hf& a)\n{ return vrndm_f16(a); }\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet8hf pceil<Packet8hf>(const Packet8hf& a)\n{ return vrndpq_f16(a); }\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4hf pceil<Packet4hf>(const Packet4hf& a)\n{ return vrndp_f16(a); }\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet8hf psqrt<Packet8hf>(const Packet8hf& a) {\n  return vsqrtq_f16(a);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4hf psqrt<Packet4hf>(const Packet4hf& a) {\n  return vsqrt_f16(a);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet8hf pand<Packet8hf>(const Packet8hf& a, const Packet8hf& b) {\n  return vreinterpretq_f16_u16(vandq_u16(vreinterpretq_u16_f16(a), vreinterpretq_u16_f16(b)));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4hf pand<Packet4hf>(const Packet4hf& a, const Packet4hf& b) {\n  return vreinterpret_f16_u16(vand_u16(vreinterpret_u16_f16(a), vreinterpret_u16_f16(b)));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet8hf por<Packet8hf>(const Packet8hf& a, const Packet8hf& b) {\n  return vreinterpretq_f16_u16(vorrq_u16(vreinterpretq_u16_f16(a), vreinterpretq_u16_f16(b)));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4hf por<Packet4hf>(const Packet4hf& a, const Packet4hf& b) {\n  return vreinterpret_f16_u16(vorr_u16(vreinterpret_u16_f16(a), vreinterpret_u16_f16(b)));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet8hf pxor<Packet8hf>(const Packet8hf& a, const Packet8hf& b) {\n  return vreinterpretq_f16_u16(veorq_u16(vreinterpretq_u16_f16(a), vreinterpretq_u16_f16(b)));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4hf pxor<Packet4hf>(const Packet4hf& a, const Packet4hf& b) {\n  return vreinterpret_f16_u16(veor_u16(vreinterpret_u16_f16(a), vreinterpret_u16_f16(b)));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet8hf pandnot<Packet8hf>(const Packet8hf& a, const Packet8hf& b) {\n  return vreinterpretq_f16_u16(vbicq_u16(vreinterpretq_u16_f16(a), vreinterpretq_u16_f16(b)));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4hf pandnot<Packet4hf>(const Packet4hf& a, const Packet4hf& b) {\n  return vreinterpret_f16_u16(vbic_u16(vreinterpret_u16_f16(a), vreinterpret_u16_f16(b)));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet8hf pload<Packet8hf>(const Eigen::half* from) {\n  EIGEN_DEBUG_ALIGNED_LOAD return vld1q_f16(reinterpret_cast<const float16_t*>(from));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4hf pload<Packet4hf>(const Eigen::half* from) {\n  EIGEN_DEBUG_ALIGNED_LOAD return 
vld1_f16(reinterpret_cast<const float16_t*>(from));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet8hf ploadu<Packet8hf>(const Eigen::half* from) {\n  EIGEN_DEBUG_UNALIGNED_LOAD return vld1q_f16(reinterpret_cast<const float16_t*>(from));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4hf ploadu<Packet4hf>(const Eigen::half* from) {\n  EIGEN_DEBUG_UNALIGNED_LOAD return vld1_f16(reinterpret_cast<const float16_t*>(from));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet8hf ploaddup<Packet8hf>(const Eigen::half* from) {\n  Packet8hf packet;\n  packet[0] = from[0].x;\n  packet[1] = from[0].x;\n  packet[2] = from[1].x;\n  packet[3] = from[1].x;\n  packet[4] = from[2].x;\n  packet[5] = from[2].x;\n  packet[6] = from[3].x;\n  packet[7] = from[3].x;\n  return packet;\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4hf ploaddup<Packet4hf>(const Eigen::half* from) {\n  float16x4_t packet;\n  float16_t* tmp;\n  tmp = (float16_t*)&packet;\n  tmp[0] = from[0].x;\n  tmp[1] = from[0].x;\n  tmp[2] = from[1].x;\n  tmp[3] = from[1].x;\n  return packet;\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet8hf ploadquad<Packet8hf>(const Eigen::half* from) {\n  Packet4hf lo, hi;\n  lo = vld1_dup_f16(reinterpret_cast<const float16_t*>(from));\n  hi = vld1_dup_f16(reinterpret_cast<const float16_t*>(from+1));\n  return vcombine_f16(lo, hi);\n}\n\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet8hf pinsertfirst(const Packet8hf& a, Eigen::half b) { return vsetq_lane_f16(b.x, a, 0); }\n\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4hf pinsertfirst(const Packet4hf& a, Eigen::half b) { return vset_lane_f16(b.x, a, 0); }\n\ntemplate <>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet8hf pselect(const Packet8hf& mask, const Packet8hf& a, const Packet8hf& b) {\n  return vbslq_f16(vreinterpretq_u16_f16(mask), a, b);\n}\n\ntemplate <>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4hf pselect(const Packet4hf& mask, const Packet4hf& a, const Packet4hf& b) {\n  return vbsl_f16(vreinterpret_u16_f16(mask), a, b);\n}\n\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet8hf pinsertlast(const Packet8hf& a, Eigen::half b) { return vsetq_lane_f16(b.x, a, 7); }\n\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4hf pinsertlast(const Packet4hf& a, Eigen::half b) { return vset_lane_f16(b.x, a, 3); }\n\ntemplate <>\nEIGEN_STRONG_INLINE void pstore<Eigen::half>(Eigen::half* to, const Packet8hf& from) {\n  EIGEN_DEBUG_ALIGNED_STORE vst1q_f16(reinterpret_cast<float16_t*>(to), from);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE void pstore<Eigen::half>(Eigen::half* to, const Packet4hf& from) {\n  EIGEN_DEBUG_ALIGNED_STORE vst1_f16(reinterpret_cast<float16_t*>(to), from);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE void pstoreu<Eigen::half>(Eigen::half* to, const Packet8hf& from) {\n  EIGEN_DEBUG_UNALIGNED_STORE vst1q_f16(reinterpret_cast<float16_t*>(to), from);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE void pstoreu<Eigen::half>(Eigen::half* to, const Packet4hf& from) {\n  EIGEN_DEBUG_UNALIGNED_STORE vst1_f16(reinterpret_cast<float16_t*>(to), from);\n}\n\ntemplate <>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet8hf pgather<Eigen::half, Packet8hf>(const Eigen::half* from, Index stride) {\n  Packet8hf res = pset1<Packet8hf>(Eigen::half(0.f));\n  res = vsetq_lane_f16(from[0 * stride].x, res, 0);\n  res = vsetq_lane_f16(from[1 * stride].x, res, 1);\n  res = vsetq_lane_f16(from[2 * stride].x, res, 2);\n  res = vsetq_lane_f16(from[3 * stride].x, res, 3);\n  res = vsetq_lane_f16(from[4 * stride].x, res, 4);\n  res = vsetq_lane_f16(from[5 * stride].x, res, 5);\n  res = vsetq_lane_f16(from[6 * 
stride].x, res, 6);\n  res = vsetq_lane_f16(from[7 * stride].x, res, 7);\n  return res;\n}\n\ntemplate <>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4hf pgather<Eigen::half, Packet4hf>(const Eigen::half* from, Index stride) {\n  Packet4hf res = pset1<Packet4hf>(Eigen::half(0.f));\n  res = vset_lane_f16(from[0 * stride].x, res, 0);\n  res = vset_lane_f16(from[1 * stride].x, res, 1);\n  res = vset_lane_f16(from[2 * stride].x, res, 2);\n  res = vset_lane_f16(from[3 * stride].x, res, 3);\n  return res;\n}\n\ntemplate <>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter<Eigen::half, Packet8hf>(Eigen::half* to, const Packet8hf& from, Index stride) {\n  to[stride * 0].x = vgetq_lane_f16(from, 0);\n  to[stride * 1].x = vgetq_lane_f16(from, 1);\n  to[stride * 2].x = vgetq_lane_f16(from, 2);\n  to[stride * 3].x = vgetq_lane_f16(from, 3);\n  to[stride * 4].x = vgetq_lane_f16(from, 4);\n  to[stride * 5].x = vgetq_lane_f16(from, 5);\n  to[stride * 6].x = vgetq_lane_f16(from, 6);\n  to[stride * 7].x = vgetq_lane_f16(from, 7);\n}\n\ntemplate <>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter<Eigen::half, Packet4hf>(Eigen::half* to, const Packet4hf& from, Index stride) {\n  to[stride * 0].x = vget_lane_f16(from, 0);\n  to[stride * 1].x = vget_lane_f16(from, 1);\n  to[stride * 2].x = vget_lane_f16(from, 2);\n  to[stride * 3].x = vget_lane_f16(from, 3);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE void prefetch<Eigen::half>(const Eigen::half* addr) {\n  EIGEN_ARM_PREFETCH(addr);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Eigen::half pfirst<Packet8hf>(const Packet8hf& a) {\n  float16_t x[8];\n  vst1q_f16(x, a);\n  Eigen::half h;\n  h.x = x[0];\n  return h;\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Eigen::half pfirst<Packet4hf>(const Packet4hf& a) {\n  float16_t x[4];\n  vst1_f16(x, a);\n  Eigen::half h;\n  h.x = x[0];\n  return h;\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet8hf preverse(const Packet8hf& a) {\n  float16x4_t a_lo, a_hi;\n  Packet8hf a_r64;\n\n  a_r64 = vrev64q_f16(a);\n  a_lo = vget_low_f16(a_r64);\n  a_hi = vget_high_f16(a_r64);\n  return vcombine_f16(a_hi, a_lo);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4hf preverse<Packet4hf>(const Packet4hf& a) {\n  return vrev64_f16(a);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet8hf pabs<Packet8hf>(const Packet8hf& a) {\n  return vabsq_f16(a);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4hf pabs<Packet4hf>(const Packet4hf& a) {\n  return vabs_f16(a);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Eigen::half predux<Packet8hf>(const Packet8hf& a) {\n  float16x4_t a_lo, a_hi, sum;\n\n  a_lo = vget_low_f16(a);\n  a_hi = vget_high_f16(a);\n  sum = vpadd_f16(a_lo, a_hi);\n  sum = vpadd_f16(sum, sum);\n  sum = vpadd_f16(sum, sum);\n\n  Eigen::half h;\n  h.x = vget_lane_f16(sum, 0);\n  return h;\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Eigen::half predux<Packet4hf>(const Packet4hf& a) {\n  float16x4_t sum;\n\n  sum = vpadd_f16(a, a);\n  sum = vpadd_f16(sum, sum);\n  Eigen::half h;\n  h.x = vget_lane_f16(sum, 0);\n  return h;\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Eigen::half predux_mul<Packet8hf>(const Packet8hf& a) {\n  float16x4_t a_lo, a_hi, prod;\n\n  a_lo = vget_low_f16(a);\n  a_hi = vget_high_f16(a);\n  prod = vmul_f16(a_lo, a_hi);\n  prod = vmul_f16(prod, vrev64_f16(prod));\n\n  Eigen::half h;\n  h.x = vmulh_f16(vget_lane_f16(prod, 0), vget_lane_f16(prod, 1));\n  return h;\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Eigen::half predux_mul<Packet4hf>(const Packet4hf& a) {\n  float16x4_t prod;\n  prod = vmul_f16(a, vrev64_f16(a));\n  Eigen::half h;\n  h.x = 
vmulh_f16(vget_lane_f16(prod, 0), vget_lane_f16(prod, 1));\n  return h;\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Eigen::half predux_min<Packet8hf>(const Packet8hf& a) {\n  float16x4_t a_lo, a_hi, min;\n\n  a_lo = vget_low_f16(a);\n  a_hi = vget_high_f16(a);\n  min = vpmin_f16(a_lo, a_hi);\n  min = vpmin_f16(min, min);\n  min = vpmin_f16(min, min);\n\n  Eigen::half h;\n  h.x = vget_lane_f16(min, 0);\n  return h;\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Eigen::half predux_min<Packet4hf>(const Packet4hf& a) {\n  Packet4hf tmp;\n  tmp = vpmin_f16(a, a);\n  tmp = vpmin_f16(tmp, tmp);\n  Eigen::half h;\n  h.x = vget_lane_f16(tmp, 0);\n  return h;\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Eigen::half predux_max<Packet8hf>(const Packet8hf& a) {\n  float16x4_t a_lo, a_hi, max;\n\n  a_lo = vget_low_f16(a);\n  a_hi = vget_high_f16(a);\n  max = vpmax_f16(a_lo, a_hi);\n  max = vpmax_f16(max, max);\n  max = vpmax_f16(max, max);\n\n  Eigen::half h;\n  h.x = vget_lane_f16(max, 0);\n  return h;\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Eigen::half predux_max<Packet4hf>(const Packet4hf& a) {\n  Packet4hf tmp;\n  tmp = vpmax_f16(a, a);\n  tmp = vpmax_f16(tmp, tmp);\n  Eigen::half h;\n  h.x = vget_lane_f16(tmp, 0);\n  return h;\n}\n\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet8hf, 4>& kernel)\n{\n  const float16x8x2_t zip16_1 = vzipq_f16(kernel.packet[0], kernel.packet[1]);\n  const float16x8x2_t zip16_2 = vzipq_f16(kernel.packet[2], kernel.packet[3]);\n\n  const float32x4x2_t zip32_1 = vzipq_f32(vreinterpretq_f32_f16(zip16_1.val[0]), vreinterpretq_f32_f16(zip16_2.val[0]));\n  const float32x4x2_t zip32_2 = vzipq_f32(vreinterpretq_f32_f16(zip16_1.val[1]), vreinterpretq_f32_f16(zip16_2.val[1]));\n\n  kernel.packet[0] = vreinterpretq_f16_f32(zip32_1.val[0]);\n  kernel.packet[1] = vreinterpretq_f16_f32(zip32_1.val[1]);\n  kernel.packet[2] = vreinterpretq_f16_f32(zip32_2.val[0]);\n  kernel.packet[3] = vreinterpretq_f16_f32(zip32_2.val[1]);\n}\n\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet4hf, 4>& kernel) {\n  EIGEN_ALIGN16 float16x4x4_t tmp_x4;\n  float16_t* tmp = (float16_t*)&kernel;\n  tmp_x4 = vld4_f16(tmp);\n\n  kernel.packet[0] = tmp_x4.val[0];\n  kernel.packet[1] = tmp_x4.val[1];\n  kernel.packet[2] = tmp_x4.val[2];\n  kernel.packet[3] = tmp_x4.val[3];\n}\n\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet8hf, 8>& kernel) {\n  float16x8x2_t T_1[4];\n\n  T_1[0] = vuzpq_f16(kernel.packet[0], kernel.packet[1]);\n  T_1[1] = vuzpq_f16(kernel.packet[2], kernel.packet[3]);\n  T_1[2] = vuzpq_f16(kernel.packet[4], kernel.packet[5]);\n  T_1[3] = vuzpq_f16(kernel.packet[6], kernel.packet[7]);\n\n  float16x8x2_t T_2[4];\n  T_2[0] = vuzpq_f16(T_1[0].val[0], T_1[1].val[0]);\n  T_2[1] = vuzpq_f16(T_1[0].val[1], T_1[1].val[1]);\n  T_2[2] = vuzpq_f16(T_1[2].val[0], T_1[3].val[0]);\n  T_2[3] = vuzpq_f16(T_1[2].val[1], T_1[3].val[1]);\n\n  float16x8x2_t T_3[4];\n  T_3[0] = vuzpq_f16(T_2[0].val[0], T_2[2].val[0]);\n  T_3[1] = vuzpq_f16(T_2[0].val[1], T_2[2].val[1]);\n  T_3[2] = vuzpq_f16(T_2[1].val[0], T_2[3].val[0]);\n  T_3[3] = vuzpq_f16(T_2[1].val[1], T_2[3].val[1]);\n\n  kernel.packet[0] = T_3[0].val[0];\n  kernel.packet[1] = T_3[2].val[0];\n  kernel.packet[2] = T_3[1].val[0];\n  kernel.packet[3] = T_3[3].val[0];\n  kernel.packet[4] = T_3[0].val[1];\n  kernel.packet[5] = T_3[2].val[1];\n  kernel.packet[6] = T_3[1].val[1];\n  kernel.packet[7] = T_3[3].val[1];\n}\n#endif // end EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC\n\n} // end namespace internal\n\n} // 
end namespace Eigen\n\n#endif // EIGEN_PACKET_MATH_NEON_H\n"
  },
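  {
    "path": "example/neon_math_sketch.cpp",
    "content": "// NOTE: hypothetical illustrative sketch; not part of upstream Eigen 3.4.0 and not\n// wired into this repository's build. It models, in plain scalar C++, two techniques\n// used by the vendored NEON kernels in Eigen/src/Core/arch/NEON/PacketMath.h above:\n//  1) prsqrt(Packet2d): VRSQRTE gives a coarse 1/sqrt estimate, and each VRSQRTS step,\n//     called as vrsqrtsq_f64(a*x, x), computes (3 - a*x*x)/2, so\n//     x <- x * (3 - a*x*x)/2 is one Newton-Raphson refinement round.\n//  2) predux(Packet8hf): a pairwise (tree) reduction, as done with repeated vpadd_f16.\n#include <cmath>\n#include <cstdio>\n\n// Scalar model of the three refinement steps in prsqrt(Packet2d).\nstatic double newton_rsqrt(double a) {\n  // Stand-in for the coarse VRSQRTE estimate (deliberately perturbed by ~1%).\n  double x = (1.0 / std::sqrt(a)) * 1.01;\n  for (int i = 0; i < 3; ++i)\n    x = x * (3.0 - a * x * x) / 2.0;  // mirrors vmulq_f64(vrsqrtsq_f64(vmulq_f64(a, x), x), x)\n  return x;\n}\n\n// Scalar model of predux<Packet8hf>: log2(8) = 3 pairwise-add rounds instead of a serial sum.\nstatic float pairwise_sum(float v[8]) {\n  for (int stride = 1; stride < 8; stride *= 2)\n    for (int i = 0; i + stride < 8; i += 2 * stride)\n      v[i] += v[i + stride];\n  return v[0];\n}\n\nint main() {\n  float v[8] = {1, 2, 3, 4, 5, 6, 7, 8};\n  std::printf(\"newton_rsqrt(2) = %.17g\\n\", newton_rsqrt(2.0));\n  std::printf(\"1/sqrt(2) exact = %.17g\\n\", 1.0 / std::sqrt(2.0));\n  std::printf(\"pairwise_sum    = %g (expect 36)\\n\", pairwise_sum(v));\n  return 0;\n}\n"
  },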
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/arch/NEON/TypeCasting.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2018 Rasmus Munk Larsen <rmlarsen@google.com>\n// Copyright (C) 2020 Antonio Sanchez <cantonios@google.com>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_TYPE_CASTING_NEON_H\n#define EIGEN_TYPE_CASTING_NEON_H\n\nnamespace Eigen {\n\nnamespace internal {\n\n//==============================================================================\n// pcast, SrcType = float\n//==============================================================================\ntemplate <>\nstruct type_casting_traits<float, float> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet4f pcast<Packet4f, Packet4f>(const Packet4f& a) {\n  return a;\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet2f pcast<Packet2f, Packet2f>(const Packet2f& a) {\n  return a;\n}\n\ntemplate <>\nstruct type_casting_traits<float, numext::int64_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 2 };\n};\ntemplate <>\nstruct type_casting_traits<float, numext::uint64_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 2 };\n};\n// If float64 exists, first convert to that to keep as much precision as possible.\n#if EIGEN_ARCH_ARM64\ntemplate <>\nEIGEN_STRONG_INLINE Packet2l pcast<Packet4f, Packet2l>(const Packet4f& a) {\n  // Discard second half of input.\n  return vcvtq_s64_f64(vcvt_f64_f32(vget_low_f32(a)));\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet2ul pcast<Packet4f, Packet2ul>(const Packet4f& a) {\n  // Discard second half of input.\n  return vcvtq_u64_f64(vcvt_f64_f32(vget_low_f32(a)));\n}\n#else\ntemplate <>\nEIGEN_STRONG_INLINE Packet2l pcast<Packet4f, Packet2l>(const Packet4f& a) {\n  // Discard second half of input.\n  return vmovl_s32(vget_low_s32(vcvtq_s32_f32(a)));\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet2ul pcast<Packet4f, Packet2ul>(const Packet4f& a) {\n  // Discard second half of input.\n  return vmovl_u32(vget_low_u32(vcvtq_u32_f32(a)));\n}\n#endif  // EIGEN_ARCH_ARM64\n\ntemplate <>\nstruct type_casting_traits<float, numext::int32_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet4i pcast<Packet4f, Packet4i>(const Packet4f& a) {\n  return vcvtq_s32_f32(a);\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet2i pcast<Packet2f, Packet2i>(const Packet2f& a) {\n  return vcvt_s32_f32(a);\n}\n\ntemplate <>\nstruct type_casting_traits<float, numext::uint32_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet4ui pcast<Packet4f, Packet4ui>(const Packet4f& a) {\n  return vcvtq_u32_f32(a);\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet2ui pcast<Packet2f, Packet2ui>(const Packet2f& a) {\n  return vcvt_u32_f32(a);\n}\n\ntemplate <>\nstruct type_casting_traits<float, numext::int16_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet8s pcast<Packet4f, Packet8s>(const Packet4f& a, const Packet4f& b) {\n  return vcombine_s16(vmovn_s32(vcvtq_s32_f32(a)), vmovn_s32(vcvtq_s32_f32(b)));\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet4s pcast<Packet2f, Packet4s>(const Packet2f& a, const Packet2f& b) {\n  return vmovn_s32(vcombine_s32(vcvt_s32_f32(a), 
vcvt_s32_f32(b)));\n}\n\ntemplate <>\nstruct type_casting_traits<float, numext::uint16_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet8us pcast<Packet4f, Packet8us>(const Packet4f& a, const Packet4f& b) {\n  return vcombine_u16(vmovn_u32(vcvtq_u32_f32(a)), vmovn_u32(vcvtq_u32_f32(b)));\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet4us pcast<Packet2f, Packet4us>(const Packet2f& a, const Packet2f& b) {\n  return vmovn_u32(vcombine_u32(vcvt_u32_f32(a), vcvt_u32_f32(b)));\n}\n\ntemplate <>\nstruct type_casting_traits<float, numext::int8_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 4, TgtCoeffRatio = 1 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet16c pcast<Packet4f, Packet16c>(const Packet4f& a, const Packet4f& b, const Packet4f& c,\n                                                         const Packet4f& d) {\n  const int16x8_t ab_s16 = pcast<Packet4f, Packet8s>(a, b);\n  const int16x8_t cd_s16 = pcast<Packet4f, Packet8s>(c, d);\n  return vcombine_s8(vmovn_s16(ab_s16), vmovn_s16(cd_s16));\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet8c pcast<Packet2f, Packet8c>(const Packet2f& a, const Packet2f& b, const Packet2f& c,\n                                                       const Packet2f& d) {\n  const int16x4_t ab_s16 = pcast<Packet2f, Packet4s>(a, b);\n  const int16x4_t cd_s16 = pcast<Packet2f, Packet4s>(c, d);\n  return vmovn_s16(vcombine_s16(ab_s16, cd_s16));\n}\n\ntemplate <>\nstruct type_casting_traits<float, numext::uint8_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 4, TgtCoeffRatio = 1 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet16uc pcast<Packet4f, Packet16uc>(const Packet4f& a, const Packet4f& b, const Packet4f& c,\n                                                           const Packet4f& d) {\n  const uint16x8_t ab_u16 = pcast<Packet4f, Packet8us>(a, b);\n  const uint16x8_t cd_u16 = pcast<Packet4f, Packet8us>(c, d);\n  return vcombine_u8(vmovn_u16(ab_u16), vmovn_u16(cd_u16));\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet8uc pcast<Packet2f, Packet8uc>(const Packet2f& a, const Packet2f& b, const Packet2f& c,\n                                                         const Packet2f& d) {\n  const uint16x4_t ab_u16 = pcast<Packet2f, Packet4us>(a, b);\n  const uint16x4_t cd_u16 = pcast<Packet2f, Packet4us>(c, d);\n  return vmovn_u16(vcombine_u16(ab_u16, cd_u16));\n}\n\n//==============================================================================\n// pcast, SrcType = int8_t\n//==============================================================================\ntemplate <>\nstruct type_casting_traits<numext::int8_t, float> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 4 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet4f pcast<Packet16c, Packet4f>(const Packet16c& a) {\n  // Discard all but first 4 bytes.\n  return vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8(vget_low_s8(a)))));\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet2f pcast<Packet8c, Packet2f>(const Packet8c& a) {\n  // Discard all but first 2 bytes.\n  return vcvt_f32_s32(vget_low_s32(vmovl_s16(vget_low_s16(vmovl_s8(a)))));\n}\n\ntemplate <>\nstruct type_casting_traits<numext::int8_t, numext::int64_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 8 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet2l pcast<Packet16c, Packet2l>(const Packet16c& a) {\n  // Discard all but first two bytes.\n  return vmovl_s32(vget_low_s32(vmovl_s16(vget_low_s16(vmovl_s8(vget_low_s8(a))))));\n}\n\ntemplate <>\nstruct 
type_casting_traits<numext::int8_t, numext::uint64_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 8 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet2ul pcast<Packet16c, Packet2ul>(const Packet16c& a) {\n  return vreinterpretq_u64_s64(pcast<Packet16c, Packet2l>(a));\n}\n\ntemplate <>\nstruct type_casting_traits<numext::int8_t, numext::int32_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 4 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet4i pcast<Packet16c, Packet4i>(const Packet16c& a) {\n  // Discard all but first 4 bytes.\n  return vmovl_s16(vget_low_s16(vmovl_s8(vget_low_s8(a))));\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet2i pcast<Packet8c, Packet2i>(const Packet8c& a) {\n  // Discard all but first 2 bytes.\n  return vget_low_s32(vmovl_s16(vget_low_s16(vmovl_s8(a))));\n}\n\ntemplate <>\nstruct type_casting_traits<numext::int8_t, numext::uint32_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 4 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet4ui pcast<Packet16c, Packet4ui>(const Packet16c& a) {\n  return vreinterpretq_u32_s32(pcast<Packet16c, Packet4i>(a));\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet2ui pcast<Packet8c, Packet2ui>(const Packet8c& a) {\n  return vreinterpret_u32_s32(pcast<Packet8c, Packet2i>(a));\n}\n\ntemplate <>\nstruct type_casting_traits<numext::int8_t, numext::int16_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 2 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet8s pcast<Packet16c, Packet8s>(const Packet16c& a) {\n  // Discard second half of input.\n  return vmovl_s8(vget_low_s8(a));\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet4s pcast<Packet8c, Packet4s>(const Packet8c& a) {\n  // Discard second half of input.\n  return vget_low_s16(vmovl_s8(a));\n}\n\ntemplate <>\nstruct type_casting_traits<numext::int8_t, numext::uint16_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 2 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet8us pcast<Packet16c, Packet8us>(const Packet16c& a) {\n  return vreinterpretq_u16_s16(pcast<Packet16c, Packet8s>(a));\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet4us pcast<Packet8c, Packet4us>(const Packet8c& a) {\n  return vreinterpret_u16_s16(pcast<Packet8c, Packet4s>(a));\n}\n\ntemplate <>\nstruct type_casting_traits<numext::int8_t, numext::int8_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet16c pcast<Packet16c, Packet16c>(const Packet16c& a) {\n  return a;\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet8c pcast<Packet8c, Packet8c>(const Packet8c& a) {\n  return a;\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet4c pcast<Packet4c, Packet4c>(const Packet4c& a) {\n  return a;\n}\n\ntemplate <>\nstruct type_casting_traits<numext::int8_t, numext::uint8_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet16uc pcast<Packet16c, Packet16uc>(const Packet16c& a) {\n  return vreinterpretq_u8_s8(a);\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet8uc pcast<Packet8c, Packet8uc>(const Packet8c& a) {\n  return vreinterpret_u8_s8(a);\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet4uc pcast<Packet4c, Packet4uc>(const Packet4c& a) {\n  return static_cast<Packet4uc>(a);\n}\n\n//==============================================================================\n// pcast, SrcType = uint8_t\n//==============================================================================\ntemplate <>\nstruct type_casting_traits<numext::uint8_t, float> {\n  enum { 
VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 4 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet4f pcast<Packet16uc, Packet4f>(const Packet16uc& a) {\n  // Discard all but first 4 bytes.\n  return vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8(vget_low_u8(a)))));\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet2f pcast<Packet8uc, Packet2f>(const Packet8uc& a) {\n  // Discard all but first 2 bytes.\n  return vcvt_f32_u32(vget_low_u32(vmovl_u16(vget_low_u16(vmovl_u8(a)))));\n}\n\ntemplate <>\nstruct type_casting_traits<numext::uint8_t, numext::uint64_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 8 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet2ul pcast<Packet16uc, Packet2ul>(const Packet16uc& a) {\n  // Discard all but first two bytes.\n  return vmovl_u32(vget_low_u32(vmovl_u16(vget_low_u16(vmovl_u8(vget_low_u8(a))))));\n}\n\ntemplate <>\nstruct type_casting_traits<numext::uint8_t, numext::int64_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 8 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet2l pcast<Packet16uc, Packet2l>(const Packet16uc& a) {\n  return vreinterpretq_s64_u64(pcast<Packet16uc, Packet2ul>(a));\n}\n\ntemplate <>\nstruct type_casting_traits<numext::uint8_t, numext::uint32_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 4 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet4ui pcast<Packet16uc, Packet4ui>(const Packet16uc& a) {\n  // Discard all but first 4 bytes.\n  return vmovl_u16(vget_low_u16(vmovl_u8(vget_low_u8(a))));\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet2ui pcast<Packet8uc, Packet2ui>(const Packet8uc& a) {\n  // Discard all but first 2 bytes.\n  return vget_low_u32(vmovl_u16(vget_low_u16(vmovl_u8(a))));\n}\n\ntemplate <>\nstruct type_casting_traits<numext::uint8_t, numext::int32_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 4 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet4i pcast<Packet16uc, Packet4i>(const Packet16uc& a) {\n  return vreinterpretq_s32_u32(pcast<Packet16uc, Packet4ui>(a));\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet2i pcast<Packet8uc, Packet2i>(const Packet8uc& a) {\n  return vreinterpret_s32_u32(pcast<Packet8uc, Packet2ui>(a));\n}\n\ntemplate <>\nstruct type_casting_traits<numext::uint8_t, numext::uint16_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 2 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet8us pcast<Packet16uc, Packet8us>(const Packet16uc& a) {\n  // Discard second half of input.\n  return vmovl_u8(vget_low_u8(a));\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet4us pcast<Packet8uc, Packet4us>(const Packet8uc& a) {\n  // Discard second half of input.\n  return vget_low_u16(vmovl_u8(a));\n}\n\ntemplate <>\nstruct type_casting_traits<numext::uint8_t, numext::int16_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 2 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet8s pcast<Packet16uc, Packet8s>(const Packet16uc& a) {\n  return vreinterpretq_s16_u16(pcast<Packet16uc, Packet8us>(a));\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet4s pcast<Packet8uc, Packet4s>(const Packet8uc& a) {\n  return vreinterpret_s16_u16(pcast<Packet8uc, Packet4us>(a));\n}\n\ntemplate <>\nstruct type_casting_traits<numext::uint8_t, numext::uint8_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet16uc pcast<Packet16uc, Packet16uc>(const Packet16uc& a) {\n  return a;\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet8uc pcast<Packet8uc, Packet8uc>(const Packet8uc& a) {\n  return a;\n}\ntemplate 
<>\nEIGEN_STRONG_INLINE Packet4uc pcast<Packet4uc, Packet4uc>(const Packet4uc& a) {\n  return a;\n}\n\ntemplate <>\nstruct type_casting_traits<numext::uint8_t, numext::int8_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet16c pcast<Packet16uc, Packet16c>(const Packet16uc& a) {\n  return vreinterpretq_s8_u8(a);\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet8c pcast<Packet8uc, Packet8c>(const Packet8uc& a) {\n  return vreinterpret_s8_u8(a);\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet4c pcast<Packet4uc, Packet4c>(const Packet4uc& a) {\n  return static_cast<Packet4c>(a);\n}\n\n//==============================================================================\n// pcast, SrcType = int16_t\n//==============================================================================\ntemplate <>\nstruct type_casting_traits<numext::int16_t, float> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 2 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet4f pcast<Packet8s, Packet4f>(const Packet8s& a) {\n  // Discard second half of input.\n  return vcvtq_f32_s32(vmovl_s16(vget_low_s16(a)));\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet2f pcast<Packet4s, Packet2f>(const Packet4s& a) {\n  // Discard second half of input.\n  return vcvt_f32_s32(vget_low_s32(vmovl_s16(a)));\n}\n\ntemplate <>\nstruct type_casting_traits<numext::int16_t, numext::int64_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 4 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet2l pcast<Packet8s, Packet2l>(const Packet8s& a) {\n  // Discard all but first two values.\n  return vmovl_s32(vget_low_s32(vmovl_s16(vget_low_s16(a))));\n}\n\ntemplate <>\nstruct type_casting_traits<numext::int16_t, numext::uint64_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 4 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet2ul pcast<Packet8s, Packet2ul>(const Packet8s& a) {\n  return vreinterpretq_u64_s64(pcast<Packet8s, Packet2l>(a));\n}\n\ntemplate <>\nstruct type_casting_traits<numext::int16_t, numext::int32_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 2 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet4i pcast<Packet8s, Packet4i>(const Packet8s& a) {\n  // Discard second half of input.\n  return vmovl_s16(vget_low_s16(a));\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet2i pcast<Packet4s, Packet2i>(const Packet4s& a) {\n  // Discard second half of input.\n  return vget_low_s32(vmovl_s16(a));\n}\n\ntemplate <>\nstruct type_casting_traits<numext::int16_t, numext::uint32_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 2 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet4ui pcast<Packet8s, Packet4ui>(const Packet8s& a) {\n  return vreinterpretq_u32_s32(pcast<Packet8s, Packet4i>(a));\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet2ui pcast<Packet4s, Packet2ui>(const Packet4s& a) {\n  return vreinterpret_u32_s32(pcast<Packet4s, Packet2i>(a));\n}\n\ntemplate <>\nstruct type_casting_traits<numext::int16_t, numext::int16_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet8s pcast<Packet8s, Packet8s>(const Packet8s& a) {\n  return a;\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet4s pcast<Packet4s, Packet4s>(const Packet4s& a) {\n  return a;\n}\n\ntemplate <>\nstruct type_casting_traits<numext::int16_t, numext::uint16_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet8us pcast<Packet8s, 
Packet8us>(const Packet8s& a) {\n  return vreinterpretq_u16_s16(a);\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet4us pcast<Packet4s, Packet4us>(const Packet4s& a) {\n  return vreinterpret_u16_s16(a);\n}\n\ntemplate <>\nstruct type_casting_traits<numext::int16_t, numext::int8_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet16c pcast<Packet8s, Packet16c>(const Packet8s& a, const Packet8s& b) {\n  return vcombine_s8(vmovn_s16(a), vmovn_s16(b));\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet8c pcast<Packet4s, Packet8c>(const Packet4s& a, const Packet4s& b) {\n  return vmovn_s16(vcombine_s16(a, b));\n}\n\ntemplate <>\nstruct type_casting_traits<numext::int16_t, numext::uint8_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet16uc pcast<Packet8s, Packet16uc>(const Packet8s& a, const Packet8s& b) {\n  return vcombine_u8(vmovn_u16(vreinterpretq_u16_s16(a)), vmovn_u16(vreinterpretq_u16_s16(b)));\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet8uc pcast<Packet4s, Packet8uc>(const Packet4s& a, const Packet4s& b) {\n  return vmovn_u16(vcombine_u16(vreinterpret_u16_s16(a), vreinterpret_u16_s16(b)));\n}\n\n//==============================================================================\n// pcast, SrcType = uint16_t\n//==============================================================================\ntemplate <>\nstruct type_casting_traits<numext::uint16_t, float> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 2 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet4f pcast<Packet8us, Packet4f>(const Packet8us& a) {\n  // Discard second half of input.\n  return vcvtq_f32_u32(vmovl_u16(vget_low_u16(a)));\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet2f pcast<Packet4us, Packet2f>(const Packet4us& a) {\n  // Discard second half of input.\n  return vcvt_f32_u32(vget_low_u32(vmovl_u16(a)));\n}\n\ntemplate <>\nstruct type_casting_traits<numext::uint16_t, numext::uint64_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 4 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet2ul pcast<Packet8us, Packet2ul>(const Packet8us& a) {\n  // Discard all but first two values.\n  return vmovl_u32(vget_low_u32(vmovl_u16(vget_low_u16(a))));\n}\n\ntemplate <>\nstruct type_casting_traits<numext::uint16_t, numext::int64_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 4 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet2l pcast<Packet8us, Packet2l>(const Packet8us& a) {\n  return vreinterpretq_s64_u64(pcast<Packet8us, Packet2ul>(a));\n}\n\ntemplate <>\nstruct type_casting_traits<numext::uint16_t, numext::uint32_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 2 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet4ui pcast<Packet8us, Packet4ui>(const Packet8us& a) {\n  // Discard second half of input.\n  return vmovl_u16(vget_low_u16(a));\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet2ui pcast<Packet4us, Packet2ui>(const Packet4us& a) {\n  // Discard second half of input.\n  return vget_low_u32(vmovl_u16(a));\n}\n\ntemplate <>\nstruct type_casting_traits<numext::uint16_t, numext::int32_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 2 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet4i pcast<Packet8us, Packet4i>(const Packet8us& a) {\n  return vreinterpretq_s32_u32(pcast<Packet8us, Packet4ui>(a));\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet2i pcast<Packet4us, Packet2i>(const Packet4us& a) {\n  return 
vreinterpret_s32_u32(pcast<Packet4us, Packet2ui>(a));\n}\n\ntemplate <>\nstruct type_casting_traits<numext::uint16_t, numext::uint16_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet8us pcast<Packet8us, Packet8us>(const Packet8us& a) {\n  return a;\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet4us pcast<Packet4us, Packet4us>(const Packet4us& a) {\n  return a;\n}\n\ntemplate <>\nstruct type_casting_traits<numext::uint16_t, numext::int16_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet8s pcast<Packet8us, Packet8s>(const Packet8us& a) {\n  return vreinterpretq_s16_u16(a);\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet4s pcast<Packet4us, Packet4s>(const Packet4us& a) {\n  return vreinterpret_s16_u16(a);\n}\n\ntemplate <>\nstruct type_casting_traits<numext::uint16_t, numext::uint8_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet16uc pcast<Packet8us, Packet16uc>(const Packet8us& a, const Packet8us& b) {\n  return vcombine_u8(vmovn_u16(a), vmovn_u16(b));\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet8uc pcast<Packet4us, Packet8uc>(const Packet4us& a, const Packet4us& b) {\n  return vmovn_u16(vcombine_u16(a, b));\n}\n\ntemplate <>\nstruct type_casting_traits<numext::uint16_t, numext::int8_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet16c pcast<Packet8us, Packet16c>(const Packet8us& a, const Packet8us& b) {\n  return vreinterpretq_s8_u8(pcast<Packet8us, Packet16uc>(a, b));\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet8c pcast<Packet4us, Packet8c>(const Packet4us& a, const Packet4us& b) {\n  return vreinterpret_s8_u8(pcast<Packet4us, Packet8uc>(a, b));\n}\n\n//==============================================================================\n// pcast, SrcType = int32_t\n//==============================================================================\ntemplate <>\nstruct type_casting_traits<numext::int32_t, float> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet4f pcast<Packet4i, Packet4f>(const Packet4i& a) {\n  return vcvtq_f32_s32(a);\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet2f pcast<Packet2i, Packet2f>(const Packet2i& a) {\n  return vcvt_f32_s32(a);\n}\n\ntemplate <>\nstruct type_casting_traits<numext::int32_t, numext::int64_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 2 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet2l pcast<Packet4i, Packet2l>(const Packet4i& a) {\n  // Discard second half of input.\n  return vmovl_s32(vget_low_s32(a));\n}\n\ntemplate <>\nstruct type_casting_traits<numext::int32_t, numext::uint64_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 2 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet2ul pcast<Packet4i, Packet2ul>(const Packet4i& a) {\n  return vreinterpretq_u64_s64(pcast<Packet4i, Packet2l>(a));\n}\n\ntemplate <>\nstruct type_casting_traits<numext::int32_t, numext::int32_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet4i pcast<Packet4i, Packet4i>(const Packet4i& a) {\n  return a;\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet2i pcast<Packet2i, Packet2i>(const Packet2i& a) {\n  return a;\n}\n\ntemplate <>\nstruct type_casting_traits<numext::int32_t, numext::uint32_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 1, 
TgtCoeffRatio = 1 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet4ui pcast<Packet4i, Packet4ui>(const Packet4i& a) {\n  return vreinterpretq_u32_s32(a);\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet2ui pcast<Packet2i, Packet2ui>(const Packet2i& a) {\n  return vreinterpret_u32_s32(a);\n}\n\ntemplate <>\nstruct type_casting_traits<numext::int32_t, numext::int16_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet8s pcast<Packet4i, Packet8s>(const Packet4i& a, const Packet4i& b) {\n  return vcombine_s16(vmovn_s32(a), vmovn_s32(b));\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet4s pcast<Packet2i, Packet4s>(const Packet2i& a, const Packet2i& b) {\n  return vmovn_s32(vcombine_s32(a, b));\n}\n\ntemplate <>\nstruct type_casting_traits<numext::int32_t, numext::uint16_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet8us pcast<Packet4i, Packet8us>(const Packet4i& a, const Packet4i& b) {\n  return vcombine_u16(vmovn_u32(vreinterpretq_u32_s32(a)), vmovn_u32(vreinterpretq_u32_s32(b)));\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet4us pcast<Packet2i, Packet4us>(const Packet2i& a, const Packet2i& b) {\n  return vmovn_u32(vreinterpretq_u32_s32(vcombine_s32(a, b)));\n}\n\ntemplate <>\nstruct type_casting_traits<numext::int32_t, numext::int8_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 4, TgtCoeffRatio = 1 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet16c pcast<Packet4i, Packet16c>(const Packet4i& a, const Packet4i& b, const Packet4i& c,\n                                                         const Packet4i& d) {\n  const int16x8_t ab_s16 = pcast<Packet4i, Packet8s>(a, b);\n  const int16x8_t cd_s16 = pcast<Packet4i, Packet8s>(c, d);\n  return vcombine_s8(vmovn_s16(ab_s16), vmovn_s16(cd_s16));\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet8c pcast<Packet2i, Packet8c>(const Packet2i& a, const Packet2i& b, const Packet2i& c,\n                                                       const Packet2i& d) {\n  const int16x4_t ab_s16 = vmovn_s32(vcombine_s32(a, b));\n  const int16x4_t cd_s16 = vmovn_s32(vcombine_s32(c, d));\n  return vmovn_s16(vcombine_s16(ab_s16, cd_s16));\n}\n\ntemplate <>\nstruct type_casting_traits<numext::int32_t, numext::uint8_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 4, TgtCoeffRatio = 1 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet16uc pcast<Packet4i, Packet16uc>(const Packet4i& a, const Packet4i& b, const Packet4i& c,\n                                                           const Packet4i& d) {\n  const uint16x8_t ab_u16 = pcast<Packet4i, Packet8us>(a, b);\n  const uint16x8_t cd_u16 = pcast<Packet4i, Packet8us>(c, d);\n  return vcombine_u8(vmovn_u16(ab_u16), vmovn_u16(cd_u16));\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet8uc pcast<Packet2i, Packet8uc>(const Packet2i& a, const Packet2i& b, const Packet2i& c,\n                                                         const Packet2i& d) {\n  const uint16x4_t ab_u16 = pcast<Packet2i, Packet4us>(a, b);\n  const uint16x4_t cd_u16 = pcast<Packet2i, Packet4us>(c, d);\n  return vmovn_u16(vcombine_u16(ab_u16, cd_u16));\n}\n\n//==============================================================================\n// pcast, SrcType = uint32_t\n//==============================================================================\ntemplate <>\nstruct type_casting_traits<numext::uint32_t, float> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet4f 
pcast<Packet4ui, Packet4f>(const Packet4ui& a) {\n  return vcvtq_f32_u32(a);\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet2f pcast<Packet2ui, Packet2f>(const Packet2ui& a) {\n  return vcvt_f32_u32(a);\n}\n\ntemplate <>\nstruct type_casting_traits<numext::uint32_t, numext::uint64_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 2 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet2ul pcast<Packet4ui, Packet2ul>(const Packet4ui& a) {\n  // Discard second half of input.\n  return vmovl_u32(vget_low_u32(a));\n}\n\ntemplate <>\nstruct type_casting_traits<numext::uint32_t, numext::int64_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 2 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet2l pcast<Packet4ui, Packet2l>(const Packet4ui& a) {\n  return vreinterpretq_s64_u64(pcast<Packet4ui, Packet2ul>(a));\n}\n\ntemplate <>\nstruct type_casting_traits<numext::uint32_t, numext::uint32_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet4ui pcast<Packet4ui, Packet4ui>(const Packet4ui& a) {\n  return a;\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet2ui pcast<Packet2ui, Packet2ui>(const Packet2ui& a) {\n  return a;\n}\n\ntemplate <>\nstruct type_casting_traits<numext::uint32_t, numext::int32_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet4i pcast<Packet4ui, Packet4i>(const Packet4ui& a) {\n  return vreinterpretq_s32_u32(a);\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet2i pcast<Packet2ui, Packet2i>(const Packet2ui& a) {\n  return vreinterpret_s32_u32(a);\n}\n\ntemplate <>\nstruct type_casting_traits<numext::uint32_t, numext::uint16_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet8us pcast<Packet4ui, Packet8us>(const Packet4ui& a, const Packet4ui& b) {\n  return vcombine_u16(vmovn_u32(a), vmovn_u32(b));\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet4us pcast<Packet2ui, Packet4us>(const Packet2ui& a, const Packet2ui& b) {\n  return vmovn_u32(vcombine_u32(a, b));\n}\n\ntemplate <>\nstruct type_casting_traits<numext::uint32_t, numext::int16_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet8s pcast<Packet4ui, Packet8s>(const Packet4ui& a, const Packet4ui& b) {\n  return vreinterpretq_s16_u16(pcast<Packet4ui, Packet8us>(a, b));\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet4s pcast<Packet2ui, Packet4s>(const Packet2ui& a, const Packet2ui& b) {\n  return vreinterpret_s16_u16(pcast<Packet2ui, Packet4us>(a, b));\n}\n\ntemplate <>\nstruct type_casting_traits<numext::uint32_t, numext::uint8_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 4, TgtCoeffRatio = 1 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet16uc pcast<Packet4ui, Packet16uc>(const Packet4ui& a, const Packet4ui& b, const Packet4ui& c,\n                                                            const Packet4ui& d) {\n  const uint16x8_t ab_u16 = vcombine_u16(vmovn_u32(a), vmovn_u32(b));\n  const uint16x8_t cd_u16 = vcombine_u16(vmovn_u32(c), vmovn_u32(d));\n  return vcombine_u8(vmovn_u16(ab_u16), vmovn_u16(cd_u16));\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet8uc pcast<Packet2ui, Packet8uc>(const Packet2ui& a, const Packet2ui& b, const Packet2ui& c,\n                                                          const Packet2ui& d) {\n  const uint16x4_t ab_u16 = vmovn_u32(vcombine_u32(a, b));\n  const uint16x4_t cd_u16 = vmovn_u32(vcombine_u32(c, d));\n  return 
vmovn_u16(vcombine_u16(ab_u16, cd_u16));\n}\n\ntemplate <>\nstruct type_casting_traits<numext::uint32_t, numext::int8_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 4, TgtCoeffRatio = 1 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet16c pcast<Packet4ui, Packet16c>(const Packet4ui& a, const Packet4ui& b, const Packet4ui& c,\n                                                          const Packet4ui& d) {\n  return vreinterpretq_s8_u8(pcast<Packet4ui, Packet16uc>(a, b, c, d));\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet8c pcast<Packet2ui, Packet8c>(const Packet2ui& a, const Packet2ui& b, const Packet2ui& c,\n                                                        const Packet2ui& d) {\n  return vreinterpret_s8_u8(pcast<Packet2ui, Packet8uc>(a, b, c, d));\n}\n\n//==============================================================================\n// pcast, SrcType = int64_t\n//==============================================================================\ntemplate <>\nstruct type_casting_traits<numext::int64_t, float> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet4f pcast<Packet2l, Packet4f>(const Packet2l& a, const Packet2l& b) {\n  return vcvtq_f32_s32(vcombine_s32(vmovn_s64(a), vmovn_s64(b)));\n}\n\ntemplate <>\nstruct type_casting_traits<numext::int64_t, numext::int64_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet2l pcast<Packet2l, Packet2l>(const Packet2l& a) {\n  return a;\n}\n\ntemplate <>\nstruct type_casting_traits<numext::int64_t, numext::uint64_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet2ul pcast<Packet2l, Packet2ul>(const Packet2l& a) {\n  return vreinterpretq_u64_s64(a);\n}\n\ntemplate <>\nstruct type_casting_traits<numext::int64_t, numext::int32_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet4i pcast<Packet2l, Packet4i>(const Packet2l& a, const Packet2l& b) {\n  return vcombine_s32(vmovn_s64(a), vmovn_s64(b));\n}\n\ntemplate <>\nstruct type_casting_traits<numext::int64_t, numext::uint32_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet4ui pcast<Packet2l, Packet4ui>(const Packet2l& a, const Packet2l& b) {\n  return vcombine_u32(vmovn_u64(vreinterpretq_u64_s64(a)), vmovn_u64(vreinterpretq_u64_s64(b)));\n}\n\ntemplate <>\nstruct type_casting_traits<numext::int64_t, numext::int16_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 4, TgtCoeffRatio = 1 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet8s pcast<Packet2l, Packet8s>(const Packet2l& a, const Packet2l& b, const Packet2l& c,\n                                                       const Packet2l& d) {\n  const int32x4_t ab_s32 = pcast<Packet2l, Packet4i>(a, b);\n  const int32x4_t cd_s32 = pcast<Packet2l, Packet4i>(c, d);\n  return vcombine_s16(vmovn_s32(ab_s32), vmovn_s32(cd_s32));\n}\n\ntemplate <>\nstruct type_casting_traits<numext::int64_t, numext::uint16_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 4, TgtCoeffRatio = 1 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet8us pcast<Packet2l, Packet8us>(const Packet2l& a, const Packet2l& b, const Packet2l& c,\n                                                         const Packet2l& d) {\n  const uint32x4_t ab_u32 = pcast<Packet2l, Packet4ui>(a, b);\n  const uint32x4_t cd_u32 = pcast<Packet2l, Packet4ui>(c, d);\n  return 
vcombine_u16(vmovn_u32(ab_u32), vmovn_u32(cd_u32));\n}\n\ntemplate <>\nstruct type_casting_traits<numext::int64_t, numext::int8_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 8, TgtCoeffRatio = 1 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet16c pcast<Packet2l, Packet16c>(const Packet2l& a, const Packet2l& b, const Packet2l& c,\n                                                         const Packet2l& d, const Packet2l& e, const Packet2l& f,\n                                                         const Packet2l& g, const Packet2l& h) {\n  const int16x8_t abcd_s16 = pcast<Packet2l, Packet8s>(a, b, c, d);\n  const int16x8_t efgh_s16 = pcast<Packet2l, Packet8s>(e, f, g, h);\n  return vcombine_s8(vmovn_s16(abcd_s16), vmovn_s16(efgh_s16));\n}\n\ntemplate <>\nstruct type_casting_traits<numext::int64_t, numext::uint8_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 8, TgtCoeffRatio = 1 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet16uc pcast<Packet2l, Packet16uc>(const Packet2l& a, const Packet2l& b, const Packet2l& c,\n                                                           const Packet2l& d, const Packet2l& e, const Packet2l& f,\n                                                           const Packet2l& g, const Packet2l& h) {\n  const uint16x8_t abcd_u16 = pcast<Packet2l, Packet8us>(a, b, c, d);\n  const uint16x8_t efgh_u16 = pcast<Packet2l, Packet8us>(e, f, g, h);\n  return vcombine_u8(vmovn_u16(abcd_u16), vmovn_u16(efgh_u16));\n}\n\n//==============================================================================\n// pcast, SrcType = uint64_t\n//==============================================================================\ntemplate <>\nstruct type_casting_traits<numext::uint64_t, float> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet4f pcast<Packet2ul, Packet4f>(const Packet2ul& a, const Packet2ul& b) {\n  return vcvtq_f32_u32(vcombine_u32(vmovn_u64(a), vmovn_u64(b)));\n}\n\ntemplate <>\nstruct type_casting_traits<numext::uint64_t, numext::uint64_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet2ul pcast<Packet2ul, Packet2ul>(const Packet2ul& a) {\n  return a;\n}\n\ntemplate <>\nstruct type_casting_traits<numext::uint64_t, numext::int64_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet2l pcast<Packet2ul, Packet2l>(const Packet2ul& a) {\n  return vreinterpretq_s64_u64(a);\n}\n\ntemplate <>\nstruct type_casting_traits<numext::uint64_t, numext::uint32_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet4ui pcast<Packet2ul, Packet4ui>(const Packet2ul& a, const Packet2ul& b) {\n  return vcombine_u32(vmovn_u64(a), vmovn_u64(b));\n}\n\ntemplate <>\nstruct type_casting_traits<numext::uint64_t, numext::int32_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet4i pcast<Packet2ul, Packet4i>(const Packet2ul& a, const Packet2ul& b) {\n  return vreinterpretq_s32_u32(pcast<Packet2ul, Packet4ui>(a, b));\n}\n\ntemplate <>\nstruct type_casting_traits<numext::uint64_t, numext::uint16_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 4, TgtCoeffRatio = 1 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet8us pcast<Packet2ul, Packet8us>(const Packet2ul& a, const Packet2ul& b, const Packet2ul& c,\n                                                          const 
Packet2ul& d) {\n  const uint16x4_t ab_u16 = vmovn_u32(vcombine_u32(vmovn_u64(a), vmovn_u64(b)));\n  const uint16x4_t cd_u16 = vmovn_u32(vcombine_u32(vmovn_u64(c), vmovn_u64(d)));\n  return vcombine_u16(ab_u16, cd_u16);\n}\n\ntemplate <>\nstruct type_casting_traits<numext::uint64_t, numext::int16_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 4, TgtCoeffRatio = 1 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet8s pcast<Packet2ul, Packet8s>(const Packet2ul& a, const Packet2ul& b, const Packet2ul& c,\n                                                        const Packet2ul& d) {\n  return vreinterpretq_s16_u16(pcast<Packet2ul, Packet8us>(a, b, c, d));\n}\n\ntemplate <>\nstruct type_casting_traits<numext::uint64_t, numext::uint8_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 8, TgtCoeffRatio = 1 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet16uc pcast<Packet2ul, Packet16uc>(const Packet2ul& a, const Packet2ul& b, const Packet2ul& c,\n                                                            const Packet2ul& d, const Packet2ul& e, const Packet2ul& f,\n                                                            const Packet2ul& g, const Packet2ul& h) {\n  const uint16x8_t abcd_u16 = pcast<Packet2ul, Packet8us>(a, b, c, d);\n  const uint16x8_t efgh_u16 = pcast<Packet2ul, Packet8us>(e, f, g, h);\n  return vcombine_u8(vmovn_u16(abcd_u16), vmovn_u16(efgh_u16));\n}\n\ntemplate <>\nstruct type_casting_traits<numext::uint64_t, numext::int8_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 8, TgtCoeffRatio = 1 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet16c pcast<Packet2ul, Packet16c>(const Packet2ul& a, const Packet2ul& b, const Packet2ul& c,\n                                                          const Packet2ul& d, const Packet2ul& e, const Packet2ul& f,\n                                                          const Packet2ul& g, const Packet2ul& h) {\n  return vreinterpretq_s8_u8(pcast<Packet2ul, Packet16uc>(a, b, c, d, e, f, g, h));\n}\n\n//==============================================================================\n// preinterpret\n//==============================================================================\ntemplate <>\nEIGEN_STRONG_INLINE Packet2f preinterpret<Packet2f, Packet2i>(const Packet2i& a) {\n  return vreinterpret_f32_s32(a);\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet2f preinterpret<Packet2f, Packet2ui>(const Packet2ui& a) {\n  return vreinterpret_f32_u32(a);\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet4f preinterpret<Packet4f, Packet4i>(const Packet4i& a) {\n  return vreinterpretq_f32_s32(a);\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet4f preinterpret<Packet4f, Packet4ui>(const Packet4ui& a) {\n  return vreinterpretq_f32_u32(a);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4c preinterpret<Packet4c, Packet4uc>(const Packet4uc& a) {\n  return static_cast<Packet4c>(a);\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet8c preinterpret<Packet8c, Packet8uc>(const Packet8uc& a) {\n  return vreinterpret_s8_u8(a);\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet16c preinterpret<Packet16c, Packet16uc>(const Packet16uc& a) {\n  return vreinterpretq_s8_u8(a);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4uc preinterpret<Packet4uc, Packet4c>(const Packet4c& a) {\n  return static_cast<Packet4uc>(a);\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet8uc preinterpret<Packet8uc, Packet8c>(const Packet8c& a) {\n  return vreinterpret_u8_s8(a);\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet16uc preinterpret<Packet16uc, Packet16c>(const Packet16c& a) {\n  return vreinterpretq_u8_s8(a);\n}\n\ntemplate 
<>\nEIGEN_STRONG_INLINE Packet4s preinterpret<Packet4s, Packet4us>(const Packet4us& a) {\n  return vreinterpret_s16_u16(a);\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet8s preinterpret<Packet8s, Packet8us>(const Packet8us& a) {\n  return vreinterpretq_s16_u16(a);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet4us preinterpret<Packet4us, Packet4s>(const Packet4s& a) {\n  return vreinterpret_u16_s16(a);\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet8us preinterpret<Packet8us, Packet8s>(const Packet8s& a) {\n  return vreinterpretq_u16_s16(a);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet2i preinterpret<Packet2i, Packet2f>(const Packet2f& a) {\n  return vreinterpret_s32_f32(a);\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet2i preinterpret<Packet2i, Packet2ui>(const Packet2ui& a) {\n  return vreinterpret_s32_u32(a);\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet4i preinterpret<Packet4i, Packet4f>(const Packet4f& a) {\n  return vreinterpretq_s32_f32(a);\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet4i preinterpret<Packet4i, Packet4ui>(const Packet4ui& a) {\n  return vreinterpretq_s32_u32(a);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet2ui preinterpret<Packet2ui, Packet2f>(const Packet2f& a) {\n  return vreinterpret_u32_f32(a);\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet2ui preinterpret<Packet2ui, Packet2i>(const Packet2i& a) {\n  return vreinterpret_u32_s32(a);\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet4ui preinterpret<Packet4ui, Packet4f>(const Packet4f& a) {\n  return vreinterpretq_u32_f32(a);\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet4ui preinterpret<Packet4ui, Packet4i>(const Packet4i& a) {\n  return vreinterpretq_u32_s32(a);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet2l preinterpret<Packet2l, Packet2ul>(const Packet2ul& a) {\n  return vreinterpretq_s64_u64(a);\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet2ul preinterpret<Packet2ul, Packet2l>(const Packet2l& a) {\n  return vreinterpretq_u64_s64(a);\n}\n\n#if EIGEN_ARCH_ARM64\n\n//==============================================================================\n// pcast/preinterpret, Double\n//==============================================================================\n\ntemplate <>\nstruct type_casting_traits<double, double> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet2d pcast<Packet2d, Packet2d>(const Packet2d& a) {\n  return a;\n}\n\ntemplate <>\nstruct type_casting_traits<double, float> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet4f pcast<Packet2d, Packet4f>(const Packet2d& a, const Packet2d& b) {\n  return vcombine_f32(vcvt_f32_f64(a), vcvt_f32_f64(b));\n}\n\ntemplate <>\nstruct type_casting_traits<double, numext::int64_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet2l pcast<Packet2d, Packet2l>(const Packet2d& a) {\n  return vcvtq_s64_f64(a);\n}\n\ntemplate <>\nstruct type_casting_traits<double, numext::uint64_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet2ul pcast<Packet2d, Packet2ul>(const Packet2d& a) {\n  return vcvtq_u64_f64(a);\n}\n\ntemplate <>\nstruct type_casting_traits<double, numext::int32_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet4i pcast<Packet2d, Packet4i>(const Packet2d& a, const Packet2d& b) {\n  return vcombine_s32(vmovn_s64(vcvtq_s64_f64(a)), 
vmovn_s64(vcvtq_s64_f64(b)));\n}\n\ntemplate <>\nstruct type_casting_traits<double, numext::uint32_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet4ui pcast<Packet2d, Packet4ui>(const Packet2d& a, const Packet2d& b) {\n  return vcombine_u32(vmovn_u64(vcvtq_u64_f64(a)), vmovn_u64(vcvtq_u64_f64(b)));\n}\n\ntemplate <>\nstruct type_casting_traits<double, numext::int16_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 4, TgtCoeffRatio = 1 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet8s pcast<Packet2d, Packet8s>(const Packet2d& a, const Packet2d& b, const Packet2d& c,\n                                                       const Packet2d& d) {\n  const int32x4_t ab_s32 = pcast<Packet2d, Packet4i>(a, b);\n  const int32x4_t cd_s32 = pcast<Packet2d, Packet4i>(c, d);\n  return vcombine_s16(vmovn_s32(ab_s32), vmovn_s32(cd_s32));\n}\n\ntemplate <>\nstruct type_casting_traits<double, numext::uint16_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 4, TgtCoeffRatio = 1 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet8us pcast<Packet2d, Packet8us>(const Packet2d& a, const Packet2d& b, const Packet2d& c,\n                                                         const Packet2d& d) {\n  const uint32x4_t ab_u32 = pcast<Packet2d, Packet4ui>(a, b);\n  const uint32x4_t cd_u32 = pcast<Packet2d, Packet4ui>(c, d);\n  return vcombine_u16(vmovn_u32(ab_u32), vmovn_u32(cd_u32));\n}\n\ntemplate <>\nstruct type_casting_traits<double, numext::int8_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 8, TgtCoeffRatio = 1 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet16c pcast<Packet2d, Packet16c>(const Packet2d& a, const Packet2d& b, const Packet2d& c,\n                                                         const Packet2d& d, const Packet2d& e, const Packet2d& f,\n                                                         const Packet2d& g, const Packet2d& h) {\n  const int16x8_t abcd_s16 = pcast<Packet2d, Packet8s>(a, b, c, d);\n  const int16x8_t efgh_s16 = pcast<Packet2d, Packet8s>(e, f, g, h);\n  return vcombine_s8(vmovn_s16(abcd_s16), vmovn_s16(efgh_s16));\n}\n\ntemplate <>\nstruct type_casting_traits<double, numext::uint8_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 8, TgtCoeffRatio = 1 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet16uc pcast<Packet2d, Packet16uc>(const Packet2d& a, const Packet2d& b, const Packet2d& c,\n                                                           const Packet2d& d, const Packet2d& e, const Packet2d& f,\n                                                           const Packet2d& g, const Packet2d& h) {\n  const uint16x8_t abcd_u16 = pcast<Packet2d, Packet8us>(a, b, c, d);\n  const uint16x8_t efgh_u16 = pcast<Packet2d, Packet8us>(e, f, g, h);\n  return vcombine_u8(vmovn_u16(abcd_u16), vmovn_u16(efgh_u16));\n}\n\ntemplate <>\nstruct type_casting_traits<float, double> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 2 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet2d pcast<Packet4f, Packet2d>(const Packet4f& a) {\n  // Discard second-half of input.\n  return vcvt_f64_f32(vget_low_f32(a));\n}\n\ntemplate <>\nstruct type_casting_traits<numext::int8_t, double> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 8 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet2d pcast<Packet16c, Packet2d>(const Packet16c& a) {\n  // Discard all but first two values.\n  return vcvt_f64_f32(pcast<Packet8c, Packet2f>(vget_low_s8(a)));\n}\n\ntemplate <>\nstruct type_casting_traits<numext::uint8_t, 
double> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 8 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet2d pcast<Packet16uc, Packet2d>(const Packet16uc& a) {\n  // Discard all but first two values.\n  return vcvt_f64_f32(pcast<Packet8uc, Packet2f>(vget_low_u8(a)));\n}\n\ntemplate <>\nstruct type_casting_traits<numext::int16_t, double> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 4 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet2d pcast<Packet8s, Packet2d>(const Packet8s& a) {\n  // Discard all but first two values.\n  return vcvt_f64_f32(pcast<Packet4s, Packet2f>(vget_low_s16(a)));\n}\n\ntemplate <>\nstruct type_casting_traits<numext::uint16_t, double> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 4 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet2d pcast<Packet8us, Packet2d>(const Packet8us& a) {\n  // Discard all but first two values.\n  return vcvt_f64_f32(pcast<Packet4us, Packet2f>(vget_low_u16(a)));\n}\n\ntemplate <>\nstruct type_casting_traits<numext::int32_t, double> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 2 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet2d pcast<Packet4i, Packet2d>(const Packet4i& a) {\n  // Discard second half of input.\n  return vcvtq_f64_s64(vmovl_s32(vget_low_s32(a)));\n}\n\ntemplate <>\nstruct type_casting_traits<numext::uint32_t, double> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 2 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet2d pcast<Packet4ui, Packet2d>(const Packet4ui& a) {\n  // Discard second half of input.\n  return vcvtq_f64_u64(vmovl_u32(vget_low_u32(a)));\n}\n\ntemplate <>\nstruct type_casting_traits<numext::int64_t, double> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet2d pcast<Packet2l, Packet2d>(const Packet2l& a) {\n  return vcvtq_f64_s64(a);\n}\n\ntemplate <>\nstruct type_casting_traits<numext::uint64_t, double> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };\n};\ntemplate <>\nEIGEN_STRONG_INLINE Packet2d pcast<Packet2ul, Packet2d>(const Packet2ul& a) {\n  return vcvtq_f64_u64(a);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE Packet2d preinterpret<Packet2d, Packet2l>(const Packet2l& a) {\n  return vreinterpretq_f64_s64(a);\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet2d preinterpret<Packet2d, Packet2ul>(const Packet2ul& a) {\n  return vreinterpretq_f64_u64(a);\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet2l preinterpret<Packet2l, Packet2d>(const Packet2d& a) {\n  return vreinterpretq_s64_f64(a);\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet2ul preinterpret<Packet2ul, Packet2d>(const Packet2d& a) {\n  return vreinterpretq_u64_f64(a);\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet2d preinterpret<Packet2d, Packet4i>(const Packet4i& a) {\n  return vreinterpretq_f64_s32(a);\n}\ntemplate <>\nEIGEN_STRONG_INLINE Packet4i preinterpret<Packet4i, Packet2d>(const Packet2d& a) {\n  return vreinterpretq_s32_f64(a);\n}\n\n#endif  // EIGEN_ARCH_ARM64\n\n}  // end namespace internal\n\n}  // end namespace Eigen\n\n#endif  // EIGEN_TYPE_CASTING_NEON_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/arch/SSE/Complex.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_COMPLEX_SSE_H\n#define EIGEN_COMPLEX_SSE_H\n\nnamespace Eigen {\n\nnamespace internal {\n\n//---------- float ----------\nstruct Packet2cf\n{\n  EIGEN_STRONG_INLINE Packet2cf() {}\n  EIGEN_STRONG_INLINE explicit Packet2cf(const __m128& a) : v(a) {}\n  Packet4f v;\n};\n\n// Use the packet_traits defined in AVX/PacketMath.h instead if we're going\n// to leverage AVX instructions.\n#ifndef EIGEN_VECTORIZE_AVX\ntemplate<> struct packet_traits<std::complex<float> >  : default_packet_traits\n{\n  typedef Packet2cf type;\n  typedef Packet2cf half;\n  enum {\n    Vectorizable = 1,\n    AlignedOnScalar = 1,\n    size = 2,\n    HasHalfPacket = 0,\n\n    HasAdd    = 1,\n    HasSub    = 1,\n    HasMul    = 1,\n    HasDiv    = 1,\n    HasNegate = 1,\n    HasSqrt   = 1,\n    HasAbs    = 0,\n    HasAbs2   = 0,\n    HasMin    = 0,\n    HasMax    = 0,\n    HasSetLinear = 0,\n    HasBlend  = 1\n  };\n};\n#endif\n\ntemplate<> struct unpacket_traits<Packet2cf> {\n  typedef std::complex<float> type;\n  typedef Packet2cf half;\n  typedef Packet4f as_real;\n  enum {\n    size=2,\n    alignment=Aligned16,\n    vectorizable=true,\n    masked_load_available=false,\n    masked_store_available=false\n  };\n};\n\ntemplate<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_add_ps(a.v,b.v)); }\ntemplate<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_sub_ps(a.v,b.v)); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a)\n{\n  const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x80000000,0x80000000,0x80000000));\n  return Packet2cf(_mm_xor_ps(a.v,mask));\n}\ntemplate<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a)\n{\n  const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));\n  return Packet2cf(_mm_xor_ps(a.v,mask));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)\n{\n  #ifdef EIGEN_VECTORIZE_SSE3\n  return Packet2cf(_mm_addsub_ps(_mm_mul_ps(_mm_moveldup_ps(a.v), b.v),\n                                 _mm_mul_ps(_mm_movehdup_ps(a.v),\n                                            vec4f_swizzle1(b.v, 1, 0, 3, 2))));\n//   return Packet2cf(_mm_addsub_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v),\n//                                  _mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3),\n//                                             vec4f_swizzle1(b.v, 1, 0, 3, 2))));\n  #else\n  const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x00000000,0x80000000,0x00000000));\n  return Packet2cf(_mm_add_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v),\n                              _mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3),\n                                                    vec4f_swizzle1(b.v, 1, 0, 3, 2)), mask)));\n  #endif\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet2cf ptrue  <Packet2cf>(const Packet2cf& a) { return Packet2cf(ptrue(Packet4f(a.v))); }\ntemplate<> EIGEN_STRONG_INLINE Packet2cf pand   <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { 
return Packet2cf(_mm_and_ps(a.v,b.v)); }\ntemplate<> EIGEN_STRONG_INLINE Packet2cf por    <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_or_ps(a.v,b.v)); }\ntemplate<> EIGEN_STRONG_INLINE Packet2cf pxor   <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_xor_ps(a.v,b.v)); }\ntemplate<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_andnot_ps(b.v,a.v)); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet2cf pload <Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload<Packet4f>(&numext::real_ref(*from))); }\ntemplate<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu<Packet4f>(&numext::real_ref(*from))); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>&  from)\n{\n  Packet2cf res;\n#ifdef EIGEN_VECTORIZE_SSE3\n  res.v = _mm_castpd_ps(_mm_loaddup_pd(reinterpret_cast<double const*>(&from)));\n#else\n  res.v = _mm_castpd_ps(_mm_load_sd(reinterpret_cast<double const*>(&from)));\n  res.v = _mm_movelh_ps(res.v, res.v);\n#endif\n  return res;\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from) { return pset1<Packet2cf>(*from); }\n\ntemplate<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> *   to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore(&numext::real_ref(*to), Packet4f(from.v)); }\ntemplate<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> *   to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(&numext::real_ref(*to), Packet4f(from.v)); }\n\n\ntemplate<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, Index stride)\n{\n  return Packet2cf(_mm_set_ps(std::imag(from[1*stride]), std::real(from[1*stride]),\n                              std::imag(from[0*stride]), std::real(from[0*stride])));\n}\n\ntemplate<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, Index stride)\n{\n  to[stride*0] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 0)),\n                                     _mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 1)));\n  to[stride*1] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 2)),\n                                     _mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 3)));\n}\n\ntemplate<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> *   addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }\n\ntemplate<> EIGEN_STRONG_INLINE std::complex<float>  pfirst<Packet2cf>(const Packet2cf& a)\n{\n  #if EIGEN_GNUC_AT_MOST(4,3)\n  // Workaround gcc 4.2 ICE - this is not performance-wise ideal, but who cares...\n  // This workaround also fixes invalid code generation with gcc 4.3\n  EIGEN_ALIGN16 std::complex<float> res[2];\n  _mm_store_ps((float*)res, a.v);\n  return res[0];\n  #else\n  std::complex<float> res;\n  _mm_storel_pi((__m64*)&res, a.v);\n  return res;\n  #endif\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a) { return Packet2cf(_mm_castpd_ps(preverse(Packet2d(_mm_castps_pd(a.v))))); }\n\ntemplate<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a)\n{\n  return pfirst(Packet2cf(_mm_add_ps(a.v, 
_mm_movehl_ps(a.v,a.v))));\n}\n\ntemplate<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a)\n{\n  return pfirst(pmul(a, Packet2cf(_mm_movehl_ps(a.v,a.v))));\n}\n\nEIGEN_STRONG_INLINE Packet2cf pcplxflip/* <Packet2cf> */(const Packet2cf& x)\n{\n  return Packet2cf(vec4f_swizzle1(x.v, 1, 0, 3, 2));\n}\n\nEIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f)\n\ntemplate<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)\n{\n  // TODO optimize it for SSE3 and 4\n  Packet2cf res = pmul(a, pconj(b));\n  __m128 s = _mm_mul_ps(b.v,b.v);\n  return Packet2cf(_mm_div_ps(res.v,_mm_add_ps(s,vec4f_swizzle1(s, 1, 0, 3, 2))));\n}\n\n\n\n//---------- double ----------\nstruct Packet1cd\n{\n  EIGEN_STRONG_INLINE Packet1cd() {}\n  EIGEN_STRONG_INLINE explicit Packet1cd(const __m128d& a) : v(a) {}\n  Packet2d v;\n};\n\n// Use the packet_traits defined in AVX/PacketMath.h instead if we're going\n// to leverage AVX instructions.\n#ifndef EIGEN_VECTORIZE_AVX\ntemplate<> struct packet_traits<std::complex<double> >  : default_packet_traits\n{\n  typedef Packet1cd type;\n  typedef Packet1cd half;\n  enum {\n    Vectorizable = 1,\n    AlignedOnScalar = 0,\n    size = 1,\n    HasHalfPacket = 0,\n\n    HasAdd    = 1,\n    HasSub    = 1,\n    HasMul    = 1,\n    HasDiv    = 1,\n    HasNegate = 1,\n    HasSqrt   = 1,\n    HasAbs    = 0,\n    HasAbs2   = 0,\n    HasMin    = 0,\n    HasMax    = 0,\n    HasSetLinear = 0\n  };\n};\n#endif\n\ntemplate<> struct unpacket_traits<Packet1cd> {\n  typedef std::complex<double> type;\n  typedef Packet1cd half;\n  typedef Packet2d as_real;\n  enum {\n    size=1,\n    alignment=Aligned16,\n    vectorizable=true,\n    masked_load_available=false,\n    masked_store_available=false\n  };\n};\n\ntemplate<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_add_pd(a.v,b.v)); }\ntemplate<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_sub_pd(a.v,b.v)); }\ntemplate<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) { return Packet1cd(pnegate(Packet2d(a.v))); }\ntemplate<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a)\n{\n  const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));\n  return Packet1cd(_mm_xor_pd(a.v,mask));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b)\n{\n  #ifdef EIGEN_VECTORIZE_SSE3\n  return Packet1cd(_mm_addsub_pd(_mm_mul_pd(_mm_movedup_pd(a.v), b.v),\n                                 _mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),\n                                            vec2d_swizzle1(b.v, 1, 0))));\n  #else\n  const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x0,0x0,0x80000000,0x0));\n  return Packet1cd(_mm_add_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v),\n                              _mm_xor_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),\n                                                    vec2d_swizzle1(b.v, 1, 0)), mask)));\n  #endif\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet1cd ptrue  <Packet1cd>(const Packet1cd& a) { return Packet1cd(ptrue(Packet2d(a.v))); }\ntemplate<> EIGEN_STRONG_INLINE Packet1cd pand   <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_and_pd(a.v,b.v)); }\ntemplate<> EIGEN_STRONG_INLINE Packet1cd por    <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_or_pd(a.v,b.v)); }\ntemplate<> EIGEN_STRONG_INLINE Packet1cd pxor   
<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_xor_pd(a.v,b.v)); }\ntemplate<> EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_andnot_pd(b.v,a.v)); }\n\n// FIXME force unaligned load, this is a temporary fix\ntemplate<> EIGEN_STRONG_INLINE Packet1cd pload <Packet1cd>(const std::complex<double>* from)\n{ EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload<Packet2d>((const double*)from)); }\ntemplate<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from)\n{ EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu<Packet2d>((const double*)from)); }\ntemplate<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>&  from)\n{ /* here we really have to use unaligned loads :( */ return ploadu<Packet1cd>(&from); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>* from) { return pset1<Packet1cd>(*from); }\n\n// FIXME force unaligned store, this is a temporary fix\ntemplate<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> *   to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, Packet2d(from.v)); }\ntemplate<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> *   to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, Packet2d(from.v)); }\n\ntemplate<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> *   addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }\n\ntemplate<> EIGEN_STRONG_INLINE std::complex<double>  pfirst<Packet1cd>(const Packet1cd& a)\n{\n  EIGEN_ALIGN16 double res[2];\n  _mm_store_pd(res, a.v);\n  return std::complex<double>(res[0],res[1]);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a) { return a; }\n\ntemplate<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Packet1cd& a)\n{\n  return pfirst(a);\n}\n\ntemplate<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a)\n{\n  return pfirst(a);\n}\n\nEIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d)\n\ntemplate<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)\n{\n  // TODO optimize it for SSE3 and 4\n  Packet1cd res = pmul(a,pconj(b));\n  __m128d s = _mm_mul_pd(b.v,b.v);\n  return Packet1cd(_mm_div_pd(res.v, _mm_add_pd(s,_mm_shuffle_pd(s, s, 0x1))));\n}\n\nEIGEN_STRONG_INLINE Packet1cd pcplxflip/* <Packet1cd> */(const Packet1cd& x)\n{\n  return Packet1cd(preverse(Packet2d(x.v)));\n}\n\nEIGEN_DEVICE_FUNC inline void\nptranspose(PacketBlock<Packet2cf,2>& kernel) {\n  __m128d w1 = _mm_castps_pd(kernel.packet[0].v);\n  __m128d w2 = _mm_castps_pd(kernel.packet[1].v);\n\n  __m128 tmp = _mm_castpd_ps(_mm_unpackhi_pd(w1, w2));\n  kernel.packet[0].v = _mm_castpd_ps(_mm_unpacklo_pd(w1, w2));\n  kernel.packet[1].v = tmp;\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet2cf pcmp_eq(const Packet2cf& a, const Packet2cf& b)\n{\n  __m128 eq = _mm_cmpeq_ps(a.v, b.v);\n  return Packet2cf(pand<Packet4f>(eq, vec4f_swizzle1(eq, 1, 0, 3, 2)));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet1cd pcmp_eq(const Packet1cd& a, const Packet1cd& b)\n{\n  __m128d eq = _mm_cmpeq_pd(a.v, b.v);\n  return Packet1cd(pand<Packet2d>(eq, vec2d_swizzle1(eq, 1, 0)));\n}\n\ntemplate<>  EIGEN_STRONG_INLINE Packet2cf pblend(const Selector<2>& ifPacket, const Packet2cf& thenPacket, const Packet2cf& elsePacket) {\n  __m128d result = pblend<Packet2d>(ifPacket, 
_mm_castps_pd(thenPacket.v), _mm_castps_pd(elsePacket.v));\n  return Packet2cf(_mm_castpd_ps(result));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet1cd psqrt<Packet1cd>(const Packet1cd& a) {\n  return psqrt_complex<Packet1cd>(a);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet2cf psqrt<Packet2cf>(const Packet2cf& a) {\n  return psqrt_complex<Packet2cf>(a);\n}\n\n} // end namespace internal\n} // end namespace Eigen\n\n#endif // EIGEN_COMPLEX_SSE_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/arch/SSE/MathFunctions.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2007 Julien Pommier\n// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n/* The sin and cos and functions of this file come from\n * Julien Pommier's sse math library: http://gruntthepeon.free.fr/ssemath/\n */\n\n#ifndef EIGEN_MATH_FUNCTIONS_SSE_H\n#define EIGEN_MATH_FUNCTIONS_SSE_H\n\nnamespace Eigen {\n\nnamespace internal {\n\ntemplate<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED\nPacket4f plog<Packet4f>(const Packet4f& _x) {\n  return plog_float(_x);\n}\n\ntemplate<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED\nPacket2d plog<Packet2d>(const Packet2d& _x) {\n  return plog_double(_x);\n}\n\ntemplate<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED\nPacket4f plog2<Packet4f>(const Packet4f& _x) {\n  return plog2_float(_x);\n}\n\ntemplate<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED\nPacket2d plog2<Packet2d>(const Packet2d& _x) {\n  return plog2_double(_x);\n}\n\ntemplate<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED\nPacket4f plog1p<Packet4f>(const Packet4f& _x) {\n  return generic_plog1p(_x);\n}\n\ntemplate<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED\nPacket4f pexpm1<Packet4f>(const Packet4f& _x) {\n  return generic_expm1(_x);\n}\n\ntemplate<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED\nPacket4f pexp<Packet4f>(const Packet4f& _x)\n{\n  return pexp_float(_x);\n}\n\ntemplate<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED\nPacket2d pexp<Packet2d>(const Packet2d& x)\n{\n  return pexp_double(x);\n}\n\ntemplate<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED\nPacket4f psin<Packet4f>(const Packet4f& _x)\n{\n  return psin_float(_x);\n}\n\ntemplate<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED\nPacket4f pcos<Packet4f>(const Packet4f& _x)\n{\n  return pcos_float(_x);\n}\n\n#if EIGEN_FAST_MATH\n\n// Functions for sqrt.\n// The EIGEN_FAST_MATH version uses the _mm_rsqrt_ps approximation and one step\n// of Newton's method, at a cost of 1-2 bits of precision as opposed to the\n// exact solution. It does not handle +inf, or denormalized numbers correctly.\n// The main advantage of this approach is not just speed, but also the fact that\n// it can be inlined and pipelined with other computations, further reducing its\n// effective latency. 
This is similar to Quake3's fast inverse square root.\n// For detail see here: http://www.beyond3d.com/content/articles/8/\ntemplate<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED\nPacket4f psqrt<Packet4f>(const Packet4f& _x)\n{\n  Packet4f minus_half_x = pmul(_x, pset1<Packet4f>(-0.5f));\n  Packet4f denormal_mask = pandnot(\n      pcmp_lt(_x, pset1<Packet4f>((std::numeric_limits<float>::min)())),\n      pcmp_lt(_x, pzero(_x)));\n\n  // Compute approximate reciprocal sqrt.\n  Packet4f x = _mm_rsqrt_ps(_x);\n  // Do a single step of Newton's iteration.\n  x = pmul(x, pmadd(minus_half_x, pmul(x,x), pset1<Packet4f>(1.5f)));\n  // Flush results for denormals to zero.\n  return pandnot(pmul(_x,x), denormal_mask);\n}\n\n#else\n\ntemplate<>EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED\nPacket4f psqrt<Packet4f>(const Packet4f& x) { return _mm_sqrt_ps(x); }\n\n#endif\n\ntemplate<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED\nPacket2d psqrt<Packet2d>(const Packet2d& x) { return _mm_sqrt_pd(x); }\n\ntemplate<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED\nPacket16b psqrt<Packet16b>(const Packet16b& x) { return x; }\n\n#if EIGEN_FAST_MATH\n\ntemplate<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED\nPacket4f prsqrt<Packet4f>(const Packet4f& _x) {\n  _EIGEN_DECLARE_CONST_Packet4f(one_point_five, 1.5f);\n  _EIGEN_DECLARE_CONST_Packet4f(minus_half, -0.5f);\n  _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(inf, 0x7f800000u);\n  _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(flt_min, 0x00800000u);\n\n  Packet4f neg_half = pmul(_x, p4f_minus_half);\n\n  // Identify infinite, zero, negative and denormal arguments.\n  Packet4f lt_min_mask = _mm_cmplt_ps(_x, p4f_flt_min);\n  Packet4f inf_mask = _mm_cmpeq_ps(_x, p4f_inf);\n  Packet4f not_normal_finite_mask = _mm_or_ps(lt_min_mask, inf_mask);\n\n  // Compute an approximate result using the rsqrt intrinsic.\n  Packet4f y_approx = _mm_rsqrt_ps(_x);\n\n  // Do a single step of Newton-Raphson iteration to improve the approximation.\n  // This uses the formula y_{n+1} = y_n * (1.5 - y_n * (0.5 * x) * y_n).\n  // It is essential to evaluate the inner term like this because forming\n  // y_n^2 may over- or underflow.\n  Packet4f y_newton = pmul(\n      y_approx, pmadd(y_approx, pmul(neg_half, y_approx), p4f_one_point_five));\n\n  // Select the result of the Newton-Raphson step for positive normal arguments.\n  // For other arguments, choose the output of the intrinsic. 
This will\n  // return rsqrt(+inf) = 0, rsqrt(x) = NaN if x < 0, and rsqrt(x) = +inf if\n  // x is zero or a positive denormalized float (equivalent to flushing positive\n  // denormalized inputs to zero).\n  return pselect<Packet4f>(not_normal_finite_mask, y_approx, y_newton);\n}\n\n#else\n\ntemplate<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED\nPacket4f prsqrt<Packet4f>(const Packet4f& x) {\n  // Unfortunately we can't use the much faster mm_rsqrt_ps since it only provides an approximation.\n  return _mm_div_ps(pset1<Packet4f>(1.0f), _mm_sqrt_ps(x));\n}\n\n#endif\n\ntemplate<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED\nPacket2d prsqrt<Packet2d>(const Packet2d& x) {\n  return _mm_div_pd(pset1<Packet2d>(1.0), _mm_sqrt_pd(x));\n}\n\n// Hyperbolic Tangent function.\ntemplate <>\nEIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f\nptanh<Packet4f>(const Packet4f& x) {\n  return internal::generic_fast_tanh_float(x);\n}\n\n} // end namespace internal\n\nnamespace numext {\n\ntemplate<>\nEIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE\nfloat sqrt(const float &x)\n{\n  return internal::pfirst(internal::Packet4f(_mm_sqrt_ss(_mm_set_ss(x))));\n}\n\ntemplate<>\nEIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE\ndouble sqrt(const double &x)\n{\n#if EIGEN_COMP_GNUC_STRICT\n  // This works around a GCC bug generating poor code for _mm_sqrt_pd\n  // See https://gitlab.com/libeigen/eigen/commit/8dca9f97e38970\n  return internal::pfirst(internal::Packet2d(__builtin_ia32_sqrtsd(_mm_set_sd(x))));\n#else\n  return internal::pfirst(internal::Packet2d(_mm_sqrt_pd(_mm_set_sd(x))));\n#endif\n}\n\n} // end namespace numext\n\n} // end namespace Eigen\n\n#endif // EIGEN_MATH_FUNCTIONS_SSE_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/arch/SSE/PacketMath.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_PACKET_MATH_SSE_H\n#define EIGEN_PACKET_MATH_SSE_H\n\nnamespace Eigen {\n\nnamespace internal {\n\n#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD\n#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8\n#endif\n\n#if !defined(EIGEN_VECTORIZE_AVX) && !defined(EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS)\n// 32 bits =>  8 registers\n// 64 bits => 16 registers\n#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS (2*sizeof(void*))\n#endif\n\n#ifdef EIGEN_VECTORIZE_FMA\n#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD\n#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD\n#endif\n#endif\n\n#if ((defined EIGEN_VECTORIZE_AVX) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_MINGW) && (__GXX_ABI_VERSION < 1004)) || EIGEN_OS_QNX\n// With GCC's default ABI version, a __m128 or __m256 are the same types and therefore we cannot\n// have overloads for both types without linking error.\n// One solution is to increase ABI version using -fabi-version=4 (or greater).\n// Otherwise, we workaround this inconvenience by wrapping 128bit types into the following helper\n// structure:\ntypedef eigen_packet_wrapper<__m128>  Packet4f;\ntypedef eigen_packet_wrapper<__m128d> Packet2d;\n#else\ntypedef __m128  Packet4f;\ntypedef __m128d Packet2d;\n#endif\n\ntypedef eigen_packet_wrapper<__m128i, 0> Packet4i;\ntypedef eigen_packet_wrapper<__m128i, 1> Packet16b;\n\ntemplate<> struct is_arithmetic<__m128>  { enum { value = true }; };\ntemplate<> struct is_arithmetic<__m128i> { enum { value = true }; };\ntemplate<> struct is_arithmetic<__m128d> { enum { value = true }; };\ntemplate<> struct is_arithmetic<Packet4i>  { enum { value = true }; };\ntemplate<> struct is_arithmetic<Packet16b>  { enum { value = true }; };\n\ntemplate<int p, int q, int r, int s>\nstruct shuffle_mask{\n enum { mask = (s)<<6|(r)<<4|(q)<<2|(p) };\n};\n\n// TODO: change the implementation of all swizzle* ops from macro to template,\n#define vec4f_swizzle1(v,p,q,r,s) \\\n  Packet4f(_mm_castsi128_ps(_mm_shuffle_epi32( _mm_castps_si128(v), (shuffle_mask<p,q,r,s>::mask))))\n\n#define vec4i_swizzle1(v,p,q,r,s) \\\n  Packet4i(_mm_shuffle_epi32( v, (shuffle_mask<p,q,r,s>::mask)))\n\n#define vec2d_swizzle1(v,p,q) \\\n  Packet2d(_mm_castsi128_pd(_mm_shuffle_epi32( _mm_castpd_si128(v), (shuffle_mask<2*p,2*p+1,2*q,2*q+1>::mask))))\n\n#define vec4f_swizzle2(a,b,p,q,r,s) \\\n  Packet4f(_mm_shuffle_ps( (a), (b), (shuffle_mask<p,q,r,s>::mask)))\n\n#define vec4i_swizzle2(a,b,p,q,r,s) \\\n  Packet4i(_mm_castps_si128( (_mm_shuffle_ps( _mm_castsi128_ps(a), _mm_castsi128_ps(b), (shuffle_mask<p,q,r,s>::mask)))))\n\nEIGEN_STRONG_INLINE Packet4f vec4f_movelh(const Packet4f& a, const Packet4f& b)\n{\n  return Packet4f(_mm_movelh_ps(a,b));\n}\nEIGEN_STRONG_INLINE Packet4f vec4f_movehl(const Packet4f& a, const Packet4f& b)\n{\n  return Packet4f(_mm_movehl_ps(a,b));\n}\nEIGEN_STRONG_INLINE Packet4f vec4f_unpacklo(const Packet4f& a, const Packet4f& b)\n{\n  return Packet4f(_mm_unpacklo_ps(a,b));\n}\nEIGEN_STRONG_INLINE Packet4f vec4f_unpackhi(const Packet4f& a, const Packet4f& b)\n{\n  return Packet4f(_mm_unpackhi_ps(a,b));\n}\n#define vec4f_duplane(a,p) \\\n  vec4f_swizzle2(a,a,p,p,p,p)\n\n#define vec2d_swizzle2(a,b,mask) 
\\\n  Packet2d(_mm_shuffle_pd(a,b,mask))\n\nEIGEN_STRONG_INLINE Packet2d vec2d_unpacklo(const Packet2d& a, const Packet2d& b)\n{\n  return Packet2d(_mm_unpacklo_pd(a,b));\n}\nEIGEN_STRONG_INLINE Packet2d vec2d_unpackhi(const Packet2d& a, const Packet2d& b)\n{\n  return Packet2d(_mm_unpackhi_pd(a,b));\n}\n#define vec2d_duplane(a,p) \\\n  vec2d_swizzle2(a,a,(p<<1)|p)\n\n#define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \\\n  const Packet4f p4f_##NAME = pset1<Packet4f>(X)\n\n#define _EIGEN_DECLARE_CONST_Packet2d(NAME,X) \\\n  const Packet2d p2d_##NAME = pset1<Packet2d>(X)\n\n#define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \\\n  const Packet4f p4f_##NAME = pset1frombits<Packet4f>(X)\n\n#define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \\\n  const Packet4i p4i_##NAME = pset1<Packet4i>(X)\n\n\n// Use the packet_traits defined in AVX/PacketMath.h instead if we're going\n// to leverage AVX instructions.\n#ifndef EIGEN_VECTORIZE_AVX\ntemplate <>\nstruct packet_traits<float> : default_packet_traits {\n  typedef Packet4f type;\n  typedef Packet4f half;\n  enum {\n    Vectorizable = 1,\n    AlignedOnScalar = 1,\n    size = 4,\n    HasHalfPacket = 0,\n\n    HasCmp  = 1,\n    HasDiv = 1,\n    HasSin = EIGEN_FAST_MATH,\n    HasCos = EIGEN_FAST_MATH,\n    HasLog = 1,\n    HasLog1p = 1,\n    HasExpm1 = 1,\n    HasNdtri = 1,\n    HasExp = 1,\n    HasBessel = 1,\n    HasSqrt = 1,\n    HasRsqrt = 1,\n    HasTanh = EIGEN_FAST_MATH,\n    HasErf = EIGEN_FAST_MATH,\n    HasBlend = 1,\n    HasCeil = 1,\n    HasFloor = 1,\n#ifdef EIGEN_VECTORIZE_SSE4_1\n    HasRound = 1,\n#endif\n    HasRint = 1\n  };\n};\ntemplate <>\nstruct packet_traits<double> : default_packet_traits {\n  typedef Packet2d type;\n  typedef Packet2d half;\n  enum {\n    Vectorizable = 1,\n    AlignedOnScalar = 1,\n    size=2,\n    HasHalfPacket = 0,\n\n    HasCmp  = 1,\n    HasDiv  = 1,\n    HasLog  = 1,\n    HasExp  = 1,\n    HasSqrt = 1,\n    HasRsqrt = 1,\n    HasBlend = 1,\n    HasFloor = 1,\n    HasCeil = 1,\n#ifdef EIGEN_VECTORIZE_SSE4_1\n    HasRound = 1,\n#endif\n    HasRint = 1\n  };\n};\n#endif\ntemplate<> struct packet_traits<int>    : default_packet_traits\n{\n  typedef Packet4i type;\n  typedef Packet4i half;\n  enum {\n    Vectorizable = 1,\n    AlignedOnScalar = 1,\n    size=4,\n\n    HasShift = 1,\n    HasBlend = 1\n  };\n};\n\ntemplate<> struct packet_traits<bool> : default_packet_traits\n{\n  typedef Packet16b type;\n  typedef Packet16b half;\n  enum {\n    Vectorizable = 1,\n    AlignedOnScalar = 1,\n    HasHalfPacket = 0,\n    size=16,\n\n    HasAdd       = 1,\n    HasSub       = 1,\n    HasShift     = 0,\n    HasMul       = 1,\n    HasNegate    = 1,\n    HasAbs       = 0,\n    HasAbs2      = 0,\n    HasMin       = 0,\n    HasMax       = 0,\n    HasConj      = 0,\n    HasSqrt      = 1\n  };\n};\n\ntemplate<> struct unpacket_traits<Packet4f> {\n  typedef float     type;\n  typedef Packet4f  half;\n  typedef Packet4i  integer_packet;\n  enum {size=4, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false};\n};\ntemplate<> struct unpacket_traits<Packet2d> {\n  typedef double    type;\n  typedef Packet2d  half;\n  enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false};\n};\ntemplate<> struct unpacket_traits<Packet4i> {\n  typedef int       type;\n  typedef Packet4i  half;\n  enum {size=4, alignment=Aligned16, vectorizable=false, masked_load_available=false, masked_store_available=false};\n};\ntemplate<> struct 
unpacket_traits<Packet16b> {\n  typedef bool       type;\n  typedef Packet16b  half;\n  enum {size=16, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false};\n};\n\n#ifndef EIGEN_VECTORIZE_AVX\ntemplate<> struct scalar_div_cost<float,true> { enum { value = 7 }; };\ntemplate<> struct scalar_div_cost<double,true> { enum { value = 8 }; };\n#endif\n\n#if EIGEN_COMP_MSVC==1500\n// Workaround MSVC 9 internal compiler error.\n// TODO: It has been detected with win64 builds (amd64), so let's check whether it also happens in 32bits+SSE mode\n// TODO: let's check whether a better fix exists, like adding a pset0() function (it crashed on pset1(0)).\ntemplate<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float&  from) { return _mm_set_ps(from,from,from,from); }\ntemplate<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return _mm_set_pd(from,from); }\ntemplate<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int&    from) { return _mm_set_epi32(from,from,from,from); }\n#else\ntemplate<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float&  from) { return _mm_set_ps1(from); }\ntemplate<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return _mm_set1_pd(from); }\ntemplate<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int&    from) { return _mm_set1_epi32(from); }\n#endif\ntemplate<> EIGEN_STRONG_INLINE Packet16b pset1<Packet16b>(const bool&    from) { return _mm_set1_epi8(static_cast<char>(from)); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f pset1frombits<Packet4f>(unsigned int from) { return _mm_castsi128_ps(pset1<Packet4i>(from)); }\ntemplate<> EIGEN_STRONG_INLINE Packet2d pset1frombits<Packet2d>(uint64_t from) { return _mm_castsi128_pd(_mm_set1_epi64x(from)); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f peven_mask(const Packet4f& /*a*/) { return _mm_castsi128_ps(_mm_set_epi32(0, -1, 0, -1)); }\ntemplate<> EIGEN_STRONG_INLINE Packet4i peven_mask(const Packet4i& /*a*/) { return _mm_set_epi32(0, -1, 0, -1); }\ntemplate<> EIGEN_STRONG_INLINE Packet2d peven_mask(const Packet2d& /*a*/) { return _mm_castsi128_pd(_mm_set_epi32(0, 0, -1, -1)); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f pzero(const Packet4f& /*a*/) { return _mm_setzero_ps(); }\ntemplate<> EIGEN_STRONG_INLINE Packet2d pzero(const Packet2d& /*a*/) { return _mm_setzero_pd(); }\ntemplate<> EIGEN_STRONG_INLINE Packet4i pzero(const Packet4i& /*a*/) { return _mm_setzero_si128(); }\n\n// GCC generates a shufps instruction for _mm_set1_ps/_mm_load1_ps instead of the more efficient pshufd instruction.\n// However, using intrinsics for pset1 makes gcc generate crappy code in some cases (see bug 203)\n// Using inline assembly is also not an option because then gcc fails to properly reorder the instructions.\n// Therefore, we introduced the pload1 functions to be used in product kernels for which bug 203 does not apply.\n// Also note that with AVX, we want it to generate a vbroadcastss.\n#if EIGEN_COMP_GNUC_STRICT && (!defined __AVX__)\ntemplate<> EIGEN_STRONG_INLINE Packet4f pload1<Packet4f>(const float *from) {\n  return vec4f_swizzle1(_mm_load_ss(from),0,0,0,0);\n}\n#endif\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f plset<Packet4f>(const float& a) { return _mm_add_ps(pset1<Packet4f>(a), _mm_set_ps(3,2,1,0)); }\ntemplate<> EIGEN_STRONG_INLINE Packet2d plset<Packet2d>(const double& a) { return _mm_add_pd(pset1<Packet2d>(a),_mm_set_pd(1,0)); }\ntemplate<> EIGEN_STRONG_INLINE Packet4i plset<Packet4i>(const int& a) { return 
_mm_add_epi32(pset1<Packet4i>(a),_mm_set_epi32(3,2,1,0)); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f padd<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_add_ps(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet2d padd<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_add_pd(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4i padd<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_add_epi32(a,b); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet16b padd<Packet16b>(const Packet16b& a, const Packet16b& b) { return _mm_or_si128(a,b); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f psub<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_sub_ps(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet2d psub<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_sub_pd(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4i psub<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_sub_epi32(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet16b psub<Packet16b>(const Packet16b& a, const Packet16b& b) { return _mm_xor_si128(a,b); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f pxor<Packet4f>(const Packet4f& a, const Packet4f& b);\ntemplate<> EIGEN_STRONG_INLINE Packet4f paddsub<Packet4f>(const Packet4f& a, const Packet4f& b)\n{\n#ifdef EIGEN_VECTORIZE_SSE3\n  return _mm_addsub_ps(a,b);\n#else\n  const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x0,0x80000000,0x0));\n  return padd(a, pxor(mask, b));\n#endif\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet2d pxor<Packet2d>(const Packet2d& , const Packet2d& );\ntemplate<> EIGEN_STRONG_INLINE Packet2d paddsub<Packet2d>(const Packet2d& a, const Packet2d& b) \n{\n#ifdef EIGEN_VECTORIZE_SSE3  \n  return _mm_addsub_pd(a,b); \n#else\n  const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0x0,0x80000000,0x0,0x0)); \n  return padd(a, pxor(mask, b));\n#endif\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a)\n{\n  const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x80000000,0x80000000,0x80000000));\n  return _mm_xor_ps(a,mask);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a)\n{\n  const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0x0,0x80000000,0x0,0x80000000));\n  return _mm_xor_pd(a,mask);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a)\n{\n  return psub(Packet4i(_mm_setr_epi32(0,0,0,0)), a);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet16b pnegate(const Packet16b& a)\n{\n  return psub(pset1<Packet16b>(false), a);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f pconj(const Packet4f& a) { return a; }\ntemplate<> EIGEN_STRONG_INLINE Packet2d pconj(const Packet2d& a) { return a; }\ntemplate<> EIGEN_STRONG_INLINE Packet4i pconj(const Packet4i& a) { return a; }\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f pmul<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_mul_ps(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet2d pmul<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_mul_pd(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4i pmul<Packet4i>(const Packet4i& a, const Packet4i& b)\n{\n#ifdef EIGEN_VECTORIZE_SSE4_1\n  return _mm_mullo_epi32(a,b);\n#else\n  // this version is slightly faster than 4 scalar products\n  return vec4i_swizzle1(\n            vec4i_swizzle2(\n              _mm_mul_epu32(a,b),\n              _mm_mul_epu32(vec4i_swizzle1(a,1,0,3,2),\n                            vec4i_swizzle1(b,1,0,3,2)),\n              0,2,0,2),\n            0,2,1,3);\n#endif\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet16b pmul<Packet16b>(const 
Packet16b& a, const Packet16b& b) { return _mm_and_si128(a,b); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_div_ps(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet2d pdiv<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_div_pd(a,b); }\n\n// for some weird reasons, it has to be overloaded for packets of integers\ntemplate<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return padd(pmul(a,b), c); }\n#ifdef EIGEN_VECTORIZE_FMA\ntemplate<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return _mm_fmadd_ps(a,b,c); }\ntemplate<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return _mm_fmadd_pd(a,b,c); }\n#endif\n\n#ifdef EIGEN_VECTORIZE_SSE4_1\ntemplate<> EIGEN_DEVICE_FUNC inline Packet4f pselect(const Packet4f& mask, const Packet4f& a, const Packet4f& b) {\n  return _mm_blendv_ps(b,a,mask);\n}\n\ntemplate<> EIGEN_DEVICE_FUNC inline Packet4i pselect(const Packet4i& mask, const Packet4i& a, const Packet4i& b) {\n  return _mm_castps_si128(_mm_blendv_ps(_mm_castsi128_ps(b),_mm_castsi128_ps(a),_mm_castsi128_ps(mask)));\n}\n\ntemplate<> EIGEN_DEVICE_FUNC inline Packet2d pselect(const Packet2d& mask, const Packet2d& a, const Packet2d& b) {  return _mm_blendv_pd(b,a,mask); }\n\ntemplate<> EIGEN_DEVICE_FUNC inline Packet16b pselect(const Packet16b& mask, const Packet16b& a, const Packet16b& b) {\n  return _mm_blendv_epi8(b,a,mask);\n}\n#else\ntemplate<> EIGEN_DEVICE_FUNC inline Packet16b pselect(const Packet16b& mask, const Packet16b& a, const Packet16b& b) {\n  Packet16b a_part = _mm_and_si128(mask, a);\n  Packet16b b_part = _mm_andnot_si128(mask, b);\n  return _mm_or_si128(a_part, b_part);\n}\n#endif\n\ntemplate<> EIGEN_STRONG_INLINE Packet4i ptrue<Packet4i>(const Packet4i& a) { return _mm_cmpeq_epi32(a, a); }\ntemplate<> EIGEN_STRONG_INLINE Packet16b ptrue<Packet16b>(const Packet16b& a) { return _mm_cmpeq_epi8(a, a); }\ntemplate<> EIGEN_STRONG_INLINE Packet4f\nptrue<Packet4f>(const Packet4f& a) {\n  Packet4i b = _mm_castps_si128(a);\n  return _mm_castsi128_ps(_mm_cmpeq_epi32(b, b));\n}\ntemplate<> EIGEN_STRONG_INLINE Packet2d\nptrue<Packet2d>(const Packet2d& a) {\n  Packet4i b = _mm_castpd_si128(a);\n  return _mm_castsi128_pd(_mm_cmpeq_epi32(b, b));\n}\n\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_and_ps(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet2d pand<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_and_pd(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4i pand<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_and_si128(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet16b pand<Packet16b>(const Packet16b& a, const Packet16b& b) { return _mm_and_si128(a,b); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f por<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_or_ps(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet2d por<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_or_pd(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4i por<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_or_si128(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet16b por<Packet16b>(const Packet16b& a, const Packet16b& b) { return _mm_or_si128(a,b); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f pxor<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_xor_ps(a,b); }\ntemplate<> EIGEN_STRONG_INLINE 
Packet2d pxor<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_xor_pd(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4i pxor<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_xor_si128(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet16b pxor<Packet16b>(const Packet16b& a, const Packet16b& b) { return _mm_xor_si128(a,b); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_andnot_ps(b,a); }\ntemplate<> EIGEN_STRONG_INLINE Packet2d pandnot<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_andnot_pd(b,a); }\ntemplate<> EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_andnot_si128(b,a); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f pcmp_le(const Packet4f& a, const Packet4f& b) { return _mm_cmple_ps(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4f pcmp_lt(const Packet4f& a, const Packet4f& b) { return _mm_cmplt_ps(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4f pcmp_lt_or_nan(const Packet4f& a, const Packet4f& b) { return _mm_cmpnge_ps(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4f pcmp_eq(const Packet4f& a, const Packet4f& b) { return _mm_cmpeq_ps(a,b); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet2d pcmp_le(const Packet2d& a, const Packet2d& b) { return _mm_cmple_pd(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet2d pcmp_lt(const Packet2d& a, const Packet2d& b) { return _mm_cmplt_pd(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet2d pcmp_lt_or_nan(const Packet2d& a, const Packet2d& b) { return _mm_cmpnge_pd(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet2d pcmp_eq(const Packet2d& a, const Packet2d& b) { return _mm_cmpeq_pd(a,b); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet4i pcmp_lt(const Packet4i& a, const Packet4i& b) { return _mm_cmplt_epi32(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4i pcmp_eq(const Packet4i& a, const Packet4i& b) { return _mm_cmpeq_epi32(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet16b pcmp_eq(const Packet16b& a, const Packet16b& b) { return _mm_cmpeq_epi8(a,b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4i pcmp_le(const Packet4i& a, const Packet4i& b) { return por(pcmp_lt(a,b), pcmp_eq(a,b)); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b) {\n#if EIGEN_COMP_GNUC && EIGEN_COMP_GNUC < 63\n  // There appears to be a bug in GCC, by which the optimizer may\n  // flip the argument order in calls to _mm_min_ps, so we have to\n  // resort to inline ASM here. This is supposed to be fixed in gcc6.3,\n  // see also: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=72867\n  #ifdef EIGEN_VECTORIZE_AVX\n  Packet4f res;\n  asm(\"vminps %[a], %[b], %[res]\" : [res] \"=x\" (res) : [a] \"x\" (a), [b] \"x\" (b));\n  #else\n  Packet4f res = b;\n  asm(\"minps %[a], %[res]\" : [res] \"+x\" (res) : [a] \"x\" (a));\n  #endif\n  return res;\n#else\n  // Arguments are reversed to match NaN propagation behavior of std::min.\n  return _mm_min_ps(b, a);\n#endif\n}\ntemplate<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) {\n#if EIGEN_COMP_GNUC && EIGEN_COMP_GNUC < 63\n  // There appears to be a bug in GCC, by which the optimizer may\n  // flip the argument order in calls to _mm_min_pd, so we have to\n  // resort to inline ASM here. 
This is supposed to be fixed in gcc6.3,\n  // see also: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=72867\n  #ifdef EIGEN_VECTORIZE_AVX\n  Packet2d res;\n  asm(\"vminpd %[a], %[b], %[res]\" : [res] \"=x\" (res) : [a] \"x\" (a), [b] \"x\" (b));\n  #else\n  Packet2d res = b;\n  asm(\"minpd %[a], %[res]\" : [res] \"+x\" (res) : [a] \"x\" (a));\n  #endif\n  return res;\n#else\n  // Arguments are reversed to match NaN propagation behavior of std::min.\n  return _mm_min_pd(b, a);\n#endif\n}\ntemplate<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b)\n{\n#ifdef EIGEN_VECTORIZE_SSE4_1\n  return _mm_min_epi32(a,b);\n#else\n  // after some bench, this version *is* faster than a scalar implementation\n  Packet4i mask = _mm_cmplt_epi32(a,b);\n  return _mm_or_si128(_mm_and_si128(mask,a),_mm_andnot_si128(mask,b));\n#endif\n}\n\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b) {\n#if EIGEN_COMP_GNUC && EIGEN_COMP_GNUC < 63\n  // There appears to be a bug in GCC, by which the optimizer may\n  // flip the argument order in calls to _mm_max_ps, so we have to\n  // resort to inline ASM here. This is supposed to be fixed in gcc6.3,\n  // see also: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=72867\n  #ifdef EIGEN_VECTORIZE_AVX\n  Packet4f res;\n  asm(\"vmaxps %[a], %[b], %[res]\" : [res] \"=x\" (res) : [a] \"x\" (a), [b] \"x\" (b));\n  #else\n  Packet4f res = b;\n  asm(\"maxps %[a], %[res]\" : [res] \"+x\" (res) : [a] \"x\" (a));\n  #endif\n  return res;\n#else\n  // Arguments are reversed to match NaN propagation behavior of std::max.\n  return _mm_max_ps(b, a);\n#endif\n}\ntemplate<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) {\n#if EIGEN_COMP_GNUC && EIGEN_COMP_GNUC < 63\n  // There appears to be a bug in GCC, by which the optimizer may\n  // flip the argument order in calls to _mm_max_pd, so we have to\n  // resort to inline ASM here. 
This is supposed to be fixed in gcc6.3,\n  // see also: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=72867\n  #ifdef EIGEN_VECTORIZE_AVX\n  Packet2d res;\n  asm(\"vmaxpd %[a], %[b], %[res]\" : [res] \"=x\" (res) : [a] \"x\" (a), [b] \"x\" (b));\n  #else\n  Packet2d res = b;\n  asm(\"maxpd %[a], %[res]\" : [res] \"+x\" (res) : [a] \"x\" (a));\n  #endif\n  return res;\n#else\n  // Arguments are reversed to match NaN propagation behavior of std::max.\n  return _mm_max_pd(b, a);\n#endif\n}\ntemplate<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b)\n{\n#ifdef EIGEN_VECTORIZE_SSE4_1\n  return _mm_max_epi32(a,b);\n#else\n  // after some bench, this version *is* faster than a scalar implementation\n  Packet4i mask = _mm_cmpgt_epi32(a,b);\n  return _mm_or_si128(_mm_and_si128(mask,a),_mm_andnot_si128(mask,b));\n#endif\n}\n\ntemplate <typename Packet, typename Op>\nEIGEN_STRONG_INLINE Packet pminmax_propagate_numbers(const Packet& a, const Packet& b, Op op) {\n  // In this implementation, we take advantage of the fact that pmin/pmax for SSE\n  // always return a if either a or b is NaN.\n  Packet not_nan_mask_a = pcmp_eq(a, a);\n  Packet m = op(a, b);\n  return pselect<Packet>(not_nan_mask_a, m, b);\n}\n\ntemplate <typename Packet, typename Op>\nEIGEN_STRONG_INLINE Packet pminmax_propagate_nan(const Packet& a, const Packet& b, Op op) {\n  // In this implementation, we take advantage of the fact that pmin/pmax for SSE\n  // always return a if either a or b is NaN.\n  Packet not_nan_mask_a = pcmp_eq(a, a);\n  Packet m = op(b, a);\n  return pselect<Packet>(not_nan_mask_a, m, a);\n}\n\n// Add specializations for min/max with prescribed NaN propagation.\ntemplate<>\nEIGEN_STRONG_INLINE Packet4f pmin<PropagateNumbers, Packet4f>(const Packet4f& a, const Packet4f& b) {\n  return pminmax_propagate_numbers(a, b, pmin<Packet4f>);\n}\ntemplate<>\nEIGEN_STRONG_INLINE Packet2d pmin<PropagateNumbers, Packet2d>(const Packet2d& a, const Packet2d& b) {\n  return pminmax_propagate_numbers(a, b, pmin<Packet2d>);\n}\ntemplate<>\nEIGEN_STRONG_INLINE Packet4f pmax<PropagateNumbers, Packet4f>(const Packet4f& a, const Packet4f& b) {\n  return pminmax_propagate_numbers(a, b, pmax<Packet4f>);\n}\ntemplate<>\nEIGEN_STRONG_INLINE Packet2d pmax<PropagateNumbers, Packet2d>(const Packet2d& a, const Packet2d& b) {\n  return pminmax_propagate_numbers(a, b, pmax<Packet2d>);\n}\ntemplate<>\nEIGEN_STRONG_INLINE Packet4f pmin<PropagateNaN, Packet4f>(const Packet4f& a, const Packet4f& b) {\n  return pminmax_propagate_nan(a, b, pmin<Packet4f>);\n}\ntemplate<>\nEIGEN_STRONG_INLINE Packet2d pmin<PropagateNaN, Packet2d>(const Packet2d& a, const Packet2d& b) {\n  return pminmax_propagate_nan(a, b, pmin<Packet2d>);\n}\ntemplate<>\nEIGEN_STRONG_INLINE Packet4f pmax<PropagateNaN, Packet4f>(const Packet4f& a, const Packet4f& b) {\n  return pminmax_propagate_nan(a, b, pmax<Packet4f>);\n}\ntemplate<>\nEIGEN_STRONG_INLINE Packet2d pmax<PropagateNaN, Packet2d>(const Packet2d& a, const Packet2d& b) {\n  return pminmax_propagate_nan(a, b, pmax<Packet2d>);\n}\n\ntemplate<int N> EIGEN_STRONG_INLINE Packet4i parithmetic_shift_right(const Packet4i& a) { return _mm_srai_epi32(a,N); }\ntemplate<int N> EIGEN_STRONG_INLINE Packet4i plogical_shift_right   (const Packet4i& a) { return _mm_srli_epi32(a,N); }\ntemplate<int N> EIGEN_STRONG_INLINE Packet4i plogical_shift_left    (const Packet4i& a) { return _mm_slli_epi32(a,N); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a)\n{\n  const Packet4f mask = 
_mm_castsi128_ps(_mm_setr_epi32(0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF));\n  return _mm_and_ps(a,mask);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet2d pabs(const Packet2d& a)\n{\n  const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0xFFFFFFFF,0x7FFFFFFF,0xFFFFFFFF,0x7FFFFFFF));\n  return _mm_and_pd(a,mask);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a)\n{\n  #ifdef EIGEN_VECTORIZE_SSSE3\n  return _mm_abs_epi32(a);\n  #else\n  Packet4i aux = _mm_srai_epi32(a,31);\n  return _mm_sub_epi32(_mm_xor_si128(a,aux),aux);\n  #endif\n}\n\n#ifdef EIGEN_VECTORIZE_SSE4_1\ntemplate<> EIGEN_STRONG_INLINE Packet4f pround<Packet4f>(const Packet4f& a)\n{\n  // Unfortunately _mm_round_ps doesn't have a rounding mode to implement numext::round.\n  const Packet4f mask = pset1frombits<Packet4f>(0x80000000u);\n  const Packet4f prev0dot5 = pset1frombits<Packet4f>(0x3EFFFFFFu);\n  return _mm_round_ps(padd(por(pand(a, mask), prev0dot5), a), _MM_FROUND_TO_ZERO);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet2d pround<Packet2d>(const Packet2d& a)\n{\n  const Packet2d mask = _mm_castsi128_pd(_mm_set_epi64x(0x8000000000000000ull, 0x8000000000000000ull));\n  const Packet2d prev0dot5 = _mm_castsi128_pd(_mm_set_epi64x(0x3FDFFFFFFFFFFFFFull, 0x3FDFFFFFFFFFFFFFull));\n  return _mm_round_pd(padd(por(pand(a, mask), prev0dot5), a), _MM_FROUND_TO_ZERO);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f print<Packet4f>(const Packet4f& a) { return _mm_round_ps(a, _MM_FROUND_CUR_DIRECTION); }\ntemplate<> EIGEN_STRONG_INLINE Packet2d print<Packet2d>(const Packet2d& a) { return _mm_round_pd(a, _MM_FROUND_CUR_DIRECTION); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f pceil<Packet4f>(const Packet4f& a) { return _mm_ceil_ps(a); }\ntemplate<> EIGEN_STRONG_INLINE Packet2d pceil<Packet2d>(const Packet2d& a) { return _mm_ceil_pd(a); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f pfloor<Packet4f>(const Packet4f& a) { return _mm_floor_ps(a); }\ntemplate<> EIGEN_STRONG_INLINE Packet2d pfloor<Packet2d>(const Packet2d& a) { return _mm_floor_pd(a); }\n#else\ntemplate<> EIGEN_STRONG_INLINE Packet4f print(const Packet4f& a) {\n  // Adds and subtracts signum(a) * 2^23 to force rounding.\n  const Packet4f limit = pset1<Packet4f>(static_cast<float>(1<<23));\n  const Packet4f abs_a = pabs(a);\n  Packet4f r = padd(abs_a, limit);\n  // Don't compile-away addition and subtraction.\n  EIGEN_OPTIMIZATION_BARRIER(r);\n  r = psub(r, limit);\n  // If greater than limit, simply return a.  Otherwise, account for sign.\n  r = pselect(pcmp_lt(abs_a, limit),\n              pselect(pcmp_lt(a, pzero(a)), pnegate(r), r), a);\n  return r;\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet2d print(const Packet2d& a) {\n  // Adds and subtracts signum(a) * 2^52 to force rounding.\n  const Packet2d limit = pset1<Packet2d>(static_cast<double>(1ull<<52));\n  const Packet2d abs_a = pabs(a);\n  Packet2d r = padd(abs_a, limit);\n  // Don't compile-away addition and subtraction.\n  EIGEN_OPTIMIZATION_BARRIER(r);\n  r = psub(r, limit);\n  // If greater than limit, simply return a.  
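(Doubles with |a| >= 2^52 are already integers.) 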
Otherwise, account for sign.\n  r = pselect(pcmp_lt(abs_a, limit),\n              pselect(pcmp_lt(a, pzero(a)), pnegate(r), r), a);\n  return r;\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f pfloor<Packet4f>(const Packet4f& a)\n{\n  const Packet4f cst_1 = pset1<Packet4f>(1.0f);\n  Packet4f tmp  = print<Packet4f>(a);\n  // If greater, subtract one.\n  Packet4f mask = _mm_cmpgt_ps(tmp, a);\n  mask = pand(mask, cst_1);\n  return psub(tmp, mask);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet2d pfloor<Packet2d>(const Packet2d& a)\n{\n  const Packet2d cst_1 = pset1<Packet2d>(1.0);\n  Packet2d tmp  = print<Packet2d>(a);\n  // If greater, subtract one.\n  Packet2d mask = _mm_cmpgt_pd(tmp, a);\n  mask = pand(mask, cst_1);\n  return psub(tmp, mask);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f pceil<Packet4f>(const Packet4f& a)\n{\n  const Packet4f cst_1 = pset1<Packet4f>(1.0f);\n  Packet4f tmp  = print<Packet4f>(a);\n  // If smaller, add one.\n  Packet4f mask = _mm_cmplt_ps(tmp, a);\n  mask = pand(mask, cst_1);\n  return padd(tmp, mask);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet2d pceil<Packet2d>(const Packet2d& a)\n{\n  const Packet2d cst_1 = pset1<Packet2d>(1.0);\n  Packet2d tmp  = print<Packet2d>(a);\n  // If smaller, add one.\n  Packet2d mask = _mm_cmplt_pd(tmp, a);\n  mask = pand(mask, cst_1);\n  return padd(tmp, mask);\n}\n#endif\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float*   from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_ps(from); }\ntemplate<> EIGEN_STRONG_INLINE Packet2d pload<Packet2d>(const double*  from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_pd(from); }\ntemplate<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int*     from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_si128(reinterpret_cast<const __m128i*>(from)); }\ntemplate<> EIGEN_STRONG_INLINE Packet16b pload<Packet16b>(const bool*     from) { EIGEN_DEBUG_ALIGNED_LOAD return  _mm_load_si128(reinterpret_cast<const __m128i*>(from)); }\n\n#if EIGEN_COMP_MSVC\n  template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float*  from) {\n    EIGEN_DEBUG_UNALIGNED_LOAD\n    #if (EIGEN_COMP_MSVC==1600)\n    // NOTE Some version of MSVC10 generates bad code when using _mm_loadu_ps\n    // (i.e., it does not generate an unaligned load!!\n    __m128 res = _mm_loadl_pi(_mm_set1_ps(0.0f), (const __m64*)(from));\n    res = _mm_loadh_pi(res, (const __m64*)(from+2));\n    return res;\n    #else\n    return _mm_loadu_ps(from);\n    #endif\n  }\n#else\n// NOTE: with the code below, MSVC's compiler crashes!\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from)\n{\n  EIGEN_DEBUG_UNALIGNED_LOAD\n  return _mm_loadu_ps(from);\n}\n#endif\n\ntemplate<> EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from)\n{\n  EIGEN_DEBUG_UNALIGNED_LOAD\n  return _mm_loadu_pd(from);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from)\n{\n  EIGEN_DEBUG_UNALIGNED_LOAD\n  return _mm_loadu_si128(reinterpret_cast<const __m128i*>(from));\n}\ntemplate<> EIGEN_STRONG_INLINE Packet16b ploadu<Packet16b>(const bool*     from) {\n  EIGEN_DEBUG_UNALIGNED_LOAD\n  return _mm_loadu_si128(reinterpret_cast<const __m128i*>(from));\n}\n\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float*   from)\n{\n  return vec4f_swizzle1(_mm_castpd_ps(_mm_load_sd(reinterpret_cast<const double*>(from))), 0, 0, 1, 1);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet2d ploaddup<Packet2d>(const double*  from)\n{ return pset1<Packet2d>(from[0]); }\ntemplate<> EIGEN_STRONG_INLINE Packet4i 
ploaddup<Packet4i>(const int*     from)\n{\n  Packet4i tmp;\n  tmp = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(from));\n  return vec4i_swizzle1(tmp, 0, 0, 1, 1);\n}\n\n// Loads 8 bools from memory and returns the packet\n// {b0, b0, b1, b1, b2, b2, b3, b3, b4, b4, b5, b5, b6, b6, b7, b7}\ntemplate<> EIGEN_STRONG_INLINE Packet16b ploaddup<Packet16b>(const bool*     from)\n{\n  __m128i tmp = _mm_castpd_si128(pload1<Packet2d>(reinterpret_cast<const double*>(from)));\n  return  _mm_unpacklo_epi8(tmp, tmp);\n}\n\n// Loads 4 bools from memory and returns the packet\n// {b0, b0, b0, b0, b1, b1, b1, b1, b2, b2, b2, b2, b3, b3, b3, b3}\ntemplate<> EIGEN_STRONG_INLINE Packet16b\nploadquad<Packet16b>(const bool* from) {\n  __m128i tmp = _mm_castps_si128(pload1<Packet4f>(reinterpret_cast<const float*>(from)));\n  tmp = _mm_unpacklo_epi8(tmp, tmp);\n  return  _mm_unpacklo_epi16(tmp, tmp);\n}\n\ntemplate<> EIGEN_STRONG_INLINE void pstore<float>(float*   to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_ps(to, from); }\ntemplate<> EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet2d& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_pd(to, from); }\ntemplate<> EIGEN_STRONG_INLINE void pstore<int>(int*       to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_si128(reinterpret_cast<__m128i*>(to), from); }\ntemplate<> EIGEN_STRONG_INLINE void pstore<bool>(bool*     to, const Packet16b& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_si128(reinterpret_cast<__m128i*>(to), from); }\n\ntemplate<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet2d& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm_storeu_pd(to, from); }\ntemplate<> EIGEN_STRONG_INLINE void pstoreu<float>(float*   to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm_storeu_ps(to, from); }\ntemplate<> EIGEN_STRONG_INLINE void pstoreu<int>(int*       to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm_storeu_si128(reinterpret_cast<__m128i*>(to), from); }\ntemplate<> EIGEN_STRONG_INLINE void pstoreu<bool>(bool*     to, const Packet16b& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm_storeu_si128(reinterpret_cast<__m128i*>(to), from); }\n\ntemplate<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, Index stride)\n{\n return _mm_set_ps(from[3*stride], from[2*stride], from[1*stride], from[0*stride]);\n}\ntemplate<> EIGEN_DEVICE_FUNC inline Packet2d pgather<double, Packet2d>(const double* from, Index stride)\n{\n return _mm_set_pd(from[1*stride], from[0*stride]);\n}\ntemplate<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* from, Index stride)\n{\n return _mm_set_epi32(from[3*stride], from[2*stride], from[1*stride], from[0*stride]);\n}\n\ntemplate<> EIGEN_DEVICE_FUNC inline Packet16b pgather<bool, Packet16b>(const bool* from, Index stride)\n{\n  return _mm_set_epi8(from[15*stride], from[14*stride], from[13*stride], from[12*stride],\n                      from[11*stride], from[10*stride], from[9*stride], from[8*stride],\n                      from[7*stride], from[6*stride], from[5*stride], from[4*stride],\n                      from[3*stride], from[2*stride], from[1*stride], from[0*stride]);\n}\n\ntemplate<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, Index stride)\n{\n  to[stride*0] = _mm_cvtss_f32(from);\n  to[stride*1] = _mm_cvtss_f32(_mm_shuffle_ps(from, from, 1));\n  to[stride*2] = _mm_cvtss_f32(_mm_shuffle_ps(from, from, 2));\n  to[stride*3] = _mm_cvtss_f32(_mm_shuffle_ps(from, from, 3));\n}\ntemplate<> 
EIGEN_DEVICE_FUNC inline void pscatter<double, Packet2d>(double* to, const Packet2d& from, Index stride)\n{\n  to[stride*0] = _mm_cvtsd_f64(from);\n  to[stride*1] = _mm_cvtsd_f64(_mm_shuffle_pd(from, from, 1));\n}\ntemplate<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const Packet4i& from, Index stride)\n{\n  to[stride*0] = _mm_cvtsi128_si32(from);\n  to[stride*1] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 1));\n  to[stride*2] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 2));\n  to[stride*3] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 3));\n}\ntemplate<> EIGEN_DEVICE_FUNC inline void pscatter<bool, Packet16b>(bool* to, const Packet16b& from, Index stride)\n{\n  to[4*stride*0] = _mm_cvtsi128_si32(from);\n  to[4*stride*1] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 1));\n  to[4*stride*2] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 2));\n  to[4*stride*3] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 3));\n}\n\n\n// some compilers might be tempted to perform multiple moves instead of using a vector path.\ntemplate<> EIGEN_STRONG_INLINE void pstore1<Packet4f>(float* to, const float& a)\n{\n  Packet4f pa = _mm_set_ss(a);\n  pstore(to, Packet4f(vec4f_swizzle1(pa,0,0,0,0)));\n}\n// some compilers might be tempted to perform multiple moves instead of using a vector path.\ntemplate<> EIGEN_STRONG_INLINE void pstore1<Packet2d>(double* to, const double& a)\n{\n  Packet2d pa = _mm_set_sd(a);\n  pstore(to, Packet2d(vec2d_swizzle1(pa,0,0)));\n}\n\n#if EIGEN_COMP_PGI && EIGEN_COMP_PGI < 1900\ntypedef const void * SsePrefetchPtrType;\n#else\ntypedef const char * SsePrefetchPtrType;\n#endif\n\n#ifndef EIGEN_VECTORIZE_AVX\ntemplate<> EIGEN_STRONG_INLINE void prefetch<float>(const float*   addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }\ntemplate<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }\ntemplate<> EIGEN_STRONG_INLINE void prefetch<int>(const int*       addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }\n#endif\n\n#if EIGEN_COMP_MSVC_STRICT && EIGEN_OS_WIN64\n// The temporary variable fixes an internal compilation error in vs <= 2008 and a wrong-result bug in vs 2010\n// Direct access of the struct members fixed bug #62.\ntemplate<> EIGEN_STRONG_INLINE float  pfirst<Packet4f>(const Packet4f& a) { return a.m128_f32[0]; }\ntemplate<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { return a.m128d_f64[0]; }\ntemplate<> EIGEN_STRONG_INLINE int    pfirst<Packet4i>(const Packet4i& a) { int x = _mm_cvtsi128_si32(a); return x; }\n#elif EIGEN_COMP_MSVC_STRICT\n// The temporary variable fixes an internal compilation error in vs <= 2008 and a wrong-result bug in vs 2010\ntemplate<> EIGEN_STRONG_INLINE float  pfirst<Packet4f>(const Packet4f& a) { float x = _mm_cvtss_f32(a); return x; }\ntemplate<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { double x = _mm_cvtsd_f64(a); return x; }\ntemplate<> EIGEN_STRONG_INLINE int    pfirst<Packet4i>(const Packet4i& a) { int x = _mm_cvtsi128_si32(a); return x; }\n#else\ntemplate<> EIGEN_STRONG_INLINE float  pfirst<Packet4f>(const Packet4f& a) { return _mm_cvtss_f32(a); }\ntemplate<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { return _mm_cvtsd_f64(a); }\ntemplate<> EIGEN_STRONG_INLINE int    pfirst<Packet4i>(const Packet4i& a) { return _mm_cvtsi128_si32(a); }\n#endif\ntemplate<> EIGEN_STRONG_INLINE bool   pfirst<Packet16b>(const Packet16b& a) { int x = _mm_cvtsi128_si32(a); return static_cast<bool>(x & 1); 
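// bools are stored one per byte, so bit 0 of the low byte is the first lane\n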
}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a) { return _mm_shuffle_ps(a,a,0x1B); }\ntemplate<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a) { return _mm_shuffle_pd(a,a,0x1); }\ntemplate<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a) { return _mm_shuffle_epi32(a,0x1B); }\ntemplate<> EIGEN_STRONG_INLINE Packet16b preverse(const Packet16b& a) {\n#ifdef EIGEN_VECTORIZE_SSSE3\n  __m128i mask = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);\n  return _mm_shuffle_epi8(a, mask);\n#else\n  Packet16b tmp = _mm_shuffle_epi32(a, _MM_SHUFFLE(0, 1, 2, 3));\n  tmp = _mm_shufflehi_epi16(_mm_shufflelo_epi16(tmp, _MM_SHUFFLE(2, 3, 0, 1)), _MM_SHUFFLE(2, 3, 0, 1));\n  return _mm_or_si128(_mm_slli_epi16(tmp, 8), _mm_srli_epi16(tmp, 8));\n#endif\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f pfrexp<Packet4f>(const Packet4f& a, Packet4f& exponent) {\n  return pfrexp_generic(a,exponent);\n}\n\n// Extract exponent without existence of Packet2l.\ntemplate<>\nEIGEN_STRONG_INLINE  \nPacket2d pfrexp_generic_get_biased_exponent(const Packet2d& a) {\n  const Packet2d cst_exp_mask  = pset1frombits<Packet2d>(static_cast<uint64_t>(0x7ff0000000000000ull));\n  __m128i a_expo = _mm_srli_epi64(_mm_castpd_si128(pand(a, cst_exp_mask)), 52);\n  return _mm_cvtepi32_pd(vec4i_swizzle1(a_expo, 0, 2, 1, 3));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet2d pfrexp<Packet2d>(const Packet2d& a, Packet2d& exponent) {\n  return pfrexp_generic(a, exponent);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f pldexp<Packet4f>(const Packet4f& a, const Packet4f& exponent) {\n  return pldexp_generic(a,exponent);\n}\n\n// We specialize pldexp here, since the generic implementation uses Packet2l, which is not well\n// supported by SSE, and has more range than is needed for exponents.\ntemplate<> EIGEN_STRONG_INLINE Packet2d pldexp<Packet2d>(const Packet2d& a, const Packet2d& exponent) {\n  // Clamp exponent to [-2099, 2099]\n  const Packet2d max_exponent = pset1<Packet2d>(2099.0);\n  const Packet2d e = pmin(pmax(exponent, pnegate(max_exponent)), max_exponent);\n  \n  // Convert e to integer and swizzle to low-order bits.\n  const Packet4i ei = vec4i_swizzle1(_mm_cvtpd_epi32(e), 0, 3, 1, 3);\n  \n  // Split 2^e into four factors and multiply:\n  const Packet4i bias = _mm_set_epi32(0, 1023, 0, 1023);\n  Packet4i b = parithmetic_shift_right<2>(ei);  // floor(e/4)\n  Packet2d c = _mm_castsi128_pd(_mm_slli_epi64(padd(b, bias), 52));  // 2^b\n  Packet2d out = pmul(pmul(pmul(a, c), c), c); // a * 2^(3b)\n  b = psub(psub(psub(ei, b), b), b);  // e - 3b\n  c = _mm_castsi128_pd(_mm_slli_epi64(padd(b, bias), 52));  // 2^(e - 3b)\n  out = pmul(out, c);  // a * 2^e\n  return out;\n}\n\n// with AVX, the default implementations based on pload1 are faster\n#ifndef __AVX__\ntemplate<> EIGEN_STRONG_INLINE void\npbroadcast4<Packet4f>(const float *a,\n                      Packet4f& a0, Packet4f& a1, Packet4f& a2, Packet4f& a3)\n{\n  a3 = pload<Packet4f>(a);\n  a0 = vec4f_swizzle1(a3, 0,0,0,0);\n  a1 = vec4f_swizzle1(a3, 1,1,1,1);\n  a2 = vec4f_swizzle1(a3, 2,2,2,2);\n  a3 = vec4f_swizzle1(a3, 3,3,3,3);\n}\ntemplate<> EIGEN_STRONG_INLINE void\npbroadcast4<Packet2d>(const double *a,\n                      Packet2d& a0, Packet2d& a1, Packet2d& a2, Packet2d& a3)\n{\n#ifdef EIGEN_VECTORIZE_SSE3\n  a0 = _mm_loaddup_pd(a+0);\n  a1 = _mm_loaddup_pd(a+1);\n  a2 = _mm_loaddup_pd(a+2);\n  a3 = _mm_loaddup_pd(a+3);\n#else\n  a1 = pload<Packet2d>(a);\n  a0 = vec2d_swizzle1(a1, 0,0);\n  a1 = vec2d_swizzle1(a1, 
1,1);\n  a3 = pload<Packet2d>(a+2);\n  a2 = vec2d_swizzle1(a3, 0,0);\n  a3 = vec2d_swizzle1(a3, 1,1);\n#endif\n}\n#endif\n\nEIGEN_STRONG_INLINE void punpackp(Packet4f* vecs)\n{\n  vecs[1] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0x55));\n  vecs[2] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0xAA));\n  vecs[3] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0xFF));\n  vecs[0] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0x00));\n}\n\ntemplate<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)\n{\n  // Disable SSE3 _mm_hadd_ps that is extremely slow on all existing Intel architectures\n  // (from Nehalem to Haswell)\n// #ifdef EIGEN_VECTORIZE_SSE3\n//   Packet4f tmp = _mm_add_ps(a, vec4f_swizzle1(a,2,3,2,3));\n//   return pfirst<Packet4f>(_mm_hadd_ps(tmp, tmp));\n// #else\n  Packet4f tmp = _mm_add_ps(a, _mm_movehl_ps(a,a));\n  return pfirst<Packet4f>(_mm_add_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));\n// #endif\n}\n\ntemplate<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a)\n{\n  // Disable SSE3 _mm_hadd_pd that is extremely slow on all existing Intel architectures\n  // (from Nehalem to Haswell)\n// #ifdef EIGEN_VECTORIZE_SSE3\n//   return pfirst<Packet2d>(_mm_hadd_pd(a, a));\n// #else\n  return pfirst<Packet2d>(_mm_add_sd(a, _mm_unpackhi_pd(a,a)));\n// #endif\n}\n\n#ifdef EIGEN_VECTORIZE_SSSE3\ntemplate<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a)\n{\n  Packet4i tmp0 = _mm_hadd_epi32(a,a);\n  return pfirst<Packet4i>(_mm_hadd_epi32(tmp0,tmp0));\n}\n\n#else\ntemplate<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a)\n{\n  Packet4i tmp = _mm_add_epi32(a, _mm_unpackhi_epi64(a,a));\n  return pfirst(tmp) + pfirst<Packet4i>(_mm_shuffle_epi32(tmp, 1));\n}\n#endif\n\ntemplate<> EIGEN_STRONG_INLINE bool predux<Packet16b>(const Packet16b& a) {\n  Packet4i tmp = _mm_or_si128(a, _mm_unpackhi_epi64(a,a));\n  return (pfirst(tmp) != 0) || (pfirst<Packet4i>(_mm_shuffle_epi32(tmp, 1)) != 0);\n}\n\n// Other reduction functions:\n\n\n// mul\ntemplate<> EIGEN_STRONG_INLINE float predux_mul<Packet4f>(const Packet4f& a)\n{\n  Packet4f tmp = _mm_mul_ps(a, _mm_movehl_ps(a,a));\n  return pfirst<Packet4f>(_mm_mul_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));\n}\ntemplate<> EIGEN_STRONG_INLINE double predux_mul<Packet2d>(const Packet2d& a)\n{\n  return pfirst<Packet2d>(_mm_mul_sd(a, _mm_unpackhi_pd(a,a)));\n}\ntemplate<> EIGEN_STRONG_INLINE int predux_mul<Packet4i>(const Packet4i& a)\n{\n  // after some experiments, it seems this is the fastest way to implement it\n  // for GCC (e.g., reusing pmul is very slow!)\n  // TODO try to call _mm_mul_epu32 directly\n  EIGEN_ALIGN16 int aux[4];\n  pstore(aux, a);\n  return  (aux[0] * aux[1]) * (aux[2] * aux[3]);\n}\n\ntemplate<> EIGEN_STRONG_INLINE bool predux_mul<Packet16b>(const Packet16b& a) {\n  Packet4i tmp = _mm_and_si128(a, _mm_unpackhi_epi64(a,a));\n  return ((pfirst<Packet4i>(tmp) == 0x01010101) &&\n          (pfirst<Packet4i>(_mm_shuffle_epi32(tmp, 1)) == 0x01010101));\n}\n\n// min\ntemplate<> EIGEN_STRONG_INLINE float predux_min<Packet4f>(const Packet4f& a)\n{\n  Packet4f tmp = _mm_min_ps(a, _mm_movehl_ps(a,a));\n  return pfirst<Packet4f>(_mm_min_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));\n}\ntemplate<> EIGEN_STRONG_INLINE double predux_min<Packet2d>(const Packet2d& a)\n{\n  return pfirst<Packet2d>(_mm_min_sd(a, _mm_unpackhi_pd(a,a)));\n}\ntemplate<> EIGEN_STRONG_INLINE int predux_min<Packet4i>(const Packet4i& a)\n{\n#ifdef 
EIGEN_VECTORIZE_SSE4_1\n  Packet4i tmp = _mm_min_epi32(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(0,0,3,2)));\n  return pfirst<Packet4i>(_mm_min_epi32(tmp,_mm_shuffle_epi32(tmp, 1)));\n#else\n  // after some experiments, it seems this is the fastest way to implement it\n  // for GCC (e.g., it does not like using std::min after the pstore!)\n  EIGEN_ALIGN16 int aux[4];\n  pstore(aux, a);\n  int aux0 = aux[0]<aux[1] ? aux[0] : aux[1];\n  int aux2 = aux[2]<aux[3] ? aux[2] : aux[3];\n  return aux0<aux2 ? aux0 : aux2;\n#endif // EIGEN_VECTORIZE_SSE4_1\n}\n\n// max\ntemplate<> EIGEN_STRONG_INLINE float predux_max<Packet4f>(const Packet4f& a)\n{\n  Packet4f tmp = _mm_max_ps(a, _mm_movehl_ps(a,a));\n  return pfirst<Packet4f>(_mm_max_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));\n}\ntemplate<> EIGEN_STRONG_INLINE double predux_max<Packet2d>(const Packet2d& a)\n{\n  return pfirst<Packet2d>(_mm_max_sd(a, _mm_unpackhi_pd(a,a)));\n}\ntemplate<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a)\n{\n#ifdef EIGEN_VECTORIZE_SSE4_1\n  Packet4i tmp = _mm_max_epi32(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(0,0,3,2)));\n  return pfirst<Packet4i>(_mm_max_epi32(tmp,_mm_shuffle_epi32(tmp, 1)));\n#else\n  // after some experiments, it seems this is the fastest way to implement it\n  // for GCC (e.g., it does not like using std::max after the pstore!)\n  EIGEN_ALIGN16 int aux[4];\n  pstore(aux, a);\n  int aux0 = aux[0]>aux[1] ? aux[0] : aux[1];\n  int aux2 = aux[2]>aux[3] ? aux[2] : aux[3];\n  return aux0>aux2 ? aux0 : aux2;\n#endif // EIGEN_VECTORIZE_SSE4_1\n}\n\n// not needed yet\n// template<> EIGEN_STRONG_INLINE bool predux_all(const Packet4f& x)\n// {\n//   return _mm_movemask_ps(x) == 0xF;\n// }\n\ntemplate<> EIGEN_STRONG_INLINE bool predux_any(const Packet4f& x)\n{\n  return _mm_movemask_ps(x) != 0x0;\n}\n\nEIGEN_DEVICE_FUNC inline void\nptranspose(PacketBlock<Packet4f,4>& kernel) {\n  _MM_TRANSPOSE4_PS(kernel.packet[0], kernel.packet[1], kernel.packet[2], kernel.packet[3]);\n}\n\nEIGEN_DEVICE_FUNC inline void\nptranspose(PacketBlock<Packet2d,2>& kernel) {\n  __m128d tmp = _mm_unpackhi_pd(kernel.packet[0], kernel.packet[1]);\n  kernel.packet[0] = _mm_unpacklo_pd(kernel.packet[0], kernel.packet[1]);\n  kernel.packet[1] = tmp;\n}\n\nEIGEN_DEVICE_FUNC inline void\nptranspose(PacketBlock<Packet4i,4>& kernel) {\n  __m128i T0 = _mm_unpacklo_epi32(kernel.packet[0], kernel.packet[1]);\n  __m128i T1 = _mm_unpacklo_epi32(kernel.packet[2], kernel.packet[3]);\n  __m128i T2 = _mm_unpackhi_epi32(kernel.packet[0], kernel.packet[1]);\n  __m128i T3 = _mm_unpackhi_epi32(kernel.packet[2], kernel.packet[3]);\n\n  kernel.packet[0] = _mm_unpacklo_epi64(T0, T1);\n  kernel.packet[1] = _mm_unpackhi_epi64(T0, T1);\n  kernel.packet[2] = _mm_unpacklo_epi64(T2, T3);\n  kernel.packet[3] = _mm_unpackhi_epi64(T2, T3);\n}\n\nEIGEN_DEVICE_FUNC inline void\nptranspose(PacketBlock<Packet16b,4>& kernel) {\n  __m128i T0 =  _mm_unpacklo_epi8(kernel.packet[0], kernel.packet[1]);\n  __m128i T1 =  _mm_unpackhi_epi8(kernel.packet[0], kernel.packet[1]);\n  __m128i T2 =  _mm_unpacklo_epi8(kernel.packet[2], kernel.packet[3]);\n  __m128i T3 =  _mm_unpackhi_epi8(kernel.packet[2], kernel.packet[3]);\n  kernel.packet[0] = _mm_unpacklo_epi16(T0, T2);\n  kernel.packet[1] = _mm_unpackhi_epi16(T0, T2);\n  kernel.packet[2] = _mm_unpacklo_epi16(T1, T3);\n  kernel.packet[3] = _mm_unpackhi_epi16(T1, T3);\n}\n\nEIGEN_DEVICE_FUNC inline void\nptranspose(PacketBlock<Packet16b,16>& kernel) {\n  // If we number the elements in the input thus:\n  // kernel.packet[ 0] = 
{00, 01, 02, 03, 04, 05, 06, 07, 08, 09, 0a, 0b, 0c, 0d, 0e, 0f}\n  // kernel.packet[ 1] = {10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 1a, 1b, 1c, 1d, 1e, 1f}\n  // ...\n  // kernel.packet[15] = {f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, fa, fb, fc, fd, fe, ff},\n  //\n  // the desired output is:\n  // kernel.packet[ 0] = {00, 10, 20, 30, 40, 50, 60, 70, 80, 90, a0, b0, c0, d0, e0, f0}\n  // kernel.packet[ 1] = {01, 11, 21, 31, 41, 51, 61, 71, 81, 91, a1, b1, c1, d1, e1, f1}\n  // ...\n  // kernel.packet[15] = {0f, 1f, 2f, 3f, 4f, 5f, 6f, 7f, 8f, 9f, af, bf, cf, df, ef, ff},\n  __m128i t0 =  _mm_unpacklo_epi8(kernel.packet[0], kernel.packet[1]); // 00 10 01 11 02 12 03 13 04 14 05 15 06 16 07 17\n  __m128i t1 =  _mm_unpackhi_epi8(kernel.packet[0], kernel.packet[1]); // 08 18 09 19 0a 1a 0b 1b 0c 1c 0d 1d 0e 1e 0f 1f\n  __m128i t2 =  _mm_unpacklo_epi8(kernel.packet[2], kernel.packet[3]); // 20 30 21 31 22 32 ...                     27 37\n  __m128i t3 =  _mm_unpackhi_epi8(kernel.packet[2], kernel.packet[3]); // 28 38 29 39 2a 3a ...                     2f 3f\n  __m128i t4 =  _mm_unpacklo_epi8(kernel.packet[4], kernel.packet[5]); // 40 50 41 51 42 52                         47 57\n  __m128i t5 =  _mm_unpackhi_epi8(kernel.packet[4], kernel.packet[5]); // 48 58 49 59 4a 5a\n  __m128i t6 =  _mm_unpacklo_epi8(kernel.packet[6], kernel.packet[7]);\n  __m128i t7 =  _mm_unpackhi_epi8(kernel.packet[6], kernel.packet[7]);\n  __m128i t8 =  _mm_unpacklo_epi8(kernel.packet[8], kernel.packet[9]);\n  __m128i t9 =  _mm_unpackhi_epi8(kernel.packet[8], kernel.packet[9]);\n  __m128i ta =  _mm_unpacklo_epi8(kernel.packet[10], kernel.packet[11]);\n  __m128i tb =  _mm_unpackhi_epi8(kernel.packet[10], kernel.packet[11]);\n  __m128i tc =  _mm_unpacklo_epi8(kernel.packet[12], kernel.packet[13]);\n  __m128i td =  _mm_unpackhi_epi8(kernel.packet[12], kernel.packet[13]);\n  __m128i te =  _mm_unpacklo_epi8(kernel.packet[14], kernel.packet[15]);\n  __m128i tf =  _mm_unpackhi_epi8(kernel.packet[14], kernel.packet[15]);\n\n  __m128i s0 =  _mm_unpacklo_epi16(t0, t2); // 00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33\n  __m128i s1 =  _mm_unpackhi_epi16(t0, t2); // 04 14 24 34\n  __m128i s2 =  _mm_unpacklo_epi16(t1, t3); // 08 18 28 38 ...\n  __m128i s3 =  _mm_unpackhi_epi16(t1, t3); // 0c 1c 2c 3c ...\n  __m128i s4 =  _mm_unpacklo_epi16(t4, t6); // 40 50 60 70 41 51 61 71 42 52 62 72 43 53 63 73\n  __m128i s5 =  _mm_unpackhi_epi16(t4, t6); // 44 54 64 74 ...\n  __m128i s6 =  _mm_unpacklo_epi16(t5, t7);\n  __m128i s7 =  _mm_unpackhi_epi16(t5, t7);\n  __m128i s8 =  _mm_unpacklo_epi16(t8, ta);\n  __m128i s9 =  _mm_unpackhi_epi16(t8, ta);\n  __m128i sa =  _mm_unpacklo_epi16(t9, tb);\n  __m128i sb =  _mm_unpackhi_epi16(t9, tb);\n  __m128i sc =  _mm_unpacklo_epi16(tc, te);\n  __m128i sd =  _mm_unpackhi_epi16(tc, te);\n  __m128i se =  _mm_unpacklo_epi16(td, tf);\n  __m128i sf =  _mm_unpackhi_epi16(td, tf);\n\n  __m128i u0 =  _mm_unpacklo_epi32(s0, s4); // 00 10 20 30 40 50 60 70 01 11 21 31 41 51 61 71\n  __m128i u1 =  _mm_unpackhi_epi32(s0, s4); // 02 12 22 32 42 52 62 72 03 13 23 33 43 53 63 73\n  __m128i u2 =  _mm_unpacklo_epi32(s1, s5);\n  __m128i u3 =  _mm_unpackhi_epi32(s1, s5);\n  __m128i u4 =  _mm_unpacklo_epi32(s2, s6);\n  __m128i u5 =  _mm_unpackhi_epi32(s2, s6);\n  __m128i u6 =  _mm_unpacklo_epi32(s3, s7);\n  __m128i u7 =  _mm_unpackhi_epi32(s3, s7);\n  __m128i u8 =  _mm_unpacklo_epi32(s8, sc);\n  __m128i u9 =  _mm_unpackhi_epi32(s8, sc);\n  __m128i ua =  _mm_unpacklo_epi32(s9, sd);\n  __m128i ub =  _mm_unpackhi_epi32(s9, 
sd);\n  __m128i uc =  _mm_unpacklo_epi32(sa, se);\n  __m128i ud =  _mm_unpackhi_epi32(sa, se);\n  __m128i ue =  _mm_unpacklo_epi32(sb, sf);\n  __m128i uf =  _mm_unpackhi_epi32(sb, sf);\n\n  kernel.packet[0]  = _mm_unpacklo_epi64(u0, u8);\n  kernel.packet[1]  = _mm_unpackhi_epi64(u0, u8);\n  kernel.packet[2]  = _mm_unpacklo_epi64(u1, u9);\n  kernel.packet[3]  = _mm_unpackhi_epi64(u1, u9);\n  kernel.packet[4]  = _mm_unpacklo_epi64(u2, ua);\n  kernel.packet[5]  = _mm_unpackhi_epi64(u2, ua);\n  kernel.packet[6]  = _mm_unpacklo_epi64(u3, ub);\n  kernel.packet[7]  = _mm_unpackhi_epi64(u3, ub);\n  kernel.packet[8]  = _mm_unpacklo_epi64(u4, uc);\n  kernel.packet[9]  = _mm_unpackhi_epi64(u4, uc);\n  kernel.packet[10] = _mm_unpacklo_epi64(u5, ud);\n  kernel.packet[11] = _mm_unpackhi_epi64(u5, ud);\n  kernel.packet[12] = _mm_unpacklo_epi64(u6, ue);\n  kernel.packet[13] = _mm_unpackhi_epi64(u6, ue);\n  kernel.packet[14] = _mm_unpacklo_epi64(u7, uf);\n  kernel.packet[15] = _mm_unpackhi_epi64(u7, uf);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4i pblend(const Selector<4>& ifPacket, const Packet4i& thenPacket, const Packet4i& elsePacket) {\n  const __m128i zero = _mm_setzero_si128();\n  const __m128i select = _mm_set_epi32(ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);\n  __m128i false_mask = _mm_cmpeq_epi32(select, zero);\n#ifdef EIGEN_VECTORIZE_SSE4_1\n  return _mm_blendv_epi8(thenPacket, elsePacket, false_mask);\n#else\n  return _mm_or_si128(_mm_andnot_si128(false_mask, thenPacket), _mm_and_si128(false_mask, elsePacket));\n#endif\n}\ntemplate<> EIGEN_STRONG_INLINE Packet4f pblend(const Selector<4>& ifPacket, const Packet4f& thenPacket, const Packet4f& elsePacket) {\n  const __m128 zero = _mm_setzero_ps();\n  const __m128 select = _mm_set_ps(ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);\n  __m128 false_mask = _mm_cmpeq_ps(select, zero);\n#ifdef EIGEN_VECTORIZE_SSE4_1\n  return _mm_blendv_ps(thenPacket, elsePacket, false_mask);\n#else\n  return _mm_or_ps(_mm_andnot_ps(false_mask, thenPacket), _mm_and_ps(false_mask, elsePacket));\n#endif\n}\ntemplate<> EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, const Packet2d& thenPacket, const Packet2d& elsePacket) {\n  const __m128d zero = _mm_setzero_pd();\n  const __m128d select = _mm_set_pd(ifPacket.select[1], ifPacket.select[0]);\n  __m128d false_mask = _mm_cmpeq_pd(select, zero);\n#ifdef EIGEN_VECTORIZE_SSE4_1\n  return _mm_blendv_pd(thenPacket, elsePacket, false_mask);\n#else\n  return _mm_or_pd(_mm_andnot_pd(false_mask, thenPacket), _mm_and_pd(false_mask, elsePacket));\n#endif\n}\n\n// Scalar path for pmadd with FMA to ensure consistency with vectorized path.\n#ifdef EIGEN_VECTORIZE_FMA\ntemplate<> EIGEN_STRONG_INLINE float pmadd(const float& a, const float& b, const float& c) {\n  return ::fmaf(a,b,c);\n}\ntemplate<> EIGEN_STRONG_INLINE double pmadd(const double& a, const double& b, const double& c) {\n  return ::fma(a,b,c);\n}\n#endif\n\n\n// Packet math for Eigen::half\n// Disable the following code since it's broken on too many platforms / compilers.\n//#elif defined(EIGEN_VECTORIZE_SSE) && (!EIGEN_ARCH_x86_64) && (!EIGEN_COMP_MSVC)\n#if 0\n\ntypedef struct {\n  __m64 x;\n} Packet4h;\n\n\ntemplate<> struct is_arithmetic<Packet4h> { enum { value = true }; };\n\ntemplate <>\nstruct packet_traits<Eigen::half> : default_packet_traits {\n  typedef Packet4h type;\n  // There is no half-size packet for Packet4h.\n  typedef Packet4h half;\n  enum {\n    Vectorizable = 1,\n    
AlignedOnScalar = 1,\n    size = 4,\n    HasHalfPacket = 0,\n    HasAdd    = 1,\n    HasSub    = 1,\n    HasMul    = 1,\n    HasDiv    = 1,\n    HasNegate = 0,\n    HasAbs    = 0,\n    HasAbs2   = 0,\n    HasMin    = 0,\n    HasMax    = 0,\n    HasConj   = 0,\n    HasSetLinear = 0,\n    HasSqrt = 0,\n    HasRsqrt = 0,\n    HasExp = 0,\n    HasLog = 0,\n    HasBlend = 0\n  };\n};\n\n\ntemplate<> struct unpacket_traits<Packet4h> { typedef Eigen::half type; enum {size=4, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet4h half; };\n\ntemplate<> EIGEN_STRONG_INLINE Packet4h pset1<Packet4h>(const Eigen::half& from) {\n  Packet4h result;\n  result.x = _mm_set1_pi16(from.x);\n  return result;\n}\n\ntemplate<> EIGEN_STRONG_INLINE Eigen::half pfirst<Packet4h>(const Packet4h& from) {\n  return half_impl::raw_uint16_to_half(static_cast<unsigned short>(_mm_cvtsi64_si32(from.x)));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4h pconj(const Packet4h& a) { return a; }\n\ntemplate<> EIGEN_STRONG_INLINE Packet4h padd<Packet4h>(const Packet4h& a, const Packet4h& b) {\n  __int64_t a64 = _mm_cvtm64_si64(a.x);\n  __int64_t b64 = _mm_cvtm64_si64(b.x);\n\n  Eigen::half h[4];\n\n  Eigen::half ha = half_impl::raw_uint16_to_half(static_cast<unsigned short>(a64));\n  Eigen::half hb = half_impl::raw_uint16_to_half(static_cast<unsigned short>(b64));\n  h[0] = ha + hb;\n  ha = half_impl::raw_uint16_to_half(static_cast<unsigned short>(a64 >> 16));\n  hb = half_impl::raw_uint16_to_half(static_cast<unsigned short>(b64 >> 16));\n  h[1] = ha + hb;\n  ha = half_impl::raw_uint16_to_half(static_cast<unsigned short>(a64 >> 32));\n  hb = half_impl::raw_uint16_to_half(static_cast<unsigned short>(b64 >> 32));\n  h[2] = ha + hb;\n  ha = half_impl::raw_uint16_to_half(static_cast<unsigned short>(a64 >> 48));\n  hb = half_impl::raw_uint16_to_half(static_cast<unsigned short>(b64 >> 48));\n  h[3] = ha + hb;\n  Packet4h result;\n  result.x = _mm_set_pi16(h[3].x, h[2].x, h[1].x, h[0].x);\n  return result;\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4h psub<Packet4h>(const Packet4h& a, const Packet4h& b) {\n  __int64_t a64 = _mm_cvtm64_si64(a.x);\n  __int64_t b64 = _mm_cvtm64_si64(b.x);\n\n  Eigen::half h[4];\n\n  Eigen::half ha = half_impl::raw_uint16_to_half(static_cast<unsigned short>(a64));\n  Eigen::half hb = half_impl::raw_uint16_to_half(static_cast<unsigned short>(b64));\n  h[0] = ha - hb;\n  ha = half_impl::raw_uint16_to_half(static_cast<unsigned short>(a64 >> 16));\n  hb = half_impl::raw_uint16_to_half(static_cast<unsigned short>(b64 >> 16));\n  h[1] = ha - hb;\n  ha = half_impl::raw_uint16_to_half(static_cast<unsigned short>(a64 >> 32));\n  hb = half_impl::raw_uint16_to_half(static_cast<unsigned short>(b64 >> 32));\n  h[2] = ha - hb;\n  ha = half_impl::raw_uint16_to_half(static_cast<unsigned short>(a64 >> 48));\n  hb = half_impl::raw_uint16_to_half(static_cast<unsigned short>(b64 >> 48));\n  h[3] = ha - hb;\n  Packet4h result;\n  result.x = _mm_set_pi16(h[3].x, h[2].x, h[1].x, h[0].x);\n  return result;\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4h pmul<Packet4h>(const Packet4h& a, const Packet4h& b) {\n  __int64_t a64 = _mm_cvtm64_si64(a.x);\n  __int64_t b64 = _mm_cvtm64_si64(b.x);\n\n  Eigen::half h[4];\n\n  Eigen::half ha = half_impl::raw_uint16_to_half(static_cast<unsigned short>(a64));\n  Eigen::half hb = half_impl::raw_uint16_to_half(static_cast<unsigned short>(b64));\n  h[0] = ha * hb;\n  ha = half_impl::raw_uint16_to_half(static_cast<unsigned short>(a64 >> 
16));\n  hb = half_impl::raw_uint16_to_half(static_cast<unsigned short>(b64 >> 16));\n  h[1] = ha * hb;\n  ha = half_impl::raw_uint16_to_half(static_cast<unsigned short>(a64 >> 32));\n  hb = half_impl::raw_uint16_to_half(static_cast<unsigned short>(b64 >> 32));\n  h[2] = ha * hb;\n  ha = half_impl::raw_uint16_to_half(static_cast<unsigned short>(a64 >> 48));\n  hb = half_impl::raw_uint16_to_half(static_cast<unsigned short>(b64 >> 48));\n  h[3] = ha * hb;\n  Packet4h result;\n  result.x = _mm_set_pi16(h[3].x, h[2].x, h[1].x, h[0].x);\n  return result;\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4h pdiv<Packet4h>(const Packet4h& a, const Packet4h& b) {\n  __int64_t a64 = _mm_cvtm64_si64(a.x);\n  __int64_t b64 = _mm_cvtm64_si64(b.x);\n\n  Eigen::half h[4];\n\n  Eigen::half ha = half_impl::raw_uint16_to_half(static_cast<unsigned short>(a64));\n  Eigen::half hb = half_impl::raw_uint16_to_half(static_cast<unsigned short>(b64));\n  h[0] = ha / hb;\n  ha = half_impl::raw_uint16_to_half(static_cast<unsigned short>(a64 >> 16));\n  hb = half_impl::raw_uint16_to_half(static_cast<unsigned short>(b64 >> 16));\n  h[1] = ha / hb;\n  ha = half_impl::raw_uint16_to_half(static_cast<unsigned short>(a64 >> 32));\n  hb = half_impl::raw_uint16_to_half(static_cast<unsigned short>(b64 >> 32));\n  h[2] = ha / hb;\n  ha = half_impl::raw_uint16_to_half(static_cast<unsigned short>(a64 >> 48));\n  hb = half_impl::raw_uint16_to_half(static_cast<unsigned short>(b64 >> 48));\n  h[3] = ha / hb;\n  Packet4h result;\n  result.x = _mm_set_pi16(h[3].x, h[2].x, h[1].x, h[0].x);\n  return result;\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4h pload<Packet4h>(const Eigen::half* from) {\n  Packet4h result;\n  result.x = _mm_cvtsi64_m64(*reinterpret_cast<const __int64_t*>(from));\n  return result;\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4h ploadu<Packet4h>(const Eigen::half* from) {\n  Packet4h result;\n  result.x = _mm_cvtsi64_m64(*reinterpret_cast<const __int64_t*>(from));\n  return result;\n}\n\ntemplate<> EIGEN_STRONG_INLINE void pstore<Eigen::half>(Eigen::half* to, const Packet4h& from) {\n  __int64_t r = _mm_cvtm64_si64(from.x);\n  *(reinterpret_cast<__int64_t*>(to)) = r;\n}\n\ntemplate<> EIGEN_STRONG_INLINE void pstoreu<Eigen::half>(Eigen::half* to, const Packet4h& from) {\n  __int64_t r = _mm_cvtm64_si64(from.x);\n  *(reinterpret_cast<__int64_t*>(to)) = r;\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4h\nploadquad<Packet4h>(const Eigen::half* from) {\n  return pset1<Packet4h>(*from);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4h pgather<Eigen::half, Packet4h>(const Eigen::half* from, Index stride)\n{\n  Packet4h result;\n  result.x = _mm_set_pi16(from[3*stride].x, from[2*stride].x, from[1*stride].x, from[0*stride].x);\n  return result;\n}\n\ntemplate<> EIGEN_STRONG_INLINE void pscatter<Eigen::half, Packet4h>(Eigen::half* to, const Packet4h& from, Index stride)\n{\n  __int64_t a = _mm_cvtm64_si64(from.x);\n  to[stride*0].x = static_cast<unsigned short>(a);\n  to[stride*1].x = static_cast<unsigned short>(a >> 16);\n  to[stride*2].x = static_cast<unsigned short>(a >> 32);\n  to[stride*3].x = static_cast<unsigned short>(a >> 48);\n}\n\nEIGEN_STRONG_INLINE void\nptranspose(PacketBlock<Packet4h,4>& kernel) {\n  __m64 T0 = _mm_unpacklo_pi16(kernel.packet[0].x, kernel.packet[1].x);\n  __m64 T1 = _mm_unpacklo_pi16(kernel.packet[2].x, kernel.packet[3].x);\n  __m64 T2 = _mm_unpackhi_pi16(kernel.packet[0].x, kernel.packet[1].x);\n  __m64 T3 = _mm_unpackhi_pi16(kernel.packet[2].x, kernel.packet[3].x);\n\n  kernel.packet[0].x = 
_mm_unpacklo_pi32(T0, T1);\n  kernel.packet[1].x = _mm_unpackhi_pi32(T0, T1);\n  kernel.packet[2].x = _mm_unpacklo_pi32(T2, T3);\n  kernel.packet[3].x = _mm_unpackhi_pi32(T2, T3);\n}\n\n#endif\n\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#if EIGEN_COMP_PGI && EIGEN_COMP_PGI < 1900\n// PGI++ does not define the following intrinsics in C++ mode.\nstatic inline __m128  _mm_castpd_ps   (__m128d x) { return reinterpret_cast<__m128&>(x);  }\nstatic inline __m128i _mm_castpd_si128(__m128d x) { return reinterpret_cast<__m128i&>(x); }\nstatic inline __m128d _mm_castps_pd   (__m128  x) { return reinterpret_cast<__m128d&>(x); }\nstatic inline __m128i _mm_castps_si128(__m128  x) { return reinterpret_cast<__m128i&>(x); }\nstatic inline __m128  _mm_castsi128_ps(__m128i x) { return reinterpret_cast<__m128&>(x);  }\nstatic inline __m128d _mm_castsi128_pd(__m128i x) { return reinterpret_cast<__m128d&>(x); }\n#endif\n\n#endif // EIGEN_PACKET_MATH_SSE_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/arch/SSE/TypeCasting.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_TYPE_CASTING_SSE_H\n#define EIGEN_TYPE_CASTING_SSE_H\n\nnamespace Eigen {\n\nnamespace internal {\n\n#ifndef EIGEN_VECTORIZE_AVX\ntemplate <>\nstruct type_casting_traits<float, int> {\n  enum {\n    VectorizedCast = 1,\n    SrcCoeffRatio = 1,\n    TgtCoeffRatio = 1\n  };\n};\n\ntemplate <>\nstruct type_casting_traits<int, float> {\n  enum {\n    VectorizedCast = 1,\n    SrcCoeffRatio = 1,\n    TgtCoeffRatio = 1\n  };\n};\n\ntemplate <>\nstruct type_casting_traits<double, float> {\n  enum {\n    VectorizedCast = 1,\n    SrcCoeffRatio = 2,\n    TgtCoeffRatio = 1\n  };\n};\n\ntemplate <>\nstruct type_casting_traits<float, double> {\n  enum {\n    VectorizedCast = 1,\n    SrcCoeffRatio = 1,\n    TgtCoeffRatio = 2\n  };\n};\n#endif\n\ntemplate<> EIGEN_STRONG_INLINE Packet4i pcast<Packet4f, Packet4i>(const Packet4f& a) {\n  return _mm_cvttps_epi32(a);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f pcast<Packet4i, Packet4f>(const Packet4i& a) {\n  return _mm_cvtepi32_ps(a);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f pcast<Packet2d, Packet4f>(const Packet2d& a, const Packet2d& b) {\n  return _mm_shuffle_ps(_mm_cvtpd_ps(a), _mm_cvtpd_ps(b), (1 << 2) | (1 << 6));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet2d pcast<Packet4f, Packet2d>(const Packet4f& a) {\n  // Simply discard the second half of the input\n  return _mm_cvtps_pd(a);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4i preinterpret<Packet4i,Packet4f>(const Packet4f& a) {\n  return _mm_castps_si128(a);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f preinterpret<Packet4f,Packet4i>(const Packet4i& a) {\n  return _mm_castsi128_ps(a);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet2d preinterpret<Packet2d,Packet4i>(const Packet4i& a) {\n  return _mm_castsi128_pd(a);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4i preinterpret<Packet4i,Packet2d>(const Packet2d& a) {\n  return _mm_castpd_si128(a);\n}\n\n// Disable the following code since it's broken on too many platforms / compilers.\n//#elif defined(EIGEN_VECTORIZE_SSE) && (!EIGEN_ARCH_x86_64) && (!EIGEN_COMP_MSVC)\n#if 0\n\ntemplate <>\nstruct type_casting_traits<Eigen::half, float> {\n  enum {\n    VectorizedCast = 1,\n    SrcCoeffRatio = 1,\n    TgtCoeffRatio = 1\n  };\n};\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f pcast<Packet4h, Packet4f>(const Packet4h& a) {\n  __int64_t a64 = _mm_cvtm64_si64(a.x);\n  Eigen::half h = raw_uint16_to_half(static_cast<unsigned short>(a64));\n  float f1 = static_cast<float>(h);\n  h = raw_uint16_to_half(static_cast<unsigned short>(a64 >> 16));\n  float f2 = static_cast<float>(h);\n  h = raw_uint16_to_half(static_cast<unsigned short>(a64 >> 32));\n  float f3 = static_cast<float>(h);\n  h = raw_uint16_to_half(static_cast<unsigned short>(a64 >> 48));\n  float f4 = static_cast<float>(h);\n  return _mm_set_ps(f4, f3, f2, f1);\n}\n\ntemplate <>\nstruct type_casting_traits<float, Eigen::half> {\n  enum {\n    VectorizedCast = 1,\n    SrcCoeffRatio = 1,\n    TgtCoeffRatio = 1\n  };\n};\n\ntemplate<> EIGEN_STRONG_INLINE Packet4h pcast<Packet4f, Packet4h>(const Packet4f& a) {\n  EIGEN_ALIGN16 float aux[4];\n  pstore(aux, a);\n  Eigen::half h0(aux[0]);\n  Eigen::half h1(aux[1]);\n  
Eigen::half h2(aux[2]);\n  Eigen::half h3(aux[3]);\n\n  Packet4h result;\n  result.x = _mm_set_pi16(h3.x, h2.x, h1.x, h0.x);\n  return result;\n}\n\n#endif\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_TYPE_CASTING_SSE_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/arch/SVE/MathFunctions.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2020, Arm Limited and Contributors\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_MATH_FUNCTIONS_SVE_H\n#define EIGEN_MATH_FUNCTIONS_SVE_H\n\nnamespace Eigen {\nnamespace internal {\n\ntemplate <>\nEIGEN_STRONG_INLINE EIGEN_UNUSED PacketXf pexp<PacketXf>(const PacketXf& x) {\n  return pexp_float(x);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE EIGEN_UNUSED PacketXf plog<PacketXf>(const PacketXf& x) {\n  return plog_float(x);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE EIGEN_UNUSED PacketXf psin<PacketXf>(const PacketXf& x) {\n  return psin_float(x);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE EIGEN_UNUSED PacketXf pcos<PacketXf>(const PacketXf& x) {\n  return pcos_float(x);\n}\n\n// Hyperbolic Tangent function.\ntemplate <>\nEIGEN_STRONG_INLINE EIGEN_UNUSED PacketXf ptanh<PacketXf>(const PacketXf& x) {\n  return internal::generic_fast_tanh_float(x);\n}\n}  // end namespace internal\n}  // end namespace Eigen\n\n#endif  // EIGEN_MATH_FUNCTIONS_SVE_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/arch/SVE/PacketMath.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2020, Arm Limited and Contributors\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_PACKET_MATH_SVE_H\n#define EIGEN_PACKET_MATH_SVE_H\n\nnamespace Eigen\n{\nnamespace internal\n{\n#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD\n#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8\n#endif\n\n#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD\n#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD\n#endif\n\n#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 32\n\ntemplate <typename Scalar, int SVEVectorLength>\nstruct sve_packet_size_selector {\n  enum { size = SVEVectorLength / (sizeof(Scalar) * CHAR_BIT) };\n};\n\n/********************************* int32 **************************************/\ntypedef svint32_t PacketXi __attribute__((arm_sve_vector_bits(EIGEN_ARM64_SVE_VL)));\n\ntemplate <>\nstruct packet_traits<numext::int32_t> : default_packet_traits {\n  typedef PacketXi type;\n  typedef PacketXi half;  // Half not implemented yet\n  enum {\n    Vectorizable = 1,\n    AlignedOnScalar = 1,\n    size = sve_packet_size_selector<numext::int32_t, EIGEN_ARM64_SVE_VL>::size,\n    HasHalfPacket = 0,\n\n    HasAdd = 1,\n    HasSub = 1,\n    HasShift = 1,\n    HasMul = 1,\n    HasNegate = 1,\n    HasAbs = 1,\n    HasArg = 0,\n    HasAbs2 = 1,\n    HasMin = 1,\n    HasMax = 1,\n    HasConj = 1,\n    HasSetLinear = 0,\n    HasBlend = 0,\n    HasReduxp = 0  // Not implemented in SVE\n  };\n};\n\ntemplate <>\nstruct unpacket_traits<PacketXi> {\n  typedef numext::int32_t type;\n  typedef PacketXi half;  // Half not yet implemented\n  enum {\n    size = sve_packet_size_selector<numext::int32_t, EIGEN_ARM64_SVE_VL>::size,\n    alignment = Aligned64,\n    vectorizable = true,\n    masked_load_available = false,\n    masked_store_available = false\n  };\n};\n\ntemplate <>\nEIGEN_STRONG_INLINE void prefetch<numext::int32_t>(const numext::int32_t* addr)\n{\n  svprfw(svptrue_b32(), addr, SV_PLDL1KEEP);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE PacketXi pset1<PacketXi>(const numext::int32_t& from)\n{\n  return svdup_n_s32(from);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE PacketXi plset<PacketXi>(const numext::int32_t& a)\n{\n  numext::int32_t c[packet_traits<numext::int32_t>::size];\n  for (int i = 0; i < packet_traits<numext::int32_t>::size; i++) c[i] = i;\n  return svadd_s32_z(svptrue_b32(), pset1<PacketXi>(a), svld1_s32(svptrue_b32(), c));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE PacketXi padd<PacketXi>(const PacketXi& a, const PacketXi& b)\n{\n  return svadd_s32_z(svptrue_b32(), a, b);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE PacketXi psub<PacketXi>(const PacketXi& a, const PacketXi& b)\n{\n  return svsub_s32_z(svptrue_b32(), a, b);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE PacketXi pnegate(const PacketXi& a)\n{\n  return svneg_s32_z(svptrue_b32(), a);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE PacketXi pconj(const PacketXi& a)\n{\n  return a;\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE PacketXi pmul<PacketXi>(const PacketXi& a, const PacketXi& b)\n{\n  return svmul_s32_z(svptrue_b32(), a, b);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE PacketXi pdiv<PacketXi>(const PacketXi& a, const PacketXi& b)\n{\n  return svdiv_s32_z(svptrue_b32(), a, b);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE PacketXi pmadd(const PacketXi& a, const PacketXi& b, const PacketXi& c)\n{\n  
return svmla_s32_z(svptrue_b32(), c, a, b);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE PacketXi pmin<PacketXi>(const PacketXi& a, const PacketXi& b)\n{\n  return svmin_s32_z(svptrue_b32(), a, b);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE PacketXi pmax<PacketXi>(const PacketXi& a, const PacketXi& b)\n{\n  return svmax_s32_z(svptrue_b32(), a, b);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE PacketXi pcmp_le<PacketXi>(const PacketXi& a, const PacketXi& b)\n{\n  return svdup_n_s32_z(svcmple_s32(svptrue_b32(), a, b), 0xffffffffu);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE PacketXi pcmp_lt<PacketXi>(const PacketXi& a, const PacketXi& b)\n{\n  return svdup_n_s32_z(svcmplt_s32(svptrue_b32(), a, b), 0xffffffffu);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE PacketXi pcmp_eq<PacketXi>(const PacketXi& a, const PacketXi& b)\n{\n  return svdup_n_s32_z(svcmpeq_s32(svptrue_b32(), a, b), 0xffffffffu);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE PacketXi ptrue<PacketXi>(const PacketXi& /*a*/)\n{\n  return svdup_n_s32_z(svptrue_b32(), 0xffffffffu);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE PacketXi pzero<PacketXi>(const PacketXi& /*a*/)\n{\n  return svdup_n_s32_z(svptrue_b32(), 0);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE PacketXi pand<PacketXi>(const PacketXi& a, const PacketXi& b)\n{\n  return svand_s32_z(svptrue_b32(), a, b);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE PacketXi por<PacketXi>(const PacketXi& a, const PacketXi& b)\n{\n  return svorr_s32_z(svptrue_b32(), a, b);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE PacketXi pxor<PacketXi>(const PacketXi& a, const PacketXi& b)\n{\n  return sveor_s32_z(svptrue_b32(), a, b);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE PacketXi pandnot<PacketXi>(const PacketXi& a, const PacketXi& b)\n{\n  return svbic_s32_z(svptrue_b32(), a, b);\n}\n\ntemplate <int N>\nEIGEN_STRONG_INLINE PacketXi parithmetic_shift_right(PacketXi a)\n{\n  return svasrd_n_s32_z(svptrue_b32(), a, N);\n}\n\ntemplate <int N>\nEIGEN_STRONG_INLINE PacketXi plogical_shift_right(PacketXi a)\n{\n  return svreinterpret_s32_u32(svlsr_u32_z(svptrue_b32(), svreinterpret_u32_s32(a), svdup_n_u32_z(svptrue_b32(), N)));\n}\n\ntemplate <int N>\nEIGEN_STRONG_INLINE PacketXi plogical_shift_left(PacketXi a)\n{\n  return svlsl_s32_z(svptrue_b32(), a, svdup_n_u32_z(svptrue_b32(), N));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE PacketXi pload<PacketXi>(const numext::int32_t* from)\n{\n  EIGEN_DEBUG_ALIGNED_LOAD return svld1_s32(svptrue_b32(), from);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE PacketXi ploadu<PacketXi>(const numext::int32_t* from)\n{\n  EIGEN_DEBUG_UNALIGNED_LOAD return svld1_s32(svptrue_b32(), from);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE PacketXi ploaddup<PacketXi>(const numext::int32_t* from)\n{\n  svuint32_t indices = svindex_u32(0, 1);  // index {base=0, base+step=1, base+step*2, ...}\n  indices = svzip1_u32(indices, indices);  // index in the format {a0, a0, a1, a1, a2, a2, ...}\n  return svld1_gather_u32index_s32(svptrue_b32(), from, indices);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE PacketXi ploadquad<PacketXi>(const numext::int32_t* from)\n{\n  svuint32_t indices = svindex_u32(0, 1);  // index {base=0, base+step=1, base+step*2, ...}\n  indices = svzip1_u32(indices, indices);  // index in the format {a0, a0, a1, a1, a2, a2, ...}\n  indices = svzip1_u32(indices, indices);  // index in the format {a0, a0, a0, a0, a1, a1, a1, a1, ...}\n  return svld1_gather_u32index_s32(svptrue_b32(), from, indices);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE void pstore<numext::int32_t>(numext::int32_t* to, const PacketXi& from)\n{\n  EIGEN_DEBUG_ALIGNED_STORE 
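// Predicated contiguous store; svptrue_b32() keeps all lanes active.\n  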
svst1_s32(svptrue_b32(), to, from);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE void pstoreu<numext::int32_t>(numext::int32_t* to, const PacketXi& from)\n{\n  EIGEN_DEBUG_UNALIGNED_STORE svst1_s32(svptrue_b32(), to, from);\n}\n\ntemplate <>\nEIGEN_DEVICE_FUNC inline PacketXi pgather<numext::int32_t, PacketXi>(const numext::int32_t* from, Index stride)\n{\n  // Index format: {base=0, base+stride, base+stride*2, base+stride*3, ...}\n  svint32_t indices = svindex_s32(0, stride);\n  return svld1_gather_s32index_s32(svptrue_b32(), from, indices);\n}\n\ntemplate <>\nEIGEN_DEVICE_FUNC inline void pscatter<numext::int32_t, PacketXi>(numext::int32_t* to, const PacketXi& from, Index stride)\n{\n  // Index format: {base=0, base+stride, base+stride*2, base+stride*3, ...}\n  svint32_t indices = svindex_s32(0, stride);\n  svst1_scatter_s32index_s32(svptrue_b32(), to, indices, from);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE numext::int32_t pfirst<PacketXi>(const PacketXi& a)\n{\n  // svlasta returns the first element if all predicate bits are 0\n  return svlasta_s32(svpfalse_b(), a);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE PacketXi preverse(const PacketXi& a)\n{\n  return svrev_s32(a);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE PacketXi pabs(const PacketXi& a)\n{\n  return svabs_s32_z(svptrue_b32(), a);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE numext::int32_t predux<PacketXi>(const PacketXi& a)\n{\n  return static_cast<numext::int32_t>(svaddv_s32(svptrue_b32(), a));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE numext::int32_t predux_mul<PacketXi>(const PacketXi& a)\n{\n  EIGEN_STATIC_ASSERT((EIGEN_ARM64_SVE_VL % 128 == 0),\n                      EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT);\n\n  // Multiply the vector by its reverse\n  svint32_t prod = svmul_s32_z(svptrue_b32(), a, svrev_s32(a));\n  svint32_t half_prod;\n\n  // Extract the high half of the vector. 
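Each svtbl step shifts the upper half down so it can be multiplied into the lower half. 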
Depending on the VL more reductions need to be done\n  if (EIGEN_ARM64_SVE_VL >= 2048) {\n    half_prod = svtbl_s32(prod, svindex_u32(32, 1));\n    prod = svmul_s32_z(svptrue_b32(), prod, half_prod);\n  }\n  if (EIGEN_ARM64_SVE_VL >= 1024) {\n    half_prod = svtbl_s32(prod, svindex_u32(16, 1));\n    prod = svmul_s32_z(svptrue_b32(), prod, half_prod);\n  }\n  if (EIGEN_ARM64_SVE_VL >= 512) {\n    half_prod = svtbl_s32(prod, svindex_u32(8, 1));\n    prod = svmul_s32_z(svptrue_b32(), prod, half_prod);\n  }\n  if (EIGEN_ARM64_SVE_VL >= 256) {\n    half_prod = svtbl_s32(prod, svindex_u32(4, 1));\n    prod = svmul_s32_z(svptrue_b32(), prod, half_prod);\n  }\n  // Last reduction\n  half_prod = svtbl_s32(prod, svindex_u32(2, 1));\n  prod = svmul_s32_z(svptrue_b32(), prod, half_prod);\n\n  // The reduction is done to the first element.\n  return pfirst<PacketXi>(prod);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE numext::int32_t predux_min<PacketXi>(const PacketXi& a)\n{\n  return svminv_s32(svptrue_b32(), a);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE numext::int32_t predux_max<PacketXi>(const PacketXi& a)\n{\n  return svmaxv_s32(svptrue_b32(), a);\n}\n\ntemplate <int N>\nEIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<PacketXi, N>& kernel) {\n  int buffer[packet_traits<numext::int32_t>::size * N] = {0};\n  int i = 0;\n\n  PacketXi stride_index = svindex_s32(0, N);\n\n  for (i = 0; i < N; i++) {\n    svst1_scatter_s32index_s32(svptrue_b32(), buffer + i, stride_index, kernel.packet[i]);\n  }\n  for (i = 0; i < N; i++) {\n    kernel.packet[i] = svld1_s32(svptrue_b32(), buffer + i * packet_traits<numext::int32_t>::size);\n  }\n}\n\n/********************************* float32 ************************************/\n\ntypedef svfloat32_t PacketXf __attribute__((arm_sve_vector_bits(EIGEN_ARM64_SVE_VL)));\n\ntemplate <>\nstruct packet_traits<float> : default_packet_traits {\n  typedef PacketXf type;\n  typedef PacketXf half;\n\n  enum {\n    Vectorizable = 1,\n    AlignedOnScalar = 1,\n    size = sve_packet_size_selector<float, EIGEN_ARM64_SVE_VL>::size,\n    HasHalfPacket = 0,\n\n    HasAdd = 1,\n    HasSub = 1,\n    HasShift = 1,\n    HasMul = 1,\n    HasNegate = 1,\n    HasAbs = 1,\n    HasArg = 0,\n    HasAbs2 = 1,\n    HasMin = 1,\n    HasMax = 1,\n    HasConj = 1,\n    HasSetLinear = 0,\n    HasBlend = 0,\n    HasReduxp = 0,  // Not implemented in SVE\n\n    HasDiv = 1,\n    HasFloor = 1,\n\n    HasSin = EIGEN_FAST_MATH,\n    HasCos = EIGEN_FAST_MATH,\n    HasLog = 1,\n    HasExp = 1,\n    HasSqrt = 0,\n    HasTanh = EIGEN_FAST_MATH,\n    HasErf = EIGEN_FAST_MATH\n  };\n};\n\ntemplate <>\nstruct unpacket_traits<PacketXf> {\n  typedef float type;\n  typedef PacketXf half;  // Half not yet implemented\n  typedef PacketXi integer_packet;\n\n  enum {\n    size = sve_packet_size_selector<float, EIGEN_ARM64_SVE_VL>::size,\n    alignment = Aligned64,\n    vectorizable = true,\n    masked_load_available = false,\n    masked_store_available = false\n  };\n};\n\ntemplate <>\nEIGEN_STRONG_INLINE PacketXf pset1<PacketXf>(const float& from)\n{\n  return svdup_n_f32(from);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE PacketXf pset1frombits<PacketXf>(numext::uint32_t from)\n{\n  return svreinterpret_f32_u32(svdup_n_u32_z(svptrue_b32(), from));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE PacketXf plset<PacketXf>(const float& a)\n{\n  float c[packet_traits<float>::size];\n  for (int i = 0; i < packet_traits<float>::size; i++) c[i] = i;\n  return svadd_f32_z(svptrue_b32(), pset1<PacketXf>(a), svld1_f32(svptrue_b32(), 
c));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE PacketXf padd<PacketXf>(const PacketXf& a, const PacketXf& b)\n{\n  return svadd_f32_z(svptrue_b32(), a, b);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE PacketXf psub<PacketXf>(const PacketXf& a, const PacketXf& b)\n{\n  return svsub_f32_z(svptrue_b32(), a, b);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE PacketXf pnegate(const PacketXf& a)\n{\n  return svneg_f32_z(svptrue_b32(), a);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE PacketXf pconj(const PacketXf& a)\n{\n  return a;\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE PacketXf pmul<PacketXf>(const PacketXf& a, const PacketXf& b)\n{\n  return svmul_f32_z(svptrue_b32(), a, b);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE PacketXf pdiv<PacketXf>(const PacketXf& a, const PacketXf& b)\n{\n  return svdiv_f32_z(svptrue_b32(), a, b);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE PacketXf pmadd(const PacketXf& a, const PacketXf& b, const PacketXf& c)\n{\n  return svmla_f32_z(svptrue_b32(), c, a, b);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE PacketXf pmin<PacketXf>(const PacketXf& a, const PacketXf& b)\n{\n  return svmin_f32_z(svptrue_b32(), a, b);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE PacketXf pmin<PropagateNaN, PacketXf>(const PacketXf& a, const PacketXf& b)\n{\n  return pmin<PacketXf>(a, b);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE PacketXf pmin<PropagateNumbers, PacketXf>(const PacketXf& a, const PacketXf& b)\n{\n  return svminnm_f32_z(svptrue_b32(), a, b);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE PacketXf pmax<PacketXf>(const PacketXf& a, const PacketXf& b)\n{\n  return svmax_f32_z(svptrue_b32(), a, b);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE PacketXf pmax<PropagateNaN, PacketXf>(const PacketXf& a, const PacketXf& b)\n{\n  return pmax<PacketXf>(a, b);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE PacketXf pmax<PropagateNumbers, PacketXf>(const PacketXf& a, const PacketXf& b)\n{\n  return svmaxnm_f32_z(svptrue_b32(), a, b);\n}\n\n// Float comparisons in SVE return svbool (predicate). Use svdup to set active\n// lanes to 1 (0xffffffffu) and inactive lanes to 0.\ntemplate <>\nEIGEN_STRONG_INLINE PacketXf pcmp_le<PacketXf>(const PacketXf& a, const PacketXf& b)\n{\n  return svreinterpret_f32_u32(svdup_n_u32_z(svcmple_f32(svptrue_b32(), a, b), 0xffffffffu));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE PacketXf pcmp_lt<PacketXf>(const PacketXf& a, const PacketXf& b)\n{\n  return svreinterpret_f32_u32(svdup_n_u32_z(svcmplt_f32(svptrue_b32(), a, b), 0xffffffffu));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE PacketXf pcmp_eq<PacketXf>(const PacketXf& a, const PacketXf& b)\n{\n  return svreinterpret_f32_u32(svdup_n_u32_z(svcmpeq_f32(svptrue_b32(), a, b), 0xffffffffu));\n}\n\n// Do a predicate inverse (svnot_b_z) on the predicate resulting from the\n// greater/equal comparison (svcmpge_f32). 
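The inverted predicate is active when a < b and also when either operand is NaN, since any comparison involving NaN is false. 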
Then fill a float vector with the\n// active elements.\ntemplate <>\nEIGEN_STRONG_INLINE PacketXf pcmp_lt_or_nan<PacketXf>(const PacketXf& a, const PacketXf& b)\n{\n  return svreinterpret_f32_u32(svdup_n_u32_z(svnot_b_z(svptrue_b32(), svcmpge_f32(svptrue_b32(), a, b)), 0xffffffffu));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE PacketXf pfloor<PacketXf>(const PacketXf& a)\n{\n  return svrintm_f32_z(svptrue_b32(), a);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE PacketXf ptrue<PacketXf>(const PacketXf& /*a*/)\n{\n  return svreinterpret_f32_u32(svdup_n_u32_z(svptrue_b32(), 0xffffffffu));\n}\n\n// Logical operations are not supported for float, so use reinterpret casts\ntemplate <>\nEIGEN_STRONG_INLINE PacketXf pand<PacketXf>(const PacketXf& a, const PacketXf& b)\n{\n  return svreinterpret_f32_u32(svand_u32_z(svptrue_b32(), svreinterpret_u32_f32(a), svreinterpret_u32_f32(b)));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE PacketXf por<PacketXf>(const PacketXf& a, const PacketXf& b)\n{\n  return svreinterpret_f32_u32(svorr_u32_z(svptrue_b32(), svreinterpret_u32_f32(a), svreinterpret_u32_f32(b)));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE PacketXf pxor<PacketXf>(const PacketXf& a, const PacketXf& b)\n{\n  return svreinterpret_f32_u32(sveor_u32_z(svptrue_b32(), svreinterpret_u32_f32(a), svreinterpret_u32_f32(b)));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE PacketXf pandnot<PacketXf>(const PacketXf& a, const PacketXf& b)\n{\n  return svreinterpret_f32_u32(svbic_u32_z(svptrue_b32(), svreinterpret_u32_f32(a), svreinterpret_u32_f32(b)));\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE PacketXf pload<PacketXf>(const float* from)\n{\n  EIGEN_DEBUG_ALIGNED_LOAD return svld1_f32(svptrue_b32(), from);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE PacketXf ploadu<PacketXf>(const float* from)\n{\n  EIGEN_DEBUG_UNALIGNED_LOAD return svld1_f32(svptrue_b32(), from);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE PacketXf ploaddup<PacketXf>(const float* from)\n{\n  svuint32_t indices = svindex_u32(0, 1);  // index {base=0, base+step=1, base+step*2, ...}\n  indices = svzip1_u32(indices, indices);  // index in the format {a0, a0, a1, a1, a2, a2, ...}\n  return svld1_gather_u32index_f32(svptrue_b32(), from, indices);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE PacketXf ploadquad<PacketXf>(const float* from)\n{\n  svuint32_t indices = svindex_u32(0, 1);  // index {base=0, base+step=1, base+step*2, ...}\n  indices = svzip1_u32(indices, indices);  // index in the format {a0, a0, a1, a1, a2, a2, ...}\n  indices = svzip1_u32(indices, indices);  // index in the format {a0, a0, a0, a0, a1, a1, a1, a1, ...}\n  return svld1_gather_u32index_f32(svptrue_b32(), from, indices);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE void pstore<float>(float* to, const PacketXf& from)\n{\n  EIGEN_DEBUG_ALIGNED_STORE svst1_f32(svptrue_b32(), to, from);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE void pstoreu<float>(float* to, const PacketXf& from)\n{\n  EIGEN_DEBUG_UNALIGNED_STORE svst1_f32(svptrue_b32(), to, from);\n}\n\ntemplate <>\nEIGEN_DEVICE_FUNC inline PacketXf pgather<float, PacketXf>(const float* from, Index stride)\n{\n  // Index format: {base=0, base+stride, base+stride*2, base+stride*3, ...}\n  svint32_t indices = svindex_s32(0, stride);\n  return svld1_gather_s32index_f32(svptrue_b32(), from, indices);\n}\n\ntemplate <>\nEIGEN_DEVICE_FUNC inline void pscatter<float, PacketXf>(float* to, const PacketXf& from, Index stride)\n{\n  // Index format: {base=0, base+stride, base+stride*2, base+stride*3, ...}\n  svint32_t indices = svindex_s32(0, stride);\n  svst1_scatter_s32index_f32(svptrue_b32(), 
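/* scatter the packet lanes to memory addresses offset by stride */ 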
to, indices, from);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE float pfirst<PacketXf>(const PacketXf& a)\n{\n  // svlasta returns the first element if all predicate bits are 0\n  return svlasta_f32(svpfalse_b(), a);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE PacketXf preverse(const PacketXf& a)\n{\n  return svrev_f32(a);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE PacketXf pabs(const PacketXf& a)\n{\n  return svabs_f32_z(svptrue_b32(), a);\n}\n\n// TODO(tellenbach): Should this go into MathFunctions.h? If so, change for \n// all vector extensions and the generic version.\ntemplate <>\nEIGEN_STRONG_INLINE PacketXf pfrexp<PacketXf>(const PacketXf& a, PacketXf& exponent)\n{\n  return pfrexp_generic(a, exponent);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE float predux<PacketXf>(const PacketXf& a)\n{\n  return svaddv_f32(svptrue_b32(), a);\n}\n\n// Other reduction functions:\n// mul\n// Only works for SVE Vls multiple of 128\ntemplate <>\nEIGEN_STRONG_INLINE float predux_mul<PacketXf>(const PacketXf& a)\n{\n  EIGEN_STATIC_ASSERT((EIGEN_ARM64_SVE_VL % 128 == 0),\n                      EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT);\n  // Multiply the vector by its reverse\n  svfloat32_t prod = svmul_f32_z(svptrue_b32(), a, svrev_f32(a));\n  svfloat32_t half_prod;\n\n  // Extract the high half of the vector. Depending on the VL more reductions need to be done\n  if (EIGEN_ARM64_SVE_VL >= 2048) {\n    half_prod = svtbl_f32(prod, svindex_u32(32, 1));\n    prod = svmul_f32_z(svptrue_b32(), prod, half_prod);\n  }\n  if (EIGEN_ARM64_SVE_VL >= 1024) {\n    half_prod = svtbl_f32(prod, svindex_u32(16, 1));\n    prod = svmul_f32_z(svptrue_b32(), prod, half_prod);\n  }\n  if (EIGEN_ARM64_SVE_VL >= 512) {\n    half_prod = svtbl_f32(prod, svindex_u32(8, 1));\n    prod = svmul_f32_z(svptrue_b32(), prod, half_prod);\n  }\n  if (EIGEN_ARM64_SVE_VL >= 256) {\n    half_prod = svtbl_f32(prod, svindex_u32(4, 1));\n    prod = svmul_f32_z(svptrue_b32(), prod, half_prod);\n  }\n  // Last reduction\n  half_prod = svtbl_f32(prod, svindex_u32(2, 1));\n  prod = svmul_f32_z(svptrue_b32(), prod, half_prod);\n\n  // The reduction is done to the first element.\n  return pfirst<PacketXf>(prod);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE float predux_min<PacketXf>(const PacketXf& a)\n{\n  return svminv_f32(svptrue_b32(), a);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE float predux_max<PacketXf>(const PacketXf& a)\n{\n  return svmaxv_f32(svptrue_b32(), a);\n}\n\ntemplate<int N>\nEIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<PacketXf, N>& kernel)\n{\n  float buffer[packet_traits<float>::size * N] = {0};\n  int i = 0;\n\n  PacketXi stride_index = svindex_s32(0, N);\n\n  for (i = 0; i < N; i++) {\n    svst1_scatter_s32index_f32(svptrue_b32(), buffer + i, stride_index, kernel.packet[i]);\n  }\n\n  for (i = 0; i < N; i++) {\n    kernel.packet[i] = svld1_f32(svptrue_b32(), buffer + i * packet_traits<float>::size);\n  }\n}\n\ntemplate<>\nEIGEN_STRONG_INLINE PacketXf pldexp<PacketXf>(const PacketXf& a, const PacketXf& exponent)\n{\n  return pldexp_generic(a, exponent);\n}\n\n}  // namespace internal\n}  // namespace Eigen\n\n#endif  // EIGEN_PACKET_MATH_SVE_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/arch/SVE/TypeCasting.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2020, Arm Limited and Contributors\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_TYPE_CASTING_SVE_H\n#define EIGEN_TYPE_CASTING_SVE_H\n\nnamespace Eigen {\nnamespace internal {\n\ntemplate <>\nstruct type_casting_traits<float, numext::int32_t> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };\n};\n\ntemplate <>\nstruct type_casting_traits<numext::int32_t, float> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };\n};\n\ntemplate <>\nEIGEN_STRONG_INLINE PacketXf pcast<PacketXi, PacketXf>(const PacketXi& a) {\n  return svcvt_f32_s32_z(svptrue_b32(), a);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE PacketXi pcast<PacketXf, PacketXi>(const PacketXf& a) {\n  return svcvt_s32_f32_z(svptrue_b32(), a);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE PacketXf preinterpret<PacketXf, PacketXi>(const PacketXi& a) {\n  return svreinterpret_f32_s32(a);\n}\n\ntemplate <>\nEIGEN_STRONG_INLINE PacketXi preinterpret<PacketXi, PacketXf>(const PacketXf& a) {\n  return svreinterpret_s32_f32(a);\n}\n\n}  // namespace internal\n}  // namespace Eigen\n\n#endif // EIGEN_TYPE_CASTING_SVE_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/arch/SYCL/InteropHeaders.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Mehdi Goli    Codeplay Software Ltd.\n// Ralph Potter  Codeplay Software Ltd.\n// Luke Iwanski  Codeplay Software Ltd.\n// Contact: <eigen@codeplay.com>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n/*****************************************************************\n * InteropHeaders.h\n *\n * \\brief:\n *  InteropHeaders\n *\n *****************************************************************/\n\n#ifndef EIGEN_INTEROP_HEADERS_SYCL_H\n#define EIGEN_INTEROP_HEADERS_SYCL_H\n\nnamespace Eigen {\n\n#if !defined(EIGEN_DONT_VECTORIZE_SYCL)\n\nnamespace internal {\n\ntemplate <int has_blend, int lengths>\nstruct sycl_packet_traits : default_packet_traits {\n  enum {\n    Vectorizable = 1,\n    AlignedOnScalar = 1,\n    size = lengths,\n    HasHalfPacket = 0,\n    HasDiv = 1,\n    HasLog = 1,\n    HasExp = 1,\n    HasSqrt = 1,\n    HasRsqrt = 1,\n    HasSin = 1,\n    HasCos = 1,\n    HasTan = 1,\n    HasASin = 1,\n    HasACos = 1,\n    HasATan = 1,\n    HasSinh = 1,\n    HasCosh = 1,\n    HasTanh = 1,\n    HasLGamma = 0,\n    HasDiGamma = 0,\n    HasZeta = 0,\n    HasPolygamma = 0,\n    HasErf = 0,\n    HasErfc = 0,\n    HasNdtri = 0,\n    HasIGamma = 0,\n    HasIGammac = 0,\n    HasBetaInc = 0,\n    HasBlend = has_blend,\n    // This flag is used to indicate whether packet comparison is supported.\n    // pcmp_eq, pcmp_lt and pcmp_le should be defined for it to be true.\n    HasCmp = 1,\n    HasMax = 1,\n    HasMin = 1,\n    HasMul = 1,\n    HasAdd = 1,\n    HasFloor = 1,\n    HasRound = 1,\n    HasRint = 1,\n    HasLog1p = 1,\n    HasExpm1 = 1,\n    HasCeil = 1,\n  };\n};\n\n#ifdef SYCL_DEVICE_ONLY\n#define SYCL_PACKET_TRAITS(packet_type, has_blend, unpacket_type, lengths) \\\n  template <>                                                              \\\n  struct packet_traits<unpacket_type>                                      \\\n      : sycl_packet_traits<has_blend, lengths> {                           \\\n    typedef packet_type type;                                              \\\n    typedef packet_type half;                                              \\\n  };\n\nSYCL_PACKET_TRAITS(cl::sycl::cl_float4, 1, float, 4)\nSYCL_PACKET_TRAITS(cl::sycl::cl_float4, 1, const float, 4)\nSYCL_PACKET_TRAITS(cl::sycl::cl_double2, 0, double, 2)\nSYCL_PACKET_TRAITS(cl::sycl::cl_double2, 0, const double, 2)\n#undef SYCL_PACKET_TRAITS\n\n// Make sure this is only available when targeting a GPU: we don't want to\n// introduce conflicts between these packet_traits definitions and the ones\n// we'll use on the host side (SSE, AVX, ...)\n#define SYCL_ARITHMETIC(packet_type)  \\\n  template <>                         \\\n  struct is_arithmetic<packet_type> { \\\n    enum { value = true };            \\\n  };\nSYCL_ARITHMETIC(cl::sycl::cl_float4)\nSYCL_ARITHMETIC(cl::sycl::cl_double2)\n#undef SYCL_ARITHMETIC\n\n#define SYCL_UNPACKET_TRAITS(packet_type, unpacket_type, lengths)        \\\n  template <>                                                            \\\n  struct unpacket_traits<packet_type> {                                  \\\n    typedef unpacket_type type;                                          \\\n    enum { size = lengths, vectorizable = true, alignment = Aligned16 }; \\\n    typedef packet_type half;                      
                      \\\n  };\nSYCL_UNPACKET_TRAITS(cl::sycl::cl_float4, float, 4)\nSYCL_UNPACKET_TRAITS(cl::sycl::cl_double2, double, 2)\n\n#undef SYCL_UNPACKET_TRAITS\n#endif\n\n}  // end namespace internal\n\n#endif\n\nnamespace TensorSycl {\nnamespace internal {\n\ntemplate <typename PacketReturnType, int PacketSize>\nstruct PacketWrapper;\n// This function should never get called on the device\n#ifndef SYCL_DEVICE_ONLY\ntemplate <typename PacketReturnType, int PacketSize>\nstruct PacketWrapper {\n  typedef typename ::Eigen::internal::unpacket_traits<PacketReturnType>::type\n      Scalar;\n  template <typename Index>\n  EIGEN_DEVICE_FUNC static Scalar scalarize(Index, PacketReturnType &) {\n    eigen_assert(false && \"THERE IS NO PACKETIZE VERSION FOR THE CHOSEN TYPE\");\n    abort();\n  }\n  EIGEN_DEVICE_FUNC static PacketReturnType convert_to_packet_type(Scalar in,\n                                                                   Scalar) {\n    return ::Eigen::internal::template plset<PacketReturnType>(in);\n  }\n  EIGEN_DEVICE_FUNC static void set_packet(PacketReturnType, Scalar *) {\n    eigen_assert(false && \"THERE IS NO PACKETIZE VERSION FOR THE CHOSEN TYPE\");\n    abort();\n  }\n};\n\n#elif defined(SYCL_DEVICE_ONLY)\ntemplate <typename PacketReturnType>\nstruct PacketWrapper<PacketReturnType, 4> {\n  typedef typename ::Eigen::internal::unpacket_traits<PacketReturnType>::type\n      Scalar;\n  template <typename Index>\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static Scalar scalarize(Index index, PacketReturnType &in) {\n    switch (index) {\n      case 0:\n        return in.x();\n      case 1:\n        return in.y();\n      case 2:\n        return in.z();\n      case 3:\n        return in.w();\n      default:\n      // INDEX MUST BE BETWEEN 0 and 3. There is no abort function in a SYCL kernel, so we cannot use abort here. \n      // The code will never reach here\n      __builtin_unreachable();\n    }\n    __builtin_unreachable();\n  }\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static PacketReturnType convert_to_packet_type(\n      Scalar in, Scalar other) {\n    return PacketReturnType(in, other, other, other);\n  }\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static void set_packet(PacketReturnType &lhs, Scalar *rhs) {\n    lhs = PacketReturnType(rhs[0], rhs[1], rhs[2], rhs[3]);\n  }\n};\n\ntemplate <typename PacketReturnType>\nstruct PacketWrapper<PacketReturnType, 1> {\n  typedef typename ::Eigen::internal::unpacket_traits<PacketReturnType>::type\n      Scalar;\n  template <typename Index>\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static Scalar scalarize(Index, PacketReturnType &in) {\n    return in;\n  }\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static PacketReturnType convert_to_packet_type(Scalar in,\n                                                                   Scalar) {\n    return PacketReturnType(in);\n  }\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static void set_packet(PacketReturnType &lhs, Scalar *rhs) {\n    lhs = rhs[0];\n  }\n};\n\ntemplate <typename PacketReturnType>\nstruct PacketWrapper<PacketReturnType, 2> {\n  typedef typename ::Eigen::internal::unpacket_traits<PacketReturnType>::type\n      Scalar;\n  template <typename Index>\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static Scalar scalarize(Index index, PacketReturnType &in) {\n    switch (index) {\n      case 0:\n        return in.x();\n      case 1:\n        return in.y();\n      default:\n        // INDEX MUST BE BETWEEN 0 and 1. There is no abort function in a SYCL kernel, so we cannot use abort here. 
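Instead, mark this path unreachable so the compiler can assume it is never taken. 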
\n      // The code will never reach here\n        __builtin_unreachable();\n    }\n    __builtin_unreachable();\n  }\n  \n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static PacketReturnType convert_to_packet_type(\n      Scalar in, Scalar other) {\n    return PacketReturnType(in, other);\n  }\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static void set_packet(PacketReturnType &lhs, Scalar *rhs) {\n    lhs = PacketReturnType(rhs[0], rhs[1]);\n  }\n};\n\n#endif\n\n}  // end namespace internal\n}  // end namespace TensorSycl\n}  // end namespace Eigen\n\n#endif  // EIGEN_INTEROP_HEADERS_SYCL_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/arch/SYCL/MathFunctions.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Mehdi Goli    Codeplay Software Ltd.\n// Ralph Potter  Codeplay Software Ltd.\n// Luke Iwanski  Codeplay Software Ltd.\n// Contact: <eigen@codeplay.com>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n/*****************************************************************\n * MathFunctions.h\n *\n * \\brief:\n *  MathFunctions\n *\n *****************************************************************/\n\n#ifndef EIGEN_MATH_FUNCTIONS_SYCL_H\n#define EIGEN_MATH_FUNCTIONS_SYCL_H\nnamespace Eigen {\n\nnamespace internal {\n\n// Make sure this is only available when targeting a GPU: we don't want to\n// introduce conflicts between these packet_traits definitions and the ones\n// we'll use on the host side (SSE, AVX, ...)\n#if defined(SYCL_DEVICE_ONLY)\n#define SYCL_PLOG(packet_type)                                         \\\n  template <>                                                          \\\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type plog<packet_type>( \\\n      const packet_type& a) {                                          \\\n    return cl::sycl::log(a);                                           \\\n  }\n\nSYCL_PLOG(cl::sycl::cl_float4)\nSYCL_PLOG(cl::sycl::cl_double2)\n#undef SYCL_PLOG\n\n#define SYCL_PLOG1P(packet_type)                                         \\\n  template <>                                                            \\\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type plog1p<packet_type>( \\\n      const packet_type& a) {                                            \\\n    return cl::sycl::log1p(a);                                           \\\n  }\n\nSYCL_PLOG1P(cl::sycl::cl_float4)\nSYCL_PLOG1P(cl::sycl::cl_double2)\n#undef SYCL_PLOG1P\n\n#define SYCL_PLOG10(packet_type)                                         \\\n  template <>                                                            \\\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type plog10<packet_type>( \\\n      const packet_type& a) {                                            \\\n    return cl::sycl::log10(a);                                           \\\n  }\n\nSYCL_PLOG10(cl::sycl::cl_float4)\nSYCL_PLOG10(cl::sycl::cl_double2)\n#undef SYCL_PLOG10\n\n#define SYCL_PEXP(packet_type)                                         \\\n  template <>                                                          \\\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type pexp<packet_type>( \\\n      const packet_type& a) {                                          \\\n    return cl::sycl::exp(a);                                           \\\n  }\n\nSYCL_PEXP(cl::sycl::cl_float4)\nSYCL_PEXP(cl::sycl::cl_float)\nSYCL_PEXP(cl::sycl::cl_double2)\n#undef SYCL_PEXP\n\n#define SYCL_PEXPM1(packet_type)                                         \\\n  template <>                                                            \\\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type pexpm1<packet_type>( \\\n      const packet_type& a) {                                            \\\n    return cl::sycl::expm1(a);                                           \\\n  }\n\nSYCL_PEXPM1(cl::sycl::cl_float4)\nSYCL_PEXPM1(cl::sycl::cl_double2)\n#undef SYCL_PEXPM1\n\n#define SYCL_PSQRT(packet_type)                                         \\\n  template <>                         
                                   \\\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type psqrt<packet_type>( \\\n      const packet_type& a) {                                           \\\n    return cl::sycl::sqrt(a);                                           \\\n  }\n\nSYCL_PSQRT(cl::sycl::cl_float4)\nSYCL_PSQRT(cl::sycl::cl_double2)\n#undef SYCL_PSQRT\n\n#define SYCL_PRSQRT(packet_type)                                         \\\n  template <>                                                            \\\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type prsqrt<packet_type>( \\\n      const packet_type& a) {                                            \\\n    return cl::sycl::rsqrt(a);                                           \\\n  }\n\nSYCL_PRSQRT(cl::sycl::cl_float4)\nSYCL_PRSQRT(cl::sycl::cl_double2)\n#undef SYCL_PRSQRT\n\n/** \\internal \\returns the sine of \\a a (coeff-wise) */\n#define SYCL_PSIN(packet_type)                                         \\\n  template <>                                                          \\\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type psin<packet_type>( \\\n      const packet_type& a) {                                          \\\n    return cl::sycl::sin(a);                                           \\\n  }\n\nSYCL_PSIN(cl::sycl::cl_float4)\nSYCL_PSIN(cl::sycl::cl_double2)\n#undef SYCL_PSIN\n\n/** \\internal \\returns the cosine of \\a a (coeff-wise) */\n#define SYCL_PCOS(packet_type)                                         \\\n  template <>                                                          \\\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type pcos<packet_type>( \\\n      const packet_type& a) {                                          \\\n    return cl::sycl::cos(a);                                           \\\n  }\n\nSYCL_PCOS(cl::sycl::cl_float4)\nSYCL_PCOS(cl::sycl::cl_double2)\n#undef SYCL_PCOS\n\n/** \\internal \\returns the tangent of \\a a (coeff-wise) */\n#define SYCL_PTAN(packet_type)                                         \\\n  template <>                                                          \\\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type ptan<packet_type>( \\\n      const packet_type& a) {                                          \\\n    return cl::sycl::tan(a);                                           \\\n  }\n\nSYCL_PTAN(cl::sycl::cl_float4)\nSYCL_PTAN(cl::sycl::cl_double2)\n#undef SYCL_PTAN\n\n/** \\internal \\returns the arc sine of \\a a (coeff-wise) */\n#define SYCL_PASIN(packet_type)                                         \\\n  template <>                                                           \\\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type pasin<packet_type>( \\\n      const packet_type& a) {                                           \\\n    return cl::sycl::asin(a);                                           \\\n  }\n\nSYCL_PASIN(cl::sycl::cl_float4)\nSYCL_PASIN(cl::sycl::cl_double2)\n#undef SYCL_PASIN\n\n/** \\internal \\returns the arc cosine of \\a a (coeff-wise) */\n#define SYCL_PACOS(packet_type)                                         \\\n  template <>                                                           \\\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type pacos<packet_type>( \\\n      const packet_type& a) {                                           \\\n    return cl::sycl::acos(a);                                           \\\n  }\n\nSYCL_PACOS(cl::sycl::cl_float4)\nSYCL_PACOS(cl::sycl::cl_double2)\n#undef 
SYCL_PACOS\n\n/** \\internal \\returns the arc tangent of \\a a (coeff-wise) */\n#define SYCL_PATAN(packet_type)                                         \\\n  template <>                                                           \\\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type patan<packet_type>( \\\n      const packet_type& a) {                                           \\\n    return cl::sycl::atan(a);                                           \\\n  }\n\nSYCL_PATAN(cl::sycl::cl_float4)\nSYCL_PATAN(cl::sycl::cl_double2)\n#undef SYCL_PATAN\n\n/** \\internal \\returns the hyperbolic sine of \\a a (coeff-wise) */\n#define SYCL_PSINH(packet_type)                                         \\\n  template <>                                                           \\\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type psinh<packet_type>( \\\n      const packet_type& a) {                                           \\\n    return cl::sycl::sinh(a);                                           \\\n  }\n\nSYCL_PSINH(cl::sycl::cl_float4)\nSYCL_PSINH(cl::sycl::cl_double2)\n#undef SYCL_PSINH\n\n/** \\internal \\returns the hyperbolic cosine of \\a a (coeff-wise) */\n#define SYCL_PCOSH(packet_type)                                         \\\n  template <>                                                           \\\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type pcosh<packet_type>( \\\n      const packet_type& a) {                                           \\\n    return cl::sycl::cosh(a);                                           \\\n  }\n\nSYCL_PCOSH(cl::sycl::cl_float4)\nSYCL_PCOSH(cl::sycl::cl_double2)\n#undef SYCL_PCOSH\n\n/** \\internal \\returns the hyperbolic tangent of \\a a (coeff-wise) */\n#define SYCL_PTANH(packet_type)                                         \\\n  template <>                                                           \\\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type ptanh<packet_type>( \\\n      const packet_type& a) {                                           \\\n    return cl::sycl::tanh(a);                                           \\\n  }\n\nSYCL_PTANH(cl::sycl::cl_float4)\nSYCL_PTANH(cl::sycl::cl_double2)\n#undef SYCL_PTANH\n\n#define SYCL_PCEIL(packet_type)                                         \\\n  template <>                                                           \\\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type pceil<packet_type>( \\\n      const packet_type& a) {                                           \\\n    return cl::sycl::ceil(a);                                           \\\n  }\n\nSYCL_PCEIL(cl::sycl::cl_float4)\nSYCL_PCEIL(cl::sycl::cl_double2)\n#undef SYCL_PCEIL\n\n#define SYCL_PROUND(packet_type)                                         \\\n  template <>                                                            \\\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type pround<packet_type>( \\\n      const packet_type& a) {                                            \\\n    return cl::sycl::round(a);                                           \\\n  }\n\nSYCL_PROUND(cl::sycl::cl_float4)\nSYCL_PROUND(cl::sycl::cl_double2)\n#undef SYCL_PROUND\n\n#define SYCL_PRINT(packet_type)                                         \\\n  template <>                                                           \\\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type print<packet_type>( \\\n      const packet_type& a) {                                           \\\n    return cl::sycl::rint(a);                                           \\\n  
}\n\nSYCL_PRINT(cl::sycl::cl_float4)\nSYCL_PRINT(cl::sycl::cl_double2)\n#undef SYCL_PRINT\n\n#define SYCL_FLOOR(packet_type)                                          \\\n  template <>                                                            \\\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type pfloor<packet_type>( \\\n      const packet_type& a) {                                            \\\n    return cl::sycl::floor(a);                                           \\\n  }\n\nSYCL_FLOOR(cl::sycl::cl_float4)\nSYCL_FLOOR(cl::sycl::cl_double2)\n#undef SYCL_FLOOR\n\n#define SYCL_PMIN(packet_type, expr)                                   \\\n  template <>                                                          \\\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type pmin<packet_type>( \\\n      const packet_type& a, const packet_type& b) {                    \\\n    return expr;                                                       \\\n  }\n\nSYCL_PMIN(cl::sycl::cl_float4, cl::sycl::fmin(a, b))\nSYCL_PMIN(cl::sycl::cl_double2, cl::sycl::fmin(a, b))\n#undef SYCL_PMIN\n\n#define SYCL_PMAX(packet_type, expr)                                   \\\n  template <>                                                          \\\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type pmax<packet_type>( \\\n      const packet_type& a, const packet_type& b) {                    \\\n    return expr;                                                       \\\n  }\n\nSYCL_PMAX(cl::sycl::cl_float4, cl::sycl::fmax(a, b))\nSYCL_PMAX(cl::sycl::cl_double2, cl::sycl::fmax(a, b))\n#undef SYCL_PMAX\n\n#define SYCL_PLDEXP(packet_type)                                             \\\n  template <>                                                                \\\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type pldexp(                  \\\n      const packet_type& a, const packet_type& exponent) {                   \\\n    return cl::sycl::ldexp(                                                  \\\n        a, exponent.template convert<cl::sycl::cl_int,                       \\\n                                     cl::sycl::rounding_mode::automatic>()); \\\n  }\n\nSYCL_PLDEXP(cl::sycl::cl_float4)\nSYCL_PLDEXP(cl::sycl::cl_double2)\n#undef SYCL_PLDEXP\n\n#endif\n}  // end namespace internal\n\n}  // end namespace Eigen\n\n#endif  // EIGEN_MATH_FUNCTIONS_SYCL_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/arch/SYCL/PacketMath.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Mehdi Goli    Codeplay Software Ltd.\n// Ralph Potter  Codeplay Software Ltd.\n// Luke Iwanski  Codeplay Software Ltd.\n// Contact: <eigen@codeplay.com>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n/*****************************************************************\n * PacketMath.h\n *\n * \\brief:\n *  PacketMath\n *\n *****************************************************************/\n\n#ifndef EIGEN_PACKET_MATH_SYCL_H\n#define EIGEN_PACKET_MATH_SYCL_H\n#include <type_traits>\nnamespace Eigen {\n\nnamespace internal {\n#ifdef SYCL_DEVICE_ONLY\n\n#define SYCL_PLOADT_RO(address_space_target)                                 \\\n  template <typename packet_type, int Alignment>                             \\\n  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type ploadt_ro(               \\\n      typename cl::sycl::multi_ptr<                                          \\\n          const typename unpacket_traits<packet_type>::type,                 \\\n          cl::sycl::access::address_space::address_space_target>::pointer_t  \\\n          from) {                                                            \\\n    typedef typename unpacket_traits<packet_type>::type scalar;              \\\n    typedef cl::sycl::multi_ptr<                                             \\\n        scalar, cl::sycl::access::address_space::address_space_target>       \\\n        multi_ptr;                                                           \\\n    auto res = packet_type(                                                  \\\n        static_cast<typename unpacket_traits<packet_type>::type>(0));        \\\n    res.load(0, multi_ptr(const_cast<typename multi_ptr::pointer_t>(from))); \\\n    return res;                                                              \\\n  }\n\nSYCL_PLOADT_RO(global_space)\nSYCL_PLOADT_RO(local_space)\n#undef SYCL_PLOADT_RO\n#endif\n\ntemplate <typename packet_type, int Alignment, typename T>\nEIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type\nploadt_ro(const Eigen::TensorSycl::internal::RangeAccess<\n          cl::sycl::access::mode::read_write, T>& from) {\n  return ploadt_ro<packet_type, Alignment>(from.get_pointer());\n}\n\n#ifdef SYCL_DEVICE_ONLY\n#define SYCL_PLOAD(address_space_target, Alignment, AlignedType)            \\\n  template <typename packet_type>                                           \\\n  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pload##AlignedType(     \\\n      typename cl::sycl::multi_ptr<                                         \\\n          const typename unpacket_traits<packet_type>::type,                \\\n          cl::sycl::access::address_space::address_space_target>::pointer_t \\\n          from) {                                                           \\\n    return ploadt_ro<packet_type, Alignment>(from);                         \\\n  }\n\n// global space\nSYCL_PLOAD(global_space, Unaligned, u)\nSYCL_PLOAD(global_space, Aligned, )\n// local space\nSYCL_PLOAD(local_space, Unaligned, u)\nSYCL_PLOAD(local_space, Aligned, )\n\n#undef SYCL_PLOAD\n#endif\n\n#define SYCL_PLOAD(Alignment, AlignedType)                              \\\n  template <typename packet_type>                                       \\\n  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type 
pload##AlignedType( \\\n      const Eigen::TensorSycl::internal::RangeAccess<                   \\\n          cl::sycl::access::mode::read_write,                           \\\n          typename unpacket_traits<packet_type>::type>                  \\\n          from) {                                                       \\\n    return ploadt_ro<packet_type, Alignment>(from);                     \\\n  }\nSYCL_PLOAD(Unaligned, u)\nSYCL_PLOAD(Aligned, )\n#undef SYCL_PLOAD\n\n#ifdef SYCL_DEVICE_ONLY\n/** \\internal \\returns a packet version of \\a *from.\n * The pointer \\a from must be aligned on a \\a Alignment bytes boundary. */\n#define SYCL_PLOADT(address_space_target)                                   \\\n  template <typename packet_type, int Alignment>                            \\\n  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type ploadt(                 \\\n      typename cl::sycl::multi_ptr<                                         \\\n          const typename unpacket_traits<packet_type>::type,                \\\n          cl::sycl::access::address_space::address_space_target>::pointer_t \\\n          from) {                                                           \\\n    if (Alignment >= unpacket_traits<packet_type>::alignment)               \\\n      return pload<packet_type>(from);                                      \\\n    else                                                                    \\\n      return ploadu<packet_type>(from);                                     \\\n  }\n\n// global space\nSYCL_PLOADT(global_space)\n// local space\nSYCL_PLOADT(local_space)\n#undef SYCL_PLOADT\n#endif\n\ntemplate <typename packet_type, int Alignment>\nEIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type\nploadt(const Eigen::TensorSycl::internal::RangeAccess<\n       cl::sycl::access::mode::read_write,\n       typename unpacket_traits<packet_type>::type>& from) {\n  return ploadt<packet_type, Alignment>(from.get_pointer());\n}\n#ifdef SYCL_DEVICE_ONLY\n\n// private_space\n#define SYCL_PLOADT_RO_SPECIAL(packet_type, Alignment)                 \\\n  template <>                                                          \\\n  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type                    \\\n  ploadt_ro<packet_type, Alignment>(                                   \\\n      const typename unpacket_traits<packet_type>::type* from) {       \\\n    typedef typename unpacket_traits<packet_type>::type scalar;        \\\n    auto res = packet_type(static_cast<scalar>(0));                    \\\n    res.template load<cl::sycl::access::address_space::private_space>( \\\n        0, const_cast<scalar*>(from));                                 \\\n    return res;                                                        \\\n  }\n\nSYCL_PLOADT_RO_SPECIAL(cl::sycl::cl_float4, Aligned)\nSYCL_PLOADT_RO_SPECIAL(cl::sycl::cl_double2, Aligned)\nSYCL_PLOADT_RO_SPECIAL(cl::sycl::cl_float4, Unaligned)\nSYCL_PLOADT_RO_SPECIAL(cl::sycl::cl_double2, Unaligned)\n\n#define SYCL_PLOAD_SPECIAL(packet_type, alignment_type)                    \\\n  template <>                                                              \\\n  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pload##alignment_type( \\\n      const typename unpacket_traits<packet_type>::type* from) {           \\\n    typedef typename unpacket_traits<packet_type>::type scalar;            \\\n    auto res = packet_type(static_cast<scalar>(0));                        \\\n    res.template load<cl::sycl::access::address_space::private_space>(     \\\n       
 0, const_cast<scalar*>(from));                                     \\\n    return res;                                                            \\\n  }\nSYCL_PLOAD_SPECIAL(cl::sycl::cl_float4, )\nSYCL_PLOAD_SPECIAL(cl::sycl::cl_double2, )\nSYCL_PLOAD_SPECIAL(cl::sycl::cl_float4, u)\nSYCL_PLOAD_SPECIAL(cl::sycl::cl_double2, u)\n\n#undef SYCL_PLOAD_SPECIAL\n\n#define SYCL_PSTORE(scalar, packet_type, address_space_target, alignment)   \\\n  template <>                                                               \\\n  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstore##alignment(             \\\n      typename cl::sycl::multi_ptr<                                         \\\n          scalar,                                                           \\\n          cl::sycl::access::address_space::address_space_target>::pointer_t \\\n          to,                                                               \\\n      const packet_type& from) {                                            \\\n    typedef cl::sycl::multi_ptr<                                            \\\n        scalar, cl::sycl::access::address_space::address_space_target>      \\\n        multi_ptr;                                                          \\\n    from.store(0, multi_ptr(to));                                           \\\n  }\n\n// global space\nSYCL_PSTORE(float, cl::sycl::cl_float4, global_space, )\nSYCL_PSTORE(float, cl::sycl::cl_float4, global_space, u)\nSYCL_PSTORE(double, cl::sycl::cl_double2, global_space, )\nSYCL_PSTORE(double, cl::sycl::cl_double2, global_space, u)\nSYCL_PSTORE(float, cl::sycl::cl_float4, local_space, )\nSYCL_PSTORE(float, cl::sycl::cl_float4, local_space, u)\nSYCL_PSTORE(double, cl::sycl::cl_double2, local_space, )\nSYCL_PSTORE(double, cl::sycl::cl_double2, local_space, u)\n\nSYCL_PSTORE(float, cl::sycl::cl_float4, private_space, )\nSYCL_PSTORE(float, cl::sycl::cl_float4, private_space, u)\nSYCL_PSTORE(double, cl::sycl::cl_double2, private_space, )\nSYCL_PSTORE(double, cl::sycl::cl_double2, private_space, u)\n#undef SYCL_PSTORE\n\n#define SYCL_PSTORE_T(address_space_target)                                 \\\n  template <typename scalar, typename packet_type, int Alignment>           \\\n  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret(                       \\\n      typename cl::sycl::multi_ptr<                                         \\\n          scalar,                                                           \\\n          cl::sycl::access::address_space::address_space_target>::pointer_t \\\n          to,                                                               \\\n      const packet_type& from) {                                            \\\n    if (Alignment)                                                          \\\n      pstore(to, from);                                                     \\\n    else                                                                    \\\n      pstoreu(to, from);                                                    \\\n  }\n\nSYCL_PSTORE_T(global_space)\n\nSYCL_PSTORE_T(local_space)\n\n#undef SYCL_PSTORE_T\n\n#define SYCL_PSET1(packet_type)                                         \\\n  template <>                                                           \\\n  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pset1<packet_type>( \\\n      const typename unpacket_traits<packet_type>::type& from) {        \\\n    return packet_type(from);                                           \\\n  }\n\n// global 
space\nSYCL_PSET1(cl::sycl::cl_float4)\nSYCL_PSET1(cl::sycl::cl_double2)\n\n#undef SYCL_PSET1\n\ntemplate <typename packet_type>\nstruct get_base_packet {\n  template <typename sycl_multi_pointer>\n  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type\n  get_ploaddup(sycl_multi_pointer) {}\n\n  template <typename sycl_multi_pointer>\n  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type\n  get_pgather(sycl_multi_pointer, Index) {}\n};\n\ntemplate <>\nstruct get_base_packet<cl::sycl::cl_float4> {\n  template <typename sycl_multi_pointer>\n  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_float4 get_ploaddup(\n      sycl_multi_pointer from) {\n    return cl::sycl::cl_float4(from[0], from[0], from[1], from[1]);\n  }\n  template <typename sycl_multi_pointer>\n  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_float4 get_pgather(\n      sycl_multi_pointer from, Index stride) {\n    return cl::sycl::cl_float4(from[0 * stride], from[1 * stride],\n                               from[2 * stride], from[3 * stride]);\n  }\n\n  template <typename sycl_multi_pointer>\n  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void set_pscatter(\n      sycl_multi_pointer to, const cl::sycl::cl_float4& from, Index stride) {\n    auto tmp = stride;\n    to[0] = from.x();\n    to[tmp] = from.y();\n    to[tmp += stride] = from.z();\n    to[tmp += stride] = from.w();\n  }\n  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_float4 set_plset(\n      const float& a) {\n    return cl::sycl::cl_float4(static_cast<float>(a), static_cast<float>(a + 1),\n                               static_cast<float>(a + 2),\n                               static_cast<float>(a + 3));\n  }\n};\n\ntemplate <>\nstruct get_base_packet<cl::sycl::cl_double2> {\n  template <typename sycl_multi_pointer>\n  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_double2\n  get_ploaddup(const sycl_multi_pointer from) {\n    return cl::sycl::cl_double2(from[0], from[0]);\n  }\n\n  template <typename sycl_multi_pointer, typename Index>\n  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_double2 get_pgather(\n      const sycl_multi_pointer from, Index stride) {\n    return cl::sycl::cl_double2(from[0 * stride], from[1 * stride]);\n  }\n\n  template <typename sycl_multi_pointer>\n  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void set_pscatter(\n      sycl_multi_pointer to, const cl::sycl::cl_double2& from, Index stride) {\n    to[0] = from.x();\n    to[stride] = from.y();\n  }\n\n  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_double2 set_plset(\n      const double& a) {\n    return cl::sycl::cl_double2(static_cast<double>(a),\n                                static_cast<double>(a + 1));\n  }\n};\n\n#define SYCL_PLOAD_DUP(address_space_target)                                \\\n  template <typename packet_type>                                           \\\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type ploaddup(               \\\n      typename cl::sycl::multi_ptr<                                         \\\n          const typename unpacket_traits<packet_type>::type,                \\\n          cl::sycl::access::address_space::address_space_target>::pointer_t \\\n          from) {                                                           \\\n    return get_base_packet<packet_type>::get_ploaddup(from);                \\\n  }\n\n// global space\nSYCL_PLOAD_DUP(global_space)\n// local_space\nSYCL_PLOAD_DUP(local_space)\n#undef SYCL_PLOAD_DUP\n\n#define SYCL_PLOAD_DUP_SPECILIZE(packet_type)  
                            \\\n  template <>                                                              \\\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type ploaddup<packet_type>( \\\n      const typename unpacket_traits<packet_type>::type* from) {           \\\n    return get_base_packet<packet_type>::get_ploaddup(from);               \\\n  }\n\nSYCL_PLOAD_DUP_SPECILIZE(cl::sycl::cl_float4)\nSYCL_PLOAD_DUP_SPECILIZE(cl::sycl::cl_double2)\n\n#undef SYCL_PLOAD_DUP_SPECILIZE\n\n#define SYCL_PLSET(packet_type)                                         \\\n  template <>                                                           \\\n  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type plset<packet_type>( \\\n      const typename unpacket_traits<packet_type>::type& a) {           \\\n    return get_base_packet<packet_type>::set_plset(a);                  \\\n  }\n\nSYCL_PLSET(cl::sycl::cl_float4)\nSYCL_PLSET(cl::sycl::cl_double2)\n\n#undef SYCL_PLSET\n\n#define SYCL_PGATHER(address_space_target)                                  \\\n  template <typename Scalar, typename packet_type>                          \\\n  EIGEN_DEVICE_FUNC inline packet_type pgather(                             \\\n      typename cl::sycl::multi_ptr<                                         \\\n          const typename unpacket_traits<packet_type>::type,                \\\n          cl::sycl::access::address_space::address_space_target>::pointer_t \\\n          from,                                                             \\\n      Index stride) {                                                       \\\n    return get_base_packet<packet_type>::get_pgather(from, stride);         \\\n  }\n\n// global space\nSYCL_PGATHER(global_space)\n// local space\nSYCL_PGATHER(local_space)\n\n#undef SYCL_PGATHER\n\n#define SYCL_PGATHER_SPECILIZE(scalar, packet_type)                            \\\n  template <>                                                                  \\\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type                            \\\n  pgather<scalar, packet_type>(                                                \\\n      const typename unpacket_traits<packet_type>::type* from, Index stride) { \\\n    return get_base_packet<packet_type>::get_pgather(from, stride);            \\\n  }\n\nSYCL_PGATHER_SPECILIZE(float, cl::sycl::cl_float4)\nSYCL_PGATHER_SPECILIZE(double, cl::sycl::cl_double2)\n\n#undef SYCL_PGATHER_SPECILIZE\n\n#define SYCL_PSCATTER(address_space_target)                                 \\\n  template <typename Scalar, typename packet_type>                          \\\n  EIGEN_DEVICE_FUNC inline void pscatter(                                   \\\n      typename cl::sycl::multi_ptr<                                         \\\n          typename unpacket_traits<packet_type>::type,                      \\\n          cl::sycl::access::address_space::address_space_target>::pointer_t \\\n          to,                                                               \\\n      const packet_type& from, Index stride) {                              \\\n    get_base_packet<packet_type>::set_pscatter(to, from, stride);           \\\n  }\n\n// global space\nSYCL_PSCATTER(global_space)\n// local space\nSYCL_PSCATTER(local_space)\n\n#undef SYCL_PSCATTER\n\n#define SYCL_PSCATTER_SPECILIZE(scalar, packet_type)                        \\\n  template <>                                                               \\\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter<scalar, packet_type>( \\\n      typename 
unpacket_traits<packet_type>::type * to,                     \\\n      const packet_type& from, Index stride) {                              \\\n    get_base_packet<packet_type>::set_pscatter(to, from, stride);           \\\n  }\n\nSYCL_PSCATTER_SPECILIZE(float, cl::sycl::cl_float4)\nSYCL_PSCATTER_SPECILIZE(double, cl::sycl::cl_double2)\n\n#undef SYCL_PSCATTER_SPECILIZE\n\n#define SYCL_PMAD(packet_type)                                            \\\n  template <>                                                             \\\n  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pmadd(                \\\n      const packet_type& a, const packet_type& b, const packet_type& c) { \\\n    return cl::sycl::mad(a, b, c);                                        \\\n  }\n\nSYCL_PMAD(cl::sycl::cl_float4)\nSYCL_PMAD(cl::sycl::cl_double2)\n#undef SYCL_PMAD\n\ntemplate <>\nEIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float pfirst<cl::sycl::cl_float4>(\n    const cl::sycl::cl_float4& a) {\n  return a.x();\n}\ntemplate <>\nEIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double pfirst<cl::sycl::cl_double2>(\n    const cl::sycl::cl_double2& a) {\n  return a.x();\n}\n\ntemplate <>\nEIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux<cl::sycl::cl_float4>(\n    const cl::sycl::cl_float4& a) {\n  return a.x() + a.y() + a.z() + a.w();\n}\n\ntemplate <>\nEIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux<cl::sycl::cl_double2>(\n    const cl::sycl::cl_double2& a) {\n  return a.x() + a.y();\n}\n\ntemplate <>\nEIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux_max<cl::sycl::cl_float4>(\n    const cl::sycl::cl_float4& a) {\n  return cl::sycl::fmax(cl::sycl::fmax(a.x(), a.y()),\n                        cl::sycl::fmax(a.z(), a.w()));\n}\ntemplate <>\nEIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux_max<cl::sycl::cl_double2>(\n    const cl::sycl::cl_double2& a) {\n  return cl::sycl::fmax(a.x(), a.y());\n}\n\ntemplate <>\nEIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux_min<cl::sycl::cl_float4>(\n    const cl::sycl::cl_float4& a) {\n  return cl::sycl::fmin(cl::sycl::fmin(a.x(), a.y()),\n                        cl::sycl::fmin(a.z(), a.w()));\n}\ntemplate <>\nEIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux_min<cl::sycl::cl_double2>(\n    const cl::sycl::cl_double2& a) {\n  return cl::sycl::fmin(a.x(), a.y());\n}\n\ntemplate <>\nEIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux_mul<cl::sycl::cl_float4>(\n    const cl::sycl::cl_float4& a) {\n  return a.x() * a.y() * a.z() * a.w();\n}\ntemplate <>\nEIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux_mul<cl::sycl::cl_double2>(\n    const cl::sycl::cl_double2& a) {\n  return a.x() * a.y();\n}\n\ntemplate <>\nEIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_float4\npabs<cl::sycl::cl_float4>(const cl::sycl::cl_float4& a) {\n  return cl::sycl::cl_float4(cl::sycl::fabs(a.x()), cl::sycl::fabs(a.y()),\n                             cl::sycl::fabs(a.z()), cl::sycl::fabs(a.w()));\n}\ntemplate <>\nEIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_double2\npabs<cl::sycl::cl_double2>(const cl::sycl::cl_double2& a) {\n  return cl::sycl::cl_double2(cl::sycl::fabs(a.x()), cl::sycl::fabs(a.y()));\n}\n\ntemplate <typename Packet>\nEIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet sycl_pcmp_le(const Packet &a,\n                                                          const Packet &b) {\n  return ((a <= b)\n              .template convert<typename unpacket_traits<Packet>::type,\n                                cl::sycl::rounding_mode::automatic>());\n}\n\ntemplate <typename Packet>\nEIGEN_DEVICE_FUNC 
EIGEN_ALWAYS_INLINE Packet sycl_pcmp_lt(const Packet &a,\n                                                          const Packet &b) {\n  return ((a < b)\n              .template convert<typename unpacket_traits<Packet>::type,\n                                cl::sycl::rounding_mode::automatic>());\n}\n\ntemplate <typename Packet>\nEIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet sycl_pcmp_eq(const Packet &a,\n                                                          const Packet &b) {\n  return ((a == b)\n              .template convert<typename unpacket_traits<Packet>::type,\n                                cl::sycl::rounding_mode::automatic>());\n}\n\n#define SYCL_PCMP(OP, TYPE)                                                    \\\n  template <>                                                                  \\\n  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE TYPE pcmp_##OP<TYPE>(const TYPE &a,    \\\n                                                             const TYPE &b) {  \\\n    return sycl_pcmp_##OP<TYPE>(a, b);                                         \\\n  }\n\nSYCL_PCMP(le, cl::sycl::cl_float4)\nSYCL_PCMP(lt, cl::sycl::cl_float4)\nSYCL_PCMP(eq, cl::sycl::cl_float4)\nSYCL_PCMP(le, cl::sycl::cl_double2)\nSYCL_PCMP(lt, cl::sycl::cl_double2)\nSYCL_PCMP(eq, cl::sycl::cl_double2)\n#undef SYCL_PCMP\n\ntemplate <typename T> struct convert_to_integer;\n\ntemplate <> struct convert_to_integer<float> {\n  using type = std::int32_t;\n  using packet_type = cl::sycl::cl_int4;\n};\ntemplate <> struct convert_to_integer<double> {\n  using type = std::int64_t;\n  using packet_type = cl::sycl::cl_long2;\n};\n\ntemplate <typename PacketIn>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename convert_to_integer<\n    typename unpacket_traits<PacketIn>::type>::packet_type\nvector_as_int(const PacketIn &p) {\n  return (\n      p.template convert<typename convert_to_integer<\n                             typename unpacket_traits<PacketIn>::type>::type,\n                         cl::sycl::rounding_mode::automatic>());\n}\n\ntemplate <typename packetOut, typename PacketIn>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packetOut\nconvert_vector(const PacketIn &p) {\n  return (p.template convert<typename unpacket_traits<packetOut>::type,\n                             cl::sycl::rounding_mode::automatic>());\n}\n\n#define SYCL_PAND(TYPE)                                                        \\\n  template <>                                                                  \\\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TYPE pand<TYPE>(const TYPE &a,         \\\n                                                        const TYPE &b) {       \\\n    return convert_vector<TYPE>(vector_as_int(a) & vector_as_int(b));          \\\n  }\nSYCL_PAND(cl::sycl::cl_float4)\nSYCL_PAND(cl::sycl::cl_double2)\n#undef SYCL_PAND\n\n#define SYCL_POR(TYPE)                                                         \\\n  template <>                                                                  \\\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TYPE por<TYPE>(const TYPE &a,          \\\n                                                       const TYPE &b) {        \\\n    return convert_vector<TYPE>(vector_as_int(a) | vector_as_int(b));          \\\n  }\n\nSYCL_POR(cl::sycl::cl_float4)\nSYCL_POR(cl::sycl::cl_double2)\n#undef SYCL_POR\n\n#define SYCL_PXOR(TYPE)                                                        \\\n  template <>                                                                  \\\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TYPE 
pxor<TYPE>(const TYPE &a,         \\\n                                                        const TYPE &b) {       \\\n    return convert_vector<TYPE>(vector_as_int(a) ^ vector_as_int(b));          \\\n  }\n\nSYCL_PXOR(cl::sycl::cl_float4)\nSYCL_PXOR(cl::sycl::cl_double2)\n#undef SYCL_PXOR\n\n#define SYCL_PANDNOT(TYPE)                                                     \\\n  template <>                                                                  \\\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TYPE pandnot<TYPE>(const TYPE &a,      \\\n                                                           const TYPE &b) {    \\\n    return convert_vector<TYPE>(vector_as_int(a) & (~vector_as_int(b)));       \\\n  }\nSYCL_PANDNOT(cl::sycl::cl_float4)\nSYCL_PANDNOT(cl::sycl::cl_double2)\n#undef SYCL_PANDNOT\n\nEIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void ptranspose(\n    PacketBlock<cl::sycl::cl_float4, 4>& kernel) {\n  float tmp = kernel.packet[0].y();\n  kernel.packet[0].y() = kernel.packet[1].x();\n  kernel.packet[1].x() = tmp;\n\n  tmp = kernel.packet[0].z();\n  kernel.packet[0].z() = kernel.packet[2].x();\n  kernel.packet[2].x() = tmp;\n\n  tmp = kernel.packet[0].w();\n  kernel.packet[0].w() = kernel.packet[3].x();\n  kernel.packet[3].x() = tmp;\n\n  tmp = kernel.packet[1].z();\n  kernel.packet[1].z() = kernel.packet[2].y();\n  kernel.packet[2].y() = tmp;\n\n  tmp = kernel.packet[1].w();\n  kernel.packet[1].w() = kernel.packet[3].y();\n  kernel.packet[3].y() = tmp;\n\n  tmp = kernel.packet[2].w();\n  kernel.packet[2].w() = kernel.packet[3].z();\n  kernel.packet[3].z() = tmp;\n}\n\nEIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void ptranspose(\n    PacketBlock<cl::sycl::cl_double2, 2>& kernel) {\n  double tmp = kernel.packet[0].y();\n  kernel.packet[0].y() = kernel.packet[1].x();\n  kernel.packet[1].x() = tmp;\n}\n\ntemplate <>\nEIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_float4 pblend(\n    const Selector<unpacket_traits<cl::sycl::cl_float4>::size>& ifPacket,\n    const cl::sycl::cl_float4& thenPacket,\n    const cl::sycl::cl_float4& elsePacket) {\n  cl::sycl::cl_int4 condition(\n      ifPacket.select[0] ? 0 : -1, ifPacket.select[1] ? 0 : -1,\n      ifPacket.select[2] ? 0 : -1, ifPacket.select[3] ? 0 : -1);\n  return cl::sycl::select(thenPacket, elsePacket, condition);\n}\n\ntemplate <>\ninline cl::sycl::cl_double2 pblend(\n    const Selector<unpacket_traits<cl::sycl::cl_double2>::size>& ifPacket,\n    const cl::sycl::cl_double2& thenPacket,\n    const cl::sycl::cl_double2& elsePacket) {\n  cl::sycl::cl_long2 condition(ifPacket.select[0] ? 0 : -1,\n                               ifPacket.select[1] ? 
0 : -1);\n  return cl::sycl::select(thenPacket, elsePacket, condition);\n}\n#endif  // SYCL_DEVICE_ONLY\n\n#define SYCL_PSTORE(alignment)                                  \\\n  template <typename packet_type>                               \\\n  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstore##alignment( \\\n      const Eigen::TensorSycl::internal::RangeAccess<           \\\n          cl::sycl::access::mode::read_write,                   \\\n          typename unpacket_traits<packet_type>::type>& to,     \\\n      const packet_type& from) {                                \\\n    pstore##alignment(to.get_pointer(), from);                  \\\n  }\n\n// global space\nSYCL_PSTORE()\nSYCL_PSTORE(u)\n\n#undef SYCL_PSTORE\n\ntemplate <typename scalar, typename packet_type, int Alignment>\nEIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret(\n    Eigen::TensorSycl::internal::RangeAccess<\n        cl::sycl::access::mode::read_write,\n        typename unpacket_traits<packet_type>::type>\n        to,\n    const packet_type& from) {\n  pstoret<scalar, packet_type, Alignment>(to.get_pointer(), from);\n}\n\n}  // end namespace internal\n\n}  // end namespace Eigen\n\n#endif  // EIGEN_PACKET_MATH_SYCL_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h",
    "content": "/***************************************************************************\n *  Copyright (C) 2017 Codeplay Software Limited\n *  This Source Code Form is subject to the terms of the Mozilla\n *  Public License v. 2.0. If a copy of the MPL was not distributed\n *  with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n *\n *\n *  SyclMemoryModel.h\n *\n *  Description:\n *    Interface for SYCL buffers to behave as a non-dereferenceable pointer\n *    Interface for Placeholder accessor to behave as a pointer on both host\n *    and device\n *\n * Authors:\n *\n *    Ruyman Reyes   Codeplay Software Ltd.\n *    Mehdi Goli     Codeplay Software Ltd.\n *    Vanya Yaneva   Codeplay Software Ltd.\n *\n **************************************************************************/\n\n#if defined(EIGEN_USE_SYCL) && \\\n    !defined(EIGEN_CXX11_TENSOR_TENSOR_SYCL_STORAGE_MEMORY_H)\n#define EIGEN_CXX11_TENSOR_TENSOR_SYCL_STORAGE_MEMORY_H\n\n#include <CL/sycl.hpp>\n#ifdef EIGEN_EXCEPTIONS\n#include <stdexcept>\n#endif\n#include <cstddef>\n#include <queue>\n#include <set>\n#include <unordered_map>\n\nnamespace Eigen {\nnamespace TensorSycl {\nnamespace internal {\n\nusing sycl_acc_target = cl::sycl::access::target;\nusing sycl_acc_mode = cl::sycl::access::mode;\n\n/**\n * Default values for template arguments\n */\nusing buffer_data_type_t = uint8_t;\nconst sycl_acc_target default_acc_target = sycl_acc_target::global_buffer;\nconst sycl_acc_mode default_acc_mode = sycl_acc_mode::read_write;\n\n/**\n * PointerMapper\n *  Associates fake pointers with buffers.\n *\n */\nclass PointerMapper {\n public:\n  using base_ptr_t = std::intptr_t;\n\n  /* Structure of a virtual pointer\n   *\n   * |================================================|\n   * |               POINTER ADDRESS                  |\n   * |================================================|\n   */\n  struct virtual_pointer_t {\n    /* Type for the pointers\n     */\n    base_ptr_t m_contents;\n\n    /** Conversions from virtual_pointer_t to\n     * void * should just reinterpret_cast the integer number\n     */\n    operator void *() const { return reinterpret_cast<void *>(m_contents); }\n\n    /**\n     * Convert back to the integer number.\n     */\n    operator base_ptr_t() const { return m_contents; }\n\n    /**\n     * Add a certain value to the pointer to create a\n     * new pointer to that offset\n     */\n    virtual_pointer_t operator+(size_t off) { return m_contents + off; }\n\n    /* Numerical order for sorting pointers in containers. 
*/\n    bool operator<(virtual_pointer_t rhs) const {\n      return (static_cast<base_ptr_t>(m_contents) <\n              static_cast<base_ptr_t>(rhs.m_contents));\n    }\n\n    bool operator>(virtual_pointer_t rhs) const {\n      return (static_cast<base_ptr_t>(m_contents) >\n              static_cast<base_ptr_t>(rhs.m_contents));\n    }\n\n    /**\n     * Numerical order for sorting pointers in containers\n     */\n    bool operator==(virtual_pointer_t rhs) const {\n      return (static_cast<base_ptr_t>(m_contents) ==\n              static_cast<base_ptr_t>(rhs.m_contents));\n    }\n\n    /**\n     * Simple forward to the equality overload.\n     */\n    bool operator!=(virtual_pointer_t rhs) const {\n      return !(this->operator==(rhs));\n    }\n\n    /**\n     * Converts a void * into a virtual pointer structure.\n     * Note that this will only work if the void * was\n     * already a virtual_pointer_t, but we have no way of\n     * checking\n     */\n    virtual_pointer_t(const void *ptr)\n        : m_contents(reinterpret_cast<base_ptr_t>(ptr)){};\n\n    /**\n     * Creates a virtual_pointer_t from the given integer\n     * number\n     */\n    virtual_pointer_t(base_ptr_t u) : m_contents(u){};\n  };\n\n  /* Definition of a null pointer\n   */\n  const virtual_pointer_t null_virtual_ptr = nullptr;\n\n  /**\n   * Whether if a pointer is null or not.\n   * A pointer is nullptr if the value is of null_virtual_ptr\n   */\n  static inline bool is_nullptr(virtual_pointer_t ptr) {\n    return (static_cast<void *>(ptr) == nullptr);\n  }\n\n  /* basic type for all buffers\n   */\n  using buffer_t = cl::sycl::buffer_mem;\n\n  /**\n   * Node that stores information about a device allocation.\n   * Nodes are sorted by size to organise a free list of nodes\n   * that can be recovered.\n   */\n  struct pMapNode_t {\n    buffer_t m_buffer;\n    size_t m_size;\n    bool m_free;\n\n    pMapNode_t(buffer_t b, size_t size, bool f)\n        : m_buffer{b}, m_size{size}, m_free{f} {\n      m_buffer.set_final_data(nullptr);\n    }\n\n    bool operator<=(const pMapNode_t &rhs) { return (m_size <= rhs.m_size); }\n  };\n\n  /** Storage of the pointer / buffer tree\n   */\n  using pointerMap_t = std::map<virtual_pointer_t, pMapNode_t>;\n\n  /**\n   * Obtain the insertion point in the pointer map for\n   * a pointer of the given size.\n   * \\param requiredSize Size attemted to reclaim\n   */\n  typename pointerMap_t::iterator get_insertion_point(size_t requiredSize) {\n    typename pointerMap_t::iterator retVal;\n    bool reuse = false;\n    if (!m_freeList.empty()) {\n      // try to re-use an existing block\n      for (auto freeElem : m_freeList) {\n        if (freeElem->second.m_size >= requiredSize) {\n          retVal = freeElem;\n          reuse = true;\n          // Element is not going to be free anymore\n          m_freeList.erase(freeElem);\n          break;\n        }\n      }\n    }\n    if (!reuse) {\n      retVal = std::prev(m_pointerMap.end());\n    }\n    return retVal;\n  }\n\n  /**\n   * Returns an iterator to the node that stores the information\n   * of the given virtual pointer from the given pointer map structure.\n   * If pointer is not found, throws std::out_of_range.\n   * If the pointer map structure is empty, throws std::out_of_range\n   *\n   * \\param pMap the pointerMap_t structure storing all the pointers\n   * \\param virtual_pointer_ptr The virtual pointer to obtain the node of\n   * \\throws std::out:of_range if the pointer is not found or pMap is empty\n   */\n  typename 
pointerMap_t::iterator get_node(const virtual_pointer_t ptr) {\n    if (this->count() == 0) {\n      m_pointerMap.clear();\n      EIGEN_THROW_X(std::out_of_range(\"There are no pointers allocated\\n\"));\n\n    }\n    if (is_nullptr(ptr)) {\n      m_pointerMap.clear();\n      EIGEN_THROW_X(std::out_of_range(\"Cannot access null pointer\\n\"));\n    }\n    // The previous element to the lower bound is the node that\n    // holds this memory address\n    auto node = m_pointerMap.lower_bound(ptr);\n    // If the value of the pointer is not the one of the node\n    // then we return the previous one\n    if (node == std::end(m_pointerMap)) {\n      --node;\n    } else if (node->first != ptr) {\n      if (node == std::begin(m_pointerMap)) {\n        m_pointerMap.clear();\n        EIGEN_THROW_X(\n            std::out_of_range(\"The pointer is not registered in the map\\n\"));\n\n      }\n      --node;\n    }\n\n    return node;\n  }\n\n  /* get_buffer.\n   * Returns a buffer from the map using the pointer address\n   */\n  template <typename buffer_data_type = buffer_data_type_t>\n  cl::sycl::buffer<buffer_data_type, 1> get_buffer(\n      const virtual_pointer_t ptr) {\n    using sycl_buffer_t = cl::sycl::buffer<buffer_data_type, 1>;\n\n    // get_node() returns a `buffer_mem`, so we need to cast it to a `buffer<>`.\n    // We can do this without the `buffer_mem` being a pointer, as we\n    // only declare member variables in the base class (`buffer_mem`) and not in\n    // the child class (`buffer<>).\n    auto node = get_node(ptr);\n    eigen_assert(node->first == ptr || node->first < ptr);\n    eigen_assert(ptr < static_cast<virtual_pointer_t>(node->second.m_size +\n                                                      node->first));\n    return *(static_cast<sycl_buffer_t *>(&node->second.m_buffer));\n  }\n\n  /**\n   * @brief Returns an accessor to the buffer of the given virtual pointer\n   * @param accessMode\n   * @param accessTarget\n   * @param ptr The virtual pointer\n   */\n  template <sycl_acc_mode access_mode = default_acc_mode,\n            sycl_acc_target access_target = default_acc_target,\n            typename buffer_data_type = buffer_data_type_t>\n  cl::sycl::accessor<buffer_data_type, 1, access_mode, access_target>\n  get_access(const virtual_pointer_t ptr) {\n    auto buf = get_buffer<buffer_data_type>(ptr);\n    return buf.template get_access<access_mode, access_target>();\n  }\n\n  /**\n   * @brief Returns an accessor to the buffer of the given virtual pointer\n   *        in the given command group scope\n   * @param accessMode\n   * @param accessTarget\n   * @param ptr The virtual pointer\n   * @param cgh Reference to the command group scope\n   */\n  template <sycl_acc_mode access_mode = default_acc_mode,\n            sycl_acc_target access_target = default_acc_target,\n            typename buffer_data_type = buffer_data_type_t>\n  cl::sycl::accessor<buffer_data_type, 1, access_mode, access_target>\n  get_access(const virtual_pointer_t ptr, cl::sycl::handler &cgh) {\n    auto buf = get_buffer<buffer_data_type>(ptr);\n    return buf.template get_access<access_mode, access_target>(cgh);\n  }\n\n  /*\n   * Returns the offset from the base address of this pointer.\n   */\n  inline std::ptrdiff_t get_offset(const virtual_pointer_t ptr) {\n    // The previous element to the lower bound is the node that\n    // holds this memory address\n    auto node = get_node(ptr);\n    auto start = node->first;\n    eigen_assert(start == ptr || start < ptr);\n    eigen_assert(ptr < start 
+ node->second.m_size);\n    return (ptr - start);\n  }\n\n  /*\n   * Returns the number of elements by which the given pointer is offset from\n   * the base address.\n   */\n  template <typename buffer_data_type>\n  inline size_t get_element_offset(const virtual_pointer_t ptr) {\n    return get_offset(ptr) / sizeof(buffer_data_type);\n  }\n\n  /**\n   * Constructs the PointerMapper structure.\n   */\n  PointerMapper(base_ptr_t baseAddress = 4096)\n      : m_pointerMap{}, m_freeList{}, m_baseAddress{baseAddress} {\n    if (m_baseAddress == 0) {\n      EIGEN_THROW_X(std::invalid_argument(\"Base address cannot be zero\\n\"));\n    }\n  };\n\n  /**\n   * PointerMapper cannot be copied or moved\n   */\n  PointerMapper(const PointerMapper &) = delete;\n\n  /**\n   * Empty the pointer list\n   */\n  inline void clear() {\n    m_freeList.clear();\n    m_pointerMap.clear();\n  }\n\n  /* add_pointer.\n   * Adds an existing pointer to the map and returns the virtual pointer id.\n   */\n  inline virtual_pointer_t add_pointer(const buffer_t &b) {\n    return add_pointer_impl(b);\n  }\n\n  /* add_pointer.\n   * Adds a pointer to the map and returns the virtual pointer id.\n   */\n  inline virtual_pointer_t add_pointer(buffer_t &&b) {\n    return add_pointer_impl(b);\n  }\n\n  /**\n   * @brief Fuses the given node with the previous nodes in the\n   *        pointer map if they are free\n   *\n   * @param node A reference to the free node to be fused\n   */\n  void fuse_forward(typename pointerMap_t::iterator &node) {\n    while (node != std::prev(m_pointerMap.end())) {\n      // if following node is free\n      // remove it and extend the current node with its size\n      auto fwd_node = std::next(node);\n      if (!fwd_node->second.m_free) {\n        break;\n      }\n      auto fwd_size = fwd_node->second.m_size;\n      m_freeList.erase(fwd_node);\n      m_pointerMap.erase(fwd_node);\n\n      node->second.m_size += fwd_size;\n    }\n  }\n\n  /**\n   * @brief Fuses the given node with the following nodes in the\n   *        pointer map if they are free\n   *\n   * @param node A reference to the free node to be fused\n   */\n  void fuse_backward(typename pointerMap_t::iterator &node) {\n    while (node != m_pointerMap.begin()) {\n      // if previous node is free, extend it\n      // with the size of the current one\n      auto prev_node = std::prev(node);\n      if (!prev_node->second.m_free) {\n        break;\n      }\n      prev_node->second.m_size += node->second.m_size;\n\n      // remove the current node\n      m_freeList.erase(node);\n      m_pointerMap.erase(node);\n\n      // point to the previous node\n      node = prev_node;\n    }\n  }\n\n  /* remove_pointer.\n   * Removes the given pointer from the map.\n   * The pointer is allowed to be reused only if ReUse if true.\n   */\n  template <bool ReUse = true>\n  void remove_pointer(const virtual_pointer_t ptr) {\n    if (is_nullptr(ptr)) {\n      return;\n    }\n    auto node = this->get_node(ptr);\n\n    node->second.m_free = true;\n    m_freeList.emplace(node);\n\n    // Fuse the node\n    // with free nodes before and after it\n    fuse_forward(node);\n    fuse_backward(node);\n\n    // If after fusing the node is the last one\n    // simply remove it (since it is free)\n    if (node == std::prev(m_pointerMap.end())) {\n      m_freeList.erase(node);\n      m_pointerMap.erase(node);\n    }\n  }\n\n  /* count.\n   * Return the number of active pointers (i.e, pointers that\n   * have been malloc but not freed).\n   */\n  size_t count() const { 
return (m_pointerMap.size() - m_freeList.size()); }\n\n private:\n  /* add_pointer_impl.\n   * Adds a pointer to the map and returns the virtual pointer id.\n   * BufferT is either a const buffer_t& or a buffer_t&&.\n   */\n  template <class BufferT>\n  virtual_pointer_t add_pointer_impl(BufferT b) {\n    virtual_pointer_t retVal = nullptr;\n    size_t bufSize = b.get_count();\n    pMapNode_t p{b, bufSize, false};\n    // If this is the first pointer:\n    if (m_pointerMap.empty()) {\n      virtual_pointer_t initialVal{m_baseAddress};\n      m_pointerMap.emplace(initialVal, p);\n      return initialVal;\n    }\n\n    auto lastElemIter = get_insertion_point(bufSize);\n    // We are recovering an existing free node\n    if (lastElemIter->second.m_free) {\n      lastElemIter->second.m_buffer = b;\n      lastElemIter->second.m_free = false;\n\n      // If the recovered node is bigger than the inserted one\n      // add a new free node with the remaining space\n      if (lastElemIter->second.m_size > bufSize) {\n        // create a new node with the remaining space\n        auto remainingSize = lastElemIter->second.m_size - bufSize;\n        pMapNode_t p2{b, remainingSize, true};\n\n        // update size of the current node\n        lastElemIter->second.m_size = bufSize;\n\n        // add the new free node\n        auto newFreePtr = lastElemIter->first + bufSize;\n        auto freeNode = m_pointerMap.emplace(newFreePtr, p2).first;\n        m_freeList.emplace(freeNode);\n      }\n\n      retVal = lastElemIter->first;\n    } else {\n      size_t lastSize = lastElemIter->second.m_size;\n      retVal = lastElemIter->first + lastSize;\n      m_pointerMap.emplace(retVal, p);\n    }\n    return retVal;\n  }\n\n  /**\n   * Compare two iterators to pointer map entries according to\n   * the size of the allocation on the device.\n   */\n  struct SortBySize {\n    bool operator()(typename pointerMap_t::iterator a,\n                    typename pointerMap_t::iterator b) const {\n      return ((a->first < b->first) && (a->second <= b->second)) ||\n             ((a->first < b->first) && (b->second <= a->second));\n    }\n  };\n\n  /* Maps the pointer addresses to buffer and size pairs.\n   */\n  pointerMap_t m_pointerMap;\n\n  /* List of free nodes available for re-using\n   */\n  std::set<typename pointerMap_t::iterator, SortBySize> m_freeList;\n\n  /* Base address used when issuing the first virtual pointer, allows users\n   * to specify alignment. Cannot be zero. 
*/\n  std::intptr_t m_baseAddress;\n};\n\n/* remove_pointer.\n * Removes the given pointer from the map.\n * The pointer is allowed to be reused only if ReUse if true.\n */\ntemplate <>\ninline void PointerMapper::remove_pointer<false>(const virtual_pointer_t ptr) {\n  if (is_nullptr(ptr)) {\n    return;\n  }\n  m_pointerMap.erase(this->get_node(ptr));\n}\n\n/**\n * Malloc-like interface to the pointer-mapper.\n * Given a size, creates a byte-typed buffer and returns a\n * fake pointer to keep track of it.\n * \\param size Size in bytes of the desired allocation\n * \\throw cl::sycl::exception if error while creating the buffer\n */\ninline void *SYCLmalloc(size_t size, PointerMapper &pMap) {\n  if (size == 0) {\n    return nullptr;\n  }\n  // Create a generic buffer of the given size\n  using buffer_t = cl::sycl::buffer<buffer_data_type_t, 1>;\n  auto thePointer = pMap.add_pointer(buffer_t(cl::sycl::range<1>{size}));\n  // Store the buffer on the global list\n  return static_cast<void *>(thePointer);\n}\n\n/**\n * Free-like interface to the pointer mapper.\n * Given a fake-pointer created with the virtual-pointer malloc,\n * destroys the buffer and remove it from the list.\n * If ReUse is false, the pointer is not added to the freeList,\n * it should be false only for sub-buffers.\n */\ntemplate <bool ReUse = true, typename PointerMapper>\ninline void SYCLfree(void *ptr, PointerMapper &pMap) {\n  pMap.template remove_pointer<ReUse>(ptr);\n}\n\n/**\n * Clear all the memory allocated by SYCL.\n */\ntemplate <typename PointerMapper>\ninline void SYCLfreeAll(PointerMapper &pMap) {\n  pMap.clear();\n}\n\ntemplate <cl::sycl::access::mode AcMd, typename T>\nstruct RangeAccess {\n  static const auto global_access = cl::sycl::access::target::global_buffer;\n  static const auto is_place_holder = cl::sycl::access::placeholder::true_t;\n  typedef T scalar_t;\n  typedef scalar_t &ref_t;\n  typedef typename cl::sycl::global_ptr<scalar_t>::pointer_t ptr_t;\n\n  // the accessor type does not necessarily the same as T\n  typedef cl::sycl::accessor<scalar_t, 1, AcMd, global_access, is_place_holder>\n      accessor;\n\n  typedef RangeAccess<AcMd, T> self_t;\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE RangeAccess(accessor access,\n                                                    size_t offset,\n                                                    std::intptr_t virtual_ptr)\n      : access_(access), offset_(offset), virtual_ptr_(virtual_ptr) {}\n\n  RangeAccess(cl::sycl::buffer<scalar_t, 1> buff =\n                  cl::sycl::buffer<scalar_t, 1>(cl::sycl::range<1>(1)))\n      : access_{accessor{buff}}, offset_(0), virtual_ptr_(-1) {}\n\n  // This should be only used for null constructor on the host side\n  RangeAccess(std::nullptr_t) : RangeAccess() {}\n  // This template parameter must be removed and scalar_t should be replaced\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ptr_t get_pointer() const {\n    return (access_.get_pointer().get() + offset_);\n  }\n  template <typename Index>\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE self_t &operator+=(Index offset) {\n    offset_ += (offset);\n    return *this;\n  }\n  template <typename Index>\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE self_t operator+(Index offset) const {\n    return self_t(access_, offset_ + offset, virtual_ptr_);\n  }\n  template <typename Index>\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE self_t operator-(Index offset) const {\n    return self_t(access_, offset_ - offset, virtual_ptr_);\n  }\n  template <typename Index>\n  EIGEN_DEVICE_FUNC 
EIGEN_STRONG_INLINE self_t &operator-=(Index offset) {\n    offset_ -= offset;\n    return *this;\n  }\n\n  // THIS IS FOR NULL COMPARISON ONLY\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE friend bool operator==(\n      const RangeAccess &lhs, std::nullptr_t) {\n    return ((lhs.virtual_ptr_ == -1));\n  }\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE friend bool operator!=(\n      const RangeAccess &lhs, std::nullptr_t i) {\n    return !(lhs == i);\n  }\n\n  // THIS IS FOR NULL COMPARISON ONLY\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE friend bool operator==(\n      std::nullptr_t, const RangeAccess &rhs) {\n    return ((rhs.virtual_ptr_ == -1));\n  }\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE friend bool operator!=(\n      std::nullptr_t i, const RangeAccess &rhs) {\n    return !(i == rhs);\n  }\n  // Prefix operator (Increment and return value)\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE self_t &operator++() {\n    offset_++;\n    return (*this);\n  }\n\n  // Postfix operator (Return value and increment)\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE self_t operator++(int i) {\n    EIGEN_UNUSED_VARIABLE(i);\n    self_t temp_iterator(*this);\n    offset_++;\n    return temp_iterator;\n  }\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t get_size() const {\n    return (access_.get_count() - offset_);\n  }\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t get_offset() const {\n    return offset_;\n  }\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void set_offset(std::ptrdiff_t offset) {\n    offset_ = offset;\n  }\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ref_t operator*() const {\n    return *get_pointer();\n  }\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ref_t operator*() {\n    return *get_pointer();\n  }\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ptr_t operator->() = delete;\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ref_t operator[](int x) {\n    return *(get_pointer() + x);\n  }\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ref_t operator[](int x) const {\n    return *(get_pointer() + x);\n  }\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_t *get_virtual_pointer() const {\n    return reinterpret_cast<scalar_t *>(virtual_ptr_ +\n                                        (offset_ * sizeof(scalar_t)));\n  }\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit operator bool() const {\n    return (virtual_ptr_ != -1);\n  }\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE operator RangeAccess<AcMd, const T>() {\n    return RangeAccess<AcMd, const T>(access_, offset_, virtual_ptr_);\n  }\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  operator RangeAccess<AcMd, const T>() const {\n    return RangeAccess<AcMd, const T>(access_, offset_, virtual_ptr_);\n  }\n  // binding placeholder accessors to a command group handler for SYCL\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(\n      cl::sycl::handler &cgh) const {\n    cgh.require(access_);\n  }\n\n private:\n  accessor access_;\n  size_t offset_;\n  std::intptr_t virtual_ptr_;  // the location of the buffer in the map\n};\n\ntemplate <cl::sycl::access::mode AcMd, typename T>\nstruct RangeAccess<AcMd, const T> : RangeAccess<AcMd, T> {\n  typedef RangeAccess<AcMd, T> Base;\n  using Base::Base;\n};\n\n}  // namespace internal\n}  // namespace TensorSycl\n}  // namespace Eigen\n\n#endif  // EIGEN_CXX11_TENSOR_TENSOR_SYCL_STORAGE_MEMORY_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/arch/SYCL/TypeCasting.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Mehdi Goli    Codeplay Software Ltd.\n// Ralph Potter  Codeplay Software Ltd.\n// Luke Iwanski  Codeplay Software Ltd.\n// Contact: <eigen@codeplay.com>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n/*****************************************************************\n * TypeCasting.h\n *\n * \\brief:\n *  TypeCasting\n *\n *****************************************************************/\n\n#ifndef EIGEN_TYPE_CASTING_SYCL_H\n#define EIGEN_TYPE_CASTING_SYCL_H\n\nnamespace Eigen {\n\nnamespace internal {\n#ifdef SYCL_DEVICE_ONLY\ntemplate <>\nstruct type_casting_traits<float, int> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };\n};\n\ntemplate <>\nEIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_int4\npcast<cl::sycl::cl_float4, cl::sycl::cl_int4>(const cl::sycl::cl_float4& a) {\n  return a\n      .template convert<cl::sycl::cl_int, cl::sycl::rounding_mode::automatic>();\n}\n\ntemplate <>\nstruct type_casting_traits<int, float> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };\n};\n\ntemplate <>\nEIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_float4\npcast<cl::sycl::cl_int4, cl::sycl::cl_float4>(const cl::sycl::cl_int4& a) {\n  return a.template convert<cl::sycl::cl_float,\n                            cl::sycl::rounding_mode::automatic>();\n}\n\ntemplate <>\nstruct type_casting_traits<double, float> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 };\n};\n\ntemplate <>\nEIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_float4\npcast<cl::sycl::cl_double2, cl::sycl::cl_float4>(\n    const cl::sycl::cl_double2& a, const cl::sycl::cl_double2& b) {\n  auto a1 = a.template convert<cl::sycl::cl_float,\n                               cl::sycl::rounding_mode::automatic>();\n  auto b1 = b.template convert<cl::sycl::cl_float,\n                               cl::sycl::rounding_mode::automatic>();\n  return cl::sycl::float4(a1.x(), a1.y(), b1.x(), b1.y());\n}\n\ntemplate <>\nstruct type_casting_traits<float, double> {\n  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 2 };\n};\n\ntemplate <>\nEIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_double2\npcast<cl::sycl::cl_float4, cl::sycl::cl_double2>(const cl::sycl::cl_float4& a) {\n  // Simply discard the second half of the input\n  return cl::sycl::cl_double2(a.x(), a.y());\n}\n\n#endif\n}  // end namespace internal\n\n}  // end namespace Eigen\n\n#endif  // EIGEN_TYPE_CASTING_SYCL_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/arch/ZVector/Complex.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr>\n// Copyright (C) 2016 Konstantinos Margaritis <markos@freevec.org>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_COMPLEX32_ALTIVEC_H\n#define EIGEN_COMPLEX32_ALTIVEC_H\n\nnamespace Eigen {\n\nnamespace internal {\n\n#if !defined(__ARCH__) || (defined(__ARCH__) && __ARCH__ >= 12)\nstatic Packet4ui  p4ui_CONJ_XOR = { 0x00000000, 0x80000000, 0x00000000, 0x80000000 }; //vec_mergeh((Packet4ui)p4i_ZERO, (Packet4ui)p4f_MZERO);\n#endif\n\nstatic Packet2ul  p2ul_CONJ_XOR1 = (Packet2ul) vec_sld((Packet4ui) p2d_ZERO_, (Packet4ui) p2l_ZERO, 8);//{ 0x8000000000000000, 0x0000000000000000 };\nstatic Packet2ul  p2ul_CONJ_XOR2 = (Packet2ul) vec_sld((Packet4ui) p2l_ZERO,  (Packet4ui) p2d_ZERO_, 8);//{ 0x8000000000000000, 0x0000000000000000 };\n\nstruct Packet1cd\n{\n  EIGEN_STRONG_INLINE Packet1cd() {}\n  EIGEN_STRONG_INLINE explicit Packet1cd(const Packet2d& a) : v(a) {}\n  Packet2d v;\n};\n\nstruct Packet2cf\n{\n  EIGEN_STRONG_INLINE Packet2cf() {}\n  EIGEN_STRONG_INLINE explicit Packet2cf(const Packet4f& a) : v(a) {}\n#if !defined(__ARCH__) || (defined(__ARCH__) && __ARCH__ < 12)\n  union {\n    Packet4f v;\n    Packet1cd cd[2];\n  };\n#else\n  Packet4f v;\n#endif\n};\n\ntemplate<> struct packet_traits<std::complex<float> >  : default_packet_traits\n{\n  typedef Packet2cf type;\n  typedef Packet2cf half;\n  enum {\n    Vectorizable = 1,\n    AlignedOnScalar = 1,\n    size = 2,\n    HasHalfPacket = 0,\n\n    HasAdd    = 1,\n    HasSub    = 1,\n    HasMul    = 1,\n    HasDiv    = 1,\n    HasNegate = 1,\n    HasAbs    = 0,\n    HasAbs2   = 0,\n    HasMin    = 0,\n    HasMax    = 0,\n    HasBlend  = 1,\n    HasSetLinear = 0\n  };\n};\n\n\ntemplate<> struct packet_traits<std::complex<double> >  : default_packet_traits\n{\n  typedef Packet1cd type;\n  typedef Packet1cd half;\n  enum {\n    Vectorizable = 1,\n    AlignedOnScalar = 1,\n    size = 1,\n    HasHalfPacket = 0,\n\n    HasAdd    = 1,\n    HasSub    = 1,\n    HasMul    = 1,\n    HasDiv    = 1,\n    HasNegate = 1,\n    HasAbs    = 0,\n    HasAbs2   = 0,\n    HasMin    = 0,\n    HasMax    = 0,\n    HasSetLinear = 0\n  };\n};\n\ntemplate<> struct unpacket_traits<Packet2cf> { typedef std::complex<float>  type; enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet2cf half; };\ntemplate<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet1cd half; };\n\n/* Forward declaration */\nEIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet2cf,2>& kernel);\n\n/* complex<double> first */\ntemplate<> EIGEN_STRONG_INLINE Packet1cd pload <Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload<Packet2d>((const double*)from)); }\ntemplate<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu<Packet2d>((const double*)from)); }\ntemplate<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> *   to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE 
pstore((double*)to, from.v); }\ntemplate<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> *   to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, from.v); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>&  from)\n{ /* here we really have to use unaligned loads :( */ return ploadu<Packet1cd>(&from); }\n\ntemplate<> EIGEN_DEVICE_FUNC inline Packet1cd pgather<std::complex<double>, Packet1cd>(const std::complex<double>* from, Index stride EIGEN_UNUSED)\n{\n  return pload<Packet1cd>(from);\n}\ntemplate<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet1cd>(std::complex<double>* to, const Packet1cd& from, Index stride EIGEN_UNUSED)\n{\n  pstore<std::complex<double> >(to, from);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(a.v + b.v); }\ntemplate<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(a.v - b.v); }\ntemplate<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) { return Packet1cd(pnegate(Packet2d(a.v))); }\ntemplate<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a) { return Packet1cd((Packet2d)vec_xor((Packet2d)a.v, (Packet2d)p2ul_CONJ_XOR2)); }\ntemplate<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b)\n{\n  Packet2d a_re, a_im, v1, v2;\n\n  // Permute and multiply the real parts of a and b\n  a_re = vec_perm(a.v, a.v, p16uc_PSET64_HI);\n  // Get the imaginary parts of a\n  a_im = vec_perm(a.v, a.v, p16uc_PSET64_LO);\n  // multiply a_re * b\n  v1 = vec_madd(a_re, b.v, p2d_ZERO);\n  // multiply a_im * b and get the conjugate result\n  v2 = vec_madd(a_im, b.v, p2d_ZERO);\n  v2 = (Packet2d) vec_sld((Packet4ui)v2, (Packet4ui)v2, 8);\n  v2 = (Packet2d) vec_xor((Packet2d)v2, (Packet2d) p2ul_CONJ_XOR1);\n\n  return Packet1cd(v1 + v2);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet1cd pand    <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_and(a.v,b.v)); }\ntemplate<> EIGEN_STRONG_INLINE Packet1cd por     <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_or(a.v,b.v)); }\ntemplate<> EIGEN_STRONG_INLINE Packet1cd pxor    <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_xor(a.v,b.v)); }\ntemplate<> EIGEN_STRONG_INLINE Packet1cd pandnot <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_and(a.v, vec_nor(b.v,b.v))); }\ntemplate<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>*     from) {  return pset1<Packet1cd>(*from); }\ntemplate<> EIGEN_STRONG_INLINE Packet1cd pcmp_eq(const Packet1cd& a, const Packet1cd& b) {\n  Packet2d eq = vec_cmpeq (a.v, b.v);\n  Packet2d tmp = { eq[1], eq[0] };\n  return (Packet1cd)pand<Packet2d>(eq, tmp);\n}\n\ntemplate<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> *   addr) { EIGEN_ZVECTOR_PREFETCH(addr); }\n\ntemplate<> EIGEN_STRONG_INLINE std::complex<double>  pfirst<Packet1cd>(const Packet1cd& a)\n{\n  std::complex<double> EIGEN_ALIGN16 res;\n  pstore<std::complex<double> >(&res, a);\n\n  return res;\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a) { return a; }\ntemplate<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Packet1cd& a)\n{\n  return pfirst(a);\n}\ntemplate<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a)\n{\n  
return pfirst(a);\n}\nEIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d)\n\ntemplate<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)\n{\n  // TODO optimize it for AltiVec\n  Packet1cd res = pmul(a,pconj(b));\n  Packet2d s = vec_madd(b.v, b.v, p2d_ZERO_);\n  return Packet1cd(pdiv(res.v, s + vec_perm(s, s, p16uc_REVERSE64)));\n}\n\nEIGEN_STRONG_INLINE Packet1cd pcplxflip/*<Packet1cd>*/(const Packet1cd& x)\n{\n  return Packet1cd(preverse(Packet2d(x.v)));\n}\n\nEIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet1cd,2>& kernel)\n{\n  Packet2d tmp = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_HI);\n  kernel.packet[1].v = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_LO);\n  kernel.packet[0].v = tmp;\n}\n\n/* complex<float> follows */\ntemplate<> EIGEN_STRONG_INLINE Packet2cf pload <Packet2cf>(const std::complex<float>* from)  { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload<Packet4f>((const float*)from)); }\ntemplate<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from)  { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu<Packet4f>((const float*)from)); }\ntemplate<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> *     to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((float*)to, from.v); }\ntemplate<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> *     to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((float*)to, from.v); }\n\ntemplate<> EIGEN_STRONG_INLINE std::complex<float>  pfirst<Packet2cf>(const Packet2cf& a)\n{\n  std::complex<float> EIGEN_ALIGN16 res[2];\n  pstore<std::complex<float> >(res, a);\n\n  return res[0];\n}\n\n\n#if !defined(__ARCH__) || (defined(__ARCH__) && __ARCH__ < 12)\ntemplate<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>&  from)\n{\n  Packet2cf res;\n  res.cd[0] = Packet1cd(vec_ld2f((const float *)&from));\n  res.cd[1] = res.cd[0];\n  return res;\n}\n#else\ntemplate<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>&  from)\n{\n  Packet2cf res;\n  if((std::ptrdiff_t(&from) % 16) == 0)\n    res.v = pload<Packet4f>((const float *)&from);\n  else\n    res.v = ploadu<Packet4f>((const float *)&from);\n  res.v = vec_perm(res.v, res.v, p16uc_PSET64_HI);\n  return res;\n}\n#endif\n\ntemplate<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, Index stride)\n{\n  std::complex<float> EIGEN_ALIGN16 af[2];\n  af[0] = from[0*stride];\n  af[1] = from[1*stride];\n  return pload<Packet2cf>(af);\n}\ntemplate<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, Index stride)\n{\n  std::complex<float> EIGEN_ALIGN16 af[2];\n  pstore<std::complex<float> >((std::complex<float> *) af, from);\n  to[0*stride] = af[0];\n  to[1*stride] = af[1];\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(padd<Packet4f>(a.v, b.v)); }\ntemplate<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(psub<Packet4f>(a.v, b.v)); }\ntemplate<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a) { return Packet2cf(pnegate(Packet4f(a.v))); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet2cf pand   <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pand<Packet4f>(a.v,b.v)); }\ntemplate<> 
EIGEN_STRONG_INLINE Packet2cf por    <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(por<Packet4f>(a.v,b.v)); }\ntemplate<> EIGEN_STRONG_INLINE Packet2cf pxor   <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pxor<Packet4f>(a.v,b.v)); }\ntemplate<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pandnot<Packet4f>(a.v,b.v)); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>*      from) {  return pset1<Packet2cf>(*from); }\n\ntemplate<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> *     addr) { EIGEN_ZVECTOR_PREFETCH(addr); }\n\n\n#if !defined(__ARCH__) || (defined(__ARCH__) && __ARCH__ < 12)\n\ntemplate<> EIGEN_STRONG_INLINE Packet2cf pcmp_eq(const Packet2cf& a, const Packet2cf& b) {\n  Packet4f eq = pcmp_eq<Packet4f> (a.v, b.v);\n  Packet2cf res;\n  Packet2d tmp1 = { eq.v4f[0][1], eq.v4f[0][0] };\n  Packet2d tmp2 = { eq.v4f[1][1], eq.v4f[1][0] };\n  res.v.v4f[0] = pand<Packet2d>(eq.v4f[0], tmp1);\n  res.v.v4f[1] = pand<Packet2d>(eq.v4f[1], tmp2);\n  return res;\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a)\n{\n  Packet2cf res;\n  res.v.v4f[0] = pconj(Packet1cd(reinterpret_cast<Packet2d>(a.v.v4f[0]))).v;\n  res.v.v4f[1] = pconj(Packet1cd(reinterpret_cast<Packet2d>(a.v.v4f[1]))).v;\n  return res;\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)\n{\n  Packet2cf res;\n  res.v.v4f[0] = pmul(Packet1cd(reinterpret_cast<Packet2d>(a.v.v4f[0])), Packet1cd(reinterpret_cast<Packet2d>(b.v.v4f[0]))).v;\n  res.v.v4f[1] = pmul(Packet1cd(reinterpret_cast<Packet2d>(a.v.v4f[1])), Packet1cd(reinterpret_cast<Packet2d>(b.v.v4f[1]))).v;\n  return res;\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a)\n{\n  Packet2cf res;\n  res.cd[0] = a.cd[1];\n  res.cd[1] = a.cd[0];\n  return res;\n}\n\ntemplate<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a)\n{\n  std::complex<float> res;\n  Packet1cd b = padd<Packet1cd>(a.cd[0], a.cd[1]);\n  vec_st2f(b.v, (float*)&res);\n  return res;\n}\n\ntemplate<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a)\n{\n  std::complex<float> res;\n  Packet1cd b = pmul<Packet1cd>(a.cd[0], a.cd[1]);\n  vec_st2f(b.v, (float*)&res);\n  return res;\n}\n\nEIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f)\n\ntemplate<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)\n{\n  // TODO optimize it for AltiVec\n  Packet2cf res;\n  res.cd[0] = pdiv<Packet1cd>(a.cd[0], b.cd[0]);\n  res.cd[1] = pdiv<Packet1cd>(a.cd[1], b.cd[1]);\n  return res;\n}\n\nEIGEN_STRONG_INLINE Packet2cf pcplxflip/*<Packet2cf>*/(const Packet2cf& x)\n{\n  Packet2cf res;\n  res.cd[0] = pcplxflip(x.cd[0]);\n  res.cd[1] = pcplxflip(x.cd[1]);\n  return res;\n}\n\nEIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet2cf,2>& kernel)\n{\n  Packet1cd tmp = kernel.packet[0].cd[1];\n  kernel.packet[0].cd[1] = kernel.packet[1].cd[0];\n  kernel.packet[1].cd[0] = tmp;\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet2cf pblend(const Selector<2>& ifPacket, const Packet2cf& thenPacket, const Packet2cf& elsePacket) {\n  Packet2cf result;\n  const Selector<4> ifPacket4 = { ifPacket.select[0], ifPacket.select[0], ifPacket.select[1], ifPacket.select[1] };\n  result.v = pblend<Packet4f>(ifPacket4, thenPacket.v, elsePacket.v);\n  return result;\n}\n#else\ntemplate<> 
EIGEN_STRONG_INLINE Packet2cf pcmp_eq(const Packet2cf& a, const Packet2cf& b) {\n  Packet4f eq = vec_cmpeq (a.v, b.v);\n  Packet4f tmp = { eq[1], eq[0], eq[3], eq[2] };\n  return (Packet2cf)pand<Packet4f>(eq, tmp);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a) { return Packet2cf(pxor<Packet4f>(a.v, reinterpret_cast<Packet4f>(p4ui_CONJ_XOR))); }\ntemplate<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)\n{\n  Packet4f a_re, a_im, prod, prod_im;\n\n  // Permute and multiply the real parts of a and b\n  a_re = vec_perm(a.v, a.v, p16uc_PSET32_WODD);\n  \n  // Get the imaginary parts of a\n  a_im = vec_perm(a.v, a.v, p16uc_PSET32_WEVEN);\n\n  // multiply a_im * b and get the conjugate result\n  prod_im = a_im * b.v;\n  prod_im = pxor<Packet4f>(prod_im, reinterpret_cast<Packet4f>(p4ui_CONJ_XOR));\n  // permute back to a proper order\n  prod_im = vec_perm(prod_im, prod_im, p16uc_COMPLEX32_REV);\n\n  // multiply a_re * b, add prod_im\n  prod = pmadd<Packet4f>(a_re, b.v, prod_im);\n \n  return Packet2cf(prod);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a)\n{\n  Packet4f rev_a;\n  rev_a = vec_perm(a.v, a.v, p16uc_COMPLEX32_REV2);\n  return Packet2cf(rev_a);\n}\n\ntemplate<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a)\n{\n  Packet4f b;\n  b = vec_sld(a.v, a.v, 8);\n  b = padd<Packet4f>(a.v, b);\n  return pfirst<Packet2cf>(Packet2cf(b));\n}\n\ntemplate<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a)\n{\n  Packet4f b;\n  Packet2cf prod;\n  b = vec_sld(a.v, a.v, 8);\n  prod = pmul<Packet2cf>(a, Packet2cf(b));\n\n  return pfirst<Packet2cf>(prod);\n}\n\nEIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f)\n\ntemplate<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)\n{\n  // TODO optimize it for AltiVec\n  Packet2cf res = pmul(a, pconj(b));\n  Packet4f s = pmul<Packet4f>(b.v, b.v);\n  return Packet2cf(pdiv(res.v, padd<Packet4f>(s, vec_perm(s, s, p16uc_COMPLEX32_REV))));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet2cf pcplxflip<Packet2cf>(const Packet2cf& x)\n{\n  return Packet2cf(vec_perm(x.v, x.v, p16uc_COMPLEX32_REV));\n}\n\nEIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet2cf,2>& kernel)\n{\n  Packet4f tmp = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_HI);\n  kernel.packet[1].v = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_LO);\n  kernel.packet[0].v = tmp;\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet2cf pblend(const Selector<2>& ifPacket, const Packet2cf& thenPacket, const Packet2cf& elsePacket) {\n  Packet2cf result;\n  result.v = reinterpret_cast<Packet4f>(pblend<Packet2d>(ifPacket, reinterpret_cast<Packet2d>(thenPacket.v), reinterpret_cast<Packet2d>(elsePacket.v)));\n  return result;\n}\n#endif\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_COMPLEX32_ALTIVEC_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/arch/ZVector/MathFunctions.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2007 Julien Pommier\n// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>\n// Copyright (C) 2016 Konstantinos Margaritis <markos@freevec.org>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n/* The sin, cos, exp, and log functions of this file come from\n * Julien Pommier's sse math library: http://gruntthepeon.free.fr/ssemath/\n */\n\n#ifndef EIGEN_MATH_FUNCTIONS_ALTIVEC_H\n#define EIGEN_MATH_FUNCTIONS_ALTIVEC_H\n\nnamespace Eigen {\n\nnamespace internal {\n\n#if !defined(__ARCH__) || (defined(__ARCH__) && __ARCH__ >= 12)\nstatic _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);\nstatic _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);\nstatic _EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f);\nstatic _EIGEN_DECLARE_CONST_Packet4i(23, 23);\n\nstatic _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(inv_mant_mask, ~0x7f800000);\n\n/* the smallest non denormalized float number */\nstatic _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(min_norm_pos,  0x00800000);\nstatic _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(minus_inf,     0xff800000); // -1.f/0.f\nstatic _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(minus_nan,     0xffffffff);\n  \n/* natural logarithm computed for 4 simultaneous float\n  return NaN for x <= 0\n*/\nstatic _EIGEN_DECLARE_CONST_Packet4f(cephes_SQRTHF, 0.707106781186547524f);\nstatic _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p0, 7.0376836292E-2f);\nstatic _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p1, - 1.1514610310E-1f);\nstatic _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p2, 1.1676998740E-1f);\nstatic _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p3, - 1.2420140846E-1f);\nstatic _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p4, + 1.4249322787E-1f);\nstatic _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p5, - 1.6668057665E-1f);\nstatic _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p6, + 2.0000714765E-1f);\nstatic _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p7, - 2.4999993993E-1f);\nstatic _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p8, + 3.3333331174E-1f);\nstatic _EIGEN_DECLARE_CONST_Packet4f(cephes_log_q1, -2.12194440e-4f);\nstatic _EIGEN_DECLARE_CONST_Packet4f(cephes_log_q2, 0.693359375f);\n\nstatic _EIGEN_DECLARE_CONST_Packet4f(exp_hi,  88.3762626647950f);\nstatic _EIGEN_DECLARE_CONST_Packet4f(exp_lo, -88.3762626647949f);\n\nstatic _EIGEN_DECLARE_CONST_Packet4f(cephes_LOG2EF, 1.44269504088896341f);\nstatic _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C1, 0.693359375f);\nstatic _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C2, -2.12194440e-4f);\n\nstatic _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p0, 1.9875691500E-4f);\nstatic _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p1, 1.3981999507E-3f);\nstatic _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p2, 8.3334519073E-3f);\nstatic _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p3, 4.1665795894E-2f);\nstatic _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p4, 1.6666665459E-1f);\nstatic _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p5, 5.0000001201E-1f);\n#endif\n\nstatic _EIGEN_DECLARE_CONST_Packet2d(1 , 1.0);\nstatic _EIGEN_DECLARE_CONST_Packet2d(2 , 2.0);\nstatic _EIGEN_DECLARE_CONST_Packet2d(half, 0.5);\n\nstatic _EIGEN_DECLARE_CONST_Packet2d(exp_hi,  709.437);\nstatic _EIGEN_DECLARE_CONST_Packet2d(exp_lo, -709.436139303);\n\nstatic _EIGEN_DECLARE_CONST_Packet2d(cephes_LOG2EF, 1.4426950408889634073599);\n\nstatic 
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p0, 1.26177193074810590878e-4);\nstatic _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p1, 3.02994407707441961300e-2);\nstatic _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p2, 9.99999999999999999910e-1);\n\nstatic _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q0, 3.00198505138664455042e-6);\nstatic _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q1, 2.52448340349684104192e-3);\nstatic _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q2, 2.27265548208155028766e-1);\nstatic _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q3, 2.00000000000000000009e0);\n\nstatic _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C1, 0.693145751953125);\nstatic _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C2, 1.42860682030941723212e-6);\n\ntemplate<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED\nPacket2d pexp<Packet2d>(const Packet2d& _x)\n{\n  Packet2d x = _x;\n\n  Packet2d tmp, fx;\n  Packet2l emm0;\n\n  // clamp x\n  x = pmax(pmin(x, p2d_exp_hi), p2d_exp_lo);\n  /* express exp(x) as exp(g + n*log(2)) */\n  fx = pmadd(p2d_cephes_LOG2EF, x, p2d_half);\n\n  fx = vec_floor(fx);\n\n  tmp = pmul(fx, p2d_cephes_exp_C1);\n  Packet2d z = pmul(fx, p2d_cephes_exp_C2);\n  x = psub(x, tmp);\n  x = psub(x, z);\n\n  Packet2d x2 = pmul(x,x);\n\n  Packet2d px = p2d_cephes_exp_p0;\n  px = pmadd(px, x2, p2d_cephes_exp_p1);\n  px = pmadd(px, x2, p2d_cephes_exp_p2);\n  px = pmul (px, x);\n\n  Packet2d qx = p2d_cephes_exp_q0;\n  qx = pmadd(qx, x2, p2d_cephes_exp_q1);\n  qx = pmadd(qx, x2, p2d_cephes_exp_q2);\n  qx = pmadd(qx, x2, p2d_cephes_exp_q3);\n\n  x = pdiv(px,psub(qx,px));\n  x = pmadd(p2d_2,x,p2d_1);\n\n  // build 2^n\n  emm0 = vec_ctsl(fx, 0);\n\n  static const Packet2l p2l_1023 = { 1023, 1023 };\n  static const Packet2ul p2ul_52 = { 52, 52 };\n\n  emm0 = emm0 + p2l_1023;\n  emm0 = emm0 << reinterpret_cast<Packet2l>(p2ul_52);\n\n  // Altivec's max & min operators just drop silent NaNs. 
Check NaNs in \n  // inputs and return them unmodified.\n  Packet2ul isnumber_mask = reinterpret_cast<Packet2ul>(vec_cmpeq(_x, _x));\n  return vec_sel(_x, pmax(pmul(x, reinterpret_cast<Packet2d>(emm0)), _x),\n                 isnumber_mask);\n}\n\ntemplate<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED\nPacket4f pexp<Packet4f>(const Packet4f& _x)\n{\n#if !defined(__ARCH__) || (defined(__ARCH__) && __ARCH__ >= 12)\n  Packet4f x = _x;\n\n  Packet4f tmp, fx;\n  Packet4i emm0;\n\n  // clamp x\n  x = pmax(pmin(x, p4f_exp_hi), p4f_exp_lo);\n\n  // express exp(x) as exp(g + n*log(2))\n  fx = pmadd(x, p4f_cephes_LOG2EF, p4f_half);\n\n  fx = pfloor(fx);\n\n  tmp = pmul(fx, p4f_cephes_exp_C1);\n  Packet4f z = pmul(fx, p4f_cephes_exp_C2);\n  x = psub(x, tmp);\n  x = psub(x, z);\n\n  z = pmul(x,x);\n\n  Packet4f y = p4f_cephes_exp_p0;\n  y = pmadd(y, x, p4f_cephes_exp_p1);\n  y = pmadd(y, x, p4f_cephes_exp_p2);\n  y = pmadd(y, x, p4f_cephes_exp_p3);\n  y = pmadd(y, x, p4f_cephes_exp_p4);\n  y = pmadd(y, x, p4f_cephes_exp_p5);\n  y = pmadd(y, z, x);\n  y = padd(y, p4f_1);\n\n  // build 2^n\n  emm0 = (Packet4i){ (int)fx[0], (int)fx[1], (int)fx[2], (int)fx[3] };\n  emm0 = emm0 + p4i_0x7f;\n  emm0 = emm0 << reinterpret_cast<Packet4i>(p4i_23);\n\n  return pmax(pmul(y, reinterpret_cast<Packet4f>(emm0)), _x);\n#else\n  Packet4f res;\n  res.v4f[0] = pexp<Packet2d>(_x.v4f[0]);\n  res.v4f[1] = pexp<Packet2d>(_x.v4f[1]);\n  return res;\n#endif\n}\n\ntemplate<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED\nPacket2d psqrt<Packet2d>(const Packet2d& x)\n{\n  return vec_sqrt(x);\n}\n\ntemplate<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED\nPacket4f psqrt<Packet4f>(const Packet4f& x)\n{\n  Packet4f res;\n#if !defined(__ARCH__) || (defined(__ARCH__) && __ARCH__ >= 12)\n  res = vec_sqrt(x);\n#else\n  res.v4f[0] = psqrt<Packet2d>(x.v4f[0]);\n  res.v4f[1] = psqrt<Packet2d>(x.v4f[1]);\n#endif\n  return res;\n}\n\ntemplate<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED\nPacket2d prsqrt<Packet2d>(const Packet2d& x) {\n  return pset1<Packet2d>(1.0) / psqrt<Packet2d>(x);\n}\n\ntemplate<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED\nPacket4f prsqrt<Packet4f>(const Packet4f& x) {\n  Packet4f res;\n#if !defined(__ARCH__) || (defined(__ARCH__) && __ARCH__ >= 12)\n  res = pset1<Packet4f>(1.0) / psqrt<Packet4f>(x);\n#else\n  res.v4f[0] = prsqrt<Packet2d>(x.v4f[0]);\n  res.v4f[1] = prsqrt<Packet2d>(x.v4f[1]);\n#endif\n  return res;\n}\n\n// Hyperbolic Tangent function.\ntemplate <>\nEIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f\nptanh<Packet4f>(const Packet4f& x) {\n  return internal::generic_fast_tanh_float(x);\n}\n\n}  // end namespace internal\n\n}  // end namespace Eigen\n\n#endif  // EIGEN_MATH_FUNCTIONS_ALTIVEC_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/arch/ZVector/PacketMath.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2016 Konstantinos Margaritis <markos@freevec.org>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_PACKET_MATH_ZVECTOR_H\n#define EIGEN_PACKET_MATH_ZVECTOR_H\n\nnamespace Eigen {\n\nnamespace internal {\n\n#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD\n#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 16\n#endif\n\n#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD\n#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD\n#endif\n\n#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS\n#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS  32\n#endif\n\ntypedef __vector int                 Packet4i;\ntypedef __vector unsigned int        Packet4ui;\ntypedef __vector __bool int          Packet4bi;\ntypedef __vector short int           Packet8i;\ntypedef __vector unsigned char       Packet16uc;\ntypedef __vector double              Packet2d;\ntypedef __vector unsigned long long  Packet2ul;\ntypedef __vector long long           Packet2l;\n\n// Z14 has builtin support for float vectors\n#if !defined(__ARCH__) || (defined(__ARCH__) && __ARCH__ >= 12)\ntypedef __vector float               Packet4f;\n#else\ntypedef struct {\n\tPacket2d  v4f[2];\n} Packet4f;\n#endif\n\ntypedef union {\n  numext::int32_t   i[4];\n  numext::uint32_t ui[4];\n  numext::int64_t   l[2];\n  numext::uint64_t ul[2];\n  double    d[2];\n  float     f[4];\n  Packet4i  v4i;\n  Packet4ui v4ui;\n  Packet2l  v2l;\n  Packet2ul v2ul;\n  Packet2d  v2d;\n#if !defined(__ARCH__) || (defined(__ARCH__) && __ARCH__ >= 12)\n  Packet4f  v4f;\n#endif\n} Packet;\n\n// We don't want to write the same code all the time, but we need to reuse the constants\n// and it doesn't really work to declare them global, so we define macros instead\n\n#define _EIGEN_DECLARE_CONST_FAST_Packet4i(NAME,X) \\\n  Packet4i p4i_##NAME = reinterpret_cast<Packet4i>(vec_splat_s32(X))\n\n#define _EIGEN_DECLARE_CONST_FAST_Packet2d(NAME,X) \\\n  Packet2d p2d_##NAME = reinterpret_cast<Packet2d>(vec_splat_s64(X))\n\n#define _EIGEN_DECLARE_CONST_FAST_Packet2l(NAME,X) \\\n  Packet2l p2l_##NAME = reinterpret_cast<Packet2l>(vec_splat_s64(X))\n\n#define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \\\n  Packet4i p4i_##NAME = pset1<Packet4i>(X)\n\n#define _EIGEN_DECLARE_CONST_Packet2d(NAME,X) \\\n  Packet2d p2d_##NAME = pset1<Packet2d>(X)\n\n#define _EIGEN_DECLARE_CONST_Packet2l(NAME,X) \\\n  Packet2l p2l_##NAME = pset1<Packet2l>(X)\n\n// These constants are endian-agnostic\nstatic _EIGEN_DECLARE_CONST_FAST_Packet4i(ZERO, 0); //{ 0, 0, 0, 0,}\nstatic _EIGEN_DECLARE_CONST_FAST_Packet4i(ONE, 1); //{ 1, 1, 1, 1}\n\nstatic _EIGEN_DECLARE_CONST_FAST_Packet2d(ZERO, 0);\nstatic _EIGEN_DECLARE_CONST_FAST_Packet2l(ZERO, 0);\nstatic _EIGEN_DECLARE_CONST_FAST_Packet2l(ONE, 1);\n\nstatic Packet2d p2d_ONE = { 1.0, 1.0 };\nstatic Packet2d p2d_ZERO_ = { numext::bit_cast<double>0x8000000000000000ull),\n                              numext::bit_cast<double>0x8000000000000000ull) };\n\n#if !defined(__ARCH__) || (defined(__ARCH__) && __ARCH__ >= 12)\n#define _EIGEN_DECLARE_CONST_FAST_Packet4f(NAME,X) \\\n  Packet4f p4f_##NAME = reinterpret_cast<Packet4f>(vec_splat_s32(X))\n\n#define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \\\n  Packet4f p4f_##NAME = pset1<Packet4f>(X)\n\n#define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \\\n  const Packet4f 
p4f_##NAME = reinterpret_cast<Packet4f>(pset1<Packet4i>(X))\n\nstatic _EIGEN_DECLARE_CONST_FAST_Packet4f(ZERO, 0); //{ 0.0, 0.0, 0.0, 0.0}\nstatic _EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS1,-1); //{ -1, -1, -1, -1}\nstatic Packet4f p4f_MZERO = { 0x80000000, 0x80000000, 0x80000000, 0x80000000};\n#endif\n\nstatic Packet4i p4i_COUNTDOWN = { 0, 1, 2, 3 };\nstatic Packet4f p4f_COUNTDOWN = { 0.0, 1.0, 2.0, 3.0 };\nstatic Packet2d p2d_COUNTDOWN = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet16uc>(p2d_ZERO), reinterpret_cast<Packet16uc>(p2d_ONE), 8));\n\nstatic Packet16uc p16uc_PSET64_HI = { 0,1,2,3, 4,5,6,7, 0,1,2,3, 4,5,6,7 };\nstatic Packet16uc p16uc_DUPLICATE32_HI = { 0,1,2,3, 0,1,2,3, 4,5,6,7, 4,5,6,7 };\n\n// Mask alignment\n#define _EIGEN_MASK_ALIGNMENT\t0xfffffffffffffff0\n\n#define _EIGEN_ALIGNED_PTR(x)\t((std::ptrdiff_t)(x) & _EIGEN_MASK_ALIGNMENT)\n\n// Handle endianness properly while loading constants\n// Define global static constants:\n\nstatic Packet16uc p16uc_FORWARD =   { 0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15 };\nstatic Packet16uc p16uc_REVERSE32 = { 12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3 };\nstatic Packet16uc p16uc_REVERSE64 = { 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 };\n\nstatic Packet16uc p16uc_PSET32_WODD   = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 0), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 2), 8);//{ 0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11 };\nstatic Packet16uc p16uc_PSET32_WEVEN  = vec_sld(p16uc_DUPLICATE32_HI, (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 3), 8);//{ 4,5,6,7, 4,5,6,7, 12,13,14,15, 12,13,14,15 };\n/*static Packet16uc p16uc_HALF64_0_16 = vec_sld((Packet16uc)p4i_ZERO, vec_splat((Packet16uc) vec_abs(p4i_MINUS16), 3), 8);      //{ 0,0,0,0, 0,0,0,0, 16,16,16,16, 16,16,16,16};\n\nstatic Packet16uc p16uc_PSET64_HI = (Packet16uc) vec_mergeh((Packet4ui)p16uc_PSET32_WODD, (Packet4ui)p16uc_PSET32_WEVEN);     //{ 0,1,2,3, 4,5,6,7, 0,1,2,3, 4,5,6,7 };*/\nstatic Packet16uc p16uc_PSET64_LO = (Packet16uc) vec_mergel((Packet4ui)p16uc_PSET32_WODD, (Packet4ui)p16uc_PSET32_WEVEN);     //{ 8,9,10,11, 12,13,14,15, 8,9,10,11, 12,13,14,15 };\n/*static Packet16uc p16uc_TRANSPOSE64_HI = vec_add(p16uc_PSET64_HI, p16uc_HALF64_0_16);                                         //{ 0,1,2,3, 4,5,6,7, 16,17,18,19, 20,21,22,23};\nstatic Packet16uc p16uc_TRANSPOSE64_LO = vec_add(p16uc_PSET64_LO, p16uc_HALF64_0_16);                                         //{ 8,9,10,11, 12,13,14,15, 24,25,26,27, 28,29,30,31};*/\nstatic Packet16uc p16uc_TRANSPOSE64_HI = { 0,1,2,3, 4,5,6,7, 16,17,18,19, 20,21,22,23};\nstatic Packet16uc p16uc_TRANSPOSE64_LO = { 8,9,10,11, 12,13,14,15, 24,25,26,27, 28,29,30,31};\n\nstatic Packet16uc p16uc_COMPLEX32_REV = vec_sld(p16uc_REVERSE32, p16uc_REVERSE32, 8);                                         //{ 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11 };\n\nstatic Packet16uc p16uc_COMPLEX32_REV2 = vec_sld(p16uc_FORWARD, p16uc_FORWARD, 8);                                            //{ 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 };\n\n\n#if EIGEN_HAS_BUILTIN(__builtin_prefetch) || EIGEN_COMP_GNUC\n  #define EIGEN_ZVECTOR_PREFETCH(ADDR) __builtin_prefetch(ADDR);\n#else\n  #define EIGEN_ZVECTOR_PREFETCH(ADDR) asm( \"   pfd [%[addr]]\\n\" :: [addr] \"r\" (ADDR) : \"cc\" );\n#endif\n\ntemplate<> struct packet_traits<int>    : default_packet_traits\n{\n  typedef Packet4i type;\n  typedef Packet4i half;\n  enum {\n    Vectorizable = 1,\n    AlignedOnScalar = 1,\n    size = 4,\n    HasHalfPacket = 0,\n\n    HasAdd  = 1,\n    HasSub  = 1,\n    HasMul 
 = 1,\n    HasDiv  = 1,\n    HasBlend = 1\n  };\n};\n\ntemplate <>\nstruct packet_traits<float> : default_packet_traits {\n  typedef Packet4f type;\n  typedef Packet4f half;\n  enum {\n    Vectorizable = 1,\n    AlignedOnScalar = 1,\n    size = 4,\n    HasHalfPacket = 0,\n\n    HasAdd = 1,\n    HasSub = 1,\n    HasMul = 1,\n    HasDiv = 1,\n    HasMin = 1,\n    HasMax = 1,\n    HasAbs = 1,\n    HasSin = 0,\n    HasCos = 0,\n    HasLog = 0,\n    HasExp = 1,\n    HasSqrt = 1,\n    HasRsqrt = 1,\n    HasTanh = 1,\n    HasErf = 1,\n    HasRound = 1,\n    HasFloor = 1,\n    HasCeil = 1,\n    HasNegate = 1,\n    HasBlend = 1\n  };\n};\n\ntemplate<> struct packet_traits<double> : default_packet_traits\n{\n  typedef Packet2d type;\n  typedef Packet2d half;\n  enum {\n    Vectorizable = 1,\n    AlignedOnScalar = 1,\n    size=2,\n    HasHalfPacket = 1,\n\n    HasAdd  = 1,\n    HasSub  = 1,\n    HasMul  = 1,\n    HasDiv  = 1,\n    HasMin  = 1,\n    HasMax  = 1,\n    HasAbs  = 1,\n    HasSin  = 0,\n    HasCos  = 0,\n    HasLog  = 0,\n    HasExp  = 1,\n    HasSqrt = 1,\n    HasRsqrt = 1,\n    HasRound = 1,\n    HasFloor = 1,\n    HasCeil = 1,\n    HasNegate = 1,\n    HasBlend = 1\n  };\n};\n\ntemplate<> struct unpacket_traits<Packet4i> { typedef int    type; enum {size=4, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet4i half; };\ntemplate<> struct unpacket_traits<Packet4f> { typedef float  type; enum {size=4, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet4f half; };\ntemplate<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet2d half; };\n\n/* Forward declaration */\nEIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet4f,4>& kernel);\n \ninline std::ostream & operator <<(std::ostream & s, const Packet4i & v)\n{\n  Packet vt;\n  vt.v4i = v;\n  s << vt.i[0] << \", \" << vt.i[1] << \", \" << vt.i[2] << \", \" << vt.i[3];\n  return s;\n}\n\ninline std::ostream & operator <<(std::ostream & s, const Packet4ui & v)\n{\n  Packet vt;\n  vt.v4ui = v;\n  s << vt.ui[0] << \", \" << vt.ui[1] << \", \" << vt.ui[2] << \", \" << vt.ui[3];\n  return s;\n}\n\ninline std::ostream & operator <<(std::ostream & s, const Packet2l & v)\n{\n  Packet vt;\n  vt.v2l = v;\n  s << vt.l[0] << \", \" << vt.l[1];\n  return s;\n}\n\ninline std::ostream & operator <<(std::ostream & s, const Packet2ul & v)\n{\n  Packet vt;\n  vt.v2ul = v;\n  s << vt.ul[0] << \", \" << vt.ul[1] ;\n  return s;\n}\n\ninline std::ostream & operator <<(std::ostream & s, const Packet2d & v)\n{\n  Packet vt;\n  vt.v2d = v;\n  s << vt.d[0] << \", \" << vt.d[1];\n  return s;\n}\n\n#if !defined(__ARCH__) || (defined(__ARCH__) && __ARCH__ >= 12)\ninline std::ostream & operator <<(std::ostream & s, const Packet4f & v)\n{\n  Packet vt;\n  vt.v4f = v;\n  s << vt.f[0] << \", \" << vt.f[1] << \", \" << vt.f[2] << \", \" << vt.f[3];\n  return s;\n}\n#endif\n\ntemplate<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int*     from)\n{\n  // FIXME: No intrinsic yet\n  EIGEN_DEBUG_ALIGNED_LOAD\n  Packet *vfrom;\n  vfrom = (Packet *) from;\n  return vfrom->v4i;\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet2d pload<Packet2d>(const double* from)\n{\n  // FIXME: No intrinsic yet\n  EIGEN_DEBUG_ALIGNED_LOAD\n  Packet *vfrom;\n  vfrom = (Packet *) from;\n  return vfrom->v2d;\n}\n\ntemplate<> 
EIGEN_STRONG_INLINE void pstore<int>(int*       to, const Packet4i& from)\n{\n  // FIXME: No intrinsic yet\n  EIGEN_DEBUG_ALIGNED_STORE\n  Packet *vto;\n  vto = (Packet *) to;\n  vto->v4i = from;\n}\n\ntemplate<> EIGEN_STRONG_INLINE void pstore<double>(double*   to, const Packet2d& from)\n{\n  // FIXME: No intrinsic yet\n  EIGEN_DEBUG_ALIGNED_STORE\n  Packet *vto;\n  vto = (Packet *) to;\n  vto->v2d = from;\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int&    from)\n{\n  return vec_splats(from);\n}\ntemplate<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) {\n  return vec_splats(from);\n}\n\ntemplate<> EIGEN_STRONG_INLINE void\npbroadcast4<Packet4i>(const int *a,\n                      Packet4i& a0, Packet4i& a1, Packet4i& a2, Packet4i& a3)\n{\n  a3 = pload<Packet4i>(a);\n  a0 = vec_splat(a3, 0);\n  a1 = vec_splat(a3, 1);\n  a2 = vec_splat(a3, 2);\n  a3 = vec_splat(a3, 3);\n}\n\ntemplate<> EIGEN_STRONG_INLINE void\npbroadcast4<Packet2d>(const double *a,\n                      Packet2d& a0, Packet2d& a1, Packet2d& a2, Packet2d& a3)\n{\n  a1 = pload<Packet2d>(a);\n  a0 = vec_splat(a1, 0);\n  a1 = vec_splat(a1, 1);\n  a3 = pload<Packet2d>(a+2);\n  a2 = vec_splat(a3, 0);\n  a3 = vec_splat(a3, 1);\n}\n\ntemplate<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* from, Index stride)\n{\n  int EIGEN_ALIGN16 ai[4];\n  ai[0] = from[0*stride];\n  ai[1] = from[1*stride];\n  ai[2] = from[2*stride];\n  ai[3] = from[3*stride];\n return pload<Packet4i>(ai);\n}\n\ntemplate<> EIGEN_DEVICE_FUNC inline Packet2d pgather<double, Packet2d>(const double* from, Index stride)\n{\n  double EIGEN_ALIGN16 af[2];\n  af[0] = from[0*stride];\n  af[1] = from[1*stride];\n return pload<Packet2d>(af);\n}\n\ntemplate<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const Packet4i& from, Index stride)\n{\n  int EIGEN_ALIGN16 ai[4];\n  pstore<int>((int *)ai, from);\n  to[0*stride] = ai[0];\n  to[1*stride] = ai[1];\n  to[2*stride] = ai[2];\n  to[3*stride] = ai[3];\n}\n\ntemplate<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet2d>(double* to, const Packet2d& from, Index stride)\n{\n  double EIGEN_ALIGN16 af[2];\n  pstore<double>(af, from);\n  to[0*stride] = af[0];\n  to[1*stride] = af[1];\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4i padd<Packet4i>(const Packet4i& a, const Packet4i& b) { return (a + b); }\ntemplate<> EIGEN_STRONG_INLINE Packet2d padd<Packet2d>(const Packet2d& a, const Packet2d& b) { return (a + b); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet4i psub<Packet4i>(const Packet4i& a, const Packet4i& b) { return (a - b); }\ntemplate<> EIGEN_STRONG_INLINE Packet2d psub<Packet2d>(const Packet2d& a, const Packet2d& b) { return (a - b); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet4i pmul<Packet4i>(const Packet4i& a, const Packet4i& b) { return (a * b); }\ntemplate<> EIGEN_STRONG_INLINE Packet2d pmul<Packet2d>(const Packet2d& a, const Packet2d& b) { return (a * b); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& a, const Packet4i& b) { return (a / b); }\ntemplate<> EIGEN_STRONG_INLINE Packet2d pdiv<Packet2d>(const Packet2d& a, const Packet2d& b) { return (a / b); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a) { return (-a); }\ntemplate<> EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a) { return (-a); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet4i pconj(const Packet4i& a) { return a; }\ntemplate<> EIGEN_STRONG_INLINE Packet2d pconj(const Packet2d& a) { return a; }\n\ntemplate<> 
EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return padd<Packet4i>(pmul<Packet4i>(a, b), c); }\ntemplate<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return vec_madd(a, b, c); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet4i plset<Packet4i>(const int& a)    { return padd<Packet4i>(pset1<Packet4i>(a), p4i_COUNTDOWN); }\ntemplate<> EIGEN_STRONG_INLINE Packet2d plset<Packet2d>(const double& a) { return padd<Packet2d>(pset1<Packet2d>(a), p2d_COUNTDOWN); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_min(a, b); }\ntemplate<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_min(a, b); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_max(a, b); }\ntemplate<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_max(a, b); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet4i pand<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_and(a, b); }\ntemplate<> EIGEN_STRONG_INLINE Packet2d pand<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_and(a, b); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet4i por<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_or(a, b); }\ntemplate<> EIGEN_STRONG_INLINE Packet2d por<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_or(a, b); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet4i pxor<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_xor(a, b); }\ntemplate<> EIGEN_STRONG_INLINE Packet2d pxor<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_xor(a, b); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return pand<Packet4i>(a, vec_nor(b, b)); }\ntemplate<> EIGEN_STRONG_INLINE Packet2d pandnot<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_and(a, vec_nor(b, b)); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet2d pround<Packet2d>(const Packet2d& a) { return vec_round(a); }\ntemplate<> EIGEN_STRONG_INLINE Packet2d pceil<Packet2d>(const  Packet2d& a) { return vec_ceil(a); }\ntemplate<> EIGEN_STRONG_INLINE Packet2d pfloor<Packet2d>(const Packet2d& a) { return vec_floor(a); }\n\ntemplate<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int*       from) { return pload<Packet4i>(from); }\ntemplate<> EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double*    from) { return pload<Packet2d>(from); }\n\n\ntemplate<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int*     from)\n{\n  Packet4i p = pload<Packet4i>(from);\n  return vec_perm(p, p, p16uc_DUPLICATE32_HI);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet2d ploaddup<Packet2d>(const double*   from)\n{\n  Packet2d p = pload<Packet2d>(from);\n  return vec_perm(p, p, p16uc_PSET64_HI);\n}\n\ntemplate<> EIGEN_STRONG_INLINE void pstoreu<int>(int*        to, const Packet4i& from) { pstore<int>(to, from); }\ntemplate<> EIGEN_STRONG_INLINE void pstoreu<double>(double*  to, const Packet2d& from) { pstore<double>(to, from); }\n\ntemplate<> EIGEN_STRONG_INLINE void prefetch<int>(const int*       addr) { EIGEN_ZVECTOR_PREFETCH(addr); }\ntemplate<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { EIGEN_ZVECTOR_PREFETCH(addr); }\n\ntemplate<> EIGEN_STRONG_INLINE int    pfirst<Packet4i>(const Packet4i& a) { int    EIGEN_ALIGN16 x[4]; pstore(x, a); return x[0]; }\ntemplate<> EIGEN_STRONG_INLINE double 
pfirst<Packet2d>(const Packet2d& a) { double EIGEN_ALIGN16 x[2]; pstore(x, a); return x[0]; }\n\ntemplate<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a)\n{\n  return reinterpret_cast<Packet4i>(vec_perm(reinterpret_cast<Packet16uc>(a), reinterpret_cast<Packet16uc>(a), p16uc_REVERSE32));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a)\n{\n  return reinterpret_cast<Packet2d>(vec_perm(reinterpret_cast<Packet16uc>(a), reinterpret_cast<Packet16uc>(a), p16uc_REVERSE64));\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4i pabs<Packet4i>(const Packet4i& a) { return vec_abs(a); }\ntemplate<> EIGEN_STRONG_INLINE Packet2d pabs<Packet2d>(const Packet2d& a) { return vec_abs(a); }\n\ntemplate<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a)\n{\n  Packet4i b, sum;\n  b   = vec_sld(a, a, 8);\n  sum = padd<Packet4i>(a, b);\n  b   = vec_sld(sum, sum, 4);\n  sum = padd<Packet4i>(sum, b);\n  return pfirst(sum);\n}\n\ntemplate<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a)\n{\n  Packet2d b, sum;\n  b   = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(a), reinterpret_cast<Packet4i>(a), 8));\n  sum = padd<Packet2d>(a, b);\n  return pfirst(sum);\n}\n\n// Other reduction functions:\n// mul\ntemplate<> EIGEN_STRONG_INLINE int predux_mul<Packet4i>(const Packet4i& a)\n{\n  EIGEN_ALIGN16 int aux[4];\n  pstore(aux, a);\n  return aux[0] * aux[1] * aux[2] * aux[3];\n}\n\ntemplate<> EIGEN_STRONG_INLINE double predux_mul<Packet2d>(const Packet2d& a)\n{\n  return pfirst(pmul(a, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(a), reinterpret_cast<Packet4i>(a), 8))));\n}\n\n// min\ntemplate<> EIGEN_STRONG_INLINE int predux_min<Packet4i>(const Packet4i& a)\n{\n  Packet4i b, res;\n  b   = pmin<Packet4i>(a, vec_sld(a, a, 8));\n  res = pmin<Packet4i>(b, vec_sld(b, b, 4));\n  return pfirst(res);\n}\n\ntemplate<> EIGEN_STRONG_INLINE double predux_min<Packet2d>(const Packet2d& a)\n{\n  return pfirst(pmin<Packet2d>(a, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(a), reinterpret_cast<Packet4i>(a), 8))));\n}\n\n// max\ntemplate<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a)\n{\n  Packet4i b, res;\n  b = pmax<Packet4i>(a, vec_sld(a, a, 8));\n  res = pmax<Packet4i>(b, vec_sld(b, b, 4));\n  return pfirst(res);\n}\n\n// max\ntemplate<> EIGEN_STRONG_INLINE double predux_max<Packet2d>(const Packet2d& a)\n{\n  return pfirst(pmax<Packet2d>(a, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(a), reinterpret_cast<Packet4i>(a), 8))));\n}\n\nEIGEN_DEVICE_FUNC inline void\nptranspose(PacketBlock<Packet4i,4>& kernel) {\n  Packet4i t0 = vec_mergeh(kernel.packet[0], kernel.packet[2]);\n  Packet4i t1 = vec_mergel(kernel.packet[0], kernel.packet[2]);\n  Packet4i t2 = vec_mergeh(kernel.packet[1], kernel.packet[3]);\n  Packet4i t3 = vec_mergel(kernel.packet[1], kernel.packet[3]);\n  kernel.packet[0] = vec_mergeh(t0, t2);\n  kernel.packet[1] = vec_mergel(t0, t2);\n  kernel.packet[2] = vec_mergeh(t1, t3);\n  kernel.packet[3] = vec_mergel(t1, t3);\n}\n\nEIGEN_DEVICE_FUNC inline void\nptranspose(PacketBlock<Packet2d,2>& kernel) {\n  Packet2d t0 = vec_perm(kernel.packet[0], kernel.packet[1], p16uc_TRANSPOSE64_HI);\n  Packet2d t1 = vec_perm(kernel.packet[0], kernel.packet[1], p16uc_TRANSPOSE64_LO);\n  kernel.packet[0] = t0;\n  kernel.packet[1] = t1;\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4i pblend(const Selector<4>& ifPacket, const Packet4i& thenPacket, const Packet4i& elsePacket) {\n  Packet4ui select = { 
ifPacket.select[0], ifPacket.select[1], ifPacket.select[2], ifPacket.select[3] };\n  Packet4ui mask = vec_cmpeq(select, reinterpret_cast<Packet4ui>(p4i_ONE));\n  return vec_sel(elsePacket, thenPacket, mask);\n}\n\n\ntemplate<> EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, const Packet2d& thenPacket, const Packet2d& elsePacket) {\n  Packet2ul select = { ifPacket.select[0], ifPacket.select[1] };\n  Packet2ul mask = vec_cmpeq(select, reinterpret_cast<Packet2ul>(p2l_ONE));\n  return vec_sel(elsePacket, thenPacket, mask);\n}\n\n/* z13 has no vector float support so we emulate that with double\n   z14 has proper vector float support.\n*/\n#if !defined(__ARCH__) || (defined(__ARCH__) && __ARCH__ < 12)\n/* Helper function to simulate a vec_splat_packet4f\n */\ntemplate<int element> EIGEN_STRONG_INLINE Packet4f vec_splat_packet4f(const Packet4f&   from)\n{\n  Packet4f splat;\n  switch (element) {\n  case 0:\n    splat.v4f[0] = vec_splat(from.v4f[0], 0);\n    splat.v4f[1] = splat.v4f[0];\n    break;\n  case 1:\n    splat.v4f[0] = vec_splat(from.v4f[0], 1);\n    splat.v4f[1] = splat.v4f[0];\n    break;\n  case 2:\n    splat.v4f[0] = vec_splat(from.v4f[1], 0);\n    splat.v4f[1] = splat.v4f[0];\n    break;\n  case 3:\n    splat.v4f[0] = vec_splat(from.v4f[1], 1);\n    splat.v4f[1] = splat.v4f[0];\n    break;\n  }\n  return splat;\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float*   from)\n{\n  // FIXME: No intrinsic yet\n  EIGEN_DEBUG_ALIGNED_LOAD\n  Packet4f vfrom;\n  vfrom.v4f[0] = vec_ld2f(&from[0]);\n  vfrom.v4f[1] = vec_ld2f(&from[2]);\n  return vfrom;\n}\n\ntemplate<> EIGEN_STRONG_INLINE void pstore<float>(float*   to, const Packet4f& from)\n{\n  // FIXME: No intrinsic yet\n  EIGEN_DEBUG_ALIGNED_STORE\n  vec_st2f(from.v4f[0], &to[0]);\n  vec_st2f(from.v4f[1], &to[2]);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float&    from)\n{\n  Packet4f to;\n  to.v4f[0] = pset1<Packet2d>(static_cast<const double&>(from));\n  to.v4f[1] = to.v4f[0];\n  return to;\n}\n\ntemplate<> EIGEN_STRONG_INLINE void\npbroadcast4<Packet4f>(const float *a,\n                      Packet4f& a0, Packet4f& a1, Packet4f& a2, Packet4f& a3)\n{\n  a3 = pload<Packet4f>(a);\n  a0 = vec_splat_packet4f<0>(a3);\n  a1 = vec_splat_packet4f<1>(a3);\n  a2 = vec_splat_packet4f<2>(a3);\n  a3 = vec_splat_packet4f<3>(a3);\n}\n\ntemplate<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, Index stride)\n{\n  float EIGEN_ALIGN16 ai[4];\n  ai[0] = from[0*stride];\n  ai[1] = from[1*stride];\n  ai[2] = from[2*stride];\n  ai[3] = from[3*stride];\n return pload<Packet4f>(ai);\n}\n\ntemplate<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, Index stride)\n{\n  float EIGEN_ALIGN16 ai[4];\n  pstore<float>((float *)ai, from);\n  to[0*stride] = ai[0];\n  to[1*stride] = ai[1];\n  to[2*stride] = ai[2];\n  to[3*stride] = ai[3];\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f padd<Packet4f>(const Packet4f& a, const Packet4f& b)\n{\n  Packet4f c;\n  c.v4f[0] = a.v4f[0] + b.v4f[0];\n  c.v4f[1] = a.v4f[1] + b.v4f[1];\n  return c;\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f psub<Packet4f>(const Packet4f& a, const Packet4f& b)\n{\n  Packet4f c;\n  c.v4f[0] = a.v4f[0] - b.v4f[0];\n  c.v4f[1] = a.v4f[1] - b.v4f[1];\n  return c;\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f pmul<Packet4f>(const Packet4f& a, const Packet4f& b)\n{\n  Packet4f c;\n  c.v4f[0] = a.v4f[0] * b.v4f[0];\n  c.v4f[1] = a.v4f[1] * b.v4f[1];\n  return 
c;\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b)\n{\n  Packet4f c;\n  c.v4f[0] = a.v4f[0] / b.v4f[0];\n  c.v4f[1] = a.v4f[1] / b.v4f[1];\n  return c;\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a)\n{\n  Packet4f c;\n  c.v4f[0] = -a.v4f[0];\n  c.v4f[1] = -a.v4f[1];\n  return c;\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c)\n{\n  Packet4f res;\n  res.v4f[0] = vec_madd(a.v4f[0], b.v4f[0], c.v4f[0]);\n  res.v4f[1] = vec_madd(a.v4f[1], b.v4f[1], c.v4f[1]);\n  return res;\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b)\n{\n  Packet4f res;\n  res.v4f[0] = pmin(a.v4f[0], b.v4f[0]);\n  res.v4f[1] = pmin(a.v4f[1], b.v4f[1]);\n  return res;\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b)\n{\n  Packet4f res;\n  res.v4f[0] = pmax(a.v4f[0], b.v4f[0]);\n  res.v4f[1] = pmax(a.v4f[1], b.v4f[1]);\n  return res;\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b)\n{\n  Packet4f res;\n  res.v4f[0] = pand(a.v4f[0], b.v4f[0]);\n  res.v4f[1] = pand(a.v4f[1], b.v4f[1]);\n  return res;\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f por<Packet4f>(const Packet4f& a, const Packet4f& b)\n{\n  Packet4f res;\n  res.v4f[0] = por(a.v4f[0], b.v4f[0]);\n  res.v4f[1] = por(a.v4f[1], b.v4f[1]);\n  return res;\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f pxor<Packet4f>(const Packet4f& a, const Packet4f& b)\n{\n  Packet4f res;\n  res.v4f[0] = pxor(a.v4f[0], b.v4f[0]);\n  res.v4f[1] = pxor(a.v4f[1], b.v4f[1]);\n  return res;\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, const Packet4f& b)\n{\n  Packet4f res;\n  res.v4f[0] = pandnot(a.v4f[0], b.v4f[0]);\n  res.v4f[1] = pandnot(a.v4f[1], b.v4f[1]);\n  return res;\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f pround<Packet4f>(const Packet4f& a)\n{\n  Packet4f res;\n  res.v4f[0] = vec_round(a.v4f[0]);\n  res.v4f[1] = vec_round(a.v4f[1]);\n  return res;\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f pceil<Packet4f>(const  Packet4f& a)\n{\n  Packet4f res;\n  res.v4f[0] = vec_ceil(a.v4f[0]);\n  res.v4f[1] = vec_ceil(a.v4f[1]);\n  return res;\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f pfloor<Packet4f>(const Packet4f& a)\n{\n  Packet4f res;\n  res.v4f[0] = vec_floor(a.v4f[0]);\n  res.v4f[1] = vec_floor(a.v4f[1]);\n  return res;\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float*    from)\n{\n  Packet4f p = pload<Packet4f>(from);\n  p.v4f[1] = vec_splat(p.v4f[0], 1);\n  p.v4f[0] = vec_splat(p.v4f[0], 0);\n  return p;\n}\n\ntemplate<> EIGEN_STRONG_INLINE float  pfirst<Packet4f>(const Packet4f& a) { float  EIGEN_ALIGN16 x[2]; vec_st2f(a.v4f[0], &x[0]); return x[0]; }\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a)\n{\n  Packet4f rev;\n  rev.v4f[0] = preverse<Packet2d>(a.v4f[1]);\n  rev.v4f[1] = preverse<Packet2d>(a.v4f[0]);\n  return rev;\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f pabs<Packet4f>(const Packet4f& a)\n{\n  Packet4f res;\n  res.v4f[0] = pabs(a.v4f[0]);\n  res.v4f[1] = pabs(a.v4f[1]);\n  return res;\n}\n\ntemplate<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)\n{\n  Packet2d sum;\n  sum = padd<Packet2d>(a.v4f[0], a.v4f[1]);\n  double first = predux<Packet2d>(sum);\n  return static_cast<float>(first);\n}\n\ntemplate<> EIGEN_STRONG_INLINE float predux_mul<Packet4f>(const Packet4f& a)\n{\n  
// Return predux_mul<Packet2d> of the subvectors product\n  return static_cast<float>(pfirst(predux_mul(pmul(a.v4f[0], a.v4f[1]))));\n}\n\ntemplate<> EIGEN_STRONG_INLINE float predux_min<Packet4f>(const Packet4f& a)\n{\n  Packet2d b, res;\n  b   = pmin<Packet2d>(a.v4f[0], a.v4f[1]);\n  res = pmin<Packet2d>(b, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(b), reinterpret_cast<Packet4i>(b), 8)));\n  return static_cast<float>(pfirst(res));\n}\n\ntemplate<> EIGEN_STRONG_INLINE float predux_max<Packet4f>(const Packet4f& a)\n{\n  Packet2d b, res;\n  b   = pmax<Packet2d>(a.v4f[0], a.v4f[1]);\n  res = pmax<Packet2d>(b, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(b), reinterpret_cast<Packet4i>(b), 8)));\n  return static_cast<float>(pfirst(res));\n}\n\n/* Split the Packet4f PacketBlock into 4 Packet2d PacketBlocks and transpose each one\n */\nEIGEN_DEVICE_FUNC inline void\nptranspose(PacketBlock<Packet4f,4>& kernel) {\n  PacketBlock<Packet2d,2> t0,t1,t2,t3;\n  // copy top-left 2x2 Packet2d block\n  t0.packet[0] = kernel.packet[0].v4f[0];\n  t0.packet[1] = kernel.packet[1].v4f[0];\n\n  // copy top-right 2x2 Packet2d block\n  t1.packet[0] = kernel.packet[0].v4f[1];\n  t1.packet[1] = kernel.packet[1].v4f[1];\n\n  // copy bottom-left 2x2 Packet2d block\n  t2.packet[0] = kernel.packet[2].v4f[0];\n  t2.packet[1] = kernel.packet[3].v4f[0];\n\n  // copy bottom-right 2x2 Packet2d block\n  t3.packet[0] = kernel.packet[2].v4f[1];\n  t3.packet[1] = kernel.packet[3].v4f[1];\n\n  // Transpose all 2x2 blocks\n  ptranspose(t0);\n  ptranspose(t1);\n  ptranspose(t2);\n  ptranspose(t3);\n\n  // Copy back transposed blocks, but exchange t1 and t2 due to transposition\n  kernel.packet[0].v4f[0] = t0.packet[0];\n  kernel.packet[0].v4f[1] = t2.packet[0];\n  kernel.packet[1].v4f[0] = t0.packet[1];\n  kernel.packet[1].v4f[1] = t2.packet[1];\n  kernel.packet[2].v4f[0] = t1.packet[0];\n  kernel.packet[2].v4f[1] = t3.packet[0];\n  kernel.packet[3].v4f[0] = t1.packet[1];\n  kernel.packet[3].v4f[1] = t3.packet[1];\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f pblend(const Selector<4>& ifPacket, const Packet4f& thenPacket, const Packet4f& elsePacket) {\n  Packet2ul select_hi = { ifPacket.select[0], ifPacket.select[1] };\n  Packet2ul select_lo = { ifPacket.select[2], ifPacket.select[3] };\n  Packet2ul mask_hi = vec_cmpeq(select_hi, reinterpret_cast<Packet2ul>(p2l_ONE));\n  Packet2ul mask_lo = vec_cmpeq(select_lo, reinterpret_cast<Packet2ul>(p2l_ONE));\n  Packet4f result;\n  result.v4f[0] = vec_sel(elsePacket.v4f[0], thenPacket.v4f[0], mask_hi);\n  result.v4f[1] = vec_sel(elsePacket.v4f[1], thenPacket.v4f[1], mask_lo);\n  return result;\n}\n\ntemplate<> Packet4f EIGEN_STRONG_INLINE pcmp_le<Packet4f>(const Packet4f& a, const Packet4f& b)\n{\n  Packet4f res;\n  res.v4f[0] = pcmp_le(a.v4f[0], b.v4f[0]);\n  res.v4f[1] = pcmp_le(a.v4f[1], b.v4f[1]);\n  return res;\n}\n\ntemplate<> Packet4f EIGEN_STRONG_INLINE pcmp_lt<Packet4f>(const Packet4f& a, const Packet4f& b)\n{\n  Packet4f res;\n  res.v4f[0] = pcmp_lt(a.v4f[0], b.v4f[0]);\n  res.v4f[1] = pcmp_lt(a.v4f[1], b.v4f[1]);\n  return res;\n}\n\ntemplate<> Packet4f EIGEN_STRONG_INLINE pcmp_eq<Packet4f>(const Packet4f& a, const Packet4f& b)\n{\n  Packet4f res;\n  res.v4f[0] = pcmp_eq(a.v4f[0], b.v4f[0]);\n  res.v4f[1] = pcmp_eq(a.v4f[1], b.v4f[1]);\n  return res;\n}\n\n#else\ntemplate<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from)\n{\n  // FIXME: No intrinsic yet\n  EIGEN_DEBUG_ALIGNED_LOAD\n  Packet *vfrom;\n  vfrom = (Packet *) from;\n  
return vfrom->v4f;\n}\n\ntemplate<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from)\n{\n  // FIXME: No intrinsic yet\n  EIGEN_DEBUG_ALIGNED_STORE\n  Packet *vto;\n  vto = (Packet *) to;\n  vto->v4f = from;\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from)\n{\n  return vec_splats(from);\n}\n\ntemplate<> EIGEN_STRONG_INLINE void\npbroadcast4<Packet4f>(const float *a,\n                      Packet4f& a0, Packet4f& a1, Packet4f& a2, Packet4f& a3)\n{\n  a3 = pload<Packet4f>(a);\n  a0 = vec_splat(a3, 0);\n  a1 = vec_splat(a3, 1);\n  a2 = vec_splat(a3, 2);\n  a3 = vec_splat(a3, 3);\n}\n\ntemplate<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, Index stride)\n{\n  float EIGEN_ALIGN16 af[4];\n  af[0] = from[0*stride];\n  af[1] = from[1*stride];\n  af[2] = from[2*stride];\n  af[3] = from[3*stride];\n return pload<Packet4f>(af);\n}\n\ntemplate<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, Index stride)\n{\n  float EIGEN_ALIGN16 af[4];\n  pstore<float>((float*)af, from);\n  to[0*stride] = af[0];\n  to[1*stride] = af[1];\n  to[2*stride] = af[2];\n  to[3*stride] = af[3];\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f padd<Packet4f>(const Packet4f& a, const Packet4f& b) { return (a + b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4f psub<Packet4f>(const Packet4f& a, const Packet4f& b) { return (a - b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4f pmul<Packet4f>(const Packet4f& a, const Packet4f& b) { return (a * b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b) { return (a / b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4f pnegate<Packet4f>(const Packet4f& a) { return (-a); }\ntemplate<> EIGEN_STRONG_INLINE Packet4f pconj<Packet4f>  (const Packet4f& a) { return a; }\ntemplate<> EIGEN_STRONG_INLINE Packet4f pmadd<Packet4f>  (const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vec_madd(a, b, c); }\ntemplate<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>   (const Packet4f& a, const Packet4f& b) { return vec_min(a, b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>   (const Packet4f& a, const Packet4f& b) { return vec_max(a, b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>   (const Packet4f& a, const Packet4f& b) { return vec_and(a, b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4f por<Packet4f>    (const Packet4f& a, const Packet4f& b) { return vec_or(a, b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4f pxor<Packet4f>   (const Packet4f& a, const Packet4f& b) { return vec_xor(a, b); }\ntemplate<> EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_and(a, vec_nor(b, b)); }\ntemplate<> EIGEN_STRONG_INLINE Packet4f pround<Packet4f> (const Packet4f& a) { return vec_round(a); }\ntemplate<> EIGEN_STRONG_INLINE Packet4f pceil<Packet4f>  (const Packet4f& a) { return vec_ceil(a); }\ntemplate<> EIGEN_STRONG_INLINE Packet4f pfloor<Packet4f> (const Packet4f& a) { return vec_floor(a); }\ntemplate<> EIGEN_STRONG_INLINE Packet4f pabs<Packet4f>   (const Packet4f& a) { return vec_abs(a); }\ntemplate<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { float EIGEN_ALIGN16 x[4]; pstore(x, a); return x[0]; }\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from)\n{\n  Packet4f p = pload<Packet4f>(from);\n  return vec_perm(p, p, p16uc_DUPLICATE32_HI);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a)\n{\n  return 
reinterpret_cast<Packet4f>(vec_perm(reinterpret_cast<Packet16uc>(a), reinterpret_cast<Packet16uc>(a), p16uc_REVERSE32));\n}\n\ntemplate<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)\n{\n  Packet4f b, sum;\n  b   = vec_sld(a, a, 8);\n  sum = padd<Packet4f>(a, b);\n  b   = vec_sld(sum, sum, 4);\n  sum = padd<Packet4f>(sum, b);\n  return pfirst(sum);\n}\n\n// Other reduction functions:\n// mul\ntemplate<> EIGEN_STRONG_INLINE float predux_mul<Packet4f>(const Packet4f& a)\n{\n  Packet4f prod;\n  prod = pmul(a, vec_sld(a, a, 8));\n  return pfirst(pmul(prod, vec_sld(prod, prod, 4)));\n}\n\n// min\ntemplate<> EIGEN_STRONG_INLINE float predux_min<Packet4f>(const Packet4f& a)\n{\n  Packet4f b, res;\n  b   = pmin<Packet4f>(a, vec_sld(a, a, 8));\n  res = pmin<Packet4f>(b, vec_sld(b, b, 4));\n  return pfirst(res);\n}\n\n// max\ntemplate<> EIGEN_STRONG_INLINE float predux_max<Packet4f>(const Packet4f& a)\n{\n  Packet4f b, res;\n  b = pmax<Packet4f>(a, vec_sld(a, a, 8));\n  res = pmax<Packet4f>(b, vec_sld(b, b, 4));\n  return pfirst(res);\n}\n\nEIGEN_DEVICE_FUNC inline void\nptranspose(PacketBlock<Packet4f,4>& kernel) {\n  Packet4f t0 = vec_mergeh(kernel.packet[0], kernel.packet[2]);\n  Packet4f t1 = vec_mergel(kernel.packet[0], kernel.packet[2]);\n  Packet4f t2 = vec_mergeh(kernel.packet[1], kernel.packet[3]);\n  Packet4f t3 = vec_mergel(kernel.packet[1], kernel.packet[3]);\n  kernel.packet[0] = vec_mergeh(t0, t2);\n  kernel.packet[1] = vec_mergel(t0, t2);\n  kernel.packet[2] = vec_mergeh(t1, t3);\n  kernel.packet[3] = vec_mergel(t1, t3);\n}\n\ntemplate<> EIGEN_STRONG_INLINE Packet4f pblend(const Selector<4>& ifPacket, const Packet4f& thenPacket, const Packet4f& elsePacket) {\n  Packet4ui select = { ifPacket.select[0], ifPacket.select[1], ifPacket.select[2], ifPacket.select[3] };\n  Packet4ui mask = vec_cmpeq(select, reinterpret_cast<Packet4ui>(p4i_ONE));\n  return vec_sel(elsePacket, thenPacket, mask);\n}\n\n#endif\n\ntemplate<> EIGEN_STRONG_INLINE void prefetch<float>(const float*   addr) { EIGEN_ZVECTOR_PREFETCH(addr); }\ntemplate<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f> (const float* from) { return pload<Packet4f>(from); }\ntemplate<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from) { pstore<float>(to, from); }\ntemplate<> EIGEN_STRONG_INLINE Packet4f plset<Packet4f>  (const float& a)  { return padd<Packet4f>(pset1<Packet4f>(a), p4f_COUNTDOWN); }\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_PACKET_MATH_ZVECTOR_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/functors/AssignmentFunctors.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_ASSIGNMENT_FUNCTORS_H\n#define EIGEN_ASSIGNMENT_FUNCTORS_H\n\nnamespace Eigen {\n\nnamespace internal {\n  \n/** \\internal\n  * \\brief Template functor for scalar/packet assignment\n  *\n  */\ntemplate<typename DstScalar,typename SrcScalar> struct assign_op {\n\n  EIGEN_EMPTY_STRUCT_CTOR(assign_op)\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(DstScalar& a, const SrcScalar& b) const { a = b; }\n  \n  template<int Alignment, typename Packet>\n  EIGEN_STRONG_INLINE void assignPacket(DstScalar* a, const Packet& b) const\n  { internal::pstoret<DstScalar,Packet,Alignment>(a,b); }\n};\n\n// Empty overload for void type (used by PermutationMatrix)\ntemplate<typename DstScalar> struct assign_op<DstScalar,void> {};\n\ntemplate<typename DstScalar,typename SrcScalar>\nstruct functor_traits<assign_op<DstScalar,SrcScalar> > {\n  enum {\n    Cost = NumTraits<DstScalar>::ReadCost,\n    PacketAccess = is_same<DstScalar,SrcScalar>::value && packet_traits<DstScalar>::Vectorizable && packet_traits<SrcScalar>::Vectorizable\n  };\n};\n\n/** \\internal\n  * \\brief Template functor for scalar/packet assignment with addition\n  *\n  */\ntemplate<typename DstScalar,typename SrcScalar> struct add_assign_op {\n\n  EIGEN_EMPTY_STRUCT_CTOR(add_assign_op)\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(DstScalar& a, const SrcScalar& b) const { a += b; }\n  \n  template<int Alignment, typename Packet>\n  EIGEN_STRONG_INLINE void assignPacket(DstScalar* a, const Packet& b) const\n  { internal::pstoret<DstScalar,Packet,Alignment>(a,internal::padd(internal::ploadt<Packet,Alignment>(a),b)); }\n};\ntemplate<typename DstScalar,typename SrcScalar>\nstruct functor_traits<add_assign_op<DstScalar,SrcScalar> > {\n  enum {\n    Cost = NumTraits<DstScalar>::ReadCost + NumTraits<DstScalar>::AddCost,\n    PacketAccess = is_same<DstScalar,SrcScalar>::value && packet_traits<DstScalar>::HasAdd\n  };\n};\n\n/** \\internal\n  * \\brief Template functor for scalar/packet assignment with subtraction\n  *\n  */\ntemplate<typename DstScalar,typename SrcScalar> struct sub_assign_op {\n\n  EIGEN_EMPTY_STRUCT_CTOR(sub_assign_op)\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(DstScalar& a, const SrcScalar& b) const { a -= b; }\n  \n  template<int Alignment, typename Packet>\n  EIGEN_STRONG_INLINE void assignPacket(DstScalar* a, const Packet& b) const\n  { internal::pstoret<DstScalar,Packet,Alignment>(a,internal::psub(internal::ploadt<Packet,Alignment>(a),b)); }\n};\ntemplate<typename DstScalar,typename SrcScalar>\nstruct functor_traits<sub_assign_op<DstScalar,SrcScalar> > {\n  enum {\n    Cost = NumTraits<DstScalar>::ReadCost + NumTraits<DstScalar>::AddCost,\n    PacketAccess = is_same<DstScalar,SrcScalar>::value && packet_traits<DstScalar>::HasSub\n  };\n};\n\n/** \\internal\n  * \\brief Template functor for scalar/packet assignment with multiplication\n  *\n  */\ntemplate<typename DstScalar, typename SrcScalar=DstScalar>\nstruct mul_assign_op {\n\n  EIGEN_EMPTY_STRUCT_CTOR(mul_assign_op)\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(DstScalar& a, const SrcScalar& b) const { a *= b; }\n  \n  
template<int Alignment, typename Packet>\n  EIGEN_STRONG_INLINE void assignPacket(DstScalar* a, const Packet& b) const\n  { internal::pstoret<DstScalar,Packet,Alignment>(a,internal::pmul(internal::ploadt<Packet,Alignment>(a),b)); }\n};\ntemplate<typename DstScalar, typename SrcScalar>\nstruct functor_traits<mul_assign_op<DstScalar,SrcScalar> > {\n  enum {\n    Cost = NumTraits<DstScalar>::ReadCost + NumTraits<DstScalar>::MulCost,\n    PacketAccess = is_same<DstScalar,SrcScalar>::value && packet_traits<DstScalar>::HasMul\n  };\n};\n\n/** \\internal\n  * \\brief Template functor for scalar/packet assignment with division\n  *\n  */\ntemplate<typename DstScalar, typename SrcScalar=DstScalar> struct div_assign_op {\n\n  EIGEN_EMPTY_STRUCT_CTOR(div_assign_op)\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(DstScalar& a, const SrcScalar& b) const { a /= b; }\n  \n  template<int Alignment, typename Packet>\n  EIGEN_STRONG_INLINE void assignPacket(DstScalar* a, const Packet& b) const\n  { internal::pstoret<DstScalar,Packet,Alignment>(a,internal::pdiv(internal::ploadt<Packet,Alignment>(a),b)); }\n};\ntemplate<typename DstScalar, typename SrcScalar>\nstruct functor_traits<div_assign_op<DstScalar,SrcScalar> > {\n  enum {\n    Cost = NumTraits<DstScalar>::ReadCost + NumTraits<DstScalar>::MulCost,\n    PacketAccess = is_same<DstScalar,SrcScalar>::value && packet_traits<DstScalar>::HasDiv\n  };\n};\n\n/** \\internal\n  * \\brief Template functor for scalar/packet assignment with swapping\n  *\n  * It works as follows. For a non-vectorized evaluation loop, we have:\n  *   for(i) func(A.coeffRef(i), B.coeff(i));\n  * where B is a SwapWrapper expression. The trick is to make SwapWrapper::coeff behave like a non-const coeffRef.\n  * Actually, SwapWrapper might not even be needed: even if B is a plain expression, it has to be writable,\n  * so B.coeff already returns a const reference to the underlying scalar value.\n  * \n  * The case of a vectorized loop is more tricky:\n  *   for(i,j) func.assignPacket<A_Align>(&A.coeffRef(i,j), B.packet<B_Align>(i,j));\n  * Here, B must be a SwapWrapper whose packet function actually returns a proxy object holding a Scalar*,\n  * the actual alignment and Packet type.\n  *\n  */\ntemplate<typename Scalar> struct swap_assign_op {\n\n  EIGEN_EMPTY_STRUCT_CTOR(swap_assign_op)\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const\n  {\n#ifdef EIGEN_GPUCC\n    // FIXME is there some kind of cuda::swap?\n    Scalar t=b; const_cast<Scalar&>(b)=a; a=t;\n#else\n    using std::swap;\n    swap(a,const_cast<Scalar&>(b));\n#endif\n  }\n};\ntemplate<typename Scalar>\nstruct functor_traits<swap_assign_op<Scalar> > {\n  enum {\n    Cost = 3 * NumTraits<Scalar>::ReadCost,\n    PacketAccess = \n    #if defined(EIGEN_VECTORIZE_AVX) && EIGEN_COMP_CLANG && (EIGEN_COMP_CLANG<800 || defined(__apple_build_version__))\n    // This is a partial workaround for a bug in clang generating bad code\n    // when mixing 256/512 bits loads and 128 bits moves.\n    // See http://eigen.tuxfamily.org/bz/show_bug.cgi?id=1684\n    //     https://bugs.llvm.org/show_bug.cgi?id=40815\n    0\n    #else\n    packet_traits<Scalar>::Vectorizable\n    #endif\n  };\n};\n\n} // namespace internal\n\n} // namespace Eigen\n\n#endif // EIGEN_ASSIGNMENT_FUNCTORS_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/functors/BinaryFunctors.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_BINARY_FUNCTORS_H\n#define EIGEN_BINARY_FUNCTORS_H\n\nnamespace Eigen {\n\nnamespace internal {\n\n//---------- associative binary functors ----------\n\ntemplate<typename Arg1, typename Arg2>\nstruct binary_op_base\n{\n  typedef Arg1 first_argument_type;\n  typedef Arg2 second_argument_type;\n};\n\n/** \\internal\n  * \\brief Template functor to compute the sum of two scalars\n  *\n  * \\sa class CwiseBinaryOp, MatrixBase::operator+, class VectorwiseOp, DenseBase::sum()\n  */\ntemplate<typename LhsScalar,typename RhsScalar>\nstruct scalar_sum_op : binary_op_base<LhsScalar,RhsScalar>\n{\n  typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar,scalar_sum_op>::ReturnType result_type;\n#ifndef EIGEN_SCALAR_BINARY_OP_PLUGIN\n  EIGEN_EMPTY_STRUCT_CTOR(scalar_sum_op)\n#else\n  scalar_sum_op() {\n    EIGEN_SCALAR_BINARY_OP_PLUGIN\n  }\n#endif\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a + b; }\n  template<typename Packet>\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& a, const Packet& b) const\n  { return internal::padd(a,b); }\n  template<typename Packet>\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type predux(const Packet& a) const\n  { return internal::predux(a); }\n};\ntemplate<typename LhsScalar,typename RhsScalar>\nstruct functor_traits<scalar_sum_op<LhsScalar,RhsScalar> > {\n  enum {\n    Cost = (int(NumTraits<LhsScalar>::AddCost) + int(NumTraits<RhsScalar>::AddCost)) / 2, // rough estimate!\n    PacketAccess = is_same<LhsScalar,RhsScalar>::value && packet_traits<LhsScalar>::HasAdd && packet_traits<RhsScalar>::HasAdd\n    // TODO vectorize mixed sum\n  };\n};\n\n\ntemplate<>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool scalar_sum_op<bool,bool>::operator() (const bool& a, const bool& b) const { return a || b; }\n\n\n/** \\internal\n  * \\brief Template functor to compute the product of two scalars\n  *\n  * \\sa class CwiseBinaryOp, Cwise::operator*(), class VectorwiseOp, MatrixBase::redux()\n  */\ntemplate<typename LhsScalar,typename RhsScalar>\nstruct scalar_product_op  : binary_op_base<LhsScalar,RhsScalar>\n{\n  typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar,scalar_product_op>::ReturnType result_type;\n#ifndef EIGEN_SCALAR_BINARY_OP_PLUGIN\n  EIGEN_EMPTY_STRUCT_CTOR(scalar_product_op)\n#else\n  scalar_product_op() {\n    EIGEN_SCALAR_BINARY_OP_PLUGIN\n  }\n#endif\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a * b; }\n  template<typename Packet>\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& a, const Packet& b) const\n  { return internal::pmul(a,b); }\n  template<typename Packet>\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type predux(const Packet& a) const\n  { return internal::predux_mul(a); }\n};\ntemplate<typename LhsScalar,typename RhsScalar>\nstruct functor_traits<scalar_product_op<LhsScalar,RhsScalar> > {\n  enum {\n    Cost = (int(NumTraits<LhsScalar>::MulCost) + int(NumTraits<RhsScalar>::MulCost))/2, // rough estimate!\n    PacketAccess = 
is_same<LhsScalar,RhsScalar>::value && packet_traits<LhsScalar>::HasMul && packet_traits<RhsScalar>::HasMul\n    // TODO vectorize mixed product\n  };\n};\n\ntemplate<>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool scalar_product_op<bool,bool>::operator() (const bool& a, const bool& b) const { return a && b; }\n\n\n/** \\internal\n  * \\brief Template functor to compute the conjugate product of two scalars\n  *\n  * This is a shortcut for conj(x) * y, which is needed for optimization purposes; in Eigen2 support mode, this becomes x * conj(y)\n  */\ntemplate<typename LhsScalar,typename RhsScalar>\nstruct scalar_conj_product_op  : binary_op_base<LhsScalar,RhsScalar>\n{\n\n  enum {\n    Conj = NumTraits<LhsScalar>::IsComplex\n  };\n  \n  typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar,scalar_conj_product_op>::ReturnType result_type;\n  \n  EIGEN_EMPTY_STRUCT_CTOR(scalar_conj_product_op)\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const LhsScalar& a, const RhsScalar& b) const\n  { return conj_helper<LhsScalar,RhsScalar,Conj,false>().pmul(a,b); }\n  \n  template<typename Packet>\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& a, const Packet& b) const\n  { return conj_helper<Packet,Packet,Conj,false>().pmul(a,b); }\n};\ntemplate<typename LhsScalar,typename RhsScalar>\nstruct functor_traits<scalar_conj_product_op<LhsScalar,RhsScalar> > {\n  enum {\n    Cost = NumTraits<LhsScalar>::MulCost,\n    PacketAccess = internal::is_same<LhsScalar, RhsScalar>::value && packet_traits<LhsScalar>::HasMul\n  };\n};\n\n/** \\internal\n  * \\brief Template functor to compute the min of two scalars\n  *\n  * \\sa class CwiseBinaryOp, MatrixBase::cwiseMin, class VectorwiseOp, MatrixBase::minCoeff()\n  */\ntemplate<typename LhsScalar,typename RhsScalar, int NaNPropagation>\nstruct scalar_min_op : binary_op_base<LhsScalar,RhsScalar>\n{\n  typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar,scalar_min_op>::ReturnType result_type;\n  EIGEN_EMPTY_STRUCT_CTOR(scalar_min_op)\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const LhsScalar& a, const RhsScalar& b) const {\n    return internal::pmin<NaNPropagation>(a, b);\n  }\n  template<typename Packet>\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& a, const Packet& b) const\n  {\n    return internal::pmin<NaNPropagation>(a,b);\n  }\n  template<typename Packet>\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type predux(const Packet& a) const\n  {\n    return internal::predux_min<NaNPropagation>(a);\n  }\n};\n\ntemplate<typename LhsScalar,typename RhsScalar, int NaNPropagation>\nstruct functor_traits<scalar_min_op<LhsScalar,RhsScalar, NaNPropagation> > {\n  enum {\n    Cost = (NumTraits<LhsScalar>::AddCost+NumTraits<RhsScalar>::AddCost)/2,\n    PacketAccess = internal::is_same<LhsScalar, RhsScalar>::value && packet_traits<LhsScalar>::HasMin\n  };\n};\n\n/** \\internal\n  * \\brief Template functor to compute the max of two scalars\n  *\n  * \\sa class CwiseBinaryOp, MatrixBase::cwiseMax, class VectorwiseOp, MatrixBase::maxCoeff()\n  */\ntemplate<typename LhsScalar,typename RhsScalar, int NaNPropagation>\nstruct scalar_max_op : binary_op_base<LhsScalar,RhsScalar>\n{\n  typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar,scalar_max_op>::ReturnType result_type;\n  EIGEN_EMPTY_STRUCT_CTOR(scalar_max_op)\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const LhsScalar& a, const RhsScalar& b) const {\n    return internal::pmax<NaNPropagation>(a,b);\n  }\n  
template<typename Packet>\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& a, const Packet& b) const\n  {\n    return internal::pmax<NaNPropagation>(a,b);\n  }\n  template<typename Packet>\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type predux(const Packet& a) const\n  {\n    return internal::predux_max<NaNPropagation>(a);\n  }\n};\n\ntemplate<typename LhsScalar,typename RhsScalar, int NaNPropagation>\nstruct functor_traits<scalar_max_op<LhsScalar,RhsScalar, NaNPropagation> > {\n  enum {\n    Cost = (NumTraits<LhsScalar>::AddCost+NumTraits<RhsScalar>::AddCost)/2,\n    PacketAccess = internal::is_same<LhsScalar, RhsScalar>::value && packet_traits<LhsScalar>::HasMax\n  };\n};\n\n/** \\internal\n  * \\brief Template functors for comparison of two scalars\n  * \\todo Implement packet-comparisons\n  */\ntemplate<typename LhsScalar, typename RhsScalar, ComparisonName cmp> struct scalar_cmp_op;\n\ntemplate<typename LhsScalar, typename RhsScalar, ComparisonName cmp>\nstruct functor_traits<scalar_cmp_op<LhsScalar,RhsScalar, cmp> > {\n  enum {\n    Cost = (NumTraits<LhsScalar>::AddCost+NumTraits<RhsScalar>::AddCost)/2,\n    PacketAccess = false\n  };\n};\n\ntemplate<ComparisonName Cmp, typename LhsScalar, typename RhsScalar>\nstruct result_of<scalar_cmp_op<LhsScalar, RhsScalar, Cmp>(LhsScalar,RhsScalar)> {\n  typedef bool type;\n};\n\n\ntemplate<typename LhsScalar, typename RhsScalar>\nstruct scalar_cmp_op<LhsScalar,RhsScalar, cmp_EQ> : binary_op_base<LhsScalar,RhsScalar>\n{\n  typedef bool result_type;\n  EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a==b;}\n};\ntemplate<typename LhsScalar, typename RhsScalar>\nstruct scalar_cmp_op<LhsScalar,RhsScalar, cmp_LT> : binary_op_base<LhsScalar,RhsScalar>\n{\n  typedef bool result_type;\n  EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a<b;}\n};\ntemplate<typename LhsScalar, typename RhsScalar>\nstruct scalar_cmp_op<LhsScalar,RhsScalar, cmp_LE> : binary_op_base<LhsScalar,RhsScalar>\n{\n  typedef bool result_type;\n  EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a<=b;}\n};\ntemplate<typename LhsScalar, typename RhsScalar>\nstruct scalar_cmp_op<LhsScalar,RhsScalar, cmp_GT> : binary_op_base<LhsScalar,RhsScalar>\n{\n  typedef bool result_type;\n  EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a>b;}\n};\ntemplate<typename LhsScalar, typename RhsScalar>\nstruct scalar_cmp_op<LhsScalar,RhsScalar, cmp_GE> : binary_op_base<LhsScalar,RhsScalar>\n{\n  typedef bool result_type;\n  EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a>=b;}\n};\ntemplate<typename LhsScalar, typename RhsScalar>\nstruct scalar_cmp_op<LhsScalar,RhsScalar, cmp_UNORD> : binary_op_base<LhsScalar,RhsScalar>\n{\n  typedef bool result_type;\n  EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return !(a<=b || b<=a);}\n};\ntemplate<typename LhsScalar, typename RhsScalar>\nstruct scalar_cmp_op<LhsScalar,RhsScalar, cmp_NEQ> : binary_op_base<LhsScalar,RhsScalar>\n{\n  typedef 
bool result_type;\n  EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a!=b;}\n};\n\n/** \\internal\n  * \\brief Template functor to compute the hypot of two \\b positive \\b and \\b real scalars\n  *\n  * \\sa MatrixBase::stableNorm(), class Redux\n  */\ntemplate<typename Scalar>\nstruct scalar_hypot_op<Scalar,Scalar> : binary_op_base<Scalar,Scalar>\n{\n  EIGEN_EMPTY_STRUCT_CTOR(scalar_hypot_op)\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar &x, const Scalar &y) const\n  {\n    // This functor is used only by hypotNorm, for which it is faster to first apply abs\n    // on all coefficients prior to the reduction through hypot.\n    // This way we avoid calling abs on positive and real entries, and it also permits\n    // seamless handling of complexes. Otherwise we would have to handle both reals and complexes\n    // through the same functor...\n    return internal::positive_real_hypot(x,y);\n  }\n};\ntemplate<typename Scalar>\nstruct functor_traits<scalar_hypot_op<Scalar,Scalar> > {\n  enum\n  {\n    Cost = 3 * NumTraits<Scalar>::AddCost +\n           2 * NumTraits<Scalar>::MulCost +\n           2 * scalar_div_cost<Scalar,false>::value,\n    PacketAccess = false\n  };\n};\n\n/** \\internal\n  * \\brief Template functor to compute the pow of two scalars\n  * See the specification of pow in https://en.cppreference.com/w/cpp/numeric/math/pow\n  */\ntemplate<typename Scalar, typename Exponent>\nstruct scalar_pow_op  : binary_op_base<Scalar,Exponent>\n{\n  typedef typename ScalarBinaryOpTraits<Scalar,Exponent,scalar_pow_op>::ReturnType result_type;\n#ifndef EIGEN_SCALAR_BINARY_OP_PLUGIN\n  EIGEN_EMPTY_STRUCT_CTOR(scalar_pow_op)\n#else\n  scalar_pow_op() {\n    typedef Scalar LhsScalar;\n    typedef Exponent RhsScalar;\n    EIGEN_SCALAR_BINARY_OP_PLUGIN\n  }\n#endif\n\n  EIGEN_DEVICE_FUNC\n  inline result_type operator() (const Scalar& a, const Exponent& b) const { return numext::pow(a, b); }\n\n  template<typename Packet>\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const\n  {\n    return generic_pow(a,b);\n  }\n};\n\ntemplate<typename Scalar, typename Exponent>\nstruct functor_traits<scalar_pow_op<Scalar,Exponent> > {\n  enum {\n    Cost = 5 * NumTraits<Scalar>::MulCost,\n    PacketAccess = (!NumTraits<Scalar>::IsComplex && !NumTraits<Scalar>::IsInteger &&\n                    packet_traits<Scalar>::HasExp && packet_traits<Scalar>::HasLog &&\n                    packet_traits<Scalar>::HasRound && packet_traits<Scalar>::HasCmp &&\n                    // Temporarily disable packet access for half/bfloat16 until\n                    // accuracy is improved.\n                    !is_same<Scalar, half>::value && !is_same<Scalar, bfloat16>::value\n                    )\n  };\n};\n\n//---------- non-associative binary functors ----------\n\n/** \\internal\n  * \\brief Template functor to compute the difference of two scalars\n  *\n  * \\sa class CwiseBinaryOp, MatrixBase::operator-\n  */\ntemplate<typename LhsScalar,typename RhsScalar>\nstruct scalar_difference_op : binary_op_base<LhsScalar,RhsScalar>\n{\n  typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar,scalar_difference_op>::ReturnType result_type;\n#ifndef EIGEN_SCALAR_BINARY_OP_PLUGIN\n  EIGEN_EMPTY_STRUCT_CTOR(scalar_difference_op)\n#else\n  scalar_difference_op() {\n    EIGEN_SCALAR_BINARY_OP_PLUGIN\n  }\n#endif\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 
const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a - b; }\n  template<typename Packet>\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const\n  { return internal::psub(a,b); }\n};\ntemplate<typename LhsScalar,typename RhsScalar>\nstruct functor_traits<scalar_difference_op<LhsScalar,RhsScalar> > {\n  enum {\n    Cost = (int(NumTraits<LhsScalar>::AddCost) + int(NumTraits<RhsScalar>::AddCost)) / 2,\n    PacketAccess = is_same<LhsScalar,RhsScalar>::value && packet_traits<LhsScalar>::HasSub && packet_traits<RhsScalar>::HasSub\n  };\n};\n\n/** \\internal\n  * \\brief Template functor to compute the quotient of two scalars\n  *\n  * \\sa class CwiseBinaryOp, Cwise::operator/()\n  */\ntemplate<typename LhsScalar,typename RhsScalar>\nstruct scalar_quotient_op  : binary_op_base<LhsScalar,RhsScalar>\n{\n  typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar,scalar_quotient_op>::ReturnType result_type;\n#ifndef EIGEN_SCALAR_BINARY_OP_PLUGIN\n  EIGEN_EMPTY_STRUCT_CTOR(scalar_quotient_op)\n#else\n  scalar_quotient_op() {\n    EIGEN_SCALAR_BINARY_OP_PLUGIN\n  }\n#endif\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a / b; }\n  template<typename Packet>\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const\n  { return internal::pdiv(a,b); }\n};\ntemplate<typename LhsScalar,typename RhsScalar>\nstruct functor_traits<scalar_quotient_op<LhsScalar,RhsScalar> > {\n  typedef typename scalar_quotient_op<LhsScalar,RhsScalar>::result_type result_type;\n  enum {\n    PacketAccess = is_same<LhsScalar,RhsScalar>::value && packet_traits<LhsScalar>::HasDiv && packet_traits<RhsScalar>::HasDiv,\n    Cost = scalar_div_cost<result_type,PacketAccess>::value\n  };\n};\n\n\n\n/** \\internal\n  * \\brief Template functor to compute the and of two booleans\n  *\n  * \\sa class CwiseBinaryOp, ArrayBase::operator&&\n  */\nstruct scalar_boolean_and_op {\n  EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_and_op)\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator() (const bool& a, const bool& b) const { return a && b; }\n  template<typename Packet>\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const\n  { return internal::pand(a,b); }\n};\ntemplate<> struct functor_traits<scalar_boolean_and_op> {\n  enum {\n    Cost = NumTraits<bool>::AddCost,\n    PacketAccess = true\n  };\n};\n\n/** \\internal\n  * \\brief Template functor to compute the or of two booleans\n  *\n  * \\sa class CwiseBinaryOp, ArrayBase::operator||\n  */\nstruct scalar_boolean_or_op {\n  EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_or_op)\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator() (const bool& a, const bool& b) const { return a || b; }\n  template<typename Packet>\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const\n  { return internal::por(a,b); }\n};\ntemplate<> struct functor_traits<scalar_boolean_or_op> {\n  enum {\n    Cost = NumTraits<bool>::AddCost,\n    PacketAccess = true\n  };\n};\n\n/** \\internal\n * \\brief Template functor to compute the xor of two booleans\n *\n * \\sa class CwiseBinaryOp, ArrayBase::operator^\n */\nstruct scalar_boolean_xor_op {\n  EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_xor_op)\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator() (const bool& a, const bool& b) const { return a ^ b; }\n  template<typename Packet>\n 
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const\n  { return internal::pxor(a,b); }\n};\ntemplate<> struct functor_traits<scalar_boolean_xor_op> {\n  enum {\n    Cost = NumTraits<bool>::AddCost,\n    PacketAccess = true\n  };\n};\n\n/** \internal\n  * \brief Template functor to compute the absolute difference of two scalars\n  *\n  * \sa class CwiseBinaryOp, MatrixBase::absolute_difference\n  */\ntemplate<typename LhsScalar,typename RhsScalar>\nstruct scalar_absolute_difference_op : binary_op_base<LhsScalar,RhsScalar>\n{\n  typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar,scalar_absolute_difference_op>::ReturnType result_type;\n#ifndef EIGEN_SCALAR_BINARY_OP_PLUGIN\n  EIGEN_EMPTY_STRUCT_CTOR(scalar_absolute_difference_op)\n#else\n  scalar_absolute_difference_op() {\n    EIGEN_SCALAR_BINARY_OP_PLUGIN\n  }\n#endif\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const\n  { return numext::absdiff(a,b); }\n  template<typename Packet>\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const\n  { return internal::pabsdiff(a,b); }\n};\ntemplate<typename LhsScalar,typename RhsScalar>\nstruct functor_traits<scalar_absolute_difference_op<LhsScalar,RhsScalar> > {\n  enum {\n    Cost = (NumTraits<LhsScalar>::AddCost+NumTraits<RhsScalar>::AddCost)/2,\n    PacketAccess = is_same<LhsScalar,RhsScalar>::value && packet_traits<LhsScalar>::HasAbsDiff\n  };\n};\n\n\n\n//---------- binary functors bound to a constant, thus appearing as a unary functor ----------\n\n// The following two classes permit turning any binary functor into a unary one with one argument bound to a constant value.\n// They are analogous to std::binder1st/binder2nd but with the following differences:\n//  - they are compatible with packetOp\n//  - they are portable across C++ versions (the std::binder* are deprecated in C++11)\ntemplate<typename BinaryOp> struct bind1st_op : BinaryOp {\n\n  typedef typename BinaryOp::first_argument_type  first_argument_type;\n  typedef typename BinaryOp::second_argument_type second_argument_type;\n  typedef typename BinaryOp::result_type          result_type;\n\n  EIGEN_DEVICE_FUNC explicit bind1st_op(const first_argument_type &val) : m_value(val) {}\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const second_argument_type& b) const { return BinaryOp::operator()(m_value,b); }\n\n  template<typename Packet>\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& b) const\n  { return BinaryOp::packetOp(internal::pset1<Packet>(m_value), b); }\n\n  first_argument_type m_value;\n};\ntemplate<typename BinaryOp> struct functor_traits<bind1st_op<BinaryOp> > : functor_traits<BinaryOp> {};\n\n\ntemplate<typename BinaryOp> struct bind2nd_op : BinaryOp {\n\n  typedef typename BinaryOp::first_argument_type  first_argument_type;\n  typedef typename BinaryOp::second_argument_type second_argument_type;\n  typedef typename BinaryOp::result_type          result_type;\n\n  EIGEN_DEVICE_FUNC explicit bind2nd_op(const second_argument_type &val) : m_value(val) {}\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const first_argument_type& a) const { return BinaryOp::operator()(a,m_value); }\n\n  template<typename Packet>\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const\n  { return BinaryOp::packetOp(a,internal::pset1<Packet>(m_value)); }\n\n  
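// [Editor's note: illustrative sketch, not upstream Eigen text.] With the\n  // argument typedefs that BinaryOp inherits from binary_op_base, one could\n  // bind a constant by hand, e.g. (hypothetical names AddK/expr):\n  //   typedef internal::bind2nd_op<internal::scalar_sum_op<double,double> > AddK;\n  //   CwiseUnaryOp<AddK, const ArrayXd> expr(arr, AddK(3.0)); // arr + 3.0\n  // The bound constant is broadcast with pset1 inside packetOp, as above.\n\n  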
second_argument_type m_value;\n};\ntemplate<typename BinaryOp> struct functor_traits<bind2nd_op<BinaryOp> > : functor_traits<BinaryOp> {};\n\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_BINARY_FUNCTORS_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/functors/NullaryFunctors.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008-2016 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_NULLARY_FUNCTORS_H\n#define EIGEN_NULLARY_FUNCTORS_H\n\nnamespace Eigen {\n\nnamespace internal {\n\ntemplate<typename Scalar>\nstruct scalar_constant_op {\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_constant_op(const scalar_constant_op& other) : m_other(other.m_other) { }\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_constant_op(const Scalar& other) : m_other(other) { }\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() () const { return m_other; }\n  template<typename PacketType>\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const PacketType packetOp() const { return internal::pset1<PacketType>(m_other); }\n  const Scalar m_other;\n};\ntemplate<typename Scalar>\nstruct functor_traits<scalar_constant_op<Scalar> >\n{ enum { Cost = 0 /* as the constant value should be loaded in register only once for the whole expression */,\n         PacketAccess = packet_traits<Scalar>::Vectorizable, IsRepeatable = true }; };\n\ntemplate<typename Scalar> struct scalar_identity_op {\n  EIGEN_EMPTY_STRUCT_CTOR(scalar_identity_op)\n  template<typename IndexType>\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (IndexType row, IndexType col) const { return row==col ? Scalar(1) : Scalar(0); }\n};\ntemplate<typename Scalar>\nstruct functor_traits<scalar_identity_op<Scalar> >\n{ enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = false, IsRepeatable = true }; };\n\ntemplate <typename Scalar, bool IsInteger> struct linspaced_op_impl;\n\ntemplate <typename Scalar>\nstruct linspaced_op_impl<Scalar,/*IsInteger*/false>\n{\n  typedef typename NumTraits<Scalar>::Real RealScalar;\n\n  EIGEN_DEVICE_FUNC linspaced_op_impl(const Scalar& low, const Scalar& high, Index num_steps) :\n    m_low(low), m_high(high), m_size1(num_steps==1 ? 1 : num_steps-1), m_step(num_steps==1 ? Scalar() : Scalar((high-low)/RealScalar(num_steps-1))),\n    m_flip(numext::abs(high)<numext::abs(low))\n  {}\n\n  template<typename IndexType>\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (IndexType i) const {\n    if(m_flip)\n      return (i==0)? m_low : Scalar(m_high - RealScalar(m_size1-i)*m_step);\n    else\n      return (i==m_size1)? 
m_high : Scalar(m_low + RealScalar(i)*m_step);\n  }\n\n  template<typename Packet, typename IndexType>\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(IndexType i) const\n  {\n    // Principle:\n    // [low, ..., low] + ( [step, ..., step] * ( [i, ..., i] + [0, ..., size] ) )\n    if(m_flip)\n    {\n      Packet pi = plset<Packet>(Scalar(i-m_size1));\n      Packet res = padd(pset1<Packet>(m_high), pmul(pset1<Packet>(m_step), pi));\n      if (EIGEN_PREDICT_TRUE(i != 0)) return res;\n      Packet mask = pcmp_lt(pset1<Packet>(0), plset<Packet>(0));\n      return pselect<Packet>(mask, res, pset1<Packet>(m_low));\n    }\n    else\n    {\n      Packet pi = plset<Packet>(Scalar(i));\n      Packet res = padd(pset1<Packet>(m_low), pmul(pset1<Packet>(m_step), pi));\n      if(EIGEN_PREDICT_TRUE(i != m_size1-unpacket_traits<Packet>::size+1)) return res;\n      Packet mask = pcmp_lt(plset<Packet>(0), pset1<Packet>(unpacket_traits<Packet>::size-1));\n      return pselect<Packet>(mask, res, pset1<Packet>(m_high));\n    }\n  }\n\n  const Scalar m_low;\n  const Scalar m_high;\n  const Index m_size1;\n  const Scalar m_step;\n  const bool m_flip;\n};\n\ntemplate <typename Scalar>\nstruct linspaced_op_impl<Scalar,/*IsInteger*/true>\n{\n  EIGEN_DEVICE_FUNC linspaced_op_impl(const Scalar& low, const Scalar& high, Index num_steps) :\n    m_low(low),\n    m_multiplier((high-low)/convert_index<Scalar>(num_steps<=1 ? 1 : num_steps-1)),\n    m_divisor(convert_index<Scalar>((high>=low?num_steps:-num_steps)+(high-low))/((numext::abs(high-low)+1)==0?1:(numext::abs(high-low)+1))),\n    m_use_divisor(num_steps>1 && (numext::abs(high-low)+1)<num_steps)\n  {}\n\n  template<typename IndexType>\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  const Scalar operator() (IndexType i) const\n  {\n    if(m_use_divisor) return m_low + convert_index<Scalar>(i)/m_divisor;\n    else              return m_low + convert_index<Scalar>(i)*m_multiplier;\n  }\n\n  const Scalar m_low;\n  const Scalar m_multiplier;\n  const Scalar m_divisor;\n  const bool m_use_divisor;\n};\n\n// ----- Linspace functor ----------------------------------------------------------------\n\n// Forward declaration (we default to random access which does not really give\n// us a speed gain when using packet access but it allows using the functor in\n// nested expressions).\ntemplate <typename Scalar> struct linspaced_op;\ntemplate <typename Scalar> struct functor_traits< linspaced_op<Scalar> >\n{\n  enum\n  {\n    Cost = 1,\n    PacketAccess =   (!NumTraits<Scalar>::IsInteger) && packet_traits<Scalar>::HasSetLinear && packet_traits<Scalar>::HasBlend,\n                  /*&& ((!NumTraits<Scalar>::IsInteger) || packet_traits<Scalar>::HasDiv),*/ // <- vectorization for integer is currently disabled\n    IsRepeatable = true\n  };\n};\ntemplate <typename Scalar> struct linspaced_op\n{\n  EIGEN_DEVICE_FUNC linspaced_op(const Scalar& low, const Scalar& high, Index num_steps)\n    : impl((num_steps==1 ? high : low),high,num_steps)\n  {}\n\n  template<typename IndexType>\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (IndexType i) const { return impl(i); }\n\n  template<typename Packet,typename IndexType>\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(IndexType i) const { return impl.template packetOp<Packet>(i); }\n\n  // This proxy object handles the actual required temporaries and the different\n  // implementations (integer vs. 
floating point).\n  const linspaced_op_impl<Scalar,NumTraits<Scalar>::IsInteger> impl;\n};\n\n// Linear access is automatically determined from the operator() prototypes available for the given functor.\n// If it exposes an operator()(i,j), then we assume the i and j coefficients are required independently\n// and linear access is not possible. In all other cases, linear access is enabled.\n// Users should not have to deal with this structure.\ntemplate<typename Functor> struct functor_has_linear_access { enum { ret = !has_binary_operator<Functor>::value }; };\n\n// For unreliable compilers, let's specialize the has_*ary_operator\n// helpers so that at least built-in nullary functors work fine.\n#if !( (EIGEN_COMP_MSVC>1600) || (EIGEN_GNUC_AT_LEAST(4,8)) || (EIGEN_COMP_ICC>=1600))\ntemplate<typename Scalar,typename IndexType>\nstruct has_nullary_operator<scalar_constant_op<Scalar>,IndexType> { enum { value = 1}; };\ntemplate<typename Scalar,typename IndexType>\nstruct has_unary_operator<scalar_constant_op<Scalar>,IndexType> { enum { value = 0}; };\ntemplate<typename Scalar,typename IndexType>\nstruct has_binary_operator<scalar_constant_op<Scalar>,IndexType> { enum { value = 0}; };\n\ntemplate<typename Scalar,typename IndexType>\nstruct has_nullary_operator<scalar_identity_op<Scalar>,IndexType> { enum { value = 0}; };\ntemplate<typename Scalar,typename IndexType>\nstruct has_unary_operator<scalar_identity_op<Scalar>,IndexType> { enum { value = 0}; };\ntemplate<typename Scalar,typename IndexType>\nstruct has_binary_operator<scalar_identity_op<Scalar>,IndexType> { enum { value = 1}; };\n\ntemplate<typename Scalar,typename IndexType>\nstruct has_nullary_operator<linspaced_op<Scalar>,IndexType> { enum { value = 0}; };\ntemplate<typename Scalar,typename IndexType>\nstruct has_unary_operator<linspaced_op<Scalar>,IndexType> { enum { value = 1}; };\ntemplate<typename Scalar,typename IndexType>\nstruct has_binary_operator<linspaced_op<Scalar>,IndexType> { enum { value = 0}; };\n\ntemplate<typename Scalar,typename IndexType>\nstruct has_nullary_operator<scalar_random_op<Scalar>,IndexType> { enum { value = 1}; };\ntemplate<typename Scalar,typename IndexType>\nstruct has_unary_operator<scalar_random_op<Scalar>,IndexType> { enum { value = 0}; };\ntemplate<typename Scalar,typename IndexType>\nstruct has_binary_operator<scalar_random_op<Scalar>,IndexType> { enum { value = 0}; };\n#endif\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_NULLARY_FUNCTORS_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/functors/StlFunctors.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_STL_FUNCTORS_H\n#define EIGEN_STL_FUNCTORS_H\n\nnamespace Eigen {\n\n// Portable replacements for certain functors.\nnamespace numext {\n\ntemplate<typename T = void>\nstruct equal_to {\n  typedef bool result_type;\n  EIGEN_DEVICE_FUNC bool operator()(const T& lhs, const T& rhs) const {\n    return lhs == rhs;\n  }\n};\n\ntemplate<typename T = void>\nstruct not_equal_to {\n  typedef bool result_type;\n  EIGEN_DEVICE_FUNC bool operator()(const T& lhs, const T& rhs) const {\n    return lhs != rhs;\n  }\n};\n\n}\n\n\nnamespace internal {\n\n// default functor traits for STL functors:\n\ntemplate<typename T>\nstruct functor_traits<std::multiplies<T> >\n{ enum { Cost = NumTraits<T>::MulCost, PacketAccess = false }; };\n\ntemplate<typename T>\nstruct functor_traits<std::divides<T> >\n{ enum { Cost = NumTraits<T>::MulCost, PacketAccess = false }; };\n\ntemplate<typename T>\nstruct functor_traits<std::plus<T> >\n{ enum { Cost = NumTraits<T>::AddCost, PacketAccess = false }; };\n\ntemplate<typename T>\nstruct functor_traits<std::minus<T> >\n{ enum { Cost = NumTraits<T>::AddCost, PacketAccess = false }; };\n\ntemplate<typename T>\nstruct functor_traits<std::negate<T> >\n{ enum { Cost = NumTraits<T>::AddCost, PacketAccess = false }; };\n\ntemplate<typename T>\nstruct functor_traits<std::logical_or<T> >\n{ enum { Cost = 1, PacketAccess = false }; };\n\ntemplate<typename T>\nstruct functor_traits<std::logical_and<T> >\n{ enum { Cost = 1, PacketAccess = false }; };\n\ntemplate<typename T>\nstruct functor_traits<std::logical_not<T> >\n{ enum { Cost = 1, PacketAccess = false }; };\n\ntemplate<typename T>\nstruct functor_traits<std::greater<T> >\n{ enum { Cost = 1, PacketAccess = false }; };\n\ntemplate<typename T>\nstruct functor_traits<std::less<T> >\n{ enum { Cost = 1, PacketAccess = false }; };\n\ntemplate<typename T>\nstruct functor_traits<std::greater_equal<T> >\n{ enum { Cost = 1, PacketAccess = false }; };\n\ntemplate<typename T>\nstruct functor_traits<std::less_equal<T> >\n{ enum { Cost = 1, PacketAccess = false }; };\n\ntemplate<typename T>\nstruct functor_traits<std::equal_to<T> >\n{ enum { Cost = 1, PacketAccess = false }; };\n\ntemplate<typename T>\nstruct functor_traits<numext::equal_to<T> >\n  : functor_traits<std::equal_to<T> > {};\n\ntemplate<typename T>\nstruct functor_traits<std::not_equal_to<T> >\n{ enum { Cost = 1, PacketAccess = false }; };\n\ntemplate<typename T>\nstruct functor_traits<numext::not_equal_to<T> >\n  : functor_traits<std::not_equal_to<T> > {};\n\n#if (EIGEN_COMP_CXXVER < 11)\n// std::binder* are deprecated since c++11 and will be removed in c++17\ntemplate<typename T>\nstruct functor_traits<std::binder2nd<T> >\n{ enum { Cost = functor_traits<T>::Cost, PacketAccess = false }; };\n\ntemplate<typename T>\nstruct functor_traits<std::binder1st<T> >\n{ enum { Cost = functor_traits<T>::Cost, PacketAccess = false }; };\n#endif\n\n#if (EIGEN_COMP_CXXVER < 17)\n// std::unary_negate is deprecated since c++17 and will be removed in c++20\ntemplate<typename T>\nstruct functor_traits<std::unary_negate<T> >\n{ enum { Cost = 1 + functor_traits<T>::Cost, PacketAccess = false }; };\n\n// 
std::binary_negate is deprecated since c++17 and will be removed in c++20\ntemplate<typename T>\nstruct functor_traits<std::binary_negate<T> >\n{ enum { Cost = 1 + functor_traits<T>::Cost, PacketAccess = false }; };\n#endif\n\n#ifdef EIGEN_STDEXT_SUPPORT\n\ntemplate<typename T0,typename T1>\nstruct functor_traits<std::project1st<T0,T1> >\n{ enum { Cost = 0, PacketAccess = false }; };\n\ntemplate<typename T0,typename T1>\nstruct functor_traits<std::project2nd<T0,T1> >\n{ enum { Cost = 0, PacketAccess = false }; };\n\ntemplate<typename T0,typename T1>\nstruct functor_traits<std::select2nd<std::pair<T0,T1> > >\n{ enum { Cost = 0, PacketAccess = false }; };\n\ntemplate<typename T0,typename T1>\nstruct functor_traits<std::select1st<std::pair<T0,T1> > >\n{ enum { Cost = 0, PacketAccess = false }; };\n\ntemplate<typename T0,typename T1>\nstruct functor_traits<std::unary_compose<T0,T1> >\n{ enum { Cost = functor_traits<T0>::Cost + functor_traits<T1>::Cost, PacketAccess = false }; };\n\ntemplate<typename T0,typename T1,typename T2>\nstruct functor_traits<std::binary_compose<T0,T1,T2> >\n{ enum { Cost = functor_traits<T0>::Cost + functor_traits<T1>::Cost + functor_traits<T2>::Cost, PacketAccess = false }; };\n\n#endif // EIGEN_STDEXT_SUPPORT\n\n// Allows adding new functors and specializations of functor_traits from outside Eigen.\n// This macro is really needed because functor_traits must be specialized after it is declared but before it is used...\n#ifdef EIGEN_FUNCTORS_PLUGIN\n#include EIGEN_FUNCTORS_PLUGIN\n#endif\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_STL_FUNCTORS_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/functors/TernaryFunctors.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2016 Eugene Brevdo <ebrevdo@gmail.com>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_TERNARY_FUNCTORS_H\n#define EIGEN_TERNARY_FUNCTORS_H\n\nnamespace Eigen {\n\nnamespace internal {\n\n//---------- associative ternary functors ----------\n\n\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_TERNARY_FUNCTORS_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/functors/UnaryFunctors.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008-2016 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_UNARY_FUNCTORS_H\n#define EIGEN_UNARY_FUNCTORS_H\n\nnamespace Eigen {\n\nnamespace internal {\n\n/** \\internal\n  * \\brief Template functor to compute the opposite of a scalar\n  *\n  * \\sa class CwiseUnaryOp, MatrixBase::operator-\n  */\ntemplate<typename Scalar> struct scalar_opposite_op {\n  EIGEN_EMPTY_STRUCT_CTOR(scalar_opposite_op)\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return -a; }\n  template<typename Packet>\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const\n  { return internal::pnegate(a); }\n};\ntemplate<typename Scalar>\nstruct functor_traits<scalar_opposite_op<Scalar> >\n{ enum {\n    Cost = NumTraits<Scalar>::AddCost,\n    PacketAccess = packet_traits<Scalar>::HasNegate };\n};\n\n/** \\internal\n  * \\brief Template functor to compute the absolute value of a scalar\n  *\n  * \\sa class CwiseUnaryOp, Cwise::abs\n  */\ntemplate<typename Scalar> struct scalar_abs_op {\n  EIGEN_EMPTY_STRUCT_CTOR(scalar_abs_op)\n  typedef typename NumTraits<Scalar>::Real result_type;\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { return numext::abs(a); }\n  template<typename Packet>\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const\n  { return internal::pabs(a); }\n};\ntemplate<typename Scalar>\nstruct functor_traits<scalar_abs_op<Scalar> >\n{\n  enum {\n    Cost = NumTraits<Scalar>::AddCost,\n    PacketAccess = packet_traits<Scalar>::HasAbs\n  };\n};\n\n/** \\internal\n  * \\brief Template functor to compute the score of a scalar, to chose a pivot\n  *\n  * \\sa class CwiseUnaryOp\n  */\ntemplate<typename Scalar> struct scalar_score_coeff_op : scalar_abs_op<Scalar>\n{\n  typedef void Score_is_abs;\n};\ntemplate<typename Scalar>\nstruct functor_traits<scalar_score_coeff_op<Scalar> > : functor_traits<scalar_abs_op<Scalar> > {};\n\n/* Avoid recomputing abs when we know the score and they are the same. Not a true Eigen functor.  
*/\ntemplate<typename Scalar, typename=void> struct abs_knowing_score\n{\n  EIGEN_EMPTY_STRUCT_CTOR(abs_knowing_score)\n  typedef typename NumTraits<Scalar>::Real result_type;\n  template<typename Score>\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a, const Score&) const { return numext::abs(a); }\n};\ntemplate<typename Scalar> struct abs_knowing_score<Scalar, typename scalar_score_coeff_op<Scalar>::Score_is_abs>\n{\n  EIGEN_EMPTY_STRUCT_CTOR(abs_knowing_score)\n  typedef typename NumTraits<Scalar>::Real result_type;\n  template<typename Scal>\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const Scal&, const result_type& a) const { return a; }\n};\n\n/** \internal\n  * \brief Template functor to compute the squared absolute value of a scalar\n  *\n  * \sa class CwiseUnaryOp, Cwise::abs2\n  */\ntemplate<typename Scalar> struct scalar_abs2_op {\n  EIGEN_EMPTY_STRUCT_CTOR(scalar_abs2_op)\n  typedef typename NumTraits<Scalar>::Real result_type;\n  EIGEN_DEVICE_FUNC\n  EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { return numext::abs2(a); }\n  template<typename Packet>\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const\n  { return internal::pmul(a,a); }\n};\ntemplate<typename Scalar>\nstruct functor_traits<scalar_abs2_op<Scalar> >\n{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasAbs2 }; };\n\n/** \internal\n  * \brief Template functor to compute the conjugate of a complex value\n  *\n  * \sa class CwiseUnaryOp, MatrixBase::conjugate()\n  */\ntemplate<typename Scalar> struct scalar_conjugate_op {\n  EIGEN_EMPTY_STRUCT_CTOR(scalar_conjugate_op)\n  EIGEN_DEVICE_FUNC\n  EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return numext::conj(a); }\n  template<typename Packet>\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const { return internal::pconj(a); }\n};\ntemplate<typename Scalar>\nstruct functor_traits<scalar_conjugate_op<Scalar> >\n{\n  enum {\n    Cost = 0,\n    // Yes the cost is zero even for complexes because in most cases for which\n    // the cost is used, conjugation turns out to be a no-op. Some examples:\n    //   cost(a*conj(b)) == cost(a*b)\n    //   cost(a+conj(b)) == cost(a+b)\n    //   <etc.\n    // If we don't set it to zero, then:\n    //   A.conjugate().lazyProduct(B.conjugate())\n    // will bake its operands. We definitely don't want that!\n    PacketAccess = packet_traits<Scalar>::HasConj\n  };\n};\n\n/** \internal\n  * \brief Template functor to compute the phase angle of a complex\n  *\n  * \sa class CwiseUnaryOp, Cwise::arg\n  */\ntemplate<typename Scalar> struct scalar_arg_op {\n  EIGEN_EMPTY_STRUCT_CTOR(scalar_arg_op)\n  typedef typename NumTraits<Scalar>::Real result_type;\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { return numext::arg(a); }\n  template<typename Packet>\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const\n  { return internal::parg(a); }\n};\ntemplate<typename Scalar>\nstruct functor_traits<scalar_arg_op<Scalar> >\n{\n  enum {\n    Cost = NumTraits<Scalar>::IsComplex ? 
5 * NumTraits<Scalar>::MulCost : NumTraits<Scalar>::AddCost,\n    PacketAccess = packet_traits<Scalar>::HasArg\n  };\n};\n/** \\internal\n  * \\brief Template functor to cast a scalar to another type\n  *\n  * \\sa class CwiseUnaryOp, MatrixBase::cast()\n  */\ntemplate<typename Scalar, typename NewType>\nstruct scalar_cast_op {\n  EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op)\n  typedef NewType result_type;\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const NewType operator() (const Scalar& a) const { return cast<Scalar, NewType>(a); }\n};\ntemplate<typename Scalar, typename NewType>\nstruct functor_traits<scalar_cast_op<Scalar,NewType> >\n{ enum { Cost = is_same<Scalar, NewType>::value ? 0 : NumTraits<NewType>::AddCost, PacketAccess = false }; };\n\n/** \\internal\n  * \\brief Template functor to arithmetically shift a scalar right by a number of bits\n  *\n  * \\sa class CwiseUnaryOp, MatrixBase::shift_right()\n  */\ntemplate<typename Scalar, int N>\nstruct scalar_shift_right_op {\n  EIGEN_EMPTY_STRUCT_CTOR(scalar_shift_right_op)\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const\n  { return a >> N; }\n  template<typename Packet>\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const\n  { return internal::parithmetic_shift_right<N>(a); }\n};\ntemplate<typename Scalar, int N>\nstruct functor_traits<scalar_shift_right_op<Scalar,N> >\n{ enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = packet_traits<Scalar>::HasShift }; };\n\n/** \\internal\n  * \\brief Template functor to logically shift a scalar left by a number of bits\n  *\n  * \\sa class CwiseUnaryOp, MatrixBase::shift_left()\n  */\ntemplate<typename Scalar, int N>\nstruct scalar_shift_left_op {\n  EIGEN_EMPTY_STRUCT_CTOR(scalar_shift_left_op)\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const\n  { return a << N; }\n  template<typename Packet>\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const\n  { return internal::plogical_shift_left<N>(a); }\n};\ntemplate<typename Scalar, int N>\nstruct functor_traits<scalar_shift_left_op<Scalar,N> >\n{ enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = packet_traits<Scalar>::HasShift }; };\n\n/** \\internal\n  * \\brief Template functor to extract the real part of a complex\n  *\n  * \\sa class CwiseUnaryOp, MatrixBase::real()\n  */\ntemplate<typename Scalar>\nstruct scalar_real_op {\n  EIGEN_EMPTY_STRUCT_CTOR(scalar_real_op)\n  typedef typename NumTraits<Scalar>::Real result_type;\n  EIGEN_DEVICE_FUNC\n  EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return numext::real(a); }\n};\ntemplate<typename Scalar>\nstruct functor_traits<scalar_real_op<Scalar> >\n{ enum { Cost = 0, PacketAccess = false }; };\n\n/** \\internal\n  * \\brief Template functor to extract the imaginary part of a complex\n  *\n  * \\sa class CwiseUnaryOp, MatrixBase::imag()\n  */\ntemplate<typename Scalar>\nstruct scalar_imag_op {\n  EIGEN_EMPTY_STRUCT_CTOR(scalar_imag_op)\n  typedef typename NumTraits<Scalar>::Real result_type;\n  EIGEN_DEVICE_FUNC\n  EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return numext::imag(a); }\n};\ntemplate<typename Scalar>\nstruct functor_traits<scalar_imag_op<Scalar> >\n{ enum { Cost = 0, PacketAccess = false }; };\n\n/** \\internal\n  * \\brief Template functor to extract the real part of a complex as a reference\n  *\n  * \\sa class CwiseUnaryOp, MatrixBase::real()\n  */\ntemplate<typename Scalar>\nstruct 
scalar_real_ref_op {\n  EIGEN_EMPTY_STRUCT_CTOR(scalar_real_ref_op)\n  typedef typename NumTraits<Scalar>::Real result_type;\n  EIGEN_DEVICE_FUNC\n  EIGEN_STRONG_INLINE result_type& operator() (const Scalar& a) const { return numext::real_ref(*const_cast<Scalar*>(&a)); }\n};\ntemplate<typename Scalar>\nstruct functor_traits<scalar_real_ref_op<Scalar> >\n{ enum { Cost = 0, PacketAccess = false }; };\n\n/** \\internal\n  * \\brief Template functor to extract the imaginary part of a complex as a reference\n  *\n  * \\sa class CwiseUnaryOp, MatrixBase::imag()\n  */\ntemplate<typename Scalar>\nstruct scalar_imag_ref_op {\n  EIGEN_EMPTY_STRUCT_CTOR(scalar_imag_ref_op)\n  typedef typename NumTraits<Scalar>::Real result_type;\n  EIGEN_DEVICE_FUNC\n  EIGEN_STRONG_INLINE result_type& operator() (const Scalar& a) const { return numext::imag_ref(*const_cast<Scalar*>(&a)); }\n};\ntemplate<typename Scalar>\nstruct functor_traits<scalar_imag_ref_op<Scalar> >\n{ enum { Cost = 0, PacketAccess = false }; };\n\n/** \\internal\n  *\n  * \\brief Template functor to compute the exponential of a scalar\n  *\n  * \\sa class CwiseUnaryOp, Cwise::exp()\n  */\ntemplate<typename Scalar> struct scalar_exp_op {\n  EIGEN_EMPTY_STRUCT_CTOR(scalar_exp_op)\n  EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::exp(a); }\n  template <typename Packet>\n  EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pexp(a); }\n};\ntemplate <typename Scalar>\nstruct functor_traits<scalar_exp_op<Scalar> > {\n  enum {\n    PacketAccess = packet_traits<Scalar>::HasExp,\n    // The following numbers are based on the AVX implementation.\n#ifdef EIGEN_VECTORIZE_FMA\n    // Haswell can issue 2 add/mul/madd per cycle.\n    Cost =\n    (sizeof(Scalar) == 4\n     // float: 8 pmadd, 4 pmul, 2 padd/psub, 6 other\n     ? (8 * NumTraits<Scalar>::AddCost + 6 * NumTraits<Scalar>::MulCost)\n     // double: 7 pmadd, 5 pmul, 3 padd/psub, 1 div,  13 other\n     : (14 * NumTraits<Scalar>::AddCost +\n        6 * NumTraits<Scalar>::MulCost +\n        scalar_div_cost<Scalar,packet_traits<Scalar>::HasDiv>::value))\n#else\n    Cost =\n    (sizeof(Scalar) == 4\n     // float: 7 pmadd, 6 pmul, 4 padd/psub, 10 other\n     ? 
(21 * NumTraits<Scalar>::AddCost + 13 * NumTraits<Scalar>::MulCost)\n     // double: 7 pmadd, 5 pmul, 3 padd/psub, 1 div,  13 other\n     : (23 * NumTraits<Scalar>::AddCost +\n        12 * NumTraits<Scalar>::MulCost +\n        scalar_div_cost<Scalar,packet_traits<Scalar>::HasDiv>::value))\n#endif\n  };\n};\n\n/** \\internal\n  *\n  * \\brief Template functor to compute the exponential of a scalar - 1.\n  *\n  * \\sa class CwiseUnaryOp, ArrayBase::expm1()\n  */\ntemplate<typename Scalar> struct scalar_expm1_op {\n  EIGEN_EMPTY_STRUCT_CTOR(scalar_expm1_op)\n  EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::expm1(a); }\n  template <typename Packet>\n  EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pexpm1(a); }\n};\ntemplate <typename Scalar>\nstruct functor_traits<scalar_expm1_op<Scalar> > {\n  enum {\n    PacketAccess = packet_traits<Scalar>::HasExpm1,\n    Cost = functor_traits<scalar_exp_op<Scalar> >::Cost // TODO measure cost of expm1\n  };\n};\n\n/** \\internal\n  *\n  * \\brief Template functor to compute the logarithm of a scalar\n  *\n  * \\sa class CwiseUnaryOp, ArrayBase::log()\n  */\ntemplate<typename Scalar> struct scalar_log_op {\n  EIGEN_EMPTY_STRUCT_CTOR(scalar_log_op)\n  EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::log(a); }\n  template <typename Packet>\n  EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::plog(a); }\n};\ntemplate <typename Scalar>\nstruct functor_traits<scalar_log_op<Scalar> > {\n  enum {\n    PacketAccess = packet_traits<Scalar>::HasLog,\n    Cost =\n    (PacketAccess\n     // The following numbers are based on the AVX implementation.\n#ifdef EIGEN_VECTORIZE_FMA\n     // 8 pmadd, 6 pmul, 8 padd/psub, 16 other, can issue 2 add/mul/madd per cycle.\n     ? (20 * NumTraits<Scalar>::AddCost + 7 * NumTraits<Scalar>::MulCost)\n#else\n     // 8 pmadd, 6 pmul, 8 padd/psub, 20 other\n     ? (36 * NumTraits<Scalar>::AddCost + 14 * NumTraits<Scalar>::MulCost)\n#endif\n     // Measured cost of std::log.\n     : sizeof(Scalar)==4 ? 
40 : 85)\n  };\n};\n\n/** \\internal\n  *\n  * \\brief Template functor to compute the logarithm of 1 plus a scalar value\n  *\n  * \\sa class CwiseUnaryOp, ArrayBase::log1p()\n  */\ntemplate<typename Scalar> struct scalar_log1p_op {\n  EIGEN_EMPTY_STRUCT_CTOR(scalar_log1p_op)\n  EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::log1p(a); }\n  template <typename Packet>\n  EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::plog1p(a); }\n};\ntemplate <typename Scalar>\nstruct functor_traits<scalar_log1p_op<Scalar> > {\n  enum {\n    PacketAccess = packet_traits<Scalar>::HasLog1p,\n    Cost = functor_traits<scalar_log_op<Scalar> >::Cost // TODO measure cost of log1p\n  };\n};\n\n/** \\internal\n  *\n  * \\brief Template functor to compute the base-10 logarithm of a scalar\n  *\n  * \\sa class CwiseUnaryOp, Cwise::log10()\n  */\ntemplate<typename Scalar> struct scalar_log10_op {\n  EIGEN_EMPTY_STRUCT_CTOR(scalar_log10_op)\n  EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { EIGEN_USING_STD(log10) return log10(a); }\n  template <typename Packet>\n  EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::plog10(a); }\n};\ntemplate<typename Scalar>\nstruct functor_traits<scalar_log10_op<Scalar> >\n{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasLog10 }; };\n\n/** \\internal\n  *\n  * \\brief Template functor to compute the base-2 logarithm of a scalar\n  *\n  * \\sa class CwiseUnaryOp, Cwise::log2()\n  */\ntemplate<typename Scalar> struct scalar_log2_op {\n  EIGEN_EMPTY_STRUCT_CTOR(scalar_log2_op)\n  EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return Scalar(EIGEN_LOG2E) * numext::log(a); }\n  template <typename Packet>\n  EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::plog2(a); }\n};\ntemplate<typename Scalar>\nstruct functor_traits<scalar_log2_op<Scalar> >\n{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasLog }; };\n\n/** \\internal\n  * \\brief Template functor to compute the square root of a scalar\n  * \\sa class CwiseUnaryOp, Cwise::sqrt()\n  */\ntemplate<typename Scalar> struct scalar_sqrt_op {\n  EIGEN_EMPTY_STRUCT_CTOR(scalar_sqrt_op)\n  EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::sqrt(a); }\n  template <typename Packet>\n  EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::psqrt(a); }\n};\ntemplate <typename Scalar>\nstruct functor_traits<scalar_sqrt_op<Scalar> > {\n  enum {\n#if EIGEN_FAST_MATH\n    // The following numbers are based on the AVX implementation.\n    Cost = (sizeof(Scalar) == 8 ? 28\n                                // 4 pmul, 1 pmadd, 3 other\n                                : (3 * NumTraits<Scalar>::AddCost +\n                                   5 * NumTraits<Scalar>::MulCost)),\n#else\n    // The following numbers are based on min VSQRT throughput on Haswell.\n    Cost = (sizeof(Scalar) == 8 ? 
28 : 14),\n#endif\n    PacketAccess = packet_traits<Scalar>::HasSqrt\n  };\n};\n\n// Boolean specialization to eliminate -Wimplicit-conversion-floating-point-to-bool warnings.\ntemplate<> struct scalar_sqrt_op<bool> {\n  EIGEN_EMPTY_STRUCT_CTOR(scalar_sqrt_op)\n  EIGEN_DEPRECATED EIGEN_DEVICE_FUNC inline bool operator() (const bool& a) const { return a; }\n  template <typename Packet>\n  EIGEN_DEPRECATED EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return a; }\n};\ntemplate <>\nstruct functor_traits<scalar_sqrt_op<bool> > {\n  enum { Cost = 1, PacketAccess = packet_traits<bool>::Vectorizable };\n};\n\n/** \\internal\n  * \\brief Template functor to compute the reciprocal square root of a scalar\n  * \\sa class CwiseUnaryOp, Cwise::rsqrt()\n  */\ntemplate<typename Scalar> struct scalar_rsqrt_op {\n  EIGEN_EMPTY_STRUCT_CTOR(scalar_rsqrt_op)\n  EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::rsqrt(a); }\n  template <typename Packet>\n  EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::prsqrt(a); }\n};\n\ntemplate<typename Scalar>\nstruct functor_traits<scalar_rsqrt_op<Scalar> >\n{ enum {\n    Cost = 5 * NumTraits<Scalar>::MulCost,\n    PacketAccess = packet_traits<Scalar>::HasRsqrt\n  };\n};\n\n/** \\internal\n  * \\brief Template functor to compute the cosine of a scalar\n  * \\sa class CwiseUnaryOp, ArrayBase::cos()\n  */\ntemplate<typename Scalar> struct scalar_cos_op {\n  EIGEN_EMPTY_STRUCT_CTOR(scalar_cos_op)\n  EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return numext::cos(a); }\n  template <typename Packet>\n  EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pcos(a); }\n};\ntemplate<typename Scalar>\nstruct functor_traits<scalar_cos_op<Scalar> >\n{\n  enum {\n    Cost = 5 * NumTraits<Scalar>::MulCost,\n    PacketAccess = packet_traits<Scalar>::HasCos\n  };\n};\n\n/** \\internal\n  * \\brief Template functor to compute the sine of a scalar\n  * \\sa class CwiseUnaryOp, ArrayBase::sin()\n  */\ntemplate<typename Scalar> struct scalar_sin_op {\n  EIGEN_EMPTY_STRUCT_CTOR(scalar_sin_op)\n  EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::sin(a); }\n  template <typename Packet>\n  EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::psin(a); }\n};\ntemplate<typename Scalar>\nstruct functor_traits<scalar_sin_op<Scalar> >\n{\n  enum {\n    Cost = 5 * NumTraits<Scalar>::MulCost,\n    PacketAccess = packet_traits<Scalar>::HasSin\n  };\n};\n\n\n/** \\internal\n  * \\brief Template functor to compute the tan of a scalar\n  * \\sa class CwiseUnaryOp, ArrayBase::tan()\n  */\ntemplate<typename Scalar> struct scalar_tan_op {\n  EIGEN_EMPTY_STRUCT_CTOR(scalar_tan_op)\n  EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::tan(a); }\n  template <typename Packet>\n  EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::ptan(a); }\n};\ntemplate<typename Scalar>\nstruct functor_traits<scalar_tan_op<Scalar> >\n{\n  enum {\n    Cost = 5 * NumTraits<Scalar>::MulCost,\n    PacketAccess = packet_traits<Scalar>::HasTan\n  };\n};\n\n/** \\internal\n  * \\brief Template functor to compute the arc cosine of a scalar\n  * \\sa class CwiseUnaryOp, ArrayBase::acos()\n  */\ntemplate<typename Scalar> struct scalar_acos_op {\n  EIGEN_EMPTY_STRUCT_CTOR(scalar_acos_op)\n  EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) 
const { return numext::acos(a); }\n  template <typename Packet>\n  EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pacos(a); }\n};\ntemplate<typename Scalar>\nstruct functor_traits<scalar_acos_op<Scalar> >\n{\n  enum {\n    Cost = 5 * NumTraits<Scalar>::MulCost,\n    PacketAccess = packet_traits<Scalar>::HasACos\n  };\n};\n\n/** \\internal\n  * \\brief Template functor to compute the arc sine of a scalar\n  * \\sa class CwiseUnaryOp, ArrayBase::asin()\n  */\ntemplate<typename Scalar> struct scalar_asin_op {\n  EIGEN_EMPTY_STRUCT_CTOR(scalar_asin_op)\n  EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::asin(a); }\n  template <typename Packet>\n  EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pasin(a); }\n};\ntemplate<typename Scalar>\nstruct functor_traits<scalar_asin_op<Scalar> >\n{\n  enum {\n    Cost = 5 * NumTraits<Scalar>::MulCost,\n    PacketAccess = packet_traits<Scalar>::HasASin\n  };\n};\n\n\n/** \\internal\n  * \\brief Template functor to compute the atan of a scalar\n  * \\sa class CwiseUnaryOp, ArrayBase::atan()\n  */\ntemplate<typename Scalar> struct scalar_atan_op {\n  EIGEN_EMPTY_STRUCT_CTOR(scalar_atan_op)\n  EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::atan(a); }\n  template <typename Packet>\n  EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::patan(a); }\n};\ntemplate<typename Scalar>\nstruct functor_traits<scalar_atan_op<Scalar> >\n{\n  enum {\n    Cost = 5 * NumTraits<Scalar>::MulCost,\n    PacketAccess = packet_traits<Scalar>::HasATan\n  };\n};\n\n/** \\internal\n  * \\brief Template functor to compute the tanh of a scalar\n  * \\sa class CwiseUnaryOp, ArrayBase::tanh()\n  */\ntemplate <typename Scalar>\nstruct scalar_tanh_op {\n  EIGEN_EMPTY_STRUCT_CTOR(scalar_tanh_op)\n  EIGEN_DEVICE_FUNC inline const Scalar operator()(const Scalar& a) const { return numext::tanh(a); }\n  template <typename Packet>\n  EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& x) const { return ptanh(x); }\n};\n\ntemplate <typename Scalar>\nstruct functor_traits<scalar_tanh_op<Scalar> > {\n  enum {\n    PacketAccess = packet_traits<Scalar>::HasTanh,\n    Cost = ( (EIGEN_FAST_MATH && is_same<Scalar,float>::value)\n// The following numbers are based on the AVX implementation,\n#ifdef EIGEN_VECTORIZE_FMA\n                // Haswell can issue 2 add/mul/madd per cycle.\n                // 9 pmadd, 2 pmul, 1 div, 2 other\n                ? (2 * NumTraits<Scalar>::AddCost +\n                   6 * NumTraits<Scalar>::MulCost +\n                   scalar_div_cost<Scalar,packet_traits<Scalar>::HasDiv>::value)\n#else\n                ? 
(11 * NumTraits<Scalar>::AddCost +\n                   11 * NumTraits<Scalar>::MulCost +\n                   scalar_div_cost<Scalar,packet_traits<Scalar>::HasDiv>::value)\n#endif\n                // This number assumes a naive implementation of tanh\n                : (6 * NumTraits<Scalar>::AddCost +\n                   3 * NumTraits<Scalar>::MulCost +\n                   2 * scalar_div_cost<Scalar,packet_traits<Scalar>::HasDiv>::value +\n                   functor_traits<scalar_exp_op<Scalar> >::Cost))\n  };\n};\n\n#if EIGEN_HAS_CXX11_MATH\n/** \\internal\n  * \\brief Template functor to compute the atanh of a scalar\n  * \\sa class CwiseUnaryOp, ArrayBase::atanh()\n  */\ntemplate <typename Scalar>\nstruct scalar_atanh_op {\n  EIGEN_EMPTY_STRUCT_CTOR(scalar_atanh_op)\n  EIGEN_DEVICE_FUNC inline const Scalar operator()(const Scalar& a) const { return numext::atanh(a); }\n};\n\ntemplate <typename Scalar>\nstruct functor_traits<scalar_atanh_op<Scalar> > {\n  enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = false };\n};\n#endif\n\n/** \\internal\n  * \\brief Template functor to compute the sinh of a scalar\n  * \\sa class CwiseUnaryOp, ArrayBase::sinh()\n  */\ntemplate<typename Scalar> struct scalar_sinh_op {\n  EIGEN_EMPTY_STRUCT_CTOR(scalar_sinh_op)\n  EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::sinh(a); }\n  template <typename Packet>\n  EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::psinh(a); }\n};\ntemplate<typename Scalar>\nstruct functor_traits<scalar_sinh_op<Scalar> >\n{\n  enum {\n    Cost = 5 * NumTraits<Scalar>::MulCost,\n    PacketAccess = packet_traits<Scalar>::HasSinh\n  };\n};\n\n#if EIGEN_HAS_CXX11_MATH\n/** \\internal\n  * \\brief Template functor to compute the asinh of a scalar\n  * \\sa class CwiseUnaryOp, ArrayBase::asinh()\n  */\ntemplate <typename Scalar>\nstruct scalar_asinh_op {\n  EIGEN_EMPTY_STRUCT_CTOR(scalar_asinh_op)\n  EIGEN_DEVICE_FUNC inline const Scalar operator()(const Scalar& a) const { return numext::asinh(a); }\n};\n\ntemplate <typename Scalar>\nstruct functor_traits<scalar_asinh_op<Scalar> > {\n  enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = false };\n};\n#endif\n\n/** \\internal\n  * \\brief Template functor to compute the cosh of a scalar\n  * \\sa class CwiseUnaryOp, ArrayBase::cosh()\n  */\ntemplate<typename Scalar> struct scalar_cosh_op {\n  EIGEN_EMPTY_STRUCT_CTOR(scalar_cosh_op)\n  EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::cosh(a); }\n  template <typename Packet>\n  EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pcosh(a); }\n};\ntemplate<typename Scalar>\nstruct functor_traits<scalar_cosh_op<Scalar> >\n{\n  enum {\n    Cost = 5 * NumTraits<Scalar>::MulCost,\n    PacketAccess = packet_traits<Scalar>::HasCosh\n  };\n};\n\n#if EIGEN_HAS_CXX11_MATH\n/** \\internal\n  * \\brief Template functor to compute the acosh of a scalar\n  * \\sa class CwiseUnaryOp, ArrayBase::acosh()\n  */\ntemplate <typename Scalar>\nstruct scalar_acosh_op {\n  EIGEN_EMPTY_STRUCT_CTOR(scalar_acosh_op)\n  EIGEN_DEVICE_FUNC inline const Scalar operator()(const Scalar& a) const { return numext::acosh(a); }\n};\n\ntemplate <typename Scalar>\nstruct functor_traits<scalar_acosh_op<Scalar> > {\n  enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = false };\n};\n#endif\n\n/** \\internal\n  * \\brief Template functor to compute the inverse of a scalar\n  * \\sa class CwiseUnaryOp, 
Cwise::inverse()\n  */\ntemplate<typename Scalar>\nstruct scalar_inverse_op {\n  EIGEN_EMPTY_STRUCT_CTOR(scalar_inverse_op)\n  EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return Scalar(1)/a; }\n  template<typename Packet>\n  EIGEN_DEVICE_FUNC inline const Packet packetOp(const Packet& a) const\n  { return internal::pdiv(pset1<Packet>(Scalar(1)),a); }\n};\ntemplate <typename Scalar>\nstruct functor_traits<scalar_inverse_op<Scalar> > {\n  enum {\n    PacketAccess = packet_traits<Scalar>::HasDiv,\n    Cost = scalar_div_cost<Scalar, PacketAccess>::value\n  };\n};\n\n/** \\internal\n  * \\brief Template functor to compute the square of a scalar\n  * \\sa class CwiseUnaryOp, Cwise::square()\n  */\ntemplate<typename Scalar>\nstruct scalar_square_op {\n  EIGEN_EMPTY_STRUCT_CTOR(scalar_square_op)\n  EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return a*a; }\n  template<typename Packet>\n  EIGEN_DEVICE_FUNC inline const Packet packetOp(const Packet& a) const\n  { return internal::pmul(a,a); }\n};\ntemplate<typename Scalar>\nstruct functor_traits<scalar_square_op<Scalar> >\n{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasMul }; };\n\n// Boolean specialization to avoid -Wint-in-bool-context warnings on GCC.\ntemplate<>\nstruct scalar_square_op<bool> {\n  EIGEN_EMPTY_STRUCT_CTOR(scalar_square_op)\n  EIGEN_DEPRECATED EIGEN_DEVICE_FUNC inline bool operator() (const bool& a) const { return a; }\n  template<typename Packet>\n  EIGEN_DEPRECATED EIGEN_DEVICE_FUNC inline const Packet packetOp(const Packet& a) const\n  { return a; }\n};\ntemplate<>\nstruct functor_traits<scalar_square_op<bool> >\n{ enum { Cost = 0, PacketAccess = packet_traits<bool>::Vectorizable }; };\n\n/** \\internal\n  * \\brief Template functor to compute the cube of a scalar\n  * \\sa class CwiseUnaryOp, Cwise::cube()\n  */\ntemplate<typename Scalar>\nstruct scalar_cube_op {\n  EIGEN_EMPTY_STRUCT_CTOR(scalar_cube_op)\n  EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return a*a*a; }\n  template<typename Packet>\n  EIGEN_DEVICE_FUNC inline const Packet packetOp(const Packet& a) const\n  { return internal::pmul(a,pmul(a,a)); }\n};\ntemplate<typename Scalar>\nstruct functor_traits<scalar_cube_op<Scalar> >\n{ enum { Cost = 2*NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasMul }; };\n\n// Boolean specialization to avoid -Wint-in-bool-context warnings on GCC.\ntemplate<>\nstruct scalar_cube_op<bool> {\n  EIGEN_EMPTY_STRUCT_CTOR(scalar_cube_op)\n  EIGEN_DEPRECATED EIGEN_DEVICE_FUNC inline bool operator() (const bool& a) const { return a; }\n  template<typename Packet>\n  EIGEN_DEPRECATED EIGEN_DEVICE_FUNC inline const Packet packetOp(const Packet& a) const\n  { return a; }\n};\ntemplate<>\nstruct functor_traits<scalar_cube_op<bool> >\n{ enum { Cost = 0, PacketAccess = packet_traits<bool>::Vectorizable }; };\n\n/** \\internal\n  * \\brief Template functor to compute the rounded value of a scalar\n  * \\sa class CwiseUnaryOp, ArrayBase::round()\n  */\ntemplate<typename Scalar> struct scalar_round_op {\n  EIGEN_EMPTY_STRUCT_CTOR(scalar_round_op)\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return numext::round(a); }\n  template <typename Packet>\n  EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pround(a); }\n};\ntemplate<typename Scalar>\nstruct functor_traits<scalar_round_op<Scalar> >\n{\n  enum {\n    Cost = NumTraits<Scalar>::MulCost,\n   
 PacketAccess = packet_traits<Scalar>::HasRound\n  };\n};\n\n/** \\internal\n  * \\brief Template functor to compute the floor of a scalar\n  * \\sa class CwiseUnaryOp, ArrayBase::floor()\n  */\ntemplate<typename Scalar> struct scalar_floor_op {\n  EIGEN_EMPTY_STRUCT_CTOR(scalar_floor_op)\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return numext::floor(a); }\n  template <typename Packet>\n  EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pfloor(a); }\n};\ntemplate<typename Scalar>\nstruct functor_traits<scalar_floor_op<Scalar> >\n{\n  enum {\n    Cost = NumTraits<Scalar>::MulCost,\n    PacketAccess = packet_traits<Scalar>::HasFloor\n  };\n};\n\n/** \\internal\n  * \\brief Template functor to compute the rounded (with current rounding mode)  value of a scalar\n  * \\sa class CwiseUnaryOp, ArrayBase::rint()\n  */\ntemplate<typename Scalar> struct scalar_rint_op {\n  EIGEN_EMPTY_STRUCT_CTOR(scalar_rint_op)\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return numext::rint(a); }\n  template <typename Packet>\n  EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::print(a); }\n};\ntemplate<typename Scalar>\nstruct functor_traits<scalar_rint_op<Scalar> >\n{\n  enum {\n    Cost = NumTraits<Scalar>::MulCost,\n    PacketAccess = packet_traits<Scalar>::HasRint\n  };\n};\n\n/** \\internal\n  * \\brief Template functor to compute the ceil of a scalar\n  * \\sa class CwiseUnaryOp, ArrayBase::ceil()\n  */\ntemplate<typename Scalar> struct scalar_ceil_op {\n  EIGEN_EMPTY_STRUCT_CTOR(scalar_ceil_op)\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return numext::ceil(a); }\n  template <typename Packet>\n  EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pceil(a); }\n};\ntemplate<typename Scalar>\nstruct functor_traits<scalar_ceil_op<Scalar> >\n{\n  enum {\n    Cost = NumTraits<Scalar>::MulCost,\n    PacketAccess = packet_traits<Scalar>::HasCeil\n  };\n};\n\n/** \\internal\n  * \\brief Template functor to compute whether a scalar is NaN\n  * \\sa class CwiseUnaryOp, ArrayBase::isnan()\n  */\ntemplate<typename Scalar> struct scalar_isnan_op {\n  EIGEN_EMPTY_STRUCT_CTOR(scalar_isnan_op)\n  typedef bool result_type;\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const {\n#if defined(SYCL_DEVICE_ONLY)\n    return numext::isnan(a);\n#else\n    return (numext::isnan)(a);\n#endif\n  }\n};\ntemplate<typename Scalar>\nstruct functor_traits<scalar_isnan_op<Scalar> >\n{\n  enum {\n    Cost = NumTraits<Scalar>::MulCost,\n    PacketAccess = false\n  };\n};\n\n/** \\internal\n  * \\brief Template functor to check whether a scalar is +/-inf\n  * \\sa class CwiseUnaryOp, ArrayBase::isinf()\n  */\ntemplate<typename Scalar> struct scalar_isinf_op {\n  EIGEN_EMPTY_STRUCT_CTOR(scalar_isinf_op)\n  typedef bool result_type;\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const {\n#if defined(SYCL_DEVICE_ONLY)\n    return numext::isinf(a);\n#else\n    return (numext::isinf)(a);\n#endif\n  }\n};\ntemplate<typename Scalar>\nstruct functor_traits<scalar_isinf_op<Scalar> >\n{\n  enum {\n    Cost = NumTraits<Scalar>::MulCost,\n    PacketAccess = false\n  };\n};\n\n/** \\internal\n  * \\brief Template functor to check whether a scalar has a finite value\n  * \\sa class CwiseUnaryOp, ArrayBase::isfinite()\n  */\ntemplate<typename Scalar> 
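// [Editor's note: illustrative aside, not upstream Eigen text.] The three\n// predicates isnan/isinf/isfinite back the ArrayBase methods named in the\n// \sa tags; each yields a boolean expression, so non-finite entries can be\n// masked out with select(), e.g. x.isFinite().select(x, 0.0) in the\n// Eigen 3.4 Array API.\n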
struct scalar_isfinite_op {\n  EIGEN_EMPTY_STRUCT_CTOR(scalar_isfinite_op)\n  typedef bool result_type;\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const {\n#if defined(SYCL_DEVICE_ONLY)\n    return numext::isfinite(a);\n#else\n    return (numext::isfinite)(a);\n#endif\n  }\n};\ntemplate<typename Scalar>\nstruct functor_traits<scalar_isfinite_op<Scalar> >\n{\n  enum {\n    Cost = NumTraits<Scalar>::MulCost,\n    PacketAccess = false\n  };\n};\n\n/** \\internal\n  * \\brief Template functor to compute the logical not of a boolean\n  *\n  * \\sa class CwiseUnaryOp, ArrayBase::operator!\n  */\ntemplate<typename Scalar> struct scalar_boolean_not_op {\n  EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_not_op)\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator() (const bool& a) const { return !a; }\n};\ntemplate<typename Scalar>\nstruct functor_traits<scalar_boolean_not_op<Scalar> > {\n  enum {\n    Cost = NumTraits<bool>::AddCost,\n    PacketAccess = false\n  };\n};\n\n/** \\internal\n  * \\brief Template functor to compute the signum of a scalar\n  * \\sa class CwiseUnaryOp, Cwise::sign()\n  */\ntemplate<typename Scalar,bool is_complex=(NumTraits<Scalar>::IsComplex!=0), bool is_integer=(NumTraits<Scalar>::IsInteger!=0) > struct scalar_sign_op;\ntemplate<typename Scalar>\nstruct scalar_sign_op<Scalar, false, true> {\n  EIGEN_EMPTY_STRUCT_CTOR(scalar_sign_op)\n  EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const\n  {\n      return Scalar( (a>Scalar(0)) - (a<Scalar(0)) );\n  }\n  //TODO\n  //template <typename Packet>\n  //EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::psign(a); }\n};\n\ntemplate<typename Scalar>\nstruct scalar_sign_op<Scalar, false, false> {\n  EIGEN_EMPTY_STRUCT_CTOR(scalar_sign_op)\n  EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const\n  {\n    return (numext::isnan)(a) ? a : Scalar( (a>Scalar(0)) - (a<Scalar(0)) );\n  }\n  //TODO\n  //template <typename Packet>\n  //EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::psign(a); }\n};\n\ntemplate<typename Scalar, bool is_integer>\nstruct scalar_sign_op<Scalar,true, is_integer> {\n  EIGEN_EMPTY_STRUCT_CTOR(scalar_sign_op)\n  EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const\n  {\n    typedef typename NumTraits<Scalar>::Real real_type;\n    real_type aa = numext::abs(a);\n    if (aa==real_type(0))\n      return Scalar(0);\n    aa = real_type(1)/aa;\n    return Scalar(a.real()*aa, a.imag()*aa );\n  }\n  //TODO\n  //template <typename Packet>\n  //EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::psign(a); }\n};\ntemplate<typename Scalar>\nstruct functor_traits<scalar_sign_op<Scalar> >\n{ enum {\n    Cost =\n        NumTraits<Scalar>::IsComplex\n        ? 
( 8*NumTraits<Scalar>::MulCost  ) // roughly\n        : ( 3*NumTraits<Scalar>::AddCost),\n    PacketAccess = packet_traits<Scalar>::HasSign\n  };\n};\n\n/** \internal\n  * \brief Template functor to compute the logistic function of a scalar\n  * \sa class CwiseUnaryOp, ArrayBase::logistic()\n  */\ntemplate <typename T>\nstruct scalar_logistic_op {\n  EIGEN_EMPTY_STRUCT_CTOR(scalar_logistic_op)\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator()(const T& x) const {\n    return packetOp(x);\n  }\n\n  template <typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  Packet packetOp(const Packet& x) const {\n    const Packet one = pset1<Packet>(T(1));\n    return pdiv(one, padd(one, pexp(pnegate(x))));\n  }\n};\n\n#ifndef EIGEN_GPU_COMPILE_PHASE\n/** \internal\n  * \brief Template specialization of the logistic function for float.\n  *\n  *  Uses just a 9/10-degree rational interpolant which\n  *  interpolates 1/(1+exp(-x)) - 0.5 up to a couple of ulps in the range\n  *  [-9, 18]. Below -9 we use the more accurate approximation\n  *  1/(1+exp(-x)) ~= exp(x), and above 18 the logistic function is 1 within\n  *  one ulp. The shifted logistic is interpolated because it was easier to\n  *  make the fit converge.\n  *\n  */\ntemplate <>\nstruct scalar_logistic_op<float> {\n  EIGEN_EMPTY_STRUCT_CTOR(scalar_logistic_op)\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float operator()(const float& x) const {\n    return packetOp(x);\n  }\n\n  template <typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  Packet packetOp(const Packet& _x) const {\n    const Packet cutoff_lower = pset1<Packet>(-9.f);\n    const Packet lt_mask = pcmp_lt<Packet>(_x, cutoff_lower);\n    const bool any_small = predux_any(lt_mask);\n\n    // The upper cut-off is the smallest x for which the rational approximation evaluates to 1.\n    // Choosing this value saves us a few instructions clamping the results at the end.\n#ifdef EIGEN_VECTORIZE_FMA\n    const Packet cutoff_upper = pset1<Packet>(15.7243833541870117f);\n#else\n    const Packet cutoff_upper = pset1<Packet>(15.6437711715698242f);\n#endif\n    const Packet x = pmin(_x, cutoff_upper);\n\n    // The monomial coefficients of the numerator polynomial (odd).\n    const Packet alpha_1 = pset1<Packet>(2.48287947061529e-01f);\n    const Packet alpha_3 = pset1<Packet>(8.51377133304701e-03f);\n    const Packet alpha_5 = pset1<Packet>(6.08574864600143e-05f);\n    const Packet alpha_7 = pset1<Packet>(1.15627324459942e-07f);\n    const Packet alpha_9 = pset1<Packet>(4.37031012579801e-11f);\n\n    // The monomial coefficients of the denominator polynomial (even).\n    const Packet beta_0 = pset1<Packet>(9.93151921023180e-01f);\n    const Packet beta_2 = pset1<Packet>(1.16817656904453e-01f);\n    const Packet beta_4 = pset1<Packet>(1.70198817374094e-03f);\n    const Packet beta_6 = pset1<Packet>(6.29106785017040e-06f);\n    const Packet beta_8 = pset1<Packet>(5.76102136993427e-09f);\n    const Packet beta_10 = pset1<Packet>(6.10247389755681e-13f);\n\n    // Since the polynomials are odd/even, we need x^2.\n    const Packet x2 = pmul(x, x);\n\n    // Evaluate the numerator polynomial p.\n    Packet p = pmadd(x2, alpha_9, alpha_7);\n    p = pmadd(x2, p, alpha_5);\n    p = pmadd(x2, p, alpha_3);\n    p = pmadd(x2, p, alpha_1);\n    p = pmul(x, p);\n\n    // Evaluate the denominator polynomial q.\n    Packet q = pmadd(x2, beta_10, beta_8);\n    q = pmadd(x2, q, beta_6);\n    q = pmadd(x2, q, beta_4);\n    q = pmadd(x2, q, beta_2);\n    q = pmadd(x2, q, beta_0);\n    // Divide 
the numerator by the denominator and shift it up.\n    const Packet logistic = padd(pdiv(p, q), pset1<Packet>(0.5f));\n    if (EIGEN_PREDICT_FALSE(any_small)) {\n      const Packet exponential = pexp(_x);\n      return pselect(lt_mask, exponential, logistic);\n    } else {\n      return logistic;\n    }\n  }\n};\n#endif  // #ifndef EIGEN_GPU_COMPILE_PHASE\n\ntemplate <typename T>\nstruct functor_traits<scalar_logistic_op<T> > {\n  enum {\n    // The cost estimate for float here is for the common(?) case where\n    // all arguments are greater than -9.\n    Cost = scalar_div_cost<T, packet_traits<T>::HasDiv>::value +\n           (internal::is_same<T, float>::value\n                ? NumTraits<T>::AddCost * 15 + NumTraits<T>::MulCost * 11\n                : NumTraits<T>::AddCost * 2 +\n                      functor_traits<scalar_exp_op<T> >::Cost),\n    PacketAccess =\n        packet_traits<T>::HasAdd && packet_traits<T>::HasDiv &&\n        (internal::is_same<T, float>::value\n             ? packet_traits<T>::HasMul && packet_traits<T>::HasMax &&\n                   packet_traits<T>::HasMin\n             : packet_traits<T>::HasNegate && packet_traits<T>::HasExp)\n  };\n};\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_FUNCTORS_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/products/GeneralBlockPanelKernel.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_GENERAL_BLOCK_PANEL_H\n#define EIGEN_GENERAL_BLOCK_PANEL_H\n\n\nnamespace Eigen {\n\nnamespace internal {\n\nenum GEBPPacketSizeType {\n  GEBPPacketFull = 0,\n  GEBPPacketHalf,\n  GEBPPacketQuarter\n};\n\ntemplate<typename _LhsScalar, typename _RhsScalar, bool _ConjLhs=false, bool _ConjRhs=false, int Arch=Architecture::Target, int _PacketSize=GEBPPacketFull>\nclass gebp_traits;\n\n\n/** \\internal \\returns b if a<=0, and returns a otherwise. */\ninline std::ptrdiff_t manage_caching_sizes_helper(std::ptrdiff_t a, std::ptrdiff_t b)\n{\n  return a<=0 ? b : a;\n}\n\n#if defined(EIGEN_DEFAULT_L1_CACHE_SIZE)\n#define EIGEN_SET_DEFAULT_L1_CACHE_SIZE(val) EIGEN_DEFAULT_L1_CACHE_SIZE\n#else\n#define EIGEN_SET_DEFAULT_L1_CACHE_SIZE(val) val\n#endif // defined(EIGEN_DEFAULT_L1_CACHE_SIZE)\n\n#if defined(EIGEN_DEFAULT_L2_CACHE_SIZE)\n#define EIGEN_SET_DEFAULT_L2_CACHE_SIZE(val) EIGEN_DEFAULT_L2_CACHE_SIZE\n#else\n#define EIGEN_SET_DEFAULT_L2_CACHE_SIZE(val) val\n#endif // defined(EIGEN_DEFAULT_L2_CACHE_SIZE)\n\n#if defined(EIGEN_DEFAULT_L3_CACHE_SIZE)\n#define EIGEN_SET_DEFAULT_L3_CACHE_SIZE(val) EIGEN_DEFAULT_L3_CACHE_SIZE\n#else\n#define EIGEN_SET_DEFAULT_L3_CACHE_SIZE(val) val\n#endif // defined(EIGEN_DEFAULT_L3_CACHE_SIZE)\n  \n#if EIGEN_ARCH_i386_OR_x86_64\nconst std::ptrdiff_t defaultL1CacheSize = EIGEN_SET_DEFAULT_L1_CACHE_SIZE(32*1024);\nconst std::ptrdiff_t defaultL2CacheSize = EIGEN_SET_DEFAULT_L2_CACHE_SIZE(256*1024);\nconst std::ptrdiff_t defaultL3CacheSize = EIGEN_SET_DEFAULT_L3_CACHE_SIZE(2*1024*1024);\n#elif EIGEN_ARCH_PPC\nconst std::ptrdiff_t defaultL1CacheSize = EIGEN_SET_DEFAULT_L1_CACHE_SIZE(64*1024);\nconst std::ptrdiff_t defaultL2CacheSize = EIGEN_SET_DEFAULT_L2_CACHE_SIZE(512*1024);\nconst std::ptrdiff_t defaultL3CacheSize = EIGEN_SET_DEFAULT_L3_CACHE_SIZE(4*1024*1024);\n#else\nconst std::ptrdiff_t defaultL1CacheSize = EIGEN_SET_DEFAULT_L1_CACHE_SIZE(16*1024);\nconst std::ptrdiff_t defaultL2CacheSize = EIGEN_SET_DEFAULT_L2_CACHE_SIZE(512*1024);\nconst std::ptrdiff_t defaultL3CacheSize = EIGEN_SET_DEFAULT_L3_CACHE_SIZE(512*1024);\n#endif\n\n#undef EIGEN_SET_DEFAULT_L1_CACHE_SIZE\n#undef EIGEN_SET_DEFAULT_L2_CACHE_SIZE\n#undef EIGEN_SET_DEFAULT_L3_CACHE_SIZE\n\n/** \\internal */\nstruct CacheSizes {\n  CacheSizes(): m_l1(-1),m_l2(-1),m_l3(-1) {\n    int l1CacheSize, l2CacheSize, l3CacheSize;\n    queryCacheSizes(l1CacheSize, l2CacheSize, l3CacheSize);\n    m_l1 = manage_caching_sizes_helper(l1CacheSize, defaultL1CacheSize);\n    m_l2 = manage_caching_sizes_helper(l2CacheSize, defaultL2CacheSize);\n    m_l3 = manage_caching_sizes_helper(l3CacheSize, defaultL3CacheSize);\n  }\n\n  std::ptrdiff_t m_l1;\n  std::ptrdiff_t m_l2;\n  std::ptrdiff_t m_l3;\n};\n\n/** \\internal */\ninline void manage_caching_sizes(Action action, std::ptrdiff_t* l1, std::ptrdiff_t* l2, std::ptrdiff_t* l3)\n{\n  static CacheSizes m_cacheSizes;\n\n  if(action==SetAction)\n  {\n    // set the cpu cache size and cache all block sizes from a global cache size in byte\n    eigen_internal_assert(l1!=0 && l2!=0);\n    m_cacheSizes.m_l1 = *l1;\n    m_cacheSizes.m_l2 = *l2;\n    m_cacheSizes.m_l3 = *l3;\n  }\n  else 
if(action==GetAction)\n  {\n    eigen_internal_assert(l1!=0 && l2!=0);\n    *l1 = m_cacheSizes.m_l1;\n    *l2 = m_cacheSizes.m_l2;\n    *l3 = m_cacheSizes.m_l3;\n  }\n  else\n  {\n    eigen_internal_assert(false);\n  }\n}\n\n/* Helper for computeProductBlockingSizes.\n *\n * Given an m x k times k x n matrix product of scalar types \\c LhsScalar and \\c RhsScalar,\n * this function computes the blocking size parameters along the respective dimensions\n * for matrix products and related algorithms. The blocking sizes depend on various\n * parameters:\n * - the L1 and L2 cache sizes,\n * - the register level blocking sizes defined by gebp_traits,\n * - the number of scalars that fit into a packet (when vectorization is enabled).\n *\n * \\sa setCpuCacheSizes */\n\ntemplate<typename LhsScalar, typename RhsScalar, int KcFactor, typename Index>\nvoid evaluateProductBlockingSizesHeuristic(Index& k, Index& m, Index& n, Index num_threads = 1)\n{\n  typedef gebp_traits<LhsScalar,RhsScalar> Traits;\n\n  // Explanations:\n  // Let's recall that the product algorithms form mc x kc vertical panels A' on the lhs and\n  // kc x nc blocks B' on the rhs. B' has to fit into L2/L3 cache. Moreover, A' is processed\n  // per mr x kc horizontal small panels where mr is the blocking size along the m dimension\n  // at the register level. This small horizontal panel has to stay within L1 cache.\n  std::ptrdiff_t l1, l2, l3;\n  manage_caching_sizes(GetAction, &l1, &l2, &l3);\n  #ifdef EIGEN_VECTORIZE_AVX512\n  // We need to find a rationale for that, but without this adjustment,\n  // performance with AVX512 is pretty bad, like -20% slower.\n  // One reason is that with increasing packet-size, the blocking size k\n  // has to become pretty small if we want 1 lhs panel to fit within L1.\n  // For instance, with the 3pX4 kernel and double, the sizes of the lhs+rhs panels are:\n  //   k*(3*64 + 4*8) Bytes, with l1=32kBytes, and k%8=0, we have k=144.\n  // This is quite small for a good reuse of the accumulation registers.\n  l1 *= 4;\n  #endif\n\n  if (num_threads > 1) {\n    typedef typename Traits::ResScalar ResScalar;\n    enum {\n      kdiv = KcFactor * (Traits::mr * sizeof(LhsScalar) + Traits::nr * sizeof(RhsScalar)),\n      ksub = Traits::mr * Traits::nr * sizeof(ResScalar),\n      kr = 8,\n      mr = Traits::mr,\n      nr = Traits::nr\n    };\n    // Increasing k gives us more time to prefetch the content of the \"C\"\n    // registers. 
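These are the mr x nr accumulators of the result block. 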
However once the latency is hidden there is no point in\n    // increasing the value of k, so we'll cap it at 320 (value determined\n    // experimentally).\n    // To prevent k from vanishing, we make k_cache at least as big as kr\n    const Index k_cache = numext::maxi<Index>(kr, (numext::mini<Index>)((l1-ksub)/kdiv, 320));\n    if (k_cache < k) {\n      k = k_cache - (k_cache % kr);\n      eigen_internal_assert(k > 0);\n    }\n\n    const Index n_cache = (l2-l1) / (nr * sizeof(RhsScalar) * k);\n    const Index n_per_thread = numext::div_ceil(n, num_threads);\n    if (n_cache <= n_per_thread) {\n      // Don't exceed the capacity of the l2 cache.\n      eigen_internal_assert(n_cache >= static_cast<Index>(nr));\n      n = n_cache - (n_cache % nr);\n      eigen_internal_assert(n > 0);\n    } else {\n      n = (numext::mini<Index>)(n, (n_per_thread + nr - 1) - ((n_per_thread + nr - 1) % nr));\n    }\n\n    if (l3 > l2) {\n      // l3 is shared between all cores, so we'll give each thread its own chunk of l3.\n      const Index m_cache = (l3-l2) / (sizeof(LhsScalar) * k * num_threads);\n      const Index m_per_thread = numext::div_ceil(m, num_threads);\n      if(m_cache < m_per_thread && m_cache >= static_cast<Index>(mr)) {\n        m = m_cache - (m_cache % mr);\n        eigen_internal_assert(m > 0);\n      } else {\n        m = (numext::mini<Index>)(m, (m_per_thread + mr - 1) - ((m_per_thread + mr - 1) % mr));\n      }\n    }\n  }\n  else {\n    // In unit tests we do not want to use extra large matrices,\n    // so we reduce the cache size to check the blocking strategy is not flawed\n#ifdef EIGEN_DEBUG_SMALL_PRODUCT_BLOCKS\n    l1 = 9*1024;\n    l2 = 32*1024;\n    l3 = 512*1024;\n#endif\n\n    // Early return for small problems because the computations below are time consuming.\n    // Perhaps it would make more sense to consider k*n*m??\n    // Note that for very tiny problems, this function should be bypassed anyway\n    // because we use the coefficient-based implementation for them.\n    if((numext::maxi)(k,(numext::maxi)(m,n))<48)\n      return;\n\n    typedef typename Traits::ResScalar ResScalar;\n    enum {\n      k_peeling = 8,\n      k_div = KcFactor * (Traits::mr * sizeof(LhsScalar) + Traits::nr * sizeof(RhsScalar)),\n      k_sub = Traits::mr * Traits::nr * sizeof(ResScalar)\n    };\n\n    // ---- 1st level of blocking on L1, yields kc ----\n\n    // Blocking on the third dimension (i.e., k) is chosen so that a horizontal panel\n    // of size mr x kc of the lhs plus a vertical panel of kc x nr of the rhs both fit within L1 cache.\n    // We also include a register-level block of the result (mr x nr).\n    // (In an ideal world only the lhs panel would stay in L1)\n    // Moreover, kc has to be a multiple of 8 to be compatible with loop peeling, leading to a maximum blocking size of:\n    const Index max_kc = numext::maxi<Index>(((l1-k_sub)/k_div) & (~(k_peeling-1)),1);\n    const Index old_k = k;\n    if(k>max_kc)\n    {\n      // We are really blocking on the third dimension:\n      // -> reduce blocking size to make sure the last block is as large as possible\n      //    while keeping the same number of sweeps over the result.\n      k = (k%max_kc)==0 ? 
max_kc\n                        : max_kc - k_peeling * ((max_kc-1-(k%max_kc))/(k_peeling*(k/max_kc+1)));\n\n      eigen_internal_assert(((old_k/k) == (old_k/max_kc)) && \"the number of sweeps has to remain the same\");\n    }\n\n    // ---- 2nd level of blocking on max(L2,L3), yields nc ----\n\n    // TODO find a reliable way to get the actual amount of cache per core to use for 2nd level blocking, that is:\n    //      actual_l2 = max(l2, l3/nb_core_sharing_l3)\n    // The number below is quite conservative: it is better to underestimate the cache size than to overestimate it.\n    // For instance, it corresponds to 6MB of L3 shared among 4 cores.\n    #ifdef EIGEN_DEBUG_SMALL_PRODUCT_BLOCKS\n    const Index actual_l2 = l3;\n    #else\n    const Index actual_l2 = 1572864; // == 1.5 MB\n    #endif\n\n    // Here, nc is chosen such that a block of kc x nc of the rhs fits within half of L2.\n    // The second half is implicitly reserved to access the result and lhs coefficients.\n    // When k<max_kc, then nc can grow arbitrarily. In practice, it seems to be fruitful\n    // to limit this growth: we bound nc to grow by at most a factor of 1.5.\n    // However, if the entire lhs block fits within L1, then we are not going to block on the rows at all,\n    // and it becomes fruitful to keep the packed rhs blocks in L1 if there is enough remaining space.\n    Index max_nc;\n    const Index lhs_bytes = m * k * sizeof(LhsScalar);\n    const Index remaining_l1 = l1- k_sub - lhs_bytes;\n    if(remaining_l1 >= Index(Traits::nr*sizeof(RhsScalar))*k)\n    {\n      // L1 blocking\n      max_nc = remaining_l1 / (k*sizeof(RhsScalar));\n    }\n    else\n    {\n      // L2 blocking\n      max_nc = (3*actual_l2)/(2*2*max_kc*sizeof(RhsScalar));\n    }\n    // WARNING Below, we assume that Traits::nr is a power of two.\n    Index nc = numext::mini<Index>(actual_l2/(2*k*sizeof(RhsScalar)), max_nc) & (~(Traits::nr-1));\n    if(n>nc)\n    {\n      // We are really blocking over the columns:\n      // -> reduce blocking size to make sure the last block is as large as possible\n      //    while keeping the same number of sweeps over the packed lhs.\n      //    Here we allow one more sweep if this gives us a perfect match, thus the commented \"-1\"\n      n = (n%nc)==0 ? nc\n                    : (nc - Traits::nr * ((nc/*-1*/-(n%nc))/(Traits::nr*(n/nc+1))));\n    }\n    else if(old_k==k)\n    {\n      // So far, no blocking at all, i.e., kc==k, and nc==n.\n      // In this case, let's perform a blocking over the rows such that the packed lhs data is kept in cache L1/L2\n      // TODO: part of this blocking strategy is now implemented within the kernel itself, so the L1-based heuristic here should be obsolete.\n      Index problem_size = k*n*sizeof(LhsScalar);\n      Index actual_lm = actual_l2;\n      Index max_mc = m;\n      if(problem_size<=1024)\n      {\n        // problem is small enough to keep in L1\n        // Let's choose m such that lhs's block fits in 1/3 of L1\n        actual_lm = l1;\n      }\n      else if(l3!=0 && problem_size<=32768)\n      {\n        // we have both L2 and L3, and problem is small enough to be kept in L2\n        // Let's choose m such that lhs's block fits in 1/3 of L2\n        actual_lm = l2;\n        max_mc = (numext::mini<Index>)(576,max_mc);\n      }\n      Index mc = (numext::mini<Index>)(actual_lm/(3*k*sizeof(LhsScalar)), max_mc);\n      if (mc > Traits::mr) mc -= mc % Traits::mr;\n      else if (mc==0) return;\n      m = (m%mc)==0 ? 
mc\n                    : (mc - Traits::mr * ((mc/*-1*/-(m%mc))/(Traits::mr*(m/mc+1))));\n    }\n  }\n}\n\ntemplate <typename Index>\ninline bool useSpecificBlockingSizes(Index& k, Index& m, Index& n)\n{\n#ifdef EIGEN_TEST_SPECIFIC_BLOCKING_SIZES\n  if (EIGEN_TEST_SPECIFIC_BLOCKING_SIZES) {\n    k = numext::mini<Index>(k, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_K);\n    m = numext::mini<Index>(m, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_M);\n    n = numext::mini<Index>(n, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_N);\n    return true;\n  }\n#else\n  EIGEN_UNUSED_VARIABLE(k)\n  EIGEN_UNUSED_VARIABLE(m)\n  EIGEN_UNUSED_VARIABLE(n)\n#endif\n  return false;\n}\n\n/** \\brief Computes the blocking parameters for a m x k times k x n matrix product\n  *\n  * \\param[in,out] k Input: the third dimension of the product. Output: the blocking size along the same dimension.\n  * \\param[in,out] m Input: the number of rows of the left hand side. Output: the blocking size along the same dimension.\n  * \\param[in,out] n Input: the number of columns of the right hand side. Output: the blocking size along the same dimension.\n  *\n  * Given a m x k times k x n matrix product of scalar types \\c LhsScalar and \\c RhsScalar,\n  * this function computes the blocking size parameters along the respective dimensions\n  * for matrix products and related algorithms.\n  *\n  * The blocking size parameters may be evaluated:\n  *   - either by a heuristic based on cache sizes;\n  *   - or using fixed prescribed values (for testing purposes).\n  *\n  * \\sa setCpuCacheSizes */\n\ntemplate<typename LhsScalar, typename RhsScalar, int KcFactor, typename Index>\nvoid computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads = 1)\n{\n  if (!useSpecificBlockingSizes(k, m, n)) {\n    evaluateProductBlockingSizesHeuristic<LhsScalar, RhsScalar, KcFactor, Index>(k, m, n, num_threads);\n  }\n}\n\ntemplate<typename LhsScalar, typename RhsScalar, typename Index>\ninline void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads = 1)\n{\n  computeProductBlockingSizes<LhsScalar,RhsScalar,1,Index>(k, m, n, num_threads);\n}\n\ntemplate <typename RhsPacket, typename RhsPacketx4, int registers_taken>\nstruct RhsPanelHelper {\n private:\n  static const int remaining_registers = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS - registers_taken;\n public:\n  typedef typename conditional<remaining_registers>=4, RhsPacketx4, RhsPacket>::type type;\n};\n\ntemplate <typename Packet>\nstruct QuadPacket\n{\n  Packet B_0, B1, B2, B3;\n  const Packet& get(const FixedInt<0>&) const { return B_0; }\n  const Packet& get(const FixedInt<1>&) const { return B1; }\n  const Packet& get(const FixedInt<2>&) const { return B2; }\n  const Packet& get(const FixedInt<3>&) const { return B3; }\n};\n\ntemplate <int N, typename T1, typename T2, typename T3>\nstruct packet_conditional { typedef T3 type; };\n\ntemplate <typename T1, typename T2, typename T3>\nstruct packet_conditional<GEBPPacketFull, T1, T2, T3> { typedef T1 type; };\n\ntemplate <typename T1, typename T2, typename T3>\nstruct packet_conditional<GEBPPacketHalf, T1, T2, T3> { typedef T2 type; };\n\n#define PACKET_DECL_COND_PREFIX(prefix, name, packet_size)         \\\n  typedef typename packet_conditional<packet_size,                 \\\n                                      typename packet_traits<name ## Scalar>::type, \\\n                                      typename packet_traits<name ## Scalar>::half, \\\n                                      typename unpacket_traits<typename 
packet_traits<name ## Scalar>::half>::half>::type \\\n  prefix ## name ## Packet\n\n#define PACKET_DECL_COND(name, packet_size)                        \\\n  typedef typename packet_conditional<packet_size,                 \\\n                                      typename packet_traits<name ## Scalar>::type, \\\n                                      typename packet_traits<name ## Scalar>::half, \\\n                                      typename unpacket_traits<typename packet_traits<name ## Scalar>::half>::half>::type \\\n  name ## Packet\n\n#define PACKET_DECL_COND_SCALAR_PREFIX(prefix, packet_size)        \\\n  typedef typename packet_conditional<packet_size,                 \\\n                                      typename packet_traits<Scalar>::type, \\\n                                      typename packet_traits<Scalar>::half, \\\n                                      typename unpacket_traits<typename packet_traits<Scalar>::half>::half>::type \\\n  prefix ## ScalarPacket\n\n#define PACKET_DECL_COND_SCALAR(packet_size)                       \\\n  typedef typename packet_conditional<packet_size,                 \\\n                                      typename packet_traits<Scalar>::type, \\\n                                      typename packet_traits<Scalar>::half, \\\n                                      typename unpacket_traits<typename packet_traits<Scalar>::half>::half>::type \\\n  ScalarPacket\n\n/* Vectorization logic\n *  real*real: unpack rhs to constant packets, ...\n * \n *  cd*cd : unpack rhs to (b_r,b_r), (b_i,b_i), mul to get (a_r b_r,a_i b_r) (a_r b_i,a_i b_i),\n *          storing each res packet into two packets (2x2),\n *          at the end combine them: swap the second and addsub them \n *  cf*cf : same but with 2x4 blocks\n *  cplx*real : unpack rhs to constant packets, ...\n *  real*cplx : load lhs as (a0,a0,a1,a1), and mul as usual\n */\ntemplate<typename _LhsScalar, typename _RhsScalar, bool _ConjLhs, bool _ConjRhs, int Arch, int _PacketSize>\nclass gebp_traits\n{\npublic:\n  typedef _LhsScalar LhsScalar;\n  typedef _RhsScalar RhsScalar;\n  typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;\n\n  PACKET_DECL_COND_PREFIX(_, Lhs, _PacketSize);\n  PACKET_DECL_COND_PREFIX(_, Rhs, _PacketSize);\n  PACKET_DECL_COND_PREFIX(_, Res, _PacketSize);\n\n  enum {\n    ConjLhs = _ConjLhs,\n    ConjRhs = _ConjRhs,\n    Vectorizable = unpacket_traits<_LhsPacket>::vectorizable && unpacket_traits<_RhsPacket>::vectorizable,\n    LhsPacketSize = Vectorizable ? unpacket_traits<_LhsPacket>::size : 1,\n    RhsPacketSize = Vectorizable ? unpacket_traits<_RhsPacket>::size : 1,\n    ResPacketSize = Vectorizable ? 
unpacket_traits<_ResPacket>::size : 1,\n    \n    NumberOfRegisters = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS,\n\n    // register block size along the N direction must be 1 or 4\n    nr = 4,\n\n    // register block size along the M direction (currently, this one cannot be modified)\n    default_mr = (EIGEN_PLAIN_ENUM_MIN(16,NumberOfRegisters)/2/nr)*LhsPacketSize,\n#if defined(EIGEN_HAS_SINGLE_INSTRUCTION_MADD) && !defined(EIGEN_VECTORIZE_ALTIVEC) && !defined(EIGEN_VECTORIZE_VSX) \\\n    && ((!EIGEN_COMP_MSVC) || (EIGEN_COMP_MSVC>=1914))\n    // we assume 16 registers or more\n    // See bug 992: if the scalar type is not vectorizable but EIGEN_HAS_SINGLE_INSTRUCTION_MADD is defined,\n    // then using 3*LhsPacketSize triggers non-implemented paths in syrk.\n    // Bug 1515: MSVC prior to v19.14 leads to register spilling.\n    mr = Vectorizable ? 3*LhsPacketSize : default_mr,\n#else\n    mr = default_mr,\n#endif\n    \n    LhsProgress = LhsPacketSize,\n    RhsProgress = 1\n  };\n\n\n  typedef typename conditional<Vectorizable,_LhsPacket,LhsScalar>::type LhsPacket;\n  typedef typename conditional<Vectorizable,_RhsPacket,RhsScalar>::type RhsPacket;\n  typedef typename conditional<Vectorizable,_ResPacket,ResScalar>::type ResPacket;\n  typedef LhsPacket LhsPacket4Packing;\n\n  typedef QuadPacket<RhsPacket> RhsPacketx4;\n  typedef ResPacket AccPacket;\n  \n  EIGEN_STRONG_INLINE void initAcc(AccPacket& p)\n  {\n    p = pset1<ResPacket>(ResScalar(0));\n  }\n\n  template<typename RhsPacketType>\n  EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacketType& dest) const\n  {\n    dest = pset1<RhsPacketType>(*b);\n  }\n\n  EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacketx4& dest) const\n  {\n    pbroadcast4(b, dest.B_0, dest.B1, dest.B2, dest.B3);\n  }\n\n  template<typename RhsPacketType>\n  EIGEN_STRONG_INLINE void updateRhs(const RhsScalar* b, RhsPacketType& dest) const\n  {\n    loadRhs(b, dest);\n  }\n\n  EIGEN_STRONG_INLINE void updateRhs(const RhsScalar*, RhsPacketx4&) const\n  {\n  }\n\n  EIGEN_STRONG_INLINE void loadRhsQuad(const RhsScalar* b, RhsPacket& dest) const\n  {\n    dest = ploadquad<RhsPacket>(b);\n  }\n\n  template<typename LhsPacketType>\n  EIGEN_STRONG_INLINE void loadLhs(const LhsScalar* a, LhsPacketType& dest) const\n  {\n    dest = pload<LhsPacketType>(a);\n  }\n\n  template<typename LhsPacketType>\n  EIGEN_STRONG_INLINE void loadLhsUnaligned(const LhsScalar* a, LhsPacketType& dest) const\n  {\n    dest = ploadu<LhsPacketType>(a);\n  }\n\n  template<typename LhsPacketType, typename RhsPacketType, typename AccPacketType, typename LaneIdType>\n  EIGEN_STRONG_INLINE void madd(const LhsPacketType& a, const RhsPacketType& b, AccPacketType& c, RhsPacketType& tmp, const LaneIdType&) const\n  {\n    conj_helper<LhsPacketType,RhsPacketType,ConjLhs,ConjRhs> cj;\n    // It would be a lot cleaner to call pmadd all the time. 
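(pmadd computes a*b+c, as a single fused operation when FMA is available.) 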
Unfortunately if we\n    // let gcc allocate the register in which to store the result of the pmul\n    // (in the case where there is no FMA) gcc fails to figure out how to avoid\n    // spilling register.\n#ifdef EIGEN_HAS_SINGLE_INSTRUCTION_MADD\n    EIGEN_UNUSED_VARIABLE(tmp);\n    c = cj.pmadd(a,b,c);\n#else\n    tmp = b; tmp = cj.pmul(a,tmp); c = padd(c,tmp);\n#endif\n  }\n\n  template<typename LhsPacketType, typename AccPacketType, typename LaneIdType>\n  EIGEN_STRONG_INLINE void madd(const LhsPacketType& a, const RhsPacketx4& b, AccPacketType& c, RhsPacket& tmp, const LaneIdType& lane) const\n  {\n    madd(a, b.get(lane), c, tmp, lane);\n  }\n\n  EIGEN_STRONG_INLINE void acc(const AccPacket& c, const ResPacket& alpha, ResPacket& r) const\n  {\n    r = pmadd(c,alpha,r);\n  }\n  \n  template<typename ResPacketHalf>\n  EIGEN_STRONG_INLINE void acc(const ResPacketHalf& c, const ResPacketHalf& alpha, ResPacketHalf& r) const\n  {\n    r = pmadd(c,alpha,r);\n  }\n\n};\n\ntemplate<typename RealScalar, bool _ConjLhs, int Arch, int _PacketSize>\nclass gebp_traits<std::complex<RealScalar>, RealScalar, _ConjLhs, false, Arch, _PacketSize>\n{\npublic:\n  typedef std::complex<RealScalar> LhsScalar;\n  typedef RealScalar RhsScalar;\n  typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;\n\n  PACKET_DECL_COND_PREFIX(_, Lhs, _PacketSize);\n  PACKET_DECL_COND_PREFIX(_, Rhs, _PacketSize);\n  PACKET_DECL_COND_PREFIX(_, Res, _PacketSize);\n\n  enum {\n    ConjLhs = _ConjLhs,\n    ConjRhs = false,\n    Vectorizable = unpacket_traits<_LhsPacket>::vectorizable && unpacket_traits<_RhsPacket>::vectorizable,\n    LhsPacketSize = Vectorizable ? unpacket_traits<_LhsPacket>::size : 1,\n    RhsPacketSize = Vectorizable ? unpacket_traits<_RhsPacket>::size : 1,\n    ResPacketSize = Vectorizable ? 
unpacket_traits<_ResPacket>::size : 1,\n    \n    NumberOfRegisters = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS,\n    nr = 4,\n#if defined(EIGEN_HAS_SINGLE_INSTRUCTION_MADD) && !defined(EIGEN_VECTORIZE_ALTIVEC) && !defined(EIGEN_VECTORIZE_VSX)\n    // we assume 16 registers\n    mr = 3*LhsPacketSize,\n#else\n    mr = (EIGEN_PLAIN_ENUM_MIN(16,NumberOfRegisters)/2/nr)*LhsPacketSize,\n#endif\n\n    LhsProgress = LhsPacketSize,\n    RhsProgress = 1\n  };\n\n  typedef typename conditional<Vectorizable,_LhsPacket,LhsScalar>::type LhsPacket;\n  typedef typename conditional<Vectorizable,_RhsPacket,RhsScalar>::type RhsPacket;\n  typedef typename conditional<Vectorizable,_ResPacket,ResScalar>::type ResPacket;\n  typedef LhsPacket LhsPacket4Packing;\n\n  typedef QuadPacket<RhsPacket> RhsPacketx4;\n\n  typedef ResPacket AccPacket;\n\n  EIGEN_STRONG_INLINE void initAcc(AccPacket& p)\n  {\n    p = pset1<ResPacket>(ResScalar(0));\n  }\n\n  template<typename RhsPacketType>\n  EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacketType& dest) const\n  {\n    dest = pset1<RhsPacketType>(*b);\n  }\n\n  EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacketx4& dest) const\n  {\n    pbroadcast4(b, dest.B_0, dest.B1, dest.B2, dest.B3);\n  }\n\n  template<typename RhsPacketType>\n  EIGEN_STRONG_INLINE void updateRhs(const RhsScalar* b, RhsPacketType& dest) const\n  {\n    loadRhs(b, dest);\n  }\n\n  EIGEN_STRONG_INLINE void updateRhs(const RhsScalar*, RhsPacketx4&) const\n  {}\n  \n  EIGEN_STRONG_INLINE void loadRhsQuad(const RhsScalar* b, RhsPacket& dest) const\n  {\n    loadRhsQuad_impl(b,dest, typename conditional<RhsPacketSize==16,true_type,false_type>::type());\n  }\n\n  EIGEN_STRONG_INLINE void loadRhsQuad_impl(const RhsScalar* b, RhsPacket& dest, const true_type&) const\n  {\n    // FIXME we can do better!\n    // what we want here is a ploadheight\n    RhsScalar tmp[4] = {b[0],b[0],b[1],b[1]};\n    dest = ploadquad<RhsPacket>(tmp);\n  }\n\n  EIGEN_STRONG_INLINE void loadRhsQuad_impl(const RhsScalar* b, RhsPacket& dest, const false_type&) const\n  {\n    eigen_internal_assert(RhsPacketSize<=8);\n    dest = pset1<RhsPacket>(*b);\n  }\n\n  EIGEN_STRONG_INLINE void loadLhs(const LhsScalar* a, LhsPacket& dest) const\n  {\n    dest = pload<LhsPacket>(a);\n  }\n\n  template<typename LhsPacketType>\n  EIGEN_STRONG_INLINE void loadLhsUnaligned(const LhsScalar* a, LhsPacketType& dest) const\n  {\n    dest = ploadu<LhsPacketType>(a);\n  }\n\n  template <typename LhsPacketType, typename RhsPacketType, typename AccPacketType, typename LaneIdType>\n  EIGEN_STRONG_INLINE void madd(const LhsPacketType& a, const RhsPacketType& b, AccPacketType& c, RhsPacketType& tmp, const LaneIdType&) const\n  {\n    madd_impl(a, b, c, tmp, typename conditional<Vectorizable,true_type,false_type>::type());\n  }\n\n  template <typename LhsPacketType, typename RhsPacketType, typename AccPacketType>\n  EIGEN_STRONG_INLINE void madd_impl(const LhsPacketType& a, const RhsPacketType& b, AccPacketType& c, RhsPacketType& tmp, const true_type&) const\n  {\n#ifdef EIGEN_HAS_SINGLE_INSTRUCTION_MADD\n    EIGEN_UNUSED_VARIABLE(tmp);\n    c.v = pmadd(a.v,b,c.v);\n#else\n    tmp = b; tmp = pmul(a.v,tmp); c.v = padd(c.v,tmp);\n#endif\n  }\n\n  EIGEN_STRONG_INLINE void madd_impl(const LhsScalar& a, const RhsScalar& b, ResScalar& c, RhsScalar& /*tmp*/, const false_type&) const\n  {\n    c += a * b;\n  }\n\n  template<typename LhsPacketType, typename AccPacketType, typename LaneIdType>\n  EIGEN_STRONG_INLINE void madd(const LhsPacketType& a, 
const RhsPacketx4& b, AccPacketType& c, RhsPacket& tmp, const LaneIdType& lane) const\n  {\n    madd(a, b.get(lane), c, tmp, lane);\n  }\n\n  template <typename ResPacketType, typename AccPacketType>\n  EIGEN_STRONG_INLINE void acc(const AccPacketType& c, const ResPacketType& alpha, ResPacketType& r) const\n  {\n    conj_helper<ResPacketType,ResPacketType,ConjLhs,false> cj;\n    r = cj.pmadd(c,alpha,r);\n  }\n\nprotected:\n};\n\ntemplate<typename Packet>\nstruct DoublePacket\n{\n  Packet first;\n  Packet second;\n};\n\ntemplate<typename Packet>\nDoublePacket<Packet> padd(const DoublePacket<Packet> &a, const DoublePacket<Packet> &b)\n{\n  DoublePacket<Packet> res;\n  res.first  = padd(a.first, b.first);\n  res.second = padd(a.second,b.second);\n  return res;\n}\n\n// note that for DoublePacket<RealPacket> the \"4\" in \"downto4\"\n// corresponds to the number of complex numbers, so it means \"8\"\n// in terms of real coefficients.\n\ntemplate<typename Packet>\nconst DoublePacket<Packet>&\npredux_half_dowto4(const DoublePacket<Packet> &a,\n                   typename enable_if<unpacket_traits<Packet>::size<=8>::type* = 0)\n{\n  return a;\n}\n\ntemplate<typename Packet>\nDoublePacket<typename unpacket_traits<Packet>::half>\npredux_half_dowto4(const DoublePacket<Packet> &a,\n                   typename enable_if<unpacket_traits<Packet>::size==16>::type* = 0)\n{\n  // yes, that's pretty hackish :(\n  DoublePacket<typename unpacket_traits<Packet>::half> res;\n  typedef std::complex<typename unpacket_traits<Packet>::type> Cplx;\n  typedef typename packet_traits<Cplx>::type CplxPacket;\n  res.first  = predux_half_dowto4(CplxPacket(a.first)).v;\n  res.second = predux_half_dowto4(CplxPacket(a.second)).v;\n  return res;\n}\n\n// same here, \"quad\" actually means \"8\" in terms of real coefficients\ntemplate<typename Scalar, typename RealPacket>\nvoid loadQuadToDoublePacket(const Scalar* b, DoublePacket<RealPacket>& dest,\n                            typename enable_if<unpacket_traits<RealPacket>::size<=8>::type* = 0)\n{\n  dest.first  = pset1<RealPacket>(numext::real(*b));\n  dest.second = pset1<RealPacket>(numext::imag(*b));\n}\n\ntemplate<typename Scalar, typename RealPacket>\nvoid loadQuadToDoublePacket(const Scalar* b, DoublePacket<RealPacket>& dest,\n                            typename enable_if<unpacket_traits<RealPacket>::size==16>::type* = 0)\n{\n  // yes, that's pretty hackish too :(\n  typedef typename NumTraits<Scalar>::Real RealScalar;\n  RealScalar r[4] = {numext::real(b[0]), numext::real(b[0]), numext::real(b[1]), numext::real(b[1])};\n  RealScalar i[4] = {numext::imag(b[0]), numext::imag(b[0]), numext::imag(b[1]), numext::imag(b[1])};\n  dest.first  = ploadquad<RealPacket>(r);\n  dest.second = ploadquad<RealPacket>(i);\n}\n\n\ntemplate<typename Packet> struct unpacket_traits<DoublePacket<Packet> > {\n  typedef DoublePacket<typename unpacket_traits<Packet>::half> half;\n};\n// template<typename Packet>\n// DoublePacket<Packet> pmadd(const DoublePacket<Packet> &a, const DoublePacket<Packet> &b)\n// {\n//   DoublePacket<Packet> res;\n//   res.first  = padd(a.first, b.first);\n//   res.second = padd(a.second,b.second);\n//   return res;\n// }\n\ntemplate<typename RealScalar, bool _ConjLhs, bool _ConjRhs, int Arch, int _PacketSize>\nclass gebp_traits<std::complex<RealScalar>, std::complex<RealScalar>, _ConjLhs, _ConjRhs, Arch, _PacketSize >\n{\npublic:\n  typedef std::complex<RealScalar>  Scalar;\n  typedef std::complex<RealScalar>  LhsScalar;\n  typedef std::complex<RealScalar>  RhsScalar;\n  
typedef std::complex<RealScalar>  ResScalar;\n  \n  PACKET_DECL_COND_PREFIX(_, Lhs, _PacketSize);\n  PACKET_DECL_COND_PREFIX(_, Rhs, _PacketSize);\n  PACKET_DECL_COND_PREFIX(_, Res, _PacketSize);\n  PACKET_DECL_COND(Real, _PacketSize);\n  PACKET_DECL_COND_SCALAR(_PacketSize);\n\n  enum {\n    ConjLhs = _ConjLhs,\n    ConjRhs = _ConjRhs,\n    Vectorizable = unpacket_traits<RealPacket>::vectorizable\n                && unpacket_traits<ScalarPacket>::vectorizable,\n    ResPacketSize   = Vectorizable ? unpacket_traits<_ResPacket>::size : 1,\n    LhsPacketSize = Vectorizable ? unpacket_traits<_LhsPacket>::size : 1,\n    RhsPacketSize = Vectorizable ? unpacket_traits<RhsScalar>::size : 1,\n    RealPacketSize  = Vectorizable ? unpacket_traits<RealPacket>::size : 1,\n\n    // FIXME: should depend on NumberOfRegisters\n    nr = 4,\n    mr = ResPacketSize,\n\n    LhsProgress = ResPacketSize,\n    RhsProgress = 1\n  };\n  \n  typedef DoublePacket<RealPacket>                 DoublePacketType;\n\n  typedef typename conditional<Vectorizable,ScalarPacket,Scalar>::type LhsPacket4Packing;\n  typedef typename conditional<Vectorizable,RealPacket,  Scalar>::type LhsPacket;\n  typedef typename conditional<Vectorizable,DoublePacketType,Scalar>::type RhsPacket;\n  typedef typename conditional<Vectorizable,ScalarPacket,Scalar>::type ResPacket;\n  typedef typename conditional<Vectorizable,DoublePacketType,Scalar>::type AccPacket;\n\n  // this actually holds 8 packets!\n  typedef QuadPacket<RhsPacket> RhsPacketx4;\n  \n  EIGEN_STRONG_INLINE void initAcc(Scalar& p) { p = Scalar(0); }\n\n  EIGEN_STRONG_INLINE void initAcc(DoublePacketType& p)\n  {\n    p.first   = pset1<RealPacket>(RealScalar(0));\n    p.second  = pset1<RealPacket>(RealScalar(0));\n  }\n\n  // Scalar path\n  EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, ScalarPacket& dest) const\n  {\n    dest = pset1<ScalarPacket>(*b);\n  }\n\n  // Vectorized path\n  template<typename RealPacketType>\n  EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, DoublePacket<RealPacketType>& dest) const\n  {\n    dest.first  = pset1<RealPacketType>(numext::real(*b));\n    dest.second = pset1<RealPacketType>(numext::imag(*b));\n  }\n\n  EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacketx4& dest) const\n  {\n    loadRhs(b, dest.B_0);\n    loadRhs(b + 1, dest.B1);\n    loadRhs(b + 2, dest.B2);\n    loadRhs(b + 3, dest.B3);\n  }\n\n  // Scalar path\n  EIGEN_STRONG_INLINE void updateRhs(const RhsScalar* b, ScalarPacket& dest) const\n  {\n    loadRhs(b, dest);\n  }\n\n  // Vectorized path\n  template<typename RealPacketType>\n  EIGEN_STRONG_INLINE void updateRhs(const RhsScalar* b, DoublePacket<RealPacketType>& dest) const\n  {\n    loadRhs(b, dest);\n  }\n\n  EIGEN_STRONG_INLINE void updateRhs(const RhsScalar*, RhsPacketx4&) const {}\n  \n  EIGEN_STRONG_INLINE void loadRhsQuad(const RhsScalar* b, ResPacket& dest) const\n  {\n    loadRhs(b,dest);\n  }\n  EIGEN_STRONG_INLINE void loadRhsQuad(const RhsScalar* b, DoublePacketType& dest) const\n  {\n    loadQuadToDoublePacket(b,dest);\n  }\n\n  // nothing special here\n  EIGEN_STRONG_INLINE void loadLhs(const LhsScalar* a, LhsPacket& dest) const\n  {\n    dest = pload<LhsPacket>((const typename unpacket_traits<LhsPacket>::type*)(a));\n  }\n\n  template<typename LhsPacketType>\n  EIGEN_STRONG_INLINE void loadLhsUnaligned(const LhsScalar* a, LhsPacketType& dest) const\n  {\n    dest = ploadu<LhsPacketType>((const typename unpacket_traits<LhsPacketType>::type*)(a));\n  }\n\n  template<typename LhsPacketType, typename 
RhsPacketType, typename ResPacketType, typename TmpType, typename LaneIdType>\n  EIGEN_STRONG_INLINE\n  typename enable_if<!is_same<RhsPacketType,RhsPacketx4>::value>::type\n  madd(const LhsPacketType& a, const RhsPacketType& b, DoublePacket<ResPacketType>& c, TmpType& /*tmp*/, const LaneIdType&) const\n  {\n    c.first   = padd(pmul(a,b.first), c.first);\n    c.second  = padd(pmul(a,b.second),c.second);\n  }\n\n  template<typename LaneIdType>\n  EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacket& b, ResPacket& c, RhsPacket& /*tmp*/, const LaneIdType&) const\n  {\n    c = cj.pmadd(a,b,c);\n  }\n\n  template<typename LhsPacketType, typename AccPacketType, typename LaneIdType>\n  EIGEN_STRONG_INLINE void madd(const LhsPacketType& a, const RhsPacketx4& b, AccPacketType& c, RhsPacket& tmp, const LaneIdType& lane) const\n  {\n    madd(a, b.get(lane), c, tmp, lane);\n  }\n  \n  EIGEN_STRONG_INLINE void acc(const Scalar& c, const Scalar& alpha, Scalar& r) const { r += alpha * c; }\n  \n  template<typename RealPacketType, typename ResPacketType>\n  EIGEN_STRONG_INLINE void acc(const DoublePacket<RealPacketType>& c, const ResPacketType& alpha, ResPacketType& r) const\n  {\n    // assemble c\n    ResPacketType tmp;\n    if((!ConjLhs)&&(!ConjRhs))\n    {\n      tmp = pcplxflip(pconj(ResPacketType(c.second)));\n      tmp = padd(ResPacketType(c.first),tmp);\n    }\n    else if((!ConjLhs)&&(ConjRhs))\n    {\n      tmp = pconj(pcplxflip(ResPacketType(c.second)));\n      tmp = padd(ResPacketType(c.first),tmp);\n    }\n    else if((ConjLhs)&&(!ConjRhs))\n    {\n      tmp = pcplxflip(ResPacketType(c.second));\n      tmp = padd(pconj(ResPacketType(c.first)),tmp);\n    }\n    else if((ConjLhs)&&(ConjRhs))\n    {\n      tmp = pcplxflip(ResPacketType(c.second));\n      tmp = psub(pconj(ResPacketType(c.first)),tmp);\n    }\n    \n    r = pmadd(tmp,alpha,r);\n  }\n\nprotected:\n  conj_helper<LhsScalar,RhsScalar,ConjLhs,ConjRhs> cj;\n};\n\ntemplate<typename RealScalar, bool _ConjRhs, int Arch, int _PacketSize>\nclass gebp_traits<RealScalar, std::complex<RealScalar>, false, _ConjRhs, Arch, _PacketSize >\n{\npublic:\n  typedef std::complex<RealScalar>  Scalar;\n  typedef RealScalar  LhsScalar;\n  typedef Scalar      RhsScalar;\n  typedef Scalar      ResScalar;\n\n  PACKET_DECL_COND_PREFIX(_, Lhs, _PacketSize);\n  PACKET_DECL_COND_PREFIX(_, Rhs, _PacketSize);\n  PACKET_DECL_COND_PREFIX(_, Res, _PacketSize);\n  PACKET_DECL_COND_PREFIX(_, Real, _PacketSize);\n  PACKET_DECL_COND_SCALAR_PREFIX(_, _PacketSize);\n\n#undef PACKET_DECL_COND_SCALAR_PREFIX\n#undef PACKET_DECL_COND_PREFIX\n#undef PACKET_DECL_COND_SCALAR\n#undef PACKET_DECL_COND\n\n  enum {\n    ConjLhs = false,\n    ConjRhs = _ConjRhs,\n    Vectorizable = unpacket_traits<_RealPacket>::vectorizable\n                && unpacket_traits<_ScalarPacket>::vectorizable,\n    LhsPacketSize = Vectorizable ? unpacket_traits<_LhsPacket>::size : 1,\n    RhsPacketSize = Vectorizable ? unpacket_traits<_RhsPacket>::size : 1,\n    ResPacketSize = Vectorizable ? 
unpacket_traits<_ResPacket>::size : 1,\n    \n    NumberOfRegisters = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS,\n    // FIXME: should depend on NumberOfRegisters\n    nr = 4,\n    mr = (EIGEN_PLAIN_ENUM_MIN(16,NumberOfRegisters)/2/nr)*ResPacketSize,\n\n    LhsProgress = ResPacketSize,\n    RhsProgress = 1\n  };\n\n  typedef typename conditional<Vectorizable,_LhsPacket,LhsScalar>::type LhsPacket;\n  typedef typename conditional<Vectorizable,_RhsPacket,RhsScalar>::type RhsPacket;\n  typedef typename conditional<Vectorizable,_ResPacket,ResScalar>::type ResPacket;\n  typedef LhsPacket LhsPacket4Packing;\n  typedef QuadPacket<RhsPacket> RhsPacketx4;\n  typedef ResPacket AccPacket;\n\n  EIGEN_STRONG_INLINE void initAcc(AccPacket& p)\n  {\n    p = pset1<ResPacket>(ResScalar(0));\n  }\n\n  template<typename RhsPacketType>\n  EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacketType& dest) const\n  {\n    dest = pset1<RhsPacketType>(*b);\n  }\n\n  EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacketx4& dest) const\n  {\n    pbroadcast4(b, dest.B_0, dest.B1, dest.B2, dest.B3);\n  }\n\n  template<typename RhsPacketType>\n  EIGEN_STRONG_INLINE void updateRhs(const RhsScalar* b, RhsPacketType& dest) const\n  {\n    loadRhs(b, dest);\n  }\n\n  EIGEN_STRONG_INLINE void updateRhs(const RhsScalar*, RhsPacketx4&) const\n  {}\n\n  EIGEN_STRONG_INLINE void loadLhs(const LhsScalar* a, LhsPacket& dest) const\n  {\n    dest = ploaddup<LhsPacket>(a);\n  }\n  \n  EIGEN_STRONG_INLINE void loadRhsQuad(const RhsScalar* b, RhsPacket& dest) const\n  {\n    dest = ploadquad<RhsPacket>(b);\n  }\n\n  template<typename LhsPacketType>\n  EIGEN_STRONG_INLINE void loadLhsUnaligned(const LhsScalar* a, LhsPacketType& dest) const\n  {\n    dest = ploaddup<LhsPacketType>(a);\n  }\n\n  template <typename LhsPacketType, typename RhsPacketType, typename AccPacketType, typename LaneIdType>\n  EIGEN_STRONG_INLINE void madd(const LhsPacketType& a, const RhsPacketType& b, AccPacketType& c, RhsPacketType& tmp, const LaneIdType&) const\n  {\n    madd_impl(a, b, c, tmp, typename conditional<Vectorizable,true_type,false_type>::type());\n  }\n\n  template <typename LhsPacketType, typename RhsPacketType, typename AccPacketType>\n  EIGEN_STRONG_INLINE void madd_impl(const LhsPacketType& a, const RhsPacketType& b, AccPacketType& c, RhsPacketType& tmp, const true_type&) const\n  {\n#ifdef EIGEN_HAS_SINGLE_INSTRUCTION_MADD\n    EIGEN_UNUSED_VARIABLE(tmp);\n    c.v = pmadd(a,b.v,c.v);\n#else\n    tmp = b; tmp.v = pmul(a,tmp.v); c = padd(c,tmp);\n#endif\n    \n  }\n\n  EIGEN_STRONG_INLINE void madd_impl(const LhsScalar& a, const RhsScalar& b, ResScalar& c, RhsScalar& /*tmp*/, const false_type&) const\n  {\n    c += a * b;\n  }\n\n  template<typename LhsPacketType, typename AccPacketType, typename LaneIdType>\n  EIGEN_STRONG_INLINE void madd(const LhsPacketType& a, const RhsPacketx4& b, AccPacketType& c, RhsPacket& tmp, const LaneIdType& lane) const\n  {\n    madd(a, b.get(lane), c, tmp, lane);\n  }\n\n  template <typename ResPacketType, typename AccPacketType>\n  EIGEN_STRONG_INLINE void acc(const AccPacketType& c, const ResPacketType& alpha, ResPacketType& r) const\n  {\n    conj_helper<ResPacketType,ResPacketType,false,ConjRhs> cj;\n    r = cj.pmadd(alpha,c,r);\n  }\n\nprotected:\n\n};\n\n/* optimized General packed Block * packed Panel product kernel\n *\n * Mixing type logic: C += A * B\n *  |  A  |  B  | comments\n *  |real |cplx | no vectorization yet, would require to pack A with duplication\n *  |cplx |real | easy 
vectorization\n */\ntemplate<typename LhsScalar, typename RhsScalar, typename Index, typename DataMapper, int mr, int nr, bool ConjugateLhs, bool ConjugateRhs>\nstruct gebp_kernel\n{\n  typedef gebp_traits<LhsScalar,RhsScalar,ConjugateLhs,ConjugateRhs,Architecture::Target> Traits;\n  typedef gebp_traits<LhsScalar,RhsScalar,ConjugateLhs,ConjugateRhs,Architecture::Target,GEBPPacketHalf> HalfTraits;\n  typedef gebp_traits<LhsScalar,RhsScalar,ConjugateLhs,ConjugateRhs,Architecture::Target,GEBPPacketQuarter> QuarterTraits;\n  \n  typedef typename Traits::ResScalar ResScalar;\n  typedef typename Traits::LhsPacket LhsPacket;\n  typedef typename Traits::RhsPacket RhsPacket;\n  typedef typename Traits::ResPacket ResPacket;\n  typedef typename Traits::AccPacket AccPacket;\n  typedef typename Traits::RhsPacketx4 RhsPacketx4;\n\n  typedef typename RhsPanelHelper<RhsPacket, RhsPacketx4, 15>::type RhsPanel15;\n\n  typedef gebp_traits<RhsScalar,LhsScalar,ConjugateRhs,ConjugateLhs,Architecture::Target> SwappedTraits;\n\n  typedef typename SwappedTraits::ResScalar SResScalar;\n  typedef typename SwappedTraits::LhsPacket SLhsPacket;\n  typedef typename SwappedTraits::RhsPacket SRhsPacket;\n  typedef typename SwappedTraits::ResPacket SResPacket;\n  typedef typename SwappedTraits::AccPacket SAccPacket;\n\n  typedef typename HalfTraits::LhsPacket LhsPacketHalf;\n  typedef typename HalfTraits::RhsPacket RhsPacketHalf;\n  typedef typename HalfTraits::ResPacket ResPacketHalf;\n  typedef typename HalfTraits::AccPacket AccPacketHalf;\n\n  typedef typename QuarterTraits::LhsPacket LhsPacketQuarter;\n  typedef typename QuarterTraits::RhsPacket RhsPacketQuarter;\n  typedef typename QuarterTraits::ResPacket ResPacketQuarter;\n  typedef typename QuarterTraits::AccPacket AccPacketQuarter;\n\n  typedef typename DataMapper::LinearMapper LinearMapper;\n\n  enum {\n    Vectorizable  = Traits::Vectorizable,\n    LhsProgress   = Traits::LhsProgress,\n    LhsProgressHalf      = HalfTraits::LhsProgress,\n    LhsProgressQuarter   = QuarterTraits::LhsProgress,\n    RhsProgress   = Traits::RhsProgress,\n    RhsProgressHalf      = HalfTraits::RhsProgress,\n    RhsProgressQuarter   = QuarterTraits::RhsProgress,\n    ResPacketSize = Traits::ResPacketSize\n  };\n\n  EIGEN_DONT_INLINE\n  void operator()(const DataMapper& res, const LhsScalar* blockA, const RhsScalar* blockB,\n                  Index rows, Index depth, Index cols, ResScalar alpha,\n                  Index strideA=-1, Index strideB=-1, Index offsetA=0, Index offsetB=0);\n};\n\ntemplate<typename LhsScalar, typename RhsScalar, typename Index, typename DataMapper, int mr, int nr, bool ConjugateLhs, bool ConjugateRhs,\nint SwappedLhsProgress = gebp_traits<RhsScalar,LhsScalar,ConjugateRhs,ConjugateLhs,Architecture::Target>::LhsProgress>\nstruct last_row_process_16_packets\n{\n  typedef gebp_traits<LhsScalar,RhsScalar,ConjugateLhs,ConjugateRhs,Architecture::Target> Traits;\n  typedef gebp_traits<RhsScalar,LhsScalar,ConjugateRhs,ConjugateLhs,Architecture::Target> SwappedTraits;\n\n  typedef typename Traits::ResScalar ResScalar;\n  typedef typename SwappedTraits::LhsPacket SLhsPacket;\n  typedef typename SwappedTraits::RhsPacket SRhsPacket;\n  typedef typename SwappedTraits::ResPacket SResPacket;\n  typedef typename SwappedTraits::AccPacket SAccPacket;\n\n  EIGEN_STRONG_INLINE void operator()(const DataMapper& res, SwappedTraits &straits, const LhsScalar* blA,\n                  const RhsScalar* blB, Index depth, const Index endk, Index i, Index j2,\n                  ResScalar 
alpha, SAccPacket &C0)\n    {\n      EIGEN_UNUSED_VARIABLE(res);\n      EIGEN_UNUSED_VARIABLE(straits);\n      EIGEN_UNUSED_VARIABLE(blA);\n      EIGEN_UNUSED_VARIABLE(blB);\n      EIGEN_UNUSED_VARIABLE(depth);\n      EIGEN_UNUSED_VARIABLE(endk);\n      EIGEN_UNUSED_VARIABLE(i);\n      EIGEN_UNUSED_VARIABLE(j2);\n      EIGEN_UNUSED_VARIABLE(alpha);\n      EIGEN_UNUSED_VARIABLE(C0);\n    }\n};\n\n\ntemplate<typename LhsScalar, typename RhsScalar, typename Index, typename DataMapper, int mr, int nr, bool ConjugateLhs, bool ConjugateRhs>\nstruct last_row_process_16_packets<LhsScalar, RhsScalar, Index, DataMapper,  mr,  nr, ConjugateLhs,  ConjugateRhs, 16> {\n  typedef gebp_traits<LhsScalar,RhsScalar,ConjugateLhs,ConjugateRhs,Architecture::Target> Traits;\n  typedef gebp_traits<RhsScalar,LhsScalar,ConjugateRhs,ConjugateLhs,Architecture::Target> SwappedTraits;\n\n  typedef typename Traits::ResScalar ResScalar;\n  typedef typename SwappedTraits::LhsPacket SLhsPacket;\n  typedef typename SwappedTraits::RhsPacket SRhsPacket;\n  typedef typename SwappedTraits::ResPacket SResPacket;\n  typedef typename SwappedTraits::AccPacket SAccPacket;\n\n  EIGEN_STRONG_INLINE void operator()(const DataMapper& res, SwappedTraits &straits, const LhsScalar* blA,\n                  const RhsScalar* blB, Index depth, const Index endk, Index i, Index j2,\n                  ResScalar alpha, SAccPacket &C0)\n  {\n    typedef typename unpacket_traits<typename unpacket_traits<SResPacket>::half>::half SResPacketQuarter;\n    typedef typename unpacket_traits<typename unpacket_traits<SLhsPacket>::half>::half SLhsPacketQuarter;\n    typedef typename unpacket_traits<typename unpacket_traits<SRhsPacket>::half>::half SRhsPacketQuarter;\n    typedef typename unpacket_traits<typename unpacket_traits<SAccPacket>::half>::half SAccPacketQuarter;\n\n    SResPacketQuarter R = res.template gatherPacket<SResPacketQuarter>(i, j2);\n    SResPacketQuarter alphav = pset1<SResPacketQuarter>(alpha);\n\n    if (depth - endk > 0)\n      {\n\t// We have to handle the last row(s) of the rhs, which\n\t// correspond to a half-packet\n\tSAccPacketQuarter c0 = predux_half_dowto4(predux_half_dowto4(C0));\n\n\tfor (Index kk = endk; kk < depth; kk++)\n\t  {\n\t    SLhsPacketQuarter a0;\n\t    SRhsPacketQuarter b0;\n\t    straits.loadLhsUnaligned(blB, a0);\n\t    straits.loadRhs(blA, b0);\n\t    straits.madd(a0,b0,c0,b0, fix<0>);\n\t    blB += SwappedTraits::LhsProgress/4;\n\t    blA += 1;\n\t  }\n\tstraits.acc(c0, alphav, R);\n      }\n    else\n      {\n\tstraits.acc(predux_half_dowto4(predux_half_dowto4(C0)), alphav, R);\n      }\n    res.scatterPacket(i, j2, R);\n  }\n};\n\ntemplate<int nr, Index LhsProgress, Index RhsProgress, typename LhsScalar, typename RhsScalar, typename ResScalar, typename AccPacket, typename LhsPacket, typename RhsPacket, typename ResPacket, typename GEBPTraits, typename LinearMapper, typename DataMapper>\nstruct lhs_process_one_packet\n{\n  typedef typename GEBPTraits::RhsPacketx4 RhsPacketx4;\n\n  EIGEN_STRONG_INLINE void peeled_kc_onestep(Index K, const LhsScalar* blA, const RhsScalar* blB, GEBPTraits traits, LhsPacket *A0, RhsPacketx4 *rhs_panel, RhsPacket *T0, AccPacket *C0, AccPacket *C1, AccPacket *C2, AccPacket *C3)\n  {\n    EIGEN_ASM_COMMENT(\"begin step of gebp micro kernel 1X4\");\n    EIGEN_ASM_COMMENT(\"Note: these asm comments work around bug 935!\");\n    traits.loadLhs(&blA[(0+1*K)*LhsProgress], *A0);\n    traits.loadRhs(&blB[(0+4*K)*RhsProgress], *rhs_panel);\n    traits.madd(*A0, *rhs_panel, *C0, *T0, 
fix<0>);\n    traits.madd(*A0, *rhs_panel, *C1, *T0, fix<1>);\n    traits.madd(*A0, *rhs_panel, *C2, *T0, fix<2>);\n    traits.madd(*A0, *rhs_panel, *C3, *T0, fix<3>);\n    #if EIGEN_GNUC_AT_LEAST(6,0) && defined(EIGEN_VECTORIZE_SSE)\n    __asm__  (\"\" : \"+x,m\" (*A0));\n    #endif\n    EIGEN_ASM_COMMENT(\"end step of gebp micro kernel 1X4\");\n  }\n\n  EIGEN_STRONG_INLINE void operator()(\n    const DataMapper& res, const LhsScalar* blockA, const RhsScalar* blockB, ResScalar alpha,\n    Index peelStart, Index peelEnd, Index strideA, Index strideB, Index offsetA, Index offsetB,\n    int prefetch_res_offset, Index peeled_kc, Index pk, Index cols, Index depth, Index packet_cols4)\n  {\n    GEBPTraits traits;\n\n    // loops on each largest micro horizontal panel of lhs\n    // (LhsProgress x depth)\n    for(Index i=peelStart; i<peelEnd; i+=LhsProgress)\n    {\n      // loops on each largest micro vertical panel of rhs (depth * nr)\n      for(Index j2=0; j2<packet_cols4; j2+=nr)\n      {\n        // We select a LhsProgress x nr micro block of res\n        // which is entirely stored into 1 x nr registers.\n\n        const LhsScalar* blA = &blockA[i*strideA+offsetA*(LhsProgress)];\n        prefetch(&blA[0]);\n\n        // gets res block as register\n        AccPacket C0, C1, C2, C3;\n        traits.initAcc(C0);\n        traits.initAcc(C1);\n        traits.initAcc(C2);\n        traits.initAcc(C3);\n        // To improve instruction pipelining, let's double the accumulation registers:\n        //  even k will accumulate in C*, while odd k will accumulate in D*.\n        // This trick is crucial to get good performance with FMA, otherwise it is\n        // actually faster to perform separate MUL+ADD because of the naturally\n        // better instruction-level parallelism.\n        AccPacket D0, D1, D2, D3;\n        traits.initAcc(D0);\n        traits.initAcc(D1);\n        traits.initAcc(D2);\n        traits.initAcc(D3);\n\n        LinearMapper r0 = res.getLinearMapper(i, j2 + 0);\n        LinearMapper r1 = res.getLinearMapper(i, j2 + 1);\n        LinearMapper r2 = res.getLinearMapper(i, j2 + 2);\n        LinearMapper r3 = res.getLinearMapper(i, j2 + 3);\n\n        r0.prefetch(prefetch_res_offset);\n        r1.prefetch(prefetch_res_offset);\n        r2.prefetch(prefetch_res_offset);\n        r3.prefetch(prefetch_res_offset);\n\n        // performs \"inner\" products\n        const RhsScalar* blB = &blockB[j2*strideB+offsetB*nr];\n        prefetch(&blB[0]);\n        LhsPacket A0, A1;\n\n        for(Index k=0; k<peeled_kc; k+=pk)\n        {\n          EIGEN_ASM_COMMENT(\"begin gebp micro kernel 1/half/quarterX4\");\n          RhsPacketx4 rhs_panel;\n          RhsPacket T0;\n\n          internal::prefetch(blB+(48+0));\n          peeled_kc_onestep(0, blA, blB, traits, &A0, &rhs_panel, &T0, &C0, &C1, &C2, &C3);\n          peeled_kc_onestep(1, blA, blB, traits, &A1, &rhs_panel, &T0, &D0, &D1, &D2, &D3);\n          peeled_kc_onestep(2, blA, blB, traits, &A0, &rhs_panel, &T0, &C0, &C1, &C2, &C3);\n          peeled_kc_onestep(3, blA, blB, traits, &A1, &rhs_panel, &T0, &D0, &D1, &D2, &D3);\n          internal::prefetch(blB+(48+16));\n          peeled_kc_onestep(4, blA, blB, traits, &A0, &rhs_panel, &T0, &C0, &C1, &C2, &C3);\n          peeled_kc_onestep(5, blA, blB, traits, &A1, &rhs_panel, &T0, &D0, &D1, &D2, &D3);\n          peeled_kc_onestep(6, blA, blB, traits, &A0, &rhs_panel, &T0, &C0, &C1, &C2, &C3);\n          peeled_kc_onestep(7, blA, blB, traits, &A1, &rhs_panel, &T0, &D0, &D1, &D2, &D3);\n\n    
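      // advance the packed lhs and rhs pointers past the pk peeled steps\n    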
      blB += pk*4*RhsProgress;\n          blA += pk*LhsProgress;\n\n          EIGEN_ASM_COMMENT(\"end gebp micro kernel 1/half/quarterX4\");\n        }\n        C0 = padd(C0,D0);\n        C1 = padd(C1,D1);\n        C2 = padd(C2,D2);\n        C3 = padd(C3,D3);\n\n        // process remaining peeled loop\n        for(Index k=peeled_kc; k<depth; k++)\n        {\n          RhsPacketx4 rhs_panel;\n          RhsPacket T0;\n          peeled_kc_onestep(0, blA, blB, traits, &A0, &rhs_panel, &T0, &C0, &C1, &C2, &C3);\n          blB += 4*RhsProgress;\n          blA += LhsProgress;\n        }\n\n        ResPacket R0, R1;\n        ResPacket alphav = pset1<ResPacket>(alpha);\n\n        R0 = r0.template loadPacket<ResPacket>(0);\n        R1 = r1.template loadPacket<ResPacket>(0);\n        traits.acc(C0, alphav, R0);\n        traits.acc(C1,  alphav, R1);\n        r0.storePacket(0, R0);\n        r1.storePacket(0, R1);\n\n        R0 = r2.template loadPacket<ResPacket>(0);\n        R1 = r3.template loadPacket<ResPacket>(0);\n        traits.acc(C2,  alphav, R0);\n        traits.acc(C3,  alphav, R1);\n        r2.storePacket(0, R0);\n        r3.storePacket(0, R1);\n      }\n\n      // Deal with remaining columns of the rhs\n      for(Index j2=packet_cols4; j2<cols; j2++)\n      {\n        // One column at a time\n        const LhsScalar* blA = &blockA[i*strideA+offsetA*(LhsProgress)];\n        prefetch(&blA[0]);\n\n        // gets res block as register\n        AccPacket C0;\n        traits.initAcc(C0);\n\n        LinearMapper r0 = res.getLinearMapper(i, j2);\n\n        // performs \"inner\" products\n        const RhsScalar* blB = &blockB[j2*strideB+offsetB];\n        LhsPacket A0;\n\n        for(Index k= 0; k<peeled_kc; k+=pk)\n        {\n          EIGEN_ASM_COMMENT(\"begin gebp micro kernel 1/half/quarterX1\");\n          RhsPacket B_0;\n\n#define EIGEN_GEBGP_ONESTEP(K)                                          \\\n\t      do {                                                      \\\n\t\tEIGEN_ASM_COMMENT(\"begin step of gebp micro kernel 1/half/quarterX1\"); \\\n\t\tEIGEN_ASM_COMMENT(\"Note: these asm comments work around bug 935!\"); \\\n    /* FIXME: why unaligned???? 
*/ \\\n\t\ttraits.loadLhsUnaligned(&blA[(0+1*K)*LhsProgress], A0); \\\n\t\ttraits.loadRhs(&blB[(0+K)*RhsProgress], B_0);\t\t\\\n\t\ttraits.madd(A0, B_0, C0, B_0, fix<0>);\t\t\t\t\\\n\t\tEIGEN_ASM_COMMENT(\"end step of gebp micro kernel 1/half/quarterX1\"); \\\n\t      } while(false);\n\n          EIGEN_GEBGP_ONESTEP(0);\n          EIGEN_GEBGP_ONESTEP(1);\n          EIGEN_GEBGP_ONESTEP(2);\n          EIGEN_GEBGP_ONESTEP(3);\n          EIGEN_GEBGP_ONESTEP(4);\n          EIGEN_GEBGP_ONESTEP(5);\n          EIGEN_GEBGP_ONESTEP(6);\n          EIGEN_GEBGP_ONESTEP(7);\n\n          blB += pk*RhsProgress;\n          blA += pk*LhsProgress;\n\n          EIGEN_ASM_COMMENT(\"end gebp micro kernel 1/half/quarterX1\");\n        }\n\n        // process remaining peeled loop\n        for(Index k=peeled_kc; k<depth; k++)\n        {\n          RhsPacket B_0;\n          EIGEN_GEBGP_ONESTEP(0);\n          blB += RhsProgress;\n          blA += LhsProgress;\n        }\n#undef EIGEN_GEBGP_ONESTEP\n        ResPacket R0;\n        ResPacket alphav = pset1<ResPacket>(alpha);\n        R0 = r0.template loadPacket<ResPacket>(0);\n        traits.acc(C0, alphav, R0);\n        r0.storePacket(0, R0);\n      }\n    }\n  }\n};\n\ntemplate<int nr, Index LhsProgress, Index RhsProgress, typename LhsScalar, typename RhsScalar, typename ResScalar, typename AccPacket, typename LhsPacket, typename RhsPacket, typename ResPacket, typename GEBPTraits, typename LinearMapper, typename DataMapper>\nstruct lhs_process_fraction_of_packet : lhs_process_one_packet<nr, LhsProgress, RhsProgress, LhsScalar, RhsScalar, ResScalar, AccPacket, LhsPacket, RhsPacket, ResPacket, GEBPTraits, LinearMapper, DataMapper>\n{\n\nEIGEN_STRONG_INLINE void peeled_kc_onestep(Index K, const LhsScalar* blA, const RhsScalar* blB, GEBPTraits traits, LhsPacket *A0, RhsPacket *B_0, RhsPacket *B1, RhsPacket *B2, RhsPacket *B3, AccPacket *C0, AccPacket *C1, AccPacket *C2, AccPacket *C3)\n  {\n        EIGEN_ASM_COMMENT(\"begin step of gebp micro kernel 1X4\");\n        EIGEN_ASM_COMMENT(\"Note: these asm comments work around bug 935!\");\n        traits.loadLhsUnaligned(&blA[(0+1*K)*(LhsProgress)], *A0);\n        traits.broadcastRhs(&blB[(0+4*K)*RhsProgress], *B_0, *B1, *B2, *B3);\n        traits.madd(*A0, *B_0, *C0, *B_0);\n        traits.madd(*A0, *B1,  *C1, *B1);\n        traits.madd(*A0, *B2,  *C2, *B2);\n        traits.madd(*A0, *B3,  *C3, *B3);\n        EIGEN_ASM_COMMENT(\"end step of gebp micro kernel 1X4\");\n  }\n};\n\ntemplate<typename LhsScalar, typename RhsScalar, typename Index, typename DataMapper, int mr, int nr, bool ConjugateLhs, bool ConjugateRhs>\nEIGEN_DONT_INLINE\nvoid gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,ConjugateRhs>\n  ::operator()(const DataMapper& res, const LhsScalar* blockA, const RhsScalar* blockB,\n               Index rows, Index depth, Index cols, ResScalar alpha,\n               Index strideA, Index strideB, Index offsetA, Index offsetB)\n  {\n    Traits traits;\n    SwappedTraits straits;\n    \n    if(strideA==-1) strideA = depth;\n    if(strideB==-1) strideB = depth;\n    conj_helper<LhsScalar,RhsScalar,ConjugateLhs,ConjugateRhs> cj;\n    Index packet_cols4 = nr>=4 ? (cols/4) * 4 : 0;\n    const Index peeled_mc3 = mr>=3*Traits::LhsProgress ? (rows/(3*LhsProgress))*(3*LhsProgress) : 0;\n    const Index peeled_mc2 = mr>=2*Traits::LhsProgress ? peeled_mc3+((rows-peeled_mc3)/(2*LhsProgress))*(2*LhsProgress) : 0;\n    const Index peeled_mc1 = mr>=1*Traits::LhsProgress ? 
peeled_mc2+((rows-peeled_mc2)/(1*LhsProgress))*(1*LhsProgress) : 0;\n    const Index peeled_mc_half = mr>=LhsProgressHalf ? peeled_mc1+((rows-peeled_mc1)/(LhsProgressHalf))*(LhsProgressHalf) : 0;\n    const Index peeled_mc_quarter = mr>=LhsProgressQuarter ? peeled_mc_half+((rows-peeled_mc_half)/(LhsProgressQuarter))*(LhsProgressQuarter) : 0;\n    enum { pk = 8 }; // NOTE Such a large peeling factor is important for large matrices (~ +5% when >1000 on Haswell)\n    const Index peeled_kc  = depth & ~(pk-1);\n    const int prefetch_res_offset = 32/sizeof(ResScalar);    \n//     const Index depth2     = depth & ~1;\n\n    //---------- Process 3 * LhsProgress rows at once ----------\n    // This corresponds to 3*LhsProgress x nr register blocks.\n    // Usually, makes sense only with FMA\n    if(mr>=3*Traits::LhsProgress)\n    {\n      // Here, the general idea is to loop on each largest micro horizontal panel of the lhs (3*Traits::LhsProgress x depth)\n      // and on each largest micro vertical panel of the rhs (depth * nr).\n      // Blocking sizes, i.e., 'depth' has been computed so that the micro horizontal panel of the lhs fits in L1.\n      // However, if depth is too small, we can extend the number of rows of these horizontal panels.\n      // This actual number of rows is computed as follows:\n      const Index l1 = defaultL1CacheSize; // in Bytes, TODO, l1 should be passed to this function.\n      // The max(1, ...) here is needed because we may be using blocking params larger than what our known l1 cache size\n      // suggests we should be using: either because our known l1 cache size is inaccurate (e.g. on Android, we can only guess),\n      // or because we are testing specific blocking sizes.\n      const Index actual_panel_rows = (3*LhsProgress) * std::max<Index>(1,( (l1 - sizeof(ResScalar)*mr*nr - depth*nr*sizeof(RhsScalar)) / (depth * sizeof(LhsScalar) * 3*LhsProgress) ));\n      for(Index i1=0; i1<peeled_mc3; i1+=actual_panel_rows)\n      {\n        const Index actual_panel_end = (std::min)(i1+actual_panel_rows, peeled_mc3);\n        for(Index j2=0; j2<packet_cols4; j2+=nr)\n        {\n          for(Index i=i1; i<actual_panel_end; i+=3*LhsProgress)\n          {\n          \n          // We selected a 3*Traits::LhsProgress x nr micro block of res which is entirely\n          // stored into 3 x nr registers.\n          \n          const LhsScalar* blA = &blockA[i*strideA+offsetA*(3*LhsProgress)];\n          prefetch(&blA[0]);\n\n          // gets res block as register\n          AccPacket C0, C1, C2,  C3,\n                    C4, C5, C6,  C7,\n                    C8, C9, C10, C11;\n          traits.initAcc(C0);  traits.initAcc(C1);  traits.initAcc(C2);  traits.initAcc(C3);\n          traits.initAcc(C4);  traits.initAcc(C5);  traits.initAcc(C6);  traits.initAcc(C7);\n          traits.initAcc(C8);  traits.initAcc(C9);  traits.initAcc(C10); traits.initAcc(C11);\n\n          LinearMapper r0 = res.getLinearMapper(i, j2 + 0);\n          LinearMapper r1 = res.getLinearMapper(i, j2 + 1);\n          LinearMapper r2 = res.getLinearMapper(i, j2 + 2);\n          LinearMapper r3 = res.getLinearMapper(i, j2 + 3);\n\n          r0.prefetch(0);\n          r1.prefetch(0);\n          r2.prefetch(0);\n          r3.prefetch(0);\n\n          // performs \"inner\" products\n          const RhsScalar* blB = &blockB[j2*strideB+offsetB*nr];\n          prefetch(&blB[0]);\n          LhsPacket A0, A1;\n\n          for(Index k=0; k<peeled_kc; k+=pk)\n          {\n            EIGEN_ASM_COMMENT(\"begin gebp 
micro kernel 3pX4\");\n            // 15 registers are taken (12 for acc, 2 for lhs).\n            RhsPanel15 rhs_panel;\n            RhsPacket T0;\n            LhsPacket A2;\n            #if EIGEN_COMP_GNUC_STRICT && EIGEN_ARCH_ARM64 && defined(EIGEN_VECTORIZE_NEON) && !(EIGEN_GNUC_AT_LEAST(9,0))\n            // see http://eigen.tuxfamily.org/bz/show_bug.cgi?id=1633\n            // without this workaround A0, A1, and A2 are loaded in the same register,\n            // which is not good for pipelining\n            #define EIGEN_GEBP_3PX4_REGISTER_ALLOC_WORKAROUND __asm__  (\"\" : \"+w,m\" (A0), \"+w,m\" (A1), \"+w,m\" (A2));\n            #else\n            #define EIGEN_GEBP_3PX4_REGISTER_ALLOC_WORKAROUND\n            #endif\n#define EIGEN_GEBP_ONESTEP(K)                                                     \\\n            do {                                                                  \\\n              EIGEN_ASM_COMMENT(\"begin step of gebp micro kernel 3pX4\");          \\\n              EIGEN_ASM_COMMENT(\"Note: these asm comments work around bug 935!\"); \\\n              internal::prefetch(blA + (3 * K + 16) * LhsProgress);               \\\n              if (EIGEN_ARCH_ARM || EIGEN_ARCH_MIPS) {                            \\\n                internal::prefetch(blB + (4 * K + 16) * RhsProgress);             \\\n              } /* Bug 953 */                                                     \\\n              traits.loadLhs(&blA[(0 + 3 * K) * LhsProgress], A0);                \\\n              traits.loadLhs(&blA[(1 + 3 * K) * LhsProgress], A1);                \\\n              traits.loadLhs(&blA[(2 + 3 * K) * LhsProgress], A2);                \\\n              EIGEN_GEBP_3PX4_REGISTER_ALLOC_WORKAROUND \\\n              traits.loadRhs(blB + (0+4*K) * Traits::RhsProgress, rhs_panel);     \\\n              traits.madd(A0, rhs_panel, C0, T0, fix<0>);                         \\\n              traits.madd(A1, rhs_panel, C4, T0, fix<0>);                         \\\n              traits.madd(A2, rhs_panel, C8, T0, fix<0>);                         \\\n              traits.updateRhs(blB + (1+4*K) * Traits::RhsProgress, rhs_panel);   \\\n              traits.madd(A0, rhs_panel, C1, T0, fix<1>);                         \\\n              traits.madd(A1, rhs_panel, C5, T0, fix<1>);                         \\\n              traits.madd(A2, rhs_panel, C9, T0, fix<1>);                         \\\n              traits.updateRhs(blB + (2+4*K) * Traits::RhsProgress, rhs_panel);   \\\n              traits.madd(A0, rhs_panel, C2, T0, fix<2>);                         \\\n              traits.madd(A1, rhs_panel, C6, T0, fix<2>);                         \\\n              traits.madd(A2, rhs_panel, C10, T0, fix<2>);                        \\\n              traits.updateRhs(blB + (3+4*K) * Traits::RhsProgress, rhs_panel);   \\\n              traits.madd(A0, rhs_panel, C3, T0, fix<3>);                         \\\n              traits.madd(A1, rhs_panel, C7, T0, fix<3>);                         \\\n              traits.madd(A2, rhs_panel, C11, T0, fix<3>);                        \\\n              EIGEN_ASM_COMMENT(\"end step of gebp micro kernel 3pX4\");            \\\n            } while (false)\n\n            internal::prefetch(blB);\n            EIGEN_GEBP_ONESTEP(0);\n            EIGEN_GEBP_ONESTEP(1);\n            EIGEN_GEBP_ONESTEP(2);\n            EIGEN_GEBP_ONESTEP(3);\n            EIGEN_GEBP_ONESTEP(4);\n            EIGEN_GEBP_ONESTEP(5);\n            EIGEN_GEBP_ONESTEP(6);\n            
EIGEN_GEBP_ONESTEP(7);\n\n            blB += pk*4*RhsProgress;\n            blA += pk*3*Traits::LhsProgress;\n\n            EIGEN_ASM_COMMENT(\"end gebp micro kernel 3pX4\");\n          }\n          // process remaining peeled loop\n          for(Index k=peeled_kc; k<depth; k++)\n          {\n            RhsPanel15 rhs_panel;\n            RhsPacket T0;\n            LhsPacket A2;\n            EIGEN_GEBP_ONESTEP(0);\n            blB += 4*RhsProgress;\n            blA += 3*Traits::LhsProgress;\n          }\n\n#undef EIGEN_GEBP_ONESTEP\n\n          ResPacket R0, R1, R2;\n          ResPacket alphav = pset1<ResPacket>(alpha);\n\n          R0 = r0.template loadPacket<ResPacket>(0 * Traits::ResPacketSize);\n          R1 = r0.template loadPacket<ResPacket>(1 * Traits::ResPacketSize);\n          R2 = r0.template loadPacket<ResPacket>(2 * Traits::ResPacketSize);\n          traits.acc(C0, alphav, R0);\n          traits.acc(C4, alphav, R1);\n          traits.acc(C8, alphav, R2);\n          r0.storePacket(0 * Traits::ResPacketSize, R0);\n          r0.storePacket(1 * Traits::ResPacketSize, R1);\n          r0.storePacket(2 * Traits::ResPacketSize, R2);\n\n          R0 = r1.template loadPacket<ResPacket>(0 * Traits::ResPacketSize);\n          R1 = r1.template loadPacket<ResPacket>(1 * Traits::ResPacketSize);\n          R2 = r1.template loadPacket<ResPacket>(2 * Traits::ResPacketSize);\n          traits.acc(C1, alphav, R0);\n          traits.acc(C5, alphav, R1);\n          traits.acc(C9, alphav, R2);\n          r1.storePacket(0 * Traits::ResPacketSize, R0);\n          r1.storePacket(1 * Traits::ResPacketSize, R1);\n          r1.storePacket(2 * Traits::ResPacketSize, R2);\n\n          R0 = r2.template loadPacket<ResPacket>(0 * Traits::ResPacketSize);\n          R1 = r2.template loadPacket<ResPacket>(1 * Traits::ResPacketSize);\n          R2 = r2.template loadPacket<ResPacket>(2 * Traits::ResPacketSize);\n          traits.acc(C2, alphav, R0);\n          traits.acc(C6, alphav, R1);\n          traits.acc(C10, alphav, R2);\n          r2.storePacket(0 * Traits::ResPacketSize, R0);\n          r2.storePacket(1 * Traits::ResPacketSize, R1);\n          r2.storePacket(2 * Traits::ResPacketSize, R2);\n\n          R0 = r3.template loadPacket<ResPacket>(0 * Traits::ResPacketSize);\n          R1 = r3.template loadPacket<ResPacket>(1 * Traits::ResPacketSize);\n          R2 = r3.template loadPacket<ResPacket>(2 * Traits::ResPacketSize);\n          traits.acc(C3, alphav, R0);\n          traits.acc(C7, alphav, R1);\n          traits.acc(C11, alphav, R2);\n          r3.storePacket(0 * Traits::ResPacketSize, R0);\n          r3.storePacket(1 * Traits::ResPacketSize, R1);\n          r3.storePacket(2 * Traits::ResPacketSize, R2);          \n          }\n        }\n\n        // Deal with remaining columns of the rhs\n        for(Index j2=packet_cols4; j2<cols; j2++)\n        {\n          for(Index i=i1; i<actual_panel_end; i+=3*LhsProgress)\n          {\n          // One column at a time\n          const LhsScalar* blA = &blockA[i*strideA+offsetA*(3*Traits::LhsProgress)];\n          prefetch(&blA[0]);\n\n          // gets res block as register\n          AccPacket C0, C4, C8;\n          traits.initAcc(C0);\n          traits.initAcc(C4);\n          traits.initAcc(C8);\n\n          LinearMapper r0 = res.getLinearMapper(i, j2);\n          r0.prefetch(0);\n\n          // performs \"inner\" products\n          const RhsScalar* blB = &blockB[j2*strideB+offsetB];\n          LhsPacket A0, A1, A2;\n          \n          for(Index k=0; 
k<peeled_kc; k+=pk)\n          {\n            EIGEN_ASM_COMMENT(\"begin gebp micro kernel 3pX1\");\n            RhsPacket B_0;\n#define EIGEN_GEBGP_ONESTEP(K)                                                    \\\n            do {                                                                  \\\n              EIGEN_ASM_COMMENT(\"begin step of gebp micro kernel 3pX1\");          \\\n              EIGEN_ASM_COMMENT(\"Note: these asm comments work around bug 935!\"); \\\n              traits.loadLhs(&blA[(0 + 3 * K) * LhsProgress], A0);                \\\n              traits.loadLhs(&blA[(1 + 3 * K) * LhsProgress], A1);                \\\n              traits.loadLhs(&blA[(2 + 3 * K) * LhsProgress], A2);                \\\n              traits.loadRhs(&blB[(0 + K) * RhsProgress], B_0);                   \\\n              traits.madd(A0, B_0, C0, B_0, fix<0>);                              \\\n              traits.madd(A1, B_0, C4, B_0, fix<0>);                              \\\n              traits.madd(A2, B_0, C8, B_0, fix<0>);                              \\\n              EIGEN_ASM_COMMENT(\"end step of gebp micro kernel 3pX1\");            \\\n            } while (false)\n\n            EIGEN_GEBGP_ONESTEP(0);\n            EIGEN_GEBGP_ONESTEP(1);\n            EIGEN_GEBGP_ONESTEP(2);\n            EIGEN_GEBGP_ONESTEP(3);\n            EIGEN_GEBGP_ONESTEP(4);\n            EIGEN_GEBGP_ONESTEP(5);\n            EIGEN_GEBGP_ONESTEP(6);\n            EIGEN_GEBGP_ONESTEP(7);\n\n            blB += int(pk) * int(RhsProgress);\n            blA += int(pk) * 3 * int(Traits::LhsProgress);\n\n            EIGEN_ASM_COMMENT(\"end gebp micro kernel 3pX1\");\n          }\n\n          // process remaining peeled loop\n          for(Index k=peeled_kc; k<depth; k++)\n          {\n            RhsPacket B_0;\n            EIGEN_GEBGP_ONESTEP(0);\n            blB += RhsProgress;\n            blA += 3*Traits::LhsProgress;\n          }\n#undef EIGEN_GEBGP_ONESTEP\n          ResPacket R0, R1, R2;\n          ResPacket alphav = pset1<ResPacket>(alpha);\n\n          R0 = r0.template loadPacket<ResPacket>(0 * Traits::ResPacketSize);\n          R1 = r0.template loadPacket<ResPacket>(1 * Traits::ResPacketSize);\n          R2 = r0.template loadPacket<ResPacket>(2 * Traits::ResPacketSize);\n          traits.acc(C0, alphav, R0);\n          traits.acc(C4, alphav, R1);\n          traits.acc(C8, alphav, R2);\n          r0.storePacket(0 * Traits::ResPacketSize, R0);\n          r0.storePacket(1 * Traits::ResPacketSize, R1);\n          r0.storePacket(2 * Traits::ResPacketSize, R2);          \n          }\n        }\n      }\n    }\n\n    //---------- Process 2 * LhsProgress rows at once ----------\n    if(mr>=2*Traits::LhsProgress)\n    {\n      const Index l1 = defaultL1CacheSize; // in Bytes, TODO, l1 should be passed to this function.\n      // The max(1, ...) here is needed because we may be using blocking params larger than what our known l1 cache size\n      // suggests we should be using: either because our known l1 cache size is inaccurate (e.g. 
on Android, we can only guess),\n      // or because we are testing specific blocking sizes.\n      Index actual_panel_rows = (2*LhsProgress) * std::max<Index>(1,( (l1 - sizeof(ResScalar)*mr*nr - depth*nr*sizeof(RhsScalar)) / (depth * sizeof(LhsScalar) * 2*LhsProgress) ));\n\n      for(Index i1=peeled_mc3; i1<peeled_mc2; i1+=actual_panel_rows)\n      {\n        Index actual_panel_end = (std::min)(i1+actual_panel_rows, peeled_mc2);\n        for(Index j2=0; j2<packet_cols4; j2+=nr)\n        {\n          for(Index i=i1; i<actual_panel_end; i+=2*LhsProgress)\n          {\n          \n          // We selected a 2*Traits::LhsProgress x nr micro block of res which is entirely\n          // stored into 2 x nr registers.\n          \n          const LhsScalar* blA = &blockA[i*strideA+offsetA*(2*Traits::LhsProgress)];\n          prefetch(&blA[0]);\n\n          // gets res block as register\n          AccPacket C0, C1, C2, C3,\n                    C4, C5, C6, C7;\n          traits.initAcc(C0); traits.initAcc(C1); traits.initAcc(C2); traits.initAcc(C3);\n          traits.initAcc(C4); traits.initAcc(C5); traits.initAcc(C6); traits.initAcc(C7);\n\n          LinearMapper r0 = res.getLinearMapper(i, j2 + 0);\n          LinearMapper r1 = res.getLinearMapper(i, j2 + 1);\n          LinearMapper r2 = res.getLinearMapper(i, j2 + 2);\n          LinearMapper r3 = res.getLinearMapper(i, j2 + 3);\n\n          r0.prefetch(prefetch_res_offset);\n          r1.prefetch(prefetch_res_offset);\n          r2.prefetch(prefetch_res_offset);\n          r3.prefetch(prefetch_res_offset);\n\n          // performs \"inner\" products\n          const RhsScalar* blB = &blockB[j2*strideB+offsetB*nr];\n          prefetch(&blB[0]);\n          LhsPacket A0, A1;\n\n          for(Index k=0; k<peeled_kc; k+=pk)\n          {\n            EIGEN_ASM_COMMENT(\"begin gebp micro kernel 2pX4\");\n            RhsPacketx4 rhs_panel;\n            RhsPacket T0;\n\n          // NOTE: the begin/end asm comments below work around bug 935!\n          // but they are not enough for gcc>=6 without FMA (bug 1637)\n          #if EIGEN_GNUC_AT_LEAST(6,0) && defined(EIGEN_VECTORIZE_SSE)\n            #define EIGEN_GEBP_2PX4_SPILLING_WORKAROUND __asm__  (\"\" : [a0] \"+x,m\" (A0),[a1] \"+x,m\" (A1));\n          #else\n            #define EIGEN_GEBP_2PX4_SPILLING_WORKAROUND\n          #endif\n#define EIGEN_GEBGP_ONESTEP(K)                                            \\\n            do {                                                          \\\n              EIGEN_ASM_COMMENT(\"begin step of gebp micro kernel 2pX4\");  \\\n              traits.loadLhs(&blA[(0 + 2 * K) * LhsProgress], A0);        \\\n              traits.loadLhs(&blA[(1 + 2 * K) * LhsProgress], A1);        \\\n              traits.loadRhs(&blB[(0 + 4 * K) * RhsProgress], rhs_panel); \\\n              traits.madd(A0, rhs_panel, C0, T0, fix<0>);                 \\\n              traits.madd(A1, rhs_panel, C4, T0, fix<0>);                 \\\n              traits.madd(A0, rhs_panel, C1, T0, fix<1>);                 \\\n              traits.madd(A1, rhs_panel, C5, T0, fix<1>);                 \\\n              traits.madd(A0, rhs_panel, C2, T0, fix<2>);                 \\\n              traits.madd(A1, rhs_panel, C6, T0, fix<2>);                 \\\n              traits.madd(A0, rhs_panel, C3, T0, fix<3>);                 \\\n              traits.madd(A1, rhs_panel, C7, T0, fix<3>);                 \\\n              EIGEN_GEBP_2PX4_SPILLING_WORKAROUND                         \\\n              
EIGEN_ASM_COMMENT(\"end step of gebp micro kernel 2pX4\");    \\\n            } while (false)\n\n            internal::prefetch(blB+(48+0));\n            EIGEN_GEBGP_ONESTEP(0);\n            EIGEN_GEBGP_ONESTEP(1);\n            EIGEN_GEBGP_ONESTEP(2);\n            EIGEN_GEBGP_ONESTEP(3);\n            internal::prefetch(blB+(48+16));\n            EIGEN_GEBGP_ONESTEP(4);\n            EIGEN_GEBGP_ONESTEP(5);\n            EIGEN_GEBGP_ONESTEP(6);\n            EIGEN_GEBGP_ONESTEP(7);\n\n            blB += pk*4*RhsProgress;\n            blA += pk*(2*Traits::LhsProgress);\n\n            EIGEN_ASM_COMMENT(\"end gebp micro kernel 2pX4\");\n          }\n          // process remaining peeled loop\n          for(Index k=peeled_kc; k<depth; k++)\n          {\n            RhsPacketx4 rhs_panel;\n            RhsPacket T0;\n            EIGEN_GEBGP_ONESTEP(0);\n            blB += 4*RhsProgress;\n            blA += 2*Traits::LhsProgress;\n          }\n#undef EIGEN_GEBGP_ONESTEP\n\n          ResPacket R0, R1, R2, R3;\n          ResPacket alphav = pset1<ResPacket>(alpha);\n\n          R0 = r0.template loadPacket<ResPacket>(0 * Traits::ResPacketSize);\n          R1 = r0.template loadPacket<ResPacket>(1 * Traits::ResPacketSize);\n          R2 = r1.template loadPacket<ResPacket>(0 * Traits::ResPacketSize);\n          R3 = r1.template loadPacket<ResPacket>(1 * Traits::ResPacketSize);\n          traits.acc(C0, alphav, R0);\n          traits.acc(C4, alphav, R1);\n          traits.acc(C1, alphav, R2);\n          traits.acc(C5, alphav, R3);\n          r0.storePacket(0 * Traits::ResPacketSize, R0);\n          r0.storePacket(1 * Traits::ResPacketSize, R1);\n          r1.storePacket(0 * Traits::ResPacketSize, R2);\n          r1.storePacket(1 * Traits::ResPacketSize, R3);\n\n          R0 = r2.template loadPacket<ResPacket>(0 * Traits::ResPacketSize);\n          R1 = r2.template loadPacket<ResPacket>(1 * Traits::ResPacketSize);\n          R2 = r3.template loadPacket<ResPacket>(0 * Traits::ResPacketSize);\n          R3 = r3.template loadPacket<ResPacket>(1 * Traits::ResPacketSize);\n          traits.acc(C2,  alphav, R0);\n          traits.acc(C6,  alphav, R1);\n          traits.acc(C3,  alphav, R2);\n          traits.acc(C7,  alphav, R3);\n          r2.storePacket(0 * Traits::ResPacketSize, R0);\n          r2.storePacket(1 * Traits::ResPacketSize, R1);\n          r3.storePacket(0 * Traits::ResPacketSize, R2);\n          r3.storePacket(1 * Traits::ResPacketSize, R3);\n          }\n        }\n      \n        // Deal with remaining columns of the rhs\n        for(Index j2=packet_cols4; j2<cols; j2++)\n        {\n          for(Index i=i1; i<actual_panel_end; i+=2*LhsProgress)\n          {\n          // One column at a time\n          const LhsScalar* blA = &blockA[i*strideA+offsetA*(2*Traits::LhsProgress)];\n          prefetch(&blA[0]);\n\n          // gets res block as register\n          AccPacket C0, C4;\n          traits.initAcc(C0);\n          traits.initAcc(C4);\n\n          LinearMapper r0 = res.getLinearMapper(i, j2);\n          r0.prefetch(prefetch_res_offset);\n\n          // performs \"inner\" products\n          const RhsScalar* blB = &blockB[j2*strideB+offsetB];\n          LhsPacket A0, A1;\n\n          for(Index k=0; k<peeled_kc; k+=pk)\n          {\n            EIGEN_ASM_COMMENT(\"begin gebp micro kernel 2pX1\");\n            RhsPacket B_0, B1;\n        \n#define EIGEN_GEBGP_ONESTEP(K) \\\n            do {                                                                  \\\n              EIGEN_ASM_COMMENT(\"begin 
step of gebp micro kernel 2pX1\");          \\\n              EIGEN_ASM_COMMENT(\"Note: these asm comments work around bug 935!\"); \\\n              traits.loadLhs(&blA[(0+2*K)*LhsProgress], A0);                      \\\n              traits.loadLhs(&blA[(1+2*K)*LhsProgress], A1);                      \\\n              traits.loadRhs(&blB[(0+K)*RhsProgress], B_0);                       \\\n              traits.madd(A0, B_0, C0, B1, fix<0>);                               \\\n              traits.madd(A1, B_0, C4, B_0, fix<0>);                              \\\n              EIGEN_ASM_COMMENT(\"end step of gebp micro kernel 2pX1\");            \\\n            } while(false)\n        \n            EIGEN_GEBGP_ONESTEP(0);\n            EIGEN_GEBGP_ONESTEP(1);\n            EIGEN_GEBGP_ONESTEP(2);\n            EIGEN_GEBGP_ONESTEP(3);\n            EIGEN_GEBGP_ONESTEP(4);\n            EIGEN_GEBGP_ONESTEP(5);\n            EIGEN_GEBGP_ONESTEP(6);\n            EIGEN_GEBGP_ONESTEP(7);\n\n            blB += int(pk) * int(RhsProgress);\n            blA += int(pk) * 2 * int(Traits::LhsProgress);\n\n            EIGEN_ASM_COMMENT(\"end gebp micro kernel 2pX1\");\n          }\n\n          // process remaining peeled loop\n          for(Index k=peeled_kc; k<depth; k++)\n          {\n            RhsPacket B_0, B1;\n            EIGEN_GEBGP_ONESTEP(0);\n            blB += RhsProgress;\n            blA += 2*Traits::LhsProgress;\n          }\n#undef EIGEN_GEBGP_ONESTEP\n          ResPacket R0, R1;\n          ResPacket alphav = pset1<ResPacket>(alpha);\n\n          R0 = r0.template loadPacket<ResPacket>(0 * Traits::ResPacketSize);\n          R1 = r0.template loadPacket<ResPacket>(1 * Traits::ResPacketSize);\n          traits.acc(C0, alphav, R0);\n          traits.acc(C4, alphav, R1);\n          r0.storePacket(0 * Traits::ResPacketSize, R0);\n          r0.storePacket(1 * Traits::ResPacketSize, R1);\n          }\n        }\n      }\n    }\n    //---------- Process 1 * LhsProgress rows at once ----------\n    if(mr>=1*Traits::LhsProgress)\n    {\n      lhs_process_one_packet<nr, LhsProgress, RhsProgress, LhsScalar, RhsScalar, ResScalar, AccPacket, LhsPacket, RhsPacket, ResPacket, Traits, LinearMapper, DataMapper> p;\n      p(res, blockA, blockB, alpha, peeled_mc2, peeled_mc1, strideA, strideB, offsetA, offsetB, prefetch_res_offset, peeled_kc, pk, cols, depth, packet_cols4);\n    }\n    //---------- Process LhsProgressHalf rows at once ----------\n    if((LhsProgressHalf < LhsProgress) && mr>=LhsProgressHalf)\n    {\n      lhs_process_fraction_of_packet<nr, LhsProgressHalf, RhsProgressHalf, LhsScalar, RhsScalar, ResScalar, AccPacketHalf, LhsPacketHalf, RhsPacketHalf, ResPacketHalf, HalfTraits, LinearMapper, DataMapper> p;\n      p(res, blockA, blockB, alpha, peeled_mc1, peeled_mc_half, strideA, strideB, offsetA, offsetB, prefetch_res_offset, peeled_kc, pk, cols, depth, packet_cols4);\n    }\n    //---------- Process LhsProgressQuarter rows at once ----------\n    if((LhsProgressQuarter < LhsProgressHalf) && mr>=LhsProgressQuarter)\n    {\n      lhs_process_fraction_of_packet<nr, LhsProgressQuarter, RhsProgressQuarter, LhsScalar, RhsScalar, ResScalar, AccPacketQuarter, LhsPacketQuarter, RhsPacketQuarter, ResPacketQuarter, QuarterTraits, LinearMapper, DataMapper> p;\n      p(res, blockA, blockB, alpha, peeled_mc_half, peeled_mc_quarter, strideA, strideB, offsetA, offsetB, prefetch_res_offset, peeled_kc, pk, cols, depth, packet_cols4);\n    }\n    //---------- Process remaining rows, 1 at once ----------\n    
if(peeled_mc_quarter<rows)\n    {\n      // loop on each panel of the rhs\n      for(Index j2=0; j2<packet_cols4; j2+=nr)\n      {\n        // loop on each row of the lhs (1*LhsProgress x depth)\n        for(Index i=peeled_mc_quarter; i<rows; i+=1)\n        {\n          const LhsScalar* blA = &blockA[i*strideA+offsetA];\n          prefetch(&blA[0]);\n          const RhsScalar* blB = &blockB[j2*strideB+offsetB*nr];\n\n          // If LhsProgress is 8 or 16, it assumes that there is a\n          // half or quarter packet, respectively, of the same size as\n          // nr (which is currently 4) for the return type.\n          const int SResPacketHalfSize = unpacket_traits<typename unpacket_traits<SResPacket>::half>::size;\n          const int SResPacketQuarterSize = unpacket_traits<typename unpacket_traits<typename unpacket_traits<SResPacket>::half>::half>::size;\n          if ((SwappedTraits::LhsProgress % 4) == 0 &&\n              (SwappedTraits::LhsProgress<=16) &&\n              (SwappedTraits::LhsProgress!=8  || SResPacketHalfSize==nr) &&\n              (SwappedTraits::LhsProgress!=16 || SResPacketQuarterSize==nr))\n          {\n            SAccPacket C0, C1, C2, C3;\n            straits.initAcc(C0);\n            straits.initAcc(C1);\n            straits.initAcc(C2);\n            straits.initAcc(C3);\n\n            const Index spk   = (std::max)(1,SwappedTraits::LhsProgress/4);\n            const Index endk  = (depth/spk)*spk;\n            const Index endk4 = (depth/(spk*4))*(spk*4);\n\n            Index k=0;\n            for(; k<endk4; k+=4*spk)\n            {\n              SLhsPacket A0,A1;\n              SRhsPacket B_0,B_1;\n\n              straits.loadLhsUnaligned(blB+0*SwappedTraits::LhsProgress, A0);\n              straits.loadLhsUnaligned(blB+1*SwappedTraits::LhsProgress, A1);\n\n              straits.loadRhsQuad(blA+0*spk, B_0);\n              straits.loadRhsQuad(blA+1*spk, B_1);\n              straits.madd(A0,B_0,C0,B_0, fix<0>);\n              straits.madd(A1,B_1,C1,B_1, fix<0>);\n\n              straits.loadLhsUnaligned(blB+2*SwappedTraits::LhsProgress, A0);\n              straits.loadLhsUnaligned(blB+3*SwappedTraits::LhsProgress, A1);\n              straits.loadRhsQuad(blA+2*spk, B_0);\n              straits.loadRhsQuad(blA+3*spk, B_1);\n              straits.madd(A0,B_0,C2,B_0, fix<0>);\n              straits.madd(A1,B_1,C3,B_1, fix<0>);\n\n              blB += 4*SwappedTraits::LhsProgress;\n              blA += 4*spk;\n            }\n            C0 = padd(padd(C0,C1),padd(C2,C3));\n            for(; k<endk; k+=spk)\n            {\n              SLhsPacket A0;\n              SRhsPacket B_0;\n\n              straits.loadLhsUnaligned(blB, A0);\n              straits.loadRhsQuad(blA, B_0);\n              straits.madd(A0,B_0,C0,B_0, fix<0>);\n\n              blB += SwappedTraits::LhsProgress;\n              blA += spk;\n            }\n            if(SwappedTraits::LhsProgress==8)\n            {\n              // Special case where we have to first reduce the accumulation register C0\n              typedef typename conditional<SwappedTraits::LhsProgress>=8,typename unpacket_traits<SResPacket>::half,SResPacket>::type SResPacketHalf;\n              typedef typename conditional<SwappedTraits::LhsProgress>=8,typename unpacket_traits<SLhsPacket>::half,SLhsPacket>::type SLhsPacketHalf;\n              typedef typename conditional<SwappedTraits::LhsProgress>=8,typename unpacket_traits<SRhsPacket>::half,SRhsPacket>::type SRhsPacketHalf;\n              typedef typename 
conditional<SwappedTraits::LhsProgress>=8,typename unpacket_traits<SAccPacket>::half,SAccPacket>::type SAccPacketHalf;\n\n              SResPacketHalf R = res.template gatherPacket<SResPacketHalf>(i, j2);\n              SResPacketHalf alphav = pset1<SResPacketHalf>(alpha);\n\n              if(depth-endk>0)\n              {\n                // We have to handle the last row of the rhs which corresponds to a half-packet\n                SLhsPacketHalf a0;\n                SRhsPacketHalf b0;\n                straits.loadLhsUnaligned(blB, a0);\n                straits.loadRhs(blA, b0);\n                SAccPacketHalf c0 = predux_half_dowto4(C0);\n                straits.madd(a0,b0,c0,b0, fix<0>);\n                straits.acc(c0, alphav, R);\n              }\n              else\n              {\n                straits.acc(predux_half_dowto4(C0), alphav, R);\n              }\n              res.scatterPacket(i, j2, R);\n            }\n            else if (SwappedTraits::LhsProgress==16)\n            {\n              // Special case where we have to first reduce the\n              // accumulation register C0. We specialize the block in\n              // template form, so that LhsProgress < 16 paths don't\n              // fail to compile\n              last_row_process_16_packets<LhsScalar, RhsScalar, Index, DataMapper, mr, nr, ConjugateLhs, ConjugateRhs> p;\n\t            p(res, straits, blA, blB, depth, endk, i, j2,alpha, C0);\n            }\n            else\n            {\n              SResPacket R = res.template gatherPacket<SResPacket>(i, j2);\n              SResPacket alphav = pset1<SResPacket>(alpha);\n              straits.acc(C0, alphav, R);\n              res.scatterPacket(i, j2, R);\n            }\n          }\n          else // scalar path\n          {\n            // get a 1 x 4 res block as registers\n            ResScalar C0(0), C1(0), C2(0), C3(0);\n\n            for(Index k=0; k<depth; k++)\n            {\n              LhsScalar A0;\n              RhsScalar B_0, B_1;\n\n              A0 = blA[k];\n\n              B_0 = blB[0];\n              B_1 = blB[1];\n              C0 = cj.pmadd(A0,B_0,C0);\n              C1 = cj.pmadd(A0,B_1,C1);\n\n              B_0 = blB[2];\n              B_1 = blB[3];\n              C2 = cj.pmadd(A0,B_0,C2);\n              C3 = cj.pmadd(A0,B_1,C3);\n\n              blB += 4;\n            }\n            res(i, j2 + 0) += alpha * C0;\n            res(i, j2 + 1) += alpha * C1;\n            res(i, j2 + 2) += alpha * C2;\n            res(i, j2 + 3) += alpha * C3;\n          }\n        }\n      }\n      // remaining columns\n      for(Index j2=packet_cols4; j2<cols; j2++)\n      {\n        // loop on each row of the lhs (1*LhsProgress x depth)\n        for(Index i=peeled_mc_quarter; i<rows; i+=1)\n        {\n          const LhsScalar* blA = &blockA[i*strideA+offsetA];\n          prefetch(&blA[0]);\n          // gets a 1 x 1 res block as registers\n          ResScalar C0(0);\n          const RhsScalar* blB = &blockB[j2*strideB+offsetB];\n          for(Index k=0; k<depth; k++)\n          {\n            LhsScalar A0 = blA[k];\n            RhsScalar B_0 = blB[k];\n            C0 = cj.pmadd(A0, B_0, C0);\n          }\n          res(i, j2) += alpha * C0;\n        }\n      }\n    }\n  }\n\n\n// pack a block of the lhs\n// The traversal is as follow (mr==4):\n//   0  4  8 12 ...\n//   1  5  9 13 ...\n//   2  6 10 14 ...\n//   3  7 11 15 ...\n//\n//  16 20 24 28 ...\n//  17 21 25 29 ...\n//  18 22 26 30 ...\n//  19 23 27 31 ...\n//\n//  32 33 34 35 ...\n//  36 36 
38 39 ...\ntemplate<typename Scalar, typename Index, typename DataMapper, int Pack1, int Pack2, typename Packet, bool Conjugate, bool PanelMode>\nstruct gemm_pack_lhs<Scalar, Index, DataMapper, Pack1, Pack2, Packet, ColMajor, Conjugate, PanelMode>\n{\n  typedef typename DataMapper::LinearMapper LinearMapper;\n  EIGEN_DONT_INLINE void operator()(Scalar* blockA, const DataMapper& lhs, Index depth, Index rows, Index stride=0, Index offset=0);\n};\n\ntemplate<typename Scalar, typename Index, typename DataMapper, int Pack1, int Pack2, typename Packet, bool Conjugate, bool PanelMode>\nEIGEN_DONT_INLINE void gemm_pack_lhs<Scalar, Index, DataMapper, Pack1, Pack2, Packet, ColMajor, Conjugate, PanelMode>\n  ::operator()(Scalar* blockA, const DataMapper& lhs, Index depth, Index rows, Index stride, Index offset)\n{\n  typedef typename unpacket_traits<Packet>::half HalfPacket;\n  typedef typename unpacket_traits<typename unpacket_traits<Packet>::half>::half QuarterPacket;\n  enum { PacketSize = unpacket_traits<Packet>::size,\n         HalfPacketSize = unpacket_traits<HalfPacket>::size,\n         QuarterPacketSize = unpacket_traits<QuarterPacket>::size,\n         HasHalf = (int)HalfPacketSize < (int)PacketSize,\n         HasQuarter = (int)QuarterPacketSize < (int)HalfPacketSize};\n\n  EIGEN_ASM_COMMENT(\"EIGEN PRODUCT PACK LHS\");\n  EIGEN_UNUSED_VARIABLE(stride);\n  EIGEN_UNUSED_VARIABLE(offset);\n  eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));\n  eigen_assert( ((Pack1%PacketSize)==0 && Pack1<=4*PacketSize) || (Pack1<=4) );\n  conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;\n  Index count = 0;\n\n  const Index peeled_mc3 = Pack1>=3*PacketSize ? (rows/(3*PacketSize))*(3*PacketSize) : 0;\n  const Index peeled_mc2 = Pack1>=2*PacketSize ? peeled_mc3+((rows-peeled_mc3)/(2*PacketSize))*(2*PacketSize) : 0;\n  const Index peeled_mc1 = Pack1>=1*PacketSize ? peeled_mc2+((rows-peeled_mc2)/(1*PacketSize))*(1*PacketSize) : 0;\n  const Index peeled_mc_half = Pack1>=HalfPacketSize ? peeled_mc1+((rows-peeled_mc1)/(HalfPacketSize))*(HalfPacketSize) : 0;\n  const Index peeled_mc_quarter = Pack1>=QuarterPacketSize ? (rows/(QuarterPacketSize))*(QuarterPacketSize) : 0;\n  const Index last_lhs_progress = rows > peeled_mc_quarter ? (rows - peeled_mc_quarter) & ~1 : 0;\n  const Index peeled_mc0 = Pack2>=PacketSize ? peeled_mc_quarter\n                         : Pack2>1 && last_lhs_progress ? 
(rows/last_lhs_progress)*last_lhs_progress : 0;\n\n  Index i=0;\n\n  // Pack 3 packets\n  if(Pack1>=3*PacketSize)\n  {\n    for(; i<peeled_mc3; i+=3*PacketSize)\n    {\n      if(PanelMode) count += (3*PacketSize) * offset;\n\n      for(Index k=0; k<depth; k++)\n      {\n        Packet A, B, C;\n        A = lhs.template loadPacket<Packet>(i+0*PacketSize, k);\n        B = lhs.template loadPacket<Packet>(i+1*PacketSize, k);\n        C = lhs.template loadPacket<Packet>(i+2*PacketSize, k);\n        pstore(blockA+count, cj.pconj(A)); count+=PacketSize;\n        pstore(blockA+count, cj.pconj(B)); count+=PacketSize;\n        pstore(blockA+count, cj.pconj(C)); count+=PacketSize;\n      }\n      if(PanelMode) count += (3*PacketSize) * (stride-offset-depth);\n    }\n  }\n  // Pack 2 packets\n  if(Pack1>=2*PacketSize)\n  {\n    for(; i<peeled_mc2; i+=2*PacketSize)\n    {\n      if(PanelMode) count += (2*PacketSize) * offset;\n\n      for(Index k=0; k<depth; k++)\n      {\n        Packet A, B;\n        A = lhs.template loadPacket<Packet>(i+0*PacketSize, k);\n        B = lhs.template loadPacket<Packet>(i+1*PacketSize, k);\n        pstore(blockA+count, cj.pconj(A)); count+=PacketSize;\n        pstore(blockA+count, cj.pconj(B)); count+=PacketSize;\n      }\n      if(PanelMode) count += (2*PacketSize) * (stride-offset-depth);\n    }\n  }\n  // Pack 1 packets\n  if(Pack1>=1*PacketSize)\n  {\n    for(; i<peeled_mc1; i+=1*PacketSize)\n    {\n      if(PanelMode) count += (1*PacketSize) * offset;\n\n      for(Index k=0; k<depth; k++)\n      {\n        Packet A;\n        A = lhs.template loadPacket<Packet>(i+0*PacketSize, k);\n        pstore(blockA+count, cj.pconj(A));\n        count+=PacketSize;\n      }\n      if(PanelMode) count += (1*PacketSize) * (stride-offset-depth);\n    }\n  }\n  // Pack half packets\n  if(HasHalf && Pack1>=HalfPacketSize)\n  {\n    for(; i<peeled_mc_half; i+=HalfPacketSize)\n    {\n      if(PanelMode) count += (HalfPacketSize) * offset;\n\n      for(Index k=0; k<depth; k++)\n      {\n        HalfPacket A;\n        A = lhs.template loadPacket<HalfPacket>(i+0*(HalfPacketSize), k);\n        pstoreu(blockA+count, cj.pconj(A));\n        count+=HalfPacketSize;\n      }\n      if(PanelMode) count += (HalfPacketSize) * (stride-offset-depth);\n    }\n  }\n  // Pack quarter packets\n  if(HasQuarter && Pack1>=QuarterPacketSize)\n  {\n    for(; i<peeled_mc_quarter; i+=QuarterPacketSize)\n    {\n      if(PanelMode) count += (QuarterPacketSize) * offset;\n\n      for(Index k=0; k<depth; k++)\n      {\n        QuarterPacket A;\n        A = lhs.template loadPacket<QuarterPacket>(i+0*(QuarterPacketSize), k);\n        pstoreu(blockA+count, cj.pconj(A));\n        count+=QuarterPacketSize;\n      }\n      if(PanelMode) count += (QuarterPacketSize) * (stride-offset-depth);\n    }\n  }\n  // Pack2 may be *smaller* than PacketSize—that happens for\n  // products like real * complex, where we have to go half the\n  // progress on the lhs in order to duplicate those operands to\n  // address both real & imaginary parts on the rhs. 
This portion will\n  // pack those half ones until they match the number expected on the\n  // last peeling loop at this point (for the rhs).\n  if(Pack2<PacketSize && Pack2>1)\n  {\n    for(; i<peeled_mc0; i+=last_lhs_progress)\n    {\n      if(PanelMode) count += last_lhs_progress * offset;\n\n      for(Index k=0; k<depth; k++)\n        for(Index w=0; w<last_lhs_progress; w++)\n          blockA[count++] = cj(lhs(i+w, k));\n\n      if(PanelMode) count += last_lhs_progress * (stride-offset-depth);\n    }\n  }\n  // Pack scalars\n  for(; i<rows; i++)\n  {\n    if(PanelMode) count += offset;\n    for(Index k=0; k<depth; k++)\n      blockA[count++] = cj(lhs(i, k));\n    if(PanelMode) count += (stride-offset-depth);\n  }\n}\n\ntemplate<typename Scalar, typename Index, typename DataMapper, int Pack1, int Pack2, typename Packet, bool Conjugate, bool PanelMode>\nstruct gemm_pack_lhs<Scalar, Index, DataMapper, Pack1, Pack2, Packet, RowMajor, Conjugate, PanelMode>\n{\n  typedef typename DataMapper::LinearMapper LinearMapper;\n  EIGEN_DONT_INLINE void operator()(Scalar* blockA, const DataMapper& lhs, Index depth, Index rows, Index stride=0, Index offset=0);\n};\n\ntemplate<typename Scalar, typename Index, typename DataMapper, int Pack1, int Pack2, typename Packet, bool Conjugate, bool PanelMode>\nEIGEN_DONT_INLINE void gemm_pack_lhs<Scalar, Index, DataMapper, Pack1, Pack2, Packet, RowMajor, Conjugate, PanelMode>\n  ::operator()(Scalar* blockA, const DataMapper& lhs, Index depth, Index rows, Index stride, Index offset)\n{\n  typedef typename unpacket_traits<Packet>::half HalfPacket;\n  typedef typename unpacket_traits<typename unpacket_traits<Packet>::half>::half QuarterPacket;\n  enum { PacketSize = unpacket_traits<Packet>::size,\n         HalfPacketSize = unpacket_traits<HalfPacket>::size,\n         QuarterPacketSize = unpacket_traits<QuarterPacket>::size,\n         HasHalf = (int)HalfPacketSize < (int)PacketSize,\n         HasQuarter = (int)QuarterPacketSize < (int)HalfPacketSize};\n\n  EIGEN_ASM_COMMENT(\"EIGEN PRODUCT PACK LHS\");\n  EIGEN_UNUSED_VARIABLE(stride);\n  EIGEN_UNUSED_VARIABLE(offset);\n  eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));\n  conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;\n  Index count = 0;\n  bool gone_half = false, gone_quarter = false, gone_last = false;\n\n  Index i = 0;\n  int pack = Pack1;\n  int psize = PacketSize;\n  while(pack>0)\n  {\n    Index remaining_rows = rows-i;\n    Index peeled_mc = gone_last ? Pack2>1 ? 
(rows/pack)*pack : 0 : i+(remaining_rows/pack)*pack;\n    Index starting_pos = i;\n    for(; i<peeled_mc; i+=pack)\n    {\n      if(PanelMode) count += pack * offset;\n\n      Index k=0;\n      if(pack>=psize && psize >= QuarterPacketSize)\n      {\n        const Index peeled_k = (depth/psize)*psize;\n        for(; k<peeled_k; k+=psize)\n        {\n          for (Index m = 0; m < pack; m += psize)\n          {\n            if (psize == PacketSize) {\n              PacketBlock<Packet> kernel;\n              for (int p = 0; p < psize; ++p) kernel.packet[p] = lhs.template loadPacket<Packet>(i+p+m, k);\n              ptranspose(kernel);\n              for (int p = 0; p < psize; ++p) pstore(blockA+count+m+(pack)*p, cj.pconj(kernel.packet[p]));\n            } else if (HasHalf && psize == HalfPacketSize) {\n              gone_half = true;\n              PacketBlock<HalfPacket> kernel_half;\n              for (int p = 0; p < psize; ++p) kernel_half.packet[p] = lhs.template loadPacket<HalfPacket>(i+p+m, k);\n              ptranspose(kernel_half);\n              for (int p = 0; p < psize; ++p) pstore(blockA+count+m+(pack)*p, cj.pconj(kernel_half.packet[p]));\n            } else if (HasQuarter && psize == QuarterPacketSize) {\n              gone_quarter = true;\n              PacketBlock<QuarterPacket> kernel_quarter;\n              for (int p = 0; p < psize; ++p) kernel_quarter.packet[p] = lhs.template loadPacket<QuarterPacket>(i+p+m, k);\n              ptranspose(kernel_quarter);\n              for (int p = 0; p < psize; ++p) pstore(blockA+count+m+(pack)*p, cj.pconj(kernel_quarter.packet[p]));\n\t    }\n          }\n          count += psize*pack;\n        }\n      }\n\n      for(; k<depth; k++)\n      {\n        Index w=0;\n        for(; w<pack-3; w+=4)\n        {\n          Scalar a(cj(lhs(i+w+0, k))),\n                 b(cj(lhs(i+w+1, k))),\n                 c(cj(lhs(i+w+2, k))),\n                 d(cj(lhs(i+w+3, k)));\n          blockA[count++] = a;\n          blockA[count++] = b;\n          blockA[count++] = c;\n          blockA[count++] = d;\n        }\n        if(pack%4)\n          for(;w<pack;++w)\n            blockA[count++] = cj(lhs(i+w, k));\n      }\n\n      if(PanelMode) count += pack * (stride-offset-depth);\n    }\n\n    pack -= psize;\n    Index left = rows - i;\n    if (pack <= 0) {\n      if (!gone_last &&\n          (starting_pos == i || left >= psize/2 || left >= psize/4) &&\n          ((psize/2 == HalfPacketSize && HasHalf && !gone_half) ||\n           (psize/2 == QuarterPacketSize && HasQuarter && !gone_quarter))) {\n        psize /= 2;\n        pack = psize;\n        continue;\n      }\n      // Pack2 may be *smaller* than PacketSize—that happens for\n      // products like real * complex, where we have to go half the\n      // progress on the lhs in order to duplicate those operands to\n      // address both real & imaginary parts on the rhs. 
This portion will\n      // pack those half ones until they match the number expected on the\n      // last peeling loop at this point (for the rhs).\n      if (Pack2 < PacketSize && !gone_last) {\n        gone_last = true;\n        psize = pack = left & ~1;\n      }\n    }\n  }\n\n  for(; i<rows; i++)\n  {\n    if(PanelMode) count += offset;\n    for(Index k=0; k<depth; k++)\n      blockA[count++] = cj(lhs(i, k));\n    if(PanelMode) count += (stride-offset-depth);\n  }\n}\n\n// copy a complete panel of the rhs\n// this version is optimized for column major matrices\n// The traversal order is as follows (nr==4):\n//  0  1  2  3   12 13 14 15   24 27\n//  4  5  6  7   16 17 18 19   25 28\n//  8  9 10 11   20 21 22 23   26 29\n//  .  .  .  .    .  .  .  .    .  .\ntemplate<typename Scalar, typename Index, typename DataMapper, int nr, bool Conjugate, bool PanelMode>\nstruct gemm_pack_rhs<Scalar, Index, DataMapper, nr, ColMajor, Conjugate, PanelMode>\n{\n  typedef typename packet_traits<Scalar>::type Packet;\n  typedef typename DataMapper::LinearMapper LinearMapper;\n  enum { PacketSize = packet_traits<Scalar>::size };\n  EIGEN_DONT_INLINE void operator()(Scalar* blockB, const DataMapper& rhs, Index depth, Index cols, Index stride=0, Index offset=0);\n};\n\ntemplate<typename Scalar, typename Index, typename DataMapper, int nr, bool Conjugate, bool PanelMode>\nEIGEN_DONT_INLINE void gemm_pack_rhs<Scalar, Index, DataMapper, nr, ColMajor, Conjugate, PanelMode>\n  ::operator()(Scalar* blockB, const DataMapper& rhs, Index depth, Index cols, Index stride, Index offset)\n{\n  EIGEN_ASM_COMMENT(\"EIGEN PRODUCT PACK RHS COLMAJOR\");\n  EIGEN_UNUSED_VARIABLE(stride);\n  EIGEN_UNUSED_VARIABLE(offset);\n  eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));\n  conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;\n  Index packet_cols8 = nr>=8 ? (cols/8) * 8 : 0;\n  Index packet_cols4 = nr>=4 ? 
(cols/4) * 4 : 0;\n  Index count = 0;\n  const Index peeled_k = (depth/PacketSize)*PacketSize;\n//   if(nr>=8)\n//   {\n//     for(Index j2=0; j2<packet_cols8; j2+=8)\n//     {\n//       // skip what we have before\n//       if(PanelMode) count += 8 * offset;\n//       const Scalar* b0 = &rhs[(j2+0)*rhsStride];\n//       const Scalar* b1 = &rhs[(j2+1)*rhsStride];\n//       const Scalar* b2 = &rhs[(j2+2)*rhsStride];\n//       const Scalar* b3 = &rhs[(j2+3)*rhsStride];\n//       const Scalar* b4 = &rhs[(j2+4)*rhsStride];\n//       const Scalar* b5 = &rhs[(j2+5)*rhsStride];\n//       const Scalar* b6 = &rhs[(j2+6)*rhsStride];\n//       const Scalar* b7 = &rhs[(j2+7)*rhsStride];\n//       Index k=0;\n//       if(PacketSize==8) // TODO enable vectorized transposition for PacketSize==4\n//       {\n//         for(; k<peeled_k; k+=PacketSize) {\n//           PacketBlock<Packet> kernel;\n//           for (int p = 0; p < PacketSize; ++p) {\n//             kernel.packet[p] = ploadu<Packet>(&rhs[(j2+p)*rhsStride+k]);\n//           }\n//           ptranspose(kernel);\n//           for (int p = 0; p < PacketSize; ++p) {\n//             pstoreu(blockB+count, cj.pconj(kernel.packet[p]));\n//             count+=PacketSize;\n//           }\n//         }\n//       }\n//       for(; k<depth; k++)\n//       {\n//         blockB[count+0] = cj(b0[k]);\n//         blockB[count+1] = cj(b1[k]);\n//         blockB[count+2] = cj(b2[k]);\n//         blockB[count+3] = cj(b3[k]);\n//         blockB[count+4] = cj(b4[k]);\n//         blockB[count+5] = cj(b5[k]);\n//         blockB[count+6] = cj(b6[k]);\n//         blockB[count+7] = cj(b7[k]);\n//         count += 8;\n//       }\n//       // skip what we have after\n//       if(PanelMode) count += 8 * (stride-offset-depth);\n//     }\n//   }\n\n  if(nr>=4)\n  {\n    for(Index j2=packet_cols8; j2<packet_cols4; j2+=4)\n    {\n      // skip what we have before\n      if(PanelMode) count += 4 * offset;\n      const LinearMapper dm0 = rhs.getLinearMapper(0, j2 + 0);\n      const LinearMapper dm1 = rhs.getLinearMapper(0, j2 + 1);\n      const LinearMapper dm2 = rhs.getLinearMapper(0, j2 + 2);\n      const LinearMapper dm3 = rhs.getLinearMapper(0, j2 + 3);\n\n      Index k=0;\n      if((PacketSize%4)==0) // TODO enable vectorized transposition for PacketSize==2 ??\n      {\n        for(; k<peeled_k; k+=PacketSize) {\n          PacketBlock<Packet,(PacketSize%4)==0?4:PacketSize> kernel;\n          kernel.packet[0           ] = dm0.template loadPacket<Packet>(k);\n          kernel.packet[1%PacketSize] = dm1.template loadPacket<Packet>(k);\n          kernel.packet[2%PacketSize] = dm2.template loadPacket<Packet>(k);\n          kernel.packet[3%PacketSize] = dm3.template loadPacket<Packet>(k);\n          ptranspose(kernel);\n          pstoreu(blockB+count+0*PacketSize, cj.pconj(kernel.packet[0]));\n          pstoreu(blockB+count+1*PacketSize, cj.pconj(kernel.packet[1%PacketSize]));\n          pstoreu(blockB+count+2*PacketSize, cj.pconj(kernel.packet[2%PacketSize]));\n          pstoreu(blockB+count+3*PacketSize, cj.pconj(kernel.packet[3%PacketSize]));\n          count+=4*PacketSize;\n        }\n      }\n      for(; k<depth; k++)\n      {\n        blockB[count+0] = cj(dm0(k));\n        blockB[count+1] = cj(dm1(k));\n        blockB[count+2] = cj(dm2(k));\n        blockB[count+3] = cj(dm3(k));\n        count += 4;\n      }\n      // skip what we have after\n      if(PanelMode) count += 4 * (stride-offset-depth);\n    }\n  }\n\n  // copy the remaining columns one at a time (nr==1)\n  
for(Index j2=packet_cols4; j2<cols; ++j2)\n  {\n    if(PanelMode) count += offset;\n    const LinearMapper dm0 = rhs.getLinearMapper(0, j2);\n    for(Index k=0; k<depth; k++)\n    {\n      blockB[count] = cj(dm0(k));\n      count += 1;\n    }\n    if(PanelMode) count += (stride-offset-depth);\n  }\n}\n\n// this version is optimized for row major matrices\ntemplate<typename Scalar, typename Index, typename DataMapper, int nr, bool Conjugate, bool PanelMode>\nstruct gemm_pack_rhs<Scalar, Index, DataMapper, nr, RowMajor, Conjugate, PanelMode>\n{\n  typedef typename packet_traits<Scalar>::type Packet;\n  typedef typename unpacket_traits<Packet>::half HalfPacket;\n  typedef typename unpacket_traits<typename unpacket_traits<Packet>::half>::half QuarterPacket;\n  typedef typename DataMapper::LinearMapper LinearMapper;\n  enum { PacketSize = packet_traits<Scalar>::size,\n         HalfPacketSize = unpacket_traits<HalfPacket>::size,\n\t\t QuarterPacketSize = unpacket_traits<QuarterPacket>::size};\n  EIGEN_DONT_INLINE void operator()(Scalar* blockB, const DataMapper& rhs, Index depth, Index cols, Index stride=0, Index offset=0)\n  {\n    EIGEN_ASM_COMMENT(\"EIGEN PRODUCT PACK RHS ROWMAJOR\");\n    EIGEN_UNUSED_VARIABLE(stride);\n    EIGEN_UNUSED_VARIABLE(offset);\n    eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));\n    const bool HasHalf = (int)HalfPacketSize < (int)PacketSize;\n    const bool HasQuarter = (int)QuarterPacketSize < (int)HalfPacketSize;\n    conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;\n    Index packet_cols8 = nr>=8 ? (cols/8) * 8 : 0;\n    Index packet_cols4 = nr>=4 ? (cols/4) * 4 : 0;\n    Index count = 0;\n\n  //   if(nr>=8)\n  //   {\n  //     for(Index j2=0; j2<packet_cols8; j2+=8)\n  //     {\n  //       // skip what we have before\n  //       if(PanelMode) count += 8 * offset;\n  //       for(Index k=0; k<depth; k++)\n  //       {\n  //         if (PacketSize==8) {\n  //           Packet A = ploadu<Packet>(&rhs[k*rhsStride + j2]);\n  //           pstoreu(blockB+count, cj.pconj(A));\n  //         } else if (PacketSize==4) {\n  //           Packet A = ploadu<Packet>(&rhs[k*rhsStride + j2]);\n  //           Packet B = ploadu<Packet>(&rhs[k*rhsStride + j2 + PacketSize]);\n  //           pstoreu(blockB+count, cj.pconj(A));\n  //           pstoreu(blockB+count+PacketSize, cj.pconj(B));\n  //         } else {\n  //           const Scalar* b0 = &rhs[k*rhsStride + j2];\n  //           blockB[count+0] = cj(b0[0]);\n  //           blockB[count+1] = cj(b0[1]);\n  //           blockB[count+2] = cj(b0[2]);\n  //           blockB[count+3] = cj(b0[3]);\n  //           blockB[count+4] = cj(b0[4]);\n  //           blockB[count+5] = cj(b0[5]);\n  //           blockB[count+6] = cj(b0[6]);\n  //           blockB[count+7] = cj(b0[7]);\n  //         }\n  //         count += 8;\n  //       }\n  //       // skip what we have after\n  //       if(PanelMode) count += 8 * (stride-offset-depth);\n  //     }\n  //   }\n    if(nr>=4)\n    {\n      for(Index j2=packet_cols8; j2<packet_cols4; j2+=4)\n      {\n        // skip what we have before\n        if(PanelMode) count += 4 * offset;\n        for(Index k=0; k<depth; k++)\n        {\n          if (PacketSize==4) {\n            Packet A = rhs.template loadPacket<Packet>(k, j2);\n            pstoreu(blockB+count, cj.pconj(A));\n            count += PacketSize;\n          } else if (HasHalf && HalfPacketSize==4) {\n            HalfPacket A = rhs.template loadPacket<HalfPacket>(k, j2);\n       
     pstoreu(blockB+count, cj.pconj(A));\n            count += HalfPacketSize;\n          } else if (HasQuarter && QuarterPacketSize==4) {\n            QuarterPacket A = rhs.template loadPacket<QuarterPacket>(k, j2);\n            pstoreu(blockB+count, cj.pconj(A));\n            count += QuarterPacketSize;\n          } else {\n            const LinearMapper dm0 = rhs.getLinearMapper(k, j2);\n            blockB[count+0] = cj(dm0(0));\n            blockB[count+1] = cj(dm0(1));\n            blockB[count+2] = cj(dm0(2));\n            blockB[count+3] = cj(dm0(3));\n            count += 4;\n          }\n        }\n        // skip what we have after\n        if(PanelMode) count += 4 * (stride-offset-depth);\n      }\n    }\n    // copy the remaining columns one at a time (nr==1)\n    for(Index j2=packet_cols4; j2<cols; ++j2)\n    {\n      if(PanelMode) count += offset;\n      for(Index k=0; k<depth; k++)\n      {\n        blockB[count] = cj(rhs(k, j2));\n        count += 1;\n      }\n      if(PanelMode) count += stride-offset-depth;\n    }\n  }\n};\n\n} // end namespace internal\n\n/** \\returns the currently set level 1 cpu cache size (in bytes) used to estimate the ideal blocking size parameters.\n  * \\sa setCpuCacheSize */\ninline std::ptrdiff_t l1CacheSize()\n{\n  std::ptrdiff_t l1, l2, l3;\n  internal::manage_caching_sizes(GetAction, &l1, &l2, &l3);\n  return l1;\n}\n\n/** \\returns the currently set level 2 cpu cache size (in bytes) used to estimate the ideal blocking size parameters.\n  * \\sa setCpuCacheSize */\ninline std::ptrdiff_t l2CacheSize()\n{\n  std::ptrdiff_t l1, l2, l3;\n  internal::manage_caching_sizes(GetAction, &l1, &l2, &l3);\n  return l2;\n}\n\n/** \\returns the currently set level 3 cpu cache size (in bytes) used to estimate the ideal blocking size parameters.\n  * \\sa setCpuCacheSize */\ninline std::ptrdiff_t l3CacheSize()\n{\n  std::ptrdiff_t l1, l2, l3;\n  internal::manage_caching_sizes(GetAction, &l1, &l2, &l3);\n  return l3;\n}\n\n/** Set the cpu L1, L2, and L3 cache sizes (in bytes).\n  * These values are used to adjust the size of the blocks\n  * for the algorithms working per block.\n  *\n  * \\sa computeProductBlockingSizes */\ninline void setCpuCacheSizes(std::ptrdiff_t l1, std::ptrdiff_t l2, std::ptrdiff_t l3)\n{\n  internal::manage_caching_sizes(SetAction, &l1, &l2, &l3);\n}\n\n} // end namespace Eigen\n\n#endif // EIGEN_GENERAL_BLOCK_PANEL_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/products/GeneralMatrixMatrix.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_GENERAL_MATRIX_MATRIX_H\n#define EIGEN_GENERAL_MATRIX_MATRIX_H\n\nnamespace Eigen {\n\nnamespace internal {\n\ntemplate<typename _LhsScalar, typename _RhsScalar> class level3_blocking;\n\n/* Specialization for a row-major destination matrix => simple transposition of the product */\ntemplate<\n  typename Index,\n  typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,\n  typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs,\n  int ResInnerStride>\nstruct general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,RowMajor,ResInnerStride>\n{\n  typedef gebp_traits<RhsScalar,LhsScalar> Traits;\n\n  typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;\n  static EIGEN_STRONG_INLINE void run(\n    Index rows, Index cols, Index depth,\n    const LhsScalar* lhs, Index lhsStride,\n    const RhsScalar* rhs, Index rhsStride,\n    ResScalar* res, Index resIncr, Index resStride,\n    ResScalar alpha,\n    level3_blocking<RhsScalar,LhsScalar>& blocking,\n    GemmParallelInfo<Index>* info = 0)\n  {\n    // transpose the product such that the result is column major\n    general_matrix_matrix_product<Index,\n      RhsScalar, RhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateRhs,\n      LhsScalar, LhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateLhs,\n      ColMajor,ResInnerStride>\n    ::run(cols,rows,depth,rhs,rhsStride,lhs,lhsStride,res,resIncr,resStride,alpha,blocking,info);\n  }\n};\n\n/*  Specialization for a col-major destination matrix\n *    => Blocking algorithm following Goto's paper */\ntemplate<\n  typename Index,\n  typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,\n  typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs,\n  int ResInnerStride>\nstruct general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,ColMajor,ResInnerStride>\n{\n\ntypedef gebp_traits<LhsScalar,RhsScalar> Traits;\n\ntypedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;\nstatic void run(Index rows, Index cols, Index depth,\n  const LhsScalar* _lhs, Index lhsStride,\n  const RhsScalar* _rhs, Index rhsStride,\n  ResScalar* _res, Index resIncr, Index resStride,\n  ResScalar alpha,\n  level3_blocking<LhsScalar,RhsScalar>& blocking,\n  GemmParallelInfo<Index>* info = 0)\n{\n  typedef const_blas_data_mapper<LhsScalar, Index, LhsStorageOrder> LhsMapper;\n  typedef const_blas_data_mapper<RhsScalar, Index, RhsStorageOrder> RhsMapper;\n  typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor,Unaligned,ResInnerStride> ResMapper;\n  LhsMapper lhs(_lhs, lhsStride);\n  RhsMapper rhs(_rhs, rhsStride);\n  ResMapper res(_res, resStride, resIncr);\n\n  Index kc = blocking.kc();                   // cache block size along the K direction\n  Index mc = (std::min)(rows,blocking.mc());  // cache block size along the M direction\n  Index nc = (std::min)(cols,blocking.nc());  // cache block size along the N direction\n\n  gemm_pack_lhs<LhsScalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, typename 
Traits::LhsPacket4Packing, LhsStorageOrder> pack_lhs;\n  gemm_pack_rhs<RhsScalar, Index, RhsMapper, Traits::nr, RhsStorageOrder> pack_rhs;\n  gebp_kernel<LhsScalar, RhsScalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp;\n\n#ifdef EIGEN_HAS_OPENMP\n  if(info)\n  {\n    // this is the parallel version!\n    int tid = omp_get_thread_num();\n    int threads = omp_get_num_threads();\n\n    LhsScalar* blockA = blocking.blockA();\n    eigen_internal_assert(blockA!=0);\n\n    std::size_t sizeB = kc*nc;\n    ei_declare_aligned_stack_constructed_variable(RhsScalar, blockB, sizeB, 0);\n\n    // For each horizontal panel of the rhs, and corresponding vertical panel of the lhs...\n    for(Index k=0; k<depth; k+=kc)\n    {\n      const Index actual_kc = (std::min)(k+kc,depth)-k; // => rows of B', and cols of the A'\n\n      // In order to reduce the chance that a thread has to wait for the other,\n      // let's start by packing B'.\n      pack_rhs(blockB, rhs.getSubMapper(k,0), actual_kc, nc);\n\n      // Pack A_k to A' in a parallel fashion:\n      // each thread packs the sub block A_k,i to A'_i where i is the thread id.\n\n      // However, before copying to A'_i, we have to make sure that no other thread is still using it,\n      // i.e., we test that info[tid].users equals 0.\n      // Then, we set info[tid].users to the number of threads to mark that all other threads are going to use it.\n      while(info[tid].users!=0) {}\n      info[tid].users = threads;\n\n      pack_lhs(blockA+info[tid].lhs_start*actual_kc, lhs.getSubMapper(info[tid].lhs_start,k), actual_kc, info[tid].lhs_length);\n\n      // Notify the other threads that the part A'_i is ready to go.\n      info[tid].sync = k;\n\n      // Computes C_i += A' * B' per A'_i\n      for(int shift=0; shift<threads; ++shift)\n      {\n        int i = (tid+shift)%threads;\n\n        // At this point we have to make sure that A'_i has been updated by the thread i,\n        // we use testAndSetOrdered to mimic a volatile access.\n        // However, no need to wait for the B' part which has been updated by the current thread!\n        if (shift>0) {\n          while(info[i].sync!=k) {\n          }\n        }\n\n        gebp(res.getSubMapper(info[i].lhs_start, 0), blockA+info[i].lhs_start*actual_kc, blockB, info[i].lhs_length, actual_kc, nc, alpha);\n      }\n\n      // Then keep going as usual with the remaining B'\n      for(Index j=nc; j<cols; j+=nc)\n      {\n        const Index actual_nc = (std::min)(j+nc,cols)-j;\n\n        // pack B_k,j to B'\n        pack_rhs(blockB, rhs.getSubMapper(k,j), actual_kc, actual_nc);\n\n        // C_j += A' * B'\n        gebp(res.getSubMapper(0, j), blockA, blockB, rows, actual_kc, actual_nc, alpha);\n      }\n\n      // Release all the sub blocks A'_i of A' for the current thread,\n      // i.e., we simply decrement the number of users by 1\n      for(Index i=0; i<threads; ++i)\n#if !EIGEN_HAS_CXX11_ATOMIC\n        #pragma omp atomic\n#endif\n        info[i].users -= 1;\n    }\n  }\n  else\n#endif // EIGEN_HAS_OPENMP\n  {\n    EIGEN_UNUSED_VARIABLE(info);\n\n    // this is the sequential version!\n    std::size_t sizeA = kc*mc;\n    std::size_t sizeB = kc*nc;\n\n    ei_declare_aligned_stack_constructed_variable(LhsScalar, blockA, sizeA, blocking.blockA());\n    ei_declare_aligned_stack_constructed_variable(RhsScalar, blockB, sizeB, blocking.blockB());\n\n    const bool pack_rhs_once = mc!=rows && kc==depth && nc==cols;\n\n    // For each horizontal panel of the rhs, and 
corresponding panel of the lhs...\n    for(Index i2=0; i2<rows; i2+=mc)\n    {\n      const Index actual_mc = (std::min)(i2+mc,rows)-i2;\n\n      for(Index k2=0; k2<depth; k2+=kc)\n      {\n        const Index actual_kc = (std::min)(k2+kc,depth)-k2;\n\n        // OK, here we have selected one horizontal panel of rhs and one vertical panel of lhs.\n        // => Pack lhs's panel into a sequential chunk of memory (L2/L3 caching)\n        // Note that this panel will be read as many times as the number of blocks in the rhs's\n        // horizontal panel which is, in practice, a very low number.\n        pack_lhs(blockA, lhs.getSubMapper(i2,k2), actual_kc, actual_mc);\n\n        // For each kc x nc block of the rhs's horizontal panel...\n        for(Index j2=0; j2<cols; j2+=nc)\n        {\n          const Index actual_nc = (std::min)(j2+nc,cols)-j2;\n\n          // We pack the rhs's block into a sequential chunk of memory (L2 caching)\n          // Note that this block will be read a very high number of times, which is equal to the number of\n          // micro horizontal panels of the large rhs's panel (e.g., rows/12 times).\n          if((!pack_rhs_once) || i2==0)\n            pack_rhs(blockB, rhs.getSubMapper(k2,j2), actual_kc, actual_nc);\n\n          // Everything is packed, we can now call the panel * block kernel:\n          gebp(res.getSubMapper(i2, j2), blockA, blockB, actual_mc, actual_kc, actual_nc, alpha);\n        }\n      }\n    }\n  }\n}\n\n};\n\n/*********************************************************************************\n*  Specialization of generic_product_impl for \"large\" GEMM, i.e.,\n*  implementation of the high level wrapper to general_matrix_matrix_product\n**********************************************************************************/\n\ntemplate<typename Scalar, typename Index, typename Gemm, typename Lhs, typename Rhs, typename Dest, typename BlockingType>\nstruct gemm_functor\n{\n  gemm_functor(const Lhs& lhs, const Rhs& rhs, Dest& dest, const Scalar& actualAlpha, BlockingType& blocking)\n    : m_lhs(lhs), m_rhs(rhs), m_dest(dest), m_actualAlpha(actualAlpha), m_blocking(blocking)\n  {}\n\n  void initParallelSession(Index num_threads) const\n  {\n    m_blocking.initParallel(m_lhs.rows(), m_rhs.cols(), m_lhs.cols(), num_threads);\n    m_blocking.allocateA();\n  }\n\n  void operator() (Index row, Index rows, Index col=0, Index cols=-1, GemmParallelInfo<Index>* info=0) const\n  {\n    if(cols==-1)\n      cols = m_rhs.cols();\n\n    Gemm::run(rows, cols, m_lhs.cols(),\n              &m_lhs.coeffRef(row,0), m_lhs.outerStride(),\n              &m_rhs.coeffRef(0,col), m_rhs.outerStride(),\n              (Scalar*)&(m_dest.coeffRef(row,col)), m_dest.innerStride(), m_dest.outerStride(),\n              m_actualAlpha, m_blocking, info);\n  }\n\n  typedef typename Gemm::Traits Traits;\n\n  protected:\n    const Lhs& m_lhs;\n    const Rhs& m_rhs;\n    Dest& m_dest;\n    Scalar m_actualAlpha;\n    BlockingType& m_blocking;\n};\n\ntemplate<int StorageOrder, typename LhsScalar, typename RhsScalar, int MaxRows, int MaxCols, int MaxDepth, int KcFactor=1,\nbool FiniteAtCompileTime = MaxRows!=Dynamic && MaxCols!=Dynamic && MaxDepth != Dynamic> class gemm_blocking_space;\n\ntemplate<typename _LhsScalar, typename _RhsScalar>\nclass level3_blocking\n{\n    typedef _LhsScalar LhsScalar;\n    typedef _RhsScalar RhsScalar;\n\n  protected:\n    LhsScalar* m_blockA;\n    RhsScalar* m_blockB;\n\n    Index m_mc;\n    Index m_nc;\n    Index m_kc;\n\n  public:\n\n    level3_blocking()\n    
  : m_blockA(0), m_blockB(0), m_mc(0), m_nc(0), m_kc(0)\n    {}\n\n    inline Index mc() const { return m_mc; }\n    inline Index nc() const { return m_nc; }\n    inline Index kc() const { return m_kc; }\n\n    inline LhsScalar* blockA() { return m_blockA; }\n    inline RhsScalar* blockB() { return m_blockB; }\n};\n\ntemplate<int StorageOrder, typename _LhsScalar, typename _RhsScalar, int MaxRows, int MaxCols, int MaxDepth, int KcFactor>\nclass gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, MaxDepth, KcFactor, true /* == FiniteAtCompileTime */>\n  : public level3_blocking<\n      typename conditional<StorageOrder==RowMajor,_RhsScalar,_LhsScalar>::type,\n      typename conditional<StorageOrder==RowMajor,_LhsScalar,_RhsScalar>::type>\n{\n    enum {\n      Transpose = StorageOrder==RowMajor,\n      ActualRows = Transpose ? MaxCols : MaxRows,\n      ActualCols = Transpose ? MaxRows : MaxCols\n    };\n    typedef typename conditional<Transpose,_RhsScalar,_LhsScalar>::type LhsScalar;\n    typedef typename conditional<Transpose,_LhsScalar,_RhsScalar>::type RhsScalar;\n    typedef gebp_traits<LhsScalar,RhsScalar> Traits;\n    enum {\n      SizeA = ActualRows * MaxDepth,\n      SizeB = ActualCols * MaxDepth\n    };\n\n#if EIGEN_MAX_STATIC_ALIGN_BYTES >= EIGEN_DEFAULT_ALIGN_BYTES\n    EIGEN_ALIGN_MAX LhsScalar m_staticA[SizeA];\n    EIGEN_ALIGN_MAX RhsScalar m_staticB[SizeB];\n#else\n    EIGEN_ALIGN_MAX char m_staticA[SizeA * sizeof(LhsScalar) + EIGEN_DEFAULT_ALIGN_BYTES-1];\n    EIGEN_ALIGN_MAX char m_staticB[SizeB * sizeof(RhsScalar) + EIGEN_DEFAULT_ALIGN_BYTES-1];\n#endif\n\n  public:\n\n    gemm_blocking_space(Index /*rows*/, Index /*cols*/, Index /*depth*/, Index /*num_threads*/, bool /*full_rows = false*/)\n    {\n      this->m_mc = ActualRows;\n      this->m_nc = ActualCols;\n      this->m_kc = MaxDepth;\n#if EIGEN_MAX_STATIC_ALIGN_BYTES >= EIGEN_DEFAULT_ALIGN_BYTES\n      this->m_blockA = m_staticA;\n      this->m_blockB = m_staticB;\n#else\n      this->m_blockA = reinterpret_cast<LhsScalar*>((internal::UIntPtr(m_staticA) + (EIGEN_DEFAULT_ALIGN_BYTES-1)) & ~std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1));\n      this->m_blockB = reinterpret_cast<RhsScalar*>((internal::UIntPtr(m_staticB) + (EIGEN_DEFAULT_ALIGN_BYTES-1)) & ~std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1));\n#endif\n    }\n\n    void initParallel(Index, Index, Index, Index)\n    {}\n\n    inline void allocateA() {}\n    inline void allocateB() {}\n    inline void allocateAll() {}\n};\n\ntemplate<int StorageOrder, typename _LhsScalar, typename _RhsScalar, int MaxRows, int MaxCols, int MaxDepth, int KcFactor>\nclass gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, MaxDepth, KcFactor, false>\n  : public level3_blocking<\n      typename conditional<StorageOrder==RowMajor,_RhsScalar,_LhsScalar>::type,\n      typename conditional<StorageOrder==RowMajor,_LhsScalar,_RhsScalar>::type>\n{\n    enum {\n      Transpose = StorageOrder==RowMajor\n    };\n    typedef typename conditional<Transpose,_RhsScalar,_LhsScalar>::type LhsScalar;\n    typedef typename conditional<Transpose,_LhsScalar,_RhsScalar>::type RhsScalar;\n    typedef gebp_traits<LhsScalar,RhsScalar> Traits;\n\n    Index m_sizeA;\n    Index m_sizeB;\n\n  public:\n\n    gemm_blocking_space(Index rows, Index cols, Index depth, Index num_threads, bool l3_blocking)\n    {\n      this->m_mc = Transpose ? cols : rows;\n      this->m_nc = Transpose ? 
rows : cols;\n      this->m_kc = depth;\n\n      if(l3_blocking)\n      {\n        computeProductBlockingSizes<LhsScalar,RhsScalar,KcFactor>(this->m_kc, this->m_mc, this->m_nc, num_threads);\n      }\n      else  // no l3 blocking\n      {\n        Index n = this->m_nc;\n        computeProductBlockingSizes<LhsScalar,RhsScalar,KcFactor>(this->m_kc, this->m_mc, n, num_threads);\n      }\n\n      m_sizeA = this->m_mc * this->m_kc;\n      m_sizeB = this->m_kc * this->m_nc;\n    }\n\n    void initParallel(Index rows, Index cols, Index depth, Index num_threads)\n    {\n      this->m_mc = Transpose ? cols : rows;\n      this->m_nc = Transpose ? rows : cols;\n      this->m_kc = depth;\n\n      eigen_internal_assert(this->m_blockA==0 && this->m_blockB==0);\n      Index m = this->m_mc;\n      computeProductBlockingSizes<LhsScalar,RhsScalar,KcFactor>(this->m_kc, m, this->m_nc, num_threads);\n      m_sizeA = this->m_mc * this->m_kc;\n      m_sizeB = this->m_kc * this->m_nc;\n    }\n\n    void allocateA()\n    {\n      if(this->m_blockA==0)\n        this->m_blockA = aligned_new<LhsScalar>(m_sizeA);\n    }\n\n    void allocateB()\n    {\n      if(this->m_blockB==0)\n        this->m_blockB = aligned_new<RhsScalar>(m_sizeB);\n    }\n\n    void allocateAll()\n    {\n      allocateA();\n      allocateB();\n    }\n\n    ~gemm_blocking_space()\n    {\n      aligned_delete(this->m_blockA, m_sizeA);\n      aligned_delete(this->m_blockB, m_sizeB);\n    }\n};\n\n} // end namespace internal\n\nnamespace internal {\n\ntemplate<typename Lhs, typename Rhs>\nstruct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct>\n  : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct> >\n{\n  typedef typename Product<Lhs,Rhs>::Scalar Scalar;\n  typedef typename Lhs::Scalar LhsScalar;\n  typedef typename Rhs::Scalar RhsScalar;\n\n  typedef internal::blas_traits<Lhs> LhsBlasTraits;\n  typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;\n  typedef typename internal::remove_all<ActualLhsType>::type ActualLhsTypeCleaned;\n\n  typedef internal::blas_traits<Rhs> RhsBlasTraits;\n  typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;\n  typedef typename internal::remove_all<ActualRhsType>::type ActualRhsTypeCleaned;\n\n  enum {\n    MaxDepthAtCompileTime = EIGEN_SIZE_MIN_PREFER_FIXED(Lhs::MaxColsAtCompileTime,Rhs::MaxRowsAtCompileTime)\n  };\n\n  typedef generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode> lazyproduct;\n\n  template<typename Dst>\n  static void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)\n  {\n    // See http://eigen.tuxfamily.org/bz/show_bug.cgi?id=404 for a discussion and helper program\n    // to determine the following heuristic.\n    // EIGEN_GEMM_TO_COEFFBASED_THRESHOLD is typically defined to 20 in GeneralProduct.h,\n    // unless it has been specialized by the user or for a given architecture.\n    // Note that the condition rhs.rows()>0 was required because lazy product is (was?) 
not happy with empty inputs.\n    // I'm not sure it is still required.\n    if((rhs.rows()+dst.rows()+dst.cols())<EIGEN_GEMM_TO_COEFFBASED_THRESHOLD && rhs.rows()>0)\n      lazyproduct::eval_dynamic(dst, lhs, rhs, internal::assign_op<typename Dst::Scalar,Scalar>());\n    else\n    {\n      dst.setZero();\n      scaleAndAddTo(dst, lhs, rhs, Scalar(1));\n    }\n  }\n\n  template<typename Dst>\n  static void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)\n  {\n    if((rhs.rows()+dst.rows()+dst.cols())<EIGEN_GEMM_TO_COEFFBASED_THRESHOLD && rhs.rows()>0)\n      lazyproduct::eval_dynamic(dst, lhs, rhs, internal::add_assign_op<typename Dst::Scalar,Scalar>());\n    else\n      scaleAndAddTo(dst,lhs, rhs, Scalar(1));\n  }\n\n  template<typename Dst>\n  static void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)\n  {\n    if((rhs.rows()+dst.rows()+dst.cols())<EIGEN_GEMM_TO_COEFFBASED_THRESHOLD && rhs.rows()>0)\n      lazyproduct::eval_dynamic(dst, lhs, rhs, internal::sub_assign_op<typename Dst::Scalar,Scalar>());\n    else\n      scaleAndAddTo(dst, lhs, rhs, Scalar(-1));\n  }\n\n  template<typename Dest>\n  static void scaleAndAddTo(Dest& dst, const Lhs& a_lhs, const Rhs& a_rhs, const Scalar& alpha)\n  {\n    eigen_assert(dst.rows()==a_lhs.rows() && dst.cols()==a_rhs.cols());\n    if(a_lhs.cols()==0 || a_lhs.rows()==0 || a_rhs.cols()==0)\n      return;\n\n    if (dst.cols() == 1)\n    {\n      // Fallback to GEMV if either the lhs or rhs is a runtime vector\n      typename Dest::ColXpr dst_vec(dst.col(0));\n      return internal::generic_product_impl<Lhs,typename Rhs::ConstColXpr,DenseShape,DenseShape,GemvProduct>\n        ::scaleAndAddTo(dst_vec, a_lhs, a_rhs.col(0), alpha);\n    }\n    else if (dst.rows() == 1)\n    {\n      // Fallback to GEMV if either the lhs or rhs is a runtime vector\n      typename Dest::RowXpr dst_vec(dst.row(0));\n      return internal::generic_product_impl<typename Lhs::ConstRowXpr,Rhs,DenseShape,DenseShape,GemvProduct>\n        ::scaleAndAddTo(dst_vec, a_lhs.row(0), a_rhs, alpha);\n    }\n\n    typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(a_lhs);\n    typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(a_rhs);\n\n    Scalar actualAlpha = combine_scalar_factors(alpha, a_lhs, a_rhs);\n\n    typedef internal::gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,LhsScalar,RhsScalar,\n            Dest::MaxRowsAtCompileTime,Dest::MaxColsAtCompileTime,MaxDepthAtCompileTime> BlockingType;\n\n    typedef internal::gemm_functor<\n      Scalar, Index,\n      internal::general_matrix_matrix_product<\n        Index,\n        LhsScalar, (ActualLhsTypeCleaned::Flags&RowMajorBit) ? RowMajor : ColMajor, bool(LhsBlasTraits::NeedToConjugate),\n        RhsScalar, (ActualRhsTypeCleaned::Flags&RowMajorBit) ? RowMajor : ColMajor, bool(RhsBlasTraits::NeedToConjugate),\n        (Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,\n        Dest::InnerStrideAtCompileTime>,\n      ActualLhsTypeCleaned, ActualRhsTypeCleaned, Dest, BlockingType> GemmFunctor;\n\n    BlockingType blocking(dst.rows(), dst.cols(), lhs.cols(), 1, true);\n    internal::parallelize_gemm<(Dest::MaxRowsAtCompileTime>32 || Dest::MaxRowsAtCompileTime==Dynamic)>\n        (GemmFunctor(lhs, rhs, dst, actualAlpha, blocking), a_lhs.rows(), a_rhs.cols(), a_lhs.cols(), Dest::Flags&RowMajorBit);\n  }\n};\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_GENERAL_MATRIX_MATRIX_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_H\n#define EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_H\n\nnamespace Eigen { \n\ntemplate<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjLhs, bool ConjRhs>\nstruct selfadjoint_rank1_update;\n\nnamespace internal {\n\n/**********************************************************************\n* This file implements a general A * B product while\n* evaluating only one triangular part of the product.\n* This is a more general version of self adjoint product (C += A A^T)\n* as the level 3 SYRK Blas routine.\n**********************************************************************/\n\n// forward declarations (defined at the end of this file)\ntemplate<typename LhsScalar, typename RhsScalar, typename Index, int mr, int nr, bool ConjLhs, bool ConjRhs, int ResInnerStride, int UpLo>\nstruct tribb_kernel;\n  \n/* Optimized matrix-matrix product evaluating only one triangular half */\ntemplate <typename Index,\n          typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,\n          typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs,\n                              int ResStorageOrder, int ResInnerStride, int  UpLo, int Version = Specialized>\nstruct general_matrix_matrix_triangular_product;\n\n// as usual if the result is row major => we transpose the product\ntemplate <typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,\n                          typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs,\n                          int ResInnerStride, int  UpLo, int Version>\nstruct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,RowMajor,ResInnerStride,UpLo,Version>\n{\n  typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;\n  static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* lhs, Index lhsStride,\n                                      const RhsScalar* rhs, Index rhsStride, ResScalar* res, Index resIncr, Index resStride,\n                                      const ResScalar& alpha, level3_blocking<RhsScalar,LhsScalar>& blocking)\n  {\n    general_matrix_matrix_triangular_product<Index,\n        RhsScalar, RhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateRhs,\n        LhsScalar, LhsStorageOrder==RowMajor ? 
ColMajor : RowMajor, ConjugateLhs,\n        ColMajor, ResInnerStride, UpLo==Lower?Upper:Lower>\n      ::run(size,depth,rhs,rhsStride,lhs,lhsStride,res,resIncr,resStride,alpha,blocking);\n  }\n};\n\ntemplate <typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,\n                          typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs,\n                          int ResInnerStride, int  UpLo, int Version>\nstruct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,ColMajor,ResInnerStride,UpLo,Version>\n{\n  typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;\n  static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* _lhs, Index lhsStride,\n                                      const RhsScalar* _rhs, Index rhsStride,\n                                      ResScalar* _res, Index resIncr, Index resStride,\n                                      const ResScalar& alpha, level3_blocking<LhsScalar,RhsScalar>& blocking)\n  {\n    typedef gebp_traits<LhsScalar,RhsScalar> Traits;\n\n    typedef const_blas_data_mapper<LhsScalar, Index, LhsStorageOrder> LhsMapper;\n    typedef const_blas_data_mapper<RhsScalar, Index, RhsStorageOrder> RhsMapper;\n    typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor, Unaligned, ResInnerStride> ResMapper;\n    LhsMapper lhs(_lhs,lhsStride);\n    RhsMapper rhs(_rhs,rhsStride);\n    ResMapper res(_res, resStride, resIncr);\n\n    Index kc = blocking.kc();\n    Index mc = (std::min)(size,blocking.mc());\n\n    // !!! mc must be a multiple of nr:\n    if(mc > Traits::nr)\n      mc = (mc/Traits::nr)*Traits::nr;\n\n    std::size_t sizeA = kc*mc;\n    std::size_t sizeB = kc*size;\n\n    ei_declare_aligned_stack_constructed_variable(LhsScalar, blockA, sizeA, blocking.blockA());\n    ei_declare_aligned_stack_constructed_variable(RhsScalar, blockB, sizeB, blocking.blockB());\n\n    gemm_pack_lhs<LhsScalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket4Packing, LhsStorageOrder> pack_lhs;\n    gemm_pack_rhs<RhsScalar, Index, RhsMapper, Traits::nr, RhsStorageOrder> pack_rhs;\n    gebp_kernel<LhsScalar, RhsScalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp;\n    tribb_kernel<LhsScalar, RhsScalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs, ResInnerStride, UpLo> sybb;\n\n    for(Index k2=0; k2<depth; k2+=kc)\n    {\n      const Index actual_kc = (std::min)(k2+kc,depth)-k2;\n\n      // note that the actual rhs is the transpose/adjoint of mat\n      pack_rhs(blockB, rhs.getSubMapper(k2,0), actual_kc, size);\n\n      for(Index i2=0; i2<size; i2+=mc)\n      {\n        const Index actual_mc = (std::min)(i2+mc,size)-i2;\n\n        pack_lhs(blockA, lhs.getSubMapper(i2, k2), actual_kc, actual_mc);\n\n        // the selected actual_mc * size panel of res is split into three different parts:\n        //  1 - before the diagonal => processed with gebp or skipped\n        //  2 - the actual_mc x actual_mc symmetric block => processed with a special kernel\n        //  3 - after the diagonal => processed with gebp or skipped\n        if (UpLo==Lower)\n          gebp(res.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc,\n               (std::min)(size,i2), alpha, -1, -1, 0, 0);\n\n        sybb(_res+resStride*i2 + resIncr*i2, resIncr, resStride, blockA, blockB + actual_kc*i2, actual_mc, actual_kc, alpha);\n\n        if (UpLo==Upper)\n        
{\n          Index j2 = i2+actual_mc;\n          gebp(res.getSubMapper(i2, j2), blockA, blockB+actual_kc*j2, actual_mc,\n               actual_kc, (std::max)(Index(0), size-j2), alpha, -1, -1, 0, 0);\n        }\n      }\n    }\n  }\n};\n\n// Optimized packed Block * packed Block product kernel evaluating only one given triangular part\n// This kernel is built on top of the gebp kernel:\n// - the current destination block is processed per panel of actual_mc x BlockSize\n//   where BlockSize is set to the minimal value allowing gebp to be as fast as possible\n// - then, as usual, each panel is split into three parts along the diagonal,\n//   the sub blocks above and below the diagonal are processed as usual,\n//   while the triangular block overlapping the diagonal is evaluated into a\n//   small temporary buffer which is then accumulated into the result using a\n//   triangular traversal.\ntemplate<typename LhsScalar, typename RhsScalar, typename Index, int mr, int nr, bool ConjLhs, bool ConjRhs, int ResInnerStride, int UpLo>\nstruct tribb_kernel\n{\n  typedef gebp_traits<LhsScalar,RhsScalar,ConjLhs,ConjRhs> Traits;\n  typedef typename Traits::ResScalar ResScalar;\n\n  enum {\n    BlockSize  = meta_least_common_multiple<EIGEN_PLAIN_ENUM_MAX(mr,nr),EIGEN_PLAIN_ENUM_MIN(mr,nr)>::ret\n  };\n  void operator()(ResScalar* _res, Index resIncr, Index resStride, const LhsScalar* blockA, const RhsScalar* blockB, Index size, Index depth, const ResScalar& alpha)\n  {\n    typedef blas_data_mapper<ResScalar, Index, ColMajor, Unaligned, ResInnerStride> ResMapper;\n    typedef blas_data_mapper<ResScalar, Index, ColMajor, Unaligned> BufferMapper;\n    ResMapper res(_res, resStride, resIncr);\n    gebp_kernel<LhsScalar, RhsScalar, Index, ResMapper, mr, nr, ConjLhs, ConjRhs> gebp_kernel1;\n    gebp_kernel<LhsScalar, RhsScalar, Index, BufferMapper, mr, nr, ConjLhs, ConjRhs> gebp_kernel2;\n\n    Matrix<ResScalar,BlockSize,BlockSize,ColMajor> buffer((internal::constructor_without_unaligned_array_assert()));\n\n    // let's process the block per panel of actual_mc x BlockSize,\n    // again, each is split into three parts, etc.\n    for (Index j=0; j<size; j+=BlockSize)\n    {\n      Index actualBlockSize = std::min<Index>(BlockSize,size - j);\n      const RhsScalar* actual_b = blockB+j*depth;\n\n      if(UpLo==Upper)\n        gebp_kernel1(res.getSubMapper(0, j), blockA, actual_b, j, depth, actualBlockSize, alpha,\n                     -1, -1, 0, 0);\n      \n      // selfadjoint micro block\n      {\n        Index i = j;\n        buffer.setZero();\n        // 1 - apply the kernel on the temporary buffer\n        gebp_kernel2(BufferMapper(buffer.data(), BlockSize), blockA+depth*i, actual_b, actualBlockSize, depth, actualBlockSize, alpha,\n                     -1, -1, 0, 0);\n\n        // 2 - triangular accumulation\n        for(Index j1=0; j1<actualBlockSize; ++j1)\n        {\n          typename ResMapper::LinearMapper r = res.getLinearMapper(i,j+j1);\n          for(Index i1=UpLo==Lower ? j1 : 0;\n              UpLo==Lower ? 
i1<actualBlockSize : i1<=j1; ++i1)\n            r(i1) += buffer(i1,j1);\n        }\n      }\n\n      if(UpLo==Lower)\n      {\n        Index i = j+actualBlockSize;\n        gebp_kernel1(res.getSubMapper(i, j), blockA+depth*i, actual_b, size-i, \n                     depth, actualBlockSize, alpha, -1, -1, 0, 0);\n      }\n    }\n  }\n};\n\n} // end namespace internal\n\n// high level API\n\ntemplate<typename MatrixType, typename ProductType, int UpLo, bool IsOuterProduct>\nstruct general_product_to_triangular_selector;\n\n\ntemplate<typename MatrixType, typename ProductType, int UpLo>\nstruct general_product_to_triangular_selector<MatrixType,ProductType,UpLo,true>\n{\n  static void run(MatrixType& mat, const ProductType& prod, const typename MatrixType::Scalar& alpha, bool beta)\n  {\n    typedef typename MatrixType::Scalar Scalar;\n    \n    typedef typename internal::remove_all<typename ProductType::LhsNested>::type Lhs;\n    typedef internal::blas_traits<Lhs> LhsBlasTraits;\n    typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhs;\n    typedef typename internal::remove_all<ActualLhs>::type _ActualLhs;\n    typename internal::add_const_on_value_type<ActualLhs>::type actualLhs = LhsBlasTraits::extract(prod.lhs());\n    \n    typedef typename internal::remove_all<typename ProductType::RhsNested>::type Rhs;\n    typedef internal::blas_traits<Rhs> RhsBlasTraits;\n    typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhs;\n    typedef typename internal::remove_all<ActualRhs>::type _ActualRhs;\n    typename internal::add_const_on_value_type<ActualRhs>::type actualRhs = RhsBlasTraits::extract(prod.rhs());\n\n    Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs().derived()) * RhsBlasTraits::extractScalarFactor(prod.rhs().derived());\n\n    if(!beta)\n      mat.template triangularView<UpLo>().setZero();\n\n    enum {\n      StorageOrder = (internal::traits<MatrixType>::Flags&RowMajorBit) ? RowMajor : ColMajor,\n      UseLhsDirectly = _ActualLhs::InnerStrideAtCompileTime==1,\n      UseRhsDirectly = _ActualRhs::InnerStrideAtCompileTime==1\n    };\n    \n    internal::gemv_static_vector_if<Scalar,Lhs::SizeAtCompileTime,Lhs::MaxSizeAtCompileTime,!UseLhsDirectly> static_lhs;\n    ei_declare_aligned_stack_constructed_variable(Scalar, actualLhsPtr, actualLhs.size(),\n      (UseLhsDirectly ? const_cast<Scalar*>(actualLhs.data()) : static_lhs.data()));\n    if(!UseLhsDirectly) Map<typename _ActualLhs::PlainObject>(actualLhsPtr, actualLhs.size()) = actualLhs;\n    \n    internal::gemv_static_vector_if<Scalar,Rhs::SizeAtCompileTime,Rhs::MaxSizeAtCompileTime,!UseRhsDirectly> static_rhs;\n    ei_declare_aligned_stack_constructed_variable(Scalar, actualRhsPtr, actualRhs.size(),\n      (UseRhsDirectly ? 
const_cast<Scalar*>(actualRhs.data()) : static_rhs.data()));\n    if(!UseRhsDirectly) Map<typename _ActualRhs::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs;\n    \n    \n    selfadjoint_rank1_update<Scalar,Index,StorageOrder,UpLo,\n                              LhsBlasTraits::NeedToConjugate && NumTraits<Scalar>::IsComplex,\n                              RhsBlasTraits::NeedToConjugate && NumTraits<Scalar>::IsComplex>\n          ::run(actualLhs.size(), mat.data(), mat.outerStride(), actualLhsPtr, actualRhsPtr, actualAlpha);\n  }\n};\n\ntemplate<typename MatrixType, typename ProductType, int UpLo>\nstruct general_product_to_triangular_selector<MatrixType,ProductType,UpLo,false>\n{\n  static void run(MatrixType& mat, const ProductType& prod, const typename MatrixType::Scalar& alpha, bool beta)\n  {\n    typedef typename internal::remove_all<typename ProductType::LhsNested>::type Lhs;\n    typedef internal::blas_traits<Lhs> LhsBlasTraits;\n    typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhs;\n    typedef typename internal::remove_all<ActualLhs>::type _ActualLhs;\n    typename internal::add_const_on_value_type<ActualLhs>::type actualLhs = LhsBlasTraits::extract(prod.lhs());\n    \n    typedef typename internal::remove_all<typename ProductType::RhsNested>::type Rhs;\n    typedef internal::blas_traits<Rhs> RhsBlasTraits;\n    typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhs;\n    typedef typename internal::remove_all<ActualRhs>::type _ActualRhs;\n    typename internal::add_const_on_value_type<ActualRhs>::type actualRhs = RhsBlasTraits::extract(prod.rhs());\n\n    typename ProductType::Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs().derived()) * RhsBlasTraits::extractScalarFactor(prod.rhs().derived());\n\n    if(!beta)\n      mat.template triangularView<UpLo>().setZero();\n\n    enum {\n      IsRowMajor = (internal::traits<MatrixType>::Flags&RowMajorBit) ? 1 : 0,\n      LhsIsRowMajor = _ActualLhs::Flags&RowMajorBit ? 1 : 0,\n      RhsIsRowMajor = _ActualRhs::Flags&RowMajorBit ? 1 : 0,\n      SkipDiag = (UpLo&(UnitDiag|ZeroDiag))!=0\n    };\n\n    Index size = mat.cols();\n    if(SkipDiag)\n      size--;\n    Index depth = actualLhs.cols();\n\n    typedef internal::gemm_blocking_space<IsRowMajor ? RowMajor : ColMajor,typename Lhs::Scalar,typename Rhs::Scalar,\n          MatrixType::MaxColsAtCompileTime, MatrixType::MaxColsAtCompileTime, _ActualRhs::MaxColsAtCompileTime> BlockingType;\n\n    BlockingType blocking(size, size, depth, 1, false);\n\n    internal::general_matrix_matrix_triangular_product<Index,\n      typename Lhs::Scalar, LhsIsRowMajor ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate,\n      typename Rhs::Scalar, RhsIsRowMajor ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate,\n      IsRowMajor ? RowMajor : ColMajor, MatrixType::InnerStrideAtCompileTime, UpLo&(Lower|Upper)>\n      ::run(size, depth,\n            &actualLhs.coeffRef(SkipDiag&&(UpLo&Lower)==Lower ? 1 : 0,0), actualLhs.outerStride(),\n            &actualRhs.coeffRef(0,SkipDiag&&(UpLo&Upper)==Upper ? 1 : 0), actualRhs.outerStride(),\n            mat.data() + (SkipDiag ? (bool(IsRowMajor) != ((UpLo&Lower)==Lower) ? 
mat.innerStride() : mat.outerStride() ) : 0),\n            mat.innerStride(), mat.outerStride(), actualAlpha, blocking);\n  }\n};\n\ntemplate<typename MatrixType, unsigned int UpLo>\ntemplate<typename ProductType>\nEIGEN_DEVICE_FUNC TriangularView<MatrixType,UpLo>& TriangularViewImpl<MatrixType,UpLo,Dense>::_assignProduct(const ProductType& prod, const Scalar& alpha, bool beta)\n{\n  EIGEN_STATIC_ASSERT((UpLo&UnitDiag)==0, WRITING_TO_TRIANGULAR_PART_WITH_UNIT_DIAGONAL_IS_NOT_SUPPORTED);\n  eigen_assert(derived().nestedExpression().rows() == prod.rows() && derived().cols() == prod.cols());\n\n  general_product_to_triangular_selector<MatrixType, ProductType, UpLo, internal::traits<ProductType>::InnerSize==1>::run(derived().nestedExpression().const_cast_derived(), prod, alpha, beta);\n\n  return derived();\n}\n\n} // end namespace Eigen\n\n#endif // EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h",
    "content": "/*\n Copyright (c) 2011, Intel Corporation. All rights reserved.\n\n Redistribution and use in source and binary forms, with or without modification,\n are permitted provided that the following conditions are met:\n\n * Redistributions of source code must retain the above copyright notice, this\n   list of conditions and the following disclaimer.\n * Redistributions in binary form must reproduce the above copyright notice,\n   this list of conditions and the following disclaimer in the documentation\n   and/or other materials provided with the distribution.\n * Neither the name of Intel Corporation nor the names of its contributors may\n   be used to endorse or promote products derived from this software without\n   specific prior written permission.\n\n THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\" AND\n ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED\n WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE\n DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR\n ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES\n (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;\n LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON\n ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\n SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\n ********************************************************************************\n *   Content : Eigen bindings to BLAS F77\n *   Level 3 BLAS SYRK/HERK implementation.\n ********************************************************************************\n*/\n\n#ifndef EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_BLAS_H\n#define EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_BLAS_H\n\nnamespace Eigen {\n\nnamespace internal {\n\ntemplate <typename Index, typename Scalar, int AStorageOrder, bool ConjugateA, int ResStorageOrder, int UpLo>\nstruct general_matrix_matrix_rankupdate :\n       general_matrix_matrix_triangular_product<\n         Index,Scalar,AStorageOrder,ConjugateA,Scalar,AStorageOrder,ConjugateA,ResStorageOrder,1,UpLo,BuiltIn> {};\n\n\n// try to go to BLAS specialization\n#define EIGEN_BLAS_RANKUPDATE_SPECIALIZE(Scalar) \\\ntemplate <typename Index, int LhsStorageOrder, bool ConjugateLhs, \\\n                          int RhsStorageOrder, bool ConjugateRhs, int  UpLo> \\\nstruct general_matrix_matrix_triangular_product<Index,Scalar,LhsStorageOrder,ConjugateLhs, \\\n               Scalar,RhsStorageOrder,ConjugateRhs,ColMajor,1,UpLo,Specialized> { \\\n  static EIGEN_STRONG_INLINE void run(Index size, Index depth,const Scalar* lhs, Index lhsStride, \\\n                          const Scalar* rhs, Index rhsStride, Scalar* res, Index resIncr, Index resStride, Scalar alpha, level3_blocking<Scalar, Scalar>& blocking) \\\n  { \\\n    if ( lhs==rhs && ((UpLo&(Lower|Upper))==UpLo) ) { \\\n      general_matrix_matrix_rankupdate<Index,Scalar,LhsStorageOrder,ConjugateLhs,ColMajor,UpLo> \\\n      ::run(size,depth,lhs,lhsStride,rhs,rhsStride,res,resStride,alpha,blocking); \\\n    } else { \\\n      general_matrix_matrix_triangular_product<Index, \\\n        Scalar, LhsStorageOrder, ConjugateLhs, \\\n        Scalar, RhsStorageOrder, ConjugateRhs, \\\n        ColMajor, 1, UpLo, BuiltIn> \\\n      ::run(size,depth,lhs,lhsStride,rhs,rhsStride,res,resIncr,resStride,alpha,blocking); 
\\\n    } \\\n  } \\\n};\n\nEIGEN_BLAS_RANKUPDATE_SPECIALIZE(double)\nEIGEN_BLAS_RANKUPDATE_SPECIALIZE(float)\n// TODO handle complex cases\n// EIGEN_BLAS_RANKUPDATE_SPECIALIZE(dcomplex)\n// EIGEN_BLAS_RANKUPDATE_SPECIALIZE(scomplex)\n\n// SYRK for float/double\n#define EIGEN_BLAS_RANKUPDATE_R(EIGTYPE, BLASTYPE, BLASFUNC) \\\ntemplate <typename Index, int AStorageOrder, bool ConjugateA, int  UpLo> \\\nstruct general_matrix_matrix_rankupdate<Index,EIGTYPE,AStorageOrder,ConjugateA,ColMajor,UpLo> { \\\n  enum { \\\n    IsLower = (UpLo&Lower) == Lower, \\\n    LowUp = IsLower ? Lower : Upper, \\\n    conjA = ((AStorageOrder==ColMajor) && ConjugateA) ? 1 : 0 \\\n  }; \\\n  static EIGEN_STRONG_INLINE void run(Index size, Index depth,const EIGTYPE* lhs, Index lhsStride, \\\n                          const EIGTYPE* /*rhs*/, Index /*rhsStride*/, EIGTYPE* res, Index resStride, EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) \\\n  { \\\n  /* typedef Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> MatrixRhs;*/ \\\n\\\n   BlasIndex lda=convert_index<BlasIndex>(lhsStride), ldc=convert_index<BlasIndex>(resStride), n=convert_index<BlasIndex>(size), k=convert_index<BlasIndex>(depth); \\\n   char uplo=((IsLower) ? 'L' : 'U'), trans=((AStorageOrder==RowMajor) ? 'T':'N'); \\\n   EIGTYPE beta(1); \\\n   BLASFUNC(&uplo, &trans, &n, &k, (const BLASTYPE*)&numext::real_ref(alpha), lhs, &lda, (const BLASTYPE*)&numext::real_ref(beta), res, &ldc); \\\n  } \\\n};\n\n// HERK for complex data\n#define EIGEN_BLAS_RANKUPDATE_C(EIGTYPE, BLASTYPE, RTYPE, BLASFUNC) \\\ntemplate <typename Index, int AStorageOrder, bool ConjugateA, int  UpLo> \\\nstruct general_matrix_matrix_rankupdate<Index,EIGTYPE,AStorageOrder,ConjugateA,ColMajor,UpLo> { \\\n  enum { \\\n    IsLower = (UpLo&Lower) == Lower, \\\n    LowUp = IsLower ? Lower : Upper, \\\n    conjA = (((AStorageOrder==ColMajor) && ConjugateA) || ((AStorageOrder==RowMajor) && !ConjugateA)) ? 1 : 0 \\\n  }; \\\n  static EIGEN_STRONG_INLINE void run(Index size, Index depth,const EIGTYPE* lhs, Index lhsStride, \\\n                          const EIGTYPE* /*rhs*/, Index /*rhsStride*/, EIGTYPE* res, Index resStride, EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) \\\n  { \\\n   typedef Matrix<EIGTYPE, Dynamic, Dynamic, AStorageOrder> MatrixType; \\\n\\\n   BlasIndex lda=convert_index<BlasIndex>(lhsStride), ldc=convert_index<BlasIndex>(resStride), n=convert_index<BlasIndex>(size), k=convert_index<BlasIndex>(depth); \\\n   char uplo=((IsLower) ? 'L' : 'U'), trans=((AStorageOrder==RowMajor) ? 
'C':'N'); \\\n   RTYPE alpha_, beta_; \\\n   const EIGTYPE* a_ptr; \\\n\\\n   alpha_ = alpha.real(); \\\n   beta_ = 1.0; \\\n/* Copy with conjugation in some cases*/ \\\n   MatrixType a; \\\n   if (conjA) { \\\n     Map<const MatrixType, 0, OuterStride<> > mapA(lhs,n,k,OuterStride<>(lhsStride)); \\\n     a = mapA.conjugate(); \\\n     lda = a.outerStride(); \\\n     a_ptr = a.data(); \\\n   } else a_ptr=lhs; \\\n   BLASFUNC(&uplo, &trans, &n, &k, &alpha_, (BLASTYPE*)a_ptr, &lda, &beta_, (BLASTYPE*)res, &ldc); \\\n  } \\\n};\n\n#ifdef EIGEN_USE_MKL\nEIGEN_BLAS_RANKUPDATE_R(double, double, dsyrk)\nEIGEN_BLAS_RANKUPDATE_R(float,  float,  ssyrk)\n#else\nEIGEN_BLAS_RANKUPDATE_R(double, double, dsyrk_)\nEIGEN_BLAS_RANKUPDATE_R(float,  float,  ssyrk_)\n#endif\n\n// TODO handle complex cases\n// EIGEN_BLAS_RANKUPDATE_C(dcomplex, double, double, zherk_)\n// EIGEN_BLAS_RANKUPDATE_C(scomplex, float,  float, cherk_)\n\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_BLAS_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h",
    "content": "/*\n Copyright (c) 2011, Intel Corporation. All rights reserved.\n\n Redistribution and use in source and binary forms, with or without modification,\n are permitted provided that the following conditions are met:\n\n * Redistributions of source code must retain the above copyright notice, this\n   list of conditions and the following disclaimer.\n * Redistributions in binary form must reproduce the above copyright notice,\n   this list of conditions and the following disclaimer in the documentation\n   and/or other materials provided with the distribution.\n * Neither the name of Intel Corporation nor the names of its contributors may\n   be used to endorse or promote products derived from this software without\n   specific prior written permission.\n\n THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\" AND\n ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED\n WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE\n DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR\n ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES\n (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;\n LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON\n ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\n SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\n ********************************************************************************\n *   Content : Eigen bindings to BLAS F77\n *   General matrix-matrix product functionality based on ?GEMM.\n ********************************************************************************\n*/\n\n#ifndef EIGEN_GENERAL_MATRIX_MATRIX_BLAS_H\n#define EIGEN_GENERAL_MATRIX_MATRIX_BLAS_H\n\nnamespace Eigen { \n\nnamespace internal {\n\n/**********************************************************************\n* This file implements general matrix-matrix multiplication using BLAS\n* gemm function via partial specialization of\n* general_matrix_matrix_product::run(..) method for float, double,\n* std::complex<float> and std::complex<double> types\n**********************************************************************/\n\n// gemm specialization\n\n#define GEMM_SPECIALIZATION(EIGTYPE, EIGPREFIX, BLASTYPE, BLASFUNC) \\\ntemplate< \\\n  typename Index, \\\n  int LhsStorageOrder, bool ConjugateLhs, \\\n  int RhsStorageOrder, bool ConjugateRhs> \\\nstruct general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor,1> \\\n{ \\\ntypedef gebp_traits<EIGTYPE,EIGTYPE> Traits; \\\n\\\nstatic void run(Index rows, Index cols, Index depth, \\\n  const EIGTYPE* _lhs, Index lhsStride, \\\n  const EIGTYPE* _rhs, Index rhsStride, \\\n  EIGTYPE* res, Index resIncr, Index resStride, \\\n  EIGTYPE alpha, \\\n  level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/, \\\n  GemmParallelInfo<Index>* /*info = 0*/) \\\n{ \\\n  using std::conj; \\\n\\\n  EIGEN_ONLY_USED_FOR_DEBUG(resIncr); \\\n  eigen_assert(resIncr == 1); \\\n  char transa, transb; \\\n  BlasIndex m, n, k, lda, ldb, ldc; \\\n  const EIGTYPE *a, *b; \\\n  EIGTYPE beta(1); \\\n  MatrixX##EIGPREFIX a_tmp, b_tmp; \\\n\\\n/* Set transpose options */ \\\n  transa = (LhsStorageOrder==RowMajor) ? ((ConjugateLhs) ? 'C' : 'T') : 'N'; \\\n  transb = (RhsStorageOrder==RowMajor) ? ((ConjugateRhs) ? 
'C' : 'T') : 'N'; \\\n\\\n/* Set m, n, k */ \\\n  m = convert_index<BlasIndex>(rows);  \\\n  n = convert_index<BlasIndex>(cols);  \\\n  k = convert_index<BlasIndex>(depth); \\\n\\\n/* Set lda, ldb, ldc */ \\\n  lda = convert_index<BlasIndex>(lhsStride); \\\n  ldb = convert_index<BlasIndex>(rhsStride); \\\n  ldc = convert_index<BlasIndex>(resStride); \\\n\\\n/* Set a, b, c */ \\\n  if ((LhsStorageOrder==ColMajor) && (ConjugateLhs)) { \\\n    Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,m,k,OuterStride<>(lhsStride)); \\\n    a_tmp = lhs.conjugate(); \\\n    a = a_tmp.data(); \\\n    lda = convert_index<BlasIndex>(a_tmp.outerStride()); \\\n  } else a = _lhs; \\\n\\\n  if ((RhsStorageOrder==ColMajor) && (ConjugateRhs)) { \\\n    Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,k,n,OuterStride<>(rhsStride)); \\\n    b_tmp = rhs.conjugate(); \\\n    b = b_tmp.data(); \\\n    ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \\\n  } else b = _rhs; \\\n\\\n  BLASFUNC(&transa, &transb, &m, &n, &k, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \\\n}};\n\n#ifdef EIGEN_USE_MKL\nGEMM_SPECIALIZATION(double,   d,  double, dgemm)\nGEMM_SPECIALIZATION(float,    f,  float,  sgemm)\nGEMM_SPECIALIZATION(dcomplex, cd, MKL_Complex16, zgemm)\nGEMM_SPECIALIZATION(scomplex, cf, MKL_Complex8,  cgemm)\n#else\nGEMM_SPECIALIZATION(double,   d,  double, dgemm_)\nGEMM_SPECIALIZATION(float,    f,  float,  sgemm_)\nGEMM_SPECIALIZATION(dcomplex, cd, double, zgemm_)\nGEMM_SPECIALIZATION(scomplex, cf, float,  cgemm_)\n#endif\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_GENERAL_MATRIX_MATRIX_BLAS_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/products/GeneralMatrixVector.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008-2016 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_GENERAL_MATRIX_VECTOR_H\n#define EIGEN_GENERAL_MATRIX_VECTOR_H\n\nnamespace Eigen {\n\nnamespace internal {\n\nenum GEMVPacketSizeType {\n  GEMVPacketFull = 0,\n  GEMVPacketHalf,\n  GEMVPacketQuarter\n};\n\ntemplate <int N, typename T1, typename T2, typename T3>\nstruct gemv_packet_cond { typedef T3 type; };\n\ntemplate <typename T1, typename T2, typename T3>\nstruct gemv_packet_cond<GEMVPacketFull, T1, T2, T3> { typedef T1 type; };\n\ntemplate <typename T1, typename T2, typename T3>\nstruct gemv_packet_cond<GEMVPacketHalf, T1, T2, T3> { typedef T2 type; };\n\ntemplate<typename LhsScalar, typename RhsScalar, int _PacketSize=GEMVPacketFull>\nclass gemv_traits\n{\n  typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;\n\n#define PACKET_DECL_COND_PREFIX(prefix, name, packet_size)                        \\\n  typedef typename gemv_packet_cond<packet_size,                                  \\\n                                    typename packet_traits<name ## Scalar>::type, \\\n                                    typename packet_traits<name ## Scalar>::half, \\\n                                    typename unpacket_traits<typename packet_traits<name ## Scalar>::half>::half>::type \\\n  prefix ## name ## Packet\n\n  PACKET_DECL_COND_PREFIX(_, Lhs, _PacketSize);\n  PACKET_DECL_COND_PREFIX(_, Rhs, _PacketSize);\n  PACKET_DECL_COND_PREFIX(_, Res, _PacketSize);\n#undef PACKET_DECL_COND_PREFIX\n\npublic:\n  enum {\n        Vectorizable = unpacket_traits<_LhsPacket>::vectorizable &&\n        unpacket_traits<_RhsPacket>::vectorizable &&\n        int(unpacket_traits<_LhsPacket>::size)==int(unpacket_traits<_RhsPacket>::size),\n        LhsPacketSize = Vectorizable ? unpacket_traits<_LhsPacket>::size : 1,\n        RhsPacketSize = Vectorizable ? unpacket_traits<_RhsPacket>::size : 1,\n        ResPacketSize = Vectorizable ? 
unpacket_traits<_ResPacket>::size : 1\n  };\n\n  typedef typename conditional<Vectorizable,_LhsPacket,LhsScalar>::type LhsPacket;\n  typedef typename conditional<Vectorizable,_RhsPacket,RhsScalar>::type RhsPacket;\n  typedef typename conditional<Vectorizable,_ResPacket,ResScalar>::type ResPacket;\n};\n\n\n/* Optimized col-major matrix * vector product:\n * This algorithm processes the matrix per vertical panels,\n * which are then processed horizontally per chunk of 8*PacketSize x 1 vertical segments.\n *\n * Mixing type logic: C += alpha * A * B\n *  |  A  |  B  |alpha| comments\n *  |real |cplx |cplx | no vectorization\n *  |real |cplx |real | alpha is converted to a cplx when calling the run function, no vectorization\n *  |cplx |real |cplx | invalid, the caller has to do tmp = A * B; C += alpha*tmp\n *  |cplx |real |real | optimal case, vectorization possible via real-cplx mul\n *\n * The same reasoning applies for the transposed case.\n */\ntemplate<typename Index, typename LhsScalar, typename LhsMapper, bool ConjugateLhs, typename RhsScalar, typename RhsMapper, bool ConjugateRhs, int Version>\nstruct general_matrix_vector_product<Index,LhsScalar,LhsMapper,ColMajor,ConjugateLhs,RhsScalar,RhsMapper,ConjugateRhs,Version>\n{\n  typedef gemv_traits<LhsScalar,RhsScalar> Traits;\n  typedef gemv_traits<LhsScalar,RhsScalar,GEMVPacketHalf> HalfTraits;\n  typedef gemv_traits<LhsScalar,RhsScalar,GEMVPacketQuarter> QuarterTraits;\n\n  typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;\n\n  typedef typename Traits::LhsPacket LhsPacket;\n  typedef typename Traits::RhsPacket RhsPacket;\n  typedef typename Traits::ResPacket ResPacket;\n\n  typedef typename HalfTraits::LhsPacket LhsPacketHalf;\n  typedef typename HalfTraits::RhsPacket RhsPacketHalf;\n  typedef typename HalfTraits::ResPacket ResPacketHalf;\n\n  typedef typename QuarterTraits::LhsPacket LhsPacketQuarter;\n  typedef typename QuarterTraits::RhsPacket RhsPacketQuarter;\n  typedef typename QuarterTraits::ResPacket ResPacketQuarter;\n\nEIGEN_DEVICE_FUNC EIGEN_DONT_INLINE static void run(\n  Index rows, Index cols,\n  const LhsMapper& lhs,\n  const RhsMapper& rhs,\n        ResScalar* res, Index resIncr,\n  RhsScalar alpha);\n};\n\ntemplate<typename Index, typename LhsScalar, typename LhsMapper, bool ConjugateLhs, typename RhsScalar, typename RhsMapper, bool ConjugateRhs, int Version>\nEIGEN_DEVICE_FUNC EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,LhsMapper,ColMajor,ConjugateLhs,RhsScalar,RhsMapper,ConjugateRhs,Version>::run(\n  Index rows, Index cols,\n  const LhsMapper& alhs,\n  const RhsMapper& rhs,\n        ResScalar* res, Index resIncr,\n  RhsScalar alpha)\n{\n  EIGEN_UNUSED_VARIABLE(resIncr);\n  eigen_internal_assert(resIncr==1);\n\n  // The following copy tells the compiler that lhs's attributes are not modified outside this function\n  // This helps GCC to generate proper code.\n  LhsMapper lhs(alhs);\n\n  conj_helper<LhsScalar,RhsScalar,ConjugateLhs,ConjugateRhs> cj;\n  conj_helper<LhsPacket,RhsPacket,ConjugateLhs,ConjugateRhs> pcj;\n  conj_helper<LhsPacketHalf,RhsPacketHalf,ConjugateLhs,ConjugateRhs> pcj_half;\n  conj_helper<LhsPacketQuarter,RhsPacketQuarter,ConjugateLhs,ConjugateRhs> pcj_quarter;\n\n  const Index lhsStride = lhs.stride();\n  // TODO: for padded aligned inputs, we could enable aligned reads\n  enum { LhsAlignment = Unaligned,\n         ResPacketSize = Traits::ResPacketSize,\n         ResPacketSizeHalf = HalfTraits::ResPacketSize,\n         ResPacketSizeQuarter = 
QuarterTraits::ResPacketSize,\n         LhsPacketSize = Traits::LhsPacketSize,\n         HasHalf = (int)ResPacketSizeHalf < (int)ResPacketSize,\n         HasQuarter = (int)ResPacketSizeQuarter < (int)ResPacketSizeHalf\n  };\n\n  const Index n8 = rows-8*ResPacketSize+1;\n  const Index n4 = rows-4*ResPacketSize+1;\n  const Index n3 = rows-3*ResPacketSize+1;\n  const Index n2 = rows-2*ResPacketSize+1;\n  const Index n1 = rows-1*ResPacketSize+1;\n  const Index n_half = rows-1*ResPacketSizeHalf+1;\n  const Index n_quarter = rows-1*ResPacketSizeQuarter+1;\n\n  // TODO: improve the following heuristic:\n  const Index block_cols = cols<128 ? cols : (lhsStride*sizeof(LhsScalar)<32000?16:4);\n  ResPacket palpha = pset1<ResPacket>(alpha);\n  ResPacketHalf palpha_half = pset1<ResPacketHalf>(alpha);\n  ResPacketQuarter palpha_quarter = pset1<ResPacketQuarter>(alpha);\n\n  for(Index j2=0; j2<cols; j2+=block_cols)\n  {\n    Index jend = numext::mini(j2+block_cols,cols);\n    Index i=0;\n    for(; i<n8; i+=ResPacketSize*8)\n    {\n      ResPacket c0 = pset1<ResPacket>(ResScalar(0)),\n                c1 = pset1<ResPacket>(ResScalar(0)),\n                c2 = pset1<ResPacket>(ResScalar(0)),\n                c3 = pset1<ResPacket>(ResScalar(0)),\n                c4 = pset1<ResPacket>(ResScalar(0)),\n                c5 = pset1<ResPacket>(ResScalar(0)),\n                c6 = pset1<ResPacket>(ResScalar(0)),\n                c7 = pset1<ResPacket>(ResScalar(0));\n\n      for(Index j=j2; j<jend; j+=1)\n      {\n        RhsPacket b0 = pset1<RhsPacket>(rhs(j,0));\n        c0 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+LhsPacketSize*0,j),b0,c0);\n        c1 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+LhsPacketSize*1,j),b0,c1);\n        c2 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+LhsPacketSize*2,j),b0,c2);\n        c3 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+LhsPacketSize*3,j),b0,c3);\n        c4 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+LhsPacketSize*4,j),b0,c4);\n        c5 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+LhsPacketSize*5,j),b0,c5);\n        c6 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+LhsPacketSize*6,j),b0,c6);\n        c7 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+LhsPacketSize*7,j),b0,c7);\n      }\n      pstoreu(res+i+ResPacketSize*0, pmadd(c0,palpha,ploadu<ResPacket>(res+i+ResPacketSize*0)));\n      pstoreu(res+i+ResPacketSize*1, pmadd(c1,palpha,ploadu<ResPacket>(res+i+ResPacketSize*1)));\n      pstoreu(res+i+ResPacketSize*2, pmadd(c2,palpha,ploadu<ResPacket>(res+i+ResPacketSize*2)));\n      pstoreu(res+i+ResPacketSize*3, pmadd(c3,palpha,ploadu<ResPacket>(res+i+ResPacketSize*3)));\n      pstoreu(res+i+ResPacketSize*4, pmadd(c4,palpha,ploadu<ResPacket>(res+i+ResPacketSize*4)));\n      pstoreu(res+i+ResPacketSize*5, pmadd(c5,palpha,ploadu<ResPacket>(res+i+ResPacketSize*5)));\n      pstoreu(res+i+ResPacketSize*6, pmadd(c6,palpha,ploadu<ResPacket>(res+i+ResPacketSize*6)));\n      pstoreu(res+i+ResPacketSize*7, pmadd(c7,palpha,ploadu<ResPacket>(res+i+ResPacketSize*7)));\n    }\n    if(i<n4)\n    {\n      ResPacket c0 = pset1<ResPacket>(ResScalar(0)),\n                c1 = pset1<ResPacket>(ResScalar(0)),\n                c2 = pset1<ResPacket>(ResScalar(0)),\n                c3 = pset1<ResPacket>(ResScalar(0));\n\n      for(Index j=j2; j<jend; j+=1)\n      {\n        RhsPacket b0 = pset1<RhsPacket>(rhs(j,0));\n        c0 = pcj.pmadd(lhs.template 
load<LhsPacket,LhsAlignment>(i+LhsPacketSize*0,j),b0,c0);\n        c1 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+LhsPacketSize*1,j),b0,c1);\n        c2 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+LhsPacketSize*2,j),b0,c2);\n        c3 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+LhsPacketSize*3,j),b0,c3);\n      }\n      pstoreu(res+i+ResPacketSize*0, pmadd(c0,palpha,ploadu<ResPacket>(res+i+ResPacketSize*0)));\n      pstoreu(res+i+ResPacketSize*1, pmadd(c1,palpha,ploadu<ResPacket>(res+i+ResPacketSize*1)));\n      pstoreu(res+i+ResPacketSize*2, pmadd(c2,palpha,ploadu<ResPacket>(res+i+ResPacketSize*2)));\n      pstoreu(res+i+ResPacketSize*3, pmadd(c3,palpha,ploadu<ResPacket>(res+i+ResPacketSize*3)));\n\n      i+=ResPacketSize*4;\n    }\n    if(i<n3)\n    {\n      ResPacket c0 = pset1<ResPacket>(ResScalar(0)),\n                c1 = pset1<ResPacket>(ResScalar(0)),\n                c2 = pset1<ResPacket>(ResScalar(0));\n\n      for(Index j=j2; j<jend; j+=1)\n      {\n        RhsPacket b0 = pset1<RhsPacket>(rhs(j,0));\n        c0 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+LhsPacketSize*0,j),b0,c0);\n        c1 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+LhsPacketSize*1,j),b0,c1);\n        c2 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+LhsPacketSize*2,j),b0,c2);\n      }\n      pstoreu(res+i+ResPacketSize*0, pmadd(c0,palpha,ploadu<ResPacket>(res+i+ResPacketSize*0)));\n      pstoreu(res+i+ResPacketSize*1, pmadd(c1,palpha,ploadu<ResPacket>(res+i+ResPacketSize*1)));\n      pstoreu(res+i+ResPacketSize*2, pmadd(c2,palpha,ploadu<ResPacket>(res+i+ResPacketSize*2)));\n\n      i+=ResPacketSize*3;\n    }\n    if(i<n2)\n    {\n      ResPacket c0 = pset1<ResPacket>(ResScalar(0)),\n                c1 = pset1<ResPacket>(ResScalar(0));\n\n      for(Index j=j2; j<jend; j+=1)\n      {\n        RhsPacket b0 = pset1<RhsPacket>(rhs(j,0));\n        c0 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+LhsPacketSize*0,j),b0,c0);\n        c1 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+LhsPacketSize*1,j),b0,c1);\n      }\n      pstoreu(res+i+ResPacketSize*0, pmadd(c0,palpha,ploadu<ResPacket>(res+i+ResPacketSize*0)));\n      pstoreu(res+i+ResPacketSize*1, pmadd(c1,palpha,ploadu<ResPacket>(res+i+ResPacketSize*1)));\n      i+=ResPacketSize*2;\n    }\n    if(i<n1)\n    {\n      ResPacket c0 = pset1<ResPacket>(ResScalar(0));\n      for(Index j=j2; j<jend; j+=1)\n      {\n        RhsPacket b0 = pset1<RhsPacket>(rhs(j,0));\n        c0 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+0,j),b0,c0);\n      }\n      pstoreu(res+i+ResPacketSize*0, pmadd(c0,palpha,ploadu<ResPacket>(res+i+ResPacketSize*0)));\n      i+=ResPacketSize;\n    }\n    if(HasHalf && i<n_half)\n    {\n      ResPacketHalf c0 = pset1<ResPacketHalf>(ResScalar(0));\n      for(Index j=j2; j<jend; j+=1)\n      {\n        RhsPacketHalf b0 = pset1<RhsPacketHalf>(rhs(j,0));\n        c0 = pcj_half.pmadd(lhs.template load<LhsPacketHalf,LhsAlignment>(i+0,j),b0,c0);\n      }\n      pstoreu(res+i+ResPacketSizeHalf*0, pmadd(c0,palpha_half,ploadu<ResPacketHalf>(res+i+ResPacketSizeHalf*0)));\n      i+=ResPacketSizeHalf;\n    }\n    if(HasQuarter && i<n_quarter)\n    {\n      ResPacketQuarter c0 = pset1<ResPacketQuarter>(ResScalar(0));\n      for(Index j=j2; j<jend; j+=1)\n      {\n        RhsPacketQuarter b0 = pset1<RhsPacketQuarter>(rhs(j,0));\n        c0 = pcj_quarter.pmadd(lhs.template load<LhsPacketQuarter,LhsAlignment>(i+0,j),b0,c0);\n      }\n      
pstoreu(res+i+ResPacketSizeQuarter*0, pmadd(c0,palpha_quarter,ploadu<ResPacketQuarter>(res+i+ResPacketSizeQuarter*0)));\n      i+=ResPacketSizeQuarter;\n    }\n    for(;i<rows;++i)\n    {\n      ResScalar c0(0);\n      for(Index j=j2; j<jend; j+=1)\n        c0 += cj.pmul(lhs(i,j), rhs(j,0));\n      res[i] += alpha*c0;\n    }\n  }\n}\n\n/* Optimized row-major matrix * vector product:\n * This algorithm processes up to 8 rows at once, which allows us both to\n * reduce the number of load/stores of the result and to reduce the\n * instruction dependency. Moreover, we know that all bands have the\n * same alignment pattern.\n *\n * Mixing type logic:\n *  - alpha is always a complex (or converted to a complex)\n *  - no vectorization\n */\ntemplate<typename Index, typename LhsScalar, typename LhsMapper, bool ConjugateLhs, typename RhsScalar, typename RhsMapper, bool ConjugateRhs, int Version>\nstruct general_matrix_vector_product<Index,LhsScalar,LhsMapper,RowMajor,ConjugateLhs,RhsScalar,RhsMapper,ConjugateRhs,Version>\n{\n  typedef gemv_traits<LhsScalar,RhsScalar> Traits;\n  typedef gemv_traits<LhsScalar,RhsScalar,GEMVPacketHalf> HalfTraits;\n  typedef gemv_traits<LhsScalar,RhsScalar,GEMVPacketQuarter> QuarterTraits;\n\n  typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;\n\n  typedef typename Traits::LhsPacket LhsPacket;\n  typedef typename Traits::RhsPacket RhsPacket;\n  typedef typename Traits::ResPacket ResPacket;\n\n  typedef typename HalfTraits::LhsPacket LhsPacketHalf;\n  typedef typename HalfTraits::RhsPacket RhsPacketHalf;\n  typedef typename HalfTraits::ResPacket ResPacketHalf;\n\n  typedef typename QuarterTraits::LhsPacket LhsPacketQuarter;\n  typedef typename QuarterTraits::RhsPacket RhsPacketQuarter;\n  typedef typename QuarterTraits::ResPacket ResPacketQuarter;\n\nEIGEN_DEVICE_FUNC EIGEN_DONT_INLINE static void run(\n  Index rows, Index cols,\n  const LhsMapper& lhs,\n  const RhsMapper& rhs,\n        ResScalar* res, Index resIncr,\n  ResScalar alpha);\n};\n\ntemplate<typename Index, typename LhsScalar, typename LhsMapper, bool ConjugateLhs, typename RhsScalar, typename RhsMapper, bool ConjugateRhs, int Version>\nEIGEN_DEVICE_FUNC EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,LhsMapper,RowMajor,ConjugateLhs,RhsScalar,RhsMapper,ConjugateRhs,Version>::run(\n  Index rows, Index cols,\n  const LhsMapper& alhs,\n  const RhsMapper& rhs,\n  ResScalar* res, Index resIncr,\n  ResScalar alpha)\n{\n  // The following copy tells the compiler that lhs's attributes are not modified outside this function\n  // This helps GCC to generate proper code.\n  LhsMapper lhs(alhs);\n\n  eigen_internal_assert(rhs.stride()==1);\n  conj_helper<LhsScalar,RhsScalar,ConjugateLhs,ConjugateRhs> cj;\n  conj_helper<LhsPacket,RhsPacket,ConjugateLhs,ConjugateRhs> pcj;\n  conj_helper<LhsPacketHalf,RhsPacketHalf,ConjugateLhs,ConjugateRhs> pcj_half;\n  conj_helper<LhsPacketQuarter,RhsPacketQuarter,ConjugateLhs,ConjugateRhs> pcj_quarter;\n\n  // TODO: fine tune the following heuristic. The rationale is that if the matrix is very large,\n  //       processing 8 rows at once might be counterproductive wrt cache.\n  const Index n8 = lhs.stride()*sizeof(LhsScalar)>32000 ? 
0 : rows-7;\n  const Index n4 = rows-3;\n  const Index n2 = rows-1;\n\n  // TODO: for padded aligned inputs, we could enable aligned reads\n  enum { LhsAlignment = Unaligned,\n         ResPacketSize = Traits::ResPacketSize,\n         ResPacketSizeHalf = HalfTraits::ResPacketSize,\n         ResPacketSizeQuarter = QuarterTraits::ResPacketSize,\n         LhsPacketSize = Traits::LhsPacketSize,\n         LhsPacketSizeHalf = HalfTraits::LhsPacketSize,\n         LhsPacketSizeQuarter = QuarterTraits::LhsPacketSize,\n         HasHalf = (int)ResPacketSizeHalf < (int)ResPacketSize,\n         HasQuarter = (int)ResPacketSizeQuarter < (int)ResPacketSizeHalf\n  };\n\n  Index i=0;\n  for(; i<n8; i+=8)\n  {\n    ResPacket c0 = pset1<ResPacket>(ResScalar(0)),\n              c1 = pset1<ResPacket>(ResScalar(0)),\n              c2 = pset1<ResPacket>(ResScalar(0)),\n              c3 = pset1<ResPacket>(ResScalar(0)),\n              c4 = pset1<ResPacket>(ResScalar(0)),\n              c5 = pset1<ResPacket>(ResScalar(0)),\n              c6 = pset1<ResPacket>(ResScalar(0)),\n              c7 = pset1<ResPacket>(ResScalar(0));\n\n    Index j=0;\n    for(; j+LhsPacketSize<=cols; j+=LhsPacketSize)\n    {\n      RhsPacket b0 = rhs.template load<RhsPacket, Unaligned>(j,0);\n\n      c0 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+0,j),b0,c0);\n      c1 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+1,j),b0,c1);\n      c2 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+2,j),b0,c2);\n      c3 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+3,j),b0,c3);\n      c4 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+4,j),b0,c4);\n      c5 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+5,j),b0,c5);\n      c6 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+6,j),b0,c6);\n      c7 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+7,j),b0,c7);\n    }\n    ResScalar cc0 = predux(c0);\n    ResScalar cc1 = predux(c1);\n    ResScalar cc2 = predux(c2);\n    ResScalar cc3 = predux(c3);\n    ResScalar cc4 = predux(c4);\n    ResScalar cc5 = predux(c5);\n    ResScalar cc6 = predux(c6);\n    ResScalar cc7 = predux(c7);\n    for(; j<cols; ++j)\n    {\n      RhsScalar b0 = rhs(j,0);\n\n      cc0 += cj.pmul(lhs(i+0,j), b0);\n      cc1 += cj.pmul(lhs(i+1,j), b0);\n      cc2 += cj.pmul(lhs(i+2,j), b0);\n      cc3 += cj.pmul(lhs(i+3,j), b0);\n      cc4 += cj.pmul(lhs(i+4,j), b0);\n      cc5 += cj.pmul(lhs(i+5,j), b0);\n      cc6 += cj.pmul(lhs(i+6,j), b0);\n      cc7 += cj.pmul(lhs(i+7,j), b0);\n    }\n    res[(i+0)*resIncr] += alpha*cc0;\n    res[(i+1)*resIncr] += alpha*cc1;\n    res[(i+2)*resIncr] += alpha*cc2;\n    res[(i+3)*resIncr] += alpha*cc3;\n    res[(i+4)*resIncr] += alpha*cc4;\n    res[(i+5)*resIncr] += alpha*cc5;\n    res[(i+6)*resIncr] += alpha*cc6;\n    res[(i+7)*resIncr] += alpha*cc7;\n  }\n  for(; i<n4; i+=4)\n  {\n    ResPacket c0 = pset1<ResPacket>(ResScalar(0)),\n              c1 = pset1<ResPacket>(ResScalar(0)),\n              c2 = pset1<ResPacket>(ResScalar(0)),\n              c3 = pset1<ResPacket>(ResScalar(0));\n\n    Index j=0;\n    for(; j+LhsPacketSize<=cols; j+=LhsPacketSize)\n    {\n      RhsPacket b0 = rhs.template load<RhsPacket, Unaligned>(j,0);\n\n      c0 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+0,j),b0,c0);\n      c1 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+1,j),b0,c1);\n      c2 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+2,j),b0,c2);\n      c3 = pcj.pmadd(lhs.template 
load<LhsPacket,LhsAlignment>(i+3,j),b0,c3);\n    }\n    ResScalar cc0 = predux(c0);\n    ResScalar cc1 = predux(c1);\n    ResScalar cc2 = predux(c2);\n    ResScalar cc3 = predux(c3);\n    for(; j<cols; ++j)\n    {\n      RhsScalar b0 = rhs(j,0);\n\n      cc0 += cj.pmul(lhs(i+0,j), b0);\n      cc1 += cj.pmul(lhs(i+1,j), b0);\n      cc2 += cj.pmul(lhs(i+2,j), b0);\n      cc3 += cj.pmul(lhs(i+3,j), b0);\n    }\n    res[(i+0)*resIncr] += alpha*cc0;\n    res[(i+1)*resIncr] += alpha*cc1;\n    res[(i+2)*resIncr] += alpha*cc2;\n    res[(i+3)*resIncr] += alpha*cc3;\n  }\n  for(; i<n2; i+=2)\n  {\n    ResPacket c0 = pset1<ResPacket>(ResScalar(0)),\n              c1 = pset1<ResPacket>(ResScalar(0));\n\n    Index j=0;\n    for(; j+LhsPacketSize<=cols; j+=LhsPacketSize)\n    {\n      RhsPacket b0 = rhs.template load<RhsPacket, Unaligned>(j,0);\n\n      c0 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+0,j),b0,c0);\n      c1 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+1,j),b0,c1);\n    }\n    ResScalar cc0 = predux(c0);\n    ResScalar cc1 = predux(c1);\n    for(; j<cols; ++j)\n    {\n      RhsScalar b0 = rhs(j,0);\n\n      cc0 += cj.pmul(lhs(i+0,j), b0);\n      cc1 += cj.pmul(lhs(i+1,j), b0);\n    }\n    res[(i+0)*resIncr] += alpha*cc0;\n    res[(i+1)*resIncr] += alpha*cc1;\n  }\n  for(; i<rows; ++i)\n  {\n    ResPacket c0 = pset1<ResPacket>(ResScalar(0));\n    ResPacketHalf c0_h = pset1<ResPacketHalf>(ResScalar(0));\n    ResPacketQuarter c0_q = pset1<ResPacketQuarter>(ResScalar(0));\n    Index j=0;\n    for(; j+LhsPacketSize<=cols; j+=LhsPacketSize)\n    {\n      RhsPacket b0 = rhs.template load<RhsPacket,Unaligned>(j,0);\n      c0 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i,j),b0,c0);\n    }\n    ResScalar cc0 = predux(c0);\n    if (HasHalf) {\n      for(; j+LhsPacketSizeHalf<=cols; j+=LhsPacketSizeHalf)\n        {\n          RhsPacketHalf b0 = rhs.template load<RhsPacketHalf,Unaligned>(j,0);\n          c0_h = pcj_half.pmadd(lhs.template load<LhsPacketHalf,LhsAlignment>(i,j),b0,c0_h);\n        }\n      cc0 += predux(c0_h);\n    }\n    if (HasQuarter) {\n      for(; j+LhsPacketSizeQuarter<=cols; j+=LhsPacketSizeQuarter)\n        {\n          RhsPacketQuarter b0 = rhs.template load<RhsPacketQuarter,Unaligned>(j,0);\n          c0_q = pcj_quarter.pmadd(lhs.template load<LhsPacketQuarter,LhsAlignment>(i,j),b0,c0_q);\n        }\n      cc0 += predux(c0_q);\n    }\n    for(; j<cols; ++j)\n    {\n      cc0 += cj.pmul(lhs(i,j), rhs(j,0));\n    }\n    res[i*resIncr] += alpha*cc0;\n  }\n}\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_GENERAL_MATRIX_VECTOR_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h",
    "content": "/*\n Copyright (c) 2011, Intel Corporation. All rights reserved.\n\n Redistribution and use in source and binary forms, with or without modification,\n are permitted provided that the following conditions are met:\n\n * Redistributions of source code must retain the above copyright notice, this\n   list of conditions and the following disclaimer.\n * Redistributions in binary form must reproduce the above copyright notice,\n   this list of conditions and the following disclaimer in the documentation\n   and/or other materials provided with the distribution.\n * Neither the name of Intel Corporation nor the names of its contributors may\n   be used to endorse or promote products derived from this software without\n   specific prior written permission.\n\n THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\" AND\n ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED\n WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE\n DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR\n ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES\n (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;\n LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON\n ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\n SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\n ********************************************************************************\n *   Content : Eigen bindings to BLAS F77\n *   General matrix-vector product functionality based on ?GEMV.\n ********************************************************************************\n*/\n\n#ifndef EIGEN_GENERAL_MATRIX_VECTOR_BLAS_H\n#define EIGEN_GENERAL_MATRIX_VECTOR_BLAS_H\n\nnamespace Eigen { \n\nnamespace internal {\n\n/**********************************************************************\n* This file implements general matrix-vector multiplication using BLAS\n* gemv function via partial specialization of\n* general_matrix_vector_product::run(..) 
method for float, double,\n* std::complex<float> and std::complex<double> types\n**********************************************************************/\n\n// gemv specialization\n\ntemplate<typename Index, typename LhsScalar, int StorageOrder, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs>\nstruct general_matrix_vector_product_gemv;\n\n#define EIGEN_BLAS_GEMV_SPECIALIZE(Scalar) \\\ntemplate<typename Index, bool ConjugateLhs, bool ConjugateRhs> \\\nstruct general_matrix_vector_product<Index,Scalar,const_blas_data_mapper<Scalar,Index,ColMajor>,ColMajor,ConjugateLhs,Scalar,const_blas_data_mapper<Scalar,Index,RowMajor>,ConjugateRhs,Specialized> { \\\nstatic void run( \\\n  Index rows, Index cols, \\\n  const const_blas_data_mapper<Scalar,Index,ColMajor> &lhs, \\\n  const const_blas_data_mapper<Scalar,Index,RowMajor> &rhs, \\\n  Scalar* res, Index resIncr, Scalar alpha) \\\n{ \\\n  if (ConjugateLhs) { \\\n    general_matrix_vector_product<Index,Scalar,const_blas_data_mapper<Scalar,Index,ColMajor>,ColMajor,ConjugateLhs,Scalar,const_blas_data_mapper<Scalar,Index,RowMajor>,ConjugateRhs,BuiltIn>::run( \\\n      rows, cols, lhs, rhs, res, resIncr, alpha); \\\n  } else { \\\n    general_matrix_vector_product_gemv<Index,Scalar,ColMajor,ConjugateLhs,Scalar,ConjugateRhs>::run( \\\n      rows, cols, lhs.data(), lhs.stride(), rhs.data(), rhs.stride(), res, resIncr, alpha); \\\n  } \\\n} \\\n}; \\\ntemplate<typename Index, bool ConjugateLhs, bool ConjugateRhs> \\\nstruct general_matrix_vector_product<Index,Scalar,const_blas_data_mapper<Scalar,Index,RowMajor>,RowMajor,ConjugateLhs,Scalar,const_blas_data_mapper<Scalar,Index,ColMajor>,ConjugateRhs,Specialized> { \\\nstatic void run( \\\n  Index rows, Index cols, \\\n  const const_blas_data_mapper<Scalar,Index,RowMajor> &lhs, \\\n  const const_blas_data_mapper<Scalar,Index,ColMajor> &rhs, \\\n  Scalar* res, Index resIncr, Scalar alpha) \\\n{ \\\n    general_matrix_vector_product_gemv<Index,Scalar,RowMajor,ConjugateLhs,Scalar,ConjugateRhs>::run( \\\n      rows, cols, lhs.data(), lhs.stride(), rhs.data(), rhs.stride(), res, resIncr, alpha); \\\n} \\\n}; \\\n\nEIGEN_BLAS_GEMV_SPECIALIZE(double)\nEIGEN_BLAS_GEMV_SPECIALIZE(float)\nEIGEN_BLAS_GEMV_SPECIALIZE(dcomplex)\nEIGEN_BLAS_GEMV_SPECIALIZE(scomplex)\n\n#define EIGEN_BLAS_GEMV_SPECIALIZATION(EIGTYPE,BLASTYPE,BLASFUNC) \\\ntemplate<typename Index, int LhsStorageOrder, bool ConjugateLhs, bool ConjugateRhs> \\\nstruct general_matrix_vector_product_gemv<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,ConjugateRhs> \\\n{ \\\ntypedef Matrix<EIGTYPE,Dynamic,1,ColMajor> GEMVVector;\\\n\\\nstatic void run( \\\n  Index rows, Index cols, \\\n  const EIGTYPE* lhs, Index lhsStride, \\\n  const EIGTYPE* rhs, Index rhsIncr, \\\n  EIGTYPE* res, Index resIncr, EIGTYPE alpha) \\\n{ \\\n  BlasIndex m=convert_index<BlasIndex>(rows), n=convert_index<BlasIndex>(cols), \\\n            lda=convert_index<BlasIndex>(lhsStride), incx=convert_index<BlasIndex>(rhsIncr), incy=convert_index<BlasIndex>(resIncr); \\\n  const EIGTYPE beta(1); \\\n  const EIGTYPE *x_ptr; \\\n  char trans=(LhsStorageOrder==ColMajor) ? 'N' : (ConjugateLhs) ? 
'C' : 'T'; \\\n  if (LhsStorageOrder==RowMajor) { \\\n    m = convert_index<BlasIndex>(cols); \\\n    n = convert_index<BlasIndex>(rows); \\\n  }\\\n  GEMVVector x_tmp; \\\n  if (ConjugateRhs) { \\\n    Map<const GEMVVector, 0, InnerStride<> > map_x(rhs,cols,1,InnerStride<>(incx)); \\\n    x_tmp=map_x.conjugate(); \\\n    x_ptr=x_tmp.data(); \\\n    incx=1; \\\n  } else x_ptr=rhs; \\\n  BLASFUNC(&trans, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)lhs, &lda, (const BLASTYPE*)x_ptr, &incx, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &incy); \\\n}\\\n};\n\n#ifdef EIGEN_USE_MKL\nEIGEN_BLAS_GEMV_SPECIALIZATION(double,   double, dgemv)\nEIGEN_BLAS_GEMV_SPECIALIZATION(float,    float,  sgemv)\nEIGEN_BLAS_GEMV_SPECIALIZATION(dcomplex, MKL_Complex16, zgemv)\nEIGEN_BLAS_GEMV_SPECIALIZATION(scomplex, MKL_Complex8 , cgemv)\n#else\nEIGEN_BLAS_GEMV_SPECIALIZATION(double,   double, dgemv_)\nEIGEN_BLAS_GEMV_SPECIALIZATION(float,    float,  sgemv_)\nEIGEN_BLAS_GEMV_SPECIALIZATION(dcomplex, double, zgemv_)\nEIGEN_BLAS_GEMV_SPECIALIZATION(scomplex, float,  cgemv_)\n#endif\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_GENERAL_MATRIX_VECTOR_BLAS_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/products/Parallelizer.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_PARALLELIZER_H\n#define EIGEN_PARALLELIZER_H\n\n#if EIGEN_HAS_CXX11_ATOMIC\n#include <atomic>\n#endif\n\nnamespace Eigen {\n\nnamespace internal {\n\n/** \\internal */\ninline void manage_multi_threading(Action action, int* v)\n{\n  static int m_maxThreads = -1;\n  EIGEN_UNUSED_VARIABLE(m_maxThreads)\n\n  if(action==SetAction)\n  {\n    eigen_internal_assert(v!=0);\n    m_maxThreads = *v;\n  }\n  else if(action==GetAction)\n  {\n    eigen_internal_assert(v!=0);\n    #ifdef EIGEN_HAS_OPENMP\n    if(m_maxThreads>0)\n      *v = m_maxThreads;\n    else\n      *v = omp_get_max_threads();\n    #else\n    *v = 1;\n    #endif\n  }\n  else\n  {\n    eigen_internal_assert(false);\n  }\n}\n\n}\n\n/** Must be call first when calling Eigen from multiple threads */\ninline void initParallel()\n{\n  int nbt;\n  internal::manage_multi_threading(GetAction, &nbt);\n  std::ptrdiff_t l1, l2, l3;\n  internal::manage_caching_sizes(GetAction, &l1, &l2, &l3);\n}\n\n/** \\returns the max number of threads reserved for Eigen\n  * \\sa setNbThreads */\ninline int nbThreads()\n{\n  int ret;\n  internal::manage_multi_threading(GetAction, &ret);\n  return ret;\n}\n\n/** Sets the max number of threads reserved for Eigen\n  * \\sa nbThreads */\ninline void setNbThreads(int v)\n{\n  internal::manage_multi_threading(SetAction, &v);\n}\n\nnamespace internal {\n\ntemplate<typename Index> struct GemmParallelInfo\n{\n  GemmParallelInfo() : sync(-1), users(0), lhs_start(0), lhs_length(0) {}\n\n  // volatile is not enough on all architectures (see bug 1572)\n  // to guarantee that when thread A says to thread B that it is\n  // done with packing a block, then all writes have been really\n  // carried out... C++11 memory model+atomic guarantees this.\n#if EIGEN_HAS_CXX11_ATOMIC\n  std::atomic<Index> sync;\n  std::atomic<int> users;\n#else\n  Index volatile sync;\n  int volatile users;\n#endif\n\n  Index lhs_start;\n  Index lhs_length;\n};\n\ntemplate<bool Condition, typename Functor, typename Index>\nvoid parallelize_gemm(const Functor& func, Index rows, Index cols, Index depth, bool transpose)\n{\n  // TODO when EIGEN_USE_BLAS is defined,\n  // we should still enable OMP for other scalar types\n  // Without C++11, we have to disable GEMM's parallelization on\n  // non x86 architectures because there volatile is not enough for our purpose.\n  // See bug 1572.\n#if (! defined(EIGEN_HAS_OPENMP)) || defined(EIGEN_USE_BLAS) || ((!EIGEN_HAS_CXX11_ATOMIC) && !(EIGEN_ARCH_i386_OR_x86_64))\n  // FIXME the transpose variable is only needed to properly split\n  // the matrix product when multithreading is enabled. This is a temporary\n  // fix to support row-major destination matrices. 
This whole\n  // parallelizer mechanism has to be redesigned anyway.\n  EIGEN_UNUSED_VARIABLE(depth);\n  EIGEN_UNUSED_VARIABLE(transpose);\n  func(0,rows, 0,cols);\n#else\n\n  // Dynamically check whether we should enable or disable OpenMP.\n  // The conditions are:\n  // - the max number of threads we can create is greater than 1\n  // - we are not already in a parallel code\n  // - the sizes are large enough\n\n  // compute the maximal number of threads from the size of the product:\n  // This first heuristic takes into account that the product kernel is fully optimized when working with nr columns at once.\n  Index size = transpose ? rows : cols;\n  Index pb_max_threads = std::max<Index>(1,size / Functor::Traits::nr);\n\n  // compute the maximal number of threads from the total amount of work:\n  double work = static_cast<double>(rows) * static_cast<double>(cols) *\n      static_cast<double>(depth);\n  double kMinTaskSize = 50000;  // FIXME improve this heuristic.\n  pb_max_threads = std::max<Index>(1, std::min<Index>(pb_max_threads, static_cast<Index>( work / kMinTaskSize ) ));\n\n  // compute the number of threads we are going to use\n  Index threads = std::min<Index>(nbThreads(), pb_max_threads);\n\n  // if multi-threading is explicitly disabled, not useful, or if we already are in a parallel session,\n  // then abort multi-threading\n  // FIXME omp_get_num_threads()>1 only works for openmp, what if the user does not use openmp?\n  if((!Condition) || (threads==1) || (omp_get_num_threads()>1))\n    return func(0,rows, 0,cols);\n\n  Eigen::initParallel();\n  func.initParallelSession(threads);\n\n  if(transpose)\n    std::swap(rows,cols);\n\n  ei_declare_aligned_stack_constructed_variable(GemmParallelInfo<Index>,info,threads,0);\n\n  #pragma omp parallel num_threads(threads)\n  {\n    Index i = omp_get_thread_num();\n    // Note that the actual number of threads might be lower than the number of requested ones.\n    Index actual_threads = omp_get_num_threads();\n\n    Index blockCols = (cols / actual_threads) & ~Index(0x3);\n    Index blockRows = (rows / actual_threads);\n    blockRows = (blockRows/Functor::Traits::mr)*Functor::Traits::mr;\n\n    Index r0 = i*blockRows;\n    Index actualBlockRows = (i+1==actual_threads) ? rows-r0 : blockRows;\n\n    Index c0 = i*blockCols;\n    Index actualBlockCols = (i+1==actual_threads) ? cols-c0 : blockCols;\n\n    info[i].lhs_start = r0;\n    info[i].lhs_length = actualBlockRows;\n\n    if(transpose) func(c0, actualBlockCols, 0, rows, info);\n    else          func(0, rows, c0, actualBlockCols, info);\n  }\n#endif\n}\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_PARALLELIZER_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/products/SelfadjointMatrixMatrix.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_SELFADJOINT_MATRIX_MATRIX_H\n#define EIGEN_SELFADJOINT_MATRIX_MATRIX_H\n\nnamespace Eigen { \n\nnamespace internal {\n\n// pack a selfadjoint block diagonal for use with the gebp_kernel\ntemplate<typename Scalar, typename Index, int Pack1, int Pack2_dummy, int StorageOrder>\nstruct symm_pack_lhs\n{\n  template<int BlockRows> inline\n  void pack(Scalar* blockA, const const_blas_data_mapper<Scalar,Index,StorageOrder>& lhs, Index cols, Index i, Index& count)\n  {\n    // normal copy\n    for(Index k=0; k<i; k++)\n      for(Index w=0; w<BlockRows; w++)\n        blockA[count++] = lhs(i+w,k);           // normal\n    // symmetric copy\n    Index h = 0;\n    for(Index k=i; k<i+BlockRows; k++)\n    {\n      for(Index w=0; w<h; w++)\n        blockA[count++] = numext::conj(lhs(k, i+w)); // transposed\n\n      blockA[count++] = numext::real(lhs(k,k));   // real (diagonal)\n\n      for(Index w=h+1; w<BlockRows; w++)\n        blockA[count++] = lhs(i+w, k);          // normal\n      ++h;\n    }\n    // transposed copy\n    for(Index k=i+BlockRows; k<cols; k++)\n      for(Index w=0; w<BlockRows; w++)\n        blockA[count++] = numext::conj(lhs(k, i+w)); // transposed\n  }\n  void operator()(Scalar* blockA, const Scalar* _lhs, Index lhsStride, Index cols, Index rows)\n  {\n    typedef typename unpacket_traits<typename packet_traits<Scalar>::type>::half HalfPacket;\n    typedef typename unpacket_traits<typename unpacket_traits<typename packet_traits<Scalar>::type>::half>::half QuarterPacket;\n    enum { PacketSize = packet_traits<Scalar>::size,\n           HalfPacketSize = unpacket_traits<HalfPacket>::size,\n           QuarterPacketSize = unpacket_traits<QuarterPacket>::size,\n           HasHalf = (int)HalfPacketSize < (int)PacketSize,\n           HasQuarter = (int)QuarterPacketSize < (int)HalfPacketSize};\n\n    const_blas_data_mapper<Scalar,Index,StorageOrder> lhs(_lhs,lhsStride);\n    Index count = 0;\n    //Index peeled_mc3 = (rows/Pack1)*Pack1;\n    \n    const Index peeled_mc3 = Pack1>=3*PacketSize ? (rows/(3*PacketSize))*(3*PacketSize) : 0;\n    const Index peeled_mc2 = Pack1>=2*PacketSize ? peeled_mc3+((rows-peeled_mc3)/(2*PacketSize))*(2*PacketSize) : 0;\n    const Index peeled_mc1 = Pack1>=1*PacketSize ? peeled_mc2+((rows-peeled_mc2)/(1*PacketSize))*(1*PacketSize) : 0;\n    const Index peeled_mc_half = Pack1>=HalfPacketSize ? peeled_mc1+((rows-peeled_mc1)/(HalfPacketSize))*(HalfPacketSize) : 0;\n    const Index peeled_mc_quarter = Pack1>=QuarterPacketSize ? 
peeled_mc_half+((rows-peeled_mc_half)/(QuarterPacketSize))*(QuarterPacketSize) : 0;\n    \n    if(Pack1>=3*PacketSize)\n      for(Index i=0; i<peeled_mc3; i+=3*PacketSize)\n        pack<3*PacketSize>(blockA, lhs, cols, i, count);\n    \n    if(Pack1>=2*PacketSize)\n      for(Index i=peeled_mc3; i<peeled_mc2; i+=2*PacketSize)\n        pack<2*PacketSize>(blockA, lhs, cols, i, count);\n    \n    if(Pack1>=1*PacketSize)\n      for(Index i=peeled_mc2; i<peeled_mc1; i+=1*PacketSize)\n        pack<1*PacketSize>(blockA, lhs, cols, i, count);\n\n    if(HasHalf && Pack1>=HalfPacketSize)\n      for(Index i=peeled_mc1; i<peeled_mc_half; i+=HalfPacketSize)\n        pack<HalfPacketSize>(blockA, lhs, cols, i, count);\n\n    if(HasQuarter && Pack1>=QuarterPacketSize)\n      for(Index i=peeled_mc_half; i<peeled_mc_quarter; i+=QuarterPacketSize)\n        pack<QuarterPacketSize>(blockA, lhs, cols, i, count);\n\n    // do the same with mr==1\n    for(Index i=peeled_mc_quarter; i<rows; i++)\n    {\n      for(Index k=0; k<i; k++)\n        blockA[count++] = lhs(i, k);                   // normal\n\n      blockA[count++] = numext::real(lhs(i, i));       // real (diagonal)\n\n      for(Index k=i+1; k<cols; k++)\n        blockA[count++] = numext::conj(lhs(k, i));     // transposed\n    }\n  }\n};\n\ntemplate<typename Scalar, typename Index, int nr, int StorageOrder>\nstruct symm_pack_rhs\n{\n  enum { PacketSize = packet_traits<Scalar>::size };\n  void operator()(Scalar* blockB, const Scalar* _rhs, Index rhsStride, Index rows, Index cols, Index k2)\n  {\n    Index end_k = k2 + rows;\n    Index count = 0;\n    const_blas_data_mapper<Scalar,Index,StorageOrder> rhs(_rhs,rhsStride);\n    Index packet_cols8 = nr>=8 ? (cols/8) * 8 : 0;\n    Index packet_cols4 = nr>=4 ? (cols/4) * 4 : 0;\n\n    // first part: normal case\n    for(Index j2=0; j2<k2; j2+=nr)\n    {\n      for(Index k=k2; k<end_k; k++)\n      {\n        blockB[count+0] = rhs(k,j2+0);\n        blockB[count+1] = rhs(k,j2+1);\n        if (nr>=4)\n        {\n          blockB[count+2] = rhs(k,j2+2);\n          blockB[count+3] = rhs(k,j2+3);\n        }\n        if (nr>=8)\n        {\n          blockB[count+4] = rhs(k,j2+4);\n          blockB[count+5] = rhs(k,j2+5);\n          blockB[count+6] = rhs(k,j2+6);\n          blockB[count+7] = rhs(k,j2+7);\n        }\n        count += nr;\n      }\n    }\n\n    // second part: diagonal block\n    Index end8 = nr>=8 ? 
(std::min)(k2+rows,packet_cols8) : k2;\n    if(nr>=8)\n    {\n      for(Index j2=k2; j2<end8; j2+=8)\n      {\n        // again we can split vertically in three different parts (transpose, symmetric, normal)\n        // transpose\n        for(Index k=k2; k<j2; k++)\n        {\n          blockB[count+0] = numext::conj(rhs(j2+0,k));\n          blockB[count+1] = numext::conj(rhs(j2+1,k));\n          blockB[count+2] = numext::conj(rhs(j2+2,k));\n          blockB[count+3] = numext::conj(rhs(j2+3,k));\n          blockB[count+4] = numext::conj(rhs(j2+4,k));\n          blockB[count+5] = numext::conj(rhs(j2+5,k));\n          blockB[count+6] = numext::conj(rhs(j2+6,k));\n          blockB[count+7] = numext::conj(rhs(j2+7,k));\n          count += 8;\n        }\n        // symmetric\n        Index h = 0;\n        for(Index k=j2; k<j2+8; k++)\n        {\n          // normal\n          for (Index w=0 ; w<h; ++w)\n            blockB[count+w] = rhs(k,j2+w);\n\n          blockB[count+h] = numext::real(rhs(k,k));\n\n          // transpose\n          for (Index w=h+1 ; w<8; ++w)\n            blockB[count+w] = numext::conj(rhs(j2+w,k));\n          count += 8;\n          ++h;\n        }\n        // normal\n        for(Index k=j2+8; k<end_k; k++)\n        {\n          blockB[count+0] = rhs(k,j2+0);\n          blockB[count+1] = rhs(k,j2+1);\n          blockB[count+2] = rhs(k,j2+2);\n          blockB[count+3] = rhs(k,j2+3);\n          blockB[count+4] = rhs(k,j2+4);\n          blockB[count+5] = rhs(k,j2+5);\n          blockB[count+6] = rhs(k,j2+6);\n          blockB[count+7] = rhs(k,j2+7);\n          count += 8;\n        }\n      }\n    }\n    if(nr>=4)\n    {\n      for(Index j2=end8; j2<(std::min)(k2+rows,packet_cols4); j2+=4)\n      {\n        // again we can split vertically in three different parts (transpose, symmetric, normal)\n        // transpose\n        for(Index k=k2; k<j2; k++)\n        {\n          blockB[count+0] = numext::conj(rhs(j2+0,k));\n          blockB[count+1] = numext::conj(rhs(j2+1,k));\n          blockB[count+2] = numext::conj(rhs(j2+2,k));\n          blockB[count+3] = numext::conj(rhs(j2+3,k));\n          count += 4;\n        }\n        // symmetric\n        Index h = 0;\n        for(Index k=j2; k<j2+4; k++)\n        {\n          // normal\n          for (Index w=0 ; w<h; ++w)\n            blockB[count+w] = rhs(k,j2+w);\n\n          blockB[count+h] = numext::real(rhs(k,k));\n\n          // transpose\n          for (Index w=h+1 ; w<4; ++w)\n            blockB[count+w] = numext::conj(rhs(j2+w,k));\n          count += 4;\n          ++h;\n        }\n        // normal\n        for(Index k=j2+4; k<end_k; k++)\n        {\n          blockB[count+0] = rhs(k,j2+0);\n          blockB[count+1] = rhs(k,j2+1);\n          blockB[count+2] = rhs(k,j2+2);\n          blockB[count+3] = rhs(k,j2+3);\n          count += 4;\n        }\n      }\n    }\n\n    // third part: transposed\n    if(nr>=8)\n    {\n      for(Index j2=k2+rows; j2<packet_cols8; j2+=8)\n      {\n        for(Index k=k2; k<end_k; k++)\n        {\n          blockB[count+0] = numext::conj(rhs(j2+0,k));\n          blockB[count+1] = numext::conj(rhs(j2+1,k));\n          blockB[count+2] = numext::conj(rhs(j2+2,k));\n          blockB[count+3] = numext::conj(rhs(j2+3,k));\n          blockB[count+4] = numext::conj(rhs(j2+4,k));\n          blockB[count+5] = numext::conj(rhs(j2+5,k));\n          blockB[count+6] = numext::conj(rhs(j2+6,k));\n          blockB[count+7] = numext::conj(rhs(j2+7,k));\n          count += 8;\n        }\n      }\n    }\n    
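// handle the remaining packs of 4 columns the same way (conjugate-transposed copy)\n    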
if(nr>=4)\n    {\n      for(Index j2=(std::max)(packet_cols8,k2+rows); j2<packet_cols4; j2+=4)\n      {\n        for(Index k=k2; k<end_k; k++)\n        {\n          blockB[count+0] = numext::conj(rhs(j2+0,k));\n          blockB[count+1] = numext::conj(rhs(j2+1,k));\n          blockB[count+2] = numext::conj(rhs(j2+2,k));\n          blockB[count+3] = numext::conj(rhs(j2+3,k));\n          count += 4;\n        }\n      }\n    }\n\n    // copy the remaining columns one at a time (=> the same with nr==1)\n    for(Index j2=packet_cols4; j2<cols; ++j2)\n    {\n      // transpose\n      Index half = (std::min)(end_k,j2);\n      for(Index k=k2; k<half; k++)\n      {\n        blockB[count] = numext::conj(rhs(j2,k));\n        count += 1;\n      }\n\n      if(half==j2 && half<k2+rows)\n      {\n        blockB[count] = numext::real(rhs(j2,j2));\n        count += 1;\n      }\n      else\n        half--;\n\n      // normal\n      for(Index k=half+1; k<k2+rows; k++)\n      {\n        blockB[count] = rhs(k,j2);\n        count += 1;\n      }\n    }\n  }\n};\n\n/* Optimized selfadjoint matrix * matrix (_SYMM) product built on top of\n * the general matrix matrix product.\n */\ntemplate <typename Scalar, typename Index,\n          int LhsStorageOrder, bool LhsSelfAdjoint, bool ConjugateLhs,\n          int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs,\n          int ResStorageOrder, int ResInnerStride>\nstruct product_selfadjoint_matrix;\n\ntemplate <typename Scalar, typename Index,\n          int LhsStorageOrder, bool LhsSelfAdjoint, bool ConjugateLhs,\n          int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs,\n          int ResInnerStride>\nstruct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint,ConjugateLhs, RhsStorageOrder,RhsSelfAdjoint,ConjugateRhs,RowMajor,ResInnerStride>\n{\n\n  static EIGEN_STRONG_INLINE void run(\n    Index rows, Index cols,\n    const Scalar* lhs, Index lhsStride,\n    const Scalar* rhs, Index rhsStride,\n    Scalar* res,       Index resIncr, Index resStride,\n    const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)\n  {\n    product_selfadjoint_matrix<Scalar, Index,\n      EIGEN_LOGICAL_XOR(RhsSelfAdjoint,RhsStorageOrder==RowMajor) ? ColMajor : RowMajor,\n      RhsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(RhsSelfAdjoint,ConjugateRhs),\n      EIGEN_LOGICAL_XOR(LhsSelfAdjoint,LhsStorageOrder==RowMajor) ? 
ColMajor : RowMajor,\n      LhsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsSelfAdjoint,ConjugateLhs),\n      ColMajor,ResInnerStride>\n      ::run(cols, rows,  rhs, rhsStride,  lhs, lhsStride,  res, resIncr, resStride,  alpha, blocking);\n  }\n};\n\ntemplate <typename Scalar, typename Index,\n          int LhsStorageOrder, bool ConjugateLhs,\n          int RhsStorageOrder, bool ConjugateRhs,\n          int ResInnerStride>\nstruct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs, RhsStorageOrder,false,ConjugateRhs,ColMajor,ResInnerStride>\n{\n\n  static EIGEN_DONT_INLINE void run(\n    Index rows, Index cols,\n    const Scalar* _lhs, Index lhsStride,\n    const Scalar* _rhs, Index rhsStride,\n    Scalar* res,        Index resIncr, Index resStride,\n    const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);\n};\n\ntemplate <typename Scalar, typename Index,\n          int LhsStorageOrder, bool ConjugateLhs,\n          int RhsStorageOrder, bool ConjugateRhs,\n          int ResInnerStride>\nEIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs, RhsStorageOrder,false,ConjugateRhs,ColMajor,ResInnerStride>::run(\n    Index rows, Index cols,\n    const Scalar* _lhs, Index lhsStride,\n    const Scalar* _rhs, Index rhsStride,\n    Scalar* _res,       Index resIncr, Index resStride,\n    const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)\n  {\n    Index size = rows;\n\n    typedef gebp_traits<Scalar,Scalar> Traits;\n\n    typedef const_blas_data_mapper<Scalar, Index, LhsStorageOrder> LhsMapper;\n    typedef const_blas_data_mapper<Scalar, Index, (LhsStorageOrder == RowMajor) ? ColMajor : RowMajor> LhsTransposeMapper;\n    typedef const_blas_data_mapper<Scalar, Index, RhsStorageOrder> RhsMapper;\n    typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor, Unaligned, ResInnerStride> ResMapper;\n    LhsMapper lhs(_lhs,lhsStride);\n    LhsTransposeMapper lhs_transpose(_lhs,lhsStride);\n    RhsMapper rhs(_rhs,rhsStride);\n    ResMapper res(_res, resStride, resIncr);\n\n    Index kc = blocking.kc();                   // cache block size along the K direction\n    Index mc = (std::min)(rows,blocking.mc());  // cache block size along the M direction\n    // kc must be smaller than mc\n    kc = (std::min)(kc,mc);\n    std::size_t sizeA = kc*mc;\n    std::size_t sizeB = kc*cols;\n    ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());\n    ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());\n\n    gebp_kernel<Scalar, Scalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;\n    symm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;\n    gemm_pack_rhs<Scalar, Index, RhsMapper, Traits::nr,RhsStorageOrder> pack_rhs;\n    gemm_pack_lhs<Scalar, Index, LhsTransposeMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket4Packing, LhsStorageOrder==RowMajor?ColMajor:RowMajor, true> pack_lhs_transposed;\n\n    for(Index k2=0; k2<size; k2+=kc)\n    {\n      const Index actual_kc = (std::min)(k2+kc,size)-k2;\n\n      // we have selected one row panel of rhs and one column panel of lhs\n      // pack rhs's panel into a sequential chunk of memory\n      // and expand each coeff to a constant packet for further reuse\n      pack_rhs(blockB, rhs.getSubMapper(k2,0), actual_kc, cols);\n\n      // the selected lhs's panel has to be split in three different 
parts:\n      //  1 - the transposed panel above the diagonal block => transposed packed copy\n      //  2 - the diagonal block => special packed copy\n      //  3 - the panel below the diagonal block => generic packed copy\n      for(Index i2=0; i2<k2; i2+=mc)\n      {\n        const Index actual_mc = (std::min)(i2+mc,k2)-i2;\n        // transposed packed copy\n        pack_lhs_transposed(blockA, lhs_transpose.getSubMapper(i2, k2), actual_kc, actual_mc);\n\n        gebp_kernel(res.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc, cols, alpha);\n      }\n      // the block diagonal\n      {\n        const Index actual_mc = (std::min)(k2+kc,size)-k2;\n        // symmetric packed copy\n        pack_lhs(blockA, &lhs(k2,k2), lhsStride, actual_kc, actual_mc);\n\n        gebp_kernel(res.getSubMapper(k2, 0), blockA, blockB, actual_mc, actual_kc, cols, alpha);\n      }\n\n      for(Index i2=k2+kc; i2<size; i2+=mc)\n      {\n        const Index actual_mc = (std::min)(i2+mc,size)-i2;\n        gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket4Packing, LhsStorageOrder,false>()\n          (blockA, lhs.getSubMapper(i2, k2), actual_kc, actual_mc);\n\n        gebp_kernel(res.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc, cols, alpha);\n      }\n    }\n  }\n\n// matrix * selfadjoint product\ntemplate <typename Scalar, typename Index,\n          int LhsStorageOrder, bool ConjugateLhs,\n          int RhsStorageOrder, bool ConjugateRhs,\n          int ResInnerStride>\nstruct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs, RhsStorageOrder,true,ConjugateRhs,ColMajor,ResInnerStride>\n{\n\n  static EIGEN_DONT_INLINE void run(\n    Index rows, Index cols,\n    const Scalar* _lhs, Index lhsStride,\n    const Scalar* _rhs, Index rhsStride,\n    Scalar* res,        Index resIncr, Index resStride,\n    const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);\n};\n\ntemplate <typename Scalar, typename Index,\n          int LhsStorageOrder, bool ConjugateLhs,\n          int RhsStorageOrder, bool ConjugateRhs,\n          int ResInnerStride>\nEIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs, RhsStorageOrder,true,ConjugateRhs,ColMajor,ResInnerStride>::run(\n    Index rows, Index cols,\n    const Scalar* _lhs, Index lhsStride,\n    const Scalar* _rhs, Index rhsStride,\n    Scalar* _res,       Index resIncr, Index resStride,\n    const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)\n  {\n    Index size = cols;\n\n    typedef gebp_traits<Scalar,Scalar> Traits;\n\n    typedef const_blas_data_mapper<Scalar, Index, LhsStorageOrder> LhsMapper;\n    typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor, Unaligned, ResInnerStride> ResMapper;\n    LhsMapper lhs(_lhs,lhsStride);\n    ResMapper res(_res,resStride, resIncr);\n\n    Index kc = blocking.kc();                   // cache block size along the K direction\n    Index mc = (std::min)(rows,blocking.mc());  // cache block size along the M direction\n    std::size_t sizeA = kc*mc;\n    std::size_t sizeB = kc*cols;\n    ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());\n    ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());\n\n    gebp_kernel<Scalar, Scalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;\n    gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, typename 
Traits::LhsPacket4Packing, LhsStorageOrder> pack_lhs;\n    symm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs;\n\n    for(Index k2=0; k2<size; k2+=kc)\n    {\n      const Index actual_kc = (std::min)(k2+kc,size)-k2;\n\n      pack_rhs(blockB, _rhs, rhsStride, actual_kc, cols, k2);\n\n      // => GEPP\n      for(Index i2=0; i2<rows; i2+=mc)\n      {\n        const Index actual_mc = (std::min)(i2+mc,rows)-i2;\n        pack_lhs(blockA, lhs.getSubMapper(i2, k2), actual_kc, actual_mc);\n\n        gebp_kernel(res.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc, cols, alpha);\n      }\n    }\n  }\n\n} // end namespace internal\n\n/***************************************************************************\n* Wrapper to product_selfadjoint_matrix\n***************************************************************************/\n\nnamespace internal {\n  \ntemplate<typename Lhs, int LhsMode, typename Rhs, int RhsMode>\nstruct selfadjoint_product_impl<Lhs,LhsMode,false,Rhs,RhsMode,false>\n{\n  typedef typename Product<Lhs,Rhs>::Scalar Scalar;\n  \n  typedef internal::blas_traits<Lhs> LhsBlasTraits;\n  typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;\n  typedef internal::blas_traits<Rhs> RhsBlasTraits;\n  typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;\n  \n  enum {\n    LhsIsUpper = (LhsMode&(Upper|Lower))==Upper,\n    LhsIsSelfAdjoint = (LhsMode&SelfAdjoint)==SelfAdjoint,\n    RhsIsUpper = (RhsMode&(Upper|Lower))==Upper,\n    RhsIsSelfAdjoint = (RhsMode&SelfAdjoint)==SelfAdjoint\n  };\n  \n  template<typename Dest>\n  static void run(Dest &dst, const Lhs &a_lhs, const Rhs &a_rhs, const Scalar& alpha)\n  {\n    eigen_assert(dst.rows()==a_lhs.rows() && dst.cols()==a_rhs.cols());\n\n    typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(a_lhs);\n    typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(a_rhs);\n\n    Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(a_lhs)\n                               * RhsBlasTraits::extractScalarFactor(a_rhs);\n\n    typedef internal::gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,Scalar,Scalar,\n              Lhs::MaxRowsAtCompileTime, Rhs::MaxColsAtCompileTime, Lhs::MaxColsAtCompileTime,1> BlockingType;\n\n    BlockingType blocking(lhs.rows(), rhs.cols(), lhs.cols(), 1, false);\n\n    internal::product_selfadjoint_matrix<Scalar, Index,\n      EIGEN_LOGICAL_XOR(LhsIsUpper,internal::traits<Lhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, LhsIsSelfAdjoint,\n      NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsIsUpper,bool(LhsBlasTraits::NeedToConjugate)),\n      EIGEN_LOGICAL_XOR(RhsIsUpper,internal::traits<Rhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, RhsIsSelfAdjoint,\n      NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(RhsIsUpper,bool(RhsBlasTraits::NeedToConjugate)),\n      internal::traits<Dest>::Flags&RowMajorBit  ? RowMajor : ColMajor,\n      Dest::InnerStrideAtCompileTime>\n      ::run(\n        lhs.rows(), rhs.cols(),                 // sizes\n        &lhs.coeffRef(0,0), lhs.outerStride(),  // lhs info\n        &rhs.coeffRef(0,0), rhs.outerStride(),  // rhs info\n        &dst.coeffRef(0,0), dst.innerStride(), dst.outerStride(),  // result info\n        actualAlpha, blocking                   // alpha\n      );\n  }\n};\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_SELFADJOINT_MATRIX_MATRIX_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h",
    "content": "/*\n Copyright (c) 2011, Intel Corporation. All rights reserved.\n\n Redistribution and use in source and binary forms, with or without modification,\n are permitted provided that the following conditions are met:\n\n * Redistributions of source code must retain the above copyright notice, this\n   list of conditions and the following disclaimer.\n * Redistributions in binary form must reproduce the above copyright notice,\n   this list of conditions and the following disclaimer in the documentation\n   and/or other materials provided with the distribution.\n * Neither the name of Intel Corporation nor the names of its contributors may\n   be used to endorse or promote products derived from this software without\n   specific prior written permission.\n\n THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\" AND\n ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED\n WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE\n DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR\n ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES\n (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;\n LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON\n ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\n SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n//\n ********************************************************************************\n *   Content : Eigen bindings to BLAS F77\n *   Self adjoint matrix * matrix product functionality based on ?SYMM/?HEMM.\n ********************************************************************************\n*/\n\n#ifndef EIGEN_SELFADJOINT_MATRIX_MATRIX_BLAS_H\n#define EIGEN_SELFADJOINT_MATRIX_MATRIX_BLAS_H\n\nnamespace Eigen { \n\nnamespace internal {\n\n\n/* Optimized selfadjoint matrix * matrix (?SYMM/?HEMM) product */\n\n#define EIGEN_BLAS_SYMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \\\ntemplate <typename Index, \\\n          int LhsStorageOrder, bool ConjugateLhs, \\\n          int RhsStorageOrder, bool ConjugateRhs> \\\nstruct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLhs,RhsStorageOrder,false,ConjugateRhs,ColMajor,1> \\\n{\\\n\\\n  static void run( \\\n    Index rows, Index cols, \\\n    const EIGTYPE* _lhs, Index lhsStride, \\\n    const EIGTYPE* _rhs, Index rhsStride, \\\n    EIGTYPE* res,        Index resIncr, Index resStride, \\\n    EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) \\\n  { \\\n    EIGEN_ONLY_USED_FOR_DEBUG(resIncr); \\\n    eigen_assert(resIncr == 1); \\\n    char side='L', uplo='L'; \\\n    BlasIndex m, n, lda, ldb, ldc; \\\n    const EIGTYPE *a, *b; \\\n    EIGTYPE beta(1); \\\n    MatrixX##EIGPREFIX b_tmp; \\\n\\\n/* Set transpose options */ \\\n/* Set m, n, k */ \\\n    m = convert_index<BlasIndex>(rows);  \\\n    n = convert_index<BlasIndex>(cols);  \\\n\\\n/* Set lda, ldb, ldc */ \\\n    lda = convert_index<BlasIndex>(lhsStride); \\\n    ldb = convert_index<BlasIndex>(rhsStride); \\\n    ldc = convert_index<BlasIndex>(resStride); \\\n\\\n/* Set a, b, c */ \\\n    if (LhsStorageOrder==RowMajor) uplo='U'; \\\n    a = _lhs; \\\n\\\n    if (RhsStorageOrder==RowMajor) { \\\n      Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,n,m,OuterStride<>(rhsStride)); \\\n      b_tmp = rhs.adjoint(); \\\n   
   b = b_tmp.data(); \\\n      ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \\\n    } else b = _rhs; \\\n\\\n    BLASFUNC(&side, &uplo, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \\\n\\\n  } \\\n};\n\n\n#define EIGEN_BLAS_HEMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \\\ntemplate <typename Index, \\\n          int LhsStorageOrder, bool ConjugateLhs, \\\n          int RhsStorageOrder, bool ConjugateRhs> \\\nstruct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLhs,RhsStorageOrder,false,ConjugateRhs,ColMajor,1> \\\n{\\\n  static void run( \\\n    Index rows, Index cols, \\\n    const EIGTYPE* _lhs, Index lhsStride, \\\n    const EIGTYPE* _rhs, Index rhsStride, \\\n    EIGTYPE* res,        Index resIncr, Index resStride, \\\n    EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) \\\n  { \\\n    EIGEN_ONLY_USED_FOR_DEBUG(resIncr); \\\n    eigen_assert(resIncr == 1); \\\n    char side='L', uplo='L'; \\\n    BlasIndex m, n, lda, ldb, ldc; \\\n    const EIGTYPE *a, *b; \\\n    EIGTYPE beta(1); \\\n    MatrixX##EIGPREFIX b_tmp; \\\n    Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder> a_tmp; \\\n\\\n/* Set transpose options */ \\\n/* Set m, n, k */ \\\n    m = convert_index<BlasIndex>(rows); \\\n    n = convert_index<BlasIndex>(cols); \\\n\\\n/* Set lda, ldb, ldc */ \\\n    lda = convert_index<BlasIndex>(lhsStride); \\\n    ldb = convert_index<BlasIndex>(rhsStride); \\\n    ldc = convert_index<BlasIndex>(resStride); \\\n\\\n/* Set a, b, c */ \\\n    if (((LhsStorageOrder==ColMajor) && ConjugateLhs) || ((LhsStorageOrder==RowMajor) && (!ConjugateLhs))) { \\\n      Map<const Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder>, 0, OuterStride<> > lhs(_lhs,m,m,OuterStride<>(lhsStride)); \\\n      a_tmp = lhs.conjugate(); \\\n      a = a_tmp.data(); \\\n      lda = convert_index<BlasIndex>(a_tmp.outerStride()); \\\n    } else a = _lhs; \\\n    if (LhsStorageOrder==RowMajor) uplo='U'; \\\n\\\n    if (RhsStorageOrder==ColMajor && (!ConjugateRhs)) { \\\n       b = _rhs; } \\\n    else { \\\n      if (RhsStorageOrder==ColMajor && ConjugateRhs) { \\\n        Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,m,n,OuterStride<>(rhsStride)); \\\n        b_tmp = rhs.conjugate(); \\\n      } else \\\n      if (ConjugateRhs) { \\\n        Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,n,m,OuterStride<>(rhsStride)); \\\n        b_tmp = rhs.adjoint(); \\\n      } else { \\\n        Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,n,m,OuterStride<>(rhsStride)); \\\n        b_tmp = rhs.transpose(); \\\n      } \\\n      b = b_tmp.data(); \\\n      ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \\\n    } \\\n\\\n    BLASFUNC(&side, &uplo, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \\\n\\\n  } \\\n};\n\n#ifdef EIGEN_USE_MKL\nEIGEN_BLAS_SYMM_L(double, double, d, dsymm)\nEIGEN_BLAS_SYMM_L(float, float, f, ssymm)\nEIGEN_BLAS_HEMM_L(dcomplex, MKL_Complex16, cd, zhemm)\nEIGEN_BLAS_HEMM_L(scomplex, MKL_Complex8, cf, chemm)\n#else\nEIGEN_BLAS_SYMM_L(double, double, d, dsymm_)\nEIGEN_BLAS_SYMM_L(float, float, f, ssymm_)\nEIGEN_BLAS_HEMM_L(dcomplex, double, cd, zhemm_)\nEIGEN_BLAS_HEMM_L(scomplex, float, cf, chemm_)\n#endif\n\n/* Optimized matrix * selfadjoint matrix (?SYMM/?HEMM) product */\n\n#define 
EIGEN_BLAS_SYMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \\\ntemplate <typename Index, \\\n          int LhsStorageOrder, bool ConjugateLhs, \\\n          int RhsStorageOrder, bool ConjugateRhs> \\\nstruct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateLhs,RhsStorageOrder,true,ConjugateRhs,ColMajor,1> \\\n{\\\n\\\n  static void run( \\\n    Index rows, Index cols, \\\n    const EIGTYPE* _lhs, Index lhsStride, \\\n    const EIGTYPE* _rhs, Index rhsStride, \\\n    EIGTYPE* res,        Index resIncr, Index resStride, \\\n    EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) \\\n  { \\\n    EIGEN_ONLY_USED_FOR_DEBUG(resIncr); \\\n    eigen_assert(resIncr == 1); \\\n    char side='R', uplo='L'; \\\n    BlasIndex m, n, lda, ldb, ldc; \\\n    const EIGTYPE *a, *b; \\\n    EIGTYPE beta(1); \\\n    MatrixX##EIGPREFIX b_tmp; \\\n\\\n/* Set m, n, k */ \\\n    m = convert_index<BlasIndex>(rows);  \\\n    n = convert_index<BlasIndex>(cols);  \\\n\\\n/* Set lda, ldb, ldc */ \\\n    lda = convert_index<BlasIndex>(rhsStride); \\\n    ldb = convert_index<BlasIndex>(lhsStride); \\\n    ldc = convert_index<BlasIndex>(resStride); \\\n\\\n/* Set a, b, c */ \\\n    if (RhsStorageOrder==RowMajor) uplo='U'; \\\n    a = _rhs; \\\n\\\n    if (LhsStorageOrder==RowMajor) { \\\n      Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,n,m,OuterStride<>(rhsStride)); \\\n      b_tmp = lhs.adjoint(); \\\n      b = b_tmp.data(); \\\n      ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \\\n    } else b = _lhs; \\\n\\\n    BLASFUNC(&side, &uplo, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \\\n\\\n  } \\\n};\n\n\n#define EIGEN_BLAS_HEMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \\\ntemplate <typename Index, \\\n          int LhsStorageOrder, bool ConjugateLhs, \\\n          int RhsStorageOrder, bool ConjugateRhs> \\\nstruct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateLhs,RhsStorageOrder,true,ConjugateRhs,ColMajor,1> \\\n{\\\n  static void run( \\\n    Index rows, Index cols, \\\n    const EIGTYPE* _lhs, Index lhsStride, \\\n    const EIGTYPE* _rhs, Index rhsStride, \\\n    EIGTYPE* res,        Index resIncr, Index resStride, \\\n    EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) \\\n  { \\\n    EIGEN_ONLY_USED_FOR_DEBUG(resIncr); \\\n    eigen_assert(resIncr == 1); \\\n    char side='R', uplo='L'; \\\n    BlasIndex m, n, lda, ldb, ldc; \\\n    const EIGTYPE *a, *b; \\\n    EIGTYPE beta(1); \\\n    MatrixX##EIGPREFIX b_tmp; \\\n    Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> a_tmp; \\\n\\\n/* Set m, n, k */ \\\n    m = convert_index<BlasIndex>(rows); \\\n    n = convert_index<BlasIndex>(cols); \\\n\\\n/* Set lda, ldb, ldc */ \\\n    lda = convert_index<BlasIndex>(rhsStride); \\\n    ldb = convert_index<BlasIndex>(lhsStride); \\\n    ldc = convert_index<BlasIndex>(resStride); \\\n\\\n/* Set a, b, c */ \\\n    if (((RhsStorageOrder==ColMajor) && ConjugateRhs) || ((RhsStorageOrder==RowMajor) && (!ConjugateRhs))) { \\\n      Map<const Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder>, 0, OuterStride<> > rhs(_rhs,n,n,OuterStride<>(rhsStride)); \\\n      a_tmp = rhs.conjugate(); \\\n      a = a_tmp.data(); \\\n      lda = convert_index<BlasIndex>(a_tmp.outerStride()); \\\n    } else a = _rhs; \\\n    if (RhsStorageOrder==RowMajor) uplo='U'; \\\n\\\n    if (LhsStorageOrder==ColMajor && (!ConjugateLhs)) { 
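/* a col-major, non-conjugated lhs can be passed to BLAS as-is */ 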
\\\n       b = _lhs; } \\\n    else { \\\n      if (LhsStorageOrder==ColMajor && ConjugateLhs) { \\\n        Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,m,n,OuterStride<>(lhsStride)); \\\n        b_tmp = lhs.conjugate(); \\\n      } else \\\n      if (ConjugateLhs) { \\\n        Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,n,m,OuterStride<>(lhsStride)); \\\n        b_tmp = lhs.adjoint(); \\\n      } else { \\\n        Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,n,m,OuterStride<>(lhsStride)); \\\n        b_tmp = lhs.transpose(); \\\n      } \\\n      b = b_tmp.data(); \\\n      ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \\\n    } \\\n\\\n    BLASFUNC(&side, &uplo, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \\\n  } \\\n};\n\n#ifdef EIGEN_USE_MKL\nEIGEN_BLAS_SYMM_R(double, double, d, dsymm)\nEIGEN_BLAS_SYMM_R(float, float, f, ssymm)\nEIGEN_BLAS_HEMM_R(dcomplex, MKL_Complex16, cd, zhemm)\nEIGEN_BLAS_HEMM_R(scomplex, MKL_Complex8, cf, chemm)\n#else\nEIGEN_BLAS_SYMM_R(double, double, d, dsymm_)\nEIGEN_BLAS_SYMM_R(float, float, f, ssymm_)\nEIGEN_BLAS_HEMM_R(dcomplex, double, cd, zhemm_)\nEIGEN_BLAS_HEMM_R(scomplex, float, cf, chemm_)\n#endif\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_SELFADJOINT_MATRIX_MATRIX_BLAS_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/products/SelfadjointMatrixVector.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_SELFADJOINT_MATRIX_VECTOR_H\n#define EIGEN_SELFADJOINT_MATRIX_VECTOR_H\n\nnamespace Eigen { \n\nnamespace internal {\n\n/* Optimized selfadjoint matrix * vector product:\n * This algorithm processes 2 columns at once that allows to both reduce\n * the number of load/stores of the result by a factor 2 and to reduce\n * the instruction dependency.\n */\n\ntemplate<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs, int Version=Specialized>\nstruct selfadjoint_matrix_vector_product;\n\ntemplate<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs, int Version>\nstruct selfadjoint_matrix_vector_product\n\n{\nstatic EIGEN_DONT_INLINE EIGEN_DEVICE_FUNC\nvoid run(\n  Index size,\n  const Scalar*  lhs, Index lhsStride,\n  const Scalar*  rhs,\n  Scalar* res,\n  Scalar alpha);\n};\n\ntemplate<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs, int Version>\nEIGEN_DONT_INLINE EIGEN_DEVICE_FUNC\nvoid selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,Version>::run(\n  Index size,\n  const Scalar*  lhs, Index lhsStride,\n  const Scalar*  rhs,\n  Scalar* res,\n  Scalar alpha)\n{\n  typedef typename packet_traits<Scalar>::type Packet;\n  typedef typename NumTraits<Scalar>::Real RealScalar;\n  const Index PacketSize = sizeof(Packet)/sizeof(Scalar);\n\n  enum {\n    IsRowMajor = StorageOrder==RowMajor ? 1 : 0,\n    IsLower = UpLo == Lower ? 1 : 0,\n    FirstTriangular = IsRowMajor == IsLower\n  };\n\n  conj_helper<Scalar,Scalar,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs,  IsRowMajor), ConjugateRhs> cj0;\n  conj_helper<Scalar,Scalar,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, !IsRowMajor), ConjugateRhs> cj1;\n  conj_helper<RealScalar,Scalar,false, ConjugateRhs> cjd;\n\n  conj_helper<Packet,Packet,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs,  IsRowMajor), ConjugateRhs> pcj0;\n  conj_helper<Packet,Packet,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, !IsRowMajor), ConjugateRhs> pcj1;\n\n  Scalar cjAlpha = ConjugateRhs ? numext::conj(alpha) : alpha;\n\n  Index bound = numext::maxi(Index(0), size-8) & 0xfffffffe;\n  if (FirstTriangular)\n    bound = size - bound;\n\n  for (Index j=FirstTriangular ? bound : 0;\n       j<(FirstTriangular ? size : bound);j+=2)\n  {\n    const Scalar* EIGEN_RESTRICT A0 = lhs + j*lhsStride;\n    const Scalar* EIGEN_RESTRICT A1 = lhs + (j+1)*lhsStride;\n\n    Scalar t0 = cjAlpha * rhs[j];\n    Packet ptmp0 = pset1<Packet>(t0);\n    Scalar t1 = cjAlpha * rhs[j+1];\n    Packet ptmp1 = pset1<Packet>(t1);\n\n    Scalar t2(0);\n    Packet ptmp2 = pset1<Packet>(t2);\n    Scalar t3(0);\n    Packet ptmp3 = pset1<Packet>(t3);\n\n    Index starti = FirstTriangular ? 0 : j+2;\n    Index endi   = FirstTriangular ? 
j : size;\n    Index alignedStart = (starti) + internal::first_default_aligned(&res[starti], endi-starti);\n    Index alignedEnd = alignedStart + ((endi-alignedStart)/(PacketSize))*(PacketSize);\n\n    res[j]   += cjd.pmul(numext::real(A0[j]), t0);\n    res[j+1] += cjd.pmul(numext::real(A1[j+1]), t1);\n    if(FirstTriangular)\n    {\n      res[j]   += cj0.pmul(A1[j],   t1);\n      t3       += cj1.pmul(A1[j],   rhs[j]);\n    }\n    else\n    {\n      res[j+1] += cj0.pmul(A0[j+1],t0);\n      t2 += cj1.pmul(A0[j+1], rhs[j+1]);\n    }\n\n    for (Index i=starti; i<alignedStart; ++i)\n    {\n      res[i] += cj0.pmul(A0[i], t0) + cj0.pmul(A1[i],t1);\n      t2 += cj1.pmul(A0[i], rhs[i]);\n      t3 += cj1.pmul(A1[i], rhs[i]);\n    }\n    // Yes, this is an optimization for gcc 4.3 and 4.4 (=> huge speed up)\n    // gcc 4.2 does this optimization automatically.\n    const Scalar* EIGEN_RESTRICT a0It  = A0  + alignedStart;\n    const Scalar* EIGEN_RESTRICT a1It  = A1  + alignedStart;\n    const Scalar* EIGEN_RESTRICT rhsIt = rhs + alignedStart;\n          Scalar* EIGEN_RESTRICT resIt = res + alignedStart;\n    for (Index i=alignedStart; i<alignedEnd; i+=PacketSize)\n    {\n      Packet A0i = ploadu<Packet>(a0It);  a0It  += PacketSize;\n      Packet A1i = ploadu<Packet>(a1It);  a1It  += PacketSize;\n      Packet Bi  = ploadu<Packet>(rhsIt); rhsIt += PacketSize; // FIXME should be aligned in most cases\n      Packet Xi  = pload <Packet>(resIt);\n\n      Xi    = pcj0.pmadd(A0i,ptmp0, pcj0.pmadd(A1i,ptmp1,Xi));\n      ptmp2 = pcj1.pmadd(A0i,  Bi, ptmp2);\n      ptmp3 = pcj1.pmadd(A1i,  Bi, ptmp3);\n      pstore(resIt,Xi); resIt += PacketSize;\n    }\n    for (Index i=alignedEnd; i<endi; i++)\n    {\n      res[i] += cj0.pmul(A0[i], t0) + cj0.pmul(A1[i],t1);\n      t2 += cj1.pmul(A0[i], rhs[i]);\n      t3 += cj1.pmul(A1[i], rhs[i]);\n    }\n\n    res[j]   += alpha * (t2 + predux(ptmp2));\n    res[j+1] += alpha * (t3 + predux(ptmp3));\n  }\n  for (Index j=FirstTriangular ? 0 : bound;j<(FirstTriangular ? bound : size);j++)\n  {\n    const Scalar* EIGEN_RESTRICT A0 = lhs + j*lhsStride;\n\n    Scalar t1 = cjAlpha * rhs[j];\n    Scalar t2(0);\n    res[j] += cjd.pmul(numext::real(A0[j]), t1);\n    for (Index i=FirstTriangular ? 0 : j+1; i<(FirstTriangular ? 
j : size); i++)\n    {\n      res[i] += cj0.pmul(A0[i], t1);\n      t2 += cj1.pmul(A0[i], rhs[i]);\n    }\n    res[j] += alpha * t2;\n  }\n}\n\n} // end namespace internal \n\n/***************************************************************************\n* Wrapper to product_selfadjoint_vector\n***************************************************************************/\n\nnamespace internal {\n\ntemplate<typename Lhs, int LhsMode, typename Rhs>\nstruct selfadjoint_product_impl<Lhs,LhsMode,false,Rhs,0,true>\n{\n  typedef typename Product<Lhs,Rhs>::Scalar Scalar;\n  \n  typedef internal::blas_traits<Lhs> LhsBlasTraits;\n  typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;\n  typedef typename internal::remove_all<ActualLhsType>::type ActualLhsTypeCleaned;\n  \n  typedef internal::blas_traits<Rhs> RhsBlasTraits;\n  typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;\n  typedef typename internal::remove_all<ActualRhsType>::type ActualRhsTypeCleaned;\n\n  enum { LhsUpLo = LhsMode&(Upper|Lower) };\n\n  template<typename Dest>\n  static EIGEN_DEVICE_FUNC\n  void run(Dest& dest, const Lhs &a_lhs, const Rhs &a_rhs, const Scalar& alpha)\n  {\n    typedef typename Dest::Scalar ResScalar;\n    typedef typename Rhs::Scalar RhsScalar;\n    typedef Map<Matrix<ResScalar,Dynamic,1>, EIGEN_PLAIN_ENUM_MIN(AlignedMax,internal::packet_traits<ResScalar>::size)> MappedDest;\n    \n    eigen_assert(dest.rows()==a_lhs.rows() && dest.cols()==a_rhs.cols());\n\n    typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(a_lhs);\n    typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(a_rhs);\n\n    Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(a_lhs)\n                               * RhsBlasTraits::extractScalarFactor(a_rhs);\n\n    enum {\n      EvalToDest = (Dest::InnerStrideAtCompileTime==1),\n      UseRhs = (ActualRhsTypeCleaned::InnerStrideAtCompileTime==1)\n    };\n    \n    internal::gemv_static_vector_if<ResScalar,Dest::SizeAtCompileTime,Dest::MaxSizeAtCompileTime,!EvalToDest> static_dest;\n    internal::gemv_static_vector_if<RhsScalar,ActualRhsTypeCleaned::SizeAtCompileTime,ActualRhsTypeCleaned::MaxSizeAtCompileTime,!UseRhs> static_rhs;\n\n    ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(),\n                                                  EvalToDest ? dest.data() : static_dest.data());\n                                                  \n    ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhsPtr,rhs.size(),\n        UseRhs ? const_cast<RhsScalar*>(rhs.data()) : static_rhs.data());\n    \n    if(!EvalToDest)\n    {\n      #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN\n      Index size = dest.size();\n      EIGEN_DENSE_STORAGE_CTOR_PLUGIN\n      #endif\n      MappedDest(actualDestPtr, dest.size()) = dest;\n    }\n      \n    if(!UseRhs)\n    {\n      #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN\n      Index size = rhs.size();\n      EIGEN_DENSE_STORAGE_CTOR_PLUGIN\n      #endif\n      Map<typename ActualRhsTypeCleaned::PlainObject>(actualRhsPtr, rhs.size()) = rhs;\n    }\n      \n      \n    internal::selfadjoint_matrix_vector_product<Scalar, Index, (internal::traits<ActualLhsTypeCleaned>::Flags&RowMajorBit) ? 
RowMajor : ColMajor,\n                                                int(LhsUpLo), bool(LhsBlasTraits::NeedToConjugate), bool(RhsBlasTraits::NeedToConjugate)>::run\n      (\n        lhs.rows(),                             // size\n        &lhs.coeffRef(0,0),  lhs.outerStride(), // lhs info\n        actualRhsPtr,                           // rhs info\n        actualDestPtr,                          // result info\n        actualAlpha                             // scale factor\n      );\n    \n    if(!EvalToDest)\n      dest = MappedDest(actualDestPtr, dest.size());\n  }\n};\n\ntemplate<typename Lhs, typename Rhs, int RhsMode>\nstruct selfadjoint_product_impl<Lhs,0,true,Rhs,RhsMode,false>\n{\n  typedef typename Product<Lhs,Rhs>::Scalar Scalar;\n  enum { RhsUpLo = RhsMode&(Upper|Lower)  };\n\n  template<typename Dest>\n  static void run(Dest& dest, const Lhs &a_lhs, const Rhs &a_rhs, const Scalar& alpha)\n  {\n    // let's simply transpose the product\n    Transpose<Dest> destT(dest);\n    selfadjoint_product_impl<Transpose<const Rhs>, int(RhsUpLo)==Upper ? Lower : Upper, false,\n                             Transpose<const Lhs>, 0, true>::run(destT, a_rhs.transpose(), a_lhs.transpose(), alpha);\n  }\n};\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_SELFADJOINT_MATRIX_VECTOR_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h",
    "content": "/*\n Copyright (c) 2011, Intel Corporation. All rights reserved.\n\n Redistribution and use in source and binary forms, with or without modification,\n are permitted provided that the following conditions are met:\n\n * Redistributions of source code must retain the above copyright notice, this\n   list of conditions and the following disclaimer.\n * Redistributions in binary form must reproduce the above copyright notice,\n   this list of conditions and the following disclaimer in the documentation\n   and/or other materials provided with the distribution.\n * Neither the name of Intel Corporation nor the names of its contributors may\n   be used to endorse or promote products derived from this software without\n   specific prior written permission.\n\n THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\" AND\n ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED\n WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE\n DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR\n ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES\n (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;\n LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON\n ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\n SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\n ********************************************************************************\n *   Content : Eigen bindings to BLAS F77\n *   Selfadjoint matrix-vector product functionality based on ?SYMV/HEMV.\n ********************************************************************************\n*/\n\n#ifndef EIGEN_SELFADJOINT_MATRIX_VECTOR_BLAS_H\n#define EIGEN_SELFADJOINT_MATRIX_VECTOR_BLAS_H\n\nnamespace Eigen { \n\nnamespace internal {\n\n/**********************************************************************\n* This file implements selfadjoint matrix-vector multiplication using BLAS\n**********************************************************************/\n\n// symv/hemv specialization\n\ntemplate<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs>\nstruct selfadjoint_matrix_vector_product_symv :\n  selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,BuiltIn> {};\n\n#define EIGEN_BLAS_SYMV_SPECIALIZE(Scalar) \\\ntemplate<typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs> \\\nstruct selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,Specialized> { \\\nstatic void run( \\\n  Index size, const Scalar*  lhs, Index lhsStride, \\\n  const Scalar* _rhs, Scalar* res, Scalar alpha) { \\\n    enum {\\\n      IsColMajor = StorageOrder==ColMajor \\\n    }; \\\n    if (IsColMajor == ConjugateLhs) {\\\n      selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,BuiltIn>::run( \\\n        size, lhs, lhsStride, _rhs, res, alpha);  \\\n    } else {\\\n      selfadjoint_matrix_vector_product_symv<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs>::run( \\\n        size, lhs, lhsStride, _rhs, res, alpha);  \\\n    }\\\n  } \\\n}; \\\n\nEIGEN_BLAS_SYMV_SPECIALIZE(double)\nEIGEN_BLAS_SYMV_SPECIALIZE(float)\nEIGEN_BLAS_SYMV_SPECIALIZE(dcomplex)\nEIGEN_BLAS_SYMV_SPECIALIZE(scomplex)\n\n#define 
EIGEN_BLAS_SYMV_SPECIALIZATION(EIGTYPE,BLASTYPE,BLASFUNC) \\\ntemplate<typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs> \\\nstruct selfadjoint_matrix_vector_product_symv<EIGTYPE,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs> \\\n{ \\\ntypedef Matrix<EIGTYPE,Dynamic,1,ColMajor> SYMVVector;\\\n\\\nstatic void run( \\\nIndex size, const EIGTYPE*  lhs, Index lhsStride, \\\nconst EIGTYPE* _rhs, EIGTYPE* res, EIGTYPE alpha) \\\n{ \\\n  enum {\\\n    IsRowMajor = StorageOrder==RowMajor ? 1 : 0, \\\n    IsLower = UpLo == Lower ? 1 : 0 \\\n  }; \\\n  BlasIndex n=convert_index<BlasIndex>(size), lda=convert_index<BlasIndex>(lhsStride), incx=1, incy=1; \\\n  EIGTYPE beta(1); \\\n  const EIGTYPE *x_ptr; \\\n  char uplo=(IsRowMajor) ? (IsLower ? 'U' : 'L') : (IsLower ? 'L' : 'U'); \\\n  SYMVVector x_tmp; \\\n  if (ConjugateRhs) { \\\n    Map<const SYMVVector, 0 > map_x(_rhs,size,1); \\\n    x_tmp=map_x.conjugate(); \\\n    x_ptr=x_tmp.data(); \\\n  } else x_ptr=_rhs; \\\n  BLASFUNC(&uplo, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)lhs, &lda, (const BLASTYPE*)x_ptr, &incx, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &incy); \\\n}\\\n};\n\n#ifdef EIGEN_USE_MKL\nEIGEN_BLAS_SYMV_SPECIALIZATION(double,   double, dsymv)\nEIGEN_BLAS_SYMV_SPECIALIZATION(float,    float,  ssymv)\nEIGEN_BLAS_SYMV_SPECIALIZATION(dcomplex, MKL_Complex16, zhemv)\nEIGEN_BLAS_SYMV_SPECIALIZATION(scomplex, MKL_Complex8,  chemv)\n#else\nEIGEN_BLAS_SYMV_SPECIALIZATION(double,   double, dsymv_)\nEIGEN_BLAS_SYMV_SPECIALIZATION(float,    float,  ssymv_)\nEIGEN_BLAS_SYMV_SPECIALIZATION(dcomplex, double, zhemv_)\nEIGEN_BLAS_SYMV_SPECIALIZATION(scomplex, float,  chemv_)\n#endif\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_SELFADJOINT_MATRIX_VECTOR_BLAS_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/products/SelfadjointProduct.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_SELFADJOINT_PRODUCT_H\n#define EIGEN_SELFADJOINT_PRODUCT_H\n\n/**********************************************************************\n* This file implements a self adjoint product: C += A A^T updating only\n* half of the selfadjoint matrix C.\n* It corresponds to the level 3 SYRK and level 2 SYR Blas routines.\n**********************************************************************/\n\nnamespace Eigen { \n\n\ntemplate<typename Scalar, typename Index, int UpLo, bool ConjLhs, bool ConjRhs>\nstruct selfadjoint_rank1_update<Scalar,Index,ColMajor,UpLo,ConjLhs,ConjRhs>\n{\n  static void run(Index size, Scalar* mat, Index stride, const Scalar* vecX, const Scalar* vecY, const Scalar& alpha)\n  {\n    internal::conj_if<ConjRhs> cj;\n    typedef Map<const Matrix<Scalar,Dynamic,1> > OtherMap;\n    typedef typename internal::conditional<ConjLhs,typename OtherMap::ConjugateReturnType,const OtherMap&>::type ConjLhsType;\n    for (Index i=0; i<size; ++i)\n    {\n      Map<Matrix<Scalar,Dynamic,1> >(mat+stride*i+(UpLo==Lower ? i : 0), (UpLo==Lower ? size-i : (i+1)))\n          += (alpha * cj(vecY[i])) * ConjLhsType(OtherMap(vecX+(UpLo==Lower ? i : 0),UpLo==Lower ? size-i : (i+1)));\n    }\n  }\n};\n\ntemplate<typename Scalar, typename Index, int UpLo, bool ConjLhs, bool ConjRhs>\nstruct selfadjoint_rank1_update<Scalar,Index,RowMajor,UpLo,ConjLhs,ConjRhs>\n{\n  static void run(Index size, Scalar* mat, Index stride, const Scalar* vecX, const Scalar* vecY, const Scalar& alpha)\n  {\n    selfadjoint_rank1_update<Scalar,Index,ColMajor,UpLo==Lower?Upper:Lower,ConjRhs,ConjLhs>::run(size,mat,stride,vecY,vecX,alpha);\n  }\n};\n\ntemplate<typename MatrixType, typename OtherType, int UpLo, bool OtherIsVector = OtherType::IsVectorAtCompileTime>\nstruct selfadjoint_product_selector;\n\ntemplate<typename MatrixType, typename OtherType, int UpLo>\nstruct selfadjoint_product_selector<MatrixType,OtherType,UpLo,true>\n{\n  static void run(MatrixType& mat, const OtherType& other, const typename MatrixType::Scalar& alpha)\n  {\n    typedef typename MatrixType::Scalar Scalar;\n    typedef internal::blas_traits<OtherType> OtherBlasTraits;\n    typedef typename OtherBlasTraits::DirectLinearAccessType ActualOtherType;\n    typedef typename internal::remove_all<ActualOtherType>::type _ActualOtherType;\n    typename internal::add_const_on_value_type<ActualOtherType>::type actualOther = OtherBlasTraits::extract(other.derived());\n\n    Scalar actualAlpha = alpha * OtherBlasTraits::extractScalarFactor(other.derived());\n\n    enum {\n      StorageOrder = (internal::traits<MatrixType>::Flags&RowMajorBit) ? RowMajor : ColMajor,\n      UseOtherDirectly = _ActualOtherType::InnerStrideAtCompileTime==1\n    };\n    internal::gemv_static_vector_if<Scalar,OtherType::SizeAtCompileTime,OtherType::MaxSizeAtCompileTime,!UseOtherDirectly> static_other;\n\n    ei_declare_aligned_stack_constructed_variable(Scalar, actualOtherPtr, other.size(),\n      (UseOtherDirectly ? 
const_cast<Scalar*>(actualOther.data()) : static_other.data()));\n      \n    if(!UseOtherDirectly)\n      Map<typename _ActualOtherType::PlainObject>(actualOtherPtr, actualOther.size()) = actualOther;\n    \n    selfadjoint_rank1_update<Scalar,Index,StorageOrder,UpLo,\n                              OtherBlasTraits::NeedToConjugate  && NumTraits<Scalar>::IsComplex,\n                            (!OtherBlasTraits::NeedToConjugate) && NumTraits<Scalar>::IsComplex>\n          ::run(other.size(), mat.data(), mat.outerStride(), actualOtherPtr, actualOtherPtr, actualAlpha);\n  }\n};\n\ntemplate<typename MatrixType, typename OtherType, int UpLo>\nstruct selfadjoint_product_selector<MatrixType,OtherType,UpLo,false>\n{\n  static void run(MatrixType& mat, const OtherType& other, const typename MatrixType::Scalar& alpha)\n  {\n    typedef typename MatrixType::Scalar Scalar;\n    typedef internal::blas_traits<OtherType> OtherBlasTraits;\n    typedef typename OtherBlasTraits::DirectLinearAccessType ActualOtherType;\n    typedef typename internal::remove_all<ActualOtherType>::type _ActualOtherType;\n    typename internal::add_const_on_value_type<ActualOtherType>::type actualOther = OtherBlasTraits::extract(other.derived());\n\n    Scalar actualAlpha = alpha * OtherBlasTraits::extractScalarFactor(other.derived());\n\n    enum {\n      IsRowMajor = (internal::traits<MatrixType>::Flags&RowMajorBit) ? 1 : 0,\n      OtherIsRowMajor = _ActualOtherType::Flags&RowMajorBit ? 1 : 0\n    };\n\n    Index size = mat.cols();\n    Index depth = actualOther.cols();\n\n    typedef internal::gemm_blocking_space<IsRowMajor ? RowMajor : ColMajor,Scalar,Scalar,\n              MatrixType::MaxColsAtCompileTime, MatrixType::MaxColsAtCompileTime, _ActualOtherType::MaxColsAtCompileTime> BlockingType;\n\n    BlockingType blocking(size, size, depth, 1, false);\n\n\n    internal::general_matrix_matrix_triangular_product<Index,\n      Scalar, OtherIsRowMajor ? RowMajor : ColMajor,   OtherBlasTraits::NeedToConjugate  && NumTraits<Scalar>::IsComplex,\n      Scalar, OtherIsRowMajor ? ColMajor : RowMajor, (!OtherBlasTraits::NeedToConjugate) && NumTraits<Scalar>::IsComplex,\n      IsRowMajor ? RowMajor : ColMajor, MatrixType::InnerStrideAtCompileTime, UpLo>\n      ::run(size, depth,\n            actualOther.data(), actualOther.outerStride(), actualOther.data(), actualOther.outerStride(),\n            mat.data(), mat.innerStride(), mat.outerStride(), actualAlpha, blocking);\n  }\n};\n\n// high level API\n\ntemplate<typename MatrixType, unsigned int UpLo>\ntemplate<typename DerivedU>\nEIGEN_DEVICE_FUNC SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo>\n::rankUpdate(const MatrixBase<DerivedU>& u, const Scalar& alpha)\n{\n  selfadjoint_product_selector<MatrixType,DerivedU,UpLo>::run(_expression().const_cast_derived(), u.derived(), alpha);\n\n  return *this;\n}\n\n} // end namespace Eigen\n\n#endif // EIGEN_SELFADJOINT_PRODUCT_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/products/SelfadjointRank2Update.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_SELFADJOINTRANK2UPTADE_H\n#define EIGEN_SELFADJOINTRANK2UPTADE_H\n\nnamespace Eigen { \n\nnamespace internal {\n\n/* Optimized selfadjoint matrix += alpha * uv' + conj(alpha)*vu'\n * It corresponds to the Level2 syr2 BLAS routine\n */\n\ntemplate<typename Scalar, typename Index, typename UType, typename VType, int UpLo>\nstruct selfadjoint_rank2_update_selector;\n\ntemplate<typename Scalar, typename Index, typename UType, typename VType>\nstruct selfadjoint_rank2_update_selector<Scalar,Index,UType,VType,Lower>\n{\n  static EIGEN_DEVICE_FUNC\n  void run(Scalar* mat, Index stride, const UType& u, const VType& v, const Scalar& alpha)\n  {\n    const Index size = u.size();\n    for (Index i=0; i<size; ++i)\n    {\n      Map<Matrix<Scalar,Dynamic,1> >(mat+stride*i+i, size-i) +=\n                        (numext::conj(alpha) * numext::conj(u.coeff(i))) * v.tail(size-i)\n                      + (alpha * numext::conj(v.coeff(i))) * u.tail(size-i);\n    }\n  }\n};\n\ntemplate<typename Scalar, typename Index, typename UType, typename VType>\nstruct selfadjoint_rank2_update_selector<Scalar,Index,UType,VType,Upper>\n{\n  static void run(Scalar* mat, Index stride, const UType& u, const VType& v, const Scalar& alpha)\n  {\n    const Index size = u.size();\n    for (Index i=0; i<size; ++i)\n      Map<Matrix<Scalar,Dynamic,1> >(mat+stride*i, i+1) +=\n                        (numext::conj(alpha)  * numext::conj(u.coeff(i))) * v.head(i+1)\n                      + (alpha * numext::conj(v.coeff(i))) * u.head(i+1);\n  }\n};\n\ntemplate<bool Cond, typename T> struct conj_expr_if\n  : conditional<!Cond, const T&,\n      CwiseUnaryOp<scalar_conjugate_op<typename traits<T>::Scalar>,T> > {};\n\n} // end namespace internal\n\ntemplate<typename MatrixType, unsigned int UpLo>\ntemplate<typename DerivedU, typename DerivedV>\nEIGEN_DEVICE_FUNC SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo>\n::rankUpdate(const MatrixBase<DerivedU>& u, const MatrixBase<DerivedV>& v, const Scalar& alpha)\n{\n  typedef internal::blas_traits<DerivedU> UBlasTraits;\n  typedef typename UBlasTraits::DirectLinearAccessType ActualUType;\n  typedef typename internal::remove_all<ActualUType>::type _ActualUType;\n  typename internal::add_const_on_value_type<ActualUType>::type actualU = UBlasTraits::extract(u.derived());\n\n  typedef internal::blas_traits<DerivedV> VBlasTraits;\n  typedef typename VBlasTraits::DirectLinearAccessType ActualVType;\n  typedef typename internal::remove_all<ActualVType>::type _ActualVType;\n  typename internal::add_const_on_value_type<ActualVType>::type actualV = VBlasTraits::extract(v.derived());\n\n  // If MatrixType is row major, then we use the routine for lower triangular in the upper triangular case and\n  // vice versa, and take the complex conjugate of all coefficients and vector entries.\n\n  enum { IsRowMajor = (internal::traits<MatrixType>::Flags&RowMajorBit) ? 
1 : 0 };\n  Scalar actualAlpha = alpha * UBlasTraits::extractScalarFactor(u.derived())\n                             * numext::conj(VBlasTraits::extractScalarFactor(v.derived()));\n  if (IsRowMajor)\n    actualAlpha = numext::conj(actualAlpha);\n\n  typedef typename internal::remove_all<typename internal::conj_expr_if<int(IsRowMajor) ^ int(UBlasTraits::NeedToConjugate), _ActualUType>::type>::type UType;\n  typedef typename internal::remove_all<typename internal::conj_expr_if<int(IsRowMajor) ^ int(VBlasTraits::NeedToConjugate), _ActualVType>::type>::type VType;\n  internal::selfadjoint_rank2_update_selector<Scalar, Index, UType, VType,\n    (IsRowMajor ? int(UpLo==Upper ? Lower : Upper) : UpLo)>\n    ::run(_expression().const_cast_derived().data(),_expression().outerStride(),UType(actualU),VType(actualV),actualAlpha);\n\n  return *this;\n}\n\n} // end namespace Eigen\n\n#endif // EIGEN_SELFADJOINTRANK2UPTADE_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/products/TriangularMatrixMatrix.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_TRIANGULAR_MATRIX_MATRIX_H\n#define EIGEN_TRIANGULAR_MATRIX_MATRIX_H\n\nnamespace Eigen { \n\nnamespace internal {\n\n// template<typename Scalar, int mr, int StorageOrder, bool Conjugate, int Mode>\n// struct gemm_pack_lhs_triangular\n// {\n//   Matrix<Scalar,mr,mr,\n//   void operator()(Scalar* blockA, const EIGEN_RESTRICT Scalar* _lhs, int lhsStride, int depth, int rows)\n//   {\n//     conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;\n//     const_blas_data_mapper<Scalar, StorageOrder> lhs(_lhs,lhsStride);\n//     int count = 0;\n//     const int peeled_mc = (rows/mr)*mr;\n//     for(int i=0; i<peeled_mc; i+=mr)\n//     {\n//       for(int k=0; k<depth; k++)\n//         for(int w=0; w<mr; w++)\n//           blockA[count++] = cj(lhs(i+w, k));\n//     }\n//     for(int i=peeled_mc; i<rows; i++)\n//     {\n//       for(int k=0; k<depth; k++)\n//         blockA[count++] = cj(lhs(i, k));\n//     }\n//   }\n// };\n\n/* Optimized triangular matrix * matrix (_TRMM++) product built on top of\n * the general matrix matrix product.\n */\ntemplate <typename Scalar, typename Index,\n          int Mode, bool LhsIsTriangular,\n          int LhsStorageOrder, bool ConjugateLhs,\n          int RhsStorageOrder, bool ConjugateRhs,\n          int ResStorageOrder, int ResInnerStride,\n          int Version = Specialized>\nstruct product_triangular_matrix_matrix;\n\ntemplate <typename Scalar, typename Index,\n          int Mode, bool LhsIsTriangular,\n          int LhsStorageOrder, bool ConjugateLhs,\n          int RhsStorageOrder, bool ConjugateRhs,\n          int ResInnerStride, int Version>\nstruct product_triangular_matrix_matrix<Scalar,Index,Mode,LhsIsTriangular,\n                                           LhsStorageOrder,ConjugateLhs,\n                                           RhsStorageOrder,ConjugateRhs,RowMajor,ResInnerStride,Version>\n{\n  static EIGEN_STRONG_INLINE void run(\n    Index rows, Index cols, Index depth,\n    const Scalar* lhs, Index lhsStride,\n    const Scalar* rhs, Index rhsStride,\n    Scalar* res,       Index resIncr, Index resStride,\n    const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)\n  {\n    product_triangular_matrix_matrix<Scalar, Index,\n      (Mode&(UnitDiag|ZeroDiag)) | ((Mode&Upper) ? Lower : Upper),\n      (!LhsIsTriangular),\n      RhsStorageOrder==RowMajor ? ColMajor : RowMajor,\n      ConjugateRhs,\n      LhsStorageOrder==RowMajor ? 
ColMajor : RowMajor,\n      ConjugateLhs,\n      ColMajor, ResInnerStride>\n      ::run(cols, rows, depth, rhs, rhsStride, lhs, lhsStride, res, resIncr, resStride, alpha, blocking);\n  }\n};\n\n// implements col-major += alpha * op(triangular) * op(general)\ntemplate <typename Scalar, typename Index, int Mode,\n          int LhsStorageOrder, bool ConjugateLhs,\n          int RhsStorageOrder, bool ConjugateRhs,\n          int ResInnerStride, int Version>\nstruct product_triangular_matrix_matrix<Scalar,Index,Mode,true,\n                                           LhsStorageOrder,ConjugateLhs,\n                                           RhsStorageOrder,ConjugateRhs,ColMajor,ResInnerStride,Version>\n{\n  \n  typedef gebp_traits<Scalar,Scalar> Traits;\n  enum {\n    SmallPanelWidth   = 2 * EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr),\n    IsLower = (Mode&Lower) == Lower,\n    SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1\n  };\n\n  static EIGEN_DONT_INLINE void run(\n    Index _rows, Index _cols, Index _depth,\n    const Scalar* _lhs, Index lhsStride,\n    const Scalar* _rhs, Index rhsStride,\n    Scalar* res,        Index resIncr, Index resStride,\n    const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);\n};\n\ntemplate <typename Scalar, typename Index, int Mode,\n          int LhsStorageOrder, bool ConjugateLhs,\n          int RhsStorageOrder, bool ConjugateRhs,\n          int ResInnerStride, int Version>\nEIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,true,\n                                                        LhsStorageOrder,ConjugateLhs,\n                                                        RhsStorageOrder,ConjugateRhs,ColMajor,ResInnerStride,Version>::run(\n    Index _rows, Index _cols, Index _depth,\n    const Scalar* _lhs, Index lhsStride,\n    const Scalar* _rhs, Index rhsStride,\n    Scalar* _res,       Index resIncr, Index resStride,\n    const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)\n  {\n    // strip zeros\n    Index diagSize  = (std::min)(_rows,_depth);\n    Index rows      = IsLower ? _rows : diagSize;\n    Index depth     = IsLower ? 
diagSize : _depth;\n    Index cols      = _cols;\n    \n    typedef const_blas_data_mapper<Scalar, Index, LhsStorageOrder> LhsMapper;\n    typedef const_blas_data_mapper<Scalar, Index, RhsStorageOrder> RhsMapper;\n    typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor, Unaligned, ResInnerStride> ResMapper;\n    LhsMapper lhs(_lhs,lhsStride);\n    RhsMapper rhs(_rhs,rhsStride);\n    ResMapper res(_res, resStride, resIncr);\n\n    Index kc = blocking.kc();                   // cache block size along the K direction\n    Index mc = (std::min)(rows,blocking.mc());  // cache block size along the M direction\n    // The small panel size must not be larger than blocking size.\n    // Usually this should never be the case because SmallPanelWidth^2 is very small\n    // compared to L2 cache size, but let's be safe:\n    Index panelWidth = (std::min)(Index(SmallPanelWidth),(std::min)(kc,mc));\n\n    std::size_t sizeA = kc*mc;\n    std::size_t sizeB = kc*cols;\n\n    ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());\n    ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());\n\n    // To work around an \"error: member reference base type 'Matrix<...>\n    // (Eigen::internal::constructor_without_unaligned_array_assert (*)())' is\n    // not a structure or union\" compilation error in nvcc (tested V8.0.61),\n    // create a dummy internal::constructor_without_unaligned_array_assert\n    // object to pass to the Matrix constructor.\n    internal::constructor_without_unaligned_array_assert a;\n    Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,LhsStorageOrder> triangularBuffer(a);\n    triangularBuffer.setZero();\n    if((Mode&ZeroDiag)==ZeroDiag)\n      triangularBuffer.diagonal().setZero();\n    else\n      triangularBuffer.diagonal().setOnes();\n\n    gebp_kernel<Scalar, Scalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;\n    gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket4Packing, LhsStorageOrder> pack_lhs;\n    gemm_pack_rhs<Scalar, Index, RhsMapper, Traits::nr,RhsStorageOrder> pack_rhs;\n\n    for(Index k2=IsLower ? depth : 0;\n        IsLower ? k2>0 : k2<depth;\n        IsLower ? k2-=kc : k2+=kc)\n    {\n      Index actual_kc = (std::min)(IsLower ? k2 : depth-k2, kc);\n      Index actual_k2 = IsLower ? k2-actual_kc : k2;\n\n      // align blocks with the end of the triangular part for trapezoidal lhs\n      if((!IsLower)&&(k2<rows)&&(k2+actual_kc>rows))\n      {\n        actual_kc = rows-k2;\n        k2 = k2+actual_kc-kc;\n      }\n\n      pack_rhs(blockB, rhs.getSubMapper(actual_k2,0), actual_kc, cols);\n\n      // the selected lhs's panel has to be split in three different parts:\n      //  1 - the part which is zero => skip it\n      //  2 - the diagonal block => special kernel\n      //  3 - the dense panel below (lower case) or above (upper case) the diagonal block => GEPP\n\n      // the block diagonal, if any:\n      if(IsLower || actual_k2<rows)\n      {\n        // for each small vertical panels of lhs\n        for (Index k1=0; k1<actual_kc; k1+=panelWidth)\n        {\n          Index actualPanelWidth = std::min<Index>(actual_kc-k1, panelWidth);\n          Index lengthTarget = IsLower ? 
actual_kc-k1-actualPanelWidth : k1;\n          Index startBlock   = actual_k2+k1;\n          Index blockBOffset = k1;\n\n          // => GEBP with the micro triangular block\n          // The trick is to pack this micro block while filling the opposite triangular part with zeros.\n          // To this end we do an extra triangular copy to a small temporary buffer\n          for (Index k=0;k<actualPanelWidth;++k)\n          {\n            if (SetDiag)\n              triangularBuffer.coeffRef(k,k) = lhs(startBlock+k,startBlock+k);\n            for (Index i=IsLower ? k+1 : 0; IsLower ? i<actualPanelWidth : i<k; ++i)\n              triangularBuffer.coeffRef(i,k) = lhs(startBlock+i,startBlock+k);\n          }\n          pack_lhs(blockA, LhsMapper(triangularBuffer.data(), triangularBuffer.outerStride()), actualPanelWidth, actualPanelWidth);\n\n          gebp_kernel(res.getSubMapper(startBlock, 0), blockA, blockB,\n                      actualPanelWidth, actualPanelWidth, cols, alpha,\n                      actualPanelWidth, actual_kc, 0, blockBOffset);\n\n          // GEBP with remaining micro panel\n          if (lengthTarget>0)\n          {\n            Index startTarget  = IsLower ? actual_k2+k1+actualPanelWidth : actual_k2;\n\n            pack_lhs(blockA, lhs.getSubMapper(startTarget,startBlock), actualPanelWidth, lengthTarget);\n\n            gebp_kernel(res.getSubMapper(startTarget, 0), blockA, blockB,\n                        lengthTarget, actualPanelWidth, cols, alpha,\n                        actualPanelWidth, actual_kc, 0, blockBOffset);\n          }\n        }\n      }\n      // the part below (lower case) or above (upper case) the diagonal => GEPP\n      {\n        Index start = IsLower ? k2 : 0;\n        Index end   = IsLower ? rows : (std::min)(actual_k2,rows);\n        for(Index i2=start; i2<end; i2+=mc)\n        {\n          const Index actual_mc = (std::min)(i2+mc,end)-i2;\n          gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr,Traits::LhsProgress, typename Traits::LhsPacket4Packing, LhsStorageOrder,false>()\n            (blockA, lhs.getSubMapper(i2, actual_k2), actual_kc, actual_mc);\n\n          gebp_kernel(res.getSubMapper(i2, 0), blockA, blockB, actual_mc,\n                      actual_kc, cols, alpha, -1, -1, 0, 0);\n        }\n      }\n    }\n  }\n\n// implements col-major += alpha * op(general) * op(triangular)\ntemplate <typename Scalar, typename Index, int Mode,\n          int LhsStorageOrder, bool ConjugateLhs,\n          int RhsStorageOrder, bool ConjugateRhs,\n          int ResInnerStride, int Version>\nstruct product_triangular_matrix_matrix<Scalar,Index,Mode,false,\n                                        LhsStorageOrder,ConjugateLhs,\n                                        RhsStorageOrder,ConjugateRhs,ColMajor,ResInnerStride,Version>\n{\n  typedef gebp_traits<Scalar,Scalar> Traits;\n  enum {\n    SmallPanelWidth   = EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr),\n    IsLower = (Mode&Lower) == Lower,\n    SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 
0 : 1\n  };\n\n  static EIGEN_DONT_INLINE void run(\n    Index _rows, Index _cols, Index _depth,\n    const Scalar* _lhs, Index lhsStride,\n    const Scalar* _rhs, Index rhsStride,\n    Scalar* res,        Index resIncr, Index resStride,\n    const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);\n};\n\ntemplate <typename Scalar, typename Index, int Mode,\n          int LhsStorageOrder, bool ConjugateLhs,\n          int RhsStorageOrder, bool ConjugateRhs,\n          int ResInnerStride, int Version>\nEIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,false,\n                                                        LhsStorageOrder,ConjugateLhs,\n                                                        RhsStorageOrder,ConjugateRhs,ColMajor,ResInnerStride,Version>::run(\n    Index _rows, Index _cols, Index _depth,\n    const Scalar* _lhs, Index lhsStride,\n    const Scalar* _rhs, Index rhsStride,\n    Scalar* _res,       Index resIncr, Index resStride,\n    const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)\n  {\n    const Index PacketBytes = packet_traits<Scalar>::size*sizeof(Scalar);\n    // strip zeros\n    Index diagSize  = (std::min)(_cols,_depth);\n    Index rows      = _rows;\n    Index depth     = IsLower ? _depth : diagSize;\n    Index cols      = IsLower ? diagSize : _cols;\n    \n    typedef const_blas_data_mapper<Scalar, Index, LhsStorageOrder> LhsMapper;\n    typedef const_blas_data_mapper<Scalar, Index, RhsStorageOrder> RhsMapper;\n    typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor, Unaligned, ResInnerStride> ResMapper;\n    LhsMapper lhs(_lhs,lhsStride);\n    RhsMapper rhs(_rhs,rhsStride);\n    ResMapper res(_res, resStride, resIncr);\n\n    Index kc = blocking.kc();                   // cache block size along the K direction\n    Index mc = (std::min)(rows,blocking.mc());  // cache block size along the M direction\n\n    std::size_t sizeA = kc*mc;\n    std::size_t sizeB = kc*cols+EIGEN_MAX_ALIGN_BYTES/sizeof(Scalar);\n\n    ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());\n    ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());\n\n    internal::constructor_without_unaligned_array_assert a;\n    Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,RhsStorageOrder> triangularBuffer(a);\n    triangularBuffer.setZero();\n    if((Mode&ZeroDiag)==ZeroDiag)\n      triangularBuffer.diagonal().setZero();\n    else\n      triangularBuffer.diagonal().setOnes();\n\n    gebp_kernel<Scalar, Scalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;\n    gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket4Packing, LhsStorageOrder> pack_lhs;\n    gemm_pack_rhs<Scalar, Index, RhsMapper, Traits::nr,RhsStorageOrder> pack_rhs;\n    gemm_pack_rhs<Scalar, Index, RhsMapper, Traits::nr,RhsStorageOrder,false,true> pack_rhs_panel;\n\n    for(Index k2=IsLower ? 0 : depth;\n        IsLower ? k2<depth  : k2>0;\n        IsLower ? k2+=kc   : k2-=kc)\n    {\n      Index actual_kc = (std::min)(IsLower ? depth-k2 : k2, kc);\n      Index actual_k2 = IsLower ? k2 : k2-actual_kc;\n\n      // align blocks with the end of the triangular part for trapezoidal rhs\n      if(IsLower && (k2<cols) && (actual_k2+actual_kc>cols))\n      {\n        actual_kc = cols-k2;\n        k2 = actual_k2 + actual_kc - kc;\n      }\n\n      // remaining size\n      Index rs = IsLower ? 
(std::min)(cols,actual_k2) : cols - k2;\n      // size of the triangular part\n      Index ts = (IsLower && actual_k2>=cols) ? 0 : actual_kc;\n\n      Scalar* geb = blockB+ts*ts;\n      geb = geb + internal::first_aligned<PacketBytes>(geb,PacketBytes/sizeof(Scalar));\n\n      pack_rhs(geb, rhs.getSubMapper(actual_k2,IsLower ? 0 : k2), actual_kc, rs);\n\n      // pack the triangular part of the rhs padding the unrolled blocks with zeros\n      if(ts>0)\n      {\n        for (Index j2=0; j2<actual_kc; j2+=SmallPanelWidth)\n        {\n          Index actualPanelWidth = std::min<Index>(actual_kc-j2, SmallPanelWidth);\n          Index actual_j2 = actual_k2 + j2;\n          Index panelOffset = IsLower ? j2+actualPanelWidth : 0;\n          Index panelLength = IsLower ? actual_kc-j2-actualPanelWidth : j2;\n          // general part\n          pack_rhs_panel(blockB+j2*actual_kc,\n                         rhs.getSubMapper(actual_k2+panelOffset, actual_j2),\n                         panelLength, actualPanelWidth,\n                         actual_kc, panelOffset);\n\n          // append the triangular part via a temporary buffer\n          for (Index j=0;j<actualPanelWidth;++j)\n          {\n            if (SetDiag)\n              triangularBuffer.coeffRef(j,j) = rhs(actual_j2+j,actual_j2+j);\n            for (Index k=IsLower ? j+1 : 0; IsLower ? k<actualPanelWidth : k<j; ++k)\n              triangularBuffer.coeffRef(k,j) = rhs(actual_j2+k,actual_j2+j);\n          }\n\n          pack_rhs_panel(blockB+j2*actual_kc,\n                         RhsMapper(triangularBuffer.data(), triangularBuffer.outerStride()),\n                         actualPanelWidth, actualPanelWidth,\n                         actual_kc, j2);\n        }\n      }\n\n      for (Index i2=0; i2<rows; i2+=mc)\n      {\n        const Index actual_mc = (std::min)(mc,rows-i2);\n        pack_lhs(blockA, lhs.getSubMapper(i2, actual_k2), actual_kc, actual_mc);\n\n        // triangular kernel\n        if(ts>0)\n        {\n          for (Index j2=0; j2<actual_kc; j2+=SmallPanelWidth)\n          {\n            Index actualPanelWidth = std::min<Index>(actual_kc-j2, SmallPanelWidth);\n            Index panelLength = IsLower ? actual_kc-j2 : j2+actualPanelWidth;\n            Index blockOffset = IsLower ? j2 : 0;\n\n            gebp_kernel(res.getSubMapper(i2, actual_k2 + j2),\n                        blockA, blockB+j2*actual_kc,\n                        actual_mc, panelLength, actualPanelWidth,\n                        alpha,\n                        actual_kc, actual_kc,  // strides\n                        blockOffset, blockOffset);// offsets\n          }\n        }\n        gebp_kernel(res.getSubMapper(i2, IsLower ? 
0 : k2),\n                    blockA, geb, actual_mc, actual_kc, rs,\n                    alpha,\n                    -1, -1, 0, 0);\n      }\n    }\n  }\n\n/***************************************************************************\n* Wrapper to product_triangular_matrix_matrix\n***************************************************************************/\n\n} // end namespace internal\n\nnamespace internal {\ntemplate<int Mode, bool LhsIsTriangular, typename Lhs, typename Rhs>\nstruct triangular_product_impl<Mode,LhsIsTriangular,Lhs,false,Rhs,false>\n{\n  template<typename Dest> static void run(Dest& dst, const Lhs &a_lhs, const Rhs &a_rhs, const typename Dest::Scalar& alpha)\n  {\n    typedef typename Lhs::Scalar  LhsScalar;\n    typedef typename Rhs::Scalar  RhsScalar;\n    typedef typename Dest::Scalar Scalar;\n    \n    typedef internal::blas_traits<Lhs> LhsBlasTraits;\n    typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;\n    typedef typename internal::remove_all<ActualLhsType>::type ActualLhsTypeCleaned;\n    typedef internal::blas_traits<Rhs> RhsBlasTraits;\n    typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;\n    typedef typename internal::remove_all<ActualRhsType>::type ActualRhsTypeCleaned;\n    \n    typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(a_lhs);\n    typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(a_rhs);\n\n    LhsScalar lhs_alpha = LhsBlasTraits::extractScalarFactor(a_lhs);\n    RhsScalar rhs_alpha = RhsBlasTraits::extractScalarFactor(a_rhs);\n    Scalar actualAlpha = alpha * lhs_alpha * rhs_alpha;\n\n    typedef internal::gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,Scalar,Scalar,\n              Lhs::MaxRowsAtCompileTime, Rhs::MaxColsAtCompileTime, Lhs::MaxColsAtCompileTime,4> BlockingType;\n\n    enum { IsLower = (Mode&Lower) == Lower };\n    Index stripedRows  = ((!LhsIsTriangular) || (IsLower))  ? lhs.rows() : (std::min)(lhs.rows(),lhs.cols());\n    Index stripedCols  = ((LhsIsTriangular)  || (!IsLower)) ? rhs.cols() : (std::min)(rhs.cols(),rhs.rows());\n    Index stripedDepth = LhsIsTriangular ? ((!IsLower) ? lhs.cols() : (std::min)(lhs.cols(),lhs.rows()))\n                                         : ((IsLower)  ? rhs.rows() : (std::min)(rhs.rows(),rhs.cols()));\n\n    BlockingType blocking(stripedRows, stripedCols, stripedDepth, 1, false);\n\n    internal::product_triangular_matrix_matrix<Scalar, Index,\n      Mode, LhsIsTriangular,\n      (internal::traits<ActualLhsTypeCleaned>::Flags&RowMajorBit) ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate,\n      (internal::traits<ActualRhsTypeCleaned>::Flags&RowMajorBit) ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate,\n      (internal::traits<Dest          >::Flags&RowMajorBit) ? 
RowMajor : ColMajor, Dest::InnerStrideAtCompileTime>\n      ::run(\n        stripedRows, stripedCols, stripedDepth,   // sizes\n        &lhs.coeffRef(0,0), lhs.outerStride(),    // lhs info\n        &rhs.coeffRef(0,0), rhs.outerStride(),    // rhs info\n        &dst.coeffRef(0,0), dst.innerStride(), dst.outerStride(),    // result info\n        actualAlpha, blocking\n      );\n\n    // Apply correction if the diagonal is unit and a scalar factor was nested:\n    if ((Mode&UnitDiag)==UnitDiag)\n    {\n      if (LhsIsTriangular && lhs_alpha!=LhsScalar(1))\n      {\n        Index diagSize = (std::min)(lhs.rows(),lhs.cols());\n        dst.topRows(diagSize) -= ((lhs_alpha-LhsScalar(1))*a_rhs).topRows(diagSize);\n      }\n      else if ((!LhsIsTriangular) && rhs_alpha!=RhsScalar(1))\n      {\n        Index diagSize = (std::min)(rhs.rows(),rhs.cols());\n        dst.leftCols(diagSize) -= (rhs_alpha-RhsScalar(1))*a_lhs.leftCols(diagSize);\n      }\n    }\n  }\n};\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_TRIANGULAR_MATRIX_MATRIX_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h",
    "content": "/*\n Copyright (c) 2011, Intel Corporation. All rights reserved.\n\n Redistribution and use in source and binary forms, with or without modification,\n are permitted provided that the following conditions are met:\n\n * Redistributions of source code must retain the above copyright notice, this\n   list of conditions and the following disclaimer.\n * Redistributions in binary form must reproduce the above copyright notice,\n   this list of conditions and the following disclaimer in the documentation\n   and/or other materials provided with the distribution.\n * Neither the name of Intel Corporation nor the names of its contributors may\n   be used to endorse or promote products derived from this software without\n   specific prior written permission.\n\n THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\" AND\n ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED\n WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE\n DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR\n ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES\n (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;\n LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON\n ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\n SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\n ********************************************************************************\n *   Content : Eigen bindings to BLAS F77\n *   Triangular matrix * matrix product functionality based on ?TRMM.\n ********************************************************************************\n*/\n\n#ifndef EIGEN_TRIANGULAR_MATRIX_MATRIX_BLAS_H\n#define EIGEN_TRIANGULAR_MATRIX_MATRIX_BLAS_H\n\nnamespace Eigen { \n\nnamespace internal {\n\n\ntemplate <typename Scalar, typename Index,\n          int Mode, bool LhsIsTriangular,\n          int LhsStorageOrder, bool ConjugateLhs,\n          int RhsStorageOrder, bool ConjugateRhs,\n          int ResStorageOrder>\nstruct product_triangular_matrix_matrix_trmm :\n       product_triangular_matrix_matrix<Scalar,Index,Mode,\n          LhsIsTriangular,LhsStorageOrder,ConjugateLhs,\n          RhsStorageOrder, ConjugateRhs, ResStorageOrder, 1, BuiltIn> {};\n\n\n// try to go to BLAS specialization\n#define EIGEN_BLAS_TRMM_SPECIALIZE(Scalar, LhsIsTriangular) \\\ntemplate <typename Index, int Mode, \\\n          int LhsStorageOrder, bool ConjugateLhs, \\\n          int RhsStorageOrder, bool ConjugateRhs> \\\nstruct product_triangular_matrix_matrix<Scalar,Index, Mode, LhsIsTriangular, \\\n           LhsStorageOrder,ConjugateLhs, RhsStorageOrder,ConjugateRhs,ColMajor,1,Specialized> { \\\n  static inline void run(Index _rows, Index _cols, Index _depth, const Scalar* _lhs, Index lhsStride,\\\n    const Scalar* _rhs, Index rhsStride, Scalar* res, Index resIncr, Index resStride, Scalar alpha, level3_blocking<Scalar,Scalar>& blocking) { \\\n      EIGEN_ONLY_USED_FOR_DEBUG(resIncr); \\\n      eigen_assert(resIncr == 1); \\\n      product_triangular_matrix_matrix_trmm<Scalar,Index,Mode, \\\n        LhsIsTriangular,LhsStorageOrder,ConjugateLhs, \\\n        RhsStorageOrder, ConjugateRhs, ColMajor>::run( \\\n          _rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha, blocking); \\\n  } 
\\\n};\n\nEIGEN_BLAS_TRMM_SPECIALIZE(double, true)\nEIGEN_BLAS_TRMM_SPECIALIZE(double, false)\nEIGEN_BLAS_TRMM_SPECIALIZE(dcomplex, true)\nEIGEN_BLAS_TRMM_SPECIALIZE(dcomplex, false)\nEIGEN_BLAS_TRMM_SPECIALIZE(float, true)\nEIGEN_BLAS_TRMM_SPECIALIZE(float, false)\nEIGEN_BLAS_TRMM_SPECIALIZE(scomplex, true)\nEIGEN_BLAS_TRMM_SPECIALIZE(scomplex, false)\n\n// implements col-major += alpha * op(triangular) * op(general)\n#define EIGEN_BLAS_TRMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \\\ntemplate <typename Index, int Mode, \\\n          int LhsStorageOrder, bool ConjugateLhs, \\\n          int RhsStorageOrder, bool ConjugateRhs> \\\nstruct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,true, \\\n         LhsStorageOrder,ConjugateLhs,RhsStorageOrder,ConjugateRhs,ColMajor> \\\n{ \\\n  enum { \\\n    IsLower = (Mode&Lower) == Lower, \\\n    SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1, \\\n    IsUnitDiag  = (Mode&UnitDiag) ? 1 : 0, \\\n    IsZeroDiag  = (Mode&ZeroDiag) ? 1 : 0, \\\n    LowUp = IsLower ? Lower : Upper, \\\n    conjA = ((LhsStorageOrder==ColMajor) && ConjugateLhs) ? 1 : 0 \\\n  }; \\\n\\\n  static void run( \\\n    Index _rows, Index _cols, Index _depth, \\\n    const EIGTYPE* _lhs, Index lhsStride, \\\n    const EIGTYPE* _rhs, Index rhsStride, \\\n    EIGTYPE* res,        Index resStride, \\\n    EIGTYPE alpha, level3_blocking<EIGTYPE,EIGTYPE>& blocking) \\\n  { \\\n   Index diagSize  = (std::min)(_rows,_depth); \\\n   Index rows      = IsLower ? _rows : diagSize; \\\n   Index depth     = IsLower ? diagSize : _depth; \\\n   Index cols      = _cols; \\\n\\\n   typedef Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder> MatrixLhs; \\\n   typedef Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> MatrixRhs; \\\n\\\n/* Non-square case - doesn't fit to BLAS ?TRMM. Fall to default triangular product or call BLAS ?GEMM*/ \\\n   if (rows != depth) { \\\n\\\n     /* FIXME handle mkl_domain_get_max_threads */ \\\n     /*int nthr = mkl_domain_get_max_threads(EIGEN_BLAS_DOMAIN_BLAS);*/ int nthr = 1;\\\n\\\n     if (((nthr==1) && (((std::max)(rows,depth)-diagSize)/(double)diagSize < 0.5))) { \\\n     /* Most likely no benefit to call TRMM or GEMM from BLAS */ \\\n       product_triangular_matrix_matrix<EIGTYPE,Index,Mode,true, \\\n       LhsStorageOrder,ConjugateLhs, RhsStorageOrder, ConjugateRhs, ColMajor, 1, BuiltIn>::run( \\\n           _rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, 1, resStride, alpha, blocking); \\\n     /*std::cout << \"TRMM_L: A is not square! Go to Eigen TRMM implementation!\\n\";*/ \\\n     } else { \\\n     /* Make sense to call GEMM */ \\\n       Map<const MatrixLhs, 0, OuterStride<> > lhsMap(_lhs,rows,depth,OuterStride<>(lhsStride)); \\\n       MatrixLhs aa_tmp=lhsMap.template triangularView<Mode>(); \\\n       BlasIndex aStride = convert_index<BlasIndex>(aa_tmp.outerStride()); \\\n       gemm_blocking_space<ColMajor,EIGTYPE,EIGTYPE,Dynamic,Dynamic,Dynamic> gemm_blocking(_rows,_cols,_depth, 1, true); \\\n       general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor,1>::run( \\\n       rows, cols, depth, aa_tmp.data(), aStride, _rhs, rhsStride, res, 1, resStride, alpha, gemm_blocking, 0); \\\n\\\n     /*std::cout << \"TRMM_L: A is not square! Go to BLAS GEMM implementation! 
\" << nthr<<\" \\n\";*/ \\\n     } \\\n     return; \\\n   } \\\n   char side = 'L', transa, uplo, diag = 'N'; \\\n   EIGTYPE *b; \\\n   const EIGTYPE *a; \\\n   BlasIndex m, n, lda, ldb; \\\n\\\n/* Set m, n */ \\\n   m = convert_index<BlasIndex>(diagSize); \\\n   n = convert_index<BlasIndex>(cols); \\\n\\\n/* Set trans */ \\\n   transa = (LhsStorageOrder==RowMajor) ? ((ConjugateLhs) ? 'C' : 'T') : 'N'; \\\n\\\n/* Set b, ldb */ \\\n   Map<const MatrixRhs, 0, OuterStride<> > rhs(_rhs,depth,cols,OuterStride<>(rhsStride)); \\\n   MatrixX##EIGPREFIX b_tmp; \\\n\\\n   if (ConjugateRhs) b_tmp = rhs.conjugate(); else b_tmp = rhs; \\\n   b = b_tmp.data(); \\\n   ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \\\n\\\n/* Set uplo */ \\\n   uplo = IsLower ? 'L' : 'U'; \\\n   if (LhsStorageOrder==RowMajor) uplo = (uplo == 'L') ? 'U' : 'L'; \\\n/* Set a, lda */ \\\n   Map<const MatrixLhs, 0, OuterStride<> > lhs(_lhs,rows,depth,OuterStride<>(lhsStride)); \\\n   MatrixLhs a_tmp; \\\n\\\n   if ((conjA!=0) || (SetDiag==0)) { \\\n     if (conjA) a_tmp = lhs.conjugate(); else a_tmp = lhs; \\\n     if (IsZeroDiag) \\\n       a_tmp.diagonal().setZero(); \\\n     else if (IsUnitDiag) \\\n       a_tmp.diagonal().setOnes();\\\n     a = a_tmp.data(); \\\n     lda = convert_index<BlasIndex>(a_tmp.outerStride()); \\\n   } else { \\\n     a = _lhs; \\\n     lda = convert_index<BlasIndex>(lhsStride); \\\n   } \\\n   /*std::cout << \"TRMM_L: A is square! Go to BLAS TRMM implementation! \\n\";*/ \\\n/* call ?trmm*/ \\\n   BLASFUNC(&side, &uplo, &transa, &diag, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)b, &ldb); \\\n\\\n/* Add op(a_triangular)*b into res*/ \\\n   Map<MatrixX##EIGPREFIX, 0, OuterStride<> > res_tmp(res,rows,cols,OuterStride<>(resStride)); \\\n   res_tmp=res_tmp+b_tmp; \\\n  } \\\n};\n\n#ifdef EIGEN_USE_MKL\nEIGEN_BLAS_TRMM_L(double, double, d, dtrmm)\nEIGEN_BLAS_TRMM_L(dcomplex, MKL_Complex16, cd, ztrmm)\nEIGEN_BLAS_TRMM_L(float, float, f, strmm)\nEIGEN_BLAS_TRMM_L(scomplex, MKL_Complex8, cf, ctrmm)\n#else\nEIGEN_BLAS_TRMM_L(double, double, d, dtrmm_)\nEIGEN_BLAS_TRMM_L(dcomplex, double, cd, ztrmm_)\nEIGEN_BLAS_TRMM_L(float, float, f, strmm_)\nEIGEN_BLAS_TRMM_L(scomplex, float, cf, ctrmm_)\n#endif\n\n// implements col-major += alpha * op(general) * op(triangular)\n#define EIGEN_BLAS_TRMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \\\ntemplate <typename Index, int Mode, \\\n          int LhsStorageOrder, bool ConjugateLhs, \\\n          int RhsStorageOrder, bool ConjugateRhs> \\\nstruct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,false, \\\n         LhsStorageOrder,ConjugateLhs,RhsStorageOrder,ConjugateRhs,ColMajor> \\\n{ \\\n  enum { \\\n    IsLower = (Mode&Lower) == Lower, \\\n    SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1, \\\n    IsUnitDiag  = (Mode&UnitDiag) ? 1 : 0, \\\n    IsZeroDiag  = (Mode&ZeroDiag) ? 1 : 0, \\\n    LowUp = IsLower ? Lower : Upper, \\\n    conjA = ((RhsStorageOrder==ColMajor) && ConjugateRhs) ? 1 : 0 \\\n  }; \\\n\\\n  static void run( \\\n    Index _rows, Index _cols, Index _depth, \\\n    const EIGTYPE* _lhs, Index lhsStride, \\\n    const EIGTYPE* _rhs, Index rhsStride, \\\n    EIGTYPE* res,        Index resStride, \\\n    EIGTYPE alpha, level3_blocking<EIGTYPE,EIGTYPE>& blocking) \\\n  { \\\n   Index diagSize  = (std::min)(_cols,_depth); \\\n   Index rows      = _rows; \\\n   Index depth     = IsLower ? _depth : diagSize; \\\n   Index cols      = IsLower ? 
diagSize : _cols; \\\n\\\n   typedef Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder> MatrixLhs; \\\n   typedef Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> MatrixRhs; \\\n\\\n/* Non-square case - doesn't fit to BLAS ?TRMM. Fall to default triangular product or call BLAS ?GEMM*/ \\\n   if (cols != depth) { \\\n\\\n     int nthr = 1 /*mkl_domain_get_max_threads(EIGEN_BLAS_DOMAIN_BLAS)*/; \\\n\\\n     if ((nthr==1) && (((std::max)(cols,depth)-diagSize)/(double)diagSize < 0.5)) { \\\n     /* Most likely no benefit to call TRMM or GEMM from BLAS*/ \\\n       product_triangular_matrix_matrix<EIGTYPE,Index,Mode,false, \\\n       LhsStorageOrder,ConjugateLhs, RhsStorageOrder, ConjugateRhs, ColMajor, 1, BuiltIn>::run( \\\n           _rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, 1, resStride, alpha, blocking); \\\n       /*std::cout << \"TRMM_R: A is not square! Go to Eigen TRMM implementation!\\n\";*/ \\\n     } else { \\\n     /* Make sense to call GEMM */ \\\n       Map<const MatrixRhs, 0, OuterStride<> > rhsMap(_rhs,depth,cols, OuterStride<>(rhsStride)); \\\n       MatrixRhs aa_tmp=rhsMap.template triangularView<Mode>(); \\\n       BlasIndex aStride = convert_index<BlasIndex>(aa_tmp.outerStride()); \\\n       gemm_blocking_space<ColMajor,EIGTYPE,EIGTYPE,Dynamic,Dynamic,Dynamic> gemm_blocking(_rows,_cols,_depth, 1, true); \\\n       general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor,1>::run( \\\n       rows, cols, depth, _lhs, lhsStride, aa_tmp.data(), aStride, res, 1, resStride, alpha, gemm_blocking, 0); \\\n\\\n     /*std::cout << \"TRMM_R: A is not square! Go to BLAS GEMM implementation! \" << nthr<<\" \\n\";*/ \\\n     } \\\n     return; \\\n   } \\\n   char side = 'R', transa, uplo, diag = 'N'; \\\n   EIGTYPE *b; \\\n   const EIGTYPE *a; \\\n   BlasIndex m, n, lda, ldb; \\\n\\\n/* Set m, n */ \\\n   m = convert_index<BlasIndex>(rows); \\\n   n = convert_index<BlasIndex>(diagSize); \\\n\\\n/* Set trans */ \\\n   transa = (RhsStorageOrder==RowMajor) ? ((ConjugateRhs) ? 'C' : 'T') : 'N'; \\\n\\\n/* Set b, ldb */ \\\n   Map<const MatrixLhs, 0, OuterStride<> > lhs(_lhs,rows,depth,OuterStride<>(lhsStride)); \\\n   MatrixX##EIGPREFIX b_tmp; \\\n\\\n   if (ConjugateLhs) b_tmp = lhs.conjugate(); else b_tmp = lhs; \\\n   b = b_tmp.data(); \\\n   ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \\\n\\\n/* Set uplo */ \\\n   uplo = IsLower ? 'L' : 'U'; \\\n   if (RhsStorageOrder==RowMajor) uplo = (uplo == 'L') ? 'U' : 'L'; \\\n/* Set a, lda */ \\\n   Map<const MatrixRhs, 0, OuterStride<> > rhs(_rhs,depth,cols, OuterStride<>(rhsStride)); \\\n   MatrixRhs a_tmp; \\\n\\\n   if ((conjA!=0) || (SetDiag==0)) { \\\n     if (conjA) a_tmp = rhs.conjugate(); else a_tmp = rhs; \\\n     if (IsZeroDiag) \\\n       a_tmp.diagonal().setZero(); \\\n     else if (IsUnitDiag) \\\n       a_tmp.diagonal().setOnes();\\\n     a = a_tmp.data(); \\\n     lda = convert_index<BlasIndex>(a_tmp.outerStride()); \\\n   } else { \\\n     a = _rhs; \\\n     lda = convert_index<BlasIndex>(rhsStride); \\\n   } \\\n   /*std::cout << \"TRMM_R: A is square! Go to BLAS TRMM implementation! 
\\n\";*/ \\\n/* call ?trmm*/ \\\n   BLASFUNC(&side, &uplo, &transa, &diag, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)b, &ldb); \\\n\\\n/* Add op(a_triangular)*b into res*/ \\\n   Map<MatrixX##EIGPREFIX, 0, OuterStride<> > res_tmp(res,rows,cols,OuterStride<>(resStride)); \\\n   res_tmp=res_tmp+b_tmp; \\\n  } \\\n};\n\n#ifdef EIGEN_USE_MKL\nEIGEN_BLAS_TRMM_R(double, double, d, dtrmm)\nEIGEN_BLAS_TRMM_R(dcomplex, MKL_Complex16, cd, ztrmm)\nEIGEN_BLAS_TRMM_R(float, float, f, strmm)\nEIGEN_BLAS_TRMM_R(scomplex, MKL_Complex8, cf, ctrmm)\n#else\nEIGEN_BLAS_TRMM_R(double, double, d, dtrmm_)\nEIGEN_BLAS_TRMM_R(dcomplex, double, cd, ztrmm_)\nEIGEN_BLAS_TRMM_R(float, float, f, strmm_)\nEIGEN_BLAS_TRMM_R(scomplex, float, cf, ctrmm_)\n#endif\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_TRIANGULAR_MATRIX_MATRIX_BLAS_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/products/TriangularMatrixVector.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_TRIANGULARMATRIXVECTOR_H\n#define EIGEN_TRIANGULARMATRIXVECTOR_H\n\nnamespace Eigen {\n\nnamespace internal {\n\ntemplate<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs, int StorageOrder, int Version=Specialized>\nstruct triangular_matrix_vector_product;\n\ntemplate<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs, int Version>\nstruct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,ColMajor,Version>\n{\n  typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;\n  enum {\n    IsLower = ((Mode&Lower)==Lower),\n    HasUnitDiag = (Mode & UnitDiag)==UnitDiag,\n    HasZeroDiag = (Mode & ZeroDiag)==ZeroDiag\n  };\n  static EIGEN_DONT_INLINE  void run(Index _rows, Index _cols, const LhsScalar* _lhs, Index lhsStride,\n                                     const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, const RhsScalar& alpha);\n};\n\ntemplate<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs, int Version>\nEIGEN_DONT_INLINE void triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,ColMajor,Version>\n  ::run(Index _rows, Index _cols, const LhsScalar* _lhs, Index lhsStride,\n        const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, const RhsScalar& alpha)\n  {\n    static const Index PanelWidth = EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH;\n    Index size = (std::min)(_rows,_cols);\n    Index rows = IsLower ? _rows : (std::min)(_rows,_cols);\n    Index cols = IsLower ? (std::min)(_rows,_cols) : _cols;\n\n    typedef Map<const Matrix<LhsScalar,Dynamic,Dynamic,ColMajor>, 0, OuterStride<> > LhsMap;\n    const LhsMap lhs(_lhs,rows,cols,OuterStride<>(lhsStride));\n    typename conj_expr_if<ConjLhs,LhsMap>::type cjLhs(lhs);\n\n    typedef Map<const Matrix<RhsScalar,Dynamic,1>, 0, InnerStride<> > RhsMap;\n    const RhsMap rhs(_rhs,cols,InnerStride<>(rhsIncr));\n    typename conj_expr_if<ConjRhs,RhsMap>::type cjRhs(rhs);\n\n    typedef Map<Matrix<ResScalar,Dynamic,1> > ResMap;\n    ResMap res(_res,rows);\n\n    typedef const_blas_data_mapper<LhsScalar,Index,ColMajor> LhsMapper;\n    typedef const_blas_data_mapper<RhsScalar,Index,RowMajor> RhsMapper;\n\n    for (Index pi=0; pi<size; pi+=PanelWidth)\n    {\n      Index actualPanelWidth = (std::min)(PanelWidth, size-pi);\n      for (Index k=0; k<actualPanelWidth; ++k)\n      {\n        Index i = pi + k;\n        Index s = IsLower ? ((HasUnitDiag||HasZeroDiag) ? i+1 : i ) : pi;\n        Index r = IsLower ? actualPanelWidth-k : k+1;\n        if ((!(HasUnitDiag||HasZeroDiag)) || (--r)>0)\n          res.segment(s,r) += (alpha * cjRhs.coeff(i)) * cjLhs.col(i).segment(s,r);\n        if (HasUnitDiag)\n          res.coeffRef(i) += alpha * cjRhs.coeff(i);\n      }\n      Index r = IsLower ? rows - pi - actualPanelWidth : pi;\n      if (r>0)\n      {\n        Index s = IsLower ? 
pi+actualPanelWidth : 0;\n        general_matrix_vector_product<Index,LhsScalar,LhsMapper,ColMajor,ConjLhs,RhsScalar,RhsMapper,ConjRhs,BuiltIn>::run(\n            r, actualPanelWidth,\n            LhsMapper(&lhs.coeffRef(s,pi), lhsStride),\n            RhsMapper(&rhs.coeffRef(pi), rhsIncr),\n            &res.coeffRef(s), resIncr, alpha);\n      }\n    }\n    if((!IsLower) && cols>size)\n    {\n      general_matrix_vector_product<Index,LhsScalar,LhsMapper,ColMajor,ConjLhs,RhsScalar,RhsMapper,ConjRhs>::run(\n          rows, cols-size,\n          LhsMapper(&lhs.coeffRef(0,size), lhsStride),\n          RhsMapper(&rhs.coeffRef(size), rhsIncr),\n          _res, resIncr, alpha);\n    }\n  }\n\ntemplate<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs,int Version>\nstruct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,RowMajor,Version>\n{\n  typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;\n  enum {\n    IsLower = ((Mode&Lower)==Lower),\n    HasUnitDiag = (Mode & UnitDiag)==UnitDiag,\n    HasZeroDiag = (Mode & ZeroDiag)==ZeroDiag\n  };\n  static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const LhsScalar* _lhs, Index lhsStride,\n                                    const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, const ResScalar& alpha);\n};\n\ntemplate<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs,int Version>\nEIGEN_DONT_INLINE void triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,RowMajor,Version>\n  ::run(Index _rows, Index _cols, const LhsScalar* _lhs, Index lhsStride,\n        const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, const ResScalar& alpha)\n  {\n    static const Index PanelWidth = EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH;\n    Index diagSize = (std::min)(_rows,_cols);\n    Index rows = IsLower ? _rows : diagSize;\n    Index cols = IsLower ? diagSize : _cols;\n\n    typedef Map<const Matrix<LhsScalar,Dynamic,Dynamic,RowMajor>, 0, OuterStride<> > LhsMap;\n    const LhsMap lhs(_lhs,rows,cols,OuterStride<>(lhsStride));\n    typename conj_expr_if<ConjLhs,LhsMap>::type cjLhs(lhs);\n\n    typedef Map<const Matrix<RhsScalar,Dynamic,1> > RhsMap;\n    const RhsMap rhs(_rhs,cols);\n    typename conj_expr_if<ConjRhs,RhsMap>::type cjRhs(rhs);\n\n    typedef Map<Matrix<ResScalar,Dynamic,1>, 0, InnerStride<> > ResMap;\n    ResMap res(_res,rows,InnerStride<>(resIncr));\n\n    typedef const_blas_data_mapper<LhsScalar,Index,RowMajor> LhsMapper;\n    typedef const_blas_data_mapper<RhsScalar,Index,RowMajor> RhsMapper;\n\n    for (Index pi=0; pi<diagSize; pi+=PanelWidth)\n    {\n      Index actualPanelWidth = (std::min)(PanelWidth, diagSize-pi);\n      for (Index k=0; k<actualPanelWidth; ++k)\n      {\n        Index i = pi + k;\n        Index s = IsLower ? pi  : ((HasUnitDiag||HasZeroDiag) ? i+1 : i);\n        Index r = IsLower ? k+1 : actualPanelWidth-k;\n        if ((!(HasUnitDiag||HasZeroDiag)) || (--r)>0)\n          res.coeffRef(i) += alpha * (cjLhs.row(i).segment(s,r).cwiseProduct(cjRhs.segment(s,r).transpose())).sum();\n        if (HasUnitDiag)\n          res.coeffRef(i) += alpha * cjRhs.coeff(i);\n      }\n      Index r = IsLower ? pi : cols - pi - actualPanelWidth;\n      if (r>0)\n      {\n        Index s = IsLower ? 
0 : pi + actualPanelWidth;\n        general_matrix_vector_product<Index,LhsScalar,LhsMapper,RowMajor,ConjLhs,RhsScalar,RhsMapper,ConjRhs,BuiltIn>::run(\n            actualPanelWidth, r,\n            LhsMapper(&lhs.coeffRef(pi,s), lhsStride),\n            RhsMapper(&rhs.coeffRef(s), rhsIncr),\n            &res.coeffRef(pi), resIncr, alpha);\n      }\n    }\n    if(IsLower && rows>diagSize)\n    {\n      general_matrix_vector_product<Index,LhsScalar,LhsMapper,RowMajor,ConjLhs,RhsScalar,RhsMapper,ConjRhs>::run(\n            rows-diagSize, cols,\n            LhsMapper(&lhs.coeffRef(diagSize,0), lhsStride),\n            RhsMapper(&rhs.coeffRef(0), rhsIncr),\n            &res.coeffRef(diagSize), resIncr, alpha);\n    }\n  }\n\n/***************************************************************************\n* Wrapper to product_triangular_vector\n***************************************************************************/\n\ntemplate<int Mode,int StorageOrder>\nstruct trmv_selector;\n\n} // end namespace internal\n\nnamespace internal {\n\ntemplate<int Mode, typename Lhs, typename Rhs>\nstruct triangular_product_impl<Mode,true,Lhs,false,Rhs,true>\n{\n  template<typename Dest> static void run(Dest& dst, const Lhs &lhs, const Rhs &rhs, const typename Dest::Scalar& alpha)\n  {\n    eigen_assert(dst.rows()==lhs.rows() && dst.cols()==rhs.cols());\n  \n    internal::trmv_selector<Mode,(int(internal::traits<Lhs>::Flags)&RowMajorBit) ? RowMajor : ColMajor>::run(lhs, rhs, dst, alpha);\n  }\n};\n\ntemplate<int Mode, typename Lhs, typename Rhs>\nstruct triangular_product_impl<Mode,false,Lhs,true,Rhs,false>\n{\n  template<typename Dest> static void run(Dest& dst, const Lhs &lhs, const Rhs &rhs, const typename Dest::Scalar& alpha)\n  {\n    eigen_assert(dst.rows()==lhs.rows() && dst.cols()==rhs.cols());\n\n    Transpose<Dest> dstT(dst);\n    internal::trmv_selector<(Mode & (UnitDiag|ZeroDiag)) | ((Mode & Lower) ? Upper : Lower),\n                            (int(internal::traits<Rhs>::Flags)&RowMajorBit) ? 
ColMajor : RowMajor>\n            ::run(rhs.transpose(),lhs.transpose(), dstT, alpha);\n  }\n};\n\n} // end namespace internal\n\nnamespace internal {\n\n// TODO: find a way to factorize this piece of code with gemv_selector since the logic is exactly the same.\n  \ntemplate<int Mode> struct trmv_selector<Mode,ColMajor>\n{\n  template<typename Lhs, typename Rhs, typename Dest>\n  static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)\n  {\n    typedef typename Lhs::Scalar      LhsScalar;\n    typedef typename Rhs::Scalar      RhsScalar;\n    typedef typename Dest::Scalar     ResScalar;\n    typedef typename Dest::RealScalar RealScalar;\n    \n    typedef internal::blas_traits<Lhs> LhsBlasTraits;\n    typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;\n    typedef internal::blas_traits<Rhs> RhsBlasTraits;\n    typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;\n    \n    typedef Map<Matrix<ResScalar,Dynamic,1>, EIGEN_PLAIN_ENUM_MIN(AlignedMax,internal::packet_traits<ResScalar>::size)> MappedDest;\n\n    typename internal::add_const_on_value_type<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(lhs);\n    typename internal::add_const_on_value_type<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(rhs);\n\n    LhsScalar lhs_alpha = LhsBlasTraits::extractScalarFactor(lhs);\n    RhsScalar rhs_alpha = RhsBlasTraits::extractScalarFactor(rhs);\n    ResScalar actualAlpha = alpha * lhs_alpha * rhs_alpha;\n\n    enum {\n      // FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1\n      // on, the other hand it is good for the cache to pack the vector anyways...\n      EvalToDestAtCompileTime = Dest::InnerStrideAtCompileTime==1,\n      ComplexByReal = (NumTraits<LhsScalar>::IsComplex) && (!NumTraits<RhsScalar>::IsComplex),\n      MightCannotUseDest = (Dest::InnerStrideAtCompileTime!=1) || ComplexByReal\n    };\n\n    gemv_static_vector_if<ResScalar,Dest::SizeAtCompileTime,Dest::MaxSizeAtCompileTime,MightCannotUseDest> static_dest;\n\n    bool alphaIsCompatible = (!ComplexByReal) || (numext::imag(actualAlpha)==RealScalar(0));\n    bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible;\n\n    RhsScalar compatibleAlpha = get_factor<ResScalar,RhsScalar>::run(actualAlpha);\n\n    ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(),\n                                                  evalToDest ? 
dest.data() : static_dest.data());\n\n    if(!evalToDest)\n    {\n      #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN\n      Index size = dest.size();\n      EIGEN_DENSE_STORAGE_CTOR_PLUGIN\n      #endif\n      if(!alphaIsCompatible)\n      {\n        MappedDest(actualDestPtr, dest.size()).setZero();\n        compatibleAlpha = RhsScalar(1);\n      }\n      else\n        MappedDest(actualDestPtr, dest.size()) = dest;\n    }\n\n    internal::triangular_matrix_vector_product\n      <Index,Mode,\n       LhsScalar, LhsBlasTraits::NeedToConjugate,\n       RhsScalar, RhsBlasTraits::NeedToConjugate,\n       ColMajor>\n      ::run(actualLhs.rows(),actualLhs.cols(),\n            actualLhs.data(),actualLhs.outerStride(),\n            actualRhs.data(),actualRhs.innerStride(),\n            actualDestPtr,1,compatibleAlpha);\n\n    if (!evalToDest)\n    {\n      if(!alphaIsCompatible)\n        dest += actualAlpha * MappedDest(actualDestPtr, dest.size());\n      else\n        dest = MappedDest(actualDestPtr, dest.size());\n    }\n\n    if ( ((Mode&UnitDiag)==UnitDiag) && (lhs_alpha!=LhsScalar(1)) )\n    {\n      Index diagSize = (std::min)(lhs.rows(),lhs.cols());\n      dest.head(diagSize) -= (lhs_alpha-LhsScalar(1))*rhs.head(diagSize);\n    }\n  }\n};\n\ntemplate<int Mode> struct trmv_selector<Mode,RowMajor>\n{\n  template<typename Lhs, typename Rhs, typename Dest>\n  static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)\n  {\n    typedef typename Lhs::Scalar      LhsScalar;\n    typedef typename Rhs::Scalar      RhsScalar;\n    typedef typename Dest::Scalar     ResScalar;\n    \n    typedef internal::blas_traits<Lhs> LhsBlasTraits;\n    typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;\n    typedef internal::blas_traits<Rhs> RhsBlasTraits;\n    typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;\n    typedef typename internal::remove_all<ActualRhsType>::type ActualRhsTypeCleaned;\n\n    typename add_const<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(lhs);\n    typename add_const<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(rhs);\n\n    LhsScalar lhs_alpha = LhsBlasTraits::extractScalarFactor(lhs);\n    RhsScalar rhs_alpha = RhsBlasTraits::extractScalarFactor(rhs);\n    ResScalar actualAlpha = alpha * lhs_alpha * rhs_alpha;\n\n    enum {\n      DirectlyUseRhs = ActualRhsTypeCleaned::InnerStrideAtCompileTime==1\n    };\n\n    gemv_static_vector_if<RhsScalar,ActualRhsTypeCleaned::SizeAtCompileTime,ActualRhsTypeCleaned::MaxSizeAtCompileTime,!DirectlyUseRhs> static_rhs;\n\n    ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhsPtr,actualRhs.size(),\n        DirectlyUseRhs ? 
const_cast<RhsScalar*>(actualRhs.data()) : static_rhs.data());\n\n    if(!DirectlyUseRhs)\n    {\n      #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN\n      Index size = actualRhs.size();\n      EIGEN_DENSE_STORAGE_CTOR_PLUGIN\n      #endif\n      Map<typename ActualRhsTypeCleaned::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs;\n    }\n\n    internal::triangular_matrix_vector_product\n      <Index,Mode,\n       LhsScalar, LhsBlasTraits::NeedToConjugate,\n       RhsScalar, RhsBlasTraits::NeedToConjugate,\n       RowMajor>\n      ::run(actualLhs.rows(),actualLhs.cols(),\n            actualLhs.data(),actualLhs.outerStride(),\n            actualRhsPtr,1,\n            dest.data(),dest.innerStride(),\n            actualAlpha);\n\n    if ( ((Mode&UnitDiag)==UnitDiag) && (lhs_alpha!=LhsScalar(1)) )\n    {\n      Index diagSize = (std::min)(lhs.rows(),lhs.cols());\n      dest.head(diagSize) -= (lhs_alpha-LhsScalar(1))*rhs.head(diagSize);\n    }\n  }\n};\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_TRIANGULARMATRIXVECTOR_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h",
    "content": "/*\n Copyright (c) 2011, Intel Corporation. All rights reserved.\n\n Redistribution and use in source and binary forms, with or without modification,\n are permitted provided that the following conditions are met:\n\n * Redistributions of source code must retain the above copyright notice, this\n   list of conditions and the following disclaimer.\n * Redistributions in binary form must reproduce the above copyright notice,\n   this list of conditions and the following disclaimer in the documentation\n   and/or other materials provided with the distribution.\n * Neither the name of Intel Corporation nor the names of its contributors may\n   be used to endorse or promote products derived from this software without\n   specific prior written permission.\n\n THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\" AND\n ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED\n WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE\n DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR\n ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES\n (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;\n LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON\n ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\n SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\n ********************************************************************************\n *   Content : Eigen bindings to BLAS F77\n *   Triangular matrix-vector product functionality based on ?TRMV.\n ********************************************************************************\n*/\n\n#ifndef EIGEN_TRIANGULAR_MATRIX_VECTOR_BLAS_H\n#define EIGEN_TRIANGULAR_MATRIX_VECTOR_BLAS_H\n\nnamespace Eigen { \n\nnamespace internal {\n\n/**********************************************************************\n* This file implements triangular matrix-vector multiplication using BLAS\n**********************************************************************/\n\n// trmv/hemv specialization\n\ntemplate<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs, int StorageOrder>\nstruct triangular_matrix_vector_product_trmv :\n  triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,StorageOrder,BuiltIn> {};\n\n#define EIGEN_BLAS_TRMV_SPECIALIZE(Scalar) \\\ntemplate<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \\\nstruct triangular_matrix_vector_product<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,ColMajor,Specialized> { \\\n static void run(Index _rows, Index _cols, const Scalar* _lhs, Index lhsStride, \\\n                                     const Scalar* _rhs, Index rhsIncr, Scalar* _res, Index resIncr, Scalar alpha) { \\\n      triangular_matrix_vector_product_trmv<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,ColMajor>::run( \\\n        _rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \\\n  } \\\n}; \\\ntemplate<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \\\nstruct triangular_matrix_vector_product<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,RowMajor,Specialized> { \\\n static void run(Index _rows, Index _cols, const Scalar* _lhs, Index lhsStride, \\\n                                     const Scalar* _rhs, Index rhsIncr, Scalar* _res, Index resIncr, Scalar alpha) { \\\n      
triangular_matrix_vector_product_trmv<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,RowMajor>::run( \\\n        _rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \\\n  } \\\n};\n\nEIGEN_BLAS_TRMV_SPECIALIZE(double)\nEIGEN_BLAS_TRMV_SPECIALIZE(float)\nEIGEN_BLAS_TRMV_SPECIALIZE(dcomplex)\nEIGEN_BLAS_TRMV_SPECIALIZE(scomplex)\n\n// implements col-major: res += alpha * op(triangular) * vector\n#define EIGEN_BLAS_TRMV_CM(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX, BLASPOSTFIX) \\\ntemplate<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \\\nstruct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,ColMajor> { \\\n  enum { \\\n    IsLower = (Mode&Lower) == Lower, \\\n    SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1, \\\n    IsUnitDiag  = (Mode&UnitDiag) ? 1 : 0, \\\n    IsZeroDiag  = (Mode&ZeroDiag) ? 1 : 0, \\\n    LowUp = IsLower ? Lower : Upper \\\n  }; \\\n static void run(Index _rows, Index _cols, const EIGTYPE* _lhs, Index lhsStride, \\\n                 const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* _res, Index resIncr, EIGTYPE alpha) \\\n { \\\n   if (ConjLhs || IsZeroDiag) { \\\n     triangular_matrix_vector_product<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,ColMajor,BuiltIn>::run( \\\n       _rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \\\n     return; \\\n   }\\\n   Index size = (std::min)(_rows,_cols); \\\n   Index rows = IsLower ? _rows : size; \\\n   Index cols = IsLower ? size : _cols; \\\n\\\n   typedef VectorX##EIGPREFIX VectorRhs; \\\n   EIGTYPE *x, *y;\\\n\\\n/* Set x*/ \\\n   Map<const VectorRhs, 0, InnerStride<> > rhs(_rhs,cols,InnerStride<>(rhsIncr)); \\\n   VectorRhs x_tmp; \\\n   if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \\\n   x = x_tmp.data(); \\\n\\\n/* Square part handling */\\\n\\\n   char trans, uplo, diag; \\\n   BlasIndex m, n, lda, incx, incy; \\\n   EIGTYPE const *a; \\\n   EIGTYPE beta(1); \\\n\\\n/* Set m, n */ \\\n   n = convert_index<BlasIndex>(size); \\\n   lda = convert_index<BlasIndex>(lhsStride); \\\n   incx = 1; \\\n   incy = convert_index<BlasIndex>(resIncr); \\\n\\\n/* Set uplo, trans and diag*/ \\\n   trans = 'N'; \\\n   uplo = IsLower ? 'L' : 'U'; \\\n   diag = IsUnitDiag ? 'U' : 'N'; \\\n\\\n/* call ?TRMV*/ \\\n   BLASPREFIX##trmv##BLASPOSTFIX(&uplo, &trans, &diag, &n, (const BLASTYPE*)_lhs, &lda, (BLASTYPE*)x, &incx); \\\n\\\n/* Add op(a_tr)rhs into res*/ \\\n   BLASPREFIX##axpy##BLASPOSTFIX(&n, (const BLASTYPE*)&numext::real_ref(alpha),(const BLASTYPE*)x, &incx, (BLASTYPE*)_res, &incy); \\\n/* Non-square case - doesn't fit to BLAS ?TRMV. 
Fall to default triangular product*/ \\\n   if (size<(std::max)(rows,cols)) { \\\n     if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \\\n     x = x_tmp.data(); \\\n     if (size<rows) { \\\n       y = _res + size*resIncr; \\\n       a = _lhs + size; \\\n       m = convert_index<BlasIndex>(rows-size); \\\n       n = convert_index<BlasIndex>(size); \\\n     } \\\n     else { \\\n       x += size; \\\n       y = _res; \\\n       a = _lhs + size*lda; \\\n       m = convert_index<BlasIndex>(size); \\\n       n = convert_index<BlasIndex>(cols-size); \\\n     } \\\n     BLASPREFIX##gemv##BLASPOSTFIX(&trans, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)x, &incx, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)y, &incy); \\\n   } \\\n  } \\\n};\n\n#ifdef EIGEN_USE_MKL\nEIGEN_BLAS_TRMV_CM(double,   double, d,  d,)\nEIGEN_BLAS_TRMV_CM(dcomplex, MKL_Complex16, cd, z,)\nEIGEN_BLAS_TRMV_CM(float,    float,  f,  s,)\nEIGEN_BLAS_TRMV_CM(scomplex, MKL_Complex8,  cf, c,)\n#else\nEIGEN_BLAS_TRMV_CM(double,   double, d,  d, _)\nEIGEN_BLAS_TRMV_CM(dcomplex, double, cd, z, _)\nEIGEN_BLAS_TRMV_CM(float,    float,  f,  s, _)\nEIGEN_BLAS_TRMV_CM(scomplex, float,  cf, c, _)\n#endif\n\n// implements row-major: res += alpha * op(triangular) * vector\n#define EIGEN_BLAS_TRMV_RM(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX, BLASPOSTFIX) \\\ntemplate<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \\\nstruct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,RowMajor> { \\\n  enum { \\\n    IsLower = (Mode&Lower) == Lower, \\\n    SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1, \\\n    IsUnitDiag  = (Mode&UnitDiag) ? 1 : 0, \\\n    IsZeroDiag  = (Mode&ZeroDiag) ? 1 : 0, \\\n    LowUp = IsLower ? Lower : Upper \\\n  }; \\\n static void run(Index _rows, Index _cols, const EIGTYPE* _lhs, Index lhsStride, \\\n                 const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* _res, Index resIncr, EIGTYPE alpha) \\\n { \\\n   if (IsZeroDiag) { \\\n     triangular_matrix_vector_product<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,RowMajor,BuiltIn>::run( \\\n       _rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \\\n     return; \\\n   }\\\n   Index size = (std::min)(_rows,_cols); \\\n   Index rows = IsLower ? _rows : size; \\\n   Index cols = IsLower ? size : _cols; \\\n\\\n   typedef VectorX##EIGPREFIX VectorRhs; \\\n   EIGTYPE *x, *y;\\\n\\\n/* Set x*/ \\\n   Map<const VectorRhs, 0, InnerStride<> > rhs(_rhs,cols,InnerStride<>(rhsIncr)); \\\n   VectorRhs x_tmp; \\\n   if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \\\n   x = x_tmp.data(); \\\n\\\n/* Square part handling */\\\n\\\n   char trans, uplo, diag; \\\n   BlasIndex m, n, lda, incx, incy; \\\n   EIGTYPE const *a; \\\n   EIGTYPE beta(1); \\\n\\\n/* Set m, n */ \\\n   n = convert_index<BlasIndex>(size); \\\n   lda = convert_index<BlasIndex>(lhsStride); \\\n   incx = 1; \\\n   incy = convert_index<BlasIndex>(resIncr); \\\n\\\n/* Set uplo, trans and diag*/ \\\n   trans = ConjLhs ? 'C' : 'T'; \\\n   uplo = IsLower ? 'U' : 'L'; \\\n   diag = IsUnitDiag ? 'U' : 'N'; \\\n\\\n/* call ?TRMV*/ \\\n   BLASPREFIX##trmv##BLASPOSTFIX(&uplo, &trans, &diag, &n, (const BLASTYPE*)_lhs, &lda, (BLASTYPE*)x, &incx); \\\n\\\n/* Add op(a_tr)rhs into res*/ \\\n   BLASPREFIX##axpy##BLASPOSTFIX(&n, (const BLASTYPE*)&numext::real_ref(alpha),(const BLASTYPE*)x, &incx, (BLASTYPE*)_res, &incy); \\\n/* Non-square case - doesn't fit to BLAS ?TRMV. 
Fall to default triangular product*/ \\\n   if (size<(std::max)(rows,cols)) { \\\n     if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \\\n     x = x_tmp.data(); \\\n     if (size<rows) { \\\n       y = _res + size*resIncr; \\\n       a = _lhs + size*lda; \\\n       m = convert_index<BlasIndex>(rows-size); \\\n       n = convert_index<BlasIndex>(size); \\\n     } \\\n     else { \\\n       x += size; \\\n       y = _res; \\\n       a = _lhs + size; \\\n       m = convert_index<BlasIndex>(size); \\\n       n = convert_index<BlasIndex>(cols-size); \\\n     } \\\n     BLASPREFIX##gemv##BLASPOSTFIX(&trans, &n, &m, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)x, &incx, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)y, &incy); \\\n   } \\\n  } \\\n};\n\n#ifdef EIGEN_USE_MKL\nEIGEN_BLAS_TRMV_RM(double,   double, d,  d,)\nEIGEN_BLAS_TRMV_RM(dcomplex, MKL_Complex16, cd, z,)\nEIGEN_BLAS_TRMV_RM(float,    float,  f,  s,)\nEIGEN_BLAS_TRMV_RM(scomplex, MKL_Complex8,  cf, c,)\n#else\nEIGEN_BLAS_TRMV_RM(double,   double, d,  d,_)\nEIGEN_BLAS_TRMV_RM(dcomplex, double, cd, z,_)\nEIGEN_BLAS_TRMV_RM(float,    float,  f,  s,_)\nEIGEN_BLAS_TRMV_RM(scomplex, float,  cf, c,_)\n#endif\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_TRIANGULAR_MATRIX_VECTOR_BLAS_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/products/TriangularSolverMatrix.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_TRIANGULAR_SOLVER_MATRIX_H\n#define EIGEN_TRIANGULAR_SOLVER_MATRIX_H\n\nnamespace Eigen { \n\nnamespace internal {\n\n// if the rhs is row major, let's transpose the product\ntemplate <typename Scalar, typename Index, int Side, int Mode, bool Conjugate, int TriStorageOrder, int OtherInnerStride>\nstruct triangular_solve_matrix<Scalar,Index,Side,Mode,Conjugate,TriStorageOrder,RowMajor,OtherInnerStride>\n{\n  static void run(\n    Index size, Index cols,\n    const Scalar*  tri, Index triStride,\n    Scalar* _other, Index otherIncr, Index otherStride,\n    level3_blocking<Scalar,Scalar>& blocking)\n  {\n    triangular_solve_matrix<\n      Scalar, Index, Side==OnTheLeft?OnTheRight:OnTheLeft,\n      (Mode&UnitDiag) | ((Mode&Upper) ? Lower : Upper),\n      NumTraits<Scalar>::IsComplex && Conjugate,\n      TriStorageOrder==RowMajor ? ColMajor : RowMajor, ColMajor, OtherInnerStride>\n      ::run(size, cols, tri, triStride, _other, otherIncr, otherStride, blocking);\n  }\n};\n\n/* Optimized triangular solver with multiple right hand side and the triangular matrix on the left\n */\ntemplate <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder,int OtherInnerStride>\nstruct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor,OtherInnerStride>\n{\n  static EIGEN_DONT_INLINE void run(\n    Index size, Index otherSize,\n    const Scalar* _tri, Index triStride,\n    Scalar* _other, Index otherIncr, Index otherStride,\n    level3_blocking<Scalar,Scalar>& blocking);\n};\ntemplate <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder, int OtherInnerStride>\nEIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor,OtherInnerStride>::run(\n    Index size, Index otherSize,\n    const Scalar* _tri, Index triStride,\n    Scalar* _other, Index otherIncr, Index otherStride,\n    level3_blocking<Scalar,Scalar>& blocking)\n  {\n    Index cols = otherSize;\n\n    typedef const_blas_data_mapper<Scalar, Index, TriStorageOrder> TriMapper;\n    typedef blas_data_mapper<Scalar, Index, ColMajor, Unaligned, OtherInnerStride> OtherMapper;\n    TriMapper tri(_tri, triStride);\n    OtherMapper other(_other, otherStride, otherIncr);\n\n    typedef gebp_traits<Scalar,Scalar> Traits;\n\n    enum {\n      SmallPanelWidth   = EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr),\n      IsLower = (Mode&Lower) == Lower\n    };\n\n    Index kc = blocking.kc();                   // cache block size along the K direction\n    Index mc = (std::min)(size,blocking.mc());  // cache block size along the M direction\n\n    std::size_t sizeA = kc*mc;\n    std::size_t sizeB = kc*cols;\n\n    ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());\n    ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());\n\n    conj_if<Conjugate> conj;\n    gebp_kernel<Scalar, Scalar, Index, OtherMapper, Traits::mr, Traits::nr, Conjugate, false> gebp_kernel;\n    gemm_pack_lhs<Scalar, Index, TriMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket4Packing, 
TriStorageOrder> pack_lhs;\n    gemm_pack_rhs<Scalar, Index, OtherMapper, Traits::nr, ColMajor, false, true> pack_rhs;\n\n    // the goal here is to subdivide the Rhs panels such that we keep some cache\n    // coherence when accessing the rhs elements\n    std::ptrdiff_t l1, l2, l3;\n    manage_caching_sizes(GetAction, &l1, &l2, &l3);\n    Index subcols = cols>0 ? l2/(4 * sizeof(Scalar) * std::max<Index>(otherStride,size)) : 0;\n    subcols = std::max<Index>((subcols/Traits::nr)*Traits::nr, Traits::nr);\n\n    for(Index k2=IsLower ? 0 : size;\n        IsLower ? k2<size : k2>0;\n        IsLower ? k2+=kc : k2-=kc)\n    {\n      const Index actual_kc = (std::min)(IsLower ? size-k2 : k2, kc);\n\n      // We have selected and packed a big horizontal panel R1 of rhs. Let B be the packed copy of this panel,\n      // and R2 the remaining part of rhs. The corresponding vertical panel of lhs is split into\n      // A11 (the triangular part) and A21 the remaining rectangular part.\n      // Then the high level algorithm is:\n      //  - B = R1                    => general block copy (done during the next step)\n      //  - R1 = A11^-1 B             => tricky part\n      //  - update B from the new R1  => actually this has to be performed continuously during the above step\n      //  - R2 -= A21 * B             => GEPP\n\n      // The tricky part: compute R1 = A11^-1 B while updating B from R1\n      // The idea is to split A11 into multiple small vertical panels.\n      // Each panel can be split into a small triangular part T1k which is processed without optimization,\n      // and the remaining small part T2k which is processed using gebp with appropriate block strides\n      for(Index j2=0; j2<cols; j2+=subcols)\n      {\n        Index actual_cols = (std::min)(cols-j2,subcols);\n        // for each small vertical panel [T1k^T, T2k^T]^T of lhs\n        for (Index k1=0; k1<actual_kc; k1+=SmallPanelWidth)\n        {\n          Index actualPanelWidth = std::min<Index>(actual_kc-k1, SmallPanelWidth);\n          // tr solve\n          for (Index k=0; k<actualPanelWidth; ++k)\n          {\n            // TODO write a small kernel handling this (can be shared with trsv)\n            Index i  = IsLower ? k2+k1+k : k2-k1-k-1;\n            Index rs = actualPanelWidth - k - 1; // remaining size\n            Index s  = TriStorageOrder==RowMajor ? (IsLower ? k2+k1 : i+1)\n                                                 :  IsLower ? i+1 : i-rs;\n\n            Scalar a = (Mode & UnitDiag) ? 
Scalar(1) : Scalar(1)/conj(tri(i,i));\n            for (Index j=j2; j<j2+actual_cols; ++j)\n            {\n              if (TriStorageOrder==RowMajor)\n              {\n                Scalar b(0);\n                const Scalar* l = &tri(i,s);\n                typename OtherMapper::LinearMapper r = other.getLinearMapper(s,j);\n                for (Index i3=0; i3<k; ++i3)\n                  b += conj(l[i3]) * r(i3);\n\n                other(i,j) = (other(i,j) - b)*a;\n              }\n              else\n              {\n                Scalar& otherij = other(i,j);\n                otherij *= a;\n                Scalar b = otherij;\n                typename OtherMapper::LinearMapper r = other.getLinearMapper(s,j);\n                typename TriMapper::LinearMapper l = tri.getLinearMapper(s,i);\n                for (Index i3=0;i3<rs;++i3)\n                  r(i3) -= b * conj(l(i3));\n              }\n            }\n          }\n\n          Index lengthTarget = actual_kc-k1-actualPanelWidth;\n          Index startBlock   = IsLower ? k2+k1 : k2-k1-actualPanelWidth;\n          Index blockBOffset = IsLower ? k1 : lengthTarget;\n\n          // update the respective rows of B from other\n          pack_rhs(blockB+actual_kc*j2, other.getSubMapper(startBlock,j2), actualPanelWidth, actual_cols, actual_kc, blockBOffset);\n\n          // GEBP\n          if (lengthTarget>0)\n          {\n            Index startTarget  = IsLower ? k2+k1+actualPanelWidth : k2-actual_kc;\n\n            pack_lhs(blockA, tri.getSubMapper(startTarget,startBlock), actualPanelWidth, lengthTarget);\n\n            gebp_kernel(other.getSubMapper(startTarget,j2), blockA, blockB+actual_kc*j2, lengthTarget, actualPanelWidth, actual_cols, Scalar(-1),\n                        actualPanelWidth, actual_kc, 0, blockBOffset);\n          }\n        }\n      }\n      \n      // R2 -= A21 * B => GEPP\n      {\n        Index start = IsLower ? k2+kc : 0;\n        Index end   = IsLower ? size : k2-kc;\n        for(Index i2=start; i2<end; i2+=mc)\n        {\n          const Index actual_mc = (std::min)(mc,end-i2);\n          if (actual_mc>0)\n          {\n            pack_lhs(blockA, tri.getSubMapper(i2, IsLower ? 
k2 : k2-kc), actual_kc, actual_mc);\n\n            gebp_kernel(other.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc, cols, Scalar(-1), -1, -1, 0, 0);\n          }\n        }\n      }\n    }\n  }\n\n/* Optimized triangular solver with multiple left hand sides and the triangular matrix on the right\n */\ntemplate <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder, int OtherInnerStride>\nstruct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor,OtherInnerStride>\n{\n  static EIGEN_DONT_INLINE void run(\n    Index size, Index otherSize,\n    const Scalar* _tri, Index triStride,\n    Scalar* _other, Index otherIncr, Index otherStride,\n    level3_blocking<Scalar,Scalar>& blocking);\n};\ntemplate <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder, int OtherInnerStride>\nEIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor,OtherInnerStride>::run(\n    Index size, Index otherSize,\n    const Scalar* _tri, Index triStride,\n    Scalar* _other, Index otherIncr, Index otherStride,\n    level3_blocking<Scalar,Scalar>& blocking)\n  {\n    Index rows = otherSize;\n    typedef typename NumTraits<Scalar>::Real RealScalar;\n\n    typedef blas_data_mapper<Scalar, Index, ColMajor, Unaligned, OtherInnerStride> LhsMapper;\n    typedef const_blas_data_mapper<Scalar, Index, TriStorageOrder> RhsMapper;\n    LhsMapper lhs(_other, otherStride, otherIncr);\n    RhsMapper rhs(_tri, triStride);\n\n    typedef gebp_traits<Scalar,Scalar> Traits;\n    enum {\n      RhsStorageOrder   = TriStorageOrder,\n      SmallPanelWidth   = EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr),\n      IsLower = (Mode&Lower) == Lower\n    };\n\n    Index kc = blocking.kc();                   // cache block size along the K direction\n    Index mc = (std::min)(rows,blocking.mc());  // cache block size along the M direction\n\n    std::size_t sizeA = kc*mc;\n    std::size_t sizeB = kc*size;\n\n    ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());\n    ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());\n\n    conj_if<Conjugate> conj;\n    gebp_kernel<Scalar, Scalar, Index, LhsMapper, Traits::mr, Traits::nr, false, Conjugate> gebp_kernel;\n    gemm_pack_rhs<Scalar, Index, RhsMapper, Traits::nr, RhsStorageOrder> pack_rhs;\n    gemm_pack_rhs<Scalar, Index, RhsMapper, Traits::nr, RhsStorageOrder,false,true> pack_rhs_panel;\n    gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket4Packing, ColMajor, false, true> pack_lhs_panel;\n\n    for(Index k2=IsLower ? size : 0;\n        IsLower ? k2>0 : k2<size;\n        IsLower ? k2-=kc : k2+=kc)\n    {\n      const Index actual_kc = (std::min)(IsLower ? k2 : size-k2, kc);\n      Index actual_k2 = IsLower ? k2-actual_kc : k2 ;\n\n      Index startPanel = IsLower ? 0 : k2+actual_kc;\n      Index rs = IsLower ? 
actual_k2 : size - actual_k2 - actual_kc;\n      Scalar* geb = blockB+actual_kc*actual_kc;\n\n      if (rs>0) pack_rhs(geb, rhs.getSubMapper(actual_k2,startPanel), actual_kc, rs);\n\n      // triangular packing (we only pack the panels off the diagonal,\n      // neglecting the blocks overlapping the diagonal)\n      {\n        for (Index j2=0; j2<actual_kc; j2+=SmallPanelWidth)\n        {\n          Index actualPanelWidth = std::min<Index>(actual_kc-j2, SmallPanelWidth);\n          Index actual_j2 = actual_k2 + j2;\n          Index panelOffset = IsLower ? j2+actualPanelWidth : 0;\n          Index panelLength = IsLower ? actual_kc-j2-actualPanelWidth : j2;\n\n          if (panelLength>0)\n          pack_rhs_panel(blockB+j2*actual_kc,\n                         rhs.getSubMapper(actual_k2+panelOffset, actual_j2),\n                         panelLength, actualPanelWidth,\n                         actual_kc, panelOffset);\n        }\n      }\n\n      for(Index i2=0; i2<rows; i2+=mc)\n      {\n        const Index actual_mc = (std::min)(mc,rows-i2);\n\n        // triangular solver kernel\n        {\n          // for each small block of the diagonal (=> vertical panels of rhs)\n          for (Index j2 = IsLower\n                      ? (actual_kc - ((actual_kc%SmallPanelWidth) ? Index(actual_kc%SmallPanelWidth)\n                                                                  : Index(SmallPanelWidth)))\n                      : 0;\n               IsLower ? j2>=0 : j2<actual_kc;\n               IsLower ? j2-=SmallPanelWidth : j2+=SmallPanelWidth)\n          {\n            Index actualPanelWidth = std::min<Index>(actual_kc-j2, SmallPanelWidth);\n            Index absolute_j2 = actual_k2 + j2;\n            Index panelOffset = IsLower ? j2+actualPanelWidth : 0;\n            Index panelLength = IsLower ? actual_kc - j2 - actualPanelWidth : j2;\n\n            // GEBP\n            if(panelLength>0)\n            {\n              gebp_kernel(lhs.getSubMapper(i2,absolute_j2),\n                          blockA, blockB+j2*actual_kc,\n                          actual_mc, panelLength, actualPanelWidth,\n                          Scalar(-1),\n                          actual_kc, actual_kc, // strides\n                          panelOffset, panelOffset); // offsets\n            }\n\n            // unblocked triangular solve\n            for (Index k=0; k<actualPanelWidth; ++k)\n            {\n              Index j = IsLower ? absolute_j2+actualPanelWidth-k-1 : absolute_j2+k;\n\n              typename LhsMapper::LinearMapper r = lhs.getLinearMapper(i2,j);\n              for (Index k3=0; k3<k; ++k3)\n              {\n                Scalar b = conj(rhs(IsLower ? j+1+k3 : absolute_j2+k3,j));\n                typename LhsMapper::LinearMapper a = lhs.getLinearMapper(i2,IsLower ? 
j+1+k3 : absolute_j2+k3);\n                for (Index i=0; i<actual_mc; ++i)\n                  r(i) -= a(i) * b;\n              }\n              if((Mode & UnitDiag)==0)\n              {\n                Scalar inv_rjj = RealScalar(1)/conj(rhs(j,j));\n                for (Index i=0; i<actual_mc; ++i)\n                  r(i) *= inv_rjj;\n              }\n            }\n\n            // pack the just computed part of lhs to A\n            pack_lhs_panel(blockA, lhs.getSubMapper(i2,absolute_j2),\n                           actualPanelWidth, actual_mc,\n                           actual_kc, j2);\n          }\n        }\n\n        if (rs>0)\n          gebp_kernel(lhs.getSubMapper(i2, startPanel), blockA, geb,\n                      actual_mc, actual_kc, rs, Scalar(-1),\n                      -1, -1, 0, 0);\n      }\n    }\n  }\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_TRIANGULAR_SOLVER_MATRIX_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h",
    "content": "/*\n Copyright (c) 2011, Intel Corporation. All rights reserved.\n\n Redistribution and use in source and binary forms, with or without modification,\n are permitted provided that the following conditions are met:\n\n * Redistributions of source code must retain the above copyright notice, this\n   list of conditions and the following disclaimer.\n * Redistributions in binary form must reproduce the above copyright notice,\n   this list of conditions and the following disclaimer in the documentation\n   and/or other materials provided with the distribution.\n * Neither the name of Intel Corporation nor the names of its contributors may\n   be used to endorse or promote products derived from this software without\n   specific prior written permission.\n\n THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\" AND\n ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED\n WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE\n DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR\n ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES\n (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;\n LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON\n ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\n SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\n ********************************************************************************\n *   Content : Eigen bindings to BLAS F77\n *   Triangular matrix * matrix product functionality based on ?TRMM.\n ********************************************************************************\n*/\n\n#ifndef EIGEN_TRIANGULAR_SOLVER_MATRIX_BLAS_H\n#define EIGEN_TRIANGULAR_SOLVER_MATRIX_BLAS_H\n\nnamespace Eigen {\n\nnamespace internal {\n\n// implements LeftSide op(triangular)^-1 * general\n#define EIGEN_BLAS_TRSM_L(EIGTYPE, BLASTYPE, BLASFUNC) \\\ntemplate <typename Index, int Mode, bool Conjugate, int TriStorageOrder> \\\nstruct triangular_solve_matrix<EIGTYPE,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor,1> \\\n{ \\\n  enum { \\\n    IsLower = (Mode&Lower) == Lower, \\\n    IsUnitDiag  = (Mode&UnitDiag) ? 1 : 0, \\\n    IsZeroDiag  = (Mode&ZeroDiag) ? 1 : 0, \\\n    conjA = ((TriStorageOrder==ColMajor) && Conjugate) ? 1 : 0 \\\n  }; \\\n  static void run( \\\n      Index size, Index otherSize, \\\n      const EIGTYPE* _tri, Index triStride, \\\n      EIGTYPE* _other, Index otherIncr, Index otherStride, level3_blocking<EIGTYPE,EIGTYPE>& /*blocking*/) \\\n  { \\\n   EIGEN_ONLY_USED_FOR_DEBUG(otherIncr); \\\n   eigen_assert(otherIncr == 1); \\\n   BlasIndex m = convert_index<BlasIndex>(size), n = convert_index<BlasIndex>(otherSize), lda, ldb; \\\n   char side = 'L', uplo, diag='N', transa; \\\n   /* Set alpha_ */ \\\n   EIGTYPE alpha(1); \\\n   ldb = convert_index<BlasIndex>(otherStride);\\\n\\\n   const EIGTYPE *a; \\\n/* Set trans */ \\\n   transa = (TriStorageOrder==RowMajor) ? ((Conjugate) ? 'C' : 'T') : 'N'; \\\n/* Set uplo */ \\\n   uplo = IsLower ? 'L' : 'U'; \\\n   if (TriStorageOrder==RowMajor) uplo = (uplo == 'L') ? 
'U' : 'L'; \\\n/* Set a, lda */ \\\n   typedef Matrix<EIGTYPE, Dynamic, Dynamic, TriStorageOrder> MatrixTri; \\\n   Map<const MatrixTri, 0, OuterStride<> > tri(_tri,size,size,OuterStride<>(triStride)); \\\n   MatrixTri a_tmp; \\\n\\\n   if (conjA) { \\\n     a_tmp = tri.conjugate(); \\\n     a = a_tmp.data(); \\\n     lda = convert_index<BlasIndex>(a_tmp.outerStride()); \\\n   } else { \\\n     a = _tri; \\\n     lda = convert_index<BlasIndex>(triStride); \\\n   } \\\n   if (IsUnitDiag) diag='U'; \\\n/* call ?trsm*/ \\\n   BLASFUNC(&side, &uplo, &transa, &diag, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)_other, &ldb); \\\n } \\\n};\n\n#ifdef EIGEN_USE_MKL\nEIGEN_BLAS_TRSM_L(double,   double, dtrsm)\nEIGEN_BLAS_TRSM_L(dcomplex, MKL_Complex16, ztrsm)\nEIGEN_BLAS_TRSM_L(float,    float,  strsm)\nEIGEN_BLAS_TRSM_L(scomplex, MKL_Complex8, ctrsm)\n#else\nEIGEN_BLAS_TRSM_L(double,   double, dtrsm_)\nEIGEN_BLAS_TRSM_L(dcomplex, double, ztrsm_)\nEIGEN_BLAS_TRSM_L(float,    float,  strsm_)\nEIGEN_BLAS_TRSM_L(scomplex, float,  ctrsm_)\n#endif\n\n// implements RightSide general * op(triangular)^-1\n#define EIGEN_BLAS_TRSM_R(EIGTYPE, BLASTYPE, BLASFUNC) \\\ntemplate <typename Index, int Mode, bool Conjugate, int TriStorageOrder> \\\nstruct triangular_solve_matrix<EIGTYPE,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor,1> \\\n{ \\\n  enum { \\\n    IsLower = (Mode&Lower) == Lower, \\\n    IsUnitDiag  = (Mode&UnitDiag) ? 1 : 0, \\\n    IsZeroDiag  = (Mode&ZeroDiag) ? 1 : 0, \\\n    conjA = ((TriStorageOrder==ColMajor) && Conjugate) ? 1 : 0 \\\n  }; \\\n  static void run( \\\n      Index size, Index otherSize, \\\n      const EIGTYPE* _tri, Index triStride, \\\n      EIGTYPE* _other, Index otherIncr, Index otherStride, level3_blocking<EIGTYPE,EIGTYPE>& /*blocking*/) \\\n  { \\\n   EIGEN_ONLY_USED_FOR_DEBUG(otherIncr); \\\n   eigen_assert(otherIncr == 1); \\\n   BlasIndex m = convert_index<BlasIndex>(otherSize), n = convert_index<BlasIndex>(size), lda, ldb; \\\n   char side = 'R', uplo, diag='N', transa; \\\n   /* Set alpha_ */ \\\n   EIGTYPE alpha(1); \\\n   ldb = convert_index<BlasIndex>(otherStride);\\\n\\\n   const EIGTYPE *a; \\\n/* Set trans */ \\\n   transa = (TriStorageOrder==RowMajor) ? ((Conjugate) ? 'C' : 'T') : 'N'; \\\n/* Set uplo */ \\\n   uplo = IsLower ? 'L' : 'U'; \\\n   if (TriStorageOrder==RowMajor) uplo = (uplo == 'L') ? 
'U' : 'L'; \\\n/* Set a, lda */ \\\n   typedef Matrix<EIGTYPE, Dynamic, Dynamic, TriStorageOrder> MatrixTri; \\\n   Map<const MatrixTri, 0, OuterStride<> > tri(_tri,size,size,OuterStride<>(triStride)); \\\n   MatrixTri a_tmp; \\\n\\\n   if (conjA) { \\\n     a_tmp = tri.conjugate(); \\\n     a = a_tmp.data(); \\\n     lda = convert_index<BlasIndex>(a_tmp.outerStride()); \\\n   } else { \\\n     a = _tri; \\\n     lda = convert_index<BlasIndex>(triStride); \\\n   } \\\n   if (IsUnitDiag) diag='U'; \\\n/* call ?trsm*/ \\\n   BLASFUNC(&side, &uplo, &transa, &diag, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)_other, &ldb); \\\n   /*std::cout << \"TRSM_R specialization!\\n\";*/ \\\n } \\\n};\n\n#ifdef EIGEN_USE_MKL\nEIGEN_BLAS_TRSM_R(double,   double, dtrsm)\nEIGEN_BLAS_TRSM_R(dcomplex, MKL_Complex16, ztrsm)\nEIGEN_BLAS_TRSM_R(float,    float,  strsm)\nEIGEN_BLAS_TRSM_R(scomplex, MKL_Complex8,  ctrsm)\n#else\nEIGEN_BLAS_TRSM_R(double,   double, dtrsm_)\nEIGEN_BLAS_TRSM_R(dcomplex, double, ztrsm_)\nEIGEN_BLAS_TRSM_R(float,    float,  strsm_)\nEIGEN_BLAS_TRSM_R(scomplex, float,  ctrsm_)\n#endif\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_TRIANGULAR_SOLVER_MATRIX_BLAS_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/products/TriangularSolverVector.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_TRIANGULAR_SOLVER_VECTOR_H\n#define EIGEN_TRIANGULAR_SOLVER_VECTOR_H\n\nnamespace Eigen {\n\nnamespace internal {\n\ntemplate<typename LhsScalar, typename RhsScalar, typename Index, int Mode, bool Conjugate, int StorageOrder>\nstruct triangular_solve_vector<LhsScalar, RhsScalar, Index, OnTheRight, Mode, Conjugate, StorageOrder>\n{\n  static void run(Index size, const LhsScalar* _lhs, Index lhsStride, RhsScalar* rhs)\n  {\n    triangular_solve_vector<LhsScalar,RhsScalar,Index,OnTheLeft,\n        ((Mode&Upper)==Upper ? Lower : Upper) | (Mode&UnitDiag),\n        Conjugate,StorageOrder==RowMajor?ColMajor:RowMajor\n      >::run(size, _lhs, lhsStride, rhs);\n  }\n};\n\n// forward and backward substitution, row-major, rhs is a vector\ntemplate<typename LhsScalar, typename RhsScalar, typename Index, int Mode, bool Conjugate>\nstruct triangular_solve_vector<LhsScalar, RhsScalar, Index, OnTheLeft, Mode, Conjugate, RowMajor>\n{\n  enum {\n    IsLower = ((Mode&Lower)==Lower)\n  };\n  static void run(Index size, const LhsScalar* _lhs, Index lhsStride, RhsScalar* rhs)\n  {\n    typedef Map<const Matrix<LhsScalar,Dynamic,Dynamic,RowMajor>, 0, OuterStride<> > LhsMap;\n    const LhsMap lhs(_lhs,size,size,OuterStride<>(lhsStride));\n\n    typedef const_blas_data_mapper<LhsScalar,Index,RowMajor> LhsMapper;\n    typedef const_blas_data_mapper<RhsScalar,Index,ColMajor> RhsMapper;\n\n    typename internal::conditional<\n                          Conjugate,\n                          const CwiseUnaryOp<typename internal::scalar_conjugate_op<LhsScalar>,LhsMap>,\n                          const LhsMap&>\n                        ::type cjLhs(lhs);\n    static const Index PanelWidth = EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH;\n    for(Index pi=IsLower ? 0 : size;\n        IsLower ? pi<size : pi>0;\n        IsLower ? pi+=PanelWidth : pi-=PanelWidth)\n    {\n      Index actualPanelWidth = (std::min)(IsLower ? size - pi : pi, PanelWidth);\n\n      Index r = IsLower ? pi : size - pi; // remaining size\n      if (r > 0)\n      {\n        // let's directly call the low level product function because:\n        // 1 - it is faster to compile\n        // 2 - it is slightly faster at runtime\n        Index startRow = IsLower ? pi : pi-actualPanelWidth;\n        Index startCol = IsLower ? 0 : pi;\n\n        general_matrix_vector_product<Index,LhsScalar,LhsMapper,RowMajor,Conjugate,RhsScalar,RhsMapper,false>::run(\n          actualPanelWidth, r,\n          LhsMapper(&lhs.coeffRef(startRow,startCol), lhsStride),\n          RhsMapper(rhs + startCol, 1),\n          rhs + startRow, 1,\n          RhsScalar(-1));\n      }\n\n      for(Index k=0; k<actualPanelWidth; ++k)\n      {\n        Index i = IsLower ? pi+k : pi-k-1;\n        Index s = IsLower ? 
pi   : i+1;\n        if (k>0)\n          rhs[i] -= (cjLhs.row(i).segment(s,k).transpose().cwiseProduct(Map<const Matrix<RhsScalar,Dynamic,1> >(rhs+s,k))).sum();\n\n        if((!(Mode & UnitDiag)) && numext::not_equal_strict(rhs[i],RhsScalar(0)))\n          rhs[i] /= cjLhs(i,i);\n      }\n    }\n  }\n};\n\n// forward and backward substitution, column-major, rhs is a vector\ntemplate<typename LhsScalar, typename RhsScalar, typename Index, int Mode, bool Conjugate>\nstruct triangular_solve_vector<LhsScalar, RhsScalar, Index, OnTheLeft, Mode, Conjugate, ColMajor>\n{\n  enum {\n    IsLower = ((Mode&Lower)==Lower)\n  };\n  static void run(Index size, const LhsScalar* _lhs, Index lhsStride, RhsScalar* rhs)\n  {\n    typedef Map<const Matrix<LhsScalar,Dynamic,Dynamic,ColMajor>, 0, OuterStride<> > LhsMap;\n    const LhsMap lhs(_lhs,size,size,OuterStride<>(lhsStride));\n    typedef const_blas_data_mapper<LhsScalar,Index,ColMajor> LhsMapper;\n    typedef const_blas_data_mapper<RhsScalar,Index,ColMajor> RhsMapper;\n    typename internal::conditional<Conjugate,\n                                   const CwiseUnaryOp<typename internal::scalar_conjugate_op<LhsScalar>,LhsMap>,\n                                   const LhsMap&\n                                  >::type cjLhs(lhs);\n    static const Index PanelWidth = EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH;\n\n    for(Index pi=IsLower ? 0 : size;\n        IsLower ? pi<size : pi>0;\n        IsLower ? pi+=PanelWidth : pi-=PanelWidth)\n    {\n      Index actualPanelWidth = (std::min)(IsLower ? size - pi : pi, PanelWidth);\n      Index startBlock = IsLower ? pi : pi-actualPanelWidth;\n      Index endBlock = IsLower ? pi + actualPanelWidth : 0;\n\n      for(Index k=0; k<actualPanelWidth; ++k)\n      {\n        Index i = IsLower ? pi+k : pi-k-1;\n        if(numext::not_equal_strict(rhs[i],RhsScalar(0)))\n        {\n          if(!(Mode & UnitDiag))\n            rhs[i] /= cjLhs.coeff(i,i);\n\n          Index r = actualPanelWidth - k - 1; // remaining size\n          Index s = IsLower ? i+1 : i-r;\n          if (r>0)\n            Map<Matrix<RhsScalar,Dynamic,1> >(rhs+s,r) -= rhs[i] * cjLhs.col(i).segment(s,r);\n        }\n      }\n      Index r = IsLower ? size - endBlock : startBlock; // remaining size\n      if (r > 0)\n      {\n        // let's directly call the low level product function because:\n        // 1 - it is faster to compile\n        // 2 - it is slightly faster at runtime\n        general_matrix_vector_product<Index,LhsScalar,LhsMapper,ColMajor,Conjugate,RhsScalar,RhsMapper,false>::run(\n            r, actualPanelWidth,\n            LhsMapper(&lhs.coeffRef(endBlock,startBlock), lhsStride),\n            RhsMapper(rhs+startBlock, 1),\n            rhs+endBlock, 1, RhsScalar(-1));\n      }\n    }\n  }\n};\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_TRIANGULAR_SOLVER_VECTOR_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/util/BlasUtil.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_BLASUTIL_H\n#define EIGEN_BLASUTIL_H\n\n// This file contains many lightweight helper classes used to\n// implement and control fast level 2 and level 3 BLAS-like routines.\n\nnamespace Eigen {\n\nnamespace internal {\n\n// forward declarations\ntemplate<typename LhsScalar, typename RhsScalar, typename Index, typename DataMapper, int mr, int nr, bool ConjugateLhs=false, bool ConjugateRhs=false>\nstruct gebp_kernel;\n\ntemplate<typename Scalar, typename Index, typename DataMapper, int nr, int StorageOrder, bool Conjugate = false, bool PanelMode=false>\nstruct gemm_pack_rhs;\n\ntemplate<typename Scalar, typename Index, typename DataMapper, int Pack1, int Pack2, typename Packet, int StorageOrder, bool Conjugate = false, bool PanelMode = false>\nstruct gemm_pack_lhs;\n\ntemplate<\n  typename Index,\n  typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,\n  typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs,\n  int ResStorageOrder, int ResInnerStride>\nstruct general_matrix_matrix_product;\n\ntemplate<typename Index,\n         typename LhsScalar, typename LhsMapper, int LhsStorageOrder, bool ConjugateLhs,\n         typename RhsScalar, typename RhsMapper, bool ConjugateRhs, int Version=Specialized>\nstruct general_matrix_vector_product;\n\ntemplate<typename From,typename To> struct get_factor {\n  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE To run(const From& x) { return To(x); }\n};\n\ntemplate<typename Scalar> struct get_factor<Scalar,typename NumTraits<Scalar>::Real> {\n  EIGEN_DEVICE_FUNC\n  static EIGEN_STRONG_INLINE typename NumTraits<Scalar>::Real run(const Scalar& x) { return numext::real(x); }\n};\n\n\ntemplate<typename Scalar, typename Index>\nclass BlasVectorMapper {\n  public:\n  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE BlasVectorMapper(Scalar *data) : m_data(data) {}\n\n  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar operator()(Index i) const {\n    return m_data[i];\n  }\n  template <typename Packet, int AlignmentType>\n  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet load(Index i) const {\n    return ploadt<Packet, AlignmentType>(m_data + i);\n  }\n\n  template <typename Packet>\n  EIGEN_DEVICE_FUNC bool aligned(Index i) const {\n    return (UIntPtr(m_data+i)%sizeof(Packet))==0;\n  }\n\n  protected:\n  Scalar* m_data;\n};\n\ntemplate<typename Scalar, typename Index, int AlignmentType, int Incr=1>\nclass BlasLinearMapper;\n\ntemplate<typename Scalar, typename Index, int AlignmentType>\nclass BlasLinearMapper<Scalar,Index,AlignmentType>\n{\npublic:\n  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE BlasLinearMapper(Scalar *data, Index incr=1)\n    : m_data(data)\n  {\n    EIGEN_ONLY_USED_FOR_DEBUG(incr);\n    eigen_assert(incr==1);\n  }\n\n  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void prefetch(int i) const {\n    internal::prefetch(&operator()(i));\n  }\n\n  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar& operator()(Index i) const {\n    return m_data[i];\n  }\n\n  template<typename PacketType>\n  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketType loadPacket(Index i) const {\n    return ploadt<PacketType, AlignmentType>(m_data + i);\n  }\n\n  template<typename PacketType>\n  
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacket(Index i, const PacketType &p) const {\n    pstoret<Scalar, PacketType, AlignmentType>(m_data + i, p);\n  }\n\nprotected:\n  Scalar *m_data;\n};\n\n// Lightweight helper class to access matrix coefficients.\ntemplate<typename Scalar, typename Index, int StorageOrder, int AlignmentType = Unaligned, int Incr = 1>\nclass blas_data_mapper;\n\n// TMP to help PacketBlock store implementation.\n// There's currently no known use case for PacketBlock load.\n// The default implementation assumes ColMajor order.\n// It always stores each packet sequentially one `stride` apart.\ntemplate<typename Index, typename Scalar, typename Packet, int n, int idx, int StorageOrder>\nstruct PacketBlockManagement\n{\n  PacketBlockManagement<Index, Scalar, Packet, n, idx - 1, StorageOrder> pbm;\n  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(Scalar *to, const Index stride, Index i, Index j, const PacketBlock<Packet, n> &block) const {\n    pbm.store(to, stride, i, j, block);\n    pstoreu<Scalar>(to + i + (j + idx)*stride, block.packet[idx]);\n  }\n};\n\n// PacketBlockManagement specialization to take care of RowMajor order without ifs.\ntemplate<typename Index, typename Scalar, typename Packet, int n, int idx>\nstruct PacketBlockManagement<Index, Scalar, Packet, n, idx, RowMajor>\n{\n  PacketBlockManagement<Index, Scalar, Packet, n, idx - 1, RowMajor> pbm;\n  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(Scalar *to, const Index stride, Index i, Index j, const PacketBlock<Packet, n> &block) const {\n    pbm.store(to, stride, i, j, block);\n    pstoreu<Scalar>(to + j + (i + idx)*stride, block.packet[idx]);\n  }\n};\n\ntemplate<typename Index, typename Scalar, typename Packet, int n, int StorageOrder>\nstruct PacketBlockManagement<Index, Scalar, Packet, n, -1, StorageOrder>\n{\n  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(Scalar *to, const Index stride, Index i, Index j, const PacketBlock<Packet, n> &block) const {\n    EIGEN_UNUSED_VARIABLE(to);\n    EIGEN_UNUSED_VARIABLE(stride);\n    EIGEN_UNUSED_VARIABLE(i);\n    EIGEN_UNUSED_VARIABLE(j);\n    EIGEN_UNUSED_VARIABLE(block);\n  }\n};\n\ntemplate<typename Index, typename Scalar, typename Packet, int n>\nstruct PacketBlockManagement<Index, Scalar, Packet, n, -1, RowMajor>\n{\n  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(Scalar *to, const Index stride, Index i, Index j, const PacketBlock<Packet, n> &block) const {\n    EIGEN_UNUSED_VARIABLE(to);\n    EIGEN_UNUSED_VARIABLE(stride);\n    EIGEN_UNUSED_VARIABLE(i);\n    EIGEN_UNUSED_VARIABLE(j);\n    EIGEN_UNUSED_VARIABLE(block);\n  }\n};\n\ntemplate<typename Scalar, typename Index, int StorageOrder, int AlignmentType>\nclass blas_data_mapper<Scalar,Index,StorageOrder,AlignmentType,1>\n{\npublic:\n  typedef BlasLinearMapper<Scalar, Index, AlignmentType> LinearMapper;\n  typedef BlasVectorMapper<Scalar, Index> VectorMapper;\n\n  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE blas_data_mapper(Scalar* data, Index stride, Index incr=1)\n   : m_data(data), m_stride(stride)\n  {\n    EIGEN_ONLY_USED_FOR_DEBUG(incr);\n    eigen_assert(incr==1);\n  }\n\n  EIGEN_DEVICE_FUNC  EIGEN_ALWAYS_INLINE blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType>\n  getSubMapper(Index i, Index j) const {\n    return blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType>(&operator()(i, j), m_stride);\n  }\n\n  EIGEN_DEVICE_FUNC  EIGEN_ALWAYS_INLINE LinearMapper getLinearMapper(Index i, Index j) const {\n    return LinearMapper(&operator()(i, j));\n  }\n\n  EIGEN_DEVICE_FUNC  
EIGEN_ALWAYS_INLINE VectorMapper getVectorMapper(Index i, Index j) const {\n    return VectorMapper(&operator()(i, j));\n  }\n\n\n  EIGEN_DEVICE_FUNC\n  EIGEN_ALWAYS_INLINE Scalar& operator()(Index i, Index j) const {\n    return m_data[StorageOrder==RowMajor ? j + i*m_stride : i + j*m_stride];\n  }\n\n  template<typename PacketType>\n  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketType loadPacket(Index i, Index j) const {\n    return ploadt<PacketType, AlignmentType>(&operator()(i, j));\n  }\n\n  template <typename PacketT, int AlignmentT>\n  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketT load(Index i, Index j) const {\n    return ploadt<PacketT, AlignmentT>(&operator()(i, j));\n  }\n\n  template<typename SubPacket>\n  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void scatterPacket(Index i, Index j, const SubPacket &p) const {\n    pscatter<Scalar, SubPacket>(&operator()(i, j), p, m_stride);\n  }\n\n  template<typename SubPacket>\n  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE SubPacket gatherPacket(Index i, Index j) const {\n    return pgather<Scalar, SubPacket>(&operator()(i, j), m_stride);\n  }\n\n  EIGEN_DEVICE_FUNC const Index stride() const { return m_stride; }\n  EIGEN_DEVICE_FUNC const Scalar* data() const { return m_data; }\n\n  EIGEN_DEVICE_FUNC Index firstAligned(Index size) const {\n    if (UIntPtr(m_data)%sizeof(Scalar)) {\n      return -1;\n    }\n    return internal::first_default_aligned(m_data, size);\n  }\n\n  template<typename SubPacket, int n>\n  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacketBlock(Index i, Index j, const PacketBlock<SubPacket, n> &block) const {\n    PacketBlockManagement<Index, Scalar, SubPacket, n, n-1, StorageOrder> pbm;\n    pbm.store(m_data, m_stride, i, j, block);\n  }\nprotected:\n  Scalar* EIGEN_RESTRICT m_data;\n  const Index m_stride;\n};\n\n// Implementation of non-natural increment (i.e. 
inner-stride != 1)\n// The exposed API is not complete yet compared to the Incr==1 case\n// because some features make less sense in this case.\ntemplate<typename Scalar, typename Index, int AlignmentType, int Incr>\nclass BlasLinearMapper\n{\npublic:\n  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE BlasLinearMapper(Scalar *data,Index incr) : m_data(data), m_incr(incr) {}\n\n  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void prefetch(int i) const {\n    internal::prefetch(&operator()(i));\n  }\n\n  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar& operator()(Index i) const {\n    return m_data[i*m_incr.value()];\n  }\n\n  template<typename PacketType>\n  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketType loadPacket(Index i) const {\n    return pgather<Scalar,PacketType>(m_data + i*m_incr.value(), m_incr.value());\n  }\n\n  template<typename PacketType>\n  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacket(Index i, const PacketType &p) const {\n    pscatter<Scalar, PacketType>(m_data + i*m_incr.value(), p, m_incr.value());\n  }\n\nprotected:\n  Scalar *m_data;\n  const internal::variable_if_dynamic<Index,Incr> m_incr;\n};\n\ntemplate<typename Scalar, typename Index, int StorageOrder, int AlignmentType,int Incr>\nclass blas_data_mapper\n{\npublic:\n  typedef BlasLinearMapper<Scalar, Index, AlignmentType,Incr> LinearMapper;\n\n  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE blas_data_mapper(Scalar* data, Index stride, Index incr) : m_data(data), m_stride(stride), m_incr(incr) {}\n\n  EIGEN_DEVICE_FUNC  EIGEN_ALWAYS_INLINE blas_data_mapper\n  getSubMapper(Index i, Index j) const {\n    return blas_data_mapper(&operator()(i, j), m_stride, m_incr.value());\n  }\n\n  EIGEN_DEVICE_FUNC  EIGEN_ALWAYS_INLINE LinearMapper getLinearMapper(Index i, Index j) const {\n    return LinearMapper(&operator()(i, j), m_incr.value());\n  }\n\n  EIGEN_DEVICE_FUNC\n  EIGEN_ALWAYS_INLINE Scalar& operator()(Index i, Index j) const {\n    return m_data[StorageOrder==RowMajor ? 
j*m_incr.value() + i*m_stride : i*m_incr.value() + j*m_stride];\n  }\n\n  template<typename PacketType>\n  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketType loadPacket(Index i, Index j) const {\n    return pgather<Scalar,PacketType>(&operator()(i, j),m_incr.value());\n  }\n\n  template <typename PacketT, int AlignmentT>\n  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketT load(Index i, Index j) const {\n    return pgather<Scalar,PacketT>(&operator()(i, j),m_incr.value());\n  }\n\n  template<typename SubPacket>\n  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void scatterPacket(Index i, Index j, const SubPacket &p) const {\n    pscatter<Scalar, SubPacket>(&operator()(i, j), p, m_stride);\n  }\n\n  template<typename SubPacket>\n  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE SubPacket gatherPacket(Index i, Index j) const {\n    return pgather<Scalar, SubPacket>(&operator()(i, j), m_stride);\n  }\n\n  // storePacketBlock_helper defines a way to access values inside the PacketBlock, this is essentially required by the Complex types.\n  template<typename SubPacket, typename ScalarT, int n, int idx>\n  struct storePacketBlock_helper\n  {\n    storePacketBlock_helper<SubPacket, ScalarT, n, idx-1> spbh;\n    EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(const blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType, Incr>* sup, Index i, Index j, const PacketBlock<SubPacket, n>& block) const {\n      spbh.store(sup, i,j,block);\n      for(int l = 0; l < unpacket_traits<SubPacket>::size; l++)\n      {\n        ScalarT *v = &sup->operator()(i+l, j+idx);\n        *v = block.packet[idx][l];\n      }\n    }\n  };\n\n  template<typename SubPacket, int n, int idx>\n  struct storePacketBlock_helper<SubPacket, std::complex<float>, n, idx>\n  {\n    storePacketBlock_helper<SubPacket, std::complex<float>, n, idx-1> spbh;\n    EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(const blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType, Incr>* sup, Index i, Index j, const PacketBlock<SubPacket, n>& block) const {\n      spbh.store(sup,i,j,block);\n      for(int l = 0; l < unpacket_traits<SubPacket>::size; l++)\n      {\n        std::complex<float> *v = &sup->operator()(i+l, j+idx);\n        v->real(block.packet[idx].v[2*l+0]);\n        v->imag(block.packet[idx].v[2*l+1]);\n      }\n    }\n  };\n\n  template<typename SubPacket, int n, int idx>\n  struct storePacketBlock_helper<SubPacket, std::complex<double>, n, idx>\n  {\n    storePacketBlock_helper<SubPacket, std::complex<double>, n, idx-1> spbh;\n    EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(const blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType, Incr>* sup, Index i, Index j, const PacketBlock<SubPacket, n>& block) const {\n      spbh.store(sup,i,j,block);\n      for(int l = 0; l < unpacket_traits<SubPacket>::size; l++)\n      {\n        std::complex<double> *v = &sup->operator()(i+l, j+idx);\n        v->real(block.packet[idx].v[2*l+0]);\n        v->imag(block.packet[idx].v[2*l+1]);\n      }\n    }\n  };\n\n  template<typename SubPacket, typename ScalarT, int n>\n  struct storePacketBlock_helper<SubPacket, ScalarT, n, -1>\n  {\n    EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(const blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType, Incr>*, Index, Index, const PacketBlock<SubPacket, n>& ) const {\n    }\n  };\n\n  template<typename SubPacket, int n>\n  struct storePacketBlock_helper<SubPacket, std::complex<float>, n, -1>\n  {\n    EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(const blas_data_mapper<Scalar, Index, StorageOrder, 
AlignmentType, Incr>*, Index, Index, const PacketBlock<SubPacket, n>& ) const {\n    }\n  };\n\n  template<typename SubPacket, int n>\n  struct storePacketBlock_helper<SubPacket, std::complex<double>, n, -1>\n  {\n    EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(const blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType, Incr>*, Index, Index, const PacketBlock<SubPacket, n>& ) const {\n    }\n  };\n  // This function stores a PacketBlock on m_data; this approach is really quite slow compared to Incr=1 and should be avoided when possible.\n  template<typename SubPacket, int n>\n  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacketBlock(Index i, Index j, const PacketBlock<SubPacket, n>&block) const {\n    storePacketBlock_helper<SubPacket, Scalar, n, n-1> spb;\n    spb.store(this, i,j,block);\n  }\nprotected:\n  Scalar* EIGEN_RESTRICT m_data;\n  const Index m_stride;\n  const internal::variable_if_dynamic<Index,Incr> m_incr;\n};\n\n// lightweight helper class to access matrix coefficients (const version)\ntemplate<typename Scalar, typename Index, int StorageOrder>\nclass const_blas_data_mapper : public blas_data_mapper<const Scalar, Index, StorageOrder> {\n  public:\n  EIGEN_ALWAYS_INLINE const_blas_data_mapper(const Scalar *data, Index stride) : blas_data_mapper<const Scalar, Index, StorageOrder>(data, stride) {}\n\n  EIGEN_ALWAYS_INLINE const_blas_data_mapper<Scalar, Index, StorageOrder> getSubMapper(Index i, Index j) const {\n    return const_blas_data_mapper<Scalar, Index, StorageOrder>(&(this->operator()(i, j)), this->m_stride);\n  }\n};\n\n\n/* Helper class to analyze the factors of a Product expression.\n * In particular it allows one to pop out operator-, scalar multiples,\n * and conjugate */\ntemplate<typename XprType> struct blas_traits\n{\n  typedef typename traits<XprType>::Scalar Scalar;\n  typedef const XprType& ExtractType;\n  typedef XprType _ExtractType;\n  enum {\n    IsComplex = NumTraits<Scalar>::IsComplex,\n    IsTransposed = false,\n    NeedToConjugate = false,\n    HasUsableDirectAccess = (    (int(XprType::Flags)&DirectAccessBit)\n                              && (   bool(XprType::IsVectorAtCompileTime)\n                                  || int(inner_stride_at_compile_time<XprType>::ret) == 1)\n                             ) ?  1 : 0,\n    HasScalarFactor = false\n  };\n  typedef typename conditional<bool(HasUsableDirectAccess),\n    ExtractType,\n    typename _ExtractType::PlainObject\n    >::type DirectLinearAccessType;\n  static inline EIGEN_DEVICE_FUNC ExtractType extract(const XprType& x) { return x; }\n  static inline EIGEN_DEVICE_FUNC const Scalar extractScalarFactor(const XprType&) { return Scalar(1); }\n};\n\n// pop conjugate\ntemplate<typename Scalar, typename NestedXpr>\nstruct blas_traits<CwiseUnaryOp<scalar_conjugate_op<Scalar>, NestedXpr> >\n : blas_traits<NestedXpr>\n{\n  typedef blas_traits<NestedXpr> Base;\n  typedef CwiseUnaryOp<scalar_conjugate_op<Scalar>, NestedXpr> XprType;\n  typedef typename Base::ExtractType ExtractType;\n\n  enum {\n    IsComplex = NumTraits<Scalar>::IsComplex,\n    NeedToConjugate = Base::NeedToConjugate ? 
0 : IsComplex\n  };\n  static inline ExtractType extract(const XprType& x) { return Base::extract(x.nestedExpression()); }\n  static inline Scalar extractScalarFactor(const XprType& x) { return conj(Base::extractScalarFactor(x.nestedExpression())); }\n};\n\n// pop scalar multiple\ntemplate<typename Scalar, typename NestedXpr, typename Plain>\nstruct blas_traits<CwiseBinaryOp<scalar_product_op<Scalar>, const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain>, NestedXpr> >\n : blas_traits<NestedXpr>\n{\n  enum {\n    HasScalarFactor = true\n  };\n  typedef blas_traits<NestedXpr> Base;\n  typedef CwiseBinaryOp<scalar_product_op<Scalar>, const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain>, NestedXpr> XprType;\n  typedef typename Base::ExtractType ExtractType;\n  static inline EIGEN_DEVICE_FUNC ExtractType extract(const XprType& x) { return Base::extract(x.rhs()); }\n  static inline EIGEN_DEVICE_FUNC Scalar extractScalarFactor(const XprType& x)\n  { return x.lhs().functor().m_other * Base::extractScalarFactor(x.rhs()); }\n};\ntemplate<typename Scalar, typename NestedXpr, typename Plain>\nstruct blas_traits<CwiseBinaryOp<scalar_product_op<Scalar>, NestedXpr, const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain> > >\n : blas_traits<NestedXpr>\n{\n  enum {\n    HasScalarFactor = true\n  };\n  typedef blas_traits<NestedXpr> Base;\n  typedef CwiseBinaryOp<scalar_product_op<Scalar>, NestedXpr, const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain> > XprType;\n  typedef typename Base::ExtractType ExtractType;\n  static inline ExtractType extract(const XprType& x) { return Base::extract(x.lhs()); }\n  static inline Scalar extractScalarFactor(const XprType& x)\n  { return Base::extractScalarFactor(x.lhs()) * x.rhs().functor().m_other; }\n};\ntemplate<typename Scalar, typename Plain1, typename Plain2>\nstruct blas_traits<CwiseBinaryOp<scalar_product_op<Scalar>, const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain1>,\n                                                            const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain2> > >\n : blas_traits<CwiseNullaryOp<scalar_constant_op<Scalar>,Plain1> >\n{};\n\n// pop opposite\ntemplate<typename Scalar, typename NestedXpr>\nstruct blas_traits<CwiseUnaryOp<scalar_opposite_op<Scalar>, NestedXpr> >\n : blas_traits<NestedXpr>\n{\n  enum {\n    HasScalarFactor = true\n  };\n  typedef blas_traits<NestedXpr> Base;\n  typedef CwiseUnaryOp<scalar_opposite_op<Scalar>, NestedXpr> XprType;\n  typedef typename Base::ExtractType ExtractType;\n  static inline ExtractType extract(const XprType& x) { return Base::extract(x.nestedExpression()); }\n  static inline Scalar extractScalarFactor(const XprType& x)\n  { return - Base::extractScalarFactor(x.nestedExpression()); }\n};\n\n// pop/push transpose\ntemplate<typename NestedXpr>\nstruct blas_traits<Transpose<NestedXpr> >\n : blas_traits<NestedXpr>\n{\n  typedef typename NestedXpr::Scalar Scalar;\n  typedef blas_traits<NestedXpr> Base;\n  typedef Transpose<NestedXpr> XprType;\n  typedef Transpose<const typename Base::_ExtractType>  ExtractType; // const to get rid of a compile error; anyway blas traits are only used on the RHS\n  typedef Transpose<const typename Base::_ExtractType> _ExtractType;\n  typedef typename conditional<bool(Base::HasUsableDirectAccess),\n    ExtractType,\n    typename ExtractType::PlainObject\n    >::type DirectLinearAccessType;\n  enum {\n    IsTransposed = Base::IsTransposed ? 
0 : 1\n  };\n  static inline ExtractType extract(const XprType& x) { return ExtractType(Base::extract(x.nestedExpression())); }\n  static inline Scalar extractScalarFactor(const XprType& x) { return Base::extractScalarFactor(x.nestedExpression()); }\n};\n\ntemplate<typename T>\nstruct blas_traits<const T>\n     : blas_traits<T>\n{};\n\ntemplate<typename T, bool HasUsableDirectAccess=blas_traits<T>::HasUsableDirectAccess>\nstruct extract_data_selector {\n  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static const typename T::Scalar* run(const T& m)\n  {\n    return blas_traits<T>::extract(m).data();\n  }\n};\n\ntemplate<typename T>\nstruct extract_data_selector<T,false> {\n  static typename T::Scalar* run(const T&) { return 0; }\n};\n\ntemplate<typename T>\nEIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE const typename T::Scalar* extract_data(const T& m)\n{\n  return extract_data_selector<T>::run(m);\n}\n\n/**\n * \\c combine_scalar_factors extracts and multiplies factors from GEMM and GEMV products.\n * There is a specialization for booleans\n */\ntemplate<typename ResScalar, typename Lhs, typename Rhs>\nstruct combine_scalar_factors_impl\n{\n  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static ResScalar run(const Lhs& lhs, const Rhs& rhs)\n  {\n    return blas_traits<Lhs>::extractScalarFactor(lhs) * blas_traits<Rhs>::extractScalarFactor(rhs);\n  }\n  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static ResScalar run(const ResScalar& alpha, const Lhs& lhs, const Rhs& rhs)\n  {\n    return alpha * blas_traits<Lhs>::extractScalarFactor(lhs) * blas_traits<Rhs>::extractScalarFactor(rhs);\n  }\n};\ntemplate<typename Lhs, typename Rhs>\nstruct combine_scalar_factors_impl<bool, Lhs, Rhs>\n{\n  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static bool run(const Lhs& lhs, const Rhs& rhs)\n  {\n    return blas_traits<Lhs>::extractScalarFactor(lhs) && blas_traits<Rhs>::extractScalarFactor(rhs);\n  }\n  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static bool run(const bool& alpha, const Lhs& lhs, const Rhs& rhs)\n  {\n    return alpha && blas_traits<Lhs>::extractScalarFactor(lhs) && blas_traits<Rhs>::extractScalarFactor(rhs);\n  }\n};\n\ntemplate<typename ResScalar, typename Lhs, typename Rhs>\nEIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE ResScalar combine_scalar_factors(const ResScalar& alpha, const Lhs& lhs, const Rhs& rhs)\n{\n  return combine_scalar_factors_impl<ResScalar,Lhs,Rhs>::run(alpha, lhs, rhs);\n}\ntemplate<typename ResScalar, typename Lhs, typename Rhs>\nEIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE ResScalar combine_scalar_factors(const Lhs& lhs, const Rhs& rhs)\n{\n  return combine_scalar_factors_impl<ResScalar,Lhs,Rhs>::run(lhs, rhs);\n}\n\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_BLASUTIL_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/util/ConfigureVectorization.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008-2018 Gael Guennebaud <gael.guennebaud@inria.fr>\n// Copyright (C) 2020, Arm Limited and Contributors\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_CONFIGURE_VECTORIZATION_H\n#define EIGEN_CONFIGURE_VECTORIZATION_H\n\n//------------------------------------------------------------------------------------------\n// Static and dynamic alignment control\n//\n// The main purpose of this section is to define EIGEN_MAX_ALIGN_BYTES and EIGEN_MAX_STATIC_ALIGN_BYTES\n// as the maximal boundary in bytes on which dynamically and statically allocated data may be alignment respectively.\n// The values of EIGEN_MAX_ALIGN_BYTES and EIGEN_MAX_STATIC_ALIGN_BYTES can be specified by the user. If not,\n// a default value is automatically computed based on architecture, compiler, and OS.\n//\n// This section also defines macros EIGEN_ALIGN_TO_BOUNDARY(N) and the shortcuts EIGEN_ALIGN{8,16,32,_MAX}\n// to be used to declare statically aligned buffers.\n//------------------------------------------------------------------------------------------\n\n\n/* EIGEN_ALIGN_TO_BOUNDARY(n) forces data to be n-byte aligned. This is used to satisfy SIMD requirements.\n * However, we do that EVEN if vectorization (EIGEN_VECTORIZE) is disabled,\n * so that vectorization doesn't affect binary compatibility.\n *\n * If we made alignment depend on whether or not EIGEN_VECTORIZE is defined, it would be impossible to link\n * vectorized and non-vectorized code.\n * \n * FIXME: this code can be cleaned up once we switch to proper C++11 only.\n */\n#if (defined EIGEN_CUDACC)\n  #define EIGEN_ALIGN_TO_BOUNDARY(n) __align__(n)\n  #define EIGEN_ALIGNOF(x) __alignof(x)\n#elif EIGEN_HAS_ALIGNAS\n  #define EIGEN_ALIGN_TO_BOUNDARY(n) alignas(n)\n  #define EIGEN_ALIGNOF(x) alignof(x)\n#elif EIGEN_COMP_GNUC || EIGEN_COMP_PGI || EIGEN_COMP_IBM || EIGEN_COMP_ARM\n  #define EIGEN_ALIGN_TO_BOUNDARY(n) __attribute__((aligned(n)))\n  #define EIGEN_ALIGNOF(x) __alignof(x)\n#elif EIGEN_COMP_MSVC\n  #define EIGEN_ALIGN_TO_BOUNDARY(n) __declspec(align(n))\n  #define EIGEN_ALIGNOF(x) __alignof(x)\n#elif EIGEN_COMP_SUNCC\n  // FIXME not sure about this one:\n  #define EIGEN_ALIGN_TO_BOUNDARY(n) __attribute__((aligned(n)))\n  #define EIGEN_ALIGNOF(x) __alignof(x)\n#else\n  #error Please tell me what is the equivalent of alignas(n) and alignof(x) for your compiler\n#endif\n\n// If the user explicitly disable vectorization, then we also disable alignment\n#if defined(EIGEN_DONT_VECTORIZE)\n  #if defined(EIGEN_GPUCC)\n    // GPU code is always vectorized and requires memory alignment for\n    // statically allocated buffers.\n    #define EIGEN_IDEAL_MAX_ALIGN_BYTES 16\n  #else\n    #define EIGEN_IDEAL_MAX_ALIGN_BYTES 0\n  #endif\n#elif defined(__AVX512F__)\n  // 64 bytes static alignment is preferred only if really required\n  #define EIGEN_IDEAL_MAX_ALIGN_BYTES 64\n#elif defined(__AVX__)\n  // 32 bytes static alignment is preferred only if really required\n  #define EIGEN_IDEAL_MAX_ALIGN_BYTES 32\n#else\n  #define EIGEN_IDEAL_MAX_ALIGN_BYTES 16\n#endif\n\n\n// EIGEN_MIN_ALIGN_BYTES defines the minimal value for which the notion of explicit alignment makes sense\n#define EIGEN_MIN_ALIGN_BYTES 16\n\n// Defined the boundary (in bytes) on which the data 
needs to be aligned. Note\n// that unless EIGEN_ALIGN is defined and not equal to 0, the data may not be\n// aligned at all regardless of the value of this #define.\n\n#if (defined(EIGEN_DONT_ALIGN_STATICALLY) || defined(EIGEN_DONT_ALIGN))  && defined(EIGEN_MAX_STATIC_ALIGN_BYTES) && EIGEN_MAX_STATIC_ALIGN_BYTES>0\n#error EIGEN_MAX_STATIC_ALIGN_BYTES and EIGEN_DONT_ALIGN[_STATICALLY] are both defined with EIGEN_MAX_STATIC_ALIGN_BYTES!=0. Use EIGEN_MAX_STATIC_ALIGN_BYTES=0 as a synonym of EIGEN_DONT_ALIGN_STATICALLY.\n#endif\n\n// EIGEN_DONT_ALIGN_STATICALLY and EIGEN_DONT_ALIGN are deprecated\n// They imply EIGEN_MAX_STATIC_ALIGN_BYTES=0\n#if defined(EIGEN_DONT_ALIGN_STATICALLY) || defined(EIGEN_DONT_ALIGN)\n  #ifdef EIGEN_MAX_STATIC_ALIGN_BYTES\n    #undef EIGEN_MAX_STATIC_ALIGN_BYTES\n  #endif\n  #define EIGEN_MAX_STATIC_ALIGN_BYTES 0\n#endif\n\n#ifndef EIGEN_MAX_STATIC_ALIGN_BYTES\n\n  // Try to automatically guess what is the best default value for EIGEN_MAX_STATIC_ALIGN_BYTES\n\n  // 16 byte alignment is only useful for vectorization. Since it affects the ABI, we need to enable\n  // 16 byte alignment on all platforms where vectorization might be enabled. In theory we could always\n  // enable alignment, but it can be a cause of problems on some platforms, so we just disable it in\n  // certain common platform (compiler+architecture combinations) to avoid these problems.\n  // Only static alignment is really problematic (relies on nonstandard compiler extensions),\n  // try to keep heap alignment even when we have to disable static alignment.\n  #if EIGEN_COMP_GNUC && !(EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_ARM_OR_ARM64 || EIGEN_ARCH_PPC || EIGEN_ARCH_IA64 || EIGEN_ARCH_MIPS)\n  #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1\n  #elif EIGEN_ARCH_ARM_OR_ARM64 && EIGEN_COMP_GNUC_STRICT && EIGEN_GNUC_AT_MOST(4, 6)\n  // Old versions of GCC on ARM, at least 4.4, were once seen to have buggy static alignment support.\n  // Not sure which version fixed it, hopefully it doesn't affect 4.7, which is still somewhat in use.\n  // 4.8 and newer seem definitely unaffected.\n  #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1\n  #else\n  #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 0\n  #endif\n\n  // static alignment is completely disabled with GCC 3, Sun Studio, and QCC/QNX\n  #if !EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT \\\n  && !EIGEN_GCC3_OR_OLDER \\\n  && !EIGEN_COMP_SUNCC \\\n  && !EIGEN_OS_QNX\n    #define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 1\n  #else\n    #define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 0\n  #endif\n\n  #if EIGEN_ARCH_WANTS_STACK_ALIGNMENT\n    #define EIGEN_MAX_STATIC_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES\n  #else\n    #define EIGEN_MAX_STATIC_ALIGN_BYTES 0\n  #endif\n\n#endif\n\n// If EIGEN_MAX_ALIGN_BYTES is defined, then it is considered as an upper bound for EIGEN_MAX_STATIC_ALIGN_BYTES\n#if defined(EIGEN_MAX_ALIGN_BYTES) && EIGEN_MAX_ALIGN_BYTES<EIGEN_MAX_STATIC_ALIGN_BYTES\n#undef EIGEN_MAX_STATIC_ALIGN_BYTES\n#define EIGEN_MAX_STATIC_ALIGN_BYTES EIGEN_MAX_ALIGN_BYTES\n#endif\n\n#if EIGEN_MAX_STATIC_ALIGN_BYTES==0 && !defined(EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT)\n  #define EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT\n#endif\n\n// At this stage, EIGEN_MAX_STATIC_ALIGN_BYTES>0 is the true test whether we want to align arrays on the stack or not.\n// It takes into account both the user choice to explicitly enable/disable alignment (by setting EIGEN_MAX_STATIC_ALIGN_BYTES)\n// and the architecture config (EIGEN_ARCH_WANTS_STACK_ALIGNMENT).\n// 
Henceforth, only EIGEN_MAX_STATIC_ALIGN_BYTES should be used.\n\n\n// Shortcuts to EIGEN_ALIGN_TO_BOUNDARY\n#define EIGEN_ALIGN8  EIGEN_ALIGN_TO_BOUNDARY(8)\n#define EIGEN_ALIGN16 EIGEN_ALIGN_TO_BOUNDARY(16)\n#define EIGEN_ALIGN32 EIGEN_ALIGN_TO_BOUNDARY(32)\n#define EIGEN_ALIGN64 EIGEN_ALIGN_TO_BOUNDARY(64)\n#if EIGEN_MAX_STATIC_ALIGN_BYTES>0\n#define EIGEN_ALIGN_MAX EIGEN_ALIGN_TO_BOUNDARY(EIGEN_MAX_STATIC_ALIGN_BYTES)\n#else\n#define EIGEN_ALIGN_MAX\n#endif\n\n\n// Dynamic alignment control\n\n#if defined(EIGEN_DONT_ALIGN) && defined(EIGEN_MAX_ALIGN_BYTES) && EIGEN_MAX_ALIGN_BYTES>0\n#error EIGEN_MAX_ALIGN_BYTES and EIGEN_DONT_ALIGN are both defined with EIGEN_MAX_ALIGN_BYTES!=0. Use EIGEN_MAX_ALIGN_BYTES=0 as a synonym of EIGEN_DONT_ALIGN.\n#endif\n\n#ifdef EIGEN_DONT_ALIGN\n  #ifdef EIGEN_MAX_ALIGN_BYTES\n    #undef EIGEN_MAX_ALIGN_BYTES\n  #endif\n  #define EIGEN_MAX_ALIGN_BYTES 0\n#elif !defined(EIGEN_MAX_ALIGN_BYTES)\n  #define EIGEN_MAX_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES\n#endif\n\n#if EIGEN_IDEAL_MAX_ALIGN_BYTES > EIGEN_MAX_ALIGN_BYTES\n#define EIGEN_DEFAULT_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES\n#else\n#define EIGEN_DEFAULT_ALIGN_BYTES EIGEN_MAX_ALIGN_BYTES\n#endif\n\n\n#ifndef EIGEN_UNALIGNED_VECTORIZE\n#define EIGEN_UNALIGNED_VECTORIZE 1\n#endif\n\n//----------------------------------------------------------------------\n\n// if alignment is disabled, then disable vectorization. Note: EIGEN_MAX_ALIGN_BYTES is the proper check, it takes into\n// account both the user's will (EIGEN_MAX_ALIGN_BYTES,EIGEN_DONT_ALIGN) and our own platform checks\n#if EIGEN_MAX_ALIGN_BYTES==0\n  #ifndef EIGEN_DONT_VECTORIZE\n    #define EIGEN_DONT_VECTORIZE\n  #endif\n#endif\n\n\n// The following (except #include <malloc.h> and _M_IX86_FP ??) can likely be\n// removed as gcc 4.1 and msvc 2008 are not supported anyways.\n#if EIGEN_COMP_MSVC\n  #include <malloc.h> // for _aligned_malloc -- need it regardless of whether vectorization is enabled\n  #if (EIGEN_COMP_MSVC >= 1500) // 2008 or later\n    // a user reported that in 64-bit mode, MSVC doesn't care to define _M_IX86_FP.\n    #if (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || EIGEN_ARCH_x86_64\n      #define EIGEN_SSE2_ON_MSVC_2008_OR_LATER\n    #endif\n  #endif\n#else\n  #if (defined __SSE2__) && ( (!EIGEN_COMP_GNUC) || EIGEN_COMP_ICC || EIGEN_GNUC_AT_LEAST(4,2) )\n    #define EIGEN_SSE2_ON_NON_MSVC_BUT_NOT_OLD_GCC\n  #endif\n#endif\n\n#if !(defined(EIGEN_DONT_VECTORIZE) || defined(EIGEN_GPUCC))\n\n  #if defined (EIGEN_SSE2_ON_NON_MSVC_BUT_NOT_OLD_GCC) || defined(EIGEN_SSE2_ON_MSVC_2008_OR_LATER)\n\n    // Defines symbols for compile-time detection of which instructions are\n    // used.\n    // EIGEN_VECTORIZE_YY is defined if and only if the instruction set YY is used\n    #define EIGEN_VECTORIZE\n    #define EIGEN_VECTORIZE_SSE\n    #define EIGEN_VECTORIZE_SSE2\n\n    // Detect sse3/ssse3/sse4:\n    // gcc and icc defines __SSE3__, ...\n    // there is no way to know about this on msvc. 
You can define EIGEN_VECTORIZE_SSE* if you\n    // want to force the use of those instructions with msvc.\n    #ifdef __SSE3__\n      #define EIGEN_VECTORIZE_SSE3\n    #endif\n    #ifdef __SSSE3__\n      #define EIGEN_VECTORIZE_SSSE3\n    #endif\n    #ifdef __SSE4_1__\n      #define EIGEN_VECTORIZE_SSE4_1\n    #endif\n    #ifdef __SSE4_2__\n      #define EIGEN_VECTORIZE_SSE4_2\n    #endif\n    #ifdef __AVX__\n      #ifndef EIGEN_USE_SYCL \n        #define EIGEN_VECTORIZE_AVX\n      #endif\n      #define EIGEN_VECTORIZE_SSE3\n      #define EIGEN_VECTORIZE_SSSE3\n      #define EIGEN_VECTORIZE_SSE4_1\n      #define EIGEN_VECTORIZE_SSE4_2\n    #endif\n    #ifdef __AVX2__\n      #ifndef EIGEN_USE_SYCL \n        #define EIGEN_VECTORIZE_AVX2\n        #define EIGEN_VECTORIZE_AVX\n      #endif\n      #define EIGEN_VECTORIZE_SSE3\n      #define EIGEN_VECTORIZE_SSSE3\n      #define EIGEN_VECTORIZE_SSE4_1\n      #define EIGEN_VECTORIZE_SSE4_2\n    #endif\n    #if defined(__FMA__) || (EIGEN_COMP_MSVC && defined(__AVX2__))\n      // MSVC does not expose a switch dedicated for FMA\n      // For MSVC, AVX2 => FMA\n      #define EIGEN_VECTORIZE_FMA\n    #endif\n    #if defined(__AVX512F__)\n      #ifndef EIGEN_VECTORIZE_FMA\n      #if EIGEN_COMP_GNUC\n      #error Please add -mfma to your compiler flags: compiling with -mavx512f alone without SSE/AVX FMA is not supported (bug 1638).\n      #else\n      #error Please enable FMA in your compiler flags (e.g. -mfma): compiling with AVX512 alone without SSE/AVX FMA is not supported (bug 1638).\n      #endif\n      #endif\n      #ifndef EIGEN_USE_SYCL\n        #define EIGEN_VECTORIZE_AVX512\n        #define EIGEN_VECTORIZE_AVX2\n        #define EIGEN_VECTORIZE_AVX\n      #endif\n      #define EIGEN_VECTORIZE_FMA\n      #define EIGEN_VECTORIZE_SSE3\n      #define EIGEN_VECTORIZE_SSSE3\n      #define EIGEN_VECTORIZE_SSE4_1\n      #define EIGEN_VECTORIZE_SSE4_2\n      #ifndef EIGEN_USE_SYCL\n        #ifdef __AVX512DQ__\n          #define EIGEN_VECTORIZE_AVX512DQ\n        #endif\n        #ifdef __AVX512ER__\n          #define EIGEN_VECTORIZE_AVX512ER\n        #endif\n        #ifdef __AVX512BF16__\n          #define EIGEN_VECTORIZE_AVX512BF16\n        #endif\n      #endif\n    #endif\n\n    // Disable AVX support on broken xcode versions\n    #if defined(__apple_build_version__) && (__apple_build_version__ == 11000033 ) && ( __MAC_OS_X_VERSION_MIN_REQUIRED == 101500 )\n      // A nasty bug in the clang compiler shipped with xcode in a common compilation situation\n      // when XCode 11.0 and Mac deployment target macOS 10.15 is https://trac.macports.org/ticket/58776#no1\n      #ifdef EIGEN_VECTORIZE_AVX\n        #undef EIGEN_VECTORIZE_AVX\n        #warning \"Disabling AVX support: clang compiler shipped with XCode 11.[012] generates broken assembly with -macosx-version-min=10.15 and AVX enabled. 
\"\n        #ifdef EIGEN_VECTORIZE_AVX2\n          #undef EIGEN_VECTORIZE_AVX2\n        #endif\n        #ifdef EIGEN_VECTORIZE_FMA\n          #undef EIGEN_VECTORIZE_FMA\n        #endif\n        #ifdef EIGEN_VECTORIZE_AVX512\n          #undef EIGEN_VECTORIZE_AVX512\n        #endif\n        #ifdef EIGEN_VECTORIZE_AVX512DQ\n          #undef EIGEN_VECTORIZE_AVX512DQ\n        #endif\n        #ifdef EIGEN_VECTORIZE_AVX512ER\n          #undef EIGEN_VECTORIZE_AVX512ER\n        #endif\n      #endif\n      // NOTE: Confirmed test failures in XCode 11.0, and XCode 11.2 with  -macosx-version-min=10.15 and AVX\n      // NOTE using -macosx-version-min=10.15 with Xcode 11.0 results in runtime segmentation faults in many tests, 11.2 produce core dumps in 3 tests\n      // NOTE using -macosx-version-min=10.14 produces functioning and passing tests in all cases\n      // NOTE __clang_version__ \"11.0.0 (clang-1100.0.33.8)\"  XCode 11.0 <- Produces many segfault and core dumping tests\n      //                                                                    with  -macosx-version-min=10.15 and AVX\n      // NOTE __clang_version__ \"11.0.0 (clang-1100.0.33.12)\" XCode 11.2 <- Produces 3 core dumping tests with  \n      //                                                                    -macosx-version-min=10.15 and AVX\n    #endif\n\n    // include files\n\n    // This extern \"C\" works around a MINGW-w64 compilation issue\n    // https://sourceforge.net/tracker/index.php?func=detail&aid=3018394&group_id=202880&atid=983354\n    // In essence, intrin.h is included by windows.h and also declares intrinsics (just as emmintrin.h etc. below do).\n    // However, intrin.h uses an extern \"C\" declaration, and g++ thus complains of duplicate declarations\n    // with conflicting linkage.  The linkage for intrinsics doesn't matter, but at that stage the compiler doesn't know;\n    // so, to avoid compile errors when windows.h is included after Eigen/Core, ensure intrinsics are extern \"C\" here too.\n    // notice that since these are C headers, the extern \"C\" is theoretically needed anyways.\n    extern \"C\" {\n      // In theory we should only include immintrin.h and not the other *mmintrin.h header files directly.\n      // Doing so triggers some issues with ICC. 
However, old gcc versions seem to not have this file, thus:\n      #if EIGEN_COMP_ICC >= 1110\n        #include <immintrin.h>\n      #else\n        #include <mmintrin.h>\n        #include <emmintrin.h>\n        #include <xmmintrin.h>\n        #ifdef  EIGEN_VECTORIZE_SSE3\n        #include <pmmintrin.h>\n        #endif\n        #ifdef EIGEN_VECTORIZE_SSSE3\n        #include <tmmintrin.h>\n        #endif\n        #ifdef EIGEN_VECTORIZE_SSE4_1\n        #include <smmintrin.h>\n        #endif\n        #ifdef EIGEN_VECTORIZE_SSE4_2\n        #include <nmmintrin.h>\n        #endif\n        #if defined(EIGEN_VECTORIZE_AVX) || defined(EIGEN_VECTORIZE_AVX512)\n        #include <immintrin.h>\n        #endif\n      #endif\n    } // end extern \"C\"\n\n  #elif defined __VSX__\n\n    #define EIGEN_VECTORIZE\n    #define EIGEN_VECTORIZE_VSX\n    #include <altivec.h>\n    // We need to #undef all these ugly tokens defined in <altivec.h>\n    // => use __vector instead of vector\n    #undef bool\n    #undef vector\n    #undef pixel\n\n  #elif defined __ALTIVEC__\n\n    #define EIGEN_VECTORIZE\n    #define EIGEN_VECTORIZE_ALTIVEC\n    #include <altivec.h>\n    // We need to #undef all these ugly tokens defined in <altivec.h>\n    // => use __vector instead of vector\n    #undef bool\n    #undef vector\n    #undef pixel\n\n  #elif ((defined  __ARM_NEON) || (defined __ARM_NEON__)) && !(defined EIGEN_ARM64_USE_SVE)\n\n    #define EIGEN_VECTORIZE\n    #define EIGEN_VECTORIZE_NEON\n    #include <arm_neon.h>\n\n  // We currently require SVE to be enabled explicitly via EIGEN_ARM64_USE_SVE and\n  // will not select the backend automatically\n  #elif (defined __ARM_FEATURE_SVE) && (defined EIGEN_ARM64_USE_SVE)\n\n    #define EIGEN_VECTORIZE\n    #define EIGEN_VECTORIZE_SVE\n    #include <arm_sve.h>\n\n    // Since we depend on knowing SVE vector lengths at compile-time, we need\n    // to ensure a fixed length is set\n    #if defined __ARM_FEATURE_SVE_BITS\n      #define EIGEN_ARM64_SVE_VL __ARM_FEATURE_SVE_BITS\n    #else\n#error \"Eigen requires a fixed SVE vector length but EIGEN_ARM64_SVE_VL is not set.\"\n#endif\n\n#elif (defined __s390x__ && defined __VEC__)\n\n#define EIGEN_VECTORIZE\n#define EIGEN_VECTORIZE_ZVECTOR\n#include <vecintrin.h>\n\n#elif defined __mips_msa\n\n// Limit MSA optimizations to little-endian CPUs for now.\n// TODO: Perhaps, eventually support MSA optimizations on big-endian CPUs?\n#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)\n#if defined(__LP64__)\n#define EIGEN_MIPS_64\n#else\n#define EIGEN_MIPS_32\n#endif\n#define EIGEN_VECTORIZE\n#define EIGEN_VECTORIZE_MSA\n#include <msa.h>\n#endif\n\n#endif\n#endif\n\n// Following the Arm ACLE, arm_neon.h should also include arm_fp16.h, but not all\n// compilers seem to follow this. 
We therefore include it explicitly.\n// See also: https://bugs.llvm.org/show_bug.cgi?id=47955\n#if defined(EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC)\n  #include <arm_fp16.h>\n#endif\n\n#if defined(__F16C__) && (!defined(EIGEN_GPUCC) && (!defined(EIGEN_COMP_CLANG) || EIGEN_COMP_CLANG>=380))\n  // We can use the optimized fp16 to float and float to fp16 conversion routines\n  #define EIGEN_HAS_FP16_C\n\n  #if defined(EIGEN_COMP_CLANG)\n    // Workaround for clang: The FP16C intrinsics for clang are included by\n    // immintrin.h, as opposed to emmintrin.h as suggested by Intel:\n    // https://software.intel.com/sites/landingpage/IntrinsicsGuide/#othertechs=FP16C&expand=1711\n    #include <immintrin.h>\n  #endif\n#endif\n\n#if defined EIGEN_CUDACC\n  #define EIGEN_VECTORIZE_GPU\n  #include <vector_types.h>\n  #if EIGEN_CUDA_SDK_VER >= 70500\n    #define EIGEN_HAS_CUDA_FP16\n  #endif\n#endif\n\n#if defined(EIGEN_HAS_CUDA_FP16)\n  #include <cuda_runtime_api.h>\n  #include <cuda_fp16.h>\n#endif\n\n#if defined(EIGEN_HIPCC)\n  #define EIGEN_VECTORIZE_GPU\n  #include <hip/hip_vector_types.h>\n  #define EIGEN_HAS_HIP_FP16\n  #include <hip/hip_fp16.h>\n#endif\n\n\n/** \\brief Namespace containing all symbols from the %Eigen library. */\nnamespace Eigen {\n\ninline static const char *SimdInstructionSetsInUse(void) {\n#if defined(EIGEN_VECTORIZE_AVX512)\n  return \"AVX512, FMA, AVX2, AVX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2\";\n#elif defined(EIGEN_VECTORIZE_AVX)\n  return \"AVX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2\";\n#elif defined(EIGEN_VECTORIZE_SSE4_2)\n  return \"SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2\";\n#elif defined(EIGEN_VECTORIZE_SSE4_1)\n  return \"SSE, SSE2, SSE3, SSSE3, SSE4.1\";\n#elif defined(EIGEN_VECTORIZE_SSSE3)\n  return \"SSE, SSE2, SSE3, SSSE3\";\n#elif defined(EIGEN_VECTORIZE_SSE3)\n  return \"SSE, SSE2, SSE3\";\n#elif defined(EIGEN_VECTORIZE_SSE2)\n  return \"SSE, SSE2\";\n#elif defined(EIGEN_VECTORIZE_ALTIVEC)\n  return \"AltiVec\";\n#elif defined(EIGEN_VECTORIZE_VSX)\n  return \"VSX\";\n#elif defined(EIGEN_VECTORIZE_NEON)\n  return \"ARM NEON\";\n#elif defined(EIGEN_VECTORIZE_SVE)\n  return \"ARM SVE\";\n#elif defined(EIGEN_VECTORIZE_ZVECTOR)\n  return \"S390X ZVECTOR\";\n#elif defined(EIGEN_VECTORIZE_MSA)\n  return \"MIPS MSA\";\n#else\n  return \"None\";\n#endif\n}\n\n} // end namespace Eigen\n\n\n#endif // EIGEN_CONFIGURE_VECTORIZATION_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/util/Constants.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008-2015 Gael Guennebaud <gael.guennebaud@inria.fr>\n// Copyright (C) 2007-2009 Benoit Jacob <jacob.benoit.1@gmail.com>\n// Copyright (C) 2020, Arm Limited and Contributors\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_CONSTANTS_H\n#define EIGEN_CONSTANTS_H\n\nnamespace Eigen {\n\n/** This value means that a positive quantity (e.g., a size) is not known at compile-time, and that instead the value is\n  * stored in some runtime variable.\n  *\n  * Changing the value of Dynamic breaks the ABI, as Dynamic is often used as a template parameter for Matrix.\n  */\nconst int Dynamic = -1;\n\n/** This value means that a signed quantity (e.g., a signed index) is not known at compile-time, and that instead its value\n  * has to be specified at runtime.\n  */\nconst int DynamicIndex = 0xffffff;\n\n/** This value means that the increment to go from one value to another in a sequence is not constant for each step.\n  */\nconst int UndefinedIncr = 0xfffffe;\n\n/** This value means +Infinity; it is currently used only as the p parameter to MatrixBase::lpNorm<int>().\n  * The value Infinity there means the L-infinity norm.\n  */\nconst int Infinity = -1;\n\n/** This value means that the cost to evaluate an expression coefficient is either very expensive or\n  * cannot be known at compile time.\n  *\n  * This value has to be positive to (1) simplify cost computation, and (2) allow to distinguish between a very expensive and very very expensive expressions.\n  * It thus must also be large enough to make sure unrolling won't happen and that sub expressions will be evaluated, but not too large to avoid overflow.\n  */\nconst int HugeCost = 10000;\n\n/** \\defgroup flags Flags\n  * \\ingroup Core_Module\n  *\n  * These are the possible bits which can be OR'ed to constitute the flags of a matrix or\n  * expression.\n  *\n  * It is important to note that these flags are a purely compile-time notion. They are a compile-time property of\n  * an expression type, implemented as enum's. They are not stored in memory at runtime, and they do not incur any\n  * runtime overhead.\n  *\n  * \\sa MatrixBase::Flags\n  */\n\n/** \\ingroup flags\n  *\n  * for a matrix, this means that the storage order is row-major.\n  * If this bit is not set, the storage order is column-major.\n  * For an expression, this determines the storage order of\n  * the matrix created by evaluation of that expression.\n  * \\sa \\blank  \\ref TopicStorageOrders */\nconst unsigned int RowMajorBit = 0x1;\n\n/** \\ingroup flags\n  * means the expression should be evaluated by the calling expression */\nconst unsigned int EvalBeforeNestingBit = 0x2;\n\n/** \\ingroup flags\n  * \\deprecated\n  * means the expression should be evaluated before any assignment */\nEIGEN_DEPRECATED\nconst unsigned int EvalBeforeAssigningBit = 0x4; // FIXME deprecated\n\n/** \\ingroup flags\n  *\n  * Short version: means the expression might be vectorized\n  *\n  * Long version: means that the coefficients can be handled by packets\n  * and start at a memory location whose alignment meets the requirements\n  * of the present CPU architecture for optimized packet access. 
In the fixed-size\n  * case, there is the additional condition that it be possible to access all the\n  * coefficients by packets (this implies the requirement that the size be a multiple of 16 bytes,\n  * and that any nontrivial strides don't break the alignment). In the dynamic-size case,\n  * there is no such condition on the total size and strides, so it might not be possible to access\n  * all coeffs by packets.\n  *\n  * \\note This bit can be set regardless of whether vectorization is actually enabled.\n  *       To check for actual vectorizability, see \\a ActualPacketAccessBit.\n  */\nconst unsigned int PacketAccessBit = 0x8;\n\n#ifdef EIGEN_VECTORIZE\n/** \\ingroup flags\n  *\n  * If vectorization is enabled (EIGEN_VECTORIZE is defined) this constant\n  * is set to the value \\a PacketAccessBit.\n  *\n  * If vectorization is not enabled (EIGEN_VECTORIZE is not defined) this constant\n  * is set to the value 0.\n  */\nconst unsigned int ActualPacketAccessBit = PacketAccessBit;\n#else\nconst unsigned int ActualPacketAccessBit = 0x0;\n#endif\n\n/** \\ingroup flags\n  *\n  * Short version: means the expression can be seen as 1D vector.\n  *\n  * Long version: means that one can access the coefficients\n  * of this expression by coeff(int), and coeffRef(int) in the case of an lvalue expression. These\n  * index-based access methods are guaranteed\n  * to not have to do any runtime computation of a (row, col)-pair from the index, so that it\n  * is guaranteed that whenever it is available, index-based access is at least as fast as\n  * (row,col)-based access. Expressions for which that isn't possible don't have the LinearAccessBit.\n  *\n  * If both PacketAccessBit and LinearAccessBit are set, then the\n  * packets of this expression can be accessed by packet(int), and writePacket(int) in the case of an\n  * lvalue expression.\n  *\n  * Typically, all vector expressions have the LinearAccessBit, but there is one exception:\n  * Product expressions don't have it, because it would be troublesome for vectorization, even when the\n  * Product is a vector expression. Thus, vector Product expressions allow index-based coefficient access but\n  * not index-based packet access, so they don't have the LinearAccessBit.\n  */\nconst unsigned int LinearAccessBit = 0x10;\n\n/** \\ingroup flags\n  *\n  * Means the expression has a coeffRef() method, i.e. is writable as its individual coefficients are directly addressable.\n  * This rules out read-only expressions.\n  *\n  * Note that DirectAccessBit and LvalueBit are mutually orthogonal, as there are examples of expressions having one but not\n  *   the other:\n  *   \\li writable expressions that don't have a very simple memory layout as a strided array, have LvalueBit but not DirectAccessBit\n  *   \\li Map-to-const expressions, for example Map<const Matrix>, have DirectAccessBit but not LvalueBit\n  *\n  * Expressions having LvalueBit also have their coeff() method returning a const reference instead of returning a new value.\n  */\nconst unsigned int LvalueBit = 0x20;\n\n/** \\ingroup flags\n  *\n  * Means that the underlying array of coefficients can be directly accessed as a plain strided array. The memory layout\n  * of the array of coefficients must be exactly the natural one suggested by rows(), cols(),\n  * outerStride(), innerStride(), and the RowMajorBit. 
This rules out expressions such as Diagonal, whose coefficients,\n  * though referenceable, do not have such a regular memory layout.\n  *\n  * See the comment on LvalueBit for an explanation of how LvalueBit and DirectAccessBit are mutually orthogonal.\n  */\nconst unsigned int DirectAccessBit = 0x40;\n\n/** \\deprecated \\ingroup flags\n  *\n  * means the first coefficient packet is guaranteed to be aligned.\n  * An expression cannot have the AlignedBit without the PacketAccessBit flag.\n  * In other words, this means we are allowed to perform an aligned packet access to the first element regardless\n  * of the expression kind:\n  * \\code\n  * expression.packet<Aligned>(0);\n  * \\endcode\n  */\nEIGEN_DEPRECATED const unsigned int AlignedBit = 0x80;\n\nconst unsigned int NestByRefBit = 0x100;\n\n/** \\ingroup flags\n  *\n  * for an expression, this means that the storage order\n  * can be either row-major or column-major.\n  * The precise choice will be decided at evaluation time or when\n  * combined with other expressions.\n  * \\sa \\blank  \\ref RowMajorBit, \\ref TopicStorageOrders */\nconst unsigned int NoPreferredStorageOrderBit = 0x200;\n\n/** \\ingroup flags\n  *\n  * Means that the underlying coefficients can be accessed through pointers to the sparse (un)compressed storage format,\n  * that is, the expression provides:\n  * \\code\n    inline const Scalar* valuePtr() const;\n    inline const Index* innerIndexPtr() const;\n    inline const Index* outerIndexPtr() const;\n    inline const Index* innerNonZeroPtr() const;\n    \\endcode\n  */\nconst unsigned int CompressedAccessBit = 0x400;\n\n\n// list of flags that are inherited by default\nconst unsigned int HereditaryBits = RowMajorBit\n                                  | EvalBeforeNestingBit;\n\n/** \\defgroup enums Enumerations\n  * \\ingroup Core_Module\n  *\n  * Various enumerations used in %Eigen. Many of these are used as template parameters.\n  */\n\n/** \\ingroup enums\n  * Enum containing possible values for the \\c Mode or \\c UpLo parameter of\n  * MatrixBase::selfadjointView() and MatrixBase::triangularView(), and selfadjoint solvers. */\nenum UpLoType {\n  /** View matrix as a lower triangular matrix. */\n  Lower=0x1,                      \n  /** View matrix as an upper triangular matrix. */\n  Upper=0x2,                      \n  /** %Matrix has ones on the diagonal; to be used in combination with #Lower or #Upper. */\n  UnitDiag=0x4, \n  /** %Matrix has zeros on the diagonal; to be used in combination with #Lower or #Upper. */\n  ZeroDiag=0x8,\n  /** View matrix as a lower triangular matrix with ones on the diagonal. */\n  UnitLower=UnitDiag|Lower, \n  /** View matrix as an upper triangular matrix with ones on the diagonal. */\n  UnitUpper=UnitDiag|Upper,\n  /** View matrix as a lower triangular matrix with zeros on the diagonal. */\n  StrictlyLower=ZeroDiag|Lower, \n  /** View matrix as an upper triangular matrix with zeros on the diagonal. */\n  StrictlyUpper=ZeroDiag|Upper,\n  /** Used in BandMatrix and SelfAdjointView to indicate that the matrix is self-adjoint. */\n  SelfAdjoint=0x10,\n  /** Used to support symmetric, non-selfadjoint, complex matrices. */\n  Symmetric=0x20\n};\n\n/** \\ingroup enums\n  * Enum for indicating whether a buffer is aligned or not. */\nenum AlignmentType {\n  Unaligned=0,        /**< Data pointer has no specific alignment. */\n  Aligned8=8,         /**< Data pointer is aligned on a 8 bytes boundary. */\n  Aligned16=16,       /**< Data pointer is aligned on a 16 bytes boundary. 
*/\n  Aligned32=32,       /**< Data pointer is aligned on a 32 bytes boundary. */\n  Aligned64=64,       /**< Data pointer is aligned on a 64 bytes boundary. */\n  Aligned128=128,     /**< Data pointer is aligned on a 128 bytes boundary. */\n  AlignedMask=255,\n  Aligned=16,         /**< \\deprecated Synonym for Aligned16. */\n#if EIGEN_MAX_ALIGN_BYTES==128\n  AlignedMax = Aligned128\n#elif EIGEN_MAX_ALIGN_BYTES==64\n  AlignedMax = Aligned64\n#elif EIGEN_MAX_ALIGN_BYTES==32\n  AlignedMax = Aligned32\n#elif EIGEN_MAX_ALIGN_BYTES==16\n  AlignedMax = Aligned16\n#elif EIGEN_MAX_ALIGN_BYTES==8\n  AlignedMax = Aligned8\n#elif EIGEN_MAX_ALIGN_BYTES==0\n  AlignedMax = Unaligned\n#else\n#error Invalid value for EIGEN_MAX_ALIGN_BYTES\n#endif\n};\n\n/** \\ingroup enums\n  * Enum containing possible values for the \\p Direction parameter of\n  * Reverse, PartialReduxExpr and VectorwiseOp. */\nenum DirectionType { \n  /** For Reverse, all columns are reversed; \n    * for PartialReduxExpr and VectorwiseOp, act on columns. */\n  Vertical, \n  /** For Reverse, all rows are reversed; \n    * for PartialReduxExpr and VectorwiseOp, act on rows. */\n  Horizontal, \n  /** For Reverse, both rows and columns are reversed; \n    * not used for PartialReduxExpr and VectorwiseOp. */\n  BothDirections \n};\n\n/** \\internal \\ingroup enums\n  * Enum to specify how to traverse the entries of a matrix. */\nenum TraversalType {\n  /** \\internal Default traversal, no vectorization, no index-based access */\n  DefaultTraversal,\n  /** \\internal No vectorization, use index-based access to have only one for loop instead of 2 nested loops */\n  LinearTraversal,\n  /** \\internal Equivalent to a slice vectorization for fixed-size matrices having good alignment\n    * and good size */\n  InnerVectorizedTraversal,\n  /** \\internal Vectorization path using a single loop plus scalar loops for the\n    * unaligned boundaries */\n  LinearVectorizedTraversal,\n  /** \\internal Generic vectorization path using one vectorized loop per row/column with some\n    * scalar loops to handle the unaligned boundaries */\n  SliceVectorizedTraversal,\n  /** \\internal Special case to properly handle incompatible scalar types or other defective cases*/\n  InvalidTraversal,\n  /** \\internal Evaluate all entries at once */\n  AllAtOnceTraversal\n};\n\n/** \\internal \\ingroup enums\n  * Enum to specify whether to unroll loops when traversing over the entries of a matrix. */\nenum UnrollingType {\n  /** \\internal Do not unroll loops. */\n  NoUnrolling,\n  /** \\internal Unroll only the inner loop, but not the outer loop. */\n  InnerUnrolling,\n  /** \\internal Unroll both the inner and the outer loop. If there is only one loop, \n    * because linear traversal is used, then unroll that loop. */\n  CompleteUnrolling\n};\n\n/** \\internal \\ingroup enums\n  * Enum to specify whether to use the default (built-in) implementation or the specialization. */\nenum SpecializedType {\n  Specialized,\n  BuiltIn\n};\n\n/** \\ingroup enums\n  * Enum containing possible values for the \\p _Options template parameter of\n  * Matrix, Array and BandMatrix. */\nenum StorageOptions {\n  /** Storage order is column major (see \\ref TopicStorageOrders). */\n  ColMajor = 0,\n  /** Storage order is row major (see \\ref TopicStorageOrders). 
*/\n  RowMajor = 0x1,  // it is only a coincidence that this is equal to RowMajorBit -- don't rely on that\n  /** Align the matrix itself if it is vectorizable fixed-size */\n  AutoAlign = 0,\n  /** Don't require alignment for the matrix itself (the array of coefficients, if dynamically allocated, may still be requested to be aligned) */ // FIXME --- clarify the situation\n  DontAlign = 0x2\n};\n\n/** \\ingroup enums\n  * Enum for specifying whether to apply or solve on the left or right. */\nenum SideType {\n  /** Apply transformation on the left. */\n  OnTheLeft = 1,\n  /** Apply transformation on the right. */\n  OnTheRight = 2\n};\n\n/** \\ingroup enums\n * Enum for specifying NaN-propagation behavior, e.g. for coeff-wise min/max. */\nenum NaNPropagationOptions {\n  /**  Implementation defined behavior if NaNs are present. */\n  PropagateFast = 0,\n  /**  Always propagate NaNs. */\n  PropagateNaN,\n  /**  Always propagate not-NaNs. */\n  PropagateNumbers\n};\n\n/* the following used to be written as:\n *\n *   struct NoChange_t {};\n *   namespace {\n *     EIGEN_UNUSED NoChange_t NoChange;\n *   }\n *\n * on the ground that it feels dangerous to disambiguate overloaded functions on enum/integer types.  \n * However, this leads to \"variable declared but never referenced\" warnings on Intel Composer XE,\n * and we do not know how to get rid of them (bug 450).\n */\n\nenum NoChange_t   { NoChange };\nenum Sequential_t { Sequential };\nenum Default_t    { Default };\n\n/** \\internal \\ingroup enums\n  * Used in AmbiVector. */\nenum AmbiVectorMode {\n  IsDense         = 0,\n  IsSparse\n};\n\n/** \\ingroup enums\n  * Used as template parameter in DenseCoeffBase and MapBase to indicate \n  * which accessors should be provided. */\nenum AccessorLevels {\n  /** Read-only access via a member function. */\n  ReadOnlyAccessors, \n  /** Read/write access via member functions. */\n  WriteAccessors, \n  /** Direct read-only access to the coefficients. */\n  DirectAccessors, \n  /** Direct read/write access to the coefficients. */\n  DirectWriteAccessors\n};\n\n/** \\ingroup enums\n  * Enum with options to give to various decompositions. */\nenum DecompositionOptions {\n  /** \\internal Not used (meant for LDLT?). */\n  Pivoting            = 0x01, \n  /** \\internal Not used (meant for LDLT?). */\n  NoPivoting          = 0x02, \n  /** Used in JacobiSVD to indicate that the square matrix U is to be computed. */\n  ComputeFullU        = 0x04,\n  /** Used in JacobiSVD to indicate that the thin matrix U is to be computed. */\n  ComputeThinU        = 0x08,\n  /** Used in JacobiSVD to indicate that the square matrix V is to be computed. */\n  ComputeFullV        = 0x10,\n  /** Used in JacobiSVD to indicate that the thin matrix V is to be computed. */\n  ComputeThinV        = 0x20,\n  /** Used in SelfAdjointEigenSolver and GeneralizedSelfAdjointEigenSolver to specify\n    * that only the eigenvalues are to be computed and not the eigenvectors. */\n  EigenvaluesOnly     = 0x40,\n  /** Used in SelfAdjointEigenSolver and GeneralizedSelfAdjointEigenSolver to specify\n    * that both the eigenvalues and the eigenvectors are to be computed. */\n  ComputeEigenvectors = 0x80,\n  /** \\internal */\n  EigVecMask = EigenvaluesOnly | ComputeEigenvectors,\n  /** Used in GeneralizedSelfAdjointEigenSolver to indicate that it should\n    * solve the generalized eigenproblem \\f$ Ax = \\lambda B x \\f$. 
*/\n  Ax_lBx              = 0x100,\n  /** Used in GeneralizedSelfAdjointEigenSolver to indicate that it should\n    * solve the generalized eigenproblem \\f$ ABx = \\lambda x \\f$. */\n  ABx_lx              = 0x200,\n  /** Used in GeneralizedSelfAdjointEigenSolver to indicate that it should\n    * solve the generalized eigenproblem \\f$ BAx = \\lambda x \\f$. */\n  BAx_lx              = 0x400,\n  /** \\internal */\n  GenEigMask = Ax_lBx | ABx_lx | BAx_lx\n};\n\n/** \\ingroup enums\n  * Possible values for the \\p QRPreconditioner template parameter of JacobiSVD. */\nenum QRPreconditioners {\n  /** Do not specify what is to be done if the SVD of a non-square matrix is asked for. */\n  NoQRPreconditioner,\n  /** Use a QR decomposition without pivoting as the first step. */\n  HouseholderQRPreconditioner,\n  /** Use a QR decomposition with column pivoting as the first step. */\n  ColPivHouseholderQRPreconditioner,\n  /** Use a QR decomposition with full pivoting as the first step. */\n  FullPivHouseholderQRPreconditioner\n};\n\n#ifdef Success\n#error The preprocessor symbol 'Success' is defined, possibly by the X11 header file X.h\n#endif\n\n/** \\ingroup enums\n  * Enum for reporting the status of a computation. */\nenum ComputationInfo {\n  /** Computation was successful. */\n  Success = 0,        \n  /** The provided data did not satisfy the prerequisites. */\n  NumericalIssue = 1, \n  /** Iterative procedure did not converge. */\n  NoConvergence = 2,\n  /** The inputs are invalid, or the algorithm has been improperly called.\n    * When assertions are enabled, such errors trigger an assert. */\n  InvalidInput = 3\n};\n\n/** \\ingroup enums\n  * Enum used to specify how a particular transformation is stored in a matrix.\n  * \\sa Transform, Hyperplane::transform(). */\nenum TransformTraits {\n  /** Transformation is an isometry. */\n  Isometry      = 0x1,\n  /** Transformation is an affine transformation stored as a (Dim+1)^2 matrix whose last row is \n    * assumed to be [0 ... 0 1]. */\n  Affine        = 0x2,\n  /** Transformation is an affine transformation stored as a (Dim) x (Dim+1) matrix. */\n  AffineCompact = 0x10 | Affine,\n  /** Transformation is a general projective transformation stored as a (Dim+1)^2 matrix. */\n  Projective    = 0x20\n};\n\n/** \\internal \\ingroup enums\n  * Enum used to choose between implementation depending on the computer architecture. */\nnamespace Architecture\n{\n  enum Type {\n    Generic = 0x0,\n    SSE = 0x1,\n    AltiVec = 0x2,\n    VSX = 0x3,\n    NEON = 0x4,\n    MSA = 0x5,\n    SVE = 0x6,\n#if defined EIGEN_VECTORIZE_SSE\n    Target = SSE\n#elif defined EIGEN_VECTORIZE_ALTIVEC\n    Target = AltiVec\n#elif defined EIGEN_VECTORIZE_VSX\n    Target = VSX\n#elif defined EIGEN_VECTORIZE_NEON\n    Target = NEON\n#elif defined EIGEN_VECTORIZE_SVE\n    Target = SVE\n#elif defined EIGEN_VECTORIZE_MSA\n    Target = MSA\n#else\n    Target = Generic\n#endif\n  };\n}\n\n/** \\internal \\ingroup enums\n  * Enum used as template parameter in Product and product evaluators. */\nenum ProductImplType\n{ DefaultProduct=0, LazyProduct, AliasFreeProduct, CoeffBasedProductMode, LazyCoeffBasedProductMode, OuterProduct, InnerProduct, GemvProduct, GemmProduct };\n\n/** \\internal \\ingroup enums\n  * Enum used in experimental parallel implementation. */\nenum Action {GetAction, SetAction};\n\n/** The type used to identify a dense storage. */\nstruct Dense {};\n\n/** The type used to identify a general sparse storage. 
*/\nstruct Sparse {};\n\n/** The type used to identify a general solver (factored) storage. */\nstruct SolverStorage {};\n\n/** The type used to identify a permutation storage. */\nstruct PermutationStorage {};\n\n/** The type used to identify a transpositions storage. */\nstruct TranspositionsStorage {};\n\n/** The type used to identify a matrix expression */\nstruct MatrixXpr {};\n\n/** The type used to identify an array expression */\nstruct ArrayXpr {};\n\n// An evaluator must define its shape. By default, it can be one of the following:\nstruct DenseShape             { static std::string debugName() { return \"DenseShape\"; } };\nstruct SolverShape            { static std::string debugName() { return \"SolverShape\"; } };\nstruct HomogeneousShape       { static std::string debugName() { return \"HomogeneousShape\"; } };\nstruct DiagonalShape          { static std::string debugName() { return \"DiagonalShape\"; } };\nstruct BandShape              { static std::string debugName() { return \"BandShape\"; } };\nstruct TriangularShape        { static std::string debugName() { return \"TriangularShape\"; } };\nstruct SelfAdjointShape       { static std::string debugName() { return \"SelfAdjointShape\"; } };\nstruct PermutationShape       { static std::string debugName() { return \"PermutationShape\"; } };\nstruct TranspositionsShape    { static std::string debugName() { return \"TranspositionsShape\"; } };\nstruct SparseShape            { static std::string debugName() { return \"SparseShape\"; } };\n\nnamespace internal {\n\n// random access iterators based on coeff*() accessors.\nstruct IndexBased {};\n\n// evaluator based on iterators to access coefficients. \nstruct IteratorBased {};\n\n/** \\internal\n * Constants for comparison functors\n */\nenum ComparisonName {\n  cmp_EQ = 0,\n  cmp_LT = 1,\n  cmp_LE = 2,\n  cmp_UNORD = 3,\n  cmp_NEQ = 4,\n  cmp_GT = 5,\n  cmp_GE = 6\n};\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_CONSTANTS_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/util/DisableStupidWarnings.h",
    "content": "#ifndef EIGEN_WARNINGS_DISABLED\n#define EIGEN_WARNINGS_DISABLED\n\n#ifdef _MSC_VER\n  // 4100 - unreferenced formal parameter (occurred e.g. in aligned_allocator::destroy(pointer p))\n  // 4101 - unreferenced local variable\n  // 4181 - qualifier applied to reference type ignored\n  // 4211 - nonstandard extension used : redefined extern to static\n  // 4244 - 'argument' : conversion from 'type1' to 'type2', possible loss of data\n  // 4273 - QtAlignedMalloc, inconsistent DLL linkage\n  // 4324 - structure was padded due to declspec(align())\n  // 4503 - decorated name length exceeded, name was truncated\n  // 4512 - assignment operator could not be generated\n  // 4522 - 'class' : multiple assignment operators specified\n  // 4700 - uninitialized local variable 'xyz' used\n  // 4714 - function marked as __forceinline not inlined\n  // 4717 - 'function' : recursive on all control paths, function will cause runtime stack overflow\n  // 4800 - 'type' : forcing value to bool 'true' or 'false' (performance warning)\n  #ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS\n    #pragma warning( push )\n  #endif\n  #pragma warning( disable : 4100 4101 4181 4211 4244 4273 4324 4503 4512 4522 4700 4714 4717 4800)\n\n#elif defined __INTEL_COMPILER\n  // 2196 - routine is both \"inline\" and \"noinline\" (\"noinline\" assumed)\n  //        ICC 12 generates this warning even without any inline keyword, when defining class methods 'inline' i.e. inside of class body\n  //        typedef that may be a reference type.\n  // 279  - controlling expression is constant\n  //        ICC 12 generates this warning on assert(constant_expression_depending_on_template_params) and frankly this is a legitimate use case.\n  // 1684 - conversion from pointer to same-sized integral type (potential portability problem)\n  // 2259 - non-pointer conversion from \"Eigen::Index={ptrdiff_t={long}}\" to \"int\" may lose significant bits\n  #ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS\n    #pragma warning push\n  #endif\n  #pragma warning disable 2196 279 1684 2259\n\n#elif defined __clang__\n  // -Wconstant-logical-operand - warning: use of logical && with constant operand; switch to bitwise & or remove constant\n  //     this is really a stupid warning as it warns on compile-time expressions involving enums\n  #ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS\n    #pragma clang diagnostic push\n  #endif\n  #pragma clang diagnostic ignored \"-Wconstant-logical-operand\"\n  #if __clang_major__ >= 3 && __clang_minor__ >= 5\n    #pragma clang diagnostic ignored \"-Wabsolute-value\"\n  #endif\n  #if __clang_major__ >= 10\n    #pragma clang diagnostic ignored \"-Wimplicit-int-float-conversion\"\n  #endif\n  #if ( defined(__ALTIVEC__) || defined(__VSX__) ) && __cplusplus < 201103L\n    // warning: generic selections are a C11-specific feature\n    // ignoring warnings thrown at vec_ctf in Altivec/PacketMath.h\n    #pragma clang diagnostic ignored \"-Wc11-extensions\"\n  #endif\n\n#elif defined __GNUC__\n\n  #if (!defined(EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS)) &&  (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6))\n    #pragma GCC diagnostic push\n  #endif\n  // g++ warns about local variables shadowing member functions, which is too strict\n  #pragma GCC diagnostic ignored \"-Wshadow\"\n  #if __GNUC__ == 4 && __GNUC_MINOR__ < 8\n    // Until g++-4.7 there are warnings when comparing unsigned int vs 0, even in templated functions:\n    #pragma GCC diagnostic ignored \"-Wtype-limits\"\n  #endif\n  #if 
__GNUC__>=6\n    #pragma GCC diagnostic ignored \"-Wignored-attributes\"\n  #endif\n  #if __GNUC__==7\n    // See: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89325\n    #pragma GCC diagnostic ignored \"-Wattributes\"\n  #endif\n#endif\n\n#if defined __NVCC__\n  #pragma diag_suppress boolean_controlling_expr_is_constant\n  // Disable the \"statement is unreachable\" message\n  #pragma diag_suppress code_is_unreachable\n  // Disable the \"dynamic initialization in unreachable code\" message\n  #pragma diag_suppress initialization_not_reachable\n  // Disable the \"invalid error number\" message that we get with older versions of nvcc\n  #pragma diag_suppress 1222\n  // Disable the \"calling a __host__ function from a __host__ __device__ function is not allowed\" messages (yes, there are many of them and they seem to change with every version of the compiler)\n  #pragma diag_suppress 2527\n  #pragma diag_suppress 2529\n  #pragma diag_suppress 2651\n  #pragma diag_suppress 2653\n  #pragma diag_suppress 2668\n  #pragma diag_suppress 2669\n  #pragma diag_suppress 2670\n  #pragma diag_suppress 2671\n  #pragma diag_suppress 2735\n  #pragma diag_suppress 2737\n  #pragma diag_suppress 2739\n#endif\n\n#else\n// warnings already disabled:\n# ifndef EIGEN_WARNINGS_DISABLED_2\n#  define EIGEN_WARNINGS_DISABLED_2\n# elif defined(EIGEN_INTERNAL_DEBUGGING)\n#  error \"Do not include \\\"DisableStupidWarnings.h\\\" recursively more than twice!\"\n# endif\n\n#endif // not EIGEN_WARNINGS_DISABLED\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/util/ForwardDeclarations.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2007-2010 Benoit Jacob <jacob.benoit.1@gmail.com>\n// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_FORWARDDECLARATIONS_H\n#define EIGEN_FORWARDDECLARATIONS_H\n\nnamespace Eigen {\nnamespace internal {\n\ntemplate<typename T> struct traits;\n\n// here we say once and for all that traits<const T> == traits<T>\n// When constness must affect traits, it has to be constness on template parameters on which T itself depends.\n// For example, traits<Map<const T> > != traits<Map<T> >, but\n//              traits<const Map<T> > == traits<Map<T> >\ntemplate<typename T> struct traits<const T> : traits<T> {};\n\ntemplate<typename Derived> struct has_direct_access\n{\n  enum { ret = (traits<Derived>::Flags & DirectAccessBit) ? 1 : 0 };\n};\n\ntemplate<typename Derived> struct accessors_level\n{\n  enum { has_direct_access = (traits<Derived>::Flags & DirectAccessBit) ? 1 : 0,\n         has_write_access = (traits<Derived>::Flags & LvalueBit) ? 1 : 0,\n         value = has_direct_access ? (has_write_access ? DirectWriteAccessors : DirectAccessors)\n                                   : (has_write_access ? WriteAccessors       : ReadOnlyAccessors)\n  };\n};\n\ntemplate<typename T> struct evaluator_traits;\n\ntemplate< typename T> struct evaluator;\n\n} // end namespace internal\n\ntemplate<typename T> struct NumTraits;\n\ntemplate<typename Derived> struct EigenBase;\ntemplate<typename Derived> class DenseBase;\ntemplate<typename Derived> class PlainObjectBase;\ntemplate<typename Derived, int Level> class DenseCoeffsBase;\n\ntemplate<typename _Scalar, int _Rows, int _Cols,\n         int _Options = AutoAlign |\n#if EIGEN_GNUC_AT(3,4)\n    // workaround a bug in at least gcc 3.4.6\n    // the innermost ?: ternary operator is misparsed. We write it slightly\n    // differently and this makes gcc 3.4.6 happy, but it's ugly.\n    // The error would only show up with EIGEN_DEFAULT_TO_ROW_MAJOR is defined\n    // (when EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION is RowMajor)\n                          ( (_Rows==1 && _Cols!=1) ? Eigen::RowMajor\n                          : !(_Cols==1 && _Rows!=1) ?  EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION\n                          : Eigen::ColMajor ),\n#else\n                          ( (_Rows==1 && _Cols!=1) ? Eigen::RowMajor\n                          : (_Cols==1 && _Rows!=1) ? 
Eigen::ColMajor\n                          : EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION ),\n#endif\n         int _MaxRows = _Rows,\n         int _MaxCols = _Cols\n> class Matrix;\n\ntemplate<typename Derived> class MatrixBase;\ntemplate<typename Derived> class ArrayBase;\n\ntemplate<typename ExpressionType, unsigned int Added, unsigned int Removed> class Flagged;\ntemplate<typename ExpressionType, template <typename> class StorageBase > class NoAlias;\ntemplate<typename ExpressionType> class NestByValue;\ntemplate<typename ExpressionType> class ForceAlignedAccess;\ntemplate<typename ExpressionType> class SwapWrapper;\n\ntemplate<typename XprType, int BlockRows=Dynamic, int BlockCols=Dynamic, bool InnerPanel = false> class Block;\ntemplate<typename XprType, typename RowIndices, typename ColIndices> class IndexedView;\ntemplate<typename XprType, int Rows=Dynamic, int Cols=Dynamic, int Order=0> class Reshaped;\n\ntemplate<typename MatrixType, int Size=Dynamic> class VectorBlock;\ntemplate<typename MatrixType> class Transpose;\ntemplate<typename MatrixType> class Conjugate;\ntemplate<typename NullaryOp, typename MatrixType>         class CwiseNullaryOp;\ntemplate<typename UnaryOp,   typename MatrixType>         class CwiseUnaryOp;\ntemplate<typename ViewOp,    typename MatrixType>         class CwiseUnaryView;\ntemplate<typename BinaryOp,  typename Lhs, typename Rhs>  class CwiseBinaryOp;\ntemplate<typename TernaryOp, typename Arg1, typename Arg2, typename Arg3>  class CwiseTernaryOp;\ntemplate<typename Decomposition, typename Rhstype>        class Solve;\ntemplate<typename XprType>                                class Inverse;\n\ntemplate<typename Lhs, typename Rhs, int Option = DefaultProduct> class Product;\n\ntemplate<typename Derived> class DiagonalBase;\ntemplate<typename _DiagonalVectorType> class DiagonalWrapper;\ntemplate<typename _Scalar, int SizeAtCompileTime, int MaxSizeAtCompileTime=SizeAtCompileTime> class DiagonalMatrix;\ntemplate<typename MatrixType, typename DiagonalType, int ProductOrder> class DiagonalProduct;\ntemplate<typename MatrixType, int Index = 0> class Diagonal;\ntemplate<int SizeAtCompileTime, int MaxSizeAtCompileTime = SizeAtCompileTime, typename IndexType=int> class PermutationMatrix;\ntemplate<int SizeAtCompileTime, int MaxSizeAtCompileTime = SizeAtCompileTime, typename IndexType=int> class Transpositions;\ntemplate<typename Derived> class PermutationBase;\ntemplate<typename Derived> class TranspositionsBase;\ntemplate<typename _IndicesType> class PermutationWrapper;\ntemplate<typename _IndicesType> class TranspositionsWrapper;\n\ntemplate<typename Derived,\n         int Level = internal::accessors_level<Derived>::has_write_access ? 
WriteAccessors : ReadOnlyAccessors\n> class MapBase;\ntemplate<int OuterStrideAtCompileTime, int InnerStrideAtCompileTime> class Stride;\ntemplate<int Value = Dynamic> class InnerStride;\ntemplate<int Value = Dynamic> class OuterStride;\ntemplate<typename MatrixType, int MapOptions=Unaligned, typename StrideType = Stride<0,0> > class Map;\ntemplate<typename Derived> class RefBase;\ntemplate<typename PlainObjectType, int Options = 0,\n         typename StrideType = typename internal::conditional<PlainObjectType::IsVectorAtCompileTime,InnerStride<1>,OuterStride<> >::type > class Ref;\n\ntemplate<typename Derived> class TriangularBase;\ntemplate<typename MatrixType, unsigned int Mode> class TriangularView;\ntemplate<typename MatrixType, unsigned int Mode> class SelfAdjointView;\ntemplate<typename MatrixType> class SparseView;\ntemplate<typename ExpressionType> class WithFormat;\ntemplate<typename MatrixType> struct CommaInitializer;\ntemplate<typename Derived> class ReturnByValue;\ntemplate<typename ExpressionType> class ArrayWrapper;\ntemplate<typename ExpressionType> class MatrixWrapper;\ntemplate<typename Derived> class SolverBase;\ntemplate<typename XprType> class InnerIterator;\n\nnamespace internal {\ntemplate<typename XprType> class generic_randaccess_stl_iterator;\ntemplate<typename XprType> class pointer_based_stl_iterator;\ntemplate<typename XprType, DirectionType Direction> class subvector_stl_iterator;\ntemplate<typename XprType, DirectionType Direction> class subvector_stl_reverse_iterator;\ntemplate<typename DecompositionType> struct kernel_retval_base;\ntemplate<typename DecompositionType> struct kernel_retval;\ntemplate<typename DecompositionType> struct image_retval_base;\ntemplate<typename DecompositionType> struct image_retval;\n} // end namespace internal\n\nnamespace internal {\ntemplate<typename _Scalar, int Rows=Dynamic, int Cols=Dynamic, int Supers=Dynamic, int Subs=Dynamic, int Options=0> class BandMatrix;\n}\n\nnamespace internal {\ntemplate<typename Lhs, typename Rhs> struct product_type;\n\ntemplate<bool> struct EnableIf;\n\n/** \\internal\n  * \\class product_evaluator\n  * Products need their own evaluator with more template arguments allowing for\n  * easier partial template specializations.\n  */\ntemplate< typename T,\n          int ProductTag = internal::product_type<typename T::Lhs,typename T::Rhs>::ret,\n          typename LhsShape = typename evaluator_traits<typename T::Lhs>::Shape,\n          typename RhsShape = typename evaluator_traits<typename T::Rhs>::Shape,\n          typename LhsScalar = typename traits<typename T::Lhs>::Scalar,\n          typename RhsScalar = typename traits<typename T::Rhs>::Scalar\n        > struct product_evaluator;\n}\n\ntemplate<typename Lhs, typename Rhs,\n         int ProductType = internal::product_type<Lhs,Rhs>::value>\nstruct ProductReturnType;\n\n// this is a workaround for sun CC\ntemplate<typename Lhs, typename Rhs> struct LazyProductReturnType;\n\nnamespace internal {\n\n// Provides scalar/packet-wise product and product with accumulation\n// with optional conjugation of the arguments.\ntemplate<typename LhsScalar, typename RhsScalar, bool ConjLhs=false, bool ConjRhs=false> struct conj_helper;\n\ntemplate<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_sum_op;\ntemplate<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_difference_op;\ntemplate<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_conj_product_op;\ntemplate<typename LhsScalar,typename RhsScalar=LhsScalar, int 
NaNPropagation=PropagateFast> struct scalar_min_op;\ntemplate<typename LhsScalar,typename RhsScalar=LhsScalar, int NaNPropagation=PropagateFast> struct scalar_max_op;\ntemplate<typename Scalar> struct scalar_opposite_op;\ntemplate<typename Scalar> struct scalar_conjugate_op;\ntemplate<typename Scalar> struct scalar_real_op;\ntemplate<typename Scalar> struct scalar_imag_op;\ntemplate<typename Scalar> struct scalar_abs_op;\ntemplate<typename Scalar> struct scalar_abs2_op;\ntemplate<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_absolute_difference_op;\ntemplate<typename Scalar> struct scalar_sqrt_op;\ntemplate<typename Scalar> struct scalar_rsqrt_op;\ntemplate<typename Scalar> struct scalar_exp_op;\ntemplate<typename Scalar> struct scalar_log_op;\ntemplate<typename Scalar> struct scalar_cos_op;\ntemplate<typename Scalar> struct scalar_sin_op;\ntemplate<typename Scalar> struct scalar_acos_op;\ntemplate<typename Scalar> struct scalar_asin_op;\ntemplate<typename Scalar> struct scalar_tan_op;\ntemplate<typename Scalar> struct scalar_inverse_op;\ntemplate<typename Scalar> struct scalar_square_op;\ntemplate<typename Scalar> struct scalar_cube_op;\ntemplate<typename Scalar, typename NewType> struct scalar_cast_op;\ntemplate<typename Scalar> struct scalar_random_op;\ntemplate<typename Scalar> struct scalar_constant_op;\ntemplate<typename Scalar> struct scalar_identity_op;\ntemplate<typename Scalar,bool is_complex, bool is_integer> struct scalar_sign_op;\ntemplate<typename Scalar,typename ScalarExponent> struct scalar_pow_op;\ntemplate<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_hypot_op;\ntemplate<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_product_op;\ntemplate<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_quotient_op;\n\n// SpecialFunctions module\ntemplate<typename Scalar> struct scalar_lgamma_op;\ntemplate<typename Scalar> struct scalar_digamma_op;\ntemplate<typename Scalar> struct scalar_erf_op;\ntemplate<typename Scalar> struct scalar_erfc_op;\ntemplate<typename Scalar> struct scalar_ndtri_op;\ntemplate<typename Scalar> struct scalar_igamma_op;\ntemplate<typename Scalar> struct scalar_igammac_op;\ntemplate<typename Scalar> struct scalar_zeta_op;\ntemplate<typename Scalar> struct scalar_betainc_op;\n\n// Bessel functions in SpecialFunctions module\ntemplate<typename Scalar> struct scalar_bessel_i0_op;\ntemplate<typename Scalar> struct scalar_bessel_i0e_op;\ntemplate<typename Scalar> struct scalar_bessel_i1_op;\ntemplate<typename Scalar> struct scalar_bessel_i1e_op;\ntemplate<typename Scalar> struct scalar_bessel_j0_op;\ntemplate<typename Scalar> struct scalar_bessel_y0_op;\ntemplate<typename Scalar> struct scalar_bessel_j1_op;\ntemplate<typename Scalar> struct scalar_bessel_y1_op;\ntemplate<typename Scalar> struct scalar_bessel_k0_op;\ntemplate<typename Scalar> struct scalar_bessel_k0e_op;\ntemplate<typename Scalar> struct scalar_bessel_k1_op;\ntemplate<typename Scalar> struct scalar_bessel_k1e_op;\n\n\n} // end namespace internal\n\nstruct IOFormat;\n\n// Array module\ntemplate<typename _Scalar, int _Rows, int _Cols,\n         int _Options = AutoAlign |\n#if EIGEN_GNUC_AT(3,4)\n    // workaround a bug in at least gcc 3.4.6\n    // the innermost ?: ternary operator is misparsed. 
We write it slightly\n    // differently and this makes gcc 3.4.6 happy, but it's ugly.\n    // The error would only show up with EIGEN_DEFAULT_TO_ROW_MAJOR is defined\n    // (when EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION is RowMajor)\n                          ( (_Rows==1 && _Cols!=1) ? Eigen::RowMajor\n                          : !(_Cols==1 && _Rows!=1) ?  EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION\n                          : Eigen::ColMajor ),\n#else\n                          ( (_Rows==1 && _Cols!=1) ? Eigen::RowMajor\n                          : (_Cols==1 && _Rows!=1) ? Eigen::ColMajor\n                          : EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION ),\n#endif\n         int _MaxRows = _Rows, int _MaxCols = _Cols> class Array;\ntemplate<typename ConditionMatrixType, typename ThenMatrixType, typename ElseMatrixType> class Select;\ntemplate<typename MatrixType, typename BinaryOp, int Direction> class PartialReduxExpr;\ntemplate<typename ExpressionType, int Direction> class VectorwiseOp;\ntemplate<typename MatrixType,int RowFactor,int ColFactor> class Replicate;\ntemplate<typename MatrixType, int Direction = BothDirections> class Reverse;\n\ntemplate<typename MatrixType> class FullPivLU;\ntemplate<typename MatrixType> class PartialPivLU;\nnamespace internal {\ntemplate<typename MatrixType> struct inverse_impl;\n}\ntemplate<typename MatrixType> class HouseholderQR;\ntemplate<typename MatrixType> class ColPivHouseholderQR;\ntemplate<typename MatrixType> class FullPivHouseholderQR;\ntemplate<typename MatrixType> class CompleteOrthogonalDecomposition;\ntemplate<typename MatrixType> class SVDBase;\ntemplate<typename MatrixType, int QRPreconditioner = ColPivHouseholderQRPreconditioner> class JacobiSVD;\ntemplate<typename MatrixType> class BDCSVD;\ntemplate<typename MatrixType, int UpLo = Lower> class LLT;\ntemplate<typename MatrixType, int UpLo = Lower> class LDLT;\ntemplate<typename VectorsType, typename CoeffsType, int Side=OnTheLeft> class HouseholderSequence;\ntemplate<typename Scalar>     class JacobiRotation;\n\n// Geometry module:\ntemplate<typename Derived, int _Dim> class RotationBase;\ntemplate<typename Lhs, typename Rhs> class Cross;\ntemplate<typename Derived> class QuaternionBase;\ntemplate<typename Scalar> class Rotation2D;\ntemplate<typename Scalar> class AngleAxis;\ntemplate<typename Scalar,int Dim> class Translation;\ntemplate<typename Scalar,int Dim> class AlignedBox;\ntemplate<typename Scalar, int Options = AutoAlign> class Quaternion;\ntemplate<typename Scalar,int Dim,int Mode,int _Options=AutoAlign> class Transform;\ntemplate <typename _Scalar, int _AmbientDim, int Options=AutoAlign> class ParametrizedLine;\ntemplate <typename _Scalar, int _AmbientDim, int Options=AutoAlign> class Hyperplane;\ntemplate<typename Scalar> class UniformScaling;\ntemplate<typename MatrixType,int Direction> class Homogeneous;\n\n// Sparse module:\ntemplate<typename Derived> class SparseMatrixBase;\n\n// MatrixFunctions module\ntemplate<typename Derived> struct MatrixExponentialReturnValue;\ntemplate<typename Derived> class MatrixFunctionReturnValue;\ntemplate<typename Derived> class MatrixSquareRootReturnValue;\ntemplate<typename Derived> class MatrixLogarithmReturnValue;\ntemplate<typename Derived> class MatrixPowerReturnValue;\ntemplate<typename Derived> class MatrixComplexPowerReturnValue;\n\nnamespace internal {\ntemplate <typename Scalar>\nstruct stem_function\n{\n  typedef std::complex<typename NumTraits<Scalar>::Real> ComplexScalar;\n  typedef ComplexScalar type(ComplexScalar, 
int);\n};\n}\n\n} // end namespace Eigen\n\n#endif // EIGEN_FORWARDDECLARATIONS_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/util/IndexedViewHelper.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2017 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n\n#ifndef EIGEN_INDEXED_VIEW_HELPER_H\n#define EIGEN_INDEXED_VIEW_HELPER_H\n\nnamespace Eigen {\n\nnamespace internal {\nstruct symbolic_last_tag {};\n}\n\n/** \\var last\n  * \\ingroup Core_Module\n  *\n  * Can be used as a parameter to Eigen::seq and Eigen::seqN functions to symbolically reference the last element/row/columns\n  * of the underlying vector or matrix once passed to DenseBase::operator()(const RowIndices&, const ColIndices&).\n  *\n  * This symbolic placeholder supports standard arithmetic operations.\n  *\n  * A typical usage example would be:\n  * \\code\n  * using namespace Eigen;\n  * using Eigen::last;\n  * VectorXd v(n);\n  * v(seq(2,last-2)).setOnes();\n  * \\endcode\n  *\n  * \\sa end\n  */\nstatic const symbolic::SymbolExpr<internal::symbolic_last_tag> last; // PLEASE use Eigen::last   instead of Eigen::placeholders::last\n\n/** \\var lastp1\n  * \\ingroup Core_Module\n  *\n  * Can be used as a parameter to Eigen::seq and Eigen::seqN functions to symbolically\n  * reference the last+1 element/row/columns of the underlying vector or matrix once\n  * passed to DenseBase::operator()(const RowIndices&, const ColIndices&).\n  *\n  * This symbolic placeholder supports standard arithmetic operations.\n  * It is essentially an alias to last+fix<1>.\n  *\n  * \\sa last\n  */\n#ifdef EIGEN_PARSED_BY_DOXYGEN\nstatic const auto lastp1 = last+fix<1>;\n#else\n// Using a FixedExpr<1> expression is important here to make sure the compiler\n// can fully optimize the computation starting indices with zero overhead.\nstatic const symbolic::AddExpr<symbolic::SymbolExpr<internal::symbolic_last_tag>,symbolic::ValueExpr<Eigen::internal::FixedInt<1> > > lastp1(last+fix<1>());\n#endif\n\nnamespace internal {\n\n // Replace symbolic last/end \"keywords\" by their true runtime value\ninline Index eval_expr_given_size(Index x, Index /* size */)   { return x; }\n\ntemplate<int N>\nFixedInt<N> eval_expr_given_size(FixedInt<N> x, Index /*size*/)   { return x; }\n\ntemplate<typename Derived>\nIndex eval_expr_given_size(const symbolic::BaseExpr<Derived> &x, Index size)\n{\n  return x.derived().eval(last=size-1);\n}\n\n// Extract increment/step at compile time\ntemplate<typename T, typename EnableIf = void> struct get_compile_time_incr {\n  enum { value = UndefinedIncr };\n};\n\n// Analogue of std::get<0>(x), but tailored for our needs.\ntemplate<typename T>\nEIGEN_CONSTEXPR Index first(const T& x) EIGEN_NOEXCEPT { return x.first(); }\n\n// IndexedViewCompatibleType/makeIndexedViewCompatible turn an arbitrary object of type T into something usable by MatrixSlice\n// The generic implementation is a no-op\ntemplate<typename T,int XprSize,typename EnableIf=void>\nstruct IndexedViewCompatibleType {\n  typedef T type;\n};\n\ntemplate<typename T,typename Q>\nconst T& makeIndexedViewCompatible(const T& x, Index /*size*/, Q) { return x; }\n\n//--------------------------------------------------------------------------------\n// Handling of a single Index\n//--------------------------------------------------------------------------------\n\nstruct SingleRange {\n  enum {\n    SizeAtCompileTime = 1\n  };\n  SingleRange(Index val) 
: m_value(val) {}\n  Index operator[](Index) const { return m_value; }\n  static EIGEN_CONSTEXPR Index size() EIGEN_NOEXCEPT { return 1; }\n  Index first() const EIGEN_NOEXCEPT { return m_value; }\n  Index m_value;\n};\n\ntemplate<> struct get_compile_time_incr<SingleRange> {\n  enum { value = 1 }; // 1 or 0 ??\n};\n\n// Turn a single index into something that looks like an array (i.e., that exposes a .size(), and operator[](int) methods)\ntemplate<typename T, int XprSize>\nstruct IndexedViewCompatibleType<T,XprSize,typename internal::enable_if<internal::is_integral<T>::value>::type> {\n  // Here we could simply use Array, but maybe it's less work for the compiler to use\n  // a simpler wrapper as SingleRange\n  //typedef Eigen::Array<Index,1,1> type;\n  typedef SingleRange type;\n};\n\ntemplate<typename T, int XprSize>\nstruct IndexedViewCompatibleType<T, XprSize, typename enable_if<symbolic::is_symbolic<T>::value>::type> {\n  typedef SingleRange type;\n};\n\n\ntemplate<typename T>\ntypename enable_if<symbolic::is_symbolic<T>::value,SingleRange>::type\nmakeIndexedViewCompatible(const T& id, Index size, SpecializedType) {\n  return eval_expr_given_size(id,size);\n}\n\n//--------------------------------------------------------------------------------\n// Handling of all\n//--------------------------------------------------------------------------------\n\nstruct all_t { all_t() {} };\n\n// Convert a symbolic 'all' into a usable range type\ntemplate<int XprSize>\nstruct AllRange {\n  enum { SizeAtCompileTime = XprSize };\n  AllRange(Index size = XprSize) : m_size(size) {}\n  EIGEN_CONSTEXPR Index operator[](Index i) const EIGEN_NOEXCEPT { return i; }\n  EIGEN_CONSTEXPR Index size() const EIGEN_NOEXCEPT { return m_size.value(); }\n  EIGEN_CONSTEXPR Index first() const EIGEN_NOEXCEPT { return 0; }\n  variable_if_dynamic<Index,XprSize> m_size;\n};\n\ntemplate<int XprSize>\nstruct IndexedViewCompatibleType<all_t,XprSize> {\n  typedef AllRange<XprSize> type;\n};\n\ntemplate<typename XprSizeType>\ninline AllRange<get_fixed_value<XprSizeType>::value> makeIndexedViewCompatible(all_t , XprSizeType size, SpecializedType) {\n  return AllRange<get_fixed_value<XprSizeType>::value>(size);\n}\n\ntemplate<int Size> struct get_compile_time_incr<AllRange<Size> > {\n  enum { value = 1 };\n};\n\n} // end namespace internal\n\n\n/** \\var all\n  * \\ingroup Core_Module\n  * Can be used as a parameter to DenseBase::operator()(const RowIndices&, const ColIndices&) to index all rows or columns\n  */\nstatic const Eigen::internal::all_t all; // PLEASE use Eigen::all instead of Eigen::placeholders::all\n\n\nnamespace placeholders {\n  typedef symbolic::SymbolExpr<internal::symbolic_last_tag> last_t;\n  typedef symbolic::AddExpr<symbolic::SymbolExpr<internal::symbolic_last_tag>,symbolic::ValueExpr<Eigen::internal::FixedInt<1> > > end_t;\n  typedef Eigen::internal::all_t all_t;\n\n  EIGEN_DEPRECATED static const all_t  all  = Eigen::all;    // PLEASE use Eigen::all    instead of Eigen::placeholders::all\n  EIGEN_DEPRECATED static const last_t last = Eigen::last;   // PLEASE use Eigen::last   instead of Eigen::placeholders::last\n  EIGEN_DEPRECATED static const end_t  end  = Eigen::lastp1; // PLEASE use Eigen::lastp1 instead of Eigen::placeholders::end\n}\n\n} // end namespace Eigen\n\n#endif // EIGEN_INDEXED_VIEW_HELPER_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/util/IntegralConstant.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2017 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n\n#ifndef EIGEN_INTEGRAL_CONSTANT_H\n#define EIGEN_INTEGRAL_CONSTANT_H\n\nnamespace Eigen {\n\nnamespace internal {\n\ntemplate<int N> class FixedInt;\ntemplate<int N> class VariableAndFixedInt;\n\n/** \\internal\n  * \\class FixedInt\n  *\n  * This class embeds a compile-time integer \\c N.\n  *\n  * It is similar to c++11 std::integral_constant<int,N> but with some additional features\n  * such as:\n  *  - implicit conversion to int\n  *  - arithmetic and some bitwise operators: -, +, *, /, %, &, |\n  *  - c++98/14 compatibility with fix<N> and fix<N>() syntax to define integral constants.\n  *\n  * It is strongly discouraged to directly deal with this class FixedInt. Instances are expcected to\n  * be created by the user using Eigen::fix<N> or Eigen::fix<N>(). In C++98-11, the former syntax does\n  * not create a FixedInt<N> instance but rather a point to function that needs to be \\em cleaned-up\n  * using the generic helper:\n  * \\code\n  * internal::cleanup_index_type<T>::type\n  * internal::cleanup_index_type<T,DynamicKey>::type\n  * \\endcode\n  * where T can a FixedInt<N>, a pointer to function FixedInt<N> (*)(), or numerous other integer-like representations.\n  * \\c DynamicKey is either Dynamic (default) or DynamicIndex and used to identify true compile-time values.\n  *\n  * For convenience, you can extract the compile-time value \\c N in a generic way using the following helper:\n  * \\code\n  * internal::get_fixed_value<T,DefaultVal>::value\n  * \\endcode\n  * that will give you \\c N if T equals FixedInt<N> or FixedInt<N> (*)(), and \\c DefaultVal if T does not embed any compile-time value (e.g., T==int).\n  *\n  * \\sa fix<N>, class VariableAndFixedInt\n  */\ntemplate<int N> class FixedInt\n{\npublic:\n  static const int value = N;\n  EIGEN_CONSTEXPR operator int() const { return value; }\n  FixedInt() {}\n  FixedInt( VariableAndFixedInt<N> other) {\n    #ifndef EIGEN_INTERNAL_DEBUGGING\n    EIGEN_UNUSED_VARIABLE(other);\n    #endif\n    eigen_internal_assert(int(other)==N);\n  }\n\n  FixedInt<-N> operator-() const { return FixedInt<-N>(); }\n  template<int M>\n  FixedInt<N+M> operator+( FixedInt<M>) const { return FixedInt<N+M>(); }\n  template<int M>\n  FixedInt<N-M> operator-( FixedInt<M>) const { return FixedInt<N-M>(); }\n  template<int M>\n  FixedInt<N*M> operator*( FixedInt<M>) const { return FixedInt<N*M>(); }\n  template<int M>\n  FixedInt<N/M> operator/( FixedInt<M>) const { return FixedInt<N/M>(); }\n  template<int M>\n  FixedInt<N%M> operator%( FixedInt<M>) const { return FixedInt<N%M>(); }\n  template<int M>\n  FixedInt<N|M> operator|( FixedInt<M>) const { return FixedInt<N|M>(); }\n  template<int M>\n  FixedInt<N&M> operator&( FixedInt<M>) const { return FixedInt<N&M>(); }\n\n#if EIGEN_HAS_CXX14_VARIABLE_TEMPLATES\n  // Needed in C++14 to allow fix<N>():\n  FixedInt operator() () const { return *this; }\n\n  VariableAndFixedInt<N> operator() (int val) const { return VariableAndFixedInt<N>(val); }\n#else\n  FixedInt ( FixedInt<N> (*)() ) {}\n#endif\n\n#if EIGEN_HAS_CXX11\n  FixedInt(std::integral_constant<int,N>) {}\n#endif\n};\n\n/** \\internal\n  * \\class VariableAndFixedInt\n 
 *\n  * This class embeds both a compile-time integer \\c N and a runtime integer.\n  * Both values are supposed to be equal unless the compile-time value \\c N has a special\n  * value meaning that the runtime-value should be used. Depending on the context, this special\n  * value can be either Eigen::Dynamic (for positive quantities) or Eigen::DynamicIndex (for\n  * quantities that can be negative).\n  *\n  * It is the return-type of the function Eigen::fix<N>(int), and most of the time this is the only\n  * way it is used. It is strongly discouraged to directly deal with instances of VariableAndFixedInt.\n  * Indeed, in order to write generic code, it is the responsibility of the callee to properly convert\n  * it to either a true compile-time quantity (i.e. a FixedInt<N>), or to a runtime quantity (e.g., an Index)\n  * using the following generic helper:\n  * \\code\n  * internal::cleanup_index_type<T>::type\n  * internal::cleanup_index_type<T,DynamicKey>::type\n  * \\endcode\n  * where T can be a template instantiation of VariableAndFixedInt or numerous other integer-like representations.\n  * \\c DynamicKey is either Dynamic (default) or DynamicIndex and used to identify true compile-time values.\n  *\n  * For convenience, you can also extract the compile-time value \\c N using the following helper:\n  * \\code\n  * internal::get_fixed_value<T,DefaultVal>::value\n  * \\endcode\n  * that will give you \\c N if T equals VariableAndFixedInt<N>, and \\c DefaultVal if T does not embed any compile-time value (e.g., T==int).\n  *\n  * \\sa fix<N>(int), class FixedInt\n  */\ntemplate<int N> class VariableAndFixedInt\n{\npublic:\n  static const int value = N;\n  operator int() const { return m_value; }\n  VariableAndFixedInt(int val) { m_value = val; }\nprotected:\n  int m_value;\n};\n\ntemplate<typename T, int Default=Dynamic> struct get_fixed_value {\n  static const int value = Default;\n};\n\ntemplate<int N,int Default> struct get_fixed_value<FixedInt<N>,Default> {\n  static const int value = N;\n};\n\n#if !EIGEN_HAS_CXX14\ntemplate<int N,int Default> struct get_fixed_value<FixedInt<N> (*)(),Default> {\n  static const int value = N;\n};\n#endif\n\ntemplate<int N,int Default> struct get_fixed_value<VariableAndFixedInt<N>,Default> {\n  static const int value = N ;\n};\n\ntemplate<typename T, int N, int Default>\nstruct get_fixed_value<variable_if_dynamic<T,N>,Default> {\n  static const int value = N;\n};\n\ntemplate<typename T> EIGEN_DEVICE_FUNC Index get_runtime_value(const T &x) { return x; }\n#if !EIGEN_HAS_CXX14\ntemplate<int N> EIGEN_DEVICE_FUNC Index get_runtime_value(FixedInt<N> (*)()) { return N; }\n#endif\n\n// Cleanup integer/FixedInt/VariableAndFixedInt/etc types:\n\n// By default, no cleanup:\ntemplate<typename T, int DynamicKey=Dynamic, typename EnableIf=void> struct cleanup_index_type { typedef T type; };\n\n// Convert any integral type (e.g., short, int, unsigned int, etc.) 
to Eigen::Index\ntemplate<typename T, int DynamicKey> struct cleanup_index_type<T,DynamicKey,typename internal::enable_if<internal::is_integral<T>::value>::type> { typedef Index type; };\n\n#if !EIGEN_HAS_CXX14\n// In c++98/c++11, fix<N> is a pointer to function that we better cleanup to a true FixedInt<N>:\ntemplate<int N, int DynamicKey> struct cleanup_index_type<FixedInt<N> (*)(), DynamicKey> { typedef FixedInt<N> type; };\n#endif\n\n// If VariableAndFixedInt does not match DynamicKey, then we turn it to a pure compile-time value:\ntemplate<int N, int DynamicKey> struct cleanup_index_type<VariableAndFixedInt<N>, DynamicKey> { typedef FixedInt<N> type; };\n// If VariableAndFixedInt matches DynamicKey, then we turn it to a pure runtime-value (aka Index):\ntemplate<int DynamicKey> struct cleanup_index_type<VariableAndFixedInt<DynamicKey>, DynamicKey> { typedef Index type; };\n\n#if EIGEN_HAS_CXX11\ntemplate<int N, int DynamicKey> struct cleanup_index_type<std::integral_constant<int,N>, DynamicKey> { typedef FixedInt<N> type; };\n#endif\n\n} // end namespace internal\n\n#ifndef EIGEN_PARSED_BY_DOXYGEN\n\n#if EIGEN_HAS_CXX14_VARIABLE_TEMPLATES\ntemplate<int N>\nstatic const internal::FixedInt<N> fix{};\n#else\ntemplate<int N>\ninline internal::FixedInt<N> fix() { return internal::FixedInt<N>(); }\n\n// The generic typename T is mandatory. Otherwise, a code like fix<N> could refer to either the function above or this next overload.\n// This way a code like fix<N> can only refer to the previous function.\ntemplate<int N,typename T>\ninline internal::VariableAndFixedInt<N> fix(T val) { return internal::VariableAndFixedInt<N>(internal::convert_index<int>(val)); }\n#endif\n\n#else // EIGEN_PARSED_BY_DOXYGEN\n\n/** \\var fix<N>()\n  * \\ingroup Core_Module\n  *\n  * This \\em identifier permits to construct an object embedding a compile-time integer \\c N.\n  *\n  * \\tparam N the compile-time integer value\n  *\n  * It is typically used in conjunction with the Eigen::seq and Eigen::seqN functions to pass compile-time values to them:\n  * \\code\n  * seqN(10,fix<4>,fix<-3>)   // <=> [10 7 4 1]\n  * \\endcode\n  *\n  * See also the function fix(int) to pass both a compile-time and runtime value.\n  *\n  * In c++14, it is implemented as:\n  * \\code\n  * template<int N> static const internal::FixedInt<N> fix{};\n  * \\endcode\n  * where internal::FixedInt<N> is an internal template class similar to\n  * <a href=\"http://en.cppreference.com/w/cpp/types/integral_constant\">\\c std::integral_constant </a><tt> <int,N> </tt>\n  * Here, \\c fix<N> is thus an object of type \\c internal::FixedInt<N>.\n  *\n  * In c++98/11, it is implemented as a function:\n  * \\code\n  * template<int N> inline internal::FixedInt<N> fix();\n  * \\endcode\n  * Here internal::FixedInt<N> is thus a pointer to function.\n  *\n  * If for some reason you want a true object in c++98 then you can write: \\code fix<N>() \\endcode which is also valid in c++14.\n  *\n  * \\sa fix<N>(int), seq, seqN\n  */\ntemplate<int N>\nstatic const auto fix();\n\n/** \\fn fix<N>(int)\n  * \\ingroup Core_Module\n  *\n  * This function returns an object embedding both a compile-time integer \\c N, and a fallback runtime value \\a val.\n  *\n  * \\tparam N the compile-time integer value\n  * \\param  val the fallback runtime integer value\n  *\n  * This function is a more general version of the \\ref fix identifier/function that can be used in template code\n  * where the compile-time value could turn out to actually mean \"undefined at 
compile-time\". For positive integers\n  * such as a size or a dimension, this case is identified by Eigen::Dynamic, whereas runtime signed integers\n  * (e.g., an increment/stride) are identified as Eigen::DynamicIndex. In such a case, the runtime value \\a val\n  * will be used as a fallback.\n  *\n  * A typical use case would be:\n  * \\code\n  * template<typename Derived> void foo(const MatrixBase<Derived> &mat) {\n  *   const int N = Derived::RowsAtCompileTime==Dynamic ? Dynamic : Derived::RowsAtCompileTime/2;\n  *   const int n = mat.rows()/2;\n  *   ... mat( seqN(0,fix<N>(n) ) ...;\n  * }\n  * \\endcode\n  * In this example, the function Eigen::seqN knows that the second argument is expected to be a size.\n  * If the passed compile-time value N equals Eigen::Dynamic, then the proxy object returned by fix will be dissmissed, and converted to an Eigen::Index of value \\c n.\n  * Otherwise, the runtime-value \\c n will be dissmissed, and the returned ArithmeticSequence will be of the exact same type as <tt> seqN(0,fix<N>) </tt>.\n  *\n  * \\sa fix, seqN, class ArithmeticSequence\n  */\ntemplate<int N>\nstatic const auto fix(int val);\n\n#endif // EIGEN_PARSED_BY_DOXYGEN\n\n} // end namespace Eigen\n\n#endif // EIGEN_INTEGRAL_CONSTANT_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/util/MKL_support.h",
    "content": "/*\n Copyright (c) 2011, Intel Corporation. All rights reserved.\n\n Redistribution and use in source and binary forms, with or without modification,\n are permitted provided that the following conditions are met:\n\n * Redistributions of source code must retain the above copyright notice, this\n   list of conditions and the following disclaimer.\n * Redistributions in binary form must reproduce the above copyright notice,\n   this list of conditions and the following disclaimer in the documentation\n   and/or other materials provided with the distribution.\n * Neither the name of Intel Corporation nor the names of its contributors may\n   be used to endorse or promote products derived from this software without\n   specific prior written permission.\n\n THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\" AND\n ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED\n WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE\n DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR\n ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES\n (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;\n LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON\n ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\n SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\n ********************************************************************************\n *   Content : Eigen bindings to Intel(R) MKL\n *   Include file with common MKL declarations\n ********************************************************************************\n*/\n\n#ifndef EIGEN_MKL_SUPPORT_H\n#define EIGEN_MKL_SUPPORT_H\n\n#ifdef EIGEN_USE_MKL_ALL\n  #ifndef EIGEN_USE_BLAS\n    #define EIGEN_USE_BLAS\n  #endif\n  #ifndef EIGEN_USE_LAPACKE\n    #define EIGEN_USE_LAPACKE\n  #endif\n  #ifndef EIGEN_USE_MKL_VML\n    #define EIGEN_USE_MKL_VML\n  #endif\n#endif\n\n#ifdef EIGEN_USE_LAPACKE_STRICT\n  #define EIGEN_USE_LAPACKE\n#endif\n\n#if defined(EIGEN_USE_MKL_VML) && !defined(EIGEN_USE_MKL)\n  #define EIGEN_USE_MKL\n#endif\n\n\n#if defined EIGEN_USE_MKL\n#   if (!defined MKL_DIRECT_CALL) && (!defined EIGEN_MKL_NO_DIRECT_CALL)\n#       define MKL_DIRECT_CALL\n#       define MKL_DIRECT_CALL_JUST_SET\n#   endif\n#   include <mkl.h>\n/*Check IMKL version for compatibility: < 10.3 is not usable with Eigen*/\n#   ifndef INTEL_MKL_VERSION\n#       undef EIGEN_USE_MKL /* INTEL_MKL_VERSION is not even defined on older versions */\n#   elif INTEL_MKL_VERSION < 100305    /* the intel-mkl-103-release-notes say this was when the lapacke.h interface was added*/\n#       undef EIGEN_USE_MKL\n#   endif\n#   ifndef EIGEN_USE_MKL\n    /*If the MKL version is too old, undef everything*/\n#       undef   EIGEN_USE_MKL_ALL\n#       undef   EIGEN_USE_LAPACKE\n#       undef   EIGEN_USE_MKL_VML\n#       undef   EIGEN_USE_LAPACKE_STRICT\n#       undef   EIGEN_USE_LAPACKE\n#       ifdef   MKL_DIRECT_CALL_JUST_SET\n#           undef MKL_DIRECT_CALL\n#       endif\n#   endif\n#endif\n\n#if defined EIGEN_USE_MKL\n\n#define EIGEN_MKL_VML_THRESHOLD 128\n\n/* MKL_DOMAIN_BLAS, etc are defined only in 10.3 update 7 */\n/* MKL_BLAS, etc are not defined in 11.2 */\n#ifdef MKL_DOMAIN_ALL\n#define EIGEN_MKL_DOMAIN_ALL MKL_DOMAIN_ALL\n#else\n#define EIGEN_MKL_DOMAIN_ALL MKL_ALL\n#endif\n\n#ifdef 
MKL_DOMAIN_BLAS\n#define EIGEN_MKL_DOMAIN_BLAS MKL_DOMAIN_BLAS\n#else\n#define EIGEN_MKL_DOMAIN_BLAS MKL_BLAS\n#endif\n\n#ifdef MKL_DOMAIN_FFT\n#define EIGEN_MKL_DOMAIN_FFT MKL_DOMAIN_FFT\n#else\n#define EIGEN_MKL_DOMAIN_FFT MKL_FFT\n#endif\n\n#ifdef MKL_DOMAIN_VML\n#define EIGEN_MKL_DOMAIN_VML MKL_DOMAIN_VML\n#else\n#define EIGEN_MKL_DOMAIN_VML MKL_VML\n#endif\n\n#ifdef MKL_DOMAIN_PARDISO\n#define EIGEN_MKL_DOMAIN_PARDISO MKL_DOMAIN_PARDISO\n#else\n#define EIGEN_MKL_DOMAIN_PARDISO MKL_PARDISO\n#endif\n#endif\n\n#if defined(EIGEN_USE_BLAS) && !defined(EIGEN_USE_MKL)\n#include \"../../misc/blas.h\"\n#endif\n\nnamespace Eigen {\n\ntypedef std::complex<double> dcomplex;\ntypedef std::complex<float>  scomplex;\n\n#if defined(EIGEN_USE_MKL)\ntypedef MKL_INT BlasIndex;\n#else\ntypedef int BlasIndex;\n#endif\n\n} // end namespace Eigen\n\n\n#endif // EIGEN_MKL_SUPPORT_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/util/Macros.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008-2015 Gael Guennebaud <gael.guennebaud@inria.fr>\n// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_MACROS_H\n#define EIGEN_MACROS_H\n\n//------------------------------------------------------------------------------------------\n// Eigen version and basic defaults\n//------------------------------------------------------------------------------------------\n\n#define EIGEN_WORLD_VERSION 3\n#define EIGEN_MAJOR_VERSION 4\n#define EIGEN_MINOR_VERSION 0\n\n#define EIGEN_VERSION_AT_LEAST(x,y,z) (EIGEN_WORLD_VERSION>x || (EIGEN_WORLD_VERSION>=x && \\\n                                      (EIGEN_MAJOR_VERSION>y || (EIGEN_MAJOR_VERSION>=y && \\\n                                                                 EIGEN_MINOR_VERSION>=z))))\n\n#ifdef EIGEN_DEFAULT_TO_ROW_MAJOR\n#define EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION Eigen::RowMajor\n#else\n#define EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION Eigen::ColMajor\n#endif\n\n#ifndef EIGEN_DEFAULT_DENSE_INDEX_TYPE\n#define EIGEN_DEFAULT_DENSE_INDEX_TYPE std::ptrdiff_t\n#endif\n\n// Upperbound on the C++ version to use.\n// Expected values are 03, 11, 14, 17, etc.\n// By default, let's use an arbitrarily large C++ version.\n#ifndef EIGEN_MAX_CPP_VER\n#define EIGEN_MAX_CPP_VER 99\n#endif\n\n/** Allows to disable some optimizations which might affect the accuracy of the result.\n  * Such optimization are enabled by default, and set EIGEN_FAST_MATH to 0 to disable them.\n  * They currently include:\n  *   - single precision ArrayBase::sin() and ArrayBase::cos() for SSE and AVX vectorization.\n  */\n#ifndef EIGEN_FAST_MATH\n#define EIGEN_FAST_MATH 1\n#endif\n\n#ifndef EIGEN_STACK_ALLOCATION_LIMIT\n// 131072 == 128 KB\n#define EIGEN_STACK_ALLOCATION_LIMIT 131072\n#endif\n\n//------------------------------------------------------------------------------------------\n// Compiler identification, EIGEN_COMP_*\n//------------------------------------------------------------------------------------------\n\n/// \\internal EIGEN_COMP_GNUC set to 1 for all compilers compatible with GCC\n#ifdef __GNUC__\n  #define EIGEN_COMP_GNUC (__GNUC__*10+__GNUC_MINOR__)\n#else\n  #define EIGEN_COMP_GNUC 0\n#endif\n\n/// \\internal EIGEN_COMP_CLANG set to major+minor version (e.g., 307 for clang 3.7) if the compiler is clang\n#if defined(__clang__)\n  #define EIGEN_COMP_CLANG (__clang_major__*100+__clang_minor__)\n#else\n  #define EIGEN_COMP_CLANG 0\n#endif\n\n/// \\internal EIGEN_COMP_CASTXML set to 1 if being preprocessed by CastXML\n#if defined(__castxml__)\n  #define EIGEN_COMP_CASTXML 1\n#else\n  #define EIGEN_COMP_CASTXML 0\n#endif\n\n/// \\internal EIGEN_COMP_LLVM set to 1 if the compiler backend is llvm\n#if defined(__llvm__)\n  #define EIGEN_COMP_LLVM 1\n#else\n  #define EIGEN_COMP_LLVM 0\n#endif\n\n/// \\internal EIGEN_COMP_ICC set to __INTEL_COMPILER if the compiler is Intel compiler, 0 otherwise\n#if defined(__INTEL_COMPILER)\n  #define EIGEN_COMP_ICC __INTEL_COMPILER\n#else\n  #define EIGEN_COMP_ICC 0\n#endif\n\n/// \\internal EIGEN_COMP_MINGW set to 1 if the compiler is mingw\n#if defined(__MINGW32__)\n  #define EIGEN_COMP_MINGW 1\n#else\n  #define EIGEN_COMP_MINGW 0\n#endif\n\n/// \\internal 
EIGEN_COMP_SUNCC set to 1 if the compiler is Solaris Studio\n#if defined(__SUNPRO_CC)\n  #define EIGEN_COMP_SUNCC 1\n#else\n  #define EIGEN_COMP_SUNCC 0\n#endif\n\n/// \\internal EIGEN_COMP_MSVC set to _MSC_VER if the compiler is Microsoft Visual C++, 0 otherwise.\n#if defined(_MSC_VER)\n  #define EIGEN_COMP_MSVC _MSC_VER\n#else\n  #define EIGEN_COMP_MSVC 0\n#endif\n\n#if defined(__NVCC__)\n#if defined(__CUDACC_VER_MAJOR__) && (__CUDACC_VER_MAJOR__ >= 9)\n  #define EIGEN_COMP_NVCC  ((__CUDACC_VER_MAJOR__ * 10000) + (__CUDACC_VER_MINOR__ * 100))\n#elif defined(__CUDACC_VER__)\n  #define EIGEN_COMP_NVCC __CUDACC_VER__\n#else\n  #error \"NVCC did not define compiler version.\"\n#endif\n#else\n  #define EIGEN_COMP_NVCC 0\n#endif\n\n// For the record, here is a table summarizing the possible values for EIGEN_COMP_MSVC:\n//  name        ver   MSC_VER\n//  2008         9      1500\n//  2010        10      1600\n//  2012        11      1700\n//  2013        12      1800\n//  2015        14      1900\n//  \"15\"        15      1900\n//  2017-14.1   15.0    1910\n//  2017-14.11  15.3    1911\n//  2017-14.12  15.5    1912\n//  2017-14.13  15.6    1913\n//  2017-14.14  15.7    1914\n\n/// \\internal EIGEN_COMP_MSVC_LANG set to _MSVC_LANG if the compiler is Microsoft Visual C++, 0 otherwise.\n#if defined(_MSVC_LANG)\n  #define EIGEN_COMP_MSVC_LANG _MSVC_LANG\n#else\n  #define EIGEN_COMP_MSVC_LANG 0\n#endif\n\n// For the record, here is a table summarizing the possible values for EIGEN_COMP_MSVC_LANG:\n// MSVC option                          Standard  MSVC_LANG\n// /std:c++14 (default as of VS 2019)   C++14     201402L\n// /std:c++17                           C++17     201703L\n// /std:c++latest                       >C++17    >201703L\n\n/// \\internal EIGEN_COMP_MSVC_STRICT set to 1 if the compiler is really Microsoft Visual C++ and not ,e.g., ICC or clang-cl\n#if EIGEN_COMP_MSVC && !(EIGEN_COMP_ICC || EIGEN_COMP_LLVM || EIGEN_COMP_CLANG)\n  #define EIGEN_COMP_MSVC_STRICT _MSC_VER\n#else\n  #define EIGEN_COMP_MSVC_STRICT 0\n#endif\n\n/// \\internal EIGEN_COMP_IBM set to xlc version if the compiler is IBM XL C++\n// XLC   version\n// 3.1   0x0301\n// 4.5   0x0405\n// 5.0   0x0500\n// 12.1  0x0C01\n#if defined(__IBMCPP__) || defined(__xlc__) || defined(__ibmxl__)\n  #define EIGEN_COMP_IBM __xlC__\n#else\n  #define EIGEN_COMP_IBM 0\n#endif\n\n/// \\internal EIGEN_COMP_PGI set to PGI version if the compiler is Portland Group Compiler\n#if defined(__PGI)\n  #define EIGEN_COMP_PGI (__PGIC__*100+__PGIC_MINOR__)\n#else\n  #define EIGEN_COMP_PGI 0\n#endif\n\n/// \\internal EIGEN_COMP_ARM set to 1 if the compiler is ARM Compiler\n#if defined(__CC_ARM) || defined(__ARMCC_VERSION)\n  #define EIGEN_COMP_ARM 1\n#else\n  #define EIGEN_COMP_ARM 0\n#endif\n\n/// \\internal EIGEN_COMP_EMSCRIPTEN set to 1 if the compiler is Emscripten Compiler\n#if defined(__EMSCRIPTEN__)\n  #define EIGEN_COMP_EMSCRIPTEN 1\n#else\n  #define EIGEN_COMP_EMSCRIPTEN 0\n#endif\n\n\n/// \\internal EIGEN_GNUC_STRICT set to 1 if the compiler is really GCC and not a compatible compiler (e.g., ICC, clang, mingw, etc.)\n#if EIGEN_COMP_GNUC && !(EIGEN_COMP_CLANG || EIGEN_COMP_ICC || EIGEN_COMP_MINGW || EIGEN_COMP_PGI || EIGEN_COMP_IBM || EIGEN_COMP_ARM || EIGEN_COMP_EMSCRIPTEN)\n  #define EIGEN_COMP_GNUC_STRICT 1\n#else\n  #define EIGEN_COMP_GNUC_STRICT 0\n#endif\n\n\n#if EIGEN_COMP_GNUC\n  #define EIGEN_GNUC_AT_LEAST(x,y) ((__GNUC__==x && __GNUC_MINOR__>=y) || __GNUC__>x)\n  #define EIGEN_GNUC_AT_MOST(x,y)  ((__GNUC__==x && __GNUC_MINOR__<=y) || 
__GNUC__<x)\n  #define EIGEN_GNUC_AT(x,y)       ( __GNUC__==x && __GNUC_MINOR__==y )\n#else\n  #define EIGEN_GNUC_AT_LEAST(x,y) 0\n  #define EIGEN_GNUC_AT_MOST(x,y)  0\n  #define EIGEN_GNUC_AT(x,y)       0\n#endif\n\n// FIXME: could probably be removed as we do not support gcc 3.x anymore\n#if EIGEN_COMP_GNUC && (__GNUC__ <= 3)\n#define EIGEN_GCC3_OR_OLDER 1\n#else\n#define EIGEN_GCC3_OR_OLDER 0\n#endif\n\n\n\n//------------------------------------------------------------------------------------------\n// Architecture identification, EIGEN_ARCH_*\n//------------------------------------------------------------------------------------------\n\n\n#if defined(__x86_64__) || (defined(_M_X64) && !defined(_M_ARM64EC)) || defined(__amd64)\n  #define EIGEN_ARCH_x86_64 1\n#else\n  #define EIGEN_ARCH_x86_64 0\n#endif\n\n#if defined(__i386__) || defined(_M_IX86) || defined(_X86_) || defined(__i386)\n  #define EIGEN_ARCH_i386 1\n#else\n  #define EIGEN_ARCH_i386 0\n#endif\n\n#if EIGEN_ARCH_x86_64 || EIGEN_ARCH_i386\n  #define EIGEN_ARCH_i386_OR_x86_64 1\n#else\n  #define EIGEN_ARCH_i386_OR_x86_64 0\n#endif\n\n/// \\internal EIGEN_ARCH_ARM set to 1 if the architecture is ARM\n#if defined(__arm__)\n  #define EIGEN_ARCH_ARM 1\n#else\n  #define EIGEN_ARCH_ARM 0\n#endif\n\n/// \\internal EIGEN_ARCH_ARM64 set to 1 if the architecture is ARM64\n#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)\n  #define EIGEN_ARCH_ARM64 1\n#else\n  #define EIGEN_ARCH_ARM64 0\n#endif\n\n/// \\internal EIGEN_ARCH_ARM_OR_ARM64 set to 1 if the architecture is ARM or ARM64\n#if EIGEN_ARCH_ARM || EIGEN_ARCH_ARM64\n  #define EIGEN_ARCH_ARM_OR_ARM64 1\n#else\n  #define EIGEN_ARCH_ARM_OR_ARM64 0\n#endif\n\n/// \\internal EIGEN_ARCH_ARMV8 set to 1 if the architecture is armv8 or greater.\n#if EIGEN_ARCH_ARM_OR_ARM64 && defined(__ARM_ARCH) && __ARM_ARCH >= 8\n#define EIGEN_ARCH_ARMV8 1\n#else\n#define EIGEN_ARCH_ARMV8 0\n#endif\n\n\n/// \\internal EIGEN_HAS_ARM64_FP16 set to 1 if the architecture provides an IEEE\n/// compliant Arm fp16 type\n#if EIGEN_ARCH_ARM64\n  #ifndef EIGEN_HAS_ARM64_FP16\n    #if defined(__ARM_FP16_FORMAT_IEEE)\n      #define EIGEN_HAS_ARM64_FP16 1\n    #else\n      #define EIGEN_HAS_ARM64_FP16 0\n    #endif\n  #endif\n#endif\n\n/// \\internal EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC set to 1 if the architecture\n/// supports Neon vector intrinsics for fp16.\n#if EIGEN_ARCH_ARM64\n  #ifndef EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC\n    #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)\n      #define EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC 1\n    #else\n      #define EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC 0\n    #endif\n  #endif\n#endif\n\n/// \\internal EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC set to 1 if the architecture\n/// supports Neon scalar intrinsics for fp16.\n#if EIGEN_ARCH_ARM64\n  #ifndef EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC\n    #if defined(__ARM_FEATURE_FP16_SCALAR_ARITHMETIC)\n      #define EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC 1\n    #endif\n  #endif\n#endif\n\n/// \\internal EIGEN_ARCH_MIPS set to 1 if the architecture is MIPS\n#if defined(__mips__) || defined(__mips)\n  #define EIGEN_ARCH_MIPS 1\n#else\n  #define EIGEN_ARCH_MIPS 0\n#endif\n\n/// \\internal EIGEN_ARCH_SPARC set to 1 if the architecture is SPARC\n#if defined(__sparc__) || defined(__sparc)\n  #define EIGEN_ARCH_SPARC 1\n#else\n  #define EIGEN_ARCH_SPARC 0\n#endif\n\n/// \\internal EIGEN_ARCH_IA64 set to 1 if the architecture is Intel Itanium\n#if defined(__ia64__)\n  #define EIGEN_ARCH_IA64 1\n#else\n  #define 
EIGEN_ARCH_IA64 0\n#endif\n\n/// \\internal EIGEN_ARCH_PPC set to 1 if the architecture is PowerPC\n#if defined(__powerpc__) || defined(__ppc__) || defined(_M_PPC)\n  #define EIGEN_ARCH_PPC 1\n#else\n  #define EIGEN_ARCH_PPC 0\n#endif\n\n\n\n//------------------------------------------------------------------------------------------\n// Operating system identification, EIGEN_OS_*\n//------------------------------------------------------------------------------------------\n\n/// \\internal EIGEN_OS_UNIX set to 1 if the OS is a unix variant\n#if defined(__unix__) || defined(__unix)\n  #define EIGEN_OS_UNIX 1\n#else\n  #define EIGEN_OS_UNIX 0\n#endif\n\n/// \\internal EIGEN_OS_LINUX set to 1 if the OS is based on Linux kernel\n#if defined(__linux__)\n  #define EIGEN_OS_LINUX 1\n#else\n  #define EIGEN_OS_LINUX 0\n#endif\n\n/// \\internal EIGEN_OS_ANDROID set to 1 if the OS is Android\n// note: ANDROID is defined when using ndk_build, __ANDROID__ is defined when using a standalone toolchain.\n#if defined(__ANDROID__) || defined(ANDROID)\n  #define EIGEN_OS_ANDROID 1\n#else\n  #define EIGEN_OS_ANDROID 0\n#endif\n\n/// \\internal EIGEN_OS_GNULINUX set to 1 if the OS is GNU Linux and not Linux-based OS (e.g., not android)\n#if defined(__gnu_linux__) && !(EIGEN_OS_ANDROID)\n  #define EIGEN_OS_GNULINUX 1\n#else\n  #define EIGEN_OS_GNULINUX 0\n#endif\n\n/// \\internal EIGEN_OS_BSD set to 1 if the OS is a BSD variant\n#if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__bsdi__) || defined(__DragonFly__)\n  #define EIGEN_OS_BSD 1\n#else\n  #define EIGEN_OS_BSD 0\n#endif\n\n/// \\internal EIGEN_OS_MAC set to 1 if the OS is MacOS\n#if defined(__APPLE__)\n  #define EIGEN_OS_MAC 1\n#else\n  #define EIGEN_OS_MAC 0\n#endif\n\n/// \\internal EIGEN_OS_QNX set to 1 if the OS is QNX\n#if defined(__QNX__)\n  #define EIGEN_OS_QNX 1\n#else\n  #define EIGEN_OS_QNX 0\n#endif\n\n/// \\internal EIGEN_OS_WIN set to 1 if the OS is Windows based\n#if defined(_WIN32)\n  #define EIGEN_OS_WIN 1\n#else\n  #define EIGEN_OS_WIN 0\n#endif\n\n/// \\internal EIGEN_OS_WIN64 set to 1 if the OS is Windows 64bits\n#if defined(_WIN64)\n  #define EIGEN_OS_WIN64 1\n#else\n  #define EIGEN_OS_WIN64 0\n#endif\n\n/// \\internal EIGEN_OS_WINCE set to 1 if the OS is Windows CE\n#if defined(_WIN32_WCE)\n  #define EIGEN_OS_WINCE 1\n#else\n  #define EIGEN_OS_WINCE 0\n#endif\n\n/// \\internal EIGEN_OS_CYGWIN set to 1 if the OS is Windows/Cygwin\n#if defined(__CYGWIN__)\n  #define EIGEN_OS_CYGWIN 1\n#else\n  #define EIGEN_OS_CYGWIN 0\n#endif\n\n/// \\internal EIGEN_OS_WIN_STRICT set to 1 if the OS is really Windows and not some variants\n#if EIGEN_OS_WIN && !( EIGEN_OS_WINCE || EIGEN_OS_CYGWIN )\n  #define EIGEN_OS_WIN_STRICT 1\n#else\n  #define EIGEN_OS_WIN_STRICT 0\n#endif\n\n/// \\internal EIGEN_OS_SUN set to __SUNPRO_C if the OS is SUN\n// compiler  solaris   __SUNPRO_C\n// version   studio\n// 5.7       10        0x570\n// 5.8       11        0x580\n// 5.9       12        0x590\n// 5.10\t     12.1      0x5100\n// 5.11\t     12.2      0x5110\n// 5.12\t     12.3      0x5120\n#if (defined(sun) || defined(__sun)) && !(defined(__SVR4) || defined(__svr4__))\n  #define EIGEN_OS_SUN __SUNPRO_C\n#else\n  #define EIGEN_OS_SUN 0\n#endif\n\n/// \\internal EIGEN_OS_SOLARIS set to 1 if the OS is Solaris\n#if (defined(sun) || defined(__sun)) && (defined(__SVR4) || defined(__svr4__))\n  #define EIGEN_OS_SOLARIS 1\n#else\n  #define EIGEN_OS_SOLARIS 
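// --- Illustrative sketch (guarded with #if 0) of the EIGEN_OS_* probing above.
// Note the ordering: _WIN64 implies _WIN32, and __linux__ implies __unix__ on
// most toolchains, so the more specific test must come first.
#if 0
#include <cstdio>

int main() {
#if defined(_WIN64)
  std::printf("Windows, 64-bit\n");
#elif defined(_WIN32)
  std::printf("Windows, 32-bit\n");
#elif defined(__APPLE__)
  std::printf("Apple OS\n");
#elif defined(__linux__)
  std::printf("Linux\n");
#elif defined(__unix__)
  std::printf("other Unix\n");
#else
  std::printf("unknown OS\n");
#endif
  return 0;
}
#endif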
0\n#endif\n\n\n//------------------------------------------------------------------------------------------\n// Detect GPU compilers and architectures\n//------------------------------------------------------------------------------------------\n\n// NVCC is not supported as the target platform for HIPCC\n// Note that this also makes EIGEN_CUDACC and EIGEN_HIPCC mutually exclusive\n#if defined(__NVCC__) && defined(__HIPCC__)\n  #error \"NVCC as the target platform for HIPCC is currently not supported.\"\n#endif\n\n#if defined(__CUDACC__) && !defined(EIGEN_NO_CUDA)\n  // Means the compiler is either nvcc or clang with CUDA enabled\n  #define EIGEN_CUDACC __CUDACC__\n#endif\n\n#if defined(__CUDA_ARCH__) && !defined(EIGEN_NO_CUDA)\n  // Means we are generating code for the device\n  #define EIGEN_CUDA_ARCH __CUDA_ARCH__\n#endif\n\n#if defined(EIGEN_CUDACC)\n#include <cuda.h>\n  #define EIGEN_CUDA_SDK_VER (CUDA_VERSION * 10)\n#else\n  #define EIGEN_CUDA_SDK_VER 0\n#endif\n\n#if defined(__HIPCC__) && !defined(EIGEN_NO_HIP)\n  // Means the compiler is HIPCC (analogous to EIGEN_CUDACC, but for HIP)\n  #define EIGEN_HIPCC __HIPCC__\n\n  // We need to include hip_runtime.h here because it pulls in\n  // ++ hip_common.h which contains the define for  __HIP_DEVICE_COMPILE__\n  // ++ host_defines.h which contains the defines for the __host__ and __device__ macros\n  #include <hip/hip_runtime.h>\n\n  #if defined(__HIP_DEVICE_COMPILE__)\n    // analogous to EIGEN_CUDA_ARCH, but for HIP\n    #define EIGEN_HIP_DEVICE_COMPILE __HIP_DEVICE_COMPILE__\n  #endif\n\n  // For HIP (ROCm 3.5 and higher), we need to explicitly set the launch_bounds attribute\n  // value to 1024. The compiler assigns a default value of 256 when the attribute is not\n  // specified. This results in failures on the HIP platform, for cases when a GPU kernel\n  // without an explicit launch_bounds attribute is called with a threads_per_block value\n  // greater than 256.\n  //\n  // This is a regression in functioanlity and is expected to be fixed within the next\n  // couple of ROCm releases (compiler will go back to using 1024 value as the default)\n  //\n  // In the meantime, we will use a \"only enabled for HIP\" macro to set the launch_bounds\n  // attribute.\n\n  #define EIGEN_HIP_LAUNCH_BOUNDS_1024 __launch_bounds__(1024)\n\n#endif\n\n#if !defined(EIGEN_HIP_LAUNCH_BOUNDS_1024)\n#define EIGEN_HIP_LAUNCH_BOUNDS_1024\n#endif // !defined(EIGEN_HIP_LAUNCH_BOUNDS_1024)\n\n// Unify CUDA/HIPCC\n\n#if defined(EIGEN_CUDACC) || defined(EIGEN_HIPCC)\n//\n// If either EIGEN_CUDACC or EIGEN_HIPCC is defined, then define EIGEN_GPUCC\n//\n#define EIGEN_GPUCC\n//\n// EIGEN_HIPCC implies the HIP compiler and is used to tweak Eigen code for use in HIP kernels\n// EIGEN_CUDACC implies the CUDA compiler and is used to tweak Eigen code for use in CUDA kernels\n//\n// In most cases the same tweaks are required to the Eigen code to enable in both the HIP and CUDA kernels.\n// For those cases, the corresponding code should be guarded with\n//      #if defined(EIGEN_GPUCC)\n// instead of\n//      #if defined(EIGEN_CUDACC) || defined(EIGEN_HIPCC)\n//\n// For cases where the tweak is specific to HIP, the code should be guarded with\n//      #if defined(EIGEN_HIPCC)\n//\n// For cases where the tweak is specific to CUDA, the code should be guarded with\n//      #if defined(EIGEN_CUDACC)\n//\n#endif\n\n#if defined(EIGEN_CUDA_ARCH) || defined(EIGEN_HIP_DEVICE_COMPILE)\n//\n// If either EIGEN_CUDA_ARCH or EIGEN_HIP_DEVICE_COMPILE is defined, then define 
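// --- Illustrative sketch (hypothetical macro name, guarded with #if 0) of the
// unified-guard pattern recommended in the comment above: one EIGEN_GPUCC-style
// test covers both CUDA and HIP, so a single annotation serves either toolchain.
#if 0
#if defined(EIGEN_GPUCC)
  #define MY_HOST_DEVICE __host__ __device__  // accepted by nvcc and hipcc alike
#else
  #define MY_HOST_DEVICE                      // host-only build: expands to nothing
#endif

// Callable from host code and, under a GPU compiler, from device kernels too.
MY_HOST_DEVICE inline float clamp01(float x) {
  return x < 0.f ? 0.f : (x > 1.f ? 1.f : x);
}
#endif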
EIGEN_GPU_COMPILE_PHASE\n//\n#define EIGEN_GPU_COMPILE_PHASE\n//\n// GPU compilers (HIPCC, NVCC) typically do two passes over the source code,\n//   + one to compile the source for the \"host\" (ie CPU)\n//   + another to compile the source for the \"device\" (ie. GPU)\n//\n// Code that needs to enabled only during the either the \"host\" or \"device\" compilation phase\n// needs to be guarded with a macro that indicates the current compilation phase\n//\n// EIGEN_HIP_DEVICE_COMPILE implies the device compilation phase in HIP\n// EIGEN_CUDA_ARCH implies the device compilation phase in CUDA\n//\n// In most cases, the \"host\" / \"device\" specific code is the same for both HIP and CUDA\n// For those cases, the code should be guarded with\n//       #if defined(EIGEN_GPU_COMPILE_PHASE)\n// instead of\n//       #if defined(EIGEN_CUDA_ARCH) || defined(EIGEN_HIP_DEVICE_COMPILE)\n//\n// For cases where the tweak is specific to HIP, the code should be guarded with\n//      #if defined(EIGEN_HIP_DEVICE_COMPILE)\n//\n// For cases where the tweak is specific to CUDA, the code should be guarded with\n//      #if defined(EIGEN_CUDA_ARCH)\n//\n#endif\n\n#if defined(EIGEN_USE_SYCL) && defined(__SYCL_DEVICE_ONLY__)\n// EIGEN_USE_SYCL is a user-defined macro while __SYCL_DEVICE_ONLY__ is a compiler-defined macro.\n// In most cases we want to check if both macros are defined which can be done using the define below.\n#define SYCL_DEVICE_ONLY\n#endif\n\n//------------------------------------------------------------------------------------------\n// Detect Compiler/Architecture/OS specific features\n//------------------------------------------------------------------------------------------\n\n#if EIGEN_GNUC_AT_MOST(4,3) && !EIGEN_COMP_CLANG\n  // see bug 89\n  #define EIGEN_SAFE_TO_USE_STANDARD_ASSERT_MACRO 0\n#else\n  #define EIGEN_SAFE_TO_USE_STANDARD_ASSERT_MACRO 1\n#endif\n\n// Cross compiler wrapper around LLVM's __has_builtin\n#ifdef __has_builtin\n#  define EIGEN_HAS_BUILTIN(x) __has_builtin(x)\n#else\n#  define EIGEN_HAS_BUILTIN(x) 0\n#endif\n\n// A Clang feature extension to determine compiler features.\n// We use it to determine 'cxx_rvalue_references'\n#ifndef __has_feature\n# define __has_feature(x) 0\n#endif\n\n// Some old compilers do not support template specializations like:\n// template<typename T,int N> void foo(const T x[N]);\n#if !(   EIGEN_COMP_CLANG && (   (EIGEN_COMP_CLANG<309)                                                       \\\n                              || (defined(__apple_build_version__) && (__apple_build_version__ < 9000000)))  \\\n      || EIGEN_COMP_GNUC_STRICT && EIGEN_COMP_GNUC<49)\n#define EIGEN_HAS_STATIC_ARRAY_TEMPLATE 1\n#else\n#define EIGEN_HAS_STATIC_ARRAY_TEMPLATE 0\n#endif\n\n// The macro EIGEN_CPLUSPLUS is a replacement for __cplusplus/_MSVC_LANG that\n// works for both platforms, indicating the C++ standard version number.\n//\n// With MSVC, without defining /Zc:__cplusplus, the __cplusplus macro will\n// report 199711L regardless of the language standard specified via /std.\n// We need to rely on _MSVC_LANG instead, which is only available after\n// VS2015.3.\n#if EIGEN_COMP_MSVC_LANG > 0\n#define EIGEN_CPLUSPLUS EIGEN_COMP_MSVC_LANG\n#elif EIGEN_COMP_MSVC >= 1900\n#define EIGEN_CPLUSPLUS 201103L\n#elif defined(__cplusplus)\n#define EIGEN_CPLUSPLUS __cplusplus\n#else\n#define EIGEN_CPLUSPLUS 0\n#endif\n\n// The macro EIGEN_COMP_CXXVER defines the c++ verson expected by the compiler.\n// For instance, if compiling with gcc and -std=c++17, then 
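// --- Illustrative sketch (hypothetical macro, guarded with #if 0) of the
// portability problem EIGEN_CPLUSPLUS solves: MSVC keeps __cplusplus at 199711L
// unless /Zc:__cplusplus is passed, so a portable probe prefers _MSVC_LANG.
#if 0
#if defined(_MSVC_LANG)
  #define MY_CPLUSPLUS _MSVC_LANG   // reliable on MSVC from VS2015 Update 3 on
#elif defined(__cplusplus)
  #define MY_CPLUSPLUS __cplusplus  // reliable on GCC/Clang/ICC
#else
  #define MY_CPLUSPLUS 0L
#endif

#if MY_CPLUSPLUS >= 201703L
  // C++17 features may be used here on either family of compilers.
#endif
#endif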
EIGEN_COMP_CXXVER\n// is defined to 17.\n#if EIGEN_CPLUSPLUS > 201703L\n  #define EIGEN_COMP_CXXVER 20\n#elif EIGEN_CPLUSPLUS > 201402L\n  #define EIGEN_COMP_CXXVER 17\n#elif EIGEN_CPLUSPLUS > 201103L\n  #define EIGEN_COMP_CXXVER 14\n#elif EIGEN_CPLUSPLUS >= 201103L\n  #define EIGEN_COMP_CXXVER 11\n#else\n  #define EIGEN_COMP_CXXVER 03\n#endif\n\n#ifndef EIGEN_HAS_CXX14_VARIABLE_TEMPLATES\n  #if defined(__cpp_variable_templates) && __cpp_variable_templates >= 201304 && EIGEN_MAX_CPP_VER>=14\n    #define EIGEN_HAS_CXX14_VARIABLE_TEMPLATES 1\n  #else\n    #define EIGEN_HAS_CXX14_VARIABLE_TEMPLATES 0\n  #endif\n#endif\n\n\n// The macros EIGEN_HAS_CXX?? defines a rough estimate of available c++ features\n// but in practice we should not rely on them but rather on the availabilty of\n// individual features as defined later.\n// This is why there is no EIGEN_HAS_CXX17.\n// FIXME: get rid of EIGEN_HAS_CXX14 and maybe even EIGEN_HAS_CXX11.\n#if EIGEN_MAX_CPP_VER>=11 && EIGEN_COMP_CXXVER>=11\n#define EIGEN_HAS_CXX11 1\n#else\n#define EIGEN_HAS_CXX11 0\n#endif\n\n#if EIGEN_MAX_CPP_VER>=14 && EIGEN_COMP_CXXVER>=14\n#define EIGEN_HAS_CXX14 1\n#else\n#define EIGEN_HAS_CXX14 0\n#endif\n\n// Do we support r-value references?\n#ifndef EIGEN_HAS_RVALUE_REFERENCES\n#if EIGEN_MAX_CPP_VER>=11 && \\\n    (__has_feature(cxx_rvalue_references) || \\\n     (EIGEN_COMP_CXXVER >= 11) || (EIGEN_COMP_MSVC >= 1600))\n  #define EIGEN_HAS_RVALUE_REFERENCES 1\n#else\n  #define EIGEN_HAS_RVALUE_REFERENCES 0\n#endif\n#endif\n\n// Does the compiler support C99?\n// Need to include <cmath> to make sure _GLIBCXX_USE_C99 gets defined\n#include <cmath>\n#ifndef EIGEN_HAS_C99_MATH\n#if EIGEN_MAX_CPP_VER>=11 && \\\n    ((defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901))       \\\n  || (defined(__GNUC__) && defined(_GLIBCXX_USE_C99)) \\\n  || (defined(_LIBCPP_VERSION) && !defined(_MSC_VER)) \\\n  || (EIGEN_COMP_MSVC >= 1900) || defined(SYCL_DEVICE_ONLY))\n  #define EIGEN_HAS_C99_MATH 1\n#else\n  #define EIGEN_HAS_C99_MATH 0\n#endif\n#endif\n\n// Does the compiler support result_of?\n// result_of was deprecated in c++17 and removed in c++ 20\n#ifndef EIGEN_HAS_STD_RESULT_OF\n#if EIGEN_HAS_CXX11 && EIGEN_COMP_CXXVER < 17\n#define EIGEN_HAS_STD_RESULT_OF 1\n#else\n#define EIGEN_HAS_STD_RESULT_OF 0\n#endif\n#endif\n\n// Does the compiler support std::hash?\n#ifndef EIGEN_HAS_STD_HASH\n// The std::hash struct is defined in C++11 but is not labelled as a __device__\n// function and is not constexpr, so cannot be used on device.\n#if EIGEN_HAS_CXX11 && !defined(EIGEN_GPU_COMPILE_PHASE)\n#define EIGEN_HAS_STD_HASH 1\n#else\n#define EIGEN_HAS_STD_HASH 0\n#endif\n#endif  // EIGEN_HAS_STD_HASH\n\n#ifndef EIGEN_HAS_STD_INVOKE_RESULT\n#if EIGEN_MAX_CPP_VER >= 17 && EIGEN_COMP_CXXVER >= 17\n#define EIGEN_HAS_STD_INVOKE_RESULT 1\n#else\n#define EIGEN_HAS_STD_INVOKE_RESULT 0\n#endif\n#endif\n\n#ifndef EIGEN_HAS_ALIGNAS\n#if EIGEN_MAX_CPP_VER>=11 && EIGEN_HAS_CXX11 &&   \\\n      (     __has_feature(cxx_alignas)            \\\n        ||  EIGEN_HAS_CXX14                       \\\n        || (EIGEN_COMP_MSVC >= 1800)              \\\n        || (EIGEN_GNUC_AT_LEAST(4,8))             \\\n        || (EIGEN_COMP_CLANG>=305)                \\\n        || (EIGEN_COMP_ICC>=1500)                 \\\n        || (EIGEN_COMP_PGI>=1500)                 \\\n        || (EIGEN_COMP_SUNCC>=0x5130))\n#define EIGEN_HAS_ALIGNAS 1\n#else\n#define EIGEN_HAS_ALIGNAS 0\n#endif\n#endif\n\n// Does the compiler support type_traits?\n// - full support of type 
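// --- Illustrative sketch (guarded with #if 0) of the feature-test-macro style
// used in this block: the standard __cpp_* macros encode the adoption date of a
// feature, so availability is a numeric comparison, not a compiler-version table.
#if 0
#if defined(__cpp_variable_templates) && __cpp_variable_templates >= 201304L
template <class T>
constexpr T pi = T(3.141592653589793238462643L);   // C++14 variable template
#else
template <class T>
constexpr T pi() { return T(3.141592653589793238462643L); }  // pre-C++14 fallback
#endif
#endif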
traits was added only to GCC 5.1.0.\n// - 20150626 corresponds to the last release of 4.x libstdc++\n#ifndef EIGEN_HAS_TYPE_TRAITS\n#if EIGEN_MAX_CPP_VER>=11 && (EIGEN_HAS_CXX11 || EIGEN_COMP_MSVC >= 1700) \\\n  && ((!EIGEN_COMP_GNUC_STRICT) || EIGEN_GNUC_AT_LEAST(5, 1)) \\\n  && ((!defined(__GLIBCXX__))   || __GLIBCXX__ > 20150626)\n#define EIGEN_HAS_TYPE_TRAITS 1\n#define EIGEN_INCLUDE_TYPE_TRAITS\n#else\n#define EIGEN_HAS_TYPE_TRAITS 0\n#endif\n#endif\n\n// Does the compiler support variadic templates?\n#ifndef EIGEN_HAS_VARIADIC_TEMPLATES\n#if EIGEN_MAX_CPP_VER>=11 && (EIGEN_COMP_CXXVER >= 11) \\\n  && (!defined(__NVCC__) || !EIGEN_ARCH_ARM_OR_ARM64 || (EIGEN_COMP_NVCC >= 80000) )\n    // ^^ Disable the use of variadic templates when compiling with versions of nvcc older than 8.0 on ARM devices:\n    //    this prevents nvcc from crashing when compiling Eigen on Tegra X1\n#define EIGEN_HAS_VARIADIC_TEMPLATES 1\n#elif  EIGEN_MAX_CPP_VER>=11 && (EIGEN_COMP_CXXVER >= 11) && defined(SYCL_DEVICE_ONLY)\n#define EIGEN_HAS_VARIADIC_TEMPLATES 1\n#else\n#define EIGEN_HAS_VARIADIC_TEMPLATES 0\n#endif\n#endif\n\n// Does the compiler fully support const expressions? (as in c++14)\n#ifndef EIGEN_HAS_CONSTEXPR\n  #if defined(EIGEN_CUDACC)\n  // Const expressions are supported provided that c++11 is enabled and we're using either clang or nvcc 7.5 or above\n    #if EIGEN_MAX_CPP_VER>=14 && (EIGEN_COMP_CXXVER >= 11 && (EIGEN_COMP_CLANG || EIGEN_COMP_NVCC >= 70500))\n      #define EIGEN_HAS_CONSTEXPR 1\n    #endif\n  #elif EIGEN_MAX_CPP_VER>=14 && (__has_feature(cxx_relaxed_constexpr) || (EIGEN_COMP_CXXVER >= 14) || \\\n    (EIGEN_GNUC_AT_LEAST(4,8) && (EIGEN_COMP_CXXVER >= 11)) || \\\n    (EIGEN_COMP_CLANG >= 306 && (EIGEN_COMP_CXXVER >= 11)))\n    #define EIGEN_HAS_CONSTEXPR 1\n  #endif\n\n  #ifndef EIGEN_HAS_CONSTEXPR\n    #define EIGEN_HAS_CONSTEXPR 0\n  #endif\n\n#endif // EIGEN_HAS_CONSTEXPR\n\n#if EIGEN_HAS_CONSTEXPR\n#define EIGEN_CONSTEXPR constexpr\n#else\n#define EIGEN_CONSTEXPR\n#endif\n\n// Does the compiler support C++11 math?\n// Let's be conservative and enable the default C++11 implementation only if we are sure it exists\n#ifndef EIGEN_HAS_CXX11_MATH\n  #if EIGEN_MAX_CPP_VER>=11 && ((EIGEN_COMP_CXXVER > 11) || (EIGEN_COMP_CXXVER == 11) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_CLANG || EIGEN_COMP_MSVC || EIGEN_COMP_ICC)  \\\n      && (EIGEN_ARCH_i386_OR_x86_64) && (EIGEN_OS_GNULINUX || EIGEN_OS_WIN_STRICT || EIGEN_OS_MAC))\n    #define EIGEN_HAS_CXX11_MATH 1\n  #else\n    #define EIGEN_HAS_CXX11_MATH 0\n  #endif\n#endif\n\n// Does the compiler support proper C++11 containers?\n#ifndef EIGEN_HAS_CXX11_CONTAINERS\n  #if    EIGEN_MAX_CPP_VER>=11 && \\\n         ((EIGEN_COMP_CXXVER > 11) \\\n      || ((EIGEN_COMP_CXXVER == 11) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_CLANG || EIGEN_COMP_MSVC || EIGEN_COMP_ICC>=1400)))\n    #define EIGEN_HAS_CXX11_CONTAINERS 1\n  #else\n    #define EIGEN_HAS_CXX11_CONTAINERS 0\n  #endif\n#endif\n\n// Does the compiler support C++11 noexcept?\n#ifndef EIGEN_HAS_CXX11_NOEXCEPT\n  #if    EIGEN_MAX_CPP_VER>=11 && \\\n         (__has_feature(cxx_noexcept) \\\n      || (EIGEN_COMP_CXXVER > 11) \\\n      || ((EIGEN_COMP_CXXVER == 11) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_CLANG || EIGEN_COMP_MSVC || EIGEN_COMP_ICC>=1400)))\n    #define EIGEN_HAS_CXX11_NOEXCEPT 1\n  #else\n    #define EIGEN_HAS_CXX11_NOEXCEPT 0\n  #endif\n#endif\n\n#ifndef EIGEN_HAS_CXX11_ATOMIC\n  #if    EIGEN_MAX_CPP_VER>=11 && \\\n         (__has_feature(cxx_atomic) \\\n      || 
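// --- Illustrative sketch (hypothetical macro, guarded with #if 0) of the
// EIGEN_CONSTEXPR pattern: the keyword hides behind a macro so the same source
// builds both where C++14 relaxed constexpr exists and where it must degrade
// to a plain runtime function.
#if 0
#if defined(__cpp_constexpr) && __cpp_constexpr >= 201304L
  #define MY_CONSTEXPR constexpr    // relaxed constexpr: loops are allowed
#else
  #define MY_CONSTEXPR              // older dialects: ordinary function
#endif

MY_CONSTEXPR int factorial(int n) {
  int r = 1;
  for (int i = 2; i <= n; ++i) r *= i;  // compile-time evaluable only under C++14
  return r;
}
#endif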
(EIGEN_COMP_CXXVER > 11) \\\n      || ((EIGEN_COMP_CXXVER == 11) && (EIGEN_COMP_MSVC==0 || EIGEN_COMP_MSVC >= 1700)))\n    #define EIGEN_HAS_CXX11_ATOMIC 1\n  #else\n    #define EIGEN_HAS_CXX11_ATOMIC 0\n  #endif\n#endif\n\n#ifndef EIGEN_HAS_CXX11_OVERRIDE_FINAL\n  #if    EIGEN_MAX_CPP_VER>=11 && \\\n       (EIGEN_COMP_CXXVER >= 11 || EIGEN_COMP_MSVC >= 1700)\n    #define EIGEN_HAS_CXX11_OVERRIDE_FINAL 1\n  #else\n    #define EIGEN_HAS_CXX11_OVERRIDE_FINAL 0\n  #endif\n#endif\n\n// NOTE: the required Apple's clang version is very conservative\n//       and it could be that XCode 9 works just fine.\n// NOTE: the MSVC version is based on https://en.cppreference.com/w/cpp/compiler_support\n//       and not tested.\n#ifndef EIGEN_HAS_CXX17_OVERALIGN\n#if EIGEN_MAX_CPP_VER>=17 && EIGEN_COMP_CXXVER>=17 && (                                 \\\n           (EIGEN_COMP_MSVC >= 1912)                                                    \\\n        || (EIGEN_GNUC_AT_LEAST(7,0))                                                   \\\n        || ((!defined(__apple_build_version__)) && (EIGEN_COMP_CLANG>=500))             \\\n        || (( defined(__apple_build_version__)) && (__apple_build_version__>=10000000)) \\\n      )\n#define EIGEN_HAS_CXX17_OVERALIGN 1\n#else\n#define EIGEN_HAS_CXX17_OVERALIGN 0\n#endif\n#endif\n\n#if defined(EIGEN_CUDACC) && EIGEN_HAS_CONSTEXPR\n  // While available already with c++11, this is useful mostly starting with c++14 and relaxed constexpr rules\n  #if defined(__NVCC__)\n    // nvcc considers constexpr functions as __host__ __device__ with the option --expt-relaxed-constexpr\n    #ifdef __CUDACC_RELAXED_CONSTEXPR__\n      #define EIGEN_CONSTEXPR_ARE_DEVICE_FUNC\n    #endif\n  #elif defined(__clang__) && defined(__CUDA__) && __has_feature(cxx_relaxed_constexpr)\n    // clang++ always considers constexpr functions as implicitly __host__ __device__\n    #define EIGEN_CONSTEXPR_ARE_DEVICE_FUNC\n  #endif\n#endif\n\n// Does the compiler support the __int128 and __uint128_t extensions for 128-bit\n// integer arithmetic?\n//\n// Clang and GCC define __SIZEOF_INT128__ when these extensions are supported,\n// but we avoid using them in certain cases:\n//\n// * Building using Clang for Windows, where the Clang runtime library has\n//   128-bit support only on LP64 architectures, but Windows is LLP64.\n#ifndef EIGEN_HAS_BUILTIN_INT128\n#if defined(__SIZEOF_INT128__) && !(EIGEN_OS_WIN && EIGEN_COMP_CLANG)\n#define EIGEN_HAS_BUILTIN_INT128 1\n#else\n#define EIGEN_HAS_BUILTIN_INT128 0\n#endif\n#endif\n\n//------------------------------------------------------------------------------------------\n// Preprocessor programming helpers\n//------------------------------------------------------------------------------------------\n\n// This macro can be used to prevent from macro expansion, e.g.:\n//   std::max EIGEN_NOT_A_MACRO(a,b)\n#define EIGEN_NOT_A_MACRO\n\n#define EIGEN_DEBUG_VAR(x) std::cerr << #x << \" = \" << x << std::endl;\n\n// concatenate two tokens\n#define EIGEN_CAT2(a,b) a ## b\n#define EIGEN_CAT(a,b) EIGEN_CAT2(a,b)\n\n#define EIGEN_COMMA ,\n\n// convert a token to a string\n#define EIGEN_MAKESTRING2(a) #a\n#define EIGEN_MAKESTRING(a) EIGEN_MAKESTRING2(a)\n\n// EIGEN_STRONG_INLINE is a stronger version of the inline, using __forceinline on MSVC,\n// but it still doesn't use GCC's always_inline. 
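// --- Illustrative sketch (guarded with #if 0) of why EIGEN_CAT and
// EIGEN_MAKESTRING need two levels: ## and # suppress macro expansion of their
// arguments, so an extra layer is required to paste/stringify the *expanded*
// tokens rather than the literal parameter names.
#if 0
#include <cassert>
#include <cstring>

#define CAT2(a, b) a##b
#define CAT(a, b) CAT2(a, b)   // expands a and b first, then pastes
#define STR2(a) #a
#define STR(a) STR2(a)         // expands a first, then stringifies

#define MINOR 4

int main() {
  int CAT(value_, MINOR) = 7;  // declares value_4; CAT2 would declare value_MINOR
  assert(value_4 == 7);
  assert(std::strcmp(STR(MINOR), "4") == 0);  // STR2(MINOR) would give "MINOR"
  return 0;
}
#endif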
This is useful in (common) situations where MSVC needs forceinline\n// but GCC is still doing fine with just inline.\n#ifndef EIGEN_STRONG_INLINE\n#if (EIGEN_COMP_MSVC || EIGEN_COMP_ICC) && !defined(EIGEN_GPUCC)\n#define EIGEN_STRONG_INLINE __forceinline\n#else\n#define EIGEN_STRONG_INLINE inline\n#endif\n#endif\n\n// EIGEN_ALWAYS_INLINE is the stronget, it has the effect of making the function inline and adding every possible\n// attribute to maximize inlining. This should only be used when really necessary: in particular,\n// it uses __attribute__((always_inline)) on GCC, which most of the time is useless and can severely harm compile times.\n// FIXME with the always_inline attribute,\n// gcc 3.4.x and 4.1 reports the following compilation error:\n//   Eval.h:91: sorry, unimplemented: inlining failed in call to 'const Eigen::Eval<Derived> Eigen::MatrixBase<Scalar, Derived>::eval() const'\n//    : function body not available\n//   See also bug 1367\n#if EIGEN_GNUC_AT_LEAST(4,2) && !defined(SYCL_DEVICE_ONLY)\n#define EIGEN_ALWAYS_INLINE __attribute__((always_inline)) inline\n#else\n#define EIGEN_ALWAYS_INLINE EIGEN_STRONG_INLINE\n#endif\n\n#if EIGEN_COMP_GNUC\n#define EIGEN_DONT_INLINE __attribute__((noinline))\n#elif EIGEN_COMP_MSVC\n#define EIGEN_DONT_INLINE __declspec(noinline)\n#else\n#define EIGEN_DONT_INLINE\n#endif\n\n#if EIGEN_COMP_GNUC\n#define EIGEN_PERMISSIVE_EXPR __extension__\n#else\n#define EIGEN_PERMISSIVE_EXPR\n#endif\n\n// GPU stuff\n\n// Disable some features when compiling with GPU compilers (NVCC/clang-cuda/SYCL/HIPCC)\n#if defined(EIGEN_CUDACC) || defined(SYCL_DEVICE_ONLY) || defined(EIGEN_HIPCC)\n  // Do not try asserts on device code\n  #ifndef EIGEN_NO_DEBUG\n  #define EIGEN_NO_DEBUG\n  #endif\n\n  #ifdef EIGEN_INTERNAL_DEBUGGING\n  #undef EIGEN_INTERNAL_DEBUGGING\n  #endif\n\n  #ifdef EIGEN_EXCEPTIONS\n  #undef EIGEN_EXCEPTIONS\n  #endif\n#endif\n\n#if defined(SYCL_DEVICE_ONLY)\n  #ifndef EIGEN_DONT_VECTORIZE\n    #define EIGEN_DONT_VECTORIZE\n  #endif\n  #define EIGEN_DEVICE_FUNC __attribute__((flatten)) __attribute__((always_inline))\n// All functions callable from CUDA/HIP code must be qualified with __device__\n#elif defined(EIGEN_GPUCC)\n    #define EIGEN_DEVICE_FUNC __host__ __device__\n#else\n  #define EIGEN_DEVICE_FUNC\n#endif\n\n\n// this macro allows to get rid of linking errors about multiply defined functions.\n//  - static is not very good because it prevents definitions from different object files to be merged.\n//           So static causes the resulting linked executable to be bloated with multiple copies of the same function.\n//  - inline is not perfect either as it unwantedly hints the compiler toward inlining the function.\n#define EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_DEVICE_FUNC\n#define EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_DEVICE_FUNC inline\n\n#ifdef NDEBUG\n# ifndef EIGEN_NO_DEBUG\n#  define EIGEN_NO_DEBUG\n# endif\n#endif\n\n// eigen_plain_assert is where we implement the workaround for the assert() bug in GCC <= 4.3, see bug 89\n#ifdef EIGEN_NO_DEBUG\n  #ifdef SYCL_DEVICE_ONLY // used to silence the warning on SYCL device\n    #define eigen_plain_assert(x) EIGEN_UNUSED_VARIABLE(x)\n  #else\n    #define eigen_plain_assert(x)\n  #endif\n#else\n  #if EIGEN_SAFE_TO_USE_STANDARD_ASSERT_MACRO\n    namespace Eigen {\n    namespace internal {\n    inline bool copy_bool(bool b) { return b; }\n    }\n    }\n    #define eigen_plain_assert(x) assert(x)\n  #else\n    // work around bug 89\n    #include 
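// --- Illustrative sketch (hypothetical macros, guarded with #if 0) of the
// inlining-control portability handled above: each compiler spells
// force-inline and never-inline differently, so they are wrapped once.
#if 0
#if defined(_MSC_VER)
  #define MY_FORCE_INLINE __forceinline
  #define MY_NO_INLINE __declspec(noinline)
#elif defined(__GNUC__)
  #define MY_FORCE_INLINE __attribute__((always_inline)) inline
  #define MY_NO_INLINE __attribute__((noinline))
#else
  #define MY_FORCE_INLINE inline
  #define MY_NO_INLINE
#endif

MY_FORCE_INLINE int square(int x) { return x * x; }      // hot path
MY_NO_INLINE int rare_error_path(int x) { return -x; }   // cold path, kept out of callers
#endif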
<cstdlib>   // for abort\n    #include <iostream>  // for std::cerr\n\n    namespace Eigen {\n    namespace internal {\n    // trivial function copying a bool. Must be EIGEN_DONT_INLINE, so we implement it after including Eigen headers.\n    // see bug 89.\n    namespace {\n    EIGEN_DONT_INLINE bool copy_bool(bool b) { return b; }\n    }\n    inline void assert_fail(const char *condition, const char *function, const char *file, int line)\n    {\n      std::cerr << \"assertion failed: \" << condition << \" in function \" << function << \" at \" << file << \":\" << line << std::endl;\n      abort();\n    }\n    }\n    }\n    #define eigen_plain_assert(x) \\\n      do { \\\n        if(!Eigen::internal::copy_bool(x)) \\\n          Eigen::internal::assert_fail(EIGEN_MAKESTRING(x), __PRETTY_FUNCTION__, __FILE__, __LINE__); \\\n      } while(false)\n  #endif\n#endif\n\n// eigen_assert can be overridden\n#ifndef eigen_assert\n#define eigen_assert(x) eigen_plain_assert(x)\n#endif\n\n#ifdef EIGEN_INTERNAL_DEBUGGING\n#define eigen_internal_assert(x) eigen_assert(x)\n#else\n#define eigen_internal_assert(x)\n#endif\n\n#ifdef EIGEN_NO_DEBUG\n#define EIGEN_ONLY_USED_FOR_DEBUG(x) EIGEN_UNUSED_VARIABLE(x)\n#else\n#define EIGEN_ONLY_USED_FOR_DEBUG(x)\n#endif\n\n#ifndef EIGEN_NO_DEPRECATED_WARNING\n  #if EIGEN_COMP_GNUC\n    #define EIGEN_DEPRECATED __attribute__((deprecated))\n  #elif EIGEN_COMP_MSVC\n    #define EIGEN_DEPRECATED __declspec(deprecated)\n  #else\n    #define EIGEN_DEPRECATED\n  #endif\n#else\n  #define EIGEN_DEPRECATED\n#endif\n\n#if EIGEN_COMP_GNUC\n#define EIGEN_UNUSED __attribute__((unused))\n#else\n#define EIGEN_UNUSED\n#endif\n\n// Suppresses 'unused variable' warnings.\nnamespace Eigen {\n  namespace internal {\n    template<typename T> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ignore_unused_variable(const T&) {}\n  }\n}\n#define EIGEN_UNUSED_VARIABLE(var) Eigen::internal::ignore_unused_variable(var);\n\n#if !defined(EIGEN_ASM_COMMENT)\n  #if EIGEN_COMP_GNUC && (EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_ARM_OR_ARM64)\n    #define EIGEN_ASM_COMMENT(X)  __asm__(\"#\" X)\n  #else\n    #define EIGEN_ASM_COMMENT(X)\n  #endif\n#endif\n\n\n// Acts as a barrier preventing operations involving `X` from crossing. 
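// --- Illustrative sketch (hypothetical handler name, guarded with #if 0) of the
// override hook left open by "#ifndef eigen_assert" above: a project may define
// eigen_assert before its first Eigen include to route failures elsewhere.
#if 0
#include <cstdio>
#include <cstdlib>

inline void my_assert_handler(const char* expr, const char* file, int line) {
  std::fprintf(stderr, "eigen_assert failed: %s (%s:%d)\n", expr, file, line);
  std::abort();
}

// Must appear before the first #include of any Eigen header to take effect.
#define eigen_assert(x) \
  do { if (!(x)) my_assert_handler(#x, __FILE__, __LINE__); } while (false)
// #include <Eigen/Dense>
#endif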
This\n// occurs, for example, in the fast rounding trick where a magic constant is\n// added then subtracted, which is otherwise compiled away with -ffast-math.\n//\n// See bug 1674\n#if !defined(EIGEN_OPTIMIZATION_BARRIER)\n  #if EIGEN_COMP_GNUC\n    // According to https://gcc.gnu.org/onlinedocs/gcc/Constraints.html:\n    //   X: Any operand whatsoever.\n    //   r: A register operand is allowed provided that it is in a general\n    //      register.\n    //   g: Any register, memory or immediate integer operand is allowed, except\n    //      for registers that are not general registers.\n    //   w: (AArch32/AArch64) Floating point register, Advanced SIMD vector\n    //      register or SVE vector register.\n    //   x: (SSE) Any SSE register.\n    //      (AArch64) Like w, but restricted to registers 0 to 15 inclusive.\n    //   v: (PowerPC) An Altivec vector register.\n    //   wa:(PowerPC) A VSX register.\n    //\n    // \"X\" (uppercase) should work for all cases, though this seems to fail for\n    // some versions of GCC for arm/aarch64 with\n    //   \"error: inconsistent operand constraints in an 'asm'\"\n    // Clang x86_64/arm/aarch64 seems to require \"g\" to support both scalars and\n    // vectors, otherwise\n    //   \"error: non-trivial scalar-to-vector conversion, possible invalid\n    //    constraint for vector type\"\n    //\n    // GCC for ppc64le generates an internal compiler error with x/X/g.\n    // GCC for AVX generates an internal compiler error with X.\n    //\n    // Tested on icc/gcc/clang for sse, avx, avx2, avx512dq\n    //           gcc for arm, aarch64,\n    //           gcc for ppc64le,\n    // both vectors and scalars.\n    //\n    // Note that this is restricted to plain types - this will not work\n    // directly for std::complex<T>, Eigen::half, Eigen::bfloat16. For these,\n    // you will need to apply to the underlying POD type.\n    #if EIGEN_ARCH_PPC && EIGEN_COMP_GNUC_STRICT\n      // This seems to be broken on clang.  Packet4f is loaded into a single\n      //   register rather than a vector, zeroing out some entries.  Integer\n      //   types also generate a compile error.\n      // General, Altivec, VSX.\n      #define EIGEN_OPTIMIZATION_BARRIER(X)  __asm__  (\"\" : \"+r,v,wa\" (X));\n    #elif EIGEN_ARCH_ARM_OR_ARM64\n      // General, NEON.\n      #define EIGEN_OPTIMIZATION_BARRIER(X)  __asm__  (\"\" : \"+g,w\" (X));\n    #elif EIGEN_ARCH_i386_OR_x86_64\n      // General, SSE.\n      #define EIGEN_OPTIMIZATION_BARRIER(X)  __asm__  (\"\" : \"+g,x\" (X));\n    #else\n      // Not implemented for other architectures.\n      #define EIGEN_OPTIMIZATION_BARRIER(X)\n    #endif\n  #else\n    // Not implemented for other compilers.\n    #define EIGEN_OPTIMIZATION_BARRIER(X)\n  #endif\n#endif\n\n#if EIGEN_COMP_MSVC\n  // NOTE MSVC often gives C4127 warnings with compiletime if statements. 
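// --- Illustrative sketch (guarded with #if 0) of the barrier in action on
// x86, reusing the "+g,x" constraint pair chosen above: the empty asm makes
// the value an input and output of an opaque statement, so -ffast-math cannot
// fold the add/subtract pair of the fast rounding trick away.
#if 0
inline float round_via_magic(float x) {
  float r = x + 12582912.0f;       // 1.5 * 2^23 pushes the fraction bits out
#if defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
  __asm__("" : "+g,x"(r));         // barrier: r must be materialized here
#endif
  return r - 12582912.0f;          // without the barrier this folds back to x
}
#endif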
See bug 1362.\n  // This workaround is ugly, but it does the job.\n#  define EIGEN_CONST_CONDITIONAL(cond)  (void)0, cond\n#else\n#  define EIGEN_CONST_CONDITIONAL(cond)  cond\n#endif\n\n#ifdef EIGEN_DONT_USE_RESTRICT_KEYWORD\n  #define EIGEN_RESTRICT\n#endif\n#ifndef EIGEN_RESTRICT\n  #define EIGEN_RESTRICT __restrict\n#endif\n\n\n#ifndef EIGEN_DEFAULT_IO_FORMAT\n#ifdef EIGEN_MAKING_DOCS\n// format used in Eigen's documentation\n// needed to define it here as escaping characters in CMake add_definition's argument seems very problematic.\n#define EIGEN_DEFAULT_IO_FORMAT Eigen::IOFormat(3, 0, \" \", \"\\n\", \"\", \"\")\n#else\n#define EIGEN_DEFAULT_IO_FORMAT Eigen::IOFormat()\n#endif\n#endif\n\n// just an empty macro !\n#define EIGEN_EMPTY\n\n\n// When compiling CUDA/HIP device code with NVCC or HIPCC\n// pull in math functions from the global namespace.\n// In host mode, and when device code is compiled with clang,\n// use the std versions.\n#if (defined(EIGEN_CUDA_ARCH) && defined(__NVCC__)) || defined(EIGEN_HIP_DEVICE_COMPILE)\n  #define EIGEN_USING_STD(FUNC) using ::FUNC;\n#else\n  #define EIGEN_USING_STD(FUNC) using std::FUNC;\n#endif\n\n#if EIGEN_COMP_MSVC_STRICT && (EIGEN_COMP_MSVC < 1900 || (EIGEN_COMP_MSVC == 1900 && EIGEN_COMP_NVCC))\n  // For older MSVC versions, as well as 1900 && CUDA 8, using the base operator is necessary,\n  //   otherwise we get duplicate definition errors\n  // For later MSVC versions, we require explicit operator= definition, otherwise we get\n  //   use of implicitly deleted operator errors.\n  // (cf Bugs 920, 1000, 1324, 2291)\n  #define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \\\n    using Base::operator =;\n#elif EIGEN_COMP_CLANG // workaround clang bug (see http://forum.kde.org/viewtopic.php?f=74&t=102653)\n  #define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \\\n    using Base::operator =; \\\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const Derived& other) { Base::operator=(other); return *this; } \\\n    template <typename OtherDerived> \\\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const DenseBase<OtherDerived>& other) { Base::operator=(other.derived()); return *this; }\n#else\n  #define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \\\n    using Base::operator =; \\\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const Derived& other) \\\n    { \\\n      Base::operator=(other); \\\n      return *this; \\\n    }\n#endif\n\n\n/**\n * \\internal\n * \\brief Macro to explicitly define the default copy constructor.\n * This is necessary, because the implicit definition is deprecated if the copy-assignment is overridden.\n */\n#if EIGEN_HAS_CXX11\n#define EIGEN_DEFAULT_COPY_CONSTRUCTOR(CLASS) CLASS(const CLASS&) = default;\n#else\n#define EIGEN_DEFAULT_COPY_CONSTRUCTOR(CLASS)\n#endif\n\n\n\n/** \\internal\n * \\brief Macro to manually inherit assignment operators.\n * This is necessary, because the implicitly defined assignment operator gets deleted when a custom operator= is defined.\n * With C++11 or later this also default-implements the copy-constructor\n */\n#define EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Derived)  \\\n    EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \\\n    EIGEN_DEFAULT_COPY_CONSTRUCTOR(Derived)\n\n/** \\internal\n * \\brief Macro to manually define default constructors and destructors.\n * This is necessary when the copy constructor is re-defined.\n * For empty helper classes this should usually be protected, to avoid accidentally creating empty objects.\n 
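// --- Illustrative sketch (guarded with #if 0) of the language rule these
// macros work around: declaring operator= suppresses the implicit copy
// constructor (deprecated in C++11, gone entirely once move members appear),
// so the macros re-declare it explicitly with "= default".
#if 0
struct Wrapper {
  int v = 0;
  Wrapper() = default;
  Wrapper(const Wrapper&) = default;       // what EIGEN_DEFAULT_COPY_CONSTRUCTOR restores
  Wrapper& operator=(const Wrapper& o) {   // custom assignment, as in Eigen's classes
    v = o.v;
    return *this;
  }
};

int main() { Wrapper a; Wrapper b(a); b = a; return 0; }
#endif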
*\n * Hiding the default destructor lead to problems in C++03 mode together with boost::multiprecision\n */\n#if EIGEN_HAS_CXX11\n#define EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(Derived)  \\\n    Derived() = default; \\\n    ~Derived() = default;\n#else\n#define EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(Derived)  \\\n    Derived() {}; \\\n    /* ~Derived() {}; */\n#endif\n\n\n\n\n\n/**\n* Just a side note. Commenting within defines works only by documenting\n* behind the object (via '!<'). Comments cannot be multi-line and thus\n* we have these extra long lines. What is confusing doxygen over here is\n* that we use '\\' and basically have a bunch of typedefs with their\n* documentation in a single line.\n**/\n\n#define EIGEN_GENERIC_PUBLIC_INTERFACE(Derived) \\\n  typedef typename Eigen::internal::traits<Derived>::Scalar Scalar; /*!< \\brief Numeric type, e.g. float, double, int or std::complex<float>. */ \\\n  typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; /*!< \\brief The underlying numeric type for composed scalar types. \\details In cases where Scalar is e.g. std::complex<T>, T were corresponding to RealScalar. */ \\\n  typedef typename Base::CoeffReturnType CoeffReturnType; /*!< \\brief The return type for coefficient access. \\details Depending on whether the object allows direct coefficient access (e.g. for a MatrixXd), this type is either 'const Scalar&' or simply 'Scalar' for objects that do not allow direct coefficient access. */ \\\n  typedef typename Eigen::internal::ref_selector<Derived>::type Nested; \\\n  typedef typename Eigen::internal::traits<Derived>::StorageKind StorageKind; \\\n  typedef typename Eigen::internal::traits<Derived>::StorageIndex StorageIndex; \\\n  enum CompileTimeTraits \\\n      { RowsAtCompileTime = Eigen::internal::traits<Derived>::RowsAtCompileTime, \\\n        ColsAtCompileTime = Eigen::internal::traits<Derived>::ColsAtCompileTime, \\\n        Flags = Eigen::internal::traits<Derived>::Flags, \\\n        SizeAtCompileTime = Base::SizeAtCompileTime, \\\n        MaxSizeAtCompileTime = Base::MaxSizeAtCompileTime, \\\n        IsVectorAtCompileTime = Base::IsVectorAtCompileTime }; \\\n  using Base::derived; \\\n  using Base::const_cast_derived;\n\n\n// FIXME Maybe the EIGEN_DENSE_PUBLIC_INTERFACE could be removed as importing PacketScalar is rarely needed\n#define EIGEN_DENSE_PUBLIC_INTERFACE(Derived) \\\n  EIGEN_GENERIC_PUBLIC_INTERFACE(Derived) \\\n  typedef typename Base::PacketScalar PacketScalar;\n\n\n#define EIGEN_PLAIN_ENUM_MIN(a,b) (((int)a <= (int)b) ? (int)a : (int)b)\n#define EIGEN_PLAIN_ENUM_MAX(a,b) (((int)a >= (int)b) ? (int)a : (int)b)\n\n// EIGEN_SIZE_MIN_PREFER_DYNAMIC gives the min between compile-time sizes. 0 has absolute priority, followed by 1,\n// followed by Dynamic, followed by other finite values. The reason for giving Dynamic the priority over\n// finite values is that min(3, Dynamic) should be Dynamic, since that could be anything between 0 and 3.\n#define EIGEN_SIZE_MIN_PREFER_DYNAMIC(a,b) (((int)a == 0 || (int)b == 0) ? 0 \\\n                           : ((int)a == 1 || (int)b == 1) ? 1 \\\n                           : ((int)a == Dynamic || (int)b == Dynamic) ? Dynamic \\\n                           : ((int)a <= (int)b) ? (int)a : (int)b)\n\n// EIGEN_SIZE_MIN_PREFER_FIXED is a variant of EIGEN_SIZE_MIN_PREFER_DYNAMIC comparing MaxSizes. The difference is that finite values\n// now have priority over Dynamic, so that min(3, Dynamic) gives 3. 
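// --- Illustrative sketch (guarded with #if 0) of the size-combination rule
// stated above, rewritten as a constexpr function so the ternary ladder can be
// unit-tested. Dynamic is Eigen's sentinel value -1.
#if 0
constexpr int kDynamic = -1;

constexpr int size_min_prefer_dynamic(int a, int b) {
  return (a == 0 || b == 0) ? 0
       : (a == 1 || b == 1) ? 1
       : (a == kDynamic || b == kDynamic) ? kDynamic
       : (a <= b) ? a : b;
}

static_assert(size_min_prefer_dynamic(3, kDynamic) == kDynamic,
              "Dynamic could be anything in [0,3], so the min stays Dynamic");
static_assert(size_min_prefer_dynamic(0, kDynamic) == 0, "0 has absolute priority");
static_assert(size_min_prefer_dynamic(3, 5) == 3, "plain min for finite sizes");
#endif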
Indeed, whatever the actual value is\n// (between 0 and 3), it is not more than 3.\n#define EIGEN_SIZE_MIN_PREFER_FIXED(a,b)  (((int)a == 0 || (int)b == 0) ? 0 \\\n                           : ((int)a == 1 || (int)b == 1) ? 1 \\\n                           : ((int)a == Dynamic && (int)b == Dynamic) ? Dynamic \\\n                           : ((int)a == Dynamic) ? (int)b \\\n                           : ((int)b == Dynamic) ? (int)a \\\n                           : ((int)a <= (int)b) ? (int)a : (int)b)\n\n// see EIGEN_SIZE_MIN_PREFER_DYNAMIC. No need for a separate variant for MaxSizes here.\n#define EIGEN_SIZE_MAX(a,b) (((int)a == Dynamic || (int)b == Dynamic) ? Dynamic \\\n                           : ((int)a >= (int)b) ? (int)a : (int)b)\n\n#define EIGEN_LOGICAL_XOR(a,b) (((a) || (b)) && !((a) && (b)))\n\n#define EIGEN_IMPLIES(a,b) (!(a) || (b))\n\n#if EIGEN_HAS_BUILTIN(__builtin_expect) || EIGEN_COMP_GNUC\n#define EIGEN_PREDICT_FALSE(x) (__builtin_expect(x, false))\n#define EIGEN_PREDICT_TRUE(x) (__builtin_expect(false || (x), true))\n#else\n#define EIGEN_PREDICT_FALSE(x) (x)\n#define EIGEN_PREDICT_TRUE(x) (x)\n#endif\n\n// the expression type of a standard coefficient wise binary operation\n#define EIGEN_CWISE_BINARY_RETURN_TYPE(LHS,RHS,OPNAME) \\\n    CwiseBinaryOp< \\\n      EIGEN_CAT(EIGEN_CAT(internal::scalar_,OPNAME),_op)< \\\n          typename internal::traits<LHS>::Scalar, \\\n          typename internal::traits<RHS>::Scalar \\\n      >, \\\n      const LHS, \\\n      const RHS \\\n    >\n\n#define EIGEN_MAKE_CWISE_BINARY_OP(METHOD,OPNAME) \\\n  template<typename OtherDerived> \\\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,OPNAME) \\\n  (METHOD)(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const \\\n  { \\\n    return EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,OPNAME)(derived(), other.derived()); \\\n  }\n\n#define EIGEN_SCALAR_BINARY_SUPPORTED(OPNAME,TYPEA,TYPEB) \\\n  (Eigen::internal::has_ReturnType<Eigen::ScalarBinaryOpTraits<TYPEA,TYPEB,EIGEN_CAT(EIGEN_CAT(Eigen::internal::scalar_,OPNAME),_op)<TYPEA,TYPEB>  > >::value)\n\n#define EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(EXPR,SCALAR,OPNAME) \\\n  CwiseBinaryOp<EIGEN_CAT(EIGEN_CAT(internal::scalar_,OPNAME),_op)<typename internal::traits<EXPR>::Scalar,SCALAR>, const EXPR, \\\n                const typename internal::plain_constant_type<EXPR,SCALAR>::type>\n\n#define EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(SCALAR,EXPR,OPNAME) \\\n  CwiseBinaryOp<EIGEN_CAT(EIGEN_CAT(internal::scalar_,OPNAME),_op)<SCALAR,typename internal::traits<EXPR>::Scalar>, \\\n                const typename internal::plain_constant_type<EXPR,SCALAR>::type, const EXPR>\n\n// Workaround for MSVC 2010 (see ML thread \"patch with compile for for MSVC 2010\")\n#if EIGEN_COMP_MSVC_STRICT && (EIGEN_COMP_MSVC_STRICT<=1600)\n#define EIGEN_MSVC10_WORKAROUND_BINARYOP_RETURN_TYPE(X) typename internal::enable_if<true,X>::type\n#else\n#define EIGEN_MSVC10_WORKAROUND_BINARYOP_RETURN_TYPE(X) X\n#endif\n\n#define EIGEN_MAKE_SCALAR_BINARY_OP_ONTHERIGHT(METHOD,OPNAME) \\\n  template <typename T> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \\\n  EIGEN_MSVC10_WORKAROUND_BINARYOP_RETURN_TYPE(const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,typename internal::promote_scalar_arg<Scalar EIGEN_COMMA T EIGEN_COMMA EIGEN_SCALAR_BINARY_SUPPORTED(OPNAME,Scalar,T)>::type,OPNAME))\\\n  (METHOD)(const T& scalar) const { \\\n    typedef typename 
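// --- Illustrative sketch (hypothetical macros, guarded with #if 0) of the
// branch-hint wrappers above: where __builtin_expect exists it biases code
// layout toward the expected path; elsewhere the macro collapses to the bare
// condition, which is why both arms must stay plain expressions.
#if 0
#if defined(__GNUC__)
  #define MY_LIKELY(x)   (__builtin_expect(!!(x), 1))
  #define MY_UNLIKELY(x) (__builtin_expect(!!(x), 0))
#else
  #define MY_LIKELY(x)   (x)
  #define MY_UNLIKELY(x) (x)
#endif

int checked_div(int a, int b) {
  if (MY_UNLIKELY(b == 0)) return 0;  // error path moved off the hot trace
  return a / b;
}
#endif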
internal::promote_scalar_arg<Scalar,T,EIGEN_SCALAR_BINARY_SUPPORTED(OPNAME,Scalar,T)>::type PromotedT; \\\n    return EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,PromotedT,OPNAME)(derived(), \\\n           typename internal::plain_constant_type<Derived,PromotedT>::type(derived().rows(), derived().cols(), internal::scalar_constant_op<PromotedT>(scalar))); \\\n  }\n\n#define EIGEN_MAKE_SCALAR_BINARY_OP_ONTHELEFT(METHOD,OPNAME) \\\n  template <typename T> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE friend \\\n  EIGEN_MSVC10_WORKAROUND_BINARYOP_RETURN_TYPE(const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(typename internal::promote_scalar_arg<Scalar EIGEN_COMMA T EIGEN_COMMA EIGEN_SCALAR_BINARY_SUPPORTED(OPNAME,T,Scalar)>::type,Derived,OPNAME)) \\\n  (METHOD)(const T& scalar, const StorageBaseType& matrix) { \\\n    typedef typename internal::promote_scalar_arg<Scalar,T,EIGEN_SCALAR_BINARY_SUPPORTED(OPNAME,T,Scalar)>::type PromotedT; \\\n    return EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(PromotedT,Derived,OPNAME)( \\\n           typename internal::plain_constant_type<Derived,PromotedT>::type(matrix.derived().rows(), matrix.derived().cols(), internal::scalar_constant_op<PromotedT>(scalar)), matrix.derived()); \\\n  }\n\n#define EIGEN_MAKE_SCALAR_BINARY_OP(METHOD,OPNAME) \\\n  EIGEN_MAKE_SCALAR_BINARY_OP_ONTHELEFT(METHOD,OPNAME) \\\n  EIGEN_MAKE_SCALAR_BINARY_OP_ONTHERIGHT(METHOD,OPNAME)\n\n\n#if (defined(_CPPUNWIND) || defined(__EXCEPTIONS)) && !defined(EIGEN_CUDA_ARCH) && !defined(EIGEN_EXCEPTIONS) && !defined(EIGEN_USE_SYCL) && !defined(EIGEN_HIP_DEVICE_COMPILE)\n  #define EIGEN_EXCEPTIONS\n#endif\n\n\n#ifdef EIGEN_EXCEPTIONS\n#  define EIGEN_THROW_X(X) throw X\n#  define EIGEN_THROW throw\n#  define EIGEN_TRY try\n#  define EIGEN_CATCH(X) catch (X)\n#else\n#  if defined(EIGEN_CUDA_ARCH)\n#    define EIGEN_THROW_X(X) asm(\"trap;\")\n#    define EIGEN_THROW asm(\"trap;\")\n#  elif defined(EIGEN_HIP_DEVICE_COMPILE)\n#    define EIGEN_THROW_X(X) asm(\"s_trap 0\")\n#    define EIGEN_THROW asm(\"s_trap 0\")\n#  else\n#    define EIGEN_THROW_X(X) std::abort()\n#    define EIGEN_THROW std::abort()\n#  endif\n#  define EIGEN_TRY if (true)\n#  define EIGEN_CATCH(X) else\n#endif\n\n\n#if EIGEN_HAS_CXX11_NOEXCEPT\n#   define EIGEN_INCLUDE_TYPE_TRAITS\n#   define EIGEN_NOEXCEPT noexcept\n#   define EIGEN_NOEXCEPT_IF(x) noexcept(x)\n#   define EIGEN_NO_THROW noexcept(true)\n#   define EIGEN_EXCEPTION_SPEC(X) noexcept(false)\n#else\n#   define EIGEN_NOEXCEPT\n#   define EIGEN_NOEXCEPT_IF(x)\n#   define EIGEN_NO_THROW throw()\n#   if EIGEN_COMP_MSVC || EIGEN_COMP_CXXVER>=17\n      // MSVC does not support exception specifications (warning C4290),\n      // and they are deprecated in c++11 anyway. This is even an error in c++17.\n#     define EIGEN_EXCEPTION_SPEC(X) throw()\n#   else\n#     define EIGEN_EXCEPTION_SPEC(X) throw(X)\n#   endif\n#endif\n\n#if EIGEN_HAS_VARIADIC_TEMPLATES\n// The all function is used to enable a variadic version of eigen_assert which can take a parameter pack as its input.\nnamespace Eigen {\nnamespace internal {\n\ninline bool all(){ return true; }\n\ntemplate<typename T, typename ...Ts>\nbool all(T t, Ts ... 
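// --- Illustrative sketch (hypothetical macros, guarded with #if 0) of the
// no-exceptions fallback defined above: TRY becomes "if (true)" and CATCH
// becomes "else", so try/catch-shaped code still parses with the catch block
// statically dead, while THROW degrades to an abort (or a trap on device).
#if 0
#include <cstdlib>

#if defined(__EXCEPTIONS) || defined(_CPPUNWIND)
  #define MY_TRY try
  #define MY_CATCH(X) catch (X)
  #define MY_THROW_X(X) throw X
#else
  #define MY_TRY if (true)
  #define MY_CATCH(X) else
  #define MY_THROW_X(X) std::abort()
#endif

int demo(bool fail) {
  MY_TRY {
    if (fail) MY_THROW_X(-1);  // aborts instead of unwinding when exceptions are off
    return 0;
  }
  MY_CATCH(...) {
    return 1;                  // dead code in a no-exceptions build
  }
}
#endif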
ts){ return t && all(ts...); }\n\n}\n}\n#endif\n\n#if EIGEN_HAS_CXX11_OVERRIDE_FINAL\n// provide override and final specifiers if they are available:\n#   define EIGEN_OVERRIDE override\n#   define EIGEN_FINAL final\n#else\n#   define EIGEN_OVERRIDE\n#   define EIGEN_FINAL\n#endif\n\n// Wrapping #pragma unroll in a macro since it is required for SYCL\n#if defined(SYCL_DEVICE_ONLY)\n  #if defined(_MSC_VER)\n    #define EIGEN_UNROLL_LOOP __pragma(unroll)\n  #else\n    #define EIGEN_UNROLL_LOOP _Pragma(\"unroll\")\n  #endif\n#else\n  #define EIGEN_UNROLL_LOOP\n#endif\n\n#endif // EIGEN_MACROS_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/util/Memory.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008-2015 Gael Guennebaud <gael.guennebaud@inria.fr>\n// Copyright (C) 2008-2009 Benoit Jacob <jacob.benoit.1@gmail.com>\n// Copyright (C) 2009 Kenneth Riddile <kfriddile@yahoo.com>\n// Copyright (C) 2010 Hauke Heibel <hauke.heibel@gmail.com>\n// Copyright (C) 2010 Thomas Capricelli <orzel@freehackers.org>\n// Copyright (C) 2013 Pavel Holoborodko <pavel@holoborodko.com>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n\n/*****************************************************************************\n*** Platform checks for aligned malloc functions                           ***\n*****************************************************************************/\n\n#ifndef EIGEN_MEMORY_H\n#define EIGEN_MEMORY_H\n\n#ifndef EIGEN_MALLOC_ALREADY_ALIGNED\n\n// Try to determine automatically if malloc is already aligned.\n\n// On 64-bit systems, glibc's malloc returns 16-byte-aligned pointers, see:\n//   http://www.gnu.org/s/libc/manual/html_node/Aligned-Memory-Blocks.html\n// This is true at least since glibc 2.8.\n// This leaves the question how to detect 64-bit. According to this document,\n//   http://gcc.fyxm.net/summit/2003/Porting%20to%2064%20bit.pdf\n// page 114, \"[The] LP64 model [...] is used by all 64-bit UNIX ports\" so it's indeed\n// quite safe, at least within the context of glibc, to equate 64-bit with LP64.\n#if defined(__GLIBC__) && ((__GLIBC__>=2 && __GLIBC_MINOR__ >= 8) || __GLIBC__>2) \\\n && defined(__LP64__) && ! defined( __SANITIZE_ADDRESS__ ) && (EIGEN_DEFAULT_ALIGN_BYTES == 16)\n  #define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 1\n#else\n  #define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 0\n#endif\n\n// FreeBSD 6 seems to have 16-byte aligned malloc\n//   See http://svn.freebsd.org/viewvc/base/stable/6/lib/libc/stdlib/malloc.c?view=markup\n// FreeBSD 7 seems to have 16-byte aligned malloc except on ARM and MIPS architectures\n//   See http://svn.freebsd.org/viewvc/base/stable/7/lib/libc/stdlib/malloc.c?view=markup\n#if defined(__FreeBSD__) && !(EIGEN_ARCH_ARM || EIGEN_ARCH_MIPS) && (EIGEN_DEFAULT_ALIGN_BYTES == 16)\n  #define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 1\n#else\n  #define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 0\n#endif\n\n#if (EIGEN_OS_MAC && (EIGEN_DEFAULT_ALIGN_BYTES == 16))     \\\n || (EIGEN_OS_WIN64 && (EIGEN_DEFAULT_ALIGN_BYTES == 16))   \\\n || EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED              \\\n || EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED\n  #define EIGEN_MALLOC_ALREADY_ALIGNED 1\n#else\n  #define EIGEN_MALLOC_ALREADY_ALIGNED 0\n#endif\n\n#endif\n\nnamespace Eigen {\n\nnamespace internal {\n\nEIGEN_DEVICE_FUNC\ninline void throw_std_bad_alloc()\n{\n  #ifdef EIGEN_EXCEPTIONS\n    throw std::bad_alloc();\n  #else\n    std::size_t huge = static_cast<std::size_t>(-1);\n    #if defined(EIGEN_HIPCC)\n    //\n    // calls to \"::operator new\" are to be treated as opaque function calls (i.e no inlining),\n    // and as a consequence the code in the #else block triggers the hipcc warning :\n    // \"no overloaded function has restriction specifiers that are compatible with the ambient context\"\n    //\n    // \"throw_std_bad_alloc\" has the EIGEN_DEVICE_FUNC attribute, so it seems that hipcc expects\n    // the same on \"operator new\"\n    // Reverting code back to the old version in this #if 
block for the hipcc compiler\n    //\n    new int[huge];\n    #else\n    void* unused = ::operator new(huge);\n    EIGEN_UNUSED_VARIABLE(unused);\n    #endif\n  #endif\n}\n\n/*****************************************************************************\n*** Implementation of handmade aligned functions                           ***\n*****************************************************************************/\n\n/* ----- Hand made implementations of aligned malloc/free and realloc ----- */\n\n/** \\internal Like malloc, but the returned pointer is guaranteed to be 16-byte aligned.\n  * Fast, but wastes 16 additional bytes of memory. Does not throw any exception.\n  */\nEIGEN_DEVICE_FUNC inline void* handmade_aligned_malloc(std::size_t size, std::size_t alignment = EIGEN_DEFAULT_ALIGN_BYTES)\n{\n  eigen_assert(alignment >= sizeof(void*) && (alignment & (alignment-1)) == 0 && \"Alignment must be at least sizeof(void*) and a power of 2\");\n\n  EIGEN_USING_STD(malloc)\n  void *original = malloc(size+alignment);\n  \n  if (original == 0) return 0;\n  void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(alignment-1))) + alignment);\n  *(reinterpret_cast<void**>(aligned) - 1) = original;\n  return aligned;\n}\n\n/** \\internal Frees memory allocated with handmade_aligned_malloc */\nEIGEN_DEVICE_FUNC inline void handmade_aligned_free(void *ptr)\n{\n  if (ptr) {\n    EIGEN_USING_STD(free)\n    free(*(reinterpret_cast<void**>(ptr) - 1));\n  }\n}\n\n/** \\internal\n  * \\brief Reallocates aligned memory.\n  * Since we know that our handmade version is based on std::malloc\n  * we can use std::realloc to implement efficient reallocation.\n  */\ninline void* handmade_aligned_realloc(void* ptr, std::size_t size, std::size_t = 0)\n{\n  if (ptr == 0) return handmade_aligned_malloc(size);\n  void *original = *(reinterpret_cast<void**>(ptr) - 1);\n  std::ptrdiff_t previous_offset = static_cast<char *>(ptr)-static_cast<char *>(original);\n  original = std::realloc(original,size+EIGEN_DEFAULT_ALIGN_BYTES);\n  if (original == 0) return 0;\n  void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1))) + EIGEN_DEFAULT_ALIGN_BYTES);\n  void *previous_aligned = static_cast<char *>(original)+previous_offset;\n  if(aligned!=previous_aligned)\n    std::memmove(aligned, previous_aligned, size);\n\n  *(reinterpret_cast<void**>(aligned) - 1) = original;\n  return aligned;\n}\n\n/*****************************************************************************\n*** Implementation of portable aligned versions of malloc/free/realloc     ***\n*****************************************************************************/\n\n#ifdef EIGEN_NO_MALLOC\nEIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed()\n{\n  eigen_assert(false && \"heap allocation is forbidden (EIGEN_NO_MALLOC is defined)\");\n}\n#elif defined EIGEN_RUNTIME_NO_MALLOC\nEIGEN_DEVICE_FUNC inline bool is_malloc_allowed_impl(bool update, bool new_value = false)\n{\n  static bool value = true;\n  if (update == 1)\n    value = new_value;\n  return value;\n}\nEIGEN_DEVICE_FUNC inline bool is_malloc_allowed() { return is_malloc_allowed_impl(false); }\nEIGEN_DEVICE_FUNC inline bool set_is_malloc_allowed(bool new_value) { return is_malloc_allowed_impl(true, new_value); }\nEIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed()\n{\n  eigen_assert(is_malloc_allowed() && \"heap allocation is forbidden (EIGEN_RUNTIME_NO_MALLOC is defined and 
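// --- Illustrative sketch (guarded with #if 0) restating the trick implemented
// above: over-allocate by `alignment` bytes, round up to the next alignment
// boundary, and stash the original pointer just below the returned one so the
// matching free can recover it. Because malloc's result is itself at least
// pointer-aligned, the gap below the boundary is a whole number of pointer
// slots, so the stash slot always exists.
#if 0
#include <cassert>
#include <cstdint>
#include <cstdlib>

void* my_aligned_malloc(std::size_t size, std::size_t alignment) {
  assert(alignment >= sizeof(void*) && (alignment & (alignment - 1)) == 0);
  void* original = std::malloc(size + alignment);
  if (original == nullptr) return nullptr;
  void* aligned = reinterpret_cast<void*>(
      (reinterpret_cast<std::uintptr_t>(original) & ~(std::uintptr_t(alignment) - 1)) + alignment);
  *(reinterpret_cast<void**>(aligned) - 1) = original;  // stash for the free
  return aligned;
}

void my_aligned_free(void* ptr) {
  if (ptr) std::free(*(reinterpret_cast<void**>(ptr) - 1));  // recover the stash
}

int main() {
  void* p = my_aligned_malloc(100, 32);
  assert(reinterpret_cast<std::uintptr_t>(p) % 32 == 0);
  my_aligned_free(p);
  return 0;
}
#endif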
g_is_malloc_allowed is false)\");\n}\n#else\nEIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed()\n{}\n#endif\n\n/** \\internal Allocates \\a size bytes. The returned pointer is guaranteed to have 16 or 32 bytes alignment depending on the requirements.\n  * On allocation error, the returned pointer is null, and std::bad_alloc is thrown.\n  */\nEIGEN_DEVICE_FUNC inline void* aligned_malloc(std::size_t size)\n{\n  check_that_malloc_is_allowed();\n\n  void *result;\n  #if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED\n\n    EIGEN_USING_STD(malloc)\n    result = malloc(size);\n\n    #if EIGEN_DEFAULT_ALIGN_BYTES==16\n    eigen_assert((size<16 || (std::size_t(result)%16)==0) && \"System's malloc returned an unaligned pointer. Compile with EIGEN_MALLOC_ALREADY_ALIGNED=0 to fallback to handmade aligned memory allocator.\");\n    #endif\n  #else\n    result = handmade_aligned_malloc(size);\n  #endif\n\n  if(!result && size)\n    throw_std_bad_alloc();\n\n  return result;\n}\n\n/** \\internal Frees memory allocated with aligned_malloc. */\nEIGEN_DEVICE_FUNC inline void aligned_free(void *ptr)\n{\n  #if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED\n\n    EIGEN_USING_STD(free)\n    free(ptr);\n\n  #else\n    handmade_aligned_free(ptr);\n  #endif\n}\n\n/**\n  * \\internal\n  * \\brief Reallocates an aligned block of memory.\n  * \\throws std::bad_alloc on allocation failure\n  */\ninline void* aligned_realloc(void *ptr, std::size_t new_size, std::size_t old_size)\n{\n  EIGEN_UNUSED_VARIABLE(old_size)\n\n  void *result;\n#if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED\n  result = std::realloc(ptr,new_size);\n#else\n  result = handmade_aligned_realloc(ptr,new_size,old_size);\n#endif\n\n  if (!result && new_size)\n    throw_std_bad_alloc();\n\n  return result;\n}\n\n/*****************************************************************************\n*** Implementation of conditionally aligned functions                      ***\n*****************************************************************************/\n\n/** \\internal Allocates \\a size bytes. 
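// --- Illustrative sketch (hypothetical names, guarded with #if 0) of the
// runtime toggle implemented above for EIGEN_RUNTIME_NO_MALLOC: a
// function-local static holds the flag, and one function serves as both
// getter and setter so no global variable has to be exported.
#if 0
#include <cassert>

inline bool malloc_allowed_impl(bool update, bool new_value = false) {
  static bool allowed = true;   // single shared flag, initialized on first use
  if (update) allowed = new_value;
  return allowed;
}
inline bool is_malloc_allowed() { return malloc_allowed_impl(false); }
inline bool set_malloc_allowed(bool v) { return malloc_allowed_impl(true, v); }

int main() {
  assert(is_malloc_allowed());
  set_malloc_allowed(false);    // e.g. around a latency-critical section
  assert(!is_malloc_allowed());
  set_malloc_allowed(true);
  return 0;
}
#endif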
If Align is true, then the returned ptr is 16-byte-aligned.\n  * On allocation error, the returned pointer is null, and a std::bad_alloc is thrown.\n  */\ntemplate<bool Align> EIGEN_DEVICE_FUNC inline void* conditional_aligned_malloc(std::size_t size)\n{\n  return aligned_malloc(size);\n}\n\ntemplate<> EIGEN_DEVICE_FUNC inline void* conditional_aligned_malloc<false>(std::size_t size)\n{\n  check_that_malloc_is_allowed();\n\n  EIGEN_USING_STD(malloc)\n  void *result = malloc(size);\n\n  if(!result && size)\n    throw_std_bad_alloc();\n  return result;\n}\n\n/** \\internal Frees memory allocated with conditional_aligned_malloc */\ntemplate<bool Align> EIGEN_DEVICE_FUNC inline void conditional_aligned_free(void *ptr)\n{\n  aligned_free(ptr);\n}\n\ntemplate<> EIGEN_DEVICE_FUNC inline void conditional_aligned_free<false>(void *ptr)\n{\n  EIGEN_USING_STD(free)\n  free(ptr);\n}\n\ntemplate<bool Align> inline void* conditional_aligned_realloc(void* ptr, std::size_t new_size, std::size_t old_size)\n{\n  return aligned_realloc(ptr, new_size, old_size);\n}\n\ntemplate<> inline void* conditional_aligned_realloc<false>(void* ptr, std::size_t new_size, std::size_t)\n{\n  return std::realloc(ptr, new_size);\n}\n\n/*****************************************************************************\n*** Construction/destruction of array elements                             ***\n*****************************************************************************/\n\n/** \\internal Destructs the elements of an array.\n  * The \\a size parameters tells on how many objects to call the destructor of T.\n  */\ntemplate<typename T> EIGEN_DEVICE_FUNC inline void destruct_elements_of_array(T *ptr, std::size_t size)\n{\n  // always destruct an array starting from the end.\n  if(ptr)\n    while(size) ptr[--size].~T();\n}\n\n/** \\internal Constructs the elements of an array.\n  * The \\a size parameter tells on how many objects to call the constructor of T.\n  */\ntemplate<typename T> EIGEN_DEVICE_FUNC inline T* construct_elements_of_array(T *ptr, std::size_t size)\n{\n  std::size_t i;\n  EIGEN_TRY\n  {\n      for (i = 0; i < size; ++i) ::new (ptr + i) T;\n      return ptr;\n  }\n  EIGEN_CATCH(...)\n  {\n    destruct_elements_of_array(ptr, i);\n    EIGEN_THROW;\n  }\n  return NULL;\n}\n\n/*****************************************************************************\n*** Implementation of aligned new/delete-like functions                    ***\n*****************************************************************************/\n\ntemplate<typename T>\nEIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void check_size_for_overflow(std::size_t size)\n{\n  if(size > std::size_t(-1) / sizeof(T))\n    throw_std_bad_alloc();\n}\n\n/** \\internal Allocates \\a size objects of type T. 
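// --- Illustrative sketch (guarded with #if 0) of the exception-safety pattern
// used above: elements are placement-new'ed front to back, and if any
// constructor throws, the ones already built are destroyed back to front
// before the exception is re-thrown, so nothing leaks half-initialized.
#if 0
#include <cstddef>
#include <new>

template <typename T>
void destruct_n(T* ptr, std::size_t n) {
  while (n) ptr[--n].~T();                  // destroy in reverse order
}

template <typename T>
T* construct_n(T* ptr, std::size_t n) {
  std::size_t i = 0;
  try {
    for (; i < n; ++i) ::new (ptr + i) T;   // default-construct in place
    return ptr;
  } catch (...) {
    destruct_n(ptr, i);                     // roll back the i completed elements
    throw;
  }
}
#endif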
The returned pointer is guaranteed to have 16 bytes alignment.\n  * On allocation error, the returned pointer is undefined, but a std::bad_alloc is thrown.\n  * The default constructor of T is called.\n  */\ntemplate<typename T> EIGEN_DEVICE_FUNC inline T* aligned_new(std::size_t size)\n{\n  check_size_for_overflow<T>(size);\n  T *result = reinterpret_cast<T*>(aligned_malloc(sizeof(T)*size));\n  EIGEN_TRY\n  {\n    return construct_elements_of_array(result, size);\n  }\n  EIGEN_CATCH(...)\n  {\n    aligned_free(result);\n    EIGEN_THROW;\n  }\n  return result;\n}\n\ntemplate<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned_new(std::size_t size)\n{\n  check_size_for_overflow<T>(size);\n  T *result = reinterpret_cast<T*>(conditional_aligned_malloc<Align>(sizeof(T)*size));\n  EIGEN_TRY\n  {\n    return construct_elements_of_array(result, size);\n  }\n  EIGEN_CATCH(...)\n  {\n    conditional_aligned_free<Align>(result);\n    EIGEN_THROW;\n  }\n  return result;\n}\n\n/** \\internal Deletes objects constructed with aligned_new\n  * The \\a size parameters tells on how many objects to call the destructor of T.\n  */\ntemplate<typename T> EIGEN_DEVICE_FUNC inline void aligned_delete(T *ptr, std::size_t size)\n{\n  destruct_elements_of_array<T>(ptr, size);\n  Eigen::internal::aligned_free(ptr);\n}\n\n/** \\internal Deletes objects constructed with conditional_aligned_new\n  * The \\a size parameters tells on how many objects to call the destructor of T.\n  */\ntemplate<typename T, bool Align> EIGEN_DEVICE_FUNC inline void conditional_aligned_delete(T *ptr, std::size_t size)\n{\n  destruct_elements_of_array<T>(ptr, size);\n  conditional_aligned_free<Align>(ptr);\n}\n\ntemplate<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned_realloc_new(T* pts, std::size_t new_size, std::size_t old_size)\n{\n  check_size_for_overflow<T>(new_size);\n  check_size_for_overflow<T>(old_size);\n  if(new_size < old_size)\n    destruct_elements_of_array(pts+new_size, old_size-new_size);\n  T *result = reinterpret_cast<T*>(conditional_aligned_realloc<Align>(reinterpret_cast<void*>(pts), sizeof(T)*new_size, sizeof(T)*old_size));\n  if(new_size > old_size)\n  {\n    EIGEN_TRY\n    {\n      construct_elements_of_array(result+old_size, new_size-old_size);\n    }\n    EIGEN_CATCH(...)\n    {\n      conditional_aligned_free<Align>(result);\n      EIGEN_THROW;\n    }\n  }\n  return result;\n}\n\n\ntemplate<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned_new_auto(std::size_t size)\n{\n  if(size==0)\n    return 0; // short-cut. 
Also fixes Bug 884\n  check_size_for_overflow<T>(size);\n  T *result = reinterpret_cast<T*>(conditional_aligned_malloc<Align>(sizeof(T)*size));\n  if(NumTraits<T>::RequireInitialization)\n  {\n    EIGEN_TRY\n    {\n      construct_elements_of_array(result, size);\n    }\n    EIGEN_CATCH(...)\n    {\n      conditional_aligned_free<Align>(result);\n      EIGEN_THROW;\n    }\n  }\n  return result;\n}\n\ntemplate<typename T, bool Align> inline T* conditional_aligned_realloc_new_auto(T* pts, std::size_t new_size, std::size_t old_size)\n{\n  check_size_for_overflow<T>(new_size);\n  check_size_for_overflow<T>(old_size);\n  if(NumTraits<T>::RequireInitialization && (new_size < old_size))\n    destruct_elements_of_array(pts+new_size, old_size-new_size);\n  T *result = reinterpret_cast<T*>(conditional_aligned_realloc<Align>(reinterpret_cast<void*>(pts), sizeof(T)*new_size, sizeof(T)*old_size));\n  if(NumTraits<T>::RequireInitialization && (new_size > old_size))\n  {\n    EIGEN_TRY\n    {\n      construct_elements_of_array(result+old_size, new_size-old_size);\n    }\n    EIGEN_CATCH(...)\n    {\n      conditional_aligned_free<Align>(result);\n      EIGEN_THROW;\n    }\n  }\n  return result;\n}\n\ntemplate<typename T, bool Align> EIGEN_DEVICE_FUNC inline void conditional_aligned_delete_auto(T *ptr, std::size_t size)\n{\n  if(NumTraits<T>::RequireInitialization)\n    destruct_elements_of_array<T>(ptr, size);\n  conditional_aligned_free<Align>(ptr);\n}\n\n/****************************************************************************/\n\n/** \\internal Returns the index of the first element of the array that is well aligned with respect to the requested \\a Alignment.\n  *\n  * \\tparam Alignment requested alignment in Bytes.\n  * \\param array the address of the start of the array\n  * \\param size the size of the array\n  *\n  * \\note If no element of the array is well aligned or the requested alignment is not a multiple of a scalar,\n  * the size of the array is returned. For example with SSE, the requested alignment is typically 16-bytes. If\n  * packet size for the given scalar type is 1, then everything is considered well-aligned.\n  *\n  * \\note Otherwise, if the Alignment is larger that the scalar size, we rely on the assumptions that sizeof(Scalar) is a\n  * power of 2. 
On the other hand, we do not assume that the array address is a multiple of sizeof(Scalar), as that fails for\n  * example with Scalar=double on certain 32-bit platforms, see bug #79.\n  *\n  * There is also the variant first_aligned(const MatrixBase&) defined in DenseCoeffsBase.h.\n  * \\sa first_default_aligned()\n  */\ntemplate<int Alignment, typename Scalar, typename Index>\nEIGEN_DEVICE_FUNC inline Index first_aligned(const Scalar* array, Index size)\n{\n  const Index ScalarSize = sizeof(Scalar);\n  const Index AlignmentSize = Alignment / ScalarSize;\n  const Index AlignmentMask = AlignmentSize-1;\n\n  if(AlignmentSize<=1)\n  {\n    // Either the requested alignment if smaller than a scalar, or it exactly match a 1 scalar\n    // so that all elements of the array have the same alignment.\n    return 0;\n  }\n  else if( (UIntPtr(array) & (sizeof(Scalar)-1)) || (Alignment%ScalarSize)!=0)\n  {\n    // The array is not aligned to the size of a single scalar, or the requested alignment is not a multiple of the scalar size.\n    // Consequently, no element of the array is well aligned.\n    return size;\n  }\n  else\n  {\n    Index first = (AlignmentSize - (Index((UIntPtr(array)/sizeof(Scalar))) & AlignmentMask)) & AlignmentMask;\n    return (first < size) ? first : size;\n  }\n}\n\n/** \\internal Returns the index of the first element of the array that is well aligned with respect the largest packet requirement.\n   * \\sa first_aligned(Scalar*,Index) and first_default_aligned(DenseBase<Derived>) */\ntemplate<typename Scalar, typename Index>\nEIGEN_DEVICE_FUNC inline Index first_default_aligned(const Scalar* array, Index size)\n{\n  typedef typename packet_traits<Scalar>::type DefaultPacketType;\n  return first_aligned<unpacket_traits<DefaultPacketType>::alignment>(array, size);\n}\n\n/** \\internal Returns the smallest integer multiple of \\a base and greater or equal to \\a size\n  */\ntemplate<typename Index>\ninline Index first_multiple(Index size, Index base)\n{\n  return ((size+base-1)/base)*base;\n}\n\n// std::copy is much slower than memcpy, so let's introduce a smart_copy which\n// use memcpy on trivial types, i.e., on types that does not require an initialization ctor.\ntemplate<typename T, bool UseMemcpy> struct smart_copy_helper;\n\ntemplate<typename T> EIGEN_DEVICE_FUNC void smart_copy(const T* start, const T* end, T* target)\n{\n  smart_copy_helper<T,!NumTraits<T>::RequireInitialization>::run(start, end, target);\n}\n\ntemplate<typename T> struct smart_copy_helper<T,true> {\n  EIGEN_DEVICE_FUNC static inline void run(const T* start, const T* end, T* target)\n  {\n    IntPtr size = IntPtr(end)-IntPtr(start);\n    if(size==0) return;\n    eigen_internal_assert(start!=0 && end!=0 && target!=0);\n    EIGEN_USING_STD(memcpy)\n    memcpy(target, start, size);\n  }\n};\n\ntemplate<typename T> struct smart_copy_helper<T,false> {\n  EIGEN_DEVICE_FUNC static inline void run(const T* start, const T* end, T* target)\n  { std::copy(start, end, target); }\n};\n\n// intelligent memmove. 
falls back to std::memmove for POD types, uses std::copy otherwise.\ntemplate<typename T, bool UseMemmove> struct smart_memmove_helper;\n\ntemplate<typename T> void smart_memmove(const T* start, const T* end, T* target)\n{\n  smart_memmove_helper<T,!NumTraits<T>::RequireInitialization>::run(start, end, target);\n}\n\ntemplate<typename T> struct smart_memmove_helper<T,true> {\n  static inline void run(const T* start, const T* end, T* target)\n  {\n    IntPtr size = IntPtr(end)-IntPtr(start);\n    if(size==0) return;\n    eigen_internal_assert(start!=0 && end!=0 && target!=0);\n    std::memmove(target, start, size);\n  }\n};\n\ntemplate<typename T> struct smart_memmove_helper<T,false> {\n  static inline void run(const T* start, const T* end, T* target)\n  {\n    if (UIntPtr(target) < UIntPtr(start))\n    {\n      std::copy(start, end, target);\n    }\n    else\n    {\n      std::ptrdiff_t count = (std::ptrdiff_t(end)-std::ptrdiff_t(start)) / sizeof(T);\n      std::copy_backward(start, end, target + count);\n    }\n  }\n};\n\n#if EIGEN_HAS_RVALUE_REFERENCES\ntemplate<typename T> EIGEN_DEVICE_FUNC T* smart_move(T* start, T* end, T* target)\n{\n  return std::move(start, end, target);\n}\n#else\ntemplate<typename T> EIGEN_DEVICE_FUNC T* smart_move(T* start, T* end, T* target)\n{\n  return std::copy(start, end, target);\n}\n#endif\n\n/*****************************************************************************\n*** Implementation of runtime stack allocation (falling back to malloc)    ***\n*****************************************************************************/\n\n// you can overwrite Eigen's default behavior regarding alloca by defining EIGEN_ALLOCA\n// to the appropriate stack allocation function\n#if ! defined EIGEN_ALLOCA && ! defined EIGEN_GPU_COMPILE_PHASE\n  #if EIGEN_OS_LINUX || EIGEN_OS_MAC || (defined alloca)\n    #define EIGEN_ALLOCA alloca\n  #elif EIGEN_COMP_MSVC\n    #define EIGEN_ALLOCA _alloca\n  #endif\n#endif\n\n// With clang -Oz -mthumb, alloca changes the stack pointer in a way that is\n// not allowed in Thumb2. -DEIGEN_STACK_ALLOCATION_LIMIT=0 doesn't work because\n// the compiler still emits bad code because stack allocation checks use \"<=\".\n// TODO: Eliminate after https://bugs.llvm.org/show_bug.cgi?id=23772\n// is fixed.\n#if defined(__clang__) && defined(__thumb__)\n  #undef EIGEN_ALLOCA\n#endif\n\n// This helper class construct the allocated memory, and takes care of destructing and freeing the handled data\n// at destruction time. 
In practice this helper class is mainly useful to avoid memory leak in case of exceptions.\ntemplate<typename T> class aligned_stack_memory_handler : noncopyable\n{\n  public:\n    /* Creates a stack_memory_handler responsible for the buffer \\a ptr of size \\a size.\n     * Note that \\a ptr can be 0 regardless of the other parameters.\n     * This constructor takes care of constructing/initializing the elements of the buffer if required by the scalar type T (see NumTraits<T>::RequireInitialization).\n     * In this case, the buffer elements will also be destructed when this handler will be destructed.\n     * Finally, if \\a dealloc is true, then the pointer \\a ptr is freed.\n     **/\n    EIGEN_DEVICE_FUNC\n    aligned_stack_memory_handler(T* ptr, std::size_t size, bool dealloc)\n      : m_ptr(ptr), m_size(size), m_deallocate(dealloc)\n    {\n      if(NumTraits<T>::RequireInitialization && m_ptr)\n        Eigen::internal::construct_elements_of_array(m_ptr, size);\n    }\n    EIGEN_DEVICE_FUNC\n    ~aligned_stack_memory_handler()\n    {\n      if(NumTraits<T>::RequireInitialization && m_ptr)\n        Eigen::internal::destruct_elements_of_array<T>(m_ptr, m_size);\n      if(m_deallocate)\n        Eigen::internal::aligned_free(m_ptr);\n    }\n  protected:\n    T* m_ptr;\n    std::size_t m_size;\n    bool m_deallocate;\n};\n\n#ifdef EIGEN_ALLOCA\n\ntemplate<typename Xpr, int NbEvaluations,\n         bool MapExternalBuffer = nested_eval<Xpr,NbEvaluations>::Evaluate && Xpr::MaxSizeAtCompileTime==Dynamic\n         >\nstruct local_nested_eval_wrapper\n{\n  static const bool NeedExternalBuffer = false;\n  typedef typename Xpr::Scalar Scalar;\n  typedef typename nested_eval<Xpr,NbEvaluations>::type ObjectType;\n  ObjectType object;\n\n  EIGEN_DEVICE_FUNC\n  local_nested_eval_wrapper(const Xpr& xpr, Scalar* ptr) : object(xpr)\n  {\n    EIGEN_UNUSED_VARIABLE(ptr);\n    eigen_internal_assert(ptr==0);\n  }\n};\n\ntemplate<typename Xpr, int NbEvaluations>\nstruct local_nested_eval_wrapper<Xpr,NbEvaluations,true>\n{\n  static const bool NeedExternalBuffer = true;\n  typedef typename Xpr::Scalar Scalar;\n  typedef typename plain_object_eval<Xpr>::type PlainObject;\n  typedef Map<PlainObject,EIGEN_DEFAULT_ALIGN_BYTES> ObjectType;\n  ObjectType object;\n\n  EIGEN_DEVICE_FUNC\n  local_nested_eval_wrapper(const Xpr& xpr, Scalar* ptr)\n    : object(ptr==0 ? 
reinterpret_cast<Scalar*>(Eigen::internal::aligned_malloc(sizeof(Scalar)*xpr.size())) : ptr, xpr.rows(), xpr.cols()),\n      m_deallocate(ptr==0)\n  {\n    if(NumTraits<Scalar>::RequireInitialization && object.data())\n      Eigen::internal::construct_elements_of_array(object.data(), object.size());\n    object = xpr;\n  }\n\n  EIGEN_DEVICE_FUNC\n  ~local_nested_eval_wrapper()\n  {\n    if(NumTraits<Scalar>::RequireInitialization && object.data())\n      Eigen::internal::destruct_elements_of_array(object.data(), object.size());\n    if(m_deallocate)\n      Eigen::internal::aligned_free(object.data());\n  }\n\nprivate:\n  bool m_deallocate;\n};\n\n#endif // EIGEN_ALLOCA\n\ntemplate<typename T> class scoped_array : noncopyable\n{\n  T* m_ptr;\npublic:\n  explicit scoped_array(std::ptrdiff_t size)\n  {\n    m_ptr = new T[size];\n  }\n  ~scoped_array()\n  {\n    delete[] m_ptr;\n  }\n  T& operator[](std::ptrdiff_t i) { return m_ptr[i]; }\n  const T& operator[](std::ptrdiff_t i) const { return m_ptr[i]; }\n  T* &ptr() { return m_ptr; }\n  const T* ptr() const { return m_ptr; }\n  operator const T*() const { return m_ptr; }\n};\n\ntemplate<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b)\n{\n  std::swap(a.ptr(),b.ptr());\n}\n\n} // end namespace internal\n\n/** \\internal\n  *\n  * The macro ei_declare_aligned_stack_constructed_variable(TYPE,NAME,SIZE,BUFFER) declares, allocates,\n  * and construct an aligned buffer named NAME of SIZE elements of type TYPE on the stack\n  * if the size in bytes is smaller than EIGEN_STACK_ALLOCATION_LIMIT, and if stack allocation is supported by the platform\n  * (currently, this is Linux, OSX and Visual Studio only). Otherwise the memory is allocated on the heap.\n  * The allocated buffer is automatically deleted when exiting the scope of this declaration.\n  * If BUFFER is non null, then the declared variable is simply an alias for BUFFER, and no allocation/deletion occurs.\n  * Here is an example:\n  * \\code\n  * {\n  *   ei_declare_aligned_stack_constructed_variable(float,data,size,0);\n  *   // use data[0] to data[size-1]\n  * }\n  * \\endcode\n  * The underlying stack allocation function can controlled with the EIGEN_ALLOCA preprocessor token.\n  *\n  * The macro ei_declare_local_nested_eval(XPR_T,XPR,N,NAME) is analogue to\n  * \\code\n  *   typename internal::nested_eval<XPRT_T,N>::type NAME(XPR);\n  * \\endcode\n  * with the advantage of using aligned stack allocation even if the maximal size of XPR at compile time is unknown.\n  * This is accomplished through alloca if this later is supported and if the required number of bytes\n  * is below EIGEN_STACK_ALLOCATION_LIMIT.\n  */\n#ifdef EIGEN_ALLOCA\n\n  #if EIGEN_DEFAULT_ALIGN_BYTES>0\n    // We always manually re-align the result of EIGEN_ALLOCA.\n    // If alloca is already aligned, the compiler should be smart enough to optimize away the re-alignment.\n    #define EIGEN_ALIGNED_ALLOCA(SIZE) reinterpret_cast<void*>((internal::UIntPtr(EIGEN_ALLOCA(SIZE+EIGEN_DEFAULT_ALIGN_BYTES-1)) + EIGEN_DEFAULT_ALIGN_BYTES-1) & ~(std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1)))\n  #else\n    #define EIGEN_ALIGNED_ALLOCA(SIZE) EIGEN_ALLOCA(SIZE)\n  #endif\n\n  #define ei_declare_aligned_stack_constructed_variable(TYPE,NAME,SIZE,BUFFER) \\\n    Eigen::internal::check_size_for_overflow<TYPE>(SIZE); \\\n    TYPE* NAME = (BUFFER)!=0 ? (BUFFER) \\\n               : reinterpret_cast<TYPE*>( \\\n                      (sizeof(TYPE)*SIZE<=EIGEN_STACK_ALLOCATION_LIMIT) ? 
EIGEN_ALIGNED_ALLOCA(sizeof(TYPE)*SIZE) \\\n                    : Eigen::internal::aligned_malloc(sizeof(TYPE)*SIZE) );  \\\n    Eigen::internal::aligned_stack_memory_handler<TYPE> EIGEN_CAT(NAME,_stack_memory_destructor)((BUFFER)==0 ? NAME : 0,SIZE,sizeof(TYPE)*SIZE>EIGEN_STACK_ALLOCATION_LIMIT)\n\n\n  #define ei_declare_local_nested_eval(XPR_T,XPR,N,NAME) \\\n    Eigen::internal::local_nested_eval_wrapper<XPR_T,N> EIGEN_CAT(NAME,_wrapper)(XPR, reinterpret_cast<typename XPR_T::Scalar*>( \\\n      ( (Eigen::internal::local_nested_eval_wrapper<XPR_T,N>::NeedExternalBuffer) && ((sizeof(typename XPR_T::Scalar)*XPR.size())<=EIGEN_STACK_ALLOCATION_LIMIT) ) \\\n        ? EIGEN_ALIGNED_ALLOCA( sizeof(typename XPR_T::Scalar)*XPR.size() ) : 0 ) ) ; \\\n    typename Eigen::internal::local_nested_eval_wrapper<XPR_T,N>::ObjectType NAME(EIGEN_CAT(NAME,_wrapper).object)\n\n#else\n\n  #define ei_declare_aligned_stack_constructed_variable(TYPE,NAME,SIZE,BUFFER) \\\n    Eigen::internal::check_size_for_overflow<TYPE>(SIZE); \\\n    TYPE* NAME = (BUFFER)!=0 ? BUFFER : reinterpret_cast<TYPE*>(Eigen::internal::aligned_malloc(sizeof(TYPE)*SIZE));    \\\n    Eigen::internal::aligned_stack_memory_handler<TYPE> EIGEN_CAT(NAME,_stack_memory_destructor)((BUFFER)==0 ? NAME : 0,SIZE,true)\n\n\n#define ei_declare_local_nested_eval(XPR_T,XPR,N,NAME) typename Eigen::internal::nested_eval<XPR_T,N>::type NAME(XPR)\n\n#endif\n\n\n/*****************************************************************************\n*** Implementation of EIGEN_MAKE_ALIGNED_OPERATOR_NEW [_IF]                ***\n*****************************************************************************/\n\n#if EIGEN_HAS_CXX17_OVERALIGN\n\n// C++17 -> no need to bother about alignment anymore :)\n\n#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign)\n#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign)\n#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW\n#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar,Size)\n\n#else\n\n// HIP does not support new/delete on device.\n#if EIGEN_MAX_ALIGN_BYTES!=0 && !defined(EIGEN_HIP_DEVICE_COMPILE)\n  #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \\\n      EIGEN_DEVICE_FUNC \\\n      void* operator new(std::size_t size, const std::nothrow_t&) EIGEN_NO_THROW { \\\n        EIGEN_TRY { return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); } \\\n        EIGEN_CATCH (...) 
{ return 0; } \\\n      }\n  #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign) \\\n      EIGEN_DEVICE_FUNC \\\n      void *operator new(std::size_t size) { \\\n        return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); \\\n      } \\\n      EIGEN_DEVICE_FUNC \\\n      void *operator new[](std::size_t size) { \\\n        return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); \\\n      } \\\n      EIGEN_DEVICE_FUNC \\\n      void operator delete(void * ptr) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \\\n      EIGEN_DEVICE_FUNC \\\n      void operator delete[](void * ptr) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \\\n      EIGEN_DEVICE_FUNC \\\n      void operator delete(void * ptr, std::size_t /* sz */) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \\\n      EIGEN_DEVICE_FUNC \\\n      void operator delete[](void * ptr, std::size_t /* sz */) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \\\n      /* in-place new and delete. since (at least afaik) there is no actual   */ \\\n      /* memory allocated we can safely let the default implementation handle */ \\\n      /* this particular case. */ \\\n      EIGEN_DEVICE_FUNC \\\n      static void *operator new(std::size_t size, void *ptr) { return ::operator new(size,ptr); } \\\n      EIGEN_DEVICE_FUNC \\\n      static void *operator new[](std::size_t size, void* ptr) { return ::operator new[](size,ptr); } \\\n      EIGEN_DEVICE_FUNC \\\n      void operator delete(void * memory, void *ptr) EIGEN_NO_THROW { return ::operator delete(memory,ptr); } \\\n      EIGEN_DEVICE_FUNC \\\n      void operator delete[](void * memory, void *ptr) EIGEN_NO_THROW { return ::operator delete[](memory,ptr); } \\\n      /* nothrow-new (returns zero instead of std::bad_alloc) */ \\\n      EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \\\n      EIGEN_DEVICE_FUNC \\\n      void operator delete(void *ptr, const std::nothrow_t&) EIGEN_NO_THROW { \\\n        Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); \\\n      } \\\n      typedef void eigen_aligned_operator_new_marker_type;\n#else\n  #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign)\n#endif\n\n#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(true)\n#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar,Size)                        \\\n  EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(bool(                                                             \\\n        ((Size)!=Eigen::Dynamic) &&                                                                    \\\n        (((EIGEN_MAX_ALIGN_BYTES>=16) && ((sizeof(Scalar)*(Size))%(EIGEN_MAX_ALIGN_BYTES  )==0)) ||    \\\n         ((EIGEN_MAX_ALIGN_BYTES>=32) && ((sizeof(Scalar)*(Size))%(EIGEN_MAX_ALIGN_BYTES/2)==0)) ||    \\\n         ((EIGEN_MAX_ALIGN_BYTES>=64) && ((sizeof(Scalar)*(Size))%(EIGEN_MAX_ALIGN_BYTES/4)==0))   )))\n\n#endif\n\n/****************************************************************************/\n\n/** \\class aligned_allocator\n* \\ingroup Core_Module\n*\n* \\brief STL compatible allocator to use with types requiring a non standrad alignment.\n*\n* The memory is aligned as for dynamically aligned matrix/array types such as MatrixXd.\n* By default, it will thus provide at least 16 bytes alignment and more in following cases:\n*  - 32 bytes alignment if AVX is enabled.\n*  - 64 bytes alignment if AVX512 is 
enabled.\n*\n* This can be controlled using the \\c EIGEN_MAX_ALIGN_BYTES macro as documented\n* \\link TopicPreprocessorDirectivesPerformance there \\endlink.\n*\n* Example:\n* \\code\n* // Matrix4f requires 16 bytes alignment:\n* std::map< int, Matrix4f, std::less<int>,\n*           aligned_allocator<std::pair<const int, Matrix4f> > > my_map_mat4;\n* // Vector3f does not require 16 bytes alignment, no need to use Eigen's allocator:\n* std::map< int, Vector3f > my_map_vec3;\n* \\endcode\n*\n* \\sa \\blank \\ref TopicStlContainers.\n*/\ntemplate<class T>\nclass aligned_allocator : public std::allocator<T>\n{\npublic:\n  typedef std::size_t     size_type;\n  typedef std::ptrdiff_t  difference_type;\n  typedef T*              pointer;\n  typedef const T*        const_pointer;\n  typedef T&              reference;\n  typedef const T&        const_reference;\n  typedef T               value_type;\n\n  template<class U>\n  struct rebind\n  {\n    typedef aligned_allocator<U> other;\n  };\n\n  aligned_allocator() : std::allocator<T>() {}\n\n  aligned_allocator(const aligned_allocator& other) : std::allocator<T>(other) {}\n\n  template<class U>\n  aligned_allocator(const aligned_allocator<U>& other) : std::allocator<T>(other) {}\n\n  ~aligned_allocator() {}\n\n  #if EIGEN_COMP_GNUC_STRICT && EIGEN_GNUC_AT_LEAST(7,0)\n  // In gcc std::allocator::max_size() is bugged making gcc triggers a warning:\n  // eigen/Eigen/src/Core/util/Memory.h:189:12: warning: argument 1 value '18446744073709551612' exceeds maximum object size 9223372036854775807\n  // See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=87544\n  size_type max_size() const {\n    return (std::numeric_limits<std::ptrdiff_t>::max)()/sizeof(T);\n  }\n  #endif\n\n  pointer allocate(size_type num, const void* /*hint*/ = 0)\n  {\n    internal::check_size_for_overflow<T>(num);\n    return static_cast<pointer>( internal::aligned_malloc(num * sizeof(T)) );\n  }\n\n  void deallocate(pointer p, size_type /*num*/)\n  {\n    internal::aligned_free(p);\n  }\n};\n\n//---------- Cache sizes ----------\n\n#if !defined(EIGEN_NO_CPUID)\n#  if EIGEN_COMP_GNUC && EIGEN_ARCH_i386_OR_x86_64\n#    if defined(__PIC__) && EIGEN_ARCH_i386\n       // Case for x86 with PIC\n#      define EIGEN_CPUID(abcd,func,id) \\\n         __asm__ __volatile__ (\"xchgl %%ebx, %k1;cpuid; xchgl %%ebx,%k1\": \"=a\" (abcd[0]), \"=&r\" (abcd[1]), \"=c\" (abcd[2]), \"=d\" (abcd[3]) : \"a\" (func), \"c\" (id));\n#    elif defined(__PIC__) && EIGEN_ARCH_x86_64\n       // Case for x64 with PIC. 
In theory this is only a problem with recent gcc and with medium or large code model, not with the default small code model.\n       // However, we cannot detect which code model is used, and the xchg overhead is negligible anyway.\n#      define EIGEN_CPUID(abcd,func,id) \\\n        __asm__ __volatile__ (\"xchg{q}\\t{%%}rbx, %q1; cpuid; xchg{q}\\t{%%}rbx, %q1\": \"=a\" (abcd[0]), \"=&r\" (abcd[1]), \"=c\" (abcd[2]), \"=d\" (abcd[3]) : \"0\" (func), \"2\" (id));\n#    else\n       // Case for x86_64 or x86 w/o PIC\n#      define EIGEN_CPUID(abcd,func,id) \\\n         __asm__ __volatile__ (\"cpuid\": \"=a\" (abcd[0]), \"=b\" (abcd[1]), \"=c\" (abcd[2]), \"=d\" (abcd[3]) : \"0\" (func), \"2\" (id) );\n#    endif\n#  elif EIGEN_COMP_MSVC\n#    if (EIGEN_COMP_MSVC > 1500) && EIGEN_ARCH_i386_OR_x86_64\n#      define EIGEN_CPUID(abcd,func,id) __cpuidex((int*)abcd,func,id)\n#    endif\n#  endif\n#endif\n\nnamespace internal {\n\n#ifdef EIGEN_CPUID\n\ninline bool cpuid_is_vendor(int abcd[4], const int vendor[3])\n{\n  return abcd[1]==vendor[0] && abcd[3]==vendor[1] && abcd[2]==vendor[2];\n}\n\ninline void queryCacheSizes_intel_direct(int& l1, int& l2, int& l3)\n{\n  int abcd[4];\n  l1 = l2 = l3 = 0;\n  int cache_id = 0;\n  int cache_type = 0;\n  do {\n    abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;\n    EIGEN_CPUID(abcd,0x4,cache_id);\n    cache_type  = (abcd[0] & 0x0F) >> 0;\n    if(cache_type==1||cache_type==3) // data or unified cache\n    {\n      int cache_level = (abcd[0] & 0xE0) >> 5;  // A[7:5]\n      int ways        = (abcd[1] & 0xFFC00000) >> 22; // B[31:22]\n      int partitions  = (abcd[1] & 0x003FF000) >> 12; // B[21:12]\n      int line_size   = (abcd[1] & 0x00000FFF) >>  0; // B[11:0]\n      int sets        = (abcd[2]);                    // C[31:0]\n\n      int cache_size = (ways+1) * (partitions+1) * (line_size+1) * (sets+1);\n\n      switch(cache_level)\n      {\n        case 1: l1 = cache_size; break;\n        case 2: l2 = cache_size; break;\n        case 3: l3 = cache_size; break;\n        default: break;\n      }\n    }\n    cache_id++;\n  } while(cache_type>0 && cache_id<16);\n}\n\ninline void queryCacheSizes_intel_codes(int& l1, int& l2, int& l3)\n{\n  int abcd[4];\n  abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;\n  l1 = l2 = l3 = 0;\n  EIGEN_CPUID(abcd,0x00000002,0);\n  unsigned char * bytes = reinterpret_cast<unsigned char *>(abcd)+2;\n  bool check_for_p2_core2 = false;\n  for(int i=0; i<14; ++i)\n  {\n    switch(bytes[i])\n    {\n      case 0x0A: l1 = 8; break;   // 0Ah   data L1 cache, 8 KB, 2 ways, 32 byte lines\n      case 0x0C: l1 = 16; break;  // 0Ch   data L1 cache, 16 KB, 4 ways, 32 byte lines\n      case 0x0E: l1 = 24; break;  // 0Eh   data L1 cache, 24 KB, 6 ways, 64 byte lines\n      case 0x10: l1 = 16; break;  // 10h   data L1 cache, 16 KB, 4 ways, 32 byte lines (IA-64)\n      case 0x15: l1 = 16; break;  // 15h   code L1 cache, 16 KB, 4 ways, 32 byte lines (IA-64)\n      case 0x2C: l1 = 32; break;  // 2Ch   data L1 cache, 32 KB, 8 ways, 64 byte lines\n      case 0x30: l1 = 32; break;  // 30h   code L1 cache, 32 KB, 8 ways, 64 byte lines\n      case 0x60: l1 = 16; break;  // 60h   data L1 cache, 16 KB, 8 ways, 64 byte lines, sectored\n      case 0x66: l1 = 8; break;   // 66h   data L1 cache, 8 KB, 4 ways, 64 byte lines, sectored\n      case 0x67: l1 = 16; break;  // 67h   data L1 cache, 16 KB, 4 ways, 64 byte lines, sectored\n      case 0x68: l1 = 32; break;  // 68h   data L1 cache, 32 KB, 4 ways, 64 byte lines, sectored\n      case 0x1A: l2 = 96; break;   // code 
and data L2 cache, 96 KB, 6 ways, 64 byte lines (IA-64)\n      case 0x22: l3 = 512; break;   // code and data L3 cache, 512 KB, 4 ways (!), 64 byte lines, dual-sectored\n      case 0x23: l3 = 1024; break;   // code and data L3 cache, 1024 KB, 8 ways, 64 byte lines, dual-sectored\n      case 0x25: l3 = 2048; break;   // code and data L3 cache, 2048 KB, 8 ways, 64 byte lines, dual-sectored\n      case 0x29: l3 = 4096; break;   // code and data L3 cache, 4096 KB, 8 ways, 64 byte lines, dual-sectored\n      case 0x39: l2 = 128; break;   // code and data L2 cache, 128 KB, 4 ways, 64 byte lines, sectored\n      case 0x3A: l2 = 192; break;   // code and data L2 cache, 192 KB, 6 ways, 64 byte lines, sectored\n      case 0x3B: l2 = 128; break;   // code and data L2 cache, 128 KB, 2 ways, 64 byte lines, sectored\n      case 0x3C: l2 = 256; break;   // code and data L2 cache, 256 KB, 4 ways, 64 byte lines, sectored\n      case 0x3D: l2 = 384; break;   // code and data L2 cache, 384 KB, 6 ways, 64 byte lines, sectored\n      case 0x3E: l2 = 512; break;   // code and data L2 cache, 512 KB, 4 ways, 64 byte lines, sectored\n      case 0x40: l2 = 0; break;   // no integrated L2 cache (P6 core) or L3 cache (P4 core)\n      case 0x41: l2 = 128; break;   // code and data L2 cache, 128 KB, 4 ways, 32 byte lines\n      case 0x42: l2 = 256; break;   // code and data L2 cache, 256 KB, 4 ways, 32 byte lines\n      case 0x43: l2 = 512; break;   // code and data L2 cache, 512 KB, 4 ways, 32 byte lines\n      case 0x44: l2 = 1024; break;   // code and data L2 cache, 1024 KB, 4 ways, 32 byte lines\n      case 0x45: l2 = 2048; break;   // code and data L2 cache, 2048 KB, 4 ways, 32 byte lines\n      case 0x46: l3 = 4096; break;   // code and data L3 cache, 4096 KB, 4 ways, 64 byte lines\n      case 0x47: l3 = 8192; break;   // code and data L3 cache, 8192 KB, 8 ways, 64 byte lines\n      case 0x48: l2 = 3072; break;   // code and data L2 cache, 3072 KB, 12 ways, 64 byte lines\n      case 0x49: if(l2!=0) l3 = 4096; else {check_for_p2_core2=true; l3 = l2 = 4096;} break;// code and data L3 cache, 4096 KB, 16 ways, 64 byte lines (P4) or L2 for core2\n      case 0x4A: l3 = 6144; break;   // code and data L3 cache, 6144 KB, 12 ways, 64 byte lines\n      case 0x4B: l3 = 8192; break;   // code and data L3 cache, 8192 KB, 16 ways, 64 byte lines\n      case 0x4C: l3 = 12288; break;   // code and data L3 cache, 12288 KB, 12 ways, 64 byte lines\n      case 0x4D: l3 = 16384; break;   // code and data L3 cache, 16384 KB, 16 ways, 64 byte lines\n      case 0x4E: l2 = 6144; break;   // code and data L2 cache, 6144 KB, 24 ways, 64 byte lines\n      case 0x78: l2 = 1024; break;   // code and data L2 cache, 1024 KB, 4 ways, 64 byte lines\n      case 0x79: l2 = 128; break;   // code and data L2 cache, 128 KB, 8 ways, 64 byte lines, dual-sectored\n      case 0x7A: l2 = 256; break;   // code and data L2 cache, 256 KB, 8 ways, 64 byte lines, dual-sectored\n      case 0x7B: l2 = 512; break;   // code and data L2 cache, 512 KB, 8 ways, 64 byte lines, dual-sectored\n      case 0x7C: l2 = 1024; break;   // code and data L2 cache, 1024 KB, 8 ways, 64 byte lines, dual-sectored\n      case 0x7D: l2 = 2048; break;   // code and data L2 cache, 2048 KB, 8 ways, 64 byte lines\n      case 0x7E: l2 = 256; break;   // code and data L2 cache, 256 KB, 8 ways, 128 byte lines, sect. 
(IA-64)\n      case 0x7F: l2 = 512; break;   // code and data L2 cache, 512 KB, 2 ways, 64 byte lines\n      case 0x80: l2 = 512; break;   // code and data L2 cache, 512 KB, 8 ways, 64 byte lines\n      case 0x81: l2 = 128; break;   // code and data L2 cache, 128 KB, 8 ways, 32 byte lines\n      case 0x82: l2 = 256; break;   // code and data L2 cache, 256 KB, 8 ways, 32 byte lines\n      case 0x83: l2 = 512; break;   // code and data L2 cache, 512 KB, 8 ways, 32 byte lines\n      case 0x84: l2 = 1024; break;   // code and data L2 cache, 1024 KB, 8 ways, 32 byte lines\n      case 0x85: l2 = 2048; break;   // code and data L2 cache, 2048 KB, 8 ways, 32 byte lines\n      case 0x86: l2 = 512; break;   // code and data L2 cache, 512 KB, 4 ways, 64 byte lines\n      case 0x87: l2 = 1024; break;   // code and data L2 cache, 1024 KB, 8 ways, 64 byte lines\n      case 0x88: l3 = 2048; break;   // code and data L3 cache, 2048 KB, 4 ways, 64 byte lines (IA-64)\n      case 0x89: l3 = 4096; break;   // code and data L3 cache, 4096 KB, 4 ways, 64 byte lines (IA-64)\n      case 0x8A: l3 = 8192; break;   // code and data L3 cache, 8192 KB, 4 ways, 64 byte lines (IA-64)\n      case 0x8D: l3 = 3072; break;   // code and data L3 cache, 3072 KB, 12 ways, 128 byte lines (IA-64)\n\n      default: break;\n    }\n  }\n  if(check_for_p2_core2 && l2 == l3)\n    l3 = 0;\n  l1 *= 1024;\n  l2 *= 1024;\n  l3 *= 1024;\n}\n\ninline void queryCacheSizes_intel(int& l1, int& l2, int& l3, int max_std_funcs)\n{\n  if(max_std_funcs>=4)\n    queryCacheSizes_intel_direct(l1,l2,l3);\n  else if(max_std_funcs>=2)\n    queryCacheSizes_intel_codes(l1,l2,l3);\n  else\n    l1 = l2 = l3 = 0;\n}\n\ninline void queryCacheSizes_amd(int& l1, int& l2, int& l3)\n{\n  int abcd[4];\n  abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;\n  \n  // First query the max supported function.\n  EIGEN_CPUID(abcd,0x80000000,0);\n  if(static_cast<numext::uint32_t>(abcd[0]) >= static_cast<numext::uint32_t>(0x80000006))\n  {\n    EIGEN_CPUID(abcd,0x80000005,0);\n    l1 = (abcd[2] >> 24) * 1024; // C[31:24] = L1 size in KB\n    abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;\n    EIGEN_CPUID(abcd,0x80000006,0);\n    l2 = (abcd[2] >> 16) * 1024; // C[31;16] = l2 cache size in KB\n    l3 = ((abcd[3] & 0xFFFC000) >> 18) * 512 * 1024; // D[31;18] = l3 cache size in 512KB\n  }\n  else\n  {\n    l1 = l2 = l3 = 0;\n  }\n}\n#endif\n\n/** \\internal\n * Queries and returns the cache sizes in Bytes of the L1, L2, and L3 data caches respectively */\ninline void queryCacheSizes(int& l1, int& l2, int& l3)\n{\n  #ifdef EIGEN_CPUID\n  int abcd[4];\n  const int GenuineIntel[] = {0x756e6547, 0x49656e69, 0x6c65746e};\n  const int AuthenticAMD[] = {0x68747541, 0x69746e65, 0x444d4163};\n  const int AMDisbetter_[] = {0x69444d41, 0x74656273, 0x21726574}; // \"AMDisbetter!\"\n\n  // identify the CPU vendor\n  EIGEN_CPUID(abcd,0x0,0);\n  int max_std_funcs = abcd[0];\n  if(cpuid_is_vendor(abcd,GenuineIntel))\n    queryCacheSizes_intel(l1,l2,l3,max_std_funcs);\n  else if(cpuid_is_vendor(abcd,AuthenticAMD) || cpuid_is_vendor(abcd,AMDisbetter_))\n    queryCacheSizes_amd(l1,l2,l3);\n  else\n    // by default let's use Intel's API\n    queryCacheSizes_intel(l1,l2,l3,max_std_funcs);\n\n  // here is the list of other vendors:\n//   ||cpuid_is_vendor(abcd,\"VIA VIA VIA \")\n//   ||cpuid_is_vendor(abcd,\"CyrixInstead\")\n//   ||cpuid_is_vendor(abcd,\"CentaurHauls\")\n//   ||cpuid_is_vendor(abcd,\"GenuineTMx86\")\n//   ||cpuid_is_vendor(abcd,\"TransmetaCPU\")\n//   
||cpuid_is_vendor(abcd,\"RiseRiseRise\")\n//   ||cpuid_is_vendor(abcd,\"Geode by NSC\")\n//   ||cpuid_is_vendor(abcd,\"SiS SiS SiS \")\n//   ||cpuid_is_vendor(abcd,\"UMC UMC UMC \")\n//   ||cpuid_is_vendor(abcd,\"NexGenDriven\")\n  #else\n  l1 = l2 = l3 = -1;\n  #endif\n}\n\n/** \\internal\n * \\returns the size in Bytes of the L1 data cache */\ninline int queryL1CacheSize()\n{\n  int l1(-1), l2, l3;\n  queryCacheSizes(l1,l2,l3);\n  return l1;\n}\n\n/** \\internal\n * \\returns the size in Bytes of the L2 or L3 cache if the latter is present */\ninline int queryTopLevelCacheSize()\n{\n  int l1, l2(-1), l3(-1);\n  queryCacheSizes(l1,l2,l3);\n  return (std::max)(l2,l3);\n}\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_MEMORY_H\n"
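// Editor's note: a minimal standalone sketch, separate from the vendored
// Eigen sources above, illustrating two pieces of alignment arithmetic they
// rely on: rounding an address up to a power-of-two boundary, as in
// EIGEN_ALIGNED_ALLOCA's (ptr + Align-1) & ~(Align-1), and locating the first
// element of an array whose address is a multiple of a requested alignment,
// as in internal::first_aligned. The helper names and the main() checks are
// illustrative assumptions, not Eigen API.
#include <cassert>
#include <cstddef>
#include <cstdint>

// Round p up to the next multiple of align (align must be a power of two).
static std::uintptr_t round_up(std::uintptr_t p, std::uintptr_t align) {
  return (p + align - 1) & ~(align - 1);
}

// Index of the first Align-byte-aligned element of array, mirroring the three
// cases of internal::first_aligned: everything already aligned, nothing
// alignable (returns size), or the usual masked offset computation.
template <std::size_t Align, typename Scalar>
static std::size_t first_aligned_index(const Scalar* array, std::size_t size) {
  const std::size_t per_block = Align / sizeof(Scalar);
  const std::uintptr_t addr = reinterpret_cast<std::uintptr_t>(array);
  if (per_block <= 1)
    return 0;                       // alignment <= one scalar: all elements qualify
  if (addr % sizeof(Scalar) != 0 || Align % sizeof(Scalar) != 0)
    return size;                    // no element of the array can be well aligned
  const std::size_t mask = per_block - 1;
  const std::size_t first = (per_block - ((addr / sizeof(Scalar)) & mask)) & mask;
  return first < size ? first : size;
}

int main() {
  assert(round_up(17, 16) == 32);                    // 17 -> next 16-byte boundary
  alignas(16) float buf[8];                          // buf itself is 16-byte aligned
  assert(first_aligned_index<16>(buf + 1, 7) == 3);  // buf+4 is the next boundary
  return 0;
}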
  },
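// Editor's note: to make the bit arithmetic of queryCacheSizes_intel_direct
// in the file above concrete, a hedged sketch decoding one CPUID leaf-0x4
// register set. The field layout follows the comments in that function
// (EBX[31:22] = ways-1, EBX[21:12] = partitions-1, EBX[11:0] = line size-1,
// ECX = sets-1); the register values in main() are made up for illustration.
#include <cassert>

// size = (ways+1) * (partitions+1) * (line_size+1) * (sets+1)
static int cache_size_from_leaf4(unsigned eax, unsigned ebx, unsigned ecx) {
  (void)eax;  // EAX carries the cache type/level bits; the size needs only EBX/ECX
  int ways       = (ebx & 0xFFC00000u) >> 22; // EBX[31:22]
  int partitions = (ebx & 0x003FF000u) >> 12; // EBX[21:12]
  int line_size  = (ebx & 0x00000FFFu);       // EBX[11:0]
  int sets       = static_cast<int>(ecx);     // ECX[31:0]
  return (ways + 1) * (partitions + 1) * (line_size + 1) * (sets + 1);
}

int main() {
  // Hypothetical 8-way, 64-byte-line, 64-set L1D cache: 8 * 1 * 64 * 64 = 32 KB.
  unsigned ebx = (7u << 22) | (0u << 12) | 63u;
  assert(cache_size_from_leaf4(0, ebx, 63) == 32 * 1024);
  return 0;
}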
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/util/Meta.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008-2015 Gael Guennebaud <gael.guennebaud@inria.fr>\n// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_META_H\n#define EIGEN_META_H\n\n#if defined(EIGEN_GPU_COMPILE_PHASE)\n\n #include <cfloat>\n\n #if defined(EIGEN_CUDA_ARCH)\n  #include <math_constants.h>\n #endif\n\n #if defined(EIGEN_HIP_DEVICE_COMPILE)\n  #include \"Eigen/src/Core/arch/HIP/hcc/math_constants.h\"\n  #endif\n\n#endif\n\n// Recent versions of ICC require <cstdint> for pointer types below.\n#define EIGEN_ICC_NEEDS_CSTDINT (EIGEN_COMP_ICC>=1600 && EIGEN_COMP_CXXVER >= 11)\n\n// Define portable (u)int{32,64} types\n#if EIGEN_HAS_CXX11 || EIGEN_ICC_NEEDS_CSTDINT\n#include <cstdint>\nnamespace Eigen {\nnamespace numext {\ntypedef std::uint8_t  uint8_t;\ntypedef std::int8_t   int8_t;\ntypedef std::uint16_t uint16_t;\ntypedef std::int16_t  int16_t;\ntypedef std::uint32_t uint32_t;\ntypedef std::int32_t  int32_t;\ntypedef std::uint64_t uint64_t;\ntypedef std::int64_t  int64_t;\n}\n}\n#else\n// Without c++11, all compilers able to compile Eigen also\n// provide the C99 stdint.h header file.\n#include <stdint.h>\nnamespace Eigen {\nnamespace numext {\ntypedef ::uint8_t  uint8_t;\ntypedef ::int8_t   int8_t;\ntypedef ::uint16_t uint16_t;\ntypedef ::int16_t  int16_t;\ntypedef ::uint32_t uint32_t;\ntypedef ::int32_t  int32_t;\ntypedef ::uint64_t uint64_t;\ntypedef ::int64_t  int64_t;\n}\n}\n#endif\n\nnamespace Eigen {\n\ntypedef EIGEN_DEFAULT_DENSE_INDEX_TYPE DenseIndex;\n\n/**\n * \\brief The Index type as used for the API.\n * \\details To change this, \\c \\#define the preprocessor symbol \\c EIGEN_DEFAULT_DENSE_INDEX_TYPE.\n * \\sa \\blank \\ref TopicPreprocessorDirectives, StorageIndex.\n */\n\ntypedef EIGEN_DEFAULT_DENSE_INDEX_TYPE Index;\n\nnamespace internal {\n\n/** \\internal\n  * \\file Meta.h\n  * This file contains generic metaprogramming classes which are not specifically related to Eigen.\n  * \\note In case you wonder, yes we're aware that Boost already provides all these features,\n  * we however don't want to add a dependency to Boost.\n  */\n\n// Only recent versions of ICC complain about using ptrdiff_t to hold pointers,\n// and older versions do not provide *intptr_t types.\n#if EIGEN_ICC_NEEDS_CSTDINT\ntypedef std::intptr_t  IntPtr;\ntypedef std::uintptr_t UIntPtr;\n#else\ntypedef std::ptrdiff_t IntPtr;\ntypedef std::size_t UIntPtr;\n#endif\n#undef EIGEN_ICC_NEEDS_CSTDINT\n\nstruct true_type {  enum { value = 1 }; };\nstruct false_type { enum { value = 0 }; };\n\ntemplate<bool Condition>\nstruct bool_constant;\n\ntemplate<>\nstruct bool_constant<true> : true_type {};\n\ntemplate<>\nstruct bool_constant<false> : false_type {};\n\ntemplate<bool Condition, typename Then, typename Else>\nstruct conditional { typedef Then type; };\n\ntemplate<typename Then, typename Else>\nstruct conditional <false, Then, Else> { typedef Else type; };\n\ntemplate<typename T> struct remove_reference { typedef T type; };\ntemplate<typename T> struct remove_reference<T&> { typedef T type; };\n\ntemplate<typename T> struct remove_pointer { typedef T type; };\ntemplate<typename T> struct remove_pointer<T*> { typedef T type; };\ntemplate<typename T> struct 
remove_pointer<T*const> { typedef T type; };\n\ntemplate <class T> struct remove_const { typedef T type; };\ntemplate <class T> struct remove_const<const T> { typedef T type; };\ntemplate <class T> struct remove_const<const T[]> { typedef T type[]; };\ntemplate <class T, unsigned int Size> struct remove_const<const T[Size]> { typedef T type[Size]; };\n\ntemplate<typename T> struct remove_all { typedef T type; };\ntemplate<typename T> struct remove_all<const T>   { typedef typename remove_all<T>::type type; };\ntemplate<typename T> struct remove_all<T const&>  { typedef typename remove_all<T>::type type; };\ntemplate<typename T> struct remove_all<T&>        { typedef typename remove_all<T>::type type; };\ntemplate<typename T> struct remove_all<T const*>  { typedef typename remove_all<T>::type type; };\ntemplate<typename T> struct remove_all<T*>        { typedef typename remove_all<T>::type type; };\n\ntemplate<typename T> struct is_arithmetic      { enum { value = false }; };\ntemplate<> struct is_arithmetic<float>         { enum { value = true }; };\ntemplate<> struct is_arithmetic<double>        { enum { value = true }; };\ntemplate<> struct is_arithmetic<long double>   { enum { value = true }; };\ntemplate<> struct is_arithmetic<bool>          { enum { value = true }; };\ntemplate<> struct is_arithmetic<char>          { enum { value = true }; };\ntemplate<> struct is_arithmetic<signed char>   { enum { value = true }; };\ntemplate<> struct is_arithmetic<unsigned char> { enum { value = true }; };\ntemplate<> struct is_arithmetic<signed short>  { enum { value = true }; };\ntemplate<> struct is_arithmetic<unsigned short>{ enum { value = true }; };\ntemplate<> struct is_arithmetic<signed int>    { enum { value = true }; };\ntemplate<> struct is_arithmetic<unsigned int>  { enum { value = true }; };\ntemplate<> struct is_arithmetic<signed long>   { enum { value = true }; };\ntemplate<> struct is_arithmetic<unsigned long> { enum { value = true }; };\n\ntemplate<typename T, typename U> struct is_same { enum { value = 0 }; };\ntemplate<typename T> struct is_same<T,T> { enum { value = 1 }; };\n\ntemplate< class T >\nstruct is_void : is_same<void, typename remove_const<T>::type> {};\n\n#if EIGEN_HAS_CXX11\ntemplate<> struct is_arithmetic<signed long long>   { enum { value = true }; };\ntemplate<> struct is_arithmetic<unsigned long long> { enum { value = true }; };\nusing std::is_integral;\n#else\ntemplate<typename T> struct is_integral               { enum { value = false }; };\ntemplate<> struct is_integral<bool>                   { enum { value = true }; };\ntemplate<> struct is_integral<char>                   { enum { value = true }; };\ntemplate<> struct is_integral<signed char>            { enum { value = true }; };\ntemplate<> struct is_integral<unsigned char>          { enum { value = true }; };\ntemplate<> struct is_integral<signed short>           { enum { value = true }; };\ntemplate<> struct is_integral<unsigned short>         { enum { value = true }; };\ntemplate<> struct is_integral<signed int>             { enum { value = true }; };\ntemplate<> struct is_integral<unsigned int>           { enum { value = true }; };\ntemplate<> struct is_integral<signed long>            { enum { value = true }; };\ntemplate<> struct is_integral<unsigned long>          { enum { value = true }; };\n#if EIGEN_COMP_MSVC\ntemplate<> struct is_integral<signed __int64>         { enum { value = true }; };\ntemplate<> struct is_integral<unsigned __int64>       { enum { value = true }; };\n#endif\n#endif\n\n#if 
EIGEN_HAS_CXX11\nusing std::make_unsigned;\n#else\n// TODO: Possibly improve this implementation of make_unsigned.\n// It is currently used only by\n// template<typename Scalar> struct random_default_impl<Scalar, false, true>.\ntemplate<typename> struct make_unsigned;\ntemplate<> struct make_unsigned<char>             { typedef unsigned char type; };\ntemplate<> struct make_unsigned<signed char>      { typedef unsigned char type; };\ntemplate<> struct make_unsigned<unsigned char>    { typedef unsigned char type; };\ntemplate<> struct make_unsigned<signed short>     { typedef unsigned short type; };\ntemplate<> struct make_unsigned<unsigned short>   { typedef unsigned short type; };\ntemplate<> struct make_unsigned<signed int>       { typedef unsigned int type; };\ntemplate<> struct make_unsigned<unsigned int>     { typedef unsigned int type; };\ntemplate<> struct make_unsigned<signed long>      { typedef unsigned long type; };\ntemplate<> struct make_unsigned<unsigned long>    { typedef unsigned long type; };\n#if EIGEN_COMP_MSVC\ntemplate<> struct make_unsigned<signed __int64>   { typedef unsigned __int64 type; };\ntemplate<> struct make_unsigned<unsigned __int64> { typedef unsigned __int64 type; };\n#endif\n\n// Some platforms define int64_t as `long long` even for C++03, where\n// `long long` is not guaranteed by the standard. In this case we are missing\n// the definition for make_unsigned. If we just define it, we run into issues\n// where `long long` doesn't exist in some compilers for C++03. We therefore add\n// the specialization for these platforms only.\n#if EIGEN_OS_MAC || EIGEN_COMP_MINGW\ntemplate<> struct make_unsigned<unsigned long long> { typedef unsigned long long type; };\ntemplate<> struct make_unsigned<long long>          { typedef unsigned long long type; };\n#endif\n#endif\n\ntemplate <typename T> struct add_const { typedef const T type; };\ntemplate <typename T> struct add_const<T&> { typedef T& type; };\n\ntemplate <typename T> struct is_const { enum { value = 0 }; };\ntemplate <typename T> struct is_const<T const> { enum { value = 1 }; };\n\ntemplate<typename T> struct add_const_on_value_type            { typedef const T type;  };\ntemplate<typename T> struct add_const_on_value_type<T&>        { typedef T const& type; };\ntemplate<typename T> struct add_const_on_value_type<T*>        { typedef T const* type; };\ntemplate<typename T> struct add_const_on_value_type<T* const>  { typedef T const* const type; };\ntemplate<typename T> struct add_const_on_value_type<T const* const>  { typedef T const* const type; };\n\n#if EIGEN_HAS_CXX11\n\nusing std::is_convertible;\n\n#else\n\ntemplate<typename From, typename To>\nstruct is_convertible_impl\n{\nprivate:\n  struct any_conversion\n  {\n    template <typename T> any_conversion(const volatile T&);\n    template <typename T> any_conversion(T&);\n  };\n  struct yes {int a[1];};\n  struct no  {int a[2];};\n\n  template<typename T>\n  static yes test(T, int);\n\n  template<typename T>\n  static no  test(any_conversion, ...);\n\npublic:\n  static typename internal::remove_reference<From>::type* ms_from;\n#ifdef __INTEL_COMPILER\n  #pragma warning push\n  #pragma warning ( disable : 2259 )\n#endif\n  enum { value = sizeof(test<To>(*ms_from, 0))==sizeof(yes) };\n#ifdef __INTEL_COMPILER\n  #pragma warning pop\n#endif\n};\n\ntemplate<typename From, typename To>\nstruct is_convertible\n{\n  enum { value = is_convertible_impl<From,To>::value };\n};\n\ntemplate<typename T>\nstruct is_convertible<T,T&> { enum { value = false }; 
};\n\ntemplate<typename T>\nstruct is_convertible<const T,const T&> { enum { value = true }; };\n\n#endif\n\n/** \\internal Allows to enable/disable an overload\n  * according to a compile time condition.\n  */\ntemplate<bool Condition, typename T=void> struct enable_if;\n\ntemplate<typename T> struct enable_if<true,T>\n{ typedef T type; };\n\n#if defined(EIGEN_GPU_COMPILE_PHASE) && !EIGEN_HAS_CXX11\n#if !defined(__FLT_EPSILON__)\n#define __FLT_EPSILON__ FLT_EPSILON\n#define __DBL_EPSILON__ DBL_EPSILON\n#endif\n\nnamespace device {\n\ntemplate<typename T> struct numeric_limits\n{\n  EIGEN_DEVICE_FUNC\n  static EIGEN_CONSTEXPR T epsilon() { return 0; }\n  static T (max)() { assert(false && \"Highest not supported for this type\"); }\n  static T (min)() { assert(false && \"Lowest not supported for this type\"); }\n  static T infinity() { assert(false && \"Infinity not supported for this type\"); }\n  static T quiet_NaN() { assert(false && \"quiet_NaN not supported for this type\"); }\n};\ntemplate<> struct numeric_limits<float>\n{\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n  static float epsilon() { return __FLT_EPSILON__; }\n  EIGEN_DEVICE_FUNC\n  static float (max)() {\n  #if defined(EIGEN_CUDA_ARCH)\n    return CUDART_MAX_NORMAL_F;\n  #else\n    return HIPRT_MAX_NORMAL_F;\n  #endif\n  }\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n  static float (min)() { return FLT_MIN; }\n  EIGEN_DEVICE_FUNC\n  static float infinity() {\n  #if defined(EIGEN_CUDA_ARCH)\n    return CUDART_INF_F;\n  #else\n    return HIPRT_INF_F;\n  #endif\n  }\n  EIGEN_DEVICE_FUNC\n  static float quiet_NaN() {\n  #if defined(EIGEN_CUDA_ARCH)\n    return CUDART_NAN_F;\n  #else\n    return HIPRT_NAN_F;\n  #endif\n  }\n};\ntemplate<> struct numeric_limits<double>\n{\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n  static double epsilon() { return __DBL_EPSILON__; }\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n  static double (max)() { return DBL_MAX; }\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n  static double (min)() { return DBL_MIN; }\n  EIGEN_DEVICE_FUNC\n  static double infinity() {\n  #if defined(EIGEN_CUDA_ARCH)\n    return CUDART_INF;\n  #else\n    return HIPRT_INF;\n  #endif\n  }\n  EIGEN_DEVICE_FUNC\n  static double quiet_NaN() {\n  #if defined(EIGEN_CUDA_ARCH)\n    return CUDART_NAN;\n  #else\n    return HIPRT_NAN;\n  #endif\n  }\n};\ntemplate<> struct numeric_limits<int>\n{\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n  static int epsilon() { return 0; }\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n  static int (max)() { return INT_MAX; }\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n  static int (min)() { return INT_MIN; }\n};\ntemplate<> struct numeric_limits<unsigned int>\n{\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n  static unsigned int epsilon() { return 0; }\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n  static unsigned int (max)() { return UINT_MAX; }\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n  static unsigned int (min)() { return 0; }\n};\ntemplate<> struct numeric_limits<long>\n{\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n  static long epsilon() { return 0; }\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n  static long (max)() { return LONG_MAX; }\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n  static long (min)() { return LONG_MIN; }\n};\ntemplate<> struct numeric_limits<unsigned long>\n{\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n  static unsigned long epsilon() { return 0; }\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n  static unsigned long (max)() { return ULONG_MAX; }\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n  static unsigned long (min)() { return 0; }\n};\ntemplate<> struct numeric_limits<long 
long>\n{\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n  static long long epsilon() { return 0; }\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n  static long long (max)() { return LLONG_MAX; }\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n  static long long (min)() { return LLONG_MIN; }\n};\ntemplate<> struct numeric_limits<unsigned long long>\n{\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n  static unsigned long long epsilon() { return 0; }\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n  static unsigned long long (max)() { return ULLONG_MAX; }\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n  static unsigned long long (min)() { return 0; }\n};\ntemplate<> struct numeric_limits<bool>\n{\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n  static bool epsilon() { return false; }\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n  static bool (max)() { return true; }\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR \n  static bool (min)() { return false; }\n};\n\n}\n\n#endif // defined(EIGEN_GPU_COMPILE_PHASE) && !EIGEN_HAS_CXX11\n\n/** \\internal\n  * A base class do disable default copy ctor and copy assignment operator.\n  */\nclass noncopyable\n{\n  EIGEN_DEVICE_FUNC noncopyable(const noncopyable&);\n  EIGEN_DEVICE_FUNC const noncopyable& operator=(const noncopyable&);\nprotected:\n  EIGEN_DEVICE_FUNC noncopyable() {}\n  EIGEN_DEVICE_FUNC ~noncopyable() {}\n};\n\n/** \\internal\n  * Provides access to the number of elements in the object of as a compile-time constant expression.\n  * It \"returns\" Eigen::Dynamic if the size cannot be resolved at compile-time (default).\n  *\n  * Similar to std::tuple_size, but more general.\n  *\n  * It currently supports:\n  *  - any types T defining T::SizeAtCompileTime\n  *  - plain C arrays as T[N]\n  *  - std::array (c++11)\n  *  - some internal types such as SingleRange and AllRange\n  *\n  * The second template parameter eases SFINAE-based specializations.\n  */\ntemplate<typename T, typename EnableIf = void> struct array_size {\n  enum { value = Dynamic };\n};\n\ntemplate<typename T> struct array_size<T,typename internal::enable_if<((T::SizeAtCompileTime&0)==0)>::type> {\n  enum { value = T::SizeAtCompileTime };\n};\n\ntemplate<typename T, int N> struct array_size<const T (&)[N]> {\n  enum { value = N };\n};\ntemplate<typename T, int N> struct array_size<T (&)[N]> {\n  enum { value = N };\n};\n\n#if EIGEN_HAS_CXX11\ntemplate<typename T, std::size_t N> struct array_size<const std::array<T,N> > {\n  enum { value = N };\n};\ntemplate<typename T, std::size_t N> struct array_size<std::array<T,N> > {\n  enum { value = N };\n};\n#endif\n\n/** \\internal\n  * Analogue of the std::size free function.\n  * It returns the size of the container or view \\a x of type \\c T\n  *\n  * It currently supports:\n  *  - any types T defining a member T::size() const\n  *  - plain C arrays as T[N]\n  *\n  */\ntemplate<typename T>\nEIGEN_CONSTEXPR Index size(const T& x) { return x.size(); }\n\ntemplate<typename T,std::size_t N>\nEIGEN_CONSTEXPR Index size(const T (&) [N]) { return N; }\n\n/** \\internal\n  * Convenient struct to get the result type of a nullary, unary, binary, or\n  * ternary functor.\n  * \n  * Pre C++11:\n  * Supports both a Func::result_type member and templated\n  * Func::result<Func(ArgTypes...)>::type member.\n  * \n  * If none of these members is provided, then the type of the first\n  * argument is returned.\n  * \n  * Post C++11:\n  * This uses std::result_of. 
However, note the `type` member removes\n  * const and converts references/pointers to their corresponding value type.\n  */\n#if EIGEN_HAS_STD_INVOKE_RESULT\ntemplate<typename T> struct result_of;\n\ntemplate<typename F, typename... ArgTypes>\nstruct result_of<F(ArgTypes...)> {\n  typedef typename std::invoke_result<F, ArgTypes...>::type type1;\n  typedef typename remove_all<type1>::type type;\n};\n#elif EIGEN_HAS_STD_RESULT_OF\ntemplate<typename T> struct result_of {\n  typedef typename std::result_of<T>::type type1;\n  typedef typename remove_all<type1>::type type;\n};\n#else\ntemplate<typename T> struct result_of { };\n\nstruct has_none {int a[1];};\nstruct has_std_result_type {int a[2];};\nstruct has_tr1_result {int a[3];};\n\ntemplate<typename Func, int SizeOf>\nstruct nullary_result_of_select {};\n\ntemplate<typename Func>\nstruct nullary_result_of_select<Func, sizeof(has_std_result_type)> {typedef typename Func::result_type type;};\n\ntemplate<typename Func>\nstruct nullary_result_of_select<Func, sizeof(has_tr1_result)> {typedef typename Func::template result<Func()>::type type;};\n\ntemplate<typename Func>\nstruct result_of<Func()> {\n    template<typename T>\n    static has_std_result_type    testFunctor(T const *, typename T::result_type const * = 0);\n    template<typename T>\n    static has_tr1_result         testFunctor(T const *, typename T::template result<T()>::type const * = 0);\n    static has_none               testFunctor(...);\n\n    // note that the following indirection is needed for gcc-3.3\n    enum {FunctorType = sizeof(testFunctor(static_cast<Func*>(0)))};\n    typedef typename nullary_result_of_select<Func, FunctorType>::type type;\n};\n\ntemplate<typename Func, typename ArgType, int SizeOf=sizeof(has_none)>\nstruct unary_result_of_select {typedef typename internal::remove_all<ArgType>::type type;};\n\ntemplate<typename Func, typename ArgType>\nstruct unary_result_of_select<Func, ArgType, sizeof(has_std_result_type)> {typedef typename Func::result_type type;};\n\ntemplate<typename Func, typename ArgType>\nstruct unary_result_of_select<Func, ArgType, sizeof(has_tr1_result)> {typedef typename Func::template result<Func(ArgType)>::type type;};\n\ntemplate<typename Func, typename ArgType>\nstruct result_of<Func(ArgType)> {\n    template<typename T>\n    static has_std_result_type    testFunctor(T const *, typename T::result_type const * = 0);\n    template<typename T>\n    static has_tr1_result         testFunctor(T const *, typename T::template result<T(ArgType)>::type const * = 0);\n    static has_none               testFunctor(...);\n\n    // note that the following indirection is needed for gcc-3.3\n    enum {FunctorType = sizeof(testFunctor(static_cast<Func*>(0)))};\n    typedef typename unary_result_of_select<Func, ArgType, FunctorType>::type type;\n};\n\ntemplate<typename Func, typename ArgType0, typename ArgType1, int SizeOf=sizeof(has_none)>\nstruct binary_result_of_select {typedef typename internal::remove_all<ArgType0>::type type;};\n\ntemplate<typename Func, typename ArgType0, typename ArgType1>\nstruct binary_result_of_select<Func, ArgType0, ArgType1, sizeof(has_std_result_type)>\n{typedef typename Func::result_type type;};\n\ntemplate<typename Func, typename ArgType0, typename ArgType1>\nstruct binary_result_of_select<Func, ArgType0, ArgType1, sizeof(has_tr1_result)>\n{typedef typename Func::template result<Func(ArgType0,ArgType1)>::type type;};\n\ntemplate<typename Func, typename ArgType0, typename ArgType1>\nstruct result_of<Func(ArgType0,ArgType1)> 
{\n    template<typename T>\n    static has_std_result_type    testFunctor(T const *, typename T::result_type const * = 0);\n    template<typename T>\n    static has_tr1_result         testFunctor(T const *, typename T::template result<T(ArgType0,ArgType1)>::type const * = 0);\n    static has_none               testFunctor(...);\n\n    // note that the following indirection is needed for gcc-3.3\n    enum {FunctorType = sizeof(testFunctor(static_cast<Func*>(0)))};\n    typedef typename binary_result_of_select<Func, ArgType0, ArgType1, FunctorType>::type type;\n};\n\ntemplate<typename Func, typename ArgType0, typename ArgType1, typename ArgType2, int SizeOf=sizeof(has_none)>\nstruct ternary_result_of_select {typedef typename internal::remove_all<ArgType0>::type type;};\n\ntemplate<typename Func, typename ArgType0, typename ArgType1, typename ArgType2>\nstruct ternary_result_of_select<Func, ArgType0, ArgType1, ArgType2, sizeof(has_std_result_type)>\n{typedef typename Func::result_type type;};\n\ntemplate<typename Func, typename ArgType0, typename ArgType1, typename ArgType2>\nstruct ternary_result_of_select<Func, ArgType0, ArgType1, ArgType2, sizeof(has_tr1_result)>\n{typedef typename Func::template result<Func(ArgType0,ArgType1,ArgType2)>::type type;};\n\ntemplate<typename Func, typename ArgType0, typename ArgType1, typename ArgType2>\nstruct result_of<Func(ArgType0,ArgType1,ArgType2)> {\n    template<typename T>\n    static has_std_result_type    testFunctor(T const *, typename T::result_type const * = 0);\n    template<typename T>\n    static has_tr1_result         testFunctor(T const *, typename T::template result<T(ArgType0,ArgType1,ArgType2)>::type const * = 0);\n    static has_none               testFunctor(...);\n\n    // note that the following indirection is needed for gcc-3.3\n    enum {FunctorType = sizeof(testFunctor(static_cast<Func*>(0)))};\n    typedef typename ternary_result_of_select<Func, ArgType0, ArgType1, ArgType2, FunctorType>::type type;\n};\n\n#endif\n\n#if EIGEN_HAS_STD_INVOKE_RESULT\ntemplate<typename F, typename... ArgTypes>\nstruct invoke_result {\n  typedef typename std::invoke_result<F, ArgTypes...>::type type1;\n  typedef typename remove_all<type1>::type type;\n};\n#elif EIGEN_HAS_CXX11\ntemplate<typename F, typename... 
ArgTypes>\nstruct invoke_result {\n  typedef typename result_of<F(ArgTypes...)>::type type1;\n  typedef typename remove_all<type1>::type type;\n};\n#else\ntemplate<typename F, typename ArgType0 = void, typename ArgType1 = void, typename ArgType2 = void>\nstruct invoke_result {\n  typedef typename result_of<F(ArgType0, ArgType1, ArgType2)>::type type1;\n  typedef typename remove_all<type1>::type type;\n};\n\ntemplate<typename F>\nstruct invoke_result<F, void, void, void> {\n  typedef typename result_of<F()>::type type1;\n  typedef typename remove_all<type1>::type type;\n};\n\ntemplate<typename F, typename ArgType0>\nstruct invoke_result<F, ArgType0, void, void> {\n  typedef typename result_of<F(ArgType0)>::type type1;\n  typedef typename remove_all<type1>::type type;\n};\n\ntemplate<typename F, typename ArgType0, typename ArgType1>\nstruct invoke_result<F, ArgType0, ArgType1, void> {\n  typedef typename result_of<F(ArgType0, ArgType1)>::type type1;\n  typedef typename remove_all<type1>::type type;\n};\n#endif\n\nstruct meta_yes { char a[1]; };\nstruct meta_no  { char a[2]; };\n\n// Check whether T::ReturnType does exist\ntemplate <typename T>\nstruct has_ReturnType\n{\n  template <typename C> static meta_yes testFunctor(C const *, typename C::ReturnType const * = 0);\n  template <typename C> static meta_no  testFunctor(...);\n\n  enum { value = sizeof(testFunctor<T>(static_cast<T*>(0))) == sizeof(meta_yes) };\n};\n\ntemplate<typename T> const T* return_ptr();\n\ntemplate <typename T, typename IndexType=Index>\nstruct has_nullary_operator\n{\n  template <typename C> static meta_yes testFunctor(C const *,typename enable_if<(sizeof(return_ptr<C>()->operator()())>0)>::type * = 0);\n  static meta_no testFunctor(...);\n\n  enum { value = sizeof(testFunctor(static_cast<T*>(0))) == sizeof(meta_yes) };\n};\n\ntemplate <typename T, typename IndexType=Index>\nstruct has_unary_operator\n{\n  template <typename C> static meta_yes testFunctor(C const *,typename enable_if<(sizeof(return_ptr<C>()->operator()(IndexType(0)))>0)>::type * = 0);\n  static meta_no testFunctor(...);\n\n  enum { value = sizeof(testFunctor(static_cast<T*>(0))) == sizeof(meta_yes) };\n};\n\ntemplate <typename T, typename IndexType=Index>\nstruct has_binary_operator\n{\n  template <typename C> static meta_yes testFunctor(C const *,typename enable_if<(sizeof(return_ptr<C>()->operator()(IndexType(0),IndexType(0)))>0)>::type * = 0);\n  static meta_no testFunctor(...);\n\n  enum { value = sizeof(testFunctor(static_cast<T*>(0))) == sizeof(meta_yes) };\n};\n\n/** \\internal In short, it computes int(sqrt(\\a Y)) with \\a Y an integer.\n  * Usage example: \\code meta_sqrt<1023>::ret \\endcode\n  */\ntemplate<int Y,\n         int InfX = 0,\n         int SupX = ((Y==1) ? 1 : Y/2),\n         bool Done = ((SupX-InfX)<=1 ? true : ((SupX*SupX <= Y) && ((SupX+1)*(SupX+1) > Y))) >\n                                // use ?: instead of || just to shut up a stupid gcc 4.3 warning\nclass meta_sqrt\n{\n    enum {\n      MidX = (InfX+SupX)/2,\n      TakeInf = MidX*MidX > Y ? 1 : 0,\n      NewInf = int(TakeInf) ? InfX : int(MidX),\n      NewSup = int(TakeInf) ? int(MidX) : SupX\n    };\n  public:\n    enum { ret = meta_sqrt<Y,NewInf,NewSup>::ret };\n};\n\ntemplate<int Y, int InfX, int SupX>\nclass meta_sqrt<Y, InfX, SupX, true> { public:  enum { ret = (SupX*SupX <= Y) ? SupX : InfX }; };\n\n\n/** \\internal Computes the least common multiple of two positive integers A and B\n  * at compile-time. 
\n  */\ntemplate<int A, int B, int K=1, bool Done = ((A*K)%B)==0, bool Big=(A>=B)>\nstruct meta_least_common_multiple\n{\n  enum { ret = meta_least_common_multiple<A,B,K+1>::ret };\n};\ntemplate<int A, int B, int K, bool Done>\nstruct meta_least_common_multiple<A,B,K,Done,false>\n{\n  enum { ret = meta_least_common_multiple<B,A,K>::ret };\n};\ntemplate<int A, int B, int K>\nstruct meta_least_common_multiple<A,B,K,true,true>\n{\n  enum { ret = A*K };\n};\n\n\n/** \\internal determines whether the product of two numeric types is allowed and what the return type is */\ntemplate<typename T, typename U> struct scalar_product_traits\n{\n  enum { Defined = 0 };\n};\n\n// FIXME quick workaround for the current limitation of result_of\n// template<typename Scalar, typename ArgType0, typename ArgType1>\n// struct result_of<scalar_product_op<Scalar>(ArgType0,ArgType1)> {\n// typedef typename scalar_product_traits<typename remove_all<ArgType0>::type, typename remove_all<ArgType1>::type>::ReturnType type;\n// };\n\n/** \\internal Obtains a POD type suitable to use as storage for an object of a size\n  * of at most Len bytes, aligned as specified by \\c Align.\n  */\ntemplate<unsigned Len, unsigned Align>\nstruct aligned_storage {\n  struct type {\n    EIGEN_ALIGN_TO_BOUNDARY(Align) unsigned char data[Len];\n  };\n};\n\n} // end namespace internal\n\nnamespace numext {\n\n#if defined(EIGEN_GPU_COMPILE_PHASE)\ntemplate<typename T> EIGEN_DEVICE_FUNC   void swap(T &a, T &b) { T tmp = b; b = a; a = tmp; }\n#else\ntemplate<typename T> EIGEN_STRONG_INLINE void swap(T &a, T &b) { std::swap(a,b); }\n#endif\n\n#if defined(EIGEN_GPU_COMPILE_PHASE) && !EIGEN_HAS_CXX11\nusing internal::device::numeric_limits;\n#else\nusing std::numeric_limits;\n#endif\n\n// Integer division with rounding up.\n// T is assumed to be an integer type with a>=0, and b>0\ntemplate<typename T>\nEIGEN_DEVICE_FUNC\nT div_ceil(const T &a, const T &b)\n{\n  return (a+b-1) / b;\n}\n\n// The aim of the following functions is to bypass -Wfloat-equal warnings\n// when we really want a strict equality comparison on floating points.\ntemplate<typename X, typename Y> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC\nbool equal_strict(const X& x,const Y& y) { return x == y; }\n\n#if !defined(EIGEN_GPU_COMPILE_PHASE) || (!defined(EIGEN_CUDA_ARCH) && defined(EIGEN_CONSTEXPR_ARE_DEVICE_FUNC))\ntemplate<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC\nbool equal_strict(const float& x,const float& y) { return std::equal_to<float>()(x,y); }\n\ntemplate<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC\nbool equal_strict(const double& x,const double& y) { return std::equal_to<double>()(x,y); }\n#endif\n\ntemplate<typename X, typename Y> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC\nbool not_equal_strict(const X& x,const Y& y) { return x != y; }\n\n#if !defined(EIGEN_GPU_COMPILE_PHASE) || (!defined(EIGEN_CUDA_ARCH) && defined(EIGEN_CONSTEXPR_ARE_DEVICE_FUNC))\ntemplate<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC\nbool not_equal_strict(const float& x,const float& y) { return std::not_equal_to<float>()(x,y); }\n\ntemplate<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC\nbool not_equal_strict(const double& x,const double& y) { return std::not_equal_to<double>()(x,y); }\n#endif\n\n} // end namespace numext\n\n} // end namespace Eigen\n\n#endif // EIGEN_META_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/util/NonMPL2.h",
    "content": "#ifdef EIGEN_MPL2_ONLY\n#error Including non-MPL2 code in EIGEN_MPL2_ONLY mode\n#endif\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/util/ReenableStupidWarnings.h",
    "content": "#ifdef EIGEN_WARNINGS_DISABLED_2\n// \"DisableStupidWarnings.h\" was included twice recursively: Do not reenable warnings yet!\n#  undef EIGEN_WARNINGS_DISABLED_2\n\n#elif defined(EIGEN_WARNINGS_DISABLED)\n#undef EIGEN_WARNINGS_DISABLED\n\n#ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS\n  #ifdef _MSC_VER\n    #pragma warning( pop )\n  #elif defined __INTEL_COMPILER\n    #pragma warning pop\n  #elif defined __clang__\n    #pragma clang diagnostic pop\n  #elif defined __GNUC__  &&  (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6))\n    #pragma GCC diagnostic pop\n  #endif\n\n  #if defined __NVCC__\n//    Don't reenable the diagnostic messages, as it turns out these messages need\n//    to be disabled at the point of the template instantiation (i.e the user code)\n//    otherwise they'll be triggered by nvcc.\n//    #pragma diag_default code_is_unreachable\n//    #pragma diag_default initialization_not_reachable\n//    #pragma diag_default 2651\n//    #pragma diag_default 2653\n  #endif\n\n#endif\n\n#endif // EIGEN_WARNINGS_DISABLED\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/util/ReshapedHelper.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2017 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n\n#ifndef EIGEN_RESHAPED_HELPER_H\n#define EIGEN_RESHAPED_HELPER_H\n\nnamespace Eigen {\n\nenum AutoSize_t   { AutoSize };\nconst int AutoOrder = 2;\n\nnamespace internal {\n\ntemplate<typename SizeType,typename OtherSize, int TotalSize>\nstruct get_compiletime_reshape_size {\n  enum { value = get_fixed_value<SizeType>::value };\n};\n\ntemplate<typename SizeType>\nIndex get_runtime_reshape_size(SizeType size, Index /*other*/, Index /*total*/) {\n  return internal::get_runtime_value(size);\n}\n\ntemplate<typename OtherSize, int TotalSize>\nstruct get_compiletime_reshape_size<AutoSize_t,OtherSize,TotalSize> {\n  enum {\n    other_size = get_fixed_value<OtherSize>::value,\n    value = (TotalSize==Dynamic || other_size==Dynamic) ? Dynamic : TotalSize / other_size };\n};\n\ninline Index get_runtime_reshape_size(AutoSize_t /*size*/, Index other, Index total) {\n  return total/other;\n}\n\ntemplate<int Flags, int Order>\nstruct get_compiletime_reshape_order {\n  enum { value = Order == AutoOrder ? Flags & RowMajorBit : Order };\n};\n\n}\n\n} // end namespace Eigen\n\n#endif // EIGEN_RESHAPED_HELPER_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/util/StaticAssert.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>\n// Copyright (C) 2008 Benoit Jacob <jacob.benoit.1@gmail.com>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_STATIC_ASSERT_H\n#define EIGEN_STATIC_ASSERT_H\n\n/* Some notes on Eigen's static assertion mechanism:\n *\n *  - in EIGEN_STATIC_ASSERT(CONDITION,MSG) the parameter CONDITION must be a compile time boolean\n *    expression, and MSG an enum listed in struct internal::static_assertion<true>\n *\n *  - define EIGEN_NO_STATIC_ASSERT to disable them (and save compilation time)\n *    in that case, the static assertion is converted to the following runtime assert:\n *      eigen_assert(CONDITION && \"MSG\")\n *\n *  - currently EIGEN_STATIC_ASSERT can only be used in function scope\n *\n */\n\n#ifndef EIGEN_STATIC_ASSERT\n#ifndef EIGEN_NO_STATIC_ASSERT\n\n  #if EIGEN_MAX_CPP_VER>=11 && (__has_feature(cxx_static_assert) || (EIGEN_COMP_CXXVER >= 11) || (EIGEN_COMP_MSVC >= 1600))\n\n    // if native static_assert is enabled, let's use it\n    #define EIGEN_STATIC_ASSERT(X,MSG) static_assert(X,#MSG);\n\n  #else // not CXX0X\n\n    namespace Eigen {\n\n    namespace internal {\n\n    template<bool condition>\n    struct static_assertion {};\n\n    template<>\n    struct static_assertion<true>\n    {\n      enum {\n        YOU_TRIED_CALLING_A_VECTOR_METHOD_ON_A_MATRIX=1,\n        YOU_MIXED_VECTORS_OF_DIFFERENT_SIZES=1,\n        YOU_MIXED_MATRICES_OF_DIFFERENT_SIZES=1,\n        THIS_METHOD_IS_ONLY_FOR_VECTORS_OF_A_SPECIFIC_SIZE=1,\n        THIS_METHOD_IS_ONLY_FOR_MATRICES_OF_A_SPECIFIC_SIZE=1,\n        THIS_METHOD_IS_ONLY_FOR_OBJECTS_OF_A_SPECIFIC_SIZE=1,\n        OUT_OF_RANGE_ACCESS=1,\n        YOU_MADE_A_PROGRAMMING_MISTAKE=1,\n        EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT=1,\n        EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE=1,\n        YOU_CALLED_A_FIXED_SIZE_METHOD_ON_A_DYNAMIC_SIZE_MATRIX_OR_VECTOR=1,\n        YOU_CALLED_A_DYNAMIC_SIZE_METHOD_ON_A_FIXED_SIZE_MATRIX_OR_VECTOR=1,\n        UNALIGNED_LOAD_AND_STORE_OPERATIONS_UNIMPLEMENTED_ON_ALTIVEC=1,\n        THIS_FUNCTION_IS_NOT_FOR_INTEGER_NUMERIC_TYPES=1,\n        FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED=1,\n        NUMERIC_TYPE_MUST_BE_REAL=1,\n        COEFFICIENT_WRITE_ACCESS_TO_SELFADJOINT_NOT_SUPPORTED=1,\n        WRITING_TO_TRIANGULAR_PART_WITH_UNIT_DIAGONAL_IS_NOT_SUPPORTED=1,\n        THIS_METHOD_IS_ONLY_FOR_FIXED_SIZE=1,\n        INVALID_MATRIX_PRODUCT=1,\n        INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS=1,\n        INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION=1,\n        YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY=1,\n        THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES=1,\n        THIS_METHOD_IS_ONLY_FOR_ROW_MAJOR_MATRICES=1,\n        INVALID_MATRIX_TEMPLATE_PARAMETERS=1,\n        INVALID_MATRIXBASE_TEMPLATE_PARAMETERS=1,\n        BOTH_MATRICES_MUST_HAVE_THE_SAME_STORAGE_ORDER=1,\n        THIS_METHOD_IS_ONLY_FOR_DIAGONAL_MATRIX=1,\n        THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE=1,\n        
THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES=1,\n        YOU_ALREADY_SPECIFIED_THIS_STRIDE=1,\n        INVALID_STORAGE_ORDER_FOR_THIS_VECTOR_EXPRESSION=1,\n        THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD=1,\n        PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1=1,\n        THIS_METHOD_IS_ONLY_FOR_SPECIFIC_TRANSFORMATIONS=1,\n        YOU_CANNOT_MIX_ARRAYS_AND_MATRICES=1,\n        YOU_PERFORMED_AN_INVALID_TRANSFORMATION_CONVERSION=1,\n        THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY=1,\n        YOU_ARE_TRYING_TO_USE_AN_INDEX_BASED_ACCESSOR_ON_AN_EXPRESSION_THAT_DOES_NOT_SUPPORT_THAT=1,\n        THIS_METHOD_IS_ONLY_FOR_1x1_EXPRESSIONS=1,\n        THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS=1,\n        THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_OF_BOOL=1,\n        THIS_METHOD_IS_ONLY_FOR_ARRAYS_NOT_MATRICES=1,\n        YOU_PASSED_A_ROW_VECTOR_BUT_A_COLUMN_VECTOR_WAS_EXPECTED=1,\n        YOU_PASSED_A_COLUMN_VECTOR_BUT_A_ROW_VECTOR_WAS_EXPECTED=1,\n        THE_INDEX_TYPE_MUST_BE_A_SIGNED_TYPE=1,\n        THE_STORAGE_ORDER_OF_BOTH_SIDES_MUST_MATCH=1,\n        OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG=1,\n        IMPLICIT_CONVERSION_TO_SCALAR_IS_FOR_INNER_PRODUCT_ONLY=1,\n        STORAGE_LAYOUT_DOES_NOT_MATCH=1,\n        EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT__INVALID_COST_VALUE=1,\n        THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS=1,\n        MATRIX_FREE_CONJUGATE_GRADIENT_IS_COMPATIBLE_WITH_UPPER_UNION_LOWER_MODE_ONLY=1,\n        THIS_TYPE_IS_NOT_SUPPORTED=1,\n        STORAGE_KIND_MUST_MATCH=1,\n        STORAGE_INDEX_MUST_MATCH=1,\n        CHOLMOD_SUPPORTS_DOUBLE_PRECISION_ONLY=1,\n        SELFADJOINTVIEW_ACCEPTS_UPPER_AND_LOWER_MODE_ONLY=1,\n        INVALID_TEMPLATE_PARAMETER=1,\n        GPU_TENSOR_CONTRACTION_DOES_NOT_SUPPORT_OUTPUT_KERNELS=1,\n        THE_ARRAY_SIZE_SHOULD_EQUAL_WITH_PACKET_SIZE=1\n      };\n    };\n\n    } // end namespace internal\n\n    } // end namespace Eigen\n\n    // Specialized implementation for MSVC to avoid \"conditional\n    // expression is constant\" warnings.  
This implementation doesn't\n    // appear to work under GCC, hence the multiple implementations.\n    #if EIGEN_COMP_MSVC\n\n      #define EIGEN_STATIC_ASSERT(CONDITION,MSG) \\\n        {Eigen::internal::static_assertion<bool(CONDITION)>::MSG;}\n\n    #else\n      // In some cases clang interprets bool(CONDITION) as a function declaration\n      #define EIGEN_STATIC_ASSERT(CONDITION,MSG) \\\n        if (Eigen::internal::static_assertion<static_cast<bool>(CONDITION)>::MSG) {}\n\n    #endif\n\n  #endif // not CXX0X\n\n#else // EIGEN_NO_STATIC_ASSERT\n\n  #define EIGEN_STATIC_ASSERT(CONDITION,MSG) eigen_assert((CONDITION) && #MSG);\n\n#endif // EIGEN_NO_STATIC_ASSERT\n#endif // EIGEN_STATIC_ASSERT\n\n// static assertion failing if the type \\a TYPE is not a vector type\n#define EIGEN_STATIC_ASSERT_VECTOR_ONLY(TYPE) \\\n  EIGEN_STATIC_ASSERT(TYPE::IsVectorAtCompileTime, \\\n                      YOU_TRIED_CALLING_A_VECTOR_METHOD_ON_A_MATRIX)\n\n// static assertion failing if the type \\a TYPE is not fixed-size\n#define EIGEN_STATIC_ASSERT_FIXED_SIZE(TYPE) \\\n  EIGEN_STATIC_ASSERT(TYPE::SizeAtCompileTime!=Eigen::Dynamic, \\\n                      YOU_CALLED_A_FIXED_SIZE_METHOD_ON_A_DYNAMIC_SIZE_MATRIX_OR_VECTOR)\n\n// static assertion failing if the type \\a TYPE is not dynamic-size\n#define EIGEN_STATIC_ASSERT_DYNAMIC_SIZE(TYPE) \\\n  EIGEN_STATIC_ASSERT(TYPE::SizeAtCompileTime==Eigen::Dynamic, \\\n                      YOU_CALLED_A_DYNAMIC_SIZE_METHOD_ON_A_FIXED_SIZE_MATRIX_OR_VECTOR)\n\n// static assertion failing if the type \\a TYPE is not a vector type of the given size\n#define EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(TYPE, SIZE) \\\n  EIGEN_STATIC_ASSERT(TYPE::IsVectorAtCompileTime && TYPE::SizeAtCompileTime==SIZE, \\\n                      THIS_METHOD_IS_ONLY_FOR_VECTORS_OF_A_SPECIFIC_SIZE)\n\n// static assertion failing if the type \\a TYPE is not a matrix type of the given size\n#define EIGEN_STATIC_ASSERT_MATRIX_SPECIFIC_SIZE(TYPE, ROWS, COLS) \\\n  EIGEN_STATIC_ASSERT(TYPE::RowsAtCompileTime==ROWS && TYPE::ColsAtCompileTime==COLS, \\\n                      THIS_METHOD_IS_ONLY_FOR_MATRICES_OF_A_SPECIFIC_SIZE)\n\n// static assertion failing if the two vector expression types are not compatible (same fixed-size or dynamic size)\n#define EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(TYPE0,TYPE1) \\\n  EIGEN_STATIC_ASSERT( \\\n      (int(TYPE0::SizeAtCompileTime)==Eigen::Dynamic \\\n    || int(TYPE1::SizeAtCompileTime)==Eigen::Dynamic \\\n    || int(TYPE0::SizeAtCompileTime)==int(TYPE1::SizeAtCompileTime)),\\\n    YOU_MIXED_VECTORS_OF_DIFFERENT_SIZES)\n\n#define EIGEN_PREDICATE_SAME_MATRIX_SIZE(TYPE0,TYPE1) \\\n     ( \\\n        (int(Eigen::internal::size_of_xpr_at_compile_time<TYPE0>::ret)==0 && int(Eigen::internal::size_of_xpr_at_compile_time<TYPE1>::ret)==0) \\\n    || (\\\n          (int(TYPE0::RowsAtCompileTime)==Eigen::Dynamic \\\n        || int(TYPE1::RowsAtCompileTime)==Eigen::Dynamic \\\n        || int(TYPE0::RowsAtCompileTime)==int(TYPE1::RowsAtCompileTime)) \\\n      &&  (int(TYPE0::ColsAtCompileTime)==Eigen::Dynamic \\\n        || int(TYPE1::ColsAtCompileTime)==Eigen::Dynamic \\\n        || int(TYPE0::ColsAtCompileTime)==int(TYPE1::ColsAtCompileTime))\\\n       ) \\\n     )\n\n#define EIGEN_STATIC_ASSERT_NON_INTEGER(TYPE) \\\n    EIGEN_STATIC_ASSERT(!Eigen::NumTraits<TYPE>::IsInteger, THIS_FUNCTION_IS_NOT_FOR_INTEGER_NUMERIC_TYPES)\n\n\n// static assertion failing if it is guaranteed at compile-time that the two matrix expression types have different sizes\n#define 
EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(TYPE0,TYPE1) \\\n  EIGEN_STATIC_ASSERT( \\\n     EIGEN_PREDICATE_SAME_MATRIX_SIZE(TYPE0,TYPE1),\\\n    YOU_MIXED_MATRICES_OF_DIFFERENT_SIZES)\n\n#define EIGEN_STATIC_ASSERT_SIZE_1x1(TYPE) \\\n      EIGEN_STATIC_ASSERT((TYPE::RowsAtCompileTime == 1 || TYPE::RowsAtCompileTime == Eigen::Dynamic) && \\\n                          (TYPE::ColsAtCompileTime == 1 || TYPE::ColsAtCompileTime == Eigen::Dynamic), \\\n                          THIS_METHOD_IS_ONLY_FOR_1x1_EXPRESSIONS)\n\n#define EIGEN_STATIC_ASSERT_LVALUE(Derived) \\\n      EIGEN_STATIC_ASSERT(Eigen::internal::is_lvalue<Derived>::value, \\\n                          THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY)\n\n#define EIGEN_STATIC_ASSERT_ARRAYXPR(Derived) \\\n      EIGEN_STATIC_ASSERT((Eigen::internal::is_same<typename Eigen::internal::traits<Derived>::XprKind, ArrayXpr>::value), \\\n                          THIS_METHOD_IS_ONLY_FOR_ARRAYS_NOT_MATRICES)\n\n#define EIGEN_STATIC_ASSERT_SAME_XPR_KIND(Derived1, Derived2) \\\n      EIGEN_STATIC_ASSERT((Eigen::internal::is_same<typename Eigen::internal::traits<Derived1>::XprKind, \\\n                                             typename Eigen::internal::traits<Derived2>::XprKind \\\n                                            >::value), \\\n                          YOU_CANNOT_MIX_ARRAYS_AND_MATRICES)\n\n// Check that a cost value is positive, and that it stays within a reasonable range\n// TODO this check could be enabled for internal debugging only\n#define EIGEN_INTERNAL_CHECK_COST_VALUE(C) \\\n      EIGEN_STATIC_ASSERT((C)>=0 && (C)<=HugeCost*HugeCost, EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT__INVALID_COST_VALUE);\n\n#endif // EIGEN_STATIC_ASSERT_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/util/SymbolicIndex.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2017 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_SYMBOLIC_INDEX_H\n#define EIGEN_SYMBOLIC_INDEX_H\n\nnamespace Eigen {\n\n/** \\namespace Eigen::symbolic\n  * \\ingroup Core_Module\n  *\n  * This namespace defines a set of classes and functions to build and evaluate symbolic expressions of scalar type Index.\n  * Here is a simple example:\n  *\n  * \\code\n  * // First step, defines symbols:\n  * struct x_tag {};  static const symbolic::SymbolExpr<x_tag> x;\n  * struct y_tag {};  static const symbolic::SymbolExpr<y_tag> y;\n  * struct z_tag {};  static const symbolic::SymbolExpr<z_tag> z;\n  *\n  * // Defines an expression:\n  * auto expr = (x+3)/y+z;\n  *\n  * // And evaluate it: (c++14)\n  * std::cout << expr.eval(x=6,y=3,z=-13) << \"\\n\";\n  *\n  * // In c++98/11, only one symbol per expression is supported for now:\n  * auto expr98 = (3-x)/2;\n  * std::cout << expr98.eval(x=6) << \"\\n\";\n  * \\endcode\n  *\n  * It is currently only used internally to define and manipulate the Eigen::last and Eigen::lastp1 symbols in Eigen::seq and Eigen::seqN.\n  *\n  */\nnamespace symbolic {\n\ntemplate<typename Tag> class Symbol;\ntemplate<typename Arg0> class NegateExpr;\ntemplate<typename Arg1,typename Arg2> class AddExpr;\ntemplate<typename Arg1,typename Arg2> class ProductExpr;\ntemplate<typename Arg1,typename Arg2> class QuotientExpr;\n\n// A simple wrapper around an integral value to provide the eval method.\n// We could also use a free-function symbolic_eval...\ntemplate<typename IndexType=Index>\nclass ValueExpr {\npublic:\n  ValueExpr(IndexType val) : m_value(val) {}\n  template<typename T>\n  IndexType eval_impl(const T&) const { return m_value; }\nprotected:\n  IndexType m_value;\n};\n\n// Specialization for compile-time value,\n// It is similar to ValueExpr(N) but this version helps the compiler to generate better code.\ntemplate<int N>\nclass ValueExpr<internal::FixedInt<N> > {\npublic:\n  ValueExpr() {}\n  template<typename T>\n  EIGEN_CONSTEXPR Index eval_impl(const T&) const { return N; }\n};\n\n\n/** \\class BaseExpr\n  * \\ingroup Core_Module\n  * Common base class of any symbolic expressions\n  */\ntemplate<typename Derived>\nclass BaseExpr\n{\npublic:\n  const Derived& derived() const { return *static_cast<const Derived*>(this); }\n\n  /** Evaluate the expression given the \\a values of the symbols.\n    *\n    * \\param values defines the values of the symbols, it can either be a SymbolValue or a std::tuple of SymbolValue\n    *               as constructed by SymbolExpr::operator= operator.\n    *\n    */\n  template<typename T>\n  Index eval(const T& values) const { return derived().eval_impl(values); }\n\n#if EIGEN_HAS_CXX14\n  template<typename... Types>\n  Index eval(Types&&... 
values) const { return derived().eval_impl(std::make_tuple(values...)); }\n#endif\n\n  NegateExpr<Derived> operator-() const { return NegateExpr<Derived>(derived()); }\n\n  AddExpr<Derived,ValueExpr<> > operator+(Index b) const\n  { return AddExpr<Derived,ValueExpr<> >(derived(),  b); }\n  AddExpr<Derived,ValueExpr<> > operator-(Index a) const\n  { return AddExpr<Derived,ValueExpr<> >(derived(), -a); }\n  ProductExpr<Derived,ValueExpr<> > operator*(Index a) const\n  { return ProductExpr<Derived,ValueExpr<> >(derived(),a); }\n  QuotientExpr<Derived,ValueExpr<> > operator/(Index a) const\n  { return QuotientExpr<Derived,ValueExpr<> >(derived(),a); }\n\n  friend AddExpr<Derived,ValueExpr<> > operator+(Index a, const BaseExpr& b)\n  { return AddExpr<Derived,ValueExpr<> >(b.derived(), a); }\n  friend AddExpr<NegateExpr<Derived>,ValueExpr<> > operator-(Index a, const BaseExpr& b)\n  { return AddExpr<NegateExpr<Derived>,ValueExpr<> >(-b.derived(), a); }\n  friend ProductExpr<ValueExpr<>,Derived> operator*(Index a, const BaseExpr& b)\n  { return ProductExpr<ValueExpr<>,Derived>(a,b.derived()); }\n  friend QuotientExpr<ValueExpr<>,Derived> operator/(Index a, const BaseExpr& b)\n  { return QuotientExpr<ValueExpr<>,Derived>(a,b.derived()); }\n\n  template<int N>\n  AddExpr<Derived,ValueExpr<internal::FixedInt<N> > > operator+(internal::FixedInt<N>) const\n  { return AddExpr<Derived,ValueExpr<internal::FixedInt<N> > >(derived(), ValueExpr<internal::FixedInt<N> >()); }\n  template<int N>\n  AddExpr<Derived,ValueExpr<internal::FixedInt<-N> > > operator-(internal::FixedInt<N>) const\n  { return AddExpr<Derived,ValueExpr<internal::FixedInt<-N> > >(derived(), ValueExpr<internal::FixedInt<-N> >()); }\n  template<int N>\n  ProductExpr<Derived,ValueExpr<internal::FixedInt<N> > > operator*(internal::FixedInt<N>) const\n  { return ProductExpr<Derived,ValueExpr<internal::FixedInt<N> > >(derived(),ValueExpr<internal::FixedInt<N> >()); }\n  template<int N>\n  QuotientExpr<Derived,ValueExpr<internal::FixedInt<N> > > operator/(internal::FixedInt<N>) const\n  { return QuotientExpr<Derived,ValueExpr<internal::FixedInt<N> > >(derived(),ValueExpr<internal::FixedInt<N> >()); }\n\n  template<int N>\n  friend AddExpr<Derived,ValueExpr<internal::FixedInt<N> > > operator+(internal::FixedInt<N>, const BaseExpr& b)\n  { return AddExpr<Derived,ValueExpr<internal::FixedInt<N> > >(b.derived(), ValueExpr<internal::FixedInt<N> >()); }\n  template<int N>\n  friend AddExpr<NegateExpr<Derived>,ValueExpr<internal::FixedInt<N> > > operator-(internal::FixedInt<N>, const BaseExpr& b)\n  { return AddExpr<NegateExpr<Derived>,ValueExpr<internal::FixedInt<N> > >(-b.derived(), ValueExpr<internal::FixedInt<N> >()); }\n  template<int N>\n  friend ProductExpr<ValueExpr<internal::FixedInt<N> >,Derived> operator*(internal::FixedInt<N>, const BaseExpr& b)\n  { return ProductExpr<ValueExpr<internal::FixedInt<N> >,Derived>(ValueExpr<internal::FixedInt<N> >(),b.derived()); }\n  template<int N>\n  friend QuotientExpr<ValueExpr<internal::FixedInt<N> >,Derived> operator/(internal::FixedInt<N>, const BaseExpr& b)\n  { return QuotientExpr<ValueExpr<internal::FixedInt<N> > ,Derived>(ValueExpr<internal::FixedInt<N> >(),b.derived()); }\n\n#if (!EIGEN_HAS_CXX14)\n  template<int N>\n  AddExpr<Derived,ValueExpr<internal::FixedInt<N> > > operator+(internal::FixedInt<N> (*)()) const\n  { return AddExpr<Derived,ValueExpr<internal::FixedInt<N> > >(derived(), ValueExpr<internal::FixedInt<N> >()); }\n  template<int N>\n  AddExpr<Derived,ValueExpr<internal::FixedInt<-N> 
> > operator-(internal::FixedInt<N> (*)()) const\n  { return AddExpr<Derived,ValueExpr<internal::FixedInt<-N> > >(derived(), ValueExpr<internal::FixedInt<-N> >()); }\n  template<int N>\n  ProductExpr<Derived,ValueExpr<internal::FixedInt<N> > > operator*(internal::FixedInt<N> (*)()) const\n  { return ProductExpr<Derived,ValueExpr<internal::FixedInt<N> > >(derived(),ValueExpr<internal::FixedInt<N> >()); }\n  template<int N>\n  QuotientExpr<Derived,ValueExpr<internal::FixedInt<N> > > operator/(internal::FixedInt<N> (*)()) const\n  { return QuotientExpr<Derived,ValueExpr<internal::FixedInt<N> > >(derived(),ValueExpr<internal::FixedInt<N> >()); }\n\n  template<int N>\n  friend AddExpr<Derived,ValueExpr<internal::FixedInt<N> > > operator+(internal::FixedInt<N> (*)(), const BaseExpr& b)\n  { return AddExpr<Derived,ValueExpr<internal::FixedInt<N> > >(b.derived(), ValueExpr<internal::FixedInt<N> >()); }\n  template<int N>\n  friend AddExpr<NegateExpr<Derived>,ValueExpr<internal::FixedInt<N> > > operator-(internal::FixedInt<N> (*)(), const BaseExpr& b)\n  { return AddExpr<NegateExpr<Derived>,ValueExpr<internal::FixedInt<N> > >(-b.derived(), ValueExpr<internal::FixedInt<N> >()); }\n  template<int N>\n  friend ProductExpr<ValueExpr<internal::FixedInt<N> >,Derived> operator*(internal::FixedInt<N> (*)(), const BaseExpr& b)\n  { return ProductExpr<ValueExpr<internal::FixedInt<N> >,Derived>(ValueExpr<internal::FixedInt<N> >(),b.derived()); }\n  template<int N>\n  friend QuotientExpr<ValueExpr<internal::FixedInt<N> >,Derived> operator/(internal::FixedInt<N> (*)(), const BaseExpr& b)\n  { return QuotientExpr<ValueExpr<internal::FixedInt<N> > ,Derived>(ValueExpr<internal::FixedInt<N> >(),b.derived()); }\n#endif\n\n\n  template<typename OtherDerived>\n  AddExpr<Derived,OtherDerived> operator+(const BaseExpr<OtherDerived> &b) const\n  { return AddExpr<Derived,OtherDerived>(derived(),  b.derived()); }\n\n  template<typename OtherDerived>\n  AddExpr<Derived,NegateExpr<OtherDerived> > operator-(const BaseExpr<OtherDerived> &b) const\n  { return AddExpr<Derived,NegateExpr<OtherDerived> >(derived(), -b.derived()); }\n\n  template<typename OtherDerived>\n  ProductExpr<Derived,OtherDerived> operator*(const BaseExpr<OtherDerived> &b) const\n  { return ProductExpr<Derived,OtherDerived>(derived(), b.derived()); }\n\n  template<typename OtherDerived>\n  QuotientExpr<Derived,OtherDerived> operator/(const BaseExpr<OtherDerived> &b) const\n  { return QuotientExpr<Derived,OtherDerived>(derived(), b.derived()); }\n};\n\ntemplate<typename T>\nstruct is_symbolic {\n  // BaseExpr has no conversion ctor, so we only have to check whether T can be statically cast to its base class BaseExpr<T>.\n  enum { value = internal::is_convertible<T,BaseExpr<T> >::value };\n};\n\n/** Represents the actual value of a symbol identified by its tag\n  *\n  * It is the return type of SymbolExpr::operator=, and most of the time this is the only way it is used.\n  */\ntemplate<typename Tag>\nclass SymbolValue\n{\npublic:\n  /** Constructor from the value \\a val */\n  SymbolValue(Index val) : m_value(val) {}\n\n  /** \\returns the stored value of the symbol */\n  Index value() const { return m_value; }\nprotected:\n  Index m_value;\n};\n\n/** Expression of a symbol uniquely identified by the template parameter type \\c tag */\ntemplate<typename tag>\nclass SymbolExpr : public BaseExpr<SymbolExpr<tag> >\n{\npublic:\n  /** Alias to the template parameter \\c tag */\n  typedef tag Tag;\n\n  SymbolExpr() {}\n\n  /** Associate the value \\a val to 
the given symbol \\c *this, uniquely identified by its \\c Tag.\n    *\n    * The returned object should be passed to BaseExpr::eval() to evaluate a given expression with this specified runtime value.\n    */\n  SymbolValue<Tag> operator=(Index val) const {\n    return SymbolValue<Tag>(val);\n  }\n\n  Index eval_impl(const SymbolValue<Tag> &values) const { return values.value(); }\n\n#if EIGEN_HAS_CXX14\n  // C++14 versions suitable for multiple symbols\n  template<typename... Types>\n  Index eval_impl(const std::tuple<Types...>& values) const { return std::get<SymbolValue<Tag> >(values).value(); }\n#endif\n};\n\ntemplate<typename Arg0>\nclass NegateExpr : public BaseExpr<NegateExpr<Arg0> >\n{\npublic:\n  NegateExpr(const Arg0& arg0) : m_arg0(arg0) {}\n\n  template<typename T>\n  Index eval_impl(const T& values) const { return -m_arg0.eval_impl(values); }\nprotected:\n  Arg0 m_arg0;\n};\n\ntemplate<typename Arg0, typename Arg1>\nclass AddExpr : public BaseExpr<AddExpr<Arg0,Arg1> >\n{\npublic:\n  AddExpr(const Arg0& arg0, const Arg1& arg1) : m_arg0(arg0), m_arg1(arg1) {}\n\n  template<typename T>\n  Index eval_impl(const T& values) const { return m_arg0.eval_impl(values) + m_arg1.eval_impl(values); }\nprotected:\n  Arg0 m_arg0;\n  Arg1 m_arg1;\n};\n\ntemplate<typename Arg0, typename Arg1>\nclass ProductExpr : public BaseExpr<ProductExpr<Arg0,Arg1> >\n{\npublic:\n  ProductExpr(const Arg0& arg0, const Arg1& arg1) : m_arg0(arg0), m_arg1(arg1) {}\n\n  template<typename T>\n  Index eval_impl(const T& values) const { return m_arg0.eval_impl(values) * m_arg1.eval_impl(values); }\nprotected:\n  Arg0 m_arg0;\n  Arg1 m_arg1;\n};\n\ntemplate<typename Arg0, typename Arg1>\nclass QuotientExpr : public BaseExpr<QuotientExpr<Arg0,Arg1> >\n{\npublic:\n  QuotientExpr(const Arg0& arg0, const Arg1& arg1) : m_arg0(arg0), m_arg1(arg1) {}\n\n  template<typename T>\n  Index eval_impl(const T& values) const { return m_arg0.eval_impl(values) / m_arg1.eval_impl(values); }\nprotected:\n  Arg0 m_arg0;\n  Arg1 m_arg1;\n};\n\n} // end namespace symbolic\n\n} // end namespace Eigen\n\n#endif // EIGEN_SYMBOLIC_INDEX_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/util/XprHelper.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>\n// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_XPRHELPER_H\n#define EIGEN_XPRHELPER_H\n\n// just a workaround because GCC seems to not really like empty structs\n// FIXME: gcc 4.3 generates bad code when strict-aliasing is enabled\n// so currently we simply disable this optimization for gcc 4.3\n#if EIGEN_COMP_GNUC && !EIGEN_GNUC_AT(4,3)\n  #define EIGEN_EMPTY_STRUCT_CTOR(X) \\\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE X() {} \\\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE X(const X& ) {}\n#else\n  #define EIGEN_EMPTY_STRUCT_CTOR(X)\n#endif\n\nnamespace Eigen {\n\nnamespace internal {\n\ntemplate<typename IndexDest, typename IndexSrc>\nEIGEN_DEVICE_FUNC\ninline IndexDest convert_index(const IndexSrc& idx) {\n  // for sizeof(IndexDest)>=sizeof(IndexSrc) compilers should be able to optimize this away:\n  eigen_internal_assert(idx <= NumTraits<IndexDest>::highest() && \"Index value to big for target type\");\n  return IndexDest(idx);\n}\n\n// true if T can be considered as an integral index (i.e., and integral type or enum)\ntemplate<typename T> struct is_valid_index_type\n{\n  enum { value =\n#if EIGEN_HAS_TYPE_TRAITS\n    internal::is_integral<T>::value || std::is_enum<T>::value\n#elif EIGEN_COMP_MSVC\n    internal::is_integral<T>::value || __is_enum(T)\n#else\n    // without C++11, we use is_convertible to Index instead of is_integral in order to treat enums as Index.\n    internal::is_convertible<T,Index>::value && !internal::is_same<T,float>::value && !is_same<T,double>::value\n#endif\n  };\n};\n\n// true if both types are not valid index types\ntemplate<typename RowIndices, typename ColIndices>\nstruct valid_indexed_view_overload {\n  enum { value = !(internal::is_valid_index_type<RowIndices>::value && internal::is_valid_index_type<ColIndices>::value) };\n};\n\n// promote_scalar_arg is an helper used in operation between an expression and a scalar, like:\n//    expression * scalar\n// Its role is to determine how the type T of the scalar operand should be promoted given the scalar type ExprScalar of the given expression.\n// The IsSupported template parameter must be provided by the caller as: internal::has_ReturnType<ScalarBinaryOpTraits<ExprScalar,T,op> >::value using the proper order for ExprScalar and T.\n// Then the logic is as follows:\n//  - if the operation is natively supported as defined by IsSupported, then the scalar type is not promoted, and T is returned.\n//  - otherwise, NumTraits<ExprScalar>::Literal is returned if T is implicitly convertible to NumTraits<ExprScalar>::Literal AND that this does not imply a float to integer conversion.\n//  - otherwise, ExprScalar is returned if T is implicitly convertible to ExprScalar AND that this does not imply a float to integer conversion.\n//  - In all other cases, the promoted type is not defined, and the respective operation is thus invalid and not available (SFINAE).\ntemplate<typename ExprScalar,typename T, bool IsSupported>\nstruct promote_scalar_arg;\n\ntemplate<typename S,typename T>\nstruct promote_scalar_arg<S,T,true>\n{\n  typedef T type;\n};\n\n// Recursively check safe conversion to PromotedType, and then 
ExprScalar if they are different.\ntemplate<typename ExprScalar,typename T,typename PromotedType,\n  bool ConvertibleToLiteral = internal::is_convertible<T,PromotedType>::value,\n  bool IsSafe = NumTraits<T>::IsInteger || !NumTraits<PromotedType>::IsInteger>\nstruct promote_scalar_arg_unsupported;\n\n// Start recursion with NumTraits<ExprScalar>::Literal\ntemplate<typename S,typename T>\nstruct promote_scalar_arg<S,T,false> : promote_scalar_arg_unsupported<S,T,typename NumTraits<S>::Literal> {};\n\n// We found a match!\ntemplate<typename S,typename T, typename PromotedType>\nstruct promote_scalar_arg_unsupported<S,T,PromotedType,true,true>\n{\n  typedef PromotedType type;\n};\n\n// No match, but no real-to-integer issues, and ExprScalar and current PromotedType are different,\n// so let's try to promote to ExprScalar\ntemplate<typename ExprScalar,typename T, typename PromotedType>\nstruct promote_scalar_arg_unsupported<ExprScalar,T,PromotedType,false,true>\n   : promote_scalar_arg_unsupported<ExprScalar,T,ExprScalar>\n{};\n\n// Unsafe real-to-integer, let's stop.\ntemplate<typename S,typename T, typename PromotedType, bool ConvertibleToLiteral>\nstruct promote_scalar_arg_unsupported<S,T,PromotedType,ConvertibleToLiteral,false> {};\n\n// T is not even convertible to ExprScalar, let's stop.\ntemplate<typename S,typename T>\nstruct promote_scalar_arg_unsupported<S,T,S,false,true> {};\n\n//classes inheriting no_assignment_operator don't generate a default operator=.\nclass no_assignment_operator\n{\n  private:\n    no_assignment_operator& operator=(const no_assignment_operator&);\n  protected:\n    EIGEN_DEFAULT_COPY_CONSTRUCTOR(no_assignment_operator)\n    EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(no_assignment_operator)\n};\n\n/** \\internal return the index type with the largest number of bits */\ntemplate<typename I1, typename I2>\nstruct promote_index_type\n{\n  typedef typename conditional<(sizeof(I1)<sizeof(I2)), I2, I1>::type type;\n};\n\n/** \\internal If the template parameter Value is Dynamic, this class is just a wrapper around a T variable that\n  * can be accessed using value() and setValue().\n  * Otherwise, this class is an empty structure and value() just returns the template parameter Value.\n  */\ntemplate<typename T, int Value> class variable_if_dynamic\n{\n  public:\n    EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(variable_if_dynamic)\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit variable_if_dynamic(T v) { EIGEN_ONLY_USED_FOR_DEBUG(v); eigen_assert(v == T(Value)); }\n    EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR\n    T value() { return T(Value); }\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR\n    operator T() const { return T(Value); }\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n    void setValue(T v) const { EIGEN_ONLY_USED_FOR_DEBUG(v); eigen_assert(v == T(Value)); }\n};\n\ntemplate<typename T> class variable_if_dynamic<T, Dynamic>\n{\n    T m_value;\n  public:\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit variable_if_dynamic(T value = 0) EIGEN_NO_THROW : m_value(value) {}\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T value() const { return m_value; }\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE operator T() const { return m_value; }\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void setValue(T value) { m_value = value; }\n};\n\n/** \\internal like variable_if_dynamic but for DynamicIndex\n  */\ntemplate<typename T, int Value> class variable_if_dynamicindex\n{\n  public:\n    
EIGEN_EMPTY_STRUCT_CTOR(variable_if_dynamicindex)\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit variable_if_dynamicindex(T v) { EIGEN_ONLY_USED_FOR_DEBUG(v); eigen_assert(v == T(Value)); }\n    EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR\n    T value() { return T(Value); }\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n    void setValue(T) {}\n};\n\ntemplate<typename T> class variable_if_dynamicindex<T, DynamicIndex>\n{\n    T m_value;\n    EIGEN_DEVICE_FUNC variable_if_dynamicindex() { eigen_assert(false); }\n  public:\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit variable_if_dynamicindex(T value) : m_value(value) {}\n    EIGEN_DEVICE_FUNC T EIGEN_STRONG_INLINE value() const { return m_value; }\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void setValue(T value) { m_value = value; }\n};\n\ntemplate<typename T> struct functor_traits\n{\n  enum\n  {\n    Cost = 10,\n    PacketAccess = false,\n    IsRepeatable = false\n  };\n};\n\ntemplate<typename T> struct packet_traits;\n\ntemplate<typename T> struct unpacket_traits;\n\ntemplate<int Size, typename PacketType,\n         bool Stop = Size==Dynamic || (Size%unpacket_traits<PacketType>::size)==0 || is_same<PacketType,typename unpacket_traits<PacketType>::half>::value>\nstruct find_best_packet_helper;\n\ntemplate< int Size, typename PacketType>\nstruct find_best_packet_helper<Size,PacketType,true>\n{\n  typedef PacketType type;\n};\n\ntemplate<int Size, typename PacketType>\nstruct find_best_packet_helper<Size,PacketType,false>\n{\n  typedef typename find_best_packet_helper<Size,typename unpacket_traits<PacketType>::half>::type type;\n};\n\ntemplate<typename T, int Size>\nstruct find_best_packet\n{\n  typedef typename find_best_packet_helper<Size,typename packet_traits<T>::type>::type type;\n};\n\n#if EIGEN_MAX_STATIC_ALIGN_BYTES>0\ntemplate<int ArrayBytes, int AlignmentBytes,\n         bool Match     =  bool((ArrayBytes%AlignmentBytes)==0),\n         bool TryHalf   =  bool(EIGEN_MIN_ALIGN_BYTES<AlignmentBytes) >\nstruct compute_default_alignment_helper\n{\n  enum { value = 0 };\n};\n\ntemplate<int ArrayBytes, int AlignmentBytes, bool TryHalf>\nstruct compute_default_alignment_helper<ArrayBytes, AlignmentBytes, true, TryHalf> // Match\n{\n  enum { value = AlignmentBytes };\n};\n\ntemplate<int ArrayBytes, int AlignmentBytes>\nstruct compute_default_alignment_helper<ArrayBytes, AlignmentBytes, false, true> // Try-half\n{\n  // current packet too large, try with a half-packet\n  enum { value = compute_default_alignment_helper<ArrayBytes, AlignmentBytes/2>::value };\n};\n#else\n// If static alignment is disabled, no need to bother.\n// This also avoids a division by zero in \"bool Match =  bool((ArrayBytes%AlignmentBytes)==0)\"\ntemplate<int ArrayBytes, int AlignmentBytes>\nstruct compute_default_alignment_helper\n{\n  enum { value = 0 };\n};\n#endif\n\ntemplate<typename T, int Size> struct compute_default_alignment {\n  enum { value = compute_default_alignment_helper<Size*sizeof(T),EIGEN_MAX_STATIC_ALIGN_BYTES>::value };\n};\n\ntemplate<typename T> struct compute_default_alignment<T,Dynamic> {\n  enum { value = EIGEN_MAX_ALIGN_BYTES };\n};\n\ntemplate<typename _Scalar, int _Rows, int _Cols,\n         int _Options = AutoAlign |\n                          ( (_Rows==1 && _Cols!=1) ? RowMajor\n                          : (_Cols==1 && _Rows!=1) ? 
ColMajor\n                          : EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION ),\n         int _MaxRows = _Rows,\n         int _MaxCols = _Cols\n> class make_proper_matrix_type\n{\n    enum {\n      IsColVector = _Cols==1 && _Rows!=1,\n      IsRowVector = _Rows==1 && _Cols!=1,\n      Options = IsColVector ? (_Options | ColMajor) & ~RowMajor\n              : IsRowVector ? (_Options | RowMajor) & ~ColMajor\n              : _Options\n    };\n  public:\n    typedef Matrix<_Scalar, _Rows, _Cols, Options, _MaxRows, _MaxCols> type;\n};\n\ntemplate<typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols>\nclass compute_matrix_flags\n{\n    enum { row_major_bit = Options&RowMajor ? RowMajorBit : 0 };\n  public:\n    // FIXME currently we still have to handle DirectAccessBit at the expression level to handle DenseCoeffsBase<>\n    // and then propagate this information to the evaluator's flags.\n    // However, I (Gael) think that DirectAccessBit should only matter at the evaluation stage.\n    enum { ret = DirectAccessBit | LvalueBit | NestByRefBit | row_major_bit };\n};\n\ntemplate<int _Rows, int _Cols> struct size_at_compile_time\n{\n  enum { ret = (_Rows==Dynamic || _Cols==Dynamic) ? Dynamic : _Rows * _Cols };\n};\n\ntemplate<typename XprType> struct size_of_xpr_at_compile_time\n{\n  enum { ret = size_at_compile_time<traits<XprType>::RowsAtCompileTime,traits<XprType>::ColsAtCompileTime>::ret };\n};\n\n/* plain_matrix_type : the difference from eval is that plain_matrix_type is always a plain matrix type,\n * whereas eval is a const reference in the case of a matrix\n */\n\ntemplate<typename T, typename StorageKind = typename traits<T>::StorageKind> struct plain_matrix_type;\ntemplate<typename T, typename BaseClassType, int Flags> struct plain_matrix_type_dense;\ntemplate<typename T> struct plain_matrix_type<T,Dense>\n{\n  typedef typename plain_matrix_type_dense<T,typename traits<T>::XprKind, traits<T>::Flags>::type type;\n};\ntemplate<typename T> struct plain_matrix_type<T,DiagonalShape>\n{\n  typedef typename T::PlainObject type;\n};\n\ntemplate<typename T, int Flags> struct plain_matrix_type_dense<T,MatrixXpr,Flags>\n{\n  typedef Matrix<typename traits<T>::Scalar,\n                traits<T>::RowsAtCompileTime,\n                traits<T>::ColsAtCompileTime,\n                AutoAlign | (Flags&RowMajorBit ? RowMajor : ColMajor),\n                traits<T>::MaxRowsAtCompileTime,\n                traits<T>::MaxColsAtCompileTime\n          > type;\n};\n\ntemplate<typename T, int Flags> struct plain_matrix_type_dense<T,ArrayXpr,Flags>\n{\n  typedef Array<typename traits<T>::Scalar,\n                traits<T>::RowsAtCompileTime,\n                traits<T>::ColsAtCompileTime,\n                AutoAlign | (Flags&RowMajorBit ? RowMajor : ColMajor),\n                traits<T>::MaxRowsAtCompileTime,\n                traits<T>::MaxColsAtCompileTime\n          > type;\n};\n\n/* eval : the return type of eval(). For matrices, this is just a const reference\n * in order to avoid a useless copy\n */\n\ntemplate<typename T, typename StorageKind = typename traits<T>::StorageKind> struct eval;\n\ntemplate<typename T> struct eval<T,Dense>\n{\n  typedef typename plain_matrix_type<T>::type type;\n//   typedef typename T::PlainObject type;\n//   typedef T::Matrix<typename traits<T>::Scalar,\n//                 traits<T>::RowsAtCompileTime,\n//                 traits<T>::ColsAtCompileTime,\n//                 AutoAlign | (traits<T>::Flags&RowMajorBit ? 
RowMajor : ColMajor),\n//                 traits<T>::MaxRowsAtCompileTime,\n//                 traits<T>::MaxColsAtCompileTime\n//           > type;\n};\n\ntemplate<typename T> struct eval<T,DiagonalShape>\n{\n  typedef typename plain_matrix_type<T>::type type;\n};\n\n// for matrices, no need to evaluate, just use a const reference to avoid a useless copy\ntemplate<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>\nstruct eval<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>, Dense>\n{\n  typedef const Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>& type;\n};\n\ntemplate<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>\nstruct eval<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>, Dense>\n{\n  typedef const Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>& type;\n};\n\n\n/* similar to plain_matrix_type, but using the evaluator's Flags */\ntemplate<typename T, typename StorageKind = typename traits<T>::StorageKind> struct plain_object_eval;\n\ntemplate<typename T>\nstruct plain_object_eval<T,Dense>\n{\n  typedef typename plain_matrix_type_dense<T,typename traits<T>::XprKind, evaluator<T>::Flags>::type type;\n};\n\n\n/* plain_matrix_type_column_major : same as plain_matrix_type but guaranteed to be column-major\n */\ntemplate<typename T> struct plain_matrix_type_column_major\n{\n  enum { Rows = traits<T>::RowsAtCompileTime,\n         Cols = traits<T>::ColsAtCompileTime,\n         MaxRows = traits<T>::MaxRowsAtCompileTime,\n         MaxCols = traits<T>::MaxColsAtCompileTime\n  };\n  typedef Matrix<typename traits<T>::Scalar,\n                Rows,\n                Cols,\n                (MaxRows==1&&MaxCols!=1) ? RowMajor : ColMajor,\n                MaxRows,\n                MaxCols\n          > type;\n};\n\n/* plain_matrix_type_row_major : same as plain_matrix_type but guaranteed to be row-major\n */\ntemplate<typename T> struct plain_matrix_type_row_major\n{\n  enum { Rows = traits<T>::RowsAtCompileTime,\n         Cols = traits<T>::ColsAtCompileTime,\n         MaxRows = traits<T>::MaxRowsAtCompileTime,\n         MaxCols = traits<T>::MaxColsAtCompileTime\n  };\n  typedef Matrix<typename traits<T>::Scalar,\n                Rows,\n                Cols,\n                (MaxCols==1&&MaxRows!=1) ? ColMajor : RowMajor,\n                MaxRows,\n                MaxCols\n          > type;\n};\n\n/** \\internal The reference selector for template expressions. The idea is that we don't\n  * need to use references for expressions since they are lightweight proxy\n  * objects which should generate no copying overhead. 
*/\ntemplate <typename T>\nstruct ref_selector\n{\n  typedef typename conditional<\n    bool(traits<T>::Flags & NestByRefBit),\n    T const&,\n    const T\n  >::type type;\n\n  typedef typename conditional<\n    bool(traits<T>::Flags & NestByRefBit),\n    T &,\n    T\n  >::type non_const_type;\n};\n\n/** \\internal Adds the const qualifier on the value-type of T2 if and only if T1 is a const type */\ntemplate<typename T1, typename T2>\nstruct transfer_constness\n{\n  typedef typename conditional<\n    bool(internal::is_const<T1>::value),\n    typename internal::add_const_on_value_type<T2>::type,\n    T2\n  >::type type;\n};\n\n\n// However, we still need a mechanism to detect whether an expression which is evaluated multiple times\n// has to be evaluated into a temporary.\n// That's the purpose of this new nested_eval helper:\n/** \\internal Determines how a given expression should be nested when evaluated multiple times.\n  * For example, when you do a * (b+c), Eigen will determine how the expression b+c should be\n  * evaluated into the bigger product expression. The choice is between nesting the expression b+c as-is, or\n  * evaluating that expression b+c into a temporary variable d, and nesting d so that the resulting expression is\n  * a*d. Evaluating can be beneficial for example if every coefficient access in the resulting expression causes\n  * many coefficient accesses in the nested expressions -- as is the case with matrix product for example.\n  *\n  * \\tparam T the type of the expression being nested.\n  * \\tparam n the number of coefficient accesses in the nested expression for each coefficient access in the bigger expression.\n  * \\tparam PlainObject the type of the temporary if needed.\n  */\ntemplate<typename T, int n, typename PlainObject = typename plain_object_eval<T>::type> struct nested_eval\n{\n  enum {\n    ScalarReadCost = NumTraits<typename traits<T>::Scalar>::ReadCost,\n    CoeffReadCost = evaluator<T>::CoeffReadCost,  // NOTE What if an evaluator evaluates itself into a temporary?\n                                                  //      Then CoeffReadCost will be small (e.g., 1) but we still have to evaluate, especially if n>1.\n                                                  //      This situation is already taken care of by the EvalBeforeNestingBit flag, which is turned ON\n                                                  //      for all evaluators creating a temporary. This flag is then propagated by the parent evaluators.\n                                                  //      Another solution could be to count the number of temps?\n    NAsInteger = n == Dynamic ? 
HugeCost : n,\n    CostEval   = (NAsInteger+1) * ScalarReadCost + CoeffReadCost,\n    CostNoEval = NAsInteger * CoeffReadCost,\n    Evaluate = (int(evaluator<T>::Flags) & EvalBeforeNestingBit) || (int(CostEval) < int(CostNoEval))\n  };\n\n  typedef typename conditional<Evaluate, PlainObject, typename ref_selector<T>::type>::type type;\n};\n\ntemplate<typename T>\nEIGEN_DEVICE_FUNC\ninline T* const_cast_ptr(const T* ptr)\n{\n  return const_cast<T*>(ptr);\n}\n\ntemplate<typename Derived, typename XprKind = typename traits<Derived>::XprKind>\nstruct dense_xpr_base\n{\n  /* dense_xpr_base should only ever be used on dense expressions, thus falling either into the MatrixXpr or into the ArrayXpr cases */\n};\n\ntemplate<typename Derived>\nstruct dense_xpr_base<Derived, MatrixXpr>\n{\n  typedef MatrixBase<Derived> type;\n};\n\ntemplate<typename Derived>\nstruct dense_xpr_base<Derived, ArrayXpr>\n{\n  typedef ArrayBase<Derived> type;\n};\n\ntemplate<typename Derived, typename XprKind = typename traits<Derived>::XprKind, typename StorageKind = typename traits<Derived>::StorageKind>\nstruct generic_xpr_base;\n\ntemplate<typename Derived, typename XprKind>\nstruct generic_xpr_base<Derived, XprKind, Dense>\n{\n  typedef typename dense_xpr_base<Derived,XprKind>::type type;\n};\n\ntemplate<typename XprType, typename CastType> struct cast_return_type\n{\n  typedef typename XprType::Scalar CurrentScalarType;\n  typedef typename remove_all<CastType>::type _CastType;\n  typedef typename _CastType::Scalar NewScalarType;\n  typedef typename conditional<is_same<CurrentScalarType,NewScalarType>::value,\n                              const XprType&,CastType>::type type;\n};\n\ntemplate <typename A, typename B> struct promote_storage_type;\n\ntemplate <typename A> struct promote_storage_type<A,A>\n{\n  typedef A ret;\n};\ntemplate <typename A> struct promote_storage_type<A, const A>\n{\n  typedef A ret;\n};\ntemplate <typename A> struct promote_storage_type<const A, A>\n{\n  typedef A ret;\n};\n\n/** \\internal Specify the \"storage kind\" of applying a coefficient-wise\n  * binary operation between two expressions of kinds A and B respectively.\n  * The template parameter Functor permits specializing the resulting storage kind with respect to\n  * the functor.\n  * The default rules are as follows:\n  * \\code\n  * A      op A      -> A\n  * A      op dense  -> dense\n  * dense  op B      -> dense\n  * sparse op dense  -> sparse\n  * dense  op sparse -> sparse\n  * \\endcode\n  */\ntemplate <typename A, typename B, typename Functor> struct cwise_promote_storage_type;\n\ntemplate <typename A, typename Functor>                   struct cwise_promote_storage_type<A,A,Functor>                                      { typedef A      ret; };\ntemplate <typename Functor>                               struct cwise_promote_storage_type<Dense,Dense,Functor>                              { typedef Dense  ret; };\ntemplate <typename A, typename Functor>                   struct cwise_promote_storage_type<A,Dense,Functor>                                  { typedef Dense  ret; };\ntemplate <typename B, typename Functor>                   struct cwise_promote_storage_type<Dense,B,Functor>                                  { typedef Dense  ret; };\ntemplate <typename Functor>                               struct cwise_promote_storage_type<Sparse,Dense,Functor>                             { typedef Sparse ret; };\ntemplate <typename Functor>                               struct cwise_promote_storage_type<Dense,Sparse,Functor>               
              { typedef Sparse ret; };\n\ntemplate <typename LhsKind, typename RhsKind, int LhsOrder, int RhsOrder> struct cwise_promote_storage_order {\n  enum { value = LhsOrder };\n};\n\ntemplate <typename LhsKind, int LhsOrder, int RhsOrder>   struct cwise_promote_storage_order<LhsKind,Sparse,LhsOrder,RhsOrder>                { enum { value = RhsOrder }; };\ntemplate <typename RhsKind, int LhsOrder, int RhsOrder>   struct cwise_promote_storage_order<Sparse,RhsKind,LhsOrder,RhsOrder>                { enum { value = LhsOrder }; };\ntemplate <int Order>                                      struct cwise_promote_storage_order<Sparse,Sparse,Order,Order>                       { enum { value = Order }; };\n\n\n/** \\internal Specify the \"storage kind\" of multiplying an expression of kind A with kind B.\n  * The template parameter ProductTag permits specializing the resulting storage kind with respect to\n  * some compile-time properties of the product: GemmProduct, GemvProduct, OuterProduct, InnerProduct.\n  * The default rules are as follows:\n  * \\code\n  *  K * K            -> K\n  *  dense * K        -> dense\n  *  K * dense        -> dense\n  *  diag * K         -> K\n  *  K * diag         -> K\n  *  Perm * K         -> K\n  *  K * Perm         -> K\n  * \\endcode\n  */\ntemplate <typename A, typename B, int ProductTag> struct product_promote_storage_type;\n\ntemplate <typename A, int ProductTag> struct product_promote_storage_type<A,                  A,                  ProductTag> { typedef A     ret;};\ntemplate <int ProductTag>             struct product_promote_storage_type<Dense,              Dense,              ProductTag> { typedef Dense ret;};\ntemplate <typename A, int ProductTag> struct product_promote_storage_type<A,                  Dense,              ProductTag> { typedef Dense ret; };\ntemplate <typename B, int ProductTag> struct product_promote_storage_type<Dense,              B,                  ProductTag> { typedef Dense ret; };\n\ntemplate <typename A, int ProductTag> struct product_promote_storage_type<A,                  DiagonalShape,      ProductTag> { typedef A ret; };\ntemplate <typename B, int ProductTag> struct product_promote_storage_type<DiagonalShape,      B,                  ProductTag> { typedef B ret; };\ntemplate <int ProductTag>             struct product_promote_storage_type<Dense,              DiagonalShape,      ProductTag> { typedef Dense ret; };\ntemplate <int ProductTag>             struct product_promote_storage_type<DiagonalShape,      Dense,              ProductTag> { typedef Dense ret; };\n\ntemplate <typename A, int ProductTag> struct product_promote_storage_type<A,                  PermutationStorage, ProductTag> { typedef A ret; };\ntemplate <typename B, int ProductTag> struct product_promote_storage_type<PermutationStorage, B,                  ProductTag> { typedef B ret; };\ntemplate <int ProductTag>             struct product_promote_storage_type<Dense,              PermutationStorage, ProductTag> { typedef Dense ret; };\ntemplate <int ProductTag>             struct product_promote_storage_type<PermutationStorage, Dense,              ProductTag> { typedef Dense ret; };\n\n/** \\internal gives the plain matrix or array type to store a row/column/diagonal of a matrix type.\n  * \\tparam Scalar optional parameter allowing to pass a different scalar type than the one of the MatrixType.\n  */\ntemplate<typename ExpressionType, typename Scalar = typename ExpressionType::Scalar>\nstruct plain_row_type\n{\n  typedef Matrix<Scalar, 1, 
ExpressionType::ColsAtCompileTime,\n                 int(ExpressionType::PlainObject::Options) | int(RowMajor), 1, ExpressionType::MaxColsAtCompileTime> MatrixRowType;\n  typedef Array<Scalar, 1, ExpressionType::ColsAtCompileTime,\n                 int(ExpressionType::PlainObject::Options) | int(RowMajor), 1, ExpressionType::MaxColsAtCompileTime> ArrayRowType;\n\n  typedef typename conditional<\n    is_same< typename traits<ExpressionType>::XprKind, MatrixXpr >::value,\n    MatrixRowType,\n    ArrayRowType\n  >::type type;\n};\n\ntemplate<typename ExpressionType, typename Scalar = typename ExpressionType::Scalar>\nstruct plain_col_type\n{\n  typedef Matrix<Scalar, ExpressionType::RowsAtCompileTime, 1,\n                 ExpressionType::PlainObject::Options & ~RowMajor, ExpressionType::MaxRowsAtCompileTime, 1> MatrixColType;\n  typedef Array<Scalar, ExpressionType::RowsAtCompileTime, 1,\n                 ExpressionType::PlainObject::Options & ~RowMajor, ExpressionType::MaxRowsAtCompileTime, 1> ArrayColType;\n\n  typedef typename conditional<\n    is_same< typename traits<ExpressionType>::XprKind, MatrixXpr >::value,\n    MatrixColType,\n    ArrayColType\n  >::type type;\n};\n\ntemplate<typename ExpressionType, typename Scalar = typename ExpressionType::Scalar>\nstruct plain_diag_type\n{\n  enum { diag_size = EIGEN_SIZE_MIN_PREFER_DYNAMIC(ExpressionType::RowsAtCompileTime, ExpressionType::ColsAtCompileTime),\n         max_diag_size = EIGEN_SIZE_MIN_PREFER_FIXED(ExpressionType::MaxRowsAtCompileTime, ExpressionType::MaxColsAtCompileTime)\n  };\n  typedef Matrix<Scalar, diag_size, 1, ExpressionType::PlainObject::Options & ~RowMajor, max_diag_size, 1> MatrixDiagType;\n  typedef Array<Scalar, diag_size, 1, ExpressionType::PlainObject::Options & ~RowMajor, max_diag_size, 1> ArrayDiagType;\n\n  typedef typename conditional<\n    is_same< typename traits<ExpressionType>::XprKind, MatrixXpr >::value,\n    MatrixDiagType,\n    ArrayDiagType\n  >::type type;\n};\n\ntemplate<typename Expr,typename Scalar = typename Expr::Scalar>\nstruct plain_constant_type\n{\n  enum { Options = (traits<Expr>::Flags&RowMajorBit)?RowMajor:0 };\n\n  typedef Array<Scalar,  traits<Expr>::RowsAtCompileTime,   traits<Expr>::ColsAtCompileTime,\n                Options, traits<Expr>::MaxRowsAtCompileTime,traits<Expr>::MaxColsAtCompileTime> array_type;\n\n  typedef Matrix<Scalar,  traits<Expr>::RowsAtCompileTime,   traits<Expr>::ColsAtCompileTime,\n                 Options, traits<Expr>::MaxRowsAtCompileTime,traits<Expr>::MaxColsAtCompileTime> matrix_type;\n\n  typedef CwiseNullaryOp<scalar_constant_op<Scalar>, const typename conditional<is_same< typename traits<Expr>::XprKind, MatrixXpr >::value, matrix_type, array_type>::type > type;\n};\n\ntemplate<typename ExpressionType>\nstruct is_lvalue\n{\n  enum { value = (!bool(is_const<ExpressionType>::value)) &&\n                 bool(traits<ExpressionType>::Flags & LvalueBit) };\n};\n\ntemplate<typename T> struct is_diagonal\n{ enum { ret = false }; };\n\ntemplate<typename T> struct is_diagonal<DiagonalBase<T> >\n{ enum { ret = true }; };\n\ntemplate<typename T> struct is_diagonal<DiagonalWrapper<T> >\n{ enum { ret = true }; };\n\ntemplate<typename T, int S> struct is_diagonal<DiagonalMatrix<T,S> >\n{ enum { ret = true }; };\n\n\ntemplate<typename T> struct is_identity\n{ enum { value = false }; };\n\ntemplate<typename T> struct is_identity<CwiseNullaryOp<internal::scalar_identity_op<typename T::Scalar>, T> >\n{ enum { value = true }; };\n\n\ntemplate<typename S1, typename S2> struct 
glue_shapes;\ntemplate<> struct glue_shapes<DenseShape,TriangularShape> { typedef TriangularShape type;  };\n\ntemplate<typename T1, typename T2>\nstruct possibly_same_dense {\n  enum { value = has_direct_access<T1>::ret && has_direct_access<T2>::ret && is_same<typename T1::Scalar,typename T2::Scalar>::value };\n};\n\ntemplate<typename T1, typename T2>\nEIGEN_DEVICE_FUNC\nbool is_same_dense(const T1 &mat1, const T2 &mat2, typename enable_if<possibly_same_dense<T1,T2>::value>::type * = 0)\n{\n  return (mat1.data()==mat2.data()) && (mat1.innerStride()==mat2.innerStride()) && (mat1.outerStride()==mat2.outerStride());\n}\n\ntemplate<typename T1, typename T2>\nEIGEN_DEVICE_FUNC\nbool is_same_dense(const T1 &, const T2 &, typename enable_if<!possibly_same_dense<T1,T2>::value>::type * = 0)\n{\n  return false;\n}\n\n// Internal helper defining the cost of a scalar division for the type T.\n// The default heuristic can be specialized for each scalar type and architecture.\ntemplate<typename T,bool Vectorized=false,typename EnableIf = void>\nstruct scalar_div_cost {\n  enum { value = 8*NumTraits<T>::MulCost };\n};\n\ntemplate<typename T,bool Vectorized>\nstruct scalar_div_cost<std::complex<T>, Vectorized> {\n  enum { value = 2*scalar_div_cost<T>::value\n               + 6*NumTraits<T>::MulCost\n               + 3*NumTraits<T>::AddCost\n  };\n};\n\n\ntemplate<bool Vectorized>\nstruct scalar_div_cost<signed long,Vectorized,typename conditional<sizeof(long)==8,void,false_type>::type> { enum { value = 24 }; };\ntemplate<bool Vectorized>\nstruct scalar_div_cost<unsigned long,Vectorized,typename conditional<sizeof(long)==8,void,false_type>::type> { enum { value = 21 }; };\n\n\n#ifdef EIGEN_DEBUG_ASSIGN\nstd::string demangle_traversal(int t)\n{\n  if(t==DefaultTraversal) return \"DefaultTraversal\";\n  if(t==LinearTraversal) return \"LinearTraversal\";\n  if(t==InnerVectorizedTraversal) return \"InnerVectorizedTraversal\";\n  if(t==LinearVectorizedTraversal) return \"LinearVectorizedTraversal\";\n  if(t==SliceVectorizedTraversal) return \"SliceVectorizedTraversal\";\n  return \"?\";\n}\nstd::string demangle_unrolling(int t)\n{\n  if(t==NoUnrolling) return \"NoUnrolling\";\n  if(t==InnerUnrolling) return \"InnerUnrolling\";\n  if(t==CompleteUnrolling) return \"CompleteUnrolling\";\n  return \"?\";\n}\nstd::string demangle_flags(int f)\n{\n  std::string res;\n  if(f&RowMajorBit)                 res += \" | RowMajor\";\n  if(f&PacketAccessBit)             res += \" | Packet\";\n  if(f&LinearAccessBit)             res += \" | Linear\";\n  if(f&LvalueBit)                   res += \" | Lvalue\";\n  if(f&DirectAccessBit)             res += \" | Direct\";\n  if(f&NestByRefBit)                res += \" | NestByRef\";\n  if(f&NoPreferredStorageOrderBit)  res += \" | NoPreferredStorageOrderBit\";\n\n  return res;\n}\n#endif\n\n} // end namespace internal\n\n\n/** \\class ScalarBinaryOpTraits\n  * \\ingroup Core_Module\n  *\n  * \\brief Determines whether the given binary operation of two numeric types is allowed and what the scalar return type is.\n  *\n  * This class makes it possible to control the scalar return type of any binary operation performed on two different scalar types through (partial) template specializations.\n  *\n  * For instance, let \\c U1, \\c U2 and \\c U3 be three user-defined scalar types for which most operations between instances of \\c U1 and \\c U2 return a \\c U3.\n  * You can let %Eigen know that by defining:\n    \\code\n    template<typename BinaryOp>\n    struct 
ScalarBinaryOpTraits<U1,U2,BinaryOp> { typedef U3 ReturnType;  };\n    template<typename BinaryOp>\n    struct ScalarBinaryOpTraits<U2,U1,BinaryOp> { typedef U3 ReturnType;  };\n    \\endcode\n  * You can then explicitly disable some particular operations to get more explicit error messages:\n    \\code\n    template<>\n    struct ScalarBinaryOpTraits<U1,U2,internal::scalar_max_op<U1,U2> > {};\n    \\endcode\n  * Or customize the return type for individual operation:\n    \\code\n    template<>\n    struct ScalarBinaryOpTraits<U1,U2,internal::scalar_sum_op<U1,U2> > { typedef U1 ReturnType; };\n    \\endcode\n  *\n  * By default, the following generic combinations are supported:\n  <table class=\"manual\">\n  <tr><th>ScalarA</th><th>ScalarB</th><th>BinaryOp</th><th>ReturnType</th><th>Note</th></tr>\n  <tr            ><td>\\c T </td><td>\\c T </td><td>\\c * </td><td>\\c T </td><td></td></tr>\n  <tr class=\"alt\"><td>\\c NumTraits<T>::Real </td><td>\\c T </td><td>\\c * </td><td>\\c T </td><td>Only if \\c NumTraits<T>::IsComplex </td></tr>\n  <tr            ><td>\\c T </td><td>\\c NumTraits<T>::Real </td><td>\\c * </td><td>\\c T </td><td>Only if \\c NumTraits<T>::IsComplex </td></tr>\n  </table>\n  *\n  * \\sa CwiseBinaryOp\n  */\ntemplate<typename ScalarA, typename ScalarB, typename BinaryOp=internal::scalar_product_op<ScalarA,ScalarB> >\nstruct ScalarBinaryOpTraits\n#ifndef EIGEN_PARSED_BY_DOXYGEN\n  // for backward compatibility, use the hints given by the (deprecated) internal::scalar_product_traits class.\n  : internal::scalar_product_traits<ScalarA,ScalarB>\n#endif // EIGEN_PARSED_BY_DOXYGEN\n{};\n\ntemplate<typename T, typename BinaryOp>\nstruct ScalarBinaryOpTraits<T,T,BinaryOp>\n{\n  typedef T ReturnType;\n};\n\ntemplate <typename T, typename BinaryOp>\nstruct ScalarBinaryOpTraits<T, typename NumTraits<typename internal::enable_if<NumTraits<T>::IsComplex,T>::type>::Real, BinaryOp>\n{\n  typedef T ReturnType;\n};\ntemplate <typename T, typename BinaryOp>\nstruct ScalarBinaryOpTraits<typename NumTraits<typename internal::enable_if<NumTraits<T>::IsComplex,T>::type>::Real, T, BinaryOp>\n{\n  typedef T ReturnType;\n};\n\n// For Matrix * Permutation\ntemplate<typename T, typename BinaryOp>\nstruct ScalarBinaryOpTraits<T,void,BinaryOp>\n{\n  typedef T ReturnType;\n};\n\n// For Permutation * Matrix\ntemplate<typename T, typename BinaryOp>\nstruct ScalarBinaryOpTraits<void,T,BinaryOp>\n{\n  typedef T ReturnType;\n};\n\n// for Permutation*Permutation\ntemplate<typename BinaryOp>\nstruct ScalarBinaryOpTraits<void,void,BinaryOp>\n{\n  typedef void ReturnType;\n};\n\n// We require Lhs and Rhs to have \"compatible\" scalar types.\n// It is tempting to always allow mixing different types but remember that this is often impossible in the vectorized paths.\n// So allowing mixing different types gives very unexpected errors when enabling vectorization, when the user tries to\n// add together a float matrix and a double matrix.\n#define EIGEN_CHECK_BINARY_COMPATIBILIY(BINOP,LHS,RHS) \\\n  EIGEN_STATIC_ASSERT((Eigen::internal::has_ReturnType<ScalarBinaryOpTraits<LHS, RHS,BINOP> >::value), \\\n    YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)\n\n} // end namespace Eigen\n\n#endif // EIGEN_XPRHELPER_H\n"
  },
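  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Core/util/XprHelper_usage_sketch.cpp",
    "content": "// NOTE: editor-added illustrative sketch; this file and its path are\n// hypothetical and not part of the Eigen 3.4.0 distribution or of regenie.\n// It mirrors the U1/U2/U3 example from the ScalarBinaryOpTraits\n// documentation at the end of Core/util/XprHelper.h: a partial\n// specialization declares the scalar return type of mixed-type binary\n// operations, checked here purely at compile time.\n\n#include <Eigen/Core>\n#include <type_traits>\n\nstruct U1 { double v; };\nstruct U2 { double v; };\nstruct U3 { double v; };\n\nnamespace Eigen {\n// Declare that any binary operation mixing U1 and U2 yields U3.\ntemplate<typename BinaryOp>\nstruct ScalarBinaryOpTraits<U1, U2, BinaryOp> { typedef U3 ReturnType; };\ntemplate<typename BinaryOp>\nstruct ScalarBinaryOpTraits<U2, U1, BinaryOp> { typedef U3 ReturnType; };\n}\n\nint main()\n{\n  // The default BinaryOp is internal::scalar_product_op, so this asks\n  // for the declared return type of U1 * U2.\n  typedef Eigen::ScalarBinaryOpTraits<U1, U2>::ReturnType R;\n  static_assert(std::is_same<R, U3>::value, \"U1 op U2 must map to U3\");\n  return 0;\n}\n"
  },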
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Eigenvalues/ComplexEigenSolver.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2009 Claire Maurice\n// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>\n// Copyright (C) 2010,2012 Jitse Niesen <jitse@maths.leeds.ac.uk>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_COMPLEX_EIGEN_SOLVER_H\n#define EIGEN_COMPLEX_EIGEN_SOLVER_H\n\n#include \"./ComplexSchur.h\"\n\nnamespace Eigen { \n\n/** \\eigenvalues_module \\ingroup Eigenvalues_Module\n  *\n  *\n  * \\class ComplexEigenSolver\n  *\n  * \\brief Computes eigenvalues and eigenvectors of general complex matrices\n  *\n  * \\tparam _MatrixType the type of the matrix of which we are\n  * computing the eigendecomposition; this is expected to be an\n  * instantiation of the Matrix class template.\n  *\n  * The eigenvalues and eigenvectors of a matrix \\f$ A \\f$ are scalars\n  * \\f$ \\lambda \\f$ and vectors \\f$ v \\f$ such that \\f$ Av = \\lambda v\n  * \\f$.  If \\f$ D \\f$ is a diagonal matrix with the eigenvalues on\n  * the diagonal, and \\f$ V \\f$ is a matrix with the eigenvectors as\n  * its columns, then \\f$ A V = V D \\f$. The matrix \\f$ V \\f$ is\n  * almost always invertible, in which case we have \\f$ A = V D V^{-1}\n  * \\f$. This is called the eigendecomposition.\n  *\n  * The main function in this class is compute(), which computes the\n  * eigenvalues and eigenvectors of a given function. The\n  * documentation for that function contains an example showing the\n  * main features of the class.\n  *\n  * \\sa class EigenSolver, class SelfAdjointEigenSolver\n  */\ntemplate<typename _MatrixType> class ComplexEigenSolver\n{\n  public:\n\n    /** \\brief Synonym for the template parameter \\p _MatrixType. */\n    typedef _MatrixType MatrixType;\n\n    enum {\n      RowsAtCompileTime = MatrixType::RowsAtCompileTime,\n      ColsAtCompileTime = MatrixType::ColsAtCompileTime,\n      Options = MatrixType::Options,\n      MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,\n      MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime\n    };\n\n    /** \\brief Scalar type for matrices of type #MatrixType. 
*/\n    typedef typename MatrixType::Scalar Scalar;\n    typedef typename NumTraits<Scalar>::Real RealScalar;\n    typedef Eigen::Index Index; ///< \\deprecated since Eigen 3.3\n\n    /** \\brief Complex scalar type for #MatrixType.\n      *\n      * This is \\c std::complex<Scalar> if #Scalar is real (e.g.,\n      * \\c float or \\c double) and just \\c Scalar if #Scalar is\n      * complex.\n      */\n    typedef std::complex<RealScalar> ComplexScalar;\n\n    /** \\brief Type for vector of eigenvalues as returned by eigenvalues().\n      *\n      * This is a column vector with entries of type #ComplexScalar.\n      * The length of the vector is the size of #MatrixType.\n      */\n    typedef Matrix<ComplexScalar, ColsAtCompileTime, 1, Options&(~RowMajor), MaxColsAtCompileTime, 1> EigenvalueType;\n\n    /** \\brief Type for matrix of eigenvectors as returned by eigenvectors().\n      *\n      * This is a square matrix with entries of type #ComplexScalar.\n      * The size is the same as the size of #MatrixType.\n      */\n    typedef Matrix<ComplexScalar, RowsAtCompileTime, ColsAtCompileTime, Options, MaxRowsAtCompileTime, MaxColsAtCompileTime> EigenvectorType;\n\n    /** \\brief Default constructor.\n      *\n      * The default constructor is useful in cases in which the user intends to\n      * perform decompositions via compute().\n      */\n    ComplexEigenSolver()\n            : m_eivec(),\n              m_eivalues(),\n              m_schur(),\n              m_isInitialized(false),\n              m_eigenvectorsOk(false),\n              m_matX()\n    {}\n\n    /** \\brief Default Constructor with memory preallocation\n      *\n      * Like the default constructor but with preallocation of the internal data\n      * according to the specified problem \\a size.\n      * \\sa ComplexEigenSolver()\n      */\n    explicit ComplexEigenSolver(Index size)\n            : m_eivec(size, size),\n              m_eivalues(size),\n              m_schur(size),\n              m_isInitialized(false),\n              m_eigenvectorsOk(false),\n              m_matX(size, size)\n    {}\n\n    /** \\brief Constructor; computes eigendecomposition of given matrix.\n      *\n      * \\param[in]  matrix  Square matrix whose eigendecomposition is to be computed.\n      * \\param[in]  computeEigenvectors  If true, both the eigenvectors and the\n      *    eigenvalues are computed; if false, only the eigenvalues are\n      *    computed.\n      *\n      * This constructor calls compute() to compute the eigendecomposition.\n      */\n    template<typename InputType>\n    explicit ComplexEigenSolver(const EigenBase<InputType>& matrix, bool computeEigenvectors = true)\n            : m_eivec(matrix.rows(),matrix.cols()),\n              m_eivalues(matrix.cols()),\n              m_schur(matrix.rows()),\n              m_isInitialized(false),\n              m_eigenvectorsOk(false),\n              m_matX(matrix.rows(),matrix.cols())\n    {\n      compute(matrix.derived(), computeEigenvectors);\n    }\n\n    /** \\brief Returns the eigenvectors of given matrix.\n      *\n      * \\returns  A const reference to the matrix whose columns are the eigenvectors.\n      *\n      * \\pre Either the constructor\n      * ComplexEigenSolver(const MatrixType& matrix, bool) or the member\n      * function compute(const MatrixType& matrix, bool) has been called before\n      * to compute the eigendecomposition of a matrix, and\n      * \\p computeEigenvectors was set to true (the default).\n      *\n      * This function returns a matrix 
whose columns are the eigenvectors. Column\n      * \\f$ k \\f$ is an eigenvector corresponding to eigenvalue number \\f$ k\n      * \\f$ as returned by eigenvalues().  The eigenvectors are normalized to\n      * have (Euclidean) norm equal to one. The matrix returned by this\n      * function is the matrix \\f$ V \\f$ in the eigendecomposition \\f$ A = V D\n      * V^{-1} \\f$, if it exists.\n      *\n      * Example: \\include ComplexEigenSolver_eigenvectors.cpp\n      * Output: \\verbinclude ComplexEigenSolver_eigenvectors.out\n      */\n    const EigenvectorType& eigenvectors() const\n    {\n      eigen_assert(m_isInitialized && \"ComplexEigenSolver is not initialized.\");\n      eigen_assert(m_eigenvectorsOk && \"The eigenvectors have not been computed together with the eigenvalues.\");\n      return m_eivec;\n    }\n\n    /** \\brief Returns the eigenvalues of given matrix.\n      *\n      * \\returns A const reference to the column vector containing the eigenvalues.\n      *\n      * \\pre Either the constructor\n      * ComplexEigenSolver(const MatrixType& matrix, bool) or the member\n      * function compute(const MatrixType& matrix, bool) has been called before\n      * to compute the eigendecomposition of a matrix.\n      *\n      * This function returns a column vector containing the\n      * eigenvalues. Eigenvalues are repeated according to their\n      * algebraic multiplicity, so there are as many eigenvalues as\n      * rows in the matrix. The eigenvalues are not sorted in any particular\n      * order.\n      *\n      * Example: \\include ComplexEigenSolver_eigenvalues.cpp\n      * Output: \\verbinclude ComplexEigenSolver_eigenvalues.out\n      */\n    const EigenvalueType& eigenvalues() const\n    {\n      eigen_assert(m_isInitialized && \"ComplexEigenSolver is not initialized.\");\n      return m_eivalues;\n    }\n\n    /** \\brief Computes eigendecomposition of given matrix.\n      *\n      * \\param[in]  matrix  Square matrix whose eigendecomposition is to be computed.\n      * \\param[in]  computeEigenvectors  If true, both the eigenvectors and the\n      *    eigenvalues are computed; if false, only the eigenvalues are\n      *    computed.\n      * \\returns    Reference to \\c *this\n      *\n      * This function computes the eigenvalues of the complex matrix \\p matrix.\n      * The eigenvalues() function can be used to retrieve them.  If\n      * \\p computeEigenvectors is true, then the eigenvectors are also computed\n      * and can be retrieved by calling eigenvectors().\n      *\n      * The matrix is first reduced to Schur form using the\n      * ComplexSchur class. 
The Schur decomposition is then used to\n      * compute the eigenvalues and eigenvectors.\n      *\n      * The cost of the computation is dominated by the cost of the\n      * Schur decomposition, which is \\f$ O(n^3) \\f$ where \\f$ n \\f$\n      * is the size of the matrix.\n      *\n      * Example: \\include ComplexEigenSolver_compute.cpp\n      * Output: \\verbinclude ComplexEigenSolver_compute.out\n      */\n    template<typename InputType>\n    ComplexEigenSolver& compute(const EigenBase<InputType>& matrix, bool computeEigenvectors = true);\n\n    /** \\brief Reports whether previous computation was successful.\n      *\n      * \\returns \\c Success if computation was successful, \\c NoConvergence otherwise.\n      */\n    ComputationInfo info() const\n    {\n      eigen_assert(m_isInitialized && \"ComplexEigenSolver is not initialized.\");\n      return m_schur.info();\n    }\n\n    /** \\brief Sets the maximum number of iterations allowed. */\n    ComplexEigenSolver& setMaxIterations(Index maxIters)\n    {\n      m_schur.setMaxIterations(maxIters);\n      return *this;\n    }\n\n    /** \\brief Returns the maximum number of iterations. */\n    Index getMaxIterations()\n    {\n      return m_schur.getMaxIterations();\n    }\n\n  protected:\n    \n    static void check_template_parameters()\n    {\n      EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);\n    }\n    \n    EigenvectorType m_eivec;\n    EigenvalueType m_eivalues;\n    ComplexSchur<MatrixType> m_schur;\n    bool m_isInitialized;\n    bool m_eigenvectorsOk;\n    EigenvectorType m_matX;\n\n  private:\n    void doComputeEigenvectors(RealScalar matrixnorm);\n    void sortEigenvalues(bool computeEigenvectors);\n};\n\n\ntemplate<typename MatrixType>\ntemplate<typename InputType>\nComplexEigenSolver<MatrixType>& \nComplexEigenSolver<MatrixType>::compute(const EigenBase<InputType>& matrix, bool computeEigenvectors)\n{\n  check_template_parameters();\n  \n  // this code is inspired from Jampack\n  eigen_assert(matrix.cols() == matrix.rows());\n\n  // Do a complex Schur decomposition, A = U T U^*\n  // The eigenvalues are on the diagonal of T.\n  m_schur.compute(matrix.derived(), computeEigenvectors);\n\n  if(m_schur.info() == Success)\n  {\n    m_eivalues = m_schur.matrixT().diagonal();\n    if(computeEigenvectors)\n      doComputeEigenvectors(m_schur.matrixT().norm());\n    sortEigenvalues(computeEigenvectors);\n  }\n\n  m_isInitialized = true;\n  m_eigenvectorsOk = computeEigenvectors;\n  return *this;\n}\n\n\ntemplate<typename MatrixType>\nvoid ComplexEigenSolver<MatrixType>::doComputeEigenvectors(RealScalar matrixnorm)\n{\n  const Index n = m_eivalues.size();\n\n  matrixnorm = numext::maxi(matrixnorm,(std::numeric_limits<RealScalar>::min)());\n\n  // Compute X such that T = X D X^(-1), where D is the diagonal of T.\n  // The matrix X is unit triangular.\n  m_matX = EigenvectorType::Zero(n, n);\n  for(Index k=n-1 ; k>=0 ; k--)\n  {\n    m_matX.coeffRef(k,k) = ComplexScalar(1.0,0.0);\n    // Compute X(i,k) using the (i,k) entry of the equation X T = D X\n    for(Index i=k-1 ; i>=0 ; i--)\n    {\n      m_matX.coeffRef(i,k) = -m_schur.matrixT().coeff(i,k);\n      if(k-i-1>0)\n        m_matX.coeffRef(i,k) -= (m_schur.matrixT().row(i).segment(i+1,k-i-1) * m_matX.col(k).segment(i+1,k-i-1)).value();\n      ComplexScalar z = m_schur.matrixT().coeff(i,i) - m_schur.matrixT().coeff(k,k);\n      if(z==ComplexScalar(0))\n      {\n        // If the i-th and k-th eigenvalue are equal, then z equals 0.\n        // Use a small value instead, to 
prevent division by zero.\n        numext::real_ref(z) = NumTraits<RealScalar>::epsilon() * matrixnorm;\n      }\n      m_matX.coeffRef(i,k) = m_matX.coeff(i,k) / z;\n    }\n  }\n\n  // Compute V as V = U X; now A = U T U^* = U X D X^(-1) U^* = V D V^(-1)\n  m_eivec.noalias() = m_schur.matrixU() * m_matX;\n  // .. and normalize the eigenvectors\n  for(Index k=0 ; k<n ; k++)\n  {\n    m_eivec.col(k).normalize();\n  }\n}\n\n\ntemplate<typename MatrixType>\nvoid ComplexEigenSolver<MatrixType>::sortEigenvalues(bool computeEigenvectors)\n{\n  const Index n = m_eivalues.size();\n  for (Index i=0; i<n; i++)\n  {\n    Index k;\n    m_eivalues.cwiseAbs().tail(n-i).minCoeff(&k);\n    if (k != 0)\n    {\n      k += i;\n      std::swap(m_eivalues[k],m_eivalues[i]);\n      if(computeEigenvectors)\n        m_eivec.col(i).swap(m_eivec.col(k));\n    }\n  }\n}\n\n} // end namespace Eigen\n\n#endif // EIGEN_COMPLEX_EIGEN_SOLVER_H\n"
  },
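  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Eigenvalues/ComplexEigenSolver_usage_sketch.cpp",
    "content": "// NOTE: editor-added illustrative sketch; this file and its path are\n// hypothetical and not part of the Eigen 3.4.0 distribution or of regenie.\n// It exercises the ComplexEigenSolver API documented in\n// ComplexEigenSolver.h: compute() via the constructor, then\n// eigenvalues()/eigenvectors(), checking the defining relation\n// A v = lambda v for the first eigenpair.\n\n#include <Eigen/Dense>\n#include <iostream>\n\nint main()\n{\n  // Small random complex matrix A.\n  Eigen::MatrixXcd A = Eigen::MatrixXcd::Random(4, 4);\n\n  // The constructor computes the decomposition; eigenvectors are\n  // computed by default.\n  Eigen::ComplexEigenSolver<Eigen::MatrixXcd> ces(A);\n  if (ces.info() != Eigen::Success) {\n    std::cerr << \"eigendecomposition did not converge\\n\";\n    return 1;\n  }\n\n  const Eigen::VectorXcd& lambda = ces.eigenvalues();\n  const Eigen::MatrixXcd& V = ces.eigenvectors();\n\n  // The residual A*v_0 - lambda_0*v_0 should be near machine precision.\n  std::cout << \"residual: \"\n            << (A * V.col(0) - lambda(0) * V.col(0)).norm() << \"\\n\";\n  return 0;\n}\n"
  },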
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Eigenvalues/ComplexSchur.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2009 Claire Maurice\n// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>\n// Copyright (C) 2010,2012 Jitse Niesen <jitse@maths.leeds.ac.uk>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_COMPLEX_SCHUR_H\n#define EIGEN_COMPLEX_SCHUR_H\n\n#include \"./HessenbergDecomposition.h\"\n\nnamespace Eigen { \n\nnamespace internal {\ntemplate<typename MatrixType, bool IsComplex> struct complex_schur_reduce_to_hessenberg;\n}\n\n/** \\eigenvalues_module \\ingroup Eigenvalues_Module\n  *\n  *\n  * \\class ComplexSchur\n  *\n  * \\brief Performs a complex Schur decomposition of a real or complex square matrix\n  *\n  * \\tparam _MatrixType the type of the matrix of which we are\n  * computing the Schur decomposition; this is expected to be an\n  * instantiation of the Matrix class template.\n  *\n  * Given a real or complex square matrix A, this class computes the\n  * Schur decomposition: \\f$ A = U T U^*\\f$ where U is a unitary\n  * complex matrix, and T is a complex upper triangular matrix.  The\n  * diagonal of the matrix T corresponds to the eigenvalues of the\n  * matrix A.\n  *\n  * Call the function compute() to compute the Schur decomposition of\n  * a given matrix. Alternatively, you can use the \n  * ComplexSchur(const MatrixType&, bool) constructor which computes\n  * the Schur decomposition at construction time. Once the\n  * decomposition is computed, you can use the matrixU() and matrixT()\n  * functions to retrieve the matrices U and V in the decomposition.\n  *\n  * \\note This code is inspired from Jampack\n  *\n  * \\sa class RealSchur, class EigenSolver, class ComplexEigenSolver\n  */\ntemplate<typename _MatrixType> class ComplexSchur\n{\n  public:\n    typedef _MatrixType MatrixType;\n    enum {\n      RowsAtCompileTime = MatrixType::RowsAtCompileTime,\n      ColsAtCompileTime = MatrixType::ColsAtCompileTime,\n      Options = MatrixType::Options,\n      MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,\n      MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime\n    };\n\n    /** \\brief Scalar type for matrices of type \\p _MatrixType. */\n    typedef typename MatrixType::Scalar Scalar;\n    typedef typename NumTraits<Scalar>::Real RealScalar;\n    typedef Eigen::Index Index; ///< \\deprecated since Eigen 3.3\n\n    /** \\brief Complex scalar type for \\p _MatrixType. \n      *\n      * This is \\c std::complex<Scalar> if #Scalar is real (e.g.,\n      * \\c float or \\c double) and just \\c Scalar if #Scalar is\n      * complex.\n      */\n    typedef std::complex<RealScalar> ComplexScalar;\n\n    /** \\brief Type for the matrices in the Schur decomposition.\n      *\n      * This is a square matrix with entries of type #ComplexScalar. \n      * The size is the same as the size of \\p _MatrixType.\n      */\n    typedef Matrix<ComplexScalar, RowsAtCompileTime, ColsAtCompileTime, Options, MaxRowsAtCompileTime, MaxColsAtCompileTime> ComplexMatrixType;\n\n    /** \\brief Default constructor.\n      *\n      * \\param [in] size  Positive integer, size of the matrix whose Schur decomposition will be computed.\n      *\n      * The default constructor is useful in cases in which the user\n      * intends to perform decompositions via compute().  
The \\p size\n      * parameter is only used as a hint. It is not an error to give a\n      * wrong \\p size, but it may impair performance.\n      *\n      * \\sa compute() for an example.\n      */\n    explicit ComplexSchur(Index size = RowsAtCompileTime==Dynamic ? 1 : RowsAtCompileTime)\n      : m_matT(size,size),\n        m_matU(size,size),\n        m_hess(size),\n        m_isInitialized(false),\n        m_matUisUptodate(false),\n        m_maxIters(-1)\n    {}\n\n    /** \\brief Constructor; computes Schur decomposition of given matrix. \n      * \n      * \\param[in]  matrix    Square matrix whose Schur decomposition is to be computed.\n      * \\param[in]  computeU  If true, both T and U are computed; if false, only T is computed.\n      *\n      * This constructor calls compute() to compute the Schur decomposition.\n      *\n      * \\sa matrixT() and matrixU() for examples.\n      */\n    template<typename InputType>\n    explicit ComplexSchur(const EigenBase<InputType>& matrix, bool computeU = true)\n      : m_matT(matrix.rows(),matrix.cols()),\n        m_matU(matrix.rows(),matrix.cols()),\n        m_hess(matrix.rows()),\n        m_isInitialized(false),\n        m_matUisUptodate(false),\n        m_maxIters(-1)\n    {\n      compute(matrix.derived(), computeU);\n    }\n\n    /** \\brief Returns the unitary matrix in the Schur decomposition. \n      *\n      * \\returns A const reference to the matrix U.\n      *\n      * It is assumed that either the constructor\n      * ComplexSchur(const MatrixType& matrix, bool computeU) or the\n      * member function compute(const MatrixType& matrix, bool computeU)\n      * has been called before to compute the Schur decomposition of a\n      * matrix, and that \\p computeU was set to true (the default\n      * value).\n      *\n      * Example: \\include ComplexSchur_matrixU.cpp\n      * Output: \\verbinclude ComplexSchur_matrixU.out\n      */\n    const ComplexMatrixType& matrixU() const\n    {\n      eigen_assert(m_isInitialized && \"ComplexSchur is not initialized.\");\n      eigen_assert(m_matUisUptodate && \"The matrix U has not been computed during the ComplexSchur decomposition.\");\n      return m_matU;\n    }\n\n    /** \\brief Returns the triangular matrix in the Schur decomposition. \n      *\n      * \\returns A const reference to the matrix T.\n      *\n      * It is assumed that either the constructor\n      * ComplexSchur(const MatrixType& matrix, bool computeU) or the\n      * member function compute(const MatrixType& matrix, bool computeU)\n      * has been called before to compute the Schur decomposition of a\n      * matrix.\n      *\n      * Note that this function returns a plain square matrix. If you want to reference\n      * only the upper triangular part, use:\n      * \\code schur.matrixT().triangularView<Upper>() \\endcode \n      *\n      * Example: \\include ComplexSchur_matrixT.cpp\n      * Output: \\verbinclude ComplexSchur_matrixT.out\n      */\n    const ComplexMatrixType& matrixT() const\n    {\n      eigen_assert(m_isInitialized && \"ComplexSchur is not initialized.\");\n      return m_matT;\n    }\n\n    /** \\brief Computes Schur decomposition of given matrix. 
\n      * \n      * \\param[in]  matrix  Square matrix whose Schur decomposition is to be computed.\n      * \\param[in]  computeU  If true, both T and U are computed; if false, only T is computed.\n\n      * \\returns    Reference to \\c *this\n      *\n      * The Schur decomposition is computed by first reducing the\n      * matrix to Hessenberg form using the class\n      * HessenbergDecomposition. The Hessenberg matrix is then reduced\n      * to triangular form by performing QR iterations with a single\n      * shift. The cost of computing the Schur decomposition depends\n      * on the number of iterations; as a rough guide, it may be taken\n      * to be \\f$25n^3\\f$ complex flops, or \\f$10n^3\\f$ complex flops\n      * if \\a computeU is false.\n      *\n      * Example: \\include ComplexSchur_compute.cpp\n      * Output: \\verbinclude ComplexSchur_compute.out\n      *\n      * \\sa compute(const MatrixType&, bool, Index)\n      */\n    template<typename InputType>\n    ComplexSchur& compute(const EigenBase<InputType>& matrix, bool computeU = true);\n    \n    /** \\brief Compute Schur decomposition from a given Hessenberg matrix\n     *  \\param[in] matrixH Matrix in Hessenberg form H\n     *  \\param[in] matrixQ unitary matrix Q that transforms a matrix A to H : A = Q H Q^*\n     *  \\param computeU Computes the matrix U of the Schur vectors\n     * \\return Reference to \\c *this\n     * \n     *  This routine assumes that the matrix is already reduced to Hessenberg form matrixH\n     *  using either the class HessenbergDecomposition or another means. \n     *  It computes the upper triangular matrix T of the Schur decomposition of H.\n     *  When computeU is true, this routine computes the matrix U such that \n     *  A = U T U^* =  (QZ) T (QZ)^* = Q H Q^* where A is the initial matrix\n     * \n     * NOTE Q is referenced if computeU is true; so, if the initial unitary matrix\n     * is not available, the user should give an identity matrix (Q.setIdentity())\n     * \n     * \\sa compute(const MatrixType&, bool)\n     */\n    template<typename HessMatrixType, typename OrthMatrixType>\n    ComplexSchur& computeFromHessenberg(const HessMatrixType& matrixH, const OrthMatrixType& matrixQ,  bool computeU=true);\n\n    /** \\brief Reports whether previous computation was successful.\n      *\n      * \\returns \\c Success if computation was successful, \\c NoConvergence otherwise.\n      */\n    ComputationInfo info() const\n    {\n      eigen_assert(m_isInitialized && \"ComplexSchur is not initialized.\");\n      return m_info;\n    }\n\n    /** \\brief Sets the maximum number of iterations allowed. \n      *\n      * If not specified by the user, the maximum number of iterations is m_maxIterationsPerRow times the size\n      * of the matrix.\n      */\n    ComplexSchur& setMaxIterations(Index maxIters)\n    {\n      m_maxIters = maxIters;\n      return *this;\n    }\n\n    /** \\brief Returns the maximum number of iterations. */\n    Index getMaxIterations()\n    {\n      return m_maxIters;\n    }\n\n    /** \\brief Maximum number of iterations per row.\n      *\n      * If not otherwise specified, the maximum number of iterations is this number times the size of the\n      * matrix. 
It is currently set to 30.\n      */\n    static const int m_maxIterationsPerRow = 30;\n\n  protected:\n    ComplexMatrixType m_matT, m_matU;\n    HessenbergDecomposition<MatrixType> m_hess;\n    ComputationInfo m_info;\n    bool m_isInitialized;\n    bool m_matUisUptodate;\n    Index m_maxIters;\n\n  private:  \n    bool subdiagonalEntryIsNeglegible(Index i);\n    ComplexScalar computeShift(Index iu, Index iter);\n    void reduceToTriangularForm(bool computeU);\n    friend struct internal::complex_schur_reduce_to_hessenberg<MatrixType, NumTraits<Scalar>::IsComplex>;\n};\n\n/** If m_matT(i+1,i) is negligible in floating point arithmetic\n  * compared to m_matT(i,i) and m_matT(i+1,i+1), then set it to zero and\n  * return true, else return false. */\ntemplate<typename MatrixType>\ninline bool ComplexSchur<MatrixType>::subdiagonalEntryIsNeglegible(Index i)\n{\n  RealScalar d = numext::norm1(m_matT.coeff(i,i)) + numext::norm1(m_matT.coeff(i+1,i+1));\n  RealScalar sd = numext::norm1(m_matT.coeff(i+1,i));\n  if (internal::isMuchSmallerThan(sd, d, NumTraits<RealScalar>::epsilon()))\n  {\n    m_matT.coeffRef(i+1,i) = ComplexScalar(0);\n    return true;\n  }\n  return false;\n}\n\n\n/** Compute the shift in the current QR iteration. */\ntemplate<typename MatrixType>\ntypename ComplexSchur<MatrixType>::ComplexScalar ComplexSchur<MatrixType>::computeShift(Index iu, Index iter)\n{\n  using std::abs;\n  if (iter == 10 || iter == 20) \n  {\n    // exceptional shift, taken from http://www.netlib.org/eispack/comqr.f\n    return abs(numext::real(m_matT.coeff(iu,iu-1))) + abs(numext::real(m_matT.coeff(iu-1,iu-2)));\n  }\n\n  // compute the shift as one of the eigenvalues of t, the 2x2\n  // diagonal block on the bottom of the active submatrix\n  Matrix<ComplexScalar,2,2> t = m_matT.template block<2,2>(iu-1,iu-1);\n  RealScalar normt = t.cwiseAbs().sum();\n  t /= normt;     // the normalization by normt is to avoid under/overflow\n\n  ComplexScalar b = t.coeff(0,1) * t.coeff(1,0);\n  ComplexScalar c = t.coeff(0,0) - t.coeff(1,1);\n  ComplexScalar disc = sqrt(c*c + RealScalar(4)*b);\n  ComplexScalar det = t.coeff(0,0) * t.coeff(1,1) - b;\n  ComplexScalar trace = t.coeff(0,0) + t.coeff(1,1);\n  ComplexScalar eival1 = (trace + disc) / RealScalar(2);\n  ComplexScalar eival2 = (trace - disc) / RealScalar(2);\n  RealScalar eival1_norm = numext::norm1(eival1);\n  RealScalar eival2_norm = numext::norm1(eival2);\n  // A division by zero can only occur if eival1==eival2==0.\n  // In this case, det==0, and all we have to do is check that eival2_norm!=0\n  if(eival1_norm > eival2_norm)\n    eival2 = det / eival1;\n  else if(eival2_norm!=RealScalar(0))\n    eival1 = det / eival2;\n\n  // choose the eigenvalue closest to the bottom entry of the diagonal\n  if(numext::norm1(eival1-t.coeff(1,1)) < numext::norm1(eival2-t.coeff(1,1)))\n    return normt * eival1;\n  else\n    return normt * eival2;\n}\n\n\ntemplate<typename MatrixType>\ntemplate<typename InputType>\nComplexSchur<MatrixType>& ComplexSchur<MatrixType>::compute(const EigenBase<InputType>& matrix, bool computeU)\n{\n  m_matUisUptodate = false;\n  eigen_assert(matrix.cols() == matrix.rows());\n\n  if(matrix.cols() == 1)\n  {\n    m_matT = matrix.derived().template cast<ComplexScalar>();\n    if(computeU)  m_matU = ComplexMatrixType::Identity(1,1);\n    m_info = Success;\n    m_isInitialized = true;\n    m_matUisUptodate = computeU;\n    return *this;\n  }\n\n  internal::complex_schur_reduce_to_hessenberg<MatrixType, NumTraits<Scalar>::IsComplex>::run(*this, 
matrix.derived(), computeU);\n  computeFromHessenberg(m_matT, m_matU, computeU);\n  return *this;\n}\n\ntemplate<typename MatrixType>\ntemplate<typename HessMatrixType, typename OrthMatrixType>\nComplexSchur<MatrixType>& ComplexSchur<MatrixType>::computeFromHessenberg(const HessMatrixType& matrixH, const OrthMatrixType& matrixQ, bool computeU)\n{\n  m_matT = matrixH;\n  if(computeU)\n    m_matU = matrixQ;\n  reduceToTriangularForm(computeU);\n  return *this;\n}\nnamespace internal {\n\n/* Reduce given matrix to Hessenberg form */\ntemplate<typename MatrixType, bool IsComplex>\nstruct complex_schur_reduce_to_hessenberg\n{\n  // this is the implementation for the case IsComplex = true\n  static void run(ComplexSchur<MatrixType>& _this, const MatrixType& matrix, bool computeU)\n  {\n    _this.m_hess.compute(matrix);\n    _this.m_matT = _this.m_hess.matrixH();\n    if(computeU)  _this.m_matU = _this.m_hess.matrixQ();\n  }\n};\n\ntemplate<typename MatrixType>\nstruct complex_schur_reduce_to_hessenberg<MatrixType, false>\n{\n  static void run(ComplexSchur<MatrixType>& _this, const MatrixType& matrix, bool computeU)\n  {\n    typedef typename ComplexSchur<MatrixType>::ComplexScalar ComplexScalar;\n\n    // Note: m_hess is over RealScalar; m_matT and m_matU is over ComplexScalar\n    _this.m_hess.compute(matrix);\n    _this.m_matT = _this.m_hess.matrixH().template cast<ComplexScalar>();\n    if(computeU)  \n    {\n      // This may cause an allocation which seems to be avoidable\n      MatrixType Q = _this.m_hess.matrixQ(); \n      _this.m_matU = Q.template cast<ComplexScalar>();\n    }\n  }\n};\n\n} // end namespace internal\n\n// Reduce the Hessenberg matrix m_matT to triangular form by QR iteration.\ntemplate<typename MatrixType>\nvoid ComplexSchur<MatrixType>::reduceToTriangularForm(bool computeU)\n{  \n  Index maxIters = m_maxIters;\n  if (maxIters == -1)\n    maxIters = m_maxIterationsPerRow * m_matT.rows();\n\n  // The matrix m_matT is divided in three parts. \n  // Rows 0,...,il-1 are decoupled from the rest because m_matT(il,il-1) is zero. \n  // Rows il,...,iu is the part we are working on (the active submatrix).\n  // Rows iu+1,...,end are already brought in triangular form.\n  Index iu = m_matT.cols() - 1;\n  Index il;\n  Index iter = 0; // number of iterations we are working on the (iu,iu) element\n  Index totalIter = 0; // number of iterations for whole matrix\n\n  while(true)\n  {\n    // find iu, the bottom row of the active submatrix\n    while(iu > 0)\n    {\n      if(!subdiagonalEntryIsNeglegible(iu-1)) break;\n      iter = 0;\n      --iu;\n    }\n\n    // if iu is zero then we are done; the whole matrix is triangularized\n    if(iu==0) break;\n\n    // if we spent too many iterations, we give up\n    iter++;\n    totalIter++;\n    if(totalIter > maxIters) break;\n\n    // find il, the top row of the active submatrix\n    il = iu-1;\n    while(il > 0 && !subdiagonalEntryIsNeglegible(il-1))\n    {\n      --il;\n    }\n\n    /* perform the QR step using Givens rotations. The first rotation\n       creates a bulge; the (il+2,il) element becomes nonzero. This\n       bulge is chased down to the bottom of the active submatrix. 
*/\n\n    ComplexScalar shift = computeShift(iu, iter);\n    JacobiRotation<ComplexScalar> rot;\n    rot.makeGivens(m_matT.coeff(il,il) - shift, m_matT.coeff(il+1,il));\n    m_matT.rightCols(m_matT.cols()-il).applyOnTheLeft(il, il+1, rot.adjoint());\n    m_matT.topRows((std::min)(il+2,iu)+1).applyOnTheRight(il, il+1, rot);\n    if(computeU) m_matU.applyOnTheRight(il, il+1, rot);\n\n    for(Index i=il+1 ; i<iu ; i++)\n    {\n      rot.makeGivens(m_matT.coeffRef(i,i-1), m_matT.coeffRef(i+1,i-1), &m_matT.coeffRef(i,i-1));\n      m_matT.coeffRef(i+1,i-1) = ComplexScalar(0);\n      m_matT.rightCols(m_matT.cols()-i).applyOnTheLeft(i, i+1, rot.adjoint());\n      m_matT.topRows((std::min)(i+2,iu)+1).applyOnTheRight(i, i+1, rot);\n      if(computeU) m_matU.applyOnTheRight(i, i+1, rot);\n    }\n  }\n\n  if(totalIter <= maxIters)\n    m_info = Success;\n  else\n    m_info = NoConvergence;\n\n  m_isInitialized = true;\n  m_matUisUptodate = computeU;\n}\n\n} // end namespace Eigen\n\n#endif // EIGEN_COMPLEX_SCHUR_H\n"
  },
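  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Eigenvalues/ComplexSchur_usage_sketch.cpp",
    "content": "// NOTE: editor-added illustrative sketch; this file and its path are\n// hypothetical and not part of the Eigen 3.4.0 distribution or of regenie.\n// It demonstrates the ComplexSchur API documented in ComplexSchur.h:\n// after compute(), matrixU() is unitary and matrixT() is upper\n// triangular, with A = U T U^*.\n\n#include <Eigen/Dense>\n#include <iostream>\n\nint main()\n{\n  Eigen::MatrixXcd A = Eigen::MatrixXcd::Random(4, 4);\n\n  // computeU defaults to true, so both T and U are formed.\n  Eigen::ComplexSchur<Eigen::MatrixXcd> schur(A);\n  if (schur.info() != Eigen::Success) return 1;\n\n  const Eigen::MatrixXcd& U = schur.matrixU();\n  const Eigen::MatrixXcd& T = schur.matrixT();\n\n  // Reconstruction error of A = U T U^* should be near machine precision.\n  std::cout << \"||A - U T U^*|| = \"\n            << (A - U * T * U.adjoint()).norm() << \"\\n\";\n  return 0;\n}\n"
  },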
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h",
    "content": "/*\n Copyright (c) 2011, Intel Corporation. All rights reserved.\n\n Redistribution and use in source and binary forms, with or without modification,\n are permitted provided that the following conditions are met:\n\n * Redistributions of source code must retain the above copyright notice, this\n   list of conditions and the following disclaimer.\n * Redistributions in binary form must reproduce the above copyright notice,\n   this list of conditions and the following disclaimer in the documentation\n   and/or other materials provided with the distribution.\n * Neither the name of Intel Corporation nor the names of its contributors may\n   be used to endorse or promote products derived from this software without\n   specific prior written permission.\n\n THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\" AND\n ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED\n WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE\n DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR\n ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES\n (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;\n LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON\n ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\n SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\n ********************************************************************************\n *   Content : Eigen bindings to LAPACKe\n *    Complex Schur needed to complex unsymmetrical eigenvalues/eigenvectors.\n ********************************************************************************\n*/\n\n#ifndef EIGEN_COMPLEX_SCHUR_LAPACKE_H\n#define EIGEN_COMPLEX_SCHUR_LAPACKE_H\n\nnamespace Eigen { \n\n/** \\internal Specialization for the data types supported by LAPACKe */\n\n#define EIGEN_LAPACKE_SCHUR_COMPLEX(EIGTYPE, LAPACKE_TYPE, LAPACKE_PREFIX, LAPACKE_PREFIX_U, EIGCOLROW, LAPACKE_COLROW) \\\ntemplate<> template<typename InputType> inline \\\nComplexSchur<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >& \\\nComplexSchur<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >::compute(const EigenBase<InputType>& matrix, bool computeU) \\\n{ \\\n  typedef Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> MatrixType; \\\n  typedef MatrixType::RealScalar RealScalar; \\\n  typedef std::complex<RealScalar> ComplexScalar; \\\n\\\n  eigen_assert(matrix.cols() == matrix.rows()); \\\n\\\n  m_matUisUptodate = false; \\\n  if(matrix.cols() == 1) \\\n  { \\\n    m_matT = matrix.derived().template cast<ComplexScalar>(); \\\n    if(computeU)  m_matU = ComplexMatrixType::Identity(1,1); \\\n      m_info = Success; \\\n      m_isInitialized = true; \\\n      m_matUisUptodate = computeU; \\\n      return *this; \\\n  } \\\n  lapack_int n = internal::convert_index<lapack_int>(matrix.cols()), sdim, info; \\\n  lapack_int matrix_order = LAPACKE_COLROW; \\\n  char jobvs, sort='N'; \\\n  LAPACK_##LAPACKE_PREFIX_U##_SELECT1 select = 0; \\\n  jobvs = (computeU) ? 
'V' : 'N'; \\\n  m_matU.resize(n, n); \\\n  lapack_int ldvs  = internal::convert_index<lapack_int>(m_matU.outerStride()); \\\n  m_matT = matrix; \\\n  lapack_int lda = internal::convert_index<lapack_int>(m_matT.outerStride()); \\\n  Matrix<EIGTYPE, Dynamic, Dynamic> w; \\\n  w.resize(n, 1);\\\n  info = LAPACKE_##LAPACKE_PREFIX##gees( matrix_order, jobvs, sort, select, n, (LAPACKE_TYPE*)m_matT.data(), lda, &sdim, (LAPACKE_TYPE*)w.data(), (LAPACKE_TYPE*)m_matU.data(), ldvs ); \\\n  if(info == 0) \\\n    m_info = Success; \\\n  else \\\n    m_info = NoConvergence; \\\n\\\n  m_isInitialized = true; \\\n  m_matUisUptodate = computeU; \\\n  return *this; \\\n\\\n}\n\nEIGEN_LAPACKE_SCHUR_COMPLEX(dcomplex, lapack_complex_double, z, Z, ColMajor, LAPACK_COL_MAJOR)\nEIGEN_LAPACKE_SCHUR_COMPLEX(scomplex, lapack_complex_float,  c, C, ColMajor, LAPACK_COL_MAJOR)\nEIGEN_LAPACKE_SCHUR_COMPLEX(dcomplex, lapack_complex_double, z, Z, RowMajor, LAPACK_ROW_MAJOR)\nEIGEN_LAPACKE_SCHUR_COMPLEX(scomplex, lapack_complex_float,  c, C, RowMajor, LAPACK_ROW_MAJOR)\n\n} // end namespace Eigen\n\n#endif // EIGEN_COMPLEX_SCHUR_LAPACKE_H\n"
  },
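  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Eigenvalues/ComplexSchur_fromHessenberg_sketch.cpp",
    "content": "// NOTE: editor-added illustrative sketch; this file and its path are\n// hypothetical and not part of the Eigen 3.4.0 distribution or of regenie.\n// It shows the second entry point documented in ComplexSchur.h,\n// computeFromHessenberg(), which reuses an existing Hessenberg\n// reduction A = Q H Q^* instead of recomputing it inside compute().\n\n#include <Eigen/Dense>\n#include <iostream>\n\nint main()\n{\n  Eigen::MatrixXcd A = Eigen::MatrixXcd::Random(5, 5);\n\n  // Reduce A to Hessenberg form once; materialize H and Q as plain\n  // matrices (matrixH()/matrixQ() return expression objects).\n  Eigen::HessenbergDecomposition<Eigen::MatrixXcd> hess(A);\n  Eigen::MatrixXcd H = hess.matrixH();\n  Eigen::MatrixXcd Q = hess.matrixQ();\n\n  // Feed the precomputed pair into the QR iteration. Q is referenced\n  // because computeU is true, as the documentation requires.\n  Eigen::ComplexSchur<Eigen::MatrixXcd> schur(A.rows());\n  schur.computeFromHessenberg(H, Q, true);\n  if (schur.info() != Eigen::Success) return 1;\n\n  std::cout << \"||A - U T U^*|| = \"\n            << (A - schur.matrixU() * schur.matrixT()\n                        * schur.matrixU().adjoint()).norm() << \"\\n\";\n  return 0;\n}\n"
  },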
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Eigenvalues/EigenSolver.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>\n// Copyright (C) 2010,2012 Jitse Niesen <jitse@maths.leeds.ac.uk>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_EIGENSOLVER_H\n#define EIGEN_EIGENSOLVER_H\n\n#include \"./RealSchur.h\"\n\nnamespace Eigen { \n\n/** \\eigenvalues_module \\ingroup Eigenvalues_Module\n  *\n  *\n  * \\class EigenSolver\n  *\n  * \\brief Computes eigenvalues and eigenvectors of general matrices\n  *\n  * \\tparam _MatrixType the type of the matrix of which we are computing the\n  * eigendecomposition; this is expected to be an instantiation of the Matrix\n  * class template. Currently, only real matrices are supported.\n  *\n  * The eigenvalues and eigenvectors of a matrix \\f$ A \\f$ are scalars\n  * \\f$ \\lambda \\f$ and vectors \\f$ v \\f$ such that \\f$ Av = \\lambda v \\f$.  If\n  * \\f$ D \\f$ is a diagonal matrix with the eigenvalues on the diagonal, and\n  * \\f$ V \\f$ is a matrix with the eigenvectors as its columns, then \\f$ A V =\n  * V D \\f$. The matrix \\f$ V \\f$ is almost always invertible, in which case we\n  * have \\f$ A = V D V^{-1} \\f$. This is called the eigendecomposition.\n  *\n  * The eigenvalues and eigenvectors of a matrix may be complex, even when the\n  * matrix is real. However, we can choose real matrices \\f$ V \\f$ and \\f$ D\n  * \\f$ satisfying \\f$ A V = V D \\f$, just like the eigendecomposition, if the\n  * matrix \\f$ D \\f$ is not required to be diagonal, but if it is allowed to\n  * have blocks of the form\n  * \\f[ \\begin{bmatrix} u & v \\\\ -v & u \\end{bmatrix} \\f]\n  * (where \\f$ u \\f$ and \\f$ v \\f$ are real numbers) on the diagonal.  These\n  * blocks correspond to complex eigenvalue pairs \\f$ u \\pm iv \\f$. We call\n  * this variant of the eigendecomposition the pseudo-eigendecomposition.\n  *\n  * Call the function compute() to compute the eigenvalues and eigenvectors of\n  * a given matrix. Alternatively, you can use the \n  * EigenSolver(const MatrixType&, bool) constructor which computes the\n  * eigenvalues and eigenvectors at construction time. Once the eigenvalue and\n  * eigenvectors are computed, they can be retrieved with the eigenvalues() and\n  * eigenvectors() functions. The pseudoEigenvalueMatrix() and\n  * pseudoEigenvectors() methods allow the construction of the\n  * pseudo-eigendecomposition.\n  *\n  * The documentation for EigenSolver(const MatrixType&, bool) contains an\n  * example of the typical use of this class.\n  *\n  * \\note The implementation is adapted from\n  * <a href=\"http://math.nist.gov/javanumerics/jama/\">JAMA</a> (public domain).\n  * Their code is based on EISPACK.\n  *\n  * \\sa MatrixBase::eigenvalues(), class ComplexEigenSolver, class SelfAdjointEigenSolver\n  */\ntemplate<typename _MatrixType> class EigenSolver\n{\n  public:\n\n    /** \\brief Synonym for the template parameter \\p _MatrixType. 
*/\n    typedef _MatrixType MatrixType;\n\n    enum {\n      RowsAtCompileTime = MatrixType::RowsAtCompileTime,\n      ColsAtCompileTime = MatrixType::ColsAtCompileTime,\n      Options = MatrixType::Options,\n      MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,\n      MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime\n    };\n\n    /** \\brief Scalar type for matrices of type #MatrixType. */\n    typedef typename MatrixType::Scalar Scalar;\n    typedef typename NumTraits<Scalar>::Real RealScalar;\n    typedef Eigen::Index Index; ///< \\deprecated since Eigen 3.3\n\n    /** \\brief Complex scalar type for #MatrixType. \n      *\n      * This is \\c std::complex<Scalar> if #Scalar is real (e.g.,\n      * \\c float or \\c double) and just \\c Scalar if #Scalar is\n      * complex.\n      */\n    typedef std::complex<RealScalar> ComplexScalar;\n\n    /** \\brief Type for vector of eigenvalues as returned by eigenvalues(). \n      *\n      * This is a column vector with entries of type #ComplexScalar.\n      * The length of the vector is the size of #MatrixType.\n      */\n    typedef Matrix<ComplexScalar, ColsAtCompileTime, 1, Options & ~RowMajor, MaxColsAtCompileTime, 1> EigenvalueType;\n\n    /** \\brief Type for matrix of eigenvectors as returned by eigenvectors(). \n      *\n      * This is a square matrix with entries of type #ComplexScalar. \n      * The size is the same as the size of #MatrixType.\n      */\n    typedef Matrix<ComplexScalar, RowsAtCompileTime, ColsAtCompileTime, Options, MaxRowsAtCompileTime, MaxColsAtCompileTime> EigenvectorsType;\n\n    /** \\brief Default constructor.\n      *\n      * The default constructor is useful in cases in which the user intends to\n      * perform decompositions via EigenSolver::compute(const MatrixType&, bool).\n      *\n      * \\sa compute() for an example.\n      */\n    EigenSolver() : m_eivec(), m_eivalues(), m_isInitialized(false), m_eigenvectorsOk(false), m_realSchur(), m_matT(), m_tmp() {}\n\n    /** \\brief Default constructor with memory preallocation\n      *\n      * Like the default constructor but with preallocation of the internal data\n      * according to the specified problem \\a size.\n      * \\sa EigenSolver()\n      */\n    explicit EigenSolver(Index size)\n      : m_eivec(size, size),\n        m_eivalues(size),\n        m_isInitialized(false),\n        m_eigenvectorsOk(false),\n        m_realSchur(size),\n        m_matT(size, size), \n        m_tmp(size)\n    {}\n\n    /** \\brief Constructor; computes eigendecomposition of given matrix. \n      * \n      * \\param[in]  matrix  Square matrix whose eigendecomposition is to be computed.\n      * \\param[in]  computeEigenvectors  If true, both the eigenvectors and the\n      *    eigenvalues are computed; if false, only the eigenvalues are\n      *    computed. 
\n      *\n      * This constructor calls compute() to compute the eigenvalues\n      * and eigenvectors.\n      *\n      * Example: \\include EigenSolver_EigenSolver_MatrixType.cpp\n      * Output: \\verbinclude EigenSolver_EigenSolver_MatrixType.out\n      *\n      * \\sa compute()\n      */\n    template<typename InputType>\n    explicit EigenSolver(const EigenBase<InputType>& matrix, bool computeEigenvectors = true)\n      : m_eivec(matrix.rows(), matrix.cols()),\n        m_eivalues(matrix.cols()),\n        m_isInitialized(false),\n        m_eigenvectorsOk(false),\n        m_realSchur(matrix.cols()),\n        m_matT(matrix.rows(), matrix.cols()), \n        m_tmp(matrix.cols())\n    {\n      compute(matrix.derived(), computeEigenvectors);\n    }\n\n    /** \\brief Returns the eigenvectors of given matrix. \n      *\n      * \\returns  %Matrix whose columns are the (possibly complex) eigenvectors.\n      *\n      * \\pre Either the constructor \n      * EigenSolver(const MatrixType&,bool) or the member function\n      * compute(const MatrixType&, bool) has been called before, and\n      * \\p computeEigenvectors was set to true (the default).\n      *\n      * Column \\f$ k \\f$ of the returned matrix is an eigenvector corresponding\n      * to eigenvalue number \\f$ k \\f$ as returned by eigenvalues().  The\n      * eigenvectors are normalized to have (Euclidean) norm equal to one. The\n      * matrix returned by this function is the matrix \\f$ V \\f$ in the\n      * eigendecomposition \\f$ A = V D V^{-1} \\f$, if it exists.\n      *\n      * Example: \\include EigenSolver_eigenvectors.cpp\n      * Output: \\verbinclude EigenSolver_eigenvectors.out\n      *\n      * \\sa eigenvalues(), pseudoEigenvectors()\n      */\n    EigenvectorsType eigenvectors() const;\n\n    /** \\brief Returns the pseudo-eigenvectors of given matrix. \n      *\n      * \\returns  Const reference to matrix whose columns are the pseudo-eigenvectors.\n      *\n      * \\pre Either the constructor \n      * EigenSolver(const MatrixType&,bool) or the member function\n      * compute(const MatrixType&, bool) has been called before, and\n      * \\p computeEigenvectors was set to true (the default).\n      *\n      * The real matrix \\f$ V \\f$ returned by this function and the\n      * block-diagonal matrix \\f$ D \\f$ returned by pseudoEigenvalueMatrix()\n      * satisfy \\f$ AV = VD \\f$.\n      *\n      * Example: \\include EigenSolver_pseudoEigenvectors.cpp\n      * Output: \\verbinclude EigenSolver_pseudoEigenvectors.out\n      *\n      * \\sa pseudoEigenvalueMatrix(), eigenvectors()\n      */\n    const MatrixType& pseudoEigenvectors() const\n    {\n      eigen_assert(m_isInitialized && \"EigenSolver is not initialized.\");\n      eigen_assert(m_eigenvectorsOk && \"The eigenvectors have not been computed together with the eigenvalues.\");\n      return m_eivec;\n    }\n\n    /** \\brief Returns the block-diagonal matrix in the pseudo-eigendecomposition.\n      *\n      * \\returns  A block-diagonal matrix.\n      *\n      * \\pre Either the constructor \n      * EigenSolver(const MatrixType&,bool) or the member function\n      * compute(const MatrixType&, bool) has been called before.\n      *\n      * The matrix \\f$ D \\f$ returned by this function is real and\n      * block-diagonal. 
The blocks on the diagonal are either 1-by-1 or 2-by-2\n      * blocks of the form\n      * \\f$ \\begin{bmatrix} u & v \\\\ -v & u \\end{bmatrix} \\f$.\n      * These blocks are not sorted in any particular order.\n      * The matrix \\f$ D \\f$ and the matrix \\f$ V \\f$ returned by\n      * pseudoEigenvectors() satisfy \\f$ AV = VD \\f$.\n      *\n      * \\sa pseudoEigenvectors() for an example, eigenvalues()\n      */\n    MatrixType pseudoEigenvalueMatrix() const;\n\n    /** \\brief Returns the eigenvalues of given matrix. \n      *\n      * \\returns A const reference to the column vector containing the eigenvalues.\n      *\n      * \\pre Either the constructor \n      * EigenSolver(const MatrixType&,bool) or the member function\n      * compute(const MatrixType&, bool) has been called before.\n      *\n      * The eigenvalues are repeated according to their algebraic multiplicity,\n      * so there are as many eigenvalues as rows in the matrix. The eigenvalues \n      * are not sorted in any particular order.\n      *\n      * Example: \\include EigenSolver_eigenvalues.cpp\n      * Output: \\verbinclude EigenSolver_eigenvalues.out\n      *\n      * \\sa eigenvectors(), pseudoEigenvalueMatrix(),\n      *     MatrixBase::eigenvalues()\n      */\n    const EigenvalueType& eigenvalues() const\n    {\n      eigen_assert(m_isInitialized && \"EigenSolver is not initialized.\");\n      return m_eivalues;\n    }\n\n    /** \\brief Computes eigendecomposition of given matrix. \n      * \n      * \\param[in]  matrix  Square matrix whose eigendecomposition is to be computed.\n      * \\param[in]  computeEigenvectors  If true, both the eigenvectors and the\n      *    eigenvalues are computed; if false, only the eigenvalues are\n      *    computed. \n      * \\returns    Reference to \\c *this\n      *\n      * This function computes the eigenvalues of the real matrix \\p matrix.\n      * The eigenvalues() function can be used to retrieve them.  If \n      * \\p computeEigenvectors is true, then the eigenvectors are also computed\n      * and can be retrieved by calling eigenvectors().\n      *\n      * The matrix is first reduced to real Schur form using the RealSchur\n      * class. The Schur decomposition is then used to compute the eigenvalues\n      * and eigenvectors.\n      *\n      * The cost of the computation is dominated by the cost of the\n      * Schur decomposition, which is very approximately \\f$ 25n^3 \\f$\n      * (where \\f$ n \\f$ is the size of the matrix) if \\p computeEigenvectors \n      * is true, and \\f$ 10n^3 \\f$ if \\p computeEigenvectors is false.\n      *\n      * This method reuses the allocated data in the EigenSolver object.\n      *\n      * Example: \\include EigenSolver_compute.cpp\n      * Output: \\verbinclude EigenSolver_compute.out\n      */\n    template<typename InputType>\n    EigenSolver& compute(const EigenBase<InputType>& matrix, bool computeEigenvectors = true);\n\n    /** \\returns NumericalIssue if the input contains INF or NaN values or overflow occurred. Returns Success otherwise. */\n    ComputationInfo info() const\n    {\n      eigen_assert(m_isInitialized && \"EigenSolver is not initialized.\");\n      return m_info;\n    }\n\n    /** \\brief Sets the maximum number of iterations allowed. */\n    EigenSolver& setMaxIterations(Index maxIters)\n    {\n      m_realSchur.setMaxIterations(maxIters);\n      return *this;\n    }\n\n    /** \\brief Returns the maximum number of iterations. 
*/\n    Index getMaxIterations()\n    {\n      return m_realSchur.getMaxIterations();\n    }\n\n  private:\n    void doComputeEigenvectors();\n\n  protected:\n    \n    static void check_template_parameters()\n    {\n      EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);\n      EIGEN_STATIC_ASSERT(!NumTraits<Scalar>::IsComplex, NUMERIC_TYPE_MUST_BE_REAL);\n    }\n    \n    MatrixType m_eivec;\n    EigenvalueType m_eivalues;\n    bool m_isInitialized;\n    bool m_eigenvectorsOk;\n    ComputationInfo m_info;\n    RealSchur<MatrixType> m_realSchur;\n    MatrixType m_matT;\n\n    typedef Matrix<Scalar, ColsAtCompileTime, 1, Options & ~RowMajor, MaxColsAtCompileTime, 1> ColumnVectorType;\n    ColumnVectorType m_tmp;\n};\n\ntemplate<typename MatrixType>\nMatrixType EigenSolver<MatrixType>::pseudoEigenvalueMatrix() const\n{\n  eigen_assert(m_isInitialized && \"EigenSolver is not initialized.\");\n  const RealScalar precision = RealScalar(2)*NumTraits<RealScalar>::epsilon();\n  Index n = m_eivalues.rows();\n  MatrixType matD = MatrixType::Zero(n,n);\n  for (Index i=0; i<n; ++i)\n  {\n    if (internal::isMuchSmallerThan(numext::imag(m_eivalues.coeff(i)), numext::real(m_eivalues.coeff(i)), precision))\n      matD.coeffRef(i,i) = numext::real(m_eivalues.coeff(i));\n    else\n    {\n      matD.template block<2,2>(i,i) <<  numext::real(m_eivalues.coeff(i)), numext::imag(m_eivalues.coeff(i)),\n                                       -numext::imag(m_eivalues.coeff(i)), numext::real(m_eivalues.coeff(i));\n      ++i;\n    }\n  }\n  return matD;\n}\n\ntemplate<typename MatrixType>\ntypename EigenSolver<MatrixType>::EigenvectorsType EigenSolver<MatrixType>::eigenvectors() const\n{\n  eigen_assert(m_isInitialized && \"EigenSolver is not initialized.\");\n  eigen_assert(m_eigenvectorsOk && \"The eigenvectors have not been computed together with the eigenvalues.\");\n  const RealScalar precision = RealScalar(2)*NumTraits<RealScalar>::epsilon();\n  Index n = m_eivec.cols();\n  EigenvectorsType matV(n,n);\n  for (Index j=0; j<n; ++j)\n  {\n    if (internal::isMuchSmallerThan(numext::imag(m_eivalues.coeff(j)), numext::real(m_eivalues.coeff(j)), precision) || j+1==n)\n    {\n      // we have a real eigen value\n      matV.col(j) = m_eivec.col(j).template cast<ComplexScalar>();\n      matV.col(j).normalize();\n    }\n    else\n    {\n      // we have a pair of complex eigen values\n      for (Index i=0; i<n; ++i)\n      {\n        matV.coeffRef(i,j)   = ComplexScalar(m_eivec.coeff(i,j),  m_eivec.coeff(i,j+1));\n        matV.coeffRef(i,j+1) = ComplexScalar(m_eivec.coeff(i,j), -m_eivec.coeff(i,j+1));\n      }\n      matV.col(j).normalize();\n      matV.col(j+1).normalize();\n      ++j;\n    }\n  }\n  return matV;\n}\n\ntemplate<typename MatrixType>\ntemplate<typename InputType>\nEigenSolver<MatrixType>& \nEigenSolver<MatrixType>::compute(const EigenBase<InputType>& matrix, bool computeEigenvectors)\n{\n  check_template_parameters();\n  \n  using std::sqrt;\n  using std::abs;\n  using numext::isfinite;\n  eigen_assert(matrix.cols() == matrix.rows());\n\n  // Reduce to real Schur form.\n  m_realSchur.compute(matrix.derived(), computeEigenvectors);\n  \n  m_info = m_realSchur.info();\n\n  if (m_info == Success)\n  {\n    m_matT = m_realSchur.matrixT();\n    if (computeEigenvectors)\n      m_eivec = m_realSchur.matrixU();\n  \n    // Compute eigenvalues from matT\n    m_eivalues.resize(matrix.cols());\n    Index i = 0;\n    while (i < matrix.cols()) \n    {\n      if (i == matrix.cols() - 1 || m_matT.coeff(i+1, i) == Scalar(0)) \n 
     {\n        m_eivalues.coeffRef(i) = m_matT.coeff(i, i);\n        if(!(isfinite)(m_eivalues.coeffRef(i)))\n        {\n          m_isInitialized = true;\n          m_eigenvectorsOk = false;\n          m_info = NumericalIssue;\n          return *this;\n        }\n        ++i;\n      }\n      else\n      {\n        Scalar p = Scalar(0.5) * (m_matT.coeff(i, i) - m_matT.coeff(i+1, i+1));\n        Scalar z;\n        // Compute z = sqrt(abs(p * p + m_matT.coeff(i+1, i) * m_matT.coeff(i, i+1)));\n        // without overflow\n        {\n          Scalar t0 = m_matT.coeff(i+1, i);\n          Scalar t1 = m_matT.coeff(i, i+1);\n          Scalar maxval = numext::maxi<Scalar>(abs(p),numext::maxi<Scalar>(abs(t0),abs(t1)));\n          t0 /= maxval;\n          t1 /= maxval;\n          Scalar p0 = p/maxval;\n          z = maxval * sqrt(abs(p0 * p0 + t0 * t1));\n        }\n        \n        m_eivalues.coeffRef(i)   = ComplexScalar(m_matT.coeff(i+1, i+1) + p, z);\n        m_eivalues.coeffRef(i+1) = ComplexScalar(m_matT.coeff(i+1, i+1) + p, -z);\n        if(!((isfinite)(m_eivalues.coeffRef(i)) && (isfinite)(m_eivalues.coeffRef(i+1))))\n        {\n          m_isInitialized = true;\n          m_eigenvectorsOk = false;\n          m_info = NumericalIssue;\n          return *this;\n        }\n        i += 2;\n      }\n    }\n    \n    // Compute eigenvectors.\n    if (computeEigenvectors)\n      doComputeEigenvectors();\n  }\n\n  m_isInitialized = true;\n  m_eigenvectorsOk = computeEigenvectors;\n\n  return *this;\n}\n\n\ntemplate<typename MatrixType>\nvoid EigenSolver<MatrixType>::doComputeEigenvectors()\n{\n  using std::abs;\n  const Index size = m_eivec.cols();\n  const Scalar eps = NumTraits<Scalar>::epsilon();\n\n  // inefficient! this is already computed in RealSchur\n  Scalar norm(0);\n  for (Index j = 0; j < size; ++j)\n  {\n    norm += m_matT.row(j).segment((std::max)(j-1,Index(0)), size-(std::max)(j-1,Index(0))).cwiseAbs().sum();\n  }\n  \n  // Backsubstitute to find vectors of upper triangular form\n  if (norm == Scalar(0))\n  {\n    return;\n  }\n\n  for (Index n = size-1; n >= 0; n--)\n  {\n    Scalar p = m_eivalues.coeff(n).real();\n    Scalar q = m_eivalues.coeff(n).imag();\n\n    // Scalar vector\n    if (q == Scalar(0))\n    {\n      Scalar lastr(0), lastw(0);\n      Index l = n;\n\n      m_matT.coeffRef(n,n) = Scalar(1);\n      for (Index i = n-1; i >= 0; i--)\n      {\n        Scalar w = m_matT.coeff(i,i) - p;\n        Scalar r = m_matT.row(i).segment(l,n-l+1).dot(m_matT.col(n).segment(l, n-l+1));\n\n        if (m_eivalues.coeff(i).imag() < Scalar(0))\n        {\n          lastw = w;\n          lastr = r;\n        }\n        else\n        {\n          l = i;\n          if (m_eivalues.coeff(i).imag() == Scalar(0))\n          {\n            if (w != Scalar(0))\n              m_matT.coeffRef(i,n) = -r / w;\n            else\n              m_matT.coeffRef(i,n) = -r / (eps * norm);\n          }\n          else // Solve real equations\n          {\n            Scalar x = m_matT.coeff(i,i+1);\n            Scalar y = m_matT.coeff(i+1,i);\n            Scalar denom = (m_eivalues.coeff(i).real() - p) * (m_eivalues.coeff(i).real() - p) + m_eivalues.coeff(i).imag() * m_eivalues.coeff(i).imag();\n            Scalar t = (x * lastr - lastw * r) / denom;\n            m_matT.coeffRef(i,n) = t;\n            if (abs(x) > abs(lastw))\n              m_matT.coeffRef(i+1,n) = (-r - w * t) / x;\n            else\n              m_matT.coeffRef(i+1,n) = (-lastr - y * t) / lastw;\n          }\n\n          // Overflow 
control\n          Scalar t = abs(m_matT.coeff(i,n));\n          if ((eps * t) * t > Scalar(1))\n            m_matT.col(n).tail(size-i) /= t;\n        }\n      }\n    }\n    else if (q < Scalar(0) && n > 0) // Complex vector\n    {\n      Scalar lastra(0), lastsa(0), lastw(0);\n      Index l = n-1;\n\n      // Last vector component imaginary so matrix is triangular\n      if (abs(m_matT.coeff(n,n-1)) > abs(m_matT.coeff(n-1,n)))\n      {\n        m_matT.coeffRef(n-1,n-1) = q / m_matT.coeff(n,n-1);\n        m_matT.coeffRef(n-1,n) = -(m_matT.coeff(n,n) - p) / m_matT.coeff(n,n-1);\n      }\n      else\n      {\n        ComplexScalar cc = ComplexScalar(Scalar(0),-m_matT.coeff(n-1,n)) / ComplexScalar(m_matT.coeff(n-1,n-1)-p,q);\n        m_matT.coeffRef(n-1,n-1) = numext::real(cc);\n        m_matT.coeffRef(n-1,n) = numext::imag(cc);\n      }\n      m_matT.coeffRef(n,n-1) = Scalar(0);\n      m_matT.coeffRef(n,n) = Scalar(1);\n      for (Index i = n-2; i >= 0; i--)\n      {\n        Scalar ra = m_matT.row(i).segment(l, n-l+1).dot(m_matT.col(n-1).segment(l, n-l+1));\n        Scalar sa = m_matT.row(i).segment(l, n-l+1).dot(m_matT.col(n).segment(l, n-l+1));\n        Scalar w = m_matT.coeff(i,i) - p;\n\n        if (m_eivalues.coeff(i).imag() < Scalar(0))\n        {\n          lastw = w;\n          lastra = ra;\n          lastsa = sa;\n        }\n        else\n        {\n          l = i;\n          if (m_eivalues.coeff(i).imag() == RealScalar(0))\n          {\n            ComplexScalar cc = ComplexScalar(-ra,-sa) / ComplexScalar(w,q);\n            m_matT.coeffRef(i,n-1) = numext::real(cc);\n            m_matT.coeffRef(i,n) = numext::imag(cc);\n          }\n          else\n          {\n            // Solve complex equations\n            Scalar x = m_matT.coeff(i,i+1);\n            Scalar y = m_matT.coeff(i+1,i);\n            Scalar vr = (m_eivalues.coeff(i).real() - p) * (m_eivalues.coeff(i).real() - p) + m_eivalues.coeff(i).imag() * m_eivalues.coeff(i).imag() - q * q;\n            Scalar vi = (m_eivalues.coeff(i).real() - p) * Scalar(2) * q;\n            if ((vr == Scalar(0)) && (vi == Scalar(0)))\n              vr = eps * norm * (abs(w) + abs(q) + abs(x) + abs(y) + abs(lastw));\n\n            ComplexScalar cc = ComplexScalar(x*lastra-lastw*ra+q*sa,x*lastsa-lastw*sa-q*ra) / ComplexScalar(vr,vi);\n            m_matT.coeffRef(i,n-1) = numext::real(cc);\n            m_matT.coeffRef(i,n) = numext::imag(cc);\n            if (abs(x) > (abs(lastw) + abs(q)))\n            {\n              m_matT.coeffRef(i+1,n-1) = (-ra - w * m_matT.coeff(i,n-1) + q * m_matT.coeff(i,n)) / x;\n              m_matT.coeffRef(i+1,n) = (-sa - w * m_matT.coeff(i,n) - q * m_matT.coeff(i,n-1)) / x;\n            }\n            else\n            {\n              cc = ComplexScalar(-lastra-y*m_matT.coeff(i,n-1),-lastsa-y*m_matT.coeff(i,n)) / ComplexScalar(lastw,q);\n              m_matT.coeffRef(i+1,n-1) = numext::real(cc);\n              m_matT.coeffRef(i+1,n) = numext::imag(cc);\n            }\n          }\n\n          // Overflow control\n          Scalar t = numext::maxi<Scalar>(abs(m_matT.coeff(i,n-1)),abs(m_matT.coeff(i,n)));\n          if ((eps * t) * t > Scalar(1))\n            m_matT.block(i, n-1, size-i, 2) /= t;\n\n        }\n      }\n      \n      // We handled a pair of complex conjugate eigenvalues, so need to skip them both\n      n--;\n    }\n    else\n    {\n      eigen_assert(0 && \"Internal bug in EigenSolver (INF or NaN has not been detected)\"); // this should not happen\n    }\n  }\n\n  // Back transformation to get 
eigenvectors of original matrix\n  for (Index j = size-1; j >= 0; j--)\n  {\n    m_tmp.noalias() = m_eivec.leftCols(j+1) * m_matT.col(j).segment(0, j+1);\n    m_eivec.col(j) = m_tmp;\n  }\n}\n\n} // end namespace Eigen\n\n#endif // EIGEN_EIGENSOLVER_H\n"
  },
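For orientation, a minimal usage sketch of the EigenSolver API documented in the file above (illustrative only, not part of the vendored Eigen sources; assumes <Eigen/Dense> is on the include path):

#include <complex>
#include <iostream>
#include <Eigen/Dense>

int main() {
  Eigen::Matrix3d A;
  A << 1, 2, 3,
       4, 5, 6,
       7, 8, 10;
  // Eigenvalues and eigenvectors via the real Schur decomposition.
  Eigen::EigenSolver<Eigen::Matrix3d> es(A);
  if (es.info() != Eigen::Success) return 1;   // NumericalIssue on INF/NaN input
  std::cout << "eigenvalues:\n" << es.eigenvalues() << "\n";
  // Columns of V are unit-norm (possibly complex) eigenvectors: A V = V D.
  Eigen::Matrix3cd V = es.eigenvectors();
  Eigen::Vector3cd d = es.eigenvalues();
  std::cout << "||A V - V D|| = "
            << (A.cast<std::complex<double> >() * V - V * d.asDiagonal()).norm() << "\n";
  return 0;
}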
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2012-2016 Gael Guennebaud <gael.guennebaud@inria.fr>\n// Copyright (C) 2010,2012 Jitse Niesen <jitse@maths.leeds.ac.uk>\n// Copyright (C) 2016 Tobias Wood <tobias@spinicist.org.uk>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_GENERALIZEDEIGENSOLVER_H\n#define EIGEN_GENERALIZEDEIGENSOLVER_H\n\n#include \"./RealQZ.h\"\n\nnamespace Eigen { \n\n/** \\eigenvalues_module \\ingroup Eigenvalues_Module\n  *\n  *\n  * \\class GeneralizedEigenSolver\n  *\n  * \\brief Computes the generalized eigenvalues and eigenvectors of a pair of general matrices\n  *\n  * \\tparam _MatrixType the type of the matrices of which we are computing the\n  * eigen-decomposition; this is expected to be an instantiation of the Matrix\n  * class template. Currently, only real matrices are supported.\n  *\n  * The generalized eigenvalues and eigenvectors of a matrix pair \\f$ A \\f$ and \\f$ B \\f$ are scalars\n  * \\f$ \\lambda \\f$ and vectors \\f$ v \\f$ such that \\f$ Av = \\lambda Bv \\f$.  If\n  * \\f$ D \\f$ is a diagonal matrix with the eigenvalues on the diagonal, and\n  * \\f$ V \\f$ is a matrix with the eigenvectors as its columns, then \\f$ A V =\n  * B V D \\f$. The matrix \\f$ V \\f$ is almost always invertible, in which case we\n  * have \\f$ A = B V D V^{-1} \\f$. This is called the generalized eigen-decomposition.\n  *\n  * The generalized eigenvalues and eigenvectors of a matrix pair may be complex, even when the\n  * matrices are real. Moreover, the generalized eigenvalue might be infinite if the matrix B is\n  * singular. To workaround this difficulty, the eigenvalues are provided as a pair of complex \\f$ \\alpha \\f$\n  * and real \\f$ \\beta \\f$ such that: \\f$ \\lambda_i = \\alpha_i / \\beta_i \\f$. If \\f$ \\beta_i \\f$ is (nearly) zero,\n  * then one can consider the well defined left eigenvalue \\f$ \\mu = \\beta_i / \\alpha_i\\f$ such that:\n  * \\f$ \\mu_i A v_i = B v_i \\f$, or even \\f$ \\mu_i u_i^T A  = u_i^T B \\f$ where \\f$ u_i \\f$ is\n  * called the left eigenvector.\n  *\n  * Call the function compute() to compute the generalized eigenvalues and eigenvectors of\n  * a given matrix pair. Alternatively, you can use the\n  * GeneralizedEigenSolver(const MatrixType&, const MatrixType&, bool) constructor which computes the\n  * eigenvalues and eigenvectors at construction time. Once the eigenvalue and\n  * eigenvectors are computed, they can be retrieved with the eigenvalues() and\n  * eigenvectors() functions.\n  *\n  * Here is an usage example of this class:\n  * Example: \\include GeneralizedEigenSolver.cpp\n  * Output: \\verbinclude GeneralizedEigenSolver.out\n  *\n  * \\sa MatrixBase::eigenvalues(), class ComplexEigenSolver, class SelfAdjointEigenSolver\n  */\ntemplate<typename _MatrixType> class GeneralizedEigenSolver\n{\n  public:\n\n    /** \\brief Synonym for the template parameter \\p _MatrixType. 
*/\n    typedef _MatrixType MatrixType;\n\n    enum {\n      RowsAtCompileTime = MatrixType::RowsAtCompileTime,\n      ColsAtCompileTime = MatrixType::ColsAtCompileTime,\n      Options = MatrixType::Options,\n      MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,\n      MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime\n    };\n\n    /** \\brief Scalar type for matrices of type #MatrixType. */\n    typedef typename MatrixType::Scalar Scalar;\n    typedef typename NumTraits<Scalar>::Real RealScalar;\n    typedef Eigen::Index Index; ///< \\deprecated since Eigen 3.3\n\n    /** \\brief Complex scalar type for #MatrixType. \n      *\n      * This is \\c std::complex<Scalar> if #Scalar is real (e.g.,\n      * \\c float or \\c double) and just \\c Scalar if #Scalar is\n      * complex.\n      */\n    typedef std::complex<RealScalar> ComplexScalar;\n\n    /** \\brief Type for vector of real scalar values eigenvalues as returned by betas().\n      *\n      * This is a column vector with entries of type #Scalar.\n      * The length of the vector is the size of #MatrixType.\n      */\n    typedef Matrix<Scalar, ColsAtCompileTime, 1, Options & ~RowMajor, MaxColsAtCompileTime, 1> VectorType;\n\n    /** \\brief Type for vector of complex scalar values eigenvalues as returned by alphas().\n      *\n      * This is a column vector with entries of type #ComplexScalar.\n      * The length of the vector is the size of #MatrixType.\n      */\n    typedef Matrix<ComplexScalar, ColsAtCompileTime, 1, Options & ~RowMajor, MaxColsAtCompileTime, 1> ComplexVectorType;\n\n    /** \\brief Expression type for the eigenvalues as returned by eigenvalues().\n      */\n    typedef CwiseBinaryOp<internal::scalar_quotient_op<ComplexScalar,Scalar>,ComplexVectorType,VectorType> EigenvalueType;\n\n    /** \\brief Type for matrix of eigenvectors as returned by eigenvectors(). \n      *\n      * This is a square matrix with entries of type #ComplexScalar. 
\n      * The size is the same as the size of #MatrixType.\n      */\n    typedef Matrix<ComplexScalar, RowsAtCompileTime, ColsAtCompileTime, Options, MaxRowsAtCompileTime, MaxColsAtCompileTime> EigenvectorsType;\n\n    /** \\brief Default constructor.\n      *\n      * The default constructor is useful in cases in which the user intends to\n      * perform decompositions via GeneralizedEigenSolver::compute(const MatrixType&, const MatrixType&, bool).\n      *\n      * \\sa compute() for an example.\n      */\n    GeneralizedEigenSolver()\n      : m_eivec(),\n        m_alphas(),\n        m_betas(),\n        m_valuesOkay(false),\n        m_vectorsOkay(false),\n        m_realQZ()\n    {}\n\n    /** \\brief Default constructor with memory preallocation\n      *\n      * Like the default constructor but with preallocation of the internal data\n      * according to the specified problem \\a size.\n      * \\sa GeneralizedEigenSolver()\n      */\n    explicit GeneralizedEigenSolver(Index size)\n      : m_eivec(size, size),\n        m_alphas(size),\n        m_betas(size),\n        m_valuesOkay(false),\n        m_vectorsOkay(false),\n        m_realQZ(size),\n        m_tmp(size)\n    {}\n\n    /** \\brief Constructor; computes the generalized eigendecomposition of given matrix pair.\n      * \n      * \\param[in]  A  Square matrix whose eigendecomposition is to be computed.\n      * \\param[in]  B  Square matrix whose eigendecomposition is to be computed.\n      * \\param[in]  computeEigenvectors  If true, both the eigenvectors and the\n      *    eigenvalues are computed; if false, only the eigenvalues are computed.\n      *\n      * This constructor calls compute() to compute the generalized eigenvalues\n      * and eigenvectors.\n      *\n      * \\sa compute()\n      */\n    GeneralizedEigenSolver(const MatrixType& A, const MatrixType& B, bool computeEigenvectors = true)\n      : m_eivec(A.rows(), A.cols()),\n        m_alphas(A.cols()),\n        m_betas(A.cols()),\n        m_valuesOkay(false),\n        m_vectorsOkay(false),\n        m_realQZ(A.cols()),\n        m_tmp(A.cols())\n    {\n      compute(A, B, computeEigenvectors);\n    }\n\n    /** \\brief Returns the computed generalized eigenvectors.\n      *\n      * \\returns  %Matrix whose columns are the (possibly complex) right eigenvectors.\n      * i.e. the eigenvectors that solve (A - l*B)x = 0. The ordering matches the eigenvalues.\n      *\n      * \\pre Either the constructor \n      * GeneralizedEigenSolver(const MatrixType&,const MatrixType&, bool) or the member function\n      * compute(const MatrixType&, const MatrixType&, bool) has been called before, and\n      * \\p computeEigenvectors was set to true (the default).\n      *\n      * \\sa eigenvalues()\n      */\n    EigenvectorsType eigenvectors() const {\n      eigen_assert(m_vectorsOkay && \"Eigenvectors for GeneralizedEigenSolver were not calculated.\");\n      return m_eivec;\n    }\n\n    /** \\brief Returns an expression of the computed generalized eigenvalues.\n      *\n      * \\returns An expression of the column vector containing the eigenvalues.\n      *\n      * It is a shortcut for \\code this->alphas().cwiseQuotient(this->betas()); \\endcode\n      * Note that betas might contain zeros. 
It is therefore not recommended to use this function,\n      * but rather directly deal with the alphas and betas vectors.\n      *\n      * \\pre Either the constructor \n      * GeneralizedEigenSolver(const MatrixType&,const MatrixType&,bool) or the member function\n      * compute(const MatrixType&,const MatrixType&,bool) has been called before.\n      *\n      * The eigenvalues are repeated according to their algebraic multiplicity,\n      * so there are as many eigenvalues as rows in the matrix. The eigenvalues \n      * are not sorted in any particular order.\n      *\n      * \\sa alphas(), betas(), eigenvectors()\n      */\n    EigenvalueType eigenvalues() const\n    {\n      eigen_assert(m_valuesOkay && \"GeneralizedEigenSolver is not initialized.\");\n      return EigenvalueType(m_alphas,m_betas);\n    }\n\n    /** \\returns A const reference to the vector containing the alpha values\n      *\n      * This vector allows reconstructing the j-th eigenvalue as alphas(j)/betas(j).\n      *\n      * \\sa betas(), eigenvalues() */\n    ComplexVectorType alphas() const\n    {\n      eigen_assert(m_valuesOkay && \"GeneralizedEigenSolver is not initialized.\");\n      return m_alphas;\n    }\n\n    /** \\returns A const reference to the vector containing the beta values\n      *\n      * This vector allows reconstructing the j-th eigenvalue as alphas(j)/betas(j).\n      *\n      * \\sa alphas(), eigenvalues() */\n    VectorType betas() const\n    {\n      eigen_assert(m_valuesOkay && \"GeneralizedEigenSolver is not initialized.\");\n      return m_betas;\n    }\n\n    /** \\brief Computes generalized eigendecomposition of given matrix pair.\n      * \n      * \\param[in]  A  Square matrix whose eigendecomposition is to be computed.\n      * \\param[in]  B  Square matrix whose eigendecomposition is to be computed.\n      * \\param[in]  computeEigenvectors  If true, both the eigenvectors and the\n      *    eigenvalues are computed; if false, only the eigenvalues are\n      *    computed. \n      * \\returns    Reference to \\c *this\n      *\n      * This function computes the generalized eigenvalues of the real matrix pair \\p A and \\p B.\n      * The eigenvalues() function can be used to retrieve them.  If \n      * \\p computeEigenvectors is true, then the eigenvectors are also computed\n      * and can be retrieved by calling eigenvectors().\n      *\n      * The matrix is first reduced to real generalized Schur form using the RealQZ\n      * class. 
The generalized Schur decomposition is then used to compute the eigenvalues\n      * and eigenvectors.\n      *\n      * The cost of the computation is dominated by the cost of the\n      * generalized Schur decomposition.\n      *\n      * This method reuses the allocated data in the GeneralizedEigenSolver object.\n      */\n    GeneralizedEigenSolver& compute(const MatrixType& A, const MatrixType& B, bool computeEigenvectors = true);\n\n    ComputationInfo info() const\n    {\n      eigen_assert(m_valuesOkay && \"GeneralizedEigenSolver is not initialized.\");\n      return m_realQZ.info();\n    }\n\n    /** Sets the maximum number of iterations allowed.\n    */\n    GeneralizedEigenSolver& setMaxIterations(Index maxIters)\n    {\n      m_realQZ.setMaxIterations(maxIters);\n      return *this;\n    }\n\n  protected:\n    \n    static void check_template_parameters()\n    {\n      EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);\n      EIGEN_STATIC_ASSERT(!NumTraits<Scalar>::IsComplex, NUMERIC_TYPE_MUST_BE_REAL);\n    }\n    \n    EigenvectorsType m_eivec;\n    ComplexVectorType m_alphas;\n    VectorType m_betas;\n    bool m_valuesOkay, m_vectorsOkay;\n    RealQZ<MatrixType> m_realQZ;\n    ComplexVectorType m_tmp;\n};\n\ntemplate<typename MatrixType>\nGeneralizedEigenSolver<MatrixType>&\nGeneralizedEigenSolver<MatrixType>::compute(const MatrixType& A, const MatrixType& B, bool computeEigenvectors)\n{\n  check_template_parameters();\n  \n  using std::sqrt;\n  using std::abs;\n  eigen_assert(A.cols() == A.rows() && B.cols() == A.rows() && B.cols() == B.rows());\n  Index size = A.cols();\n  m_valuesOkay = false;\n  m_vectorsOkay = false;\n  // Reduce to generalized real Schur form:\n  // A = Q S Z and B = Q T Z\n  m_realQZ.compute(A, B, computeEigenvectors);\n  if (m_realQZ.info() == Success)\n  {\n    // Resize storage\n    m_alphas.resize(size);\n    m_betas.resize(size);\n    if (computeEigenvectors)\n    {\n      m_eivec.resize(size,size);\n      m_tmp.resize(size);\n    }\n\n    // Aliases:\n    Map<VectorType> v(reinterpret_cast<Scalar*>(m_tmp.data()), size);\n    ComplexVectorType &cv = m_tmp;\n    const MatrixType &mS = m_realQZ.matrixS();\n    const MatrixType &mT = m_realQZ.matrixT();\n\n    Index i = 0;\n    while (i < size)\n    {\n      if (i == size - 1 || mS.coeff(i+1, i) == Scalar(0))\n      {\n        // Real eigenvalue\n        m_alphas.coeffRef(i) = mS.diagonal().coeff(i);\n        m_betas.coeffRef(i)  = mT.diagonal().coeff(i);\n        if (computeEigenvectors)\n        {\n          v.setConstant(Scalar(0.0));\n          v.coeffRef(i) = Scalar(1.0);\n          // For singular eigenvalues do nothing more\n          if(abs(m_betas.coeffRef(i)) >= (std::numeric_limits<RealScalar>::min)())\n          {\n            // Non-singular eigenvalue\n            const Scalar alpha = real(m_alphas.coeffRef(i));\n            const Scalar beta = m_betas.coeffRef(i);\n            for (Index j = i-1; j >= 0; j--)\n            {\n              const Index st = j+1;\n              const Index sz = i-j;\n              if (j > 0 && mS.coeff(j, j-1) != Scalar(0))\n              {\n                // 2x2 block\n                Matrix<Scalar, 2, 1> rhs = (alpha*mT.template block<2,Dynamic>(j-1,st,2,sz) - beta*mS.template block<2,Dynamic>(j-1,st,2,sz)) .lazyProduct( v.segment(st,sz) );\n                Matrix<Scalar, 2, 2> lhs = beta * mS.template block<2,2>(j-1,j-1) - alpha * mT.template block<2,2>(j-1,j-1);\n                v.template segment<2>(j-1) = lhs.partialPivLu().solve(rhs);\n                j--;\n       
       }\n              else\n              {\n                v.coeffRef(j) = -v.segment(st,sz).transpose().cwiseProduct(beta*mS.block(j,st,1,sz) - alpha*mT.block(j,st,1,sz)).sum() / (beta*mS.coeffRef(j,j) - alpha*mT.coeffRef(j,j));\n              }\n            }\n          }\n          m_eivec.col(i).real().noalias() = m_realQZ.matrixZ().transpose() * v;\n          m_eivec.col(i).real().normalize();\n          m_eivec.col(i).imag().setConstant(0);\n        }\n        ++i;\n      }\n      else\n      {\n        // We need to extract the generalized eigenvalues of the pair of a general 2x2 block S and a positive diagonal 2x2 block T\n        // Then taking beta=T_00*T_11, we can avoid any division, and alpha is the eigenvalues of A = (U^-1 * S * U) * diag(T_11,T_00):\n\n        // T =  [a 0]\n        //      [0 b]\n        RealScalar a = mT.diagonal().coeff(i),\n                   b = mT.diagonal().coeff(i+1);\n        const RealScalar beta = m_betas.coeffRef(i) = m_betas.coeffRef(i+1) = a*b;\n\n        // ^^ NOTE: using diagonal()(i) instead of coeff(i,i) works around an MSVC bug.\n        Matrix<RealScalar,2,2> S2 = mS.template block<2,2>(i,i) * Matrix<Scalar,2,1>(b,a).asDiagonal();\n\n        Scalar p = Scalar(0.5) * (S2.coeff(0,0) - S2.coeff(1,1));\n        Scalar z = sqrt(abs(p * p + S2.coeff(1,0) * S2.coeff(0,1)));\n        const ComplexScalar alpha = ComplexScalar(S2.coeff(1,1) + p, (beta > 0) ? z : -z);\n        m_alphas.coeffRef(i)   = conj(alpha);\n        m_alphas.coeffRef(i+1) = alpha;\n\n        if (computeEigenvectors) {\n          // Compute eigenvector in position (i+1) and then position (i) is just the conjugate\n          cv.setZero();\n          cv.coeffRef(i+1) = Scalar(1.0);\n          // here, the \"static_cast\" works around expression template issues.\n          cv.coeffRef(i) = -(static_cast<Scalar>(beta*mS.coeffRef(i,i+1)) - alpha*mT.coeffRef(i,i+1))\n                          / (static_cast<Scalar>(beta*mS.coeffRef(i,i))   - alpha*mT.coeffRef(i,i));\n          for (Index j = i-1; j >= 0; j--)\n          {\n            const Index st = j+1;\n            const Index sz = i+1-j;\n            if (j > 0 && mS.coeff(j, j-1) != Scalar(0))\n            {\n              // 2x2 block\n              Matrix<ComplexScalar, 2, 1> rhs = (alpha*mT.template block<2,Dynamic>(j-1,st,2,sz) - beta*mS.template block<2,Dynamic>(j-1,st,2,sz)) .lazyProduct( cv.segment(st,sz) );\n              Matrix<ComplexScalar, 2, 2> lhs = beta * mS.template block<2,2>(j-1,j-1) - alpha * mT.template block<2,2>(j-1,j-1);\n              cv.template segment<2>(j-1) = lhs.partialPivLu().solve(rhs);\n              j--;\n            } else {\n              cv.coeffRef(j) =  cv.segment(st,sz).transpose().cwiseProduct(beta*mS.block(j,st,1,sz) - alpha*mT.block(j,st,1,sz)).sum()\n                              / (alpha*mT.coeffRef(j,j) - static_cast<Scalar>(beta*mS.coeffRef(j,j)));\n            }\n          }\n          m_eivec.col(i+1).noalias() = (m_realQZ.matrixZ().transpose() * cv);\n          m_eivec.col(i+1).normalize();\n          m_eivec.col(i) = m_eivec.col(i+1).conjugate();\n        }\n        i += 2;\n      }\n    }\n\n    m_valuesOkay = true;\n    m_vectorsOkay = computeEigenvectors;\n  }\n  return *this;\n}\n\n} // end namespace Eigen\n\n#endif // EIGEN_GENERALIZEDEIGENSOLVER_H\n"
  },
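For orientation, a minimal sketch of the alphas/betas interface of GeneralizedEigenSolver documented above (illustrative only, not part of the vendored Eigen sources):

#include <iostream>
#include <Eigen/Dense>

int main() {
  Eigen::MatrixXd A = Eigen::MatrixXd::Random(4, 4);
  Eigen::MatrixXd B = Eigen::MatrixXd::Random(4, 4);
  // Reduces the pencil (A, B) to real generalized Schur form via RealQZ.
  Eigen::GeneralizedEigenSolver<Eigen::MatrixXd> ges(A, B);
  // Each eigenvalue is alphas(j)/betas(j); a (near-)zero beta flags an
  // infinite eigenvalue, so inspecting alphas and betas directly is more
  // robust than calling eigenvalues().
  std::cout << "alphas: " << ges.alphas().transpose() << "\n";
  std::cout << "betas:  " << ges.betas().transpose() << "\n";
  std::cout << "lambda: " << ges.eigenvalues().transpose() << "\n";
  return 0;
}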
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>\n// Copyright (C) 2010 Jitse Niesen <jitse@maths.leeds.ac.uk>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_GENERALIZEDSELFADJOINTEIGENSOLVER_H\n#define EIGEN_GENERALIZEDSELFADJOINTEIGENSOLVER_H\n\n#include \"./Tridiagonalization.h\"\n\nnamespace Eigen { \n\n/** \\eigenvalues_module \\ingroup Eigenvalues_Module\n  *\n  *\n  * \\class GeneralizedSelfAdjointEigenSolver\n  *\n  * \\brief Computes eigenvalues and eigenvectors of the generalized selfadjoint eigen problem\n  *\n  * \\tparam _MatrixType the type of the matrix of which we are computing the\n  * eigendecomposition; this is expected to be an instantiation of the Matrix\n  * class template.\n  *\n  * This class solves the generalized eigenvalue problem\n  * \\f$ Av = \\lambda Bv \\f$. In this case, the matrix \\f$ A \\f$ should be\n  * selfadjoint and the matrix \\f$ B \\f$ should be positive definite.\n  *\n  * Only the \\b lower \\b triangular \\b part of the input matrix is referenced.\n  *\n  * Call the function compute() to compute the eigenvalues and eigenvectors of\n  * a given matrix. Alternatively, you can use the\n  * GeneralizedSelfAdjointEigenSolver(const MatrixType&, const MatrixType&, int)\n  * constructor which computes the eigenvalues and eigenvectors at construction time.\n  * Once the eigenvalue and eigenvectors are computed, they can be retrieved with the eigenvalues()\n  * and eigenvectors() functions.\n  *\n  * The documentation for GeneralizedSelfAdjointEigenSolver(const MatrixType&, const MatrixType&, int)\n  * contains an example of the typical use of this class.\n  *\n  * \\sa class SelfAdjointEigenSolver, class EigenSolver, class ComplexEigenSolver\n  */\ntemplate<typename _MatrixType>\nclass GeneralizedSelfAdjointEigenSolver : public SelfAdjointEigenSolver<_MatrixType>\n{\n    typedef SelfAdjointEigenSolver<_MatrixType> Base;\n  public:\n\n    typedef _MatrixType MatrixType;\n\n    /** \\brief Default constructor for fixed-size matrices.\n      *\n      * The default constructor is useful in cases in which the user intends to\n      * perform decompositions via compute(). This constructor\n      * can only be used if \\p _MatrixType is a fixed-size matrix; use\n      * GeneralizedSelfAdjointEigenSolver(Index) for dynamic-size matrices.\n      */\n    GeneralizedSelfAdjointEigenSolver() : Base() {}\n\n    /** \\brief Constructor, pre-allocates memory for dynamic-size matrices.\n      *\n      * \\param [in]  size  Positive integer, size of the matrix whose\n      * eigenvalues and eigenvectors will be computed.\n      *\n      * This constructor is useful for dynamic-size matrices, when the user\n      * intends to perform decompositions via compute(). The \\p size\n      * parameter is only used as a hint. 
It is not an error to give a wrong\n      * \\p size, but it may impair performance.\n      *\n      * \\sa compute() for an example\n      */\n    explicit GeneralizedSelfAdjointEigenSolver(Index size)\n        : Base(size)\n    {}\n\n    /** \\brief Constructor; computes generalized eigendecomposition of given matrix pencil.\n      *\n      * \\param[in]  matA  Selfadjoint matrix in matrix pencil.\n      *                   Only the lower triangular part of the matrix is referenced.\n      * \\param[in]  matB  Positive-definite matrix in matrix pencil.\n      *                   Only the lower triangular part of the matrix is referenced.\n      * \\param[in]  options A or-ed set of flags {#ComputeEigenvectors,#EigenvaluesOnly} | {#Ax_lBx,#ABx_lx,#BAx_lx}.\n      *                     Default is #ComputeEigenvectors|#Ax_lBx.\n      *\n      * This constructor calls compute(const MatrixType&, const MatrixType&, int)\n      * to compute the eigenvalues and (if requested) the eigenvectors of the\n      * generalized eigenproblem \\f$ Ax = \\lambda B x \\f$ with \\a matA the\n      * selfadjoint matrix \\f$ A \\f$ and \\a matB the positive definite matrix\n      * \\f$ B \\f$. Each eigenvector \\f$ x \\f$ satisfies the property\n      * \\f$ x^* B x = 1 \\f$. The eigenvectors are computed if\n      * \\a options contains ComputeEigenvectors.\n      *\n      * In addition, the two following variants can be solved via \\p options:\n      * - \\c ABx_lx: \\f$ ABx = \\lambda x \\f$\n      * - \\c BAx_lx: \\f$ BAx = \\lambda x \\f$\n      *\n      * Example: \\include SelfAdjointEigenSolver_SelfAdjointEigenSolver_MatrixType2.cpp\n      * Output: \\verbinclude SelfAdjointEigenSolver_SelfAdjointEigenSolver_MatrixType2.out\n      *\n      * \\sa compute(const MatrixType&, const MatrixType&, int)\n      */\n    GeneralizedSelfAdjointEigenSolver(const MatrixType& matA, const MatrixType& matB,\n                                      int options = ComputeEigenvectors|Ax_lBx)\n      : Base(matA.cols())\n    {\n      compute(matA, matB, options);\n    }\n\n    /** \\brief Computes generalized eigendecomposition of given matrix pencil.\n      *\n      * \\param[in]  matA  Selfadjoint matrix in matrix pencil.\n      *                   Only the lower triangular part of the matrix is referenced.\n      * \\param[in]  matB  Positive-definite matrix in matrix pencil.\n      *                   Only the lower triangular part of the matrix is referenced.\n      * \\param[in]  options A or-ed set of flags {#ComputeEigenvectors,#EigenvaluesOnly} | {#Ax_lBx,#ABx_lx,#BAx_lx}.\n      *                     Default is #ComputeEigenvectors|#Ax_lBx.\n      *\n      * \\returns    Reference to \\c *this\n      *\n      * According to \\p options, this function computes eigenvalues and (if requested)\n      * the eigenvectors of one of the following three generalized eigenproblems:\n      * - \\c Ax_lBx: \\f$ Ax = \\lambda B x \\f$\n      * - \\c ABx_lx: \\f$ ABx = \\lambda x \\f$\n      * - \\c BAx_lx: \\f$ BAx = \\lambda x \\f$\n      * with \\a matA the selfadjoint matrix \\f$ A \\f$ and \\a matB the positive definite\n      * matrix \\f$ B \\f$.\n      * In addition, each eigenvector \\f$ x \\f$ satisfies the property \\f$ x^* B x = 1 \\f$.\n      *\n      * The eigenvalues() function can be used to retrieve\n      * the eigenvalues. 
If \\p options contains ComputeEigenvectors, then the\n      * eigenvectors are also computed and can be retrieved by calling\n      * eigenvectors().\n      *\n      * The implementation uses LLT to compute the Cholesky decomposition\n      * \\f$ B = LL^* \\f$ and computes the classical eigendecomposition\n      * of the selfadjoint matrix \\f$ L^{-1} A (L^*)^{-1} \\f$ if \\p options contains Ax_lBx\n      * and of \\f$ L^{*} A L \\f$ otherwise. This solves the\n      * generalized eigenproblem, because any solution of the generalized\n      * eigenproblem \\f$ Ax = \\lambda B x \\f$ corresponds to a solution\n      * \\f$ L^{-1} A (L^*)^{-1} (L^* x) = \\lambda (L^* x) \\f$ of the\n      * eigenproblem for \\f$ L^{-1} A (L^*)^{-1} \\f$. Similar statements\n      * can be made for the two other variants.\n      *\n      * Example: \\include SelfAdjointEigenSolver_compute_MatrixType2.cpp\n      * Output: \\verbinclude SelfAdjointEigenSolver_compute_MatrixType2.out\n      *\n      * \\sa GeneralizedSelfAdjointEigenSolver(const MatrixType&, const MatrixType&, int)\n      */\n    GeneralizedSelfAdjointEigenSolver& compute(const MatrixType& matA, const MatrixType& matB,\n                                               int options = ComputeEigenvectors|Ax_lBx);\n\n  protected:\n\n};\n\n\ntemplate<typename MatrixType>\nGeneralizedSelfAdjointEigenSolver<MatrixType>& GeneralizedSelfAdjointEigenSolver<MatrixType>::\ncompute(const MatrixType& matA, const MatrixType& matB, int options)\n{\n  eigen_assert(matA.cols()==matA.rows() && matB.rows()==matA.rows() && matB.cols()==matB.rows());\n  eigen_assert((options&~(EigVecMask|GenEigMask))==0\n          && (options&EigVecMask)!=EigVecMask\n          && ((options&GenEigMask)==0 || (options&GenEigMask)==Ax_lBx\n           || (options&GenEigMask)==ABx_lx || (options&GenEigMask)==BAx_lx)\n          && \"invalid option parameter\");\n\n  bool computeEigVecs = ((options&EigVecMask)==0) || ((options&EigVecMask)==ComputeEigenvectors);\n\n  // Compute the cholesky decomposition of matB = L L' = U'U\n  LLT<MatrixType> cholB(matB);\n\n  int type = (options&GenEigMask);\n  if(type==0)\n    type = Ax_lBx;\n\n  if(type==Ax_lBx)\n  {\n    // compute C = inv(L) A inv(L')\n    MatrixType matC = matA.template selfadjointView<Lower>();\n    cholB.matrixL().template solveInPlace<OnTheLeft>(matC);\n    cholB.matrixU().template solveInPlace<OnTheRight>(matC);\n\n    Base::compute(matC, computeEigVecs ? ComputeEigenvectors : EigenvaluesOnly );\n\n    // transform back the eigen vectors: evecs = inv(U) * evecs\n    if(computeEigVecs)\n      cholB.matrixU().solveInPlace(Base::m_eivec);\n  }\n  else if(type==ABx_lx)\n  {\n    // compute C = L' A L\n    MatrixType matC = matA.template selfadjointView<Lower>();\n    matC = matC * cholB.matrixL();\n    matC = cholB.matrixU() * matC;\n\n    Base::compute(matC, computeEigVecs ? ComputeEigenvectors : EigenvaluesOnly);\n\n    // transform back the eigen vectors: evecs = inv(U) * evecs\n    if(computeEigVecs)\n      cholB.matrixU().solveInPlace(Base::m_eivec);\n  }\n  else if(type==BAx_lx)\n  {\n    // compute C = L' A L\n    MatrixType matC = matA.template selfadjointView<Lower>();\n    matC = matC * cholB.matrixL();\n    matC = cholB.matrixU() * matC;\n\n    Base::compute(matC, computeEigVecs ? 
ComputeEigenvectors : EigenvaluesOnly);\n\n    // transform back the eigen vectors: evecs = L * evecs\n    if(computeEigVecs)\n      Base::m_eivec = cholB.matrixL() * Base::m_eivec;\n  }\n\n  return *this;\n}\n\n} // end namespace Eigen\n\n#endif // EIGEN_GENERALIZEDSELFADJOINTEIGENSOLVER_H\n"
  },
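A minimal sketch of the generalized selfadjoint problem Ax = lambda Bx solved by the class above (illustrative only, not part of the vendored Eigen sources):

#include <iostream>
#include <Eigen/Dense>

int main() {
  const int n = 4;
  Eigen::MatrixXd X = Eigen::MatrixXd::Random(n, n);
  Eigen::MatrixXd A = X + X.transpose();                            // selfadjoint
  Eigen::MatrixXd Y = Eigen::MatrixXd::Random(n, n);
  Eigen::MatrixXd B = Y * Y.transpose()
                    + double(n) * Eigen::MatrixXd::Identity(n, n);  // positive definite
  // Solves A x = lambda B x through the Cholesky factorization B = L L^*.
  Eigen::GeneralizedSelfAdjointEigenSolver<Eigen::MatrixXd> ges(A, B);
  std::cout << "eigenvalues: " << ges.eigenvalues().transpose() << "\n";
  // Each eigenvector is B-normalized: x^T B x = 1.
  Eigen::VectorXd x = ges.eigenvectors().col(0);
  std::cout << "x^T B x = " << x.dot(B * x) << "\n";
  return 0;
}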
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Eigenvalues/HessenbergDecomposition.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>\n// Copyright (C) 2010 Jitse Niesen <jitse@maths.leeds.ac.uk>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_HESSENBERGDECOMPOSITION_H\n#define EIGEN_HESSENBERGDECOMPOSITION_H\n\nnamespace Eigen { \n\nnamespace internal {\n  \ntemplate<typename MatrixType> struct HessenbergDecompositionMatrixHReturnType;\ntemplate<typename MatrixType>\nstruct traits<HessenbergDecompositionMatrixHReturnType<MatrixType> >\n{\n  typedef MatrixType ReturnType;\n};\n\n}\n\n/** \\eigenvalues_module \\ingroup Eigenvalues_Module\n  *\n  *\n  * \\class HessenbergDecomposition\n  *\n  * \\brief Reduces a square matrix to Hessenberg form by an orthogonal similarity transformation\n  *\n  * \\tparam _MatrixType the type of the matrix of which we are computing the Hessenberg decomposition\n  *\n  * This class performs an Hessenberg decomposition of a matrix \\f$ A \\f$. In\n  * the real case, the Hessenberg decomposition consists of an orthogonal\n  * matrix \\f$ Q \\f$ and a Hessenberg matrix \\f$ H \\f$ such that \\f$ A = Q H\n  * Q^T \\f$. An orthogonal matrix is a matrix whose inverse equals its\n  * transpose (\\f$ Q^{-1} = Q^T \\f$). A Hessenberg matrix has zeros below the\n  * subdiagonal, so it is almost upper triangular. The Hessenberg decomposition\n  * of a complex matrix is \\f$ A = Q H Q^* \\f$ with \\f$ Q \\f$ unitary (that is,\n  * \\f$ Q^{-1} = Q^* \\f$).\n  *\n  * Call the function compute() to compute the Hessenberg decomposition of a\n  * given matrix. Alternatively, you can use the\n  * HessenbergDecomposition(const MatrixType&) constructor which computes the\n  * Hessenberg decomposition at construction time. Once the decomposition is\n  * computed, you can use the matrixH() and matrixQ() functions to construct\n  * the matrices H and Q in the decomposition.\n  *\n  * The documentation for matrixH() contains an example of the typical use of\n  * this class.\n  *\n  * \\sa class ComplexSchur, class Tridiagonalization, \\ref QR_Module \"QR Module\"\n  */\ntemplate<typename _MatrixType> class HessenbergDecomposition\n{\n  public:\n\n    /** \\brief Synonym for the template parameter \\p _MatrixType. */\n    typedef _MatrixType MatrixType;\n\n    enum {\n      Size = MatrixType::RowsAtCompileTime,\n      SizeMinusOne = Size == Dynamic ? Dynamic : Size - 1,\n      Options = MatrixType::Options,\n      MaxSize = MatrixType::MaxRowsAtCompileTime,\n      MaxSizeMinusOne = MaxSize == Dynamic ? Dynamic : MaxSize - 1\n    };\n\n    /** \\brief Scalar type for matrices of type #MatrixType. */\n    typedef typename MatrixType::Scalar Scalar;\n    typedef Eigen::Index Index; ///< \\deprecated since Eigen 3.3\n\n    /** \\brief Type for vector of Householder coefficients.\n      *\n      * This is column vector with entries of type #Scalar. 
The length of the\n      * vector is one less than the size of #MatrixType, if it is a fixed-size\n      * type.\n      */\n    typedef Matrix<Scalar, SizeMinusOne, 1, Options & ~RowMajor, MaxSizeMinusOne, 1> CoeffVectorType;\n\n    /** \\brief Return type of matrixQ() */\n    typedef HouseholderSequence<MatrixType,typename internal::remove_all<typename CoeffVectorType::ConjugateReturnType>::type> HouseholderSequenceType;\n    \n    typedef internal::HessenbergDecompositionMatrixHReturnType<MatrixType> MatrixHReturnType;\n\n    /** \\brief Default constructor; the decomposition will be computed later.\n      *\n      * \\param [in] size  The size of the matrix whose Hessenberg decomposition will be computed.\n      *\n      * The default constructor is useful in cases in which the user intends to\n      * perform decompositions via compute().  The \\p size parameter is only\n      * used as a hint. It is not an error to give a wrong \\p size, but it may\n      * impair performance.\n      *\n      * \\sa compute() for an example.\n      */\n    explicit HessenbergDecomposition(Index size = Size==Dynamic ? 2 : Size)\n      : m_matrix(size,size),\n        m_temp(size),\n        m_isInitialized(false)\n    {\n      if(size>1)\n        m_hCoeffs.resize(size-1);\n    }\n\n    /** \\brief Constructor; computes Hessenberg decomposition of given matrix.\n      *\n      * \\param[in]  matrix  Square matrix whose Hessenberg decomposition is to be computed.\n      *\n      * This constructor calls compute() to compute the Hessenberg\n      * decomposition.\n      *\n      * \\sa matrixH() for an example.\n      */\n    template<typename InputType>\n    explicit HessenbergDecomposition(const EigenBase<InputType>& matrix)\n      : m_matrix(matrix.derived()),\n        m_temp(matrix.rows()),\n        m_isInitialized(false)\n    {\n      if(matrix.rows()<2)\n      {\n        m_isInitialized = true;\n        return;\n      }\n      m_hCoeffs.resize(matrix.rows()-1,1);\n      _compute(m_matrix, m_hCoeffs, m_temp);\n      m_isInitialized = true;\n    }\n\n    /** \\brief Computes Hessenberg decomposition of given matrix.\n      *\n      * \\param[in]  matrix  Square matrix whose Hessenberg decomposition is to be computed.\n      * \\returns    Reference to \\c *this\n      *\n      * The Hessenberg decomposition is computed by bringing the columns of the\n      * matrix successively in the required form using Householder reflections\n      * (see, e.g., Algorithm 7.4.2 in Golub \\& Van Loan, <i>%Matrix\n      * Computations</i>). 
The cost is \\f$ 10n^3/3 \\f$ flops, where \\f$ n \\f$\n      * denotes the size of the given matrix.\n      *\n      * This method reuses the allocated data in the HessenbergDecomposition\n      * object.\n      *\n      * Example: \\include HessenbergDecomposition_compute.cpp\n      * Output: \\verbinclude HessenbergDecomposition_compute.out\n      */\n    template<typename InputType>\n    HessenbergDecomposition& compute(const EigenBase<InputType>& matrix)\n    {\n      m_matrix = matrix.derived();\n      if(matrix.rows()<2)\n      {\n        m_isInitialized = true;\n        return *this;\n      }\n      m_hCoeffs.resize(matrix.rows()-1,1);\n      _compute(m_matrix, m_hCoeffs, m_temp);\n      m_isInitialized = true;\n      return *this;\n    }\n\n    /** \\brief Returns the Householder coefficients.\n      *\n      * \\returns a const reference to the vector of Householder coefficients\n      *\n      * \\pre Either the constructor HessenbergDecomposition(const MatrixType&)\n      * or the member function compute(const MatrixType&) has been called\n      * before to compute the Hessenberg decomposition of a matrix.\n      *\n      * The Householder coefficients allow the reconstruction of the matrix\n      * \\f$ Q \\f$ in the Hessenberg decomposition from the packed data.\n      *\n      * \\sa packedMatrix(), \\ref Householder_Module \"Householder module\"\n      */\n    const CoeffVectorType& householderCoefficients() const\n    {\n      eigen_assert(m_isInitialized && \"HessenbergDecomposition is not initialized.\");\n      return m_hCoeffs;\n    }\n\n    /** \\brief Returns the internal representation of the decomposition\n      *\n      *\t\\returns a const reference to a matrix with the internal representation\n      *\t         of the decomposition.\n      *\n      * \\pre Either the constructor HessenbergDecomposition(const MatrixType&)\n      * or the member function compute(const MatrixType&) has been called\n      * before to compute the Hessenberg decomposition of a matrix.\n      *\n      * The returned matrix contains the following information:\n      *  - the upper part and lower sub-diagonal represent the Hessenberg matrix H\n      *  - the rest of the lower part contains the Householder vectors that, combined with\n      *    Householder coefficients returned by householderCoefficients(),\n      *    allow reconstructing the matrix Q as\n      *       \\f$ Q = H_{N-1} \\ldots H_1 H_0 \\f$.\n      *    Here, the matrices \\f$ H_i \\f$ are the Householder transformations\n      *       \\f$ H_i = (I - h_i v_i v_i^T) \\f$\n      *    where \\f$ h_i \\f$ is the \\f$ i \\f$th Householder coefficient and\n      *    \\f$ v_i \\f$ is the Householder vector defined by\n      *       \\f$ v_i = [ 0, \\ldots, 0, 1, M(i+2,i), \\ldots, M(N-1,i) ]^T \\f$\n      *    with M the matrix returned by this function.\n      *\n      * See LAPACK for further details on this packed storage.\n      *\n      * Example: \\include HessenbergDecomposition_packedMatrix.cpp\n      * Output: \\verbinclude HessenbergDecomposition_packedMatrix.out\n      *\n      * \\sa householderCoefficients()\n      */\n    const MatrixType& packedMatrix() const\n    {\n      eigen_assert(m_isInitialized && \"HessenbergDecomposition is not initialized.\");\n      return m_matrix;\n    }\n\n    /** \\brief Reconstructs the orthogonal matrix Q in the decomposition\n      *\n      * \\returns object representing the matrix Q\n      *\n      * \\pre Either the constructor HessenbergDecomposition(const 
MatrixType&)\n      * or the member function compute(const MatrixType&) has been called\n      * before to compute the Hessenberg decomposition of a matrix.\n      *\n      * This function returns a light-weight object of template class\n      * HouseholderSequence. You can either apply it directly to a matrix or\n      * you can convert it to a matrix of type #MatrixType.\n      *\n      * \\sa matrixH() for an example, class HouseholderSequence\n      */\n    HouseholderSequenceType matrixQ() const\n    {\n      eigen_assert(m_isInitialized && \"HessenbergDecomposition is not initialized.\");\n      return HouseholderSequenceType(m_matrix, m_hCoeffs.conjugate())\n             .setLength(m_matrix.rows() - 1)\n             .setShift(1);\n    }\n\n    /** \\brief Constructs the Hessenberg matrix H in the decomposition\n      *\n      * \\returns expression object representing the matrix H\n      *\n      * \\pre Either the constructor HessenbergDecomposition(const MatrixType&)\n      * or the member function compute(const MatrixType&) has been called\n      * before to compute the Hessenberg decomposition of a matrix.\n      *\n      * The object returned by this function constructs the Hessenberg matrix H\n      * when it is assigned to a matrix or otherwise evaluated. The matrix H is\n      * constructed from the packed matrix as returned by packedMatrix(): The\n      * upper part (including the subdiagonal) of the packed matrix contains\n      * the matrix H. It may sometimes be better to directly use the packed\n      * matrix instead of constructing the matrix H.\n      *\n      * Example: \\include HessenbergDecomposition_matrixH.cpp\n      * Output: \\verbinclude HessenbergDecomposition_matrixH.out\n      *\n      * \\sa matrixQ(), packedMatrix()\n      */\n    MatrixHReturnType matrixH() const\n    {\n      eigen_assert(m_isInitialized && \"HessenbergDecomposition is not initialized.\");\n      return MatrixHReturnType(*this);\n    }\n\n  private:\n\n    typedef Matrix<Scalar, 1, Size, int(Options) | int(RowMajor), 1, MaxSize> VectorType;\n    typedef typename NumTraits<Scalar>::Real RealScalar;\n    static void _compute(MatrixType& matA, CoeffVectorType& hCoeffs, VectorType& temp);\n\n  protected:\n    MatrixType m_matrix;\n    CoeffVectorType m_hCoeffs;\n    VectorType m_temp;\n    bool m_isInitialized;\n};\n\n/** \\internal\n  * Performs a tridiagonal decomposition of \\a matA in place.\n  *\n  * \\param matA the input selfadjoint matrix\n  * \\param hCoeffs returned Householder coefficients\n  *\n  * The result is written in the lower triangular part of \\a matA.\n  *\n  * Implemented from Golub's \"%Matrix Computations\", algorithm 8.3.1.\n  *\n  * \\sa packedMatrix()\n  */\ntemplate<typename MatrixType>\nvoid HessenbergDecomposition<MatrixType>::_compute(MatrixType& matA, CoeffVectorType& hCoeffs, VectorType& temp)\n{\n  eigen_assert(matA.rows()==matA.cols());\n  Index n = matA.rows();\n  temp.resize(n);\n  for (Index i = 0; i<n-1; ++i)\n  {\n    // let's consider the vector v = i-th column starting at position i+1\n    Index remainingSize = n-i-1;\n    RealScalar beta;\n    Scalar h;\n    matA.col(i).tail(remainingSize).makeHouseholderInPlace(h, beta);\n    matA.col(i).coeffRef(i+1) = beta;\n    hCoeffs.coeffRef(i) = h;\n\n    // Apply similarity transformation to remaining columns,\n    // i.e., compute A = H A H'\n\n    // A = H A\n    matA.bottomRightCorner(remainingSize, remainingSize)\n        .applyHouseholderOnTheLeft(matA.col(i).tail(remainingSize-1), h, 
&temp.coeffRef(0));\n\n    // A = A H'\n    matA.rightCols(remainingSize)\n        .applyHouseholderOnTheRight(matA.col(i).tail(remainingSize-1), numext::conj(h), &temp.coeffRef(0));\n  }\n}\n\nnamespace internal {\n\n/** \\eigenvalues_module \\ingroup Eigenvalues_Module\n  *\n  *\n  * \\brief Expression type for return value of HessenbergDecomposition::matrixH()\n  *\n  * \\tparam MatrixType type of matrix in the Hessenberg decomposition\n  *\n  * Objects of this type represent the Hessenberg matrix in the Hessenberg\n  * decomposition of some matrix. The object holds a reference to the\n  * HessenbergDecomposition class until it is assigned or evaluated for\n  * some other reason (the reference should remain valid during the lifetime\n  * of this object). This class is the return type of\n  * HessenbergDecomposition::matrixH(); there is probably no other use for this\n  * class.\n  */\ntemplate<typename MatrixType> struct HessenbergDecompositionMatrixHReturnType\n: public ReturnByValue<HessenbergDecompositionMatrixHReturnType<MatrixType> >\n{\n  public:\n    /** \\brief Constructor.\n      *\n      * \\param[in] hess  Hessenberg decomposition\n      */\n    HessenbergDecompositionMatrixHReturnType(const HessenbergDecomposition<MatrixType>& hess) : m_hess(hess) { }\n\n    /** \\brief Hessenberg matrix in decomposition.\n      *\n      * \\param[out] result  Hessenberg matrix in decomposition \\p hess which\n      *                     was passed to the constructor\n      */\n    template <typename ResultType>\n    inline void evalTo(ResultType& result) const\n    {\n      result = m_hess.packedMatrix();\n      Index n = result.rows();\n      if (n>2)\n        result.bottomLeftCorner(n-2, n-2).template triangularView<Lower>().setZero();\n    }\n\n    Index rows() const { return m_hess.packedMatrix().rows(); }\n    Index cols() const { return m_hess.packedMatrix().cols(); }\n\n  protected:\n    const HessenbergDecomposition<MatrixType>& m_hess;\n};\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_HESSENBERGDECOMPOSITION_H\n"
  },
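A minimal sketch of the HessenbergDecomposition API documented above, checking the defining identity A = Q H Q^T (illustrative only, not part of the vendored Eigen sources):

#include <iostream>
#include <Eigen/Dense>

int main() {
  Eigen::MatrixXd A = Eigen::MatrixXd::Random(5, 5);
  Eigen::HessenbergDecomposition<Eigen::MatrixXd> hd(A);
  Eigen::MatrixXd H = hd.matrixH();   // zero below the first subdiagonal
  Eigen::MatrixXd Q = hd.matrixQ();   // HouseholderSequence evaluated to dense
  std::cout << "||A - Q H Q^T|| = "
            << (A - Q * H * Q.transpose()).norm() << "\n";
  return 0;
}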
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>\n// Copyright (C) 2010 Jitse Niesen <jitse@maths.leeds.ac.uk>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_MATRIXBASEEIGENVALUES_H\n#define EIGEN_MATRIXBASEEIGENVALUES_H\n\nnamespace Eigen { \n\nnamespace internal {\n\ntemplate<typename Derived, bool IsComplex>\nstruct eigenvalues_selector\n{\n  // this is the implementation for the case IsComplex = true\n  static inline typename MatrixBase<Derived>::EigenvaluesReturnType const\n  run(const MatrixBase<Derived>& m)\n  {\n    typedef typename Derived::PlainObject PlainObject;\n    PlainObject m_eval(m);\n    return ComplexEigenSolver<PlainObject>(m_eval, false).eigenvalues();\n  }\n};\n\ntemplate<typename Derived>\nstruct eigenvalues_selector<Derived, false>\n{\n  static inline typename MatrixBase<Derived>::EigenvaluesReturnType const\n  run(const MatrixBase<Derived>& m)\n  {\n    typedef typename Derived::PlainObject PlainObject;\n    PlainObject m_eval(m);\n    return EigenSolver<PlainObject>(m_eval, false).eigenvalues();\n  }\n};\n\n} // end namespace internal\n\n/** \\brief Computes the eigenvalues of a matrix \n  * \\returns Column vector containing the eigenvalues.\n  *\n  * \\eigenvalues_module\n  * This function computes the eigenvalues with the help of the EigenSolver\n  * class (for real matrices) or the ComplexEigenSolver class (for complex\n  * matrices). \n  *\n  * The eigenvalues are repeated according to their algebraic multiplicity,\n  * so there are as many eigenvalues as rows in the matrix.\n  *\n  * The SelfAdjointView class provides a better algorithm for selfadjoint\n  * matrices.\n  *\n  * Example: \\include MatrixBase_eigenvalues.cpp\n  * Output: \\verbinclude MatrixBase_eigenvalues.out\n  *\n  * \\sa EigenSolver::eigenvalues(), ComplexEigenSolver::eigenvalues(),\n  *     SelfAdjointView::eigenvalues()\n  */\ntemplate<typename Derived>\ninline typename MatrixBase<Derived>::EigenvaluesReturnType\nMatrixBase<Derived>::eigenvalues() const\n{\n  return internal::eigenvalues_selector<Derived, NumTraits<Scalar>::IsComplex>::run(derived());\n}\n\n/** \\brief Computes the eigenvalues of a matrix\n  * \\returns Column vector containing the eigenvalues.\n  *\n  * \\eigenvalues_module\n  * This function computes the eigenvalues with the help of the\n  * SelfAdjointEigenSolver class.  The eigenvalues are repeated according to\n  * their algebraic multiplicity, so there are as many eigenvalues as rows in\n  * the matrix.\n  *\n  * Example: \\include SelfAdjointView_eigenvalues.cpp\n  * Output: \\verbinclude SelfAdjointView_eigenvalues.out\n  *\n  * \\sa SelfAdjointEigenSolver::eigenvalues(), MatrixBase::eigenvalues()\n  */\ntemplate<typename MatrixType, unsigned int UpLo> \nEIGEN_DEVICE_FUNC inline typename SelfAdjointView<MatrixType, UpLo>::EigenvaluesReturnType\nSelfAdjointView<MatrixType, UpLo>::eigenvalues() const\n{\n  PlainObject thisAsMatrix(*this);\n  return SelfAdjointEigenSolver<PlainObject>(thisAsMatrix, false).eigenvalues();\n}\n\n\n\n/** \\brief Computes the L2 operator norm\n  * \\returns Operator norm of the matrix.\n  *\n  * \\eigenvalues_module\n  * This function computes the L2 operator norm of a matrix, which is also\n  * known as the spectral norm. 
The norm of a matrix \\f$ A \\f$ is defined to be\n  * \\f[ \\|A\\|_2 = \\max_x \\frac{\\|Ax\\|_2}{\\|x\\|_2} \\f]\n  * where the maximum is over all vectors and the norm on the right is the\n  * Euclidean vector norm. The norm equals the largest singular value, which is\n  * the square root of the largest eigenvalue of the positive semi-definite\n  * matrix \\f$ A^*A \\f$.\n  *\n  * The current implementation uses the eigenvalues of \\f$ A^*A \\f$, as computed\n  * by SelfAdjointView::eigenvalues(), to compute the operator norm of a\n  * matrix.  The SelfAdjointView class provides a better algorithm for\n  * selfadjoint matrices.\n  *\n  * Example: \\include MatrixBase_operatorNorm.cpp\n  * Output: \\verbinclude MatrixBase_operatorNorm.out\n  *\n  * \\sa SelfAdjointView::eigenvalues(), SelfAdjointView::operatorNorm()\n  */\ntemplate<typename Derived>\ninline typename MatrixBase<Derived>::RealScalar\nMatrixBase<Derived>::operatorNorm() const\n{\n  using std::sqrt;\n  typename Derived::PlainObject m_eval(derived());\n  // FIXME if it is really guaranteed that the eigenvalues are already sorted,\n  // then we don't need to compute a maxCoeff() here, comparing the 1st and last ones is enough.\n  return sqrt((m_eval*m_eval.adjoint())\n                 .eval()\n\t\t .template selfadjointView<Lower>()\n\t\t .eigenvalues()\n\t\t .maxCoeff()\n\t\t );\n}\n\n/** \\brief Computes the L2 operator norm\n  * \\returns Operator norm of the matrix.\n  *\n  * \\eigenvalues_module\n  * This function computes the L2 operator norm of a self-adjoint matrix. For a\n  * self-adjoint matrix, the operator norm is the largest eigenvalue.\n  *\n  * The current implementation uses the eigenvalues of the matrix, as computed\n  * by eigenvalues(), to compute the operator norm of the matrix.\n  *\n  * Example: \\include SelfAdjointView_operatorNorm.cpp\n  * Output: \\verbinclude SelfAdjointView_operatorNorm.out\n  *\n  * \\sa eigenvalues(), MatrixBase::operatorNorm()\n  */\ntemplate<typename MatrixType, unsigned int UpLo>\nEIGEN_DEVICE_FUNC inline typename SelfAdjointView<MatrixType, UpLo>::RealScalar\nSelfAdjointView<MatrixType, UpLo>::operatorNorm() const\n{\n  return eigenvalues().cwiseAbs().maxCoeff();\n}\n\n} // end namespace Eigen\n\n#endif\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Eigenvalues/RealQZ.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2012 Alexey Korepanov <kaikaikai@yandex.ru>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_REAL_QZ_H\n#define EIGEN_REAL_QZ_H\n\nnamespace Eigen {\n\n  /** \\eigenvalues_module \\ingroup Eigenvalues_Module\n   *\n   *\n   * \\class RealQZ\n   *\n   * \\brief Performs a real QZ decomposition of a pair of square matrices\n   *\n   * \\tparam _MatrixType the type of the matrix of which we are computing the\n   * real QZ decomposition; this is expected to be an instantiation of the\n   * Matrix class template.\n   *\n   * Given a real square matrices A and B, this class computes the real QZ\n   * decomposition: \\f$ A = Q S Z \\f$, \\f$ B = Q T Z \\f$ where Q and Z are\n   * real orthogonal matrixes, T is upper-triangular matrix, and S is upper\n   * quasi-triangular matrix. An orthogonal matrix is a matrix whose\n   * inverse is equal to its transpose, \\f$ U^{-1} = U^T \\f$. A quasi-triangular\n   * matrix is a block-triangular matrix whose diagonal consists of 1-by-1\n   * blocks and 2-by-2 blocks where further reduction is impossible due to\n   * complex eigenvalues. \n   *\n   * The eigenvalues of the pencil \\f$ A - z B \\f$ can be obtained from\n   * 1x1 and 2x2 blocks on the diagonals of S and T.\n   *\n   * Call the function compute() to compute the real QZ decomposition of a\n   * given pair of matrices. Alternatively, you can use the \n   * RealQZ(const MatrixType& B, const MatrixType& B, bool computeQZ)\n   * constructor which computes the real QZ decomposition at construction\n   * time. Once the decomposition is computed, you can use the matrixS(),\n   * matrixT(), matrixQ() and matrixZ() functions to retrieve the matrices\n   * S, T, Q and Z in the decomposition. If computeQZ==false, some time\n   * is saved by not computing matrices Q and Z.\n   *\n   * Example: \\include RealQZ_compute.cpp\n   * Output: \\include RealQZ_compute.out\n   *\n   * \\note The implementation is based on the algorithm in \"Matrix Computations\"\n   * by Gene H. Golub and Charles F. 
Van Loan, and a paper \"An algorithm for\n   * generalized eigenvalue problems\" by C.B.Moler and G.W.Stewart.\n   *\n   * \\sa class RealSchur, class ComplexSchur, class EigenSolver, class ComplexEigenSolver\n   */\n\n  template<typename _MatrixType> class RealQZ\n  {\n    public:\n      typedef _MatrixType MatrixType;\n      enum {\n        RowsAtCompileTime = MatrixType::RowsAtCompileTime,\n        ColsAtCompileTime = MatrixType::ColsAtCompileTime,\n        Options = MatrixType::Options,\n        MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,\n        MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime\n      };\n      typedef typename MatrixType::Scalar Scalar;\n      typedef std::complex<typename NumTraits<Scalar>::Real> ComplexScalar;\n      typedef Eigen::Index Index; ///< \\deprecated since Eigen 3.3\n\n      typedef Matrix<ComplexScalar, ColsAtCompileTime, 1, Options & ~RowMajor, MaxColsAtCompileTime, 1> EigenvalueType;\n      typedef Matrix<Scalar, ColsAtCompileTime, 1, Options & ~RowMajor, MaxColsAtCompileTime, 1> ColumnVectorType;\n\n      /** \\brief Default constructor.\n       *\n       * \\param [in] size  Positive integer, size of the matrix whose QZ decomposition will be computed.\n       *\n       * The default constructor is useful in cases in which the user intends to\n       * perform decompositions via compute().  The \\p size parameter is only\n       * used as a hint. It is not an error to give a wrong \\p size, but it may\n       * impair performance.\n       *\n       * \\sa compute() for an example.\n       */\n      explicit RealQZ(Index size = RowsAtCompileTime==Dynamic ? 1 : RowsAtCompileTime) :\n        m_S(size, size),\n        m_T(size, size),\n        m_Q(size, size),\n        m_Z(size, size),\n        m_workspace(size*2),\n        m_maxIters(400),\n        m_isInitialized(false),\n        m_computeQZ(true)\n      {}\n\n      /** \\brief Constructor; computes real QZ decomposition of given matrices\n       * \n       * \\param[in]  A          Matrix A.\n       * \\param[in]  B          Matrix B.\n       * \\param[in]  computeQZ  If false, A and Z are not computed.\n       *\n       * This constructor calls compute() to compute the QZ decomposition.\n       */\n      RealQZ(const MatrixType& A, const MatrixType& B, bool computeQZ = true) :\n        m_S(A.rows(),A.cols()),\n        m_T(A.rows(),A.cols()),\n        m_Q(A.rows(),A.cols()),\n        m_Z(A.rows(),A.cols()),\n        m_workspace(A.rows()*2),\n        m_maxIters(400),\n        m_isInitialized(false),\n        m_computeQZ(true)\n      {\n        compute(A, B, computeQZ);\n      }\n\n      /** \\brief Returns matrix Q in the QZ decomposition. \n       *\n       * \\returns A const reference to the matrix Q.\n       */\n      const MatrixType& matrixQ() const {\n        eigen_assert(m_isInitialized && \"RealQZ is not initialized.\");\n        eigen_assert(m_computeQZ && \"The matrices Q and Z have not been computed during the QZ decomposition.\");\n        return m_Q;\n      }\n\n      /** \\brief Returns matrix Z in the QZ decomposition. \n       *\n       * \\returns A const reference to the matrix Z.\n       */\n      const MatrixType& matrixZ() const {\n        eigen_assert(m_isInitialized && \"RealQZ is not initialized.\");\n        eigen_assert(m_computeQZ && \"The matrices Q and Z have not been computed during the QZ decomposition.\");\n        return m_Z;\n      }\n\n      /** \\brief Returns matrix S in the QZ decomposition. 
\n       *\n       * \\returns A const reference to the matrix S.\n       */\n      const MatrixType& matrixS() const {\n        eigen_assert(m_isInitialized && \"RealQZ is not initialized.\");\n        return m_S;\n      }\n\n      /** \\brief Returns matrix S in the QZ decomposition. \n       *\n       * \\returns A const reference to the matrix S.\n       */\n      const MatrixType& matrixT() const {\n        eigen_assert(m_isInitialized && \"RealQZ is not initialized.\");\n        return m_T;\n      }\n\n      /** \\brief Computes QZ decomposition of given matrix. \n       * \n       * \\param[in]  A          Matrix A.\n       * \\param[in]  B          Matrix B.\n       * \\param[in]  computeQZ  If false, A and Z are not computed.\n       * \\returns    Reference to \\c *this\n       */\n      RealQZ& compute(const MatrixType& A, const MatrixType& B, bool computeQZ = true);\n\n      /** \\brief Reports whether previous computation was successful.\n       *\n       * \\returns \\c Success if computation was successful, \\c NoConvergence otherwise.\n       */\n      ComputationInfo info() const\n      {\n        eigen_assert(m_isInitialized && \"RealQZ is not initialized.\");\n        return m_info;\n      }\n\n      /** \\brief Returns number of performed QR-like iterations.\n      */\n      Index iterations() const\n      {\n        eigen_assert(m_isInitialized && \"RealQZ is not initialized.\");\n        return m_global_iter;\n      }\n\n      /** Sets the maximal number of iterations allowed to converge to one eigenvalue\n       * or decouple the problem.\n      */\n      RealQZ& setMaxIterations(Index maxIters)\n      {\n        m_maxIters = maxIters;\n        return *this;\n      }\n\n    private:\n\n      MatrixType m_S, m_T, m_Q, m_Z;\n      Matrix<Scalar,Dynamic,1> m_workspace;\n      ComputationInfo m_info;\n      Index m_maxIters;\n      bool m_isInitialized;\n      bool m_computeQZ;\n      Scalar m_normOfT, m_normOfS;\n      Index m_global_iter;\n\n      typedef Matrix<Scalar,3,1> Vector3s;\n      typedef Matrix<Scalar,2,1> Vector2s;\n      typedef Matrix<Scalar,2,2> Matrix2s;\n      typedef JacobiRotation<Scalar> JRs;\n\n      void hessenbergTriangular();\n      void computeNorms();\n      Index findSmallSubdiagEntry(Index iu);\n      Index findSmallDiagEntry(Index f, Index l);\n      void splitOffTwoRows(Index i);\n      void pushDownZero(Index z, Index f, Index l);\n      void step(Index f, Index l, Index iter);\n\n  }; // RealQZ\n\n  /** \\internal Reduces S and T to upper Hessenberg - triangular form */\n  template<typename MatrixType>\n    void RealQZ<MatrixType>::hessenbergTriangular()\n    {\n\n      const Index dim = m_S.cols();\n\n      // perform QR decomposition of T, overwrite T with R, save Q\n      HouseholderQR<MatrixType> qrT(m_T);\n      m_T = qrT.matrixQR();\n      m_T.template triangularView<StrictlyLower>().setZero();\n      m_Q = qrT.householderQ();\n      // overwrite S with Q* S\n      m_S.applyOnTheLeft(m_Q.adjoint());\n      // init Z as Identity\n      if (m_computeQZ)\n        m_Z = MatrixType::Identity(dim,dim);\n      // reduce S to upper Hessenberg with Givens rotations\n      for (Index j=0; j<=dim-3; j++) {\n        for (Index i=dim-1; i>=j+2; i--) {\n          JRs G;\n          // kill S(i,j)\n          if(m_S.coeff(i,j) != 0)\n          {\n            G.makeGivens(m_S.coeff(i-1,j), m_S.coeff(i,j), &m_S.coeffRef(i-1, j));\n            m_S.coeffRef(i,j) = Scalar(0.0);\n            m_S.rightCols(dim-j-1).applyOnTheLeft(i-1,i,G.adjoint());\n     
       m_T.rightCols(dim-i+1).applyOnTheLeft(i-1,i,G.adjoint());\n            // update Q\n            if (m_computeQZ)\n              m_Q.applyOnTheRight(i-1,i,G);\n          }\n          // kill T(i,i-1)\n          if(m_T.coeff(i,i-1)!=Scalar(0))\n          {\n            G.makeGivens(m_T.coeff(i,i), m_T.coeff(i,i-1), &m_T.coeffRef(i,i));\n            m_T.coeffRef(i,i-1) = Scalar(0.0);\n            m_S.applyOnTheRight(i,i-1,G);\n            m_T.topRows(i).applyOnTheRight(i,i-1,G);\n            // update Z\n            if (m_computeQZ)\n              m_Z.applyOnTheLeft(i,i-1,G.adjoint());\n          }\n        }\n      }\n    }\n\n  /** \\internal Computes vector L1 norms of S and T when in Hessenberg-Triangular form already */\n  template<typename MatrixType>\n    inline void RealQZ<MatrixType>::computeNorms()\n    {\n      const Index size = m_S.cols();\n      m_normOfS = Scalar(0.0);\n      m_normOfT = Scalar(0.0);\n      for (Index j = 0; j < size; ++j)\n      {\n        m_normOfS += m_S.col(j).segment(0, (std::min)(size,j+2)).cwiseAbs().sum();\n        m_normOfT += m_T.row(j).segment(j, size - j).cwiseAbs().sum();\n      }\n    }\n\n\n  /** \\internal Look for single small sub-diagonal element S(res, res-1) and return res (or 0) */\n  template<typename MatrixType>\n    inline Index RealQZ<MatrixType>::findSmallSubdiagEntry(Index iu)\n    {\n      using std::abs;\n      Index res = iu;\n      while (res > 0)\n      {\n        Scalar s = abs(m_S.coeff(res-1,res-1)) + abs(m_S.coeff(res,res));\n        if (s == Scalar(0.0))\n          s = m_normOfS;\n        if (abs(m_S.coeff(res,res-1)) < NumTraits<Scalar>::epsilon() * s)\n          break;\n        res--;\n      }\n      return res;\n    }\n\n  /** \\internal Look for single small diagonal element T(res, res) for res between f and l, and return res (or f-1)  */\n  template<typename MatrixType>\n    inline Index RealQZ<MatrixType>::findSmallDiagEntry(Index f, Index l)\n    {\n      using std::abs;\n      Index res = l;\n      while (res >= f) {\n        if (abs(m_T.coeff(res,res)) <= NumTraits<Scalar>::epsilon() * m_normOfT)\n          break;\n        res--;\n      }\n      return res;\n    }\n\n  /** \\internal decouple 2x2 diagonal block in rows i, i+1 if eigenvalues are real */\n  template<typename MatrixType>\n    inline void RealQZ<MatrixType>::splitOffTwoRows(Index i)\n    {\n      using std::abs;\n      using std::sqrt;\n      const Index dim=m_S.cols();\n      if (abs(m_S.coeff(i+1,i))==Scalar(0))\n        return;\n      Index j = findSmallDiagEntry(i,i+1);\n      if (j==i-1)\n      {\n        // block of (S T^{-1})\n        Matrix2s STi = m_T.template block<2,2>(i,i).template triangularView<Upper>().\n          template solve<OnTheRight>(m_S.template block<2,2>(i,i));\n        Scalar p = Scalar(0.5)*(STi(0,0)-STi(1,1));\n        Scalar q = p*p + STi(1,0)*STi(0,1);\n        if (q>=0) {\n          Scalar z = sqrt(q);\n          // one QR-like iteration for ABi - lambda I\n          // is enough - when we know exact eigenvalue in advance,\n          // convergence is immediate\n          JRs G;\n          if (p>=0)\n            G.makeGivens(p + z, STi(1,0));\n          else\n            G.makeGivens(p - z, STi(1,0));\n          m_S.rightCols(dim-i).applyOnTheLeft(i,i+1,G.adjoint());\n          m_T.rightCols(dim-i).applyOnTheLeft(i,i+1,G.adjoint());\n          // update Q\n          if (m_computeQZ)\n            m_Q.applyOnTheRight(i,i+1,G);\n\n          G.makeGivens(m_T.coeff(i+1,i+1), m_T.coeff(i+1,i));\n          
m_S.topRows(i+2).applyOnTheRight(i+1,i,G);\n          m_T.topRows(i+2).applyOnTheRight(i+1,i,G);\n          // update Z\n          if (m_computeQZ)\n            m_Z.applyOnTheLeft(i+1,i,G.adjoint());\n\n          m_S.coeffRef(i+1,i) = Scalar(0.0);\n          m_T.coeffRef(i+1,i) = Scalar(0.0);\n        }\n      }\n      else\n      {\n        pushDownZero(j,i,i+1);\n      }\n    }\n\n  /** \\internal use zero in T(z,z) to zero S(l,l-1), working in block f..l */\n  template<typename MatrixType>\n    inline void RealQZ<MatrixType>::pushDownZero(Index z, Index f, Index l)\n    {\n      JRs G;\n      const Index dim = m_S.cols();\n      for (Index zz=z; zz<l; zz++)\n      {\n        // push 0 down\n        Index firstColS = zz>f ? (zz-1) : zz;\n        G.makeGivens(m_T.coeff(zz, zz+1), m_T.coeff(zz+1, zz+1));\n        m_S.rightCols(dim-firstColS).applyOnTheLeft(zz,zz+1,G.adjoint());\n        m_T.rightCols(dim-zz).applyOnTheLeft(zz,zz+1,G.adjoint());\n        m_T.coeffRef(zz+1,zz+1) = Scalar(0.0);\n        // update Q\n        if (m_computeQZ)\n          m_Q.applyOnTheRight(zz,zz+1,G);\n        // kill S(zz+1, zz-1)\n        if (zz>f)\n        {\n          G.makeGivens(m_S.coeff(zz+1, zz), m_S.coeff(zz+1,zz-1));\n          m_S.topRows(zz+2).applyOnTheRight(zz, zz-1,G);\n          m_T.topRows(zz+1).applyOnTheRight(zz, zz-1,G);\n          m_S.coeffRef(zz+1,zz-1) = Scalar(0.0);\n          // update Z\n          if (m_computeQZ)\n            m_Z.applyOnTheLeft(zz,zz-1,G.adjoint());\n        }\n      }\n      // finally kill S(l,l-1)\n      G.makeGivens(m_S.coeff(l,l), m_S.coeff(l,l-1));\n      m_S.applyOnTheRight(l,l-1,G);\n      m_T.applyOnTheRight(l,l-1,G);\n      m_S.coeffRef(l,l-1)=Scalar(0.0);\n      // update Z\n      if (m_computeQZ)\n        m_Z.applyOnTheLeft(l,l-1,G.adjoint());\n    }\n\n  /** \\internal QR-like iterative step for block f..l */\n  template<typename MatrixType>\n    inline void RealQZ<MatrixType>::step(Index f, Index l, Index iter)\n    {\n      using std::abs;\n      const Index dim = m_S.cols();\n\n      // x, y, z\n      Scalar x, y, z;\n      if (iter==10)\n      {\n        // Wilkinson ad hoc shift\n        const Scalar\n          a11=m_S.coeff(f+0,f+0), a12=m_S.coeff(f+0,f+1),\n          a21=m_S.coeff(f+1,f+0), a22=m_S.coeff(f+1,f+1), a32=m_S.coeff(f+2,f+1),\n          b12=m_T.coeff(f+0,f+1),\n          b11i=Scalar(1.0)/m_T.coeff(f+0,f+0),\n          b22i=Scalar(1.0)/m_T.coeff(f+1,f+1),\n          a87=m_S.coeff(l-1,l-2),\n          a98=m_S.coeff(l-0,l-1),\n          b77i=Scalar(1.0)/m_T.coeff(l-2,l-2),\n          b88i=Scalar(1.0)/m_T.coeff(l-1,l-1);\n        Scalar ss = abs(a87*b77i) + abs(a98*b88i),\n               lpl = Scalar(1.5)*ss,\n               ll = ss*ss;\n        x = ll + a11*a11*b11i*b11i - lpl*a11*b11i + a12*a21*b11i*b22i\n          - a11*a21*b12*b11i*b11i*b22i;\n        y = a11*a21*b11i*b11i - lpl*a21*b11i + a21*a22*b11i*b22i \n          - a21*a21*b12*b11i*b11i*b22i;\n        z = a21*a32*b11i*b22i;\n      }\n      else if (iter==16)\n      {\n        // another exceptional shift\n        x = m_S.coeff(f,f)/m_T.coeff(f,f)-m_S.coeff(l,l)/m_T.coeff(l,l) + m_S.coeff(l,l-1)*m_T.coeff(l-1,l) /\n          (m_T.coeff(l-1,l-1)*m_T.coeff(l,l));\n        y = m_S.coeff(f+1,f)/m_T.coeff(f,f);\n        z = 0;\n      }\n      else if (iter>23 && !(iter%8))\n      {\n        // extremely exceptional shift\n        x = internal::random<Scalar>(-1.0,1.0);\n        y = internal::random<Scalar>(-1.0,1.0);\n        z = internal::random<Scalar>(-1.0,1.0);\n      }\n      
else\n      {\n        // Compute the shifts: (x,y,z,0...) = (AB^-1 - l1 I) (AB^-1 - l2 I) e1\n        // where l1 and l2 are the eigenvalues of the 2x2 matrix C = U V^-1 where\n        // U and V are 2x2 bottom right sub matrices of A and B. Thus:\n        //  = AB^-1AB^-1 + l1 l2 I - (l1+l2)(AB^-1)\n        //  = AB^-1AB^-1 + det(M) - tr(M)(AB^-1)\n        // Since we are only interested in having x, y, z with a correct ratio, we have:\n        const Scalar\n          a11 = m_S.coeff(f,f),     a12 = m_S.coeff(f,f+1),\n          a21 = m_S.coeff(f+1,f),   a22 = m_S.coeff(f+1,f+1),\n                                    a32 = m_S.coeff(f+2,f+1),\n\n          a88 = m_S.coeff(l-1,l-1), a89 = m_S.coeff(l-1,l),\n          a98 = m_S.coeff(l,l-1),   a99 = m_S.coeff(l,l),\n\n          b11 = m_T.coeff(f,f),     b12 = m_T.coeff(f,f+1),\n                                    b22 = m_T.coeff(f+1,f+1),\n\n          b88 = m_T.coeff(l-1,l-1), b89 = m_T.coeff(l-1,l),\n                                    b99 = m_T.coeff(l,l);\n\n        x = ( (a88/b88 - a11/b11)*(a99/b99 - a11/b11) - (a89/b99)*(a98/b88) + (a98/b88)*(b89/b99)*(a11/b11) ) * (b11/a21)\n          + a12/b22 - (a11/b11)*(b12/b22);\n        y = (a22/b22-a11/b11) - (a21/b11)*(b12/b22) - (a88/b88-a11/b11) - (a99/b99-a11/b11) + (a98/b88)*(b89/b99);\n        z = a32/b22;\n      }\n\n      JRs G;\n\n      for (Index k=f; k<=l-2; k++)\n      {\n        // variables for Householder reflections\n        Vector2s essential2;\n        Scalar tau, beta;\n\n        Vector3s hr(x,y,z);\n\n        // Q_k to annihilate S(k+1,k-1) and S(k+2,k-1)\n        hr.makeHouseholderInPlace(tau, beta);\n        essential2 = hr.template bottomRows<2>();\n        Index fc=(std::max)(k-1,Index(0));  // first col to update\n        m_S.template middleRows<3>(k).rightCols(dim-fc).applyHouseholderOnTheLeft(essential2, tau, m_workspace.data());\n        m_T.template middleRows<3>(k).rightCols(dim-fc).applyHouseholderOnTheLeft(essential2, tau, m_workspace.data());\n        if (m_computeQZ)\n          m_Q.template middleCols<3>(k).applyHouseholderOnTheRight(essential2, tau, m_workspace.data());\n        if (k>f)\n          m_S.coeffRef(k+2,k-1) = m_S.coeffRef(k+1,k-1) = Scalar(0.0);\n\n        // Z_{k1} to annihilate T(k+2,k+1) and T(k+2,k)\n        hr << m_T.coeff(k+2,k+2),m_T.coeff(k+2,k),m_T.coeff(k+2,k+1);\n        hr.makeHouseholderInPlace(tau, beta);\n        essential2 = hr.template bottomRows<2>();\n        {\n          Index lr = (std::min)(k+4,dim); // last row to update\n          Map<Matrix<Scalar,Dynamic,1> > tmp(m_workspace.data(),lr);\n          // S\n          tmp = m_S.template middleCols<2>(k).topRows(lr) * essential2;\n          tmp += m_S.col(k+2).head(lr);\n          m_S.col(k+2).head(lr) -= tau*tmp;\n          m_S.template middleCols<2>(k).topRows(lr) -= (tau*tmp) * essential2.adjoint();\n          // T\n          tmp = m_T.template middleCols<2>(k).topRows(lr) * essential2;\n          tmp += m_T.col(k+2).head(lr);\n          m_T.col(k+2).head(lr) -= tau*tmp;\n          m_T.template middleCols<2>(k).topRows(lr) -= (tau*tmp) * essential2.adjoint();\n        }\n        if (m_computeQZ)\n        {\n          // Z\n          Map<Matrix<Scalar,1,Dynamic> > tmp(m_workspace.data(),dim);\n          tmp = essential2.adjoint()*(m_Z.template middleRows<2>(k));\n          tmp += m_Z.row(k+2);\n          m_Z.row(k+2) -= tau*tmp;\n          m_Z.template middleRows<2>(k) -= essential2 * (tau*tmp);\n        }\n        m_T.coeffRef(k+2,k) = m_T.coeffRef(k+2,k+1) = Scalar(0.0);\n\n 
       // Z_{k2} to annihilate T(k+1,k)\n        G.makeGivens(m_T.coeff(k+1,k+1), m_T.coeff(k+1,k));\n        m_S.applyOnTheRight(k+1,k,G);\n        m_T.applyOnTheRight(k+1,k,G);\n        // update Z\n        if (m_computeQZ)\n          m_Z.applyOnTheLeft(k+1,k,G.adjoint());\n        m_T.coeffRef(k+1,k) = Scalar(0.0);\n\n        // update x,y,z\n        x = m_S.coeff(k+1,k);\n        y = m_S.coeff(k+2,k);\n        if (k < l-2)\n          z = m_S.coeff(k+3,k);\n      } // loop over k\n\n      // Q_{n-1} to annihilate y = S(l,l-2)\n      G.makeGivens(x,y);\n      m_S.applyOnTheLeft(l-1,l,G.adjoint());\n      m_T.applyOnTheLeft(l-1,l,G.adjoint());\n      if (m_computeQZ)\n        m_Q.applyOnTheRight(l-1,l,G);\n      m_S.coeffRef(l,l-2) = Scalar(0.0);\n\n      // Z_{n-1} to annihilate T(l,l-1)\n      G.makeGivens(m_T.coeff(l,l),m_T.coeff(l,l-1));\n      m_S.applyOnTheRight(l,l-1,G);\n      m_T.applyOnTheRight(l,l-1,G);\n      if (m_computeQZ)\n        m_Z.applyOnTheLeft(l,l-1,G.adjoint());\n      m_T.coeffRef(l,l-1) = Scalar(0.0);\n    }\n\n  template<typename MatrixType>\n    RealQZ<MatrixType>& RealQZ<MatrixType>::compute(const MatrixType& A_in, const MatrixType& B_in, bool computeQZ)\n    {\n\n      const Index dim = A_in.cols();\n\n      eigen_assert (A_in.rows()==dim && A_in.cols()==dim \n          && B_in.rows()==dim && B_in.cols()==dim \n          && \"Need square matrices of the same dimension\");\n\n      m_isInitialized = true;\n      m_computeQZ = computeQZ;\n      m_S = A_in; m_T = B_in;\n      m_workspace.resize(dim*2);\n      m_global_iter = 0;\n\n      // entrance point: hessenberg triangular decomposition\n      hessenbergTriangular();\n      // compute L1 vector norms of T, S into m_normOfS, m_normOfT\n      computeNorms();\n\n      Index l = dim-1, \n            f, \n            local_iter = 0;\n\n      while (l>0 && local_iter<m_maxIters)\n      {\n        f = findSmallSubdiagEntry(l);\n        // now rows and columns f..l (inclusive) decouple from the rest of the problem\n        if (f>0) m_S.coeffRef(f,f-1) = Scalar(0.0);\n        if (f == l) // One root found\n        {\n          l--;\n          local_iter = 0;\n        }\n        else if (f == l-1) // Two roots found\n        {\n          splitOffTwoRows(f);\n          l -= 2;\n          local_iter = 0;\n        }\n        else // No convergence yet\n        {\n          // if there's a zero on the diagonal of T, we can isolate an eigenvalue with Givens rotations\n          Index z = findSmallDiagEntry(f,l);\n          if (z>=f)\n          {\n            // zero found\n            pushDownZero(z,f,l);\n          }\n          else\n          {\n            // We are sure now that S.block(f,f, l-f+1,l-f+1) is unreduced upper-Hessenberg \n            // and T.block(f,f, l-f+1,l-f+1) is invertible upper-triangular, which allows us to\n            // apply a QR-like iteration to rows and columns f..l.\n            step(f,l, local_iter);\n            local_iter++;\n            m_global_iter++;\n          }\n        }\n      }\n      // check if we converged before reaching the iteration limit\n      m_info = (local_iter<m_maxIters) ? 
Success : NoConvergence;\n\n      // For each non-triangular 2x2 diagonal block of S,\n      //    reduce the respective 2x2 diagonal block of T to positive diagonal form using 2x2 SVD.\n      // This step is not mandatory for QZ, but it does help further extraction of eigenvalues/eigenvectors,\n      // and is on par with Lapack/Matlab QZ.\n      if(m_info==Success)\n      {\n        for(Index i=0; i<dim-1; ++i)\n        {\n          if(m_S.coeff(i+1, i) != Scalar(0))\n          {\n            JacobiRotation<Scalar> j_left, j_right;\n            internal::real_2x2_jacobi_svd(m_T, i, i+1, &j_left, &j_right);\n\n            // Apply resulting Jacobi rotations\n            m_S.applyOnTheLeft(i,i+1,j_left);\n            m_S.applyOnTheRight(i,i+1,j_right);\n            m_T.applyOnTheLeft(i,i+1,j_left);\n            m_T.applyOnTheRight(i,i+1,j_right);\n            m_T(i+1,i) = m_T(i,i+1) = Scalar(0);\n\n            if(m_computeQZ) {\n              m_Q.applyOnTheRight(i,i+1,j_left.transpose());\n              m_Z.applyOnTheLeft(i,i+1,j_right.transpose());\n            }\n\n            i++;\n          }\n        }\n      }\n\n      return *this;\n    } // end compute\n\n} // end namespace Eigen\n\n#endif // EIGEN_REAL_QZ_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Eigenvalues/RealSchur.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>\n// Copyright (C) 2010,2012 Jitse Niesen <jitse@maths.leeds.ac.uk>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_REAL_SCHUR_H\n#define EIGEN_REAL_SCHUR_H\n\n#include \"./HessenbergDecomposition.h\"\n\nnamespace Eigen { \n\n/** \\eigenvalues_module \\ingroup Eigenvalues_Module\n  *\n  *\n  * \\class RealSchur\n  *\n  * \\brief Performs a real Schur decomposition of a square matrix\n  *\n  * \\tparam _MatrixType the type of the matrix of which we are computing the\n  * real Schur decomposition; this is expected to be an instantiation of the\n  * Matrix class template.\n  *\n  * Given a real square matrix A, this class computes the real Schur\n  * decomposition: \\f$ A = U T U^T \\f$ where U is a real orthogonal matrix and\n  * T is a real quasi-triangular matrix. An orthogonal matrix is a matrix whose\n  * inverse is equal to its transpose, \\f$ U^{-1} = U^T \\f$. A quasi-triangular\n  * matrix is a block-triangular matrix whose diagonal consists of 1-by-1\n  * blocks and 2-by-2 blocks with complex eigenvalues. The eigenvalues of the\n  * blocks on the diagonal of T are the same as the eigenvalues of the matrix\n  * A, and thus the real Schur decomposition is used in EigenSolver to compute\n  * the eigendecomposition of a matrix.\n  *\n  * Call the function compute() to compute the real Schur decomposition of a\n  * given matrix. Alternatively, you can use the RealSchur(const MatrixType&, bool)\n  * constructor which computes the real Schur decomposition at construction\n  * time. Once the decomposition is computed, you can use the matrixU() and\n  * matrixT() functions to retrieve the matrices U and T in the decomposition.\n  *\n  * The documentation of RealSchur(const MatrixType&, bool) contains an example\n  * of the typical use of this class.\n  *\n  * \\note The implementation is adapted from\n  * <a href=\"http://math.nist.gov/javanumerics/jama/\">JAMA</a> (public domain).\n  * Their code is based on EISPACK.\n  *\n  * \\sa class ComplexSchur, class EigenSolver, class ComplexEigenSolver\n  */\ntemplate<typename _MatrixType> class RealSchur\n{\n  public:\n    typedef _MatrixType MatrixType;\n    enum {\n      RowsAtCompileTime = MatrixType::RowsAtCompileTime,\n      ColsAtCompileTime = MatrixType::ColsAtCompileTime,\n      Options = MatrixType::Options,\n      MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,\n      MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime\n    };\n    typedef typename MatrixType::Scalar Scalar;\n    typedef std::complex<typename NumTraits<Scalar>::Real> ComplexScalar;\n    typedef Eigen::Index Index; ///< \\deprecated since Eigen 3.3\n\n    typedef Matrix<ComplexScalar, ColsAtCompileTime, 1, Options & ~RowMajor, MaxColsAtCompileTime, 1> EigenvalueType;\n    typedef Matrix<Scalar, ColsAtCompileTime, 1, Options & ~RowMajor, MaxColsAtCompileTime, 1> ColumnVectorType;\n\n    /** \\brief Default constructor.\n      *\n      * \\param [in] size  Positive integer, size of the matrix whose Schur decomposition will be computed.\n      *\n      * The default constructor is useful in cases in which the user intends to\n      * perform decompositions via compute().  
The \\p size parameter is only\n      * used as a hint. It is not an error to give a wrong \\p size, but it may\n      * impair performance.\n      *\n      * \\sa compute() for an example.\n      */\n    explicit RealSchur(Index size = RowsAtCompileTime==Dynamic ? 1 : RowsAtCompileTime)\n            : m_matT(size, size),\n              m_matU(size, size),\n              m_workspaceVector(size),\n              m_hess(size),\n              m_isInitialized(false),\n              m_matUisUptodate(false),\n              m_maxIters(-1)\n    { }\n\n    /** \\brief Constructor; computes real Schur decomposition of given matrix. \n      * \n      * \\param[in]  matrix    Square matrix whose Schur decomposition is to be computed.\n      * \\param[in]  computeU  If true, both T and U are computed; if false, only T is computed.\n      *\n      * This constructor calls compute() to compute the Schur decomposition.\n      *\n      * Example: \\include RealSchur_RealSchur_MatrixType.cpp\n      * Output: \\verbinclude RealSchur_RealSchur_MatrixType.out\n      */\n    template<typename InputType>\n    explicit RealSchur(const EigenBase<InputType>& matrix, bool computeU = true)\n            : m_matT(matrix.rows(),matrix.cols()),\n              m_matU(matrix.rows(),matrix.cols()),\n              m_workspaceVector(matrix.rows()),\n              m_hess(matrix.rows()),\n              m_isInitialized(false),\n              m_matUisUptodate(false),\n              m_maxIters(-1)\n    {\n      compute(matrix.derived(), computeU);\n    }\n\n    /** \\brief Returns the orthogonal matrix in the Schur decomposition. \n      *\n      * \\returns A const reference to the matrix U.\n      *\n      * \\pre Either the constructor RealSchur(const MatrixType&, bool) or the\n      * member function compute(const MatrixType&, bool) has been called before\n      * to compute the Schur decomposition of a matrix, and \\p computeU was set\n      * to true (the default value).\n      *\n      * \\sa RealSchur(const MatrixType&, bool) for an example\n      */\n    const MatrixType& matrixU() const\n    {\n      eigen_assert(m_isInitialized && \"RealSchur is not initialized.\");\n      eigen_assert(m_matUisUptodate && \"The matrix U has not been computed during the RealSchur decomposition.\");\n      return m_matU;\n    }\n\n    /** \\brief Returns the quasi-triangular matrix in the Schur decomposition. \n      *\n      * \\returns A const reference to the matrix T.\n      *\n      * \\pre Either the constructor RealSchur(const MatrixType&, bool) or the\n      * member function compute(const MatrixType&, bool) has been called before\n      * to compute the Schur decomposition of a matrix.\n      *\n      * \\sa RealSchur(const MatrixType&, bool) for an example\n      */\n    const MatrixType& matrixT() const\n    {\n      eigen_assert(m_isInitialized && \"RealSchur is not initialized.\");\n      return m_matT;\n    }\n  \n    /** \\brief Computes Schur decomposition of given matrix. \n      * \n      * \\param[in]  matrix    Square matrix whose Schur decomposition is to be computed.\n      * \\param[in]  computeU  If true, both T and U are computed; if false, only T is computed.\n      * \\returns    Reference to \\c *this\n      *\n      * The Schur decomposition is computed by first reducing the matrix to\n      * Hessenberg form using the class HessenbergDecomposition. The Hessenberg\n      * matrix is then reduced to triangular form by performing Francis QR\n      * iterations with implicit double shift. 
The cost of computing the Schur\n      * decomposition depends on the number of iterations; as a rough guide, it\n      * may be taken to be \\f$25n^3\\f$ flops if \\a computeU is true and\n      * \\f$10n^3\\f$ flops if \\a computeU is false.\n      *\n      * Example: \\include RealSchur_compute.cpp\n      * Output: \\verbinclude RealSchur_compute.out\n      *\n      * \\sa compute(const MatrixType&, bool, Index)\n      */\n    template<typename InputType>\n    RealSchur& compute(const EigenBase<InputType>& matrix, bool computeU = true);\n\n    /** \\brief Computes Schur decomposition of a Hessenberg matrix H = Z T Z^T\n     *  \\param[in] matrixH Matrix in Hessenberg form H\n     *  \\param[in] matrixQ orthogonal matrix Q that transforms a matrix A to H : A = Q H Q^T\n     *  \\param computeU Computes the matrix U of the Schur vectors\n     * \\return Reference to \\c *this\n     * \n     *  This routine assumes that the matrix is already reduced to Hessenberg form matrixH\n     *  using either the class HessenbergDecomposition or some other means. \n     *  It computes the upper quasi-triangular matrix T of the Schur decomposition of H.\n     *  When computeU is true, this routine computes the matrix U such that \n     *  A = U T U^T = (QZ) T (QZ)^T = Q H Q^T, where A is the initial matrix.\n     * \n     * NOTE Q is referenced if computeU is true; so, if the initial orthogonal matrix\n     * is not available, the user should give an identity matrix (Q.setIdentity()).\n     * \n     * \\sa compute(const MatrixType&, bool)\n     */\n    template<typename HessMatrixType, typename OrthMatrixType>\n    RealSchur& computeFromHessenberg(const HessMatrixType& matrixH, const OrthMatrixType& matrixQ,  bool computeU);\n    /** \\brief Reports whether previous computation was successful.\n      *\n      * \\returns \\c Success if computation was successful, \\c NoConvergence otherwise.\n      */\n    ComputationInfo info() const\n    {\n      eigen_assert(m_isInitialized && \"RealSchur is not initialized.\");\n      return m_info;\n    }\n\n    /** \\brief Sets the maximum number of iterations allowed. \n      *\n      * If not specified by the user, the maximum number of iterations is m_maxIterationsPerRow times the size\n      * of the matrix.\n      */\n    RealSchur& setMaxIterations(Index maxIters)\n    {\n      m_maxIters = maxIters;\n      return *this;\n    }\n\n    /** \\brief Returns the maximum number of iterations. */\n    Index getMaxIterations()\n    {\n      return m_maxIters;\n    }\n\n    /** \\brief Maximum number of iterations per row.\n      *\n      * If not otherwise specified, the maximum number of iterations is this number times the size of the\n      * matrix. 
It is currently set to 40.\n      */\n    static const int m_maxIterationsPerRow = 40;\n\n  private:\n    \n    MatrixType m_matT;\n    MatrixType m_matU;\n    ColumnVectorType m_workspaceVector;\n    HessenbergDecomposition<MatrixType> m_hess;\n    ComputationInfo m_info;\n    bool m_isInitialized;\n    bool m_matUisUptodate;\n    Index m_maxIters;\n\n    typedef Matrix<Scalar,3,1> Vector3s;\n\n    Scalar computeNormOfT();\n    Index findSmallSubdiagEntry(Index iu, const Scalar& considerAsZero);\n    void splitOffTwoRows(Index iu, bool computeU, const Scalar& exshift);\n    void computeShift(Index iu, Index iter, Scalar& exshift, Vector3s& shiftInfo);\n    void initFrancisQRStep(Index il, Index iu, const Vector3s& shiftInfo, Index& im, Vector3s& firstHouseholderVector);\n    void performFrancisQRStep(Index il, Index im, Index iu, bool computeU, const Vector3s& firstHouseholderVector, Scalar* workspace);\n};\n\n\ntemplate<typename MatrixType>\ntemplate<typename InputType>\nRealSchur<MatrixType>& RealSchur<MatrixType>::compute(const EigenBase<InputType>& matrix, bool computeU)\n{\n  const Scalar considerAsZero = (std::numeric_limits<Scalar>::min)();\n\n  eigen_assert(matrix.cols() == matrix.rows());\n  Index maxIters = m_maxIters;\n  if (maxIters == -1)\n    maxIters = m_maxIterationsPerRow * matrix.rows();\n\n  Scalar scale = matrix.derived().cwiseAbs().maxCoeff();\n  if(scale<considerAsZero)\n  {\n    m_matT.setZero(matrix.rows(),matrix.cols());\n    if(computeU)\n      m_matU.setIdentity(matrix.rows(),matrix.cols());\n    m_info = Success;\n    m_isInitialized = true;\n    m_matUisUptodate = computeU;\n    return *this;\n  }\n\n  // Step 1. Reduce to Hessenberg form\n  m_hess.compute(matrix.derived()/scale);\n\n  // Step 2. Reduce to real Schur form\n  // Note: we copy m_hess.matrixQ() into m_matU here and not in computeFromHessenberg\n  //       to be able to pass our working-space buffer for the Householder to Dense evaluation.\n  m_workspaceVector.resize(matrix.cols());\n  if(computeU)\n    m_hess.matrixQ().evalTo(m_matU, m_workspaceVector);\n  computeFromHessenberg(m_hess.matrixH(), m_matU, computeU);\n\n  m_matT *= scale;\n  \n  return *this;\n}\ntemplate<typename MatrixType>\ntemplate<typename HessMatrixType, typename OrthMatrixType>\nRealSchur<MatrixType>& RealSchur<MatrixType>::computeFromHessenberg(const HessMatrixType& matrixH, const OrthMatrixType& matrixQ,  bool computeU)\n{\n  using std::abs;\n\n  m_matT = matrixH;\n  m_workspaceVector.resize(m_matT.cols());\n  if(computeU && !internal::is_same_dense(m_matU,matrixQ))\n    m_matU = matrixQ;\n  \n  Index maxIters = m_maxIters;\n  if (maxIters == -1)\n    maxIters = m_maxIterationsPerRow * matrixH.rows();\n  Scalar* workspace = &m_workspaceVector.coeffRef(0);\n\n  // The matrix m_matT is divided in three parts. \n  // Rows 0,...,il-1 are decoupled from the rest because m_matT(il,il-1) is zero. 
\n  // Rows il,...,iu are the part we are working on (the active window).\n  // Rows iu+1,...,end are already brought into triangular form.\n  Index iu = m_matT.cols() - 1;\n  Index iter = 0;      // iteration count for current eigenvalue\n  Index totalIter = 0; // iteration count for whole matrix\n  Scalar exshift(0);   // sum of exceptional shifts\n  Scalar norm = computeNormOfT();\n  // sub-diagonal entries smaller than considerAsZero will be treated as zero.\n  // We use eps^2 to enable more precision in small eigenvalues.\n  Scalar considerAsZero = numext::maxi<Scalar>( norm * numext::abs2(NumTraits<Scalar>::epsilon()),\n                                                (std::numeric_limits<Scalar>::min)() );\n\n  if(norm!=Scalar(0))\n  {\n    while (iu >= 0)\n    {\n      Index il = findSmallSubdiagEntry(iu,considerAsZero);\n\n      // Check for convergence\n      if (il == iu) // One root found\n      {\n        m_matT.coeffRef(iu,iu) = m_matT.coeff(iu,iu) + exshift;\n        if (iu > 0)\n          m_matT.coeffRef(iu, iu-1) = Scalar(0);\n        iu--;\n        iter = 0;\n      }\n      else if (il == iu-1) // Two roots found\n      {\n        splitOffTwoRows(iu, computeU, exshift);\n        iu -= 2;\n        iter = 0;\n      }\n      else // No convergence yet\n      {\n        // The firstHouseholderVector vector has to be initialized to something to get rid of a silly GCC warning (-O1 -Wall -DNDEBUG )\n        Vector3s firstHouseholderVector = Vector3s::Zero(), shiftInfo;\n        computeShift(iu, iter, exshift, shiftInfo);\n        iter = iter + 1;\n        totalIter = totalIter + 1;\n        if (totalIter > maxIters) break;\n        Index im;\n        initFrancisQRStep(il, iu, shiftInfo, im, firstHouseholderVector);\n        performFrancisQRStep(il, im, iu, computeU, firstHouseholderVector, workspace);\n      }\n    }\n  }\n  if(totalIter <= maxIters)\n    m_info = Success;\n  else\n    m_info = NoConvergence;\n\n  m_isInitialized = true;\n  m_matUisUptodate = computeU;\n  return *this;\n}\n\n/** \\internal Computes and returns vector L1 norm of T */\ntemplate<typename MatrixType>\ninline typename MatrixType::Scalar RealSchur<MatrixType>::computeNormOfT()\n{\n  const Index size = m_matT.cols();\n  // FIXME to be efficient the following would require a triangular reduction code\n  // Scalar norm = m_matT.upper().cwiseAbs().sum() \n  //               + m_matT.bottomLeftCorner(size-1,size-1).diagonal().cwiseAbs().sum();\n  Scalar norm(0);\n  for (Index j = 0; j < size; ++j)\n    norm += m_matT.col(j).segment(0, (std::min)(size,j+2)).cwiseAbs().sum();\n  return norm;\n}\n\n/** \\internal Look for a single small sub-diagonal element and return its index */\ntemplate<typename MatrixType>\ninline Index RealSchur<MatrixType>::findSmallSubdiagEntry(Index iu, const Scalar& considerAsZero)\n{\n  using std::abs;\n  Index res = iu;\n  while (res > 0)\n  {\n    Scalar s = abs(m_matT.coeff(res-1,res-1)) + abs(m_matT.coeff(res,res));\n\n    s = numext::maxi<Scalar>(s * NumTraits<Scalar>::epsilon(), considerAsZero);\n    \n    if (abs(m_matT.coeff(res,res-1)) <= s)\n      break;\n    res--;\n  }\n  return res;\n}\n\n/** \\internal Update T given that rows iu-1 and iu decouple from the rest. 
*/\ntemplate<typename MatrixType>\ninline void RealSchur<MatrixType>::splitOffTwoRows(Index iu, bool computeU, const Scalar& exshift)\n{\n  using std::sqrt;\n  using std::abs;\n  const Index size = m_matT.cols();\n\n  // The eigenvalues of the 2x2 matrix [a b; c d] are \n  // trace +/- sqrt(discr/4) where discr = tr^2 - 4*det, tr = a + d, det = ad - bc\n  Scalar p = Scalar(0.5) * (m_matT.coeff(iu-1,iu-1) - m_matT.coeff(iu,iu));\n  Scalar q = p * p + m_matT.coeff(iu,iu-1) * m_matT.coeff(iu-1,iu);   // q = tr^2 / 4 - det = discr/4\n  m_matT.coeffRef(iu,iu) += exshift;\n  m_matT.coeffRef(iu-1,iu-1) += exshift;\n\n  if (q >= Scalar(0)) // Two real eigenvalues\n  {\n    Scalar z = sqrt(abs(q));\n    JacobiRotation<Scalar> rot;\n    if (p >= Scalar(0))\n      rot.makeGivens(p + z, m_matT.coeff(iu, iu-1));\n    else\n      rot.makeGivens(p - z, m_matT.coeff(iu, iu-1));\n\n    m_matT.rightCols(size-iu+1).applyOnTheLeft(iu-1, iu, rot.adjoint());\n    m_matT.topRows(iu+1).applyOnTheRight(iu-1, iu, rot);\n    m_matT.coeffRef(iu, iu-1) = Scalar(0); \n    if (computeU)\n      m_matU.applyOnTheRight(iu-1, iu, rot);\n  }\n\n  if (iu > 1) \n    m_matT.coeffRef(iu-1, iu-2) = Scalar(0);\n}\n\n/** \\internal Form shift in shiftInfo, and update exshift if an exceptional shift is performed. */\ntemplate<typename MatrixType>\ninline void RealSchur<MatrixType>::computeShift(Index iu, Index iter, Scalar& exshift, Vector3s& shiftInfo)\n{\n  using std::sqrt;\n  using std::abs;\n  shiftInfo.coeffRef(0) = m_matT.coeff(iu,iu);\n  shiftInfo.coeffRef(1) = m_matT.coeff(iu-1,iu-1);\n  shiftInfo.coeffRef(2) = m_matT.coeff(iu,iu-1) * m_matT.coeff(iu-1,iu);\n\n  // Wilkinson's original ad hoc shift\n  if (iter == 10)\n  {\n    exshift += shiftInfo.coeff(0);\n    for (Index i = 0; i <= iu; ++i)\n      m_matT.coeffRef(i,i) -= shiftInfo.coeff(0);\n    Scalar s = abs(m_matT.coeff(iu,iu-1)) + abs(m_matT.coeff(iu-1,iu-2));\n    shiftInfo.coeffRef(0) = Scalar(0.75) * s;\n    shiftInfo.coeffRef(1) = Scalar(0.75) * s;\n    shiftInfo.coeffRef(2) = Scalar(-0.4375) * s * s;\n  }\n\n  // MATLAB's new ad hoc shift\n  if (iter == 30)\n  {\n    Scalar s = (shiftInfo.coeff(1) - shiftInfo.coeff(0)) / Scalar(2.0);\n    s = s * s + shiftInfo.coeff(2);\n    if (s > Scalar(0))\n    {\n      s = sqrt(s);\n      if (shiftInfo.coeff(1) < shiftInfo.coeff(0))\n        s = -s;\n      s = s + (shiftInfo.coeff(1) - shiftInfo.coeff(0)) / Scalar(2.0);\n      s = shiftInfo.coeff(0) - shiftInfo.coeff(2) / s;\n      exshift += s;\n      for (Index i = 0; i <= iu; ++i)\n        m_matT.coeffRef(i,i) -= s;\n      shiftInfo.setConstant(Scalar(0.964));\n    }\n  }\n}\n\n/** \\internal Compute index im at which Francis QR step starts and the first Householder vector. 
*/\ntemplate<typename MatrixType>\ninline void RealSchur<MatrixType>::initFrancisQRStep(Index il, Index iu, const Vector3s& shiftInfo, Index& im, Vector3s& firstHouseholderVector)\n{\n  using std::abs;\n  Vector3s& v = firstHouseholderVector; // alias to save typing\n\n  for (im = iu-2; im >= il; --im)\n  {\n    const Scalar Tmm = m_matT.coeff(im,im);\n    const Scalar r = shiftInfo.coeff(0) - Tmm;\n    const Scalar s = shiftInfo.coeff(1) - Tmm;\n    v.coeffRef(0) = (r * s - shiftInfo.coeff(2)) / m_matT.coeff(im+1,im) + m_matT.coeff(im,im+1);\n    v.coeffRef(1) = m_matT.coeff(im+1,im+1) - Tmm - r - s;\n    v.coeffRef(2) = m_matT.coeff(im+2,im+1);\n    if (im == il) {\n      break;\n    }\n    const Scalar lhs = m_matT.coeff(im,im-1) * (abs(v.coeff(1)) + abs(v.coeff(2)));\n    const Scalar rhs = v.coeff(0) * (abs(m_matT.coeff(im-1,im-1)) + abs(Tmm) + abs(m_matT.coeff(im+1,im+1)));\n    if (abs(lhs) < NumTraits<Scalar>::epsilon() * rhs)\n      break;\n  }\n}\n\n/** \\internal Perform a Francis QR step involving rows il:iu and columns im:iu. */\ntemplate<typename MatrixType>\ninline void RealSchur<MatrixType>::performFrancisQRStep(Index il, Index im, Index iu, bool computeU, const Vector3s& firstHouseholderVector, Scalar* workspace)\n{\n  eigen_assert(im >= il);\n  eigen_assert(im <= iu-2);\n\n  const Index size = m_matT.cols();\n\n  for (Index k = im; k <= iu-2; ++k)\n  {\n    bool firstIteration = (k == im);\n\n    Vector3s v;\n    if (firstIteration)\n      v = firstHouseholderVector;\n    else\n      v = m_matT.template block<3,1>(k,k-1);\n\n    Scalar tau, beta;\n    Matrix<Scalar, 2, 1> ess;\n    v.makeHouseholder(ess, tau, beta);\n    \n    if (beta != Scalar(0)) // if v is not zero\n    {\n      if (firstIteration && k > il)\n        m_matT.coeffRef(k,k-1) = -m_matT.coeff(k,k-1);\n      else if (!firstIteration)\n        m_matT.coeffRef(k,k-1) = beta;\n\n      // These Householder transformations form the O(n^3) part of the algorithm\n      m_matT.block(k, k, 3, size-k).applyHouseholderOnTheLeft(ess, tau, workspace);\n      m_matT.block(0, k, (std::min)(iu,k+3) + 1, 3).applyHouseholderOnTheRight(ess, tau, workspace);\n      if (computeU)\n        m_matU.block(0, k, size, 3).applyHouseholderOnTheRight(ess, tau, workspace);\n    }\n  }\n\n  Matrix<Scalar, 2, 1> v = m_matT.template block<2,1>(iu-1, iu-2);\n  Scalar tau, beta;\n  Matrix<Scalar, 1, 1> ess;\n  v.makeHouseholder(ess, tau, beta);\n\n  if (beta != Scalar(0)) // if v is not zero\n  {\n    m_matT.coeffRef(iu-1, iu-2) = beta;\n    m_matT.block(iu-1, iu-1, 2, size-iu+1).applyHouseholderOnTheLeft(ess, tau, workspace);\n    m_matT.block(0, iu-1, iu+1, 2).applyHouseholderOnTheRight(ess, tau, workspace);\n    if (computeU)\n      m_matU.block(0, iu-1, size, 2).applyHouseholderOnTheRight(ess, tau, workspace);\n  }\n\n  // clean up pollution due to round-off errors\n  for (Index i = im+2; i <= iu; ++i)\n  {\n    m_matT.coeffRef(i,i-2) = Scalar(0);\n    if (i > im+2)\n      m_matT.coeffRef(i,i-3) = Scalar(0);\n  }\n}\n\n} // end namespace Eigen\n\n#endif // EIGEN_REAL_SCHUR_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h",
    "content": "/*\n Copyright (c) 2011, Intel Corporation. All rights reserved.\n\n Redistribution and use in source and binary forms, with or without modification,\n are permitted provided that the following conditions are met:\n\n * Redistributions of source code must retain the above copyright notice, this\n   list of conditions and the following disclaimer.\n * Redistributions in binary form must reproduce the above copyright notice,\n   this list of conditions and the following disclaimer in the documentation\n   and/or other materials provided with the distribution.\n * Neither the name of Intel Corporation nor the names of its contributors may\n   be used to endorse or promote products derived from this software without\n   specific prior written permission.\n\n THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\" AND\n ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED\n WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE\n DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR\n ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES\n (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;\n LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON\n ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\n SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\n ********************************************************************************\n *   Content : Eigen bindings to LAPACKe\n *    Real Schur needed to real unsymmetrical eigenvalues/eigenvectors.\n ********************************************************************************\n*/\n\n#ifndef EIGEN_REAL_SCHUR_LAPACKE_H\n#define EIGEN_REAL_SCHUR_LAPACKE_H\n\nnamespace Eigen { \n\n/** \\internal Specialization for the data types supported by LAPACKe */\n\n#define EIGEN_LAPACKE_SCHUR_REAL(EIGTYPE, LAPACKE_TYPE, LAPACKE_PREFIX, LAPACKE_PREFIX_U, EIGCOLROW, LAPACKE_COLROW) \\\ntemplate<> template<typename InputType> inline \\\nRealSchur<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >& \\\nRealSchur<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >::compute(const EigenBase<InputType>& matrix, bool computeU) \\\n{ \\\n  eigen_assert(matrix.cols() == matrix.rows()); \\\n\\\n  lapack_int n = internal::convert_index<lapack_int>(matrix.cols()), sdim, info; \\\n  lapack_int matrix_order = LAPACKE_COLROW; \\\n  char jobvs, sort='N'; \\\n  LAPACK_##LAPACKE_PREFIX_U##_SELECT2 select = 0; \\\n  jobvs = (computeU) ? 
'V' : 'N'; \\\n  m_matU.resize(n, n); \\\n  lapack_int ldvs  = internal::convert_index<lapack_int>(m_matU.outerStride()); \\\n  m_matT = matrix; \\\n  lapack_int lda = internal::convert_index<lapack_int>(m_matT.outerStride()); \\\n  Matrix<EIGTYPE, Dynamic, Dynamic> wr, wi; \\\n  wr.resize(n, 1); wi.resize(n, 1); \\\n  info = LAPACKE_##LAPACKE_PREFIX##gees( matrix_order, jobvs, sort, select, n, (LAPACKE_TYPE*)m_matT.data(), lda, &sdim, (LAPACKE_TYPE*)wr.data(), (LAPACKE_TYPE*)wi.data(), (LAPACKE_TYPE*)m_matU.data(), ldvs ); \\\n  if(info == 0) \\\n    m_info = Success; \\\n  else \\\n    m_info = NoConvergence; \\\n\\\n  m_isInitialized = true; \\\n  m_matUisUptodate = computeU; \\\n  return *this; \\\n\\\n}\n\nEIGEN_LAPACKE_SCHUR_REAL(double,   double, d, D, ColMajor, LAPACK_COL_MAJOR)\nEIGEN_LAPACKE_SCHUR_REAL(float,    float,  s, S, ColMajor, LAPACK_COL_MAJOR)\nEIGEN_LAPACKE_SCHUR_REAL(double,   double, d, D, RowMajor, LAPACK_ROW_MAJOR)\nEIGEN_LAPACKE_SCHUR_REAL(float,    float,  s, S, RowMajor, LAPACK_ROW_MAJOR)\n\n} // end namespace Eigen\n\n#endif // EIGEN_REAL_SCHUR_LAPACKE_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>\n// Copyright (C) 2010 Jitse Niesen <jitse@maths.leeds.ac.uk>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_SELFADJOINTEIGENSOLVER_H\n#define EIGEN_SELFADJOINTEIGENSOLVER_H\n\n#include \"./Tridiagonalization.h\"\n\nnamespace Eigen { \n\ntemplate<typename _MatrixType>\nclass GeneralizedSelfAdjointEigenSolver;\n\nnamespace internal {\ntemplate<typename SolverType,int Size,bool IsComplex> struct direct_selfadjoint_eigenvalues;\n\ntemplate<typename MatrixType, typename DiagType, typename SubDiagType>\nEIGEN_DEVICE_FUNC\nComputationInfo computeFromTridiagonal_impl(DiagType& diag, SubDiagType& subdiag, const Index maxIterations, bool computeEigenvectors, MatrixType& eivec);\n}\n\n/** \\eigenvalues_module \\ingroup Eigenvalues_Module\n  *\n  *\n  * \\class SelfAdjointEigenSolver\n  *\n  * \\brief Computes eigenvalues and eigenvectors of selfadjoint matrices\n  *\n  * \\tparam _MatrixType the type of the matrix of which we are computing the\n  * eigendecomposition; this is expected to be an instantiation of the Matrix\n  * class template.\n  *\n  * A matrix \\f$ A \\f$ is selfadjoint if it equals its adjoint. For real\n  * matrices, this means that the matrix is symmetric: it equals its\n  * transpose. This class computes the eigenvalues and eigenvectors of a\n  * selfadjoint matrix. These are the scalars \\f$ \\lambda \\f$ and vectors\n  * \\f$ v \\f$ such that \\f$ Av = \\lambda v \\f$.  The eigenvalues of a\n  * selfadjoint matrix are always real. If \\f$ D \\f$ is a diagonal matrix with\n  * the eigenvalues on the diagonal, and \\f$ V \\f$ is a matrix with the\n  * eigenvectors as its columns, then \\f$ A = V D V^{-1} \\f$. This is called the\n  * eigendecomposition.\n  *\n  * For a selfadjoint matrix, \\f$ V \\f$ is unitary, meaning its inverse is equal\n  * to its adjoint, \\f$ V^{-1} = V^{\\dagger} \\f$. If \\f$ A \\f$ is real, then\n  * \\f$ V \\f$ is also real and therefore orthogonal, meaning its inverse is\n  * equal to its transpose, \\f$ V^{-1} = V^T \\f$.\n  *\n  * The algorithm exploits the fact that the matrix is selfadjoint, making it\n  * faster and more accurate than the general purpose eigenvalue algorithms\n  * implemented in EigenSolver and ComplexEigenSolver.\n  *\n  * Only the \\b lower \\b triangular \\b part of the input matrix is referenced.\n  *\n  * Call the function compute() to compute the eigenvalues and eigenvectors of\n  * a given matrix. Alternatively, you can use the\n  * SelfAdjointEigenSolver(const MatrixType&, int) constructor which computes\n  * the eigenvalues and eigenvectors at construction time. 
Once the eigenvalues\n  * and eigenvectors are computed, they can be retrieved with the eigenvalues()\n  * and eigenvectors() functions.\n  *\n  * The documentation for SelfAdjointEigenSolver(const MatrixType&, int)\n  * contains an example of the typical use of this class.\n  *\n  * To solve the \\em generalized eigenvalue problem \\f$ Av = \\lambda Bv \\f$ and\n  * the like, see the class GeneralizedSelfAdjointEigenSolver.\n  *\n  * \\sa MatrixBase::eigenvalues(), class EigenSolver, class ComplexEigenSolver\n  */\ntemplate<typename _MatrixType> class SelfAdjointEigenSolver\n{\n  public:\n\n    typedef _MatrixType MatrixType;\n    enum {\n      Size = MatrixType::RowsAtCompileTime,\n      ColsAtCompileTime = MatrixType::ColsAtCompileTime,\n      Options = MatrixType::Options,\n      MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime\n    };\n    \n    /** \\brief Scalar type for matrices of type \\p _MatrixType. */\n    typedef typename MatrixType::Scalar Scalar;\n    typedef Eigen::Index Index; ///< \\deprecated since Eigen 3.3\n    \n    typedef Matrix<Scalar,Size,Size,ColMajor,MaxColsAtCompileTime,MaxColsAtCompileTime> EigenvectorsType;\n\n    /** \\brief Real scalar type for \\p _MatrixType.\n      *\n      * This is just \\c Scalar if #Scalar is real (e.g., \\c float or\n      * \\c double), and the type of the real part of \\c Scalar if #Scalar is\n      * complex.\n      */\n    typedef typename NumTraits<Scalar>::Real RealScalar;\n    \n    friend struct internal::direct_selfadjoint_eigenvalues<SelfAdjointEigenSolver,Size,NumTraits<Scalar>::IsComplex>;\n\n    /** \\brief Type for vector of eigenvalues as returned by eigenvalues().\n      *\n      * This is a column vector with entries of type #RealScalar.\n      * The length of the vector is the size of \\p _MatrixType.\n      */\n    typedef typename internal::plain_col_type<MatrixType, RealScalar>::type RealVectorType;\n    typedef Tridiagonalization<MatrixType> TridiagonalizationType;\n    typedef typename TridiagonalizationType::SubDiagonalType SubDiagonalType;\n\n    /** \\brief Default constructor for fixed-size matrices.\n      *\n      * The default constructor is useful in cases in which the user intends to\n      * perform decompositions via compute(). This constructor\n      * can only be used if \\p _MatrixType is a fixed-size matrix; use\n      * SelfAdjointEigenSolver(Index) for dynamic-size matrices.\n      *\n      * Example: \\include SelfAdjointEigenSolver_SelfAdjointEigenSolver.cpp\n      * Output: \\verbinclude SelfAdjointEigenSolver_SelfAdjointEigenSolver.out\n      */\n    EIGEN_DEVICE_FUNC\n    SelfAdjointEigenSolver()\n        : m_eivec(),\n          m_eivalues(),\n          m_subdiag(),\n          m_hcoeffs(),\n          m_info(InvalidInput),\n          m_isInitialized(false),\n          m_eigenvectorsOk(false)\n    { }\n\n    /** \\brief Constructor, pre-allocates memory for dynamic-size matrices.\n      *\n      * \\param [in]  size  Positive integer, size of the matrix whose\n      * eigenvalues and eigenvectors will be computed.\n      *\n      * This constructor is useful for dynamic-size matrices, when the user\n      * intends to perform decompositions via compute(). The \\p size\n      * parameter is only used as a hint. 
It is not an error to give a wrong\n      * \\p size, but it may impair performance.\n      *\n      * \\sa compute() for an example\n      */\n    EIGEN_DEVICE_FUNC\n    explicit SelfAdjointEigenSolver(Index size)\n        : m_eivec(size, size),\n          m_eivalues(size),\n          m_subdiag(size > 1 ? size - 1 : 1),\n          m_hcoeffs(size > 1 ? size - 1 : 1),\n          m_isInitialized(false),\n          m_eigenvectorsOk(false)\n    {}\n\n    /** \\brief Constructor; computes eigendecomposition of given matrix.\n      *\n      * \\param[in]  matrix  Selfadjoint matrix whose eigendecomposition is to\n      *    be computed. Only the lower triangular part of the matrix is referenced.\n      * \\param[in]  options Can be #ComputeEigenvectors (default) or #EigenvaluesOnly.\n      *\n      * This constructor calls compute(const MatrixType&, int) to compute the\n      * eigenvalues of the matrix \\p matrix. The eigenvectors are computed if\n      * \\p options equals #ComputeEigenvectors.\n      *\n      * Example: \\include SelfAdjointEigenSolver_SelfAdjointEigenSolver_MatrixType.cpp\n      * Output: \\verbinclude SelfAdjointEigenSolver_SelfAdjointEigenSolver_MatrixType.out\n      *\n      * \\sa compute(const MatrixType&, int)\n      */\n    template<typename InputType>\n    EIGEN_DEVICE_FUNC\n    explicit SelfAdjointEigenSolver(const EigenBase<InputType>& matrix, int options = ComputeEigenvectors)\n      : m_eivec(matrix.rows(), matrix.cols()),\n        m_eivalues(matrix.cols()),\n        m_subdiag(matrix.rows() > 1 ? matrix.rows() - 1 : 1),\n        m_hcoeffs(matrix.cols() > 1 ? matrix.cols() - 1 : 1),\n        m_isInitialized(false),\n        m_eigenvectorsOk(false)\n    {\n      compute(matrix.derived(), options);\n    }\n\n    /** \\brief Computes eigendecomposition of given matrix.\n      *\n      * \\param[in]  matrix  Selfadjoint matrix whose eigendecomposition is to\n      *    be computed. Only the lower triangular part of the matrix is referenced.\n      * \\param[in]  options Can be #ComputeEigenvectors (default) or #EigenvaluesOnly.\n      * \\returns    Reference to \\c *this\n      *\n      * This function computes the eigenvalues of \\p matrix.  The eigenvalues()\n      * function can be used to retrieve them.  If \\p options equals #ComputeEigenvectors,\n      * then the eigenvectors are also computed and can be retrieved by\n      * calling eigenvectors().\n      *\n      * This implementation uses a symmetric QR algorithm. The matrix is first\n      * reduced to tridiagonal form using the Tridiagonalization class. The\n      * tridiagonal matrix is then brought to diagonal form with implicit\n      * symmetric QR steps with Wilkinson shift. 
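\n      * For example, an eigenvalues-only computation (an illustrative sketch,\n      * with \\c A some selfadjoint matrix) could read:\n      * \\code\n      * Eigen::SelfAdjointEigenSolver<Eigen::MatrixXd> es;\n      * es.compute(A, Eigen::EigenvaluesOnly); // skips accumulating the eigenvectors\n      * \\endcode\n      *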
Details can be found in\n      * Section 8.3 of Golub \\& Van Loan, <i>%Matrix Computations</i>.\n      *\n      * The cost of the computation is about \\f$ 9n^3 \\f$ if the eigenvectors\n      * are required and \\f$ 4n^3/3 \\f$ if they are not required.\n      *\n      * This method reuses the memory in the SelfAdjointEigenSolver object that\n      * was allocated when the object was constructed, if the size of the\n      * matrix does not change.\n      *\n      * Example: \\include SelfAdjointEigenSolver_compute_MatrixType.cpp\n      * Output: \\verbinclude SelfAdjointEigenSolver_compute_MatrixType.out\n      *\n      * \\sa SelfAdjointEigenSolver(const MatrixType&, int)\n      */\n    template<typename InputType>\n    EIGEN_DEVICE_FUNC\n    SelfAdjointEigenSolver& compute(const EigenBase<InputType>& matrix, int options = ComputeEigenvectors);\n    \n    /** \\brief Computes eigendecomposition of given matrix using a closed-form algorithm\n      *\n      * This is a variant of compute(const MatrixType&, int options) which\n      * directly solves the underlying polynomial equation.\n      * \n      * Currently only 2x2 and 3x3 matrices for which the sizes are known at compile time are supported (e.g., Matrix3d).\n      * \n      * This method is usually significantly faster than the QR iterative algorithm\n      * but it might also be less accurate. It is also worth noting that\n      * for 3x3 matrices it involves trigonometric operations which are\n      * not necessarily available for all scalar types.\n      * \n      * For the 3x3 case, we observed the following worst case relative error regarding the eigenvalues:\n      *   - double: 1e-8\n      *   - float:  1e-3\n      *\n      * \\sa compute(const MatrixType&, int options)\n      */\n    EIGEN_DEVICE_FUNC\n    SelfAdjointEigenSolver& computeDirect(const MatrixType& matrix, int options = ComputeEigenvectors);\n\n    /**\n      *\\brief Computes the eigen decomposition from a tridiagonal symmetric matrix\n      *\n      * \\param[in] diag The vector containing the diagonal of the matrix.\n      * \\param[in] subdiag The subdiagonal of the matrix.\n      * \\param[in] options Can be #ComputeEigenvectors (default) or #EigenvaluesOnly.\n      * \\returns Reference to \\c *this\n      *\n      * This function assumes that the matrix has been reduced to tridiagonal form.\n      *\n      * \\sa compute(const MatrixType&, int) for more information\n      */\n    SelfAdjointEigenSolver& computeFromTridiagonal(const RealVectorType& diag, const SubDiagonalType& subdiag , int options=ComputeEigenvectors);\n\n    /** \\brief Returns the eigenvectors of given matrix.\n      *\n      * \\returns  A const reference to the matrix whose columns are the eigenvectors.\n      *\n      * \\pre The eigenvectors have been computed before.\n      *\n      * Column \\f$ k \\f$ of the returned matrix is an eigenvector corresponding\n      * to eigenvalue number \\f$ k \\f$ as returned by eigenvalues().  The\n      * eigenvectors are normalized to have (Euclidean) norm equal to one. If\n      * this object was used to solve the eigenproblem for the selfadjoint\n      * matrix \\f$ A \\f$, then the matrix returned by this function is the\n      * matrix \\f$ V \\f$ in the eigendecomposition \\f$ A = V D V^{-1} \\f$.\n      *\n      * For a selfadjoint matrix, \\f$ V \\f$ is unitary, meaning its inverse is equal\n      * to its adjoint, \\f$ V^{-1} = V^{\\dagger} \\f$. 
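\n      * For instance, the following illustrative check (assuming \\c es holds the\n      * computed decomposition of some \\c Eigen::MatrixXd) should pass up to\n      * round-off:\n      * \\code\n      * Eigen::MatrixXd V = es.eigenvectors();\n      * assert((V.adjoint() * V).isIdentity(1e-12)); // V is numerically unitary\n      * \\endcode\n      *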
If \\f$ A \\f$ is real, then\n      * \\f$ V \\f$ is also real and therefore orthogonal, meaning its inverse is\n      * equal to its transpose, \\f$ V^{-1} = V^T \\f$.\n      *\n      * Example: \\include SelfAdjointEigenSolver_eigenvectors.cpp\n      * Output: \\verbinclude SelfAdjointEigenSolver_eigenvectors.out\n      *\n      * \\sa eigenvalues()\n      */\n    EIGEN_DEVICE_FUNC\n    const EigenvectorsType& eigenvectors() const\n    {\n      eigen_assert(m_isInitialized && \"SelfAdjointEigenSolver is not initialized.\");\n      eigen_assert(m_eigenvectorsOk && \"The eigenvectors have not been computed together with the eigenvalues.\");\n      return m_eivec;\n    }\n\n    /** \\brief Returns the eigenvalues of given matrix.\n      *\n      * \\returns A const reference to the column vector containing the eigenvalues.\n      *\n      * \\pre The eigenvalues have been computed before.\n      *\n      * The eigenvalues are repeated according to their algebraic multiplicity,\n      * so there are as many eigenvalues as rows in the matrix. The eigenvalues\n      * are sorted in increasing order.\n      *\n      * Example: \\include SelfAdjointEigenSolver_eigenvalues.cpp\n      * Output: \\verbinclude SelfAdjointEigenSolver_eigenvalues.out\n      *\n      * \\sa eigenvectors(), MatrixBase::eigenvalues()\n      */\n    EIGEN_DEVICE_FUNC\n    const RealVectorType& eigenvalues() const\n    {\n      eigen_assert(m_isInitialized && \"SelfAdjointEigenSolver is not initialized.\");\n      return m_eivalues;\n    }\n\n    /** \\brief Computes the positive-definite square root of the matrix.\n      *\n      * \\returns the positive-definite square root of the matrix\n      *\n      * \\pre The eigenvalues and eigenvectors of a positive-definite matrix\n      * have been computed before.\n      *\n      * The square root of a positive-definite matrix \\f$ A \\f$ is the\n      * positive-definite matrix whose square equals \\f$ A \\f$. This function\n      * uses the eigendecomposition \\f$ A = V D V^{-1} \\f$ to compute the\n      * square root as \\f$ A^{1/2} = V D^{1/2} V^{-1} \\f$.\n      *\n      * Example: \\include SelfAdjointEigenSolver_operatorSqrt.cpp\n      * Output: \\verbinclude SelfAdjointEigenSolver_operatorSqrt.out\n      *\n      * \\sa operatorInverseSqrt(), <a href=\"unsupported/group__MatrixFunctions__Module.html\">MatrixFunctions Module</a>\n      */\n    EIGEN_DEVICE_FUNC\n    MatrixType operatorSqrt() const\n    {\n      eigen_assert(m_isInitialized && \"SelfAdjointEigenSolver is not initialized.\");\n      eigen_assert(m_eigenvectorsOk && \"The eigenvectors have not been computed together with the eigenvalues.\");\n      return m_eivec * m_eivalues.cwiseSqrt().asDiagonal() * m_eivec.adjoint();\n    }\n\n    /** \\brief Computes the inverse square root of the matrix.\n      *\n      * \\returns the inverse positive-definite square root of the matrix\n      *\n      * \\pre The eigenvalues and eigenvectors of a positive-definite matrix\n      * have been computed before.\n      *\n      * This function uses the eigendecomposition \\f$ A = V D V^{-1} \\f$ to\n      * compute the inverse square root as \\f$ V D^{-1/2} V^{-1} \\f$. 
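\n      * For example (an illustrative sketch; \\c es is assumed to hold the\n      * decomposition of a positive-definite matrix \\c A):\n      * \\code\n      * Eigen::MatrixXd M = es.operatorInverseSqrt();\n      * // M * A * M is then the identity, up to round-off\n      * \\endcode\n      *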
This is\n      * cheaper than first computing the square root with operatorSqrt() and\n      * then its inverse with MatrixBase::inverse().\n      *\n      * Example: \\include SelfAdjointEigenSolver_operatorInverseSqrt.cpp\n      * Output: \\verbinclude SelfAdjointEigenSolver_operatorInverseSqrt.out\n      *\n      * \\sa operatorSqrt(), MatrixBase::inverse(), <a href=\"unsupported/group__MatrixFunctions__Module.html\">MatrixFunctions Module</a>\n      */\n    EIGEN_DEVICE_FUNC\n    MatrixType operatorInverseSqrt() const\n    {\n      eigen_assert(m_isInitialized && \"SelfAdjointEigenSolver is not initialized.\");\n      eigen_assert(m_eigenvectorsOk && \"The eigenvectors have not been computed together with the eigenvalues.\");\n      return m_eivec * m_eivalues.cwiseInverse().cwiseSqrt().asDiagonal() * m_eivec.adjoint();\n    }\n\n    /** \\brief Reports whether previous computation was successful.\n      *\n      * \\returns \\c Success if computation was successful, \\c NoConvergence otherwise.\n      */\n    EIGEN_DEVICE_FUNC\n    ComputationInfo info() const\n    {\n      eigen_assert(m_isInitialized && \"SelfAdjointEigenSolver is not initialized.\");\n      return m_info;\n    }\n\n    /** \\brief Maximum number of iterations.\n      *\n      * The algorithm terminates if it does not converge within m_maxIterations * n iterations, where n\n      * denotes the size of the matrix. This value is currently set to 30 (copied from LAPACK).\n      */\n    static const int m_maxIterations = 30;\n\n  protected:\n    static EIGEN_DEVICE_FUNC\n    void check_template_parameters()\n    {\n      EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);\n    }\n    \n    EigenvectorsType m_eivec;\n    RealVectorType m_eivalues;\n    typename TridiagonalizationType::SubDiagonalType m_subdiag;\n    typename TridiagonalizationType::CoeffVectorType m_hcoeffs;\n    ComputationInfo m_info;\n    bool m_isInitialized;\n    bool m_eigenvectorsOk;\n};\n\nnamespace internal {\n/** \\internal\n  *\n  * \\eigenvalues_module \\ingroup Eigenvalues_Module\n  *\n  * Performs a QR step on a tridiagonal symmetric matrix represented as a\n  * pair of two vectors \\a diag and \\a subdiag.\n  *\n  * \\param diag the diagonal part of the input selfadjoint tridiagonal matrix\n  * \\param subdiag the sub-diagonal part of the input selfadjoint tridiagonal matrix\n  * \\param start starting index of the submatrix to work on\n  * \\param end last+1 index of the submatrix to work on\n  * \\param matrixQ pointer to the column-major matrix holding the eigenvectors, can be 0\n  * \\param n size of the input matrix\n  *\n  * For compilation efficiency reasons, this procedure does not use eigen expression\n  * for its arguments.\n  *\n  * Implemented from Golub's \"Matrix Computations\", algorithm 8.3.2:\n  * \"implicit symmetric QR step with Wilkinson shift\"\n  */\ntemplate<int StorageOrder,typename RealScalar, typename Scalar, typename Index>\nEIGEN_DEVICE_FUNC\nstatic void tridiagonal_qr_step(RealScalar* diag, RealScalar* subdiag, Index start, Index end, Scalar* matrixQ, Index n);\n}\n\ntemplate<typename MatrixType>\ntemplate<typename InputType>\nEIGEN_DEVICE_FUNC\nSelfAdjointEigenSolver<MatrixType>& SelfAdjointEigenSolver<MatrixType>\n::compute(const EigenBase<InputType>& a_matrix, int options)\n{\n  check_template_parameters();\n  \n  const InputType &matrix(a_matrix.derived());\n  \n  EIGEN_USING_STD(abs);\n  eigen_assert(matrix.cols() == matrix.rows());\n  eigen_assert((options&~(EigVecMask|GenEigMask))==0\n          && 
(options&EigVecMask)!=EigVecMask\n          && \"invalid option parameter\");\n  bool computeEigenvectors = (options&ComputeEigenvectors)==ComputeEigenvectors;\n  Index n = matrix.cols();\n  m_eivalues.resize(n,1);\n\n  if(n==1)\n  {\n    m_eivec = matrix;\n    m_eivalues.coeffRef(0,0) = numext::real(m_eivec.coeff(0,0));\n    if(computeEigenvectors)\n      m_eivec.setOnes(n,n);\n    m_info = Success;\n    m_isInitialized = true;\n    m_eigenvectorsOk = computeEigenvectors;\n    return *this;\n  }\n\n  // declare some aliases\n  RealVectorType& diag = m_eivalues;\n  EigenvectorsType& mat = m_eivec;\n\n  // map the matrix coefficients to [-1:1] to avoid over- and underflow.\n  mat = matrix.template triangularView<Lower>();\n  RealScalar scale = mat.cwiseAbs().maxCoeff();\n  if(scale==RealScalar(0)) scale = RealScalar(1);\n  mat.template triangularView<Lower>() /= scale;\n  m_subdiag.resize(n-1);\n  m_hcoeffs.resize(n-1);\n  internal::tridiagonalization_inplace(mat, diag, m_subdiag, m_hcoeffs, computeEigenvectors);\n\n  m_info = internal::computeFromTridiagonal_impl(diag, m_subdiag, m_maxIterations, computeEigenvectors, m_eivec);\n  \n  // scale back the eigen values\n  m_eivalues *= scale;\n\n  m_isInitialized = true;\n  m_eigenvectorsOk = computeEigenvectors;\n  return *this;\n}\n\ntemplate<typename MatrixType>\nSelfAdjointEigenSolver<MatrixType>& SelfAdjointEigenSolver<MatrixType>\n::computeFromTridiagonal(const RealVectorType& diag, const SubDiagonalType& subdiag , int options)\n{\n  //TODO : Add an option to scale the values beforehand\n  bool computeEigenvectors = (options&ComputeEigenvectors)==ComputeEigenvectors;\n\n  m_eivalues = diag;\n  m_subdiag = subdiag;\n  if (computeEigenvectors)\n  {\n    m_eivec.setIdentity(diag.size(), diag.size());\n  }\n  m_info = internal::computeFromTridiagonal_impl(m_eivalues, m_subdiag, m_maxIterations, computeEigenvectors, m_eivec);\n\n  m_isInitialized = true;\n  m_eigenvectorsOk = computeEigenvectors;\n  return *this;\n}\n\nnamespace internal {\n/**\n  * \\internal\n  * \\brief Compute the eigendecomposition from a tridiagonal matrix\n  *\n  * \\param[in,out] diag : On input, the diagonal of the matrix, on output the eigenvalues\n  * \\param[in,out] subdiag : The subdiagonal part of the matrix (entries are modified during the decomposition)\n  * \\param[in] maxIterations : the maximum number of iterations\n  * \\param[in] computeEigenvectors : whether the eigenvectors have to be computed or not\n  * \\param[out] eivec : The matrix to store the eigenvectors if computeEigenvectors==true. 
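\n  * (Callers preset it: computeFromTridiagonal() sets it to the identity, while\n  * compute() passes in the accumulated Q of the tridiagonalization.)\n  *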
Must be allocated on input.\n  * \\returns \\c Success or \\c NoConvergence\n  */\ntemplate<typename MatrixType, typename DiagType, typename SubDiagType>\nEIGEN_DEVICE_FUNC\nComputationInfo computeFromTridiagonal_impl(DiagType& diag, SubDiagType& subdiag, const Index maxIterations, bool computeEigenvectors, MatrixType& eivec)\n{\n  ComputationInfo info;\n  typedef typename MatrixType::Scalar Scalar;\n\n  Index n = diag.size();\n  Index end = n-1;\n  Index start = 0;\n  Index iter = 0; // total number of iterations\n  \n  typedef typename DiagType::RealScalar RealScalar;\n  const RealScalar considerAsZero = (std::numeric_limits<RealScalar>::min)();\n  const RealScalar precision_inv = RealScalar(1)/NumTraits<RealScalar>::epsilon();\n  while (end>0)\n  {\n    for (Index i = start; i<end; ++i) {\n      if (numext::abs(subdiag[i]) < considerAsZero) {\n        subdiag[i] = RealScalar(0);\n      } else {\n        // abs(subdiag[i]) <= epsilon * sqrt(abs(diag[i]) + abs(diag[i+1]))\n        // Scaled to prevent underflows.\n        const RealScalar scaled_subdiag = precision_inv * subdiag[i];\n        if (scaled_subdiag * scaled_subdiag <= (numext::abs(diag[i])+numext::abs(diag[i+1]))) {\n          subdiag[i] = RealScalar(0);\n        }\n      }\n    }\n\n    // find the largest unreduced block at the end of the matrix.\n    while (end>0 && subdiag[end-1]==RealScalar(0))\n    {\n      end--;\n    }\n    if (end<=0)\n      break;\n\n    // if we spent too many iterations, we give up\n    iter++;\n    if(iter > maxIterations * n) break;\n\n    start = end - 1;\n    while (start>0 && subdiag[start-1]!=0)\n      start--;\n\n    internal::tridiagonal_qr_step<MatrixType::Flags&RowMajorBit ? RowMajor : ColMajor>(diag.data(), subdiag.data(), start, end, computeEigenvectors ? eivec.data() : (Scalar*)0, n);\n  }\n  if (iter <= maxIterations * n)\n    info = Success;\n  else\n    info = NoConvergence;\n\n  // Sort eigenvalues and corresponding vectors.\n  // TODO make the sort optional ?\n  // TODO use a better sort algorithm !!\n  if (info == Success)\n  {\n    for (Index i = 0; i < n-1; ++i)\n    {\n      Index k;\n      diag.segment(i,n-i).minCoeff(&k);\n      if (k > 0)\n      {\n        numext::swap(diag[i], diag[k+i]);\n        if(computeEigenvectors)\n          eivec.col(i).swap(eivec.col(k+i));\n      }\n    }\n  }\n  return info;\n}\n  \ntemplate<typename SolverType,int Size,bool IsComplex> struct direct_selfadjoint_eigenvalues\n{\n  EIGEN_DEVICE_FUNC\n  static inline void run(SolverType& eig, const typename SolverType::MatrixType& A, int options)\n  { eig.compute(A,options); }\n};\n\ntemplate<typename SolverType> struct direct_selfadjoint_eigenvalues<SolverType,3,false>\n{\n  typedef typename SolverType::MatrixType MatrixType;\n  typedef typename SolverType::RealVectorType VectorType;\n  typedef typename SolverType::Scalar Scalar;\n  typedef typename SolverType::EigenvectorsType EigenvectorsType;\n  \n\n  /** \\internal\n   * Computes the roots of the characteristic polynomial of \\a m.\n   * For numerical stability m.trace() should be near zero and to avoid over- or underflow m should be normalized.\n   */\n  EIGEN_DEVICE_FUNC\n  static inline void computeRoots(const MatrixType& m, VectorType& roots)\n  {\n    EIGEN_USING_STD(sqrt)\n    EIGEN_USING_STD(atan2)\n    EIGEN_USING_STD(cos)\n    EIGEN_USING_STD(sin)\n    const Scalar s_inv3 = Scalar(1)/Scalar(3);\n    const Scalar s_sqrt3 = sqrt(Scalar(3));\n\n    // The characteristic equation is x^3 - c2*x^2 + c1*x - c0 = 0.  
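\n    // (For a symmetric 3x3 matrix, c2 is the trace of m, c1 the sum of its 2x2\n    // principal minors, and c0 its determinant; they match the expressions below.)\n    //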
The\n    // eigenvalues are the roots to this equation, all guaranteed to be\n    // real-valued, because the matrix is symmetric.\n    Scalar c0 = m(0,0)*m(1,1)*m(2,2) + Scalar(2)*m(1,0)*m(2,0)*m(2,1) - m(0,0)*m(2,1)*m(2,1) - m(1,1)*m(2,0)*m(2,0) - m(2,2)*m(1,0)*m(1,0);\n    Scalar c1 = m(0,0)*m(1,1) - m(1,0)*m(1,0) + m(0,0)*m(2,2) - m(2,0)*m(2,0) + m(1,1)*m(2,2) - m(2,1)*m(2,1);\n    Scalar c2 = m(0,0) + m(1,1) + m(2,2);\n\n    // Construct the parameters used in classifying the roots of the equation\n    // and in solving the equation for the roots in closed form.\n    Scalar c2_over_3 = c2*s_inv3;\n    Scalar a_over_3 = (c2*c2_over_3 - c1)*s_inv3;\n    a_over_3 = numext::maxi(a_over_3, Scalar(0));\n\n    Scalar half_b = Scalar(0.5)*(c0 + c2_over_3*(Scalar(2)*c2_over_3*c2_over_3 - c1));\n\n    Scalar q = a_over_3*a_over_3*a_over_3 - half_b*half_b;\n    q = numext::maxi(q, Scalar(0));\n\n    // Compute the eigenvalues by solving for the roots of the polynomial.\n    Scalar rho = sqrt(a_over_3);\n    Scalar theta = atan2(sqrt(q),half_b)*s_inv3;  // since sqrt(q) > 0, atan2 is in [0, pi] and theta is in [0, pi/3]\n    Scalar cos_theta = cos(theta);\n    Scalar sin_theta = sin(theta);\n    // roots are already sorted, since cos is monotonically decreasing on [0, pi]\n    roots(0) = c2_over_3 - rho*(cos_theta + s_sqrt3*sin_theta); // == 2*rho*cos(theta+2pi/3)\n    roots(1) = c2_over_3 - rho*(cos_theta - s_sqrt3*sin_theta); // == 2*rho*cos(theta+ pi/3)\n    roots(2) = c2_over_3 + Scalar(2)*rho*cos_theta;\n  }\n\n  EIGEN_DEVICE_FUNC\n  static inline bool extract_kernel(MatrixType& mat, Ref<VectorType> res, Ref<VectorType> representative)\n  {\n    EIGEN_USING_STD(abs);\n    EIGEN_USING_STD(sqrt);\n    Index i0;\n    // Find non-zero column i0 (by construction, there must exist a non zero coefficient on the diagonal):\n    mat.diagonal().cwiseAbs().maxCoeff(&i0);\n    // mat.col(i0) is a good candidate for an orthogonal vector to the current eigenvector,\n    // so let's save it:\n    representative = mat.col(i0);\n    Scalar n0, n1;\n    VectorType c0, c1;\n    n0 = (c0 = representative.cross(mat.col((i0+1)%3))).squaredNorm();\n    n1 = (c1 = representative.cross(mat.col((i0+2)%3))).squaredNorm();\n    if(n0>n1) res = c0/sqrt(n0);\n    else      res = c1/sqrt(n1);\n\n    return true;\n  }\n\n  EIGEN_DEVICE_FUNC\n  static inline void run(SolverType& solver, const MatrixType& mat, int options)\n  {\n    eigen_assert(mat.cols() == 3 && mat.cols() == mat.rows());\n    eigen_assert((options&~(EigVecMask|GenEigMask))==0\n            && (options&EigVecMask)!=EigVecMask\n            && \"invalid option parameter\");\n    bool computeEigenvectors = (options&ComputeEigenvectors)==ComputeEigenvectors;\n    \n    EigenvectorsType& eivecs = solver.m_eivec;\n    VectorType& eivals = solver.m_eivalues;\n  \n    // Shift the matrix to the mean eigenvalue and map the matrix coefficients to [-1:1] to avoid over- and underflow.\n    Scalar shift = mat.trace() / Scalar(3);\n    // TODO Avoid this copy. 
Currently it is necessary to suppress bogus values when determining maxCoeff and for computing the eigenvectors later\n    MatrixType scaledMat = mat.template selfadjointView<Lower>();\n    scaledMat.diagonal().array() -= shift;\n    Scalar scale = scaledMat.cwiseAbs().maxCoeff();\n    if(scale > 0) scaledMat /= scale;   // TODO for scale==0 we could save the remaining operations\n\n    // compute the eigenvalues\n    computeRoots(scaledMat,eivals);\n\n    // compute the eigenvectors\n    if(computeEigenvectors)\n    {\n      if((eivals(2)-eivals(0))<=Eigen::NumTraits<Scalar>::epsilon())\n      {\n        // All three eigenvalues are numerically the same\n        eivecs.setIdentity();\n      }\n      else\n      {\n        MatrixType tmp;\n        tmp = scaledMat;\n\n        // Compute the eigenvector of the most distinct eigenvalue\n        Scalar d0 = eivals(2) - eivals(1);\n        Scalar d1 = eivals(1) - eivals(0);\n        Index k(0), l(2);\n        if(d0 > d1)\n        {\n          numext::swap(k,l);\n          d0 = d1;\n        }\n\n        // Compute the eigenvector of index k\n        {\n          tmp.diagonal().array () -= eivals(k);\n          // By construction, 'tmp' is of rank 2, and its kernel corresponds to the respective eigenvector.\n          extract_kernel(tmp, eivecs.col(k), eivecs.col(l));\n        }\n\n        // Compute eigenvector of index l\n        if(d0<=2*Eigen::NumTraits<Scalar>::epsilon()*d1)\n        {\n          // If d0 is too small, then the two other eigenvalues are numerically the same,\n          // and thus we only have to ortho-normalize the near orthogonal vector we saved above.\n          eivecs.col(l) -= eivecs.col(k).dot(eivecs.col(l))*eivecs.col(k);\n          eivecs.col(l).normalize();\n        }\n        else\n        {\n          tmp = scaledMat;\n          tmp.diagonal().array () -= eivals(l);\n\n          VectorType dummy;\n          extract_kernel(tmp, eivecs.col(l), dummy);\n        }\n\n        // Compute last eigenvector from the other two\n        eivecs.col(1) = eivecs.col(2).cross(eivecs.col(0)).normalized();\n      }\n    }\n\n    // Rescale back to the original size.\n    eivals *= scale;\n    eivals.array() += shift;\n    \n    solver.m_info = Success;\n    solver.m_isInitialized = true;\n    solver.m_eigenvectorsOk = computeEigenvectors;\n  }\n};\n\n// 2x2 direct eigenvalues decomposition, code from Hauke Heibel\ntemplate<typename SolverType> \nstruct direct_selfadjoint_eigenvalues<SolverType,2,false>\n{\n  typedef typename SolverType::MatrixType MatrixType;\n  typedef typename SolverType::RealVectorType VectorType;\n  typedef typename SolverType::Scalar Scalar;\n  typedef typename SolverType::EigenvectorsType EigenvectorsType;\n  \n  EIGEN_DEVICE_FUNC\n  static inline void computeRoots(const MatrixType& m, VectorType& roots)\n  {\n    EIGEN_USING_STD(sqrt);\n    const Scalar t0 = Scalar(0.5) * sqrt( numext::abs2(m(0,0)-m(1,1)) + Scalar(4)*numext::abs2(m(1,0)));\n    const Scalar t1 = Scalar(0.5) * (m(0,0) + m(1,1));\n    roots(0) = t1 - t0;\n    roots(1) = t1 + t0;\n  }\n  \n  EIGEN_DEVICE_FUNC\n  static inline void run(SolverType& solver, const MatrixType& mat, int options)\n  {\n    EIGEN_USING_STD(sqrt);\n    EIGEN_USING_STD(abs);\n    \n    eigen_assert(mat.cols() == 2 && mat.cols() == mat.rows());\n    eigen_assert((options&~(EigVecMask|GenEigMask))==0\n            && (options&EigVecMask)!=EigVecMask\n            && \"invalid option parameter\");\n    bool computeEigenvectors = 
(options&ComputeEigenvectors)==ComputeEigenvectors;\n    \n    EigenvectorsType& eivecs = solver.m_eivec;\n    VectorType& eivals = solver.m_eivalues;\n  \n    // Shift the matrix to the mean eigenvalue and map the matrix coefficients to [-1:1] to avoid over- and underflow.\n    Scalar shift = mat.trace() / Scalar(2);\n    MatrixType scaledMat = mat;\n    scaledMat.coeffRef(0,1) = mat.coeff(1,0);\n    scaledMat.diagonal().array() -= shift;\n    Scalar scale = scaledMat.cwiseAbs().maxCoeff();\n    if(scale > Scalar(0))\n      scaledMat /= scale;\n\n    // Compute the eigenvalues\n    computeRoots(scaledMat,eivals);\n\n    // Compute the eigenvectors\n    if(computeEigenvectors)\n    {\n      if((eivals(1)-eivals(0))<=abs(eivals(1))*Eigen::NumTraits<Scalar>::epsilon())\n      {\n        eivecs.setIdentity();\n      }\n      else\n      {\n        scaledMat.diagonal().array () -= eivals(1);\n        Scalar a2 = numext::abs2(scaledMat(0,0));\n        Scalar c2 = numext::abs2(scaledMat(1,1));\n        Scalar b2 = numext::abs2(scaledMat(1,0));\n        if(a2>c2)\n        {\n          eivecs.col(1) << -scaledMat(1,0), scaledMat(0,0);\n          eivecs.col(1) /= sqrt(a2+b2);\n        }\n        else\n        {\n          eivecs.col(1) << -scaledMat(1,1), scaledMat(1,0);\n          eivecs.col(1) /= sqrt(c2+b2);\n        }\n\n        eivecs.col(0) << eivecs.col(1).unitOrthogonal();\n      }\n    }\n\n    // Rescale back to the original size.\n    eivals *= scale;\n    eivals.array() += shift;\n\n    solver.m_info = Success;\n    solver.m_isInitialized = true;\n    solver.m_eigenvectorsOk = computeEigenvectors;\n  }\n};\n\n}\n\ntemplate<typename MatrixType>\nEIGEN_DEVICE_FUNC\nSelfAdjointEigenSolver<MatrixType>& SelfAdjointEigenSolver<MatrixType>\n::computeDirect(const MatrixType& matrix, int options)\n{\n  internal::direct_selfadjoint_eigenvalues<SelfAdjointEigenSolver,Size,NumTraits<Scalar>::IsComplex>::run(*this,matrix,options);\n  return *this;\n}\n\nnamespace internal {\n\n// Francis implicit QR step.\ntemplate<int StorageOrder,typename RealScalar, typename Scalar, typename Index>\nEIGEN_DEVICE_FUNC\nstatic void tridiagonal_qr_step(RealScalar* diag, RealScalar* subdiag, Index start, Index end, Scalar* matrixQ, Index n)\n{\n  // Wilkinson Shift.\n  RealScalar td = (diag[end-1] - diag[end])*RealScalar(0.5);\n  RealScalar e = subdiag[end-1];\n  // Note that thanks to scaling, e^2 or td^2 cannot overflow, however they can still\n  // underflow thus leading to inf/NaN values when using the following commented code:\n  //   RealScalar e2 = numext::abs2(subdiag[end-1]);\n  //   RealScalar mu = diag[end] - e2 / (td + (td>0 ? 1 : -1) * sqrt(td*td + e2));\n  // This explains the following, somewhat more complicated, version:\n  RealScalar mu = diag[end];\n  if(td==RealScalar(0)) {\n    mu -= numext::abs(e);\n  } else if (e != RealScalar(0)) {\n    const RealScalar e2 = numext::abs2(e);\n    const RealScalar h = numext::hypot(td,e);\n    if(e2 == RealScalar(0)) {\n      mu -= e / ((td + (td>RealScalar(0) ? h : -h)) / e);\n    } else {\n      mu -= e2 / (td + (td>RealScalar(0) ? 
h : -h)); \n    }\n  }\n\n  RealScalar x = diag[start] - mu;\n  RealScalar z = subdiag[start];\n  // If z ever becomes zero, the Givens rotation will be the identity and\n  // z will stay zero for all future iterations.\n  for (Index k = start; k < end && z != RealScalar(0); ++k)\n  {\n    JacobiRotation<RealScalar> rot;\n    rot.makeGivens(x, z);\n\n    // do T = G' T G\n    RealScalar sdk = rot.s() * diag[k] + rot.c() * subdiag[k];\n    RealScalar dkp1 = rot.s() * subdiag[k] + rot.c() * diag[k+1];\n\n    diag[k] = rot.c() * (rot.c() * diag[k] - rot.s() * subdiag[k]) - rot.s() * (rot.c() * subdiag[k] - rot.s() * diag[k+1]);\n    diag[k+1] = rot.s() * sdk + rot.c() * dkp1;\n    subdiag[k] = rot.c() * sdk - rot.s() * dkp1;\n    \n    if (k > start)\n      subdiag[k - 1] = rot.c() * subdiag[k-1] - rot.s() * z;\n\n    // \"Chasing the bulge\" to return to triangular form.\n    x = subdiag[k];\n    if (k < end - 1)\n    {\n      z = -rot.s() * subdiag[k+1];\n      subdiag[k + 1] = rot.c() * subdiag[k+1];\n    }\n    \n    // apply the givens rotation to the unit matrix Q = Q * G\n    if (matrixQ)\n    {\n      // FIXME if StorageOrder == RowMajor this operation is not very efficient\n      Map<Matrix<Scalar,Dynamic,Dynamic,StorageOrder> > q(matrixQ,n,n);\n      q.applyOnTheRight(k,k+1,rot);\n    }\n  }\n}\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_SELFADJOINTEIGENSOLVER_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h",
    "content": "/*\n Copyright (c) 2011, Intel Corporation. All rights reserved.\n\n Redistribution and use in source and binary forms, with or without modification,\n are permitted provided that the following conditions are met:\n\n * Redistributions of source code must retain the above copyright notice, this\n   list of conditions and the following disclaimer.\n * Redistributions in binary form must reproduce the above copyright notice,\n   this list of conditions and the following disclaimer in the documentation\n   and/or other materials provided with the distribution.\n * Neither the name of Intel Corporation nor the names of its contributors may\n   be used to endorse or promote products derived from this software without\n   specific prior written permission.\n\n THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\" AND\n ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED\n WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE\n DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR\n ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES\n (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;\n LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON\n ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\n SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\n ********************************************************************************\n *   Content : Eigen bindings to LAPACKe\n *    Self-adjoint eigenvalues/eigenvectors.\n ********************************************************************************\n*/\n\n#ifndef EIGEN_SAEIGENSOLVER_LAPACKE_H\n#define EIGEN_SAEIGENSOLVER_LAPACKE_H\n\nnamespace Eigen { \n\n/** \\internal Specialization for the data types supported by LAPACKe */\n\n#define EIGEN_LAPACKE_EIG_SELFADJ_2(EIGTYPE, LAPACKE_TYPE, LAPACKE_RTYPE, LAPACKE_NAME, EIGCOLROW ) \\\ntemplate<> template<typename InputType> inline \\\nSelfAdjointEigenSolver<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >& \\\nSelfAdjointEigenSolver<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >::compute(const EigenBase<InputType>& matrix, int options) \\\n{ \\\n  eigen_assert(matrix.cols() == matrix.rows()); \\\n  eigen_assert((options&~(EigVecMask|GenEigMask))==0 \\\n          && (options&EigVecMask)!=EigVecMask \\\n          && \"invalid option parameter\"); \\\n  bool computeEigenvectors = (options&ComputeEigenvectors)==ComputeEigenvectors; \\\n  lapack_int n = internal::convert_index<lapack_int>(matrix.cols()), lda, info; \\\n  m_eivalues.resize(n,1); \\\n  m_subdiag.resize(n-1); \\\n  m_eivec = matrix; \\\n\\\n  if(n==1) \\\n  { \\\n    m_eivalues.coeffRef(0,0) = numext::real(m_eivec.coeff(0,0)); \\\n    if(computeEigenvectors) m_eivec.setOnes(n,n); \\\n    m_info = Success; \\\n    m_isInitialized = true; \\\n    m_eigenvectorsOk = computeEigenvectors; \\\n    return *this; \\\n  } \\\n\\\n  lda = internal::convert_index<lapack_int>(m_eivec.outerStride()); \\\n  char jobz, uplo='L'/*, range='A'*/; \\\n  jobz = computeEigenvectors ? 'V' : 'N'; \\\n\\\n  info = LAPACKE_##LAPACKE_NAME( LAPACK_COL_MAJOR, jobz, uplo, n, (LAPACKE_TYPE*)m_eivec.data(), lda, (LAPACKE_RTYPE*)m_eivalues.data() ); \\\n  m_info = (info==0) ? 
Success : NoConvergence; \\\n  m_isInitialized = true; \\\n  m_eigenvectorsOk = computeEigenvectors; \\\n  return *this; \\\n}\n\n#define EIGEN_LAPACKE_EIG_SELFADJ(EIGTYPE, LAPACKE_TYPE, LAPACKE_RTYPE, LAPACKE_NAME )              \\\n        EIGEN_LAPACKE_EIG_SELFADJ_2(EIGTYPE, LAPACKE_TYPE, LAPACKE_RTYPE, LAPACKE_NAME, ColMajor )  \\\n        EIGEN_LAPACKE_EIG_SELFADJ_2(EIGTYPE, LAPACKE_TYPE, LAPACKE_RTYPE, LAPACKE_NAME, RowMajor ) \n\nEIGEN_LAPACKE_EIG_SELFADJ(double,   double,                double, dsyev)\nEIGEN_LAPACKE_EIG_SELFADJ(float,    float,                 float,  ssyev)\nEIGEN_LAPACKE_EIG_SELFADJ(dcomplex, lapack_complex_double, double, zheev)\nEIGEN_LAPACKE_EIG_SELFADJ(scomplex, lapack_complex_float,  float,  cheev)\n\n} // end namespace Eigen\n\n#endif // EIGEN_SAEIGENSOLVER_LAPACKE_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Eigenvalues/Tridiagonalization.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>\n// Copyright (C) 2010 Jitse Niesen <jitse@maths.leeds.ac.uk>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_TRIDIAGONALIZATION_H\n#define EIGEN_TRIDIAGONALIZATION_H\n\nnamespace Eigen {\n\nnamespace internal {\n\ntemplate<typename MatrixType> struct TridiagonalizationMatrixTReturnType;\ntemplate<typename MatrixType>\nstruct traits<TridiagonalizationMatrixTReturnType<MatrixType> >\n  : public traits<typename MatrixType::PlainObject>\n{\n  typedef typename MatrixType::PlainObject ReturnType; // FIXME shall it be a BandMatrix?\n  enum { Flags = 0 };\n};\n\ntemplate<typename MatrixType, typename CoeffVectorType>\nEIGEN_DEVICE_FUNC\nvoid tridiagonalization_inplace(MatrixType& matA, CoeffVectorType& hCoeffs);\n}\n\n/** \\eigenvalues_module \\ingroup Eigenvalues_Module\n  *\n  *\n  * \\class Tridiagonalization\n  *\n  * \\brief Tridiagonal decomposition of a selfadjoint matrix\n  *\n  * \\tparam _MatrixType the type of the matrix of which we are computing the\n  * tridiagonal decomposition; this is expected to be an instantiation of the\n  * Matrix class template.\n  *\n  * This class performs a tridiagonal decomposition of a selfadjoint matrix \\f$ A \\f$ such that:\n  * \\f$ A = Q T Q^* \\f$ where \\f$ Q \\f$ is unitary and \\f$ T \\f$ a real symmetric tridiagonal matrix.\n  *\n  * A tridiagonal matrix is a matrix which has nonzero elements only on the\n  * main diagonal and the first diagonal below and above it. The Hessenberg\n  * decomposition of a selfadjoint matrix is in fact a tridiagonal\n  * decomposition. This class is used in SelfAdjointEigenSolver to compute the\n  * eigenvalues and eigenvectors of a selfadjoint matrix.\n  *\n  * Call the function compute() to compute the tridiagonal decomposition of a\n  * given matrix. Alternatively, you can use the Tridiagonalization(const MatrixType&)\n  * constructor which computes the tridiagonal Schur decomposition at\n  * construction time. Once the decomposition is computed, you can use the\n  * matrixQ() and matrixT() functions to retrieve the matrices Q and T in the\n  * decomposition.\n  *\n  * The documentation of Tridiagonalization(const MatrixType&) contains an\n  * example of the typical use of this class.\n  *\n  * \\sa class HessenbergDecomposition, class SelfAdjointEigenSolver\n  */\ntemplate<typename _MatrixType> class Tridiagonalization\n{\n  public:\n\n    /** \\brief Synonym for the template parameter \\p _MatrixType. */\n    typedef _MatrixType MatrixType;\n\n    typedef typename MatrixType::Scalar Scalar;\n    typedef typename NumTraits<Scalar>::Real RealScalar;\n    typedef Eigen::Index Index; ///< \\deprecated since Eigen 3.3\n\n    enum {\n      Size = MatrixType::RowsAtCompileTime,\n      SizeMinusOne = Size == Dynamic ? Dynamic : (Size > 1 ? Size - 1 : 1),\n      Options = MatrixType::Options,\n      MaxSize = MatrixType::MaxRowsAtCompileTime,\n      MaxSizeMinusOne = MaxSize == Dynamic ? Dynamic : (MaxSize > 1 ? 
MaxSize - 1 : 1)\n    };\n\n    typedef Matrix<Scalar, SizeMinusOne, 1, Options & ~RowMajor, MaxSizeMinusOne, 1> CoeffVectorType;\n    typedef typename internal::plain_col_type<MatrixType, RealScalar>::type DiagonalType;\n    typedef Matrix<RealScalar, SizeMinusOne, 1, Options & ~RowMajor, MaxSizeMinusOne, 1> SubDiagonalType;\n    typedef typename internal::remove_all<typename MatrixType::RealReturnType>::type MatrixTypeRealView;\n    typedef internal::TridiagonalizationMatrixTReturnType<MatrixTypeRealView> MatrixTReturnType;\n\n    typedef typename internal::conditional<NumTraits<Scalar>::IsComplex,\n              typename internal::add_const_on_value_type<typename Diagonal<const MatrixType>::RealReturnType>::type,\n              const Diagonal<const MatrixType>\n            >::type DiagonalReturnType;\n\n    typedef typename internal::conditional<NumTraits<Scalar>::IsComplex,\n              typename internal::add_const_on_value_type<typename Diagonal<const MatrixType, -1>::RealReturnType>::type,\n              const Diagonal<const MatrixType, -1>\n            >::type SubDiagonalReturnType;\n\n    /** \\brief Return type of matrixQ() */\n    typedef HouseholderSequence<MatrixType,typename internal::remove_all<typename CoeffVectorType::ConjugateReturnType>::type> HouseholderSequenceType;\n\n    /** \\brief Default constructor.\n      *\n      * \\param [in]  size  Positive integer, size of the matrix whose tridiagonal\n      * decomposition will be computed.\n      *\n      * The default constructor is useful in cases in which the user intends to\n      * perform decompositions via compute().  The \\p size parameter is only\n      * used as a hint. It is not an error to give a wrong \\p size, but it may\n      * impair performance.\n      *\n      * \\sa compute() for an example.\n      */\n    explicit Tridiagonalization(Index size = Size==Dynamic ? 2 : Size)\n      : m_matrix(size,size),\n        m_hCoeffs(size > 1 ? size-1 : 1),\n        m_isInitialized(false)\n    {}\n\n    /** \\brief Constructor; computes tridiagonal decomposition of given matrix.\n      *\n      * \\param[in]  matrix  Selfadjoint matrix whose tridiagonal decomposition\n      * is to be computed.\n      *\n      * This constructor calls compute() to compute the tridiagonal decomposition.\n      *\n      * Example: \\include Tridiagonalization_Tridiagonalization_MatrixType.cpp\n      * Output: \\verbinclude Tridiagonalization_Tridiagonalization_MatrixType.out\n      */\n    template<typename InputType>\n    explicit Tridiagonalization(const EigenBase<InputType>& matrix)\n      : m_matrix(matrix.derived()),\n        m_hCoeffs(matrix.cols() > 1 ? matrix.cols()-1 : 1),\n        m_isInitialized(false)\n    {\n      internal::tridiagonalization_inplace(m_matrix, m_hCoeffs);\n      m_isInitialized = true;\n    }\n\n    /** \\brief Computes tridiagonal decomposition of given matrix.\n      *\n      * \\param[in]  matrix  Selfadjoint matrix whose tridiagonal decomposition\n      * is to be computed.\n      * \\returns    Reference to \\c *this\n      *\n      * The tridiagonal decomposition is computed by bringing the columns of\n      * the matrix successively in the required form using Householder\n      * reflections. 
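\n      * For example (an illustrative sketch; \\c A and \\c B stand for selfadjoint\n      * matrices of the same size):\n      * \\code\n      * Eigen::Tridiagonalization<Eigen::MatrixXd> tri(A);\n      * tri.compute(B); // second decomposition, reusing the allocated storage\n      * \\endcode\n      *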
The cost is \\f$ 4n^3/3 \\f$ flops, where \\f$ n \\f$ denotes\n      * the size of the given matrix.\n      *\n      * This method reuses the allocated data in the Tridiagonalization\n      * object, if the size of the matrix does not change.\n      *\n      * Example: \\include Tridiagonalization_compute.cpp\n      * Output: \\verbinclude Tridiagonalization_compute.out\n      */\n    template<typename InputType>\n    Tridiagonalization& compute(const EigenBase<InputType>& matrix)\n    {\n      m_matrix = matrix.derived();\n      m_hCoeffs.resize(matrix.rows()-1, 1);\n      internal::tridiagonalization_inplace(m_matrix, m_hCoeffs);\n      m_isInitialized = true;\n      return *this;\n    }\n\n    /** \\brief Returns the Householder coefficients.\n      *\n      * \\returns a const reference to the vector of Householder coefficients\n      *\n      * \\pre Either the constructor Tridiagonalization(const MatrixType&) or\n      * the member function compute(const MatrixType&) has been called before\n      * to compute the tridiagonal decomposition of a matrix.\n      *\n      * The Householder coefficients allow the reconstruction of the matrix\n      * \\f$ Q \\f$ in the tridiagonal decomposition from the packed data.\n      *\n      * Example: \\include Tridiagonalization_householderCoefficients.cpp\n      * Output: \\verbinclude Tridiagonalization_householderCoefficients.out\n      *\n      * \\sa packedMatrix(), \\ref Householder_Module \"Householder module\"\n      */\n    inline CoeffVectorType householderCoefficients() const\n    {\n      eigen_assert(m_isInitialized && \"Tridiagonalization is not initialized.\");\n      return m_hCoeffs;\n    }\n\n    /** \\brief Returns the internal representation of the decomposition\n      *\n      * \\returns a const reference to a matrix with the internal representation\n      *          of the decomposition.\n      *\n      * \\pre Either the constructor Tridiagonalization(const MatrixType&) or\n      * the member function compute(const MatrixType&) has been called before\n      * to compute the tridiagonal decomposition of a matrix.\n      *\n      * The returned matrix contains the following information:\n      *  - the strict upper triangular part is equal to the input matrix A.\n      *  - the diagonal and lower sub-diagonal represent the real tridiagonal\n      *    symmetric matrix T.\n      *  - the rest of the lower part contains the Householder vectors that,\n      *    combined with Householder coefficients returned by\n      *    householderCoefficients(), allow one to reconstruct the matrix Q as\n      *       \\f$ Q = H_{N-1} \\ldots H_1 H_0 \\f$.\n      *    Here, the matrices \\f$ H_i \\f$ are the Householder transformations\n      *       \\f$ H_i = (I - h_i v_i v_i^T) \\f$\n      *    where \\f$ h_i \\f$ is the \\f$ i \\f$th Householder coefficient and\n      *    \\f$ v_i \\f$ is the Householder vector defined by\n      *       \\f$ v_i = [ 0, \\ldots, 0, 1, M(i+2,i), \\ldots, M(N-1,i) ]^T \\f$\n      *    with M the matrix returned by this function.\n      *\n      * See LAPACK for further details on this packed storage.\n      *\n      * Example: \\include Tridiagonalization_packedMatrix.cpp\n      * Output: \\verbinclude Tridiagonalization_packedMatrix.out\n      *\n      * \\sa householderCoefficients()\n      */\n    inline const MatrixType& packedMatrix() const\n    {\n      eigen_assert(m_isInitialized && \"Tridiagonalization is not initialized.\");\n      return m_matrix;\n    }\n\n    /** \\brief Returns the unitary 
matrix Q in the decomposition\n      *\n      * \\returns object representing the matrix Q\n      *\n      * \\pre Either the constructor Tridiagonalization(const MatrixType&) or\n      * the member function compute(const MatrixType&) has been called before\n      * to compute the tridiagonal decomposition of a matrix.\n      *\n      * This function returns a light-weight object of template class\n      * HouseholderSequence. You can either apply it directly to a matrix or\n      * you can convert it to a matrix of type #MatrixType.\n      *\n      * \\sa Tridiagonalization(const MatrixType&) for an example,\n      *     matrixT(), class HouseholderSequence\n      */\n    HouseholderSequenceType matrixQ() const\n    {\n      eigen_assert(m_isInitialized && \"Tridiagonalization is not initialized.\");\n      return HouseholderSequenceType(m_matrix, m_hCoeffs.conjugate())\n             .setLength(m_matrix.rows() - 1)\n             .setShift(1);\n    }\n\n    /** \\brief Returns an expression of the tridiagonal matrix T in the decomposition\n      *\n      * \\returns expression object representing the matrix T\n      *\n      * \\pre Either the constructor Tridiagonalization(const MatrixType&) or\n      * the member function compute(const MatrixType&) has been called before\n      * to compute the tridiagonal decomposition of a matrix.\n      *\n      * Currently, this function can be used to extract the matrix T from internal\n      * data and copy it to a dense matrix object. In most cases, it may be\n      * sufficient to directly use the packed matrix or the vector expressions\n      * returned by diagonal() and subDiagonal() instead of creating a new\n      * dense copy matrix with this function.\n      *\n      * \\sa Tridiagonalization(const MatrixType&) for an example,\n      * matrixQ(), packedMatrix(), diagonal(), subDiagonal()\n      */\n    MatrixTReturnType matrixT() const\n    {\n      eigen_assert(m_isInitialized && \"Tridiagonalization is not initialized.\");\n      return MatrixTReturnType(m_matrix.real());\n    }\n\n    /** \\brief Returns the diagonal of the tridiagonal matrix T in the decomposition.\n      *\n      * \\returns expression representing the diagonal of T\n      *\n      * \\pre Either the constructor Tridiagonalization(const MatrixType&) or\n      * the member function compute(const MatrixType&) has been called before\n      * to compute the tridiagonal decomposition of a matrix.\n      *\n      * Example: \\include Tridiagonalization_diagonal.cpp\n      * Output: \\verbinclude Tridiagonalization_diagonal.out\n      *\n      * \\sa matrixT(), subDiagonal()\n      */\n    DiagonalReturnType diagonal() const;\n\n    /** \\brief Returns the subdiagonal of the tridiagonal matrix T in the decomposition.\n      *\n      * \\returns expression representing the subdiagonal of T\n      *\n      * \\pre Either the constructor Tridiagonalization(const MatrixType&) or\n      * the member function compute(const MatrixType&) has been called before\n      * to compute the tridiagonal decomposition of a matrix.\n      *\n      * \\sa diagonal() for an example, matrixT()\n      */\n    SubDiagonalReturnType subDiagonal() const;\n\n  protected:\n\n    MatrixType m_matrix;\n    CoeffVectorType m_hCoeffs;\n    bool m_isInitialized;\n};\n\ntemplate<typename MatrixType>\ntypename Tridiagonalization<MatrixType>::DiagonalReturnType\nTridiagonalization<MatrixType>::diagonal() const\n{\n  eigen_assert(m_isInitialized && \"Tridiagonalization is not initialized.\");\n  return 
m_matrix.diagonal().real();\n}\n\ntemplate<typename MatrixType>\ntypename Tridiagonalization<MatrixType>::SubDiagonalReturnType\nTridiagonalization<MatrixType>::subDiagonal() const\n{\n  eigen_assert(m_isInitialized && \"Tridiagonalization is not initialized.\");\n  return m_matrix.template diagonal<-1>().real();\n}\n\nnamespace internal {\n\n/** \\internal\n  * Performs a tridiagonal decomposition of the selfadjoint matrix \\a matA in-place.\n  *\n  * \\param[in,out] matA On input the selfadjoint matrix. Only the \\b lower triangular part is referenced.\n  *                     On output, the strict upper part is left unchanged, and the lower triangular part\n  *                     represents the T and Q matrices in packed format as detailed below.\n  * \\param[out]    hCoeffs returned Householder coefficients (see below)\n  *\n  * On output, the tridiagonal selfadjoint matrix T is stored in the diagonal\n  * and lower sub-diagonal of the matrix \\a matA.\n  * The unitary matrix Q is represented in a compact way as a product of\n  * Householder reflectors \\f$ H_i \\f$ such that:\n  *       \\f$ Q = H_{N-1} \\ldots H_1 H_0 \\f$.\n  * The Householder reflectors are defined as\n  *       \\f$ H_i = (I - h_i v_i v_i^T) \\f$\n  * where \\f$ h_i = hCoeffs[i]\\f$ is the \\f$ i \\f$th Householder coefficient and\n  * \\f$ v_i \\f$ is the Householder vector defined by\n  *       \\f$ v_i = [ 0, \\ldots, 0, 1, matA(i+2,i), \\ldots, matA(N-1,i) ]^T \\f$.\n  *\n  * Implemented from Golub's \"Matrix Computations\", algorithm 8.3.1.\n  *\n  * \\sa Tridiagonalization::packedMatrix()\n  */\ntemplate<typename MatrixType, typename CoeffVectorType>\nEIGEN_DEVICE_FUNC\nvoid tridiagonalization_inplace(MatrixType& matA, CoeffVectorType& hCoeffs)\n{\n  using numext::conj;\n  typedef typename MatrixType::Scalar Scalar;\n  typedef typename MatrixType::RealScalar RealScalar;\n  Index n = matA.rows();\n  eigen_assert(n==matA.cols());\n  eigen_assert(n==hCoeffs.size()+1 || n==1);\n\n  for (Index i = 0; i<n-1; ++i)\n  {\n    Index remainingSize = n-i-1;\n    RealScalar beta;\n    Scalar h;\n    matA.col(i).tail(remainingSize).makeHouseholderInPlace(h, beta);\n\n    // Apply similarity transformation to remaining columns,\n    // i.e., A = H A H' where H = I - h v v' and v = matA.col(i).tail(n-i-1)\n    matA.col(i).coeffRef(i+1) = 1;\n\n    hCoeffs.tail(n-i-1).noalias() = (matA.bottomRightCorner(remainingSize,remainingSize).template selfadjointView<Lower>()\n                                  * (conj(h) * matA.col(i).tail(remainingSize)));\n\n    hCoeffs.tail(n-i-1) += (conj(h)*RealScalar(-0.5)*(hCoeffs.tail(remainingSize).dot(matA.col(i).tail(remainingSize)))) * matA.col(i).tail(n-i-1);\n\n    matA.bottomRightCorner(remainingSize, remainingSize).template selfadjointView<Lower>()\n      .rankUpdate(matA.col(i).tail(remainingSize), hCoeffs.tail(remainingSize), Scalar(-1));\n\n    matA.col(i).coeffRef(i+1) = beta;\n    hCoeffs.coeffRef(i) = h;\n  }\n}\n\n// forward declaration, implementation at the end of this file\ntemplate<typename MatrixType,\n         int Size=MatrixType::ColsAtCompileTime,\n         bool IsComplex=NumTraits<typename MatrixType::Scalar>::IsComplex>\nstruct tridiagonalization_inplace_selector;\n\n/** \\brief Performs a full tridiagonalization in place\n  *\n  * \\param[in,out]  mat  On input, the selfadjoint matrix whose tridiagonal\n  *    decomposition is to be computed. Only the lower triangular part is referenced.\n  *    The rest is left unchanged. 
On output, the orthogonal matrix Q\n  *    in the decomposition if \\p extractQ is true.\n  * \\param[out]  diag  The diagonal of the tridiagonal matrix T in the\n  *    decomposition.\n  * \\param[out]  subdiag  The subdiagonal of the tridiagonal matrix T in\n  *    the decomposition.\n  * \\param[in]  extractQ  If true, the orthogonal matrix Q in the\n  *    decomposition is computed and stored in \\p mat.\n  *\n  * Computes the tridiagonal decomposition of the selfadjoint matrix \\p mat in place\n  * such that \\f$ mat = Q T Q^* \\f$ where \\f$ Q \\f$ is unitary and \\f$ T \\f$ a real\n  * symmetric tridiagonal matrix.\n  *\n  * The tridiagonal matrix T is passed to the output parameters \\p diag and \\p subdiag. If\n  * \\p extractQ is true, then the orthogonal matrix Q is passed to \\p mat. Otherwise the lower\n  * part of the matrix \\p mat is destroyed.\n  *\n  * The vectors \\p diag and \\p subdiag are not resized. The function\n  * assumes that they are already of the correct size. The length of the\n  * vector \\p diag should equal the number of rows in \\p mat, and the\n  * length of the vector \\p subdiag should be one less.\n  *\n  * This implementation contains an optimized path for 3-by-3 matrices\n  * which is especially useful for plane fitting.\n  *\n  * \\note Currently, it requires two temporary vectors to hold the intermediate\n  * Householder coefficients, and to reconstruct the matrix Q from the Householder\n  * reflectors.\n  *\n  * Example (this uses the same matrix as the example in\n  *    Tridiagonalization::Tridiagonalization(const MatrixType&)):\n  *    \\include Tridiagonalization_decomposeInPlace.cpp\n  * Output: \\verbinclude Tridiagonalization_decomposeInPlace.out\n  *\n  * \\sa class Tridiagonalization\n  */\ntemplate<typename MatrixType, typename DiagonalType, typename SubDiagonalType, typename CoeffVectorType>\nEIGEN_DEVICE_FUNC\nvoid tridiagonalization_inplace(MatrixType& mat, DiagonalType& diag, SubDiagonalType& subdiag,\n                                CoeffVectorType& hcoeffs, bool extractQ)\n{\n  eigen_assert(mat.cols()==mat.rows() && diag.size()==mat.rows() && subdiag.size()==mat.rows()-1);\n  tridiagonalization_inplace_selector<MatrixType>::run(mat, diag, subdiag, hcoeffs, extractQ);\n}\n\n/** \\internal\n  * General full tridiagonalization\n  */\ntemplate<typename MatrixType, int Size, bool IsComplex>\nstruct tridiagonalization_inplace_selector\n{\n  typedef typename Tridiagonalization<MatrixType>::CoeffVectorType CoeffVectorType;\n  typedef typename Tridiagonalization<MatrixType>::HouseholderSequenceType HouseholderSequenceType;\n  template<typename DiagonalType, typename SubDiagonalType>\n  static EIGEN_DEVICE_FUNC\n      void run(MatrixType& mat, DiagonalType& diag, SubDiagonalType& subdiag, CoeffVectorType& hCoeffs, bool extractQ)\n  {\n    tridiagonalization_inplace(mat, hCoeffs);\n    diag = mat.diagonal().real();\n    subdiag = mat.template diagonal<-1>().real();\n    if(extractQ)\n      mat = HouseholderSequenceType(mat, hCoeffs.conjugate())\n            .setLength(mat.rows() - 1)\n            .setShift(1);\n  }\n};\n\n/** \\internal\n  * Specialization for 3x3 real matrices.\n  * Especially useful for plane fitting.\n  */\ntemplate<typename MatrixType>\nstruct tridiagonalization_inplace_selector<MatrixType,3,false>\n{\n  typedef typename MatrixType::Scalar Scalar;\n  typedef typename MatrixType::RealScalar RealScalar;\n\n  template<typename DiagonalType, typename SubDiagonalType, typename CoeffVectorType>\n  static void run(MatrixType& 
mat, DiagonalType& diag, SubDiagonalType& subdiag, CoeffVectorType&, bool extractQ)\n  {\n    using std::sqrt;\n    const RealScalar tol = (std::numeric_limits<RealScalar>::min)();\n    diag[0] = mat(0,0);\n    RealScalar v1norm2 = numext::abs2(mat(2,0));\n    if(v1norm2 <= tol)\n    {\n      diag[1] = mat(1,1);\n      diag[2] = mat(2,2);\n      subdiag[0] = mat(1,0);\n      subdiag[1] = mat(2,1);\n      if (extractQ)\n        mat.setIdentity();\n    }\n    else\n    {\n      RealScalar beta = sqrt(numext::abs2(mat(1,0)) + v1norm2);\n      RealScalar invBeta = RealScalar(1)/beta;\n      Scalar m01 = mat(1,0) * invBeta;\n      Scalar m02 = mat(2,0) * invBeta;\n      Scalar q = RealScalar(2)*m01*mat(2,1) + m02*(mat(2,2) - mat(1,1));\n      diag[1] = mat(1,1) + m02*q;\n      diag[2] = mat(2,2) - m02*q;\n      subdiag[0] = beta;\n      subdiag[1] = mat(2,1) - m01 * q;\n      if (extractQ)\n      {\n        mat << 1,   0,    0,\n               0, m01,  m02,\n               0, m02, -m01;\n      }\n    }\n  }\n};\n\n/** \\internal\n  * Trivial specialization for 1x1 matrices\n  */\ntemplate<typename MatrixType, bool IsComplex>\nstruct tridiagonalization_inplace_selector<MatrixType,1,IsComplex>\n{\n  typedef typename MatrixType::Scalar Scalar;\n\n  template<typename DiagonalType, typename SubDiagonalType, typename CoeffVectorType>\n  static EIGEN_DEVICE_FUNC\n  void run(MatrixType& mat, DiagonalType& diag, SubDiagonalType&, CoeffVectorType&, bool extractQ)\n  {\n    diag(0,0) = numext::real(mat(0,0));\n    if(extractQ)\n      mat(0,0) = Scalar(1);\n  }\n};\n\n/** \\internal\n  * \\eigenvalues_module \\ingroup Eigenvalues_Module\n  *\n  * \\brief Expression type for return value of Tridiagonalization::matrixT()\n  *\n  * \\tparam MatrixType type of underlying dense matrix\n  */\ntemplate<typename MatrixType> struct TridiagonalizationMatrixTReturnType\n: public ReturnByValue<TridiagonalizationMatrixTReturnType<MatrixType> >\n{\n  public:\n    /** \\brief Constructor.\n      *\n      * \\param[in] mat The underlying dense matrix\n      */\n    TridiagonalizationMatrixTReturnType(const MatrixType& mat) : m_matrix(mat) { }\n\n    template <typename ResultType>\n    inline void evalTo(ResultType& result) const\n    {\n      result.setZero();\n      result.template diagonal<1>() = m_matrix.template diagonal<-1>().conjugate();\n      result.diagonal() = m_matrix.diagonal();\n      result.template diagonal<-1>() = m_matrix.template diagonal<-1>();\n    }\n\n    EIGEN_CONSTEXPR Index rows() const EIGEN_NOEXCEPT { return m_matrix.rows(); }\n    EIGEN_CONSTEXPR Index cols() const EIGEN_NOEXCEPT { return m_matrix.cols(); }\n\n  protected:\n    typename MatrixType::Nested m_matrix;\n};\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_TRIDIAGONALIZATION_H\n"
  },
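For orientation, a minimal usage sketch of the in-place tridiagonalization implemented in the file above, driven through the public `Eigen::Tridiagonalization` wrapper defined in the same header. This is an illustration only, not part of the vendored sources; the matrix entries are made-up values.

```cpp
#include <iostream>
#include <Eigen/Dense>

int main() {
  // A real symmetric matrix (entries are arbitrary illustration data).
  Eigen::Matrix3d A;
  A << 4, 1, 2,
       1, 3, 0,
       2, 0, 5;

  // Decomposes A = Q * T * Q^T with T real symmetric tridiagonal;
  // internally this uses the in-place machinery shown above.
  Eigen::Tridiagonalization<Eigen::Matrix3d> tri(A);
  Eigen::Matrix3d Q = tri.matrixQ();
  Eigen::Matrix3d T = tri.matrixT();

  // The reconstruction should match A up to rounding error.
  std::cout << "reconstruction error: "
            << (Q * T * Q.transpose() - A).norm() << "\n";
}
```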
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Geometry/AlignedBox.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n// Function void Eigen::AlignedBox::transform(const Transform& transform)\n// is provided under the following license agreement:\n//\n// Software License Agreement (BSD License)\n//\n// Copyright (c) 2011-2014, Willow Garage, Inc.\n// Copyright (c) 2014-2015, Open Source Robotics Foundation\n// All rights reserved.\n//\n// Redistribution and use in source and binary forms, with or without\n// modification, are permitted provided that the following conditions\n// are met:\n//\n//  * Redistributions of source code must retain the above copyright\n//    notice, this list of conditions and the following disclaimer.\n//  * Redistributions in binary form must reproduce the above\n//    copyright notice, this list of conditions and the following\n//    disclaimer in the documentation and/or other materials provided\n//    with the distribution.\n//  * Neither the name of Open Source Robotics Foundation nor the names of its\n//    contributors may be used to endorse or promote products derived\n//    from this software without specific prior written permission.\n//\n// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\n// \"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\n// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS\n// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE\n// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,\n// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,\n// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;\n// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER\n// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT\n// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN\n// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE\n// POSSIBILITY OF SUCH DAMAGE.\n\n#ifndef EIGEN_ALIGNEDBOX_H\n#define EIGEN_ALIGNEDBOX_H\n\nnamespace Eigen {\n\n/** \\geometry_module \\ingroup Geometry_Module\n  *\n  *\n  * \\class AlignedBox\n  *\n  * \\brief An axis aligned box\n  *\n  * \\tparam _Scalar the type of the scalar coefficients\n  * \\tparam _AmbientDim the dimension of the ambient space, can be a compile time value or Dynamic.\n  *\n  * This class represents an axis aligned box as a pair of the minimal and maximal corners.\n  * \\warning The result of most methods is undefined when applied to an empty box. 
You can check for empty boxes using isEmpty().\n  * \\sa alignedboxtypedefs\n  */\ntemplate <typename _Scalar, int _AmbientDim>\nclass AlignedBox\n{\npublic:\nEIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)\n  enum { AmbientDimAtCompileTime = _AmbientDim };\n  typedef _Scalar                                   Scalar;\n  typedef NumTraits<Scalar>                         ScalarTraits;\n  typedef Eigen::Index                              Index; ///< \\deprecated since Eigen 3.3\n  typedef typename ScalarTraits::Real               RealScalar;\n  typedef typename ScalarTraits::NonInteger         NonInteger;\n  typedef Matrix<Scalar,AmbientDimAtCompileTime,1>  VectorType;\n  typedef CwiseBinaryOp<internal::scalar_sum_op<Scalar>, const VectorType, const VectorType> VectorTypeSum;\n\n  /** Define constants to name the corners of a 1D, 2D or 3D axis aligned bounding box */\n  enum CornerType\n  {\n    /** 1D names @{ */\n    Min=0, Max=1,\n    /** @} */\n\n    /** Identifier for 2D corner @{ */\n    BottomLeft=0, BottomRight=1,\n    TopLeft=2, TopRight=3,\n    /** @} */\n\n    /** Identifier for 3D corner  @{ */\n    BottomLeftFloor=0, BottomRightFloor=1,\n    TopLeftFloor=2, TopRightFloor=3,\n    BottomLeftCeil=4, BottomRightCeil=5,\n    TopLeftCeil=6, TopRightCeil=7\n    /** @} */\n  };\n\n\n  /** Default constructor initializing a null box. */\n  EIGEN_DEVICE_FUNC inline AlignedBox()\n  { if (EIGEN_CONST_CONDITIONAL(AmbientDimAtCompileTime!=Dynamic)) setEmpty(); }\n\n  /** Constructs a null box with \\a _dim the dimension of the ambient space. */\n  EIGEN_DEVICE_FUNC inline explicit AlignedBox(Index _dim) : m_min(_dim), m_max(_dim)\n  { setEmpty(); }\n\n  /** Constructs a box with extremities \\a _min and \\a _max.\n   * \\warning If either component of \\a _min is larger than the same component of \\a _max, the constructed box is empty. */\n  template<typename OtherVectorType1, typename OtherVectorType2>\n  EIGEN_DEVICE_FUNC inline AlignedBox(const OtherVectorType1& _min, const OtherVectorType2& _max) : m_min(_min), m_max(_max) {}\n\n  /** Constructs a box containing a single point \\a p. */\n  template<typename Derived>\n  EIGEN_DEVICE_FUNC inline explicit AlignedBox(const MatrixBase<Derived>& p) : m_min(p), m_max(m_min)\n  { }\n\n  EIGEN_DEVICE_FUNC ~AlignedBox() {}\n\n  /** \\returns the dimension in which the box holds */\n  EIGEN_DEVICE_FUNC inline Index dim() const { return AmbientDimAtCompileTime==Dynamic ? 
m_min.size() : Index(AmbientDimAtCompileTime); }\n\n  /** \\deprecated use isEmpty() */\n  EIGEN_DEVICE_FUNC inline bool isNull() const { return isEmpty(); }\n\n  /** \\deprecated use setEmpty() */\n  EIGEN_DEVICE_FUNC inline void setNull() { setEmpty(); }\n\n  /** \\returns true if the box is empty.\n   * \\sa setEmpty */\n  EIGEN_DEVICE_FUNC inline bool isEmpty() const { return (m_min.array() > m_max.array()).any(); }\n\n  /** Makes \\c *this an empty box.\n   * \\sa isEmpty */\n  EIGEN_DEVICE_FUNC inline void setEmpty()\n  {\n    m_min.setConstant( ScalarTraits::highest() );\n    m_max.setConstant( ScalarTraits::lowest() );\n  }\n\n  /** \\returns the minimal corner */\n  EIGEN_DEVICE_FUNC inline const VectorType& (min)() const { return m_min; }\n  /** \\returns a non const reference to the minimal corner */\n  EIGEN_DEVICE_FUNC inline VectorType& (min)() { return m_min; }\n  /** \\returns the maximal corner */\n  EIGEN_DEVICE_FUNC inline const VectorType& (max)() const { return m_max; }\n  /** \\returns a non const reference to the maximal corner */\n  EIGEN_DEVICE_FUNC inline VectorType& (max)() { return m_max; }\n\n  /** \\returns the center of the box */\n  EIGEN_DEVICE_FUNC inline const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(VectorTypeSum, RealScalar, quotient)\n  center() const\n  { return (m_min+m_max)/RealScalar(2); }\n\n  /** \\returns the lengths of the sides of the bounding box.\n    * Note that this function does not return the same\n    * result for integral and floating-point scalar types.\n    */\n  EIGEN_DEVICE_FUNC inline const CwiseBinaryOp< internal::scalar_difference_op<Scalar,Scalar>, const VectorType, const VectorType> sizes() const\n  { return m_max - m_min; }\n\n  /** \\returns the volume of the bounding box */\n  EIGEN_DEVICE_FUNC inline Scalar volume() const\n  { return sizes().prod(); }\n\n  /** \\returns an expression for the bounding box diagonal vector\n    * if the length of the diagonal is needed: diagonal().norm()\n    * will provide it.\n    */\n  EIGEN_DEVICE_FUNC inline CwiseBinaryOp< internal::scalar_difference_op<Scalar,Scalar>, const VectorType, const VectorType> diagonal() const\n  { return sizes(); }\n\n  /** \\returns the vertex of the bounding box at the corner defined by\n    * the corner-id corner. 
It works only for a 1D, 2D or 3D bounding box.\n    * For 1D bounding boxes corners are named by 2 enum constants:\n    * BottomLeft and BottomRight.\n    * For 2D bounding boxes, corners are named by 4 enum constants:\n    * BottomLeft, BottomRight, TopLeft, TopRight.\n    * For 3D bounding boxes, the following names are added:\n    * BottomLeftCeil, BottomRightCeil, TopLeftCeil, TopRightCeil.\n    */\n  EIGEN_DEVICE_FUNC inline VectorType corner(CornerType corner) const\n  {\n    EIGEN_STATIC_ASSERT(_AmbientDim <= 3, THIS_METHOD_IS_ONLY_FOR_VECTORS_OF_A_SPECIFIC_SIZE);\n\n    VectorType res;\n\n    Index mult = 1;\n    for(Index d=0; d<dim(); ++d)\n    {\n      if( mult & corner ) res[d] = m_max[d];\n      else                res[d] = m_min[d];\n      mult *= 2;\n    }\n    return res;\n  }\n\n  /** \\returns a random point inside the bounding box sampled with\n   * a uniform distribution */\n  EIGEN_DEVICE_FUNC inline VectorType sample() const\n  {\n    VectorType r(dim());\n    for(Index d=0; d<dim(); ++d)\n    {\n      if(!ScalarTraits::IsInteger)\n      {\n        r[d] = m_min[d] + (m_max[d]-m_min[d])\n             * internal::random<Scalar>(Scalar(0), Scalar(1));\n      }\n      else\n        r[d] = internal::random(m_min[d], m_max[d]);\n    }\n    return r;\n  }\n\n  /** \\returns true if the point \\a p is inside the box \\c *this. */\n  template<typename Derived>\n  EIGEN_DEVICE_FUNC inline bool contains(const MatrixBase<Derived>& p) const\n  {\n    typename internal::nested_eval<Derived,2>::type p_n(p.derived());\n    return (m_min.array()<=p_n.array()).all() && (p_n.array()<=m_max.array()).all();\n  }\n\n  /** \\returns true if the box \\a b is entirely inside the box \\c *this. */\n  EIGEN_DEVICE_FUNC inline bool contains(const AlignedBox& b) const\n  { return (m_min.array()<=(b.min)().array()).all() && ((b.max)().array()<=m_max.array()).all(); }\n\n  /** \\returns true if the box \\a b is intersecting the box \\c *this.\n   * \\sa intersection, clamp */\n  EIGEN_DEVICE_FUNC inline bool intersects(const AlignedBox& b) const\n  { return (m_min.array()<=(b.max)().array()).all() && ((b.min)().array()<=m_max.array()).all(); }\n\n  /** Extends \\c *this such that it contains the point \\a p and returns a reference to \\c *this.\n   * \\sa extend(const AlignedBox&) */\n  template<typename Derived>\n  EIGEN_DEVICE_FUNC inline AlignedBox& extend(const MatrixBase<Derived>& p)\n  {\n    typename internal::nested_eval<Derived,2>::type p_n(p.derived());\n    m_min = m_min.cwiseMin(p_n);\n    m_max = m_max.cwiseMax(p_n);\n    return *this;\n  }\n\n  /** Extends \\c *this such that it contains the box \\a b and returns a reference to \\c *this.\n   * \\sa merged, extend(const MatrixBase&) */\n  EIGEN_DEVICE_FUNC inline AlignedBox& extend(const AlignedBox& b)\n  {\n    m_min = m_min.cwiseMin(b.m_min);\n    m_max = m_max.cwiseMax(b.m_max);\n    return *this;\n  }\n\n  /** Clamps \\c *this by the box \\a b and returns a reference to \\c *this.\n   * \\note If the boxes don't intersect, the resulting box is empty.\n   * \\sa intersection(), intersects() */\n  EIGEN_DEVICE_FUNC inline AlignedBox& clamp(const AlignedBox& b)\n  {\n    m_min = m_min.cwiseMax(b.m_min);\n    m_max = m_max.cwiseMin(b.m_max);\n    return *this;\n  }\n\n  /** Returns an AlignedBox that is the intersection of \\a b and \\c *this\n   * \\note If the boxes don't intersect, the resulting box is empty.\n   * \\sa intersects(), clamp, contains()  */\n  EIGEN_DEVICE_FUNC inline AlignedBox intersection(const AlignedBox& b) 
const\n  {return AlignedBox(m_min.cwiseMax(b.m_min), m_max.cwiseMin(b.m_max)); }\n\n  /** Returns an AlignedBox that is the union of \\a b and \\c *this.\n   * \\note Merging with an empty box may result in a box bigger than \\c *this.\n   * \\sa extend(const AlignedBox&) */\n  EIGEN_DEVICE_FUNC inline AlignedBox merged(const AlignedBox& b) const\n  { return AlignedBox(m_min.cwiseMin(b.m_min), m_max.cwiseMax(b.m_max)); }\n\n  /** Translate \\c *this by the vector \\a t and returns a reference to \\c *this. */\n  template<typename Derived>\n  EIGEN_DEVICE_FUNC inline AlignedBox& translate(const MatrixBase<Derived>& a_t)\n  {\n    const typename internal::nested_eval<Derived,2>::type t(a_t.derived());\n    m_min += t;\n    m_max += t;\n    return *this;\n  }\n\n  /** \\returns a copy of \\c *this translated by the vector \\a t. */\n  template<typename Derived>\n  EIGEN_DEVICE_FUNC inline AlignedBox translated(const MatrixBase<Derived>& a_t) const\n  {\n    AlignedBox result(m_min, m_max);\n    result.translate(a_t);\n    return result;\n  }\n\n  /** \\returns the squared distance between the point \\a p and the box \\c *this,\n    * and zero if \\a p is inside the box.\n    * \\sa exteriorDistance(const MatrixBase&), squaredExteriorDistance(const AlignedBox&)\n    */\n  template<typename Derived>\n  EIGEN_DEVICE_FUNC inline Scalar squaredExteriorDistance(const MatrixBase<Derived>& p) const;\n\n  /** \\returns the squared distance between the boxes \\a b and \\c *this,\n    * and zero if the boxes intersect.\n    * \\sa exteriorDistance(const AlignedBox&), squaredExteriorDistance(const MatrixBase&)\n    */\n  EIGEN_DEVICE_FUNC inline Scalar squaredExteriorDistance(const AlignedBox& b) const;\n\n  /** \\returns the distance between the point \\a p and the box \\c *this,\n    * and zero if \\a p is inside the box.\n    * \\sa squaredExteriorDistance(const MatrixBase&), exteriorDistance(const AlignedBox&)\n    */\n  template<typename Derived>\n  EIGEN_DEVICE_FUNC inline NonInteger exteriorDistance(const MatrixBase<Derived>& p) const\n  { EIGEN_USING_STD(sqrt) return sqrt(NonInteger(squaredExteriorDistance(p))); }\n\n  /** \\returns the distance between the boxes \\a b and \\c *this,\n    * and zero if the boxes intersect.\n    * \\sa squaredExteriorDistance(const AlignedBox&), exteriorDistance(const MatrixBase&)\n    */\n  EIGEN_DEVICE_FUNC inline NonInteger exteriorDistance(const AlignedBox& b) const\n  { EIGEN_USING_STD(sqrt) return sqrt(NonInteger(squaredExteriorDistance(b))); }\n\n  /**\n   * Specialization of transform for pure translation.\n   */\n  template<int Mode, int Options>\n  EIGEN_DEVICE_FUNC inline void transform(\n      const typename Transform<Scalar, AmbientDimAtCompileTime, Mode, Options>::TranslationType& translation)\n  {\n    this->translate(translation);\n  }\n\n  /**\n   * Transforms this box by \\a transform and recomputes it to\n   * still be an axis-aligned box.\n   *\n   * \\note This method is provided under BSD license (see the top of this file).\n   */\n  template<int Mode, int Options>\n  EIGEN_DEVICE_FUNC inline void transform(const Transform<Scalar, AmbientDimAtCompileTime, Mode, Options>& transform)\n  {\n    // Only Affine and Isometry transforms are currently supported.\n    EIGEN_STATIC_ASSERT(Mode == Affine || Mode == AffineCompact || Mode == Isometry, THIS_METHOD_IS_ONLY_FOR_SPECIFIC_TRANSFORMATIONS);\n\n    // Method adapted from FCL src/shape/geometric_shapes_utility.cpp#computeBV<AABB, Box>(...)\n    // 
https://github.com/flexible-collision-library/fcl/blob/fcl-0.4/src/shape/geometric_shapes_utility.cpp#L292\n    //\n    // Here's a nice explanation why it works: https://zeuxcg.org/2010/10/17/aabb-from-obb-with-component-wise-abs/\n\n    // two times rotated extent\n    const VectorType rotated_extent_2 = transform.linear().cwiseAbs() * sizes();\n    // two times new center\n    const VectorType rotated_center_2 = transform.linear() * (this->m_max + this->m_min) +\n        Scalar(2) * transform.translation();\n\n    this->m_max = (rotated_center_2 + rotated_extent_2) / Scalar(2);\n    this->m_min = (rotated_center_2 - rotated_extent_2) / Scalar(2);\n  }\n\n  /**\n   * \\returns a copy of \\c *this transformed by \\a transform and recomputed to\n   * still be an axis-aligned box.\n   */\n  template<int Mode, int Options>\n  EIGEN_DEVICE_FUNC AlignedBox transformed(const Transform<Scalar, AmbientDimAtCompileTime, Mode, Options>& transform) const\n  {\n    AlignedBox result(m_min, m_max);\n    result.transform(transform);\n    return result;\n  }\n\n  /** \\returns \\c *this with scalar type casted to \\a NewScalarType\n    *\n    * Note that if \\a NewScalarType is equal to the current scalar type of \\c *this\n    * then this function smartly returns a const reference to \\c *this.\n    */\n  template<typename NewScalarType>\n  EIGEN_DEVICE_FUNC inline typename internal::cast_return_type<AlignedBox,\n           AlignedBox<NewScalarType,AmbientDimAtCompileTime> >::type cast() const\n  {\n    return typename internal::cast_return_type<AlignedBox,\n                    AlignedBox<NewScalarType,AmbientDimAtCompileTime> >::type(*this);\n  }\n\n  /** Copy constructor with scalar type conversion */\n  template<typename OtherScalarType>\n  EIGEN_DEVICE_FUNC inline explicit AlignedBox(const AlignedBox<OtherScalarType,AmbientDimAtCompileTime>& other)\n  {\n    m_min = (other.min)().template cast<Scalar>();\n    m_max = (other.max)().template cast<Scalar>();\n  }\n\n  /** \\returns \\c true if \\c *this is approximately equal to \\a other, within the precision\n    * determined by \\a prec.\n    *\n    * \\sa MatrixBase::isApprox() */\n  EIGEN_DEVICE_FUNC bool isApprox(const AlignedBox& other, const RealScalar& prec = ScalarTraits::dummy_precision()) const\n  { return m_min.isApprox(other.m_min, prec) && m_max.isApprox(other.m_max, prec); }\n\nprotected:\n\n  VectorType m_min, m_max;\n};\n\n\n\ntemplate<typename Scalar,int AmbientDim>\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC inline Scalar AlignedBox<Scalar,AmbientDim>::squaredExteriorDistance(const MatrixBase<Derived>& a_p) const\n{\n  typename internal::nested_eval<Derived,2*AmbientDim>::type p(a_p.derived());\n  Scalar dist2(0);\n  Scalar aux;\n  for (Index k=0; k<dim(); ++k)\n  {\n    if( m_min[k] > p[k] )\n    {\n      aux = m_min[k] - p[k];\n      dist2 += aux*aux;\n    }\n    else if( p[k] > m_max[k] )\n    {\n      aux = p[k] - m_max[k];\n      dist2 += aux*aux;\n    }\n  }\n  return dist2;\n}\n\ntemplate<typename Scalar,int AmbientDim>\nEIGEN_DEVICE_FUNC inline Scalar AlignedBox<Scalar,AmbientDim>::squaredExteriorDistance(const AlignedBox& b) const\n{\n  Scalar dist2(0);\n  Scalar aux;\n  for (Index k=0; k<dim(); ++k)\n  {\n    if( m_min[k] > b.m_max[k] )\n    {\n      aux = m_min[k] - b.m_max[k];\n      dist2 += aux*aux;\n    }\n    else if( b.m_min[k] > m_max[k] )\n    {\n      aux = b.m_min[k] - m_max[k];\n      dist2 += aux*aux;\n    }\n  }\n  return dist2;\n}\n\n/** \\defgroup alignedboxtypedefs Global aligned box typedefs\n  *\n  * 
\\ingroup Geometry_Module\n  *\n  * Eigen defines several typedef shortcuts for most common aligned box types.\n  *\n  * The general patterns are the following:\n  *\n  * \\c AlignedBoxSizeType where \\c Size can be \\c 1, \\c 2,\\c 3,\\c 4 for fixed size boxes or \\c X for dynamic size,\n  * and where \\c Type can be \\c i for integer, \\c f for float, \\c d for double.\n  *\n  * For example, \\c AlignedBox3d is a fixed-size 3-dimensional aligned box type of doubles, and \\c AlignedBoxXf is a dynamic-size aligned box of floats.\n  *\n  * \\sa class AlignedBox\n  */\n\n#define EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, Size, SizeSuffix)    \\\n/** \\ingroup alignedboxtypedefs */                                 \\\ntypedef AlignedBox<Type, Size>   AlignedBox##SizeSuffix##TypeSuffix;\n\n#define EIGEN_MAKE_TYPEDEFS_ALL_SIZES(Type, TypeSuffix) \\\nEIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, 1, 1) \\\nEIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, 2, 2) \\\nEIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, 3, 3) \\\nEIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, 4, 4) \\\nEIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, Dynamic, X)\n\nEIGEN_MAKE_TYPEDEFS_ALL_SIZES(int,                  i)\nEIGEN_MAKE_TYPEDEFS_ALL_SIZES(float,                f)\nEIGEN_MAKE_TYPEDEFS_ALL_SIZES(double,               d)\n\n#undef EIGEN_MAKE_TYPEDEFS_ALL_SIZES\n#undef EIGEN_MAKE_TYPEDEFS\n\n} // end namespace Eigen\n\n#endif // EIGEN_ALIGNEDBOX_H\n"
  },
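A minimal sketch of the AlignedBox API documented in the header above; it is illustrative only and not part of the vendored sources, with made-up point coordinates. It builds a box by extension, then queries center, sizes, containment, and exterior distance.

```cpp
#include <iostream>
#include <Eigen/Geometry>

int main() {
  using Eigen::Vector3d;

  // Grow an initially empty box so that it encloses two points
  // (the coordinates are made-up illustration data).
  Eigen::AlignedBox3d box;
  box.extend(Vector3d(0, 0, 0)).extend(Vector3d(1, 2, 3));

  std::cout << "center: " << box.center().transpose() << "\n"
            << "sizes:  " << box.sizes().transpose() << "\n"
            << "contains (1,1,1): " << box.contains(Vector3d(1, 1, 1)) << "\n";

  // Zero for points inside the box, Euclidean distance otherwise.
  std::cout << "exterior distance: "
            << box.exteriorDistance(Vector3d(2, 2, 3)) << "\n";
}
```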
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Geometry/AngleAxis.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_ANGLEAXIS_H\n#define EIGEN_ANGLEAXIS_H\n\nnamespace Eigen { \n\n/** \\geometry_module \\ingroup Geometry_Module\n  *\n  * \\class AngleAxis\n  *\n  * \\brief Represents a 3D rotation as a rotation angle around an arbitrary 3D axis\n  *\n  * \\param _Scalar the scalar type, i.e., the type of the coefficients.\n  *\n  * \\warning When setting up an AngleAxis object, the axis vector \\b must \\b be \\b normalized.\n  *\n  * The following two typedefs are provided for convenience:\n  * \\li \\c AngleAxisf for \\c float\n  * \\li \\c AngleAxisd for \\c double\n  *\n  * Combined with MatrixBase::Unit{X,Y,Z}, AngleAxis can be used to easily\n  * mimic Euler-angles. Here is an example:\n  * \\include AngleAxis_mimic_euler.cpp\n  * Output: \\verbinclude AngleAxis_mimic_euler.out\n  *\n  * \\note This class is not aimed to be used to store a rotation transformation,\n  * but rather to make easier the creation of other rotation (Quaternion, rotation Matrix)\n  * and transformation objects.\n  *\n  * \\sa class Quaternion, class Transform, MatrixBase::UnitX()\n  */\n\nnamespace internal {\ntemplate<typename _Scalar> struct traits<AngleAxis<_Scalar> >\n{\n  typedef _Scalar Scalar;\n};\n}\n\ntemplate<typename _Scalar>\nclass AngleAxis : public RotationBase<AngleAxis<_Scalar>,3>\n{\n  typedef RotationBase<AngleAxis<_Scalar>,3> Base;\n\npublic:\n\n  using Base::operator*;\n\n  enum { Dim = 3 };\n  /** the scalar type of the coefficients */\n  typedef _Scalar Scalar;\n  typedef Matrix<Scalar,3,3> Matrix3;\n  typedef Matrix<Scalar,3,1> Vector3;\n  typedef Quaternion<Scalar> QuaternionType;\n\nprotected:\n\n  Vector3 m_axis;\n  Scalar m_angle;\n\npublic:\n\n  /** Default constructor without initialization. */\n  EIGEN_DEVICE_FUNC AngleAxis() {}\n  /** Constructs and initialize the angle-axis rotation from an \\a angle in radian\n    * and an \\a axis which \\b must \\b be \\b normalized.\n    *\n    * \\warning If the \\a axis vector is not normalized, then the angle-axis object\n    *          represents an invalid rotation. */\n  template<typename Derived>\n  EIGEN_DEVICE_FUNC \n  inline AngleAxis(const Scalar& angle, const MatrixBase<Derived>& axis) : m_axis(axis), m_angle(angle) {}\n  /** Constructs and initialize the angle-axis rotation from a quaternion \\a q.\n    * This function implicitly normalizes the quaternion \\a q.\n    */\n  template<typename QuatDerived> \n  EIGEN_DEVICE_FUNC inline explicit AngleAxis(const QuaternionBase<QuatDerived>& q) { *this = q; }\n  /** Constructs and initialize the angle-axis rotation from a 3x3 rotation matrix. 
*/\n  template<typename Derived>\n  EIGEN_DEVICE_FUNC inline explicit AngleAxis(const MatrixBase<Derived>& m) { *this = m; }\n\n  /** \\returns the value of the rotation angle in radian */\n  EIGEN_DEVICE_FUNC Scalar angle() const { return m_angle; }\n  /** \\returns a read-write reference to the stored angle in radian */\n  EIGEN_DEVICE_FUNC Scalar& angle() { return m_angle; }\n\n  /** \\returns the rotation axis */\n  EIGEN_DEVICE_FUNC const Vector3& axis() const { return m_axis; }\n  /** \\returns a read-write reference to the stored rotation axis.\n    *\n    * \\warning The rotation axis must remain a \\b unit vector.\n    */\n  EIGEN_DEVICE_FUNC Vector3& axis() { return m_axis; }\n\n  /** Concatenates two rotations */\n  EIGEN_DEVICE_FUNC inline QuaternionType operator* (const AngleAxis& other) const\n  { return QuaternionType(*this) * QuaternionType(other); }\n\n  /** Concatenates two rotations */\n  EIGEN_DEVICE_FUNC inline QuaternionType operator* (const QuaternionType& other) const\n  { return QuaternionType(*this) * other; }\n\n  /** Concatenates two rotations */\n  friend EIGEN_DEVICE_FUNC inline QuaternionType operator* (const QuaternionType& a, const AngleAxis& b)\n  { return a * QuaternionType(b); }\n\n  /** \\returns the inverse rotation, i.e., an angle-axis with opposite rotation angle */\n  EIGEN_DEVICE_FUNC AngleAxis inverse() const\n  { return AngleAxis(-m_angle, m_axis); }\n\n  template<class QuatDerived>\n  EIGEN_DEVICE_FUNC AngleAxis& operator=(const QuaternionBase<QuatDerived>& q);\n  template<typename Derived>\n  EIGEN_DEVICE_FUNC AngleAxis& operator=(const MatrixBase<Derived>& m);\n\n  template<typename Derived>\n  EIGEN_DEVICE_FUNC AngleAxis& fromRotationMatrix(const MatrixBase<Derived>& m);\n  EIGEN_DEVICE_FUNC Matrix3 toRotationMatrix(void) const;\n\n  /** \\returns \\c *this with scalar type casted to \\a NewScalarType\n    *\n    * Note that if \\a NewScalarType is equal to the current scalar type of \\c *this\n    * then this function smartly returns a const reference to \\c *this.\n    */\n  template<typename NewScalarType>\n  EIGEN_DEVICE_FUNC inline typename internal::cast_return_type<AngleAxis,AngleAxis<NewScalarType> >::type cast() const\n  { return typename internal::cast_return_type<AngleAxis,AngleAxis<NewScalarType> >::type(*this); }\n\n  /** Copy constructor with scalar type conversion */\n  template<typename OtherScalarType>\n  EIGEN_DEVICE_FUNC inline explicit AngleAxis(const AngleAxis<OtherScalarType>& other)\n  {\n    m_axis = other.axis().template cast<Scalar>();\n    m_angle = Scalar(other.angle());\n  }\n\n  EIGEN_DEVICE_FUNC static inline const AngleAxis Identity() { return AngleAxis(Scalar(0), Vector3::UnitX()); }\n\n  /** \\returns \\c true if \\c *this is approximately equal to \\a other, within the precision\n    * determined by \\a prec.\n    *\n    * \\sa MatrixBase::isApprox() */\n  EIGEN_DEVICE_FUNC bool isApprox(const AngleAxis& other, const typename NumTraits<Scalar>::Real& prec = NumTraits<Scalar>::dummy_precision()) const\n  { return m_axis.isApprox(other.m_axis, prec) && internal::isApprox(m_angle,other.m_angle, prec); }\n};\n\n/** \\ingroup Geometry_Module\n  * single precision angle-axis type */\ntypedef AngleAxis<float> AngleAxisf;\n/** \\ingroup Geometry_Module\n  * double precision angle-axis type */\ntypedef AngleAxis<double> AngleAxisd;\n\n/** Set \\c *this from a \\b unit quaternion.\n  *\n  * The resulting axis is normalized, and the computed angle is in the [0,pi] range.\n  * \n  * This function implicitly normalizes 
the quaternion \\a q.\n  */\ntemplate<typename Scalar>\ntemplate<typename QuatDerived>\nEIGEN_DEVICE_FUNC AngleAxis<Scalar>& AngleAxis<Scalar>::operator=(const QuaternionBase<QuatDerived>& q)\n{\n  EIGEN_USING_STD(atan2)\n  EIGEN_USING_STD(abs)\n  Scalar n = q.vec().norm();\n  if(n<NumTraits<Scalar>::epsilon())\n    n = q.vec().stableNorm();\n\n  if (n != Scalar(0))\n  {\n    m_angle = Scalar(2)*atan2(n, abs(q.w()));\n    if(q.w() < Scalar(0))\n      n = -n;\n    m_axis  = q.vec() / n;\n  }\n  else\n  {\n    m_angle = Scalar(0);\n    m_axis << Scalar(1), Scalar(0), Scalar(0);\n  }\n  return *this;\n}\n\n/** Set \\c *this from a 3x3 rotation matrix \\a mat.\n  */\ntemplate<typename Scalar>\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC AngleAxis<Scalar>& AngleAxis<Scalar>::operator=(const MatrixBase<Derived>& mat)\n{\n  // Since a direct conversion would not be really faster,\n  // let's use the robust Quaternion implementation:\n  return *this = QuaternionType(mat);\n}\n\n/**\n* \\brief Sets \\c *this from a 3x3 rotation matrix.\n**/\ntemplate<typename Scalar>\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC AngleAxis<Scalar>& AngleAxis<Scalar>::fromRotationMatrix(const MatrixBase<Derived>& mat)\n{\n  return *this = QuaternionType(mat);\n}\n\n/** Constructs and \\returns an equivalent 3x3 rotation matrix.\n  */\ntemplate<typename Scalar>\ntypename AngleAxis<Scalar>::Matrix3\nEIGEN_DEVICE_FUNC AngleAxis<Scalar>::toRotationMatrix(void) const\n{\n  EIGEN_USING_STD(sin)\n  EIGEN_USING_STD(cos)\n  Matrix3 res;\n  Vector3 sin_axis  = sin(m_angle) * m_axis;\n  Scalar c = cos(m_angle);\n  Vector3 cos1_axis = (Scalar(1)-c) * m_axis;\n\n  Scalar tmp;\n  tmp = cos1_axis.x() * m_axis.y();\n  res.coeffRef(0,1) = tmp - sin_axis.z();\n  res.coeffRef(1,0) = tmp + sin_axis.z();\n\n  tmp = cos1_axis.x() * m_axis.z();\n  res.coeffRef(0,2) = tmp + sin_axis.y();\n  res.coeffRef(2,0) = tmp - sin_axis.y();\n\n  tmp = cos1_axis.y() * m_axis.z();\n  res.coeffRef(1,2) = tmp - sin_axis.x();\n  res.coeffRef(2,1) = tmp + sin_axis.x();\n\n  res.diagonal() = (cos1_axis.cwiseProduct(m_axis)).array() + c;\n\n  return res;\n}\n\n} // end namespace Eigen\n\n#endif // EIGEN_ANGLEAXIS_H\n"
  },
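A short sketch of the AngleAxis usage described in the header above (illustrative only, made-up angle): construct a rotation from an angle and a normalized axis, then convert it to the other representations mentioned in the doc comment, a rotation matrix and a quaternion.

```cpp
#include <iostream>
#include <Eigen/Geometry>

int main() {
  using namespace Eigen;

  // Quarter turn about the z axis; the axis must be normalized,
  // which Vector3d::UnitZ() is by construction.
  const double kPi = 3.14159265358979323846;
  AngleAxisd aa(kPi / 2, Vector3d::UnitZ());

  // AngleAxis converts cheaply to the other rotation representations.
  Matrix3d R = aa.toRotationMatrix();
  Quaterniond q(aa);

  // Both representations rotate the x axis onto the y axis.
  std::cout << (R * Vector3d::UnitX()).transpose() << "\n"
            << (q * Vector3d::UnitX()).transpose() << "\n";
}
```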
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Geometry/EulerAngles.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_EULERANGLES_H\n#define EIGEN_EULERANGLES_H\n\nnamespace Eigen { \n\n/** \\geometry_module \\ingroup Geometry_Module\n  *\n  *\n  * \\returns the Euler-angles of the rotation matrix \\c *this using the convention defined by the triplet (\\a a0,\\a a1,\\a a2)\n  *\n  * Each of the three parameters \\a a0,\\a a1,\\a a2 represents the respective rotation axis as an integer in {0,1,2}.\n  * For instance, in:\n  * \\code Vector3f ea = mat.eulerAngles(2, 0, 2); \\endcode\n  * \"2\" represents the z axis and \"0\" the x axis, etc. The returned angles are such that\n  * we have the following equality:\n  * \\code\n  * mat == AngleAxisf(ea[0], Vector3f::UnitZ())\n  *      * AngleAxisf(ea[1], Vector3f::UnitX())\n  *      * AngleAxisf(ea[2], Vector3f::UnitZ()); \\endcode\n  * This corresponds to the right-multiply conventions (with right hand side frames).\n  * \n  * The returned angles are in the ranges [0:pi]x[-pi:pi]x[-pi:pi].\n  * \n  * \\sa class AngleAxis\n  */\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC inline Matrix<typename MatrixBase<Derived>::Scalar,3,1>\nMatrixBase<Derived>::eulerAngles(Index a0, Index a1, Index a2) const\n{\n  EIGEN_USING_STD(atan2)\n  EIGEN_USING_STD(sin)\n  EIGEN_USING_STD(cos)\n  /* Implemented from Graphics Gems IV */\n  EIGEN_STATIC_ASSERT_MATRIX_SPECIFIC_SIZE(Derived,3,3)\n\n  Matrix<Scalar,3,1> res;\n  typedef Matrix<typename Derived::Scalar,2,1> Vector2;\n\n  const Index odd = ((a0+1)%3 == a1) ? 0 : 1;\n  const Index i = a0;\n  const Index j = (a0 + 1 + odd)%3;\n  const Index k = (a0 + 2 - odd)%3;\n  \n  if (a0==a2)\n  {\n    res[0] = atan2(coeff(j,i), coeff(k,i));\n    if((odd && res[0]<Scalar(0)) || ((!odd) && res[0]>Scalar(0)))\n    {\n      if(res[0] > Scalar(0)) {\n        res[0] -= Scalar(EIGEN_PI);\n      }\n      else {\n        res[0] += Scalar(EIGEN_PI);\n      }\n      Scalar s2 = Vector2(coeff(j,i), coeff(k,i)).norm();\n      res[1] = -atan2(s2, coeff(i,i));\n    }\n    else\n    {\n      Scalar s2 = Vector2(coeff(j,i), coeff(k,i)).norm();\n      res[1] = atan2(s2, coeff(i,i));\n    }\n    \n    // With a=(0,1,0), we have i=0; j=1; k=2, and after computing the first two angles,\n    // we can compute their respective rotation, and apply its inverse to M. 
Since the result must\n    // be a rotation around x, we have:\n    //\n    //  c2  s1.s2 c1.s2                   1  0   0 \n    //  0   c1    -s1       *    M    =   0  c3  s3\n    //  -s2 s1.c2 c1.c2                   0 -s3  c3\n    //\n    //  Thus:  m11.c1 - m21.s1 = c3  &   m12.c1 - m22.s1 = s3\n    \n    Scalar s1 = sin(res[0]);\n    Scalar c1 = cos(res[0]);\n    res[2] = atan2(c1*coeff(j,k)-s1*coeff(k,k), c1*coeff(j,j) - s1 * coeff(k,j));\n  } \n  else\n  {\n    res[0] = atan2(coeff(j,k), coeff(k,k));\n    Scalar c2 = Vector2(coeff(i,i), coeff(i,j)).norm();\n    if((odd && res[0]<Scalar(0)) || ((!odd) && res[0]>Scalar(0))) {\n      if(res[0] > Scalar(0)) {\n        res[0] -= Scalar(EIGEN_PI);\n      }\n      else {\n        res[0] += Scalar(EIGEN_PI);\n      }\n      res[1] = atan2(-coeff(i,k), -c2);\n    }\n    else\n      res[1] = atan2(-coeff(i,k), c2);\n    Scalar s1 = sin(res[0]);\n    Scalar c1 = cos(res[0]);\n    res[2] = atan2(s1*coeff(k,i)-c1*coeff(j,i), c1*coeff(j,j) - s1 * coeff(k,j));\n  }\n  if (!odd)\n    res = -res;\n  \n  return res;\n}\n\n} // end namespace Eigen\n\n#endif // EIGEN_EULERANGLES_H\n"
  },
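The (2, 0, 2) convention spelled out in the eulerAngles() doc comment above can be checked with a small round trip; this sketch is illustrative only, with made-up angles: compose three AngleAxis rotations, recover the angles with eulerAngles(2, 0, 2), and rebuild the matrix.

```cpp
#include <iostream>
#include <Eigen/Geometry>

int main() {
  using namespace Eigen;

  // Compose a rotation in the z-x-z convention (angles are made up).
  Matrix3d R = (AngleAxisd(0.3, Vector3d::UnitZ())
              * AngleAxisd(1.1, Vector3d::UnitX())
              * AngleAxisd(-0.4, Vector3d::UnitZ())).toRotationMatrix();

  // Recover angles with the matching (2, 0, 2) axis triplet ...
  Vector3d ea = R.eulerAngles(2, 0, 2);

  // ... and rebuild the matrix: the round trip is exact up to rounding,
  // even though the recovered angles may differ from the inputs.
  Matrix3d R2 = (AngleAxisd(ea[0], Vector3d::UnitZ())
               * AngleAxisd(ea[1], Vector3d::UnitX())
               * AngleAxisd(ea[2], Vector3d::UnitZ())).toRotationMatrix();

  std::cout << "round-trip error: " << (R - R2).norm() << "\n";
}
```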
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Geometry/Homogeneous.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_HOMOGENEOUS_H\n#define EIGEN_HOMOGENEOUS_H\n\nnamespace Eigen {\n\n/** \\geometry_module \\ingroup Geometry_Module\n  *\n  * \\class Homogeneous\n  *\n  * \\brief Expression of one (or a set of) homogeneous vector(s)\n  *\n  * \\param MatrixType the type of the object in which we are making homogeneous\n  *\n  * This class represents an expression of one (or a set of) homogeneous vector(s).\n  * It is the return type of MatrixBase::homogeneous() and most of the time\n  * this is the only way it is used.\n  *\n  * \\sa MatrixBase::homogeneous()\n  */\n\nnamespace internal {\n\ntemplate<typename MatrixType,int Direction>\nstruct traits<Homogeneous<MatrixType,Direction> >\n : traits<MatrixType>\n{\n  typedef typename traits<MatrixType>::StorageKind StorageKind;\n  typedef typename ref_selector<MatrixType>::type MatrixTypeNested;\n  typedef typename remove_reference<MatrixTypeNested>::type _MatrixTypeNested;\n  enum {\n    RowsPlusOne = (MatrixType::RowsAtCompileTime != Dynamic) ?\n                  int(MatrixType::RowsAtCompileTime) + 1 : Dynamic,\n    ColsPlusOne = (MatrixType::ColsAtCompileTime != Dynamic) ?\n                  int(MatrixType::ColsAtCompileTime) + 1 : Dynamic,\n    RowsAtCompileTime = Direction==Vertical  ?  RowsPlusOne : MatrixType::RowsAtCompileTime,\n    ColsAtCompileTime = Direction==Horizontal ? ColsPlusOne : MatrixType::ColsAtCompileTime,\n    MaxRowsAtCompileTime = RowsAtCompileTime,\n    MaxColsAtCompileTime = ColsAtCompileTime,\n    TmpFlags = _MatrixTypeNested::Flags & HereditaryBits,\n    Flags = ColsAtCompileTime==1 ? (TmpFlags & ~RowMajorBit)\n          : RowsAtCompileTime==1 ? (TmpFlags | RowMajorBit)\n          : TmpFlags\n  };\n};\n\ntemplate<typename MatrixType,typename Lhs> struct homogeneous_left_product_impl;\ntemplate<typename MatrixType,typename Rhs> struct homogeneous_right_product_impl;\n\n} // end namespace internal\n\ntemplate<typename MatrixType,int _Direction> class Homogeneous\n  : public MatrixBase<Homogeneous<MatrixType,_Direction> >, internal::no_assignment_operator\n{\n  public:\n\n    typedef MatrixType NestedExpression;\n    enum { Direction = _Direction };\n\n    typedef MatrixBase<Homogeneous> Base;\n    EIGEN_DENSE_PUBLIC_INTERFACE(Homogeneous)\n\n    EIGEN_DEVICE_FUNC explicit inline Homogeneous(const MatrixType& matrix)\n      : m_matrix(matrix)\n    {}\n\n    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n    inline Index rows() const EIGEN_NOEXCEPT { return m_matrix.rows() + (int(Direction)==Vertical   ? 1 : 0); }\n    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n    inline Index cols() const EIGEN_NOEXCEPT { return m_matrix.cols() + (int(Direction)==Horizontal ? 
1 : 0); }\n\n    EIGEN_DEVICE_FUNC const NestedExpression& nestedExpression() const { return m_matrix; }\n\n    template<typename Rhs>\n    EIGEN_DEVICE_FUNC inline const Product<Homogeneous,Rhs>\n    operator* (const MatrixBase<Rhs>& rhs) const\n    {\n      eigen_assert(int(Direction)==Horizontal);\n      return Product<Homogeneous,Rhs>(*this,rhs.derived());\n    }\n\n    template<typename Lhs> friend\n    EIGEN_DEVICE_FUNC inline const Product<Lhs,Homogeneous>\n    operator* (const MatrixBase<Lhs>& lhs, const Homogeneous& rhs)\n    {\n      eigen_assert(int(Direction)==Vertical);\n      return Product<Lhs,Homogeneous>(lhs.derived(),rhs);\n    }\n\n    template<typename Scalar, int Dim, int Mode, int Options> friend\n    EIGEN_DEVICE_FUNC inline const Product<Transform<Scalar,Dim,Mode,Options>, Homogeneous >\n    operator* (const Transform<Scalar,Dim,Mode,Options>& lhs, const Homogeneous& rhs)\n    {\n      eigen_assert(int(Direction)==Vertical);\n      return Product<Transform<Scalar,Dim,Mode,Options>, Homogeneous>(lhs,rhs);\n    }\n\n    template<typename Func>\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::result_of<Func(Scalar,Scalar)>::type\n    redux(const Func& func) const\n    {\n      return func(m_matrix.redux(func), Scalar(1));\n    }\n\n  protected:\n    typename MatrixType::Nested m_matrix;\n};\n\n/** \\geometry_module \\ingroup Geometry_Module\n  *\n  * \\returns a vector expression that is one longer than the vector argument, with the value 1 symbolically appended as the last coefficient.\n  *\n  * This can be used to convert affine coordinates to homogeneous coordinates.\n  *\n  * \\only_for_vectors\n  *\n  * Example: \\include MatrixBase_homogeneous.cpp\n  * Output: \\verbinclude MatrixBase_homogeneous.out\n  *\n  * \\sa VectorwiseOp::homogeneous(), class Homogeneous\n  */\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC inline typename MatrixBase<Derived>::HomogeneousReturnType\nMatrixBase<Derived>::homogeneous() const\n{\n  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived);\n  return HomogeneousReturnType(derived());\n}\n\n/** \\geometry_module \\ingroup Geometry_Module\n  *\n  * \\returns an expression where the value 1 is symbolically appended as the final coefficient to each column (or row) of the matrix.\n  *\n  * This can be used to convert affine coordinates to homogeneous coordinates.\n  *\n  * Example: \\include VectorwiseOp_homogeneous.cpp\n  * Output: \\verbinclude VectorwiseOp_homogeneous.out\n  *\n  * \\sa MatrixBase::homogeneous(), class Homogeneous */\ntemplate<typename ExpressionType, int Direction>\nEIGEN_DEVICE_FUNC inline Homogeneous<ExpressionType,Direction>\nVectorwiseOp<ExpressionType,Direction>::homogeneous() const\n{\n  return HomogeneousReturnType(_expression());\n}\n\n/** \\geometry_module \\ingroup Geometry_Module\n  *\n  * \\brief homogeneous normalization\n  *\n  * \\returns a vector expression of the N-1 first coefficients of \\c *this divided by that last coefficient.\n  *\n  * This can be used to convert homogeneous coordinates to affine coordinates.\n  *\n  * It is essentially a shortcut for:\n  * \\code\n    this->head(this->size()-1)/this->coeff(this->size()-1);\n    \\endcode\n  *\n  * Example: \\include MatrixBase_hnormalized.cpp\n  * Output: \\verbinclude MatrixBase_hnormalized.out\n  *\n  * \\sa VectorwiseOp::hnormalized() */\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC inline const typename MatrixBase<Derived>::HNormalizedReturnType\nMatrixBase<Derived>::hnormalized() const\n{\n  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived);\n  
return ConstStartMinusOne(derived(),0,0,\n    ColsAtCompileTime==1?size()-1:1,\n    ColsAtCompileTime==1?1:size()-1) / coeff(size()-1);\n}\n\n/** \\geometry_module \\ingroup Geometry_Module\n  *\n  * \\brief column or row-wise homogeneous normalization\n  *\n  * \\returns an expression of the first N-1 coefficients of each column (or row) of \\c *this divided by the last coefficient of each column (or row).\n  *\n  * This can be used to convert homogeneous coordinates to affine coordinates.\n  *\n  * It is conceptually equivalent to calling MatrixBase::hnormalized() to each column (or row) of \\c *this.\n  *\n  * Example: \\include DirectionWise_hnormalized.cpp\n  * Output: \\verbinclude DirectionWise_hnormalized.out\n  *\n  * \\sa MatrixBase::hnormalized() */\ntemplate<typename ExpressionType, int Direction>\nEIGEN_DEVICE_FUNC inline const typename VectorwiseOp<ExpressionType,Direction>::HNormalizedReturnType\nVectorwiseOp<ExpressionType,Direction>::hnormalized() const\n{\n  return HNormalized_Block(_expression(),0,0,\n      Direction==Vertical   ? _expression().rows()-1 : _expression().rows(),\n      Direction==Horizontal ? _expression().cols()-1 : _expression().cols()).cwiseQuotient(\n      Replicate<HNormalized_Factors,\n                Direction==Vertical   ? HNormalized_SizeMinusOne : 1,\n                Direction==Horizontal ? HNormalized_SizeMinusOne : 1>\n        (HNormalized_Factors(_expression(),\n          Direction==Vertical    ? _expression().rows()-1:0,\n          Direction==Horizontal  ? _expression().cols()-1:0,\n          Direction==Vertical    ? 1 : _expression().rows(),\n          Direction==Horizontal  ? 1 : _expression().cols()),\n         Direction==Vertical   ? _expression().rows()-1 : 1,\n         Direction==Horizontal ? _expression().cols()-1 : 1));\n}\n\nnamespace internal {\n\ntemplate<typename MatrixOrTransformType>\nstruct take_matrix_for_product\n{\n  typedef MatrixOrTransformType type;\n  EIGEN_DEVICE_FUNC static const type& run(const type &x) { return x; }\n};\n\ntemplate<typename Scalar, int Dim, int Mode,int Options>\nstruct take_matrix_for_product<Transform<Scalar, Dim, Mode, Options> >\n{\n  typedef Transform<Scalar, Dim, Mode, Options> TransformType;\n  typedef typename internal::add_const<typename TransformType::ConstAffinePart>::type type;\n  EIGEN_DEVICE_FUNC static type run (const TransformType& x) { return x.affine(); }\n};\n\ntemplate<typename Scalar, int Dim, int Options>\nstruct take_matrix_for_product<Transform<Scalar, Dim, Projective, Options> >\n{\n  typedef Transform<Scalar, Dim, Projective, Options> TransformType;\n  typedef typename TransformType::MatrixType type;\n  EIGEN_DEVICE_FUNC static const type& run (const TransformType& x) { return x.matrix(); }\n};\n\ntemplate<typename MatrixType,typename Lhs>\nstruct traits<homogeneous_left_product_impl<Homogeneous<MatrixType,Vertical>,Lhs> >\n{\n  typedef typename take_matrix_for_product<Lhs>::type LhsMatrixType;\n  typedef typename remove_all<MatrixType>::type MatrixTypeCleaned;\n  typedef typename remove_all<LhsMatrixType>::type LhsMatrixTypeCleaned;\n  typedef typename make_proper_matrix_type<\n                 typename traits<MatrixTypeCleaned>::Scalar,\n                 LhsMatrixTypeCleaned::RowsAtCompileTime,\n                 MatrixTypeCleaned::ColsAtCompileTime,\n                 MatrixTypeCleaned::PlainObject::Options,\n                 LhsMatrixTypeCleaned::MaxRowsAtCompileTime,\n                 MatrixTypeCleaned::MaxColsAtCompileTime>::type ReturnType;\n};\n\ntemplate<typename 
MatrixType,typename Lhs>\nstruct homogeneous_left_product_impl<Homogeneous<MatrixType,Vertical>,Lhs>\n  : public ReturnByValue<homogeneous_left_product_impl<Homogeneous<MatrixType,Vertical>,Lhs> >\n{\n  typedef typename traits<homogeneous_left_product_impl>::LhsMatrixType LhsMatrixType;\n  typedef typename remove_all<LhsMatrixType>::type LhsMatrixTypeCleaned;\n  typedef typename remove_all<typename LhsMatrixTypeCleaned::Nested>::type LhsMatrixTypeNested;\n  EIGEN_DEVICE_FUNC homogeneous_left_product_impl(const Lhs& lhs, const MatrixType& rhs)\n    : m_lhs(take_matrix_for_product<Lhs>::run(lhs)),\n      m_rhs(rhs)\n  {}\n\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n  inline Index rows() const EIGEN_NOEXCEPT { return m_lhs.rows(); }\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n  inline Index cols() const EIGEN_NOEXCEPT { return m_rhs.cols(); }\n\n  template<typename Dest> EIGEN_DEVICE_FUNC void evalTo(Dest& dst) const\n  {\n    // FIXME investigate how to allow lazy evaluation of this product when possible\n    dst = Block<const LhsMatrixTypeNested,\n              LhsMatrixTypeNested::RowsAtCompileTime,\n              LhsMatrixTypeNested::ColsAtCompileTime==Dynamic?Dynamic:LhsMatrixTypeNested::ColsAtCompileTime-1>\n            (m_lhs,0,0,m_lhs.rows(),m_lhs.cols()-1) * m_rhs;\n    dst += m_lhs.col(m_lhs.cols()-1).rowwise()\n            .template replicate<MatrixType::ColsAtCompileTime>(m_rhs.cols());\n  }\n\n  typename LhsMatrixTypeCleaned::Nested m_lhs;\n  typename MatrixType::Nested m_rhs;\n};\n\ntemplate<typename MatrixType,typename Rhs>\nstruct traits<homogeneous_right_product_impl<Homogeneous<MatrixType,Horizontal>,Rhs> >\n{\n  typedef typename make_proper_matrix_type<typename traits<MatrixType>::Scalar,\n                 MatrixType::RowsAtCompileTime,\n                 Rhs::ColsAtCompileTime,\n                 MatrixType::PlainObject::Options,\n                 MatrixType::MaxRowsAtCompileTime,\n                 Rhs::MaxColsAtCompileTime>::type ReturnType;\n};\n\ntemplate<typename MatrixType,typename Rhs>\nstruct homogeneous_right_product_impl<Homogeneous<MatrixType,Horizontal>,Rhs>\n  : public ReturnByValue<homogeneous_right_product_impl<Homogeneous<MatrixType,Horizontal>,Rhs> >\n{\n  typedef typename remove_all<typename Rhs::Nested>::type RhsNested;\n  EIGEN_DEVICE_FUNC homogeneous_right_product_impl(const MatrixType& lhs, const Rhs& rhs)\n    : m_lhs(lhs), m_rhs(rhs)\n  {}\n\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index rows() const EIGEN_NOEXCEPT { return m_lhs.rows(); }\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index cols() const EIGEN_NOEXCEPT { return m_rhs.cols(); }\n\n  template<typename Dest> EIGEN_DEVICE_FUNC void evalTo(Dest& dst) const\n  {\n    // FIXME investigate how to allow lazy evaluation of this product when possible\n    dst = m_lhs * Block<const RhsNested,\n                        RhsNested::RowsAtCompileTime==Dynamic?Dynamic:RhsNested::RowsAtCompileTime-1,\n                        RhsNested::ColsAtCompileTime>\n            (m_rhs,0,0,m_rhs.rows()-1,m_rhs.cols());\n    dst += m_rhs.row(m_rhs.rows()-1).colwise()\n            .template replicate<MatrixType::RowsAtCompileTime>(m_lhs.rows());\n  }\n\n  typename MatrixType::Nested m_lhs;\n  typename Rhs::Nested m_rhs;\n};\n\ntemplate<typename ArgType,int Direction>\nstruct evaluator_traits<Homogeneous<ArgType,Direction> >\n{\n  typedef typename storage_kind_to_evaluator_kind<typename ArgType::StorageKind>::Kind Kind;\n  typedef HomogeneousShape Shape;\n};\n\ntemplate<> struct AssignmentKind<DenseShape,HomogeneousShape> { 
typedef Dense2Dense Kind; };\n\n\ntemplate<typename ArgType,int Direction>\nstruct unary_evaluator<Homogeneous<ArgType,Direction>, IndexBased>\n  : evaluator<typename Homogeneous<ArgType,Direction>::PlainObject >\n{\n  typedef Homogeneous<ArgType,Direction> XprType;\n  typedef typename XprType::PlainObject PlainObject;\n  typedef evaluator<PlainObject> Base;\n\n  EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& op)\n    : Base(), m_temp(op)\n  {\n    ::new (static_cast<Base*>(this)) Base(m_temp);\n  }\n\nprotected:\n  PlainObject m_temp;\n};\n\n// dense = homogeneous\ntemplate< typename DstXprType, typename ArgType, typename Scalar>\nstruct Assignment<DstXprType, Homogeneous<ArgType,Vertical>, internal::assign_op<Scalar,typename ArgType::Scalar>, Dense2Dense>\n{\n  typedef Homogeneous<ArgType,Vertical> SrcXprType;\n  EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,typename ArgType::Scalar> &)\n  {\n    Index dstRows = src.rows();\n    Index dstCols = src.cols();\n    if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))\n      dst.resize(dstRows, dstCols);\n\n    dst.template topRows<ArgType::RowsAtCompileTime>(src.nestedExpression().rows()) = src.nestedExpression();\n    dst.row(dst.rows()-1).setOnes();\n  }\n};\n\n// dense = homogeneous\ntemplate< typename DstXprType, typename ArgType, typename Scalar>\nstruct Assignment<DstXprType, Homogeneous<ArgType,Horizontal>, internal::assign_op<Scalar,typename ArgType::Scalar>, Dense2Dense>\n{\n  typedef Homogeneous<ArgType,Horizontal> SrcXprType;\n  EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,typename ArgType::Scalar> &)\n  {\n    Index dstRows = src.rows();\n    Index dstCols = src.cols();\n    if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))\n      dst.resize(dstRows, dstCols);\n\n    dst.template leftCols<ArgType::ColsAtCompileTime>(src.nestedExpression().cols()) = src.nestedExpression();\n    dst.col(dst.cols()-1).setOnes();\n  }\n};\n\ntemplate<typename LhsArg, typename Rhs, int ProductTag>\nstruct generic_product_impl<Homogeneous<LhsArg,Horizontal>, Rhs, HomogeneousShape, DenseShape, ProductTag>\n{\n  template<typename Dest>\n  EIGEN_DEVICE_FUNC static void evalTo(Dest& dst, const Homogeneous<LhsArg,Horizontal>& lhs, const Rhs& rhs)\n  {\n    homogeneous_right_product_impl<Homogeneous<LhsArg,Horizontal>, Rhs>(lhs.nestedExpression(), rhs).evalTo(dst);\n  }\n};\n\ntemplate<typename Lhs,typename Rhs>\nstruct homogeneous_right_product_refactoring_helper\n{\n  enum {\n    Dim  = Lhs::ColsAtCompileTime,\n    Rows = Lhs::RowsAtCompileTime\n  };\n  typedef typename Rhs::template ConstNRowsBlockXpr<Dim>::Type          LinearBlockConst;\n  typedef typename remove_const<LinearBlockConst>::type                 LinearBlock;\n  typedef typename Rhs::ConstRowXpr                                     ConstantColumn;\n  typedef Replicate<const ConstantColumn,Rows,1>                        ConstantBlock;\n  typedef Product<Lhs,LinearBlock,LazyProduct>                          LinearProduct;\n  typedef CwiseBinaryOp<internal::scalar_sum_op<typename Lhs::Scalar,typename Rhs::Scalar>, const LinearProduct, const ConstantBlock> Xpr;\n};\n\ntemplate<typename Lhs, typename Rhs, int ProductTag>\nstruct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, HomogeneousShape, DenseShape>\n : public evaluator<typename homogeneous_right_product_refactoring_helper<typename Lhs::NestedExpression,Rhs>::Xpr>\n{\n  typedef Product<Lhs, Rhs, 
LazyProduct> XprType;\n  typedef homogeneous_right_product_refactoring_helper<typename Lhs::NestedExpression,Rhs> helper;\n  typedef typename helper::ConstantBlock ConstantBlock;\n  typedef typename helper::Xpr RefactoredXpr;\n  typedef evaluator<RefactoredXpr> Base;\n\n  EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr)\n    : Base(  xpr.lhs().nestedExpression() .lazyProduct(  xpr.rhs().template topRows<helper::Dim>(xpr.lhs().nestedExpression().cols()) )\n            + ConstantBlock(xpr.rhs().row(xpr.rhs().rows()-1),xpr.lhs().rows(), 1) )\n  {}\n};\n\ntemplate<typename Lhs, typename RhsArg, int ProductTag>\nstruct generic_product_impl<Lhs, Homogeneous<RhsArg,Vertical>, DenseShape, HomogeneousShape, ProductTag>\n{\n  template<typename Dest>\n  EIGEN_DEVICE_FUNC static void evalTo(Dest& dst, const Lhs& lhs, const Homogeneous<RhsArg,Vertical>& rhs)\n  {\n    homogeneous_left_product_impl<Homogeneous<RhsArg,Vertical>, Lhs>(lhs, rhs.nestedExpression()).evalTo(dst);\n  }\n};\n\n// TODO: the following specialization is to address a regression from 3.2 to 3.3\n// In the future, this path should be optimized.\ntemplate<typename Lhs, typename RhsArg, int ProductTag>\nstruct generic_product_impl<Lhs, Homogeneous<RhsArg,Vertical>, TriangularShape, HomogeneousShape, ProductTag>\n{\n  template<typename Dest>\n  static void evalTo(Dest& dst, const Lhs& lhs, const Homogeneous<RhsArg,Vertical>& rhs)\n  {\n    dst.noalias() = lhs * rhs.eval();\n  }\n};\n\ntemplate<typename Lhs,typename Rhs>\nstruct homogeneous_left_product_refactoring_helper\n{\n  enum {\n    Dim = Rhs::RowsAtCompileTime,\n    Cols = Rhs::ColsAtCompileTime\n  };\n  typedef typename Lhs::template ConstNColsBlockXpr<Dim>::Type          LinearBlockConst;\n  typedef typename remove_const<LinearBlockConst>::type                 LinearBlock;\n  typedef typename Lhs::ConstColXpr                                     ConstantColumn;\n  typedef Replicate<const ConstantColumn,1,Cols>                        ConstantBlock;\n  typedef Product<LinearBlock,Rhs,LazyProduct>                          LinearProduct;\n  typedef CwiseBinaryOp<internal::scalar_sum_op<typename Lhs::Scalar,typename Rhs::Scalar>, const LinearProduct, const ConstantBlock> Xpr;\n};\n\ntemplate<typename Lhs, typename Rhs, int ProductTag>\nstruct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape, HomogeneousShape>\n : public evaluator<typename homogeneous_left_product_refactoring_helper<Lhs,typename Rhs::NestedExpression>::Xpr>\n{\n  typedef Product<Lhs, Rhs, LazyProduct> XprType;\n  typedef homogeneous_left_product_refactoring_helper<Lhs,typename Rhs::NestedExpression> helper;\n  typedef typename helper::ConstantBlock ConstantBlock;\n  typedef typename helper::Xpr RefactoredXpr;\n  typedef evaluator<RefactoredXpr> Base;\n\n  EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr)\n    : Base(   xpr.lhs().template leftCols<helper::Dim>(xpr.rhs().nestedExpression().rows()) .lazyProduct( xpr.rhs().nestedExpression() )\n            + ConstantBlock(xpr.lhs().col(xpr.lhs().cols()-1),1,xpr.rhs().cols()) )\n  {}\n};\n\ntemplate<typename Scalar, int Dim, int Mode,int Options, typename RhsArg, int ProductTag>\nstruct generic_product_impl<Transform<Scalar,Dim,Mode,Options>, Homogeneous<RhsArg,Vertical>, DenseShape, HomogeneousShape, ProductTag>\n{\n  typedef Transform<Scalar,Dim,Mode,Options> TransformType;\n  template<typename Dest>\n  EIGEN_DEVICE_FUNC static void evalTo(Dest& dst, const TransformType& lhs, const Homogeneous<RhsArg,Vertical>& 
rhs)\n  {\n    homogeneous_left_product_impl<Homogeneous<RhsArg,Vertical>, TransformType>(lhs, rhs.nestedExpression()).evalTo(dst);\n  }\n};\n\ntemplate<typename ExpressionType, int Side, bool Transposed>\nstruct permutation_matrix_product<ExpressionType, Side, Transposed, HomogeneousShape>\n  : public permutation_matrix_product<ExpressionType, Side, Transposed, DenseShape>\n{};\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_HOMOGENEOUS_H\n"
  },
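A compact sketch of the homogeneous()/hnormalized() pair documented in the header above (illustrative only; the perspective term is a made-up value): append w = 1, apply a 4x4 projective matrix, and divide the result back through its last coefficient.

```cpp
#include <iostream>
#include <Eigen/Geometry>

int main() {
  using namespace Eigen;

  // homogeneous() symbolically appends w = 1.
  Vector3d p(1.0, 2.0, 3.0);
  Vector4d hp = p.homogeneous();

  // A projective 4x4 matrix acts on the homogeneous vector; hnormalized()
  // divides the first three coefficients by the last to return to 3D.
  Matrix4d P = Matrix4d::Identity();
  P(3, 2) = 0.5;  // made-up perspective term
  Vector4d tp = P * hp;
  Vector3d q = tp.hnormalized();

  std::cout << q.transpose() << "\n";  // prints 0.4 0.8 1.2
}
```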
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Geometry/Hyperplane.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>\n// Copyright (C) 2008 Benoit Jacob <jacob.benoit.1@gmail.com>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_HYPERPLANE_H\n#define EIGEN_HYPERPLANE_H\n\nnamespace Eigen { \n\n/** \\geometry_module \\ingroup Geometry_Module\n  *\n  * \\class Hyperplane\n  *\n  * \\brief A hyperplane\n  *\n  * A hyperplane is an affine subspace of dimension n-1 in a space of dimension n.\n  * For example, a hyperplane in a plane is a line; a hyperplane in 3-space is a plane.\n  *\n  * \\tparam _Scalar the scalar type, i.e., the type of the coefficients\n  * \\tparam _AmbientDim the dimension of the ambient space, can be a compile time value or Dynamic.\n  *             Notice that the dimension of the hyperplane is _AmbientDim-1.\n  *\n  * This class represents an hyperplane as the zero set of the implicit equation\n  * \\f$ n \\cdot x + d = 0 \\f$ where \\f$ n \\f$ is a unit normal vector of the plane (linear part)\n  * and \\f$ d \\f$ is the distance (offset) to the origin.\n  */\ntemplate <typename _Scalar, int _AmbientDim, int _Options>\nclass Hyperplane\n{\npublic:\n  EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim==Dynamic ? Dynamic : _AmbientDim+1)\n  enum {\n    AmbientDimAtCompileTime = _AmbientDim,\n    Options = _Options\n  };\n  typedef _Scalar Scalar;\n  typedef typename NumTraits<Scalar>::Real RealScalar;\n  typedef Eigen::Index Index; ///< \\deprecated since Eigen 3.3\n  typedef Matrix<Scalar,AmbientDimAtCompileTime,1> VectorType;\n  typedef Matrix<Scalar,Index(AmbientDimAtCompileTime)==Dynamic\n                        ? Dynamic\n                        : Index(AmbientDimAtCompileTime)+1,1,Options> Coefficients;\n  typedef Block<Coefficients,AmbientDimAtCompileTime,1> NormalReturnType;\n  typedef const Block<const Coefficients,AmbientDimAtCompileTime,1> ConstNormalReturnType;\n\n  /** Default constructor without initialization */\n  EIGEN_DEVICE_FUNC inline Hyperplane() {}\n  \n  template<int OtherOptions>\n  EIGEN_DEVICE_FUNC Hyperplane(const Hyperplane<Scalar,AmbientDimAtCompileTime,OtherOptions>& other)\n   : m_coeffs(other.coeffs())\n  {}\n\n  /** Constructs a dynamic-size hyperplane with \\a _dim the dimension\n    * of the ambient space */\n  EIGEN_DEVICE_FUNC inline explicit Hyperplane(Index _dim) : m_coeffs(_dim+1) {}\n\n  /** Construct a plane from its normal \\a n and a point \\a e onto the plane.\n    * \\warning the vector normal is assumed to be normalized.\n    */\n  EIGEN_DEVICE_FUNC inline Hyperplane(const VectorType& n, const VectorType& e)\n    : m_coeffs(n.size()+1)\n  {\n    normal() = n;\n    offset() = -n.dot(e);\n  }\n\n  /** Constructs a plane from its normal \\a n and distance to the origin \\a d\n    * such that the algebraic equation of the plane is \\f$ n \\cdot x + d = 0 \\f$.\n    * \\warning the vector normal is assumed to be normalized.\n    */\n  EIGEN_DEVICE_FUNC inline Hyperplane(const VectorType& n, const Scalar& d)\n    : m_coeffs(n.size()+1)\n  {\n    normal() = n;\n    offset() = d;\n  }\n\n  /** Constructs a hyperplane passing through the two points. 
If the dimension of the ambient space\n    * is greater than 2, then there isn't uniqueness, so an arbitrary choice is made.\n    */\n  EIGEN_DEVICE_FUNC static inline Hyperplane Through(const VectorType& p0, const VectorType& p1)\n  {\n    Hyperplane result(p0.size());\n    result.normal() = (p1 - p0).unitOrthogonal();\n    result.offset() = -p0.dot(result.normal());\n    return result;\n  }\n\n  /** Constructs a hyperplane passing through the three points. The dimension of the ambient space\n    * is required to be exactly 3.\n    */\n  EIGEN_DEVICE_FUNC static inline Hyperplane Through(const VectorType& p0, const VectorType& p1, const VectorType& p2)\n  {\n    EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(VectorType, 3)\n    Hyperplane result(p0.size());\n    VectorType v0(p2 - p0), v1(p1 - p0);\n    result.normal() = v0.cross(v1);\n    RealScalar norm = result.normal().norm();\n    if(norm <= v0.norm() * v1.norm() * NumTraits<RealScalar>::epsilon())\n    {\n      Matrix<Scalar,2,3> m; m << v0.transpose(), v1.transpose();\n      JacobiSVD<Matrix<Scalar,2,3> > svd(m, ComputeFullV);\n      result.normal() = svd.matrixV().col(2);\n    }\n    else\n      result.normal() /= norm;\n    result.offset() = -p0.dot(result.normal());\n    return result;\n  }\n\n  /** Constructs a hyperplane passing through the parametrized line \\a parametrized.\n    * If the dimension of the ambient space is greater than 2, then there isn't uniqueness,\n    * so an arbitrary choice is made.\n    */\n  // FIXME to be consistent with the rest this could be implemented as a static Through function ??\n  EIGEN_DEVICE_FUNC explicit Hyperplane(const ParametrizedLine<Scalar, AmbientDimAtCompileTime>& parametrized)\n  {\n    normal() = parametrized.direction().unitOrthogonal();\n    offset() = -parametrized.origin().dot(normal());\n  }\n\n  EIGEN_DEVICE_FUNC ~Hyperplane() {}\n\n  /** \\returns the dimension in which the plane holds */\n  EIGEN_DEVICE_FUNC inline Index dim() const { return AmbientDimAtCompileTime==Dynamic ? 
m_coeffs.size()-1 : Index(AmbientDimAtCompileTime); }\n\n  /** normalizes \\c *this */\n  EIGEN_DEVICE_FUNC void normalize(void)\n  {\n    m_coeffs /= normal().norm();\n  }\n\n  /** \\returns the signed distance between the plane \\c *this and a point \\a p.\n    * \\sa absDistance()\n    */\n  EIGEN_DEVICE_FUNC inline Scalar signedDistance(const VectorType& p) const { return normal().dot(p) + offset(); }\n\n  /** \\returns the absolute distance between the plane \\c *this and a point \\a p.\n    * \\sa signedDistance()\n    */\n  EIGEN_DEVICE_FUNC inline Scalar absDistance(const VectorType& p) const { return numext::abs(signedDistance(p)); }\n\n  /** \\returns the projection of a point \\a p onto the plane \\c *this.\n    */\n  EIGEN_DEVICE_FUNC inline VectorType projection(const VectorType& p) const { return p - signedDistance(p) * normal(); }\n\n  /** \\returns a constant reference to the unit normal vector of the plane, which corresponds\n    * to the linear part of the implicit equation.\n    */\n  EIGEN_DEVICE_FUNC inline ConstNormalReturnType normal() const { return ConstNormalReturnType(m_coeffs,0,0,dim(),1); }\n\n  /** \\returns a non-constant reference to the unit normal vector of the plane, which corresponds\n    * to the linear part of the implicit equation.\n    */\n  EIGEN_DEVICE_FUNC inline NormalReturnType normal() { return NormalReturnType(m_coeffs,0,0,dim(),1); }\n\n  /** \\returns the distance to the origin, which is also the \"constant term\" of the implicit equation\n    * \\warning the vector normal is assumed to be normalized.\n    */\n  EIGEN_DEVICE_FUNC inline const Scalar& offset() const { return m_coeffs.coeff(dim()); }\n\n  /** \\returns a non-constant reference to the distance to the origin, which is also the constant part\n    * of the implicit equation */\n  EIGEN_DEVICE_FUNC inline Scalar& offset() { return m_coeffs(dim()); }\n\n  /** \\returns a constant reference to the coefficients c_i of the plane equation:\n    * \\f$ c_0*x_0 + ... + c_{d-1}*x_{d-1} + c_d = 0 \\f$\n    */\n  EIGEN_DEVICE_FUNC inline const Coefficients& coeffs() const { return m_coeffs; }\n\n  /** \\returns a non-constant reference to the coefficients c_i of the plane equation:\n    * \\f$ c_0*x_0 + ... + c_{d-1}*x_{d-1} + c_d = 0 \\f$\n    */\n  EIGEN_DEVICE_FUNC inline Coefficients& coeffs() { return m_coeffs; }\n\n  /** \\returns the intersection of *this with \\a other.\n    *\n    * \\warning The ambient space must be a plane, i.e. have dimension 2, so that \\c *this and \\a other are lines.\n    *\n    * \\note If \\a other is approximately parallel to *this, this method will return any point on *this.\n    */\n  EIGEN_DEVICE_FUNC VectorType intersection(const Hyperplane& other) const\n  {\n    EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(VectorType, 2)\n    Scalar det = coeffs().coeff(0) * other.coeffs().coeff(1) - coeffs().coeff(1) * other.coeffs().coeff(0);\n    // since the line equations ax+by=c are normalized with a^2+b^2=1, the following tests\n    // whether the two lines are approximately parallel.\n    if(internal::isMuchSmallerThan(det, Scalar(1)))\n    {   // special case where the two lines are approximately parallel. 
Pick any point on the first line.\n        if(numext::abs(coeffs().coeff(1))>numext::abs(coeffs().coeff(0)))\n            return VectorType(coeffs().coeff(1), -coeffs().coeff(2)/coeffs().coeff(1)-coeffs().coeff(0));\n        else\n            return VectorType(-coeffs().coeff(2)/coeffs().coeff(0)-coeffs().coeff(1), coeffs().coeff(0));\n    }\n    else\n    {   // general case\n        Scalar invdet = Scalar(1) / det;\n        return VectorType(invdet*(coeffs().coeff(1)*other.coeffs().coeff(2)-other.coeffs().coeff(1)*coeffs().coeff(2)),\n                          invdet*(other.coeffs().coeff(0)*coeffs().coeff(2)-coeffs().coeff(0)*other.coeffs().coeff(2)));\n    }\n  }\n\n  /** Applies the transformation matrix \\a mat to \\c *this and returns a reference to \\c *this.\n    *\n    * \\param mat the Dim x Dim transformation matrix\n    * \\param traits specifies whether the matrix \\a mat represents an #Isometry\n    *               or a more generic #Affine transformation. The default is #Affine.\n    */\n  template<typename XprType>\n  EIGEN_DEVICE_FUNC inline Hyperplane& transform(const MatrixBase<XprType>& mat, TransformTraits traits = Affine)\n  {\n    if (traits==Affine)\n    {\n      normal() = mat.inverse().transpose() * normal();\n      m_coeffs /= normal().norm();\n    }\n    else if (traits==Isometry)\n      normal() = mat * normal();\n    else\n    {\n      eigen_assert(0 && \"invalid traits value in Hyperplane::transform()\");\n    }\n    return *this;\n  }\n\n  /** Applies the transformation \\a t to \\c *this and returns a reference to \\c *this.\n    *\n    * \\param t the transformation of dimension Dim\n    * \\param traits specifies whether the transformation \\a t represents an #Isometry\n    *               or a more generic #Affine transformation. 
The default is #Affine.\n    *               Other kind of transformations are not supported.\n    */\n  template<int TrOptions>\n  EIGEN_DEVICE_FUNC inline Hyperplane& transform(const Transform<Scalar,AmbientDimAtCompileTime,Affine,TrOptions>& t,\n                                TransformTraits traits = Affine)\n  {\n    transform(t.linear(), traits);\n    offset() -= normal().dot(t.translation());\n    return *this;\n  }\n\n  /** \\returns \\c *this with scalar type casted to \\a NewScalarType\n    *\n    * Note that if \\a NewScalarType is equal to the current scalar type of \\c *this\n    * then this function smartly returns a const reference to \\c *this.\n    */\n  template<typename NewScalarType>\n  EIGEN_DEVICE_FUNC inline typename internal::cast_return_type<Hyperplane,\n           Hyperplane<NewScalarType,AmbientDimAtCompileTime,Options> >::type cast() const\n  {\n    return typename internal::cast_return_type<Hyperplane,\n                    Hyperplane<NewScalarType,AmbientDimAtCompileTime,Options> >::type(*this);\n  }\n\n  /** Copy constructor with scalar type conversion */\n  template<typename OtherScalarType,int OtherOptions>\n  EIGEN_DEVICE_FUNC inline explicit Hyperplane(const Hyperplane<OtherScalarType,AmbientDimAtCompileTime,OtherOptions>& other)\n  { m_coeffs = other.coeffs().template cast<Scalar>(); }\n\n  /** \\returns \\c true if \\c *this is approximately equal to \\a other, within the precision\n    * determined by \\a prec.\n    *\n    * \\sa MatrixBase::isApprox() */\n  template<int OtherOptions>\n  EIGEN_DEVICE_FUNC bool isApprox(const Hyperplane<Scalar,AmbientDimAtCompileTime,OtherOptions>& other, const typename NumTraits<Scalar>::Real& prec = NumTraits<Scalar>::dummy_precision()) const\n  { return m_coeffs.isApprox(other.m_coeffs, prec); }\n\nprotected:\n\n  Coefficients m_coeffs;\n};\n\n} // end namespace Eigen\n\n#endif // EIGEN_HYPERPLANE_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Geometry/OrthoMethods.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>\n// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_ORTHOMETHODS_H\n#define EIGEN_ORTHOMETHODS_H\n\nnamespace Eigen { \n\n/** \\geometry_module \\ingroup Geometry_Module\n  *\n  * \\returns the cross product of \\c *this and \\a other\n  *\n  * Here is a very good explanation of cross-product: http://xkcd.com/199/\n  * \n  * With complex numbers, the cross product is implemented as\n  * \\f$ (\\mathbf{a}+i\\mathbf{b}) \\times (\\mathbf{c}+i\\mathbf{d}) = (\\mathbf{a} \\times \\mathbf{c} - \\mathbf{b} \\times \\mathbf{d}) - i(\\mathbf{a} \\times \\mathbf{d} - \\mathbf{b} \\times \\mathbf{c})\\f$\n  * \n  * \\sa MatrixBase::cross3()\n  */\ntemplate<typename Derived>\ntemplate<typename OtherDerived>\n#ifndef EIGEN_PARSED_BY_DOXYGEN\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\ntypename MatrixBase<Derived>::template cross_product_return_type<OtherDerived>::type\n#else\ntypename MatrixBase<Derived>::PlainObject\n#endif\nMatrixBase<Derived>::cross(const MatrixBase<OtherDerived>& other) const\n{\n  EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Derived,3)\n  EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(OtherDerived,3)\n\n  // Note that there is no need for an expression here since the compiler\n  // optimize such a small temporary very well (even within a complex expression)\n  typename internal::nested_eval<Derived,2>::type lhs(derived());\n  typename internal::nested_eval<OtherDerived,2>::type rhs(other.derived());\n  return typename cross_product_return_type<OtherDerived>::type(\n    numext::conj(lhs.coeff(1) * rhs.coeff(2) - lhs.coeff(2) * rhs.coeff(1)),\n    numext::conj(lhs.coeff(2) * rhs.coeff(0) - lhs.coeff(0) * rhs.coeff(2)),\n    numext::conj(lhs.coeff(0) * rhs.coeff(1) - lhs.coeff(1) * rhs.coeff(0))\n  );\n}\n\nnamespace internal {\n\ntemplate< int Arch,typename VectorLhs,typename VectorRhs,\n          typename Scalar = typename VectorLhs::Scalar,\n          bool Vectorizable = bool((VectorLhs::Flags&VectorRhs::Flags)&PacketAccessBit)>\nstruct cross3_impl {\n  EIGEN_DEVICE_FUNC static inline typename internal::plain_matrix_type<VectorLhs>::type\n  run(const VectorLhs& lhs, const VectorRhs& rhs)\n  {\n    return typename internal::plain_matrix_type<VectorLhs>::type(\n      numext::conj(lhs.coeff(1) * rhs.coeff(2) - lhs.coeff(2) * rhs.coeff(1)),\n      numext::conj(lhs.coeff(2) * rhs.coeff(0) - lhs.coeff(0) * rhs.coeff(2)),\n      numext::conj(lhs.coeff(0) * rhs.coeff(1) - lhs.coeff(1) * rhs.coeff(0)),\n      0\n    );\n  }\n};\n\n}\n\n/** \\geometry_module \\ingroup Geometry_Module\n  *\n  * \\returns the cross product of \\c *this and \\a other using only the x, y, and z coefficients\n  *\n  * The size of \\c *this and \\a other must be four. 
This function is especially useful\n  * when using 4D vectors instead of 3D ones to get advantage of SSE/AltiVec vectorization.\n  *\n  * \\sa MatrixBase::cross()\n  */\ntemplate<typename Derived>\ntemplate<typename OtherDerived>\nEIGEN_DEVICE_FUNC inline typename MatrixBase<Derived>::PlainObject\nMatrixBase<Derived>::cross3(const MatrixBase<OtherDerived>& other) const\n{\n  EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Derived,4)\n  EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(OtherDerived,4)\n\n  typedef typename internal::nested_eval<Derived,2>::type DerivedNested;\n  typedef typename internal::nested_eval<OtherDerived,2>::type OtherDerivedNested;\n  DerivedNested lhs(derived());\n  OtherDerivedNested rhs(other.derived());\n\n  return internal::cross3_impl<Architecture::Target,\n                        typename internal::remove_all<DerivedNested>::type,\n                        typename internal::remove_all<OtherDerivedNested>::type>::run(lhs,rhs);\n}\n\n/** \\geometry_module \\ingroup Geometry_Module\n  *\n  * \\returns a matrix expression of the cross product of each column or row\n  * of the referenced expression with the \\a other vector.\n  *\n  * The referenced matrix must have one dimension equal to 3.\n  * The result matrix has the same dimensions than the referenced one.\n  *\n  * \\sa MatrixBase::cross() */\ntemplate<typename ExpressionType, int Direction>\ntemplate<typename OtherDerived>\nEIGEN_DEVICE_FUNC \nconst typename VectorwiseOp<ExpressionType,Direction>::CrossReturnType\nVectorwiseOp<ExpressionType,Direction>::cross(const MatrixBase<OtherDerived>& other) const\n{\n  EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(OtherDerived,3)\n  EIGEN_STATIC_ASSERT((internal::is_same<Scalar, typename OtherDerived::Scalar>::value),\n    YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)\n  \n  typename internal::nested_eval<ExpressionType,2>::type mat(_expression());\n  typename internal::nested_eval<OtherDerived,2>::type vec(other.derived());\n\n  CrossReturnType res(_expression().rows(),_expression().cols());\n  if(Direction==Vertical)\n  {\n    eigen_assert(CrossReturnType::RowsAtCompileTime==3 && \"the matrix must have exactly 3 rows\");\n    res.row(0) = (mat.row(1) * vec.coeff(2) - mat.row(2) * vec.coeff(1)).conjugate();\n    res.row(1) = (mat.row(2) * vec.coeff(0) - mat.row(0) * vec.coeff(2)).conjugate();\n    res.row(2) = (mat.row(0) * vec.coeff(1) - mat.row(1) * vec.coeff(0)).conjugate();\n  }\n  else\n  {\n    eigen_assert(CrossReturnType::ColsAtCompileTime==3 && \"the matrix must have exactly 3 columns\");\n    res.col(0) = (mat.col(1) * vec.coeff(2) - mat.col(2) * vec.coeff(1)).conjugate();\n    res.col(1) = (mat.col(2) * vec.coeff(0) - mat.col(0) * vec.coeff(2)).conjugate();\n    res.col(2) = (mat.col(0) * vec.coeff(1) - mat.col(1) * vec.coeff(0)).conjugate();\n  }\n  return res;\n}\n\nnamespace internal {\n\ntemplate<typename Derived, int Size = Derived::SizeAtCompileTime>\nstruct unitOrthogonal_selector\n{\n  typedef typename plain_matrix_type<Derived>::type VectorType;\n  typedef typename traits<Derived>::Scalar Scalar;\n  typedef typename NumTraits<Scalar>::Real RealScalar;\n  typedef Matrix<Scalar,2,1> Vector2;\n  EIGEN_DEVICE_FUNC\n  static inline VectorType run(const Derived& src)\n  {\n    VectorType perp = VectorType::Zero(src.size());\n    Index maxi = 0;\n    Index sndi = 0;\n    src.cwiseAbs().maxCoeff(&maxi);\n    if (maxi==0)\n      sndi = 1;\n    RealScalar invnm = RealScalar(1)/(Vector2() << 
src.coeff(sndi),src.coeff(maxi)).finished().norm();\n    perp.coeffRef(maxi) = -numext::conj(src.coeff(sndi)) * invnm;\n    perp.coeffRef(sndi) =  numext::conj(src.coeff(maxi)) * invnm;\n\n    return perp;\n   }\n};\n\ntemplate<typename Derived>\nstruct unitOrthogonal_selector<Derived,3>\n{\n  typedef typename plain_matrix_type<Derived>::type VectorType;\n  typedef typename traits<Derived>::Scalar Scalar;\n  typedef typename NumTraits<Scalar>::Real RealScalar;\n  EIGEN_DEVICE_FUNC\n  static inline VectorType run(const Derived& src)\n  {\n    VectorType perp;\n    /* Let us compute the crossed product of *this with a vector\n     * that is not too close to being colinear to *this.\n     */\n\n    /* unless the x and y coords are both close to zero, we can\n     * simply take ( -y, x, 0 ) and normalize it.\n     */\n    if((!isMuchSmallerThan(src.x(), src.z()))\n    || (!isMuchSmallerThan(src.y(), src.z())))\n    {\n      RealScalar invnm = RealScalar(1)/src.template head<2>().norm();\n      perp.coeffRef(0) = -numext::conj(src.y())*invnm;\n      perp.coeffRef(1) = numext::conj(src.x())*invnm;\n      perp.coeffRef(2) = 0;\n    }\n    /* if both x and y are close to zero, then the vector is close\n     * to the z-axis, so it's far from colinear to the x-axis for instance.\n     * So we take the crossed product with (1,0,0) and normalize it.\n     */\n    else\n    {\n      RealScalar invnm = RealScalar(1)/src.template tail<2>().norm();\n      perp.coeffRef(0) = 0;\n      perp.coeffRef(1) = -numext::conj(src.z())*invnm;\n      perp.coeffRef(2) = numext::conj(src.y())*invnm;\n    }\n\n    return perp;\n   }\n};\n\ntemplate<typename Derived>\nstruct unitOrthogonal_selector<Derived,2>\n{\n  typedef typename plain_matrix_type<Derived>::type VectorType;\n  EIGEN_DEVICE_FUNC\n  static inline VectorType run(const Derived& src)\n  { return VectorType(-numext::conj(src.y()), numext::conj(src.x())).normalized(); }\n};\n\n} // end namespace internal\n\n/** \\geometry_module \\ingroup Geometry_Module\n  *\n  * \\returns a unit vector which is orthogonal to \\c *this\n  *\n  * The size of \\c *this must be at least 2. If the size is exactly 2,\n  * then the returned vector is a counter clock wise rotation of \\c *this, i.e., (-y,x).normalized().\n  *\n  * \\sa cross()\n  */\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC typename MatrixBase<Derived>::PlainObject\nMatrixBase<Derived>::unitOrthogonal() const\n{\n  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)\n  return internal::unitOrthogonal_selector<Derived>::run(derived());\n}\n\n} // end namespace Eigen\n\n#endif // EIGEN_ORTHOMETHODS_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Geometry/ParametrizedLine.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>\n// Copyright (C) 2008 Benoit Jacob <jacob.benoit.1@gmail.com>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_PARAMETRIZEDLINE_H\n#define EIGEN_PARAMETRIZEDLINE_H\n\nnamespace Eigen { \n\n/** \\geometry_module \\ingroup Geometry_Module\n  *\n  * \\class ParametrizedLine\n  *\n  * \\brief A parametrized line\n  *\n  * A parametrized line is defined by an origin point \\f$ \\mathbf{o} \\f$ and a unit\n  * direction vector \\f$ \\mathbf{d} \\f$ such that the line corresponds to\n  * the set \\f$ l(t) = \\mathbf{o} + t \\mathbf{d} \\f$, \\f$ t \\in \\mathbf{R} \\f$.\n  *\n  * \\tparam _Scalar the scalar type, i.e., the type of the coefficients\n  * \\tparam _AmbientDim the dimension of the ambient space, can be a compile time value or Dynamic.\n  */\ntemplate <typename _Scalar, int _AmbientDim, int _Options>\nclass ParametrizedLine\n{\npublic:\n  EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)\n  enum {\n    AmbientDimAtCompileTime = _AmbientDim,\n    Options = _Options\n  };\n  typedef _Scalar Scalar;\n  typedef typename NumTraits<Scalar>::Real RealScalar;\n  typedef Eigen::Index Index; ///< \\deprecated since Eigen 3.3\n  typedef Matrix<Scalar,AmbientDimAtCompileTime,1,Options> VectorType;\n\n  /** Default constructor without initialization */\n  EIGEN_DEVICE_FUNC inline ParametrizedLine() {}\n  \n  template<int OtherOptions>\n  EIGEN_DEVICE_FUNC ParametrizedLine(const ParametrizedLine<Scalar,AmbientDimAtCompileTime,OtherOptions>& other)\n   : m_origin(other.origin()), m_direction(other.direction())\n  {}\n\n  /** Constructs a dynamic-size line with \\a _dim the dimension\n    * of the ambient space */\n  EIGEN_DEVICE_FUNC inline explicit ParametrizedLine(Index _dim) : m_origin(_dim), m_direction(_dim) {}\n\n  /** Initializes a parametrized line of direction \\a direction and origin \\a origin.\n    * \\warning the vector direction is assumed to be normalized.\n    */\n  EIGEN_DEVICE_FUNC ParametrizedLine(const VectorType& origin, const VectorType& direction)\n    : m_origin(origin), m_direction(direction) {}\n\n  template <int OtherOptions>\n  EIGEN_DEVICE_FUNC explicit ParametrizedLine(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane);\n\n  /** Constructs a parametrized line going from \\a p0 to \\a p1. 
*/\n  EIGEN_DEVICE_FUNC static inline ParametrizedLine Through(const VectorType& p0, const VectorType& p1)\n  { return ParametrizedLine(p0, (p1-p0).normalized()); }\n\n  EIGEN_DEVICE_FUNC ~ParametrizedLine() {}\n\n  /** \\returns the dimension in which the line holds */\n  EIGEN_DEVICE_FUNC inline Index dim() const { return m_direction.size(); }\n\n  EIGEN_DEVICE_FUNC const VectorType& origin() const { return m_origin; }\n  EIGEN_DEVICE_FUNC VectorType& origin() { return m_origin; }\n\n  EIGEN_DEVICE_FUNC const VectorType& direction() const { return m_direction; }\n  EIGEN_DEVICE_FUNC VectorType& direction() { return m_direction; }\n\n  /** \\returns the squared distance of a point \\a p to its projection onto the line \\c *this.\n    * \\sa distance()\n    */\n  EIGEN_DEVICE_FUNC RealScalar squaredDistance(const VectorType& p) const\n  {\n    VectorType diff = p - origin();\n    return (diff - direction().dot(diff) * direction()).squaredNorm();\n  }\n  /** \\returns the distance of a point \\a p to its projection onto the line \\c *this.\n    * \\sa squaredDistance()\n    */\n  EIGEN_DEVICE_FUNC RealScalar distance(const VectorType& p) const { EIGEN_USING_STD(sqrt) return sqrt(squaredDistance(p)); }\n\n  /** \\returns the projection of a point \\a p onto the line \\c *this. */\n  EIGEN_DEVICE_FUNC VectorType projection(const VectorType& p) const\n  { return origin() + direction().dot(p-origin()) * direction(); }\n\n  EIGEN_DEVICE_FUNC VectorType pointAt(const Scalar& t) const;\n  \n  template <int OtherOptions>\n  EIGEN_DEVICE_FUNC Scalar intersectionParameter(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane) const;\n \n  template <int OtherOptions>\n  EIGEN_DEVICE_FUNC Scalar intersection(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane) const;\n  \n  template <int OtherOptions>\n  EIGEN_DEVICE_FUNC VectorType intersectionPoint(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane) const;\n\n  /** Applies the transformation matrix \\a mat to \\c *this and returns a reference to \\c *this.\n    *\n    * \\param mat the Dim x Dim transformation matrix\n    * \\param traits specifies whether the matrix \\a mat represents an #Isometry\n    *               or a more generic #Affine transformation. The default is #Affine.\n    */\n  template<typename XprType>\n  EIGEN_DEVICE_FUNC inline ParametrizedLine& transform(const MatrixBase<XprType>& mat, TransformTraits traits = Affine)\n  {\n    if (traits==Affine)\n      direction() = (mat * direction()).normalized();\n    else if (traits==Isometry)\n      direction() = mat * direction();\n    else\n    {\n      eigen_assert(0 && \"invalid traits value in ParametrizedLine::transform()\");\n    }\n    origin() = mat * origin();\n    return *this;\n  }\n\n  /** Applies the transformation \\a t to \\c *this and returns a reference to \\c *this.\n    *\n    * \\param t the transformation of dimension Dim\n    * \\param traits specifies whether the transformation \\a t represents an #Isometry\n    *               or a more generic #Affine transformation. 
The default is #Affine.\n    *               Other kind of transformations are not supported.\n    */\n  template<int TrOptions>\n  EIGEN_DEVICE_FUNC inline ParametrizedLine& transform(const Transform<Scalar,AmbientDimAtCompileTime,Affine,TrOptions>& t,\n                                                       TransformTraits traits = Affine)\n  {\n    transform(t.linear(), traits);\n    origin() += t.translation();\n    return *this;\n  }\n\n/** \\returns \\c *this with scalar type casted to \\a NewScalarType\n    *\n    * Note that if \\a NewScalarType is equal to the current scalar type of \\c *this\n    * then this function smartly returns a const reference to \\c *this.\n    */\n  template<typename NewScalarType>\n  EIGEN_DEVICE_FUNC inline typename internal::cast_return_type<ParametrizedLine,\n           ParametrizedLine<NewScalarType,AmbientDimAtCompileTime,Options> >::type cast() const\n  {\n    return typename internal::cast_return_type<ParametrizedLine,\n                    ParametrizedLine<NewScalarType,AmbientDimAtCompileTime,Options> >::type(*this);\n  }\n\n  /** Copy constructor with scalar type conversion */\n  template<typename OtherScalarType,int OtherOptions>\n  EIGEN_DEVICE_FUNC inline explicit ParametrizedLine(const ParametrizedLine<OtherScalarType,AmbientDimAtCompileTime,OtherOptions>& other)\n  {\n    m_origin = other.origin().template cast<Scalar>();\n    m_direction = other.direction().template cast<Scalar>();\n  }\n\n  /** \\returns \\c true if \\c *this is approximately equal to \\a other, within the precision\n    * determined by \\a prec.\n    *\n    * \\sa MatrixBase::isApprox() */\n  EIGEN_DEVICE_FUNC bool isApprox(const ParametrizedLine& other, const typename NumTraits<Scalar>::Real& prec = NumTraits<Scalar>::dummy_precision()) const\n  { return m_origin.isApprox(other.m_origin, prec) && m_direction.isApprox(other.m_direction, prec); }\n\nprotected:\n\n  VectorType m_origin, m_direction;\n};\n\n/** Constructs a parametrized line from a 2D hyperplane\n  *\n  * \\warning the ambient space must have dimension 2 such that the hyperplane actually describes a line\n  */\ntemplate <typename _Scalar, int _AmbientDim, int _Options>\ntemplate <int OtherOptions>\nEIGEN_DEVICE_FUNC inline ParametrizedLine<_Scalar, _AmbientDim,_Options>::ParametrizedLine(const Hyperplane<_Scalar, _AmbientDim,OtherOptions>& hyperplane)\n{\n  EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(VectorType, 2)\n  direction() = hyperplane.normal().unitOrthogonal();\n  origin() = -hyperplane.normal()*hyperplane.offset();\n}\n\n/** \\returns the point at \\a t along this line\n  */\ntemplate <typename _Scalar, int _AmbientDim, int _Options>\nEIGEN_DEVICE_FUNC inline typename ParametrizedLine<_Scalar, _AmbientDim,_Options>::VectorType\nParametrizedLine<_Scalar, _AmbientDim,_Options>::pointAt(const _Scalar& t) const\n{\n  return origin() + (direction()*t); \n}\n\n/** \\returns the parameter value of the intersection between \\c *this and the given \\a hyperplane\n  */\ntemplate <typename _Scalar, int _AmbientDim, int _Options>\ntemplate <int OtherOptions>\nEIGEN_DEVICE_FUNC inline _Scalar ParametrizedLine<_Scalar, _AmbientDim,_Options>::intersectionParameter(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane) const\n{\n  return -(hyperplane.offset()+hyperplane.normal().dot(origin()))\n          / hyperplane.normal().dot(direction());\n}\n\n\n/** \\deprecated use intersectionParameter()\n  * \\returns the parameter value of the intersection between \\c *this and the given \\a hyperplane\n  
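*\n  * A migration sketch (illustrative; \\c line and \\c plane are placeholder objects):\n  * \\code\n  * Scalar t = line.intersectionParameter(plane); // preferred over the deprecated line.intersection(plane)\n  * \\endcode\n  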
*/\ntemplate <typename _Scalar, int _AmbientDim, int _Options>\ntemplate <int OtherOptions>\nEIGEN_DEVICE_FUNC inline _Scalar ParametrizedLine<_Scalar, _AmbientDim,_Options>::intersection(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane) const\n{\n  return intersectionParameter(hyperplane);\n}\n\n/** \\returns the point of intersection between \\c *this and the given \\a hyperplane\n  */\ntemplate <typename _Scalar, int _AmbientDim, int _Options>\ntemplate <int OtherOptions>\nEIGEN_DEVICE_FUNC inline typename ParametrizedLine<_Scalar, _AmbientDim,_Options>::VectorType\nParametrizedLine<_Scalar, _AmbientDim,_Options>::intersectionPoint(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane) const\n{\n  return pointAt(intersectionParameter(hyperplane));\n}\n\n} // end namespace Eigen\n\n#endif // EIGEN_PARAMETRIZEDLINE_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Geometry/Quaternion.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>\n// Copyright (C) 2009 Mathieu Gautier <mathieu.gautier@cea.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_QUATERNION_H\n#define EIGEN_QUATERNION_H\nnamespace Eigen { \n\n\n/***************************************************************************\n* Definition of QuaternionBase<Derived>\n* The implementation is at the end of the file\n***************************************************************************/\n\nnamespace internal {\ntemplate<typename Other,\n         int OtherRows=Other::RowsAtCompileTime,\n         int OtherCols=Other::ColsAtCompileTime>\nstruct quaternionbase_assign_impl;\n}\n\n/** \\geometry_module \\ingroup Geometry_Module\n  * \\class QuaternionBase\n  * \\brief Base class for quaternion expressions\n  * \\tparam Derived derived type (CRTP)\n  * \\sa class Quaternion\n  */\ntemplate<class Derived>\nclass QuaternionBase : public RotationBase<Derived, 3>\n{\n public:\n  typedef RotationBase<Derived, 3> Base;\n\n  using Base::operator*;\n  using Base::derived;\n\n  typedef typename internal::traits<Derived>::Scalar Scalar;\n  typedef typename NumTraits<Scalar>::Real RealScalar;\n  typedef typename internal::traits<Derived>::Coefficients Coefficients;\n  typedef typename Coefficients::CoeffReturnType CoeffReturnType;\n  typedef typename internal::conditional<bool(internal::traits<Derived>::Flags&LvalueBit),\n                                        Scalar&, CoeffReturnType>::type NonConstCoeffReturnType;\n\n\n  enum {\n    Flags = Eigen::internal::traits<Derived>::Flags\n  };\n\n // typedef typename Matrix<Scalar,4,1> Coefficients;\n  /** the type of a 3D vector */\n  typedef Matrix<Scalar,3,1> Vector3;\n  /** the equivalent rotation matrix type */\n  typedef Matrix<Scalar,3,3> Matrix3;\n  /** the equivalent angle-axis type */\n  typedef AngleAxis<Scalar> AngleAxisType;\n\n\n\n  /** \\returns the \\c x coefficient */\n  EIGEN_DEVICE_FUNC inline CoeffReturnType x() const { return this->derived().coeffs().coeff(0); }\n  /** \\returns the \\c y coefficient */\n  EIGEN_DEVICE_FUNC inline CoeffReturnType y() const { return this->derived().coeffs().coeff(1); }\n  /** \\returns the \\c z coefficient */\n  EIGEN_DEVICE_FUNC inline CoeffReturnType z() const { return this->derived().coeffs().coeff(2); }\n  /** \\returns the \\c w coefficient */\n  EIGEN_DEVICE_FUNC inline CoeffReturnType w() const { return this->derived().coeffs().coeff(3); }\n\n  /** \\returns a reference to the \\c x coefficient (if Derived is a non-const lvalue) */\n  EIGEN_DEVICE_FUNC inline NonConstCoeffReturnType x() { return this->derived().coeffs().x(); }\n  /** \\returns a reference to the \\c y coefficient (if Derived is a non-const lvalue) */\n  EIGEN_DEVICE_FUNC inline NonConstCoeffReturnType y() { return this->derived().coeffs().y(); }\n  /** \\returns a reference to the \\c z coefficient (if Derived is a non-const lvalue) */\n  EIGEN_DEVICE_FUNC inline NonConstCoeffReturnType z() { return this->derived().coeffs().z(); }\n  /** \\returns a reference to the \\c w coefficient (if Derived is a non-const lvalue) */\n  EIGEN_DEVICE_FUNC inline NonConstCoeffReturnType w() { return this->derived().coeffs().w(); }\n\n  /** \\returns a read-only 
vector expression of the imaginary part (x,y,z) */\n  EIGEN_DEVICE_FUNC inline const VectorBlock<const Coefficients,3> vec() const { return coeffs().template head<3>(); }\n\n  /** \\returns a vector expression of the imaginary part (x,y,z) */\n  EIGEN_DEVICE_FUNC inline VectorBlock<Coefficients,3> vec() { return coeffs().template head<3>(); }\n\n  /** \\returns a read-only vector expression of the coefficients (x,y,z,w) */\n  EIGEN_DEVICE_FUNC inline const typename internal::traits<Derived>::Coefficients& coeffs() const { return derived().coeffs(); }\n\n  /** \\returns a vector expression of the coefficients (x,y,z,w) */\n  EIGEN_DEVICE_FUNC inline typename internal::traits<Derived>::Coefficients& coeffs() { return derived().coeffs(); }\n\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE QuaternionBase<Derived>& operator=(const QuaternionBase<Derived>& other);\n  template<class OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const QuaternionBase<OtherDerived>& other);\n\n// disabled this copy operator as it is giving very strange compilation errors when compiling\n// test_stdvector with GCC 4.4.2. This looks like a GCC bug though, so feel free to re-enable it if it's\n// useful; however notice that we already have the templated operator= above and e.g. in MatrixBase\n// we didn't have to add, in addition to templated operator=, such a non-templated copy operator.\n//  Derived& operator=(const QuaternionBase& other)\n//  { return operator=<Derived>(other); }\n\n  EIGEN_DEVICE_FUNC Derived& operator=(const AngleAxisType& aa);\n  template<class OtherDerived> EIGEN_DEVICE_FUNC Derived& operator=(const MatrixBase<OtherDerived>& m);\n\n  /** \\returns a quaternion representing an identity rotation\n    * \\sa MatrixBase::Identity()\n    */\n  EIGEN_DEVICE_FUNC static inline Quaternion<Scalar> Identity() { return Quaternion<Scalar>(Scalar(1), Scalar(0), Scalar(0), Scalar(0)); }\n\n  /** \\sa QuaternionBase::Identity(), MatrixBase::setIdentity()\n    */\n  EIGEN_DEVICE_FUNC inline QuaternionBase& setIdentity() { coeffs() << Scalar(0), Scalar(0), Scalar(0), Scalar(1); return *this; }\n\n  /** \\returns the squared norm of the quaternion's coefficients\n    * \\sa QuaternionBase::norm(), MatrixBase::squaredNorm()\n    */\n  EIGEN_DEVICE_FUNC inline Scalar squaredNorm() const { return coeffs().squaredNorm(); }\n\n  /** \\returns the norm of the quaternion's coefficients\n    * \\sa QuaternionBase::squaredNorm(), MatrixBase::norm()\n    */\n  EIGEN_DEVICE_FUNC inline Scalar norm() const { return coeffs().norm(); }\n\n  /** Normalizes the quaternion \\c *this\n    * \\sa normalized(), MatrixBase::normalize() */\n  EIGEN_DEVICE_FUNC inline void normalize() { coeffs().normalize(); }\n  /** \\returns a normalized copy of \\c *this\n    * \\sa normalize(), MatrixBase::normalized() */\n  EIGEN_DEVICE_FUNC inline Quaternion<Scalar> normalized() const { return Quaternion<Scalar>(coeffs().normalized()); }\n\n    /** \\returns the dot product of \\c *this and \\a other\n    * Geometrically speaking, the dot product of two unit quaternions\n    * corresponds to the cosine of half the angle between the two rotations.\n    * \\sa angularDistance()\n    */\n  template<class OtherDerived> EIGEN_DEVICE_FUNC inline Scalar dot(const QuaternionBase<OtherDerived>& other) const { return coeffs().dot(other.coeffs()); }\n\n  template<class OtherDerived> EIGEN_DEVICE_FUNC Scalar angularDistance(const QuaternionBase<OtherDerived>& other) const;\n\n  /** \\returns an equivalent 3x3 rotation matrix */\n  
EIGEN_DEVICE_FUNC inline Matrix3 toRotationMatrix() const;\n\n  /** \\returns the quaternion which transform \\a a into \\a b through a rotation */\n  template<typename Derived1, typename Derived2>\n  EIGEN_DEVICE_FUNC Derived& setFromTwoVectors(const MatrixBase<Derived1>& a, const MatrixBase<Derived2>& b);\n\n  template<class OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Quaternion<Scalar> operator* (const QuaternionBase<OtherDerived>& q) const;\n  template<class OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator*= (const QuaternionBase<OtherDerived>& q);\n\n  /** \\returns the quaternion describing the inverse rotation */\n  EIGEN_DEVICE_FUNC Quaternion<Scalar> inverse() const;\n\n  /** \\returns the conjugated quaternion */\n  EIGEN_DEVICE_FUNC Quaternion<Scalar> conjugate() const;\n\n  template<class OtherDerived> EIGEN_DEVICE_FUNC Quaternion<Scalar> slerp(const Scalar& t, const QuaternionBase<OtherDerived>& other) const;\n\n  /** \\returns true if each coefficients of \\c *this and \\a other are all exactly equal.\n    * \\warning When using floating point scalar values you probably should rather use a\n    *          fuzzy comparison such as isApprox()\n    * \\sa isApprox(), operator!= */\n  template<class OtherDerived>\n  EIGEN_DEVICE_FUNC inline bool operator==(const QuaternionBase<OtherDerived>& other) const\n  { return coeffs() == other.coeffs(); }\n\n  /** \\returns true if at least one pair of coefficients of \\c *this and \\a other are not exactly equal to each other.\n    * \\warning When using floating point scalar values you probably should rather use a\n    *          fuzzy comparison such as isApprox()\n    * \\sa isApprox(), operator== */\n  template<class OtherDerived>\n  EIGEN_DEVICE_FUNC inline bool operator!=(const QuaternionBase<OtherDerived>& other) const\n  { return coeffs() != other.coeffs(); }\n\n  /** \\returns \\c true if \\c *this is approximately equal to \\a other, within the precision\n    * determined by \\a prec.\n    *\n    * \\sa MatrixBase::isApprox() */\n  template<class OtherDerived>\n  EIGEN_DEVICE_FUNC bool isApprox(const QuaternionBase<OtherDerived>& other, const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const\n  { return coeffs().isApprox(other.coeffs(), prec); }\n\n  /** return the result vector of \\a v through the rotation*/\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Vector3 _transformVector(const Vector3& v) const;\n\n  #ifdef EIGEN_PARSED_BY_DOXYGEN\n  /** \\returns \\c *this with scalar type casted to \\a NewScalarType\n    *\n    * Note that if \\a NewScalarType is equal to the current scalar type of \\c *this\n    * then this function smartly returns a const reference to \\c *this.\n    */\n  template<typename NewScalarType>\n  EIGEN_DEVICE_FUNC inline typename internal::cast_return_type<Derived,Quaternion<NewScalarType> >::type cast() const;\n\n  #else\n\n  template<typename NewScalarType>\n  EIGEN_DEVICE_FUNC inline\n  typename internal::enable_if<internal::is_same<Scalar,NewScalarType>::value,const Derived&>::type cast() const\n  {\n    return derived();\n  }\n\n  template<typename NewScalarType>\n  EIGEN_DEVICE_FUNC inline\n  typename internal::enable_if<!internal::is_same<Scalar,NewScalarType>::value,Quaternion<NewScalarType> >::type cast() const\n  {\n    return Quaternion<NewScalarType>(coeffs().template cast<NewScalarType>());\n  }\n  #endif\n\n#ifndef EIGEN_NO_IO\n  friend std::ostream& operator<<(std::ostream& s, const QuaternionBase<Derived>& q) {\n    s << q.x() << \"i + \" << q.y() << 
\"j + \" << q.z() << \"k\" << \" + \" << q.w();\n    return s;\n  }\n#endif\n\n#ifdef EIGEN_QUATERNIONBASE_PLUGIN\n# include EIGEN_QUATERNIONBASE_PLUGIN\n#endif\nprotected:\n  EIGEN_DEFAULT_COPY_CONSTRUCTOR(QuaternionBase)\n  EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(QuaternionBase)\n};\n\n/***************************************************************************\n* Definition/implementation of Quaternion<Scalar>\n***************************************************************************/\n\n/** \\geometry_module \\ingroup Geometry_Module\n  *\n  * \\class Quaternion\n  *\n  * \\brief The quaternion class used to represent 3D orientations and rotations\n  *\n  * \\tparam _Scalar the scalar type, i.e., the type of the coefficients\n  * \\tparam _Options controls the memory alignment of the coefficients. Can be \\# AutoAlign or \\# DontAlign. Default is AutoAlign.\n  *\n  * This class represents a quaternion \\f$ w+xi+yj+zk \\f$ that is a convenient representation of\n  * orientations and rotations of objects in three dimensions. Compared to other representations\n  * like Euler angles or 3x3 matrices, quaternions offer the following advantages:\n  * \\li \\b compact storage (4 scalars)\n  * \\li \\b efficient to compose (28 flops),\n  * \\li \\b stable spherical interpolation\n  *\n  * The following two typedefs are provided for convenience:\n  * \\li \\c Quaternionf for \\c float\n  * \\li \\c Quaterniond for \\c double\n  *\n  * \\warning Operations interpreting the quaternion as rotation have undefined behavior if the quaternion is not normalized.\n  *\n  * \\sa  class AngleAxis, class Transform\n  */\n\nnamespace internal {\ntemplate<typename _Scalar,int _Options>\nstruct traits<Quaternion<_Scalar,_Options> >\n{\n  typedef Quaternion<_Scalar,_Options> PlainObject;\n  typedef _Scalar Scalar;\n  typedef Matrix<_Scalar,4,1,_Options> Coefficients;\n  enum{\n    Alignment = internal::traits<Coefficients>::Alignment,\n    Flags = LvalueBit\n  };\n};\n}\n\ntemplate<typename _Scalar, int _Options>\nclass Quaternion : public QuaternionBase<Quaternion<_Scalar,_Options> >\n{\npublic:\n  typedef QuaternionBase<Quaternion<_Scalar,_Options> > Base;\n  enum { NeedsAlignment = internal::traits<Quaternion>::Alignment>0 };\n\n  typedef _Scalar Scalar;\n\n  EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Quaternion)\n  using Base::operator*=;\n\n  typedef typename internal::traits<Quaternion>::Coefficients Coefficients;\n  typedef typename Base::AngleAxisType AngleAxisType;\n\n  /** Default constructor leaving the quaternion uninitialized. 
*/\n  EIGEN_DEVICE_FUNC inline Quaternion() {}\n\n  /** Constructs and initializes the quaternion \\f$ w+xi+yj+zk \\f$ from\n    * its four coefficients \\a w, \\a x, \\a y and \\a z.\n    *\n    * \\warning Note the order of the arguments: the real \\a w coefficient first,\n    * while internally the coefficients are stored in the following order:\n    * [\\c x, \\c y, \\c z, \\c w]\n    */\n  EIGEN_DEVICE_FUNC inline Quaternion(const Scalar& w, const Scalar& x, const Scalar& y, const Scalar& z) : m_coeffs(x, y, z, w){}\n\n  /** Constructs and initialize a quaternion from the array data */\n  EIGEN_DEVICE_FUNC explicit inline Quaternion(const Scalar* data) : m_coeffs(data) {}\n\n  /** Copy constructor */\n  template<class Derived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Quaternion(const QuaternionBase<Derived>& other) { this->Base::operator=(other); }\n\n  /** Constructs and initializes a quaternion from the angle-axis \\a aa */\n  EIGEN_DEVICE_FUNC explicit inline Quaternion(const AngleAxisType& aa) { *this = aa; }\n\n  /** Constructs and initializes a quaternion from either:\n    *  - a rotation matrix expression,\n    *  - a 4D vector expression representing quaternion coefficients.\n    */\n  template<typename Derived>\n  EIGEN_DEVICE_FUNC explicit inline Quaternion(const MatrixBase<Derived>& other) { *this = other; }\n\n  /** Explicit copy constructor with scalar conversion */\n  template<typename OtherScalar, int OtherOptions>\n  EIGEN_DEVICE_FUNC explicit inline Quaternion(const Quaternion<OtherScalar, OtherOptions>& other)\n  { m_coeffs = other.coeffs().template cast<Scalar>(); }\n\n#if EIGEN_HAS_RVALUE_REFERENCES\n  // We define a copy constructor, which means we don't get an implicit move constructor or assignment operator.\n  /** Default move constructor */\n  EIGEN_DEVICE_FUNC inline Quaternion(Quaternion&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_constructible<Scalar>::value)\n    : m_coeffs(std::move(other.coeffs()))\n  {}\n\n  /** Default move assignment operator */\n  EIGEN_DEVICE_FUNC Quaternion& operator=(Quaternion&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_assignable<Scalar>::value)\n  {\n    m_coeffs = std::move(other.coeffs());\n    return *this;\n  }\n#endif\n\n  EIGEN_DEVICE_FUNC static Quaternion UnitRandom();\n\n  template<typename Derived1, typename Derived2>\n  EIGEN_DEVICE_FUNC static Quaternion FromTwoVectors(const MatrixBase<Derived1>& a, const MatrixBase<Derived2>& b);\n\n  EIGEN_DEVICE_FUNC inline Coefficients& coeffs() { return m_coeffs;}\n  EIGEN_DEVICE_FUNC inline const Coefficients& coeffs() const { return m_coeffs;}\n\n  EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(bool(NeedsAlignment))\n  \n#ifdef EIGEN_QUATERNION_PLUGIN\n# include EIGEN_QUATERNION_PLUGIN\n#endif\n\nprotected:\n  Coefficients m_coeffs;\n  \n#ifndef EIGEN_PARSED_BY_DOXYGEN\n    static EIGEN_STRONG_INLINE void _check_template_params()\n    {\n      EIGEN_STATIC_ASSERT( (_Options & DontAlign) == _Options,\n        INVALID_MATRIX_TEMPLATE_PARAMETERS)\n    }\n#endif\n};\n\n/** \\ingroup Geometry_Module\n  * single precision quaternion type */\ntypedef Quaternion<float> Quaternionf;\n/** \\ingroup Geometry_Module\n  * double precision quaternion type */\ntypedef Quaternion<double> Quaterniond;\n\n/***************************************************************************\n* Specialization of Map<Quaternion<Scalar>>\n***************************************************************************/\n\nnamespace internal {\n  template<typename _Scalar, int _Options>\n  struct 
traits<Map<Quaternion<_Scalar>, _Options> > : traits<Quaternion<_Scalar, (int(_Options)&Aligned)==Aligned ? AutoAlign : DontAlign> >\n  {\n    typedef Map<Matrix<_Scalar,4,1>, _Options> Coefficients;\n  };\n}\n\nnamespace internal {\n  template<typename _Scalar, int _Options>\n  struct traits<Map<const Quaternion<_Scalar>, _Options> > : traits<Quaternion<_Scalar, (int(_Options)&Aligned)==Aligned ? AutoAlign : DontAlign> >\n  {\n    typedef Map<const Matrix<_Scalar,4,1>, _Options> Coefficients;\n    typedef traits<Quaternion<_Scalar, (int(_Options)&Aligned)==Aligned ? AutoAlign : DontAlign> > TraitsBase;\n    enum {\n      Flags = TraitsBase::Flags & ~LvalueBit\n    };\n  };\n}\n\n/** \\ingroup Geometry_Module\n  * \\brief Quaternion expression mapping a constant memory buffer\n  *\n  * \\tparam _Scalar the type of the Quaternion coefficients\n  * \\tparam _Options see class Map\n  *\n  * This is a specialization of class Map for Quaternion. This class allows to view\n  * a 4 scalar memory buffer as an Eigen's Quaternion object.\n  *\n  * \\sa class Map, class Quaternion, class QuaternionBase\n  */\ntemplate<typename _Scalar, int _Options>\nclass Map<const Quaternion<_Scalar>, _Options >\n  : public QuaternionBase<Map<const Quaternion<_Scalar>, _Options> >\n{\n  public:\n    typedef QuaternionBase<Map<const Quaternion<_Scalar>, _Options> > Base;\n\n    typedef _Scalar Scalar;\n    typedef typename internal::traits<Map>::Coefficients Coefficients;\n    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Map)\n    using Base::operator*=;\n\n    /** Constructs a Mapped Quaternion object from the pointer \\a coeffs\n      *\n      * The pointer \\a coeffs must reference the four coefficients of Quaternion in the following order:\n      * \\code *coeffs == {x, y, z, w} \\endcode\n      *\n      * If the template parameter _Options is set to #Aligned, then the pointer coeffs must be aligned. */\n    EIGEN_DEVICE_FUNC explicit EIGEN_STRONG_INLINE Map(const Scalar* coeffs) : m_coeffs(coeffs) {}\n\n    EIGEN_DEVICE_FUNC inline const Coefficients& coeffs() const { return m_coeffs;}\n\n  protected:\n    const Coefficients m_coeffs;\n};\n\n/** \\ingroup Geometry_Module\n  * \\brief Expression of a quaternion from a memory buffer\n  *\n  * \\tparam _Scalar the type of the Quaternion coefficients\n  * \\tparam _Options see class Map\n  *\n  * This is a specialization of class Map for Quaternion. This class allows to view\n  * a 4 scalar memory buffer as an Eigen's  Quaternion object.\n  *\n  * \\sa class Map, class Quaternion, class QuaternionBase\n  */\ntemplate<typename _Scalar, int _Options>\nclass Map<Quaternion<_Scalar>, _Options >\n  : public QuaternionBase<Map<Quaternion<_Scalar>, _Options> >\n{\n  public:\n    typedef QuaternionBase<Map<Quaternion<_Scalar>, _Options> > Base;\n\n    typedef _Scalar Scalar;\n    typedef typename internal::traits<Map>::Coefficients Coefficients;\n    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Map)\n    using Base::operator*=;\n\n    /** Constructs a Mapped Quaternion object from the pointer \\a coeffs\n      *\n      * The pointer \\a coeffs must reference the four coefficients of Quaternion in the following order:\n      * \\code *coeffs == {x, y, z, w} \\endcode\n      *\n      * If the template parameter _Options is set to #Aligned, then the pointer coeffs must be aligned. 
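\n      *\n      * A usage sketch (illustrative only):\n      * \\code\n      * float data[4] = {0.f, 0.f, 0.f, 1.f}; // stored order: x, y, z, w (identity rotation)\n      * QuaternionMapf q(data);               // view the buffer as a quaternion\n      * q.normalize();                        // writes back into data\n      * \\endcode\n      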
*/\n    EIGEN_DEVICE_FUNC explicit EIGEN_STRONG_INLINE Map(Scalar* coeffs) : m_coeffs(coeffs) {}\n\n    EIGEN_DEVICE_FUNC inline Coefficients& coeffs() { return m_coeffs; }\n    EIGEN_DEVICE_FUNC inline const Coefficients& coeffs() const { return m_coeffs; }\n\n  protected:\n    Coefficients m_coeffs;\n};\n\n/** \\ingroup Geometry_Module\n  * Map an unaligned array of single precision scalars as a quaternion */\ntypedef Map<Quaternion<float>, 0>         QuaternionMapf;\n/** \\ingroup Geometry_Module\n  * Map an unaligned array of double precision scalars as a quaternion */\ntypedef Map<Quaternion<double>, 0>        QuaternionMapd;\n/** \\ingroup Geometry_Module\n  * Map a 16-byte aligned array of single precision scalars as a quaternion */\ntypedef Map<Quaternion<float>, Aligned>   QuaternionMapAlignedf;\n/** \\ingroup Geometry_Module\n  * Map a 16-byte aligned array of double precision scalars as a quaternion */\ntypedef Map<Quaternion<double>, Aligned>  QuaternionMapAlignedd;\n\n/***************************************************************************\n* Implementation of QuaternionBase methods\n***************************************************************************/\n\n// Generic Quaternion * Quaternion product\n// This product can be specialized for a given architecture via the Arch template argument.\nnamespace internal {\ntemplate<int Arch, class Derived1, class Derived2, typename Scalar> struct quat_product\n{\n  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Quaternion<Scalar> run(const QuaternionBase<Derived1>& a, const QuaternionBase<Derived2>& b){\n    return Quaternion<Scalar>\n    (\n      a.w() * b.w() - a.x() * b.x() - a.y() * b.y() - a.z() * b.z(),\n      a.w() * b.x() + a.x() * b.w() + a.y() * b.z() - a.z() * b.y(),\n      a.w() * b.y() + a.y() * b.w() + a.z() * b.x() - a.x() * b.z(),\n      a.w() * b.z() + a.z() * b.w() + a.x() * b.y() - a.y() * b.x()\n    );\n  }\n};\n}\n\n/** \\returns the concatenation of two rotations as a quaternion-quaternion product */\ntemplate <class Derived>\ntemplate <class OtherDerived>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Quaternion<typename internal::traits<Derived>::Scalar>\nQuaternionBase<Derived>::operator* (const QuaternionBase<OtherDerived>& other) const\n{\n  EIGEN_STATIC_ASSERT((internal::is_same<typename Derived::Scalar, typename OtherDerived::Scalar>::value),\n   YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)\n  return internal::quat_product<Architecture::Target, Derived, OtherDerived,\n                         typename internal::traits<Derived>::Scalar>::run(*this, other);\n}\n\n/** \\sa operator*(Quaternion) */\ntemplate <class Derived>\ntemplate <class OtherDerived>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& QuaternionBase<Derived>::operator*= (const QuaternionBase<OtherDerived>& other)\n{\n  derived() = derived() * other.derived();\n  return derived();\n}\n\n/** Rotation of a vector by a quaternion.\n  * \\remarks If the quaternion is used to rotate several points (>1)\n  * then it is much more efficient to first convert it to a 3x3 Matrix.\n  * Comparison of the operation cost for n transformations:\n  *   - Quaternion2:    30n\n  *   - Via a Matrix3: 24 + 15n\n  */\ntemplate <class Derived>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename QuaternionBase<Derived>::Vector3\nQuaternionBase<Derived>::_transformVector(const Vector3& v) const\n{\n    // Note that this algorithm comes from the optimization by hand\n    // of the conversion to a Matrix 
followed by a Matrix/Vector product.\n    // It appears to be much faster than the common algorithm found\n    // in the literature (30 versus 39 flops). It also requires two\n    // Vector3 as temporaries.\n    Vector3 uv = this->vec().cross(v);\n    uv += uv;\n    return v + this->w() * uv + this->vec().cross(uv);\n}\n\ntemplate<class Derived>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE QuaternionBase<Derived>& QuaternionBase<Derived>::operator=(const QuaternionBase<Derived>& other)\n{\n  coeffs() = other.coeffs();\n  return derived();\n}\n\ntemplate<class Derived>\ntemplate<class OtherDerived>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& QuaternionBase<Derived>::operator=(const QuaternionBase<OtherDerived>& other)\n{\n  coeffs() = other.coeffs();\n  return derived();\n}\n\n/** Set \\c *this from an angle-axis \\a aa and returns a reference to \\c *this\n  */\ntemplate<class Derived>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& QuaternionBase<Derived>::operator=(const AngleAxisType& aa)\n{\n  EIGEN_USING_STD(cos)\n  EIGEN_USING_STD(sin)\n  Scalar ha = Scalar(0.5)*aa.angle(); // Scalar(0.5) to suppress precision loss warnings\n  this->w() = cos(ha);\n  this->vec() = sin(ha) * aa.axis();\n  return derived();\n}\n\n/** Set \\c *this from the expression \\a xpr:\n  *   - if \\a xpr is a 4x1 vector, then \\a xpr is assumed to be a quaternion\n  *   - if \\a xpr is a 3x3 matrix, then \\a xpr is assumed to be rotation matrix\n  *     and \\a xpr is converted to a quaternion\n  */\n\ntemplate<class Derived>\ntemplate<class MatrixDerived>\nEIGEN_DEVICE_FUNC inline Derived& QuaternionBase<Derived>::operator=(const MatrixBase<MatrixDerived>& xpr)\n{\n  EIGEN_STATIC_ASSERT((internal::is_same<typename Derived::Scalar, typename MatrixDerived::Scalar>::value),\n   YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)\n  internal::quaternionbase_assign_impl<MatrixDerived>::run(*this, xpr.derived());\n  return derived();\n}\n\n/** Convert the quaternion to a 3x3 rotation matrix. The quaternion is required to\n  * be normalized, otherwise the result is undefined.\n  */\ntemplate<class Derived>\nEIGEN_DEVICE_FUNC inline typename QuaternionBase<Derived>::Matrix3\nQuaternionBase<Derived>::toRotationMatrix(void) const\n{\n  // NOTE if inlined, then gcc 4.2 and 4.4 get rid of the temporary (not gcc 4.3 !!)\n  // if not inlined then the cost of the return by value is huge ~ +35%,\n  // however, not inlining this function is an order of magnitude slower, so\n  // it has to be inlined, and so the return by value is not an issue\n  Matrix3 res;\n\n  const Scalar tx  = Scalar(2)*this->x();\n  const Scalar ty  = Scalar(2)*this->y();\n  const Scalar tz  = Scalar(2)*this->z();\n  const Scalar twx = tx*this->w();\n  const Scalar twy = ty*this->w();\n  const Scalar twz = tz*this->w();\n  const Scalar txx = tx*this->x();\n  const Scalar txy = ty*this->x();\n  const Scalar txz = tz*this->x();\n  const Scalar tyy = ty*this->y();\n  const Scalar tyz = tz*this->y();\n  const Scalar tzz = tz*this->z();\n\n  res.coeffRef(0,0) = Scalar(1)-(tyy+tzz);\n  res.coeffRef(0,1) = txy-twz;\n  res.coeffRef(0,2) = txz+twy;\n  res.coeffRef(1,0) = txy+twz;\n  res.coeffRef(1,1) = Scalar(1)-(txx+tzz);\n  res.coeffRef(1,2) = tyz-twx;\n  res.coeffRef(2,0) = txz-twy;\n  res.coeffRef(2,1) = tyz+twx;\n  res.coeffRef(2,2) = Scalar(1)-(txx+tyy);\n\n  return res;\n}\n\n/** Sets \\c *this to be a quaternion representing a rotation between\n  * the two arbitrary vectors \\a a and \\a b. 
In other words, the built\n  * rotation represent a rotation sending the line of direction \\a a\n  * to the line of direction \\a b, both lines passing through the origin.\n  *\n  * \\returns a reference to \\c *this.\n  *\n  * Note that the two input vectors do \\b not have to be normalized, and\n  * do not need to have the same norm.\n  */\ntemplate<class Derived>\ntemplate<typename Derived1, typename Derived2>\nEIGEN_DEVICE_FUNC inline Derived& QuaternionBase<Derived>::setFromTwoVectors(const MatrixBase<Derived1>& a, const MatrixBase<Derived2>& b)\n{\n  EIGEN_USING_STD(sqrt)\n  Vector3 v0 = a.normalized();\n  Vector3 v1 = b.normalized();\n  Scalar c = v1.dot(v0);\n\n  // if dot == -1, vectors are nearly opposites\n  // => accurately compute the rotation axis by computing the\n  //    intersection of the two planes. This is done by solving:\n  //       x^T v0 = 0\n  //       x^T v1 = 0\n  //    under the constraint:\n  //       ||x|| = 1\n  //    which yields a singular value problem\n  if (c < Scalar(-1)+NumTraits<Scalar>::dummy_precision())\n  {\n    c = numext::maxi(c,Scalar(-1));\n    Matrix<Scalar,2,3> m; m << v0.transpose(), v1.transpose();\n    JacobiSVD<Matrix<Scalar,2,3> > svd(m, ComputeFullV);\n    Vector3 axis = svd.matrixV().col(2);\n\n    Scalar w2 = (Scalar(1)+c)*Scalar(0.5);\n    this->w() = sqrt(w2);\n    this->vec() = axis * sqrt(Scalar(1) - w2);\n    return derived();\n  }\n  Vector3 axis = v0.cross(v1);\n  Scalar s = sqrt((Scalar(1)+c)*Scalar(2));\n  Scalar invs = Scalar(1)/s;\n  this->vec() = axis * invs;\n  this->w() = s * Scalar(0.5);\n\n  return derived();\n}\n\n/** \\returns a random unit quaternion following a uniform distribution law on SO(3)\n  *\n  * \\note The implementation is based on http://planning.cs.uiuc.edu/node198.html\n  */\ntemplate<typename Scalar, int Options>\nEIGEN_DEVICE_FUNC Quaternion<Scalar,Options> Quaternion<Scalar,Options>::UnitRandom()\n{\n  EIGEN_USING_STD(sqrt)\n  EIGEN_USING_STD(sin)\n  EIGEN_USING_STD(cos)\n  const Scalar u1 = internal::random<Scalar>(0, 1),\n               u2 = internal::random<Scalar>(0, 2*EIGEN_PI),\n               u3 = internal::random<Scalar>(0, 2*EIGEN_PI);\n  const Scalar a = sqrt(Scalar(1) - u1),\n               b = sqrt(u1);\n  return Quaternion (a * sin(u2), a * cos(u2), b * sin(u3), b * cos(u3));\n}\n\n\n/** Returns a quaternion representing a rotation between\n  * the two arbitrary vectors \\a a and \\a b. 
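\n  *\n  * A usage sketch (illustrative only):\n  * \\code\n  * Quaternionf q = Quaternionf::FromTwoVectors(Vector3f::UnitX(), Vector3f::UnitY());\n  * // q * Vector3f::UnitX() is approximately Vector3f::UnitY()\n  * \\endcode\n  *\n  * 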
In other words, the built\n  * rotation represents a rotation sending the line of direction \\a a\n  * to the line of direction \\a b, both lines passing through the origin.\n  *\n  * \\returns resulting quaternion\n  *\n  * Note that the two input vectors do \\b not have to be normalized, and\n  * do not need to have the same norm.\n  */\ntemplate<typename Scalar, int Options>\ntemplate<typename Derived1, typename Derived2>\nEIGEN_DEVICE_FUNC Quaternion<Scalar,Options> Quaternion<Scalar,Options>::FromTwoVectors(const MatrixBase<Derived1>& a, const MatrixBase<Derived2>& b)\n{\n    Quaternion quat;\n    quat.setFromTwoVectors(a, b);\n    return quat;\n}\n\n\n/** \\returns the multiplicative inverse of \\c *this\n  * Note that in most cases, i.e., if you simply want the opposite rotation,\n  * and/or the quaternion is normalized, then it is enough to use the conjugate.\n  *\n  * \\sa QuaternionBase::conjugate()\n  */\ntemplate <class Derived>\nEIGEN_DEVICE_FUNC inline Quaternion<typename internal::traits<Derived>::Scalar> QuaternionBase<Derived>::inverse() const\n{\n  // FIXME should this function be called multiplicativeInverse and conjugate() be called inverse() or opposite()  ??\n  Scalar n2 = this->squaredNorm();\n  if (n2 > Scalar(0))\n    return Quaternion<Scalar>(conjugate().coeffs() / n2);\n  else\n  {\n    // return an invalid result to flag the error\n    return Quaternion<Scalar>(Coefficients::Zero());\n  }\n}\n\n// Generic conjugate of a Quaternion\nnamespace internal {\ntemplate<int Arch, class Derived, typename Scalar> struct quat_conj\n{\n  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Quaternion<Scalar> run(const QuaternionBase<Derived>& q){\n    return Quaternion<Scalar>(q.w(),-q.x(),-q.y(),-q.z());\n  }\n};\n}\n\n/** \\returns the conjugate of \\c *this, which is equal to the multiplicative inverse\n  * if the quaternion is normalized.\n  * The conjugate of a quaternion represents the opposite rotation.\n  *\n  * \\sa QuaternionBase::inverse()\n  */\ntemplate <class Derived>\nEIGEN_DEVICE_FUNC inline Quaternion<typename internal::traits<Derived>::Scalar>\nQuaternionBase<Derived>::conjugate() const\n{\n  return internal::quat_conj<Architecture::Target, Derived,\n                         typename internal::traits<Derived>::Scalar>::run(*this);\n}\n\n/** \\returns the angle (in radians) between two rotations\n  * \\sa dot()\n  */\ntemplate <class Derived>\ntemplate <class OtherDerived>\nEIGEN_DEVICE_FUNC inline typename internal::traits<Derived>::Scalar\nQuaternionBase<Derived>::angularDistance(const QuaternionBase<OtherDerived>& other) const\n{\n  EIGEN_USING_STD(atan2)\n  Quaternion<Scalar> d = (*this) * other.conjugate();\n  return Scalar(2) * atan2( d.vec().norm(), numext::abs(d.w()) );\n}\n\n/** \\returns the spherical linear interpolation between the two quaternions\n  * \\c *this and \\a other at the parameter \\a t in [0;1].\n  *\n  * This represents an interpolation for a constant motion between \\c *this and \\a other,\n  * see also http://en.wikipedia.org/wiki/Slerp.\n  */\ntemplate <class Derived>\ntemplate <class OtherDerived>\nEIGEN_DEVICE_FUNC Quaternion<typename internal::traits<Derived>::Scalar>\nQuaternionBase<Derived>::slerp(const Scalar& t, const QuaternionBase<OtherDerived>& other) const\n{\n  EIGEN_USING_STD(acos)\n  EIGEN_USING_STD(sin)\n  const Scalar one = Scalar(1) - NumTraits<Scalar>::epsilon();\n  Scalar d = this->dot(other);\n  Scalar absD = numext::abs(d);\n\n  Scalar scale0;\n  Scalar 
scale1;\n\n  if(absD>=one)\n  {\n    scale0 = Scalar(1) - t;\n    scale1 = t;\n  }\n  else\n  {\n    // theta is the angle between the 2 quaternions\n    Scalar theta = acos(absD);\n    Scalar sinTheta = sin(theta);\n\n    scale0 = sin( ( Scalar(1) - t ) * theta) / sinTheta;\n    scale1 = sin( ( t * theta) ) / sinTheta;\n  }\n  if(d<Scalar(0)) scale1 = -scale1;\n\n  return Quaternion<Scalar>(scale0 * coeffs() + scale1 * other.coeffs());\n}\n\nnamespace internal {\n\n// set from a rotation matrix\ntemplate<typename Other>\nstruct quaternionbase_assign_impl<Other,3,3>\n{\n  typedef typename Other::Scalar Scalar;\n  template<class Derived> EIGEN_DEVICE_FUNC static inline void run(QuaternionBase<Derived>& q, const Other& a_mat)\n  {\n    const typename internal::nested_eval<Other,2>::type mat(a_mat);\n    EIGEN_USING_STD(sqrt)\n    // This algorithm comes from  \"Quaternion Calculus and Fast Animation\",\n    // Ken Shoemake, 1987 SIGGRAPH course notes\n    Scalar t = mat.trace();\n    if (t > Scalar(0))\n    {\n      t = sqrt(t + Scalar(1.0));\n      q.w() = Scalar(0.5)*t;\n      t = Scalar(0.5)/t;\n      q.x() = (mat.coeff(2,1) - mat.coeff(1,2)) * t;\n      q.y() = (mat.coeff(0,2) - mat.coeff(2,0)) * t;\n      q.z() = (mat.coeff(1,0) - mat.coeff(0,1)) * t;\n    }\n    else\n    {\n      Index i = 0;\n      if (mat.coeff(1,1) > mat.coeff(0,0))\n        i = 1;\n      if (mat.coeff(2,2) > mat.coeff(i,i))\n        i = 2;\n      Index j = (i+1)%3;\n      Index k = (j+1)%3;\n\n      t = sqrt(mat.coeff(i,i)-mat.coeff(j,j)-mat.coeff(k,k) + Scalar(1.0));\n      q.coeffs().coeffRef(i) = Scalar(0.5) * t;\n      t = Scalar(0.5)/t;\n      q.w() = (mat.coeff(k,j)-mat.coeff(j,k))*t;\n      q.coeffs().coeffRef(j) = (mat.coeff(j,i)+mat.coeff(i,j))*t;\n      q.coeffs().coeffRef(k) = (mat.coeff(k,i)+mat.coeff(i,k))*t;\n    }\n  }\n};\n\n// set from a vector of coefficients assumed to be a quaternion\ntemplate<typename Other>\nstruct quaternionbase_assign_impl<Other,4,1>\n{\n  typedef typename Other::Scalar Scalar;\n  template<class Derived> EIGEN_DEVICE_FUNC static inline void run(QuaternionBase<Derived>& q, const Other& vec)\n  {\n    q.coeffs() = vec;\n  }\n};\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_QUATERNION_H\n"
  },
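The Quaternion.h header vendored above documents several user-facing entry points: setFromTwoVectors()/FromTwoVectors(), conjugate() versus inverse(), angularDistance(), slerp(), and toRotationMatrix(). As a quick orientation aid, here is a minimal standalone sketch of how they fit together; this is a hypothetical example, not part of the vendored tree, and assumes only that Eigen's headers are on the include path:

```cpp
#include <Eigen/Geometry>
#include <iostream>

int main() {
  using Eigen::Quaterniond;
  using Eigen::Vector3d;

  // FromTwoVectors: the inputs need not be normalized or share a norm.
  Quaterniond q = Quaterniond::FromTwoVectors(Vector3d(2, 0, 0), Vector3d(0, 3, 0));

  // operator* on a vector rotates it (routed through _transformVector()):
  // here (1,0,0) lands on the y axis.
  std::cout << (q * Vector3d::UnitX()).transpose() << "\n";

  // For unit quaternions conjugate() equals inverse() but is cheaper,
  // since it skips the division by squaredNorm().
  std::cout << q.conjugate().angularDistance(q.inverse()) << "\n";  // ~0

  // slerp interpolates at constant angular velocity; halfway along a
  // 90-degree rotation the distance from identity is ~pi/4.
  Quaterniond half = Quaterniond::Identity().slerp(0.5, q);
  std::cout << half.angularDistance(Quaterniond::Identity()) << "\n";

  // toRotationMatrix() requires a normalized quaternion.
  std::cout << q.normalized().toRotationMatrix() << "\n";
  return 0;
}
```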
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Geometry/Rotation2D.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_ROTATION2D_H\n#define EIGEN_ROTATION2D_H\n\nnamespace Eigen { \n\n/** \\geometry_module \\ingroup Geometry_Module\n  *\n  * \\class Rotation2D\n  *\n  * \\brief Represents a rotation/orientation in a 2 dimensional space.\n  *\n  * \\tparam _Scalar the scalar type, i.e., the type of the coefficients\n  *\n  * This class is equivalent to a single scalar representing a counter clock wise rotation\n  * as a single angle in radian. It provides some additional features such as the automatic\n  * conversion from/to a 2x2 rotation matrix. Moreover this class aims to provide a similar\n  * interface to Quaternion in order to facilitate the writing of generic algorithms\n  * dealing with rotations.\n  *\n  * \\sa class Quaternion, class Transform\n  */\n\nnamespace internal {\n\ntemplate<typename _Scalar> struct traits<Rotation2D<_Scalar> >\n{\n  typedef _Scalar Scalar;\n};\n} // end namespace internal\n\ntemplate<typename _Scalar>\nclass Rotation2D : public RotationBase<Rotation2D<_Scalar>,2>\n{\n  typedef RotationBase<Rotation2D<_Scalar>,2> Base;\n\npublic:\n\n  using Base::operator*;\n\n  enum { Dim = 2 };\n  /** the scalar type of the coefficients */\n  typedef _Scalar Scalar;\n  typedef Matrix<Scalar,2,1> Vector2;\n  typedef Matrix<Scalar,2,2> Matrix2;\n\nprotected:\n\n  Scalar m_angle;\n\npublic:\n\n  /** Construct a 2D counter clock wise rotation from the angle \\a a in radian. */\n  EIGEN_DEVICE_FUNC explicit inline Rotation2D(const Scalar& a) : m_angle(a) {}\n  \n  /** Default constructor wihtout initialization. The represented rotation is undefined. */\n  EIGEN_DEVICE_FUNC Rotation2D() {}\n\n  /** Construct a 2D rotation from a 2x2 rotation matrix \\a mat.\n    *\n    * \\sa fromRotationMatrix()\n    */\n  template<typename Derived>\n  EIGEN_DEVICE_FUNC explicit Rotation2D(const MatrixBase<Derived>& m)\n  {\n    fromRotationMatrix(m.derived());\n  }\n\n  /** \\returns the rotation angle */\n  EIGEN_DEVICE_FUNC inline Scalar angle() const { return m_angle; }\n\n  /** \\returns a read-write reference to the rotation angle */\n  EIGEN_DEVICE_FUNC inline Scalar& angle() { return m_angle; }\n  \n  /** \\returns the rotation angle in [0,2pi] */\n  EIGEN_DEVICE_FUNC inline Scalar smallestPositiveAngle() const {\n    Scalar tmp = numext::fmod(m_angle,Scalar(2*EIGEN_PI));\n    return tmp<Scalar(0) ? 
tmp + Scalar(2*EIGEN_PI) : tmp;\n  }\n  \n  /** \\returns the rotation angle in [-pi,pi] */\n  EIGEN_DEVICE_FUNC inline Scalar smallestAngle() const {\n    Scalar tmp = numext::fmod(m_angle,Scalar(2*EIGEN_PI));\n    if(tmp>Scalar(EIGEN_PI))       tmp -= Scalar(2*EIGEN_PI);\n    else if(tmp<-Scalar(EIGEN_PI)) tmp += Scalar(2*EIGEN_PI);\n    return tmp;\n  }\n\n  /** \\returns the inverse rotation */\n  EIGEN_DEVICE_FUNC inline Rotation2D inverse() const { return Rotation2D(-m_angle); }\n\n  /** Concatenates two rotations */\n  EIGEN_DEVICE_FUNC inline Rotation2D operator*(const Rotation2D& other) const\n  { return Rotation2D(m_angle + other.m_angle); }\n\n  /** Concatenates two rotations */\n  EIGEN_DEVICE_FUNC inline Rotation2D& operator*=(const Rotation2D& other)\n  { m_angle += other.m_angle; return *this; }\n\n  /** Applies the rotation to a 2D vector */\n  EIGEN_DEVICE_FUNC Vector2 operator* (const Vector2& vec) const\n  { return toRotationMatrix() * vec; }\n  \n  template<typename Derived>\n  EIGEN_DEVICE_FUNC Rotation2D& fromRotationMatrix(const MatrixBase<Derived>& m);\n  EIGEN_DEVICE_FUNC Matrix2 toRotationMatrix() const;\n\n  /** Set \\c *this from a 2x2 rotation matrix \\a mat.\n    * In other words, this function extracts the rotation angle from the rotation matrix.\n    *\n    * This method is an alias for fromRotationMatrix()\n    *\n    * \\sa fromRotationMatrix()\n    */\n  template<typename Derived>\n  EIGEN_DEVICE_FUNC Rotation2D& operator=(const MatrixBase<Derived>& m)\n  { return fromRotationMatrix(m.derived()); }\n\n  /** \\returns the spherical interpolation between \\c *this and \\a other using\n    * parameter \\a t. It is in fact equivalent to a linear interpolation.\n    */\n  EIGEN_DEVICE_FUNC inline Rotation2D slerp(const Scalar& t, const Rotation2D& other) const\n  {\n    Scalar dist = Rotation2D(other.m_angle-m_angle).smallestAngle();\n    return Rotation2D(m_angle + dist*t);\n  }\n\n  /** \\returns \\c *this with scalar type casted to \\a NewScalarType\n    *\n    * Note that if \\a NewScalarType is equal to the current scalar type of \\c *this\n    * then this function smartly returns a const reference to \\c *this.\n    */\n  template<typename NewScalarType>\n  EIGEN_DEVICE_FUNC inline typename internal::cast_return_type<Rotation2D,Rotation2D<NewScalarType> >::type cast() const\n  { return typename internal::cast_return_type<Rotation2D,Rotation2D<NewScalarType> >::type(*this); }\n\n  /** Copy constructor with scalar type conversion */\n  template<typename OtherScalarType>\n  EIGEN_DEVICE_FUNC inline explicit Rotation2D(const Rotation2D<OtherScalarType>& other)\n  {\n    m_angle = Scalar(other.angle());\n  }\n\n  EIGEN_DEVICE_FUNC static inline Rotation2D Identity() { return Rotation2D(0); }\n\n  /** \\returns \\c true if \\c *this is approximately equal to \\a other, within the precision\n    * determined by \\a prec.\n    *\n    * \\sa MatrixBase::isApprox() */\n  EIGEN_DEVICE_FUNC bool isApprox(const Rotation2D& other, const typename NumTraits<Scalar>::Real& prec = NumTraits<Scalar>::dummy_precision()) const\n  { return internal::isApprox(m_angle,other.m_angle, prec); }\n  \n};\n\n/** \\ingroup Geometry_Module\n  * single precision 2D rotation type */\ntypedef Rotation2D<float> Rotation2Df;\n/** \\ingroup Geometry_Module\n  * double precision 2D rotation type */\ntypedef Rotation2D<double> Rotation2Dd;\n\n/** Set \\c *this from a 2x2 rotation matrix \\a mat.\n  * In other words, this function extracts the rotation angle\n  * from the rotation matrix.\n  
*/\ntemplate<typename Scalar>\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC Rotation2D<Scalar>& Rotation2D<Scalar>::fromRotationMatrix(const MatrixBase<Derived>& mat)\n{\n  EIGEN_USING_STD(atan2)\n  EIGEN_STATIC_ASSERT(Derived::RowsAtCompileTime==2 && Derived::ColsAtCompileTime==2,YOU_MADE_A_PROGRAMMING_MISTAKE)\n  m_angle = atan2(mat.coeff(1,0), mat.coeff(0,0));\n  return *this;\n}\n\n/** Constructs and \\returns an equivalent 2x2 rotation matrix.\n  */\ntemplate<typename Scalar>\ntypename Rotation2D<Scalar>::Matrix2\nEIGEN_DEVICE_FUNC Rotation2D<Scalar>::toRotationMatrix(void) const\n{\n  EIGEN_USING_STD(sin)\n  EIGEN_USING_STD(cos)\n  Scalar sinA = sin(m_angle);\n  Scalar cosA = cos(m_angle);\n  return (Matrix2() << cosA, -sinA, sinA, cosA).finished();\n}\n\n} // end namespace Eigen\n\n#endif // EIGEN_ROTATION2D_H\n"
  },
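Rotation2D.h above wraps a single angle with matrix conversion and angle-wrapping helpers. A hypothetical standalone sketch of that API (not part of the vendored tree; it assumes only Eigen on the include path):

```cpp
#include <Eigen/Geometry>
#include <iostream>

int main() {
  using Eigen::Rotation2Dd;

  Rotation2Dd r(3 * EIGEN_PI / 2);                 // 270 degrees
  std::cout << r.smallestPositiveAngle() << "\n";  // 3*pi/2, already in [0,2pi]
  std::cout << r.smallestAngle() << "\n";          // -pi/2, wrapped into [-pi,pi]

  // Round-trip through a 2x2 rotation matrix; fromRotationMatrix() uses
  // atan2 on the first column, so the angle comes back in (-pi, pi].
  Rotation2Dd s;
  s.fromRotationMatrix(r.toRotationMatrix());
  std::cout << s.angle() << "\n";                  // -pi/2

  // slerp() on Rotation2D reduces to linear interpolation of the smallest
  // signed angle between the two rotations.
  Rotation2Dd mid = Rotation2Dd(0).slerp(0.5, r);
  std::cout << mid.angle() << "\n";                // -pi/4
  return 0;
}
```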
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Geometry/RotationBase.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_ROTATIONBASE_H\n#define EIGEN_ROTATIONBASE_H\n\nnamespace Eigen { \n\n// forward declaration\nnamespace internal {\ntemplate<typename RotationDerived, typename MatrixType, bool IsVector=MatrixType::IsVectorAtCompileTime>\nstruct rotation_base_generic_product_selector;\n}\n\n/** \\class RotationBase\n  *\n  * \\brief Common base class for compact rotation representations\n  *\n  * \\tparam Derived is the derived type, i.e., a rotation type\n  * \\tparam _Dim the dimension of the space\n  */\ntemplate<typename Derived, int _Dim>\nclass RotationBase\n{\n  public:\n    enum { Dim = _Dim };\n    /** the scalar type of the coefficients */\n    typedef typename internal::traits<Derived>::Scalar Scalar;\n\n    /** corresponding linear transformation matrix type */\n    typedef Matrix<Scalar,Dim,Dim> RotationMatrixType;\n    typedef Matrix<Scalar,Dim,1> VectorType;\n\n  public:\n    EIGEN_DEVICE_FUNC inline const Derived& derived() const { return *static_cast<const Derived*>(this); }\n    EIGEN_DEVICE_FUNC inline Derived& derived() { return *static_cast<Derived*>(this); }\n\n    /** \\returns an equivalent rotation matrix */\n    EIGEN_DEVICE_FUNC inline RotationMatrixType toRotationMatrix() const { return derived().toRotationMatrix(); }\n\n    /** \\returns an equivalent rotation matrix \n      * This function is added to be conform with the Transform class' naming scheme.\n      */\n    EIGEN_DEVICE_FUNC inline RotationMatrixType matrix() const { return derived().toRotationMatrix(); }\n\n    /** \\returns the inverse rotation */\n    EIGEN_DEVICE_FUNC inline Derived inverse() const { return derived().inverse(); }\n\n    /** \\returns the concatenation of the rotation \\c *this with a translation \\a t */\n    EIGEN_DEVICE_FUNC inline Transform<Scalar,Dim,Isometry> operator*(const Translation<Scalar,Dim>& t) const\n    { return Transform<Scalar,Dim,Isometry>(*this) * t; }\n\n    /** \\returns the concatenation of the rotation \\c *this with a uniform scaling \\a s */\n    EIGEN_DEVICE_FUNC inline RotationMatrixType operator*(const UniformScaling<Scalar>& s) const\n    { return toRotationMatrix() * s.factor(); }\n\n    /** \\returns the concatenation of the rotation \\c *this with a generic expression \\a e\n      * \\a e can be:\n      *  - a DimxDim linear transformation matrix\n      *  - a DimxDim diagonal matrix (axis aligned scaling)\n      *  - a vector of size Dim\n      */\n    template<typename OtherDerived>\n    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::rotation_base_generic_product_selector<Derived,OtherDerived,OtherDerived::IsVectorAtCompileTime>::ReturnType\n    operator*(const EigenBase<OtherDerived>& e) const\n    { return internal::rotation_base_generic_product_selector<Derived,OtherDerived>::run(derived(), e.derived()); }\n\n    /** \\returns the concatenation of a linear transformation \\a l with the rotation \\a r */\n    template<typename OtherDerived> friend\n    EIGEN_DEVICE_FUNC inline RotationMatrixType operator*(const EigenBase<OtherDerived>& l, const Derived& r)\n    { return l.derived() * r.toRotationMatrix(); }\n\n    /** \\returns the concatenation of a scaling 
\\a l with the rotation \\a r */\n    EIGEN_DEVICE_FUNC friend inline Transform<Scalar,Dim,Affine> operator*(const DiagonalMatrix<Scalar,Dim>& l, const Derived& r)\n    { \n      Transform<Scalar,Dim,Affine> res(r);\n      res.linear().applyOnTheLeft(l);\n      return res;\n    }\n\n    /** \\returns the concatenation of the rotation \\c *this with a transformation \\a t */\n    template<int Mode, int Options>\n    EIGEN_DEVICE_FUNC inline Transform<Scalar,Dim,Mode> operator*(const Transform<Scalar,Dim,Mode,Options>& t) const\n    { return toRotationMatrix() * t; }\n\n    template<typename OtherVectorType>\n    EIGEN_DEVICE_FUNC inline VectorType _transformVector(const OtherVectorType& v) const\n    { return toRotationMatrix() * v; }\n};\n\nnamespace internal {\n\n// implementation of the generic product rotation * matrix\ntemplate<typename RotationDerived, typename MatrixType>\nstruct rotation_base_generic_product_selector<RotationDerived,MatrixType,false>\n{\n  enum { Dim = RotationDerived::Dim };\n  typedef Matrix<typename RotationDerived::Scalar,Dim,Dim> ReturnType;\n  EIGEN_DEVICE_FUNC static inline ReturnType run(const RotationDerived& r, const MatrixType& m)\n  { return r.toRotationMatrix() * m; }\n};\n\ntemplate<typename RotationDerived, typename Scalar, int Dim, int MaxDim>\nstruct rotation_base_generic_product_selector< RotationDerived, DiagonalMatrix<Scalar,Dim,MaxDim>, false >\n{\n  typedef Transform<Scalar,Dim,Affine> ReturnType;\n  EIGEN_DEVICE_FUNC static inline ReturnType run(const RotationDerived& r, const DiagonalMatrix<Scalar,Dim,MaxDim>& m)\n  {\n    ReturnType res(r);\n    res.linear() *= m;\n    return res;\n  }\n};\n\ntemplate<typename RotationDerived,typename OtherVectorType>\nstruct rotation_base_generic_product_selector<RotationDerived,OtherVectorType,true>\n{\n  enum { Dim = RotationDerived::Dim };\n  typedef Matrix<typename RotationDerived::Scalar,Dim,1> ReturnType;\n  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE ReturnType run(const RotationDerived& r, const OtherVectorType& v)\n  {\n    return r._transformVector(v);\n  }\n};\n\n} // end namespace internal\n\n/** \\geometry_module\n  *\n  * \\brief Constructs a Dim x Dim rotation matrix from the rotation \\a r\n  */\ntemplate<typename _Scalar, int _Rows, int _Cols, int _Storage, int _MaxRows, int _MaxCols>\ntemplate<typename OtherDerived>\nEIGEN_DEVICE_FUNC Matrix<_Scalar, _Rows, _Cols, _Storage, _MaxRows, _MaxCols>\n::Matrix(const RotationBase<OtherDerived,ColsAtCompileTime>& r)\n{\n  EIGEN_STATIC_ASSERT_MATRIX_SPECIFIC_SIZE(Matrix,int(OtherDerived::Dim),int(OtherDerived::Dim))\n  *this = r.toRotationMatrix();\n}\n\n/** \\geometry_module\n  *\n  * \\brief Set a Dim x Dim rotation matrix from the rotation \\a r\n  */\ntemplate<typename _Scalar, int _Rows, int _Cols, int _Storage, int _MaxRows, int _MaxCols>\ntemplate<typename OtherDerived>\nEIGEN_DEVICE_FUNC Matrix<_Scalar, _Rows, _Cols, _Storage, _MaxRows, _MaxCols>&\nMatrix<_Scalar, _Rows, _Cols, _Storage, _MaxRows, _MaxCols>\n::operator=(const RotationBase<OtherDerived,ColsAtCompileTime>& r)\n{\n  EIGEN_STATIC_ASSERT_MATRIX_SPECIFIC_SIZE(Matrix,int(OtherDerived::Dim),int(OtherDerived::Dim))\n  return *this = r.toRotationMatrix();\n}\n\nnamespace internal {\n\n/** \\internal\n  *\n  * Helper function to return an arbitrary rotation object to a rotation matrix.\n  *\n  * \\tparam Scalar the numeric type of the matrix coefficients\n  * \\tparam Dim the dimension of the current space\n  *\n  * It returns a Dim x Dim fixed size matrix.\n  *\n  * Default 
specializations are provided for:\n  *   - any scalar type (2D),\n  *   - any matrix expression,\n  *   - any type based on RotationBase (e.g., Quaternion, AngleAxis, Rotation2D)\n  *\n  * Currently toRotationMatrix is only used by Transform.\n  *\n  * \\sa class Transform, class Rotation2D, class Quaternion, class AngleAxis\n  */\ntemplate<typename Scalar, int Dim>\nEIGEN_DEVICE_FUNC static inline Matrix<Scalar,2,2> toRotationMatrix(const Scalar& s)\n{\n  EIGEN_STATIC_ASSERT(Dim==2,YOU_MADE_A_PROGRAMMING_MISTAKE)\n  return Rotation2D<Scalar>(s).toRotationMatrix();\n}\n\ntemplate<typename Scalar, int Dim, typename OtherDerived>\nEIGEN_DEVICE_FUNC static inline Matrix<Scalar,Dim,Dim> toRotationMatrix(const RotationBase<OtherDerived,Dim>& r)\n{\n  return r.toRotationMatrix();\n}\n\ntemplate<typename Scalar, int Dim, typename OtherDerived>\nEIGEN_DEVICE_FUNC static inline const MatrixBase<OtherDerived>& toRotationMatrix(const MatrixBase<OtherDerived>& mat)\n{\n  EIGEN_STATIC_ASSERT(OtherDerived::RowsAtCompileTime==Dim && OtherDerived::ColsAtCompileTime==Dim,\n    YOU_MADE_A_PROGRAMMING_MISTAKE)\n  return mat;\n}\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_ROTATIONBASE_H\n"
  },
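RotationBase.h above is what lets every rotation type (Quaternion, AngleAxis, Rotation2D) multiply vectors, translations, scalings, and matrices through a single operator*, dispatched by rotation_base_generic_product_selector. A hypothetical standalone sketch of those products, under the same assumption that Eigen is on the include path:

```cpp
#include <Eigen/Geometry>
#include <iostream>

int main() {
  using namespace Eigen;

  AngleAxisd r(EIGEN_PI / 2, Vector3d::UnitZ());

  // rotation * vector -> vector (routed through _transformVector()).
  Vector3d v = r * Vector3d::UnitX();              // ~(0, 1, 0)

  // rotation * translation -> an Isometry Transform.
  Transform<double, 3, Isometry> iso = r * Translation3d(1, 0, 0);

  // rotation * diagonal (axis-aligned scaling) -> an Affine Transform,
  // per the DiagonalMatrix specialization of the product selector.
  Transform<double, 3, Affine> aff = r * Scaling(2.0, 2.0, 2.0);

  // A rotation also converts into a fixed-size matrix via the
  // Matrix constructor/assignment defined at the bottom of the header.
  Matrix3d R = r.toRotationMatrix();

  std::cout << v.transpose() << "\n" << iso.matrix() << "\n"
            << aff.matrix() << "\n" << R << "\n";
  return 0;
}
```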
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Geometry/Scaling.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_SCALING_H\n#define EIGEN_SCALING_H\n\nnamespace Eigen { \n\n/** \\geometry_module \\ingroup Geometry_Module\n  *\n  * \\class UniformScaling\n  *\n  * \\brief Represents a generic uniform scaling transformation\n  *\n  * \\tparam _Scalar the scalar type, i.e., the type of the coefficients.\n  *\n  * This class represent a uniform scaling transformation. It is the return\n  * type of Scaling(Scalar), and most of the time this is the only way it\n  * is used. In particular, this class is not aimed to be used to store a scaling transformation,\n  * but rather to make easier the constructions and updates of Transform objects.\n  *\n  * To represent an axis aligned scaling, use the DiagonalMatrix class.\n  *\n  * \\sa Scaling(), class DiagonalMatrix, MatrixBase::asDiagonal(), class Translation, class Transform\n  */\n\nnamespace internal\n{\n  // This helper helps nvcc+MSVC to properly parse this file.\n  // See bug 1412.\n  template <typename Scalar, int Dim, int Mode>\n  struct uniformscaling_times_affine_returntype\n  {\n    enum\n    {\n      NewMode = int(Mode) == int(Isometry) ? Affine : Mode\n    };\n    typedef Transform <Scalar, Dim, NewMode> type;\n  };\n}\n\ntemplate<typename _Scalar>\nclass UniformScaling\n{\npublic:\n  /** the scalar type of the coefficients */\n  typedef _Scalar Scalar;\n\nprotected:\n\n  Scalar m_factor;\n\npublic:\n\n  /** Default constructor without initialization. 
*/\n  UniformScaling() {}\n  /** Constructs and initialize a uniform scaling transformation */\n  explicit inline UniformScaling(const Scalar& s) : m_factor(s) {}\n\n  inline const Scalar& factor() const { return m_factor; }\n  inline Scalar& factor() { return m_factor; }\n\n  /** Concatenates two uniform scaling */\n  inline UniformScaling operator* (const UniformScaling& other) const\n  { return UniformScaling(m_factor * other.factor()); }\n\n  /** Concatenates a uniform scaling and a translation */\n  template<int Dim>\n  inline Transform<Scalar,Dim,Affine> operator* (const Translation<Scalar,Dim>& t) const;\n\n  /** Concatenates a uniform scaling and an affine transformation */\n  template<int Dim, int Mode, int Options>\n  inline typename\n\tinternal::uniformscaling_times_affine_returntype<Scalar,Dim,Mode>::type\n\toperator* (const Transform<Scalar, Dim, Mode, Options>& t) const\n  {\n    typename internal::uniformscaling_times_affine_returntype<Scalar,Dim,Mode>::type res = t;\n    res.prescale(factor());\n    return res;\n  }\n\n  /** Concatenates a uniform scaling and a linear transformation matrix */\n  // TODO returns an expression\n  template<typename Derived>\n  inline typename Eigen::internal::plain_matrix_type<Derived>::type operator* (const MatrixBase<Derived>& other) const\n  { return other * m_factor; }\n\n  template<typename Derived,int Dim>\n  inline Matrix<Scalar,Dim,Dim> operator*(const RotationBase<Derived,Dim>& r) const\n  { return r.toRotationMatrix() * m_factor; }\n\n  /** \\returns the inverse scaling */\n  inline UniformScaling inverse() const\n  { return UniformScaling(Scalar(1)/m_factor); }\n\n  /** \\returns \\c *this with scalar type casted to \\a NewScalarType\n    *\n    * Note that if \\a NewScalarType is equal to the current scalar type of \\c *this\n    * then this function smartly returns a const reference to \\c *this.\n    */\n  template<typename NewScalarType>\n  inline UniformScaling<NewScalarType> cast() const\n  { return UniformScaling<NewScalarType>(NewScalarType(m_factor)); }\n\n  /** Copy constructor with scalar type conversion */\n  template<typename OtherScalarType>\n  inline explicit UniformScaling(const UniformScaling<OtherScalarType>& other)\n  { m_factor = Scalar(other.factor()); }\n\n  /** \\returns \\c true if \\c *this is approximately equal to \\a other, within the precision\n    * determined by \\a prec.\n    *\n    * \\sa MatrixBase::isApprox() */\n  bool isApprox(const UniformScaling& other, const typename NumTraits<Scalar>::Real& prec = NumTraits<Scalar>::dummy_precision()) const\n  { return internal::isApprox(m_factor, other.factor(), prec); }\n\n};\n\n/** \\addtogroup Geometry_Module */\n//@{\n\n/** Concatenates a linear transformation matrix and a uniform scaling\n  * \\relates UniformScaling\n  */\n// NOTE this operator is defined in MatrixBase and not as a friend function\n// of UniformScaling to fix an internal crash of Intel's ICC\ntemplate<typename Derived,typename Scalar>\nEIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,Scalar,product)\noperator*(const MatrixBase<Derived>& matrix, const UniformScaling<Scalar>& s)\n{ return matrix.derived() * s.factor(); }\n\n/** Constructs a uniform scaling from scale factor \\a s */\ninline UniformScaling<float> Scaling(float s) { return UniformScaling<float>(s); }\n/** Constructs a uniform scaling from scale factor \\a s */\ninline UniformScaling<double> Scaling(double s) { return UniformScaling<double>(s); }\n/** Constructs a uniform scaling from scale factor \\a s */\ntemplate<typename 
RealScalar>\ninline UniformScaling<std::complex<RealScalar> > Scaling(const std::complex<RealScalar>& s)\n{ return UniformScaling<std::complex<RealScalar> >(s); }\n\n/** Constructs a 2D axis aligned scaling */\ntemplate<typename Scalar>\ninline DiagonalMatrix<Scalar,2> Scaling(const Scalar& sx, const Scalar& sy)\n{ return DiagonalMatrix<Scalar,2>(sx, sy); }\n/** Constructs a 3D axis aligned scaling */\ntemplate<typename Scalar>\ninline DiagonalMatrix<Scalar,3> Scaling(const Scalar& sx, const Scalar& sy, const Scalar& sz)\n{ return DiagonalMatrix<Scalar,3>(sx, sy, sz); }\n\n/** Constructs an axis aligned scaling expression from vector expression \\a coeffs\n  * This is an alias for coeffs.asDiagonal()\n  */\ntemplate<typename Derived>\ninline const DiagonalWrapper<const Derived> Scaling(const MatrixBase<Derived>& coeffs)\n{ return coeffs.asDiagonal(); }\n\n/** \\deprecated */\ntypedef DiagonalMatrix<float, 2> AlignedScaling2f;\n/** \\deprecated */\ntypedef DiagonalMatrix<double,2> AlignedScaling2d;\n/** \\deprecated */\ntypedef DiagonalMatrix<float, 3> AlignedScaling3f;\n/** \\deprecated */\ntypedef DiagonalMatrix<double,3> AlignedScaling3d;\n//@}\n\ntemplate<typename Scalar>\ntemplate<int Dim>\ninline Transform<Scalar,Dim,Affine>\nUniformScaling<Scalar>::operator* (const Translation<Scalar,Dim>& t) const\n{\n  Transform<Scalar,Dim,Affine> res;\n  res.matrix().setZero();\n  res.linear().diagonal().fill(factor());\n  res.translation() = factor() * t.vector();\n  res(Dim,Dim) = Scalar(1);\n  return res;\n}\n\n} // end namespace Eigen\n\n#endif // EIGEN_SCALING_H\n"
  },
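Scaling.h above distinguishes uniform scalings (UniformScaling, returned by Scaling(s)) from axis-aligned ones (DiagonalMatrix, returned by the multi-argument Scaling() overloads). A hypothetical standalone sketch of the difference, again assuming only Eigen on the include path:

```cpp
#include <Eigen/Geometry>
#include <iostream>

int main() {
  using namespace Eigen;

  // Uniform scaling: stores only the factor; inverse() is 1/factor.
  UniformScaling<double> s = Scaling(2.0);
  std::cout << s.inverse().factor() << "\n";       // 0.5

  // scaling * translation -> an Affine Transform whose linear diagonal is
  // the factor and whose translation column is factor * t.vector().
  Transform<double, 3, Affine> t = s * Translation3d(1, 2, 3);
  std::cout << t.matrix() << "\n";

  // Axis-aligned (non-uniform) scaling is a DiagonalMatrix instead.
  DiagonalMatrix<double, 2> d = Scaling(2.0, 3.0);
  std::cout << (d * Vector2d(1, 1)).transpose() << "\n";  // (2, 3)
  return 0;
}
```

The design note in the class comment is worth keeping in mind: UniformScaling is meant as a lightweight building block for composing Transform objects, not as a stored transformation type.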
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Geometry/Transform.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>\n// Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com>\n// Copyright (C) 2010 Hauke Heibel <hauke.heibel@gmail.com>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_TRANSFORM_H\n#define EIGEN_TRANSFORM_H\n\nnamespace Eigen {\n\nnamespace internal {\n\ntemplate<typename Transform>\nstruct transform_traits\n{\n  enum\n  {\n    Dim = Transform::Dim,\n    HDim = Transform::HDim,\n    Mode = Transform::Mode,\n    IsProjective = (int(Mode)==int(Projective))\n  };\n};\n\ntemplate< typename TransformType,\n          typename MatrixType,\n          int Case = transform_traits<TransformType>::IsProjective ? 0\n                   : int(MatrixType::RowsAtCompileTime) == int(transform_traits<TransformType>::HDim) ? 1\n                   : 2,\n          int RhsCols = MatrixType::ColsAtCompileTime>\nstruct transform_right_product_impl;\n\ntemplate< typename Other,\n          int Mode,\n          int Options,\n          int Dim,\n          int HDim,\n          int OtherRows=Other::RowsAtCompileTime,\n          int OtherCols=Other::ColsAtCompileTime>\nstruct transform_left_product_impl;\n\ntemplate< typename Lhs,\n          typename Rhs,\n          bool AnyProjective =\n            transform_traits<Lhs>::IsProjective ||\n            transform_traits<Rhs>::IsProjective>\nstruct transform_transform_product_impl;\n\ntemplate< typename Other,\n          int Mode,\n          int Options,\n          int Dim,\n          int HDim,\n          int OtherRows=Other::RowsAtCompileTime,\n          int OtherCols=Other::ColsAtCompileTime>\nstruct transform_construct_from_matrix;\n\ntemplate<typename TransformType> struct transform_take_affine_part;\n\ntemplate<typename _Scalar, int _Dim, int _Mode, int _Options>\nstruct traits<Transform<_Scalar,_Dim,_Mode,_Options> >\n{\n  typedef _Scalar Scalar;\n  typedef Eigen::Index StorageIndex;\n  typedef Dense StorageKind;\n  enum {\n    Dim1 = _Dim==Dynamic ? _Dim : _Dim + 1,\n    RowsAtCompileTime = _Mode==Projective ? Dim1 : _Dim,\n    ColsAtCompileTime = Dim1,\n    MaxRowsAtCompileTime = RowsAtCompileTime,\n    MaxColsAtCompileTime = ColsAtCompileTime,\n    Flags = 0\n  };\n};\n\ntemplate<int Mode> struct transform_make_affine;\n\n} // end namespace internal\n\n/** \\geometry_module \\ingroup Geometry_Module\n  *\n  * \\class Transform\n  *\n  * \\brief Represents an homogeneous transformation in a N dimensional space\n  *\n  * \\tparam _Scalar the scalar type, i.e., the type of the coefficients\n  * \\tparam _Dim the dimension of the space\n  * \\tparam _Mode the type of the transformation. Can be:\n  *              - #Affine: the transformation is stored as a (Dim+1)^2 matrix,\n  *                         where the last row is assumed to be [0 ... 0 1].\n  *              - #AffineCompact: the transformation is stored as a (Dim)x(Dim+1) matrix.\n  *              - #Projective: the transformation is stored as a (Dim+1)^2 matrix\n  *                             without any assumption.\n  *              - #Isometry: same as #Affine with the additional assumption that\n  *                           the linear part represents a rotation. 
This assumption is exploited\n  *                           to speed up some functions such as inverse() and rotation().\n  * \\tparam _Options has the same meaning as in class Matrix. It allows to specify DontAlign and/or RowMajor.\n  *                  These Options are passed directly to the underlying matrix type.\n  *\n  * The homography is internally represented and stored by a matrix which\n  * is available through the matrix() method. To understand the behavior of\n  * this class you have to think a Transform object as its internal\n  * matrix representation. The chosen convention is right multiply:\n  *\n  * \\code v' = T * v \\endcode\n  *\n  * Therefore, an affine transformation matrix M is shaped like this:\n  *\n  * \\f$ \\left( \\begin{array}{cc}\n  * linear & translation\\\\\n  * 0 ... 0 & 1\n  * \\end{array} \\right) \\f$\n  *\n  * Note that for a projective transformation the last row can be anything,\n  * and then the interpretation of different parts might be slightly different.\n  *\n  * However, unlike a plain matrix, the Transform class provides many features\n  * simplifying both its assembly and usage. In particular, it can be composed\n  * with any other transformations (Transform,Translation,RotationBase,DiagonalMatrix)\n  * and can be directly used to transform implicit homogeneous vectors. All these\n  * operations are handled via the operator*. For the composition of transformations,\n  * its principle consists to first convert the right/left hand sides of the product\n  * to a compatible (Dim+1)^2 matrix and then perform a pure matrix product.\n  * Of course, internally, operator* tries to perform the minimal number of operations\n  * according to the nature of each terms. Likewise, when applying the transform\n  * to points, the latters are automatically promoted to homogeneous vectors\n  * before doing the matrix product. The conventions to homogeneous representations\n  * are performed as follow:\n  *\n  * \\b Translation t (Dim)x(1):\n  * \\f$ \\left( \\begin{array}{cc}\n  * I & t \\\\\n  * 0\\,...\\,0 & 1\n  * \\end{array} \\right) \\f$\n  *\n  * \\b Rotation R (Dim)x(Dim):\n  * \\f$ \\left( \\begin{array}{cc}\n  * R & 0\\\\\n  * 0\\,...\\,0 & 1\n  * \\end{array} \\right) \\f$\n  *<!--\n  * \\b Linear \\b Matrix L (Dim)x(Dim):\n  * \\f$ \\left( \\begin{array}{cc}\n  * L & 0\\\\\n  * 0\\,...\\,0 & 1\n  * \\end{array} \\right) \\f$\n  *\n  * \\b Affine \\b Matrix A (Dim)x(Dim+1):\n  * \\f$ \\left( \\begin{array}{c}\n  * A\\\\\n  * 0\\,...\\,0\\,1\n  * \\end{array} \\right) \\f$\n  *-->\n  * \\b Scaling \\b DiagonalMatrix S (Dim)x(Dim):\n  * \\f$ \\left( \\begin{array}{cc}\n  * S & 0\\\\\n  * 0\\,...\\,0 & 1\n  * \\end{array} \\right) \\f$\n  *\n  * \\b Column \\b point v (Dim)x(1):\n  * \\f$ \\left( \\begin{array}{c}\n  * v\\\\\n  * 1\n  * \\end{array} \\right) \\f$\n  *\n  * \\b Set \\b of \\b column \\b points V1...Vn (Dim)x(n):\n  * \\f$ \\left( \\begin{array}{ccc}\n  * v_1 & ... & v_n\\\\\n  * 1 & ... & 1\n  * \\end{array} \\right) \\f$\n  *\n  * The concatenation of a Transform object with any kind of other transformation\n  * always returns a Transform object.\n  *\n  * A little exception to the \"as pure matrix product\" rule is the case of the\n  * transformation of non homogeneous vectors by an affine transformation. 
In\n  * that case the last matrix row can be ignored, and the product returns non\n  * homogeneous vectors.\n  *\n  * Since, for instance, a Dim x Dim matrix is interpreted as a linear transformation,\n  * it is not possible to directly transform Dim vectors stored in a Dim x Dim matrix.\n  * The solution is either to use a Dim x Dynamic matrix or explicitly request a\n  * vector transformation by making the vector homogeneous:\n  * \\code\n  * m' = T * m.colwise().homogeneous();\n  * \\endcode\n  * Note that there is zero overhead.\n  *\n  * Conversion methods from/to Qt's QMatrix and QTransform are available if the\n  * preprocessor token EIGEN_QT_SUPPORT is defined.\n  *\n  * This class can be extended with the help of the plugin mechanism described on the page\n  * \\ref TopicCustomizing_Plugins by defining the preprocessor symbol \\c EIGEN_TRANSFORM_PLUGIN.\n  *\n  * \\sa class Matrix, class Quaternion\n  */\ntemplate<typename _Scalar, int _Dim, int _Mode, int _Options>\nclass Transform\n{\npublic:\n  EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_Dim==Dynamic ? Dynamic : (_Dim+1)*(_Dim+1))\n  enum {\n    Mode = _Mode,\n    Options = _Options,\n    Dim = _Dim,     ///< space dimension in which the transformation holds\n    HDim = _Dim+1,  ///< size of a respective homogeneous vector\n    Rows = int(Mode)==(AffineCompact) ? Dim : HDim\n  };\n  /** the scalar type of the coefficients */\n  typedef _Scalar Scalar;\n  typedef Eigen::Index StorageIndex;\n  typedef Eigen::Index Index; ///< \\deprecated since Eigen 3.3\n  /** type of the matrix used to represent the transformation */\n  typedef typename internal::make_proper_matrix_type<Scalar,Rows,HDim,Options>::type MatrixType;\n  /** constified MatrixType */\n  typedef const MatrixType ConstMatrixType;\n  /** type of the matrix used to represent the linear part of the transformation */\n  typedef Matrix<Scalar,Dim,Dim,Options> LinearMatrixType;\n  /** type of read/write reference to the linear part of the transformation */\n  typedef Block<MatrixType,Dim,Dim,int(Mode)==(AffineCompact) && (int(Options)&RowMajor)==0> LinearPart;\n  /** type of read reference to the linear part of the transformation */\n  typedef const Block<ConstMatrixType,Dim,Dim,int(Mode)==(AffineCompact) && (int(Options)&RowMajor)==0> ConstLinearPart;\n  /** type of read/write reference to the affine part of the transformation */\n  typedef typename internal::conditional<int(Mode)==int(AffineCompact),\n                              MatrixType&,\n                              Block<MatrixType,Dim,HDim> >::type AffinePart;\n  /** type of read reference to the affine part of the transformation */\n  typedef typename internal::conditional<int(Mode)==int(AffineCompact),\n                              const MatrixType&,\n                              const Block<const MatrixType,Dim,HDim> >::type ConstAffinePart;\n  /** type of a vector */\n  typedef Matrix<Scalar,Dim,1> VectorType;\n  /** type of a read/write reference to the translation part of the rotation */\n  typedef Block<MatrixType,Dim,1,!(internal::traits<MatrixType>::Flags & RowMajorBit)> TranslationPart;\n  /** type of a read reference to the translation part of the rotation */\n  typedef const Block<ConstMatrixType,Dim,1,!(internal::traits<MatrixType>::Flags & RowMajorBit)> ConstTranslationPart;\n  /** corresponding translation type */\n  typedef Translation<Scalar,Dim> TranslationType;\n\n  // this intermediate enum is needed to avoid an ICE with gcc 3.4 and 4.0\n  enum { 
TransformTimeDiagonalMode = ((Mode==int(Isometry))?Affine:int(Mode)) };\n  /** The return type of the product between a diagonal matrix and a transform */\n  typedef Transform<Scalar,Dim,TransformTimeDiagonalMode> TransformTimeDiagonalReturnType;\n\nprotected:\n\n  MatrixType m_matrix;\n\npublic:\n\n  /** Default constructor without initialization of the meaningful coefficients.\n    * If Mode==Affine or Mode==Isometry, then the last row is set to [0 ... 0 1] */\n  EIGEN_DEVICE_FUNC inline Transform()\n  {\n    check_template_params();\n    internal::transform_make_affine<(int(Mode)==Affine || int(Mode)==Isometry) ? Affine : AffineCompact>::run(m_matrix);\n  }\n\n  EIGEN_DEVICE_FUNC inline explicit Transform(const TranslationType& t)\n  {\n    check_template_params();\n    *this = t;\n  }\n  EIGEN_DEVICE_FUNC inline explicit Transform(const UniformScaling<Scalar>& s)\n  {\n    check_template_params();\n    *this = s;\n  }\n  template<typename Derived>\n  EIGEN_DEVICE_FUNC inline explicit Transform(const RotationBase<Derived, Dim>& r)\n  {\n    check_template_params();\n    *this = r;\n  }\n\n  typedef internal::transform_take_affine_part<Transform> take_affine_part;\n\n  /** Constructs and initializes a transformation from a Dim^2 or a (Dim+1)^2 matrix. */\n  template<typename OtherDerived>\n  EIGEN_DEVICE_FUNC inline explicit Transform(const EigenBase<OtherDerived>& other)\n  {\n    EIGEN_STATIC_ASSERT((internal::is_same<Scalar,typename OtherDerived::Scalar>::value),\n      YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY);\n\n    check_template_params();\n    internal::transform_construct_from_matrix<OtherDerived,Mode,Options,Dim,HDim>::run(this, other.derived());\n  }\n\n  /** Set \\c *this from a Dim^2 or (Dim+1)^2 matrix. 
*/\n  template<typename OtherDerived>\n  EIGEN_DEVICE_FUNC inline Transform& operator=(const EigenBase<OtherDerived>& other)\n  {\n    EIGEN_STATIC_ASSERT((internal::is_same<Scalar,typename OtherDerived::Scalar>::value),\n      YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY);\n\n    internal::transform_construct_from_matrix<OtherDerived,Mode,Options,Dim,HDim>::run(this, other.derived());\n    return *this;\n  }\n\n  template<int OtherOptions>\n  EIGEN_DEVICE_FUNC inline Transform(const Transform<Scalar,Dim,Mode,OtherOptions>& other)\n  {\n    check_template_params();\n    // only the options change, we can directly copy the matrices\n    m_matrix = other.matrix();\n  }\n\n  template<int OtherMode,int OtherOptions>\n  EIGEN_DEVICE_FUNC inline Transform(const Transform<Scalar,Dim,OtherMode,OtherOptions>& other)\n  {\n    check_template_params();\n    // prevent conversions as:\n    // Affine | AffineCompact | Isometry = Projective\n    EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(OtherMode==int(Projective), Mode==int(Projective)),\n                        YOU_PERFORMED_AN_INVALID_TRANSFORMATION_CONVERSION)\n\n    // prevent conversions as:\n    // Isometry = Affine | AffineCompact\n    EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(OtherMode==int(Affine)||OtherMode==int(AffineCompact), Mode!=int(Isometry)),\n                        YOU_PERFORMED_AN_INVALID_TRANSFORMATION_CONVERSION)\n\n    enum { ModeIsAffineCompact = Mode == int(AffineCompact),\n           OtherModeIsAffineCompact = OtherMode == int(AffineCompact)\n    };\n\n    if(EIGEN_CONST_CONDITIONAL(ModeIsAffineCompact == OtherModeIsAffineCompact))\n    {\n      // We need the block expression because the code is compiled for all\n      // combinations of transformations and will trigger a compile time error\n      // if one tries to assign the matrices directly\n      m_matrix.template block<Dim,Dim+1>(0,0) = other.matrix().template block<Dim,Dim+1>(0,0);\n      makeAffine();\n    }\n    else if(EIGEN_CONST_CONDITIONAL(OtherModeIsAffineCompact))\n    {\n      typedef typename Transform<Scalar,Dim,OtherMode,OtherOptions>::MatrixType OtherMatrixType;\n      internal::transform_construct_from_matrix<OtherMatrixType,Mode,Options,Dim,HDim>::run(this, other.matrix());\n    }\n    else\n    {\n      // here we know that Mode == AffineCompact and OtherMode != AffineCompact.\n      // if OtherMode were Projective, the static assert above would already have caught it.\n      // So the only possibility is that OtherMode == Affine\n      linear() = other.linear();\n      translation() = other.translation();\n    }\n  }\n\n  template<typename OtherDerived>\n  EIGEN_DEVICE_FUNC Transform(const ReturnByValue<OtherDerived>& other)\n  {\n    check_template_params();\n    other.evalTo(*this);\n  }\n\n  template<typename OtherDerived>\n  EIGEN_DEVICE_FUNC Transform& operator=(const ReturnByValue<OtherDerived>& other)\n  {\n    other.evalTo(*this);\n    return *this;\n  }\n\n  #ifdef EIGEN_QT_SUPPORT\n  inline Transform(const QMatrix& other);\n  inline Transform& operator=(const QMatrix& other);\n  inline QMatrix toQMatrix(void) const;\n  inline Transform(const QTransform& other);\n  inline Transform& operator=(const QTransform& other);\n  inline QTransform toQTransform(void) const;\n  #endif\n\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index rows() const EIGEN_NOEXCEPT { return int(Mode)==int(Projective) ? 
m_matrix.cols() : (m_matrix.cols()-1); }\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index cols() const EIGEN_NOEXCEPT { return m_matrix.cols(); }\n\n  /** shortcut for m_matrix(row,col);\n    * \\sa MatrixBase::operator(Index,Index) const */\n  EIGEN_DEVICE_FUNC inline Scalar operator() (Index row, Index col) const { return m_matrix(row,col); }\n  /** shortcut for m_matrix(row,col);\n    * \\sa MatrixBase::operator(Index,Index) */\n  EIGEN_DEVICE_FUNC inline Scalar& operator() (Index row, Index col) { return m_matrix(row,col); }\n\n  /** \\returns a read-only expression of the transformation matrix */\n  EIGEN_DEVICE_FUNC inline const MatrixType& matrix() const { return m_matrix; }\n  /** \\returns a writable expression of the transformation matrix */\n  EIGEN_DEVICE_FUNC inline MatrixType& matrix() { return m_matrix; }\n\n  /** \\returns a read-only expression of the linear part of the transformation */\n  EIGEN_DEVICE_FUNC inline ConstLinearPart linear() const { return ConstLinearPart(m_matrix,0,0); }\n  /** \\returns a writable expression of the linear part of the transformation */\n  EIGEN_DEVICE_FUNC inline LinearPart linear() { return LinearPart(m_matrix,0,0); }\n\n  /** \\returns a read-only expression of the Dim x HDim affine part of the transformation */\n  EIGEN_DEVICE_FUNC inline ConstAffinePart affine() const { return take_affine_part::run(m_matrix); }\n  /** \\returns a writable expression of the Dim x HDim affine part of the transformation */\n  EIGEN_DEVICE_FUNC inline AffinePart affine() { return take_affine_part::run(m_matrix); }\n\n  /** \\returns a read-only expression of the translation vector of the transformation */\n  EIGEN_DEVICE_FUNC inline ConstTranslationPart translation() const { return ConstTranslationPart(m_matrix,0,Dim); }\n  /** \\returns a writable expression of the translation vector of the transformation */\n  EIGEN_DEVICE_FUNC inline TranslationPart translation() { return TranslationPart(m_matrix,0,Dim); }\n\n  /** \\returns an expression of the product between the transform \\c *this and a matrix expression \\a other.\n    *\n    * The right-hand-side \\a other can be either:\n    * \\li an homogeneous vector of size Dim+1,\n    * \\li a set of homogeneous vectors of size Dim+1 x N,\n    * \\li a transformation matrix of size Dim+1 x Dim+1.\n    *\n    * Moreover, if \\c *this represents an affine transformation (i.e., Mode!=Projective), then \\a other can also be:\n    * \\li a point of size Dim (computes: \\code this->linear() * other + this->translation()\\endcode),\n    * \\li a set of N points as a Dim x N matrix (computes: \\code (this->linear() * other).colwise() + this->translation()\\endcode),\n    *\n    * In all cases, the return type is a matrix or vector of same sizes as the right-hand-side \\a other.\n    *\n    * If you want to interpret \\a other as a linear or affine transformation, then first convert it to a Transform<> type,\n    * or do your own cooking.\n    *\n    * Finally, if you want to apply Affine transformations to vectors, then explicitly apply the linear part only:\n    * \\code\n    * Affine3f A;\n    * Vector3f v1, v2;\n    * v2 = A.linear() * v1;\n    * \\endcode\n    *\n    */\n  // note: this function is defined here because some compilers cannot find the respective declaration\n  template<typename OtherDerived>\n  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename internal::transform_right_product_impl<Transform, OtherDerived>::ResultType\n  operator * (const EigenBase<OtherDerived> &other) const\n  { return 
internal::transform_right_product_impl<Transform, OtherDerived>::run(*this,other.derived()); }\n\n  /** \\returns the product expression of a transformation matrix \\a a times a transform \\a b\n    *\n    * The left hand side \\a other can be either:\n    * \\li a linear transformation matrix of size Dim x Dim,\n    * \\li an affine transformation matrix of size Dim x Dim+1,\n    * \\li a general transformation matrix of size Dim+1 x Dim+1.\n    */\n  template<typename OtherDerived> friend\n  EIGEN_DEVICE_FUNC inline const typename internal::transform_left_product_impl<OtherDerived,Mode,Options,_Dim,_Dim+1>::ResultType\n    operator * (const EigenBase<OtherDerived> &a, const Transform &b)\n  { return internal::transform_left_product_impl<OtherDerived,Mode,Options,Dim,HDim>::run(a.derived(),b); }\n\n  /** \\returns The product expression of a transform \\a a times a diagonal matrix \\a b\n    *\n    * The rhs diagonal matrix is interpreted as an affine scaling transformation. The\n    * product results in a Transform of the same type (mode) as the lhs only if the lhs\n    * mode is no isometry. In that case, the returned transform is an affinity.\n    */\n  template<typename DiagonalDerived>\n  EIGEN_DEVICE_FUNC inline const TransformTimeDiagonalReturnType\n    operator * (const DiagonalBase<DiagonalDerived> &b) const\n  {\n    TransformTimeDiagonalReturnType res(*this);\n    res.linearExt() *= b;\n    return res;\n  }\n\n  /** \\returns The product expression of a diagonal matrix \\a a times a transform \\a b\n    *\n    * The lhs diagonal matrix is interpreted as an affine scaling transformation. The\n    * product results in a Transform of the same type (mode) as the lhs only if the lhs\n    * mode is no isometry. In that case, the returned transform is an affinity.\n    */\n  template<typename DiagonalDerived>\n  EIGEN_DEVICE_FUNC friend inline TransformTimeDiagonalReturnType\n    operator * (const DiagonalBase<DiagonalDerived> &a, const Transform &b)\n  {\n    TransformTimeDiagonalReturnType res;\n    res.linear().noalias() = a*b.linear();\n    res.translation().noalias() = a*b.translation();\n    if (EIGEN_CONST_CONDITIONAL(Mode!=int(AffineCompact)))\n      res.matrix().row(Dim) = b.matrix().row(Dim);\n    return res;\n  }\n\n  template<typename OtherDerived>\n  EIGEN_DEVICE_FUNC inline Transform& operator*=(const EigenBase<OtherDerived>& other) { return *this = *this * other; }\n\n  /** Concatenates two transformations */\n  EIGEN_DEVICE_FUNC inline const Transform operator * (const Transform& other) const\n  {\n    return internal::transform_transform_product_impl<Transform,Transform>::run(*this,other);\n  }\n\n  #if EIGEN_COMP_ICC\nprivate:\n  // this intermediate structure permits to workaround a bug in ICC 11:\n  //   error: template instantiation resulted in unexpected function type of \"Eigen::Transform<double, 3, 32, 0>\n  //             (const Eigen::Transform<double, 3, 2, 0> &) const\"\n  //  (the meaning of a name may have changed since the template declaration -- the type of the template is:\n  // \"Eigen::internal::transform_transform_product_impl<Eigen::Transform<double, 3, 32, 0>,\n  //     Eigen::Transform<double, 3, Mode, Options>, <expression>>::ResultType (const Eigen::Transform<double, 3, Mode, Options> &) const\")\n  //\n  template<int OtherMode,int OtherOptions> struct icc_11_workaround\n  {\n    typedef internal::transform_transform_product_impl<Transform,Transform<Scalar,Dim,OtherMode,OtherOptions> > ProductType;\n    typedef typename ProductType::ResultType 
ResultType;\n  };\n\npublic:\n  /** Concatenates two different transformations */\n  template<int OtherMode,int OtherOptions>\n  inline typename icc_11_workaround<OtherMode,OtherOptions>::ResultType\n    operator * (const Transform<Scalar,Dim,OtherMode,OtherOptions>& other) const\n  {\n    typedef typename icc_11_workaround<OtherMode,OtherOptions>::ProductType ProductType;\n    return ProductType::run(*this,other);\n  }\n  #else\n  /** Concatenates two different transformations */\n  template<int OtherMode,int OtherOptions>\n  EIGEN_DEVICE_FUNC inline typename internal::transform_transform_product_impl<Transform,Transform<Scalar,Dim,OtherMode,OtherOptions> >::ResultType\n    operator * (const Transform<Scalar,Dim,OtherMode,OtherOptions>& other) const\n  {\n    return internal::transform_transform_product_impl<Transform,Transform<Scalar,Dim,OtherMode,OtherOptions> >::run(*this,other);\n  }\n  #endif\n\n  /** \\sa MatrixBase::setIdentity() */\n  EIGEN_DEVICE_FUNC void setIdentity() { m_matrix.setIdentity(); }\n\n  /**\n   * \\brief Returns an identity transformation.\n   * \\todo In the future this function should be returning a Transform expression.\n   */\n  EIGEN_DEVICE_FUNC static const Transform Identity()\n  {\n    return Transform(MatrixType::Identity());\n  }\n\n  template<typename OtherDerived>\n  EIGEN_DEVICE_FUNC\n  inline Transform& scale(const MatrixBase<OtherDerived> &other);\n\n  template<typename OtherDerived>\n  EIGEN_DEVICE_FUNC\n  inline Transform& prescale(const MatrixBase<OtherDerived> &other);\n\n  EIGEN_DEVICE_FUNC inline Transform& scale(const Scalar& s);\n  EIGEN_DEVICE_FUNC inline Transform& prescale(const Scalar& s);\n\n  template<typename OtherDerived>\n  EIGEN_DEVICE_FUNC\n  inline Transform& translate(const MatrixBase<OtherDerived> &other);\n\n  template<typename OtherDerived>\n  EIGEN_DEVICE_FUNC\n  inline Transform& pretranslate(const MatrixBase<OtherDerived> &other);\n\n  template<typename RotationType>\n  EIGEN_DEVICE_FUNC\n  inline Transform& rotate(const RotationType& rotation);\n\n  template<typename RotationType>\n  EIGEN_DEVICE_FUNC\n  inline Transform& prerotate(const RotationType& rotation);\n\n  EIGEN_DEVICE_FUNC Transform& shear(const Scalar& sx, const Scalar& sy);\n  EIGEN_DEVICE_FUNC Transform& preshear(const Scalar& sx, const Scalar& sy);\n\n  EIGEN_DEVICE_FUNC inline Transform& operator=(const TranslationType& t);\n\n  EIGEN_DEVICE_FUNC\n  inline Transform& operator*=(const TranslationType& t) { return translate(t.vector()); }\n\n  EIGEN_DEVICE_FUNC inline Transform operator*(const TranslationType& t) const;\n\n  EIGEN_DEVICE_FUNC\n  inline Transform& operator=(const UniformScaling<Scalar>& t);\n\n  EIGEN_DEVICE_FUNC\n  inline Transform& operator*=(const UniformScaling<Scalar>& s) { return scale(s.factor()); }\n\n  EIGEN_DEVICE_FUNC\n  inline TransformTimeDiagonalReturnType operator*(const UniformScaling<Scalar>& s) const\n  {\n    TransformTimeDiagonalReturnType res = *this;\n    res.scale(s.factor());\n    return res;\n  }\n\n  EIGEN_DEVICE_FUNC\n  inline Transform& operator*=(const DiagonalMatrix<Scalar,Dim>& s) { linearExt() *= s; return *this; }\n\n  template<typename Derived>\n  EIGEN_DEVICE_FUNC inline Transform& operator=(const RotationBase<Derived,Dim>& r);\n  template<typename Derived>\n  EIGEN_DEVICE_FUNC inline Transform& operator*=(const RotationBase<Derived,Dim>& r) { return rotate(r.toRotationMatrix()); }\n  template<typename Derived>\n  EIGEN_DEVICE_FUNC inline Transform operator*(const RotationBase<Derived,Dim>& r) const;\n\n  
typedef typename internal::conditional<int(Mode)==Isometry,ConstLinearPart,const LinearMatrixType>::type RotationReturnType;\n  EIGEN_DEVICE_FUNC RotationReturnType rotation() const;\n\n  template<typename RotationMatrixType, typename ScalingMatrixType>\n  EIGEN_DEVICE_FUNC\n  void computeRotationScaling(RotationMatrixType *rotation, ScalingMatrixType *scaling) const;\n  template<typename ScalingMatrixType, typename RotationMatrixType>\n  EIGEN_DEVICE_FUNC\n  void computeScalingRotation(ScalingMatrixType *scaling, RotationMatrixType *rotation) const;\n\n  template<typename PositionDerived, typename OrientationType, typename ScaleDerived>\n  EIGEN_DEVICE_FUNC\n  Transform& fromPositionOrientationScale(const MatrixBase<PositionDerived> &position,\n    const OrientationType& orientation, const MatrixBase<ScaleDerived> &scale);\n\n  EIGEN_DEVICE_FUNC\n  inline Transform inverse(TransformTraits traits = (TransformTraits)Mode) const;\n\n  /** \\returns a const pointer to the column major internal matrix */\n  EIGEN_DEVICE_FUNC const Scalar* data() const { return m_matrix.data(); }\n  /** \\returns a non-const pointer to the column major internal matrix */\n  EIGEN_DEVICE_FUNC Scalar* data() { return m_matrix.data(); }\n\n  /** \\returns \\c *this with scalar type casted to \\a NewScalarType\n    *\n    * Note that if \\a NewScalarType is equal to the current scalar type of \\c *this\n    * then this function smartly returns a const reference to \\c *this.\n    */\n  template<typename NewScalarType>\n  EIGEN_DEVICE_FUNC inline typename internal::cast_return_type<Transform,Transform<NewScalarType,Dim,Mode,Options> >::type cast() const\n  { return typename internal::cast_return_type<Transform,Transform<NewScalarType,Dim,Mode,Options> >::type(*this); }\n\n  /** Copy constructor with scalar type conversion */\n  template<typename OtherScalarType>\n  EIGEN_DEVICE_FUNC inline explicit Transform(const Transform<OtherScalarType,Dim,Mode,Options>& other)\n  {\n    check_template_params();\n    m_matrix = other.matrix().template cast<Scalar>();\n  }\n\n  /** \\returns \\c true if \\c *this is approximately equal to \\a other, within the precision\n    * determined by \\a prec.\n    *\n    * \\sa MatrixBase::isApprox() */\n  EIGEN_DEVICE_FUNC bool isApprox(const Transform& other, const typename NumTraits<Scalar>::Real& prec = NumTraits<Scalar>::dummy_precision()) const\n  { return m_matrix.isApprox(other.m_matrix, prec); }\n\n  /** Sets the last row to [0 ... 
0 1]\n    */\n  EIGEN_DEVICE_FUNC void makeAffine()\n  {\n    internal::transform_make_affine<int(Mode)>::run(m_matrix);\n  }\n\n  /** \\internal\n    * \\returns the Dim x Dim linear part if the transformation is affine,\n    *          and the HDim x Dim part for projective transformations.\n    */\n  EIGEN_DEVICE_FUNC inline Block<MatrixType,int(Mode)==int(Projective)?HDim:Dim,Dim> linearExt()\n  { return m_matrix.template block<int(Mode)==int(Projective)?HDim:Dim,Dim>(0,0); }\n  /** \\internal\n    * \\returns the Dim x Dim linear part if the transformation is affine,\n    *          and the HDim x Dim part for projective transformations.\n    */\n  EIGEN_DEVICE_FUNC inline const Block<MatrixType,int(Mode)==int(Projective)?HDim:Dim,Dim> linearExt() const\n  { return m_matrix.template block<int(Mode)==int(Projective)?HDim:Dim,Dim>(0,0); }\n\n  /** \\internal\n    * \\returns the translation part if the transformation is affine,\n    *          and the last column for projective transformations.\n    */\n  EIGEN_DEVICE_FUNC inline Block<MatrixType,int(Mode)==int(Projective)?HDim:Dim,1> translationExt()\n  { return m_matrix.template block<int(Mode)==int(Projective)?HDim:Dim,1>(0,Dim); }\n  /** \\internal\n    * \\returns the translation part if the transformation is affine,\n    *          and the last column for projective transformations.\n    */\n  EIGEN_DEVICE_FUNC inline const Block<MatrixType,int(Mode)==int(Projective)?HDim:Dim,1> translationExt() const\n  { return m_matrix.template block<int(Mode)==int(Projective)?HDim:Dim,1>(0,Dim); }\n\n\n  #ifdef EIGEN_TRANSFORM_PLUGIN\n  #include EIGEN_TRANSFORM_PLUGIN\n  #endif\n\nprotected:\n  #ifndef EIGEN_PARSED_BY_DOXYGEN\n    EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void check_template_params()\n    {\n      EIGEN_STATIC_ASSERT((Options & (DontAlign|RowMajor)) == Options, INVALID_MATRIX_TEMPLATE_PARAMETERS)\n    }\n  #endif\n\n};\n\n/** \\ingroup Geometry_Module */\ntypedef Transform<float,2,Isometry> Isometry2f;\n/** \\ingroup Geometry_Module */\ntypedef Transform<float,3,Isometry> Isometry3f;\n/** \\ingroup Geometry_Module */\ntypedef Transform<double,2,Isometry> Isometry2d;\n/** \\ingroup Geometry_Module */\ntypedef Transform<double,3,Isometry> Isometry3d;\n\n/** \\ingroup Geometry_Module */\ntypedef Transform<float,2,Affine> Affine2f;\n/** \\ingroup Geometry_Module */\ntypedef Transform<float,3,Affine> Affine3f;\n/** \\ingroup Geometry_Module */\ntypedef Transform<double,2,Affine> Affine2d;\n/** \\ingroup Geometry_Module */\ntypedef Transform<double,3,Affine> Affine3d;\n\n/** \\ingroup Geometry_Module */\ntypedef Transform<float,2,AffineCompact> AffineCompact2f;\n/** \\ingroup Geometry_Module */\ntypedef Transform<float,3,AffineCompact> AffineCompact3f;\n/** \\ingroup Geometry_Module */\ntypedef Transform<double,2,AffineCompact> AffineCompact2d;\n/** \\ingroup Geometry_Module */\ntypedef Transform<double,3,AffineCompact> AffineCompact3d;\n\n/** \\ingroup Geometry_Module */\ntypedef Transform<float,2,Projective> Projective2f;\n/** \\ingroup Geometry_Module */\ntypedef Transform<float,3,Projective> Projective3f;\n/** \\ingroup Geometry_Module */\ntypedef Transform<double,2,Projective> Projective2d;\n/** \\ingroup Geometry_Module */\ntypedef Transform<double,3,Projective> Projective3d;\n\n/**************************\n*** Optional QT support ***\n**************************/\n\n#ifdef EIGEN_QT_SUPPORT\n/** Initializes \\c *this from a QMatrix assuming the dimension is 2.\n  *\n  * This function is available only if the token 
EIGEN_QT_SUPPORT is defined.\n  */\ntemplate<typename Scalar, int Dim, int Mode,int Options>\nTransform<Scalar,Dim,Mode,Options>::Transform(const QMatrix& other)\n{\n  check_template_params();\n  *this = other;\n}\n\n/** Set \\c *this from a QMatrix assuming the dimension is 2.\n  *\n  * This function is available only if the token EIGEN_QT_SUPPORT is defined.\n  */\ntemplate<typename Scalar, int Dim, int Mode,int Options>\nTransform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::operator=(const QMatrix& other)\n{\n  EIGEN_STATIC_ASSERT(Dim==2, YOU_MADE_A_PROGRAMMING_MISTAKE)\n  if (EIGEN_CONST_CONDITIONAL(Mode == int(AffineCompact)))\n    m_matrix << other.m11(), other.m21(), other.dx(),\n                other.m12(), other.m22(), other.dy();\n  else\n    m_matrix << other.m11(), other.m21(), other.dx(),\n                other.m12(), other.m22(), other.dy(),\n                0, 0, 1;\n  return *this;\n}\n\n/** \\returns a QMatrix from \\c *this assuming the dimension is 2.\n  *\n  * \\warning this conversion might loss data if \\c *this is not affine\n  *\n  * This function is available only if the token EIGEN_QT_SUPPORT is defined.\n  */\ntemplate<typename Scalar, int Dim, int Mode, int Options>\nQMatrix Transform<Scalar,Dim,Mode,Options>::toQMatrix(void) const\n{\n  check_template_params();\n  EIGEN_STATIC_ASSERT(Dim==2, YOU_MADE_A_PROGRAMMING_MISTAKE)\n  return QMatrix(m_matrix.coeff(0,0), m_matrix.coeff(1,0),\n                 m_matrix.coeff(0,1), m_matrix.coeff(1,1),\n                 m_matrix.coeff(0,2), m_matrix.coeff(1,2));\n}\n\n/** Initializes \\c *this from a QTransform assuming the dimension is 2.\n  *\n  * This function is available only if the token EIGEN_QT_SUPPORT is defined.\n  */\ntemplate<typename Scalar, int Dim, int Mode,int Options>\nTransform<Scalar,Dim,Mode,Options>::Transform(const QTransform& other)\n{\n  check_template_params();\n  *this = other;\n}\n\n/** Set \\c *this from a QTransform assuming the dimension is 2.\n  *\n  * This function is available only if the token EIGEN_QT_SUPPORT is defined.\n  */\ntemplate<typename Scalar, int Dim, int Mode, int Options>\nTransform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::operator=(const QTransform& other)\n{\n  check_template_params();\n  EIGEN_STATIC_ASSERT(Dim==2, YOU_MADE_A_PROGRAMMING_MISTAKE)\n  if (EIGEN_CONST_CONDITIONAL(Mode == int(AffineCompact)))\n    m_matrix << other.m11(), other.m21(), other.dx(),\n                other.m12(), other.m22(), other.dy();\n  else\n    m_matrix << other.m11(), other.m21(), other.dx(),\n                other.m12(), other.m22(), other.dy(),\n                other.m13(), other.m23(), other.m33();\n  return *this;\n}\n\n/** \\returns a QTransform from \\c *this assuming the dimension is 2.\n  *\n  * This function is available only if the token EIGEN_QT_SUPPORT is defined.\n  */\ntemplate<typename Scalar, int Dim, int Mode, int Options>\nQTransform Transform<Scalar,Dim,Mode,Options>::toQTransform(void) const\n{\n  EIGEN_STATIC_ASSERT(Dim==2, YOU_MADE_A_PROGRAMMING_MISTAKE)\n  if (EIGEN_CONST_CONDITIONAL(Mode == int(AffineCompact)))\n    return QTransform(m_matrix.coeff(0,0), m_matrix.coeff(1,0),\n                      m_matrix.coeff(0,1), m_matrix.coeff(1,1),\n                      m_matrix.coeff(0,2), m_matrix.coeff(1,2));\n  else\n    return QTransform(m_matrix.coeff(0,0), m_matrix.coeff(1,0), m_matrix.coeff(2,0),\n                      m_matrix.coeff(0,1), m_matrix.coeff(1,1), m_matrix.coeff(2,1),\n                      m_matrix.coeff(0,2), 
m_matrix.coeff(1,2), m_matrix.coeff(2,2));\n}\n#endif\n\n/*********************\n*** Procedural API ***\n*********************/\n\n/** Applies on the right the non uniform scale transformation represented\n  * by the vector \\a other to \\c *this and returns a reference to \\c *this.\n  * \\sa prescale()\n  */\ntemplate<typename Scalar, int Dim, int Mode, int Options>\ntemplate<typename OtherDerived>\nEIGEN_DEVICE_FUNC Transform<Scalar,Dim,Mode,Options>&\nTransform<Scalar,Dim,Mode,Options>::scale(const MatrixBase<OtherDerived> &other)\n{\n  EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(OtherDerived,int(Dim))\n  EIGEN_STATIC_ASSERT(Mode!=int(Isometry), THIS_METHOD_IS_ONLY_FOR_SPECIFIC_TRANSFORMATIONS)\n  linearExt().noalias() = (linearExt() * other.asDiagonal());\n  return *this;\n}\n\n/** Applies on the right a uniform scale of a factor \\a c to \\c *this\n  * and returns a reference to \\c *this.\n  * \\sa prescale(Scalar)\n  */\ntemplate<typename Scalar, int Dim, int Mode, int Options>\nEIGEN_DEVICE_FUNC inline Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::scale(const Scalar& s)\n{\n  EIGEN_STATIC_ASSERT(Mode!=int(Isometry), THIS_METHOD_IS_ONLY_FOR_SPECIFIC_TRANSFORMATIONS)\n  linearExt() *= s;\n  return *this;\n}\n\n/** Applies on the left the non uniform scale transformation represented\n  * by the vector \\a other to \\c *this and returns a reference to \\c *this.\n  * \\sa scale()\n  */\ntemplate<typename Scalar, int Dim, int Mode, int Options>\ntemplate<typename OtherDerived>\nEIGEN_DEVICE_FUNC Transform<Scalar,Dim,Mode,Options>&\nTransform<Scalar,Dim,Mode,Options>::prescale(const MatrixBase<OtherDerived> &other)\n{\n  EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(OtherDerived,int(Dim))\n  EIGEN_STATIC_ASSERT(Mode!=int(Isometry), THIS_METHOD_IS_ONLY_FOR_SPECIFIC_TRANSFORMATIONS)\n  affine().noalias() = (other.asDiagonal() * affine());\n  return *this;\n}\n\n/** Applies on the left a uniform scale of a factor \\a c to \\c *this\n  * and returns a reference to \\c *this.\n  * \\sa scale(Scalar)\n  */\ntemplate<typename Scalar, int Dim, int Mode, int Options>\nEIGEN_DEVICE_FUNC inline Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::prescale(const Scalar& s)\n{\n  EIGEN_STATIC_ASSERT(Mode!=int(Isometry), THIS_METHOD_IS_ONLY_FOR_SPECIFIC_TRANSFORMATIONS)\n  m_matrix.template topRows<Dim>() *= s;\n  return *this;\n}\n\n/** Applies on the right the translation matrix represented by the vector \\a other\n  * to \\c *this and returns a reference to \\c *this.\n  * \\sa pretranslate()\n  */\ntemplate<typename Scalar, int Dim, int Mode, int Options>\ntemplate<typename OtherDerived>\nEIGEN_DEVICE_FUNC Transform<Scalar,Dim,Mode,Options>&\nTransform<Scalar,Dim,Mode,Options>::translate(const MatrixBase<OtherDerived> &other)\n{\n  EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(OtherDerived,int(Dim))\n  translationExt() += linearExt() * other;\n  return *this;\n}\n\n/** Applies on the left the translation matrix represented by the vector \\a other\n  * to \\c *this and returns a reference to \\c *this.\n  * \\sa translate()\n  */\ntemplate<typename Scalar, int Dim, int Mode, int Options>\ntemplate<typename OtherDerived>\nEIGEN_DEVICE_FUNC Transform<Scalar,Dim,Mode,Options>&\nTransform<Scalar,Dim,Mode,Options>::pretranslate(const MatrixBase<OtherDerived> &other)\n{\n  EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(OtherDerived,int(Dim))\n  if(EIGEN_CONST_CONDITIONAL(int(Mode)==int(Projective)))\n    affine() += other * m_matrix.row(Dim);\n  else\n    translation() += 
other;\n  return *this;\n}\n\n/** Applies on the right the rotation represented by the rotation \\a rotation\n  * to \\c *this and returns a reference to \\c *this.\n  *\n  * The template parameter \\a RotationType is the type of the rotation which\n  * must be known by internal::toRotationMatrix<>.\n  *\n  * Natively supported types includes:\n  *   - any scalar (2D),\n  *   - a Dim x Dim matrix expression,\n  *   - a Quaternion (3D),\n  *   - a AngleAxis (3D)\n  *\n  * This mechanism is easily extendable to support user types such as Euler angles,\n  * or a pair of Quaternion for 4D rotations.\n  *\n  * \\sa rotate(Scalar), class Quaternion, class AngleAxis, prerotate(RotationType)\n  */\ntemplate<typename Scalar, int Dim, int Mode, int Options>\ntemplate<typename RotationType>\nEIGEN_DEVICE_FUNC Transform<Scalar,Dim,Mode,Options>&\nTransform<Scalar,Dim,Mode,Options>::rotate(const RotationType& rotation)\n{\n  linearExt() *= internal::toRotationMatrix<Scalar,Dim>(rotation);\n  return *this;\n}\n\n/** Applies on the left the rotation represented by the rotation \\a rotation\n  * to \\c *this and returns a reference to \\c *this.\n  *\n  * See rotate() for further details.\n  *\n  * \\sa rotate()\n  */\ntemplate<typename Scalar, int Dim, int Mode, int Options>\ntemplate<typename RotationType>\nEIGEN_DEVICE_FUNC Transform<Scalar,Dim,Mode,Options>&\nTransform<Scalar,Dim,Mode,Options>::prerotate(const RotationType& rotation)\n{\n  m_matrix.template block<Dim,HDim>(0,0) = internal::toRotationMatrix<Scalar,Dim>(rotation)\n                                         * m_matrix.template block<Dim,HDim>(0,0);\n  return *this;\n}\n\n/** Applies on the right the shear transformation represented\n  * by the vector \\a other to \\c *this and returns a reference to \\c *this.\n  * \\warning 2D only.\n  * \\sa preshear()\n  */\ntemplate<typename Scalar, int Dim, int Mode, int Options>\nEIGEN_DEVICE_FUNC Transform<Scalar,Dim,Mode,Options>&\nTransform<Scalar,Dim,Mode,Options>::shear(const Scalar& sx, const Scalar& sy)\n{\n  EIGEN_STATIC_ASSERT(int(Dim)==2, YOU_MADE_A_PROGRAMMING_MISTAKE)\n  EIGEN_STATIC_ASSERT(Mode!=int(Isometry), THIS_METHOD_IS_ONLY_FOR_SPECIFIC_TRANSFORMATIONS)\n  VectorType tmp = linear().col(0)*sy + linear().col(1);\n  linear() << linear().col(0) + linear().col(1)*sx, tmp;\n  return *this;\n}\n\n/** Applies on the left the shear transformation represented\n  * by the vector \\a other to \\c *this and returns a reference to \\c *this.\n  * \\warning 2D only.\n  * \\sa shear()\n  */\ntemplate<typename Scalar, int Dim, int Mode, int Options>\nEIGEN_DEVICE_FUNC Transform<Scalar,Dim,Mode,Options>&\nTransform<Scalar,Dim,Mode,Options>::preshear(const Scalar& sx, const Scalar& sy)\n{\n  EIGEN_STATIC_ASSERT(int(Dim)==2, YOU_MADE_A_PROGRAMMING_MISTAKE)\n  EIGEN_STATIC_ASSERT(Mode!=int(Isometry), THIS_METHOD_IS_ONLY_FOR_SPECIFIC_TRANSFORMATIONS)\n  m_matrix.template block<Dim,HDim>(0,0) = LinearMatrixType(1, sx, sy, 1) * m_matrix.template block<Dim,HDim>(0,0);\n  return *this;\n}\n\n/******************************************************\n*** Scaling, Translation and Rotation compatibility ***\n******************************************************/\n\ntemplate<typename Scalar, int Dim, int Mode, int Options>\nEIGEN_DEVICE_FUNC inline Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::operator=(const TranslationType& t)\n{\n  linear().setIdentity();\n  translation() = t.vector();\n  makeAffine();\n  return *this;\n}\n\ntemplate<typename Scalar, int Dim, int Mode, int 
Options>\nEIGEN_DEVICE_FUNC inline Transform<Scalar,Dim,Mode,Options> Transform<Scalar,Dim,Mode,Options>::operator*(const TranslationType& t) const\n{\n  Transform res = *this;\n  res.translate(t.vector());\n  return res;\n}\n\ntemplate<typename Scalar, int Dim, int Mode, int Options>\nEIGEN_DEVICE_FUNC inline Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::operator=(const UniformScaling<Scalar>& s)\n{\n  m_matrix.setZero();\n  linear().diagonal().fill(s.factor());\n  makeAffine();\n  return *this;\n}\n\ntemplate<typename Scalar, int Dim, int Mode, int Options>\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC inline Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::operator=(const RotationBase<Derived,Dim>& r)\n{\n  linear() = internal::toRotationMatrix<Scalar,Dim>(r);\n  translation().setZero();\n  makeAffine();\n  return *this;\n}\n\ntemplate<typename Scalar, int Dim, int Mode, int Options>\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC inline Transform<Scalar,Dim,Mode,Options> Transform<Scalar,Dim,Mode,Options>::operator*(const RotationBase<Derived,Dim>& r) const\n{\n  Transform res = *this;\n  res.rotate(r.derived());\n  return res;\n}\n\n/************************\n*** Special functions ***\n************************/\n\nnamespace internal {\ntemplate<int Mode> struct transform_rotation_impl {\n  template<typename TransformType>\n  EIGEN_DEVICE_FUNC static inline\n  const typename TransformType::LinearMatrixType run(const TransformType& t)\n  {\n    typedef typename TransformType::LinearMatrixType LinearMatrixType;\n    LinearMatrixType result;\n    t.computeRotationScaling(&result, (LinearMatrixType*)0);\n    return result;\n  }\n};\ntemplate<> struct transform_rotation_impl<Isometry> {\n  template<typename TransformType>\n  EIGEN_DEVICE_FUNC static inline\n  typename TransformType::ConstLinearPart run(const TransformType& t)\n  {\n    return t.linear();\n  }\n};\n}\n/** \\returns the rotation part of the transformation\n  *\n  * If Mode==Isometry, then this method is an alias for linear(),\n  * otherwise it calls computeRotationScaling() to extract the rotation\n  * through a SVD decomposition.\n  *\n  * \\svd_module\n  *\n  * \\sa computeRotationScaling(), computeScalingRotation(), class SVD\n  */\ntemplate<typename Scalar, int Dim, int Mode, int Options>\nEIGEN_DEVICE_FUNC\ntypename Transform<Scalar,Dim,Mode,Options>::RotationReturnType\nTransform<Scalar,Dim,Mode,Options>::rotation() const\n{\n  return internal::transform_rotation_impl<Mode>::run(*this);\n}\n\n\n/** decomposes the linear part of the transformation as a product rotation x scaling, the scaling being\n  * not necessarily positive.\n  *\n  * If either pointer is zero, the corresponding computation is skipped.\n  *\n  *\n  *\n  * \\svd_module\n  *\n  * \\sa computeScalingRotation(), rotation(), class SVD\n  */\ntemplate<typename Scalar, int Dim, int Mode, int Options>\ntemplate<typename RotationMatrixType, typename ScalingMatrixType>\nEIGEN_DEVICE_FUNC void Transform<Scalar,Dim,Mode,Options>::computeRotationScaling(RotationMatrixType *rotation, ScalingMatrixType *scaling) const\n{\n  // Note that JacobiSVD is faster than BDCSVD for small matrices.\n  JacobiSVD<LinearMatrixType> svd(linear(), ComputeFullU | ComputeFullV);\n\n  Scalar x = (svd.matrixU() * svd.matrixV().adjoint()).determinant() < Scalar(0) ? 
Scalar(-1) : Scalar(1); // so x has absolute value 1\n  VectorType sv(svd.singularValues());\n  sv.coeffRef(Dim-1) *= x;\n  if(scaling) *scaling = svd.matrixV() * sv.asDiagonal() * svd.matrixV().adjoint();\n  if(rotation)\n  {\n    LinearMatrixType m(svd.matrixU());\n    m.col(Dim-1) *= x;\n    *rotation = m * svd.matrixV().adjoint();\n  }\n}\n\n/** decomposes the linear part of the transformation as a product scaling x rotation, the scaling being\n  * not necessarily positive.\n  *\n  * If either pointer is zero, the corresponding computation is skipped.\n  *\n  *\n  *\n  * \\svd_module\n  *\n  * \\sa computeRotationScaling(), rotation(), class SVD\n  */\ntemplate<typename Scalar, int Dim, int Mode, int Options>\ntemplate<typename ScalingMatrixType, typename RotationMatrixType>\nEIGEN_DEVICE_FUNC void Transform<Scalar,Dim,Mode,Options>::computeScalingRotation(ScalingMatrixType *scaling, RotationMatrixType *rotation) const\n{\n  // Note that JacobiSVD is faster than BDCSVD for small matrices.\n  JacobiSVD<LinearMatrixType> svd(linear(), ComputeFullU | ComputeFullV);\n\n  Scalar x = (svd.matrixU() * svd.matrixV().adjoint()).determinant() < Scalar(0) ? Scalar(-1) : Scalar(1); // so x has absolute value 1\n  VectorType sv(svd.singularValues());\n  sv.coeffRef(Dim-1) *= x;\n  if(scaling) *scaling = svd.matrixU() * sv.asDiagonal() * svd.matrixU().adjoint();\n  if(rotation)\n  {\n    LinearMatrixType m(svd.matrixU());\n    m.col(Dim-1) *= x;\n    *rotation = m * svd.matrixV().adjoint();\n  }\n}\n\n/** Convenient method to set \\c *this from a position, orientation and scale\n  * of a 3D object.\n  */\ntemplate<typename Scalar, int Dim, int Mode, int Options>\ntemplate<typename PositionDerived, typename OrientationType, typename ScaleDerived>\nEIGEN_DEVICE_FUNC Transform<Scalar,Dim,Mode,Options>&\nTransform<Scalar,Dim,Mode,Options>::fromPositionOrientationScale(const MatrixBase<PositionDerived> &position,\n  const OrientationType& orientation, const MatrixBase<ScaleDerived> &scale)\n{\n  linear() = internal::toRotationMatrix<Scalar,Dim>(orientation);\n  linear() *= scale.asDiagonal();\n  translation() = position;\n  makeAffine();\n  return *this;\n}\n\nnamespace internal {\n\ntemplate<int Mode>\nstruct transform_make_affine\n{\n  template<typename MatrixType>\n  EIGEN_DEVICE_FUNC static void run(MatrixType &mat)\n  {\n    static const int Dim = MatrixType::ColsAtCompileTime-1;\n    mat.template block<1,Dim>(Dim,0).setZero();\n    mat.coeffRef(Dim,Dim) = typename MatrixType::Scalar(1);\n  }\n};\n\ntemplate<>\nstruct transform_make_affine<AffineCompact>\n{\n  template<typename MatrixType> EIGEN_DEVICE_FUNC static void run(MatrixType &) { }\n};\n\n// selector needed to avoid taking the inverse of a 3x4 matrix\ntemplate<typename TransformType, int Mode=TransformType::Mode>\nstruct projective_transform_inverse\n{\n  EIGEN_DEVICE_FUNC static inline void run(const TransformType&, TransformType&)\n  {}\n};\n\ntemplate<typename TransformType>\nstruct projective_transform_inverse<TransformType, Projective>\n{\n  EIGEN_DEVICE_FUNC static inline void run(const TransformType& m, TransformType& res)\n  {\n    res.matrix() = m.matrix().inverse();\n  }\n};\n\n} // end namespace internal\n\n\n/**\n  *\n  * \\returns the inverse transformation according to some given knowledge\n  * on \\c *this.\n  *\n  * \\param hint allows to optimize the inversion process when the transformation\n  * is known to be not a general transformation (optional). 
The possible values are:\n  *  - #Projective if the transformation is not necessarily affine, i.e., if the\n  *    last row is not guaranteed to be [0 ... 0 1]\n  *  - #Affine if the last row can be assumed to be [0 ... 0 1]\n  *  - #Isometry if the transformation is only a concatenations of translations\n  *    and rotations.\n  *  The default is the template class parameter \\c Mode.\n  *\n  * \\warning unless \\a traits is always set to NoShear or NoScaling, this function\n  * requires the generic inverse method of MatrixBase defined in the LU module. If\n  * you forget to include this module, then you will get hard to debug linking errors.\n  *\n  * \\sa MatrixBase::inverse()\n  */\ntemplate<typename Scalar, int Dim, int Mode, int Options>\nEIGEN_DEVICE_FUNC Transform<Scalar,Dim,Mode,Options>\nTransform<Scalar,Dim,Mode,Options>::inverse(TransformTraits hint) const\n{\n  Transform res;\n  if (hint == Projective)\n  {\n    internal::projective_transform_inverse<Transform>::run(*this, res);\n  }\n  else\n  {\n    if (hint == Isometry)\n    {\n      res.matrix().template topLeftCorner<Dim,Dim>() = linear().transpose();\n    }\n    else if(hint&Affine)\n    {\n      res.matrix().template topLeftCorner<Dim,Dim>() = linear().inverse();\n    }\n    else\n    {\n      eigen_assert(false && \"Invalid transform traits in Transform::Inverse\");\n    }\n    // translation and remaining parts\n    res.matrix().template topRightCorner<Dim,1>()\n      = - res.matrix().template topLeftCorner<Dim,Dim>() * translation();\n    res.makeAffine(); // we do need this, because in the beginning res is uninitialized\n  }\n  return res;\n}\n\nnamespace internal {\n\n/*****************************************************\n*** Specializations of take affine part            ***\n*****************************************************/\n\ntemplate<typename TransformType> struct transform_take_affine_part {\n  typedef typename TransformType::MatrixType MatrixType;\n  typedef typename TransformType::AffinePart AffinePart;\n  typedef typename TransformType::ConstAffinePart ConstAffinePart;\n  static inline AffinePart run(MatrixType& m)\n  { return m.template block<TransformType::Dim,TransformType::HDim>(0,0); }\n  static inline ConstAffinePart run(const MatrixType& m)\n  { return m.template block<TransformType::Dim,TransformType::HDim>(0,0); }\n};\n\ntemplate<typename Scalar, int Dim, int Options>\nstruct transform_take_affine_part<Transform<Scalar,Dim,AffineCompact, Options> > {\n  typedef typename Transform<Scalar,Dim,AffineCompact,Options>::MatrixType MatrixType;\n  static inline MatrixType& run(MatrixType& m) { return m; }\n  static inline const MatrixType& run(const MatrixType& m) { return m; }\n};\n\n/*****************************************************\n*** Specializations of construct from matrix       ***\n*****************************************************/\n\ntemplate<typename Other, int Mode, int Options, int Dim, int HDim>\nstruct transform_construct_from_matrix<Other, Mode,Options,Dim,HDim, Dim,Dim>\n{\n  static inline void run(Transform<typename Other::Scalar,Dim,Mode,Options> *transform, const Other& other)\n  {\n    transform->linear() = other;\n    transform->translation().setZero();\n    transform->makeAffine();\n  }\n};\n\ntemplate<typename Other, int Mode, int Options, int Dim, int HDim>\nstruct transform_construct_from_matrix<Other, Mode,Options,Dim,HDim, Dim,HDim>\n{\n  static inline void run(Transform<typename Other::Scalar,Dim,Mode,Options> *transform, const Other& other)\n  {\n    
transform->affine() = other;\n    transform->makeAffine();\n  }\n};\n\ntemplate<typename Other, int Mode, int Options, int Dim, int HDim>\nstruct transform_construct_from_matrix<Other, Mode,Options,Dim,HDim, HDim,HDim>\n{\n  static inline void run(Transform<typename Other::Scalar,Dim,Mode,Options> *transform, const Other& other)\n  { transform->matrix() = other; }\n};\n\ntemplate<typename Other, int Options, int Dim, int HDim>\nstruct transform_construct_from_matrix<Other, AffineCompact,Options,Dim,HDim, HDim,HDim>\n{\n  static inline void run(Transform<typename Other::Scalar,Dim,AffineCompact,Options> *transform, const Other& other)\n  { transform->matrix() = other.template block<Dim,HDim>(0,0); }\n};\n\n/**********************************************************\n***   Specializations of operator* with rhs EigenBase   ***\n**********************************************************/\n\ntemplate<int LhsMode,int RhsMode>\nstruct transform_product_result\n{\n  enum\n  {\n    Mode =\n      (LhsMode == (int)Projective    || RhsMode == (int)Projective    ) ? Projective :\n      (LhsMode == (int)Affine        || RhsMode == (int)Affine        ) ? Affine :\n      (LhsMode == (int)AffineCompact || RhsMode == (int)AffineCompact ) ? AffineCompact :\n      (LhsMode == (int)Isometry      || RhsMode == (int)Isometry      ) ? Isometry : Projective\n  };\n};\n\ntemplate< typename TransformType, typename MatrixType, int RhsCols>\nstruct transform_right_product_impl< TransformType, MatrixType, 0, RhsCols>\n{\n  typedef typename MatrixType::PlainObject ResultType;\n\n  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ResultType run(const TransformType& T, const MatrixType& other)\n  {\n    return T.matrix() * other;\n  }\n};\n\ntemplate< typename TransformType, typename MatrixType, int RhsCols>\nstruct transform_right_product_impl< TransformType, MatrixType, 1, RhsCols>\n{\n  enum {\n    Dim = TransformType::Dim,\n    HDim = TransformType::HDim,\n    OtherRows = MatrixType::RowsAtCompileTime,\n    OtherCols = MatrixType::ColsAtCompileTime\n  };\n\n  typedef typename MatrixType::PlainObject ResultType;\n\n  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ResultType run(const TransformType& T, const MatrixType& other)\n  {\n    EIGEN_STATIC_ASSERT(OtherRows==HDim, YOU_MIXED_MATRICES_OF_DIFFERENT_SIZES);\n\n    typedef Block<ResultType, Dim, OtherCols, int(MatrixType::RowsAtCompileTime)==Dim> TopLeftLhs;\n\n    ResultType res(other.rows(),other.cols());\n    TopLeftLhs(res, 0, 0, Dim, other.cols()).noalias() = T.affine() * other;\n    res.row(OtherRows-1) = other.row(OtherRows-1);\n\n    return res;\n  }\n};\n\ntemplate< typename TransformType, typename MatrixType, int RhsCols>\nstruct transform_right_product_impl< TransformType, MatrixType, 2, RhsCols>\n{\n  enum {\n    Dim = TransformType::Dim,\n    HDim = TransformType::HDim,\n    OtherRows = MatrixType::RowsAtCompileTime,\n    OtherCols = MatrixType::ColsAtCompileTime\n  };\n\n  typedef typename MatrixType::PlainObject ResultType;\n\n  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ResultType run(const TransformType& T, const MatrixType& other)\n  {\n    EIGEN_STATIC_ASSERT(OtherRows==Dim, YOU_MIXED_MATRICES_OF_DIFFERENT_SIZES);\n\n    typedef Block<ResultType, Dim, OtherCols, true> TopLeftLhs;\n    ResultType res(Replicate<typename TransformType::ConstTranslationPart, 1, OtherCols>(T.translation(),1,other.cols()));\n    TopLeftLhs(res, 0, 0, Dim, other.cols()).noalias() += T.linear() * other;\n\n    return res;\n  }\n};\n\ntemplate< typename TransformType, typename 
MatrixType >\nstruct transform_right_product_impl< TransformType, MatrixType, 2, 1> // rhs is a vector of size Dim\n{\n  typedef typename TransformType::MatrixType TransformMatrix;\n  enum {\n    Dim = TransformType::Dim,\n    HDim = TransformType::HDim,\n    OtherRows = MatrixType::RowsAtCompileTime,\n    WorkingRows = EIGEN_PLAIN_ENUM_MIN(TransformMatrix::RowsAtCompileTime,HDim)\n  };\n\n  typedef typename MatrixType::PlainObject ResultType;\n\n  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ResultType run(const TransformType& T, const MatrixType& other)\n  {\n    EIGEN_STATIC_ASSERT(OtherRows==Dim, YOU_MIXED_MATRICES_OF_DIFFERENT_SIZES);\n\n    Matrix<typename ResultType::Scalar, Dim+1, 1> rhs;\n    rhs.template head<Dim>() = other; rhs[Dim] = typename ResultType::Scalar(1);\n    Matrix<typename ResultType::Scalar, WorkingRows, 1> res(T.matrix() * rhs);\n    return res.template head<Dim>();\n  }\n};\n\n/**********************************************************\n***   Specializations of operator* with lhs EigenBase   ***\n**********************************************************/\n\n// generic HDim x HDim matrix * T => Projective\ntemplate<typename Other,int Mode, int Options, int Dim, int HDim>\nstruct transform_left_product_impl<Other,Mode,Options,Dim,HDim, HDim,HDim>\n{\n  typedef Transform<typename Other::Scalar,Dim,Mode,Options> TransformType;\n  typedef typename TransformType::MatrixType MatrixType;\n  typedef Transform<typename Other::Scalar,Dim,Projective,Options> ResultType;\n  static ResultType run(const Other& other,const TransformType& tr)\n  { return ResultType(other * tr.matrix()); }\n};\n\n// generic HDim x HDim matrix * AffineCompact => Projective\ntemplate<typename Other, int Options, int Dim, int HDim>\nstruct transform_left_product_impl<Other,AffineCompact,Options,Dim,HDim, HDim,HDim>\n{\n  typedef Transform<typename Other::Scalar,Dim,AffineCompact,Options> TransformType;\n  typedef typename TransformType::MatrixType MatrixType;\n  typedef Transform<typename Other::Scalar,Dim,Projective,Options> ResultType;\n  static ResultType run(const Other& other,const TransformType& tr)\n  {\n    ResultType res;\n    res.matrix().noalias() = other.template block<HDim,Dim>(0,0) * tr.matrix();\n    res.matrix().col(Dim) += other.col(Dim);\n    return res;\n  }\n};\n\n// affine matrix * T\ntemplate<typename Other,int Mode, int Options, int Dim, int HDim>\nstruct transform_left_product_impl<Other,Mode,Options,Dim,HDim, Dim,HDim>\n{\n  typedef Transform<typename Other::Scalar,Dim,Mode,Options> TransformType;\n  typedef typename TransformType::MatrixType MatrixType;\n  typedef TransformType ResultType;\n  static ResultType run(const Other& other,const TransformType& tr)\n  {\n    ResultType res;\n    res.affine().noalias() = other * tr.matrix();\n    res.matrix().row(Dim) = tr.matrix().row(Dim);\n    return res;\n  }\n};\n\n// affine matrix * AffineCompact\ntemplate<typename Other, int Options, int Dim, int HDim>\nstruct transform_left_product_impl<Other,AffineCompact,Options,Dim,HDim, Dim,HDim>\n{\n  typedef Transform<typename Other::Scalar,Dim,AffineCompact,Options> TransformType;\n  typedef typename TransformType::MatrixType MatrixType;\n  typedef TransformType ResultType;\n  static ResultType run(const Other& other,const TransformType& tr)\n  {\n    ResultType res;\n    res.matrix().noalias() = other.template block<Dim,Dim>(0,0) * tr.matrix();\n    res.translation() += other.col(Dim);\n    return res;\n  }\n};\n\n// linear matrix * T\ntemplate<typename Other,int Mode, int Options, 
int Dim, int HDim>\nstruct transform_left_product_impl<Other,Mode,Options,Dim,HDim, Dim,Dim>\n{\n  typedef Transform<typename Other::Scalar,Dim,Mode,Options> TransformType;\n  typedef typename TransformType::MatrixType MatrixType;\n  typedef TransformType ResultType;\n  static ResultType run(const Other& other, const TransformType& tr)\n  {\n    TransformType res;\n    if(Mode!=int(AffineCompact))\n      res.matrix().row(Dim) = tr.matrix().row(Dim);\n    res.matrix().template topRows<Dim>().noalias()\n      = other * tr.matrix().template topRows<Dim>();\n    return res;\n  }\n};\n\n/**********************************************************\n*** Specializations of operator* with another Transform ***\n**********************************************************/\n\ntemplate<typename Scalar, int Dim, int LhsMode, int LhsOptions, int RhsMode, int RhsOptions>\nstruct transform_transform_product_impl<Transform<Scalar,Dim,LhsMode,LhsOptions>,Transform<Scalar,Dim,RhsMode,RhsOptions>,false >\n{\n  enum { ResultMode = transform_product_result<LhsMode,RhsMode>::Mode };\n  typedef Transform<Scalar,Dim,LhsMode,LhsOptions> Lhs;\n  typedef Transform<Scalar,Dim,RhsMode,RhsOptions> Rhs;\n  typedef Transform<Scalar,Dim,ResultMode,LhsOptions> ResultType;\n  static ResultType run(const Lhs& lhs, const Rhs& rhs)\n  {\n    ResultType res;\n    res.linear() = lhs.linear() * rhs.linear();\n    res.translation() = lhs.linear() * rhs.translation() + lhs.translation();\n    res.makeAffine();\n    return res;\n  }\n};\n\ntemplate<typename Scalar, int Dim, int LhsMode, int LhsOptions, int RhsMode, int RhsOptions>\nstruct transform_transform_product_impl<Transform<Scalar,Dim,LhsMode,LhsOptions>,Transform<Scalar,Dim,RhsMode,RhsOptions>,true >\n{\n  typedef Transform<Scalar,Dim,LhsMode,LhsOptions> Lhs;\n  typedef Transform<Scalar,Dim,RhsMode,RhsOptions> Rhs;\n  typedef Transform<Scalar,Dim,Projective> ResultType;\n  static ResultType run(const Lhs& lhs, const Rhs& rhs)\n  {\n    return ResultType( lhs.matrix() * rhs.matrix() );\n  }\n};\n\ntemplate<typename Scalar, int Dim, int LhsOptions, int RhsOptions>\nstruct transform_transform_product_impl<Transform<Scalar,Dim,AffineCompact,LhsOptions>,Transform<Scalar,Dim,Projective,RhsOptions>,true >\n{\n  typedef Transform<Scalar,Dim,AffineCompact,LhsOptions> Lhs;\n  typedef Transform<Scalar,Dim,Projective,RhsOptions> Rhs;\n  typedef Transform<Scalar,Dim,Projective> ResultType;\n  static ResultType run(const Lhs& lhs, const Rhs& rhs)\n  {\n    ResultType res;\n    res.matrix().template topRows<Dim>() = lhs.matrix() * rhs.matrix();\n    res.matrix().row(Dim) = rhs.matrix().row(Dim);\n    return res;\n  }\n};\n\ntemplate<typename Scalar, int Dim, int LhsOptions, int RhsOptions>\nstruct transform_transform_product_impl<Transform<Scalar,Dim,Projective,LhsOptions>,Transform<Scalar,Dim,AffineCompact,RhsOptions>,true >\n{\n  typedef Transform<Scalar,Dim,Projective,LhsOptions> Lhs;\n  typedef Transform<Scalar,Dim,AffineCompact,RhsOptions> Rhs;\n  typedef Transform<Scalar,Dim,Projective> ResultType;\n  static ResultType run(const Lhs& lhs, const Rhs& rhs)\n  {\n    ResultType res(lhs.matrix().template leftCols<Dim>() * rhs.matrix());\n    res.matrix().col(Dim) += lhs.matrix().col(Dim);\n    return res;\n  }\n};\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_TRANSFORM_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Geometry/Translation.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_TRANSLATION_H\n#define EIGEN_TRANSLATION_H\n\nnamespace Eigen { \n\n/** \\geometry_module \\ingroup Geometry_Module\n  *\n  * \\class Translation\n  *\n  * \\brief Represents a translation transformation\n  *\n  * \\tparam _Scalar the scalar type, i.e., the type of the coefficients.\n  * \\tparam _Dim the  dimension of the space, can be a compile time value or Dynamic\n  *\n  * \\note This class is not aimed to be used to store a translation transformation,\n  * but rather to make easier the constructions and updates of Transform objects.\n  *\n  * \\sa class Scaling, class Transform\n  */\ntemplate<typename _Scalar, int _Dim>\nclass Translation\n{\npublic:\n  EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_Dim)\n  /** dimension of the space */\n  enum { Dim = _Dim };\n  /** the scalar type of the coefficients */\n  typedef _Scalar Scalar;\n  /** corresponding vector type */\n  typedef Matrix<Scalar,Dim,1> VectorType;\n  /** corresponding linear transformation matrix type */\n  typedef Matrix<Scalar,Dim,Dim> LinearMatrixType;\n  /** corresponding affine transformation type */\n  typedef Transform<Scalar,Dim,Affine> AffineTransformType;\n  /** corresponding isometric transformation type */\n  typedef Transform<Scalar,Dim,Isometry> IsometryTransformType;\n\nprotected:\n\n  VectorType m_coeffs;\n\npublic:\n\n  /** Default constructor without initialization. */\n  EIGEN_DEVICE_FUNC Translation() {}\n  /**  */\n  EIGEN_DEVICE_FUNC inline Translation(const Scalar& sx, const Scalar& sy)\n  {\n    eigen_assert(Dim==2);\n    m_coeffs.x() = sx;\n    m_coeffs.y() = sy;\n  }\n  /**  */\n  EIGEN_DEVICE_FUNC inline Translation(const Scalar& sx, const Scalar& sy, const Scalar& sz)\n  {\n    eigen_assert(Dim==3);\n    m_coeffs.x() = sx;\n    m_coeffs.y() = sy;\n    m_coeffs.z() = sz;\n  }\n  /** Constructs and initialize the translation transformation from a vector of translation coefficients */\n  EIGEN_DEVICE_FUNC explicit inline Translation(const VectorType& vector) : m_coeffs(vector) {}\n\n  /** \\brief Returns the x-translation by value. **/\n  EIGEN_DEVICE_FUNC inline Scalar x() const { return m_coeffs.x(); }\n  /** \\brief Returns the y-translation by value. **/\n  EIGEN_DEVICE_FUNC inline Scalar y() const { return m_coeffs.y(); }\n  /** \\brief Returns the z-translation by value. **/\n  EIGEN_DEVICE_FUNC inline Scalar z() const { return m_coeffs.z(); }\n\n  /** \\brief Returns the x-translation as a reference. **/\n  EIGEN_DEVICE_FUNC inline Scalar& x() { return m_coeffs.x(); }\n  /** \\brief Returns the y-translation as a reference. **/\n  EIGEN_DEVICE_FUNC inline Scalar& y() { return m_coeffs.y(); }\n  /** \\brief Returns the z-translation as a reference. 
**/\n  EIGEN_DEVICE_FUNC inline Scalar& z() { return m_coeffs.z(); }\n\n  EIGEN_DEVICE_FUNC const VectorType& vector() const { return m_coeffs; }\n  EIGEN_DEVICE_FUNC VectorType& vector() { return m_coeffs; }\n\n  EIGEN_DEVICE_FUNC const VectorType& translation() const { return m_coeffs; }\n  EIGEN_DEVICE_FUNC VectorType& translation() { return m_coeffs; }\n\n  /** Concatenates two translation */\n  EIGEN_DEVICE_FUNC inline Translation operator* (const Translation& other) const\n  { return Translation(m_coeffs + other.m_coeffs); }\n\n  /** Concatenates a translation and a uniform scaling */\n  EIGEN_DEVICE_FUNC inline AffineTransformType operator* (const UniformScaling<Scalar>& other) const;\n\n  /** Concatenates a translation and a linear transformation */\n  template<typename OtherDerived>\n  EIGEN_DEVICE_FUNC inline AffineTransformType operator* (const EigenBase<OtherDerived>& linear) const;\n\n  /** Concatenates a translation and a rotation */\n  template<typename Derived>\n  EIGEN_DEVICE_FUNC inline IsometryTransformType operator*(const RotationBase<Derived,Dim>& r) const\n  { return *this * IsometryTransformType(r); }\n\n  /** \\returns the concatenation of a linear transformation \\a l with the translation \\a t */\n  // its a nightmare to define a templated friend function outside its declaration\n  template<typename OtherDerived> friend\n  EIGEN_DEVICE_FUNC inline AffineTransformType operator*(const EigenBase<OtherDerived>& linear, const Translation& t)\n  {\n    AffineTransformType res;\n    res.matrix().setZero();\n    res.linear() = linear.derived();\n    res.translation() = linear.derived() * t.m_coeffs;\n    res.matrix().row(Dim).setZero();\n    res(Dim,Dim) = Scalar(1);\n    return res;\n  }\n\n  /** Concatenates a translation and a transformation */\n  template<int Mode, int Options>\n  EIGEN_DEVICE_FUNC inline Transform<Scalar,Dim,Mode> operator* (const Transform<Scalar,Dim,Mode,Options>& t) const\n  {\n    Transform<Scalar,Dim,Mode> res = t;\n    res.pretranslate(m_coeffs);\n    return res;\n  }\n\n  /** Applies translation to vector */\n  template<typename Derived>\n  inline typename internal::enable_if<Derived::IsVectorAtCompileTime,VectorType>::type\n  operator* (const MatrixBase<Derived>& vec) const\n  { return m_coeffs + vec.derived(); }\n\n  /** \\returns the inverse translation (opposite) */\n  Translation inverse() const { return Translation(-m_coeffs); }\n\n  static const Translation Identity() { return Translation(VectorType::Zero()); }\n\n  /** \\returns \\c *this with scalar type casted to \\a NewScalarType\n    *\n    * Note that if \\a NewScalarType is equal to the current scalar type of \\c *this\n    * then this function smartly returns a const reference to \\c *this.\n    */\n  template<typename NewScalarType>\n  EIGEN_DEVICE_FUNC inline typename internal::cast_return_type<Translation,Translation<NewScalarType,Dim> >::type cast() const\n  { return typename internal::cast_return_type<Translation,Translation<NewScalarType,Dim> >::type(*this); }\n\n  /** Copy constructor with scalar type conversion */\n  template<typename OtherScalarType>\n  EIGEN_DEVICE_FUNC inline explicit Translation(const Translation<OtherScalarType,Dim>& other)\n  { m_coeffs = other.vector().template cast<Scalar>(); }\n\n  /** \\returns \\c true if \\c *this is approximately equal to \\a other, within the precision\n    * determined by \\a prec.\n    *\n    * \\sa MatrixBase::isApprox() */\n  EIGEN_DEVICE_FUNC bool isApprox(const Translation& other, const typename 
NumTraits<Scalar>::Real& prec = NumTraits<Scalar>::dummy_precision()) const\n  { return m_coeffs.isApprox(other.m_coeffs, prec); }\n\n};\n\n/** \\addtogroup Geometry_Module */\n//@{\ntypedef Translation<float, 2> Translation2f;\ntypedef Translation<double,2> Translation2d;\ntypedef Translation<float, 3> Translation3f;\ntypedef Translation<double,3> Translation3d;\n//@}\n\ntemplate<typename Scalar, int Dim>\nEIGEN_DEVICE_FUNC inline typename Translation<Scalar,Dim>::AffineTransformType\nTranslation<Scalar,Dim>::operator* (const UniformScaling<Scalar>& other) const\n{\n  AffineTransformType res;\n  res.matrix().setZero();\n  res.linear().diagonal().fill(other.factor());\n  res.translation() = m_coeffs;\n  res(Dim,Dim) = Scalar(1);\n  return res;\n}\n\ntemplate<typename Scalar, int Dim>\ntemplate<typename OtherDerived>\nEIGEN_DEVICE_FUNC inline typename Translation<Scalar,Dim>::AffineTransformType\nTranslation<Scalar,Dim>::operator* (const EigenBase<OtherDerived>& linear) const\n{\n  AffineTransformType res;\n  res.matrix().setZero();\n  res.linear() = linear.derived();\n  res.translation() = m_coeffs;\n  res.matrix().row(Dim).setZero();\n  res(Dim,Dim) = Scalar(1);\n  return res;\n}\n\n} // end namespace Eigen\n\n#endif // EIGEN_TRANSLATION_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Geometry/Umeyama.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2009 Hauke Heibel <hauke.heibel@gmail.com>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_UMEYAMA_H\n#define EIGEN_UMEYAMA_H\n\n// This file requires the user to include \n// * Eigen/Core\n// * Eigen/LU \n// * Eigen/SVD\n// * Eigen/Array\n\nnamespace Eigen { \n\n#ifndef EIGEN_PARSED_BY_DOXYGEN\n\n// These helpers are required since it allows to use mixed types as parameters\n// for the Umeyama. The problem with mixed parameters is that the return type\n// cannot trivially be deduced when float and double types are mixed.\nnamespace internal {\n\n// Compile time return type deduction for different MatrixBase types.\n// Different means here different alignment and parameters but the same underlying\n// real scalar type.\ntemplate<typename MatrixType, typename OtherMatrixType>\nstruct umeyama_transform_matrix_type\n{\n  enum {\n    MinRowsAtCompileTime = EIGEN_SIZE_MIN_PREFER_DYNAMIC(MatrixType::RowsAtCompileTime, OtherMatrixType::RowsAtCompileTime),\n\n    // When possible we want to choose some small fixed size value since the result\n    // is likely to fit on the stack. So here, EIGEN_SIZE_MIN_PREFER_DYNAMIC is not what we want.\n    HomogeneousDimension = int(MinRowsAtCompileTime) == Dynamic ? Dynamic : int(MinRowsAtCompileTime)+1\n  };\n\n  typedef Matrix<typename traits<MatrixType>::Scalar,\n    HomogeneousDimension,\n    HomogeneousDimension,\n    AutoAlign | (traits<MatrixType>::Flags & RowMajorBit ? RowMajor : ColMajor),\n    HomogeneousDimension,\n    HomogeneousDimension\n  > type;\n};\n\n}\n\n#endif\n\n/**\n* \\geometry_module \\ingroup Geometry_Module\n*\n* \\brief Returns the transformation between two point sets.\n*\n* The algorithm is based on:\n* \"Least-squares estimation of transformation parameters between two point patterns\",\n* Shinji Umeyama, PAMI 1991, DOI: 10.1109/34.88573\n*\n* It estimates parameters \\f$ c, \\mathbf{R}, \\f$ and \\f$ \\mathbf{t} \\f$ such that\n* \\f{align*}\n*   \\frac{1}{n} \\sum_{i=1}^n \\vert\\vert y_i - (c\\mathbf{R}x_i + \\mathbf{t}) \\vert\\vert_2^2\n* \\f}\n* is minimized.\n*\n* The algorithm is based on the analysis of the covariance matrix\n* \\f$ \\Sigma_{\\mathbf{x}\\mathbf{y}} \\in \\mathbb{R}^{d \\times d} \\f$\n* of the input point sets \\f$ \\mathbf{x} \\f$ and \\f$ \\mathbf{y} \\f$ where \n* \\f$d\\f$ is corresponding to the dimension (which is typically small).\n* The analysis is involving the SVD having a complexity of \\f$O(d^3)\\f$\n* though the actual computational effort lies in the covariance\n* matrix computation which has an asymptotic lower bound of \\f$O(dm)\\f$ when \n* the input point sets have dimension \\f$d \\times m\\f$.\n*\n* Currently the method is working only for floating point matrices.\n*\n* \\todo Should the return type of umeyama() become a Transform?\n*\n* \\param src Source points \\f$ \\mathbf{x} = \\left( x_1, \\hdots, x_n \\right) \\f$.\n* \\param dst Destination points \\f$ \\mathbf{y} = \\left( y_1, \\hdots, y_n \\right) \\f$.\n* \\param with_scaling Sets \\f$ c=1 \\f$ when <code>false</code> is passed.\n* \\return The homogeneous transformation \n* \\f{align*}\n*   T = \\begin{bmatrix} c\\mathbf{R} & \\mathbf{t} \\\\ \\mathbf{0} & 1 \\end{bmatrix}\n* \\f}\n* minimizing the residual above. 
This transformation is always returned as an \n* Eigen::Matrix.\n*/\ntemplate <typename Derived, typename OtherDerived>\ntypename internal::umeyama_transform_matrix_type<Derived, OtherDerived>::type\numeyama(const MatrixBase<Derived>& src, const MatrixBase<OtherDerived>& dst, bool with_scaling = true)\n{\n  typedef typename internal::umeyama_transform_matrix_type<Derived, OtherDerived>::type TransformationMatrixType;\n  typedef typename internal::traits<TransformationMatrixType>::Scalar Scalar;\n  typedef typename NumTraits<Scalar>::Real RealScalar;\n\n  EIGEN_STATIC_ASSERT(!NumTraits<Scalar>::IsComplex, NUMERIC_TYPE_MUST_BE_REAL)\n  EIGEN_STATIC_ASSERT((internal::is_same<Scalar, typename internal::traits<OtherDerived>::Scalar>::value),\n    YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)\n\n  enum { Dimension = EIGEN_SIZE_MIN_PREFER_DYNAMIC(Derived::RowsAtCompileTime, OtherDerived::RowsAtCompileTime) };\n\n  typedef Matrix<Scalar, Dimension, 1> VectorType;\n  typedef Matrix<Scalar, Dimension, Dimension> MatrixType;\n  typedef typename internal::plain_matrix_type_row_major<Derived>::type RowMajorMatrixType;\n\n  const Index m = src.rows(); // dimension\n  const Index n = src.cols(); // number of measurements\n\n  // required for demeaning ...\n  const RealScalar one_over_n = RealScalar(1) / static_cast<RealScalar>(n);\n\n  // computation of mean\n  const VectorType src_mean = src.rowwise().sum() * one_over_n;\n  const VectorType dst_mean = dst.rowwise().sum() * one_over_n;\n\n  // demeaning of src and dst points\n  const RowMajorMatrixType src_demean = src.colwise() - src_mean;\n  const RowMajorMatrixType dst_demean = dst.colwise() - dst_mean;\n\n  // Eq. (36)-(37)\n  const Scalar src_var = src_demean.rowwise().squaredNorm().sum() * one_over_n;\n\n  // Eq. (38)\n  const MatrixType sigma = one_over_n * dst_demean * src_demean.transpose();\n\n  JacobiSVD<MatrixType> svd(sigma, ComputeFullU | ComputeFullV);\n\n  // Initialize the resulting transformation with an identity matrix...\n  TransformationMatrixType Rt = TransformationMatrixType::Identity(m+1,m+1);\n\n  // Eq. (39)\n  VectorType S = VectorType::Ones(m);\n\n  if  ( svd.matrixU().determinant() * svd.matrixV().determinant() < 0 )\n    S(m-1) = -1;\n\n  // Eq. (40) and (43)\n  Rt.block(0,0,m,m).noalias() = svd.matrixU() * S.asDiagonal() * svd.matrixV().transpose();\n\n  if (with_scaling)\n  {\n    // Eq. (42)\n    const Scalar c = Scalar(1)/src_var * svd.singularValues().dot(S);\n\n    // Eq. (41)\n    Rt.col(m).head(m) = dst_mean;\n    Rt.col(m).head(m).noalias() -= c*Rt.topLeftCorner(m,m)*src_mean;\n    Rt.block(0,0,m,m) *= c;\n  }\n  else\n  {\n    Rt.col(m).head(m) = dst_mean;\n    Rt.col(m).head(m).noalias() -= Rt.topLeftCorner(m,m)*src_mean;\n  }\n\n  return Rt;\n}\n\n} // end namespace Eigen\n\n#endif // EIGEN_UMEYAMA_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Geometry/arch/Geometry_SIMD.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2009 Rohit Garg <rpg.314@gmail.com>\n// Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_GEOMETRY_SIMD_H\n#define EIGEN_GEOMETRY_SIMD_H\n\nnamespace Eigen { \n\nnamespace internal {\n\ntemplate<class Derived, class OtherDerived>\nstruct quat_product<Architecture::Target, Derived, OtherDerived, float>\n{\n  enum {\n    AAlignment = traits<Derived>::Alignment,\n    BAlignment = traits<OtherDerived>::Alignment,\n    ResAlignment = traits<Quaternion<float> >::Alignment\n  };\n  static inline Quaternion<float> run(const QuaternionBase<Derived>& _a, const QuaternionBase<OtherDerived>& _b)\n  {\n    evaluator<typename Derived::Coefficients> ae(_a.coeffs());\n    evaluator<typename OtherDerived::Coefficients> be(_b.coeffs());\n    Quaternion<float> res;\n    const float neg_zero = numext::bit_cast<float>(0x80000000u);\n    const float arr[4] = {0.f, 0.f, 0.f, neg_zero};\n    const Packet4f mask = ploadu<Packet4f>(arr);\n    Packet4f a = ae.template packet<AAlignment,Packet4f>(0);\n    Packet4f b = be.template packet<BAlignment,Packet4f>(0);\n    Packet4f s1 = pmul(vec4f_swizzle1(a,1,2,0,2),vec4f_swizzle1(b,2,0,1,2));\n    Packet4f s2 = pmul(vec4f_swizzle1(a,3,3,3,1),vec4f_swizzle1(b,0,1,2,1));\n    pstoret<float,Packet4f,ResAlignment>(\n              &res.x(),\n              padd(psub(pmul(a,vec4f_swizzle1(b,3,3,3,3)),\n                                    pmul(vec4f_swizzle1(a,2,0,1,0),\n                                               vec4f_swizzle1(b,1,2,0,0))),\n                         pxor(mask,padd(s1,s2))));\n    \n    return res;\n  }\n};\n\ntemplate<class Derived>\nstruct quat_conj<Architecture::Target, Derived, float>\n{\n  enum {\n    ResAlignment = traits<Quaternion<float> >::Alignment\n  };\n  static inline Quaternion<float> run(const QuaternionBase<Derived>& q)\n  {\n    evaluator<typename Derived::Coefficients> qe(q.coeffs());\n    Quaternion<float> res;\n    const float neg_zero = numext::bit_cast<float>(0x80000000u);\n    const float arr[4] = {neg_zero, neg_zero, neg_zero,0.f};\n    const Packet4f mask = ploadu<Packet4f>(arr);\n    pstoret<float,Packet4f,ResAlignment>(&res.x(), pxor(mask, qe.template packet<traits<Derived>::Alignment,Packet4f>(0)));\n    return res;\n  }\n};\n\n\ntemplate<typename VectorLhs,typename VectorRhs>\nstruct cross3_impl<Architecture::Target,VectorLhs,VectorRhs,float,true>\n{\n  enum {\n    ResAlignment = traits<typename plain_matrix_type<VectorLhs>::type>::Alignment\n  };\n  static inline typename plain_matrix_type<VectorLhs>::type\n  run(const VectorLhs& lhs, const VectorRhs& rhs)\n  {\n    evaluator<VectorLhs> lhs_eval(lhs);\n    evaluator<VectorRhs> rhs_eval(rhs);\n    Packet4f a = lhs_eval.template packet<traits<VectorLhs>::Alignment,Packet4f>(0);\n    Packet4f b = rhs_eval.template packet<traits<VectorRhs>::Alignment,Packet4f>(0);\n    Packet4f mul1 = pmul(vec4f_swizzle1(a,1,2,0,3),vec4f_swizzle1(b,2,0,1,3));\n    Packet4f mul2 = pmul(vec4f_swizzle1(a,2,0,1,3),vec4f_swizzle1(b,1,2,0,3));\n    typename plain_matrix_type<VectorLhs>::type res;\n    pstoret<float,Packet4f,ResAlignment>(&res.x(),psub(mul1,mul2));\n    return res;\n  }\n};\n\n\n\n#if (defined EIGEN_VECTORIZE_SSE) || 
(EIGEN_ARCH_ARM64)\n\ntemplate<class Derived, class OtherDerived>\nstruct quat_product<Architecture::Target, Derived, OtherDerived, double>\n{\n  enum {\n    BAlignment = traits<OtherDerived>::Alignment,\n    ResAlignment = traits<Quaternion<double> >::Alignment\n  };\n\n  static inline Quaternion<double> run(const QuaternionBase<Derived>& _a, const QuaternionBase<OtherDerived>& _b)\n  {\n  Quaternion<double> res;\n\n  evaluator<typename Derived::Coefficients> ae(_a.coeffs());\n  evaluator<typename OtherDerived::Coefficients> be(_b.coeffs());\n\n  const double* a = _a.coeffs().data();\n  Packet2d b_xy = be.template packet<BAlignment,Packet2d>(0);\n  Packet2d b_zw = be.template packet<BAlignment,Packet2d>(2);\n  Packet2d a_xx = pset1<Packet2d>(a[0]);\n  Packet2d a_yy = pset1<Packet2d>(a[1]);\n  Packet2d a_zz = pset1<Packet2d>(a[2]);\n  Packet2d a_ww = pset1<Packet2d>(a[3]);\n\n  // two temporaries:\n  Packet2d t1, t2;\n\n  /*\n   * t1 = ww*xy + yy*zw\n   * t2 = zz*xy - xx*zw\n   * res.xy = t1 +/- swap(t2)\n   */\n  t1 = padd(pmul(a_ww, b_xy), pmul(a_yy, b_zw));\n  t2 = psub(pmul(a_zz, b_xy), pmul(a_xx, b_zw));\n  pstoret<double,Packet2d,ResAlignment>(&res.x(), paddsub(t1, preverse(t2)));\n  \n  /*\n   * t1 = ww*zw - yy*xy\n   * t2 = zz*zw + xx*xy\n   * res.zw = t1 -/+ swap(t2) = swap( swap(t1) +/- t2)\n   */\n  t1 = psub(pmul(a_ww, b_zw), pmul(a_yy, b_xy));\n  t2 = padd(pmul(a_zz, b_zw), pmul(a_xx, b_xy));\n  pstoret<double,Packet2d,ResAlignment>(&res.z(), preverse(paddsub(preverse(t1), t2)));\n\n  return res;\n}\n};\n\ntemplate<class Derived>\nstruct quat_conj<Architecture::Target, Derived, double>\n{\n  enum {\n    ResAlignment = traits<Quaternion<double> >::Alignment\n  };\n  static inline Quaternion<double> run(const QuaternionBase<Derived>& q)\n  {\n    evaluator<typename Derived::Coefficients> qe(q.coeffs());\n    Quaternion<double> res;\n    const double neg_zero = numext::bit_cast<double>(0x8000000000000000ull);\n    const double arr1[2] = {neg_zero, neg_zero};\n    const double arr2[2] = {neg_zero,  0.0};\n    const Packet2d mask0 = ploadu<Packet2d>(arr1);\n    const Packet2d mask2 = ploadu<Packet2d>(arr2);\n    pstoret<double,Packet2d,ResAlignment>(&res.x(), pxor(mask0, qe.template packet<traits<Derived>::Alignment,Packet2d>(0)));\n    pstoret<double,Packet2d,ResAlignment>(&res.z(), pxor(mask2, qe.template packet<traits<Derived>::Alignment,Packet2d>(2)));\n    return res;\n  }\n};\n\n#endif // end EIGEN_VECTORIZE_SSE_OR_EIGEN_ARCH_ARM64\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_GEOMETRY_SIMD_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Householder/BlockHouseholder.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2010 Vincent Lejeune\n// Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_BLOCK_HOUSEHOLDER_H\n#define EIGEN_BLOCK_HOUSEHOLDER_H\n\n// This file contains some helper function to deal with block householder reflectors\n\nnamespace Eigen { \n\nnamespace internal {\n  \n/** \\internal */\n// template<typename TriangularFactorType,typename VectorsType,typename CoeffsType>\n// void make_block_householder_triangular_factor(TriangularFactorType& triFactor, const VectorsType& vectors, const CoeffsType& hCoeffs)\n// {\n//   typedef typename VectorsType::Scalar Scalar;\n//   const Index nbVecs = vectors.cols();\n//   eigen_assert(triFactor.rows() == nbVecs && triFactor.cols() == nbVecs && vectors.rows()>=nbVecs);\n// \n//   for(Index i = 0; i < nbVecs; i++)\n//   {\n//     Index rs = vectors.rows() - i;\n//     // Warning, note that hCoeffs may alias with vectors.\n//     // It is then necessary to copy it before modifying vectors(i,i). \n//     typename CoeffsType::Scalar h = hCoeffs(i);\n//     // This hack permits to pass trough nested Block<> and Transpose<> expressions.\n//     Scalar *Vii_ptr = const_cast<Scalar*>(vectors.data() + vectors.outerStride()*i + vectors.innerStride()*i);\n//     Scalar Vii = *Vii_ptr;\n//     *Vii_ptr = Scalar(1);\n//     triFactor.col(i).head(i).noalias() = -h * vectors.block(i, 0, rs, i).adjoint()\n//                                        * vectors.col(i).tail(rs);\n//     *Vii_ptr = Vii;\n//     // FIXME add .noalias() once the triangular product can work inplace\n//     triFactor.col(i).head(i) = triFactor.block(0,0,i,i).template triangularView<Upper>()\n//                              * triFactor.col(i).head(i);\n//     triFactor(i,i) = hCoeffs(i);\n//   }\n// }\n\n/** \\internal */\n// This variant avoid modifications in vectors\ntemplate<typename TriangularFactorType,typename VectorsType,typename CoeffsType>\nvoid make_block_householder_triangular_factor(TriangularFactorType& triFactor, const VectorsType& vectors, const CoeffsType& hCoeffs)\n{\n  const Index nbVecs = vectors.cols();\n  eigen_assert(triFactor.rows() == nbVecs && triFactor.cols() == nbVecs && vectors.rows()>=nbVecs);\n\n  for(Index i = nbVecs-1; i >=0 ; --i)\n  {\n    Index rs = vectors.rows() - i - 1;\n    Index rt = nbVecs-i-1;\n\n    if(rt>0)\n    {\n      triFactor.row(i).tail(rt).noalias() = -hCoeffs(i) * vectors.col(i).tail(rs).adjoint()\n                                                        * vectors.bottomRightCorner(rs, rt).template triangularView<UnitLower>();\n            \n      // FIXME use the following line with .noalias() once the triangular product can work inplace\n      // triFactor.row(i).tail(rt) = triFactor.row(i).tail(rt) * triFactor.bottomRightCorner(rt,rt).template triangularView<Upper>();\n      for(Index j=nbVecs-1; j>i; --j)\n      {\n        typename TriangularFactorType::Scalar z = triFactor(i,j);\n        triFactor(i,j) = z * triFactor(j,j);\n        if(nbVecs-j-1>0)\n          triFactor.row(i).tail(nbVecs-j-1) += z * triFactor.row(j).tail(nbVecs-j-1);\n      }\n      \n    }\n    triFactor(i,i) = hCoeffs(i);\n  }\n}\n\n/** \\internal\n  * if forward then perform   mat = H0 * H1 * H2 * mat\n  * 
otherwise perform         mat = H2 * H1 * H0 * mat\n  */\ntemplate<typename MatrixType,typename VectorsType,typename CoeffsType>\nvoid apply_block_householder_on_the_left(MatrixType& mat, const VectorsType& vectors, const CoeffsType& hCoeffs, bool forward)\n{\n  enum { TFactorSize = MatrixType::ColsAtCompileTime };\n  Index nbVecs = vectors.cols();\n  Matrix<typename MatrixType::Scalar, TFactorSize, TFactorSize, RowMajor> T(nbVecs,nbVecs);\n  \n  if(forward) make_block_householder_triangular_factor(T, vectors, hCoeffs);\n  else        make_block_householder_triangular_factor(T, vectors, hCoeffs.conjugate());  \n  const TriangularView<const VectorsType, UnitLower> V(vectors);\n\n  // A -= V T V^* A\n  Matrix<typename MatrixType::Scalar,VectorsType::ColsAtCompileTime,MatrixType::ColsAtCompileTime,\n         (VectorsType::MaxColsAtCompileTime==1 && MatrixType::MaxColsAtCompileTime!=1)?RowMajor:ColMajor,\n         VectorsType::MaxColsAtCompileTime,MatrixType::MaxColsAtCompileTime> tmp = V.adjoint() * mat;\n  // FIXME add .noalias() once the triangular product can work inplace\n  if(forward) tmp = T.template triangularView<Upper>()           * tmp;\n  else        tmp = T.template triangularView<Upper>().adjoint() * tmp;\n  mat.noalias() -= V * tmp;\n}\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_BLOCK_HOUSEHOLDER_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Householder/Householder.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2010 Benoit Jacob <jacob.benoit.1@gmail.com>\n// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_HOUSEHOLDER_H\n#define EIGEN_HOUSEHOLDER_H\n\nnamespace Eigen { \n\nnamespace internal {\ntemplate<int n> struct decrement_size\n{\n  enum {\n    ret = n==Dynamic ? n : n-1\n  };\n};\n}\n\n/** Computes the elementary reflector H such that:\n  * \\f$ H *this = [ beta 0 ... 0]^T \\f$\n  * where the transformation H is:\n  * \\f$ H = I - tau v v^*\\f$\n  * and the vector v is:\n  * \\f$ v^T = [1 essential^T] \\f$\n  *\n  * The essential part of the vector \\c v is stored in *this.\n  * \n  * On output:\n  * \\param tau the scaling factor of the Householder transformation\n  * \\param beta the result of H * \\c *this\n  *\n  * \\sa MatrixBase::makeHouseholder(), MatrixBase::applyHouseholderOnTheLeft(),\n  *     MatrixBase::applyHouseholderOnTheRight()\n  */\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC\nvoid MatrixBase<Derived>::makeHouseholderInPlace(Scalar& tau, RealScalar& beta)\n{\n  VectorBlock<Derived, internal::decrement_size<Base::SizeAtCompileTime>::ret> essentialPart(derived(), 1, size()-1);\n  makeHouseholder(essentialPart, tau, beta);\n}\n\n/** Computes the elementary reflector H such that:\n  * \\f$ H *this = [ beta 0 ... 0]^T \\f$\n  * where the transformation H is:\n  * \\f$ H = I - tau v v^*\\f$\n  * and the vector v is:\n  * \\f$ v^T = [1 essential^T] \\f$\n  *\n  * On output:\n  * \\param essential the essential part of the vector \\c v\n  * \\param tau the scaling factor of the Householder transformation\n  * \\param beta the result of H * \\c *this\n  *\n  * \\sa MatrixBase::makeHouseholderInPlace(), MatrixBase::applyHouseholderOnTheLeft(),\n  *     MatrixBase::applyHouseholderOnTheRight()\n  */\ntemplate<typename Derived>\ntemplate<typename EssentialPart>\nEIGEN_DEVICE_FUNC\nvoid MatrixBase<Derived>::makeHouseholder(\n  EssentialPart& essential,\n  Scalar& tau,\n  RealScalar& beta) const\n{\n  using std::sqrt;\n  using numext::conj;\n  \n  EIGEN_STATIC_ASSERT_VECTOR_ONLY(EssentialPart)\n  VectorBlock<const Derived, EssentialPart::SizeAtCompileTime> tail(derived(), 1, size()-1);\n  \n  RealScalar tailSqNorm = size()==1 ? 
RealScalar(0) : tail.squaredNorm();\n  Scalar c0 = coeff(0);\n  const RealScalar tol = (std::numeric_limits<RealScalar>::min)();\n\n  if(tailSqNorm <= tol && numext::abs2(numext::imag(c0))<=tol)\n  {\n    tau = RealScalar(0);\n    beta = numext::real(c0);\n    essential.setZero();\n  }\n  else\n  {\n    beta = sqrt(numext::abs2(c0) + tailSqNorm);\n    if (numext::real(c0)>=RealScalar(0))\n      beta = -beta;\n    essential = tail / (c0 - beta);\n    tau = conj((beta - c0) / beta);\n  }\n}\n\n/** Apply the elementary reflector H given by\n  * \\f$ H = I - tau v v^*\\f$\n  * with\n  * \\f$ v^T = [1 essential^T] \\f$\n  * from the left to a vector or matrix.\n  *\n  * On input:\n  * \\param essential the essential part of the vector \\c v\n  * \\param tau the scaling factor of the Householder transformation\n  * \\param workspace a pointer to working space with at least\n  *                  this->cols() entries\n  *\n  * \\sa MatrixBase::makeHouseholder(), MatrixBase::makeHouseholderInPlace(), \n  *     MatrixBase::applyHouseholderOnTheRight()\n  */\ntemplate<typename Derived>\ntemplate<typename EssentialPart>\nEIGEN_DEVICE_FUNC\nvoid MatrixBase<Derived>::applyHouseholderOnTheLeft(\n  const EssentialPart& essential,\n  const Scalar& tau,\n  Scalar* workspace)\n{\n  if(rows() == 1)\n  {\n    *this *= Scalar(1)-tau;\n  }\n  else if(tau!=Scalar(0))\n  {\n    Map<typename internal::plain_row_type<PlainObject>::type> tmp(workspace,cols());\n    Block<Derived, EssentialPart::SizeAtCompileTime, Derived::ColsAtCompileTime> bottom(derived(), 1, 0, rows()-1, cols());\n    tmp.noalias() = essential.adjoint() * bottom;\n    tmp += this->row(0);\n    this->row(0) -= tau * tmp;\n    bottom.noalias() -= tau * essential * tmp;\n  }\n}\n\n/** Apply the elementary reflector H given by\n  * \\f$ H = I - tau v v^*\\f$\n  * with\n  * \\f$ v^T = [1 essential^T] \\f$\n  * from the right to a vector or matrix.\n  *\n  * On input:\n  * \\param essential the essential part of the vector \\c v\n  * \\param tau the scaling factor of the Householder transformation\n  * \\param workspace a pointer to working space with at least\n  *                  this->rows() entries\n  *\n  * \\sa MatrixBase::makeHouseholder(), MatrixBase::makeHouseholderInPlace(), \n  *     MatrixBase::applyHouseholderOnTheLeft()\n  */\ntemplate<typename Derived>\ntemplate<typename EssentialPart>\nEIGEN_DEVICE_FUNC\nvoid MatrixBase<Derived>::applyHouseholderOnTheRight(\n  const EssentialPart& essential,\n  const Scalar& tau,\n  Scalar* workspace)\n{\n  if(cols() == 1)\n  {\n    *this *= Scalar(1)-tau;\n  }\n  else if(tau!=Scalar(0))\n  {\n    Map<typename internal::plain_col_type<PlainObject>::type> tmp(workspace,rows());\n    Block<Derived, Derived::RowsAtCompileTime, EssentialPart::SizeAtCompileTime> right(derived(), 0, 1, rows(), cols()-1);\n    tmp.noalias() = right * essential;\n    tmp += this->col(0);\n    this->col(0) -= tau * tmp;\n    right.noalias() -= tau * tmp * essential.adjoint();\n  }\n}\n\n} // end namespace Eigen\n\n#endif // EIGEN_HOUSEHOLDER_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Householder/HouseholderSequence.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>\n// Copyright (C) 2010 Benoit Jacob <jacob.benoit.1@gmail.com>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_HOUSEHOLDER_SEQUENCE_H\n#define EIGEN_HOUSEHOLDER_SEQUENCE_H\n\nnamespace Eigen {\n\n/** \\ingroup Householder_Module\n  * \\householder_module\n  * \\class HouseholderSequence\n  * \\brief Sequence of Householder reflections acting on subspaces with decreasing size\n  * \\tparam VectorsType type of matrix containing the Householder vectors\n  * \\tparam CoeffsType  type of vector containing the Householder coefficients\n  * \\tparam Side        either OnTheLeft (the default) or OnTheRight\n  *\n  * This class represents a product sequence of Householder reflections where the first Householder reflection\n  * acts on the whole space, the second Householder reflection leaves the one-dimensional subspace spanned by\n  * the first unit vector invariant, the third Householder reflection leaves the two-dimensional subspace\n  * spanned by the first two unit vectors invariant, and so on up to the last reflection which leaves all but\n  * one dimensions invariant and acts only on the last dimension. Such sequences of Householder reflections\n  * are used in several algorithms to zero out certain parts of a matrix. Indeed, the methods\n  * HessenbergDecomposition::matrixQ(), Tridiagonalization::matrixQ(), HouseholderQR::householderQ(),\n  * and ColPivHouseholderQR::householderQ() all return a %HouseholderSequence.\n  *\n  * More precisely, the class %HouseholderSequence represents an \\f$ n \\times n \\f$ matrix \\f$ H \\f$ of the\n  * form \\f$ H = \\prod_{i=0}^{n-1} H_i \\f$ where the i-th Householder reflection is \\f$ H_i = I - h_i v_i\n  * v_i^* \\f$. The i-th Householder coefficient \\f$ h_i \\f$ is a scalar and the i-th Householder vector \\f$\n  * v_i \\f$ is a vector of the form\n  * \\f[\n  * v_i = [\\underbrace{0, \\ldots, 0}_{i-1\\mbox{ zeros}}, 1, \\underbrace{*, \\ldots,*}_{n-i\\mbox{ arbitrary entries}} ].\n  * \\f]\n  * The last \\f$ n-i \\f$ entries of \\f$ v_i \\f$ are called the essential part of the Householder vector.\n  *\n  * Typical usages are listed below, where H is a HouseholderSequence:\n  * \\code\n  * A.applyOnTheRight(H);             // A = A * H\n  * A.applyOnTheLeft(H);              // A = H * A\n  * A.applyOnTheRight(H.adjoint());   // A = A * H^*\n  * A.applyOnTheLeft(H.adjoint());    // A = H^* * A\n  * MatrixXd Q = H;                   // conversion to a dense matrix\n  * \\endcode\n  * In addition to the adjoint, you can also apply the inverse (=adjoint), the transpose, and the conjugate operators.\n  *\n  * See the documentation for HouseholderSequence(const VectorsType&, const CoeffsType&) for an example.\n  *\n  * \\sa MatrixBase::applyOnTheLeft(), MatrixBase::applyOnTheRight()\n  */\n\nnamespace internal {\n\ntemplate<typename VectorsType, typename CoeffsType, int Side>\nstruct traits<HouseholderSequence<VectorsType,CoeffsType,Side> >\n{\n  typedef typename VectorsType::Scalar Scalar;\n  typedef typename VectorsType::StorageIndex StorageIndex;\n  typedef typename VectorsType::StorageKind StorageKind;\n  enum {\n    RowsAtCompileTime = Side==OnTheLeft ? 
traits<VectorsType>::RowsAtCompileTime\n                                        : traits<VectorsType>::ColsAtCompileTime,\n    ColsAtCompileTime = RowsAtCompileTime,\n    MaxRowsAtCompileTime = Side==OnTheLeft ? traits<VectorsType>::MaxRowsAtCompileTime\n                                           : traits<VectorsType>::MaxColsAtCompileTime,\n    MaxColsAtCompileTime = MaxRowsAtCompileTime,\n    Flags = 0\n  };\n};\n\nstruct HouseholderSequenceShape {};\n\ntemplate<typename VectorsType, typename CoeffsType, int Side>\nstruct evaluator_traits<HouseholderSequence<VectorsType,CoeffsType,Side> >\n  : public evaluator_traits_base<HouseholderSequence<VectorsType,CoeffsType,Side> >\n{\n  typedef HouseholderSequenceShape Shape;\n};\n\ntemplate<typename VectorsType, typename CoeffsType, int Side>\nstruct hseq_side_dependent_impl\n{\n  typedef Block<const VectorsType, Dynamic, 1> EssentialVectorType;\n  typedef HouseholderSequence<VectorsType, CoeffsType, OnTheLeft> HouseholderSequenceType;\n  static EIGEN_DEVICE_FUNC inline const EssentialVectorType essentialVector(const HouseholderSequenceType& h, Index k)\n  {\n    Index start = k+1+h.m_shift;\n    return Block<const VectorsType,Dynamic,1>(h.m_vectors, start, k, h.rows()-start, 1);\n  }\n};\n\ntemplate<typename VectorsType, typename CoeffsType>\nstruct hseq_side_dependent_impl<VectorsType, CoeffsType, OnTheRight>\n{\n  typedef Transpose<Block<const VectorsType, 1, Dynamic> > EssentialVectorType;\n  typedef HouseholderSequence<VectorsType, CoeffsType, OnTheRight> HouseholderSequenceType;\n  static inline const EssentialVectorType essentialVector(const HouseholderSequenceType& h, Index k)\n  {\n    Index start = k+1+h.m_shift;\n    return Block<const VectorsType,1,Dynamic>(h.m_vectors, k, start, 1, h.rows()-start).transpose();\n  }\n};\n\ntemplate<typename OtherScalarType, typename MatrixType> struct matrix_type_times_scalar_type\n{\n  typedef typename ScalarBinaryOpTraits<OtherScalarType, typename MatrixType::Scalar>::ReturnType\n    ResultScalar;\n  typedef Matrix<ResultScalar, MatrixType::RowsAtCompileTime, MatrixType::ColsAtCompileTime,\n                 0, MatrixType::MaxRowsAtCompileTime, MatrixType::MaxColsAtCompileTime> Type;\n};\n\n} // end namespace internal\n\ntemplate<typename VectorsType, typename CoeffsType, int Side> class HouseholderSequence\n  : public EigenBase<HouseholderSequence<VectorsType,CoeffsType,Side> >\n{\n    typedef typename internal::hseq_side_dependent_impl<VectorsType,CoeffsType,Side>::EssentialVectorType EssentialVectorType;\n\n  public:\n    enum {\n      RowsAtCompileTime = internal::traits<HouseholderSequence>::RowsAtCompileTime,\n      ColsAtCompileTime = internal::traits<HouseholderSequence>::ColsAtCompileTime,\n      MaxRowsAtCompileTime = internal::traits<HouseholderSequence>::MaxRowsAtCompileTime,\n      MaxColsAtCompileTime = internal::traits<HouseholderSequence>::MaxColsAtCompileTime\n    };\n    typedef typename internal::traits<HouseholderSequence>::Scalar Scalar;\n\n    typedef HouseholderSequence<\n      typename internal::conditional<NumTraits<Scalar>::IsComplex,\n        typename internal::remove_all<typename VectorsType::ConjugateReturnType>::type,\n        VectorsType>::type,\n      typename internal::conditional<NumTraits<Scalar>::IsComplex,\n        typename internal::remove_all<typename CoeffsType::ConjugateReturnType>::type,\n        CoeffsType>::type,\n      Side\n    > ConjugateReturnType;\n\n    typedef HouseholderSequence<\n      VectorsType,\n      typename 
internal::conditional<NumTraits<Scalar>::IsComplex,\n        typename internal::remove_all<typename CoeffsType::ConjugateReturnType>::type,\n        CoeffsType>::type,\n      Side\n    > AdjointReturnType;\n\n    typedef HouseholderSequence<\n      typename internal::conditional<NumTraits<Scalar>::IsComplex,\n        typename internal::remove_all<typename VectorsType::ConjugateReturnType>::type,\n        VectorsType>::type,\n      CoeffsType,\n      Side\n    > TransposeReturnType;\n\n    typedef HouseholderSequence<\n      typename internal::add_const<VectorsType>::type,\n      typename internal::add_const<CoeffsType>::type,\n      Side\n    > ConstHouseholderSequence;\n\n    /** \\brief Constructor.\n      * \\param[in]  v      %Matrix containing the essential parts of the Householder vectors\n      * \\param[in]  h      Vector containing the Householder coefficients\n      *\n      * Constructs the Householder sequence with coefficients given by \\p h and vectors given by \\p v. The\n      * i-th Householder coefficient \\f$ h_i \\f$ is given by \\p h(i) and the essential part of the i-th\n      * Householder vector \\f$ v_i \\f$ is given by \\p v(k,i) with \\p k > \\p i (the subdiagonal part of the\n      * i-th column). If \\p v has fewer columns than rows, then the Householder sequence contains as many\n      * Householder reflections as there are columns.\n      *\n      * \\note The %HouseholderSequence object stores \\p v and \\p h by reference.\n      *\n      * Example: \\include HouseholderSequence_HouseholderSequence.cpp\n      * Output: \\verbinclude HouseholderSequence_HouseholderSequence.out\n      *\n      * \\sa setLength(), setShift()\n      */\n    EIGEN_DEVICE_FUNC\n    HouseholderSequence(const VectorsType& v, const CoeffsType& h)\n      : m_vectors(v), m_coeffs(h), m_reverse(false), m_length(v.diagonalSize()),\n        m_shift(0)\n    {\n    }\n\n    /** \\brief Copy constructor. */\n    EIGEN_DEVICE_FUNC\n    HouseholderSequence(const HouseholderSequence& other)\n      : m_vectors(other.m_vectors),\n        m_coeffs(other.m_coeffs),\n        m_reverse(other.m_reverse),\n        m_length(other.m_length),\n        m_shift(other.m_shift)\n    {\n    }\n\n    /** \\brief Number of rows of transformation viewed as a matrix.\n      * \\returns Number of rows\n      * \\details This equals the dimension of the space that the transformation acts on.\n      */\n    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n    Index rows() const EIGEN_NOEXCEPT { return Side==OnTheLeft ? m_vectors.rows() : m_vectors.cols(); }\n\n    /** \\brief Number of columns of transformation viewed as a matrix.\n      * \\returns Number of columns\n      * \\details This equals the dimension of the space that the transformation acts on.\n      */\n    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n    Index cols() const EIGEN_NOEXCEPT { return rows(); }\n\n    /** \\brief Essential part of a Householder vector.\n      * \\param[in]  k  Index of Householder reflection\n      * \\returns    Vector containing non-trivial entries of k-th Householder vector\n      *\n      * This function returns the essential part of the Householder vector \\f$ v_i \\f$. 
This is a vector of\n      * length \\f$ n-i \\f$ containing the last \\f$ n-i \\f$ entries of the vector\n      * \\f[\n      * v_i = [\\underbrace{0, \\ldots, 0}_{i-1\\mbox{ zeros}}, 1, \\underbrace{*, \\ldots,*}_{n-i\\mbox{ arbitrary entries}} ].\n      * \\f]\n      * The index \\f$ i \\f$ equals \\p k + shift(), corresponding to the k-th column of the matrix \\p v\n      * passed to the constructor.\n      *\n      * \\sa setShift(), shift()\n      */\n    EIGEN_DEVICE_FUNC\n    const EssentialVectorType essentialVector(Index k) const\n    {\n      eigen_assert(k >= 0 && k < m_length);\n      return internal::hseq_side_dependent_impl<VectorsType,CoeffsType,Side>::essentialVector(*this, k);\n    }\n\n    /** \\brief %Transpose of the Householder sequence. */\n    TransposeReturnType transpose() const\n    {\n      return TransposeReturnType(m_vectors.conjugate(), m_coeffs)\n              .setReverseFlag(!m_reverse)\n              .setLength(m_length)\n              .setShift(m_shift);\n    }\n\n    /** \\brief Complex conjugate of the Householder sequence. */\n    ConjugateReturnType conjugate() const\n    {\n      return ConjugateReturnType(m_vectors.conjugate(), m_coeffs.conjugate())\n             .setReverseFlag(m_reverse)\n             .setLength(m_length)\n             .setShift(m_shift);\n    }\n\n    /** \\returns an expression of the complex conjugate of \\c *this if Cond==true,\n     *           returns \\c *this otherwise.\n     */\n    template<bool Cond>\n    EIGEN_DEVICE_FUNC\n    inline typename internal::conditional<Cond,ConjugateReturnType,ConstHouseholderSequence>::type\n    conjugateIf() const\n    {\n      typedef typename internal::conditional<Cond,ConjugateReturnType,ConstHouseholderSequence>::type ReturnType;\n      return ReturnType(m_vectors.template conjugateIf<Cond>(), m_coeffs.template conjugateIf<Cond>());\n    }\n\n    /** \\brief Adjoint (conjugate transpose) of the Householder sequence. */\n    AdjointReturnType adjoint() const\n    {\n      return AdjointReturnType(m_vectors, m_coeffs.conjugate())\n              .setReverseFlag(!m_reverse)\n              .setLength(m_length)\n              .setShift(m_shift);\n    }\n\n    /** \\brief Inverse of the Householder sequence (equals the adjoint). 
*/\n    AdjointReturnType inverse() const { return adjoint(); }\n\n    /** \\internal */\n    template<typename DestType>\n    inline EIGEN_DEVICE_FUNC\n    void evalTo(DestType& dst) const\n    {\n      Matrix<Scalar, DestType::RowsAtCompileTime, 1,\n             AutoAlign|ColMajor, DestType::MaxRowsAtCompileTime, 1> workspace(rows());\n      evalTo(dst, workspace);\n    }\n\n    /** \\internal */\n    template<typename Dest, typename Workspace>\n    EIGEN_DEVICE_FUNC\n    void evalTo(Dest& dst, Workspace& workspace) const\n    {\n      workspace.resize(rows());\n      Index vecs = m_length;\n      if(internal::is_same_dense(dst,m_vectors))\n      {\n        // in-place\n        dst.diagonal().setOnes();\n        dst.template triangularView<StrictlyUpper>().setZero();\n        for(Index k = vecs-1; k >= 0; --k)\n        {\n          Index cornerSize = rows() - k - m_shift;\n          if(m_reverse)\n            dst.bottomRightCorner(cornerSize, cornerSize)\n               .applyHouseholderOnTheRight(essentialVector(k), m_coeffs.coeff(k), workspace.data());\n          else\n            dst.bottomRightCorner(cornerSize, cornerSize)\n               .applyHouseholderOnTheLeft(essentialVector(k), m_coeffs.coeff(k), workspace.data());\n\n          // clear the off diagonal vector\n          dst.col(k).tail(rows()-k-1).setZero();\n        }\n        // clear the remaining columns if needed\n        for(Index k = 0; k<cols()-vecs ; ++k)\n          dst.col(k).tail(rows()-k-1).setZero();\n      }\n      else if(m_length>BlockSize)\n      {\n        dst.setIdentity(rows(), rows());\n        if(m_reverse)\n          applyThisOnTheLeft(dst,workspace,true);\n        else\n          applyThisOnTheLeft(dst,workspace,true);\n      }\n      else\n      {\n        dst.setIdentity(rows(), rows());\n        for(Index k = vecs-1; k >= 0; --k)\n        {\n          Index cornerSize = rows() - k - m_shift;\n          if(m_reverse)\n            dst.bottomRightCorner(cornerSize, cornerSize)\n               .applyHouseholderOnTheRight(essentialVector(k), m_coeffs.coeff(k), workspace.data());\n          else\n            dst.bottomRightCorner(cornerSize, cornerSize)\n               .applyHouseholderOnTheLeft(essentialVector(k), m_coeffs.coeff(k), workspace.data());\n        }\n      }\n    }\n\n    /** \\internal */\n    template<typename Dest> inline void applyThisOnTheRight(Dest& dst) const\n    {\n      Matrix<Scalar,1,Dest::RowsAtCompileTime,RowMajor,1,Dest::MaxRowsAtCompileTime> workspace(dst.rows());\n      applyThisOnTheRight(dst, workspace);\n    }\n\n    /** \\internal */\n    template<typename Dest, typename Workspace>\n    inline void applyThisOnTheRight(Dest& dst, Workspace& workspace) const\n    {\n      workspace.resize(dst.rows());\n      for(Index k = 0; k < m_length; ++k)\n      {\n        Index actual_k = m_reverse ? 
m_length-k-1 : k;\n        dst.rightCols(rows()-m_shift-actual_k)\n           .applyHouseholderOnTheRight(essentialVector(actual_k), m_coeffs.coeff(actual_k), workspace.data());\n      }\n    }\n\n    /** \\internal */\n    template<typename Dest> inline void applyThisOnTheLeft(Dest& dst, bool inputIsIdentity = false) const\n    {\n      Matrix<Scalar,1,Dest::ColsAtCompileTime,RowMajor,1,Dest::MaxColsAtCompileTime> workspace;\n      applyThisOnTheLeft(dst, workspace, inputIsIdentity);\n    }\n\n    /** \\internal */\n    template<typename Dest, typename Workspace>\n    inline void applyThisOnTheLeft(Dest& dst, Workspace& workspace, bool inputIsIdentity = false) const\n    {\n      if(inputIsIdentity && m_reverse)\n        inputIsIdentity = false;\n      // if the entries are large enough, then apply the reflectors by block\n      if(m_length>=BlockSize && dst.cols()>1)\n      {\n        // Make sure we have at least 2 useful blocks, otherwise it is point-less:\n        Index blockSize = m_length<Index(2*BlockSize) ? (m_length+1)/2 : Index(BlockSize);\n        for(Index i = 0; i < m_length; i+=blockSize)\n        {\n          Index end = m_reverse ? (std::min)(m_length,i+blockSize) : m_length-i;\n          Index k = m_reverse ? i : (std::max)(Index(0),end-blockSize);\n          Index bs = end-k;\n          Index start = k + m_shift;\n\n          typedef Block<typename internal::remove_all<VectorsType>::type,Dynamic,Dynamic> SubVectorsType;\n          SubVectorsType sub_vecs1(m_vectors.const_cast_derived(), Side==OnTheRight ? k : start,\n                                                                   Side==OnTheRight ? start : k,\n                                                                   Side==OnTheRight ? bs : m_vectors.rows()-start,\n                                                                   Side==OnTheRight ? m_vectors.cols()-start : bs);\n          typename internal::conditional<Side==OnTheRight, Transpose<SubVectorsType>, SubVectorsType&>::type sub_vecs(sub_vecs1);\n\n          Index dstStart = dst.rows()-rows()+m_shift+k;\n          Index dstRows  = rows()-m_shift-k;\n          Block<Dest,Dynamic,Dynamic> sub_dst(dst,\n                                              dstStart,\n                                              inputIsIdentity ? dstStart : 0,\n                                              dstRows,\n                                              inputIsIdentity ? dstRows : dst.cols());\n          apply_block_householder_on_the_left(sub_dst, sub_vecs, m_coeffs.segment(k, bs), !m_reverse);\n        }\n      }\n      else\n      {\n        workspace.resize(dst.cols());\n        for(Index k = 0; k < m_length; ++k)\n        {\n          Index actual_k = m_reverse ? k : m_length-k-1;\n          Index dstStart = rows()-m_shift-actual_k;\n          dst.bottomRightCorner(dstStart, inputIsIdentity ? 
dstStart : dst.cols())\n            .applyHouseholderOnTheLeft(essentialVector(actual_k), m_coeffs.coeff(actual_k), workspace.data());\n        }\n      }\n    }\n\n    /** \\brief Computes the product of a Householder sequence with a matrix.\n      * \\param[in]  other  %Matrix being multiplied.\n      * \\returns    Expression object representing the product.\n      *\n      * This function computes \\f$ HM \\f$ where \\f$ H \\f$ is the Householder sequence represented by \\p *this\n      * and \\f$ M \\f$ is the matrix \\p other.\n      */\n    template<typename OtherDerived>\n    typename internal::matrix_type_times_scalar_type<Scalar, OtherDerived>::Type operator*(const MatrixBase<OtherDerived>& other) const\n    {\n      typename internal::matrix_type_times_scalar_type<Scalar, OtherDerived>::Type\n        res(other.template cast<typename internal::matrix_type_times_scalar_type<Scalar,OtherDerived>::ResultScalar>());\n      applyThisOnTheLeft(res, internal::is_identity<OtherDerived>::value && res.rows()==res.cols());\n      return res;\n    }\n\n    template<typename _VectorsType, typename _CoeffsType, int _Side> friend struct internal::hseq_side_dependent_impl;\n\n    /** \\brief Sets the length of the Householder sequence.\n      * \\param [in]  length  New value for the length.\n      *\n      * By default, the length \\f$ n \\f$ of the Householder sequence \\f$ H = H_0 H_1 \\ldots H_{n-1} \\f$ is set\n      * to the number of columns of the matrix \\p v passed to the constructor, or the number of rows if that\n      * is smaller. After this function is called, the length equals \\p length.\n      *\n      * \\sa length()\n      */\n    EIGEN_DEVICE_FUNC\n    HouseholderSequence& setLength(Index length)\n    {\n      m_length = length;\n      return *this;\n    }\n\n    /** \\brief Sets the shift of the Householder sequence.\n      * \\param [in]  shift  New value for the shift.\n      *\n      * By default, a %HouseholderSequence object represents \\f$ H = H_0 H_1 \\ldots H_{n-1} \\f$ and the i-th\n      * column of the matrix \\p v passed to the constructor corresponds to the i-th Householder\n      * reflection. After this function is called, the object represents \\f$ H = H_{\\mathrm{shift}}\n      * H_{\\mathrm{shift}+1} \\ldots H_{n-1} \\f$ and the i-th column of \\p v corresponds to the (shift+i)-th\n      * Householder reflection.\n      *\n      * \\sa shift()\n      */\n    EIGEN_DEVICE_FUNC\n    HouseholderSequence& setShift(Index shift)\n    {\n      m_shift = shift;\n      return *this;\n    }\n\n    EIGEN_DEVICE_FUNC\n    Index length() const { return m_length; }  /**< \\brief Returns the length of the Householder sequence. */\n\n    EIGEN_DEVICE_FUNC\n    Index shift() const { return m_shift; }    /**< \\brief Returns the shift of the Householder sequence. */\n\n    /* Necessary for .adjoint() and .conjugate() */\n    template <typename VectorsType2, typename CoeffsType2, int Side2> friend class HouseholderSequence;\n\n  protected:\n\n    /** \\internal\n      * \\brief Sets the reverse flag.\n      * \\param [in]  reverse  New value of the reverse flag.\n      *\n      * By default, the reverse flag is not set. 
If the reverse flag is set, then this object represents\n      * \\f$ H^r = H_{n-1} \\ldots H_1 H_0 \\f$ instead of \\f$ H = H_0 H_1 \\ldots H_{n-1} \\f$.\n      * \\note For real valued HouseholderSequence this is equivalent to transposing \\f$ H \\f$.\n      *\n      * \\sa reverseFlag(), transpose(), adjoint()\n      */\n    HouseholderSequence& setReverseFlag(bool reverse)\n    {\n      m_reverse = reverse;\n      return *this;\n    }\n\n    bool reverseFlag() const { return m_reverse; }     /**< \\internal \\brief Returns the reverse flag. */\n\n    typename VectorsType::Nested m_vectors;\n    typename CoeffsType::Nested m_coeffs;\n    bool m_reverse;\n    Index m_length;\n    Index m_shift;\n    enum { BlockSize = 48 };\n};\n\n/** \\brief Computes the product of a matrix with a Householder sequence.\n  * \\param[in]  other  %Matrix being multiplied.\n  * \\param[in]  h      %HouseholderSequence being multiplied.\n  * \\returns    Expression object representing the product.\n  *\n  * This function computes \\f$ MH \\f$ where \\f$ M \\f$ is the matrix \\p other and \\f$ H \\f$ is the\n  * Householder sequence represented by \\p h.\n  */\ntemplate<typename OtherDerived, typename VectorsType, typename CoeffsType, int Side>\ntypename internal::matrix_type_times_scalar_type<typename VectorsType::Scalar,OtherDerived>::Type operator*(const MatrixBase<OtherDerived>& other, const HouseholderSequence<VectorsType,CoeffsType,Side>& h)\n{\n  typename internal::matrix_type_times_scalar_type<typename VectorsType::Scalar,OtherDerived>::Type\n    res(other.template cast<typename internal::matrix_type_times_scalar_type<typename VectorsType::Scalar,OtherDerived>::ResultScalar>());\n  h.applyThisOnTheRight(res);\n  return res;\n}\n\n/** \\ingroup Householder_Module \\householder_module\n  * \\brief Convenience function for constructing a Householder sequence.\n  * \\returns A HouseholderSequence constructed from the specified arguments.\n  */\ntemplate<typename VectorsType, typename CoeffsType>\nHouseholderSequence<VectorsType,CoeffsType> householderSequence(const VectorsType& v, const CoeffsType& h)\n{\n  return HouseholderSequence<VectorsType,CoeffsType,OnTheLeft>(v, h);\n}\n\n/** \\ingroup Householder_Module \\householder_module\n  * \\brief Convenience function for constructing a Householder sequence.\n  * \\returns A HouseholderSequence constructed from the specified arguments.\n  * \\details This function differs from householderSequence() in that the template argument \\p OnTheSide of\n  * the constructed HouseholderSequence is set to OnTheRight, instead of the default OnTheLeft.\n  */\ntemplate<typename VectorsType, typename CoeffsType>\nHouseholderSequence<VectorsType,CoeffsType,OnTheRight> rightHouseholderSequence(const VectorsType& v, const CoeffsType& h)\n{\n  return HouseholderSequence<VectorsType,CoeffsType,OnTheRight>(v, h);\n}\n\n} // end namespace Eigen\n\n#endif // EIGEN_HOUSEHOLDER_SEQUENCE_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2011-2014 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_BASIC_PRECONDITIONERS_H\n#define EIGEN_BASIC_PRECONDITIONERS_H\n\nnamespace Eigen {\n\n/** \\ingroup IterativeLinearSolvers_Module\n  * \\brief A preconditioner based on the digonal entries\n  *\n  * This class allows to approximately solve for A.x = b problems assuming A is a diagonal matrix.\n  * In other words, this preconditioner neglects all off diagonal entries and, in Eigen's language, solves for:\n    \\code\n    A.diagonal().asDiagonal() . x = b\n    \\endcode\n  *\n  * \\tparam _Scalar the type of the scalar.\n  *\n  * \\implsparsesolverconcept\n  *\n  * This preconditioner is suitable for both selfadjoint and general problems.\n  * The diagonal entries are pre-inverted and stored into a dense vector.\n  *\n  * \\note A variant that has yet to be implemented would attempt to preserve the norm of each column.\n  *\n  * \\sa class LeastSquareDiagonalPreconditioner, class ConjugateGradient\n  */\ntemplate <typename _Scalar>\nclass DiagonalPreconditioner\n{\n    typedef _Scalar Scalar;\n    typedef Matrix<Scalar,Dynamic,1> Vector;\n  public:\n    typedef typename Vector::StorageIndex StorageIndex;\n    enum {\n      ColsAtCompileTime = Dynamic,\n      MaxColsAtCompileTime = Dynamic\n    };\n\n    DiagonalPreconditioner() : m_isInitialized(false) {}\n\n    template<typename MatType>\n    explicit DiagonalPreconditioner(const MatType& mat) : m_invdiag(mat.cols())\n    {\n      compute(mat);\n    }\n\n    EIGEN_CONSTEXPR Index rows() const EIGEN_NOEXCEPT { return m_invdiag.size(); }\n    EIGEN_CONSTEXPR Index cols() const EIGEN_NOEXCEPT { return m_invdiag.size(); }\n\n    template<typename MatType>\n    DiagonalPreconditioner& analyzePattern(const MatType& )\n    {\n      return *this;\n    }\n\n    template<typename MatType>\n    DiagonalPreconditioner& factorize(const MatType& mat)\n    {\n      m_invdiag.resize(mat.cols());\n      for(int j=0; j<mat.outerSize(); ++j)\n      {\n        typename MatType::InnerIterator it(mat,j);\n        while(it && it.index()!=j) ++it;\n        if(it && it.index()==j && it.value()!=Scalar(0))\n          m_invdiag(j) = Scalar(1)/it.value();\n        else\n          m_invdiag(j) = Scalar(1);\n      }\n      m_isInitialized = true;\n      return *this;\n    }\n\n    template<typename MatType>\n    DiagonalPreconditioner& compute(const MatType& mat)\n    {\n      return factorize(mat);\n    }\n\n    /** \\internal */\n    template<typename Rhs, typename Dest>\n    void _solve_impl(const Rhs& b, Dest& x) const\n    {\n      x = m_invdiag.array() * b.array() ;\n    }\n\n    template<typename Rhs> inline const Solve<DiagonalPreconditioner, Rhs>\n    solve(const MatrixBase<Rhs>& b) const\n    {\n      eigen_assert(m_isInitialized && \"DiagonalPreconditioner is not initialized.\");\n      eigen_assert(m_invdiag.size()==b.rows()\n                && \"DiagonalPreconditioner::solve(): invalid number of rows of the right hand side matrix b\");\n      return Solve<DiagonalPreconditioner, Rhs>(*this, b.derived());\n    }\n\n    ComputationInfo info() { return Success; }\n\n  protected:\n    Vector m_invdiag;\n    bool m_isInitialized;\n};\n\n/** \\ingroup 
IterativeLinearSolvers_Module\n  * \\brief Jacobi preconditioner for LeastSquaresConjugateGradient\n  *\n  * This class allows to approximately solve for A' A x  = A' b problems assuming A' A is a diagonal matrix.\n  * In other words, this preconditioner neglects all off diagonal entries and, in Eigen's language, solves for:\n    \\code\n    (A.adjoint() * A).diagonal().asDiagonal() * x = b\n    \\endcode\n  *\n  * \\tparam _Scalar the type of the scalar.\n  *\n  * \\implsparsesolverconcept\n  *\n  * The diagonal entries are pre-inverted and stored into a dense vector.\n  *\n  * \\sa class LeastSquaresConjugateGradient, class DiagonalPreconditioner\n  */\ntemplate <typename _Scalar>\nclass LeastSquareDiagonalPreconditioner : public DiagonalPreconditioner<_Scalar>\n{\n    typedef _Scalar Scalar;\n    typedef typename NumTraits<Scalar>::Real RealScalar;\n    typedef DiagonalPreconditioner<_Scalar> Base;\n    using Base::m_invdiag;\n  public:\n\n    LeastSquareDiagonalPreconditioner() : Base() {}\n\n    template<typename MatType>\n    explicit LeastSquareDiagonalPreconditioner(const MatType& mat) : Base()\n    {\n      compute(mat);\n    }\n\n    template<typename MatType>\n    LeastSquareDiagonalPreconditioner& analyzePattern(const MatType& )\n    {\n      return *this;\n    }\n\n    template<typename MatType>\n    LeastSquareDiagonalPreconditioner& factorize(const MatType& mat)\n    {\n      // Compute the inverse squared-norm of each column of mat\n      m_invdiag.resize(mat.cols());\n      if(MatType::IsRowMajor)\n      {\n        m_invdiag.setZero();\n        for(Index j=0; j<mat.outerSize(); ++j)\n        {\n          for(typename MatType::InnerIterator it(mat,j); it; ++it)\n            m_invdiag(it.index()) += numext::abs2(it.value());\n        }\n        for(Index j=0; j<mat.cols(); ++j)\n          if(numext::real(m_invdiag(j))>RealScalar(0))\n            m_invdiag(j) = RealScalar(1)/numext::real(m_invdiag(j));\n      }\n      else\n      {\n        for(Index j=0; j<mat.outerSize(); ++j)\n        {\n          RealScalar sum = mat.col(j).squaredNorm();\n          if(sum>RealScalar(0))\n            m_invdiag(j) = RealScalar(1)/sum;\n          else\n            m_invdiag(j) = RealScalar(1);\n        }\n      }\n      Base::m_isInitialized = true;\n      return *this;\n    }\n\n    template<typename MatType>\n    LeastSquareDiagonalPreconditioner& compute(const MatType& mat)\n    {\n      return factorize(mat);\n    }\n\n    ComputationInfo info() { return Success; }\n\n  protected:\n};\n\n/** \\ingroup IterativeLinearSolvers_Module\n  * \\brief A naive preconditioner which approximates any matrix as the identity matrix\n  *\n  * \\implsparsesolverconcept\n  *\n  * \\sa class DiagonalPreconditioner\n  */\nclass IdentityPreconditioner\n{\n  public:\n\n    IdentityPreconditioner() {}\n\n    template<typename MatrixType>\n    explicit IdentityPreconditioner(const MatrixType& ) {}\n\n    template<typename MatrixType>\n    IdentityPreconditioner& analyzePattern(const MatrixType& ) { return *this; }\n\n    template<typename MatrixType>\n    IdentityPreconditioner& factorize(const MatrixType& ) { return *this; }\n\n    template<typename MatrixType>\n    IdentityPreconditioner& compute(const MatrixType& ) { return *this; }\n\n    template<typename Rhs>\n    inline const Rhs& solve(const Rhs& b) const { return b; }\n\n    ComputationInfo info() { return Success; }\n};\n\n} // end namespace Eigen\n\n#endif // EIGEN_BASIC_PRECONDITIONERS_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2011-2014 Gael Guennebaud <gael.guennebaud@inria.fr>\n// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_BICGSTAB_H\n#define EIGEN_BICGSTAB_H\n\nnamespace Eigen { \n\nnamespace internal {\n\n/** \\internal Low-level bi conjugate gradient stabilized algorithm\n  * \\param mat The matrix A\n  * \\param rhs The right hand side vector b\n  * \\param x On input and initial solution, on output the computed solution.\n  * \\param precond A preconditioner being able to efficiently solve for an\n  *                approximation of Ax=b (regardless of b)\n  * \\param iters On input the max number of iteration, on output the number of performed iterations.\n  * \\param tol_error On input the tolerance error, on output an estimation of the relative error.\n  * \\return false in the case of numerical issue, for example a break down of BiCGSTAB. \n  */\ntemplate<typename MatrixType, typename Rhs, typename Dest, typename Preconditioner>\nbool bicgstab(const MatrixType& mat, const Rhs& rhs, Dest& x,\n              const Preconditioner& precond, Index& iters,\n              typename Dest::RealScalar& tol_error)\n{\n  using std::sqrt;\n  using std::abs;\n  typedef typename Dest::RealScalar RealScalar;\n  typedef typename Dest::Scalar Scalar;\n  typedef Matrix<Scalar,Dynamic,1> VectorType;\n  RealScalar tol = tol_error;\n  Index maxIters = iters;\n\n  Index n = mat.cols();\n  VectorType r  = rhs - mat * x;\n  VectorType r0 = r;\n  \n  RealScalar r0_sqnorm = r0.squaredNorm();\n  RealScalar rhs_sqnorm = rhs.squaredNorm();\n  if(rhs_sqnorm == 0)\n  {\n    x.setZero();\n    return true;\n  }\n  Scalar rho    = 1;\n  Scalar alpha  = 1;\n  Scalar w      = 1;\n  \n  VectorType v = VectorType::Zero(n), p = VectorType::Zero(n);\n  VectorType y(n),  z(n);\n  VectorType kt(n), ks(n);\n\n  VectorType s(n), t(n);\n\n  RealScalar tol2 = tol*tol*rhs_sqnorm;\n  RealScalar eps2 = NumTraits<Scalar>::epsilon()*NumTraits<Scalar>::epsilon();\n  Index i = 0;\n  Index restarts = 0;\n\n  while ( r.squaredNorm() > tol2 && i<maxIters )\n  {\n    Scalar rho_old = rho;\n\n    rho = r0.dot(r);\n    if (abs(rho) < eps2*r0_sqnorm)\n    {\n      // The new residual vector became too orthogonal to the arbitrarily chosen direction r0\n      // Let's restart with a new r0:\n      r  = rhs - mat * x;\n      r0 = r;\n      rho = r0_sqnorm = r.squaredNorm();\n      if(restarts++ == 0)\n        i = 0;\n    }\n    Scalar beta = (rho/rho_old) * (alpha / w);\n    p = r + beta * (p - w * v);\n    \n    y = precond.solve(p);\n    \n    v.noalias() = mat * y;\n\n    alpha = rho / r0.dot(v);\n    s = r - alpha * v;\n\n    z = precond.solve(s);\n    t.noalias() = mat * z;\n\n    RealScalar tmp = t.squaredNorm();\n    if(tmp>RealScalar(0))\n      w = t.dot(s) / tmp;\n    else\n      w = Scalar(0);\n    x += alpha * y + w * z;\n    r = s - w * t;\n    ++i;\n  }\n  tol_error = sqrt(r.squaredNorm()/rhs_sqnorm);\n  iters = i;\n  return true; \n}\n\n}\n\ntemplate< typename _MatrixType,\n          typename _Preconditioner = DiagonalPreconditioner<typename _MatrixType::Scalar> >\nclass BiCGSTAB;\n\nnamespace internal {\n\ntemplate< typename _MatrixType, typename _Preconditioner>\nstruct 
traits<BiCGSTAB<_MatrixType,_Preconditioner> >\n{\n  typedef _MatrixType MatrixType;\n  typedef _Preconditioner Preconditioner;\n};\n\n}\n\n/** \\ingroup IterativeLinearSolvers_Module\n  * \\brief A bi conjugate gradient stabilized solver for sparse square problems\n  *\n  * This class allows to solve for A.x = b sparse linear problems using a bi conjugate gradient\n  * stabilized algorithm. The vectors x and b can be either dense or sparse.\n  *\n  * \\tparam _MatrixType the type of the sparse matrix A, can be a dense or a sparse matrix.\n  * \\tparam _Preconditioner the type of the preconditioner. Default is DiagonalPreconditioner\n  *\n  * \\implsparsesolverconcept\n  *\n  * The maximal number of iterations and tolerance value can be controlled via the setMaxIterations()\n  * and setTolerance() methods. The defaults are the size of the problem for the maximal number of iterations\n  * and NumTraits<Scalar>::epsilon() for the tolerance.\n  * \n  * The tolerance corresponds to the relative residual error: |Ax-b|/|b|\n  * \n  * \\b Performance: when using sparse matrices, best performance is achieved for a row-major sparse matrix format.\n  * Moreover, in this case multi-threading can be exploited if the user code is compiled with OpenMP enabled.\n  * See \\ref TopicMultiThreading for details.\n  * \n  * This class can be used as the direct solver classes. Here is a typical usage example:\n  * \\include BiCGSTAB_simple.cpp\n  * \n  * By default the iterations start with x=0 as an initial guess of the solution.\n  * One can control the start using the solveWithGuess() method.\n  * \n  * BiCGSTAB can also be used in a matrix-free context, see the following \\link MatrixfreeSolverExample example \\endlink.\n  *\n  * \\sa class SimplicialCholesky, DiagonalPreconditioner, IdentityPreconditioner\n  */\ntemplate< typename _MatrixType, typename _Preconditioner>\nclass BiCGSTAB : public IterativeSolverBase<BiCGSTAB<_MatrixType,_Preconditioner> >\n{\n  typedef IterativeSolverBase<BiCGSTAB> Base;\n  using Base::matrix;\n  using Base::m_error;\n  using Base::m_iterations;\n  using Base::m_info;\n  using Base::m_isInitialized;\npublic:\n  typedef _MatrixType MatrixType;\n  typedef typename MatrixType::Scalar Scalar;\n  typedef typename MatrixType::RealScalar RealScalar;\n  typedef _Preconditioner Preconditioner;\n\npublic:\n\n  /** Default constructor. */\n  BiCGSTAB() : Base() {}\n\n  /** Initialize the solver with matrix \\a A for further \\c Ax=b solving.\n    * \n    * This constructor is a shortcut for the default constructor followed\n    * by a call to compute().\n    * \n    * \\warning this class stores a reference to the matrix A as well as some\n    * precomputed values that depend on it. Therefore, if \\a A is changed\n    * this class becomes invalid. Call compute() to update it with the new\n    * matrix A, or modify a copy of A.\n    */\n  template<typename MatrixDerived>\n  explicit BiCGSTAB(const EigenBase<MatrixDerived>& A) : Base(A.derived()) {}\n\n  ~BiCGSTAB() {}\n\n  /** \\internal */\n  template<typename Rhs,typename Dest>\n  void _solve_vector_with_guess_impl(const Rhs& b, Dest& x) const\n  {    \n    m_iterations = Base::maxIterations();\n    m_error = Base::m_tolerance;\n    \n    bool ret = internal::bicgstab(matrix(), b, x, Base::m_preconditioner, m_iterations, m_error);\n\n    m_info = (!ret) ? NumericalIssue\n           : m_error <= Base::m_tolerance ? Success\n           : NoConvergence;\n  }\n\nprotected:\n\n};\n\n} // end namespace Eigen\n\n#endif // EIGEN_BICGSTAB_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2011-2014 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_CONJUGATE_GRADIENT_H\n#define EIGEN_CONJUGATE_GRADIENT_H\n\nnamespace Eigen { \n\nnamespace internal {\n\n/** \\internal Low-level conjugate gradient algorithm\n  * \\param mat The matrix A\n  * \\param rhs The right hand side vector b\n  * \\param x On input and initial solution, on output the computed solution.\n  * \\param precond A preconditioner being able to efficiently solve for an\n  *                approximation of Ax=b (regardless of b)\n  * \\param iters On input the max number of iteration, on output the number of performed iterations.\n  * \\param tol_error On input the tolerance error, on output an estimation of the relative error.\n  */\ntemplate<typename MatrixType, typename Rhs, typename Dest, typename Preconditioner>\nEIGEN_DONT_INLINE\nvoid conjugate_gradient(const MatrixType& mat, const Rhs& rhs, Dest& x,\n                        const Preconditioner& precond, Index& iters,\n                        typename Dest::RealScalar& tol_error)\n{\n  using std::sqrt;\n  using std::abs;\n  typedef typename Dest::RealScalar RealScalar;\n  typedef typename Dest::Scalar Scalar;\n  typedef Matrix<Scalar,Dynamic,1> VectorType;\n  \n  RealScalar tol = tol_error;\n  Index maxIters = iters;\n  \n  Index n = mat.cols();\n\n  VectorType residual = rhs - mat * x; //initial residual\n\n  RealScalar rhsNorm2 = rhs.squaredNorm();\n  if(rhsNorm2 == 0) \n  {\n    x.setZero();\n    iters = 0;\n    tol_error = 0;\n    return;\n  }\n  const RealScalar considerAsZero = (std::numeric_limits<RealScalar>::min)();\n  RealScalar threshold = numext::maxi(RealScalar(tol*tol*rhsNorm2),considerAsZero);\n  RealScalar residualNorm2 = residual.squaredNorm();\n  if (residualNorm2 < threshold)\n  {\n    iters = 0;\n    tol_error = sqrt(residualNorm2 / rhsNorm2);\n    return;\n  }\n\n  VectorType p(n);\n  p = precond.solve(residual);      // initial search direction\n\n  VectorType z(n), tmp(n);\n  RealScalar absNew = numext::real(residual.dot(p));  // the square of the absolute value of r scaled by invM\n  Index i = 0;\n  while(i < maxIters)\n  {\n    tmp.noalias() = mat * p;                    // the bottleneck of the algorithm\n\n    Scalar alpha = absNew / p.dot(tmp);         // the amount we travel on dir\n    x += alpha * p;                             // update solution\n    residual -= alpha * tmp;                    // update residual\n    \n    residualNorm2 = residual.squaredNorm();\n    if(residualNorm2 < threshold)\n      break;\n    \n    z = precond.solve(residual);                // approximately solve for \"A z = residual\"\n\n    RealScalar absOld = absNew;\n    absNew = numext::real(residual.dot(z));     // update the absolute value of r\n    RealScalar beta = absNew / absOld;          // calculate the Gram-Schmidt value used to create the new search direction\n    p = z + beta * p;                           // update search direction\n    i++;\n  }\n  tol_error = sqrt(residualNorm2 / rhsNorm2);\n  iters = i;\n}\n\n}\n\ntemplate< typename _MatrixType, int _UpLo=Lower,\n          typename _Preconditioner = DiagonalPreconditioner<typename _MatrixType::Scalar> >\nclass 
ConjugateGradient;\n\nnamespace internal {\n\ntemplate< typename _MatrixType, int _UpLo, typename _Preconditioner>\nstruct traits<ConjugateGradient<_MatrixType,_UpLo,_Preconditioner> >\n{\n  typedef _MatrixType MatrixType;\n  typedef _Preconditioner Preconditioner;\n};\n\n}\n\n/** \\ingroup IterativeLinearSolvers_Module\n  * \\brief A conjugate gradient solver for sparse (or dense) self-adjoint problems\n  *\n  * This class allows to solve for A.x = b linear problems using an iterative conjugate gradient algorithm.\n  * The matrix A must be selfadjoint. The matrix A and the vectors x and b can be either dense or sparse.\n  *\n  * \\tparam _MatrixType the type of the matrix A, can be a dense or a sparse matrix.\n  * \\tparam _UpLo the triangular part that will be used for the computations. It can be Lower,\n  *               \\c Upper, or \\c Lower|Upper in which the full matrix entries will be considered.\n  *               Default is \\c Lower, best performance is \\c Lower|Upper.\n  * \\tparam _Preconditioner the type of the preconditioner. Default is DiagonalPreconditioner\n  *\n  * \\implsparsesolverconcept\n  *\n  * The maximal number of iterations and tolerance value can be controlled via the setMaxIterations()\n  * and setTolerance() methods. The defaults are the size of the problem for the maximal number of iterations\n  * and NumTraits<Scalar>::epsilon() for the tolerance.\n  * \n  * The tolerance corresponds to the relative residual error: |Ax-b|/|b|\n  * \n  * \\b Performance: Even though the default value of \\c _UpLo is \\c Lower, significantly higher performance is\n  * achieved when using a complete matrix and \\b Lower|Upper as the \\a _UpLo template parameter. Moreover, in this\n  * case multi-threading can be exploited if the user code is compiled with OpenMP enabled.\n  * See \\ref TopicMultiThreading for details.\n  * \n  * This class can be used as the direct solver classes. Here is a typical usage example:\n    \\code\n    int n = 10000;\n    VectorXd x(n), b(n);\n    SparseMatrix<double> A(n,n);\n    // fill A and b\n    ConjugateGradient<SparseMatrix<double>, Lower|Upper> cg;\n    cg.compute(A);\n    x = cg.solve(b);\n    std::cout << \"#iterations:     \" << cg.iterations() << std::endl;\n    std::cout << \"estimated error: \" << cg.error()      << std::endl;\n    // update b, and solve again\n    x = cg.solve(b);\n    \\endcode\n  * \n  * By default the iterations start with x=0 as an initial guess of the solution.\n  * One can control the start using the solveWithGuess() method.\n  * \n  * ConjugateGradient can also be used in a matrix-free context, see the following \\link MatrixfreeSolverExample example \\endlink.\n  *\n  * \\sa class LeastSquaresConjugateGradient, class SimplicialCholesky, DiagonalPreconditioner, IdentityPreconditioner\n  */\ntemplate< typename _MatrixType, int _UpLo, typename _Preconditioner>\nclass ConjugateGradient : public IterativeSolverBase<ConjugateGradient<_MatrixType,_UpLo,_Preconditioner> >\n{\n  typedef IterativeSolverBase<ConjugateGradient> Base;\n  using Base::matrix;\n  using Base::m_error;\n  using Base::m_iterations;\n  using Base::m_info;\n  using Base::m_isInitialized;\npublic:\n  typedef _MatrixType MatrixType;\n  typedef typename MatrixType::Scalar Scalar;\n  typedef typename MatrixType::RealScalar RealScalar;\n  typedef _Preconditioner Preconditioner;\n\n  enum {\n    UpLo = _UpLo\n  };\n\npublic:\n\n  /** Default constructor. 
*/\n  ConjugateGradient() : Base() {}\n\n  /** Initialize the solver with matrix \\a A for further \\c Ax=b solving.\n    * \n    * This constructor is a shortcut for the default constructor followed\n    * by a call to compute().\n    * \n    * \\warning this class stores a reference to the matrix A as well as some\n    * precomputed values that depend on it. Therefore, if \\a A is changed\n    * this class becomes invalid. Call compute() to update it with the new\n    * matrix A, or modify a copy of A.\n    */\n  template<typename MatrixDerived>\n  explicit ConjugateGradient(const EigenBase<MatrixDerived>& A) : Base(A.derived()) {}\n\n  ~ConjugateGradient() {}\n\n  /** \\internal */\n  template<typename Rhs,typename Dest>\n  void _solve_vector_with_guess_impl(const Rhs& b, Dest& x) const\n  {\n    typedef typename Base::MatrixWrapper MatrixWrapper;\n    typedef typename Base::ActualMatrixType ActualMatrixType;\n    enum {\n      TransposeInput  =   (!MatrixWrapper::MatrixFree)\n                      &&  (UpLo==(Lower|Upper))\n                      &&  (!MatrixType::IsRowMajor)\n                      &&  (!NumTraits<Scalar>::IsComplex)\n    };\n    typedef typename internal::conditional<TransposeInput,Transpose<const ActualMatrixType>, ActualMatrixType const&>::type RowMajorWrapper;\n    EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(MatrixWrapper::MatrixFree,UpLo==(Lower|Upper)),MATRIX_FREE_CONJUGATE_GRADIENT_IS_COMPATIBLE_WITH_UPPER_UNION_LOWER_MODE_ONLY);\n    typedef typename internal::conditional<UpLo==(Lower|Upper),\n                                           RowMajorWrapper,\n                                           typename MatrixWrapper::template ConstSelfAdjointViewReturnType<UpLo>::Type\n                                          >::type SelfAdjointWrapper;\n\n    m_iterations = Base::maxIterations();\n    m_error = Base::m_tolerance;\n\n    RowMajorWrapper row_mat(matrix());\n    internal::conjugate_gradient(SelfAdjointWrapper(row_mat), b, x, Base::m_preconditioner, m_iterations, m_error);\n    m_info = m_error <= Base::m_tolerance ? Success : NoConvergence;\n  }\n\nprotected:\n\n};\n\n} // end namespace Eigen\n\n#endif // EIGEN_CONJUGATE_GRADIENT_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>\n// Copyright (C) 2015 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_INCOMPLETE_CHOlESKY_H\n#define EIGEN_INCOMPLETE_CHOlESKY_H\n\n#include <vector>\n#include <list>\n\nnamespace Eigen {\n/**\n  * \\brief Modified Incomplete Cholesky with dual threshold\n  *\n  * References : C-J. Lin and J. J. Moré, Incomplete Cholesky Factorizations with\n  *              Limited memory, SIAM J. Sci. Comput.  21(1), pp. 24-45, 1999\n  *\n  * \\tparam Scalar the scalar type of the input matrices\n  * \\tparam _UpLo The triangular part that will be used for the computations. It can be Lower\n    *               or Upper. Default is Lower.\n  * \\tparam _OrderingType The ordering method to use, either AMDOrdering<> or NaturalOrdering<>. Default is AMDOrdering<int>,\n  *                       unless EIGEN_MPL2_ONLY is defined, in which case the default is NaturalOrdering<int>.\n  *\n  * \\implsparsesolverconcept\n  *\n  * It performs the following incomplete factorization: \\f$ S P A P' S \\approx L L' \\f$\n  * where L is a lower triangular factor, S is a diagonal scaling matrix, and P is a\n  * fill-in reducing permutation as computed by the ordering method.\n  *\n  * \\b Shifting \\b strategy: Let \\f$ B = S P A P' S \\f$  be the scaled matrix on which the factorization is carried out,\n  * and \\f$ \\beta \\f$ be the minimum value of the diagonal. If \\f$ \\beta > 0 \\f$ then, the factorization is directly performed\n  * on the matrix B. Otherwise, the factorization is performed on the shifted matrix \\f$ B + (\\sigma+|\\beta| I \\f$ where\n  * \\f$ \\sigma \\f$ is the initial shift value as returned and set by setInitialShift() method. The default value is \\f$ \\sigma = 10^{-3} \\f$.\n  * If the factorization fails, then the shift in doubled until it succeed or a maximum of ten attempts. 
If it still fails, as returned by\n  * the info() method, then you can either increase the initial shift, or better use another preconditioning technique.\n  *\n  */\ntemplate <typename Scalar, int _UpLo = Lower, typename _OrderingType = AMDOrdering<int> >\nclass IncompleteCholesky : public SparseSolverBase<IncompleteCholesky<Scalar,_UpLo,_OrderingType> >\n{\n  protected:\n    typedef SparseSolverBase<IncompleteCholesky<Scalar,_UpLo,_OrderingType> > Base;\n    using Base::m_isInitialized;\n  public:\n    typedef typename NumTraits<Scalar>::Real RealScalar;\n    typedef _OrderingType OrderingType;\n    typedef typename OrderingType::PermutationType PermutationType;\n    typedef typename PermutationType::StorageIndex StorageIndex;\n    typedef SparseMatrix<Scalar,ColMajor,StorageIndex> FactorType;\n    typedef Matrix<Scalar,Dynamic,1> VectorSx;\n    typedef Matrix<RealScalar,Dynamic,1> VectorRx;\n    typedef Matrix<StorageIndex,Dynamic, 1> VectorIx;\n    typedef std::vector<std::list<StorageIndex> > VectorList;\n    enum { UpLo = _UpLo };\n    enum {\n      ColsAtCompileTime = Dynamic,\n      MaxColsAtCompileTime = Dynamic\n    };\n  public:\n\n    /** Default constructor leaving the object in a partly non-initialized stage.\n      *\n      * You must call compute() or the pair analyzePattern()/factorize() to make it valid.\n      *\n      * \\sa IncompleteCholesky(const MatrixType&)\n      */\n    IncompleteCholesky() : m_initialShift(1e-3),m_analysisIsOk(false),m_factorizationIsOk(false) {}\n\n    /** Constructor computing the incomplete factorization for the given matrix \\a matrix.\n      */\n    template<typename MatrixType>\n    IncompleteCholesky(const MatrixType& matrix) : m_initialShift(1e-3),m_analysisIsOk(false),m_factorizationIsOk(false)\n    {\n      compute(matrix);\n    }\n\n    /** \\returns number of rows of the factored matrix */\n    EIGEN_CONSTEXPR Index rows() const EIGEN_NOEXCEPT { return m_L.rows(); }\n\n    /** \\returns number of columns of the factored matrix */\n    EIGEN_CONSTEXPR Index cols() const EIGEN_NOEXCEPT { return m_L.cols(); }\n\n\n    /** \\brief Reports whether previous computation was successful.\n      *\n      * It triggers an assertion if \\c *this has not been initialized through the respective constructor,\n      * or a call to compute() or analyzePattern().\n      *\n      * \\returns \\c Success if computation was successful,\n      *          \\c NumericalIssue if the matrix appears to be negative.\n      */\n    ComputationInfo info() const\n    {\n      eigen_assert(m_isInitialized && \"IncompleteCholesky is not initialized.\");\n      return m_info;\n    }\n\n    /** \\brief Set the initial shift parameter \\f$ \\sigma \\f$.\n      */\n    void setInitialShift(RealScalar shift) { m_initialShift = shift; }\n\n    /** \\brief Computes the fill reducing permutation vector using the sparsity pattern of \\a mat\n      */\n    template<typename MatrixType>\n    void analyzePattern(const MatrixType& mat)\n    {\n      OrderingType ord;\n      PermutationType pinv;\n      ord(mat.template selfadjointView<UpLo>(), pinv);\n      if(pinv.size()>0) m_perm = pinv.inverse();\n      else              m_perm.resize(0);\n      m_L.resize(mat.rows(), mat.cols());\n      m_analysisIsOk = true;\n      m_isInitialized = true;\n      m_info = Success;\n    }\n\n    /** \\brief Performs the numerical factorization of the input matrix \\a mat\n      *\n      * The method analyzePattern() or compute() must have been called beforehand\n      * with a matrix having 
the same pattern.\n      *\n      * \\sa compute(), analyzePattern()\n      */\n    template<typename MatrixType>\n    void factorize(const MatrixType& mat);\n\n    /** Computes or re-computes the incomplete Cholesky factorization of the input matrix \\a mat\n      *\n      * It is a shortcut for a sequential call to the analyzePattern() and factorize() methods.\n      *\n      * \\sa analyzePattern(), factorize()\n      */\n    template<typename MatrixType>\n    void compute(const MatrixType& mat)\n    {\n      analyzePattern(mat);\n      factorize(mat);\n    }\n\n    // internal\n    template<typename Rhs, typename Dest>\n    void _solve_impl(const Rhs& b, Dest& x) const\n    {\n      eigen_assert(m_factorizationIsOk && \"factorize() should be called first\");\n      if (m_perm.rows() == b.rows())  x = m_perm * b;\n      else                            x = b;\n      x = m_scale.asDiagonal() * x;\n      x = m_L.template triangularView<Lower>().solve(x);\n      x = m_L.adjoint().template triangularView<Upper>().solve(x);\n      x = m_scale.asDiagonal() * x;\n      if (m_perm.rows() == b.rows())\n        x = m_perm.inverse() * x;\n    }\n\n    /** \\returns the sparse lower triangular factor L */\n    const FactorType& matrixL() const { eigen_assert(m_factorizationIsOk && \"factorize() should be called first\"); return m_L; }\n\n    /** \\returns a vector representing the scaling factor S */\n    const VectorRx& scalingS() const { eigen_assert(m_factorizationIsOk && \"factorize() should be called first\"); return m_scale; }\n\n    /** \\returns the fill-in reducing permutation P (can be empty for a natural ordering) */\n    const PermutationType& permutationP() const { eigen_assert(m_analysisIsOk && \"analyzePattern() should be called first\"); return m_perm; }\n\n  protected:\n    FactorType m_L;              // The lower part stored in CSC\n    VectorRx m_scale;            // The vector for scaling the matrix\n    RealScalar m_initialShift;   // The initial shift parameter\n    bool m_analysisIsOk;\n    bool m_factorizationIsOk;\n    ComputationInfo m_info;\n    PermutationType m_perm;\n\n  private:\n    inline void updateList(Ref<const VectorIx> colPtr, Ref<VectorIx> rowIdx, Ref<VectorSx> vals, const Index& col, const Index& jk, VectorIx& firstElt, VectorList& listCol);\n};\n\n// Based on the following paper:\n//   C-J. Lin and J. J. Moré, Incomplete Cholesky Factorizations with\n//   Limited memory, SIAM J. Sci. Comput.  21(1), pp. 24-45, 1999\n//   http://ftp.mcs.anl.gov/pub/tech_reports/reports/P682.pdf\ntemplate<typename Scalar, int _UpLo, typename OrderingType>\ntemplate<typename _MatrixType>\nvoid IncompleteCholesky<Scalar,_UpLo, OrderingType>::factorize(const _MatrixType& mat)\n{\n  using std::sqrt;\n  eigen_assert(m_analysisIsOk && \"analyzePattern() should be called first\");\n\n  // Dropping strategy : Keep only the p largest elements per column, where p is the number of elements in the column of the original matrix. 
Other strategies will be added\n\n  // Apply the fill-reducing permutation computed in analyzePattern()\n  if (m_perm.rows() == mat.rows() ) // To detect the null permutation\n  {\n    // The temporary is needed to make sure that the diagonal entry is properly sorted\n    FactorType tmp(mat.rows(), mat.cols());\n    tmp = mat.template selfadjointView<_UpLo>().twistedBy(m_perm);\n    m_L.template selfadjointView<Lower>() = tmp.template selfadjointView<Lower>();\n  }\n  else\n  {\n    m_L.template selfadjointView<Lower>() = mat.template selfadjointView<_UpLo>();\n  }\n\n  Index n = m_L.cols();\n  Index nnz = m_L.nonZeros();\n  Map<VectorSx> vals(m_L.valuePtr(), nnz);         //values\n  Map<VectorIx> rowIdx(m_L.innerIndexPtr(), nnz);  //Row indices\n  Map<VectorIx> colPtr( m_L.outerIndexPtr(), n+1); // Pointer to the beginning of each row\n  VectorIx firstElt(n-1); // for each j, points to the next entry in vals that will be used in the factorization\n  VectorList listCol(n);  // listCol(j) is a linked list of columns to update column j\n  VectorSx col_vals(n);   // Store a  nonzero values in each column\n  VectorIx col_irow(n);   // Row indices of nonzero elements in each column\n  VectorIx col_pattern(n);\n  col_pattern.fill(-1);\n  StorageIndex col_nnz;\n\n\n  // Computes the scaling factors\n  m_scale.resize(n);\n  m_scale.setZero();\n  for (Index j = 0; j < n; j++)\n    for (Index k = colPtr[j]; k < colPtr[j+1]; k++)\n    {\n      m_scale(j) += numext::abs2(vals(k));\n      if(rowIdx[k]!=j)\n        m_scale(rowIdx[k]) += numext::abs2(vals(k));\n    }\n\n  m_scale = m_scale.cwiseSqrt().cwiseSqrt();\n\n  for (Index j = 0; j < n; ++j)\n    if(m_scale(j)>(std::numeric_limits<RealScalar>::min)())\n      m_scale(j) = RealScalar(1)/m_scale(j);\n    else\n      m_scale(j) = 1;\n\n  // TODO disable scaling if not needed, i.e., if it is roughly uniform? 
(this will make solve() faster)\n\n  // Scale and compute the shift for the matrix\n  RealScalar mindiag = NumTraits<RealScalar>::highest();\n  for (Index j = 0; j < n; j++)\n  {\n    for (Index k = colPtr[j]; k < colPtr[j+1]; k++)\n      vals[k] *= (m_scale(j)*m_scale(rowIdx[k]));\n    eigen_internal_assert(rowIdx[colPtr[j]]==j && \"IncompleteCholesky: only the lower triangular part must be stored\");\n    mindiag = numext::mini(numext::real(vals[colPtr[j]]), mindiag);\n  }\n\n  FactorType L_save = m_L;\n\n  RealScalar shift = 0;\n  if(mindiag <= RealScalar(0.))\n    shift = m_initialShift - mindiag;\n\n  m_info = NumericalIssue;\n\n  // Try to perform the incomplete factorization using the current shift\n  int iter = 0;\n  do\n  {\n    // Apply the shift to the diagonal elements of the matrix\n    for (Index j = 0; j < n; j++)\n      vals[colPtr[j]] += shift;\n\n    // jki version of the Cholesky factorization\n    Index j=0;\n    for (; j < n; ++j)\n    {\n      // Left-looking factorization of the j-th column\n      // First, load the j-th column into col_vals\n      Scalar diag = vals[colPtr[j]];  // It is assumed that only the lower part is stored\n      col_nnz = 0;\n      for (Index i = colPtr[j] + 1; i < colPtr[j+1]; i++)\n      {\n        StorageIndex l = rowIdx[i];\n        col_vals(col_nnz) = vals[i];\n        col_irow(col_nnz) = l;\n        col_pattern(l) = col_nnz;\n        col_nnz++;\n      }\n      {\n        typename std::list<StorageIndex>::iterator k;\n        // Browse all previous columns that will update column j\n        for(k = listCol[j].begin(); k != listCol[j].end(); k++)\n        {\n          Index jk = firstElt(*k); // First element to use in the column\n          eigen_internal_assert(rowIdx[jk]==j);\n          Scalar v_j_jk = numext::conj(vals[jk]);\n\n          jk += 1;\n          for (Index i = jk; i < colPtr[*k+1]; i++)\n          {\n            StorageIndex l = rowIdx[i];\n            if(col_pattern[l]<0)\n            {\n              col_vals(col_nnz) = vals[i] * v_j_jk;\n              col_irow[col_nnz] = l;\n              col_pattern(l) = col_nnz;\n              col_nnz++;\n            }\n            else\n              col_vals(col_pattern[l]) -= vals[i] * v_j_jk;\n          }\n          updateList(colPtr,rowIdx,vals, *k, jk, firstElt, listCol);\n        }\n      }\n\n      // Scale the current column\n      if(numext::real(diag) <= 0)\n      {\n        if(++iter>=10)\n          return;\n\n        // increase shift\n        shift = numext::maxi(m_initialShift,RealScalar(2)*shift);\n        // restore m_L, col_pattern, and listCol\n        vals = Map<const VectorSx>(L_save.valuePtr(), nnz);\n        rowIdx = Map<const VectorIx>(L_save.innerIndexPtr(), nnz);\n        colPtr = Map<const VectorIx>(L_save.outerIndexPtr(), n+1);\n        col_pattern.fill(-1);\n        for(Index i=0; i<n; ++i)\n          listCol[i].clear();\n\n        break;\n      }\n\n      RealScalar rdiag = sqrt(numext::real(diag));\n      vals[colPtr[j]] = rdiag;\n      for (Index k = 0; k<col_nnz; ++k)\n      {\n        Index i = col_irow[k];\n        //Scale\n        col_vals(k) /= rdiag;\n        //Update the remaining diagonals with col_vals\n        vals[colPtr[i]] -= numext::abs2(col_vals(k));\n      }\n      // Select the largest p elements\n      // p is the original number of elements in the column (without the diagonal)\n      Index p = colPtr[j+1] - colPtr[j] - 1 ;\n      Ref<VectorSx> cvals = col_vals.head(col_nnz);\n      Ref<VectorIx> cirow = col_irow.head(col_nnz);\n      
internal::QuickSplit(cvals,cirow, p);\n      // Insert the largest p elements in the matrix\n      Index cpt = 0;\n      for (Index i = colPtr[j]+1; i < colPtr[j+1]; i++)\n      {\n        vals[i] = col_vals(cpt);\n        rowIdx[i] = col_irow(cpt);\n        // restore col_pattern:\n        col_pattern(col_irow(cpt)) = -1;\n        cpt++;\n      }\n      // Get the first smallest row index and put it after the diagonal element\n      Index jk = colPtr(j)+1;\n      updateList(colPtr,rowIdx,vals,j,jk,firstElt,listCol);\n    }\n\n    if(j==n)\n    {\n      m_factorizationIsOk = true;\n      m_info = Success;\n    }\n  } while(m_info!=Success);\n}\n\ntemplate<typename Scalar, int _UpLo, typename OrderingType>\ninline void IncompleteCholesky<Scalar,_UpLo, OrderingType>::updateList(Ref<const VectorIx> colPtr, Ref<VectorIx> rowIdx, Ref<VectorSx> vals, const Index& col, const Index& jk, VectorIx& firstElt, VectorList& listCol)\n{\n  if (jk < colPtr(col+1) )\n  {\n    Index p = colPtr(col+1) - jk;\n    Index minpos;\n    rowIdx.segment(jk,p).minCoeff(&minpos);\n    minpos += jk;\n    if (rowIdx(minpos) != rowIdx(jk))\n    {\n      //Swap\n      std::swap(rowIdx(jk),rowIdx(minpos));\n      std::swap(vals(jk),vals(minpos));\n    }\n    firstElt(col) = internal::convert_index<StorageIndex,Index>(jk);\n    listCol[rowIdx(jk)].push_back(internal::convert_index<StorageIndex,Index>(col));\n  }\n}\n\n} // end namespace Eigen\n\n#endif\n"
  },
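A hedged sketch of how the IncompleteCholesky preconditioner above is typically plugged into the ConjugateGradient solver from the same module. The helper name and the shift value are illustrative; `A` and `b` are assumed to be an SPD SparseMatrix<double> and a matching VectorXd:

```cpp
#include <Eigen/Sparse>

// Illustrative helper: CG preconditioned with the incomplete Cholesky factorization.
Eigen::VectorXd solve_with_ic(const Eigen::SparseMatrix<double>& A,
                              const Eigen::VectorXd& b) {
  Eigen::ConjugateGradient<Eigen::SparseMatrix<double>,
                           Eigen::Lower | Eigen::Upper,
                           Eigen::IncompleteCholesky<double>> cg;
  // If factorize() keeps hitting non-positive pivots (info() == NumericalIssue),
  // raising the initial shift sigma (default 1e-3) gives the B + sigma*I fallback
  // from the class docs a head start; 1e-2 is an arbitrary example value.
  cg.preconditioner().setInitialShift(1e-2);
  cg.compute(A);
  return cg.solve(b);
}
```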
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>\n// Copyright (C) 2014 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_INCOMPLETE_LUT_H\n#define EIGEN_INCOMPLETE_LUT_H\n\n\nnamespace Eigen {\n\nnamespace internal {\n\n/** \\internal\n  * Compute a quick-sort split of a vector\n  * On output, the vector row is permuted such that its elements satisfy\n  * abs(row(i)) >= abs(row(ncut)) if i<ncut\n  * abs(row(i)) <= abs(row(ncut)) if i>ncut\n  * \\param row The vector of values\n  * \\param ind The array of index for the elements in @p row\n  * \\param ncut  The number of largest elements to keep\n  **/\ntemplate <typename VectorV, typename VectorI>\nIndex QuickSplit(VectorV &row, VectorI &ind, Index ncut)\n{\n  typedef typename VectorV::RealScalar RealScalar;\n  using std::swap;\n  using std::abs;\n  Index mid;\n  Index n = row.size(); /* length of the vector */\n  Index first, last ;\n\n  ncut--; /* to fit the zero-based indices */\n  first = 0;\n  last = n-1;\n  if (ncut < first || ncut > last ) return 0;\n\n  do {\n    mid = first;\n    RealScalar abskey = abs(row(mid));\n    for (Index j = first + 1; j <= last; j++) {\n      if ( abs(row(j)) > abskey) {\n        ++mid;\n        swap(row(mid), row(j));\n        swap(ind(mid), ind(j));\n      }\n    }\n    /* Interchange for the pivot element */\n    swap(row(mid), row(first));\n    swap(ind(mid), ind(first));\n\n    if (mid > ncut) last = mid - 1;\n    else if (mid < ncut ) first = mid + 1;\n  } while (mid != ncut );\n\n  return 0; /* mid is equal to ncut */\n}\n\n}// end namespace internal\n\n/** \\ingroup IterativeLinearSolvers_Module\n  * \\class IncompleteLUT\n  * \\brief Incomplete LU factorization with dual-threshold strategy\n  *\n  * \\implsparsesolverconcept\n  *\n  * During the numerical factorization, two dropping rules are used :\n  *  1) any element whose magnitude is less than some tolerance is dropped.\n  *    This tolerance is obtained by multiplying the input tolerance @p droptol\n  *    by the average magnitude of all the original elements in the current row.\n  *  2) After the elimination of the row, only the @p fill largest elements in\n  *    the L part and the @p fill largest elements in the U part are kept\n  *    (in addition to the diagonal element ). 
Note that @p fill is computed from\n  *    the input parameter @p fillfactor which is used the ratio to control the fill_in\n  *    relatively to the initial number of nonzero elements.\n  *\n  * The two extreme cases are when @p droptol=0 (to keep all the @p fill*2 largest elements)\n  * and when @p fill=n/2 with @p droptol being different to zero.\n  *\n  * References : Yousef Saad, ILUT: A dual threshold incomplete LU factorization,\n  *              Numerical Linear Algebra with Applications, 1(4), pp 387-402, 1994.\n  *\n  * NOTE : The following implementation is derived from the ILUT implementation\n  * in the SPARSKIT package, Copyright (C) 2005, the Regents of the University of Minnesota\n  *  released under the terms of the GNU LGPL:\n  *    http://www-users.cs.umn.edu/~saad/software/SPARSKIT/README\n  * However, Yousef Saad gave us permission to relicense his ILUT code to MPL2.\n  * See the Eigen mailing list archive, thread: ILUT, date: July 8, 2012:\n  *   http://listengine.tuxfamily.org/lists.tuxfamily.org/eigen/2012/07/msg00064.html\n  * alternatively, on GMANE:\n  *   http://comments.gmane.org/gmane.comp.lib.eigen/3302\n  */\ntemplate <typename _Scalar, typename _StorageIndex = int>\nclass IncompleteLUT : public SparseSolverBase<IncompleteLUT<_Scalar, _StorageIndex> >\n{\n  protected:\n    typedef SparseSolverBase<IncompleteLUT> Base;\n    using Base::m_isInitialized;\n  public:\n    typedef _Scalar Scalar;\n    typedef _StorageIndex StorageIndex;\n    typedef typename NumTraits<Scalar>::Real RealScalar;\n    typedef Matrix<Scalar,Dynamic,1> Vector;\n    typedef Matrix<StorageIndex,Dynamic,1> VectorI;\n    typedef SparseMatrix<Scalar,RowMajor,StorageIndex> FactorType;\n\n    enum {\n      ColsAtCompileTime = Dynamic,\n      MaxColsAtCompileTime = Dynamic\n    };\n\n  public:\n\n    IncompleteLUT()\n      : m_droptol(NumTraits<Scalar>::dummy_precision()), m_fillfactor(10),\n        m_analysisIsOk(false), m_factorizationIsOk(false)\n    {}\n\n    template<typename MatrixType>\n    explicit IncompleteLUT(const MatrixType& mat, const RealScalar& droptol=NumTraits<Scalar>::dummy_precision(), int fillfactor = 10)\n      : m_droptol(droptol),m_fillfactor(fillfactor),\n        m_analysisIsOk(false),m_factorizationIsOk(false)\n    {\n      eigen_assert(fillfactor != 0);\n      compute(mat);\n    }\n\n    EIGEN_CONSTEXPR Index rows() const EIGEN_NOEXCEPT { return m_lu.rows(); }\n\n    EIGEN_CONSTEXPR Index cols() const EIGEN_NOEXCEPT { return m_lu.cols(); }\n\n    /** \\brief Reports whether previous computation was successful.\n      *\n      * \\returns \\c Success if computation was successful,\n      *          \\c NumericalIssue if the matrix.appears to be negative.\n      */\n    ComputationInfo info() const\n    {\n      eigen_assert(m_isInitialized && \"IncompleteLUT is not initialized.\");\n      return m_info;\n    }\n\n    template<typename MatrixType>\n    void analyzePattern(const MatrixType& amat);\n\n    template<typename MatrixType>\n    void factorize(const MatrixType& amat);\n\n    /**\n      * Compute an incomplete LU factorization with dual threshold on the matrix mat\n      * No pivoting is done in this version\n      *\n      **/\n    template<typename MatrixType>\n    IncompleteLUT& compute(const MatrixType& amat)\n    {\n      analyzePattern(amat);\n      factorize(amat);\n      return *this;\n    }\n\n    void setDroptol(const RealScalar& droptol);\n    void setFillfactor(int fillfactor);\n\n    template<typename Rhs, typename Dest>\n    void _solve_impl(const 
Rhs& b, Dest& x) const\n    {\n      x = m_Pinv * b;\n      x = m_lu.template triangularView<UnitLower>().solve(x);\n      x = m_lu.template triangularView<Upper>().solve(x);\n      x = m_P * x;\n    }\n\nprotected:\n\n    /** keeps off-diagonal entries; drops diagonal entries */\n    struct keep_diag {\n      inline bool operator() (const Index& row, const Index& col, const Scalar&) const\n      {\n        return row!=col;\n      }\n    };\n\nprotected:\n\n    FactorType m_lu;\n    RealScalar m_droptol;\n    int m_fillfactor;\n    bool m_analysisIsOk;\n    bool m_factorizationIsOk;\n    ComputationInfo m_info;\n    PermutationMatrix<Dynamic,Dynamic,StorageIndex> m_P;     // Fill-reducing permutation\n    PermutationMatrix<Dynamic,Dynamic,StorageIndex> m_Pinv;  // Inverse permutation\n};\n\n/**\n * Set control parameter droptol\n *  \\param droptol   Drop any element whose magnitude is less than this tolerance\n **/\ntemplate<typename Scalar, typename StorageIndex>\nvoid IncompleteLUT<Scalar,StorageIndex>::setDroptol(const RealScalar& droptol)\n{\n  this->m_droptol = droptol;\n}\n\n/**\n * Set control parameter fillfactor\n * \\param fillfactor  This is used to compute the  number @p fill_in of largest elements to keep on each row.\n **/\ntemplate<typename Scalar, typename StorageIndex>\nvoid IncompleteLUT<Scalar,StorageIndex>::setFillfactor(int fillfactor)\n{\n  this->m_fillfactor = fillfactor;\n}\n\ntemplate <typename Scalar, typename StorageIndex>\ntemplate<typename _MatrixType>\nvoid IncompleteLUT<Scalar,StorageIndex>::analyzePattern(const _MatrixType& amat)\n{\n  // Compute the Fill-reducing permutation\n  // Since ILUT does not perform any numerical pivoting,\n  // it is highly preferable to keep the diagonal through symmetric permutations.\n  // To this end, let's symmetrize the pattern and perform AMD on it.\n  SparseMatrix<Scalar,ColMajor, StorageIndex> mat1 = amat;\n  SparseMatrix<Scalar,ColMajor, StorageIndex> mat2 = amat.transpose();\n  // FIXME for a matrix with nearly symmetric pattern, mat2+mat1 is the appropriate choice.\n  //       on the other hand for a really non-symmetric pattern, mat2*mat1 should be preferred...\n  SparseMatrix<Scalar,ColMajor, StorageIndex> AtA = mat2 + mat1;\n  AMDOrdering<StorageIndex> ordering;\n  ordering(AtA,m_P);\n  m_Pinv  = m_P.inverse(); // cache the inverse permutation\n  m_analysisIsOk = true;\n  m_factorizationIsOk = false;\n  m_isInitialized = true;\n}\n\ntemplate <typename Scalar, typename StorageIndex>\ntemplate<typename _MatrixType>\nvoid IncompleteLUT<Scalar,StorageIndex>::factorize(const _MatrixType& amat)\n{\n  using std::sqrt;\n  using std::swap;\n  using std::abs;\n  using internal::convert_index;\n\n  eigen_assert((amat.rows() == amat.cols()) && \"The factorization should be done on a square matrix\");\n  Index n = amat.cols();  // Size of the matrix\n  m_lu.resize(n,n);\n  // Declare Working vectors and variables\n  Vector u(n) ;     // real values of the row -- maximum size is n --\n  VectorI ju(n);   // column position of the values in u -- maximum size  is n\n  VectorI jr(n);   // Indicate the position of the nonzero elements in the vector u -- A zero location is indicated by -1\n\n  // Apply the fill-reducing permutation\n  eigen_assert(m_analysisIsOk && \"You must first call analyzePattern()\");\n  SparseMatrix<Scalar,RowMajor, StorageIndex> mat;\n  mat = amat.twistedBy(m_Pinv);\n\n  // Initialization\n  jr.fill(-1);\n  ju.fill(0);\n  u.fill(0);\n\n  // number of largest elements to keep in each row:\n  Index fill_in = 
(amat.nonZeros()*m_fillfactor)/n + 1;\n  if (fill_in > n) fill_in = n;\n\n  // number of largest nonzero elements to keep in the L and the U part of the current row:\n  Index nnzL = fill_in/2;\n  Index nnzU = nnzL;\n  m_lu.reserve(n * (nnzL + nnzU + 1));\n\n  // global loop over the rows of the sparse matrix\n  for (Index ii = 0; ii < n; ii++)\n  {\n    // 1 - copy the lower and the upper part of the row i of mat in the working vector u\n\n    Index sizeu = 1; // number of nonzero elements in the upper part of the current row\n    Index sizel = 0; // number of nonzero elements in the lower part of the current row\n    ju(ii)    = convert_index<StorageIndex>(ii);\n    u(ii)     = 0;\n    jr(ii)    = convert_index<StorageIndex>(ii);\n    RealScalar rownorm = 0;\n\n    typename FactorType::InnerIterator j_it(mat, ii); // Iterate through the current row ii\n    for (; j_it; ++j_it)\n    {\n      Index k = j_it.index();\n      if (k < ii)\n      {\n        // copy the lower part\n        ju(sizel) = convert_index<StorageIndex>(k);\n        u(sizel) = j_it.value();\n        jr(k) = convert_index<StorageIndex>(sizel);\n        ++sizel;\n      }\n      else if (k == ii)\n      {\n        u(ii) = j_it.value();\n      }\n      else\n      {\n        // copy the upper part\n        Index jpos = ii + sizeu;\n        ju(jpos) = convert_index<StorageIndex>(k);\n        u(jpos) = j_it.value();\n        jr(k) = convert_index<StorageIndex>(jpos);\n        ++sizeu;\n      }\n      rownorm += numext::abs2(j_it.value());\n    }\n\n    // 2 - detect possible zero row\n    if(rownorm==0)\n    {\n      m_info = NumericalIssue;\n      return;\n    }\n    // Take the 2-norm of the current row as a relative tolerance\n    rownorm = sqrt(rownorm);\n\n    // 3 - eliminate the previous nonzero rows\n    Index jj = 0;\n    Index len = 0;\n    while (jj < sizel)\n    {\n      // In order to eliminate in the correct order,\n      // we must select first the smallest column index among  ju(jj:sizel)\n      Index k;\n      Index minrow = ju.segment(jj,sizel-jj).minCoeff(&k); // k is relative to the segment\n      k += jj;\n      if (minrow != ju(jj))\n      {\n        // swap the two locations\n        Index j = ju(jj);\n        swap(ju(jj), ju(k));\n        jr(minrow) = convert_index<StorageIndex>(jj);\n        jr(j) = convert_index<StorageIndex>(k);\n        swap(u(jj), u(k));\n      }\n      // Reset this location\n      jr(minrow) = -1;\n\n      // Start elimination\n      typename FactorType::InnerIterator ki_it(m_lu, minrow);\n      while (ki_it && ki_it.index() < minrow) ++ki_it;\n      eigen_internal_assert(ki_it && ki_it.col()==minrow);\n      Scalar fact = u(jj) / ki_it.value();\n\n      // drop too small elements\n      if(abs(fact) <= m_droptol)\n      {\n        jj++;\n        continue;\n      }\n\n      // linear combination of the current row ii and the row minrow\n      ++ki_it;\n      for (; ki_it; ++ki_it)\n      {\n        Scalar prod = fact * ki_it.value();\n        Index j     = ki_it.index();\n        Index jpos  = jr(j);\n        if (jpos == -1) // fill-in element\n        {\n          Index newpos;\n          if (j >= ii) // dealing with the upper part\n          {\n            newpos = ii + sizeu;\n            sizeu++;\n            eigen_internal_assert(sizeu<=n);\n          }\n          else // dealing with the lower part\n          {\n            newpos = sizel;\n            sizel++;\n            eigen_internal_assert(sizel<=ii);\n          }\n          ju(newpos) = 
convert_index<StorageIndex>(j);\n          u(newpos) = -prod;\n          jr(j) = convert_index<StorageIndex>(newpos);\n        }\n        else\n          u(jpos) -= prod;\n      }\n      // store the pivot element\n      u(len)  = fact;\n      ju(len) = convert_index<StorageIndex>(minrow);\n      ++len;\n\n      jj++;\n    } // end of the elimination on the row ii\n\n    // reset the upper part of the pointer jr to zero\n    for(Index k = 0; k <sizeu; k++) jr(ju(ii+k)) = -1;\n\n    // 4 - partially sort and insert the elements in the m_lu matrix\n\n    // sort the L-part of the row\n    sizel = len;\n    len = (std::min)(sizel, nnzL);\n    typename Vector::SegmentReturnType ul(u.segment(0, sizel));\n    typename VectorI::SegmentReturnType jul(ju.segment(0, sizel));\n    internal::QuickSplit(ul, jul, len);\n\n    // store the largest m_fill elements of the L part\n    m_lu.startVec(ii);\n    for(Index k = 0; k < len; k++)\n      m_lu.insertBackByOuterInnerUnordered(ii,ju(k)) = u(k);\n\n    // store the diagonal element\n    // apply a shifting rule to avoid zero pivots (we are doing an incomplete factorization)\n    if (u(ii) == Scalar(0))\n      u(ii) = sqrt(m_droptol) * rownorm;\n    m_lu.insertBackByOuterInnerUnordered(ii, ii) = u(ii);\n\n    // sort the U-part of the row\n    // apply the dropping rule first\n    len = 0;\n    for(Index k = 1; k < sizeu; k++)\n    {\n      if(abs(u(ii+k)) > m_droptol * rownorm )\n      {\n        ++len;\n        u(ii + len)  = u(ii + k);\n        ju(ii + len) = ju(ii + k);\n      }\n    }\n    sizeu = len + 1; // +1 to take into account the diagonal element\n    len = (std::min)(sizeu, nnzU);\n    typename Vector::SegmentReturnType uu(u.segment(ii+1, sizeu-1));\n    typename VectorI::SegmentReturnType juu(ju.segment(ii+1, sizeu-1));\n    internal::QuickSplit(uu, juu, len);\n\n    // store the largest elements of the U part\n    for(Index k = ii + 1; k < ii + len; k++)\n      m_lu.insertBackByOuterInnerUnordered(ii,ju(k)) = u(k);\n  }\n  m_lu.finalize();\n  m_lu.makeCompressed();\n\n  m_factorizationIsOk = true;\n  m_info = Success;\n}\n\n} // end namespace Eigen\n\n#endif // EIGEN_INCOMPLETE_LUT_H\n"
  },
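Because ILUT makes no symmetry assumption, it is usually paired with a Krylov method for general square systems, for example BiCGSTAB from this same module (not shown in this excerpt). A sketch under those assumptions, with arbitrary example values for the two dual-threshold knobs described in the class docs:

```cpp
#include <Eigen/Sparse>

// Illustrative helper: ILUT-preconditioned BiCGSTAB for a general square system.
Eigen::VectorXd solve_with_ilut(const Eigen::SparseMatrix<double>& A,
                                const Eigen::VectorXd& b) {
  Eigen::BiCGSTAB<Eigen::SparseMatrix<double>,
                  Eigen::IncompleteLUT<double>> solver;
  solver.preconditioner().setDroptol(1e-5);   // rule 1: magnitude-based dropping
  solver.preconditioner().setFillfactor(10);  // rule 2: caps kept entries per row
  solver.compute(A);
  return solver.solve(b);
}
```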
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2011-2014 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_ITERATIVE_SOLVER_BASE_H\n#define EIGEN_ITERATIVE_SOLVER_BASE_H\n\nnamespace Eigen {\n\nnamespace internal {\n\ntemplate<typename MatrixType>\nstruct is_ref_compatible_impl\n{\nprivate:\n  template <typename T0>\n  struct any_conversion\n  {\n    template <typename T> any_conversion(const volatile T&);\n    template <typename T> any_conversion(T&);\n  };\n  struct yes {int a[1];};\n  struct no  {int a[2];};\n\n  template<typename T>\n  static yes test(const Ref<const T>&, int);\n  template<typename T>\n  static no  test(any_conversion<T>, ...);\n\npublic:\n  static MatrixType ms_from;\n  enum { value = sizeof(test<MatrixType>(ms_from, 0))==sizeof(yes) };\n};\n\ntemplate<typename MatrixType>\nstruct is_ref_compatible\n{\n  enum { value = is_ref_compatible_impl<typename remove_all<MatrixType>::type>::value };\n};\n\ntemplate<typename MatrixType, bool MatrixFree = !internal::is_ref_compatible<MatrixType>::value>\nclass generic_matrix_wrapper;\n\n// We have an explicit matrix at hand, compatible with Ref<>\ntemplate<typename MatrixType>\nclass generic_matrix_wrapper<MatrixType,false>\n{\npublic:\n  typedef Ref<const MatrixType> ActualMatrixType;\n  template<int UpLo> struct ConstSelfAdjointViewReturnType {\n    typedef typename ActualMatrixType::template ConstSelfAdjointViewReturnType<UpLo>::Type Type;\n  };\n\n  enum {\n    MatrixFree = false\n  };\n\n  generic_matrix_wrapper()\n    : m_dummy(0,0), m_matrix(m_dummy)\n  {}\n\n  template<typename InputType>\n  generic_matrix_wrapper(const InputType &mat)\n    : m_matrix(mat)\n  {}\n\n  const ActualMatrixType& matrix() const\n  {\n    return m_matrix;\n  }\n\n  template<typename MatrixDerived>\n  void grab(const EigenBase<MatrixDerived> &mat)\n  {\n    m_matrix.~Ref<const MatrixType>();\n    ::new (&m_matrix) Ref<const MatrixType>(mat.derived());\n  }\n\n  void grab(const Ref<const MatrixType> &mat)\n  {\n    if(&(mat.derived()) != &m_matrix)\n    {\n      m_matrix.~Ref<const MatrixType>();\n      ::new (&m_matrix) Ref<const MatrixType>(mat);\n    }\n  }\n\nprotected:\n  MatrixType m_dummy; // used to default initialize the Ref<> object\n  ActualMatrixType m_matrix;\n};\n\n// MatrixType is not compatible with Ref<> -> matrix-free wrapper\ntemplate<typename MatrixType>\nclass generic_matrix_wrapper<MatrixType,true>\n{\npublic:\n  typedef MatrixType ActualMatrixType;\n  template<int UpLo> struct ConstSelfAdjointViewReturnType\n  {\n    typedef ActualMatrixType Type;\n  };\n\n  enum {\n    MatrixFree = true\n  };\n\n  generic_matrix_wrapper()\n    : mp_matrix(0)\n  {}\n\n  generic_matrix_wrapper(const MatrixType &mat)\n    : mp_matrix(&mat)\n  {}\n\n  const ActualMatrixType& matrix() const\n  {\n    return *mp_matrix;\n  }\n\n  void grab(const MatrixType &mat)\n  {\n    mp_matrix = &mat;\n  }\n\nprotected:\n  const ActualMatrixType *mp_matrix;\n};\n\n}\n\n/** \\ingroup IterativeLinearSolvers_Module\n  * \\brief Base class for linear iterative solvers\n  *\n  * \\sa class SimplicialCholesky, DiagonalPreconditioner, IdentityPreconditioner\n  */\ntemplate< typename Derived>\nclass IterativeSolverBase : public SparseSolverBase<Derived>\n{\nprotected:\n  
typedef SparseSolverBase<Derived> Base;\n  using Base::m_isInitialized;\n\npublic:\n  typedef typename internal::traits<Derived>::MatrixType MatrixType;\n  typedef typename internal::traits<Derived>::Preconditioner Preconditioner;\n  typedef typename MatrixType::Scalar Scalar;\n  typedef typename MatrixType::StorageIndex StorageIndex;\n  typedef typename MatrixType::RealScalar RealScalar;\n\n  enum {\n    ColsAtCompileTime = MatrixType::ColsAtCompileTime,\n    MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime\n  };\n\npublic:\n\n  using Base::derived;\n\n  /** Default constructor. */\n  IterativeSolverBase()\n  {\n    init();\n  }\n\n  /** Initialize the solver with matrix \\a A for further \\c Ax=b solving.\n    *\n    * This constructor is a shortcut for the default constructor followed\n    * by a call to compute().\n    *\n    * \\warning this class stores a reference to the matrix A as well as some\n    * precomputed values that depend on it. Therefore, if \\a A is changed\n    * this class becomes invalid. Call compute() to update it with the new\n    * matrix A, or modify a copy of A.\n    */\n  template<typename MatrixDerived>\n  explicit IterativeSolverBase(const EigenBase<MatrixDerived>& A)\n    : m_matrixWrapper(A.derived())\n  {\n    init();\n    compute(matrix());\n  }\n\n  ~IterativeSolverBase() {}\n\n  /** Initializes the iterative solver for the sparsity pattern of the matrix \\a A for further solving \\c Ax=b problems.\n    *\n    * Currently, this function mostly calls analyzePattern on the preconditioner. In the future\n    * we might, for instance, implement column reordering for faster matrix vector products.\n    */\n  template<typename MatrixDerived>\n  Derived& analyzePattern(const EigenBase<MatrixDerived>& A)\n  {\n    grab(A.derived());\n    m_preconditioner.analyzePattern(matrix());\n    m_isInitialized = true;\n    m_analysisIsOk = true;\n    m_info = m_preconditioner.info();\n    return derived();\n  }\n\n  /** Initializes the iterative solver with the numerical values of the matrix \\a A for further solving \\c Ax=b problems.\n    *\n    * Currently, this function mostly calls factorize on the preconditioner.\n    *\n    * \\warning this class stores a reference to the matrix A as well as some\n    * precomputed values that depend on it. Therefore, if \\a A is changed\n    * this class becomes invalid. Call compute() to update it with the new\n    * matrix A, or modify a copy of A.\n    */\n  template<typename MatrixDerived>\n  Derived& factorize(const EigenBase<MatrixDerived>& A)\n  {\n    eigen_assert(m_analysisIsOk && \"You must first call analyzePattern()\");\n    grab(A.derived());\n    m_preconditioner.factorize(matrix());\n    m_factorizationIsOk = true;\n    m_info = m_preconditioner.info();\n    return derived();\n  }\n\n  /** Initializes the iterative solver with the matrix \\a A for further solving \\c Ax=b problems.\n    *\n    * Currently, this function mostly initializes/computes the preconditioner. In the future\n    * we might, for instance, implement column reordering for faster matrix vector products.\n    *\n    * \\warning this class stores a reference to the matrix A as well as some\n    * precomputed values that depend on it. Therefore, if \\a A is changed\n    * this class becomes invalid. 
Call compute() to update it with the new\n    * matrix A, or modify a copy of A.\n    */\n  template<typename MatrixDerived>\n  Derived& compute(const EigenBase<MatrixDerived>& A)\n  {\n    grab(A.derived());\n    m_preconditioner.compute(matrix());\n    m_isInitialized = true;\n    m_analysisIsOk = true;\n    m_factorizationIsOk = true;\n    m_info = m_preconditioner.info();\n    return derived();\n  }\n\n  /** \\internal */\n  EIGEN_CONSTEXPR Index rows() const EIGEN_NOEXCEPT { return matrix().rows(); }\n\n  /** \\internal */\n  EIGEN_CONSTEXPR Index cols() const EIGEN_NOEXCEPT { return matrix().cols(); }\n\n  /** \\returns the tolerance threshold used by the stopping criteria.\n    * \\sa setTolerance()\n    */\n  RealScalar tolerance() const { return m_tolerance; }\n\n  /** Sets the tolerance threshold used by the stopping criteria.\n    *\n    * This value is used as an upper bound to the relative residual error: |Ax-b|/|b|.\n    * The default value is the machine precision given by NumTraits<Scalar>::epsilon()\n    */\n  Derived& setTolerance(const RealScalar& tolerance)\n  {\n    m_tolerance = tolerance;\n    return derived();\n  }\n\n  /** \\returns a read-write reference to the preconditioner for custom configuration. */\n  Preconditioner& preconditioner() { return m_preconditioner; }\n\n  /** \\returns a read-only reference to the preconditioner. */\n  const Preconditioner& preconditioner() const { return m_preconditioner; }\n\n  /** \\returns the max number of iterations.\n    * It is either the value set by setMaxIterations or, by default,\n    * twice the number of columns of the matrix.\n    */\n  Index maxIterations() const\n  {\n    return (m_maxIterations<0) ? 2*matrix().cols() : m_maxIterations;\n  }\n\n  /** Sets the max number of iterations.\n    * Default is twice the number of columns of the matrix.\n    */\n  Derived& setMaxIterations(Index maxIters)\n  {\n    m_maxIterations = maxIters;\n    return derived();\n  }\n\n  /** \\returns the number of iterations performed during the last solve */\n  Index iterations() const\n  {\n    eigen_assert(m_isInitialized && \"ConjugateGradient is not initialized.\");\n    return m_iterations;\n  }\n\n  /** \\returns the tolerance error reached during the last solve.\n    * It is a close approximation of the true relative residual error |Ax-b|/|b|.\n    */\n  RealScalar error() const\n  {\n    eigen_assert(m_isInitialized && \"ConjugateGradient is not initialized.\");\n    return m_error;\n  }\n\n  /** \\returns the solution x of \\f$ A x = b \\f$ using the current decomposition of A\n    * and \\a x0 as an initial solution.\n    *\n    * \\sa solve(), compute()\n    */\n  template<typename Rhs,typename Guess>\n  inline const SolveWithGuess<Derived, Rhs, Guess>\n  solveWithGuess(const MatrixBase<Rhs>& b, const Guess& x0) const\n  {\n    eigen_assert(m_isInitialized && \"Solver is not initialized.\");\n    eigen_assert(derived().rows()==b.rows() && \"solve(): invalid number of rows of the right hand side matrix b\");\n    return SolveWithGuess<Derived, Rhs, Guess>(derived(), b.derived(), x0);\n  }\n\n  /** \\returns Success if the iterations converged, and NoConvergence otherwise. 
*/\n  ComputationInfo info() const\n  {\n    eigen_assert(m_isInitialized && \"IterativeSolverBase is not initialized.\");\n    return m_info;\n  }\n\n  /** \\internal */\n  template<typename Rhs, typename DestDerived>\n  void _solve_with_guess_impl(const Rhs& b, SparseMatrixBase<DestDerived> &aDest) const\n  {\n    eigen_assert(rows()==b.rows());\n\n    Index rhsCols = b.cols();\n    Index size = b.rows();\n    DestDerived& dest(aDest.derived());\n    typedef typename DestDerived::Scalar DestScalar;\n    Eigen::Matrix<DestScalar,Dynamic,1> tb(size);\n    Eigen::Matrix<DestScalar,Dynamic,1> tx(cols());\n    // We do not directly fill dest because sparse expressions have to be free of aliasing issue.\n    // For non square least-square problems, b and dest might not have the same size whereas they might alias each-other.\n    typename DestDerived::PlainObject tmp(cols(),rhsCols);\n    ComputationInfo global_info = Success;\n    for(Index k=0; k<rhsCols; ++k)\n    {\n      tb = b.col(k);\n      tx = dest.col(k);\n      derived()._solve_vector_with_guess_impl(tb,tx);\n      tmp.col(k) = tx.sparseView(0);\n\n      // The call to _solve_vector_with_guess_impl updates m_info, so if it failed for a previous column\n      // we need to restore it to the worst value.\n      if(m_info==NumericalIssue)\n        global_info = NumericalIssue;\n      else if(m_info==NoConvergence)\n        global_info = NoConvergence;\n    }\n    m_info = global_info;\n    dest.swap(tmp);\n  }\n\n  template<typename Rhs, typename DestDerived>\n  typename internal::enable_if<Rhs::ColsAtCompileTime!=1 && DestDerived::ColsAtCompileTime!=1>::type\n  _solve_with_guess_impl(const Rhs& b, MatrixBase<DestDerived> &aDest) const\n  {\n    eigen_assert(rows()==b.rows());\n\n    Index rhsCols = b.cols();\n    DestDerived& dest(aDest.derived());\n    ComputationInfo global_info = Success;\n    for(Index k=0; k<rhsCols; ++k)\n    {\n      typename DestDerived::ColXpr xk(dest,k);\n      typename Rhs::ConstColXpr bk(b,k);\n      derived()._solve_vector_with_guess_impl(bk,xk);\n\n      // The call to _solve_vector_with_guess updates m_info, so if it failed for a previous column\n      // we need to restore it to the worst value.\n      if(m_info==NumericalIssue)\n        global_info = NumericalIssue;\n      else if(m_info==NoConvergence)\n        global_info = NoConvergence;\n    }\n    m_info = global_info;\n  }\n\n  template<typename Rhs, typename DestDerived>\n  typename internal::enable_if<Rhs::ColsAtCompileTime==1 || DestDerived::ColsAtCompileTime==1>::type\n  _solve_with_guess_impl(const Rhs& b, MatrixBase<DestDerived> &dest) const\n  {\n    derived()._solve_vector_with_guess_impl(b,dest.derived());\n  }\n\n  /** \\internal default initial guess = 0 */\n  template<typename Rhs,typename Dest>\n  void _solve_impl(const Rhs& b, Dest& x) const\n  {\n    x.setZero();\n    derived()._solve_with_guess_impl(b,x);\n  }\n\nprotected:\n  void init()\n  {\n    m_isInitialized = false;\n    m_analysisIsOk = false;\n    m_factorizationIsOk = false;\n    m_maxIterations = -1;\n    m_tolerance = NumTraits<Scalar>::epsilon();\n  }\n\n  typedef internal::generic_matrix_wrapper<MatrixType> MatrixWrapper;\n  typedef typename MatrixWrapper::ActualMatrixType ActualMatrixType;\n\n  const ActualMatrixType& matrix() const\n  {\n    return m_matrixWrapper.matrix();\n  }\n\n  template<typename InputType>\n  void grab(const InputType &A)\n  {\n    m_matrixWrapper.grab(A);\n  }\n\n  MatrixWrapper m_matrixWrapper;\n  Preconditioner m_preconditioner;\n\n  Index 
m_maxIterations;\n  RealScalar m_tolerance;\n\n  mutable RealScalar m_error;\n  mutable Index m_iterations;\n  mutable ComputationInfo m_info;\n  mutable bool m_analysisIsOk, m_factorizationIsOk;\n};\n\n} // end namespace Eigen\n\n#endif // EIGEN_ITERATIVE_SOLVER_BASE_H\n"
  },
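Every solver deriving from IterativeSolverBase inherits the same control surface: the tolerance, the iteration cap, the analyzePattern()/factorize() split, and the post-solve diagnostics. A sketch using ConjugateGradient as a stand-in for any derived solver; `A`, `A_updated` (same sparsity pattern, new values), and `b` are assumed inputs:

```cpp
#include <Eigen/Sparse>
#include <iostream>

void solver_controls(const Eigen::SparseMatrix<double>& A,
                     const Eigen::SparseMatrix<double>& A_updated,
                     const Eigen::VectorXd& b) {
  Eigen::ConjugateGradient<Eigen::SparseMatrix<double>, Eigen::Lower | Eigen::Upper> solver;
  solver.setTolerance(1e-8);     // upper bound on the relative residual |Ax-b|/|b|
  solver.setMaxIterations(500);  // default is twice the number of columns

  solver.analyzePattern(A);      // pattern-dependent work (preconditioner analysis)
  solver.factorize(A);           // numeric work
  Eigen::VectorXd x = solver.solve(b);

  solver.factorize(A_updated);   // same pattern, new values: no re-analysis needed
  x = solver.solve(b);

  if (solver.info() != Eigen::Success)
    std::cerr << "stopped after " << solver.iterations()
              << " iterations, error " << solver.error() << "\n";
}
```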
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2015 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_LEAST_SQUARE_CONJUGATE_GRADIENT_H\n#define EIGEN_LEAST_SQUARE_CONJUGATE_GRADIENT_H\n\nnamespace Eigen { \n\nnamespace internal {\n\n/** \\internal Low-level conjugate gradient algorithm for least-square problems\n  * \\param mat The matrix A\n  * \\param rhs The right hand side vector b\n  * \\param x On input and initial solution, on output the computed solution.\n  * \\param precond A preconditioner being able to efficiently solve for an\n  *                approximation of A'Ax=b (regardless of b)\n  * \\param iters On input the max number of iteration, on output the number of performed iterations.\n  * \\param tol_error On input the tolerance error, on output an estimation of the relative error.\n  */\ntemplate<typename MatrixType, typename Rhs, typename Dest, typename Preconditioner>\nEIGEN_DONT_INLINE\nvoid least_square_conjugate_gradient(const MatrixType& mat, const Rhs& rhs, Dest& x,\n                                     const Preconditioner& precond, Index& iters,\n                                     typename Dest::RealScalar& tol_error)\n{\n  using std::sqrt;\n  using std::abs;\n  typedef typename Dest::RealScalar RealScalar;\n  typedef typename Dest::Scalar Scalar;\n  typedef Matrix<Scalar,Dynamic,1> VectorType;\n  \n  RealScalar tol = tol_error;\n  Index maxIters = iters;\n  \n  Index m = mat.rows(), n = mat.cols();\n\n  VectorType residual        = rhs - mat * x;\n  VectorType normal_residual = mat.adjoint() * residual;\n\n  RealScalar rhsNorm2 = (mat.adjoint()*rhs).squaredNorm();\n  if(rhsNorm2 == 0) \n  {\n    x.setZero();\n    iters = 0;\n    tol_error = 0;\n    return;\n  }\n  RealScalar threshold = tol*tol*rhsNorm2;\n  RealScalar residualNorm2 = normal_residual.squaredNorm();\n  if (residualNorm2 < threshold)\n  {\n    iters = 0;\n    tol_error = sqrt(residualNorm2 / rhsNorm2);\n    return;\n  }\n  \n  VectorType p(n);\n  p = precond.solve(normal_residual);                         // initial search direction\n\n  VectorType z(n), tmp(m);\n  RealScalar absNew = numext::real(normal_residual.dot(p));  // the square of the absolute value of r scaled by invM\n  Index i = 0;\n  while(i < maxIters)\n  {\n    tmp.noalias() = mat * p;\n\n    Scalar alpha = absNew / tmp.squaredNorm();      // the amount we travel on dir\n    x += alpha * p;                                 // update solution\n    residual -= alpha * tmp;                        // update residual\n    normal_residual = mat.adjoint() * residual;     // update residual of the normal equation\n    \n    residualNorm2 = normal_residual.squaredNorm();\n    if(residualNorm2 < threshold)\n      break;\n    \n    z = precond.solve(normal_residual);             // approximately solve for \"A'A z = normal_residual\"\n\n    RealScalar absOld = absNew;\n    absNew = numext::real(normal_residual.dot(z));  // update the absolute value of r\n    RealScalar beta = absNew / absOld;              // calculate the Gram-Schmidt value used to create the new search direction\n    p = z + beta * p;                               // update search direction\n    i++;\n  }\n  tol_error = sqrt(residualNorm2 / rhsNorm2);\n  iters = 
i;\n}\n\n}\n\ntemplate< typename _MatrixType,\n          typename _Preconditioner = LeastSquareDiagonalPreconditioner<typename _MatrixType::Scalar> >\nclass LeastSquaresConjugateGradient;\n\nnamespace internal {\n\ntemplate< typename _MatrixType, typename _Preconditioner>\nstruct traits<LeastSquaresConjugateGradient<_MatrixType,_Preconditioner> >\n{\n  typedef _MatrixType MatrixType;\n  typedef _Preconditioner Preconditioner;\n};\n\n}\n\n/** \\ingroup IterativeLinearSolvers_Module\n  * \\brief A conjugate gradient solver for sparse (or dense) least-square problems\n  *\n  * This class allows solving A x = b linear problems using an iterative conjugate gradient algorithm.\n  * The matrix A can be non-symmetric and rectangular, but the matrix A' A should be positive-definite to guarantee stability.\n  * Otherwise, the SparseLU or SparseQR classes might be preferable.\n  * The matrix A and the vectors x and b can be either dense or sparse.\n  *\n  * \\tparam _MatrixType the type of the matrix A, can be a dense or a sparse matrix.\n  * \\tparam _Preconditioner the type of the preconditioner. Default is LeastSquareDiagonalPreconditioner\n  *\n  * \\implsparsesolverconcept\n  * \n  * The maximal number of iterations and tolerance value can be controlled via the setMaxIterations()\n  * and setTolerance() methods. The defaults are the size of the problem for the maximal number of iterations\n  * and NumTraits<Scalar>::epsilon() for the tolerance.\n  * \n  * This class can be used like the direct solver classes. Here is a typical usage example:\n    \\code\n    int m=1000000, n = 10000;\n    VectorXd x(n), b(m);\n    SparseMatrix<double> A(m,n);\n    // fill A and b\n    LeastSquaresConjugateGradient<SparseMatrix<double> > lscg;\n    lscg.compute(A);\n    x = lscg.solve(b);\n    std::cout << \"#iterations:     \" << lscg.iterations() << std::endl;\n    std::cout << \"estimated error: \" << lscg.error()      << std::endl;\n    // update b, and solve again\n    x = lscg.solve(b);\n    \\endcode\n  * \n  * By default the iterations start with x=0 as an initial guess of the solution.\n  * One can control the start using the solveWithGuess() method.\n  * \n  * \\sa class ConjugateGradient, SparseLU, SparseQR\n  */\ntemplate< typename _MatrixType, typename _Preconditioner>\nclass LeastSquaresConjugateGradient : public IterativeSolverBase<LeastSquaresConjugateGradient<_MatrixType,_Preconditioner> >\n{\n  typedef IterativeSolverBase<LeastSquaresConjugateGradient> Base;\n  using Base::matrix;\n  using Base::m_error;\n  using Base::m_iterations;\n  using Base::m_info;\n  using Base::m_isInitialized;\npublic:\n  typedef _MatrixType MatrixType;\n  typedef typename MatrixType::Scalar Scalar;\n  typedef typename MatrixType::RealScalar RealScalar;\n  typedef _Preconditioner Preconditioner;\n\npublic:\n\n  /** Default constructor. */\n  LeastSquaresConjugateGradient() : Base() {}\n\n  /** Initialize the solver with matrix \\a A for further \\c Ax=b solving.\n    * \n    * This constructor is a shortcut for the default constructor followed\n    * by a call to compute().\n    * \n    * \\warning this class stores a reference to the matrix A as well as some\n    * precomputed values that depend on it. Therefore, if \\a A is changed\n    * this class becomes invalid. 
Call compute() to update it with the new\n    * matrix A, or modify a copy of A.\n    */\n  template<typename MatrixDerived>\n  explicit LeastSquaresConjugateGradient(const EigenBase<MatrixDerived>& A) : Base(A.derived()) {}\n\n  ~LeastSquaresConjugateGradient() {}\n\n  /** \\internal */\n  template<typename Rhs,typename Dest>\n  void _solve_vector_with_guess_impl(const Rhs& b, Dest& x) const\n  {\n    m_iterations = Base::maxIterations();\n    m_error = Base::m_tolerance;\n\n    internal::least_square_conjugate_gradient(matrix(), b, x, Base::m_preconditioner, m_iterations, m_error);\n    m_info = m_error <= Base::m_tolerance ? Success : NoConvergence;\n  }\n\n};\n\n} // end namespace Eigen\n\n#endif // EIGEN_LEAST_SQUARE_CONJUGATE_GRADIENT_H\n"
  },
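The class docs above already carry a \code example; for completeness, the same idea as a compact sketch for a rectangular system, assuming m >= n and A' A positive-definite as the docs require:

```cpp
#include <Eigen/Sparse>

// Illustrative helper: minimize ||Ax - b||^2 for sparse rectangular A.
Eigen::VectorXd sparse_least_squares(const Eigen::SparseMatrix<double>& A,
                                     const Eigen::VectorXd& b) {
  Eigen::LeastSquaresConjugateGradient<Eigen::SparseMatrix<double>> lscg;
  lscg.compute(A);       // sets up the LeastSquareDiagonalPreconditioner
  return lscg.solve(b);  // iterates on the normal equations A'A x = A'b
}
```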
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2014 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_SOLVEWITHGUESS_H\n#define EIGEN_SOLVEWITHGUESS_H\n\nnamespace Eigen {\n\ntemplate<typename Decomposition, typename RhsType, typename GuessType> class SolveWithGuess;\n\n/** \\class SolveWithGuess\n  * \\ingroup IterativeLinearSolvers_Module\n  *\n  * \\brief Pseudo expression representing a solving operation\n  *\n  * \\tparam Decomposition the type of the matrix or decomposion object\n  * \\tparam Rhstype the type of the right-hand side\n  *\n  * This class represents an expression of A.solve(B)\n  * and most of the time this is the only way it is used.\n  *\n  */\nnamespace internal {\n\n\ntemplate<typename Decomposition, typename RhsType, typename GuessType>\nstruct traits<SolveWithGuess<Decomposition, RhsType, GuessType> >\n  : traits<Solve<Decomposition,RhsType> >\n{};\n\n}\n\n\ntemplate<typename Decomposition, typename RhsType, typename GuessType>\nclass SolveWithGuess : public internal::generic_xpr_base<SolveWithGuess<Decomposition,RhsType,GuessType>, MatrixXpr, typename internal::traits<RhsType>::StorageKind>::type\n{\npublic:\n  typedef typename internal::traits<SolveWithGuess>::Scalar Scalar;\n  typedef typename internal::traits<SolveWithGuess>::PlainObject PlainObject;\n  typedef typename internal::generic_xpr_base<SolveWithGuess<Decomposition,RhsType,GuessType>, MatrixXpr, typename internal::traits<RhsType>::StorageKind>::type Base;\n  typedef typename internal::ref_selector<SolveWithGuess>::type Nested;\n\n  SolveWithGuess(const Decomposition &dec, const RhsType &rhs, const GuessType &guess)\n    : m_dec(dec), m_rhs(rhs), m_guess(guess)\n  {}\n\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n  Index rows() const EIGEN_NOEXCEPT { return m_dec.cols(); }\n  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n  Index cols() const EIGEN_NOEXCEPT { return m_rhs.cols(); }\n\n  EIGEN_DEVICE_FUNC const Decomposition& dec()   const { return m_dec; }\n  EIGEN_DEVICE_FUNC const RhsType&       rhs()   const { return m_rhs; }\n  EIGEN_DEVICE_FUNC const GuessType&     guess() const { return m_guess; }\n\nprotected:\n  const Decomposition &m_dec;\n  const RhsType       &m_rhs;\n  const GuessType     &m_guess;\n\nprivate:\n  Scalar coeff(Index row, Index col) const;\n  Scalar coeff(Index i) const;\n};\n\nnamespace internal {\n\n// Evaluator of SolveWithGuess -> eval into a temporary\ntemplate<typename Decomposition, typename RhsType, typename GuessType>\nstruct evaluator<SolveWithGuess<Decomposition,RhsType, GuessType> >\n  : public evaluator<typename SolveWithGuess<Decomposition,RhsType,GuessType>::PlainObject>\n{\n  typedef SolveWithGuess<Decomposition,RhsType,GuessType> SolveType;\n  typedef typename SolveType::PlainObject PlainObject;\n  typedef evaluator<PlainObject> Base;\n\n  evaluator(const SolveType& solve)\n    : m_result(solve.rows(), solve.cols())\n  {\n    ::new (static_cast<Base*>(this)) Base(m_result);\n    m_result = solve.guess();\n    solve.dec()._solve_with_guess_impl(solve.rhs(), m_result);\n  }\n\nprotected:\n  PlainObject m_result;\n};\n\n// Specialization for \"dst = dec.solveWithGuess(rhs)\"\n// NOTE we need to specialize it for Dense2Dense to avoid ambiguous specialization error and a Sparse2Sparse 
specialization must exist somewhere\ntemplate<typename DstXprType, typename DecType, typename RhsType, typename GuessType, typename Scalar>\nstruct Assignment<DstXprType, SolveWithGuess<DecType,RhsType,GuessType>, internal::assign_op<Scalar,Scalar>, Dense2Dense>\n{\n  typedef SolveWithGuess<DecType,RhsType,GuessType> SrcXprType;\n  static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)\n  {\n    Index dstRows = src.rows();\n    Index dstCols = src.cols();\n    if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))\n      dst.resize(dstRows, dstCols);\n\n    dst = src.guess();\n    src.dec()._solve_with_guess_impl(src.rhs(), dst/*, src.guess()*/);\n  }\n};\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_SOLVEWITHGUESS_H\n"
  },
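SolveWithGuess is the expression type that solveWithGuess() returns: evaluation is deferred until assignment, at which point the destination is seeded with the guess before the iterations run. One consequence, sketched below under the assumption of a slowly varying sequence of right-hand sides, is cheap warm starting; the helper name is illustrative:

```cpp
#include <Eigen/Sparse>
#include <vector>

// Illustrative helper: reuse each solution as the initial guess for the next solve.
Eigen::VectorXd track_solutions(const Eigen::SparseMatrix<double>& A,
                                const std::vector<Eigen::VectorXd>& rhs) {
  Eigen::ConjugateGradient<Eigen::SparseMatrix<double>, Eigen::Lower | Eigen::Upper> cg(A);
  Eigen::VectorXd x = Eigen::VectorXd::Zero(A.cols());
  for (const Eigen::VectorXd& b : rhs)
    x = cg.solveWithGuess(b, x);  // assignment copies the guess into x, then iterates
  return x;
}
```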
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/Jacobi/Jacobi.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com>\n// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_JACOBI_H\n#define EIGEN_JACOBI_H\n\nnamespace Eigen {\n\n/** \\ingroup Jacobi_Module\n  * \\jacobi_module\n  * \\class JacobiRotation\n  * \\brief Rotation given by a cosine-sine pair.\n  *\n  * This class represents a Jacobi or Givens rotation.\n  * This is a 2D rotation in the plane \\c J of angle \\f$ \\theta \\f$ defined by\n  * its cosine \\c c and sine \\c s as follow:\n  * \\f$ J = \\left ( \\begin{array}{cc} c & \\overline s \\\\ -s  & \\overline c \\end{array} \\right ) \\f$\n  *\n  * You can apply the respective counter-clockwise rotation to a column vector \\c v by\n  * applying its adjoint on the left: \\f$ v = J^* v \\f$ that translates to the following Eigen code:\n  * \\code\n  * v.applyOnTheLeft(J.adjoint());\n  * \\endcode\n  *\n  * \\sa MatrixBase::applyOnTheLeft(), MatrixBase::applyOnTheRight()\n  */\ntemplate<typename Scalar> class JacobiRotation\n{\n  public:\n    typedef typename NumTraits<Scalar>::Real RealScalar;\n\n    /** Default constructor without any initialization. */\n    EIGEN_DEVICE_FUNC\n    JacobiRotation() {}\n\n    /** Construct a planar rotation from a cosine-sine pair (\\a c, \\c s). */\n    EIGEN_DEVICE_FUNC\n    JacobiRotation(const Scalar& c, const Scalar& s) : m_c(c), m_s(s) {}\n\n    EIGEN_DEVICE_FUNC Scalar& c() { return m_c; }\n    EIGEN_DEVICE_FUNC Scalar c() const { return m_c; }\n    EIGEN_DEVICE_FUNC Scalar& s() { return m_s; }\n    EIGEN_DEVICE_FUNC Scalar s() const { return m_s; }\n\n    /** Concatenates two planar rotation */\n    EIGEN_DEVICE_FUNC\n    JacobiRotation operator*(const JacobiRotation& other)\n    {\n      using numext::conj;\n      return JacobiRotation(m_c * other.m_c - conj(m_s) * other.m_s,\n                            conj(m_c * conj(other.m_s) + conj(m_s) * conj(other.m_c)));\n    }\n\n    /** Returns the transposed transformation */\n    EIGEN_DEVICE_FUNC\n    JacobiRotation transpose() const { using numext::conj; return JacobiRotation(m_c, -conj(m_s)); }\n\n    /** Returns the adjoint transformation */\n    EIGEN_DEVICE_FUNC\n    JacobiRotation adjoint() const { using numext::conj; return JacobiRotation(conj(m_c), -m_s); }\n\n    template<typename Derived>\n    EIGEN_DEVICE_FUNC\n    bool makeJacobi(const MatrixBase<Derived>&, Index p, Index q);\n    EIGEN_DEVICE_FUNC\n    bool makeJacobi(const RealScalar& x, const Scalar& y, const RealScalar& z);\n\n    EIGEN_DEVICE_FUNC\n    void makeGivens(const Scalar& p, const Scalar& q, Scalar* r=0);\n\n  protected:\n    EIGEN_DEVICE_FUNC\n    void makeGivens(const Scalar& p, const Scalar& q, Scalar* r, internal::true_type);\n    EIGEN_DEVICE_FUNC\n    void makeGivens(const Scalar& p, const Scalar& q, Scalar* r, internal::false_type);\n\n    Scalar m_c, m_s;\n};\n\n/** Makes \\c *this as a Jacobi rotation \\a J such that applying \\a J on both the right and left sides of the selfadjoint 2x2 matrix\n  * \\f$ B = \\left ( \\begin{array}{cc} x & y \\\\ \\overline y & z \\end{array} \\right )\\f$ yields a diagonal matrix \\f$ A = J^* B J \\f$\n  *\n  * \\sa MatrixBase::makeJacobi(const MatrixBase<Derived>&, Index, Index), 
MatrixBase::applyOnTheLeft(), MatrixBase::applyOnTheRight()\n  */\ntemplate<typename Scalar>\nEIGEN_DEVICE_FUNC\nbool JacobiRotation<Scalar>::makeJacobi(const RealScalar& x, const Scalar& y, const RealScalar& z)\n{\n  using std::sqrt;\n  using std::abs;\n\n  RealScalar deno = RealScalar(2)*abs(y);\n  if(deno < (std::numeric_limits<RealScalar>::min)())\n  {\n    m_c = Scalar(1);\n    m_s = Scalar(0);\n    return false;\n  }\n  else\n  {\n    RealScalar tau = (x-z)/deno;\n    RealScalar w = sqrt(numext::abs2(tau) + RealScalar(1));\n    RealScalar t;\n    if(tau>RealScalar(0))\n    {\n      t = RealScalar(1) / (tau + w);\n    }\n    else\n    {\n      t = RealScalar(1) / (tau - w);\n    }\n    RealScalar sign_t = t > RealScalar(0) ? RealScalar(1) : RealScalar(-1);\n    RealScalar n = RealScalar(1) / sqrt(numext::abs2(t)+RealScalar(1));\n    m_s = - sign_t * (numext::conj(y) / abs(y)) * abs(t) * n;\n    m_c = n;\n    return true;\n  }\n}\n\n/** Makes \\c *this as a Jacobi rotation \\c J such that applying \\a J on both the right and left sides of the 2x2 selfadjoint matrix\n  * \\f$ B = \\left ( \\begin{array}{cc} \\text{this}_{pp} & \\text{this}_{pq} \\\\ (\\text{this}_{pq})^* & \\text{this}_{qq} \\end{array} \\right )\\f$ yields\n  * a diagonal matrix \\f$ A = J^* B J \\f$\n  *\n  * Example: \\include Jacobi_makeJacobi.cpp\n  * Output: \\verbinclude Jacobi_makeJacobi.out\n  *\n  * \\sa JacobiRotation::makeJacobi(RealScalar, Scalar, RealScalar), MatrixBase::applyOnTheLeft(), MatrixBase::applyOnTheRight()\n  */\ntemplate<typename Scalar>\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC\ninline bool JacobiRotation<Scalar>::makeJacobi(const MatrixBase<Derived>& m, Index p, Index q)\n{\n  return makeJacobi(numext::real(m.coeff(p,p)), m.coeff(p,q), numext::real(m.coeff(q,q)));\n}\n\n/** Makes \\c *this as a Givens rotation \\c G such that applying \\f$ G^* \\f$ to the left of the vector\n  * \\f$ V = \\left ( \\begin{array}{c} p \\\\ q \\end{array} \\right )\\f$ yields:\n  * \\f$ G^* V = \\left ( \\begin{array}{c} r \\\\ 0 \\end{array} \\right )\\f$.\n  *\n  * The value of \\a r is returned if \\a r is not null (the default is null).\n  * Also note that G is built such that the cosine is always real.\n  *\n  * Example: \\include Jacobi_makeGivens.cpp\n  * Output: \\verbinclude Jacobi_makeGivens.out\n  *\n  * This function implements the continuous Givens rotation generation algorithm\n  * found in Anderson (2000), Discontinuous Plane Rotations and the Symmetric Eigenvalue Problem.\n  * LAPACK Working Note 150, University of Tennessee, UT-CS-00-454, December 4, 2000.\n  *\n  * \\sa MatrixBase::applyOnTheLeft(), MatrixBase::applyOnTheRight()\n  */\ntemplate<typename Scalar>\nEIGEN_DEVICE_FUNC\nvoid JacobiRotation<Scalar>::makeGivens(const Scalar& p, const Scalar& q, Scalar* r)\n{\n  makeGivens(p, q, r, typename internal::conditional<NumTraits<Scalar>::IsComplex, internal::true_type, internal::false_type>::type());\n}\n\n\n// specialization for complexes\ntemplate<typename Scalar>\nEIGEN_DEVICE_FUNC\nvoid JacobiRotation<Scalar>::makeGivens(const Scalar& p, const Scalar& q, Scalar* r, internal::true_type)\n{\n  using std::sqrt;\n  using std::abs;\n  using numext::conj;\n\n  if(q==Scalar(0))\n  {\n    m_c = numext::real(p)<0 ? 
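/* q == 0: the rotation degenerates to c = +/-1, matching the sign of Re(p), so that r = c*p has a nonnegative real part */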
Scalar(-1) : Scalar(1);\n    m_s = 0;\n    if(r) *r = m_c * p;\n  }\n  else if(p==Scalar(0))\n  {\n    m_c = 0;\n    m_s = -q/abs(q);\n    if(r) *r = abs(q);\n  }\n  else\n  {\n    RealScalar p1 = numext::norm1(p);\n    RealScalar q1 = numext::norm1(q);\n    if(p1>=q1)\n    {\n      Scalar ps = p / p1;\n      RealScalar p2 = numext::abs2(ps);\n      Scalar qs = q / p1;\n      RealScalar q2 = numext::abs2(qs);\n\n      RealScalar u = sqrt(RealScalar(1) + q2/p2);\n      if(numext::real(p)<RealScalar(0))\n        u = -u;\n\n      m_c = Scalar(1)/u;\n      m_s = -qs*conj(ps)*(m_c/p2);\n      if(r) *r = p * u;\n    }\n    else\n    {\n      Scalar ps = p / q1;\n      RealScalar p2 = numext::abs2(ps);\n      Scalar qs = q / q1;\n      RealScalar q2 = numext::abs2(qs);\n\n      RealScalar u = q1 * sqrt(p2 + q2);\n      if(numext::real(p)<RealScalar(0))\n        u = -u;\n\n      p1 = abs(p);\n      ps = p/p1;\n      m_c = p1/u;\n      m_s = -conj(ps) * (q/u);\n      if(r) *r = ps * u;\n    }\n  }\n}\n\n// specialization for reals\ntemplate<typename Scalar>\nEIGEN_DEVICE_FUNC\nvoid JacobiRotation<Scalar>::makeGivens(const Scalar& p, const Scalar& q, Scalar* r, internal::false_type)\n{\n  using std::sqrt;\n  using std::abs;\n  if(q==Scalar(0))\n  {\n    m_c = p<Scalar(0) ? Scalar(-1) : Scalar(1);\n    m_s = Scalar(0);\n    if(r) *r = abs(p);\n  }\n  else if(p==Scalar(0))\n  {\n    m_c = Scalar(0);\n    m_s = q<Scalar(0) ? Scalar(1) : Scalar(-1);\n    if(r) *r = abs(q);\n  }\n  else if(abs(p) > abs(q))\n  {\n    Scalar t = q/p;\n    Scalar u = sqrt(Scalar(1) + numext::abs2(t));\n    if(p<Scalar(0))\n      u = -u;\n    m_c = Scalar(1)/u;\n    m_s = -t * m_c;\n    if(r) *r = p * u;\n  }\n  else\n  {\n    Scalar t = p/q;\n    Scalar u = sqrt(Scalar(1) + numext::abs2(t));\n    if(q<Scalar(0))\n      u = -u;\n    m_s = -Scalar(1)/u;\n    m_c = -t * m_s;\n    if(r) *r = q * u;\n  }\n\n}\n\n/****************************************************************************************\n*   Implementation of MatrixBase methods\n****************************************************************************************/\n\nnamespace internal {\n/** \\jacobi_module\n  * Applies the clock wise 2D rotation \\a j to the set of 2D vectors of coordinates \\a x and \\a y:\n  * \\f$ \\left ( \\begin{array}{cc} x \\\\ y \\end{array} \\right )  =  J \\left ( \\begin{array}{cc} x \\\\ y \\end{array} \\right ) \\f$\n  *\n  * \\sa MatrixBase::applyOnTheLeft(), MatrixBase::applyOnTheRight()\n  */\ntemplate<typename VectorX, typename VectorY, typename OtherScalar>\nEIGEN_DEVICE_FUNC\nvoid apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x, DenseBase<VectorY>& xpr_y, const JacobiRotation<OtherScalar>& j);\n}\n\n/** \\jacobi_module\n  * Applies the rotation in the plane \\a j to the rows \\a p and \\a q of \\c *this, i.e., it computes B = J * B,\n  * with \\f$ B = \\left ( \\begin{array}{cc} \\text{*this.row}(p) \\\\ \\text{*this.row}(q) \\end{array} \\right ) \\f$.\n  *\n  * \\sa class JacobiRotation, MatrixBase::applyOnTheRight(), internal::apply_rotation_in_the_plane()\n  */\ntemplate<typename Derived>\ntemplate<typename OtherScalar>\nEIGEN_DEVICE_FUNC\ninline void MatrixBase<Derived>::applyOnTheLeft(Index p, Index q, const JacobiRotation<OtherScalar>& j)\n{\n  RowXpr x(this->row(p));\n  RowXpr y(this->row(q));\n  internal::apply_rotation_in_the_plane(x, y, j);\n}\n\n/** \\ingroup Jacobi_Module\n  * Applies the rotation in the plane \\a j to the columns \\a p and \\a q of \\c *this, i.e., it computes B = B * J\n  * with \\f$ B = 
\\left ( \\begin{array}{cc} \\text{*this.col}(p) & \\text{*this.col}(q) \\end{array} \\right ) \\f$.\n  *\n  * \\sa class JacobiRotation, MatrixBase::applyOnTheLeft(), internal::apply_rotation_in_the_plane()\n  */\ntemplate<typename Derived>\ntemplate<typename OtherScalar>\nEIGEN_DEVICE_FUNC\ninline void MatrixBase<Derived>::applyOnTheRight(Index p, Index q, const JacobiRotation<OtherScalar>& j)\n{\n  ColXpr x(this->col(p));\n  ColXpr y(this->col(q));\n  internal::apply_rotation_in_the_plane(x, y, j.transpose());\n}\n\nnamespace internal {\n\ntemplate<typename Scalar, typename OtherScalar,\n         int SizeAtCompileTime, int MinAlignment, bool Vectorizable>\nstruct apply_rotation_in_the_plane_selector\n{\n  static EIGEN_DEVICE_FUNC\n  inline void run(Scalar *x, Index incrx, Scalar *y, Index incry, Index size, OtherScalar c, OtherScalar s)\n  {\n    for(Index i=0; i<size; ++i)\n    {\n      Scalar xi = *x;\n      Scalar yi = *y;\n      *x =  c * xi + numext::conj(s) * yi;\n      *y = -s * xi + numext::conj(c) * yi;\n      x += incrx;\n      y += incry;\n    }\n  }\n};\n\ntemplate<typename Scalar, typename OtherScalar,\n         int SizeAtCompileTime, int MinAlignment>\nstruct apply_rotation_in_the_plane_selector<Scalar,OtherScalar,SizeAtCompileTime,MinAlignment,true /* vectorizable */>\n{\n  static inline void run(Scalar *x, Index incrx, Scalar *y, Index incry, Index size, OtherScalar c, OtherScalar s)\n  {\n    enum {\n      PacketSize = packet_traits<Scalar>::size,\n      OtherPacketSize = packet_traits<OtherScalar>::size\n    };\n    typedef typename packet_traits<Scalar>::type Packet;\n    typedef typename packet_traits<OtherScalar>::type OtherPacket;\n\n    /*** dynamic-size vectorized paths ***/\n    if(SizeAtCompileTime == Dynamic && ((incrx==1 && incry==1) || PacketSize == 1))\n    {\n      // both vectors are sequentially stored in memory => vectorization\n      enum { Peeling = 2 };\n\n      Index alignedStart = internal::first_default_aligned(y, size);\n      Index alignedEnd = alignedStart + ((size-alignedStart)/PacketSize)*PacketSize;\n\n      const OtherPacket pc = pset1<OtherPacket>(c);\n      const OtherPacket ps = pset1<OtherPacket>(s);\n      conj_helper<OtherPacket,Packet,NumTraits<OtherScalar>::IsComplex,false> pcj;\n      conj_helper<OtherPacket,Packet,false,false> pm;\n\n      for(Index i=0; i<alignedStart; ++i)\n      {\n        Scalar xi = x[i];\n        Scalar yi = y[i];\n        x[i] =  c * xi + numext::conj(s) * yi;\n        y[i] = -s * xi + numext::conj(c) * yi;\n      }\n\n      Scalar* EIGEN_RESTRICT px = x + alignedStart;\n      Scalar* EIGEN_RESTRICT py = y + alignedStart;\n\n      if(internal::first_default_aligned(x, size)==alignedStart)\n      {\n        for(Index i=alignedStart; i<alignedEnd; i+=PacketSize)\n        {\n          Packet xi = pload<Packet>(px);\n          Packet yi = pload<Packet>(py);\n          pstore(px, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi)));\n          pstore(py, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi)));\n          px += PacketSize;\n          py += PacketSize;\n        }\n      }\n      else\n      {\n        Index peelingEnd = alignedStart + ((size-alignedStart)/(Peeling*PacketSize))*(Peeling*PacketSize);\n        for(Index i=alignedStart; i<peelingEnd; i+=Peeling*PacketSize)\n        {\n          Packet xi   = ploadu<Packet>(px);\n          Packet xi1  = ploadu<Packet>(px+PacketSize);\n          Packet yi   = pload <Packet>(py);\n          Packet yi1  = pload <Packet>(py+PacketSize);\n          pstoreu(px, 
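/* one packet at a time: x <- c*x + conj(s)*y and y <- conj(c)*y - s*x; pm multiplies plainly, pcj conjugates its first (rotation) operand for complex scalars */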
padd(pm.pmul(pc,xi),pcj.pmul(ps,yi)));\n          pstoreu(px+PacketSize, padd(pm.pmul(pc,xi1),pcj.pmul(ps,yi1)));\n          pstore (py, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi)));\n          pstore (py+PacketSize, psub(pcj.pmul(pc,yi1),pm.pmul(ps,xi1)));\n          px += Peeling*PacketSize;\n          py += Peeling*PacketSize;\n        }\n        if(alignedEnd!=peelingEnd)\n        {\n          Packet xi = ploadu<Packet>(x+peelingEnd);\n          Packet yi = pload <Packet>(y+peelingEnd);\n          pstoreu(x+peelingEnd, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi)));\n          pstore (y+peelingEnd, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi)));\n        }\n      }\n\n      for(Index i=alignedEnd; i<size; ++i)\n      {\n        Scalar xi = x[i];\n        Scalar yi = y[i];\n        x[i] =  c * xi + numext::conj(s) * yi;\n        y[i] = -s * xi + numext::conj(c) * yi;\n      }\n    }\n\n    /*** fixed-size vectorized path ***/\n    else if(SizeAtCompileTime != Dynamic && MinAlignment>0) // FIXME should be compared to the required alignment\n    {\n      const OtherPacket pc = pset1<OtherPacket>(c);\n      const OtherPacket ps = pset1<OtherPacket>(s);\n      conj_helper<OtherPacket,Packet,NumTraits<OtherPacket>::IsComplex,false> pcj;\n      conj_helper<OtherPacket,Packet,false,false> pm;\n      Scalar* EIGEN_RESTRICT px = x;\n      Scalar* EIGEN_RESTRICT py = y;\n      for(Index i=0; i<size; i+=PacketSize)\n      {\n        Packet xi = pload<Packet>(px);\n        Packet yi = pload<Packet>(py);\n        pstore(px, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi)));\n        pstore(py, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi)));\n        px += PacketSize;\n        py += PacketSize;\n      }\n    }\n\n    /*** non-vectorized path ***/\n    else\n    {\n      apply_rotation_in_the_plane_selector<Scalar,OtherScalar,SizeAtCompileTime,MinAlignment,false>::run(x,incrx,y,incry,size,c,s);\n    }\n  }\n};\n\ntemplate<typename VectorX, typename VectorY, typename OtherScalar>\nEIGEN_DEVICE_FUNC\nvoid /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x, DenseBase<VectorY>& xpr_y, const JacobiRotation<OtherScalar>& j)\n{\n  typedef typename VectorX::Scalar Scalar;\n  const bool Vectorizable =    (int(VectorX::Flags) & int(VectorY::Flags) & PacketAccessBit)\n                            && (int(packet_traits<Scalar>::size) == int(packet_traits<OtherScalar>::size));\n\n  eigen_assert(xpr_x.size() == xpr_y.size());\n  Index size = xpr_x.size();\n  Index incrx = xpr_x.derived().innerStride();\n  Index incry = xpr_y.derived().innerStride();\n\n  Scalar* EIGEN_RESTRICT x = &xpr_x.derived().coeffRef(0);\n  Scalar* EIGEN_RESTRICT y = &xpr_y.derived().coeffRef(0);\n\n  OtherScalar c = j.c();\n  OtherScalar s = j.s();\n  if (c==OtherScalar(1) && s==OtherScalar(0))\n    return;\n\n  apply_rotation_in_the_plane_selector<\n    Scalar,OtherScalar,\n    VectorX::SizeAtCompileTime,\n    EIGEN_PLAIN_ENUM_MIN(evaluator<VectorX>::Alignment, evaluator<VectorY>::Alignment),\n    Vectorizable>::run(x,incrx,y,incry,size,c,s);\n}\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_JACOBI_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/KLUSupport/KLUSupport.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2017 Kyle Macfarlan <kyle.macfarlan@gmail.com>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_KLUSUPPORT_H\n#define EIGEN_KLUSUPPORT_H\n\nnamespace Eigen {\n\n/* TODO extract L, extract U, compute det, etc... */\n\n/** \\ingroup KLUSupport_Module\n  * \\brief A sparse LU factorization and solver based on KLU\n  *\n  * This class allows to solve for A.X = B sparse linear problems via a LU factorization\n  * using the KLU library. The sparse matrix A must be squared and full rank.\n  * The vectors or matrices X and B can be either dense or sparse.\n  *\n  * \\warning The input matrix A should be in a \\b compressed and \\b column-major form.\n  * Otherwise an expensive copy will be made. You can call the inexpensive makeCompressed() to get a compressed matrix.\n  * \\tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<>\n  *\n  * \\implsparsesolverconcept\n  *\n  * \\sa \\ref TutorialSparseSolverConcept, class UmfPackLU, class SparseLU\n  */\n\n\ninline int klu_solve(klu_symbolic *Symbolic, klu_numeric *Numeric, Index ldim, Index nrhs, double B [ ], klu_common *Common, double) {\n   return klu_solve(Symbolic, Numeric, internal::convert_index<int>(ldim), internal::convert_index<int>(nrhs), B, Common);\n}\n\ninline int klu_solve(klu_symbolic *Symbolic, klu_numeric *Numeric, Index ldim, Index nrhs, std::complex<double>B[], klu_common *Common, std::complex<double>) {\n   return klu_z_solve(Symbolic, Numeric, internal::convert_index<int>(ldim), internal::convert_index<int>(nrhs), &numext::real_ref(B[0]), Common);\n}\n\ninline int klu_tsolve(klu_symbolic *Symbolic, klu_numeric *Numeric, Index ldim, Index nrhs, double B[], klu_common *Common, double) {\n   return klu_tsolve(Symbolic, Numeric, internal::convert_index<int>(ldim), internal::convert_index<int>(nrhs), B, Common);\n}\n\ninline int klu_tsolve(klu_symbolic *Symbolic, klu_numeric *Numeric, Index ldim, Index nrhs, std::complex<double>B[], klu_common *Common, std::complex<double>) {\n   return klu_z_tsolve(Symbolic, Numeric, internal::convert_index<int>(ldim), internal::convert_index<int>(nrhs), &numext::real_ref(B[0]), 0, Common);\n}\n\ninline klu_numeric* klu_factor(int Ap [ ], int Ai [ ], double Ax [ ], klu_symbolic *Symbolic, klu_common *Common, double) {\n   return klu_factor(Ap, Ai, Ax, Symbolic, Common);\n}\n\ninline klu_numeric* klu_factor(int Ap[], int Ai[], std::complex<double> Ax[], klu_symbolic *Symbolic, klu_common *Common, std::complex<double>) {\n   return klu_z_factor(Ap, Ai, &numext::real_ref(Ax[0]), Symbolic, Common);\n}\n\n\ntemplate<typename _MatrixType>\nclass KLU : public SparseSolverBase<KLU<_MatrixType> >\n{\n  protected:\n    typedef SparseSolverBase<KLU<_MatrixType> > Base;\n    using Base::m_isInitialized;\n  public:\n    using Base::_solve_impl;\n    typedef _MatrixType MatrixType;\n    typedef typename MatrixType::Scalar Scalar;\n    typedef typename MatrixType::RealScalar RealScalar;\n    typedef typename MatrixType::StorageIndex StorageIndex;\n    typedef Matrix<Scalar,Dynamic,1> Vector;\n    typedef Matrix<int, 1, MatrixType::ColsAtCompileTime> IntRowVectorType;\n    typedef Matrix<int, MatrixType::RowsAtCompileTime, 1> IntColVectorType;\n    typedef SparseMatrix<Scalar> 
LUMatrixType;\n    typedef SparseMatrix<Scalar,ColMajor,int> KLUMatrixType;\n    typedef Ref<const KLUMatrixType, StandardCompressedFormat> KLUMatrixRef;\n    enum {\n      ColsAtCompileTime = MatrixType::ColsAtCompileTime,\n      MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime\n    };\n\n  public:\n\n    KLU()\n      : m_dummy(0,0), mp_matrix(m_dummy)\n    {\n      init();\n    }\n\n    template<typename InputMatrixType>\n    explicit KLU(const InputMatrixType& matrix)\n      : mp_matrix(matrix)\n    {\n      init();\n      compute(matrix);\n    }\n\n    ~KLU()\n    {\n      if(m_symbolic) klu_free_symbolic(&m_symbolic,&m_common);\n      if(m_numeric)  klu_free_numeric(&m_numeric,&m_common);\n    }\n\n    EIGEN_CONSTEXPR inline Index rows() const EIGEN_NOEXCEPT { return mp_matrix.rows(); }\n    EIGEN_CONSTEXPR inline Index cols() const EIGEN_NOEXCEPT { return mp_matrix.cols(); }\n\n    /** \\brief Reports whether previous computation was successful.\n      *\n      * \\returns \\c Success if computation was successful,\n      *          \\c NumericalIssue if the matrix appears to be singular.\n      */\n    ComputationInfo info() const\n    {\n      eigen_assert(m_isInitialized && \"Decomposition is not initialized.\");\n      return m_info;\n    }\n#if 0 // not implemented yet\n    inline const LUMatrixType& matrixL() const\n    {\n      if (m_extractedDataAreDirty) extractData();\n      return m_l;\n    }\n\n    inline const LUMatrixType& matrixU() const\n    {\n      if (m_extractedDataAreDirty) extractData();\n      return m_u;\n    }\n\n    inline const IntColVectorType& permutationP() const\n    {\n      if (m_extractedDataAreDirty) extractData();\n      return m_p;\n    }\n\n    inline const IntRowVectorType& permutationQ() const\n    {\n      if (m_extractedDataAreDirty) extractData();\n      return m_q;\n    }\n#endif
\n    /** Computes the sparse LU decomposition of \\a matrix\n     *  Note that the matrix should be column-major, and in compressed format for best performance.\n     *  \\sa SparseMatrix::makeCompressed().\n     */\n    template<typename InputMatrixType>\n    void compute(const InputMatrixType& matrix)\n    {\n      if(m_symbolic) klu_free_symbolic(&m_symbolic, &m_common);\n      if(m_numeric)  klu_free_numeric(&m_numeric, &m_common);\n      grab(matrix.derived());\n      analyzePattern_impl();\n      factorize_impl();\n    }\n\n    /** Performs a symbolic decomposition on the sparsity pattern of \\a matrix.\n      *\n      * This function is particularly useful when solving several problems having the same structure.\n      *\n      * \\sa factorize(), compute()\n      */\n    template<typename InputMatrixType>\n    void analyzePattern(const InputMatrixType& matrix)\n    {\n      if(m_symbolic) klu_free_symbolic(&m_symbolic, &m_common);\n      if(m_numeric)  klu_free_numeric(&m_numeric, &m_common);\n\n      grab(matrix.derived());\n\n      analyzePattern_impl();\n    }\n\n\n    /** Provides access to the control settings array used by KLU.\n      *\n      * See KLU documentation for details.\n      */\n    inline const klu_common& kluCommon() const\n    {\n      return m_common;\n    }\n\n    /** Provides access to the control settings array used by KLU.\n      *\n      * If this array contains NaN's, the default values are used.\n      *\n      * See KLU documentation for details.\n      */\n    inline klu_common& kluCommon()\n    {\n      return m_common;\n    }\n\n    /** Performs a numeric decomposition of \\a matrix\n      *\n      * The given matrix must have the same sparsity pattern as the matrix on which the pattern analysis has been performed.\n      *\n      * \\sa analyzePattern(), compute()\n      */\n    template<typename InputMatrixType>\n    void factorize(const InputMatrixType& matrix)\n    {\n      eigen_assert(m_analysisIsOk && \"KLU: you must first call analyzePattern()\");\n      if(m_numeric)\n        klu_free_numeric(&m_numeric,&m_common);\n\n      grab(matrix.derived());\n\n      factorize_impl();\n    }\n\n    /** \\internal */\n    template<typename BDerived,typename XDerived>\n    bool _solve_impl(const MatrixBase<BDerived> &b, MatrixBase<XDerived> &x) const;\n\n#if 0 // not implemented yet\n    Scalar determinant() const;\n\n    void extractData() const;\n#endif\n\n  protected:\n\n    void init()\n    {\n      m_info                  = InvalidInput;\n      m_isInitialized         = false;\n      m_numeric               = 0;\n      m_symbolic              = 0;\n      m_extractedDataAreDirty = true;\n\n      klu_defaults(&m_common);\n    }\n\n    void analyzePattern_impl()\n    {\n      m_info = InvalidInput;\n      m_analysisIsOk = false;\n      m_factorizationIsOk = false;\n      m_symbolic = klu_analyze(internal::convert_index<int>(mp_matrix.rows()),\n                                     const_cast<StorageIndex*>(mp_matrix.outerIndexPtr()), const_cast<StorageIndex*>(mp_matrix.innerIndexPtr()),\n                                     &m_common);\n      if (m_symbolic) {\n         m_isInitialized = true;\n         m_info = Success;\n         m_analysisIsOk = true;\n         m_extractedDataAreDirty = true;\n      }\n    }\n\n    void factorize_impl()\n    {\n\n      m_numeric = klu_factor(const_cast<StorageIndex*>(mp_matrix.outerIndexPtr()), const_cast<StorageIndex*>(mp_matrix.innerIndexPtr()), const_cast<Scalar*>(mp_matrix.valuePtr()),\n                                    m_symbolic, &m_common, Scalar());\n\n\n      m_info = m_numeric ? Success : NumericalIssue;\n      m_factorizationIsOk = m_numeric ?
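/* klu_factor returns a null pointer when the numeric factorization fails */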
1 : 0;\n      m_extractedDataAreDirty = true;\n    }\n\n    template<typename MatrixDerived>\n    void grab(const EigenBase<MatrixDerived> &A)\n    {\n      mp_matrix.~KLUMatrixRef();\n      ::new (&mp_matrix) KLUMatrixRef(A.derived());\n    }\n\n    void grab(const KLUMatrixRef &A)\n    {\n      if(&(A.derived()) != &mp_matrix)\n      {\n        mp_matrix.~KLUMatrixRef();\n        ::new (&mp_matrix) KLUMatrixRef(A);\n      }\n    }\n\n    // cached data to reduce reallocation, etc.\n#if 0 // not implemented yet\n    mutable LUMatrixType m_l;\n    mutable LUMatrixType m_u;\n    mutable IntColVectorType m_p;\n    mutable IntRowVectorType m_q;\n#endif\n\n    KLUMatrixType m_dummy;\n    KLUMatrixRef mp_matrix;\n\n    klu_numeric* m_numeric;\n    klu_symbolic* m_symbolic;\n    klu_common m_common;\n    mutable ComputationInfo m_info;\n    int m_factorizationIsOk;\n    int m_analysisIsOk;\n    mutable bool m_extractedDataAreDirty;\n\n  private:\n    KLU(const KLU& ) { }\n};\n\n#if 0 // not implemented yet\ntemplate<typename MatrixType>\nvoid KLU<MatrixType>::extractData() const\n{\n  if (m_extractedDataAreDirty)\n  {\n     eigen_assert(false && \"KLU: extractData Not Yet Implemented\");\n\n    // get size of the data\n    int lnz, unz, rows, cols, nz_udiag;\n    umfpack_get_lunz(&lnz, &unz, &rows, &cols, &nz_udiag, m_numeric, Scalar());\n\n    // allocate data\n    m_l.resize(rows,(std::min)(rows,cols));\n    m_l.resizeNonZeros(lnz);\n\n    m_u.resize((std::min)(rows,cols),cols);\n    m_u.resizeNonZeros(unz);\n\n    m_p.resize(rows);\n    m_q.resize(cols);\n\n    // extract\n    umfpack_get_numeric(m_l.outerIndexPtr(), m_l.innerIndexPtr(), m_l.valuePtr(),\n                        m_u.outerIndexPtr(), m_u.innerIndexPtr(), m_u.valuePtr(),\n                        m_p.data(), m_q.data(), 0, 0, 0, m_numeric);\n\n    m_extractedDataAreDirty = false;\n  }\n}\n\ntemplate<typename MatrixType>\ntypename KLU<MatrixType>::Scalar KLU<MatrixType>::determinant() const\n{\n  eigen_assert(false && \"KLU: extractData Not Yet Implemented\");\n  return Scalar();\n}\n#endif\n\ntemplate<typename MatrixType>\ntemplate<typename BDerived,typename XDerived>\nbool KLU<MatrixType>::_solve_impl(const MatrixBase<BDerived> &b, MatrixBase<XDerived> &x) const\n{\n  Index rhsCols = b.cols();\n  EIGEN_STATIC_ASSERT((XDerived::Flags&RowMajorBit)==0, THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES);\n  eigen_assert(m_factorizationIsOk && \"The decomposition is not in a valid state for solving, you must first call either compute() or analyzePattern()/factorize()\");\n\n  x = b;\n  int info = klu_solve(m_symbolic, m_numeric, b.rows(), rhsCols, x.const_cast_derived().data(), const_cast<klu_common*>(&m_common), Scalar());\n\n  m_info = info!=0 ? Success : NumericalIssue;\n  return true;\n}\n\n} // end namespace Eigen\n\n#endif // EIGEN_KLUSUPPORT_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/LU/Determinant.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008 Benoit Jacob <jacob.benoit.1@gmail.com>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_DETERMINANT_H\n#define EIGEN_DETERMINANT_H\n\nnamespace Eigen { \n\nnamespace internal {\n\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC\ninline const typename Derived::Scalar bruteforce_det3_helper\n(const MatrixBase<Derived>& matrix, int a, int b, int c)\n{\n  return matrix.coeff(0,a)\n         * (matrix.coeff(1,b) * matrix.coeff(2,c) - matrix.coeff(1,c) * matrix.coeff(2,b));\n}\n\ntemplate<typename Derived,\n         int DeterminantType = Derived::RowsAtCompileTime\n> struct determinant_impl\n{\n  static inline typename traits<Derived>::Scalar run(const Derived& m)\n  {\n    if(Derived::ColsAtCompileTime==Dynamic && m.rows()==0)\n      return typename traits<Derived>::Scalar(1);\n    return m.partialPivLu().determinant();\n  }\n};\n\ntemplate<typename Derived> struct determinant_impl<Derived, 1>\n{\n  static inline EIGEN_DEVICE_FUNC\n  typename traits<Derived>::Scalar run(const Derived& m)\n  {\n    return m.coeff(0,0);\n  }\n};\n\ntemplate<typename Derived> struct determinant_impl<Derived, 2>\n{\n  static inline EIGEN_DEVICE_FUNC\n  typename traits<Derived>::Scalar run(const Derived& m)\n  {\n    return m.coeff(0,0) * m.coeff(1,1) - m.coeff(1,0) * m.coeff(0,1);\n  }\n};\n\ntemplate<typename Derived> struct determinant_impl<Derived, 3>\n{\n  static inline EIGEN_DEVICE_FUNC\n  typename traits<Derived>::Scalar run(const Derived& m)\n  {\n    return bruteforce_det3_helper(m,0,1,2)\n          - bruteforce_det3_helper(m,1,0,2)\n          + bruteforce_det3_helper(m,2,0,1);\n  }\n};\n\ntemplate<typename Derived> struct determinant_impl<Derived, 4>\n{\n  typedef typename traits<Derived>::Scalar Scalar;\n  static EIGEN_DEVICE_FUNC\n  Scalar run(const Derived& m)\n  {\n    Scalar d2_01 = det2(m, 0, 1);\n    Scalar d2_02 = det2(m, 0, 2);\n    Scalar d2_03 = det2(m, 0, 3);\n    Scalar d2_12 = det2(m, 1, 2);\n    Scalar d2_13 = det2(m, 1, 3);\n    Scalar d2_23 = det2(m, 2, 3);\n    Scalar d3_0 = det3(m, 1,d2_23, 2,d2_13, 3,d2_12);\n    Scalar d3_1 = det3(m, 0,d2_23, 2,d2_03, 3,d2_02);\n    Scalar d3_2 = det3(m, 0,d2_13, 1,d2_03, 3,d2_01);\n    Scalar d3_3 = det3(m, 0,d2_12, 1,d2_02, 2,d2_01);\n    return internal::pmadd(-m(0,3),d3_0, m(1,3)*d3_1) +\n           internal::pmadd(-m(2,3),d3_2, m(3,3)*d3_3);\n  }\nprotected:\n  static EIGEN_DEVICE_FUNC\n  Scalar det2(const Derived& m, Index i0, Index i1)\n  {\n    return m(i0,0) * m(i1,1) - m(i1,0) * m(i0,1);\n  }\n\n  static EIGEN_DEVICE_FUNC\n  Scalar det3(const Derived& m, Index i0, const Scalar& d0, Index i1, const Scalar& d1, Index i2, const Scalar& d2)\n  {\n    return internal::pmadd(m(i0,2), d0, internal::pmadd(-m(i1,2), d1, m(i2,2)*d2));\n  }\n};\n\n} // end namespace internal\n\n/** \\lu_module\n  *\n  * \\returns the determinant of this matrix\n  */\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC\ninline typename internal::traits<Derived>::Scalar MatrixBase<Derived>::determinant() const\n{\n  eigen_assert(rows() == cols());\n  typedef typename internal::nested_eval<Derived,Base::RowsAtCompileTime>::type Nested;\n  return internal::determinant_impl<typename internal::remove_all<Nested>::type>::run(derived());\n}\n\n} // end namespace Eigen\n\n#endif // 
EIGEN_DETERMINANT_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/LU/FullPivLU.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2006-2009 Benoit Jacob <jacob.benoit.1@gmail.com>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_LU_H\n#define EIGEN_LU_H\n\nnamespace Eigen {\n\nnamespace internal {\ntemplate<typename _MatrixType> struct traits<FullPivLU<_MatrixType> >\n : traits<_MatrixType>\n{\n  typedef MatrixXpr XprKind;\n  typedef SolverStorage StorageKind;\n  typedef int StorageIndex;\n  enum { Flags = 0 };\n};\n\n} // end namespace internal\n\n/** \\ingroup LU_Module\n  *\n  * \\class FullPivLU\n  *\n  * \\brief LU decomposition of a matrix with complete pivoting, and related features\n  *\n  * \\tparam _MatrixType the type of the matrix of which we are computing the LU decomposition\n  *\n  * This class represents a LU decomposition of any matrix, with complete pivoting: the matrix A is\n  * decomposed as \\f$ A = P^{-1} L U Q^{-1} \\f$ where L is unit-lower-triangular, U is\n  * upper-triangular, and P and Q are permutation matrices. This is a rank-revealing LU\n  * decomposition. The eigenvalues (diagonal coefficients) of U are sorted in such a way that any\n  * zeros are at the end.\n  *\n  * This decomposition provides the generic approach to solving systems of linear equations, computing\n  * the rank, invertibility, inverse, kernel, and determinant.\n  *\n  * This LU decomposition is very stable and well tested with large matrices. However there are use cases where the SVD\n  * decomposition is inherently more stable and/or flexible. For example, when computing the kernel of a matrix,\n  * working with the SVD allows to select the smallest singular values of the matrix, something that\n  * the LU decomposition doesn't see.\n  *\n  * The data of the LU decomposition can be directly accessed through the methods matrixLU(),\n  * permutationP(), permutationQ().\n  *\n  * As an example, here is how the original matrix can be retrieved:\n  * \\include class_FullPivLU.cpp\n  * Output: \\verbinclude class_FullPivLU.out\n  *\n  * This class supports the \\link InplaceDecomposition inplace decomposition \\endlink mechanism.\n  *\n  * \\sa MatrixBase::fullPivLu(), MatrixBase::determinant(), MatrixBase::inverse()\n  */\ntemplate<typename _MatrixType> class FullPivLU\n  : public SolverBase<FullPivLU<_MatrixType> >\n{\n  public:\n    typedef _MatrixType MatrixType;\n    typedef SolverBase<FullPivLU> Base;\n    friend class SolverBase<FullPivLU>;\n\n    EIGEN_GENERIC_PUBLIC_INTERFACE(FullPivLU)\n    enum {\n      MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,\n      MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime\n    };\n    typedef typename internal::plain_row_type<MatrixType, StorageIndex>::type IntRowVectorType;\n    typedef typename internal::plain_col_type<MatrixType, StorageIndex>::type IntColVectorType;\n    typedef PermutationMatrix<ColsAtCompileTime, MaxColsAtCompileTime> PermutationQType;\n    typedef PermutationMatrix<RowsAtCompileTime, MaxRowsAtCompileTime> PermutationPType;\n    typedef typename MatrixType::PlainObject PlainObject;\n\n    /**\n      * \\brief Default Constructor.\n      *\n      * The default constructor is useful in cases in which the user intends to\n      * perform decompositions via LU::compute(const MatrixType&).\n      */\n    FullPivLU();\n\n    /** \\brief Default 
Constructor with memory preallocation\n      *\n      * Like the default constructor but with preallocation of the internal data\n      * according to the specified problem \\a size.\n      * \\sa FullPivLU()\n      */\n    FullPivLU(Index rows, Index cols);\n\n    /** Constructor.\n      *\n      * \\param matrix the matrix of which to compute the LU decomposition.\n      *               It is required to be nonzero.\n      */\n    template<typename InputType>\n    explicit FullPivLU(const EigenBase<InputType>& matrix);\n\n    /** \\brief Constructs a LU factorization from a given matrix\n      *\n      * This overloaded constructor is provided for \\link InplaceDecomposition inplace decomposition \\endlink when \\c MatrixType is a Eigen::Ref.\n      *\n      * \\sa FullPivLU(const EigenBase&)\n      */\n    template<typename InputType>\n    explicit FullPivLU(EigenBase<InputType>& matrix);\n\n    /** Computes the LU decomposition of the given matrix.\n      *\n      * \\param matrix the matrix of which to compute the LU decomposition.\n      *               It is required to be nonzero.\n      *\n      * \\returns a reference to *this\n      */\n    template<typename InputType>\n    FullPivLU& compute(const EigenBase<InputType>& matrix) {\n      m_lu = matrix.derived();\n      computeInPlace();\n      return *this;\n    }\n\n    /** \\returns the LU decomposition matrix: the upper-triangular part is U, the\n      * unit-lower-triangular part is L (at least for square matrices; in the non-square\n      * case, special care is needed, see the documentation of class FullPivLU).\n      *\n      * \\sa matrixL(), matrixU()\n      */\n    inline const MatrixType& matrixLU() const\n    {\n      eigen_assert(m_isInitialized && \"LU is not initialized.\");\n      return m_lu;\n    }\n\n    /** \\returns the number of nonzero pivots in the LU decomposition.\n      * Here nonzero is meant in the exact sense, not in a fuzzy sense.\n      * So that notion isn't really intrinsically interesting, but it is\n      * still useful when implementing algorithms.\n      *\n      * \\sa rank()\n      */\n    inline Index nonzeroPivots() const\n    {\n      eigen_assert(m_isInitialized && \"LU is not initialized.\");\n      return m_nonzero_pivots;\n    }\n\n    /** \\returns the absolute value of the biggest pivot, i.e. the biggest\n      *          diagonal coefficient of U.\n      */\n    RealScalar maxPivot() const { return m_maxpivot; }\n\n    /** \\returns the permutation matrix P\n      *\n      * \\sa permutationQ()\n      */\n    EIGEN_DEVICE_FUNC inline const PermutationPType& permutationP() const\n    {\n      eigen_assert(m_isInitialized && \"LU is not initialized.\");\n      return m_p;\n    }\n\n    /** \\returns the permutation matrix Q\n      *\n      * \\sa permutationP()\n      */\n    inline const PermutationQType& permutationQ() const\n    {\n      eigen_assert(m_isInitialized && \"LU is not initialized.\");\n      return m_q;\n    }\n\n    /** \\returns the kernel of the matrix, also called its null-space. 
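For instance (an illustrative usage sketch, assuming an Eigen::MatrixXd A):\n      * \\code\n      * Eigen::MatrixXd N = A.fullPivLu().kernel();\n      * \\endcode\n      *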
The columns of the returned matrix\n      * will form a basis of the kernel.\n      *\n      * \\note If the kernel has dimension zero, then the returned matrix is a column-vector filled with zeros.\n      *\n      * \\note This method has to determine which pivots should be considered nonzero.\n      *       For that, it uses the threshold value that you can control by calling\n      *       setThreshold(const RealScalar&).\n      *\n      * Example: \\include FullPivLU_kernel.cpp\n      * Output: \\verbinclude FullPivLU_kernel.out\n      *\n      * \\sa image()\n      */\n    inline const internal::kernel_retval<FullPivLU> kernel() const\n    {\n      eigen_assert(m_isInitialized && \"LU is not initialized.\");\n      return internal::kernel_retval<FullPivLU>(*this);\n    }\n\n    /** \\returns the image of the matrix, also called its column-space. The columns of the returned matrix\n      * will form a basis of the image (column-space).\n      *\n      * \\param originalMatrix the original matrix, of which *this is the LU decomposition.\n      *                       The reason why it is needed to pass it here, is that this allows\n      *                       a large optimization, as otherwise this method would need to reconstruct it\n      *                       from the LU decomposition.\n      *\n      * \\note If the image has dimension zero, then the returned matrix is a column-vector filled with zeros.\n      *\n      * \\note This method has to determine which pivots should be considered nonzero.\n      *       For that, it uses the threshold value that you can control by calling\n      *       setThreshold(const RealScalar&).\n      *\n      * Example: \\include FullPivLU_image.cpp\n      * Output: \\verbinclude FullPivLU_image.out\n      *\n      * \\sa kernel()\n      */\n    inline const internal::image_retval<FullPivLU>\n      image(const MatrixType& originalMatrix) const\n    {\n      eigen_assert(m_isInitialized && \"LU is not initialized.\");\n      return internal::image_retval<FullPivLU>(*this, originalMatrix);\n    }\n\n    #ifdef EIGEN_PARSED_BY_DOXYGEN\n    /** \\return a solution x to the equation Ax=b, where A is the matrix of which\n      * *this is the LU decomposition.\n      *\n      * \\param b the right-hand-side of the equation to solve. Can be a vector or a matrix,\n      *          the only requirement in order for the equation to make sense is that\n      *          b.rows()==A.rows(), where A is the matrix of which *this is the LU decomposition.\n      *\n      * \\returns a solution.\n      *\n      * \\note_about_checking_solutions\n      *\n      * \\note_about_arbitrary_choice_of_solution\n      * \\note_about_using_kernel_to_study_multiple_solutions\n      *\n      * Example: \\include FullPivLU_solve.cpp\n      * Output: \\verbinclude FullPivLU_solve.out\n      *\n      * \\sa TriangularView::solve(), kernel(), inverse()\n      */\n    template<typename Rhs>\n    inline const Solve<FullPivLU, Rhs>\n    solve(const MatrixBase<Rhs>& b) const;\n    #endif\n\n    /** \\returns an estimate of the reciprocal condition number of the matrix of which \\c *this is\n        the LU decomposition.\n      */\n    inline RealScalar rcond() const\n    {\n      eigen_assert(m_isInitialized && \"PartialPivLU is not initialized.\");\n      return internal::rcond_estimate_helper(m_l1_norm, *this);\n    }\n\n    /** \\returns the determinant of the matrix of which\n      * *this is the LU decomposition. 
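For instance (an illustrative usage sketch, assuming a square Eigen::MatrixXd A):\n      * \\code\n      * Eigen::FullPivLU<Eigen::MatrixXd> lu(A);\n      * double d = lu.determinant();\n      * \\endcode\n      *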
It has only linear complexity\n      * (that is, O(n) where n is the dimension of the square matrix)\n      * as the LU decomposition has already been computed.\n      *\n      * \\note This is only for square matrices.\n      *\n      * \\note For fixed-size matrices of size up to 4, MatrixBase::determinant() offers\n      *       optimized paths.\n      *\n      * \\warning A determinant can be very big or small, so for matrices\n      * of large enough dimension, there is a risk of overflow/underflow.\n      *\n      * \\sa MatrixBase::determinant()\n      */\n    typename internal::traits<MatrixType>::Scalar determinant() const;\n\n    /** Allows prescribing a threshold to be used by certain methods, such as rank(),\n      * which need to determine when pivots are to be considered nonzero. This is not used for the\n      * LU decomposition itself.\n      *\n      * When it needs to get the threshold value, Eigen calls threshold(). By default, this\n      * uses a formula to automatically determine a reasonable threshold.\n      * Once you have called the present method setThreshold(const RealScalar&),\n      * your value is used instead.\n      *\n      * \\param threshold The new value to use as the threshold.\n      *\n      * A pivot will be considered nonzero if its absolute value is strictly greater than\n      *  \\f$ threshold \\times \\vert maxpivot \\vert \\f$\n      * where maxpivot is the biggest pivot.\n      *\n      * If you want to come back to the default behavior, call setThreshold(Default_t)\n      */\n    FullPivLU& setThreshold(const RealScalar& threshold)\n    {\n      m_usePrescribedThreshold = true;\n      m_prescribedThreshold = threshold;\n      return *this;\n    }\n\n    /** Allows returning to the default behavior, letting Eigen use its default formula for\n      * determining the threshold.\n      *\n      * You should pass the special object Eigen::Default as parameter here.\n      * \\code lu.setThreshold(Eigen::Default); \\endcode\n      *\n      * See the documentation of setThreshold(const RealScalar&).\n      */\n    FullPivLU& setThreshold(Default_t)\n    {\n      m_usePrescribedThreshold = false;\n      return *this;\n    }\n\n    /** Returns the threshold that will be used by certain methods such as rank().\n      *\n      * See the documentation of setThreshold(const RealScalar&).\n      */\n    RealScalar threshold() const\n    {\n      eigen_assert(m_isInitialized || m_usePrescribedThreshold);\n      return m_usePrescribedThreshold ?
m_prescribedThreshold\n      // this formula comes from experimenting (see \"LU precision tuning\" thread on the list)\n      // and turns out to be identical to Higham's formula used already in LDLt.\n          : NumTraits<Scalar>::epsilon() * RealScalar(m_lu.diagonalSize());\n    }\n\n    /** \\returns the rank of the matrix of which *this is the LU decomposition.\n      *\n      * \\note This method has to determine which pivots should be considered nonzero.\n      *       For that, it uses the threshold value that you can control by calling\n      *       setThreshold(const RealScalar&).\n      */\n    inline Index rank() const\n    {\n      using std::abs;\n      eigen_assert(m_isInitialized && \"LU is not initialized.\");\n      RealScalar premultiplied_threshold = abs(m_maxpivot) * threshold();\n      Index result = 0;\n      for(Index i = 0; i < m_nonzero_pivots; ++i)\n        result += (abs(m_lu.coeff(i,i)) > premultiplied_threshold);\n      return result;\n    }\n\n    /** \\returns the dimension of the kernel of the matrix of which *this is the LU decomposition.\n      *\n      * \\note This method has to determine which pivots should be considered nonzero.\n      *       For that, it uses the threshold value that you can control by calling\n      *       setThreshold(const RealScalar&).\n      */\n    inline Index dimensionOfKernel() const\n    {\n      eigen_assert(m_isInitialized && \"LU is not initialized.\");\n      return cols() - rank();\n    }\n\n    /** \\returns true if the matrix of which *this is the LU decomposition represents an injective\n      *          linear map, i.e. has trivial kernel; false otherwise.\n      *\n      * \\note This method has to determine which pivots should be considered nonzero.\n      *       For that, it uses the threshold value that you can control by calling\n      *       setThreshold(const RealScalar&).\n      */\n    inline bool isInjective() const\n    {\n      eigen_assert(m_isInitialized && \"LU is not initialized.\");\n      return rank() == cols();\n    }\n\n    /** \\returns true if the matrix of which *this is the LU decomposition represents a surjective\n      *          linear map; false otherwise.\n      *\n      * \\note This method has to determine which pivots should be considered nonzero.\n      *       For that, it uses the threshold value that you can control by calling\n      *       setThreshold(const RealScalar&).\n      */\n    inline bool isSurjective() const\n    {\n      eigen_assert(m_isInitialized && \"LU is not initialized.\");\n      return rank() == rows();\n    }\n\n    /** \\returns true if the matrix of which *this is the LU decomposition is invertible.\n      *\n      * \\note This method has to determine which pivots should be considered nonzero.\n      *       For that, it uses the threshold value that you can control by calling\n      *       setThreshold(const RealScalar&).\n      */\n    inline bool isInvertible() const\n    {\n      eigen_assert(m_isInitialized && \"LU is not initialized.\");\n      return isInjective() && (m_lu.rows() == m_lu.cols());\n    }\n\n    /** \\returns the inverse of the matrix of which *this is the LU decomposition.\n      *\n      * \\note If this matrix is not invertible, the returned matrix has undefined coefficients.\n      *       Use isInvertible() to first determine whether this matrix is invertible.\n      *\n      * \\sa MatrixBase::inverse()\n      */\n    inline const Inverse<FullPivLU> inverse() const\n    {\n      eigen_assert(m_isInitialized && \"LU is 
not initialized.\");\n      eigen_assert(m_lu.rows() == m_lu.cols() && \"You can't take the inverse of a non-square matrix!\");\n      return Inverse<FullPivLU>(*this);\n    }\n\n    MatrixType reconstructedMatrix() const;\n\n    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n    inline Index rows() const EIGEN_NOEXCEPT { return m_lu.rows(); }\n    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR\n    inline Index cols() const EIGEN_NOEXCEPT { return m_lu.cols(); }\n\n    #ifndef EIGEN_PARSED_BY_DOXYGEN\n    template<typename RhsType, typename DstType>\n    void _solve_impl(const RhsType &rhs, DstType &dst) const;\n\n    template<bool Conjugate, typename RhsType, typename DstType>\n    void _solve_impl_transposed(const RhsType &rhs, DstType &dst) const;\n    #endif\n\n  protected:\n\n    static void check_template_parameters()\n    {\n      EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);\n    }\n\n    void computeInPlace();\n\n    MatrixType m_lu;\n    PermutationPType m_p;\n    PermutationQType m_q;\n    IntColVectorType m_rowsTranspositions;\n    IntRowVectorType m_colsTranspositions;\n    Index m_nonzero_pivots;\n    RealScalar m_l1_norm;\n    RealScalar m_maxpivot, m_prescribedThreshold;\n    signed char m_det_pq;\n    bool m_isInitialized, m_usePrescribedThreshold;\n};\n\ntemplate<typename MatrixType>\nFullPivLU<MatrixType>::FullPivLU()\n  : m_isInitialized(false), m_usePrescribedThreshold(false)\n{\n}\n\ntemplate<typename MatrixType>\nFullPivLU<MatrixType>::FullPivLU(Index rows, Index cols)\n  : m_lu(rows, cols),\n    m_p(rows),\n    m_q(cols),\n    m_rowsTranspositions(rows),\n    m_colsTranspositions(cols),\n    m_isInitialized(false),\n    m_usePrescribedThreshold(false)\n{\n}\n\ntemplate<typename MatrixType>\ntemplate<typename InputType>\nFullPivLU<MatrixType>::FullPivLU(const EigenBase<InputType>& matrix)\n  : m_lu(matrix.rows(), matrix.cols()),\n    m_p(matrix.rows()),\n    m_q(matrix.cols()),\n    m_rowsTranspositions(matrix.rows()),\n    m_colsTranspositions(matrix.cols()),\n    m_isInitialized(false),\n    m_usePrescribedThreshold(false)\n{\n  compute(matrix.derived());\n}\n\ntemplate<typename MatrixType>\ntemplate<typename InputType>\nFullPivLU<MatrixType>::FullPivLU(EigenBase<InputType>& matrix)\n  : m_lu(matrix.derived()),\n    m_p(matrix.rows()),\n    m_q(matrix.cols()),\n    m_rowsTranspositions(matrix.rows()),\n    m_colsTranspositions(matrix.cols()),\n    m_isInitialized(false),\n    m_usePrescribedThreshold(false)\n{\n  computeInPlace();\n}\n\ntemplate<typename MatrixType>\nvoid FullPivLU<MatrixType>::computeInPlace()\n{\n  check_template_parameters();\n\n  // the permutations are stored as int indices, so just to be sure:\n  eigen_assert(m_lu.rows()<=NumTraits<int>::highest() && m_lu.cols()<=NumTraits<int>::highest());\n\n  m_l1_norm = m_lu.cwiseAbs().colwise().sum().maxCoeff();\n\n  const Index size = m_lu.diagonalSize();\n  const Index rows = m_lu.rows();\n  const Index cols = m_lu.cols();\n\n  // will store the transpositions, before we accumulate them at the end.\n  // can't accumulate on-the-fly because that will be done in reverse order for the rows.\n  m_rowsTranspositions.resize(m_lu.rows());\n  m_colsTranspositions.resize(m_lu.cols());\n  Index number_of_transpositions = 0; // number of NONTRIVIAL transpositions, i.e. 
m_rowsTranspositions[i]!=i\n\n  m_nonzero_pivots = size; // the generic case is that in which all pivots are nonzero (invertible case)\n  m_maxpivot = RealScalar(0);\n\n  for(Index k = 0; k < size; ++k)\n  {\n    // First, we need to find the pivot.\n\n    // biggest coefficient in the remaining bottom-right corner (starting at row k, col k)\n    Index row_of_biggest_in_corner, col_of_biggest_in_corner;\n    typedef internal::scalar_score_coeff_op<Scalar> Scoring;\n    typedef typename Scoring::result_type Score;\n    Score biggest_in_corner;\n    biggest_in_corner = m_lu.bottomRightCorner(rows-k, cols-k)\n                        .unaryExpr(Scoring())\n                        .maxCoeff(&row_of_biggest_in_corner, &col_of_biggest_in_corner);\n    row_of_biggest_in_corner += k; // correct the values! since they were computed in the corner,\n    col_of_biggest_in_corner += k; // need to add k to them.\n\n    if(biggest_in_corner==Score(0))\n    {\n      // before exiting, make sure to initialize the still uninitialized transpositions\n      // in a sane state without destroying what we already have.\n      m_nonzero_pivots = k;\n      for(Index i = k; i < size; ++i)\n      {\n        m_rowsTranspositions.coeffRef(i) = internal::convert_index<StorageIndex>(i);\n        m_colsTranspositions.coeffRef(i) = internal::convert_index<StorageIndex>(i);\n      }\n      break;\n    }\n\n    RealScalar abs_pivot = internal::abs_knowing_score<Scalar>()(m_lu(row_of_biggest_in_corner, col_of_biggest_in_corner), biggest_in_corner);\n    if(abs_pivot > m_maxpivot) m_maxpivot = abs_pivot;\n\n    // Now that we've found the pivot, we need to apply the row/col swaps to\n    // bring it to the location (k,k).\n\n    m_rowsTranspositions.coeffRef(k) = internal::convert_index<StorageIndex>(row_of_biggest_in_corner);\n    m_colsTranspositions.coeffRef(k) = internal::convert_index<StorageIndex>(col_of_biggest_in_corner);\n    if(k != row_of_biggest_in_corner) {\n      m_lu.row(k).swap(m_lu.row(row_of_biggest_in_corner));\n      ++number_of_transpositions;\n    }\n    if(k != col_of_biggest_in_corner) {\n      m_lu.col(k).swap(m_lu.col(col_of_biggest_in_corner));\n      ++number_of_transpositions;\n    }\n\n    // Now that the pivot is at the right location, we update the remaining\n    // bottom-right corner by Gaussian elimination.\n\n    if(k<rows-1)\n      m_lu.col(k).tail(rows-k-1) /= m_lu.coeff(k,k);\n    if(k<size-1)\n      m_lu.block(k+1,k+1,rows-k-1,cols-k-1).noalias() -= m_lu.col(k).tail(rows-k-1) * m_lu.row(k).tail(cols-k-1);\n  }\n\n  // the main loop is over, we still have to accumulate the transpositions to find the\n  // permutations P and Q\n\n  m_p.setIdentity(rows);\n  for(Index k = size-1; k >= 0; --k)\n    m_p.applyTranspositionOnTheRight(k, m_rowsTranspositions.coeff(k));\n\n  m_q.setIdentity(cols);\n  for(Index k = 0; k < size; ++k)\n    m_q.applyTranspositionOnTheRight(k, m_colsTranspositions.coeff(k));\n\n  m_det_pq = (number_of_transpositions%2) ? 
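/* each nontrivial transposition flips the sign of det(P)*det(Q), so the parity of their count gives the determinant of the combined permutation */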
-1 : 1;\n\n  m_isInitialized = true;\n}\n\ntemplate<typename MatrixType>\ntypename internal::traits<MatrixType>::Scalar FullPivLU<MatrixType>::determinant() const\n{\n  eigen_assert(m_isInitialized && \"LU is not initialized.\");\n  eigen_assert(m_lu.rows() == m_lu.cols() && \"You can't take the determinant of a non-square matrix!\");\n  return Scalar(m_det_pq) * Scalar(m_lu.diagonal().prod());\n}\n\n/** \\returns the matrix represented by the decomposition,\n * i.e., it returns the product: \\f$ P^{-1} L U Q^{-1} \\f$.\n * This function is provided for debug purposes. */\ntemplate<typename MatrixType>\nMatrixType FullPivLU<MatrixType>::reconstructedMatrix() const\n{\n  eigen_assert(m_isInitialized && \"LU is not initialized.\");\n  const Index smalldim = (std::min)(m_lu.rows(), m_lu.cols());\n  // LU\n  MatrixType res(m_lu.rows(),m_lu.cols());\n  // FIXME the .toDenseMatrix() should not be needed...\n  res = m_lu.leftCols(smalldim)\n            .template triangularView<UnitLower>().toDenseMatrix()\n      * m_lu.topRows(smalldim)\n            .template triangularView<Upper>().toDenseMatrix();\n\n  // P^{-1}(LU)\n  res = m_p.inverse() * res;\n\n  // (P^{-1}LU)Q^{-1}\n  res = res * m_q.inverse();\n\n  return res;\n}\n\n/********* Implementation of kernel() **************************************************/\n\nnamespace internal {\ntemplate<typename _MatrixType>\nstruct kernel_retval<FullPivLU<_MatrixType> >\n  : kernel_retval_base<FullPivLU<_MatrixType> >\n{\n  EIGEN_MAKE_KERNEL_HELPERS(FullPivLU<_MatrixType>)\n\n  enum { MaxSmallDimAtCompileTime = EIGEN_SIZE_MIN_PREFER_FIXED(\n            MatrixType::MaxColsAtCompileTime,\n            MatrixType::MaxRowsAtCompileTime)\n  };\n\n  template<typename Dest> void evalTo(Dest& dst) const\n  {\n    using std::abs;\n    const Index cols = dec().matrixLU().cols(), dimker = cols - rank();\n    if(dimker == 0)\n    {\n      // The Kernel is just {0}, so it doesn't have a basis properly speaking, but let's\n      // avoid crashing/asserting as that depends on floating point calculations. Let's\n      // just return a single column vector filled with zeros.\n      dst.setZero();\n      return;\n    }\n\n    /* Let us use the following lemma:\n      *\n      * Lemma: If the matrix A has the LU decomposition PAQ = LU,\n      * then Ker A = Q(Ker U).\n      *\n      * Proof: trivial: just keep in mind that P, Q, L are invertible.\n      */\n\n    /* Thus, all we need to do is to compute Ker U, and then apply Q.\n      *\n      * U is upper triangular, with eigenvalues sorted so that any zeros appear at the end.\n      * Thus, the diagonal of U ends with exactly\n      * dimKer zero's. Let us use that to construct dimKer linearly\n      * independent vectors in Ker U.\n      */\n\n    Matrix<Index, Dynamic, 1, 0, MaxSmallDimAtCompileTime, 1> pivots(rank());\n    RealScalar premultiplied_threshold = dec().maxPivot() * dec().threshold();\n    Index p = 0;\n    for(Index i = 0; i < dec().nonzeroPivots(); ++i)\n      if(abs(dec().matrixLU().coeff(i,i)) > premultiplied_threshold)\n        pivots.coeffRef(p++) = i;\n    eigen_internal_assert(p == rank());\n\n    // we construct a temporaty trapezoid matrix m, by taking the U matrix and\n    // permuting the rows and cols to bring the nonnegligible pivots to the top of\n    // the main diagonal. 
We need that to be able to apply our triangular solvers.\n    // FIXME when we get triangularView-for-rectangular-matrices, this can be simplified\n    Matrix<typename MatrixType::Scalar, Dynamic, Dynamic, MatrixType::Options,\n           MaxSmallDimAtCompileTime, MatrixType::MaxColsAtCompileTime>\n      m(dec().matrixLU().block(0, 0, rank(), cols));\n    for(Index i = 0; i < rank(); ++i)\n    {\n      if(i) m.row(i).head(i).setZero();\n      m.row(i).tail(cols-i) = dec().matrixLU().row(pivots.coeff(i)).tail(cols-i);\n    }\n    m.block(0, 0, rank(), rank());\n    m.block(0, 0, rank(), rank()).template triangularView<StrictlyLower>().setZero();\n    for(Index i = 0; i < rank(); ++i)\n      m.col(i).swap(m.col(pivots.coeff(i)));\n\n    // ok, we have our trapezoid matrix, we can apply the triangular solver.\n    // notice that the math behind this suggests that we should apply this to the\n    // negative of the RHS, but for performance we just put the negative sign elsewhere, see below.\n    m.topLeftCorner(rank(), rank())\n     .template triangularView<Upper>().solveInPlace(\n        m.topRightCorner(rank(), dimker)\n      );\n\n    // now we must undo the column permutation that we had applied!\n    for(Index i = rank()-1; i >= 0; --i)\n      m.col(i).swap(m.col(pivots.coeff(i)));\n\n    // see the negative sign in the next line, that's what we were talking about above.\n    for(Index i = 0; i < rank(); ++i) dst.row(dec().permutationQ().indices().coeff(i)) = -m.row(i).tail(dimker);\n    for(Index i = rank(); i < cols; ++i) dst.row(dec().permutationQ().indices().coeff(i)).setZero();\n    for(Index k = 0; k < dimker; ++k) dst.coeffRef(dec().permutationQ().indices().coeff(rank()+k), k) = Scalar(1);\n  }\n};\n\n/***** Implementation of image() *****************************************************/\n\ntemplate<typename _MatrixType>\nstruct image_retval<FullPivLU<_MatrixType> >\n  : image_retval_base<FullPivLU<_MatrixType> >\n{\n  EIGEN_MAKE_IMAGE_HELPERS(FullPivLU<_MatrixType>)\n\n  enum { MaxSmallDimAtCompileTime = EIGEN_SIZE_MIN_PREFER_FIXED(\n            MatrixType::MaxColsAtCompileTime,\n            MatrixType::MaxRowsAtCompileTime)\n  };\n\n  template<typename Dest> void evalTo(Dest& dst) const\n  {\n    using std::abs;\n    if(rank() == 0)\n    {\n      // The Image is just {0}, so it doesn't have a basis properly speaking, but let's\n      // avoid crashing/asserting as that depends on floating point calculations. 
Let's\n      // just return a single column vector filled with zeros.\n      dst.setZero();\n      return;\n    }\n\n    Matrix<Index, Dynamic, 1, 0, MaxSmallDimAtCompileTime, 1> pivots(rank());\n    RealScalar premultiplied_threshold = dec().maxPivot() * dec().threshold();\n    Index p = 0;\n    for(Index i = 0; i < dec().nonzeroPivots(); ++i)\n      if(abs(dec().matrixLU().coeff(i,i)) > premultiplied_threshold)\n        pivots.coeffRef(p++) = i;\n    eigen_internal_assert(p == rank());\n\n    for(Index i = 0; i < rank(); ++i)\n      dst.col(i) = originalMatrix().col(dec().permutationQ().indices().coeff(pivots.coeff(i)));\n  }\n};\n\n/***** Implementation of solve() *****************************************************/\n\n} // end namespace internal\n\n#ifndef EIGEN_PARSED_BY_DOXYGEN\ntemplate<typename _MatrixType>\ntemplate<typename RhsType, typename DstType>\nvoid FullPivLU<_MatrixType>::_solve_impl(const RhsType &rhs, DstType &dst) const\n{\n  /* The decomposition PAQ = LU can be rewritten as A = P^{-1} L U Q^{-1}.\n  * So we proceed as follows:\n  * Step 1: compute c = P * rhs.\n  * Step 2: replace c by the solution x to Lx = c. Exists because L is invertible.\n  * Step 3: replace c by the solution x to Ux = c. May or may not exist.\n  * Step 4: result = Q * c;\n  */\n\n  const Index rows = this->rows(),\n              cols = this->cols(),\n              nonzero_pivots = this->rank();\n  const Index smalldim = (std::min)(rows, cols);\n\n  if(nonzero_pivots == 0)\n  {\n    dst.setZero();\n    return;\n  }\n\n  typename RhsType::PlainObject c(rhs.rows(), rhs.cols());\n\n  // Step 1\n  c = permutationP() * rhs;\n\n  // Step 2\n  m_lu.topLeftCorner(smalldim,smalldim)\n      .template triangularView<UnitLower>()\n      .solveInPlace(c.topRows(smalldim));\n  if(rows>cols)\n    c.bottomRows(rows-cols) -= m_lu.bottomRows(rows-cols) * c.topRows(cols);\n\n  // Step 3\n  m_lu.topLeftCorner(nonzero_pivots, nonzero_pivots)\n      .template triangularView<Upper>()\n      .solveInPlace(c.topRows(nonzero_pivots));\n\n  // Step 4\n  for(Index i = 0; i < nonzero_pivots; ++i)\n    dst.row(permutationQ().indices().coeff(i)) = c.row(i);\n  for(Index i = nonzero_pivots; i < m_lu.cols(); ++i)\n    dst.row(permutationQ().indices().coeff(i)).setZero();\n}\n\ntemplate<typename _MatrixType>\ntemplate<bool Conjugate, typename RhsType, typename DstType>\nvoid FullPivLU<_MatrixType>::_solve_impl_transposed(const RhsType &rhs, DstType &dst) const\n{\n  /* The decomposition PAQ = LU can be rewritten as A = P^{-1} L U Q^{-1},\n   * and since permutations are real and unitary, we can write this\n   * as   A^T = Q U^T L^T P,\n   * So we proceed as follows:\n   * Step 1: compute c = Q^T rhs.\n   * Step 2: replace c by the solution x to U^T x = c. 
May or may not exist.\n   * Step 3: replace c by the solution x to L^T x = c.\n   * Step 4: result = P^T c.\n   * If Conjugate is true, replace \"^T\" by \"^*\" above.\n   */\n\n  const Index rows = this->rows(), cols = this->cols(),\n    nonzero_pivots = this->rank();\n  const Index smalldim = (std::min)(rows, cols);\n\n  if(nonzero_pivots == 0)\n  {\n    dst.setZero();\n    return;\n  }\n\n  typename RhsType::PlainObject c(rhs.rows(), rhs.cols());\n\n  // Step 1\n  c = permutationQ().inverse() * rhs;\n\n  // Step 2\n  m_lu.topLeftCorner(nonzero_pivots, nonzero_pivots)\n      .template triangularView<Upper>()\n      .transpose()\n      .template conjugateIf<Conjugate>()\n      .solveInPlace(c.topRows(nonzero_pivots));\n\n  // Step 3\n  m_lu.topLeftCorner(smalldim, smalldim)\n      .template triangularView<UnitLower>()\n      .transpose()\n      .template conjugateIf<Conjugate>()\n      .solveInPlace(c.topRows(smalldim));\n\n  // Step 4\n  PermutationPType invp = permutationP().inverse().eval();\n  for(Index i = 0; i < smalldim; ++i)\n    dst.row(invp.indices().coeff(i)) = c.row(i);\n  for(Index i = smalldim; i < rows; ++i)\n    dst.row(invp.indices().coeff(i)).setZero();\n}\n\n#endif\n\nnamespace internal {\n\n\n/***** Implementation of inverse() *****************************************************/\ntemplate<typename DstXprType, typename MatrixType>\nstruct Assignment<DstXprType, Inverse<FullPivLU<MatrixType> >, internal::assign_op<typename DstXprType::Scalar,typename FullPivLU<MatrixType>::Scalar>, Dense2Dense>\n{\n  typedef FullPivLU<MatrixType> LuType;\n  typedef Inverse<LuType> SrcXprType;\n  static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename MatrixType::Scalar> &)\n  {\n    dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.cols()));\n  }\n};\n} // end namespace internal\n\n/******* MatrixBase methods *****************************************************************/\n\n/** \\lu_module\n  *\n  * \\return the full-pivoting LU decomposition of \\c *this.\n  *\n  * \\sa class FullPivLU\n  */\ntemplate<typename Derived>\ninline const FullPivLU<typename MatrixBase<Derived>::PlainObject>\nMatrixBase<Derived>::fullPivLu() const\n{\n  return FullPivLU<PlainObject>(eval());\n}\n\n} // end namespace Eigen\n\n#endif // EIGEN_LU_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/LU/InverseImpl.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008-2010 Benoit Jacob <jacob.benoit.1@gmail.com>\n// Copyright (C) 2014 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_INVERSE_IMPL_H\n#define EIGEN_INVERSE_IMPL_H\n\nnamespace Eigen { \n\nnamespace internal {\n\n/**********************************\n*** General case implementation ***\n**********************************/\n\ntemplate<typename MatrixType, typename ResultType, int Size = MatrixType::RowsAtCompileTime>\nstruct compute_inverse\n{\n  EIGEN_DEVICE_FUNC\n  static inline void run(const MatrixType& matrix, ResultType& result)\n  {\n    result = matrix.partialPivLu().inverse();\n  }\n};\n\ntemplate<typename MatrixType, typename ResultType, int Size = MatrixType::RowsAtCompileTime>\nstruct compute_inverse_and_det_with_check { /* nothing! general case not supported. */ };\n\n/****************************\n*** Size 1 implementation ***\n****************************/\n\ntemplate<typename MatrixType, typename ResultType>\nstruct compute_inverse<MatrixType, ResultType, 1>\n{\n  EIGEN_DEVICE_FUNC\n  static inline void run(const MatrixType& matrix, ResultType& result)\n  {\n    typedef typename MatrixType::Scalar Scalar;\n    internal::evaluator<MatrixType> matrixEval(matrix);\n    result.coeffRef(0,0) = Scalar(1) / matrixEval.coeff(0,0);\n  }\n};\n\ntemplate<typename MatrixType, typename ResultType>\nstruct compute_inverse_and_det_with_check<MatrixType, ResultType, 1>\n{\n  EIGEN_DEVICE_FUNC\n  static inline void run(\n    const MatrixType& matrix,\n    const typename MatrixType::RealScalar& absDeterminantThreshold,\n    ResultType& result,\n    typename ResultType::Scalar& determinant,\n    bool& invertible\n  )\n  {\n    using std::abs;\n    determinant = matrix.coeff(0,0);\n    invertible = abs(determinant) > absDeterminantThreshold;\n    if(invertible) result.coeffRef(0,0) = typename ResultType::Scalar(1) / determinant;\n  }\n};\n\n/****************************\n*** Size 2 implementation ***\n****************************/\n\ntemplate<typename MatrixType, typename ResultType>\nEIGEN_DEVICE_FUNC \ninline void compute_inverse_size2_helper(\n    const MatrixType& matrix, const typename ResultType::Scalar& invdet,\n    ResultType& result)\n{\n  typename ResultType::Scalar temp = matrix.coeff(0,0);\n  result.coeffRef(0,0) =  matrix.coeff(1,1) * invdet;\n  result.coeffRef(1,0) = -matrix.coeff(1,0) * invdet;\n  result.coeffRef(0,1) = -matrix.coeff(0,1) * invdet;\n  result.coeffRef(1,1) =  temp * invdet;\n}\n\ntemplate<typename MatrixType, typename ResultType>\nstruct compute_inverse<MatrixType, ResultType, 2>\n{\n  EIGEN_DEVICE_FUNC\n  static inline void run(const MatrixType& matrix, ResultType& result)\n  {\n    typedef typename ResultType::Scalar Scalar;\n    const Scalar invdet = typename MatrixType::Scalar(1) / matrix.determinant();\n    compute_inverse_size2_helper(matrix, invdet, result);\n  }\n};\n\ntemplate<typename MatrixType, typename ResultType>\nstruct compute_inverse_and_det_with_check<MatrixType, ResultType, 2>\n{\n  EIGEN_DEVICE_FUNC\n  static inline void run(\n    const MatrixType& matrix,\n    const typename MatrixType::RealScalar& absDeterminantThreshold,\n    ResultType& inverse,\n    typename ResultType::Scalar& determinant,\n    
bool& invertible\n  )\n  {\n    using std::abs;\n    typedef typename ResultType::Scalar Scalar;\n    determinant = matrix.determinant();\n    invertible = abs(determinant) > absDeterminantThreshold;\n    if(!invertible) return;\n    const Scalar invdet = Scalar(1) / determinant;\n    compute_inverse_size2_helper(matrix, invdet, inverse);\n  }\n};\n\n/****************************\n*** Size 3 implementation ***\n****************************/\n\ntemplate<typename MatrixType, int i, int j>\nEIGEN_DEVICE_FUNC \ninline typename MatrixType::Scalar cofactor_3x3(const MatrixType& m)\n{\n  enum {\n    i1 = (i+1) % 3,\n    i2 = (i+2) % 3,\n    j1 = (j+1) % 3,\n    j2 = (j+2) % 3\n  };\n  return m.coeff(i1, j1) * m.coeff(i2, j2)\n       - m.coeff(i1, j2) * m.coeff(i2, j1);\n}\n\ntemplate<typename MatrixType, typename ResultType>\nEIGEN_DEVICE_FUNC\ninline void compute_inverse_size3_helper(\n    const MatrixType& matrix,\n    const typename ResultType::Scalar& invdet,\n    const Matrix<typename ResultType::Scalar,3,1>& cofactors_col0,\n    ResultType& result)\n{\n  // Compute cofactors in a way that avoids aliasing issues.\n  typedef typename ResultType::Scalar Scalar;\n  const Scalar c01 = cofactor_3x3<MatrixType,0,1>(matrix) * invdet;\n  const Scalar c11 = cofactor_3x3<MatrixType,1,1>(matrix) * invdet;\n  const Scalar c02 = cofactor_3x3<MatrixType,0,2>(matrix) * invdet;\n  result.coeffRef(1,2) =  cofactor_3x3<MatrixType,2,1>(matrix) * invdet;\n  result.coeffRef(2,1) =  cofactor_3x3<MatrixType,1,2>(matrix) * invdet;\n  result.coeffRef(2,2) =  cofactor_3x3<MatrixType,2,2>(matrix) * invdet;\n  result.coeffRef(1,0) =  c01;\n  result.coeffRef(1,1) =  c11;\n  result.coeffRef(2,0) =  c02;  \n  result.row(0) = cofactors_col0 * invdet;\n}\n\ntemplate<typename MatrixType, typename ResultType>\nstruct compute_inverse<MatrixType, ResultType, 3>\n{\n  EIGEN_DEVICE_FUNC\n  static inline void run(const MatrixType& matrix, ResultType& result)\n  {\n    typedef typename ResultType::Scalar Scalar;\n    Matrix<typename MatrixType::Scalar,3,1> cofactors_col0;\n    cofactors_col0.coeffRef(0) =  cofactor_3x3<MatrixType,0,0>(matrix);\n    cofactors_col0.coeffRef(1) =  cofactor_3x3<MatrixType,1,0>(matrix);\n    cofactors_col0.coeffRef(2) =  cofactor_3x3<MatrixType,2,0>(matrix);\n    const Scalar det = (cofactors_col0.cwiseProduct(matrix.col(0))).sum();\n    const Scalar invdet = Scalar(1) / det;\n    compute_inverse_size3_helper(matrix, invdet, cofactors_col0, result);\n  }\n};\n\ntemplate<typename MatrixType, typename ResultType>\nstruct compute_inverse_and_det_with_check<MatrixType, ResultType, 3>\n{\n  EIGEN_DEVICE_FUNC\n  static inline void run(\n    const MatrixType& matrix,\n    const typename MatrixType::RealScalar& absDeterminantThreshold,\n    ResultType& inverse,\n    typename ResultType::Scalar& determinant,\n    bool& invertible\n  )\n  {\n    typedef typename ResultType::Scalar Scalar;\n    Matrix<Scalar,3,1> cofactors_col0;\n    cofactors_col0.coeffRef(0) =  cofactor_3x3<MatrixType,0,0>(matrix);\n    cofactors_col0.coeffRef(1) =  cofactor_3x3<MatrixType,1,0>(matrix);\n    cofactors_col0.coeffRef(2) =  cofactor_3x3<MatrixType,2,0>(matrix);\n    determinant = (cofactors_col0.cwiseProduct(matrix.col(0))).sum();\n    invertible = Eigen::numext::abs(determinant) > absDeterminantThreshold;\n    if(!invertible) return;\n    const Scalar invdet = Scalar(1) / determinant;\n    compute_inverse_size3_helper(matrix, invdet, cofactors_col0, inverse);\n  }\n};\n\n/****************************\n*** Size 4 implementation 
***\n****************************/\n\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC \ninline const typename Derived::Scalar general_det3_helper\n(const MatrixBase<Derived>& matrix, int i1, int i2, int i3, int j1, int j2, int j3)\n{\n  return matrix.coeff(i1,j1)\n         * (matrix.coeff(i2,j2) * matrix.coeff(i3,j3) - matrix.coeff(i2,j3) * matrix.coeff(i3,j2));\n}\n\ntemplate<typename MatrixType, int i, int j>\nEIGEN_DEVICE_FUNC \ninline typename MatrixType::Scalar cofactor_4x4(const MatrixType& matrix)\n{\n  enum {\n    i1 = (i+1) % 4,\n    i2 = (i+2) % 4,\n    i3 = (i+3) % 4,\n    j1 = (j+1) % 4,\n    j2 = (j+2) % 4,\n    j3 = (j+3) % 4\n  };\n  return general_det3_helper(matrix, i1, i2, i3, j1, j2, j3)\n       + general_det3_helper(matrix, i2, i3, i1, j1, j2, j3)\n       + general_det3_helper(matrix, i3, i1, i2, j1, j2, j3);\n}\n\ntemplate<int Arch, typename Scalar, typename MatrixType, typename ResultType>\nstruct compute_inverse_size4\n{\n  EIGEN_DEVICE_FUNC\n  static void run(const MatrixType& matrix, ResultType& result)\n  {\n    result.coeffRef(0,0) =  cofactor_4x4<MatrixType,0,0>(matrix);\n    result.coeffRef(1,0) = -cofactor_4x4<MatrixType,0,1>(matrix);\n    result.coeffRef(2,0) =  cofactor_4x4<MatrixType,0,2>(matrix);\n    result.coeffRef(3,0) = -cofactor_4x4<MatrixType,0,3>(matrix);\n    result.coeffRef(0,2) =  cofactor_4x4<MatrixType,2,0>(matrix);\n    result.coeffRef(1,2) = -cofactor_4x4<MatrixType,2,1>(matrix);\n    result.coeffRef(2,2) =  cofactor_4x4<MatrixType,2,2>(matrix);\n    result.coeffRef(3,2) = -cofactor_4x4<MatrixType,2,3>(matrix);\n    result.coeffRef(0,1) = -cofactor_4x4<MatrixType,1,0>(matrix);\n    result.coeffRef(1,1) =  cofactor_4x4<MatrixType,1,1>(matrix);\n    result.coeffRef(2,1) = -cofactor_4x4<MatrixType,1,2>(matrix);\n    result.coeffRef(3,1) =  cofactor_4x4<MatrixType,1,3>(matrix);\n    result.coeffRef(0,3) = -cofactor_4x4<MatrixType,3,0>(matrix);\n    result.coeffRef(1,3) =  cofactor_4x4<MatrixType,3,1>(matrix);\n    result.coeffRef(2,3) = -cofactor_4x4<MatrixType,3,2>(matrix);\n    result.coeffRef(3,3) =  cofactor_4x4<MatrixType,3,3>(matrix);\n    result /= (matrix.col(0).cwiseProduct(result.row(0).transpose())).sum();\n  }\n};\n\ntemplate<typename MatrixType, typename ResultType>\nstruct compute_inverse<MatrixType, ResultType, 4>\n : compute_inverse_size4<Architecture::Target, typename MatrixType::Scalar,\n                            MatrixType, ResultType>\n{\n};\n\ntemplate<typename MatrixType, typename ResultType>\nstruct compute_inverse_and_det_with_check<MatrixType, ResultType, 4>\n{\n  EIGEN_DEVICE_FUNC\n  static inline void run(\n    const MatrixType& matrix,\n    const typename MatrixType::RealScalar& absDeterminantThreshold,\n    ResultType& inverse,\n    typename ResultType::Scalar& determinant,\n    bool& invertible\n  )\n  {\n    using std::abs;\n    determinant = matrix.determinant();\n    invertible = abs(determinant) > absDeterminantThreshold;\n    if(invertible && extract_data(matrix) != extract_data(inverse)) {\n      compute_inverse<MatrixType, ResultType>::run(matrix, inverse);\n    }\n    else if(invertible) {\n      MatrixType matrix_t = matrix;\n      compute_inverse<MatrixType, ResultType>::run(matrix_t, inverse);\n    }\n  }\n};\n\n/*************************\n*** MatrixBase methods ***\n*************************/\n\n} // end namespace internal\n\nnamespace internal {\n\n// Specialization for \"dense = dense_xpr.inverse()\"\ntemplate<typename DstXprType, typename XprType>\nstruct Assignment<DstXprType, Inverse<XprType>, 
internal::assign_op<typename DstXprType::Scalar,typename XprType::Scalar>, Dense2Dense>\n{\n  typedef Inverse<XprType> SrcXprType;\n  EIGEN_DEVICE_FUNC\n  static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename XprType::Scalar> &)\n  {\n    Index dstRows = src.rows();\n    Index dstCols = src.cols();\n    if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))\n      dst.resize(dstRows, dstCols);\n    \n    const int Size = EIGEN_PLAIN_ENUM_MIN(XprType::ColsAtCompileTime,DstXprType::ColsAtCompileTime);\n    EIGEN_ONLY_USED_FOR_DEBUG(Size);\n    eigen_assert(( (Size<=1) || (Size>4) || (extract_data(src.nestedExpression())!=extract_data(dst)))\n              && \"Aliasing problem detected in inverse(), you need to do inverse().eval() here.\");\n\n    typedef typename internal::nested_eval<XprType,XprType::ColsAtCompileTime>::type  ActualXprType;\n    typedef typename internal::remove_all<ActualXprType>::type                        ActualXprTypeCleanded;\n    \n    ActualXprType actual_xpr(src.nestedExpression());\n    \n    compute_inverse<ActualXprTypeCleanded, DstXprType>::run(actual_xpr, dst);\n  }\n};\n\n  \n} // end namespace internal\n\n/** \\lu_module\n  *\n  * \\returns the matrix inverse of this matrix.\n  *\n  * For small fixed sizes up to 4x4, this method uses cofactors.\n  * In the general case, this method uses class PartialPivLU.\n  *\n  * \\note This matrix must be invertible, otherwise the result is undefined. If you need an\n  * invertibility check, do the following:\n  * \\li for fixed sizes up to 4x4, use computeInverseAndDetWithCheck().\n  * \\li for the general case, use class FullPivLU.\n  *\n  * Example: \\include MatrixBase_inverse.cpp\n  * Output: \\verbinclude MatrixBase_inverse.out\n  *\n  * \\sa computeInverseAndDetWithCheck()\n  */\ntemplate<typename Derived>\nEIGEN_DEVICE_FUNC\ninline const Inverse<Derived> MatrixBase<Derived>::inverse() const\n{\n  EIGEN_STATIC_ASSERT(!NumTraits<Scalar>::IsInteger,THIS_FUNCTION_IS_NOT_FOR_INTEGER_NUMERIC_TYPES)\n  eigen_assert(rows() == cols());\n  return Inverse<Derived>(derived());\n}\n\n/** \\lu_module\n  *\n  * Computation of matrix inverse and determinant, with invertibility check.\n  *\n  * This is only for fixed-size square matrices of size up to 4x4.\n  *\n  * Notice that it will trigger a copy of input matrix when trying to do the inverse in place.\n  *\n  * \\param inverse Reference to the matrix in which to store the inverse.\n  * \\param determinant Reference to the variable in which to store the determinant.\n  * \\param invertible Reference to the bool variable in which to store whether the matrix is invertible.\n  * \\param absDeterminantThreshold Optional parameter controlling the invertibility check.\n  *                                The matrix will be declared invertible if the absolute value of its\n  *                                determinant is greater than this threshold.\n  *\n  * Example: \\include MatrixBase_computeInverseAndDetWithCheck.cpp\n  * Output: \\verbinclude MatrixBase_computeInverseAndDetWithCheck.out\n  *\n  * \\sa inverse(), computeInverseWithCheck()\n  */\ntemplate<typename Derived>\ntemplate<typename ResultType>\ninline void MatrixBase<Derived>::computeInverseAndDetWithCheck(\n    ResultType& inverse,\n    typename ResultType::Scalar& determinant,\n    bool& invertible,\n    const RealScalar& absDeterminantThreshold\n  ) const\n{\n  // i'd love to put some static assertions there, but SFINAE means that they have no effect...\n  
eigen_assert(rows() == cols());\n  // for 2x2, it's worth giving a chance to avoid evaluating.\n  // for larger sizes, evaluating has negligible cost and limits code size.\n  typedef typename internal::conditional<\n    RowsAtCompileTime == 2,\n    typename internal::remove_all<typename internal::nested_eval<Derived, 2>::type>::type,\n    PlainObject\n  >::type MatrixType;\n  internal::compute_inverse_and_det_with_check<MatrixType, ResultType>::run\n    (derived(), absDeterminantThreshold, inverse, determinant, invertible);\n}\n\n/** \\lu_module\n  *\n  * Computation of matrix inverse, with invertibility check.\n  *\n  * This is only for fixed-size square matrices of size up to 4x4.\n  *\n  * Notice that it will trigger a copy of input matrix when trying to do the inverse in place.\n  *\n  * \\param inverse Reference to the matrix in which to store the inverse.\n  * \\param invertible Reference to the bool variable in which to store whether the matrix is invertible.\n  * \\param absDeterminantThreshold Optional parameter controlling the invertibility check.\n  *                                The matrix will be declared invertible if the absolute value of its\n  *                                determinant is greater than this threshold.\n  *\n  * Example: \\include MatrixBase_computeInverseWithCheck.cpp\n  * Output: \\verbinclude MatrixBase_computeInverseWithCheck.out\n  *\n  * \\sa inverse(), computeInverseAndDetWithCheck()\n  */\ntemplate<typename Derived>\ntemplate<typename ResultType>\ninline void MatrixBase<Derived>::computeInverseWithCheck(\n    ResultType& inverse,\n    bool& invertible,\n    const RealScalar& absDeterminantThreshold\n  ) const\n{\n  Scalar determinant;\n  // i'd love to put some static assertions there, but SFINAE means that they have no effect...\n  eigen_assert(rows() == cols());\n  computeInverseAndDetWithCheck(inverse,determinant,invertible,absDeterminantThreshold);\n}\n\n} // end namespace Eigen\n\n#endif // EIGEN_INVERSE_IMPL_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/LU/PartialPivLU.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2006-2009 Benoit Jacob <jacob.benoit.1@gmail.com>\n// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_PARTIALLU_H\n#define EIGEN_PARTIALLU_H\n\nnamespace Eigen {\n\nnamespace internal {\ntemplate<typename _MatrixType> struct traits<PartialPivLU<_MatrixType> >\n : traits<_MatrixType>\n{\n  typedef MatrixXpr XprKind;\n  typedef SolverStorage StorageKind;\n  typedef int StorageIndex;\n  typedef traits<_MatrixType> BaseTraits;\n  enum {\n    Flags = BaseTraits::Flags & RowMajorBit,\n    CoeffReadCost = Dynamic\n  };\n};\n\ntemplate<typename T,typename Derived>\nstruct enable_if_ref;\n// {\n//   typedef Derived type;\n// };\n\ntemplate<typename T,typename Derived>\nstruct enable_if_ref<Ref<T>,Derived> {\n  typedef Derived type;\n};\n\n} // end namespace internal\n\n/** \\ingroup LU_Module\n  *\n  * \\class PartialPivLU\n  *\n  * \\brief LU decomposition of a matrix with partial pivoting, and related features\n  *\n  * \\tparam _MatrixType the type of the matrix of which we are computing the LU decomposition\n  *\n  * This class represents a LU decomposition of a \\b square \\b invertible matrix, with partial pivoting: the matrix A\n  * is decomposed as A = PLU where L is unit-lower-triangular, U is upper-triangular, and P\n  * is a permutation matrix.\n  *\n  * Typically, partial pivoting LU decomposition is only considered numerically stable for square invertible\n  * matrices. Thus LAPACK's dgesv and dgesvx require the matrix to be square and invertible. The present class\n  * does the same. It will assert that the matrix is square, but it won't (actually it can't) check that the\n  * matrix is invertible: it is your task to check that you only use this decomposition on invertible matrices.\n  *\n  * The guaranteed safe alternative, working for all matrices, is the full pivoting LU decomposition, provided\n  * by class FullPivLU.\n  *\n  * This is \\b not a rank-revealing LU decomposition. Many features are intentionally absent from this class,\n  * such as rank computation. If you need these features, use class FullPivLU.\n  *\n  * This LU decomposition is suitable to invert invertible matrices. 
It is what MatrixBase::inverse() uses\n  * in the general case.\n  * On the other hand, it is \\b not suitable to determine whether a given matrix is invertible.\n  *\n  * The data of the LU decomposition can be directly accessed through the methods matrixLU(), permutationP().\n  *\n  * This class supports the \\link InplaceDecomposition inplace decomposition \\endlink mechanism.\n  *\n  * \\sa MatrixBase::partialPivLu(), MatrixBase::determinant(), MatrixBase::inverse(), MatrixBase::computeInverse(), class FullPivLU\n  */\ntemplate<typename _MatrixType> class PartialPivLU\n  : public SolverBase<PartialPivLU<_MatrixType> >\n{\n  public:\n\n    typedef _MatrixType MatrixType;\n    typedef SolverBase<PartialPivLU> Base;\n    friend class SolverBase<PartialPivLU>;\n\n    EIGEN_GENERIC_PUBLIC_INTERFACE(PartialPivLU)\n    enum {\n      MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,\n      MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime\n    };\n    typedef PermutationMatrix<RowsAtCompileTime, MaxRowsAtCompileTime> PermutationType;\n    typedef Transpositions<RowsAtCompileTime, MaxRowsAtCompileTime> TranspositionType;\n    typedef typename MatrixType::PlainObject PlainObject;\n\n    /**\n      * \\brief Default Constructor.\n      *\n      * The default constructor is useful in cases in which the user intends to\n      * perform decompositions via PartialPivLU::compute(const MatrixType&).\n      */\n    PartialPivLU();\n\n    /** \\brief Default Constructor with memory preallocation\n      *\n      * Like the default constructor but with preallocation of the internal data\n      * according to the specified problem \\a size.\n      * \\sa PartialPivLU()\n      */\n    explicit PartialPivLU(Index size);\n\n    /** Constructor.\n      *\n      * \\param matrix the matrix of which to compute the LU decomposition.\n      *\n      * \\warning The matrix should have full rank (e.g. if it's square, it should be invertible).\n      * If you need to deal with non-full rank, use class FullPivLU instead.\n      */\n    template<typename InputType>\n    explicit PartialPivLU(const EigenBase<InputType>& matrix);\n\n    /** Constructor for \\link InplaceDecomposition inplace decomposition \\endlink\n      *\n      * \\param matrix the matrix of which to compute the LU decomposition.\n      *\n      * \\warning The matrix should have full rank (e.g. 
if it's square, it should be invertible).\n      * If you need to deal with non-full rank, use class FullPivLU instead.\n      */\n    template<typename InputType>\n    explicit PartialPivLU(EigenBase<InputType>& matrix);\n\n    template<typename InputType>\n    PartialPivLU& compute(const EigenBase<InputType>& matrix) {\n      m_lu = matrix.derived();\n      compute();\n      return *this;\n    }\n\n    /** \\returns the LU decomposition matrix: the upper-triangular part is U, the\n      * unit-lower-triangular part is L (at least for square matrices; in the non-square\n      * case, special care is needed, see the documentation of class FullPivLU).\n      *\n      * \\sa matrixL(), matrixU()\n      */\n    inline const MatrixType& matrixLU() const\n    {\n      eigen_assert(m_isInitialized && \"PartialPivLU is not initialized.\");\n      return m_lu;\n    }\n\n    /** \\returns the permutation matrix P.\n      */\n    inline const PermutationType& permutationP() const\n    {\n      eigen_assert(m_isInitialized && \"PartialPivLU is not initialized.\");\n      return m_p;\n    }\n\n    #ifdef EIGEN_PARSED_BY_DOXYGEN\n    /** This method returns the solution x to the equation Ax=b, where A is the matrix of which\n      * *this is the LU decomposition.\n      *\n      * \\param b the right-hand-side of the equation to solve. Can be a vector or a matrix,\n      *          the only requirement in order for the equation to make sense is that\n      *          b.rows()==A.rows(), where A is the matrix of which *this is the LU decomposition.\n      *\n      * \\returns the solution.\n      *\n      * Example: \\include PartialPivLU_solve.cpp\n      * Output: \\verbinclude PartialPivLU_solve.out\n      *\n      * Since this PartialPivLU class assumes anyway that the matrix A is invertible, the solution\n      * theoretically exists and is unique regardless of b.\n      *\n      * \\sa TriangularView::solve(), inverse(), computeInverse()\n      */\n    template<typename Rhs>\n    inline const Solve<PartialPivLU, Rhs>\n    solve(const MatrixBase<Rhs>& b) const;\n    #endif\n\n    /** \\returns an estimate of the reciprocal condition number of the matrix of which \\c *this is\n        the LU decomposition.\n      */\n    inline RealScalar rcond() const\n    {\n      eigen_assert(m_isInitialized && \"PartialPivLU is not initialized.\");\n      return internal::rcond_estimate_helper(m_l1_norm, *this);\n    }\n\n    /** \\returns the inverse of the matrix of which *this is the LU decomposition.\n      *\n      * \\warning The matrix being decomposed here is assumed to be invertible. If you need to check for\n      *          invertibility, use class FullPivLU instead.\n      *\n      * \\sa MatrixBase::inverse(), LU::inverse()\n      */\n    inline const Inverse<PartialPivLU> inverse() const\n    {\n      eigen_assert(m_isInitialized && \"PartialPivLU is not initialized.\");\n      return Inverse<PartialPivLU>(*this);\n    }\n\n    /** \\returns the determinant of the matrix of which\n      * *this is the LU decomposition. 
It has only linear complexity\n      * (that is, O(n) where n is the dimension of the square matrix)\n      * as the LU decomposition has already been computed.\n      *\n      * \\note For fixed-size matrices of size up to 4, MatrixBase::determinant() offers\n      *       optimized paths.\n      *\n      * \\warning a determinant can be very big or small, so for matrices\n      * of large enough dimension, there is a risk of overflow/underflow.\n      *\n      * \\sa MatrixBase::determinant()\n      */\n    Scalar determinant() const;\n\n    MatrixType reconstructedMatrix() const;\n\n    EIGEN_CONSTEXPR inline Index rows() const EIGEN_NOEXCEPT { return m_lu.rows(); }\n    EIGEN_CONSTEXPR inline Index cols() const EIGEN_NOEXCEPT { return m_lu.cols(); }\n\n    #ifndef EIGEN_PARSED_BY_DOXYGEN\n    template<typename RhsType, typename DstType>\n    EIGEN_DEVICE_FUNC\n    void _solve_impl(const RhsType &rhs, DstType &dst) const {\n     /* The decomposition PA = LU can be rewritten as A = P^{-1} L U.\n      * So we proceed as follows:\n      * Step 1: compute c = Pb.\n      * Step 2: replace c by the solution x to Lx = c.\n      * Step 3: replace c by the solution x to Ux = c.\n      */\n\n      // Step 1\n      dst = permutationP() * rhs;\n\n      // Step 2\n      m_lu.template triangularView<UnitLower>().solveInPlace(dst);\n\n      // Step 3\n      m_lu.template triangularView<Upper>().solveInPlace(dst);\n    }\n\n    template<bool Conjugate, typename RhsType, typename DstType>\n    EIGEN_DEVICE_FUNC\n    void _solve_impl_transposed(const RhsType &rhs, DstType &dst) const {\n     /* The decomposition PA = LU can be rewritten as A^T = U^T L^T P.\n      * So we proceed as follows:\n      * Step 1: compute c as the solution to L^T c = b\n      * Step 2: replace c by the solution x to U^T x = c.\n      * Step 3: update  c = P^-1 c.\n      */\n\n      eigen_assert(rhs.rows() == m_lu.cols());\n\n      // Step 1\n      dst = m_lu.template triangularView<Upper>().transpose()\n                .template conjugateIf<Conjugate>().solve(rhs);\n      // Step 2\n      m_lu.template triangularView<UnitLower>().transpose()\n          .template conjugateIf<Conjugate>().solveInPlace(dst);\n      // Step 3\n      dst = permutationP().transpose() * dst;\n    }\n    #endif\n\n  protected:\n\n    static void check_template_parameters()\n    {\n      EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);\n    }\n\n    void compute();\n\n    MatrixType m_lu;\n    PermutationType m_p;\n    TranspositionType m_rowsTranspositions;\n    RealScalar m_l1_norm;\n    signed char m_det_p;\n    bool m_isInitialized;\n};\n\ntemplate<typename MatrixType>\nPartialPivLU<MatrixType>::PartialPivLU()\n  : m_lu(),\n    m_p(),\n    m_rowsTranspositions(),\n    m_l1_norm(0),\n    m_det_p(0),\n    m_isInitialized(false)\n{\n}\n\ntemplate<typename MatrixType>\nPartialPivLU<MatrixType>::PartialPivLU(Index size)\n  : m_lu(size, size),\n    m_p(size),\n    m_rowsTranspositions(size),\n    m_l1_norm(0),\n    m_det_p(0),\n    m_isInitialized(false)\n{\n}\n\ntemplate<typename MatrixType>\ntemplate<typename InputType>\nPartialPivLU<MatrixType>::PartialPivLU(const EigenBase<InputType>& matrix)\n  : m_lu(matrix.rows(),matrix.cols()),\n    m_p(matrix.rows()),\n    m_rowsTranspositions(matrix.rows()),\n    m_l1_norm(0),\n    m_det_p(0),\n    m_isInitialized(false)\n{\n  compute(matrix.derived());\n}\n\ntemplate<typename MatrixType>\ntemplate<typename InputType>\nPartialPivLU<MatrixType>::PartialPivLU(EigenBase<InputType>& matrix)\n  : m_lu(matrix.derived()),\n    
 m_p(matrix.rows()),\n    m_rowsTranspositions(matrix.rows()),\n    m_l1_norm(0),\n    m_det_p(0),\n    m_isInitialized(false)\n{\n  compute();\n}\n\nnamespace internal {\n\n/** \\internal Unblocked and blocked implementations of the in-place partial pivoting LU decomposition (see unblocked_lu() and blocked_lu()) */\ntemplate<typename Scalar, int StorageOrder, typename PivIndex, int SizeAtCompileTime=Dynamic>\nstruct partial_lu_impl\n{\n  static const int UnBlockedBound = 16;\n  static const bool UnBlockedAtCompileTime = SizeAtCompileTime!=Dynamic && SizeAtCompileTime<=UnBlockedBound;\n  static const int ActualSizeAtCompileTime = UnBlockedAtCompileTime ? SizeAtCompileTime : Dynamic;\n  // Remaining rows and columns at compile-time:\n  static const int RRows = SizeAtCompileTime==2 ? 1 : Dynamic;\n  static const int RCols = SizeAtCompileTime==2 ? 1 : Dynamic;\n  typedef Matrix<Scalar, ActualSizeAtCompileTime, ActualSizeAtCompileTime, StorageOrder> MatrixType;\n  typedef Ref<MatrixType> MatrixTypeRef;\n  typedef Ref<Matrix<Scalar, Dynamic, Dynamic, StorageOrder> > BlockType;\n  typedef typename MatrixType::RealScalar RealScalar;\n\n  /** \\internal performs the LU decomposition in-place of the matrix \\a lu\n    * using an unblocked algorithm.\n    *\n    * In addition, this function returns the row transpositions in the\n    * vector \\a row_transpositions which must have a size equal to the number\n    * of columns of the matrix \\a lu, and an integer \\a nb_transpositions\n    * which returns the actual number of transpositions.\n    *\n    * \\returns The index of the first pivot which is exactly zero if any, or a negative number otherwise.\n    */\n  static Index unblocked_lu(MatrixTypeRef& lu, PivIndex* row_transpositions, PivIndex& nb_transpositions)\n  {\n    typedef scalar_score_coeff_op<Scalar> Scoring;\n    typedef typename Scoring::result_type Score;\n    const Index rows = lu.rows();\n    const Index cols = lu.cols();\n    const Index size = (std::min)(rows,cols);\n    // For small compile-time matrices it is worth processing the last row separately:\n    //  speedup: +100% for 2x2, +10% for others.\n    const Index endk = UnBlockedAtCompileTime ? 
size-1 : size;\n    nb_transpositions = 0;\n    Index first_zero_pivot = -1;\n    for(Index k = 0; k < endk; ++k)\n    {\n      int rrows = internal::convert_index<int>(rows-k-1);\n      int rcols = internal::convert_index<int>(cols-k-1);\n\n      Index row_of_biggest_in_col;\n      Score biggest_in_corner\n        = lu.col(k).tail(rows-k).unaryExpr(Scoring()).maxCoeff(&row_of_biggest_in_col);\n      row_of_biggest_in_col += k;\n\n      row_transpositions[k] = PivIndex(row_of_biggest_in_col);\n\n      if(biggest_in_corner != Score(0))\n      {\n        if(k != row_of_biggest_in_col)\n        {\n          lu.row(k).swap(lu.row(row_of_biggest_in_col));\n          ++nb_transpositions;\n        }\n\n        lu.col(k).tail(fix<RRows>(rrows)) /= lu.coeff(k,k);\n      }\n      else if(first_zero_pivot==-1)\n      {\n        // the pivot is exactly zero, we record the index of the first pivot which is exactly 0,\n        // and continue the factorization such that we still have A = PLU\n        first_zero_pivot = k;\n      }\n\n      if(k<rows-1)\n        lu.bottomRightCorner(fix<RRows>(rrows),fix<RCols>(rcols)).noalias() -= lu.col(k).tail(fix<RRows>(rrows)) * lu.row(k).tail(fix<RCols>(rcols));\n    }\n\n    // special handling of the last entry\n    if(UnBlockedAtCompileTime)\n    {\n      Index k = endk;\n      row_transpositions[k] = PivIndex(k);\n      if (Scoring()(lu(k, k)) == Score(0) && first_zero_pivot == -1)\n        first_zero_pivot = k;\n    }\n\n    return first_zero_pivot;\n  }\n\n  /** \\internal performs the LU decomposition in-place of the matrix represented\n    * by the variables \\a rows, \\a cols, \\a lu_data, and \\a lu_stride using a\n    * recursive, blocked algorithm.\n    *\n    * In addition, this function returns the row transpositions in the\n    * vector \\a row_transpositions which must have a size equal to the number\n    * of columns of the matrix \\a lu, and an integer \\a nb_transpositions\n    * which returns the actual number of transpositions.\n    *\n    * \\returns The index of the first pivot which is exactly zero if any, or a negative number otherwise.\n    *\n    * \\note This very low level interface using pointers, etc. 
is to:\n    *   1 - reduce the number of instantiations to the strict minimum\n    *   2 - avoid infinite recursion of the instantiations with Block<Block<Block<...> > >\n    */\n  static Index blocked_lu(Index rows, Index cols, Scalar* lu_data, Index luStride, PivIndex* row_transpositions, PivIndex& nb_transpositions, Index maxBlockSize=256)\n  {\n    MatrixTypeRef lu = MatrixType::Map(lu_data,rows, cols, OuterStride<>(luStride));\n\n    const Index size = (std::min)(rows,cols);\n\n    // if the matrix is too small, no blocking:\n    if(UnBlockedAtCompileTime || size<=UnBlockedBound)\n    {\n      return unblocked_lu(lu, row_transpositions, nb_transpositions);\n    }\n\n    // automatically adjust the number of subdivisions to the size\n    // of the matrix so that there is enough sub blocks:\n    Index blockSize;\n    {\n      blockSize = size/8;\n      blockSize = (blockSize/16)*16;\n      blockSize = (std::min)((std::max)(blockSize,Index(8)), maxBlockSize);\n    }\n\n    nb_transpositions = 0;\n    Index first_zero_pivot = -1;\n    for(Index k = 0; k < size; k+=blockSize)\n    {\n      Index bs = (std::min)(size-k,blockSize); // actual size of the block\n      Index trows = rows - k - bs; // trailing rows\n      Index tsize = size - k - bs; // trailing size\n\n      // partition the matrix:\n      //                          A00 | A01 | A02\n      // lu  = A_0 | A_1 | A_2 =  A10 | A11 | A12\n      //                          A20 | A21 | A22\n      BlockType A_0 = lu.block(0,0,rows,k);\n      BlockType A_2 = lu.block(0,k+bs,rows,tsize);\n      BlockType A11 = lu.block(k,k,bs,bs);\n      BlockType A12 = lu.block(k,k+bs,bs,tsize);\n      BlockType A21 = lu.block(k+bs,k,trows,bs);\n      BlockType A22 = lu.block(k+bs,k+bs,trows,tsize);\n\n      PivIndex nb_transpositions_in_panel;\n      // recursively call the blocked LU algorithm on [A11^T A21^T]^T\n      // with a very small blocking size:\n      Index ret = blocked_lu(trows+bs, bs, &lu.coeffRef(k,k), luStride,\n                   row_transpositions+k, nb_transpositions_in_panel, 16);\n      if(ret>=0 && first_zero_pivot==-1)\n        first_zero_pivot = k+ret;\n\n      nb_transpositions += nb_transpositions_in_panel;\n      // update permutations and apply them to A_0\n      for(Index i=k; i<k+bs; ++i)\n      {\n        Index piv = (row_transpositions[i] += internal::convert_index<PivIndex>(k));\n        A_0.row(i).swap(A_0.row(piv));\n      }\n\n      if(trows)\n      {\n        // apply permutations to A_2\n        for(Index i=k;i<k+bs; ++i)\n          A_2.row(i).swap(A_2.row(row_transpositions[i]));\n\n        // A12 = A11^-1 A12\n        A11.template triangularView<UnitLower>().solveInPlace(A12);\n\n        A22.noalias() -= A21 * A12;\n      }\n    }\n    return first_zero_pivot;\n  }\n};\n\n/** \\internal performs the LU decomposition with partial pivoting in-place.\n  */\ntemplate<typename MatrixType, typename TranspositionType>\nvoid partial_lu_inplace(MatrixType& lu, TranspositionType& row_transpositions, typename TranspositionType::StorageIndex& nb_transpositions)\n{\n  // Special-case of zero matrix.\n  if (lu.rows() == 0 || lu.cols() == 0) {\n    nb_transpositions = 0;\n    return;\n  }\n  eigen_assert(lu.cols() == row_transpositions.size());\n  eigen_assert(row_transpositions.size() < 2 || (&row_transpositions.coeffRef(1)-&row_transpositions.coeffRef(0)) == 1);\n\n  partial_lu_impl\n    < typename MatrixType::Scalar, MatrixType::Flags&RowMajorBit?RowMajor:ColMajor,\n      typename TranspositionType::StorageIndex,\n      
EIGEN_SIZE_MIN_PREFER_FIXED(MatrixType::RowsAtCompileTime,MatrixType::ColsAtCompileTime)>\n    ::blocked_lu(lu.rows(), lu.cols(), &lu.coeffRef(0,0), lu.outerStride(), &row_transpositions.coeffRef(0), nb_transpositions);\n}\n\n} // end namespace internal\n\ntemplate<typename MatrixType>\nvoid PartialPivLU<MatrixType>::compute()\n{\n  check_template_parameters();\n\n  // the row permutation is stored as int indices, so just to be sure:\n  eigen_assert(m_lu.rows()<NumTraits<int>::highest());\n\n  if(m_lu.cols()>0)\n    m_l1_norm = m_lu.cwiseAbs().colwise().sum().maxCoeff();\n  else\n    m_l1_norm = RealScalar(0);\n\n  eigen_assert(m_lu.rows() == m_lu.cols() && \"PartialPivLU is only for square (and moreover invertible) matrices\");\n  const Index size = m_lu.rows();\n\n  m_rowsTranspositions.resize(size);\n\n  typename TranspositionType::StorageIndex nb_transpositions;\n  internal::partial_lu_inplace(m_lu, m_rowsTranspositions, nb_transpositions);\n  m_det_p = (nb_transpositions%2) ? -1 : 1;\n\n  m_p = m_rowsTranspositions;\n\n  m_isInitialized = true;\n}\n\ntemplate<typename MatrixType>\ntypename PartialPivLU<MatrixType>::Scalar PartialPivLU<MatrixType>::determinant() const\n{\n  eigen_assert(m_isInitialized && \"PartialPivLU is not initialized.\");\n  return Scalar(m_det_p) * m_lu.diagonal().prod();\n}\n\n/** \\returns the matrix represented by the decomposition,\n * i.e., it returns the product: P^{-1} L U.\n * This function is provided for debug purposes. */\ntemplate<typename MatrixType>\nMatrixType PartialPivLU<MatrixType>::reconstructedMatrix() const\n{\n  eigen_assert(m_isInitialized && \"LU is not initialized.\");\n  // LU\n  MatrixType res = m_lu.template triangularView<UnitLower>().toDenseMatrix()\n                 * m_lu.template triangularView<Upper>();\n\n  // P^{-1}(LU)\n  res = m_p.inverse() * res;\n\n  return res;\n}\n\n/***** Implementation details *****************************************************/\n\nnamespace internal {\n\n/***** Implementation of inverse() *****************************************************/\ntemplate<typename DstXprType, typename MatrixType>\nstruct Assignment<DstXprType, Inverse<PartialPivLU<MatrixType> >, internal::assign_op<typename DstXprType::Scalar,typename PartialPivLU<MatrixType>::Scalar>, Dense2Dense>\n{\n  typedef PartialPivLU<MatrixType> LuType;\n  typedef Inverse<LuType> SrcXprType;\n  static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename LuType::Scalar> &)\n  {\n    dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.cols()));\n  }\n};\n} // end namespace internal\n\n/******** MatrixBase methods *******/\n\n/** \\lu_module\n  *\n  * \\return the partial-pivoting LU decomposition of \\c *this.\n  *\n  * \\sa class PartialPivLU\n  */\ntemplate<typename Derived>\ninline const PartialPivLU<typename MatrixBase<Derived>::PlainObject>\nMatrixBase<Derived>::partialPivLu() const\n{\n  return PartialPivLU<PlainObject>(eval());\n}\n\n/** \\lu_module\n  *\n  * Synonym of partialPivLu().\n  *\n  * \\return the partial-pivoting LU decomposition of \\c *this.\n  *\n  * \\sa class PartialPivLU\n  */\ntemplate<typename Derived>\ninline const PartialPivLU<typename MatrixBase<Derived>::PlainObject>\nMatrixBase<Derived>::lu() const\n{\n  return PartialPivLU<PlainObject>(eval());\n}\n\n} // end namespace Eigen\n\n#endif // EIGEN_PARTIALLU_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/LU/PartialPivLU_LAPACKE.h",
    "content": "/*\n Copyright (c) 2011, Intel Corporation. All rights reserved.\n\n Redistribution and use in source and binary forms, with or without modification,\n are permitted provided that the following conditions are met:\n\n * Redistributions of source code must retain the above copyright notice, this\n   list of conditions and the following disclaimer.\n * Redistributions in binary form must reproduce the above copyright notice,\n   this list of conditions and the following disclaimer in the documentation\n   and/or other materials provided with the distribution.\n * Neither the name of Intel Corporation nor the names of its contributors may\n   be used to endorse or promote products derived from this software without\n   specific prior written permission.\n\n THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\" AND\n ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED\n WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE\n DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR\n ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES\n (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;\n LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON\n ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\n SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\n ********************************************************************************\n *   Content : Eigen bindings to LAPACKe\n *     LU decomposition with partial pivoting based on LAPACKE_?getrf function.\n ********************************************************************************\n*/\n\n#ifndef EIGEN_PARTIALLU_LAPACK_H\n#define EIGEN_PARTIALLU_LAPACK_H\n\nnamespace Eigen { \n\nnamespace internal {\n\n/** \\internal Specialization for the data types supported by LAPACKe */\n\n#define EIGEN_LAPACKE_LU_PARTPIV(EIGTYPE, LAPACKE_TYPE, LAPACKE_PREFIX) \\\ntemplate<int StorageOrder> \\\nstruct partial_lu_impl<EIGTYPE, StorageOrder, lapack_int> \\\n{ \\\n  /* \\internal performs the LU decomposition in-place of the matrix represented */ \\\n  static lapack_int blocked_lu(Index rows, Index cols, EIGTYPE* lu_data, Index luStride, lapack_int* row_transpositions, lapack_int& nb_transpositions, lapack_int maxBlockSize=256) \\\n  { \\\n    EIGEN_UNUSED_VARIABLE(maxBlockSize);\\\n    lapack_int matrix_order, first_zero_pivot; \\\n    lapack_int m, n, lda, *ipiv, info; \\\n    EIGTYPE* a; \\\n/* Set up parameters for ?getrf */ \\\n    matrix_order = StorageOrder==RowMajor ? 
LAPACK_ROW_MAJOR : LAPACK_COL_MAJOR; \\\n    lda = convert_index<lapack_int>(luStride); \\\n    a = lu_data; \\\n    ipiv = row_transpositions; \\\n    m = convert_index<lapack_int>(rows); \\\n    n = convert_index<lapack_int>(cols); \\\n    nb_transpositions = 0; \\\n\\\n    info = LAPACKE_##LAPACKE_PREFIX##getrf( matrix_order, m, n, (LAPACKE_TYPE*)a, lda, ipiv ); \\\n\\\n    for(int i=0;i<m;i++) { ipiv[i]--; if (ipiv[i]!=i) nb_transpositions++; } \\\n\\\n    eigen_assert(info >= 0); \\\n/* something should be done with nb_transpositions */ \\\n\\\n    first_zero_pivot = info; \\\n    return first_zero_pivot; \\\n  } \\\n};\n\nEIGEN_LAPACKE_LU_PARTPIV(double, double, d)\nEIGEN_LAPACKE_LU_PARTPIV(float, float, s)\nEIGEN_LAPACKE_LU_PARTPIV(dcomplex, lapack_complex_double, z)\nEIGEN_LAPACKE_LU_PARTPIV(scomplex, lapack_complex_float,  c)\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_PARTIALLU_LAPACK_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/LU/arch/InverseSize4.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2001 Intel Corporation\n// Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr>\n// Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n//\n// The algorithm below is a reimplementation of former \\src\\LU\\Inverse_SSE.h using PacketMath.\n// inv(M) = M#/|M|, where inv(M), M# and |M| denote the inverse of M,\n// adjugate of M and determinant of M respectively. M# is computed block-wise\n// using specific formulae. For proof, see:\n// https://lxjk.github.io/2017/09/03/Fast-4x4-Matrix-Inverse-with-SSE-SIMD-Explained.html\n// Variable names are adopted from \\src\\LU\\Inverse_SSE.h.\n//\n// The SSE code for the 4x4 float and double matrix inverse in former (deprecated) \\src\\LU\\Inverse_SSE.h\n// comes from the following Intel's library:\n// http://software.intel.com/en-us/articles/optimized-matrix-library-for-use-with-the-intel-pentiumr-4-processors-sse2-instructions/\n//\n// Here is the respective copyright and license statement:\n//\n//   Copyright (c) 2001 Intel Corporation.\n//\n// Permition is granted to use, copy, distribute and prepare derivative works\n// of this library for any purpose and without fee, provided, that the above\n// copyright notice and this statement appear in all copies.\n// Intel makes no representations about the suitability of this software for\n// any purpose, and specifically disclaims all warranties.\n// See LEGAL.TXT for all the legal information.\n//\n// TODO: Unify implementations of different data types (i.e. 
float and double).\n#ifndef EIGEN_INVERSE_SIZE_4_H\n#define EIGEN_INVERSE_SIZE_4_H\n\nnamespace Eigen\n{\nnamespace internal\n{\ntemplate <typename MatrixType, typename ResultType>\nstruct compute_inverse_size4<Architecture::Target, float, MatrixType, ResultType>\n{\n  enum\n  {\n    MatrixAlignment = traits<MatrixType>::Alignment,\n    ResultAlignment = traits<ResultType>::Alignment,\n    StorageOrdersMatch = (MatrixType::Flags & RowMajorBit) == (ResultType::Flags & RowMajorBit)\n  };\n  typedef typename conditional<(MatrixType::Flags & LinearAccessBit), MatrixType const &, typename MatrixType::PlainObject>::type ActualMatrixType;\n\n  static void run(const MatrixType &mat, ResultType &result)\n  {\n    ActualMatrixType matrix(mat);\n\n    const float* data = matrix.data();\n    const Index stride = matrix.innerStride();\n    Packet4f _L1 = ploadt<Packet4f,MatrixAlignment>(data);\n    Packet4f _L2 = ploadt<Packet4f,MatrixAlignment>(data + stride*4);\n    Packet4f _L3 = ploadt<Packet4f,MatrixAlignment>(data + stride*8);\n    Packet4f _L4 = ploadt<Packet4f,MatrixAlignment>(data + stride*12);\n\n    // Four 2x2 sub-matrices of the input matrix\n    // input = [[A, B],\n    //          [C, D]]\n    Packet4f A, B, C, D;\n\n    if (!StorageOrdersMatch)\n    {\n      A = vec4f_unpacklo(_L1, _L2);\n      B = vec4f_unpacklo(_L3, _L4);\n      C = vec4f_unpackhi(_L1, _L2);\n      D = vec4f_unpackhi(_L3, _L4);\n    }\n    else\n    {\n      A = vec4f_movelh(_L1, _L2);\n      B = vec4f_movehl(_L2, _L1);\n      C = vec4f_movelh(_L3, _L4);\n      D = vec4f_movehl(_L4, _L3);\n    }\n\n    Packet4f AB, DC;\n\n    // AB = A# * B, where A# denotes the adjugate of A, and * denotes matrix product.\n    AB = pmul(vec4f_swizzle2(A, A, 3, 3, 0, 0), B);\n    AB = psub(AB, pmul(vec4f_swizzle2(A, A, 1, 1, 2, 2), vec4f_swizzle2(B, B, 2, 3, 0, 1)));\n\n    // DC = D#*C\n    DC = pmul(vec4f_swizzle2(D, D, 3, 3, 0, 0), C);\n    DC = psub(DC, pmul(vec4f_swizzle2(D, D, 1, 1, 2, 2), vec4f_swizzle2(C, C, 2, 3, 0, 1)));\n\n    // determinants of the sub-matrices\n    Packet4f dA, dB, dC, dD;\n\n    dA = pmul(vec4f_swizzle2(A, A, 3, 3, 1, 1), A);\n    dA = psub(dA, vec4f_movehl(dA, dA));\n\n    dB = pmul(vec4f_swizzle2(B, B, 3, 3, 1, 1), B);\n    dB = psub(dB, vec4f_movehl(dB, dB));\n\n    dC = pmul(vec4f_swizzle2(C, C, 3, 3, 1, 1), C);\n    dC = psub(dC, vec4f_movehl(dC, dC));\n\n    dD = pmul(vec4f_swizzle2(D, D, 3, 3, 1, 1), D);\n    dD = psub(dD, vec4f_movehl(dD, dD));\n\n    Packet4f d, d1, d2;\n\n    d = pmul(vec4f_swizzle2(DC, DC, 0, 2, 1, 3), AB);\n    d = padd(d, vec4f_movehl(d, d));\n    d = padd(d, vec4f_swizzle2(d, d, 1, 0, 0, 0));\n    d1 = pmul(dA, dD);\n    d2 = pmul(dB, dC);\n\n    // determinant of the input matrix, det = |A||D| + |B||C| - trace(A#*B*D#*C)\n    Packet4f det = vec4f_duplane(psub(padd(d1, d2), d), 0);\n\n    // reciprocal of the determinant of the input matrix, rd = 1/det\n    Packet4f rd = pdiv(pset1<Packet4f>(1.0f), det);\n\n    // Four sub-matrices of the inverse\n    Packet4f iA, iB, iC, iD;\n\n    // iD = D*|A| - C*A#*B\n    iD = pmul(vec4f_swizzle2(C, C, 0, 0, 2, 2), vec4f_movelh(AB, AB));\n    iD = padd(iD, pmul(vec4f_swizzle2(C, C, 1, 1, 3, 3), vec4f_movehl(AB, AB)));\n    iD = psub(pmul(D, vec4f_duplane(dA, 0)), iD);\n\n    // iA = A*|D| - B*D#*C\n    iA = pmul(vec4f_swizzle2(B, B, 0, 0, 2, 2), vec4f_movelh(DC, DC));\n    iA = padd(iA, pmul(vec4f_swizzle2(B, B, 1, 1, 3, 3), vec4f_movehl(DC, DC)));\n    iA = psub(pmul(A, vec4f_duplane(dD, 0)), iA);\n\n    // iB = C*|B| - D * (A#B)# = 
C*|B| - D*B#*A\n    iB = pmul(D, vec4f_swizzle2(AB, AB, 3, 0, 3, 0));\n    iB = psub(iB, pmul(vec4f_swizzle2(D, D, 1, 0, 3, 2), vec4f_swizzle2(AB, AB, 2, 1, 2, 1)));\n    iB = psub(pmul(C, vec4f_duplane(dB, 0)), iB);\n\n    // iC = B*|C| - A * (D#C)# = B*|C| - A*C#*D\n    iC = pmul(A, vec4f_swizzle2(DC, DC, 3, 0, 3, 0));\n    iC = psub(iC, pmul(vec4f_swizzle2(A, A, 1, 0, 3, 2), vec4f_swizzle2(DC, DC, 2, 1, 2, 1)));\n    iC = psub(pmul(B, vec4f_duplane(dC, 0)), iC);\n\n    const float sign_mask[4] = {0.0f, numext::bit_cast<float>(0x80000000u), numext::bit_cast<float>(0x80000000u), 0.0f};\n    const Packet4f p4f_sign_PNNP = ploadu<Packet4f>(sign_mask);\n    rd = pxor(rd, p4f_sign_PNNP);\n    iA = pmul(iA, rd);\n    iB = pmul(iB, rd);\n    iC = pmul(iC, rd);\n    iD = pmul(iD, rd);\n\n    Index res_stride = result.outerStride();\n    float *res = result.data();\n\n    pstoret<float, Packet4f, ResultAlignment>(res + 0, vec4f_swizzle2(iA, iB, 3, 1, 3, 1));\n    pstoret<float, Packet4f, ResultAlignment>(res + res_stride, vec4f_swizzle2(iA, iB, 2, 0, 2, 0));\n    pstoret<float, Packet4f, ResultAlignment>(res + 2 * res_stride, vec4f_swizzle2(iC, iD, 3, 1, 3, 1));\n    pstoret<float, Packet4f, ResultAlignment>(res + 3 * res_stride, vec4f_swizzle2(iC, iD, 2, 0, 2, 0));\n  }\n};\n\n#if !(defined EIGEN_VECTORIZE_NEON && !(EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG))\n// same algorithm as above, except that each operand is split into\n// halves for two registers to hold.\ntemplate <typename MatrixType, typename ResultType>\nstruct compute_inverse_size4<Architecture::Target, double, MatrixType, ResultType>\n{\n  enum\n  {\n    MatrixAlignment = traits<MatrixType>::Alignment,\n    ResultAlignment = traits<ResultType>::Alignment,\n    StorageOrdersMatch = (MatrixType::Flags & RowMajorBit) == (ResultType::Flags & RowMajorBit)\n  };\n  typedef typename conditional<(MatrixType::Flags & LinearAccessBit),\n                               MatrixType const &,\n                               typename MatrixType::PlainObject>::type\n      ActualMatrixType;\n\n  static void run(const MatrixType &mat, ResultType &result)\n  {\n    ActualMatrixType matrix(mat);\n\n    // Four 2x2 sub-matrices of the input matrix, each is further divided into upper and lower\n    // row e.g. 
A1, upper row of A, A2, lower row of A\n    // input = [[A, B],  =  [[[A1, [B1,\n    //          [C, D]]        A2], B2]],\n    //                       [[C1, [D1,\n    //                         C2], D2]]]\n\n    Packet2d A1, A2, B1, B2, C1, C2, D1, D2;\n\n    const double* data = matrix.data();\n    const Index stride = matrix.innerStride();\n    if (StorageOrdersMatch)\n    {\n      A1 = ploadt<Packet2d,MatrixAlignment>(data + stride*0);\n      B1 = ploadt<Packet2d,MatrixAlignment>(data + stride*2);\n      A2 = ploadt<Packet2d,MatrixAlignment>(data + stride*4);\n      B2 = ploadt<Packet2d,MatrixAlignment>(data + stride*6);\n      C1 = ploadt<Packet2d,MatrixAlignment>(data + stride*8);\n      D1 = ploadt<Packet2d,MatrixAlignment>(data + stride*10);\n      C2 = ploadt<Packet2d,MatrixAlignment>(data + stride*12);\n      D2 = ploadt<Packet2d,MatrixAlignment>(data + stride*14);\n    }\n    else\n    {\n      Packet2d temp;\n      A1 = ploadt<Packet2d,MatrixAlignment>(data + stride*0);\n      C1 = ploadt<Packet2d,MatrixAlignment>(data + stride*2);\n      A2 = ploadt<Packet2d,MatrixAlignment>(data + stride*4);\n      C2 = ploadt<Packet2d,MatrixAlignment>(data + stride*6);\n      temp = A1;\n      A1 = vec2d_unpacklo(A1, A2);\n      A2 = vec2d_unpackhi(temp, A2);\n\n      temp = C1;\n      C1 = vec2d_unpacklo(C1, C2);\n      C2 = vec2d_unpackhi(temp, C2);\n\n      B1 = ploadt<Packet2d,MatrixAlignment>(data + stride*8);\n      D1 = ploadt<Packet2d,MatrixAlignment>(data + stride*10);\n      B2 = ploadt<Packet2d,MatrixAlignment>(data + stride*12);\n      D2 = ploadt<Packet2d,MatrixAlignment>(data + stride*14);\n\n      temp = B1;\n      B1 = vec2d_unpacklo(B1, B2);\n      B2 = vec2d_unpackhi(temp, B2);\n\n      temp = D1;\n      D1 = vec2d_unpacklo(D1, D2);\n      D2 = vec2d_unpackhi(temp, D2);\n    }\n\n    // determinants of the sub-matrices\n    Packet2d dA, dB, dC, dD;\n\n    dA = vec2d_swizzle2(A2, A2, 1);\n    dA = pmul(A1, dA);\n    dA = psub(dA, vec2d_duplane(dA, 1));\n\n    dB = vec2d_swizzle2(B2, B2, 1);\n    dB = pmul(B1, dB);\n    dB = psub(dB, vec2d_duplane(dB, 1));\n\n    dC = vec2d_swizzle2(C2, C2, 1);\n    dC = pmul(C1, dC);\n    dC = psub(dC, vec2d_duplane(dC, 1));\n\n    dD = vec2d_swizzle2(D2, D2, 1);\n    dD = pmul(D1, dD);\n    dD = psub(dD, vec2d_duplane(dD, 1));\n\n    Packet2d DC1, DC2, AB1, AB2;\n\n    // AB = A# * B, where A# denotes the adjugate of A, and * denotes matrix product.\n    AB1 = pmul(B1, vec2d_duplane(A2, 1));\n    AB2 = pmul(B2, vec2d_duplane(A1, 0));\n    AB1 = psub(AB1, pmul(B2, vec2d_duplane(A1, 1)));\n    AB2 = psub(AB2, pmul(B1, vec2d_duplane(A2, 0)));\n\n    // DC = D#*C\n    DC1 = pmul(C1, vec2d_duplane(D2, 1));\n    DC2 = pmul(C2, vec2d_duplane(D1, 0));\n    DC1 = psub(DC1, pmul(C2, vec2d_duplane(D1, 1)));\n    DC2 = psub(DC2, pmul(C1, vec2d_duplane(D2, 0)));\n\n    Packet2d d1, d2;\n\n    // determinant of the input matrix, det = |A||D| + |B||C| - trace(A#*B*D#*C)\n    Packet2d det;\n\n    // reciprocal of the determinant of the input matrix, rd = 1/det\n    Packet2d rd;\n\n    d1 = pmul(AB1, vec2d_swizzle2(DC1, DC2, 0));\n    d2 = pmul(AB2, vec2d_swizzle2(DC1, DC2, 3));\n    rd = padd(d1, d2);\n    rd = padd(rd, vec2d_duplane(rd, 1));\n\n    d1 = pmul(dA, dD);\n    d2 = pmul(dB, dC);\n\n    det = padd(d1, d2);\n    det = psub(det, rd);\n    det = vec2d_duplane(det, 0);\n    rd = pdiv(pset1<Packet2d>(1.0), det);\n\n    // rows of four sub-matrices of the inverse\n    Packet2d iA1, iA2, iB1, iB2, iC1, iC2, iD1, iD2;\n\n    // iD = D*|A| - C*A#*B\n   
 iD1 = pmul(AB1, vec2d_duplane(C1, 0));\n    iD2 = pmul(AB1, vec2d_duplane(C2, 0));\n    iD1 = padd(iD1, pmul(AB2, vec2d_duplane(C1, 1)));\n    iD2 = padd(iD2, pmul(AB2, vec2d_duplane(C2, 1)));\n    dA = vec2d_duplane(dA, 0);\n    iD1 = psub(pmul(D1, dA), iD1);\n    iD2 = psub(pmul(D2, dA), iD2);\n\n    // iA = A*|D| - B*D#*C\n    iA1 = pmul(DC1, vec2d_duplane(B1, 0));\n    iA2 = pmul(DC1, vec2d_duplane(B2, 0));\n    iA1 = padd(iA1, pmul(DC2, vec2d_duplane(B1, 1)));\n    iA2 = padd(iA2, pmul(DC2, vec2d_duplane(B2, 1)));\n    dD = vec2d_duplane(dD, 0);\n    iA1 = psub(pmul(A1, dD), iA1);\n    iA2 = psub(pmul(A2, dD), iA2);\n\n    // iB = C*|B| - D * (A#B)# = C*|B| - D*B#*A\n    iB1 = pmul(D1, vec2d_swizzle2(AB2, AB1, 1));\n    iB2 = pmul(D2, vec2d_swizzle2(AB2, AB1, 1));\n    iB1 = psub(iB1, pmul(vec2d_swizzle2(D1, D1, 1), vec2d_swizzle2(AB2, AB1, 2)));\n    iB2 = psub(iB2, pmul(vec2d_swizzle2(D2, D2, 1), vec2d_swizzle2(AB2, AB1, 2)));\n    dB = vec2d_duplane(dB, 0);\n    iB1 = psub(pmul(C1, dB), iB1);\n    iB2 = psub(pmul(C2, dB), iB2);\n\n    // iC = B*|C| - A * (D#C)# = B*|C| - A*C#*D\n    iC1 = pmul(A1, vec2d_swizzle2(DC2, DC1, 1));\n    iC2 = pmul(A2, vec2d_swizzle2(DC2, DC1, 1));\n    iC1 = psub(iC1, pmul(vec2d_swizzle2(A1, A1, 1), vec2d_swizzle2(DC2, DC1, 2)));\n    iC2 = psub(iC2, pmul(vec2d_swizzle2(A2, A2, 1), vec2d_swizzle2(DC2, DC1, 2)));\n    dC = vec2d_duplane(dC, 0);\n    iC1 = psub(pmul(B1, dC), iC1);\n    iC2 = psub(pmul(B2, dC), iC2);\n\n    const double sign_mask1[2] = {0.0, numext::bit_cast<double>(0x8000000000000000ull)};\n    const double sign_mask2[2] = {numext::bit_cast<double>(0x8000000000000000ull), 0.0};\n    const Packet2d sign_PN = ploadu<Packet2d>(sign_mask1);\n    const Packet2d sign_NP = ploadu<Packet2d>(sign_mask2);\n    d1 = pxor(rd, sign_PN);\n    d2 = pxor(rd, sign_NP);\n\n    Index res_stride = result.outerStride();\n    double *res = result.data();\n    pstoret<double, Packet2d, ResultAlignment>(res + 0, pmul(vec2d_swizzle2(iA2, iA1, 3), d1));\n    pstoret<double, Packet2d, ResultAlignment>(res + res_stride, pmul(vec2d_swizzle2(iA2, iA1, 0), d2));\n    pstoret<double, Packet2d, ResultAlignment>(res + 2, pmul(vec2d_swizzle2(iB2, iB1, 3), d1));\n    pstoret<double, Packet2d, ResultAlignment>(res + res_stride + 2, pmul(vec2d_swizzle2(iB2, iB1, 0), d2));\n    pstoret<double, Packet2d, ResultAlignment>(res + 2 * res_stride, pmul(vec2d_swizzle2(iC2, iC1, 3), d1));\n    pstoret<double, Packet2d, ResultAlignment>(res + 3 * res_stride, pmul(vec2d_swizzle2(iC2, iC1, 0), d2));\n    pstoret<double, Packet2d, ResultAlignment>(res + 2 * res_stride + 2, pmul(vec2d_swizzle2(iD2, iD1, 3), d1));\n    pstoret<double, Packet2d, ResultAlignment>(res + 3 * res_stride + 2, pmul(vec2d_swizzle2(iD2, iD1, 0), d2));\n  }\n};\n#endif\n} // namespace internal\n} // namespace Eigen\n#endif\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/MetisSupport/MetisSupport.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n#ifndef METIS_SUPPORT_H\n#define METIS_SUPPORT_H\n\nnamespace Eigen {\n/**\n * Get the fill-reducing ordering from the METIS package\n * \n * If A is the original matrix and Ap is the permuted matrix, \n * the fill-reducing permutation is defined as follows :\n * Row (column) i of A is the matperm(i) row (column) of Ap. \n * WARNING: As computed by METIS, this corresponds to the vector iperm (instead of perm)\n */\ntemplate <typename StorageIndex>\nclass MetisOrdering\n{\npublic:\n  typedef PermutationMatrix<Dynamic,Dynamic,StorageIndex> PermutationType;\n  typedef Matrix<StorageIndex,Dynamic,1> IndexVector; \n  \n  template <typename MatrixType>\n  void get_symmetrized_graph(const MatrixType& A)\n  {\n    Index m = A.cols(); \n    eigen_assert((A.rows() == A.cols()) && \"ONLY FOR SQUARED MATRICES\");\n    // Get the transpose of the input matrix \n    MatrixType At = A.transpose(); \n    // Get the number of nonzeros elements in each row/col of At+A\n    Index TotNz = 0; \n    IndexVector visited(m); \n    visited.setConstant(-1); \n    for (StorageIndex j = 0; j < m; j++)\n    {\n      // Compute the union structure of of A(j,:) and At(j,:)\n      visited(j) = j; // Do not include the diagonal element\n      // Get the nonzeros in row/column j of A\n      for (typename MatrixType::InnerIterator it(A, j); it; ++it)\n      {\n        Index idx = it.index(); // Get the row index (for column major) or column index (for row major)\n        if (visited(idx) != j ) \n        {\n          visited(idx) = j; \n          ++TotNz; \n        }\n      }\n      //Get the nonzeros in row/column j of At\n      for (typename MatrixType::InnerIterator it(At, j); it; ++it)\n      {\n        Index idx = it.index(); \n        if(visited(idx) != j)\n        {\n          visited(idx) = j; \n          ++TotNz; \n        }\n      }\n    }\n    // Reserve place for A + At\n    m_indexPtr.resize(m+1);\n    m_innerIndices.resize(TotNz); \n\n    // Now compute the real adjacency list of each column/row \n    visited.setConstant(-1); \n    StorageIndex CurNz = 0; \n    for (StorageIndex j = 0; j < m; j++)\n    {\n      m_indexPtr(j) = CurNz; \n      \n      visited(j) = j; // Do not include the diagonal element\n      // Add the pattern of row/column j of A to A+At\n      for (typename MatrixType::InnerIterator it(A,j); it; ++it)\n      {\n        StorageIndex idx = it.index(); // Get the row index (for column major) or column index (for row major)\n        if (visited(idx) != j ) \n        {\n          visited(idx) = j; \n          m_innerIndices(CurNz) = idx; \n          CurNz++; \n        }\n      }\n      //Add the pattern of row/column j of At to A+At\n      for (typename MatrixType::InnerIterator it(At, j); it; ++it)\n      {\n        StorageIndex idx = it.index(); \n        if(visited(idx) != j)\n        {\n          visited(idx) = j; \n          m_innerIndices(CurNz) = idx; \n          ++CurNz; \n        }\n      }\n    }\n    m_indexPtr(m) = CurNz;    \n  }\n  \n  template <typename MatrixType>\n  void operator() (const MatrixType& A, PermutationType& matperm)\n  {\n     StorageIndex m = 
internal::convert_index<StorageIndex>(A.cols()); // must be StorageIndex, because it is passed by address to METIS\n     IndexVector perm(m),iperm(m); \n    // First, symmetrize the matrix graph. \n     get_symmetrized_graph(A); \n     int output_error;\n     \n     // Call the fill-reducing routine from METIS \n     output_error = METIS_NodeND(&m, m_indexPtr.data(), m_innerIndices.data(), NULL, NULL, perm.data(), iperm.data());\n     \n    if(output_error != METIS_OK) \n    {\n      //FIXME The ordering interface should define a class of possible errors \n     std::cerr << \"ERROR WHILE CALLING THE METIS PACKAGE \\n\"; \n     return; \n    }\n    \n    // Get the fill-reducing permutation \n    // NOTE: If Ap is the permuted matrix then perm and iperm vectors are defined as follows \n    // Row (column) i of Ap is the perm(i) row(column) of A, and row (column) i of A is the iperm(i) row(column) of Ap\n    \n     matperm.resize(m);\n     for (int j = 0; j < m; j++)\n       matperm.indices()(iperm(j)) = j;\n   \n  }\n  \n  protected:\n    IndexVector m_indexPtr; // Pointer to the adjacency list of each row/column\n    IndexVector m_innerIndices; // Adjacency list \n};\n\n} // end namespace Eigen \n#endif\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/OrderingMethods/Amd.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n/*\nNOTE: this routine has been adapted from the CSparse library:\n\nCopyright (c) 2006, Timothy A. Davis.\nhttp://www.suitesparse.com\n\nThe author of CSparse, Timothy A. Davis., has executed a license with Google LLC\nto permit distribution of this code and derivative works as part of Eigen under\nthe Mozilla Public License v. 2.0, as stated at the top of this file.\n*/\n\n#ifndef EIGEN_SPARSE_AMD_H\n#define EIGEN_SPARSE_AMD_H\n\nnamespace Eigen { \n\nnamespace internal {\n  \ntemplate<typename T> inline T amd_flip(const T& i) { return -i-2; }\ntemplate<typename T> inline T amd_unflip(const T& i) { return i<0 ? amd_flip(i) : i; }\ntemplate<typename T0, typename T1> inline bool amd_marked(const T0* w, const T1& j) { return w[j]<0; }\ntemplate<typename T0, typename T1> inline void amd_mark(const T0* w, const T1& j) { return w[j] = amd_flip(w[j]); }\n\n/* clear w */\ntemplate<typename StorageIndex>\nstatic StorageIndex cs_wclear (StorageIndex mark, StorageIndex lemax, StorageIndex *w, StorageIndex n)\n{\n  StorageIndex k;\n  if(mark < 2 || (mark + lemax < 0))\n  {\n    for(k = 0; k < n; k++)\n      if(w[k] != 0)\n        w[k] = 1;\n    mark = 2;\n  }\n  return (mark);     /* at this point, w[0..n-1] < mark holds */\n}\n\n/* depth-first search and postorder of a tree rooted at node j */\ntemplate<typename StorageIndex>\nStorageIndex cs_tdfs(StorageIndex j, StorageIndex k, StorageIndex *head, const StorageIndex *next, StorageIndex *post, StorageIndex *stack)\n{\n  StorageIndex i, p, top = 0;\n  if(!head || !next || !post || !stack) return (-1);    /* check inputs */\n  stack[0] = j;                 /* place j on the stack */\n  while (top >= 0)                /* while (stack is not empty) */\n  {\n    p = stack[top];           /* p = top of stack */\n    i = head[p];              /* i = youngest child of p */\n    if(i == -1)\n    {\n      top--;                 /* p has no unordered children left */\n      post[k++] = p;        /* node p is the kth postordered node */\n    }\n    else\n    {\n      head[p] = next[i];   /* remove i from children of p */\n      stack[++top] = i;     /* start dfs on child node i */\n    }\n  }\n  return k;\n}\n\n\n/** \\internal\n  * \\ingroup OrderingMethods_Module \n  * Approximate minimum degree ordering algorithm.\n  *\n  * \\param[in] C the input selfadjoint matrix stored in compressed column major format.\n  * \\param[out] perm the permutation P reducing the fill-in of the input matrix \\a C\n  *\n  * Note that the input matrix \\a C must be complete, that is both the upper and lower parts have to be stored, as well as the diagonal entries.\n  * On exit the values of C are destroyed */\ntemplate<typename Scalar, typename StorageIndex>\nvoid minimum_degree_ordering(SparseMatrix<Scalar,ColMajor,StorageIndex>& C, PermutationMatrix<Dynamic,Dynamic,StorageIndex>& perm)\n{\n  using std::sqrt;\n  \n  StorageIndex d, dk, dext, lemax = 0, e, elenk, eln, i, j, k, k1,\n                k2, k3, jlast, ln, dense, nzmax, mindeg = 0, nvi, nvj, nvk, mark, wnvi,\n                ok, nel = 0, p, p1, p2, p3, p4, pj, pk, pk1, pk2, pn, q, t, h;\n  \n  StorageIndex n = StorageIndex(C.cols());\n  
dense = std::max<StorageIndex> (16, StorageIndex(10 * sqrt(double(n))));   /* find dense threshold */\n  dense = (std::min)(n-2, dense);\n  \n  StorageIndex cnz = StorageIndex(C.nonZeros());\n  perm.resize(n+1);\n  t = cnz + cnz/5 + 2*n;                 /* add elbow room to C */\n  C.resizeNonZeros(t);\n  \n  // get workspace\n  ei_declare_aligned_stack_constructed_variable(StorageIndex,W,8*(n+1),0);\n  StorageIndex* len     = W;\n  StorageIndex* nv      = W +   (n+1);\n  StorageIndex* next    = W + 2*(n+1);\n  StorageIndex* head    = W + 3*(n+1);\n  StorageIndex* elen    = W + 4*(n+1);\n  StorageIndex* degree  = W + 5*(n+1);\n  StorageIndex* w       = W + 6*(n+1);\n  StorageIndex* hhead   = W + 7*(n+1);\n  StorageIndex* last    = perm.indices().data();                              /* use P as workspace for last */\n  \n  /* --- Initialize quotient graph ---------------------------------------- */\n  StorageIndex* Cp = C.outerIndexPtr();\n  StorageIndex* Ci = C.innerIndexPtr();\n  for(k = 0; k < n; k++)\n    len[k] = Cp[k+1] - Cp[k];\n  len[n] = 0;\n  nzmax = t;\n  \n  for(i = 0; i <= n; i++)\n  {\n    head[i]   = -1;                     // degree list i is empty\n    last[i]   = -1;\n    next[i]   = -1;\n    hhead[i]  = -1;                     // hash list i is empty \n    nv[i]     = 1;                      // node i is just one node\n    w[i]      = 1;                      // node i is alive\n    elen[i]   = 0;                      // Ek of node i is empty\n    degree[i] = len[i];                 // degree of node i\n  }\n  mark = internal::cs_wclear<StorageIndex>(0, 0, w, n);         /* clear w */\n  \n  /* --- Initialize degree lists ------------------------------------------ */\n  for(i = 0; i < n; i++)\n  {\n    bool has_diag = false;\n    for(p = Cp[i]; p<Cp[i+1]; ++p)\n      if(Ci[p]==i)\n      {\n        has_diag = true;\n        break;\n      }\n   \n    d = degree[i];\n    if(d == 1 && has_diag)           /* node i is empty */\n    {\n      elen[i] = -2;                 /* element i is dead */\n      nel++;\n      Cp[i] = -1;                   /* i is a root of assembly tree */\n      w[i] = 0;\n    }\n    else if(d > dense || !has_diag)  /* node i is dense or has no structural diagonal element */\n    {\n      nv[i] = 0;                    /* absorb i into element n */\n      elen[i] = -1;                 /* node i is dead */\n      nel++;\n      Cp[i] = amd_flip (n);\n      nv[n]++;\n    }\n    else\n    {\n      if(head[d] != -1) last[head[d]] = i;\n      next[i] = head[d];           /* put node i in degree list d */\n      head[d] = i;\n    }\n  }\n  \n  elen[n] = -2;                         /* n is a dead element */\n  Cp[n] = -1;                           /* n is a root of assembly tree */\n  w[n] = 0;                             /* n is a dead element */\n  \n  while (nel < n)                         /* while (selecting pivots) do */\n  {\n    /* --- Select node of minimum approximate degree -------------------- */\n    for(k = -1; mindeg < n && (k = head[mindeg]) == -1; mindeg++) {}\n    if(next[k] != -1) last[next[k]] = -1;\n    head[mindeg] = next[k];          /* remove k from degree list */\n    elenk = elen[k];                  /* elenk = |Ek| */\n    nvk = nv[k];                      /* # of nodes k represents */\n    nel += nvk;                        /* nv[k] nodes of A eliminated */\n    \n    /* --- Garbage collection ------------------------------------------- */\n    if(elenk > 0 && cnz + mindeg >= nzmax)\n    {\n      for(j = 0; j < n; j++)\n      {\n        
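/* (editorial) garbage collection, pass 1: stash each live object's first\n        entry in Cp[j] and tag that slot with amd_flip(j) so the compaction\n        scan below can recognize where object j begins */\n        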
if((p = Cp[j]) >= 0)      /* j is a live node or element */\n        {\n          Cp[j] = Ci[p];          /* save first entry of object */\n          Ci[p] = amd_flip (j);    /* first entry is now amd_flip(j) */\n        }\n      }\n      for(q = 0, p = 0; p < cnz; ) /* scan all of memory */\n      {\n        if((j = amd_flip (Ci[p++])) >= 0)  /* found object j */\n        {\n          Ci[q] = Cp[j];       /* restore first entry of object */\n          Cp[j] = q++;          /* new pointer to object j */\n          for(k3 = 0; k3 < len[j]-1; k3++) Ci[q++] = Ci[p++];\n        }\n      }\n      cnz = q;                       /* Ci[cnz...nzmax-1] now free */\n    }\n    \n    /* --- Construct new element ---------------------------------------- */\n    dk = 0;\n    nv[k] = -nvk;                     /* flag k as in Lk */\n    p = Cp[k];\n    pk1 = (elenk == 0) ? p : cnz;      /* do in place if elen[k] == 0 */\n    pk2 = pk1;\n    for(k1 = 1; k1 <= elenk + 1; k1++)\n    {\n      if(k1 > elenk)\n      {\n        e = k;                     /* search the nodes in k */\n        pj = p;                    /* list of nodes starts at Ci[pj]*/\n        ln = len[k] - elenk;      /* length of list of nodes in k */\n      }\n      else\n      {\n        e = Ci[p++];              /* search the nodes in e */\n        pj = Cp[e];\n        ln = len[e];              /* length of list of nodes in e */\n      }\n      for(k2 = 1; k2 <= ln; k2++)\n      {\n        i = Ci[pj++];\n        if((nvi = nv[i]) <= 0) continue; /* node i dead, or seen */\n        dk += nvi;                 /* degree[Lk] += size of node i */\n        nv[i] = -nvi;             /* negate nv[i] to denote i in Lk*/\n        Ci[pk2++] = i;            /* place i in Lk */\n        if(next[i] != -1) last[next[i]] = last[i];\n        if(last[i] != -1)         /* remove i from degree list */\n        {\n          next[last[i]] = next[i];\n        }\n        else\n        {\n          head[degree[i]] = next[i];\n        }\n      }\n      if(e != k)\n      {\n        Cp[e] = amd_flip (k);      /* absorb e into k */\n        w[e] = 0;                 /* e is now a dead element */\n      }\n    }\n    if(elenk != 0) cnz = pk2;         /* Ci[cnz...nzmax] is free */\n    degree[k] = dk;                   /* external degree of k - |Lk\\i| */\n    Cp[k] = pk1;                      /* element k is in Ci[pk1..pk2-1] */\n    len[k] = pk2 - pk1;\n    elen[k] = -2;                     /* k is now an element */\n    \n    /* --- Find set differences ----------------------------------------- */\n    mark = internal::cs_wclear<StorageIndex>(mark, lemax, w, n);  /* clear w if necessary */\n    for(pk = pk1; pk < pk2; pk++)    /* scan 1: find |Le\\Lk| */\n    {\n      i = Ci[pk];\n      if((eln = elen[i]) <= 0) continue;/* skip if elen[i] empty */\n      nvi = -nv[i];                      /* nv[i] was negated */\n      wnvi = mark - nvi;\n      for(p = Cp[i]; p <= Cp[i] + eln - 1; p++)  /* scan Ei */\n      {\n        e = Ci[p];\n        if(w[e] >= mark)\n        {\n          w[e] -= nvi;          /* decrement |Le\\Lk| */\n        }\n        else if(w[e] != 0)        /* ensure e is a live element */\n        {\n          w[e] = degree[e] + wnvi; /* 1st time e seen in scan 1 */\n        }\n      }\n    }\n    \n    /* --- Degree update ------------------------------------------------ */\n    for(pk = pk1; pk < pk2; pk++)    /* scan2: degree update */\n    {\n      i = Ci[pk];                   /* consider node i in Lk */\n      p1 = Cp[i];\n      p2 = p1 + elen[i] - 
1;\n      pn = p1;\n      for(h = 0, d = 0, p = p1; p <= p2; p++)    /* scan Ei */\n      {\n        e = Ci[p];\n        if(w[e] != 0)             /* e is an unabsorbed element */\n        {\n          dext = w[e] - mark;   /* dext = |Le\\Lk| */\n          if(dext > 0)\n          {\n            d += dext;         /* sum up the set differences */\n            Ci[pn++] = e;     /* keep e in Ei */\n            h += e;            /* compute the hash of node i */\n          }\n          else\n          {\n            Cp[e] = amd_flip (k);  /* aggressive absorb. e->k */\n            w[e] = 0;             /* e is a dead element */\n          }\n        }\n      }\n      elen[i] = pn - p1 + 1;        /* elen[i] = |Ei| */\n      p3 = pn;\n      p4 = p1 + len[i];\n      for(p = p2 + 1; p < p4; p++) /* prune edges in Ai */\n      {\n        j = Ci[p];\n        if((nvj = nv[j]) <= 0) continue; /* node j dead or in Lk */\n        d += nvj;                  /* degree(i) += |j| */\n        Ci[pn++] = j;             /* place j in node list of i */\n        h += j;                    /* compute hash for node i */\n      }\n      if(d == 0)                     /* check for mass elimination */\n      {\n        Cp[i] = amd_flip (k);      /* absorb i into k */\n        nvi = -nv[i];\n        dk -= nvi;                 /* |Lk| -= |i| */\n        nvk += nvi;                /* |k| += nv[i] */\n        nel += nvi;\n        nv[i] = 0;\n        elen[i] = -1;             /* node i is dead */\n      }\n      else\n      {\n        degree[i] = std::min<StorageIndex> (degree[i], d);   /* update degree(i) */\n        Ci[pn] = Ci[p3];         /* move first node to end */\n        Ci[p3] = Ci[p1];         /* move 1st el. to end of Ei */\n        Ci[p1] = k;               /* add k as 1st element in of Ei */\n        len[i] = pn - p1 + 1;     /* new len of adj. 
list of node i */\n        h %= n;                    /* finalize hash of i */\n        next[i] = hhead[h];      /* place i in hash bucket */\n        hhead[h] = i;\n        last[i] = h;      /* save hash of i in last[i] */\n      }\n    }                                   /* scan2 is done */\n    degree[k] = dk;                   /* finalize |Lk| */\n    lemax = std::max<StorageIndex>(lemax, dk);\n    mark = internal::cs_wclear<StorageIndex>(mark+lemax, lemax, w, n);    /* clear w */\n    \n    /* --- Supernode detection ------------------------------------------ */\n    for(pk = pk1; pk < pk2; pk++)\n    {\n      i = Ci[pk];\n      if(nv[i] >= 0) continue;         /* skip if i is dead */\n      h = last[i];                      /* scan hash bucket of node i */\n      i = hhead[h];\n      hhead[h] = -1;                    /* hash bucket will be empty */\n      for(; i != -1 && next[i] != -1; i = next[i], mark++)\n      {\n        ln = len[i];\n        eln = elen[i];\n        for(p = Cp[i]+1; p <= Cp[i] + ln-1; p++) w[Ci[p]] = mark;\n        jlast = i;\n        for(j = next[i]; j != -1; ) /* compare i with all j */\n        {\n          ok = (len[j] == ln) && (elen[j] == eln);\n          for(p = Cp[j] + 1; ok && p <= Cp[j] + ln - 1; p++)\n          {\n            if(w[Ci[p]] != mark) ok = 0;    /* compare i and j*/\n          }\n          if(ok)                     /* i and j are identical */\n          {\n            Cp[j] = amd_flip (i);  /* absorb j into i */\n            nv[i] += nv[j];\n            nv[j] = 0;\n            elen[j] = -1;         /* node j is dead */\n            j = next[j];          /* delete j from hash bucket */\n            next[jlast] = j;\n          }\n          else\n          {\n            jlast = j;             /* j and i are different */\n            j = next[j];\n          }\n        }\n      }\n    }\n    \n    /* --- Finalize new element------------------------------------------ */\n    for(p = pk1, pk = pk1; pk < pk2; pk++)   /* finalize Lk */\n    {\n      i = Ci[pk];\n      if((nvi = -nv[i]) <= 0) continue;/* skip if i is dead */\n      nv[i] = nvi;                      /* restore nv[i] */\n      d = degree[i] + dk - nvi;         /* compute external degree(i) */\n      d = std::min<StorageIndex> (d, n - nel - nvi);\n      if(head[d] != -1) last[head[d]] = i;\n      next[i] = head[d];               /* put i back in degree list */\n      last[i] = -1;\n      head[d] = i;\n      mindeg = std::min<StorageIndex> (mindeg, d);       /* find new minimum degree */\n      degree[i] = d;\n      Ci[p++] = i;                      /* place i in Lk */\n    }\n    nv[k] = nvk;                      /* # nodes absorbed into k */\n    if((len[k] = p-pk1) == 0)         /* length of adj list of element k*/\n    {\n      Cp[k] = -1;                   /* k is a root of the tree */\n      w[k] = 0;                     /* k is now a dead element */\n    }\n    if(elenk != 0) cnz = p;           /* free unused space in Lk */\n  }\n  \n  /* --- Postordering ----------------------------------------------------- */\n  for(i = 0; i < n; i++) Cp[i] = amd_flip (Cp[i]);/* fix assembly tree */\n  for(j = 0; j <= n; j++) head[j] = -1;\n  for(j = n; j >= 0; j--)              /* place unordered nodes in lists */\n  {\n    if(nv[j] > 0) continue;          /* skip if j is an element */\n    next[j] = head[Cp[j]];          /* place j in list of its parent */\n    head[Cp[j]] = j;\n  }\n  for(e = n; e >= 0; e--)              /* place elements in lists */\n  {\n    if(nv[e] <= 0) continue;       
  /* skip unless e is an element */\n    if(Cp[e] != -1)\n    {\n      next[e] = head[Cp[e]];      /* place e in list of its parent */\n      head[Cp[e]] = e;\n    }\n  }\n  for(k = 0, i = 0; i <= n; i++)       /* postorder the assembly tree */\n  {\n    if(Cp[i] == -1) k = internal::cs_tdfs<StorageIndex>(i, k, head, next, perm.indices().data(), w);\n  }\n  \n  perm.indices().conservativeResize(n);\n}\n\n} // namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_SPARSE_AMD_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/OrderingMethods/Eigen_Colamd.h",
    "content": "// // This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2012 Desire Nuentsa Wakam <desire.nuentsa_wakam@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n// This file is modified from the colamd/symamd library. The copyright is below\n\n//   The authors of the code itself are Stefan I. Larimore and Timothy A.\n//   Davis (davis@cise.ufl.edu), University of Florida.  The algorithm was\n//   developed in collaboration with John Gilbert, Xerox PARC, and Esmond\n//   Ng, Oak Ridge National Laboratory.\n//\n//     Date:\n//\n//   September 8, 2003.  Version 2.3.\n//\n//     Acknowledgements:\n//\n//   This work was supported by the National Science Foundation, under\n//   grants DMS-9504974 and DMS-9803599.\n//\n//     Notice:\n//\n//   Copyright (c) 1998-2003 by the University of Florida.\n//   All Rights Reserved.\n//\n//   THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY\n//   EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.\n//\n//   Permission is hereby granted to use, copy, modify, and/or distribute\n//   this program, provided that the Copyright, this License, and the\n//   Availability of the original version is retained on all copies and made\n//   accessible to the end-user of any code or package that includes COLAMD\n//   or any modified version of COLAMD.\n//\n//     Availability:\n//\n//   The colamd/symamd library is available at\n//\n//       http://www.suitesparse.com\n\n\n#ifndef EIGEN_COLAMD_H\n#define EIGEN_COLAMD_H\n\nnamespace internal {\n\nnamespace Colamd {\n\n/* Ensure that debugging is turned off: */\n#ifndef COLAMD_NDEBUG\n#define COLAMD_NDEBUG\n#endif /* NDEBUG */\n\n\n/* ========================================================================== */\n/* === Knob and statistics definitions ====================================== */\n/* ========================================================================== */\n\n/* size of the knobs [ ] array.  Only knobs [0..1] are currently used. */\nconst int NKnobs = 20;\n\n/* number of output statistics.  Only stats [0..6] are currently used. */\nconst int NStats = 20;\n\n/* Indices into knobs and stats array. */\nenum KnobsStatsIndex {\n  /* knobs [0] and stats [0]: dense row knob and output statistic. */\n  DenseRow = 0,\n\n  /* knobs [1] and stats [1]: dense column knob and output statistic. 
*/\n  DenseCol = 1,\n\n  /* stats [2]: memory defragmentation count output statistic */\n  DefragCount = 2,\n\n  /* stats [3]: colamd status:  zero OK, > 0 warning or notice, < 0 error */\n  Status = 3,\n\n  /* stats [4..6]: error info, or info on jumbled columns */\n  Info1 = 4,\n  Info2 = 5,\n  Info3 = 6\n};\n\n/* error codes returned in stats [3]: */\nenum Status {\n  Ok = 0,\n  OkButJumbled = 1,\n  ErrorANotPresent = -1,\n  ErrorPNotPresent = -2,\n  ErrorNrowNegative = -3,\n  ErrorNcolNegative = -4,\n  ErrorNnzNegative = -5,\n  ErrorP0Nonzero = -6,\n  ErrorATooSmall = -7,\n  ErrorColLengthNegative = -8,\n  ErrorRowIndexOutOfBounds = -9,\n  ErrorOutOfMemory = -10,\n  ErrorInternalError = -999\n};\n/* ========================================================================== */\n/* === Definitions ========================================================== */\n/* ========================================================================== */\n\ntemplate <typename IndexType>\nIndexType ones_complement(const IndexType r) {\n  return (-(r)-1);\n}\n\n/* -------------------------------------------------------------------------- */\nconst int Empty = -1;\n\n/* Row and column status */\nenum RowColumnStatus {\n  Alive = 0,\n  Dead = -1\n};\n\n/* Column status */\nenum ColumnStatus {\n  DeadPrincipal = -1,\n  DeadNonPrincipal = -2\n};\n\n/* ========================================================================== */\n/* === Colamd reporting mechanism =========================================== */\n/* ========================================================================== */\n\n// == Row and Column structures ==\ntemplate <typename IndexType>\nstruct ColStructure\n{\n  IndexType start ;   /* index for A of first row in this column, or Dead */\n  /* if column is dead */\n  IndexType length ;  /* number of rows in this column */\n  union\n  {\n    IndexType thickness ; /* number of original columns represented by this */\n    /* col, if the column is alive */\n    IndexType parent ;  /* parent in parent tree super-column structure, if */\n    /* the column is dead */\n  } shared1 ;\n  union\n  {\n    IndexType score ; /* the score used to maintain heap, if col is alive */\n    IndexType order ; /* pivot ordering of this column, if col is dead */\n  } shared2 ;\n  union\n  {\n    IndexType headhash ;  /* head of a hash bucket, if col is at the head of */\n    /* a degree list */\n    IndexType hash ;  /* hash value, if col is not in a degree list */\n    IndexType prev ;  /* previous column in degree list, if col is in a */\n    /* degree list (but not at the head of a degree list) */\n  } shared3 ;\n  union\n  {\n    IndexType degree_next ; /* next column, if col is in a degree list */\n    IndexType hash_next ;   /* next column, if col is in a hash list */\n  } shared4 ;\n\n  inline bool is_dead() const { return start < Alive; }\n\n  inline bool is_alive() const { return start >= Alive; }\n\n  inline bool is_dead_principal() const { return start == DeadPrincipal; }\n\n  inline void kill_principal() { start = DeadPrincipal; }\n\n  inline void kill_non_principal() { start = DeadNonPrincipal; }\n\n};\n\ntemplate <typename IndexType>\nstruct RowStructure\n{\n  IndexType start ;   /* index for A of first col in this row */\n  IndexType length ;  /* number of principal columns in this row */\n  union\n  {\n    IndexType degree ;  /* number of principal & non-principal columns in row */\n    IndexType p ;   /* used as a row pointer in init_rows_cols () */\n  } shared1 ;\n  union\n  {\n    IndexType mark 
;  /* for computing set differences and marking dead rows*/\n    IndexType first_column ;/* first column in row (used in garbage collection) */\n  } shared2 ;\n\n  inline bool is_dead() const { return shared2.mark < Alive; }\n\n  inline bool is_alive() const { return shared2.mark >= Alive; }\n\n  inline void kill() { shared2.mark = Dead; }\n\n};\n\n/* ========================================================================== */\n/* === Colamd recommended memory size ======================================= */\n/* ========================================================================== */\n\n/*\n  The recommended length Alen of the array A passed to colamd is given by\n  the COLAMD_RECOMMENDED (nnz, n_row, n_col) macro.  It returns -1 if any\n  argument is negative.  2*nnz space is required for the row and column\n  indices of the matrix. colamd_c (n_col) + colamd_r (n_row) space is\n  required for the Col and Row arrays, respectively, which are internal to\n  colamd.  An additional n_col space is the minimal amount of \"elbow room\",\n  and nnz/5 more space is recommended for run time efficiency.\n\n  This macro is not needed when using symamd.\n\n  Explicit typecast to IndexType added Sept. 23, 2002, COLAMD version 2.2, to avoid\n  gcc -pedantic warning messages.\n*/\ntemplate <typename IndexType>\ninline IndexType colamd_c(IndexType n_col)\n{ return IndexType( ((n_col) + 1) * sizeof (ColStructure<IndexType>) / sizeof (IndexType) ) ; }\n\ntemplate <typename IndexType>\ninline IndexType  colamd_r(IndexType n_row)\n{ return IndexType(((n_row) + 1) * sizeof (RowStructure<IndexType>) / sizeof (IndexType)); }\n\n// Prototypes of non-user callable routines\ntemplate <typename IndexType>\nstatic IndexType init_rows_cols (IndexType n_row, IndexType n_col, RowStructure<IndexType> Row [], ColStructure<IndexType> col [], IndexType A [], IndexType p [], IndexType stats[NStats] );\n\ntemplate <typename IndexType>\nstatic void init_scoring (IndexType n_row, IndexType n_col, RowStructure<IndexType> Row [], ColStructure<IndexType> Col [], IndexType A [], IndexType head [], double knobs[NKnobs], IndexType *p_n_row2, IndexType *p_n_col2, IndexType *p_max_deg);\n\ntemplate <typename IndexType>\nstatic IndexType find_ordering (IndexType n_row, IndexType n_col, IndexType Alen, RowStructure<IndexType> Row [], ColStructure<IndexType> Col [], IndexType A [], IndexType head [], IndexType n_col2, IndexType max_deg, IndexType pfree);\n\ntemplate <typename IndexType>\nstatic void order_children (IndexType n_col, ColStructure<IndexType> Col [], IndexType p []);\n\ntemplate <typename IndexType>\nstatic void detect_super_cols (ColStructure<IndexType> Col [], IndexType A [], IndexType head [], IndexType row_start, IndexType row_length ) ;\n\ntemplate <typename IndexType>\nstatic IndexType garbage_collection (IndexType n_row, IndexType n_col, RowStructure<IndexType> Row [], ColStructure<IndexType> Col [], IndexType A [], IndexType *pfree) ;\n\ntemplate <typename IndexType>\nstatic inline  IndexType clear_mark (IndexType n_row, RowStructure<IndexType> Row [] ) ;\n\n/* === No debugging ========================================================= */\n\n#define COLAMD_DEBUG0(params) ;\n#define COLAMD_DEBUG1(params) ;\n#define COLAMD_DEBUG2(params) ;\n#define COLAMD_DEBUG3(params) ;\n#define COLAMD_DEBUG4(params) ;\n\n#define COLAMD_ASSERT(expression) ((void) 0)\n\n\n/**\n * \\brief Returns the recommended value of Alen\n *\n * Returns recommended value of Alen for use by colamd.\n * Returns -1 if any input argument is 
negative.\n * The use of this routine or macro is optional.\n * Note that the macro uses its arguments   more than once,\n * so be careful for side effects, if you pass expressions as arguments to COLAMD_RECOMMENDED.\n *\n * \\param nnz nonzeros in A\n * \\param n_row number of rows in A\n * \\param n_col number of columns in A\n * \\return recommended value of Alen for use by colamd\n */\ntemplate <typename IndexType>\ninline IndexType recommended ( IndexType nnz, IndexType n_row, IndexType n_col)\n{\n  if ((nnz) < 0 || (n_row) < 0 || (n_col) < 0)\n    return (-1);\n  else\n    return (2 * (nnz) + colamd_c (n_col) + colamd_r (n_row) + (n_col) + ((nnz) / 5));\n}\n\n/**\n * \\brief set default parameters  The use of this routine is optional.\n *\n * Colamd: rows with more than (knobs [DenseRow] * n_col)\n * entries are removed prior to ordering.  Columns with more than\n * (knobs [DenseCol] * n_row) entries are removed prior to\n * ordering, and placed last in the output column ordering.\n *\n * DenseRow and DenseCol are defined as 0 and 1,\n * respectively, in colamd.h.  Default values of these two knobs\n * are both 0.5.  Currently, only knobs [0] and knobs [1] are\n * used, but future versions may use more knobs.  If so, they will\n * be properly set to their defaults by the future version of\n * colamd_set_defaults, so that the code that calls colamd will\n * not need to change, assuming that you either use\n * colamd_set_defaults, or pass a (double *) NULL pointer as the\n * knobs array to colamd or symamd.\n *\n * \\param knobs parameter settings for colamd\n */\n\nstatic inline void set_defaults(double knobs[NKnobs])\n{\n  /* === Local variables ================================================== */\n\n  int i ;\n\n  if (!knobs)\n  {\n    return ;      /* no knobs to initialize */\n  }\n  for (i = 0 ; i < NKnobs ; i++)\n  {\n    knobs [i] = 0 ;\n  }\n  knobs [Colamd::DenseRow] = 0.5 ;  /* ignore rows over 50% dense */\n  knobs [Colamd::DenseCol] = 0.5 ;  /* ignore columns over 50% dense */\n}\n\n/**\n * \\brief  Computes a column ordering using the column approximate minimum degree ordering\n *\n * Computes a column ordering (Q) of A such that P(AQ)=LU or\n * (AQ)'AQ=LL' have less fill-in and require fewer floating point\n * operations than factorizing the unpermuted matrix A or A'A,\n * respectively.\n *\n *\n * \\param n_row number of rows in A\n * \\param n_col number of columns in A\n * \\param Alen, size of the array A\n * \\param A row indices of the matrix, of size ALen\n * \\param p column pointers of A, of size n_col+1\n * \\param knobs parameter settings for colamd\n * \\param stats colamd output statistics and error codes\n */\ntemplate <typename IndexType>\nstatic bool compute_ordering(IndexType n_row, IndexType n_col, IndexType Alen, IndexType *A, IndexType *p, double knobs[NKnobs], IndexType stats[NStats])\n{\n  /* === Local variables ================================================== */\n\n  IndexType i ;     /* loop index */\n  IndexType nnz ;     /* nonzeros in A */\n  IndexType Row_size ;    /* size of Row [], in integers */\n  IndexType Col_size ;    /* size of Col [], in integers */\n  IndexType need ;      /* minimum required length of A */\n  Colamd::RowStructure<IndexType> *Row ;   /* pointer into A of Row [0..n_row] array */\n  Colamd::ColStructure<IndexType> *Col ;   /* pointer into A of Col [0..n_col] array */\n  IndexType n_col2 ;    /* number of non-dense, non-empty columns */\n  IndexType n_row2 ;    /* number of non-dense, non-empty rows */\n  IndexType 
ngarbage ;    /* number of garbage collections performed */\n  IndexType max_deg ;   /* maximum row degree */\n  double default_knobs [NKnobs] ; /* default knobs array */\n\n\n  /* === Check the input arguments ======================================== */\n\n  if (!stats)\n  {\n    COLAMD_DEBUG0 ((\"colamd: stats not present\\n\")) ;\n    return (false) ;\n  }\n  for (i = 0 ; i < NStats ; i++)\n  {\n    stats [i] = 0 ;\n  }\n  stats [Colamd::Status] = Colamd::Ok ;\n  stats [Colamd::Info1] = -1 ;\n  stats [Colamd::Info2] = -1 ;\n\n  if (!A)   /* A is not present */\n  {\n    stats [Colamd::Status] = Colamd::ErrorANotPresent ;\n    COLAMD_DEBUG0 ((\"colamd: A not present\\n\")) ;\n    return (false) ;\n  }\n\n  if (!p)   /* p is not present */\n  {\n    stats [Colamd::Status] = Colamd::ErrorPNotPresent ;\n    COLAMD_DEBUG0 ((\"colamd: p not present\\n\")) ;\n    return (false) ;\n  }\n\n  if (n_row < 0)  /* n_row must be >= 0 */\n  {\n    stats [Colamd::Status] = Colamd::ErrorNrowNegative ;\n    stats [Colamd::Info1] = n_row ;\n    COLAMD_DEBUG0 ((\"colamd: nrow negative %d\\n\", n_row)) ;\n    return (false) ;\n  }\n\n  if (n_col < 0)  /* n_col must be >= 0 */\n  {\n    stats [Colamd::Status] = Colamd::ErrorNcolNegative ;\n    stats [Colamd::Info1] = n_col ;\n    COLAMD_DEBUG0 ((\"colamd: ncol negative %d\\n\", n_col)) ;\n    return (false) ;\n  }\n\n  nnz = p [n_col] ;\n  if (nnz < 0)  /* nnz must be >= 0 */\n  {\n    stats [Colamd::Status] = Colamd::ErrorNnzNegative ;\n    stats [Colamd::Info1] = nnz ;\n    COLAMD_DEBUG0 ((\"colamd: number of entries negative %d\\n\", nnz)) ;\n    return (false) ;\n  }\n\n  if (p [0] != 0)\n  {\n    stats [Colamd::Status] = Colamd::ErrorP0Nonzero ;\n    stats [Colamd::Info1] = p [0] ;\n    COLAMD_DEBUG0 ((\"colamd: p[0] not zero %d\\n\", p [0])) ;\n    return (false) ;\n  }\n\n  /* === If no knobs, set default knobs =================================== */\n\n  if (!knobs)\n  {\n    set_defaults (default_knobs) ;\n    knobs = default_knobs ;\n  }\n\n  /* === Allocate the Row and Col arrays from array A ===================== */\n\n  Col_size = colamd_c (n_col) ;\n  Row_size = colamd_r (n_row) ;\n  need = 2*nnz + n_col + Col_size + Row_size ;\n\n  if (need > Alen)\n  {\n    /* not enough space in array A to perform the ordering */\n    stats [Colamd::Status] = Colamd::ErrorATooSmall ;\n    stats [Colamd::Info1] = need ;\n    stats [Colamd::Info2] = Alen ;\n    COLAMD_DEBUG0 ((\"colamd: Need Alen >= %d, given only Alen = %d\\n\", need,Alen));\n    return (false) ;\n  }\n\n  Alen -= Col_size + Row_size ;\n  Col = (ColStructure<IndexType> *) &A [Alen] ;\n  Row = (RowStructure<IndexType> *) &A [Alen + Col_size] ;\n\n  /* === Construct the row and column data structures ===================== */\n\n  if (!Colamd::init_rows_cols (n_row, n_col, Row, Col, A, p, stats))\n  {\n    /* input matrix is invalid */\n    COLAMD_DEBUG0 ((\"colamd: Matrix invalid\\n\")) ;\n    return (false) ;\n  }\n\n  /* === Initialize scores, kill dense rows/columns ======================= */\n\n  Colamd::init_scoring (n_row, n_col, Row, Col, A, p, knobs,\n\t\t&n_row2, &n_col2, &max_deg) ;\n\n  /* === Order the supercolumns =========================================== */\n\n  ngarbage = Colamd::find_ordering (n_row, n_col, Alen, Row, Col, A, p,\n\t\t\t    n_col2, max_deg, 2*nnz) ;\n\n  /* === Order the non-principal columns ================================== */\n\n  Colamd::order_children (n_col, Col, p) ;\n\n  /* === Return statistics in stats ======================================= */\n\n  
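/* (editorial) stats[DenseRow] and stats[DenseCol] report how many dense or\n     empty rows and columns were removed before ordering; stats[DefragCount]\n     is the number of garbage collections performed */\n  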
stats [Colamd::DenseRow] = n_row - n_row2 ;\n  stats [Colamd::DenseCol] = n_col - n_col2 ;\n  stats [Colamd::DefragCount] = ngarbage ;\n  COLAMD_DEBUG0 ((\"colamd: done.\\n\")) ;\n  return (true) ;\n}\n\n/* ========================================================================== */\n/* === NON-USER-CALLABLE ROUTINES: ========================================== */\n/* ========================================================================== */\n\n/* There are no user-callable routines beyond this point in the file */\n\n/* ========================================================================== */\n/* === init_rows_cols ======================================================= */\n/* ========================================================================== */\n\n/*\n  Takes the column form of the matrix in A and creates the row form of the\n  matrix.  Also, row and column attributes are stored in the Col and Row\n  structs.  If the columns are un-sorted or contain duplicate row indices,\n  this routine will also sort and remove duplicate row indices from the\n  column form of the matrix.  Returns false if the matrix is invalid,\n  true otherwise.  Not user-callable.\n*/\ntemplate <typename IndexType>\nstatic IndexType init_rows_cols  /* returns true if OK, or false otherwise */\n  (\n    /* === Parameters ======================================================= */\n\n    IndexType n_row,      /* number of rows of A */\n    IndexType n_col,      /* number of columns of A */\n    RowStructure<IndexType> Row [],    /* of size n_row+1 */\n    ColStructure<IndexType> Col [],    /* of size n_col+1 */\n    IndexType A [],     /* row indices of A, of size Alen */\n    IndexType p [],     /* pointers to columns in A, of size n_col+1 */\n    IndexType stats [NStats]  /* colamd statistics */\n    )\n{\n  /* === Local variables ================================================== */\n\n  IndexType col ;     /* a column index */\n  IndexType row ;     /* a row index */\n  IndexType *cp ;     /* a column pointer */\n  IndexType *cp_end ;   /* a pointer to the end of a column */\n  IndexType *rp ;     /* a row pointer */\n  IndexType *rp_end ;   /* a pointer to the end of a row */\n  IndexType last_row ;    /* previous row */\n\n  /* === Initialize columns, and check column pointers ==================== */\n\n  for (col = 0 ; col < n_col ; col++)\n  {\n    Col [col].start = p [col] ;\n    Col [col].length = p [col+1] - p [col] ;\n\n    if ((Col [col].length) < 0) // extra parentheses to work-around gcc bug 10200\n    {\n      /* column pointers must be non-decreasing */\n      stats [Colamd::Status] = Colamd::ErrorColLengthNegative ;\n      stats [Colamd::Info1] = col ;\n      stats [Colamd::Info2] = Col [col].length ;\n      COLAMD_DEBUG0 ((\"colamd: col %d length %d < 0\\n\", col, Col [col].length)) ;\n      return (false) ;\n    }\n\n    Col [col].shared1.thickness = 1 ;\n    Col [col].shared2.score = 0 ;\n    Col [col].shared3.prev = Empty ;\n    Col [col].shared4.degree_next = Empty ;\n  }\n\n  /* p [0..n_col] no longer needed, used as \"head\" in subsequent routines */\n\n  /* === Scan columns, compute row degrees, and check row indices ========= */\n\n  stats [Info3] = 0 ;  /* number of duplicate or unsorted row indices*/\n\n  for (row = 0 ; row < n_row ; row++)\n  {\n    Row [row].length = 0 ;\n    Row [row].shared2.mark = -1 ;\n  }\n\n  for (col = 0 ; col < n_col ; col++)\n  {\n    last_row = -1 ;\n\n    cp = &A [p [col]] ;\n    cp_end = &A [p [col+1]] ;\n\n    while (cp < cp_end)\n    {\n      
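/* (editorial) scan column 'col': each row index must lie in [0, n_row);\n      unsorted or duplicate indices only mark the matrix as jumbled and are\n      cleaned up later when the row and column forms are rebuilt */\n      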
row = *cp++ ;\n\n      /* make sure row indices within range */\n      if (row < 0 || row >= n_row)\n      {\n\tstats [Colamd::Status] = Colamd::ErrorRowIndexOutOfBounds ;\n\tstats [Colamd::Info1] = col ;\n\tstats [Colamd::Info2] = row ;\n\tstats [Colamd::Info3] = n_row ;\n\tCOLAMD_DEBUG0 ((\"colamd: row %d col %d out of bounds\\n\", row, col)) ;\n\treturn (false) ;\n      }\n\n      if (row <= last_row || Row [row].shared2.mark == col)\n      {\n\t/* row index are unsorted or repeated (or both), thus col */\n\t/* is jumbled.  This is a notice, not an error condition. */\n\tstats [Colamd::Status] = Colamd::OkButJumbled ;\n\tstats [Colamd::Info1] = col ;\n\tstats [Colamd::Info2] = row ;\n\t(stats [Colamd::Info3]) ++ ;\n\tCOLAMD_DEBUG1 ((\"colamd: row %d col %d unsorted/duplicate\\n\",row,col));\n      }\n\n      if (Row [row].shared2.mark != col)\n      {\n\tRow [row].length++ ;\n      }\n      else\n      {\n\t/* this is a repeated entry in the column, */\n\t/* it will be removed */\n\tCol [col].length-- ;\n      }\n\n      /* mark the row as having been seen in this column */\n      Row [row].shared2.mark = col ;\n\n      last_row = row ;\n    }\n  }\n\n  /* === Compute row pointers ============================================= */\n\n  /* row form of the matrix starts directly after the column */\n  /* form of matrix in A */\n  Row [0].start = p [n_col] ;\n  Row [0].shared1.p = Row [0].start ;\n  Row [0].shared2.mark = -1 ;\n  for (row = 1 ; row < n_row ; row++)\n  {\n    Row [row].start = Row [row-1].start + Row [row-1].length ;\n    Row [row].shared1.p = Row [row].start ;\n    Row [row].shared2.mark = -1 ;\n  }\n\n  /* === Create row form ================================================== */\n\n  if (stats [Status] == OkButJumbled)\n  {\n    /* if cols jumbled, watch for repeated row indices */\n    for (col = 0 ; col < n_col ; col++)\n    {\n      cp = &A [p [col]] ;\n      cp_end = &A [p [col+1]] ;\n      while (cp < cp_end)\n      {\n\trow = *cp++ ;\n\tif (Row [row].shared2.mark != col)\n\t{\n\t  A [(Row [row].shared1.p)++] = col ;\n\t  Row [row].shared2.mark = col ;\n\t}\n      }\n    }\n  }\n  else\n  {\n    /* if cols not jumbled, we don't need the mark (this is faster) */\n    for (col = 0 ; col < n_col ; col++)\n    {\n      cp = &A [p [col]] ;\n      cp_end = &A [p [col+1]] ;\n      while (cp < cp_end)\n      {\n\tA [(Row [*cp++].shared1.p)++] = col ;\n      }\n    }\n  }\n\n  /* === Clear the row marks and set row degrees ========================== */\n\n  for (row = 0 ; row < n_row ; row++)\n  {\n    Row [row].shared2.mark = 0 ;\n    Row [row].shared1.degree = Row [row].length ;\n  }\n\n  /* === See if we need to re-create columns ============================== */\n\n  if (stats [Status] == OkButJumbled)\n  {\n    COLAMD_DEBUG0 ((\"colamd: reconstructing column form, matrix jumbled\\n\")) ;\n\n\n    /* === Compute col pointers ========================================= */\n\n    /* col form of the matrix starts at A [0]. */\n    /* Note, we may have a gap between the col form and the row */\n    /* form if there were duplicate entries, if so, it will be */\n    /* removed upon the first garbage collection */\n    Col [0].start = 0 ;\n    p [0] = Col [0].start ;\n    for (col = 1 ; col < n_col ; col++)\n    {\n      /* note that the lengths here are for pruned columns, i.e. 
*/\n      /* no duplicate row indices will exist for these columns */\n      Col [col].start = Col [col-1].start + Col [col-1].length ;\n      p [col] = Col [col].start ;\n    }\n\n    /* === Re-create col form =========================================== */\n\n    for (row = 0 ; row < n_row ; row++)\n    {\n      rp = &A [Row [row].start] ;\n      rp_end = rp + Row [row].length ;\n      while (rp < rp_end)\n      {\n\tA [(p [*rp++])++] = row ;\n      }\n    }\n  }\n\n  /* === Done.  Matrix is not (or no longer) jumbled ====================== */\n\n  return (true) ;\n}\n\n\n/* ========================================================================== */\n/* === init_scoring ========================================================= */\n/* ========================================================================== */\n\n/*\n  Kills dense or empty columns and rows, calculates an initial score for\n  each column, and places all columns in the degree lists.  Not user-callable.\n*/\ntemplate <typename IndexType>\nstatic void init_scoring\n  (\n    /* === Parameters ======================================================= */\n\n    IndexType n_row,      /* number of rows of A */\n    IndexType n_col,      /* number of columns of A */\n    RowStructure<IndexType> Row [],    /* of size n_row+1 */\n    ColStructure<IndexType> Col [],    /* of size n_col+1 */\n    IndexType A [],     /* column form and row form of A */\n    IndexType head [],    /* of size n_col+1 */\n    double knobs [NKnobs],/* parameters */\n    IndexType *p_n_row2,    /* number of non-dense, non-empty rows */\n    IndexType *p_n_col2,    /* number of non-dense, non-empty columns */\n    IndexType *p_max_deg    /* maximum row degree */\n    )\n{\n  /* === Local variables ================================================== */\n\n  IndexType c ;     /* a column index */\n  IndexType r, row ;    /* a row index */\n  IndexType *cp ;     /* a column pointer */\n  IndexType deg ;     /* degree of a row or column */\n  IndexType *cp_end ;   /* a pointer to the end of a column */\n  IndexType *new_cp ;   /* new column pointer */\n  IndexType col_length ;    /* length of pruned column */\n  IndexType score ;     /* current column score */\n  IndexType n_col2 ;    /* number of non-dense, non-empty columns */\n  IndexType n_row2 ;    /* number of non-dense, non-empty rows */\n  IndexType dense_row_count ; /* remove rows with more entries than this */\n  IndexType dense_col_count ; /* remove cols with more entries than this */\n  IndexType min_score ;   /* smallest column score */\n  IndexType max_deg ;   /* maximum row degree */\n  IndexType next_col ;    /* Used to add to degree list.*/\n\n\n  /* === Extract knobs ==================================================== */\n\n  dense_row_count = numext::maxi(IndexType(0), numext::mini(IndexType(knobs [Colamd::DenseRow] * n_col), n_col)) ;\n  dense_col_count = numext::maxi(IndexType(0), numext::mini(IndexType(knobs [Colamd::DenseCol] * n_row), n_row)) ;\n  COLAMD_DEBUG1 ((\"colamd: densecount: %d %d\\n\", dense_row_count, dense_col_count)) ;\n  max_deg = 0 ;\n  n_col2 = n_col ;\n  n_row2 = n_row ;\n\n  /* === Kill empty columns =============================================== */\n\n  /* Put the empty columns at the end in their natural order, so that LU */\n  /* factorization can proceed as far as possible. 
*/\n  for (c = n_col-1 ; c >= 0 ; c--)\n  {\n    deg = Col [c].length ;\n    if (deg == 0)\n    {\n      /* this is a empty column, kill and order it last */\n      Col [c].shared2.order = --n_col2 ;\n      Col[c].kill_principal() ;\n    }\n  }\n  COLAMD_DEBUG1 ((\"colamd: null columns killed: %d\\n\", n_col - n_col2)) ;\n\n  /* === Kill dense columns =============================================== */\n\n  /* Put the dense columns at the end, in their natural order */\n  for (c = n_col-1 ; c >= 0 ; c--)\n  {\n    /* skip any dead columns */\n    if (Col[c].is_dead())\n    {\n      continue ;\n    }\n    deg = Col [c].length ;\n    if (deg > dense_col_count)\n    {\n      /* this is a dense column, kill and order it last */\n      Col [c].shared2.order = --n_col2 ;\n      /* decrement the row degrees */\n      cp = &A [Col [c].start] ;\n      cp_end = cp + Col [c].length ;\n      while (cp < cp_end)\n      {\n\tRow [*cp++].shared1.degree-- ;\n      }\n      Col[c].kill_principal() ;\n    }\n  }\n  COLAMD_DEBUG1 ((\"colamd: Dense and null columns killed: %d\\n\", n_col - n_col2)) ;\n\n  /* === Kill dense and empty rows ======================================== */\n\n  for (r = 0 ; r < n_row ; r++)\n  {\n    deg = Row [r].shared1.degree ;\n    COLAMD_ASSERT (deg >= 0 && deg <= n_col) ;\n    if (deg > dense_row_count || deg == 0)\n    {\n      /* kill a dense or empty row */\n      Row[r].kill() ;\n      --n_row2 ;\n    }\n    else\n    {\n      /* keep track of max degree of remaining rows */\n      max_deg = numext::maxi(max_deg, deg) ;\n    }\n  }\n  COLAMD_DEBUG1 ((\"colamd: Dense and null rows killed: %d\\n\", n_row - n_row2)) ;\n\n  /* === Compute initial column scores ==================================== */\n\n  /* At this point the row degrees are accurate.  They reflect the number */\n  /* of \"live\" (non-dense) columns in each row.  No empty rows exist. */\n  /* Some \"live\" columns may contain only dead rows, however.  These are */\n  /* pruned in the code below. */\n\n  /* now find the initial matlab score for each column */\n  for (c = n_col-1 ; c >= 0 ; c--)\n  {\n    /* skip dead column */\n    if (Col[c].is_dead())\n    {\n      continue ;\n    }\n    score = 0 ;\n    cp = &A [Col [c].start] ;\n    new_cp = cp ;\n    cp_end = cp + Col [c].length ;\n    while (cp < cp_end)\n    {\n      /* get a row */\n      row = *cp++ ;\n      /* skip if dead */\n      if (Row[row].is_dead())\n      {\n\tcontinue ;\n      }\n      /* compact the column */\n      *new_cp++ = row ;\n      /* add row's external degree */\n      score += Row [row].shared1.degree - 1 ;\n      /* guard against integer overflow */\n      score = numext::mini(score, n_col) ;\n    }\n    /* determine pruned column length */\n    col_length = (IndexType) (new_cp - &A [Col [c].start]) ;\n    if (col_length == 0)\n    {\n      /* a newly-made null column (all rows in this col are \"dense\" */\n      /* and have already been killed) */\n      COLAMD_DEBUG2 ((\"Newly null killed: %d\\n\", c)) ;\n      Col [c].shared2.order = --n_col2 ;\n      Col[c].kill_principal() ;\n    }\n    else\n    {\n      /* set column length and set score */\n      COLAMD_ASSERT (score >= 0) ;\n      COLAMD_ASSERT (score <= n_col) ;\n      Col [c].length = col_length ;\n      Col [c].shared2.score = score ;\n    }\n  }\n  COLAMD_DEBUG1 ((\"colamd: Dense, null, and newly-null columns killed: %d\\n\",\n\t\t  n_col-n_col2)) ;\n\n  /* At this point, all empty rows and columns are dead.  
All live columns */\n  /* are \"clean\" (containing no dead rows) and simplicial (no supercolumns */\n  /* yet).  Rows may contain dead columns, but all live rows contain at */\n  /* least one live column. */\n\n  /* === Initialize degree lists ========================================== */\n\n\n  /* clear the hash buckets */\n  for (c = 0 ; c <= n_col ; c++)\n  {\n    head [c] = Empty ;\n  }\n  min_score = n_col ;\n  /* place in reverse order, so low column indices are at the front */\n  /* of the lists.  This is to encourage natural tie-breaking */\n  for (c = n_col-1 ; c >= 0 ; c--)\n  {\n    /* only add principal columns to degree lists */\n    if (Col[c].is_alive())\n    {\n      COLAMD_DEBUG4 ((\"place %d score %d minscore %d ncol %d\\n\",\n\t\t      c, Col [c].shared2.score, min_score, n_col)) ;\n\n      /* === Add columns score to DList =============================== */\n\n      score = Col [c].shared2.score ;\n\n      COLAMD_ASSERT (min_score >= 0) ;\n      COLAMD_ASSERT (min_score <= n_col) ;\n      COLAMD_ASSERT (score >= 0) ;\n      COLAMD_ASSERT (score <= n_col) ;\n      COLAMD_ASSERT (head [score] >= Empty) ;\n\n      /* now add this column to dList at proper score location */\n      next_col = head [score] ;\n      Col [c].shared3.prev = Empty ;\n      Col [c].shared4.degree_next = next_col ;\n\n      /* if there already was a column with the same score, set its */\n      /* previous pointer to this new column */\n      if (next_col != Empty)\n      {\n\tCol [next_col].shared3.prev = c ;\n      }\n      head [score] = c ;\n\n      /* see if this score is less than current min */\n      min_score = numext::mini(min_score, score) ;\n\n\n    }\n  }\n\n\n  /* === Return number of remaining columns, and max row degree =========== */\n\n  *p_n_col2 = n_col2 ;\n  *p_n_row2 = n_row2 ;\n  *p_max_deg = max_deg ;\n}\n\n\n/* ========================================================================== */\n/* === find_ordering ======================================================== */\n/* ========================================================================== */\n\n/*\n  Order the principal columns of the supercolumn form of the matrix\n  (no supercolumns on input).  Uses a minimum approximate column minimum\n  degree ordering method.  
Not user-callable.\n*/\ntemplate <typename IndexType>\nstatic IndexType find_ordering /* return the number of garbage collections */\n  (\n    /* === Parameters ======================================================= */\n\n    IndexType n_row,      /* number of rows of A */\n    IndexType n_col,      /* number of columns of A */\n    IndexType Alen,     /* size of A, 2*nnz + n_col or larger */\n    RowStructure<IndexType> Row [],    /* of size n_row+1 */\n    ColStructure<IndexType> Col [],    /* of size n_col+1 */\n    IndexType A [],     /* column form and row form of A */\n    IndexType head [],    /* of size n_col+1 */\n    IndexType n_col2,     /* Remaining columns to order */\n    IndexType max_deg,    /* Maximum row degree */\n    IndexType pfree     /* index of first free slot (2*nnz on entry) */\n    )\n{\n  /* === Local variables ================================================== */\n\n  IndexType k ;     /* current pivot ordering step */\n  IndexType pivot_col ;   /* current pivot column */\n  IndexType *cp ;     /* a column pointer */\n  IndexType *rp ;     /* a row pointer */\n  IndexType pivot_row ;   /* current pivot row */\n  IndexType *new_cp ;   /* modified column pointer */\n  IndexType *new_rp ;   /* modified row pointer */\n  IndexType pivot_row_start ; /* pointer to start of pivot row */\n  IndexType pivot_row_degree ;  /* number of columns in pivot row */\n  IndexType pivot_row_length ;  /* number of supercolumns in pivot row */\n  IndexType pivot_col_score ; /* score of pivot column */\n  IndexType needed_memory ;   /* free space needed for pivot row */\n  IndexType *cp_end ;   /* pointer to the end of a column */\n  IndexType *rp_end ;   /* pointer to the end of a row */\n  IndexType row ;     /* a row index */\n  IndexType col ;     /* a column index */\n  IndexType max_score ;   /* maximum possible score */\n  IndexType cur_score ;   /* score of current column */\n  unsigned int hash ;   /* hash value for supernode detection */\n  IndexType head_column ;   /* head of hash bucket */\n  IndexType first_col ;   /* first column in hash bucket */\n  IndexType tag_mark ;    /* marker value for mark array */\n  IndexType row_mark ;    /* Row [row].shared2.mark */\n  IndexType set_difference ;  /* set difference size of row with pivot row */\n  IndexType min_score ;   /* smallest column score */\n  IndexType col_thickness ;   /* \"thickness\" (no. of columns in a supercol) */\n  IndexType max_mark ;    /* maximum value of tag_mark */\n  IndexType pivot_col_thickness ; /* number of columns represented by pivot col */\n  IndexType prev_col ;    /* Used by Dlist operations. */\n  IndexType next_col ;    /* Used by Dlist operations. 
*/\n  IndexType ngarbage ;    /* number of garbage collections performed */\n\n\n  /* === Initialization and clear mark ==================================== */\n\n  max_mark = INT_MAX - n_col ;  /* INT_MAX defined in <limits.h> */\n  tag_mark = Colamd::clear_mark (n_row, Row) ;\n  min_score = 0 ;\n  ngarbage = 0 ;\n  COLAMD_DEBUG1 ((\"colamd: Ordering, n_col2=%d\\n\", n_col2)) ;\n\n  /* === Order the columns ================================================ */\n\n  for (k = 0 ; k < n_col2 ; /* 'k' is incremented below */)\n  {\n\n    /* === Select pivot column, and order it ============================ */\n\n    /* make sure degree list isn't empty */\n    COLAMD_ASSERT (min_score >= 0) ;\n    COLAMD_ASSERT (min_score <= n_col) ;\n    COLAMD_ASSERT (head [min_score] >= Empty) ;\n\n    /* get pivot column from head of minimum degree list */\n    while (min_score < n_col && head [min_score] == Empty)\n    {\n      min_score++ ;\n    }\n    pivot_col = head [min_score] ;\n    COLAMD_ASSERT (pivot_col >= 0 && pivot_col <= n_col) ;\n    next_col = Col [pivot_col].shared4.degree_next ;\n    head [min_score] = next_col ;\n    if (next_col != Empty)\n    {\n      Col [next_col].shared3.prev = Empty ;\n    }\n\n    COLAMD_ASSERT (Col[pivot_col].is_alive()) ;\n    COLAMD_DEBUG3 ((\"Pivot col: %d\\n\", pivot_col)) ;\n\n    /* remember score for defrag check */\n    pivot_col_score = Col [pivot_col].shared2.score ;\n\n    /* the pivot column is the kth column in the pivot order */\n    Col [pivot_col].shared2.order = k ;\n\n    /* increment order count by column thickness */\n    pivot_col_thickness = Col [pivot_col].shared1.thickness ;\n    k += pivot_col_thickness ;\n    COLAMD_ASSERT (pivot_col_thickness > 0) ;\n\n    /* === Garbage_collection, if necessary ============================= */\n\n    needed_memory = numext::mini(pivot_col_score, n_col - k) ;\n    if (pfree + needed_memory >= Alen)\n    {\n      pfree = Colamd::garbage_collection (n_row, n_col, Row, Col, A, &A [pfree]) ;\n      ngarbage++ ;\n      /* after garbage collection we will have enough */\n      COLAMD_ASSERT (pfree + needed_memory < Alen) ;\n      /* garbage collection has wiped out the Row[].shared2.mark array */\n      tag_mark = Colamd::clear_mark (n_row, Row) ;\n\n    }\n\n    /* === Compute pivot row pattern ==================================== */\n\n    /* get starting location for this new merged row */\n    pivot_row_start = pfree ;\n\n    /* initialize new row counts to zero */\n    pivot_row_degree = 0 ;\n\n    /* tag pivot column as having been visited so it isn't included */\n    /* in merged pivot row */\n    Col [pivot_col].shared1.thickness = -pivot_col_thickness ;\n\n    /* pivot row is the union of all rows in the pivot column pattern */\n    cp = &A [Col [pivot_col].start] ;\n    cp_end = cp + Col [pivot_col].length ;\n    while (cp < cp_end)\n    {\n      /* get a row */\n      row = *cp++ ;\n      COLAMD_DEBUG4 ((\"Pivot col pattern %d %d\\n\", Row[row].is_alive(), row)) ;\n      /* skip if row is dead */\n      if (Row[row].is_dead())\n      {\n\tcontinue ;\n      }\n      rp = &A [Row [row].start] ;\n      rp_end = rp + Row [row].length ;\n      while (rp < rp_end)\n      {\n\t/* get a column */\n\tcol = *rp++ ;\n\t/* add the column, if alive and untagged */\n\tcol_thickness = Col [col].shared1.thickness ;\n\tif (col_thickness > 0 && Col[col].is_alive())\n\t{\n\t  /* tag column in pivot row */\n\t  Col [col].shared1.thickness = -col_thickness ;\n\t  COLAMD_ASSERT (pfree < Alen) ;\n\t  /* place column in 
pivot row */\n\t  A [pfree++] = col ;\n\t  pivot_row_degree += col_thickness ;\n\t}\n      }\n    }\n\n    /* clear tag on pivot column */\n    Col [pivot_col].shared1.thickness = pivot_col_thickness ;\n    max_deg = numext::maxi(max_deg, pivot_row_degree) ;\n\n\n    /* === Kill all rows used to construct pivot row ==================== */\n\n    /* also kill pivot row, temporarily */\n    cp = &A [Col [pivot_col].start] ;\n    cp_end = cp + Col [pivot_col].length ;\n    while (cp < cp_end)\n    {\n      /* may be killing an already dead row */\n      row = *cp++ ;\n      COLAMD_DEBUG3 ((\"Kill row in pivot col: %d\\n\", row)) ;\n      Row[row].kill() ;\n    }\n\n    /* === Select a row index to use as the new pivot row =============== */\n\n    pivot_row_length = pfree - pivot_row_start ;\n    if (pivot_row_length > 0)\n    {\n      /* pick the \"pivot\" row arbitrarily (first row in col) */\n      pivot_row = A [Col [pivot_col].start] ;\n      COLAMD_DEBUG3 ((\"Pivotal row is %d\\n\", pivot_row)) ;\n    }\n    else\n    {\n      /* there is no pivot row, since it is of zero length */\n      pivot_row = Empty ;\n      COLAMD_ASSERT (pivot_row_length == 0) ;\n    }\n    COLAMD_ASSERT (Col [pivot_col].length > 0 || pivot_row_length == 0) ;\n\n    /* === Approximate degree computation =============================== */\n\n    /* Here begins the computation of the approximate degree.  The column */\n    /* score is the sum of the pivot row \"length\", plus the size of the */\n    /* set differences of each row in the column minus the pattern of the */\n    /* pivot row itself.  The column (\"thickness\") itself is also */\n    /* excluded from the column score (we thus use an approximate */\n    /* external degree). */\n\n    /* The time taken by the following code (compute set differences, and */\n    /* add them up) is proportional to the size of the data structure */\n    /* being scanned - that is, the sum of the sizes of each column in */\n    /* the pivot row.  Thus, the amortized time to compute a column score */\n    /* is proportional to the size of that column (where size, in this */\n    /* context, is the column \"length\", or the number of row indices */\n    /* in that column).  The number of row indices in a column is */\n    /* monotonically non-decreasing, from the length of the original */\n    /* column on input to colamd. */\n\n    /* === Compute set differences ====================================== */\n\n    COLAMD_DEBUG3 ((\"** Computing set differences phase. **\\n\")) ;\n\n    /* pivot row is currently dead - it will be revived later. 
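It was killed just above, so the set-difference scan below skips it like any other dead row; it is resurrected at the end of this iteration with its compacted pattern and updated degree. 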
*/\n\n    COLAMD_DEBUG3 ((\"Pivot row: \")) ;\n    /* for each column in pivot row */\n    rp = &A [pivot_row_start] ;\n    rp_end = rp + pivot_row_length ;\n    while (rp < rp_end)\n    {\n      col = *rp++ ;\n      COLAMD_ASSERT (Col[col].is_alive() && col != pivot_col) ;\n      COLAMD_DEBUG3 ((\"Col: %d\\n\", col)) ;\n\n      /* clear tags used to construct pivot row pattern */\n      col_thickness = -Col [col].shared1.thickness ;\n      COLAMD_ASSERT (col_thickness > 0) ;\n      Col [col].shared1.thickness = col_thickness ;\n\n      /* === Remove column from degree list =========================== */\n\n      cur_score = Col [col].shared2.score ;\n      prev_col = Col [col].shared3.prev ;\n      next_col = Col [col].shared4.degree_next ;\n      COLAMD_ASSERT (cur_score >= 0) ;\n      COLAMD_ASSERT (cur_score <= n_col) ;\n      COLAMD_ASSERT (cur_score >= Empty) ;\n      if (prev_col == Empty)\n      {\n\thead [cur_score] = next_col ;\n      }\n      else\n      {\n\tCol [prev_col].shared4.degree_next = next_col ;\n      }\n      if (next_col != Empty)\n      {\n\tCol [next_col].shared3.prev = prev_col ;\n      }\n\n      /* === Scan the column ========================================== */\n\n      cp = &A [Col [col].start] ;\n      cp_end = cp + Col [col].length ;\n      while (cp < cp_end)\n      {\n\t/* get a row */\n\trow = *cp++ ;\n\t/* skip if dead */\n\tif (Row[row].is_dead())\n\t{\n\t  continue ;\n\t}\n  row_mark = Row [row].shared2.mark ;\n\tCOLAMD_ASSERT (row != pivot_row) ;\n\tset_difference = row_mark - tag_mark ;\n\t/* check if the row has been seen yet */\n\tif (set_difference < 0)\n\t{\n\t  COLAMD_ASSERT (Row [row].shared1.degree <= max_deg) ;\n\t  set_difference = Row [row].shared1.degree ;\n\t}\n\t/* subtract column thickness from this row's set difference */\n\tset_difference -= col_thickness ;\n\tCOLAMD_ASSERT (set_difference >= 0) ;\n\t/* absorb this row if the set difference becomes zero */\n\tif (set_difference == 0)\n\t{\n\t  COLAMD_DEBUG3 ((\"aggressive absorption. Row: %d\\n\", row)) ;\n\t  Row[row].kill() ;\n\t}\n\telse\n\t{\n\t  /* save the new mark */\n\t  Row [row].shared2.mark = set_difference + tag_mark ;\n\t}\n      }\n    }\n\n\n    /* === Add up set differences for each column ======================= */\n\n    COLAMD_DEBUG3 ((\"** Adding set differences phase. **\\n\")) ;\n\n    /* for each column in pivot row */\n    rp = &A [pivot_row_start] ;\n    rp_end = rp + pivot_row_length ;\n    while (rp < rp_end)\n    {\n      /* get a column */\n      col = *rp++ ;\n      COLAMD_ASSERT (Col[col].is_alive() && col != pivot_col) ;\n      hash = 0 ;\n      cur_score = 0 ;\n      cp = &A [Col [col].start] ;\n      /* compact the column */\n      new_cp = cp ;\n      cp_end = cp + Col [col].length ;\n\n      COLAMD_DEBUG4 ((\"Adding set diffs for Col: %d.\\n\", col)) ;\n\n      while (cp < cp_end)\n      {\n\t/* get a row */\n\trow = *cp++ ;\n\tCOLAMD_ASSERT(row >= 0 && row < n_row) ;\n\t/* skip if dead */\n\tif (Row [row].is_dead())\n\t{\n\t  continue ;\n\t}\n  row_mark = Row [row].shared2.mark ;\n\tCOLAMD_ASSERT (row_mark > tag_mark) ;\n\t/* compact the column */\n\t*new_cp++ = row ;\n\t/* compute hash function */\n\thash += row ;\n\t/* add set difference */\n\tcur_score += row_mark - tag_mark ;\n\t/* integer overflow... 
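guard: a column score never needs to exceed n_col, so clamping the running sum here keeps it from wrapping 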
*/\n\tcur_score = numext::mini(cur_score, n_col) ;\n      }\n\n      /* recompute the column's length */\n      Col [col].length = (IndexType) (new_cp - &A [Col [col].start]) ;\n\n      /* === Further mass elimination ================================= */\n\n      if (Col [col].length == 0)\n      {\n\tCOLAMD_DEBUG4 ((\"further mass elimination. Col: %d\\n\", col)) ;\n\t/* nothing left but the pivot row in this column */\n\tCol[col].kill_principal() ;\n\tpivot_row_degree -= Col [col].shared1.thickness ;\n\tCOLAMD_ASSERT (pivot_row_degree >= 0) ;\n\t/* order it */\n\tCol [col].shared2.order = k ;\n\t/* increment order count by column thickness */\n\tk += Col [col].shared1.thickness ;\n      }\n      else\n      {\n\t/* === Prepare for supercolumn detection ==================== */\n\n\tCOLAMD_DEBUG4 ((\"Preparing supercol detection for Col: %d.\\n\", col)) ;\n\n\t/* save score so far */\n\tCol [col].shared2.score = cur_score ;\n\n\t/* add column to hash table, for supercolumn detection */\n\thash %= n_col + 1 ;\n\n\tCOLAMD_DEBUG4 ((\" Hash = %d, n_col = %d.\\n\", hash, n_col)) ;\n\tCOLAMD_ASSERT (hash <= n_col) ;\n\n\thead_column = head [hash] ;\n\tif (head_column > Empty)\n\t{\n\t  /* degree list \"hash\" is non-empty, use prev (shared3) of */\n\t  /* first column in degree list as head of hash bucket */\n\t  first_col = Col [head_column].shared3.headhash ;\n\t  Col [head_column].shared3.headhash = col ;\n\t}\n\telse\n\t{\n\t  /* degree list \"hash\" is empty, use head as hash bucket */\n\t  first_col = - (head_column + 2) ;\n\t  head [hash] = - (col + 2) ;\n\t}\n\tCol [col].shared4.hash_next = first_col ;\n\n\t/* save hash function in Col [col].shared3.hash */\n\tCol [col].shared3.hash = (IndexType) hash ;\n\tCOLAMD_ASSERT (Col[col].is_alive()) ;\n      }\n    }\n\n    /* The approximate external column degree is now computed.  */\n\n    /* === Supercolumn detection ======================================== */\n\n    COLAMD_DEBUG3 ((\"** Supercolumn detection phase. **\\n\")) ;\n\n    Colamd::detect_super_cols (Col, A, head, pivot_row_start, pivot_row_length) ;\n\n    /* === Kill the pivotal column ====================================== */\n\n    Col[pivot_col].kill_principal() ;\n\n    /* === Clear mark =================================================== */\n\n    tag_mark += (max_deg + 1) ;\n    if (tag_mark >= max_mark)\n    {\n      COLAMD_DEBUG2 ((\"clearing tag_mark\\n\")) ;\n      tag_mark = Colamd::clear_mark (n_row, Row) ;\n    }\n\n    /* === Finalize the new pivot row, and column scores ================ */\n\n    COLAMD_DEBUG3 ((\"** Finalize scores phase. **\\n\")) ;\n\n    /* for each column in pivot row */\n    rp = &A [pivot_row_start] ;\n    /* compact the pivot row */\n    new_rp = rp ;\n    rp_end = rp + pivot_row_length ;\n    while (rp < rp_end)\n    {\n      col = *rp++ ;\n      /* skip dead columns */\n      if (Col[col].is_dead())\n      {\n\tcontinue ;\n      }\n      *new_rp++ = col ;\n      /* add new pivot row to column */\n      A [Col [col].start + (Col [col].length++)] = pivot_row ;\n\n      /* retrieve score so far and add on pivot row's degree. */\n      /* (we wait until here for this in case the pivot */\n      /* row's degree was reduced due to mass elimination). 
*/\n      cur_score = Col [col].shared2.score + pivot_row_degree ;\n\n      /* calculate the max possible score as the number of */\n      /* external columns minus the 'k' value minus the */\n      /* columns thickness */\n      max_score = n_col - k - Col [col].shared1.thickness ;\n\n      /* make the score the external degree of the union-of-rows */\n      cur_score -= Col [col].shared1.thickness ;\n\n      /* make sure score is less or equal than the max score */\n      cur_score = numext::mini(cur_score, max_score) ;\n      COLAMD_ASSERT (cur_score >= 0) ;\n\n      /* store updated score */\n      Col [col].shared2.score = cur_score ;\n\n      /* === Place column back in degree list ========================= */\n\n      COLAMD_ASSERT (min_score >= 0) ;\n      COLAMD_ASSERT (min_score <= n_col) ;\n      COLAMD_ASSERT (cur_score >= 0) ;\n      COLAMD_ASSERT (cur_score <= n_col) ;\n      COLAMD_ASSERT (head [cur_score] >= Empty) ;\n      next_col = head [cur_score] ;\n      Col [col].shared4.degree_next = next_col ;\n      Col [col].shared3.prev = Empty ;\n      if (next_col != Empty)\n      {\n\tCol [next_col].shared3.prev = col ;\n      }\n      head [cur_score] = col ;\n\n      /* see if this score is less than current min */\n      min_score = numext::mini(min_score, cur_score) ;\n\n    }\n\n    /* === Resurrect the new pivot row ================================== */\n\n    if (pivot_row_degree > 0)\n    {\n      /* update pivot row length to reflect any cols that were killed */\n      /* during super-col detection and mass elimination */\n      Row [pivot_row].start  = pivot_row_start ;\n      Row [pivot_row].length = (IndexType) (new_rp - &A[pivot_row_start]) ;\n      Row [pivot_row].shared1.degree = pivot_row_degree ;\n      Row [pivot_row].shared2.mark = 0 ;\n      /* pivot row is no longer dead */\n    }\n  }\n\n  /* === All principal columns have now been ordered ====================== */\n\n  return (ngarbage) ;\n}\n\n\n/* ========================================================================== */\n/* === order_children ======================================================= */\n/* ========================================================================== */\n\n/*\n  The find_ordering routine has ordered all of the principal columns (the\n  representatives of the supercolumns).  The non-principal columns have not\n  yet been ordered.  This routine orders those columns by walking up the\n  parent tree (a column is a child of the column which absorbed it).  The\n  final permutation vector is then placed in p [0 ... n_col-1], with p [0]\n  being the first column, and p [n_col-1] being the last.  It doesn't look\n  like it at first glance, but be assured that this routine takes time linear\n  in the number of columns.  Although not immediately obvious, the time\n  taken by this routine is O (n_col), that is, linear in the number of\n  columns.  Not user-callable.\n*/\ntemplate <typename IndexType>\nstatic inline  void order_children\n(\n  /* === Parameters ======================================================= */\n\n  IndexType n_col,      /* number of columns of A */\n  ColStructure<IndexType> Col [],    /* of size n_col+1 */\n  IndexType p []      /* p [0 ... 
n_col-1] is the column permutation*/\n  )\n{\n  /* === Local variables ================================================== */\n\n  IndexType i ;     /* loop counter for all columns */\n  IndexType c ;     /* column index */\n  IndexType parent ;    /* index of column's parent */\n  IndexType order ;     /* column's order */\n\n  /* === Order each non-principal column ================================== */\n\n  for (i = 0 ; i < n_col ; i++)\n  {\n    /* find an un-ordered non-principal column */\n    COLAMD_ASSERT (col_is_dead(Col, i)) ;\n    if (!Col[i].is_dead_principal() && Col [i].shared2.order == Empty)\n    {\n      parent = i ;\n      /* once found, find its principal parent */\n      do\n      {\n\tparent = Col [parent].shared1.parent ;\n      } while (!Col[parent].is_dead_principal()) ;\n\n      /* now, order all un-ordered non-principal columns along path */\n      /* to this parent.  collapse tree at the same time */\n      c = i ;\n      /* get order of parent */\n      order = Col [parent].shared2.order ;\n\n      do\n      {\n\tCOLAMD_ASSERT (Col [c].shared2.order == Empty) ;\n\n\t/* order this column */\n\tCol [c].shared2.order = order++ ;\n\t/* collapse tree */\n\tCol [c].shared1.parent = parent ;\n\n\t/* get immediate parent of this column */\n\tc = Col [c].shared1.parent ;\n\n\t/* continue until we hit an ordered column.  There are */\n\t/* guaranteed not to be any more unordered columns */\n\t/* above an ordered column */\n      } while (Col [c].shared2.order == Empty) ;\n\n      /* re-order the super_col parent to largest order for this group */\n      Col [parent].shared2.order = order ;\n    }\n  }\n\n  /* === Generate the permutation ========================================= */\n\n  for (c = 0 ; c < n_col ; c++)\n  {\n    p [Col [c].shared2.order] = c ;\n  }\n}\n\n\n/* ========================================================================== */\n/* === detect_super_cols ==================================================== */\n/* ========================================================================== */\n\n/*\n  Detects supercolumns by finding matches between columns in the hash buckets.\n  Check amongst columns in the set A [row_start ... row_start + row_length-1].\n  The columns under consideration are currently *not* in the degree lists,\n  and have already been placed in the hash buckets.\n\n  The hash bucket for columns whose hash function is equal to h is stored\n  as follows:\n\n  if head [h] is >= 0, then head [h] contains a degree list, so:\n\n  head [h] is the first column in degree bucket h.\n  Col [head [h]].headhash gives the first column in hash bucket h.\n\n  otherwise, the degree list is empty, and:\n\n  -(head [h] + 2) is the first column in hash bucket h.\n\n  For a column c in a hash bucket, Col [c].shared3.prev is NOT a \"previous\n  column\" pointer.  Col [c].shared3.hash is used instead as the hash number\n  for that column.  
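(Example of the encoding: head [h] = -5 means the degree list for h is empty and column 3 = -(-5 + 2) is the first column in hash bucket h.)  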
The value of Col [c].shared4.hash_next is the next column\n  in the same hash bucket.\n\n  Assuming no, or \"few\" hash collisions, the time taken by this routine is\n  linear in the sum of the sizes (lengths) of each column whose score has\n  just been computed in the approximate degree computation.\n  Not user-callable.\n*/\ntemplate <typename IndexType>\nstatic void detect_super_cols\n(\n  /* === Parameters ======================================================= */\n\n  ColStructure<IndexType> Col [],    /* of size n_col+1 */\n  IndexType A [],     /* row indices of A */\n  IndexType head [],    /* head of degree lists and hash buckets */\n  IndexType row_start,    /* pointer to set of columns to check */\n  IndexType row_length    /* number of columns to check */\n)\n{\n  /* === Local variables ================================================== */\n\n  IndexType hash ;      /* hash value for a column */\n  IndexType *rp ;     /* pointer to a row */\n  IndexType c ;     /* a column index */\n  IndexType super_c ;   /* column index of the column to absorb into */\n  IndexType *cp1 ;      /* column pointer for column super_c */\n  IndexType *cp2 ;      /* column pointer for column c */\n  IndexType length ;    /* length of column super_c */\n  IndexType prev_c ;    /* column preceding c in hash bucket */\n  IndexType i ;     /* loop counter */\n  IndexType *rp_end ;   /* pointer to the end of the row */\n  IndexType col ;     /* a column index in the row to check */\n  IndexType head_column ;   /* first column in hash bucket or degree list */\n  IndexType first_col ;   /* first column in hash bucket */\n\n  /* === Consider each column in the row ================================== */\n\n  rp = &A [row_start] ;\n  rp_end = rp + row_length ;\n  while (rp < rp_end)\n  {\n    col = *rp++ ;\n    if (Col[col].is_dead())\n    {\n      continue ;\n    }\n\n    /* get hash number for this column */\n    hash = Col [col].shared3.hash ;\n    COLAMD_ASSERT (hash <= n_col) ;\n\n    /* === Get the first column in this hash bucket ===================== */\n\n    head_column = head [hash] ;\n    if (head_column > Empty)\n    {\n      first_col = Col [head_column].shared3.headhash ;\n    }\n    else\n    {\n      first_col = - (head_column + 2) ;\n    }\n\n    /* === Consider each column in the hash bucket ====================== */\n\n    for (super_c = first_col ; super_c != Empty ;\n\t super_c = Col [super_c].shared4.hash_next)\n    {\n      COLAMD_ASSERT (Col [super_c].is_alive()) ;\n      COLAMD_ASSERT (Col [super_c].shared3.hash == hash) ;\n      length = Col [super_c].length ;\n\n      /* prev_c is the column preceding column c in the hash bucket */\n      prev_c = super_c ;\n\n      /* === Compare super_c with all columns after it ================ */\n\n      for (c = Col [super_c].shared4.hash_next ;\n\t   c != Empty ; c = Col [c].shared4.hash_next)\n      {\n\tCOLAMD_ASSERT (c != super_c) ;\n\tCOLAMD_ASSERT (Col[c].is_alive()) ;\n\tCOLAMD_ASSERT (Col [c].shared3.hash == hash) ;\n\n\t/* not identical if lengths or scores are different */\n\tif (Col [c].length != length ||\n\t    Col [c].shared2.score != Col [super_c].shared2.score)\n\t{\n\t  prev_c = c ;\n\t  continue ;\n\t}\n\n\t/* compare the two columns */\n\tcp1 = &A [Col [super_c].start] ;\n\tcp2 = &A [Col [c].start] ;\n\n\tfor (i = 0 ; i < length ; i++)\n\t{\n\t  /* the columns are \"clean\" (no dead rows) */\n\t  COLAMD_ASSERT ( cp1->is_alive() );\n\t  COLAMD_ASSERT ( cp2->is_alive() );\n\t  /* row indices will same order for both supercols, 
*/\n\t  /* no gather scatter necessary */\n\t  if (*cp1++ != *cp2++)\n\t  {\n\t    break ;\n\t  }\n\t}\n\n\t/* the two columns are different if the for-loop \"broke\" */\n\tif (i != length)\n\t{\n\t  prev_c = c ;\n\t  continue ;\n\t}\n\n\t/* === Got it!  two columns are identical =================== */\n\n\tCOLAMD_ASSERT (Col [c].shared2.score == Col [super_c].shared2.score) ;\n\n\tCol [super_c].shared1.thickness += Col [c].shared1.thickness ;\n\tCol [c].shared1.parent = super_c ;\n\tCol[c].kill_non_principal() ;\n\t/* order c later, in order_children() */\n\tCol [c].shared2.order = Empty ;\n\t/* remove c from hash bucket */\n\tCol [prev_c].shared4.hash_next = Col [c].shared4.hash_next ;\n      }\n    }\n\n    /* === Empty this hash bucket ======================================= */\n\n    if (head_column > Empty)\n    {\n      /* corresponding degree list \"hash\" is not empty */\n      Col [head_column].shared3.headhash = Empty ;\n    }\n    else\n    {\n      /* corresponding degree list \"hash\" is empty */\n      head [hash] = Empty ;\n    }\n  }\n}\n\n\n/* ========================================================================== */\n/* === garbage_collection =================================================== */\n/* ========================================================================== */\n\n/*\n  Defragments and compacts columns and rows in the workspace A.  Used when\n  all available memory has been used while performing row merging.  Returns\n  the index of the first free position in A, after garbage collection.  The\n  time taken by this routine is linear in the size of the array A, which is\n  itself linear in the number of nonzeros in the input matrix.\n  Not user-callable.\n*/\ntemplate <typename IndexType>\nstatic IndexType garbage_collection  /* returns the new value of pfree */\n  (\n    /* === Parameters ======================================================= */\n\n    IndexType n_row,      /* number of rows */\n    IndexType n_col,      /* number of columns */\n    RowStructure<IndexType> Row [],    /* row info */\n    ColStructure<IndexType> Col [],    /* column info */\n    IndexType A [],     /* A [0 ... Alen-1] holds the matrix */\n    IndexType *pfree      /* &A [0] ... pfree is in use */\n    )\n{\n  /* === Local variables ================================================== */\n\n  IndexType *psrc ;     /* source pointer */\n  IndexType *pdest ;    /* destination pointer */\n  IndexType j ;     /* counter */\n  IndexType r ;     /* a row index */\n  IndexType c ;     /* a column index */\n  IndexType length ;    /* length of a row or column */\n\n  /* === Defragment the columns =========================================== */\n\n  pdest = &A[0] ;\n  for (c = 0 ; c < n_col ; c++)\n  {\n    if (Col[c].is_alive())\n    {\n      psrc = &A [Col [c].start] ;\n\n      /* move and compact the column */\n      COLAMD_ASSERT (pdest <= psrc) ;\n      Col [c].start = (IndexType) (pdest - &A [0]) ;\n      length = Col [c].length ;\n      for (j = 0 ; j < length ; j++)\n      {\n\tr = *psrc++ ;\n\tif (Row[r].is_alive())\n\t{\n\t  *pdest++ = r ;\n\t}\n      }\n      Col [c].length = (IndexType) (pdest - &A [Col [c].start]) ;\n    }\n  }\n\n  /* === Prepare to defragment the rows =================================== */\n\n  for (r = 0 ; r < n_row ; r++)\n  {\n    if (Row[r].is_alive())\n    {\n      if (Row [r].length == 0)\n      {\n        /* this row is of zero length.  
cannot compact it, so kill it */\n        COLAMD_DEBUG3 ((\"Defrag row kill\\n\")) ;\n        Row[r].kill() ;\n      }\n      else\n      {\n        /* save first column index in Row [r].shared2.first_column */\n        psrc = &A [Row [r].start] ;\n        Row [r].shared2.first_column = *psrc ;\n        COLAMD_ASSERT (Row[r].is_alive()) ;\n        /* flag the start of the row with the one's complement of row */\n        *psrc = ones_complement(r) ;\n\n      }\n    }\n  }\n\n  /* === Defragment the rows ============================================== */\n\n  psrc = pdest ;\n  while (psrc < pfree)\n  {\n    /* find a negative number ... the start of a row */\n    if (*psrc++ < 0)\n    {\n      psrc-- ;\n      /* get the row index */\n      r = ones_complement(*psrc) ;\n      COLAMD_ASSERT (r >= 0 && r < n_row) ;\n      /* restore first column index */\n      *psrc = Row [r].shared2.first_column ;\n      COLAMD_ASSERT (Row[r].is_alive()) ;\n\n      /* move and compact the row */\n      COLAMD_ASSERT (pdest <= psrc) ;\n      Row [r].start = (IndexType) (pdest - &A [0]) ;\n      length = Row [r].length ;\n      for (j = 0 ; j < length ; j++)\n      {\n\tc = *psrc++ ;\n\tif (Col[c].is_alive())\n\t{\n\t  *pdest++ = c ;\n\t}\n      }\n      Row [r].length = (IndexType) (pdest - &A [Row [r].start]) ;\n\n    }\n  }\n  /* ensure we found all the rows */\n  COLAMD_ASSERT (debug_rows == 0) ;\n\n  /* === Return the new value of pfree ==================================== */\n\n  return ((IndexType) (pdest - &A [0])) ;\n}\n\n\n/* ========================================================================== */\n/* === clear_mark =========================================================== */\n/* ========================================================================== */\n\n/*\n  Clears the Row [].shared2.mark array, and returns the new tag_mark.\n  Return value is the new tag_mark.  Not user-callable.\n*/\ntemplate <typename IndexType>\nstatic inline  IndexType clear_mark  /* return the new value for tag_mark */\n  (\n      /* === Parameters ======================================================= */\n\n    IndexType n_row,    /* number of rows in A */\n    RowStructure<IndexType> Row [] /* Row [0 ... n_row-1].shared2.mark is set to zero */\n    )\n{\n  /* === Local variables ================================================== */\n\n  IndexType r ;\n\n  for (r = 0 ; r < n_row ; r++)\n  {\n    if (Row[r].is_alive())\n    {\n      Row [r].shared2.mark = 0 ;\n    }\n  }\n  return (1) ;\n}\n\n} // namespace Colamd\n\n} // namespace internal\n#endif\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/OrderingMethods/Ordering.h",
    "content": " \n// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2012  Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_ORDERING_H\n#define EIGEN_ORDERING_H\n\nnamespace Eigen {\n  \n#include \"Eigen_Colamd.h\"\n\nnamespace internal {\n    \n/** \\internal\n  * \\ingroup OrderingMethods_Module\n  * \\param[in] A the input non-symmetric matrix\n  * \\param[out] symmat the symmetric pattern A^T+A from the input matrix \\a A.\n  * FIXME: The values should not be considered here\n  */\ntemplate<typename MatrixType> \nvoid ordering_helper_at_plus_a(const MatrixType& A, MatrixType& symmat)\n{\n  MatrixType C;\n  C = A.transpose(); // NOTE: Could be  costly\n  for (int i = 0; i < C.rows(); i++) \n  {\n      for (typename MatrixType::InnerIterator it(C, i); it; ++it)\n        it.valueRef() = typename MatrixType::Scalar(0);\n  }\n  symmat = C + A;\n}\n    \n}\n\n/** \\ingroup OrderingMethods_Module\n  * \\class AMDOrdering\n  *\n  * Functor computing the \\em approximate \\em minimum \\em degree ordering\n  * If the matrix is not structurally symmetric, an ordering of A^T+A is computed\n  * \\tparam  StorageIndex The type of indices of the matrix \n  * \\sa COLAMDOrdering\n  */\ntemplate <typename StorageIndex>\nclass AMDOrdering\n{\n  public:\n    typedef PermutationMatrix<Dynamic, Dynamic, StorageIndex> PermutationType;\n    \n    /** Compute the permutation vector from a sparse matrix\n     * This routine is much faster if the input matrix is column-major     \n     */\n    template <typename MatrixType>\n    void operator()(const MatrixType& mat, PermutationType& perm)\n    {\n      // Compute the symmetric pattern\n      SparseMatrix<typename MatrixType::Scalar, ColMajor, StorageIndex> symm;\n      internal::ordering_helper_at_plus_a(mat,symm); \n    \n      // Call the AMD routine \n      //m_mat.prune(keep_diag());\n      internal::minimum_degree_ordering(symm, perm);\n    }\n    \n    /** Compute the permutation with a selfadjoint matrix */\n    template <typename SrcType, unsigned int SrcUpLo> \n    void operator()(const SparseSelfAdjointView<SrcType, SrcUpLo>& mat, PermutationType& perm)\n    { \n      SparseMatrix<typename SrcType::Scalar, ColMajor, StorageIndex> C; C = mat;\n      \n      // Call the AMD routine \n      // m_mat.prune(keep_diag()); //Remove the diagonal elements \n      internal::minimum_degree_ordering(C, perm);\n    }\n};\n\n/** \\ingroup OrderingMethods_Module\n  * \\class NaturalOrdering\n  *\n  * Functor computing the natural ordering (identity)\n  * \n  * \\note Returns an empty permutation matrix\n  * \\tparam  StorageIndex The type of indices of the matrix \n  */\ntemplate <typename StorageIndex>\nclass NaturalOrdering\n{\n  public:\n    typedef PermutationMatrix<Dynamic, Dynamic, StorageIndex> PermutationType;\n    \n    /** Compute the permutation vector from a column-major sparse matrix */\n    template <typename MatrixType>\n    void operator()(const MatrixType& /*mat*/, PermutationType& perm)\n    {\n      perm.resize(0); \n    }\n    \n};\n\n/** \\ingroup OrderingMethods_Module\n  * \\class COLAMDOrdering\n  *\n  * \\tparam  StorageIndex The type of indices of the matrix \n  * \n  * Functor computing the \\em column \\em approximate \\em minimum \\em degree ordering 
\n  * The matrix should be in column-major and \\b compressed format (see SparseMatrix::makeCompressed()).\n  */\ntemplate<typename StorageIndex>\nclass COLAMDOrdering\n{\n  public:\n    typedef PermutationMatrix<Dynamic, Dynamic, StorageIndex> PermutationType; \n    typedef Matrix<StorageIndex, Dynamic, 1> IndexVector;\n    \n    /** Compute the permutation vector \\a perm form the sparse matrix \\a mat\n      * \\warning The input sparse matrix \\a mat must be in compressed mode (see SparseMatrix::makeCompressed()).\n      */\n    template <typename MatrixType>\n    void operator() (const MatrixType& mat, PermutationType& perm)\n    {\n      eigen_assert(mat.isCompressed() && \"COLAMDOrdering requires a sparse matrix in compressed mode. Call .makeCompressed() before passing it to COLAMDOrdering\");\n      \n      StorageIndex m = StorageIndex(mat.rows());\n      StorageIndex n = StorageIndex(mat.cols());\n      StorageIndex nnz = StorageIndex(mat.nonZeros());\n      // Get the recommended value of Alen to be used by colamd\n      StorageIndex Alen = internal::Colamd::recommended(nnz, m, n); \n      // Set the default parameters\n      double knobs [internal::Colamd::NKnobs]; \n      StorageIndex stats [internal::Colamd::NStats];\n      internal::Colamd::set_defaults(knobs);\n      \n      IndexVector p(n+1), A(Alen); \n      for(StorageIndex i=0; i <= n; i++)   p(i) = mat.outerIndexPtr()[i];\n      for(StorageIndex i=0; i < nnz; i++)  A(i) = mat.innerIndexPtr()[i];\n      // Call Colamd routine to compute the ordering \n      StorageIndex info = internal::Colamd::compute_ordering(m, n, Alen, A.data(), p.data(), knobs, stats); \n      EIGEN_UNUSED_VARIABLE(info);\n      eigen_assert( info && \"COLAMD failed \" );\n      \n      perm.resize(n);\n      for (StorageIndex i = 0; i < n; i++) perm.indices()(p(i)) = i;\n    }\n};\n\n} // end namespace Eigen\n\n#endif\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/PaStiXSupport/PaStiXSupport.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_PASTIXSUPPORT_H\n#define EIGEN_PASTIXSUPPORT_H\n\nnamespace Eigen { \n\n#if defined(DCOMPLEX)\n  #define PASTIX_COMPLEX  COMPLEX\n  #define PASTIX_DCOMPLEX DCOMPLEX\n#else\n  #define PASTIX_COMPLEX  std::complex<float>\n  #define PASTIX_DCOMPLEX std::complex<double>\n#endif\n\n/** \\ingroup PaStiXSupport_Module\n  * \\brief Interface to the PaStix solver\n  * \n  * This class is used to solve the linear systems A.X = B via the PaStix library. \n  * The matrix can be either real or complex, symmetric or not.\n  *\n  * \\sa TutorialSparseDirectSolvers\n  */\ntemplate<typename _MatrixType, bool IsStrSym = false> class PastixLU;\ntemplate<typename _MatrixType, int Options> class PastixLLT;\ntemplate<typename _MatrixType, int Options> class PastixLDLT;\n\nnamespace internal\n{\n    \n  template<class Pastix> struct pastix_traits;\n\n  template<typename _MatrixType>\n  struct pastix_traits< PastixLU<_MatrixType> >\n  {\n    typedef _MatrixType MatrixType;\n    typedef typename _MatrixType::Scalar Scalar;\n    typedef typename _MatrixType::RealScalar RealScalar;\n    typedef typename _MatrixType::StorageIndex StorageIndex;\n  };\n\n  template<typename _MatrixType, int Options>\n  struct pastix_traits< PastixLLT<_MatrixType,Options> >\n  {\n    typedef _MatrixType MatrixType;\n    typedef typename _MatrixType::Scalar Scalar;\n    typedef typename _MatrixType::RealScalar RealScalar;\n    typedef typename _MatrixType::StorageIndex StorageIndex;\n  };\n\n  template<typename _MatrixType, int Options>\n  struct pastix_traits< PastixLDLT<_MatrixType,Options> >\n  {\n    typedef _MatrixType MatrixType;\n    typedef typename _MatrixType::Scalar Scalar;\n    typedef typename _MatrixType::RealScalar RealScalar;\n    typedef typename _MatrixType::StorageIndex StorageIndex;\n  };\n  \n  inline void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, float *vals, int *perm, int * invp, float *x, int nbrhs, int *iparm, double *dparm)\n  {\n    if (n == 0) { ptr = NULL; idx = NULL; vals = NULL; }\n    if (nbrhs == 0) {x = NULL; nbrhs=1;}\n    s_pastix(pastix_data, pastix_comm, n, ptr, idx, vals, perm, invp, x, nbrhs, iparm, dparm); \n  }\n  \n  inline void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, double *vals, int *perm, int * invp, double *x, int nbrhs, int *iparm, double *dparm)\n  {\n    if (n == 0) { ptr = NULL; idx = NULL; vals = NULL; }\n    if (nbrhs == 0) {x = NULL; nbrhs=1;}\n    d_pastix(pastix_data, pastix_comm, n, ptr, idx, vals, perm, invp, x, nbrhs, iparm, dparm); \n  }\n  \n  inline void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, std::complex<float> *vals, int *perm, int * invp, std::complex<float> *x, int nbrhs, int *iparm, double *dparm)\n  {\n    if (n == 0) { ptr = NULL; idx = NULL; vals = NULL; }\n    if (nbrhs == 0) {x = NULL; nbrhs=1;}\n    c_pastix(pastix_data, pastix_comm, n, ptr, idx, reinterpret_cast<PASTIX_COMPLEX*>(vals), perm, invp, reinterpret_cast<PASTIX_COMPLEX*>(x), nbrhs, iparm, dparm); \n  }\n  \n  inline void eigen_pastix(pastix_data_t **pastix_data, int 
pastix_comm, int n, int *ptr, int *idx, std::complex<double> *vals, int *perm, int * invp, std::complex<double> *x, int nbrhs, int *iparm, double *dparm)\n  {\n    if (n == 0) { ptr = NULL; idx = NULL; vals = NULL; }\n    if (nbrhs == 0) {x = NULL; nbrhs=1;}\n    z_pastix(pastix_data, pastix_comm, n, ptr, idx, reinterpret_cast<PASTIX_DCOMPLEX*>(vals), perm, invp, reinterpret_cast<PASTIX_DCOMPLEX*>(x), nbrhs, iparm, dparm); \n  }\n\n  // Convert the matrix  to Fortran-style Numbering\n  template <typename MatrixType>\n  void c_to_fortran_numbering (MatrixType& mat)\n  {\n    if ( !(mat.outerIndexPtr()[0]) ) \n    { \n      int i;\n      for(i = 0; i <= mat.rows(); ++i)\n        ++mat.outerIndexPtr()[i];\n      for(i = 0; i < mat.nonZeros(); ++i)\n        ++mat.innerIndexPtr()[i];\n    }\n  }\n  \n  // Convert to C-style Numbering\n  template <typename MatrixType>\n  void fortran_to_c_numbering (MatrixType& mat)\n  {\n    // Check the Numbering\n    if ( mat.outerIndexPtr()[0] == 1 ) \n    { // Convert to C-style numbering\n      int i;\n      for(i = 0; i <= mat.rows(); ++i)\n        --mat.outerIndexPtr()[i];\n      for(i = 0; i < mat.nonZeros(); ++i)\n        --mat.innerIndexPtr()[i];\n    }\n  }\n}\n\n// This is the base class to interface with PaStiX functions. \n// Users should not used this class directly. \ntemplate <class Derived>\nclass PastixBase : public SparseSolverBase<Derived>\n{\n  protected:\n    typedef SparseSolverBase<Derived> Base;\n    using Base::derived;\n    using Base::m_isInitialized;\n  public:\n    using Base::_solve_impl;\n    \n    typedef typename internal::pastix_traits<Derived>::MatrixType _MatrixType;\n    typedef _MatrixType MatrixType;\n    typedef typename MatrixType::Scalar Scalar;\n    typedef typename MatrixType::RealScalar RealScalar;\n    typedef typename MatrixType::StorageIndex StorageIndex;\n    typedef Matrix<Scalar,Dynamic,1> Vector;\n    typedef SparseMatrix<Scalar, ColMajor> ColSpMatrix;\n    enum {\n      ColsAtCompileTime = MatrixType::ColsAtCompileTime,\n      MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime\n    };\n    \n  public:\n    \n    PastixBase() : m_initisOk(false), m_analysisIsOk(false), m_factorizationIsOk(false), m_pastixdata(0), m_size(0)\n    {\n      init();\n    }\n    \n    ~PastixBase() \n    {\n      clean();\n    }\n    \n    template<typename Rhs,typename Dest>\n    bool _solve_impl(const MatrixBase<Rhs> &b, MatrixBase<Dest> &x) const;\n    \n    /** Returns a reference to the integer vector IPARM of PaStiX parameters\n      * to modify the default parameters. 
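For instance, a single entry can be overridden as \code solver.iparm(IPARM_VERBOSE) = API_VERBOSE_NOT; \endcode (index and value constants as defined by PaStiX; \c solver stands for any Pastix* solver object).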
\n      * The statistics related to the different phases of factorization and solve are saved here as well\n      * \\sa analyzePattern() factorize()\n      */\n    Array<StorageIndex,IPARM_SIZE,1>& iparm()\n    {\n      return m_iparm; \n    }\n    \n    /** Return a reference to a particular index parameter of the IPARM vector \n     * \\sa iparm()\n     */\n    \n    int& iparm(int idxparam)\n    {\n      return m_iparm(idxparam);\n    }\n    \n     /** Returns a reference to the double vector DPARM of PaStiX parameters \n      * The statistics related to the different phases of factorization and solve are saved here as well\n      * \\sa analyzePattern() factorize()\n      */\n    Array<double,DPARM_SIZE,1>& dparm()\n    {\n      return m_dparm; \n    }\n    \n    \n    /** Return a reference to a particular index parameter of the DPARM vector \n     * \\sa dparm()\n     */\n    double& dparm(int idxparam)\n    {\n      return m_dparm(idxparam);\n    }\n    \n    inline Index cols() const { return m_size; }\n    inline Index rows() const { return m_size; }\n    \n     /** \\brief Reports whether previous computation was successful.\n      *\n      * \\returns \\c Success if computation was successful,\n      *          \\c NumericalIssue if the PaStiX reports a problem\n      *          \\c InvalidInput if the input matrix is invalid\n      *\n      * \\sa iparm()          \n      */\n    ComputationInfo info() const\n    {\n      eigen_assert(m_isInitialized && \"Decomposition is not initialized.\");\n      return m_info;\n    }\n    \n  protected:\n\n    // Initialize the Pastix data structure, check the matrix\n    void init(); \n    \n    // Compute the ordering and the symbolic factorization\n    void analyzePattern(ColSpMatrix& mat);\n    \n    // Compute the numerical factorization\n    void factorize(ColSpMatrix& mat);\n    \n    // Free all the data allocated by Pastix\n    void clean()\n    {\n      eigen_assert(m_initisOk && \"The Pastix structure should be allocated first\"); \n      m_iparm(IPARM_START_TASK) = API_TASK_CLEAN;\n      m_iparm(IPARM_END_TASK) = API_TASK_CLEAN;\n      internal::eigen_pastix(&m_pastixdata, MPI_COMM_WORLD, 0, 0, 0, (Scalar*)0,\n                             m_perm.data(), m_invp.data(), 0, 0, m_iparm.data(), m_dparm.data());\n    }\n    \n    void compute(ColSpMatrix& mat);\n    \n    int m_initisOk; \n    int m_analysisIsOk;\n    int m_factorizationIsOk;\n    mutable ComputationInfo m_info; \n    mutable pastix_data_t *m_pastixdata; // Data structure for pastix\n    mutable int m_comm; // The MPI communicator identifier\n    mutable Array<int,IPARM_SIZE,1> m_iparm; // integer vector for the input parameters\n    mutable Array<double,DPARM_SIZE,1> m_dparm; // Scalar vector for the input parameters\n    mutable Matrix<StorageIndex,Dynamic,1> m_perm;  // Permutation vector\n    mutable Matrix<StorageIndex,Dynamic,1> m_invp;  // Inverse permutation vector\n    mutable int m_size; // Size of the matrix \n}; \n\n /** Initialize the PaStiX data structure. 
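(On return, iparm(IPARM_ERROR_NUMBER) is checked: a non-zero value leaves info() reporting InvalidInput.) 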
\n   *A first call to this function fills iparm and dparm with the default PaStiX parameters\n   * \\sa iparm() dparm()\n   */\ntemplate <class Derived>\nvoid PastixBase<Derived>::init()\n{\n  m_size = 0; \n  m_iparm.setZero(IPARM_SIZE);\n  m_dparm.setZero(DPARM_SIZE);\n  \n  m_iparm(IPARM_MODIFY_PARAMETER) = API_NO;\n  pastix(&m_pastixdata, MPI_COMM_WORLD,\n         0, 0, 0, 0,\n         0, 0, 0, 1, m_iparm.data(), m_dparm.data());\n  \n  m_iparm[IPARM_MATRIX_VERIFICATION] = API_NO;\n  m_iparm[IPARM_VERBOSE]             = API_VERBOSE_NOT;\n  m_iparm[IPARM_ORDERING]            = API_ORDER_SCOTCH;\n  m_iparm[IPARM_INCOMPLETE]          = API_NO;\n  m_iparm[IPARM_OOC_LIMIT]           = 2000;\n  m_iparm[IPARM_RHS_MAKING]          = API_RHS_B;\n  m_iparm(IPARM_MATRIX_VERIFICATION) = API_NO;\n  \n  m_iparm(IPARM_START_TASK) = API_TASK_INIT;\n  m_iparm(IPARM_END_TASK) = API_TASK_INIT;\n  internal::eigen_pastix(&m_pastixdata, MPI_COMM_WORLD, 0, 0, 0, (Scalar*)0,\n                         0, 0, 0, 0, m_iparm.data(), m_dparm.data());\n  \n  // Check the returned error\n  if(m_iparm(IPARM_ERROR_NUMBER)) {\n    m_info = InvalidInput;\n    m_initisOk = false;\n  }\n  else { \n    m_info = Success;\n    m_initisOk = true;\n  }\n}\n\ntemplate <class Derived>\nvoid PastixBase<Derived>::compute(ColSpMatrix& mat)\n{\n  eigen_assert(mat.rows() == mat.cols() && \"The input matrix should be squared\");\n  \n  analyzePattern(mat);  \n  factorize(mat);\n  \n  m_iparm(IPARM_MATRIX_VERIFICATION) = API_NO;\n}\n\n\ntemplate <class Derived>\nvoid PastixBase<Derived>::analyzePattern(ColSpMatrix& mat)\n{                         \n  eigen_assert(m_initisOk && \"The initialization of PaSTiX failed\");\n  \n  // clean previous calls\n  if(m_size>0)\n    clean();\n  \n  m_size = internal::convert_index<int>(mat.rows());\n  m_perm.resize(m_size);\n  m_invp.resize(m_size);\n  \n  m_iparm(IPARM_START_TASK) = API_TASK_ORDERING;\n  m_iparm(IPARM_END_TASK) = API_TASK_ANALYSE;\n  internal::eigen_pastix(&m_pastixdata, MPI_COMM_WORLD, m_size, mat.outerIndexPtr(), mat.innerIndexPtr(),\n               mat.valuePtr(), m_perm.data(), m_invp.data(), 0, 0, m_iparm.data(), m_dparm.data());\n  \n  // Check the returned error\n  if(m_iparm(IPARM_ERROR_NUMBER))\n  {\n    m_info = NumericalIssue;\n    m_analysisIsOk = false;\n  }\n  else\n  { \n    m_info = Success;\n    m_analysisIsOk = true;\n  }\n}\n\ntemplate <class Derived>\nvoid PastixBase<Derived>::factorize(ColSpMatrix& mat)\n{\n//   if(&m_cpyMat != &mat) m_cpyMat = mat;\n  eigen_assert(m_analysisIsOk && \"The analysis phase should be called before the factorization phase\");\n  m_iparm(IPARM_START_TASK) = API_TASK_NUMFACT;\n  m_iparm(IPARM_END_TASK) = API_TASK_NUMFACT;\n  m_size = internal::convert_index<int>(mat.rows());\n  \n  internal::eigen_pastix(&m_pastixdata, MPI_COMM_WORLD, m_size, mat.outerIndexPtr(), mat.innerIndexPtr(),\n               mat.valuePtr(), m_perm.data(), m_invp.data(), 0, 0, m_iparm.data(), m_dparm.data());\n  \n  // Check the returned error\n  if(m_iparm(IPARM_ERROR_NUMBER))\n  {\n    m_info = NumericalIssue;\n    m_factorizationIsOk = false;\n    m_isInitialized = false;\n  }\n  else\n  {\n    m_info = Success;\n    m_factorizationIsOk = true;\n    m_isInitialized = true;\n  }\n}\n\n/* Solve the system */\ntemplate<typename Base>\ntemplate<typename Rhs,typename Dest>\nbool PastixBase<Base>::_solve_impl(const MatrixBase<Rhs> &b, MatrixBase<Dest> &x) const\n{\n  eigen_assert(m_isInitialized && \"The matrix should be factorized first\");\n  
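/* PaStiX overwrites the right-hand side in place and solves one column-major column at a time, hence the compile-time guard below. */\n  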
EIGEN_STATIC_ASSERT((Dest::Flags&RowMajorBit)==0,\n                     THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES);\n  int rhs = 1;\n  \n  x = b; /* on return, x is overwritten by the computed solution */\n  \n  for (int i = 0; i < b.cols(); i++){\n    m_iparm[IPARM_START_TASK]          = API_TASK_SOLVE;\n    m_iparm[IPARM_END_TASK]            = API_TASK_REFINE;\n  \n    internal::eigen_pastix(&m_pastixdata, MPI_COMM_WORLD, internal::convert_index<int>(x.rows()), 0, 0, 0,\n                           m_perm.data(), m_invp.data(), &x(0, i), rhs, m_iparm.data(), m_dparm.data());\n  }\n  \n  // Check the returned error\n  m_info = m_iparm(IPARM_ERROR_NUMBER)==0 ? Success : NumericalIssue;\n  \n  return m_iparm(IPARM_ERROR_NUMBER)==0;\n}\n\n/** \\ingroup PaStiXSupport_Module\n  * \\class PastixLU\n  * \\brief Sparse direct LU solver based on PaStiX library\n  * \n  * This class is used to solve the linear systems A.X = B with a supernodal LU \n  * factorization in the PaStiX library. The matrix A should be squared and nonsingular\n  * PaStiX requires that the matrix A has a symmetric structural pattern. \n  * This interface can symmetrize the input matrix otherwise. \n  * The vectors or matrices X and B can be either dense or sparse.\n  * \n  * \\tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<>\n  * \\tparam IsStrSym Indicates if the input matrix has a symmetric pattern, default is false\n  * NOTE : Note that if the analysis and factorization phase are called separately, \n  * the input matrix will be symmetrized at each call, hence it is advised to \n  * symmetrize the matrix in a end-user program and set \\p IsStrSym to true\n  *\n  * \\implsparsesolverconcept\n  *\n  * \\sa \\ref TutorialSparseSolverConcept, class SparseLU\n  * \n  */\ntemplate<typename _MatrixType, bool IsStrSym>\nclass PastixLU : public PastixBase< PastixLU<_MatrixType> >\n{\n  public:\n    typedef _MatrixType MatrixType;\n    typedef PastixBase<PastixLU<MatrixType> > Base;\n    typedef typename Base::ColSpMatrix ColSpMatrix;\n    typedef typename MatrixType::StorageIndex StorageIndex;\n    \n  public:\n    PastixLU() : Base()\n    {\n      init();\n    }\n    \n    explicit PastixLU(const MatrixType& matrix):Base()\n    {\n      init();\n      compute(matrix);\n    }\n    /** Compute the LU supernodal factorization of \\p matrix. \n      * iparm and dparm can be used to tune the PaStiX parameters. \n      * see the PaStiX user's manual\n      * \\sa analyzePattern() factorize()\n      */\n    void compute (const MatrixType& matrix)\n    {\n      m_structureIsUptodate = false;\n      ColSpMatrix temp;\n      grabMatrix(matrix, temp);\n      Base::compute(temp);\n    }\n    /** Compute the LU symbolic factorization of \\p matrix using its sparsity pattern. \n      * Several ordering methods can be used at this step. See the PaStiX user's manual. 
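A typical reuse sketch (hypothetical names): \code lu.analyzePattern(A1); lu.factorize(A1); x1 = lu.solve(b1); lu.factorize(A2); x2 = lu.solve(b2); \endcode where A1 and A2 share one sparsity pattern.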
\n      * The result of this operation can be used with successive matrices having the same pattern as \\p matrix\n      * \\sa factorize()\n      */\n    void analyzePattern(const MatrixType& matrix)\n    {\n      m_structureIsUptodate = false;\n      ColSpMatrix temp;\n      grabMatrix(matrix, temp);\n      Base::analyzePattern(temp);\n    }\n\n    /** Compute the LU supernodal factorization of \\p matrix\n      * WARNING The matrix \\p matrix should have the same structural pattern \n      * as the same used in the analysis phase.\n      * \\sa analyzePattern()\n      */ \n    void factorize(const MatrixType& matrix)\n    {\n      ColSpMatrix temp;\n      grabMatrix(matrix, temp);\n      Base::factorize(temp);\n    }\n  protected:\n    \n    void init()\n    {\n      m_structureIsUptodate = false;\n      m_iparm(IPARM_SYM) = API_SYM_NO;\n      m_iparm(IPARM_FACTORIZATION) = API_FACT_LU;\n    }\n    \n    void grabMatrix(const MatrixType& matrix, ColSpMatrix& out)\n    {\n      if(IsStrSym)\n        out = matrix;\n      else\n      {\n        if(!m_structureIsUptodate)\n        {\n          // update the transposed structure\n          m_transposedStructure = matrix.transpose();\n          \n          // Set the elements of the matrix to zero \n          for (Index j=0; j<m_transposedStructure.outerSize(); ++j) \n            for(typename ColSpMatrix::InnerIterator it(m_transposedStructure, j); it; ++it)\n              it.valueRef() = 0.0;\n\n          m_structureIsUptodate = true;\n        }\n        \n        out = m_transposedStructure + matrix;\n      }\n      internal::c_to_fortran_numbering(out);\n    }\n    \n    using Base::m_iparm;\n    using Base::m_dparm;\n    \n    ColSpMatrix m_transposedStructure;\n    bool m_structureIsUptodate;\n};\n\n/** \\ingroup PaStiXSupport_Module\n  * \\class PastixLLT\n  * \\brief A sparse direct supernodal Cholesky (LLT) factorization and solver based on the PaStiX library\n  * \n  * This class is used to solve the linear systems A.X = B via a LL^T supernodal Cholesky factorization\n  * available in the PaStiX library. The matrix A should be symmetric and positive definite\n  * WARNING Selfadjoint complex matrices are not supported in the current version of PaStiX\n  * The vectors or matrices X and B can be either dense or sparse\n  * \n  * \\tparam MatrixType the type of the sparse matrix A, it must be a SparseMatrix<>\n  * \\tparam UpLo The part of the matrix to use : Lower or Upper. 
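Whichever triangle is stored in \p matrix, grabMatrix() below mirrors it into a lower, column-major copy before calling the solver. 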
The default is Lower as required by PaStiX\n  *\n  * \\implsparsesolverconcept\n  *\n  * \\sa \\ref TutorialSparseSolverConcept, class SimplicialLLT\n  */\ntemplate<typename _MatrixType, int _UpLo>\nclass PastixLLT : public PastixBase< PastixLLT<_MatrixType, _UpLo> >\n{\n  public:\n    typedef _MatrixType MatrixType;\n    typedef PastixBase<PastixLLT<MatrixType, _UpLo> > Base;\n    typedef typename Base::ColSpMatrix ColSpMatrix;\n    \n  public:\n    enum { UpLo = _UpLo };\n    PastixLLT() : Base()\n    {\n      init();\n    }\n    \n    explicit PastixLLT(const MatrixType& matrix):Base()\n    {\n      init();\n      compute(matrix);\n    }\n\n    /** Compute the L factor of the LL^T supernodal factorization of \\p matrix \n      * \\sa analyzePattern() factorize()\n      */\n    void compute (const MatrixType& matrix)\n    {\n      ColSpMatrix temp;\n      grabMatrix(matrix, temp);\n      Base::compute(temp);\n    }\n\n     /** Compute the LL^T symbolic factorization of \\p matrix using its sparsity pattern\n      * The result of this operation can be used with successive matrices having the same pattern as \\p matrix\n      * \\sa factorize()\n      */\n    void analyzePattern(const MatrixType& matrix)\n    {\n      ColSpMatrix temp;\n      grabMatrix(matrix, temp);\n      Base::analyzePattern(temp);\n    }\n      /** Compute the LL^T supernodal numerical factorization of \\p matrix \n        * \\sa analyzePattern()\n        */\n    void factorize(const MatrixType& matrix)\n    {\n      ColSpMatrix temp;\n      grabMatrix(matrix, temp);\n      Base::factorize(temp);\n    }\n  protected:\n    using Base::m_iparm;\n    \n    void init()\n    {\n      m_iparm(IPARM_SYM) = API_SYM_YES;\n      m_iparm(IPARM_FACTORIZATION) = API_FACT_LLT;\n    }\n    \n    void grabMatrix(const MatrixType& matrix, ColSpMatrix& out)\n    {\n      out.resize(matrix.rows(), matrix.cols());\n      // Pastix supports only lower, column-major matrices \n      out.template selfadjointView<Lower>() = matrix.template selfadjointView<UpLo>();\n      internal::c_to_fortran_numbering(out);\n    }\n};\n\n/** \\ingroup PaStiXSupport_Module\n  * \\class PastixLDLT\n  * \\brief A sparse direct supernodal Cholesky (LLT) factorization and solver based on the PaStiX library\n  * \n  * This class is used to solve the linear systems A.X = B via a LDL^T supernodal Cholesky factorization\n  * available in the PaStiX library. The matrix A should be symmetric and positive definite\n  * WARNING Selfadjoint complex matrices are not supported in the current version of PaStiX\n  * The vectors or matrices X and B can be either dense or sparse\n  * \n  * \\tparam MatrixType the type of the sparse matrix A, it must be a SparseMatrix<>\n  * \\tparam UpLo The part of the matrix to use : Lower or Upper. 
The default is Lower as required by PaStiX\n  *\n  * \\implsparsesolverconcept\n  *\n  * \\sa \\ref TutorialSparseSolverConcept, class SimplicialLDLT\n  */\ntemplate<typename _MatrixType, int _UpLo>\nclass PastixLDLT : public PastixBase< PastixLDLT<_MatrixType, _UpLo> >\n{\n  public:\n    typedef _MatrixType MatrixType;\n    typedef PastixBase<PastixLDLT<MatrixType, _UpLo> > Base; \n    typedef typename Base::ColSpMatrix ColSpMatrix;\n    \n  public:\n    enum { UpLo = _UpLo };\n    PastixLDLT():Base()\n    {\n      init();\n    }\n    \n    explicit PastixLDLT(const MatrixType& matrix):Base()\n    {\n      init();\n      compute(matrix);\n    }\n\n    /** Compute the L and D factors of the LDL^T factorization of \\p matrix \n      * \\sa analyzePattern() factorize()\n      */\n    void compute (const MatrixType& matrix)\n    {\n      ColSpMatrix temp;\n      grabMatrix(matrix, temp);\n      Base::compute(temp);\n    }\n\n    /** Compute the LDL^T symbolic factorization of \\p matrix using its sparsity pattern\n      * The result of this operation can be used with successive matrices having the same pattern as \\p matrix\n      * \\sa factorize()\n      */\n    void analyzePattern(const MatrixType& matrix)\n    { \n      ColSpMatrix temp;\n      grabMatrix(matrix, temp);\n      Base::analyzePattern(temp);\n    }\n    /** Compute the LDL^T supernodal numerical factorization of \\p matrix \n      * \n      */\n    void factorize(const MatrixType& matrix)\n    {\n      ColSpMatrix temp;\n      grabMatrix(matrix, temp);\n      Base::factorize(temp);\n    }\n\n  protected:\n    using Base::m_iparm;\n    \n    void init()\n    {\n      m_iparm(IPARM_SYM) = API_SYM_YES;\n      m_iparm(IPARM_FACTORIZATION) = API_FACT_LDLT;\n    }\n    \n    void grabMatrix(const MatrixType& matrix, ColSpMatrix& out)\n    {\n      // Pastix supports only lower, column-major matrices \n      out.resize(matrix.rows(), matrix.cols());\n      out.template selfadjointView<Lower>() = matrix.template selfadjointView<UpLo>();\n      internal::c_to_fortran_numbering(out);\n    }\n};\n\n} // end namespace Eigen\n\n#endif\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/PardisoSupport/PardisoSupport.h",
    "content": "/*\n Copyright (c) 2011, Intel Corporation. All rights reserved.\n\n Redistribution and use in source and binary forms, with or without modification,\n are permitted provided that the following conditions are met:\n\n * Redistributions of source code must retain the above copyright notice, this\n   list of conditions and the following disclaimer.\n * Redistributions in binary form must reproduce the above copyright notice,\n   this list of conditions and the following disclaimer in the documentation\n   and/or other materials provided with the distribution.\n * Neither the name of Intel Corporation nor the names of its contributors may\n   be used to endorse or promote products derived from this software without\n   specific prior written permission.\n\n THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\" AND\n ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED\n WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE\n DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR\n ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES\n (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;\n LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON\n ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\n SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\n ********************************************************************************\n *   Content : Eigen bindings to Intel(R) MKL PARDISO\n ********************************************************************************\n*/\n\n#ifndef EIGEN_PARDISOSUPPORT_H\n#define EIGEN_PARDISOSUPPORT_H\n\nnamespace Eigen { \n\ntemplate<typename _MatrixType> class PardisoLU;\ntemplate<typename _MatrixType, int Options=Upper> class PardisoLLT;\ntemplate<typename _MatrixType, int Options=Upper> class PardisoLDLT;\n\nnamespace internal\n{\n  template<typename IndexType>\n  struct pardiso_run_selector\n  {\n    static IndexType run( _MKL_DSS_HANDLE_t pt, IndexType maxfct, IndexType mnum, IndexType type, IndexType phase, IndexType n, void *a,\n                      IndexType *ia, IndexType *ja, IndexType *perm, IndexType nrhs, IndexType *iparm, IndexType msglvl, void *b, void *x)\n    {\n      IndexType error = 0;\n      ::pardiso(pt, &maxfct, &mnum, &type, &phase, &n, a, ia, ja, perm, &nrhs, iparm, &msglvl, b, x, &error);\n      return error;\n    }\n  };\n  template<>\n  struct pardiso_run_selector<long long int>\n  {\n    typedef long long int IndexType;\n    static IndexType run( _MKL_DSS_HANDLE_t pt, IndexType maxfct, IndexType mnum, IndexType type, IndexType phase, IndexType n, void *a,\n                      IndexType *ia, IndexType *ja, IndexType *perm, IndexType nrhs, IndexType *iparm, IndexType msglvl, void *b, void *x)\n    {\n      IndexType error = 0;\n      ::pardiso_64(pt, &maxfct, &mnum, &type, &phase, &n, a, ia, ja, perm, &nrhs, iparm, &msglvl, b, x, &error);\n      return error;\n    }\n  };\n\n  template<class Pardiso> struct pardiso_traits;\n\n  template<typename _MatrixType>\n  struct pardiso_traits< PardisoLU<_MatrixType> >\n  {\n    typedef _MatrixType MatrixType;\n    typedef typename _MatrixType::Scalar Scalar;\n    typedef typename _MatrixType::RealScalar RealScalar;\n    typedef typename _MatrixType::StorageIndex StorageIndex;\n  };\n\n  
template<typename _MatrixType, int Options>\n  struct pardiso_traits< PardisoLLT<_MatrixType, Options> >\n  {\n    typedef _MatrixType MatrixType;\n    typedef typename _MatrixType::Scalar Scalar;\n    typedef typename _MatrixType::RealScalar RealScalar;\n    typedef typename _MatrixType::StorageIndex StorageIndex;\n  };\n\n  template<typename _MatrixType, int Options>\n  struct pardiso_traits< PardisoLDLT<_MatrixType, Options> >\n  {\n    typedef _MatrixType MatrixType;\n    typedef typename _MatrixType::Scalar Scalar;\n    typedef typename _MatrixType::RealScalar RealScalar;\n    typedef typename _MatrixType::StorageIndex StorageIndex;    \n  };\n\n} // end namespace internal\n\ntemplate<class Derived>\nclass PardisoImpl : public SparseSolverBase<Derived>\n{\n  protected:\n    typedef SparseSolverBase<Derived> Base;\n    using Base::derived;\n    using Base::m_isInitialized;\n    \n    typedef internal::pardiso_traits<Derived> Traits;\n  public:\n    using Base::_solve_impl;\n    \n    typedef typename Traits::MatrixType MatrixType;\n    typedef typename Traits::Scalar Scalar;\n    typedef typename Traits::RealScalar RealScalar;\n    typedef typename Traits::StorageIndex StorageIndex;\n    typedef SparseMatrix<Scalar,RowMajor,StorageIndex> SparseMatrixType;\n    typedef Matrix<Scalar,Dynamic,1> VectorType;\n    typedef Matrix<StorageIndex, 1, MatrixType::ColsAtCompileTime> IntRowVectorType;\n    typedef Matrix<StorageIndex, MatrixType::RowsAtCompileTime, 1> IntColVectorType;\n    typedef Array<StorageIndex,64,1,DontAlign> ParameterType;\n    enum {\n      ScalarIsComplex = NumTraits<Scalar>::IsComplex,\n      ColsAtCompileTime = Dynamic,\n      MaxColsAtCompileTime = Dynamic\n    };\n\n    PardisoImpl()\n      : m_analysisIsOk(false), m_factorizationIsOk(false)\n    {\n      eigen_assert((sizeof(StorageIndex) >= sizeof(_INTEGER_t) && sizeof(StorageIndex) <= 8) && \"Non-supported index type\");\n      m_iparm.setZero();\n      m_msglvl = 0; // No output\n      m_isInitialized = false;\n    }\n\n    ~PardisoImpl()\n    {\n      pardisoRelease();\n    }\n\n    inline Index cols() const { return m_size; }\n    inline Index rows() const { return m_size; }\n  \n    /** \\brief Reports whether previous computation was successful.\n      *\n      * \\returns \\c Success if computation was successful,\n      *          \\c NumericalIssue if the matrix appears to be negative.\n      */\n    ComputationInfo info() const\n    {\n      eigen_assert(m_isInitialized && \"Decomposition is not initialized.\");\n      return m_info;\n    }\n\n    /** \\warning for advanced usage only.\n      * \\returns a reference to the parameter array controlling PARDISO.\n      * See the PARDISO manual to know how to use it. 
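For instance, entry 59 of this array selects PARDISO's in-core/out-of-core mode, e.g.\n      * \\code solver.pardisoParameterArray()[59] = 1; \\endcode\n      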
*/\n    ParameterType& pardisoParameterArray()\n    {\n      return m_iparm;\n    }\n    \n    /** Performs a symbolic decomposition on the sparsity pattern of \\a matrix.\n      *\n      * This function is particularly useful when solving for several problems having the same structure.\n      * \n      * \\sa factorize()\n      */\n    Derived& analyzePattern(const MatrixType& matrix);\n    \n    /** Performs a numeric decomposition of \\a matrix\n      *\n      * The given matrix must have the same sparsity pattern as the matrix on which the symbolic decomposition has been performed.\n      *\n      * \\sa analyzePattern()\n      */\n    Derived& factorize(const MatrixType& matrix);\n\n    Derived& compute(const MatrixType& matrix);\n\n    template<typename Rhs,typename Dest>\n    void _solve_impl(const MatrixBase<Rhs> &b, MatrixBase<Dest> &dest) const;\n\n  protected:\n    void pardisoRelease()\n    {\n      if(m_isInitialized) // Factorization ran at least once\n      {\n        internal::pardiso_run_selector<StorageIndex>::run(m_pt, 1, 1, m_type, -1, internal::convert_index<StorageIndex>(m_size),0, 0, 0, m_perm.data(), 0,\n                                                          m_iparm.data(), m_msglvl, NULL, NULL);\n        m_isInitialized = false;\n      }\n    }\n\n    void pardisoInit(int type)\n    {\n      m_type = type;\n      bool symmetric = std::abs(m_type) < 10;\n      m_iparm[0] = 1;   // No solver default\n      m_iparm[1] = 2;   // use Metis for the ordering\n      m_iparm[2] = 0;   // Reserved. Set to zero. (??Numbers of processors, value of OMP_NUM_THREADS??)\n      m_iparm[3] = 0;   // No iterative-direct algorithm\n      m_iparm[4] = 0;   // No user fill-in reducing permutation\n      m_iparm[5] = 0;   // Write solution into x, b is left unchanged\n      m_iparm[6] = 0;   // Not in use\n      m_iparm[7] = 2;   // Max numbers of iterative refinement steps\n      m_iparm[8] = 0;   // Not in use\n      m_iparm[9] = 13;  // Perturb the pivot elements with 1E-13\n      m_iparm[10] = symmetric ? 0 : 1; // Use nonsymmetric permutation and scaling MPS\n      m_iparm[11] = 0;  // Not in use\n      m_iparm[12] = symmetric ? 0 : 1;  // Maximum weighted matching algorithm is switched-off (default for symmetric).\n                                        // Try m_iparm[12] = 1 in case of inappropriate accuracy\n      m_iparm[13] = 0;  // Output: Number of perturbed pivots\n      m_iparm[14] = 0;  // Not in use\n      m_iparm[15] = 0;  // Not in use\n      m_iparm[16] = 0;  // Not in use\n      m_iparm[17] = -1; // Output: Number of nonzeros in the factor LU\n      m_iparm[18] = -1; // Output: Mflops for LU factorization\n      m_iparm[19] = 0;  // Output: Numbers of CG Iterations\n      \n      m_iparm[20] = 0;  // 1x1 pivoting\n      m_iparm[26] = 0;  // No matrix checker\n      m_iparm[27] = (sizeof(RealScalar) == 4) ? 
1 : 0;\n      m_iparm[34] = 1;  // C indexing\n      m_iparm[36] = 0;  // CSR\n      m_iparm[59] = 0;  // 0 - In-Core ; 1 - Automatic switch between In-Core and Out-of-Core modes ; 2 - Out-of-Core\n      \n      memset(m_pt, 0, sizeof(m_pt));\n    }\n\n  protected:\n    // cached data to reduce reallocation, etc.\n    \n    void manageErrorCode(Index error) const\n    {\n      switch(error)\n      {\n        case 0:\n          m_info = Success;\n          break;\n        case -4:\n        case -7:\n          m_info = NumericalIssue;\n          break;\n        default:\n          m_info = InvalidInput;\n      }\n    }\n\n    mutable SparseMatrixType m_matrix;\n    mutable ComputationInfo m_info;\n    bool m_analysisIsOk, m_factorizationIsOk;\n    StorageIndex m_type, m_msglvl;\n    mutable void *m_pt[64];\n    mutable ParameterType m_iparm;\n    mutable IntColVectorType m_perm;\n    Index m_size;\n    \n};\n\ntemplate<class Derived>\nDerived& PardisoImpl<Derived>::compute(const MatrixType& a)\n{\n  m_size = a.rows();\n  eigen_assert(a.rows() == a.cols());\n\n  pardisoRelease();\n  m_perm.setZero(m_size);\n  derived().getMatrix(a);\n  \n  Index error;\n  error = internal::pardiso_run_selector<StorageIndex>::run(m_pt, 1, 1, m_type, 12, internal::convert_index<StorageIndex>(m_size),\n                                                            m_matrix.valuePtr(), m_matrix.outerIndexPtr(), m_matrix.innerIndexPtr(),\n                                                            m_perm.data(), 0, m_iparm.data(), m_msglvl, NULL, NULL);\n  manageErrorCode(error);\n  m_analysisIsOk = true;\n  m_factorizationIsOk = true;\n  m_isInitialized = true;\n  return derived();\n}\n\ntemplate<class Derived>\nDerived& PardisoImpl<Derived>::analyzePattern(const MatrixType& a)\n{\n  m_size = a.rows();\n  eigen_assert(m_size == a.cols());\n\n  pardisoRelease();\n  m_perm.setZero(m_size);\n  derived().getMatrix(a);\n  \n  Index error;\n  error = internal::pardiso_run_selector<StorageIndex>::run(m_pt, 1, 1, m_type, 11, internal::convert_index<StorageIndex>(m_size),\n                                                            m_matrix.valuePtr(), m_matrix.outerIndexPtr(), m_matrix.innerIndexPtr(),\n                                                            m_perm.data(), 0, m_iparm.data(), m_msglvl, NULL, NULL);\n  \n  manageErrorCode(error);\n  m_analysisIsOk = true;\n  m_factorizationIsOk = false;\n  m_isInitialized = true;\n  return derived();\n}\n\ntemplate<class Derived>\nDerived& PardisoImpl<Derived>::factorize(const MatrixType& a)\n{\n  eigen_assert(m_analysisIsOk && \"You must first call analyzePattern()\");\n  eigen_assert(m_size == a.rows() && m_size == a.cols());\n  \n  derived().getMatrix(a);\n\n  Index error;\n  error = internal::pardiso_run_selector<StorageIndex>::run(m_pt, 1, 1, m_type, 22, internal::convert_index<StorageIndex>(m_size),\n                                                            m_matrix.valuePtr(), m_matrix.outerIndexPtr(), m_matrix.innerIndexPtr(),\n                                                            m_perm.data(), 0, m_iparm.data(), m_msglvl, NULL, NULL);\n  \n  manageErrorCode(error);\n  m_factorizationIsOk = true;\n  return derived();\n}\n\ntemplate<class Derived>\ntemplate<typename BDerived,typename XDerived>\nvoid PardisoImpl<Derived>::_solve_impl(const MatrixBase<BDerived> &b, MatrixBase<XDerived>& x) const\n{\n  if(m_iparm[0] == 0) // Factorization was not computed\n  {\n    m_info = InvalidInput;\n    return;\n  }\n\n  //Index n = m_matrix.rows();\n  Index nrhs = 
Index(b.cols());\n  eigen_assert(m_size==b.rows());\n  eigen_assert(((MatrixBase<BDerived>::Flags & RowMajorBit) == 0 || nrhs == 1) && \"Row-major right hand sides are not supported\");\n  eigen_assert(((MatrixBase<XDerived>::Flags & RowMajorBit) == 0 || nrhs == 1) && \"Row-major matrices of unknowns are not supported\");\n  eigen_assert(((nrhs == 1) || b.outerStride() == b.rows()));\n\n\n//  switch (transposed) {\n//    case SvNoTrans    : m_iparm[11] = 0 ; break;\n//    case SvTranspose  : m_iparm[11] = 2 ; break;\n//    case SvAdjoint    : m_iparm[11] = 1 ; break;\n//    default:\n//      //std::cerr << \"Eigen: transposition  option \\\"\" << transposed << \"\\\" not supported by the PARDISO backend\\n\";\n//      m_iparm[11] = 0;\n//  }\n\n  Scalar* rhs_ptr = const_cast<Scalar*>(b.derived().data());\n  Matrix<Scalar,Dynamic,Dynamic,ColMajor> tmp;\n  \n  // Pardiso cannot solve in-place\n  if(rhs_ptr == x.derived().data())\n  {\n    tmp = b;\n    rhs_ptr = tmp.data();\n  }\n  \n  Index error;\n  error = internal::pardiso_run_selector<StorageIndex>::run(m_pt, 1, 1, m_type, 33, internal::convert_index<StorageIndex>(m_size),\n                                                            m_matrix.valuePtr(), m_matrix.outerIndexPtr(), m_matrix.innerIndexPtr(),\n                                                            m_perm.data(), internal::convert_index<StorageIndex>(nrhs), m_iparm.data(), m_msglvl,\n                                                            rhs_ptr, x.derived().data());\n\n  manageErrorCode(error);\n}\n\n\n/** \\ingroup PardisoSupport_Module\n  * \\class PardisoLU\n  * \\brief A sparse direct LU factorization and solver based on the PARDISO library\n  *\n  * This class allows solving A.X = B sparse linear problems via a direct LU factorization\n  * using the Intel MKL PARDISO library. The sparse matrix A must be square and invertible.\n  * The vectors or matrices X and B can be either dense or sparse.\n  *\n  * By default, it runs in in-core mode. To enable PARDISO's out-of-core feature, set:\n  * \\code solver.pardisoParameterArray()[59] = 1; \\endcode\n  *\n  * \\tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<>\n  *\n  * \\implsparsesolverconcept\n  *\n  * \\sa \\ref TutorialSparseSolverConcept, class SparseLU\n  */\ntemplate<typename MatrixType>\nclass PardisoLU : public PardisoImpl< PardisoLU<MatrixType> >\n{\n  protected:\n    typedef PardisoImpl<PardisoLU> Base;\n    using Base::pardisoInit;\n    using Base::m_matrix;\n    friend class PardisoImpl< PardisoLU<MatrixType> >;\n\n  public:\n\n    typedef typename Base::Scalar Scalar;\n    typedef typename Base::RealScalar RealScalar;\n\n    using Base::compute;\n    using Base::solve;\n\n    PardisoLU()\n      : Base()\n    {\n      pardisoInit(Base::ScalarIsComplex ? 13 : 11);\n    }\n\n    explicit PardisoLU(const MatrixType& matrix)\n      : Base()\n    {\n      pardisoInit(Base::ScalarIsComplex ? 13 : 11);\n      compute(matrix);\n    }\n  protected:\n    void getMatrix(const MatrixType& matrix)\n    {\n      m_matrix = matrix;\n      m_matrix.makeCompressed();\n    }\n};\n\n/** \\ingroup PardisoSupport_Module\n  * \\class PardisoLLT\n  * \\brief A sparse direct Cholesky (LLT) factorization and solver based on the PARDISO library\n  *\n  * This class allows solving A.X = B sparse linear problems via an LL^T Cholesky factorization\n  * using the Intel MKL PARDISO library. 
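As a minimal\n  * usage sketch, assuming \\c A is a selfadjoint positive definite SparseMatrix<double>\n  * and \\c b a compatible dense right-hand side:\n  * \\code\n  * PardisoLLT<SparseMatrix<double> > llt(A);\n  * VectorXd x = llt.solve(b);\n  * \\endcode\n  * 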
The sparse matrix A must be selfadjoint and positive definite.\n  * The vectors or matrices X and B can be either dense or sparse.\n  *\n  * By default, it runs in in-core mode. To enable PARDISO's out-of-core feature, set:\n  * \\code solver.pardisoParameterArray()[59] = 1; \\endcode\n  *\n  * \\tparam MatrixType the type of the sparse matrix A, it must be a SparseMatrix<>\n  * \\tparam UpLo can be any bitwise combination of Upper, Lower. The default is Upper, meaning only the upper triangular part has to be used.\n  *         Upper|Lower can be used to tell both triangular parts can be used as input.\n  *\n  * \\implsparsesolverconcept\n  *\n  * \\sa \\ref TutorialSparseSolverConcept, class SimplicialLLT\n  */\ntemplate<typename MatrixType, int _UpLo>\nclass PardisoLLT : public PardisoImpl< PardisoLLT<MatrixType,_UpLo> >\n{\n  protected:\n    typedef PardisoImpl< PardisoLLT<MatrixType,_UpLo> > Base;\n    using Base::pardisoInit;\n    using Base::m_matrix;\n    friend class PardisoImpl< PardisoLLT<MatrixType,_UpLo> >;\n\n  public:\n\n    typedef typename Base::Scalar Scalar;\n    typedef typename Base::RealScalar RealScalar;\n    typedef typename Base::StorageIndex StorageIndex;\n    enum { UpLo = _UpLo };\n    using Base::compute;\n\n    PardisoLLT()\n      : Base()\n    {\n      pardisoInit(Base::ScalarIsComplex ? 4 : 2);\n    }\n\n    explicit PardisoLLT(const MatrixType& matrix)\n      : Base()\n    {\n      pardisoInit(Base::ScalarIsComplex ? 4 : 2);\n      compute(matrix);\n    }\n    \n  protected:\n    \n    void getMatrix(const MatrixType& matrix)\n    {\n      // PARDISO supports only upper, row-major matrices\n      PermutationMatrix<Dynamic,Dynamic,StorageIndex> p_null;\n      m_matrix.resize(matrix.rows(), matrix.cols());\n      m_matrix.template selfadjointView<Upper>() = matrix.template selfadjointView<UpLo>().twistedBy(p_null);\n      m_matrix.makeCompressed();\n    }\n};\n\n/** \\ingroup PardisoSupport_Module\n  * \\class PardisoLDLT\n  * \\brief A sparse direct Cholesky (LDLT) factorization and solver based on the PARDISO library\n  *\n  * This class allows solving A.X = B sparse linear problems via an LDL^T Cholesky factorization\n  * using the Intel MKL PARDISO library. The sparse matrix A is assumed to be selfadjoint and positive definite.\n  * For complex matrices, A can also be symmetric only, see the \\a Options template parameter.\n  * The vectors or matrices X and B can be either dense or sparse.\n  *\n  * By default, it runs in in-core mode. To enable PARDISO's out-of-core feature, set:\n  * \\code solver.pardisoParameterArray()[59] = 1; \\endcode\n  *\n  * \\tparam MatrixType the type of the sparse matrix A, it must be a SparseMatrix<>\n  * \\tparam Options can be any bitwise combination of Upper, Lower, and Symmetric. 
The default is Upper, meaning only the upper triangular part has to be used.\n  *         Symmetric can be used for symmetric, non-selfadjoint complex matrices, the default being to assume a selfadjoint matrix.\n  *         Upper|Lower can be used to tell both triangular parts can be used as input.\n  *\n  * \\implsparsesolverconcept\n  *\n  * \\sa \\ref TutorialSparseSolverConcept, class SimplicialLDLT\n  */\ntemplate<typename MatrixType, int Options>\nclass PardisoLDLT : public PardisoImpl< PardisoLDLT<MatrixType,Options> >\n{\n  protected:\n    typedef PardisoImpl< PardisoLDLT<MatrixType,Options> > Base;\n    using Base::pardisoInit;\n    using Base::m_matrix;\n    friend class PardisoImpl< PardisoLDLT<MatrixType,Options> >;\n\n  public:\n\n    typedef typename Base::Scalar Scalar;\n    typedef typename Base::RealScalar RealScalar;\n    typedef typename Base::StorageIndex StorageIndex;\n    using Base::compute;\n    enum { UpLo = Options&(Upper|Lower) };\n\n    PardisoLDLT()\n      : Base()\n    {\n      pardisoInit(Base::ScalarIsComplex ? ( bool(Options&Symmetric) ? 6 : -4 ) : -2);\n    }\n\n    explicit PardisoLDLT(const MatrixType& matrix)\n      : Base()\n    {\n      pardisoInit(Base::ScalarIsComplex ? ( bool(Options&Symmetric) ? 6 : -4 ) : -2);\n      compute(matrix);\n    }\n    \n    void getMatrix(const MatrixType& matrix)\n    {\n      // PARDISO supports only upper, row-major matrices\n      PermutationMatrix<Dynamic,Dynamic,StorageIndex> p_null;\n      m_matrix.resize(matrix.rows(), matrix.cols());\n      m_matrix.template selfadjointView<Upper>() = matrix.template selfadjointView<UpLo>().twistedBy(p_null);\n      m_matrix.makeCompressed();\n    }\n};\n\n} // end namespace Eigen\n\n#endif // EIGEN_PARDISOSUPPORT_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/QR/ColPivHouseholderQR.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>\n// Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_COLPIVOTINGHOUSEHOLDERQR_H\n#define EIGEN_COLPIVOTINGHOUSEHOLDERQR_H\n\nnamespace Eigen {\n\nnamespace internal {\ntemplate<typename _MatrixType> struct traits<ColPivHouseholderQR<_MatrixType> >\n : traits<_MatrixType>\n{\n  typedef MatrixXpr XprKind;\n  typedef SolverStorage StorageKind;\n  typedef int StorageIndex;\n  enum { Flags = 0 };\n};\n\n} // end namespace internal\n\n/** \\ingroup QR_Module\n  *\n  * \\class ColPivHouseholderQR\n  *\n  * \\brief Householder rank-revealing QR decomposition of a matrix with column-pivoting\n  *\n  * \\tparam _MatrixType the type of the matrix of which we are computing the QR decomposition\n  *\n  * This class performs a rank-revealing QR decomposition of a matrix \\b A into matrices \\b P, \\b Q and \\b R\n  * such that\n  * \\f[\n  *  \\mathbf{A} \\, \\mathbf{P} = \\mathbf{Q} \\, \\mathbf{R}\n  * \\f]\n  * by using Householder transformations. Here, \\b P is a permutation matrix, \\b Q a unitary matrix and \\b R an\n  * upper triangular matrix.\n  *\n  * This decomposition performs column pivoting in order to be rank-revealing and improve\n  * numerical stability. It is slower than HouseholderQR, and faster than FullPivHouseholderQR.\n  *\n  * This class supports the \\link InplaceDecomposition inplace decomposition \\endlink mechanism.\n  * \n  * \\sa MatrixBase::colPivHouseholderQr()\n  */\ntemplate<typename _MatrixType> class ColPivHouseholderQR\n        : public SolverBase<ColPivHouseholderQR<_MatrixType> >\n{\n  public:\n\n    typedef _MatrixType MatrixType;\n    typedef SolverBase<ColPivHouseholderQR> Base;\n    friend class SolverBase<ColPivHouseholderQR>;\n\n    EIGEN_GENERIC_PUBLIC_INTERFACE(ColPivHouseholderQR)\n    enum {\n      MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,\n      MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime\n    };\n    typedef typename internal::plain_diag_type<MatrixType>::type HCoeffsType;\n    typedef PermutationMatrix<ColsAtCompileTime, MaxColsAtCompileTime> PermutationType;\n    typedef typename internal::plain_row_type<MatrixType, Index>::type IntRowVectorType;\n    typedef typename internal::plain_row_type<MatrixType>::type RowVectorType;\n    typedef typename internal::plain_row_type<MatrixType, RealScalar>::type RealRowVectorType;\n    typedef HouseholderSequence<MatrixType,typename internal::remove_all<typename HCoeffsType::ConjugateReturnType>::type> HouseholderSequenceType;\n    typedef typename MatrixType::PlainObject PlainObject;\n\n  private:\n\n    typedef typename PermutationType::StorageIndex PermIndexType;\n\n  public:\n\n    /**\n    * \\brief Default Constructor.\n    *\n    * The default constructor is useful in cases in which the user intends to\n    * perform decompositions via ColPivHouseholderQR::compute(const MatrixType&).\n    */\n    ColPivHouseholderQR()\n      : m_qr(),\n        m_hCoeffs(),\n        m_colsPermutation(),\n        m_colsTranspositions(),\n        m_temp(),\n        m_colNormsUpdated(),\n        m_colNormsDirect(),\n        m_isInitialized(false),\n        m_usePrescribedThreshold(false) 
{}\n\n    /** \\brief Default Constructor with memory preallocation\n      *\n      * Like the default constructor but with preallocation of the internal data\n      * according to the specified problem \\a size.\n      * \\sa ColPivHouseholderQR()\n      */\n    ColPivHouseholderQR(Index rows, Index cols)\n      : m_qr(rows, cols),\n        m_hCoeffs((std::min)(rows,cols)),\n        m_colsPermutation(PermIndexType(cols)),\n        m_colsTranspositions(cols),\n        m_temp(cols),\n        m_colNormsUpdated(cols),\n        m_colNormsDirect(cols),\n        m_isInitialized(false),\n        m_usePrescribedThreshold(false) {}\n\n    /** \\brief Constructs a QR factorization from a given matrix\n      *\n      * This constructor computes the QR factorization of the matrix \\a matrix by calling\n      * the method compute(). It is a short cut for:\n      *\n      * \\code\n      * ColPivHouseholderQR<MatrixType> qr(matrix.rows(), matrix.cols());\n      * qr.compute(matrix);\n      * \\endcode\n      *\n      * \\sa compute()\n      */\n    template<typename InputType>\n    explicit ColPivHouseholderQR(const EigenBase<InputType>& matrix)\n      : m_qr(matrix.rows(), matrix.cols()),\n        m_hCoeffs((std::min)(matrix.rows(),matrix.cols())),\n        m_colsPermutation(PermIndexType(matrix.cols())),\n        m_colsTranspositions(matrix.cols()),\n        m_temp(matrix.cols()),\n        m_colNormsUpdated(matrix.cols()),\n        m_colNormsDirect(matrix.cols()),\n        m_isInitialized(false),\n        m_usePrescribedThreshold(false)\n    {\n      compute(matrix.derived());\n    }\n\n    /** \\brief Constructs a QR factorization from a given matrix\n      *\n      * This overloaded constructor is provided for \\link InplaceDecomposition inplace decomposition \\endlink when \\c MatrixType is a Eigen::Ref.\n      *\n      * \\sa ColPivHouseholderQR(const EigenBase&)\n      */\n    template<typename InputType>\n    explicit ColPivHouseholderQR(EigenBase<InputType>& matrix)\n      : m_qr(matrix.derived()),\n        m_hCoeffs((std::min)(matrix.rows(),matrix.cols())),\n        m_colsPermutation(PermIndexType(matrix.cols())),\n        m_colsTranspositions(matrix.cols()),\n        m_temp(matrix.cols()),\n        m_colNormsUpdated(matrix.cols()),\n        m_colNormsDirect(matrix.cols()),\n        m_isInitialized(false),\n        m_usePrescribedThreshold(false)\n    {\n      computeInPlace();\n    }\n\n    #ifdef EIGEN_PARSED_BY_DOXYGEN\n    /** This method finds a solution x to the equation Ax=b, where A is the matrix of which\n      * *this is the QR decomposition, if any exists.\n      *\n      * \\param b the right-hand-side of the equation to solve.\n      *\n      * \\returns a solution.\n      *\n      * \\note_about_checking_solutions\n      *\n      * \\note_about_arbitrary_choice_of_solution\n      *\n      * Example: \\include ColPivHouseholderQR_solve.cpp\n      * Output: \\verbinclude ColPivHouseholderQR_solve.out\n      */\n    template<typename Rhs>\n    inline const Solve<ColPivHouseholderQR, Rhs>\n    solve(const MatrixBase<Rhs>& b) const;\n    #endif\n\n    HouseholderSequenceType householderQ() const;\n    HouseholderSequenceType matrixQ() const\n    {\n      return householderQ();\n    }\n\n    /** \\returns a reference to the matrix where the Householder QR decomposition is stored\n      */\n    const MatrixType& matrixQR() const\n    {\n      eigen_assert(m_isInitialized && \"ColPivHouseholderQR is not initialized.\");\n      return m_qr;\n    }\n\n    /** \\returns a reference to the 
matrix where the result of the Householder QR factorization is stored\n     * \\warning The strict lower part of this matrix contains internal values.\n     * Only the upper triangular part should be referenced. To get it, use\n     * \\code matrixR().template triangularView<Upper>() \\endcode\n     * For rank-deficient matrices, use\n     * \\code\n     * matrixR().topLeftCorner(rank(), rank()).template triangularView<Upper>()\n     * \\endcode\n     */\n    const MatrixType& matrixR() const\n    {\n      eigen_assert(m_isInitialized && \"ColPivHouseholderQR is not initialized.\");\n      return m_qr;\n    }\n\n    template<typename InputType>\n    ColPivHouseholderQR& compute(const EigenBase<InputType>& matrix);\n\n    /** \\returns a const reference to the column permutation matrix */\n    const PermutationType& colsPermutation() const\n    {\n      eigen_assert(m_isInitialized && \"ColPivHouseholderQR is not initialized.\");\n      return m_colsPermutation;\n    }\n\n    /** \\returns the absolute value of the determinant of the matrix of which\n      * *this is the QR decomposition. It has only linear complexity\n      * (that is, O(n) where n is the dimension of the square matrix)\n      * as the QR decomposition has already been computed.\n      *\n      * \\note This is only for square matrices.\n      *\n      * \\warning a determinant can be very big or small, so for matrices\n      * of large enough dimension, there is a risk of overflow/underflow.\n      * One way to work around that is to use logAbsDeterminant() instead.\n      *\n      * \\sa logAbsDeterminant(), MatrixBase::determinant()\n      */\n    typename MatrixType::RealScalar absDeterminant() const;\n\n    /** \\returns the natural log of the absolute value of the determinant of the matrix of which\n      * *this is the QR decomposition. 
It has only linear complexity\n      * (that is, O(n) where n is the dimension of the square matrix)\n      * as the QR decomposition has already been computed.\n      *\n      * \\note This is only for square matrices.\n      *\n      * \\note This method is useful to work around the risk of overflow/underflow that's inherent\n      * to determinant computation.\n      *\n      * \\sa absDeterminant(), MatrixBase::determinant()\n      */\n    typename MatrixType::RealScalar logAbsDeterminant() const;\n\n    /** \\returns the rank of the matrix of which *this is the QR decomposition.\n      *\n      * \\note This method has to determine which pivots should be considered nonzero.\n      *       For that, it uses the threshold value that you can control by calling\n      *       setThreshold(const RealScalar&).\n      */\n    inline Index rank() const\n    {\n      using std::abs;\n      eigen_assert(m_isInitialized && \"ColPivHouseholderQR is not initialized.\");\n      RealScalar premultiplied_threshold = abs(m_maxpivot) * threshold();\n      Index result = 0;\n      for(Index i = 0; i < m_nonzero_pivots; ++i)\n        result += (abs(m_qr.coeff(i,i)) > premultiplied_threshold);\n      return result;\n    }\n\n    /** \\returns the dimension of the kernel of the matrix of which *this is the QR decomposition.\n      *\n      * \\note This method has to determine which pivots should be considered nonzero.\n      *       For that, it uses the threshold value that you can control by calling\n      *       setThreshold(const RealScalar&).\n      */\n    inline Index dimensionOfKernel() const\n    {\n      eigen_assert(m_isInitialized && \"ColPivHouseholderQR is not initialized.\");\n      return cols() - rank();\n    }\n\n    /** \\returns true if the matrix of which *this is the QR decomposition represents an injective\n      *          linear map, i.e. 
has trivial kernel; false otherwise.\n      *\n      * \\note This method has to determine which pivots should be considered nonzero.\n      *       For that, it uses the threshold value that you can control by calling\n      *       setThreshold(const RealScalar&).\n      */\n    inline bool isInjective() const\n    {\n      eigen_assert(m_isInitialized && \"ColPivHouseholderQR is not initialized.\");\n      return rank() == cols();\n    }\n\n    /** \\returns true if the matrix of which *this is the QR decomposition represents a surjective\n      *          linear map; false otherwise.\n      *\n      * \\note This method has to determine which pivots should be considered nonzero.\n      *       For that, it uses the threshold value that you can control by calling\n      *       setThreshold(const RealScalar&).\n      */\n    inline bool isSurjective() const\n    {\n      eigen_assert(m_isInitialized && \"ColPivHouseholderQR is not initialized.\");\n      return rank() == rows();\n    }\n\n    /** \\returns true if the matrix of which *this is the QR decomposition is invertible.\n      *\n      * \\note This method has to determine which pivots should be considered nonzero.\n      *       For that, it uses the threshold value that you can control by calling\n      *       setThreshold(const RealScalar&).\n      */\n    inline bool isInvertible() const\n    {\n      eigen_assert(m_isInitialized && \"ColPivHouseholderQR is not initialized.\");\n      return isInjective() && isSurjective();\n    }\n\n    /** \\returns the inverse of the matrix of which *this is the QR decomposition.\n      *\n      * \\note If this matrix is not invertible, the returned matrix has undefined coefficients.\n      *       Use isInvertible() to first determine whether this matrix is invertible.\n      */\n    inline const Inverse<ColPivHouseholderQR> inverse() const\n    {\n      eigen_assert(m_isInitialized && \"ColPivHouseholderQR is not initialized.\");\n      return Inverse<ColPivHouseholderQR>(*this);\n    }\n\n    inline Index rows() const { return m_qr.rows(); }\n    inline Index cols() const { return m_qr.cols(); }\n\n    /** \\returns a const reference to the vector of Householder coefficients used to represent the factor \\c Q.\n      *\n      * For advanced uses only.\n      */\n    const HCoeffsType& hCoeffs() const { return m_hCoeffs; }\n\n    /** Allows one to prescribe a threshold to be used by certain methods, such as rank(),\n      * which need to determine when pivots are to be considered nonzero. This is not used for the\n      * QR decomposition itself.\n      *\n      * When it needs to get the threshold value, Eigen calls threshold(). 
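For example, after the\n      * purely illustrative call\n      * \\code qr.setThreshold(1e-8); \\endcode\n      * rank() and related methods treat as zero every pivot whose absolute value is at most\n      * 1e-8 times the largest pivot.\n      * 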
By default, threshold()\n      * uses a formula to automatically determine a reasonable threshold.\n      * Once you have called the present method setThreshold(const RealScalar&),\n      * your value is used instead.\n      *\n      * \\param threshold The new value to use as the threshold.\n      *\n      * A pivot will be considered nonzero if its absolute value is strictly greater than\n      *  \\f$ threshold \\times \\vert maxpivot \\vert \\f$,\n      * where maxpivot is the biggest pivot.\n      *\n      * If you want to come back to the default behavior, call setThreshold(Default_t)\n      */\n    ColPivHouseholderQR& setThreshold(const RealScalar& threshold)\n    {\n      m_usePrescribedThreshold = true;\n      m_prescribedThreshold = threshold;\n      return *this;\n    }\n\n    /** Allows one to come back to the default behavior, letting Eigen use its default formula for\n      * determining the threshold.\n      *\n      * You should pass the special object Eigen::Default as parameter here.\n      * \\code qr.setThreshold(Eigen::Default); \\endcode\n      *\n      * See the documentation of setThreshold(const RealScalar&).\n      */\n    ColPivHouseholderQR& setThreshold(Default_t)\n    {\n      m_usePrescribedThreshold = false;\n      return *this;\n    }\n\n    /** Returns the threshold that will be used by certain methods such as rank().\n      *\n      * See the documentation of setThreshold(const RealScalar&).\n      */\n    RealScalar threshold() const\n    {\n      eigen_assert(m_isInitialized || m_usePrescribedThreshold);\n      return m_usePrescribedThreshold ? m_prescribedThreshold\n      // this formula comes from experimenting (see \"LU precision tuning\" thread on the list)\n      // and turns out to be identical to Higham's formula used already in LDLt.\n                                      : NumTraits<Scalar>::epsilon() * RealScalar(m_qr.diagonalSize());\n    }\n\n    /** \\returns the number of nonzero pivots in the QR decomposition.\n      * Here nonzero is meant in the exact sense, not in a fuzzy sense.\n      * So that notion isn't really intrinsically interesting, but it is\n      * still useful when implementing algorithms.\n      *\n      * \\sa rank()\n      */\n    inline Index nonzeroPivots() const\n    {\n      eigen_assert(m_isInitialized && \"ColPivHouseholderQR is not initialized.\");\n      return m_nonzero_pivots;\n    }\n\n    /** \\returns the absolute value of the biggest pivot, i.e. the biggest\n      *          diagonal coefficient of R.\n      */\n    RealScalar maxPivot() const { return m_maxpivot; }\n\n    /** \\brief Reports whether the QR factorization was successful.\n      *\n      * \\note This function always returns \\c Success. 
It is provided for compatibility\n      * with other factorization routines.\n      * \\returns \\c Success\n      */\n    ComputationInfo info() const\n    {\n      eigen_assert(m_isInitialized && \"Decomposition is not initialized.\");\n      return Success;\n    }\n\n    #ifndef EIGEN_PARSED_BY_DOXYGEN\n    template<typename RhsType, typename DstType>\n    void _solve_impl(const RhsType &rhs, DstType &dst) const;\n\n    template<bool Conjugate, typename RhsType, typename DstType>\n    void _solve_impl_transposed(const RhsType &rhs, DstType &dst) const;\n    #endif\n\n  protected:\n\n    friend class CompleteOrthogonalDecomposition<MatrixType>;\n\n    static void check_template_parameters()\n    {\n      EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);\n    }\n\n    void computeInPlace();\n\n    MatrixType m_qr;\n    HCoeffsType m_hCoeffs;\n    PermutationType m_colsPermutation;\n    IntRowVectorType m_colsTranspositions;\n    RowVectorType m_temp;\n    RealRowVectorType m_colNormsUpdated;\n    RealRowVectorType m_colNormsDirect;\n    bool m_isInitialized, m_usePrescribedThreshold;\n    RealScalar m_prescribedThreshold, m_maxpivot;\n    Index m_nonzero_pivots;\n    Index m_det_pq;\n};\n\ntemplate<typename MatrixType>\ntypename MatrixType::RealScalar ColPivHouseholderQR<MatrixType>::absDeterminant() const\n{\n  using std::abs;\n  eigen_assert(m_isInitialized && \"ColPivHouseholderQR is not initialized.\");\n  eigen_assert(m_qr.rows() == m_qr.cols() && \"You can't take the determinant of a non-square matrix!\");\n  return abs(m_qr.diagonal().prod());\n}\n\ntemplate<typename MatrixType>\ntypename MatrixType::RealScalar ColPivHouseholderQR<MatrixType>::logAbsDeterminant() const\n{\n  eigen_assert(m_isInitialized && \"ColPivHouseholderQR is not initialized.\");\n  eigen_assert(m_qr.rows() == m_qr.cols() && \"You can't take the determinant of a non-square matrix!\");\n  return m_qr.diagonal().cwiseAbs().array().log().sum();\n}\n\n/** Performs the QR factorization of the given matrix \\a matrix. 
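A sketch of the typical\n  * reuse pattern, \\c A1 and \\c A2 being two matrices of the same type:\n  * \\code\n  * ColPivHouseholderQR<MatrixXd> qr;\n  * qr.compute(A1); // factor a first matrix\n  * qr.compute(A2); // reuse the same object to factor another one\n  * \\endcode\n  * 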
The result of\n  * the factorization is stored into \\c *this, and a reference to \\c *this\n  * is returned.\n  *\n  * \\sa class ColPivHouseholderQR, ColPivHouseholderQR(const MatrixType&)\n  */\ntemplate<typename MatrixType>\ntemplate<typename InputType>\nColPivHouseholderQR<MatrixType>& ColPivHouseholderQR<MatrixType>::compute(const EigenBase<InputType>& matrix)\n{\n  m_qr = matrix.derived();\n  computeInPlace();\n  return *this;\n}\n\ntemplate<typename MatrixType>\nvoid ColPivHouseholderQR<MatrixType>::computeInPlace()\n{\n  check_template_parameters();\n\n  // the column permutation is stored as int indices, so just to be sure:\n  eigen_assert(m_qr.cols()<=NumTraits<int>::highest());\n\n  using std::abs;\n\n  Index rows = m_qr.rows();\n  Index cols = m_qr.cols();\n  Index size = m_qr.diagonalSize();\n\n  m_hCoeffs.resize(size);\n\n  m_temp.resize(cols);\n\n  m_colsTranspositions.resize(m_qr.cols());\n  Index number_of_transpositions = 0;\n\n  m_colNormsUpdated.resize(cols);\n  m_colNormsDirect.resize(cols);\n  for (Index k = 0; k < cols; ++k) {\n    // colNormsDirect(k) caches the most recent directly computed norm of\n    // column k.\n    m_colNormsDirect.coeffRef(k) = m_qr.col(k).norm();\n    m_colNormsUpdated.coeffRef(k) = m_colNormsDirect.coeffRef(k);\n  }\n\n  RealScalar threshold_helper =  numext::abs2<RealScalar>(m_colNormsUpdated.maxCoeff() * NumTraits<RealScalar>::epsilon()) / RealScalar(rows);\n  RealScalar norm_downdate_threshold = numext::sqrt(NumTraits<RealScalar>::epsilon());\n\n  m_nonzero_pivots = size; // the generic case is that in which all pivots are nonzero (invertible case)\n  m_maxpivot = RealScalar(0);\n\n  for(Index k = 0; k < size; ++k)\n  {\n    // first, we look up in our table m_colNormsUpdated which column has the biggest norm\n    Index biggest_col_index;\n    RealScalar biggest_col_sq_norm = numext::abs2(m_colNormsUpdated.tail(cols-k).maxCoeff(&biggest_col_index));\n    biggest_col_index += k;\n\n    // Track the number of meaningful pivots but do not stop the decomposition to make\n    // sure that the initial matrix is properly reproduced. 
See bug 941.\n    if(m_nonzero_pivots==size && biggest_col_sq_norm < threshold_helper * RealScalar(rows-k))\n      m_nonzero_pivots = k;\n\n    // apply the transposition to the columns\n    m_colsTranspositions.coeffRef(k) = biggest_col_index;\n    if(k != biggest_col_index) {\n      m_qr.col(k).swap(m_qr.col(biggest_col_index));\n      std::swap(m_colNormsUpdated.coeffRef(k), m_colNormsUpdated.coeffRef(biggest_col_index));\n      std::swap(m_colNormsDirect.coeffRef(k), m_colNormsDirect.coeffRef(biggest_col_index));\n      ++number_of_transpositions;\n    }\n\n    // generate the householder vector, store it below the diagonal\n    RealScalar beta;\n    m_qr.col(k).tail(rows-k).makeHouseholderInPlace(m_hCoeffs.coeffRef(k), beta);\n\n    // apply the householder transformation to the diagonal coefficient\n    m_qr.coeffRef(k,k) = beta;\n\n    // remember the maximum absolute value of diagonal coefficients\n    if(abs(beta) > m_maxpivot) m_maxpivot = abs(beta);\n\n    // apply the householder transformation\n    m_qr.bottomRightCorner(rows-k, cols-k-1)\n        .applyHouseholderOnTheLeft(m_qr.col(k).tail(rows-k-1), m_hCoeffs.coeffRef(k), &m_temp.coeffRef(k+1));\n\n    // update our table of norms of the columns\n    for (Index j = k + 1; j < cols; ++j) {\n      // The following implements the stable norm downdate step discussed in\n      // http://www.netlib.org/lapack/lawnspdf/lawn176.pdf\n      // and used in LAPACK routines xGEQPF and xGEQP3.\n      // See lines 278-297 in http://www.netlib.org/lapack/explore-html/dc/df4/sgeqpf_8f_source.html\n      if (m_colNormsUpdated.coeffRef(j) != RealScalar(0)) {\n        RealScalar temp = abs(m_qr.coeffRef(k, j)) / m_colNormsUpdated.coeffRef(j);\n        temp = (RealScalar(1) + temp) * (RealScalar(1) - temp);\n        temp = temp <  RealScalar(0) ? RealScalar(0) : temp;\n        RealScalar temp2 = temp * numext::abs2<RealScalar>(m_colNormsUpdated.coeffRef(j) /\n                                                           m_colNormsDirect.coeffRef(j));\n        if (temp2 <= norm_downdate_threshold) {\n          // The updated norm has become too inaccurate so re-compute the column\n          // norm directly.\n          m_colNormsDirect.coeffRef(j) = m_qr.col(j).tail(rows - k - 1).norm();\n          m_colNormsUpdated.coeffRef(j) = m_colNormsDirect.coeffRef(j);\n        } else {\n          m_colNormsUpdated.coeffRef(j) *= numext::sqrt(temp);\n        }\n      }\n    }\n  }\n\n  m_colsPermutation.setIdentity(PermIndexType(cols));\n  for(PermIndexType k = 0; k < size/*m_nonzero_pivots*/; ++k)\n    m_colsPermutation.applyTranspositionOnTheRight(k, PermIndexType(m_colsTranspositions.coeff(k)));\n\n  m_det_pq = (number_of_transpositions%2) ? 
-1 : 1;\n  m_isInitialized = true;\n}\n\n#ifndef EIGEN_PARSED_BY_DOXYGEN\ntemplate<typename _MatrixType>\ntemplate<typename RhsType, typename DstType>\nvoid ColPivHouseholderQR<_MatrixType>::_solve_impl(const RhsType &rhs, DstType &dst) const\n{\n  const Index nonzero_pivots = nonzeroPivots();\n\n  if(nonzero_pivots == 0)\n  {\n    dst.setZero();\n    return;\n  }\n\n  typename RhsType::PlainObject c(rhs);\n\n  c.applyOnTheLeft(householderQ().setLength(nonzero_pivots).adjoint() );\n\n  m_qr.topLeftCorner(nonzero_pivots, nonzero_pivots)\n      .template triangularView<Upper>()\n      .solveInPlace(c.topRows(nonzero_pivots));\n\n  for(Index i = 0; i < nonzero_pivots; ++i) dst.row(m_colsPermutation.indices().coeff(i)) = c.row(i);\n  for(Index i = nonzero_pivots; i < cols(); ++i) dst.row(m_colsPermutation.indices().coeff(i)).setZero();\n}\n\ntemplate<typename _MatrixType>\ntemplate<bool Conjugate, typename RhsType, typename DstType>\nvoid ColPivHouseholderQR<_MatrixType>::_solve_impl_transposed(const RhsType &rhs, DstType &dst) const\n{\n  const Index nonzero_pivots = nonzeroPivots();\n\n  if(nonzero_pivots == 0)\n  {\n    dst.setZero();\n    return;\n  }\n\n  typename RhsType::PlainObject c(m_colsPermutation.transpose()*rhs);\n\n  m_qr.topLeftCorner(nonzero_pivots, nonzero_pivots)\n        .template triangularView<Upper>()\n        .transpose().template conjugateIf<Conjugate>()\n        .solveInPlace(c.topRows(nonzero_pivots));\n\n  dst.topRows(nonzero_pivots) = c.topRows(nonzero_pivots);\n  dst.bottomRows(rows()-nonzero_pivots).setZero();\n\n  dst.applyOnTheLeft(householderQ().setLength(nonzero_pivots).template conjugateIf<!Conjugate>() );\n}\n#endif\n\nnamespace internal {\n\ntemplate<typename DstXprType, typename MatrixType>\nstruct Assignment<DstXprType, Inverse<ColPivHouseholderQR<MatrixType> >, internal::assign_op<typename DstXprType::Scalar,typename ColPivHouseholderQR<MatrixType>::Scalar>, Dense2Dense>\n{\n  typedef ColPivHouseholderQR<MatrixType> QrType;\n  typedef Inverse<QrType> SrcXprType;\n  static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename QrType::Scalar> &)\n  {\n    dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.cols()));\n  }\n};\n\n} // end namespace internal\n\n/** \\returns the matrix Q as a sequence of householder transformations.\n  * You can extract the meaningful part only by using:\n  * \\code qr.householderQ().setLength(qr.nonzeroPivots()) \\endcode*/\ntemplate<typename MatrixType>\ntypename ColPivHouseholderQR<MatrixType>::HouseholderSequenceType ColPivHouseholderQR<MatrixType>\n  ::householderQ() const\n{\n  eigen_assert(m_isInitialized && \"ColPivHouseholderQR is not initialized.\");\n  return HouseholderSequenceType(m_qr, m_hCoeffs.conjugate());\n}\n\n/** \\return the column-pivoting Householder QR decomposition of \\c *this.\n  *\n  * \\sa class ColPivHouseholderQR\n  */\ntemplate<typename Derived>\nconst ColPivHouseholderQR<typename MatrixBase<Derived>::PlainObject>\nMatrixBase<Derived>::colPivHouseholderQr() const\n{\n  return ColPivHouseholderQR<PlainObject>(eval());\n}\n\n} // end namespace Eigen\n\n#endif // EIGEN_COLPIVOTINGHOUSEHOLDERQR_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h",
    "content": "/*\n Copyright (c) 2011, Intel Corporation. All rights reserved.\n\n Redistribution and use in source and binary forms, with or without modification,\n are permitted provided that the following conditions are met:\n\n * Redistributions of source code must retain the above copyright notice, this\n   list of conditions and the following disclaimer.\n * Redistributions in binary form must reproduce the above copyright notice,\n   this list of conditions and the following disclaimer in the documentation\n   and/or other materials provided with the distribution.\n * Neither the name of Intel Corporation nor the names of its contributors may\n   be used to endorse or promote products derived from this software without\n   specific prior written permission.\n\n THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\" AND\n ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED\n WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE\n DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR\n ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES\n (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;\n LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON\n ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\n SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\n ********************************************************************************\n *   Content : Eigen bindings to LAPACKe\n *    Householder QR decomposition of a matrix with column pivoting based on\n *    LAPACKE_?geqp3 function.\n ********************************************************************************\n*/\n\n#ifndef EIGEN_COLPIVOTINGHOUSEHOLDERQR_LAPACKE_H\n#define EIGEN_COLPIVOTINGHOUSEHOLDERQR_LAPACKE_H\n\nnamespace Eigen { \n\n/** \\internal Specialization for the data types supported by LAPACKe */\n\n#define EIGEN_LAPACKE_QR_COLPIV(EIGTYPE, LAPACKE_TYPE, LAPACKE_PREFIX, EIGCOLROW, LAPACKE_COLROW) \\\ntemplate<> template<typename InputType> inline \\\nColPivHouseholderQR<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW, Dynamic, Dynamic> >& \\\nColPivHouseholderQR<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW, Dynamic, Dynamic> >::compute( \\\n              const EigenBase<InputType>& matrix) \\\n\\\n{ \\\n  using std::abs; \\\n  typedef Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW, Dynamic, Dynamic> MatrixType; \\\n  typedef MatrixType::RealScalar RealScalar; \\\n  Index rows = matrix.rows();\\\n  Index cols = matrix.cols();\\\n\\\n  m_qr = matrix;\\\n  Index size = m_qr.diagonalSize();\\\n  m_hCoeffs.resize(size);\\\n\\\n  m_colsTranspositions.resize(cols);\\\n  /*Index number_of_transpositions = 0;*/ \\\n\\\n  m_nonzero_pivots = 0; \\\n  m_maxpivot = RealScalar(0);\\\n  m_colsPermutation.resize(cols); \\\n  m_colsPermutation.indices().setZero(); \\\n\\\n  lapack_int lda = internal::convert_index<lapack_int,Index>(m_qr.outerStride()); \\\n  lapack_int matrix_order = LAPACKE_COLROW; \\\n  LAPACKE_##LAPACKE_PREFIX##geqp3( matrix_order, internal::convert_index<lapack_int,Index>(rows), internal::convert_index<lapack_int,Index>(cols), \\\n                              (LAPACKE_TYPE*)m_qr.data(), lda, (lapack_int*)m_colsPermutation.indices().data(), (LAPACKE_TYPE*)m_hCoeffs.data()); \\\n  m_isInitialized = true; \\\n  
m_maxpivot=m_qr.diagonal().cwiseAbs().maxCoeff(); \\\n  m_hCoeffs.adjointInPlace(); \\\n  RealScalar premultiplied_threshold = abs(m_maxpivot) * threshold(); \\\n  lapack_int *perm = m_colsPermutation.indices().data(); \\\n  for(Index i=0;i<size;i++) { \\\n    m_nonzero_pivots += (abs(m_qr.coeff(i,i)) > premultiplied_threshold);\\\n  } \\\n  for(Index i=0;i<cols;i++) perm[i]--;\\\n\\\n  /*m_det_pq = (number_of_transpositions%2) ? -1 : 1;  // TODO: It's not needed now; fix upon availability in Eigen */ \\\n\\\n  return *this; \\\n}\n\nEIGEN_LAPACKE_QR_COLPIV(double,   double,        d, ColMajor, LAPACK_COL_MAJOR)\nEIGEN_LAPACKE_QR_COLPIV(float,    float,         s, ColMajor, LAPACK_COL_MAJOR)\nEIGEN_LAPACKE_QR_COLPIV(dcomplex, lapack_complex_double, z, ColMajor, LAPACK_COL_MAJOR)\nEIGEN_LAPACKE_QR_COLPIV(scomplex, lapack_complex_float,  c, ColMajor, LAPACK_COL_MAJOR)\n\nEIGEN_LAPACKE_QR_COLPIV(double,   double,        d, RowMajor, LAPACK_ROW_MAJOR)\nEIGEN_LAPACKE_QR_COLPIV(float,    float,         s, RowMajor, LAPACK_ROW_MAJOR)\nEIGEN_LAPACKE_QR_COLPIV(dcomplex, lapack_complex_double, z, RowMajor, LAPACK_ROW_MAJOR)\nEIGEN_LAPACKE_QR_COLPIV(scomplex, lapack_complex_float,  c, RowMajor, LAPACK_ROW_MAJOR)\n\n} // end namespace Eigen\n\n#endif // EIGEN_COLPIVOTINGHOUSEHOLDERQR_LAPACKE_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/QR/CompleteOrthogonalDecomposition.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2016 Rasmus Munk Larsen <rmlarsen@google.com>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_COMPLETEORTHOGONALDECOMPOSITION_H\n#define EIGEN_COMPLETEORTHOGONALDECOMPOSITION_H\n\nnamespace Eigen {\n\nnamespace internal {\ntemplate <typename _MatrixType>\nstruct traits<CompleteOrthogonalDecomposition<_MatrixType> >\n    : traits<_MatrixType> {\n  typedef MatrixXpr XprKind;\n  typedef SolverStorage StorageKind;\n  typedef int StorageIndex;\n  enum { Flags = 0 };\n};\n\n}  // end namespace internal\n\n/** \\ingroup QR_Module\n  *\n  * \\class CompleteOrthogonalDecomposition\n  *\n  * \\brief Complete orthogonal decomposition (COD) of a matrix.\n  *\n  * \\param MatrixType the type of the matrix of which we are computing the COD.\n  *\n  * This class performs a rank-revealing complete orthogonal decomposition of a\n  * matrix  \\b A into matrices \\b P, \\b Q, \\b T, and \\b Z such that\n  * \\f[\n  *  \\mathbf{A} \\, \\mathbf{P} = \\mathbf{Q} \\,\n  *                     \\begin{bmatrix} \\mathbf{T} &  \\mathbf{0} \\\\\n  *                                     \\mathbf{0} & \\mathbf{0} \\end{bmatrix} \\, \\mathbf{Z}\n  * \\f]\n  * by using Householder transformations. Here, \\b P is a permutation matrix,\n  * \\b Q and \\b Z are unitary matrices and \\b T an upper triangular matrix of\n  * size rank-by-rank. \\b A may be rank deficient.\n  *\n  * This class supports the \\link InplaceDecomposition inplace decomposition \\endlink mechanism.\n  * \n  * \\sa MatrixBase::completeOrthogonalDecomposition()\n  */\ntemplate <typename _MatrixType> class CompleteOrthogonalDecomposition\n          : public SolverBase<CompleteOrthogonalDecomposition<_MatrixType> >\n{\n public:\n  typedef _MatrixType MatrixType;\n  typedef SolverBase<CompleteOrthogonalDecomposition> Base;\n\n  template<typename Derived>\n  friend struct internal::solve_assertion;\n\n  EIGEN_GENERIC_PUBLIC_INTERFACE(CompleteOrthogonalDecomposition)\n  enum {\n    MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,\n    MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime\n  };\n  typedef typename internal::plain_diag_type<MatrixType>::type HCoeffsType;\n  typedef PermutationMatrix<ColsAtCompileTime, MaxColsAtCompileTime>\n      PermutationType;\n  typedef typename internal::plain_row_type<MatrixType, Index>::type\n      IntRowVectorType;\n  typedef typename internal::plain_row_type<MatrixType>::type RowVectorType;\n  typedef typename internal::plain_row_type<MatrixType, RealScalar>::type\n      RealRowVectorType;\n  typedef HouseholderSequence<\n      MatrixType, typename internal::remove_all<\n                      typename HCoeffsType::ConjugateReturnType>::type>\n      HouseholderSequenceType;\n  typedef typename MatrixType::PlainObject PlainObject;\n\n private:\n  typedef typename PermutationType::Index PermIndexType;\n\n public:\n  /**\n   * \\brief Default Constructor.\n   *\n   * The default constructor is useful in cases in which the user intends to\n   * perform decompositions via\n   * \\c CompleteOrthogonalDecomposition::compute(const* MatrixType&).\n   */\n  CompleteOrthogonalDecomposition() : m_cpqr(), m_zCoeffs(), m_temp() {}\n\n  /** \\brief Default Constructor with memory preallocation\n   *\n   * Like the default 
constructor but with preallocation of the internal data\n   * according to the specified problem \\a size.\n   * \\sa CompleteOrthogonalDecomposition()\n   */\n  CompleteOrthogonalDecomposition(Index rows, Index cols)\n      : m_cpqr(rows, cols), m_zCoeffs((std::min)(rows, cols)), m_temp(cols) {}\n\n  /** \\brief Constructs a complete orthogonal decomposition from a given\n   * matrix.\n   *\n   * This constructor computes the complete orthogonal decomposition of the\n   * matrix \\a matrix by calling the method compute(). The default\n   * threshold for rank determination will be used. It is a shortcut for:\n   *\n   * \\code\n   * CompleteOrthogonalDecomposition<MatrixType> cod(matrix.rows(),\n   *                                                 matrix.cols());\n   * cod.setThreshold(Default);\n   * cod.compute(matrix);\n   * \\endcode\n   *\n   * \\sa compute()\n   */\n  template <typename InputType>\n  explicit CompleteOrthogonalDecomposition(const EigenBase<InputType>& matrix)\n      : m_cpqr(matrix.rows(), matrix.cols()),\n        m_zCoeffs((std::min)(matrix.rows(), matrix.cols())),\n        m_temp(matrix.cols())\n  {\n    compute(matrix.derived());\n  }\n\n  /** \\brief Constructs a complete orthogonal decomposition from a given matrix\n    *\n    * This overloaded constructor is provided for \\link InplaceDecomposition inplace decomposition \\endlink when \\c MatrixType is an Eigen::Ref.\n    *\n    * \\sa CompleteOrthogonalDecomposition(const EigenBase&)\n    */\n  template<typename InputType>\n  explicit CompleteOrthogonalDecomposition(EigenBase<InputType>& matrix)\n    : m_cpqr(matrix.derived()),\n      m_zCoeffs((std::min)(matrix.rows(), matrix.cols())),\n      m_temp(matrix.cols())\n  {\n    computeInPlace();\n  } \n\n  #ifdef EIGEN_PARSED_BY_DOXYGEN\n  /** This method computes the minimum-norm solution X to a least squares\n   * problem \\f[\\mathrm{minimize} \\|A X - B\\|, \\f] where \\b A is the matrix of\n   * which \\c *this is the complete orthogonal decomposition.\n   *\n   * \\param b the right-hand sides of the problem to solve.\n   *\n   * \\returns a solution.\n   *\n   */\n  template <typename Rhs>\n  inline const Solve<CompleteOrthogonalDecomposition, Rhs> solve(\n      const MatrixBase<Rhs>& b) const;\n  #endif\n\n  HouseholderSequenceType householderQ(void) const;\n  HouseholderSequenceType matrixQ(void) const { return m_cpqr.householderQ(); }\n\n  /** \\returns the matrix \\b Z.\n   */\n  MatrixType matrixZ() const {\n    MatrixType Z = MatrixType::Identity(m_cpqr.cols(), m_cpqr.cols());\n    applyZOnTheLeftInPlace<false>(Z);\n    return Z;\n  }\n\n  /** \\returns a reference to the matrix where the complete orthogonal\n   * decomposition is stored\n   */\n  const MatrixType& matrixQTZ() const { return m_cpqr.matrixQR(); }\n\n  /** \\returns a reference to the matrix where the complete orthogonal\n   * decomposition is stored.\n   * \\warning The strict lower part and \\code cols() - rank() \\endcode right\n   * columns of this matrix contain internal values.\n   * Only the upper triangular part should be referenced. 
To get it, use\n   * \\code matrixT().template triangularView<Upper>() \\endcode\n   * For rank-deficient matrices, use\n   * \\code\n   * matrixT().topLeftCorner(rank(), rank()).template triangularView<Upper>()\n   * \\endcode\n   */\n  const MatrixType& matrixT() const { return m_cpqr.matrixQR(); }\n\n  template <typename InputType>\n  CompleteOrthogonalDecomposition& compute(const EigenBase<InputType>& matrix) {\n    // Compute the column pivoted QR factorization A P = Q R.\n    m_cpqr.compute(matrix);\n    computeInPlace();\n    return *this;\n  }\n\n  /** \\returns a const reference to the column permutation matrix */\n  const PermutationType& colsPermutation() const {\n    return m_cpqr.colsPermutation();\n  }\n\n  /** \\returns the absolute value of the determinant of the matrix of which\n   * *this is the complete orthogonal decomposition. It has only linear\n   * complexity (that is, O(n) where n is the dimension of the square matrix)\n   * as the complete orthogonal decomposition has already been computed.\n   *\n   * \\note This is only for square matrices.\n   *\n   * \\warning a determinant can be very big or small, so for matrices\n   * of large enough dimension, there is a risk of overflow/underflow.\n   * One way to work around that is to use logAbsDeterminant() instead.\n   *\n   * \\sa logAbsDeterminant(), MatrixBase::determinant()\n   */\n  typename MatrixType::RealScalar absDeterminant() const;\n\n  /** \\returns the natural log of the absolute value of the determinant of the\n   * matrix of which *this is the complete orthogonal decomposition. It has\n   * only linear complexity (that is, O(n) where n is the dimension of the\n   * square matrix) as the complete orthogonal decomposition has already been\n   * computed.\n   *\n   * \\note This is only for square matrices.\n   *\n   * \\note This method is useful to work around the risk of overflow/underflow\n   * that's inherent to determinant computation.\n   *\n   * \\sa absDeterminant(), MatrixBase::determinant()\n   */\n  typename MatrixType::RealScalar logAbsDeterminant() const;\n\n  /** \\returns the rank of the matrix of which *this is the complete orthogonal\n   * decomposition.\n   *\n   * \\note This method has to determine which pivots should be considered\n   * nonzero. For that, it uses the threshold value that you can control by\n   * calling setThreshold(const RealScalar&).\n   */\n  inline Index rank() const { return m_cpqr.rank(); }\n\n  /** \\returns the dimension of the kernel of the matrix of which *this is the\n   * complete orthogonal decomposition.\n   *\n   * \\note This method has to determine which pivots should be considered\n   * nonzero. For that, it uses the threshold value that you can control by\n   * calling setThreshold(const RealScalar&).\n   */\n  inline Index dimensionOfKernel() const { return m_cpqr.dimensionOfKernel(); }\n\n  /** \\returns true if the matrix of which *this is the decomposition represents\n   * an injective linear map, i.e. has trivial kernel; false otherwise.\n   *\n   * \\note This method has to determine which pivots should be considered\n   * nonzero. For that, it uses the threshold value that you can control by\n   * calling setThreshold(const RealScalar&).\n   */\n  inline bool isInjective() const { return m_cpqr.isInjective(); }\n\n  /** \\returns true if the matrix of which *this is the decomposition represents\n   * a surjective linear map; false otherwise.\n   *\n   * \\note This method has to determine which pivots should be considered\n   * nonzero. 
For that, it uses the threshold value that you can control by\n   * calling setThreshold(const RealScalar&).\n   */\n  inline bool isSurjective() const { return m_cpqr.isSurjective(); }\n\n  /** \\returns true if the matrix of which *this is the complete orthogonal\n   * decomposition is invertible.\n   *\n   * \\note This method has to determine which pivots should be considered\n   * nonzero. For that, it uses the threshold value that you can control by\n   * calling setThreshold(const RealScalar&).\n   */\n  inline bool isInvertible() const { return m_cpqr.isInvertible(); }\n\n  /** \\returns the pseudo-inverse of the matrix of which *this is the complete\n   * orthogonal decomposition.\n   * \\warning: Do not compute \\c this->pseudoInverse()*rhs to solve a linear system.\n   * It is more efficient and numerically stable to call \\c this->solve(rhs).\n   */\n  inline const Inverse<CompleteOrthogonalDecomposition> pseudoInverse() const\n  {\n    eigen_assert(m_cpqr.m_isInitialized && \"CompleteOrthogonalDecomposition is not initialized.\");\n    return Inverse<CompleteOrthogonalDecomposition>(*this);\n  }\n\n  inline Index rows() const { return m_cpqr.rows(); }\n  inline Index cols() const { return m_cpqr.cols(); }\n\n  /** \\returns a const reference to the vector of Householder coefficients used\n   * to represent the factor \\c Q.\n   *\n   * For advanced uses only.\n   */\n  inline const HCoeffsType& hCoeffs() const { return m_cpqr.hCoeffs(); }\n\n  /** \\returns a const reference to the vector of Householder coefficients\n   * used to represent the factor \\c Z.\n   *\n   * For advanced uses only.\n   */\n  const HCoeffsType& zCoeffs() const { return m_zCoeffs; }\n\n  /** Allows one to prescribe a threshold to be used by certain methods, such as\n   * rank(), which need to determine when pivots are to be considered nonzero.\n   * Must be called before calling compute().\n   *\n   * When it needs to get the threshold value, Eigen calls threshold(). By\n   * default, this uses a formula to automatically determine a reasonable\n   * threshold. Once you have called the present method\n   * setThreshold(const RealScalar&), your value is used instead.\n   *\n   * \\param threshold The new value to use as the threshold.\n   *\n   * A pivot will be considered nonzero if its absolute value is strictly\n   * greater than\n   *  \\f$ threshold \\times \\vert maxpivot \\vert \\f$\n   * where maxpivot is the biggest pivot.\n   *\n   * If you want to come back to the default behavior, call\n   * setThreshold(Default_t)\n   */\n  CompleteOrthogonalDecomposition& setThreshold(const RealScalar& threshold) {\n    m_cpqr.setThreshold(threshold);\n    return *this;\n  }\n\n  /** Allows one to come back to the default behavior, letting Eigen use its default\n   * formula for determining the threshold.\n   *\n   * You should pass the special object Eigen::Default as parameter here.\n   * \\code qr.setThreshold(Eigen::Default); \\endcode\n   *\n   * See the documentation of setThreshold(const RealScalar&).\n   */\n  CompleteOrthogonalDecomposition& setThreshold(Default_t) {\n    m_cpqr.setThreshold(Default);\n    return *this;\n  }\n\n  /** Returns the threshold that will be used by certain methods such as rank().\n   *\n   * See the documentation of setThreshold(const RealScalar&).\n   */\n  RealScalar threshold() const { return m_cpqr.threshold(); }\n\n  /** \\returns the number of nonzero pivots in the complete orthogonal\n   * decomposition. 
Here nonzero is meant in the exact sense, not in a\n   * fuzzy sense. So that notion isn't really intrinsically interesting,\n   * but it is still useful when implementing algorithms.\n   *\n   * \\sa rank()\n   */\n  inline Index nonzeroPivots() const { return m_cpqr.nonzeroPivots(); }\n\n  /** \\returns the absolute value of the biggest pivot, i.e. the biggest\n   *          diagonal coefficient of R.\n   */\n  inline RealScalar maxPivot() const { return m_cpqr.maxPivot(); }\n\n  /** \\brief Reports whether the complete orthogonal decomposition was\n   * successful.\n   *\n   * \\note This function always returns \\c Success. It is provided for\n   * compatibility\n   * with other factorization routines.\n   * \\returns \\c Success\n   */\n  ComputationInfo info() const {\n    eigen_assert(m_cpqr.m_isInitialized && \"Decomposition is not initialized.\");\n    return Success;\n  }\n\n#ifndef EIGEN_PARSED_BY_DOXYGEN\n  template <typename RhsType, typename DstType>\n  void _solve_impl(const RhsType& rhs, DstType& dst) const;\n\n  template<bool Conjugate, typename RhsType, typename DstType>\n  void _solve_impl_transposed(const RhsType &rhs, DstType &dst) const;\n#endif\n\n protected:\n  static void check_template_parameters() {\n    EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);\n  }\n\n  template<bool Transpose_, typename Rhs>\n  void _check_solve_assertion(const Rhs& b) const {\n      EIGEN_ONLY_USED_FOR_DEBUG(b);\n      eigen_assert(m_cpqr.m_isInitialized && \"CompleteOrthogonalDecomposition is not initialized.\");\n      eigen_assert((Transpose_?derived().cols():derived().rows())==b.rows() && \"CompleteOrthogonalDecomposition::solve(): invalid number of rows of the right hand side matrix b\");\n  }\n\n  void computeInPlace();\n\n  /** Overwrites \\b rhs with \\f$ \\mathbf{Z} * \\mathbf{rhs} \\f$ or\n   *  \\f$ \\mathbf{\\overline Z} * \\mathbf{rhs} \\f$ if \\c Conjugate \n   *  is set to \\c true.\n   */\n  template <bool Conjugate, typename Rhs>\n  void applyZOnTheLeftInPlace(Rhs& rhs) const;\n\n  /** Overwrites \\b rhs with \\f$ \\mathbf{Z}^* * \\mathbf{rhs} \\f$.\n   */\n  template <typename Rhs>\n  void applyZAdjointOnTheLeftInPlace(Rhs& rhs) const;\n\n  ColPivHouseholderQR<MatrixType> m_cpqr;\n  HCoeffsType m_zCoeffs;\n  RowVectorType m_temp;\n};\n\ntemplate <typename MatrixType>\ntypename MatrixType::RealScalar\nCompleteOrthogonalDecomposition<MatrixType>::absDeterminant() const {\n  return m_cpqr.absDeterminant();\n}\n\ntemplate <typename MatrixType>\ntypename MatrixType::RealScalar\nCompleteOrthogonalDecomposition<MatrixType>::logAbsDeterminant() const {\n  return m_cpqr.logAbsDeterminant();\n}\n\n/** Performs the complete orthogonal decomposition of the given matrix \\a\n * matrix. 
The result of the factorization is stored into \\c *this, and a\n * reference to \\c *this is returned.\n *\n * \\sa class CompleteOrthogonalDecomposition,\n * CompleteOrthogonalDecomposition(const MatrixType&)\n */\ntemplate <typename MatrixType>\nvoid CompleteOrthogonalDecomposition<MatrixType>::computeInPlace()\n{\n  check_template_parameters();\n\n  // the column permutation is stored as int indices, so just to be sure:\n  eigen_assert(m_cpqr.cols() <= NumTraits<int>::highest());\n\n  const Index rank = m_cpqr.rank();\n  const Index cols = m_cpqr.cols();\n  const Index rows = m_cpqr.rows();\n  m_zCoeffs.resize((std::min)(rows, cols));\n  m_temp.resize(cols);\n\n  if (rank < cols) {\n    // We have reduced the (permuted) matrix to the form\n    //   [R11 R12]\n    //   [ 0  R22]\n    // where R11 is r-by-r (r = rank) upper triangular, R12 is\n    // r-by-(n-r), and R22 is empty or the norm of R22 is negligible.\n    // We now compute the complete orthogonal decomposition by applying\n    // Householder transformations from the right to the upper trapezoidal\n    // matrix X = [R11 R12] to zero out R12 and obtain the factorization\n    // [R11 R12] = [T11 0] * Z, where T11 is r-by-r upper triangular and\n    // Z = Z(0) * Z(1) ... Z(r-1) is an n-by-n orthogonal matrix.\n    // We store the data representing Z in R12 and m_zCoeffs.\n    for (Index k = rank - 1; k >= 0; --k) {\n      if (k != rank - 1) {\n        // Given the API for Householder reflectors, it is more convenient if\n        // we swap the leading parts of columns k and r-1 (zero-based) to form\n        // the matrix X_k = [X(0:k, k), X(0:k, r:n)]\n        m_cpqr.m_qr.col(k).head(k + 1).swap(\n            m_cpqr.m_qr.col(rank - 1).head(k + 1));\n      }\n      // Construct Householder reflector Z(k) to zero out the last row of X_k,\n      // i.e. 
choose Z(k) such that\n      // [X(k, k), X(k, r:n)] * Z(k) = [beta, 0, .., 0].\n      RealScalar beta;\n      m_cpqr.m_qr.row(k)\n          .tail(cols - rank + 1)\n          .makeHouseholderInPlace(m_zCoeffs(k), beta);\n      m_cpqr.m_qr(k, rank - 1) = beta;\n      if (k > 0) {\n        // Apply Z(k) to the first k rows of X_k\n        m_cpqr.m_qr.topRightCorner(k, cols - rank + 1)\n            .applyHouseholderOnTheRight(\n                m_cpqr.m_qr.row(k).tail(cols - rank).adjoint(), m_zCoeffs(k),\n                &m_temp(0));\n      }\n      if (k != rank - 1) {\n        // Swap X(0:k,k) back to its proper location.\n        m_cpqr.m_qr.col(k).head(k + 1).swap(\n            m_cpqr.m_qr.col(rank - 1).head(k + 1));\n      }\n    }\n  }\n}\n\ntemplate <typename MatrixType>\ntemplate <bool Conjugate, typename Rhs>\nvoid CompleteOrthogonalDecomposition<MatrixType>::applyZOnTheLeftInPlace(\n    Rhs& rhs) const {\n  const Index cols = this->cols();\n  const Index nrhs = rhs.cols();\n  const Index rank = this->rank();\n  Matrix<typename Rhs::Scalar, Dynamic, 1> temp((std::max)(cols, nrhs));\n  for (Index k = rank-1; k >= 0; --k) {\n    if (k != rank - 1) {\n      rhs.row(k).swap(rhs.row(rank - 1));\n    }\n    rhs.middleRows(rank - 1, cols - rank + 1)\n        .applyHouseholderOnTheLeft(\n            matrixQTZ().row(k).tail(cols - rank).transpose().template conjugateIf<!Conjugate>(), zCoeffs().template conjugateIf<Conjugate>()(k),\n            &temp(0));\n    if (k != rank - 1) {\n      rhs.row(k).swap(rhs.row(rank - 1));\n    }\n  }\n}\n\ntemplate <typename MatrixType>\ntemplate <typename Rhs>\nvoid CompleteOrthogonalDecomposition<MatrixType>::applyZAdjointOnTheLeftInPlace(\n    Rhs& rhs) const {\n  const Index cols = this->cols();\n  const Index nrhs = rhs.cols();\n  const Index rank = this->rank();\n  Matrix<typename Rhs::Scalar, Dynamic, 1> temp((std::max)(cols, nrhs));\n  for (Index k = 0; k < rank; ++k) {\n    if (k != rank - 1) {\n      rhs.row(k).swap(rhs.row(rank - 1));\n    }\n    rhs.middleRows(rank - 1, cols - rank + 1)\n        .applyHouseholderOnTheLeft(\n            matrixQTZ().row(k).tail(cols - rank).adjoint(), zCoeffs()(k),\n            &temp(0));\n    if (k != rank - 1) {\n      rhs.row(k).swap(rhs.row(rank - 1));\n    }\n  }\n}\n\n#ifndef EIGEN_PARSED_BY_DOXYGEN\ntemplate <typename _MatrixType>\ntemplate <typename RhsType, typename DstType>\nvoid CompleteOrthogonalDecomposition<_MatrixType>::_solve_impl(\n    const RhsType& rhs, DstType& dst) const {\n  const Index rank = this->rank();\n  if (rank == 0) {\n    dst.setZero();\n    return;\n  }\n\n  // Compute c = Q^* * rhs\n  typename RhsType::PlainObject c(rhs);\n  c.applyOnTheLeft(matrixQ().setLength(rank).adjoint());\n\n  // Solve T z = c(1:rank, :)\n  dst.topRows(rank) = matrixT()\n                          .topLeftCorner(rank, rank)\n                          .template triangularView<Upper>()\n                          .solve(c.topRows(rank));\n\n  const Index cols = this->cols();\n  if (rank < cols) {\n    // Compute y = Z^* * [ z ]\n    //                   [ 0 ]\n    dst.bottomRows(cols - rank).setZero();\n    applyZAdjointOnTheLeftInPlace(dst);\n  }\n\n  // Undo permutation to get x = P^{-1} * y.\n  dst = colsPermutation() * dst;\n}\n\ntemplate<typename _MatrixType>\ntemplate<bool Conjugate, typename RhsType, typename DstType>\nvoid CompleteOrthogonalDecomposition<_MatrixType>::_solve_impl_transposed(const RhsType &rhs, DstType &dst) const\n{\n  const Index rank = this->rank();\n\n  if (rank == 0) {\n    
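// Rank zero means the decomposition treats the matrix as (numerically)
    // zero, so the zero vector is the minimum-norm solution; exit early.
    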
dst.setZero();\n    return;\n  }\n\n  typename RhsType::PlainObject c(colsPermutation().transpose()*rhs);\n\n  if (rank < cols()) {\n    applyZOnTheLeftInPlace<!Conjugate>(c);\n  }\n\n  matrixT().topLeftCorner(rank, rank)\n           .template triangularView<Upper>()\n           .transpose().template conjugateIf<Conjugate>()\n           .solveInPlace(c.topRows(rank));\n\n  dst.topRows(rank) = c.topRows(rank);\n  dst.bottomRows(rows()-rank).setZero();\n\n  dst.applyOnTheLeft(householderQ().setLength(rank).template conjugateIf<!Conjugate>() );\n}\n#endif\n\nnamespace internal {\n\ntemplate<typename MatrixType>\nstruct traits<Inverse<CompleteOrthogonalDecomposition<MatrixType> > >\n  : traits<typename Transpose<typename MatrixType::PlainObject>::PlainObject>\n{\n  enum { Flags = 0 };\n};\n\ntemplate<typename DstXprType, typename MatrixType>\nstruct Assignment<DstXprType, Inverse<CompleteOrthogonalDecomposition<MatrixType> >, internal::assign_op<typename DstXprType::Scalar,typename CompleteOrthogonalDecomposition<MatrixType>::Scalar>, Dense2Dense>\n{\n  typedef CompleteOrthogonalDecomposition<MatrixType> CodType;\n  typedef Inverse<CodType> SrcXprType;\n  static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename CodType::Scalar> &)\n  {\n    typedef Matrix<typename CodType::Scalar, CodType::RowsAtCompileTime, CodType::RowsAtCompileTime, 0, CodType::MaxRowsAtCompileTime, CodType::MaxRowsAtCompileTime> IdentityMatrixType;\n    dst = src.nestedExpression().solve(IdentityMatrixType::Identity(src.cols(), src.cols()));\n  }\n};\n\n} // end namespace internal\n\n/** \\returns the matrix Q as a sequence of Householder transformations */\ntemplate <typename MatrixType>\ntypename CompleteOrthogonalDecomposition<MatrixType>::HouseholderSequenceType\nCompleteOrthogonalDecomposition<MatrixType>::householderQ() const {\n  return m_cpqr.householderQ();\n}\n\n/** \\return the complete orthogonal decomposition of \\c *this.\n  *\n  * \\sa class CompleteOrthogonalDecomposition\n  */\ntemplate <typename Derived>\nconst CompleteOrthogonalDecomposition<typename MatrixBase<Derived>::PlainObject>\nMatrixBase<Derived>::completeOrthogonalDecomposition() const {\n  return CompleteOrthogonalDecomposition<PlainObject>(eval());\n}\n\n}  // end namespace Eigen\n\n#endif  // EIGEN_COMPLETEORTHOGONALDECOMPOSITION_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/QR/FullPivHouseholderQR.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>\n// Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_FULLPIVOTINGHOUSEHOLDERQR_H\n#define EIGEN_FULLPIVOTINGHOUSEHOLDERQR_H\n\nnamespace Eigen { \n\nnamespace internal {\n\ntemplate<typename _MatrixType> struct traits<FullPivHouseholderQR<_MatrixType> >\n : traits<_MatrixType>\n{\n  typedef MatrixXpr XprKind;\n  typedef SolverStorage StorageKind;\n  typedef int StorageIndex;\n  enum { Flags = 0 };\n};\n\ntemplate<typename MatrixType> struct FullPivHouseholderQRMatrixQReturnType;\n\ntemplate<typename MatrixType>\nstruct traits<FullPivHouseholderQRMatrixQReturnType<MatrixType> >\n{\n  typedef typename MatrixType::PlainObject ReturnType;\n};\n\n} // end namespace internal\n\n/** \\ingroup QR_Module\n  *\n  * \\class FullPivHouseholderQR\n  *\n  * \\brief Householder rank-revealing QR decomposition of a matrix with full pivoting\n  *\n  * \\tparam _MatrixType the type of the matrix of which we are computing the QR decomposition\n  *\n  * This class performs a rank-revealing QR decomposition of a matrix \\b A into matrices \\b P, \\b P', \\b Q and \\b R\n  * such that \n  * \\f[\n  *  \\mathbf{P} \\, \\mathbf{A} \\, \\mathbf{P}' = \\mathbf{Q} \\, \\mathbf{R}\n  * \\f]\n  * by using Householder transformations. Here, \\b P and \\b P' are permutation matrices, \\b Q a unitary matrix \n  * and \\b R an upper triangular matrix.\n  *\n  * This decomposition performs a very prudent full pivoting in order to be rank-revealing and achieve optimal\n  * numerical stability. 
The trade-off is that it is slower than HouseholderQR and ColPivHouseholderQR.\n  *\n  * This class supports the \\link InplaceDecomposition inplace decomposition \\endlink mechanism.\n  * \n  * \\sa MatrixBase::fullPivHouseholderQr()\n  */\ntemplate<typename _MatrixType> class FullPivHouseholderQR\n        : public SolverBase<FullPivHouseholderQR<_MatrixType> >\n{\n  public:\n\n    typedef _MatrixType MatrixType;\n    typedef SolverBase<FullPivHouseholderQR> Base;\n    friend class SolverBase<FullPivHouseholderQR>;\n\n    EIGEN_GENERIC_PUBLIC_INTERFACE(FullPivHouseholderQR)\n    enum {\n      MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,\n      MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime\n    };\n    typedef internal::FullPivHouseholderQRMatrixQReturnType<MatrixType> MatrixQReturnType;\n    typedef typename internal::plain_diag_type<MatrixType>::type HCoeffsType;\n    typedef Matrix<StorageIndex, 1,\n                   EIGEN_SIZE_MIN_PREFER_DYNAMIC(ColsAtCompileTime,RowsAtCompileTime), RowMajor, 1,\n                   EIGEN_SIZE_MIN_PREFER_FIXED(MaxColsAtCompileTime,MaxRowsAtCompileTime)> IntDiagSizeVectorType;\n    typedef PermutationMatrix<ColsAtCompileTime, MaxColsAtCompileTime> PermutationType;\n    typedef typename internal::plain_row_type<MatrixType>::type RowVectorType;\n    typedef typename internal::plain_col_type<MatrixType>::type ColVectorType;\n    typedef typename MatrixType::PlainObject PlainObject;\n\n    /** \\brief Default Constructor.\n      *\n      * The default constructor is useful in cases in which the user intends to\n      * perform decompositions via FullPivHouseholderQR::compute(const MatrixType&).\n      */\n    FullPivHouseholderQR()\n      : m_qr(),\n        m_hCoeffs(),\n        m_rows_transpositions(),\n        m_cols_transpositions(),\n        m_cols_permutation(),\n        m_temp(),\n        m_isInitialized(false),\n        m_usePrescribedThreshold(false) {}\n\n    /** \\brief Default Constructor with memory preallocation\n      *\n      * Like the default constructor but with preallocation of the internal data\n      * according to the specified problem \\a size.\n      * \\sa FullPivHouseholderQR()\n      */\n    FullPivHouseholderQR(Index rows, Index cols)\n      : m_qr(rows, cols),\n        m_hCoeffs((std::min)(rows,cols)),\n        m_rows_transpositions((std::min)(rows,cols)),\n        m_cols_transpositions((std::min)(rows,cols)),\n        m_cols_permutation(cols),\n        m_temp(cols),\n        m_isInitialized(false),\n        m_usePrescribedThreshold(false) {}\n\n    /** \\brief Constructs a QR factorization from a given matrix\n      *\n      * This constructor computes the QR factorization of the matrix \\a matrix by calling\n      * the method compute(). 
It is a shortcut for:\n      * \n      * \\code\n      * FullPivHouseholderQR<MatrixType> qr(matrix.rows(), matrix.cols());\n      * qr.compute(matrix);\n      * \\endcode\n      * \n      * \\sa compute()\n      */\n    template<typename InputType>\n    explicit FullPivHouseholderQR(const EigenBase<InputType>& matrix)\n      : m_qr(matrix.rows(), matrix.cols()),\n        m_hCoeffs((std::min)(matrix.rows(), matrix.cols())),\n        m_rows_transpositions((std::min)(matrix.rows(), matrix.cols())),\n        m_cols_transpositions((std::min)(matrix.rows(), matrix.cols())),\n        m_cols_permutation(matrix.cols()),\n        m_temp(matrix.cols()),\n        m_isInitialized(false),\n        m_usePrescribedThreshold(false)\n    {\n      compute(matrix.derived());\n    }\n\n    /** \\brief Constructs a QR factorization from a given matrix\n      *\n      * This overloaded constructor is provided for \\link InplaceDecomposition inplace decomposition \\endlink when \\c MatrixType is an Eigen::Ref.\n      *\n      * \\sa FullPivHouseholderQR(const EigenBase&)\n      */\n    template<typename InputType>\n    explicit FullPivHouseholderQR(EigenBase<InputType>& matrix)\n      : m_qr(matrix.derived()),\n        m_hCoeffs((std::min)(matrix.rows(), matrix.cols())),\n        m_rows_transpositions((std::min)(matrix.rows(), matrix.cols())),\n        m_cols_transpositions((std::min)(matrix.rows(), matrix.cols())),\n        m_cols_permutation(matrix.cols()),\n        m_temp(matrix.cols()),\n        m_isInitialized(false),\n        m_usePrescribedThreshold(false)\n    {\n      computeInPlace();\n    }\n\n    #ifdef EIGEN_PARSED_BY_DOXYGEN\n    /** This method finds a solution x to the equation Ax=b, where A is the matrix of which\n      * \\c *this is the QR decomposition.\n      *\n      * \\param b the right-hand-side of the equation to solve.\n      *\n      * \\returns the exact or least-squares solution if the rank is greater than or equal to the number of columns of A,\n      * and an arbitrary solution otherwise.\n      *\n      * \\note_about_checking_solutions\n      *\n      * \\note_about_arbitrary_choice_of_solution\n      *\n      * Example: \\include FullPivHouseholderQR_solve.cpp\n      * Output: \\verbinclude FullPivHouseholderQR_solve.out\n      */\n    template<typename Rhs>\n    inline const Solve<FullPivHouseholderQR, Rhs>\n    solve(const MatrixBase<Rhs>& b) const;\n    #endif\n\n    /** \\returns Expression object representing the matrix Q\n      */\n    MatrixQReturnType matrixQ(void) const;\n\n    /** \\returns a reference to the matrix where the Householder QR decomposition is stored\n      */\n    const MatrixType& matrixQR() const\n    {\n      eigen_assert(m_isInitialized && \"FullPivHouseholderQR is not initialized.\");\n      return m_qr;\n    }\n\n    template<typename InputType>\n    FullPivHouseholderQR& compute(const EigenBase<InputType>& matrix);\n\n    /** \\returns a const reference to the column permutation matrix */\n    const PermutationType& colsPermutation() const\n    {\n      eigen_assert(m_isInitialized && \"FullPivHouseholderQR is not initialized.\");\n      return m_cols_permutation;\n    }\n\n    /** \\returns a const reference to the vector of indices representing the rows transpositions */\n    const IntDiagSizeVectorType& rowsTranspositions() const\n    {\n      eigen_assert(m_isInitialized && \"FullPivHouseholderQR is not initialized.\");\n      return m_rows_transpositions;\n    }\n\n    /** \\returns the absolute value of the determinant of the matrix of which\n    
  * *this is the QR decomposition. It has only linear complexity\n      * (that is, O(n) where n is the dimension of the square matrix)\n      * as the QR decomposition has already been computed.\n      *\n      * \\note This is only for square matrices.\n      *\n      * \\warning a determinant can be very big or small, so for matrices\n      * of large enough dimension, there is a risk of overflow/underflow.\n      * One way to work around that is to use logAbsDeterminant() instead.\n      *\n      * \\sa logAbsDeterminant(), MatrixBase::determinant()\n      */\n    typename MatrixType::RealScalar absDeterminant() const;\n\n    /** \\returns the natural log of the absolute value of the determinant of the matrix of which\n      * *this is the QR decomposition. It has only linear complexity\n      * (that is, O(n) where n is the dimension of the square matrix)\n      * as the QR decomposition has already been computed.\n      *\n      * \\note This is only for square matrices.\n      *\n      * \\note This method is useful to work around the risk of overflow/underflow that's inherent\n      * to determinant computation.\n      *\n      * \\sa absDeterminant(), MatrixBase::determinant()\n      */\n    typename MatrixType::RealScalar logAbsDeterminant() const;\n\n    /** \\returns the rank of the matrix of which *this is the QR decomposition.\n      *\n      * \\note This method has to determine which pivots should be considered nonzero.\n      *       For that, it uses the threshold value that you can control by calling\n      *       setThreshold(const RealScalar&).\n      */\n    inline Index rank() const\n    {\n      using std::abs;\n      eigen_assert(m_isInitialized && \"FullPivHouseholderQR is not initialized.\");\n      RealScalar premultiplied_threshold = abs(m_maxpivot) * threshold();\n      Index result = 0;\n      for(Index i = 0; i < m_nonzero_pivots; ++i)\n        result += (abs(m_qr.coeff(i,i)) > premultiplied_threshold);\n      return result;\n    }\n\n    /** \\returns the dimension of the kernel of the matrix of which *this is the QR decomposition.\n      *\n      * \\note This method has to determine which pivots should be considered nonzero.\n      *       For that, it uses the threshold value that you can control by calling\n      *       setThreshold(const RealScalar&).\n      */\n    inline Index dimensionOfKernel() const\n    {\n      eigen_assert(m_isInitialized && \"FullPivHouseholderQR is not initialized.\");\n      return cols() - rank();\n    }\n\n    /** \\returns true if the matrix of which *this is the QR decomposition represents an injective\n      *          linear map, i.e. 
has trivial kernel; false otherwise.\n      *\n      * \\note This method has to determine which pivots should be considered nonzero.\n      *       For that, it uses the threshold value that you can control by calling\n      *       setThreshold(const RealScalar&).\n      */\n    inline bool isInjective() const\n    {\n      eigen_assert(m_isInitialized && \"FullPivHouseholderQR is not initialized.\");\n      return rank() == cols();\n    }\n\n    /** \\returns true if the matrix of which *this is the QR decomposition represents a surjective\n      *          linear map; false otherwise.\n      *\n      * \\note This method has to determine which pivots should be considered nonzero.\n      *       For that, it uses the threshold value that you can control by calling\n      *       setThreshold(const RealScalar&).\n      */\n    inline bool isSurjective() const\n    {\n      eigen_assert(m_isInitialized && \"FullPivHouseholderQR is not initialized.\");\n      return rank() == rows();\n    }\n\n    /** \\returns true if the matrix of which *this is the QR decomposition is invertible.\n      *\n      * \\note This method has to determine which pivots should be considered nonzero.\n      *       For that, it uses the threshold value that you can control by calling\n      *       setThreshold(const RealScalar&).\n      */\n    inline bool isInvertible() const\n    {\n      eigen_assert(m_isInitialized && \"FullPivHouseholderQR is not initialized.\");\n      return isInjective() && isSurjective();\n    }\n\n    /** \\returns the inverse of the matrix of which *this is the QR decomposition.\n      *\n      * \\note If this matrix is not invertible, the returned matrix has undefined coefficients.\n      *       Use isInvertible() to first determine whether this matrix is invertible.\n      */\n    inline const Inverse<FullPivHouseholderQR> inverse() const\n    {\n      eigen_assert(m_isInitialized && \"FullPivHouseholderQR is not initialized.\");\n      return Inverse<FullPivHouseholderQR>(*this);\n    }\n\n    inline Index rows() const { return m_qr.rows(); }\n    inline Index cols() const { return m_qr.cols(); }\n    \n    /** \\returns a const reference to the vector of Householder coefficients used to represent the factor \\c Q.\n      * \n      * For advanced uses only.\n      */\n    const HCoeffsType& hCoeffs() const { return m_hCoeffs; }\n\n    /** Allows one to prescribe a threshold to be used by certain methods, such as rank(),\n      * which need to determine when pivots are to be considered nonzero. This is not used for the\n      * QR decomposition itself.\n      *\n      * When it needs to get the threshold value, Eigen calls threshold(). 
By default, this\n      * uses a formula to automatically determine a reasonable threshold.\n      * Once you have called the present method setThreshold(const RealScalar&),\n      * your value is used instead.\n      *\n      * \\param threshold The new value to use as the threshold.\n      *\n      * A pivot will be considered nonzero if its absolute value is strictly greater than\n      *  \\f$ threshold \\times \\vert maxpivot \\vert \\f$\n      * where maxpivot is the biggest pivot.\n      *\n      * If you want to come back to the default behavior, call setThreshold(Default_t)\n      */\n    FullPivHouseholderQR& setThreshold(const RealScalar& threshold)\n    {\n      m_usePrescribedThreshold = true;\n      m_prescribedThreshold = threshold;\n      return *this;\n    }\n\n    /** Allows one to come back to the default behavior, letting Eigen use its default formula for\n      * determining the threshold.\n      *\n      * You should pass the special object Eigen::Default as parameter here.\n      * \\code qr.setThreshold(Eigen::Default); \\endcode\n      *\n      * See the documentation of setThreshold(const RealScalar&).\n      */\n    FullPivHouseholderQR& setThreshold(Default_t)\n    {\n      m_usePrescribedThreshold = false;\n      return *this;\n    }\n\n    /** Returns the threshold that will be used by certain methods such as rank().\n      *\n      * See the documentation of setThreshold(const RealScalar&).\n      */\n    RealScalar threshold() const\n    {\n      eigen_assert(m_isInitialized || m_usePrescribedThreshold);\n      return m_usePrescribedThreshold ? m_prescribedThreshold\n      // this formula comes from experimenting (see \"LU precision tuning\" thread on the list)\n      // and turns out to be identical to Higham's formula used already in LDLt.\n                                      : NumTraits<Scalar>::epsilon() * RealScalar(m_qr.diagonalSize());\n    }\n\n    /** \\returns the number of nonzero pivots in the QR decomposition.\n      * Here nonzero is meant in the exact sense, not in a fuzzy sense.\n      * So that notion isn't really intrinsically interesting, but it is\n      * still useful when implementing algorithms.\n      *\n      * \\sa rank()\n      */\n    inline Index nonzeroPivots() const\n    {\n      eigen_assert(m_isInitialized && \"FullPivHouseholderQR is not initialized.\");\n      return m_nonzero_pivots;\n    }\n\n    /** \\returns the absolute value of the biggest pivot, i.e. 
the biggest\n      *          diagonal coefficient of R.\n      */\n    RealScalar maxPivot() const { return m_maxpivot; }\n\n    #ifndef EIGEN_PARSED_BY_DOXYGEN\n    template<typename RhsType, typename DstType>\n    void _solve_impl(const RhsType &rhs, DstType &dst) const;\n\n    template<bool Conjugate, typename RhsType, typename DstType>\n    void _solve_impl_transposed(const RhsType &rhs, DstType &dst) const;\n    #endif\n\n  protected:\n\n    static void check_template_parameters()\n    {\n      EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);\n    }\n\n    void computeInPlace();\n\n    MatrixType m_qr;\n    HCoeffsType m_hCoeffs;\n    IntDiagSizeVectorType m_rows_transpositions;\n    IntDiagSizeVectorType m_cols_transpositions;\n    PermutationType m_cols_permutation;\n    RowVectorType m_temp;\n    bool m_isInitialized, m_usePrescribedThreshold;\n    RealScalar m_prescribedThreshold, m_maxpivot;\n    Index m_nonzero_pivots;\n    RealScalar m_precision;\n    Index m_det_pq;\n};\n\ntemplate<typename MatrixType>\ntypename MatrixType::RealScalar FullPivHouseholderQR<MatrixType>::absDeterminant() const\n{\n  using std::abs;\n  eigen_assert(m_isInitialized && \"FullPivHouseholderQR is not initialized.\");\n  eigen_assert(m_qr.rows() == m_qr.cols() && \"You can't take the determinant of a non-square matrix!\");\n  return abs(m_qr.diagonal().prod());\n}\n\ntemplate<typename MatrixType>\ntypename MatrixType::RealScalar FullPivHouseholderQR<MatrixType>::logAbsDeterminant() const\n{\n  eigen_assert(m_isInitialized && \"FullPivHouseholderQR is not initialized.\");\n  eigen_assert(m_qr.rows() == m_qr.cols() && \"You can't take the determinant of a non-square matrix!\");\n  return m_qr.diagonal().cwiseAbs().array().log().sum();\n}\n\n/** Performs the QR factorization of the given matrix \\a matrix. 
The result of\n  * the factorization is stored into \\c *this, and a reference to \\c *this\n  * is returned.\n  *\n  * \\sa class FullPivHouseholderQR, FullPivHouseholderQR(const MatrixType&)\n  */\ntemplate<typename MatrixType>\ntemplate<typename InputType>\nFullPivHouseholderQR<MatrixType>& FullPivHouseholderQR<MatrixType>::compute(const EigenBase<InputType>& matrix)\n{\n  m_qr = matrix.derived();\n  computeInPlace();\n  return *this;\n}\n\ntemplate<typename MatrixType>\nvoid FullPivHouseholderQR<MatrixType>::computeInPlace()\n{\n  check_template_parameters();\n\n  using std::abs;\n  Index rows = m_qr.rows();\n  Index cols = m_qr.cols();\n  Index size = (std::min)(rows,cols);\n\n  \n  m_hCoeffs.resize(size);\n\n  m_temp.resize(cols);\n\n  m_precision = NumTraits<Scalar>::epsilon() * RealScalar(size);\n\n  m_rows_transpositions.resize(size);\n  m_cols_transpositions.resize(size);\n  Index number_of_transpositions = 0;\n\n  RealScalar biggest(0);\n\n  m_nonzero_pivots = size; // the generic case is that in which all pivots are nonzero (invertible case)\n  m_maxpivot = RealScalar(0);\n\n  for (Index k = 0; k < size; ++k)\n  {\n    Index row_of_biggest_in_corner, col_of_biggest_in_corner;\n    typedef internal::scalar_score_coeff_op<Scalar> Scoring;\n    typedef typename Scoring::result_type Score;\n\n    Score score = m_qr.bottomRightCorner(rows-k, cols-k)\n                      .unaryExpr(Scoring())\n                      .maxCoeff(&row_of_biggest_in_corner, &col_of_biggest_in_corner);\n    row_of_biggest_in_corner += k;\n    col_of_biggest_in_corner += k;\n    RealScalar biggest_in_corner = internal::abs_knowing_score<Scalar>()(m_qr(row_of_biggest_in_corner, col_of_biggest_in_corner), score);\n    if(k==0) biggest = biggest_in_corner;\n\n    // if the corner is negligible, then we have less than full rank, and we can finish early\n    if(internal::isMuchSmallerThan(biggest_in_corner, biggest, m_precision))\n    {\n      m_nonzero_pivots = k;\n      for(Index i = k; i < size; i++)\n      {\n        m_rows_transpositions.coeffRef(i) = internal::convert_index<StorageIndex>(i);\n        m_cols_transpositions.coeffRef(i) = internal::convert_index<StorageIndex>(i);\n        m_hCoeffs.coeffRef(i) = Scalar(0);\n      }\n      break;\n    }\n\n    m_rows_transpositions.coeffRef(k) = internal::convert_index<StorageIndex>(row_of_biggest_in_corner);\n    m_cols_transpositions.coeffRef(k) = internal::convert_index<StorageIndex>(col_of_biggest_in_corner);\n    if(k != row_of_biggest_in_corner) {\n      m_qr.row(k).tail(cols-k).swap(m_qr.row(row_of_biggest_in_corner).tail(cols-k));\n      ++number_of_transpositions;\n    }\n    if(k != col_of_biggest_in_corner) {\n      m_qr.col(k).swap(m_qr.col(col_of_biggest_in_corner));\n      ++number_of_transpositions;\n    }\n\n    RealScalar beta;\n    m_qr.col(k).tail(rows-k).makeHouseholderInPlace(m_hCoeffs.coeffRef(k), beta);\n    m_qr.coeffRef(k,k) = beta;\n\n    // remember the maximum absolute value of diagonal coefficients\n    if(abs(beta) > m_maxpivot) m_maxpivot = abs(beta);\n\n    m_qr.bottomRightCorner(rows-k, cols-k-1)\n        .applyHouseholderOnTheLeft(m_qr.col(k).tail(rows-k-1), m_hCoeffs.coeffRef(k), &m_temp.coeffRef(k+1));\n  }\n\n  m_cols_permutation.setIdentity(cols);\n  for(Index k = 0; k < size; ++k)\n    m_cols_permutation.applyTranspositionOnTheRight(k, m_cols_transpositions.coeff(k));\n\n  m_det_pq = (number_of_transpositions%2) ? 
-1 : 1;\n  m_isInitialized = true;\n}\n\n#ifndef EIGEN_PARSED_BY_DOXYGEN\ntemplate<typename _MatrixType>\ntemplate<typename RhsType, typename DstType>\nvoid FullPivHouseholderQR<_MatrixType>::_solve_impl(const RhsType &rhs, DstType &dst) const\n{\n  const Index l_rank = rank();\n\n  // FIXME introduce nonzeroPivots() and use it here. and more generally,\n  // make the same improvements in this dec as in FullPivLU.\n  if(l_rank==0)\n  {\n    dst.setZero();\n    return;\n  }\n\n  typename RhsType::PlainObject c(rhs);\n\n  Matrix<typename RhsType::Scalar,1,RhsType::ColsAtCompileTime> temp(rhs.cols());\n  for (Index k = 0; k < l_rank; ++k)\n  {\n    Index remainingSize = rows()-k;\n    c.row(k).swap(c.row(m_rows_transpositions.coeff(k)));\n    c.bottomRightCorner(remainingSize, rhs.cols())\n      .applyHouseholderOnTheLeft(m_qr.col(k).tail(remainingSize-1),\n                               m_hCoeffs.coeff(k), &temp.coeffRef(0));\n  }\n\n  m_qr.topLeftCorner(l_rank, l_rank)\n      .template triangularView<Upper>()\n      .solveInPlace(c.topRows(l_rank));\n\n  for(Index i = 0; i < l_rank; ++i) dst.row(m_cols_permutation.indices().coeff(i)) = c.row(i);\n  for(Index i = l_rank; i < cols(); ++i) dst.row(m_cols_permutation.indices().coeff(i)).setZero();\n}\n\ntemplate<typename _MatrixType>\ntemplate<bool Conjugate, typename RhsType, typename DstType>\nvoid FullPivHouseholderQR<_MatrixType>::_solve_impl_transposed(const RhsType &rhs, DstType &dst) const\n{\n  const Index l_rank = rank();\n\n  if(l_rank == 0)\n  {\n    dst.setZero();\n    return;\n  }\n\n  typename RhsType::PlainObject c(m_cols_permutation.transpose()*rhs);\n\n  m_qr.topLeftCorner(l_rank, l_rank)\n         .template triangularView<Upper>()\n         .transpose().template conjugateIf<Conjugate>()\n         .solveInPlace(c.topRows(l_rank));\n\n  dst.topRows(l_rank) = c.topRows(l_rank);\n  dst.bottomRows(rows()-l_rank).setZero();\n\n  Matrix<Scalar, 1, DstType::ColsAtCompileTime> temp(dst.cols());\n  const Index size = (std::min)(rows(), cols());\n  for (Index k = size-1; k >= 0; --k)\n  {\n    Index remainingSize = rows()-k;\n\n    dst.bottomRightCorner(remainingSize, dst.cols())\n       .applyHouseholderOnTheLeft(m_qr.col(k).tail(remainingSize-1).template conjugateIf<!Conjugate>(),\n                                  m_hCoeffs.template conjugateIf<Conjugate>().coeff(k), &temp.coeffRef(0));\n\n    dst.row(k).swap(dst.row(m_rows_transpositions.coeff(k)));\n  }\n}\n#endif\n\nnamespace internal {\n  \ntemplate<typename DstXprType, typename MatrixType>\nstruct Assignment<DstXprType, Inverse<FullPivHouseholderQR<MatrixType> >, internal::assign_op<typename DstXprType::Scalar,typename FullPivHouseholderQR<MatrixType>::Scalar>, Dense2Dense>\n{\n  typedef FullPivHouseholderQR<MatrixType> QrType;\n  typedef Inverse<QrType> SrcXprType;\n  static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename QrType::Scalar> &)\n  {    \n    dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.cols()));\n  }\n};\n\n/** \\ingroup QR_Module\n  *\n  * \\brief Expression type for return value of FullPivHouseholderQR::matrixQ()\n  *\n  * \\tparam MatrixType type of underlying dense matrix\n  */\ntemplate<typename MatrixType> struct FullPivHouseholderQRMatrixQReturnType\n  : public ReturnByValue<FullPivHouseholderQRMatrixQReturnType<MatrixType> >\n{\npublic:\n  typedef typename FullPivHouseholderQR<MatrixType>::IntDiagSizeVectorType IntDiagSizeVectorType;\n  typedef typename 
internal::plain_diag_type<MatrixType>::type HCoeffsType;\n  typedef Matrix<typename MatrixType::Scalar, 1, MatrixType::RowsAtCompileTime, RowMajor, 1,\n                 MatrixType::MaxRowsAtCompileTime> WorkVectorType;\n\n  FullPivHouseholderQRMatrixQReturnType(const MatrixType&       qr,\n                                        const HCoeffsType&      hCoeffs,\n                                        const IntDiagSizeVectorType& rowsTranspositions)\n    : m_qr(qr),\n      m_hCoeffs(hCoeffs),\n      m_rowsTranspositions(rowsTranspositions)\n  {}\n\n  template <typename ResultType>\n  void evalTo(ResultType& result) const\n  {\n    const Index rows = m_qr.rows();\n    WorkVectorType workspace(rows);\n    evalTo(result, workspace);\n  }\n\n  template <typename ResultType>\n  void evalTo(ResultType& result, WorkVectorType& workspace) const\n  {\n    using numext::conj;\n    // compute the product H'_0 H'_1 ... H'_n-1,\n    // where H_k is the k-th Householder transformation I - h_k v_k v_k'\n    // and v_k is the k-th Householder vector [1,m_qr(k+1,k), m_qr(k+2,k), ...]\n    const Index rows = m_qr.rows();\n    const Index cols = m_qr.cols();\n    const Index size = (std::min)(rows, cols);\n    workspace.resize(rows);\n    result.setIdentity(rows, rows);\n    for (Index k = size-1; k >= 0; k--)\n    {\n      result.block(k, k, rows-k, rows-k)\n            .applyHouseholderOnTheLeft(m_qr.col(k).tail(rows-k-1), conj(m_hCoeffs.coeff(k)), &workspace.coeffRef(k));\n      result.row(k).swap(result.row(m_rowsTranspositions.coeff(k)));\n    }\n  }\n\n  Index rows() const { return m_qr.rows(); }\n  Index cols() const { return m_qr.rows(); }\n\nprotected:\n  typename MatrixType::Nested m_qr;\n  typename HCoeffsType::Nested m_hCoeffs;\n  typename IntDiagSizeVectorType::Nested m_rowsTranspositions;\n};\n\n// template<typename MatrixType>\n// struct evaluator<FullPivHouseholderQRMatrixQReturnType<MatrixType> >\n//  : public evaluator<ReturnByValue<FullPivHouseholderQRMatrixQReturnType<MatrixType> > >\n// {};\n\n} // end namespace internal\n\ntemplate<typename MatrixType>\ninline typename FullPivHouseholderQR<MatrixType>::MatrixQReturnType FullPivHouseholderQR<MatrixType>::matrixQ() const\n{\n  eigen_assert(m_isInitialized && \"FullPivHouseholderQR is not initialized.\");\n  return MatrixQReturnType(m_qr, m_hCoeffs, m_rows_transpositions);\n}\n\n/** \\return the full-pivoting Householder QR decomposition of \\c *this.\n  *\n  * \\sa class FullPivHouseholderQR\n  */\ntemplate<typename Derived>\nconst FullPivHouseholderQR<typename MatrixBase<Derived>::PlainObject>\nMatrixBase<Derived>::fullPivHouseholderQr() const\n{\n  return FullPivHouseholderQR<PlainObject>(eval());\n}\n\n} // end namespace Eigen\n\n#endif // EIGEN_FULLPIVOTINGHOUSEHOLDERQR_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/QR/HouseholderQR.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>\n// Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com>\n// Copyright (C) 2010 Vincent Lejeune\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_QR_H\n#define EIGEN_QR_H\n\nnamespace Eigen { \n\nnamespace internal {\ntemplate<typename _MatrixType> struct traits<HouseholderQR<_MatrixType> >\n : traits<_MatrixType>\n{\n  typedef MatrixXpr XprKind;\n  typedef SolverStorage StorageKind;\n  typedef int StorageIndex;\n  enum { Flags = 0 };\n};\n\n} // end namespace internal\n\n/** \\ingroup QR_Module\n  *\n  *\n  * \\class HouseholderQR\n  *\n  * \\brief Householder QR decomposition of a matrix\n  *\n  * \\tparam _MatrixType the type of the matrix of which we are computing the QR decomposition\n  *\n  * This class performs a QR decomposition of a matrix \\b A into matrices \\b Q and \\b R\n  * such that \n  * \\f[\n  *  \\mathbf{A} = \\mathbf{Q} \\, \\mathbf{R}\n  * \\f]\n  * by using Householder transformations. Here, \\b Q a unitary matrix and \\b R an upper triangular matrix.\n  * The result is stored in a compact way compatible with LAPACK.\n  *\n  * Note that no pivoting is performed. This is \\b not a rank-revealing decomposition.\n  * If you want that feature, use FullPivHouseholderQR or ColPivHouseholderQR instead.\n  *\n  * This Householder QR decomposition is faster, but less numerically stable and less feature-full than\n  * FullPivHouseholderQR or ColPivHouseholderQR.\n  *\n  * This class supports the \\link InplaceDecomposition inplace decomposition \\endlink mechanism.\n  *\n  * \\sa MatrixBase::householderQr()\n  */\ntemplate<typename _MatrixType> class HouseholderQR\n        : public SolverBase<HouseholderQR<_MatrixType> >\n{\n  public:\n\n    typedef _MatrixType MatrixType;\n    typedef SolverBase<HouseholderQR> Base;\n    friend class SolverBase<HouseholderQR>;\n\n    EIGEN_GENERIC_PUBLIC_INTERFACE(HouseholderQR)\n    enum {\n      MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,\n      MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime\n    };\n    typedef Matrix<Scalar, RowsAtCompileTime, RowsAtCompileTime, (MatrixType::Flags&RowMajorBit) ? 
RowMajor : ColMajor, MaxRowsAtCompileTime, MaxRowsAtCompileTime> MatrixQType;\n    typedef typename internal::plain_diag_type<MatrixType>::type HCoeffsType;\n    typedef typename internal::plain_row_type<MatrixType>::type RowVectorType;\n    typedef HouseholderSequence<MatrixType,typename internal::remove_all<typename HCoeffsType::ConjugateReturnType>::type> HouseholderSequenceType;\n\n    /**\n      * \\brief Default Constructor.\n      *\n      * The default constructor is useful in cases in which the user intends to\n      * perform decompositions via HouseholderQR::compute(const MatrixType&).\n      */\n    HouseholderQR() : m_qr(), m_hCoeffs(), m_temp(), m_isInitialized(false) {}\n\n    /** \\brief Default Constructor with memory preallocation\n      *\n      * Like the default constructor but with preallocation of the internal data\n      * according to the specified problem \\a size.\n      * \\sa HouseholderQR()\n      */\n    HouseholderQR(Index rows, Index cols)\n      : m_qr(rows, cols),\n        m_hCoeffs((std::min)(rows,cols)),\n        m_temp(cols),\n        m_isInitialized(false) {}\n\n    /** \\brief Constructs a QR factorization from a given matrix\n      *\n      * This constructor computes the QR factorization of the matrix \\a matrix by calling\n      * the method compute(). It is a shortcut for:\n      * \n      * \\code\n      * HouseholderQR<MatrixType> qr(matrix.rows(), matrix.cols());\n      * qr.compute(matrix);\n      * \\endcode\n      * \n      * \\sa compute()\n      */\n    template<typename InputType>\n    explicit HouseholderQR(const EigenBase<InputType>& matrix)\n      : m_qr(matrix.rows(), matrix.cols()),\n        m_hCoeffs((std::min)(matrix.rows(),matrix.cols())),\n        m_temp(matrix.cols()),\n        m_isInitialized(false)\n    {\n      compute(matrix.derived());\n    }\n\n\n    /** \\brief Constructs a QR factorization from a given matrix\n      *\n      * This overloaded constructor is provided for \\link InplaceDecomposition inplace decomposition \\endlink when\n      * \\c MatrixType is an Eigen::Ref.\n      *\n      * \\sa HouseholderQR(const EigenBase&)\n      */\n    template<typename InputType>\n    explicit HouseholderQR(EigenBase<InputType>& matrix)\n      : m_qr(matrix.derived()),\n        m_hCoeffs((std::min)(matrix.rows(),matrix.cols())),\n        m_temp(matrix.cols()),\n        m_isInitialized(false)\n    {\n      computeInPlace();\n    }\n\n    #ifdef EIGEN_PARSED_BY_DOXYGEN\n    /** This method finds a solution x to the equation Ax=b, where A is the matrix of which\n      * *this is the QR decomposition, if any exists.\n      *\n      * \\param b the right-hand-side of the equation to solve.\n      *\n      * \\returns a solution.\n      *\n      * \\note_about_checking_solutions\n      *\n      * \\note_about_arbitrary_choice_of_solution\n      *\n      * Example: \\include HouseholderQR_solve.cpp\n      * Output: \\verbinclude HouseholderQR_solve.out\n      */\n    template<typename Rhs>\n    inline const Solve<HouseholderQR, Rhs>\n    solve(const MatrixBase<Rhs>& b) const;\n    #endif\n\n    /** This method returns an expression of the unitary matrix Q as a sequence of Householder transformations.\n      *\n      * The returned expression can directly be used to perform matrix products. 
It can also be assigned to a dense Matrix object.\n      * Here is an example showing how to recover the full or thin matrix Q, as well as how to perform matrix products using operator*:\n      *\n      * Example: \\include HouseholderQR_householderQ.cpp\n      * Output: \\verbinclude HouseholderQR_householderQ.out\n      */\n    HouseholderSequenceType householderQ() const\n    {\n      eigen_assert(m_isInitialized && \"HouseholderQR is not initialized.\");\n      return HouseholderSequenceType(m_qr, m_hCoeffs.conjugate());\n    }\n\n    /** \\returns a reference to the matrix where the Householder QR decomposition is stored\n      * in a LAPACK-compatible way.\n      */\n    const MatrixType& matrixQR() const\n    {\n        eigen_assert(m_isInitialized && \"HouseholderQR is not initialized.\");\n        return m_qr;\n    }\n\n    template<typename InputType>\n    HouseholderQR& compute(const EigenBase<InputType>& matrix) {\n      m_qr = matrix.derived();\n      computeInPlace();\n      return *this;\n    }\n\n    /** \\returns the absolute value of the determinant of the matrix of which\n      * *this is the QR decomposition. It has only linear complexity\n      * (that is, O(n) where n is the dimension of the square matrix)\n      * as the QR decomposition has already been computed.\n      *\n      * \\note This is only for square matrices.\n      *\n      * \\warning a determinant can be very big or small, so for matrices\n      * of large enough dimension, there is a risk of overflow/underflow.\n      * One way to work around that is to use logAbsDeterminant() instead.\n      *\n      * \\sa logAbsDeterminant(), MatrixBase::determinant()\n      */\n    typename MatrixType::RealScalar absDeterminant() const;\n\n    /** \\returns the natural log of the absolute value of the determinant of the matrix of which\n      * *this is the QR decomposition. 
It has only linear complexity\n      * (that is, O(n) where n is the dimension of the square matrix)\n      * as the QR decomposition has already been computed.\n      *\n      * \\note This is only for square matrices.\n      *\n      * \\note This method is useful to work around the risk of overflow/underflow that's inherent\n      * to determinant computation.\n      *\n      * \\sa absDeterminant(), MatrixBase::determinant()\n      */\n    typename MatrixType::RealScalar logAbsDeterminant() const;\n\n    inline Index rows() const { return m_qr.rows(); }\n    inline Index cols() const { return m_qr.cols(); }\n\n    /** \\returns a const reference to the vector of Householder coefficients used to represent the factor \\c Q.\n      * \n      * For advanced uses only.\n      */\n    const HCoeffsType& hCoeffs() const { return m_hCoeffs; }\n\n    #ifndef EIGEN_PARSED_BY_DOXYGEN\n    template<typename RhsType, typename DstType>\n    void _solve_impl(const RhsType &rhs, DstType &dst) const;\n\n    template<bool Conjugate, typename RhsType, typename DstType>\n    void _solve_impl_transposed(const RhsType &rhs, DstType &dst) const;\n    #endif\n\n  protected:\n\n    static void check_template_parameters()\n    {\n      EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);\n    }\n\n    void computeInPlace();\n\n    MatrixType m_qr;\n    HCoeffsType m_hCoeffs;\n    RowVectorType m_temp;\n    bool m_isInitialized;\n};\n\ntemplate<typename MatrixType>\ntypename MatrixType::RealScalar HouseholderQR<MatrixType>::absDeterminant() const\n{\n  using std::abs;\n  eigen_assert(m_isInitialized && \"HouseholderQR is not initialized.\");\n  eigen_assert(m_qr.rows() == m_qr.cols() && \"You can't take the determinant of a non-square matrix!\");\n  return abs(m_qr.diagonal().prod());\n}\n\ntemplate<typename MatrixType>\ntypename MatrixType::RealScalar HouseholderQR<MatrixType>::logAbsDeterminant() const\n{\n  eigen_assert(m_isInitialized && \"HouseholderQR is not initialized.\");\n  eigen_assert(m_qr.rows() == m_qr.cols() && \"You can't take the determinant of a non-square matrix!\");\n  return m_qr.diagonal().cwiseAbs().array().log().sum();\n}\n\nnamespace internal {\n\n/** \\internal */\ntemplate<typename MatrixQR, typename HCoeffs>\nvoid householder_qr_inplace_unblocked(MatrixQR& mat, HCoeffs& hCoeffs, typename MatrixQR::Scalar* tempData = 0)\n{\n  typedef typename MatrixQR::Scalar Scalar;\n  typedef typename MatrixQR::RealScalar RealScalar;\n  Index rows = mat.rows();\n  Index cols = mat.cols();\n  Index size = (std::min)(rows,cols);\n\n  eigen_assert(hCoeffs.size() == size);\n\n  typedef Matrix<Scalar,MatrixQR::ColsAtCompileTime,1> TempType;\n  TempType tempVector;\n  if(tempData==0)\n  {\n    tempVector.resize(cols);\n    tempData = tempVector.data();\n  }\n\n  for(Index k = 0; k < size; ++k)\n  {\n    Index remainingRows = rows - k;\n    Index remainingCols = cols - k - 1;\n\n    RealScalar beta;\n    mat.col(k).tail(remainingRows).makeHouseholderInPlace(hCoeffs.coeffRef(k), beta);\n    mat.coeffRef(k,k) = beta;\n\n    // apply H to remaining part of m_qr from the left\n    mat.bottomRightCorner(remainingRows, remainingCols)\n        .applyHouseholderOnTheLeft(mat.col(k).tail(remainingRows-1), hCoeffs.coeffRef(k), tempData+k+1);\n  }\n}\n\n/** \\internal */\ntemplate<typename MatrixQR, typename HCoeffs,\n  typename MatrixQRScalar = typename MatrixQR::Scalar,\n  bool InnerStrideIsOne = (MatrixQR::InnerStrideAtCompileTime == 1 && HCoeffs::InnerStrideAtCompileTime == 1)>\nstruct householder_qr_inplace_blocked\n{\n  // 
This is specialized for LAPACK-supported Scalar types in HouseholderQR_LAPACKE.h\n  static void run(MatrixQR& mat, HCoeffs& hCoeffs, Index maxBlockSize=32,\n      typename MatrixQR::Scalar* tempData = 0)\n  {\n    typedef typename MatrixQR::Scalar Scalar;\n    typedef Block<MatrixQR,Dynamic,Dynamic> BlockType;\n\n    Index rows = mat.rows();\n    Index cols = mat.cols();\n    Index size = (std::min)(rows, cols);\n\n    typedef Matrix<Scalar,Dynamic,1,ColMajor,MatrixQR::MaxColsAtCompileTime,1> TempType;\n    TempType tempVector;\n    if(tempData==0)\n    {\n      tempVector.resize(cols);\n      tempData = tempVector.data();\n    }\n\n    Index blockSize = (std::min)(maxBlockSize,size);\n\n    Index k = 0;\n    for (k = 0; k < size; k += blockSize)\n    {\n      Index bs = (std::min)(size-k,blockSize);  // actual size of the block\n      Index tcols = cols - k - bs;              // trailing columns\n      Index brows = rows-k;                     // rows of the block\n\n      // partition the matrix:\n      //        A00 | A01 | A02\n      // mat  = A10 | A11 | A12\n      //        A20 | A21 | A22\n      // and perform the QR decomposition of [A11^T A21^T]^T\n      // and update [A12^T A22^T]^T using level 3 operations.\n      // Finally, the algorithm continues on A22.\n\n      BlockType A11_21 = mat.block(k,k,brows,bs);\n      Block<HCoeffs,Dynamic,1> hCoeffsSegment = hCoeffs.segment(k,bs);\n\n      householder_qr_inplace_unblocked(A11_21, hCoeffsSegment, tempData);\n\n      if(tcols)\n      {\n        BlockType A21_22 = mat.block(k,k+bs,brows,tcols);\n        apply_block_householder_on_the_left(A21_22,A11_21,hCoeffsSegment, false); // false == backward\n      }\n    }\n  }\n};\n\n} // end namespace internal\n\n#ifndef EIGEN_PARSED_BY_DOXYGEN\ntemplate<typename _MatrixType>\ntemplate<typename RhsType, typename DstType>\nvoid HouseholderQR<_MatrixType>::_solve_impl(const RhsType &rhs, DstType &dst) const\n{\n  const Index rank = (std::min)(rows(), cols());\n\n  typename RhsType::PlainObject c(rhs);\n\n  c.applyOnTheLeft(householderQ().setLength(rank).adjoint() );\n\n  m_qr.topLeftCorner(rank, rank)\n      .template triangularView<Upper>()\n      .solveInPlace(c.topRows(rank));\n\n  dst.topRows(rank) = c.topRows(rank);\n  dst.bottomRows(cols()-rank).setZero();\n}\n\ntemplate<typename _MatrixType>\ntemplate<bool Conjugate, typename RhsType, typename DstType>\nvoid HouseholderQR<_MatrixType>::_solve_impl_transposed(const RhsType &rhs, DstType &dst) const\n{\n  const Index rank = (std::min)(rows(), cols());\n\n  typename RhsType::PlainObject c(rhs);\n\n  m_qr.topLeftCorner(rank, rank)\n      .template triangularView<Upper>()\n      .transpose().template conjugateIf<Conjugate>()\n      .solveInPlace(c.topRows(rank));\n\n  dst.topRows(rank) = c.topRows(rank);\n  dst.bottomRows(rows()-rank).setZero();\n\n  dst.applyOnTheLeft(householderQ().setLength(rank).template conjugateIf<!Conjugate>() );\n}\n#endif\n\n/** Performs the QR factorization of the given matrix \a matrix. 
The result of\n  * the factorization is stored into \\c *this, and a reference to \\c *this\n  * is returned.\n  *\n  * \\sa class HouseholderQR, HouseholderQR(const MatrixType&)\n  */\ntemplate<typename MatrixType>\nvoid HouseholderQR<MatrixType>::computeInPlace()\n{\n  check_template_parameters();\n  \n  Index rows = m_qr.rows();\n  Index cols = m_qr.cols();\n  Index size = (std::min)(rows,cols);\n\n  m_hCoeffs.resize(size);\n\n  m_temp.resize(cols);\n\n  internal::householder_qr_inplace_blocked<MatrixType, HCoeffsType>::run(m_qr, m_hCoeffs, 48, m_temp.data());\n\n  m_isInitialized = true;\n}\n\n/** \\return the Householder QR decomposition of \\c *this.\n  *\n  * \\sa class HouseholderQR\n  */\ntemplate<typename Derived>\nconst HouseholderQR<typename MatrixBase<Derived>::PlainObject>\nMatrixBase<Derived>::householderQr() const\n{\n  return HouseholderQR<PlainObject>(eval());\n}\n\n} // end namespace Eigen\n\n#endif // EIGEN_QR_H\n"
  },
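  {
    "path": "example/sketches/householder_qr_usage.cpp",
    "content": "// Editor's sketch -- a hypothetical example file, not shipped with regenie or\n// Eigen. It is a minimal illustration of the HouseholderQR API documented in\n// external_libs/eigen-3.4.0/Eigen/src/QR/HouseholderQR.h: factorize a square\n// matrix, solve A*x = b, and read |det(A)| from the cached R factor. The file\n// name and location are assumptions made for illustration only.\n#include <Eigen/Dense>\n#include <iostream>\n\nint main()\n{\n  Eigen::Matrix3d A;\n  A << 2, -1,  0,\n      -1,  2, -1,\n       0, -1,  2;\n  Eigen::Vector3d b(1, 0, 1);\n\n  // QR without pivoting: fast, but not rank-revealing.\n  Eigen::HouseholderQR<Eigen::Matrix3d> qr(A);\n  Eigen::Vector3d x = qr.solve(b);\n\n  // absDeterminant()/logAbsDeterminant() only read the diagonal of the\n  // cached R factor, hence the O(n) cost documented above.\n  std::cout << \"x           = \" << x.transpose() << \"\\n\"\n            << \"|det(A)|    = \" << qr.absDeterminant() << \"\\n\"\n            << \"log|det(A)| = \" << qr.logAbsDeterminant() << \"\\n\";\n  return 0;\n}\n"
  },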
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/QR/HouseholderQR_LAPACKE.h",
    "content": "/*\n Copyright (c) 2011, Intel Corporation. All rights reserved.\n\n Redistribution and use in source and binary forms, with or without modification,\n are permitted provided that the following conditions are met:\n\n * Redistributions of source code must retain the above copyright notice, this\n   list of conditions and the following disclaimer.\n * Redistributions in binary form must reproduce the above copyright notice,\n   this list of conditions and the following disclaimer in the documentation\n   and/or other materials provided with the distribution.\n * Neither the name of Intel Corporation nor the names of its contributors may\n   be used to endorse or promote products derived from this software without\n   specific prior written permission.\n\n THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\" AND\n ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED\n WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE\n DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR\n ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES\n (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;\n LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON\n ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\n SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\n ********************************************************************************\n *   Content : Eigen bindings to LAPACKe\n *    Householder QR decomposition of a matrix w/o pivoting based on\n *    LAPACKE_?geqrf function.\n ********************************************************************************\n*/\n\n#ifndef EIGEN_QR_LAPACKE_H\n#define EIGEN_QR_LAPACKE_H\n\nnamespace Eigen { \n\nnamespace internal {\n\n/** \\internal Specialization for the data types supported by LAPACKe */\n\n#define EIGEN_LAPACKE_QR_NOPIV(EIGTYPE, LAPACKE_TYPE, LAPACKE_PREFIX) \\\ntemplate<typename MatrixQR, typename HCoeffs> \\\nstruct householder_qr_inplace_blocked<MatrixQR, HCoeffs, EIGTYPE, true> \\\n{ \\\n  static void run(MatrixQR& mat, HCoeffs& hCoeffs, Index = 32, \\\n      typename MatrixQR::Scalar* = 0) \\\n  { \\\n    lapack_int m = (lapack_int) mat.rows(); \\\n    lapack_int n = (lapack_int) mat.cols(); \\\n    lapack_int lda = (lapack_int) mat.outerStride(); \\\n    lapack_int matrix_order = (MatrixQR::IsRowMajor) ? LAPACK_ROW_MAJOR : LAPACK_COL_MAJOR; \\\n    LAPACKE_##LAPACKE_PREFIX##geqrf( matrix_order, m, n, (LAPACKE_TYPE*)mat.data(), lda, (LAPACKE_TYPE*)hCoeffs.data()); \\\n    hCoeffs.adjointInPlace(); \\\n  } \\\n};\n\nEIGEN_LAPACKE_QR_NOPIV(double, double, d)\nEIGEN_LAPACKE_QR_NOPIV(float, float, s)\nEIGEN_LAPACKE_QR_NOPIV(dcomplex, lapack_complex_double, z)\nEIGEN_LAPACKE_QR_NOPIV(scomplex, lapack_complex_float, c)\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_QR_LAPACKE_H\n"
  },
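  {
    "path": "example/sketches/householder_qr_lapacke.cpp",
    "content": "// Editor's sketch -- a hypothetical example file, not shipped with regenie or\n// Eigen. It shows how the householder_qr_inplace_blocked specialization in\n// HouseholderQR_LAPACKE.h above is selected: defining EIGEN_USE_LAPACKE before\n// including Eigen (and linking a LAPACKE provider such as MKL) routes the\n// factorization of LAPACK-supported scalar types with unit inner stride to\n// LAPACKE_?geqrf. The public HouseholderQR API is unchanged.\n#define EIGEN_USE_LAPACKE\n#include <Eigen/Dense>\n\nint main()\n{\n  Eigen::MatrixXd A = Eigen::MatrixXd::Random(512, 512);\n  // With double scalars this dispatches to LAPACKE_dgeqrf under the hood.\n  Eigen::HouseholderQR<Eigen::MatrixXd> qr(A);\n  return qr.matrixQR().allFinite() ? 0 : 1;\n}\n"
  },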
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2012 Desire Nuentsa <desire.nuentsa_wakam@inria.fr>\n// Copyright (C) 2014 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_SUITESPARSEQRSUPPORT_H\n#define EIGEN_SUITESPARSEQRSUPPORT_H\n\nnamespace Eigen {\n  \n  template<typename MatrixType> class SPQR; \n  template<typename SPQRType> struct SPQRMatrixQReturnType; \n  template<typename SPQRType> struct SPQRMatrixQTransposeReturnType; \n  template <typename SPQRType, typename Derived> struct SPQR_QProduct;\n  namespace internal {\n    template <typename SPQRType> struct traits<SPQRMatrixQReturnType<SPQRType> >\n    {\n      typedef typename SPQRType::MatrixType ReturnType;\n    };\n    template <typename SPQRType> struct traits<SPQRMatrixQTransposeReturnType<SPQRType> >\n    {\n      typedef typename SPQRType::MatrixType ReturnType;\n    };\n    template <typename SPQRType, typename Derived> struct traits<SPQR_QProduct<SPQRType, Derived> >\n    {\n      typedef typename Derived::PlainObject ReturnType;\n    };\n  } // End namespace internal\n  \n/**\n  * \\ingroup SPQRSupport_Module\n  * \\class SPQR\n  * \\brief Sparse QR factorization based on SuiteSparseQR library\n  *\n  * This class is used to perform a multithreaded and multifrontal rank-revealing QR decomposition\n  * of sparse matrices. The result is then used to solve linear leasts_square systems.\n  * Clearly, a QR factorization is returned such that A*P = Q*R where :\n  *\n  * P is the column permutation. Use colsPermutation() to get it.\n  *\n  * Q is the orthogonal matrix represented as Householder reflectors.\n  * Use matrixQ() to get an expression and matrixQ().transpose() to get the transpose.\n  * You can then apply it to a vector.\n  *\n  * R is the sparse triangular factor. Use matrixQR() to get it as SparseMatrix.\n  * NOTE : The Index type of R is always SuiteSparse_long. 
You can get it with SPQR::Index\n  *\n  * \\tparam _MatrixType The type of the sparse matrix A, must be a column-major SparseMatrix<>\n  *\n  * \\implsparsesolverconcept\n  *\n  *\n  */\ntemplate<typename _MatrixType>\nclass SPQR : public SparseSolverBase<SPQR<_MatrixType> >\n{\n  protected:\n    typedef SparseSolverBase<SPQR<_MatrixType> > Base;\n    using Base::m_isInitialized;\n  public:\n    typedef typename _MatrixType::Scalar Scalar;\n    typedef typename _MatrixType::RealScalar RealScalar;\n    typedef SuiteSparse_long StorageIndex ;\n    typedef SparseMatrix<Scalar, ColMajor, StorageIndex> MatrixType;\n    typedef Map<PermutationMatrix<Dynamic, Dynamic, StorageIndex> > PermutationType;\n    enum {\n      ColsAtCompileTime = Dynamic,\n      MaxColsAtCompileTime = Dynamic\n    };\n  public:\n    SPQR() \n      : m_analysisIsOk(false),\n        m_factorizationIsOk(false),\n        m_isRUpToDate(false),\n        m_ordering(SPQR_ORDERING_DEFAULT),\n        m_allow_tol(SPQR_DEFAULT_TOL),\n        m_tolerance (NumTraits<Scalar>::epsilon()),\n        m_cR(0),\n        m_E(0),\n        m_H(0),\n        m_HPinv(0),\n        m_HTau(0),\n        m_useDefaultThreshold(true)\n    { \n      cholmod_l_start(&m_cc);\n    }\n    \n    explicit SPQR(const _MatrixType& matrix)\n      : m_analysisIsOk(false),\n        m_factorizationIsOk(false),\n        m_isRUpToDate(false),\n        m_ordering(SPQR_ORDERING_DEFAULT),\n        m_allow_tol(SPQR_DEFAULT_TOL),\n        m_tolerance (NumTraits<Scalar>::epsilon()),\n        m_cR(0),\n        m_E(0),\n        m_H(0),\n        m_HPinv(0),\n        m_HTau(0),\n        m_useDefaultThreshold(true)\n    {\n      cholmod_l_start(&m_cc);\n      compute(matrix);\n    }\n    \n    ~SPQR()\n    {\n      SPQR_free();\n      cholmod_l_finish(&m_cc);\n    }\n    void SPQR_free()\n    {\n      cholmod_l_free_sparse(&m_H, &m_cc);\n      cholmod_l_free_sparse(&m_cR, &m_cc);\n      cholmod_l_free_dense(&m_HTau, &m_cc);\n      std::free(m_E);\n      std::free(m_HPinv);\n    }\n\n    void compute(const _MatrixType& matrix)\n    {\n      if(m_isInitialized) SPQR_free();\n\n      MatrixType mat(matrix);\n      \n      /* Compute the default threshold as in MatLab, see:\n       * Tim Davis, \"Algorithm 915, SuiteSparseQR: Multifrontal Multithreaded Rank-Revealing\n       * Sparse QR Factorization, ACM Trans. on Math. Soft. 38(1), 2011, Page 8:3 \n       */\n      RealScalar pivotThreshold = m_tolerance;\n      if(m_useDefaultThreshold) \n      {\n        RealScalar max2Norm = 0.0;\n        for (int j = 0; j < mat.cols(); j++) max2Norm = numext::maxi(max2Norm, mat.col(j).norm());\n        if(max2Norm==RealScalar(0))\n          max2Norm = RealScalar(1);\n        pivotThreshold = 20 * (mat.rows() + mat.cols()) * max2Norm * NumTraits<RealScalar>::epsilon();\n      }\n      cholmod_sparse A; \n      A = viewAsCholmod(mat);\n      m_rows = matrix.rows();\n      Index col = matrix.cols();\n      m_rank = SuiteSparseQR<Scalar>(m_ordering, pivotThreshold, col, &A, \n                             &m_cR, &m_E, &m_H, &m_HPinv, &m_HTau, &m_cc);\n\n      if (!m_cR)\n      {\n        m_info = NumericalIssue;\n        m_isInitialized = false;\n        return;\n      }\n      m_info = Success;\n      m_isInitialized = true;\n      m_isRUpToDate = false;\n    }\n    /** \n     * Get the number of rows of the input matrix and the Q matrix\n     */\n    inline Index rows() const {return m_rows; }\n    \n    /** \n     * Get the number of columns of the input matrix. 
\n     */\n    inline Index cols() const { return m_cR->ncol; }\n    \n    template<typename Rhs, typename Dest>\n    void _solve_impl(const MatrixBase<Rhs> &b, MatrixBase<Dest> &dest) const\n    {\n      eigen_assert(m_isInitialized && \" The QR factorization should be computed first, call compute()\");\n      eigen_assert(b.cols()==1 && \"This method is for vectors only\");\n\n      //Compute Q^T * b\n      typename Dest::PlainObject y, y2;\n      y = matrixQ().transpose() * b;\n      \n      // Solves with the triangular matrix R\n      Index rk = this->rank();\n      y2 = y;\n      y.resize((std::max)(cols(),Index(y.rows())),y.cols());\n      y.topRows(rk) = this->matrixR().topLeftCorner(rk, rk).template triangularView<Upper>().solve(y2.topRows(rk));\n\n      // Apply the column permutation \n      // colsPermutation() performs a copy of the permutation,\n      // so let's apply it manually:\n      for(Index i = 0; i < rk; ++i) dest.row(m_E[i]) = y.row(i);\n      for(Index i = rk; i < cols(); ++i) dest.row(m_E[i]).setZero();\n      \n//       y.bottomRows(y.rows()-rk).setZero();\n//       dest = colsPermutation() * y.topRows(cols());\n      \n      m_info = Success;\n    }\n    \n    /** \\returns the sparse triangular factor R. It is a sparse matrix\n     */\n    const MatrixType matrixR() const\n    {\n      eigen_assert(m_isInitialized && \" The QR factorization should be computed first, call compute()\");\n      if(!m_isRUpToDate) {\n        m_R = viewAsEigen<Scalar,ColMajor, typename MatrixType::StorageIndex>(*m_cR);\n        m_isRUpToDate = true;\n      }\n      return m_R;\n    }\n    /// Get an expression of the matrix Q\n    SPQRMatrixQReturnType<SPQR> matrixQ() const\n    {\n      return SPQRMatrixQReturnType<SPQR>(*this);\n    }\n    /// Get the permutation that was applied to columns of A\n    PermutationType colsPermutation() const\n    { \n      eigen_assert(m_isInitialized && \"Decomposition is not initialized.\");\n      return PermutationType(m_E, m_cR->ncol);\n    }\n    /**\n     * Gets the rank of the matrix. 
\n     * It should be equal to matrixQR().cols if the matrix is full-rank\n     */\n    Index rank() const\n    {\n      eigen_assert(m_isInitialized && \"Decomposition is not initialized.\");\n      return m_cc.SPQR_istat[4];\n    }\n    /// Set the fill-reducing ordering method to be used\n    void setSPQROrdering(int ord) { m_ordering = ord;}\n    /// Set the tolerance tol to treat columns with 2-norm < =tol as zero\n    void setPivotThreshold(const RealScalar& tol)\n    {\n      m_useDefaultThreshold = false;\n      m_tolerance = tol;\n    }\n    \n    /** \\returns a pointer to the SPQR workspace */\n    cholmod_common *cholmodCommon() const { return &m_cc; }\n    \n    \n    /** \\brief Reports whether previous computation was successful.\n      *\n      * \\returns \\c Success if computation was successful,\n      *          \\c NumericalIssue if the sparse QR can not be computed\n      */\n    ComputationInfo info() const\n    {\n      eigen_assert(m_isInitialized && \"Decomposition is not initialized.\");\n      return m_info;\n    }\n  protected:\n    bool m_analysisIsOk;\n    bool m_factorizationIsOk;\n    mutable bool m_isRUpToDate;\n    mutable ComputationInfo m_info;\n    int m_ordering; // Ordering method to use, see SPQR's manual\n    int m_allow_tol; // Allow to use some tolerance during numerical factorization.\n    RealScalar m_tolerance; // treat columns with 2-norm below this tolerance as zero\n    mutable cholmod_sparse *m_cR; // The sparse R factor in cholmod format\n    mutable MatrixType m_R; // The sparse matrix R in Eigen format\n    mutable StorageIndex *m_E; // The permutation applied to columns\n    mutable cholmod_sparse *m_H;  //The householder vectors\n    mutable StorageIndex *m_HPinv; // The row permutation of H\n    mutable cholmod_dense *m_HTau; // The Householder coefficients\n    mutable Index m_rank; // The rank of the matrix\n    mutable cholmod_common m_cc; // Workspace and parameters\n    bool m_useDefaultThreshold;     // Use default threshold\n    Index m_rows;\n    template<typename ,typename > friend struct SPQR_QProduct;\n};\n\ntemplate <typename SPQRType, typename Derived>\nstruct SPQR_QProduct : ReturnByValue<SPQR_QProduct<SPQRType,Derived> >\n{\n  typedef typename SPQRType::Scalar Scalar;\n  typedef typename SPQRType::StorageIndex StorageIndex;\n  //Define the constructor to get reference to argument types\n  SPQR_QProduct(const SPQRType& spqr, const Derived& other, bool transpose) : m_spqr(spqr),m_other(other),m_transpose(transpose) {}\n  \n  inline Index rows() const { return m_transpose ? m_spqr.rows() : m_spqr.cols(); }\n  inline Index cols() const { return m_other.cols(); }\n  // Assign to a vector\n  template<typename ResType>\n  void evalTo(ResType& res) const\n  {\n    cholmod_dense y_cd;\n    cholmod_dense *x_cd; \n    int method = m_transpose ? 
SPQR_QTX : SPQR_QX; \n    cholmod_common *cc = m_spqr.cholmodCommon();\n    y_cd = viewAsCholmod(m_other.const_cast_derived());\n    x_cd = SuiteSparseQR_qmult<Scalar>(method, m_spqr.m_H, m_spqr.m_HTau, m_spqr.m_HPinv, &y_cd, cc);\n    res = Matrix<Scalar,ResType::RowsAtCompileTime,ResType::ColsAtCompileTime>::Map(reinterpret_cast<Scalar*>(x_cd->x), x_cd->nrow, x_cd->ncol);\n    cholmod_l_free_dense(&x_cd, cc);\n  }\n  const SPQRType& m_spqr; \n  const Derived& m_other; \n  bool m_transpose; \n  \n};\ntemplate<typename SPQRType>\nstruct SPQRMatrixQReturnType{\n  \n  SPQRMatrixQReturnType(const SPQRType& spqr) : m_spqr(spqr) {}\n  template<typename Derived>\n  SPQR_QProduct<SPQRType, Derived> operator*(const MatrixBase<Derived>& other)\n  {\n    return SPQR_QProduct<SPQRType,Derived>(m_spqr,other.derived(),false);\n  }\n  SPQRMatrixQTransposeReturnType<SPQRType> adjoint() const\n  {\n    return SPQRMatrixQTransposeReturnType<SPQRType>(m_spqr);\n  }\n  // To use for operations with the transpose of Q\n  SPQRMatrixQTransposeReturnType<SPQRType> transpose() const\n  {\n    return SPQRMatrixQTransposeReturnType<SPQRType>(m_spqr);\n  }\n  const SPQRType& m_spqr;\n};\n\ntemplate<typename SPQRType>\nstruct SPQRMatrixQTransposeReturnType{\n  SPQRMatrixQTransposeReturnType(const SPQRType& spqr) : m_spqr(spqr) {}\n  template<typename Derived>\n  SPQR_QProduct<SPQRType,Derived> operator*(const MatrixBase<Derived>& other)\n  {\n    return SPQR_QProduct<SPQRType,Derived>(m_spqr,other.derived(), true);\n  }\n  const SPQRType& m_spqr;\n};\n\n}// End namespace Eigen\n#endif\n"
  },
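  {
    "path": "example/sketches/spqr_least_squares.cpp",
    "content": "// Editor's sketch -- a hypothetical example file, not shipped with regenie or\n// Eigen; it assumes SuiteSparseQR and CHOLMOD are installed and linked.\n// Minimal sparse least-squares solve with the SPQR wrapper defined in\n// SuiteSparseQRSupport.h above: factorize A*P = Q*R, then solve\n// min ||A*x - b|| and query the revealed rank.\n#include <Eigen/SparseCore>\n#include <Eigen/SPQRSupport>\n\nint main()\n{\n  // SPQR requires a column-major SparseMatrix.\n  typedef Eigen::SparseMatrix<double, Eigen::ColMajor> SpMat;\n  SpMat A(4, 3);\n  A.insert(0, 0) = 1.0;\n  A.insert(1, 1) = 2.0;\n  A.insert(2, 2) = 3.0;\n  A.insert(3, 0) = 4.0;\n  A.makeCompressed();\n  Eigen::VectorXd b(4);\n  b << 1.0, 2.0, 3.0, 4.0;\n\n  Eigen::SPQR<SpMat> spqr(A);                 // rank-revealing QR: A*P = Q*R\n  if (spqr.info() != Eigen::Success) return 1;\n  Eigen::VectorXd x = spqr.solve(b);          // least-squares solution\n  return (spqr.rank() == 3 && x.allFinite()) ? 0 : 1;\n}\n"
  },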
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/SVD/BDCSVD.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n// \n// We used the \"A Divide-And-Conquer Algorithm for the Bidiagonal SVD\"\n// research report written by Ming Gu and Stanley C.Eisenstat\n// The code variable names correspond to the names they used in their \n// report\n//\n// Copyright (C) 2013 Gauthier Brun <brun.gauthier@gmail.com>\n// Copyright (C) 2013 Nicolas Carre <nicolas.carre@ensimag.fr>\n// Copyright (C) 2013 Jean Ceccato <jean.ceccato@ensimag.fr>\n// Copyright (C) 2013 Pierre Zoppitelli <pierre.zoppitelli@ensimag.fr>\n// Copyright (C) 2013 Jitse Niesen <jitse@maths.leeds.ac.uk>\n// Copyright (C) 2014-2017 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_BDCSVD_H\n#define EIGEN_BDCSVD_H\n// #define EIGEN_BDCSVD_DEBUG_VERBOSE\n// #define EIGEN_BDCSVD_SANITY_CHECKS\n\n#ifdef EIGEN_BDCSVD_SANITY_CHECKS\n#undef eigen_internal_assert\n#define eigen_internal_assert(X) assert(X);\n#endif\n\nnamespace Eigen {\n\n#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE\nIOFormat bdcsvdfmt(8, 0, \", \", \"\\n\", \"  [\", \"]\");\n#endif\n  \ntemplate<typename _MatrixType> class BDCSVD;\n\nnamespace internal {\n\ntemplate<typename _MatrixType> \nstruct traits<BDCSVD<_MatrixType> >\n        : traits<_MatrixType>\n{\n  typedef _MatrixType MatrixType;\n};  \n\n} // end namespace internal\n  \n  \n/** \\ingroup SVD_Module\n *\n *\n * \\class BDCSVD\n *\n * \\brief class Bidiagonal Divide and Conquer SVD\n *\n * \\tparam _MatrixType the type of the matrix of which we are computing the SVD decomposition\n *\n * This class first reduces the input matrix to bi-diagonal form using class UpperBidiagonalization,\n * and then performs a divide-and-conquer diagonalization. Small blocks are diagonalized using class JacobiSVD.\n * You can control the switching size with the setSwitchSize() method, default is 16.\n * For small matrice (<16), it is thus preferable to directly use JacobiSVD. For larger ones, BDCSVD is highly\n * recommended and can several order of magnitude faster.\n *\n * \\warning this algorithm is unlikely to provide accurate result when compiled with unsafe math optimizations.\n * For instance, this concerns Intel's compiler (ICC), which performs such optimization by default unless\n * you compile with the \\c -fp-model \\c precise option. 
Likewise, the \\c -ffast-math option of GCC or clang will\n * significantly degrade the accuracy.\n *\n * \\sa class JacobiSVD\n */\ntemplate<typename _MatrixType> \nclass BDCSVD : public SVDBase<BDCSVD<_MatrixType> >\n{\n  typedef SVDBase<BDCSVD> Base;\n    \npublic:\n  using Base::rows;\n  using Base::cols;\n  using Base::computeU;\n  using Base::computeV;\n  \n  typedef _MatrixType MatrixType;\n  typedef typename MatrixType::Scalar Scalar;\n  typedef typename NumTraits<typename MatrixType::Scalar>::Real RealScalar;\n  typedef typename NumTraits<RealScalar>::Literal Literal;\n  enum {\n    RowsAtCompileTime = MatrixType::RowsAtCompileTime, \n    ColsAtCompileTime = MatrixType::ColsAtCompileTime, \n    DiagSizeAtCompileTime = EIGEN_SIZE_MIN_PREFER_DYNAMIC(RowsAtCompileTime, ColsAtCompileTime), \n    MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, \n    MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime, \n    MaxDiagSizeAtCompileTime = EIGEN_SIZE_MIN_PREFER_FIXED(MaxRowsAtCompileTime, MaxColsAtCompileTime), \n    MatrixOptions = MatrixType::Options\n  };\n\n  typedef typename Base::MatrixUType MatrixUType;\n  typedef typename Base::MatrixVType MatrixVType;\n  typedef typename Base::SingularValuesType SingularValuesType;\n  \n  typedef Matrix<Scalar, Dynamic, Dynamic, ColMajor> MatrixX;\n  typedef Matrix<RealScalar, Dynamic, Dynamic, ColMajor> MatrixXr;\n  typedef Matrix<RealScalar, Dynamic, 1> VectorType;\n  typedef Array<RealScalar, Dynamic, 1> ArrayXr;\n  typedef Array<Index,1,Dynamic> ArrayXi;\n  typedef Ref<ArrayXr> ArrayRef;\n  typedef Ref<ArrayXi> IndicesRef;\n\n  /** \\brief Default Constructor.\n   *\n   * The default constructor is useful in cases in which the user intends to\n   * perform decompositions via BDCSVD::compute(const MatrixType&).\n   */\n  BDCSVD() : m_algoswap(16), m_isTranspose(false), m_compU(false), m_compV(false), m_numIters(0)\n  {}\n\n\n  /** \\brief Default Constructor with memory preallocation\n   *\n   * Like the default constructor but with preallocation of the internal data\n   * according to the specified problem size.\n   * \\sa BDCSVD()\n   */\n  BDCSVD(Index rows, Index cols, unsigned int computationOptions = 0)\n    : m_algoswap(16), m_numIters(0)\n  {\n    allocate(rows, cols, computationOptions);\n  }\n\n  /** \\brief Constructor performing the decomposition of given matrix.\n   *\n   * \\param matrix the matrix to decompose\n   * \\param computationOptions optional parameter allowing to specify if you want full or thin U or V unitaries to be computed.\n   *                           By default, none is computed. This is a bit - field, the possible bits are #ComputeFullU, #ComputeThinU, \n   *                           #ComputeFullV, #ComputeThinV.\n   *\n   * Thin unitaries are only available if your matrix type has a Dynamic number of columns (for example MatrixXf). They also are not\n   * available with the (non - default) FullPivHouseholderQR preconditioner.\n   */\n  BDCSVD(const MatrixType& matrix, unsigned int computationOptions = 0)\n    : m_algoswap(16), m_numIters(0)\n  {\n    compute(matrix, computationOptions);\n  }\n\n  ~BDCSVD() \n  {\n  }\n  \n  /** \\brief Method performing the decomposition of given matrix using custom options.\n   *\n   * \\param matrix the matrix to decompose\n   * \\param computationOptions optional parameter allowing to specify if you want full or thin U or V unitaries to be computed.\n   *                           By default, none is computed. 
This is a bit - field, the possible bits are #ComputeFullU, #ComputeThinU, \n   *                           #ComputeFullV, #ComputeThinV.\n   *\n   * Thin unitaries are only available if your matrix type has a Dynamic number of columns (for example MatrixXf). They also are not\n   * available with the (non - default) FullPivHouseholderQR preconditioner.\n   */\n  BDCSVD& compute(const MatrixType& matrix, unsigned int computationOptions);\n\n  /** \\brief Method performing the decomposition of given matrix using current options.\n   *\n   * \\param matrix the matrix to decompose\n   *\n   * This method uses the current \\a computationOptions, as already passed to the constructor or to compute(const MatrixType&, unsigned int).\n   */\n  BDCSVD& compute(const MatrixType& matrix)\n  {\n    return compute(matrix, this->m_computationOptions);\n  }\n\n  void setSwitchSize(int s) \n  {\n    eigen_assert(s>3 && \"BDCSVD the size of the algo switch has to be greater than 3\");\n    m_algoswap = s;\n  }\n \nprivate:\n  void allocate(Index rows, Index cols, unsigned int computationOptions);\n  void divide(Index firstCol, Index lastCol, Index firstRowW, Index firstColW, Index shift);\n  void computeSVDofM(Index firstCol, Index n, MatrixXr& U, VectorType& singVals, MatrixXr& V);\n  void computeSingVals(const ArrayRef& col0, const ArrayRef& diag, const IndicesRef& perm, VectorType& singVals, ArrayRef shifts, ArrayRef mus);\n  void perturbCol0(const ArrayRef& col0, const ArrayRef& diag, const IndicesRef& perm, const VectorType& singVals, const ArrayRef& shifts, const ArrayRef& mus, ArrayRef zhat);\n  void computeSingVecs(const ArrayRef& zhat, const ArrayRef& diag, const IndicesRef& perm, const VectorType& singVals, const ArrayRef& shifts, const ArrayRef& mus, MatrixXr& U, MatrixXr& V);\n  void deflation43(Index firstCol, Index shift, Index i, Index size);\n  void deflation44(Index firstColu , Index firstColm, Index firstRowW, Index firstColW, Index i, Index j, Index size);\n  void deflation(Index firstCol, Index lastCol, Index k, Index firstRowW, Index firstColW, Index shift);\n  template<typename HouseholderU, typename HouseholderV, typename NaiveU, typename NaiveV>\n  void copyUV(const HouseholderU &householderU, const HouseholderV &householderV, const NaiveU &naiveU, const NaiveV &naivev);\n  void structured_update(Block<MatrixXr,Dynamic,Dynamic> A, const MatrixXr &B, Index n1);\n  static RealScalar secularEq(RealScalar x, const ArrayRef& col0, const ArrayRef& diag, const IndicesRef &perm, const ArrayRef& diagShifted, RealScalar shift);\n\nprotected:\n  MatrixXr m_naiveU, m_naiveV;\n  MatrixXr m_computed;\n  Index m_nRec;\n  ArrayXr m_workspace;\n  ArrayXi m_workspaceI;\n  int m_algoswap;\n  bool m_isTranspose, m_compU, m_compV;\n  \n  using Base::m_singularValues;\n  using Base::m_diagSize;\n  using Base::m_computeFullU;\n  using Base::m_computeFullV;\n  using Base::m_computeThinU;\n  using Base::m_computeThinV;\n  using Base::m_matrixU;\n  using Base::m_matrixV;\n  using Base::m_info;\n  using Base::m_isInitialized;\n  using Base::m_nonzeroSingularValues;\n\npublic:  \n  int m_numIters;\n}; //end class BDCSVD\n\n\n// Method to allocate and initialize matrix and attributes\ntemplate<typename MatrixType>\nvoid BDCSVD<MatrixType>::allocate(Eigen::Index rows, Eigen::Index cols, unsigned int computationOptions)\n{\n  m_isTranspose = (cols > rows);\n\n  if (Base::allocate(rows, cols, computationOptions))\n    return;\n  \n  m_computed = MatrixXr::Zero(m_diagSize + 1, m_diagSize );\n  m_compU = 
computeV();\n  m_compV = computeU();\n  if (m_isTranspose)\n    std::swap(m_compU, m_compV);\n  \n  if (m_compU) m_naiveU = MatrixXr::Zero(m_diagSize + 1, m_diagSize + 1 );\n  else         m_naiveU = MatrixXr::Zero(2, m_diagSize + 1 );\n  \n  if (m_compV) m_naiveV = MatrixXr::Zero(m_diagSize, m_diagSize);\n  \n  m_workspace.resize((m_diagSize+1)*(m_diagSize+1)*3);\n  m_workspaceI.resize(3*m_diagSize);\n}// end allocate\n\ntemplate<typename MatrixType>\nBDCSVD<MatrixType>& BDCSVD<MatrixType>::compute(const MatrixType& matrix, unsigned int computationOptions) \n{\n#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE\n  std::cout << \"\\n\\n\\n======================================================================================================================\\n\\n\\n\";\n#endif\n  allocate(matrix.rows(), matrix.cols(), computationOptions);\n  using std::abs;\n\n  const RealScalar considerZero = (std::numeric_limits<RealScalar>::min)();\n  \n  //**** step -1 - If the problem is too small, directly falls back to JacobiSVD and return\n  if(matrix.cols() < m_algoswap)\n  {\n    // FIXME this line involves temporaries\n    JacobiSVD<MatrixType> jsvd(matrix,computationOptions);\n    m_isInitialized = true;\n    m_info = jsvd.info();\n    if (m_info == Success || m_info == NoConvergence) {\n      if(computeU()) m_matrixU = jsvd.matrixU();\n      if(computeV()) m_matrixV = jsvd.matrixV();\n      m_singularValues = jsvd.singularValues();\n      m_nonzeroSingularValues = jsvd.nonzeroSingularValues();\n    }\n    return *this;\n  }\n  \n  //**** step 0 - Copy the input matrix and apply scaling to reduce over/under-flows\n  RealScalar scale = matrix.cwiseAbs().template maxCoeff<PropagateNaN>();\n  if (!(numext::isfinite)(scale)) {\n    m_isInitialized = true;\n    m_info = InvalidInput;\n    return *this;\n  }\n\n  if(scale==Literal(0)) scale = Literal(1);\n  MatrixX copy;\n  if (m_isTranspose) copy = matrix.adjoint()/scale;\n  else               copy = matrix/scale;\n  \n  //**** step 1 - Bidiagonalization\n  // FIXME this line involves temporaries\n  internal::UpperBidiagonalization<MatrixX> bid(copy);\n\n  //**** step 2 - Divide & Conquer\n  m_naiveU.setZero();\n  m_naiveV.setZero();\n  // FIXME this line involves a temporary matrix\n  m_computed.topRows(m_diagSize) = bid.bidiagonal().toDenseMatrix().transpose();\n  m_computed.template bottomRows<1>().setZero();\n  divide(0, m_diagSize - 1, 0, 0, 0);\n  if (m_info != Success && m_info != NoConvergence) {\n    m_isInitialized = true;\n    return *this;\n  }\n    \n  //**** step 3 - Copy singular values and vectors\n  for (int i=0; i<m_diagSize; i++)\n  {\n    RealScalar a = abs(m_computed.coeff(i, i));\n    m_singularValues.coeffRef(i) = a * scale;\n    if (a<considerZero)\n    {\n      m_nonzeroSingularValues = i;\n      m_singularValues.tail(m_diagSize - i - 1).setZero();\n      break;\n    }\n    else if (i == m_diagSize - 1)\n    {\n      m_nonzeroSingularValues = i + 1;\n      break;\n    }\n  }\n\n#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE\n//   std::cout << \"m_naiveU\\n\" << m_naiveU << \"\\n\\n\";\n//   std::cout << \"m_naiveV\\n\" << m_naiveV << \"\\n\\n\";\n#endif\n  if(m_isTranspose) copyUV(bid.householderV(), bid.householderU(), m_naiveV, m_naiveU);\n  else              copyUV(bid.householderU(), bid.householderV(), m_naiveU, m_naiveV);\n\n  m_isInitialized = true;\n  return *this;\n}// end compute\n\n\ntemplate<typename MatrixType>\ntemplate<typename HouseholderU, typename HouseholderV, typename NaiveU, typename NaiveV>\nvoid BDCSVD<MatrixType>::copyUV(const 
HouseholderU &householderU, const HouseholderV &householderV, const NaiveU &naiveU, const NaiveV &naiveV)\n{\n  // Note exchange of U and V: m_matrixU is set from m_naiveV and vice versa\n  if (computeU())\n  {\n    Index Ucols = m_computeThinU ? m_diagSize : householderU.cols();\n    m_matrixU = MatrixX::Identity(householderU.cols(), Ucols);\n    m_matrixU.topLeftCorner(m_diagSize, m_diagSize) = naiveV.template cast<Scalar>().topLeftCorner(m_diagSize, m_diagSize);\n    householderU.applyThisOnTheLeft(m_matrixU); // FIXME this line involves a temporary buffer\n  }\n  if (computeV())\n  {\n    Index Vcols = m_computeThinV ? m_diagSize : householderV.cols();\n    m_matrixV = MatrixX::Identity(householderV.cols(), Vcols);\n    m_matrixV.topLeftCorner(m_diagSize, m_diagSize) = naiveU.template cast<Scalar>().topLeftCorner(m_diagSize, m_diagSize);\n    householderV.applyThisOnTheLeft(m_matrixV); // FIXME this line involves a temporary buffer\n  }\n}\n\n/** \\internal\n  * Performs A = A * B exploiting the special structure of the matrix A. Splitting A as:\n  *  A = [A1]\n  *      [A2]\n  * such that A1.rows()==n1, then we assume that at least half of the columns of A1 and A2 are zeros.\n  * We can thus pack them prior to the matrix product. However, this is only worth the effort if the matrix is large\n  * enough.\n  */\ntemplate<typename MatrixType>\nvoid BDCSVD<MatrixType>::structured_update(Block<MatrixXr,Dynamic,Dynamic> A, const MatrixXr &B, Index n1)\n{\n  Index n = A.rows();\n  if(n>100)\n  {\n    // If the matrices are large enough, let's exploit the sparse structure of A by\n    // splitting it in half (wrt n1), and packing the non-zero columns.\n    Index n2 = n - n1;\n    Map<MatrixXr> A1(m_workspace.data()      , n1, n);\n    Map<MatrixXr> A2(m_workspace.data()+ n1*n, n2, n);\n    Map<MatrixXr> B1(m_workspace.data()+  n*n, n,  n);\n    Map<MatrixXr> B2(m_workspace.data()+2*n*n, n,  n);\n    Index k1=0, k2=0;\n    for(Index j=0; j<n; ++j)\n    {\n      if( (A.col(j).head(n1).array()!=Literal(0)).any() )\n      {\n        A1.col(k1) = A.col(j).head(n1);\n        B1.row(k1) = B.row(j);\n        ++k1;\n      }\n      if( (A.col(j).tail(n2).array()!=Literal(0)).any() )\n      {\n        A2.col(k2) = A.col(j).tail(n2);\n        B2.row(k2) = B.row(j);\n        ++k2;\n      }\n    }\n  \n    A.topRows(n1).noalias()    = A1.leftCols(k1) * B1.topRows(k1);\n    A.bottomRows(n2).noalias() = A2.leftCols(k2) * B2.topRows(k2);\n  }\n  else\n  {\n    Map<MatrixXr,Aligned> tmp(m_workspace.data(),n,n);\n    tmp.noalias() = A*B;\n    A = tmp;\n  }\n}\n\n// The divide algorithm is done \"in place\": we are always working on subsets of the same matrix. The divide method takes as arguments the \n// place of the submatrix we are currently working on.\n\n//@param firstCol : The Index of the first column of the submatrix of m_computed and for m_naiveU;\n//@param lastCol : The Index of the last column of the submatrix of m_computed and for m_naiveU; \n// lastCol + 1 - firstCol is the size of the submatrix.\n//@param firstRowW : The Index of the first row of the matrix W that we are to change. (see the reference paper section 1 for more information on W)\n//@param firstColW : Same as firstRowW but for columns.\n//@param shift : Each time one takes the left submatrix, one must add 1 to the shift. Why? Because! We actually want the last column of the U submatrix \n// to become the first column (*coeff) and to shift all the other columns to the right. 
There are more details in the reference paper.\ntemplate<typename MatrixType>\nvoid BDCSVD<MatrixType>::divide(Eigen::Index firstCol, Eigen::Index lastCol, Eigen::Index firstRowW, Eigen::Index firstColW, Eigen::Index shift)\n{\n  // requires rows = cols + 1;\n  using std::pow;\n  using std::sqrt;\n  using std::abs;\n  const Index n = lastCol - firstCol + 1;\n  const Index k = n/2;\n  const RealScalar considerZero = (std::numeric_limits<RealScalar>::min)();\n  RealScalar alphaK;\n  RealScalar betaK; \n  RealScalar r0; \n  RealScalar lambda, phi, c0, s0;\n  VectorType l, f;\n  // We use the other algorithm which is more efficient for small \n  // matrices.\n  if (n < m_algoswap)\n  {\n    // FIXME this line involves temporaries\n    JacobiSVD<MatrixXr> b(m_computed.block(firstCol, firstCol, n + 1, n), ComputeFullU | (m_compV ? ComputeFullV : 0));\n    m_info = b.info();\n    if (m_info != Success && m_info != NoConvergence) return;\n    if (m_compU)\n      m_naiveU.block(firstCol, firstCol, n + 1, n + 1).real() = b.matrixU();\n    else \n    {\n      m_naiveU.row(0).segment(firstCol, n + 1).real() = b.matrixU().row(0);\n      m_naiveU.row(1).segment(firstCol, n + 1).real() = b.matrixU().row(n);\n    }\n    if (m_compV) m_naiveV.block(firstRowW, firstColW, n, n).real() = b.matrixV();\n    m_computed.block(firstCol + shift, firstCol + shift, n + 1, n).setZero();\n    m_computed.diagonal().segment(firstCol + shift, n) = b.singularValues().head(n);\n    return;\n  }\n  // We use the divide and conquer algorithm\n  alphaK =  m_computed(firstCol + k, firstCol + k);\n  betaK = m_computed(firstCol + k + 1, firstCol + k);\n  // The divide must be done in that order to have good results: dividing changes the data inside the submatrices,\n  // and the divide of the right submatrix reads one column of the left submatrix. That's why we need to treat the \n  // right submatrix before the left one. 
\n  divide(k + 1 + firstCol, lastCol, k + 1 + firstRowW, k + 1 + firstColW, shift);\n  if (m_info != Success && m_info != NoConvergence) return;\n  divide(firstCol, k - 1 + firstCol, firstRowW, firstColW + 1, shift + 1);\n  if (m_info != Success && m_info != NoConvergence) return;\n\n  if (m_compU)\n  {\n    lambda = m_naiveU(firstCol + k, firstCol + k);\n    phi = m_naiveU(firstCol + k + 1, lastCol + 1);\n  } \n  else \n  {\n    lambda = m_naiveU(1, firstCol + k);\n    phi = m_naiveU(0, lastCol + 1);\n  }\n  r0 = sqrt((abs(alphaK * lambda) * abs(alphaK * lambda)) + abs(betaK * phi) * abs(betaK * phi));\n  if (m_compU)\n  {\n    l = m_naiveU.row(firstCol + k).segment(firstCol, k);\n    f = m_naiveU.row(firstCol + k + 1).segment(firstCol + k + 1, n - k - 1);\n  } \n  else \n  {\n    l = m_naiveU.row(1).segment(firstCol, k);\n    f = m_naiveU.row(0).segment(firstCol + k + 1, n - k - 1);\n  }\n  if (m_compV) m_naiveV(firstRowW+k, firstColW) = Literal(1);\n  if (r0<considerZero)\n  {\n    c0 = Literal(1);\n    s0 = Literal(0);\n  }\n  else\n  {\n    c0 = alphaK * lambda / r0;\n    s0 = betaK * phi / r0;\n  }\n  \n#ifdef EIGEN_BDCSVD_SANITY_CHECKS\n  assert(m_naiveU.allFinite());\n  assert(m_naiveV.allFinite());\n  assert(m_computed.allFinite());\n#endif\n  \n  if (m_compU)\n  {\n    MatrixXr q1 (m_naiveU.col(firstCol + k).segment(firstCol, k + 1));     \n    // we shiftW Q1 to the right\n    for (Index i = firstCol + k - 1; i >= firstCol; i--) \n      m_naiveU.col(i + 1).segment(firstCol, k + 1) = m_naiveU.col(i).segment(firstCol, k + 1);\n    // we shift q1 at the left with a factor c0\n    m_naiveU.col(firstCol).segment( firstCol, k + 1) = (q1 * c0);\n    // last column = q1 * - s0\n    m_naiveU.col(lastCol + 1).segment(firstCol, k + 1) = (q1 * ( - s0));\n    // first column = q2 * s0\n    m_naiveU.col(firstCol).segment(firstCol + k + 1, n - k) = m_naiveU.col(lastCol + 1).segment(firstCol + k + 1, n - k) * s0; \n    // q2 *= c0\n    m_naiveU.col(lastCol + 1).segment(firstCol + k + 1, n - k) *= c0;\n  } \n  else \n  {\n    RealScalar q1 = m_naiveU(0, firstCol + k);\n    // we shift Q1 to the right\n    for (Index i = firstCol + k - 1; i >= firstCol; i--) \n      m_naiveU(0, i + 1) = m_naiveU(0, i);\n    // we shift q1 at the left with a factor c0\n    m_naiveU(0, firstCol) = (q1 * c0);\n    // last column = q1 * - s0\n    m_naiveU(0, lastCol + 1) = (q1 * ( - s0));\n    // first column = q2 * s0\n    m_naiveU(1, firstCol) = m_naiveU(1, lastCol + 1) *s0; \n    // q2 *= c0\n    m_naiveU(1, lastCol + 1) *= c0;\n    m_naiveU.row(1).segment(firstCol + 1, k).setZero();\n    m_naiveU.row(0).segment(firstCol + k + 1, n - k - 1).setZero();\n  }\n  \n#ifdef EIGEN_BDCSVD_SANITY_CHECKS\n  assert(m_naiveU.allFinite());\n  assert(m_naiveV.allFinite());\n  assert(m_computed.allFinite());\n#endif\n  \n  m_computed(firstCol + shift, firstCol + shift) = r0;\n  m_computed.col(firstCol + shift).segment(firstCol + shift + 1, k) = alphaK * l.transpose().real();\n  m_computed.col(firstCol + shift).segment(firstCol + shift + k + 1, n - k - 1) = betaK * f.transpose().real();\n\n#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE\n  ArrayXr tmp1 = (m_computed.block(firstCol+shift, firstCol+shift, n, n)).jacobiSvd().singularValues();\n#endif\n  // Second part: try to deflate singular values in combined matrix\n  deflation(firstCol, lastCol, k, firstRowW, firstColW, shift);\n#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE\n  ArrayXr tmp2 = (m_computed.block(firstCol+shift, firstCol+shift, n, n)).jacobiSvd().singularValues();\n  std::cout << \"\\n\\nj1 
= \" << tmp1.transpose().format(bdcsvdfmt) << \"\\n\";\n  std::cout << \"j2 = \" << tmp2.transpose().format(bdcsvdfmt) << \"\\n\\n\";\n  std::cout << \"err:      \" << ((tmp1-tmp2).abs()>1e-12*tmp2.abs()).transpose() << \"\\n\";\n  static int count = 0;\n  std::cout << \"# \" << ++count << \"\\n\\n\";\n  assert((tmp1-tmp2).matrix().norm() < 1e-14*tmp2.matrix().norm());\n//   assert(count<681);\n//   assert(((tmp1-tmp2).abs()<1e-13*tmp2.abs()).all());\n#endif\n  \n  // Third part: compute SVD of combined matrix\n  MatrixXr UofSVD, VofSVD;\n  VectorType singVals;\n  computeSVDofM(firstCol + shift, n, UofSVD, singVals, VofSVD);\n  \n#ifdef EIGEN_BDCSVD_SANITY_CHECKS\n  assert(UofSVD.allFinite());\n  assert(VofSVD.allFinite());\n#endif\n  \n  if (m_compU)\n    structured_update(m_naiveU.block(firstCol, firstCol, n + 1, n + 1), UofSVD, (n+2)/2);\n  else\n  {\n    Map<Matrix<RealScalar,2,Dynamic>,Aligned> tmp(m_workspace.data(),2,n+1);\n    tmp.noalias() = m_naiveU.middleCols(firstCol, n+1) * UofSVD;\n    m_naiveU.middleCols(firstCol, n + 1) = tmp;\n  }\n  \n  if (m_compV)  structured_update(m_naiveV.block(firstRowW, firstColW, n, n), VofSVD, (n+1)/2);\n  \n#ifdef EIGEN_BDCSVD_SANITY_CHECKS\n  assert(m_naiveU.allFinite());\n  assert(m_naiveV.allFinite());\n  assert(m_computed.allFinite());\n#endif\n  \n  m_computed.block(firstCol + shift, firstCol + shift, n, n).setZero();\n  m_computed.block(firstCol + shift, firstCol + shift, n, n).diagonal() = singVals;\n}// end divide\n\n// Compute SVD of m_computed.block(firstCol, firstCol, n + 1, n); this block only has non-zeros in\n// the first column and on the diagonal and has undergone deflation, so diagonal is in increasing\n// order except for possibly the (0,0) entry. The computed SVD is stored U, singVals and V, except\n// that if m_compV is false, then V is not computed. 
Singular values are sorted in decreasing order.\n//\n// TODO Opportunities for optimization: better root finding algo, better stopping criterion, better\n// handling of round-off errors, be consistent in ordering\n// For instance, to solve the secular equation using FMM, see http://www.stat.uchicago.edu/~lekheng/courses/302/classics/greengard-rokhlin.pdf\ntemplate <typename MatrixType>\nvoid BDCSVD<MatrixType>::computeSVDofM(Eigen::Index firstCol, Eigen::Index n, MatrixXr& U, VectorType& singVals, MatrixXr& V)\n{\n  const RealScalar considerZero = (std::numeric_limits<RealScalar>::min)();\n  using std::abs;\n  ArrayRef col0 = m_computed.col(firstCol).segment(firstCol, n);\n  m_workspace.head(n) =  m_computed.block(firstCol, firstCol, n, n).diagonal();\n  ArrayRef diag = m_workspace.head(n);\n  diag(0) = Literal(0);\n\n  // Allocate space for singular values and vectors\n  singVals.resize(n);\n  U.resize(n+1, n+1);\n  if (m_compV) V.resize(n, n);\n\n#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE\n  if (col0.hasNaN() || diag.hasNaN())\n    std::cout << \"\\n\\nHAS NAN\\n\\n\";\n#endif\n  \n  // Many singular values might have been deflated, the zero ones have been moved to the end,\n  // but others are interleaved and we must ignore them at this stage.\n  // To this end, let's compute a permutation skipping them:\n  Index actual_n = n;\n  while(actual_n>1 && diag(actual_n-1)==Literal(0)) {--actual_n; eigen_internal_assert(col0(actual_n)==Literal(0)); }\n  Index m = 0; // size of the deflated problem\n  for(Index k=0;k<actual_n;++k)\n    if(abs(col0(k))>considerZero)\n      m_workspaceI(m++) = k;\n  Map<ArrayXi> perm(m_workspaceI.data(),m);\n  \n  Map<ArrayXr> shifts(m_workspace.data()+1*n, n);\n  Map<ArrayXr> mus(m_workspace.data()+2*n, n);\n  Map<ArrayXr> zhat(m_workspace.data()+3*n, n);\n\n#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE\n  std::cout << \"computeSVDofM using:\\n\";\n  std::cout << \"  z: \" << col0.transpose() << \"\\n\";\n  std::cout << \"  d: \" << diag.transpose() << \"\\n\";\n#endif\n  \n  // Compute singVals, shifts, and mus\n  computeSingVals(col0, diag, perm, singVals, shifts, mus);\n  \n#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE\n  std::cout << \"  j:        \" << (m_computed.block(firstCol, firstCol, n, n)).jacobiSvd().singularValues().transpose().reverse() << \"\\n\\n\";\n  std::cout << \"  sing-val: \" << singVals.transpose() << \"\\n\";\n  std::cout << \"  mu:       \" << mus.transpose() << \"\\n\";\n  std::cout << \"  shift:    \" << shifts.transpose() << \"\\n\";\n  \n  {\n    std::cout << \"\\n\\n    mus:    \" << mus.head(actual_n).transpose() << \"\\n\\n\";\n    std::cout << \"    check1 (expect0) : \" << ((singVals.array()-(shifts+mus)) / singVals.array()).head(actual_n).transpose() << \"\\n\\n\";\n    assert((((singVals.array()-(shifts+mus)) / singVals.array()).head(actual_n) >= 0).all());\n    std::cout << \"    check2 (>0)      : \" << ((singVals.array()-diag) / singVals.array()).head(actual_n).transpose() << \"\\n\\n\";\n    assert((((singVals.array()-diag) / singVals.array()).head(actual_n) >= 0).all());\n  }\n#endif\n  \n#ifdef EIGEN_BDCSVD_SANITY_CHECKS\n  assert(singVals.allFinite());\n  assert(mus.allFinite());\n  assert(shifts.allFinite());\n#endif\n  \n  // Compute zhat\n  perturbCol0(col0, diag, perm, singVals, shifts, mus, zhat);\n#ifdef  EIGEN_BDCSVD_DEBUG_VERBOSE\n  std::cout << \"  zhat: \" << zhat.transpose() << \"\\n\";\n#endif\n  \n#ifdef EIGEN_BDCSVD_SANITY_CHECKS\n  assert(zhat.allFinite());\n#endif\n  \n  computeSingVecs(zhat, diag, perm, singVals, shifts, mus, U, V);\n 
 \n#ifdef  EIGEN_BDCSVD_DEBUG_VERBOSE\n  std::cout << \"U^T U: \" << (U.transpose() * U - MatrixXr(MatrixXr::Identity(U.cols(),U.cols()))).norm() << \"\\n\";\n  std::cout << \"V^T V: \" << (V.transpose() * V - MatrixXr(MatrixXr::Identity(V.cols(),V.cols()))).norm() << \"\\n\";\n#endif\n  \n#ifdef EIGEN_BDCSVD_SANITY_CHECKS\n  assert(m_naiveU.allFinite());\n  assert(m_naiveV.allFinite());\n  assert(m_computed.allFinite());\n  assert(U.allFinite());\n  assert(V.allFinite());\n//   assert((U.transpose() * U - MatrixXr(MatrixXr::Identity(U.cols(),U.cols()))).norm() < 100*NumTraits<RealScalar>::epsilon() * n);\n//   assert((V.transpose() * V - MatrixXr(MatrixXr::Identity(V.cols(),V.cols()))).norm() < 100*NumTraits<RealScalar>::epsilon() * n);\n#endif\n  \n  // Because of deflation, the singular values might not be completely sorted.\n  // Fortunately, reordering them is a O(n) problem\n  for(Index i=0; i<actual_n-1; ++i)\n  {\n    if(singVals(i)>singVals(i+1))\n    {\n      using std::swap;\n      swap(singVals(i),singVals(i+1));\n      U.col(i).swap(U.col(i+1));\n      if(m_compV) V.col(i).swap(V.col(i+1));\n    }\n  }\n\n#ifdef EIGEN_BDCSVD_SANITY_CHECKS\n  {\n    bool singular_values_sorted = (((singVals.segment(1,actual_n-1)-singVals.head(actual_n-1))).array() >= 0).all();\n    if(!singular_values_sorted)\n      std::cout << \"Singular values are not sorted: \" << singVals.segment(1,actual_n).transpose() << \"\\n\";\n    assert(singular_values_sorted);\n  }\n#endif\n  \n  // Reverse order so that singular values in increased order\n  // Because of deflation, the zeros singular-values are already at the end\n  singVals.head(actual_n).reverseInPlace();\n  U.leftCols(actual_n).rowwise().reverseInPlace();\n  if (m_compV) V.leftCols(actual_n).rowwise().reverseInPlace();\n  \n#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE\n  JacobiSVD<MatrixXr> jsvd(m_computed.block(firstCol, firstCol, n, n) );\n  std::cout << \"  * j:        \" << jsvd.singularValues().transpose() << \"\\n\\n\";\n  std::cout << \"  * sing-val: \" << singVals.transpose() << \"\\n\";\n//   std::cout << \"  * err:      \" << ((jsvd.singularValues()-singVals)>1e-13*singVals.norm()).transpose() << \"\\n\";\n#endif\n}\n\ntemplate <typename MatrixType>\ntypename BDCSVD<MatrixType>::RealScalar BDCSVD<MatrixType>::secularEq(RealScalar mu, const ArrayRef& col0, const ArrayRef& diag, const IndicesRef &perm, const ArrayRef& diagShifted, RealScalar shift)\n{\n  Index m = perm.size();\n  RealScalar res = Literal(1);\n  for(Index i=0; i<m; ++i)\n  {\n    Index j = perm(i);\n    // The following expression could be rewritten to involve only a single division,\n    // but this would make the expression more sensitive to overflow.\n    res += (col0(j) / (diagShifted(j) - mu)) * (col0(j) / (diag(j) + shift + mu));\n  }\n  return res;\n\n}\n\ntemplate <typename MatrixType>\nvoid BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& diag, const IndicesRef &perm,\n                                         VectorType& singVals, ArrayRef shifts, ArrayRef mus)\n{\n  using std::abs;\n  using std::swap;\n  using std::sqrt;\n\n  Index n = col0.size();\n  Index actual_n = n;\n  // Note that here actual_n is computed based on col0(i)==0 instead of diag(i)==0 as above\n  // because 1) we have diag(i)==0 => col0(i)==0 and 2) if col0(i)==0, then diag(i) is already a singular value.\n  while(actual_n>1 && col0(actual_n-1)==Literal(0)) --actual_n;\n\n  for (Index k = 0; k < n; ++k)\n  {\n    if (col0(k) == Literal(0) || actual_n==1)\n    {\n      // if 
col0(k) == 0, then entry is deflated, so singular value is on diagonal\n      // if actual_n==1, then the deflated problem is already diagonalized\n      singVals(k) = k==0 ? col0(0) : diag(k);\n      mus(k) = Literal(0);\n      shifts(k) = k==0 ? col0(0) : diag(k);\n      continue;\n    } \n\n    // otherwise, use secular equation to find singular value\n    RealScalar left = diag(k);\n    RealScalar right; // was: = (k != actual_n-1) ? diag(k+1) : (diag(actual_n-1) + col0.matrix().norm());\n    if(k==actual_n-1)\n      right = (diag(actual_n-1) + col0.matrix().norm());\n    else\n    {\n      // Skip deflated singular values,\n      // recall that at this stage we assume that z[j]!=0 and all entries for which z[j]==0 have been put aside.\n      // This should be equivalent to using perm[]\n      Index l = k+1;\n      while(col0(l)==Literal(0)) { ++l; eigen_internal_assert(l<actual_n); }\n      right = diag(l);\n    }\n\n    // first decide whether it's closer to the left end or the right end\n    RealScalar mid = left + (right-left) / Literal(2);\n    RealScalar fMid = secularEq(mid, col0, diag, perm, diag, Literal(0));\n#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE\n    std::cout << \"right-left = \" << right-left << \"\\n\";\n//     std::cout << \"fMid = \" << fMid << \" \" << secularEq(mid-left, col0, diag, perm, ArrayXr(diag-left), left)\n//                            << \" \" << secularEq(mid-right, col0, diag, perm, ArrayXr(diag-right), right)   << \"\\n\";\n    std::cout << \"     = \" << secularEq(left+RealScalar(0.000001)*(right-left), col0, diag, perm, diag, 0)\n              << \" \"       << secularEq(left+RealScalar(0.1)     *(right-left), col0, diag, perm, diag, 0)\n              << \" \"       << secularEq(left+RealScalar(0.2)     *(right-left), col0, diag, perm, diag, 0)\n              << \" \"       << secularEq(left+RealScalar(0.3)     *(right-left), col0, diag, perm, diag, 0)\n              << \" \"       << secularEq(left+RealScalar(0.4)     *(right-left), col0, diag, perm, diag, 0)\n              << \" \"       << secularEq(left+RealScalar(0.49)    *(right-left), col0, diag, perm, diag, 0)\n              << \" \"       << secularEq(left+RealScalar(0.5)     *(right-left), col0, diag, perm, diag, 0)\n              << \" \"       << secularEq(left+RealScalar(0.51)    *(right-left), col0, diag, perm, diag, 0)\n              << \" \"       << secularEq(left+RealScalar(0.6)     *(right-left), col0, diag, perm, diag, 0)\n              << \" \"       << secularEq(left+RealScalar(0.7)     *(right-left), col0, diag, perm, diag, 0)\n              << \" \"       << secularEq(left+RealScalar(0.8)     *(right-left), col0, diag, perm, diag, 0)\n              << \" \"       << secularEq(left+RealScalar(0.9)     *(right-left), col0, diag, perm, diag, 0)\n              << \" \"       << secularEq(left+RealScalar(0.999999)*(right-left), col0, diag, perm, diag, 0) << \"\\n\";\n#endif\n    RealScalar shift = (k == actual_n-1 || fMid > Literal(0)) ? 
left : right;\n    \n    // measure everything relative to shift\n    Map<ArrayXr> diagShifted(m_workspace.data()+4*n, n);\n    diagShifted = diag - shift;\n\n    if(k!=actual_n-1)\n    {\n      // check that after the shift, f(mid) is still negative:\n      RealScalar midShifted = (right - left) / RealScalar(2);\n      if(shift==right)\n        midShifted = -midShifted;\n      RealScalar fMidShifted = secularEq(midShifted, col0, diag, perm, diagShifted, shift);\n      if(fMidShifted>0)\n      {\n        // fMid was erroneous, fix it:\n        shift =  fMidShifted > Literal(0) ? left : right;\n        diagShifted = diag - shift;\n      }\n    }\n    \n    // initial guess\n    RealScalar muPrev, muCur;\n    if (shift == left)\n    {\n      muPrev = (right - left) * RealScalar(0.1);\n      if (k == actual_n-1) muCur = right - left;\n      else                 muCur = (right - left) * RealScalar(0.5);\n    }\n    else\n    {\n      muPrev = -(right - left) * RealScalar(0.1);\n      muCur = -(right - left) * RealScalar(0.5);\n    }\n\n    RealScalar fPrev = secularEq(muPrev, col0, diag, perm, diagShifted, shift);\n    RealScalar fCur = secularEq(muCur, col0, diag, perm, diagShifted, shift);\n    if (abs(fPrev) < abs(fCur))\n    {\n      swap(fPrev, fCur);\n      swap(muPrev, muCur);\n    }\n\n    // rational interpolation: fit a function of the form a / mu + b through the two previous\n    // iterates and use its zero to compute the next iterate\n    bool useBisection = fPrev*fCur>Literal(0);\n    while (fCur!=Literal(0) && abs(muCur - muPrev) > Literal(8) * NumTraits<RealScalar>::epsilon() * numext::maxi<RealScalar>(abs(muCur), abs(muPrev)) && abs(fCur - fPrev)>NumTraits<RealScalar>::epsilon() && !useBisection)\n    {\n      ++m_numIters;\n\n      // Find a and b such that the function f(mu) = a / mu + b matches the current and previous samples.\n      RealScalar a = (fCur - fPrev) / (Literal(1)/muCur - Literal(1)/muPrev);\n      RealScalar b = fCur - a / muCur;\n      // And find mu such that f(mu)==0:\n      RealScalar muZero = -a/b;\n      RealScalar fZero = secularEq(muZero, col0, diag, perm, diagShifted, shift);\n\n#ifdef EIGEN_BDCSVD_SANITY_CHECKS\n      assert((numext::isfinite)(fZero));\n#endif\n      \n      muPrev = muCur;\n      fPrev = fCur;\n      muCur = muZero;\n      fCur = fZero;\n      \n      if (shift == left  && (muCur < Literal(0) || muCur > right - left)) useBisection = true;\n      if (shift == right && (muCur < -(right - left) || muCur > Literal(0))) useBisection = true;\n      if (abs(fCur)>abs(fPrev)) useBisection = true;\n    }\n\n    // fall back on bisection method if rational interpolation did not work\n    if (useBisection)\n    {\n#ifdef  EIGEN_BDCSVD_DEBUG_VERBOSE\n      std::cout << \"useBisection for k = \" << k << \", actual_n = \" << actual_n << \"\\n\";\n#endif\n      RealScalar leftShifted, rightShifted;\n      if (shift == left)\n      {\n        // to avoid overflow, we must have mu > max(real_min, |z(k)|/sqrt(real_max)),\n        // the factor 2 is to be more conservative\n        leftShifted = numext::maxi<RealScalar>( (std::numeric_limits<RealScalar>::min)(), Literal(2) * abs(col0(k)) / sqrt((std::numeric_limits<RealScalar>::max)()) );\n\n        // check that we did it right:\n        eigen_internal_assert( (numext::isfinite)( (col0(k)/leftShifted)*(col0(k)/(diag(k)+shift+leftShifted)) ) );\n        // I don't understand why the case k==0 would be special there:\n        // if (k == 0) rightShifted = right - left; else\n        rightShifted = 
(k==actual_n-1) ? right : ((right - left) * RealScalar(0.51)); // theoretically we can take 0.5, but let's be safe\n      }\n      else\n      {\n        leftShifted = -(right - left) * RealScalar(0.51);\n        if(k+1<n)\n          rightShifted = -numext::maxi<RealScalar>( (std::numeric_limits<RealScalar>::min)(), abs(col0(k+1)) / sqrt((std::numeric_limits<RealScalar>::max)()) );\n        else\n          rightShifted = -(std::numeric_limits<RealScalar>::min)();\n      }\n\n      RealScalar fLeft = secularEq(leftShifted, col0, diag, perm, diagShifted, shift);\n      eigen_internal_assert(fLeft<Literal(0));\n\n#if defined EIGEN_INTERNAL_DEBUGGING || defined EIGEN_BDCSVD_SANITY_CHECKS\n      RealScalar fRight = secularEq(rightShifted, col0, diag, perm, diagShifted, shift);\n#endif\n\n#ifdef EIGEN_BDCSVD_SANITY_CHECKS\n      if(!(numext::isfinite)(fLeft))\n        std::cout << \"f(\" << leftShifted << \") =\" << fLeft << \" ; \" << left << \" \" << shift << \" \" << right << \"\\n\";\n      assert((numext::isfinite)(fLeft));\n\n      if(!(numext::isfinite)(fRight))\n        std::cout << \"f(\" << rightShifted << \") =\" << fRight << \" ; \" << left << \" \" << shift << \" \" << right << \"\\n\";\n      // assert((numext::isfinite)(fRight));\n#endif\n    \n#ifdef  EIGEN_BDCSVD_DEBUG_VERBOSE\n      if(!(fLeft * fRight<0))\n      {\n        std::cout << \"f(leftShifted) using  leftShifted=\" << leftShifted << \" ;  diagShifted(1:10):\" << diagShifted.head(10).transpose()  << \"\\n ; \"\n                  << \"left==shift=\" << bool(left==shift) << \" ; left-shift = \" << (left-shift) << \"\\n\";\n        std::cout << \"k=\" << k << \", \" <<  fLeft << \" * \" << fRight << \" == \" << fLeft * fRight << \"  ;  \"\n                  << \"[\" << left << \" .. 
\" << right << \"] -> [\" << leftShifted << \" \" << rightShifted << \"], shift=\" << shift\n                  << \" ,  f(right)=\" << secularEq(0,     col0, diag, perm, diagShifted, shift)\n                           << \" == \" << secularEq(right, col0, diag, perm, diag, 0) << \" == \" << fRight << \"\\n\";\n      }\n#endif\n      eigen_internal_assert(fLeft * fRight < Literal(0));\n\n      if(fLeft<Literal(0))\n      {\n        while (rightShifted - leftShifted > Literal(2) * NumTraits<RealScalar>::epsilon() * numext::maxi<RealScalar>(abs(leftShifted), abs(rightShifted)))\n        {\n          RealScalar midShifted = (leftShifted + rightShifted) / Literal(2);\n          fMid = secularEq(midShifted, col0, diag, perm, diagShifted, shift);\n          eigen_internal_assert((numext::isfinite)(fMid));\n\n          if (fLeft * fMid < Literal(0))\n          {\n            rightShifted = midShifted;\n          }\n          else\n          {\n            leftShifted = midShifted;\n            fLeft = fMid;\n          }\n        }\n        muCur = (leftShifted + rightShifted) / Literal(2);\n      }\n      else \n      {\n        // We have a problem as shifting on the left or right give either a positive or negative value\n        // at the middle of [left,right]...\n        // Instead fo abbording or entering an infinite loop,\n        // let's just use the middle as the estimated zero-crossing:\n        muCur = (right - left) * RealScalar(0.5);\n        if(shift == right)\n          muCur = -muCur;\n      }\n    }\n      \n    singVals[k] = shift + muCur;\n    shifts[k] = shift;\n    mus[k] = muCur;\n\n#ifdef  EIGEN_BDCSVD_DEBUG_VERBOSE\n    if(k+1<n)\n      std::cout << \"found \" << singVals[k] << \" == \" << shift << \" + \" << muCur << \" from \" << diag(k) << \" .. 
\"  << diag(k+1) << \"\\n\";\n#endif\n#ifdef EIGEN_BDCSVD_SANITY_CHECKS\n    assert(k==0 || singVals[k]>=singVals[k-1]);\n    assert(singVals[k]>=diag(k));\n#endif\n\n    // perturb singular value slightly if it equals diagonal entry to avoid division by zero later\n    // (deflation is supposed to avoid this from happening)\n    // - this does no seem to be necessary anymore -\n//     if (singVals[k] == left) singVals[k] *= 1 + NumTraits<RealScalar>::epsilon();\n//     if (singVals[k] == right) singVals[k] *= 1 - NumTraits<RealScalar>::epsilon();\n  }\n}\n\n\n// zhat is perturbation of col0 for which singular vectors can be computed stably (see Section 3.1)\ntemplate <typename MatrixType>\nvoid BDCSVD<MatrixType>::perturbCol0\n   (const ArrayRef& col0, const ArrayRef& diag, const IndicesRef &perm, const VectorType& singVals,\n    const ArrayRef& shifts, const ArrayRef& mus, ArrayRef zhat)\n{\n  using std::sqrt;\n  Index n = col0.size();\n  Index m = perm.size();\n  if(m==0)\n  {\n    zhat.setZero();\n    return;\n  }\n  Index lastIdx = perm(m-1);\n  // The offset permits to skip deflated entries while computing zhat\n  for (Index k = 0; k < n; ++k)\n  {\n    if (col0(k) == Literal(0)) // deflated\n      zhat(k) = Literal(0);\n    else\n    {\n      // see equation (3.6)\n      RealScalar dk = diag(k);\n      RealScalar prod = (singVals(lastIdx) + dk) * (mus(lastIdx) + (shifts(lastIdx) - dk));\n#ifdef EIGEN_BDCSVD_SANITY_CHECKS\n      if(prod<0) {\n        std::cout << \"k = \" << k << \" ;  z(k)=\" << col0(k) << \", diag(k)=\" << dk << \"\\n\";\n        std::cout << \"prod = \" << \"(\" << singVals(lastIdx) << \" + \" << dk << \") * (\" << mus(lastIdx) << \" + (\" << shifts(lastIdx) << \" - \" << dk << \"))\" << \"\\n\";\n        std::cout << \"     = \" << singVals(lastIdx) + dk << \" * \" << mus(lastIdx) + (shifts(lastIdx) - dk) <<  \"\\n\";\n      }\n      assert(prod>=0);\n#endif\n\n      for(Index l = 0; l<m; ++l)\n      {\n        Index i = perm(l);\n        if(i!=k)\n        {\n#ifdef EIGEN_BDCSVD_SANITY_CHECKS\n          if(i>=k && (l==0 || l-1>=m))\n          {\n            std::cout << \"Error in perturbCol0\\n\";\n            std::cout << \"  \" << k << \"/\" << n << \" \"  << l << \"/\" << m << \" \" << i << \"/\" << n << \" ; \" << col0(k) << \" \" << diag(k) << \" \"  <<  \"\\n\";\n            std::cout << \"  \" <<diag(i) << \"\\n\";\n            Index j = (i<k /*|| l==0*/) ? i : perm(l-1);\n            std::cout << \"  \" << \"j=\" << j << \"\\n\";\n          }\n#endif\n          Index j = i<k ? 
i : perm(l-1);\n#ifdef EIGEN_BDCSVD_SANITY_CHECKS\n          if(!(dk!=Literal(0) || diag(i)!=Literal(0)))\n          {\n            std::cout << \"k=\" << k << \", i=\" << i << \", l=\" << l << \", perm.size()=\" << perm.size() << \"\\n\";\n          }\n          assert(dk!=Literal(0) || diag(i)!=Literal(0));\n#endif\n          prod *= ((singVals(j)+dk) / ((diag(i)+dk))) * ((mus(j)+(shifts(j)-dk)) / ((diag(i)-dk)));\n#ifdef EIGEN_BDCSVD_SANITY_CHECKS\n          assert(prod>=0);\n#endif\n#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE\n          if(i!=k && numext::abs(((singVals(j)+dk)*(mus(j)+(shifts(j)-dk)))/((diag(i)+dk)*(diag(i)-dk)) - 1) > 0.9 )\n            std::cout << \"     \" << ((singVals(j)+dk)*(mus(j)+(shifts(j)-dk)))/((diag(i)+dk)*(diag(i)-dk)) << \" == (\" << (singVals(j)+dk) << \" * \" << (mus(j)+(shifts(j)-dk))\n                       << \") / (\" << (diag(i)+dk) << \" * \" << (diag(i)-dk) << \")\\n\";\n#endif\n        }\n      }\n#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE\n      std::cout << \"zhat(\" << k << \") =  sqrt( \" << prod << \")  ;  \" << (singVals(lastIdx) + dk) << \" * \" << mus(lastIdx) + shifts(lastIdx) << \" - \" << dk << \"\\n\";\n#endif\n      RealScalar tmp = sqrt(prod);\n#ifdef EIGEN_BDCSVD_SANITY_CHECKS\n      assert((numext::isfinite)(tmp));\n#endif\n      zhat(k) = col0(k) > Literal(0) ? RealScalar(tmp) : RealScalar(-tmp);\n    }\n  }\n}\n\n// compute singular vectors\ntemplate <typename MatrixType>\nvoid BDCSVD<MatrixType>::computeSingVecs\n   (const ArrayRef& zhat, const ArrayRef& diag, const IndicesRef &perm, const VectorType& singVals,\n    const ArrayRef& shifts, const ArrayRef& mus, MatrixXr& U, MatrixXr& V)\n{\n  Index n = zhat.size();\n  Index m = perm.size();\n  \n  for (Index k = 0; k < n; ++k)\n  {\n    if (zhat(k) == Literal(0))\n    {\n      U.col(k) = VectorType::Unit(n+1, k);\n      if (m_compV) V.col(k) = VectorType::Unit(n, k);\n    }\n    else\n    {\n      U.col(k).setZero();\n      for(Index l=0;l<m;++l)\n      {\n        Index i = perm(l);\n        U(i,k) = zhat(i)/(((diag(i) - shifts(k)) - mus(k)) )/( (diag(i) + singVals[k]));\n      }\n      U(n,k) = Literal(0);\n      U.col(k).normalize();\n    \n      if (m_compV)\n      {\n        V.col(k).setZero();\n        for(Index l=1;l<m;++l)\n        {\n          Index i = perm(l);\n          V(i,k) = diag(i) * zhat(i) / (((diag(i) - shifts(k)) - mus(k)) )/( (diag(i) + singVals[k]));\n        }\n        V(0,k) = Literal(-1);\n        V.col(k).normalize();\n      }\n    }\n  }\n  U.col(n) = VectorType::Unit(n+1, n);\n}\n\n\n// page 12_13\n// i >= 1, di almost null and zi non null.\n// We use a rotation to zero out zi applied to the left of M\ntemplate <typename MatrixType>\nvoid BDCSVD<MatrixType>::deflation43(Eigen::Index firstCol, Eigen::Index shift, Eigen::Index i, Eigen::Index size)\n{\n  using std::abs;\n  using std::sqrt;\n  using std::pow;\n  Index start = firstCol + shift;\n  RealScalar c = m_computed(start, start);\n  RealScalar s = m_computed(start+i, start);\n  RealScalar r = numext::hypot(c,s);\n  if (r == Literal(0))\n  {\n    m_computed(start+i, start+i) = Literal(0);\n    return;\n  }\n  m_computed(start,start) = r;  \n  m_computed(start+i, start) = Literal(0);\n  m_computed(start+i, start+i) = Literal(0);\n  \n  JacobiRotation<RealScalar> J(c/r,-s/r);\n  if (m_compU)  m_naiveU.middleRows(firstCol, size+1).applyOnTheRight(firstCol, firstCol+i, J);\n  else          m_naiveU.applyOnTheRight(firstCol, firstCol+i, J);\n}// end deflation 43\n\n\n// page 13\n// i,j >= 1, i!=j and |di - dj| < epsilon 
* norm2(M)\n// We apply two rotations to have zj = 0;\n// TODO deflation44 is still broken and not properly tested\ntemplate <typename MatrixType>\nvoid BDCSVD<MatrixType>::deflation44(Eigen::Index firstColu , Eigen::Index firstColm, Eigen::Index firstRowW, Eigen::Index firstColW, Eigen::Index i, Eigen::Index j, Eigen::Index size)\n{\n  using std::abs;\n  using std::sqrt;\n  using std::conj;\n  using std::pow;\n  RealScalar c = m_computed(firstColm+i, firstColm);\n  RealScalar s = m_computed(firstColm+j, firstColm);\n  RealScalar r = sqrt(numext::abs2(c) + numext::abs2(s));\n#ifdef  EIGEN_BDCSVD_DEBUG_VERBOSE\n  std::cout << \"deflation 4.4: \" << i << \",\" << j << \" -> \" << c << \" \" << s << \" \" << r << \" ; \"\n    << m_computed(firstColm + i-1, firstColm)  << \" \"\n    << m_computed(firstColm + i, firstColm)  << \" \"\n    << m_computed(firstColm + i+1, firstColm) << \" \"\n    << m_computed(firstColm + i+2, firstColm) << \"\\n\";\n  std::cout << m_computed(firstColm + i-1, firstColm + i-1)  << \" \"\n    << m_computed(firstColm + i, firstColm+i)  << \" \"\n    << m_computed(firstColm + i+1, firstColm+i+1) << \" \"\n    << m_computed(firstColm + i+2, firstColm+i+2) << \"\\n\";\n#endif\n  if (r==Literal(0))\n  {\n    m_computed(firstColm + i, firstColm + i) = m_computed(firstColm + j, firstColm + j);\n    return;\n  }\n  c/=r;\n  s/=r;\n  m_computed(firstColm + i, firstColm) = r;\n  m_computed(firstColm + j, firstColm + j) = m_computed(firstColm + i, firstColm + i);\n  m_computed(firstColm + j, firstColm) = Literal(0);\n\n  JacobiRotation<RealScalar> J(c,-s);\n  if (m_compU)  m_naiveU.middleRows(firstColu, size+1).applyOnTheRight(firstColu + i, firstColu + j, J);\n  else          m_naiveU.applyOnTheRight(firstColu+i, firstColu+j, J);\n  if (m_compV)  m_naiveV.middleRows(firstRowW, size).applyOnTheRight(firstColW + i, firstColW + j, J);\n}// end deflation 44\n\n\n// acts on block from (firstCol+shift, firstCol+shift) to (lastCol+shift, lastCol+shift) [inclusive]\ntemplate <typename MatrixType>\nvoid BDCSVD<MatrixType>::deflation(Eigen::Index firstCol, Eigen::Index lastCol, Eigen::Index k, Eigen::Index firstRowW, Eigen::Index firstColW, Eigen::Index shift)\n{\n  using std::sqrt;\n  using std::abs;\n  const Index length = lastCol + 1 - firstCol;\n  \n  Block<MatrixXr,Dynamic,1> col0(m_computed, firstCol+shift, firstCol+shift, length, 1);\n  Diagonal<MatrixXr> fulldiag(m_computed);\n  VectorBlock<Diagonal<MatrixXr>,Dynamic> diag(fulldiag, firstCol+shift, length);\n  \n  const RealScalar considerZero = (std::numeric_limits<RealScalar>::min)();\n  RealScalar maxDiag = diag.tail((std::max)(Index(1),length-1)).cwiseAbs().maxCoeff();\n  RealScalar epsilon_strict = numext::maxi<RealScalar>(considerZero,NumTraits<RealScalar>::epsilon() * maxDiag);\n  RealScalar epsilon_coarse = Literal(8) * NumTraits<RealScalar>::epsilon() * numext::maxi<RealScalar>(col0.cwiseAbs().maxCoeff(), maxDiag);\n  \n#ifdef EIGEN_BDCSVD_SANITY_CHECKS\n  assert(m_naiveU.allFinite());\n  assert(m_naiveV.allFinite());\n  assert(m_computed.allFinite());\n#endif\n\n#ifdef  EIGEN_BDCSVD_DEBUG_VERBOSE  \n  std::cout << \"\\ndeflate:\" << diag.head(k+1).transpose() << \"  |  \" << diag.segment(k+1,length-k-1).transpose() << \"\\n\";\n#endif\n  \n  //condition 4.1\n  if (diag(0) < epsilon_coarse)\n  { \n#ifdef  EIGEN_BDCSVD_DEBUG_VERBOSE\n    std::cout << \"deflation 4.1, because \" << diag(0) << \" < \" << epsilon_coarse << \"\\n\";\n#endif\n    diag(0) = epsilon_coarse;\n  }\n\n  //condition 4.2\n  for (Index 
i=1;i<length;++i)\n    if (abs(col0(i)) < epsilon_strict)\n    {\n#ifdef  EIGEN_BDCSVD_DEBUG_VERBOSE\n      std::cout << \"deflation 4.2, set z(\" << i << \") to zero because \" << abs(col0(i)) << \" < \" << epsilon_strict << \"  (diag(\" << i << \")=\" << diag(i) << \")\\n\";\n#endif\n      col0(i) = Literal(0);\n    }\n\n  //condition 4.3\n  for (Index i=1;i<length; i++)\n    if (diag(i) < epsilon_coarse)\n    {\n#ifdef  EIGEN_BDCSVD_DEBUG_VERBOSE\n      std::cout << \"deflation 4.3, cancel z(\" << i << \")=\" << col0(i) << \" because diag(\" << i << \")=\" << diag(i) << \" < \" << epsilon_coarse << \"\\n\";\n#endif\n      deflation43(firstCol, shift, i, length);\n    }\n\n#ifdef EIGEN_BDCSVD_SANITY_CHECKS\n  assert(m_naiveU.allFinite());\n  assert(m_naiveV.allFinite());\n  assert(m_computed.allFinite());\n#endif\n#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE\n  std::cout << \"to be sorted: \" << diag.transpose() << \"\\n\\n\";\n  std::cout << \"            : \" << col0.transpose() << \"\\n\\n\";\n#endif\n  {\n    // Check for total deflation\n    // If we have a total deflation, then we have to consider col0(0)==diag(0) as a singular value during sorting\n    bool total_deflation = (col0.tail(length-1).array()<considerZero).all();\n    \n    // Sort the diagonal entries: since diag(1:k-1) and diag(k:length) are already sorted, let's do a sorted merge.\n    // First, compute the respective permutation.\n    Index *permutation = m_workspaceI.data();\n    {\n      permutation[0] = 0;\n      Index p = 1;\n      \n      // Move deflated diagonal entries to the end.\n      for(Index i=1; i<length; ++i)\n        if(abs(diag(i))<considerZero)\n          permutation[p++] = i;\n        \n      Index i=1, j=k+1;\n      for( ; p < length; ++p)\n      {\n             if (i > k)             permutation[p] = j++;\n        else if (j >= length)       permutation[p] = i++;\n        else if (diag(i) < diag(j)) permutation[p] = j++;\n        else                        permutation[p] = i++;\n      }\n    }\n    \n    // If we have a total deflation, then we have to insert diag(0) at the right place\n    if(total_deflation)\n    {\n      for(Index i=1; i<length; ++i)\n      {\n        Index pi = permutation[i];\n        if(abs(diag(pi))<considerZero || diag(0)<diag(pi))\n          permutation[i-1] = permutation[i];\n        else\n        {\n          permutation[i-1] = 0;\n          break;\n        }\n      }\n    }\n    \n    // Current index of each col, and current column of each index\n    Index *realInd = m_workspaceI.data()+length;\n    Index *realCol = m_workspaceI.data()+2*length;\n    \n    for(int pos = 0; pos< length; pos++)\n    {\n      realCol[pos] = pos;\n      realInd[pos] = pos;\n    }\n    \n    for(Index i = total_deflation?0:1; i < length; i++)\n    {\n      const Index pi = permutation[length - (total_deflation ? 
i+1 : i)];\n      const Index J = realCol[pi];\n      \n      using std::swap;\n      // swap diagonal and first column entries:\n      swap(diag(i), diag(J));\n      if(i!=0 && J!=0) swap(col0(i), col0(J));\n\n      // change columns\n      if (m_compU) m_naiveU.col(firstCol+i).segment(firstCol, length + 1).swap(m_naiveU.col(firstCol+J).segment(firstCol, length + 1));\n      else         m_naiveU.col(firstCol+i).segment(0, 2)                .swap(m_naiveU.col(firstCol+J).segment(0, 2));\n      if (m_compV) m_naiveV.col(firstColW + i).segment(firstRowW, length).swap(m_naiveV.col(firstColW + J).segment(firstRowW, length));\n\n      //update real pos\n      const Index realI = realInd[i];\n      realCol[realI] = J;\n      realCol[pi] = i;\n      realInd[J] = realI;\n      realInd[i] = pi;\n    }\n  }\n#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE\n  std::cout << \"sorted: \" << diag.transpose().format(bdcsvdfmt) << \"\\n\";\n  std::cout << \"      : \" << col0.transpose() << \"\\n\\n\";\n#endif\n    \n  //condition 4.4\n  {\n    Index i = length-1;\n    while(i>0 && (abs(diag(i))<considerZero || abs(col0(i))<considerZero)) --i;\n    for(; i>1;--i)\n       if( (diag(i) - diag(i-1)) < NumTraits<RealScalar>::epsilon()*maxDiag )\n      {\n#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE\n        std::cout << \"deflation 4.4 with i = \" << i << \" because \" << diag(i) << \" - \" << diag(i-1) << \" == \" << (diag(i) - diag(i-1)) << \" < \" << NumTraits<RealScalar>::epsilon()*/*diag(i)*/maxDiag << \"\\n\";\n#endif\n        eigen_internal_assert(abs(diag(i) - diag(i-1))<epsilon_coarse && \" diagonal entries are not properly sorted\");\n        deflation44(firstCol, firstCol + shift, firstRowW, firstColW, i-1, i, length);\n      }\n  }\n  \n#ifdef EIGEN_BDCSVD_SANITY_CHECKS\n  for(Index j=2;j<length;++j)\n    assert(diag(j-1)<=diag(j) || abs(diag(j))<considerZero);\n#endif\n  \n#ifdef EIGEN_BDCSVD_SANITY_CHECKS\n  assert(m_naiveU.allFinite());\n  assert(m_naiveV.allFinite());\n  assert(m_computed.allFinite());\n#endif\n}//end deflation\n\n/** \\svd_module\n  *\n  * \\return the singular value decomposition of \\c *this computed by Divide & Conquer algorithm\n  *\n  * \\sa class BDCSVD\n  */\ntemplate<typename Derived>\nBDCSVD<typename MatrixBase<Derived>::PlainObject>\nMatrixBase<Derived>::bdcSvd(unsigned int computationOptions) const\n{\n  return BDCSVD<PlainObject>(*this, computationOptions);\n}\n\n} // end namespace Eigen\n\n#endif\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/SVD/JacobiSVD.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2009-2010 Benoit Jacob <jacob.benoit.1@gmail.com>\n// Copyright (C) 2013-2014 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_JACOBISVD_H\n#define EIGEN_JACOBISVD_H\n\nnamespace Eigen { \n\nnamespace internal {\n// forward declaration (needed by ICC)\n// the empty body is required by MSVC\ntemplate<typename MatrixType, int QRPreconditioner,\n         bool IsComplex = NumTraits<typename MatrixType::Scalar>::IsComplex>\nstruct svd_precondition_2x2_block_to_be_real {};\n\n/*** QR preconditioners (R-SVD)\n ***\n *** Their role is to reduce the problem of computing the SVD to the case of a square matrix.\n *** This approach, known as R-SVD, is an optimization for rectangular-enough matrices, and is a requirement for\n *** JacobiSVD which by itself is only able to work on square matrices.\n ***/\n\nenum { PreconditionIfMoreColsThanRows, PreconditionIfMoreRowsThanCols };\n\ntemplate<typename MatrixType, int QRPreconditioner, int Case>\nstruct qr_preconditioner_should_do_anything\n{\n  enum { a = MatrixType::RowsAtCompileTime != Dynamic &&\n             MatrixType::ColsAtCompileTime != Dynamic &&\n             MatrixType::ColsAtCompileTime <= MatrixType::RowsAtCompileTime,\n         b = MatrixType::RowsAtCompileTime != Dynamic &&\n             MatrixType::ColsAtCompileTime != Dynamic &&\n             MatrixType::RowsAtCompileTime <= MatrixType::ColsAtCompileTime,\n         ret = !( (QRPreconditioner == NoQRPreconditioner) ||\n                  (Case == PreconditionIfMoreColsThanRows && bool(a)) ||\n                  (Case == PreconditionIfMoreRowsThanCols && bool(b)) )\n  };\n};\n\ntemplate<typename MatrixType, int QRPreconditioner, int Case,\n         bool DoAnything = qr_preconditioner_should_do_anything<MatrixType, QRPreconditioner, Case>::ret\n> struct qr_preconditioner_impl {};\n\ntemplate<typename MatrixType, int QRPreconditioner, int Case>\nclass qr_preconditioner_impl<MatrixType, QRPreconditioner, Case, false>\n{\npublic:\n  void allocate(const JacobiSVD<MatrixType, QRPreconditioner>&) {}\n  bool run(JacobiSVD<MatrixType, QRPreconditioner>&, const MatrixType&)\n  {\n    return false;\n  }\n};\n\n/*** preconditioner using FullPivHouseholderQR ***/\n\ntemplate<typename MatrixType>\nclass qr_preconditioner_impl<MatrixType, FullPivHouseholderQRPreconditioner, PreconditionIfMoreRowsThanCols, true>\n{\npublic:\n  typedef typename MatrixType::Scalar Scalar;\n  enum\n  {\n    RowsAtCompileTime = MatrixType::RowsAtCompileTime,\n    MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime\n  };\n  typedef Matrix<Scalar, 1, RowsAtCompileTime, RowMajor, 1, MaxRowsAtCompileTime> WorkspaceType;\n\n  void allocate(const JacobiSVD<MatrixType, FullPivHouseholderQRPreconditioner>& svd)\n  {\n    if (svd.rows() != m_qr.rows() || svd.cols() != m_qr.cols())\n    {\n      m_qr.~QRType();\n      ::new (&m_qr) QRType(svd.rows(), svd.cols());\n    }\n    if (svd.m_computeFullU) m_workspace.resize(svd.rows());\n  }\n\n  bool run(JacobiSVD<MatrixType, FullPivHouseholderQRPreconditioner>& svd, const MatrixType& matrix)\n  {\n    if(matrix.rows() > matrix.cols())\n    {\n      m_qr.compute(matrix);\n      svd.m_workMatrix = 
m_qr.matrixQR().block(0,0,matrix.cols(),matrix.cols()).template triangularView<Upper>();\n      if(svd.m_computeFullU) m_qr.matrixQ().evalTo(svd.m_matrixU, m_workspace);\n      if(svd.computeV()) svd.m_matrixV = m_qr.colsPermutation();\n      return true;\n    }\n    return false;\n  }\nprivate:\n  typedef FullPivHouseholderQR<MatrixType> QRType;\n  QRType m_qr;\n  WorkspaceType m_workspace;\n};\n\ntemplate<typename MatrixType>\nclass qr_preconditioner_impl<MatrixType, FullPivHouseholderQRPreconditioner, PreconditionIfMoreColsThanRows, true>\n{\npublic:\n  typedef typename MatrixType::Scalar Scalar;\n  enum\n  {\n    RowsAtCompileTime = MatrixType::RowsAtCompileTime,\n    ColsAtCompileTime = MatrixType::ColsAtCompileTime,\n    MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,\n    MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,\n    Options = MatrixType::Options\n  };\n\n  typedef typename internal::make_proper_matrix_type<\n    Scalar, ColsAtCompileTime, RowsAtCompileTime, Options, MaxColsAtCompileTime, MaxRowsAtCompileTime\n  >::type TransposeTypeWithSameStorageOrder;\n\n  void allocate(const JacobiSVD<MatrixType, FullPivHouseholderQRPreconditioner>& svd)\n  {\n    if (svd.cols() != m_qr.rows() || svd.rows() != m_qr.cols())\n    {\n      m_qr.~QRType();\n      ::new (&m_qr) QRType(svd.cols(), svd.rows());\n    }\n    m_adjoint.resize(svd.cols(), svd.rows());\n    if (svd.m_computeFullV) m_workspace.resize(svd.cols());\n  }\n\n  bool run(JacobiSVD<MatrixType, FullPivHouseholderQRPreconditioner>& svd, const MatrixType& matrix)\n  {\n    if(matrix.cols() > matrix.rows())\n    {\n      m_adjoint = matrix.adjoint();\n      m_qr.compute(m_adjoint);\n      svd.m_workMatrix = m_qr.matrixQR().block(0,0,matrix.rows(),matrix.rows()).template triangularView<Upper>().adjoint();\n      if(svd.m_computeFullV) m_qr.matrixQ().evalTo(svd.m_matrixV, m_workspace);\n      if(svd.computeU()) svd.m_matrixU = m_qr.colsPermutation();\n      return true;\n    }\n    else return false;\n  }\nprivate:\n  typedef FullPivHouseholderQR<TransposeTypeWithSameStorageOrder> QRType;\n  QRType m_qr;\n  TransposeTypeWithSameStorageOrder m_adjoint;\n  typename internal::plain_row_type<MatrixType>::type m_workspace;\n};\n\n/*** preconditioner using ColPivHouseholderQR ***/\n\ntemplate<typename MatrixType>\nclass qr_preconditioner_impl<MatrixType, ColPivHouseholderQRPreconditioner, PreconditionIfMoreRowsThanCols, true>\n{\npublic:\n  void allocate(const JacobiSVD<MatrixType, ColPivHouseholderQRPreconditioner>& svd)\n  {\n    if (svd.rows() != m_qr.rows() || svd.cols() != m_qr.cols())\n    {\n      m_qr.~QRType();\n      ::new (&m_qr) QRType(svd.rows(), svd.cols());\n    }\n    if (svd.m_computeFullU) m_workspace.resize(svd.rows());\n    else if (svd.m_computeThinU) m_workspace.resize(svd.cols());\n  }\n\n  bool run(JacobiSVD<MatrixType, ColPivHouseholderQRPreconditioner>& svd, const MatrixType& matrix)\n  {\n    if(matrix.rows() > matrix.cols())\n    {\n      m_qr.compute(matrix);\n      svd.m_workMatrix = m_qr.matrixQR().block(0,0,matrix.cols(),matrix.cols()).template triangularView<Upper>();\n      if(svd.m_computeFullU) m_qr.householderQ().evalTo(svd.m_matrixU, m_workspace);\n      else if(svd.m_computeThinU)\n      {\n        svd.m_matrixU.setIdentity(matrix.rows(), matrix.cols());\n        m_qr.householderQ().applyThisOnTheLeft(svd.m_matrixU, m_workspace);\n      }\n      if(svd.computeV()) svd.m_matrixV = m_qr.colsPermutation();\n      return true;\n    }\n    return false;\n  }\n\nprivate:\n  typedef 
ColPivHouseholderQR<MatrixType> QRType;\n  QRType m_qr;\n  typename internal::plain_col_type<MatrixType>::type m_workspace;\n};\n\ntemplate<typename MatrixType>\nclass qr_preconditioner_impl<MatrixType, ColPivHouseholderQRPreconditioner, PreconditionIfMoreColsThanRows, true>\n{\npublic:\n  typedef typename MatrixType::Scalar Scalar;\n  enum\n  {\n    RowsAtCompileTime = MatrixType::RowsAtCompileTime,\n    ColsAtCompileTime = MatrixType::ColsAtCompileTime,\n    MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,\n    MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,\n    Options = MatrixType::Options\n  };\n\n  typedef typename internal::make_proper_matrix_type<\n    Scalar, ColsAtCompileTime, RowsAtCompileTime, Options, MaxColsAtCompileTime, MaxRowsAtCompileTime\n  >::type TransposeTypeWithSameStorageOrder;\n\n  void allocate(const JacobiSVD<MatrixType, ColPivHouseholderQRPreconditioner>& svd)\n  {\n    if (svd.cols() != m_qr.rows() || svd.rows() != m_qr.cols())\n    {\n      m_qr.~QRType();\n      ::new (&m_qr) QRType(svd.cols(), svd.rows());\n    }\n    if (svd.m_computeFullV) m_workspace.resize(svd.cols());\n    else if (svd.m_computeThinV) m_workspace.resize(svd.rows());\n    m_adjoint.resize(svd.cols(), svd.rows());\n  }\n\n  bool run(JacobiSVD<MatrixType, ColPivHouseholderQRPreconditioner>& svd, const MatrixType& matrix)\n  {\n    if(matrix.cols() > matrix.rows())\n    {\n      m_adjoint = matrix.adjoint();\n      m_qr.compute(m_adjoint);\n\n      svd.m_workMatrix = m_qr.matrixQR().block(0,0,matrix.rows(),matrix.rows()).template triangularView<Upper>().adjoint();\n      if(svd.m_computeFullV) m_qr.householderQ().evalTo(svd.m_matrixV, m_workspace);\n      else if(svd.m_computeThinV)\n      {\n        svd.m_matrixV.setIdentity(matrix.cols(), matrix.rows());\n        m_qr.householderQ().applyThisOnTheLeft(svd.m_matrixV, m_workspace);\n      }\n      if(svd.computeU()) svd.m_matrixU = m_qr.colsPermutation();\n      return true;\n    }\n    else return false;\n  }\n\nprivate:\n  typedef ColPivHouseholderQR<TransposeTypeWithSameStorageOrder> QRType;\n  QRType m_qr;\n  TransposeTypeWithSameStorageOrder m_adjoint;\n  typename internal::plain_row_type<MatrixType>::type m_workspace;\n};\n\n/*** preconditioner using HouseholderQR ***/\n\ntemplate<typename MatrixType>\nclass qr_preconditioner_impl<MatrixType, HouseholderQRPreconditioner, PreconditionIfMoreRowsThanCols, true>\n{\npublic:\n  void allocate(const JacobiSVD<MatrixType, HouseholderQRPreconditioner>& svd)\n  {\n    if (svd.rows() != m_qr.rows() || svd.cols() != m_qr.cols())\n    {\n      m_qr.~QRType();\n      ::new (&m_qr) QRType(svd.rows(), svd.cols());\n    }\n    if (svd.m_computeFullU) m_workspace.resize(svd.rows());\n    else if (svd.m_computeThinU) m_workspace.resize(svd.cols());\n  }\n\n  bool run(JacobiSVD<MatrixType, HouseholderQRPreconditioner>& svd, const MatrixType& matrix)\n  {\n    if(matrix.rows() > matrix.cols())\n    {\n      m_qr.compute(matrix);\n      svd.m_workMatrix = m_qr.matrixQR().block(0,0,matrix.cols(),matrix.cols()).template triangularView<Upper>();\n      if(svd.m_computeFullU) m_qr.householderQ().evalTo(svd.m_matrixU, m_workspace);\n      else if(svd.m_computeThinU)\n      {\n        svd.m_matrixU.setIdentity(matrix.rows(), matrix.cols());\n        m_qr.householderQ().applyThisOnTheLeft(svd.m_matrixU, m_workspace);\n      }\n      if(svd.computeV()) svd.m_matrixV.setIdentity(matrix.cols(), matrix.cols());\n      return true;\n    }\n    return false;\n  }\nprivate:\n  typedef 
HouseholderQR<MatrixType> QRType;\n  QRType m_qr;\n  typename internal::plain_col_type<MatrixType>::type m_workspace;\n};\n\ntemplate<typename MatrixType>\nclass qr_preconditioner_impl<MatrixType, HouseholderQRPreconditioner, PreconditionIfMoreColsThanRows, true>\n{\npublic:\n  typedef typename MatrixType::Scalar Scalar;\n  enum\n  {\n    RowsAtCompileTime = MatrixType::RowsAtCompileTime,\n    ColsAtCompileTime = MatrixType::ColsAtCompileTime,\n    MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,\n    MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,\n    Options = MatrixType::Options\n  };\n\n  typedef typename internal::make_proper_matrix_type<\n    Scalar, ColsAtCompileTime, RowsAtCompileTime, Options, MaxColsAtCompileTime, MaxRowsAtCompileTime\n  >::type TransposeTypeWithSameStorageOrder;\n\n  void allocate(const JacobiSVD<MatrixType, HouseholderQRPreconditioner>& svd)\n  {\n    if (svd.cols() != m_qr.rows() || svd.rows() != m_qr.cols())\n    {\n      m_qr.~QRType();\n      ::new (&m_qr) QRType(svd.cols(), svd.rows());\n    }\n    if (svd.m_computeFullV) m_workspace.resize(svd.cols());\n    else if (svd.m_computeThinV) m_workspace.resize(svd.rows());\n    m_adjoint.resize(svd.cols(), svd.rows());\n  }\n\n  bool run(JacobiSVD<MatrixType, HouseholderQRPreconditioner>& svd, const MatrixType& matrix)\n  {\n    if(matrix.cols() > matrix.rows())\n    {\n      m_adjoint = matrix.adjoint();\n      m_qr.compute(m_adjoint);\n\n      svd.m_workMatrix = m_qr.matrixQR().block(0,0,matrix.rows(),matrix.rows()).template triangularView<Upper>().adjoint();\n      if(svd.m_computeFullV) m_qr.householderQ().evalTo(svd.m_matrixV, m_workspace);\n      else if(svd.m_computeThinV)\n      {\n        svd.m_matrixV.setIdentity(matrix.cols(), matrix.rows());\n        m_qr.householderQ().applyThisOnTheLeft(svd.m_matrixV, m_workspace);\n      }\n      if(svd.computeU()) svd.m_matrixU.setIdentity(matrix.rows(), matrix.rows());\n      return true;\n    }\n    else return false;\n  }\n\nprivate:\n  typedef HouseholderQR<TransposeTypeWithSameStorageOrder> QRType;\n  QRType m_qr;\n  TransposeTypeWithSameStorageOrder m_adjoint;\n  typename internal::plain_row_type<MatrixType>::type m_workspace;\n};\n\n/*** 2x2 SVD implementation\n ***\n *** JacobiSVD consists in performing a series of 2x2 SVD subproblems\n ***/\n\ntemplate<typename MatrixType, int QRPreconditioner>\nstruct svd_precondition_2x2_block_to_be_real<MatrixType, QRPreconditioner, false>\n{\n  typedef JacobiSVD<MatrixType, QRPreconditioner> SVD;\n  typedef typename MatrixType::RealScalar RealScalar;\n  static bool run(typename SVD::WorkMatrixType&, SVD&, Index, Index, RealScalar&) { return true; }\n};\n\ntemplate<typename MatrixType, int QRPreconditioner>\nstruct svd_precondition_2x2_block_to_be_real<MatrixType, QRPreconditioner, true>\n{\n  typedef JacobiSVD<MatrixType, QRPreconditioner> SVD;\n  typedef typename MatrixType::Scalar Scalar;\n  typedef typename MatrixType::RealScalar RealScalar;\n  static bool run(typename SVD::WorkMatrixType& work_matrix, SVD& svd, Index p, Index q, RealScalar& maxDiagEntry)\n  {\n    using std::sqrt;\n    using std::abs;\n    Scalar z;\n    JacobiRotation<Scalar> rot;\n    RealScalar n = sqrt(numext::abs2(work_matrix.coeff(p,p)) + numext::abs2(work_matrix.coeff(q,p)));\n\n    const RealScalar considerAsZero = (std::numeric_limits<RealScalar>::min)();\n    const RealScalar precision = NumTraits<Scalar>::epsilon();\n\n    if(n==0)\n    {\n      // make sure first column is zero\n      work_matrix.coeffRef(p,p) = 
work_matrix.coeffRef(q,p) = Scalar(0);\n\n      if(abs(numext::imag(work_matrix.coeff(p,q)))>considerAsZero)\n      {\n        // work_matrix.coeff(p,q) can be zero if work_matrix.coeff(q,p) is not zero but small enough to underflow when computing n\n        z = abs(work_matrix.coeff(p,q)) / work_matrix.coeff(p,q);\n        work_matrix.row(p) *= z;\n        if(svd.computeU()) svd.m_matrixU.col(p) *= conj(z);\n      }\n      if(abs(numext::imag(work_matrix.coeff(q,q)))>considerAsZero)\n      {\n        z = abs(work_matrix.coeff(q,q)) / work_matrix.coeff(q,q);\n        work_matrix.row(q) *= z;\n        if(svd.computeU()) svd.m_matrixU.col(q) *= conj(z);\n      }\n      // otherwise the second row is already zero, so we have nothing to do.\n    }\n    else\n    {\n      rot.c() = conj(work_matrix.coeff(p,p)) / n;\n      rot.s() = work_matrix.coeff(q,p) / n;\n      work_matrix.applyOnTheLeft(p,q,rot);\n      if(svd.computeU()) svd.m_matrixU.applyOnTheRight(p,q,rot.adjoint());\n      if(abs(numext::imag(work_matrix.coeff(p,q)))>considerAsZero)\n      {\n        z = abs(work_matrix.coeff(p,q)) / work_matrix.coeff(p,q);\n        work_matrix.col(q) *= z;\n        if(svd.computeV()) svd.m_matrixV.col(q) *= z;\n      }\n      if(abs(numext::imag(work_matrix.coeff(q,q)))>considerAsZero)\n      {\n        z = abs(work_matrix.coeff(q,q)) / work_matrix.coeff(q,q);\n        work_matrix.row(q) *= z;\n        if(svd.computeU()) svd.m_matrixU.col(q) *= conj(z);\n      }\n    }\n\n    // update largest diagonal entry\n    maxDiagEntry = numext::maxi<RealScalar>(maxDiagEntry,numext::maxi<RealScalar>(abs(work_matrix.coeff(p,p)), abs(work_matrix.coeff(q,q))));\n    // and check whether the 2x2 block is already diagonal\n    RealScalar threshold = numext::maxi<RealScalar>(considerAsZero, precision * maxDiagEntry);\n    return abs(work_matrix.coeff(p,q))>threshold || abs(work_matrix.coeff(q,p)) > threshold;\n  }\n};\n\ntemplate<typename _MatrixType, int QRPreconditioner> \nstruct traits<JacobiSVD<_MatrixType,QRPreconditioner> >\n        : traits<_MatrixType>\n{\n  typedef _MatrixType MatrixType;\n};\n\n} // end namespace internal\n\n/** \\ingroup SVD_Module\n  *\n  *\n  * \\class JacobiSVD\n  *\n  * \\brief Two-sided Jacobi SVD decomposition of a rectangular matrix\n  *\n  * \\tparam _MatrixType the type of the matrix of which we are computing the SVD decomposition\n  * \\tparam QRPreconditioner this optional parameter allows to specify the type of QR decomposition that will be used internally\n  *                        for the R-SVD step for non-square matrices. See discussion of possible values below.\n  *\n  * SVD decomposition consists in decomposing any n-by-p matrix \\a A as a product\n  *   \\f[ A = U S V^* \\f]\n  * where \\a U is a n-by-n unitary, \\a V is a p-by-p unitary, and \\a S is a n-by-p real positive matrix which is zero outside of its main diagonal;\n  * the diagonal entries of S are known as the \\em singular \\em values of \\a A and the columns of \\a U and \\a V are known as the left\n  * and right \\em singular \\em vectors of \\a A respectively.\n  *\n  * Singular values are always sorted in decreasing order.\n  *\n  * This JacobiSVD decomposition computes only the singular values by default. If you want \\a U or \\a V, you need to ask for them explicitly.\n  *\n  * You can ask for only \\em thin \\a U or \\a V to be computed, meaning the following. 
In case of a rectangular n-by-p matrix, letting \\a m be the\n  * smaller value among \\a n and \\a p, there are only \\a m singular vectors; the remaining columns of \\a U and \\a V do not correspond to actual\n  * singular vectors. Asking for \\em thin \\a U or \\a V means asking for only their \\a m first columns to be formed. So \\a U is then a n-by-m matrix,\n  * and \\a V is then a p-by-m matrix. Notice that thin \\a U and \\a V are all you need for (least squares) solving.\n  *\n  * Here's an example demonstrating basic usage:\n  * \\include JacobiSVD_basic.cpp\n  * Output: \\verbinclude JacobiSVD_basic.out\n  *\n  * This JacobiSVD class is a two-sided Jacobi R-SVD decomposition, ensuring optimal reliability and accuracy. The downside is that it's slower than\n  * bidiagonalizing SVD algorithms for large square matrices; however its complexity is still \\f$ O(n^2p) \\f$ where \\a n is the smaller dimension and\n  * \\a p is the greater dimension, meaning that it is still of the same order of complexity as the faster bidiagonalizing R-SVD algorithms.\n  * In particular, like any R-SVD, it takes advantage of non-squareness in that its complexity is only linear in the greater dimension.\n  *\n  * If the input matrix has inf or nan coefficients, the result of the computation is undefined, but the computation is guaranteed to\n  * terminate in finite (and reasonable) time.\n  *\n  * The possible values for QRPreconditioner are:\n  * \\li ColPivHouseholderQRPreconditioner is the default. In practice it's very safe. It uses column-pivoting QR.\n  * \\li FullPivHouseholderQRPreconditioner is the safest and slowest. It uses full-pivoting QR.\n  *     Contrary to other QRs, it doesn't allow computing thin unitaries.\n  * \\li HouseholderQRPreconditioner is the fastest, and less safe and accurate than the pivoting variants. It uses non-pivoting QR.\n  *     This is very similar in safety and accuracy to the bidiagonalization process used by bidiagonalizing SVD algorithms (since bidiagonalization\n  *     is inherently non-pivoting). However the resulting SVD is still more reliable than bidiagonalizing SVDs because the Jacobi-based iterative\n  *     process is more reliable than the optimized bidiagonal SVD iterations.\n  * \\li NoQRPreconditioner allows not to use a QR preconditioner at all. This is useful if you know that you will only be computing\n  *     JacobiSVD decompositions of square matrices. Non-square matrices require a QR preconditioner. Using this option will result in\n  *     faster compilation and smaller executable code. 
It won't significantly speed up computation, since JacobiSVD is always checking\n  *     if QR preconditioning is needed before applying it anyway.\n  *\n  * \\sa MatrixBase::jacobiSvd()\n  */\ntemplate<typename _MatrixType, int QRPreconditioner> class JacobiSVD\n : public SVDBase<JacobiSVD<_MatrixType,QRPreconditioner> >\n{\n    typedef SVDBase<JacobiSVD> Base;\n  public:\n\n    typedef _MatrixType MatrixType;\n    typedef typename MatrixType::Scalar Scalar;\n    typedef typename NumTraits<typename MatrixType::Scalar>::Real RealScalar;\n    enum {\n      RowsAtCompileTime = MatrixType::RowsAtCompileTime,\n      ColsAtCompileTime = MatrixType::ColsAtCompileTime,\n      DiagSizeAtCompileTime = EIGEN_SIZE_MIN_PREFER_DYNAMIC(RowsAtCompileTime,ColsAtCompileTime),\n      MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,\n      MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,\n      MaxDiagSizeAtCompileTime = EIGEN_SIZE_MIN_PREFER_FIXED(MaxRowsAtCompileTime,MaxColsAtCompileTime),\n      MatrixOptions = MatrixType::Options\n    };\n\n    typedef typename Base::MatrixUType MatrixUType;\n    typedef typename Base::MatrixVType MatrixVType;\n    typedef typename Base::SingularValuesType SingularValuesType;\n    \n    typedef typename internal::plain_row_type<MatrixType>::type RowType;\n    typedef typename internal::plain_col_type<MatrixType>::type ColType;\n    typedef Matrix<Scalar, DiagSizeAtCompileTime, DiagSizeAtCompileTime,\n                   MatrixOptions, MaxDiagSizeAtCompileTime, MaxDiagSizeAtCompileTime>\n            WorkMatrixType;\n\n    /** \\brief Default Constructor.\n      *\n      * The default constructor is useful in cases in which the user intends to\n      * perform decompositions via JacobiSVD::compute(const MatrixType&).\n      */\n    JacobiSVD()\n    {}\n\n\n    /** \\brief Default Constructor with memory preallocation\n      *\n      * Like the default constructor but with preallocation of the internal data\n      * according to the specified problem size.\n      * \\sa JacobiSVD()\n      */\n    JacobiSVD(Index rows, Index cols, unsigned int computationOptions = 0)\n    {\n      allocate(rows, cols, computationOptions);\n    }\n\n    /** \\brief Constructor performing the decomposition of given matrix.\n     *\n     * \\param matrix the matrix to decompose\n     * \\param computationOptions optional parameter allowing to specify if you want full or thin U or V unitaries to be computed.\n     *                           By default, none is computed. This is a bit-field, the possible bits are #ComputeFullU, #ComputeThinU,\n     *                           #ComputeFullV, #ComputeThinV.\n     *\n     * Thin unitaries are only available if your matrix type has a Dynamic number of columns (for example MatrixXf). They also are not\n     * available with the (non-default) FullPivHouseholderQR preconditioner.\n     */\n    explicit JacobiSVD(const MatrixType& matrix, unsigned int computationOptions = 0)\n    {\n      compute(matrix, computationOptions);\n    }\n\n    /** \\brief Method performing the decomposition of given matrix using custom options.\n     *\n     * \\param matrix the matrix to decompose\n     * \\param computationOptions optional parameter allowing to specify if you want full or thin U or V unitaries to be computed.\n     *                           By default, none is computed. 
This is a bit-field, the possible bits are #ComputeFullU, #ComputeThinU,\n     *                           #ComputeFullV, #ComputeThinV.\n     *\n     * Thin unitaries are only available if your matrix type has a Dynamic number of columns (for example MatrixXf). They also are not\n     * available with the (non-default) FullPivHouseholderQR preconditioner.\n     */\n    JacobiSVD& compute(const MatrixType& matrix, unsigned int computationOptions);\n\n    /** \\brief Method performing the decomposition of given matrix using current options.\n     *\n     * \\param matrix the matrix to decompose\n     *\n     * This method uses the current \\a computationOptions, as already passed to the constructor or to compute(const MatrixType&, unsigned int).\n     */\n    JacobiSVD& compute(const MatrixType& matrix)\n    {\n      return compute(matrix, m_computationOptions);\n    }\n\n    using Base::computeU;\n    using Base::computeV;\n    using Base::rows;\n    using Base::cols;\n    using Base::rank;\n\n  private:\n    void allocate(Index rows, Index cols, unsigned int computationOptions);\n\n  protected:\n    using Base::m_matrixU;\n    using Base::m_matrixV;\n    using Base::m_singularValues;\n    using Base::m_info;\n    using Base::m_isInitialized;\n    using Base::m_isAllocated;\n    using Base::m_usePrescribedThreshold;\n    using Base::m_computeFullU;\n    using Base::m_computeThinU;\n    using Base::m_computeFullV;\n    using Base::m_computeThinV;\n    using Base::m_computationOptions;\n    using Base::m_nonzeroSingularValues;\n    using Base::m_rows;\n    using Base::m_cols;\n    using Base::m_diagSize;\n    using Base::m_prescribedThreshold;\n    WorkMatrixType m_workMatrix;\n\n    template<typename __MatrixType, int _QRPreconditioner, bool _IsComplex>\n    friend struct internal::svd_precondition_2x2_block_to_be_real;\n    template<typename __MatrixType, int _QRPreconditioner, int _Case, bool _DoAnything>\n    friend struct internal::qr_preconditioner_impl;\n\n    internal::qr_preconditioner_impl<MatrixType, QRPreconditioner, internal::PreconditionIfMoreColsThanRows> m_qr_precond_morecols;\n    internal::qr_preconditioner_impl<MatrixType, QRPreconditioner, internal::PreconditionIfMoreRowsThanCols> m_qr_precond_morerows;\n    MatrixType m_scaledMatrix;\n};\n\ntemplate<typename MatrixType, int QRPreconditioner>\nvoid JacobiSVD<MatrixType, QRPreconditioner>::allocate(Eigen::Index rows, Eigen::Index cols, unsigned int computationOptions)\n{\n  eigen_assert(rows >= 0 && cols >= 0);\n\n  if (m_isAllocated &&\n      rows == m_rows &&\n      cols == m_cols &&\n      computationOptions == m_computationOptions)\n  {\n    return;\n  }\n\n  m_rows = rows;\n  m_cols = cols;\n  m_info = Success;\n  m_isInitialized = false;\n  m_isAllocated = true;\n  m_computationOptions = computationOptions;\n  m_computeFullU = (computationOptions & ComputeFullU) != 0;\n  m_computeThinU = (computationOptions & ComputeThinU) != 0;\n  m_computeFullV = (computationOptions & ComputeFullV) != 0;\n  m_computeThinV = (computationOptions & ComputeThinV) != 0;\n  eigen_assert(!(m_computeFullU && m_computeThinU) && \"JacobiSVD: you can't ask for both full and thin U\");\n  eigen_assert(!(m_computeFullV && m_computeThinV) && \"JacobiSVD: you can't ask for both full and thin V\");\n  eigen_assert(EIGEN_IMPLIES(m_computeThinU || m_computeThinV, MatrixType::ColsAtCompileTime==Dynamic) &&\n              \"JacobiSVD: thin U and V are only available when your matrix has a dynamic number of columns.\");\n  if (QRPreconditioner == 
FullPivHouseholderQRPreconditioner)\n  {\n      eigen_assert(!(m_computeThinU || m_computeThinV) &&\n              \"JacobiSVD: can't compute thin U or thin V with the FullPivHouseholderQR preconditioner. \"\n              \"Use the ColPivHouseholderQR preconditioner instead.\");\n  }\n  m_diagSize = (std::min)(m_rows, m_cols);\n  m_singularValues.resize(m_diagSize);\n  if(RowsAtCompileTime==Dynamic)\n    m_matrixU.resize(m_rows, m_computeFullU ? m_rows\n                            : m_computeThinU ? m_diagSize\n                            : 0);\n  if(ColsAtCompileTime==Dynamic)\n    m_matrixV.resize(m_cols, m_computeFullV ? m_cols\n                            : m_computeThinV ? m_diagSize\n                            : 0);\n  m_workMatrix.resize(m_diagSize, m_diagSize);\n  \n  if(m_cols>m_rows)   m_qr_precond_morecols.allocate(*this);\n  if(m_rows>m_cols)   m_qr_precond_morerows.allocate(*this);\n  if(m_rows!=m_cols)  m_scaledMatrix.resize(rows,cols);\n}\n\ntemplate<typename MatrixType, int QRPreconditioner>\nJacobiSVD<MatrixType, QRPreconditioner>&\nJacobiSVD<MatrixType, QRPreconditioner>::compute(const MatrixType& matrix, unsigned int computationOptions)\n{\n  using std::abs;\n  allocate(matrix.rows(), matrix.cols(), computationOptions);\n\n  // currently we stop when we reach precision 2*epsilon as the last bit of precision can require an unreasonable number of iterations,\n  // only worsening the precision of U and V as we accumulate more rotations\n  const RealScalar precision = RealScalar(2) * NumTraits<Scalar>::epsilon();\n\n  // limit for denormal numbers to be considered zero in order to avoid infinite loops (see bug 286)\n  const RealScalar considerAsZero = (std::numeric_limits<RealScalar>::min)();\n\n  // Scaling factor to reduce over/under-flows\n  RealScalar scale = matrix.cwiseAbs().template maxCoeff<PropagateNaN>();\n  if (!(numext::isfinite)(scale)) {\n    m_isInitialized = true;\n    m_info = InvalidInput;\n    return *this;\n  }\n  if(scale==RealScalar(0)) scale = RealScalar(1);\n  \n  /*** step 1. The R-SVD step: we use a QR decomposition to reduce to the case of a square matrix */\n\n  if(m_rows!=m_cols)\n  {\n    m_scaledMatrix = matrix / scale;\n    m_qr_precond_morecols.run(*this, m_scaledMatrix);\n    m_qr_precond_morerows.run(*this, m_scaledMatrix);\n  }\n  else\n  {\n    m_workMatrix = matrix.block(0,0,m_diagSize,m_diagSize) / scale;\n    if(m_computeFullU) m_matrixU.setIdentity(m_rows,m_rows);\n    if(m_computeThinU) m_matrixU.setIdentity(m_rows,m_diagSize);\n    if(m_computeFullV) m_matrixV.setIdentity(m_cols,m_cols);\n    if(m_computeThinV) m_matrixV.setIdentity(m_cols, m_diagSize);\n  }\n\n  /*** step 2. The main Jacobi SVD iteration. ***/\n  RealScalar maxDiagEntry = m_workMatrix.cwiseAbs().diagonal().maxCoeff();\n\n  bool finished = false;\n  while(!finished)\n  {\n    finished = true;\n\n    // do a sweep: for all index pairs (p,q), perform SVD of the corresponding 2x2 sub-matrix\n\n    for(Index p = 1; p < m_diagSize; ++p)\n    {\n      for(Index q = 0; q < p; ++q)\n      {\n        // if this 2x2 sub-matrix is not diagonal already...\n        // notice that this comparison will evaluate to false if any NaN is involved, ensuring that NaN's don't\n        // keep us iterating forever. 
Similarly, small denormal numbers are considered zero.\n        RealScalar threshold = numext::maxi<RealScalar>(considerAsZero, precision * maxDiagEntry);\n        if(abs(m_workMatrix.coeff(p,q))>threshold || abs(m_workMatrix.coeff(q,p)) > threshold)\n        {\n          finished = false;\n          // perform SVD decomposition of 2x2 sub-matrix corresponding to indices p,q to make it diagonal\n          // the complex to real operation returns true if the updated 2x2 block is not already diagonal\n          if(internal::svd_precondition_2x2_block_to_be_real<MatrixType, QRPreconditioner>::run(m_workMatrix, *this, p, q, maxDiagEntry))\n          {\n            JacobiRotation<RealScalar> j_left, j_right;\n            internal::real_2x2_jacobi_svd(m_workMatrix, p, q, &j_left, &j_right);\n\n            // accumulate resulting Jacobi rotations\n            m_workMatrix.applyOnTheLeft(p,q,j_left);\n            if(computeU()) m_matrixU.applyOnTheRight(p,q,j_left.transpose());\n\n            m_workMatrix.applyOnTheRight(p,q,j_right);\n            if(computeV()) m_matrixV.applyOnTheRight(p,q,j_right);\n\n            // keep track of the largest diagonal coefficient\n            maxDiagEntry = numext::maxi<RealScalar>(maxDiagEntry,numext::maxi<RealScalar>(abs(m_workMatrix.coeff(p,p)), abs(m_workMatrix.coeff(q,q))));\n          }\n        }\n      }\n    }\n  }\n\n  /*** step 3. The work matrix is now diagonal, so ensure it's positive so its diagonal entries are the singular values ***/\n\n  for(Index i = 0; i < m_diagSize; ++i)\n  {\n    // For a complex matrix, some diagonal coefficients might not have been\n    // treated by svd_precondition_2x2_block_to_be_real, and the imaginary part\n    // of some diagonal entry might not be null.\n    if(NumTraits<Scalar>::IsComplex && abs(numext::imag(m_workMatrix.coeff(i,i)))>considerAsZero)\n    {\n      RealScalar a = abs(m_workMatrix.coeff(i,i));\n      m_singularValues.coeffRef(i) = abs(a);\n      if(computeU()) m_matrixU.col(i) *= m_workMatrix.coeff(i,i)/a;\n    }\n    else\n    {\n      // m_workMatrix.coeff(i,i) is already real, no difficulty:\n      RealScalar a = numext::real(m_workMatrix.coeff(i,i));\n      m_singularValues.coeffRef(i) = abs(a);\n      if(computeU() && (a<RealScalar(0))) m_matrixU.col(i) = -m_matrixU.col(i);\n    }\n  }\n  \n  m_singularValues *= scale;\n\n  /*** step 4. Sort singular values in descending order and compute the number of nonzero singular values ***/\n\n  m_nonzeroSingularValues = m_diagSize;\n  for(Index i = 0; i < m_diagSize; i++)\n  {\n    Index pos;\n    RealScalar maxRemainingSingularValue = m_singularValues.tail(m_diagSize-i).maxCoeff(&pos);\n    if(maxRemainingSingularValue == RealScalar(0))\n    {\n      m_nonzeroSingularValues = i;\n      break;\n    }\n    if(pos)\n    {\n      pos += i;\n      std::swap(m_singularValues.coeffRef(i), m_singularValues.coeffRef(pos));\n      if(computeU()) m_matrixU.col(pos).swap(m_matrixU.col(i));\n      if(computeV()) m_matrixV.col(pos).swap(m_matrixV.col(i));\n    }\n  }\n\n  m_isInitialized = true;\n  return *this;\n}\n\n/** \\svd_module\n  *\n  * \\return the singular value decomposition of \\c *this computed by two-sided\n  * Jacobi transformations.\n  *\n  * \\sa class JacobiSVD\n  */\ntemplate<typename Derived>\nJacobiSVD<typename MatrixBase<Derived>::PlainObject>\nMatrixBase<Derived>::jacobiSvd(unsigned int computationOptions) const\n{\n  return JacobiSVD<PlainObject>(*this, computationOptions);\n}\n\n} // end namespace Eigen\n\n#endif // EIGEN_JACOBISVD_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/SVD/JacobiSVD_LAPACKE.h",
    "content": "/*\n Copyright (c) 2011, Intel Corporation. All rights reserved.\n\n Redistribution and use in source and binary forms, with or without modification,\n are permitted provided that the following conditions are met:\n\n * Redistributions of source code must retain the above copyright notice, this\n   list of conditions and the following disclaimer.\n * Redistributions in binary form must reproduce the above copyright notice,\n   this list of conditions and the following disclaimer in the documentation\n   and/or other materials provided with the distribution.\n * Neither the name of Intel Corporation nor the names of its contributors may\n   be used to endorse or promote products derived from this software without\n   specific prior written permission.\n\n THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\" AND\n ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED\n WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE\n DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR\n ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES\n (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;\n LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON\n ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\n SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\n ********************************************************************************\n *   Content : Eigen bindings to LAPACKe\n *    Singular Value Decomposition - SVD.\n ********************************************************************************\n*/\n\n#ifndef EIGEN_JACOBISVD_LAPACKE_H\n#define EIGEN_JACOBISVD_LAPACKE_H\n\nnamespace Eigen { \n\n/** \\internal Specialization for the data types supported by LAPACKe */\n\n#define EIGEN_LAPACKE_SVD(EIGTYPE, LAPACKE_TYPE, LAPACKE_RTYPE, LAPACKE_PREFIX, EIGCOLROW, LAPACKE_COLROW) \\\ntemplate<> inline \\\nJacobiSVD<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW, Dynamic, Dynamic>, ColPivHouseholderQRPreconditioner>& \\\nJacobiSVD<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW, Dynamic, Dynamic>, ColPivHouseholderQRPreconditioner>::compute(const Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW, Dynamic, Dynamic>& matrix, unsigned int computationOptions) \\\n{ \\\n  typedef Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW, Dynamic, Dynamic> MatrixType; \\\n  /*typedef MatrixType::Scalar Scalar;*/ \\\n  /*typedef MatrixType::RealScalar RealScalar;*/ \\\n  allocate(matrix.rows(), matrix.cols(), computationOptions); \\\n\\\n  /*const RealScalar precision = RealScalar(2) * NumTraits<Scalar>::epsilon();*/ \\\n  m_nonzeroSingularValues = m_diagSize; \\\n\\\n  lapack_int lda = internal::convert_index<lapack_int>(matrix.outerStride()), ldu, ldvt; \\\n  lapack_int matrix_order = LAPACKE_COLROW; \\\n  char jobu, jobvt; \\\n  LAPACKE_TYPE *u, *vt, dummy; \\\n  jobu  = (m_computeFullU) ? 'A' : (m_computeThinU) ? 'S' : 'N'; \\\n  jobvt = (m_computeFullV) ? 'A' : (m_computeThinV) ? 'S' : 'N'; \\\n  if (computeU()) { \\\n    ldu  = internal::convert_index<lapack_int>(m_matrixU.outerStride()); \\\n    u    = (LAPACKE_TYPE*)m_matrixU.data(); \\\n  } else { ldu=1; u=&dummy; }\\\n  MatrixType localV; \\\n  lapack_int vt_rows = (m_computeFullV) ? internal::convert_index<lapack_int>(m_cols) : (m_computeThinV) ? 
internal::convert_index<lapack_int>(m_diagSize) : 1; \\\n  if (computeV()) { \\\n    localV.resize(vt_rows, m_cols); \\\n    ldvt  = internal::convert_index<lapack_int>(localV.outerStride()); \\\n    vt   = (LAPACKE_TYPE*)localV.data(); \\\n  } else { ldvt=1; vt=&dummy; }\\\n  Matrix<LAPACKE_RTYPE, Dynamic, Dynamic> superb; superb.resize(m_diagSize, 1); \\\n  MatrixType m_temp; m_temp = matrix; \\\n  LAPACKE_##LAPACKE_PREFIX##gesvd( matrix_order, jobu, jobvt, internal::convert_index<lapack_int>(m_rows), internal::convert_index<lapack_int>(m_cols), (LAPACKE_TYPE*)m_temp.data(), lda, (LAPACKE_RTYPE*)m_singularValues.data(), u, ldu, vt, ldvt, superb.data()); \\\n  if (computeV()) m_matrixV = localV.adjoint(); \\\n /* for(int i=0;i<m_diagSize;i++) if (m_singularValues.coeffRef(i) < precision) { m_nonzeroSingularValues--; m_singularValues.coeffRef(i)=RealScalar(0);}*/ \\\n  m_isInitialized = true; \\\n  return *this; \\\n}\n\nEIGEN_LAPACKE_SVD(double,   double,                double, d, ColMajor, LAPACK_COL_MAJOR)\nEIGEN_LAPACKE_SVD(float,    float,                 float , s, ColMajor, LAPACK_COL_MAJOR)\nEIGEN_LAPACKE_SVD(dcomplex, lapack_complex_double, double, z, ColMajor, LAPACK_COL_MAJOR)\nEIGEN_LAPACKE_SVD(scomplex, lapack_complex_float,  float , c, ColMajor, LAPACK_COL_MAJOR)\n\nEIGEN_LAPACKE_SVD(double,   double,                double, d, RowMajor, LAPACK_ROW_MAJOR)\nEIGEN_LAPACKE_SVD(float,    float,                 float , s, RowMajor, LAPACK_ROW_MAJOR)\nEIGEN_LAPACKE_SVD(dcomplex, lapack_complex_double, double, z, RowMajor, LAPACK_ROW_MAJOR)\nEIGEN_LAPACKE_SVD(scomplex, lapack_complex_float,  float , c, RowMajor, LAPACK_ROW_MAJOR)\n\n} // end namespace Eigen\n\n#endif // EIGEN_JACOBISVD_LAPACKE_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/SVD/SVDBase.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2009-2010 Benoit Jacob <jacob.benoit.1@gmail.com>\n// Copyright (C) 2014 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// Copyright (C) 2013 Gauthier Brun <brun.gauthier@gmail.com>\n// Copyright (C) 2013 Nicolas Carre <nicolas.carre@ensimag.fr>\n// Copyright (C) 2013 Jean Ceccato <jean.ceccato@ensimag.fr>\n// Copyright (C) 2013 Pierre Zoppitelli <pierre.zoppitelli@ensimag.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_SVDBASE_H\n#define EIGEN_SVDBASE_H\n\nnamespace Eigen {\n\nnamespace internal {\ntemplate<typename Derived> struct traits<SVDBase<Derived> >\n : traits<Derived>\n{\n  typedef MatrixXpr XprKind;\n  typedef SolverStorage StorageKind;\n  typedef int StorageIndex;\n  enum { Flags = 0 };\n};\n}\n\n/** \\ingroup SVD_Module\n *\n *\n * \\class SVDBase\n *\n * \\brief Base class of SVD algorithms\n *\n * \\tparam Derived the type of the actual SVD decomposition\n *\n * SVD decomposition consists in decomposing any n-by-p matrix \\a A as a product\n *   \\f[ A = U S V^* \\f]\n * where \\a U is a n-by-n unitary, \\a V is a p-by-p unitary, and \\a S is a n-by-p real positive matrix which is zero outside of its main diagonal;\n * the diagonal entries of S are known as the \\em singular \\em values of \\a A and the columns of \\a U and \\a V are known as the left\n * and right \\em singular \\em vectors of \\a A respectively.\n *\n * Singular values are always sorted in decreasing order.\n *\n * \n * You can ask for only \\em thin \\a U or \\a V to be computed, meaning the following. In case of a rectangular n-by-p matrix, letting \\a m be the\n * smaller value among \\a n and \\a p, there are only \\a m singular vectors; the remaining columns of \\a U and \\a V do not correspond to actual\n * singular vectors. Asking for \\em thin \\a U or \\a V means asking for only their \\a m first columns to be formed. So \\a U is then a n-by-m matrix,\n * and \\a V is then a p-by-m matrix. Notice that thin \\a U and \\a V are all you need for (least squares) solving.\n * \n * The status of the computation can be retrived using the \\a info() method. 
Unless \\a info() returns \\a Success, the results should not be\n * considered well defined.\n *  \n * If the input matrix has inf or nan coefficients, the result of the computation is undefined, and \\a info() will return \\a InvalidInput, but the computation is guaranteed to\n * terminate in finite (and reasonable) time.\n * \\sa class BDCSVD, class JacobiSVD\n */\ntemplate<typename Derived> class SVDBase\n : public SolverBase<SVDBase<Derived> >\n{\npublic: \n   \n  template<typename Derived_>\n  friend struct internal::solve_assertion;\n\n  typedef typename internal::traits<Derived>::MatrixType MatrixType;\n  typedef typename MatrixType::Scalar Scalar;\n  typedef typename NumTraits<typename MatrixType::Scalar>::Real RealScalar;\n  typedef typename Eigen::internal::traits<SVDBase>::StorageIndex StorageIndex;\n  typedef Eigen::Index Index; ///< \\deprecated since Eigen 3.3\n  enum {\n    RowsAtCompileTime = MatrixType::RowsAtCompileTime,\n    ColsAtCompileTime = MatrixType::ColsAtCompileTime,\n    DiagSizeAtCompileTime = EIGEN_SIZE_MIN_PREFER_DYNAMIC(RowsAtCompileTime,ColsAtCompileTime),\n    MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,\n    MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,\n    MaxDiagSizeAtCompileTime = EIGEN_SIZE_MIN_PREFER_FIXED(MaxRowsAtCompileTime,MaxColsAtCompileTime),\n    MatrixOptions = MatrixType::Options\n  };\n\n  typedef Matrix<Scalar, RowsAtCompileTime, RowsAtCompileTime, MatrixOptions, MaxRowsAtCompileTime, MaxRowsAtCompileTime> MatrixUType;\n  typedef Matrix<Scalar, ColsAtCompileTime, ColsAtCompileTime, MatrixOptions, MaxColsAtCompileTime, MaxColsAtCompileTime> MatrixVType;\n  typedef typename internal::plain_diag_type<MatrixType, RealScalar>::type SingularValuesType;\n  \n  Derived& derived() { return *static_cast<Derived*>(this); }\n  const Derived& derived() const { return *static_cast<const Derived*>(this); }\n\n  /** \\returns the \\a U matrix.\n   *\n   * For the SVD decomposition of an n-by-p matrix, letting \\a m be the minimum of \\a n and \\a p,\n   * the U matrix is n-by-n if you asked for \\link Eigen::ComputeFullU ComputeFullU \\endlink, and is n-by-m if you asked for \\link Eigen::ComputeThinU ComputeThinU \\endlink.\n   *\n   * The \\a m first columns of \\a U are the left singular vectors of the matrix being decomposed.\n   *\n   * This method asserts that you asked for \\a U to be computed.\n   */\n  const MatrixUType& matrixU() const\n  {\n    _check_compute_assertions();\n    eigen_assert(computeU() && \"This SVD decomposition didn't compute U. Did you ask for it?\");\n    return m_matrixU;\n  }\n\n  /** \\returns the \\a V matrix.\n   *\n   * For the SVD decomposition of an n-by-p matrix, letting \\a m be the minimum of \\a n and \\a p,\n   * the V matrix is p-by-p if you asked for \\link Eigen::ComputeFullV ComputeFullV \\endlink, and is p-by-m if you asked for \\link Eigen::ComputeThinV ComputeThinV \\endlink.\n   *\n   * The \\a m first columns of \\a V are the right singular vectors of the matrix being decomposed.\n   *\n   * This method asserts that you asked for \\a V to be computed.\n   */\n  const MatrixVType& matrixV() const\n  {\n    _check_compute_assertions();\n    eigen_assert(computeV() && \"This SVD decomposition didn't compute V. Did you ask for it?\");\n    return m_matrixV;\n  }\n\n  /** \\returns the vector of singular values.\n   *\n   * For the SVD decomposition of an n-by-p matrix, letting \\a m be the minimum of \\a n and \\a p, the\n   * returned vector has size \\a m.  
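For instance (a sketch reusing the hypothetical \\c svd object from the example above):\n   * \\code\n   * double largest  = svd.singularValues()(0);\n   * double smallest = svd.singularValues()(svd.singularValues().size()-1);\n   * double cond = largest / smallest; // rough condition-number estimate\n   * \\endcode\n   * 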
Singular values are always sorted in decreasing order.\n   */\n  const SingularValuesType& singularValues() const\n  {\n    _check_compute_assertions();\n    return m_singularValues;\n  }\n\n  /** \\returns the number of singular values that are not exactly 0 */\n  Index nonzeroSingularValues() const\n  {\n    _check_compute_assertions();\n    return m_nonzeroSingularValues;\n  }\n  \n  /** \\returns the rank of the matrix of which \\c *this is the SVD.\n    *\n    * \\note This method has to determine which singular values should be considered nonzero.\n    *       For that, it uses the threshold value that you can control by calling\n    *       setThreshold(const RealScalar&).\n    */\n  inline Index rank() const\n  {\n    using std::abs;\n    _check_compute_assertions();\n    if(m_singularValues.size()==0) return 0;\n    RealScalar premultiplied_threshold = numext::maxi<RealScalar>(m_singularValues.coeff(0) * threshold(), (std::numeric_limits<RealScalar>::min)());\n    Index i = m_nonzeroSingularValues-1;\n    while(i>=0 && m_singularValues.coeff(i) < premultiplied_threshold) --i;\n    return i+1;\n  }\n  \n  /** Allows you to prescribe a threshold to be used by certain methods, such as rank() and solve(),\n    * which need to determine when singular values are to be considered nonzero.\n    * This is not used for the SVD decomposition itself.\n    *\n    * When it needs to get the threshold value, Eigen calls threshold().\n    * The default is \\c NumTraits<Scalar>::epsilon()\n    *\n    * \\param threshold The new value to use as the threshold.\n    *\n    * A singular value will be considered nonzero if it is strictly greater than\n    *  \\f$ threshold \\times \\vert max singular value \\vert \\f$.\n    *\n    * If you want to come back to the default behavior, call setThreshold(Default_t)\n    */\n  Derived& setThreshold(const RealScalar& threshold)\n  {\n    m_usePrescribedThreshold = true;\n    m_prescribedThreshold = threshold;\n    return derived();\n  }\n\n  /** Allows you to come back to the default behavior, letting Eigen use its default formula for\n    * determining the threshold.\n    *\n    * You should pass the special object Eigen::Default as parameter here.\n    * \\code svd.setThreshold(Eigen::Default); \\endcode\n    *\n    * See the documentation of setThreshold(const RealScalar&).\n    */\n  Derived& setThreshold(Default_t)\n  {\n    m_usePrescribedThreshold = false;\n    return derived();\n  }\n\n  /** Returns the threshold that will be used by certain methods such as rank().\n    *\n    * See the documentation of setThreshold(const RealScalar&).\n    */\n  RealScalar threshold() const\n  {\n    eigen_assert(m_isInitialized || m_usePrescribedThreshold);\n    // this temporary is needed to workaround a MSVC issue\n    Index diagSize = (std::max<Index>)(1,m_diagSize);\n    return m_usePrescribedThreshold ? 
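/* a user-prescribed threshold takes precedence over the epsilon-based default */ 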
m_prescribedThreshold\n                                    : RealScalar(diagSize)*NumTraits<Scalar>::epsilon();\n  }\n\n  /** \\returns true if \\a U (full or thin) is asked for in this SVD decomposition */\n  inline bool computeU() const { return m_computeFullU || m_computeThinU; }\n  /** \\returns true if \\a V (full or thin) is asked for in this SVD decomposition */\n  inline bool computeV() const { return m_computeFullV || m_computeThinV; }\n\n  inline Index rows() const { return m_rows; }\n  inline Index cols() const { return m_cols; }\n  \n  #ifdef EIGEN_PARSED_BY_DOXYGEN\n  /** \\returns a (least squares) solution of \\f$ A x = b \\f$ using the current SVD decomposition of A.\n    *\n    * \\param b the right-hand-side of the equation to solve.\n    *\n    * \\note Solving requires both U and V to be computed. Thin U and V are enough, there is no need for full U or V.\n    *\n    * \\note SVD solving is implicitly least-squares. Thus, this method serves both purposes of exact solving and least-squares solving.\n    * In other words, the returned solution is guaranteed to minimize the Euclidean norm \\f$ \\Vert A x - b \\Vert \\f$.\n    */\n  template<typename Rhs>\n  inline const Solve<Derived, Rhs>\n  solve(const MatrixBase<Rhs>& b) const;\n  #endif\n\n\n  /** \\brief Reports whether previous computation was successful.\n   *\n   * \\returns \\c Success if computation was successful.\n   */\n  EIGEN_DEVICE_FUNC\n  ComputationInfo info() const\n  {\n    eigen_assert(m_isInitialized && \"SVD is not initialized.\");\n    return m_info;\n  }\n\n  #ifndef EIGEN_PARSED_BY_DOXYGEN\n  template<typename RhsType, typename DstType>\n  void _solve_impl(const RhsType &rhs, DstType &dst) const;\n\n  template<bool Conjugate, typename RhsType, typename DstType>\n  void _solve_impl_transposed(const RhsType &rhs, DstType &dst) const;\n  #endif\n\nprotected:\n\n  static void check_template_parameters()\n  {\n    EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);\n  }\n\n  void _check_compute_assertions() const {\n    eigen_assert(m_isInitialized && \"SVD is not initialized.\");\n  }\n\n  template<bool Transpose_, typename Rhs>\n  void _check_solve_assertion(const Rhs& b) const {\n      EIGEN_ONLY_USED_FOR_DEBUG(b);\n      _check_compute_assertions();\n      eigen_assert(computeU() && computeV() && \"SVDBase::solve(): Both unitaries U and V are required to be computed (thin unitaries suffice).\");\n      eigen_assert((Transpose_?cols():rows())==b.rows() && \"SVDBase::solve(): invalid number of rows of the right hand side matrix b\");\n  }\n\n  // return true if already allocated\n  bool allocate(Index rows, Index cols, unsigned int computationOptions) ;\n\n  MatrixUType m_matrixU;\n  MatrixVType m_matrixV;\n  SingularValuesType m_singularValues;\n  ComputationInfo m_info;\n  bool m_isInitialized, m_isAllocated, m_usePrescribedThreshold;\n  bool m_computeFullU, m_computeThinU;\n  bool m_computeFullV, m_computeThinV;\n  unsigned int m_computationOptions;\n  Index m_nonzeroSingularValues, m_rows, m_cols, m_diagSize;\n  RealScalar m_prescribedThreshold;\n\n  /** \\brief Default Constructor.\n   *\n   * Default constructor of SVDBase\n   */\n  SVDBase()\n    : m_info(Success),\n      m_isInitialized(false),\n      m_isAllocated(false),\n      m_usePrescribedThreshold(false),\n      m_computeFullU(false),\n      m_computeThinU(false),\n      m_computeFullV(false),\n      m_computeThinV(false),\n      m_computationOptions(0),\n      m_rows(-1), m_cols(-1), m_diagSize(0)\n  {\n    check_template_parameters();\n  
}\n\n\n};\n\n#ifndef EIGEN_PARSED_BY_DOXYGEN\ntemplate<typename Derived>\ntemplate<typename RhsType, typename DstType>\nvoid SVDBase<Derived>::_solve_impl(const RhsType &rhs, DstType &dst) const\n{\n  // A = U S V^*\n  // So A^{-1} = V S^{-1} U^*\n\n  Matrix<typename RhsType::Scalar, Dynamic, RhsType::ColsAtCompileTime, 0, MatrixType::MaxRowsAtCompileTime, RhsType::MaxColsAtCompileTime> tmp;\n  Index l_rank = rank();\n  tmp.noalias() =  m_matrixU.leftCols(l_rank).adjoint() * rhs;\n  tmp = m_singularValues.head(l_rank).asDiagonal().inverse() * tmp;\n  dst = m_matrixV.leftCols(l_rank) * tmp;\n}\n\ntemplate<typename Derived>\ntemplate<bool Conjugate, typename RhsType, typename DstType>\nvoid SVDBase<Derived>::_solve_impl_transposed(const RhsType &rhs, DstType &dst) const\n{\n  // A = U S V^*\n  // So  A^{-*} = U S^{-1} V^*\n  // And A^{-T} = U_conj S^{-1} V^T\n  Matrix<typename RhsType::Scalar, Dynamic, RhsType::ColsAtCompileTime, 0, MatrixType::MaxRowsAtCompileTime, RhsType::MaxColsAtCompileTime> tmp;\n  Index l_rank = rank();\n\n  tmp.noalias() =  m_matrixV.leftCols(l_rank).transpose().template conjugateIf<Conjugate>() * rhs;\n  tmp = m_singularValues.head(l_rank).asDiagonal().inverse() * tmp;\n  dst = m_matrixU.template conjugateIf<!Conjugate>().leftCols(l_rank) * tmp;\n}\n#endif\n\ntemplate<typename MatrixType>\nbool SVDBase<MatrixType>::allocate(Index rows, Index cols, unsigned int computationOptions)\n{\n  eigen_assert(rows >= 0 && cols >= 0);\n\n  if (m_isAllocated &&\n      rows == m_rows &&\n      cols == m_cols &&\n      computationOptions == m_computationOptions)\n  {\n    return true;\n  }\n\n  m_rows = rows;\n  m_cols = cols;\n  m_info = Success;\n  m_isInitialized = false;\n  m_isAllocated = true;\n  m_computationOptions = computationOptions;\n  m_computeFullU = (computationOptions & ComputeFullU) != 0;\n  m_computeThinU = (computationOptions & ComputeThinU) != 0;\n  m_computeFullV = (computationOptions & ComputeFullV) != 0;\n  m_computeThinV = (computationOptions & ComputeThinV) != 0;\n  eigen_assert(!(m_computeFullU && m_computeThinU) && \"SVDBase: you can't ask for both full and thin U\");\n  eigen_assert(!(m_computeFullV && m_computeThinV) && \"SVDBase: you can't ask for both full and thin V\");\n  eigen_assert(EIGEN_IMPLIES(m_computeThinU || m_computeThinV, MatrixType::ColsAtCompileTime==Dynamic) &&\n\t       \"SVDBase: thin U and V are only available when your matrix has a dynamic number of columns.\");\n\n  m_diagSize = (std::min)(m_rows, m_cols);\n  m_singularValues.resize(m_diagSize);\n  if(RowsAtCompileTime==Dynamic)\n    m_matrixU.resize(m_rows, m_computeFullU ? m_rows : m_computeThinU ? m_diagSize : 0);\n  if(ColsAtCompileTime==Dynamic)\n    m_matrixV.resize(m_cols, m_computeFullV ? m_cols : m_computeThinV ? m_diagSize : 0);\n\n  return false;\n}\n\n}// end namespace\n\n#endif // EIGEN_SVDBASE_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/SVD/UpperBidiagonalization.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2010 Benoit Jacob <jacob.benoit.1@gmail.com>\n// Copyright (C) 2013-2014 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_BIDIAGONALIZATION_H\n#define EIGEN_BIDIAGONALIZATION_H\n\nnamespace Eigen { \n\nnamespace internal {\n// UpperBidiagonalization will probably be replaced by a Bidiagonalization class, don't want to make it stable API.\n// At the same time, it's useful to keep for now as it's about the only thing that is testing the BandMatrix class.\n\ntemplate<typename _MatrixType> class UpperBidiagonalization\n{\n  public:\n\n    typedef _MatrixType MatrixType;\n    enum {\n      RowsAtCompileTime = MatrixType::RowsAtCompileTime,\n      ColsAtCompileTime = MatrixType::ColsAtCompileTime,\n      ColsAtCompileTimeMinusOne = internal::decrement_size<ColsAtCompileTime>::ret\n    };\n    typedef typename MatrixType::Scalar Scalar;\n    typedef typename MatrixType::RealScalar RealScalar;\n    typedef Eigen::Index Index; ///< \\deprecated since Eigen 3.3\n    typedef Matrix<Scalar, 1, ColsAtCompileTime> RowVectorType;\n    typedef Matrix<Scalar, RowsAtCompileTime, 1> ColVectorType;\n    typedef BandMatrix<RealScalar, ColsAtCompileTime, ColsAtCompileTime, 1, 0, RowMajor> BidiagonalType;\n    typedef Matrix<Scalar, ColsAtCompileTime, 1> DiagVectorType;\n    typedef Matrix<Scalar, ColsAtCompileTimeMinusOne, 1> SuperDiagVectorType;\n    typedef HouseholderSequence<\n              const MatrixType,\n              const typename internal::remove_all<typename Diagonal<const MatrixType,0>::ConjugateReturnType>::type\n            > HouseholderUSequenceType;\n    typedef HouseholderSequence<\n              const typename internal::remove_all<typename MatrixType::ConjugateReturnType>::type,\n              Diagonal<const MatrixType,1>,\n              OnTheRight\n            > HouseholderVSequenceType;\n    \n    /**\n    * \\brief Default Constructor.\n    *\n    * The default constructor is useful in cases in which the user intends to\n    * perform decompositions via Bidiagonalization::compute(const MatrixType&).\n    */\n    UpperBidiagonalization() : m_householder(), m_bidiagonal(), m_isInitialized(false) {}\n\n    explicit UpperBidiagonalization(const MatrixType& matrix)\n      : m_householder(matrix.rows(), matrix.cols()),\n        m_bidiagonal(matrix.cols(), matrix.cols()),\n        m_isInitialized(false)\n    {\n      compute(matrix);\n    }\n    \n    UpperBidiagonalization& compute(const MatrixType& matrix);\n    UpperBidiagonalization& computeUnblocked(const MatrixType& matrix);\n    \n    const MatrixType& householder() const { return m_householder; }\n    const BidiagonalType& bidiagonal() const { return m_bidiagonal; }\n    \n    const HouseholderUSequenceType householderU() const\n    {\n      eigen_assert(m_isInitialized && \"UpperBidiagonalization is not initialized.\");\n      return HouseholderUSequenceType(m_householder, m_householder.diagonal().conjugate());\n    }\n\n    const HouseholderVSequenceType householderV() // const here gives nasty errors and i'm lazy\n    {\n      eigen_assert(m_isInitialized && \"UpperBidiagonalization is not initialized.\");\n      return HouseholderVSequenceType(m_householder.conjugate(), 
m_householder.const_derived().template diagonal<1>())\n             .setLength(m_householder.cols()-1)\n             .setShift(1);\n    }\n    \n  protected:\n    MatrixType m_householder;\n    BidiagonalType m_bidiagonal;\n    bool m_isInitialized;\n};\n\n// Standard upper bidiagonalization without fancy optimizations\n// This version should be faster for small matrix size\ntemplate<typename MatrixType>\nvoid upperbidiagonalization_inplace_unblocked(MatrixType& mat,\n                                              typename MatrixType::RealScalar *diagonal,\n                                              typename MatrixType::RealScalar *upper_diagonal,\n                                              typename MatrixType::Scalar* tempData = 0)\n{\n  typedef typename MatrixType::Scalar Scalar;\n\n  Index rows = mat.rows();\n  Index cols = mat.cols();\n\n  typedef Matrix<Scalar,Dynamic,1,ColMajor,MatrixType::MaxRowsAtCompileTime,1> TempType;\n  TempType tempVector;\n  if(tempData==0)\n  {\n    tempVector.resize(rows);\n    tempData = tempVector.data();\n  }\n\n  for (Index k = 0; /* breaks at k==cols-1 below */ ; ++k)\n  {\n    Index remainingRows = rows - k;\n    Index remainingCols = cols - k - 1;\n\n    // construct left householder transform in-place in A\n    mat.col(k).tail(remainingRows)\n       .makeHouseholderInPlace(mat.coeffRef(k,k), diagonal[k]);\n    // apply householder transform to remaining part of A on the left\n    mat.bottomRightCorner(remainingRows, remainingCols)\n       .applyHouseholderOnTheLeft(mat.col(k).tail(remainingRows-1), mat.coeff(k,k), tempData);\n\n    if(k == cols-1) break;\n\n    // construct right householder transform in-place in mat\n    mat.row(k).tail(remainingCols)\n       .makeHouseholderInPlace(mat.coeffRef(k,k+1), upper_diagonal[k]);\n    // apply householder transform to remaining part of mat on the right\n    mat.bottomRightCorner(remainingRows-1, remainingCols)\n       .applyHouseholderOnTheRight(mat.row(k).tail(remainingCols-1).adjoint(), mat.coeff(k,k+1), tempData);\n  }\n}\n\n/** \\internal\n  * Helper routine for the block reduction to upper bidiagonal form.\n  *\n  * Let's partition the matrix A:\n  * \n  *      | A00 A01 |\n  *  A = |         |\n  *      | A10 A11 |\n  *\n  * This function reduces to bidiagonal form the left \\c rows x \\a blockSize vertical panel [A00/A10]\n  * and the \\a blockSize x \\c cols horizontal panel [A00 A01] of the matrix \\a A. The bottom-right block A11\n  * is updated using matrix-matrix products:\n  *   A11 -= V * Y^T + X * U^T\n  * where V and U contain the left and right Householder vectors. 
U and V are stored in A10, and A01\n  * respectively, and the update matrices X and Y are computed during the reduction.\n  * \n  */\ntemplate<typename MatrixType>\nvoid upperbidiagonalization_blocked_helper(MatrixType& A,\n                                           typename MatrixType::RealScalar *diagonal,\n                                           typename MatrixType::RealScalar *upper_diagonal,\n                                           Index bs,\n                                           Ref<Matrix<typename MatrixType::Scalar, Dynamic, Dynamic,\n                                                      traits<MatrixType>::Flags & RowMajorBit> > X,\n                                           Ref<Matrix<typename MatrixType::Scalar, Dynamic, Dynamic,\n                                                      traits<MatrixType>::Flags & RowMajorBit> > Y)\n{\n  typedef typename MatrixType::Scalar Scalar;\n  typedef typename MatrixType::RealScalar RealScalar;\n  typedef typename NumTraits<RealScalar>::Literal Literal;\n  enum { StorageOrder = traits<MatrixType>::Flags & RowMajorBit };\n  typedef InnerStride<int(StorageOrder) == int(ColMajor) ? 1 : Dynamic> ColInnerStride;\n  typedef InnerStride<int(StorageOrder) == int(ColMajor) ? Dynamic : 1> RowInnerStride;\n  typedef Ref<Matrix<Scalar, Dynamic, 1>, 0, ColInnerStride>    SubColumnType;\n  typedef Ref<Matrix<Scalar, 1, Dynamic>, 0, RowInnerStride>    SubRowType;\n  typedef Ref<Matrix<Scalar, Dynamic, Dynamic, StorageOrder > > SubMatType;\n  \n  Index brows = A.rows();\n  Index bcols = A.cols();\n\n  Scalar tau_u, tau_u_prev(0), tau_v;\n\n  for(Index k = 0; k < bs; ++k)\n  {\n    Index remainingRows = brows - k;\n    Index remainingCols = bcols - k - 1;\n\n    SubMatType X_k1( X.block(k,0, remainingRows,k) );\n    SubMatType V_k1( A.block(k,0, remainingRows,k) );\n\n    // 1 - update the k-th column of A\n    SubColumnType v_k = A.col(k).tail(remainingRows);\n          v_k -= V_k1 * Y.row(k).head(k).adjoint();\n    if(k) v_k -= X_k1 * A.col(k).head(k);\n    \n    // 2 - construct left Householder transform in-place\n    v_k.makeHouseholderInPlace(tau_v, diagonal[k]);\n       \n    if(k+1<bcols)\n    {\n      SubMatType Y_k  ( Y.block(k+1,0, remainingCols, k+1) );\n      SubMatType U_k1 ( A.block(0,k+1, k,remainingCols) );\n      \n      // this eases the application of Householder transformations\n      // A(k,k) will store tau_v later\n      A(k,k) = Scalar(1);\n\n      // 3 - Compute y_k^T = tau_v * ( A^T*v_k - Y_k-1*V_k-1^T*v_k - U_k-1*X_k-1^T*v_k )\n      {\n        SubColumnType y_k( Y.col(k).tail(remainingCols) );\n        \n        // let's use the beginning of column k of Y as a temporary vector\n        SubColumnType tmp( Y.col(k).head(k) );\n        y_k.noalias()  = A.block(k,k+1, remainingRows,remainingCols).adjoint() * v_k; // bottleneck\n        tmp.noalias()  = V_k1.adjoint()  * v_k;\n        y_k.noalias() -= Y_k.leftCols(k) * tmp;\n        tmp.noalias()  = X_k1.adjoint()  * v_k;\n        y_k.noalias() -= U_k1.adjoint()  * tmp;\n        y_k *= numext::conj(tau_v);\n      }\n\n      // 4 - update k-th row of A (it will become u_k)\n      SubRowType u_k( A.row(k).tail(remainingCols) );\n      u_k = u_k.conjugate();\n      {\n        u_k -= Y_k * A.row(k).head(k+1).adjoint();\n        if(k) u_k -= U_k1.adjoint() * X.row(k).head(k).adjoint();\n      }\n\n      // 5 - construct right Householder transform in-place\n      u_k.makeHouseholderInPlace(tau_u, upper_diagonal[k]);\n\n      // this eases the application of Householder 
transformations\n      // A(k,k+1) will store tau_u later\n      A(k,k+1) = Scalar(1);\n\n      // 6 - Compute x_k = tau_u * ( A*u_k - X_k-1*U_k-1^T*u_k - V_k*Y_k^T*u_k )\n      {\n        SubColumnType x_k ( X.col(k).tail(remainingRows-1) );\n        \n        // let's use the beginning of column k of X as temporary vectors\n        // note that tmp0 and tmp1 overlap\n        SubColumnType tmp0 ( X.col(k).head(k) ),\n                      tmp1 ( X.col(k).head(k+1) );\n                    \n        x_k.noalias()   = A.block(k+1,k+1, remainingRows-1,remainingCols) * u_k.transpose(); // bottleneck\n        tmp0.noalias()  = U_k1 * u_k.transpose();\n        x_k.noalias()  -= X_k1.bottomRows(remainingRows-1) * tmp0;\n        tmp1.noalias()  = Y_k.adjoint() * u_k.transpose();\n        x_k.noalias()  -= A.block(k+1,0, remainingRows-1,k+1) * tmp1;\n        x_k *= numext::conj(tau_u);\n        tau_u = numext::conj(tau_u);\n        u_k = u_k.conjugate();\n      }\n\n      if(k>0) A.coeffRef(k-1,k) = tau_u_prev;\n      tau_u_prev = tau_u;\n    }\n    else\n      A.coeffRef(k-1,k) = tau_u_prev;\n\n    A.coeffRef(k,k) = tau_v;\n  }\n  \n  if(bs<bcols)\n    A.coeffRef(bs-1,bs) = tau_u_prev;\n\n  // update the trailing block A11\n  if(bcols>bs && brows>bs)\n  {\n    SubMatType A11( A.bottomRightCorner(brows-bs,bcols-bs) );\n    SubMatType A10( A.block(bs,0, brows-bs,bs) );\n    SubMatType A01( A.block(0,bs, bs,bcols-bs) );\n    Scalar tmp = A01(bs-1,0);\n    A01(bs-1,0) = Literal(1);\n    A11.noalias() -= A10 * Y.topLeftCorner(bcols,bs).bottomRows(bcols-bs).adjoint();\n    A11.noalias() -= X.topLeftCorner(brows,bs).bottomRows(brows-bs) * A01;\n    A01(bs-1,0) = tmp;\n  }\n}\n\n/** \\internal\n  *\n  * Implementation of a block-bidiagonal reduction.\n  * It is based on the following paper:\n  *   The Design of a Parallel Dense Linear Algebra Software Library: Reduction to Hessenberg, Tridiagonal, and Bidiagonal Form.\n  *   by Jaeyoung Choi, Jack J. Dongarra, David W. Walker. 
(1995)\n  *   section 3.3\n  */\ntemplate<typename MatrixType, typename BidiagType>\nvoid upperbidiagonalization_inplace_blocked(MatrixType& A, BidiagType& bidiagonal,\n                                            Index maxBlockSize=32,\n                                            typename MatrixType::Scalar* /*tempData*/ = 0)\n{\n  typedef typename MatrixType::Scalar Scalar;\n  typedef Block<MatrixType,Dynamic,Dynamic> BlockType;\n\n  Index rows = A.rows();\n  Index cols = A.cols();\n  Index size = (std::min)(rows, cols);\n\n  // X and Y are work space\n  enum { StorageOrder = traits<MatrixType>::Flags & RowMajorBit };\n  Matrix<Scalar,\n         MatrixType::RowsAtCompileTime,\n         Dynamic,\n         StorageOrder,\n         MatrixType::MaxRowsAtCompileTime> X(rows,maxBlockSize);\n  Matrix<Scalar,\n         MatrixType::ColsAtCompileTime,\n         Dynamic,\n         StorageOrder,\n         MatrixType::MaxColsAtCompileTime> Y(cols,maxBlockSize);\n  Index blockSize = (std::min)(maxBlockSize,size);\n\n  Index k = 0;\n  for(k = 0; k < size; k += blockSize)\n  {\n    Index bs = (std::min)(size-k,blockSize);  // actual size of the block\n    Index brows = rows - k;                   // rows of the block\n    Index bcols = cols - k;                   // columns of the block\n\n    // partition the matrix A:\n    // \n    //      | A00 A01 A02 |\n    //      |             |\n    // A  = | A10 A11 A12 |\n    //      |             |\n    //      | A20 A21 A22 |\n    //\n    // where A11 is a bs x bs diagonal block,\n    // and let:\n    //      | A11 A12 |\n    //  B = |         |\n    //      | A21 A22 |\n\n    BlockType B = A.block(k,k,brows,bcols);\n    \n    // This stage performs the bidiagonalization of A11, A21, A12, and the update of A22.\n    // Finally, the algorithm continues on the updated A22.\n    //\n    // However, if B is too small, or A22 is empty, then let's use an unblocked strategy\n    if(k+bs==cols || bcols<48) // somewhat arbitrary threshold\n    {\n      upperbidiagonalization_inplace_unblocked(B,\n                                               &(bidiagonal.template diagonal<0>().coeffRef(k)),\n                                               &(bidiagonal.template diagonal<1>().coeffRef(k)),\n                                               X.data()\n                                              );\n      break; // We're done\n    }\n    else\n    {\n      upperbidiagonalization_blocked_helper<BlockType>( B,\n                                                        &(bidiagonal.template diagonal<0>().coeffRef(k)),\n                                                        &(bidiagonal.template diagonal<1>().coeffRef(k)),\n                                                        bs,\n                                                        X.topLeftCorner(brows,bs),\n                                                        Y.topLeftCorner(bcols,bs)\n                                                      );\n    }\n  }\n}\n\ntemplate<typename _MatrixType>\nUpperBidiagonalization<_MatrixType>& UpperBidiagonalization<_MatrixType>::computeUnblocked(const _MatrixType& matrix)\n{\n  Index rows = matrix.rows();\n  Index cols = matrix.cols();\n  EIGEN_ONLY_USED_FOR_DEBUG(cols);\n\n  eigen_assert(rows >= cols && \"UpperBidiagonalization is only for matrices satisfying rows>=cols.\");\n\n  m_householder = matrix;\n\n  ColVectorType temp(rows);\n\n  upperbidiagonalization_inplace_unblocked(m_householder,\n                                           &(m_bidiagonal.template diagonal<0>().coeffRef(0)),\n  
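/* destinations: main diagonal and first superdiagonal of the band-stored bidiagonal factor */ 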
                                         &(m_bidiagonal.template diagonal<1>().coeffRef(0)),\n                                           temp.data());\n\n  m_isInitialized = true;\n  return *this;\n}\n\ntemplate<typename _MatrixType>\nUpperBidiagonalization<_MatrixType>& UpperBidiagonalization<_MatrixType>::compute(const _MatrixType& matrix)\n{\n  Index rows = matrix.rows();\n  Index cols = matrix.cols();\n  EIGEN_ONLY_USED_FOR_DEBUG(rows);\n  EIGEN_ONLY_USED_FOR_DEBUG(cols);\n\n  eigen_assert(rows >= cols && \"UpperBidiagonalization is only for matrices satisfying rows>=cols.\");\n\n  m_householder = matrix;\n  upperbidiagonalization_inplace_blocked(m_householder, m_bidiagonal);\n            \n  m_isInitialized = true;\n  return *this;\n}\n\n#if 0\n/** \\return the Householder QR decomposition of \\c *this.\n  *\n  * \\sa class Bidiagonalization\n  */\ntemplate<typename Derived>\nconst UpperBidiagonalization<typename MatrixBase<Derived>::PlainObject>\nMatrixBase<Derived>::bidiagonalization() const\n{\n  return UpperBidiagonalization<PlainObject>(eval());\n}\n#endif\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_BIDIAGONALIZATION_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/SparseCholesky/SimplicialCholesky.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008-2012 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_SIMPLICIAL_CHOLESKY_H\n#define EIGEN_SIMPLICIAL_CHOLESKY_H\n\nnamespace Eigen { \n\nenum SimplicialCholeskyMode {\n  SimplicialCholeskyLLT,\n  SimplicialCholeskyLDLT\n};\n\nnamespace internal {\n  template<typename CholMatrixType, typename InputMatrixType>\n  struct simplicial_cholesky_grab_input {\n    typedef CholMatrixType const * ConstCholMatrixPtr;\n    static void run(const InputMatrixType& input, ConstCholMatrixPtr &pmat, CholMatrixType &tmp)\n    {\n      tmp = input;\n      pmat = &tmp;\n    }\n  };\n  \n  template<typename MatrixType>\n  struct simplicial_cholesky_grab_input<MatrixType,MatrixType> {\n    typedef MatrixType const * ConstMatrixPtr;\n    static void run(const MatrixType& input, ConstMatrixPtr &pmat, MatrixType &/*tmp*/)\n    {\n      pmat = &input;\n    }\n  };\n} // end namespace internal\n\n/** \\ingroup SparseCholesky_Module\n  * \\brief A base class for direct sparse Cholesky factorizations\n  *\n  * This is a base class for LL^T and LDL^T Cholesky factorizations of sparse matrices that are\n  * selfadjoint and positive definite. These factorizations allow for solving A.X = B where\n  * X and B can be either dense or sparse.\n  * \n  * In order to reduce the fill-in, a symmetric permutation P is applied prior to the factorization\n  * such that the factorized matrix is P A P^-1.\n  *\n  * \\tparam Derived the type of the derived class, that is the actual factorization type.\n  *\n  */\ntemplate<typename Derived>\nclass SimplicialCholeskyBase : public SparseSolverBase<Derived>\n{\n    typedef SparseSolverBase<Derived> Base;\n    using Base::m_isInitialized;\n    \n  public:\n    typedef typename internal::traits<Derived>::MatrixType MatrixType;\n    typedef typename internal::traits<Derived>::OrderingType OrderingType;\n    enum { UpLo = internal::traits<Derived>::UpLo };\n    typedef typename MatrixType::Scalar Scalar;\n    typedef typename MatrixType::RealScalar RealScalar;\n    typedef typename MatrixType::StorageIndex StorageIndex;\n    typedef SparseMatrix<Scalar,ColMajor,StorageIndex> CholMatrixType;\n    typedef CholMatrixType const * ConstCholMatrixPtr;\n    typedef Matrix<Scalar,Dynamic,1> VectorType;\n    typedef Matrix<StorageIndex,Dynamic,1> VectorI;\n\n    enum {\n      ColsAtCompileTime = MatrixType::ColsAtCompileTime,\n      MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime\n    };\n\n  public:\n    \n    using Base::derived;\n\n    /** Default constructor */\n    SimplicialCholeskyBase()\n      : m_info(Success),\n        m_factorizationIsOk(false),\n        m_analysisIsOk(false),\n        m_shiftOffset(0),\n        m_shiftScale(1)\n    {}\n\n    explicit SimplicialCholeskyBase(const MatrixType& matrix)\n      : m_info(Success),\n        m_factorizationIsOk(false),\n        m_analysisIsOk(false),\n        m_shiftOffset(0),\n        m_shiftScale(1)\n    {\n      derived().compute(matrix);\n    }\n\n    ~SimplicialCholeskyBase()\n    {\n    }\n\n    Derived& derived() { return *static_cast<Derived*>(this); }\n    const Derived& derived() const { return *static_cast<const Derived*>(this); }\n    \n    inline Index cols() const { return 
m_matrix.cols(); }\n    inline Index rows() const { return m_matrix.rows(); }\n    \n    /** \\brief Reports whether previous computation was successful.\n      *\n      * \\returns \\c Success if computation was successful,\n      *          \\c NumericalIssue if the matrix appears not to be positive definite.\n      */\n    ComputationInfo info() const\n    {\n      eigen_assert(m_isInitialized && \"Decomposition is not initialized.\");\n      return m_info;\n    }\n    \n    /** \\returns the permutation P\n      * \\sa permutationPinv() */\n    const PermutationMatrix<Dynamic,Dynamic,StorageIndex>& permutationP() const\n    { return m_P; }\n    \n    /** \\returns the inverse P^-1 of the permutation P\n      * \\sa permutationP() */\n    const PermutationMatrix<Dynamic,Dynamic,StorageIndex>& permutationPinv() const\n    { return m_Pinv; }\n\n    /** Sets the shift parameters that will be used to adjust the diagonal coefficients during the numerical factorization.\n      *\n      * During the numerical factorization, the diagonal coefficients are transformed by the following linear model:\\n\n      * \\c d_ii = \\a offset + \\a scale * \\c d_ii\n      *\n      * The default is the identity transformation with \\a offset=0, and \\a scale=1.\n      *\n      * \\returns a reference to \\c *this.\n      */\n    Derived& setShift(const RealScalar& offset, const RealScalar& scale = 1)\n    {\n      m_shiftOffset = offset;\n      m_shiftScale = scale;\n      return derived();\n    }\n\n#ifndef EIGEN_PARSED_BY_DOXYGEN\n    /** \\internal */\n    template<typename Stream>\n    void dumpMemory(Stream& s)\n    {\n      int total = 0;\n      s << \"  L:        \" << ((total+=(m_matrix.cols()+1) * sizeof(int) + m_matrix.nonZeros()*(sizeof(int)+sizeof(Scalar))) >> 20) << \"Mb\" << \"\\n\";\n      s << \"  diag:     \" << ((total+=m_diag.size() * sizeof(Scalar)) >> 20) << \"Mb\" << \"\\n\";\n      s << \"  tree:     \" << ((total+=m_parent.size() * sizeof(int)) >> 20) << \"Mb\" << \"\\n\";\n      s << \"  nonzeros: \" << ((total+=m_nonZerosPerCol.size() * sizeof(int)) >> 20) << \"Mb\" << \"\\n\";\n      s << \"  perm:     \" << ((total+=m_P.size() * sizeof(int)) >> 20) << \"Mb\" << \"\\n\";\n      s << \"  perm^-1:  \" << ((total+=m_Pinv.size() * sizeof(int)) >> 20) << \"Mb\" << \"\\n\";\n      s << \"  TOTAL:    \" << (total>> 20) << \"Mb\" << \"\\n\";\n    }\n\n    /** \\internal */\n    template<typename Rhs,typename Dest>\n    void _solve_impl(const MatrixBase<Rhs> &b, MatrixBase<Dest> &dest) const\n    {\n      eigen_assert(m_factorizationIsOk && \"The decomposition is not in a valid state for solving, you must first call either compute() or symbolic()/numeric()\");\n      eigen_assert(m_matrix.rows()==b.rows());\n\n      if(m_info!=Success)\n        return;\n\n      if(m_P.size()>0)\n        dest = m_P * b;\n      else\n        dest = b;\n\n      if(m_matrix.nonZeros()>0) // otherwise L==I\n        derived().matrixL().solveInPlace(dest);\n\n      if(m_diag.size()>0)\n        dest = m_diag.asDiagonal().inverse() * dest;\n\n      if (m_matrix.nonZeros()>0) // otherwise U==I\n        derived().matrixU().solveInPlace(dest);\n\n      if(m_P.size()>0)\n        dest = m_Pinv * dest;\n    }\n    \n    template<typename Rhs,typename Dest>\n    void _solve_impl(const SparseMatrixBase<Rhs> &b, SparseMatrixBase<Dest> &dest) const\n    {\n      internal::solve_sparse_through_dense_panels(derived(), b, dest);\n    }\n\n#endif // EIGEN_PARSED_BY_DOXYGEN\n\n  protected:\n    \n    /** Computes the sparse Cholesky 
decomposition of \\a matrix */\n    template<bool DoLDLT>\n    void compute(const MatrixType& matrix)\n    {\n      eigen_assert(matrix.rows()==matrix.cols());\n      Index size = matrix.cols();\n      CholMatrixType tmp(size,size);\n      ConstCholMatrixPtr pmat;\n      ordering(matrix, pmat, tmp);\n      analyzePattern_preordered(*pmat, DoLDLT);\n      factorize_preordered<DoLDLT>(*pmat);\n    }\n    \n    template<bool DoLDLT>\n    void factorize(const MatrixType& a)\n    {\n      eigen_assert(a.rows()==a.cols());\n      Index size = a.cols();\n      CholMatrixType tmp(size,size);\n      ConstCholMatrixPtr pmat;\n      \n      if(m_P.size() == 0 && (int(UpLo) & int(Upper)) == Upper)\n      {\n        // If there is no ordering, try to directly use the input matrix without any copy\n        internal::simplicial_cholesky_grab_input<CholMatrixType,MatrixType>::run(a, pmat, tmp);\n      }\n      else\n      {\n        tmp.template selfadjointView<Upper>() = a.template selfadjointView<UpLo>().twistedBy(m_P);\n        pmat = &tmp;\n      }\n      \n      factorize_preordered<DoLDLT>(*pmat);\n    }\n\n    template<bool DoLDLT>\n    void factorize_preordered(const CholMatrixType& a);\n\n    void analyzePattern(const MatrixType& a, bool doLDLT)\n    {\n      eigen_assert(a.rows()==a.cols());\n      Index size = a.cols();\n      CholMatrixType tmp(size,size);\n      ConstCholMatrixPtr pmat;\n      ordering(a, pmat, tmp);\n      analyzePattern_preordered(*pmat,doLDLT);\n    }\n    void analyzePattern_preordered(const CholMatrixType& a, bool doLDLT);\n    \n    void ordering(const MatrixType& a, ConstCholMatrixPtr &pmat, CholMatrixType& ap);\n\n    /** keeps off-diagonal entries; drops diagonal entries */\n    struct keep_diag {\n      inline bool operator() (const Index& row, const Index& col, const Scalar&) const\n      {\n        return row!=col;\n      }\n    };\n\n    mutable ComputationInfo m_info;\n    bool m_factorizationIsOk;\n    bool m_analysisIsOk;\n    \n    CholMatrixType m_matrix;\n    VectorType m_diag;                                // the diagonal coefficients (LDLT mode)\n    VectorI m_parent;                                 // elimination tree\n    VectorI m_nonZerosPerCol;\n    PermutationMatrix<Dynamic,Dynamic,StorageIndex> m_P;     // the permutation\n    PermutationMatrix<Dynamic,Dynamic,StorageIndex> m_Pinv;  // the inverse permutation\n\n    RealScalar m_shiftOffset;\n    RealScalar m_shiftScale;\n};\n\ntemplate<typename _MatrixType, int _UpLo = Lower, typename _Ordering = AMDOrdering<typename _MatrixType::StorageIndex> > class SimplicialLLT;\ntemplate<typename _MatrixType, int _UpLo = Lower, typename _Ordering = AMDOrdering<typename _MatrixType::StorageIndex> > class SimplicialLDLT;\ntemplate<typename _MatrixType, int _UpLo = Lower, typename _Ordering = AMDOrdering<typename _MatrixType::StorageIndex> > class SimplicialCholesky;\n\nnamespace internal {\n\ntemplate<typename _MatrixType, int _UpLo, typename _Ordering> struct traits<SimplicialLLT<_MatrixType,_UpLo,_Ordering> >\n{\n  typedef _MatrixType MatrixType;\n  typedef _Ordering OrderingType;\n  enum { UpLo = _UpLo };\n  typedef typename MatrixType::Scalar                         Scalar;\n  typedef typename MatrixType::StorageIndex                   StorageIndex;\n  typedef SparseMatrix<Scalar, ColMajor, StorageIndex>        CholMatrixType;\n  typedef TriangularView<const CholMatrixType, Eigen::Lower>  MatrixL;\n  typedef TriangularView<const typename CholMatrixType::AdjointReturnType, Eigen::Upper>   MatrixU;\n  static 
inline MatrixL getL(const CholMatrixType& m) { return MatrixL(m); }\n  static inline MatrixU getU(const CholMatrixType& m) { return MatrixU(m.adjoint()); }\n};\n\ntemplate<typename _MatrixType,int _UpLo, typename _Ordering> struct traits<SimplicialLDLT<_MatrixType,_UpLo,_Ordering> >\n{\n  typedef _MatrixType MatrixType;\n  typedef _Ordering OrderingType;\n  enum { UpLo = _UpLo };\n  typedef typename MatrixType::Scalar                             Scalar;\n  typedef typename MatrixType::StorageIndex                       StorageIndex;\n  typedef SparseMatrix<Scalar, ColMajor, StorageIndex>            CholMatrixType;\n  typedef TriangularView<const CholMatrixType, Eigen::UnitLower>  MatrixL;\n  typedef TriangularView<const typename CholMatrixType::AdjointReturnType, Eigen::UnitUpper> MatrixU;\n  static inline MatrixL getL(const CholMatrixType& m) { return MatrixL(m); }\n  static inline MatrixU getU(const CholMatrixType& m) { return MatrixU(m.adjoint()); }\n};\n\ntemplate<typename _MatrixType, int _UpLo, typename _Ordering> struct traits<SimplicialCholesky<_MatrixType,_UpLo,_Ordering> >\n{\n  typedef _MatrixType MatrixType;\n  typedef _Ordering OrderingType;\n  enum { UpLo = _UpLo };\n};\n\n}\n\n/** \\ingroup SparseCholesky_Module\n  * \\class SimplicialLLT\n  * \\brief A direct sparse LLT Cholesky factorization\n  *\n  * This class provides an LL^T Cholesky factorization of sparse matrices that are\n  * selfadjoint and positive definite. The factorization allows for solving A.X = B where\n  * X and B can be either dense or sparse.\n  * \n  * In order to reduce the fill-in, a symmetric permutation P is applied prior to the factorization\n  * such that the factorized matrix is P A P^-1.\n  *\n  * \\tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<>\n  * \\tparam _UpLo the triangular part that will be used for the computations. It can be Lower\n  *               or Upper. Default is Lower.\n  * \\tparam _Ordering The ordering method to use, either AMDOrdering<> or NaturalOrdering<>. 
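For instance, a sketch that disables reordering by selecting the natural ordering explicitly (assuming a double SparseMatrix with the default int storage index):\n  * \\code\n  * typedef Eigen::SimplicialLLT<Eigen::SparseMatrix<double>, Eigen::Lower, Eigen::NaturalOrdering<int> > NoReorderLLT;\n  * \\endcode\n  * 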
Default is AMDOrdering<>\n  *\n  * \\implsparsesolverconcept\n  *\n  * \\sa class SimplicialLDLT, class AMDOrdering, class NaturalOrdering\n  */\ntemplate<typename _MatrixType, int _UpLo, typename _Ordering>\n    class SimplicialLLT : public SimplicialCholeskyBase<SimplicialLLT<_MatrixType,_UpLo,_Ordering> >\n{\npublic:\n    typedef _MatrixType MatrixType;\n    enum { UpLo = _UpLo };\n    typedef SimplicialCholeskyBase<SimplicialLLT> Base;\n    typedef typename MatrixType::Scalar Scalar;\n    typedef typename MatrixType::RealScalar RealScalar;\n    typedef typename MatrixType::StorageIndex StorageIndex;\n    typedef SparseMatrix<Scalar,ColMajor,Index> CholMatrixType;\n    typedef Matrix<Scalar,Dynamic,1> VectorType;\n    typedef internal::traits<SimplicialLLT> Traits;\n    typedef typename Traits::MatrixL  MatrixL;\n    typedef typename Traits::MatrixU  MatrixU;\npublic:\n    /** Default constructor */\n    SimplicialLLT() : Base() {}\n    /** Constructs and performs the LLT factorization of \\a matrix */\n    explicit SimplicialLLT(const MatrixType& matrix)\n        : Base(matrix) {}\n\n    /** \\returns an expression of the factor L */\n    inline const MatrixL matrixL() const {\n        eigen_assert(Base::m_factorizationIsOk && \"Simplicial LLT not factorized\");\n        return Traits::getL(Base::m_matrix);\n    }\n\n    /** \\returns an expression of the factor U (= L^*) */\n    inline const MatrixU matrixU() const {\n        eigen_assert(Base::m_factorizationIsOk && \"Simplicial LLT not factorized\");\n        return Traits::getU(Base::m_matrix);\n    }\n    \n    /** Computes the sparse Cholesky decomposition of \\a matrix */\n    SimplicialLLT& compute(const MatrixType& matrix)\n    {\n      Base::template compute<false>(matrix);\n      return *this;\n    }\n\n    /** Performs a symbolic decomposition on the sparsity pattern of \\a matrix.\n      *\n      * This function is particularly useful when solving several problems having the same structure.\n      *\n      * \\sa factorize()\n      */\n    void analyzePattern(const MatrixType& a)\n    {\n      Base::analyzePattern(a, false);\n    }\n\n    /** Performs a numeric decomposition of \\a matrix\n      *\n      * The given matrix must have the same sparsity pattern as the matrix on which the symbolic decomposition has been performed.\n      *\n      * \\sa analyzePattern()\n      */\n    void factorize(const MatrixType& a)\n    {\n      Base::template factorize<false>(a);\n    }\n\n    /** \\returns the determinant of the underlying matrix from the current factorization */\n    Scalar determinant() const\n    {\n      Scalar detL = Base::m_matrix.diagonal().prod();\n      return numext::abs2(detL);\n    }\n};\n\n/** \\ingroup SparseCholesky_Module\n  * \\class SimplicialLDLT\n  * \\brief A direct sparse LDLT Cholesky factorization without square root.\n  *\n  * This class provides an LDL^T Cholesky factorization without square root of sparse matrices that are\n  * selfadjoint and positive definite. The factorization allows for solving A.X = B where\n  * X and B can be either dense or sparse.\n  * \n  * In order to reduce the fill-in, a symmetric permutation P is applied prior to the factorization\n  * such that the factorized matrix is P A P^-1.\n  *\n  * \\tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<>\n  * \\tparam _UpLo the triangular part that will be used for the computations. It can be Lower\n  *               or Upper. 
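A minimal solve sketch (\\c A and \\c b are placeholder names for a selfadjoint positive-definite SparseMatrix and a dense right-hand side):\n  * \\code\n  * Eigen::SimplicialLDLT<Eigen::SparseMatrix<double>, Eigen::Lower> solver(A);\n  * Eigen::VectorXd x = solver.solve(b);\n  * \\endcode\n  * 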
Default is Lower.\n  * \\tparam _Ordering The ordering method to use, either AMDOrdering<> or NaturalOrdering<>. Default is AMDOrdering<>\n  *\n  * \\implsparsesolverconcept\n  *\n  * \\sa class SimplicialLLT, class AMDOrdering, class NaturalOrdering\n  */\ntemplate<typename _MatrixType, int _UpLo, typename _Ordering>\n    class SimplicialLDLT : public SimplicialCholeskyBase<SimplicialLDLT<_MatrixType,_UpLo,_Ordering> >\n{\npublic:\n    typedef _MatrixType MatrixType;\n    enum { UpLo = _UpLo };\n    typedef SimplicialCholeskyBase<SimplicialLDLT> Base;\n    typedef typename MatrixType::Scalar Scalar;\n    typedef typename MatrixType::RealScalar RealScalar;\n    typedef typename MatrixType::StorageIndex StorageIndex;\n    typedef SparseMatrix<Scalar,ColMajor,StorageIndex> CholMatrixType;\n    typedef Matrix<Scalar,Dynamic,1> VectorType;\n    typedef internal::traits<SimplicialLDLT> Traits;\n    typedef typename Traits::MatrixL  MatrixL;\n    typedef typename Traits::MatrixU  MatrixU;\npublic:\n    /** Default constructor */\n    SimplicialLDLT() : Base() {}\n\n    /** Constructs and performs the LDLT factorization of \\a matrix */\n    explicit SimplicialLDLT(const MatrixType& matrix)\n        : Base(matrix) {}\n\n    /** \\returns a vector expression of the diagonal D */\n    inline const VectorType vectorD() const {\n        eigen_assert(Base::m_factorizationIsOk && \"Simplicial LDLT not factorized\");\n        return Base::m_diag;\n    }\n    /** \\returns an expression of the factor L */\n    inline const MatrixL matrixL() const {\n        eigen_assert(Base::m_factorizationIsOk && \"Simplicial LDLT not factorized\");\n        return Traits::getL(Base::m_matrix);\n    }\n\n    /** \\returns an expression of the factor U (= L^*) */\n    inline const MatrixU matrixU() const {\n        eigen_assert(Base::m_factorizationIsOk && \"Simplicial LDLT not factorized\");\n        return Traits::getU(Base::m_matrix);\n    }\n\n    /** Computes the sparse Cholesky decomposition of \\a matrix */\n    SimplicialLDLT& compute(const MatrixType& matrix)\n    {\n      Base::template compute<true>(matrix);\n      return *this;\n    }\n    \n    /** Performs a symbolic decomposition on the sparsity pattern of \\a matrix.\n      *\n      * This function is particularly useful when solving several problems having the same structure.\n      *\n      * \\sa factorize()\n      */\n    void analyzePattern(const MatrixType& a)\n    {\n      Base::analyzePattern(a, true);\n    }\n\n    /** Performs a numeric decomposition of \\a matrix\n      *\n      * The given matrix must have the same sparsity pattern as the matrix on which the symbolic decomposition has been performed.\n      *\n      * \\sa analyzePattern()\n      */\n    void factorize(const MatrixType& a)\n    {\n      Base::template factorize<true>(a);\n    }\n\n    /** \\returns the determinant of the underlying matrix from the current factorization */\n    Scalar determinant() const\n    {\n      return Base::m_diag.prod();\n    }\n};\n\n/** \\deprecated use SimplicialLDLT or class SimplicialLLT\n  * \\ingroup SparseCholesky_Module\n  * \\class SimplicialCholesky\n  *\n  * \\sa class SimplicialLDLT, class SimplicialLLT\n  */\ntemplate<typename _MatrixType, int _UpLo, typename _Ordering>\n    class SimplicialCholesky : public SimplicialCholeskyBase<SimplicialCholesky<_MatrixType,_UpLo,_Ordering> >\n{\npublic:\n    typedef _MatrixType MatrixType;\n    enum { UpLo = _UpLo };\n    typedef SimplicialCholeskyBase<SimplicialCholesky> Base;\n    typedef typename 
MatrixType::Scalar Scalar;\n    typedef typename MatrixType::RealScalar RealScalar;\n    typedef typename MatrixType::StorageIndex StorageIndex;\n    typedef SparseMatrix<Scalar,ColMajor,StorageIndex> CholMatrixType;\n    typedef Matrix<Scalar,Dynamic,1> VectorType;\n    typedef internal::traits<SimplicialCholesky> Traits;\n    typedef internal::traits<SimplicialLDLT<MatrixType,UpLo> > LDLTTraits;\n    typedef internal::traits<SimplicialLLT<MatrixType,UpLo>  > LLTTraits;\n  public:\n    SimplicialCholesky() : Base(), m_LDLT(true) {}\n\n    explicit SimplicialCholesky(const MatrixType& matrix)\n      : Base(), m_LDLT(true)\n    {\n      compute(matrix);\n    }\n\n    SimplicialCholesky& setMode(SimplicialCholeskyMode mode)\n    {\n      switch(mode)\n      {\n      case SimplicialCholeskyLLT:\n        m_LDLT = false;\n        break;\n      case SimplicialCholeskyLDLT:\n        m_LDLT = true;\n        break;\n      default:\n        break;\n      }\n\n      return *this;\n    }\n\n    inline const VectorType vectorD() const {\n        eigen_assert(Base::m_factorizationIsOk && \"Simplicial Cholesky not factorized\");\n        return Base::m_diag;\n    }\n    inline const CholMatrixType rawMatrix() const {\n        eigen_assert(Base::m_factorizationIsOk && \"Simplicial Cholesky not factorized\");\n        return Base::m_matrix;\n    }\n    \n    /** Computes the sparse Cholesky decomposition of \\a matrix */\n    SimplicialCholesky& compute(const MatrixType& matrix)\n    {\n      if(m_LDLT)\n        Base::template compute<true>(matrix);\n      else\n        Base::template compute<false>(matrix);\n      return *this;\n    }\n\n    /** Performs a symbolic decomposition on the sparsity pattern of \\a matrix.\n      *\n      * This function is particularly useful when solving several problems having the same structure.\n      *\n      * \\sa factorize()\n      */\n    void analyzePattern(const MatrixType& a)\n    {\n      Base::analyzePattern(a, m_LDLT);\n    }\n\n    /** Performs a numeric decomposition of \\a matrix\n      *\n      * The given matrix must have the same sparsity pattern as the matrix on which the symbolic decomposition has been performed.\n      *\n      * \\sa analyzePattern()\n      */\n    void factorize(const MatrixType& a)\n    {\n      if(m_LDLT)\n        Base::template factorize<true>(a);\n      else\n        Base::template factorize<false>(a);\n    }\n\n    /** \\internal */\n    template<typename Rhs,typename Dest>\n    void _solve_impl(const MatrixBase<Rhs> &b, MatrixBase<Dest> &dest) const\n    {\n      eigen_assert(Base::m_factorizationIsOk && \"The decomposition is not in a valid state for solving, you must first call either compute() or symbolic()/numeric()\");\n      eigen_assert(Base::m_matrix.rows()==b.rows());\n\n      if(Base::m_info!=Success)\n        return;\n\n      if(Base::m_P.size()>0)\n        dest = Base::m_P * b;\n      else\n        dest = b;\n\n      if(Base::m_matrix.nonZeros()>0) // otherwise L==I\n      {\n        if(m_LDLT)\n          LDLTTraits::getL(Base::m_matrix).solveInPlace(dest);\n        else\n          LLTTraits::getL(Base::m_matrix).solveInPlace(dest);\n      }\n\n      if(Base::m_diag.size()>0)\n        dest = Base::m_diag.real().asDiagonal().inverse() * dest;\n\n      if (Base::m_matrix.nonZeros()>0) // otherwise U==I\n      {\n        if(m_LDLT)\n          LDLTTraits::getU(Base::m_matrix).solveInPlace(dest);\n        else\n          LLTTraits::getU(Base::m_matrix).solveInPlace(dest);\n      }\n\n      if(Base::m_P.size()>0)\n        dest = 
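/* map the solution back from the fill-reducing ordering */ 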
Base::m_Pinv * dest;\n    }\n    \n    /** \\internal */\n    template<typename Rhs,typename Dest>\n    void _solve_impl(const SparseMatrixBase<Rhs> &b, SparseMatrixBase<Dest> &dest) const\n    {\n      internal::solve_sparse_through_dense_panels(*this, b, dest);\n    }\n    \n    Scalar determinant() const\n    {\n      if(m_LDLT)\n      {\n        return Base::m_diag.prod();\n      }\n      else\n      {\n        Scalar detL = Diagonal<const CholMatrixType>(Base::m_matrix).prod();\n        return numext::abs2(detL);\n      }\n    }\n    \n  protected:\n    bool m_LDLT;\n};\n\ntemplate<typename Derived>\nvoid SimplicialCholeskyBase<Derived>::ordering(const MatrixType& a, ConstCholMatrixPtr &pmat, CholMatrixType& ap)\n{\n  eigen_assert(a.rows()==a.cols());\n  const Index size = a.rows();\n  pmat = &ap;\n  // Note that ordering methods compute the inverse permutation\n  if(!internal::is_same<OrderingType,NaturalOrdering<Index> >::value)\n  {\n    {\n      CholMatrixType C;\n      C = a.template selfadjointView<UpLo>();\n      \n      OrderingType ordering;\n      ordering(C,m_Pinv);\n    }\n\n    if(m_Pinv.size()>0) m_P = m_Pinv.inverse();\n    else                m_P.resize(0);\n    \n    ap.resize(size,size);\n    ap.template selfadjointView<Upper>() = a.template selfadjointView<UpLo>().twistedBy(m_P);\n  }\n  else\n  {\n    m_Pinv.resize(0);\n    m_P.resize(0);\n    if(int(UpLo)==int(Lower) || MatrixType::IsRowMajor)\n    {\n      // we have to transpose the lower part to the upper one\n      ap.resize(size,size);\n      ap.template selfadjointView<Upper>() = a.template selfadjointView<UpLo>();\n    }\n    else\n      internal::simplicial_cholesky_grab_input<CholMatrixType,MatrixType>::run(a, pmat, ap);\n  }  \n}\n\n} // end namespace Eigen\n\n#endif // EIGEN_SIMPLICIAL_CHOLESKY_H\n"
  },
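  {
    "path": "example/eigen_sparse_notes/simplicial_cholesky_demo.cpp",
    "content": "// Hypothetical example file (path and code invented for illustration; not\n// part of Eigen or regenie): a minimal sketch of the public API implemented\n// by the vendored SparseCholesky/SimplicialCholesky.h above. It factorizes a\n// small symmetric positive-definite sparse matrix with SimplicialLDLT and\n// solves A x = b; SimplicialLLT is used the same way.\n#include <Eigen/Sparse>\n#include <iostream>\n#include <vector>\n\nint main()\n{\n  // Build a 3x3 SPD tridiagonal matrix from triplets.\n  std::vector<Eigen::Triplet<double>> trips = {\n    {0, 0, 4.0}, {1, 1, 4.0}, {2, 2, 4.0},\n    {0, 1, -1.0}, {1, 0, -1.0}, {1, 2, -1.0}, {2, 1, -1.0}\n  };\n  Eigen::SparseMatrix<double> A(3, 3);\n  A.setFromTriplets(trips.begin(), trips.end());\n\n  Eigen::VectorXd b(3);\n  b << 1.0, 2.0, 3.0;\n\n  // compute() = analyzePattern() + factorize(); vectorD() exposes the\n  // diagonal of the L*D*L^T factorization (see the header above).\n  Eigen::SimplicialLDLT<Eigen::SparseMatrix<double>> ldlt(A);\n  if (ldlt.info() != Eigen::Success) return 1;\n\n  Eigen::VectorXd x = ldlt.solve(b);\n  std::cout << \"x       = \" << x.transpose() << \"\\n\";\n  std::cout << \"diag(D) = \" << ldlt.vectorD().transpose() << \"\\n\";\n  std::cout << \"det(A)  = \" << ldlt.determinant() << \"\\n\";\n  return 0;\n}\n"
  },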
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008-2012 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n/*\nNOTE: these functions have been adapted from the LDL library:\n\nLDL Copyright (c) 2005 by Timothy A. Davis.  All Rights Reserved.\n\nThe author of LDL, Timothy A. Davis., has executed a license with Google LLC\nto permit distribution of this code and derivative works as part of Eigen under\nthe Mozilla Public License v. 2.0, as stated at the top of this file.\n */\n\n#ifndef EIGEN_SIMPLICIAL_CHOLESKY_IMPL_H\n#define EIGEN_SIMPLICIAL_CHOLESKY_IMPL_H\n\nnamespace Eigen {\n\ntemplate<typename Derived>\nvoid SimplicialCholeskyBase<Derived>::analyzePattern_preordered(const CholMatrixType& ap, bool doLDLT)\n{\n  const StorageIndex size = StorageIndex(ap.rows());\n  m_matrix.resize(size, size);\n  m_parent.resize(size);\n  m_nonZerosPerCol.resize(size);\n\n  ei_declare_aligned_stack_constructed_variable(StorageIndex, tags, size, 0);\n\n  for(StorageIndex k = 0; k < size; ++k)\n  {\n    /* L(k,:) pattern: all nodes reachable in etree from nz in A(0:k-1,k) */\n    m_parent[k] = -1;             /* parent of k is not yet known */\n    tags[k] = k;                  /* mark node k as visited */\n    m_nonZerosPerCol[k] = 0;      /* count of nonzeros in column k of L */\n    for(typename CholMatrixType::InnerIterator it(ap,k); it; ++it)\n    {\n      StorageIndex i = it.index();\n      if(i < k)\n      {\n        /* follow path from i to root of etree, stop at flagged node */\n        for(; tags[i] != k; i = m_parent[i])\n        {\n          /* find parent of i if not yet determined */\n          if (m_parent[i] == -1)\n            m_parent[i] = k;\n          m_nonZerosPerCol[i]++;        /* L (k,i) is nonzero */\n          tags[i] = k;                  /* mark i as visited */\n        }\n      }\n    }\n  }\n\n  /* construct Lp index array from m_nonZerosPerCol column counts */\n  StorageIndex* Lp = m_matrix.outerIndexPtr();\n  Lp[0] = 0;\n  for(StorageIndex k = 0; k < size; ++k)\n    Lp[k+1] = Lp[k] + m_nonZerosPerCol[k] + (doLDLT ? 0 : 1);\n\n  m_matrix.resizeNonZeros(Lp[size]);\n\n  m_isInitialized     = true;\n  m_info              = Success;\n  m_analysisIsOk      = true;\n  m_factorizationIsOk = false;\n}\n\n\ntemplate<typename Derived>\ntemplate<bool DoLDLT>\nvoid SimplicialCholeskyBase<Derived>::factorize_preordered(const CholMatrixType& ap)\n{\n  using std::sqrt;\n\n  eigen_assert(m_analysisIsOk && \"You must first call analyzePattern()\");\n  eigen_assert(ap.rows()==ap.cols());\n  eigen_assert(m_parent.size()==ap.rows());\n  eigen_assert(m_nonZerosPerCol.size()==ap.rows());\n\n  const StorageIndex size = StorageIndex(ap.rows());\n  const StorageIndex* Lp = m_matrix.outerIndexPtr();\n  StorageIndex* Li = m_matrix.innerIndexPtr();\n  Scalar* Lx = m_matrix.valuePtr();\n\n  ei_declare_aligned_stack_constructed_variable(Scalar, y, size, 0);\n  ei_declare_aligned_stack_constructed_variable(StorageIndex,  pattern, size, 0);\n  ei_declare_aligned_stack_constructed_variable(StorageIndex,  tags, size, 0);\n\n  bool ok = true;\n  m_diag.resize(DoLDLT ? 
size : 0);\n\n  for(StorageIndex k = 0; k < size; ++k)\n  {\n    // compute nonzero pattern of kth row of L, in topological order\n    y[k] = Scalar(0);                     // Y(0:k) is now all zero\n    StorageIndex top = size;               // stack for pattern is empty\n    tags[k] = k;                    // mark node k as visited\n    m_nonZerosPerCol[k] = 0;        // count of nonzeros in column k of L\n    for(typename CholMatrixType::InnerIterator it(ap,k); it; ++it)\n    {\n      StorageIndex i = it.index();\n      if(i <= k)\n      {\n        y[i] += numext::conj(it.value());            /* scatter A(i,k) into Y (sum duplicates) */\n        Index len;\n        for(len = 0; tags[i] != k; i = m_parent[i])\n        {\n          pattern[len++] = i;     /* L(k,i) is nonzero */\n          tags[i] = k;            /* mark i as visited */\n        }\n        while(len > 0)\n          pattern[--top] = pattern[--len];\n      }\n    }\n\n    /* compute numerical values kth row of L (a sparse triangular solve) */\n\n    RealScalar d = numext::real(y[k]) * m_shiftScale + m_shiftOffset;    // get D(k,k), apply the shift function, and clear Y(k)\n    y[k] = Scalar(0);\n    for(; top < size; ++top)\n    {\n      Index i = pattern[top];       /* pattern[top:n-1] is pattern of L(:,k) */\n      Scalar yi = y[i];             /* get and clear Y(i) */\n      y[i] = Scalar(0);\n\n      /* the nonzero entry L(k,i) */\n      Scalar l_ki;\n      if(DoLDLT)\n        l_ki = yi / numext::real(m_diag[i]);\n      else\n        yi = l_ki = yi / Lx[Lp[i]];\n\n      Index p2 = Lp[i] + m_nonZerosPerCol[i];\n      Index p;\n      for(p = Lp[i] + (DoLDLT ? 0 : 1); p < p2; ++p)\n        y[Li[p]] -= numext::conj(Lx[p]) * yi;\n      d -= numext::real(l_ki * numext::conj(yi));\n      Li[p] = k;                          /* store L(k,i) in column form of L */\n      Lx[p] = l_ki;\n      ++m_nonZerosPerCol[i];              /* increment count of nonzeros in col i */\n    }\n    if(DoLDLT)\n    {\n      m_diag[k] = d;\n      if(d == RealScalar(0))\n      {\n        ok = false;                         /* failure, D(k,k) is zero */\n        break;\n      }\n    }\n    else\n    {\n      Index p = Lp[k] + m_nonZerosPerCol[k]++;\n      Li[p] = k ;                /* store L(k,k) = sqrt (d) in column k */\n      if(d <= RealScalar(0)) {\n        ok = false;              /* failure, matrix is not positive definite */\n        break;\n      }\n      Lx[p] = sqrt(d) ;\n    }\n  }\n\n  m_info = ok ? Success : NumericalIssue;\n  m_factorizationIsOk = true;\n}\n\n} // end namespace Eigen\n\n#endif // EIGEN_SIMPLICIAL_CHOLESKY_IMPL_H\n"
  },
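  {
    "path": "example/eigen_sparse_notes/pattern_reuse_demo.cpp",
    "content": "// Hypothetical example file (path and code invented for illustration; not\n// part of Eigen or regenie): a sketch of the symbolic/numeric split\n// implemented by the vendored SimplicialCholesky_impl.h above.\n// analyzePattern_preordered builds the elimination tree and column counts\n// once; factorize_preordered then only fills in numeric values, which pays\n// off when several matrices share one sparsity pattern (here A + t*I).\n#include <Eigen/Sparse>\n#include <iostream>\n#include <vector>\n\nint main()\n{\n  std::vector<Eigen::Triplet<double>> trips =\n    { {0, 0, 2.0}, {1, 1, 2.0}, {2, 2, 2.0}, {0, 1, -1.0}, {1, 0, -1.0} };\n  Eigen::SparseMatrix<double> A(3, 3);\n  A.setFromTriplets(trips.begin(), trips.end());\n\n  Eigen::SimplicialLDLT<Eigen::SparseMatrix<double>> solver;\n  solver.analyzePattern(A);              // symbolic step, done once\n\n  Eigen::VectorXd b = Eigen::VectorXd::Ones(3);\n  const double shifts[] = {1.0, 10.0};\n  for (double t : shifts)\n  {\n    Eigen::SparseMatrix<double> At = A;  // same sparsity pattern as A\n    for (int i = 0; i < 3; ++i) At.coeffRef(i, i) += t;\n    solver.factorize(At);                // numeric step only\n    if (solver.info() != Eigen::Success) return 1;\n    std::cout << solver.solve(b).transpose() << \"\\n\";\n  }\n  return 0;\n}\n"
  },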
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/SparseCore/AmbiVector.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_AMBIVECTOR_H\n#define EIGEN_AMBIVECTOR_H\n\nnamespace Eigen { \n\nnamespace internal {\n\n/** \\internal\n  * Hybrid sparse/dense vector class designed for intensive read-write operations.\n  *\n  * See BasicSparseLLT and SparseProduct for usage examples.\n  */\ntemplate<typename _Scalar, typename _StorageIndex>\nclass AmbiVector\n{\n  public:\n    typedef _Scalar Scalar;\n    typedef _StorageIndex StorageIndex;\n    typedef typename NumTraits<Scalar>::Real RealScalar;\n\n    explicit AmbiVector(Index size)\n      : m_buffer(0), m_zero(0), m_size(0), m_end(0), m_allocatedSize(0), m_allocatedElements(0), m_mode(-1)\n    {\n      resize(size);\n    }\n\n    void init(double estimatedDensity);\n    void init(int mode);\n\n    Index nonZeros() const;\n\n    /** Specifies a sub-vector to work on */\n    void setBounds(Index start, Index end) { m_start = convert_index(start); m_end = convert_index(end); }\n\n    void setZero();\n\n    void restart();\n    Scalar& coeffRef(Index i);\n    Scalar& coeff(Index i);\n\n    class Iterator;\n\n    ~AmbiVector() { delete[] m_buffer; }\n\n    void resize(Index size)\n    {\n      if (m_allocatedSize < size)\n        reallocate(size);\n      m_size = convert_index(size);\n    }\n\n    StorageIndex size() const { return m_size; }\n\n  protected:\n    StorageIndex convert_index(Index idx)\n    {\n      return internal::convert_index<StorageIndex>(idx);\n    }\n\n    void reallocate(Index size)\n    {\n      // if the size of the matrix is not too large, let's allocate a bit more than needed such\n      // that we can handle dense vector even in sparse mode.\n      delete[] m_buffer;\n      if (size<1000)\n      {\n        Index allocSize = (size * sizeof(ListEl) + sizeof(Scalar) - 1)/sizeof(Scalar);\n        m_allocatedElements = convert_index((allocSize*sizeof(Scalar))/sizeof(ListEl));\n        m_buffer = new Scalar[allocSize];\n      }\n      else\n      {\n        m_allocatedElements = convert_index((size*sizeof(Scalar))/sizeof(ListEl));\n        m_buffer = new Scalar[size];\n      }\n      m_size = convert_index(size);\n      m_start = 0;\n      m_end = m_size;\n    }\n\n    void reallocateSparse()\n    {\n      Index copyElements = m_allocatedElements;\n      m_allocatedElements = (std::min)(StorageIndex(m_allocatedElements*1.5),m_size);\n      Index allocSize = m_allocatedElements * sizeof(ListEl);\n      allocSize = (allocSize + sizeof(Scalar) - 1)/sizeof(Scalar);\n      Scalar* newBuffer = new Scalar[allocSize];\n      std::memcpy(newBuffer,  m_buffer,  copyElements * sizeof(ListEl));\n      delete[] m_buffer;\n      m_buffer = newBuffer;\n    }\n\n  protected:\n    // element type of the linked list\n    struct ListEl\n    {\n      StorageIndex next;\n      StorageIndex index;\n      Scalar value;\n    };\n\n    // used to store data in both mode\n    Scalar* m_buffer;\n    Scalar m_zero;\n    StorageIndex m_size;\n    StorageIndex m_start;\n    StorageIndex m_end;\n    StorageIndex m_allocatedSize;\n    StorageIndex m_allocatedElements;\n    StorageIndex m_mode;\n\n    // linked list mode\n    StorageIndex m_llStart;\n    StorageIndex m_llCurrent;\n    
StorageIndex m_llSize;\n};\n\n/** \\returns the number of non zeros in the current sub vector */\ntemplate<typename _Scalar,typename _StorageIndex>\nIndex AmbiVector<_Scalar,_StorageIndex>::nonZeros() const\n{\n  if (m_mode==IsSparse)\n    return m_llSize;\n  else\n    return m_end - m_start;\n}\n\ntemplate<typename _Scalar,typename _StorageIndex>\nvoid AmbiVector<_Scalar,_StorageIndex>::init(double estimatedDensity)\n{\n  if (estimatedDensity>0.1)\n    init(IsDense);\n  else\n    init(IsSparse);\n}\n\ntemplate<typename _Scalar,typename _StorageIndex>\nvoid AmbiVector<_Scalar,_StorageIndex>::init(int mode)\n{\n  m_mode = mode;\n  // This is only necessary in sparse mode, but we set these unconditionally to avoid some maybe-uninitialized warnings\n  // if (m_mode==IsSparse)\n  {\n    m_llSize = 0;\n    m_llStart = -1;\n  }\n}\n\n/** Must be called whenever we might perform a write access\n  * with an index smaller than the previous one.\n  *\n  * Don't worry, this function is extremely cheap.\n  */\ntemplate<typename _Scalar,typename _StorageIndex>\nvoid AmbiVector<_Scalar,_StorageIndex>::restart()\n{\n  m_llCurrent = m_llStart;\n}\n\n/** Set all coefficients of current subvector to zero */\ntemplate<typename _Scalar,typename _StorageIndex>\nvoid AmbiVector<_Scalar,_StorageIndex>::setZero()\n{\n  if (m_mode==IsDense)\n  {\n    for (Index i=m_start; i<m_end; ++i)\n      m_buffer[i] = Scalar(0);\n  }\n  else\n  {\n    eigen_assert(m_mode==IsSparse);\n    m_llSize = 0;\n    m_llStart = -1;\n  }\n}\n\ntemplate<typename _Scalar,typename _StorageIndex>\n_Scalar& AmbiVector<_Scalar,_StorageIndex>::coeffRef(Index i)\n{\n  if (m_mode==IsDense)\n    return m_buffer[i];\n  else\n  {\n    ListEl* EIGEN_RESTRICT llElements = reinterpret_cast<ListEl*>(m_buffer);\n    // TODO factorize the following code to reduce code generation\n    eigen_assert(m_mode==IsSparse);\n    if (m_llSize==0)\n    {\n      // this is the first element\n      m_llStart = 0;\n      m_llCurrent = 0;\n      ++m_llSize;\n      llElements[0].value = Scalar(0);\n      llElements[0].index = convert_index(i);\n      llElements[0].next = -1;\n      return llElements[0].value;\n    }\n    else if (i<llElements[m_llStart].index)\n    {\n      // this is going to be the new first element of the list\n      ListEl& el = llElements[m_llSize];\n      el.value = Scalar(0);\n      el.index = convert_index(i);\n      el.next = m_llStart;\n      m_llStart = m_llSize;\n      ++m_llSize;\n      m_llCurrent = m_llStart;\n      return el.value;\n    }\n    else\n    {\n      StorageIndex nextel = llElements[m_llCurrent].next;\n      eigen_assert(i>=llElements[m_llCurrent].index && \"you must call restart() before inserting an element with lower or equal index\");\n      while (nextel >= 0 && llElements[nextel].index<=i)\n      {\n        m_llCurrent = nextel;\n        nextel = llElements[nextel].next;\n      }\n\n      if (llElements[m_llCurrent].index==i)\n      {\n        // the coefficient already exists and we found it !\n        return llElements[m_llCurrent].value;\n      }\n      else\n      {\n        if (m_llSize>=m_allocatedElements)\n        {\n          reallocateSparse();\n          llElements = reinterpret_cast<ListEl*>(m_buffer);\n        }\n        eigen_internal_assert(m_llSize<m_allocatedElements && \"internal error: overflow in sparse mode\");\n        // let's insert a new coefficient\n        ListEl& el = llElements[m_llSize];\n        el.value = Scalar(0);\n        el.index = convert_index(i);\n        el.next = 
llElements[m_llCurrent].next;\n        llElements[m_llCurrent].next = m_llSize;\n        ++m_llSize;\n        return el.value;\n      }\n    }\n  }\n}\n\ntemplate<typename _Scalar,typename _StorageIndex>\n_Scalar& AmbiVector<_Scalar,_StorageIndex>::coeff(Index i)\n{\n  if (m_mode==IsDense)\n    return m_buffer[i];\n  else\n  {\n    ListEl* EIGEN_RESTRICT llElements = reinterpret_cast<ListEl*>(m_buffer);\n    eigen_assert(m_mode==IsSparse);\n    if ((m_llSize==0) || (i<llElements[m_llStart].index))\n    {\n      return m_zero;\n    }\n    else\n    {\n      Index elid = m_llStart;\n      while (elid >= 0 && llElements[elid].index<i)\n        elid = llElements[elid].next;\n\n      if (llElements[elid].index==i)\n        return llElements[m_llCurrent].value;\n      else\n        return m_zero;\n    }\n  }\n}\n\n/** Iterator over the nonzero coefficients */\ntemplate<typename _Scalar,typename _StorageIndex>\nclass AmbiVector<_Scalar,_StorageIndex>::Iterator\n{\n  public:\n    typedef _Scalar Scalar;\n    typedef typename NumTraits<Scalar>::Real RealScalar;\n\n    /** Default constructor\n      * \\param vec the vector on which we iterate\n      * \\param epsilon the minimal value used to prune zero coefficients.\n      * In practice, all coefficients having a magnitude smaller than \\a epsilon\n      * are skipped.\n      */\n    explicit Iterator(const AmbiVector& vec, const RealScalar& epsilon = 0)\n      : m_vector(vec)\n    {\n      using std::abs;\n      m_epsilon = epsilon;\n      m_isDense = m_vector.m_mode==IsDense;\n      if (m_isDense)\n      {\n        m_currentEl = 0;   // this is to avoid a compilation warning\n        m_cachedValue = 0; // this is to avoid a compilation warning\n        m_cachedIndex = m_vector.m_start-1;\n        ++(*this);\n      }\n      else\n      {\n        ListEl* EIGEN_RESTRICT llElements = reinterpret_cast<ListEl*>(m_vector.m_buffer);\n        m_currentEl = m_vector.m_llStart;\n        while (m_currentEl>=0 && abs(llElements[m_currentEl].value)<=m_epsilon)\n          m_currentEl = llElements[m_currentEl].next;\n        if (m_currentEl<0)\n        {\n          m_cachedValue = 0; // this is to avoid a compilation warning\n          m_cachedIndex = -1;\n        }\n        else\n        {\n          m_cachedIndex = llElements[m_currentEl].index;\n          m_cachedValue = llElements[m_currentEl].value;\n        }\n      }\n    }\n\n    StorageIndex index() const { return m_cachedIndex; }\n    Scalar value() const { return m_cachedValue; }\n\n    operator bool() const { return m_cachedIndex>=0; }\n\n    Iterator& operator++()\n    {\n      using std::abs;\n      if (m_isDense)\n      {\n        do {\n          ++m_cachedIndex;\n        } while (m_cachedIndex<m_vector.m_end && abs(m_vector.m_buffer[m_cachedIndex])<=m_epsilon);\n        if (m_cachedIndex<m_vector.m_end)\n          m_cachedValue = m_vector.m_buffer[m_cachedIndex];\n        else\n          m_cachedIndex=-1;\n      }\n      else\n      {\n        ListEl* EIGEN_RESTRICT llElements = reinterpret_cast<ListEl*>(m_vector.m_buffer);\n        do {\n          m_currentEl = llElements[m_currentEl].next;\n        } while (m_currentEl>=0 && abs(llElements[m_currentEl].value)<=m_epsilon);\n        if (m_currentEl<0)\n        {\n          m_cachedIndex = -1;\n        }\n        else\n        {\n          m_cachedIndex = llElements[m_currentEl].index;\n          m_cachedValue = llElements[m_currentEl].value;\n        }\n      }\n      return *this;\n    }\n\n  protected:\n    const AmbiVector& m_vector; // the 
target vector\n    StorageIndex m_currentEl;   // the current element in sparse/linked-list mode\n    RealScalar m_epsilon;       // epsilon used to prune zero coefficients\n    StorageIndex m_cachedIndex; // current coordinate\n    Scalar m_cachedValue;       // current value\n    bool m_isDense;             // mode of the vector\n};\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_AMBIVECTOR_H\n"
  },
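  {
    "path": "example/eigen_sparse_notes/hybrid_accumulator_sketch.cpp",
    "content": "// Hypothetical illustration (all names invented; not part of Eigen or\n// regenie): a self-contained sketch of the idea behind internal::AmbiVector\n// above. The accumulator for one output column is kept either as a dense\n// buffer or as a sorted map of (index, value) pairs standing in for\n// AmbiVector's linked list, and the representation is picked up front from\n// an estimated density (the vendored code switches to dense mode above\n// roughly 10% fill).\n#include <cstdio>\n#include <map>\n#include <vector>\n\nstruct HybridAccumulator {\n  static constexpr double kDenseThreshold = 0.1; // mirrors AmbiVector's 0.1\n  bool dense;\n  std::vector<double> buf;    // dense-mode storage\n  std::map<int, double> list; // stands in for the linked-list mode\n\n  HybridAccumulator(int size, double estimatedDensity)\n    : dense(estimatedDensity > kDenseThreshold), buf(dense ? size : 0) {}\n\n  // Writing through coeffRef default-inserts a zero in sparse mode, as in\n  // AmbiVector::coeffRef.\n  double& coeffRef(int i) { return dense ? buf[i] : list[i]; }\n\n  // Visit nonzeros, skipping |value| <= eps as AmbiVector::Iterator does.\n  template<class F> void forEachNonZero(double eps, F f) const {\n    if (dense) {\n      for (int i = 0; i < (int)buf.size(); ++i)\n        if (buf[i] > eps || buf[i] < -eps) f(i, buf[i]);\n    } else {\n      for (const auto& kv : list)\n        if (kv.second > eps || kv.second < -eps) f(kv.first, kv.second);\n    }\n  }\n};\n\nint main() {\n  HybridAccumulator acc(1000, 0.01); // low estimated density => sparse mode\n  acc.coeffRef(5)  += 1.5;\n  acc.coeffRef(42) += 2.5;\n  acc.coeffRef(5)  += 1.0;           // duplicates accumulate\n  acc.forEachNonZero(0.0, [](int i, double v) { std::printf(\"%d: %g\\n\", i, v); });\n  return 0;\n}\n"
  },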
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/SparseCore/CompressedStorage.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_COMPRESSED_STORAGE_H\n#define EIGEN_COMPRESSED_STORAGE_H\n\nnamespace Eigen { \n\nnamespace internal {\n\n/** \\internal\n  * Stores a sparse set of values as a list of values and a list of indices.\n  *\n  */\ntemplate<typename _Scalar,typename _StorageIndex>\nclass CompressedStorage\n{\n  public:\n\n    typedef _Scalar Scalar;\n    typedef _StorageIndex StorageIndex;\n\n  protected:\n\n    typedef typename NumTraits<Scalar>::Real RealScalar;\n\n  public:\n\n    CompressedStorage()\n      : m_values(0), m_indices(0), m_size(0), m_allocatedSize(0)\n    {}\n\n    explicit CompressedStorage(Index size)\n      : m_values(0), m_indices(0), m_size(0), m_allocatedSize(0)\n    {\n      resize(size);\n    }\n\n    CompressedStorage(const CompressedStorage& other)\n      : m_values(0), m_indices(0), m_size(0), m_allocatedSize(0)\n    {\n      *this = other;\n    }\n\n    CompressedStorage& operator=(const CompressedStorage& other)\n    {\n      resize(other.size());\n      if(other.size()>0)\n      {\n        internal::smart_copy(other.m_values,  other.m_values  + m_size, m_values);\n        internal::smart_copy(other.m_indices, other.m_indices + m_size, m_indices);\n      }\n      return *this;\n    }\n\n    void swap(CompressedStorage& other)\n    {\n      std::swap(m_values, other.m_values);\n      std::swap(m_indices, other.m_indices);\n      std::swap(m_size, other.m_size);\n      std::swap(m_allocatedSize, other.m_allocatedSize);\n    }\n\n    ~CompressedStorage()\n    {\n      delete[] m_values;\n      delete[] m_indices;\n    }\n\n    void reserve(Index size)\n    {\n      Index newAllocatedSize = m_size + size;\n      if (newAllocatedSize > m_allocatedSize)\n        reallocate(newAllocatedSize);\n    }\n\n    void squeeze()\n    {\n      if (m_allocatedSize>m_size)\n        reallocate(m_size);\n    }\n\n    void resize(Index size, double reserveSizeFactor = 0)\n    {\n      if (m_allocatedSize<size)\n      {\n        Index realloc_size = (std::min<Index>)(NumTraits<StorageIndex>::highest(),  size + Index(reserveSizeFactor*double(size)));\n        if(realloc_size<size)\n          internal::throw_std_bad_alloc();\n        reallocate(realloc_size);\n      }\n      m_size = size;\n    }\n\n    void append(const Scalar& v, Index i)\n    {\n      Index id = m_size;\n      resize(m_size+1, 1);\n      m_values[id] = v;\n      m_indices[id] = internal::convert_index<StorageIndex>(i);\n    }\n\n    inline Index size() const { return m_size; }\n    inline Index allocatedSize() const { return m_allocatedSize; }\n    inline void clear() { m_size = 0; }\n\n    const Scalar* valuePtr() const { return m_values; }\n    Scalar* valuePtr() { return m_values; }\n    const StorageIndex* indexPtr() const { return m_indices; }\n    StorageIndex* indexPtr() { return m_indices; }\n\n    inline Scalar& value(Index i) { eigen_internal_assert(m_values!=0); return m_values[i]; }\n    inline const Scalar& value(Index i) const { eigen_internal_assert(m_values!=0); return m_values[i]; }\n\n    inline StorageIndex& index(Index i) { eigen_internal_assert(m_indices!=0); return m_indices[i]; }\n    inline const StorageIndex& 
index(Index i) const { eigen_internal_assert(m_indices!=0); return m_indices[i]; }\n\n    /** \\returns the largest \\c k such that for all \\c j in [0,k) index[\\c j]\\<\\a key */\n    inline Index searchLowerIndex(Index key) const\n    {\n      return searchLowerIndex(0, m_size, key);\n    }\n\n    /** \\returns the largest \\c k in [start,end) such that for all \\c j in [start,k) index[\\c j]\\<\\a key */\n    inline Index searchLowerIndex(Index start, Index end, Index key) const\n    {\n      while(end>start)\n      {\n        Index mid = (end+start)>>1;\n        if (m_indices[mid]<key)\n          start = mid+1;\n        else\n          end = mid;\n      }\n      return start;\n    }\n\n    /** \\returns the stored value at index \\a key\n      * If the value does not exist, then the value \\a defaultValue is returned without any insertion. */\n    inline Scalar at(Index key, const Scalar& defaultValue = Scalar(0)) const\n    {\n      if (m_size==0)\n        return defaultValue;\n      else if (key==m_indices[m_size-1])\n        return m_values[m_size-1];\n      // ^^  optimization: let's first check if it is the last coefficient\n      // (very common in high level algorithms)\n      const Index id = searchLowerIndex(0,m_size-1,key);\n      return ((id<m_size) && (m_indices[id]==key)) ? m_values[id] : defaultValue;\n    }\n\n    /** Like at(), but the search is performed in the range [start,end) */\n    inline Scalar atInRange(Index start, Index end, Index key, const Scalar &defaultValue = Scalar(0)) const\n    {\n      if (start>=end)\n        return defaultValue;\n      else if (end>start && key==m_indices[end-1])\n        return m_values[end-1];\n      // ^^  optimization: let's first check if it is the last coefficient\n      // (very common in high level algorithms)\n      const Index id = searchLowerIndex(start,end-1,key);\n      return ((id<end) && (m_indices[id]==key)) ? m_values[id] : defaultValue;\n    }\n\n    /** \\returns a reference to the value at index \\a key\n      * If the value does not exist, then the value \\a defaultValue is inserted\n      * such that the keys are sorted. 
*/\n    inline Scalar& atWithInsertion(Index key, const Scalar& defaultValue = Scalar(0))\n    {\n      Index id = searchLowerIndex(0,m_size,key);\n      if (id>=m_size || m_indices[id]!=key)\n      {\n        if (m_allocatedSize<m_size+1)\n        {\n          m_allocatedSize = 2*(m_size+1);\n          internal::scoped_array<Scalar> newValues(m_allocatedSize);\n          internal::scoped_array<StorageIndex> newIndices(m_allocatedSize);\n\n          // copy first chunk\n          internal::smart_copy(m_values,  m_values +id, newValues.ptr());\n          internal::smart_copy(m_indices, m_indices+id, newIndices.ptr());\n\n          // copy the rest\n          if(m_size>id)\n          {\n            internal::smart_copy(m_values +id,  m_values +m_size, newValues.ptr() +id+1);\n            internal::smart_copy(m_indices+id,  m_indices+m_size, newIndices.ptr()+id+1);\n          }\n          std::swap(m_values,newValues.ptr());\n          std::swap(m_indices,newIndices.ptr());\n        }\n        else if(m_size>id)\n        {\n          internal::smart_memmove(m_values +id, m_values +m_size, m_values +id+1);\n          internal::smart_memmove(m_indices+id, m_indices+m_size, m_indices+id+1);\n        }\n        m_size++;\n        m_indices[id] = internal::convert_index<StorageIndex>(key);\n        m_values[id] = defaultValue;\n      }\n      return m_values[id];\n    }\n\n    void moveChunk(Index from, Index to, Index chunkSize)\n    {\n      eigen_internal_assert(to+chunkSize <= m_size);\n      if(to>from && from+chunkSize>to)\n      {\n        // move backward\n        internal::smart_memmove(m_values+from,  m_values+from+chunkSize,  m_values+to);\n        internal::smart_memmove(m_indices+from, m_indices+from+chunkSize, m_indices+to);\n      }\n      else\n      {\n        internal::smart_copy(m_values+from,  m_values+from+chunkSize,  m_values+to);\n        internal::smart_copy(m_indices+from, m_indices+from+chunkSize, m_indices+to);\n      }\n    }\n\n    void prune(const Scalar& reference, const RealScalar& epsilon = NumTraits<RealScalar>::dummy_precision())\n    {\n      Index k = 0;\n      Index n = size();\n      for (Index i=0; i<n; ++i)\n      {\n        if (!internal::isMuchSmallerThan(value(i), reference, epsilon))\n        {\n          value(k) = value(i);\n          index(k) = index(i);\n          ++k;\n        }\n      }\n      resize(k,0);\n    }\n\n  protected:\n\n    inline void reallocate(Index size)\n    {\n      #ifdef EIGEN_SPARSE_COMPRESSED_STORAGE_REALLOCATE_PLUGIN\n        EIGEN_SPARSE_COMPRESSED_STORAGE_REALLOCATE_PLUGIN\n      #endif\n      eigen_internal_assert(size!=m_allocatedSize);\n      internal::scoped_array<Scalar> newValues(size);\n      internal::scoped_array<StorageIndex> newIndices(size);\n      Index copySize = (std::min)(size, m_size);\n      if (copySize>0) {\n        internal::smart_copy(m_values, m_values+copySize, newValues.ptr());\n        internal::smart_copy(m_indices, m_indices+copySize, newIndices.ptr());\n      }\n      std::swap(m_values,newValues.ptr());\n      std::swap(m_indices,newIndices.ptr());\n      m_allocatedSize = size;\n    }\n\n  protected:\n    Scalar* m_values;\n    StorageIndex* m_indices;\n    Index m_size;\n    Index m_allocatedSize;\n\n};\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_COMPRESSED_STORAGE_H\n"
  },
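  {
    "path": "example/eigen_sparse_notes/compressed_storage_search_sketch.cpp",
    "content": "// Hypothetical illustration (all names invented; not part of Eigen or\n// regenie): a self-contained sketch of CompressedStorage::searchLowerIndex()\n// and at() from the header above. A lookup in the sorted index array is a\n// plain binary lower bound, plus one shortcut: at() first tests the last\n// stored index, because high-level sparse algorithms most often touch the\n// trailing coefficient.\n#include <cstdio>\n#include <vector>\n\n// Smallest k in [start,end) with indices[k] >= key, i.e. all j in [start,k)\n// satisfy indices[j] < key, matching the header above.\nstatic int searchLowerIndex(const std::vector<int>& indices,\n                            int start, int end, int key) {\n  while (end > start) {\n    int mid = (start + end) >> 1;\n    if (indices[mid] < key) start = mid + 1;\n    else                    end = mid;\n  }\n  return start;\n}\n\nstatic double at(const std::vector<int>& indices,\n                 const std::vector<double>& values,\n                 int key, double defaultValue = 0.0) {\n  if (indices.empty()) return defaultValue;\n  if (key == indices.back()) return values.back(); // common-case shortcut\n  int id = searchLowerIndex(indices, 0, (int)indices.size() - 1, key);\n  return (id < (int)indices.size() && indices[id] == key)\n           ? values[id] : defaultValue;\n}\n\nint main() {\n  std::vector<int>    idx = {1, 4, 7, 9};\n  std::vector<double> val = {1.0, 4.0, 7.0, 9.0};\n  // prints \"4 0 9\": key 4 is stored, key 5 falls back to the default,\n  // key 9 hits the last-index shortcut.\n  std::printf(\"%g %g %g\\n\", at(idx, val, 4), at(idx, val, 5), at(idx, val, 9));\n  return 0;\n}\n"
  },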
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008-2015 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_CONSERVATIVESPARSESPARSEPRODUCT_H\n#define EIGEN_CONSERVATIVESPARSESPARSEPRODUCT_H\n\nnamespace Eigen {\n\nnamespace internal {\n\ntemplate<typename Lhs, typename Rhs, typename ResultType>\nstatic void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& rhs, ResultType& res, bool sortedInsertion = false)\n{\n  typedef typename remove_all<Lhs>::type::Scalar LhsScalar;\n  typedef typename remove_all<Rhs>::type::Scalar RhsScalar;\n  typedef typename remove_all<ResultType>::type::Scalar ResScalar;\n\n  // make sure to call innerSize/outerSize since we fake the storage order.\n  Index rows = lhs.innerSize();\n  Index cols = rhs.outerSize();\n  eigen_assert(lhs.outerSize() == rhs.innerSize());\n\n  ei_declare_aligned_stack_constructed_variable(bool,   mask,     rows, 0);\n  ei_declare_aligned_stack_constructed_variable(ResScalar, values,   rows, 0);\n  ei_declare_aligned_stack_constructed_variable(Index,  indices,  rows, 0);\n\n  std::memset(mask,0,sizeof(bool)*rows);\n\n  evaluator<Lhs> lhsEval(lhs);\n  evaluator<Rhs> rhsEval(rhs);\n\n  // estimate the number of non zero entries\n  // given a rhs column containing Y non zeros, we assume that the respective Y columns\n  // of the lhs differs in average of one non zeros, thus the number of non zeros for\n  // the product of a rhs column with the lhs is X+Y where X is the average number of non zero\n  // per column of the lhs.\n  // Therefore, we have nnz(lhs*rhs) = nnz(lhs) + nnz(rhs)\n  Index estimated_nnz_prod = lhsEval.nonZerosEstimate() + rhsEval.nonZerosEstimate();\n\n  res.setZero();\n  res.reserve(Index(estimated_nnz_prod));\n  // we compute each column of the result, one after the other\n  for (Index j=0; j<cols; ++j)\n  {\n\n    res.startVec(j);\n    Index nnz = 0;\n    for (typename evaluator<Rhs>::InnerIterator rhsIt(rhsEval, j); rhsIt; ++rhsIt)\n    {\n      RhsScalar y = rhsIt.value();\n      Index k = rhsIt.index();\n      for (typename evaluator<Lhs>::InnerIterator lhsIt(lhsEval, k); lhsIt; ++lhsIt)\n      {\n        Index i = lhsIt.index();\n        LhsScalar x = lhsIt.value();\n        if(!mask[i])\n        {\n          mask[i] = true;\n          values[i] = x * y;\n          indices[nnz] = i;\n          ++nnz;\n        }\n        else\n          values[i] += x * y;\n      }\n    }\n    if(!sortedInsertion)\n    {\n      // unordered insertion\n      for(Index k=0; k<nnz; ++k)\n      {\n        Index i = indices[k];\n        res.insertBackByOuterInnerUnordered(j,i) = values[i];\n        mask[i] = false;\n      }\n    }\n    else\n    {\n      // alternative ordered insertion code:\n      const Index t200 = rows/11; // 11 == (log2(200)*1.39)\n      const Index t = (rows*100)/139;\n\n      // FIXME reserve nnz non zeros\n      // FIXME implement faster sorting algorithms for very small nnz\n      // if the result is sparse enough => use a quick sort\n      // otherwise => loop through the entire vector\n      // In order to avoid to perform an expensive log2 when the\n      // result is clearly very sparse we use a linear bound up to 200.\n      if((nnz<200 && nnz<t200) || nnz * numext::log2(int(nnz)) < t)\n      {\n   
     if(nnz>1) std::sort(indices,indices+nnz);\n        for(Index k=0; k<nnz; ++k)\n        {\n          Index i = indices[k];\n          res.insertBackByOuterInner(j,i) = values[i];\n          mask[i] = false;\n        }\n      }\n      else\n      {\n        // dense path\n        for(Index i=0; i<rows; ++i)\n        {\n          if(mask[i])\n          {\n            mask[i] = false;\n            res.insertBackByOuterInner(j,i) = values[i];\n          }\n        }\n      }\n    }\n  }\n  res.finalize();\n}\n\n\n} // end namespace internal\n\nnamespace internal {\n\ntemplate<typename Lhs, typename Rhs, typename ResultType,\n  int LhsStorageOrder = (traits<Lhs>::Flags&RowMajorBit) ? RowMajor : ColMajor,\n  int RhsStorageOrder = (traits<Rhs>::Flags&RowMajorBit) ? RowMajor : ColMajor,\n  int ResStorageOrder = (traits<ResultType>::Flags&RowMajorBit) ? RowMajor : ColMajor>\nstruct conservative_sparse_sparse_product_selector;\n\ntemplate<typename Lhs, typename Rhs, typename ResultType>\nstruct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,ColMajor,ColMajor,ColMajor>\n{\n  typedef typename remove_all<Lhs>::type LhsCleaned;\n  typedef typename LhsCleaned::Scalar Scalar;\n\n  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)\n  {\n    typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::StorageIndex> RowMajorMatrix;\n    typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorMatrixAux;\n    typedef typename sparse_eval<ColMajorMatrixAux,ResultType::RowsAtCompileTime,ResultType::ColsAtCompileTime,ColMajorMatrixAux::Flags>::type ColMajorMatrix;\n\n    // If the result is tall and thin (in the extreme case a column vector)\n    // then it is faster to sort the coefficients inplace instead of transposing twice.\n    // FIXME, the following heuristic is probably not very good.\n    if(lhs.rows()>rhs.cols())\n    {\n      ColMajorMatrix resCol(lhs.rows(),rhs.cols());\n      // perform sorted insertion\n      internal::conservative_sparse_sparse_product_impl<Lhs,Rhs,ColMajorMatrix>(lhs, rhs, resCol, true);\n      res = resCol.markAsRValue();\n    }\n    else\n    {\n      ColMajorMatrixAux resCol(lhs.rows(),rhs.cols());\n      // resort to transpose to sort the entries\n      internal::conservative_sparse_sparse_product_impl<Lhs,Rhs,ColMajorMatrixAux>(lhs, rhs, resCol, false);\n      RowMajorMatrix resRow(resCol);\n      res = resRow.markAsRValue();\n    }\n  }\n};\n\ntemplate<typename Lhs, typename Rhs, typename ResultType>\nstruct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,RowMajor,ColMajor,ColMajor>\n{\n  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)\n  {\n    typedef SparseMatrix<typename Rhs::Scalar,RowMajor,typename ResultType::StorageIndex> RowMajorRhs;\n    typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::StorageIndex> RowMajorRes;\n    RowMajorRhs rhsRow = rhs;\n    RowMajorRes resRow(lhs.rows(), rhs.cols());\n    internal::conservative_sparse_sparse_product_impl<RowMajorRhs,Lhs,RowMajorRes>(rhsRow, lhs, resRow);\n    res = resRow;\n  }\n};\n\ntemplate<typename Lhs, typename Rhs, typename ResultType>\nstruct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,ColMajor,RowMajor,ColMajor>\n{\n  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)\n  {\n    typedef SparseMatrix<typename Lhs::Scalar,RowMajor,typename ResultType::StorageIndex> RowMajorLhs;\n    typedef SparseMatrix<typename 
ResultType::Scalar,RowMajor,typename ResultType::StorageIndex> RowMajorRes;\n    RowMajorLhs lhsRow = lhs;\n    RowMajorRes resRow(lhs.rows(), rhs.cols());\n    internal::conservative_sparse_sparse_product_impl<Rhs,RowMajorLhs,RowMajorRes>(rhs, lhsRow, resRow);\n    res = resRow;\n  }\n};\n\ntemplate<typename Lhs, typename Rhs, typename ResultType>\nstruct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,RowMajor,RowMajor,ColMajor>\n{\n  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)\n  {\n    typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::StorageIndex> RowMajorMatrix;\n    RowMajorMatrix resRow(lhs.rows(), rhs.cols());\n    internal::conservative_sparse_sparse_product_impl<Rhs,Lhs,RowMajorMatrix>(rhs, lhs, resRow);\n    res = resRow;\n  }\n};\n\n\ntemplate<typename Lhs, typename Rhs, typename ResultType>\nstruct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,ColMajor,ColMajor,RowMajor>\n{\n  typedef typename traits<typename remove_all<Lhs>::type>::Scalar Scalar;\n\n  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)\n  {\n    typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorMatrix;\n    ColMajorMatrix resCol(lhs.rows(), rhs.cols());\n    internal::conservative_sparse_sparse_product_impl<Lhs,Rhs,ColMajorMatrix>(lhs, rhs, resCol);\n    res = resCol;\n  }\n};\n\ntemplate<typename Lhs, typename Rhs, typename ResultType>\nstruct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,RowMajor,ColMajor,RowMajor>\n{\n  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)\n  {\n    typedef SparseMatrix<typename Lhs::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorLhs;\n    typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorRes;\n    ColMajorLhs lhsCol = lhs;\n    ColMajorRes resCol(lhs.rows(), rhs.cols());\n    internal::conservative_sparse_sparse_product_impl<ColMajorLhs,Rhs,ColMajorRes>(lhsCol, rhs, resCol);\n    res = resCol;\n  }\n};\n\ntemplate<typename Lhs, typename Rhs, typename ResultType>\nstruct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,ColMajor,RowMajor,RowMajor>\n{\n  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)\n  {\n    typedef SparseMatrix<typename Rhs::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorRhs;\n    typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorRes;\n    ColMajorRhs rhsCol = rhs;\n    ColMajorRes resCol(lhs.rows(), rhs.cols());\n    internal::conservative_sparse_sparse_product_impl<Lhs,ColMajorRhs,ColMajorRes>(lhs, rhsCol, resCol);\n    res = resCol;\n  }\n};\n\ntemplate<typename Lhs, typename Rhs, typename ResultType>\nstruct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,RowMajor,RowMajor,RowMajor>\n{\n  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)\n  {\n    typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::StorageIndex> RowMajorMatrix;\n    typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorMatrix;\n    RowMajorMatrix resRow(lhs.rows(),rhs.cols());\n    internal::conservative_sparse_sparse_product_impl<Rhs,Lhs,RowMajorMatrix>(rhs, lhs, resRow);\n    // sort the non zeros:\n    ColMajorMatrix resCol(resRow);\n    res = resCol;\n  }\n};\n\n} // end namespace internal\n\n\nnamespace internal {\n\ntemplate<typename Lhs, typename 
Rhs, typename ResultType>\nstatic void sparse_sparse_to_dense_product_impl(const Lhs& lhs, const Rhs& rhs, ResultType& res)\n{\n  typedef typename remove_all<Lhs>::type::Scalar LhsScalar;\n  typedef typename remove_all<Rhs>::type::Scalar RhsScalar;\n  Index cols = rhs.outerSize();\n  eigen_assert(lhs.outerSize() == rhs.innerSize());\n\n  evaluator<Lhs> lhsEval(lhs);\n  evaluator<Rhs> rhsEval(rhs);\n\n  for (Index j=0; j<cols; ++j)\n  {\n    for (typename evaluator<Rhs>::InnerIterator rhsIt(rhsEval, j); rhsIt; ++rhsIt)\n    {\n      RhsScalar y = rhsIt.value();\n      Index k = rhsIt.index();\n      for (typename evaluator<Lhs>::InnerIterator lhsIt(lhsEval, k); lhsIt; ++lhsIt)\n      {\n        Index i = lhsIt.index();\n        LhsScalar x = lhsIt.value();\n        res.coeffRef(i,j) += x * y;\n      }\n    }\n  }\n}\n\n\n} // end namespace internal\n\nnamespace internal {\n\ntemplate<typename Lhs, typename Rhs, typename ResultType,\n  int LhsStorageOrder = (traits<Lhs>::Flags&RowMajorBit) ? RowMajor : ColMajor,\n  int RhsStorageOrder = (traits<Rhs>::Flags&RowMajorBit) ? RowMajor : ColMajor>\nstruct sparse_sparse_to_dense_product_selector;\n\ntemplate<typename Lhs, typename Rhs, typename ResultType>\nstruct sparse_sparse_to_dense_product_selector<Lhs,Rhs,ResultType,ColMajor,ColMajor>\n{\n  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)\n  {\n    internal::sparse_sparse_to_dense_product_impl<Lhs,Rhs,ResultType>(lhs, rhs, res);\n  }\n};\n\ntemplate<typename Lhs, typename Rhs, typename ResultType>\nstruct sparse_sparse_to_dense_product_selector<Lhs,Rhs,ResultType,RowMajor,ColMajor>\n{\n  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)\n  {\n    typedef SparseMatrix<typename Lhs::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorLhs;\n    ColMajorLhs lhsCol(lhs);\n    internal::sparse_sparse_to_dense_product_impl<ColMajorLhs,Rhs,ResultType>(lhsCol, rhs, res);\n  }\n};\n\ntemplate<typename Lhs, typename Rhs, typename ResultType>\nstruct sparse_sparse_to_dense_product_selector<Lhs,Rhs,ResultType,ColMajor,RowMajor>\n{\n  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)\n  {\n    typedef SparseMatrix<typename Rhs::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorRhs;\n    ColMajorRhs rhsCol(rhs);\n    internal::sparse_sparse_to_dense_product_impl<Lhs,ColMajorRhs,ResultType>(lhs, rhsCol, res);\n  }\n};\n\ntemplate<typename Lhs, typename Rhs, typename ResultType>\nstruct sparse_sparse_to_dense_product_selector<Lhs,Rhs,ResultType,RowMajor,RowMajor>\n{\n  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)\n  {\n    Transpose<ResultType> trRes(res);\n    internal::sparse_sparse_to_dense_product_impl<Rhs,Lhs,Transpose<ResultType> >(rhs, lhs, trRes);\n  }\n};\n\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_CONSERVATIVESPARSESPARSEPRODUCT_H\n"
  },
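  {
    "path": "example/eigen_sparse_notes/sparse_product_demo.cpp",
    "content": "// Hypothetical example file (path and code invented for illustration; not\n// part of Eigen or regenie): a sketch of how the kernel in the vendored\n// ConservativeSparseSparseProduct.h above is normally reached. Multiplying\n// two SparseMatrix operands keeps every structurally nonzero result entry\n// (the conservative product, reserved with the nnz(lhs)+nnz(rhs) estimate\n// from the file above); chaining .pruned() instead drops entries that\n// evaluate to (near) zero.\n#include <Eigen/Sparse>\n#include <iostream>\n#include <vector>\n\nint main()\n{\n  std::vector<Eigen::Triplet<double>> ta = { {0, 0, 1.0}, {0, 1, -1.0}, {1, 1, 2.0} };\n  std::vector<Eigen::Triplet<double>> tb = { {0, 0, 1.0}, {1, 0, 1.0}, {1, 1, 3.0} };\n  Eigen::SparseMatrix<double> A(2, 2), B(2, 2);\n  A.setFromTriplets(ta.begin(), ta.end());\n  B.setFromTriplets(tb.begin(), tb.end());\n\n  // Conservative product: the cancellation in row 0, column 0\n  // (1*1 + (-1)*1 = 0) is stored as an explicit zero, so nnz is 4.\n  Eigen::SparseMatrix<double> C = A * B;\n  std::cout << \"nnz(A*B)            = \" << C.nonZeros() << \"\\n\";\n\n  // Pruned product: numerically zero entries are discarded, so nnz is 3.\n  Eigen::SparseMatrix<double> Cp = (A * B).pruned();\n  std::cout << \"nnz((A*B).pruned()) = \" << Cp.nonZeros() << \"\\n\";\n  return 0;\n}\n"
  },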
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/SparseCore/MappedSparseMatrix.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_MAPPED_SPARSEMATRIX_H\n#define EIGEN_MAPPED_SPARSEMATRIX_H\n\nnamespace Eigen {\n\n/** \\deprecated Use Map<SparseMatrix<> >\n  * \\class MappedSparseMatrix\n  *\n  * \\brief Sparse matrix\n  *\n  * \\param _Scalar the scalar type, i.e. the type of the coefficients\n  *\n  * See http://www.netlib.org/linalg/html_templates/node91.html for details on the storage scheme.\n  *\n  */\nnamespace internal {\ntemplate<typename _Scalar, int _Flags, typename _StorageIndex>\nstruct traits<MappedSparseMatrix<_Scalar, _Flags, _StorageIndex> > : traits<SparseMatrix<_Scalar, _Flags, _StorageIndex> >\n{};\n} // end namespace internal\n\ntemplate<typename _Scalar, int _Flags, typename _StorageIndex>\nclass MappedSparseMatrix\n  : public Map<SparseMatrix<_Scalar, _Flags, _StorageIndex> >\n{\n    typedef Map<SparseMatrix<_Scalar, _Flags, _StorageIndex> > Base;\n\n  public:\n    \n    typedef typename Base::StorageIndex StorageIndex;\n    typedef typename Base::Scalar Scalar;\n\n    inline MappedSparseMatrix(Index rows, Index cols, Index nnz, StorageIndex* outerIndexPtr, StorageIndex* innerIndexPtr, Scalar* valuePtr, StorageIndex* innerNonZeroPtr = 0)\n      : Base(rows, cols, nnz, outerIndexPtr, innerIndexPtr, valuePtr, innerNonZeroPtr)\n    {}\n\n    /** Empty destructor */\n    inline ~MappedSparseMatrix() {}\n};\n\nnamespace internal {\n\ntemplate<typename _Scalar, int _Options, typename _StorageIndex>\nstruct evaluator<MappedSparseMatrix<_Scalar,_Options,_StorageIndex> >\n  : evaluator<SparseCompressedBase<MappedSparseMatrix<_Scalar,_Options,_StorageIndex> > >\n{\n  typedef MappedSparseMatrix<_Scalar,_Options,_StorageIndex> XprType;\n  typedef evaluator<SparseCompressedBase<XprType> > Base;\n  \n  evaluator() : Base() {}\n  explicit evaluator(const XprType &mat) : Base(mat) {}\n};\n\n}\n\n} // end namespace Eigen\n\n#endif // EIGEN_MAPPED_SPARSEMATRIX_H\n"
  },
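  {
    "path": "example/eigen_sparse_notes/map_sparse_matrix_demo.cpp",
    "content": "// Hypothetical example file (path and code invented for illustration; not\n// part of Eigen or regenie): the vendored header above deprecates\n// MappedSparseMatrix in favour of Map<SparseMatrix<> >, sketched here. Raw\n// compressed-column (CSC) arrays owned by the caller are viewed as a sparse\n// matrix without copying.\n#include <Eigen/Sparse>\n#include <iostream>\n\nint main()\n{\n  // 3x3 matrix [[1,0,2],[0,3,0],[0,0,4]] in CSC form.\n  int outer[4] = {0, 1, 2, 4};       // start offset of each column\n  int inner[4] = {0, 1, 0, 2};       // row index of each stored value\n  double values[4] = {1.0, 3.0, 2.0, 4.0};\n\n  Eigen::Map<Eigen::SparseMatrix<double>> view(3, 3, 4, outer, inner, values);\n\n  Eigen::VectorXd x(3);\n  x << 1.0, 1.0, 1.0;\n  std::cout << (view * x).transpose() << \"\\n\"; // expected: 3 3 4\n  return 0;\n}\n"
  },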
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/SparseCore/SparseAssign.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_SPARSEASSIGN_H\n#define EIGEN_SPARSEASSIGN_H\n\nnamespace Eigen { \n\ntemplate<typename Derived>    \ntemplate<typename OtherDerived>\nDerived& SparseMatrixBase<Derived>::operator=(const EigenBase<OtherDerived> &other)\n{\n  internal::call_assignment_no_alias(derived(), other.derived());\n  return derived();\n}\n\ntemplate<typename Derived>\ntemplate<typename OtherDerived>\nDerived& SparseMatrixBase<Derived>::operator=(const ReturnByValue<OtherDerived>& other)\n{\n  // TODO use the evaluator mechanism\n  other.evalTo(derived());\n  return derived();\n}\n\ntemplate<typename Derived>\ntemplate<typename OtherDerived>\ninline Derived& SparseMatrixBase<Derived>::operator=(const SparseMatrixBase<OtherDerived>& other)\n{\n  // by default sparse evaluation do not alias, so we can safely bypass the generic call_assignment routine\n  internal::Assignment<Derived,OtherDerived,internal::assign_op<Scalar,typename OtherDerived::Scalar> >\n          ::run(derived(), other.derived(), internal::assign_op<Scalar,typename OtherDerived::Scalar>());\n  return derived();\n}\n\ntemplate<typename Derived>\ninline Derived& SparseMatrixBase<Derived>::operator=(const Derived& other)\n{\n  internal::call_assignment_no_alias(derived(), other.derived());\n  return derived();\n}\n\nnamespace internal {\n\ntemplate<>\nstruct storage_kind_to_evaluator_kind<Sparse> {\n  typedef IteratorBased Kind;\n};\n\ntemplate<>\nstruct storage_kind_to_shape<Sparse> {\n  typedef SparseShape Shape;\n};\n\nstruct Sparse2Sparse {};\nstruct Sparse2Dense  {};\n\ntemplate<> struct AssignmentKind<SparseShape, SparseShape>           { typedef Sparse2Sparse Kind; };\ntemplate<> struct AssignmentKind<SparseShape, SparseTriangularShape> { typedef Sparse2Sparse Kind; };\ntemplate<> struct AssignmentKind<DenseShape,  SparseShape>           { typedef Sparse2Dense  Kind; };\ntemplate<> struct AssignmentKind<DenseShape,  SparseTriangularShape> { typedef Sparse2Dense  Kind; };\n\n\ntemplate<typename DstXprType, typename SrcXprType>\nvoid assign_sparse_to_sparse(DstXprType &dst, const SrcXprType &src)\n{\n  typedef typename DstXprType::Scalar Scalar;\n  typedef internal::evaluator<DstXprType> DstEvaluatorType;\n  typedef internal::evaluator<SrcXprType> SrcEvaluatorType;\n\n  SrcEvaluatorType srcEvaluator(src);\n\n  const bool transpose = (DstEvaluatorType::Flags & RowMajorBit) != (SrcEvaluatorType::Flags & RowMajorBit);\n  const Index outerEvaluationSize = (SrcEvaluatorType::Flags&RowMajorBit) ? 
src.rows() : src.cols();\n  if ((!transpose) && src.isRValue())\n  {\n    // eval without temporary\n    dst.resize(src.rows(), src.cols());\n    dst.setZero();\n    dst.reserve((std::min)(src.rows()*src.cols(), (std::max)(src.rows(),src.cols())*2));\n    for (Index j=0; j<outerEvaluationSize; ++j)\n    {\n      dst.startVec(j);\n      for (typename SrcEvaluatorType::InnerIterator it(srcEvaluator, j); it; ++it)\n      {\n        Scalar v = it.value();\n        dst.insertBackByOuterInner(j,it.index()) = v;\n      }\n    }\n    dst.finalize();\n  }\n  else\n  {\n    // eval through a temporary\n    eigen_assert(( ((internal::traits<DstXprType>::SupportedAccessPatterns & OuterRandomAccessPattern)==OuterRandomAccessPattern) ||\n              (!((DstEvaluatorType::Flags & RowMajorBit) != (SrcEvaluatorType::Flags & RowMajorBit)))) &&\n              \"the transpose operation is supposed to be handled in SparseMatrix::operator=\");\n\n    enum { Flip = (DstEvaluatorType::Flags & RowMajorBit) != (SrcEvaluatorType::Flags & RowMajorBit) };\n\n    \n    DstXprType temp(src.rows(), src.cols());\n\n    temp.reserve((std::min)(src.rows()*src.cols(), (std::max)(src.rows(),src.cols())*2));\n    for (Index j=0; j<outerEvaluationSize; ++j)\n    {\n      temp.startVec(j);\n      for (typename SrcEvaluatorType::InnerIterator it(srcEvaluator, j); it; ++it)\n      {\n        Scalar v = it.value();\n        temp.insertBackByOuterInner(Flip?it.index():j,Flip?j:it.index()) = v;\n      }\n    }\n    temp.finalize();\n\n    dst = temp.markAsRValue();\n  }\n}\n\n// Generic Sparse to Sparse assignment\ntemplate< typename DstXprType, typename SrcXprType, typename Functor>\nstruct Assignment<DstXprType, SrcXprType, Functor, Sparse2Sparse>\n{\n  static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)\n  {\n    assign_sparse_to_sparse(dst.derived(), src.derived());\n  }\n};\n\n// Generic Sparse to Dense assignment\ntemplate< typename DstXprType, typename SrcXprType, typename Functor, typename Weak>\nstruct Assignment<DstXprType, SrcXprType, Functor, Sparse2Dense, Weak>\n{\n  static void run(DstXprType &dst, const SrcXprType &src, const Functor &func)\n  {\n    if(internal::is_same<Functor,internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> >::value)\n      dst.setZero();\n    \n    internal::evaluator<SrcXprType> srcEval(src);\n    resize_if_allowed(dst, src, func);\n    internal::evaluator<DstXprType> dstEval(dst);\n    \n    const Index outerEvaluationSize = (internal::evaluator<SrcXprType>::Flags&RowMajorBit) ? 
src.rows() : src.cols();\n    for (Index j=0; j<outerEvaluationSize; ++j)\n      for (typename internal::evaluator<SrcXprType>::InnerIterator i(srcEval,j); i; ++i)\n        func.assignCoeff(dstEval.coeffRef(i.row(),i.col()), i.value());\n  }\n};\n\n// Specialization for dense ?= dense +/- sparse and dense ?= sparse +/- dense\ntemplate<typename DstXprType, typename Func1, typename Func2>\nstruct assignment_from_dense_op_sparse\n{\n  template<typename SrcXprType, typename InitialFunc>\n  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  void run(DstXprType &dst, const SrcXprType &src, const InitialFunc& /*func*/)\n  {\n    #ifdef EIGEN_SPARSE_ASSIGNMENT_FROM_DENSE_OP_SPARSE_PLUGIN\n    EIGEN_SPARSE_ASSIGNMENT_FROM_DENSE_OP_SPARSE_PLUGIN\n    #endif\n\n    call_assignment_no_alias(dst, src.lhs(), Func1());\n    call_assignment_no_alias(dst, src.rhs(), Func2());\n  }\n\n  // Specialization for dense1 = sparse + dense2; -> dense1 = dense2; dense1 += sparse;\n  template<typename Lhs, typename Rhs, typename Scalar>\n  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  typename internal::enable_if<internal::is_same<typename internal::evaluator_traits<Rhs>::Shape,DenseShape>::value>::type\n  run(DstXprType &dst, const CwiseBinaryOp<internal::scalar_sum_op<Scalar,Scalar>, const Lhs, const Rhs> &src,\n      const internal::assign_op<typename DstXprType::Scalar,Scalar>& /*func*/)\n  {\n    #ifdef EIGEN_SPARSE_ASSIGNMENT_FROM_SPARSE_ADD_DENSE_PLUGIN\n    EIGEN_SPARSE_ASSIGNMENT_FROM_SPARSE_ADD_DENSE_PLUGIN\n    #endif\n\n    // Apply the dense matrix first, then the sparse one.\n    call_assignment_no_alias(dst, src.rhs(), Func1());\n    call_assignment_no_alias(dst, src.lhs(), Func2());\n  }\n\n  // Specialization for dense1 = sparse - dense2; -> dense1 = -dense2; dense1 += sparse;\n  template<typename Lhs, typename Rhs, typename Scalar>\n  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n  typename internal::enable_if<internal::is_same<typename internal::evaluator_traits<Rhs>::Shape,DenseShape>::value>::type\n  run(DstXprType &dst, const CwiseBinaryOp<internal::scalar_difference_op<Scalar,Scalar>, const Lhs, const Rhs> &src,\n      const internal::assign_op<typename DstXprType::Scalar,Scalar>& /*func*/)\n  {\n    #ifdef EIGEN_SPARSE_ASSIGNMENT_FROM_SPARSE_SUB_DENSE_PLUGIN\n    EIGEN_SPARSE_ASSIGNMENT_FROM_SPARSE_SUB_DENSE_PLUGIN\n    #endif\n\n    // Apply the dense matrix first, then the sparse one.\n    call_assignment_no_alias(dst, -src.rhs(), Func1());\n    call_assignment_no_alias(dst,  src.lhs(), add_assign_op<typename DstXprType::Scalar,typename Lhs::Scalar>());\n  }\n};\n\n#define EIGEN_CATCH_ASSIGN_DENSE_OP_SPARSE(ASSIGN_OP,BINOP,ASSIGN_OP2) \\\n  template< typename DstXprType, typename Lhs, typename Rhs, typename Scalar> \\\n  struct Assignment<DstXprType, CwiseBinaryOp<internal::BINOP<Scalar,Scalar>, const Lhs, const Rhs>, internal::ASSIGN_OP<typename DstXprType::Scalar,Scalar>, \\\n                    Sparse2Dense, \\\n                    typename internal::enable_if<   internal::is_same<typename internal::evaluator_traits<Lhs>::Shape,DenseShape>::value \\\n                                                 || internal::is_same<typename internal::evaluator_traits<Rhs>::Shape,DenseShape>::value>::type> \\\n    : assignment_from_dense_op_sparse<DstXprType, internal::ASSIGN_OP<typename DstXprType::Scalar,typename Lhs::Scalar>, internal::ASSIGN_OP2<typename DstXprType::Scalar,typename Rhs::Scalar> > \\\n  {}\n\nEIGEN_CATCH_ASSIGN_DENSE_OP_SPARSE(assign_op,    
scalar_sum_op,add_assign_op);\nEIGEN_CATCH_ASSIGN_DENSE_OP_SPARSE(add_assign_op,scalar_sum_op,add_assign_op);\nEIGEN_CATCH_ASSIGN_DENSE_OP_SPARSE(sub_assign_op,scalar_sum_op,sub_assign_op);\n\nEIGEN_CATCH_ASSIGN_DENSE_OP_SPARSE(assign_op,    scalar_difference_op,sub_assign_op);\nEIGEN_CATCH_ASSIGN_DENSE_OP_SPARSE(add_assign_op,scalar_difference_op,sub_assign_op);\nEIGEN_CATCH_ASSIGN_DENSE_OP_SPARSE(sub_assign_op,scalar_difference_op,add_assign_op);\n\n\n// Specialization for \"dst = dec.solve(rhs)\"\n// NOTE we need to specialize it for Sparse2Sparse to avoid ambiguous specialization error\ntemplate<typename DstXprType, typename DecType, typename RhsType, typename Scalar>\nstruct Assignment<DstXprType, Solve<DecType,RhsType>, internal::assign_op<Scalar,Scalar>, Sparse2Sparse>\n{\n  typedef Solve<DecType,RhsType> SrcXprType;\n  static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)\n  {\n    Index dstRows = src.rows();\n    Index dstCols = src.cols();\n    if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))\n      dst.resize(dstRows, dstCols);\n\n    src.dec()._solve_impl(src.rhs(), dst);\n  }\n};\n\nstruct Diagonal2Sparse {};\n\ntemplate<> struct AssignmentKind<SparseShape,DiagonalShape> { typedef Diagonal2Sparse Kind; };\n\ntemplate< typename DstXprType, typename SrcXprType, typename Functor>\nstruct Assignment<DstXprType, SrcXprType, Functor, Diagonal2Sparse>\n{\n  typedef typename DstXprType::StorageIndex StorageIndex;\n  typedef typename DstXprType::Scalar Scalar;\n\n  template<int Options, typename AssignFunc>\n  static void run(SparseMatrix<Scalar,Options,StorageIndex> &dst, const SrcXprType &src, const AssignFunc &func)\n  { dst.assignDiagonal(src.diagonal(), func); }\n  \n  template<typename DstDerived>\n  static void run(SparseMatrixBase<DstDerived> &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)\n  { dst.derived().diagonal() = src.diagonal(); }\n  \n  template<typename DstDerived>\n  static void run(SparseMatrixBase<DstDerived> &dst, const SrcXprType &src, const internal::add_assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)\n  { dst.derived().diagonal() += src.diagonal(); }\n  \n  template<typename DstDerived>\n  static void run(SparseMatrixBase<DstDerived> &dst, const SrcXprType &src, const internal::sub_assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)\n  { dst.derived().diagonal() -= src.diagonal(); }\n};\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_SPARSEASSIGN_H\n"
  },
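  {
    "path": "example/eigen_sparse_notes/sparse_dense_assign_demo.cpp",
    "content": "// Hypothetical example file (path and code invented for illustration; not\n// part of Eigen or regenie): a sketch of the assignment paths dispatched in\n// the vendored SparseAssign.h above. The Sparse2Dense path behind\n// dense = sparse zeroes the destination and scatters the nonzeros, while\n// dense += sparse reuses the same scatter loop without the setZero().\n#include <Eigen/Dense>\n#include <Eigen/Sparse>\n#include <iostream>\n#include <vector>\n\nint main()\n{\n  std::vector<Eigen::Triplet<double>> trips = { {0, 1, 5.0}, {1, 0, -2.0} };\n  Eigen::SparseMatrix<double> S(2, 2);\n  S.setFromTriplets(trips.begin(), trips.end());\n\n  Eigen::MatrixXd full = Eigen::MatrixXd(S); // Sparse2Dense: setZero + scatter\n\n  Eigen::MatrixXd D = Eigen::MatrixXd::Ones(2, 2);\n  D += S;                                    // add-assign: scatter only\n\n  Eigen::SparseMatrix<double> S2 = D.sparseView(); // dense back to sparse\n\n  std::cout << full << \"\\n\\n\" << D << \"\\nnnz(S2) = \" << S2.nonZeros() << \"\\n\";\n  return 0;\n}\n"
  },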
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/SparseCore/SparseBlock.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_SPARSE_BLOCK_H\n#define EIGEN_SPARSE_BLOCK_H\n\nnamespace Eigen {\n\n// Subset of columns or rows\ntemplate<typename XprType, int BlockRows, int BlockCols>\nclass BlockImpl<XprType,BlockRows,BlockCols,true,Sparse>\n  : public SparseMatrixBase<Block<XprType,BlockRows,BlockCols,true> >\n{\n    typedef typename internal::remove_all<typename XprType::Nested>::type _MatrixTypeNested;\n    typedef Block<XprType, BlockRows, BlockCols, true> BlockType;\npublic:\n    enum { IsRowMajor = internal::traits<BlockType>::IsRowMajor };\nprotected:\n    enum { OuterSize = IsRowMajor ? BlockRows : BlockCols };\n    typedef SparseMatrixBase<BlockType> Base;\n    using Base::convert_index;\npublic:\n    EIGEN_SPARSE_PUBLIC_INTERFACE(BlockType)\n\n    inline BlockImpl(XprType& xpr, Index i)\n      : m_matrix(xpr), m_outerStart(convert_index(i)), m_outerSize(OuterSize)\n    {}\n\n    inline BlockImpl(XprType& xpr, Index startRow, Index startCol, Index blockRows, Index blockCols)\n      : m_matrix(xpr), m_outerStart(convert_index(IsRowMajor ? startRow : startCol)), m_outerSize(convert_index(IsRowMajor ? blockRows : blockCols))\n    {}\n\n    EIGEN_STRONG_INLINE Index rows() const { return IsRowMajor ? m_outerSize.value() : m_matrix.rows(); }\n    EIGEN_STRONG_INLINE Index cols() const { return IsRowMajor ? m_matrix.cols() : m_outerSize.value(); }\n\n    Index nonZeros() const\n    {\n      typedef internal::evaluator<XprType> EvaluatorType;\n      EvaluatorType matEval(m_matrix);\n      Index nnz = 0;\n      Index end = m_outerStart + m_outerSize.value();\n      for(Index j=m_outerStart; j<end; ++j)\n        for(typename EvaluatorType::InnerIterator it(matEval, j); it; ++it)\n          ++nnz;\n      return nnz;\n    }\n\n    inline const Scalar coeff(Index row, Index col) const\n    {\n      return m_matrix.coeff(row + (IsRowMajor ? m_outerStart : 0), col + (IsRowMajor ? 0 :  m_outerStart));\n    }\n\n    inline const Scalar coeff(Index index) const\n    {\n      return m_matrix.coeff(IsRowMajor ? m_outerStart : index, IsRowMajor ? index :  m_outerStart);\n    }\n\n    inline const XprType& nestedExpression() const { return m_matrix; }\n    inline XprType& nestedExpression() { return m_matrix; }\n    Index startRow() const { return IsRowMajor ? m_outerStart : 0; }\n    Index startCol() const { return IsRowMajor ? 0 : m_outerStart; }\n    Index blockRows() const { return IsRowMajor ? m_outerSize.value() : m_matrix.rows(); }\n    Index blockCols() const { return IsRowMajor ? 
m_matrix.cols() : m_outerSize.value(); }\n\n  protected:\n\n    typename internal::ref_selector<XprType>::non_const_type m_matrix;\n    Index m_outerStart;\n    const internal::variable_if_dynamic<Index, OuterSize> m_outerSize;\n\n  protected:\n    // Disable assignment with clear error message.\n    // Note that simply removing operator= yields compilation errors with ICC+MSVC\n    template<typename T>\n    BlockImpl& operator=(const T&)\n    {\n      EIGEN_STATIC_ASSERT(sizeof(T)==0, THIS_SPARSE_BLOCK_SUBEXPRESSION_IS_READ_ONLY);\n      return *this;\n    }\n};\n\n\n/***************************************************************************\n* specialization for SparseMatrix\n***************************************************************************/\n\nnamespace internal {\n\ntemplate<typename SparseMatrixType, int BlockRows, int BlockCols>\nclass sparse_matrix_block_impl\n  : public SparseCompressedBase<Block<SparseMatrixType,BlockRows,BlockCols,true> >\n{\n    typedef typename internal::remove_all<typename SparseMatrixType::Nested>::type _MatrixTypeNested;\n    typedef Block<SparseMatrixType, BlockRows, BlockCols, true> BlockType;\n    typedef SparseCompressedBase<Block<SparseMatrixType,BlockRows,BlockCols,true> > Base;\n    using Base::convert_index;\npublic:\n    enum { IsRowMajor = internal::traits<BlockType>::IsRowMajor };\n    EIGEN_SPARSE_PUBLIC_INTERFACE(BlockType)\nprotected:\n    typedef typename Base::IndexVector IndexVector;\n    enum { OuterSize = IsRowMajor ? BlockRows : BlockCols };\npublic:\n\n    inline sparse_matrix_block_impl(SparseMatrixType& xpr, Index i)\n      : m_matrix(xpr), m_outerStart(convert_index(i)), m_outerSize(OuterSize)\n    {}\n\n    inline sparse_matrix_block_impl(SparseMatrixType& xpr, Index startRow, Index startCol, Index blockRows, Index blockCols)\n      : m_matrix(xpr), m_outerStart(convert_index(IsRowMajor ? startRow : startCol)), m_outerSize(convert_index(IsRowMajor ? blockRows : blockCols))\n    {}\n\n    template<typename OtherDerived>\n    inline BlockType& operator=(const SparseMatrixBase<OtherDerived>& other)\n    {\n      typedef typename internal::remove_all<typename SparseMatrixType::Nested>::type _NestedMatrixType;\n      _NestedMatrixType& matrix = m_matrix;\n      // This assignment is slow if this vector set is not empty\n      // and/or it is not at the end of the nonzeros of the underlying matrix.\n\n      // 1 - eval to a temporary to avoid transposition and/or aliasing issues\n      Ref<const SparseMatrix<Scalar, IsRowMajor ? RowMajor : ColMajor, StorageIndex> > tmp(other.derived());\n      eigen_internal_assert(tmp.outerSize()==m_outerSize.value());\n\n      // 2 - let's check whether there is enough allocated memory\n      Index nnz           = tmp.nonZeros();\n      Index start         = m_outerStart==0 ? 0 : m_matrix.outerIndexPtr()[m_outerStart]; // starting position of the current block\n      Index end           = m_matrix.outerIndexPtr()[m_outerStart+m_outerSize.value()]; // ending position of the current block\n      Index block_size    = end - start;                                                // available room in the current block\n      Index tail_size     = m_matrix.outerIndexPtr()[m_matrix.outerSize()] - end;\n\n      Index free_size     = m_matrix.isCompressed()\n                          ? 
Index(matrix.data().allocatedSize()) + block_size\n                          : block_size;\n\n      Index tmp_start = tmp.outerIndexPtr()[0];\n\n      bool update_trailing_pointers = false;\n      if(nnz>free_size)\n      {\n        // realloc manually to reduce copies\n        typename SparseMatrixType::Storage newdata(m_matrix.data().allocatedSize() - block_size + nnz);\n\n        internal::smart_copy(m_matrix.valuePtr(),       m_matrix.valuePtr() + start,      newdata.valuePtr());\n        internal::smart_copy(m_matrix.innerIndexPtr(),  m_matrix.innerIndexPtr() + start, newdata.indexPtr());\n\n        internal::smart_copy(tmp.valuePtr() + tmp_start,      tmp.valuePtr() + tmp_start + nnz,       newdata.valuePtr() + start);\n        internal::smart_copy(tmp.innerIndexPtr() + tmp_start, tmp.innerIndexPtr() + tmp_start + nnz,  newdata.indexPtr() + start);\n\n        internal::smart_copy(matrix.valuePtr()+end,       matrix.valuePtr()+end + tail_size,      newdata.valuePtr()+start+nnz);\n        internal::smart_copy(matrix.innerIndexPtr()+end,  matrix.innerIndexPtr()+end + tail_size, newdata.indexPtr()+start+nnz);\n\n        newdata.resize(m_matrix.outerIndexPtr()[m_matrix.outerSize()] - block_size + nnz);\n\n        matrix.data().swap(newdata);\n\n        update_trailing_pointers = true;\n      }\n      else\n      {\n        if(m_matrix.isCompressed() && nnz!=block_size)\n        {\n          // no need to realloc, simply copy the tail at its respective position and insert tmp\n          matrix.data().resize(start + nnz + tail_size);\n\n          internal::smart_memmove(matrix.valuePtr()+end,      matrix.valuePtr() + end+tail_size,      matrix.valuePtr() + start+nnz);\n          internal::smart_memmove(matrix.innerIndexPtr()+end, matrix.innerIndexPtr() + end+tail_size, matrix.innerIndexPtr() + start+nnz);\n\n          update_trailing_pointers = true;\n        }\n\n        internal::smart_copy(tmp.valuePtr() + tmp_start,      tmp.valuePtr() + tmp_start + nnz,       matrix.valuePtr() + start);\n        internal::smart_copy(tmp.innerIndexPtr() + tmp_start, tmp.innerIndexPtr() + tmp_start + nnz,  matrix.innerIndexPtr() + start);\n      }\n\n      // update outer index pointers and innerNonZeros\n      if(IsVectorAtCompileTime)\n      {\n        if(!m_matrix.isCompressed())\n          matrix.innerNonZeroPtr()[m_outerStart] = StorageIndex(nnz);\n        matrix.outerIndexPtr()[m_outerStart] = StorageIndex(start);\n      }\n      else\n      {\n        StorageIndex p = StorageIndex(start);\n        for(Index k=0; k<m_outerSize.value(); ++k)\n        {\n          StorageIndex nnz_k = internal::convert_index<StorageIndex>(tmp.innerVector(k).nonZeros());\n          if(!m_matrix.isCompressed())\n            matrix.innerNonZeroPtr()[m_outerStart+k] = nnz_k;\n          matrix.outerIndexPtr()[m_outerStart+k] = p;\n          p += nnz_k;\n        }\n      }\n\n      if(update_trailing_pointers)\n      {\n        StorageIndex offset = internal::convert_index<StorageIndex>(nnz - block_size);\n        for(Index k = m_outerStart + m_outerSize.value(); k<=matrix.outerSize(); ++k)\n        {\n          matrix.outerIndexPtr()[k] += offset;\n        }\n      }\n\n      return derived();\n    }\n\n    inline BlockType& operator=(const BlockType& other)\n    {\n      return operator=<BlockType>(other);\n    }\n\n    inline const Scalar* valuePtr() const\n    { return m_matrix.valuePtr(); }\n    inline Scalar* valuePtr()\n    { return m_matrix.valuePtr(); }\n\n    inline const StorageIndex* innerIndexPtr() const\n    
{ return m_matrix.innerIndexPtr(); }\n    inline StorageIndex* innerIndexPtr()\n    { return m_matrix.innerIndexPtr(); }\n\n    inline const StorageIndex* outerIndexPtr() const\n    { return m_matrix.outerIndexPtr() + m_outerStart; }\n    inline StorageIndex* outerIndexPtr()\n    { return m_matrix.outerIndexPtr() + m_outerStart; }\n\n    inline const StorageIndex* innerNonZeroPtr() const\n    { return isCompressed() ? 0 : (m_matrix.innerNonZeroPtr()+m_outerStart); }\n    inline StorageIndex* innerNonZeroPtr()\n    { return isCompressed() ? 0 : (m_matrix.innerNonZeroPtr()+m_outerStart); }\n\n    bool isCompressed() const { return m_matrix.innerNonZeroPtr()==0; }\n\n    inline Scalar& coeffRef(Index row, Index col)\n    {\n      return m_matrix.coeffRef(row + (IsRowMajor ? m_outerStart : 0), col + (IsRowMajor ? 0 :  m_outerStart));\n    }\n\n    inline const Scalar coeff(Index row, Index col) const\n    {\n      return m_matrix.coeff(row + (IsRowMajor ? m_outerStart : 0), col + (IsRowMajor ? 0 :  m_outerStart));\n    }\n\n    inline const Scalar coeff(Index index) const\n    {\n      return m_matrix.coeff(IsRowMajor ? m_outerStart : index, IsRowMajor ? index :  m_outerStart);\n    }\n\n    const Scalar& lastCoeff() const\n    {\n      EIGEN_STATIC_ASSERT_VECTOR_ONLY(sparse_matrix_block_impl);\n      eigen_assert(Base::nonZeros()>0);\n      if(m_matrix.isCompressed())\n        return m_matrix.valuePtr()[m_matrix.outerIndexPtr()[m_outerStart+1]-1];\n      else\n        return m_matrix.valuePtr()[m_matrix.outerIndexPtr()[m_outerStart]+m_matrix.innerNonZeroPtr()[m_outerStart]-1];\n    }\n\n    EIGEN_STRONG_INLINE Index rows() const { return IsRowMajor ? m_outerSize.value() : m_matrix.rows(); }\n    EIGEN_STRONG_INLINE Index cols() const { return IsRowMajor ? m_matrix.cols() : m_outerSize.value(); }\n\n    inline const SparseMatrixType& nestedExpression() const { return m_matrix; }\n    inline SparseMatrixType& nestedExpression() { return m_matrix; }\n    Index startRow() const { return IsRowMajor ? m_outerStart : 0; }\n    Index startCol() const { return IsRowMajor ? 0 : m_outerStart; }\n    Index blockRows() const { return IsRowMajor ? m_outerSize.value() : m_matrix.rows(); }\n    Index blockCols() const { return IsRowMajor ? 
m_matrix.cols() : m_outerSize.value(); }\n\n  protected:\n\n    typename internal::ref_selector<SparseMatrixType>::non_const_type m_matrix;\n    Index m_outerStart;\n    const internal::variable_if_dynamic<Index, OuterSize> m_outerSize;\n\n};\n\n} // namespace internal\n\ntemplate<typename _Scalar, int _Options, typename _StorageIndex, int BlockRows, int BlockCols>\nclass BlockImpl<SparseMatrix<_Scalar, _Options, _StorageIndex>,BlockRows,BlockCols,true,Sparse>\n  : public internal::sparse_matrix_block_impl<SparseMatrix<_Scalar, _Options, _StorageIndex>,BlockRows,BlockCols>\n{\npublic:\n  typedef _StorageIndex StorageIndex;\n  typedef SparseMatrix<_Scalar, _Options, _StorageIndex> SparseMatrixType;\n  typedef internal::sparse_matrix_block_impl<SparseMatrixType,BlockRows,BlockCols> Base;\n  inline BlockImpl(SparseMatrixType& xpr, Index i)\n    : Base(xpr, i)\n  {}\n\n  inline BlockImpl(SparseMatrixType& xpr, Index startRow, Index startCol, Index blockRows, Index blockCols)\n    : Base(xpr, startRow, startCol, blockRows, blockCols)\n  {}\n\n  using Base::operator=;\n};\n\ntemplate<typename _Scalar, int _Options, typename _StorageIndex, int BlockRows, int BlockCols>\nclass BlockImpl<const SparseMatrix<_Scalar, _Options, _StorageIndex>,BlockRows,BlockCols,true,Sparse>\n  : public internal::sparse_matrix_block_impl<const SparseMatrix<_Scalar, _Options, _StorageIndex>,BlockRows,BlockCols>\n{\npublic:\n  typedef _StorageIndex StorageIndex;\n  typedef const SparseMatrix<_Scalar, _Options, _StorageIndex> SparseMatrixType;\n  typedef internal::sparse_matrix_block_impl<SparseMatrixType,BlockRows,BlockCols> Base;\n  inline BlockImpl(SparseMatrixType& xpr, Index i)\n    : Base(xpr, i)\n  {}\n\n  inline BlockImpl(SparseMatrixType& xpr, Index startRow, Index startCol, Index blockRows, Index blockCols)\n    : Base(xpr, startRow, startCol, blockRows, blockCols)\n  {}\n\n  using Base::operator=;\nprivate:\n  template<typename Derived> BlockImpl(const SparseMatrixBase<Derived>& xpr, Index i);\n  template<typename Derived> BlockImpl(const SparseMatrixBase<Derived>& xpr);\n};\n\n//----------\n\n/** Generic implementation of sparse Block expression.\n  * Real-only.\n  */\ntemplate<typename XprType, int BlockRows, int BlockCols, bool InnerPanel>\nclass BlockImpl<XprType,BlockRows,BlockCols,InnerPanel,Sparse>\n  : public SparseMatrixBase<Block<XprType,BlockRows,BlockCols,InnerPanel> >, internal::no_assignment_operator\n{\n    typedef Block<XprType, BlockRows, BlockCols, InnerPanel> BlockType;\n    typedef SparseMatrixBase<BlockType> Base;\n    using Base::convert_index;\npublic:\n    enum { IsRowMajor = internal::traits<BlockType>::IsRowMajor };\n    EIGEN_SPARSE_PUBLIC_INTERFACE(BlockType)\n\n    typedef typename internal::remove_all<typename XprType::Nested>::type _MatrixTypeNested;\n\n    /** Column or Row constructor\n      */\n    inline BlockImpl(XprType& xpr, Index i)\n      : m_matrix(xpr),\n        m_startRow( (BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) ? convert_index(i) : 0),\n        m_startCol( (BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) ? convert_index(i) : 0),\n        m_blockRows(BlockRows==1 ? 1 : xpr.rows()),\n        m_blockCols(BlockCols==1 ? 
1 : xpr.cols())\n    {}\n\n    /** Dynamic-size constructor\n      */\n    inline BlockImpl(XprType& xpr, Index startRow, Index startCol, Index blockRows, Index blockCols)\n      : m_matrix(xpr), m_startRow(convert_index(startRow)), m_startCol(convert_index(startCol)), m_blockRows(convert_index(blockRows)), m_blockCols(convert_index(blockCols))\n    {}\n\n    inline Index rows() const { return m_blockRows.value(); }\n    inline Index cols() const { return m_blockCols.value(); }\n\n    inline Scalar& coeffRef(Index row, Index col)\n    {\n      return m_matrix.coeffRef(row + m_startRow.value(), col + m_startCol.value());\n    }\n\n    inline const Scalar coeff(Index row, Index col) const\n    {\n      return m_matrix.coeff(row + m_startRow.value(), col + m_startCol.value());\n    }\n\n    inline Scalar& coeffRef(Index index)\n    {\n      return m_matrix.coeffRef(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),\n                               m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));\n    }\n\n    inline const Scalar coeff(Index index) const\n    {\n      return m_matrix.coeff(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),\n                            m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));\n    }\n\n    inline const XprType& nestedExpression() const { return m_matrix; }\n    inline XprType& nestedExpression() { return m_matrix; }\n    Index startRow() const { return m_startRow.value(); }\n    Index startCol() const { return m_startCol.value(); }\n    Index blockRows() const { return m_blockRows.value(); }\n    Index blockCols() const { return m_blockCols.value(); }\n\n  protected:\n//     friend class internal::GenericSparseBlockInnerIteratorImpl<XprType,BlockRows,BlockCols,InnerPanel>;\n    friend struct internal::unary_evaluator<Block<XprType,BlockRows,BlockCols,InnerPanel>, internal::IteratorBased, Scalar >;\n\n    Index nonZeros() const { return Dynamic; }\n\n    typename internal::ref_selector<XprType>::non_const_type m_matrix;\n    const internal::variable_if_dynamic<Index, XprType::RowsAtCompileTime == 1 ? 0 : Dynamic> m_startRow;\n    const internal::variable_if_dynamic<Index, XprType::ColsAtCompileTime == 1 ? 
0 : Dynamic> m_startCol;\n    const internal::variable_if_dynamic<Index, RowsAtCompileTime> m_blockRows;\n    const internal::variable_if_dynamic<Index, ColsAtCompileTime> m_blockCols;\n\n  protected:\n    // Disable assignment with clear error message.\n    // Note that simply removing operator= yields compilation errors with ICC+MSVC\n    template<typename T>\n    BlockImpl& operator=(const T&)\n    {\n      EIGEN_STATIC_ASSERT(sizeof(T)==0, THIS_SPARSE_BLOCK_SUBEXPRESSION_IS_READ_ONLY);\n      return *this;\n    }\n\n};\n\nnamespace internal {\n\ntemplate<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel>\nstruct unary_evaluator<Block<ArgType,BlockRows,BlockCols,InnerPanel>, IteratorBased >\n : public evaluator_base<Block<ArgType,BlockRows,BlockCols,InnerPanel> >\n{\n    class InnerVectorInnerIterator;\n    class OuterVectorInnerIterator;\n  public:\n    typedef Block<ArgType,BlockRows,BlockCols,InnerPanel> XprType;\n    typedef typename XprType::StorageIndex StorageIndex;\n    typedef typename XprType::Scalar Scalar;\n\n    enum {\n      IsRowMajor = XprType::IsRowMajor,\n\n      OuterVector =  (BlockCols==1 && ArgType::IsRowMajor)\n                    | // FIXME | instead of || to please GCC 4.4.0 stupid warning \"suggest parentheses around &&\".\n                      // revert to || as soon as not needed anymore.\n                     (BlockRows==1 && !ArgType::IsRowMajor),\n\n      CoeffReadCost = evaluator<ArgType>::CoeffReadCost,\n      Flags = XprType::Flags\n    };\n\n    typedef typename internal::conditional<OuterVector,OuterVectorInnerIterator,InnerVectorInnerIterator>::type InnerIterator;\n\n    explicit unary_evaluator(const XprType& op)\n      : m_argImpl(op.nestedExpression()), m_block(op)\n    {}\n\n    inline Index nonZerosEstimate() const {\n      const Index nnz = m_block.nonZeros();\n      if(nnz < 0) {\n        // Scale the non-zero estimate for the underlying expression linearly with block size.\n        // Return zero if the underlying block is empty.\n        const Index nested_sz = m_block.nestedExpression().size();        \n        return nested_sz == 0 ? 0 : m_argImpl.nonZerosEstimate() * m_block.size() / nested_sz;\n      }\n      return nnz;\n    }\n\n  protected:\n    typedef typename evaluator<ArgType>::InnerIterator EvalIterator;\n\n    evaluator<ArgType> m_argImpl;\n    const XprType &m_block;\n};\n\ntemplate<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel>\nclass unary_evaluator<Block<ArgType,BlockRows,BlockCols,InnerPanel>, IteratorBased>::InnerVectorInnerIterator\n : public EvalIterator\n{\n  // NOTE MSVC fails to compile if we don't explicitely \"import\" IsRowMajor from unary_evaluator\n  //      because the base class EvalIterator has a private IsRowMajor enum too. (bug #1786)\n  // NOTE We cannot call it IsRowMajor because it would shadow unary_evaluator::IsRowMajor\n  enum { XprIsRowMajor = unary_evaluator::IsRowMajor };\n  const XprType& m_block;\n  Index m_end;\npublic:\n\n  EIGEN_STRONG_INLINE InnerVectorInnerIterator(const unary_evaluator& aEval, Index outer)\n    : EvalIterator(aEval.m_argImpl, outer + (XprIsRowMajor ? aEval.m_block.startRow() : aEval.m_block.startCol())),\n      m_block(aEval.m_block),\n      m_end(XprIsRowMajor ? aEval.m_block.startCol()+aEval.m_block.blockCols() : aEval.m_block.startRow()+aEval.m_block.blockRows())\n  {\n    while( (EvalIterator::operator bool()) && (EvalIterator::index() < (XprIsRowMajor ? 
m_block.startCol() : m_block.startRow())) )\n      EvalIterator::operator++();\n  }\n\n  inline StorageIndex index() const { return EvalIterator::index() - convert_index<StorageIndex>(XprIsRowMajor ? m_block.startCol() : m_block.startRow()); }\n  inline Index outer()  const { return EvalIterator::outer() - (XprIsRowMajor ? m_block.startRow() : m_block.startCol()); }\n  inline Index row()    const { return EvalIterator::row()   - m_block.startRow(); }\n  inline Index col()    const { return EvalIterator::col()   - m_block.startCol(); }\n\n  inline operator bool() const { return EvalIterator::operator bool() && EvalIterator::index() < m_end; }\n};\n\ntemplate<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel>\nclass unary_evaluator<Block<ArgType,BlockRows,BlockCols,InnerPanel>, IteratorBased>::OuterVectorInnerIterator\n{\n  // NOTE see above\n  enum { XprIsRowMajor = unary_evaluator::IsRowMajor };\n  const unary_evaluator& m_eval;\n  Index m_outerPos;\n  const Index m_innerIndex;\n  Index m_end;\n  EvalIterator m_it;\npublic:\n\n  EIGEN_STRONG_INLINE OuterVectorInnerIterator(const unary_evaluator& aEval, Index outer)\n    : m_eval(aEval),\n      m_outerPos( (XprIsRowMajor ? aEval.m_block.startCol() : aEval.m_block.startRow()) ),\n      m_innerIndex(XprIsRowMajor ? aEval.m_block.startRow() : aEval.m_block.startCol()),\n      m_end(XprIsRowMajor ? aEval.m_block.startCol()+aEval.m_block.blockCols() : aEval.m_block.startRow()+aEval.m_block.blockRows()),\n      m_it(m_eval.m_argImpl, m_outerPos)\n  {\n    EIGEN_UNUSED_VARIABLE(outer);\n    eigen_assert(outer==0);\n\n    while(m_it && m_it.index() < m_innerIndex) ++m_it;\n    if((!m_it) || (m_it.index()!=m_innerIndex))\n      ++(*this);\n  }\n\n  inline StorageIndex index() const { return convert_index<StorageIndex>(m_outerPos - (XprIsRowMajor ? m_eval.m_block.startCol() : m_eval.m_block.startRow())); }\n  inline Index outer()  const { return 0; }\n  inline Index row()    const { return XprIsRowMajor ? 0 : index(); }\n  inline Index col()    const { return XprIsRowMajor ? 
index() : 0; }\n\n  inline Scalar value() const { return m_it.value(); }\n  inline Scalar& valueRef() { return m_it.valueRef(); }\n\n  inline OuterVectorInnerIterator& operator++()\n  {\n    // search next non-zero entry\n    while(++m_outerPos<m_end)\n    {\n      // Restart iterator at the next inner-vector:\n      m_it.~EvalIterator();\n      ::new (&m_it) EvalIterator(m_eval.m_argImpl, m_outerPos);\n      // search for the key m_innerIndex in the current outer-vector\n      while(m_it && m_it.index() < m_innerIndex) ++m_it;\n      if(m_it && m_it.index()==m_innerIndex) break;\n    }\n    return *this;\n  }\n\n  inline operator bool() const { return m_outerPos < m_end; }\n};\n\ntemplate<typename _Scalar, int _Options, typename _StorageIndex, int BlockRows, int BlockCols>\nstruct unary_evaluator<Block<SparseMatrix<_Scalar, _Options, _StorageIndex>,BlockRows,BlockCols,true>, IteratorBased>\n  : evaluator<SparseCompressedBase<Block<SparseMatrix<_Scalar, _Options, _StorageIndex>,BlockRows,BlockCols,true> > >\n{\n  typedef Block<SparseMatrix<_Scalar, _Options, _StorageIndex>,BlockRows,BlockCols,true> XprType;\n  typedef evaluator<SparseCompressedBase<XprType> > Base;\n  explicit unary_evaluator(const XprType &xpr) : Base(xpr) {}\n};\n\ntemplate<typename _Scalar, int _Options, typename _StorageIndex, int BlockRows, int BlockCols>\nstruct unary_evaluator<Block<const SparseMatrix<_Scalar, _Options, _StorageIndex>,BlockRows,BlockCols,true>, IteratorBased>\n  : evaluator<SparseCompressedBase<Block<const SparseMatrix<_Scalar, _Options, _StorageIndex>,BlockRows,BlockCols,true> > >\n{\n  typedef Block<const SparseMatrix<_Scalar, _Options, _StorageIndex>,BlockRows,BlockCols,true> XprType;\n  typedef evaluator<SparseCompressedBase<XprType> > Base;\n  explicit unary_evaluator(const XprType &xpr) : Base(xpr) {}\n};\n\n} // end namespace internal\n\n\n} // end namespace Eigen\n\n#endif // EIGEN_SPARSE_BLOCK_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/SparseCore/SparseColEtree.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n\n/* \n \n * NOTE: This file is the modified version of sp_coletree.c file in SuperLU \n \n * -- SuperLU routine (version 3.1) --\n * Univ. of California Berkeley, Xerox Palo Alto Research Center,\n * and Lawrence Berkeley National Lab.\n * August 1, 2008\n *\n * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.\n *\n * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY\n * EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.\n *\n * Permission is hereby granted to use or copy this program for any\n * purpose, provided the above notices are retained on all copies.\n * Permission to modify the code and to distribute modified code is\n * granted, provided the above notices are retained, and a notice that\n * the code was modified is included with the above copyright notice.\n */\n#ifndef SPARSE_COLETREE_H\n#define SPARSE_COLETREE_H\n\nnamespace Eigen {\n\nnamespace internal {\n\n/** Find the root of the tree/set containing the vertex i : Use Path halving */ \ntemplate<typename Index, typename IndexVector>\nIndex etree_find (Index i, IndexVector& pp)\n{\n  Index p = pp(i); // Parent \n  Index gp = pp(p); // Grand parent \n  while (gp != p) \n  {\n    pp(i) = gp; // Parent pointer on find path is changed to former grand parent\n    i = gp; \n    p = pp(i);\n    gp = pp(p);\n  }\n  return p; \n}\n\n/** Compute the column elimination tree of a sparse matrix\n  * \\param mat The matrix in column-major format. \n  * \\param parent The elimination tree\n  * \\param firstRowElt The column index of the first element in each row\n  * \\param perm The permutation to apply to the column of \\b mat\n  */\ntemplate <typename MatrixType, typename IndexVector>\nint coletree(const MatrixType& mat, IndexVector& parent, IndexVector& firstRowElt, typename MatrixType::StorageIndex *perm=0)\n{\n  typedef typename MatrixType::StorageIndex StorageIndex;\n  StorageIndex nc = convert_index<StorageIndex>(mat.cols()); // Number of columns\n  StorageIndex m = convert_index<StorageIndex>(mat.rows());\n  StorageIndex diagSize = (std::min)(nc,m);\n  IndexVector root(nc); // root of subtree of etree \n  root.setZero();\n  IndexVector pp(nc); // disjoint sets \n  pp.setZero(); // Initialize disjoint sets \n  parent.resize(mat.cols());\n  //Compute first nonzero column in each row \n  firstRowElt.resize(m);\n  firstRowElt.setConstant(nc);\n  firstRowElt.segment(0, diagSize).setLinSpaced(diagSize, 0, diagSize-1);\n  bool found_diag;\n  for (StorageIndex col = 0; col < nc; col++)\n  {\n    StorageIndex pcol = col;\n    if(perm) pcol  = perm[col];\n    for (typename MatrixType::InnerIterator it(mat, pcol); it; ++it)\n    { \n      Index row = it.row();\n      firstRowElt(row) = (std::min)(firstRowElt(row), col);\n    }\n  }\n  /* Compute etree by Liu's algorithm for symmetric matrices,\n          except use (firstRowElt[r],c) in place of an edge (r,c) of A.\n    Thus each row clique in A'*A is replaced by a star\n    centered at its first vertex, which has the same fill. 
*/\n  StorageIndex rset, cset, rroot;\n  for (StorageIndex col = 0; col < nc; col++) \n  {\n    found_diag = col>=m;\n    pp(col) = col; \n    cset = col; \n    root(cset) = col; \n    parent(col) = nc; \n    /* The diagonal element is treated here even if it does not exist in the matrix\n     * hence the loop is executed once more */ \n    StorageIndex pcol = col;\n    if(perm) pcol  = perm[col];\n    for (typename MatrixType::InnerIterator it(mat, pcol); it||!found_diag; ++it)\n    { //  A sequence of interleaved find and union is performed \n      Index i = col;\n      if(it) i = it.index();\n      if (i == col) found_diag = true;\n      \n      StorageIndex row = firstRowElt(i);\n      if (row >= col) continue; \n      rset = internal::etree_find(row, pp); // Find the name of the set containing row\n      rroot = root(rset);\n      if (rroot != col) \n      {\n        parent(rroot) = col; \n        pp(cset) = rset; \n        cset = rset; \n        root(cset) = col; \n      }\n    }\n  }\n  return 0;  \n}\n\n/** \n  * Depth-first search from vertex n.  No recursion.\n  * This routine was contributed by Cédric Doucet, CEDRAT Group, Meylan, France.\n*/\ntemplate <typename IndexVector>\nvoid nr_etdfs (typename IndexVector::Scalar n, IndexVector& parent, IndexVector& first_kid, IndexVector& next_kid, IndexVector& post, typename IndexVector::Scalar postnum)\n{\n  typedef typename IndexVector::Scalar StorageIndex;\n  StorageIndex current = n, first, next;\n  while (postnum != n) \n  {\n    // No kid for the current node\n    first = first_kid(current);\n    \n    // no kid for the current node\n    if (first == -1) \n    {\n      // Numbering this node because it has no kid \n      post(current) = postnum++;\n      \n      // looking for the next kid \n      next = next_kid(current); \n      while (next == -1) \n      {\n        // No more kids : back to the parent node\n        current = parent(current); \n        // numbering the parent node \n        post(current) = postnum++;\n        \n        // Get the next kid \n        next = next_kid(current); \n      }\n      // stopping criterion \n      if (postnum == n+1) return; \n      \n      // Updating current node \n      current = next; \n    }\n    else \n    {\n      current = first; \n    }\n  }\n}\n\n\n/**\n  * \\brief Post order a tree \n  * \\param n the number of nodes\n  * \\param parent Input tree\n  * \\param post postordered tree\n  */\ntemplate <typename IndexVector>\nvoid treePostorder(typename IndexVector::Scalar n, IndexVector& parent, IndexVector& post)\n{\n  typedef typename IndexVector::Scalar StorageIndex;\n  IndexVector first_kid, next_kid; // Linked list of children \n  StorageIndex postnum; \n  // Allocate storage for working arrays and results \n  first_kid.resize(n+1); \n  next_kid.setZero(n+1);\n  post.setZero(n+1);\n  \n  // Set up structure describing children\n  first_kid.setConstant(-1); \n  for (StorageIndex v = n-1; v >= 0; v--) \n  {\n    StorageIndex dad = parent(v);\n    next_kid(v) = first_kid(dad); \n    first_kid(dad) = v; \n  }\n  \n  // Depth-first search from dummy root vertex #n\n  postnum = 0; \n  internal::nr_etdfs(n, parent, first_kid, next_kid, post, postnum);\n}\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // SPARSE_COLETREE_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/SparseCore/SparseCompressedBase.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2015 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_SPARSE_COMPRESSED_BASE_H\n#define EIGEN_SPARSE_COMPRESSED_BASE_H\n\nnamespace Eigen { \n\ntemplate<typename Derived> class SparseCompressedBase;\n  \nnamespace internal {\n\ntemplate<typename Derived>\nstruct traits<SparseCompressedBase<Derived> > : traits<Derived>\n{};\n\n} // end namespace internal\n\n/** \\ingroup SparseCore_Module\n  * \\class SparseCompressedBase\n  * \\brief Common base class for sparse [compressed]-{row|column}-storage format.\n  *\n  * This class defines the common interface for all derived classes implementing the compressed sparse storage format, such as:\n  *  - SparseMatrix\n  *  - Ref<SparseMatrixType,Options>\n  *  - Map<SparseMatrixType>\n  *\n  */\ntemplate<typename Derived>\nclass SparseCompressedBase\n  : public SparseMatrixBase<Derived>\n{\n  public:\n    typedef SparseMatrixBase<Derived> Base;\n    EIGEN_SPARSE_PUBLIC_INTERFACE(SparseCompressedBase)\n    using Base::operator=;\n    using Base::IsRowMajor;\n    \n    class InnerIterator;\n    class ReverseInnerIterator;\n    \n  protected:\n    typedef typename Base::IndexVector IndexVector;\n    Eigen::Map<IndexVector> innerNonZeros() { return Eigen::Map<IndexVector>(innerNonZeroPtr(), isCompressed()?0:derived().outerSize()); }\n    const  Eigen::Map<const IndexVector> innerNonZeros() const { return Eigen::Map<const IndexVector>(innerNonZeroPtr(), isCompressed()?0:derived().outerSize()); }\n        \n  public:\n    \n    /** \\returns the number of non zero coefficients */\n    inline Index nonZeros() const\n    {\n      if(Derived::IsVectorAtCompileTime && outerIndexPtr()==0)\n        return derived().nonZeros();\n      else if(isCompressed())\n        return outerIndexPtr()[derived().outerSize()]-outerIndexPtr()[0];\n      else if(derived().outerSize()==0)\n        return 0;\n      else\n        return innerNonZeros().sum();\n    }\n    \n    /** \\returns a const pointer to the array of values.\n      * This function is aimed at interoperability with other libraries.\n      * \\sa innerIndexPtr(), outerIndexPtr() */\n    inline const Scalar* valuePtr() const { return derived().valuePtr(); }\n    /** \\returns a non-const pointer to the array of values.\n      * This function is aimed at interoperability with other libraries.\n      * \\sa innerIndexPtr(), outerIndexPtr() */\n    inline Scalar* valuePtr() { return derived().valuePtr(); }\n\n    /** \\returns a const pointer to the array of inner indices.\n      * This function is aimed at interoperability with other libraries.\n      * \\sa valuePtr(), outerIndexPtr() */\n    inline const StorageIndex* innerIndexPtr() const { return derived().innerIndexPtr(); }\n    /** \\returns a non-const pointer to the array of inner indices.\n      * This function is aimed at interoperability with other libraries.\n      * \\sa valuePtr(), outerIndexPtr() */\n    inline StorageIndex* innerIndexPtr() { return derived().innerIndexPtr(); }\n\n    /** \\returns a const pointer to the array of the starting positions of the inner vectors.\n      * This function is aimed at interoperability with other libraries.\n      * \\warning it returns the null pointer 0 for 
SparseVector\n      * \\sa valuePtr(), innerIndexPtr() */\n    inline const StorageIndex* outerIndexPtr() const { return derived().outerIndexPtr(); }\n    /** \\returns a non-const pointer to the array of the starting positions of the inner vectors.\n      * This function is aimed at interoperability with other libraries.\n      * \\warning it returns the null pointer 0 for SparseVector\n      * \\sa valuePtr(), innerIndexPtr() */\n    inline StorageIndex* outerIndexPtr() { return derived().outerIndexPtr(); }\n\n    /** \\returns a const pointer to the array of the number of non zeros of the inner vectors.\n      * This function is aimed at interoperability with other libraries.\n      * \\warning it returns the null pointer 0 in compressed mode */\n    inline const StorageIndex* innerNonZeroPtr() const { return derived().innerNonZeroPtr(); }\n    /** \\returns a non-const pointer to the array of the number of non zeros of the inner vectors.\n      * This function is aimed at interoperability with other libraries.\n      * \\warning it returns the null pointer 0 in compressed mode */\n    inline StorageIndex* innerNonZeroPtr() { return derived().innerNonZeroPtr(); }\n    \n    /** \\returns whether \\c *this is in compressed form. */\n    inline bool isCompressed() const { return innerNonZeroPtr()==0; }\n\n    /** \\returns a read-only view of the stored coefficients as a 1D array expression.\n      *\n      * \\warning this method is for \\b compressed \\b storage \\b only, and it will trigger an assertion otherwise.\n      *\n      * \\sa valuePtr(), isCompressed() */\n    const Map<const Array<Scalar,Dynamic,1> > coeffs() const { eigen_assert(isCompressed()); return Array<Scalar,Dynamic,1>::Map(valuePtr(),nonZeros()); }\n\n    /** \\returns a read-write view of the stored coefficients as a 1D array expression\n      *\n      * \\warning this method is for \\b compressed \\b storage \\b only, and it will trigger an assertion otherwise.\n      *\n      * Here is an example:\n      * \\include SparseMatrix_coeffs.cpp\n      * and the output is:\n      * \\include SparseMatrix_coeffs.out\n      *\n      * \\sa valuePtr(), isCompressed() */\n    Map<Array<Scalar,Dynamic,1> > coeffs() { eigen_assert(isCompressed()); return Array<Scalar,Dynamic,1>::Map(valuePtr(),nonZeros()); }\n\n  protected:\n    /** Default constructor. Do nothing. */\n    SparseCompressedBase() {}\n\n    /** \\internal return the index of the coeff at (row,col) or just before if it does not exist.\n      * This is an analogue of std::lower_bound.\n      */\n    internal::LowerBoundIndex lower_bound(Index row, Index col) const\n    {\n      eigen_internal_assert(row>=0 && row<this->rows() && col>=0 && col<this->cols());\n\n      const Index outer = Derived::IsRowMajor ? row : col;\n      const Index inner = Derived::IsRowMajor ? col : row;\n\n      Index start = this->outerIndexPtr()[outer];\n      Index end = this->isCompressed() ? 
this->outerIndexPtr()[outer+1] : this->outerIndexPtr()[outer] + this->innerNonZeroPtr()[outer];\n      eigen_assert(end>=start && \"you are using a non finalized sparse matrix or written coefficient does not exist\");\n      internal::LowerBoundIndex p;\n      p.value = std::lower_bound(this->innerIndexPtr()+start, this->innerIndexPtr()+end,inner) - this->innerIndexPtr();\n      p.found = (p.value<end) && (this->innerIndexPtr()[p.value]==inner);\n      return p;\n    }\n\n    friend struct internal::evaluator<SparseCompressedBase<Derived> >;\n\n  private:\n    template<typename OtherDerived> explicit SparseCompressedBase(const SparseCompressedBase<OtherDerived>&);\n};\n\ntemplate<typename Derived>\nclass SparseCompressedBase<Derived>::InnerIterator\n{\n  public:\n    InnerIterator()\n      : m_values(0), m_indices(0), m_outer(0), m_id(0), m_end(0)\n    {}\n\n    InnerIterator(const InnerIterator& other)\n      : m_values(other.m_values), m_indices(other.m_indices), m_outer(other.m_outer), m_id(other.m_id), m_end(other.m_end)\n    {}\n\n    InnerIterator& operator=(const InnerIterator& other)\n    {\n      m_values = other.m_values;\n      m_indices = other.m_indices;\n      const_cast<OuterType&>(m_outer).setValue(other.m_outer.value());\n      m_id = other.m_id;\n      m_end = other.m_end;\n      return *this;\n    }\n\n    InnerIterator(const SparseCompressedBase& mat, Index outer)\n      : m_values(mat.valuePtr()), m_indices(mat.innerIndexPtr()), m_outer(outer)\n    {\n      if(Derived::IsVectorAtCompileTime && mat.outerIndexPtr()==0)\n      {\n        m_id = 0;\n        m_end = mat.nonZeros();\n      }\n      else\n      {\n        m_id = mat.outerIndexPtr()[outer];\n        if(mat.isCompressed())\n          m_end = mat.outerIndexPtr()[outer+1];\n        else\n          m_end = m_id + mat.innerNonZeroPtr()[outer];\n      }\n    }\n\n    explicit InnerIterator(const SparseCompressedBase& mat)\n      : m_values(mat.valuePtr()), m_indices(mat.innerIndexPtr()), m_outer(0), m_id(0), m_end(mat.nonZeros())\n    {\n      EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived);\n    }\n\n    explicit InnerIterator(const internal::CompressedStorage<Scalar,StorageIndex>& data)\n      : m_values(data.valuePtr()), m_indices(data.indexPtr()), m_outer(0), m_id(0), m_end(data.size())\n    {\n      EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived);\n    }\n\n    inline InnerIterator& operator++() { m_id++; return *this; }\n    inline InnerIterator& operator+=(Index i) { m_id += i ; return *this; }\n\n    inline InnerIterator operator+(Index i) \n    { \n        InnerIterator result = *this;\n        result += i;\n        return result;\n    }\n\n    inline const Scalar& value() const { return m_values[m_id]; }\n    inline Scalar& valueRef() { return const_cast<Scalar&>(m_values[m_id]); }\n\n    inline StorageIndex index() const { return m_indices[m_id]; }\n    inline Index outer() const { return m_outer.value(); }\n    inline Index row() const { return IsRowMajor ? m_outer.value() : index(); }\n    inline Index col() const { return IsRowMajor ? 
index() : m_outer.value(); }\n\n    inline operator bool() const { return (m_id < m_end); }\n\n  protected:\n    const Scalar* m_values;\n    const StorageIndex* m_indices;\n    typedef internal::variable_if_dynamic<Index,Derived::IsVectorAtCompileTime?0:Dynamic> OuterType;\n    const OuterType m_outer;\n    Index m_id;\n    Index m_end;\n  private:\n    // If you get here, then you're not using the right InnerIterator type, e.g.:\n    //   SparseMatrix<double,RowMajor> A;\n    //   SparseMatrix<double>::InnerIterator it(A,0);\n    template<typename T> InnerIterator(const SparseMatrixBase<T>&, Index outer);\n};\n\ntemplate<typename Derived>\nclass SparseCompressedBase<Derived>::ReverseInnerIterator\n{\n  public:\n    ReverseInnerIterator(const SparseCompressedBase& mat, Index outer)\n      : m_values(mat.valuePtr()), m_indices(mat.innerIndexPtr()), m_outer(outer)\n    {\n      if(Derived::IsVectorAtCompileTime && mat.outerIndexPtr()==0)\n      {\n        m_start = 0;\n        m_id = mat.nonZeros();\n      }\n      else\n      {\n        m_start = mat.outerIndexPtr()[outer];\n        if(mat.isCompressed())\n          m_id = mat.outerIndexPtr()[outer+1];\n        else\n          m_id = m_start + mat.innerNonZeroPtr()[outer];\n      }\n    }\n\n    explicit ReverseInnerIterator(const SparseCompressedBase& mat)\n      : m_values(mat.valuePtr()), m_indices(mat.innerIndexPtr()), m_outer(0), m_start(0), m_id(mat.nonZeros())\n    {\n      EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived);\n    }\n\n    explicit ReverseInnerIterator(const internal::CompressedStorage<Scalar,StorageIndex>& data)\n      : m_values(data.valuePtr()), m_indices(data.indexPtr()), m_outer(0), m_start(0), m_id(data.size())\n    {\n      EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived);\n    }\n\n    inline ReverseInnerIterator& operator--() { --m_id; return *this; }\n    inline ReverseInnerIterator& operator-=(Index i) { m_id -= i; return *this; }\n\n    inline ReverseInnerIterator operator-(Index i) \n    {\n        ReverseInnerIterator result = *this;\n        result -= i;\n        return result;\n    }\n\n    inline const Scalar& value() const { return m_values[m_id-1]; }\n    inline Scalar& valueRef() { return const_cast<Scalar&>(m_values[m_id-1]); }\n\n    inline StorageIndex index() const { return m_indices[m_id-1]; }\n    inline Index outer() const { return m_outer.value(); }\n    inline Index row() const { return IsRowMajor ? m_outer.value() : index(); }\n    inline Index col() const { return IsRowMajor ? 
index() : m_outer.value(); }\n\n    inline operator bool() const { return (m_id > m_start); }\n\n  protected:\n    const Scalar* m_values;\n    const StorageIndex* m_indices;\n    typedef internal::variable_if_dynamic<Index,Derived::IsVectorAtCompileTime?0:Dynamic> OuterType;\n    const OuterType m_outer;\n    Index m_start;\n    Index m_id;\n};\n\nnamespace internal {\n\ntemplate<typename Derived>\nstruct evaluator<SparseCompressedBase<Derived> >\n  : evaluator_base<Derived>\n{\n  typedef typename Derived::Scalar Scalar;\n  typedef typename Derived::InnerIterator InnerIterator;\n  \n  enum {\n    CoeffReadCost = NumTraits<Scalar>::ReadCost,\n    Flags = Derived::Flags\n  };\n  \n  evaluator() : m_matrix(0), m_zero(0)\n  {\n    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);\n  }\n  explicit evaluator(const Derived &mat) : m_matrix(&mat), m_zero(0)\n  {\n    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);\n  }\n  \n  inline Index nonZerosEstimate() const {\n    return m_matrix->nonZeros();\n  }\n  \n  operator Derived&() { return m_matrix->const_cast_derived(); }\n  operator const Derived&() const { return *m_matrix; }\n  \n  typedef typename DenseCoeffsBase<Derived,ReadOnlyAccessors>::CoeffReturnType CoeffReturnType;\n  const Scalar& coeff(Index row, Index col) const\n  {\n    Index p = find(row,col);\n\n    if(p==Dynamic)\n      return m_zero;\n    else\n      return m_matrix->const_cast_derived().valuePtr()[p];\n  }\n\n  Scalar& coeffRef(Index row, Index col)\n  {\n    Index p = find(row,col);\n    eigen_assert(p!=Dynamic && \"written coefficient does not exist\");\n    return m_matrix->const_cast_derived().valuePtr()[p];\n  }\n\nprotected:\n\n  Index find(Index row, Index col) const\n  {\n    internal::LowerBoundIndex p = m_matrix->lower_bound(row,col);\n    return p.found ? p.value : Dynamic;\n  }\n\n  const Derived *m_matrix;\n  const Scalar m_zero;\n};\n\n}\n\n} // end namespace Eigen\n\n#endif // EIGEN_SPARSE_COMPRESSED_BASE_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/SparseCore/SparseCwiseBinaryOp.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_SPARSE_CWISE_BINARY_OP_H\n#define EIGEN_SPARSE_CWISE_BINARY_OP_H\n\nnamespace Eigen { \n\n// Here we have to handle 3 cases:\n//  1 - sparse op dense\n//  2 - dense op sparse\n//  3 - sparse op sparse\n// We also need to implement a 4th iterator for:\n//  4 - dense op dense\n// Finally, we also need to distinguish between the product and other operations :\n//                configuration      returned mode\n//  1 - sparse op dense    product      sparse\n//                         generic      dense\n//  2 - dense op sparse    product      sparse\n//                         generic      dense\n//  3 - sparse op sparse   product      sparse\n//                         generic      sparse\n//  4 - dense op dense     product      dense\n//                         generic      dense\n//\n// TODO to ease compiler job, we could specialize product/quotient with a scalar\n//      and fallback to cwise-unary evaluator using bind1st_op and bind2nd_op.\n\ntemplate<typename BinaryOp, typename Lhs, typename Rhs>\nclass CwiseBinaryOpImpl<BinaryOp, Lhs, Rhs, Sparse>\n  : public SparseMatrixBase<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >\n{\n  public:\n    typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> Derived;\n    typedef SparseMatrixBase<Derived> Base;\n    EIGEN_SPARSE_PUBLIC_INTERFACE(Derived)\n    CwiseBinaryOpImpl()\n    {\n      EIGEN_STATIC_ASSERT((\n                (!internal::is_same<typename internal::traits<Lhs>::StorageKind,\n                                    typename internal::traits<Rhs>::StorageKind>::value)\n            ||  ((internal::evaluator<Lhs>::Flags&RowMajorBit) == (internal::evaluator<Rhs>::Flags&RowMajorBit))),\n            THE_STORAGE_ORDER_OF_BOTH_SIDES_MUST_MATCH);\n    }\n};\n\nnamespace internal {\n\n  \n// Generic \"sparse OP sparse\"\ntemplate<typename XprType> struct binary_sparse_evaluator;\n\ntemplate<typename BinaryOp, typename Lhs, typename Rhs>\nstruct binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>, IteratorBased, IteratorBased>\n  : evaluator_base<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >\n{\nprotected:\n  typedef typename evaluator<Lhs>::InnerIterator  LhsIterator;\n  typedef typename evaluator<Rhs>::InnerIterator  RhsIterator;\n  typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> XprType;\n  typedef typename traits<XprType>::Scalar Scalar;\n  typedef typename XprType::StorageIndex StorageIndex;\npublic:\n\n  class InnerIterator\n  {\n  public:\n    \n    EIGEN_STRONG_INLINE InnerIterator(const binary_evaluator& aEval, Index outer)\n      : m_lhsIter(aEval.m_lhsImpl,outer), m_rhsIter(aEval.m_rhsImpl,outer), m_functor(aEval.m_functor)\n    {\n      this->operator++();\n    }\n\n    EIGEN_STRONG_INLINE InnerIterator& operator++()\n    {\n      if (m_lhsIter && m_rhsIter && (m_lhsIter.index() == m_rhsIter.index()))\n      {\n        m_id = m_lhsIter.index();\n        m_value = m_functor(m_lhsIter.value(), m_rhsIter.value());\n        ++m_lhsIter;\n        ++m_rhsIter;\n      }\n      else if (m_lhsIter && (!m_rhsIter || (m_lhsIter.index() < m_rhsIter.index())))\n      {\n        m_id = m_lhsIter.index();\n        m_value = m_functor(m_lhsIter.value(), Scalar(0));\n        ++m_lhsIter;\n  
    }\n      else if (m_rhsIter && (!m_lhsIter || (m_lhsIter.index() > m_rhsIter.index())))\n      {\n        m_id = m_rhsIter.index();\n        m_value = m_functor(Scalar(0), m_rhsIter.value());\n        ++m_rhsIter;\n      }\n      else\n      {\n        m_value = Scalar(0); // this is to avoid a compilation warning\n        m_id = -1;\n      }\n      return *this;\n    }\n\n    EIGEN_STRONG_INLINE Scalar value() const { return m_value; }\n\n    EIGEN_STRONG_INLINE StorageIndex index() const { return m_id; }\n    EIGEN_STRONG_INLINE Index outer() const { return m_lhsIter.outer(); }\n    EIGEN_STRONG_INLINE Index row() const { return Lhs::IsRowMajor ? m_lhsIter.row() : index(); }\n    EIGEN_STRONG_INLINE Index col() const { return Lhs::IsRowMajor ? index() : m_lhsIter.col(); }\n\n    EIGEN_STRONG_INLINE operator bool() const { return m_id>=0; }\n\n  protected:\n    LhsIterator m_lhsIter;\n    RhsIterator m_rhsIter;\n    const BinaryOp& m_functor;\n    Scalar m_value;\n    StorageIndex m_id;\n  };\n  \n  \n  enum {\n    CoeffReadCost = int(evaluator<Lhs>::CoeffReadCost) + int(evaluator<Rhs>::CoeffReadCost) + int(functor_traits<BinaryOp>::Cost),\n    Flags = XprType::Flags\n  };\n  \n  explicit binary_evaluator(const XprType& xpr)\n    : m_functor(xpr.functor()),\n      m_lhsImpl(xpr.lhs()), \n      m_rhsImpl(xpr.rhs())  \n  {\n    EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits<BinaryOp>::Cost);\n    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);\n  }\n  \n  inline Index nonZerosEstimate() const {\n    return m_lhsImpl.nonZerosEstimate() + m_rhsImpl.nonZerosEstimate();\n  }\n\nprotected:\n  const BinaryOp m_functor;\n  evaluator<Lhs> m_lhsImpl;\n  evaluator<Rhs> m_rhsImpl;\n};\n\n// dense op sparse\ntemplate<typename BinaryOp, typename Lhs, typename Rhs>\nstruct binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>, IndexBased, IteratorBased>\n  : evaluator_base<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >\n{\nprotected:\n  typedef typename evaluator<Rhs>::InnerIterator  RhsIterator;\n  typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> XprType;\n  typedef typename traits<XprType>::Scalar Scalar;\n  typedef typename XprType::StorageIndex StorageIndex;\npublic:\n\n  class InnerIterator\n  {\n    enum { IsRowMajor = (int(Rhs::Flags)&RowMajorBit)==RowMajorBit };\n  public:\n\n    EIGEN_STRONG_INLINE InnerIterator(const binary_evaluator& aEval, Index outer)\n      : m_lhsEval(aEval.m_lhsImpl), m_rhsIter(aEval.m_rhsImpl,outer), m_functor(aEval.m_functor), m_value(0), m_id(-1), m_innerSize(aEval.m_expr.rhs().innerSize())\n    {\n      this->operator++();\n    }\n\n    EIGEN_STRONG_INLINE InnerIterator& operator++()\n    {\n      ++m_id;\n      if(m_id<m_innerSize)\n      {\n        Scalar lhsVal = m_lhsEval.coeff(IsRowMajor?m_rhsIter.outer():m_id,\n                                        IsRowMajor?m_id:m_rhsIter.outer());\n        if(m_rhsIter && m_rhsIter.index()==m_id)\n        {\n          m_value = m_functor(lhsVal, m_rhsIter.value());\n          ++m_rhsIter;\n        }\n        else\n          m_value = m_functor(lhsVal, Scalar(0));\n      }\n\n      return *this;\n    }\n\n    EIGEN_STRONG_INLINE Scalar value() const { eigen_internal_assert(m_id<m_innerSize); return m_value; }\n\n    EIGEN_STRONG_INLINE StorageIndex index() const { return m_id; }\n    EIGEN_STRONG_INLINE Index outer() const { return m_rhsIter.outer(); }\n    EIGEN_STRONG_INLINE Index row() const { return IsRowMajor ? m_rhsIter.outer() : m_id; }\n    EIGEN_STRONG_INLINE Index col() const { return IsRowMajor ? 
m_id : m_rhsIter.outer(); }\n\n    EIGEN_STRONG_INLINE operator bool() const { return m_id<m_innerSize; }\n\n  protected:\n    const evaluator<Lhs> &m_lhsEval;\n    RhsIterator m_rhsIter;\n    const BinaryOp& m_functor;\n    Scalar m_value;\n    StorageIndex m_id;\n    StorageIndex m_innerSize;\n  };\n\n\n  enum {\n    CoeffReadCost = int(evaluator<Lhs>::CoeffReadCost) + int(evaluator<Rhs>::CoeffReadCost) + int(functor_traits<BinaryOp>::Cost),\n    Flags = XprType::Flags\n  };\n\n  explicit binary_evaluator(const XprType& xpr)\n    : m_functor(xpr.functor()),\n      m_lhsImpl(xpr.lhs()),\n      m_rhsImpl(xpr.rhs()),\n      m_expr(xpr)\n  {\n    EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits<BinaryOp>::Cost);\n    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);\n  }\n\n  inline Index nonZerosEstimate() const {\n    return m_expr.size();\n  }\n\nprotected:\n  const BinaryOp m_functor;\n  evaluator<Lhs> m_lhsImpl;\n  evaluator<Rhs> m_rhsImpl;\n  const XprType &m_expr;\n};\n\n// sparse op dense\ntemplate<typename BinaryOp, typename Lhs, typename Rhs>\nstruct binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>, IteratorBased, IndexBased>\n  : evaluator_base<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >\n{\nprotected:\n  typedef typename evaluator<Lhs>::InnerIterator  LhsIterator;\n  typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> XprType;\n  typedef typename traits<XprType>::Scalar Scalar;\n  typedef typename XprType::StorageIndex StorageIndex;\npublic:\n\n  class InnerIterator\n  {\n    enum { IsRowMajor = (int(Lhs::Flags)&RowMajorBit)==RowMajorBit };\n  public:\n\n    EIGEN_STRONG_INLINE InnerIterator(const binary_evaluator& aEval, Index outer)\n      : m_lhsIter(aEval.m_lhsImpl,outer), m_rhsEval(aEval.m_rhsImpl), m_functor(aEval.m_functor), m_value(0), m_id(-1), m_innerSize(aEval.m_expr.lhs().innerSize())\n    {\n      this->operator++();\n    }\n\n    EIGEN_STRONG_INLINE InnerIterator& operator++()\n    {\n      ++m_id;\n      if(m_id<m_innerSize)\n      {\n        Scalar rhsVal = m_rhsEval.coeff(IsRowMajor?m_lhsIter.outer():m_id,\n                                        IsRowMajor?m_id:m_lhsIter.outer());\n        if(m_lhsIter && m_lhsIter.index()==m_id)\n        {\n          m_value = m_functor(m_lhsIter.value(), rhsVal);\n          ++m_lhsIter;\n        }\n        else\n          m_value = m_functor(Scalar(0),rhsVal);\n      }\n\n      return *this;\n    }\n\n    EIGEN_STRONG_INLINE Scalar value() const { eigen_internal_assert(m_id<m_innerSize); return m_value; }\n\n    EIGEN_STRONG_INLINE StorageIndex index() const { return m_id; }\n    EIGEN_STRONG_INLINE Index outer() const { return m_lhsIter.outer(); }\n    EIGEN_STRONG_INLINE Index row() const { return IsRowMajor ? m_lhsIter.outer() : m_id; }\n    EIGEN_STRONG_INLINE Index col() const { return IsRowMajor ? 
m_id : m_lhsIter.outer(); }\n\n    EIGEN_STRONG_INLINE operator bool() const { return m_id<m_innerSize; }\n\n  protected:\n    LhsIterator m_lhsIter;\n    const evaluator<Rhs> &m_rhsEval;\n    const BinaryOp& m_functor;\n    Scalar m_value;\n    StorageIndex m_id;\n    StorageIndex m_innerSize;\n  };\n\n\n  enum {\n    CoeffReadCost = int(evaluator<Lhs>::CoeffReadCost) + int(evaluator<Rhs>::CoeffReadCost) + int(functor_traits<BinaryOp>::Cost),\n    Flags = XprType::Flags\n  };\n\n  explicit binary_evaluator(const XprType& xpr)\n    : m_functor(xpr.functor()),\n      m_lhsImpl(xpr.lhs()),\n      m_rhsImpl(xpr.rhs()),\n      m_expr(xpr)\n  {\n    EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits<BinaryOp>::Cost);\n    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);\n  }\n\n  inline Index nonZerosEstimate() const {\n    return m_expr.size();\n  }\n\nprotected:\n  const BinaryOp m_functor;\n  evaluator<Lhs> m_lhsImpl;\n  evaluator<Rhs> m_rhsImpl;\n  const XprType &m_expr;\n};\n\ntemplate<typename T,\n         typename LhsKind   = typename evaluator_traits<typename T::Lhs>::Kind,\n         typename RhsKind   = typename evaluator_traits<typename T::Rhs>::Kind,\n         typename LhsScalar = typename traits<typename T::Lhs>::Scalar,\n         typename RhsScalar = typename traits<typename T::Rhs>::Scalar> struct sparse_conjunction_evaluator;\n\n// \"sparse .* sparse\"\ntemplate<typename T1, typename T2, typename Lhs, typename Rhs>\nstruct binary_evaluator<CwiseBinaryOp<scalar_product_op<T1,T2>, Lhs, Rhs>, IteratorBased, IteratorBased>\n  : sparse_conjunction_evaluator<CwiseBinaryOp<scalar_product_op<T1,T2>, Lhs, Rhs> >\n{\n  typedef CwiseBinaryOp<scalar_product_op<T1,T2>, Lhs, Rhs> XprType;\n  typedef sparse_conjunction_evaluator<XprType> Base;\n  explicit binary_evaluator(const XprType& xpr) : Base(xpr) {}\n};\n// \"dense .* sparse\"\ntemplate<typename T1, typename T2, typename Lhs, typename Rhs>\nstruct binary_evaluator<CwiseBinaryOp<scalar_product_op<T1,T2>, Lhs, Rhs>, IndexBased, IteratorBased>\n  : sparse_conjunction_evaluator<CwiseBinaryOp<scalar_product_op<T1,T2>, Lhs, Rhs> >\n{\n  typedef CwiseBinaryOp<scalar_product_op<T1,T2>, Lhs, Rhs> XprType;\n  typedef sparse_conjunction_evaluator<XprType> Base;\n  explicit binary_evaluator(const XprType& xpr) : Base(xpr) {}\n};\n// \"sparse .* dense\"\ntemplate<typename T1, typename T2, typename Lhs, typename Rhs>\nstruct binary_evaluator<CwiseBinaryOp<scalar_product_op<T1,T2>, Lhs, Rhs>, IteratorBased, IndexBased>\n  : sparse_conjunction_evaluator<CwiseBinaryOp<scalar_product_op<T1,T2>, Lhs, Rhs> >\n{\n  typedef CwiseBinaryOp<scalar_product_op<T1,T2>, Lhs, Rhs> XprType;\n  typedef sparse_conjunction_evaluator<XprType> Base;\n  explicit binary_evaluator(const XprType& xpr) : Base(xpr) {}\n};\n\n// \"sparse ./ dense\"\ntemplate<typename T1, typename T2, typename Lhs, typename Rhs>\nstruct binary_evaluator<CwiseBinaryOp<scalar_quotient_op<T1,T2>, Lhs, Rhs>, IteratorBased, IndexBased>\n  : sparse_conjunction_evaluator<CwiseBinaryOp<scalar_quotient_op<T1,T2>, Lhs, Rhs> >\n{\n  typedef CwiseBinaryOp<scalar_quotient_op<T1,T2>, Lhs, Rhs> XprType;\n  typedef sparse_conjunction_evaluator<XprType> Base;\n  explicit binary_evaluator(const XprType& xpr) : Base(xpr) {}\n};\n\n// \"sparse && sparse\"\ntemplate<typename Lhs, typename Rhs>\nstruct binary_evaluator<CwiseBinaryOp<scalar_boolean_and_op, Lhs, Rhs>, IteratorBased, IteratorBased>\n  : sparse_conjunction_evaluator<CwiseBinaryOp<scalar_boolean_and_op, Lhs, Rhs> >\n{\n  typedef 
CwiseBinaryOp<scalar_boolean_and_op, Lhs, Rhs> XprType;\n  typedef sparse_conjunction_evaluator<XprType> Base;\n  explicit binary_evaluator(const XprType& xpr) : Base(xpr) {}\n};\n// \"dense && sparse\"\ntemplate<typename Lhs, typename Rhs>\nstruct binary_evaluator<CwiseBinaryOp<scalar_boolean_and_op, Lhs, Rhs>, IndexBased, IteratorBased>\n  : sparse_conjunction_evaluator<CwiseBinaryOp<scalar_boolean_and_op, Lhs, Rhs> >\n{\n  typedef CwiseBinaryOp<scalar_boolean_and_op, Lhs, Rhs> XprType;\n  typedef sparse_conjunction_evaluator<XprType> Base;\n  explicit binary_evaluator(const XprType& xpr) : Base(xpr) {}\n};\n// \"sparse && dense\"\ntemplate<typename Lhs, typename Rhs>\nstruct binary_evaluator<CwiseBinaryOp<scalar_boolean_and_op, Lhs, Rhs>, IteratorBased, IndexBased>\n  : sparse_conjunction_evaluator<CwiseBinaryOp<scalar_boolean_and_op, Lhs, Rhs> >\n{\n  typedef CwiseBinaryOp<scalar_boolean_and_op, Lhs, Rhs> XprType;\n  typedef sparse_conjunction_evaluator<XprType> Base;\n  explicit binary_evaluator(const XprType& xpr) : Base(xpr) {}\n};\n\n// \"sparse ^ sparse\"\ntemplate<typename XprType>\nstruct sparse_conjunction_evaluator<XprType, IteratorBased, IteratorBased>\n  : evaluator_base<XprType>\n{\nprotected:\n  typedef typename XprType::Functor BinaryOp;\n  typedef typename XprType::Lhs LhsArg;\n  typedef typename XprType::Rhs RhsArg;\n  typedef typename evaluator<LhsArg>::InnerIterator  LhsIterator;\n  typedef typename evaluator<RhsArg>::InnerIterator  RhsIterator;\n  typedef typename XprType::StorageIndex StorageIndex;\n  typedef typename traits<XprType>::Scalar Scalar;\npublic:\n\n  class InnerIterator\n  {\n  public:\n    \n    EIGEN_STRONG_INLINE InnerIterator(const sparse_conjunction_evaluator& aEval, Index outer)\n      : m_lhsIter(aEval.m_lhsImpl,outer), m_rhsIter(aEval.m_rhsImpl,outer), m_functor(aEval.m_functor)\n    {\n      while (m_lhsIter && m_rhsIter && (m_lhsIter.index() != m_rhsIter.index()))\n      {\n        if (m_lhsIter.index() < m_rhsIter.index())\n          ++m_lhsIter;\n        else\n          ++m_rhsIter;\n      }\n    }\n\n    EIGEN_STRONG_INLINE InnerIterator& operator++()\n    {\n      ++m_lhsIter;\n      ++m_rhsIter;\n      while (m_lhsIter && m_rhsIter && (m_lhsIter.index() != m_rhsIter.index()))\n      {\n        if (m_lhsIter.index() < m_rhsIter.index())\n          ++m_lhsIter;\n        else\n          ++m_rhsIter;\n      }\n      return *this;\n    }\n    \n    EIGEN_STRONG_INLINE Scalar value() const { return m_functor(m_lhsIter.value(), m_rhsIter.value()); }\n\n    EIGEN_STRONG_INLINE StorageIndex index() const { return m_lhsIter.index(); }\n    EIGEN_STRONG_INLINE Index outer() const { return m_lhsIter.outer(); }\n    EIGEN_STRONG_INLINE Index row() const { return m_lhsIter.row(); }\n    EIGEN_STRONG_INLINE Index col() const { return m_lhsIter.col(); }\n\n    EIGEN_STRONG_INLINE operator bool() const { return (m_lhsIter && m_rhsIter); }\n\n  protected:\n    LhsIterator m_lhsIter;\n    RhsIterator m_rhsIter;\n    const BinaryOp& m_functor;\n  };\n  \n  \n  enum {\n    CoeffReadCost = int(evaluator<LhsArg>::CoeffReadCost) + int(evaluator<RhsArg>::CoeffReadCost) + int(functor_traits<BinaryOp>::Cost),\n    Flags = XprType::Flags\n  };\n  \n  explicit sparse_conjunction_evaluator(const XprType& xpr)\n    : m_functor(xpr.functor()),\n      m_lhsImpl(xpr.lhs()), \n      m_rhsImpl(xpr.rhs())  \n  {\n    EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits<BinaryOp>::Cost);\n    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);\n  }\n  \n  inline Index 
nonZerosEstimate() const {\n    return (std::min)(m_lhsImpl.nonZerosEstimate(), m_rhsImpl.nonZerosEstimate());\n  }\n\nprotected:\n  const BinaryOp m_functor;\n  evaluator<LhsArg> m_lhsImpl;\n  evaluator<RhsArg> m_rhsImpl;\n};\n\n// \"dense ^ sparse\"\ntemplate<typename XprType>\nstruct sparse_conjunction_evaluator<XprType, IndexBased, IteratorBased>\n  : evaluator_base<XprType>\n{\nprotected:\n  typedef typename XprType::Functor BinaryOp;\n  typedef typename XprType::Lhs LhsArg;\n  typedef typename XprType::Rhs RhsArg;\n  typedef evaluator<LhsArg> LhsEvaluator;\n  typedef typename evaluator<RhsArg>::InnerIterator  RhsIterator;\n  typedef typename XprType::StorageIndex StorageIndex;\n  typedef typename traits<XprType>::Scalar Scalar;\npublic:\n\n  class InnerIterator\n  {\n    enum { IsRowMajor = (int(RhsArg::Flags)&RowMajorBit)==RowMajorBit };\n\n  public:\n    \n    EIGEN_STRONG_INLINE InnerIterator(const sparse_conjunction_evaluator& aEval, Index outer)\n      : m_lhsEval(aEval.m_lhsImpl), m_rhsIter(aEval.m_rhsImpl,outer), m_functor(aEval.m_functor), m_outer(outer)\n    {}\n\n    EIGEN_STRONG_INLINE InnerIterator& operator++()\n    {\n      ++m_rhsIter;\n      return *this;\n    }\n\n    EIGEN_STRONG_INLINE Scalar value() const\n    { return m_functor(m_lhsEval.coeff(IsRowMajor?m_outer:m_rhsIter.index(),IsRowMajor?m_rhsIter.index():m_outer), m_rhsIter.value()); }\n\n    EIGEN_STRONG_INLINE StorageIndex index() const { return m_rhsIter.index(); }\n    EIGEN_STRONG_INLINE Index outer() const { return m_rhsIter.outer(); }\n    EIGEN_STRONG_INLINE Index row() const { return m_rhsIter.row(); }\n    EIGEN_STRONG_INLINE Index col() const { return m_rhsIter.col(); }\n\n    EIGEN_STRONG_INLINE operator bool() const { return m_rhsIter; }\n    \n  protected:\n    const LhsEvaluator &m_lhsEval;\n    RhsIterator m_rhsIter;\n    const BinaryOp& m_functor;\n    const Index m_outer;\n  };\n  \n  \n  enum {\n    CoeffReadCost = int(evaluator<LhsArg>::CoeffReadCost) + int(evaluator<RhsArg>::CoeffReadCost) + int(functor_traits<BinaryOp>::Cost),\n    Flags = XprType::Flags\n  };\n  \n  explicit sparse_conjunction_evaluator(const XprType& xpr)\n    : m_functor(xpr.functor()),\n      m_lhsImpl(xpr.lhs()), \n      m_rhsImpl(xpr.rhs())  \n  {\n    EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits<BinaryOp>::Cost);\n    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);\n  }\n  \n  inline Index nonZerosEstimate() const {\n    return m_rhsImpl.nonZerosEstimate();\n  }\n\nprotected:\n  const BinaryOp m_functor;\n  evaluator<LhsArg> m_lhsImpl;\n  evaluator<RhsArg> m_rhsImpl;\n};\n\n// \"sparse ^ dense\"\ntemplate<typename XprType>\nstruct sparse_conjunction_evaluator<XprType, IteratorBased, IndexBased>\n  : evaluator_base<XprType>\n{\nprotected:\n  typedef typename XprType::Functor BinaryOp;\n  typedef typename XprType::Lhs LhsArg;\n  typedef typename XprType::Rhs RhsArg;\n  typedef typename evaluator<LhsArg>::InnerIterator LhsIterator;\n  typedef evaluator<RhsArg> RhsEvaluator;\n  typedef typename XprType::StorageIndex StorageIndex;\n  typedef typename traits<XprType>::Scalar Scalar;\npublic:\n\n  class InnerIterator\n  {\n    enum { IsRowMajor = (int(LhsArg::Flags)&RowMajorBit)==RowMajorBit };\n\n  public:\n    \n    EIGEN_STRONG_INLINE InnerIterator(const sparse_conjunction_evaluator& aEval, Index outer)\n      : m_lhsIter(aEval.m_lhsImpl,outer), m_rhsEval(aEval.m_rhsImpl), m_functor(aEval.m_functor), m_outer(outer)\n    {}\n\n    EIGEN_STRONG_INLINE InnerIterator& operator++()\n    {\n      ++m_lhsIter;\n      
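// only the sparse lhs iterator advances here; the dense rhs is read by coefficient access in value()\n      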
return *this;\n    }\n\n    EIGEN_STRONG_INLINE Scalar value() const\n    { return m_functor(m_lhsIter.value(),\n                       m_rhsEval.coeff(IsRowMajor?m_outer:m_lhsIter.index(),IsRowMajor?m_lhsIter.index():m_outer)); }\n\n    EIGEN_STRONG_INLINE StorageIndex index() const { return m_lhsIter.index(); }\n    EIGEN_STRONG_INLINE Index outer() const { return m_lhsIter.outer(); }\n    EIGEN_STRONG_INLINE Index row() const { return m_lhsIter.row(); }\n    EIGEN_STRONG_INLINE Index col() const { return m_lhsIter.col(); }\n\n    EIGEN_STRONG_INLINE operator bool() const { return m_lhsIter; }\n    \n  protected:\n    LhsIterator m_lhsIter;\n    const evaluator<RhsArg> &m_rhsEval;\n    const BinaryOp& m_functor;\n    const Index m_outer;\n  };\n  \n  \n  enum {\n    CoeffReadCost = int(evaluator<LhsArg>::CoeffReadCost) + int(evaluator<RhsArg>::CoeffReadCost) + int(functor_traits<BinaryOp>::Cost),\n    Flags = XprType::Flags\n  };\n  \n  explicit sparse_conjunction_evaluator(const XprType& xpr)\n    : m_functor(xpr.functor()),\n      m_lhsImpl(xpr.lhs()), \n      m_rhsImpl(xpr.rhs())  \n  {\n    EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits<BinaryOp>::Cost);\n    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);\n  }\n  \n  inline Index nonZerosEstimate() const {\n    return m_lhsImpl.nonZerosEstimate();\n  }\n\nprotected:\n  const BinaryOp m_functor;\n  evaluator<LhsArg> m_lhsImpl;\n  evaluator<RhsArg> m_rhsImpl;\n};\n\n}\n\n/***************************************************************************\n* Implementation of SparseMatrixBase and SparseCwise functions/operators\n***************************************************************************/\n\ntemplate<typename Derived>\ntemplate<typename OtherDerived>\nDerived& SparseMatrixBase<Derived>::operator+=(const EigenBase<OtherDerived> &other)\n{\n  call_assignment(derived(), other.derived(), internal::add_assign_op<Scalar,typename OtherDerived::Scalar>());\n  return derived();\n}\n\ntemplate<typename Derived>\ntemplate<typename OtherDerived>\nDerived& SparseMatrixBase<Derived>::operator-=(const EigenBase<OtherDerived> &other)\n{\n  // subtraction must use sub_assign_op, mirroring add_assign_op in operator+= above;\n  // a plain assign_op would overwrite *this with other instead of subtracting it\n  call_assignment(derived(), other.derived(), internal::sub_assign_op<Scalar,typename OtherDerived::Scalar>());\n  return derived();\n}\n\ntemplate<typename Derived>\ntemplate<typename OtherDerived>\nEIGEN_STRONG_INLINE Derived &\nSparseMatrixBase<Derived>::operator-=(const SparseMatrixBase<OtherDerived> &other)\n{\n  return derived() = derived() - other.derived();\n}\n\ntemplate<typename Derived>\ntemplate<typename OtherDerived>\nEIGEN_STRONG_INLINE Derived &\nSparseMatrixBase<Derived>::operator+=(const SparseMatrixBase<OtherDerived>& other)\n{\n  return derived() = derived() + other.derived();\n}\n\ntemplate<typename Derived>\ntemplate<typename OtherDerived>\nDerived& SparseMatrixBase<Derived>::operator+=(const DiagonalBase<OtherDerived>& other)\n{\n  call_assignment_no_alias(derived(), other.derived(), internal::add_assign_op<Scalar,typename OtherDerived::Scalar>());\n  return derived();\n}\n\ntemplate<typename Derived>\ntemplate<typename OtherDerived>\nDerived& SparseMatrixBase<Derived>::operator-=(const DiagonalBase<OtherDerived>& other)\n{\n  call_assignment_no_alias(derived(), other.derived(), internal::sub_assign_op<Scalar,typename OtherDerived::Scalar>());\n  return derived();\n}\n    \ntemplate<typename Derived>\ntemplate<typename OtherDerived>\nEIGEN_STRONG_INLINE const typename SparseMatrixBase<Derived>::template 
CwiseProductDenseReturnType<OtherDerived>::Type\nSparseMatrixBase<Derived>::cwiseProduct(const MatrixBase<OtherDerived> &other) const\n{\n  return typename CwiseProductDenseReturnType<OtherDerived>::Type(derived(), other.derived());\n}\n\ntemplate<typename DenseDerived, typename SparseDerived>\nEIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_sum_op<typename DenseDerived::Scalar,typename SparseDerived::Scalar>, const DenseDerived, const SparseDerived>\noperator+(const MatrixBase<DenseDerived> &a, const SparseMatrixBase<SparseDerived> &b)\n{\n  return CwiseBinaryOp<internal::scalar_sum_op<typename DenseDerived::Scalar,typename SparseDerived::Scalar>, const DenseDerived, const SparseDerived>(a.derived(), b.derived());\n}\n\ntemplate<typename SparseDerived, typename DenseDerived>\nEIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_sum_op<typename SparseDerived::Scalar,typename DenseDerived::Scalar>, const SparseDerived, const DenseDerived>\noperator+(const SparseMatrixBase<SparseDerived> &a, const MatrixBase<DenseDerived> &b)\n{\n  return CwiseBinaryOp<internal::scalar_sum_op<typename SparseDerived::Scalar,typename DenseDerived::Scalar>, const SparseDerived, const DenseDerived>(a.derived(), b.derived());\n}\n\ntemplate<typename DenseDerived, typename SparseDerived>\nEIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_difference_op<typename DenseDerived::Scalar,typename SparseDerived::Scalar>, const DenseDerived, const SparseDerived>\noperator-(const MatrixBase<DenseDerived> &a, const SparseMatrixBase<SparseDerived> &b)\n{\n  return CwiseBinaryOp<internal::scalar_difference_op<typename DenseDerived::Scalar,typename SparseDerived::Scalar>, const DenseDerived, const SparseDerived>(a.derived(), b.derived());\n}\n\ntemplate<typename SparseDerived, typename DenseDerived>\nEIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_difference_op<typename SparseDerived::Scalar,typename DenseDerived::Scalar>, const SparseDerived, const DenseDerived>\noperator-(const SparseMatrixBase<SparseDerived> &a, const MatrixBase<DenseDerived> &b)\n{\n  return CwiseBinaryOp<internal::scalar_difference_op<typename SparseDerived::Scalar,typename DenseDerived::Scalar>, const SparseDerived, const DenseDerived>(a.derived(), b.derived());\n}\n\n} // end namespace Eigen\n\n#endif // EIGEN_SPARSE_CWISE_BINARY_OP_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/SparseCore/SparseCwiseUnaryOp.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008-2015 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_SPARSE_CWISE_UNARY_OP_H\n#define EIGEN_SPARSE_CWISE_UNARY_OP_H\n\nnamespace Eigen { \n\nnamespace internal {\n  \ntemplate<typename UnaryOp, typename ArgType>\nstruct unary_evaluator<CwiseUnaryOp<UnaryOp,ArgType>, IteratorBased>\n  : public evaluator_base<CwiseUnaryOp<UnaryOp,ArgType> >\n{\n  public:\n    typedef CwiseUnaryOp<UnaryOp, ArgType> XprType;\n\n    class InnerIterator;\n    \n    enum {\n      CoeffReadCost = int(evaluator<ArgType>::CoeffReadCost) + int(functor_traits<UnaryOp>::Cost),\n      Flags = XprType::Flags\n    };\n    \n    explicit unary_evaluator(const XprType& op) : m_functor(op.functor()), m_argImpl(op.nestedExpression())\n    {\n      EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits<UnaryOp>::Cost);\n      EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);\n    }\n    \n    inline Index nonZerosEstimate() const {\n      return m_argImpl.nonZerosEstimate();\n    }\n\n  protected:\n    typedef typename evaluator<ArgType>::InnerIterator        EvalIterator;\n    \n    const UnaryOp m_functor;\n    evaluator<ArgType> m_argImpl;\n};\n\ntemplate<typename UnaryOp, typename ArgType>\nclass unary_evaluator<CwiseUnaryOp<UnaryOp,ArgType>, IteratorBased>::InnerIterator\n    : public unary_evaluator<CwiseUnaryOp<UnaryOp,ArgType>, IteratorBased>::EvalIterator\n{\n  protected:\n    typedef typename XprType::Scalar Scalar;\n    typedef typename unary_evaluator<CwiseUnaryOp<UnaryOp,ArgType>, IteratorBased>::EvalIterator Base;\n  public:\n\n    EIGEN_STRONG_INLINE InnerIterator(const unary_evaluator& unaryOp, Index outer)\n      : Base(unaryOp.m_argImpl,outer), m_functor(unaryOp.m_functor)\n    {}\n\n    EIGEN_STRONG_INLINE InnerIterator& operator++()\n    { Base::operator++(); return *this; }\n\n    EIGEN_STRONG_INLINE Scalar value() const { return m_functor(Base::value()); }\n\n  protected:\n    const UnaryOp m_functor;\n  private:\n    Scalar& valueRef();\n};\n\ntemplate<typename ViewOp, typename ArgType>\nstruct unary_evaluator<CwiseUnaryView<ViewOp,ArgType>, IteratorBased>\n  : public evaluator_base<CwiseUnaryView<ViewOp,ArgType> >\n{\n  public:\n    typedef CwiseUnaryView<ViewOp, ArgType> XprType;\n\n    class InnerIterator;\n    \n    enum {\n      CoeffReadCost = int(evaluator<ArgType>::CoeffReadCost) + int(functor_traits<ViewOp>::Cost),\n      Flags = XprType::Flags\n    };\n    \n    explicit unary_evaluator(const XprType& op) : m_functor(op.functor()), m_argImpl(op.nestedExpression())\n    {\n      EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits<ViewOp>::Cost);\n      EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);\n    }\n\n  protected:\n    typedef typename evaluator<ArgType>::InnerIterator        EvalIterator;\n    \n    const ViewOp m_functor;\n    evaluator<ArgType> m_argImpl;\n};\n\ntemplate<typename ViewOp, typename ArgType>\nclass unary_evaluator<CwiseUnaryView<ViewOp,ArgType>, IteratorBased>::InnerIterator\n    : public unary_evaluator<CwiseUnaryView<ViewOp,ArgType>, IteratorBased>::EvalIterator\n{\n  protected:\n    typedef typename XprType::Scalar Scalar;\n    typedef typename unary_evaluator<CwiseUnaryView<ViewOp,ArgType>, IteratorBased>::EvalIterator Base;\n  
public:\n\n    EIGEN_STRONG_INLINE InnerIterator(const unary_evaluator& unaryOp, Index outer)\n      : Base(unaryOp.m_argImpl,outer), m_functor(unaryOp.m_functor)\n    {}\n\n    EIGEN_STRONG_INLINE InnerIterator& operator++()\n    { Base::operator++(); return *this; }\n\n    EIGEN_STRONG_INLINE Scalar value() const { return m_functor(Base::value()); }\n    EIGEN_STRONG_INLINE Scalar& valueRef() { return m_functor(Base::valueRef()); }\n\n  protected:\n    const ViewOp m_functor;\n};\n\n} // end namespace internal\n\ntemplate<typename Derived>\nEIGEN_STRONG_INLINE Derived&\nSparseMatrixBase<Derived>::operator*=(const Scalar& other)\n{\n  typedef typename internal::evaluator<Derived>::InnerIterator EvalIterator;\n  internal::evaluator<Derived> thisEval(derived());\n  for (Index j=0; j<outerSize(); ++j)\n    for (EvalIterator i(thisEval,j); i; ++i)\n      i.valueRef() *= other;\n  return derived();\n}\n\ntemplate<typename Derived>\nEIGEN_STRONG_INLINE Derived&\nSparseMatrixBase<Derived>::operator/=(const Scalar& other)\n{\n  typedef typename internal::evaluator<Derived>::InnerIterator EvalIterator;\n  internal::evaluator<Derived> thisEval(derived());\n  for (Index j=0; j<outerSize(); ++j)\n    for (EvalIterator i(thisEval,j); i; ++i)\n      i.valueRef() /= other;\n  return derived();\n}\n\n} // end namespace Eigen\n\n#endif // EIGEN_SPARSE_CWISE_UNARY_OP_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/SparseCore/SparseDenseProduct.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008-2015 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_SPARSEDENSEPRODUCT_H\n#define EIGEN_SPARSEDENSEPRODUCT_H\n\nnamespace Eigen { \n\nnamespace internal {\n\ntemplate <> struct product_promote_storage_type<Sparse,Dense, OuterProduct> { typedef Sparse ret; };\ntemplate <> struct product_promote_storage_type<Dense,Sparse, OuterProduct> { typedef Sparse ret; };\n\ntemplate<typename SparseLhsType, typename DenseRhsType, typename DenseResType,\n         typename AlphaType,\n         int LhsStorageOrder = ((SparseLhsType::Flags&RowMajorBit)==RowMajorBit) ? RowMajor : ColMajor,\n         bool ColPerCol = ((DenseRhsType::Flags&RowMajorBit)==0) || DenseRhsType::ColsAtCompileTime==1>\nstruct sparse_time_dense_product_impl;\n\ntemplate<typename SparseLhsType, typename DenseRhsType, typename DenseResType>\nstruct sparse_time_dense_product_impl<SparseLhsType,DenseRhsType,DenseResType, typename DenseResType::Scalar, RowMajor, true>\n{\n  typedef typename internal::remove_all<SparseLhsType>::type Lhs;\n  typedef typename internal::remove_all<DenseRhsType>::type Rhs;\n  typedef typename internal::remove_all<DenseResType>::type Res;\n  typedef typename evaluator<Lhs>::InnerIterator LhsInnerIterator;\n  typedef evaluator<Lhs> LhsEval;\n  static void run(const SparseLhsType& lhs, const DenseRhsType& rhs, DenseResType& res, const typename Res::Scalar& alpha)\n  {\n    LhsEval lhsEval(lhs);\n    \n    Index n = lhs.outerSize();\n#ifdef EIGEN_HAS_OPENMP\n    Eigen::initParallel();\n    Index threads = Eigen::nbThreads();\n#endif\n    \n    for(Index c=0; c<rhs.cols(); ++c)\n    {\n#ifdef EIGEN_HAS_OPENMP\n      // This 20000 threshold has been found experimentally on 2D and 3D Poisson problems.\n      // It basically represents the minimal amount of work to be done to be worth it.\n      if(threads>1 && lhsEval.nonZerosEstimate() > 20000)\n      {\n        #pragma omp parallel for schedule(dynamic,(n+threads*4-1)/(threads*4)) num_threads(threads)\n        for(Index i=0; i<n; ++i)\n          processRow(lhsEval,rhs,res,alpha,i,c);\n      }\n      else\n#endif\n      {\n        for(Index i=0; i<n; ++i)\n          processRow(lhsEval,rhs,res,alpha,i,c);\n      }\n    }\n  }\n  \n  static void processRow(const LhsEval& lhsEval, const DenseRhsType& rhs, DenseResType& res, const typename Res::Scalar& alpha, Index i, Index col)\n  {\n    typename Res::Scalar tmp(0);\n    for(LhsInnerIterator it(lhsEval,i); it ;++it)\n      tmp += it.value() * rhs.coeff(it.index(),col);\n    res.coeffRef(i,col) += alpha * tmp;\n  }\n  \n};\n\n// FIXME: what is the purpose of the following specialization? 
Is it for the BlockedSparse format?\n// -> let's disable it for now as it is conflicting with generic scalar*matrix and matrix*scalar operators\n// template<typename T1, typename T2/*, int _Options, typename _StrideType*/>\n// struct ScalarBinaryOpTraits<T1, Ref<T2/*, _Options, _StrideType*/> >\n// {\n//   enum {\n//     Defined = 1\n//   };\n//   typedef typename CwiseUnaryOp<scalar_multiple2_op<T1, typename T2::Scalar>, T2>::PlainObject ReturnType;\n// };\n\ntemplate<typename SparseLhsType, typename DenseRhsType, typename DenseResType, typename AlphaType>\nstruct sparse_time_dense_product_impl<SparseLhsType,DenseRhsType,DenseResType, AlphaType, ColMajor, true>\n{\n  typedef typename internal::remove_all<SparseLhsType>::type Lhs;\n  typedef typename internal::remove_all<DenseRhsType>::type Rhs;\n  typedef typename internal::remove_all<DenseResType>::type Res;\n  typedef evaluator<Lhs> LhsEval;\n  typedef typename LhsEval::InnerIterator LhsInnerIterator;\n  static void run(const SparseLhsType& lhs, const DenseRhsType& rhs, DenseResType& res, const AlphaType& alpha)\n  {\n    LhsEval lhsEval(lhs);\n    for(Index c=0; c<rhs.cols(); ++c)\n    {\n      for(Index j=0; j<lhs.outerSize(); ++j)\n      {\n//        typename Res::Scalar rhs_j = alpha * rhs.coeff(j,c);\n        typename ScalarBinaryOpTraits<AlphaType, typename Rhs::Scalar>::ReturnType rhs_j(alpha * rhs.coeff(j,c));\n        for(LhsInnerIterator it(lhsEval,j); it ;++it)\n          res.coeffRef(it.index(),c) += it.value() * rhs_j;\n      }\n    }\n  }\n};\n\ntemplate<typename SparseLhsType, typename DenseRhsType, typename DenseResType>\nstruct sparse_time_dense_product_impl<SparseLhsType,DenseRhsType,DenseResType, typename DenseResType::Scalar, RowMajor, false>\n{\n  typedef typename internal::remove_all<SparseLhsType>::type Lhs;\n  typedef typename internal::remove_all<DenseRhsType>::type Rhs;\n  typedef typename internal::remove_all<DenseResType>::type Res;\n  typedef evaluator<Lhs> LhsEval;\n  typedef typename LhsEval::InnerIterator LhsInnerIterator;\n  static void run(const SparseLhsType& lhs, const DenseRhsType& rhs, DenseResType& res, const typename Res::Scalar& alpha)\n  {\n    Index n = lhs.rows();\n    LhsEval lhsEval(lhs);\n\n#ifdef EIGEN_HAS_OPENMP\n    Eigen::initParallel();\n    Index threads = Eigen::nbThreads();\n    // This 20000 threshold has been found experimentally on 2D and 3D Poisson problems.\n    // It basically represents the minimal amount of work to be done to be worth it.\n    if(threads>1 && lhsEval.nonZerosEstimate()*rhs.cols() > 20000)\n    {\n      #pragma omp parallel for schedule(dynamic,(n+threads*4-1)/(threads*4)) num_threads(threads)\n      for(Index i=0; i<n; ++i)\n        processRow(lhsEval,rhs,res,alpha,i);\n    }\n    else\n#endif\n    {\n      for(Index i=0; i<n; ++i)\n        processRow(lhsEval, rhs, res, alpha, i);\n    }\n  }\n\n  static void processRow(const LhsEval& lhsEval, const DenseRhsType& rhs, Res& res, const typename Res::Scalar& alpha, Index i)\n  {\n    typename Res::RowXpr res_i(res.row(i));\n    for(LhsInnerIterator it(lhsEval,i); it ;++it)\n      res_i += (alpha*it.value()) * rhs.row(it.index());\n  }\n};\n\ntemplate<typename SparseLhsType, typename DenseRhsType, typename DenseResType>\nstruct sparse_time_dense_product_impl<SparseLhsType,DenseRhsType,DenseResType, typename DenseResType::Scalar, ColMajor, false>\n{\n  typedef typename internal::remove_all<SparseLhsType>::type Lhs;\n  typedef typename internal::remove_all<DenseRhsType>::type Rhs;\n  typedef typename 
internal::remove_all<DenseResType>::type Res;\n  typedef typename evaluator<Lhs>::InnerIterator LhsInnerIterator;\n  static void run(const SparseLhsType& lhs, const DenseRhsType& rhs, DenseResType& res, const typename Res::Scalar& alpha)\n  {\n    evaluator<Lhs> lhsEval(lhs);\n    for(Index j=0; j<lhs.outerSize(); ++j)\n    {\n      typename Rhs::ConstRowXpr rhs_j(rhs.row(j));\n      for(LhsInnerIterator it(lhsEval,j); it ;++it)\n        res.row(it.index()) += (alpha*it.value()) * rhs_j;\n    }\n  }\n};\n\ntemplate<typename SparseLhsType, typename DenseRhsType, typename DenseResType,typename AlphaType>\ninline void sparse_time_dense_product(const SparseLhsType& lhs, const DenseRhsType& rhs, DenseResType& res, const AlphaType& alpha)\n{\n  sparse_time_dense_product_impl<SparseLhsType,DenseRhsType,DenseResType, AlphaType>::run(lhs, rhs, res, alpha);\n}\n\n} // end namespace internal\n\nnamespace internal {\n\ntemplate<typename Lhs, typename Rhs, int ProductType>\nstruct generic_product_impl<Lhs, Rhs, SparseShape, DenseShape, ProductType>\n : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,SparseShape,DenseShape,ProductType> >\n{\n  typedef typename Product<Lhs,Rhs>::Scalar Scalar;\n  \n  template<typename Dest>\n  static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)\n  {\n    typedef typename nested_eval<Lhs,((Rhs::Flags&RowMajorBit)==0) ? 1 : Rhs::ColsAtCompileTime>::type LhsNested;\n    typedef typename nested_eval<Rhs,((Lhs::Flags&RowMajorBit)==0) ? 1 : Dynamic>::type RhsNested;\n    LhsNested lhsNested(lhs);\n    RhsNested rhsNested(rhs);\n    internal::sparse_time_dense_product(lhsNested, rhsNested, dst, alpha);\n  }\n};\n\ntemplate<typename Lhs, typename Rhs, int ProductType>\nstruct generic_product_impl<Lhs, Rhs, SparseTriangularShape, DenseShape, ProductType>\n  : generic_product_impl<Lhs, Rhs, SparseShape, DenseShape, ProductType>\n{};\n\ntemplate<typename Lhs, typename Rhs, int ProductType>\nstruct generic_product_impl<Lhs, Rhs, DenseShape, SparseShape, ProductType>\n  : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,SparseShape,ProductType> >\n{\n  typedef typename Product<Lhs,Rhs>::Scalar Scalar;\n  \n  template<typename Dst>\n  static void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)\n  {\n    typedef typename nested_eval<Lhs,((Rhs::Flags&RowMajorBit)==0) ? Dynamic : 1>::type LhsNested;\n    typedef typename nested_eval<Rhs,((Lhs::Flags&RowMajorBit)==RowMajorBit) ? 
1 : Lhs::RowsAtCompileTime>::type RhsNested;\n    LhsNested lhsNested(lhs);\n    RhsNested rhsNested(rhs);\n    \n    // transpose everything\n    Transpose<Dst> dstT(dst);\n    internal::sparse_time_dense_product(rhsNested.transpose(), lhsNested.transpose(), dstT, alpha);\n  }\n};\n\ntemplate<typename Lhs, typename Rhs, int ProductType>\nstruct generic_product_impl<Lhs, Rhs, DenseShape, SparseTriangularShape, ProductType>\n  : generic_product_impl<Lhs, Rhs, DenseShape, SparseShape, ProductType>\n{};\n\ntemplate<typename LhsT, typename RhsT, bool NeedToTranspose>\nstruct sparse_dense_outer_product_evaluator\n{\nprotected:\n  typedef typename conditional<NeedToTranspose,RhsT,LhsT>::type Lhs1;\n  typedef typename conditional<NeedToTranspose,LhsT,RhsT>::type ActualRhs;\n  typedef Product<LhsT,RhsT,DefaultProduct> ProdXprType;\n  \n  // if the actual left-hand side is a dense vector,\n  // then build a sparse-view so that we can seamlessly iterate over it.\n  typedef typename conditional<is_same<typename internal::traits<Lhs1>::StorageKind,Sparse>::value,\n            Lhs1, SparseView<Lhs1> >::type ActualLhs;\n  typedef typename conditional<is_same<typename internal::traits<Lhs1>::StorageKind,Sparse>::value,\n            Lhs1 const&, SparseView<Lhs1> >::type LhsArg;\n            \n  typedef evaluator<ActualLhs> LhsEval;\n  typedef evaluator<ActualRhs> RhsEval;\n  typedef typename evaluator<ActualLhs>::InnerIterator LhsIterator;\n  typedef typename ProdXprType::Scalar Scalar;\n  \npublic:\n  enum {\n    Flags = NeedToTranspose ? RowMajorBit : 0,\n    CoeffReadCost = HugeCost\n  };\n  \n  class InnerIterator : public LhsIterator\n  {\n  public:\n    InnerIterator(const sparse_dense_outer_product_evaluator &xprEval, Index outer)\n      : LhsIterator(xprEval.m_lhsXprImpl, 0),\n        m_outer(outer),\n        m_empty(false),\n        m_factor(get(xprEval.m_rhsXprImpl, outer, typename internal::traits<ActualRhs>::StorageKind() ))\n    {}\n    \n    EIGEN_STRONG_INLINE Index outer() const { return m_outer; }\n    EIGEN_STRONG_INLINE Index row()   const { return NeedToTranspose ? m_outer : LhsIterator::index(); }\n    EIGEN_STRONG_INLINE Index col()   const { return NeedToTranspose ? 
LhsIterator::index() : m_outer; }\n\n    EIGEN_STRONG_INLINE Scalar value() const { return LhsIterator::value() * m_factor; }\n    EIGEN_STRONG_INLINE operator bool() const { return LhsIterator::operator bool() && (!m_empty); }\n    \n  protected:\n    Scalar get(const RhsEval &rhs, Index outer, Dense = Dense()) const\n    {\n      return rhs.coeff(outer);\n    }\n    \n    Scalar get(const RhsEval &rhs, Index outer, Sparse = Sparse())\n    {\n      typename RhsEval::InnerIterator it(rhs, outer);\n      if (it && it.index()==0 && it.value()!=Scalar(0))\n        return it.value();\n      m_empty = true;\n      return Scalar(0);\n    }\n    \n    Index m_outer;\n    bool m_empty;\n    Scalar m_factor;\n  };\n  \n  sparse_dense_outer_product_evaluator(const Lhs1 &lhs, const ActualRhs &rhs)\n     : m_lhs(lhs), m_lhsXprImpl(m_lhs), m_rhsXprImpl(rhs)\n  {\n    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);\n  }\n  \n  // transpose case\n  sparse_dense_outer_product_evaluator(const ActualRhs &rhs, const Lhs1 &lhs)\n     : m_lhs(lhs), m_lhsXprImpl(m_lhs), m_rhsXprImpl(rhs)\n  {\n    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);\n  }\n    \nprotected:\n  const LhsArg m_lhs;\n  evaluator<ActualLhs> m_lhsXprImpl;\n  evaluator<ActualRhs> m_rhsXprImpl;\n};\n\n// sparse * dense outer product\ntemplate<typename Lhs, typename Rhs>\nstruct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, OuterProduct, SparseShape, DenseShape>\n  : sparse_dense_outer_product_evaluator<Lhs,Rhs, Lhs::IsRowMajor>\n{\n  typedef sparse_dense_outer_product_evaluator<Lhs,Rhs, Lhs::IsRowMajor> Base;\n  \n  typedef Product<Lhs, Rhs> XprType;\n  typedef typename XprType::PlainObject PlainObject;\n\n  explicit product_evaluator(const XprType& xpr)\n    : Base(xpr.lhs(), xpr.rhs())\n  {}\n  \n};\n\ntemplate<typename Lhs, typename Rhs>\nstruct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, OuterProduct, DenseShape, SparseShape>\n  : sparse_dense_outer_product_evaluator<Lhs,Rhs, Rhs::IsRowMajor>\n{\n  typedef sparse_dense_outer_product_evaluator<Lhs,Rhs, Rhs::IsRowMajor> Base;\n  \n  typedef Product<Lhs, Rhs> XprType;\n  typedef typename XprType::PlainObject PlainObject;\n\n  explicit product_evaluator(const XprType& xpr)\n    : Base(xpr.lhs(), xpr.rhs())\n  {}\n  \n};\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_SPARSEDENSEPRODUCT_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/SparseCore/SparseDiagonalProduct.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2009-2015 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_SPARSE_DIAGONAL_PRODUCT_H\n#define EIGEN_SPARSE_DIAGONAL_PRODUCT_H\n\nnamespace Eigen { \n\n// The product of a diagonal matrix with a sparse matrix can be easily\n// implemented using expression template.\n// We have two consider very different cases:\n// 1 - diag * row-major sparse\n//     => each inner vector <=> scalar * sparse vector product\n//     => so we can reuse CwiseUnaryOp::InnerIterator\n// 2 - diag * col-major sparse\n//     => each inner vector <=> densevector * sparse vector cwise product\n//     => again, we can reuse specialization of CwiseBinaryOp::InnerIterator\n//        for that particular case\n// The two other cases are symmetric.\n\nnamespace internal {\n\nenum {\n  SDP_AsScalarProduct,\n  SDP_AsCwiseProduct\n};\n  \ntemplate<typename SparseXprType, typename DiagonalCoeffType, int SDP_Tag>\nstruct sparse_diagonal_product_evaluator;\n\ntemplate<typename Lhs, typename Rhs, int ProductTag>\nstruct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, ProductTag, DiagonalShape, SparseShape>\n  : public sparse_diagonal_product_evaluator<Rhs, typename Lhs::DiagonalVectorType, Rhs::Flags&RowMajorBit?SDP_AsScalarProduct:SDP_AsCwiseProduct>\n{\n  typedef Product<Lhs, Rhs, DefaultProduct> XprType;\n  enum { CoeffReadCost = HugeCost, Flags = Rhs::Flags&RowMajorBit, Alignment = 0 }; // FIXME CoeffReadCost & Flags\n  \n  typedef sparse_diagonal_product_evaluator<Rhs, typename Lhs::DiagonalVectorType, Rhs::Flags&RowMajorBit?SDP_AsScalarProduct:SDP_AsCwiseProduct> Base;\n  explicit product_evaluator(const XprType& xpr) : Base(xpr.rhs(), xpr.lhs().diagonal()) {}\n};\n\ntemplate<typename Lhs, typename Rhs, int ProductTag>\nstruct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, ProductTag, SparseShape, DiagonalShape>\n  : public sparse_diagonal_product_evaluator<Lhs, Transpose<const typename Rhs::DiagonalVectorType>, Lhs::Flags&RowMajorBit?SDP_AsCwiseProduct:SDP_AsScalarProduct>\n{\n  typedef Product<Lhs, Rhs, DefaultProduct> XprType;\n  enum { CoeffReadCost = HugeCost, Flags = Lhs::Flags&RowMajorBit, Alignment = 0 }; // FIXME CoeffReadCost & Flags\n  \n  typedef sparse_diagonal_product_evaluator<Lhs, Transpose<const typename Rhs::DiagonalVectorType>, Lhs::Flags&RowMajorBit?SDP_AsCwiseProduct:SDP_AsScalarProduct> Base;\n  explicit product_evaluator(const XprType& xpr) : Base(xpr.lhs(), xpr.rhs().diagonal().transpose()) {}\n};\n\ntemplate<typename SparseXprType, typename DiagonalCoeffType>\nstruct sparse_diagonal_product_evaluator<SparseXprType, DiagonalCoeffType, SDP_AsScalarProduct>\n{\nprotected:\n  typedef typename evaluator<SparseXprType>::InnerIterator SparseXprInnerIterator;\n  typedef typename SparseXprType::Scalar Scalar;\n  \npublic:\n  class InnerIterator : public SparseXprInnerIterator\n  {\n  public:\n    InnerIterator(const sparse_diagonal_product_evaluator &xprEval, Index outer)\n      : SparseXprInnerIterator(xprEval.m_sparseXprImpl, outer),\n        m_coeff(xprEval.m_diagCoeffImpl.coeff(outer))\n    {}\n    \n    EIGEN_STRONG_INLINE Scalar value() const { return m_coeff * SparseXprInnerIterator::value(); }\n  protected:\n    typename DiagonalCoeffType::Scalar 
m_coeff;\n  };\n  \n  sparse_diagonal_product_evaluator(const SparseXprType &sparseXpr, const DiagonalCoeffType &diagCoeff)\n    : m_sparseXprImpl(sparseXpr), m_diagCoeffImpl(diagCoeff)\n  {}\n\n  Index nonZerosEstimate() const { return m_sparseXprImpl.nonZerosEstimate(); }\n    \nprotected:\n  evaluator<SparseXprType> m_sparseXprImpl;\n  evaluator<DiagonalCoeffType> m_diagCoeffImpl;\n};\n\n\ntemplate<typename SparseXprType, typename DiagCoeffType>\nstruct sparse_diagonal_product_evaluator<SparseXprType, DiagCoeffType, SDP_AsCwiseProduct>\n{\n  typedef typename SparseXprType::Scalar Scalar;\n  typedef typename SparseXprType::StorageIndex StorageIndex;\n  \n  typedef typename nested_eval<DiagCoeffType,SparseXprType::IsRowMajor ? SparseXprType::RowsAtCompileTime\n                                                                       : SparseXprType::ColsAtCompileTime>::type DiagCoeffNested;\n  \n  class InnerIterator\n  {\n    typedef typename evaluator<SparseXprType>::InnerIterator SparseXprIter;\n  public:\n    InnerIterator(const sparse_diagonal_product_evaluator &xprEval, Index outer)\n      : m_sparseIter(xprEval.m_sparseXprEval, outer), m_diagCoeffNested(xprEval.m_diagCoeffNested)\n    {}\n    \n    inline Scalar value() const { return m_sparseIter.value() * m_diagCoeffNested.coeff(index()); }\n    inline StorageIndex index() const  { return m_sparseIter.index(); }\n    inline Index outer() const  { return m_sparseIter.outer(); }\n    inline Index col() const    { return SparseXprType::IsRowMajor ? m_sparseIter.index() : m_sparseIter.outer(); }\n    inline Index row() const    { return SparseXprType::IsRowMajor ? m_sparseIter.outer() : m_sparseIter.index(); }\n    \n    EIGEN_STRONG_INLINE InnerIterator& operator++() { ++m_sparseIter; return *this; }\n    inline operator bool() const  { return m_sparseIter; }\n    \n  protected:\n    SparseXprIter m_sparseIter;\n    DiagCoeffNested m_diagCoeffNested;\n  };\n  \n  sparse_diagonal_product_evaluator(const SparseXprType &sparseXpr, const DiagCoeffType &diagCoeff)\n    : m_sparseXprEval(sparseXpr), m_diagCoeffNested(diagCoeff)\n  {}\n\n  Index nonZerosEstimate() const { return m_sparseXprEval.nonZerosEstimate(); }\n    \nprotected:\n  evaluator<SparseXprType> m_sparseXprEval;\n  DiagCoeffNested m_diagCoeffNested;\n};\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_SPARSE_DIAGONAL_PRODUCT_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/SparseCore/SparseDot.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_SPARSE_DOT_H\n#define EIGEN_SPARSE_DOT_H\n\nnamespace Eigen { \n\ntemplate<typename Derived>\ntemplate<typename OtherDerived>\ntypename internal::traits<Derived>::Scalar\nSparseMatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const\n{\n  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)\n  EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)\n  EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(Derived,OtherDerived)\n  EIGEN_STATIC_ASSERT((internal::is_same<Scalar, typename OtherDerived::Scalar>::value),\n    YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)\n\n  eigen_assert(size() == other.size());\n  eigen_assert(other.size()>0 && \"you are using a non initialized vector\");\n\n  internal::evaluator<Derived> thisEval(derived());\n  typename internal::evaluator<Derived>::InnerIterator i(thisEval, 0);\n  Scalar res(0);\n  while (i)\n  {\n    res += numext::conj(i.value()) * other.coeff(i.index());\n    ++i;\n  }\n  return res;\n}\n\ntemplate<typename Derived>\ntemplate<typename OtherDerived>\ntypename internal::traits<Derived>::Scalar\nSparseMatrixBase<Derived>::dot(const SparseMatrixBase<OtherDerived>& other) const\n{\n  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)\n  EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)\n  EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(Derived,OtherDerived)\n  EIGEN_STATIC_ASSERT((internal::is_same<Scalar, typename OtherDerived::Scalar>::value),\n    YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)\n\n  eigen_assert(size() == other.size());\n\n  internal::evaluator<Derived> thisEval(derived());\n  typename internal::evaluator<Derived>::InnerIterator i(thisEval, 0);\n  \n  internal::evaluator<OtherDerived>  otherEval(other.derived());\n  typename internal::evaluator<OtherDerived>::InnerIterator j(otherEval, 0);\n\n  Scalar res(0);\n  while (i && j)\n  {\n    if (i.index()==j.index())\n    {\n      res += numext::conj(i.value()) * j.value();\n      ++i; ++j;\n    }\n    else if (i.index()<j.index())\n      ++i;\n    else\n      ++j;\n  }\n  return res;\n}\n\ntemplate<typename Derived>\ninline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real\nSparseMatrixBase<Derived>::squaredNorm() const\n{\n  return numext::real((*this).cwiseAbs2().sum());\n}\n\ntemplate<typename Derived>\ninline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real\nSparseMatrixBase<Derived>::norm() const\n{\n  using std::sqrt;\n  return sqrt(squaredNorm());\n}\n\ntemplate<typename Derived>\ninline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real\nSparseMatrixBase<Derived>::blueNorm() const\n{\n  return internal::blueNorm_impl(*this);\n}\n} // end namespace Eigen\n\n#endif // EIGEN_SPARSE_DOT_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/SparseCore/SparseFuzzy.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_SPARSE_FUZZY_H\n#define EIGEN_SPARSE_FUZZY_H\n\nnamespace Eigen {\n  \ntemplate<typename Derived>\ntemplate<typename OtherDerived>\nbool SparseMatrixBase<Derived>::isApprox(const SparseMatrixBase<OtherDerived>& other, const RealScalar &prec) const\n{\n  const typename internal::nested_eval<Derived,2,PlainObject>::type actualA(derived());\n  typename internal::conditional<bool(IsRowMajor)==bool(OtherDerived::IsRowMajor),\n    const typename internal::nested_eval<OtherDerived,2,PlainObject>::type,\n    const PlainObject>::type actualB(other.derived());\n\n  return (actualA - actualB).squaredNorm() <= prec * prec * numext::mini(actualA.squaredNorm(), actualB.squaredNorm());\n}\n\n} // end namespace Eigen\n\n#endif // EIGEN_SPARSE_FUZZY_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/SparseCore/SparseMap.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2015 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_SPARSE_MAP_H\n#define EIGEN_SPARSE_MAP_H\n\nnamespace Eigen {\n\nnamespace internal {\n\ntemplate<typename MatScalar, int MatOptions, typename MatIndex, int Options, typename StrideType>\nstruct traits<Map<SparseMatrix<MatScalar,MatOptions,MatIndex>, Options, StrideType> >\n  : public traits<SparseMatrix<MatScalar,MatOptions,MatIndex> >\n{\n  typedef SparseMatrix<MatScalar,MatOptions,MatIndex> PlainObjectType;\n  typedef traits<PlainObjectType> TraitsBase;\n  enum {\n    Flags = TraitsBase::Flags & (~NestByRefBit)\n  };\n};\n\ntemplate<typename MatScalar, int MatOptions, typename MatIndex, int Options, typename StrideType>\nstruct traits<Map<const SparseMatrix<MatScalar,MatOptions,MatIndex>, Options, StrideType> >\n  : public traits<SparseMatrix<MatScalar,MatOptions,MatIndex> >\n{\n  typedef SparseMatrix<MatScalar,MatOptions,MatIndex> PlainObjectType;\n  typedef traits<PlainObjectType> TraitsBase;\n  enum {\n    Flags = TraitsBase::Flags & (~ (NestByRefBit | LvalueBit))\n  };\n};\n\n} // end namespace internal\n\ntemplate<typename Derived,\n         int Level = internal::accessors_level<Derived>::has_write_access ? WriteAccessors : ReadOnlyAccessors\n> class SparseMapBase;\n\n/** \\ingroup SparseCore_Module\n  * class SparseMapBase\n  * \\brief Common base class for Map and Ref instance of sparse matrix and vector.\n  */\ntemplate<typename Derived>\nclass SparseMapBase<Derived,ReadOnlyAccessors>\n  : public SparseCompressedBase<Derived>\n{\n  public:\n    typedef SparseCompressedBase<Derived> Base;\n    typedef typename Base::Scalar Scalar;\n    typedef typename Base::StorageIndex StorageIndex;\n    enum { IsRowMajor = Base::IsRowMajor };\n    using Base::operator=;\n  protected:\n    \n    typedef typename internal::conditional<\n                         bool(internal::is_lvalue<Derived>::value),\n                         Scalar *, const Scalar *>::type ScalarPointer;\n    typedef typename internal::conditional<\n                         bool(internal::is_lvalue<Derived>::value),\n                         StorageIndex *, const StorageIndex *>::type IndexPointer;\n\n    Index   m_outerSize;\n    Index   m_innerSize;\n    Array<StorageIndex,2,1>  m_zero_nnz;\n    IndexPointer  m_outerIndex;\n    IndexPointer  m_innerIndices;\n    ScalarPointer m_values;\n    IndexPointer  m_innerNonZeros;\n\n  public:\n\n    /** \\copydoc SparseMatrixBase::rows() */\n    inline Index rows() const { return IsRowMajor ? m_outerSize : m_innerSize; }\n    /** \\copydoc SparseMatrixBase::cols() */\n    inline Index cols() const { return IsRowMajor ? 
m_innerSize : m_outerSize; }\n    /** \\copydoc SparseMatrixBase::innerSize() */\n    inline Index innerSize() const { return m_innerSize; }\n    /** \\copydoc SparseMatrixBase::outerSize() */\n    inline Index outerSize() const { return m_outerSize; }\n    /** \\copydoc SparseCompressedBase::nonZeros */\n    inline Index nonZeros() const { return m_zero_nnz[1]; }\n    \n    /** \\copydoc SparseCompressedBase::isCompressed */\n    bool isCompressed() const { return m_innerNonZeros==0; }\n\n    //----------------------------------------\n    // direct access interface\n    /** \\copydoc SparseMatrix::valuePtr */\n    inline const Scalar* valuePtr() const { return m_values; }\n    /** \\copydoc SparseMatrix::innerIndexPtr */\n    inline const StorageIndex* innerIndexPtr() const { return m_innerIndices; }\n    /** \\copydoc SparseMatrix::outerIndexPtr */\n    inline const StorageIndex* outerIndexPtr() const { return m_outerIndex; }\n    /** \\copydoc SparseMatrix::innerNonZeroPtr */\n    inline const StorageIndex* innerNonZeroPtr() const { return m_innerNonZeros; }\n    //----------------------------------------\n\n    /** \\copydoc SparseMatrix::coeff */\n    inline Scalar coeff(Index row, Index col) const\n    {\n      const Index outer = IsRowMajor ? row : col;\n      const Index inner = IsRowMajor ? col : row;\n\n      Index start = m_outerIndex[outer];\n      Index end = isCompressed() ? m_outerIndex[outer+1] : start + m_innerNonZeros[outer];\n      if (start==end)\n        return Scalar(0);\n      else if (end>0 && inner==m_innerIndices[end-1])\n        return m_values[end-1];\n      // ^^  optimization: let's first check if it is the last coefficient\n      // (very common in high level algorithms)\n\n      const StorageIndex* r = std::lower_bound(&m_innerIndices[start],&m_innerIndices[end-1],inner);\n      const Index id = r-&m_innerIndices[0];\n      return ((*r==inner) && (id<end)) ? 
m_values[id] : Scalar(0);\n    }\n\n    inline SparseMapBase(Index rows, Index cols, Index nnz, IndexPointer outerIndexPtr, IndexPointer innerIndexPtr,\n                              ScalarPointer valuePtr, IndexPointer innerNonZerosPtr = 0)\n      : m_outerSize(IsRowMajor?rows:cols), m_innerSize(IsRowMajor?cols:rows), m_zero_nnz(0,internal::convert_index<StorageIndex>(nnz)), m_outerIndex(outerIndexPtr),\n        m_innerIndices(innerIndexPtr), m_values(valuePtr), m_innerNonZeros(innerNonZerosPtr)\n    {}\n\n    // for vectors\n    inline SparseMapBase(Index size, Index nnz, IndexPointer innerIndexPtr, ScalarPointer valuePtr)\n      : m_outerSize(1), m_innerSize(size), m_zero_nnz(0,internal::convert_index<StorageIndex>(nnz)), m_outerIndex(m_zero_nnz.data()),\n        m_innerIndices(innerIndexPtr), m_values(valuePtr), m_innerNonZeros(0)\n    {}\n\n    /** Empty destructor */\n    inline ~SparseMapBase() {}\n\n  protected:\n    inline SparseMapBase() {}\n};\n\n/** \\ingroup SparseCore_Module\n  * class SparseMapBase\n  * \\brief Common base class for writable Map and Ref instance of sparse matrix and vector.\n  */\ntemplate<typename Derived>\nclass SparseMapBase<Derived,WriteAccessors>\n  : public SparseMapBase<Derived,ReadOnlyAccessors>\n{\n    typedef MapBase<Derived, ReadOnlyAccessors> ReadOnlyMapBase;\n    \n  public:\n    typedef SparseMapBase<Derived, ReadOnlyAccessors> Base;\n    typedef typename Base::Scalar Scalar;\n    typedef typename Base::StorageIndex StorageIndex;\n    enum { IsRowMajor = Base::IsRowMajor };\n    \n    using Base::operator=;\n\n  public:\n    \n    //----------------------------------------\n    // direct access interface\n    using Base::valuePtr;\n    using Base::innerIndexPtr;\n    using Base::outerIndexPtr;\n    using Base::innerNonZeroPtr;\n    /** \\copydoc SparseMatrix::valuePtr */\n    inline Scalar* valuePtr()              { return Base::m_values; }\n    /** \\copydoc SparseMatrix::innerIndexPtr */\n    inline StorageIndex* innerIndexPtr()   { return Base::m_innerIndices; }\n    /** \\copydoc SparseMatrix::outerIndexPtr */\n    inline StorageIndex* outerIndexPtr()   { return Base::m_outerIndex; }\n    /** \\copydoc SparseMatrix::innerNonZeroPtr */\n    inline StorageIndex* innerNonZeroPtr() { return Base::m_innerNonZeros; }\n    //----------------------------------------\n\n    /** \\copydoc SparseMatrix::coeffRef */\n    inline Scalar& coeffRef(Index row, Index col)\n    {\n      const Index outer = IsRowMajor ? row : col;\n      const Index inner = IsRowMajor ? col : row;\n\n      Index start = Base::m_outerIndex[outer];\n      Index end = Base::isCompressed() ? 
Base::m_outerIndex[outer+1] : start + Base::m_innerNonZeros[outer];\n      eigen_assert(end>=start && \"you probably called coeffRef on a non finalized matrix\");\n      eigen_assert(end>start && \"coeffRef cannot be called on a zero coefficient\");\n      StorageIndex* r = std::lower_bound(&Base::m_innerIndices[start],&Base::m_innerIndices[end],inner);\n      const Index id = r - &Base::m_innerIndices[0];\n      eigen_assert((*r==inner) && (id<end) && \"coeffRef cannot be called on a zero coefficient\");\n      return const_cast<Scalar*>(Base::m_values)[id];\n    }\n    \n    inline SparseMapBase(Index rows, Index cols, Index nnz, StorageIndex* outerIndexPtr, StorageIndex* innerIndexPtr,\n                         Scalar* valuePtr, StorageIndex* innerNonZerosPtr = 0)\n      : Base(rows, cols, nnz, outerIndexPtr, innerIndexPtr, valuePtr, innerNonZerosPtr)\n    {}\n\n    // for vectors\n    inline SparseMapBase(Index size, Index nnz, StorageIndex* innerIndexPtr, Scalar* valuePtr)\n      : Base(size, nnz, innerIndexPtr, valuePtr)\n    {}\n\n    /** Empty destructor */\n    inline ~SparseMapBase() {}\n\n  protected:\n    inline SparseMapBase() {}\n};\n\n/** \\ingroup SparseCore_Module\n  *\n  * \\brief Specialization of class Map for SparseMatrix-like storage.\n  *\n  * \\tparam SparseMatrixType the equivalent sparse matrix type of the referenced data, it must be a template instance of class SparseMatrix.\n  *\n  * \\sa class Map, class SparseMatrix, class Ref<SparseMatrixType,Options>\n  */\n#ifndef EIGEN_PARSED_BY_DOXYGEN\ntemplate<typename MatScalar, int MatOptions, typename MatIndex, int Options, typename StrideType>\nclass Map<SparseMatrix<MatScalar,MatOptions,MatIndex>, Options, StrideType>\n  : public SparseMapBase<Map<SparseMatrix<MatScalar,MatOptions,MatIndex>, Options, StrideType> >\n#else\ntemplate<typename SparseMatrixType>\nclass Map<SparseMatrixType>\n  : public SparseMapBase<Derived,WriteAccessors>\n#endif\n{\n  public:\n    typedef SparseMapBase<Map> Base;\n    EIGEN_SPARSE_PUBLIC_INTERFACE(Map)\n    enum { IsRowMajor = Base::IsRowMajor };\n\n  public:\n\n    /** Constructs a read-write Map to a sparse matrix of size \\a rows x \\a cols, containing \\a nnz non-zero coefficients,\n      * stored as a sparse format as defined by the pointers \\a outerIndexPtr, \\a innerIndexPtr, and \\a valuePtr.\n      * If the optional parameter \\a innerNonZerosPtr is the null pointer, then a standard compressed format is assumed.\n      *\n      * This constructor is available only if \\c SparseMatrixType is non-const.\n      *\n      * More details on the expected storage schemes are given in the \\ref TutorialSparse \"manual pages\".\n      */\n    inline Map(Index rows, Index cols, Index nnz, StorageIndex* outerIndexPtr,\n               StorageIndex* innerIndexPtr, Scalar* valuePtr, StorageIndex* innerNonZerosPtr = 0)\n      : Base(rows, cols, nnz, outerIndexPtr, innerIndexPtr, valuePtr, innerNonZerosPtr)\n    {}\n#ifndef EIGEN_PARSED_BY_DOXYGEN\n    /** Empty destructor */\n    inline ~Map() {}\n};\n\ntemplate<typename MatScalar, int MatOptions, typename MatIndex, int Options, typename StrideType>\nclass Map<const SparseMatrix<MatScalar,MatOptions,MatIndex>, Options, StrideType>\n  : public SparseMapBase<Map<const SparseMatrix<MatScalar,MatOptions,MatIndex>, Options, StrideType> >\n{\n  public:\n    typedef SparseMapBase<Map> Base;\n    EIGEN_SPARSE_PUBLIC_INTERFACE(Map)\n    enum { IsRowMajor = Base::IsRowMajor };\n\n  public:\n#endif\n    /** This is the const version of the above 
constructor.\n      *\n      * This constructor is available only if \\c SparseMatrixType is const, e.g.:\n      * \\code Map<const SparseMatrix<double> >  \\endcode\n      */\n    inline Map(Index rows, Index cols, Index nnz, const StorageIndex* outerIndexPtr,\n               const StorageIndex* innerIndexPtr, const Scalar* valuePtr, const StorageIndex* innerNonZerosPtr = 0)\n      : Base(rows, cols, nnz, outerIndexPtr, innerIndexPtr, valuePtr, innerNonZerosPtr)\n    {}\n\n    /** Empty destructor */\n    inline ~Map() {}\n};\n\nnamespace internal {\n\ntemplate<typename MatScalar, int MatOptions, typename MatIndex, int Options, typename StrideType>\nstruct evaluator<Map<SparseMatrix<MatScalar,MatOptions,MatIndex>, Options, StrideType> >\n  : evaluator<SparseCompressedBase<Map<SparseMatrix<MatScalar,MatOptions,MatIndex>, Options, StrideType> > >\n{\n  typedef evaluator<SparseCompressedBase<Map<SparseMatrix<MatScalar,MatOptions,MatIndex>, Options, StrideType> > > Base;\n  typedef Map<SparseMatrix<MatScalar,MatOptions,MatIndex>, Options, StrideType> XprType;  \n  evaluator() : Base() {}\n  explicit evaluator(const XprType &mat) : Base(mat) {}\n};\n\ntemplate<typename MatScalar, int MatOptions, typename MatIndex, int Options, typename StrideType>\nstruct evaluator<Map<const SparseMatrix<MatScalar,MatOptions,MatIndex>, Options, StrideType> >\n  : evaluator<SparseCompressedBase<Map<const SparseMatrix<MatScalar,MatOptions,MatIndex>, Options, StrideType> > >\n{\n  typedef evaluator<SparseCompressedBase<Map<const SparseMatrix<MatScalar,MatOptions,MatIndex>, Options, StrideType> > > Base;\n  typedef Map<const SparseMatrix<MatScalar,MatOptions,MatIndex>, Options, StrideType> XprType;  \n  evaluator() : Base() {}\n  explicit evaluator(const XprType &mat) : Base(mat) {}\n};\n\n}\n\n} // end namespace Eigen\n\n#endif // EIGEN_SPARSE_MAP_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/SparseCore/SparseMatrix.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_SPARSEMATRIX_H\n#define EIGEN_SPARSEMATRIX_H\n\nnamespace Eigen { \n\n/** \\ingroup SparseCore_Module\n  *\n  * \\class SparseMatrix\n  *\n  * \\brief A versatible sparse matrix representation\n  *\n  * This class implements a more versatile variants of the common \\em compressed row/column storage format.\n  * Each colmun's (resp. row) non zeros are stored as a pair of value with associated row (resp. colmiun) index.\n  * All the non zeros are stored in a single large buffer. Unlike the \\em compressed format, there might be extra\n  * space in between the nonzeros of two successive colmuns (resp. rows) such that insertion of new non-zero\n  * can be done with limited memory reallocation and copies.\n  *\n  * A call to the function makeCompressed() turns the matrix into the standard \\em compressed format\n  * compatible with many library.\n  *\n  * More details on this storage sceheme are given in the \\ref TutorialSparse \"manual pages\".\n  *\n  * \\tparam _Scalar the scalar type, i.e. the type of the coefficients\n  * \\tparam _Options Union of bit flags controlling the storage scheme. Currently the only possibility\n  *                 is ColMajor or RowMajor. The default is 0 which means column-major.\n  * \\tparam _StorageIndex the type of the indices. It has to be a \\b signed type (e.g., short, int, std::ptrdiff_t). Default is \\c int.\n  *\n  * \\warning In %Eigen 3.2, the undocumented type \\c SparseMatrix::Index was improperly defined as the storage index type (e.g., int),\n  *          whereas it is now (starting from %Eigen 3.3) deprecated and always defined as Eigen::Index.\n  *          Codes making use of \\c SparseMatrix::Index, might thus likely have to be changed to use \\c SparseMatrix::StorageIndex instead.\n  *\n  * This class can be extended with the help of the plugin mechanism described on the page\n  * \\ref TopicCustomizing_Plugins by defining the preprocessor symbol \\c EIGEN_SPARSEMATRIX_PLUGIN.\n  */\n\nnamespace internal {\ntemplate<typename _Scalar, int _Options, typename _StorageIndex>\nstruct traits<SparseMatrix<_Scalar, _Options, _StorageIndex> >\n{\n  typedef _Scalar Scalar;\n  typedef _StorageIndex StorageIndex;\n  typedef Sparse StorageKind;\n  typedef MatrixXpr XprKind;\n  enum {\n    RowsAtCompileTime = Dynamic,\n    ColsAtCompileTime = Dynamic,\n    MaxRowsAtCompileTime = Dynamic,\n    MaxColsAtCompileTime = Dynamic,\n    Flags = _Options | NestByRefBit | LvalueBit | CompressedAccessBit,\n    SupportedAccessPatterns = InnerRandomAccessPattern\n  };\n};\n\ntemplate<typename _Scalar, int _Options, typename _StorageIndex, int DiagIndex>\nstruct traits<Diagonal<SparseMatrix<_Scalar, _Options, _StorageIndex>, DiagIndex> >\n{\n  typedef SparseMatrix<_Scalar, _Options, _StorageIndex> MatrixType;\n  typedef typename ref_selector<MatrixType>::type MatrixTypeNested;\n  typedef typename remove_reference<MatrixTypeNested>::type _MatrixTypeNested;\n\n  typedef _Scalar Scalar;\n  typedef Dense StorageKind;\n  typedef _StorageIndex StorageIndex;\n  typedef MatrixXpr XprKind;\n\n  enum {\n    RowsAtCompileTime = Dynamic,\n    ColsAtCompileTime = 1,\n    
MaxRowsAtCompileTime = Dynamic,\n    MaxColsAtCompileTime = 1,\n    Flags = LvalueBit\n  };\n};\n\ntemplate<typename _Scalar, int _Options, typename _StorageIndex, int DiagIndex>\nstruct traits<Diagonal<const SparseMatrix<_Scalar, _Options, _StorageIndex>, DiagIndex> >\n : public traits<Diagonal<SparseMatrix<_Scalar, _Options, _StorageIndex>, DiagIndex> >\n{\n  enum {\n    Flags = 0\n  };\n};\n\n} // end namespace internal\n\ntemplate<typename _Scalar, int _Options, typename _StorageIndex>\nclass SparseMatrix\n  : public SparseCompressedBase<SparseMatrix<_Scalar, _Options, _StorageIndex> >\n{\n    typedef SparseCompressedBase<SparseMatrix> Base;\n    using Base::convert_index;\n    friend class SparseVector<_Scalar,0,_StorageIndex>;\n    template<typename, typename, typename, typename, typename>\n    friend struct internal::Assignment;\n  public:\n    using Base::isCompressed;\n    using Base::nonZeros;\n    EIGEN_SPARSE_PUBLIC_INTERFACE(SparseMatrix)\n    using Base::operator+=;\n    using Base::operator-=;\n\n    typedef MappedSparseMatrix<Scalar,Flags> Map;\n    typedef Diagonal<SparseMatrix> DiagonalReturnType;\n    typedef Diagonal<const SparseMatrix> ConstDiagonalReturnType;\n    typedef typename Base::InnerIterator InnerIterator;\n    typedef typename Base::ReverseInnerIterator ReverseInnerIterator;\n    \n\n    using Base::IsRowMajor;\n    typedef internal::CompressedStorage<Scalar,StorageIndex> Storage;\n    enum {\n      Options = _Options\n    };\n\n    typedef typename Base::IndexVector IndexVector;\n    typedef typename Base::ScalarVector ScalarVector;\n  protected:\n    typedef SparseMatrix<Scalar,(Flags&~RowMajorBit)|(IsRowMajor?RowMajorBit:0)> TransposedSparseMatrix;\n\n    Index m_outerSize;\n    Index m_innerSize;\n    StorageIndex* m_outerIndex;\n    StorageIndex* m_innerNonZeros;     // optional, if null then the data is compressed\n    Storage m_data;\n\n  public:\n    \n    /** \\returns the number of rows of the matrix */\n    inline Index rows() const { return IsRowMajor ? m_outerSize : m_innerSize; }\n    /** \\returns the number of columns of the matrix */\n    inline Index cols() const { return IsRowMajor ? m_innerSize : m_outerSize; }\n\n    /** \\returns the number of rows (resp. columns) of the matrix if the storage order is column major (resp. row major) */\n    inline Index innerSize() const { return m_innerSize; }\n    /** \\returns the number of columns (resp. rows) of the matrix if the storage order is column major (resp. 
row major) */\n    inline Index outerSize() const { return m_outerSize; }\n    \n    /** \\returns a const pointer to the array of values.\n      * This function is aimed at interoperability with other libraries.\n      * \\sa innerIndexPtr(), outerIndexPtr() */\n    inline const Scalar* valuePtr() const { return m_data.valuePtr(); }\n    /** \\returns a non-const pointer to the array of values.\n      * This function is aimed at interoperability with other libraries.\n      * \\sa innerIndexPtr(), outerIndexPtr() */\n    inline Scalar* valuePtr() { return m_data.valuePtr(); }\n\n    /** \\returns a const pointer to the array of inner indices.\n      * This function is aimed at interoperability with other libraries.\n      * \\sa valuePtr(), outerIndexPtr() */\n    inline const StorageIndex* innerIndexPtr() const { return m_data.indexPtr(); }\n    /** \\returns a non-const pointer to the array of inner indices.\n      * This function is aimed at interoperability with other libraries.\n      * \\sa valuePtr(), outerIndexPtr() */\n    inline StorageIndex* innerIndexPtr() { return m_data.indexPtr(); }\n\n    /** \\returns a const pointer to the array of the starting positions of the inner vectors.\n      * This function is aimed at interoperability with other libraries.\n      * \\sa valuePtr(), innerIndexPtr() */\n    inline const StorageIndex* outerIndexPtr() const { return m_outerIndex; }\n    /** \\returns a non-const pointer to the array of the starting positions of the inner vectors.\n      * This function is aimed at interoperability with other libraries.\n      * \\sa valuePtr(), innerIndexPtr() */\n    inline StorageIndex* outerIndexPtr() { return m_outerIndex; }\n\n    /** \\returns a const pointer to the array of the number of non zeros of the inner vectors.\n      * This function is aimed at interoperability with other libraries.\n      * \\warning it returns the null pointer 0 in compressed mode */\n    inline const StorageIndex* innerNonZeroPtr() const { return m_innerNonZeros; }\n    /** \\returns a non-const pointer to the array of the number of non zeros of the inner vectors.\n      * This function is aimed at interoperability with other libraries.\n      * \\warning it returns the null pointer 0 in compressed mode */\n    inline StorageIndex* innerNonZeroPtr() { return m_innerNonZeros; }\n\n    /** \\internal */\n    inline Storage& data() { return m_data; }\n    /** \\internal */\n    inline const Storage& data() const { return m_data; }\n\n    /** \\returns the value of the matrix at position \\a i, \\a j\n      * This function returns Scalar(0) if the element is an explicit \\em zero */\n    inline Scalar coeff(Index row, Index col) const\n    {\n      eigen_assert(row>=0 && row<rows() && col>=0 && col<cols());\n      \n      const Index outer = IsRowMajor ? row : col;\n      const Index inner = IsRowMajor ? col : row;\n      Index end = m_innerNonZeros ? 
m_outerIndex[outer] + m_innerNonZeros[outer] : m_outerIndex[outer+1];\n      return m_data.atInRange(m_outerIndex[outer], end, StorageIndex(inner));\n    }\n\n    /** \\returns a non-const reference to the value of the matrix at position \\a i, \\a j\n      *\n      * If the element does not exist then it is inserted via the insert(Index,Index) function\n      * which itself turns the matrix into a non compressed form if that was not the case.\n      *\n      * This is an O(log(nnz_j)) operation (binary search) plus the cost of the insert(Index,Index)\n      * function if the element does not already exist.\n      */\n    inline Scalar& coeffRef(Index row, Index col)\n    {\n      eigen_assert(row>=0 && row<rows() && col>=0 && col<cols());\n      \n      const Index outer = IsRowMajor ? row : col;\n      const Index inner = IsRowMajor ? col : row;\n\n      Index start = m_outerIndex[outer];\n      Index end = m_innerNonZeros ? m_outerIndex[outer] + m_innerNonZeros[outer] : m_outerIndex[outer+1];\n      eigen_assert(end>=start && \"you probably called coeffRef on a non finalized matrix\");\n      if(end<=start)\n        return insert(row,col);\n      const Index p = m_data.searchLowerIndex(start,end-1,StorageIndex(inner));\n      if((p<end) && (m_data.index(p)==inner))\n        return m_data.value(p);\n      else\n        return insert(row,col);\n    }\n\n    /** \\returns a reference to a new non zero coefficient with coordinates \\a row x \\a col.\n      * The non zero coefficient must \\b not already exist.\n      *\n      * If the matrix \\c *this is in compressed mode, then \\c *this is turned into uncompressed\n      * mode while reserving room for 2 x this->innerSize() non zeros if reserve(Index) has not been called earlier.\n      * In this case, the insertion procedure is optimized for a \\e sequential insertion mode where elements are assumed to be\n      * inserted by increasing outer-indices.\n      * \n      * If that's not the case, then it is strongly recommended to either use a triplet-list to assemble the matrix, or to first\n      * call reserve(const SizesType &) to reserve the appropriate number of non-zero elements per inner vector.\n      *\n      * Assuming memory has been appropriately reserved, this function performs a sorted insertion in O(1)\n      * if the elements of each inner vector are inserted in increasing inner index order, and in O(nnz_j) for a random insertion.\n      *\n      */\n    Scalar& insert(Index row, Index col);\n\n  public:\n\n    /** Removes all non zeros but keeps the allocated memory\n      *\n      * This function does not free the currently allocated memory. To release as much memory as possible,\n      * call \\code mat.data().squeeze(); \\endcode after resizing it.\n      * \n      * \\sa resize(Index,Index), data()\n      */\n    inline void setZero()\n    {\n      m_data.clear();\n      memset(m_outerIndex, 0, (m_outerSize+1)*sizeof(StorageIndex));\n      if(m_innerNonZeros)\n        memset(m_innerNonZeros, 0, (m_outerSize)*sizeof(StorageIndex));\n    }\n\n    /** Preallocates \\a reserveSize non zeros.\n      *\n      * Precondition: the matrix must be in compressed mode. */\n    inline void reserve(Index reserveSize)\n    {\n      eigen_assert(isCompressed() && \"This function does not make sense in non compressed mode.\");\n      m_data.reserve(reserveSize);\n    }\n    \n    #ifdef EIGEN_PARSED_BY_DOXYGEN\n    /** Preallocates \\a reserveSize[\\c j] non zeros for each column (resp. 
row) \\c j.\n      *\n      * This function turns the matrix in non-compressed mode.\n      * \n      * The type \\c SizesType must expose the following interface:\n        \\code\n        typedef value_type;\n        const value_type& operator[](i) const;\n        \\endcode\n      * for \\c i in the [0,this->outerSize()[ range.\n      * Typical choices include std::vector<int>, Eigen::VectorXi, Eigen::VectorXi::Constant, etc.\n      */\n    template<class SizesType>\n    inline void reserve(const SizesType& reserveSizes);\n    #else\n    template<class SizesType>\n    inline void reserve(const SizesType& reserveSizes, const typename SizesType::value_type& enableif =\n    #if (!EIGEN_COMP_MSVC) || (EIGEN_COMP_MSVC>=1500) // MSVC 2005 fails to compile with this typename\n        typename\n    #endif\n        SizesType::value_type())\n    {\n      EIGEN_UNUSED_VARIABLE(enableif);\n      reserveInnerVectors(reserveSizes);\n    }\n    #endif // EIGEN_PARSED_BY_DOXYGEN\n  protected:\n    template<class SizesType>\n    inline void reserveInnerVectors(const SizesType& reserveSizes)\n    {\n      if(isCompressed())\n      {\n        Index totalReserveSize = 0;\n        // turn the matrix into non-compressed mode\n        m_innerNonZeros = static_cast<StorageIndex*>(std::malloc(m_outerSize * sizeof(StorageIndex)));\n        if (!m_innerNonZeros) internal::throw_std_bad_alloc();\n        \n        // temporarily use m_innerSizes to hold the new starting points.\n        StorageIndex* newOuterIndex = m_innerNonZeros;\n        \n        StorageIndex count = 0;\n        for(Index j=0; j<m_outerSize; ++j)\n        {\n          newOuterIndex[j] = count;\n          count += reserveSizes[j] + (m_outerIndex[j+1]-m_outerIndex[j]);\n          totalReserveSize += reserveSizes[j];\n        }\n        m_data.reserve(totalReserveSize);\n        StorageIndex previousOuterIndex = m_outerIndex[m_outerSize];\n        for(Index j=m_outerSize-1; j>=0; --j)\n        {\n          StorageIndex innerNNZ = previousOuterIndex - m_outerIndex[j];\n          for(Index i=innerNNZ-1; i>=0; --i)\n          {\n            m_data.index(newOuterIndex[j]+i) = m_data.index(m_outerIndex[j]+i);\n            m_data.value(newOuterIndex[j]+i) = m_data.value(m_outerIndex[j]+i);\n          }\n          previousOuterIndex = m_outerIndex[j];\n          m_outerIndex[j] = newOuterIndex[j];\n          m_innerNonZeros[j] = innerNNZ;\n        }\n        if(m_outerSize>0)\n          m_outerIndex[m_outerSize] = m_outerIndex[m_outerSize-1] + m_innerNonZeros[m_outerSize-1] + reserveSizes[m_outerSize-1];\n        \n        m_data.resize(m_outerIndex[m_outerSize]);\n      }\n      else\n      {\n        StorageIndex* newOuterIndex = static_cast<StorageIndex*>(std::malloc((m_outerSize+1)*sizeof(StorageIndex)));\n        if (!newOuterIndex) internal::throw_std_bad_alloc();\n        \n        StorageIndex count = 0;\n        for(Index j=0; j<m_outerSize; ++j)\n        {\n          newOuterIndex[j] = count;\n          StorageIndex alreadyReserved = (m_outerIndex[j+1]-m_outerIndex[j]) - m_innerNonZeros[j];\n          StorageIndex toReserve = std::max<StorageIndex>(reserveSizes[j], alreadyReserved);\n          count += toReserve + m_innerNonZeros[j];\n        }\n        newOuterIndex[m_outerSize] = count;\n        \n        m_data.resize(count);\n        for(Index j=m_outerSize-1; j>=0; --j)\n        {\n          Index offset = newOuterIndex[j] - m_outerIndex[j];\n          if(offset>0)\n          {\n            StorageIndex innerNNZ = m_innerNonZeros[j];\n     
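       /* shift the j-th inner vector into its enlarged slot; iterate backward because the destination range overlaps the source range from above */\n     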
       for(Index i=innerNNZ-1; i>=0; --i)\n            {\n              m_data.index(newOuterIndex[j]+i) = m_data.index(m_outerIndex[j]+i);\n              m_data.value(newOuterIndex[j]+i) = m_data.value(m_outerIndex[j]+i);\n            }\n          }\n        }\n        \n        std::swap(m_outerIndex, newOuterIndex);\n        std::free(newOuterIndex);\n      }\n      \n    }\n  public:\n\n    //--- low level purely coherent filling ---\n\n    /** \\internal\n      * \\returns a reference to the non zero coefficient at position \\a row, \\a col assuming that:\n      * - the nonzero does not already exist\n      * - the new coefficient is the last one according to the storage order\n      *\n      * Before filling a given inner vector you must call the startVec(Index) function.\n      *\n      * After an insertion session, you should call the finalize() function.\n      *\n      * \\sa insert, insertBackByOuterInner, startVec */\n    inline Scalar& insertBack(Index row, Index col)\n    {\n      return insertBackByOuterInner(IsRowMajor?row:col, IsRowMajor?col:row);\n    }\n\n    /** \\internal\n      * \\sa insertBack, startVec */\n    inline Scalar& insertBackByOuterInner(Index outer, Index inner)\n    {\n      eigen_assert(Index(m_outerIndex[outer+1]) == m_data.size() && \"Invalid ordered insertion (invalid outer index)\");\n      eigen_assert( (m_outerIndex[outer+1]-m_outerIndex[outer]==0 || m_data.index(m_data.size()-1)<inner) && \"Invalid ordered insertion (invalid inner index)\");\n      Index p = m_outerIndex[outer+1];\n      ++m_outerIndex[outer+1];\n      m_data.append(Scalar(0), inner);\n      return m_data.value(p);\n    }\n\n    /** \\internal\n      * \\warning use it only if you know what you are doing */\n    inline Scalar& insertBackByOuterInnerUnordered(Index outer, Index inner)\n    {\n      Index p = m_outerIndex[outer+1];\n      ++m_outerIndex[outer+1];\n      m_data.append(Scalar(0), inner);\n      return m_data.value(p);\n    }\n\n    /** \\internal\n      * \\sa insertBack, insertBackByOuterInner */\n    inline void startVec(Index outer)\n    {\n      eigen_assert(m_outerIndex[outer]==Index(m_data.size()) && \"You must call startVec for each inner vector sequentially\");\n      eigen_assert(m_outerIndex[outer+1]==0 && \"You must call startVec for each inner vector sequentially\");\n      m_outerIndex[outer+1] = m_outerIndex[outer];\n    }\n\n    /** \\internal\n      * Must be called after inserting a set of non zero entries using the low level compressed API.\n      */\n    inline void finalize()\n    {\n      if(isCompressed())\n      {\n        StorageIndex size = internal::convert_index<StorageIndex>(m_data.size());\n        Index i = m_outerSize;\n        // find the last filled column\n        while (i>=0 && m_outerIndex[i]==0)\n          --i;\n        ++i;\n        while (i<=m_outerSize)\n        {\n          m_outerIndex[i] = size;\n          ++i;\n        }\n      }\n    }\n\n    //---\n\n    template<typename InputIterators>\n    void setFromTriplets(const InputIterators& begin, const InputIterators& end);\n\n    template<typename InputIterators,typename DupFunctor>\n    void setFromTriplets(const InputIterators& begin, const InputIterators& end, DupFunctor dup_func);\n\n    void sumupDuplicates() { collapseDuplicates(internal::scalar_sum_op<Scalar,Scalar>()); }\n\n    template<typename DupFunctor>\n    void collapseDuplicates(DupFunctor dup_func = DupFunctor());\n\n    //---\n    \n    /** \\internal\n      * same as insert(Index,Index) except that the 
indices are given relative to the storage order */\n    Scalar& insertByOuterInner(Index j, Index i)\n    {\n      return insert(IsRowMajor ? j : i, IsRowMajor ? i : j);\n    }\n\n    /** Turns the matrix into the \\em compressed format.\n      */\n    void makeCompressed()\n    {\n      if(isCompressed())\n        return;\n      \n      eigen_internal_assert(m_outerIndex!=0 && m_outerSize>0);\n      \n      Index oldStart = m_outerIndex[1];\n      m_outerIndex[1] = m_innerNonZeros[0];\n      for(Index j=1; j<m_outerSize; ++j)\n      {\n        Index nextOldStart = m_outerIndex[j+1];\n        Index offset = oldStart - m_outerIndex[j];\n        if(offset>0)\n        {\n          for(Index k=0; k<m_innerNonZeros[j]; ++k)\n          {\n            m_data.index(m_outerIndex[j]+k) = m_data.index(oldStart+k);\n            m_data.value(m_outerIndex[j]+k) = m_data.value(oldStart+k);\n          }\n        }\n        m_outerIndex[j+1] = m_outerIndex[j] + m_innerNonZeros[j];\n        oldStart = nextOldStart;\n      }\n      std::free(m_innerNonZeros);\n      m_innerNonZeros = 0;\n      m_data.resize(m_outerIndex[m_outerSize]);\n      m_data.squeeze();\n    }\n\n    /** Turns the matrix into the uncompressed mode */\n    void uncompress()\n    {\n      if(m_innerNonZeros != 0)\n        return; \n      m_innerNonZeros = static_cast<StorageIndex*>(std::malloc(m_outerSize * sizeof(StorageIndex)));\n      for (Index i = 0; i < m_outerSize; i++)\n      {\n        m_innerNonZeros[i] = m_outerIndex[i+1] - m_outerIndex[i]; \n      }\n    }\n\n    /** Suppresses all nonzeros which are \\b much \\b smaller \\b than \\a reference under the tolerance \\a epsilon */\n    void prune(const Scalar& reference, const RealScalar& epsilon = NumTraits<RealScalar>::dummy_precision())\n    {\n      prune(default_prunning_func(reference,epsilon));\n    }\n    \n    /** Turns the matrix into compressed format, and suppresses all nonzeros which do not satisfy the predicate \\a keep.\n      * The functor type \\a KeepFunc must implement the following function:\n      * \\code\n      * bool operator() (const Index& row, const Index& col, const Scalar& value) const;\n      * \\endcode\n      * \\sa prune(Scalar,RealScalar)\n      */\n    template<typename KeepFunc>\n    void prune(const KeepFunc& keep = KeepFunc())\n    {\n      // TODO optimize the uncompressed mode to avoid moving and allocating the data twice\n      makeCompressed();\n\n      StorageIndex k = 0;\n      for(Index j=0; j<m_outerSize; ++j)\n      {\n        Index previousStart = m_outerIndex[j];\n        m_outerIndex[j] = k;\n        Index end = m_outerIndex[j+1];\n        for(Index i=previousStart; i<end; ++i)\n        {\n          if(keep(IsRowMajor?j:m_data.index(i), IsRowMajor?m_data.index(i):j, m_data.value(i)))\n          {\n            m_data.value(k) = m_data.value(i);\n            m_data.index(k) = m_data.index(i);\n            ++k;\n          }\n        }\n      }\n      m_outerIndex[m_outerSize] = k;\n      m_data.resize(k,0);\n    }\n\n    /** Resizes the matrix to a \\a rows x \\a cols matrix leaving old values untouched.\n      *\n      * If the sizes of the matrix are decreased, then the matrix is turned to \\b uncompressed-mode\n      * and the storage of the out of bounds coefficients is kept and reserved.\n      * Call makeCompressed() to pack the entries and squeeze extra memory.\n      *\n      * \\sa reserve(), setZero(), makeCompressed()\n      */\n    void conservativeResize(Index rows, Index cols) \n    {\n      // No change\n      if 
(this->rows() == rows && this->cols() == cols) return;\n      \n      // If one dimension is null, then there is nothing to be preserved\n      if(rows==0 || cols==0) return resize(rows,cols);\n\n      Index innerChange = IsRowMajor ? cols - this->cols() : rows - this->rows();\n      Index outerChange = IsRowMajor ? rows - this->rows() : cols - this->cols();\n      StorageIndex newInnerSize = convert_index(IsRowMajor ? cols : rows);\n\n      // Deals with inner non zeros\n      if (m_innerNonZeros)\n      {\n        // Resize m_innerNonZeros\n        StorageIndex *newInnerNonZeros = static_cast<StorageIndex*>(std::realloc(m_innerNonZeros, (m_outerSize + outerChange) * sizeof(StorageIndex)));\n        if (!newInnerNonZeros) internal::throw_std_bad_alloc();\n        m_innerNonZeros = newInnerNonZeros;\n        \n        for(Index i=m_outerSize; i<m_outerSize+outerChange; i++)          \n          m_innerNonZeros[i] = 0;\n      } \n      else if (innerChange < 0) \n      {\n        // Inner size decreased: allocate a new m_innerNonZeros\n        m_innerNonZeros = static_cast<StorageIndex*>(std::malloc((m_outerSize + outerChange) * sizeof(StorageIndex)));\n        if (!m_innerNonZeros) internal::throw_std_bad_alloc();\n        for(Index i = 0; i < m_outerSize + (std::min)(outerChange, Index(0)); i++)\n          m_innerNonZeros[i] = m_outerIndex[i+1] - m_outerIndex[i];\n        for(Index i = m_outerSize; i < m_outerSize + outerChange; i++)\n          m_innerNonZeros[i] = 0;\n      }\n      \n      // Change the m_innerNonZeros in case of a decrease of inner size\n      if (m_innerNonZeros && innerChange < 0)\n      {\n        for(Index i = 0; i < m_outerSize + (std::min)(outerChange, Index(0)); i++)\n        {\n          StorageIndex &n = m_innerNonZeros[i];\n          StorageIndex start = m_outerIndex[i];\n          while (n > 0 && m_data.index(start+n-1) >= newInnerSize) --n; \n        }\n      }\n      \n      m_innerSize = newInnerSize;\n\n      // Re-allocate outer index structure if necessary\n      if (outerChange == 0)\n        return;\n          \n      StorageIndex *newOuterIndex = static_cast<StorageIndex*>(std::realloc(m_outerIndex, (m_outerSize + outerChange + 1) * sizeof(StorageIndex)));\n      if (!newOuterIndex) internal::throw_std_bad_alloc();\n      m_outerIndex = newOuterIndex;\n      if (outerChange > 0)\n      {\n        StorageIndex lastIdx = m_outerSize == 0 ? 0 : m_outerIndex[m_outerSize];\n        for(Index i=m_outerSize; i<m_outerSize+outerChange+1; i++)          \n          m_outerIndex[i] = lastIdx; \n      }\n      m_outerSize += outerChange;\n    }\n    \n    /** Resizes the matrix to a \\a rows x \\a cols matrix and initializes it to zero.\n      * \n      * This function does not free the currently allocated memory. To release as much as memory as possible,\n      * call \\code mat.data().squeeze(); \\endcode after resizing it.\n      * \n      * \\sa reserve(), setZero()\n      */\n    void resize(Index rows, Index cols)\n    {\n      const Index outerSize = IsRowMajor ? rows : cols;\n      m_innerSize = IsRowMajor ? 
cols : rows;\n      m_data.clear();\n      if (m_outerSize != outerSize || m_outerSize==0)\n      {\n        std::free(m_outerIndex);\n        m_outerIndex = static_cast<StorageIndex*>(std::malloc((outerSize + 1) * sizeof(StorageIndex)));\n        if (!m_outerIndex) internal::throw_std_bad_alloc();\n        \n        m_outerSize = outerSize;\n      }\n      if(m_innerNonZeros)\n      {\n        std::free(m_innerNonZeros);\n        m_innerNonZeros = 0;\n      }\n      memset(m_outerIndex, 0, (m_outerSize+1)*sizeof(StorageIndex));\n    }\n\n    /** \\internal\n      * Resize the nonzero vector to \\a size */\n    void resizeNonZeros(Index size)\n    {\n      m_data.resize(size);\n    }\n\n    /** \\returns a const expression of the diagonal coefficients. */\n    const ConstDiagonalReturnType diagonal() const { return ConstDiagonalReturnType(*this); }\n    \n    /** \\returns a read-write expression of the diagonal coefficients.\n      * \\warning If the diagonal entries are written, then all diagonal\n      * entries \\b must already exist, otherwise an assertion will be raised.\n      */\n    DiagonalReturnType diagonal() { return DiagonalReturnType(*this); }\n\n    /** Default constructor yielding an empty \\c 0 \\c x \\c 0 matrix */\n    inline SparseMatrix()\n      : m_outerSize(-1), m_innerSize(0), m_outerIndex(0), m_innerNonZeros(0)\n    {\n      check_template_parameters();\n      resize(0, 0);\n    }\n\n    /** Constructs a \\a rows \\c x \\a cols empty matrix */\n    inline SparseMatrix(Index rows, Index cols)\n      : m_outerSize(0), m_innerSize(0), m_outerIndex(0), m_innerNonZeros(0)\n    {\n      check_template_parameters();\n      resize(rows, cols);\n    }\n\n    /** Constructs a sparse matrix from the sparse expression \\a other */\n    template<typename OtherDerived>\n    inline SparseMatrix(const SparseMatrixBase<OtherDerived>& other)\n      : m_outerSize(0), m_innerSize(0), m_outerIndex(0), m_innerNonZeros(0)\n    {\n      EIGEN_STATIC_ASSERT((internal::is_same<Scalar, typename OtherDerived::Scalar>::value),\n        YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)\n      check_template_parameters();\n      const bool needToTranspose = (Flags & RowMajorBit) != (internal::evaluator<OtherDerived>::Flags & RowMajorBit);\n      if (needToTranspose)\n        *this = other.derived();\n      else\n      {\n        #ifdef EIGEN_SPARSE_CREATE_TEMPORARY_PLUGIN\n          EIGEN_SPARSE_CREATE_TEMPORARY_PLUGIN\n        #endif\n        internal::call_assignment_no_alias(*this, other.derived());\n      }\n    }\n    \n    /** Constructs a sparse matrix from the sparse selfadjoint view \\a other */\n    template<typename OtherDerived, unsigned int UpLo>\n    inline SparseMatrix(const SparseSelfAdjointView<OtherDerived, UpLo>& other)\n      : m_outerSize(0), m_innerSize(0), m_outerIndex(0), m_innerNonZeros(0)\n    {\n      check_template_parameters();\n      Base::operator=(other);\n    }\n\n    /** Copy constructor (it performs a deep copy) */\n    inline SparseMatrix(const SparseMatrix& other)\n      : Base(), m_outerSize(0), m_innerSize(0), m_outerIndex(0), m_innerNonZeros(0)\n    {\n      check_template_parameters();\n      *this = other.derived();\n    }\n\n    /** \\brief Copy constructor with in-place evaluation */\n    template<typename OtherDerived>\n    SparseMatrix(const ReturnByValue<OtherDerived>& other)\n      : Base(), m_outerSize(0), m_innerSize(0), m_outerIndex(0), m_innerNonZeros(0)\n    {\n      
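/* validate the template parameters, size *this via initAssignment(), then let the expression evaluate directly into *this (no extra temporary) */\n      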
check_template_parameters();\n      initAssignment(other);\n      other.evalTo(*this);\n    }\n    \n    /** \\brief Copy constructor with in-place evaluation */\n    template<typename OtherDerived>\n    explicit SparseMatrix(const DiagonalBase<OtherDerived>& other)\n      : Base(), m_outerSize(0), m_innerSize(0), m_outerIndex(0), m_innerNonZeros(0)\n    {\n      check_template_parameters();\n      *this = other.derived();\n    }\n\n    /** Swaps the content of two sparse matrices of the same type.\n      * This is a fast operation that simply swaps the underlying pointers and parameters. */\n    inline void swap(SparseMatrix& other)\n    {\n      //EIGEN_DBG_SPARSE(std::cout << \"SparseMatrix:: swap\\n\");\n      std::swap(m_outerIndex, other.m_outerIndex);\n      std::swap(m_innerSize, other.m_innerSize);\n      std::swap(m_outerSize, other.m_outerSize);\n      std::swap(m_innerNonZeros, other.m_innerNonZeros);\n      m_data.swap(other.m_data);\n    }\n\n    /** Sets *this to the identity matrix.\n      * This function also turns the matrix into compressed mode, and drop any reserved memory. */\n    inline void setIdentity()\n    {\n      eigen_assert(rows() == cols() && \"ONLY FOR SQUARED MATRICES\");\n      this->m_data.resize(rows());\n      Eigen::Map<IndexVector>(this->m_data.indexPtr(), rows()).setLinSpaced(0, StorageIndex(rows()-1));\n      Eigen::Map<ScalarVector>(this->m_data.valuePtr(), rows()).setOnes();\n      Eigen::Map<IndexVector>(this->m_outerIndex, rows()+1).setLinSpaced(0, StorageIndex(rows()));\n      std::free(m_innerNonZeros);\n      m_innerNonZeros = 0;\n    }\n    inline SparseMatrix& operator=(const SparseMatrix& other)\n    {\n      if (other.isRValue())\n      {\n        swap(other.const_cast_derived());\n      }\n      else if(this!=&other)\n      {\n        #ifdef EIGEN_SPARSE_CREATE_TEMPORARY_PLUGIN\n          EIGEN_SPARSE_CREATE_TEMPORARY_PLUGIN\n        #endif\n        initAssignment(other);\n        if(other.isCompressed())\n        {\n          internal::smart_copy(other.m_outerIndex, other.m_outerIndex + m_outerSize + 1, m_outerIndex);\n          m_data = other.m_data;\n        }\n        else\n        {\n          Base::operator=(other);\n        }\n      }\n      return *this;\n    }\n\n#ifndef EIGEN_PARSED_BY_DOXYGEN\n    template<typename OtherDerived>\n    inline SparseMatrix& operator=(const EigenBase<OtherDerived>& other)\n    { return Base::operator=(other.derived()); }\n\n    template<typename Lhs, typename Rhs>\n    inline SparseMatrix& operator=(const Product<Lhs,Rhs,AliasFreeProduct>& other);\n#endif // EIGEN_PARSED_BY_DOXYGEN\n\n    template<typename OtherDerived>\n    EIGEN_DONT_INLINE SparseMatrix& operator=(const SparseMatrixBase<OtherDerived>& other);\n\n    friend std::ostream & operator << (std::ostream & s, const SparseMatrix& m)\n    {\n      EIGEN_DBG_SPARSE(\n        s << \"Nonzero entries:\\n\";\n        if(m.isCompressed())\n        {\n          for (Index i=0; i<m.nonZeros(); ++i)\n            s << \"(\" << m.m_data.value(i) << \",\" << m.m_data.index(i) << \") \";\n        }\n        else\n        {\n          for (Index i=0; i<m.outerSize(); ++i)\n          {\n            Index p = m.m_outerIndex[i];\n            Index pe = m.m_outerIndex[i]+m.m_innerNonZeros[i];\n            Index k=p;\n            for (; k<pe; ++k) {\n              s << \"(\" << m.m_data.value(k) << \",\" << m.m_data.index(k) << \") \";\n            }\n            for (; k<m.m_outerIndex[i+1]; ++k) {\n              s << \"(_,_) \";\n            }\n          
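  /* entries printed as (_,_) above are reserved but currently unused slots of this inner vector */\n          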
}\n        }\n        s << std::endl;\n        s << std::endl;\n        s << \"Outer pointers:\\n\";\n        for (Index i=0; i<m.outerSize(); ++i) {\n          s << m.m_outerIndex[i] << \" \";\n        }\n        s << \" $\" << std::endl;\n        if(!m.isCompressed())\n        {\n          s << \"Inner non zeros:\\n\";\n          for (Index i=0; i<m.outerSize(); ++i) {\n            s << m.m_innerNonZeros[i] << \" \";\n          }\n          s << \" $\" << std::endl;\n        }\n        s << std::endl;\n      );\n      s << static_cast<const SparseMatrixBase<SparseMatrix>&>(m);\n      return s;\n    }\n\n    /** Destructor */\n    inline ~SparseMatrix()\n    {\n      std::free(m_outerIndex);\n      std::free(m_innerNonZeros);\n    }\n\n    /** Overloaded for performance */\n    Scalar sum() const;\n    \n#   ifdef EIGEN_SPARSEMATRIX_PLUGIN\n#     include EIGEN_SPARSEMATRIX_PLUGIN\n#   endif\n\nprotected:\n\n    template<typename Other>\n    void initAssignment(const Other& other)\n    {\n      resize(other.rows(), other.cols());\n      if(m_innerNonZeros)\n      {\n        std::free(m_innerNonZeros);\n        m_innerNonZeros = 0;\n      }\n    }\n\n    /** \\internal\n      * \\sa insert(Index,Index) */\n    EIGEN_DONT_INLINE Scalar& insertCompressed(Index row, Index col);\n\n    /** \\internal\n      * A vector object that is equal to 0 everywhere but v at the position i */\n    class SingletonVector\n    {\n        StorageIndex m_index;\n        StorageIndex m_value;\n      public:\n        typedef StorageIndex value_type;\n        SingletonVector(Index i, Index v)\n          : m_index(convert_index(i)), m_value(convert_index(v))\n        {}\n\n        StorageIndex operator[](Index i) const { return i==m_index ? m_value : 0; }\n    };\n\n    /** \\internal\n      * \\sa insert(Index,Index) */\n    EIGEN_DONT_INLINE Scalar& insertUncompressed(Index row, Index col);\n\npublic:\n    /** \\internal\n      * \\sa insert(Index,Index) */\n    EIGEN_STRONG_INLINE Scalar& insertBackUncompressed(Index row, Index col)\n    {\n      const Index outer = IsRowMajor ? row : col;\n      const Index inner = IsRowMajor ? 
col : row;\n\n      eigen_assert(!isCompressed());\n      eigen_assert(m_innerNonZeros[outer]<=(m_outerIndex[outer+1] - m_outerIndex[outer]));\n\n      Index p = m_outerIndex[outer] + m_innerNonZeros[outer]++;\n      m_data.index(p) = convert_index(inner);\n      return (m_data.value(p) = Scalar(0));\n    }\nprotected:\n    struct IndexPosPair {\n      IndexPosPair(Index a_i, Index a_p) : i(a_i), p(a_p) {}\n      Index i;\n      Index p;\n    };\n\n    /** \\internal assign \\a diagXpr to the diagonal of \\c *this\n      * There are different strategies:\n      *   1 - if *this is overwritten (Func==assign_op) or *this is empty, then we can treat *this as a dense vector expression.\n      *   2 - otherwise, for each diagonal coeff,\n      *     2.a - if it already exists, then we update it,\n      *     2.b - otherwise, if *this is uncompressed and the current inner-vector has room for at least 1 element, then we perform an in-place insertion.\n      *     2.c - otherwise, we'll have to reallocate and copy everything, so instead of doing so for each new element, it is recorded in a std::vector.\n      *   3 - at the end, if some entries failed to be inserted in-place, then we allocate a new buffer, copy each chunk at the right position, and insert the new elements.\n      * \n      * TODO: some piece of code could be isolated and reused for a general in-place update strategy.\n      * TODO: if we start to defer the insertion of some elements (i.e., case 2.c executed once),\n      *       then it *might* be better to disable case 2.b since they will have to be copied anyway.\n      */\n    template<typename DiagXpr, typename Func>\n    void assignDiagonal(const DiagXpr diagXpr, const Func& assignFunc)\n    {\n      Index n = diagXpr.size();\n\n      const bool overwrite = internal::is_same<Func, internal::assign_op<Scalar,Scalar> >::value;\n      if(overwrite)\n      {\n        if((this->rows()!=n) || (this->cols()!=n))\n          this->resize(n, n);\n      }\n\n      if(m_data.size()==0 || overwrite)\n      {\n        typedef Array<StorageIndex,Dynamic,1> ArrayXI;  \n        this->makeCompressed();\n        this->resizeNonZeros(n);\n        Eigen::Map<ArrayXI>(this->innerIndexPtr(), n).setLinSpaced(0,StorageIndex(n)-1);\n        Eigen::Map<ArrayXI>(this->outerIndexPtr(), n+1).setLinSpaced(0,StorageIndex(n));\n        Eigen::Map<Array<Scalar,Dynamic,1> > values = this->coeffs();\n        values.setZero();\n        internal::call_assignment_no_alias(values, diagXpr, assignFunc);\n      }\n      else\n      {\n        bool isComp = isCompressed();\n        internal::evaluator<DiagXpr> diaEval(diagXpr);\n        std::vector<IndexPosPair> newEntries;\n\n        // 1 - try in-place update and record insertion failures\n        for(Index i = 0; i<n; ++i)\n        {\n          internal::LowerBoundIndex lb = this->lower_bound(i,i);\n          Index p = lb.value;\n          if(lb.found)\n          {\n            // the coeff already exists\n            assignFunc.assignCoeff(m_data.value(p), diaEval.coeff(i));\n          }\n          else if((!isComp) && m_innerNonZeros[i] < (m_outerIndex[i+1]-m_outerIndex[i]))\n          {\n            // non compressed mode with local room for inserting one element\n            m_data.moveChunk(p, p+1, m_outerIndex[i]+m_innerNonZeros[i]-p);\n            m_innerNonZeros[i]++;\n            m_data.value(p) = Scalar(0);\n            m_data.index(p) = StorageIndex(i);\n            assignFunc.assignCoeff(m_data.value(p), diaEval.coeff(i));\n          }\n    
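       /* no spare room in this inner vector: fall through to case 2.c, the deferred insertion handled after this loop */\n    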
      else\n          {\n            // defer insertion\n            newEntries.push_back(IndexPosPair(i,p));\n          }\n        }\n        // 2 - insert deferred entries\n        Index n_entries = Index(newEntries.size());\n        if(n_entries>0)\n        {\n          Storage newData(m_data.size()+n_entries);\n          Index prev_p = 0;\n          Index prev_i = 0;\n          for(Index k=0; k<n_entries;++k)\n          {\n            Index i = newEntries[k].i;\n            Index p = newEntries[k].p;\n            internal::smart_copy(m_data.valuePtr()+prev_p, m_data.valuePtr()+p, newData.valuePtr()+prev_p+k);\n            internal::smart_copy(m_data.indexPtr()+prev_p, m_data.indexPtr()+p, newData.indexPtr()+prev_p+k);\n            for(Index j=prev_i;j<i;++j)\n              m_outerIndex[j+1] += k;\n            if(!isComp)\n              m_innerNonZeros[i]++;\n            prev_p = p;\n            prev_i = i;\n            newData.value(p+k) = Scalar(0);\n            newData.index(p+k) = StorageIndex(i);\n            assignFunc.assignCoeff(newData.value(p+k), diaEval.coeff(i));\n          }\n          {\n            internal::smart_copy(m_data.valuePtr()+prev_p, m_data.valuePtr()+m_data.size(), newData.valuePtr()+prev_p+n_entries);\n            internal::smart_copy(m_data.indexPtr()+prev_p, m_data.indexPtr()+m_data.size(), newData.indexPtr()+prev_p+n_entries);\n            for(Index j=prev_i+1;j<=m_outerSize;++j)\n              m_outerIndex[j] += n_entries;\n          }\n          m_data.swap(newData);\n        }\n      }\n    }\n\nprivate:\n  static void check_template_parameters()\n  {\n    EIGEN_STATIC_ASSERT(NumTraits<StorageIndex>::IsSigned,THE_INDEX_TYPE_MUST_BE_A_SIGNED_TYPE);\n    EIGEN_STATIC_ASSERT((Options&(ColMajor|RowMajor))==Options,INVALID_MATRIX_TEMPLATE_PARAMETERS);\n  }\n\n  struct default_prunning_func {\n    default_prunning_func(const Scalar& ref, const RealScalar& eps) : reference(ref), epsilon(eps) {}\n    inline bool operator() (const Index&, const Index&, const Scalar& value) const\n    {\n      return !internal::isMuchSmallerThan(value, reference, epsilon);\n    }\n    Scalar reference;\n    RealScalar epsilon;\n  };\n};\n\nnamespace internal {\n\ntemplate<typename InputIterator, typename SparseMatrixType, typename DupFunctor>\nvoid set_from_triplets(const InputIterator& begin, const InputIterator& end, SparseMatrixType& mat, DupFunctor dup_func)\n{\n  enum { IsRowMajor = SparseMatrixType::IsRowMajor };\n  typedef typename SparseMatrixType::Scalar Scalar;\n  typedef typename SparseMatrixType::StorageIndex StorageIndex;\n  SparseMatrix<Scalar,IsRowMajor?ColMajor:RowMajor,StorageIndex> trMat(mat.rows(),mat.cols());\n\n  if(begin!=end)\n  {\n    // pass 1: count the nnz per inner-vector\n    typename SparseMatrixType::IndexVector wi(trMat.outerSize());\n    wi.setZero();\n    for(InputIterator it(begin); it!=end; ++it)\n    {\n      eigen_assert(it->row()>=0 && it->row()<mat.rows() && it->col()>=0 && it->col()<mat.cols());\n      wi(IsRowMajor ? 
it->col() : it->row())++;\n    }\n\n    // pass 2: insert all the elements into trMat\n    trMat.reserve(wi);\n    for(InputIterator it(begin); it!=end; ++it)\n      trMat.insertBackUncompressed(it->row(),it->col()) = it->value();\n\n    // pass 3:\n    trMat.collapseDuplicates(dup_func);\n  }\n\n  // pass 4: transposed copy -> implicit sorting\n  mat = trMat;\n}\n\n}\n\n\n/** Fill the matrix \\c *this with the list of \\em triplets defined by the iterator range \\a begin - \\a end.\n  *\n  * A \\em triplet is a tuple (i,j,value) defining a non-zero element.\n  * The input list of triplets does not have to be sorted, and can contain duplicated elements.\n  * In any case, the result is a \\b sorted and \\b compressed sparse matrix where the duplicates have been summed up.\n  * This is an \\em O(n) operation, with \\em n the number of triplet elements.\n  * The initial contents of \\c *this are destroyed.\n  * The matrix \\c *this must be properly resized beforehand using the SparseMatrix(Index,Index) constructor,\n  * or the resize(Index,Index) method. The sizes are not extracted from the triplet list.\n  *\n  * The \\a InputIterators value_type must provide the following interface:\n  * \\code\n  * Scalar value() const; // the value\n  * Scalar row() const;   // the row index i\n  * Scalar col() const;   // the column index j\n  * \\endcode\n  * See for instance the Eigen::Triplet template class.\n  *\n  * Here is a typical usage example:\n  * \\code\n    typedef Triplet<double> T;\n    std::vector<T> tripletList;\n    tripletList.reserve(estimation_of_entries);\n    for(...)\n    {\n      // ...\n      tripletList.push_back(T(i,j,v_ij));\n    }\n    SparseMatrixType m(rows,cols);\n    m.setFromTriplets(tripletList.begin(), tripletList.end());\n    // m is ready to go!\n  * \\endcode\n  *\n  * \\warning The list of triplets is read multiple times (at least twice). Therefore, it is not recommended to define\n  * an abstract iterator over a complex data-structure that would be expensive to evaluate. 
The triplets should rather\n  * be explicitly stored into a std::vector for instance.\n  */\ntemplate<typename Scalar, int _Options, typename _StorageIndex>\ntemplate<typename InputIterators>\nvoid SparseMatrix<Scalar,_Options,_StorageIndex>::setFromTriplets(const InputIterators& begin, const InputIterators& end)\n{\n  internal::set_from_triplets<InputIterators, SparseMatrix<Scalar,_Options,_StorageIndex> >(begin, end, *this, internal::scalar_sum_op<Scalar,Scalar>());\n}\n\n/** The same as setFromTriplets but when duplicates are met the functor \\a dup_func is applied:\n  * \\code\n  * value = dup_func(OldValue, NewValue)\n  * \\endcode \n  * Here is a C++11 example keeping the latest entry only:\n  * \\code\n  * mat.setFromTriplets(triplets.begin(), triplets.end(), [] (const Scalar&,const Scalar &b) { return b; });\n  * \\endcode\n  */\ntemplate<typename Scalar, int _Options, typename _StorageIndex>\ntemplate<typename InputIterators,typename DupFunctor>\nvoid SparseMatrix<Scalar,_Options,_StorageIndex>::setFromTriplets(const InputIterators& begin, const InputIterators& end, DupFunctor dup_func)\n{\n  internal::set_from_triplets<InputIterators, SparseMatrix<Scalar,_Options,_StorageIndex>, DupFunctor>(begin, end, *this, dup_func);\n}\n\n/** \\internal */\ntemplate<typename Scalar, int _Options, typename _StorageIndex>\ntemplate<typename DupFunctor>\nvoid SparseMatrix<Scalar,_Options,_StorageIndex>::collapseDuplicates(DupFunctor dup_func)\n{\n  eigen_assert(!isCompressed());\n  // TODO, in practice we should be able to use m_innerNonZeros for that task\n  IndexVector wi(innerSize());\n  wi.fill(-1);\n  StorageIndex count = 0;\n  // for each inner-vector, wi[inner_index] will hold the position of first element into the index/value buffers\n  for(Index j=0; j<outerSize(); ++j)\n  {\n    StorageIndex start   = count;\n    Index oldEnd  = m_outerIndex[j]+m_innerNonZeros[j];\n    for(Index k=m_outerIndex[j]; k<oldEnd; ++k)\n    {\n      Index i = m_data.index(k);\n      if(wi(i)>=start)\n      {\n        // we already meet this entry => accumulate it\n        m_data.value(wi(i)) = dup_func(m_data.value(wi(i)), m_data.value(k));\n      }\n      else\n      {\n        m_data.value(count) = m_data.value(k);\n        m_data.index(count) = m_data.index(k);\n        wi(i) = count;\n        ++count;\n      }\n    }\n    m_outerIndex[j] = start;\n  }\n  m_outerIndex[m_outerSize] = count;\n\n  // turn the matrix into compressed form\n  std::free(m_innerNonZeros);\n  m_innerNonZeros = 0;\n  m_data.resize(m_outerIndex[m_outerSize]);\n}\n\ntemplate<typename Scalar, int _Options, typename _StorageIndex>\ntemplate<typename OtherDerived>\nEIGEN_DONT_INLINE SparseMatrix<Scalar,_Options,_StorageIndex>& SparseMatrix<Scalar,_Options,_StorageIndex>::operator=(const SparseMatrixBase<OtherDerived>& other)\n{\n  EIGEN_STATIC_ASSERT((internal::is_same<Scalar, typename OtherDerived::Scalar>::value),\n        YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)\n\n  #ifdef EIGEN_SPARSE_CREATE_TEMPORARY_PLUGIN\n    EIGEN_SPARSE_CREATE_TEMPORARY_PLUGIN\n  #endif\n      \n  const bool needToTranspose = (Flags & RowMajorBit) != (internal::evaluator<OtherDerived>::Flags & RowMajorBit);\n  if (needToTranspose)\n  {\n    #ifdef EIGEN_SPARSE_TRANSPOSED_COPY_PLUGIN\n      EIGEN_SPARSE_TRANSPOSED_COPY_PLUGIN\n    #endif\n    // two passes algorithm:\n    //  1 - compute the number of coeffs per dest inner vector\n    //  2 - do the actual copy/eval\n    // Since each 
coeff of the rhs has to be evaluated twice, let's evaluate it if needed\n    typedef typename internal::nested_eval<OtherDerived,2,typename internal::plain_matrix_type<OtherDerived>::type >::type OtherCopy;\n    typedef typename internal::remove_all<OtherCopy>::type _OtherCopy;\n    typedef internal::evaluator<_OtherCopy> OtherCopyEval;\n    OtherCopy otherCopy(other.derived());\n    OtherCopyEval otherCopyEval(otherCopy);\n\n    SparseMatrix dest(other.rows(),other.cols());\n    Eigen::Map<IndexVector> (dest.m_outerIndex,dest.outerSize()).setZero();\n\n    // pass 1\n    // FIXME the above copy could be merged with that pass\n    for (Index j=0; j<otherCopy.outerSize(); ++j)\n      for (typename OtherCopyEval::InnerIterator it(otherCopyEval, j); it; ++it)\n        ++dest.m_outerIndex[it.index()];\n\n    // prefix sum\n    StorageIndex count = 0;\n    IndexVector positions(dest.outerSize());\n    for (Index j=0; j<dest.outerSize(); ++j)\n    {\n      StorageIndex tmp = dest.m_outerIndex[j];\n      dest.m_outerIndex[j] = count;\n      positions[j] = count;\n      count += tmp;\n    }\n    dest.m_outerIndex[dest.outerSize()] = count;\n    // alloc\n    dest.m_data.resize(count);\n    // pass 2\n    for (StorageIndex j=0; j<otherCopy.outerSize(); ++j)\n    {\n      for (typename OtherCopyEval::InnerIterator it(otherCopyEval, j); it; ++it)\n      {\n        Index pos = positions[it.index()]++;\n        dest.m_data.index(pos) = j;\n        dest.m_data.value(pos) = it.value();\n      }\n    }\n    this->swap(dest);\n    return *this;\n  }\n  else\n  {\n    if(other.isRValue())\n    {\n      initAssignment(other.derived());\n    }\n    // there is no special optimization\n    return Base::operator=(other.derived());\n  }\n}\n\ntemplate<typename _Scalar, int _Options, typename _StorageIndex>\ntypename SparseMatrix<_Scalar,_Options,_StorageIndex>::Scalar& SparseMatrix<_Scalar,_Options,_StorageIndex>::insert(Index row, Index col)\n{\n  eigen_assert(row>=0 && row<rows() && col>=0 && col<cols());\n  \n  const Index outer = IsRowMajor ? row : col;\n  const Index inner = IsRowMajor ? 
col : row;\n  \n  if(isCompressed())\n  {\n    if(nonZeros()==0)\n    {\n      // reserve space if not already done\n      if(m_data.allocatedSize()==0)\n        m_data.reserve(2*m_innerSize);\n      \n      // turn the matrix into non-compressed mode\n      m_innerNonZeros = static_cast<StorageIndex*>(std::malloc(m_outerSize * sizeof(StorageIndex)));\n      if(!m_innerNonZeros) internal::throw_std_bad_alloc();\n      \n      memset(m_innerNonZeros, 0, (m_outerSize)*sizeof(StorageIndex));\n      \n      // pack all inner-vectors to the end of the pre-allocated space\n      // and allocate the entire free-space to the first inner-vector\n      StorageIndex end = convert_index(m_data.allocatedSize());\n      for(Index j=1; j<=m_outerSize; ++j)\n        m_outerIndex[j] = end;\n    }\n    else\n    {\n      // turn the matrix into non-compressed mode\n      m_innerNonZeros = static_cast<StorageIndex*>(std::malloc(m_outerSize * sizeof(StorageIndex)));\n      if(!m_innerNonZeros) internal::throw_std_bad_alloc();\n      for(Index j=0; j<m_outerSize; ++j)\n        m_innerNonZeros[j] = m_outerIndex[j+1]-m_outerIndex[j];\n    }\n  }\n  \n  // check whether we can do a fast \"push back\" insertion\n  Index data_end = m_data.allocatedSize();\n  \n  // First case: we are filling a new inner vector which is packed at the end.\n  // We assume that all remaining inner-vectors are also empty and packed to the end.\n  if(m_outerIndex[outer]==data_end)\n  {\n    eigen_internal_assert(m_innerNonZeros[outer]==0);\n    \n    // pack previous empty inner-vectors to end of the used-space\n    // and allocate the entire free-space to the current inner-vector.\n    StorageIndex p = convert_index(m_data.size());\n    Index j = outer;\n    while(j>=0 && m_innerNonZeros[j]==0)\n      m_outerIndex[j--] = p;\n    \n    // push back the new element\n    ++m_innerNonZeros[outer];\n    m_data.append(Scalar(0), inner);\n    \n    // check for reallocation\n    if(data_end != m_data.allocatedSize())\n    {\n      // m_data has been reallocated\n      //  -> move remaining inner-vectors back to the end of the free-space\n      //     so that the entire free-space is allocated to the current inner-vector.\n      eigen_internal_assert(data_end < m_data.allocatedSize());\n      StorageIndex new_end = convert_index(m_data.allocatedSize());\n      for(Index k=outer+1; k<=m_outerSize; ++k)\n        if(m_outerIndex[k]==data_end)\n          m_outerIndex[k] = new_end;\n    }\n    return m_data.value(p);\n  }\n  \n  // Second case: the next inner-vector is packed to the end\n  // and the current inner-vector end match the used-space.\n  if(m_outerIndex[outer+1]==data_end && m_outerIndex[outer]+m_innerNonZeros[outer]==m_data.size())\n  {\n    eigen_internal_assert(outer+1==m_outerSize || m_innerNonZeros[outer+1]==0);\n    \n    // add space for the new element\n    ++m_innerNonZeros[outer];\n    m_data.resize(m_data.size()+1);\n    \n    // check for reallocation\n    if(data_end != m_data.allocatedSize())\n    {\n      // m_data has been reallocated\n      //  -> move remaining inner-vectors back to the end of the free-space\n      //     so that the entire free-space is allocated to the current inner-vector.\n      eigen_internal_assert(data_end < m_data.allocatedSize());\n      StorageIndex new_end = convert_index(m_data.allocatedSize());\n      for(Index k=outer+1; k<=m_outerSize; ++k)\n        if(m_outerIndex[k]==data_end)\n          m_outerIndex[k] = new_end;\n    }\n    \n    // and insert it at the right position (sorted 
insertion)\n    Index startId = m_outerIndex[outer];\n    Index p = m_outerIndex[outer]+m_innerNonZeros[outer]-1;\n    while ( (p > startId) && (m_data.index(p-1) > inner) )\n    {\n      m_data.index(p) = m_data.index(p-1);\n      m_data.value(p) = m_data.value(p-1);\n      --p;\n    }\n    \n    m_data.index(p) = convert_index(inner);\n    return (m_data.value(p) = Scalar(0));\n  }\n  \n  if(m_data.size() != m_data.allocatedSize())\n  {\n    // make sure the matrix is compatible to random un-compressed insertion:\n    m_data.resize(m_data.allocatedSize());\n    this->reserveInnerVectors(Array<StorageIndex,Dynamic,1>::Constant(m_outerSize, 2));\n  }\n  \n  return insertUncompressed(row,col);\n}\n    \ntemplate<typename _Scalar, int _Options, typename _StorageIndex>\nEIGEN_DONT_INLINE typename SparseMatrix<_Scalar,_Options,_StorageIndex>::Scalar& SparseMatrix<_Scalar,_Options,_StorageIndex>::insertUncompressed(Index row, Index col)\n{\n  eigen_assert(!isCompressed());\n\n  const Index outer = IsRowMajor ? row : col;\n  const StorageIndex inner = convert_index(IsRowMajor ? col : row);\n\n  Index room = m_outerIndex[outer+1] - m_outerIndex[outer];\n  StorageIndex innerNNZ = m_innerNonZeros[outer];\n  if(innerNNZ>=room)\n  {\n    // this inner vector is full, we need to reallocate the whole buffer :(\n    reserve(SingletonVector(outer,std::max<StorageIndex>(2,innerNNZ)));\n  }\n\n  Index startId = m_outerIndex[outer];\n  Index p = startId + m_innerNonZeros[outer];\n  while ( (p > startId) && (m_data.index(p-1) > inner) )\n  {\n    m_data.index(p) = m_data.index(p-1);\n    m_data.value(p) = m_data.value(p-1);\n    --p;\n  }\n  eigen_assert((p<=startId || m_data.index(p-1)!=inner) && \"you cannot insert an element that already exists, you must call coeffRef to this end\");\n\n  m_innerNonZeros[outer]++;\n\n  m_data.index(p) = inner;\n  return (m_data.value(p) = Scalar(0));\n}\n\ntemplate<typename _Scalar, int _Options, typename _StorageIndex>\nEIGEN_DONT_INLINE typename SparseMatrix<_Scalar,_Options,_StorageIndex>::Scalar& SparseMatrix<_Scalar,_Options,_StorageIndex>::insertCompressed(Index row, Index col)\n{\n  eigen_assert(isCompressed());\n\n  const Index outer = IsRowMajor ? row : col;\n  const Index inner = IsRowMajor ? col : row;\n\n  Index previousOuter = outer;\n  if (m_outerIndex[outer+1]==0)\n  {\n    // we start a new inner vector\n    while (previousOuter>=0 && m_outerIndex[previousOuter]==0)\n    {\n      m_outerIndex[previousOuter] = convert_index(m_data.size());\n      --previousOuter;\n    }\n    m_outerIndex[outer+1] = m_outerIndex[outer];\n  }\n\n  // here we have to handle the tricky case where the outerIndex array\n  // starts with: [ 0 0 0 0 0 1 ...] 
and we are inserted in, e.g.,\n  // the 2nd inner vector...\n  bool isLastVec = (!(previousOuter==-1 && m_data.size()!=0))\n                && (std::size_t(m_outerIndex[outer+1]) == m_data.size());\n\n  std::size_t startId = m_outerIndex[outer];\n  // FIXME let's make sure sizeof(long int) == sizeof(std::size_t)\n  std::size_t p = m_outerIndex[outer+1];\n  ++m_outerIndex[outer+1];\n\n  double reallocRatio = 1;\n  if (m_data.allocatedSize()<=m_data.size())\n  {\n    // if there is no preallocated memory, let's reserve a minimum of 32 elements\n    if (m_data.size()==0)\n    {\n      m_data.reserve(32);\n    }\n    else\n    {\n      // we need to reallocate the data, to reduce multiple reallocations\n      // we use a smart resize algorithm based on the current filling ratio\n      // in addition, we use double to avoid integers overflows\n      double nnzEstimate = double(m_outerIndex[outer])*double(m_outerSize)/double(outer+1);\n      reallocRatio = (nnzEstimate-double(m_data.size()))/double(m_data.size());\n      // furthermore we bound the realloc ratio to:\n      //   1) reduce multiple minor realloc when the matrix is almost filled\n      //   2) avoid to allocate too much memory when the matrix is almost empty\n      reallocRatio = (std::min)((std::max)(reallocRatio,1.5),8.);\n    }\n  }\n  m_data.resize(m_data.size()+1,reallocRatio);\n\n  if (!isLastVec)\n  {\n    if (previousOuter==-1)\n    {\n      // oops wrong guess.\n      // let's correct the outer offsets\n      for (Index k=0; k<=(outer+1); ++k)\n        m_outerIndex[k] = 0;\n      Index k=outer+1;\n      while(m_outerIndex[k]==0)\n        m_outerIndex[k++] = 1;\n      while (k<=m_outerSize && m_outerIndex[k]!=0)\n        m_outerIndex[k++]++;\n      p = 0;\n      --k;\n      k = m_outerIndex[k]-1;\n      while (k>0)\n      {\n        m_data.index(k) = m_data.index(k-1);\n        m_data.value(k) = m_data.value(k-1);\n        k--;\n      }\n    }\n    else\n    {\n      // we are not inserting into the last inner vec\n      // update outer indices:\n      Index j = outer+2;\n      while (j<=m_outerSize && m_outerIndex[j]!=0)\n        m_outerIndex[j++]++;\n      --j;\n      // shift data of last vecs:\n      Index k = m_outerIndex[j]-1;\n      while (k>=Index(p))\n      {\n        m_data.index(k) = m_data.index(k-1);\n        m_data.value(k) = m_data.value(k-1);\n        k--;\n      }\n    }\n  }\n\n  while ( (p > startId) && (m_data.index(p-1) > inner) )\n  {\n    m_data.index(p) = m_data.index(p-1);\n    m_data.value(p) = m_data.value(p-1);\n    --p;\n  }\n\n  m_data.index(p) = inner;\n  return (m_data.value(p) = Scalar(0));\n}\n\nnamespace internal {\n\ntemplate<typename _Scalar, int _Options, typename _StorageIndex>\nstruct evaluator<SparseMatrix<_Scalar,_Options,_StorageIndex> >\n  : evaluator<SparseCompressedBase<SparseMatrix<_Scalar,_Options,_StorageIndex> > >\n{\n  typedef evaluator<SparseCompressedBase<SparseMatrix<_Scalar,_Options,_StorageIndex> > > Base;\n  typedef SparseMatrix<_Scalar,_Options,_StorageIndex> SparseMatrixType;\n  evaluator() : Base() {}\n  explicit evaluator(const SparseMatrixType &mat) : Base(mat) {}\n};\n\n}\n\n} // end namespace Eigen\n\n#endif // EIGEN_SPARSEMATRIX_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/SparseCore/SparseMatrixBase.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_SPARSEMATRIXBASE_H\n#define EIGEN_SPARSEMATRIXBASE_H\n\nnamespace Eigen { \n\n/** \\ingroup SparseCore_Module\n  *\n  * \\class SparseMatrixBase\n  *\n  * \\brief Base class of any sparse matrices or sparse expressions\n  *\n  * \\tparam Derived is the derived type, e.g. a sparse matrix type, or an expression, etc.\n  *\n  * This class can be extended with the help of the plugin mechanism described on the page\n  * \\ref TopicCustomizing_Plugins by defining the preprocessor symbol \\c EIGEN_SPARSEMATRIXBASE_PLUGIN.\n  */\ntemplate<typename Derived> class SparseMatrixBase\n  : public EigenBase<Derived>\n{\n  public:\n\n    typedef typename internal::traits<Derived>::Scalar Scalar;\n    \n    /** The numeric type of the expression' coefficients, e.g. float, double, int or std::complex<float>, etc.\n      *\n      * It is an alias for the Scalar type */\n    typedef Scalar value_type;\n    \n    typedef typename internal::packet_traits<Scalar>::type PacketScalar;\n    typedef typename internal::traits<Derived>::StorageKind StorageKind;\n\n    /** The integer type used to \\b store indices within a SparseMatrix.\n      * For a \\c SparseMatrix<Scalar,Options,IndexType> it an alias of the third template parameter \\c IndexType. */\n    typedef typename internal::traits<Derived>::StorageIndex StorageIndex;\n\n    typedef typename internal::add_const_on_value_type_if_arithmetic<\n                         typename internal::packet_traits<Scalar>::type\n                     >::type PacketReturnType;\n\n    typedef SparseMatrixBase StorageBaseType;\n\n    typedef Matrix<StorageIndex,Dynamic,1> IndexVector;\n    typedef Matrix<Scalar,Dynamic,1> ScalarVector;\n    \n    template<typename OtherDerived>\n    Derived& operator=(const EigenBase<OtherDerived> &other);\n\n    enum {\n\n      RowsAtCompileTime = internal::traits<Derived>::RowsAtCompileTime,\n        /**< The number of rows at compile-time. This is just a copy of the value provided\n          * by the \\a Derived type. If a value is not known at compile-time,\n          * it is set to the \\a Dynamic constant.\n          * \\sa MatrixBase::rows(), MatrixBase::cols(), ColsAtCompileTime, SizeAtCompileTime */\n\n      ColsAtCompileTime = internal::traits<Derived>::ColsAtCompileTime,\n        /**< The number of columns at compile-time. This is just a copy of the value provided\n          * by the \\a Derived type. If a value is not known at compile-time,\n          * it is set to the \\a Dynamic constant.\n          * \\sa MatrixBase::rows(), MatrixBase::cols(), RowsAtCompileTime, SizeAtCompileTime */\n\n\n      SizeAtCompileTime = (internal::size_at_compile_time<internal::traits<Derived>::RowsAtCompileTime,\n                                                   internal::traits<Derived>::ColsAtCompileTime>::ret),\n        /**< This is equal to the number of coefficients, i.e. the number of\n          * rows times the number of columns, or to \\a Dynamic if this is not\n          * known at compile-time. 
\\sa RowsAtCompileTime, ColsAtCompileTime */\n\n      MaxRowsAtCompileTime = RowsAtCompileTime,\n      MaxColsAtCompileTime = ColsAtCompileTime,\n\n      MaxSizeAtCompileTime = (internal::size_at_compile_time<MaxRowsAtCompileTime,\n                                                      MaxColsAtCompileTime>::ret),\n\n      IsVectorAtCompileTime = RowsAtCompileTime == 1 || ColsAtCompileTime == 1,\n        /**< This is set to true if either the number of rows or the number of\n          * columns is known at compile-time to be equal to 1. Indeed, in that case,\n          * we are dealing with a column-vector (if there is only one column) or with\n          * a row-vector (if there is only one row). */\n\n      NumDimensions = int(MaxSizeAtCompileTime) == 1 ? 0 : bool(IsVectorAtCompileTime) ? 1 : 2,\n        /**< This value is equal to Tensor::NumDimensions, i.e. 0 for scalars, 1 for vectors,\n         * and 2 for matrices.\n         */\n\n      Flags = internal::traits<Derived>::Flags,\n        /**< This stores expression \\ref flags flags which may or may not be inherited by new expressions\n          * constructed from this one. See the \\ref flags \"list of flags\".\n          */\n\n      IsRowMajor = Flags&RowMajorBit ? 1 : 0,\n      \n      InnerSizeAtCompileTime = int(IsVectorAtCompileTime) ? int(SizeAtCompileTime)\n                             : int(IsRowMajor) ? int(ColsAtCompileTime) : int(RowsAtCompileTime),\n\n      #ifndef EIGEN_PARSED_BY_DOXYGEN\n      _HasDirectAccess = (int(Flags)&DirectAccessBit) ? 1 : 0 // workaround sunCC\n      #endif\n    };\n\n    /** \\internal the return type of MatrixBase::adjoint() */\n    typedef typename internal::conditional<NumTraits<Scalar>::IsComplex,\n                        CwiseUnaryOp<internal::scalar_conjugate_op<Scalar>, Eigen::Transpose<const Derived> >,\n                        Transpose<const Derived>\n                     >::type AdjointReturnType;\n    typedef Transpose<Derived> TransposeReturnType;\n    typedef typename internal::add_const<Transpose<const Derived> >::type ConstTransposeReturnType;\n\n    // FIXME storage order do not match evaluator storage order\n    typedef SparseMatrix<Scalar, Flags&RowMajorBit ? RowMajor : ColMajor, StorageIndex> PlainObject;\n\n#ifndef EIGEN_PARSED_BY_DOXYGEN\n    /** This is the \"real scalar\" type; if the \\a Scalar type is already real numbers\n      * (e.g. int, float or double) then \\a RealScalar is just the same as \\a Scalar. 
If\n      * \\a Scalar is \\a std::complex<T> then RealScalar is \\a T.\n      *\n      * \\sa class NumTraits\n      */\n    typedef typename NumTraits<Scalar>::Real RealScalar;\n\n    /** \\internal the return type of coeff()\n      */\n    typedef typename internal::conditional<_HasDirectAccess, const Scalar&, Scalar>::type CoeffReturnType;\n\n    /** \\internal Represents a matrix with all coefficients equal to one another*/\n    typedef CwiseNullaryOp<internal::scalar_constant_op<Scalar>,Matrix<Scalar,Dynamic,Dynamic> > ConstantReturnType;\n\n    /** type of the equivalent dense matrix */\n    typedef Matrix<Scalar,RowsAtCompileTime,ColsAtCompileTime> DenseMatrixType;\n    /** type of the equivalent square matrix */\n    typedef Matrix<Scalar,EIGEN_SIZE_MAX(RowsAtCompileTime,ColsAtCompileTime),\n                          EIGEN_SIZE_MAX(RowsAtCompileTime,ColsAtCompileTime)> SquareMatrixType;\n\n    inline const Derived& derived() const { return *static_cast<const Derived*>(this); }\n    inline Derived& derived() { return *static_cast<Derived*>(this); }\n    inline Derived& const_cast_derived() const\n    { return *static_cast<Derived*>(const_cast<SparseMatrixBase*>(this)); }\n\n    typedef EigenBase<Derived> Base;\n\n#endif // not EIGEN_PARSED_BY_DOXYGEN\n\n#define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::SparseMatrixBase\n#ifdef EIGEN_PARSED_BY_DOXYGEN\n#define EIGEN_DOC_UNARY_ADDONS(METHOD,OP)           /** <p>This method does not change the sparsity of \\c *this: the OP is applied to explicitly stored coefficients only. \\sa SparseCompressedBase::coeffs() </p> */\n#define EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL      /** <p> \\warning This method returns a read-only expression for any sparse matrices. \\sa \\ref TutorialSparse_SubMatrices \"Sparse block operations\" </p> */\n#define EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(COND) /** <p> \\warning This method returns a read-write expression for COND sparse matrices only. Otherwise, the returned expression is read-only. \\sa \\ref TutorialSparse_SubMatrices \"Sparse block operations\" </p> */\n#else\n#define EIGEN_DOC_UNARY_ADDONS(X,Y)\n#define EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL\n#define EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(COND)\n#endif\n#   include \"../plugins/CommonCwiseUnaryOps.h\"\n#   include \"../plugins/CommonCwiseBinaryOps.h\"\n#   include \"../plugins/MatrixCwiseUnaryOps.h\"\n#   include \"../plugins/MatrixCwiseBinaryOps.h\"\n#   include \"../plugins/BlockMethods.h\"\n#   ifdef EIGEN_SPARSEMATRIXBASE_PLUGIN\n#     include EIGEN_SPARSEMATRIXBASE_PLUGIN\n#   endif\n#undef EIGEN_CURRENT_STORAGE_BASE_CLASS\n#undef EIGEN_DOC_UNARY_ADDONS\n#undef EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL\n#undef EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF\n\n    /** \\returns the number of rows. \\sa cols() */\n    inline Index rows() const { return derived().rows(); }\n    /** \\returns the number of columns. \\sa rows() */\n    inline Index cols() const { return derived().cols(); }\n    /** \\returns the number of coefficients, which is \\a rows()*cols().\n      * \\sa rows(), cols(). */\n    inline Index size() const { return rows() * cols(); }\n    /** \\returns true if either the number of rows or the number of columns is equal to 1.\n      * In other words, this function returns\n      * \\code rows()==1 || cols()==1 \\endcode\n      * \\sa rows(), cols(), IsVectorAtCompileTime. 
*/\n    inline bool isVector() const { return rows()==1 || cols()==1; }\n    /** \\returns the size of the storage major dimension,\n      * i.e., the number of columns for a columns major matrix, and the number of rows otherwise */\n    Index outerSize() const { return (int(Flags)&RowMajorBit) ? this->rows() : this->cols(); }\n    /** \\returns the size of the inner dimension according to the storage order,\n      * i.e., the number of rows for a columns major matrix, and the number of cols otherwise */\n    Index innerSize() const { return (int(Flags)&RowMajorBit) ? this->cols() : this->rows(); }\n\n    bool isRValue() const { return m_isRValue; }\n    Derived& markAsRValue() { m_isRValue = true; return derived(); }\n\n    SparseMatrixBase() : m_isRValue(false) { /* TODO check flags */ }\n\n    \n    template<typename OtherDerived>\n    Derived& operator=(const ReturnByValue<OtherDerived>& other);\n\n    template<typename OtherDerived>\n    inline Derived& operator=(const SparseMatrixBase<OtherDerived>& other);\n\n    inline Derived& operator=(const Derived& other);\n\n  protected:\n\n    template<typename OtherDerived>\n    inline Derived& assign(const OtherDerived& other);\n\n    template<typename OtherDerived>\n    inline void assignGeneric(const OtherDerived& other);\n\n  public:\n\n    friend std::ostream & operator << (std::ostream & s, const SparseMatrixBase& m)\n    {\n      typedef typename Derived::Nested Nested;\n      typedef typename internal::remove_all<Nested>::type NestedCleaned;\n\n      if (Flags&RowMajorBit)\n      {\n        Nested nm(m.derived());\n        internal::evaluator<NestedCleaned> thisEval(nm);\n        for (Index row=0; row<nm.outerSize(); ++row)\n        {\n          Index col = 0;\n          for (typename internal::evaluator<NestedCleaned>::InnerIterator it(thisEval, row); it; ++it)\n          {\n            for ( ; col<it.index(); ++col)\n              s << \"0 \";\n            s << it.value() << \" \";\n            ++col;\n          }\n          for ( ; col<m.cols(); ++col)\n            s << \"0 \";\n          s << std::endl;\n        }\n      }\n      else\n      {\n        Nested nm(m.derived());\n        internal::evaluator<NestedCleaned> thisEval(nm);\n        if (m.cols() == 1) {\n          Index row = 0;\n          for (typename internal::evaluator<NestedCleaned>::InnerIterator it(thisEval, 0); it; ++it)\n          {\n            for ( ; row<it.index(); ++row)\n              s << \"0\" << std::endl;\n            s << it.value() << std::endl;\n            ++row;\n          }\n          for ( ; row<m.rows(); ++row)\n            s << \"0\" << std::endl;\n        }\n        else\n        {\n          SparseMatrix<Scalar, RowMajorBit, StorageIndex> trans = m;\n          s << static_cast<const SparseMatrixBase<SparseMatrix<Scalar, RowMajorBit, StorageIndex> >&>(trans);\n        }\n      }\n      return s;\n    }\n\n    template<typename OtherDerived>\n    Derived& operator+=(const SparseMatrixBase<OtherDerived>& other);\n    template<typename OtherDerived>\n    Derived& operator-=(const SparseMatrixBase<OtherDerived>& other);\n    \n    template<typename OtherDerived>\n    Derived& operator+=(const DiagonalBase<OtherDerived>& other);\n    template<typename OtherDerived>\n    Derived& operator-=(const DiagonalBase<OtherDerived>& other);\n\n    template<typename OtherDerived>\n    Derived& operator+=(const EigenBase<OtherDerived> &other);\n    template<typename OtherDerived>\n    Derived& operator-=(const EigenBase<OtherDerived> &other);\n\n    Derived& 
operator*=(const Scalar& other);\n    Derived& operator/=(const Scalar& other);\n\n    template<typename OtherDerived> struct CwiseProductDenseReturnType {\n      typedef CwiseBinaryOp<internal::scalar_product_op<typename ScalarBinaryOpTraits<\n                                                          typename internal::traits<Derived>::Scalar,\n                                                          typename internal::traits<OtherDerived>::Scalar\n                                                        >::ReturnType>,\n                            const Derived,\n                            const OtherDerived\n                          > Type;\n    };\n\n    template<typename OtherDerived>\n    EIGEN_STRONG_INLINE const typename CwiseProductDenseReturnType<OtherDerived>::Type\n    cwiseProduct(const MatrixBase<OtherDerived> &other) const;\n\n    // sparse * diagonal\n    template<typename OtherDerived>\n    const Product<Derived,OtherDerived>\n    operator*(const DiagonalBase<OtherDerived> &other) const\n    { return Product<Derived,OtherDerived>(derived(), other.derived()); }\n\n    // diagonal * sparse\n    template<typename OtherDerived> friend\n    const Product<OtherDerived,Derived>\n    operator*(const DiagonalBase<OtherDerived> &lhs, const SparseMatrixBase& rhs)\n    { return Product<OtherDerived,Derived>(lhs.derived(), rhs.derived()); }\n    \n    // sparse * sparse\n    template<typename OtherDerived>\n    const Product<Derived,OtherDerived,AliasFreeProduct>\n    operator*(const SparseMatrixBase<OtherDerived> &other) const;\n    \n    // sparse * dense\n    template<typename OtherDerived>\n    const Product<Derived,OtherDerived>\n    operator*(const MatrixBase<OtherDerived> &other) const\n    { return Product<Derived,OtherDerived>(derived(), other.derived()); }\n    \n    // dense * sparse\n    template<typename OtherDerived> friend\n    const Product<OtherDerived,Derived>\n    operator*(const MatrixBase<OtherDerived> &lhs, const SparseMatrixBase& rhs)\n    { return Product<OtherDerived,Derived>(lhs.derived(), rhs.derived()); }\n    \n     /** \\returns an expression of P H P^-1 where H is the matrix represented by \\c *this */\n    SparseSymmetricPermutationProduct<Derived,Upper|Lower> twistedBy(const PermutationMatrix<Dynamic,Dynamic,StorageIndex>& perm) const\n    {\n      return SparseSymmetricPermutationProduct<Derived,Upper|Lower>(derived(), perm);\n    }\n\n    template<typename OtherDerived>\n    Derived& operator*=(const SparseMatrixBase<OtherDerived>& other);\n\n    template<int Mode>\n    inline const TriangularView<const Derived, Mode> triangularView() const;\n    \n    template<unsigned int UpLo> struct SelfAdjointViewReturnType { typedef SparseSelfAdjointView<Derived, UpLo> Type; };\n    template<unsigned int UpLo> struct ConstSelfAdjointViewReturnType { typedef const SparseSelfAdjointView<const Derived, UpLo> Type; };\n\n    template<unsigned int UpLo> inline \n    typename ConstSelfAdjointViewReturnType<UpLo>::Type selfadjointView() const;\n    template<unsigned int UpLo> inline\n    typename SelfAdjointViewReturnType<UpLo>::Type selfadjointView();\n\n    template<typename OtherDerived> Scalar dot(const MatrixBase<OtherDerived>& other) const;\n    template<typename OtherDerived> Scalar dot(const SparseMatrixBase<OtherDerived>& other) const;\n    RealScalar squaredNorm() const;\n    RealScalar norm()  const;\n    RealScalar blueNorm() const;\n\n    TransposeReturnType transpose() { return TransposeReturnType(derived()); }\n    const ConstTransposeReturnType 
transpose() const { return ConstTransposeReturnType(derived()); }\n    const AdjointReturnType adjoint() const { return AdjointReturnType(transpose()); }\n\n    DenseMatrixType toDense() const\n    {\n      return DenseMatrixType(derived());\n    }\n\n    template<typename OtherDerived>\n    bool isApprox(const SparseMatrixBase<OtherDerived>& other,\n                  const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;\n\n    template<typename OtherDerived>\n    bool isApprox(const MatrixBase<OtherDerived>& other,\n                  const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const\n    { return toDense().isApprox(other,prec); }\n\n    /** \\returns the matrix or vector obtained by evaluating this expression.\n      *\n      * Notice that in the case of a plain matrix or vector (not an expression) this function just returns\n      * a const reference, in order to avoid a useless copy.\n      */\n    inline const typename internal::eval<Derived>::type eval() const\n    { return typename internal::eval<Derived>::type(derived()); }\n\n    Scalar sum() const;\n    \n    inline const SparseView<Derived>\n    pruned(const Scalar& reference = Scalar(0), const RealScalar& epsilon = NumTraits<Scalar>::dummy_precision()) const;\n\n  protected:\n\n    bool m_isRValue;\n\n    static inline StorageIndex convert_index(const Index idx) {\n      return internal::convert_index<StorageIndex>(idx);\n    }\n  private:\n    template<typename Dest> void evalTo(Dest &) const;\n};\n\n} // end namespace Eigen\n\n#endif // EIGEN_SPARSEMATRIXBASE_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/SparseCore/SparsePermutation.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2012 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_SPARSE_PERMUTATION_H\n#define EIGEN_SPARSE_PERMUTATION_H\n\n// This file implements sparse * permutation products\n\nnamespace Eigen { \n\nnamespace internal {\n\ntemplate<typename ExpressionType, int Side, bool Transposed>\nstruct permutation_matrix_product<ExpressionType, Side, Transposed, SparseShape>\n{\n    typedef typename nested_eval<ExpressionType, 1>::type MatrixType;\n    typedef typename remove_all<MatrixType>::type MatrixTypeCleaned;\n\n    typedef typename MatrixTypeCleaned::Scalar Scalar;\n    typedef typename MatrixTypeCleaned::StorageIndex StorageIndex;\n\n    enum {\n      SrcStorageOrder = MatrixTypeCleaned::Flags&RowMajorBit ? RowMajor : ColMajor,\n      MoveOuter = SrcStorageOrder==RowMajor ? Side==OnTheLeft : Side==OnTheRight\n    };\n    \n    typedef typename internal::conditional<MoveOuter,\n        SparseMatrix<Scalar,SrcStorageOrder,StorageIndex>,\n        SparseMatrix<Scalar,int(SrcStorageOrder)==RowMajor?ColMajor:RowMajor,StorageIndex> >::type ReturnType;\n\n    template<typename Dest,typename PermutationType>\n    static inline void run(Dest& dst, const PermutationType& perm, const ExpressionType& xpr)\n    {\n      MatrixType mat(xpr);\n      if(MoveOuter)\n      {\n        SparseMatrix<Scalar,SrcStorageOrder,StorageIndex> tmp(mat.rows(), mat.cols());\n        Matrix<StorageIndex,Dynamic,1> sizes(mat.outerSize());\n        for(Index j=0; j<mat.outerSize(); ++j)\n        {\n          Index jp = perm.indices().coeff(j);\n          sizes[((Side==OnTheLeft) ^ Transposed) ? jp : j] = StorageIndex(mat.innerVector(((Side==OnTheRight) ^ Transposed) ? jp : j).nonZeros());\n        }\n        tmp.reserve(sizes);\n        for(Index j=0; j<mat.outerSize(); ++j)\n        {\n          Index jp = perm.indices().coeff(j);\n          Index jsrc = ((Side==OnTheRight) ^ Transposed) ? jp : j;\n          Index jdst = ((Side==OnTheLeft) ^ Transposed) ? 
jp : j;\n          for(typename MatrixTypeCleaned::InnerIterator it(mat,jsrc); it; ++it)\n            tmp.insertByOuterInner(jdst,it.index()) = it.value();\n        }\n        dst = tmp;\n      }\n      else\n      {\n        SparseMatrix<Scalar,int(SrcStorageOrder)==RowMajor?ColMajor:RowMajor,StorageIndex> tmp(mat.rows(), mat.cols());\n        Matrix<StorageIndex,Dynamic,1> sizes(tmp.outerSize());\n        sizes.setZero();\n        PermutationMatrix<Dynamic,Dynamic,StorageIndex> perm_cpy;\n        if((Side==OnTheLeft) ^ Transposed)\n          perm_cpy = perm;\n        else\n          perm_cpy = perm.transpose();\n\n        for(Index j=0; j<mat.outerSize(); ++j)\n          for(typename MatrixTypeCleaned::InnerIterator it(mat,j); it; ++it)\n            sizes[perm_cpy.indices().coeff(it.index())]++;\n        tmp.reserve(sizes);\n        for(Index j=0; j<mat.outerSize(); ++j)\n          for(typename MatrixTypeCleaned::InnerIterator it(mat,j); it; ++it)\n            tmp.insertByOuterInner(perm_cpy.indices().coeff(it.index()),j) = it.value();\n        dst = tmp;\n      }\n    }\n};\n\n}\n\nnamespace internal {\n\ntemplate <int ProductTag> struct product_promote_storage_type<Sparse,             PermutationStorage, ProductTag> { typedef Sparse ret; };\ntemplate <int ProductTag> struct product_promote_storage_type<PermutationStorage, Sparse,             ProductTag> { typedef Sparse ret; };\n\n// TODO, the following two overloads are only needed to define the right temporary type through \n// typename traits<permutation_sparse_matrix_product<Rhs,Lhs,OnTheRight,false> >::ReturnType\n// whereas it should be correctly handled by traits<Product<> >::PlainObject\n\ntemplate<typename Lhs, typename Rhs, int ProductTag>\nstruct product_evaluator<Product<Lhs, Rhs, AliasFreeProduct>, ProductTag, PermutationShape, SparseShape>\n  : public evaluator<typename permutation_matrix_product<Rhs,OnTheLeft,false,SparseShape>::ReturnType>\n{\n  typedef Product<Lhs, Rhs, AliasFreeProduct> XprType;\n  typedef typename permutation_matrix_product<Rhs,OnTheLeft,false,SparseShape>::ReturnType PlainObject;\n  typedef evaluator<PlainObject> Base;\n\n  enum {\n    Flags = Base::Flags | EvalBeforeNestingBit\n  };\n\n  explicit product_evaluator(const XprType& xpr)\n    : m_result(xpr.rows(), xpr.cols())\n  {\n    ::new (static_cast<Base*>(this)) Base(m_result);\n    generic_product_impl<Lhs, Rhs, PermutationShape, SparseShape, ProductTag>::evalTo(m_result, xpr.lhs(), xpr.rhs());\n  }\n\nprotected:\n  PlainObject m_result;\n};\n\ntemplate<typename Lhs, typename Rhs, int ProductTag>\nstruct product_evaluator<Product<Lhs, Rhs, AliasFreeProduct>, ProductTag, SparseShape, PermutationShape >\n  : public evaluator<typename permutation_matrix_product<Lhs,OnTheRight,false,SparseShape>::ReturnType>\n{\n  typedef Product<Lhs, Rhs, AliasFreeProduct> XprType;\n  typedef typename permutation_matrix_product<Lhs,OnTheRight,false,SparseShape>::ReturnType PlainObject;\n  typedef evaluator<PlainObject> Base;\n\n  enum {\n    Flags = Base::Flags | EvalBeforeNestingBit\n  };\n\n  explicit product_evaluator(const XprType& xpr)\n    : m_result(xpr.rows(), xpr.cols())\n  {\n    ::new (static_cast<Base*>(this)) Base(m_result);\n    generic_product_impl<Lhs, Rhs, SparseShape, PermutationShape, ProductTag>::evalTo(m_result, xpr.lhs(), xpr.rhs());\n  }\n\nprotected:\n  PlainObject m_result;\n};\n\n} // end namespace internal\n\n/** \\returns the matrix with the permutation applied to the columns\n  */\ntemplate<typename SparseDerived, typename 
PermDerived>\ninline const Product<SparseDerived, PermDerived, AliasFreeProduct>\noperator*(const SparseMatrixBase<SparseDerived>& matrix, const PermutationBase<PermDerived>& perm)\n{ return Product<SparseDerived, PermDerived, AliasFreeProduct>(matrix.derived(), perm.derived()); }\n\n/** \\returns the matrix with the permutation applied to the rows\n  */\ntemplate<typename SparseDerived, typename PermDerived>\ninline const Product<PermDerived, SparseDerived, AliasFreeProduct>\noperator*( const PermutationBase<PermDerived>& perm, const SparseMatrixBase<SparseDerived>& matrix)\n{ return  Product<PermDerived, SparseDerived, AliasFreeProduct>(perm.derived(), matrix.derived()); }\n\n\n/** \\returns the matrix with the inverse permutation applied to the columns.\n  */\ntemplate<typename SparseDerived, typename PermutationType>\ninline const Product<SparseDerived, Inverse<PermutationType>, AliasFreeProduct>\noperator*(const SparseMatrixBase<SparseDerived>& matrix, const InverseImpl<PermutationType, PermutationStorage>& tperm)\n{\n  return Product<SparseDerived, Inverse<PermutationType>, AliasFreeProduct>(matrix.derived(), tperm.derived());\n}\n\n/** \\returns the matrix with the inverse permutation applied to the rows.\n  */\ntemplate<typename SparseDerived, typename PermutationType>\ninline const Product<Inverse<PermutationType>, SparseDerived, AliasFreeProduct>\noperator*(const InverseImpl<PermutationType,PermutationStorage>& tperm, const SparseMatrixBase<SparseDerived>& matrix)\n{\n  return Product<Inverse<PermutationType>, SparseDerived, AliasFreeProduct>(tperm.derived(), matrix.derived());\n}\n\n} // end namespace Eigen\n\n#endif // EIGEN_SPARSE_SELFADJOINTVIEW_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/SparseCore/SparseProduct.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008-2015 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_SPARSEPRODUCT_H\n#define EIGEN_SPARSEPRODUCT_H\n\nnamespace Eigen { \n\n/** \\returns an expression of the product of two sparse matrices.\n  * By default a conservative product preserving the symbolic non zeros is performed.\n  * The automatic pruning of the small values can be achieved by calling the pruned() function\n  * in which case a totally different product algorithm is employed:\n  * \\code\n  * C = (A*B).pruned();             // suppress numerical zeros (exact)\n  * C = (A*B).pruned(ref);\n  * C = (A*B).pruned(ref,epsilon);\n  * \\endcode\n  * where \\c ref is a meaningful non zero reference value.\n  * */\ntemplate<typename Derived>\ntemplate<typename OtherDerived>\ninline const Product<Derived,OtherDerived,AliasFreeProduct>\nSparseMatrixBase<Derived>::operator*(const SparseMatrixBase<OtherDerived> &other) const\n{\n  return Product<Derived,OtherDerived,AliasFreeProduct>(derived(), other.derived());\n}\n\nnamespace internal {\n\n// sparse * sparse\ntemplate<typename Lhs, typename Rhs, int ProductType>\nstruct generic_product_impl<Lhs, Rhs, SparseShape, SparseShape, ProductType>\n{\n  template<typename Dest>\n  static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)\n  {\n    evalTo(dst, lhs, rhs, typename evaluator_traits<Dest>::Shape());\n  }\n\n  // dense += sparse * sparse\n  template<typename Dest,typename ActualLhs>\n  static void addTo(Dest& dst, const ActualLhs& lhs, const Rhs& rhs, typename enable_if<is_same<typename evaluator_traits<Dest>::Shape,DenseShape>::value,int*>::type* = 0)\n  {\n    typedef typename nested_eval<ActualLhs,Dynamic>::type LhsNested;\n    typedef typename nested_eval<Rhs,Dynamic>::type RhsNested;\n    LhsNested lhsNested(lhs);\n    RhsNested rhsNested(rhs);\n    internal::sparse_sparse_to_dense_product_selector<typename remove_all<LhsNested>::type,\n                                                      typename remove_all<RhsNested>::type, Dest>::run(lhsNested,rhsNested,dst);\n  }\n\n  // dense -= sparse * sparse\n  template<typename Dest>\n  static void subTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, typename enable_if<is_same<typename evaluator_traits<Dest>::Shape,DenseShape>::value,int*>::type* = 0)\n  {\n    addTo(dst, -lhs, rhs);\n  }\n\nprotected:\n\n  // sparse = sparse * sparse\n  template<typename Dest>\n  static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, SparseShape)\n  {\n    typedef typename nested_eval<Lhs,Dynamic>::type LhsNested;\n    typedef typename nested_eval<Rhs,Dynamic>::type RhsNested;\n    LhsNested lhsNested(lhs);\n    RhsNested rhsNested(rhs);\n    internal::conservative_sparse_sparse_product_selector<typename remove_all<LhsNested>::type,\n                                                          typename remove_all<RhsNested>::type, Dest>::run(lhsNested,rhsNested,dst);\n  }\n\n  // dense = sparse * sparse\n  template<typename Dest>\n  static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, DenseShape)\n  {\n    dst.setZero();\n    addTo(dst, lhs, rhs);\n  }\n};\n\n// sparse * sparse-triangular\ntemplate<typename Lhs, typename Rhs, int ProductType>\nstruct generic_product_impl<Lhs, Rhs, SparseShape, 
SparseTriangularShape, ProductType>\n : public generic_product_impl<Lhs, Rhs, SparseShape, SparseShape, ProductType>\n{};\n\n// sparse-triangular * sparse\ntemplate<typename Lhs, typename Rhs, int ProductType>\nstruct generic_product_impl<Lhs, Rhs, SparseTriangularShape, SparseShape, ProductType>\n : public generic_product_impl<Lhs, Rhs, SparseShape, SparseShape, ProductType>\n{};\n\n// dense = sparse-product (can be sparse*sparse, sparse*perm, etc.)\ntemplate< typename DstXprType, typename Lhs, typename Rhs>\nstruct Assignment<DstXprType, Product<Lhs,Rhs,AliasFreeProduct>, internal::assign_op<typename DstXprType::Scalar,typename Product<Lhs,Rhs,AliasFreeProduct>::Scalar>, Sparse2Dense>\n{\n  typedef Product<Lhs,Rhs,AliasFreeProduct> SrcXprType;\n  static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &)\n  {\n    Index dstRows = src.rows();\n    Index dstCols = src.cols();\n    if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))\n      dst.resize(dstRows, dstCols);\n    \n    generic_product_impl<Lhs, Rhs>::evalTo(dst,src.lhs(),src.rhs());\n  }\n};\n\n// dense += sparse-product (can be sparse*sparse, sparse*perm, etc.)\ntemplate< typename DstXprType, typename Lhs, typename Rhs>\nstruct Assignment<DstXprType, Product<Lhs,Rhs,AliasFreeProduct>, internal::add_assign_op<typename DstXprType::Scalar,typename Product<Lhs,Rhs,AliasFreeProduct>::Scalar>, Sparse2Dense>\n{\n  typedef Product<Lhs,Rhs,AliasFreeProduct> SrcXprType;\n  static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &)\n  {\n    generic_product_impl<Lhs, Rhs>::addTo(dst,src.lhs(),src.rhs());\n  }\n};\n\n// dense -= sparse-product (can be sparse*sparse, sparse*perm, etc.)\ntemplate< typename DstXprType, typename Lhs, typename Rhs>\nstruct Assignment<DstXprType, Product<Lhs,Rhs,AliasFreeProduct>, internal::sub_assign_op<typename DstXprType::Scalar,typename Product<Lhs,Rhs,AliasFreeProduct>::Scalar>, Sparse2Dense>\n{\n  typedef Product<Lhs,Rhs,AliasFreeProduct> SrcXprType;\n  static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &)\n  {\n    generic_product_impl<Lhs, Rhs>::subTo(dst,src.lhs(),src.rhs());\n  }\n};\n\ntemplate<typename Lhs, typename Rhs, int Options>\nstruct unary_evaluator<SparseView<Product<Lhs, Rhs, Options> >, IteratorBased>\n : public evaluator<typename Product<Lhs, Rhs, DefaultProduct>::PlainObject>\n{\n  typedef SparseView<Product<Lhs, Rhs, Options> > XprType;\n  typedef typename XprType::PlainObject PlainObject;\n  typedef evaluator<PlainObject> Base;\n\n  explicit unary_evaluator(const XprType& xpr)\n    : m_result(xpr.rows(), xpr.cols())\n  {\n    using std::abs;\n    ::new (static_cast<Base*>(this)) Base(m_result);\n    typedef typename nested_eval<Lhs,Dynamic>::type LhsNested;\n    typedef typename nested_eval<Rhs,Dynamic>::type RhsNested;\n    LhsNested lhsNested(xpr.nestedExpression().lhs());\n    RhsNested rhsNested(xpr.nestedExpression().rhs());\n\n    internal::sparse_sparse_product_with_pruning_selector<typename remove_all<LhsNested>::type,\n                                                          typename remove_all<RhsNested>::type, PlainObject>::run(lhsNested,rhsNested,m_result,\n                                                                                                                  abs(xpr.reference())*xpr.epsilon());\n  
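  // Note: the pruning selector above evaluates lhs*rhs and drops entries whose\n    // magnitude is at most abs(reference())*epsilon() as the result is written into m_result.\n  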
}\n\nprotected:\n  PlainObject m_result;\n};\n\n} // end namespace internal\n\n// sparse matrix = sparse-product (can be sparse*sparse, sparse*perm, etc.)\ntemplate<typename Scalar, int _Options, typename _StorageIndex>\ntemplate<typename Lhs, typename Rhs>\nSparseMatrix<Scalar,_Options,_StorageIndex>& SparseMatrix<Scalar,_Options,_StorageIndex>::operator=(const Product<Lhs,Rhs,AliasFreeProduct>& src)\n{\n  // std::cout << \"in Assignment : \" << DstOptions << \"\\n\";\n  SparseMatrix dst(src.rows(),src.cols());\n  internal::generic_product_impl<Lhs, Rhs>::evalTo(dst,src.lhs(),src.rhs());\n  this->swap(dst);\n  return *this;\n}\n\n} // end namespace Eigen\n\n#endif // EIGEN_SPARSEPRODUCT_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/SparseCore/SparseRedux.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_SPARSEREDUX_H\n#define EIGEN_SPARSEREDUX_H\n\nnamespace Eigen { \n\ntemplate<typename Derived>\ntypename internal::traits<Derived>::Scalar\nSparseMatrixBase<Derived>::sum() const\n{\n  eigen_assert(rows()>0 && cols()>0 && \"you are using a non initialized matrix\");\n  Scalar res(0);\n  internal::evaluator<Derived> thisEval(derived());\n  for (Index j=0; j<outerSize(); ++j)\n    for (typename internal::evaluator<Derived>::InnerIterator iter(thisEval,j); iter; ++iter)\n      res += iter.value();\n  return res;\n}\n\ntemplate<typename _Scalar, int _Options, typename _Index>\ntypename internal::traits<SparseMatrix<_Scalar,_Options,_Index> >::Scalar\nSparseMatrix<_Scalar,_Options,_Index>::sum() const\n{\n  eigen_assert(rows()>0 && cols()>0 && \"you are using a non initialized matrix\");\n  if(this->isCompressed())\n    return Matrix<Scalar,1,Dynamic>::Map(m_data.valuePtr(), m_data.size()).sum();\n  else\n    return Base::sum();\n}\n\ntemplate<typename _Scalar, int _Options, typename _Index>\ntypename internal::traits<SparseVector<_Scalar,_Options, _Index> >::Scalar\nSparseVector<_Scalar,_Options,_Index>::sum() const\n{\n  eigen_assert(rows()>0 && cols()>0 && \"you are using a non initialized matrix\");\n  return Matrix<Scalar,1,Dynamic>::Map(m_data.valuePtr(), m_data.size()).sum();\n}\n\n} // end namespace Eigen\n\n#endif // EIGEN_SPARSEREDUX_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/SparseCore/SparseRef.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2015 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_SPARSE_REF_H\n#define EIGEN_SPARSE_REF_H\n\nnamespace Eigen {\n\nenum {\n  StandardCompressedFormat = 2 /**< used by Ref<SparseMatrix> to specify whether the input storage must be in standard compressed form */\n};\n  \nnamespace internal {\n\ntemplate<typename Derived> class SparseRefBase;\n\ntemplate<typename MatScalar, int MatOptions, typename MatIndex, int _Options, typename _StrideType>\nstruct traits<Ref<SparseMatrix<MatScalar,MatOptions,MatIndex>, _Options, _StrideType> >\n  : public traits<SparseMatrix<MatScalar,MatOptions,MatIndex> >\n{\n  typedef SparseMatrix<MatScalar,MatOptions,MatIndex> PlainObjectType;\n  enum {\n    Options = _Options,\n    Flags = traits<PlainObjectType>::Flags | CompressedAccessBit | NestByRefBit\n  };\n\n  template<typename Derived> struct match {\n    enum {\n      StorageOrderMatch = PlainObjectType::IsVectorAtCompileTime || Derived::IsVectorAtCompileTime || ((PlainObjectType::Flags&RowMajorBit)==(Derived::Flags&RowMajorBit)),\n      MatchAtCompileTime = (Derived::Flags&CompressedAccessBit) && StorageOrderMatch\n    };\n    typedef typename internal::conditional<MatchAtCompileTime,internal::true_type,internal::false_type>::type type;\n  };\n  \n};\n\ntemplate<typename MatScalar, int MatOptions, typename MatIndex, int _Options, typename _StrideType>\nstruct traits<Ref<const SparseMatrix<MatScalar,MatOptions,MatIndex>, _Options, _StrideType> >\n  : public traits<Ref<SparseMatrix<MatScalar,MatOptions,MatIndex>, _Options, _StrideType> >\n{\n  enum {\n    Flags = (traits<SparseMatrix<MatScalar,MatOptions,MatIndex> >::Flags | CompressedAccessBit | NestByRefBit) & ~LvalueBit\n  };\n};\n\ntemplate<typename MatScalar, int MatOptions, typename MatIndex, int _Options, typename _StrideType>\nstruct traits<Ref<SparseVector<MatScalar,MatOptions,MatIndex>, _Options, _StrideType> >\n  : public traits<SparseVector<MatScalar,MatOptions,MatIndex> >\n{\n  typedef SparseVector<MatScalar,MatOptions,MatIndex> PlainObjectType;\n  enum {\n    Options = _Options,\n    Flags = traits<PlainObjectType>::Flags | CompressedAccessBit | NestByRefBit\n  };\n\n  template<typename Derived> struct match {\n    enum {\n      MatchAtCompileTime = (Derived::Flags&CompressedAccessBit) && Derived::IsVectorAtCompileTime\n    };\n    typedef typename internal::conditional<MatchAtCompileTime,internal::true_type,internal::false_type>::type type;\n  };\n\n};\n\ntemplate<typename MatScalar, int MatOptions, typename MatIndex, int _Options, typename _StrideType>\nstruct traits<Ref<const SparseVector<MatScalar,MatOptions,MatIndex>, _Options, _StrideType> >\n  : public traits<Ref<SparseVector<MatScalar,MatOptions,MatIndex>, _Options, _StrideType> >\n{\n  enum {\n    Flags = (traits<SparseVector<MatScalar,MatOptions,MatIndex> >::Flags | CompressedAccessBit | NestByRefBit) & ~LvalueBit\n  };\n};\n\ntemplate<typename Derived>\nstruct traits<SparseRefBase<Derived> > : public traits<Derived> {};\n\ntemplate<typename Derived> class SparseRefBase\n  : public SparseMapBase<Derived>\n{\npublic:\n\n  typedef SparseMapBase<Derived> Base;\n  EIGEN_SPARSE_PUBLIC_INTERFACE(SparseRefBase)\n\n  SparseRefBase()\n    : 
Base(RowsAtCompileTime==Dynamic?0:RowsAtCompileTime,ColsAtCompileTime==Dynamic?0:ColsAtCompileTime, 0, 0, 0, 0, 0)\n  {}\n  \nprotected:\n\n  template<typename Expression>\n  void construct(Expression& expr)\n  {\n    if(expr.outerIndexPtr()==0)\n      ::new (static_cast<Base*>(this)) Base(expr.size(), expr.nonZeros(), expr.innerIndexPtr(), expr.valuePtr());\n    else\n      ::new (static_cast<Base*>(this)) Base(expr.rows(), expr.cols(), expr.nonZeros(), expr.outerIndexPtr(), expr.innerIndexPtr(), expr.valuePtr(), expr.innerNonZeroPtr());\n  }\n};\n\n} // namespace internal\n\n\n/** \n  * \\ingroup SparseCore_Module\n  *\n  * \\brief A sparse matrix expression referencing an existing sparse expression\n  *\n  * \\tparam SparseMatrixType the equivalent sparse matrix type of the referenced data; it must be a template instance of class SparseMatrix.\n  * \\tparam Options specifies whether a standard compressed format is required; \\c Options is either \\c #StandardCompressedFormat or \\c 0.\n  *                The default is \\c 0.\n  *\n  * \\sa class Ref\n  */\n#ifndef EIGEN_PARSED_BY_DOXYGEN\ntemplate<typename MatScalar, int MatOptions, typename MatIndex, int Options, typename StrideType>\nclass Ref<SparseMatrix<MatScalar,MatOptions,MatIndex>, Options, StrideType >\n  : public internal::SparseRefBase<Ref<SparseMatrix<MatScalar,MatOptions,MatIndex>, Options, StrideType > >\n#else\ntemplate<typename SparseMatrixType, int Options>\nclass Ref<SparseMatrixType, Options>\n  : public SparseMapBase<Derived,WriteAccessors> // yes, that's weird to use Derived here, but that works!\n#endif\n{\n    typedef SparseMatrix<MatScalar,MatOptions,MatIndex> PlainObjectType;\n    typedef internal::traits<Ref> Traits;\n    template<int OtherOptions>\n    inline Ref(const SparseMatrix<MatScalar,OtherOptions,MatIndex>& expr);\n    template<int OtherOptions>\n    inline Ref(const MappedSparseMatrix<MatScalar,OtherOptions,MatIndex>& expr);\n  public:\n\n    typedef internal::SparseRefBase<Ref> Base;\n    EIGEN_SPARSE_PUBLIC_INTERFACE(Ref)\n\n\n    #ifndef EIGEN_PARSED_BY_DOXYGEN\n    template<int OtherOptions>\n    inline Ref(SparseMatrix<MatScalar,OtherOptions,MatIndex>& expr)\n    {\n      EIGEN_STATIC_ASSERT(bool(Traits::template match<SparseMatrix<MatScalar,OtherOptions,MatIndex> >::MatchAtCompileTime), STORAGE_LAYOUT_DOES_NOT_MATCH);\n      eigen_assert( ((Options & int(StandardCompressedFormat))==0) || (expr.isCompressed()) );\n      Base::construct(expr.derived());\n    }\n    \n    template<int OtherOptions>\n    inline Ref(MappedSparseMatrix<MatScalar,OtherOptions,MatIndex>& expr)\n    {\n      EIGEN_STATIC_ASSERT(bool(Traits::template match<SparseMatrix<MatScalar,OtherOptions,MatIndex> >::MatchAtCompileTime), STORAGE_LAYOUT_DOES_NOT_MATCH);\n      eigen_assert( ((Options & int(StandardCompressedFormat))==0) || (expr.isCompressed()) );\n      Base::construct(expr.derived());\n    }\n    \n    template<typename Derived>\n    inline Ref(const SparseCompressedBase<Derived>& expr)\n    #else\n    /** Implicit constructor from any sparse expression (2D matrix or 1D vector) */\n    template<typename Derived>\n    inline Ref(SparseCompressedBase<Derived>& expr)\n    #endif\n    {\n      EIGEN_STATIC_ASSERT(bool(internal::is_lvalue<Derived>::value), THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY);\n      EIGEN_STATIC_ASSERT(bool(Traits::template match<Derived>::MatchAtCompileTime), STORAGE_LAYOUT_DOES_NOT_MATCH);\n      eigen_assert( ((Options & int(StandardCompressedFormat))==0) || (expr.isCompressed()) );\n      
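// The static assertions above ensure that Derived is a writable (lvalue) expression\n      // with a matching storage layout, so the const_cast below is safe: the Ref simply\n      // aliases the existing index/value arrays without copying them.\n      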
Base::construct(expr.const_cast_derived());\n    }\n};\n\n// this is the const ref version\ntemplate<typename MatScalar, int MatOptions, typename MatIndex, int Options, typename StrideType>\nclass Ref<const SparseMatrix<MatScalar,MatOptions,MatIndex>, Options, StrideType>\n  : public internal::SparseRefBase<Ref<const SparseMatrix<MatScalar,MatOptions,MatIndex>, Options, StrideType> >\n{\n    typedef SparseMatrix<MatScalar,MatOptions,MatIndex> TPlainObjectType;\n    typedef internal::traits<Ref> Traits;\n  public:\n\n    typedef internal::SparseRefBase<Ref> Base;\n    EIGEN_SPARSE_PUBLIC_INTERFACE(Ref)\n\n    template<typename Derived>\n    inline Ref(const SparseMatrixBase<Derived>& expr) : m_hasCopy(false)\n    {\n      construct(expr.derived(), typename Traits::template match<Derived>::type());\n    }\n\n    inline Ref(const Ref& other) : Base(other), m_hasCopy(false) {\n      // copy constructor shall not copy the m_object, to avoid unnecessary malloc and copy\n    }\n\n    template<typename OtherRef>\n    inline Ref(const RefBase<OtherRef>& other) : m_hasCopy(false) {\n      construct(other.derived(), typename Traits::template match<OtherRef>::type());\n    }\n\n    ~Ref() {\n      if(m_hasCopy) {\n        TPlainObjectType* obj = reinterpret_cast<TPlainObjectType*>(&m_storage);\n        obj->~TPlainObjectType();\n      }\n    }\n\n  protected:\n\n    template<typename Expression>\n    void construct(const Expression& expr,internal::true_type)\n    {\n      if((Options & int(StandardCompressedFormat)) && (!expr.isCompressed()))\n      {\n        TPlainObjectType* obj = reinterpret_cast<TPlainObjectType*>(&m_storage);\n        ::new (obj) TPlainObjectType(expr);\n        m_hasCopy = true;\n        Base::construct(*obj);\n      }\n      else\n      {\n        Base::construct(expr);\n      }\n    }\n\n    template<typename Expression>\n    void construct(const Expression& expr, internal::false_type)\n    {\n      TPlainObjectType* obj = reinterpret_cast<TPlainObjectType*>(&m_storage);\n      ::new (obj) TPlainObjectType(expr);\n      m_hasCopy = true;\n      Base::construct(*obj);\n    }\n\n  protected:\n    typename internal::aligned_storage<sizeof(TPlainObjectType), EIGEN_ALIGNOF(TPlainObjectType)>::type m_storage;\n    bool m_hasCopy;\n};\n\n\n\n/**\n  * \\ingroup SparseCore_Module\n  *\n  * \\brief A sparse vector expression referencing an existing sparse vector expression\n  *\n  * \\tparam SparseVectorType the equivalent sparse vector type of the referenced data, it must be a template instance of class SparseVector.\n  *\n  * \\sa class Ref\n  */\n#ifndef EIGEN_PARSED_BY_DOXYGEN\ntemplate<typename MatScalar, int MatOptions, typename MatIndex, int Options, typename StrideType>\nclass Ref<SparseVector<MatScalar,MatOptions,MatIndex>, Options, StrideType >\n  : public internal::SparseRefBase<Ref<SparseVector<MatScalar,MatOptions,MatIndex>, Options, StrideType > >\n#else\ntemplate<typename SparseVectorType>\nclass Ref<SparseVectorType>\n  : public SparseMapBase<Derived,WriteAccessors>\n#endif\n{\n    typedef SparseVector<MatScalar,MatOptions,MatIndex> PlainObjectType;\n    typedef internal::traits<Ref> Traits;\n    template<int OtherOptions>\n    inline Ref(const SparseVector<MatScalar,OtherOptions,MatIndex>& expr);\n  public:\n\n    typedef internal::SparseRefBase<Ref> Base;\n    EIGEN_SPARSE_PUBLIC_INTERFACE(Ref)\n\n    #ifndef EIGEN_PARSED_BY_DOXYGEN\n    template<int OtherOptions>\n    inline Ref(SparseVector<MatScalar,OtherOptions,MatIndex>& expr)\n    {\n      
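// Compile-time layout check only; on success the Ref wraps the vector's existing\n      // arrays without copying.\n      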
EIGEN_STATIC_ASSERT(bool(Traits::template match<SparseVector<MatScalar,OtherOptions,MatIndex> >::MatchAtCompileTime), STORAGE_LAYOUT_DOES_NOT_MATCH);\n      Base::construct(expr.derived());\n    }\n\n    template<typename Derived>\n    inline Ref(const SparseCompressedBase<Derived>& expr)\n    #else\n    /** Implicit constructor from any 1D sparse vector expression */\n    template<typename Derived>\n    inline Ref(SparseCompressedBase<Derived>& expr)\n    #endif\n    {\n      EIGEN_STATIC_ASSERT(bool(internal::is_lvalue<Derived>::value), THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY);\n      EIGEN_STATIC_ASSERT(bool(Traits::template match<Derived>::MatchAtCompileTime), STORAGE_LAYOUT_DOES_NOT_MATCH);\n      Base::construct(expr.const_cast_derived());\n    }\n};\n\n// this is the const ref version\ntemplate<typename MatScalar, int MatOptions, typename MatIndex, int Options, typename StrideType>\nclass Ref<const SparseVector<MatScalar,MatOptions,MatIndex>, Options, StrideType>\n  : public internal::SparseRefBase<Ref<const SparseVector<MatScalar,MatOptions,MatIndex>, Options, StrideType> >\n{\n    typedef SparseVector<MatScalar,MatOptions,MatIndex> TPlainObjectType;\n    typedef internal::traits<Ref> Traits;\n  public:\n\n    typedef internal::SparseRefBase<Ref> Base;\n    EIGEN_SPARSE_PUBLIC_INTERFACE(Ref)\n\n    template<typename Derived>\n    inline Ref(const SparseMatrixBase<Derived>& expr) : m_hasCopy(false)\n    {\n      construct(expr.derived(), typename Traits::template match<Derived>::type());\n    }\n\n    inline Ref(const Ref& other) : Base(other), m_hasCopy(false) {\n      // copy constructor shall not copy the m_object, to avoid unnecessary malloc and copy\n    }\n\n    template<typename OtherRef>\n    inline Ref(const RefBase<OtherRef>& other) : m_hasCopy(false) {\n      construct(other.derived(), typename Traits::template match<OtherRef>::type());\n    }\n\n    ~Ref() {\n      if(m_hasCopy) {\n        TPlainObjectType* obj = reinterpret_cast<TPlainObjectType*>(&m_storage);\n        obj->~TPlainObjectType();\n      }\n    }\n\n  protected:\n\n    template<typename Expression>\n    void construct(const Expression& expr,internal::true_type)\n    {\n      Base::construct(expr);\n    }\n\n    template<typename Expression>\n    void construct(const Expression& expr, internal::false_type)\n    {\n      TPlainObjectType* obj = reinterpret_cast<TPlainObjectType*>(&m_storage);\n      ::new (obj) TPlainObjectType(expr);\n      m_hasCopy = true;\n      Base::construct(*obj);\n    }\n\n  protected:\n    typename internal::aligned_storage<sizeof(TPlainObjectType), EIGEN_ALIGNOF(TPlainObjectType)>::type m_storage;\n    bool m_hasCopy;\n};\n\nnamespace internal {\n\n// FIXME shall we introduce a general evaluatior_ref that we can specialize for any sparse object once, and thus remove this copy-pasta thing...\n\ntemplate<typename MatScalar, int MatOptions, typename MatIndex, int Options, typename StrideType>\nstruct evaluator<Ref<SparseMatrix<MatScalar,MatOptions,MatIndex>, Options, StrideType> >\n  : evaluator<SparseCompressedBase<Ref<SparseMatrix<MatScalar,MatOptions,MatIndex>, Options, StrideType> > >\n{\n  typedef evaluator<SparseCompressedBase<Ref<SparseMatrix<MatScalar,MatOptions,MatIndex>, Options, StrideType> > > Base;\n  typedef Ref<SparseMatrix<MatScalar,MatOptions,MatIndex>, Options, StrideType> XprType;  \n  evaluator() : Base() {}\n  explicit evaluator(const XprType &mat) : Base(mat) {}\n};\n\ntemplate<typename MatScalar, int MatOptions, typename MatIndex, int Options, typename 
StrideType>\nstruct evaluator<Ref<const SparseMatrix<MatScalar,MatOptions,MatIndex>, Options, StrideType> >\n  : evaluator<SparseCompressedBase<Ref<const SparseMatrix<MatScalar,MatOptions,MatIndex>, Options, StrideType> > >\n{\n  typedef evaluator<SparseCompressedBase<Ref<const SparseMatrix<MatScalar,MatOptions,MatIndex>, Options, StrideType> > > Base;\n  typedef Ref<const SparseMatrix<MatScalar,MatOptions,MatIndex>, Options, StrideType> XprType;  \n  evaluator() : Base() {}\n  explicit evaluator(const XprType &mat) : Base(mat) {}\n};\n\ntemplate<typename MatScalar, int MatOptions, typename MatIndex, int Options, typename StrideType>\nstruct evaluator<Ref<SparseVector<MatScalar,MatOptions,MatIndex>, Options, StrideType> >\n  : evaluator<SparseCompressedBase<Ref<SparseVector<MatScalar,MatOptions,MatIndex>, Options, StrideType> > >\n{\n  typedef evaluator<SparseCompressedBase<Ref<SparseVector<MatScalar,MatOptions,MatIndex>, Options, StrideType> > > Base;\n  typedef Ref<SparseVector<MatScalar,MatOptions,MatIndex>, Options, StrideType> XprType;\n  evaluator() : Base() {}\n  explicit evaluator(const XprType &mat) : Base(mat) {}\n};\n\ntemplate<typename MatScalar, int MatOptions, typename MatIndex, int Options, typename StrideType>\nstruct evaluator<Ref<const SparseVector<MatScalar,MatOptions,MatIndex>, Options, StrideType> >\n  : evaluator<SparseCompressedBase<Ref<const SparseVector<MatScalar,MatOptions,MatIndex>, Options, StrideType> > >\n{\n  typedef evaluator<SparseCompressedBase<Ref<const SparseVector<MatScalar,MatOptions,MatIndex>, Options, StrideType> > > Base;\n  typedef Ref<const SparseVector<MatScalar,MatOptions,MatIndex>, Options, StrideType> XprType;\n  evaluator() : Base() {}\n  explicit evaluator(const XprType &mat) : Base(mat) {}\n};\n\n}\n\n} // end namespace Eigen\n\n#endif // EIGEN_SPARSE_REF_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/SparseCore/SparseSelfAdjointView.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2009-2014 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_SPARSE_SELFADJOINTVIEW_H\n#define EIGEN_SPARSE_SELFADJOINTVIEW_H\n\nnamespace Eigen { \n  \n/** \\ingroup SparseCore_Module\n  * \\class SparseSelfAdjointView\n  *\n  * \\brief Pseudo expression to manipulate a triangular sparse matrix as a selfadjoint matrix.\n  *\n  * \\param MatrixType the type of the dense matrix storing the coefficients\n  * \\param Mode can be either \\c #Lower or \\c #Upper\n  *\n  * This class is an expression of a sefladjoint matrix from a triangular part of a matrix\n  * with given dense storage of the coefficients. It is the return type of MatrixBase::selfadjointView()\n  * and most of the time this is the only way that it is used.\n  *\n  * \\sa SparseMatrixBase::selfadjointView()\n  */\nnamespace internal {\n  \ntemplate<typename MatrixType, unsigned int Mode>\nstruct traits<SparseSelfAdjointView<MatrixType,Mode> > : traits<MatrixType> {\n};\n\ntemplate<int SrcMode,int DstMode,typename MatrixType,int DestOrder>\nvoid permute_symm_to_symm(const MatrixType& mat, SparseMatrix<typename MatrixType::Scalar,DestOrder,typename MatrixType::StorageIndex>& _dest, const typename MatrixType::StorageIndex* perm = 0);\n\ntemplate<int Mode,typename MatrixType,int DestOrder>\nvoid permute_symm_to_fullsymm(const MatrixType& mat, SparseMatrix<typename MatrixType::Scalar,DestOrder,typename MatrixType::StorageIndex>& _dest, const typename MatrixType::StorageIndex* perm = 0);\n\n}\n\ntemplate<typename MatrixType, unsigned int _Mode> class SparseSelfAdjointView\n  : public EigenBase<SparseSelfAdjointView<MatrixType,_Mode> >\n{\n  public:\n    \n    enum {\n      Mode = _Mode,\n      TransposeMode = ((Mode & Upper) ? Lower : 0) | ((Mode & Lower) ? 
Upper : 0),\n      RowsAtCompileTime = internal::traits<SparseSelfAdjointView>::RowsAtCompileTime,\n      ColsAtCompileTime = internal::traits<SparseSelfAdjointView>::ColsAtCompileTime\n    };\n\n    typedef EigenBase<SparseSelfAdjointView> Base;\n    typedef typename MatrixType::Scalar Scalar;\n    typedef typename MatrixType::StorageIndex StorageIndex;\n    typedef Matrix<StorageIndex,Dynamic,1> VectorI;\n    typedef typename internal::ref_selector<MatrixType>::non_const_type MatrixTypeNested;\n    typedef typename internal::remove_all<MatrixTypeNested>::type _MatrixTypeNested;\n    \n    explicit inline SparseSelfAdjointView(MatrixType& matrix) : m_matrix(matrix)\n    {\n      eigen_assert(rows()==cols() && \"SelfAdjointView is only for squared matrices\");\n    }\n\n    inline Index rows() const { return m_matrix.rows(); }\n    inline Index cols() const { return m_matrix.cols(); }\n\n    /** \\internal \\returns a reference to the nested matrix */\n    const _MatrixTypeNested& matrix() const { return m_matrix; }\n    typename internal::remove_reference<MatrixTypeNested>::type& matrix() { return m_matrix; }\n\n    /** \\returns an expression of the matrix product between a sparse self-adjoint matrix \\c *this and a sparse matrix \\a rhs.\n      *\n      * Note that there is no algorithmic advantage of performing such a product compared to a general sparse-sparse matrix product.\n      * Indeed, the SparseSelfadjointView operand is first copied into a temporary SparseMatrix before computing the product.\n      */\n    template<typename OtherDerived>\n    Product<SparseSelfAdjointView, OtherDerived>\n    operator*(const SparseMatrixBase<OtherDerived>& rhs) const\n    {\n      return Product<SparseSelfAdjointView, OtherDerived>(*this, rhs.derived());\n    }\n\n    /** \\returns an expression of the matrix product between a sparse matrix \\a lhs and a sparse self-adjoint matrix \\a rhs.\n      *\n      * Note that there is no algorithmic advantage of performing such a product compared to a general sparse-sparse matrix product.\n      * Indeed, the SparseSelfadjointView operand is first copied into a temporary SparseMatrix before computing the product.\n      */\n    template<typename OtherDerived> friend\n    Product<OtherDerived, SparseSelfAdjointView>\n    operator*(const SparseMatrixBase<OtherDerived>& lhs, const SparseSelfAdjointView& rhs)\n    {\n      return Product<OtherDerived, SparseSelfAdjointView>(lhs.derived(), rhs);\n    }\n    \n    /** Efficient sparse self-adjoint matrix times dense vector/matrix product */\n    template<typename OtherDerived>\n    Product<SparseSelfAdjointView,OtherDerived>\n    operator*(const MatrixBase<OtherDerived>& rhs) const\n    {\n      return Product<SparseSelfAdjointView,OtherDerived>(*this, rhs.derived());\n    }\n\n    /** Efficient dense vector/matrix times sparse self-adjoint matrix product */\n    template<typename OtherDerived> friend\n    Product<OtherDerived,SparseSelfAdjointView>\n    operator*(const MatrixBase<OtherDerived>& lhs, const SparseSelfAdjointView& rhs)\n    {\n      return Product<OtherDerived,SparseSelfAdjointView>(lhs.derived(), rhs);\n    }\n\n    /** Perform a symmetric rank K update of the selfadjoint matrix \\c *this:\n      * \\f$ this = this + \\alpha ( u u^* ) \\f$ where \\a u is a vector or matrix.\n      *\n      * \\returns a reference to \\c *this\n      *\n      * To perform \\f$ this = this + \\alpha ( u^* u ) \\f$ you can simply\n      * call this function with u.adjoint().\n      */\n    template<typename 
DerivedU>\n    SparseSelfAdjointView& rankUpdate(const SparseMatrixBase<DerivedU>& u, const Scalar& alpha = Scalar(1));\n    \n    /** \\returns an expression of P H P^-1 */\n    // TODO implement twists in a more evaluator friendly fashion\n    SparseSymmetricPermutationProduct<_MatrixTypeNested,Mode> twistedBy(const PermutationMatrix<Dynamic,Dynamic,StorageIndex>& perm) const\n    {\n      return SparseSymmetricPermutationProduct<_MatrixTypeNested,Mode>(m_matrix, perm);\n    }\n\n    template<typename SrcMatrixType,int SrcMode>\n    SparseSelfAdjointView& operator=(const SparseSymmetricPermutationProduct<SrcMatrixType,SrcMode>& permutedMatrix)\n    {\n      internal::call_assignment_no_alias_no_transpose(*this, permutedMatrix);\n      return *this;\n    }\n\n    SparseSelfAdjointView& operator=(const SparseSelfAdjointView& src)\n    {\n      PermutationMatrix<Dynamic,Dynamic,StorageIndex> pnull;\n      return *this = src.twistedBy(pnull);\n    }\n\n    // Since we override the copy-assignment operator, we need to explicitly re-declare the copy-constructor\n    EIGEN_DEFAULT_COPY_CONSTRUCTOR(SparseSelfAdjointView)\n\n    template<typename SrcMatrixType,unsigned int SrcMode>\n    SparseSelfAdjointView& operator=(const SparseSelfAdjointView<SrcMatrixType,SrcMode>& src)\n    {\n      PermutationMatrix<Dynamic,Dynamic,StorageIndex> pnull;\n      return *this = src.twistedBy(pnull);\n    }\n    \n    void resize(Index rows, Index cols)\n    {\n      EIGEN_ONLY_USED_FOR_DEBUG(rows);\n      EIGEN_ONLY_USED_FOR_DEBUG(cols);\n      eigen_assert(rows == this->rows() && cols == this->cols()\n                && \"SparseSelfadjointView::resize() does not actually allow to resize.\");\n    }\n    \n  protected:\n\n    MatrixTypeNested m_matrix;\n    //mutable VectorI m_countPerRow;\n    //mutable VectorI m_countPerCol;\n  private:\n    template<typename Dest> void evalTo(Dest &) const;\n};\n\n/***************************************************************************\n* Implementation of SparseMatrixBase methods\n***************************************************************************/\n\ntemplate<typename Derived>\ntemplate<unsigned int UpLo>\ntypename SparseMatrixBase<Derived>::template ConstSelfAdjointViewReturnType<UpLo>::Type SparseMatrixBase<Derived>::selfadjointView() const\n{\n  return SparseSelfAdjointView<const Derived, UpLo>(derived());\n}\n\ntemplate<typename Derived>\ntemplate<unsigned int UpLo>\ntypename SparseMatrixBase<Derived>::template SelfAdjointViewReturnType<UpLo>::Type SparseMatrixBase<Derived>::selfadjointView()\n{\n  return SparseSelfAdjointView<Derived, UpLo>(derived());\n}\n\n/***************************************************************************\n* Implementation of SparseSelfAdjointView methods\n***************************************************************************/\n\ntemplate<typename MatrixType, unsigned int Mode>\ntemplate<typename DerivedU>\nSparseSelfAdjointView<MatrixType,Mode>&\nSparseSelfAdjointView<MatrixType,Mode>::rankUpdate(const SparseMatrixBase<DerivedU>& u, const Scalar& alpha)\n{\n  SparseMatrix<Scalar,(MatrixType::Flags&RowMajorBit)?RowMajor:ColMajor> tmp = u * u.adjoint();\n  if(alpha==Scalar(0))\n    m_matrix = tmp.template triangularView<Mode>();\n  else\n    m_matrix += alpha * tmp.template triangularView<Mode>();\n\n  return *this;\n}\n\nnamespace internal {\n  \n// TODO currently a selfadjoint expression has the form SelfAdjointView<.,.>\n//      in the future selfadjoint-ness should be defined by the expression traits\n//      such that 
Transpose<SelfAdjointView<.,.> > is valid. (currently TriangularBase::transpose() is overloaded to make it work)\ntemplate<typename MatrixType, unsigned int Mode>\nstruct evaluator_traits<SparseSelfAdjointView<MatrixType,Mode> >\n{\n  typedef typename storage_kind_to_evaluator_kind<typename MatrixType::StorageKind>::Kind Kind;\n  typedef SparseSelfAdjointShape Shape;\n};\n\nstruct SparseSelfAdjoint2Sparse {};\n\ntemplate<> struct AssignmentKind<SparseShape,SparseSelfAdjointShape> { typedef SparseSelfAdjoint2Sparse Kind; };\ntemplate<> struct AssignmentKind<SparseSelfAdjointShape,SparseShape> { typedef Sparse2Sparse Kind; };\n\ntemplate< typename DstXprType, typename SrcXprType, typename Functor>\nstruct Assignment<DstXprType, SrcXprType, Functor, SparseSelfAdjoint2Sparse>\n{\n  typedef typename DstXprType::StorageIndex StorageIndex;\n  typedef internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> AssignOpType;\n\n  template<typename DestScalar,int StorageOrder>\n  static void run(SparseMatrix<DestScalar,StorageOrder,StorageIndex> &dst, const SrcXprType &src, const AssignOpType&/*func*/)\n  {\n    internal::permute_symm_to_fullsymm<SrcXprType::Mode>(src.matrix(), dst);\n  }\n\n  // FIXME: the handling of += and -= in sparse matrices should be cleaned up so that the next two overloads could be reduced to:\n  template<typename DestScalar,int StorageOrder,typename AssignFunc>\n  static void run(SparseMatrix<DestScalar,StorageOrder,StorageIndex> &dst, const SrcXprType &src, const AssignFunc& func)\n  {\n    SparseMatrix<DestScalar,StorageOrder,StorageIndex> tmp(src.rows(),src.cols());\n    run(tmp, src, AssignOpType());\n    call_assignment_no_alias_no_transpose(dst, tmp, func);\n  }\n\n  template<typename DestScalar,int StorageOrder>\n  static void run(SparseMatrix<DestScalar,StorageOrder,StorageIndex> &dst, const SrcXprType &src,\n                  const internal::add_assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar>& /* func */)\n  {\n    SparseMatrix<DestScalar,StorageOrder,StorageIndex> tmp(src.rows(),src.cols());\n    run(tmp, src, AssignOpType());\n    dst += tmp;\n  }\n\n  template<typename DestScalar,int StorageOrder>\n  static void run(SparseMatrix<DestScalar,StorageOrder,StorageIndex> &dst, const SrcXprType &src,\n                  const internal::sub_assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar>& /* func */)\n  {\n    SparseMatrix<DestScalar,StorageOrder,StorageIndex> tmp(src.rows(),src.cols());\n    run(tmp, src, AssignOpType());\n    dst -= tmp;\n  }\n  \n  template<typename DestScalar>\n  static void run(DynamicSparseMatrix<DestScalar,ColMajor,StorageIndex>& dst, const SrcXprType &src, const AssignOpType&/*func*/)\n  {\n    // TODO directly evaluate into dst;\n    SparseMatrix<DestScalar,ColMajor,StorageIndex> tmp(dst.rows(),dst.cols());\n    internal::permute_symm_to_fullsymm<SrcXprType::Mode>(src.matrix(), tmp);\n    dst = tmp;\n  }\n};\n\n} // end namespace internal\n\n/***************************************************************************\n* Implementation of sparse self-adjoint times dense matrix product\n***************************************************************************/\n\nnamespace internal {\n\ntemplate<int Mode, typename SparseLhsType, typename DenseRhsType, typename DenseResType, typename AlphaType>\ninline void sparse_selfadjoint_time_dense_product(const SparseLhsType& lhs, const DenseRhsType& rhs, DenseResType& res, const AlphaType& alpha)\n{\n  EIGEN_ONLY_USED_FOR_DEBUG(alpha);\n  \n  typedef typename 
internal::nested_eval<SparseLhsType,DenseRhsType::MaxColsAtCompileTime>::type SparseLhsTypeNested;\n  typedef typename internal::remove_all<SparseLhsTypeNested>::type SparseLhsTypeNestedCleaned;\n  typedef evaluator<SparseLhsTypeNestedCleaned> LhsEval;\n  typedef typename LhsEval::InnerIterator LhsIterator;\n  typedef typename SparseLhsType::Scalar LhsScalar;\n  \n  enum {\n    LhsIsRowMajor = (LhsEval::Flags&RowMajorBit)==RowMajorBit,\n    ProcessFirstHalf =\n              ((Mode&(Upper|Lower))==(Upper|Lower))\n          || ( (Mode&Upper) && !LhsIsRowMajor)\n          || ( (Mode&Lower) && LhsIsRowMajor),\n    ProcessSecondHalf = !ProcessFirstHalf\n  };\n  \n  SparseLhsTypeNested lhs_nested(lhs);\n  LhsEval lhsEval(lhs_nested);\n\n  // work on one column at once\n  for (Index k=0; k<rhs.cols(); ++k)\n  {\n    for (Index j=0; j<lhs.outerSize(); ++j)\n    {\n      LhsIterator i(lhsEval,j);\n      // handle diagonal coeff\n      if (ProcessSecondHalf)\n      {\n        while (i && i.index()<j) ++i;\n        if(i && i.index()==j)\n        {\n          res.coeffRef(j,k) += alpha * i.value() * rhs.coeff(j,k);\n          ++i;\n        }\n      }\n\n      // premultiplied rhs for scatters\n      typename ScalarBinaryOpTraits<AlphaType, typename DenseRhsType::Scalar>::ReturnType rhs_j(alpha*rhs(j,k));\n      // accumulator for partial scalar product\n      typename DenseResType::Scalar res_j(0);\n      for(; (ProcessFirstHalf ? i && i.index() < j : i) ; ++i)\n      {\n        LhsScalar lhs_ij = i.value();\n        if(!LhsIsRowMajor) lhs_ij = numext::conj(lhs_ij);\n        res_j += lhs_ij * rhs.coeff(i.index(),k);\n        res(i.index(),k) += numext::conj(lhs_ij) * rhs_j;\n      }\n      res.coeffRef(j,k) += alpha * res_j;\n\n      // handle diagonal coeff\n      if (ProcessFirstHalf && i && (i.index()==j))\n        res.coeffRef(j,k) += alpha * i.value() * rhs.coeff(j,k);\n    }\n  }\n}\n\n\ntemplate<typename LhsView, typename Rhs, int ProductType>\nstruct generic_product_impl<LhsView, Rhs, SparseSelfAdjointShape, DenseShape, ProductType>\n: generic_product_impl_base<LhsView, Rhs, generic_product_impl<LhsView, Rhs, SparseSelfAdjointShape, DenseShape, ProductType> >\n{\n  template<typename Dest>\n  static void scaleAndAddTo(Dest& dst, const LhsView& lhsView, const Rhs& rhs, const typename Dest::Scalar& alpha)\n  {\n    typedef typename LhsView::_MatrixTypeNested Lhs;\n    typedef typename nested_eval<Lhs,Dynamic>::type LhsNested;\n    typedef typename nested_eval<Rhs,Dynamic>::type RhsNested;\n    LhsNested lhsNested(lhsView.matrix());\n    RhsNested rhsNested(rhs);\n    \n    internal::sparse_selfadjoint_time_dense_product<LhsView::Mode>(lhsNested, rhsNested, dst, alpha);\n  }\n};\n\ntemplate<typename Lhs, typename RhsView, int ProductType>\nstruct generic_product_impl<Lhs, RhsView, DenseShape, SparseSelfAdjointShape, ProductType>\n: generic_product_impl_base<Lhs, RhsView, generic_product_impl<Lhs, RhsView, DenseShape, SparseSelfAdjointShape, ProductType> >\n{\n  template<typename Dest>\n  static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const RhsView& rhsView, const typename Dest::Scalar& alpha)\n  {\n    typedef typename RhsView::_MatrixTypeNested Rhs;\n    typedef typename nested_eval<Lhs,Dynamic>::type LhsNested;\n    typedef typename nested_eval<Rhs,Dynamic>::type RhsNested;\n    LhsNested lhsNested(lhs);\n    RhsNested rhsNested(rhsView.matrix());\n    \n    // transpose everything\n    Transpose<Dest> dstT(dst);\n    
internal::sparse_selfadjoint_time_dense_product<RhsView::TransposeMode>(rhsNested.transpose(), lhsNested.transpose(), dstT, alpha);\n  }\n};\n\n// NOTE: these two overloads are needed to evaluate the sparse selfadjoint view into a full sparse matrix\n// TODO: maybe the copy could be handled by generic_product_impl so that these overloads would not be needed anymore\n\ntemplate<typename LhsView, typename Rhs, int ProductTag>\nstruct product_evaluator<Product<LhsView, Rhs, DefaultProduct>, ProductTag, SparseSelfAdjointShape, SparseShape>\n  : public evaluator<typename Product<typename Rhs::PlainObject, Rhs, DefaultProduct>::PlainObject>\n{\n  typedef Product<LhsView, Rhs, DefaultProduct> XprType;\n  typedef typename XprType::PlainObject PlainObject;\n  typedef evaluator<PlainObject> Base;\n\n  product_evaluator(const XprType& xpr)\n    : m_lhs(xpr.lhs()), m_result(xpr.rows(), xpr.cols())\n  {\n    ::new (static_cast<Base*>(this)) Base(m_result);\n    generic_product_impl<typename Rhs::PlainObject, Rhs, SparseShape, SparseShape, ProductTag>::evalTo(m_result, m_lhs, xpr.rhs());\n  }\n  \nprotected:\n  typename Rhs::PlainObject m_lhs;\n  PlainObject m_result;\n};\n\ntemplate<typename Lhs, typename RhsView, int ProductTag>\nstruct product_evaluator<Product<Lhs, RhsView, DefaultProduct>, ProductTag, SparseShape, SparseSelfAdjointShape>\n  : public evaluator<typename Product<Lhs, typename Lhs::PlainObject, DefaultProduct>::PlainObject>\n{\n  typedef Product<Lhs, RhsView, DefaultProduct> XprType;\n  typedef typename XprType::PlainObject PlainObject;\n  typedef evaluator<PlainObject> Base;\n\n  product_evaluator(const XprType& xpr)\n    : m_rhs(xpr.rhs()), m_result(xpr.rows(), xpr.cols())\n  {\n    ::new (static_cast<Base*>(this)) Base(m_result);\n    generic_product_impl<Lhs, typename Lhs::PlainObject, SparseShape, SparseShape, ProductTag>::evalTo(m_result, xpr.lhs(), m_rhs);\n  }\n  \nprotected:\n  typename Lhs::PlainObject m_rhs;\n  PlainObject m_result;\n};\n\n} // namespace internal\n\n/***************************************************************************\n* Implementation of symmetric copies and permutations\n***************************************************************************/\nnamespace internal {\n\ntemplate<int Mode,typename MatrixType,int DestOrder>\nvoid permute_symm_to_fullsymm(const MatrixType& mat, SparseMatrix<typename MatrixType::Scalar,DestOrder,typename MatrixType::StorageIndex>& _dest, const typename MatrixType::StorageIndex* perm)\n{\n  typedef typename MatrixType::StorageIndex StorageIndex;\n  typedef typename MatrixType::Scalar Scalar;\n  typedef SparseMatrix<Scalar,DestOrder,StorageIndex> Dest;\n  typedef Matrix<StorageIndex,Dynamic,1> VectorI;\n  typedef evaluator<MatrixType> MatEval;\n  typedef typename evaluator<MatrixType>::InnerIterator MatIterator;\n  \n  MatEval matEval(mat);\n  Dest& dest(_dest.derived());\n  enum {\n    StorageOrderMatch = int(Dest::IsRowMajor) == int(MatrixType::IsRowMajor)\n  };\n  \n  Index size = mat.rows();\n  VectorI count;\n  count.resize(size);\n  count.setZero();\n  dest.resize(size,size);\n  for(Index j = 0; j<size; ++j)\n  {\n    Index jp = perm ? perm[j] : j;\n    for(MatIterator it(matEval,j); it; ++it)\n    {\n      Index i = it.index();\n      Index r = it.row();\n      Index c = it.col();\n      Index ip = perm ? perm[i] : i;\n      if(Mode==int(Upper|Lower))\n        count[StorageOrderMatch ? 
jp : ip]++;\n      else if(r==c)\n        count[ip]++;\n      else if(( Mode==Lower && r>c) || ( Mode==Upper && r<c))\n      {\n        count[ip]++;\n        count[jp]++;\n      }\n    }\n  }\n  Index nnz = count.sum();\n  \n  // reserve space\n  dest.resizeNonZeros(nnz);\n  dest.outerIndexPtr()[0] = 0;\n  for(Index j=0; j<size; ++j)\n    dest.outerIndexPtr()[j+1] = dest.outerIndexPtr()[j] + count[j];\n  for(Index j=0; j<size; ++j)\n    count[j] = dest.outerIndexPtr()[j];\n  \n  // copy data\n  for(StorageIndex j = 0; j<size; ++j)\n  {\n    for(MatIterator it(matEval,j); it; ++it)\n    {\n      StorageIndex i = internal::convert_index<StorageIndex>(it.index());\n      Index r = it.row();\n      Index c = it.col();\n      \n      StorageIndex jp = perm ? perm[j] : j;\n      StorageIndex ip = perm ? perm[i] : i;\n      \n      if(Mode==int(Upper|Lower))\n      {\n        Index k = count[StorageOrderMatch ? jp : ip]++;\n        dest.innerIndexPtr()[k] = StorageOrderMatch ? ip : jp;\n        dest.valuePtr()[k] = it.value();\n      }\n      else if(r==c)\n      {\n        Index k = count[ip]++;\n        dest.innerIndexPtr()[k] = ip;\n        dest.valuePtr()[k] = it.value();\n      }\n      else if(( (Mode&Lower)==Lower && r>c) || ( (Mode&Upper)==Upper && r<c))\n      {\n        if(!StorageOrderMatch)\n          std::swap(ip,jp);\n        Index k = count[jp]++;\n        dest.innerIndexPtr()[k] = ip;\n        dest.valuePtr()[k] = it.value();\n        k = count[ip]++;\n        dest.innerIndexPtr()[k] = jp;\n        dest.valuePtr()[k] = numext::conj(it.value());\n      }\n    }\n  }\n}\n\ntemplate<int _SrcMode,int _DstMode,typename MatrixType,int DstOrder>\nvoid permute_symm_to_symm(const MatrixType& mat, SparseMatrix<typename MatrixType::Scalar,DstOrder,typename MatrixType::StorageIndex>& _dest, const typename MatrixType::StorageIndex* perm)\n{\n  typedef typename MatrixType::StorageIndex StorageIndex;\n  typedef typename MatrixType::Scalar Scalar;\n  SparseMatrix<Scalar,DstOrder,StorageIndex>& dest(_dest.derived());\n  typedef Matrix<StorageIndex,Dynamic,1> VectorI;\n  typedef evaluator<MatrixType> MatEval;\n  typedef typename evaluator<MatrixType>::InnerIterator MatIterator;\n\n  enum {\n    SrcOrder = MatrixType::IsRowMajor ? RowMajor : ColMajor,\n    StorageOrderMatch = int(SrcOrder) == int(DstOrder),\n    DstMode = DstOrder==RowMajor ? (_DstMode==Upper ? Lower : Upper) : _DstMode,\n    SrcMode = SrcOrder==RowMajor ? (_SrcMode==Upper ? Lower : Upper) : _SrcMode\n  };\n\n  MatEval matEval(mat);\n  \n  Index size = mat.rows();\n  VectorI count(size);\n  count.setZero();\n  dest.resize(size,size);\n  for(StorageIndex j = 0; j<size; ++j)\n  {\n    StorageIndex jp = perm ? perm[j] : j;\n    for(MatIterator it(matEval,j); it; ++it)\n    {\n      StorageIndex i = it.index();\n      if((int(SrcMode)==int(Lower) && i<j) || (int(SrcMode)==int(Upper) && i>j))\n        continue;\n                  \n      StorageIndex ip = perm ? perm[i] : i;\n      count[int(DstMode)==int(Lower) ? 
(std::min)(ip,jp) : (std::max)(ip,jp)]++;\n    }\n  }\n  dest.outerIndexPtr()[0] = 0;\n  for(Index j=0; j<size; ++j)\n    dest.outerIndexPtr()[j+1] = dest.outerIndexPtr()[j] + count[j];\n  dest.resizeNonZeros(dest.outerIndexPtr()[size]);\n  for(Index j=0; j<size; ++j)\n    count[j] = dest.outerIndexPtr()[j];\n  \n  for(StorageIndex j = 0; j<size; ++j)\n  {\n    \n    for(MatIterator it(matEval,j); it; ++it)\n    {\n      StorageIndex i = it.index();\n      if((int(SrcMode)==int(Lower) && i<j) || (int(SrcMode)==int(Upper) && i>j))\n        continue;\n                  \n      StorageIndex jp = perm ? perm[j] : j;\n      StorageIndex ip = perm? perm[i] : i;\n      \n      Index k = count[int(DstMode)==int(Lower) ? (std::min)(ip,jp) : (std::max)(ip,jp)]++;\n      dest.innerIndexPtr()[k] = int(DstMode)==int(Lower) ? (std::max)(ip,jp) : (std::min)(ip,jp);\n      \n      if(!StorageOrderMatch) std::swap(ip,jp);\n      if( ((int(DstMode)==int(Lower) && ip<jp) || (int(DstMode)==int(Upper) && ip>jp)))\n        dest.valuePtr()[k] = numext::conj(it.value());\n      else\n        dest.valuePtr()[k] = it.value();\n    }\n  }\n}\n\n}\n\n// TODO implement twists in a more evaluator friendly fashion\n\nnamespace internal {\n\ntemplate<typename MatrixType, int Mode>\nstruct traits<SparseSymmetricPermutationProduct<MatrixType,Mode> > : traits<MatrixType> {\n};\n\n}\n\ntemplate<typename MatrixType,int Mode>\nclass SparseSymmetricPermutationProduct\n  : public EigenBase<SparseSymmetricPermutationProduct<MatrixType,Mode> >\n{\n  public:\n    typedef typename MatrixType::Scalar Scalar;\n    typedef typename MatrixType::StorageIndex StorageIndex;\n    enum {\n      RowsAtCompileTime = internal::traits<SparseSymmetricPermutationProduct>::RowsAtCompileTime,\n      ColsAtCompileTime = internal::traits<SparseSymmetricPermutationProduct>::ColsAtCompileTime\n    };\n  protected:\n    typedef PermutationMatrix<Dynamic,Dynamic,StorageIndex> Perm;\n  public:\n    typedef Matrix<StorageIndex,Dynamic,1> VectorI;\n    typedef typename MatrixType::Nested MatrixTypeNested;\n    typedef typename internal::remove_all<MatrixTypeNested>::type NestedExpression;\n    \n    SparseSymmetricPermutationProduct(const MatrixType& mat, const Perm& perm)\n      : m_matrix(mat), m_perm(perm)\n    {}\n    \n    inline Index rows() const { return m_matrix.rows(); }\n    inline Index cols() const { return m_matrix.cols(); }\n        \n    const NestedExpression& matrix() const { return m_matrix; }\n    const Perm& perm() const { return m_perm; }\n    \n  protected:\n    MatrixTypeNested m_matrix;\n    const Perm& m_perm;\n\n};\n\nnamespace internal {\n  \ntemplate<typename DstXprType, typename MatrixType, int Mode, typename Scalar>\nstruct Assignment<DstXprType, SparseSymmetricPermutationProduct<MatrixType,Mode>, internal::assign_op<Scalar,typename MatrixType::Scalar>, Sparse2Sparse>\n{\n  typedef SparseSymmetricPermutationProduct<MatrixType,Mode> SrcXprType;\n  typedef typename DstXprType::StorageIndex DstIndex;\n  template<int Options>\n  static void run(SparseMatrix<Scalar,Options,DstIndex> &dst, const SrcXprType &src, const internal::assign_op<Scalar,typename MatrixType::Scalar> &)\n  {\n    // internal::permute_symm_to_fullsymm<Mode>(m_matrix,_dest,m_perm.indices().data());\n    SparseMatrix<Scalar,(Options&RowMajor)==RowMajor ? 
ColMajor : RowMajor, DstIndex> tmp;\n    internal::permute_symm_to_fullsymm<Mode>(src.matrix(),tmp,src.perm().indices().data());\n    dst = tmp;\n  }\n  \n  template<typename DestType,unsigned int DestMode>\n  static void run(SparseSelfAdjointView<DestType,DestMode>& dst, const SrcXprType &src, const internal::assign_op<Scalar,typename MatrixType::Scalar> &)\n  {\n    internal::permute_symm_to_symm<Mode,DestMode>(src.matrix(),dst.matrix(),src.perm().indices().data());\n  }\n};\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_SPARSE_SELFADJOINTVIEW_H\n"
  },
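The file above implements `SparseMatrixBase::selfadjointView()` and the product paths for self-adjoint sparse operands. As a quick orientation aid, here is a minimal, editor-added usage sketch (not part of the vendored tree); it uses only the documented public Eigen API, and the matrix values are illustrative:

```cpp
#include <Eigen/SparseCore>
#include <iostream>

int main() {
  // Store only the lower triangle of a symmetric 3x3 matrix.
  Eigen::SparseMatrix<double> L(3, 3);
  L.insert(0, 0) = 4.0;
  L.insert(1, 0) = 1.0;
  L.insert(1, 1) = 3.0;
  L.insert(2, 2) = 2.0;
  L.makeCompressed();

  // View it as the full symmetric matrix and multiply by a dense vector;
  // this dispatches to sparse_selfadjoint_time_dense_product above.
  Eigen::VectorXd v(3);
  v << 1.0, 2.0, 3.0;
  Eigen::VectorXd y = L.selfadjointView<Eigen::Lower>() * v;
  std::cout << y.transpose() << std::endl;  // prints: 6 7 6
  return 0;
}
```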
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/SparseCore/SparseSolverBase.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2014 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_SPARSESOLVERBASE_H\n#define EIGEN_SPARSESOLVERBASE_H\n\nnamespace Eigen { \n\nnamespace internal {\n\n  /** \\internal\n  * Helper functions to solve with a sparse right-hand-side and result.\n  * The rhs is decomposed into small vertical panels which are solved through dense temporaries.\n  */\ntemplate<typename Decomposition, typename Rhs, typename Dest>\ntypename enable_if<Rhs::ColsAtCompileTime!=1 && Dest::ColsAtCompileTime!=1>::type\nsolve_sparse_through_dense_panels(const Decomposition &dec, const Rhs& rhs, Dest &dest)\n{\n  EIGEN_STATIC_ASSERT((Dest::Flags&RowMajorBit)==0,THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES);\n  typedef typename Dest::Scalar DestScalar;\n  // we process the sparse rhs per block of NbColsAtOnce columns temporarily stored into a dense matrix.\n  static const Index NbColsAtOnce = 4;\n  Index rhsCols = rhs.cols();\n  Index size = rhs.rows();\n  // the temporary matrices do not need more columns than NbColsAtOnce:\n  Index tmpCols = (std::min)(rhsCols, NbColsAtOnce); \n  Eigen::Matrix<DestScalar,Dynamic,Dynamic> tmp(size,tmpCols);\n  Eigen::Matrix<DestScalar,Dynamic,Dynamic> tmpX(size,tmpCols);\n  for(Index k=0; k<rhsCols; k+=NbColsAtOnce)\n  {\n    Index actualCols = std::min<Index>(rhsCols-k, NbColsAtOnce);\n    tmp.leftCols(actualCols) = rhs.middleCols(k,actualCols);\n    tmpX.leftCols(actualCols) = dec.solve(tmp.leftCols(actualCols));\n    dest.middleCols(k,actualCols) = tmpX.leftCols(actualCols).sparseView();\n  }\n}\n\n// Overload for vector as rhs\ntemplate<typename Decomposition, typename Rhs, typename Dest>\ntypename enable_if<Rhs::ColsAtCompileTime==1 || Dest::ColsAtCompileTime==1>::type\nsolve_sparse_through_dense_panels(const Decomposition &dec, const Rhs& rhs, Dest &dest)\n{\n  typedef typename Dest::Scalar DestScalar;\n  Index size = rhs.rows();\n  Eigen::Matrix<DestScalar,Dynamic,1> rhs_dense(rhs);\n  Eigen::Matrix<DestScalar,Dynamic,1> dest_dense(size);\n  dest_dense = dec.solve(rhs_dense);\n  dest = dest_dense.sparseView();\n}\n\n} // end namespace internal\n\n/** \\class SparseSolverBase\n  * \\ingroup SparseCore_Module\n  * \\brief A base class for sparse solvers\n  *\n  * \\tparam Derived the actual type of the solver.\n  *\n  */\ntemplate<typename Derived>\nclass SparseSolverBase : internal::noncopyable\n{\n  public:\n\n    /** Default constructor */\n    SparseSolverBase()\n      : m_isInitialized(false)\n    {}\n\n    ~SparseSolverBase()\n    {}\n\n    Derived& derived() { return *static_cast<Derived*>(this); }\n    const Derived& derived() const { return *static_cast<const Derived*>(this); }\n    \n    /** \\returns an expression of the solution x of \\f$ A x = b \\f$ using the current decomposition of A.\n      *\n      * \\sa compute()\n      */\n    template<typename Rhs>\n    inline const Solve<Derived, Rhs>\n    solve(const MatrixBase<Rhs>& b) const\n    {\n      eigen_assert(m_isInitialized && \"Solver is not initialized.\");\n      eigen_assert(derived().rows()==b.rows() && \"solve(): invalid number of rows of the right hand side matrix b\");\n      return Solve<Derived, Rhs>(derived(), b.derived());\n    }\n    \n    /** \\returns an 
expression of the solution x of \\f$ A x = b \\f$ using the current decomposition of A.\n      *\n      * \\sa compute()\n      */\n    template<typename Rhs>\n    inline const Solve<Derived, Rhs>\n    solve(const SparseMatrixBase<Rhs>& b) const\n    {\n      eigen_assert(m_isInitialized && \"Solver is not initialized.\");\n      eigen_assert(derived().rows()==b.rows() && \"solve(): invalid number of rows of the right hand side matrix b\");\n      return Solve<Derived, Rhs>(derived(), b.derived());\n    }\n    \n    #ifndef EIGEN_PARSED_BY_DOXYGEN\n    /** \\internal default implementation of solving with a sparse rhs */\n    template<typename Rhs,typename Dest>\n    void _solve_impl(const SparseMatrixBase<Rhs> &b, SparseMatrixBase<Dest> &dest) const\n    {\n      internal::solve_sparse_through_dense_panels(derived(), b.derived(), dest.derived());\n    }\n    #endif // EIGEN_PARSED_BY_DOXYGEN\n\n  protected:\n    \n    mutable bool m_isInitialized;\n};\n\n} // end namespace Eigen\n\n#endif // EIGEN_SPARSESOLVERBASE_H\n"
  },
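SparseSolverBase routes a sparse right-hand side through dense panels of at most four columns (`NbColsAtOnce`). A brief editor-added sketch, assuming a concrete decomposition such as `SimplicialLLT` from `<Eigen/SparseCholesky>`; values are illustrative:

```cpp
#include <Eigen/SparseCholesky>
#include <Eigen/SparseCore>
#include <iostream>

int main() {
  Eigen::SparseMatrix<double> A(2, 2);
  A.insert(0, 0) = 4.0;
  A.insert(1, 1) = 9.0;
  A.makeCompressed();

  Eigen::SimplicialLLT<Eigen::SparseMatrix<double>> solver(A);

  // Dense right-hand side: the MatrixBase overload of solve().
  Eigen::VectorXd b(2);
  b << 8.0, 18.0;
  Eigen::VectorXd x = solver.solve(b);
  std::cout << x.transpose() << std::endl;  // prints: 2 2

  // Sparse right-hand side: the SparseMatrixBase overload, evaluated
  // via solve_sparse_through_dense_panels().
  Eigen::SparseMatrix<double> B(2, 2), X;
  B.insert(0, 0) = 4.0;
  B.insert(1, 1) = 9.0;
  X = solver.solve(B);  // X is diag(1, 1)
  return 0;
}
```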
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/SparseCore/SparseSparseProductWithPruning.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_SPARSESPARSEPRODUCTWITHPRUNING_H\n#define EIGEN_SPARSESPARSEPRODUCTWITHPRUNING_H\n\nnamespace Eigen { \n\nnamespace internal {\n\n\n// perform a pseudo in-place sparse * sparse product assuming all matrices are col major\ntemplate<typename Lhs, typename Rhs, typename ResultType>\nstatic void sparse_sparse_product_with_pruning_impl(const Lhs& lhs, const Rhs& rhs, ResultType& res, const typename ResultType::RealScalar& tolerance)\n{\n  // return sparse_sparse_product_with_pruning_impl2(lhs,rhs,res);\n\n  typedef typename remove_all<Rhs>::type::Scalar RhsScalar;\n  typedef typename remove_all<ResultType>::type::Scalar ResScalar;\n  typedef typename remove_all<Lhs>::type::StorageIndex StorageIndex;\n\n  // make sure to call innerSize/outerSize since we fake the storage order.\n  Index rows = lhs.innerSize();\n  Index cols = rhs.outerSize();\n  //Index size = lhs.outerSize();\n  eigen_assert(lhs.outerSize() == rhs.innerSize());\n\n  // allocate a temporary buffer\n  AmbiVector<ResScalar,StorageIndex> tempVector(rows);\n\n  // mimics a resizeByInnerOuter:\n  if(ResultType::IsRowMajor)\n    res.resize(cols, rows);\n  else\n    res.resize(rows, cols);\n  \n  evaluator<Lhs> lhsEval(lhs);\n  evaluator<Rhs> rhsEval(rhs);\n  \n  // estimate the number of non zero entries\n  // given a rhs column containing Y non zeros, we assume that the respective Y columns\n  // of the lhs differs in average of one non zeros, thus the number of non zeros for\n  // the product of a rhs column with the lhs is X+Y where X is the average number of non zero\n  // per column of the lhs.\n  // Therefore, we have nnz(lhs*rhs) = nnz(lhs) + nnz(rhs)\n  Index estimated_nnz_prod = lhsEval.nonZerosEstimate() + rhsEval.nonZerosEstimate();\n\n  res.reserve(estimated_nnz_prod);\n  double ratioColRes = double(estimated_nnz_prod)/(double(lhs.rows())*double(rhs.cols()));\n  for (Index j=0; j<cols; ++j)\n  {\n    // FIXME:\n    //double ratioColRes = (double(rhs.innerVector(j).nonZeros()) + double(lhs.nonZeros())/double(lhs.cols()))/double(lhs.rows());\n    // let's do a more accurate determination of the nnz ratio for the current column j of res\n    tempVector.init(ratioColRes);\n    tempVector.setZero();\n    for (typename evaluator<Rhs>::InnerIterator rhsIt(rhsEval, j); rhsIt; ++rhsIt)\n    {\n      // FIXME should be written like this: tmp += rhsIt.value() * lhs.col(rhsIt.index())\n      tempVector.restart();\n      RhsScalar x = rhsIt.value();\n      for (typename evaluator<Lhs>::InnerIterator lhsIt(lhsEval, rhsIt.index()); lhsIt; ++lhsIt)\n      {\n        tempVector.coeffRef(lhsIt.index()) += lhsIt.value() * x;\n      }\n    }\n    res.startVec(j);\n    for (typename AmbiVector<ResScalar,StorageIndex>::Iterator it(tempVector,tolerance); it; ++it)\n      res.insertBackByOuterInner(j,it.index()) = it.value();\n  }\n  res.finalize();\n}\n\ntemplate<typename Lhs, typename Rhs, typename ResultType,\n  int LhsStorageOrder = traits<Lhs>::Flags&RowMajorBit,\n  int RhsStorageOrder = traits<Rhs>::Flags&RowMajorBit,\n  int ResStorageOrder = traits<ResultType>::Flags&RowMajorBit>\nstruct 
sparse_sparse_product_with_pruning_selector;\n\ntemplate<typename Lhs, typename Rhs, typename ResultType>\nstruct sparse_sparse_product_with_pruning_selector<Lhs,Rhs,ResultType,ColMajor,ColMajor,ColMajor>\n{\n  typedef typename ResultType::RealScalar RealScalar;\n\n  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res, const RealScalar& tolerance)\n  {\n    typename remove_all<ResultType>::type _res(res.rows(), res.cols());\n    internal::sparse_sparse_product_with_pruning_impl<Lhs,Rhs,ResultType>(lhs, rhs, _res, tolerance);\n    res.swap(_res);\n  }\n};\n\ntemplate<typename Lhs, typename Rhs, typename ResultType>\nstruct sparse_sparse_product_with_pruning_selector<Lhs,Rhs,ResultType,ColMajor,ColMajor,RowMajor>\n{\n  typedef typename ResultType::RealScalar RealScalar;\n  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res, const RealScalar& tolerance)\n  {\n    // we need a col-major matrix to hold the result\n    typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::StorageIndex> SparseTemporaryType;\n    SparseTemporaryType _res(res.rows(), res.cols());\n    internal::sparse_sparse_product_with_pruning_impl<Lhs,Rhs,SparseTemporaryType>(lhs, rhs, _res, tolerance);\n    res = _res;\n  }\n};\n\ntemplate<typename Lhs, typename Rhs, typename ResultType>\nstruct sparse_sparse_product_with_pruning_selector<Lhs,Rhs,ResultType,RowMajor,RowMajor,RowMajor>\n{\n  typedef typename ResultType::RealScalar RealScalar;\n  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res, const RealScalar& tolerance)\n  {\n    // let's transpose the product to get a column x column product\n    typename remove_all<ResultType>::type _res(res.rows(), res.cols());\n    internal::sparse_sparse_product_with_pruning_impl<Rhs,Lhs,ResultType>(rhs, lhs, _res, tolerance);\n    res.swap(_res);\n  }\n};\n\ntemplate<typename Lhs, typename Rhs, typename ResultType>\nstruct sparse_sparse_product_with_pruning_selector<Lhs,Rhs,ResultType,RowMajor,RowMajor,ColMajor>\n{\n  typedef typename ResultType::RealScalar RealScalar;\n  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res, const RealScalar& tolerance)\n  {\n    typedef SparseMatrix<typename Lhs::Scalar,ColMajor,typename Lhs::StorageIndex> ColMajorMatrixLhs;\n    typedef SparseMatrix<typename Rhs::Scalar,ColMajor,typename Lhs::StorageIndex> ColMajorMatrixRhs;\n    ColMajorMatrixLhs colLhs(lhs);\n    ColMajorMatrixRhs colRhs(rhs);\n    internal::sparse_sparse_product_with_pruning_impl<ColMajorMatrixLhs,ColMajorMatrixRhs,ResultType>(colLhs, colRhs, res, tolerance);\n\n    // let's transpose the product to get a column x column product\n//     typedef SparseMatrix<typename ResultType::Scalar> SparseTemporaryType;\n//     SparseTemporaryType _res(res.cols(), res.rows());\n//     sparse_sparse_product_with_pruning_impl<Rhs,Lhs,SparseTemporaryType>(rhs, lhs, _res);\n//     res = _res.transpose();\n  }\n};\n\ntemplate<typename Lhs, typename Rhs, typename ResultType>\nstruct sparse_sparse_product_with_pruning_selector<Lhs,Rhs,ResultType,ColMajor,RowMajor,RowMajor>\n{\n  typedef typename ResultType::RealScalar RealScalar;\n  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res, const RealScalar& tolerance)\n  {\n    typedef SparseMatrix<typename Lhs::Scalar,RowMajor,typename Lhs::StorageIndex> RowMajorMatrixLhs;\n    RowMajorMatrixLhs rowLhs(lhs);\n    sparse_sparse_product_with_pruning_selector<RowMajorMatrixLhs,Rhs,ResultType,RowMajor,RowMajor>(rowLhs,rhs,res,tolerance);\n  }\n};\n\ntemplate<typename Lhs, 
typename Rhs, typename ResultType>\nstruct sparse_sparse_product_with_pruning_selector<Lhs,Rhs,ResultType,RowMajor,ColMajor,RowMajor>\n{\n  typedef typename ResultType::RealScalar RealScalar;\n  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res, const RealScalar& tolerance)\n  {\n    typedef SparseMatrix<typename Rhs::Scalar,RowMajor,typename Lhs::StorageIndex> RowMajorMatrixRhs;\n    RowMajorMatrixRhs rowRhs(rhs);\n    sparse_sparse_product_with_pruning_selector<Lhs,RowMajorMatrixRhs,ResultType,RowMajor,RowMajor,RowMajor>(lhs,rowRhs,res,tolerance);\n  }\n};\n\ntemplate<typename Lhs, typename Rhs, typename ResultType>\nstruct sparse_sparse_product_with_pruning_selector<Lhs,Rhs,ResultType,ColMajor,RowMajor,ColMajor>\n{\n  typedef typename ResultType::RealScalar RealScalar;\n  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res, const RealScalar& tolerance)\n  {\n    typedef SparseMatrix<typename Rhs::Scalar,ColMajor,typename Lhs::StorageIndex> ColMajorMatrixRhs;\n    ColMajorMatrixRhs colRhs(rhs);\n    internal::sparse_sparse_product_with_pruning_impl<Lhs,ColMajorMatrixRhs,ResultType>(lhs, colRhs, res, tolerance);\n  }\n};\n\ntemplate<typename Lhs, typename Rhs, typename ResultType>\nstruct sparse_sparse_product_with_pruning_selector<Lhs,Rhs,ResultType,RowMajor,ColMajor,ColMajor>\n{\n  typedef typename ResultType::RealScalar RealScalar;\n  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res, const RealScalar& tolerance)\n  {\n    typedef SparseMatrix<typename Lhs::Scalar,ColMajor,typename Lhs::StorageIndex> ColMajorMatrixLhs;\n    ColMajorMatrixLhs colLhs(lhs);\n    internal::sparse_sparse_product_with_pruning_impl<ColMajorMatrixLhs,Rhs,ResultType>(colLhs, rhs, res, tolerance);\n  }\n};\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_SPARSESPARSEPRODUCTWITHPRUNING_H\n"
  },
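The pruning product above is reached through `SparseMatrixBase::pruned()` applied to a product expression; entries whose magnitude falls below `reference * epsilon` are dropped while the product is formed. A short editor-added sketch with illustrative values:

```cpp
#include <Eigen/SparseCore>
#include <iostream>

int main() {
  Eigen::SparseMatrix<double> A(2, 2), B(2, 2), C;
  A.insert(0, 0) = 1.0;
  A.insert(1, 1) = 1e-12;  // structurally nonzero, numerically tiny
  B.insert(0, 0) = 2.0;
  B.insert(1, 1) = 3.0;

  C = A * B;  // ordinary conservative product keeps both entries
  std::cout << C.nonZeros() << std::endl;  // prints: 2

  C = (A * B).pruned(1.0, 1e-6);  // pruning product drops the 3e-12 entry
  std::cout << C.nonZeros() << std::endl;  // prints: 1
  return 0;
}
```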
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/SparseCore/SparseTranspose.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008-2015 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_SPARSETRANSPOSE_H\n#define EIGEN_SPARSETRANSPOSE_H\n\nnamespace Eigen { \n\nnamespace internal {\n  template<typename MatrixType,int CompressedAccess=int(MatrixType::Flags&CompressedAccessBit)>\n  class SparseTransposeImpl\n    : public SparseMatrixBase<Transpose<MatrixType> >\n  {};\n  \n  template<typename MatrixType>\n  class SparseTransposeImpl<MatrixType,CompressedAccessBit>\n    : public SparseCompressedBase<Transpose<MatrixType> >\n  {\n    typedef SparseCompressedBase<Transpose<MatrixType> > Base;\n  public:\n    using Base::derived;\n    typedef typename Base::Scalar Scalar;\n    typedef typename Base::StorageIndex StorageIndex;\n\n    inline Index nonZeros() const { return derived().nestedExpression().nonZeros(); }\n    \n    inline const Scalar* valuePtr() const { return derived().nestedExpression().valuePtr(); }\n    inline const StorageIndex* innerIndexPtr() const { return derived().nestedExpression().innerIndexPtr(); }\n    inline const StorageIndex* outerIndexPtr() const { return derived().nestedExpression().outerIndexPtr(); }\n    inline const StorageIndex* innerNonZeroPtr() const { return derived().nestedExpression().innerNonZeroPtr(); }\n\n    inline Scalar* valuePtr() { return derived().nestedExpression().valuePtr(); }\n    inline StorageIndex* innerIndexPtr() { return derived().nestedExpression().innerIndexPtr(); }\n    inline StorageIndex* outerIndexPtr() { return derived().nestedExpression().outerIndexPtr(); }\n    inline StorageIndex* innerNonZeroPtr() { return derived().nestedExpression().innerNonZeroPtr(); }\n  };\n}\n  \ntemplate<typename MatrixType> class TransposeImpl<MatrixType,Sparse>\n  : public internal::SparseTransposeImpl<MatrixType>\n{\n  protected:\n    typedef internal::SparseTransposeImpl<MatrixType> Base;\n};\n\nnamespace internal {\n  \ntemplate<typename ArgType>\nstruct unary_evaluator<Transpose<ArgType>, IteratorBased>\n  : public evaluator_base<Transpose<ArgType> >\n{\n    typedef typename evaluator<ArgType>::InnerIterator        EvalIterator;\n  public:\n    typedef Transpose<ArgType> XprType;\n    \n    inline Index nonZerosEstimate() const {\n      return m_argImpl.nonZerosEstimate();\n    }\n\n    class InnerIterator : public EvalIterator\n    {\n    public:\n      EIGEN_STRONG_INLINE InnerIterator(const unary_evaluator& unaryOp, Index outer)\n        : EvalIterator(unaryOp.m_argImpl,outer)\n      {}\n      \n      Index row() const { return EvalIterator::col(); }\n      Index col() const { return EvalIterator::row(); }\n    };\n    \n    enum {\n      CoeffReadCost = evaluator<ArgType>::CoeffReadCost,\n      Flags = XprType::Flags\n    };\n    \n    explicit unary_evaluator(const XprType& op) :m_argImpl(op.nestedExpression()) {}\n\n  protected:\n    evaluator<ArgType> m_argImpl;\n};\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_SPARSETRANSPOSE_H\n"
  },
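Because the transpose of a compressed sparse matrix simply forwards `valuePtr()`, `innerIndexPtr()`, and `outerIndexPtr()` from the nested expression, `A.transpose()` is a zero-copy view; only assigning it to another matrix materializes anything. Editor-added sketch, illustrative values:

```cpp
#include <Eigen/SparseCore>
#include <iostream>

int main() {
  Eigen::SparseMatrix<double> A(2, 3);
  A.insert(0, 1) = 5.0;
  A.insert(1, 2) = 7.0;
  A.makeCompressed();

  // Products can consume the transpose view directly, without a copy.
  Eigen::VectorXd v(2);
  v << 1.0, 1.0;
  Eigen::VectorXd y = A.transpose() * v;
  std::cout << y.transpose() << std::endl;  // prints: 0 5 7

  // Assignment materializes a 3x2 matrix.
  Eigen::SparseMatrix<double> B = A.transpose();
  std::cout << B.rows() << "x" << B.cols() << std::endl;  // prints: 3x2
  return 0;
}
```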
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/SparseCore/SparseTriangularView.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2009-2015 Gael Guennebaud <gael.guennebaud@inria.fr>\n// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_SPARSE_TRIANGULARVIEW_H\n#define EIGEN_SPARSE_TRIANGULARVIEW_H\n\nnamespace Eigen {\n\n/** \\ingroup SparseCore_Module\n  *\n  * \\brief Base class for a triangular part in a \\b sparse matrix\n  *\n  * This class is an abstract base class of class TriangularView, and objects of type TriangularViewImpl cannot be instantiated.\n  * It extends class TriangularView with additional methods which are available for sparse expressions only.\n  *\n  * \\sa class TriangularView, SparseMatrixBase::triangularView()\n  */\ntemplate<typename MatrixType, unsigned int Mode> class TriangularViewImpl<MatrixType,Mode,Sparse>\n  : public SparseMatrixBase<TriangularView<MatrixType,Mode> >\n{\n    enum { SkipFirst = ((Mode&Lower) && !(MatrixType::Flags&RowMajorBit))\n                    || ((Mode&Upper) &&  (MatrixType::Flags&RowMajorBit)),\n           SkipLast = !SkipFirst,\n           SkipDiag = (Mode&ZeroDiag) ? 1 : 0,\n           HasUnitDiag = (Mode&UnitDiag) ? 1 : 0\n    };\n    \n    typedef TriangularView<MatrixType,Mode> TriangularViewType;\n    \n  protected:\n    // dummy solve function to make TriangularView happy.\n    void solve() const;\n\n    typedef SparseMatrixBase<TriangularViewType> Base;\n  public:\n    \n    EIGEN_SPARSE_PUBLIC_INTERFACE(TriangularViewType)\n    \n    typedef typename MatrixType::Nested MatrixTypeNested;\n    typedef typename internal::remove_reference<MatrixTypeNested>::type MatrixTypeNestedNonRef;\n    typedef typename internal::remove_all<MatrixTypeNested>::type MatrixTypeNestedCleaned;\n\n    template<typename RhsType, typename DstType>\n    EIGEN_DEVICE_FUNC\n    EIGEN_STRONG_INLINE void _solve_impl(const RhsType &rhs, DstType &dst) const {\n      if(!(internal::is_same<RhsType,DstType>::value && internal::extract_data(dst) == internal::extract_data(rhs)))\n        dst = rhs;\n      this->solveInPlace(dst);\n    }\n\n    /** Applies the inverse of \\c *this to the dense vector or matrix \\a other, \"in-place\" */\n    template<typename OtherDerived> void solveInPlace(MatrixBase<OtherDerived>& other) const;\n\n    /** Applies the inverse of \\c *this to the sparse vector or matrix \\a other, \"in-place\" */\n    template<typename OtherDerived> void solveInPlace(SparseMatrixBase<OtherDerived>& other) const;\n  \n};\n\nnamespace internal {\n\ntemplate<typename ArgType, unsigned int Mode>\nstruct unary_evaluator<TriangularView<ArgType,Mode>, IteratorBased>\n : evaluator_base<TriangularView<ArgType,Mode> >\n{\n  typedef TriangularView<ArgType,Mode> XprType;\n  \nprotected:\n  \n  typedef typename XprType::Scalar Scalar;\n  typedef typename XprType::StorageIndex StorageIndex;\n  typedef typename evaluator<ArgType>::InnerIterator EvalIterator;\n  \n  enum { SkipFirst = ((Mode&Lower) && !(ArgType::Flags&RowMajorBit))\n                    || ((Mode&Upper) &&  (ArgType::Flags&RowMajorBit)),\n         SkipLast = !SkipFirst,\n         SkipDiag = (Mode&ZeroDiag) ? 1 : 0,\n         HasUnitDiag = (Mode&UnitDiag) ? 
1 : 0\n  };\n  \npublic:\n  \n  enum {\n    CoeffReadCost = evaluator<ArgType>::CoeffReadCost,\n    Flags = XprType::Flags\n  };\n    \n  explicit unary_evaluator(const XprType &xpr) : m_argImpl(xpr.nestedExpression()), m_arg(xpr.nestedExpression()) {}\n  \n  inline Index nonZerosEstimate() const {\n    return m_argImpl.nonZerosEstimate();\n  }\n  \n  class InnerIterator : public EvalIterator\n  {\n      typedef EvalIterator Base;\n    public:\n\n      EIGEN_STRONG_INLINE InnerIterator(const unary_evaluator& xprEval, Index outer)\n        : Base(xprEval.m_argImpl,outer), m_returnOne(false), m_containsDiag(Base::outer()<xprEval.m_arg.innerSize())\n      {\n        if(SkipFirst)\n        {\n          while((*this) && ((HasUnitDiag||SkipDiag)  ? this->index()<=outer : this->index()<outer))\n            Base::operator++();\n          if(HasUnitDiag)\n            m_returnOne = m_containsDiag;\n        }\n        else if(HasUnitDiag && ((!Base::operator bool()) || Base::index()>=Base::outer()))\n        {\n          if((!SkipFirst) && Base::operator bool())\n            Base::operator++();\n          m_returnOne = m_containsDiag;\n        }\n      }\n\n      EIGEN_STRONG_INLINE InnerIterator& operator++()\n      {\n        if(HasUnitDiag && m_returnOne)\n          m_returnOne = false;\n        else\n        {\n          Base::operator++();\n          if(HasUnitDiag && (!SkipFirst) && ((!Base::operator bool()) || Base::index()>=Base::outer()))\n          {\n            if((!SkipFirst) && Base::operator bool())\n              Base::operator++();\n            m_returnOne = m_containsDiag;\n          }\n        }\n        return *this;\n      }\n      \n      EIGEN_STRONG_INLINE operator bool() const\n      {\n        if(HasUnitDiag && m_returnOne)\n          return true;\n        if(SkipFirst) return  Base::operator bool();\n        else\n        {\n          if (SkipDiag) return (Base::operator bool() && this->index() < this->outer());\n          else return (Base::operator bool() && this->index() <= this->outer());\n        }\n      }\n\n//       inline Index row() const { return (ArgType::Flags&RowMajorBit ? Base::outer() : this->index()); }\n//       inline Index col() const { return (ArgType::Flags&RowMajorBit ? this->index() : Base::outer()); }\n      inline StorageIndex index() const\n      {\n        if(HasUnitDiag && m_returnOne)  return internal::convert_index<StorageIndex>(Base::outer());\n        else                            return Base::index();\n      }\n      inline Scalar value() const\n      {\n        if(HasUnitDiag && m_returnOne)  return Scalar(1);\n        else                            return Base::value();\n      }\n\n    protected:\n      bool m_returnOne;\n      bool m_containsDiag;\n    private:\n      Scalar& valueRef();\n  };\n  \nprotected:\n  evaluator<ArgType> m_argImpl;\n  const ArgType& m_arg;\n};\n\n} // end namespace internal\n\ntemplate<typename Derived>\ntemplate<int Mode>\ninline const TriangularView<const Derived, Mode>\nSparseMatrixBase<Derived>::triangularView() const\n{\n  return TriangularView<const Derived, Mode>(derived());\n}\n\n} // end namespace Eigen\n\n#endif // EIGEN_SPARSE_TRIANGULARVIEW_H\n"
  },
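The triangular view exposes `solveInPlace()` for both dense and sparse right-hand sides. A minimal editor-added sketch of a lower-triangular solve (values illustrative; the triangle must be nonsingular):

```cpp
#include <Eigen/SparseCore>
#include <iostream>

int main() {
  Eigen::SparseMatrix<double> A(2, 2);
  A.insert(0, 0) = 2.0;
  A.insert(1, 0) = 1.0;
  A.insert(1, 1) = 4.0;
  A.makeCompressed();

  Eigen::VectorXd b(2);
  b << 2.0, 9.0;

  // Solve L x = b in place, where L is the lower triangle of A.
  A.triangularView<Eigen::Lower>().solveInPlace(b);
  std::cout << b.transpose() << std::endl;  // prints: 1 2
  return 0;
}
```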
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/SparseCore/SparseUtil.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_SPARSEUTIL_H\n#define EIGEN_SPARSEUTIL_H\n\nnamespace Eigen { \n\n#ifdef NDEBUG\n#define EIGEN_DBG_SPARSE(X)\n#else\n#define EIGEN_DBG_SPARSE(X) X\n#endif\n\n#define EIGEN_SPARSE_INHERIT_ASSIGNMENT_OPERATOR(Derived, Op) \\\ntemplate<typename OtherDerived> \\\nEIGEN_STRONG_INLINE Derived& operator Op(const Eigen::SparseMatrixBase<OtherDerived>& other) \\\n{ \\\n  return Base::operator Op(other.derived()); \\\n} \\\nEIGEN_STRONG_INLINE Derived& operator Op(const Derived& other) \\\n{ \\\n  return Base::operator Op(other); \\\n}\n\n#define EIGEN_SPARSE_INHERIT_SCALAR_ASSIGNMENT_OPERATOR(Derived, Op) \\\ntemplate<typename Other> \\\nEIGEN_STRONG_INLINE Derived& operator Op(const Other& scalar) \\\n{ \\\n  return Base::operator Op(scalar); \\\n}\n\n#define EIGEN_SPARSE_INHERIT_ASSIGNMENT_OPERATORS(Derived) \\\nEIGEN_SPARSE_INHERIT_ASSIGNMENT_OPERATOR(Derived, =)\n\n\n#define EIGEN_SPARSE_PUBLIC_INTERFACE(Derived) \\\n  EIGEN_GENERIC_PUBLIC_INTERFACE(Derived)\n\n  \nconst int CoherentAccessPattern     = 0x1;\nconst int InnerRandomAccessPattern  = 0x2 | CoherentAccessPattern;\nconst int OuterRandomAccessPattern  = 0x4 | CoherentAccessPattern;\nconst int RandomAccessPattern       = 0x8 | OuterRandomAccessPattern | InnerRandomAccessPattern;\n\ntemplate<typename _Scalar, int _Flags = 0, typename _StorageIndex = int>  class SparseMatrix;\ntemplate<typename _Scalar, int _Flags = 0, typename _StorageIndex = int>  class DynamicSparseMatrix;\ntemplate<typename _Scalar, int _Flags = 0, typename _StorageIndex = int>  class SparseVector;\ntemplate<typename _Scalar, int _Flags = 0, typename _StorageIndex = int>  class MappedSparseMatrix;\n\ntemplate<typename MatrixType, unsigned int UpLo>  class SparseSelfAdjointView;\ntemplate<typename Lhs, typename Rhs>              class SparseDiagonalProduct;\ntemplate<typename MatrixType> class SparseView;\n\ntemplate<typename Lhs, typename Rhs>        class SparseSparseProduct;\ntemplate<typename Lhs, typename Rhs>        class SparseTimeDenseProduct;\ntemplate<typename Lhs, typename Rhs>        class DenseTimeSparseProduct;\ntemplate<typename Lhs, typename Rhs, bool Transpose> class SparseDenseOuterProduct;\n\ntemplate<typename Lhs, typename Rhs> struct SparseSparseProductReturnType;\ntemplate<typename Lhs, typename Rhs,\n         int InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(internal::traits<Lhs>::ColsAtCompileTime,internal::traits<Rhs>::RowsAtCompileTime)> struct DenseSparseProductReturnType;\n         \ntemplate<typename Lhs, typename Rhs,\n         int InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(internal::traits<Lhs>::ColsAtCompileTime,internal::traits<Rhs>::RowsAtCompileTime)> struct SparseDenseProductReturnType;\ntemplate<typename MatrixType,int UpLo> class SparseSymmetricPermutationProduct;\n\nnamespace internal {\n\ntemplate<typename T,int Rows,int Cols,int Flags> struct sparse_eval;\n\ntemplate<typename T> struct eval<T,Sparse>\n  : sparse_eval<T, traits<T>::RowsAtCompileTime,traits<T>::ColsAtCompileTime,traits<T>::Flags>\n{};\n\ntemplate<typename T,int Cols,int Flags> struct sparse_eval<T,1,Cols,Flags> {\n    typedef typename traits<T>::Scalar _Scalar;\n    typedef 
typename traits<T>::StorageIndex _StorageIndex;\n  public:\n    typedef SparseVector<_Scalar, RowMajor, _StorageIndex> type;\n};\n\ntemplate<typename T,int Rows,int Flags> struct sparse_eval<T,Rows,1,Flags> {\n    typedef typename traits<T>::Scalar _Scalar;\n    typedef typename traits<T>::StorageIndex _StorageIndex;\n  public:\n    typedef SparseVector<_Scalar, ColMajor, _StorageIndex> type;\n};\n\n// TODO this seems almost identical to plain_matrix_type<T, Sparse>\ntemplate<typename T,int Rows,int Cols,int Flags> struct sparse_eval {\n    typedef typename traits<T>::Scalar _Scalar;\n    typedef typename traits<T>::StorageIndex _StorageIndex;\n    enum { _Options = ((Flags&RowMajorBit)==RowMajorBit) ? RowMajor : ColMajor };\n  public:\n    typedef SparseMatrix<_Scalar, _Options, _StorageIndex> type;\n};\n\ntemplate<typename T,int Flags> struct sparse_eval<T,1,1,Flags> {\n    typedef typename traits<T>::Scalar _Scalar;\n  public:\n    typedef Matrix<_Scalar, 1, 1> type;\n};\n\ntemplate<typename T> struct plain_matrix_type<T,Sparse>\n{\n  typedef typename traits<T>::Scalar _Scalar;\n  typedef typename traits<T>::StorageIndex _StorageIndex;\n  enum { _Options = ((evaluator<T>::Flags&RowMajorBit)==RowMajorBit) ? RowMajor : ColMajor };\n  public:\n    typedef SparseMatrix<_Scalar, _Options, _StorageIndex> type;\n};\n\ntemplate<typename T>\nstruct plain_object_eval<T,Sparse>\n  : sparse_eval<T, traits<T>::RowsAtCompileTime,traits<T>::ColsAtCompileTime, evaluator<T>::Flags>\n{};\n\ntemplate<typename Decomposition, typename RhsType>\nstruct solve_traits<Decomposition,RhsType,Sparse>\n{\n  typedef typename sparse_eval<RhsType, RhsType::RowsAtCompileTime, RhsType::ColsAtCompileTime,traits<RhsType>::Flags>::type PlainObject;\n};\n\ntemplate<typename Derived>\nstruct generic_xpr_base<Derived, MatrixXpr, Sparse>\n{\n  typedef SparseMatrixBase<Derived> type;\n};\n\nstruct SparseTriangularShape  { static std::string debugName() { return \"SparseTriangularShape\"; } };\nstruct SparseSelfAdjointShape { static std::string debugName() { return \"SparseSelfAdjointShape\"; } };\n\ntemplate<> struct glue_shapes<SparseShape,SelfAdjointShape> { typedef SparseSelfAdjointShape type;  };\ntemplate<> struct glue_shapes<SparseShape,TriangularShape > { typedef SparseTriangularShape  type;  };\n\n// return type of SparseCompressedBase::lower_bound;\nstruct LowerBoundIndex {\n  LowerBoundIndex() : value(-1), found(false) {}\n  LowerBoundIndex(Index val, bool ok) : value(val), found(ok) {}\n  Index value;\n  bool found;\n};\n\n} // end namespace internal\n\n/** \\ingroup SparseCore_Module\n  *\n  * \\class Triplet\n  *\n  * \\brief A small structure to hold a non zero as a triplet (i,j,value).\n  *\n  * \\sa SparseMatrix::setFromTriplets()\n  */\ntemplate<typename Scalar, typename StorageIndex=typename SparseMatrix<Scalar>::StorageIndex >\nclass Triplet\n{\npublic:\n  Triplet() : m_row(0), m_col(0), m_value(0) {}\n\n  Triplet(const StorageIndex& i, const StorageIndex& j, const Scalar& v = Scalar(0))\n    : m_row(i), m_col(j), m_value(v)\n  {}\n\n  /** \\returns the row index of the element */\n  const StorageIndex& row() const { return m_row; }\n\n  /** \\returns the column index of the element */\n  const StorageIndex& col() const { return m_col; }\n\n  /** \\returns the value of the element */\n  const Scalar& value() const { return m_value; }\nprotected:\n  StorageIndex m_row, m_col;\n  Scalar m_value;\n};\n\n} // end namespace Eigen\n\n#endif // EIGEN_SPARSEUTIL_H\n"
  },
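The `Triplet` class declared above is the (row, column, value) carrier consumed by `SparseMatrix::setFromTriplets()`, which sums duplicates by default. Editor-added sketch with illustrative values:

```cpp
#include <Eigen/SparseCore>
#include <iostream>
#include <vector>

int main() {
  using T = Eigen::Triplet<double>;
  std::vector<T> triplets = {
    T(0, 0, 1.0),
    T(1, 2, 3.0),
    T(1, 2, 0.5),  // duplicate coordinates are accumulated
  };

  Eigen::SparseMatrix<double> A(2, 3);
  A.setFromTriplets(triplets.begin(), triplets.end());
  std::cout << A.coeff(1, 2) << std::endl;  // prints: 3.5
  return 0;
}
```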
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/SparseCore/SparseVector.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008-2015 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_SPARSEVECTOR_H\n#define EIGEN_SPARSEVECTOR_H\n\nnamespace Eigen { \n\n/** \\ingroup SparseCore_Module\n  * \\class SparseVector\n  *\n  * \\brief a sparse vector class\n  *\n  * \\tparam _Scalar the scalar type, i.e. the type of the coefficients\n  *\n  * See http://www.netlib.org/linalg/html_templates/node91.html for details on the storage scheme.\n  *\n  * This class can be extended with the help of the plugin mechanism described on the page\n  * \\ref TopicCustomizing_Plugins by defining the preprocessor symbol \\c EIGEN_SPARSEVECTOR_PLUGIN.\n  */\n\nnamespace internal {\ntemplate<typename _Scalar, int _Options, typename _StorageIndex>\nstruct traits<SparseVector<_Scalar, _Options, _StorageIndex> >\n{\n  typedef _Scalar Scalar;\n  typedef _StorageIndex StorageIndex;\n  typedef Sparse StorageKind;\n  typedef MatrixXpr XprKind;\n  enum {\n    IsColVector = (_Options & RowMajorBit) ? 0 : 1,\n\n    RowsAtCompileTime = IsColVector ? Dynamic : 1,\n    ColsAtCompileTime = IsColVector ? 1 : Dynamic,\n    MaxRowsAtCompileTime = RowsAtCompileTime,\n    MaxColsAtCompileTime = ColsAtCompileTime,\n    Flags = _Options | NestByRefBit | LvalueBit | (IsColVector ? 0 : RowMajorBit) | CompressedAccessBit,\n    SupportedAccessPatterns = InnerRandomAccessPattern\n  };\n};\n\n// Sparse-Vector-Assignment kinds:\nenum {\n  SVA_RuntimeSwitch,\n  SVA_Inner,\n  SVA_Outer\n};\n\ntemplate< typename Dest, typename Src,\n          int AssignmentKind = !bool(Src::IsVectorAtCompileTime) ? SVA_RuntimeSwitch\n                             : Src::InnerSizeAtCompileTime==1 ? SVA_Outer\n                             : SVA_Inner>\nstruct sparse_vector_assign_selector;\n\n}\n\ntemplate<typename _Scalar, int _Options, typename _StorageIndex>\nclass SparseVector\n  : public SparseCompressedBase<SparseVector<_Scalar, _Options, _StorageIndex> >\n{\n    typedef SparseCompressedBase<SparseVector> Base;\n    using Base::convert_index;\n  public:\n    EIGEN_SPARSE_PUBLIC_INTERFACE(SparseVector)\n    EIGEN_SPARSE_INHERIT_ASSIGNMENT_OPERATOR(SparseVector, +=)\n    EIGEN_SPARSE_INHERIT_ASSIGNMENT_OPERATOR(SparseVector, -=)\n    \n    typedef internal::CompressedStorage<Scalar,StorageIndex> Storage;\n    enum { IsColVector = internal::traits<SparseVector>::IsColVector };\n    \n    enum {\n      Options = _Options\n    };\n    \n    EIGEN_STRONG_INLINE Index rows() const { return IsColVector ? m_size : 1; }\n    EIGEN_STRONG_INLINE Index cols() const { return IsColVector ? 
1 : m_size; }\n    EIGEN_STRONG_INLINE Index innerSize() const { return m_size; }\n    EIGEN_STRONG_INLINE Index outerSize() const { return 1; }\n\n    EIGEN_STRONG_INLINE const Scalar* valuePtr() const { return m_data.valuePtr(); }\n    EIGEN_STRONG_INLINE Scalar* valuePtr() { return m_data.valuePtr(); }\n\n    EIGEN_STRONG_INLINE const StorageIndex* innerIndexPtr() const { return m_data.indexPtr(); }\n    EIGEN_STRONG_INLINE StorageIndex* innerIndexPtr() { return m_data.indexPtr(); }\n\n    inline const StorageIndex* outerIndexPtr() const { return 0; }\n    inline StorageIndex* outerIndexPtr() { return 0; }\n    inline const StorageIndex* innerNonZeroPtr() const { return 0; }\n    inline StorageIndex* innerNonZeroPtr() { return 0; }\n    \n    /** \\internal */\n    inline Storage& data() { return m_data; }\n    /** \\internal */\n    inline const Storage& data() const { return m_data; }\n\n    inline Scalar coeff(Index row, Index col) const\n    {\n      eigen_assert(IsColVector ? (col==0 && row>=0 && row<m_size) : (row==0 && col>=0 && col<m_size));\n      return coeff(IsColVector ? row : col);\n    }\n    inline Scalar coeff(Index i) const\n    {\n      eigen_assert(i>=0 && i<m_size);\n      return m_data.at(StorageIndex(i));\n    }\n\n    inline Scalar& coeffRef(Index row, Index col)\n    {\n      eigen_assert(IsColVector ? (col==0 && row>=0 && row<m_size) : (row==0 && col>=0 && col<m_size));\n      return coeffRef(IsColVector ? row : col);\n    }\n\n    /** \\returns a reference to the coefficient value at given index \\a i\n      * This operation involves a log(rho*size) binary search. If the coefficient does not\n      * exist yet, then a sorted insertion into a sequential buffer is performed.\n      *\n      * This insertion might be very costly if the number of nonzeros above \\a i is large.\n      */\n    inline Scalar& coeffRef(Index i)\n    {\n      eigen_assert(i>=0 && i<m_size);\n\n      return m_data.atWithInsertion(StorageIndex(i));\n    }\n\n  public:\n\n    typedef typename Base::InnerIterator InnerIterator;\n    typedef typename Base::ReverseInnerIterator ReverseInnerIterator;\n\n    inline void setZero() { m_data.clear(); }\n\n    /** \\returns the number of non zero coefficients */\n    inline Index nonZeros() const  { return m_data.size(); }\n\n    inline void startVec(Index outer)\n    {\n      EIGEN_UNUSED_VARIABLE(outer);\n      eigen_assert(outer==0);\n    }\n\n    inline Scalar& insertBackByOuterInner(Index outer, Index inner)\n    {\n      EIGEN_UNUSED_VARIABLE(outer);\n      eigen_assert(outer==0);\n      return insertBack(inner);\n    }\n    inline Scalar& insertBack(Index i)\n    {\n      m_data.append(0, i);\n      return m_data.value(m_data.size()-1);\n    }\n    \n    Scalar& insertBackByOuterInnerUnordered(Index outer, Index inner)\n    {\n      EIGEN_UNUSED_VARIABLE(outer);\n      eigen_assert(outer==0);\n      return insertBackUnordered(inner);\n    }\n    inline Scalar& insertBackUnordered(Index i)\n    {\n      m_data.append(0, i);\n      return m_data.value(m_data.size()-1);\n    }\n\n    inline Scalar& insert(Index row, Index col)\n    {\n      eigen_assert(IsColVector ? (col==0 && row>=0 && row<m_size) : (row==0 && col>=0 && col<m_size));\n      \n      Index inner = IsColVector ? row : col;\n      Index outer = IsColVector ? 
col : row;\n      EIGEN_ONLY_USED_FOR_DEBUG(outer);\n      eigen_assert(outer==0);\n      return insert(inner);\n    }\n    Scalar& insert(Index i)\n    {\n      eigen_assert(i>=0 && i<m_size);\n      \n      Index startId = 0;\n      Index p = Index(m_data.size()) - 1;\n      // TODO smart realloc\n      m_data.resize(p+2,1);\n\n      while ( (p >= startId) && (m_data.index(p) > i) )\n      {\n        m_data.index(p+1) = m_data.index(p);\n        m_data.value(p+1) = m_data.value(p);\n        --p;\n      }\n      m_data.index(p+1) = convert_index(i);\n      m_data.value(p+1) = 0;\n      return m_data.value(p+1);\n    }\n\n    /**\n      */\n    inline void reserve(Index reserveSize) { m_data.reserve(reserveSize); }\n\n\n    inline void finalize() {}\n\n    /** \\copydoc SparseMatrix::prune(const Scalar&,const RealScalar&) */\n    void prune(const Scalar& reference, const RealScalar& epsilon = NumTraits<RealScalar>::dummy_precision())\n    {\n      m_data.prune(reference,epsilon);\n    }\n\n    /** Resizes the sparse vector to \\a rows x \\a cols\n      *\n      * This method is provided for compatibility with matrices.\n      * For a column vector, \\a cols must be equal to 1.\n      * For a row vector, \\a rows must be equal to 1.\n      *\n      * \\sa resize(Index)\n      */\n    void resize(Index rows, Index cols)\n    {\n      eigen_assert((IsColVector ? cols : rows)==1 && \"Outer dimension must equal 1\");\n      resize(IsColVector ? rows : cols);\n    }\n\n    /** Resizes the sparse vector to \\a newSize\n      * This method deletes all entries, thus leaving an empty sparse vector\n      *\n      * \\sa  conservativeResize(), setZero() */\n    void resize(Index newSize)\n    {\n      m_size = newSize;\n      m_data.clear();\n    }\n\n    /** Resizes the sparse vector to \\a newSize, while leaving old values untouched.\n      *\n      * If the size of the vector is decreased, then the storage of the out-of bounds coefficients is kept and reserved.\n      * Call .data().squeeze() to free extra memory.\n      *\n      * \\sa reserve(), setZero()\n      */\n    void conservativeResize(Index newSize)\n    {\n      if (newSize < m_size)\n      {\n        Index i = 0;\n        while (i<m_data.size() && m_data.index(i)<newSize) ++i;\n        m_data.resize(i);\n      }\n      m_size = newSize;\n    }\n\n    void resizeNonZeros(Index size) { m_data.resize(size); }\n\n    inline SparseVector() : m_size(0) { check_template_parameters(); resize(0); }\n\n    explicit inline SparseVector(Index size) : m_size(0) { check_template_parameters(); resize(size); }\n\n    inline SparseVector(Index rows, Index cols) : m_size(0) { check_template_parameters(); resize(rows,cols); }\n\n    template<typename OtherDerived>\n    inline SparseVector(const SparseMatrixBase<OtherDerived>& other)\n      : m_size(0)\n    {\n      #ifdef EIGEN_SPARSE_CREATE_TEMPORARY_PLUGIN\n        EIGEN_SPARSE_CREATE_TEMPORARY_PLUGIN\n      #endif\n      check_template_parameters();\n      *this = other.derived();\n    }\n\n    inline SparseVector(const SparseVector& other)\n      : Base(other), m_size(0)\n    {\n      check_template_parameters();\n      *this = other.derived();\n    }\n\n    /** Swaps the values of \\c *this and \\a other.\n      * Overloaded for performance: this version performs a \\em shallow swap by swapping pointers and attributes only.\n      * \\sa SparseMatrixBase::swap()\n      */\n    inline void swap(SparseVector& other)\n    {\n      std::swap(m_size, other.m_size);\n      m_data.swap(other.m_data);\n    
}\n\n    template<int OtherOptions>\n    inline void swap(SparseMatrix<Scalar,OtherOptions,StorageIndex>& other)\n    {\n      eigen_assert(other.outerSize()==1);\n      std::swap(m_size, other.m_innerSize);\n      m_data.swap(other.m_data);\n    }\n\n    inline SparseVector& operator=(const SparseVector& other)\n    {\n      if (other.isRValue())\n      {\n        swap(other.const_cast_derived());\n      }\n      else\n      {\n        resize(other.size());\n        m_data = other.m_data;\n      }\n      return *this;\n    }\n\n    template<typename OtherDerived>\n    inline SparseVector& operator=(const SparseMatrixBase<OtherDerived>& other)\n    {\n      SparseVector tmp(other.size());\n      internal::sparse_vector_assign_selector<SparseVector,OtherDerived>::run(tmp,other.derived());\n      this->swap(tmp);\n      return *this;\n    }\n\n    #ifndef EIGEN_PARSED_BY_DOXYGEN\n    template<typename Lhs, typename Rhs>\n    inline SparseVector& operator=(const SparseSparseProduct<Lhs,Rhs>& product)\n    {\n      return Base::operator=(product);\n    }\n    #endif\n\n    friend std::ostream & operator << (std::ostream & s, const SparseVector& m)\n    {\n      for (Index i=0; i<m.nonZeros(); ++i)\n        s << \"(\" << m.m_data.value(i) << \",\" << m.m_data.index(i) << \") \";\n      s << std::endl;\n      return s;\n    }\n\n    /** Destructor */\n    inline ~SparseVector() {}\n\n    /** Overloaded for performance */\n    Scalar sum() const;\n\n  public:\n\n    /** \\internal \\deprecated use setZero() and reserve() */\n    EIGEN_DEPRECATED void startFill(Index reserve)\n    {\n      setZero();\n      m_data.reserve(reserve);\n    }\n\n    /** \\internal \\deprecated use insertBack(Index,Index) */\n    EIGEN_DEPRECATED Scalar& fill(Index r, Index c)\n    {\n      eigen_assert(r==0 || c==0);\n      return fill(IsColVector ? r : c);\n    }\n\n    /** \\internal \\deprecated use insertBack(Index) */\n    EIGEN_DEPRECATED Scalar& fill(Index i)\n    {\n      m_data.append(0, i);\n      return m_data.value(m_data.size()-1);\n    }\n\n    /** \\internal \\deprecated use insert(Index,Index) */\n    EIGEN_DEPRECATED Scalar& fillrand(Index r, Index c)\n    {\n      eigen_assert(r==0 || c==0);\n      return fillrand(IsColVector ? 
r : c);\n    }\n\n    /** \\internal \\deprecated use insert(Index) */\n    EIGEN_DEPRECATED Scalar& fillrand(Index i)\n    {\n      return insert(i);\n    }\n\n    /** \\internal \\deprecated use finalize() */\n    EIGEN_DEPRECATED void endFill() {}\n    \n    // These two functions were here in the 3.1 release, so let's keep them in case some code rely on them.\n    /** \\internal \\deprecated use data() */\n    EIGEN_DEPRECATED Storage& _data() { return m_data; }\n    /** \\internal \\deprecated use data() */\n    EIGEN_DEPRECATED const Storage& _data() const { return m_data; }\n    \n#   ifdef EIGEN_SPARSEVECTOR_PLUGIN\n#     include EIGEN_SPARSEVECTOR_PLUGIN\n#   endif\n\nprotected:\n  \n    static void check_template_parameters()\n    {\n      EIGEN_STATIC_ASSERT(NumTraits<StorageIndex>::IsSigned,THE_INDEX_TYPE_MUST_BE_A_SIGNED_TYPE);\n      EIGEN_STATIC_ASSERT((_Options&(ColMajor|RowMajor))==Options,INVALID_MATRIX_TEMPLATE_PARAMETERS);\n    }\n    \n    Storage m_data;\n    Index m_size;\n};\n\nnamespace internal {\n\ntemplate<typename _Scalar, int _Options, typename _Index>\nstruct evaluator<SparseVector<_Scalar,_Options,_Index> >\n  : evaluator_base<SparseVector<_Scalar,_Options,_Index> >\n{\n  typedef SparseVector<_Scalar,_Options,_Index> SparseVectorType;\n  typedef evaluator_base<SparseVectorType> Base;\n  typedef typename SparseVectorType::InnerIterator InnerIterator;\n  typedef typename SparseVectorType::ReverseInnerIterator ReverseInnerIterator;\n  \n  enum {\n    CoeffReadCost = NumTraits<_Scalar>::ReadCost,\n    Flags = SparseVectorType::Flags\n  };\n\n  evaluator() : Base() {}\n  \n  explicit evaluator(const SparseVectorType &mat) : m_matrix(&mat)\n  {\n    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);\n  }\n  \n  inline Index nonZerosEstimate() const {\n    return m_matrix->nonZeros();\n  }\n  \n  operator SparseVectorType&() { return m_matrix->const_cast_derived(); }\n  operator const SparseVectorType&() const { return *m_matrix; }\n  \n  const SparseVectorType *m_matrix;\n};\n\ntemplate< typename Dest, typename Src>\nstruct sparse_vector_assign_selector<Dest,Src,SVA_Inner> {\n  static void run(Dest& dst, const Src& src) {\n    eigen_internal_assert(src.innerSize()==src.size());\n    typedef internal::evaluator<Src> SrcEvaluatorType;\n    SrcEvaluatorType srcEval(src);\n    for(typename SrcEvaluatorType::InnerIterator it(srcEval, 0); it; ++it)\n      dst.insert(it.index()) = it.value();\n  }\n};\n\ntemplate< typename Dest, typename Src>\nstruct sparse_vector_assign_selector<Dest,Src,SVA_Outer> {\n  static void run(Dest& dst, const Src& src) {\n    eigen_internal_assert(src.outerSize()==src.size());\n    typedef internal::evaluator<Src> SrcEvaluatorType;\n    SrcEvaluatorType srcEval(src);\n    for(Index i=0; i<src.size(); ++i)\n    {\n      typename SrcEvaluatorType::InnerIterator it(srcEval, i);\n      if(it)\n        dst.insert(i) = it.value();\n    }\n  }\n};\n\ntemplate< typename Dest, typename Src>\nstruct sparse_vector_assign_selector<Dest,Src,SVA_RuntimeSwitch> {\n  static void run(Dest& dst, const Src& src) {\n    if(src.outerSize()==1)  sparse_vector_assign_selector<Dest,Src,SVA_Inner>::run(dst, src);\n    else                    sparse_vector_assign_selector<Dest,Src,SVA_Outer>::run(dst, src);\n  }\n};\n\n}\n\n} // end namespace Eigen\n\n#endif // EIGEN_SPARSEVECTOR_H\n"
  },
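  {
    "path": "example/eigen_sparsevector_demo.cpp",
    "content": "// Editor's note: illustrative sketch only -- this file is hypothetical and is not part\n// of upstream Eigen 3.4.0 or of the regenie build. It exercises the public SparseVector\n// API implemented in external_libs/eigen-3.4.0/Eigen/src/SparseCore/SparseVector.h\n// (insert, reserve, prune, conservativeResize).\n#include <Eigen/SparseCore>\n#include <iostream>\n\nint main()\n{\n  Eigen::SparseVector<double> v(10);  // logical size 10, no stored entries yet\n  v.reserve(3);                       // pre-allocate room for 3 nonzeros\n  v.insert(2) = 1.0;                  // sorted insertion, as implemented in the header\n  v.insert(7) = 1e-16;\n  v.prune(1.0);                       // drop entries much smaller than reference*epsilon\n  v.conservativeResize(5);            // shrink; stored entries past the new size are dropped\n  std::cout << v.nonZeros() << \"\\n\";  // prints 1: only the entry at index 2 survives\n  return 0;\n}\n"
  },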
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/SparseCore/SparseView.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2011-2014 Gael Guennebaud <gael.guennebaud@inria.fr>\n// Copyright (C) 2010 Daniel Lowengrub <lowdanie@gmail.com>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_SPARSEVIEW_H\n#define EIGEN_SPARSEVIEW_H\n\nnamespace Eigen { \n\nnamespace internal {\n\ntemplate<typename MatrixType>\nstruct traits<SparseView<MatrixType> > : traits<MatrixType>\n{\n  typedef typename MatrixType::StorageIndex StorageIndex;\n  typedef Sparse StorageKind;\n  enum {\n    Flags = int(traits<MatrixType>::Flags) & (RowMajorBit)\n  };\n};\n\n} // end namespace internal\n\n/** \\ingroup SparseCore_Module\n  * \\class SparseView\n  *\n  * \\brief Expression of a dense or sparse matrix with zero or too small values removed\n  *\n  * \\tparam MatrixType the type of the object of which we are removing the small entries\n  *\n  * This class represents an expression of a given dense or sparse matrix with\n  * entries smaller than \\c reference * \\c epsilon are removed.\n  * It is the return type of MatrixBase::sparseView() and SparseMatrixBase::pruned()\n  * and most of the time this is the only way it is used.\n  *\n  * \\sa MatrixBase::sparseView(), SparseMatrixBase::pruned()\n  */\ntemplate<typename MatrixType>\nclass SparseView : public SparseMatrixBase<SparseView<MatrixType> >\n{\n  typedef typename MatrixType::Nested MatrixTypeNested;\n  typedef typename internal::remove_all<MatrixTypeNested>::type _MatrixTypeNested;\n  typedef SparseMatrixBase<SparseView > Base;\npublic:\n  EIGEN_SPARSE_PUBLIC_INTERFACE(SparseView)\n  typedef typename internal::remove_all<MatrixType>::type NestedExpression;\n\n  explicit SparseView(const MatrixType& mat, const Scalar& reference = Scalar(0),\n                      const RealScalar &epsilon = NumTraits<Scalar>::dummy_precision())\n    : m_matrix(mat), m_reference(reference), m_epsilon(epsilon) {}\n\n  inline Index rows() const { return m_matrix.rows(); }\n  inline Index cols() const { return m_matrix.cols(); }\n\n  inline Index innerSize() const { return m_matrix.innerSize(); }\n  inline Index outerSize() const { return m_matrix.outerSize(); }\n  \n  /** \\returns the nested expression */\n  const typename internal::remove_all<MatrixTypeNested>::type&\n  nestedExpression() const { return m_matrix; }\n  \n  Scalar reference() const { return m_reference; }\n  RealScalar epsilon() const { return m_epsilon; }\n  \nprotected:\n  MatrixTypeNested m_matrix;\n  Scalar m_reference;\n  RealScalar m_epsilon;\n};\n\nnamespace internal {\n\n// TODO find a way to unify the two following variants\n// This is tricky because implementing an inner iterator on top of an IndexBased evaluator is\n// not easy because the evaluators do not expose the sizes of the underlying expression.\n  \ntemplate<typename ArgType>\nstruct unary_evaluator<SparseView<ArgType>, IteratorBased>\n  : public evaluator_base<SparseView<ArgType> >\n{\n    typedef typename evaluator<ArgType>::InnerIterator EvalIterator;\n  public:\n    typedef SparseView<ArgType> XprType;\n    \n    class InnerIterator : public EvalIterator\n    {\n      protected:\n        typedef typename XprType::Scalar Scalar;\n      public:\n\n        EIGEN_STRONG_INLINE InnerIterator(const unary_evaluator& sve, Index outer)\n          : 
EvalIterator(sve.m_argImpl,outer), m_view(sve.m_view)\n        {\n          incrementToNonZero();\n        }\n\n        EIGEN_STRONG_INLINE InnerIterator& operator++()\n        {\n          EvalIterator::operator++();\n          incrementToNonZero();\n          return *this;\n        }\n\n        using EvalIterator::value;\n\n      protected:\n        const XprType &m_view;\n\n      private:\n        void incrementToNonZero()\n        {\n          while((bool(*this)) && internal::isMuchSmallerThan(value(), m_view.reference(), m_view.epsilon()))\n          {\n            EvalIterator::operator++();\n          }\n        }\n    };\n    \n    enum {\n      CoeffReadCost = evaluator<ArgType>::CoeffReadCost,\n      Flags = XprType::Flags\n    };\n    \n    explicit unary_evaluator(const XprType& xpr) : m_argImpl(xpr.nestedExpression()), m_view(xpr) {}\n\n  protected:\n    evaluator<ArgType> m_argImpl;\n    const XprType &m_view;\n};\n\ntemplate<typename ArgType>\nstruct unary_evaluator<SparseView<ArgType>, IndexBased>\n  : public evaluator_base<SparseView<ArgType> >\n{\n  public:\n    typedef SparseView<ArgType> XprType;\n  protected:\n    enum { IsRowMajor = (XprType::Flags&RowMajorBit)==RowMajorBit };\n    typedef typename XprType::Scalar Scalar;\n    typedef typename XprType::StorageIndex StorageIndex;\n  public:\n    \n    class InnerIterator\n    {\n      public:\n\n        EIGEN_STRONG_INLINE InnerIterator(const unary_evaluator& sve, Index outer)\n          : m_sve(sve), m_inner(0), m_outer(outer), m_end(sve.m_view.innerSize())\n        {\n          incrementToNonZero();\n        }\n\n        EIGEN_STRONG_INLINE InnerIterator& operator++()\n        {\n          m_inner++;\n          incrementToNonZero();\n          return *this;\n        }\n\n        EIGEN_STRONG_INLINE Scalar value() const\n        {\n          return (IsRowMajor) ? m_sve.m_argImpl.coeff(m_outer, m_inner)\n                              : m_sve.m_argImpl.coeff(m_inner, m_outer);\n        }\n\n        EIGEN_STRONG_INLINE StorageIndex index() const { return m_inner; }\n        inline Index row() const { return IsRowMajor ? m_outer : index(); }\n        inline Index col() const { return IsRowMajor ? 
index() : m_outer; }\n\n        EIGEN_STRONG_INLINE operator bool() const { return m_inner < m_end && m_inner>=0; }\n\n      protected:\n        const unary_evaluator &m_sve;\n        Index m_inner;\n        const Index m_outer;\n        const Index m_end;\n\n      private:\n        void incrementToNonZero()\n        {\n          while((bool(*this)) && internal::isMuchSmallerThan(value(), m_sve.m_view.reference(), m_sve.m_view.epsilon()))\n          {\n            m_inner++;\n          }\n        }\n    };\n    \n    enum {\n      CoeffReadCost = evaluator<ArgType>::CoeffReadCost,\n      Flags = XprType::Flags\n    };\n    \n    explicit unary_evaluator(const XprType& xpr) : m_argImpl(xpr.nestedExpression()), m_view(xpr) {}\n\n  protected:\n    evaluator<ArgType> m_argImpl;\n    const XprType &m_view;\n};\n\n} // end namespace internal\n\n/** \\ingroup SparseCore_Module\n  *\n  * \\returns a sparse expression of the dense expression \\c *this with values smaller than\n  * \\a reference * \\a epsilon removed.\n  *\n  * This method is typically used when prototyping to convert a quickly assembled dense Matrix \\c D to a SparseMatrix \\c S:\n  * \\code\n  * MatrixXd D(n,m);\n  * SparseMatrix<double> S;\n  * S = D.sparseView();             // suppress numerical zeros (exact)\n  * S = D.sparseView(reference);\n  * S = D.sparseView(reference,epsilon);\n  * \\endcode\n  * where \\a reference is a meaningful non zero reference value,\n  * and \\a epsilon is a tolerance factor defaulting to NumTraits<Scalar>::dummy_precision().\n  *\n  * \\sa SparseMatrixBase::pruned(), class SparseView */\ntemplate<typename Derived>\nconst SparseView<Derived> MatrixBase<Derived>::sparseView(const Scalar& reference,\n                                                          const typename NumTraits<Scalar>::Real& epsilon) const\n{\n  return SparseView<Derived>(derived(), reference, epsilon);\n}\n\n/** \\returns an expression of \\c *this with values smaller than\n  * \\a reference * \\a epsilon removed.\n  *\n  * This method is typically used in conjunction with the product of two sparse matrices\n  * to automatically prune the smallest values as follows:\n  * \\code\n  * C = (A*B).pruned();             // suppress numerical zeros (exact)\n  * C = (A*B).pruned(ref);\n  * C = (A*B).pruned(ref,epsilon);\n  * \\endcode\n  * where \\c ref is a meaningful non zero reference value.\n  * */\ntemplate<typename Derived>\nconst SparseView<Derived>\nSparseMatrixBase<Derived>::pruned(const Scalar& reference,\n                                  const RealScalar& epsilon) const\n{\n  return SparseView<Derived>(derived(), reference, epsilon);\n}\n\n} // end namespace Eigen\n\n#endif\n"
  },
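  {
    "path": "example/eigen_sparseview_demo.cpp",
    "content": "// Editor's note: illustrative sketch only -- this file is hypothetical and is not part\n// of upstream Eigen 3.4.0 or of the regenie build. It demonstrates the sparseView()\n// and pruned() entry points documented in SparseView.h above.\n#include <Eigen/Dense>\n#include <Eigen/SparseCore>\n#include <iostream>\n\nint main()\n{\n  Eigen::MatrixXd D(3, 3);\n  D << 1.0, 0.0, 1e-16,\n       0.0, 2.0, 0.0,\n       0.0, 0.0, 3.0;\n\n  Eigen::SparseMatrix<double> S = D.sparseView();          // suppress exact zeros only\n  Eigen::SparseMatrix<double> T = D.sparseView(1.0, 1e-8); // also drop |x| <= 1.0 * 1e-8\n\n  // pruned() plays the same role for sparse expressions, e.g. products:\n  Eigen::SparseMatrix<double> C = (S * S).pruned();\n\n  std::cout << S.nonZeros() << \" \" << T.nonZeros() << \" \" << C.nonZeros() << \"\\n\"; // 4 3 4\n  return 0;\n}\n"
  },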
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/SparseCore/TriangularSolver.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_SPARSETRIANGULARSOLVER_H\n#define EIGEN_SPARSETRIANGULARSOLVER_H\n\nnamespace Eigen { \n\nnamespace internal {\n\ntemplate<typename Lhs, typename Rhs, int Mode,\n  int UpLo = (Mode & Lower)\n           ? Lower\n           : (Mode & Upper)\n           ? Upper\n           : -1,\n  int StorageOrder = int(traits<Lhs>::Flags) & RowMajorBit>\nstruct sparse_solve_triangular_selector;\n\n// forward substitution, row-major\ntemplate<typename Lhs, typename Rhs, int Mode>\nstruct sparse_solve_triangular_selector<Lhs,Rhs,Mode,Lower,RowMajor>\n{\n  typedef typename Rhs::Scalar Scalar;\n  typedef evaluator<Lhs> LhsEval;\n  typedef typename evaluator<Lhs>::InnerIterator LhsIterator;\n  static void run(const Lhs& lhs, Rhs& other)\n  {\n    LhsEval lhsEval(lhs);\n    for(Index col=0 ; col<other.cols() ; ++col)\n    {\n      for(Index i=0; i<lhs.rows(); ++i)\n      {\n        Scalar tmp = other.coeff(i,col);\n        Scalar lastVal(0);\n        Index lastIndex = 0;\n        for(LhsIterator it(lhsEval, i); it; ++it)\n        {\n          lastVal = it.value();\n          lastIndex = it.index();\n          if(lastIndex==i)\n            break;\n          tmp -= lastVal * other.coeff(lastIndex,col);\n        }\n        if (Mode & UnitDiag)\n          other.coeffRef(i,col) = tmp;\n        else\n        {\n          eigen_assert(lastIndex==i);\n          other.coeffRef(i,col) = tmp/lastVal;\n        }\n      }\n    }\n  }\n};\n\n// backward substitution, row-major\ntemplate<typename Lhs, typename Rhs, int Mode>\nstruct sparse_solve_triangular_selector<Lhs,Rhs,Mode,Upper,RowMajor>\n{\n  typedef typename Rhs::Scalar Scalar;\n  typedef evaluator<Lhs> LhsEval;\n  typedef typename evaluator<Lhs>::InnerIterator LhsIterator;\n  static void run(const Lhs& lhs, Rhs& other)\n  {\n    LhsEval lhsEval(lhs);\n    for(Index col=0 ; col<other.cols() ; ++col)\n    {\n      for(Index i=lhs.rows()-1 ; i>=0 ; --i)\n      {\n        Scalar tmp = other.coeff(i,col);\n        Scalar l_ii(0);\n        LhsIterator it(lhsEval, i);\n        while(it && it.index()<i)\n          ++it;\n        if(!(Mode & UnitDiag))\n        {\n          eigen_assert(it && it.index()==i);\n          l_ii = it.value();\n          ++it;\n        }\n        else if (it && it.index() == i)\n          ++it;\n        for(; it; ++it)\n        {\n          tmp -= it.value() * other.coeff(it.index(),col);\n        }\n\n        if (Mode & UnitDiag)  other.coeffRef(i,col) = tmp;\n        else                  other.coeffRef(i,col) = tmp/l_ii;\n      }\n    }\n  }\n};\n\n// forward substitution, col-major\ntemplate<typename Lhs, typename Rhs, int Mode>\nstruct sparse_solve_triangular_selector<Lhs,Rhs,Mode,Lower,ColMajor>\n{\n  typedef typename Rhs::Scalar Scalar;\n  typedef evaluator<Lhs> LhsEval;\n  typedef typename evaluator<Lhs>::InnerIterator LhsIterator;\n  static void run(const Lhs& lhs, Rhs& other)\n  {\n    LhsEval lhsEval(lhs);\n    for(Index col=0 ; col<other.cols() ; ++col)\n    {\n      for(Index i=0; i<lhs.cols(); ++i)\n      {\n        Scalar& tmp = other.coeffRef(i,col);\n        if (tmp!=Scalar(0)) // optimization when other is actually sparse\n        {\n      
    LhsIterator it(lhsEval, i);\n          while(it && it.index()<i)\n            ++it;\n          if(!(Mode & UnitDiag))\n          {\n            eigen_assert(it && it.index()==i);\n            tmp /= it.value();\n          }\n          if (it && it.index()==i)\n            ++it;\n          for(; it; ++it)\n            other.coeffRef(it.index(), col) -= tmp * it.value();\n        }\n      }\n    }\n  }\n};\n\n// backward substitution, col-major\ntemplate<typename Lhs, typename Rhs, int Mode>\nstruct sparse_solve_triangular_selector<Lhs,Rhs,Mode,Upper,ColMajor>\n{\n  typedef typename Rhs::Scalar Scalar;\n  typedef evaluator<Lhs> LhsEval;\n  typedef typename evaluator<Lhs>::InnerIterator LhsIterator;\n  static void run(const Lhs& lhs, Rhs& other)\n  {\n    LhsEval lhsEval(lhs);\n    for(Index col=0 ; col<other.cols() ; ++col)\n    {\n      for(Index i=lhs.cols()-1; i>=0; --i)\n      {\n        Scalar& tmp = other.coeffRef(i,col);\n        if (tmp!=Scalar(0)) // optimization when other is actually sparse\n        {\n          if(!(Mode & UnitDiag))\n          {\n            // TODO replace this by a binary search. make sure the binary search is safe for partially sorted elements\n            LhsIterator it(lhsEval, i);\n            while(it && it.index()!=i)\n              ++it;\n            eigen_assert(it && it.index()==i);\n            other.coeffRef(i,col) /= it.value();\n          }\n          LhsIterator it(lhsEval, i);\n          for(; it && it.index()<i; ++it)\n            other.coeffRef(it.index(), col) -= tmp * it.value();\n        }\n      }\n    }\n  }\n};\n\n} // end namespace internal\n\n#ifndef EIGEN_PARSED_BY_DOXYGEN\n\ntemplate<typename ExpressionType,unsigned int Mode>\ntemplate<typename OtherDerived>\nvoid TriangularViewImpl<ExpressionType,Mode,Sparse>::solveInPlace(MatrixBase<OtherDerived>& other) const\n{\n  eigen_assert(derived().cols() == derived().rows() && derived().cols() == other.rows());\n  eigen_assert((!(Mode & ZeroDiag)) && bool(Mode & (Upper|Lower)));\n\n  enum { copy = internal::traits<OtherDerived>::Flags & RowMajorBit };\n\n  typedef typename internal::conditional<copy,\n    typename internal::plain_matrix_type_column_major<OtherDerived>::type, OtherDerived&>::type OtherCopy;\n  OtherCopy otherCopy(other.derived());\n\n  internal::sparse_solve_triangular_selector<ExpressionType, typename internal::remove_reference<OtherCopy>::type, Mode>::run(derived().nestedExpression(), otherCopy);\n\n  if (copy)\n    other = otherCopy;\n}\n#endif\n\n// pure sparse path\n\nnamespace internal {\n\ntemplate<typename Lhs, typename Rhs, int Mode,\n  int UpLo = (Mode & Lower)\n           ? Lower\n           : (Mode & Upper)\n           ? 
Upper\n           : -1,\n  int StorageOrder = int(Lhs::Flags) & (RowMajorBit)>\nstruct sparse_solve_triangular_sparse_selector;\n\n// forward substitution, col-major\ntemplate<typename Lhs, typename Rhs, int Mode, int UpLo>\nstruct sparse_solve_triangular_sparse_selector<Lhs,Rhs,Mode,UpLo,ColMajor>\n{\n  typedef typename Rhs::Scalar Scalar;\n  typedef typename promote_index_type<typename traits<Lhs>::StorageIndex,\n                                      typename traits<Rhs>::StorageIndex>::type StorageIndex;\n  static void run(const Lhs& lhs, Rhs& other)\n  {\n    const bool IsLower = (UpLo==Lower);\n    AmbiVector<Scalar,StorageIndex> tempVector(other.rows()*2);\n    tempVector.setBounds(0,other.rows());\n\n    Rhs res(other.rows(), other.cols());\n    res.reserve(other.nonZeros());\n\n    for(Index col=0 ; col<other.cols() ; ++col)\n    {\n      // FIXME estimate number of non zeros\n      tempVector.init(.99/*float(other.col(col).nonZeros())/float(other.rows())*/);\n      tempVector.setZero();\n      tempVector.restart();\n      for (typename Rhs::InnerIterator rhsIt(other, col); rhsIt; ++rhsIt)\n      {\n        tempVector.coeffRef(rhsIt.index()) = rhsIt.value();\n      }\n\n      for(Index i=IsLower?0:lhs.cols()-1;\n          IsLower?i<lhs.cols():i>=0;\n          i+=IsLower?1:-1)\n      {\n        tempVector.restart();\n        Scalar& ci = tempVector.coeffRef(i);\n        if (ci!=Scalar(0))\n        {\n          // find\n          typename Lhs::InnerIterator it(lhs, i);\n          if(!(Mode & UnitDiag))\n          {\n            if (IsLower)\n            {\n              eigen_assert(it.index()==i);\n              ci /= it.value();\n            }\n            else\n              ci /= lhs.coeff(i,i);\n          }\n          tempVector.restart();\n          if (IsLower)\n          {\n            if (it.index()==i)\n              ++it;\n            for(; it; ++it)\n              tempVector.coeffRef(it.index()) -= ci * it.value();\n          }\n          else\n          {\n            for(; it && it.index()<i; ++it)\n              tempVector.coeffRef(it.index()) -= ci * it.value();\n          }\n        }\n      }\n\n\n      Index count = 0;\n      // FIXME compute a reference value to filter zeros\n      for (typename AmbiVector<Scalar,StorageIndex>::Iterator it(tempVector/*,1e-12*/); it; ++it)\n      {\n        ++ count;\n//         std::cerr << \"fill \" << it.index() << \", \" << col << \"\\n\";\n//         std::cout << it.value() << \"  \";\n        // FIXME use insertBack\n        res.insert(it.index(), col) = it.value();\n      }\n//       std::cout << \"tempVector.nonZeros() == \" << int(count) << \" / \" << (other.rows()) << \"\\n\";\n    }\n    res.finalize();\n    other = res.markAsRValue();\n  }\n};\n\n} // end namespace internal\n\n#ifndef EIGEN_PARSED_BY_DOXYGEN\ntemplate<typename ExpressionType,unsigned int Mode>\ntemplate<typename OtherDerived>\nvoid TriangularViewImpl<ExpressionType,Mode,Sparse>::solveInPlace(SparseMatrixBase<OtherDerived>& other) const\n{\n  eigen_assert(derived().cols() == derived().rows() && derived().cols() == other.rows());\n  eigen_assert( (!(Mode & ZeroDiag)) && bool(Mode & (Upper|Lower)));\n\n//   enum { copy = internal::traits<OtherDerived>::Flags & RowMajorBit };\n\n//   typedef typename internal::conditional<copy,\n//     typename internal::plain_matrix_type_column_major<OtherDerived>::type, OtherDerived&>::type OtherCopy;\n//   OtherCopy otherCopy(other.derived());\n\n  internal::sparse_solve_triangular_sparse_selector<ExpressionType, 
OtherDerived, Mode>::run(derived().nestedExpression(), other.derived());\n\n//   if (copy)\n//     other = otherCopy;\n}\n#endif\n\n} // end namespace Eigen\n\n#endif // EIGEN_SPARSETRIANGULARSOLVER_H\n"
  },
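  {
    "path": "example/eigen_sparse_triangular_solve_demo.cpp",
    "content": "// Editor's note: illustrative sketch only -- this file is hypothetical and is not part\n// of upstream Eigen 3.4.0 or of the regenie build. It runs the sparse triangular\n// solveInPlace() path implemented in TriangularSolver.h above.\n#include <Eigen/SparseCore>\n#include <iostream>\n\nint main()\n{\n  // 3x3 sparse lower-triangular matrix L (column-major by default).\n  Eigen::SparseMatrix<double> L(3, 3);\n  L.insert(0, 0) = 2.0;\n  L.insert(1, 0) = 1.0;\n  L.insert(1, 1) = 2.0;\n  L.insert(2, 2) = 2.0;\n  L.makeCompressed();\n\n  Eigen::VectorXd b(3);\n  b << 2.0, 4.0, 2.0;\n\n  // Forward substitution, solving L x = b in place (b becomes x).\n  L.triangularView<Eigen::Lower>().solveInPlace(b);\n  std::cout << b.transpose() << \"\\n\";  // expected: 1 1.5 1\n  return 0;\n}\n"
  },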
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/SparseLU/SparseLU.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>\n// Copyright (C) 2012-2014 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n\n#ifndef EIGEN_SPARSE_LU_H\n#define EIGEN_SPARSE_LU_H\n\nnamespace Eigen {\n\ntemplate <typename _MatrixType, typename _OrderingType = COLAMDOrdering<typename _MatrixType::StorageIndex> > class SparseLU;\ntemplate <typename MappedSparseMatrixType> struct SparseLUMatrixLReturnType;\ntemplate <typename MatrixLType, typename MatrixUType> struct SparseLUMatrixUReturnType;\n\ntemplate <bool Conjugate,class SparseLUType>\nclass SparseLUTransposeView : public SparseSolverBase<SparseLUTransposeView<Conjugate,SparseLUType> >\n{\nprotected:\n  typedef SparseSolverBase<SparseLUTransposeView<Conjugate,SparseLUType> > APIBase;\n  using APIBase::m_isInitialized;\npublic:\n  typedef typename SparseLUType::Scalar Scalar;\n  typedef typename SparseLUType::StorageIndex StorageIndex;\n  typedef typename SparseLUType::MatrixType MatrixType;\n  typedef typename SparseLUType::OrderingType OrderingType;\n\n  enum {\n    ColsAtCompileTime = MatrixType::ColsAtCompileTime,\n    MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime\n  };\n\n  SparseLUTransposeView() : m_sparseLU(NULL) {}\n  SparseLUTransposeView(const SparseLUTransposeView& view) {\n    this->m_sparseLU = view.m_sparseLU;\n  }\n  void setIsInitialized(const bool isInitialized) {this->m_isInitialized = isInitialized;}\n  void setSparseLU(SparseLUType* sparseLU) {m_sparseLU = sparseLU;}\n  using APIBase::_solve_impl;\n  template<typename Rhs, typename Dest>\n  bool _solve_impl(const MatrixBase<Rhs> &B, MatrixBase<Dest> &X_base) const\n  {\n    Dest& X(X_base.derived());\n    eigen_assert(m_sparseLU->info() == Success && \"The matrix should be factorized first\");\n    EIGEN_STATIC_ASSERT((Dest::Flags&RowMajorBit)==0,\n                        THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES);\n\n\n    // this ugly const_cast_derived() helps to detect aliasing when applying the permutations\n    for(Index j = 0; j < B.cols(); ++j){\n      X.col(j) = m_sparseLU->colsPermutation() * B.const_cast_derived().col(j);\n    }\n    //Forward substitution with transposed or adjoint of U\n    m_sparseLU->matrixU().template solveTransposedInPlace<Conjugate>(X);\n\n    //Backward substitution with transposed or adjoint of L\n    m_sparseLU->matrixL().template solveTransposedInPlace<Conjugate>(X);\n\n    // Permute back the solution\n    for (Index j = 0; j < B.cols(); ++j)\n      X.col(j) = m_sparseLU->rowsPermutation().transpose() * X.col(j);\n    return true;\n  }\n  inline Index rows() const { return m_sparseLU->rows(); }\n  inline Index cols() const { return m_sparseLU->cols(); }\n\nprivate:\n  SparseLUType *m_sparseLU;\n  SparseLUTransposeView& operator=(const SparseLUTransposeView&);\n};\n\n\n/** \\ingroup SparseLU_Module\n  * \\class SparseLU\n  * \n  * \\brief Sparse supernodal LU factorization for general matrices\n  * \n  * This class implements the supernodal LU factorization for general matrices.\n  * It uses the main techniques from the sequential SuperLU package \n  * (http://crd-legacy.lbl.gov/~xiaoye/SuperLU/). 
It transparently handles real \n  * and complex arithmetic with single and double precision, depending on the \n  * scalar type of your input matrix. \n  * The code has been optimized to provide BLAS-3 operations during supernode-panel updates. \n  * It benefits directly from the built-in high-performance Eigen BLAS routines. \n  * Moreover, when the size of a supernode is very small, the BLAS calls are avoided to \n  * enable a better optimization from the compiler. For best performance, \n  * you should compile it with the NDEBUG flag to avoid the numerous bounds checking on vectors. \n  * \n  * An important parameter of this class is the ordering method. It is used to reorder the columns \n  * (and possibly the rows) of the matrix to reduce the number of new elements that are created during \n  * numerical factorization. The cheapest method available is COLAMD. \n  * See  \\link OrderingMethods_Module the OrderingMethods module \\endlink for the list of \n  * built-in and external ordering methods. \n  *\n  * Simple example with key steps \n  * \\code\n  * VectorXd x(n), b(n);\n  * SparseMatrix<double> A;\n  * SparseLU<SparseMatrix<double>, COLAMDOrdering<int> >   solver;\n  * // fill A and b;\n  * // Compute the ordering permutation vector from the structural pattern of A\n  * solver.analyzePattern(A); \n  * // Compute the numerical factorization \n  * solver.factorize(A); \n  * //Use the factors to solve the linear system \n  * x = solver.solve(b); \n  * \\endcode\n  * \n  * \\warning The input matrix A should be in a \\b compressed and \\b column-major form.\n  * Otherwise an expensive copy will be made. You can call the inexpensive makeCompressed() to get a compressed matrix.\n  * \n  * \\note Unlike the initial SuperLU implementation, there is no step to equilibrate the matrix. \n  * For badly scaled matrices, this step can be useful to reduce the pivoting during factorization. \n  * If this is the case for your matrices, you can try the basic scaling method at\n  *  \"unsupported/Eigen/src/IterativeSolvers/Scaling.h\"\n  * \n  * \\tparam _MatrixType The type of the sparse matrix. It must be a column-major SparseMatrix<>\n  * \\tparam _OrderingType The ordering method to use, either AMD, COLAMD or METIS. 
Default is COLAMD\n  *\n  * \\implsparsesolverconcept\n  * \n  * \\sa \\ref TutorialSparseSolverConcept\n  * \\sa \\ref OrderingMethods_Module\n  */\ntemplate <typename _MatrixType, typename _OrderingType>\nclass SparseLU : public SparseSolverBase<SparseLU<_MatrixType,_OrderingType> >, public internal::SparseLUImpl<typename _MatrixType::Scalar, typename _MatrixType::StorageIndex>\n{\n  protected:\n    typedef SparseSolverBase<SparseLU<_MatrixType,_OrderingType> > APIBase;\n    using APIBase::m_isInitialized;\n  public:\n    using APIBase::_solve_impl;\n    \n    typedef _MatrixType MatrixType; \n    typedef _OrderingType OrderingType;\n    typedef typename MatrixType::Scalar Scalar; \n    typedef typename MatrixType::RealScalar RealScalar; \n    typedef typename MatrixType::StorageIndex StorageIndex;\n    typedef SparseMatrix<Scalar,ColMajor,StorageIndex> NCMatrix;\n    typedef internal::MappedSuperNodalMatrix<Scalar, StorageIndex> SCMatrix;\n    typedef Matrix<Scalar,Dynamic,1> ScalarVector;\n    typedef Matrix<StorageIndex,Dynamic,1> IndexVector;\n    typedef PermutationMatrix<Dynamic, Dynamic, StorageIndex> PermutationType;\n    typedef internal::SparseLUImpl<Scalar, StorageIndex> Base;\n\n    enum {\n      ColsAtCompileTime = MatrixType::ColsAtCompileTime,\n      MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime\n    };\n    \n  public:\n\n    SparseLU():m_lastError(\"\"),m_Ustore(0,0,0,0,0,0),m_symmetricmode(false),m_diagpivotthresh(1.0),m_detPermR(1)\n    {\n      initperfvalues(); \n    }\n    explicit SparseLU(const MatrixType& matrix)\n      : m_lastError(\"\"),m_Ustore(0,0,0,0,0,0),m_symmetricmode(false),m_diagpivotthresh(1.0),m_detPermR(1)\n    {\n      initperfvalues(); \n      compute(matrix);\n    }\n    \n    ~SparseLU()\n    {\n      // Free all explicit dynamic pointers \n    }\n    \n    void analyzePattern (const MatrixType& matrix);\n    void factorize (const MatrixType& matrix);\n    void simplicialfactorize(const MatrixType& matrix);\n    \n    /**\n      * Compute the symbolic and numeric factorization of the input sparse matrix.\n      * The input matrix should be in column-major storage. 
\n      */\n    void compute (const MatrixType& matrix)\n    {\n      // Analyze \n      analyzePattern(matrix); \n      //Factorize\n      factorize(matrix);\n    } \n\n    /** \\returns an expression of the transposed of the factored matrix.\n      *\n      * A typical usage is to solve for the transposed problem A^T x = b:\n      * \\code\n      * solver.compute(A);\n      * x = solver.transpose().solve(b);\n      * \\endcode\n      *\n      * \\sa adjoint(), solve()\n      */\n    const SparseLUTransposeView<false,SparseLU<_MatrixType,_OrderingType> > transpose()\n    {\n      SparseLUTransposeView<false,  SparseLU<_MatrixType,_OrderingType> > transposeView;\n      transposeView.setSparseLU(this);\n      transposeView.setIsInitialized(this->m_isInitialized);\n      return transposeView;\n    }\n\n\n    /** \\returns an expression of the adjoint of the factored matrix\n      *\n      * A typical usage is to solve for the adjoint problem A' x = b:\n      * \\code\n      * solver.compute(A);\n      * x = solver.adjoint().solve(b);\n      * \\endcode\n      *\n      * For real scalar types, this function is equivalent to transpose().\n      *\n      * \\sa transpose(), solve()\n      */\n    const SparseLUTransposeView<true, SparseLU<_MatrixType,_OrderingType> > adjoint()\n    {\n      SparseLUTransposeView<true,  SparseLU<_MatrixType,_OrderingType> > adjointView;\n      adjointView.setSparseLU(this);\n      adjointView.setIsInitialized(this->m_isInitialized);\n      return adjointView;\n    }\n    \n    inline Index rows() const { return m_mat.rows(); }\n    inline Index cols() const { return m_mat.cols(); }\n    /** Indicate that the pattern of the input matrix is symmetric */\n    void isSymmetric(bool sym)\n    {\n      m_symmetricmode = sym;\n    }\n    \n    /** \\returns an expression of the matrix L, internally stored as supernodes\n      * The only operation available with this expression is the triangular solve\n      * \\code\n      * y = b; matrixL().solveInPlace(y);\n      * \\endcode\n      */\n    SparseLUMatrixLReturnType<SCMatrix> matrixL() const\n    {\n      return SparseLUMatrixLReturnType<SCMatrix>(m_Lstore);\n    }\n    /** \\returns an expression of the matrix U,\n      * The only operation available with this expression is the triangular solve\n      * \\code\n      * y = b; matrixU().solveInPlace(y);\n      * \\endcode\n      */\n    SparseLUMatrixUReturnType<SCMatrix,MappedSparseMatrix<Scalar,ColMajor,StorageIndex> > matrixU() const\n    {\n      return SparseLUMatrixUReturnType<SCMatrix, MappedSparseMatrix<Scalar,ColMajor,StorageIndex> >(m_Lstore, m_Ustore);\n    }\n\n    /**\n      * \\returns a reference to the row matrix permutation \\f$ P_r \\f$ such that \\f$P_r A P_c^T = L U\\f$\n      * \\sa colsPermutation()\n      */\n    inline const PermutationType& rowsPermutation() const\n    {\n      return m_perm_r;\n    }\n    /**\n      * \\returns a reference to the column matrix permutation\\f$ P_c^T \\f$ such that \\f$P_r A P_c^T = L U\\f$\n      * \\sa rowsPermutation()\n      */\n    inline const PermutationType& colsPermutation() const\n    {\n      return m_perm_c;\n    }\n    /** Set the threshold used for a diagonal entry to be an acceptable pivot. 
*/\n    void setPivotThreshold(const RealScalar& thresh)\n    {\n      m_diagpivotthresh = thresh; \n    }\n\n#ifdef EIGEN_PARSED_BY_DOXYGEN\n    /** \\returns the solution X of \\f$ A X = B \\f$ using the current decomposition of A.\n      *\n      * \\warning the destination matrix X in X = this->solve(B) must be column-major.\n      *\n      * \\sa compute()\n      */\n    template<typename Rhs>\n    inline const Solve<SparseLU, Rhs> solve(const MatrixBase<Rhs>& B) const;\n#endif // EIGEN_PARSED_BY_DOXYGEN\n    \n    /** \\brief Reports whether previous computation was successful.\n      *\n      * \\returns \\c Success if computation was successful,\n      *          \\c NumericalIssue if the LU factorization reports a problem, zero diagonal for instance\n      *          \\c InvalidInput if the input matrix is invalid\n      *\n      * \\sa iparm()          \n      */\n    ComputationInfo info() const\n    {\n      eigen_assert(m_isInitialized && \"Decomposition is not initialized.\");\n      return m_info;\n    }\n    \n    /**\n      * \\returns A string describing the type of error\n      */\n    std::string lastErrorMessage() const\n    {\n      return m_lastError; \n    }\n\n    template<typename Rhs, typename Dest>\n    bool _solve_impl(const MatrixBase<Rhs> &B, MatrixBase<Dest> &X_base) const\n    {\n      Dest& X(X_base.derived());\n      eigen_assert(m_factorizationIsOk && \"The matrix should be factorized first\");\n      EIGEN_STATIC_ASSERT((Dest::Flags&RowMajorBit)==0,\n                        THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES);\n      \n      // Permute the right hand side to form X = Pr*B\n      // on return, X is overwritten by the computed solution\n      X.resize(B.rows(),B.cols());\n\n      // this ugly const_cast_derived() helps to detect aliasing when applying the permutations\n      for(Index j = 0; j < B.cols(); ++j)\n        X.col(j) = rowsPermutation() * B.const_cast_derived().col(j);\n      \n      //Forward substitution with L, then backward substitution with U\n      this->matrixL().solveInPlace(X);\n      this->matrixU().solveInPlace(X);\n      \n      // Permute back the solution \n      for (Index j = 0; j < B.cols(); ++j)\n        X.col(j) = colsPermutation().inverse() * X.col(j);\n      \n      return true; \n    }\n    \n    /**\n      * \\returns the absolute value of the determinant of the matrix of which\n      * *this is the LU decomposition.\n      *\n      * \\warning a determinant can be very big or small, so for matrices\n      * of large enough dimension, there is a risk of overflow/underflow.\n      * One way to work around that is to use logAbsDeterminant() instead.\n      *\n      * \\sa logAbsDeterminant(), signDeterminant()\n      */\n    Scalar absDeterminant()\n    {\n      using std::abs;\n      eigen_assert(m_factorizationIsOk && \"The matrix should be factorized first.\");\n      // Initialize with the determinant of the row matrix\n      Scalar det = Scalar(1.);\n      // Note that the diagonal blocks of U are stored in supernodes,\n      // which are available in the  L part :)\n      for (Index j = 0; j < this->cols(); ++j)\n      {\n        for (typename SCMatrix::InnerIterator it(m_Lstore, j); it; ++it)\n        {\n          if(it.index() == j)\n          {\n            det *= abs(it.value());\n            break;\n          }\n        }\n      }\n      return det;\n    }\n\n    /** \\returns the natural log of the absolute value of the determinant of the matrix\n      * of which *this is the LU decomposition\n      *\n      * \\note This method is 
useful to work around the risk of overflow/underflow that's\n      * inherent to the determinant computation.\n      *\n      * \\sa absDeterminant(), signDeterminant()\n      */\n    Scalar logAbsDeterminant() const\n    {\n      using std::log;\n      using std::abs;\n\n      eigen_assert(m_factorizationIsOk && \"The matrix should be factorized first.\");\n      Scalar det = Scalar(0.);\n      for (Index j = 0; j < this->cols(); ++j)\n      {\n        for (typename SCMatrix::InnerIterator it(m_Lstore, j); it; ++it)\n        {\n          if(it.row() < j) continue;\n          if(it.row() == j)\n          {\n            det += log(abs(it.value()));\n            break;\n          }\n        }\n      }\n      return det;\n    }\n\n    /** \\returns A number representing the sign of the determinant\n      *\n      * \\sa absDeterminant(), logAbsDeterminant()\n      */\n    Scalar signDeterminant()\n    {\n      eigen_assert(m_factorizationIsOk && \"The matrix should be factorized first.\");\n      // Initialize with the determinant of the row matrix\n      Index det = 1;\n      // Note that the diagonal blocks of U are stored in supernodes,\n      // which are available in the  L part :)\n      for (Index j = 0; j < this->cols(); ++j)\n      {\n        for (typename SCMatrix::InnerIterator it(m_Lstore, j); it; ++it)\n        {\n          if(it.index() == j)\n          {\n            if(it.value()<0)\n              det = -det;\n            else if(it.value()==0)\n              return 0;\n            break;\n          }\n        }\n      }\n      return det * m_detPermR * m_detPermC;\n    }\n    \n    /** \\returns The determinant of the matrix.\n      *\n      * \\sa absDeterminant(), logAbsDeterminant()\n      */\n    Scalar determinant()\n    {\n      eigen_assert(m_factorizationIsOk && \"The matrix should be factorized first.\");\n      // Initialize with the determinant of the row matrix\n      Scalar det = Scalar(1.);\n      // Note that the diagonal blocks of U are stored in supernodes,\n      // which are available in the  L part :)\n      for (Index j = 0; j < this->cols(); ++j)\n      {\n        for (typename SCMatrix::InnerIterator it(m_Lstore, j); it; ++it)\n        {\n          if(it.index() == j)\n          {\n            det *= it.value();\n            break;\n          }\n        }\n      }\n      return (m_detPermR * m_detPermC) > 0 ? 
det : -det;\n    }\n\n    Index nnzL() const { return m_nnzL; };\n    Index nnzU() const { return m_nnzU; };\n\n  protected:\n    // Functions \n    void initperfvalues()\n    {\n      m_perfv.panel_size = 16;\n      m_perfv.relax = 1; \n      m_perfv.maxsuper = 128; \n      m_perfv.rowblk = 16; \n      m_perfv.colblk = 8; \n      m_perfv.fillfactor = 20;  \n    }\n      \n    // Variables \n    mutable ComputationInfo m_info;\n    bool m_factorizationIsOk;\n    bool m_analysisIsOk;\n    std::string m_lastError;\n    NCMatrix m_mat; // The input (permuted) matrix \n    SCMatrix m_Lstore; // The lower triangular matrix (supernodal)\n    MappedSparseMatrix<Scalar,ColMajor,StorageIndex> m_Ustore; // The upper triangular matrix\n    PermutationType m_perm_c; // Column permutation \n    PermutationType m_perm_r ; // Row permutation\n    IndexVector m_etree; // Column elimination tree \n    \n    typename Base::GlobalLU_t m_glu; \n                               \n    // SparseLU options \n    bool m_symmetricmode;\n    // values for performance \n    internal::perfvalues m_perfv;\n    RealScalar m_diagpivotthresh; // Specifies the threshold used for a diagonal entry to be an acceptable pivot\n    Index m_nnzL, m_nnzU; // Nonzeros in L and U factors\n    Index m_detPermR, m_detPermC; // Determinants of the permutation matrices\n  private:\n    // Disable copy constructor \n    SparseLU (const SparseLU& );\n}; // End class SparseLU\n\n\n\n// Functions needed by the analysis phase\n/** \n  * Compute the column permutation to minimize the fill-in\n  * \n  *  - Apply this permutation to the input matrix - \n  * \n  *  - Compute the column elimination tree on the permuted matrix \n  * \n  *  - Postorder the elimination tree and the column permutation\n  * \n  */\ntemplate <typename MatrixType, typename OrderingType>\nvoid SparseLU<MatrixType, OrderingType>::analyzePattern(const MatrixType& mat)\n{\n  \n  //TODO  It is possible as in SuperLU to compute row and column scaling vectors to equilibrate the matrix mat.\n  \n  // Firstly, copy the whole input matrix. \n  m_mat = mat;\n  \n  // Compute fill-in ordering\n  OrderingType ord; \n  ord(m_mat,m_perm_c);\n  \n  // Apply the permutation to the columns of the input matrix\n  if (m_perm_c.size())\n  {\n    m_mat.uncompress(); //NOTE: The effect of this command is only to create the InnerNonzeros pointers. FIXME : This vector is filled but not subsequently used.  
\n    // Then, permute only the column pointers\n    ei_declare_aligned_stack_constructed_variable(StorageIndex,outerIndexPtr,mat.cols()+1,mat.isCompressed()?const_cast<StorageIndex*>(mat.outerIndexPtr()):0);\n    \n    // If the input matrix 'mat' is uncompressed, then the outer-indices do not match the ones of m_mat, and a copy is thus needed.\n    if(!mat.isCompressed()) \n      IndexVector::Map(outerIndexPtr, mat.cols()+1) = IndexVector::Map(m_mat.outerIndexPtr(),mat.cols()+1);\n    \n    // Apply the permutation and compute the nnz per column.\n    for (Index i = 0; i < mat.cols(); i++)\n    {\n      m_mat.outerIndexPtr()[m_perm_c.indices()(i)] = outerIndexPtr[i];\n      m_mat.innerNonZeroPtr()[m_perm_c.indices()(i)] = outerIndexPtr[i+1] - outerIndexPtr[i];\n    }\n  }\n  \n  // Compute the column elimination tree of the permuted matrix \n  IndexVector firstRowElt;\n  internal::coletree(m_mat, m_etree,firstRowElt); \n     \n  // In symmetric mode, do not do postorder here\n  if (!m_symmetricmode) {\n    IndexVector post, iwork; \n    // Post order etree\n    internal::treePostorder(StorageIndex(m_mat.cols()), m_etree, post); \n      \n   \n    // Renumber etree in postorder \n    Index m = m_mat.cols(); \n    iwork.resize(m+1);\n    for (Index i = 0; i < m; ++i) iwork(post(i)) = post(m_etree(i));\n    m_etree = iwork;\n    \n    // Postmultiply A*Pc by post, i.e reorder the matrix according to the postorder of the etree\n    PermutationType post_perm(m); \n    for (Index i = 0; i < m; i++) \n      post_perm.indices()(i) = post(i); \n        \n    // Combine the two permutations : postorder the permutation for future use\n    if(m_perm_c.size()) {\n      m_perm_c = post_perm * m_perm_c;\n    }\n    \n  } // end postordering \n  \n  m_analysisIsOk = true; \n}\n\n// Functions needed by the numerical factorization phase\n\n\n/** \n  *  - Numerical factorization \n  *  - Interleaved with the symbolic factorization \n  * On exit,  info is \n  * \n  *    = 0: successful factorization\n  * \n  *    > 0: if info = i, and i is\n  * \n  *       <= A->ncol: U(i,i) is exactly zero. The factorization has\n  *          been completed, but the factor U is exactly singular,\n  *          and division by zero will occur if it is used to solve a\n  *          system of equations.\n  * \n  *       > A->ncol: number of bytes allocated when memory allocation\n  *         failure occurred, plus A->ncol. If lwork = -1, it is\n  *         the estimated amount of space needed, plus A->ncol.  
\n  */\ntemplate <typename MatrixType, typename OrderingType>\nvoid SparseLU<MatrixType, OrderingType>::factorize(const MatrixType& matrix)\n{\n  using internal::emptyIdxLU;\n  eigen_assert(m_analysisIsOk && \"analyzePattern() should be called first\"); \n  eigen_assert((matrix.rows() == matrix.cols()) && \"Only for squared matrices\");\n  \n  m_isInitialized = true;\n  \n  // Apply the column permutation computed in analyzepattern()\n  //   m_mat = matrix * m_perm_c.inverse(); \n  m_mat = matrix;\n  if (m_perm_c.size()) \n  {\n    m_mat.uncompress(); //NOTE: The effect of this command is only to create the InnerNonzeros pointers.\n    //Then, permute only the column pointers\n    const StorageIndex * outerIndexPtr;\n    if (matrix.isCompressed()) outerIndexPtr = matrix.outerIndexPtr();\n    else\n    {\n      StorageIndex* outerIndexPtr_t = new StorageIndex[matrix.cols()+1];\n      for(Index i = 0; i <= matrix.cols(); i++) outerIndexPtr_t[i] = m_mat.outerIndexPtr()[i];\n      outerIndexPtr = outerIndexPtr_t;\n    }\n    for (Index i = 0; i < matrix.cols(); i++)\n    {\n      m_mat.outerIndexPtr()[m_perm_c.indices()(i)] = outerIndexPtr[i];\n      m_mat.innerNonZeroPtr()[m_perm_c.indices()(i)] = outerIndexPtr[i+1] - outerIndexPtr[i];\n    }\n    if(!matrix.isCompressed()) delete[] outerIndexPtr;\n  } \n  else \n  { //FIXME This should not be needed if the empty permutation is handled transparently\n    m_perm_c.resize(matrix.cols());\n    for(StorageIndex i = 0; i < matrix.cols(); ++i) m_perm_c.indices()(i) = i;\n  }\n  \n  Index m = m_mat.rows();\n  Index n = m_mat.cols();\n  Index nnz = m_mat.nonZeros();\n  Index maxpanel = m_perfv.panel_size * m;\n  // Allocate working storage common to the factor routines\n  Index lwork = 0;\n  Index info = Base::memInit(m, n, nnz, lwork, m_perfv.fillfactor, m_perfv.panel_size, m_glu); \n  if (info) \n  {\n    m_lastError = \"UNABLE TO ALLOCATE WORKING MEMORY\\n\\n\" ;\n    m_factorizationIsOk = false;\n    return ; \n  }\n  \n  // Set up pointers for integer working arrays \n  IndexVector segrep(m); segrep.setZero();\n  IndexVector parent(m); parent.setZero();\n  IndexVector xplore(m); xplore.setZero();\n  IndexVector repfnz(maxpanel);\n  IndexVector panel_lsub(maxpanel);\n  IndexVector xprune(n); xprune.setZero();\n  IndexVector marker(m*internal::LUNoMarker); marker.setZero();\n  \n  repfnz.setConstant(-1); \n  panel_lsub.setConstant(-1);\n  \n  // Set up pointers for scalar working arrays \n  ScalarVector dense; \n  dense.setZero(maxpanel);\n  ScalarVector tempv; \n  tempv.setZero(internal::LUnumTempV(m, m_perfv.panel_size, m_perfv.maxsuper, /*m_perfv.rowblk*/m) );\n  \n  // Compute the inverse of perm_c\n  PermutationType iperm_c(m_perm_c.inverse()); \n  \n  // Identify initial relaxed snodes\n  IndexVector relax_end(n);\n  if ( m_symmetricmode == true ) \n    Base::heap_relax_snode(n, m_etree, m_perfv.relax, marker, relax_end);\n  else\n    Base::relax_snode(n, m_etree, m_perfv.relax, marker, relax_end);\n  \n  \n  m_perm_r.resize(m); \n  m_perm_r.indices().setConstant(-1);\n  marker.setConstant(-1);\n  m_detPermR = 1; // Record the determinant of the row permutation\n  \n  m_glu.supno(0) = emptyIdxLU; m_glu.xsup.setConstant(0);\n  m_glu.xsup(0) = m_glu.xlsub(0) = m_glu.xusub(0) = m_glu.xlusup(0) = Index(0);\n  \n  // Work on one 'panel' at a time. 
A panel is one of the following :\n  //  (a) a relaxed supernode at the bottom of the etree, or\n  //  (b) panel_size contiguous columns, <panel_size> defined by the user\n  Index jcol; \n  Index pivrow; // Pivotal row number in the original row matrix\n  Index nseg1; // Number of segments in U-column above panel row jcol\n  Index nseg; // Number of segments in each U-column \n  Index irep; \n  Index i, k, jj; \n  for (jcol = 0; jcol < n; )\n  {\n    // Adjust panel size so that a panel won't overlap with the next relaxed snode. \n    Index panel_size = m_perfv.panel_size; // upper bound on panel width\n    for (k = jcol + 1; k < (std::min)(jcol+panel_size, n); k++)\n    {\n      if (relax_end(k) != emptyIdxLU) \n      {\n        panel_size = k - jcol; \n        break; \n      }\n    }\n    if (k == n) \n      panel_size = n - jcol; \n      \n    // Symbolic outer factorization on a panel of columns \n    Base::panel_dfs(m, panel_size, jcol, m_mat, m_perm_r.indices(), nseg1, dense, panel_lsub, segrep, repfnz, xprune, marker, parent, xplore, m_glu); \n    \n    // Numeric sup-panel updates in topological order \n    Base::panel_bmod(m, panel_size, jcol, nseg1, dense, tempv, segrep, repfnz, m_glu); \n    \n    // Sparse LU within the panel, and below the panel diagonal \n    for ( jj = jcol; jj< jcol + panel_size; jj++) \n    {\n      k = (jj - jcol) * m; // Column index for w-wide arrays \n      \n      nseg = nseg1; // begin after all the panel segments\n      //Depth-first-search for the current column\n      VectorBlock<IndexVector> panel_lsubk(panel_lsub, k, m);\n      VectorBlock<IndexVector> repfnz_k(repfnz, k, m); \n      info = Base::column_dfs(m, jj, m_perm_r.indices(), m_perfv.maxsuper, nseg, panel_lsubk, segrep, repfnz_k, xprune, marker, parent, xplore, m_glu); \n      if ( info ) \n      {\n        m_lastError =  \"UNABLE TO EXPAND MEMORY IN COLUMN_DFS() \";\n        m_info = NumericalIssue; \n        m_factorizationIsOk = false; \n        return; \n      }\n      // Numeric updates to this column \n      VectorBlock<ScalarVector> dense_k(dense, k, m); \n      VectorBlock<IndexVector> segrep_k(segrep, nseg1, m-nseg1); \n      info = Base::column_bmod(jj, (nseg - nseg1), dense_k, tempv, segrep_k, repfnz_k, jcol, m_glu); \n      if ( info ) \n      {\n        m_lastError = \"UNABLE TO EXPAND MEMORY IN COLUMN_BMOD() \";\n        m_info = NumericalIssue; \n        m_factorizationIsOk = false; \n        return; \n      }\n      \n      // Copy the U-segments to ucol(*)\n      info = Base::copy_to_ucol(jj, nseg, segrep, repfnz_k ,m_perm_r.indices(), dense_k, m_glu); \n      if ( info ) \n      {\n        m_lastError = \"UNABLE TO EXPAND MEMORY IN COPY_TO_UCOL() \";\n        m_info = NumericalIssue; \n        m_factorizationIsOk = false; \n        return; \n      }\n      \n      // Form the L-segment \n      info = Base::pivotL(jj, m_diagpivotthresh, m_perm_r.indices(), iperm_c.indices(), pivrow, m_glu);\n      if ( info ) \n      {\n        m_lastError = \"THE MATRIX IS STRUCTURALLY SINGULAR ... 
ZERO COLUMN AT \";\n        std::ostringstream returnInfo;\n        returnInfo << info; \n        m_lastError += returnInfo.str();\n        m_info = NumericalIssue; \n        m_factorizationIsOk = false; \n        return; \n      }\n      \n      // Update the determinant of the row permutation matrix\n      // FIXME: the following test is not correct, we should probably take iperm_c into account and pivrow is not directly the row pivot.\n      if (pivrow != jj) m_detPermR = -m_detPermR;\n\n      // Prune columns (0:jj-1) using column jj\n      Base::pruneL(jj, m_perm_r.indices(), pivrow, nseg, segrep, repfnz_k, xprune, m_glu); \n      \n      // Reset repfnz for this column \n      for (i = 0; i < nseg; i++)\n      {\n        irep = segrep(i); \n        repfnz_k(irep) = emptyIdxLU; \n      }\n    } // end SparseLU within the panel  \n    jcol += panel_size;  // Move to the next panel\n  } // end for -- end elimination \n  \n  m_detPermR = m_perm_r.determinant();\n  m_detPermC = m_perm_c.determinant();\n  \n  // Count the number of nonzeros in factors \n  Base::countnz(n, m_nnzL, m_nnzU, m_glu); \n  // Apply permutation  to the L subscripts \n  Base::fixupL(n, m_perm_r.indices(), m_glu);\n  \n  // Create supernode matrix L \n  m_Lstore.setInfos(m, n, m_glu.lusup, m_glu.xlusup, m_glu.lsub, m_glu.xlsub, m_glu.supno, m_glu.xsup); \n  // Create the column major upper sparse matrix  U; \n  new (&m_Ustore) MappedSparseMatrix<Scalar, ColMajor, StorageIndex> ( m, n, m_nnzU, m_glu.xusub.data(), m_glu.usub.data(), m_glu.ucol.data() );\n  \n  m_info = Success;\n  m_factorizationIsOk = true;\n}\n\ntemplate<typename MappedSupernodalType>\nstruct SparseLUMatrixLReturnType : internal::no_assignment_operator\n{\n  typedef typename MappedSupernodalType::Scalar Scalar;\n  explicit SparseLUMatrixLReturnType(const MappedSupernodalType& mapL) : m_mapL(mapL)\n  { }\n  Index rows() const { return m_mapL.rows(); }\n  Index cols() const { return m_mapL.cols(); }\n  template<typename Dest>\n  void solveInPlace( MatrixBase<Dest> &X) const\n  {\n    m_mapL.solveInPlace(X);\n  }\n  template<bool Conjugate, typename Dest>\n  void solveTransposedInPlace( MatrixBase<Dest> &X) const\n  {\n    m_mapL.template solveTransposedInPlace<Conjugate>(X);\n  }\n\n  const MappedSupernodalType& m_mapL;\n};\n\ntemplate<typename MatrixLType, typename MatrixUType>\nstruct SparseLUMatrixUReturnType : internal::no_assignment_operator\n{\n  typedef typename MatrixLType::Scalar Scalar;\n  SparseLUMatrixUReturnType(const MatrixLType& mapL, const MatrixUType& mapU)\n  : m_mapL(mapL),m_mapU(mapU)\n  { }\n  Index rows() const { return m_mapL.rows(); }\n  Index cols() const { return m_mapL.cols(); }\n\n  template<typename Dest>   void solveInPlace(MatrixBase<Dest> &X) const\n  {\n    Index nrhs = X.cols();\n    Index n    = X.rows();\n    // Backward solve with U\n    for (Index k = m_mapL.nsuper(); k >= 0; k--)\n    {\n      Index fsupc = m_mapL.supToCol()[k];\n      Index lda = m_mapL.colIndexPtr()[fsupc+1] - m_mapL.colIndexPtr()[fsupc]; // leading dimension\n      Index nsupc = m_mapL.supToCol()[k+1] - fsupc;\n      Index luptr = m_mapL.colIndexPtr()[fsupc];\n\n      if (nsupc == 1)\n      {\n        for (Index j = 0; j < nrhs; j++)\n        {\n          X(fsupc, j) /= m_mapL.valuePtr()[luptr];\n        }\n      }\n      else\n      {\n        // FIXME: the following lines should use Block expressions and not Map!\n        Map<const Matrix<Scalar,Dynamic,Dynamic, ColMajor>, 0, OuterStride<> > A( &(m_mapL.valuePtr()[luptr]), nsupc, nsupc, 
OuterStride<>(lda) );\n        Map< Matrix<Scalar,Dynamic,Dest::ColsAtCompileTime, ColMajor>, 0, OuterStride<> > U (&(X.coeffRef(fsupc,0)), nsupc, nrhs, OuterStride<>(n) );\n        U = A.template triangularView<Upper>().solve(U);\n      }\n\n      for (Index j = 0; j < nrhs; ++j)\n      {\n        for (Index jcol = fsupc; jcol < fsupc + nsupc; jcol++)\n        {\n          typename MatrixUType::InnerIterator it(m_mapU, jcol);\n          for ( ; it; ++it)\n          {\n            Index irow = it.index();\n            X(irow, j) -= X(jcol, j) * it.value();\n          }\n        }\n      }\n    } // End For U-solve\n  }\n\n  template<bool Conjugate, typename Dest>   void solveTransposedInPlace(MatrixBase<Dest> &X) const\n  {\n    using numext::conj;\n    Index nrhs = X.cols();\n    Index n    = X.rows();\n    // Forward solve with U\n    for (Index k = 0; k <=  m_mapL.nsuper(); k++)\n    {\n      Index fsupc = m_mapL.supToCol()[k];\n      Index lda = m_mapL.colIndexPtr()[fsupc+1] - m_mapL.colIndexPtr()[fsupc]; // leading dimension\n      Index nsupc = m_mapL.supToCol()[k+1] - fsupc;\n      Index luptr = m_mapL.colIndexPtr()[fsupc];\n\n      for (Index j = 0; j < nrhs; ++j)\n      {\n        for (Index jcol = fsupc; jcol < fsupc + nsupc; jcol++)\n        {\n          typename MatrixUType::InnerIterator it(m_mapU, jcol);\n          for ( ; it; ++it)\n          {\n            Index irow = it.index();\n            X(jcol, j) -= X(irow, j) * (Conjugate? conj(it.value()): it.value());\n          }\n        }\n      }\n      if (nsupc == 1)\n      {\n        for (Index j = 0; j < nrhs; j++)\n        {\n          X(fsupc, j) /= (Conjugate? conj(m_mapL.valuePtr()[luptr]) : m_mapL.valuePtr()[luptr]);\n        }\n      }\n      else\n      {\n        Map<const Matrix<Scalar,Dynamic,Dynamic, ColMajor>, 0, OuterStride<> > A( &(m_mapL.valuePtr()[luptr]), nsupc, nsupc, OuterStride<>(lda) );\n        Map< Matrix<Scalar,Dynamic,Dest::ColsAtCompileTime, ColMajor>, 0, OuterStride<> > U (&(X(fsupc,0)), nsupc, nrhs, OuterStride<>(n) );\n        if(Conjugate)\n          U = A.adjoint().template triangularView<Lower>().solve(U);\n        else\n          U = A.transpose().template triangularView<Lower>().solve(U);\n      }\n    }// End For U-solve\n  }\n\n\n  const MatrixLType& m_mapL;\n  const MatrixUType& m_mapU;\n};\n\n} // End namespace Eigen \n\n#endif\n"
  },
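  {
    "path": "example/eigen_sparselu_demo.cpp",
    "content": "// Editor's note: illustrative sketch only -- this file is hypothetical and is not part\n// of upstream Eigen 3.4.0 or of the regenie build. It walks through the SparseLU\n// workflow documented in SparseLU.h above: analyzePattern/factorize, solve,\n// transpose().solve and the determinant accessors.\n#include <Eigen/SparseCore>\n#include <Eigen/SparseLU>\n#include <iostream>\n\nint main()\n{\n  // Small test matrix, assembled in compressed column-major form as required.\n  Eigen::SparseMatrix<double> A(2, 2);\n  A.insert(0, 0) = 4.0;\n  A.insert(0, 1) = 1.0;\n  A.insert(1, 0) = 2.0;\n  A.insert(1, 1) = 3.0;\n  A.makeCompressed();\n\n  Eigen::SparseLU<Eigen::SparseMatrix<double> > solver; // default ordering: COLAMD\n  solver.analyzePattern(A); // symbolic step: fill-reducing column permutation\n  solver.factorize(A);      // numeric step: Pr * A * Pc^T = L * U\n  if (solver.info() != Eigen::Success)\n  {\n    std::cerr << solver.lastErrorMessage() << \"\\n\";\n    return 1;\n  }\n\n  Eigen::VectorXd b(2);\n  b << 1.0, 2.0;\n  Eigen::VectorXd x = solver.solve(b);             // solve A x = b\n  Eigen::VectorXd y = solver.transpose().solve(b); // solve A^T y = b\n\n  std::cout << x.transpose() << \"\\n\"\n            << y.transpose() << \"\\n\"\n            << solver.determinant() << \"\\n\";       // det(A) = 4*3 - 1*2 = 10\n  return 0;\n}\n"
  },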
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/SparseLU/SparseLUImpl.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n#ifndef SPARSELU_IMPL_H\n#define SPARSELU_IMPL_H\n\nnamespace Eigen {\nnamespace internal {\n  \n/** \\ingroup SparseLU_Module\n  * \\class SparseLUImpl\n  * Base class for sparseLU\n  */\ntemplate <typename Scalar, typename StorageIndex>\nclass SparseLUImpl\n{\n  public:\n    typedef Matrix<Scalar,Dynamic,1> ScalarVector;\n    typedef Matrix<StorageIndex,Dynamic,1> IndexVector; \n    typedef Matrix<Scalar,Dynamic,Dynamic,ColMajor> ScalarMatrix;\n    typedef Map<ScalarMatrix, 0,  OuterStride<> > MappedMatrixBlock;\n    typedef typename ScalarVector::RealScalar RealScalar; \n    typedef Ref<Matrix<Scalar,Dynamic,1> > BlockScalarVector;\n    typedef Ref<Matrix<StorageIndex,Dynamic,1> > BlockIndexVector;\n    typedef LU_GlobalLU_t<IndexVector, ScalarVector> GlobalLU_t; \n    typedef SparseMatrix<Scalar,ColMajor,StorageIndex> MatrixType; \n    \n  protected:\n     template <typename VectorType>\n     Index expand(VectorType& vec, Index& length, Index nbElts, Index keep_prev, Index& num_expansions);\n     Index memInit(Index m, Index n, Index annz, Index lwork, Index fillratio, Index panel_size,  GlobalLU_t& glu); \n     template <typename VectorType>\n     Index memXpand(VectorType& vec, Index& maxlen, Index nbElts, MemType memtype, Index& num_expansions);\n     void heap_relax_snode (const Index n, IndexVector& et, const Index relax_columns, IndexVector& descendants, IndexVector& relax_end); \n     void relax_snode (const Index n, IndexVector& et, const Index relax_columns, IndexVector& descendants, IndexVector& relax_end); \n     Index snode_dfs(const Index jcol, const Index kcol,const MatrixType& mat,  IndexVector& xprune, IndexVector& marker, GlobalLU_t& glu); \n     Index snode_bmod (const Index jcol, const Index fsupc, ScalarVector& dense, GlobalLU_t& glu);\n     Index pivotL(const Index jcol, const RealScalar& diagpivotthresh, IndexVector& perm_r, IndexVector& iperm_c, Index& pivrow, GlobalLU_t& glu);\n     template <typename Traits>\n     void dfs_kernel(const StorageIndex jj, IndexVector& perm_r,\n                    Index& nseg, IndexVector& panel_lsub, IndexVector& segrep,\n                    Ref<IndexVector> repfnz_col, IndexVector& xprune, Ref<IndexVector> marker, IndexVector& parent,\n                    IndexVector& xplore, GlobalLU_t& glu, Index& nextl_col, Index krow, Traits& traits);\n     void panel_dfs(const Index m, const Index w, const Index jcol, MatrixType& A, IndexVector& perm_r, Index& nseg, ScalarVector& dense, IndexVector& panel_lsub, IndexVector& segrep, IndexVector& repfnz, IndexVector& xprune, IndexVector& marker, IndexVector& parent, IndexVector& xplore, GlobalLU_t& glu);\n    \n     void panel_bmod(const Index m, const Index w, const Index jcol, const Index nseg, ScalarVector& dense, ScalarVector& tempv, IndexVector& segrep, IndexVector& repfnz, GlobalLU_t& glu);\n     Index column_dfs(const Index m, const Index jcol, IndexVector& perm_r, Index maxsuper, Index& nseg,  BlockIndexVector lsub_col, IndexVector& segrep, BlockIndexVector repfnz, IndexVector& xprune, IndexVector& marker, IndexVector& parent, IndexVector& xplore, GlobalLU_t& glu);\n     Index column_bmod(const 
Index jcol, const Index nseg, BlockScalarVector dense, ScalarVector& tempv, BlockIndexVector segrep, BlockIndexVector repfnz, Index fpanelc, GlobalLU_t& glu); \n     Index copy_to_ucol(const Index jcol, const Index nseg, IndexVector& segrep, BlockIndexVector repfnz ,IndexVector& perm_r, BlockScalarVector dense, GlobalLU_t& glu); \n     void pruneL(const Index jcol, const IndexVector& perm_r, const Index pivrow, const Index nseg, const IndexVector& segrep, BlockIndexVector repfnz, IndexVector& xprune, GlobalLU_t& glu);\n     void countnz(const Index n, Index& nnzL, Index& nnzU, GlobalLU_t& glu); \n     void fixupL(const Index n, const IndexVector& perm_r, GlobalLU_t& glu); \n     \n     template<typename , typename >\n     friend struct column_dfs_traits;\n}; \n\n} // end namespace internal\n} // namespace Eigen\n\n#endif\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/SparseLU/SparseLU_Memory.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n/* \n \n * NOTE: This file is the modified version of [s,d,c,z]memory.c files in SuperLU \n \n * -- SuperLU routine (version 3.1) --\n * Univ. of California Berkeley, Xerox Palo Alto Research Center,\n * and Lawrence Berkeley National Lab.\n * August 1, 2008\n *\n * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.\n *\n * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY\n * EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.\n *\n * Permission is hereby granted to use or copy this program for any\n * purpose, provided the above notices are retained on all copies.\n * Permission to modify the code and to distribute modified code is\n * granted, provided the above notices are retained, and a notice that\n * the code was modified is included with the above copyright notice.\n */\n\n#ifndef EIGEN_SPARSELU_MEMORY\n#define EIGEN_SPARSELU_MEMORY\n\nnamespace Eigen {\nnamespace internal {\n  \nenum { LUNoMarker = 3 };\nenum {emptyIdxLU = -1};\ninline Index LUnumTempV(Index& m, Index& w, Index& t, Index& b)\n{\n  return (std::max)(m, (t+b)*w);\n}\n\ntemplate< typename Scalar>\ninline Index LUTempSpace(Index&m, Index& w)\n{\n  return (2*w + 4 + LUNoMarker) * m * sizeof(Index) + (w + 1) * m * sizeof(Scalar);\n}\n\n\n\n\n/** \n  * Expand the existing storage to accommodate more fill-ins\n  * \\param vec Valid pointer to the vector to allocate or expand\n  * \\param[in,out] length  At input, contain the current length of the vector that is to be increased. 
At output, length of the newly allocated vector\n  * \\param[in] nbElts Current number of elements in the factors\n  * \\param keep_prev  1: use length  and do not expand the vector; 0: compute new_len and expand\n  * \\param[in,out] num_expansions Number of times the memory has been expanded\n  */\ntemplate <typename Scalar, typename StorageIndex>\ntemplate <typename VectorType>\nIndex  SparseLUImpl<Scalar,StorageIndex>::expand(VectorType& vec, Index& length, Index nbElts, Index keep_prev, Index& num_expansions) \n{\n  \n  float alpha = 1.5; // Ratio of the memory increase \n  Index new_len; // New size of the allocated memory\n  \n  if(num_expansions == 0 || keep_prev) \n    new_len = length ; // First time allocate requested\n  else \n    new_len = (std::max)(length+1,Index(alpha * length));\n  \n  VectorType old_vec; // Temporary vector to hold the previous values   \n  if (nbElts > 0 )\n    old_vec = vec.segment(0,nbElts); \n  \n  //Allocate or expand the current vector\n#ifdef EIGEN_EXCEPTIONS\n  try\n#endif\n  {\n    vec.resize(new_len); \n  }\n#ifdef EIGEN_EXCEPTIONS\n  catch(std::bad_alloc& )\n#else\n  if(!vec.size())\n#endif\n  {\n    if (!num_expansions)\n    {\n      // First time to allocate from LUMemInit()\n      // Let LUMemInit() deal with it.\n      return -1;\n    }\n    if (keep_prev)\n    {\n      // In this case, the memory length should not be reduced\n      return new_len;\n    }\n    else \n    {\n      // Reduce the size and increase again \n      Index tries = 0; // Number of attempts\n      do \n      {\n        alpha = (alpha + 1)/2;\n        new_len = (std::max)(length+1,Index(alpha * length));\n#ifdef EIGEN_EXCEPTIONS\n        try\n#endif\n        {\n          vec.resize(new_len); \n        }\n#ifdef EIGEN_EXCEPTIONS\n        catch(std::bad_alloc& )\n#else\n        if (!vec.size())\n#endif\n        {\n          tries += 1; \n          if ( tries > 10) return new_len; \n        }\n      } while (!vec.size());\n    }\n  }\n  //Copy the previous values to the newly allocated space \n  if (nbElts > 0)\n    vec.segment(0, nbElts) = old_vec;   \n   \n  \n  length  = new_len;\n  if(num_expansions) ++num_expansions;\n  return 0; \n}\n\n/**\n * \\brief  Allocate various working space for the numerical factorization phase.\n * \\param m number of rows of the input matrix \n * \\param n number of columns \n * \\param annz number of initial nonzeros in the matrix \n * \\param lwork  if lwork=-1, this routine returns an estimated size of the required memory\n * \\param glu persistent data to facilitate multiple factors : will be deleted later ??\n * \\param fillratio estimated ratio of fill in the factors\n * \\param panel_size Size of a panel\n * \\return an estimated size of the required memory if lwork = -1; otherwise, return the size of actually allocated memory when allocation failed, and 0 on success\n * \\note Unlike SuperLU, this routine does not support successive factorization with the same pattern and the same row permutation\n */\ntemplate <typename Scalar, typename StorageIndex>\nIndex SparseLUImpl<Scalar,StorageIndex>::memInit(Index m, Index n, Index annz, Index lwork, Index fillratio, Index panel_size,  GlobalLU_t& glu)\n{\n  Index& num_expansions = glu.num_expansions; //No memory expansions so far\n  num_expansions = 0;\n  glu.nzumax = glu.nzlumax = (std::min)(fillratio * (annz+1) / n, m) * n; // estimated number of nonzeros in U \n  glu.nzlmax = (std::max)(Index(4), fillratio) * (annz+1) / 4; // estimated  nnz in L factor\n  // Return the estimated 
size to the user if necessary\n  Index tempSpace;\n  tempSpace = (2*panel_size + 4 + LUNoMarker) * m * sizeof(Index) + (panel_size + 1) * m * sizeof(Scalar);\n  if (lwork == emptyIdxLU) \n  {\n    Index estimated_size;\n    estimated_size = (5 * n + 5) * sizeof(Index)  + tempSpace\n                    + (glu.nzlmax + glu.nzumax) * sizeof(Index) + (glu.nzlumax+glu.nzumax) *  sizeof(Scalar) + n; \n    return estimated_size;\n  }\n  \n  // Setup the required space \n  \n  // First allocate Integer pointers for L\\U factors\n  glu.xsup.resize(n+1);\n  glu.supno.resize(n+1);\n  glu.xlsub.resize(n+1);\n  glu.xlusup.resize(n+1);\n  glu.xusub.resize(n+1);\n\n  // Reserve memory for L/U factors\n  do \n  {\n    if(     (expand<ScalarVector>(glu.lusup, glu.nzlumax, 0, 0, num_expansions)<0)\n        ||  (expand<ScalarVector>(glu.ucol,  glu.nzumax,  0, 0, num_expansions)<0)\n        ||  (expand<IndexVector> (glu.lsub,  glu.nzlmax,  0, 0, num_expansions)<0)\n        ||  (expand<IndexVector> (glu.usub,  glu.nzumax,  0, 1, num_expansions)<0) )\n    {\n      //Reduce the estimated size and retry\n      glu.nzlumax /= 2;\n      glu.nzumax /= 2;\n      glu.nzlmax /= 2;\n      if (glu.nzlumax < annz ) return glu.nzlumax; \n    }\n  } while (!glu.lusup.size() || !glu.ucol.size() || !glu.lsub.size() || !glu.usub.size());\n  \n  ++num_expansions;\n  return 0;\n  \n} // end LuMemInit\n\n/** \n * \\brief Expand the existing storage \n * \\param vec vector to expand \n * \\param[in,out] maxlen On input, previous size of vec (number of elements to copy). On output, new size\n * \\param nbElts current number of elements in the vector.\n * \\param memtype Type of the element to expand\n * \\param num_expansions Number of expansions \n * \\return 0 on success, > 0 size of the memory allocated so far\n */\ntemplate <typename Scalar, typename StorageIndex>\ntemplate <typename VectorType>\nIndex SparseLUImpl<Scalar,StorageIndex>::memXpand(VectorType& vec, Index& maxlen, Index nbElts, MemType memtype, Index& num_expansions)\n{\n  Index failed_size; \n  if (memtype == USUB)\n     failed_size = this->expand<VectorType>(vec, maxlen, nbElts, 1, num_expansions);\n  else\n    failed_size = this->expand<VectorType>(vec, maxlen, nbElts, 0, num_expansions);\n\n  if (failed_size)\n    return failed_size; \n  \n  return 0 ;  \n}\n\n} // end namespace internal\n\n} // end namespace Eigen\n#endif // EIGEN_SPARSELU_MEMORY\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/SparseLU/SparseLU_Structs.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n/* \n * NOTE: This file comes from a partly modified version of files slu_[s,d,c,z]defs.h\n * -- SuperLU routine (version 4.1) --\n * Univ. of California Berkeley, Xerox Palo Alto Research Center,\n * and Lawrence Berkeley National Lab.\n * November, 2010\n * \n * Global data structures used in LU factorization -\n * \n *   nsuper: #supernodes = nsuper + 1, numbered [0, nsuper].\n *   (xsup,supno): supno[i] is the supernode no to which i belongs;\n *  xsup(s) points to the beginning of the s-th supernode.\n *  e.g.   supno 0 1 2 2 3 3 3 4 4 4 4 4   (n=12)\n *          xsup 0 1 2 4 7 12\n *  Note: dfs will be performed on supernode rep. relative to the new \n *        row pivoting ordering\n *\n *   (xlsub,lsub): lsub[*] contains the compressed subscript of\n *  rectangular supernodes; xlsub[j] points to the starting\n *  location of the j-th column in lsub[*]. Note that xlsub \n *  is indexed by column.\n *  Storage: original row subscripts\n *\n *      During the course of sparse LU factorization, we also use\n *  (xlsub,lsub) for the purpose of symmetric pruning. For each\n *  supernode {s,s+1,...,t=s+r} with first column s and last\n *  column t, the subscript set\n *    lsub[j], j=xlsub[s], .., xlsub[s+1]-1\n *  is the structure of column s (i.e. structure of this supernode).\n *  It is used for the storage of numerical values.\n *  Furthermore,\n *    lsub[j], j=xlsub[t], .., xlsub[t+1]-1\n *  is the structure of the last column t of this supernode.\n *  It is for the purpose of symmetric pruning. Therefore, the\n *  structural subscripts can be rearranged without making physical\n *  interchanges among the numerical values.\n *\n *  However, if the supernode has only one column, then we\n *  only keep one set of subscripts. For any subscript interchange\n *  performed, similar interchange must be done on the numerical\n *  values.\n *\n *  The last column structures (for pruning) will be removed\n *  after the numercial LU factorization phase.\n *\n *   (xlusup,lusup): lusup[*] contains the numerical values of the\n *  rectangular supernodes; xlusup[j] points to the starting\n *  location of the j-th column in storage vector lusup[*]\n *  Note: xlusup is indexed by column.\n *  Each rectangular supernode is stored by column-major\n *  scheme, consistent with Fortran 2-dim array storage.\n *\n *   (xusub,ucol,usub): ucol[*] stores the numerical values of\n *  U-columns outside the rectangular supernodes. The row\n *  subscript of nonzero ucol[k] is stored in usub[k].\n *  xusub[i] points to the starting location of column i in ucol.\n *  Storage: new row subscripts; that is subscripts of PA.\n */\n\n#ifndef EIGEN_LU_STRUCTS\n#define EIGEN_LU_STRUCTS\nnamespace Eigen {\nnamespace internal {\n  \ntypedef enum {LUSUP, UCOL, LSUB, USUB, LLVL, ULVL} MemType; \n\ntemplate <typename IndexVector, typename ScalarVector>\nstruct LU_GlobalLU_t {\n  typedef typename IndexVector::Scalar StorageIndex; \n  IndexVector xsup; //First supernode column ... 
xsup(s) points to the beginning of the s-th supernode\n  IndexVector supno; // Supernode number corresponding to this column (column to supernode mapping)\n  ScalarVector  lusup; // nonzero values of L ordered by columns \n  IndexVector lsub; // Compressed row indices of L rectangular supernodes. \n  IndexVector xlusup; // pointers to the beginning of each column in lusup\n  IndexVector xlsub; // pointers to the beginning of each column in lsub\n  Index   nzlmax; // Current max size of lsub\n  Index   nzlumax; // Current max size of lusup\n  ScalarVector  ucol; // nonzero values of U ordered by columns \n  IndexVector usub; // row indices of U columns in ucol\n  IndexVector xusub; // Pointers to the beginning of each column of U in ucol \n  Index   nzumax; // Current max size of ucol\n  Index   n; // Number of columns in the matrix  \n  Index   num_expansions; \n};\n\n// Values to set for performance\nstruct perfvalues {\n  Index panel_size; // a panel consists of at most <panel_size> consecutive columns\n  Index relax; // To control degree of relaxing supernodes. If the number of nodes (columns) \n                // in a subtree of the elimination tree is less than relax, this subtree is considered \n                // as one supernode regardless of the row structures of those columns\n  Index maxsuper; // The maximum size for a supernode in complete LU\n  Index rowblk; // The minimum row dimension for 2-D blocking to be used;\n  Index colblk; // The minimum column dimension for 2-D blocking to be used;\n  Index fillfactor; // The estimated fill factors for L and U, compared with A\n}; \n\n} // end namespace internal\n\n} // end namespace Eigen\n#endif // EIGEN_LU_STRUCTS\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>\n// Copyright (C) 2012 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_SPARSELU_SUPERNODAL_MATRIX_H\n#define EIGEN_SPARSELU_SUPERNODAL_MATRIX_H\n\nnamespace Eigen {\nnamespace internal {\n\n/** \\ingroup SparseLU_Module\n * \\brief a class to manipulate the L supernodal factor from the SparseLU factorization\n * \n * This class  contain the data to easily store \n * and manipulate the supernodes during the factorization and solution phase of Sparse LU. \n * Only the lower triangular matrix has supernodes.\n * \n * NOTE : This class corresponds to the SCformat structure in SuperLU\n * \n */\n/* TODO\n * InnerIterator as for sparsematrix \n * SuperInnerIterator to iterate through all supernodes \n * Function for triangular solve\n */\ntemplate <typename _Scalar, typename _StorageIndex>\nclass MappedSuperNodalMatrix\n{\n  public:\n    typedef _Scalar Scalar; \n    typedef _StorageIndex StorageIndex;\n    typedef Matrix<StorageIndex,Dynamic,1> IndexVector;\n    typedef Matrix<Scalar,Dynamic,1> ScalarVector;\n  public:\n    MappedSuperNodalMatrix()\n    {\n      \n    }\n    MappedSuperNodalMatrix(Index m, Index n,  ScalarVector& nzval, IndexVector& nzval_colptr, IndexVector& rowind,\n             IndexVector& rowind_colptr, IndexVector& col_to_sup, IndexVector& sup_to_col )\n    {\n      setInfos(m, n, nzval, nzval_colptr, rowind, rowind_colptr, col_to_sup, sup_to_col);\n    }\n    \n    ~MappedSuperNodalMatrix()\n    {\n      \n    }\n    /**\n     * Set appropriate pointers for the lower triangular supernodal matrix\n     * These infos are available at the end of the numerical factorization\n     * FIXME This class will be modified such that it can be use in the course \n     * of the factorization.\n     */\n    void setInfos(Index m, Index n, ScalarVector& nzval, IndexVector& nzval_colptr, IndexVector& rowind,\n             IndexVector& rowind_colptr, IndexVector& col_to_sup, IndexVector& sup_to_col )\n    {\n      m_row = m;\n      m_col = n; \n      m_nzval = nzval.data(); \n      m_nzval_colptr = nzval_colptr.data(); \n      m_rowind = rowind.data(); \n      m_rowind_colptr = rowind_colptr.data(); \n      m_nsuper = col_to_sup(n); \n      m_col_to_sup = col_to_sup.data(); \n      m_sup_to_col = sup_to_col.data(); \n    }\n    \n    /**\n     * Number of rows\n     */\n    Index rows() const { return m_row; }\n    \n    /**\n     * Number of columns\n     */\n    Index cols() const { return m_col; }\n    \n    /**\n     * Return the array of nonzero values packed by column\n     * \n     * The size is nnz\n     */\n    Scalar* valuePtr() {  return m_nzval; }\n    \n    const Scalar* valuePtr() const \n    {\n      return m_nzval; \n    }\n    /**\n     * Return the pointers to the beginning of each column in \\ref valuePtr()\n     */\n    StorageIndex* colIndexPtr()\n    {\n      return m_nzval_colptr; \n    }\n    \n    const StorageIndex* colIndexPtr() const\n    {\n      return m_nzval_colptr; \n    }\n    \n    /**\n     * Return the array of compressed row indices of all supernodes\n     */\n    StorageIndex* rowIndex()  { return m_rowind; }\n    \n    const StorageIndex* rowIndex() 
const\n    {\n      return m_rowind; \n    }\n    \n    /**\n     * Return the location in \\em rowvaluePtr() which starts each column\n     */\n    StorageIndex* rowIndexPtr() { return m_rowind_colptr; }\n    \n    const StorageIndex* rowIndexPtr() const\n    {\n      return m_rowind_colptr; \n    }\n    \n    /** \n     * Return the array of column-to-supernode mapping \n     */\n    StorageIndex* colToSup()  { return m_col_to_sup; }\n    \n    const StorageIndex* colToSup() const\n    {\n      return m_col_to_sup;       \n    }\n    /**\n     * Return the array of supernode-to-column mapping\n     */\n    StorageIndex* supToCol() { return m_sup_to_col; }\n    \n    const StorageIndex* supToCol() const\n    {\n      return m_sup_to_col;\n    }\n    \n    /**\n     * Return the number of supernodes\n     */\n    Index nsuper() const\n    {\n      return m_nsuper; \n    }\n    \n    class InnerIterator; \n    template<typename Dest>\n    void solveInPlace( MatrixBase<Dest>&X) const;\n    template<bool Conjugate, typename Dest>\n    void solveTransposedInPlace( MatrixBase<Dest>&X) const;\n\n    \n      \n      \n    \n  protected:\n    Index m_row; // Number of rows\n    Index m_col; // Number of columns\n    Index m_nsuper; // Number of supernodes\n    Scalar* m_nzval; //array of nonzero values packed by column\n    StorageIndex* m_nzval_colptr; //nzval_colptr[j] Stores the location in nzval[] which starts column j\n    StorageIndex* m_rowind; // Array of compressed row indices of rectangular supernodes\n    StorageIndex* m_rowind_colptr; //rowind_colptr[j] stores the location in rowind[] which starts column j\n    StorageIndex* m_col_to_sup; // col_to_sup[j] is the supernode number to which column j belongs\n    StorageIndex* m_sup_to_col; //sup_to_col[s] points to the starting column of the s-th supernode\n    \n  private :\n};\n\n/**\n  * \\brief InnerIterator class to iterate over nonzero values of the current column in the supernodal matrix L\n  * \n  */\ntemplate<typename Scalar, typename StorageIndex>\nclass MappedSuperNodalMatrix<Scalar,StorageIndex>::InnerIterator\n{\n  public:\n     InnerIterator(const MappedSuperNodalMatrix& mat, Index outer)\n      : m_matrix(mat),\n        m_outer(outer),\n        m_supno(mat.colToSup()[outer]),\n        m_idval(mat.colIndexPtr()[outer]),\n        m_startidval(m_idval),\n        m_endidval(mat.colIndexPtr()[outer+1]),\n        m_idrow(mat.rowIndexPtr()[mat.supToCol()[mat.colToSup()[outer]]]),\n        m_endidrow(mat.rowIndexPtr()[mat.supToCol()[mat.colToSup()[outer]]+1])\n    {}\n    inline InnerIterator& operator++()\n    { \n      m_idval++; \n      m_idrow++;\n      return *this;\n    }\n    inline Scalar value() const { return m_matrix.valuePtr()[m_idval]; }\n    \n    inline Scalar& valueRef() { return const_cast<Scalar&>(m_matrix.valuePtr()[m_idval]); }\n    \n    inline Index index() const { return m_matrix.rowIndex()[m_idrow]; }\n    inline Index row() const { return index(); }\n    inline Index col() const { return m_outer; }\n    \n    inline Index supIndex() const { return m_supno; }\n    \n    inline operator bool() const \n    { \n      return ( (m_idval < m_endidval) && (m_idval >= m_startidval)\n                && (m_idrow < m_endidrow) );\n    }\n    \n  protected:\n    const MappedSuperNodalMatrix& m_matrix; // Supernodal lower triangular matrix \n    const Index m_outer;                    // Current column \n    const Index m_supno;                    // Current SuperNode number\n    Index m_idval;                          
// Index to browse the values in the current column\n    const Index m_startidval;               // Start of the column value\n    const Index m_endidval;                 // End of the column value\n    Index m_idrow;                          // Index to browse the row indices \n    Index m_endidrow;                       // End index of row indices of the current column\n};\n\n/**\n * \\brief Solve with the supernode triangular matrix\n * \n */\ntemplate<typename Scalar, typename Index_>\ntemplate<typename Dest>\nvoid MappedSuperNodalMatrix<Scalar,Index_>::solveInPlace( MatrixBase<Dest>&X) const\n{\n    /* Explicit type conversion as the Index type of MatrixBase<Dest> may be wider than Index */\n//    eigen_assert(X.rows() <= NumTraits<Index>::highest());\n//    eigen_assert(X.cols() <= NumTraits<Index>::highest());\n    Index n    = int(X.rows());\n    Index nrhs = Index(X.cols());\n    const Scalar * Lval = valuePtr();                 // Nonzero values \n    Matrix<Scalar,Dynamic,Dest::ColsAtCompileTime, ColMajor> work(n, nrhs);     // working vector\n    work.setZero();\n    for (Index k = 0; k <= nsuper(); k ++)\n    {\n      Index fsupc = supToCol()[k];                    // First column of the current supernode \n      Index istart = rowIndexPtr()[fsupc];            // Pointer index to the subscript of the current column\n      Index nsupr = rowIndexPtr()[fsupc+1] - istart;  // Number of rows in the current supernode\n      Index nsupc = supToCol()[k+1] - fsupc;          // Number of columns in the current supernode\n      Index nrow = nsupr - nsupc;                     // Number of rows in the non-diagonal part of the supernode\n      Index irow;                                     //Current index row\n      \n      if (nsupc == 1 )\n      {\n        for (Index j = 0; j < nrhs; j++)\n        {\n          InnerIterator it(*this, fsupc);\n          ++it; // Skip the diagonal element\n          for (; it; ++it)\n          {\n            irow = it.row();\n            X(irow, j) -= X(fsupc, j) * it.value();\n          }\n        }\n      }\n      else\n      {\n        // The supernode has more than one column \n        Index luptr = colIndexPtr()[fsupc]; \n        Index lda = colIndexPtr()[fsupc+1] - luptr;\n        \n        // Triangular solve \n        Map<const Matrix<Scalar,Dynamic,Dynamic, ColMajor>, 0, OuterStride<> > A( &(Lval[luptr]), nsupc, nsupc, OuterStride<>(lda) );\n        Map< Matrix<Scalar,Dynamic,Dest::ColsAtCompileTime, ColMajor>, 0, OuterStride<> > U (&(X(fsupc,0)), nsupc, nrhs, OuterStride<>(n) );\n        U = A.template triangularView<UnitLower>().solve(U); \n        \n        // Matrix-vector product \n        new (&A) Map<const Matrix<Scalar,Dynamic,Dynamic, ColMajor>, 0, OuterStride<> > ( &(Lval[luptr+nsupc]), nrow, nsupc, OuterStride<>(lda) );\n        work.topRows(nrow).noalias() = A * U;\n        \n        //Begin Scatter \n        for (Index j = 0; j < nrhs; j++)\n        {\n          Index iptr = istart + nsupc; \n          for (Index i = 0; i < nrow; i++)\n          {\n            irow = rowIndex()[iptr]; \n            X(irow, j) -= work(i, j); // Scatter operation\n            work(i, j) = Scalar(0); \n            iptr++;\n          }\n        }\n      }\n    } \n}\n\ntemplate<typename Scalar, typename Index_>\ntemplate<bool Conjugate, typename Dest>\nvoid MappedSuperNodalMatrix<Scalar,Index_>::solveTransposedInPlace( MatrixBase<Dest>&X) const\n{\n    using numext::conj;\n  Index n    = int(X.rows());\n  Index nrhs = Index(X.cols());\n  const Scalar * 
Lval = valuePtr();                 // Nonzero values\n  Matrix<Scalar,Dynamic,Dest::ColsAtCompileTime, ColMajor> work(n, nrhs);     // working vector\n  work.setZero();\n  for (Index k = nsuper(); k >= 0; k--)\n  {\n    Index fsupc = supToCol()[k];                    // First column of the current supernode\n    Index istart = rowIndexPtr()[fsupc];            // Pointer index to the subscript of the current column\n    Index nsupr = rowIndexPtr()[fsupc+1] - istart;  // Number of rows in the current supernode\n    Index nsupc = supToCol()[k+1] - fsupc;          // Number of columns in the current supernode\n    Index nrow = nsupr - nsupc;                     // Number of rows in the non-diagonal part of the supernode\n    Index irow;                                     //Current index row\n\n    if (nsupc == 1 )\n    {\n      for (Index j = 0; j < nrhs; j++)\n      {\n        InnerIterator it(*this, fsupc);\n        ++it; // Skip the diagonal element\n        for (; it; ++it)\n        {\n          irow = it.row();\n          X(fsupc,j) -= X(irow, j) * (Conjugate?conj(it.value()):it.value());\n        }\n      }\n    }\n    else\n    {\n      // The supernode has more than one column\n      Index luptr = colIndexPtr()[fsupc];\n      Index lda = colIndexPtr()[fsupc+1] - luptr;\n\n      //Begin Gather\n      for (Index j = 0; j < nrhs; j++)\n      {\n        Index iptr = istart + nsupc;\n        for (Index i = 0; i < nrow; i++)\n        {\n          irow = rowIndex()[iptr];\n          work.topRows(nrow)(i,j)= X(irow,j); // Gather operation\n          iptr++;\n        }\n      }\n\n      // Matrix-vector product with transposed submatrix\n      Map<const Matrix<Scalar,Dynamic,Dynamic, ColMajor>, 0, OuterStride<> > A( &(Lval[luptr+nsupc]), nrow, nsupc, OuterStride<>(lda) );\n      Map< Matrix<Scalar,Dynamic,Dest::ColsAtCompileTime, ColMajor>, 0, OuterStride<> > U (&(X(fsupc,0)), nsupc, nrhs, OuterStride<>(n) );\n      if(Conjugate)\n        U = U - A.adjoint() * work.topRows(nrow);\n      else\n        U = U - A.transpose() * work.topRows(nrow);\n\n      // Triangular solve (of transposed diagonal block)\n      new (&A) Map<const Matrix<Scalar,Dynamic,Dynamic, ColMajor>, 0, OuterStride<> > ( &(Lval[luptr]), nsupc, nsupc, OuterStride<>(lda) );\n      if(Conjugate)\n        U = A.adjoint().template triangularView<UnitUpper>().solve(U);\n      else\n        U = A.transpose().template triangularView<UnitUpper>().solve(U);\n\n    }\n\n  }\n}\n\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_SPARSELU_MATRIX_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/SparseLU/SparseLU_Utils.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n\n#ifndef EIGEN_SPARSELU_UTILS_H\n#define EIGEN_SPARSELU_UTILS_H\n\nnamespace Eigen {\nnamespace internal {\n\n/**\n * \\brief Count Nonzero elements in the factors\n */\ntemplate <typename Scalar, typename StorageIndex>\nvoid SparseLUImpl<Scalar,StorageIndex>::countnz(const Index n, Index& nnzL, Index& nnzU, GlobalLU_t& glu)\n{\n nnzL = 0; \n nnzU = (glu.xusub)(n); \n Index nsuper = (glu.supno)(n); \n Index jlen; \n Index i, j, fsupc;\n if (n <= 0 ) return; \n // For each supernode\n for (i = 0; i <= nsuper; i++)\n {\n   fsupc = glu.xsup(i); \n   jlen = glu.xlsub(fsupc+1) - glu.xlsub(fsupc); \n   \n   for (j = fsupc; j < glu.xsup(i+1); j++)\n   {\n     nnzL += jlen; \n     nnzU += j - fsupc + 1; \n     jlen--; \n   }\n }\n}\n\n/**\n * \\brief Fix up the data storage lsub for L-subscripts. \n * \n * It removes the subscripts sets for structural pruning, \n * and applies permutation to the remaining subscripts\n * \n */\ntemplate <typename Scalar, typename StorageIndex>\nvoid SparseLUImpl<Scalar,StorageIndex>::fixupL(const Index n, const IndexVector& perm_r, GlobalLU_t& glu)\n{\n  Index fsupc, i, j, k, jstart; \n  \n  StorageIndex nextl = 0; \n  Index nsuper = (glu.supno)(n); \n  \n  // For each supernode \n  for (i = 0; i <= nsuper; i++)\n  {\n    fsupc = glu.xsup(i); \n    jstart = glu.xlsub(fsupc); \n    glu.xlsub(fsupc) = nextl; \n    for (j = jstart; j < glu.xlsub(fsupc + 1); j++)\n    {\n      glu.lsub(nextl) = perm_r(glu.lsub(j)); // Now indexed into P*A\n      nextl++;\n    }\n    for (k = fsupc+1; k < glu.xsup(i+1); k++)\n      glu.xlsub(k) = nextl; // other columns in supernode i\n  }\n  \n  glu.xlsub(n) = nextl; \n}\n\n} // end namespace internal\n\n} // end namespace Eigen\n#endif // EIGEN_SPARSELU_UTILS_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/SparseLU/SparseLU_column_bmod.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>\n// Copyright (C) 2012 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n/* \n \n * NOTE: This file is the modified version of xcolumn_bmod.c file in SuperLU \n \n * -- SuperLU routine (version 3.0) --\n * Univ. of California Berkeley, Xerox Palo Alto Research Center,\n * and Lawrence Berkeley National Lab.\n * October 15, 2003\n *\n * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.\n *\n * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY\n * EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.\n *\n * Permission is hereby granted to use or copy this program for any\n * purpose, provided the above notices are retained on all copies.\n * Permission to modify the code and to distribute modified code is\n * granted, provided the above notices are retained, and a notice that\n * the code was modified is included with the above copyright notice.\n */\n#ifndef SPARSELU_COLUMN_BMOD_H\n#define SPARSELU_COLUMN_BMOD_H\n\nnamespace Eigen {\n\nnamespace internal {\n/**\n * \\brief Performs numeric block updates (sup-col) in topological order\n * \n * \\param jcol current column to update\n * \\param nseg Number of segments in the U part\n * \\param dense Store the full representation of the column\n * \\param tempv working array \n * \\param segrep segment representative ...\n * \\param repfnz ??? First nonzero column in each row ???  ...\n * \\param fpanelc First column in the current panel\n * \\param glu Global LU data. 
\n * \\return 0 - successful return \n *         > 0 - number of bytes allocated when run out of space\n * \n */\ntemplate <typename Scalar, typename StorageIndex>\nIndex SparseLUImpl<Scalar,StorageIndex>::column_bmod(const Index jcol, const Index nseg, BlockScalarVector dense, ScalarVector& tempv,\n                                                     BlockIndexVector segrep, BlockIndexVector repfnz, Index fpanelc, GlobalLU_t& glu)\n{\n  Index  jsupno, k, ksub, krep, ksupno; \n  Index lptr, nrow, isub, irow, nextlu, new_next, ufirst; \n  Index fsupc, nsupc, nsupr, luptr, kfnz, no_zeros; \n  /* krep = representative of current k-th supernode\n    * fsupc =  first supernodal column\n    * nsupc = number of columns in a supernode\n    * nsupr = number of rows in a supernode\n    * luptr = location of supernodal LU-block in storage\n    * kfnz = first nonz in the k-th supernodal segment\n    * no_zeros = no lf leading zeros in a supernodal U-segment\n    */\n  \n  jsupno = glu.supno(jcol);\n  // For each nonzero supernode segment of U[*,j] in topological order \n  k = nseg - 1; \n  Index d_fsupc; // distance between the first column of the current panel and the \n               // first column of the current snode\n  Index fst_col; // First column within small LU update\n  Index segsize; \n  for (ksub = 0; ksub < nseg; ksub++)\n  {\n    krep = segrep(k); k--; \n    ksupno = glu.supno(krep); \n    if (jsupno != ksupno )\n    {\n      // outside the rectangular supernode \n      fsupc = glu.xsup(ksupno); \n      fst_col = (std::max)(fsupc, fpanelc); \n      \n      // Distance from the current supernode to the current panel; \n      // d_fsupc = 0 if fsupc > fpanelc\n      d_fsupc = fst_col - fsupc; \n      \n      luptr = glu.xlusup(fst_col) + d_fsupc; \n      lptr = glu.xlsub(fsupc) + d_fsupc; \n      \n      kfnz = repfnz(krep); \n      kfnz = (std::max)(kfnz, fpanelc); \n      \n      segsize = krep - kfnz + 1; \n      nsupc = krep - fst_col + 1; \n      nsupr = glu.xlsub(fsupc+1) - glu.xlsub(fsupc); \n      nrow = nsupr - d_fsupc - nsupc;\n      Index lda = glu.xlusup(fst_col+1) - glu.xlusup(fst_col);\n      \n      \n      // Perform a triangular solver and block update, \n      // then scatter the result of sup-col update to dense\n      no_zeros = kfnz - fst_col; \n      if(segsize==1)\n        LU_kernel_bmod<1>::run(segsize, dense, tempv, glu.lusup, luptr, lda, nrow, glu.lsub, lptr, no_zeros);\n      else\n        LU_kernel_bmod<Dynamic>::run(segsize, dense, tempv, glu.lusup, luptr, lda, nrow, glu.lsub, lptr, no_zeros);\n    } // end if jsupno \n  } // end for each segment\n  \n  // Process the supernodal portion of  L\\U[*,j]\n  nextlu = glu.xlusup(jcol); \n  fsupc = glu.xsup(jsupno);\n  \n  // copy the SPA dense into L\\U[*,j]\n  Index mem; \n  new_next = nextlu + glu.xlsub(fsupc + 1) - glu.xlsub(fsupc); \n  Index offset = internal::first_multiple<Index>(new_next, internal::packet_traits<Scalar>::size) - new_next;\n  if(offset)\n    new_next += offset;\n  while (new_next > glu.nzlumax )\n  {\n    mem = memXpand<ScalarVector>(glu.lusup, glu.nzlumax, nextlu, LUSUP, glu.num_expansions);  \n    if (mem) return mem; \n  }\n  \n  for (isub = glu.xlsub(fsupc); isub < glu.xlsub(fsupc+1); isub++)\n  {\n    irow = glu.lsub(isub);\n    glu.lusup(nextlu) = dense(irow);\n    dense(irow) = Scalar(0.0); \n    ++nextlu; \n  }\n  \n  if(offset)\n  {\n    glu.lusup.segment(nextlu,offset).setZero();\n    nextlu += offset;\n  }\n  glu.xlusup(jcol + 1) = StorageIndex(nextlu);  // close L\\U(*,jcol); \n  
\n  /* For more updates within the panel (also within the current supernode),\n   * should start from the first column of the panel, or the first column\n   * of the supernode, whichever is bigger. There are two cases:\n   *  1) fsupc < fpanelc, then fst_col <-- fpanelc\n   *  2) fsupc >= fpanelc, then fst_col <-- fsupc\n   */\n  fst_col = (std::max)(fsupc, fpanelc); \n  \n  if (fst_col  < jcol)\n  {\n    // Distance between the current supernode and the current panel\n    // d_fsupc = 0 if fsupc >= fpanelc\n    d_fsupc = fst_col - fsupc; \n    \n    lptr = glu.xlsub(fsupc) + d_fsupc; \n    luptr = glu.xlusup(fst_col) + d_fsupc; \n    nsupr = glu.xlsub(fsupc+1) - glu.xlsub(fsupc); // leading dimension\n    nsupc = jcol - fst_col; // excluding jcol \n    nrow = nsupr - d_fsupc - nsupc; \n    \n    // points to the beginning of jcol in snode L\\U(jsupno) \n    ufirst = glu.xlusup(jcol) + d_fsupc; \n    Index lda = glu.xlusup(jcol+1) - glu.xlusup(jcol);\n    MappedMatrixBlock A( &(glu.lusup.data()[luptr]), nsupc, nsupc, OuterStride<>(lda) );\n    VectorBlock<ScalarVector> u(glu.lusup, ufirst, nsupc); \n    u = A.template triangularView<UnitLower>().solve(u); \n    \n    new (&A) MappedMatrixBlock ( &(glu.lusup.data()[luptr+nsupc]), nrow, nsupc, OuterStride<>(lda) );\n    VectorBlock<ScalarVector> l(glu.lusup, ufirst+nsupc, nrow); \n    l.noalias() -= A * u;\n    \n  } // End if fst_col\n  return 0; \n}\n\n} // end namespace internal\n} // end namespace Eigen\n\n#endif // SPARSELU_COLUMN_BMOD_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/SparseLU/SparseLU_column_dfs.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n/* \n \n * NOTE: This file is the modified version of [s,d,c,z]column_dfs.c file in SuperLU \n \n * -- SuperLU routine (version 2.0) --\n * Univ. of California Berkeley, Xerox Palo Alto Research Center,\n * and Lawrence Berkeley National Lab.\n * November 15, 1997\n *\n * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.\n *\n * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY\n * EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.\n *\n * Permission is hereby granted to use or copy this program for any\n * purpose, provided the above notices are retained on all copies.\n * Permission to modify the code and to distribute modified code is\n * granted, provided the above notices are retained, and a notice that\n * the code was modified is included with the above copyright notice.\n */\n#ifndef SPARSELU_COLUMN_DFS_H\n#define SPARSELU_COLUMN_DFS_H\n\ntemplate <typename Scalar, typename StorageIndex> class SparseLUImpl;\nnamespace Eigen {\n\nnamespace internal {\n\ntemplate<typename IndexVector, typename ScalarVector>\nstruct column_dfs_traits : no_assignment_operator\n{\n  typedef typename ScalarVector::Scalar Scalar;\n  typedef typename IndexVector::Scalar StorageIndex;\n  column_dfs_traits(Index jcol, Index& jsuper, typename SparseLUImpl<Scalar, StorageIndex>::GlobalLU_t& glu, SparseLUImpl<Scalar, StorageIndex>& luImpl)\n   : m_jcol(jcol), m_jsuper_ref(jsuper), m_glu(glu), m_luImpl(luImpl)\n {}\n  bool update_segrep(Index /*krep*/, Index /*jj*/)\n  {\n    return true;\n  }\n  void mem_expand(IndexVector& lsub, Index& nextl, Index chmark)\n  {\n    if (nextl >= m_glu.nzlmax)\n      m_luImpl.memXpand(lsub, m_glu.nzlmax, nextl, LSUB, m_glu.num_expansions); \n    if (chmark != (m_jcol-1)) m_jsuper_ref = emptyIdxLU;\n  }\n  enum { ExpandMem = true };\n  \n  Index m_jcol;\n  Index& m_jsuper_ref;\n  typename SparseLUImpl<Scalar, StorageIndex>::GlobalLU_t& m_glu;\n  SparseLUImpl<Scalar, StorageIndex>& m_luImpl;\n};\n\n\n/**\n * \\brief Performs a symbolic factorization on column jcol and decide the supernode boundary\n * \n * A supernode representative is the last column of a supernode.\n * The nonzeros in U[*,j] are segments that end at supernodes representatives. \n * The routine returns a list of the supernodal representatives \n * in topological order of the dfs that generates them. \n * The location of the first nonzero in each supernodal segment \n * (supernodal entry location) is also returned. 
\n * \n * \\param m number of rows in the matrix\n * \\param jcol Current column \n * \\param perm_r Row permutation\n * \\param maxsuper  Maximum number of column allowed in a supernode\n * \\param [in,out] nseg Number of segments in current U[*,j] - new segments appended\n * \\param lsub_col defines the rhs vector to start the dfs\n * \\param [in,out] segrep Segment representatives - new segments appended \n * \\param repfnz  First nonzero location in each row\n * \\param xprune \n * \\param marker  marker[i] == jj, if i was visited during dfs of current column jj;\n * \\param parent\n * \\param xplore working array\n * \\param glu global LU data \n * \\return 0 success\n *         > 0 number of bytes allocated when run out of space\n * \n */\ntemplate <typename Scalar, typename StorageIndex>\nIndex SparseLUImpl<Scalar,StorageIndex>::column_dfs(const Index m, const Index jcol, IndexVector& perm_r, Index maxsuper, Index& nseg,\n                                                    BlockIndexVector lsub_col, IndexVector& segrep, BlockIndexVector repfnz, IndexVector& xprune,\n                                                    IndexVector& marker, IndexVector& parent, IndexVector& xplore, GlobalLU_t& glu)\n{\n  \n  Index jsuper = glu.supno(jcol); \n  Index nextl = glu.xlsub(jcol); \n  VectorBlock<IndexVector> marker2(marker, 2*m, m); \n  \n  \n  column_dfs_traits<IndexVector, ScalarVector> traits(jcol, jsuper, glu, *this);\n  \n  // For each nonzero in A(*,jcol) do dfs \n  for (Index k = 0; ((k < m) ? lsub_col[k] != emptyIdxLU : false) ; k++)\n  {\n    Index krow = lsub_col(k); \n    lsub_col(k) = emptyIdxLU; \n    Index kmark = marker2(krow); \n    \n    // krow was visited before, go to the next nonz; \n    if (kmark == jcol) continue;\n    \n    dfs_kernel(StorageIndex(jcol), perm_r, nseg, glu.lsub, segrep, repfnz, xprune, marker2, parent,\n                   xplore, glu, nextl, krow, traits);\n  } // for each nonzero ... \n  \n  Index fsupc;\n  StorageIndex nsuper = glu.supno(jcol);\n  StorageIndex jcolp1 = StorageIndex(jcol) + 1;\n  Index jcolm1 = jcol - 1;\n  \n  // check to see if j belongs in the same supernode as j-1\n  if ( jcol == 0 )\n  { // Do nothing for column 0 \n    nsuper = glu.supno(0) = 0 ;\n  }\n  else \n  {\n    fsupc = glu.xsup(nsuper); \n    StorageIndex jptr = glu.xlsub(jcol); // Not yet compressed\n    StorageIndex jm1ptr = glu.xlsub(jcolm1); \n    \n    // Use supernodes of type T2 : see SuperLU paper\n    if ( (nextl-jptr != jptr-jm1ptr-1) ) jsuper = emptyIdxLU;\n    \n    // Make sure the number of columns in a supernode doesn't\n    // exceed threshold\n    if ( (jcol - fsupc) >= maxsuper) jsuper = emptyIdxLU; \n    \n    /* If jcol starts a new supernode, reclaim storage space in\n     * glu.lsub from previous supernode. Note we only store \n     * the subscript set of the first and last columns of \n     * a supernode. 
(first for num values, last for pruning)\n     */\n    if (jsuper == emptyIdxLU)\n    { // starts a new supernode \n      if ( (fsupc < jcolm1-1) ) \n      { // >= 3 columns in nsuper\n        StorageIndex ito = glu.xlsub(fsupc+1);\n        glu.xlsub(jcolm1) = ito; \n        StorageIndex istop = ito + jptr - jm1ptr; \n        xprune(jcolm1) = istop; // initialize xprune(jcol-1)\n        glu.xlsub(jcol) = istop; \n        \n        for (StorageIndex ifrom = jm1ptr; ifrom < nextl; ++ifrom, ++ito)\n          glu.lsub(ito) = glu.lsub(ifrom); \n        nextl = ito;  // = istop + length(jcol)\n      }\n      nsuper++; \n      glu.supno(jcol) = nsuper; \n    } // if a new supernode \n  } // end else:  jcol > 0\n  \n  // Tidy up the pointers before exit\n  glu.xsup(nsuper+1) = jcolp1; \n  glu.supno(jcolp1) = nsuper; \n  xprune(jcol) = StorageIndex(nextl);  // Initialize upper bound for pruning\n  glu.xlsub(jcolp1) = StorageIndex(nextl); \n  \n  return 0; \n}\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n/* \n \n * NOTE: This file is the modified version of [s,d,c,z]copy_to_ucol.c file in SuperLU \n \n * -- SuperLU routine (version 2.0) --\n * Univ. of California Berkeley, Xerox Palo Alto Research Center,\n * and Lawrence Berkeley National Lab.\n * November 15, 1997\n *\n * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.\n *\n * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY\n * EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.\n *\n * Permission is hereby granted to use or copy this program for any\n * purpose, provided the above notices are retained on all copies.\n * Permission to modify the code and to distribute modified code is\n * granted, provided the above notices are retained, and a notice that\n * the code was modified is included with the above copyright notice.\n */\n#ifndef SPARSELU_COPY_TO_UCOL_H\n#define SPARSELU_COPY_TO_UCOL_H\n\nnamespace Eigen {\nnamespace internal {\n\n/**\n * \\brief Performs numeric block updates (sup-col) in topological order\n * \n * \\param jcol current column to update\n * \\param nseg Number of segments in the U part\n * \\param segrep segment representative ...\n * \\param repfnz First nonzero column in each row  ...\n * \\param perm_r Row permutation \n * \\param dense Store the full representation of the column\n * \\param glu Global LU data. \n * \\return 0 - successful return \n *         > 0 - number of bytes allocated when run out of space\n * \n */\ntemplate <typename Scalar, typename StorageIndex>\nIndex SparseLUImpl<Scalar,StorageIndex>::copy_to_ucol(const Index jcol, const Index nseg, IndexVector& segrep,\n                                                      BlockIndexVector repfnz ,IndexVector& perm_r, BlockScalarVector dense, GlobalLU_t& glu)\n{  \n  Index ksub, krep, ksupno; \n    \n  Index jsupno = glu.supno(jcol);\n  \n  // For each nonzero supernode segment of U[*,j] in topological order \n  Index k = nseg - 1, i; \n  StorageIndex nextu = glu.xusub(jcol); \n  Index kfnz, isub, segsize; \n  Index new_next,irow; \n  Index fsupc, mem; \n  for (ksub = 0; ksub < nseg; ksub++)\n  {\n    krep = segrep(k); k--; \n    ksupno = glu.supno(krep); \n    if (jsupno != ksupno ) // should go into ucol(); \n    {\n      kfnz = repfnz(krep); \n      if (kfnz != emptyIdxLU)\n      { // Nonzero U-segment \n        fsupc = glu.xsup(ksupno); \n        isub = glu.xlsub(fsupc) + kfnz - fsupc; \n        segsize = krep - kfnz + 1; \n        new_next = nextu + segsize; \n        while (new_next > glu.nzumax) \n        {\n          mem = memXpand<ScalarVector>(glu.ucol, glu.nzumax, nextu, UCOL, glu.num_expansions); \n          if (mem) return mem; \n          mem = memXpand<IndexVector>(glu.usub, glu.nzumax, nextu, USUB, glu.num_expansions); \n          if (mem) return mem; \n          \n        }\n        \n        for (i = 0; i < segsize; i++)\n        {\n          irow = glu.lsub(isub); \n          glu.usub(nextu) = perm_r(irow); // Unlike the L part, the U part is stored in its final order\n          glu.ucol(nextu) = dense(irow); \n          dense(irow) = Scalar(0.0); \n          nextu++;\n          isub++;\n        }\n        \n     
 } // end nonzero U-segment \n      \n    } // end if jsupno \n    \n  } // end for each segment\n  glu.xusub(jcol + 1) = nextu; // close U(*,jcol)\n  return 0; \n}\n\n} // namespace internal\n} // end namespace Eigen\n\n#endif // SPARSELU_COPY_TO_UCOL_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/SparseLU/SparseLU_gemm_kernel.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2012 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_SPARSELU_GEMM_KERNEL_H\n#define EIGEN_SPARSELU_GEMM_KERNEL_H\n\nnamespace Eigen {\n\nnamespace internal {\n\n\n/** \\internal\n  * A general matrix-matrix product kernel optimized for the SparseLU factorization.\n  *  - A, B, and C must be column major\n  *  - lda and ldc must be multiples of the respective packet size\n  *  - C must have the same alignment as A\n  */\ntemplate<typename Scalar>\nEIGEN_DONT_INLINE\nvoid sparselu_gemm(Index m, Index n, Index d, const Scalar* A, Index lda, const Scalar* B, Index ldb, Scalar* C, Index ldc)\n{\n  using namespace Eigen::internal;\n  \n  typedef typename packet_traits<Scalar>::type Packet;\n  enum {\n    NumberOfRegisters = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS,\n    PacketSize = packet_traits<Scalar>::size,\n    PM = 8,                             // peeling in M\n    RN = 2,                             // register blocking\n    RK = NumberOfRegisters>=16 ? 4 : 2, // register blocking\n    BM = 4096/sizeof(Scalar),           // number of rows of A-C per chunk\n    SM = PM*PacketSize                  // step along M\n  };\n  Index d_end = (d/RK)*RK;    // number of columns of A (rows of B) suitable for full register blocking\n  Index n_end = (n/RN)*RN;    // number of columns of B-C suitable for processing RN columns at once\n  Index i0 = internal::first_default_aligned(A,m);\n  \n  eigen_internal_assert(((lda%PacketSize)==0) && ((ldc%PacketSize)==0) && (i0==internal::first_default_aligned(C,m)));\n  \n  // handle the non aligned rows of A and C without any optimization:\n  for(Index i=0; i<i0; ++i)\n  {\n    for(Index j=0; j<n; ++j)\n    {\n      Scalar c = C[i+j*ldc];\n      for(Index k=0; k<d; ++k)\n        c += B[k+j*ldb] * A[i+k*lda];\n      C[i+j*ldc] = c;\n    }\n  }\n  // process the remaining rows per chunk of BM rows\n  for(Index ib=i0; ib<m; ib+=BM)\n  {\n    Index actual_b = std::min<Index>(BM, m-ib);                 // actual number of rows\n    Index actual_b_end1 = (actual_b/SM)*SM;                   // actual number of rows suitable for peeling\n    Index actual_b_end2 = (actual_b/PacketSize)*PacketSize;   // actual number of rows suitable for vectorization\n    \n    // Let's process two columns of B-C at once\n    for(Index j=0; j<n_end; j+=RN)\n    {\n      const Scalar* Bc0 = B+(j+0)*ldb;\n      const Scalar* Bc1 = B+(j+1)*ldb;\n      \n      for(Index k=0; k<d_end; k+=RK)\n      {\n        \n        // load and expand a RN x RK block of B\n        Packet b00, b10, b20, b30, b01, b11, b21, b31;\n                  { b00 = pset1<Packet>(Bc0[0]); }\n                  { b10 = pset1<Packet>(Bc0[1]); }\n        if(RK==4) { b20 = pset1<Packet>(Bc0[2]); }\n        if(RK==4) { b30 = pset1<Packet>(Bc0[3]); }\n                  { b01 = pset1<Packet>(Bc1[0]); }\n                  { b11 = pset1<Packet>(Bc1[1]); }\n        if(RK==4) { b21 = pset1<Packet>(Bc1[2]); }\n        if(RK==4) { b31 = pset1<Packet>(Bc1[3]); }\n        \n        Packet a0, a1, a2, a3, c0, c1, t0, t1;\n        \n        const Scalar* A0 = A+ib+(k+0)*lda;\n        const Scalar* A1 = A+ib+(k+1)*lda;\n        const Scalar* A2 = A+ib+(k+2)*lda;\n        const Scalar* A3 = 
A+ib+(k+3)*lda;\n        \n        Scalar* C0 = C+ib+(j+0)*ldc;\n        Scalar* C1 = C+ib+(j+1)*ldc;\n        \n                  a0 = pload<Packet>(A0);\n                  a1 = pload<Packet>(A1);\n        if(RK==4)\n        {\n          a2 = pload<Packet>(A2);\n          a3 = pload<Packet>(A3);\n        }\n        else\n        {\n          // workaround \"may be used uninitialized in this function\" warning\n          a2 = a3 = a0;\n        }\n        \n#define KMADD(c, a, b, tmp) {tmp = b; tmp = pmul(a,tmp); c = padd(c,tmp);}\n#define WORK(I)  \\\n                     c0 = pload<Packet>(C0+i+(I)*PacketSize);    \\\n                     c1 = pload<Packet>(C1+i+(I)*PacketSize);    \\\n                     KMADD(c0, a0, b00, t0)                      \\\n                     KMADD(c1, a0, b01, t1)                      \\\n                     a0 = pload<Packet>(A0+i+(I+1)*PacketSize);  \\\n                     KMADD(c0, a1, b10, t0)                      \\\n                     KMADD(c1, a1, b11, t1)                      \\\n                     a1 = pload<Packet>(A1+i+(I+1)*PacketSize);  \\\n          if(RK==4){ KMADD(c0, a2, b20, t0)                     }\\\n          if(RK==4){ KMADD(c1, a2, b21, t1)                     }\\\n          if(RK==4){ a2 = pload<Packet>(A2+i+(I+1)*PacketSize); }\\\n          if(RK==4){ KMADD(c0, a3, b30, t0)                     }\\\n          if(RK==4){ KMADD(c1, a3, b31, t1)                     }\\\n          if(RK==4){ a3 = pload<Packet>(A3+i+(I+1)*PacketSize); }\\\n                     pstore(C0+i+(I)*PacketSize, c0);            \\\n                     pstore(C1+i+(I)*PacketSize, c1)\n        \n        // process rows of A' - C' with aggressive vectorization and peeling \n        for(Index i=0; i<actual_b_end1; i+=PacketSize*8)\n        {\n          EIGEN_ASM_COMMENT(\"SPARSELU_GEMML_KERNEL1\");\n                    prefetch((A0+i+(5)*PacketSize));\n                    prefetch((A1+i+(5)*PacketSize));\n          if(RK==4) prefetch((A2+i+(5)*PacketSize));\n          if(RK==4) prefetch((A3+i+(5)*PacketSize));\n\n          WORK(0);\n          WORK(1);\n          WORK(2);\n          WORK(3);\n          WORK(4);\n          WORK(5);\n          WORK(6);\n          WORK(7);\n        }\n        // process the remaining rows with vectorization only\n        for(Index i=actual_b_end1; i<actual_b_end2; i+=PacketSize)\n        {\n          WORK(0);\n        }\n#undef WORK\n        // process the remaining rows without vectorization\n        for(Index i=actual_b_end2; i<actual_b; ++i)\n        {\n          if(RK==4)\n          {\n            C0[i] += A0[i]*Bc0[0]+A1[i]*Bc0[1]+A2[i]*Bc0[2]+A3[i]*Bc0[3];\n            C1[i] += A0[i]*Bc1[0]+A1[i]*Bc1[1]+A2[i]*Bc1[2]+A3[i]*Bc1[3];\n          }\n          else\n          {\n            C0[i] += A0[i]*Bc0[0]+A1[i]*Bc0[1];\n            C1[i] += A0[i]*Bc1[0]+A1[i]*Bc1[1];\n          }\n        }\n        \n        Bc0 += RK;\n        Bc1 += RK;\n      } // peeled loop on k\n    } // peeled loop on the columns j\n    // process the last column (we now perform a matrix-vector product)\n    if((n-n_end)>0)\n    {\n      const Scalar* Bc0 = B+(n-1)*ldb;\n      \n      for(Index k=0; k<d_end; k+=RK)\n      {\n        \n        // load and expand a 1 x RK block of B\n        Packet b00, b10, b20, b30;\n                  b00 = pset1<Packet>(Bc0[0]);\n                  b10 = pset1<Packet>(Bc0[1]);\n        if(RK==4) b20 = pset1<Packet>(Bc0[2]);\n        if(RK==4) b30 = pset1<Packet>(Bc0[3]);\n        \n        Packet a0, a1, a2, a3, 
c0, t0/*, t1*/;\n        \n        const Scalar* A0 = A+ib+(k+0)*lda;\n        const Scalar* A1 = A+ib+(k+1)*lda;\n        const Scalar* A2 = A+ib+(k+2)*lda;\n        const Scalar* A3 = A+ib+(k+3)*lda;\n        \n        Scalar* C0 = C+ib+(n_end)*ldc;\n        \n                  a0 = pload<Packet>(A0);\n                  a1 = pload<Packet>(A1);\n        if(RK==4)\n        {\n          a2 = pload<Packet>(A2);\n          a3 = pload<Packet>(A3);\n        }\n        else\n        {\n          // workaround \"may be used uninitialized in this function\" warning\n          a2 = a3 = a0;\n        }\n        \n#define WORK(I) \\\n                   c0 = pload<Packet>(C0+i+(I)*PacketSize);     \\\n                   KMADD(c0, a0, b00, t0)                       \\\n                   a0 = pload<Packet>(A0+i+(I+1)*PacketSize);   \\\n                   KMADD(c0, a1, b10, t0)                       \\\n                   a1 = pload<Packet>(A1+i+(I+1)*PacketSize);   \\\n        if(RK==4){ KMADD(c0, a2, b20, t0)                      }\\\n        if(RK==4){ a2 = pload<Packet>(A2+i+(I+1)*PacketSize);  }\\\n        if(RK==4){ KMADD(c0, a3, b30, t0)                      }\\\n        if(RK==4){ a3 = pload<Packet>(A3+i+(I+1)*PacketSize);  }\\\n                   pstore(C0+i+(I)*PacketSize, c0);\n        \n        // aggressive vectorization and peeling\n        for(Index i=0; i<actual_b_end1; i+=PacketSize*8)\n        {\n          EIGEN_ASM_COMMENT(\"SPARSELU_GEMML_KERNEL2\");\n          WORK(0);\n          WORK(1);\n          WORK(2);\n          WORK(3);\n          WORK(4);\n          WORK(5);\n          WORK(6);\n          WORK(7);\n        }\n        // vectorization only\n        for(Index i=actual_b_end1; i<actual_b_end2; i+=PacketSize)\n        {\n          WORK(0);\n        }\n        // remaining scalars\n        for(Index i=actual_b_end2; i<actual_b; ++i)\n        {\n          if(RK==4) \n            C0[i] += A0[i]*Bc0[0]+A1[i]*Bc0[1]+A2[i]*Bc0[2]+A3[i]*Bc0[3];\n          else\n            C0[i] += A0[i]*Bc0[0]+A1[i]*Bc0[1];\n        }\n        \n        Bc0 += RK;\n#undef WORK\n      }\n    }\n    \n    // process the last columns of A, corresponding to the last rows of B\n    Index rd = d-d_end;\n    if(rd>0)\n    {\n      for(Index j=0; j<n; ++j)\n      {\n        enum {\n          Alignment = PacketSize>1 ? Aligned : 0\n        };\n        typedef Map<Matrix<Scalar,Dynamic,1>, Alignment > MapVector;\n        typedef Map<const Matrix<Scalar,Dynamic,1>, Alignment > ConstMapVector;\n        if(rd==1)       MapVector(C+j*ldc+ib,actual_b) += B[0+d_end+j*ldb] * ConstMapVector(A+(d_end+0)*lda+ib, actual_b);\n        \n        else if(rd==2)  MapVector(C+j*ldc+ib,actual_b) += B[0+d_end+j*ldb] * ConstMapVector(A+(d_end+0)*lda+ib, actual_b)\n                                                        + B[1+d_end+j*ldb] * ConstMapVector(A+(d_end+1)*lda+ib, actual_b);\n        \n        else            MapVector(C+j*ldc+ib,actual_b) += B[0+d_end+j*ldb] * ConstMapVector(A+(d_end+0)*lda+ib, actual_b)\n                                                        + B[1+d_end+j*ldb] * ConstMapVector(A+(d_end+1)*lda+ib, actual_b)\n                                                        + B[2+d_end+j*ldb] * ConstMapVector(A+(d_end+2)*lda+ib, actual_b);\n      }\n    }\n  \n  } // blocking on the rows of A and C\n}\n#undef KMADD\n\n} // namespace internal\n\n} // namespace Eigen\n\n#endif // EIGEN_SPARSELU_GEMM_KERNEL_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n/* This file is a modified version of heap_relax_snode.c file in SuperLU\n * -- SuperLU routine (version 3.0) --\n * Univ. of California Berkeley, Xerox Palo Alto Research Center,\n * and Lawrence Berkeley National Lab.\n * October 15, 2003\n *\n * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.\n *\n * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY\n * EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.\n *\n * Permission is hereby granted to use or copy this program for any\n * purpose, provided the above notices are retained on all copies.\n * Permission to modify the code and to distribute modified code is\n * granted, provided the above notices are retained, and a notice that\n * the code was modified is included with the above copyright notice.\n */\n\n#ifndef SPARSELU_HEAP_RELAX_SNODE_H\n#define SPARSELU_HEAP_RELAX_SNODE_H\n\nnamespace Eigen {\nnamespace internal {\n\n/** \n * \\brief Identify the initial relaxed supernodes\n * \n * This routine applied to a symmetric elimination tree. \n * It assumes that the matrix has been reordered according to the postorder of the etree\n * \\param n The number of columns\n * \\param et elimination tree \n * \\param relax_columns Maximum number of columns allowed in a relaxed snode \n * \\param descendants Number of descendants of each node in the etree\n * \\param relax_end last column in a supernode\n */\ntemplate <typename Scalar, typename StorageIndex>\nvoid SparseLUImpl<Scalar,StorageIndex>::heap_relax_snode (const Index n, IndexVector& et, const Index relax_columns, IndexVector& descendants, IndexVector& relax_end)\n{\n  \n  // The etree may not be postordered, but its heap ordered  \n  IndexVector post;\n  internal::treePostorder(StorageIndex(n), et, post); // Post order etree\n  IndexVector inv_post(n+1); \n  for (StorageIndex i = 0; i < n+1; ++i) inv_post(post(i)) = i; // inv_post = post.inverse()???\n  \n  // Renumber etree in postorder \n  IndexVector iwork(n);\n  IndexVector et_save(n+1);\n  for (Index i = 0; i < n; ++i)\n  {\n    iwork(post(i)) = post(et(i));\n  }\n  et_save = et; // Save the original etree\n  et = iwork; \n  \n  // compute the number of descendants of each node in the etree\n  relax_end.setConstant(emptyIdxLU);\n  Index j, parent; \n  descendants.setZero();\n  for (j = 0; j < n; j++) \n  {\n    parent = et(j);\n    if (parent != n) // not the dummy root\n      descendants(parent) += descendants(j) + 1;\n  }\n  // Identify the relaxed supernodes by postorder traversal of the etree\n  Index snode_start; // beginning of a snode \n  StorageIndex k;\n  Index nsuper_et_post = 0; // Number of relaxed snodes in postordered etree \n  Index nsuper_et = 0; // Number of relaxed snodes in the original etree \n  StorageIndex l; \n  for (j = 0; j < n; )\n  {\n    parent = et(j);\n    snode_start = j; \n    while ( parent != n && descendants(parent) < relax_columns ) \n    {\n      j = parent; \n      parent = et(j);\n    }\n    // Found a supernode in postordered etree, j is the last column \n    ++nsuper_et_post;\n    k = StorageIndex(n);\n    for (Index i = snode_start; i <= j; ++i)\n      k = (std::min)(k, 
inv_post(i));\n    l = inv_post(j);\n    if ( (l - k) == (j - snode_start) )  // Same number of columns in the snode\n    {\n      // This is also a supernode in the original etree\n      relax_end(k) = l; // Record last column \n      ++nsuper_et; \n    }\n    else \n    {\n      for (Index i = snode_start; i <= j; ++i) \n      {\n        l = inv_post(i);\n        if (descendants(i) == 0) \n        {\n          relax_end(l) = l;\n          ++nsuper_et;\n        }\n      }\n    }\n    j++;\n    // Search for a new leaf\n    while (j < n && descendants(j) != 0) j++;\n  } // End postorder traversal of the etree\n  \n  // Recover the original etree\n  et = et_save; \n}\n\n} // end namespace internal\n\n} // end namespace Eigen\n#endif // SPARSELU_HEAP_RELAX_SNODE_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/SparseLU/SparseLU_kernel_bmod.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>\n// Copyright (C) 2012 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef SPARSELU_KERNEL_BMOD_H\n#define SPARSELU_KERNEL_BMOD_H\n\nnamespace Eigen {\nnamespace internal {\n  \ntemplate <int SegSizeAtCompileTime> struct LU_kernel_bmod\n{\n  /** \\internal\n    * \\brief Performs numeric block updates from a given supernode to a single column\n    *\n    * \\param segsize Size of the segment (and blocks ) to use for updates\n    * \\param[in,out] dense Packed values of the original matrix\n    * \\param tempv temporary vector to use for updates\n    * \\param lusup array containing the supernodes\n    * \\param lda Leading dimension in the supernode\n    * \\param nrow Number of rows in the rectangular part of the supernode\n    * \\param lsub compressed row subscripts of supernodes\n    * \\param lptr pointer to the first column of the current supernode in lsub\n    * \\param no_zeros Number of nonzeros elements before the diagonal part of the supernode\n    */\n  template <typename BlockScalarVector, typename ScalarVector, typename IndexVector>\n  static EIGEN_DONT_INLINE void run(const Index segsize, BlockScalarVector& dense, ScalarVector& tempv, ScalarVector& lusup, Index& luptr, const Index lda,\n                                    const Index nrow, IndexVector& lsub, const Index lptr, const Index no_zeros);\n};\n\ntemplate <int SegSizeAtCompileTime>\ntemplate <typename BlockScalarVector, typename ScalarVector, typename IndexVector>\nEIGEN_DONT_INLINE void LU_kernel_bmod<SegSizeAtCompileTime>::run(const Index segsize, BlockScalarVector& dense, ScalarVector& tempv, ScalarVector& lusup, Index& luptr, const Index lda,\n                                                                  const Index nrow, IndexVector& lsub, const Index lptr, const Index no_zeros)\n{\n  typedef typename ScalarVector::Scalar Scalar;\n  // First, copy U[*,j] segment from dense(*) to tempv(*)\n  // The result of triangular solve is in tempv[*]; \n    // The result of matric-vector update is in dense[*]\n  Index isub = lptr + no_zeros; \n  Index i;\n  Index irow;\n  for (i = 0; i < ((SegSizeAtCompileTime==Dynamic)?segsize:SegSizeAtCompileTime); i++)\n  {\n    irow = lsub(isub); \n    tempv(i) = dense(irow); \n    ++isub; \n  }\n  // Dense triangular solve -- start effective triangle\n  luptr += lda * no_zeros + no_zeros; \n  // Form Eigen matrix and vector \n  Map<Matrix<Scalar,SegSizeAtCompileTime,SegSizeAtCompileTime, ColMajor>, 0, OuterStride<> > A( &(lusup.data()[luptr]), segsize, segsize, OuterStride<>(lda) );\n  Map<Matrix<Scalar,SegSizeAtCompileTime,1> > u(tempv.data(), segsize);\n  \n  u = A.template triangularView<UnitLower>().solve(u); \n  \n  // Dense matrix-vector product y <-- B*x \n  luptr += segsize;\n  const Index PacketSize = internal::packet_traits<Scalar>::size;\n  Index ldl = internal::first_multiple(nrow, PacketSize);\n  Map<Matrix<Scalar,Dynamic,SegSizeAtCompileTime, ColMajor>, 0, OuterStride<> > B( &(lusup.data()[luptr]), nrow, segsize, OuterStride<>(lda) );\n  Index aligned_offset = internal::first_default_aligned(tempv.data()+segsize, PacketSize);\n  Index aligned_with_B_offset = 
(PacketSize-internal::first_default_aligned(B.data(), PacketSize))%PacketSize;\n  Map<Matrix<Scalar,Dynamic,1>, 0, OuterStride<> > l(tempv.data()+segsize+aligned_offset+aligned_with_B_offset, nrow, OuterStride<>(ldl) );\n  \n  l.setZero();\n  internal::sparselu_gemm<Scalar>(l.rows(), l.cols(), B.cols(), B.data(), B.outerStride(), u.data(), u.outerStride(), l.data(), l.outerStride());\n  \n  // Scatter tempv[] into SPA dense[] as a temporary storage \n  isub = lptr + no_zeros;\n  for (i = 0; i < ((SegSizeAtCompileTime==Dynamic)?segsize:SegSizeAtCompileTime); i++)\n  {\n    irow = lsub(isub++); \n    dense(irow) = tempv(i);\n  }\n  \n  // Scatter l into SPA dense[]\n  for (i = 0; i < nrow; i++)\n  {\n    irow = lsub(isub++); \n    dense(irow) -= l(i);\n  } \n}\n\ntemplate <> struct LU_kernel_bmod<1>\n{\n  template <typename BlockScalarVector, typename ScalarVector, typename IndexVector>\n  static EIGEN_DONT_INLINE void run(const Index /*segsize*/, BlockScalarVector& dense, ScalarVector& /*tempv*/, ScalarVector& lusup, Index& luptr,\n                                    const Index lda, const Index nrow, IndexVector& lsub, const Index lptr, const Index no_zeros);\n};\n\n\ntemplate <typename BlockScalarVector, typename ScalarVector, typename IndexVector>\nEIGEN_DONT_INLINE void LU_kernel_bmod<1>::run(const Index /*segsize*/, BlockScalarVector& dense, ScalarVector& /*tempv*/, ScalarVector& lusup, Index& luptr,\n                                              const Index lda, const Index nrow, IndexVector& lsub, const Index lptr, const Index no_zeros)\n{\n  typedef typename ScalarVector::Scalar Scalar;\n  typedef typename IndexVector::Scalar StorageIndex;\n  Scalar f = dense(lsub(lptr + no_zeros));\n  luptr += lda * no_zeros + no_zeros + 1;\n  const Scalar* a(lusup.data() + luptr);\n  const StorageIndex*  irow(lsub.data()+lptr + no_zeros + 1);\n  Index i = 0;\n  for (; i+1 < nrow; i+=2)\n  {\n    Index i0 = *(irow++);\n    Index i1 = *(irow++);\n    Scalar a0 = *(a++);\n    Scalar a1 = *(a++);\n    Scalar d0 = dense.coeff(i0);\n    Scalar d1 = dense.coeff(i1);\n    d0 -= f*a0;\n    d1 -= f*a1;\n    dense.coeffRef(i0) = d0;\n    dense.coeffRef(i1) = d1;\n  }\n  if(i<nrow)\n    dense.coeffRef(*(irow++)) -= f * *(a++);\n}\n\n} // end namespace internal\n\n} // end namespace Eigen\n#endif // SPARSELU_KERNEL_BMOD_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/SparseLU/SparseLU_panel_bmod.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>\n// Copyright (C) 2012 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n/* \n \n * NOTE: This file is the modified version of [s,d,c,z]panel_bmod.c file in SuperLU \n \n * -- SuperLU routine (version 3.0) --\n * Univ. of California Berkeley, Xerox Palo Alto Research Center,\n * and Lawrence Berkeley National Lab.\n * October 15, 2003\n *\n * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.\n *\n * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY\n * EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.\n *\n * Permission is hereby granted to use or copy this program for any\n * purpose, provided the above notices are retained on all copies.\n * Permission to modify the code and to distribute modified code is\n * granted, provided the above notices are retained, and a notice that\n * the code was modified is included with the above copyright notice.\n */\n#ifndef SPARSELU_PANEL_BMOD_H\n#define SPARSELU_PANEL_BMOD_H\n\nnamespace Eigen {\nnamespace internal {\n\n/**\n * \\brief Performs numeric block updates (sup-panel) in topological order.\n * \n * Before entering this routine, the original nonzeros in the panel\n * were already copied into the spa[m,w]\n * \n * \\param m number of rows in the matrix\n * \\param w Panel size\n * \\param jcol Starting  column of the panel\n * \\param nseg Number of segments in the U part\n * \\param dense Store the full representation of the panel \n * \\param tempv working array \n * \\param segrep segment representative... first row in the segment\n * \\param repfnz First nonzero rows\n * \\param glu Global LU data. 
\n * \n * \n */\ntemplate <typename Scalar, typename StorageIndex>\nvoid SparseLUImpl<Scalar,StorageIndex>::panel_bmod(const Index m, const Index w, const Index jcol, \n                                            const Index nseg, ScalarVector& dense, ScalarVector& tempv,\n                                            IndexVector& segrep, IndexVector& repfnz, GlobalLU_t& glu)\n{\n  \n  Index ksub,jj,nextl_col; \n  Index fsupc, nsupc, nsupr, nrow; \n  Index krep, kfnz; \n  Index lptr; // points to the row subscripts of a supernode \n  Index luptr; // ...\n  Index segsize,no_zeros ; \n  // For each nonz supernode segment of U[*,j] in topological order\n  Index k = nseg - 1; \n  const Index PacketSize = internal::packet_traits<Scalar>::size;\n  \n  for (ksub = 0; ksub < nseg; ksub++)\n  { // For each updating supernode\n    /* krep = representative of current k-th supernode\n     * fsupc =  first supernodal column\n     * nsupc = number of columns in a supernode\n     * nsupr = number of rows in a supernode\n     */\n    krep = segrep(k); k--; \n    fsupc = glu.xsup(glu.supno(krep)); \n    nsupc = krep - fsupc + 1; \n    nsupr = glu.xlsub(fsupc+1) - glu.xlsub(fsupc); \n    nrow = nsupr - nsupc; \n    lptr = glu.xlsub(fsupc); \n    \n    // loop over the panel columns to detect the actual number of columns and rows\n    Index u_rows = 0;\n    Index u_cols = 0;\n    for (jj = jcol; jj < jcol + w; jj++)\n    {\n      nextl_col = (jj-jcol) * m; \n      VectorBlock<IndexVector> repfnz_col(repfnz, nextl_col, m); // First nonzero column index for each row\n      \n      kfnz = repfnz_col(krep); \n      if ( kfnz == emptyIdxLU ) \n        continue; // skip any zero segment\n      \n      segsize = krep - kfnz + 1;\n      u_cols++;\n      u_rows = (std::max)(segsize,u_rows);\n    }\n    \n    if(nsupc >= 2)\n    { \n      Index ldu = internal::first_multiple<Index>(u_rows, PacketSize);\n      Map<ScalarMatrix, Aligned,  OuterStride<> > U(tempv.data(), u_rows, u_cols, OuterStride<>(ldu));\n      \n      // gather U\n      Index u_col = 0;\n      for (jj = jcol; jj < jcol + w; jj++)\n      {\n        nextl_col = (jj-jcol) * m; \n        VectorBlock<IndexVector> repfnz_col(repfnz, nextl_col, m); // First nonzero column index for each row\n        VectorBlock<ScalarVector> dense_col(dense, nextl_col, m); // Scatter/gather entire matrix column from/to here\n        \n        kfnz = repfnz_col(krep); \n        if ( kfnz == emptyIdxLU ) \n          continue; // skip any zero segment\n        \n        segsize = krep - kfnz + 1;\n        luptr = glu.xlusup(fsupc);    \n        no_zeros = kfnz - fsupc; \n        \n        Index isub = lptr + no_zeros;\n        Index off = u_rows-segsize;\n        for (Index i = 0; i < off; i++) U(i,u_col) = 0;\n        for (Index i = 0; i < segsize; i++)\n        {\n          Index irow = glu.lsub(isub); \n          U(i+off,u_col) = dense_col(irow); \n          ++isub; \n        }\n        u_col++;\n      }\n      // solve U = A^-1 U\n      luptr = glu.xlusup(fsupc);\n      Index lda = glu.xlusup(fsupc+1) - glu.xlusup(fsupc);\n      no_zeros = (krep - u_rows + 1) - fsupc;\n      luptr += lda * no_zeros + no_zeros;\n      MappedMatrixBlock A(glu.lusup.data()+luptr, u_rows, u_rows, OuterStride<>(lda) );\n      U = A.template triangularView<UnitLower>().solve(U);\n      \n      // update\n      luptr += u_rows;\n      MappedMatrixBlock B(glu.lusup.data()+luptr, nrow, u_rows, OuterStride<>(lda) );\n      eigen_assert(tempv.size()>w*ldu + nrow*w + 1);\n      \n      Index ldl = 
internal::first_multiple<Index>(nrow, PacketSize);\n      Index offset = (PacketSize-internal::first_default_aligned(B.data(), PacketSize)) % PacketSize;\n      MappedMatrixBlock L(tempv.data()+w*ldu+offset, nrow, u_cols, OuterStride<>(ldl));\n      \n      L.setZero();\n      internal::sparselu_gemm<Scalar>(L.rows(), L.cols(), B.cols(), B.data(), B.outerStride(), U.data(), U.outerStride(), L.data(), L.outerStride());\n      \n      // scatter U and L\n      u_col = 0;\n      for (jj = jcol; jj < jcol + w; jj++)\n      {\n        nextl_col = (jj-jcol) * m; \n        VectorBlock<IndexVector> repfnz_col(repfnz, nextl_col, m); // First nonzero column index for each row\n        VectorBlock<ScalarVector> dense_col(dense, nextl_col, m); // Scatter/gather entire matrix column from/to here\n        \n        kfnz = repfnz_col(krep); \n        if ( kfnz == emptyIdxLU ) \n          continue; // skip any zero segment\n        \n        segsize = krep - kfnz + 1;\n        no_zeros = kfnz - fsupc; \n        Index isub = lptr + no_zeros;\n        \n        Index off = u_rows-segsize;\n        for (Index i = 0; i < segsize; i++)\n        {\n          Index irow = glu.lsub(isub++); \n          dense_col(irow) = U.coeff(i+off,u_col);\n          U.coeffRef(i+off,u_col) = 0;\n        }\n        \n        // Scatter l into SPA dense[]\n        for (Index i = 0; i < nrow; i++)\n        {\n          Index irow = glu.lsub(isub++); \n          dense_col(irow) -= L.coeff(i,u_col);\n          L.coeffRef(i,u_col) = 0;\n        }\n        u_col++;\n      }\n    }\n    else // level 2 only\n    {\n      // Sequence through each column in the panel\n      for (jj = jcol; jj < jcol + w; jj++)\n      {\n        nextl_col = (jj-jcol) * m; \n        VectorBlock<IndexVector> repfnz_col(repfnz, nextl_col, m); // First nonzero column index for each row\n        VectorBlock<ScalarVector> dense_col(dense, nextl_col, m); // Scatter/gather entire matrix column from/to here\n        \n        kfnz = repfnz_col(krep); \n        if ( kfnz == emptyIdxLU ) \n          continue; // skip any zero segment\n        \n        segsize = krep - kfnz + 1;\n        luptr = glu.xlusup(fsupc);\n        \n        Index lda = glu.xlusup(fsupc+1)-glu.xlusup(fsupc);// nsupr\n        \n        // Perform a triangular solve and block update, \n        // then scatter the result of sup-col update to dense[]\n        no_zeros = kfnz - fsupc; \n              if(segsize==1)  LU_kernel_bmod<1>::run(segsize, dense_col, tempv, glu.lusup, luptr, lda, nrow, glu.lsub, lptr, no_zeros);\n        else  if(segsize==2)  LU_kernel_bmod<2>::run(segsize, dense_col, tempv, glu.lusup, luptr, lda, nrow, glu.lsub, lptr, no_zeros);\n        else  if(segsize==3)  LU_kernel_bmod<3>::run(segsize, dense_col, tempv, glu.lusup, luptr, lda, nrow, glu.lsub, lptr, no_zeros);\n        else                  LU_kernel_bmod<Dynamic>::run(segsize, dense_col, tempv, glu.lusup, luptr, lda, nrow, glu.lsub, lptr, no_zeros); \n      } // End for each column in the panel \n    }\n    \n  } // End for each updating supernode\n} // end panel bmod\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // SPARSELU_PANEL_BMOD_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/SparseLU/SparseLU_panel_dfs.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n/* \n \n * NOTE: This file is the modified version of [s,d,c,z]panel_dfs.c file in SuperLU \n \n * -- SuperLU routine (version 2.0) --\n * Univ. of California Berkeley, Xerox Palo Alto Research Center,\n * and Lawrence Berkeley National Lab.\n * November 15, 1997\n *\n * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.\n *\n * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY\n * EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.\n *\n * Permission is hereby granted to use or copy this program for any\n * purpose, provided the above notices are retained on all copies.\n * Permission to modify the code and to distribute modified code is\n * granted, provided the above notices are retained, and a notice that\n * the code was modified is included with the above copyright notice.\n */\n#ifndef SPARSELU_PANEL_DFS_H\n#define SPARSELU_PANEL_DFS_H\n\nnamespace Eigen {\n\nnamespace internal {\n  \ntemplate<typename IndexVector>\nstruct panel_dfs_traits\n{\n  typedef typename IndexVector::Scalar StorageIndex;\n  panel_dfs_traits(Index jcol, StorageIndex* marker)\n    : m_jcol(jcol), m_marker(marker)\n  {}\n  bool update_segrep(Index krep, StorageIndex jj)\n  {\n    if(m_marker[krep]<m_jcol)\n    {\n      m_marker[krep] = jj; \n      return true;\n    }\n    return false;\n  }\n  void mem_expand(IndexVector& /*glu.lsub*/, Index /*nextl*/, Index /*chmark*/) {}\n  enum { ExpandMem = false };\n  Index m_jcol;\n  StorageIndex* m_marker;\n};\n\n\ntemplate <typename Scalar, typename StorageIndex>\ntemplate <typename Traits>\nvoid SparseLUImpl<Scalar,StorageIndex>::dfs_kernel(const StorageIndex jj, IndexVector& perm_r,\n                   Index& nseg, IndexVector& panel_lsub, IndexVector& segrep,\n                   Ref<IndexVector> repfnz_col, IndexVector& xprune, Ref<IndexVector> marker, IndexVector& parent,\n                   IndexVector& xplore, GlobalLU_t& glu,\n                   Index& nextl_col, Index krow, Traits& traits\n                  )\n{\n  \n  StorageIndex kmark = marker(krow);\n      \n  // For each unmarked krow of jj\n  marker(krow) = jj; \n  StorageIndex kperm = perm_r(krow); \n  if (kperm == emptyIdxLU ) {\n    // krow is in L : place it in structure of L(*, jj)\n    panel_lsub(nextl_col++) = StorageIndex(krow);  // krow is indexed into A\n    \n    traits.mem_expand(panel_lsub, nextl_col, kmark);\n  }\n  else \n  {\n    // krow is in U : if its supernode-representative krep\n    // has been explored, update repfnz(*)\n    // krep = supernode representative of the current row\n    StorageIndex krep = glu.xsup(glu.supno(kperm)+1) - 1; \n    // First nonzero element in the current column:\n    StorageIndex myfnz = repfnz_col(krep); \n    \n    if (myfnz != emptyIdxLU )\n    {\n      // Representative visited before\n      if (myfnz > kperm ) repfnz_col(krep) = kperm; \n      \n    }\n    else \n    {\n      // Otherwise, perform dfs starting at krep\n      StorageIndex oldrep = emptyIdxLU; \n      parent(krep) = oldrep; \n      repfnz_col(krep) = kperm; \n      StorageIndex xdfs =  glu.xlsub(krep); \n      Index maxdfs = xprune(krep); \n      \n      StorageIndex kpar;\n  
    do \n      {\n        // For each unmarked kchild of krep\n        while (xdfs < maxdfs) \n        {\n          StorageIndex kchild = glu.lsub(xdfs); \n          xdfs++; \n          StorageIndex chmark = marker(kchild); \n          \n          if (chmark != jj ) \n          {\n            marker(kchild) = jj; \n            StorageIndex chperm = perm_r(kchild); \n            \n            if (chperm == emptyIdxLU) \n            {\n              // case kchild is in L: place it in L(*, j)\n              panel_lsub(nextl_col++) = kchild;\n              traits.mem_expand(panel_lsub, nextl_col, chmark);\n            }\n            else\n            {\n              // case kchild is in U :\n              // chrep = its supernode-rep. If its rep has been explored, \n              // update its repfnz(*)\n              StorageIndex chrep = glu.xsup(glu.supno(chperm)+1) - 1; \n              myfnz = repfnz_col(chrep); \n              \n              if (myfnz != emptyIdxLU) \n              { // Visited before \n                if (myfnz > chperm) \n                  repfnz_col(chrep) = chperm; \n              }\n              else \n              { // Cont. dfs at snode-rep of kchild\n                xplore(krep) = xdfs; \n                oldrep = krep; \n                krep = chrep; // Go deeper down G(L)\n                parent(krep) = oldrep; \n                repfnz_col(krep) = chperm; \n                xdfs = glu.xlsub(krep); \n                maxdfs = xprune(krep); \n                \n              } // end if myfnz != -1\n            } // end if chperm == -1 \n                \n          } // end if chmark !=jj\n        } // end while xdfs < maxdfs\n        \n        // krow has no more unexplored nbrs :\n        //    Place snode-rep krep in postorder DFS, if this \n        //    segment is seen for the first time. (Note that \n        //    \"repfnz(krep)\" may change later.)\n        //    Backtrack dfs to its parent\n        if(traits.update_segrep(krep,jj))\n        //if (marker1(krep) < jcol )\n        {\n          segrep(nseg) = krep; \n          ++nseg; \n          //marker1(krep) = jj; \n        }\n        \n        kpar = parent(krep); // Pop recursion, mimic recursion \n        if (kpar == emptyIdxLU) \n          break; // dfs done \n        krep = kpar; \n        xdfs = xplore(krep); \n        maxdfs = xprune(krep); \n\n      } while (kpar != emptyIdxLU); // Do until empty stack \n      \n    } // end if (myfnz = -1)\n\n  } // end if (kperm == -1)   \n}\n\n/**\n * \\brief Performs a symbolic factorization on a panel of columns [jcol, jcol+w)\n * \n * A supernode representative is the last column of a supernode.\n * The nonzeros in U[*,j] are segments that end at supernode representatives\n * \n * The routine returns a list of the supernodal representatives \n * in topological order of the dfs that generates them. This list is \n * a superset of the topological order of each individual column within \n * the panel.\n * The location of the first nonzero in each supernodal segment \n * (supernodal entry location) is also returned. Each column has \n * a separate list for this purpose. 
\n * \n * Two marker arrays are used for dfs:\n *    marker[i] == jj, if i was visited during dfs of current column jj;\n *    marker1[i] >= jcol, if i was visited by earlier columns in this panel; \n * \n * \\param[in] m number of rows in the matrix\n * \\param[in] w Panel size\n * \\param[in] jcol Starting column of the panel\n * \\param[in] A Input matrix in column-major storage\n * \\param[in] perm_r Row permutation\n * \\param[out] nseg Number of U segments\n * \\param[out] dense Accumulate the column vectors of the panel\n * \\param[out] panel_lsub Subscripts of the row in the panel \n * \\param[out] segrep Segment representative i.e. first nonzero row of each segment\n * \\param[out] repfnz First nonzero location in each row\n * \\param[out] xprune The pruned elimination tree\n * \\param[out] marker work vector\n * \\param  parent The elimination tree\n * \\param xplore work vector\n * \\param glu The global data structure\n * \n */\n\ntemplate <typename Scalar, typename StorageIndex>\nvoid SparseLUImpl<Scalar,StorageIndex>::panel_dfs(const Index m, const Index w, const Index jcol, MatrixType& A, IndexVector& perm_r, Index& nseg, ScalarVector& dense, IndexVector& panel_lsub, IndexVector& segrep, IndexVector& repfnz, IndexVector& xprune, IndexVector& marker, IndexVector& parent, IndexVector& xplore, GlobalLU_t& glu)\n{\n  Index nextl_col; // Next available position in panel_lsub[*,jj] \n  \n  // Initialize pointers \n  VectorBlock<IndexVector> marker1(marker, m, m); \n  nseg = 0; \n  \n  panel_dfs_traits<IndexVector> traits(jcol, marker1.data());\n  \n  // For each column in the panel \n  for (StorageIndex jj = StorageIndex(jcol); jj < jcol + w; jj++) \n  {\n    nextl_col = (jj - jcol) * m; \n    \n    VectorBlock<IndexVector> repfnz_col(repfnz, nextl_col, m); // First nonzero location in each row\n    VectorBlock<ScalarVector> dense_col(dense,nextl_col, m); // Accumulate a column vector here\n    \n    \n    // For each nnz in A[*, jj] do depth first search\n    for (typename MatrixType::InnerIterator it(A, jj); it; ++it)\n    {\n      Index krow = it.row(); \n      dense_col(krow) = it.value();\n      \n      StorageIndex kmark = marker(krow); \n      if (kmark == jj) \n        continue; // krow visited before, go to the next nonzero\n      \n      dfs_kernel(jj, perm_r, nseg, panel_lsub, segrep, repfnz_col, xprune, marker, parent,\n                   xplore, glu, nextl_col, krow, traits);\n    }// end for nonzeros in column jj\n    \n  } // end for column jj\n}\n\n} // end namespace internal\n} // end namespace Eigen\n\n#endif // SPARSELU_PANEL_DFS_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/SparseLU/SparseLU_pivotL.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n/* \n \n * NOTE: This file is the modified version of xpivotL.c file in SuperLU \n \n * -- SuperLU routine (version 3.0) --\n * Univ. of California Berkeley, Xerox Palo Alto Research Center,\n * and Lawrence Berkeley National Lab.\n * October 15, 2003\n *\n * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.\n *\n * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY\n * EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.\n *\n * Permission is hereby granted to use or copy this program for any\n * purpose, provided the above notices are retained on all copies.\n * Permission to modify the code and to distribute modified code is\n * granted, provided the above notices are retained, and a notice that\n * the code was modified is included with the above copyright notice.\n */\n#ifndef SPARSELU_PIVOTL_H\n#define SPARSELU_PIVOTL_H\n\nnamespace Eigen {\nnamespace internal {\n  \n/**\n * \\brief Performs the numerical pivotin on the current column of L, and the CDIV operation.\n * \n * Pivot policy :\n * (1) Compute thresh = u * max_(i>=j) abs(A_ij);\n * (2) IF user specifies pivot row k and abs(A_kj) >= thresh THEN\n *           pivot row = k;\n *       ELSE IF abs(A_jj) >= thresh THEN\n *           pivot row = j;\n *       ELSE\n *           pivot row = m;\n * \n *   Note: If you absolutely want to use a given pivot order, then set u=0.0.\n * \n * \\param jcol The current column of L\n * \\param diagpivotthresh diagonal pivoting threshold\n * \\param[in,out] perm_r Row permutation (threshold pivoting)\n * \\param[in] iperm_c column permutation - used to finf diagonal of Pc*A*Pc'\n * \\param[out] pivrow  The pivot row\n * \\param glu Global LU data\n * \\return 0 if success, i > 0 if U(i,i) is exactly zero \n * \n */\ntemplate <typename Scalar, typename StorageIndex>\nIndex SparseLUImpl<Scalar,StorageIndex>::pivotL(const Index jcol, const RealScalar& diagpivotthresh, IndexVector& perm_r, IndexVector& iperm_c, Index& pivrow, GlobalLU_t& glu)\n{\n  \n  Index fsupc = (glu.xsup)((glu.supno)(jcol)); // First column in the supernode containing the column jcol\n  Index nsupc = jcol - fsupc; // Number of columns in the supernode portion, excluding jcol; nsupc >=0\n  Index lptr = glu.xlsub(fsupc); // pointer to the starting location of the row subscripts for this supernode portion\n  Index nsupr = glu.xlsub(fsupc+1) - lptr; // Number of rows in the supernode\n  Index lda = glu.xlusup(fsupc+1) - glu.xlusup(fsupc); // leading dimension\n  Scalar* lu_sup_ptr = &(glu.lusup.data()[glu.xlusup(fsupc)]); // Start of the current supernode\n  Scalar* lu_col_ptr = &(glu.lusup.data()[glu.xlusup(jcol)]); // Start of jcol in the supernode\n  StorageIndex* lsub_ptr = &(glu.lsub.data()[lptr]); // Start of row indices of the supernode\n  \n  // Determine the largest abs numerical value for partial pivoting \n  Index diagind = iperm_c(jcol); // diagonal index \n  RealScalar pivmax(-1.0);\n  Index pivptr = nsupc; \n  Index diag = emptyIdxLU; \n  RealScalar rtemp;\n  Index isub, icol, itemp, k; \n  for (isub = nsupc; isub < nsupr; ++isub) {\n    using std::abs;\n    rtemp = abs(lu_col_ptr[isub]);\n    if (rtemp > 
pivmax) {\n      pivmax = rtemp; \n      pivptr = isub;\n    } \n    if (lsub_ptr[isub] == diagind) diag = isub;\n  }\n  \n  // Test for singularity\n  if ( pivmax <= RealScalar(0.0) ) {\n    // if pivmax == -1, the column is structurally empty, otherwise it is only numerically zero\n    pivrow = pivmax < RealScalar(0.0) ? diagind : lsub_ptr[pivptr];\n    perm_r(pivrow) = StorageIndex(jcol);\n    return (jcol+1);\n  }\n  \n  RealScalar thresh = diagpivotthresh * pivmax; \n  \n  // Choose appropriate pivotal element \n  \n  {\n    // Test if the diagonal element can be used as a pivot (given the threshold value)\n    if (diag >= 0 ) \n    {\n      // Diagonal element exists\n      using std::abs;\n      rtemp = abs(lu_col_ptr[diag]);\n      if (rtemp != RealScalar(0.0) && rtemp >= thresh) pivptr = diag;\n    }\n    pivrow = lsub_ptr[pivptr];\n  }\n  \n  // Record pivot row\n  perm_r(pivrow) = StorageIndex(jcol);\n  // Interchange row subscripts\n  if (pivptr != nsupc )\n  {\n    std::swap( lsub_ptr[pivptr], lsub_ptr[nsupc] );\n    // Interchange numerical values as well, for the two rows in the whole snode\n    // such that L is indexed the same way as A\n    for (icol = 0; icol <= nsupc; icol++)\n    {\n      itemp = pivptr + icol * lda; \n      std::swap(lu_sup_ptr[itemp], lu_sup_ptr[nsupc + icol * lda]);\n    }\n  }\n  // cdiv operations\n  Scalar temp = Scalar(1.0) / lu_col_ptr[nsupc];\n  for (k = nsupc+1; k < nsupr; k++)\n    lu_col_ptr[k] *= temp; \n  return 0;\n}\n\n} // end namespace internal\n} // end namespace Eigen\n\n#endif // SPARSELU_PIVOTL_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/SparseLU/SparseLU_pruneL.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n/* \n \n * NOTE: This file is the modified version of [s,d,c,z]pruneL.c file in SuperLU \n \n * -- SuperLU routine (version 2.0) --\n * Univ. of California Berkeley, Xerox Palo Alto Research Center,\n * and Lawrence Berkeley National Lab.\n * November 15, 1997\n *\n * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.\n *\n * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY\n * EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.\n *\n * Permission is hereby granted to use or copy this program for any\n * purpose, provided the above notices are retained on all copies.\n * Permission to modify the code and to distribute modified code is\n * granted, provided the above notices are retained, and a notice that\n * the code was modified is included with the above copyright notice.\n */\n#ifndef SPARSELU_PRUNEL_H\n#define SPARSELU_PRUNEL_H\n\nnamespace Eigen {\nnamespace internal {\n\n/**\n * \\brief Prunes the L-structure.\n *\n * It prunes the L-structure  of supernodes whose L-structure contains the current pivot row \"pivrow\"\n * \n * \n * \\param jcol The current column of L\n * \\param[in] perm_r Row permutation\n * \\param[out] pivrow  The pivot row\n * \\param nseg Number of segments\n * \\param segrep \n * \\param repfnz\n * \\param[out] xprune \n * \\param glu Global LU data\n * \n */\ntemplate <typename Scalar, typename StorageIndex>\nvoid SparseLUImpl<Scalar,StorageIndex>::pruneL(const Index jcol, const IndexVector& perm_r, const Index pivrow, const Index nseg,\n                                               const IndexVector& segrep, BlockIndexVector repfnz, IndexVector& xprune, GlobalLU_t& glu)\n{\n  // For each supernode-rep irep in U(*,j]\n  Index jsupno = glu.supno(jcol); \n  Index i,irep,irep1; \n  bool movnum, do_prune = false; \n  Index kmin = 0, kmax = 0, minloc, maxloc,krow; \n  for (i = 0; i < nseg; i++)\n  {\n    irep = segrep(i); \n    irep1 = irep + 1; \n    do_prune = false; \n    \n    // Don't prune with a zero U-segment \n    if (repfnz(irep) == emptyIdxLU) continue; \n    \n    // If a snode overlaps with the next panel, then the U-segment\n    // is fragmented into two parts -- irep and irep1. We should let \n    // pruning occur at the rep-column in irep1s snode. 
\n    if (glu.supno(irep) == glu.supno(irep1) ) continue; // don't prune \n    \n    // If it has not been pruned & it has a nonz in row L(pivrow,i)\n    if (glu.supno(irep) != jsupno )\n    {\n      if ( xprune (irep) >= glu.xlsub(irep1) )\n      {\n        kmin = glu.xlsub(irep);\n        kmax = glu.xlsub(irep1) - 1; \n        for (krow = kmin; krow <= kmax; krow++)\n        {\n          if (glu.lsub(krow) == pivrow) \n          {\n            do_prune = true; \n            break; \n          }\n        }\n      }\n      \n      if (do_prune) \n      {\n        // do a quicksort-type partition\n        // movnum=true means that the num values have to be exchanged\n        movnum = false; \n        if (irep == glu.xsup(glu.supno(irep)) ) // Snode of size 1 \n          movnum = true; \n        \n        while (kmin <= kmax)\n        {\n          if (perm_r(glu.lsub(kmax)) == emptyIdxLU)\n            kmax--; \n          else if ( perm_r(glu.lsub(kmin)) != emptyIdxLU)\n            kmin++;\n          else \n          {\n            // kmin below pivrow (not yet pivoted), and kmax\n            // above pivrow: interchange the two subscripts\n            std::swap(glu.lsub(kmin), glu.lsub(kmax)); \n            \n            // If the supernode has only one column, then we \n            // only keep one set of subscripts. For any subscript\n            // interchange performed, similar interchange must be \n            // done on the numerical values. \n            if (movnum) \n            {\n              minloc = glu.xlusup(irep) + ( kmin - glu.xlsub(irep) ); \n              maxloc = glu.xlusup(irep) + ( kmax - glu.xlsub(irep) ); \n              std::swap(glu.lusup(minloc), glu.lusup(maxloc)); \n            }\n            kmin++;\n            kmax--;\n          }\n        } // end while \n        \n        xprune(irep) = StorageIndex(kmin);  //Pruning \n      } // end if do_prune \n    } // end pruning \n  } // End for each U-segment\n}\n\n} // end namespace internal\n} // end namespace Eigen\n\n#endif // SPARSELU_PRUNEL_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/SparseLU/SparseLU_relax_snode.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n/* This file is a modified version of heap_relax_snode.c file in SuperLU\n * -- SuperLU routine (version 3.0) --\n * Univ. of California Berkeley, Xerox Palo Alto Research Center,\n * and Lawrence Berkeley National Lab.\n * October 15, 2003\n *\n * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.\n *\n * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY\n * EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.\n *\n * Permission is hereby granted to use or copy this program for any\n * purpose, provided the above notices are retained on all copies.\n * Permission to modify the code and to distribute modified code is\n * granted, provided the above notices are retained, and a notice that\n * the code was modified is included with the above copyright notice.\n */\n\n#ifndef SPARSELU_RELAX_SNODE_H\n#define SPARSELU_RELAX_SNODE_H\n\nnamespace Eigen {\n\nnamespace internal {\n \n/** \n * \\brief Identify the initial relaxed supernodes\n * \n * This routine is applied to a column elimination tree. \n * It assumes that the matrix has been reordered according to the postorder of the etree\n * \\param n  the number of columns\n * \\param et elimination tree \n * \\param relax_columns Maximum number of columns allowed in a relaxed snode \n * \\param descendants Number of descendants of each node in the etree\n * \\param relax_end last column in a supernode\n */\ntemplate <typename Scalar, typename StorageIndex>\nvoid SparseLUImpl<Scalar,StorageIndex>::relax_snode (const Index n, IndexVector& et, const Index relax_columns, IndexVector& descendants, IndexVector& relax_end)\n{\n  \n  // compute the number of descendants of each node in the etree\n  Index parent; \n  relax_end.setConstant(emptyIdxLU);\n  descendants.setZero();\n  for (Index j = 0; j < n; j++) \n  {\n    parent = et(j);\n    if (parent != n) // not the dummy root\n      descendants(parent) += descendants(j) + 1;\n  }\n  // Identify the relaxed supernodes by postorder traversal of the etree\n  Index snode_start; // beginning of a snode \n  for (Index j = 0; j < n; )\n  {\n    parent = et(j);\n    snode_start = j; \n    while ( parent != n && descendants(parent) < relax_columns ) \n    {\n      j = parent; \n      parent = et(j);\n    }\n    // Found a supernode in postordered etree, j is the last column \n    relax_end(snode_start) = StorageIndex(j); // Record last column\n    j++;\n    // Search for a new leaf\n    while (descendants(j) != 0 && j < n) j++;\n  } // End postorder traversal of the etree\n  \n}\n\n} // end namespace internal\n\n} // end namespace Eigen\n#endif\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/SparseQR/SparseQR.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2012-2013 Desire Nuentsa <desire.nuentsa_wakam@inria.fr>\n// Copyright (C) 2012-2014 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_SPARSE_QR_H\n#define EIGEN_SPARSE_QR_H\n\nnamespace Eigen {\n\ntemplate<typename MatrixType, typename OrderingType> class SparseQR;\ntemplate<typename SparseQRType> struct SparseQRMatrixQReturnType;\ntemplate<typename SparseQRType> struct SparseQRMatrixQTransposeReturnType;\ntemplate<typename SparseQRType, typename Derived> struct SparseQR_QProduct;\nnamespace internal {\n  template <typename SparseQRType> struct traits<SparseQRMatrixQReturnType<SparseQRType> >\n  {\n    typedef typename SparseQRType::MatrixType ReturnType;\n    typedef typename ReturnType::StorageIndex StorageIndex;\n    typedef typename ReturnType::StorageKind StorageKind;\n    enum {\n      RowsAtCompileTime = Dynamic,\n      ColsAtCompileTime = Dynamic\n    };\n  };\n  template <typename SparseQRType> struct traits<SparseQRMatrixQTransposeReturnType<SparseQRType> >\n  {\n    typedef typename SparseQRType::MatrixType ReturnType;\n  };\n  template <typename SparseQRType, typename Derived> struct traits<SparseQR_QProduct<SparseQRType, Derived> >\n  {\n    typedef typename Derived::PlainObject ReturnType;\n  };\n} // End namespace internal\n\n/**\n  * \\ingroup SparseQR_Module\n  * \\class SparseQR\n  * \\brief Sparse left-looking QR factorization with numerical column pivoting\n  * \n  * This class implements a left-looking QR decomposition of sparse matrices\n  * with numerical column pivoting.\n  * When a column has a norm less than a given tolerance\n  * it is implicitly permuted to the end. The QR factorization thus obtained is \n  * given by A*P = Q*R where R is upper triangular or trapezoidal. \n  * \n  * P is the column permutation which is the product of the fill-reducing and the\n  * numerical permutations. Use colsPermutation() to get it.\n  * \n  * Q is the orthogonal matrix represented as products of Householder reflectors. \n  * Use matrixQ() to get an expression and matrixQ().adjoint() to get the adjoint.\n  * You can then apply it to a vector.\n  * \n  * R is the sparse triangular or trapezoidal matrix. The later occurs when A is rank-deficient.\n  * matrixR().topLeftCorner(rank(), rank()) always returns a triangular factor of full rank.\n  * \n  * \\tparam _MatrixType The type of the sparse matrix A, must be a column-major SparseMatrix<>\n  * \\tparam _OrderingType The fill-reducing ordering method. See the \\link OrderingMethods_Module \n  *  OrderingMethods \\endlink module for the list of built-in and external ordering methods.\n  * \n  * \\implsparsesolverconcept\n  *\n  * The numerical pivoting strategy and default threshold are the same as in SuiteSparse QR, and\n  * detailed in the following paper:\n  * <i>\n  * Tim Davis, \"Algorithm 915, SuiteSparseQR: Multifrontal Multithreaded Rank-Revealing\n  * Sparse QR Factorization, ACM Trans. on Math. Soft. 38(1), 2011.\n  * </i>\n  * Even though it is qualified as \"rank-revealing\", this strategy might fail for some \n  * rank deficient problems. 
When this class is used to solve linear or least-square problems\n  * it is thus strongly recommended to check the accuracy of the computed solution. If it\n  * fails, it usually helps to increase the threshold with setPivotThreshold.\n  * \n  * \\warning The input sparse matrix A must be in compressed mode (see SparseMatrix::makeCompressed()).\n  * \\warning For complex matrices matrixQ().transpose() will actually return the adjoint matrix.\n  * \n  */\ntemplate<typename _MatrixType, typename _OrderingType>\nclass SparseQR : public SparseSolverBase<SparseQR<_MatrixType,_OrderingType> >\n{\n  protected:\n    typedef SparseSolverBase<SparseQR<_MatrixType,_OrderingType> > Base;\n    using Base::m_isInitialized;\n  public:\n    using Base::_solve_impl;\n    typedef _MatrixType MatrixType;\n    typedef _OrderingType OrderingType;\n    typedef typename MatrixType::Scalar Scalar;\n    typedef typename MatrixType::RealScalar RealScalar;\n    typedef typename MatrixType::StorageIndex StorageIndex;\n    typedef SparseMatrix<Scalar,ColMajor,StorageIndex> QRMatrixType;\n    typedef Matrix<StorageIndex, Dynamic, 1> IndexVector;\n    typedef Matrix<Scalar, Dynamic, 1> ScalarVector;\n    typedef PermutationMatrix<Dynamic, Dynamic, StorageIndex> PermutationType;\n\n    enum {\n      ColsAtCompileTime = MatrixType::ColsAtCompileTime,\n      MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime\n    };\n    \n  public:\n    SparseQR () :  m_analysisIsok(false), m_lastError(\"\"), m_useDefaultThreshold(true),m_isQSorted(false),m_isEtreeOk(false)\n    { }\n    \n    /** Construct a QR factorization of the matrix \\a mat.\n      * \n      * \\warning The matrix \\a mat must be in compressed mode (see SparseMatrix::makeCompressed()).\n      * \n      * \\sa compute()\n      */\n    explicit SparseQR(const MatrixType& mat) : m_analysisIsok(false), m_lastError(\"\"), m_useDefaultThreshold(true),m_isQSorted(false),m_isEtreeOk(false)\n    {\n      compute(mat);\n    }\n    \n    /** Computes the QR factorization of the sparse matrix \\a mat.\n      * \n      * \\warning The matrix \\a mat must be in compressed mode (see SparseMatrix::makeCompressed()).\n      * \n      * \\sa analyzePattern(), factorize()\n      */\n    void compute(const MatrixType& mat)\n    {\n      analyzePattern(mat);\n      factorize(mat);\n    }\n    void analyzePattern(const MatrixType& mat);\n    void factorize(const MatrixType& mat);\n    \n    /** \\returns the number of rows of the represented matrix. \n      */\n    inline Index rows() const { return m_pmat.rows(); }\n    \n    /** \\returns the number of columns of the represented matrix. \n      */\n    inline Index cols() const { return m_pmat.cols();}\n    \n    /** \\returns a const reference to the \\b sparse upper triangular matrix R of the QR factorization.\n      * \\warning The entries of the returned matrix are not sorted. This means that using it in algorithms\n      *          expecting sorted entries will fail. This includes random coefficient accesses (SparseMatrix::coeff()),\n      *          and coefficient-wise operations. 
Matrix products and triangular solves are fine though.\n      *\n      * To sort the entries, you can assign it to a row-major matrix, and if a column-major matrix\n      * is required, you can copy it again:\n      * \\code\n      * SparseMatrix<double>          R  = qr.matrixR();  // column-major, not sorted!\n      * SparseMatrix<double,RowMajor> Rr = qr.matrixR();  // row-major, sorted\n      * SparseMatrix<double>          Rc = Rr;            // column-major, sorted\n      * \\endcode\n      */\n    const QRMatrixType& matrixR() const { return m_R; }\n    \n    /** \\returns the number of non linearly dependent columns as determined by the pivoting threshold.\n      *\n      * \\sa setPivotThreshold()\n      */\n    Index rank() const\n    {\n      eigen_assert(m_isInitialized && \"The factorization should be called first, use compute()\");\n      return m_nonzeropivots; \n    }\n    \n    /** \\returns an expression of the matrix Q as products of sparse Householder reflectors.\n    * The common usage of this function is to apply it to a dense matrix or vector\n    * \\code\n    * VectorXd B1, B2;\n    * // Initialize B1\n    * B2 = matrixQ() * B1;\n    * \\endcode\n    *\n    * To get a plain SparseMatrix representation of Q:\n    * \\code\n    * SparseMatrix<double> Q;\n    * Q = SparseQR<SparseMatrix<double> >(A).matrixQ();\n    * \\endcode\n    * Internally, this call simply performs a sparse product between the matrix Q\n    * and a sparse identity matrix. However, due to the fact that the sparse\n    * reflectors are stored unsorted, two transpositions are needed to sort\n    * them before performing the product.\n    */\n    SparseQRMatrixQReturnType<SparseQR> matrixQ() const \n    { return SparseQRMatrixQReturnType<SparseQR>(*this); }\n    \n    /** \\returns a const reference to the column permutation P that was applied to A such that A*P = Q*R\n      * It is the combination of the fill-in reducing permutation and numerical column pivoting.\n      */\n    const PermutationType& colsPermutation() const\n    { \n      eigen_assert(m_isInitialized && \"Decomposition is not initialized.\");\n      return m_outputPerm_c;\n    }\n    \n    /** \\returns A string describing the type of error.\n      * This method is provided to ease debugging, not to handle errors.\n      */\n    std::string lastErrorMessage() const { return m_lastError; }\n    \n    /** \\internal */\n    template<typename Rhs, typename Dest>\n    bool _solve_impl(const MatrixBase<Rhs> &B, MatrixBase<Dest> &dest) const\n    {\n      eigen_assert(m_isInitialized && \"The factorization should be called first, use compute()\");\n      eigen_assert(this->rows() == B.rows() && \"SparseQR::solve() : invalid number of rows in the right hand side matrix\");\n\n      Index rank = this->rank();\n      \n      // Compute Q^* * b;\n      typename Dest::PlainObject y, b;\n      y = this->matrixQ().adjoint() * B;\n      b = y;\n      \n      // Solve with the triangular matrix R\n      y.resize((std::max<Index>)(cols(),y.rows()),y.cols());\n      y.topRows(rank) = this->matrixR().topLeftCorner(rank, rank).template triangularView<Upper>().solve(b.topRows(rank));\n      y.bottomRows(y.rows()-rank).setZero();\n      \n      // Apply the column permutation\n      if (m_perm_c.size())  dest = colsPermutation() * y.topRows(cols());\n      else                  dest = y.topRows(cols());\n      \n      m_info = Success;\n      return true;\n    }\n\n    /** Sets the threshold that is used to determine linearly dependent columns during 
the factorization.\n      *\n      * In practice, if during the factorization the norm of the column that has to be eliminated is below\n      * this threshold, then the entire column is treated as zero, and it is moved to the end.\n      */\n    void setPivotThreshold(const RealScalar& threshold)\n    {\n      m_useDefaultThreshold = false;\n      m_threshold = threshold;\n    }\n    \n    /** \\returns the solution X of \\f$ A X = B \\f$ using the current decomposition of A.\n      *\n      * \\sa compute()\n      */\n    template<typename Rhs>\n    inline const Solve<SparseQR, Rhs> solve(const MatrixBase<Rhs>& B) const \n    {\n      eigen_assert(m_isInitialized && \"The factorization should be called first, use compute()\");\n      eigen_assert(this->rows() == B.rows() && \"SparseQR::solve() : invalid number of rows in the right hand side matrix\");\n      return Solve<SparseQR, Rhs>(*this, B.derived());\n    }\n    template<typename Rhs>\n    inline const Solve<SparseQR, Rhs> solve(const SparseMatrixBase<Rhs>& B) const\n    {\n          eigen_assert(m_isInitialized && \"The factorization should be called first, use compute()\");\n          eigen_assert(this->rows() == B.rows() && \"SparseQR::solve() : invalid number of rows in the right hand side matrix\");\n          return Solve<SparseQR, Rhs>(*this, B.derived());\n    }\n    \n    /** \\brief Reports whether previous computation was successful.\n      *\n      * \\returns \\c Success if computation was successful,\n      *          \\c NumericalIssue if the QR factorization reports a numerical problem\n      *          \\c InvalidInput if the input matrix is invalid\n      *\n      */\n    ComputationInfo info() const\n    {\n      eigen_assert(m_isInitialized && \"Decomposition is not initialized.\");\n      return m_info;\n    }\n\n\n    /** \\internal */\n    inline void _sort_matrix_Q()\n    {\n      if(this->m_isQSorted) return;\n      // The matrix Q is sorted during the transposition\n      SparseMatrix<Scalar, RowMajor, Index> mQrm(this->m_Q);\n      this->m_Q = mQrm;\n      this->m_isQSorted = true;\n    }\n\n    \n  protected:\n    bool m_analysisIsok;\n    bool m_factorizationIsok;\n    mutable ComputationInfo m_info;\n    std::string m_lastError;\n    QRMatrixType m_pmat;            // Temporary matrix\n    QRMatrixType m_R;               // The triangular factor matrix\n    QRMatrixType m_Q;               // The orthogonal reflectors\n    ScalarVector m_hcoeffs;         // The Householder coefficients\n    PermutationType m_perm_c;       // Fill-reducing  Column  permutation\n    PermutationType m_pivotperm;    // The permutation for rank revealing\n    PermutationType m_outputPerm_c; // The final column permutation\n    RealScalar m_threshold;         // Threshold to determine null Householder reflections\n    bool m_useDefaultThreshold;     // Use default threshold\n    Index m_nonzeropivots;          // Number of non zero pivots found\n    IndexVector m_etree;            // Column elimination tree\n    IndexVector m_firstRowElt;      // First element in each row\n    bool m_isQSorted;               // whether Q is sorted or not\n    bool m_isEtreeOk;               // whether the elimination tree matches the initial input matrix\n    \n    template <typename, typename > friend struct SparseQR_QProduct;\n    \n};\n\n/** \\brief Preprocessing step of a QR factorization \n  * \n  * \\warning The matrix \\a mat must be in compressed mode (see SparseMatrix::makeCompressed()).\n  * \n  * In 
this step, the fill-reducing permutation is computed and applied to the columns of A\n  * and the column elimination tree is computed as well. Only the sparsity pattern of \\a mat is exploited.\n  * \n  * \\note In this step it is assumed that there is no empty row in the matrix \\a mat.\n  */\ntemplate <typename MatrixType, typename OrderingType>\nvoid SparseQR<MatrixType,OrderingType>::analyzePattern(const MatrixType& mat)\n{\n  eigen_assert(mat.isCompressed() && \"SparseQR requires a sparse matrix in compressed mode. Call .makeCompressed() before passing it to SparseQR\");\n  // Copy to a column-major matrix if the input is row-major\n  typename internal::conditional<MatrixType::IsRowMajor,QRMatrixType,const MatrixType&>::type matCpy(mat);\n  // Compute the column fill reducing ordering\n  OrderingType ord; \n  ord(matCpy, m_perm_c); \n  Index n = mat.cols();\n  Index m = mat.rows();\n  Index diagSize = (std::min)(m,n);\n  \n  if (!m_perm_c.size())\n  {\n    m_perm_c.resize(n);\n    m_perm_c.indices().setLinSpaced(n, 0,StorageIndex(n-1));\n  }\n  \n  // Compute the column elimination tree of the permuted matrix\n  m_outputPerm_c = m_perm_c.inverse();\n  internal::coletree(matCpy, m_etree, m_firstRowElt, m_outputPerm_c.indices().data());\n  m_isEtreeOk = true;\n  \n  m_R.resize(m, n);\n  m_Q.resize(m, diagSize);\n  \n  // Allocate space for nonzero elements: rough estimation\n  m_R.reserve(2*mat.nonZeros()); //FIXME Get a more accurate estimation through symbolic factorization with the etree\n  m_Q.reserve(2*mat.nonZeros());\n  m_hcoeffs.resize(diagSize);\n  m_analysisIsok = true;\n}\n\n/** \\brief Performs the numerical QR factorization of the input matrix\n  * \n  * The function SparseQR::analyzePattern(const MatrixType&) must have been called beforehand with\n  * a matrix having the same sparsity pattern as \\a mat.\n  * \n  * \\param mat The sparse column-major matrix\n  */\ntemplate <typename MatrixType, typename OrderingType>\nvoid SparseQR<MatrixType,OrderingType>::factorize(const MatrixType& mat)\n{\n  using std::abs;\n  \n  eigen_assert(m_analysisIsok && \"analyzePattern() should be called before this step\");\n  StorageIndex m = StorageIndex(mat.rows());\n  StorageIndex n = StorageIndex(mat.cols());\n  StorageIndex diagSize = (std::min)(m,n);\n  IndexVector mark((std::max)(m,n)); mark.setConstant(-1);  // Record the visited nodes\n  IndexVector Ridx(n), Qidx(m);                             // Store temporarily the row indexes for the current column of R and Q\n  Index nzcolR, nzcolQ;                                     // Number of nonzero for the current column of R and Q\n  ScalarVector tval(m);                                     // The dense vector used to compute the current column\n  RealScalar pivotThreshold = m_threshold;\n  \n  m_R.setZero();\n  m_Q.setZero();\n  m_pmat = mat;\n  if(!m_isEtreeOk)\n  {\n    m_outputPerm_c = m_perm_c.inverse();\n    internal::coletree(m_pmat, m_etree, m_firstRowElt, m_outputPerm_c.indices().data());\n    m_isEtreeOk = true;\n  }\n\n  m_pmat.uncompress(); // To have the innerNonZeroPtr allocated\n  \n  // Apply the fill-in reducing permutation lazily:\n  {\n    // If the input is row major, copy the original column indices,\n    // otherwise directly use the input matrix\n    // \n    IndexVector originalOuterIndicesCpy;\n    const StorageIndex *originalOuterIndices = mat.outerIndexPtr();\n    if(MatrixType::IsRowMajor)\n    {\n      originalOuterIndicesCpy = IndexVector::Map(m_pmat.outerIndexPtr(),n+1);\n      originalOuterIndices = 
originalOuterIndicesCpy.data();\n    }\n    \n    for (int i = 0; i < n; i++)\n    {\n      Index p = m_perm_c.size() ? m_perm_c.indices()(i) : i;\n      m_pmat.outerIndexPtr()[p] = originalOuterIndices[i]; \n      m_pmat.innerNonZeroPtr()[p] = originalOuterIndices[i+1] - originalOuterIndices[i]; \n    }\n  }\n  \n  /* Compute the default threshold as in MatLab, see:\n   * Tim Davis, \"Algorithm 915, SuiteSparseQR: Multifrontal Multithreaded Rank-Revealing\n   * Sparse QR Factorization, ACM Trans. on Math. Soft. 38(1), 2011, Page 8:3 \n   */\n  if(m_useDefaultThreshold) \n  {\n    RealScalar max2Norm = 0.0;\n    for (int j = 0; j < n; j++) max2Norm = numext::maxi(max2Norm, m_pmat.col(j).norm());\n    if(max2Norm==RealScalar(0))\n      max2Norm = RealScalar(1);\n    pivotThreshold = 20 * (m + n) * max2Norm * NumTraits<RealScalar>::epsilon();\n  }\n  \n  // Initialize the numerical permutation\n  m_pivotperm.setIdentity(n);\n  \n  StorageIndex nonzeroCol = 0; // Record the number of valid pivots\n  m_Q.startVec(0);\n\n  // Left looking rank-revealing QR factorization: compute a column of R and Q at a time\n  for (StorageIndex col = 0; col < n; ++col)\n  {\n    mark.setConstant(-1);\n    m_R.startVec(col);\n    mark(nonzeroCol) = col;\n    Qidx(0) = nonzeroCol;\n    nzcolR = 0; nzcolQ = 1;\n    bool found_diag = nonzeroCol>=m;\n    tval.setZero(); \n    \n    // Symbolic factorization: find the nonzero locations of the column k of the factors R and Q, i.e.,\n    // all the nodes (with indexes lower than rank) reachable through the column elimination tree (etree) rooted at node k.\n    // Note: if the diagonal entry does not exist, then its contribution must be explicitly added,\n    // thus the trick with found_diag that permits to do one more iteration on the diagonal element if this one has not been found.\n    for (typename QRMatrixType::InnerIterator itp(m_pmat, col); itp || !found_diag; ++itp)\n    {\n      StorageIndex curIdx = nonzeroCol;\n      if(itp) curIdx = StorageIndex(itp.row());\n      if(curIdx == nonzeroCol) found_diag = true;\n      \n      // Get the nonzeros indexes of the current column of R\n      StorageIndex st = m_firstRowElt(curIdx); // The traversal of the etree starts here\n      if (st < 0 )\n      {\n        m_lastError = \"Empty row found during numerical factorization\";\n        m_info = InvalidInput;\n        return;\n      }\n\n      // Traverse the etree \n      Index bi = nzcolR;\n      for (; mark(st) != col; st = m_etree(st))\n      {\n        Ridx(nzcolR) = st;  // Add this row to the list,\n        mark(st) = col;     // and mark this row as visited\n        nzcolR++;\n      }\n\n      // Reverse the list to get the topological ordering\n      Index nt = nzcolR-bi;\n      for(Index i = 0; i < nt/2; i++) std::swap(Ridx(bi+i), Ridx(nzcolR-i-1));\n       \n      // Copy the current (curIdx,pcol) value of the input matrix\n      if(itp) tval(curIdx) = itp.value();\n      else    tval(curIdx) = Scalar(0);\n      \n      // Compute the pattern of Q(:,k)\n      if(curIdx > nonzeroCol && mark(curIdx) != col ) \n      {\n        Qidx(nzcolQ) = curIdx;  // Add this row to the pattern of Q,\n        mark(curIdx) = col;     // and mark it as visited\n        nzcolQ++;\n      }\n    }\n\n    // Browse all the indexes of R(:,col) in reverse order\n    for (Index i = nzcolR-1; i >= 0; i--)\n    {\n      Index curIdx = Ridx(i);\n      \n      // Apply the curIdx-th householder vector to the current column (temporarily stored into tval)\n      Scalar tdot(0);\n      \n 
     // First compute q' * tval\n      tdot = m_Q.col(curIdx).dot(tval);\n\n      tdot *= m_hcoeffs(curIdx);\n      \n      // Then update tval = tval - q * tau\n      // FIXME: tval -= tdot * m_Q.col(curIdx) should amount to the same (need to check/add support for efficient \"dense ?= sparse\")\n      for (typename QRMatrixType::InnerIterator itq(m_Q, curIdx); itq; ++itq)\n        tval(itq.row()) -= itq.value() * tdot;\n\n      // Detect fill-in for the current column of Q\n      if(m_etree(Ridx(i)) == nonzeroCol)\n      {\n        for (typename QRMatrixType::InnerIterator itq(m_Q, curIdx); itq; ++itq)\n        {\n          StorageIndex iQ = StorageIndex(itq.row());\n          if (mark(iQ) != col)\n          {\n            Qidx(nzcolQ++) = iQ;  // Add this row to the pattern of Q,\n            mark(iQ) = col;       // and mark it as visited\n          }\n        }\n      }\n    } // End update current column\n    \n    Scalar tau = RealScalar(0);\n    RealScalar beta = 0;\n    \n    if(nonzeroCol < diagSize)\n    {\n      // Compute the Householder reflection that eliminate the current column\n      // FIXME this step should call the Householder module.\n      Scalar c0 = nzcolQ ? tval(Qidx(0)) : Scalar(0);\n      \n      // First, the squared norm of Q((col+1):m, col)\n      RealScalar sqrNorm = 0.;\n      for (Index itq = 1; itq < nzcolQ; ++itq) sqrNorm += numext::abs2(tval(Qidx(itq)));\n      if(sqrNorm == RealScalar(0) && numext::imag(c0) == RealScalar(0))\n      {\n        beta = numext::real(c0);\n        tval(Qidx(0)) = 1;\n      }\n      else\n      {\n        using std::sqrt;\n        beta = sqrt(numext::abs2(c0) + sqrNorm);\n        if(numext::real(c0) >= RealScalar(0))\n          beta = -beta;\n        tval(Qidx(0)) = 1;\n        for (Index itq = 1; itq < nzcolQ; ++itq)\n          tval(Qidx(itq)) /= (c0 - beta);\n        tau = numext::conj((beta-c0) / beta);\n          \n      }\n    }\n\n    // Insert values in R\n    for (Index  i = nzcolR-1; i >= 0; i--)\n    {\n      Index curIdx = Ridx(i);\n      if(curIdx < nonzeroCol) \n      {\n        m_R.insertBackByOuterInnerUnordered(col, curIdx) = tval(curIdx);\n        tval(curIdx) = Scalar(0.);\n      }\n    }\n\n    if(nonzeroCol < diagSize && abs(beta) >= pivotThreshold)\n    {\n      m_R.insertBackByOuterInner(col, nonzeroCol) = beta;\n      // The householder coefficient\n      m_hcoeffs(nonzeroCol) = tau;\n      // Record the householder reflections\n      for (Index itq = 0; itq < nzcolQ; ++itq)\n      {\n        Index iQ = Qidx(itq);\n        m_Q.insertBackByOuterInnerUnordered(nonzeroCol,iQ) = tval(iQ);\n        tval(iQ) = Scalar(0.);\n      }\n      nonzeroCol++;\n      if(nonzeroCol<diagSize)\n        m_Q.startVec(nonzeroCol);\n    }\n    else\n    {\n      // Zero pivot found: move implicitly this column to the end\n      for (Index j = nonzeroCol; j < n-1; j++) \n        std::swap(m_pivotperm.indices()(j), m_pivotperm.indices()[j+1]);\n      \n      // Recompute the column elimination tree\n      internal::coletree(m_pmat, m_etree, m_firstRowElt, m_pivotperm.indices().data());\n      m_isEtreeOk = false;\n    }\n  }\n  \n  m_hcoeffs.tail(diagSize-nonzeroCol).setZero();\n  \n  // Finalize the column pointers of the sparse matrices R and Q\n  m_Q.finalize();\n  m_Q.makeCompressed();\n  m_R.finalize();\n  m_R.makeCompressed();\n  m_isQSorted = false;\n\n  m_nonzeropivots = nonzeroCol;\n  \n  if(nonzeroCol<n)\n  {\n    // Permute the triangular factor to put the 'dead' columns to the end\n    QRMatrixType tempR(m_R);\n    
m_R = tempR * m_pivotperm;\n    \n    // Update the column permutation\n    m_outputPerm_c = m_outputPerm_c * m_pivotperm;\n  }\n  \n  m_isInitialized = true; \n  m_factorizationIsok = true;\n  m_info = Success;\n}\n\ntemplate <typename SparseQRType, typename Derived>\nstruct SparseQR_QProduct : ReturnByValue<SparseQR_QProduct<SparseQRType, Derived> >\n{\n  typedef typename SparseQRType::QRMatrixType MatrixType;\n  typedef typename SparseQRType::Scalar Scalar;\n  // Get the references \n  SparseQR_QProduct(const SparseQRType& qr, const Derived& other, bool transpose) : \n  m_qr(qr),m_other(other),m_transpose(transpose) {}\n  inline Index rows() const { return m_qr.matrixQ().rows(); }\n  inline Index cols() const { return m_other.cols(); }\n  \n  // Assign to a vector\n  template<typename DesType>\n  void evalTo(DesType& res) const\n  {\n    Index m = m_qr.rows();\n    Index n = m_qr.cols();\n    Index diagSize = (std::min)(m,n);\n    res = m_other;\n    if (m_transpose)\n    {\n      eigen_assert(m_qr.m_Q.rows() == m_other.rows() && \"Non conforming object sizes\");\n      //Compute res = Q' * other column by column\n      for(Index j = 0; j < res.cols(); j++){\n        for (Index k = 0; k < diagSize; k++)\n        {\n          Scalar tau = Scalar(0);\n          tau = m_qr.m_Q.col(k).dot(res.col(j));\n          if(tau==Scalar(0)) continue;\n          tau = tau * m_qr.m_hcoeffs(k);\n          res.col(j) -= tau * m_qr.m_Q.col(k);\n        }\n      }\n    }\n    else\n    {\n      eigen_assert(m_qr.matrixQ().cols() == m_other.rows() && \"Non conforming object sizes\");\n\n      res.conservativeResize(rows(), cols());\n\n      // Compute res = Q * other column by column\n      for(Index j = 0; j < res.cols(); j++)\n      {\n        Index start_k = internal::is_identity<Derived>::value ? 
numext::mini(j,diagSize-1) : diagSize-1;\n        for (Index k = start_k; k >=0; k--)\n        {\n          Scalar tau = Scalar(0);\n          tau = m_qr.m_Q.col(k).dot(res.col(j));\n          if(tau==Scalar(0)) continue;\n          tau = tau * numext::conj(m_qr.m_hcoeffs(k));\n          res.col(j) -= tau * m_qr.m_Q.col(k);\n        }\n      }\n    }\n  }\n  \n  const SparseQRType& m_qr;\n  const Derived& m_other;\n  bool m_transpose; // TODO this actually means adjoint\n};\n\ntemplate<typename SparseQRType>\nstruct SparseQRMatrixQReturnType : public EigenBase<SparseQRMatrixQReturnType<SparseQRType> >\n{  \n  typedef typename SparseQRType::Scalar Scalar;\n  typedef Matrix<Scalar,Dynamic,Dynamic> DenseMatrix;\n  enum {\n    RowsAtCompileTime = Dynamic,\n    ColsAtCompileTime = Dynamic\n  };\n  explicit SparseQRMatrixQReturnType(const SparseQRType& qr) : m_qr(qr) {}\n  template<typename Derived>\n  SparseQR_QProduct<SparseQRType, Derived> operator*(const MatrixBase<Derived>& other)\n  {\n    return SparseQR_QProduct<SparseQRType,Derived>(m_qr,other.derived(),false);\n  }\n  // To use for operations with the adjoint of Q\n  SparseQRMatrixQTransposeReturnType<SparseQRType> adjoint() const\n  {\n    return SparseQRMatrixQTransposeReturnType<SparseQRType>(m_qr);\n  }\n  inline Index rows() const { return m_qr.rows(); }\n  inline Index cols() const { return m_qr.rows(); }\n  // To use for operations with the transpose of Q FIXME this is the same as adjoint at the moment\n  SparseQRMatrixQTransposeReturnType<SparseQRType> transpose() const\n  {\n    return SparseQRMatrixQTransposeReturnType<SparseQRType>(m_qr);\n  }\n  const SparseQRType& m_qr;\n};\n\n// TODO this actually represents the adjoint of Q\ntemplate<typename SparseQRType>\nstruct SparseQRMatrixQTransposeReturnType\n{\n  explicit SparseQRMatrixQTransposeReturnType(const SparseQRType& qr) : m_qr(qr) {}\n  template<typename Derived>\n  SparseQR_QProduct<SparseQRType,Derived> operator*(const MatrixBase<Derived>& other)\n  {\n    return SparseQR_QProduct<SparseQRType,Derived>(m_qr,other.derived(), true);\n  }\n  const SparseQRType& m_qr;\n};\n\nnamespace internal {\n  \ntemplate<typename SparseQRType>\nstruct evaluator_traits<SparseQRMatrixQReturnType<SparseQRType> >\n{\n  typedef typename SparseQRType::MatrixType MatrixType;\n  typedef typename storage_kind_to_evaluator_kind<typename MatrixType::StorageKind>::Kind Kind;\n  typedef SparseShape Shape;\n};\n\ntemplate< typename DstXprType, typename SparseQRType>\nstruct Assignment<DstXprType, SparseQRMatrixQReturnType<SparseQRType>, internal::assign_op<typename DstXprType::Scalar,typename DstXprType::Scalar>, Sparse2Sparse>\n{\n  typedef SparseQRMatrixQReturnType<SparseQRType> SrcXprType;\n  typedef typename DstXprType::Scalar Scalar;\n  typedef typename DstXprType::StorageIndex StorageIndex;\n  static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &/*func*/)\n  {\n    typename DstXprType::PlainObject idMat(src.rows(), src.cols());\n    idMat.setIdentity();\n    // Sort the sparse householder reflectors if needed\n    const_cast<SparseQRType *>(&src.m_qr)->_sort_matrix_Q();\n    dst = SparseQR_QProduct<SparseQRType, DstXprType>(src.m_qr, idMat, false);\n  }\n};\n\ntemplate< typename DstXprType, typename SparseQRType>\nstruct Assignment<DstXprType, SparseQRMatrixQReturnType<SparseQRType>, internal::assign_op<typename DstXprType::Scalar,typename DstXprType::Scalar>, Sparse2Dense>\n{\n  typedef SparseQRMatrixQReturnType<SparseQRType> SrcXprType;\n  
typedef typename DstXprType::Scalar Scalar;\n  typedef typename DstXprType::StorageIndex StorageIndex;\n  static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &/*func*/)\n  {\n    dst = src.m_qr.matrixQ() * DstXprType::Identity(src.m_qr.rows(), src.m_qr.rows());\n  }\n};\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif\n"
  },
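For context on the SparseQR interface dumped above (its compressed-mode requirement, `compute()`, `solve()`, `info()`, `rank()`), here is a minimal usage sketch; the 4x3 matrix and right-hand side are made-up illustration data, not anything from this repository.

```cpp
// Minimal sketch of using Eigen::SparseQR (the class dumped above).
#include <Eigen/SparseCore>
#include <Eigen/SparseQR>
#include <iostream>

int main() {
  using SpMat = Eigen::SparseMatrix<double>;   // column-major, int indices
  SpMat A(4, 3);
  A.insert(0, 0) = 1.0;
  A.insert(1, 1) = 2.0;
  A.insert(2, 2) = 3.0;
  A.insert(3, 0) = 4.0;
  A.makeCompressed();  // SparseQR asserts isCompressed() in analyzePattern()

  Eigen::SparseQR<SpMat, Eigen::COLAMDOrdering<int>> qr(A);  // analyze + factorize
  if (qr.info() != Eigen::Success) {
    std::cerr << qr.lastErrorMessage() << '\n';
    return 1;
  }

  Eigen::VectorXd b(4);
  b << 1.0, 2.0, 3.0, 4.0;
  Eigen::VectorXd x = qr.solve(b);             // least-squares solution of A x = b
  std::cout << "rank = " << qr.rank() << "\nx = " << x.transpose() << '\n';
  return 0;
}
```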
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/StlSupport/StdDeque.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>\n// Copyright (C) 2009 Hauke Heibel <hauke.heibel@googlemail.com>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_STDDEQUE_H\n#define EIGEN_STDDEQUE_H\n\n#include \"details.h\"\n\n/**\n * This section contains a convenience MACRO which allows an easy specialization of\n * std::deque such that for data types with alignment issues the correct allocator\n * is used automatically.\n */\n#define EIGEN_DEFINE_STL_DEQUE_SPECIALIZATION(...) \\\nnamespace std \\\n{ \\\n  template<> \\\n  class deque<__VA_ARGS__, std::allocator<__VA_ARGS__> >           \\\n    : public deque<__VA_ARGS__, EIGEN_ALIGNED_ALLOCATOR<__VA_ARGS__> > \\\n  { \\\n    typedef deque<__VA_ARGS__, EIGEN_ALIGNED_ALLOCATOR<__VA_ARGS__> > deque_base; \\\n  public: \\\n    typedef __VA_ARGS__ value_type; \\\n    typedef deque_base::allocator_type allocator_type; \\\n    typedef deque_base::size_type size_type;  \\\n    typedef deque_base::iterator iterator;  \\\n    explicit deque(const allocator_type& a = allocator_type()) : deque_base(a) {}  \\\n    template<typename InputIterator> \\\n    deque(InputIterator first, InputIterator last, const allocator_type& a = allocator_type()) : deque_base(first, last, a) {} \\\n    deque(const deque& c) : deque_base(c) {}  \\\n    explicit deque(size_type num, const value_type& val = value_type()) : deque_base(num, val) {} \\\n    deque(iterator start_, iterator end_) : deque_base(start_, end_) {}  \\\n    deque& operator=(const deque& x) {  \\\n      deque_base::operator=(x);  \\\n      return *this;  \\\n    } \\\n  }; \\\n}\n\n// check whether we really need the std::deque specialization\n#if !EIGEN_HAS_CXX11_CONTAINERS && !(defined(_GLIBCXX_DEQUE) && (!EIGEN_GNUC_AT_LEAST(4,1))) /* Note that before gcc-4.1 we already have: std::deque::resize(size_type,const T&). 
*/\n\nnamespace std {\n\n#define EIGEN_STD_DEQUE_SPECIALIZATION_BODY \\\n  public:  \\\n    typedef T value_type; \\\n    typedef typename deque_base::allocator_type allocator_type; \\\n    typedef typename deque_base::size_type size_type;  \\\n    typedef typename deque_base::iterator iterator;  \\\n    typedef typename deque_base::const_iterator const_iterator;  \\\n    explicit deque(const allocator_type& a = allocator_type()) : deque_base(a) {}  \\\n    template<typename InputIterator> \\\n    deque(InputIterator first, InputIterator last, const allocator_type& a = allocator_type()) \\\n    : deque_base(first, last, a) {} \\\n    deque(const deque& c) : deque_base(c) {}  \\\n    explicit deque(size_type num, const value_type& val = value_type()) : deque_base(num, val) {} \\\n    deque(iterator start_, iterator end_) : deque_base(start_, end_) {}  \\\n    deque& operator=(const deque& x) {  \\\n      deque_base::operator=(x);  \\\n      return *this;  \\\n    }\n\n  template<typename T>\n  class deque<T,EIGEN_ALIGNED_ALLOCATOR<T> >\n    : public deque<EIGEN_WORKAROUND_MSVC_STL_SUPPORT(T),\n                   Eigen::aligned_allocator_indirection<EIGEN_WORKAROUND_MSVC_STL_SUPPORT(T)> >\n{\n  typedef deque<EIGEN_WORKAROUND_MSVC_STL_SUPPORT(T),\n                Eigen::aligned_allocator_indirection<EIGEN_WORKAROUND_MSVC_STL_SUPPORT(T)> > deque_base;\n  EIGEN_STD_DEQUE_SPECIALIZATION_BODY\n\n  void resize(size_type new_size)\n  { resize(new_size, T()); }\n\n#if defined(_DEQUE_)\n  // workaround MSVC std::deque implementation\n  void resize(size_type new_size, const value_type& x)\n  {\n    if (deque_base::size() < new_size)\n      deque_base::_Insert_n(deque_base::end(), new_size - deque_base::size(), x);\n    else if (new_size < deque_base::size())\n      deque_base::erase(deque_base::begin() + new_size, deque_base::end());\n  }\n  void push_back(const value_type& x)\n  { deque_base::push_back(x); } \n  void push_front(const value_type& x)\n  { deque_base::push_front(x); }\n  using deque_base::insert;  \n  iterator insert(const_iterator position, const value_type& x)\n  { return deque_base::insert(position,x); }\n  void insert(const_iterator position, size_type new_size, const value_type& x)\n  { deque_base::insert(position, new_size, x); }\n#else\n  // default implementation which should always work.\n  void resize(size_type new_size, const value_type& x)\n  {\n    if (new_size < deque_base::size())\n      deque_base::erase(deque_base::begin() + new_size, deque_base::end());\n    else if (new_size > deque_base::size())\n      deque_base::insert(deque_base::end(), new_size - deque_base::size(), x);\n  }\n#endif\n  };\n}\n\n#endif // check whether specialization is actually required\n\n#endif // EIGEN_STDDEQUE_H\n"
  },
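The `EIGEN_DEFINE_STL_DEQUE_SPECIALIZATION` macro above is meant to be invoked once, at global scope, per fixed-size vectorizable type; a minimal sketch of that usage (`Eigen::Vector4f` chosen arbitrarily):

```cpp
// Sketch of the macro defined above: after the one-time invocation, a plain
// std::deque<Eigen::Vector4f> transparently uses the aligned allocator.
#include <Eigen/StdDeque>
#include <Eigen/Dense>

EIGEN_DEFINE_STL_DEQUE_SPECIALIZATION(Eigen::Vector4f)

int main() {
  std::deque<Eigen::Vector4f> q;          // picks up EIGEN_ALIGNED_ALLOCATOR
  q.push_back(Eigen::Vector4f::Zero());
  q.resize(8, Eigen::Vector4f::Ones());   // resize(size_type, const T&) works too
  return q.size() == 8 ? 0 : 1;
}
```

`Eigen/StdList` and `Eigen/StdVector` below provide the analogous macros for `std::list` and `std::vector`.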
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/StlSupport/StdList.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2009 Hauke Heibel <hauke.heibel@googlemail.com>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_STDLIST_H\n#define EIGEN_STDLIST_H\n\n#include \"details.h\"\n\n/**\n * This section contains a convenience MACRO which allows an easy specialization of\n * std::list such that for data types with alignment issues the correct allocator\n * is used automatically.\n */\n#define EIGEN_DEFINE_STL_LIST_SPECIALIZATION(...) \\\nnamespace std \\\n{ \\\n  template<> \\\n  class list<__VA_ARGS__, std::allocator<__VA_ARGS__> >           \\\n    : public list<__VA_ARGS__, EIGEN_ALIGNED_ALLOCATOR<__VA_ARGS__> > \\\n  { \\\n    typedef list<__VA_ARGS__, EIGEN_ALIGNED_ALLOCATOR<__VA_ARGS__> > list_base; \\\n  public: \\\n    typedef __VA_ARGS__ value_type; \\\n    typedef list_base::allocator_type allocator_type; \\\n    typedef list_base::size_type size_type;  \\\n    typedef list_base::iterator iterator;  \\\n    explicit list(const allocator_type& a = allocator_type()) : list_base(a) {}  \\\n    template<typename InputIterator> \\\n    list(InputIterator first, InputIterator last, const allocator_type& a = allocator_type()) : list_base(first, last, a) {} \\\n    list(const list& c) : list_base(c) {}  \\\n    explicit list(size_type num, const value_type& val = value_type()) : list_base(num, val) {} \\\n    list(iterator start_, iterator end_) : list_base(start_, end_) {}  \\\n    list& operator=(const list& x) {  \\\n      list_base::operator=(x);  \\\n      return *this;  \\\n    } \\\n  }; \\\n}\n\n// check whether we really need the std::list specialization\n#if !EIGEN_HAS_CXX11_CONTAINERS && !(defined(_GLIBCXX_LIST) && (!EIGEN_GNUC_AT_LEAST(4,1))) /* Note that before gcc-4.1 we already have: std::list::resize(size_type,const T&). 
*/\n\nnamespace std\n{\n\n#define EIGEN_STD_LIST_SPECIALIZATION_BODY \\\n  public:  \\\n    typedef T value_type; \\\n    typedef typename list_base::allocator_type allocator_type; \\\n    typedef typename list_base::size_type size_type;  \\\n    typedef typename list_base::iterator iterator;  \\\n    typedef typename list_base::const_iterator const_iterator;  \\\n    explicit list(const allocator_type& a = allocator_type()) : list_base(a) {}  \\\n    template<typename InputIterator> \\\n    list(InputIterator first, InputIterator last, const allocator_type& a = allocator_type()) \\\n    : list_base(first, last, a) {} \\\n    list(const list& c) : list_base(c) {}  \\\n    explicit list(size_type num, const value_type& val = value_type()) : list_base(num, val) {} \\\n    list(iterator start_, iterator end_) : list_base(start_, end_) {}  \\\n    list& operator=(const list& x) {  \\\n    list_base::operator=(x);  \\\n    return *this; \\\n  }\n\n  template<typename T>\n  class list<T,EIGEN_ALIGNED_ALLOCATOR<T> >\n    : public list<EIGEN_WORKAROUND_MSVC_STL_SUPPORT(T),\n                  Eigen::aligned_allocator_indirection<EIGEN_WORKAROUND_MSVC_STL_SUPPORT(T)> >\n  {\n    typedef list<EIGEN_WORKAROUND_MSVC_STL_SUPPORT(T),\n                 Eigen::aligned_allocator_indirection<EIGEN_WORKAROUND_MSVC_STL_SUPPORT(T)> > list_base;\n    EIGEN_STD_LIST_SPECIALIZATION_BODY\n\n    void resize(size_type new_size)\n    { resize(new_size, T()); }\n\n    void resize(size_type new_size, const value_type& x)\n    {\n      if (list_base::size() < new_size)\n        list_base::insert(list_base::end(), new_size - list_base::size(), x);\n      else\n        while (new_size < list_base::size()) list_base::pop_back();\n    }\n\n#if defined(_LIST_)\n    // workaround MSVC std::list implementation\n    void push_back(const value_type& x)\n    { list_base::push_back(x); } \n    using list_base::insert;  \n    iterator insert(const_iterator position, const value_type& x)\n    { return list_base::insert(position,x); }\n    void insert(const_iterator position, size_type new_size, const value_type& x)\n    { list_base::insert(position, new_size, x); }\n#endif\n  };\n}\n\n#endif // check whether specialization is actually required\n\n#endif // EIGEN_STDLIST_H\n"
  },
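As an alternative to the macro, the aligned allocator can be named explicitly in the container type; a sketch with `std::list` (the point values are arbitrary):

```cpp
// Alternative to the macro above: spell out the aligned allocator.
// Eigen::Vector2d (16 bytes) is a fixed-size vectorizable type.
#include <Eigen/StdList>
#include <Eigen/Dense>
#include <list>

int main() {
  std::list<Eigen::Vector2d, Eigen::aligned_allocator<Eigen::Vector2d>> pts;
  pts.push_back(Eigen::Vector2d(1.0, 2.0));
  pts.emplace_back(3.0, 4.0);
  return pts.size() == 2 ? 0 : 1;
}
```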
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/StlSupport/StdVector.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>\n// Copyright (C) 2009 Hauke Heibel <hauke.heibel@googlemail.com>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_STDVECTOR_H\n#define EIGEN_STDVECTOR_H\n\n#include \"details.h\"\n\n/**\n * This section contains a convenience MACRO which allows an easy specialization of\n * std::vector such that for data types with alignment issues the correct allocator\n * is used automatically.\n */\n#define EIGEN_DEFINE_STL_VECTOR_SPECIALIZATION(...) \\\nnamespace std \\\n{ \\\n  template<> \\\n  class vector<__VA_ARGS__, std::allocator<__VA_ARGS__> >  \\\n    : public vector<__VA_ARGS__, EIGEN_ALIGNED_ALLOCATOR<__VA_ARGS__> > \\\n  { \\\n    typedef vector<__VA_ARGS__, EIGEN_ALIGNED_ALLOCATOR<__VA_ARGS__> > vector_base; \\\n  public: \\\n    typedef __VA_ARGS__ value_type; \\\n    typedef vector_base::allocator_type allocator_type; \\\n    typedef vector_base::size_type size_type;  \\\n    typedef vector_base::iterator iterator;  \\\n    explicit vector(const allocator_type& a = allocator_type()) : vector_base(a) {}  \\\n    template<typename InputIterator> \\\n    vector(InputIterator first, InputIterator last, const allocator_type& a = allocator_type()) : vector_base(first, last, a) {} \\\n    vector(const vector& c) : vector_base(c) {}  \\\n    explicit vector(size_type num, const value_type& val = value_type()) : vector_base(num, val) {} \\\n    vector(iterator start_, iterator end_) : vector_base(start_, end_) {}  \\\n    vector& operator=(const vector& x) {  \\\n      vector_base::operator=(x);  \\\n      return *this;  \\\n    } \\\n  }; \\\n}\n\n// Don't specialize if containers are implemented according to C++11\n#if !EIGEN_HAS_CXX11_CONTAINERS\n\nnamespace std {\n\n#define EIGEN_STD_VECTOR_SPECIALIZATION_BODY \\\n  public:  \\\n    typedef T value_type; \\\n    typedef typename vector_base::allocator_type allocator_type; \\\n    typedef typename vector_base::size_type size_type;  \\\n    typedef typename vector_base::iterator iterator;  \\\n    typedef typename vector_base::const_iterator const_iterator;  \\\n    explicit vector(const allocator_type& a = allocator_type()) : vector_base(a) {}  \\\n    template<typename InputIterator> \\\n    vector(InputIterator first, InputIterator last, const allocator_type& a = allocator_type()) \\\n    : vector_base(first, last, a) {} \\\n    vector(const vector& c) : vector_base(c) {}  \\\n    explicit vector(size_type num, const value_type& val = value_type()) : vector_base(num, val) {} \\\n    vector(iterator start_, iterator end_) : vector_base(start_, end_) {}  \\\n    vector& operator=(const vector& x) {  \\\n      vector_base::operator=(x);  \\\n      return *this;  \\\n    }\n\n  template<typename T>\n  class vector<T,EIGEN_ALIGNED_ALLOCATOR<T> >\n    : public vector<EIGEN_WORKAROUND_MSVC_STL_SUPPORT(T),\n                    Eigen::aligned_allocator_indirection<EIGEN_WORKAROUND_MSVC_STL_SUPPORT(T)> >\n{\n  typedef vector<EIGEN_WORKAROUND_MSVC_STL_SUPPORT(T),\n                 Eigen::aligned_allocator_indirection<EIGEN_WORKAROUND_MSVC_STL_SUPPORT(T)> > vector_base;\n  EIGEN_STD_VECTOR_SPECIALIZATION_BODY\n\n  void resize(size_type new_size)\n  { resize(new_size, T()); }\n\n#if defined(_VECTOR_)\n  // 
workaround MSVC std::vector implementation\n  void resize(size_type new_size, const value_type& x)\n  {\n    if (vector_base::size() < new_size)\n      vector_base::_Insert_n(vector_base::end(), new_size - vector_base::size(), x);\n    else if (new_size < vector_base::size())\n      vector_base::erase(vector_base::begin() + new_size, vector_base::end());\n  }\n  void push_back(const value_type& x)\n  { vector_base::push_back(x); } \n  using vector_base::insert;  \n  iterator insert(const_iterator position, const value_type& x)\n  { return vector_base::insert(position,x); }\n  void insert(const_iterator position, size_type new_size, const value_type& x)\n  { vector_base::insert(position, new_size, x); }\n#elif defined(_GLIBCXX_VECTOR) && (!(EIGEN_GNUC_AT_LEAST(4,1)))\n  /* Note that before gcc-4.1 we already have: std::vector::resize(size_type,const T&).\n   * However, this specialization is still needed to make the above EIGEN_DEFINE_STL_VECTOR_SPECIALIZATION trick to work. */\n  void resize(size_type new_size, const value_type& x)\n  {\n    vector_base::resize(new_size,x);\n  }\n#elif defined(_GLIBCXX_VECTOR) && EIGEN_GNUC_AT_LEAST(4,2)\n  // workaround GCC std::vector implementation\n  void resize(size_type new_size, const value_type& x)\n  {\n    if (new_size < vector_base::size())\n      vector_base::_M_erase_at_end(this->_M_impl._M_start + new_size);\n    else\n      vector_base::insert(vector_base::end(), new_size - vector_base::size(), x);\n  }\n#else\n  // either GCC 4.1 or non-GCC\n  // default implementation which should always work.\n  void resize(size_type new_size, const value_type& x)\n  {\n    if (new_size < vector_base::size())\n      vector_base::erase(vector_base::begin() + new_size, vector_base::end());\n    else if (new_size > vector_base::size())\n      vector_base::insert(vector_base::end(), new_size - vector_base::size(), x);\n  }\n#endif\n  };\n}\n#endif // !EIGEN_HAS_CXX11_CONTAINERS\n\n\n#endif // EIGEN_STDVECTOR_H\n"
  },
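For `std::vector`, the most commonly needed of the three headers, both spellings work the same way; a sketch showing the macro form and the explicit-allocator form side by side (`Eigen::Matrix4d` chosen arbitrarily):

```cpp
// Sketch for Eigen/StdVector: invoke the macro once, or pass
// Eigen::aligned_allocator explicitly; both yield the same underlying type.
#include <Eigen/StdVector>
#include <Eigen/Dense>
#include <vector>

EIGEN_DEFINE_STL_VECTOR_SPECIALIZATION(Eigen::Matrix4d)

int main() {
  std::vector<Eigen::Matrix4d> a;                              // via the macro
  std::vector<Eigen::Matrix4d,
              Eigen::aligned_allocator<Eigen::Matrix4d>> b;    // explicit form
  a.resize(3, Eigen::Matrix4d::Identity());
  b.assign(a.begin(), a.end());
  return (a.size() == b.size()) ? 0 : 1;
}
```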
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/StlSupport/details.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>\n// Copyright (C) 2009 Hauke Heibel <hauke.heibel@googlemail.com>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_STL_DETAILS_H\n#define EIGEN_STL_DETAILS_H\n\n#ifndef EIGEN_ALIGNED_ALLOCATOR\n  #define EIGEN_ALIGNED_ALLOCATOR Eigen::aligned_allocator\n#endif\n\nnamespace Eigen {\n\n  // This one is needed to prevent reimplementing the whole std::vector.\n  template <class T>\n  class aligned_allocator_indirection : public EIGEN_ALIGNED_ALLOCATOR<T>\n  {\n  public:\n    typedef std::size_t     size_type;\n    typedef std::ptrdiff_t  difference_type;\n    typedef T*              pointer;\n    typedef const T*        const_pointer;\n    typedef T&              reference;\n    typedef const T&        const_reference;\n    typedef T               value_type;\n\n    template<class U>\n    struct rebind\n    {\n      typedef aligned_allocator_indirection<U> other;\n    };\n\n    aligned_allocator_indirection() {}\n    aligned_allocator_indirection(const aligned_allocator_indirection& ) : EIGEN_ALIGNED_ALLOCATOR<T>() {}\n    aligned_allocator_indirection(const EIGEN_ALIGNED_ALLOCATOR<T>& ) {}\n    template<class U>\n    aligned_allocator_indirection(const aligned_allocator_indirection<U>& ) {}\n    template<class U>\n    aligned_allocator_indirection(const EIGEN_ALIGNED_ALLOCATOR<U>& ) {}\n    ~aligned_allocator_indirection() {}\n  };\n\n#if EIGEN_COMP_MSVC\n\n  // sometimes, MSVC detects, at compile time, that the argument x\n  // in std::vector::resize(size_t s,T x) won't be aligned and generate an error\n  // even if this function is never called. Whence this little wrapper.\n#define EIGEN_WORKAROUND_MSVC_STL_SUPPORT(T) \\\n  typename Eigen::internal::conditional< \\\n    Eigen::internal::is_arithmetic<T>::value, \\\n    T, \\\n    Eigen::internal::workaround_msvc_stl_support<T> \\\n  >::type\n\n  namespace internal {\n  template<typename T> struct workaround_msvc_stl_support : public T\n  {\n    inline workaround_msvc_stl_support() : T() {}\n    inline workaround_msvc_stl_support(const T& other) : T(other) {}\n    inline operator T& () { return *static_cast<T*>(this); }\n    inline operator const T& () const { return *static_cast<const T*>(this); }\n    template<typename OtherT>\n    inline T& operator=(const OtherT& other)\n    { T::operator=(other); return *this; }\n    inline workaround_msvc_stl_support& operator=(const workaround_msvc_stl_support& other)\n    { T::operator=(other); return *this; }\n  };\n  }\n\n#else\n\n#define EIGEN_WORKAROUND_MSVC_STL_SUPPORT(T) T\n\n#endif\n\n}\n\n#endif // EIGEN_STL_DETAILS_H\n"
  },
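details.h is internal plumbing, but the allocator it defaults `EIGEN_ALIGNED_ALLOCATOR` to, `Eigen::aligned_allocator`, can also be used directly when aligned raw storage is needed outside a container; a small sketch (the element count and placement-new loop are my own construction, not from this repo):

```cpp
// Direct use of Eigen::aligned_allocator for SIMD-aligned raw storage.
#include <Eigen/Core>
#include <new>

int main() {
  using Vec = Eigen::Vector4f;
  Eigen::aligned_allocator<Vec> alloc;
  Vec* p = alloc.allocate(3);                                  // 16-byte aligned
  for (int i = 0; i < 3; ++i) new (p + i) Vec(Vec::Zero());    // construct in place
  for (int i = 0; i < 3; ++i) p[i].~Vec();                     // destroy
  alloc.deallocate(p, 3);
  return 0;
}
```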
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/SuperLUSupport/SuperLUSupport.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008-2015 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_SUPERLUSUPPORT_H\n#define EIGEN_SUPERLUSUPPORT_H\n\nnamespace Eigen {\n\n#if defined(SUPERLU_MAJOR_VERSION) && (SUPERLU_MAJOR_VERSION >= 5)\n#define DECL_GSSVX(PREFIX,FLOATTYPE,KEYTYPE)\t\t\\\n    extern \"C\" {                                                                                          \\\n      extern void PREFIX##gssvx(superlu_options_t *, SuperMatrix *, int *, int *, int *,                  \\\n                                char *, FLOATTYPE *, FLOATTYPE *, SuperMatrix *, SuperMatrix *,           \\\n                                void *, int, SuperMatrix *, SuperMatrix *,                                \\\n                                FLOATTYPE *, FLOATTYPE *, FLOATTYPE *, FLOATTYPE *,                       \\\n                                GlobalLU_t *, mem_usage_t *, SuperLUStat_t *, int *);                     \\\n    }                                                                                                     \\\n    inline float SuperLU_gssvx(superlu_options_t *options, SuperMatrix *A,                                \\\n         int *perm_c, int *perm_r, int *etree, char *equed,                                               \\\n         FLOATTYPE *R, FLOATTYPE *C, SuperMatrix *L,                                                      \\\n         SuperMatrix *U, void *work, int lwork,                                                           \\\n         SuperMatrix *B, SuperMatrix *X,                                                                  \\\n         FLOATTYPE *recip_pivot_growth,                                                                   \\\n         FLOATTYPE *rcond, FLOATTYPE *ferr, FLOATTYPE *berr,                                              \\\n         SuperLUStat_t *stats, int *info, KEYTYPE) {                                                      \\\n    mem_usage_t mem_usage;                                                                                \\\n    GlobalLU_t gLU;                                                                                       \\\n    PREFIX##gssvx(options, A, perm_c, perm_r, etree, equed, R, C, L,                                      \\\n         U, work, lwork, B, X, recip_pivot_growth, rcond,                                                 \\\n         ferr, berr, &gLU, &mem_usage, stats, info);                                                      \\\n    return mem_usage.for_lu; /* bytes used by the factor storage */                                       \\\n  }\n#else // version < 5.0\n#define DECL_GSSVX(PREFIX,FLOATTYPE,KEYTYPE)\t\t\\\n    extern \"C\" {                                                                                          \\\n      extern void PREFIX##gssvx(superlu_options_t *, SuperMatrix *, int *, int *, int *,                  \\\n                                char *, FLOATTYPE *, FLOATTYPE *, SuperMatrix *, SuperMatrix *,           \\\n                                void *, int, SuperMatrix *, SuperMatrix *,                                \\\n                                FLOATTYPE *, FLOATTYPE *, FLOATTYPE *, FLOATTYPE *,                       \\\n           
                     mem_usage_t *, SuperLUStat_t *, int *);                                   \\\n    }                                                                                                     \\\n    inline float SuperLU_gssvx(superlu_options_t *options, SuperMatrix *A,                                \\\n         int *perm_c, int *perm_r, int *etree, char *equed,                                               \\\n         FLOATTYPE *R, FLOATTYPE *C, SuperMatrix *L,                                                      \\\n         SuperMatrix *U, void *work, int lwork,                                                           \\\n         SuperMatrix *B, SuperMatrix *X,                                                                  \\\n         FLOATTYPE *recip_pivot_growth,                                                                   \\\n         FLOATTYPE *rcond, FLOATTYPE *ferr, FLOATTYPE *berr,                                              \\\n         SuperLUStat_t *stats, int *info, KEYTYPE) {                                                      \\\n    mem_usage_t mem_usage;                                                                                \\\n    PREFIX##gssvx(options, A, perm_c, perm_r, etree, equed, R, C, L,                                      \\\n         U, work, lwork, B, X, recip_pivot_growth, rcond,                                                 \\\n         ferr, berr, &mem_usage, stats, info);                                                            \\\n    return mem_usage.for_lu; /* bytes used by the factor storage */                                       \\\n  }\n#endif\n\nDECL_GSSVX(s,float,float)\nDECL_GSSVX(c,float,std::complex<float>)\nDECL_GSSVX(d,double,double)\nDECL_GSSVX(z,double,std::complex<double>)\n\n#ifdef MILU_ALPHA\n#define EIGEN_SUPERLU_HAS_ILU\n#endif\n\n#ifdef EIGEN_SUPERLU_HAS_ILU\n\n// similarly for the incomplete factorization using gsisx\n#define DECL_GSISX(PREFIX,FLOATTYPE,KEYTYPE)                                                    \\\n    extern \"C\" {                                                                                \\\n      extern void PREFIX##gsisx(superlu_options_t *, SuperMatrix *, int *, int *, int *,        \\\n                         char *, FLOATTYPE *, FLOATTYPE *, SuperMatrix *, SuperMatrix *,        \\\n                         void *, int, SuperMatrix *, SuperMatrix *, FLOATTYPE *, FLOATTYPE *,   \\\n                         mem_usage_t *, SuperLUStat_t *, int *);                        \\\n    }                                                                                           \\\n    inline float SuperLU_gsisx(superlu_options_t *options, SuperMatrix *A,                      \\\n         int *perm_c, int *perm_r, int *etree, char *equed,                                     \\\n         FLOATTYPE *R, FLOATTYPE *C, SuperMatrix *L,                                            \\\n         SuperMatrix *U, void *work, int lwork,                                                 \\\n         SuperMatrix *B, SuperMatrix *X,                                                        \\\n         FLOATTYPE *recip_pivot_growth,                                                         \\\n         FLOATTYPE *rcond,                                                                      \\\n         SuperLUStat_t *stats, int *info, KEYTYPE) {                                            \\\n    mem_usage_t mem_usage;                                                              \\\n    
PREFIX##gsisx(options, A, perm_c, perm_r, etree, equed, R, C, L,                            \\\n         U, work, lwork, B, X, recip_pivot_growth, rcond,                                       \\\n         &mem_usage, stats, info);                                                              \\\n    return mem_usage.for_lu; /* bytes used by the factor storage */                             \\\n  }\n\nDECL_GSISX(s,float,float)\nDECL_GSISX(c,float,std::complex<float>)\nDECL_GSISX(d,double,double)\nDECL_GSISX(z,double,std::complex<double>)\n\n#endif\n\ntemplate<typename MatrixType>\nstruct SluMatrixMapHelper;\n\n/** \\internal\n  *\n  * A wrapper class for SuperLU matrices. It supports only compressed sparse matrices\n  * and dense matrices. Supernodal and other fancy format are not supported by this wrapper.\n  *\n  * This wrapper class mainly aims to avoids the need of dynamic allocation of the storage structure.\n  */\nstruct SluMatrix : SuperMatrix\n{\n  SluMatrix()\n  {\n    Store = &storage;\n  }\n\n  SluMatrix(const SluMatrix& other)\n    : SuperMatrix(other)\n  {\n    Store = &storage;\n    storage = other.storage;\n  }\n\n  SluMatrix& operator=(const SluMatrix& other)\n  {\n    SuperMatrix::operator=(static_cast<const SuperMatrix&>(other));\n    Store = &storage;\n    storage = other.storage;\n    return *this;\n  }\n\n  struct\n  {\n    union {int nnz;int lda;};\n    void *values;\n    int *innerInd;\n    int *outerInd;\n  } storage;\n\n  void setStorageType(Stype_t t)\n  {\n    Stype = t;\n    if (t==SLU_NC || t==SLU_NR || t==SLU_DN)\n      Store = &storage;\n    else\n    {\n      eigen_assert(false && \"storage type not supported\");\n      Store = 0;\n    }\n  }\n\n  template<typename Scalar>\n  void setScalarType()\n  {\n    if (internal::is_same<Scalar,float>::value)\n      Dtype = SLU_S;\n    else if (internal::is_same<Scalar,double>::value)\n      Dtype = SLU_D;\n    else if (internal::is_same<Scalar,std::complex<float> >::value)\n      Dtype = SLU_C;\n    else if (internal::is_same<Scalar,std::complex<double> >::value)\n      Dtype = SLU_Z;\n    else\n    {\n      eigen_assert(false && \"Scalar type not supported by SuperLU\");\n    }\n  }\n\n  template<typename MatrixType>\n  static SluMatrix Map(MatrixBase<MatrixType>& _mat)\n  {\n    MatrixType& mat(_mat.derived());\n    eigen_assert( ((MatrixType::Flags&RowMajorBit)!=RowMajorBit) && \"row-major dense matrices are not supported by SuperLU\");\n    SluMatrix res;\n    res.setStorageType(SLU_DN);\n    res.setScalarType<typename MatrixType::Scalar>();\n    res.Mtype     = SLU_GE;\n\n    res.nrow      = internal::convert_index<int>(mat.rows());\n    res.ncol      = internal::convert_index<int>(mat.cols());\n\n    res.storage.lda       = internal::convert_index<int>(MatrixType::IsVectorAtCompileTime ? 
mat.size() : mat.outerStride());\n    res.storage.values    = (void*)(mat.data());\n    return res;\n  }\n\n  template<typename MatrixType>\n  static SluMatrix Map(SparseMatrixBase<MatrixType>& a_mat)\n  {\n    MatrixType &mat(a_mat.derived());\n    SluMatrix res;\n    if ((MatrixType::Flags&RowMajorBit)==RowMajorBit)\n    {\n      res.setStorageType(SLU_NR);\n      res.nrow      = internal::convert_index<int>(mat.cols());\n      res.ncol      = internal::convert_index<int>(mat.rows());\n    }\n    else\n    {\n      res.setStorageType(SLU_NC);\n      res.nrow      = internal::convert_index<int>(mat.rows());\n      res.ncol      = internal::convert_index<int>(mat.cols());\n    }\n\n    res.Mtype       = SLU_GE;\n\n    res.storage.nnz       = internal::convert_index<int>(mat.nonZeros());\n    res.storage.values    = mat.valuePtr();\n    res.storage.innerInd  = mat.innerIndexPtr();\n    res.storage.outerInd  = mat.outerIndexPtr();\n\n    res.setScalarType<typename MatrixType::Scalar>();\n\n    // FIXME the following is not very accurate\n    if (int(MatrixType::Flags) & int(Upper))\n      res.Mtype = SLU_TRU;\n    if (int(MatrixType::Flags) & int(Lower))\n      res.Mtype = SLU_TRL;\n\n    eigen_assert(((int(MatrixType::Flags) & int(SelfAdjoint))==0) && \"SelfAdjoint matrix shape not supported by SuperLU\");\n\n    return res;\n  }\n};\n\ntemplate<typename Scalar, int Rows, int Cols, int Options, int MRows, int MCols>\nstruct SluMatrixMapHelper<Matrix<Scalar,Rows,Cols,Options,MRows,MCols> >\n{\n  typedef Matrix<Scalar,Rows,Cols,Options,MRows,MCols> MatrixType;\n  static void run(MatrixType& mat, SluMatrix& res)\n  {\n    eigen_assert( ((Options&RowMajor)!=RowMajor) && \"row-major dense matrices is not supported by SuperLU\");\n    res.setStorageType(SLU_DN);\n    res.setScalarType<Scalar>();\n    res.Mtype     = SLU_GE;\n\n    res.nrow      = mat.rows();\n    res.ncol      = mat.cols();\n\n    res.storage.lda       = mat.outerStride();\n    res.storage.values    = mat.data();\n  }\n};\n\ntemplate<typename Derived>\nstruct SluMatrixMapHelper<SparseMatrixBase<Derived> >\n{\n  typedef Derived MatrixType;\n  static void run(MatrixType& mat, SluMatrix& res)\n  {\n    if ((MatrixType::Flags&RowMajorBit)==RowMajorBit)\n    {\n      res.setStorageType(SLU_NR);\n      res.nrow      = mat.cols();\n      res.ncol      = mat.rows();\n    }\n    else\n    {\n      res.setStorageType(SLU_NC);\n      res.nrow      = mat.rows();\n      res.ncol      = mat.cols();\n    }\n\n    res.Mtype       = SLU_GE;\n\n    res.storage.nnz       = mat.nonZeros();\n    res.storage.values    = mat.valuePtr();\n    res.storage.innerInd  = mat.innerIndexPtr();\n    res.storage.outerInd  = mat.outerIndexPtr();\n\n    res.setScalarType<typename MatrixType::Scalar>();\n\n    // FIXME the following is not very accurate\n    if (MatrixType::Flags & Upper)\n      res.Mtype = SLU_TRU;\n    if (MatrixType::Flags & Lower)\n      res.Mtype = SLU_TRL;\n\n    eigen_assert(((MatrixType::Flags & SelfAdjoint)==0) && \"SelfAdjoint matrix shape not supported by SuperLU\");\n  }\n};\n\nnamespace internal {\n\ntemplate<typename MatrixType>\nSluMatrix asSluMatrix(MatrixType& mat)\n{\n  return SluMatrix::Map(mat);\n}\n\n/** View a Super LU matrix as an Eigen expression */\ntemplate<typename Scalar, int Flags, typename Index>\nMappedSparseMatrix<Scalar,Flags,Index> map_superlu(SluMatrix& sluMat)\n{\n  eigen_assert(((Flags&RowMajor)==RowMajor && sluMat.Stype == SLU_NR)\n         || ((Flags&ColMajor)==ColMajor && sluMat.Stype == SLU_NC));\n\n  Index 
outerSize = (Flags&RowMajor)==RowMajor ? sluMat.ncol : sluMat.nrow;\n\n  return MappedSparseMatrix<Scalar,Flags,Index>(\n    sluMat.nrow, sluMat.ncol, sluMat.storage.outerInd[outerSize],\n    sluMat.storage.outerInd, sluMat.storage.innerInd, reinterpret_cast<Scalar*>(sluMat.storage.values) );\n}\n\n} // end namespace internal\n\n/** \\ingroup SuperLUSupport_Module\n  * \\class SuperLUBase\n  * \\brief The base class for the direct and incomplete LU factorization of SuperLU\n  */\ntemplate<typename _MatrixType, typename Derived>\nclass SuperLUBase : public SparseSolverBase<Derived>\n{\n  protected:\n    typedef SparseSolverBase<Derived> Base;\n    using Base::derived;\n    using Base::m_isInitialized;\n  public:\n    typedef _MatrixType MatrixType;\n    typedef typename MatrixType::Scalar Scalar;\n    typedef typename MatrixType::RealScalar RealScalar;\n    typedef typename MatrixType::StorageIndex StorageIndex;\n    typedef Matrix<Scalar,Dynamic,1> Vector;\n    typedef Matrix<int, 1, MatrixType::ColsAtCompileTime> IntRowVectorType;\n    typedef Matrix<int, MatrixType::RowsAtCompileTime, 1> IntColVectorType;    \n    typedef Map<PermutationMatrix<Dynamic,Dynamic,int> > PermutationMap;\n    typedef SparseMatrix<Scalar> LUMatrixType;\n    enum {\n      ColsAtCompileTime = MatrixType::ColsAtCompileTime,\n      MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime\n    };\n\n  public:\n\n    SuperLUBase() {}\n\n    ~SuperLUBase()\n    {\n      clearFactors();\n    }\n    \n    inline Index rows() const { return m_matrix.rows(); }\n    inline Index cols() const { return m_matrix.cols(); }\n    \n    /** \\returns a reference to the Super LU option object to configure the  Super LU algorithms. */\n    inline superlu_options_t& options() { return m_sluOptions; }\n    \n    /** \\brief Reports whether previous computation was successful.\n      *\n      * \\returns \\c Success if computation was successful,\n      *          \\c NumericalIssue if the matrix.appears to be negative.\n      */\n    ComputationInfo info() const\n    {\n      eigen_assert(m_isInitialized && \"Decomposition is not initialized.\");\n      return m_info;\n    }\n\n    /** Computes the sparse Cholesky decomposition of \\a matrix */\n    void compute(const MatrixType& matrix)\n    {\n      derived().analyzePattern(matrix);\n      derived().factorize(matrix);\n    }\n\n    /** Performs a symbolic decomposition on the sparcity of \\a matrix.\n      *\n      * This function is particularly useful when solving for several problems having the same structure.\n      * \n      * \\sa factorize()\n      */\n    void analyzePattern(const MatrixType& /*matrix*/)\n    {\n      m_isInitialized = true;\n      m_info = Success;\n      m_analysisIsOk = true;\n      m_factorizationIsOk = false;\n    }\n    \n    template<typename Stream>\n    void dumpMemory(Stream& /*s*/)\n    {}\n    \n  protected:\n    \n    void initFactorization(const MatrixType& a)\n    {\n      set_default_options(&this->m_sluOptions);\n      \n      const Index size = a.rows();\n      m_matrix = a;\n\n      m_sluA = internal::asSluMatrix(m_matrix);\n      clearFactors();\n\n      m_p.resize(size);\n      m_q.resize(size);\n      m_sluRscale.resize(size);\n      m_sluCscale.resize(size);\n      m_sluEtree.resize(size);\n\n      // set empty B and X\n      m_sluB.setStorageType(SLU_DN);\n      m_sluB.setScalarType<Scalar>();\n      m_sluB.Mtype          = SLU_GE;\n      m_sluB.storage.values = 0;\n      m_sluB.nrow           = 0;\n      m_sluB.ncol           = 0;\n  
    m_sluB.storage.lda    = internal::convert_index<int>(size);\n      m_sluX                = m_sluB;\n      \n      m_extractedDataAreDirty = true;\n    }\n    \n    void init()\n    {\n      m_info = InvalidInput;\n      m_isInitialized = false;\n      m_sluL.Store = 0;\n      m_sluU.Store = 0;\n    }\n    \n    void extractData() const;\n\n    void clearFactors()\n    {\n      if(m_sluL.Store)\n        Destroy_SuperNode_Matrix(&m_sluL);\n      if(m_sluU.Store)\n        Destroy_CompCol_Matrix(&m_sluU);\n\n      m_sluL.Store = 0;\n      m_sluU.Store = 0;\n\n      memset(&m_sluL,0,sizeof m_sluL);\n      memset(&m_sluU,0,sizeof m_sluU);\n    }\n\n    // cached data to reduce reallocation, etc.\n    mutable LUMatrixType m_l;\n    mutable LUMatrixType m_u;\n    mutable IntColVectorType m_p;\n    mutable IntRowVectorType m_q;\n\n    mutable LUMatrixType m_matrix;  // copy of the factorized matrix\n    mutable SluMatrix m_sluA;\n    mutable SuperMatrix m_sluL, m_sluU;\n    mutable SluMatrix m_sluB, m_sluX;\n    mutable SuperLUStat_t m_sluStat;\n    mutable superlu_options_t m_sluOptions;\n    mutable std::vector<int> m_sluEtree;\n    mutable Matrix<RealScalar,Dynamic,1> m_sluRscale, m_sluCscale;\n    mutable Matrix<RealScalar,Dynamic,1> m_sluFerr, m_sluBerr;\n    mutable char m_sluEqued;\n\n    mutable ComputationInfo m_info;\n    int m_factorizationIsOk;\n    int m_analysisIsOk;\n    mutable bool m_extractedDataAreDirty;\n    \n  private:\n    SuperLUBase(SuperLUBase& ) { }\n};\n\n\n/** \\ingroup SuperLUSupport_Module\n  * \\class SuperLU\n  * \\brief A sparse direct LU factorization and solver based on the SuperLU library\n  *\n  * This class allows to solve for A.X = B sparse linear problems via a direct LU factorization\n  * using the SuperLU library. The sparse matrix A must be squared and invertible. The vectors or matrices\n  * X and B can be either dense or sparse.\n  *\n  * \\tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<>\n  *\n  * \\warning This class is only for the 4.x versions of SuperLU. 
The 3.x and 5.x versions are not supported.\n  *\n  * \\implsparsesolverconcept\n  *\n  * \\sa \\ref TutorialSparseSolverConcept, class SparseLU\n  */\ntemplate<typename _MatrixType>\nclass SuperLU : public SuperLUBase<_MatrixType,SuperLU<_MatrixType> >\n{\n  public:\n    typedef SuperLUBase<_MatrixType,SuperLU> Base;\n    typedef _MatrixType MatrixType;\n    typedef typename Base::Scalar Scalar;\n    typedef typename Base::RealScalar RealScalar;\n    typedef typename Base::StorageIndex StorageIndex;\n    typedef typename Base::IntRowVectorType IntRowVectorType;\n    typedef typename Base::IntColVectorType IntColVectorType;   \n    typedef typename Base::PermutationMap PermutationMap;\n    typedef typename Base::LUMatrixType LUMatrixType;\n    typedef TriangularView<LUMatrixType, Lower|UnitDiag>  LMatrixType;\n    typedef TriangularView<LUMatrixType,  Upper>          UMatrixType;\n\n  public:\n    using Base::_solve_impl;\n\n    SuperLU() : Base() { init(); }\n\n    explicit SuperLU(const MatrixType& matrix) : Base()\n    {\n      init();\n      Base::compute(matrix);\n    }\n\n    ~SuperLU()\n    {\n    }\n    \n    /** Performs a symbolic decomposition on the sparcity of \\a matrix.\n      *\n      * This function is particularly useful when solving for several problems having the same structure.\n      * \n      * \\sa factorize()\n      */\n    void analyzePattern(const MatrixType& matrix)\n    {\n      m_info = InvalidInput;\n      m_isInitialized = false;\n      Base::analyzePattern(matrix);\n    }\n    \n    /** Performs a numeric decomposition of \\a matrix\n      *\n      * The given matrix must has the same sparcity than the matrix on which the symbolic decomposition has been performed.\n      *\n      * \\sa analyzePattern()\n      */\n    void factorize(const MatrixType& matrix);\n    \n    /** \\internal */\n    template<typename Rhs,typename Dest>\n    void _solve_impl(const MatrixBase<Rhs> &b, MatrixBase<Dest> &dest) const;\n    \n    inline const LMatrixType& matrixL() const\n    {\n      if (m_extractedDataAreDirty) this->extractData();\n      return m_l;\n    }\n\n    inline const UMatrixType& matrixU() const\n    {\n      if (m_extractedDataAreDirty) this->extractData();\n      return m_u;\n    }\n\n    inline const IntColVectorType& permutationP() const\n    {\n      if (m_extractedDataAreDirty) this->extractData();\n      return m_p;\n    }\n\n    inline const IntRowVectorType& permutationQ() const\n    {\n      if (m_extractedDataAreDirty) this->extractData();\n      return m_q;\n    }\n    \n    Scalar determinant() const;\n    \n  protected:\n    \n    using Base::m_matrix;\n    using Base::m_sluOptions;\n    using Base::m_sluA;\n    using Base::m_sluB;\n    using Base::m_sluX;\n    using Base::m_p;\n    using Base::m_q;\n    using Base::m_sluEtree;\n    using Base::m_sluEqued;\n    using Base::m_sluRscale;\n    using Base::m_sluCscale;\n    using Base::m_sluL;\n    using Base::m_sluU;\n    using Base::m_sluStat;\n    using Base::m_sluFerr;\n    using Base::m_sluBerr;\n    using Base::m_l;\n    using Base::m_u;\n    \n    using Base::m_analysisIsOk;\n    using Base::m_factorizationIsOk;\n    using Base::m_extractedDataAreDirty;\n    using Base::m_isInitialized;\n    using Base::m_info;\n    \n    void init()\n    {\n      Base::init();\n      \n      set_default_options(&this->m_sluOptions);\n      m_sluOptions.PrintStat        = NO;\n      m_sluOptions.ConditionNumber  = NO;\n      m_sluOptions.Trans            = NOTRANS;\n      m_sluOptions.ColPerm          = 
COLAMD;\n    }\n    \n    \n  private:\n    SuperLU(SuperLU& ) { }\n};\n\ntemplate<typename MatrixType>\nvoid SuperLU<MatrixType>::factorize(const MatrixType& a)\n{\n  eigen_assert(m_analysisIsOk && \"You must first call analyzePattern()\");\n  if(!m_analysisIsOk)\n  {\n    m_info = InvalidInput;\n    return;\n  }\n  \n  this->initFactorization(a);\n  \n  m_sluOptions.ColPerm = COLAMD;\n  int info = 0;\n  RealScalar recip_pivot_growth, rcond;\n  RealScalar ferr, berr;\n\n  StatInit(&m_sluStat);\n  SuperLU_gssvx(&m_sluOptions, &m_sluA, m_q.data(), m_p.data(), &m_sluEtree[0],\n                &m_sluEqued, &m_sluRscale[0], &m_sluCscale[0],\n                &m_sluL, &m_sluU,\n                NULL, 0,\n                &m_sluB, &m_sluX,\n                &recip_pivot_growth, &rcond,\n                &ferr, &berr,\n                &m_sluStat, &info, Scalar());\n  StatFree(&m_sluStat);\n\n  m_extractedDataAreDirty = true;\n\n  // FIXME how to better check for errors ???\n  m_info = info == 0 ? Success : NumericalIssue;\n  m_factorizationIsOk = true;\n}\n\ntemplate<typename MatrixType>\ntemplate<typename Rhs,typename Dest>\nvoid SuperLU<MatrixType>::_solve_impl(const MatrixBase<Rhs> &b, MatrixBase<Dest>& x) const\n{\n  eigen_assert(m_factorizationIsOk && \"The decomposition is not in a valid state for solving, you must first call either compute() or analyzePattern()/factorize()\");\n\n  const Index rhsCols = b.cols();\n  eigen_assert(m_matrix.rows()==b.rows());\n\n  m_sluOptions.Trans = NOTRANS;\n  m_sluOptions.Fact = FACTORED;\n  m_sluOptions.IterRefine = NOREFINE;\n  \n\n  m_sluFerr.resize(rhsCols);\n  m_sluBerr.resize(rhsCols);\n  \n  Ref<const Matrix<typename Rhs::Scalar,Dynamic,Dynamic,ColMajor> > b_ref(b);\n  Ref<const Matrix<typename Dest::Scalar,Dynamic,Dynamic,ColMajor> > x_ref(x);\n  \n  m_sluB = SluMatrix::Map(b_ref.const_cast_derived());\n  m_sluX = SluMatrix::Map(x_ref.const_cast_derived());\n  \n  typename Rhs::PlainObject b_cpy;\n  if(m_sluEqued!='N')\n  {\n    b_cpy = b;\n    m_sluB = SluMatrix::Map(b_cpy.const_cast_derived());  \n  }\n\n  StatInit(&m_sluStat);\n  int info = 0;\n  RealScalar recip_pivot_growth, rcond;\n  SuperLU_gssvx(&m_sluOptions, &m_sluA,\n                m_q.data(), m_p.data(),\n                &m_sluEtree[0], &m_sluEqued,\n                &m_sluRscale[0], &m_sluCscale[0],\n                &m_sluL, &m_sluU,\n                NULL, 0,\n                &m_sluB, &m_sluX,\n                &recip_pivot_growth, &rcond,\n                &m_sluFerr[0], &m_sluBerr[0],\n                &m_sluStat, &info, Scalar());\n  StatFree(&m_sluStat);\n  \n  if(x.derived().data() != x_ref.data())\n    x = x_ref;\n  \n  m_info = info==0 ? Success : NumericalIssue;\n}\n\n// the code of this extractData() function has been adapted from the SuperLU's Matlab support code,\n//\n//  Copyright (c) 1994 by Xerox Corporation.  All rights reserved.\n//\n//  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY\n//  EXPRESSED OR IMPLIED.  
ANY USE IS AT YOUR OWN RISK.\n//\ntemplate<typename MatrixType, typename Derived>\nvoid SuperLUBase<MatrixType,Derived>::extractData() const\n{\n  eigen_assert(m_factorizationIsOk && \"The decomposition is not in a valid state for extracting factors, you must first call either compute() or analyzePattern()/factorize()\");\n  if (m_extractedDataAreDirty)\n  {\n    int         upper;\n    int         fsupc, istart, nsupr;\n    int         lastl = 0, lastu = 0;\n    SCformat    *Lstore = static_cast<SCformat*>(m_sluL.Store);\n    NCformat    *Ustore = static_cast<NCformat*>(m_sluU.Store);\n    Scalar      *SNptr;\n\n    const Index size = m_matrix.rows();\n    m_l.resize(size,size);\n    m_l.resizeNonZeros(Lstore->nnz);\n    m_u.resize(size,size);\n    m_u.resizeNonZeros(Ustore->nnz);\n\n    int* Lcol = m_l.outerIndexPtr();\n    int* Lrow = m_l.innerIndexPtr();\n    Scalar* Lval = m_l.valuePtr();\n\n    int* Ucol = m_u.outerIndexPtr();\n    int* Urow = m_u.innerIndexPtr();\n    Scalar* Uval = m_u.valuePtr();\n\n    Ucol[0] = 0;\n    Ucol[0] = 0;\n\n    /* for each supernode */\n    for (int k = 0; k <= Lstore->nsuper; ++k)\n    {\n      fsupc   = L_FST_SUPC(k);\n      istart  = L_SUB_START(fsupc);\n      nsupr   = L_SUB_START(fsupc+1) - istart;\n      upper   = 1;\n\n      /* for each column in the supernode */\n      for (int j = fsupc; j < L_FST_SUPC(k+1); ++j)\n      {\n        SNptr = &((Scalar*)Lstore->nzval)[L_NZ_START(j)];\n\n        /* Extract U */\n        for (int i = U_NZ_START(j); i < U_NZ_START(j+1); ++i)\n        {\n          Uval[lastu] = ((Scalar*)Ustore->nzval)[i];\n          /* Matlab doesn't like explicit zero. */\n          if (Uval[lastu] != 0.0)\n            Urow[lastu++] = U_SUB(i);\n        }\n        for (int i = 0; i < upper; ++i)\n        {\n          /* upper triangle in the supernode */\n          Uval[lastu] = SNptr[i];\n          /* Matlab doesn't like explicit zero. */\n          if (Uval[lastu] != 0.0)\n            Urow[lastu++] = L_SUB(istart+i);\n        }\n        Ucol[j+1] = lastu;\n\n        /* Extract L */\n        Lval[lastl] = 1.0; /* unit diagonal */\n        Lrow[lastl++] = L_SUB(istart + upper - 1);\n        for (int i = upper; i < nsupr; ++i)\n        {\n          Lval[lastl] = SNptr[i];\n          /* Matlab doesn't like explicit zero. */\n          if (Lval[lastl] != 0.0)\n            Lrow[lastl++] = L_SUB(istart+i);\n        }\n        Lcol[j+1] = lastl;\n\n        ++upper;\n      } /* for j ... */\n\n    } /* for k ... 
*/\n\n    // squeeze the matrices:\n    m_l.resizeNonZeros(lastl);\n    m_u.resizeNonZeros(lastu);\n\n    m_extractedDataAreDirty = false;\n  }\n}\n\ntemplate<typename MatrixType>\ntypename SuperLU<MatrixType>::Scalar SuperLU<MatrixType>::determinant() const\n{\n  eigen_assert(m_factorizationIsOk && \"The decomposition is not in a valid state for computing the determinant, you must first call either compute() or analyzePattern()/factorize()\");\n  \n  if (m_extractedDataAreDirty)\n    this->extractData();\n\n  Scalar det = Scalar(1);\n  for (int j=0; j<m_u.cols(); ++j)\n  {\n    if (m_u.outerIndexPtr()[j+1]-m_u.outerIndexPtr()[j] > 0)\n    {\n      int lastId = m_u.outerIndexPtr()[j+1]-1;\n      eigen_assert(m_u.innerIndexPtr()[lastId]<=j);\n      if (m_u.innerIndexPtr()[lastId]==j)\n        det *= m_u.valuePtr()[lastId];\n    }\n  }\n  if(PermutationMap(m_p.data(),m_p.size()).determinant()*PermutationMap(m_q.data(),m_q.size()).determinant()<0)\n    det = -det;\n  if(m_sluEqued!='N')\n    return det/m_sluRscale.prod()/m_sluCscale.prod();\n  else\n    return det;\n}\n\n#ifdef EIGEN_PARSED_BY_DOXYGEN\n#define EIGEN_SUPERLU_HAS_ILU\n#endif\n\n#ifdef EIGEN_SUPERLU_HAS_ILU\n\n/** \\ingroup SuperLUSupport_Module\n  * \\class SuperILU\n  * \\brief A sparse direct \\b incomplete LU factorization and solver based on the SuperLU library\n  *\n  * This class allows solving for an approximate solution of sparse linear problems A.X = B via an incomplete LU factorization\n  * using the SuperLU library. It is intended to be used as a preconditioner for iterative linear solvers.\n  *\n  * \\warning This class is only for the 4.x versions of SuperLU. The 3.x and 5.x versions are not supported.\n  *\n  * \\tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<>\n  *\n  * \\implsparsesolverconcept\n  *\n  * \\sa \\ref TutorialSparseSolverConcept, class IncompleteLUT, class ConjugateGradient, class BiCGSTAB\n  */\n\ntemplate<typename _MatrixType>\nclass SuperILU : public SuperLUBase<_MatrixType,SuperILU<_MatrixType> >\n{\n  public:\n    typedef SuperLUBase<_MatrixType,SuperILU> Base;\n    typedef _MatrixType MatrixType;\n    typedef typename Base::Scalar Scalar;\n    typedef typename Base::RealScalar RealScalar;\n\n  public:\n    using Base::_solve_impl;\n\n    SuperILU() : Base() { init(); }\n\n    SuperILU(const MatrixType& matrix) : Base()\n    {\n      init();\n      Base::compute(matrix);\n    }\n\n    ~SuperILU()\n    {\n    }\n    \n    /** Performs a symbolic decomposition on the sparsity of \\a matrix.\n      *\n      * This function is particularly useful when solving for several problems having the same structure.\n      * \n      * \\sa factorize()\n      */\n    void analyzePattern(const MatrixType& matrix)\n    {\n      Base::analyzePattern(matrix);\n    }\n    \n    /** Performs a numeric decomposition of \\a matrix\n      *\n      * The given matrix must have the same sparsity as the matrix on which the symbolic decomposition has been performed.\n      *\n      * \\sa analyzePattern()\n      */\n    void factorize(const MatrixType& matrix);\n    \n    #ifndef EIGEN_PARSED_BY_DOXYGEN\n    /** \\internal */\n    template<typename Rhs,typename Dest>\n    void _solve_impl(const MatrixBase<Rhs> &b, MatrixBase<Dest> &dest) const;\n    #endif // EIGEN_PARSED_BY_DOXYGEN\n    \n  protected:\n    \n    using Base::m_matrix;\n    using Base::m_sluOptions;\n    using Base::m_sluA;\n    using Base::m_sluB;\n    using Base::m_sluX;\n    using Base::m_p;\n    using 
Base::m_q;\n    using Base::m_sluEtree;\n    using Base::m_sluEqued;\n    using Base::m_sluRscale;\n    using Base::m_sluCscale;\n    using Base::m_sluL;\n    using Base::m_sluU;\n    using Base::m_sluStat;\n    using Base::m_sluFerr;\n    using Base::m_sluBerr;\n    using Base::m_l;\n    using Base::m_u;\n    \n    using Base::m_analysisIsOk;\n    using Base::m_factorizationIsOk;\n    using Base::m_extractedDataAreDirty;\n    using Base::m_isInitialized;\n    using Base::m_info;\n\n    void init()\n    {\n      Base::init();\n      \n      ilu_set_default_options(&m_sluOptions);\n      m_sluOptions.PrintStat        = NO;\n      m_sluOptions.ConditionNumber  = NO;\n      m_sluOptions.Trans            = NOTRANS;\n      m_sluOptions.ColPerm          = MMD_AT_PLUS_A;\n      \n      // no attempt to preserve column sum\n      m_sluOptions.ILU_MILU = SILU;\n      // only basic ILU(k) support -- no direct control over memory consumption\n      // better to use ILU_DropRule = DROP_BASIC | DROP_AREA\n      // and set ILU_FillFactor to max memory growth\n      m_sluOptions.ILU_DropRule = DROP_BASIC;\n      m_sluOptions.ILU_DropTol = NumTraits<Scalar>::dummy_precision()*10;\n    }\n    \n  private:\n    SuperILU(SuperILU& ) { }\n};\n\ntemplate<typename MatrixType>\nvoid SuperILU<MatrixType>::factorize(const MatrixType& a)\n{\n  eigen_assert(m_analysisIsOk && \"You must first call analyzePattern()\");\n  if(!m_analysisIsOk)\n  {\n    m_info = InvalidInput;\n    return;\n  }\n  \n  this->initFactorization(a);\n\n  int info = 0;\n  RealScalar recip_pivot_growth, rcond;\n\n  StatInit(&m_sluStat);\n  SuperLU_gsisx(&m_sluOptions, &m_sluA, m_q.data(), m_p.data(), &m_sluEtree[0],\n                &m_sluEqued, &m_sluRscale[0], &m_sluCscale[0],\n                &m_sluL, &m_sluU,\n                NULL, 0,\n                &m_sluB, &m_sluX,\n                &recip_pivot_growth, &rcond,\n                &m_sluStat, &info, Scalar());\n  StatFree(&m_sluStat);\n\n  // FIXME how to better check for errors ???\n  m_info = info == 0 ? 
Success : NumericalIssue;\n  m_factorizationIsOk = true;\n}\n\n#ifndef EIGEN_PARSED_BY_DOXYGEN\ntemplate<typename MatrixType>\ntemplate<typename Rhs,typename Dest>\nvoid SuperILU<MatrixType>::_solve_impl(const MatrixBase<Rhs> &b, MatrixBase<Dest>& x) const\n{\n  eigen_assert(m_factorizationIsOk && \"The decomposition is not in a valid state for solving, you must first call either compute() or analyzePattern()/factorize()\");\n\n  const int rhsCols = b.cols();\n  eigen_assert(m_matrix.rows()==b.rows());\n\n  m_sluOptions.Trans = NOTRANS;\n  m_sluOptions.Fact = FACTORED;\n  m_sluOptions.IterRefine = NOREFINE;\n\n  m_sluFerr.resize(rhsCols);\n  m_sluBerr.resize(rhsCols);\n  \n  Ref<const Matrix<typename Rhs::Scalar,Dynamic,Dynamic,ColMajor> > b_ref(b);\n  Ref<const Matrix<typename Dest::Scalar,Dynamic,Dynamic,ColMajor> > x_ref(x);\n  \n  m_sluB = SluMatrix::Map(b_ref.const_cast_derived());\n  m_sluX = SluMatrix::Map(x_ref.const_cast_derived());\n\n  typename Rhs::PlainObject b_cpy;\n  if(m_sluEqued!='N')\n  {\n    b_cpy = b;\n    m_sluB = SluMatrix::Map(b_cpy.const_cast_derived());  \n  }\n  \n  int info = 0;\n  RealScalar recip_pivot_growth, rcond;\n\n  StatInit(&m_sluStat);\n  SuperLU_gsisx(&m_sluOptions, &m_sluA,\n                m_q.data(), m_p.data(),\n                &m_sluEtree[0], &m_sluEqued,\n                &m_sluRscale[0], &m_sluCscale[0],\n                &m_sluL, &m_sluU,\n                NULL, 0,\n                &m_sluB, &m_sluX,\n                &recip_pivot_growth, &rcond,\n                &m_sluStat, &info, Scalar());\n  StatFree(&m_sluStat);\n  \n  if(x.derived().data() != x_ref.data())\n    x = x_ref;\n\n  m_info = info==0 ? Success : NumericalIssue;\n}\n#endif\n\n#endif\n\n} // end namespace Eigen\n\n#endif // EIGEN_SUPERLUSUPPORT_H\n"
  },
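  {
    "path": "examples/superlu_solve_sketch.cpp",
    "content": "// Hypothetical usage sketch -- not part of the upstream Eigen sources.\n// It shows the compute()/solve() cycle of the SuperLU wrapper in\n// SuperLUSupport.h above, assuming Eigen was configured with SuperLU 4.x\n// headers and linked against the library; the file name and matrix values\n// are illustrative only.\n#include <Eigen/Sparse>\n#include <Eigen/SuperLUSupport>\n#include <iostream>\n\nint main()\n{\n  // Small sparse system A.x = b in compressed column-major storage.\n  Eigen::SparseMatrix<double> A(3,3);\n  A.insert(0,0) = 4.0; A.insert(0,1) = 1.0;\n  A.insert(1,0) = 1.0; A.insert(1,1) = 3.0;\n  A.insert(2,2) = 2.0;\n  A.makeCompressed();\n\n  Eigen::VectorXd b(3);\n  b << 1.0, 2.0, 3.0;\n\n  // compute() chains analyzePattern() and factorize(); the two steps can\n  // also be called separately to re-factorize matrices that share one\n  // sparsity pattern.\n  Eigen::SuperLU<Eigen::SparseMatrix<double> > solver;\n  solver.compute(A);\n  if(solver.info() != Eigen::Success)\n    return 1;\n\n  Eigen::VectorXd x = solver.solve(b);\n  std::cout << \"x = \" << x.transpose() << std::endl;\n  return 0;\n}\n"
  },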
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/UmfPackSupport/UmfPackSupport.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008-2011 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_UMFPACKSUPPORT_H\n#define EIGEN_UMFPACKSUPPORT_H\n\n// for compatibility with super old version of umfpack,\n// not sure this is really needed, but this is harmless.\n#ifndef SuiteSparse_long\n#ifdef UF_long\n#define SuiteSparse_long UF_long\n#else\n#error neither SuiteSparse_long nor UF_long are defined\n#endif\n#endif\n\nnamespace Eigen {\n\n/* TODO extract L, extract U, compute det, etc... */\n\n// generic double/complex<double> wrapper functions:\n\n\n // Defaults\ninline void umfpack_defaults(double control[UMFPACK_CONTROL], double, int)\n{ umfpack_di_defaults(control); }\n\ninline void umfpack_defaults(double control[UMFPACK_CONTROL], std::complex<double>, int)\n{ umfpack_zi_defaults(control); }\n\ninline void umfpack_defaults(double control[UMFPACK_CONTROL], double, SuiteSparse_long)\n{ umfpack_dl_defaults(control); }\n\ninline void umfpack_defaults(double control[UMFPACK_CONTROL], std::complex<double>, SuiteSparse_long)\n{ umfpack_zl_defaults(control); }\n\n// Report info\ninline void umfpack_report_info(double control[UMFPACK_CONTROL], double info[UMFPACK_INFO], double, int)\n{ umfpack_di_report_info(control, info);}\n\ninline void umfpack_report_info(double control[UMFPACK_CONTROL], double info[UMFPACK_INFO], std::complex<double>, int)\n{ umfpack_zi_report_info(control, info);}\n\ninline void umfpack_report_info(double control[UMFPACK_CONTROL], double info[UMFPACK_INFO], double, SuiteSparse_long)\n{ umfpack_dl_report_info(control, info);}\n\ninline void umfpack_report_info(double control[UMFPACK_CONTROL], double info[UMFPACK_INFO], std::complex<double>, SuiteSparse_long)\n{ umfpack_zl_report_info(control, info);}\n\n// Report status\ninline void umfpack_report_status(double control[UMFPACK_CONTROL], int status, double, int)\n{ umfpack_di_report_status(control, status);}\n\ninline void umfpack_report_status(double control[UMFPACK_CONTROL], int status, std::complex<double>, int)\n{ umfpack_zi_report_status(control, status);}\n\ninline void umfpack_report_status(double control[UMFPACK_CONTROL], int status, double, SuiteSparse_long)\n{ umfpack_dl_report_status(control, status);}\n\ninline void umfpack_report_status(double control[UMFPACK_CONTROL], int status, std::complex<double>, SuiteSparse_long)\n{ umfpack_zl_report_status(control, status);}\n\n// report control\ninline void umfpack_report_control(double control[UMFPACK_CONTROL], double, int)\n{ umfpack_di_report_control(control);}\n\ninline void umfpack_report_control(double control[UMFPACK_CONTROL], std::complex<double>, int)\n{ umfpack_zi_report_control(control);}\n\ninline void umfpack_report_control(double control[UMFPACK_CONTROL], double, SuiteSparse_long)\n{ umfpack_dl_report_control(control);}\n\ninline void umfpack_report_control(double control[UMFPACK_CONTROL], std::complex<double>, SuiteSparse_long)\n{ umfpack_zl_report_control(control);}\n\n// Free numeric\ninline void umfpack_free_numeric(void **Numeric, double, int)\n{ umfpack_di_free_numeric(Numeric); *Numeric = 0; }\n\ninline void umfpack_free_numeric(void **Numeric, std::complex<double>, int)\n{ umfpack_zi_free_numeric(Numeric); *Numeric = 0; }\n\ninline void 
umfpack_free_numeric(void **Numeric, double, SuiteSparse_long)\n{ umfpack_dl_free_numeric(Numeric); *Numeric = 0; }\n\ninline void umfpack_free_numeric(void **Numeric, std::complex<double>, SuiteSparse_long)\n{ umfpack_zl_free_numeric(Numeric); *Numeric = 0; }\n\n// Free symbolic\ninline void umfpack_free_symbolic(void **Symbolic, double, int)\n{ umfpack_di_free_symbolic(Symbolic); *Symbolic = 0; }\n\ninline void umfpack_free_symbolic(void **Symbolic, std::complex<double>, int)\n{ umfpack_zi_free_symbolic(Symbolic); *Symbolic = 0; }\n\ninline void umfpack_free_symbolic(void **Symbolic, double, SuiteSparse_long)\n{ umfpack_dl_free_symbolic(Symbolic); *Symbolic = 0; }\n\ninline void umfpack_free_symbolic(void **Symbolic, std::complex<double>, SuiteSparse_long)\n{ umfpack_zl_free_symbolic(Symbolic); *Symbolic = 0; }\n\n// Symbolic\ninline int umfpack_symbolic(int n_row,int n_col,\n                            const int Ap[], const int Ai[], const double Ax[], void **Symbolic,\n                            const double Control [UMFPACK_CONTROL], double Info [UMFPACK_INFO])\n{\n  return umfpack_di_symbolic(n_row,n_col,Ap,Ai,Ax,Symbolic,Control,Info);\n}\n\ninline int umfpack_symbolic(int n_row,int n_col,\n                            const int Ap[], const int Ai[], const std::complex<double> Ax[], void **Symbolic,\n                            const double Control [UMFPACK_CONTROL], double Info [UMFPACK_INFO])\n{\n  return umfpack_zi_symbolic(n_row,n_col,Ap,Ai,&numext::real_ref(Ax[0]),0,Symbolic,Control,Info);\n}\ninline SuiteSparse_long umfpack_symbolic( SuiteSparse_long n_row,SuiteSparse_long n_col,\n                                          const SuiteSparse_long Ap[], const SuiteSparse_long Ai[], const double Ax[], void **Symbolic,\n                                          const double Control [UMFPACK_CONTROL], double Info [UMFPACK_INFO])\n{\n  return umfpack_dl_symbolic(n_row,n_col,Ap,Ai,Ax,Symbolic,Control,Info);\n}\n\ninline SuiteSparse_long umfpack_symbolic( SuiteSparse_long n_row,SuiteSparse_long n_col,\n                                          const SuiteSparse_long Ap[], const SuiteSparse_long Ai[], const std::complex<double> Ax[], void **Symbolic,\n                                          const double Control [UMFPACK_CONTROL], double Info [UMFPACK_INFO])\n{\n  return umfpack_zl_symbolic(n_row,n_col,Ap,Ai,&numext::real_ref(Ax[0]),0,Symbolic,Control,Info);\n}\n\n// Numeric\ninline int umfpack_numeric( const int Ap[], const int Ai[], const double Ax[],\n                            void *Symbolic, void **Numeric,\n                            const double Control[UMFPACK_CONTROL],double Info [UMFPACK_INFO])\n{\n  return umfpack_di_numeric(Ap,Ai,Ax,Symbolic,Numeric,Control,Info);\n}\n\ninline int umfpack_numeric( const int Ap[], const int Ai[], const std::complex<double> Ax[],\n                            void *Symbolic, void **Numeric,\n                            const double Control[UMFPACK_CONTROL],double Info [UMFPACK_INFO])\n{\n  return umfpack_zi_numeric(Ap,Ai,&numext::real_ref(Ax[0]),0,Symbolic,Numeric,Control,Info);\n}\ninline SuiteSparse_long umfpack_numeric(const SuiteSparse_long Ap[], const SuiteSparse_long Ai[], const double Ax[],\n                                        void *Symbolic, void **Numeric,\n                                        const double Control[UMFPACK_CONTROL],double Info [UMFPACK_INFO])\n{\n  return umfpack_dl_numeric(Ap,Ai,Ax,Symbolic,Numeric,Control,Info);\n}\n\ninline SuiteSparse_long umfpack_numeric(const SuiteSparse_long Ap[], const SuiteSparse_long 
Ai[], const std::complex<double> Ax[],\n                                        void *Symbolic, void **Numeric,\n                                        const double Control[UMFPACK_CONTROL],double Info [UMFPACK_INFO])\n{\n  return umfpack_zl_numeric(Ap,Ai,&numext::real_ref(Ax[0]),0,Symbolic,Numeric,Control,Info);\n}\n\n// solve\ninline int umfpack_solve( int sys, const int Ap[], const int Ai[], const double Ax[],\n                          double X[], const double B[], void *Numeric,\n                          const double Control[UMFPACK_CONTROL], double Info[UMFPACK_INFO])\n{\n  return umfpack_di_solve(sys,Ap,Ai,Ax,X,B,Numeric,Control,Info);\n}\n\ninline int umfpack_solve( int sys, const int Ap[], const int Ai[], const std::complex<double> Ax[],\n                          std::complex<double> X[], const std::complex<double> B[], void *Numeric,\n                          const double Control[UMFPACK_CONTROL], double Info[UMFPACK_INFO])\n{\n  return umfpack_zi_solve(sys,Ap,Ai,&numext::real_ref(Ax[0]),0,&numext::real_ref(X[0]),0,&numext::real_ref(B[0]),0,Numeric,Control,Info);\n}\n\ninline SuiteSparse_long umfpack_solve(int sys, const SuiteSparse_long Ap[], const SuiteSparse_long Ai[], const double Ax[],\n                                      double X[], const double B[], void *Numeric,\n                                      const double Control[UMFPACK_CONTROL], double Info[UMFPACK_INFO])\n{\n  return umfpack_dl_solve(sys,Ap,Ai,Ax,X,B,Numeric,Control,Info);\n}\n\ninline SuiteSparse_long umfpack_solve(int sys, const SuiteSparse_long Ap[], const SuiteSparse_long Ai[], const std::complex<double> Ax[],\n                                      std::complex<double> X[], const std::complex<double> B[], void *Numeric,\n                                      const double Control[UMFPACK_CONTROL], double Info[UMFPACK_INFO])\n{\n  return umfpack_zl_solve(sys,Ap,Ai,&numext::real_ref(Ax[0]),0,&numext::real_ref(X[0]),0,&numext::real_ref(B[0]),0,Numeric,Control,Info);\n}\n\n// Get Lunz\ninline int umfpack_get_lunz(int *lnz, int *unz, int *n_row, int *n_col, int *nz_udiag, void *Numeric, double)\n{\n  return umfpack_di_get_lunz(lnz,unz,n_row,n_col,nz_udiag,Numeric);\n}\n\ninline int umfpack_get_lunz(int *lnz, int *unz, int *n_row, int *n_col, int *nz_udiag, void *Numeric, std::complex<double>)\n{\n  return umfpack_zi_get_lunz(lnz,unz,n_row,n_col,nz_udiag,Numeric);\n}\n\ninline SuiteSparse_long umfpack_get_lunz( SuiteSparse_long *lnz, SuiteSparse_long *unz, SuiteSparse_long *n_row, SuiteSparse_long *n_col,\n                                          SuiteSparse_long *nz_udiag, void *Numeric, double)\n{\n  return umfpack_dl_get_lunz(lnz,unz,n_row,n_col,nz_udiag,Numeric);\n}\n\ninline SuiteSparse_long umfpack_get_lunz( SuiteSparse_long *lnz, SuiteSparse_long *unz, SuiteSparse_long *n_row, SuiteSparse_long *n_col,\n                                          SuiteSparse_long *nz_udiag, void *Numeric, std::complex<double>)\n{\n  return umfpack_zl_get_lunz(lnz,unz,n_row,n_col,nz_udiag,Numeric);\n}\n\n// Get Numeric\ninline int umfpack_get_numeric(int Lp[], int Lj[], double Lx[], int Up[], int Ui[], double Ux[],\n                               int P[], int Q[], double Dx[], int *do_recip, double Rs[], void *Numeric)\n{\n  return umfpack_di_get_numeric(Lp,Lj,Lx,Up,Ui,Ux,P,Q,Dx,do_recip,Rs,Numeric);\n}\n\ninline int umfpack_get_numeric(int Lp[], int Lj[], std::complex<double> Lx[], int Up[], int Ui[], std::complex<double> Ux[],\n                               int P[], int Q[], std::complex<double> Dx[], int *do_recip, 
double Rs[], void *Numeric)\n{\n  double& lx0_real = numext::real_ref(Lx[0]);\n  double& ux0_real = numext::real_ref(Ux[0]);\n  double& dx0_real = numext::real_ref(Dx[0]);\n  return umfpack_zi_get_numeric(Lp,Lj,Lx?&lx0_real:0,0,Up,Ui,Ux?&ux0_real:0,0,P,Q,\n                                Dx?&dx0_real:0,0,do_recip,Rs,Numeric);\n}\ninline SuiteSparse_long umfpack_get_numeric(SuiteSparse_long Lp[], SuiteSparse_long Lj[], double Lx[], SuiteSparse_long Up[], SuiteSparse_long Ui[], double Ux[],\n                                            SuiteSparse_long P[], SuiteSparse_long Q[], double Dx[], SuiteSparse_long *do_recip, double Rs[], void *Numeric)\n{\n  return umfpack_dl_get_numeric(Lp,Lj,Lx,Up,Ui,Ux,P,Q,Dx,do_recip,Rs,Numeric);\n}\n\ninline SuiteSparse_long umfpack_get_numeric(SuiteSparse_long Lp[], SuiteSparse_long Lj[], std::complex<double> Lx[], SuiteSparse_long Up[], SuiteSparse_long Ui[], std::complex<double> Ux[],\n                                            SuiteSparse_long P[], SuiteSparse_long Q[], std::complex<double> Dx[], SuiteSparse_long *do_recip, double Rs[], void *Numeric)\n{\n  double& lx0_real = numext::real_ref(Lx[0]);\n  double& ux0_real = numext::real_ref(Ux[0]);\n  double& dx0_real = numext::real_ref(Dx[0]);\n  return umfpack_zl_get_numeric(Lp,Lj,Lx?&lx0_real:0,0,Up,Ui,Ux?&ux0_real:0,0,P,Q,\n                                Dx?&dx0_real:0,0,do_recip,Rs,Numeric);\n}\n\n// Get Determinant\ninline int umfpack_get_determinant(double *Mx, double *Ex, void *NumericHandle, double User_Info [UMFPACK_INFO], int)\n{\n  return umfpack_di_get_determinant(Mx,Ex,NumericHandle,User_Info);\n}\n\ninline int umfpack_get_determinant(std::complex<double> *Mx, double *Ex, void *NumericHandle, double User_Info [UMFPACK_INFO], int)\n{\n  double& mx_real = numext::real_ref(*Mx);\n  return umfpack_zi_get_determinant(&mx_real,0,Ex,NumericHandle,User_Info);\n}\n\ninline SuiteSparse_long umfpack_get_determinant(double *Mx, double *Ex, void *NumericHandle, double User_Info [UMFPACK_INFO], SuiteSparse_long)\n{\n  return umfpack_dl_get_determinant(Mx,Ex,NumericHandle,User_Info);\n}\n\ninline SuiteSparse_long umfpack_get_determinant(std::complex<double> *Mx, double *Ex, void *NumericHandle, double User_Info [UMFPACK_INFO], SuiteSparse_long)\n{\n  double& mx_real = numext::real_ref(*Mx);\n  return umfpack_zl_get_determinant(&mx_real,0,Ex,NumericHandle,User_Info);\n}\n\n\n/** \\ingroup UmfPackSupport_Module\n  * \\brief A sparse LU factorization and solver based on UmfPack\n  *\n  * This class allows solving sparse linear problems A.X = B via an LU factorization\n  * using the UmfPack library. The sparse matrix A must be square and of full rank.\n  * The vectors or matrices X and B can be either dense or sparse.\n  *\n  * \\warning The input matrix A should be in a \\b compressed and \\b column-major form.\n  * Otherwise an expensive copy will be made. 
You can call the inexpensive makeCompressed() to get a compressed matrix.\n  * \\tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<>\n  *\n  * \\implsparsesolverconcept\n  *\n  * \\sa \\ref TutorialSparseSolverConcept, class SparseLU\n  */\ntemplate<typename _MatrixType>\nclass UmfPackLU : public SparseSolverBase<UmfPackLU<_MatrixType> >\n{\n  protected:\n    typedef SparseSolverBase<UmfPackLU<_MatrixType> > Base;\n    using Base::m_isInitialized;\n  public:\n    using Base::_solve_impl;\n    typedef _MatrixType MatrixType;\n    typedef typename MatrixType::Scalar Scalar;\n    typedef typename MatrixType::RealScalar RealScalar;\n    typedef typename MatrixType::StorageIndex StorageIndex;\n    typedef Matrix<Scalar,Dynamic,1> Vector;\n    typedef Matrix<int, 1, MatrixType::ColsAtCompileTime> IntRowVectorType;\n    typedef Matrix<int, MatrixType::RowsAtCompileTime, 1> IntColVectorType;\n    typedef SparseMatrix<Scalar> LUMatrixType;\n    typedef SparseMatrix<Scalar,ColMajor,StorageIndex> UmfpackMatrixType;\n    typedef Ref<const UmfpackMatrixType, StandardCompressedFormat> UmfpackMatrixRef;\n    enum {\n      ColsAtCompileTime = MatrixType::ColsAtCompileTime,\n      MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime\n    };\n\n  public:\n\n    typedef Array<double, UMFPACK_CONTROL, 1> UmfpackControl;\n    typedef Array<double, UMFPACK_INFO, 1> UmfpackInfo;\n\n    UmfPackLU()\n      : m_dummy(0,0), mp_matrix(m_dummy)\n    {\n      init();\n    }\n\n    template<typename InputMatrixType>\n    explicit UmfPackLU(const InputMatrixType& matrix)\n      : mp_matrix(matrix)\n    {\n      init();\n      compute(matrix);\n    }\n\n    ~UmfPackLU()\n    {\n      if(m_symbolic) umfpack_free_symbolic(&m_symbolic,Scalar(), StorageIndex());\n      if(m_numeric)  umfpack_free_numeric(&m_numeric,Scalar(), StorageIndex());\n    }\n\n    inline Index rows() const { return mp_matrix.rows(); }\n    inline Index cols() const { return mp_matrix.cols(); }\n\n    /** \\brief Reports whether previous computation was successful.\n      *\n      * \\returns \\c Success if computation was successful,\n      *          \\c NumericalIssue if the factorization failed because the matrix appears to be singular.\n      */\n    ComputationInfo info() const\n    {\n      eigen_assert(m_isInitialized && \"Decomposition is not initialized.\");\n      return m_info;\n    }\n\n    inline const LUMatrixType& matrixL() const\n    {\n      if (m_extractedDataAreDirty) extractData();\n      return m_l;\n    }\n\n    inline const LUMatrixType& matrixU() const\n    {\n      if (m_extractedDataAreDirty) extractData();\n      return m_u;\n    }\n\n    inline const IntColVectorType& permutationP() const\n    {\n      if (m_extractedDataAreDirty) extractData();\n      return m_p;\n    }\n\n    inline const IntRowVectorType& permutationQ() const\n    {\n      if (m_extractedDataAreDirty) extractData();\n      return m_q;\n    }\n\n    /** Computes the sparse LU decomposition of \\a matrix\n     *  Note that the matrix should be column-major, and in compressed format for best performance.\n     *  \\sa SparseMatrix::makeCompressed().\n     */\n    template<typename InputMatrixType>\n    void compute(const InputMatrixType& matrix)\n    {\n      if(m_symbolic) umfpack_free_symbolic(&m_symbolic,Scalar(),StorageIndex());\n      if(m_numeric)  umfpack_free_numeric(&m_numeric,Scalar(),StorageIndex());\n      grab(matrix.derived());\n      analyzePattern_impl();\n      factorize_impl();\n    }\n\n    /** Performs a symbolic decomposition on the 
sparsity of \\a matrix.\n      *\n      * This function is particularly useful when solving for several problems having the same structure.\n      *\n      * \\sa factorize(), compute()\n      */\n    template<typename InputMatrixType>\n    void analyzePattern(const InputMatrixType& matrix)\n    {\n      if(m_symbolic) umfpack_free_symbolic(&m_symbolic,Scalar(),StorageIndex());\n      if(m_numeric)  umfpack_free_numeric(&m_numeric,Scalar(),StorageIndex());\n\n      grab(matrix.derived());\n\n      analyzePattern_impl();\n    }\n\n    /** Provides the return status code returned by UmfPack during the numeric\n      * factorization.\n      *\n      * \\sa factorize(), compute()\n      */\n    inline int umfpackFactorizeReturncode() const\n    {\n      eigen_assert(m_numeric && \"UmfPackLU: you must first call factorize()\");\n      return m_fact_errorCode;\n    }\n\n    /** Provides access to the control settings array used by UmfPack.\n      *\n      * If this array contains NaN's, the default values are used.\n      *\n      * See UMFPACK documentation for details.\n      */\n    inline const UmfpackControl& umfpackControl() const\n    {\n      return m_control;\n    }\n\n    /** Provides access to the control settings array used by UmfPack.\n      *\n      * If this array contains NaN's, the default values are used.\n      *\n      * See UMFPACK documentation for details.\n      */\n    inline UmfpackControl& umfpackControl()\n    {\n      return m_control;\n    }\n\n    /** Performs a numeric decomposition of \\a matrix\n      *\n      * The given matrix must have the same sparsity as the matrix on which the pattern analysis has been performed.\n      *\n      * \\sa analyzePattern(), compute()\n      */\n    template<typename InputMatrixType>\n    void factorize(const InputMatrixType& matrix)\n    {\n      eigen_assert(m_analysisIsOk && \"UmfPackLU: you must first call analyzePattern()\");\n      if(m_numeric)\n        umfpack_free_numeric(&m_numeric,Scalar(),StorageIndex());\n\n      grab(matrix.derived());\n\n      factorize_impl();\n    }\n\n    /** Prints the current UmfPack control settings.\n      *\n      * \\sa umfpackControl()\n      */\n    void printUmfpackControl()\n    {\n      umfpack_report_control(m_control.data(), Scalar(),StorageIndex());\n    }\n\n    /** Prints statistics collected by UmfPack.\n      *\n      * \\sa analyzePattern(), compute()\n      */\n    void printUmfpackInfo()\n    {\n      eigen_assert(m_analysisIsOk && \"UmfPackLU: you must first call analyzePattern()\");\n      umfpack_report_info(m_control.data(), m_umfpackInfo.data(), Scalar(),StorageIndex());\n    }\n\n    /** Prints the status of the previous factorization operation performed by UmfPack (symbolic or numerical factorization).\n      *\n      * \\sa analyzePattern(), compute()\n      */\n    void printUmfpackStatus() {\n      eigen_assert(m_analysisIsOk && \"UmfPackLU: you must first call analyzePattern()\");\n      umfpack_report_status(m_control.data(), m_fact_errorCode, Scalar(),StorageIndex());\n    }\n\n    /** \\internal */\n    template<typename BDerived,typename XDerived>\n    bool _solve_impl(const MatrixBase<BDerived> &b, MatrixBase<XDerived> &x) const;\n\n    Scalar determinant() const;\n\n    void extractData() const;\n\n  protected:\n\n    void init()\n    {\n      m_info                  = InvalidInput;\n      m_isInitialized         = false;\n      m_numeric               = 0;\n      m_symbolic              = 0;\n      m_extractedDataAreDirty = true;\n\n      
umfpack_defaults(m_control.data(), Scalar(),StorageIndex());\n    }\n\n    void analyzePattern_impl()\n    {\n      m_fact_errorCode = umfpack_symbolic(internal::convert_index<StorageIndex>(mp_matrix.rows()),\n                                          internal::convert_index<StorageIndex>(mp_matrix.cols()),\n                                          mp_matrix.outerIndexPtr(), mp_matrix.innerIndexPtr(), mp_matrix.valuePtr(),\n                                          &m_symbolic, m_control.data(), m_umfpackInfo.data());\n\n      m_isInitialized = true;\n      m_info = m_fact_errorCode ? InvalidInput : Success;\n      m_analysisIsOk = true;\n      m_factorizationIsOk = false;\n      m_extractedDataAreDirty = true;\n    }\n\n    void factorize_impl()\n    {\n\n      m_fact_errorCode = umfpack_numeric(mp_matrix.outerIndexPtr(), mp_matrix.innerIndexPtr(), mp_matrix.valuePtr(),\n                                         m_symbolic, &m_numeric, m_control.data(), m_umfpackInfo.data());\n\n      m_info = m_fact_errorCode == UMFPACK_OK ? Success : NumericalIssue;\n      m_factorizationIsOk = true;\n      m_extractedDataAreDirty = true;\n    }\n\n    template<typename MatrixDerived>\n    void grab(const EigenBase<MatrixDerived> &A)\n    {\n      mp_matrix.~UmfpackMatrixRef();\n      ::new (&mp_matrix) UmfpackMatrixRef(A.derived());\n    }\n\n    void grab(const UmfpackMatrixRef &A)\n    {\n      if(&(A.derived()) != &mp_matrix)\n      {\n        mp_matrix.~UmfpackMatrixRef();\n        ::new (&mp_matrix) UmfpackMatrixRef(A);\n      }\n    }\n\n    // cached data to reduce reallocation, etc.\n    mutable LUMatrixType m_l;\n    StorageIndex m_fact_errorCode;\n    UmfpackControl m_control;\n    mutable UmfpackInfo m_umfpackInfo;\n\n    mutable LUMatrixType m_u;\n    mutable IntColVectorType m_p;\n    mutable IntRowVectorType m_q;\n\n    UmfpackMatrixType m_dummy;\n    UmfpackMatrixRef mp_matrix;\n\n    void* m_numeric;\n    void* m_symbolic;\n\n    mutable ComputationInfo m_info;\n    int m_factorizationIsOk;\n    int m_analysisIsOk;\n    mutable bool m_extractedDataAreDirty;\n\n  private:\n    UmfPackLU(const UmfPackLU& ) { }\n};\n\n\ntemplate<typename MatrixType>\nvoid UmfPackLU<MatrixType>::extractData() const\n{\n  if (m_extractedDataAreDirty)\n  {\n    // get size of the data\n    StorageIndex lnz, unz, rows, cols, nz_udiag;\n    umfpack_get_lunz(&lnz, &unz, &rows, &cols, &nz_udiag, m_numeric, Scalar());\n\n    // allocate data\n    m_l.resize(rows,(std::min)(rows,cols));\n    m_l.resizeNonZeros(lnz);\n\n    m_u.resize((std::min)(rows,cols),cols);\n    m_u.resizeNonZeros(unz);\n\n    m_p.resize(rows);\n    m_q.resize(cols);\n\n    // extract\n    umfpack_get_numeric(m_l.outerIndexPtr(), m_l.innerIndexPtr(), m_l.valuePtr(),\n                        m_u.outerIndexPtr(), m_u.innerIndexPtr(), m_u.valuePtr(),\n                        m_p.data(), m_q.data(), 0, 0, 0, m_numeric);\n\n    m_extractedDataAreDirty = false;\n  }\n}\n\ntemplate<typename MatrixType>\ntypename UmfPackLU<MatrixType>::Scalar UmfPackLU<MatrixType>::determinant() const\n{\n  Scalar det;\n  umfpack_get_determinant(&det, 0, m_numeric, 0, StorageIndex());\n  return det;\n}\n\ntemplate<typename MatrixType>\ntemplate<typename BDerived,typename XDerived>\nbool UmfPackLU<MatrixType>::_solve_impl(const MatrixBase<BDerived> &b, MatrixBase<XDerived> &x) const\n{\n  Index rhsCols = b.cols();\n  eigen_assert((BDerived::Flags&RowMajorBit)==0 && \"UmfPackLU backend does not support non col-major rhs yet\");\n  
eigen_assert((XDerived::Flags&RowMajorBit)==0 && \"UmfPackLU backend does not support non col-major result yet\");\n  eigen_assert(b.derived().data() != x.derived().data() && \" Umfpack does not support inplace solve\");\n\n  Scalar* x_ptr = 0;\n  Matrix<Scalar,Dynamic,1> x_tmp;\n  if(x.innerStride()!=1)\n  {\n    x_tmp.resize(x.rows());\n    x_ptr = x_tmp.data();\n  }\n  for (int j=0; j<rhsCols; ++j)\n  {\n    if(x.innerStride()==1)\n      x_ptr = &x.col(j).coeffRef(0);\n    StorageIndex errorCode = umfpack_solve(UMFPACK_A,\n                                mp_matrix.outerIndexPtr(), mp_matrix.innerIndexPtr(), mp_matrix.valuePtr(),\n                                x_ptr, &b.const_cast_derived().col(j).coeffRef(0),\n                                m_numeric, m_control.data(), m_umfpackInfo.data());\n    if(x.innerStride()!=1)\n      x.col(j) = x_tmp;\n    if (errorCode!=0)\n      return false;\n  }\n\n  return true;\n}\n\n} // end namespace Eigen\n\n#endif // EIGEN_UMFPACKSUPPORT_H\n"
  },
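  {
    "path": "examples/umfpack_lu_sketch.cpp",
    "content": "// Hypothetical usage sketch -- not part of the upstream Eigen sources.\n// It exercises the UmfPackLU wrapper in UmfPackSupport.h above, assuming\n// SuiteSparse/UMFPACK is installed and linked; names and values below are\n// illustrative only.\n#include <Eigen/Sparse>\n#include <Eigen/UmfPackSupport>\n#include <iostream>\n\nint main()\n{\n  // The wrapper expects a compressed, column-major matrix; otherwise an\n  // expensive copy is made (see the class documentation above).\n  Eigen::SparseMatrix<double> A(2,2);\n  A.insert(0,0) = 2.0; A.insert(0,1) = 1.0;\n  A.insert(1,1) = 4.0;\n  A.makeCompressed();\n\n  Eigen::UmfPackLU<Eigen::SparseMatrix<double> > lu(A);\n  if(lu.info() != Eigen::Success)\n    return 1;\n\n  Eigen::VectorXd b(2);\n  b << 3.0, 4.0;\n  Eigen::VectorXd x = lu.solve(b);\n\n  // determinant() forwards to the umfpack_get_determinant() wrappers above.\n  std::cout << \"x = \" << x.transpose()\n            << \", det = \" << lu.determinant() << std::endl;\n  return 0;\n}\n"
  },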
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/misc/Image.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_MISC_IMAGE_H\n#define EIGEN_MISC_IMAGE_H\n\nnamespace Eigen { \n\nnamespace internal {\n\n/** \\class image_retval_base\n  *\n  */\ntemplate<typename DecompositionType>\nstruct traits<image_retval_base<DecompositionType> >\n{\n  typedef typename DecompositionType::MatrixType MatrixType;\n  typedef Matrix<\n    typename MatrixType::Scalar,\n    MatrixType::RowsAtCompileTime, // the image is a subspace of the destination space, whose\n                                   // dimension is the number of rows of the original matrix\n    Dynamic,                       // we don't know at compile time the dimension of the image (the rank)\n    MatrixType::Options,\n    MatrixType::MaxRowsAtCompileTime, // the image matrix will consist of columns from the original matrix,\n    MatrixType::MaxColsAtCompileTime  // so it has the same number of rows and at most as many columns.\n  > ReturnType;\n};\n\ntemplate<typename _DecompositionType> struct image_retval_base\n : public ReturnByValue<image_retval_base<_DecompositionType> >\n{\n  typedef _DecompositionType DecompositionType;\n  typedef typename DecompositionType::MatrixType MatrixType;\n  typedef ReturnByValue<image_retval_base> Base;\n\n  image_retval_base(const DecompositionType& dec, const MatrixType& originalMatrix)\n    : m_dec(dec), m_rank(dec.rank()),\n      m_cols(m_rank == 0 ? 1 : m_rank),\n      m_originalMatrix(originalMatrix)\n  {}\n\n  inline Index rows() const { return m_dec.rows(); }\n  inline Index cols() const { return m_cols; }\n  inline Index rank() const { return m_rank; }\n  inline const DecompositionType& dec() const { return m_dec; }\n  inline const MatrixType& originalMatrix() const { return m_originalMatrix; }\n\n  template<typename Dest> inline void evalTo(Dest& dst) const\n  {\n    static_cast<const image_retval<DecompositionType>*>(this)->evalTo(dst);\n  }\n\n  protected:\n    const DecompositionType& m_dec;\n    Index m_rank, m_cols;\n    const MatrixType& m_originalMatrix;\n};\n\n} // end namespace internal\n\n#define EIGEN_MAKE_IMAGE_HELPERS(DecompositionType) \\\n  typedef typename DecompositionType::MatrixType MatrixType; \\\n  typedef typename MatrixType::Scalar Scalar; \\\n  typedef typename MatrixType::RealScalar RealScalar; \\\n  typedef Eigen::internal::image_retval_base<DecompositionType> Base; \\\n  using Base::dec; \\\n  using Base::originalMatrix; \\\n  using Base::rank; \\\n  using Base::rows; \\\n  using Base::cols; \\\n  image_retval(const DecompositionType& dec, const MatrixType& originalMatrix) \\\n    : Base(dec, originalMatrix) {}\n\n} // end namespace Eigen\n\n#endif // EIGEN_MISC_IMAGE_H\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/misc/Kernel.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_MISC_KERNEL_H\n#define EIGEN_MISC_KERNEL_H\n\nnamespace Eigen { \n\nnamespace internal {\n\n/** \\class kernel_retval_base\n  *\n  */\ntemplate<typename DecompositionType>\nstruct traits<kernel_retval_base<DecompositionType> >\n{\n  typedef typename DecompositionType::MatrixType MatrixType;\n  typedef Matrix<\n    typename MatrixType::Scalar,\n    MatrixType::ColsAtCompileTime, // the number of rows in the \"kernel matrix\"\n                                   // is the number of cols of the original matrix\n                                   // so that the product \"matrix * kernel = zero\" makes sense\n    Dynamic,                       // we don't know at compile-time the dimension of the kernel\n    MatrixType::Options,\n    MatrixType::MaxColsAtCompileTime, // see explanation for 2nd template parameter\n    MatrixType::MaxColsAtCompileTime // the kernel is a subspace of the domain space,\n                                     // whose dimension is the number of columns of the original matrix\n  > ReturnType;\n};\n\ntemplate<typename _DecompositionType> struct kernel_retval_base\n : public ReturnByValue<kernel_retval_base<_DecompositionType> >\n{\n  typedef _DecompositionType DecompositionType;\n  typedef ReturnByValue<kernel_retval_base> Base;\n\n  explicit kernel_retval_base(const DecompositionType& dec)\n    : m_dec(dec),\n      m_rank(dec.rank()),\n      m_cols(m_rank==dec.cols() ? 1 : dec.cols() - m_rank)\n  {}\n\n  inline Index rows() const { return m_dec.cols(); }\n  inline Index cols() const { return m_cols; }\n  inline Index rank() const { return m_rank; }\n  inline const DecompositionType& dec() const { return m_dec; }\n\n  template<typename Dest> inline void evalTo(Dest& dst) const\n  {\n    static_cast<const kernel_retval<DecompositionType>*>(this)->evalTo(dst);\n  }\n\n  protected:\n    const DecompositionType& m_dec;\n    Index m_rank, m_cols;\n};\n\n} // end namespace internal\n\n#define EIGEN_MAKE_KERNEL_HELPERS(DecompositionType) \\\n  typedef typename DecompositionType::MatrixType MatrixType; \\\n  typedef typename MatrixType::Scalar Scalar; \\\n  typedef typename MatrixType::RealScalar RealScalar; \\\n  typedef Eigen::internal::kernel_retval_base<DecompositionType> Base; \\\n  using Base::dec; \\\n  using Base::rank; \\\n  using Base::rows; \\\n  using Base::cols; \\\n  kernel_retval(const DecompositionType& dec) : Base(dec) {}\n\n} // end namespace Eigen\n\n#endif // EIGEN_MISC_KERNEL_H\n"
  },
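  {
    "path": "examples/kernel_image_sketch.cpp",
    "content": "// Hypothetical usage sketch -- not part of the upstream Eigen sources.\n// The image_retval/kernel_retval helpers in Image.h and Kernel.h above\n// back FullPivLU::image() and FullPivLU::kernel(); this shows both on a\n// rank-2 matrix. File name and values are illustrative only.\n#include <Eigen/Dense>\n#include <iostream>\n\nint main()\n{\n  Eigen::MatrixXd m(3,3);\n  m << 1, 2, 3,\n       4, 5, 6,\n       7, 8, 9;   // rank 2: the third row equals 2*second - first\n\n  Eigen::FullPivLU<Eigen::MatrixXd> lu(m);\n\n  // kernel(): columns span the null space, so m * ker is (numerically) zero.\n  Eigen::MatrixXd ker = lu.kernel();\n\n  // image() takes the original matrix again because image_retval_base keeps\n  // a reference to it; its columns span the column space of m.\n  Eigen::MatrixXd img = lu.image(m);\n\n  std::cout << \"rank = \" << lu.rank()\n            << \", kernel cols = \" << ker.cols()\n            << \", image cols = \" << img.cols() << std::endl;\n  return 0;\n}\n"
  },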
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/misc/RealSvd2x2.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2009-2010 Benoit Jacob <jacob.benoit.1@gmail.com>\n// Copyright (C) 2013-2016 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_REALSVD2X2_H\n#define EIGEN_REALSVD2X2_H\n\nnamespace Eigen {\n\nnamespace internal {\n\ntemplate<typename MatrixType, typename RealScalar, typename Index>\nvoid real_2x2_jacobi_svd(const MatrixType& matrix, Index p, Index q,\n                         JacobiRotation<RealScalar> *j_left,\n                         JacobiRotation<RealScalar> *j_right)\n{\n  using std::sqrt;\n  using std::abs;\n  Matrix<RealScalar,2,2> m;\n  m << numext::real(matrix.coeff(p,p)), numext::real(matrix.coeff(p,q)),\n       numext::real(matrix.coeff(q,p)), numext::real(matrix.coeff(q,q));\n  JacobiRotation<RealScalar> rot1;\n  RealScalar t = m.coeff(0,0) + m.coeff(1,1);\n  RealScalar d = m.coeff(1,0) - m.coeff(0,1);\n\n  if(abs(d) < (std::numeric_limits<RealScalar>::min)())\n  {\n    rot1.s() = RealScalar(0);\n    rot1.c() = RealScalar(1);\n  }\n  else\n  {\n    // If d!=0, then t/d cannot overflow because the magnitude of the\n    // entries forming d are not too small compared to the ones forming t.\n    RealScalar u = t / d;\n    RealScalar tmp = sqrt(RealScalar(1) + numext::abs2(u));\n    rot1.s() = RealScalar(1) / tmp;\n    rot1.c() = u / tmp;\n  }\n  m.applyOnTheLeft(0,1,rot1);\n  j_right->makeJacobi(m,0,1);\n  *j_left = rot1 * j_right->transpose();\n}\n\n} // end namespace internal\n\n} // end namespace Eigen\n\n#endif // EIGEN_REALSVD2X2_H\n"
  },
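  {
    "path": "examples/jacobi_svd_sketch.cpp",
    "content": "// Hypothetical usage sketch -- not part of the upstream Eigen sources.\n// real_2x2_jacobi_svd() in RealSvd2x2.h above is the internal 2x2 kernel\n// that JacobiSVD applies repeatedly to off-diagonal blocks; this shows the\n// public entry point. File name and values are illustrative only.\n#include <Eigen/SVD>\n#include <iostream>\n\nint main()\n{\n  Eigen::MatrixXd m(3,2);\n  m << 1.0, 0.0,\n       0.0, 2.0,\n       1.0, 1.0;\n\n  // Two-sided Jacobi SVD: m = U * S * V^T.\n  Eigen::JacobiSVD<Eigen::MatrixXd> svd(m, Eigen::ComputeThinU | Eigen::ComputeThinV);\n  std::cout << \"singular values: \" << svd.singularValues().transpose() << std::endl;\n\n  // The thin factors also give the least-squares solution of m.x = b.\n  Eigen::VectorXd b(3);\n  b << 1.0, 2.0, 3.0;\n  std::cout << \"x = \" << svd.solve(b).transpose() << std::endl;\n  return 0;\n}\n"
  },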
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/misc/blas.h",
    "content": "#ifndef BLAS_H\n#define BLAS_H\n\n#ifdef __cplusplus\nextern \"C\"\n{\n#endif\n\n#define BLASFUNC(FUNC) FUNC##_\n\n#ifdef __WIN64__\ntypedef long long BLASLONG;\ntypedef unsigned long long BLASULONG;\n#else\ntypedef long BLASLONG;\ntypedef unsigned long BLASULONG;\n#endif\n\nint    BLASFUNC(xerbla)(const char *, int *info, int);\n\nfloat  BLASFUNC(sdot)  (int *, float  *, int *, float  *, int *);\nfloat  BLASFUNC(sdsdot)(int *, float  *,        float  *, int *, float  *, int *);\n\ndouble BLASFUNC(dsdot) (int *, float  *, int *, float  *, int *);\ndouble BLASFUNC(ddot)  (int *, double *, int *, double *, int *);\ndouble BLASFUNC(qdot)  (int *, double *, int *, double *, int *);\n\nint  BLASFUNC(cdotuw)  (int *, float  *, int *, float  *, int *, float*);\nint  BLASFUNC(cdotcw)  (int *, float  *, int *, float  *, int *, float*);\nint  BLASFUNC(zdotuw)  (int *, double  *, int *, double  *, int *, double*);\nint  BLASFUNC(zdotcw)  (int *, double  *, int *, double  *, int *, double*);\n\nint    BLASFUNC(saxpy) (const int *, const float  *, const float  *, const int *, float  *, const int *);\nint    BLASFUNC(daxpy) (const int *, const double *, const double *, const int *, double *, const int *);\nint    BLASFUNC(qaxpy) (const int *, const double *, const double *, const int *, double *, const int *);\nint    BLASFUNC(caxpy) (const int *, const float  *, const float  *, const int *, float  *, const int *);\nint    BLASFUNC(zaxpy) (const int *, const double *, const double *, const int *, double *, const int *);\nint    BLASFUNC(xaxpy) (const int *, const double *, const double *, const int *, double *, const int *);\nint    BLASFUNC(caxpyc)(const int *, const float  *, const float  *, const int *, float  *, const int *);\nint    BLASFUNC(zaxpyc)(const int *, const double *, const double *, const int *, double *, const int *);\nint    BLASFUNC(xaxpyc)(const int *, const double *, const double *, const int *, double *, const int *);\n\nint    BLASFUNC(scopy) (int *, float  *, int *, float  *, int *);\nint    BLASFUNC(dcopy) (int *, double *, int *, double *, int *);\nint    BLASFUNC(qcopy) (int *, double *, int *, double *, int *);\nint    BLASFUNC(ccopy) (int *, float  *, int *, float  *, int *);\nint    BLASFUNC(zcopy) (int *, double *, int *, double *, int *);\nint    BLASFUNC(xcopy) (int *, double *, int *, double *, int *);\n\nint    BLASFUNC(sswap) (int *, float  *, int *, float  *, int *);\nint    BLASFUNC(dswap) (int *, double *, int *, double *, int *);\nint    BLASFUNC(qswap) (int *, double *, int *, double *, int *);\nint    BLASFUNC(cswap) (int *, float  *, int *, float  *, int *);\nint    BLASFUNC(zswap) (int *, double *, int *, double *, int *);\nint    BLASFUNC(xswap) (int *, double *, int *, double *, int *);\n\nfloat  BLASFUNC(sasum) (int *, float  *, int *);\nfloat  BLASFUNC(scasum)(int *, float  *, int *);\ndouble BLASFUNC(dasum) (int *, double *, int *);\ndouble BLASFUNC(qasum) (int *, double *, int *);\ndouble BLASFUNC(dzasum)(int *, double *, int *);\ndouble BLASFUNC(qxasum)(int *, double *, int *);\n\nint    BLASFUNC(isamax)(int *, float  *, int *);\nint    BLASFUNC(idamax)(int *, double *, int *);\nint    BLASFUNC(iqamax)(int *, double *, int *);\nint    BLASFUNC(icamax)(int *, float  *, int *);\nint    BLASFUNC(izamax)(int *, double *, int *);\nint    BLASFUNC(ixamax)(int *, double *, int *);\n\nint    BLASFUNC(ismax) (int *, float  *, int *);\nint    BLASFUNC(idmax) (int *, double *, int *);\nint    BLASFUNC(iqmax) (int *, double *, int *);\nint    
BLASFUNC(icmax) (int *, float  *, int *);\nint    BLASFUNC(izmax) (int *, double *, int *);\nint    BLASFUNC(ixmax) (int *, double *, int *);\n\nint    BLASFUNC(isamin)(int *, float  *, int *);\nint    BLASFUNC(idamin)(int *, double *, int *);\nint    BLASFUNC(iqamin)(int *, double *, int *);\nint    BLASFUNC(icamin)(int *, float  *, int *);\nint    BLASFUNC(izamin)(int *, double *, int *);\nint    BLASFUNC(ixamin)(int *, double *, int *);\n\nint    BLASFUNC(ismin)(int *, float  *, int *);\nint    BLASFUNC(idmin)(int *, double *, int *);\nint    BLASFUNC(iqmin)(int *, double *, int *);\nint    BLASFUNC(icmin)(int *, float  *, int *);\nint    BLASFUNC(izmin)(int *, double *, int *);\nint    BLASFUNC(ixmin)(int *, double *, int *);\n\nfloat  BLASFUNC(samax) (int *, float  *, int *);\ndouble BLASFUNC(damax) (int *, double *, int *);\ndouble BLASFUNC(qamax) (int *, double *, int *);\nfloat  BLASFUNC(scamax)(int *, float  *, int *);\ndouble BLASFUNC(dzamax)(int *, double *, int *);\ndouble BLASFUNC(qxamax)(int *, double *, int *);\n\nfloat  BLASFUNC(samin) (int *, float  *, int *);\ndouble BLASFUNC(damin) (int *, double *, int *);\ndouble BLASFUNC(qamin) (int *, double *, int *);\nfloat  BLASFUNC(scamin)(int *, float  *, int *);\ndouble BLASFUNC(dzamin)(int *, double *, int *);\ndouble BLASFUNC(qxamin)(int *, double *, int *);\n\nfloat  BLASFUNC(smax)  (int *, float  *, int *);\ndouble BLASFUNC(dmax)  (int *, double *, int *);\ndouble BLASFUNC(qmax)  (int *, double *, int *);\nfloat  BLASFUNC(scmax) (int *, float  *, int *);\ndouble BLASFUNC(dzmax) (int *, double *, int *);\ndouble BLASFUNC(qxmax) (int *, double *, int *);\n\nfloat  BLASFUNC(smin)  (int *, float  *, int *);\ndouble BLASFUNC(dmin)  (int *, double *, int *);\ndouble BLASFUNC(qmin)  (int *, double *, int *);\nfloat  BLASFUNC(scmin) (int *, float  *, int *);\ndouble BLASFUNC(dzmin) (int *, double *, int *);\ndouble BLASFUNC(qxmin) (int *, double *, int *);\n\nint    BLASFUNC(sscal) (int *,  float  *, float  *, int *);\nint    BLASFUNC(dscal) (int *,  double *, double *, int *);\nint    BLASFUNC(qscal) (int *,  double *, double *, int *);\nint    BLASFUNC(cscal) (int *,  float  *, float  *, int *);\nint    BLASFUNC(zscal) (int *,  double *, double *, int *);\nint    BLASFUNC(xscal) (int *,  double *, double *, int *);\nint    BLASFUNC(csscal)(int *,  float  *, float  *, int *);\nint    BLASFUNC(zdscal)(int *,  double *, double *, int *);\nint    BLASFUNC(xqscal)(int *,  double *, double *, int *);\n\nfloat  BLASFUNC(snrm2) (int *, float  *, int *);\nfloat  BLASFUNC(scnrm2)(int *, float  *, int *);\n\ndouble BLASFUNC(dnrm2) (int *, double *, int *);\ndouble BLASFUNC(qnrm2) (int *, double *, int *);\ndouble BLASFUNC(dznrm2)(int *, double *, int *);\ndouble BLASFUNC(qxnrm2)(int *, double *, int *);\n\nint    BLASFUNC(srot)  (int *, float  *, int *, float  *, int *, float  *, float  *);\nint    BLASFUNC(drot)  (int *, double *, int *, double *, int *, double *, double *);\nint    BLASFUNC(qrot)  (int *, double *, int *, double *, int *, double *, double *);\nint    BLASFUNC(csrot) (int *, float  *, int *, float  *, int *, float  *, float  *);\nint    BLASFUNC(zdrot) (int *, double *, int *, double *, int *, double *, double *);\nint    BLASFUNC(xqrot) (int *, double *, int *, double *, int *, double *, double *);\n\nint    BLASFUNC(srotg) (float  *, float  *, float  *, float  *);\nint    BLASFUNC(drotg) (double *, double *, double *, double *);\nint    BLASFUNC(qrotg) (double *, double *, double *, double *);\nint    BLASFUNC(crotg) 
(float  *, float  *, float  *, float  *);\nint    BLASFUNC(zrotg) (double *, double *, double *, double *);\nint    BLASFUNC(xrotg) (double *, double *, double *, double *);\n\nint    BLASFUNC(srotmg)(float  *, float  *, float  *, float  *, float  *);\nint    BLASFUNC(drotmg)(double *, double *, double *, double *, double *);\n\nint    BLASFUNC(srotm) (int *, float  *, int *, float  *, int *, float  *);\nint    BLASFUNC(drotm) (int *, double *, int *, double *, int *, double *);\nint    BLASFUNC(qrotm) (int *, double *, int *, double *, int *, double *);\n\n/* Level 2 routines */\n\nint BLASFUNC(sger)(int *,    int *, float *,  float *, int *,\n\t\t   float *,  int *, float *,  int *);\nint BLASFUNC(dger)(int *,    int *, double *, double *, int *,\n\t\t   double *, int *, double *, int *);\nint BLASFUNC(qger)(int *,    int *, double *, double *, int *,\n\t\t   double *, int *, double *, int *);\nint BLASFUNC(cgeru)(int *,    int *, float *,  float *, int *,\n\t\t    float *,  int *, float *,  int *);\nint BLASFUNC(cgerc)(int *,    int *, float *,  float *, int *,\n\t\t    float *,  int *, float *,  int *);\nint BLASFUNC(zgeru)(int *,    int *, double *, double *, int *,\n\t\t    double *, int *, double *, int *);\nint BLASFUNC(zgerc)(int *,    int *, double *, double *, int *,\n\t\t    double *, int *, double *, int *);\nint BLASFUNC(xgeru)(int *,    int *, double *, double *, int *,\n\t\t    double *, int *, double *, int *);\nint BLASFUNC(xgerc)(int *,    int *, double *, double *, int *,\n\t\t    double *, int *, double *, int *);\n\nint BLASFUNC(sgemv)(const char *, const int *, const int *, const float  *, const float  *, const int *, const float  *, const int *, const float  *, float  *, const int *);\nint BLASFUNC(dgemv)(const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);\nint BLASFUNC(qgemv)(const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);\nint BLASFUNC(cgemv)(const char *, const int *, const int *, const float  *, const float  *, const int *, const float  *, const int *, const float  *, float  *, const int *);\nint BLASFUNC(zgemv)(const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);\nint BLASFUNC(xgemv)(const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);\n\nint BLASFUNC(strsv) (const char *, const char *, const char *, const int *, const float  *, const int *, float  *, const int *);\nint BLASFUNC(dtrsv) (const char *, const char *, const char *, const int *, const double *, const int *, double *, const int *);\nint BLASFUNC(qtrsv) (const char *, const char *, const char *, const int *, const double *, const int *, double *, const int *);\nint BLASFUNC(ctrsv) (const char *, const char *, const char *, const int *, const float  *, const int *, float  *, const int *);\nint BLASFUNC(ztrsv) (const char *, const char *, const char *, const int *, const double *, const int *, double *, const int *);\nint BLASFUNC(xtrsv) (const char *, const char *, const char *, const int *, const double *, const int *, double *, const int *);\n\nint BLASFUNC(stpsv) (char *, char *, char *, int *, float  *, float  *, int *);\nint BLASFUNC(dtpsv) (char *, char *, char *, int *, double *, double *, int 
*);\nint BLASFUNC(qtpsv) (char *, char *, char *, int *, double *, double *, int *);\nint BLASFUNC(ctpsv) (char *, char *, char *, int *, float  *, float  *, int *);\nint BLASFUNC(ztpsv) (char *, char *, char *, int *, double *, double *, int *);\nint BLASFUNC(xtpsv) (char *, char *, char *, int *, double *, double *, int *);\n\nint BLASFUNC(strmv) (const char *, const char *, const char *, const int *, const float  *, const int *, float  *, const int *);\nint BLASFUNC(dtrmv) (const char *, const char *, const char *, const int *, const double *, const int *, double *, const int *);\nint BLASFUNC(qtrmv) (const char *, const char *, const char *, const int *, const double *, const int *, double *, const int *);\nint BLASFUNC(ctrmv) (const char *, const char *, const char *, const int *, const float  *, const int *, float  *, const int *);\nint BLASFUNC(ztrmv) (const char *, const char *, const char *, const int *, const double *, const int *, double *, const int *);\nint BLASFUNC(xtrmv) (const char *, const char *, const char *, const int *, const double *, const int *, double *, const int *);\n\nint BLASFUNC(stpmv) (char *, char *, char *, int *, float  *, float  *, int *);\nint BLASFUNC(dtpmv) (char *, char *, char *, int *, double *, double *, int *);\nint BLASFUNC(qtpmv) (char *, char *, char *, int *, double *, double *, int *);\nint BLASFUNC(ctpmv) (char *, char *, char *, int *, float  *, float  *, int *);\nint BLASFUNC(ztpmv) (char *, char *, char *, int *, double *, double *, int *);\nint BLASFUNC(xtpmv) (char *, char *, char *, int *, double *, double *, int *);\n\nint BLASFUNC(stbmv) (char *, char *, char *, int *, int *, float  *, int *, float  *, int *);\nint BLASFUNC(dtbmv) (char *, char *, char *, int *, int *, double *, int *, double *, int *);\nint BLASFUNC(qtbmv) (char *, char *, char *, int *, int *, double *, int *, double *, int *);\nint BLASFUNC(ctbmv) (char *, char *, char *, int *, int *, float  *, int *, float  *, int *);\nint BLASFUNC(ztbmv) (char *, char *, char *, int *, int *, double *, int *, double *, int *);\nint BLASFUNC(xtbmv) (char *, char *, char *, int *, int *, double *, int *, double *, int *);\n\nint BLASFUNC(stbsv) (char *, char *, char *, int *, int *, float  *, int *, float  *, int *);\nint BLASFUNC(dtbsv) (char *, char *, char *, int *, int *, double *, int *, double *, int *);\nint BLASFUNC(qtbsv) (char *, char *, char *, int *, int *, double *, int *, double *, int *);\nint BLASFUNC(ctbsv) (char *, char *, char *, int *, int *, float  *, int *, float  *, int *);\nint BLASFUNC(ztbsv) (char *, char *, char *, int *, int *, double *, int *, double *, int *);\nint BLASFUNC(xtbsv) (char *, char *, char *, int *, int *, double *, int *, double *, int *);\n\nint BLASFUNC(ssymv) (const char *, const int *, const float  *, const float  *, const int *, const float  *, const int *, const float  *, float  *, const int *);\nint BLASFUNC(dsymv) (const char *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);\nint BLASFUNC(qsymv) (const char *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);\n\nint BLASFUNC(sspmv) (char *, int *, float  *, float *,\n\t\t     float  *, int *, float *, float *, int *);\nint BLASFUNC(dspmv) (char *, int *, double  *, double *,\n\t\t     double  *, int *, double *, double *, int *);\nint BLASFUNC(qspmv) (char *, int *, double  *, double *,\n\t\t     double  *, int *, double *, 
double *, int *);\n\nint BLASFUNC(ssyr) (const char *, const int *, const float   *, const float  *, const int *, float  *, const int *);\nint BLASFUNC(dsyr) (const char *, const int *, const double  *, const double *, const int *, double *, const int *);\nint BLASFUNC(qsyr) (const char *, const int *, const double  *, const double *, const int *, double *, const int *);\n\nint BLASFUNC(ssyr2) (const char *, const int *, const float   *, const float  *, const int *, const float  *, const int *, float  *, const int *);\nint BLASFUNC(dsyr2) (const char *, const int *, const double  *, const double *, const int *, const double *, const int *, double *, const int *);\nint BLASFUNC(qsyr2) (const char *, const int *, const double  *, const double *, const int *, const double *, const int *, double *, const int *);\nint BLASFUNC(csyr2) (const char *, const int *, const float   *, const float  *, const int *, const float  *, const int *, float  *, const int *);\nint BLASFUNC(zsyr2) (const char *, const int *, const double  *, const double *, const int *, const double *, const int *, double *, const int *);\nint BLASFUNC(xsyr2) (const char *, const int *, const double  *, const double *, const int *, const double *, const int *, double *, const int *);\n\nint BLASFUNC(sspr) (char *, int *, float   *, float  *, int *,\n\t\t    float  *);\nint BLASFUNC(dspr) (char *, int *, double  *, double *, int *,\n\t\t    double *);\nint BLASFUNC(qspr) (char *, int *, double  *, double *, int *,\n\t\t    double *);\n\nint BLASFUNC(sspr2) (char *, int *, float   *,\n\t\t     float  *, int *, float  *, int *, float  *);\nint BLASFUNC(dspr2) (char *, int *, double  *,\n\t\t     double *, int *, double *, int *, double *);\nint BLASFUNC(qspr2) (char *, int *, double  *,\n\t\t     double *, int *, double *, int *, double *);\nint BLASFUNC(cspr2) (char *, int *, float   *,\n\t\t     float  *, int *, float  *, int *, float  *);\nint BLASFUNC(zspr2) (char *, int *, double  *,\n\t\t     double *, int *, double *, int *, double *);\nint BLASFUNC(xspr2) (char *, int *, double  *,\n\t\t     double *, int *, double *, int *, double *);\n\nint BLASFUNC(cher) (char *, int *, float   *, float  *, int *,\n\t\t    float  *, int *);\nint BLASFUNC(zher) (char *, int *, double  *, double *, int *,\n\t\t    double *, int *);\nint BLASFUNC(xher) (char *, int *, double  *, double *, int *,\n\t\t    double *, int *);\n\nint BLASFUNC(chpr) (char *, int *, float   *, float  *, int *, float  *);\nint BLASFUNC(zhpr) (char *, int *, double  *, double *, int *, double *);\nint BLASFUNC(xhpr) (char *, int *, double  *, double *, int *, double *);\n\nint BLASFUNC(cher2) (char *, int *, float   *,\n\t\t     float  *, int *, float  *, int *, float  *, int *);\nint BLASFUNC(zher2) (char *, int *, double  *,\n\t\t     double *, int *, double *, int *, double *, int *);\nint BLASFUNC(xher2) (char *, int *, double  *,\n\t\t     double *, int *, double *, int *, double *, int *);\n\nint BLASFUNC(chpr2) (char *, int *, float   *,\n\t\t     float  *, int *, float  *, int *, float  *);\nint BLASFUNC(zhpr2) (char *, int *, double  *,\n\t\t     double *, int *, double *, int *, double *);\nint BLASFUNC(xhpr2) (char *, int *, double  *,\n\t\t     double *, int *, double *, int *, double *);\n\nint BLASFUNC(chemv) (const char *, const int *, const float  *, const float  *, const int *, const float  *, const int *, const float  *, float  *, const int *);\nint BLASFUNC(zhemv) (const char *, const int *, const double *, const double *, const int *, const double 
*, const int *, const double *, double *, const int *);\nint BLASFUNC(xhemv) (const char *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);\n\nint BLASFUNC(chpmv) (char *, int *, float  *, float *,\n\t\t     float  *, int *, float *, float *, int *);\nint BLASFUNC(zhpmv) (char *, int *, double  *, double *,\n\t\t     double  *, int *, double *, double *, int *);\nint BLASFUNC(xhpmv) (char *, int *, double  *, double *,\n\t\t     double  *, int *, double *, double *, int *);\n\nint BLASFUNC(snorm)(char *, int *, int *, float  *, int *);\nint BLASFUNC(dnorm)(char *, int *, int *, double *, int *);\nint BLASFUNC(cnorm)(char *, int *, int *, float  *, int *);\nint BLASFUNC(znorm)(char *, int *, int *, double *, int *);\n\nint BLASFUNC(sgbmv)(char *, int *, int *, int *, int *, float  *, float  *, int *,\n\t\t    float  *, int *, float  *, float  *, int *);\nint BLASFUNC(dgbmv)(char *, int *, int *, int *, int *, double *, double *, int *,\n\t\t    double *, int *, double *, double *, int *);\nint BLASFUNC(qgbmv)(char *, int *, int *, int *, int *, double *, double *, int *,\n\t\t    double *, int *, double *, double *, int *);\nint BLASFUNC(cgbmv)(char *, int *, int *, int *, int *, float  *, float  *, int *,\n\t\t    float  *, int *, float  *, float  *, int *);\nint BLASFUNC(zgbmv)(char *, int *, int *, int *, int *, double *, double *, int *,\n\t\t    double *, int *, double *, double *, int *);\nint BLASFUNC(xgbmv)(char *, int *, int *, int *, int *, double *, double *, int *,\n\t\t    double *, int *, double *, double *, int *);\n\nint BLASFUNC(ssbmv)(char *, int *, int *, float  *, float  *, int *,\n\t\t    float  *, int *, float  *, float  *, int *);\nint BLASFUNC(dsbmv)(char *, int *, int *, double *, double *, int *,\n\t\t    double *, int *, double *, double *, int *);\nint BLASFUNC(qsbmv)(char *, int *, int *, double *, double *, int *,\n\t\t    double *, int *, double *, double *, int *);\nint BLASFUNC(csbmv)(char *, int *, int *, float  *, float  *, int *,\n\t\t    float  *, int *, float  *, float  *, int *);\nint BLASFUNC(zsbmv)(char *, int *, int *, double *, double *, int *,\n\t\t    double *, int *, double *, double *, int *);\nint BLASFUNC(xsbmv)(char *, int *, int *, double *, double *, int *,\n\t\t    double *, int *, double *, double *, int *);\n\nint BLASFUNC(chbmv)(char *, int *, int *, float  *, float  *, int *,\n\t\t    float  *, int *, float  *, float  *, int *);\nint BLASFUNC(zhbmv)(char *, int *, int *, double *, double *, int *,\n\t\t    double *, int *, double *, double *, int *);\nint BLASFUNC(xhbmv)(char *, int *, int *, double *, double *, int *,\n\t\t    double *, int *, double *, double *, int *);\n\n/* Level 3 routines */\n\nint BLASFUNC(sgemm)(const char *, const char *, const int *, const int *, const int *, const float  *, const float  *, const int *, const float  *, const int *, const float  *, float  *, const int *);\nint BLASFUNC(dgemm)(const char *, const char *, const int *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);\nint BLASFUNC(qgemm)(const char *, const char *, const int *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);\nint BLASFUNC(cgemm)(const char *, const char *, const int *, const int *, const int *, const float  *, const float  *, const int *, const float  *, const int *, 
const float  *, float  *, const int *);\nint BLASFUNC(zgemm)(const char *, const char *, const int *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);\nint BLASFUNC(xgemm)(const char *, const char *, const int *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);\n\nint BLASFUNC(cgemm3m)(char *, char *, int *, int *, int *, float *,\n\t   float  *, int *, float  *, int *, float  *, float  *, int *);\nint BLASFUNC(zgemm3m)(char *, char *, int *, int *, int *, double *,\n\t   double *, int *, double *, int *, double *, double *, int *);\nint BLASFUNC(xgemm3m)(char *, char *, int *, int *, int *, double *,\n\t   double *, int *, double *, int *, double *, double *, int *);\n\nint BLASFUNC(sge2mm)(char *, char *, char *, int *, int *,\n\t\t     float *, float  *, int *, float  *, int *,\n\t\t     float *, float  *, int *);\nint BLASFUNC(dge2mm)(char *, char *, char *, int *, int *,\n\t\t     double *, double  *, int *, double  *, int *,\n\t\t     double *, double  *, int *);\nint BLASFUNC(cge2mm)(char *, char *, char *, int *, int *,\n\t\t     float *, float  *, int *, float  *, int *,\n\t\t     float *, float  *, int *);\nint BLASFUNC(zge2mm)(char *, char *, char *, int *, int *,\n\t\t     double *, double  *, int *, double  *, int *,\n\t\t     double *, double  *, int *);\n\nint BLASFUNC(strsm)(const char *, const char *, const char *, const char *, const int *, const int *, const float *,  const float *,  const int *, float *,  const int *);\nint BLASFUNC(dtrsm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, double *, const int *);\nint BLASFUNC(qtrsm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, double *, const int *);\nint BLASFUNC(ctrsm)(const char *, const char *, const char *, const char *, const int *, const int *, const float *,  const float *,  const int *, float *,  const int *);\nint BLASFUNC(ztrsm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, double *, const int *);\nint BLASFUNC(xtrsm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, double *, const int *);\n\nint BLASFUNC(strmm)(const char *, const char *, const char *, const char *, const int *, const int *, const float *,  const float *,  const int *, float *,  const int *);\nint BLASFUNC(dtrmm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, double *, const int *);\nint BLASFUNC(qtrmm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, double *, const int *);\nint BLASFUNC(ctrmm)(const char *, const char *, const char *, const char *, const int *, const int *, const float *,  const float *,  const int *, float *,  const int *);\nint BLASFUNC(ztrmm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, double *, const int *);\nint BLASFUNC(xtrmm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, double *, const int *);\n\nint 
BLASFUNC(ssymm)(const char *, const char *, const int *, const int *, const float  *, const float  *, const int *, const float  *, const int *, const float  *, float  *, const int *);\nint BLASFUNC(dsymm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);\nint BLASFUNC(qsymm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);\nint BLASFUNC(csymm)(const char *, const char *, const int *, const int *, const float  *, const float  *, const int *, const float  *, const int *, const float  *, float  *, const int *);\nint BLASFUNC(zsymm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);\nint BLASFUNC(xsymm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);\n\nint BLASFUNC(csymm3m)(char *, char *, int *, int *, float  *, float  *, int *, float  *, int *, float  *, float  *, int *);\nint BLASFUNC(zsymm3m)(char *, char *, int *, int *, double *, double *, int *, double *, int *, double *, double *, int *);\nint BLASFUNC(xsymm3m)(char *, char *, int *, int *, double *, double *, int *, double *, int *, double *, double *, int *);\n\nint BLASFUNC(ssyrk)(const char *, const char *, const int *, const int *, const float  *, const float  *, const int *, const float  *, float  *, const int *);\nint BLASFUNC(dsyrk)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, double *, const int *);\nint BLASFUNC(qsyrk)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, double *, const int *);\nint BLASFUNC(csyrk)(const char *, const char *, const int *, const int *, const float  *, const float  *, const int *, const float  *, float  *, const int *);\nint BLASFUNC(zsyrk)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, double *, const int *);\nint BLASFUNC(xsyrk)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, double *, const int *);\n\nint BLASFUNC(ssyr2k)(const char *, const char *, const int *, const int *, const float  *, const float  *, const int *, const float *, const int *, const float  *, float  *, const int *);\nint BLASFUNC(dsyr2k)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double*, const int *, const double *, double *, const int *);\nint BLASFUNC(qsyr2k)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double*, const int *, const double *, double *, const int *);\nint BLASFUNC(csyr2k)(const char *, const char *, const int *, const int *, const float  *, const float  *, const int *, const float *, const int *, const float  *, float  *, const int *);\nint BLASFUNC(zsyr2k)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double*, const int *, const double *, double *, const int *);\nint BLASFUNC(xsyr2k)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double*, 
const int *, const double *, double *, const int *);\n\nint BLASFUNC(chemm)(const char *, const char *, const int *, const int *, const float  *, const float  *, const int *, const float  *, const int *, const float  *, float  *, const int *);\nint BLASFUNC(zhemm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);\nint BLASFUNC(xhemm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);\n\nint BLASFUNC(chemm3m)(char *, char *, int *, int *, float  *, float  *, int *,\n\t   float  *, int *, float  *, float  *, int *);\nint BLASFUNC(zhemm3m)(char *, char *, int *, int *, double *, double *, int *,\n\t   double *, int *, double *, double *, int *);\nint BLASFUNC(xhemm3m)(char *, char *, int *, int *, double *, double *, int *,\n\t   double *, int *, double *, double *, int *);\n\nint BLASFUNC(cherk)(const char *, const char *, const int *, const int *, const float  *, const float  *, const int *, const float  *, float  *, const int *);\nint BLASFUNC(zherk)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, double *, const int *);\nint BLASFUNC(xherk)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, double *, const int *);\n\nint BLASFUNC(cher2k)(const char *, const char *, const int *, const int *, const float  *, const float  *, const int *, const float  *, const int *, const float  *, float  *, const int *);\nint BLASFUNC(zher2k)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);\nint BLASFUNC(xher2k)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);\nint BLASFUNC(cher2m)(const char *, const char *, const char *, const int *, const int *, const float  *, const float  *, const int *, const float *, const int *, const float  *, float  *, const int *);\nint BLASFUNC(zher2m)(const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double*, const int *, const double *, double *, const int *);\nint BLASFUNC(xher2m)(const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double*, const int *, const double *, double *, const int *);\n\n\n#ifdef __cplusplus\n}\n#endif\n\n#endif\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/misc/lapack.h",
    "content": "#ifndef LAPACK_H\n#define LAPACK_H\n\n#include \"blas.h\"\n\n#ifdef __cplusplus\nextern \"C\"\n{\n#endif\n\nint BLASFUNC(csymv) (const char *, const int *, const float  *, const float  *, const int *, const float  *, const int *, const float  *, float  *, const int *);\nint BLASFUNC(zsymv) (const char *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);\nint BLASFUNC(xsymv) (const char *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);\n\n\nint BLASFUNC(cspmv) (char *, int *, float  *, float *,\n         float  *, int *, float *, float *, int *);\nint BLASFUNC(zspmv) (char *, int *, double  *, double *,\n         double  *, int *, double *, double *, int *);\nint BLASFUNC(xspmv) (char *, int *, double  *, double *,\n         double  *, int *, double *, double *, int *);\n\nint BLASFUNC(csyr) (char *, int *, float   *, float  *, int *,\n        float  *, int *);\nint BLASFUNC(zsyr) (char *, int *, double  *, double *, int *,\n        double *, int *);\nint BLASFUNC(xsyr) (char *, int *, double  *, double *, int *,\n        double *, int *);\n\nint BLASFUNC(cspr) (char *, int *, float   *, float  *, int *,\n        float  *);\nint BLASFUNC(zspr) (char *, int *, double  *, double *, int *,\n        double *);\nint BLASFUNC(xspr) (char *, int *, double  *, double *, int *,\n        double *);\n\nint BLASFUNC(sgemt)(char *, int *, int *, float  *, float  *, int *,\n        float  *, int *);\nint BLASFUNC(dgemt)(char *, int *, int *, double *, double *, int *,\n        double *, int *);\nint BLASFUNC(cgemt)(char *, int *, int *, float  *, float  *, int *,\n        float  *, int *);\nint BLASFUNC(zgemt)(char *, int *, int *, double *, double *, int *,\n        double *, int *);\n\nint BLASFUNC(sgema)(char *, char *, int *, int *, float  *,\n        float  *, int *, float *, float  *, int *, float *, int *);\nint BLASFUNC(dgema)(char *, char *, int *, int *, double *,\n        double *, int *, double*, double *, int *, double*, int *);\nint BLASFUNC(cgema)(char *, char *, int *, int *, float  *,\n        float  *, int *, float *, float  *, int *, float *, int *);\nint BLASFUNC(zgema)(char *, char *, int *, int *, double *,\n        double *, int *, double*, double *, int *, double*, int *);\n\nint BLASFUNC(sgems)(char *, char *, int *, int *, float  *,\n        float  *, int *, float *, float  *, int *, float *, int *);\nint BLASFUNC(dgems)(char *, char *, int *, int *, double *,\n        double *, int *, double*, double *, int *, double*, int *);\nint BLASFUNC(cgems)(char *, char *, int *, int *, float  *,\n        float  *, int *, float *, float  *, int *, float *, int *);\nint BLASFUNC(zgems)(char *, char *, int *, int *, double *,\n        double *, int *, double*, double *, int *, double*, int *);\n\nint BLASFUNC(sgetf2)(int *, int *, float  *, int *, int *, int *);\nint BLASFUNC(dgetf2)(int *, int *, double *, int *, int *, int *);\nint BLASFUNC(qgetf2)(int *, int *, double *, int *, int *, int *);\nint BLASFUNC(cgetf2)(int *, int *, float  *, int *, int *, int *);\nint BLASFUNC(zgetf2)(int *, int *, double *, int *, int *, int *);\nint BLASFUNC(xgetf2)(int *, int *, double *, int *, int *, int *);\n\nint BLASFUNC(sgetrf)(int *, int *, float  *, int *, int *, int *);\nint BLASFUNC(dgetrf)(int *, int *, double *, int *, int *, int *);\nint BLASFUNC(qgetrf)(int *, int *, double *, int *, int *, int *);\nint 
BLASFUNC(cgetrf)(int *, int *, float  *, int *, int *, int *);\nint BLASFUNC(zgetrf)(int *, int *, double *, int *, int *, int *);\nint BLASFUNC(xgetrf)(int *, int *, double *, int *, int *, int *);\n\nint BLASFUNC(slaswp)(int *, float  *, int *, int *, int *, int *, int *);\nint BLASFUNC(dlaswp)(int *, double *, int *, int *, int *, int *, int *);\nint BLASFUNC(qlaswp)(int *, double *, int *, int *, int *, int *, int *);\nint BLASFUNC(claswp)(int *, float  *, int *, int *, int *, int *, int *);\nint BLASFUNC(zlaswp)(int *, double *, int *, int *, int *, int *, int *);\nint BLASFUNC(xlaswp)(int *, double *, int *, int *, int *, int *, int *);\n\nint BLASFUNC(sgetrs)(char *, int *, int *, float  *, int *, int *, float  *, int *, int *);\nint BLASFUNC(dgetrs)(char *, int *, int *, double *, int *, int *, double *, int *, int *);\nint BLASFUNC(qgetrs)(char *, int *, int *, double *, int *, int *, double *, int *, int *);\nint BLASFUNC(cgetrs)(char *, int *, int *, float  *, int *, int *, float  *, int *, int *);\nint BLASFUNC(zgetrs)(char *, int *, int *, double *, int *, int *, double *, int *, int *);\nint BLASFUNC(xgetrs)(char *, int *, int *, double *, int *, int *, double *, int *, int *);\n\nint BLASFUNC(sgesv)(int *, int *, float  *, int *, int *, float *, int *, int *);\nint BLASFUNC(dgesv)(int *, int *, double *, int *, int *, double*, int *, int *);\nint BLASFUNC(qgesv)(int *, int *, double *, int *, int *, double*, int *, int *);\nint BLASFUNC(cgesv)(int *, int *, float  *, int *, int *, float *, int *, int *);\nint BLASFUNC(zgesv)(int *, int *, double *, int *, int *, double*, int *, int *);\nint BLASFUNC(xgesv)(int *, int *, double *, int *, int *, double*, int *, int *);\n\nint BLASFUNC(spotf2)(char *, int *, float  *, int *, int *);\nint BLASFUNC(dpotf2)(char *, int *, double *, int *, int *);\nint BLASFUNC(qpotf2)(char *, int *, double *, int *, int *);\nint BLASFUNC(cpotf2)(char *, int *, float  *, int *, int *);\nint BLASFUNC(zpotf2)(char *, int *, double *, int *, int *);\nint BLASFUNC(xpotf2)(char *, int *, double *, int *, int *);\n\nint BLASFUNC(spotrf)(char *, int *, float  *, int *, int *);\nint BLASFUNC(dpotrf)(char *, int *, double *, int *, int *);\nint BLASFUNC(qpotrf)(char *, int *, double *, int *, int *);\nint BLASFUNC(cpotrf)(char *, int *, float  *, int *, int *);\nint BLASFUNC(zpotrf)(char *, int *, double *, int *, int *);\nint BLASFUNC(xpotrf)(char *, int *, double *, int *, int *);\n\nint BLASFUNC(slauu2)(char *, int *, float  *, int *, int *);\nint BLASFUNC(dlauu2)(char *, int *, double *, int *, int *);\nint BLASFUNC(qlauu2)(char *, int *, double *, int *, int *);\nint BLASFUNC(clauu2)(char *, int *, float  *, int *, int *);\nint BLASFUNC(zlauu2)(char *, int *, double *, int *, int *);\nint BLASFUNC(xlauu2)(char *, int *, double *, int *, int *);\n\nint BLASFUNC(slauum)(char *, int *, float  *, int *, int *);\nint BLASFUNC(dlauum)(char *, int *, double *, int *, int *);\nint BLASFUNC(qlauum)(char *, int *, double *, int *, int *);\nint BLASFUNC(clauum)(char *, int *, float  *, int *, int *);\nint BLASFUNC(zlauum)(char *, int *, double *, int *, int *);\nint BLASFUNC(xlauum)(char *, int *, double *, int *, int *);\n\nint BLASFUNC(strti2)(char *, char *, int *, float  *, int *, int *);\nint BLASFUNC(dtrti2)(char *, char *, int *, double *, int *, int *);\nint BLASFUNC(qtrti2)(char *, char *, int *, double *, int *, int *);\nint BLASFUNC(ctrti2)(char *, char *, int *, float  *, int *, int *);\nint BLASFUNC(ztrti2)(char *, char *, int *, double *, int *, int 
*);\nint BLASFUNC(xtrti2)(char *, char *, int *, double *, int *, int *);\n\nint BLASFUNC(strtri)(char *, char *, int *, float  *, int *, int *);\nint BLASFUNC(dtrtri)(char *, char *, int *, double *, int *, int *);\nint BLASFUNC(qtrtri)(char *, char *, int *, double *, int *, int *);\nint BLASFUNC(ctrtri)(char *, char *, int *, float  *, int *, int *);\nint BLASFUNC(ztrtri)(char *, char *, int *, double *, int *, int *);\nint BLASFUNC(xtrtri)(char *, char *, int *, double *, int *, int *);\n\nint BLASFUNC(spotri)(char *, int *, float  *, int *, int *);\nint BLASFUNC(dpotri)(char *, int *, double *, int *, int *);\nint BLASFUNC(qpotri)(char *, int *, double *, int *, int *);\nint BLASFUNC(cpotri)(char *, int *, float  *, int *, int *);\nint BLASFUNC(zpotri)(char *, int *, double *, int *, int *);\nint BLASFUNC(xpotri)(char *, int *, double *, int *, int *);\n\n#ifdef __cplusplus\n}\n#endif\n\n#endif\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/misc/lapacke.h",
    "content": "/*****************************************************************************\n  Copyright (c) 2010, Intel Corp.\n  All rights reserved.\n\n  Redistribution and use in source and binary forms, with or without\n  modification, are permitted provided that the following conditions are met:\n\n    * Redistributions of source code must retain the above copyright notice,\n      this list of conditions and the following disclaimer.\n    * Redistributions in binary form must reproduce the above copyright\n      notice, this list of conditions and the following disclaimer in the\n      documentation and/or other materials provided with the distribution.\n    * Neither the name of Intel Corporation nor the names of its contributors\n      may be used to endorse or promote products derived from this software\n      without specific prior written permission.\n\n  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\"\n  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\n  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE\n  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE\n  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\n  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF\n  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS\n  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN\n  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)\n  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF\n  THE POSSIBILITY OF SUCH DAMAGE.\n******************************************************************************\n* Contents: Native C interface to LAPACK\n* Author: Intel Corporation\n* Generated November, 2011\n*****************************************************************************/\n\n#ifndef _MKL_LAPACKE_H_\n\n#ifndef _LAPACKE_H_\n#define _LAPACKE_H_\n\n/*\n*  Turn on HAVE_LAPACK_CONFIG_H to redefine C-LAPACK datatypes\n*/\n#ifdef HAVE_LAPACK_CONFIG_H\n#include \"lapacke_config.h\"\n#endif\n\n#include <stdlib.h>\n\n#ifndef lapack_int\n#define lapack_int     int\n#endif\n\n#ifndef lapack_logical\n#define lapack_logical lapack_int\n#endif\n\n/* Complex types are structures equivalent to the\n* Fortran complex types COMPLEX(4) and COMPLEX(8).\n*\n* One can also redefine the types with his own types\n* for example by including in the code definitions like\n*\n* #define lapack_complex_float std::complex<float>\n* #define lapack_complex_double std::complex<double>\n*\n* or define these types in the command line:\n*\n* -Dlapack_complex_float=\"std::complex<float>\"\n* -Dlapack_complex_double=\"std::complex<double>\"\n*/\n\n#ifndef LAPACK_COMPLEX_CUSTOM\n\n/* Complex type (single precision) */\n#ifndef lapack_complex_float\n#include <complex.h>\n#define lapack_complex_float    float _Complex\n#endif\n\n#ifndef lapack_complex_float_real\n#define lapack_complex_float_real(z)       (creal(z))\n#endif\n\n#ifndef lapack_complex_float_imag\n#define lapack_complex_float_imag(z)       (cimag(z))\n#endif\n\nlapack_complex_float lapack_make_complex_float( float re, float im );\n\n/* Complex type (double precision) */\n#ifndef lapack_complex_double\n#include <complex.h>\n#define lapack_complex_double   double _Complex\n#endif\n\n#ifndef lapack_complex_double_real\n#define lapack_complex_double_real(z)      (creal(z))\n#endif\n\n#ifndef lapack_complex_double_imag\n#define 
lapack_complex_double_imag(z)       (cimag(z))\n#endif\n\nlapack_complex_double lapack_make_complex_double( double re, double im );\n\n#endif\n\n\n#ifdef __cplusplus\nextern \"C\" {\n#endif /* __cplusplus */\n\n#ifndef LAPACKE_malloc\n#define LAPACKE_malloc( size ) malloc( size )\n#endif\n#ifndef LAPACKE_free\n#define LAPACKE_free( p )      free( p )\n#endif\n\n#define LAPACK_C2INT( x ) (lapack_int)(*((float*)&x ))\n#define LAPACK_Z2INT( x ) (lapack_int)(*((double*)&x ))\n\n#define LAPACK_ROW_MAJOR               101\n#define LAPACK_COL_MAJOR               102\n\n#define LAPACK_WORK_MEMORY_ERROR       -1010\n#define LAPACK_TRANSPOSE_MEMORY_ERROR  -1011\n\n/* Callback logical functions of one, two, or three arguments are used\n*  to select eigenvalues to sort to the top left of the Schur form.\n*  The value is selected if function returns TRUE (non-zero). */\n\ntypedef lapack_logical (*LAPACK_S_SELECT2) ( const float*, const float* );\ntypedef lapack_logical (*LAPACK_S_SELECT3)\n    ( const float*, const float*, const float* );\ntypedef lapack_logical (*LAPACK_D_SELECT2) ( const double*, const double* );\ntypedef lapack_logical (*LAPACK_D_SELECT3)\n    ( const double*, const double*, const double* );\n\ntypedef lapack_logical (*LAPACK_C_SELECT1) ( const lapack_complex_float* );\ntypedef lapack_logical (*LAPACK_C_SELECT2)\n    ( const lapack_complex_float*, const lapack_complex_float* );\ntypedef lapack_logical (*LAPACK_Z_SELECT1) ( const lapack_complex_double* );\ntypedef lapack_logical (*LAPACK_Z_SELECT2)\n    ( const lapack_complex_double*, const lapack_complex_double* );\n\n#include \"lapacke_mangling.h\"\n\n#define LAPACK_lsame LAPACK_GLOBAL(lsame,LSAME)\nlapack_logical LAPACK_lsame( char* ca,  char* cb,\n                              lapack_int lca, lapack_int lcb );\n\n/* C-LAPACK function prototypes */\n\nlapack_int LAPACKE_sbdsdc( int matrix_order, char uplo, char compq,\n                           lapack_int n, float* d, float* e, float* u,\n                           lapack_int ldu, float* vt, lapack_int ldvt, float* q,\n                           lapack_int* iq );\nlapack_int LAPACKE_dbdsdc( int matrix_order, char uplo, char compq,\n                           lapack_int n, double* d, double* e, double* u,\n                           lapack_int ldu, double* vt, lapack_int ldvt,\n                           double* q, lapack_int* iq );\n\nlapack_int LAPACKE_sbdsqr( int matrix_order, char uplo, lapack_int n,\n                           lapack_int ncvt, lapack_int nru, lapack_int ncc,\n                           float* d, float* e, float* vt, lapack_int ldvt,\n                           float* u, lapack_int ldu, float* c, lapack_int ldc );\nlapack_int LAPACKE_dbdsqr( int matrix_order, char uplo, lapack_int n,\n                           lapack_int ncvt, lapack_int nru, lapack_int ncc,\n                           double* d, double* e, double* vt, lapack_int ldvt,\n                           double* u, lapack_int ldu, double* c,\n                           lapack_int ldc );\nlapack_int LAPACKE_cbdsqr( int matrix_order, char uplo, lapack_int n,\n                           lapack_int ncvt, lapack_int nru, lapack_int ncc,\n                           float* d, float* e, lapack_complex_float* vt,\n                           lapack_int ldvt, lapack_complex_float* u,\n                           lapack_int ldu, lapack_complex_float* c,\n                           lapack_int ldc );\nlapack_int LAPACKE_zbdsqr( int matrix_order, char uplo, lapack_int n,\n                           lapack_int ncvt, 
lapack_int nru, lapack_int ncc,\n                           double* d, double* e, lapack_complex_double* vt,\n                           lapack_int ldvt, lapack_complex_double* u,\n                           lapack_int ldu, lapack_complex_double* c,\n                           lapack_int ldc );\n\nlapack_int LAPACKE_sdisna( char job, lapack_int m, lapack_int n, const float* d,\n                           float* sep );\nlapack_int LAPACKE_ddisna( char job, lapack_int m, lapack_int n,\n                           const double* d, double* sep );\n\nlapack_int LAPACKE_sgbbrd( int matrix_order, char vect, lapack_int m,\n                           lapack_int n, lapack_int ncc, lapack_int kl,\n                           lapack_int ku, float* ab, lapack_int ldab, float* d,\n                           float* e, float* q, lapack_int ldq, float* pt,\n                           lapack_int ldpt, float* c, lapack_int ldc );\nlapack_int LAPACKE_dgbbrd( int matrix_order, char vect, lapack_int m,\n                           lapack_int n, lapack_int ncc, lapack_int kl,\n                           lapack_int ku, double* ab, lapack_int ldab,\n                           double* d, double* e, double* q, lapack_int ldq,\n                           double* pt, lapack_int ldpt, double* c,\n                           lapack_int ldc );\nlapack_int LAPACKE_cgbbrd( int matrix_order, char vect, lapack_int m,\n                           lapack_int n, lapack_int ncc, lapack_int kl,\n                           lapack_int ku, lapack_complex_float* ab,\n                           lapack_int ldab, float* d, float* e,\n                           lapack_complex_float* q, lapack_int ldq,\n                           lapack_complex_float* pt, lapack_int ldpt,\n                           lapack_complex_float* c, lapack_int ldc );\nlapack_int LAPACKE_zgbbrd( int matrix_order, char vect, lapack_int m,\n                           lapack_int n, lapack_int ncc, lapack_int kl,\n                           lapack_int ku, lapack_complex_double* ab,\n                           lapack_int ldab, double* d, double* e,\n                           lapack_complex_double* q, lapack_int ldq,\n                           lapack_complex_double* pt, lapack_int ldpt,\n                           lapack_complex_double* c, lapack_int ldc );\n\nlapack_int LAPACKE_sgbcon( int matrix_order, char norm, lapack_int n,\n                           lapack_int kl, lapack_int ku, const float* ab,\n                           lapack_int ldab, const lapack_int* ipiv, float anorm,\n                           float* rcond );\nlapack_int LAPACKE_dgbcon( int matrix_order, char norm, lapack_int n,\n                           lapack_int kl, lapack_int ku, const double* ab,\n                           lapack_int ldab, const lapack_int* ipiv,\n                           double anorm, double* rcond );\nlapack_int LAPACKE_cgbcon( int matrix_order, char norm, lapack_int n,\n                           lapack_int kl, lapack_int ku,\n                           const lapack_complex_float* ab, lapack_int ldab,\n                           const lapack_int* ipiv, float anorm, float* rcond );\nlapack_int LAPACKE_zgbcon( int matrix_order, char norm, lapack_int n,\n                           lapack_int kl, lapack_int ku,\n                           const lapack_complex_double* ab, lapack_int ldab,\n                           const lapack_int* ipiv, double anorm,\n                           double* rcond );\n\nlapack_int LAPACKE_sgbequ( int matrix_order, lapack_int m, lapack_int n,\n          
                 lapack_int kl, lapack_int ku, const float* ab,\n                           lapack_int ldab, float* r, float* c, float* rowcnd,\n                           float* colcnd, float* amax );\nlapack_int LAPACKE_dgbequ( int matrix_order, lapack_int m, lapack_int n,\n                           lapack_int kl, lapack_int ku, const double* ab,\n                           lapack_int ldab, double* r, double* c,\n                           double* rowcnd, double* colcnd, double* amax );\nlapack_int LAPACKE_cgbequ( int matrix_order, lapack_int m, lapack_int n,\n                           lapack_int kl, lapack_int ku,\n                           const lapack_complex_float* ab, lapack_int ldab,\n                           float* r, float* c, float* rowcnd, float* colcnd,\n                           float* amax );\nlapack_int LAPACKE_zgbequ( int matrix_order, lapack_int m, lapack_int n,\n                           lapack_int kl, lapack_int ku,\n                           const lapack_complex_double* ab, lapack_int ldab,\n                           double* r, double* c, double* rowcnd, double* colcnd,\n                           double* amax );\n\nlapack_int LAPACKE_sgbequb( int matrix_order, lapack_int m, lapack_int n,\n                            lapack_int kl, lapack_int ku, const float* ab,\n                            lapack_int ldab, float* r, float* c, float* rowcnd,\n                            float* colcnd, float* amax );\nlapack_int LAPACKE_dgbequb( int matrix_order, lapack_int m, lapack_int n,\n                            lapack_int kl, lapack_int ku, const double* ab,\n                            lapack_int ldab, double* r, double* c,\n                            double* rowcnd, double* colcnd, double* amax );\nlapack_int LAPACKE_cgbequb( int matrix_order, lapack_int m, lapack_int n,\n                            lapack_int kl, lapack_int ku,\n                            const lapack_complex_float* ab, lapack_int ldab,\n                            float* r, float* c, float* rowcnd, float* colcnd,\n                            float* amax );\nlapack_int LAPACKE_zgbequb( int matrix_order, lapack_int m, lapack_int n,\n                            lapack_int kl, lapack_int ku,\n                            const lapack_complex_double* ab, lapack_int ldab,\n                            double* r, double* c, double* rowcnd,\n                            double* colcnd, double* amax );\n\nlapack_int LAPACKE_sgbrfs( int matrix_order, char trans, lapack_int n,\n                           lapack_int kl, lapack_int ku, lapack_int nrhs,\n                           const float* ab, lapack_int ldab, const float* afb,\n                           lapack_int ldafb, const lapack_int* ipiv,\n                           const float* b, lapack_int ldb, float* x,\n                           lapack_int ldx, float* ferr, float* berr );\nlapack_int LAPACKE_dgbrfs( int matrix_order, char trans, lapack_int n,\n                           lapack_int kl, lapack_int ku, lapack_int nrhs,\n                           const double* ab, lapack_int ldab, const double* afb,\n                           lapack_int ldafb, const lapack_int* ipiv,\n                           const double* b, lapack_int ldb, double* x,\n                           lapack_int ldx, double* ferr, double* berr );\nlapack_int LAPACKE_cgbrfs( int matrix_order, char trans, lapack_int n,\n                           lapack_int kl, lapack_int ku, lapack_int nrhs,\n                           const lapack_complex_float* ab, lapack_int ldab,\n                 
          const lapack_complex_float* afb, lapack_int ldafb,\n                           const lapack_int* ipiv,\n                           const lapack_complex_float* b, lapack_int ldb,\n                           lapack_complex_float* x, lapack_int ldx, float* ferr,\n                           float* berr );\nlapack_int LAPACKE_zgbrfs( int matrix_order, char trans, lapack_int n,\n                           lapack_int kl, lapack_int ku, lapack_int nrhs,\n                           const lapack_complex_double* ab, lapack_int ldab,\n                           const lapack_complex_double* afb, lapack_int ldafb,\n                           const lapack_int* ipiv,\n                           const lapack_complex_double* b, lapack_int ldb,\n                           lapack_complex_double* x, lapack_int ldx,\n                           double* ferr, double* berr );\n\nlapack_int LAPACKE_sgbrfsx( int matrix_order, char trans, char equed,\n                            lapack_int n, lapack_int kl, lapack_int ku,\n                            lapack_int nrhs, const float* ab, lapack_int ldab,\n                            const float* afb, lapack_int ldafb,\n                            const lapack_int* ipiv, const float* r,\n                            const float* c, const float* b, lapack_int ldb,\n                            float* x, lapack_int ldx, float* rcond, float* berr,\n                            lapack_int n_err_bnds, float* err_bnds_norm,\n                            float* err_bnds_comp, lapack_int nparams,\n                            float* params );\nlapack_int LAPACKE_dgbrfsx( int matrix_order, char trans, char equed,\n                            lapack_int n, lapack_int kl, lapack_int ku,\n                            lapack_int nrhs, const double* ab, lapack_int ldab,\n                            const double* afb, lapack_int ldafb,\n                            const lapack_int* ipiv, const double* r,\n                            const double* c, const double* b, lapack_int ldb,\n                            double* x, lapack_int ldx, double* rcond,\n                            double* berr, lapack_int n_err_bnds,\n                            double* err_bnds_norm, double* err_bnds_comp,\n                            lapack_int nparams, double* params );\nlapack_int LAPACKE_cgbrfsx( int matrix_order, char trans, char equed,\n                            lapack_int n, lapack_int kl, lapack_int ku,\n                            lapack_int nrhs, const lapack_complex_float* ab,\n                            lapack_int ldab, const lapack_complex_float* afb,\n                            lapack_int ldafb, const lapack_int* ipiv,\n                            const float* r, const float* c,\n                            const lapack_complex_float* b, lapack_int ldb,\n                            lapack_complex_float* x, lapack_int ldx,\n                            float* rcond, float* berr, lapack_int n_err_bnds,\n                            float* err_bnds_norm, float* err_bnds_comp,\n                            lapack_int nparams, float* params );\nlapack_int LAPACKE_zgbrfsx( int matrix_order, char trans, char equed,\n                            lapack_int n, lapack_int kl, lapack_int ku,\n                            lapack_int nrhs, const lapack_complex_double* ab,\n                            lapack_int ldab, const lapack_complex_double* afb,\n                            lapack_int ldafb, const lapack_int* ipiv,\n                            const double* r, const double* c,\n                 
           const lapack_complex_double* b, lapack_int ldb,\n                            lapack_complex_double* x, lapack_int ldx,\n                            double* rcond, double* berr, lapack_int n_err_bnds,\n                            double* err_bnds_norm, double* err_bnds_comp,\n                            lapack_int nparams, double* params );\n\nlapack_int LAPACKE_sgbsv( int matrix_order, lapack_int n, lapack_int kl,\n                          lapack_int ku, lapack_int nrhs, float* ab,\n                          lapack_int ldab, lapack_int* ipiv, float* b,\n                          lapack_int ldb );\nlapack_int LAPACKE_dgbsv( int matrix_order, lapack_int n, lapack_int kl,\n                          lapack_int ku, lapack_int nrhs, double* ab,\n                          lapack_int ldab, lapack_int* ipiv, double* b,\n                          lapack_int ldb );\nlapack_int LAPACKE_cgbsv( int matrix_order, lapack_int n, lapack_int kl,\n                          lapack_int ku, lapack_int nrhs,\n                          lapack_complex_float* ab, lapack_int ldab,\n                          lapack_int* ipiv, lapack_complex_float* b,\n                          lapack_int ldb );\nlapack_int LAPACKE_zgbsv( int matrix_order, lapack_int n, lapack_int kl,\n                          lapack_int ku, lapack_int nrhs,\n                          lapack_complex_double* ab, lapack_int ldab,\n                          lapack_int* ipiv, lapack_complex_double* b,\n                          lapack_int ldb );\n\nlapack_int LAPACKE_sgbsvx( int matrix_order, char fact, char trans,\n                           lapack_int n, lapack_int kl, lapack_int ku,\n                           lapack_int nrhs, float* ab, lapack_int ldab,\n                           float* afb, lapack_int ldafb, lapack_int* ipiv,\n                           char* equed, float* r, float* c, float* b,\n                           lapack_int ldb, float* x, lapack_int ldx,\n                           float* rcond, float* ferr, float* berr,\n                           float* rpivot );\nlapack_int LAPACKE_dgbsvx( int matrix_order, char fact, char trans,\n                           lapack_int n, lapack_int kl, lapack_int ku,\n                           lapack_int nrhs, double* ab, lapack_int ldab,\n                           double* afb, lapack_int ldafb, lapack_int* ipiv,\n                           char* equed, double* r, double* c, double* b,\n                           lapack_int ldb, double* x, lapack_int ldx,\n                           double* rcond, double* ferr, double* berr,\n                           double* rpivot );\nlapack_int LAPACKE_cgbsvx( int matrix_order, char fact, char trans,\n                           lapack_int n, lapack_int kl, lapack_int ku,\n                           lapack_int nrhs, lapack_complex_float* ab,\n                           lapack_int ldab, lapack_complex_float* afb,\n                           lapack_int ldafb, lapack_int* ipiv, char* equed,\n                           float* r, float* c, lapack_complex_float* b,\n                           lapack_int ldb, lapack_complex_float* x,\n                           lapack_int ldx, float* rcond, float* ferr,\n                           float* berr, float* rpivot );\nlapack_int LAPACKE_zgbsvx( int matrix_order, char fact, char trans,\n                           lapack_int n, lapack_int kl, lapack_int ku,\n                           lapack_int nrhs, lapack_complex_double* ab,\n                           lapack_int ldab, lapack_complex_double* afb,\n                   
        lapack_int ldafb, lapack_int* ipiv, char* equed,\n                           double* r, double* c, lapack_complex_double* b,\n                           lapack_int ldb, lapack_complex_double* x,\n                           lapack_int ldx, double* rcond, double* ferr,\n                           double* berr, double* rpivot );\n\nlapack_int LAPACKE_sgbsvxx( int matrix_order, char fact, char trans,\n                            lapack_int n, lapack_int kl, lapack_int ku,\n                            lapack_int nrhs, float* ab, lapack_int ldab,\n                            float* afb, lapack_int ldafb, lapack_int* ipiv,\n                            char* equed, float* r, float* c, float* b,\n                            lapack_int ldb, float* x, lapack_int ldx,\n                            float* rcond, float* rpvgrw, float* berr,\n                            lapack_int n_err_bnds, float* err_bnds_norm,\n                            float* err_bnds_comp, lapack_int nparams,\n                            float* params );\nlapack_int LAPACKE_dgbsvxx( int matrix_order, char fact, char trans,\n                            lapack_int n, lapack_int kl, lapack_int ku,\n                            lapack_int nrhs, double* ab, lapack_int ldab,\n                            double* afb, lapack_int ldafb, lapack_int* ipiv,\n                            char* equed, double* r, double* c, double* b,\n                            lapack_int ldb, double* x, lapack_int ldx,\n                            double* rcond, double* rpvgrw, double* berr,\n                            lapack_int n_err_bnds, double* err_bnds_norm,\n                            double* err_bnds_comp, lapack_int nparams,\n                            double* params );\nlapack_int LAPACKE_cgbsvxx( int matrix_order, char fact, char trans,\n                            lapack_int n, lapack_int kl, lapack_int ku,\n                            lapack_int nrhs, lapack_complex_float* ab,\n                            lapack_int ldab, lapack_complex_float* afb,\n                            lapack_int ldafb, lapack_int* ipiv, char* equed,\n                            float* r, float* c, lapack_complex_float* b,\n                            lapack_int ldb, lapack_complex_float* x,\n                            lapack_int ldx, float* rcond, float* rpvgrw,\n                            float* berr, lapack_int n_err_bnds,\n                            float* err_bnds_norm, float* err_bnds_comp,\n                            lapack_int nparams, float* params );\nlapack_int LAPACKE_zgbsvxx( int matrix_order, char fact, char trans,\n                            lapack_int n, lapack_int kl, lapack_int ku,\n                            lapack_int nrhs, lapack_complex_double* ab,\n                            lapack_int ldab, lapack_complex_double* afb,\n                            lapack_int ldafb, lapack_int* ipiv, char* equed,\n                            double* r, double* c, lapack_complex_double* b,\n                            lapack_int ldb, lapack_complex_double* x,\n                            lapack_int ldx, double* rcond, double* rpvgrw,\n                            double* berr, lapack_int n_err_bnds,\n                            double* err_bnds_norm, double* err_bnds_comp,\n                            lapack_int nparams, double* params );\n\nlapack_int LAPACKE_sgbtrf( int matrix_order, lapack_int m, lapack_int n,\n                           lapack_int kl, lapack_int ku, float* ab,\n                           lapack_int ldab, lapack_int* ipiv 
);\nlapack_int LAPACKE_dgbtrf( int matrix_order, lapack_int m, lapack_int n,\n                           lapack_int kl, lapack_int ku, double* ab,\n                           lapack_int ldab, lapack_int* ipiv );\nlapack_int LAPACKE_cgbtrf( int matrix_order, lapack_int m, lapack_int n,\n                           lapack_int kl, lapack_int ku,\n                           lapack_complex_float* ab, lapack_int ldab,\n                           lapack_int* ipiv );\nlapack_int LAPACKE_zgbtrf( int matrix_order, lapack_int m, lapack_int n,\n                           lapack_int kl, lapack_int ku,\n                           lapack_complex_double* ab, lapack_int ldab,\n                           lapack_int* ipiv );\n\nlapack_int LAPACKE_sgbtrs( int matrix_order, char trans, lapack_int n,\n                           lapack_int kl, lapack_int ku, lapack_int nrhs,\n                           const float* ab, lapack_int ldab,\n                           const lapack_int* ipiv, float* b, lapack_int ldb );\nlapack_int LAPACKE_dgbtrs( int matrix_order, char trans, lapack_int n,\n                           lapack_int kl, lapack_int ku, lapack_int nrhs,\n                           const double* ab, lapack_int ldab,\n                           const lapack_int* ipiv, double* b, lapack_int ldb );\nlapack_int LAPACKE_cgbtrs( int matrix_order, char trans, lapack_int n,\n                           lapack_int kl, lapack_int ku, lapack_int nrhs,\n                           const lapack_complex_float* ab, lapack_int ldab,\n                           const lapack_int* ipiv, lapack_complex_float* b,\n                           lapack_int ldb );\nlapack_int LAPACKE_zgbtrs( int matrix_order, char trans, lapack_int n,\n                           lapack_int kl, lapack_int ku, lapack_int nrhs,\n                           const lapack_complex_double* ab, lapack_int ldab,\n                           const lapack_int* ipiv, lapack_complex_double* b,\n                           lapack_int ldb );\n\nlapack_int LAPACKE_sgebak( int matrix_order, char job, char side, lapack_int n,\n                           lapack_int ilo, lapack_int ihi, const float* scale,\n                           lapack_int m, float* v, lapack_int ldv );\nlapack_int LAPACKE_dgebak( int matrix_order, char job, char side, lapack_int n,\n                           lapack_int ilo, lapack_int ihi, const double* scale,\n                           lapack_int m, double* v, lapack_int ldv );\nlapack_int LAPACKE_cgebak( int matrix_order, char job, char side, lapack_int n,\n                           lapack_int ilo, lapack_int ihi, const float* scale,\n                           lapack_int m, lapack_complex_float* v,\n                           lapack_int ldv );\nlapack_int LAPACKE_zgebak( int matrix_order, char job, char side, lapack_int n,\n                           lapack_int ilo, lapack_int ihi, const double* scale,\n                           lapack_int m, lapack_complex_double* v,\n                           lapack_int ldv );\n\nlapack_int LAPACKE_sgebal( int matrix_order, char job, lapack_int n, float* a,\n                           lapack_int lda, lapack_int* ilo, lapack_int* ihi,\n                           float* scale );\nlapack_int LAPACKE_dgebal( int matrix_order, char job, lapack_int n, double* a,\n                           lapack_int lda, lapack_int* ilo, lapack_int* ihi,\n                           double* scale );\nlapack_int LAPACKE_cgebal( int matrix_order, char job, lapack_int n,\n                           lapack_complex_float* a, lapack_int 
lda,\n                           lapack_int* ilo, lapack_int* ihi, float* scale );\nlapack_int LAPACKE_zgebal( int matrix_order, char job, lapack_int n,\n                           lapack_complex_double* a, lapack_int lda,\n                           lapack_int* ilo, lapack_int* ihi, double* scale );\n\nlapack_int LAPACKE_sgebrd( int matrix_order, lapack_int m, lapack_int n,\n                           float* a, lapack_int lda, float* d, float* e,\n                           float* tauq, float* taup );\nlapack_int LAPACKE_dgebrd( int matrix_order, lapack_int m, lapack_int n,\n                           double* a, lapack_int lda, double* d, double* e,\n                           double* tauq, double* taup );\nlapack_int LAPACKE_cgebrd( int matrix_order, lapack_int m, lapack_int n,\n                           lapack_complex_float* a, lapack_int lda, float* d,\n                           float* e, lapack_complex_float* tauq,\n                           lapack_complex_float* taup );\nlapack_int LAPACKE_zgebrd( int matrix_order, lapack_int m, lapack_int n,\n                           lapack_complex_double* a, lapack_int lda, double* d,\n                           double* e, lapack_complex_double* tauq,\n                           lapack_complex_double* taup );\n\nlapack_int LAPACKE_sgecon( int matrix_order, char norm, lapack_int n,\n                           const float* a, lapack_int lda, float anorm,\n                           float* rcond );\nlapack_int LAPACKE_dgecon( int matrix_order, char norm, lapack_int n,\n                           const double* a, lapack_int lda, double anorm,\n                           double* rcond );\nlapack_int LAPACKE_cgecon( int matrix_order, char norm, lapack_int n,\n                           const lapack_complex_float* a, lapack_int lda,\n                           float anorm, float* rcond );\nlapack_int LAPACKE_zgecon( int matrix_order, char norm, lapack_int n,\n                           const lapack_complex_double* a, lapack_int lda,\n                           double anorm, double* rcond );\n\nlapack_int LAPACKE_sgeequ( int matrix_order, lapack_int m, lapack_int n,\n                           const float* a, lapack_int lda, float* r, float* c,\n                           float* rowcnd, float* colcnd, float* amax );\nlapack_int LAPACKE_dgeequ( int matrix_order, lapack_int m, lapack_int n,\n                           const double* a, lapack_int lda, double* r,\n                           double* c, double* rowcnd, double* colcnd,\n                           double* amax );\nlapack_int LAPACKE_cgeequ( int matrix_order, lapack_int m, lapack_int n,\n                           const lapack_complex_float* a, lapack_int lda,\n                           float* r, float* c, float* rowcnd, float* colcnd,\n                           float* amax );\nlapack_int LAPACKE_zgeequ( int matrix_order, lapack_int m, lapack_int n,\n                           const lapack_complex_double* a, lapack_int lda,\n                           double* r, double* c, double* rowcnd, double* colcnd,\n                           double* amax );\n\nlapack_int LAPACKE_sgeequb( int matrix_order, lapack_int m, lapack_int n,\n                            const float* a, lapack_int lda, float* r, float* c,\n                            float* rowcnd, float* colcnd, float* amax );\nlapack_int LAPACKE_dgeequb( int matrix_order, lapack_int m, lapack_int n,\n                            const double* a, lapack_int lda, double* r,\n                            double* c, double* rowcnd, double* 
colcnd,\n                            double* amax );\nlapack_int LAPACKE_cgeequb( int matrix_order, lapack_int m, lapack_int n,\n                            const lapack_complex_float* a, lapack_int lda,\n                            float* r, float* c, float* rowcnd, float* colcnd,\n                            float* amax );\nlapack_int LAPACKE_zgeequb( int matrix_order, lapack_int m, lapack_int n,\n                            const lapack_complex_double* a, lapack_int lda,\n                            double* r, double* c, double* rowcnd,\n                            double* colcnd, double* amax );\n\nlapack_int LAPACKE_sgees( int matrix_order, char jobvs, char sort,\n                          LAPACK_S_SELECT2 select, lapack_int n, float* a,\n                          lapack_int lda, lapack_int* sdim, float* wr,\n                          float* wi, float* vs, lapack_int ldvs );\nlapack_int LAPACKE_dgees( int matrix_order, char jobvs, char sort,\n                          LAPACK_D_SELECT2 select, lapack_int n, double* a,\n                          lapack_int lda, lapack_int* sdim, double* wr,\n                          double* wi, double* vs, lapack_int ldvs );\nlapack_int LAPACKE_cgees( int matrix_order, char jobvs, char sort,\n                          LAPACK_C_SELECT1 select, lapack_int n,\n                          lapack_complex_float* a, lapack_int lda,\n                          lapack_int* sdim, lapack_complex_float* w,\n                          lapack_complex_float* vs, lapack_int ldvs );\nlapack_int LAPACKE_zgees( int matrix_order, char jobvs, char sort,\n                          LAPACK_Z_SELECT1 select, lapack_int n,\n                          lapack_complex_double* a, lapack_int lda,\n                          lapack_int* sdim, lapack_complex_double* w,\n                          lapack_complex_double* vs, lapack_int ldvs );\n\nlapack_int LAPACKE_sgeesx( int matrix_order, char jobvs, char sort,\n                           LAPACK_S_SELECT2 select, char sense, lapack_int n,\n                           float* a, lapack_int lda, lapack_int* sdim,\n                           float* wr, float* wi, float* vs, lapack_int ldvs,\n                           float* rconde, float* rcondv );\nlapack_int LAPACKE_dgeesx( int matrix_order, char jobvs, char sort,\n                           LAPACK_D_SELECT2 select, char sense, lapack_int n,\n                           double* a, lapack_int lda, lapack_int* sdim,\n                           double* wr, double* wi, double* vs, lapack_int ldvs,\n                           double* rconde, double* rcondv );\nlapack_int LAPACKE_cgeesx( int matrix_order, char jobvs, char sort,\n                           LAPACK_C_SELECT1 select, char sense, lapack_int n,\n                           lapack_complex_float* a, lapack_int lda,\n                           lapack_int* sdim, lapack_complex_float* w,\n                           lapack_complex_float* vs, lapack_int ldvs,\n                           float* rconde, float* rcondv );\nlapack_int LAPACKE_zgeesx( int matrix_order, char jobvs, char sort,\n                           LAPACK_Z_SELECT1 select, char sense, lapack_int n,\n                           lapack_complex_double* a, lapack_int lda,\n                           lapack_int* sdim, lapack_complex_double* w,\n                           lapack_complex_double* vs, lapack_int ldvs,\n                           double* rconde, double* rcondv );\n\nlapack_int LAPACKE_sgeev( int matrix_order, char jobvl, char jobvr,\n                          lapack_int n, 
float* a, lapack_int lda, float* wr,\n                          float* wi, float* vl, lapack_int ldvl, float* vr,\n                          lapack_int ldvr );\nlapack_int LAPACKE_dgeev( int matrix_order, char jobvl, char jobvr,\n                          lapack_int n, double* a, lapack_int lda, double* wr,\n                          double* wi, double* vl, lapack_int ldvl, double* vr,\n                          lapack_int ldvr );\nlapack_int LAPACKE_cgeev( int matrix_order, char jobvl, char jobvr,\n                          lapack_int n, lapack_complex_float* a, lapack_int lda,\n                          lapack_complex_float* w, lapack_complex_float* vl,\n                          lapack_int ldvl, lapack_complex_float* vr,\n                          lapack_int ldvr );\nlapack_int LAPACKE_zgeev( int matrix_order, char jobvl, char jobvr,\n                          lapack_int n, lapack_complex_double* a,\n                          lapack_int lda, lapack_complex_double* w,\n                          lapack_complex_double* vl, lapack_int ldvl,\n                          lapack_complex_double* vr, lapack_int ldvr );\n\nlapack_int LAPACKE_sgeevx( int matrix_order, char balanc, char jobvl,\n                           char jobvr, char sense, lapack_int n, float* a,\n                           lapack_int lda, float* wr, float* wi, float* vl,\n                           lapack_int ldvl, float* vr, lapack_int ldvr,\n                           lapack_int* ilo, lapack_int* ihi, float* scale,\n                           float* abnrm, float* rconde, float* rcondv );\nlapack_int LAPACKE_dgeevx( int matrix_order, char balanc, char jobvl,\n                           char jobvr, char sense, lapack_int n, double* a,\n                           lapack_int lda, double* wr, double* wi, double* vl,\n                           lapack_int ldvl, double* vr, lapack_int ldvr,\n                           lapack_int* ilo, lapack_int* ihi, double* scale,\n                           double* abnrm, double* rconde, double* rcondv );\nlapack_int LAPACKE_cgeevx( int matrix_order, char balanc, char jobvl,\n                           char jobvr, char sense, lapack_int n,\n                           lapack_complex_float* a, lapack_int lda,\n                           lapack_complex_float* w, lapack_complex_float* vl,\n                           lapack_int ldvl, lapack_complex_float* vr,\n                           lapack_int ldvr, lapack_int* ilo, lapack_int* ihi,\n                           float* scale, float* abnrm, float* rconde,\n                           float* rcondv );\nlapack_int LAPACKE_zgeevx( int matrix_order, char balanc, char jobvl,\n                           char jobvr, char sense, lapack_int n,\n                           lapack_complex_double* a, lapack_int lda,\n                           lapack_complex_double* w, lapack_complex_double* vl,\n                           lapack_int ldvl, lapack_complex_double* vr,\n                           lapack_int ldvr, lapack_int* ilo, lapack_int* ihi,\n                           double* scale, double* abnrm, double* rconde,\n                           double* rcondv );\n\nlapack_int LAPACKE_sgehrd( int matrix_order, lapack_int n, lapack_int ilo,\n                           lapack_int ihi, float* a, lapack_int lda,\n                           float* tau );\nlapack_int LAPACKE_dgehrd( int matrix_order, lapack_int n, lapack_int ilo,\n                           lapack_int ihi, double* a, lapack_int lda,\n                           double* tau );\nlapack_int LAPACKE_cgehrd( 
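\n/* Illustrative usage sketch (editor's note, not part of the original header):\n   eigenvalues of a 2x2 real matrix via LAPACKE_dgeev, declared above.\n   jobvl = jobvr = 'N' requests eigenvalues only, returned as wr[j] + i*wi[j];\n   vl/vr are unreferenced dummies in that case.\n\n     double a[4] = { 0, 1, -2, -3 };\n     double wr[2], wi[2], vl[1], vr[1];\n     lapack_int info = LAPACKE_dgeev( LAPACK_ROW_MAJOR, 'N', 'N', 2, a, 2,\n                                      wr, wi, vl, 1, vr, 1 );\n*/\n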
int matrix_order, lapack_int n, lapack_int ilo,\n                           lapack_int ihi, lapack_complex_float* a,\n                           lapack_int lda, lapack_complex_float* tau );\nlapack_int LAPACKE_zgehrd( int matrix_order, lapack_int n, lapack_int ilo,\n                           lapack_int ihi, lapack_complex_double* a,\n                           lapack_int lda, lapack_complex_double* tau );\n\nlapack_int LAPACKE_sgejsv( int matrix_order, char joba, char jobu, char jobv,\n                           char jobr, char jobt, char jobp, lapack_int m,\n                           lapack_int n, float* a, lapack_int lda, float* sva,\n                           float* u, lapack_int ldu, float* v, lapack_int ldv,\n                           float* stat, lapack_int* istat );\nlapack_int LAPACKE_dgejsv( int matrix_order, char joba, char jobu, char jobv,\n                           char jobr, char jobt, char jobp, lapack_int m,\n                           lapack_int n, double* a, lapack_int lda, double* sva,\n                           double* u, lapack_int ldu, double* v, lapack_int ldv,\n                           double* stat, lapack_int* istat );\n\nlapack_int LAPACKE_sgelq2( int matrix_order, lapack_int m, lapack_int n,\n                           float* a, lapack_int lda, float* tau );\nlapack_int LAPACKE_dgelq2( int matrix_order, lapack_int m, lapack_int n,\n                           double* a, lapack_int lda, double* tau );\nlapack_int LAPACKE_cgelq2( int matrix_order, lapack_int m, lapack_int n,\n                           lapack_complex_float* a, lapack_int lda,\n                           lapack_complex_float* tau );\nlapack_int LAPACKE_zgelq2( int matrix_order, lapack_int m, lapack_int n,\n                           lapack_complex_double* a, lapack_int lda,\n                           lapack_complex_double* tau );\n\nlapack_int LAPACKE_sgelqf( int matrix_order, lapack_int m, lapack_int n,\n                           float* a, lapack_int lda, float* tau );\nlapack_int LAPACKE_dgelqf( int matrix_order, lapack_int m, lapack_int n,\n                           double* a, lapack_int lda, double* tau );\nlapack_int LAPACKE_cgelqf( int matrix_order, lapack_int m, lapack_int n,\n                           lapack_complex_float* a, lapack_int lda,\n                           lapack_complex_float* tau );\nlapack_int LAPACKE_zgelqf( int matrix_order, lapack_int m, lapack_int n,\n                           lapack_complex_double* a, lapack_int lda,\n                           lapack_complex_double* tau );\n\nlapack_int LAPACKE_sgels( int matrix_order, char trans, lapack_int m,\n                          lapack_int n, lapack_int nrhs, float* a,\n                          lapack_int lda, float* b, lapack_int ldb );\nlapack_int LAPACKE_dgels( int matrix_order, char trans, lapack_int m,\n                          lapack_int n, lapack_int nrhs, double* a,\n                          lapack_int lda, double* b, lapack_int ldb );\nlapack_int LAPACKE_cgels( int matrix_order, char trans, lapack_int m,\n                          lapack_int n, lapack_int nrhs,\n                          lapack_complex_float* a, lapack_int lda,\n                          lapack_complex_float* b, lapack_int ldb );\nlapack_int LAPACKE_zgels( int matrix_order, char trans, lapack_int m,\n                          lapack_int n, lapack_int nrhs,\n                          lapack_complex_double* a, lapack_int lda,\n                          lapack_complex_double* b, lapack_int ldb );\n\nlapack_int LAPACKE_sgelsd( int 
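\n/* Illustrative usage sketch (editor's note, not part of the original header):\n   least-squares solution of an overdetermined 3x2 system via LAPACKE_dgels,\n   declared above ('N' = no transpose). On success the first 2 entries of b\n   hold the minimizer of ||A*x - b||.\n\n     double a[6] = { 1, 1, 1, 2, 1, 3 };\n     double b[3] = { 6, 0, 0 };\n     lapack_int info = LAPACKE_dgels( LAPACK_ROW_MAJOR, 'N', 3, 2, 1,\n                                      a, 2, b, 1 );\n*/\n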
matrix_order, lapack_int m, lapack_int n,\n                           lapack_int nrhs, float* a, lapack_int lda, float* b,\n                           lapack_int ldb, float* s, float rcond,\n                           lapack_int* rank );\nlapack_int LAPACKE_dgelsd( int matrix_order, lapack_int m, lapack_int n,\n                           lapack_int nrhs, double* a, lapack_int lda,\n                           double* b, lapack_int ldb, double* s, double rcond,\n                           lapack_int* rank );\nlapack_int LAPACKE_cgelsd( int matrix_order, lapack_int m, lapack_int n,\n                           lapack_int nrhs, lapack_complex_float* a,\n                           lapack_int lda, lapack_complex_float* b,\n                           lapack_int ldb, float* s, float rcond,\n                           lapack_int* rank );\nlapack_int LAPACKE_zgelsd( int matrix_order, lapack_int m, lapack_int n,\n                           lapack_int nrhs, lapack_complex_double* a,\n                           lapack_int lda, lapack_complex_double* b,\n                           lapack_int ldb, double* s, double rcond,\n                           lapack_int* rank );\n\nlapack_int LAPACKE_sgelss( int matrix_order, lapack_int m, lapack_int n,\n                           lapack_int nrhs, float* a, lapack_int lda, float* b,\n                           lapack_int ldb, float* s, float rcond,\n                           lapack_int* rank );\nlapack_int LAPACKE_dgelss( int matrix_order, lapack_int m, lapack_int n,\n                           lapack_int nrhs, double* a, lapack_int lda,\n                           double* b, lapack_int ldb, double* s, double rcond,\n                           lapack_int* rank );\nlapack_int LAPACKE_cgelss( int matrix_order, lapack_int m, lapack_int n,\n                           lapack_int nrhs, lapack_complex_float* a,\n                           lapack_int lda, lapack_complex_float* b,\n                           lapack_int ldb, float* s, float rcond,\n                           lapack_int* rank );\nlapack_int LAPACKE_zgelss( int matrix_order, lapack_int m, lapack_int n,\n                           lapack_int nrhs, lapack_complex_double* a,\n                           lapack_int lda, lapack_complex_double* b,\n                           lapack_int ldb, double* s, double rcond,\n                           lapack_int* rank );\n\nlapack_int LAPACKE_sgelsy( int matrix_order, lapack_int m, lapack_int n,\n                           lapack_int nrhs, float* a, lapack_int lda, float* b,\n                           lapack_int ldb, lapack_int* jpvt, float rcond,\n                           lapack_int* rank );\nlapack_int LAPACKE_dgelsy( int matrix_order, lapack_int m, lapack_int n,\n                           lapack_int nrhs, double* a, lapack_int lda,\n                           double* b, lapack_int ldb, lapack_int* jpvt,\n                           double rcond, lapack_int* rank );\nlapack_int LAPACKE_cgelsy( int matrix_order, lapack_int m, lapack_int n,\n                           lapack_int nrhs, lapack_complex_float* a,\n                           lapack_int lda, lapack_complex_float* b,\n                           lapack_int ldb, lapack_int* jpvt, float rcond,\n                           lapack_int* rank );\nlapack_int LAPACKE_zgelsy( int matrix_order, lapack_int m, lapack_int n,\n                           lapack_int nrhs, lapack_complex_double* a,\n                           lapack_int lda, lapack_complex_double* b,\n                           lapack_int ldb, lapack_int* jpvt, 
double rcond,\n                           lapack_int* rank );\n\nlapack_int LAPACKE_sgeqlf( int matrix_order, lapack_int m, lapack_int n,\n                           float* a, lapack_int lda, float* tau );\nlapack_int LAPACKE_dgeqlf( int matrix_order, lapack_int m, lapack_int n,\n                           double* a, lapack_int lda, double* tau );\nlapack_int LAPACKE_cgeqlf( int matrix_order, lapack_int m, lapack_int n,\n                           lapack_complex_float* a, lapack_int lda,\n                           lapack_complex_float* tau );\nlapack_int LAPACKE_zgeqlf( int matrix_order, lapack_int m, lapack_int n,\n                           lapack_complex_double* a, lapack_int lda,\n                           lapack_complex_double* tau );\n\nlapack_int LAPACKE_sgeqp3( int matrix_order, lapack_int m, lapack_int n,\n                           float* a, lapack_int lda, lapack_int* jpvt,\n                           float* tau );\nlapack_int LAPACKE_dgeqp3( int matrix_order, lapack_int m, lapack_int n,\n                           double* a, lapack_int lda, lapack_int* jpvt,\n                           double* tau );\nlapack_int LAPACKE_cgeqp3( int matrix_order, lapack_int m, lapack_int n,\n                           lapack_complex_float* a, lapack_int lda,\n                           lapack_int* jpvt, lapack_complex_float* tau );\nlapack_int LAPACKE_zgeqp3( int matrix_order, lapack_int m, lapack_int n,\n                           lapack_complex_double* a, lapack_int lda,\n                           lapack_int* jpvt, lapack_complex_double* tau );\n\nlapack_int LAPACKE_sgeqpf( int matrix_order, lapack_int m, lapack_int n,\n                           float* a, lapack_int lda, lapack_int* jpvt,\n                           float* tau );\nlapack_int LAPACKE_dgeqpf( int matrix_order, lapack_int m, lapack_int n,\n                           double* a, lapack_int lda, lapack_int* jpvt,\n                           double* tau );\nlapack_int LAPACKE_cgeqpf( int matrix_order, lapack_int m, lapack_int n,\n                           lapack_complex_float* a, lapack_int lda,\n                           lapack_int* jpvt, lapack_complex_float* tau );\nlapack_int LAPACKE_zgeqpf( int matrix_order, lapack_int m, lapack_int n,\n                           lapack_complex_double* a, lapack_int lda,\n                           lapack_int* jpvt, lapack_complex_double* tau );\n\nlapack_int LAPACKE_sgeqr2( int matrix_order, lapack_int m, lapack_int n,\n                           float* a, lapack_int lda, float* tau );\nlapack_int LAPACKE_dgeqr2( int matrix_order, lapack_int m, lapack_int n,\n                           double* a, lapack_int lda, double* tau );\nlapack_int LAPACKE_cgeqr2( int matrix_order, lapack_int m, lapack_int n,\n                           lapack_complex_float* a, lapack_int lda,\n                           lapack_complex_float* tau );\nlapack_int LAPACKE_zgeqr2( int matrix_order, lapack_int m, lapack_int n,\n                           lapack_complex_double* a, lapack_int lda,\n                           lapack_complex_double* tau );\n\nlapack_int LAPACKE_sgeqrf( int matrix_order, lapack_int m, lapack_int n,\n                           float* a, lapack_int lda, float* tau );\nlapack_int LAPACKE_dgeqrf( int matrix_order, lapack_int m, lapack_int n,\n                           double* a, lapack_int lda, double* tau );\nlapack_int LAPACKE_cgeqrf( int matrix_order, lapack_int m, lapack_int n,\n                           lapack_complex_float* a, lapack_int lda,\n                           
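\n/* Illustrative usage sketch (editor's note, not part of the original header):\n   QR factorization of a 3x2 matrix via LAPACKE_dgeqrf, declared above. R is\n   returned in the upper triangle of a; tau holds the min(m,n) Householder\n   scalars that implicitly define Q.\n\n     double a[6] = { 1, 2, 3, 4, 5, 6 };\n     double tau[2];\n     lapack_int info = LAPACKE_dgeqrf( LAPACK_ROW_MAJOR, 3, 2, a, 2, tau );\n*/\n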
lapack_complex_float* tau );\nlapack_int LAPACKE_zgeqrf( int matrix_order, lapack_int m, lapack_int n,\n                           lapack_complex_double* a, lapack_int lda,\n                           lapack_complex_double* tau );\n\nlapack_int LAPACKE_sgeqrfp( int matrix_order, lapack_int m, lapack_int n,\n                            float* a, lapack_int lda, float* tau );\nlapack_int LAPACKE_dgeqrfp( int matrix_order, lapack_int m, lapack_int n,\n                            double* a, lapack_int lda, double* tau );\nlapack_int LAPACKE_cgeqrfp( int matrix_order, lapack_int m, lapack_int n,\n                            lapack_complex_float* a, lapack_int lda,\n                            lapack_complex_float* tau );\nlapack_int LAPACKE_zgeqrfp( int matrix_order, lapack_int m, lapack_int n,\n                            lapack_complex_double* a, lapack_int lda,\n                            lapack_complex_double* tau );\n\nlapack_int LAPACKE_sgerfs( int matrix_order, char trans, lapack_int n,\n                           lapack_int nrhs, const float* a, lapack_int lda,\n                           const float* af, lapack_int ldaf,\n                           const lapack_int* ipiv, const float* b,\n                           lapack_int ldb, float* x, lapack_int ldx,\n                           float* ferr, float* berr );\nlapack_int LAPACKE_dgerfs( int matrix_order, char trans, lapack_int n,\n                           lapack_int nrhs, const double* a, lapack_int lda,\n                           const double* af, lapack_int ldaf,\n                           const lapack_int* ipiv, const double* b,\n                           lapack_int ldb, double* x, lapack_int ldx,\n                           double* ferr, double* berr );\nlapack_int LAPACKE_cgerfs( int matrix_order, char trans, lapack_int n,\n                           lapack_int nrhs, const lapack_complex_float* a,\n                           lapack_int lda, const lapack_complex_float* af,\n                           lapack_int ldaf, const lapack_int* ipiv,\n                           const lapack_complex_float* b, lapack_int ldb,\n                           lapack_complex_float* x, lapack_int ldx, float* ferr,\n                           float* berr );\nlapack_int LAPACKE_zgerfs( int matrix_order, char trans, lapack_int n,\n                           lapack_int nrhs, const lapack_complex_double* a,\n                           lapack_int lda, const lapack_complex_double* af,\n                           lapack_int ldaf, const lapack_int* ipiv,\n                           const lapack_complex_double* b, lapack_int ldb,\n                           lapack_complex_double* x, lapack_int ldx,\n                           double* ferr, double* berr );\n\nlapack_int LAPACKE_sgerfsx( int matrix_order, char trans, char equed,\n                            lapack_int n, lapack_int nrhs, const float* a,\n                            lapack_int lda, const float* af, lapack_int ldaf,\n                            const lapack_int* ipiv, const float* r,\n                            const float* c, const float* b, lapack_int ldb,\n                            float* x, lapack_int ldx, float* rcond, float* berr,\n                            lapack_int n_err_bnds, float* err_bnds_norm,\n                            float* err_bnds_comp, lapack_int nparams,\n                            float* params );\nlapack_int LAPACKE_dgerfsx( int matrix_order, char trans, char equed,\n                            lapack_int n, lapack_int nrhs, const double* a,\n                  
          lapack_int lda, const double* af, lapack_int ldaf,\n                            const lapack_int* ipiv, const double* r,\n                            const double* c, const double* b, lapack_int ldb,\n                            double* x, lapack_int ldx, double* rcond,\n                            double* berr, lapack_int n_err_bnds,\n                            double* err_bnds_norm, double* err_bnds_comp,\n                            lapack_int nparams, double* params );\nlapack_int LAPACKE_cgerfsx( int matrix_order, char trans, char equed,\n                            lapack_int n, lapack_int nrhs,\n                            const lapack_complex_float* a, lapack_int lda,\n                            const lapack_complex_float* af, lapack_int ldaf,\n                            const lapack_int* ipiv, const float* r,\n                            const float* c, const lapack_complex_float* b,\n                            lapack_int ldb, lapack_complex_float* x,\n                            lapack_int ldx, float* rcond, float* berr,\n                            lapack_int n_err_bnds, float* err_bnds_norm,\n                            float* err_bnds_comp, lapack_int nparams,\n                            float* params );\nlapack_int LAPACKE_zgerfsx( int matrix_order, char trans, char equed,\n                            lapack_int n, lapack_int nrhs,\n                            const lapack_complex_double* a, lapack_int lda,\n                            const lapack_complex_double* af, lapack_int ldaf,\n                            const lapack_int* ipiv, const double* r,\n                            const double* c, const lapack_complex_double* b,\n                            lapack_int ldb, lapack_complex_double* x,\n                            lapack_int ldx, double* rcond, double* berr,\n                            lapack_int n_err_bnds, double* err_bnds_norm,\n                            double* err_bnds_comp, lapack_int nparams,\n                            double* params );\n\nlapack_int LAPACKE_sgerqf( int matrix_order, lapack_int m, lapack_int n,\n                           float* a, lapack_int lda, float* tau );\nlapack_int LAPACKE_dgerqf( int matrix_order, lapack_int m, lapack_int n,\n                           double* a, lapack_int lda, double* tau );\nlapack_int LAPACKE_cgerqf( int matrix_order, lapack_int m, lapack_int n,\n                           lapack_complex_float* a, lapack_int lda,\n                           lapack_complex_float* tau );\nlapack_int LAPACKE_zgerqf( int matrix_order, lapack_int m, lapack_int n,\n                           lapack_complex_double* a, lapack_int lda,\n                           lapack_complex_double* tau );\n\nlapack_int LAPACKE_sgesdd( int matrix_order, char jobz, lapack_int m,\n                           lapack_int n, float* a, lapack_int lda, float* s,\n                           float* u, lapack_int ldu, float* vt,\n                           lapack_int ldvt );\nlapack_int LAPACKE_dgesdd( int matrix_order, char jobz, lapack_int m,\n                           lapack_int n, double* a, lapack_int lda, double* s,\n                           double* u, lapack_int ldu, double* vt,\n                           lapack_int ldvt );\nlapack_int LAPACKE_cgesdd( int matrix_order, char jobz, lapack_int m,\n                           lapack_int n, lapack_complex_float* a,\n                           lapack_int lda, float* s, lapack_complex_float* u,\n                           lapack_int ldu, lapack_complex_float* vt,\n                       
    lapack_int ldvt );\nlapack_int LAPACKE_zgesdd( int matrix_order, char jobz, lapack_int m,\n                           lapack_int n, lapack_complex_double* a,\n                           lapack_int lda, double* s, lapack_complex_double* u,\n                           lapack_int ldu, lapack_complex_double* vt,\n                           lapack_int ldvt );\n\nlapack_int LAPACKE_sgesv( int matrix_order, lapack_int n, lapack_int nrhs,\n                          float* a, lapack_int lda, lapack_int* ipiv, float* b,\n                          lapack_int ldb );\nlapack_int LAPACKE_dgesv( int matrix_order, lapack_int n, lapack_int nrhs,\n                          double* a, lapack_int lda, lapack_int* ipiv,\n                          double* b, lapack_int ldb );\nlapack_int LAPACKE_cgesv( int matrix_order, lapack_int n, lapack_int nrhs,\n                          lapack_complex_float* a, lapack_int lda,\n                          lapack_int* ipiv, lapack_complex_float* b,\n                          lapack_int ldb );\nlapack_int LAPACKE_zgesv( int matrix_order, lapack_int n, lapack_int nrhs,\n                          lapack_complex_double* a, lapack_int lda,\n                          lapack_int* ipiv, lapack_complex_double* b,\n                          lapack_int ldb );\nlapack_int LAPACKE_dsgesv( int matrix_order, lapack_int n, lapack_int nrhs,\n                           double* a, lapack_int lda, lapack_int* ipiv,\n                           double* b, lapack_int ldb, double* x, lapack_int ldx,\n                           lapack_int* iter );\nlapack_int LAPACKE_zcgesv( int matrix_order, lapack_int n, lapack_int nrhs,\n                           lapack_complex_double* a, lapack_int lda,\n                           lapack_int* ipiv, lapack_complex_double* b,\n                           lapack_int ldb, lapack_complex_double* x,\n                           lapack_int ldx, lapack_int* iter );\n\nlapack_int LAPACKE_sgesvd( int matrix_order, char jobu, char jobvt,\n                           lapack_int m, lapack_int n, float* a, lapack_int lda,\n                           float* s, float* u, lapack_int ldu, float* vt,\n                           lapack_int ldvt, float* superb );\nlapack_int LAPACKE_dgesvd( int matrix_order, char jobu, char jobvt,\n                           lapack_int m, lapack_int n, double* a,\n                           lapack_int lda, double* s, double* u, lapack_int ldu,\n                           double* vt, lapack_int ldvt, double* superb );\nlapack_int LAPACKE_cgesvd( int matrix_order, char jobu, char jobvt,\n                           lapack_int m, lapack_int n, lapack_complex_float* a,\n                           lapack_int lda, float* s, lapack_complex_float* u,\n                           lapack_int ldu, lapack_complex_float* vt,\n                           lapack_int ldvt, float* superb );\nlapack_int LAPACKE_zgesvd( int matrix_order, char jobu, char jobvt,\n                           lapack_int m, lapack_int n, lapack_complex_double* a,\n                           lapack_int lda, double* s, lapack_complex_double* u,\n                           lapack_int ldu, lapack_complex_double* vt,\n                           lapack_int ldvt, double* superb );\n\nlapack_int LAPACKE_sgesvj( int matrix_order, char joba, char jobu, char jobv,\n                           lapack_int m, lapack_int n, float* a, lapack_int lda,\n                           float* sva, lapack_int mv, float* v, lapack_int ldv,\n                           float* stat );\nlapack_int LAPACKE_dgesvj( int 
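\n/* Illustrative usage sketch (editor's note, not part of the original header):\n   solving the 3x3 linear system A*x = b in double precision via LAPACKE_dgesv,\n   declared above. Row-major storage; ipiv receives the pivot indices and b is\n   overwritten with the solution when info == 0.\n\n     double a[9] = { 4, 1, 2,   1, 3, 0,   2, 0, 5 };\n     double b[3] = { 1, 2, 3 };\n     lapack_int ipiv[3];\n     lapack_int info = LAPACKE_dgesv( LAPACK_ROW_MAJOR, 3, 1, a, 3, ipiv, b, 1 );\n*/\n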
matrix_order, char joba, char jobu, char jobv,\n                           lapack_int m, lapack_int n, double* a,\n                           lapack_int lda, double* sva, lapack_int mv,\n                           double* v, lapack_int ldv, double* stat );\n\nlapack_int LAPACKE_sgesvx( int matrix_order, char fact, char trans,\n                           lapack_int n, lapack_int nrhs, float* a,\n                           lapack_int lda, float* af, lapack_int ldaf,\n                           lapack_int* ipiv, char* equed, float* r, float* c,\n                           float* b, lapack_int ldb, float* x, lapack_int ldx,\n                           float* rcond, float* ferr, float* berr,\n                           float* rpivot );\nlapack_int LAPACKE_dgesvx( int matrix_order, char fact, char trans,\n                           lapack_int n, lapack_int nrhs, double* a,\n                           lapack_int lda, double* af, lapack_int ldaf,\n                           lapack_int* ipiv, char* equed, double* r, double* c,\n                           double* b, lapack_int ldb, double* x, lapack_int ldx,\n                           double* rcond, double* ferr, double* berr,\n                           double* rpivot );\nlapack_int LAPACKE_cgesvx( int matrix_order, char fact, char trans,\n                           lapack_int n, lapack_int nrhs,\n                           lapack_complex_float* a, lapack_int lda,\n                           lapack_complex_float* af, lapack_int ldaf,\n                           lapack_int* ipiv, char* equed, float* r, float* c,\n                           lapack_complex_float* b, lapack_int ldb,\n                           lapack_complex_float* x, lapack_int ldx,\n                           float* rcond, float* ferr, float* berr,\n                           float* rpivot );\nlapack_int LAPACKE_zgesvx( int matrix_order, char fact, char trans,\n                           lapack_int n, lapack_int nrhs,\n                           lapack_complex_double* a, lapack_int lda,\n                           lapack_complex_double* af, lapack_int ldaf,\n                           lapack_int* ipiv, char* equed, double* r, double* c,\n                           lapack_complex_double* b, lapack_int ldb,\n                           lapack_complex_double* x, lapack_int ldx,\n                           double* rcond, double* ferr, double* berr,\n                           double* rpivot );\n\nlapack_int LAPACKE_sgesvxx( int matrix_order, char fact, char trans,\n                            lapack_int n, lapack_int nrhs, float* a,\n                            lapack_int lda, float* af, lapack_int ldaf,\n                            lapack_int* ipiv, char* equed, float* r, float* c,\n                            float* b, lapack_int ldb, float* x, lapack_int ldx,\n                            float* rcond, float* rpvgrw, float* berr,\n                            lapack_int n_err_bnds, float* err_bnds_norm,\n                            float* err_bnds_comp, lapack_int nparams,\n                            float* params );\nlapack_int LAPACKE_dgesvxx( int matrix_order, char fact, char trans,\n                            lapack_int n, lapack_int nrhs, double* a,\n                            lapack_int lda, double* af, lapack_int ldaf,\n                            lapack_int* ipiv, char* equed, double* r, double* c,\n                            double* b, lapack_int ldb, double* x,\n                            lapack_int ldx, double* rcond, double* rpvgrw,\n                            double* 
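\n/* Illustrative usage sketch (editor's note, not part of the original header):\n   singular values of a 2x3 matrix via LAPACKE_dgesvd, declared above.\n   jobu = jobvt = 'N' requests singular values only, so u/vt are unreferenced\n   dummies; superb receives any unconverged superdiagonal elements.\n\n     double a[6] = { 1, 2, 3, 4, 5, 6 };\n     double s[2], superb[1], u[1], vt[1];\n     lapack_int info = LAPACKE_dgesvd( LAPACK_ROW_MAJOR, 'N', 'N', 2, 3, a, 3,\n                                       s, u, 1, vt, 1, superb );\n*/\n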
berr, lapack_int n_err_bnds,\n                            double* err_bnds_norm, double* err_bnds_comp,\n                            lapack_int nparams, double* params );\nlapack_int LAPACKE_cgesvxx( int matrix_order, char fact, char trans,\n                            lapack_int n, lapack_int nrhs,\n                            lapack_complex_float* a, lapack_int lda,\n                            lapack_complex_float* af, lapack_int ldaf,\n                            lapack_int* ipiv, char* equed, float* r, float* c,\n                            lapack_complex_float* b, lapack_int ldb,\n                            lapack_complex_float* x, lapack_int ldx,\n                            float* rcond, float* rpvgrw, float* berr,\n                            lapack_int n_err_bnds, float* err_bnds_norm,\n                            float* err_bnds_comp, lapack_int nparams,\n                            float* params );\nlapack_int LAPACKE_zgesvxx( int matrix_order, char fact, char trans,\n                            lapack_int n, lapack_int nrhs,\n                            lapack_complex_double* a, lapack_int lda,\n                            lapack_complex_double* af, lapack_int ldaf,\n                            lapack_int* ipiv, char* equed, double* r, double* c,\n                            lapack_complex_double* b, lapack_int ldb,\n                            lapack_complex_double* x, lapack_int ldx,\n                            double* rcond, double* rpvgrw, double* berr,\n                            lapack_int n_err_bnds, double* err_bnds_norm,\n                            double* err_bnds_comp, lapack_int nparams,\n                            double* params );\n\nlapack_int LAPACKE_sgetf2( int matrix_order, lapack_int m, lapack_int n,\n                           float* a, lapack_int lda, lapack_int* ipiv );\nlapack_int LAPACKE_dgetf2( int matrix_order, lapack_int m, lapack_int n,\n                           double* a, lapack_int lda, lapack_int* ipiv );\nlapack_int LAPACKE_cgetf2( int matrix_order, lapack_int m, lapack_int n,\n                           lapack_complex_float* a, lapack_int lda,\n                           lapack_int* ipiv );\nlapack_int LAPACKE_zgetf2( int matrix_order, lapack_int m, lapack_int n,\n                           lapack_complex_double* a, lapack_int lda,\n                           lapack_int* ipiv );\n\nlapack_int LAPACKE_sgetrf( int matrix_order, lapack_int m, lapack_int n,\n                           float* a, lapack_int lda, lapack_int* ipiv );\nlapack_int LAPACKE_dgetrf( int matrix_order, lapack_int m, lapack_int n,\n                           double* a, lapack_int lda, lapack_int* ipiv );\nlapack_int LAPACKE_cgetrf( int matrix_order, lapack_int m, lapack_int n,\n                           lapack_complex_float* a, lapack_int lda,\n                           lapack_int* ipiv );\nlapack_int LAPACKE_zgetrf( int matrix_order, lapack_int m, lapack_int n,\n                           lapack_complex_double* a, lapack_int lda,\n                           lapack_int* ipiv );\n\nlapack_int LAPACKE_sgetri( int matrix_order, lapack_int n, float* a,\n                           lapack_int lda, const lapack_int* ipiv );\nlapack_int LAPACKE_dgetri( int matrix_order, lapack_int n, double* a,\n                           lapack_int lda, const lapack_int* ipiv );\nlapack_int LAPACKE_cgetri( int matrix_order, lapack_int n,\n                           lapack_complex_float* a, lapack_int lda,\n                           const lapack_int* ipiv );\nlapack_int LAPACKE_zgetri( int 
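\n/* Illustrative usage sketch (editor's note, not part of the original header):\n   in-place matrix inverse via the getrf/getri pair declared above: factor\n   P*A = L*U with LAPACKE_dgetrf, then form the inverse with LAPACKE_dgetri,\n   checking both return codes.\n\n     double a[4] = { 1, 2, 3, 4 };\n     lapack_int ipiv[2];\n     lapack_int info = LAPACKE_dgetrf( LAPACK_ROW_MAJOR, 2, 2, a, 2, ipiv );\n     if( info == 0 )\n         info = LAPACKE_dgetri( LAPACK_ROW_MAJOR, 2, a, 2, ipiv );\n*/\n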
matrix_order, lapack_int n,\n                           lapack_complex_double* a, lapack_int lda,\n                           const lapack_int* ipiv );\n\nlapack_int LAPACKE_sgetrs( int matrix_order, char trans, lapack_int n,\n                           lapack_int nrhs, const float* a, lapack_int lda,\n                           const lapack_int* ipiv, float* b, lapack_int ldb );\nlapack_int LAPACKE_dgetrs( int matrix_order, char trans, lapack_int n,\n                           lapack_int nrhs, const double* a, lapack_int lda,\n                           const lapack_int* ipiv, double* b, lapack_int ldb );\nlapack_int LAPACKE_cgetrs( int matrix_order, char trans, lapack_int n,\n                           lapack_int nrhs, const lapack_complex_float* a,\n                           lapack_int lda, const lapack_int* ipiv,\n                           lapack_complex_float* b, lapack_int ldb );\nlapack_int LAPACKE_zgetrs( int matrix_order, char trans, lapack_int n,\n                           lapack_int nrhs, const lapack_complex_double* a,\n                           lapack_int lda, const lapack_int* ipiv,\n                           lapack_complex_double* b, lapack_int ldb );\n\nlapack_int LAPACKE_sggbak( int matrix_order, char job, char side, lapack_int n,\n                           lapack_int ilo, lapack_int ihi, const float* lscale,\n                           const float* rscale, lapack_int m, float* v,\n                           lapack_int ldv );\nlapack_int LAPACKE_dggbak( int matrix_order, char job, char side, lapack_int n,\n                           lapack_int ilo, lapack_int ihi, const double* lscale,\n                           const double* rscale, lapack_int m, double* v,\n                           lapack_int ldv );\nlapack_int LAPACKE_cggbak( int matrix_order, char job, char side, lapack_int n,\n                           lapack_int ilo, lapack_int ihi, const float* lscale,\n                           const float* rscale, lapack_int m,\n                           lapack_complex_float* v, lapack_int ldv );\nlapack_int LAPACKE_zggbak( int matrix_order, char job, char side, lapack_int n,\n                           lapack_int ilo, lapack_int ihi, const double* lscale,\n                           const double* rscale, lapack_int m,\n                           lapack_complex_double* v, lapack_int ldv );\n\nlapack_int LAPACKE_sggbal( int matrix_order, char job, lapack_int n, float* a,\n                           lapack_int lda, float* b, lapack_int ldb,\n                           lapack_int* ilo, lapack_int* ihi, float* lscale,\n                           float* rscale );\nlapack_int LAPACKE_dggbal( int matrix_order, char job, lapack_int n, double* a,\n                           lapack_int lda, double* b, lapack_int ldb,\n                           lapack_int* ilo, lapack_int* ihi, double* lscale,\n                           double* rscale );\nlapack_int LAPACKE_cggbal( int matrix_order, char job, lapack_int n,\n                           lapack_complex_float* a, lapack_int lda,\n                           lapack_complex_float* b, lapack_int ldb,\n                           lapack_int* ilo, lapack_int* ihi, float* lscale,\n                           float* rscale );\nlapack_int LAPACKE_zggbal( int matrix_order, char job, lapack_int n,\n                           lapack_complex_double* a, lapack_int lda,\n                           lapack_complex_double* b, lapack_int ldb,\n                           lapack_int* ilo, lapack_int* ihi, double* lscale,\n                           
double* rscale );\n\nlapack_int LAPACKE_sgges( int matrix_order, char jobvsl, char jobvsr, char sort,\n                          LAPACK_S_SELECT3 selctg, lapack_int n, float* a,\n                          lapack_int lda, float* b, lapack_int ldb,\n                          lapack_int* sdim, float* alphar, float* alphai,\n                          float* beta, float* vsl, lapack_int ldvsl, float* vsr,\n                          lapack_int ldvsr );\nlapack_int LAPACKE_dgges( int matrix_order, char jobvsl, char jobvsr, char sort,\n                          LAPACK_D_SELECT3 selctg, lapack_int n, double* a,\n                          lapack_int lda, double* b, lapack_int ldb,\n                          lapack_int* sdim, double* alphar, double* alphai,\n                          double* beta, double* vsl, lapack_int ldvsl,\n                          double* vsr, lapack_int ldvsr );\nlapack_int LAPACKE_cgges( int matrix_order, char jobvsl, char jobvsr, char sort,\n                          LAPACK_C_SELECT2 selctg, lapack_int n,\n                          lapack_complex_float* a, lapack_int lda,\n                          lapack_complex_float* b, lapack_int ldb,\n                          lapack_int* sdim, lapack_complex_float* alpha,\n                          lapack_complex_float* beta, lapack_complex_float* vsl,\n                          lapack_int ldvsl, lapack_complex_float* vsr,\n                          lapack_int ldvsr );\nlapack_int LAPACKE_zgges( int matrix_order, char jobvsl, char jobvsr, char sort,\n                          LAPACK_Z_SELECT2 selctg, lapack_int n,\n                          lapack_complex_double* a, lapack_int lda,\n                          lapack_complex_double* b, lapack_int ldb,\n                          lapack_int* sdim, lapack_complex_double* alpha,\n                          lapack_complex_double* beta,\n                          lapack_complex_double* vsl, lapack_int ldvsl,\n                          lapack_complex_double* vsr, lapack_int ldvsr );\n\nlapack_int LAPACKE_sggesx( int matrix_order, char jobvsl, char jobvsr,\n                           char sort, LAPACK_S_SELECT3 selctg, char sense,\n                           lapack_int n, float* a, lapack_int lda, float* b,\n                           lapack_int ldb, lapack_int* sdim, float* alphar,\n                           float* alphai, float* beta, float* vsl,\n                           lapack_int ldvsl, float* vsr, lapack_int ldvsr,\n                           float* rconde, float* rcondv );\nlapack_int LAPACKE_dggesx( int matrix_order, char jobvsl, char jobvsr,\n                           char sort, LAPACK_D_SELECT3 selctg, char sense,\n                           lapack_int n, double* a, lapack_int lda, double* b,\n                           lapack_int ldb, lapack_int* sdim, double* alphar,\n                           double* alphai, double* beta, double* vsl,\n                           lapack_int ldvsl, double* vsr, lapack_int ldvsr,\n                           double* rconde, double* rcondv );\nlapack_int LAPACKE_cggesx( int matrix_order, char jobvsl, char jobvsr,\n                           char sort, LAPACK_C_SELECT2 selctg, char sense,\n                           lapack_int n, lapack_complex_float* a,\n                           lapack_int lda, lapack_complex_float* b,\n                           lapack_int ldb, lapack_int* sdim,\n                           lapack_complex_float* alpha,\n                           lapack_complex_float* beta,\n                           lapack_complex_float* vsl, 
lapack_int ldvsl,\n                           lapack_complex_float* vsr, lapack_int ldvsr,\n                           float* rconde, float* rcondv );\nlapack_int LAPACKE_zggesx( int matrix_order, char jobvsl, char jobvsr,\n                           char sort, LAPACK_Z_SELECT2 selctg, char sense,\n                           lapack_int n, lapack_complex_double* a,\n                           lapack_int lda, lapack_complex_double* b,\n                           lapack_int ldb, lapack_int* sdim,\n                           lapack_complex_double* alpha,\n                           lapack_complex_double* beta,\n                           lapack_complex_double* vsl, lapack_int ldvsl,\n                           lapack_complex_double* vsr, lapack_int ldvsr,\n                           double* rconde, double* rcondv );\n\nlapack_int LAPACKE_sggev( int matrix_order, char jobvl, char jobvr,\n                          lapack_int n, float* a, lapack_int lda, float* b,\n                          lapack_int ldb, float* alphar, float* alphai,\n                          float* beta, float* vl, lapack_int ldvl, float* vr,\n                          lapack_int ldvr );\nlapack_int LAPACKE_dggev( int matrix_order, char jobvl, char jobvr,\n                          lapack_int n, double* a, lapack_int lda, double* b,\n                          lapack_int ldb, double* alphar, double* alphai,\n                          double* beta, double* vl, lapack_int ldvl, double* vr,\n                          lapack_int ldvr );\nlapack_int LAPACKE_cggev( int matrix_order, char jobvl, char jobvr,\n                          lapack_int n, lapack_complex_float* a, lapack_int lda,\n                          lapack_complex_float* b, lapack_int ldb,\n                          lapack_complex_float* alpha,\n                          lapack_complex_float* beta, lapack_complex_float* vl,\n                          lapack_int ldvl, lapack_complex_float* vr,\n                          lapack_int ldvr );\nlapack_int LAPACKE_zggev( int matrix_order, char jobvl, char jobvr,\n                          lapack_int n, lapack_complex_double* a,\n                          lapack_int lda, lapack_complex_double* b,\n                          lapack_int ldb, lapack_complex_double* alpha,\n                          lapack_complex_double* beta,\n                          lapack_complex_double* vl, lapack_int ldvl,\n                          lapack_complex_double* vr, lapack_int ldvr );\n\nlapack_int LAPACKE_sggevx( int matrix_order, char balanc, char jobvl,\n                           char jobvr, char sense, lapack_int n, float* a,\n                           lapack_int lda, float* b, lapack_int ldb,\n                           float* alphar, float* alphai, float* beta, float* vl,\n                           lapack_int ldvl, float* vr, lapack_int ldvr,\n                           lapack_int* ilo, lapack_int* ihi, float* lscale,\n                           float* rscale, float* abnrm, float* bbnrm,\n                           float* rconde, float* rcondv );\nlapack_int LAPACKE_dggevx( int matrix_order, char balanc, char jobvl,\n                           char jobvr, char sense, lapack_int n, double* a,\n                           lapack_int lda, double* b, lapack_int ldb,\n                           double* alphar, double* alphai, double* beta,\n                           double* vl, lapack_int ldvl, double* vr,\n                           lapack_int ldvr, lapack_int* ilo, lapack_int* ihi,\n                           double* lscale, double* rscale, 
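\n/* Illustrative usage sketch (editor's note, not part of the original header):\n   generalized eigenvalues of the 2x2 pencil (A, B) via LAPACKE_dggev,\n   declared above. The j-th eigenvalue is (alphar[j] + i*alphai[j]) / beta[j];\n   jobvl = jobvr = 'N' skips the eigenvector computation.\n\n     double a[4] = { 1, 2, 3, 4 }, b[4] = { 1, 0, 0, 1 };\n     double alphar[2], alphai[2], beta[2], vl[1], vr[1];\n     lapack_int info = LAPACKE_dggev( LAPACK_ROW_MAJOR, 'N', 'N', 2, a, 2,\n                                      b, 2, alphar, alphai, beta,\n                                      vl, 1, vr, 1 );\n*/\n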
double* abnrm,\n                           double* bbnrm, double* rconde, double* rcondv );\nlapack_int LAPACKE_cggevx( int matrix_order, char balanc, char jobvl,\n                           char jobvr, char sense, lapack_int n,\n                           lapack_complex_float* a, lapack_int lda,\n                           lapack_complex_float* b, lapack_int ldb,\n                           lapack_complex_float* alpha,\n                           lapack_complex_float* beta, lapack_complex_float* vl,\n                           lapack_int ldvl, lapack_complex_float* vr,\n                           lapack_int ldvr, lapack_int* ilo, lapack_int* ihi,\n                           float* lscale, float* rscale, float* abnrm,\n                           float* bbnrm, float* rconde, float* rcondv );\nlapack_int LAPACKE_zggevx( int matrix_order, char balanc, char jobvl,\n                           char jobvr, char sense, lapack_int n,\n                           lapack_complex_double* a, lapack_int lda,\n                           lapack_complex_double* b, lapack_int ldb,\n                           lapack_complex_double* alpha,\n                           lapack_complex_double* beta,\n                           lapack_complex_double* vl, lapack_int ldvl,\n                           lapack_complex_double* vr, lapack_int ldvr,\n                           lapack_int* ilo, lapack_int* ihi, double* lscale,\n                           double* rscale, double* abnrm, double* bbnrm,\n                           double* rconde, double* rcondv );\n\nlapack_int LAPACKE_sggglm( int matrix_order, lapack_int n, lapack_int m,\n                           lapack_int p, float* a, lapack_int lda, float* b,\n                           lapack_int ldb, float* d, float* x, float* y );\nlapack_int LAPACKE_dggglm( int matrix_order, lapack_int n, lapack_int m,\n                           lapack_int p, double* a, lapack_int lda, double* b,\n                           lapack_int ldb, double* d, double* x, double* y );\nlapack_int LAPACKE_cggglm( int matrix_order, lapack_int n, lapack_int m,\n                           lapack_int p, lapack_complex_float* a,\n                           lapack_int lda, lapack_complex_float* b,\n                           lapack_int ldb, lapack_complex_float* d,\n                           lapack_complex_float* x, lapack_complex_float* y );\nlapack_int LAPACKE_zggglm( int matrix_order, lapack_int n, lapack_int m,\n                           lapack_int p, lapack_complex_double* a,\n                           lapack_int lda, lapack_complex_double* b,\n                           lapack_int ldb, lapack_complex_double* d,\n                           lapack_complex_double* x, lapack_complex_double* y );\n\nlapack_int LAPACKE_sgghrd( int matrix_order, char compq, char compz,\n                           lapack_int n, lapack_int ilo, lapack_int ihi,\n                           float* a, lapack_int lda, float* b, lapack_int ldb,\n                           float* q, lapack_int ldq, float* z, lapack_int ldz );\nlapack_int LAPACKE_dgghrd( int matrix_order, char compq, char compz,\n                           lapack_int n, lapack_int ilo, lapack_int ihi,\n                           double* a, lapack_int lda, double* b, lapack_int ldb,\n                           double* q, lapack_int ldq, double* z,\n                           lapack_int ldz );\nlapack_int LAPACKE_cgghrd( int matrix_order, char compq, char compz,\n                           lapack_int n, lapack_int ilo, lapack_int ihi,\n                          
 lapack_complex_float* a, lapack_int lda,\n                           lapack_complex_float* b, lapack_int ldb,\n                           lapack_complex_float* q, lapack_int ldq,\n                           lapack_complex_float* z, lapack_int ldz );\nlapack_int LAPACKE_zgghrd( int matrix_order, char compq, char compz,\n                           lapack_int n, lapack_int ilo, lapack_int ihi,\n                           lapack_complex_double* a, lapack_int lda,\n                           lapack_complex_double* b, lapack_int ldb,\n                           lapack_complex_double* q, lapack_int ldq,\n                           lapack_complex_double* z, lapack_int ldz );\n\nlapack_int LAPACKE_sgglse( int matrix_order, lapack_int m, lapack_int n,\n                           lapack_int p, float* a, lapack_int lda, float* b,\n                           lapack_int ldb, float* c, float* d, float* x );\nlapack_int LAPACKE_dgglse( int matrix_order, lapack_int m, lapack_int n,\n                           lapack_int p, double* a, lapack_int lda, double* b,\n                           lapack_int ldb, double* c, double* d, double* x );\nlapack_int LAPACKE_cgglse( int matrix_order, lapack_int m, lapack_int n,\n                           lapack_int p, lapack_complex_float* a,\n                           lapack_int lda, lapack_complex_float* b,\n                           lapack_int ldb, lapack_complex_float* c,\n                           lapack_complex_float* d, lapack_complex_float* x );\nlapack_int LAPACKE_zgglse( int matrix_order, lapack_int m, lapack_int n,\n                           lapack_int p, lapack_complex_double* a,\n                           lapack_int lda, lapack_complex_double* b,\n                           lapack_int ldb, lapack_complex_double* c,\n                           lapack_complex_double* d, lapack_complex_double* x );\n\nlapack_int LAPACKE_sggqrf( int matrix_order, lapack_int n, lapack_int m,\n                           lapack_int p, float* a, lapack_int lda, float* taua,\n                           float* b, lapack_int ldb, float* taub );\nlapack_int LAPACKE_dggqrf( int matrix_order, lapack_int n, lapack_int m,\n                           lapack_int p, double* a, lapack_int lda,\n                           double* taua, double* b, lapack_int ldb,\n                           double* taub );\nlapack_int LAPACKE_cggqrf( int matrix_order, lapack_int n, lapack_int m,\n                           lapack_int p, lapack_complex_float* a,\n                           lapack_int lda, lapack_complex_float* taua,\n                           lapack_complex_float* b, lapack_int ldb,\n                           lapack_complex_float* taub );\nlapack_int LAPACKE_zggqrf( int matrix_order, lapack_int n, lapack_int m,\n                           lapack_int p, lapack_complex_double* a,\n                           lapack_int lda, lapack_complex_double* taua,\n                           lapack_complex_double* b, lapack_int ldb,\n                           lapack_complex_double* taub );\n\nlapack_int LAPACKE_sggrqf( int matrix_order, lapack_int m, lapack_int p,\n                           lapack_int n, float* a, lapack_int lda, float* taua,\n                           float* b, lapack_int ldb, float* taub );\nlapack_int LAPACKE_dggrqf( int matrix_order, lapack_int m, lapack_int p,\n                           lapack_int n, double* a, lapack_int lda,\n                           double* taua, double* b, lapack_int ldb,\n                           double* taub );\nlapack_int LAPACKE_cggrqf( int 
matrix_order, lapack_int m, lapack_int p,\n                           lapack_int n, lapack_complex_float* a,\n                           lapack_int lda, lapack_complex_float* taua,\n                           lapack_complex_float* b, lapack_int ldb,\n                           lapack_complex_float* taub );\nlapack_int LAPACKE_zggrqf( int matrix_order, lapack_int m, lapack_int p,\n                           lapack_int n, lapack_complex_double* a,\n                           lapack_int lda, lapack_complex_double* taua,\n                           lapack_complex_double* b, lapack_int ldb,\n                           lapack_complex_double* taub );\n\nlapack_int LAPACKE_sggsvd( int matrix_order, char jobu, char jobv, char jobq,\n                           lapack_int m, lapack_int n, lapack_int p,\n                           lapack_int* k, lapack_int* l, float* a,\n                           lapack_int lda, float* b, lapack_int ldb,\n                           float* alpha, float* beta, float* u, lapack_int ldu,\n                           float* v, lapack_int ldv, float* q, lapack_int ldq,\n                           lapack_int* iwork );\nlapack_int LAPACKE_dggsvd( int matrix_order, char jobu, char jobv, char jobq,\n                           lapack_int m, lapack_int n, lapack_int p,\n                           lapack_int* k, lapack_int* l, double* a,\n                           lapack_int lda, double* b, lapack_int ldb,\n                           double* alpha, double* beta, double* u,\n                           lapack_int ldu, double* v, lapack_int ldv, double* q,\n                           lapack_int ldq, lapack_int* iwork );\nlapack_int LAPACKE_cggsvd( int matrix_order, char jobu, char jobv, char jobq,\n                           lapack_int m, lapack_int n, lapack_int p,\n                           lapack_int* k, lapack_int* l,\n                           lapack_complex_float* a, lapack_int lda,\n                           lapack_complex_float* b, lapack_int ldb,\n                           float* alpha, float* beta, lapack_complex_float* u,\n                           lapack_int ldu, lapack_complex_float* v,\n                           lapack_int ldv, lapack_complex_float* q,\n                           lapack_int ldq, lapack_int* iwork );\nlapack_int LAPACKE_zggsvd( int matrix_order, char jobu, char jobv, char jobq,\n                           lapack_int m, lapack_int n, lapack_int p,\n                           lapack_int* k, lapack_int* l,\n                           lapack_complex_double* a, lapack_int lda,\n                           lapack_complex_double* b, lapack_int ldb,\n                           double* alpha, double* beta,\n                           lapack_complex_double* u, lapack_int ldu,\n                           lapack_complex_double* v, lapack_int ldv,\n                           lapack_complex_double* q, lapack_int ldq,\n                           lapack_int* iwork );\n\nlapack_int LAPACKE_sggsvp( int matrix_order, char jobu, char jobv, char jobq,\n                           lapack_int m, lapack_int p, lapack_int n, float* a,\n                           lapack_int lda, float* b, lapack_int ldb, float tola,\n                           float tolb, lapack_int* k, lapack_int* l, float* u,\n                           lapack_int ldu, float* v, lapack_int ldv, float* q,\n                           lapack_int ldq );\nlapack_int LAPACKE_dggsvp( int matrix_order, char jobu, char jobv, char jobq,\n                           lapack_int m, lapack_int p, lapack_int n, double* 
a,\n                           lapack_int lda, double* b, lapack_int ldb,\n                           double tola, double tolb, lapack_int* k,\n                           lapack_int* l, double* u, lapack_int ldu, double* v,\n                           lapack_int ldv, double* q, lapack_int ldq );\nlapack_int LAPACKE_cggsvp( int matrix_order, char jobu, char jobv, char jobq,\n                           lapack_int m, lapack_int p, lapack_int n,\n                           lapack_complex_float* a, lapack_int lda,\n                           lapack_complex_float* b, lapack_int ldb, float tola,\n                           float tolb, lapack_int* k, lapack_int* l,\n                           lapack_complex_float* u, lapack_int ldu,\n                           lapack_complex_float* v, lapack_int ldv,\n                           lapack_complex_float* q, lapack_int ldq );\nlapack_int LAPACKE_zggsvp( int matrix_order, char jobu, char jobv, char jobq,\n                           lapack_int m, lapack_int p, lapack_int n,\n                           lapack_complex_double* a, lapack_int lda,\n                           lapack_complex_double* b, lapack_int ldb,\n                           double tola, double tolb, lapack_int* k,\n                           lapack_int* l, lapack_complex_double* u,\n                           lapack_int ldu, lapack_complex_double* v,\n                           lapack_int ldv, lapack_complex_double* q,\n                           lapack_int ldq );\n\nlapack_int LAPACKE_sgtcon( char norm, lapack_int n, const float* dl,\n                           const float* d, const float* du, const float* du2,\n                           const lapack_int* ipiv, float anorm, float* rcond );\nlapack_int LAPACKE_dgtcon( char norm, lapack_int n, const double* dl,\n                           const double* d, const double* du, const double* du2,\n                           const lapack_int* ipiv, double anorm,\n                           double* rcond );\nlapack_int LAPACKE_cgtcon( char norm, lapack_int n,\n                           const lapack_complex_float* dl,\n                           const lapack_complex_float* d,\n                           const lapack_complex_float* du,\n                           const lapack_complex_float* du2,\n                           const lapack_int* ipiv, float anorm, float* rcond );\nlapack_int LAPACKE_zgtcon( char norm, lapack_int n,\n                           const lapack_complex_double* dl,\n                           const lapack_complex_double* d,\n                           const lapack_complex_double* du,\n                           const lapack_complex_double* du2,\n                           const lapack_int* ipiv, double anorm,\n                           double* rcond );\n\nlapack_int LAPACKE_sgtrfs( int matrix_order, char trans, lapack_int n,\n                           lapack_int nrhs, const float* dl, const float* d,\n                           const float* du, const float* dlf, const float* df,\n                           const float* duf, const float* du2,\n                           const lapack_int* ipiv, const float* b,\n                           lapack_int ldb, float* x, lapack_int ldx,\n                           float* ferr, float* berr );\nlapack_int LAPACKE_dgtrfs( int matrix_order, char trans, lapack_int n,\n                           lapack_int nrhs, const double* dl, const double* d,\n                           const double* du, const double* dlf,\n                           const double* df, const double* duf,\n                
           const double* du2, const lapack_int* ipiv,\n                           const double* b, lapack_int ldb, double* x,\n                           lapack_int ldx, double* ferr, double* berr );\nlapack_int LAPACKE_cgtrfs( int matrix_order, char trans, lapack_int n,\n                           lapack_int nrhs, const lapack_complex_float* dl,\n                           const lapack_complex_float* d,\n                           const lapack_complex_float* du,\n                           const lapack_complex_float* dlf,\n                           const lapack_complex_float* df,\n                           const lapack_complex_float* duf,\n                           const lapack_complex_float* du2,\n                           const lapack_int* ipiv,\n                           const lapack_complex_float* b, lapack_int ldb,\n                           lapack_complex_float* x, lapack_int ldx, float* ferr,\n                           float* berr );\nlapack_int LAPACKE_zgtrfs( int matrix_order, char trans, lapack_int n,\n                           lapack_int nrhs, const lapack_complex_double* dl,\n                           const lapack_complex_double* d,\n                           const lapack_complex_double* du,\n                           const lapack_complex_double* dlf,\n                           const lapack_complex_double* df,\n                           const lapack_complex_double* duf,\n                           const lapack_complex_double* du2,\n                           const lapack_int* ipiv,\n                           const lapack_complex_double* b, lapack_int ldb,\n                           lapack_complex_double* x, lapack_int ldx,\n                           double* ferr, double* berr );\n\nlapack_int LAPACKE_sgtsv( int matrix_order, lapack_int n, lapack_int nrhs,\n                          float* dl, float* d, float* du, float* b,\n                          lapack_int ldb );\nlapack_int LAPACKE_dgtsv( int matrix_order, lapack_int n, lapack_int nrhs,\n                          double* dl, double* d, double* du, double* b,\n                          lapack_int ldb );\nlapack_int LAPACKE_cgtsv( int matrix_order, lapack_int n, lapack_int nrhs,\n                          lapack_complex_float* dl, lapack_complex_float* d,\n                          lapack_complex_float* du, lapack_complex_float* b,\n                          lapack_int ldb );\nlapack_int LAPACKE_zgtsv( int matrix_order, lapack_int n, lapack_int nrhs,\n                          lapack_complex_double* dl, lapack_complex_double* d,\n                          lapack_complex_double* du, lapack_complex_double* b,\n                          lapack_int ldb );\n\nlapack_int LAPACKE_sgtsvx( int matrix_order, char fact, char trans,\n                           lapack_int n, lapack_int nrhs, const float* dl,\n                           const float* d, const float* du, float* dlf,\n                           float* df, float* duf, float* du2, lapack_int* ipiv,\n                           const float* b, lapack_int ldb, float* x,\n                           lapack_int ldx, float* rcond, float* ferr,\n                           float* berr );\nlapack_int LAPACKE_dgtsvx( int matrix_order, char fact, char trans,\n                           lapack_int n, lapack_int nrhs, const double* dl,\n                           const double* d, const double* du, double* dlf,\n                           double* df, double* duf, double* du2,\n                           lapack_int* ipiv, const double* b, lapack_int ldb,\n                     
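\n/* Illustrative usage sketch (editor's note, not part of the original header):\n   solving a tridiagonal system via LAPACKE_dgtsv, declared above. dl, d, du\n   are the sub-, main and super-diagonals (lengths n-1, n, n-1); all four\n   arrays are overwritten, with the solution returned in b.\n\n     double dl[2] = { 1, 1 }, d[3] = { 4, 4, 4 }, du[2] = { 1, 1 };\n     double b[3] = { 1, 2, 3 };\n     lapack_int info = LAPACKE_dgtsv( LAPACK_ROW_MAJOR, 3, 1, dl, d, du,\n                                      b, 1 );\n*/\n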
      double* x, lapack_int ldx, double* rcond,\n                           double* ferr, double* berr );\nlapack_int LAPACKE_cgtsvx( int matrix_order, char fact, char trans,\n                           lapack_int n, lapack_int nrhs,\n                           const lapack_complex_float* dl,\n                           const lapack_complex_float* d,\n                           const lapack_complex_float* du,\n                           lapack_complex_float* dlf, lapack_complex_float* df,\n                           lapack_complex_float* duf, lapack_complex_float* du2,\n                           lapack_int* ipiv, const lapack_complex_float* b,\n                           lapack_int ldb, lapack_complex_float* x,\n                           lapack_int ldx, float* rcond, float* ferr,\n                           float* berr );\nlapack_int LAPACKE_zgtsvx( int matrix_order, char fact, char trans,\n                           lapack_int n, lapack_int nrhs,\n                           const lapack_complex_double* dl,\n                           const lapack_complex_double* d,\n                           const lapack_complex_double* du,\n                           lapack_complex_double* dlf,\n                           lapack_complex_double* df,\n                           lapack_complex_double* duf,\n                           lapack_complex_double* du2, lapack_int* ipiv,\n                           const lapack_complex_double* b, lapack_int ldb,\n                           lapack_complex_double* x, lapack_int ldx,\n                           double* rcond, double* ferr, double* berr );\n\nlapack_int LAPACKE_sgttrf( lapack_int n, float* dl, float* d, float* du,\n                           float* du2, lapack_int* ipiv );\nlapack_int LAPACKE_dgttrf( lapack_int n, double* dl, double* d, double* du,\n                           double* du2, lapack_int* ipiv );\nlapack_int LAPACKE_cgttrf( lapack_int n, lapack_complex_float* dl,\n                           lapack_complex_float* d, lapack_complex_float* du,\n                           lapack_complex_float* du2, lapack_int* ipiv );\nlapack_int LAPACKE_zgttrf( lapack_int n, lapack_complex_double* dl,\n                           lapack_complex_double* d, lapack_complex_double* du,\n                           lapack_complex_double* du2, lapack_int* ipiv );\n\nlapack_int LAPACKE_sgttrs( int matrix_order, char trans, lapack_int n,\n                           lapack_int nrhs, const float* dl, const float* d,\n                           const float* du, const float* du2,\n                           const lapack_int* ipiv, float* b, lapack_int ldb );\nlapack_int LAPACKE_dgttrs( int matrix_order, char trans, lapack_int n,\n                           lapack_int nrhs, const double* dl, const double* d,\n                           const double* du, const double* du2,\n                           const lapack_int* ipiv, double* b, lapack_int ldb );\nlapack_int LAPACKE_cgttrs( int matrix_order, char trans, lapack_int n,\n                           lapack_int nrhs, const lapack_complex_float* dl,\n                           const lapack_complex_float* d,\n                           const lapack_complex_float* du,\n                           const lapack_complex_float* du2,\n                           const lapack_int* ipiv, lapack_complex_float* b,\n                           lapack_int ldb );\nlapack_int LAPACKE_zgttrs( int matrix_order, char trans, lapack_int n,\n                           lapack_int nrhs, const lapack_complex_double* dl,\n                           
const lapack_complex_double* d,\n                           const lapack_complex_double* du,\n                           const lapack_complex_double* du2,\n                           const lapack_int* ipiv, lapack_complex_double* b,\n                           lapack_int ldb );\n\nlapack_int LAPACKE_chbev( int matrix_order, char jobz, char uplo, lapack_int n,\n                          lapack_int kd, lapack_complex_float* ab,\n                          lapack_int ldab, float* w, lapack_complex_float* z,\n                          lapack_int ldz );\nlapack_int LAPACKE_zhbev( int matrix_order, char jobz, char uplo, lapack_int n,\n                          lapack_int kd, lapack_complex_double* ab,\n                          lapack_int ldab, double* w, lapack_complex_double* z,\n                          lapack_int ldz );\n\nlapack_int LAPACKE_chbevd( int matrix_order, char jobz, char uplo, lapack_int n,\n                           lapack_int kd, lapack_complex_float* ab,\n                           lapack_int ldab, float* w, lapack_complex_float* z,\n                           lapack_int ldz );\nlapack_int LAPACKE_zhbevd( int matrix_order, char jobz, char uplo, lapack_int n,\n                           lapack_int kd, lapack_complex_double* ab,\n                           lapack_int ldab, double* w, lapack_complex_double* z,\n                           lapack_int ldz );\n\nlapack_int LAPACKE_chbevx( int matrix_order, char jobz, char range, char uplo,\n                           lapack_int n, lapack_int kd,\n                           lapack_complex_float* ab, lapack_int ldab,\n                           lapack_complex_float* q, lapack_int ldq, float vl,\n                           float vu, lapack_int il, lapack_int iu, float abstol,\n                           lapack_int* m, float* w, lapack_complex_float* z,\n                           lapack_int ldz, lapack_int* ifail );\nlapack_int LAPACKE_zhbevx( int matrix_order, char jobz, char range, char uplo,\n                           lapack_int n, lapack_int kd,\n                           lapack_complex_double* ab, lapack_int ldab,\n                           lapack_complex_double* q, lapack_int ldq, double vl,\n                           double vu, lapack_int il, lapack_int iu,\n                           double abstol, lapack_int* m, double* w,\n                           lapack_complex_double* z, lapack_int ldz,\n                           lapack_int* ifail );\n\nlapack_int LAPACKE_chbgst( int matrix_order, char vect, char uplo, lapack_int n,\n                           lapack_int ka, lapack_int kb,\n                           lapack_complex_float* ab, lapack_int ldab,\n                           const lapack_complex_float* bb, lapack_int ldbb,\n                           lapack_complex_float* x, lapack_int ldx );\nlapack_int LAPACKE_zhbgst( int matrix_order, char vect, char uplo, lapack_int n,\n                           lapack_int ka, lapack_int kb,\n                           lapack_complex_double* ab, lapack_int ldab,\n                           const lapack_complex_double* bb, lapack_int ldbb,\n                           lapack_complex_double* x, lapack_int ldx );\n\nlapack_int LAPACKE_chbgv( int matrix_order, char jobz, char uplo, lapack_int n,\n                          lapack_int ka, lapack_int kb,\n                          lapack_complex_float* ab, lapack_int ldab,\n                          lapack_complex_float* bb, lapack_int ldbb, float* w,\n                          lapack_complex_float* z, lapack_int ldz );\nlapack_int 
LAPACKE_zhbgv( int matrix_order, char jobz, char uplo, lapack_int n,\n                          lapack_int ka, lapack_int kb,\n                          lapack_complex_double* ab, lapack_int ldab,\n                          lapack_complex_double* bb, lapack_int ldbb, double* w,\n                          lapack_complex_double* z, lapack_int ldz );\n\nlapack_int LAPACKE_chbgvd( int matrix_order, char jobz, char uplo, lapack_int n,\n                           lapack_int ka, lapack_int kb,\n                           lapack_complex_float* ab, lapack_int ldab,\n                           lapack_complex_float* bb, lapack_int ldbb, float* w,\n                           lapack_complex_float* z, lapack_int ldz );\nlapack_int LAPACKE_zhbgvd( int matrix_order, char jobz, char uplo, lapack_int n,\n                           lapack_int ka, lapack_int kb,\n                           lapack_complex_double* ab, lapack_int ldab,\n                           lapack_complex_double* bb, lapack_int ldbb,\n                           double* w, lapack_complex_double* z,\n                           lapack_int ldz );\n\nlapack_int LAPACKE_chbgvx( int matrix_order, char jobz, char range, char uplo,\n                           lapack_int n, lapack_int ka, lapack_int kb,\n                           lapack_complex_float* ab, lapack_int ldab,\n                           lapack_complex_float* bb, lapack_int ldbb,\n                           lapack_complex_float* q, lapack_int ldq, float vl,\n                           float vu, lapack_int il, lapack_int iu, float abstol,\n                           lapack_int* m, float* w, lapack_complex_float* z,\n                           lapack_int ldz, lapack_int* ifail );\nlapack_int LAPACKE_zhbgvx( int matrix_order, char jobz, char range, char uplo,\n                           lapack_int n, lapack_int ka, lapack_int kb,\n                           lapack_complex_double* ab, lapack_int ldab,\n                           lapack_complex_double* bb, lapack_int ldbb,\n                           lapack_complex_double* q, lapack_int ldq, double vl,\n                           double vu, lapack_int il, lapack_int iu,\n                           double abstol, lapack_int* m, double* w,\n                           lapack_complex_double* z, lapack_int ldz,\n                           lapack_int* ifail );\n\nlapack_int LAPACKE_chbtrd( int matrix_order, char vect, char uplo, lapack_int n,\n                           lapack_int kd, lapack_complex_float* ab,\n                           lapack_int ldab, float* d, float* e,\n                           lapack_complex_float* q, lapack_int ldq );\nlapack_int LAPACKE_zhbtrd( int matrix_order, char vect, char uplo, lapack_int n,\n                           lapack_int kd, lapack_complex_double* ab,\n                           lapack_int ldab, double* d, double* e,\n                           lapack_complex_double* q, lapack_int ldq );\n\nlapack_int LAPACKE_checon( int matrix_order, char uplo, lapack_int n,\n                           const lapack_complex_float* a, lapack_int lda,\n                           const lapack_int* ipiv, float anorm, float* rcond );\nlapack_int LAPACKE_zhecon( int matrix_order, char uplo, lapack_int n,\n                           const lapack_complex_double* a, lapack_int lda,\n                           const lapack_int* ipiv, double anorm,\n                           double* rcond );\n\nlapack_int LAPACKE_cheequb( int matrix_order, char uplo, lapack_int n,\n                            const lapack_complex_float* a, 
lapack_int lda,\n                            float* s, float* scond, float* amax );\nlapack_int LAPACKE_zheequb( int matrix_order, char uplo, lapack_int n,\n                            const lapack_complex_double* a, lapack_int lda,\n                            double* s, double* scond, double* amax );\n\nlapack_int LAPACKE_cheev( int matrix_order, char jobz, char uplo, lapack_int n,\n                          lapack_complex_float* a, lapack_int lda, float* w );\nlapack_int LAPACKE_zheev( int matrix_order, char jobz, char uplo, lapack_int n,\n                          lapack_complex_double* a, lapack_int lda, double* w );\n\nlapack_int LAPACKE_cheevd( int matrix_order, char jobz, char uplo, lapack_int n,\n                           lapack_complex_float* a, lapack_int lda, float* w );\nlapack_int LAPACKE_zheevd( int matrix_order, char jobz, char uplo, lapack_int n,\n                           lapack_complex_double* a, lapack_int lda,\n                           double* w );\n\nlapack_int LAPACKE_cheevr( int matrix_order, char jobz, char range, char uplo,\n                           lapack_int n, lapack_complex_float* a,\n                           lapack_int lda, float vl, float vu, lapack_int il,\n                           lapack_int iu, float abstol, lapack_int* m, float* w,\n                           lapack_complex_float* z, lapack_int ldz,\n                           lapack_int* isuppz );\nlapack_int LAPACKE_zheevr( int matrix_order, char jobz, char range, char uplo,\n                           lapack_int n, lapack_complex_double* a,\n                           lapack_int lda, double vl, double vu, lapack_int il,\n                           lapack_int iu, double abstol, lapack_int* m,\n                           double* w, lapack_complex_double* z, lapack_int ldz,\n                           lapack_int* isuppz );\n\nlapack_int LAPACKE_cheevx( int matrix_order, char jobz, char range, char uplo,\n                           lapack_int n, lapack_complex_float* a,\n                           lapack_int lda, float vl, float vu, lapack_int il,\n                           lapack_int iu, float abstol, lapack_int* m, float* w,\n                           lapack_complex_float* z, lapack_int ldz,\n                           lapack_int* ifail );\nlapack_int LAPACKE_zheevx( int matrix_order, char jobz, char range, char uplo,\n                           lapack_int n, lapack_complex_double* a,\n                           lapack_int lda, double vl, double vu, lapack_int il,\n                           lapack_int iu, double abstol, lapack_int* m,\n                           double* w, lapack_complex_double* z, lapack_int ldz,\n                           lapack_int* ifail );\n\nlapack_int LAPACKE_chegst( int matrix_order, lapack_int itype, char uplo,\n                           lapack_int n, lapack_complex_float* a,\n                           lapack_int lda, const lapack_complex_float* b,\n                           lapack_int ldb );\nlapack_int LAPACKE_zhegst( int matrix_order, lapack_int itype, char uplo,\n                           lapack_int n, lapack_complex_double* a,\n                           lapack_int lda, const lapack_complex_double* b,\n                           lapack_int ldb );\n\nlapack_int LAPACKE_chegv( int matrix_order, lapack_int itype, char jobz,\n                          char uplo, lapack_int n, lapack_complex_float* a,\n                          lapack_int lda, lapack_complex_float* b,\n                          lapack_int ldb, float* w );\nlapack_int LAPACKE_zhegv( int 
matrix_order, lapack_int itype, char jobz,\n                          char uplo, lapack_int n, lapack_complex_double* a,\n                          lapack_int lda, lapack_complex_double* b,\n                          lapack_int ldb, double* w );\n\nlapack_int LAPACKE_chegvd( int matrix_order, lapack_int itype, char jobz,\n                           char uplo, lapack_int n, lapack_complex_float* a,\n                           lapack_int lda, lapack_complex_float* b,\n                           lapack_int ldb, float* w );\nlapack_int LAPACKE_zhegvd( int matrix_order, lapack_int itype, char jobz,\n                           char uplo, lapack_int n, lapack_complex_double* a,\n                           lapack_int lda, lapack_complex_double* b,\n                           lapack_int ldb, double* w );\n\nlapack_int LAPACKE_chegvx( int matrix_order, lapack_int itype, char jobz,\n                           char range, char uplo, lapack_int n,\n                           lapack_complex_float* a, lapack_int lda,\n                           lapack_complex_float* b, lapack_int ldb, float vl,\n                           float vu, lapack_int il, lapack_int iu, float abstol,\n                           lapack_int* m, float* w, lapack_complex_float* z,\n                           lapack_int ldz, lapack_int* ifail );\nlapack_int LAPACKE_zhegvx( int matrix_order, lapack_int itype, char jobz,\n                           char range, char uplo, lapack_int n,\n                           lapack_complex_double* a, lapack_int lda,\n                           lapack_complex_double* b, lapack_int ldb, double vl,\n                           double vu, lapack_int il, lapack_int iu,\n                           double abstol, lapack_int* m, double* w,\n                           lapack_complex_double* z, lapack_int ldz,\n                           lapack_int* ifail );\n\nlapack_int LAPACKE_cherfs( int matrix_order, char uplo, lapack_int n,\n                           lapack_int nrhs, const lapack_complex_float* a,\n                           lapack_int lda, const lapack_complex_float* af,\n                           lapack_int ldaf, const lapack_int* ipiv,\n                           const lapack_complex_float* b, lapack_int ldb,\n                           lapack_complex_float* x, lapack_int ldx, float* ferr,\n                           float* berr );\nlapack_int LAPACKE_zherfs( int matrix_order, char uplo, lapack_int n,\n                           lapack_int nrhs, const lapack_complex_double* a,\n                           lapack_int lda, const lapack_complex_double* af,\n                           lapack_int ldaf, const lapack_int* ipiv,\n                           const lapack_complex_double* b, lapack_int ldb,\n                           lapack_complex_double* x, lapack_int ldx,\n                           double* ferr, double* berr );\n\nlapack_int LAPACKE_cherfsx( int matrix_order, char uplo, char equed,\n                            lapack_int n, lapack_int nrhs,\n                            const lapack_complex_float* a, lapack_int lda,\n                            const lapack_complex_float* af, lapack_int ldaf,\n                            const lapack_int* ipiv, const float* s,\n                            const lapack_complex_float* b, lapack_int ldb,\n                            lapack_complex_float* x, lapack_int ldx,\n                            float* rcond, float* berr, lapack_int n_err_bnds,\n                            float* err_bnds_norm, float* err_bnds_comp,\n                            lapack_int 
nparams, float* params );\nlapack_int LAPACKE_zherfsx( int matrix_order, char uplo, char equed,\n                            lapack_int n, lapack_int nrhs,\n                            const lapack_complex_double* a, lapack_int lda,\n                            const lapack_complex_double* af, lapack_int ldaf,\n                            const lapack_int* ipiv, const double* s,\n                            const lapack_complex_double* b, lapack_int ldb,\n                            lapack_complex_double* x, lapack_int ldx,\n                            double* rcond, double* berr, lapack_int n_err_bnds,\n                            double* err_bnds_norm, double* err_bnds_comp,\n                            lapack_int nparams, double* params );\n\nlapack_int LAPACKE_chesv( int matrix_order, char uplo, lapack_int n,\n                          lapack_int nrhs, lapack_complex_float* a,\n                          lapack_int lda, lapack_int* ipiv,\n                          lapack_complex_float* b, lapack_int ldb );\nlapack_int LAPACKE_zhesv( int matrix_order, char uplo, lapack_int n,\n                          lapack_int nrhs, lapack_complex_double* a,\n                          lapack_int lda, lapack_int* ipiv,\n                          lapack_complex_double* b, lapack_int ldb );\n\nlapack_int LAPACKE_chesvx( int matrix_order, char fact, char uplo, lapack_int n,\n                           lapack_int nrhs, const lapack_complex_float* a,\n                           lapack_int lda, lapack_complex_float* af,\n                           lapack_int ldaf, lapack_int* ipiv,\n                           const lapack_complex_float* b, lapack_int ldb,\n                           lapack_complex_float* x, lapack_int ldx,\n                           float* rcond, float* ferr, float* berr );\nlapack_int LAPACKE_zhesvx( int matrix_order, char fact, char uplo, lapack_int n,\n                           lapack_int nrhs, const lapack_complex_double* a,\n                           lapack_int lda, lapack_complex_double* af,\n                           lapack_int ldaf, lapack_int* ipiv,\n                           const lapack_complex_double* b, lapack_int ldb,\n                           lapack_complex_double* x, lapack_int ldx,\n                           double* rcond, double* ferr, double* berr );\n\nlapack_int LAPACKE_chesvxx( int matrix_order, char fact, char uplo,\n                            lapack_int n, lapack_int nrhs,\n                            lapack_complex_float* a, lapack_int lda,\n                            lapack_complex_float* af, lapack_int ldaf,\n                            lapack_int* ipiv, char* equed, float* s,\n                            lapack_complex_float* b, lapack_int ldb,\n                            lapack_complex_float* x, lapack_int ldx,\n                            float* rcond, float* rpvgrw, float* berr,\n                            lapack_int n_err_bnds, float* err_bnds_norm,\n                            float* err_bnds_comp, lapack_int nparams,\n                            float* params );\nlapack_int LAPACKE_zhesvxx( int matrix_order, char fact, char uplo,\n                            lapack_int n, lapack_int nrhs,\n                            lapack_complex_double* a, lapack_int lda,\n                            lapack_complex_double* af, lapack_int ldaf,\n                            lapack_int* ipiv, char* equed, double* s,\n                            lapack_complex_double* b, lapack_int ldb,\n                            lapack_complex_double* x, lapack_int ldx,\n       
                     double* rcond, double* rpvgrw, double* berr,\n                            lapack_int n_err_bnds, double* err_bnds_norm,\n                            double* err_bnds_comp, lapack_int nparams,\n                            double* params );\n\nlapack_int LAPACKE_chetrd( int matrix_order, char uplo, lapack_int n,\n                           lapack_complex_float* a, lapack_int lda, float* d,\n                           float* e, lapack_complex_float* tau );\nlapack_int LAPACKE_zhetrd( int matrix_order, char uplo, lapack_int n,\n                           lapack_complex_double* a, lapack_int lda, double* d,\n                           double* e, lapack_complex_double* tau );\n\nlapack_int LAPACKE_chetrf( int matrix_order, char uplo, lapack_int n,\n                           lapack_complex_float* a, lapack_int lda,\n                           lapack_int* ipiv );\nlapack_int LAPACKE_zhetrf( int matrix_order, char uplo, lapack_int n,\n                           lapack_complex_double* a, lapack_int lda,\n                           lapack_int* ipiv );\n\nlapack_int LAPACKE_chetri( int matrix_order, char uplo, lapack_int n,\n                           lapack_complex_float* a, lapack_int lda,\n                           const lapack_int* ipiv );\nlapack_int LAPACKE_zhetri( int matrix_order, char uplo, lapack_int n,\n                           lapack_complex_double* a, lapack_int lda,\n                           const lapack_int* ipiv );\n\nlapack_int LAPACKE_chetrs( int matrix_order, char uplo, lapack_int n,\n                           lapack_int nrhs, const lapack_complex_float* a,\n                           lapack_int lda, const lapack_int* ipiv,\n                           lapack_complex_float* b, lapack_int ldb );\nlapack_int LAPACKE_zhetrs( int matrix_order, char uplo, lapack_int n,\n                           lapack_int nrhs, const lapack_complex_double* a,\n                           lapack_int lda, const lapack_int* ipiv,\n                           lapack_complex_double* b, lapack_int ldb );\n\nlapack_int LAPACKE_chfrk( int matrix_order, char transr, char uplo, char trans,\n                          lapack_int n, lapack_int k, float alpha,\n                          const lapack_complex_float* a, lapack_int lda,\n                          float beta, lapack_complex_float* c );\nlapack_int LAPACKE_zhfrk( int matrix_order, char transr, char uplo, char trans,\n                          lapack_int n, lapack_int k, double alpha,\n                          const lapack_complex_double* a, lapack_int lda,\n                          double beta, lapack_complex_double* c );\n\nlapack_int LAPACKE_shgeqz( int matrix_order, char job, char compq, char compz,\n                           lapack_int n, lapack_int ilo, lapack_int ihi,\n                           float* h, lapack_int ldh, float* t, lapack_int ldt,\n                           float* alphar, float* alphai, float* beta, float* q,\n                           lapack_int ldq, float* z, lapack_int ldz );\nlapack_int LAPACKE_dhgeqz( int matrix_order, char job, char compq, char compz,\n                           lapack_int n, lapack_int ilo, lapack_int ihi,\n                           double* h, lapack_int ldh, double* t, lapack_int ldt,\n                           double* alphar, double* alphai, double* beta,\n                           double* q, lapack_int ldq, double* z,\n                           lapack_int ldz );\nlapack_int LAPACKE_chgeqz( int matrix_order, char job, char compq, char compz,\n                           
lapack_int n, lapack_int ilo, lapack_int ihi,\n                           lapack_complex_float* h, lapack_int ldh,\n                           lapack_complex_float* t, lapack_int ldt,\n                           lapack_complex_float* alpha,\n                           lapack_complex_float* beta, lapack_complex_float* q,\n                           lapack_int ldq, lapack_complex_float* z,\n                           lapack_int ldz );\nlapack_int LAPACKE_zhgeqz( int matrix_order, char job, char compq, char compz,\n                           lapack_int n, lapack_int ilo, lapack_int ihi,\n                           lapack_complex_double* h, lapack_int ldh,\n                           lapack_complex_double* t, lapack_int ldt,\n                           lapack_complex_double* alpha,\n                           lapack_complex_double* beta,\n                           lapack_complex_double* q, lapack_int ldq,\n                           lapack_complex_double* z, lapack_int ldz );\n\nlapack_int LAPACKE_chpcon( int matrix_order, char uplo, lapack_int n,\n                           const lapack_complex_float* ap,\n                           const lapack_int* ipiv, float anorm, float* rcond );\nlapack_int LAPACKE_zhpcon( int matrix_order, char uplo, lapack_int n,\n                           const lapack_complex_double* ap,\n                           const lapack_int* ipiv, double anorm,\n                           double* rcond );\n\nlapack_int LAPACKE_chpev( int matrix_order, char jobz, char uplo, lapack_int n,\n                          lapack_complex_float* ap, float* w,\n                          lapack_complex_float* z, lapack_int ldz );\nlapack_int LAPACKE_zhpev( int matrix_order, char jobz, char uplo, lapack_int n,\n                          lapack_complex_double* ap, double* w,\n                          lapack_complex_double* z, lapack_int ldz );\n\nlapack_int LAPACKE_chpevd( int matrix_order, char jobz, char uplo, lapack_int n,\n                           lapack_complex_float* ap, float* w,\n                           lapack_complex_float* z, lapack_int ldz );\nlapack_int LAPACKE_zhpevd( int matrix_order, char jobz, char uplo, lapack_int n,\n                           lapack_complex_double* ap, double* w,\n                           lapack_complex_double* z, lapack_int ldz );\n\nlapack_int LAPACKE_chpevx( int matrix_order, char jobz, char range, char uplo,\n                           lapack_int n, lapack_complex_float* ap, float vl,\n                           float vu, lapack_int il, lapack_int iu, float abstol,\n                           lapack_int* m, float* w, lapack_complex_float* z,\n                           lapack_int ldz, lapack_int* ifail );\nlapack_int LAPACKE_zhpevx( int matrix_order, char jobz, char range, char uplo,\n                           lapack_int n, lapack_complex_double* ap, double vl,\n                           double vu, lapack_int il, lapack_int iu,\n                           double abstol, lapack_int* m, double* w,\n                           lapack_complex_double* z, lapack_int ldz,\n                           lapack_int* ifail );\n\nlapack_int LAPACKE_chpgst( int matrix_order, lapack_int itype, char uplo,\n                           lapack_int n, lapack_complex_float* ap,\n                           const lapack_complex_float* bp );\nlapack_int LAPACKE_zhpgst( int matrix_order, lapack_int itype, char uplo,\n                           lapack_int n, lapack_complex_double* ap,\n                           const lapack_complex_double* bp );\n\nlapack_int 
LAPACKE_chpgv( int matrix_order, lapack_int itype, char jobz,\n                          char uplo, lapack_int n, lapack_complex_float* ap,\n                          lapack_complex_float* bp, float* w,\n                          lapack_complex_float* z, lapack_int ldz );\nlapack_int LAPACKE_zhpgv( int matrix_order, lapack_int itype, char jobz,\n                          char uplo, lapack_int n, lapack_complex_double* ap,\n                          lapack_complex_double* bp, double* w,\n                          lapack_complex_double* z, lapack_int ldz );\n\nlapack_int LAPACKE_chpgvd( int matrix_order, lapack_int itype, char jobz,\n                           char uplo, lapack_int n, lapack_complex_float* ap,\n                           lapack_complex_float* bp, float* w,\n                           lapack_complex_float* z, lapack_int ldz );\nlapack_int LAPACKE_zhpgvd( int matrix_order, lapack_int itype, char jobz,\n                           char uplo, lapack_int n, lapack_complex_double* ap,\n                           lapack_complex_double* bp, double* w,\n                           lapack_complex_double* z, lapack_int ldz );\n\nlapack_int LAPACKE_chpgvx( int matrix_order, lapack_int itype, char jobz,\n                           char range, char uplo, lapack_int n,\n                           lapack_complex_float* ap, lapack_complex_float* bp,\n                           float vl, float vu, lapack_int il, lapack_int iu,\n                           float abstol, lapack_int* m, float* w,\n                           lapack_complex_float* z, lapack_int ldz,\n                           lapack_int* ifail );\nlapack_int LAPACKE_zhpgvx( int matrix_order, lapack_int itype, char jobz,\n                           char range, char uplo, lapack_int n,\n                           lapack_complex_double* ap, lapack_complex_double* bp,\n                           double vl, double vu, lapack_int il, lapack_int iu,\n                           double abstol, lapack_int* m, double* w,\n                           lapack_complex_double* z, lapack_int ldz,\n                           lapack_int* ifail );\n\nlapack_int LAPACKE_chprfs( int matrix_order, char uplo, lapack_int n,\n                           lapack_int nrhs, const lapack_complex_float* ap,\n                           const lapack_complex_float* afp,\n                           const lapack_int* ipiv,\n                           const lapack_complex_float* b, lapack_int ldb,\n                           lapack_complex_float* x, lapack_int ldx, float* ferr,\n                           float* berr );\nlapack_int LAPACKE_zhprfs( int matrix_order, char uplo, lapack_int n,\n                           lapack_int nrhs, const lapack_complex_double* ap,\n                           const lapack_complex_double* afp,\n                           const lapack_int* ipiv,\n                           const lapack_complex_double* b, lapack_int ldb,\n                           lapack_complex_double* x, lapack_int ldx,\n                           double* ferr, double* berr );\n\nlapack_int LAPACKE_chpsv( int matrix_order, char uplo, lapack_int n,\n                          lapack_int nrhs, lapack_complex_float* ap,\n                          lapack_int* ipiv, lapack_complex_float* b,\n                          lapack_int ldb );\nlapack_int LAPACKE_zhpsv( int matrix_order, char uplo, lapack_int n,\n                          lapack_int nrhs, lapack_complex_double* ap,\n                          lapack_int* ipiv, lapack_complex_double* b,\n                          
lapack_int ldb );\n\nlapack_int LAPACKE_chpsvx( int matrix_order, char fact, char uplo, lapack_int n,\n                           lapack_int nrhs, const lapack_complex_float* ap,\n                           lapack_complex_float* afp, lapack_int* ipiv,\n                           const lapack_complex_float* b, lapack_int ldb,\n                           lapack_complex_float* x, lapack_int ldx,\n                           float* rcond, float* ferr, float* berr );\nlapack_int LAPACKE_zhpsvx( int matrix_order, char fact, char uplo, lapack_int n,\n                           lapack_int nrhs, const lapack_complex_double* ap,\n                           lapack_complex_double* afp, lapack_int* ipiv,\n                           const lapack_complex_double* b, lapack_int ldb,\n                           lapack_complex_double* x, lapack_int ldx,\n                           double* rcond, double* ferr, double* berr );\n\nlapack_int LAPACKE_chptrd( int matrix_order, char uplo, lapack_int n,\n                           lapack_complex_float* ap, float* d, float* e,\n                           lapack_complex_float* tau );\nlapack_int LAPACKE_zhptrd( int matrix_order, char uplo, lapack_int n,\n                           lapack_complex_double* ap, double* d, double* e,\n                           lapack_complex_double* tau );\n\nlapack_int LAPACKE_chptrf( int matrix_order, char uplo, lapack_int n,\n                           lapack_complex_float* ap, lapack_int* ipiv );\nlapack_int LAPACKE_zhptrf( int matrix_order, char uplo, lapack_int n,\n                           lapack_complex_double* ap, lapack_int* ipiv );\n\nlapack_int LAPACKE_chptri( int matrix_order, char uplo, lapack_int n,\n                           lapack_complex_float* ap, const lapack_int* ipiv );\nlapack_int LAPACKE_zhptri( int matrix_order, char uplo, lapack_int n,\n                           lapack_complex_double* ap, const lapack_int* ipiv );\n\nlapack_int LAPACKE_chptrs( int matrix_order, char uplo, lapack_int n,\n                           lapack_int nrhs, const lapack_complex_float* ap,\n                           const lapack_int* ipiv, lapack_complex_float* b,\n                           lapack_int ldb );\nlapack_int LAPACKE_zhptrs( int matrix_order, char uplo, lapack_int n,\n                           lapack_int nrhs, const lapack_complex_double* ap,\n                           const lapack_int* ipiv, lapack_complex_double* b,\n                           lapack_int ldb );\n\nlapack_int LAPACKE_shsein( int matrix_order, char job, char eigsrc, char initv,\n                           lapack_logical* select, lapack_int n, const float* h,\n                           lapack_int ldh, float* wr, const float* wi,\n                           float* vl, lapack_int ldvl, float* vr,\n                           lapack_int ldvr, lapack_int mm, lapack_int* m,\n                           lapack_int* ifaill, lapack_int* ifailr );\nlapack_int LAPACKE_dhsein( int matrix_order, char job, char eigsrc, char initv,\n                           lapack_logical* select, lapack_int n,\n                           const double* h, lapack_int ldh, double* wr,\n                           const double* wi, double* vl, lapack_int ldvl,\n                           double* vr, lapack_int ldvr, lapack_int mm,\n                           lapack_int* m, lapack_int* ifaill,\n                           lapack_int* ifailr );\nlapack_int LAPACKE_chsein( int matrix_order, char job, char eigsrc, char initv,\n                           const lapack_logical* select, lapack_int 
n,\n                           const lapack_complex_float* h, lapack_int ldh,\n                           lapack_complex_float* w, lapack_complex_float* vl,\n                           lapack_int ldvl, lapack_complex_float* vr,\n                           lapack_int ldvr, lapack_int mm, lapack_int* m,\n                           lapack_int* ifaill, lapack_int* ifailr );\nlapack_int LAPACKE_zhsein( int matrix_order, char job, char eigsrc, char initv,\n                           const lapack_logical* select, lapack_int n,\n                           const lapack_complex_double* h, lapack_int ldh,\n                           lapack_complex_double* w, lapack_complex_double* vl,\n                           lapack_int ldvl, lapack_complex_double* vr,\n                           lapack_int ldvr, lapack_int mm, lapack_int* m,\n                           lapack_int* ifaill, lapack_int* ifailr );\n\nlapack_int LAPACKE_shseqr( int matrix_order, char job, char compz, lapack_int n,\n                           lapack_int ilo, lapack_int ihi, float* h,\n                           lapack_int ldh, float* wr, float* wi, float* z,\n                           lapack_int ldz );\nlapack_int LAPACKE_dhseqr( int matrix_order, char job, char compz, lapack_int n,\n                           lapack_int ilo, lapack_int ihi, double* h,\n                           lapack_int ldh, double* wr, double* wi, double* z,\n                           lapack_int ldz );\nlapack_int LAPACKE_chseqr( int matrix_order, char job, char compz, lapack_int n,\n                           lapack_int ilo, lapack_int ihi,\n                           lapack_complex_float* h, lapack_int ldh,\n                           lapack_complex_float* w, lapack_complex_float* z,\n                           lapack_int ldz );\nlapack_int LAPACKE_zhseqr( int matrix_order, char job, char compz, lapack_int n,\n                           lapack_int ilo, lapack_int ihi,\n                           lapack_complex_double* h, lapack_int ldh,\n                           lapack_complex_double* w, lapack_complex_double* z,\n                           lapack_int ldz );\n\nlapack_int LAPACKE_clacgv( lapack_int n, lapack_complex_float* x,\n                           lapack_int incx );\nlapack_int LAPACKE_zlacgv( lapack_int n, lapack_complex_double* x,\n                           lapack_int incx );\n\nlapack_int LAPACKE_slacpy( int matrix_order, char uplo, lapack_int m,\n                           lapack_int n, const float* a, lapack_int lda, float* b,\n                           lapack_int ldb );\nlapack_int LAPACKE_dlacpy( int matrix_order, char uplo, lapack_int m,\n                           lapack_int n, const double* a, lapack_int lda, double* b,\n                           lapack_int ldb );\nlapack_int LAPACKE_clacpy( int matrix_order, char uplo, lapack_int m,\n                           lapack_int n, const lapack_complex_float* a,\n                           lapack_int lda, lapack_complex_float* b,\n                           lapack_int ldb );\nlapack_int LAPACKE_zlacpy( int matrix_order, char uplo, lapack_int m,\n                           lapack_int n, const lapack_complex_double* a,\n                           lapack_int lda, lapack_complex_double* b,\n                           lapack_int ldb );\n\nlapack_int LAPACKE_zlag2c( int matrix_order, lapack_int m, lapack_int n,\n                           const lapack_complex_double* a, lapack_int lda,\n                           lapack_complex_float* sa, lapack_int ldsa );\n\nlapack_int LAPACKE_slag2d( int 
matrix_order, lapack_int m, lapack_int n,\n                           const float* sa, lapack_int ldsa, double* a,\n                           lapack_int lda );\n\nlapack_int LAPACKE_dlag2s( int matrix_order, lapack_int m, lapack_int n,\n                           const double* a, lapack_int lda, float* sa,\n                           lapack_int ldsa );\n\nlapack_int LAPACKE_clag2z( int matrix_order, lapack_int m, lapack_int n,\n                           const lapack_complex_float* sa, lapack_int ldsa,\n                           lapack_complex_double* a, lapack_int lda );\n\nlapack_int LAPACKE_slagge( int matrix_order, lapack_int m, lapack_int n,\n                           lapack_int kl, lapack_int ku, const float* d,\n                           float* a, lapack_int lda, lapack_int* iseed );\nlapack_int LAPACKE_dlagge( int matrix_order, lapack_int m, lapack_int n,\n                           lapack_int kl, lapack_int ku, const double* d,\n                           double* a, lapack_int lda, lapack_int* iseed );\nlapack_int LAPACKE_clagge( int matrix_order, lapack_int m, lapack_int n,\n                           lapack_int kl, lapack_int ku, const float* d,\n                           lapack_complex_float* a, lapack_int lda,\n                           lapack_int* iseed );\nlapack_int LAPACKE_zlagge( int matrix_order, lapack_int m, lapack_int n,\n                           lapack_int kl, lapack_int ku, const double* d,\n                           lapack_complex_double* a, lapack_int lda,\n                           lapack_int* iseed );\n\nfloat LAPACKE_slamch( char cmach );\ndouble LAPACKE_dlamch( char cmach );\n\nfloat LAPACKE_slange( int matrix_order, char norm, lapack_int m,\n                           lapack_int n, const float* a, lapack_int lda );\ndouble LAPACKE_dlange( int matrix_order, char norm, lapack_int m,\n                           lapack_int n, const double* a, lapack_int lda );\nfloat LAPACKE_clange( int matrix_order, char norm, lapack_int m,\n                           lapack_int n, const lapack_complex_float* a,\n                           lapack_int lda );\ndouble LAPACKE_zlange( int matrix_order, char norm, lapack_int m,\n                           lapack_int n, const lapack_complex_double* a,\n                           lapack_int lda );\n\nfloat LAPACKE_clanhe( int matrix_order, char norm, char uplo, lapack_int n,\n                           const lapack_complex_float* a, lapack_int lda );\ndouble LAPACKE_zlanhe( int matrix_order, char norm, char uplo, lapack_int n,\n                           const lapack_complex_double* a, lapack_int lda );\n\nfloat LAPACKE_slansy( int matrix_order, char norm, char uplo, lapack_int n,\n                           const float* a, lapack_int lda );\ndouble LAPACKE_dlansy( int matrix_order, char norm, char uplo, lapack_int n,\n                           const double* a, lapack_int lda );\nfloat LAPACKE_clansy( int matrix_order, char norm, char uplo, lapack_int n,\n                           const lapack_complex_float* a, lapack_int lda );\ndouble LAPACKE_zlansy( int matrix_order, char norm, char uplo, lapack_int n,\n                           const lapack_complex_double* a, lapack_int lda );\n\nfloat LAPACKE_slantr( int matrix_order, char norm, char uplo, char diag,\n                           lapack_int m, lapack_int n, const float* a,\n                           lapack_int lda );\ndouble LAPACKE_dlantr( int matrix_order, char norm, char uplo, char diag,\n                           lapack_int m, lapack_int n, const double* a,\n      
                     lapack_int lda );\nfloat LAPACKE_clantr( int matrix_order, char norm, char uplo, char diag,\n                           lapack_int m, lapack_int n, const lapack_complex_float* a,\n                           lapack_int lda );\ndouble LAPACKE_zlantr( int matrix_order, char norm, char uplo, char diag,\n                           lapack_int m, lapack_int n, const lapack_complex_double* a,\n                           lapack_int lda );\n\n\nlapack_int LAPACKE_slarfb( int matrix_order, char side, char trans, char direct,\n                           char storev, lapack_int m, lapack_int n,\n                           lapack_int k, const float* v, lapack_int ldv,\n                           const float* t, lapack_int ldt, float* c,\n                           lapack_int ldc );\nlapack_int LAPACKE_dlarfb( int matrix_order, char side, char trans, char direct,\n                           char storev, lapack_int m, lapack_int n,\n                           lapack_int k, const double* v, lapack_int ldv,\n                           const double* t, lapack_int ldt, double* c,\n                           lapack_int ldc );\nlapack_int LAPACKE_clarfb( int matrix_order, char side, char trans, char direct,\n                           char storev, lapack_int m, lapack_int n,\n                           lapack_int k, const lapack_complex_float* v,\n                           lapack_int ldv, const lapack_complex_float* t,\n                           lapack_int ldt, lapack_complex_float* c,\n                           lapack_int ldc );\nlapack_int LAPACKE_zlarfb( int matrix_order, char side, char trans, char direct,\n                           char storev, lapack_int m, lapack_int n,\n                           lapack_int k, const lapack_complex_double* v,\n                           lapack_int ldv, const lapack_complex_double* t,\n                           lapack_int ldt, lapack_complex_double* c,\n                           lapack_int ldc );\n\nlapack_int LAPACKE_slarfg( lapack_int n, float* alpha, float* x,\n                           lapack_int incx, float* tau );\nlapack_int LAPACKE_dlarfg( lapack_int n, double* alpha, double* x,\n                           lapack_int incx, double* tau );\nlapack_int LAPACKE_clarfg( lapack_int n, lapack_complex_float* alpha,\n                           lapack_complex_float* x, lapack_int incx,\n                           lapack_complex_float* tau );\nlapack_int LAPACKE_zlarfg( lapack_int n, lapack_complex_double* alpha,\n                           lapack_complex_double* x, lapack_int incx,\n                           lapack_complex_double* tau );\n\nlapack_int LAPACKE_slarft( int matrix_order, char direct, char storev,\n                           lapack_int n, lapack_int k, const float* v,\n                           lapack_int ldv, const float* tau, float* t,\n                           lapack_int ldt );\nlapack_int LAPACKE_dlarft( int matrix_order, char direct, char storev,\n                           lapack_int n, lapack_int k, const double* v,\n                           lapack_int ldv, const double* tau, double* t,\n                           lapack_int ldt );\nlapack_int LAPACKE_clarft( int matrix_order, char direct, char storev,\n                           lapack_int n, lapack_int k,\n                           const lapack_complex_float* v, lapack_int ldv,\n                           const lapack_complex_float* tau,\n                           lapack_complex_float* t, lapack_int ldt );\nlapack_int LAPACKE_zlarft( int matrix_order, char direct, 
char storev,\n                           lapack_int n, lapack_int k,\n                           const lapack_complex_double* v, lapack_int ldv,\n                           const lapack_complex_double* tau,\n                           lapack_complex_double* t, lapack_int ldt );\n\nlapack_int LAPACKE_slarfx( int matrix_order, char side, lapack_int m,\n                           lapack_int n, const float* v, float tau, float* c,\n                           lapack_int ldc, float* work );\nlapack_int LAPACKE_dlarfx( int matrix_order, char side, lapack_int m,\n                           lapack_int n, const double* v, double tau, double* c,\n                           lapack_int ldc, double* work );\nlapack_int LAPACKE_clarfx( int matrix_order, char side, lapack_int m,\n                           lapack_int n, const lapack_complex_float* v,\n                           lapack_complex_float tau, lapack_complex_float* c,\n                           lapack_int ldc, lapack_complex_float* work );\nlapack_int LAPACKE_zlarfx( int matrix_order, char side, lapack_int m,\n                           lapack_int n, const lapack_complex_double* v,\n                           lapack_complex_double tau, lapack_complex_double* c,\n                           lapack_int ldc, lapack_complex_double* work );\n\nlapack_int LAPACKE_slarnv( lapack_int idist, lapack_int* iseed, lapack_int n,\n                           float* x );\nlapack_int LAPACKE_dlarnv( lapack_int idist, lapack_int* iseed, lapack_int n,\n                           double* x );\nlapack_int LAPACKE_clarnv( lapack_int idist, lapack_int* iseed, lapack_int n,\n                           lapack_complex_float* x );\nlapack_int LAPACKE_zlarnv( lapack_int idist, lapack_int* iseed, lapack_int n,\n                           lapack_complex_double* x );\n\nlapack_int LAPACKE_slaset( int matrix_order, char uplo, lapack_int m,\n                           lapack_int n, float alpha, float beta, float* a,\n                           lapack_int lda );\nlapack_int LAPACKE_dlaset( int matrix_order, char uplo, lapack_int m,\n                           lapack_int n, double alpha, double beta, double* a,\n                           lapack_int lda );\nlapack_int LAPACKE_claset( int matrix_order, char uplo, lapack_int m,\n                           lapack_int n, lapack_complex_float alpha,\n                           lapack_complex_float beta, lapack_complex_float* a,\n                           lapack_int lda );\nlapack_int LAPACKE_zlaset( int matrix_order, char uplo, lapack_int m,\n                           lapack_int n, lapack_complex_double alpha,\n                           lapack_complex_double beta, lapack_complex_double* a,\n                           lapack_int lda );\n\nlapack_int LAPACKE_slasrt( char id, lapack_int n, float* d );\nlapack_int LAPACKE_dlasrt( char id, lapack_int n, double* d );\n\nlapack_int LAPACKE_slaswp( int matrix_order, lapack_int n, float* a,\n                           lapack_int lda, lapack_int k1, lapack_int k2,\n                           const lapack_int* ipiv, lapack_int incx );\nlapack_int LAPACKE_dlaswp( int matrix_order, lapack_int n, double* a,\n                           lapack_int lda, lapack_int k1, lapack_int k2,\n                           const lapack_int* ipiv, lapack_int incx );\nlapack_int LAPACKE_claswp( int matrix_order, lapack_int n,\n                           lapack_complex_float* a, lapack_int lda,\n                           lapack_int k1, lapack_int k2, const lapack_int* ipiv,\n                           lapack_int 
incx );\nlapack_int LAPACKE_zlaswp( int matrix_order, lapack_int n,\n                           lapack_complex_double* a, lapack_int lda,\n                           lapack_int k1, lapack_int k2, const lapack_int* ipiv,\n                           lapack_int incx );\n\nlapack_int LAPACKE_slatms( int matrix_order, lapack_int m, lapack_int n,\n                           char dist, lapack_int* iseed, char sym, float* d,\n                           lapack_int mode, float cond, float dmax,\n                           lapack_int kl, lapack_int ku, char pack, float* a,\n                           lapack_int lda );\nlapack_int LAPACKE_dlatms( int matrix_order, lapack_int m, lapack_int n,\n                           char dist, lapack_int* iseed, char sym, double* d,\n                           lapack_int mode, double cond, double dmax,\n                           lapack_int kl, lapack_int ku, char pack, double* a,\n                           lapack_int lda );\nlapack_int LAPACKE_clatms( int matrix_order, lapack_int m, lapack_int n,\n                           char dist, lapack_int* iseed, char sym, float* d,\n                           lapack_int mode, float cond, float dmax,\n                           lapack_int kl, lapack_int ku, char pack,\n                           lapack_complex_float* a, lapack_int lda );\nlapack_int LAPACKE_zlatms( int matrix_order, lapack_int m, lapack_int n,\n                           char dist, lapack_int* iseed, char sym, double* d,\n                           lapack_int mode, double cond, double dmax,\n                           lapack_int kl, lapack_int ku, char pack,\n                           lapack_complex_double* a, lapack_int lda );\n\nlapack_int LAPACKE_slauum( int matrix_order, char uplo, lapack_int n, float* a,\n                           lapack_int lda );\nlapack_int LAPACKE_dlauum( int matrix_order, char uplo, lapack_int n, double* a,\n                           lapack_int lda );\nlapack_int LAPACKE_clauum( int matrix_order, char uplo, lapack_int n,\n                           lapack_complex_float* a, lapack_int lda );\nlapack_int LAPACKE_zlauum( int matrix_order, char uplo, lapack_int n,\n                           lapack_complex_double* a, lapack_int lda );\n\nlapack_int LAPACKE_sopgtr( int matrix_order, char uplo, lapack_int n,\n                           const float* ap, const float* tau, float* q,\n                           lapack_int ldq );\nlapack_int LAPACKE_dopgtr( int matrix_order, char uplo, lapack_int n,\n                           const double* ap, const double* tau, double* q,\n                           lapack_int ldq );\n\nlapack_int LAPACKE_sopmtr( int matrix_order, char side, char uplo, char trans,\n                           lapack_int m, lapack_int n, const float* ap,\n                           const float* tau, float* c, lapack_int ldc );\nlapack_int LAPACKE_dopmtr( int matrix_order, char side, char uplo, char trans,\n                           lapack_int m, lapack_int n, const double* ap,\n                           const double* tau, double* c, lapack_int ldc );\n\nlapack_int LAPACKE_sorgbr( int matrix_order, char vect, lapack_int m,\n                           lapack_int n, lapack_int k, float* a, lapack_int lda,\n                           const float* tau );\nlapack_int LAPACKE_dorgbr( int matrix_order, char vect, lapack_int m,\n                           lapack_int n, lapack_int k, double* a,\n                           lapack_int lda, const double* tau );\n\nlapack_int LAPACKE_sorghr( int matrix_order, lapack_int n, 
lapack_int ilo,\n                           lapack_int ihi, float* a, lapack_int lda,\n                           const float* tau );\nlapack_int LAPACKE_dorghr( int matrix_order, lapack_int n, lapack_int ilo,\n                           lapack_int ihi, double* a, lapack_int lda,\n                           const double* tau );\n\nlapack_int LAPACKE_sorglq( int matrix_order, lapack_int m, lapack_int n,\n                           lapack_int k, float* a, lapack_int lda,\n                           const float* tau );\nlapack_int LAPACKE_dorglq( int matrix_order, lapack_int m, lapack_int n,\n                           lapack_int k, double* a, lapack_int lda,\n                           const double* tau );\n\nlapack_int LAPACKE_sorgql( int matrix_order, lapack_int m, lapack_int n,\n                           lapack_int k, float* a, lapack_int lda,\n                           const float* tau );\nlapack_int LAPACKE_dorgql( int matrix_order, lapack_int m, lapack_int n,\n                           lapack_int k, double* a, lapack_int lda,\n                           const double* tau );\n\nlapack_int LAPACKE_sorgqr( int matrix_order, lapack_int m, lapack_int n,\n                           lapack_int k, float* a, lapack_int lda,\n                           const float* tau );\nlapack_int LAPACKE_dorgqr( int matrix_order, lapack_int m, lapack_int n,\n                           lapack_int k, double* a, lapack_int lda,\n                           const double* tau );\n\nlapack_int LAPACKE_sorgrq( int matrix_order, lapack_int m, lapack_int n,\n                           lapack_int k, float* a, lapack_int lda,\n                           const float* tau );\nlapack_int LAPACKE_dorgrq( int matrix_order, lapack_int m, lapack_int n,\n                           lapack_int k, double* a, lapack_int lda,\n                           const double* tau );\n\nlapack_int LAPACKE_sorgtr( int matrix_order, char uplo, lapack_int n, float* a,\n                           lapack_int lda, const float* tau );\nlapack_int LAPACKE_dorgtr( int matrix_order, char uplo, lapack_int n, double* a,\n                           lapack_int lda, const double* tau );\n\nlapack_int LAPACKE_sormbr( int matrix_order, char vect, char side, char trans,\n                           lapack_int m, lapack_int n, lapack_int k,\n                           const float* a, lapack_int lda, const float* tau,\n                           float* c, lapack_int ldc );\nlapack_int LAPACKE_dormbr( int matrix_order, char vect, char side, char trans,\n                           lapack_int m, lapack_int n, lapack_int k,\n                           const double* a, lapack_int lda, const double* tau,\n                           double* c, lapack_int ldc );\n\nlapack_int LAPACKE_sormhr( int matrix_order, char side, char trans,\n                           lapack_int m, lapack_int n, lapack_int ilo,\n                           lapack_int ihi, const float* a, lapack_int lda,\n                           const float* tau, float* c, lapack_int ldc );\nlapack_int LAPACKE_dormhr( int matrix_order, char side, char trans,\n                           lapack_int m, lapack_int n, lapack_int ilo,\n                           lapack_int ihi, const double* a, lapack_int lda,\n                           const double* tau, double* c, lapack_int ldc );\n\nlapack_int LAPACKE_sormlq( int matrix_order, char side, char trans,\n                           lapack_int m, lapack_int n, lapack_int k,\n                           const float* a, lapack_int lda, const float* tau,\n               
            float* c, lapack_int ldc );\nlapack_int LAPACKE_dormlq( int matrix_order, char side, char trans,\n                           lapack_int m, lapack_int n, lapack_int k,\n                           const double* a, lapack_int lda, const double* tau,\n                           double* c, lapack_int ldc );\n\nlapack_int LAPACKE_sormql( int matrix_order, char side, char trans,\n                           lapack_int m, lapack_int n, lapack_int k,\n                           const float* a, lapack_int lda, const float* tau,\n                           float* c, lapack_int ldc );\nlapack_int LAPACKE_dormql( int matrix_order, char side, char trans,\n                           lapack_int m, lapack_int n, lapack_int k,\n                           const double* a, lapack_int lda, const double* tau,\n                           double* c, lapack_int ldc );\n\nlapack_int LAPACKE_sormqr( int matrix_order, char side, char trans,\n                           lapack_int m, lapack_int n, lapack_int k,\n                           const float* a, lapack_int lda, const float* tau,\n                           float* c, lapack_int ldc );\nlapack_int LAPACKE_dormqr( int matrix_order, char side, char trans,\n                           lapack_int m, lapack_int n, lapack_int k,\n                           const double* a, lapack_int lda, const double* tau,\n                           double* c, lapack_int ldc );\n\nlapack_int LAPACKE_sormrq( int matrix_order, char side, char trans,\n                           lapack_int m, lapack_int n, lapack_int k,\n                           const float* a, lapack_int lda, const float* tau,\n                           float* c, lapack_int ldc );\nlapack_int LAPACKE_dormrq( int matrix_order, char side, char trans,\n                           lapack_int m, lapack_int n, lapack_int k,\n                           const double* a, lapack_int lda, const double* tau,\n                           double* c, lapack_int ldc );\n\nlapack_int LAPACKE_sormrz( int matrix_order, char side, char trans,\n                           lapack_int m, lapack_int n, lapack_int k,\n                           lapack_int l, const float* a, lapack_int lda,\n                           const float* tau, float* c, lapack_int ldc );\nlapack_int LAPACKE_dormrz( int matrix_order, char side, char trans,\n                           lapack_int m, lapack_int n, lapack_int k,\n                           lapack_int l, const double* a, lapack_int lda,\n                           const double* tau, double* c, lapack_int ldc );\n\nlapack_int LAPACKE_sormtr( int matrix_order, char side, char uplo, char trans,\n                           lapack_int m, lapack_int n, const float* a,\n                           lapack_int lda, const float* tau, float* c,\n                           lapack_int ldc );\nlapack_int LAPACKE_dormtr( int matrix_order, char side, char uplo, char trans,\n                           lapack_int m, lapack_int n, const double* a,\n                           lapack_int lda, const double* tau, double* c,\n                           lapack_int ldc );\n\nlapack_int LAPACKE_spbcon( int matrix_order, char uplo, lapack_int n,\n                           lapack_int kd, const float* ab, lapack_int ldab,\n                           float anorm, float* rcond );\nlapack_int LAPACKE_dpbcon( int matrix_order, char uplo, lapack_int n,\n                           lapack_int kd, const double* ab, lapack_int ldab,\n                           double anorm, double* rcond );\nlapack_int LAPACKE_cpbcon( int matrix_order, 
char uplo, lapack_int n,\n                           lapack_int kd, const lapack_complex_float* ab,\n                           lapack_int ldab, float anorm, float* rcond );\nlapack_int LAPACKE_zpbcon( int matrix_order, char uplo, lapack_int n,\n                           lapack_int kd, const lapack_complex_double* ab,\n                           lapack_int ldab, double anorm, double* rcond );\n\nlapack_int LAPACKE_spbequ( int matrix_order, char uplo, lapack_int n,\n                           lapack_int kd, const float* ab, lapack_int ldab,\n                           float* s, float* scond, float* amax );\nlapack_int LAPACKE_dpbequ( int matrix_order, char uplo, lapack_int n,\n                           lapack_int kd, const double* ab, lapack_int ldab,\n                           double* s, double* scond, double* amax );\nlapack_int LAPACKE_cpbequ( int matrix_order, char uplo, lapack_int n,\n                           lapack_int kd, const lapack_complex_float* ab,\n                           lapack_int ldab, float* s, float* scond,\n                           float* amax );\nlapack_int LAPACKE_zpbequ( int matrix_order, char uplo, lapack_int n,\n                           lapack_int kd, const lapack_complex_double* ab,\n                           lapack_int ldab, double* s, double* scond,\n                           double* amax );\n\nlapack_int LAPACKE_spbrfs( int matrix_order, char uplo, lapack_int n,\n                           lapack_int kd, lapack_int nrhs, const float* ab,\n                           lapack_int ldab, const float* afb, lapack_int ldafb,\n                           const float* b, lapack_int ldb, float* x,\n                           lapack_int ldx, float* ferr, float* berr );\nlapack_int LAPACKE_dpbrfs( int matrix_order, char uplo, lapack_int n,\n                           lapack_int kd, lapack_int nrhs, const double* ab,\n                           lapack_int ldab, const double* afb, lapack_int ldafb,\n                           const double* b, lapack_int ldb, double* x,\n                           lapack_int ldx, double* ferr, double* berr );\nlapack_int LAPACKE_cpbrfs( int matrix_order, char uplo, lapack_int n,\n                           lapack_int kd, lapack_int nrhs,\n                           const lapack_complex_float* ab, lapack_int ldab,\n                           const lapack_complex_float* afb, lapack_int ldafb,\n                           const lapack_complex_float* b, lapack_int ldb,\n                           lapack_complex_float* x, lapack_int ldx, float* ferr,\n                           float* berr );\nlapack_int LAPACKE_zpbrfs( int matrix_order, char uplo, lapack_int n,\n                           lapack_int kd, lapack_int nrhs,\n                           const lapack_complex_double* ab, lapack_int ldab,\n                           const lapack_complex_double* afb, lapack_int ldafb,\n                           const lapack_complex_double* b, lapack_int ldb,\n                           lapack_complex_double* x, lapack_int ldx,\n                           double* ferr, double* berr );\n\nlapack_int LAPACKE_spbstf( int matrix_order, char uplo, lapack_int n,\n                           lapack_int kb, float* bb, lapack_int ldbb );\nlapack_int LAPACKE_dpbstf( int matrix_order, char uplo, lapack_int n,\n                           lapack_int kb, double* bb, lapack_int ldbb );\nlapack_int LAPACKE_cpbstf( int matrix_order, char uplo, lapack_int n,\n                           lapack_int kb, lapack_complex_float* bb,\n                           lapack_int 
ldbb );\nlapack_int LAPACKE_zpbstf( int matrix_order, char uplo, lapack_int n,\n                           lapack_int kb, lapack_complex_double* bb,\n                           lapack_int ldbb );\n\nlapack_int LAPACKE_spbsv( int matrix_order, char uplo, lapack_int n,\n                          lapack_int kd, lapack_int nrhs, float* ab,\n                          lapack_int ldab, float* b, lapack_int ldb );\nlapack_int LAPACKE_dpbsv( int matrix_order, char uplo, lapack_int n,\n                          lapack_int kd, lapack_int nrhs, double* ab,\n                          lapack_int ldab, double* b, lapack_int ldb );\nlapack_int LAPACKE_cpbsv( int matrix_order, char uplo, lapack_int n,\n                          lapack_int kd, lapack_int nrhs,\n                          lapack_complex_float* ab, lapack_int ldab,\n                          lapack_complex_float* b, lapack_int ldb );\nlapack_int LAPACKE_zpbsv( int matrix_order, char uplo, lapack_int n,\n                          lapack_int kd, lapack_int nrhs,\n                          lapack_complex_double* ab, lapack_int ldab,\n                          lapack_complex_double* b, lapack_int ldb );\n\nlapack_int LAPACKE_spbsvx( int matrix_order, char fact, char uplo, lapack_int n,\n                           lapack_int kd, lapack_int nrhs, float* ab,\n                           lapack_int ldab, float* afb, lapack_int ldafb,\n                           char* equed, float* s, float* b, lapack_int ldb,\n                           float* x, lapack_int ldx, float* rcond, float* ferr,\n                           float* berr );\nlapack_int LAPACKE_dpbsvx( int matrix_order, char fact, char uplo, lapack_int n,\n                           lapack_int kd, lapack_int nrhs, double* ab,\n                           lapack_int ldab, double* afb, lapack_int ldafb,\n                           char* equed, double* s, double* b, lapack_int ldb,\n                           double* x, lapack_int ldx, double* rcond,\n                           double* ferr, double* berr );\nlapack_int LAPACKE_cpbsvx( int matrix_order, char fact, char uplo, lapack_int n,\n                           lapack_int kd, lapack_int nrhs,\n                           lapack_complex_float* ab, lapack_int ldab,\n                           lapack_complex_float* afb, lapack_int ldafb,\n                           char* equed, float* s, lapack_complex_float* b,\n                           lapack_int ldb, lapack_complex_float* x,\n                           lapack_int ldx, float* rcond, float* ferr,\n                           float* berr );\nlapack_int LAPACKE_zpbsvx( int matrix_order, char fact, char uplo, lapack_int n,\n                           lapack_int kd, lapack_int nrhs,\n                           lapack_complex_double* ab, lapack_int ldab,\n                           lapack_complex_double* afb, lapack_int ldafb,\n                           char* equed, double* s, lapack_complex_double* b,\n                           lapack_int ldb, lapack_complex_double* x,\n                           lapack_int ldx, double* rcond, double* ferr,\n                           double* berr );\n\nlapack_int LAPACKE_spbtrf( int matrix_order, char uplo, lapack_int n,\n                           lapack_int kd, float* ab, lapack_int ldab );\nlapack_int LAPACKE_dpbtrf( int matrix_order, char uplo, lapack_int n,\n                           lapack_int kd, double* ab, lapack_int ldab );\nlapack_int LAPACKE_cpbtrf( int matrix_order, char uplo, lapack_int n,\n                           lapack_int kd, 
lapack_complex_float* ab,\n                           lapack_int ldab );\nlapack_int LAPACKE_zpbtrf( int matrix_order, char uplo, lapack_int n,\n                           lapack_int kd, lapack_complex_double* ab,\n                           lapack_int ldab );\n\nlapack_int LAPACKE_spbtrs( int matrix_order, char uplo, lapack_int n,\n                           lapack_int kd, lapack_int nrhs, const float* ab,\n                           lapack_int ldab, float* b, lapack_int ldb );\nlapack_int LAPACKE_dpbtrs( int matrix_order, char uplo, lapack_int n,\n                           lapack_int kd, lapack_int nrhs, const double* ab,\n                           lapack_int ldab, double* b, lapack_int ldb );\nlapack_int LAPACKE_cpbtrs( int matrix_order, char uplo, lapack_int n,\n                           lapack_int kd, lapack_int nrhs,\n                           const lapack_complex_float* ab, lapack_int ldab,\n                           lapack_complex_float* b, lapack_int ldb );\nlapack_int LAPACKE_zpbtrs( int matrix_order, char uplo, lapack_int n,\n                           lapack_int kd, lapack_int nrhs,\n                           const lapack_complex_double* ab, lapack_int ldab,\n                           lapack_complex_double* b, lapack_int ldb );\n\nlapack_int LAPACKE_spftrf( int matrix_order, char transr, char uplo,\n                           lapack_int n, float* a );\nlapack_int LAPACKE_dpftrf( int matrix_order, char transr, char uplo,\n                           lapack_int n, double* a );\nlapack_int LAPACKE_cpftrf( int matrix_order, char transr, char uplo,\n                           lapack_int n, lapack_complex_float* a );\nlapack_int LAPACKE_zpftrf( int matrix_order, char transr, char uplo,\n                           lapack_int n, lapack_complex_double* a );\n\nlapack_int LAPACKE_spftri( int matrix_order, char transr, char uplo,\n                           lapack_int n, float* a );\nlapack_int LAPACKE_dpftri( int matrix_order, char transr, char uplo,\n                           lapack_int n, double* a );\nlapack_int LAPACKE_cpftri( int matrix_order, char transr, char uplo,\n                           lapack_int n, lapack_complex_float* a );\nlapack_int LAPACKE_zpftri( int matrix_order, char transr, char uplo,\n                           lapack_int n, lapack_complex_double* a );\n\nlapack_int LAPACKE_spftrs( int matrix_order, char transr, char uplo,\n                           lapack_int n, lapack_int nrhs, const float* a,\n                           float* b, lapack_int ldb );\nlapack_int LAPACKE_dpftrs( int matrix_order, char transr, char uplo,\n                           lapack_int n, lapack_int nrhs, const double* a,\n                           double* b, lapack_int ldb );\nlapack_int LAPACKE_cpftrs( int matrix_order, char transr, char uplo,\n                           lapack_int n, lapack_int nrhs,\n                           const lapack_complex_float* a,\n                           lapack_complex_float* b, lapack_int ldb );\nlapack_int LAPACKE_zpftrs( int matrix_order, char transr, char uplo,\n                           lapack_int n, lapack_int nrhs,\n                           const lapack_complex_double* a,\n                           lapack_complex_double* b, lapack_int ldb );\n\n
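/*\n * Example (editor's illustrative sketch, not part of the LAPACKE API):\n * factor-then-solve for a symmetric positive-definite band matrix using\n * LAPACKE_dpbtrf and LAPACKE_dpbtrs (declared above), with the matrix in\n * the usual LAPACK band storage. Variable names are hypothetical.\n *\n *   // n x n SPD matrix with kd superdiagonals, one right-hand side\n *   lapack_int info = LAPACKE_dpbtrf( LAPACK_ROW_MAJOR, 'U', n, kd, ab, n );\n *   if( info == 0 )\n *       info = LAPACKE_dpbtrs( LAPACK_ROW_MAJOR, 'U', n, kd, 1,\n *                              ab, n, b, 1 );\n */\n\nlapack_int LAPACKE_spocon( int matrix_order, char uplo, lapack_int n,\n                           const float* a, lapack_int lda, float anorm,\n                           float* rcond );\nlapack_int LAPACKE_dpocon( int matrix_order, char uplo, lapack_int n,\n                           const 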
double* a, lapack_int lda, double anorm,\n                           double* rcond );\nlapack_int LAPACKE_cpocon( int matrix_order, char uplo, lapack_int n,\n                           const lapack_complex_float* a, lapack_int lda,\n                           float anorm, float* rcond );\nlapack_int LAPACKE_zpocon( int matrix_order, char uplo, lapack_int n,\n                           const lapack_complex_double* a, lapack_int lda,\n                           double anorm, double* rcond );\n\nlapack_int LAPACKE_spoequ( int matrix_order, lapack_int n, const float* a,\n                           lapack_int lda, float* s, float* scond,\n                           float* amax );\nlapack_int LAPACKE_dpoequ( int matrix_order, lapack_int n, const double* a,\n                           lapack_int lda, double* s, double* scond,\n                           double* amax );\nlapack_int LAPACKE_cpoequ( int matrix_order, lapack_int n,\n                           const lapack_complex_float* a, lapack_int lda,\n                           float* s, float* scond, float* amax );\nlapack_int LAPACKE_zpoequ( int matrix_order, lapack_int n,\n                           const lapack_complex_double* a, lapack_int lda,\n                           double* s, double* scond, double* amax );\n\nlapack_int LAPACKE_spoequb( int matrix_order, lapack_int n, const float* a,\n                            lapack_int lda, float* s, float* scond,\n                            float* amax );\nlapack_int LAPACKE_dpoequb( int matrix_order, lapack_int n, const double* a,\n                            lapack_int lda, double* s, double* scond,\n                            double* amax );\nlapack_int LAPACKE_cpoequb( int matrix_order, lapack_int n,\n                            const lapack_complex_float* a, lapack_int lda,\n                            float* s, float* scond, float* amax );\nlapack_int LAPACKE_zpoequb( int matrix_order, lapack_int n,\n                            const lapack_complex_double* a, lapack_int lda,\n                            double* s, double* scond, double* amax );\n\nlapack_int LAPACKE_sporfs( int matrix_order, char uplo, lapack_int n,\n                           lapack_int nrhs, const float* a, lapack_int lda,\n                           const float* af, lapack_int ldaf, const float* b,\n                           lapack_int ldb, float* x, lapack_int ldx,\n                           float* ferr, float* berr );\nlapack_int LAPACKE_dporfs( int matrix_order, char uplo, lapack_int n,\n                           lapack_int nrhs, const double* a, lapack_int lda,\n                           const double* af, lapack_int ldaf, const double* b,\n                           lapack_int ldb, double* x, lapack_int ldx,\n                           double* ferr, double* berr );\nlapack_int LAPACKE_cporfs( int matrix_order, char uplo, lapack_int n,\n                           lapack_int nrhs, const lapack_complex_float* a,\n                           lapack_int lda, const lapack_complex_float* af,\n                           lapack_int ldaf, const lapack_complex_float* b,\n                           lapack_int ldb, lapack_complex_float* x,\n                           lapack_int ldx, float* ferr, float* berr );\nlapack_int LAPACKE_zporfs( int matrix_order, char uplo, lapack_int n,\n                           lapack_int nrhs, const lapack_complex_double* a,\n                           lapack_int lda, const lapack_complex_double* af,\n                           lapack_int ldaf, const lapack_complex_double* b,\n                 
          lapack_int ldb, lapack_complex_double* x,\n                           lapack_int ldx, double* ferr, double* berr );\n\nlapack_int LAPACKE_sporfsx( int matrix_order, char uplo, char equed,\n                            lapack_int n, lapack_int nrhs, const float* a,\n                            lapack_int lda, const float* af, lapack_int ldaf,\n                            const float* s, const float* b, lapack_int ldb,\n                            float* x, lapack_int ldx, float* rcond, float* berr,\n                            lapack_int n_err_bnds, float* err_bnds_norm,\n                            float* err_bnds_comp, lapack_int nparams,\n                            float* params );\nlapack_int LAPACKE_dporfsx( int matrix_order, char uplo, char equed,\n                            lapack_int n, lapack_int nrhs, const double* a,\n                            lapack_int lda, const double* af, lapack_int ldaf,\n                            const double* s, const double* b, lapack_int ldb,\n                            double* x, lapack_int ldx, double* rcond,\n                            double* berr, lapack_int n_err_bnds,\n                            double* err_bnds_norm, double* err_bnds_comp,\n                            lapack_int nparams, double* params );\nlapack_int LAPACKE_cporfsx( int matrix_order, char uplo, char equed,\n                            lapack_int n, lapack_int nrhs,\n                            const lapack_complex_float* a, lapack_int lda,\n                            const lapack_complex_float* af, lapack_int ldaf,\n                            const float* s, const lapack_complex_float* b,\n                            lapack_int ldb, lapack_complex_float* x,\n                            lapack_int ldx, float* rcond, float* berr,\n                            lapack_int n_err_bnds, float* err_bnds_norm,\n                            float* err_bnds_comp, lapack_int nparams,\n                            float* params );\nlapack_int LAPACKE_zporfsx( int matrix_order, char uplo, char equed,\n                            lapack_int n, lapack_int nrhs,\n                            const lapack_complex_double* a, lapack_int lda,\n                            const lapack_complex_double* af, lapack_int ldaf,\n                            const double* s, const lapack_complex_double* b,\n                            lapack_int ldb, lapack_complex_double* x,\n                            lapack_int ldx, double* rcond, double* berr,\n                            lapack_int n_err_bnds, double* err_bnds_norm,\n                            double* err_bnds_comp, lapack_int nparams,\n                            double* params );\n\nlapack_int LAPACKE_sposv( int matrix_order, char uplo, lapack_int n,\n                          lapack_int nrhs, float* a, lapack_int lda, float* b,\n                          lapack_int ldb );\nlapack_int LAPACKE_dposv( int matrix_order, char uplo, lapack_int n,\n                          lapack_int nrhs, double* a, lapack_int lda, double* b,\n                          lapack_int ldb );\nlapack_int LAPACKE_cposv( int matrix_order, char uplo, lapack_int n,\n                          lapack_int nrhs, lapack_complex_float* a,\n                          lapack_int lda, lapack_complex_float* b,\n                          lapack_int ldb );\nlapack_int LAPACKE_zposv( int matrix_order, char uplo, lapack_int n,\n                          lapack_int nrhs, lapack_complex_double* a,\n                          lapack_int lda, lapack_complex_double* b,\n               
           lapack_int ldb );\nlapack_int LAPACKE_dsposv( int matrix_order, char uplo, lapack_int n,\n                           lapack_int nrhs, double* a, lapack_int lda,\n                           double* b, lapack_int ldb, double* x, lapack_int ldx,\n                           lapack_int* iter );\nlapack_int LAPACKE_zcposv( int matrix_order, char uplo, lapack_int n,\n                           lapack_int nrhs, lapack_complex_double* a,\n                           lapack_int lda, lapack_complex_double* b,\n                           lapack_int ldb, lapack_complex_double* x,\n                           lapack_int ldx, lapack_int* iter );\n\nlapack_int LAPACKE_sposvx( int matrix_order, char fact, char uplo, lapack_int n,\n                           lapack_int nrhs, float* a, lapack_int lda, float* af,\n                           lapack_int ldaf, char* equed, float* s, float* b,\n                           lapack_int ldb, float* x, lapack_int ldx,\n                           float* rcond, float* ferr, float* berr );\nlapack_int LAPACKE_dposvx( int matrix_order, char fact, char uplo, lapack_int n,\n                           lapack_int nrhs, double* a, lapack_int lda,\n                           double* af, lapack_int ldaf, char* equed, double* s,\n                           double* b, lapack_int ldb, double* x, lapack_int ldx,\n                           double* rcond, double* ferr, double* berr );\nlapack_int LAPACKE_cposvx( int matrix_order, char fact, char uplo, lapack_int n,\n                           lapack_int nrhs, lapack_complex_float* a,\n                           lapack_int lda, lapack_complex_float* af,\n                           lapack_int ldaf, char* equed, float* s,\n                           lapack_complex_float* b, lapack_int ldb,\n                           lapack_complex_float* x, lapack_int ldx,\n                           float* rcond, float* ferr, float* berr );\nlapack_int LAPACKE_zposvx( int matrix_order, char fact, char uplo, lapack_int n,\n                           lapack_int nrhs, lapack_complex_double* a,\n                           lapack_int lda, lapack_complex_double* af,\n                           lapack_int ldaf, char* equed, double* s,\n                           lapack_complex_double* b, lapack_int ldb,\n                           lapack_complex_double* x, lapack_int ldx,\n                           double* rcond, double* ferr, double* berr );\n\nlapack_int LAPACKE_sposvxx( int matrix_order, char fact, char uplo,\n                            lapack_int n, lapack_int nrhs, float* a,\n                            lapack_int lda, float* af, lapack_int ldaf,\n                            char* equed, float* s, float* b, lapack_int ldb,\n                            float* x, lapack_int ldx, float* rcond,\n                            float* rpvgrw, float* berr, lapack_int n_err_bnds,\n                            float* err_bnds_norm, float* err_bnds_comp,\n                            lapack_int nparams, float* params );\nlapack_int LAPACKE_dposvxx( int matrix_order, char fact, char uplo,\n                            lapack_int n, lapack_int nrhs, double* a,\n                            lapack_int lda, double* af, lapack_int ldaf,\n                            char* equed, double* s, double* b, lapack_int ldb,\n                            double* x, lapack_int ldx, double* rcond,\n                            double* rpvgrw, double* berr, lapack_int n_err_bnds,\n                            double* err_bnds_norm, double* err_bnds_comp,\n                          
  lapack_int nparams, double* params );\nlapack_int LAPACKE_cposvxx( int matrix_order, char fact, char uplo,\n                            lapack_int n, lapack_int nrhs,\n                            lapack_complex_float* a, lapack_int lda,\n                            lapack_complex_float* af, lapack_int ldaf,\n                            char* equed, float* s, lapack_complex_float* b,\n                            lapack_int ldb, lapack_complex_float* x,\n                            lapack_int ldx, float* rcond, float* rpvgrw,\n                            float* berr, lapack_int n_err_bnds,\n                            float* err_bnds_norm, float* err_bnds_comp,\n                            lapack_int nparams, float* params );\nlapack_int LAPACKE_zposvxx( int matrix_order, char fact, char uplo,\n                            lapack_int n, lapack_int nrhs,\n                            lapack_complex_double* a, lapack_int lda,\n                            lapack_complex_double* af, lapack_int ldaf,\n                            char* equed, double* s, lapack_complex_double* b,\n                            lapack_int ldb, lapack_complex_double* x,\n                            lapack_int ldx, double* rcond, double* rpvgrw,\n                            double* berr, lapack_int n_err_bnds,\n                            double* err_bnds_norm, double* err_bnds_comp,\n                            lapack_int nparams, double* params );\n\nlapack_int LAPACKE_spotrf( int matrix_order, char uplo, lapack_int n, float* a,\n                           lapack_int lda );\nlapack_int LAPACKE_dpotrf( int matrix_order, char uplo, lapack_int n, double* a,\n                           lapack_int lda );\nlapack_int LAPACKE_cpotrf( int matrix_order, char uplo, lapack_int n,\n                           lapack_complex_float* a, lapack_int lda );\nlapack_int LAPACKE_zpotrf( int matrix_order, char uplo, lapack_int n,\n                           lapack_complex_double* a, lapack_int lda );\n\nlapack_int LAPACKE_spotri( int matrix_order, char uplo, lapack_int n, float* a,\n                           lapack_int lda );\nlapack_int LAPACKE_dpotri( int matrix_order, char uplo, lapack_int n, double* a,\n                           lapack_int lda );\nlapack_int LAPACKE_cpotri( int matrix_order, char uplo, lapack_int n,\n                           lapack_complex_float* a, lapack_int lda );\nlapack_int LAPACKE_zpotri( int matrix_order, char uplo, lapack_int n,\n                           lapack_complex_double* a, lapack_int lda );\n\nlapack_int LAPACKE_spotrs( int matrix_order, char uplo, lapack_int n,\n                           lapack_int nrhs, const float* a, lapack_int lda,\n                           float* b, lapack_int ldb );\nlapack_int LAPACKE_dpotrs( int matrix_order, char uplo, lapack_int n,\n                           lapack_int nrhs, const double* a, lapack_int lda,\n                           double* b, lapack_int ldb );\nlapack_int LAPACKE_cpotrs( int matrix_order, char uplo, lapack_int n,\n                           lapack_int nrhs, const lapack_complex_float* a,\n                           lapack_int lda, lapack_complex_float* b,\n                           lapack_int ldb );\nlapack_int LAPACKE_zpotrs( int matrix_order, char uplo, lapack_int n,\n                           lapack_int nrhs, const lapack_complex_double* a,\n                           lapack_int lda, lapack_complex_double* b,\n                           lapack_int ldb );\n\n
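/*\n * Example (editor's illustrative sketch, not part of the LAPACKE API):\n * Cholesky factorization with LAPACKE_dpotrf followed by a triangular\n * solve with LAPACKE_dpotrs (both declared above). Data are hypothetical.\n *\n *   double a[9] = { 4, 1, 1,   1, 3, 0,   1, 0, 2 };  // 3x3 SPD, row-major\n *   double b[3] = { 1, 2, 3 };                        // right-hand side\n *   lapack_int info = LAPACKE_dpotrf( LAPACK_ROW_MAJOR, 'L', 3, a, 3 );\n *   if( info == 0 )   // on success b is overwritten with the solution x\n *       info = LAPACKE_dpotrs( LAPACK_ROW_MAJOR, 'L', 3, 1, a, 3, b, 1 );\n */\n\nlapack_int LAPACKE_sppcon( int matrix_order, char uplo, lapack_int n,\n                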
           const float* ap, float anorm, float* rcond );\nlapack_int LAPACKE_dppcon( int matrix_order, char uplo, lapack_int n,\n                           const double* ap, double anorm, double* rcond );\nlapack_int LAPACKE_cppcon( int matrix_order, char uplo, lapack_int n,\n                           const lapack_complex_float* ap, float anorm,\n                           float* rcond );\nlapack_int LAPACKE_zppcon( int matrix_order, char uplo, lapack_int n,\n                           const lapack_complex_double* ap, double anorm,\n                           double* rcond );\n\nlapack_int LAPACKE_sppequ( int matrix_order, char uplo, lapack_int n,\n                           const float* ap, float* s, float* scond,\n                           float* amax );\nlapack_int LAPACKE_dppequ( int matrix_order, char uplo, lapack_int n,\n                           const double* ap, double* s, double* scond,\n                           double* amax );\nlapack_int LAPACKE_cppequ( int matrix_order, char uplo, lapack_int n,\n                           const lapack_complex_float* ap, float* s,\n                           float* scond, float* amax );\nlapack_int LAPACKE_zppequ( int matrix_order, char uplo, lapack_int n,\n                           const lapack_complex_double* ap, double* s,\n                           double* scond, double* amax );\n\nlapack_int LAPACKE_spprfs( int matrix_order, char uplo, lapack_int n,\n                           lapack_int nrhs, const float* ap, const float* afp,\n                           const float* b, lapack_int ldb, float* x,\n                           lapack_int ldx, float* ferr, float* berr );\nlapack_int LAPACKE_dpprfs( int matrix_order, char uplo, lapack_int n,\n                           lapack_int nrhs, const double* ap, const double* afp,\n                           const double* b, lapack_int ldb, double* x,\n                           lapack_int ldx, double* ferr, double* berr );\nlapack_int LAPACKE_cpprfs( int matrix_order, char uplo, lapack_int n,\n                           lapack_int nrhs, const lapack_complex_float* ap,\n                           const lapack_complex_float* afp,\n                           const lapack_complex_float* b, lapack_int ldb,\n                           lapack_complex_float* x, lapack_int ldx, float* ferr,\n                           float* berr );\nlapack_int LAPACKE_zpprfs( int matrix_order, char uplo, lapack_int n,\n                           lapack_int nrhs, const lapack_complex_double* ap,\n                           const lapack_complex_double* afp,\n                           const lapack_complex_double* b, lapack_int ldb,\n                           lapack_complex_double* x, lapack_int ldx,\n                           double* ferr, double* berr );\n\nlapack_int LAPACKE_sppsv( int matrix_order, char uplo, lapack_int n,\n                          lapack_int nrhs, float* ap, float* b,\n                          lapack_int ldb );\nlapack_int LAPACKE_dppsv( int matrix_order, char uplo, lapack_int n,\n                          lapack_int nrhs, double* ap, double* b,\n                          lapack_int ldb );\nlapack_int LAPACKE_cppsv( int matrix_order, char uplo, lapack_int n,\n                          lapack_int nrhs, lapack_complex_float* ap,\n                          lapack_complex_float* b, lapack_int ldb );\nlapack_int LAPACKE_zppsv( int matrix_order, char uplo, lapack_int n,\n                          lapack_int nrhs, lapack_complex_double* ap,\n                          lapack_complex_double* b, lapack_int 
ldb );\n\nlapack_int LAPACKE_sppsvx( int matrix_order, char fact, char uplo, lapack_int n,\n                           lapack_int nrhs, float* ap, float* afp, char* equed,\n                           float* s, float* b, lapack_int ldb, float* x,\n                           lapack_int ldx, float* rcond, float* ferr,\n                           float* berr );\nlapack_int LAPACKE_dppsvx( int matrix_order, char fact, char uplo, lapack_int n,\n                           lapack_int nrhs, double* ap, double* afp,\n                           char* equed, double* s, double* b, lapack_int ldb,\n                           double* x, lapack_int ldx, double* rcond,\n                           double* ferr, double* berr );\nlapack_int LAPACKE_cppsvx( int matrix_order, char fact, char uplo, lapack_int n,\n                           lapack_int nrhs, lapack_complex_float* ap,\n                           lapack_complex_float* afp, char* equed, float* s,\n                           lapack_complex_float* b, lapack_int ldb,\n                           lapack_complex_float* x, lapack_int ldx,\n                           float* rcond, float* ferr, float* berr );\nlapack_int LAPACKE_zppsvx( int matrix_order, char fact, char uplo, lapack_int n,\n                           lapack_int nrhs, lapack_complex_double* ap,\n                           lapack_complex_double* afp, char* equed, double* s,\n                           lapack_complex_double* b, lapack_int ldb,\n                           lapack_complex_double* x, lapack_int ldx,\n                           double* rcond, double* ferr, double* berr );\n\nlapack_int LAPACKE_spptrf( int matrix_order, char uplo, lapack_int n,\n                           float* ap );\nlapack_int LAPACKE_dpptrf( int matrix_order, char uplo, lapack_int n,\n                           double* ap );\nlapack_int LAPACKE_cpptrf( int matrix_order, char uplo, lapack_int n,\n                           lapack_complex_float* ap );\nlapack_int LAPACKE_zpptrf( int matrix_order, char uplo, lapack_int n,\n                           lapack_complex_double* ap );\n\nlapack_int LAPACKE_spptri( int matrix_order, char uplo, lapack_int n,\n                           float* ap );\nlapack_int LAPACKE_dpptri( int matrix_order, char uplo, lapack_int n,\n                           double* ap );\nlapack_int LAPACKE_cpptri( int matrix_order, char uplo, lapack_int n,\n                           lapack_complex_float* ap );\nlapack_int LAPACKE_zpptri( int matrix_order, char uplo, lapack_int n,\n                           lapack_complex_double* ap );\n\nlapack_int LAPACKE_spptrs( int matrix_order, char uplo, lapack_int n,\n                           lapack_int nrhs, const float* ap, float* b,\n                           lapack_int ldb );\nlapack_int LAPACKE_dpptrs( int matrix_order, char uplo, lapack_int n,\n                           lapack_int nrhs, const double* ap, double* b,\n                           lapack_int ldb );\nlapack_int LAPACKE_cpptrs( int matrix_order, char uplo, lapack_int n,\n                           lapack_int nrhs, const lapack_complex_float* ap,\n                           lapack_complex_float* b, lapack_int ldb );\nlapack_int LAPACKE_zpptrs( int matrix_order, char uplo, lapack_int n,\n                           lapack_int nrhs, const lapack_complex_double* ap,\n                           lapack_complex_double* b, lapack_int ldb );\n\nlapack_int LAPACKE_spstrf( int matrix_order, char uplo, lapack_int n, float* a,\n                           lapack_int lda, lapack_int* piv, lapack_int* rank,\n 
                          float tol );\nlapack_int LAPACKE_dpstrf( int matrix_order, char uplo, lapack_int n, double* a,\n                           lapack_int lda, lapack_int* piv, lapack_int* rank,\n                           double tol );\nlapack_int LAPACKE_cpstrf( int matrix_order, char uplo, lapack_int n,\n                           lapack_complex_float* a, lapack_int lda,\n                           lapack_int* piv, lapack_int* rank, float tol );\nlapack_int LAPACKE_zpstrf( int matrix_order, char uplo, lapack_int n,\n                           lapack_complex_double* a, lapack_int lda,\n                           lapack_int* piv, lapack_int* rank, double tol );\n\nlapack_int LAPACKE_sptcon( lapack_int n, const float* d, const float* e,\n                           float anorm, float* rcond );\nlapack_int LAPACKE_dptcon( lapack_int n, const double* d, const double* e,\n                           double anorm, double* rcond );\nlapack_int LAPACKE_cptcon( lapack_int n, const float* d,\n                           const lapack_complex_float* e, float anorm,\n                           float* rcond );\nlapack_int LAPACKE_zptcon( lapack_int n, const double* d,\n                           const lapack_complex_double* e, double anorm,\n                           double* rcond );\n\nlapack_int LAPACKE_spteqr( int matrix_order, char compz, lapack_int n, float* d,\n                           float* e, float* z, lapack_int ldz );\nlapack_int LAPACKE_dpteqr( int matrix_order, char compz, lapack_int n,\n                           double* d, double* e, double* z, lapack_int ldz );\nlapack_int LAPACKE_cpteqr( int matrix_order, char compz, lapack_int n, float* d,\n                           float* e, lapack_complex_float* z, lapack_int ldz );\nlapack_int LAPACKE_zpteqr( int matrix_order, char compz, lapack_int n,\n                           double* d, double* e, lapack_complex_double* z,\n                           lapack_int ldz );\n\nlapack_int LAPACKE_sptrfs( int matrix_order, lapack_int n, lapack_int nrhs,\n                           const float* d, const float* e, const float* df,\n                           const float* ef, const float* b, lapack_int ldb,\n                           float* x, lapack_int ldx, float* ferr, float* berr );\nlapack_int LAPACKE_dptrfs( int matrix_order, lapack_int n, lapack_int nrhs,\n                           const double* d, const double* e, const double* df,\n                           const double* ef, const double* b, lapack_int ldb,\n                           double* x, lapack_int ldx, double* ferr,\n                           double* berr );\nlapack_int LAPACKE_cptrfs( int matrix_order, char uplo, lapack_int n,\n                           lapack_int nrhs, const float* d,\n                           const lapack_complex_float* e, const float* df,\n                           const lapack_complex_float* ef,\n                           const lapack_complex_float* b, lapack_int ldb,\n                           lapack_complex_float* x, lapack_int ldx, float* ferr,\n                           float* berr );\nlapack_int LAPACKE_zptrfs( int matrix_order, char uplo, lapack_int n,\n                           lapack_int nrhs, const double* d,\n                           const lapack_complex_double* e, const double* df,\n                           const lapack_complex_double* ef,\n                           const lapack_complex_double* b, lapack_int ldb,\n                           lapack_complex_double* x, lapack_int ldx,\n                           double* ferr, double* 
berr );\n\nlapack_int LAPACKE_sptsv( int matrix_order, lapack_int n, lapack_int nrhs,\n                          float* d, float* e, float* b, lapack_int ldb );\nlapack_int LAPACKE_dptsv( int matrix_order, lapack_int n, lapack_int nrhs,\n                          double* d, double* e, double* b, lapack_int ldb );\nlapack_int LAPACKE_cptsv( int matrix_order, lapack_int n, lapack_int nrhs,\n                          float* d, lapack_complex_float* e,\n                          lapack_complex_float* b, lapack_int ldb );\nlapack_int LAPACKE_zptsv( int matrix_order, lapack_int n, lapack_int nrhs,\n                          double* d, lapack_complex_double* e,\n                          lapack_complex_double* b, lapack_int ldb );\n\nlapack_int LAPACKE_sptsvx( int matrix_order, char fact, lapack_int n,\n                           lapack_int nrhs, const float* d, const float* e,\n                           float* df, float* ef, const float* b, lapack_int ldb,\n                           float* x, lapack_int ldx, float* rcond, float* ferr,\n                           float* berr );\nlapack_int LAPACKE_dptsvx( int matrix_order, char fact, lapack_int n,\n                           lapack_int nrhs, const double* d, const double* e,\n                           double* df, double* ef, const double* b,\n                           lapack_int ldb, double* x, lapack_int ldx,\n                           double* rcond, double* ferr, double* berr );\nlapack_int LAPACKE_cptsvx( int matrix_order, char fact, lapack_int n,\n                           lapack_int nrhs, const float* d,\n                           const lapack_complex_float* e, float* df,\n                           lapack_complex_float* ef,\n                           const lapack_complex_float* b, lapack_int ldb,\n                           lapack_complex_float* x, lapack_int ldx,\n                           float* rcond, float* ferr, float* berr );\nlapack_int LAPACKE_zptsvx( int matrix_order, char fact, lapack_int n,\n                           lapack_int nrhs, const double* d,\n                           const lapack_complex_double* e, double* df,\n                           lapack_complex_double* ef,\n                           const lapack_complex_double* b, lapack_int ldb,\n                           lapack_complex_double* x, lapack_int ldx,\n                           double* rcond, double* ferr, double* berr );\n\nlapack_int LAPACKE_spttrf( lapack_int n, float* d, float* e );\nlapack_int LAPACKE_dpttrf( lapack_int n, double* d, double* e );\nlapack_int LAPACKE_cpttrf( lapack_int n, float* d, lapack_complex_float* e );\nlapack_int LAPACKE_zpttrf( lapack_int n, double* d, lapack_complex_double* e );\n\n
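/*\n * Example (editor's illustrative sketch, not part of the LAPACKE API):\n * LAPACKE_dptsv (declared above) solves a symmetric positive-definite\n * tridiagonal system in one call. Data below are hypothetical.\n *\n *   double d[4] = { 2, 2, 2, 2 };    // diagonal\n *   double e[3] = { -1, -1, -1 };    // off-diagonal\n *   double b[4] = { 1, 0, 0, 1 };    // right-hand side, overwritten by x\n *   lapack_int info = LAPACKE_dptsv( LAPACK_ROW_MAJOR, 4, 1, d, e, b, 1 );\n */\n\nlapack_int LAPACKE_spttrs( int matrix_order, lapack_int n, lapack_int nrhs,\n                           const float* d, const float* e, float* b,\n                           lapack_int ldb );\nlapack_int LAPACKE_dpttrs( int matrix_order, lapack_int n, lapack_int nrhs,\n                           const double* d, const double* e, double* b,\n                           lapack_int ldb );\nlapack_int LAPACKE_cpttrs( int matrix_order, char uplo, lapack_int n,\n                           lapack_int nrhs, const float* d,\n                           const lapack_complex_float* e,\n                           lapack_complex_float* b, lapack_int ldb );\nlapack_int LAPACKE_zpttrs( int matrix_order, char uplo, lapack_int n,\n                           lapack_int nrhs, const double* d,\n                           const lapack_complex_double* e,\n              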
             lapack_complex_double* b, lapack_int ldb );\n\nlapack_int LAPACKE_ssbev( int matrix_order, char jobz, char uplo, lapack_int n,\n                          lapack_int kd, float* ab, lapack_int ldab, float* w,\n                          float* z, lapack_int ldz );\nlapack_int LAPACKE_dsbev( int matrix_order, char jobz, char uplo, lapack_int n,\n                          lapack_int kd, double* ab, lapack_int ldab, double* w,\n                          double* z, lapack_int ldz );\n\nlapack_int LAPACKE_ssbevd( int matrix_order, char jobz, char uplo, lapack_int n,\n                           lapack_int kd, float* ab, lapack_int ldab, float* w,\n                           float* z, lapack_int ldz );\nlapack_int LAPACKE_dsbevd( int matrix_order, char jobz, char uplo, lapack_int n,\n                           lapack_int kd, double* ab, lapack_int ldab,\n                           double* w, double* z, lapack_int ldz );\n\nlapack_int LAPACKE_ssbevx( int matrix_order, char jobz, char range, char uplo,\n                           lapack_int n, lapack_int kd, float* ab,\n                           lapack_int ldab, float* q, lapack_int ldq, float vl,\n                           float vu, lapack_int il, lapack_int iu, float abstol,\n                           lapack_int* m, float* w, float* z, lapack_int ldz,\n                           lapack_int* ifail );\nlapack_int LAPACKE_dsbevx( int matrix_order, char jobz, char range, char uplo,\n                           lapack_int n, lapack_int kd, double* ab,\n                           lapack_int ldab, double* q, lapack_int ldq,\n                           double vl, double vu, lapack_int il, lapack_int iu,\n                           double abstol, lapack_int* m, double* w, double* z,\n                           lapack_int ldz, lapack_int* ifail );\n\nlapack_int LAPACKE_ssbgst( int matrix_order, char vect, char uplo, lapack_int n,\n                           lapack_int ka, lapack_int kb, float* ab,\n                           lapack_int ldab, const float* bb, lapack_int ldbb,\n                           float* x, lapack_int ldx );\nlapack_int LAPACKE_dsbgst( int matrix_order, char vect, char uplo, lapack_int n,\n                           lapack_int ka, lapack_int kb, double* ab,\n                           lapack_int ldab, const double* bb, lapack_int ldbb,\n                           double* x, lapack_int ldx );\n\nlapack_int LAPACKE_ssbgv( int matrix_order, char jobz, char uplo, lapack_int n,\n                          lapack_int ka, lapack_int kb, float* ab,\n                          lapack_int ldab, float* bb, lapack_int ldbb, float* w,\n                          float* z, lapack_int ldz );\nlapack_int LAPACKE_dsbgv( int matrix_order, char jobz, char uplo, lapack_int n,\n                          lapack_int ka, lapack_int kb, double* ab,\n                          lapack_int ldab, double* bb, lapack_int ldbb,\n                          double* w, double* z, lapack_int ldz );\n\nlapack_int LAPACKE_ssbgvd( int matrix_order, char jobz, char uplo, lapack_int n,\n                           lapack_int ka, lapack_int kb, float* ab,\n                           lapack_int ldab, float* bb, lapack_int ldbb,\n                           float* w, float* z, lapack_int ldz );\nlapack_int LAPACKE_dsbgvd( int matrix_order, char jobz, char uplo, lapack_int n,\n                           lapack_int ka, lapack_int kb, double* ab,\n                           lapack_int ldab, double* bb, lapack_int ldbb,\n                           double* w, double* z, 
lapack_int ldz );\n\nlapack_int LAPACKE_ssbgvx( int matrix_order, char jobz, char range, char uplo,\n                           lapack_int n, lapack_int ka, lapack_int kb,\n                           float* ab, lapack_int ldab, float* bb,\n                           lapack_int ldbb, float* q, lapack_int ldq, float vl,\n                           float vu, lapack_int il, lapack_int iu, float abstol,\n                           lapack_int* m, float* w, float* z, lapack_int ldz,\n                           lapack_int* ifail );\nlapack_int LAPACKE_dsbgvx( int matrix_order, char jobz, char range, char uplo,\n                           lapack_int n, lapack_int ka, lapack_int kb,\n                           double* ab, lapack_int ldab, double* bb,\n                           lapack_int ldbb, double* q, lapack_int ldq,\n                           double vl, double vu, lapack_int il, lapack_int iu,\n                           double abstol, lapack_int* m, double* w, double* z,\n                           lapack_int ldz, lapack_int* ifail );\n\nlapack_int LAPACKE_ssbtrd( int matrix_order, char vect, char uplo, lapack_int n,\n                           lapack_int kd, float* ab, lapack_int ldab, float* d,\n                           float* e, float* q, lapack_int ldq );\nlapack_int LAPACKE_dsbtrd( int matrix_order, char vect, char uplo, lapack_int n,\n                           lapack_int kd, double* ab, lapack_int ldab,\n                           double* d, double* e, double* q, lapack_int ldq );\n\nlapack_int LAPACKE_ssfrk( int matrix_order, char transr, char uplo, char trans,\n                          lapack_int n, lapack_int k, float alpha,\n                          const float* a, lapack_int lda, float beta,\n                          float* c );\nlapack_int LAPACKE_dsfrk( int matrix_order, char transr, char uplo, char trans,\n                          lapack_int n, lapack_int k, double alpha,\n                          const double* a, lapack_int lda, double beta,\n                          double* c );\n\nlapack_int LAPACKE_sspcon( int matrix_order, char uplo, lapack_int n,\n                           const float* ap, const lapack_int* ipiv, float anorm,\n                           float* rcond );\nlapack_int LAPACKE_dspcon( int matrix_order, char uplo, lapack_int n,\n                           const double* ap, const lapack_int* ipiv,\n                           double anorm, double* rcond );\nlapack_int LAPACKE_cspcon( int matrix_order, char uplo, lapack_int n,\n                           const lapack_complex_float* ap,\n                           const lapack_int* ipiv, float anorm, float* rcond );\nlapack_int LAPACKE_zspcon( int matrix_order, char uplo, lapack_int n,\n                           const lapack_complex_double* ap,\n                           const lapack_int* ipiv, double anorm,\n                           double* rcond );\n\nlapack_int LAPACKE_sspev( int matrix_order, char jobz, char uplo, lapack_int n,\n                          float* ap, float* w, float* z, lapack_int ldz );\nlapack_int LAPACKE_dspev( int matrix_order, char jobz, char uplo, lapack_int n,\n                          double* ap, double* w, double* z, lapack_int ldz );\n\nlapack_int LAPACKE_sspevd( int matrix_order, char jobz, char uplo, lapack_int n,\n                           float* ap, float* w, float* z, lapack_int ldz );\nlapack_int LAPACKE_dspevd( int matrix_order, char jobz, char uplo, lapack_int n,\n                           double* ap, double* w, double* z, lapack_int ldz );\n\nlapack_int 
LAPACKE_sspevx( int matrix_order, char jobz, char range, char uplo,\n                           lapack_int n, float* ap, float vl, float vu,\n                           lapack_int il, lapack_int iu, float abstol,\n                           lapack_int* m, float* w, float* z, lapack_int ldz,\n                           lapack_int* ifail );\nlapack_int LAPACKE_dspevx( int matrix_order, char jobz, char range, char uplo,\n                           lapack_int n, double* ap, double vl, double vu,\n                           lapack_int il, lapack_int iu, double abstol,\n                           lapack_int* m, double* w, double* z, lapack_int ldz,\n                           lapack_int* ifail );\n\nlapack_int LAPACKE_sspgst( int matrix_order, lapack_int itype, char uplo,\n                           lapack_int n, float* ap, const float* bp );\nlapack_int LAPACKE_dspgst( int matrix_order, lapack_int itype, char uplo,\n                           lapack_int n, double* ap, const double* bp );\n\nlapack_int LAPACKE_sspgv( int matrix_order, lapack_int itype, char jobz,\n                          char uplo, lapack_int n, float* ap, float* bp,\n                          float* w, float* z, lapack_int ldz );\nlapack_int LAPACKE_dspgv( int matrix_order, lapack_int itype, char jobz,\n                          char uplo, lapack_int n, double* ap, double* bp,\n                          double* w, double* z, lapack_int ldz );\n\nlapack_int LAPACKE_sspgvd( int matrix_order, lapack_int itype, char jobz,\n                           char uplo, lapack_int n, float* ap, float* bp,\n                           float* w, float* z, lapack_int ldz );\nlapack_int LAPACKE_dspgvd( int matrix_order, lapack_int itype, char jobz,\n                           char uplo, lapack_int n, double* ap, double* bp,\n                           double* w, double* z, lapack_int ldz );\n\nlapack_int LAPACKE_sspgvx( int matrix_order, lapack_int itype, char jobz,\n                           char range, char uplo, lapack_int n, float* ap,\n                           float* bp, float vl, float vu, lapack_int il,\n                           lapack_int iu, float abstol, lapack_int* m, float* w,\n                           float* z, lapack_int ldz, lapack_int* ifail );\nlapack_int LAPACKE_dspgvx( int matrix_order, lapack_int itype, char jobz,\n                           char range, char uplo, lapack_int n, double* ap,\n                           double* bp, double vl, double vu, lapack_int il,\n                           lapack_int iu, double abstol, lapack_int* m,\n                           double* w, double* z, lapack_int ldz,\n                           lapack_int* ifail );\n\nlapack_int LAPACKE_ssprfs( int matrix_order, char uplo, lapack_int n,\n                           lapack_int nrhs, const float* ap, const float* afp,\n                           const lapack_int* ipiv, const float* b,\n                           lapack_int ldb, float* x, lapack_int ldx,\n                           float* ferr, float* berr );\nlapack_int LAPACKE_dsprfs( int matrix_order, char uplo, lapack_int n,\n                           lapack_int nrhs, const double* ap, const double* afp,\n                           const lapack_int* ipiv, const double* b,\n                           lapack_int ldb, double* x, lapack_int ldx,\n                           double* ferr, double* berr );\nlapack_int LAPACKE_csprfs( int matrix_order, char uplo, lapack_int n,\n                           lapack_int nrhs, const lapack_complex_float* ap,\n                           const 
lapack_complex_float* afp,\n                           const lapack_int* ipiv,\n                           const lapack_complex_float* b, lapack_int ldb,\n                           lapack_complex_float* x, lapack_int ldx, float* ferr,\n                           float* berr );\nlapack_int LAPACKE_zsprfs( int matrix_order, char uplo, lapack_int n,\n                           lapack_int nrhs, const lapack_complex_double* ap,\n                           const lapack_complex_double* afp,\n                           const lapack_int* ipiv,\n                           const lapack_complex_double* b, lapack_int ldb,\n                           lapack_complex_double* x, lapack_int ldx,\n                           double* ferr, double* berr );\n\nlapack_int LAPACKE_sspsv( int matrix_order, char uplo, lapack_int n,\n                          lapack_int nrhs, float* ap, lapack_int* ipiv,\n                          float* b, lapack_int ldb );\nlapack_int LAPACKE_dspsv( int matrix_order, char uplo, lapack_int n,\n                          lapack_int nrhs, double* ap, lapack_int* ipiv,\n                          double* b, lapack_int ldb );\nlapack_int LAPACKE_cspsv( int matrix_order, char uplo, lapack_int n,\n                          lapack_int nrhs, lapack_complex_float* ap,\n                          lapack_int* ipiv, lapack_complex_float* b,\n                          lapack_int ldb );\nlapack_int LAPACKE_zspsv( int matrix_order, char uplo, lapack_int n,\n                          lapack_int nrhs, lapack_complex_double* ap,\n                          lapack_int* ipiv, lapack_complex_double* b,\n                          lapack_int ldb );\n\nlapack_int LAPACKE_sspsvx( int matrix_order, char fact, char uplo, lapack_int n,\n                           lapack_int nrhs, const float* ap, float* afp,\n                           lapack_int* ipiv, const float* b, lapack_int ldb,\n                           float* x, lapack_int ldx, float* rcond, float* ferr,\n                           float* berr );\nlapack_int LAPACKE_dspsvx( int matrix_order, char fact, char uplo, lapack_int n,\n                           lapack_int nrhs, const double* ap, double* afp,\n                           lapack_int* ipiv, const double* b, lapack_int ldb,\n                           double* x, lapack_int ldx, double* rcond,\n                           double* ferr, double* berr );\nlapack_int LAPACKE_cspsvx( int matrix_order, char fact, char uplo, lapack_int n,\n                           lapack_int nrhs, const lapack_complex_float* ap,\n                           lapack_complex_float* afp, lapack_int* ipiv,\n                           const lapack_complex_float* b, lapack_int ldb,\n                           lapack_complex_float* x, lapack_int ldx,\n                           float* rcond, float* ferr, float* berr );\nlapack_int LAPACKE_zspsvx( int matrix_order, char fact, char uplo, lapack_int n,\n                           lapack_int nrhs, const lapack_complex_double* ap,\n                           lapack_complex_double* afp, lapack_int* ipiv,\n                           const lapack_complex_double* b, lapack_int ldb,\n                           lapack_complex_double* x, lapack_int ldx,\n                           double* rcond, double* ferr, double* berr );\n\nlapack_int LAPACKE_ssptrd( int matrix_order, char uplo, lapack_int n, float* ap,\n                           float* d, float* e, float* tau );\nlapack_int LAPACKE_dsptrd( int matrix_order, char uplo, lapack_int n,\n                           double* ap, double* d, 
double* e, double* tau );\n\nlapack_int LAPACKE_ssptrf( int matrix_order, char uplo, lapack_int n, float* ap,\n                           lapack_int* ipiv );\nlapack_int LAPACKE_dsptrf( int matrix_order, char uplo, lapack_int n,\n                           double* ap, lapack_int* ipiv );\nlapack_int LAPACKE_csptrf( int matrix_order, char uplo, lapack_int n,\n                           lapack_complex_float* ap, lapack_int* ipiv );\nlapack_int LAPACKE_zsptrf( int matrix_order, char uplo, lapack_int n,\n                           lapack_complex_double* ap, lapack_int* ipiv );\n\nlapack_int LAPACKE_ssptri( int matrix_order, char uplo, lapack_int n, float* ap,\n                           const lapack_int* ipiv );\nlapack_int LAPACKE_dsptri( int matrix_order, char uplo, lapack_int n,\n                           double* ap, const lapack_int* ipiv );\nlapack_int LAPACKE_csptri( int matrix_order, char uplo, lapack_int n,\n                           lapack_complex_float* ap, const lapack_int* ipiv );\nlapack_int LAPACKE_zsptri( int matrix_order, char uplo, lapack_int n,\n                           lapack_complex_double* ap, const lapack_int* ipiv );\n\nlapack_int LAPACKE_ssptrs( int matrix_order, char uplo, lapack_int n,\n                           lapack_int nrhs, const float* ap,\n                           const lapack_int* ipiv, float* b, lapack_int ldb );\nlapack_int LAPACKE_dsptrs( int matrix_order, char uplo, lapack_int n,\n                           lapack_int nrhs, const double* ap,\n                           const lapack_int* ipiv, double* b, lapack_int ldb );\nlapack_int LAPACKE_csptrs( int matrix_order, char uplo, lapack_int n,\n                           lapack_int nrhs, const lapack_complex_float* ap,\n                           const lapack_int* ipiv, lapack_complex_float* b,\n                           lapack_int ldb );\nlapack_int LAPACKE_zsptrs( int matrix_order, char uplo, lapack_int n,\n                           lapack_int nrhs, const lapack_complex_double* ap,\n                           const lapack_int* ipiv, lapack_complex_double* b,\n                           lapack_int ldb );\n\nlapack_int LAPACKE_sstebz( char range, char order, lapack_int n, float vl,\n                           float vu, lapack_int il, lapack_int iu, float abstol,\n                           const float* d, const float* e, lapack_int* m,\n                           lapack_int* nsplit, float* w, lapack_int* iblock,\n                           lapack_int* isplit );\nlapack_int LAPACKE_dstebz( char range, char order, lapack_int n, double vl,\n                           double vu, lapack_int il, lapack_int iu,\n                           double abstol, const double* d, const double* e,\n                           lapack_int* m, lapack_int* nsplit, double* w,\n                           lapack_int* iblock, lapack_int* isplit );\n\nlapack_int LAPACKE_sstedc( int matrix_order, char compz, lapack_int n, float* d,\n                           float* e, float* z, lapack_int ldz );\nlapack_int LAPACKE_dstedc( int matrix_order, char compz, lapack_int n,\n                           double* d, double* e, double* z, lapack_int ldz );\nlapack_int LAPACKE_cstedc( int matrix_order, char compz, lapack_int n, float* d,\n                           float* e, lapack_complex_float* z, lapack_int ldz );\nlapack_int LAPACKE_zstedc( int matrix_order, char compz, lapack_int n,\n                           double* d, double* e, lapack_complex_double* z,\n                           lapack_int ldz );\n\n
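/*\n * Example (editor's illustrative sketch, not part of the LAPACKE API):\n * full eigen-decomposition of a symmetric tridiagonal matrix with the\n * divide-and-conquer driver LAPACKE_dstedc (declared above).\n *\n *   double d[3] = { 2, 2, 2 };   // diagonal; overwritten by eigenvalues\n *   double e[2] = { -1, -1 };    // off-diagonal; destroyed on exit\n *   double z[9];                 // eigenvectors when compz = 'I'\n *   lapack_int info = LAPACKE_dstedc( LAPACK_ROW_MAJOR, 'I', 3, d, e, z, 3 );\n */\n\nlapack_int LAPACKE_sstegr( int 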
matrix_order, char jobz, char range,\n                           lapack_int n, float* d, float* e, float vl, float vu,\n                           lapack_int il, lapack_int iu, float abstol,\n                           lapack_int* m, float* w, float* z, lapack_int ldz,\n                           lapack_int* isuppz );\nlapack_int LAPACKE_dstegr( int matrix_order, char jobz, char range,\n                           lapack_int n, double* d, double* e, double vl,\n                           double vu, lapack_int il, lapack_int iu,\n                           double abstol, lapack_int* m, double* w, double* z,\n                           lapack_int ldz, lapack_int* isuppz );\nlapack_int LAPACKE_cstegr( int matrix_order, char jobz, char range,\n                           lapack_int n, float* d, float* e, float vl, float vu,\n                           lapack_int il, lapack_int iu, float abstol,\n                           lapack_int* m, float* w, lapack_complex_float* z,\n                           lapack_int ldz, lapack_int* isuppz );\nlapack_int LAPACKE_zstegr( int matrix_order, char jobz, char range,\n                           lapack_int n, double* d, double* e, double vl,\n                           double vu, lapack_int il, lapack_int iu,\n                           double abstol, lapack_int* m, double* w,\n                           lapack_complex_double* z, lapack_int ldz,\n                           lapack_int* isuppz );\n\nlapack_int LAPACKE_sstein( int matrix_order, lapack_int n, const float* d,\n                           const float* e, lapack_int m, const float* w,\n                           const lapack_int* iblock, const lapack_int* isplit,\n                           float* z, lapack_int ldz, lapack_int* ifailv );\nlapack_int LAPACKE_dstein( int matrix_order, lapack_int n, const double* d,\n                           const double* e, lapack_int m, const double* w,\n                           const lapack_int* iblock, const lapack_int* isplit,\n                           double* z, lapack_int ldz, lapack_int* ifailv );\nlapack_int LAPACKE_cstein( int matrix_order, lapack_int n, const float* d,\n                           const float* e, lapack_int m, const float* w,\n                           const lapack_int* iblock, const lapack_int* isplit,\n                           lapack_complex_float* z, lapack_int ldz,\n                           lapack_int* ifailv );\nlapack_int LAPACKE_zstein( int matrix_order, lapack_int n, const double* d,\n                           const double* e, lapack_int m, const double* w,\n                           const lapack_int* iblock, const lapack_int* isplit,\n                           lapack_complex_double* z, lapack_int ldz,\n                           lapack_int* ifailv );\n\nlapack_int LAPACKE_sstemr( int matrix_order, char jobz, char range,\n                           lapack_int n, float* d, float* e, float vl, float vu,\n                           lapack_int il, lapack_int iu, lapack_int* m,\n                           float* w, float* z, lapack_int ldz, lapack_int nzc,\n                           lapack_int* isuppz, lapack_logical* tryrac );\nlapack_int LAPACKE_dstemr( int matrix_order, char jobz, char range,\n                           lapack_int n, double* d, double* e, double vl,\n                           double vu, lapack_int il, lapack_int iu,\n                           lapack_int* m, double* w, double* z, lapack_int ldz,\n                           lapack_int nzc, lapack_int* isuppz,\n                           lapack_logical* 
tryrac );\nlapack_int LAPACKE_cstemr( int matrix_order, char jobz, char range,\n                           lapack_int n, float* d, float* e, float vl, float vu,\n                           lapack_int il, lapack_int iu, lapack_int* m,\n                           float* w, lapack_complex_float* z, lapack_int ldz,\n                           lapack_int nzc, lapack_int* isuppz,\n                           lapack_logical* tryrac );\nlapack_int LAPACKE_zstemr( int matrix_order, char jobz, char range,\n                           lapack_int n, double* d, double* e, double vl,\n                           double vu, lapack_int il, lapack_int iu,\n                           lapack_int* m, double* w, lapack_complex_double* z,\n                           lapack_int ldz, lapack_int nzc, lapack_int* isuppz,\n                           lapack_logical* tryrac );\n\nlapack_int LAPACKE_ssteqr( int matrix_order, char compz, lapack_int n, float* d,\n                           float* e, float* z, lapack_int ldz );\nlapack_int LAPACKE_dsteqr( int matrix_order, char compz, lapack_int n,\n                           double* d, double* e, double* z, lapack_int ldz );\nlapack_int LAPACKE_csteqr( int matrix_order, char compz, lapack_int n, float* d,\n                           float* e, lapack_complex_float* z, lapack_int ldz );\nlapack_int LAPACKE_zsteqr( int matrix_order, char compz, lapack_int n,\n                           double* d, double* e, lapack_complex_double* z,\n                           lapack_int ldz );\n\nlapack_int LAPACKE_ssterf( lapack_int n, float* d, float* e );\nlapack_int LAPACKE_dsterf( lapack_int n, double* d, double* e );\n\nlapack_int LAPACKE_sstev( int matrix_order, char jobz, lapack_int n, float* d,\n                          float* e, float* z, lapack_int ldz );\nlapack_int LAPACKE_dstev( int matrix_order, char jobz, lapack_int n, double* d,\n                          double* e, double* z, lapack_int ldz );\n\nlapack_int LAPACKE_sstevd( int matrix_order, char jobz, lapack_int n, float* d,\n                           float* e, float* z, lapack_int ldz );\nlapack_int LAPACKE_dstevd( int matrix_order, char jobz, lapack_int n, double* d,\n                           double* e, double* z, lapack_int ldz );\n\nlapack_int LAPACKE_sstevr( int matrix_order, char jobz, char range,\n                           lapack_int n, float* d, float* e, float vl, float vu,\n                           lapack_int il, lapack_int iu, float abstol,\n                           lapack_int* m, float* w, float* z, lapack_int ldz,\n                           lapack_int* isuppz );\nlapack_int LAPACKE_dstevr( int matrix_order, char jobz, char range,\n                           lapack_int n, double* d, double* e, double vl,\n                           double vu, lapack_int il, lapack_int iu,\n                           double abstol, lapack_int* m, double* w, double* z,\n                           lapack_int ldz, lapack_int* isuppz );\n\nlapack_int LAPACKE_sstevx( int matrix_order, char jobz, char range,\n                           lapack_int n, float* d, float* e, float vl, float vu,\n                           lapack_int il, lapack_int iu, float abstol,\n                           lapack_int* m, float* w, float* z, lapack_int ldz,\n                           lapack_int* ifail );\nlapack_int LAPACKE_dstevx( int matrix_order, char jobz, char range,\n                           lapack_int n, double* d, double* e, double vl,\n                           double vu, lapack_int il, lapack_int iu,\n                           
double abstol, lapack_int* m, double* w, double* z,\n                           lapack_int ldz, lapack_int* ifail );\n\nlapack_int LAPACKE_ssycon( int matrix_order, char uplo, lapack_int n,\n                           const float* a, lapack_int lda,\n                           const lapack_int* ipiv, float anorm, float* rcond );\nlapack_int LAPACKE_dsycon( int matrix_order, char uplo, lapack_int n,\n                           const double* a, lapack_int lda,\n                           const lapack_int* ipiv, double anorm,\n                           double* rcond );\nlapack_int LAPACKE_csycon( int matrix_order, char uplo, lapack_int n,\n                           const lapack_complex_float* a, lapack_int lda,\n                           const lapack_int* ipiv, float anorm, float* rcond );\nlapack_int LAPACKE_zsycon( int matrix_order, char uplo, lapack_int n,\n                           const lapack_complex_double* a, lapack_int lda,\n                           const lapack_int* ipiv, double anorm,\n                           double* rcond );\n\nlapack_int LAPACKE_ssyequb( int matrix_order, char uplo, lapack_int n,\n                            const float* a, lapack_int lda, float* s,\n                            float* scond, float* amax );\nlapack_int LAPACKE_dsyequb( int matrix_order, char uplo, lapack_int n,\n                            const double* a, lapack_int lda, double* s,\n                            double* scond, double* amax );\nlapack_int LAPACKE_csyequb( int matrix_order, char uplo, lapack_int n,\n                            const lapack_complex_float* a, lapack_int lda,\n                            float* s, float* scond, float* amax );\nlapack_int LAPACKE_zsyequb( int matrix_order, char uplo, lapack_int n,\n                            const lapack_complex_double* a, lapack_int lda,\n                            double* s, double* scond, double* amax );\n\nlapack_int LAPACKE_ssyev( int matrix_order, char jobz, char uplo, lapack_int n,\n                          float* a, lapack_int lda, float* w );\nlapack_int LAPACKE_dsyev( int matrix_order, char jobz, char uplo, lapack_int n,\n                          double* a, lapack_int lda, double* w );\n\nlapack_int LAPACKE_ssyevd( int matrix_order, char jobz, char uplo, lapack_int n,\n                           float* a, lapack_int lda, float* w );\nlapack_int LAPACKE_dsyevd( int matrix_order, char jobz, char uplo, lapack_int n,\n                           double* a, lapack_int lda, double* w );\n\nlapack_int LAPACKE_ssyevr( int matrix_order, char jobz, char range, char uplo,\n                           lapack_int n, float* a, lapack_int lda, float vl,\n                           float vu, lapack_int il, lapack_int iu, float abstol,\n                           lapack_int* m, float* w, float* z, lapack_int ldz,\n                           lapack_int* isuppz );\nlapack_int LAPACKE_dsyevr( int matrix_order, char jobz, char range, char uplo,\n                           lapack_int n, double* a, lapack_int lda, double vl,\n                           double vu, lapack_int il, lapack_int iu,\n                           double abstol, lapack_int* m, double* w, double* z,\n                           lapack_int ldz, lapack_int* isuppz );\n\nlapack_int LAPACKE_ssyevx( int matrix_order, char jobz, char range, char uplo,\n                           lapack_int n, float* a, lapack_int lda, float vl,\n                           float vu, lapack_int il, lapack_int iu, float abstol,\n                           lapack_int* m, float* w, float* z, 
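\n/* Editor's note: illustrative sketch only, not part of the upstream header.\n * A typical dense symmetric eigenproblem with dsyev (declared above); the\n * 2x2 matrix is hypothetical and only its upper triangle is referenced.\n *\n *   double a[4] = { 2.0, 1.0,\n *                   0.0, 2.0 };        // upper triangle of A (row-major)\n *   double w[2];                       // receives the eigenvalues\n *   lapack_int info =\n *       LAPACKE_dsyev( LAPACK_ROW_MAJOR, 'V', 'U', 2, a, 2, w );\n *   // On success a is overwritten with the orthonormal eigenvectors.\n */\n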
lapack_int ldz,\n                           lapack_int* ifail );\nlapack_int LAPACKE_dsyevx( int matrix_order, char jobz, char range, char uplo,\n                           lapack_int n, double* a, lapack_int lda, double vl,\n                           double vu, lapack_int il, lapack_int iu,\n                           double abstol, lapack_int* m, double* w, double* z,\n                           lapack_int ldz, lapack_int* ifail );\n\nlapack_int LAPACKE_ssygst( int matrix_order, lapack_int itype, char uplo,\n                           lapack_int n, float* a, lapack_int lda,\n                           const float* b, lapack_int ldb );\nlapack_int LAPACKE_dsygst( int matrix_order, lapack_int itype, char uplo,\n                           lapack_int n, double* a, lapack_int lda,\n                           const double* b, lapack_int ldb );\n\nlapack_int LAPACKE_ssygv( int matrix_order, lapack_int itype, char jobz,\n                          char uplo, lapack_int n, float* a, lapack_int lda,\n                          float* b, lapack_int ldb, float* w );\nlapack_int LAPACKE_dsygv( int matrix_order, lapack_int itype, char jobz,\n                          char uplo, lapack_int n, double* a, lapack_int lda,\n                          double* b, lapack_int ldb, double* w );\n\nlapack_int LAPACKE_ssygvd( int matrix_order, lapack_int itype, char jobz,\n                           char uplo, lapack_int n, float* a, lapack_int lda,\n                           float* b, lapack_int ldb, float* w );\nlapack_int LAPACKE_dsygvd( int matrix_order, lapack_int itype, char jobz,\n                           char uplo, lapack_int n, double* a, lapack_int lda,\n                           double* b, lapack_int ldb, double* w );\n\nlapack_int LAPACKE_ssygvx( int matrix_order, lapack_int itype, char jobz,\n                           char range, char uplo, lapack_int n, float* a,\n                           lapack_int lda, float* b, lapack_int ldb, float vl,\n                           float vu, lapack_int il, lapack_int iu, float abstol,\n                           lapack_int* m, float* w, float* z, lapack_int ldz,\n                           lapack_int* ifail );\nlapack_int LAPACKE_dsygvx( int matrix_order, lapack_int itype, char jobz,\n                           char range, char uplo, lapack_int n, double* a,\n                           lapack_int lda, double* b, lapack_int ldb, double vl,\n                           double vu, lapack_int il, lapack_int iu,\n                           double abstol, lapack_int* m, double* w, double* z,\n                           lapack_int ldz, lapack_int* ifail );\n\nlapack_int LAPACKE_ssyrfs( int matrix_order, char uplo, lapack_int n,\n                           lapack_int nrhs, const float* a, lapack_int lda,\n                           const float* af, lapack_int ldaf,\n                           const lapack_int* ipiv, const float* b,\n                           lapack_int ldb, float* x, lapack_int ldx,\n                           float* ferr, float* berr );\nlapack_int LAPACKE_dsyrfs( int matrix_order, char uplo, lapack_int n,\n                           lapack_int nrhs, const double* a, lapack_int lda,\n                           const double* af, lapack_int ldaf,\n                           const lapack_int* ipiv, const double* b,\n                           lapack_int ldb, double* x, lapack_int ldx,\n                           double* ferr, double* berr );\nlapack_int LAPACKE_csyrfs( int matrix_order, char uplo, lapack_int n,\n                           lapack_int 
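\n/* Editor's note: illustrative sketch only. The generalized symmetric\n * eigenproblem A*x = lambda*B*x via dsygv (declared above) with itype = 1;\n * B must be symmetric positive definite. The matrices are hypothetical.\n *\n *   double a[4]  = { 2.0, 1.0, 0.0, 2.0 }; // upper triangle of A (row-major)\n *   double bm[4] = { 1.0, 0.0, 0.0, 1.0 }; // upper triangle of B (here B = I)\n *   double w[2];                           // receives the eigenvalues\n *   lapack_int info =\n *       LAPACKE_dsygv( LAPACK_ROW_MAJOR, 1, 'V', 'U', 2, a, 2, bm, 2, w );\n *   // On exit a holds the eigenvectors, normalized so that Z^T * B * Z = I.\n */\n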
nrhs, const lapack_complex_float* a,\n                           lapack_int lda, const lapack_complex_float* af,\n                           lapack_int ldaf, const lapack_int* ipiv,\n                           const lapack_complex_float* b, lapack_int ldb,\n                           lapack_complex_float* x, lapack_int ldx, float* ferr,\n                           float* berr );\nlapack_int LAPACKE_zsyrfs( int matrix_order, char uplo, lapack_int n,\n                           lapack_int nrhs, const lapack_complex_double* a,\n                           lapack_int lda, const lapack_complex_double* af,\n                           lapack_int ldaf, const lapack_int* ipiv,\n                           const lapack_complex_double* b, lapack_int ldb,\n                           lapack_complex_double* x, lapack_int ldx,\n                           double* ferr, double* berr );\n\nlapack_int LAPACKE_ssyrfsx( int matrix_order, char uplo, char equed,\n                            lapack_int n, lapack_int nrhs, const float* a,\n                            lapack_int lda, const float* af, lapack_int ldaf,\n                            const lapack_int* ipiv, const float* s,\n                            const float* b, lapack_int ldb, float* x,\n                            lapack_int ldx, float* rcond, float* berr,\n                            lapack_int n_err_bnds, float* err_bnds_norm,\n                            float* err_bnds_comp, lapack_int nparams,\n                            float* params );\nlapack_int LAPACKE_dsyrfsx( int matrix_order, char uplo, char equed,\n                            lapack_int n, lapack_int nrhs, const double* a,\n                            lapack_int lda, const double* af, lapack_int ldaf,\n                            const lapack_int* ipiv, const double* s,\n                            const double* b, lapack_int ldb, double* x,\n                            lapack_int ldx, double* rcond, double* berr,\n                            lapack_int n_err_bnds, double* err_bnds_norm,\n                            double* err_bnds_comp, lapack_int nparams,\n                            double* params );\nlapack_int LAPACKE_csyrfsx( int matrix_order, char uplo, char equed,\n                            lapack_int n, lapack_int nrhs,\n                            const lapack_complex_float* a, lapack_int lda,\n                            const lapack_complex_float* af, lapack_int ldaf,\n                            const lapack_int* ipiv, const float* s,\n                            const lapack_complex_float* b, lapack_int ldb,\n                            lapack_complex_float* x, lapack_int ldx,\n                            float* rcond, float* berr, lapack_int n_err_bnds,\n                            float* err_bnds_norm, float* err_bnds_comp,\n                            lapack_int nparams, float* params );\nlapack_int LAPACKE_zsyrfsx( int matrix_order, char uplo, char equed,\n                            lapack_int n, lapack_int nrhs,\n                            const lapack_complex_double* a, lapack_int lda,\n                            const lapack_complex_double* af, lapack_int ldaf,\n                            const lapack_int* ipiv, const double* s,\n                            const lapack_complex_double* b, lapack_int ldb,\n                            lapack_complex_double* x, lapack_int ldx,\n                            double* rcond, double* berr, lapack_int n_err_bnds,\n                            double* err_bnds_norm, double* err_bnds_comp,\n                            
lapack_int nparams, double* params );\n\nlapack_int LAPACKE_ssysv( int matrix_order, char uplo, lapack_int n,\n                          lapack_int nrhs, float* a, lapack_int lda,\n                          lapack_int* ipiv, float* b, lapack_int ldb );\nlapack_int LAPACKE_dsysv( int matrix_order, char uplo, lapack_int n,\n                          lapack_int nrhs, double* a, lapack_int lda,\n                          lapack_int* ipiv, double* b, lapack_int ldb );\nlapack_int LAPACKE_csysv( int matrix_order, char uplo, lapack_int n,\n                          lapack_int nrhs, lapack_complex_float* a,\n                          lapack_int lda, lapack_int* ipiv,\n                          lapack_complex_float* b, lapack_int ldb );\nlapack_int LAPACKE_zsysv( int matrix_order, char uplo, lapack_int n,\n                          lapack_int nrhs, lapack_complex_double* a,\n                          lapack_int lda, lapack_int* ipiv,\n                          lapack_complex_double* b, lapack_int ldb );\n\nlapack_int LAPACKE_ssysvx( int matrix_order, char fact, char uplo, lapack_int n,\n                           lapack_int nrhs, const float* a, lapack_int lda,\n                           float* af, lapack_int ldaf, lapack_int* ipiv,\n                           const float* b, lapack_int ldb, float* x,\n                           lapack_int ldx, float* rcond, float* ferr,\n                           float* berr );\nlapack_int LAPACKE_dsysvx( int matrix_order, char fact, char uplo, lapack_int n,\n                           lapack_int nrhs, const double* a, lapack_int lda,\n                           double* af, lapack_int ldaf, lapack_int* ipiv,\n                           const double* b, lapack_int ldb, double* x,\n                           lapack_int ldx, double* rcond, double* ferr,\n                           double* berr );\nlapack_int LAPACKE_csysvx( int matrix_order, char fact, char uplo, lapack_int n,\n                           lapack_int nrhs, const lapack_complex_float* a,\n                           lapack_int lda, lapack_complex_float* af,\n                           lapack_int ldaf, lapack_int* ipiv,\n                           const lapack_complex_float* b, lapack_int ldb,\n                           lapack_complex_float* x, lapack_int ldx,\n                           float* rcond, float* ferr, float* berr );\nlapack_int LAPACKE_zsysvx( int matrix_order, char fact, char uplo, lapack_int n,\n                           lapack_int nrhs, const lapack_complex_double* a,\n                           lapack_int lda, lapack_complex_double* af,\n                           lapack_int ldaf, lapack_int* ipiv,\n                           const lapack_complex_double* b, lapack_int ldb,\n                           lapack_complex_double* x, lapack_int ldx,\n                           double* rcond, double* ferr, double* berr );\n\nlapack_int LAPACKE_ssysvxx( int matrix_order, char fact, char uplo,\n                            lapack_int n, lapack_int nrhs, float* a,\n                            lapack_int lda, float* af, lapack_int ldaf,\n                            lapack_int* ipiv, char* equed, float* s, float* b,\n                            lapack_int ldb, float* x, lapack_int ldx,\n                            float* rcond, float* rpvgrw, float* berr,\n                            lapack_int n_err_bnds, float* err_bnds_norm,\n                            float* err_bnds_comp, lapack_int nparams,\n                            float* params );\nlapack_int LAPACKE_dsysvxx( int matrix_order, char fact, 
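\n/* Editor's note: illustrative sketch only. Solving A*x = b for symmetric\n * indefinite A with the dsysv driver declared above (Bunch-Kaufman\n * factorization); the numbers are hypothetical.\n *\n *   double a[4] = { 4.0, 2.0,\n *                   0.0, -3.0 };       // upper triangle of A (row-major)\n *   double b[2] = { 1.0, 2.0 };        // right-hand side, overwritten by x\n *   lapack_int ipiv[2];                // pivot indices from the factorization\n *   lapack_int info =\n *       LAPACKE_dsysv( LAPACK_ROW_MAJOR, 'U', 2, 1, a, 2, ipiv, b, 1 );\n */\n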
char uplo,\n                            lapack_int n, lapack_int nrhs, double* a,\n                            lapack_int lda, double* af, lapack_int ldaf,\n                            lapack_int* ipiv, char* equed, double* s, double* b,\n                            lapack_int ldb, double* x, lapack_int ldx,\n                            double* rcond, double* rpvgrw, double* berr,\n                            lapack_int n_err_bnds, double* err_bnds_norm,\n                            double* err_bnds_comp, lapack_int nparams,\n                            double* params );\nlapack_int LAPACKE_csysvxx( int matrix_order, char fact, char uplo,\n                            lapack_int n, lapack_int nrhs,\n                            lapack_complex_float* a, lapack_int lda,\n                            lapack_complex_float* af, lapack_int ldaf,\n                            lapack_int* ipiv, char* equed, float* s,\n                            lapack_complex_float* b, lapack_int ldb,\n                            lapack_complex_float* x, lapack_int ldx,\n                            float* rcond, float* rpvgrw, float* berr,\n                            lapack_int n_err_bnds, float* err_bnds_norm,\n                            float* err_bnds_comp, lapack_int nparams,\n                            float* params );\nlapack_int LAPACKE_zsysvxx( int matrix_order, char fact, char uplo,\n                            lapack_int n, lapack_int nrhs,\n                            lapack_complex_double* a, lapack_int lda,\n                            lapack_complex_double* af, lapack_int ldaf,\n                            lapack_int* ipiv, char* equed, double* s,\n                            lapack_complex_double* b, lapack_int ldb,\n                            lapack_complex_double* x, lapack_int ldx,\n                            double* rcond, double* rpvgrw, double* berr,\n                            lapack_int n_err_bnds, double* err_bnds_norm,\n                            double* err_bnds_comp, lapack_int nparams,\n                            double* params );\n\nlapack_int LAPACKE_ssytrd( int matrix_order, char uplo, lapack_int n, float* a,\n                           lapack_int lda, float* d, float* e, float* tau );\nlapack_int LAPACKE_dsytrd( int matrix_order, char uplo, lapack_int n, double* a,\n                           lapack_int lda, double* d, double* e, double* tau );\n\nlapack_int LAPACKE_ssytrf( int matrix_order, char uplo, lapack_int n, float* a,\n                           lapack_int lda, lapack_int* ipiv );\nlapack_int LAPACKE_dsytrf( int matrix_order, char uplo, lapack_int n, double* a,\n                           lapack_int lda, lapack_int* ipiv );\nlapack_int LAPACKE_csytrf( int matrix_order, char uplo, lapack_int n,\n                           lapack_complex_float* a, lapack_int lda,\n                           lapack_int* ipiv );\nlapack_int LAPACKE_zsytrf( int matrix_order, char uplo, lapack_int n,\n                           lapack_complex_double* a, lapack_int lda,\n                           lapack_int* ipiv );\n\nlapack_int LAPACKE_ssytri( int matrix_order, char uplo, lapack_int n, float* a,\n                           lapack_int lda, const lapack_int* ipiv );\nlapack_int LAPACKE_dsytri( int matrix_order, char uplo, lapack_int n, double* a,\n                           lapack_int lda, const lapack_int* ipiv );\nlapack_int LAPACKE_csytri( int matrix_order, char uplo, lapack_int n,\n                           lapack_complex_float* a, lapack_int lda,\n                           const 
lapack_int* ipiv );\nlapack_int LAPACKE_zsytri( int matrix_order, char uplo, lapack_int n,\n                           lapack_complex_double* a, lapack_int lda,\n                           const lapack_int* ipiv );\n\nlapack_int LAPACKE_ssytrs( int matrix_order, char uplo, lapack_int n,\n                           lapack_int nrhs, const float* a, lapack_int lda,\n                           const lapack_int* ipiv, float* b, lapack_int ldb );\nlapack_int LAPACKE_dsytrs( int matrix_order, char uplo, lapack_int n,\n                           lapack_int nrhs, const double* a, lapack_int lda,\n                           const lapack_int* ipiv, double* b, lapack_int ldb );\nlapack_int LAPACKE_csytrs( int matrix_order, char uplo, lapack_int n,\n                           lapack_int nrhs, const lapack_complex_float* a,\n                           lapack_int lda, const lapack_int* ipiv,\n                           lapack_complex_float* b, lapack_int ldb );\nlapack_int LAPACKE_zsytrs( int matrix_order, char uplo, lapack_int n,\n                           lapack_int nrhs, const lapack_complex_double* a,\n                           lapack_int lda, const lapack_int* ipiv,\n                           lapack_complex_double* b, lapack_int ldb );\n\nlapack_int LAPACKE_stbcon( int matrix_order, char norm, char uplo, char diag,\n                           lapack_int n, lapack_int kd, const float* ab,\n                           lapack_int ldab, float* rcond );\nlapack_int LAPACKE_dtbcon( int matrix_order, char norm, char uplo, char diag,\n                           lapack_int n, lapack_int kd, const double* ab,\n                           lapack_int ldab, double* rcond );\nlapack_int LAPACKE_ctbcon( int matrix_order, char norm, char uplo, char diag,\n                           lapack_int n, lapack_int kd,\n                           const lapack_complex_float* ab, lapack_int ldab,\n                           float* rcond );\nlapack_int LAPACKE_ztbcon( int matrix_order, char norm, char uplo, char diag,\n                           lapack_int n, lapack_int kd,\n                           const lapack_complex_double* ab, lapack_int ldab,\n                           double* rcond );\n\nlapack_int LAPACKE_stbrfs( int matrix_order, char uplo, char trans, char diag,\n                           lapack_int n, lapack_int kd, lapack_int nrhs,\n                           const float* ab, lapack_int ldab, const float* b,\n                           lapack_int ldb, const float* x, lapack_int ldx,\n                           float* ferr, float* berr );\nlapack_int LAPACKE_dtbrfs( int matrix_order, char uplo, char trans, char diag,\n                           lapack_int n, lapack_int kd, lapack_int nrhs,\n                           const double* ab, lapack_int ldab, const double* b,\n                           lapack_int ldb, const double* x, lapack_int ldx,\n                           double* ferr, double* berr );\nlapack_int LAPACKE_ctbrfs( int matrix_order, char uplo, char trans, char diag,\n                           lapack_int n, lapack_int kd, lapack_int nrhs,\n                           const lapack_complex_float* ab, lapack_int ldab,\n                           const lapack_complex_float* b, lapack_int ldb,\n                           const lapack_complex_float* x, lapack_int ldx,\n                           float* ferr, float* berr );\nlapack_int LAPACKE_ztbrfs( int matrix_order, char uplo, char trans, char diag,\n                           lapack_int n, lapack_int kd, lapack_int nrhs,\n                           
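\n/* Editor's note: illustrative sketch only. The factor/solve split declared\n * above: factor once with dsytrf, then reuse the factorization for several\n * right-hand sides with dsytrs. Values are hypothetical.\n *\n *   double a[4] = { 4.0, 2.0,\n *                   0.0, -3.0 };       // upper triangle of A (row-major)\n *   lapack_int ipiv[2];\n *   lapack_int info = LAPACKE_dsytrf( LAPACK_ROW_MAJOR, 'U', 2, a, 2, ipiv );\n *   double b[2] = { 1.0, 2.0 };        // overwritten with the solution\n *   if( info == 0 )\n *       info = LAPACKE_dsytrs( LAPACK_ROW_MAJOR, 'U', 2, 1, a, 2, ipiv, b, 1 );\n */\n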
const lapack_complex_double* ab, lapack_int ldab,\n                           const lapack_complex_double* b, lapack_int ldb,\n                           const lapack_complex_double* x, lapack_int ldx,\n                           double* ferr, double* berr );\n\nlapack_int LAPACKE_stbtrs( int matrix_order, char uplo, char trans, char diag,\n                           lapack_int n, lapack_int kd, lapack_int nrhs,\n                           const float* ab, lapack_int ldab, float* b,\n                           lapack_int ldb );\nlapack_int LAPACKE_dtbtrs( int matrix_order, char uplo, char trans, char diag,\n                           lapack_int n, lapack_int kd, lapack_int nrhs,\n                           const double* ab, lapack_int ldab, double* b,\n                           lapack_int ldb );\nlapack_int LAPACKE_ctbtrs( int matrix_order, char uplo, char trans, char diag,\n                           lapack_int n, lapack_int kd, lapack_int nrhs,\n                           const lapack_complex_float* ab, lapack_int ldab,\n                           lapack_complex_float* b, lapack_int ldb );\nlapack_int LAPACKE_ztbtrs( int matrix_order, char uplo, char trans, char diag,\n                           lapack_int n, lapack_int kd, lapack_int nrhs,\n                           const lapack_complex_double* ab, lapack_int ldab,\n                           lapack_complex_double* b, lapack_int ldb );\n\nlapack_int LAPACKE_stfsm( int matrix_order, char transr, char side, char uplo,\n                          char trans, char diag, lapack_int m, lapack_int n,\n                          float alpha, const float* a, float* b,\n                          lapack_int ldb );\nlapack_int LAPACKE_dtfsm( int matrix_order, char transr, char side, char uplo,\n                          char trans, char diag, lapack_int m, lapack_int n,\n                          double alpha, const double* a, double* b,\n                          lapack_int ldb );\nlapack_int LAPACKE_ctfsm( int matrix_order, char transr, char side, char uplo,\n                          char trans, char diag, lapack_int m, lapack_int n,\n                          lapack_complex_float alpha,\n                          const lapack_complex_float* a,\n                          lapack_complex_float* b, lapack_int ldb );\nlapack_int LAPACKE_ztfsm( int matrix_order, char transr, char side, char uplo,\n                          char trans, char diag, lapack_int m, lapack_int n,\n                          lapack_complex_double alpha,\n                          const lapack_complex_double* a,\n                          lapack_complex_double* b, lapack_int ldb );\n\nlapack_int LAPACKE_stftri( int matrix_order, char transr, char uplo, char diag,\n                           lapack_int n, float* a );\nlapack_int LAPACKE_dtftri( int matrix_order, char transr, char uplo, char diag,\n                           lapack_int n, double* a );\nlapack_int LAPACKE_ctftri( int matrix_order, char transr, char uplo, char diag,\n                           lapack_int n, lapack_complex_float* a );\nlapack_int LAPACKE_ztftri( int matrix_order, char transr, char uplo, char diag,\n                           lapack_int n, lapack_complex_double* a );\n\nlapack_int LAPACKE_stfttp( int matrix_order, char transr, char uplo,\n                           lapack_int n, const float* arf, float* ap );\nlapack_int LAPACKE_dtfttp( int matrix_order, char transr, char uplo,\n                           lapack_int n, const double* arf, double* ap );\nlapack_int LAPACKE_ctfttp( int matrix_order, 
char transr, char uplo,\n                           lapack_int n, const lapack_complex_float* arf,\n                           lapack_complex_float* ap );\nlapack_int LAPACKE_ztfttp( int matrix_order, char transr, char uplo,\n                           lapack_int n, const lapack_complex_double* arf,\n                           lapack_complex_double* ap );\n\nlapack_int LAPACKE_stfttr( int matrix_order, char transr, char uplo,\n                           lapack_int n, const float* arf, float* a,\n                           lapack_int lda );\nlapack_int LAPACKE_dtfttr( int matrix_order, char transr, char uplo,\n                           lapack_int n, const double* arf, double* a,\n                           lapack_int lda );\nlapack_int LAPACKE_ctfttr( int matrix_order, char transr, char uplo,\n                           lapack_int n, const lapack_complex_float* arf,\n                           lapack_complex_float* a, lapack_int lda );\nlapack_int LAPACKE_ztfttr( int matrix_order, char transr, char uplo,\n                           lapack_int n, const lapack_complex_double* arf,\n                           lapack_complex_double* a, lapack_int lda );\n\nlapack_int LAPACKE_stgevc( int matrix_order, char side, char howmny,\n                           const lapack_logical* select, lapack_int n,\n                           const float* s, lapack_int lds, const float* p,\n                           lapack_int ldp, float* vl, lapack_int ldvl,\n                           float* vr, lapack_int ldvr, lapack_int mm,\n                           lapack_int* m );\nlapack_int LAPACKE_dtgevc( int matrix_order, char side, char howmny,\n                           const lapack_logical* select, lapack_int n,\n                           const double* s, lapack_int lds, const double* p,\n                           lapack_int ldp, double* vl, lapack_int ldvl,\n                           double* vr, lapack_int ldvr, lapack_int mm,\n                           lapack_int* m );\nlapack_int LAPACKE_ctgevc( int matrix_order, char side, char howmny,\n                           const lapack_logical* select, lapack_int n,\n                           const lapack_complex_float* s, lapack_int lds,\n                           const lapack_complex_float* p, lapack_int ldp,\n                           lapack_complex_float* vl, lapack_int ldvl,\n                           lapack_complex_float* vr, lapack_int ldvr,\n                           lapack_int mm, lapack_int* m );\nlapack_int LAPACKE_ztgevc( int matrix_order, char side, char howmny,\n                           const lapack_logical* select, lapack_int n,\n                           const lapack_complex_double* s, lapack_int lds,\n                           const lapack_complex_double* p, lapack_int ldp,\n                           lapack_complex_double* vl, lapack_int ldvl,\n                           lapack_complex_double* vr, lapack_int ldvr,\n                           lapack_int mm, lapack_int* m );\n\nlapack_int LAPACKE_stgexc( int matrix_order, lapack_logical wantq,\n                           lapack_logical wantz, lapack_int n, float* a,\n                           lapack_int lda, float* b, lapack_int ldb, float* q,\n                           lapack_int ldq, float* z, lapack_int ldz,\n                           lapack_int* ifst, lapack_int* ilst );\nlapack_int LAPACKE_dtgexc( int matrix_order, lapack_logical wantq,\n                           lapack_logical wantz, lapack_int n, double* a,\n                           lapack_int lda, double* b, lapack_int ldb, 
double* q,\n                           lapack_int ldq, double* z, lapack_int ldz,\n                           lapack_int* ifst, lapack_int* ilst );\nlapack_int LAPACKE_ctgexc( int matrix_order, lapack_logical wantq,\n                           lapack_logical wantz, lapack_int n,\n                           lapack_complex_float* a, lapack_int lda,\n                           lapack_complex_float* b, lapack_int ldb,\n                           lapack_complex_float* q, lapack_int ldq,\n                           lapack_complex_float* z, lapack_int ldz,\n                           lapack_int ifst, lapack_int ilst );\nlapack_int LAPACKE_ztgexc( int matrix_order, lapack_logical wantq,\n                           lapack_logical wantz, lapack_int n,\n                           lapack_complex_double* a, lapack_int lda,\n                           lapack_complex_double* b, lapack_int ldb,\n                           lapack_complex_double* q, lapack_int ldq,\n                           lapack_complex_double* z, lapack_int ldz,\n                           lapack_int ifst, lapack_int ilst );\n\nlapack_int LAPACKE_stgsen( int matrix_order, lapack_int ijob,\n                           lapack_logical wantq, lapack_logical wantz,\n                           const lapack_logical* select, lapack_int n, float* a,\n                           lapack_int lda, float* b, lapack_int ldb,\n                           float* alphar, float* alphai, float* beta, float* q,\n                           lapack_int ldq, float* z, lapack_int ldz,\n                           lapack_int* m, float* pl, float* pr, float* dif );\nlapack_int LAPACKE_dtgsen( int matrix_order, lapack_int ijob,\n                           lapack_logical wantq, lapack_logical wantz,\n                           const lapack_logical* select, lapack_int n,\n                           double* a, lapack_int lda, double* b, lapack_int ldb,\n                           double* alphar, double* alphai, double* beta,\n                           double* q, lapack_int ldq, double* z, lapack_int ldz,\n                           lapack_int* m, double* pl, double* pr, double* dif );\nlapack_int LAPACKE_ctgsen( int matrix_order, lapack_int ijob,\n                           lapack_logical wantq, lapack_logical wantz,\n                           const lapack_logical* select, lapack_int n,\n                           lapack_complex_float* a, lapack_int lda,\n                           lapack_complex_float* b, lapack_int ldb,\n                           lapack_complex_float* alpha,\n                           lapack_complex_float* beta, lapack_complex_float* q,\n                           lapack_int ldq, lapack_complex_float* z,\n                           lapack_int ldz, lapack_int* m, float* pl, float* pr,\n                           float* dif );\nlapack_int LAPACKE_ztgsen( int matrix_order, lapack_int ijob,\n                           lapack_logical wantq, lapack_logical wantz,\n                           const lapack_logical* select, lapack_int n,\n                           lapack_complex_double* a, lapack_int lda,\n                           lapack_complex_double* b, lapack_int ldb,\n                           lapack_complex_double* alpha,\n                           lapack_complex_double* beta,\n                           lapack_complex_double* q, lapack_int ldq,\n                           lapack_complex_double* z, lapack_int ldz,\n                           lapack_int* m, double* pl, double* pr, double* dif );\n\nlapack_int LAPACKE_stgsja( int matrix_order, char 
jobu, char jobv, char jobq,\n                           lapack_int m, lapack_int p, lapack_int n,\n                           lapack_int k, lapack_int l, float* a, lapack_int lda,\n                           float* b, lapack_int ldb, float tola, float tolb,\n                           float* alpha, float* beta, float* u, lapack_int ldu,\n                           float* v, lapack_int ldv, float* q, lapack_int ldq,\n                           lapack_int* ncycle );\nlapack_int LAPACKE_dtgsja( int matrix_order, char jobu, char jobv, char jobq,\n                           lapack_int m, lapack_int p, lapack_int n,\n                           lapack_int k, lapack_int l, double* a,\n                           lapack_int lda, double* b, lapack_int ldb,\n                           double tola, double tolb, double* alpha,\n                           double* beta, double* u, lapack_int ldu, double* v,\n                           lapack_int ldv, double* q, lapack_int ldq,\n                           lapack_int* ncycle );\nlapack_int LAPACKE_ctgsja( int matrix_order, char jobu, char jobv, char jobq,\n                           lapack_int m, lapack_int p, lapack_int n,\n                           lapack_int k, lapack_int l, lapack_complex_float* a,\n                           lapack_int lda, lapack_complex_float* b,\n                           lapack_int ldb, float tola, float tolb, float* alpha,\n                           float* beta, lapack_complex_float* u, lapack_int ldu,\n                           lapack_complex_float* v, lapack_int ldv,\n                           lapack_complex_float* q, lapack_int ldq,\n                           lapack_int* ncycle );\nlapack_int LAPACKE_ztgsja( int matrix_order, char jobu, char jobv, char jobq,\n                           lapack_int m, lapack_int p, lapack_int n,\n                           lapack_int k, lapack_int l, lapack_complex_double* a,\n                           lapack_int lda, lapack_complex_double* b,\n                           lapack_int ldb, double tola, double tolb,\n                           double* alpha, double* beta,\n                           lapack_complex_double* u, lapack_int ldu,\n                           lapack_complex_double* v, lapack_int ldv,\n                           lapack_complex_double* q, lapack_int ldq,\n                           lapack_int* ncycle );\n\nlapack_int LAPACKE_stgsna( int matrix_order, char job, char howmny,\n                           const lapack_logical* select, lapack_int n,\n                           const float* a, lapack_int lda, const float* b,\n                           lapack_int ldb, const float* vl, lapack_int ldvl,\n                           const float* vr, lapack_int ldvr, float* s,\n                           float* dif, lapack_int mm, lapack_int* m );\nlapack_int LAPACKE_dtgsna( int matrix_order, char job, char howmny,\n                           const lapack_logical* select, lapack_int n,\n                           const double* a, lapack_int lda, const double* b,\n                           lapack_int ldb, const double* vl, lapack_int ldvl,\n                           const double* vr, lapack_int ldvr, double* s,\n                           double* dif, lapack_int mm, lapack_int* m );\nlapack_int LAPACKE_ctgsna( int matrix_order, char job, char howmny,\n                           const lapack_logical* select, lapack_int n,\n                           const lapack_complex_float* a, lapack_int lda,\n                           const lapack_complex_float* b, lapack_int ldb,\n            
               const lapack_complex_float* vl, lapack_int ldvl,\n                           const lapack_complex_float* vr, lapack_int ldvr,\n                           float* s, float* dif, lapack_int mm, lapack_int* m );\nlapack_int LAPACKE_ztgsna( int matrix_order, char job, char howmny,\n                           const lapack_logical* select, lapack_int n,\n                           const lapack_complex_double* a, lapack_int lda,\n                           const lapack_complex_double* b, lapack_int ldb,\n                           const lapack_complex_double* vl, lapack_int ldvl,\n                           const lapack_complex_double* vr, lapack_int ldvr,\n                           double* s, double* dif, lapack_int mm,\n                           lapack_int* m );\n\nlapack_int LAPACKE_stgsyl( int matrix_order, char trans, lapack_int ijob,\n                           lapack_int m, lapack_int n, const float* a,\n                           lapack_int lda, const float* b, lapack_int ldb,\n                           float* c, lapack_int ldc, const float* d,\n                           lapack_int ldd, const float* e, lapack_int lde,\n                           float* f, lapack_int ldf, float* scale, float* dif );\nlapack_int LAPACKE_dtgsyl( int matrix_order, char trans, lapack_int ijob,\n                           lapack_int m, lapack_int n, const double* a,\n                           lapack_int lda, const double* b, lapack_int ldb,\n                           double* c, lapack_int ldc, const double* d,\n                           lapack_int ldd, const double* e, lapack_int lde,\n                           double* f, lapack_int ldf, double* scale,\n                           double* dif );\nlapack_int LAPACKE_ctgsyl( int matrix_order, char trans, lapack_int ijob,\n                           lapack_int m, lapack_int n,\n                           const lapack_complex_float* a, lapack_int lda,\n                           const lapack_complex_float* b, lapack_int ldb,\n                           lapack_complex_float* c, lapack_int ldc,\n                           const lapack_complex_float* d, lapack_int ldd,\n                           const lapack_complex_float* e, lapack_int lde,\n                           lapack_complex_float* f, lapack_int ldf,\n                           float* scale, float* dif );\nlapack_int LAPACKE_ztgsyl( int matrix_order, char trans, lapack_int ijob,\n                           lapack_int m, lapack_int n,\n                           const lapack_complex_double* a, lapack_int lda,\n                           const lapack_complex_double* b, lapack_int ldb,\n                           lapack_complex_double* c, lapack_int ldc,\n                           const lapack_complex_double* d, lapack_int ldd,\n                           const lapack_complex_double* e, lapack_int lde,\n                           lapack_complex_double* f, lapack_int ldf,\n                           double* scale, double* dif );\n\nlapack_int LAPACKE_stpcon( int matrix_order, char norm, char uplo, char diag,\n                           lapack_int n, const float* ap, float* rcond );\nlapack_int LAPACKE_dtpcon( int matrix_order, char norm, char uplo, char diag,\n                           lapack_int n, const double* ap, double* rcond );\nlapack_int LAPACKE_ctpcon( int matrix_order, char norm, char uplo, char diag,\n                           lapack_int n, const lapack_complex_float* ap,\n                           float* rcond );\nlapack_int LAPACKE_ztpcon( int matrix_order, char norm, char 
uplo, char diag,\n                           lapack_int n, const lapack_complex_double* ap,\n                           double* rcond );\n\nlapack_int LAPACKE_stprfs( int matrix_order, char uplo, char trans, char diag,\n                           lapack_int n, lapack_int nrhs, const float* ap,\n                           const float* b, lapack_int ldb, const float* x,\n                           lapack_int ldx, float* ferr, float* berr );\nlapack_int LAPACKE_dtprfs( int matrix_order, char uplo, char trans, char diag,\n                           lapack_int n, lapack_int nrhs, const double* ap,\n                           const double* b, lapack_int ldb, const double* x,\n                           lapack_int ldx, double* ferr, double* berr );\nlapack_int LAPACKE_ctprfs( int matrix_order, char uplo, char trans, char diag,\n                           lapack_int n, lapack_int nrhs,\n                           const lapack_complex_float* ap,\n                           const lapack_complex_float* b, lapack_int ldb,\n                           const lapack_complex_float* x, lapack_int ldx,\n                           float* ferr, float* berr );\nlapack_int LAPACKE_ztprfs( int matrix_order, char uplo, char trans, char diag,\n                           lapack_int n, lapack_int nrhs,\n                           const lapack_complex_double* ap,\n                           const lapack_complex_double* b, lapack_int ldb,\n                           const lapack_complex_double* x, lapack_int ldx,\n                           double* ferr, double* berr );\n\nlapack_int LAPACKE_stptri( int matrix_order, char uplo, char diag, lapack_int n,\n                           float* ap );\nlapack_int LAPACKE_dtptri( int matrix_order, char uplo, char diag, lapack_int n,\n                           double* ap );\nlapack_int LAPACKE_ctptri( int matrix_order, char uplo, char diag, lapack_int n,\n                           lapack_complex_float* ap );\nlapack_int LAPACKE_ztptri( int matrix_order, char uplo, char diag, lapack_int n,\n                           lapack_complex_double* ap );\n\nlapack_int LAPACKE_stptrs( int matrix_order, char uplo, char trans, char diag,\n                           lapack_int n, lapack_int nrhs, const float* ap,\n                           float* b, lapack_int ldb );\nlapack_int LAPACKE_dtptrs( int matrix_order, char uplo, char trans, char diag,\n                           lapack_int n, lapack_int nrhs, const double* ap,\n                           double* b, lapack_int ldb );\nlapack_int LAPACKE_ctptrs( int matrix_order, char uplo, char trans, char diag,\n                           lapack_int n, lapack_int nrhs,\n                           const lapack_complex_float* ap,\n                           lapack_complex_float* b, lapack_int ldb );\nlapack_int LAPACKE_ztptrs( int matrix_order, char uplo, char trans, char diag,\n                           lapack_int n, lapack_int nrhs,\n                           const lapack_complex_double* ap,\n                           lapack_complex_double* b, lapack_int ldb );\n\nlapack_int LAPACKE_stpttf( int matrix_order, char transr, char uplo,\n                           lapack_int n, const float* ap, float* arf );\nlapack_int LAPACKE_dtpttf( int matrix_order, char transr, char uplo,\n                           lapack_int n, const double* ap, double* arf );\nlapack_int LAPACKE_ctpttf( int matrix_order, char transr, char uplo,\n                           lapack_int n, const lapack_complex_float* ap,\n                           lapack_complex_float* arf 
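\n/* Editor's note: illustrative sketch only. Back-substitution with a packed\n * upper-triangular matrix via dtptrs (declared above); values are\n * hypothetical. In row-major layout the 'U' triangle is assumed packed row\n * by row: a11, a12, a22.\n *\n *   double ap[3] = { 2.0, 1.0, 4.0 };  // A = [ 2 1 ; 0 4 ] in packed form\n *   double b[2]  = { 4.0, 8.0 };       // overwritten with x = { 1, 2 }\n *   lapack_int info =\n *       LAPACKE_dtptrs( LAPACK_ROW_MAJOR, 'U', 'N', 'N', 2, 1, ap, b, 1 );\n */\n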
);\nlapack_int LAPACKE_ztpttf( int matrix_order, char transr, char uplo,\n                           lapack_int n, const lapack_complex_double* ap,\n                           lapack_complex_double* arf );\n\nlapack_int LAPACKE_stpttr( int matrix_order, char uplo, lapack_int n,\n                           const float* ap, float* a, lapack_int lda );\nlapack_int LAPACKE_dtpttr( int matrix_order, char uplo, lapack_int n,\n                           const double* ap, double* a, lapack_int lda );\nlapack_int LAPACKE_ctpttr( int matrix_order, char uplo, lapack_int n,\n                           const lapack_complex_float* ap,\n                           lapack_complex_float* a, lapack_int lda );\nlapack_int LAPACKE_ztpttr( int matrix_order, char uplo, lapack_int n,\n                           const lapack_complex_double* ap,\n                           lapack_complex_double* a, lapack_int lda );\n\nlapack_int LAPACKE_strcon( int matrix_order, char norm, char uplo, char diag,\n                           lapack_int n, const float* a, lapack_int lda,\n                           float* rcond );\nlapack_int LAPACKE_dtrcon( int matrix_order, char norm, char uplo, char diag,\n                           lapack_int n, const double* a, lapack_int lda,\n                           double* rcond );\nlapack_int LAPACKE_ctrcon( int matrix_order, char norm, char uplo, char diag,\n                           lapack_int n, const lapack_complex_float* a,\n                           lapack_int lda, float* rcond );\nlapack_int LAPACKE_ztrcon( int matrix_order, char norm, char uplo, char diag,\n                           lapack_int n, const lapack_complex_double* a,\n                           lapack_int lda, double* rcond );\n\nlapack_int LAPACKE_strevc( int matrix_order, char side, char howmny,\n                           lapack_logical* select, lapack_int n, const float* t,\n                           lapack_int ldt, float* vl, lapack_int ldvl,\n                           float* vr, lapack_int ldvr, lapack_int mm,\n                           lapack_int* m );\nlapack_int LAPACKE_dtrevc( int matrix_order, char side, char howmny,\n                           lapack_logical* select, lapack_int n,\n                           const double* t, lapack_int ldt, double* vl,\n                           lapack_int ldvl, double* vr, lapack_int ldvr,\n                           lapack_int mm, lapack_int* m );\nlapack_int LAPACKE_ctrevc( int matrix_order, char side, char howmny,\n                           const lapack_logical* select, lapack_int n,\n                           lapack_complex_float* t, lapack_int ldt,\n                           lapack_complex_float* vl, lapack_int ldvl,\n                           lapack_complex_float* vr, lapack_int ldvr,\n                           lapack_int mm, lapack_int* m );\nlapack_int LAPACKE_ztrevc( int matrix_order, char side, char howmny,\n                           const lapack_logical* select, lapack_int n,\n                           lapack_complex_double* t, lapack_int ldt,\n                           lapack_complex_double* vl, lapack_int ldvl,\n                           lapack_complex_double* vr, lapack_int ldvr,\n                           lapack_int mm, lapack_int* m );\n\nlapack_int LAPACKE_strexc( int matrix_order, char compq, lapack_int n, float* t,\n                           lapack_int ldt, float* q, lapack_int ldq,\n                           lapack_int* ifst, lapack_int* ilst );\nlapack_int LAPACKE_dtrexc( int matrix_order, char compq, lapack_int n,\n                    
       double* t, lapack_int ldt, double* q, lapack_int ldq,\n                           lapack_int* ifst, lapack_int* ilst );\nlapack_int LAPACKE_ctrexc( int matrix_order, char compq, lapack_int n,\n                           lapack_complex_float* t, lapack_int ldt,\n                           lapack_complex_float* q, lapack_int ldq,\n                           lapack_int ifst, lapack_int ilst );\nlapack_int LAPACKE_ztrexc( int matrix_order, char compq, lapack_int n,\n                           lapack_complex_double* t, lapack_int ldt,\n                           lapack_complex_double* q, lapack_int ldq,\n                           lapack_int ifst, lapack_int ilst );\n\nlapack_int LAPACKE_strrfs( int matrix_order, char uplo, char trans, char diag,\n                           lapack_int n, lapack_int nrhs, const float* a,\n                           lapack_int lda, const float* b, lapack_int ldb,\n                           const float* x, lapack_int ldx, float* ferr,\n                           float* berr );\nlapack_int LAPACKE_dtrrfs( int matrix_order, char uplo, char trans, char diag,\n                           lapack_int n, lapack_int nrhs, const double* a,\n                           lapack_int lda, const double* b, lapack_int ldb,\n                           const double* x, lapack_int ldx, double* ferr,\n                           double* berr );\nlapack_int LAPACKE_ctrrfs( int matrix_order, char uplo, char trans, char diag,\n                           lapack_int n, lapack_int nrhs,\n                           const lapack_complex_float* a, lapack_int lda,\n                           const lapack_complex_float* b, lapack_int ldb,\n                           const lapack_complex_float* x, lapack_int ldx,\n                           float* ferr, float* berr );\nlapack_int LAPACKE_ztrrfs( int matrix_order, char uplo, char trans, char diag,\n                           lapack_int n, lapack_int nrhs,\n                           const lapack_complex_double* a, lapack_int lda,\n                           const lapack_complex_double* b, lapack_int ldb,\n                           const lapack_complex_double* x, lapack_int ldx,\n                           double* ferr, double* berr );\n\nlapack_int LAPACKE_strsen( int matrix_order, char job, char compq,\n                           const lapack_logical* select, lapack_int n, float* t,\n                           lapack_int ldt, float* q, lapack_int ldq, float* wr,\n                           float* wi, lapack_int* m, float* s, float* sep );\nlapack_int LAPACKE_dtrsen( int matrix_order, char job, char compq,\n                           const lapack_logical* select, lapack_int n,\n                           double* t, lapack_int ldt, double* q, lapack_int ldq,\n                           double* wr, double* wi, lapack_int* m, double* s,\n                           double* sep );\nlapack_int LAPACKE_ctrsen( int matrix_order, char job, char compq,\n                           const lapack_logical* select, lapack_int n,\n                           lapack_complex_float* t, lapack_int ldt,\n                           lapack_complex_float* q, lapack_int ldq,\n                           lapack_complex_float* w, lapack_int* m, float* s,\n                           float* sep );\nlapack_int LAPACKE_ztrsen( int matrix_order, char job, char compq,\n                           const lapack_logical* select, lapack_int n,\n                           lapack_complex_double* t, lapack_int ldt,\n                           lapack_complex_double* q, lapack_int 
ldq,\n                           lapack_complex_double* w, lapack_int* m, double* s,\n                           double* sep );\n\nlapack_int LAPACKE_strsna( int matrix_order, char job, char howmny,\n                           const lapack_logical* select, lapack_int n,\n                           const float* t, lapack_int ldt, const float* vl,\n                           lapack_int ldvl, const float* vr, lapack_int ldvr,\n                           float* s, float* sep, lapack_int mm, lapack_int* m );\nlapack_int LAPACKE_dtrsna( int matrix_order, char job, char howmny,\n                           const lapack_logical* select, lapack_int n,\n                           const double* t, lapack_int ldt, const double* vl,\n                           lapack_int ldvl, const double* vr, lapack_int ldvr,\n                           double* s, double* sep, lapack_int mm,\n                           lapack_int* m );\nlapack_int LAPACKE_ctrsna( int matrix_order, char job, char howmny,\n                           const lapack_logical* select, lapack_int n,\n                           const lapack_complex_float* t, lapack_int ldt,\n                           const lapack_complex_float* vl, lapack_int ldvl,\n                           const lapack_complex_float* vr, lapack_int ldvr,\n                           float* s, float* sep, lapack_int mm, lapack_int* m );\nlapack_int LAPACKE_ztrsna( int matrix_order, char job, char howmny,\n                           const lapack_logical* select, lapack_int n,\n                           const lapack_complex_double* t, lapack_int ldt,\n                           const lapack_complex_double* vl, lapack_int ldvl,\n                           const lapack_complex_double* vr, lapack_int ldvr,\n                           double* s, double* sep, lapack_int mm,\n                           lapack_int* m );\n\nlapack_int LAPACKE_strsyl( int matrix_order, char trana, char tranb,\n                           lapack_int isgn, lapack_int m, lapack_int n,\n                           const float* a, lapack_int lda, const float* b,\n                           lapack_int ldb, float* c, lapack_int ldc,\n                           float* scale );\nlapack_int LAPACKE_dtrsyl( int matrix_order, char trana, char tranb,\n                           lapack_int isgn, lapack_int m, lapack_int n,\n                           const double* a, lapack_int lda, const double* b,\n                           lapack_int ldb, double* c, lapack_int ldc,\n                           double* scale );\nlapack_int LAPACKE_ctrsyl( int matrix_order, char trana, char tranb,\n                           lapack_int isgn, lapack_int m, lapack_int n,\n                           const lapack_complex_float* a, lapack_int lda,\n                           const lapack_complex_float* b, lapack_int ldb,\n                           lapack_complex_float* c, lapack_int ldc,\n                           float* scale );\nlapack_int LAPACKE_ztrsyl( int matrix_order, char trana, char tranb,\n                           lapack_int isgn, lapack_int m, lapack_int n,\n                           const lapack_complex_double* a, lapack_int lda,\n                           const lapack_complex_double* b, lapack_int ldb,\n                           lapack_complex_double* c, lapack_int ldc,\n                           double* scale );\n\nlapack_int LAPACKE_strtri( int matrix_order, char uplo, char diag, lapack_int n,\n                           float* a, lapack_int lda );\nlapack_int LAPACKE_dtrtri( int matrix_order, char uplo, char diag, 
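\n/* Editor's note: illustrative sketch only. Solving the Sylvester equation\n * A*X + X*B = scale*C with dtrsyl (declared above); A and B must already be\n * in (quasi-)triangular Schur form, as these hypothetical matrices are.\n *\n *   double a[4] = { 1.0, 2.0, 0.0, 3.0 }; // 2x2 upper triangular (row-major)\n *   double b[4] = { 4.0, 1.0, 0.0, 5.0 };\n *   double c[4] = { 1.0, 0.0, 0.0, 1.0 }; // overwritten with the solution X\n *   double scale;                         // output scaling, usually 1.0\n *   lapack_int info =\n *       LAPACKE_dtrsyl( LAPACK_ROW_MAJOR, 'N', 'N', 1, 2, 2, a, 2, b, 2,\n *                       c, 2, &scale );\n */\n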
lapack_int n,\n                           double* a, lapack_int lda );\nlapack_int LAPACKE_ctrtri( int matrix_order, char uplo, char diag, lapack_int n,\n                           lapack_complex_float* a, lapack_int lda );\nlapack_int LAPACKE_ztrtri( int matrix_order, char uplo, char diag, lapack_int n,\n                           lapack_complex_double* a, lapack_int lda );\n\nlapack_int LAPACKE_strtrs( int matrix_order, char uplo, char trans, char diag,\n                           lapack_int n, lapack_int nrhs, const float* a,\n                           lapack_int lda, float* b, lapack_int ldb );\nlapack_int LAPACKE_dtrtrs( int matrix_order, char uplo, char trans, char diag,\n                           lapack_int n, lapack_int nrhs, const double* a,\n                           lapack_int lda, double* b, lapack_int ldb );\nlapack_int LAPACKE_ctrtrs( int matrix_order, char uplo, char trans, char diag,\n                           lapack_int n, lapack_int nrhs,\n                           const lapack_complex_float* a, lapack_int lda,\n                           lapack_complex_float* b, lapack_int ldb );\nlapack_int LAPACKE_ztrtrs( int matrix_order, char uplo, char trans, char diag,\n                           lapack_int n, lapack_int nrhs,\n                           const lapack_complex_double* a, lapack_int lda,\n                           lapack_complex_double* b, lapack_int ldb );\n\nlapack_int LAPACKE_strttf( int matrix_order, char transr, char uplo,\n                           lapack_int n, const float* a, lapack_int lda,\n                           float* arf );\nlapack_int LAPACKE_dtrttf( int matrix_order, char transr, char uplo,\n                           lapack_int n, const double* a, lapack_int lda,\n                           double* arf );\nlapack_int LAPACKE_ctrttf( int matrix_order, char transr, char uplo,\n                           lapack_int n, const lapack_complex_float* a,\n                           lapack_int lda, lapack_complex_float* arf );\nlapack_int LAPACKE_ztrttf( int matrix_order, char transr, char uplo,\n                           lapack_int n, const lapack_complex_double* a,\n                           lapack_int lda, lapack_complex_double* arf );\n\nlapack_int LAPACKE_strttp( int matrix_order, char uplo, lapack_int n,\n                           const float* a, lapack_int lda, float* ap );\nlapack_int LAPACKE_dtrttp( int matrix_order, char uplo, lapack_int n,\n                           const double* a, lapack_int lda, double* ap );\nlapack_int LAPACKE_ctrttp( int matrix_order, char uplo, lapack_int n,\n                           const lapack_complex_float* a, lapack_int lda,\n                           lapack_complex_float* ap );\nlapack_int LAPACKE_ztrttp( int matrix_order, char uplo, lapack_int n,\n                           const lapack_complex_double* a, lapack_int lda,\n                           lapack_complex_double* ap );\n\nlapack_int LAPACKE_stzrzf( int matrix_order, lapack_int m, lapack_int n,\n                           float* a, lapack_int lda, float* tau );\nlapack_int LAPACKE_dtzrzf( int matrix_order, lapack_int m, lapack_int n,\n                           double* a, lapack_int lda, double* tau );\nlapack_int LAPACKE_ctzrzf( int matrix_order, lapack_int m, lapack_int n,\n                           lapack_complex_float* a, lapack_int lda,\n                           lapack_complex_float* tau );\nlapack_int LAPACKE_ztzrzf( int matrix_order, lapack_int m, lapack_int n,\n                           lapack_complex_double* a, lapack_int lda,\n      
                     lapack_complex_double* tau );\n\nlapack_int LAPACKE_cungbr( int matrix_order, char vect, lapack_int m,\n                           lapack_int n, lapack_int k, lapack_complex_float* a,\n                           lapack_int lda, const lapack_complex_float* tau );\nlapack_int LAPACKE_zungbr( int matrix_order, char vect, lapack_int m,\n                           lapack_int n, lapack_int k, lapack_complex_double* a,\n                           lapack_int lda, const lapack_complex_double* tau );\n\nlapack_int LAPACKE_cunghr( int matrix_order, lapack_int n, lapack_int ilo,\n                           lapack_int ihi, lapack_complex_float* a,\n                           lapack_int lda, const lapack_complex_float* tau );\nlapack_int LAPACKE_zunghr( int matrix_order, lapack_int n, lapack_int ilo,\n                           lapack_int ihi, lapack_complex_double* a,\n                           lapack_int lda, const lapack_complex_double* tau );\n\nlapack_int LAPACKE_cunglq( int matrix_order, lapack_int m, lapack_int n,\n                           lapack_int k, lapack_complex_float* a,\n                           lapack_int lda, const lapack_complex_float* tau );\nlapack_int LAPACKE_zunglq( int matrix_order, lapack_int m, lapack_int n,\n                           lapack_int k, lapack_complex_double* a,\n                           lapack_int lda, const lapack_complex_double* tau );\n\nlapack_int LAPACKE_cungql( int matrix_order, lapack_int m, lapack_int n,\n                           lapack_int k, lapack_complex_float* a,\n                           lapack_int lda, const lapack_complex_float* tau );\nlapack_int LAPACKE_zungql( int matrix_order, lapack_int m, lapack_int n,\n                           lapack_int k, lapack_complex_double* a,\n                           lapack_int lda, const lapack_complex_double* tau );\n\nlapack_int LAPACKE_cungqr( int matrix_order, lapack_int m, lapack_int n,\n                           lapack_int k, lapack_complex_float* a,\n                           lapack_int lda, const lapack_complex_float* tau );\nlapack_int LAPACKE_zungqr( int matrix_order, lapack_int m, lapack_int n,\n                           lapack_int k, lapack_complex_double* a,\n                           lapack_int lda, const lapack_complex_double* tau );\n\nlapack_int LAPACKE_cungrq( int matrix_order, lapack_int m, lapack_int n,\n                           lapack_int k, lapack_complex_float* a,\n                           lapack_int lda, const lapack_complex_float* tau );\nlapack_int LAPACKE_zungrq( int matrix_order, lapack_int m, lapack_int n,\n                           lapack_int k, lapack_complex_double* a,\n                           lapack_int lda, const lapack_complex_double* tau );\n\nlapack_int LAPACKE_cungtr( int matrix_order, char uplo, lapack_int n,\n                           lapack_complex_float* a, lapack_int lda,\n                           const lapack_complex_float* tau );\nlapack_int LAPACKE_zungtr( int matrix_order, char uplo, lapack_int n,\n                           lapack_complex_double* a, lapack_int lda,\n                           const lapack_complex_double* tau );\n\nlapack_int LAPACKE_cunmbr( int matrix_order, char vect, char side, char trans,\n                           lapack_int m, lapack_int n, lapack_int k,\n                           const lapack_complex_float* a, lapack_int lda,\n                           const lapack_complex_float* tau,\n                           lapack_complex_float* c, lapack_int ldc );\nlapack_int LAPACKE_zunmbr( int 
matrix_order, char vect, char side, char trans,\n                           lapack_int m, lapack_int n, lapack_int k,\n                           const lapack_complex_double* a, lapack_int lda,\n                           const lapack_complex_double* tau,\n                           lapack_complex_double* c, lapack_int ldc );\n\nlapack_int LAPACKE_cunmhr( int matrix_order, char side, char trans,\n                           lapack_int m, lapack_int n, lapack_int ilo,\n                           lapack_int ihi, const lapack_complex_float* a,\n                           lapack_int lda, const lapack_complex_float* tau,\n                           lapack_complex_float* c, lapack_int ldc );\nlapack_int LAPACKE_zunmhr( int matrix_order, char side, char trans,\n                           lapack_int m, lapack_int n, lapack_int ilo,\n                           lapack_int ihi, const lapack_complex_double* a,\n                           lapack_int lda, const lapack_complex_double* tau,\n                           lapack_complex_double* c, lapack_int ldc );\n\nlapack_int LAPACKE_cunmlq( int matrix_order, char side, char trans,\n                           lapack_int m, lapack_int n, lapack_int k,\n                           const lapack_complex_float* a, lapack_int lda,\n                           const lapack_complex_float* tau,\n                           lapack_complex_float* c, lapack_int ldc );\nlapack_int LAPACKE_zunmlq( int matrix_order, char side, char trans,\n                           lapack_int m, lapack_int n, lapack_int k,\n                           const lapack_complex_double* a, lapack_int lda,\n                           const lapack_complex_double* tau,\n                           lapack_complex_double* c, lapack_int ldc );\n\nlapack_int LAPACKE_cunmql( int matrix_order, char side, char trans,\n                           lapack_int m, lapack_int n, lapack_int k,\n                           const lapack_complex_float* a, lapack_int lda,\n                           const lapack_complex_float* tau,\n                           lapack_complex_float* c, lapack_int ldc );\nlapack_int LAPACKE_zunmql( int matrix_order, char side, char trans,\n                           lapack_int m, lapack_int n, lapack_int k,\n                           const lapack_complex_double* a, lapack_int lda,\n                           const lapack_complex_double* tau,\n                           lapack_complex_double* c, lapack_int ldc );\n\nlapack_int LAPACKE_cunmqr( int matrix_order, char side, char trans,\n                           lapack_int m, lapack_int n, lapack_int k,\n                           const lapack_complex_float* a, lapack_int lda,\n                           const lapack_complex_float* tau,\n                           lapack_complex_float* c, lapack_int ldc );\nlapack_int LAPACKE_zunmqr( int matrix_order, char side, char trans,\n                           lapack_int m, lapack_int n, lapack_int k,\n                           const lapack_complex_double* a, lapack_int lda,\n                           const lapack_complex_double* tau,\n                           lapack_complex_double* c, lapack_int ldc );\n\nlapack_int LAPACKE_cunmrq( int matrix_order, char side, char trans,\n                           lapack_int m, lapack_int n, lapack_int k,\n                           const lapack_complex_float* a, lapack_int lda,\n                           const lapack_complex_float* tau,\n                           lapack_complex_float* c, lapack_int ldc );\nlapack_int LAPACKE_zunmrq( int matrix_order, char 
side, char trans,\n                           lapack_int m, lapack_int n, lapack_int k,\n                           const lapack_complex_double* a, lapack_int lda,\n                           const lapack_complex_double* tau,\n                           lapack_complex_double* c, lapack_int ldc );\n\nlapack_int LAPACKE_cunmrz( int matrix_order, char side, char trans,\n                           lapack_int m, lapack_int n, lapack_int k,\n                           lapack_int l, const lapack_complex_float* a,\n                           lapack_int lda, const lapack_complex_float* tau,\n                           lapack_complex_float* c, lapack_int ldc );\nlapack_int LAPACKE_zunmrz( int matrix_order, char side, char trans,\n                           lapack_int m, lapack_int n, lapack_int k,\n                           lapack_int l, const lapack_complex_double* a,\n                           lapack_int lda, const lapack_complex_double* tau,\n                           lapack_complex_double* c, lapack_int ldc );\n\nlapack_int LAPACKE_cunmtr( int matrix_order, char side, char uplo, char trans,\n                           lapack_int m, lapack_int n,\n                           const lapack_complex_float* a, lapack_int lda,\n                           const lapack_complex_float* tau,\n                           lapack_complex_float* c, lapack_int ldc );\nlapack_int LAPACKE_zunmtr( int matrix_order, char side, char uplo, char trans,\n                           lapack_int m, lapack_int n,\n                           const lapack_complex_double* a, lapack_int lda,\n                           const lapack_complex_double* tau,\n                           lapack_complex_double* c, lapack_int ldc );\n\nlapack_int LAPACKE_cupgtr( int matrix_order, char uplo, lapack_int n,\n                           const lapack_complex_float* ap,\n                           const lapack_complex_float* tau,\n                           lapack_complex_float* q, lapack_int ldq );\nlapack_int LAPACKE_zupgtr( int matrix_order, char uplo, lapack_int n,\n                           const lapack_complex_double* ap,\n                           const lapack_complex_double* tau,\n                           lapack_complex_double* q, lapack_int ldq );\n\nlapack_int LAPACKE_cupmtr( int matrix_order, char side, char uplo, char trans,\n                           lapack_int m, lapack_int n,\n                           const lapack_complex_float* ap,\n                           const lapack_complex_float* tau,\n                           lapack_complex_float* c, lapack_int ldc );\nlapack_int LAPACKE_zupmtr( int matrix_order, char side, char uplo, char trans,\n                           lapack_int m, lapack_int n,\n                           const lapack_complex_double* ap,\n                           const lapack_complex_double* tau,\n                           lapack_complex_double* c, lapack_int ldc );\n\nlapack_int LAPACKE_sbdsdc_work( int matrix_order, char uplo, char compq,\n                                lapack_int n, float* d, float* e, float* u,\n                                lapack_int ldu, float* vt, lapack_int ldvt,\n                                float* q, lapack_int* iq, float* work,\n                                lapack_int* iwork );\nlapack_int LAPACKE_dbdsdc_work( int matrix_order, char uplo, char compq,\n                                lapack_int n, double* d, double* e, double* u,\n                                lapack_int ldu, double* vt, lapack_int ldvt,\n                                double* q, lapack_int* iq, 
double* work,\n                                lapack_int* iwork );\n\nlapack_int LAPACKE_sbdsqr_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_int ncvt, lapack_int nru, lapack_int ncc,\n                                float* d, float* e, float* vt, lapack_int ldvt,\n                                float* u, lapack_int ldu, float* c,\n                                lapack_int ldc, float* work );\nlapack_int LAPACKE_dbdsqr_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_int ncvt, lapack_int nru, lapack_int ncc,\n                                double* d, double* e, double* vt,\n                                lapack_int ldvt, double* u, lapack_int ldu,\n                                double* c, lapack_int ldc, double* work );\nlapack_int LAPACKE_cbdsqr_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_int ncvt, lapack_int nru, lapack_int ncc,\n                                float* d, float* e, lapack_complex_float* vt,\n                                lapack_int ldvt, lapack_complex_float* u,\n                                lapack_int ldu, lapack_complex_float* c,\n                                lapack_int ldc, float* work );\nlapack_int LAPACKE_zbdsqr_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_int ncvt, lapack_int nru, lapack_int ncc,\n                                double* d, double* e, lapack_complex_double* vt,\n                                lapack_int ldvt, lapack_complex_double* u,\n                                lapack_int ldu, lapack_complex_double* c,\n                                lapack_int ldc, double* work );\n\nlapack_int LAPACKE_sdisna_work( char job, lapack_int m, lapack_int n,\n                                const float* d, float* sep );\nlapack_int LAPACKE_ddisna_work( char job, lapack_int m, lapack_int n,\n                                const double* d, double* sep );\n\nlapack_int LAPACKE_sgbbrd_work( int matrix_order, char vect, lapack_int m,\n                                lapack_int n, lapack_int ncc, lapack_int kl,\n                                lapack_int ku, float* ab, lapack_int ldab,\n                                float* d, float* e, float* q, lapack_int ldq,\n                                float* pt, lapack_int ldpt, float* c,\n                                lapack_int ldc, float* work );\nlapack_int LAPACKE_dgbbrd_work( int matrix_order, char vect, lapack_int m,\n                                lapack_int n, lapack_int ncc, lapack_int kl,\n                                lapack_int ku, double* ab, lapack_int ldab,\n                                double* d, double* e, double* q, lapack_int ldq,\n                                double* pt, lapack_int ldpt, double* c,\n                                lapack_int ldc, double* work );\nlapack_int LAPACKE_cgbbrd_work( int matrix_order, char vect, lapack_int m,\n                                lapack_int n, lapack_int ncc, lapack_int kl,\n                                lapack_int ku, lapack_complex_float* ab,\n                                lapack_int ldab, float* d, float* e,\n                                lapack_complex_float* q, lapack_int ldq,\n                                lapack_complex_float* pt, lapack_int ldpt,\n                                lapack_complex_float* c, lapack_int ldc,\n                                lapack_complex_float* work, float* rwork );\nlapack_int LAPACKE_zgbbrd_work( int matrix_order, char 
vect, lapack_int m,\n                                lapack_int n, lapack_int ncc, lapack_int kl,\n                                lapack_int ku, lapack_complex_double* ab,\n                                lapack_int ldab, double* d, double* e,\n                                lapack_complex_double* q, lapack_int ldq,\n                                lapack_complex_double* pt, lapack_int ldpt,\n                                lapack_complex_double* c, lapack_int ldc,\n                                lapack_complex_double* work, double* rwork );\n\nlapack_int LAPACKE_sgbcon_work( int matrix_order, char norm, lapack_int n,\n                                lapack_int kl, lapack_int ku, const float* ab,\n                                lapack_int ldab, const lapack_int* ipiv,\n                                float anorm, float* rcond, float* work,\n                                lapack_int* iwork );\nlapack_int LAPACKE_dgbcon_work( int matrix_order, char norm, lapack_int n,\n                                lapack_int kl, lapack_int ku, const double* ab,\n                                lapack_int ldab, const lapack_int* ipiv,\n                                double anorm, double* rcond, double* work,\n                                lapack_int* iwork );\nlapack_int LAPACKE_cgbcon_work( int matrix_order, char norm, lapack_int n,\n                                lapack_int kl, lapack_int ku,\n                                const lapack_complex_float* ab, lapack_int ldab,\n                                const lapack_int* ipiv, float anorm,\n                                float* rcond, lapack_complex_float* work,\n                                float* rwork );\nlapack_int LAPACKE_zgbcon_work( int matrix_order, char norm, lapack_int n,\n                                lapack_int kl, lapack_int ku,\n                                const lapack_complex_double* ab,\n                                lapack_int ldab, const lapack_int* ipiv,\n                                double anorm, double* rcond,\n                                lapack_complex_double* work, double* rwork );\n\nlapack_int LAPACKE_sgbequ_work( int matrix_order, lapack_int m, lapack_int n,\n                                lapack_int kl, lapack_int ku, const float* ab,\n                                lapack_int ldab, float* r, float* c,\n                                float* rowcnd, float* colcnd, float* amax );\nlapack_int LAPACKE_dgbequ_work( int matrix_order, lapack_int m, lapack_int n,\n                                lapack_int kl, lapack_int ku, const double* ab,\n                                lapack_int ldab, double* r, double* c,\n                                double* rowcnd, double* colcnd, double* amax );\nlapack_int LAPACKE_cgbequ_work( int matrix_order, lapack_int m, lapack_int n,\n                                lapack_int kl, lapack_int ku,\n                                const lapack_complex_float* ab, lapack_int ldab,\n                                float* r, float* c, float* rowcnd,\n                                float* colcnd, float* amax );\nlapack_int LAPACKE_zgbequ_work( int matrix_order, lapack_int m, lapack_int n,\n                                lapack_int kl, lapack_int ku,\n                                const lapack_complex_double* ab,\n                                lapack_int ldab, double* r, double* c,\n                                double* rowcnd, double* colcnd, double* amax );\n\nlapack_int LAPACKE_sgbequb_work( int matrix_order, lapack_int m, lapack_int n,\n                            
     lapack_int kl, lapack_int ku, const float* ab,\n                                 lapack_int ldab, float* r, float* c,\n                                 float* rowcnd, float* colcnd, float* amax );\nlapack_int LAPACKE_dgbequb_work( int matrix_order, lapack_int m, lapack_int n,\n                                 lapack_int kl, lapack_int ku, const double* ab,\n                                 lapack_int ldab, double* r, double* c,\n                                 double* rowcnd, double* colcnd, double* amax );\nlapack_int LAPACKE_cgbequb_work( int matrix_order, lapack_int m, lapack_int n,\n                                 lapack_int kl, lapack_int ku,\n                                 const lapack_complex_float* ab,\n                                 lapack_int ldab, float* r, float* c,\n                                 float* rowcnd, float* colcnd, float* amax );\nlapack_int LAPACKE_zgbequb_work( int matrix_order, lapack_int m, lapack_int n,\n                                 lapack_int kl, lapack_int ku,\n                                 const lapack_complex_double* ab,\n                                 lapack_int ldab, double* r, double* c,\n                                 double* rowcnd, double* colcnd, double* amax );\n\nlapack_int LAPACKE_sgbrfs_work( int matrix_order, char trans, lapack_int n,\n                                lapack_int kl, lapack_int ku, lapack_int nrhs,\n                                const float* ab, lapack_int ldab,\n                                const float* afb, lapack_int ldafb,\n                                const lapack_int* ipiv, const float* b,\n                                lapack_int ldb, float* x, lapack_int ldx,\n                                float* ferr, float* berr, float* work,\n                                lapack_int* iwork );\nlapack_int LAPACKE_dgbrfs_work( int matrix_order, char trans, lapack_int n,\n                                lapack_int kl, lapack_int ku, lapack_int nrhs,\n                                const double* ab, lapack_int ldab,\n                                const double* afb, lapack_int ldafb,\n                                const lapack_int* ipiv, const double* b,\n                                lapack_int ldb, double* x, lapack_int ldx,\n                                double* ferr, double* berr, double* work,\n                                lapack_int* iwork );\nlapack_int LAPACKE_cgbrfs_work( int matrix_order, char trans, lapack_int n,\n                                lapack_int kl, lapack_int ku, lapack_int nrhs,\n                                const lapack_complex_float* ab, lapack_int ldab,\n                                const lapack_complex_float* afb,\n                                lapack_int ldafb, const lapack_int* ipiv,\n                                const lapack_complex_float* b, lapack_int ldb,\n                                lapack_complex_float* x, lapack_int ldx,\n                                float* ferr, float* berr,\n                                lapack_complex_float* work, float* rwork );\nlapack_int LAPACKE_zgbrfs_work( int matrix_order, char trans, lapack_int n,\n                                lapack_int kl, lapack_int ku, lapack_int nrhs,\n                                const lapack_complex_double* ab,\n                                lapack_int ldab,\n                                const lapack_complex_double* afb,\n                                lapack_int ldafb, const lapack_int* ipiv,\n                                const lapack_complex_double* b, lapack_int 
ldb,\n                                lapack_complex_double* x, lapack_int ldx,\n                                double* ferr, double* berr,\n                                lapack_complex_double* work, double* rwork );\n\nlapack_int LAPACKE_sgbrfsx_work( int matrix_order, char trans, char equed,\n                                 lapack_int n, lapack_int kl, lapack_int ku,\n                                 lapack_int nrhs, const float* ab,\n                                 lapack_int ldab, const float* afb,\n                                 lapack_int ldafb, const lapack_int* ipiv,\n                                 const float* r, const float* c, const float* b,\n                                 lapack_int ldb, float* x, lapack_int ldx,\n                                 float* rcond, float* berr,\n                                 lapack_int n_err_bnds, float* err_bnds_norm,\n                                 float* err_bnds_comp, lapack_int nparams,\n                                 float* params, float* work,\n                                 lapack_int* iwork );\nlapack_int LAPACKE_dgbrfsx_work( int matrix_order, char trans, char equed,\n                                 lapack_int n, lapack_int kl, lapack_int ku,\n                                 lapack_int nrhs, const double* ab,\n                                 lapack_int ldab, const double* afb,\n                                 lapack_int ldafb, const lapack_int* ipiv,\n                                 const double* r, const double* c,\n                                 const double* b, lapack_int ldb, double* x,\n                                 lapack_int ldx, double* rcond, double* berr,\n                                 lapack_int n_err_bnds, double* err_bnds_norm,\n                                 double* err_bnds_comp, lapack_int nparams,\n                                 double* params, double* work,\n                                 lapack_int* iwork );\nlapack_int LAPACKE_cgbrfsx_work( int matrix_order, char trans, char equed,\n                                 lapack_int n, lapack_int kl, lapack_int ku,\n                                 lapack_int nrhs,\n                                 const lapack_complex_float* ab,\n                                 lapack_int ldab,\n                                 const lapack_complex_float* afb,\n                                 lapack_int ldafb, const lapack_int* ipiv,\n                                 const float* r, const float* c,\n                                 const lapack_complex_float* b, lapack_int ldb,\n                                 lapack_complex_float* x, lapack_int ldx,\n                                 float* rcond, float* berr,\n                                 lapack_int n_err_bnds, float* err_bnds_norm,\n                                 float* err_bnds_comp, lapack_int nparams,\n                                 float* params, lapack_complex_float* work,\n                                 float* rwork );\nlapack_int LAPACKE_zgbrfsx_work( int matrix_order, char trans, char equed,\n                                 lapack_int n, lapack_int kl, lapack_int ku,\n                                 lapack_int nrhs,\n                                 const lapack_complex_double* ab,\n                                 lapack_int ldab,\n                                 const lapack_complex_double* afb,\n                                 lapack_int ldafb, const lapack_int* ipiv,\n                                 const double* r, const double* c,\n                                 const 
lapack_complex_double* b, lapack_int ldb,\n                                 lapack_complex_double* x, lapack_int ldx,\n                                 double* rcond, double* berr,\n                                 lapack_int n_err_bnds, double* err_bnds_norm,\n                                 double* err_bnds_comp, lapack_int nparams,\n                                 double* params, lapack_complex_double* work,\n                                 double* rwork );\n\nlapack_int LAPACKE_sgbsv_work( int matrix_order, lapack_int n, lapack_int kl,\n                               lapack_int ku, lapack_int nrhs, float* ab,\n                               lapack_int ldab, lapack_int* ipiv, float* b,\n                               lapack_int ldb );\nlapack_int LAPACKE_dgbsv_work( int matrix_order, lapack_int n, lapack_int kl,\n                               lapack_int ku, lapack_int nrhs, double* ab,\n                               lapack_int ldab, lapack_int* ipiv, double* b,\n                               lapack_int ldb );\nlapack_int LAPACKE_cgbsv_work( int matrix_order, lapack_int n, lapack_int kl,\n                               lapack_int ku, lapack_int nrhs,\n                               lapack_complex_float* ab, lapack_int ldab,\n                               lapack_int* ipiv, lapack_complex_float* b,\n                               lapack_int ldb );\nlapack_int LAPACKE_zgbsv_work( int matrix_order, lapack_int n, lapack_int kl,\n                               lapack_int ku, lapack_int nrhs,\n                               lapack_complex_double* ab, lapack_int ldab,\n                               lapack_int* ipiv, lapack_complex_double* b,\n                               lapack_int ldb );\n\nlapack_int LAPACKE_sgbsvx_work( int matrix_order, char fact, char trans,\n                                lapack_int n, lapack_int kl, lapack_int ku,\n                                lapack_int nrhs, float* ab, lapack_int ldab,\n                                float* afb, lapack_int ldafb, lapack_int* ipiv,\n                                char* equed, float* r, float* c, float* b,\n                                lapack_int ldb, float* x, lapack_int ldx,\n                                float* rcond, float* ferr, float* berr,\n                                float* work, lapack_int* iwork );\nlapack_int LAPACKE_dgbsvx_work( int matrix_order, char fact, char trans,\n                                lapack_int n, lapack_int kl, lapack_int ku,\n                                lapack_int nrhs, double* ab, lapack_int ldab,\n                                double* afb, lapack_int ldafb, lapack_int* ipiv,\n                                char* equed, double* r, double* c, double* b,\n                                lapack_int ldb, double* x, lapack_int ldx,\n                                double* rcond, double* ferr, double* berr,\n                                double* work, lapack_int* iwork );\nlapack_int LAPACKE_cgbsvx_work( int matrix_order, char fact, char trans,\n                                lapack_int n, lapack_int kl, lapack_int ku,\n                                lapack_int nrhs, lapack_complex_float* ab,\n                                lapack_int ldab, lapack_complex_float* afb,\n                                lapack_int ldafb, lapack_int* ipiv, char* equed,\n                                float* r, float* c, lapack_complex_float* b,\n                                lapack_int ldb, lapack_complex_float* x,\n                                lapack_int ldx, float* rcond, float* ferr,\n    
                            float* berr, lapack_complex_float* work,\n                                float* rwork );\nlapack_int LAPACKE_zgbsvx_work( int matrix_order, char fact, char trans,\n                                lapack_int n, lapack_int kl, lapack_int ku,\n                                lapack_int nrhs, lapack_complex_double* ab,\n                                lapack_int ldab, lapack_complex_double* afb,\n                                lapack_int ldafb, lapack_int* ipiv, char* equed,\n                                double* r, double* c, lapack_complex_double* b,\n                                lapack_int ldb, lapack_complex_double* x,\n                                lapack_int ldx, double* rcond, double* ferr,\n                                double* berr, lapack_complex_double* work,\n                                double* rwork );\n\nlapack_int LAPACKE_sgbsvxx_work( int matrix_order, char fact, char trans,\n                                 lapack_int n, lapack_int kl, lapack_int ku,\n                                 lapack_int nrhs, float* ab, lapack_int ldab,\n                                 float* afb, lapack_int ldafb, lapack_int* ipiv,\n                                 char* equed, float* r, float* c, float* b,\n                                 lapack_int ldb, float* x, lapack_int ldx,\n                                 float* rcond, float* rpvgrw, float* berr,\n                                 lapack_int n_err_bnds, float* err_bnds_norm,\n                                 float* err_bnds_comp, lapack_int nparams,\n                                 float* params, float* work,\n                                 lapack_int* iwork );\nlapack_int LAPACKE_dgbsvxx_work( int matrix_order, char fact, char trans,\n                                 lapack_int n, lapack_int kl, lapack_int ku,\n                                 lapack_int nrhs, double* ab, lapack_int ldab,\n                                 double* afb, lapack_int ldafb,\n                                 lapack_int* ipiv, char* equed, double* r,\n                                 double* c, double* b, lapack_int ldb,\n                                 double* x, lapack_int ldx, double* rcond,\n                                 double* rpvgrw, double* berr,\n                                 lapack_int n_err_bnds, double* err_bnds_norm,\n                                 double* err_bnds_comp, lapack_int nparams,\n                                 double* params, double* work,\n                                 lapack_int* iwork );\nlapack_int LAPACKE_cgbsvxx_work( int matrix_order, char fact, char trans,\n                                 lapack_int n, lapack_int kl, lapack_int ku,\n                                 lapack_int nrhs, lapack_complex_float* ab,\n                                 lapack_int ldab, lapack_complex_float* afb,\n                                 lapack_int ldafb, lapack_int* ipiv,\n                                 char* equed, float* r, float* c,\n                                 lapack_complex_float* b, lapack_int ldb,\n                                 lapack_complex_float* x, lapack_int ldx,\n                                 float* rcond, float* rpvgrw, float* berr,\n                                 lapack_int n_err_bnds, float* err_bnds_norm,\n                                 float* err_bnds_comp, lapack_int nparams,\n                                 float* params, lapack_complex_float* work,\n                                 float* rwork );\nlapack_int LAPACKE_zgbsvxx_work( int matrix_order, char 
fact, char trans,\n                                 lapack_int n, lapack_int kl, lapack_int ku,\n                                 lapack_int nrhs, lapack_complex_double* ab,\n                                 lapack_int ldab, lapack_complex_double* afb,\n                                 lapack_int ldafb, lapack_int* ipiv,\n                                 char* equed, double* r, double* c,\n                                 lapack_complex_double* b, lapack_int ldb,\n                                 lapack_complex_double* x, lapack_int ldx,\n                                 double* rcond, double* rpvgrw, double* berr,\n                                 lapack_int n_err_bnds, double* err_bnds_norm,\n                                 double* err_bnds_comp, lapack_int nparams,\n                                 double* params, lapack_complex_double* work,\n                                 double* rwork );\n\nlapack_int LAPACKE_sgbtrf_work( int matrix_order, lapack_int m, lapack_int n,\n                                lapack_int kl, lapack_int ku, float* ab,\n                                lapack_int ldab, lapack_int* ipiv );\nlapack_int LAPACKE_dgbtrf_work( int matrix_order, lapack_int m, lapack_int n,\n                                lapack_int kl, lapack_int ku, double* ab,\n                                lapack_int ldab, lapack_int* ipiv );\nlapack_int LAPACKE_cgbtrf_work( int matrix_order, lapack_int m, lapack_int n,\n                                lapack_int kl, lapack_int ku,\n                                lapack_complex_float* ab, lapack_int ldab,\n                                lapack_int* ipiv );\nlapack_int LAPACKE_zgbtrf_work( int matrix_order, lapack_int m, lapack_int n,\n                                lapack_int kl, lapack_int ku,\n                                lapack_complex_double* ab, lapack_int ldab,\n                                lapack_int* ipiv );\n\nlapack_int LAPACKE_sgbtrs_work( int matrix_order, char trans, lapack_int n,\n                                lapack_int kl, lapack_int ku, lapack_int nrhs,\n                                const float* ab, lapack_int ldab,\n                                const lapack_int* ipiv, float* b,\n                                lapack_int ldb );\nlapack_int LAPACKE_dgbtrs_work( int matrix_order, char trans, lapack_int n,\n                                lapack_int kl, lapack_int ku, lapack_int nrhs,\n                                const double* ab, lapack_int ldab,\n                                const lapack_int* ipiv, double* b,\n                                lapack_int ldb );\nlapack_int LAPACKE_cgbtrs_work( int matrix_order, char trans, lapack_int n,\n                                lapack_int kl, lapack_int ku, lapack_int nrhs,\n                                const lapack_complex_float* ab, lapack_int ldab,\n                                const lapack_int* ipiv, lapack_complex_float* b,\n                                lapack_int ldb );\nlapack_int LAPACKE_zgbtrs_work( int matrix_order, char trans, lapack_int n,\n                                lapack_int kl, lapack_int ku, lapack_int nrhs,\n                                const lapack_complex_double* ab,\n                                lapack_int ldab, const lapack_int* ipiv,\n                                lapack_complex_double* b, lapack_int ldb );\n\nlapack_int LAPACKE_sgebak_work( int matrix_order, char job, char side,\n                                lapack_int n, lapack_int ilo, lapack_int ihi,\n                                const float* scale, lapack_int 
m, float* v,\n                                lapack_int ldv );\nlapack_int LAPACKE_dgebak_work( int matrix_order, char job, char side,\n                                lapack_int n, lapack_int ilo, lapack_int ihi,\n                                const double* scale, lapack_int m, double* v,\n                                lapack_int ldv );\nlapack_int LAPACKE_cgebak_work( int matrix_order, char job, char side,\n                                lapack_int n, lapack_int ilo, lapack_int ihi,\n                                const float* scale, lapack_int m,\n                                lapack_complex_float* v, lapack_int ldv );\nlapack_int LAPACKE_zgebak_work( int matrix_order, char job, char side,\n                                lapack_int n, lapack_int ilo, lapack_int ihi,\n                                const double* scale, lapack_int m,\n                                lapack_complex_double* v, lapack_int ldv );\n\nlapack_int LAPACKE_sgebal_work( int matrix_order, char job, lapack_int n,\n                                float* a, lapack_int lda, lapack_int* ilo,\n                                lapack_int* ihi, float* scale );\nlapack_int LAPACKE_dgebal_work( int matrix_order, char job, lapack_int n,\n                                double* a, lapack_int lda, lapack_int* ilo,\n                                lapack_int* ihi, double* scale );\nlapack_int LAPACKE_cgebal_work( int matrix_order, char job, lapack_int n,\n                                lapack_complex_float* a, lapack_int lda,\n                                lapack_int* ilo, lapack_int* ihi,\n                                float* scale );\nlapack_int LAPACKE_zgebal_work( int matrix_order, char job, lapack_int n,\n                                lapack_complex_double* a, lapack_int lda,\n                                lapack_int* ilo, lapack_int* ihi,\n                                double* scale );\n\nlapack_int LAPACKE_sgebrd_work( int matrix_order, lapack_int m, lapack_int n,\n                                float* a, lapack_int lda, float* d, float* e,\n                                float* tauq, float* taup, float* work,\n                                lapack_int lwork );\nlapack_int LAPACKE_dgebrd_work( int matrix_order, lapack_int m, lapack_int n,\n                                double* a, lapack_int lda, double* d, double* e,\n                                double* tauq, double* taup, double* work,\n                                lapack_int lwork );\nlapack_int LAPACKE_cgebrd_work( int matrix_order, lapack_int m, lapack_int n,\n                                lapack_complex_float* a, lapack_int lda,\n                                float* d, float* e, lapack_complex_float* tauq,\n                                lapack_complex_float* taup,\n                                lapack_complex_float* work, lapack_int lwork );\nlapack_int LAPACKE_zgebrd_work( int matrix_order, lapack_int m, lapack_int n,\n                                lapack_complex_double* a, lapack_int lda,\n                                double* d, double* e,\n                                lapack_complex_double* tauq,\n                                lapack_complex_double* taup,\n                                lapack_complex_double* work, lapack_int lwork );\n\nlapack_int LAPACKE_sgecon_work( int matrix_order, char norm, lapack_int n,\n                                const float* a, lapack_int lda, float anorm,\n                                float* rcond, float* work, lapack_int* iwork );\nlapack_int LAPACKE_dgecon_work( int 
matrix_order, char norm, lapack_int n,\n                                const double* a, lapack_int lda, double anorm,\n                                double* rcond, double* work,\n                                lapack_int* iwork );\nlapack_int LAPACKE_cgecon_work( int matrix_order, char norm, lapack_int n,\n                                const lapack_complex_float* a, lapack_int lda,\n                                float anorm, float* rcond,\n                                lapack_complex_float* work, float* rwork );\nlapack_int LAPACKE_zgecon_work( int matrix_order, char norm, lapack_int n,\n                                const lapack_complex_double* a, lapack_int lda,\n                                double anorm, double* rcond,\n                                lapack_complex_double* work, double* rwork );\n\nlapack_int LAPACKE_sgeequ_work( int matrix_order, lapack_int m, lapack_int n,\n                                const float* a, lapack_int lda, float* r,\n                                float* c, float* rowcnd, float* colcnd,\n                                float* amax );\nlapack_int LAPACKE_dgeequ_work( int matrix_order, lapack_int m, lapack_int n,\n                                const double* a, lapack_int lda, double* r,\n                                double* c, double* rowcnd, double* colcnd,\n                                double* amax );\nlapack_int LAPACKE_cgeequ_work( int matrix_order, lapack_int m, lapack_int n,\n                                const lapack_complex_float* a, lapack_int lda,\n                                float* r, float* c, float* rowcnd,\n                                float* colcnd, float* amax );\nlapack_int LAPACKE_zgeequ_work( int matrix_order, lapack_int m, lapack_int n,\n                                const lapack_complex_double* a, lapack_int lda,\n                                double* r, double* c, double* rowcnd,\n                                double* colcnd, double* amax );\n\nlapack_int LAPACKE_sgeequb_work( int matrix_order, lapack_int m, lapack_int n,\n                                 const float* a, lapack_int lda, float* r,\n                                 float* c, float* rowcnd, float* colcnd,\n                                 float* amax );\nlapack_int LAPACKE_dgeequb_work( int matrix_order, lapack_int m, lapack_int n,\n                                 const double* a, lapack_int lda, double* r,\n                                 double* c, double* rowcnd, double* colcnd,\n                                 double* amax );\nlapack_int LAPACKE_cgeequb_work( int matrix_order, lapack_int m, lapack_int n,\n                                 const lapack_complex_float* a, lapack_int lda,\n                                 float* r, float* c, float* rowcnd,\n                                 float* colcnd, float* amax );\nlapack_int LAPACKE_zgeequb_work( int matrix_order, lapack_int m, lapack_int n,\n                                 const lapack_complex_double* a, lapack_int lda,\n                                 double* r, double* c, double* rowcnd,\n                                 double* colcnd, double* amax );\n\nlapack_int LAPACKE_sgees_work( int matrix_order, char jobvs, char sort,\n                               LAPACK_S_SELECT2 select, lapack_int n, float* a,\n                               lapack_int lda, lapack_int* sdim, float* wr,\n                               float* wi, float* vs, lapack_int ldvs,\n                               float* work, lapack_int lwork,\n                               lapack_logical* bwork 
);\nlapack_int LAPACKE_dgees_work( int matrix_order, char jobvs, char sort,\n                               LAPACK_D_SELECT2 select, lapack_int n, double* a,\n                               lapack_int lda, lapack_int* sdim, double* wr,\n                               double* wi, double* vs, lapack_int ldvs,\n                               double* work, lapack_int lwork,\n                               lapack_logical* bwork );\nlapack_int LAPACKE_cgees_work( int matrix_order, char jobvs, char sort,\n                               LAPACK_C_SELECT1 select, lapack_int n,\n                               lapack_complex_float* a, lapack_int lda,\n                               lapack_int* sdim, lapack_complex_float* w,\n                               lapack_complex_float* vs, lapack_int ldvs,\n                               lapack_complex_float* work, lapack_int lwork,\n                               float* rwork, lapack_logical* bwork );\nlapack_int LAPACKE_zgees_work( int matrix_order, char jobvs, char sort,\n                               LAPACK_Z_SELECT1 select, lapack_int n,\n                               lapack_complex_double* a, lapack_int lda,\n                               lapack_int* sdim, lapack_complex_double* w,\n                               lapack_complex_double* vs, lapack_int ldvs,\n                               lapack_complex_double* work, lapack_int lwork,\n                               double* rwork, lapack_logical* bwork );\n\nlapack_int LAPACKE_sgeesx_work( int matrix_order, char jobvs, char sort,\n                                LAPACK_S_SELECT2 select, char sense,\n                                lapack_int n, float* a, lapack_int lda,\n                                lapack_int* sdim, float* wr, float* wi,\n                                float* vs, lapack_int ldvs, float* rconde,\n                                float* rcondv, float* work, lapack_int lwork,\n                                lapack_int* iwork, lapack_int liwork,\n                                lapack_logical* bwork );\nlapack_int LAPACKE_dgeesx_work( int matrix_order, char jobvs, char sort,\n                                LAPACK_D_SELECT2 select, char sense,\n                                lapack_int n, double* a, lapack_int lda,\n                                lapack_int* sdim, double* wr, double* wi,\n                                double* vs, lapack_int ldvs, double* rconde,\n                                double* rcondv, double* work, lapack_int lwork,\n                                lapack_int* iwork, lapack_int liwork,\n                                lapack_logical* bwork );\nlapack_int LAPACKE_cgeesx_work( int matrix_order, char jobvs, char sort,\n                                LAPACK_C_SELECT1 select, char sense,\n                                lapack_int n, lapack_complex_float* a,\n                                lapack_int lda, lapack_int* sdim,\n                                lapack_complex_float* w,\n                                lapack_complex_float* vs, lapack_int ldvs,\n                                float* rconde, float* rcondv,\n                                lapack_complex_float* work, lapack_int lwork,\n                                float* rwork, lapack_logical* bwork );\nlapack_int LAPACKE_zgeesx_work( int matrix_order, char jobvs, char sort,\n                                LAPACK_Z_SELECT1 select, char sense,\n                                lapack_int n, lapack_complex_double* a,\n                                lapack_int lda, lapack_int* sdim,\n               
                 lapack_complex_double* w,\n                                lapack_complex_double* vs, lapack_int ldvs,\n                                double* rconde, double* rcondv,\n                                lapack_complex_double* work, lapack_int lwork,\n                                double* rwork, lapack_logical* bwork );\n\nlapack_int LAPACKE_sgeev_work( int matrix_order, char jobvl, char jobvr,\n                               lapack_int n, float* a, lapack_int lda,\n                               float* wr, float* wi, float* vl, lapack_int ldvl,\n                               float* vr, lapack_int ldvr, float* work,\n                               lapack_int lwork );\nlapack_int LAPACKE_dgeev_work( int matrix_order, char jobvl, char jobvr,\n                               lapack_int n, double* a, lapack_int lda,\n                               double* wr, double* wi, double* vl,\n                               lapack_int ldvl, double* vr, lapack_int ldvr,\n                               double* work, lapack_int lwork );\nlapack_int LAPACKE_cgeev_work( int matrix_order, char jobvl, char jobvr,\n                               lapack_int n, lapack_complex_float* a,\n                               lapack_int lda, lapack_complex_float* w,\n                               lapack_complex_float* vl, lapack_int ldvl,\n                               lapack_complex_float* vr, lapack_int ldvr,\n                               lapack_complex_float* work, lapack_int lwork,\n                               float* rwork );\nlapack_int LAPACKE_zgeev_work( int matrix_order, char jobvl, char jobvr,\n                               lapack_int n, lapack_complex_double* a,\n                               lapack_int lda, lapack_complex_double* w,\n                               lapack_complex_double* vl, lapack_int ldvl,\n                               lapack_complex_double* vr, lapack_int ldvr,\n                               lapack_complex_double* work, lapack_int lwork,\n                               double* rwork );\n\nlapack_int LAPACKE_sgeevx_work( int matrix_order, char balanc, char jobvl,\n                                char jobvr, char sense, lapack_int n, float* a,\n                                lapack_int lda, float* wr, float* wi, float* vl,\n                                lapack_int ldvl, float* vr, lapack_int ldvr,\n                                lapack_int* ilo, lapack_int* ihi, float* scale,\n                                float* abnrm, float* rconde, float* rcondv,\n                                float* work, lapack_int lwork,\n                                lapack_int* iwork );\nlapack_int LAPACKE_dgeevx_work( int matrix_order, char balanc, char jobvl,\n                                char jobvr, char sense, lapack_int n, double* a,\n                                lapack_int lda, double* wr, double* wi,\n                                double* vl, lapack_int ldvl, double* vr,\n                                lapack_int ldvr, lapack_int* ilo,\n                                lapack_int* ihi, double* scale, double* abnrm,\n                                double* rconde, double* rcondv, double* work,\n                                lapack_int lwork, lapack_int* iwork );\nlapack_int LAPACKE_cgeevx_work( int matrix_order, char balanc, char jobvl,\n                                char jobvr, char sense, lapack_int n,\n                                lapack_complex_float* a, lapack_int lda,\n                                lapack_complex_float* w,\n                               
 lapack_complex_float* vl, lapack_int ldvl,\n                                lapack_complex_float* vr, lapack_int ldvr,\n                                lapack_int* ilo, lapack_int* ihi, float* scale,\n                                float* abnrm, float* rconde, float* rcondv,\n                                lapack_complex_float* work, lapack_int lwork,\n                                float* rwork );\nlapack_int LAPACKE_zgeevx_work( int matrix_order, char balanc, char jobvl,\n                                char jobvr, char sense, lapack_int n,\n                                lapack_complex_double* a, lapack_int lda,\n                                lapack_complex_double* w,\n                                lapack_complex_double* vl, lapack_int ldvl,\n                                lapack_complex_double* vr, lapack_int ldvr,\n                                lapack_int* ilo, lapack_int* ihi, double* scale,\n                                double* abnrm, double* rconde, double* rcondv,\n                                lapack_complex_double* work, lapack_int lwork,\n                                double* rwork );\n\nlapack_int LAPACKE_sgehrd_work( int matrix_order, lapack_int n, lapack_int ilo,\n                                lapack_int ihi, float* a, lapack_int lda,\n                                float* tau, float* work, lapack_int lwork );\nlapack_int LAPACKE_dgehrd_work( int matrix_order, lapack_int n, lapack_int ilo,\n                                lapack_int ihi, double* a, lapack_int lda,\n                                double* tau, double* work, lapack_int lwork );\nlapack_int LAPACKE_cgehrd_work( int matrix_order, lapack_int n, lapack_int ilo,\n                                lapack_int ihi, lapack_complex_float* a,\n                                lapack_int lda, lapack_complex_float* tau,\n                                lapack_complex_float* work, lapack_int lwork );\nlapack_int LAPACKE_zgehrd_work( int matrix_order, lapack_int n, lapack_int ilo,\n                                lapack_int ihi, lapack_complex_double* a,\n                                lapack_int lda, lapack_complex_double* tau,\n                                lapack_complex_double* work, lapack_int lwork );\n\nlapack_int LAPACKE_sgejsv_work( int matrix_order, char joba, char jobu,\n                                char jobv, char jobr, char jobt, char jobp,\n                                lapack_int m, lapack_int n, float* a,\n                                lapack_int lda, float* sva, float* u,\n                                lapack_int ldu, float* v, lapack_int ldv,\n                                float* work, lapack_int lwork,\n                                lapack_int* iwork );\nlapack_int LAPACKE_dgejsv_work( int matrix_order, char joba, char jobu,\n                                char jobv, char jobr, char jobt, char jobp,\n                                lapack_int m, lapack_int n, double* a,\n                                lapack_int lda, double* sva, double* u,\n                                lapack_int ldu, double* v, lapack_int ldv,\n                                double* work, lapack_int lwork,\n                                lapack_int* iwork );\n\nlapack_int LAPACKE_sgelq2_work( int matrix_order, lapack_int m, lapack_int n,\n                                float* a, lapack_int lda, float* tau,\n                                float* work );\nlapack_int LAPACKE_dgelq2_work( int matrix_order, lapack_int m, lapack_int n,\n                                double* a, lapack_int lda, 
double* tau,\n                                double* work );\nlapack_int LAPACKE_cgelq2_work( int matrix_order, lapack_int m, lapack_int n,\n                                lapack_complex_float* a, lapack_int lda,\n                                lapack_complex_float* tau,\n                                lapack_complex_float* work );\nlapack_int LAPACKE_zgelq2_work( int matrix_order, lapack_int m, lapack_int n,\n                                lapack_complex_double* a, lapack_int lda,\n                                lapack_complex_double* tau,\n                                lapack_complex_double* work );\n\nlapack_int LAPACKE_sgelqf_work( int matrix_order, lapack_int m, lapack_int n,\n                                float* a, lapack_int lda, float* tau,\n                                float* work, lapack_int lwork );\nlapack_int LAPACKE_dgelqf_work( int matrix_order, lapack_int m, lapack_int n,\n                                double* a, lapack_int lda, double* tau,\n                                double* work, lapack_int lwork );\nlapack_int LAPACKE_cgelqf_work( int matrix_order, lapack_int m, lapack_int n,\n                                lapack_complex_float* a, lapack_int lda,\n                                lapack_complex_float* tau,\n                                lapack_complex_float* work, lapack_int lwork );\nlapack_int LAPACKE_zgelqf_work( int matrix_order, lapack_int m, lapack_int n,\n                                lapack_complex_double* a, lapack_int lda,\n                                lapack_complex_double* tau,\n                                lapack_complex_double* work, lapack_int lwork );\n\nlapack_int LAPACKE_sgels_work( int matrix_order, char trans, lapack_int m,\n                               lapack_int n, lapack_int nrhs, float* a,\n                               lapack_int lda, float* b, lapack_int ldb,\n                               float* work, lapack_int lwork );\nlapack_int LAPACKE_dgels_work( int matrix_order, char trans, lapack_int m,\n                               lapack_int n, lapack_int nrhs, double* a,\n                               lapack_int lda, double* b, lapack_int ldb,\n                               double* work, lapack_int lwork );\nlapack_int LAPACKE_cgels_work( int matrix_order, char trans, lapack_int m,\n                               lapack_int n, lapack_int nrhs,\n                               lapack_complex_float* a, lapack_int lda,\n                               lapack_complex_float* b, lapack_int ldb,\n                               lapack_complex_float* work, lapack_int lwork );\nlapack_int LAPACKE_zgels_work( int matrix_order, char trans, lapack_int m,\n                               lapack_int n, lapack_int nrhs,\n                               lapack_complex_double* a, lapack_int lda,\n                               lapack_complex_double* b, lapack_int ldb,\n                               lapack_complex_double* work, lapack_int lwork );\n\nlapack_int LAPACKE_sgelsd_work( int matrix_order, lapack_int m, lapack_int n,\n                                lapack_int nrhs, float* a, lapack_int lda,\n                                float* b, lapack_int ldb, float* s, float rcond,\n                                lapack_int* rank, float* work, lapack_int lwork,\n                                lapack_int* iwork );\nlapack_int LAPACKE_dgelsd_work( int matrix_order, lapack_int m, lapack_int n,\n                                lapack_int nrhs, double* a, lapack_int lda,\n                                double* b, lapack_int ldb, 
double* s,\n                                double rcond, lapack_int* rank, double* work,\n                                lapack_int lwork, lapack_int* iwork );\nlapack_int LAPACKE_cgelsd_work( int matrix_order, lapack_int m, lapack_int n,\n                                lapack_int nrhs, lapack_complex_float* a,\n                                lapack_int lda, lapack_complex_float* b,\n                                lapack_int ldb, float* s, float rcond,\n                                lapack_int* rank, lapack_complex_float* work,\n                                lapack_int lwork, float* rwork,\n                                lapack_int* iwork );\nlapack_int LAPACKE_zgelsd_work( int matrix_order, lapack_int m, lapack_int n,\n                                lapack_int nrhs, lapack_complex_double* a,\n                                lapack_int lda, lapack_complex_double* b,\n                                lapack_int ldb, double* s, double rcond,\n                                lapack_int* rank, lapack_complex_double* work,\n                                lapack_int lwork, double* rwork,\n                                lapack_int* iwork );\n\nlapack_int LAPACKE_sgelss_work( int matrix_order, lapack_int m, lapack_int n,\n                                lapack_int nrhs, float* a, lapack_int lda,\n                                float* b, lapack_int ldb, float* s, float rcond,\n                                lapack_int* rank, float* work,\n                                lapack_int lwork );\nlapack_int LAPACKE_dgelss_work( int matrix_order, lapack_int m, lapack_int n,\n                                lapack_int nrhs, double* a, lapack_int lda,\n                                double* b, lapack_int ldb, double* s,\n                                double rcond, lapack_int* rank, double* work,\n                                lapack_int lwork );\nlapack_int LAPACKE_cgelss_work( int matrix_order, lapack_int m, lapack_int n,\n                                lapack_int nrhs, lapack_complex_float* a,\n                                lapack_int lda, lapack_complex_float* b,\n                                lapack_int ldb, float* s, float rcond,\n                                lapack_int* rank, lapack_complex_float* work,\n                                lapack_int lwork, float* rwork );\nlapack_int LAPACKE_zgelss_work( int matrix_order, lapack_int m, lapack_int n,\n                                lapack_int nrhs, lapack_complex_double* a,\n                                lapack_int lda, lapack_complex_double* b,\n                                lapack_int ldb, double* s, double rcond,\n                                lapack_int* rank, lapack_complex_double* work,\n                                lapack_int lwork, double* rwork );\n\nlapack_int LAPACKE_sgelsy_work( int matrix_order, lapack_int m, lapack_int n,\n                                lapack_int nrhs, float* a, lapack_int lda,\n                                float* b, lapack_int ldb, lapack_int* jpvt,\n                                float rcond, lapack_int* rank, float* work,\n                                lapack_int lwork );\nlapack_int LAPACKE_dgelsy_work( int matrix_order, lapack_int m, lapack_int n,\n                                lapack_int nrhs, double* a, lapack_int lda,\n                                double* b, lapack_int ldb, lapack_int* jpvt,\n                                double rcond, lapack_int* rank, double* work,\n                                lapack_int lwork );\nlapack_int LAPACKE_cgelsy_work( int 
matrix_order, lapack_int m, lapack_int n,\n                                lapack_int nrhs, lapack_complex_float* a,\n                                lapack_int lda, lapack_complex_float* b,\n                                lapack_int ldb, lapack_int* jpvt, float rcond,\n                                lapack_int* rank, lapack_complex_float* work,\n                                lapack_int lwork, float* rwork );\nlapack_int LAPACKE_zgelsy_work( int matrix_order, lapack_int m, lapack_int n,\n                                lapack_int nrhs, lapack_complex_double* a,\n                                lapack_int lda, lapack_complex_double* b,\n                                lapack_int ldb, lapack_int* jpvt, double rcond,\n                                lapack_int* rank, lapack_complex_double* work,\n                                lapack_int lwork, double* rwork );\n\nlapack_int LAPACKE_sgeqlf_work( int matrix_order, lapack_int m, lapack_int n,\n                                float* a, lapack_int lda, float* tau,\n                                float* work, lapack_int lwork );\nlapack_int LAPACKE_dgeqlf_work( int matrix_order, lapack_int m, lapack_int n,\n                                double* a, lapack_int lda, double* tau,\n                                double* work, lapack_int lwork );\nlapack_int LAPACKE_cgeqlf_work( int matrix_order, lapack_int m, lapack_int n,\n                                lapack_complex_float* a, lapack_int lda,\n                                lapack_complex_float* tau,\n                                lapack_complex_float* work, lapack_int lwork );\nlapack_int LAPACKE_zgeqlf_work( int matrix_order, lapack_int m, lapack_int n,\n                                lapack_complex_double* a, lapack_int lda,\n                                lapack_complex_double* tau,\n                                lapack_complex_double* work, lapack_int lwork );\n\nlapack_int LAPACKE_sgeqp3_work( int matrix_order, lapack_int m, lapack_int n,\n                                float* a, lapack_int lda, lapack_int* jpvt,\n                                float* tau, float* work, lapack_int lwork );\nlapack_int LAPACKE_dgeqp3_work( int matrix_order, lapack_int m, lapack_int n,\n                                double* a, lapack_int lda, lapack_int* jpvt,\n                                double* tau, double* work, lapack_int lwork );\nlapack_int LAPACKE_cgeqp3_work( int matrix_order, lapack_int m, lapack_int n,\n                                lapack_complex_float* a, lapack_int lda,\n                                lapack_int* jpvt, lapack_complex_float* tau,\n                                lapack_complex_float* work, lapack_int lwork,\n                                float* rwork );\nlapack_int LAPACKE_zgeqp3_work( int matrix_order, lapack_int m, lapack_int n,\n                                lapack_complex_double* a, lapack_int lda,\n                                lapack_int* jpvt, lapack_complex_double* tau,\n                                lapack_complex_double* work, lapack_int lwork,\n                                double* rwork );\n\nlapack_int LAPACKE_sgeqpf_work( int matrix_order, lapack_int m, lapack_int n,\n                                float* a, lapack_int lda, lapack_int* jpvt,\n                                float* tau, float* work );\nlapack_int LAPACKE_dgeqpf_work( int matrix_order, lapack_int m, lapack_int n,\n                                double* a, lapack_int lda, lapack_int* jpvt,\n                                double* tau, double* work );\nlapack_int 
LAPACKE_cgeqpf_work( int matrix_order, lapack_int m, lapack_int n,\n                                lapack_complex_float* a, lapack_int lda,\n                                lapack_int* jpvt, lapack_complex_float* tau,\n                                lapack_complex_float* work, float* rwork );\nlapack_int LAPACKE_zgeqpf_work( int matrix_order, lapack_int m, lapack_int n,\n                                lapack_complex_double* a, lapack_int lda,\n                                lapack_int* jpvt, lapack_complex_double* tau,\n                                lapack_complex_double* work, double* rwork );\n\nlapack_int LAPACKE_sgeqr2_work( int matrix_order, lapack_int m, lapack_int n,\n                                float* a, lapack_int lda, float* tau,\n                                float* work );\nlapack_int LAPACKE_dgeqr2_work( int matrix_order, lapack_int m, lapack_int n,\n                                double* a, lapack_int lda, double* tau,\n                                double* work );\nlapack_int LAPACKE_cgeqr2_work( int matrix_order, lapack_int m, lapack_int n,\n                                lapack_complex_float* a, lapack_int lda,\n                                lapack_complex_float* tau,\n                                lapack_complex_float* work );\nlapack_int LAPACKE_zgeqr2_work( int matrix_order, lapack_int m, lapack_int n,\n                                lapack_complex_double* a, lapack_int lda,\n                                lapack_complex_double* tau,\n                                lapack_complex_double* work );\n\nlapack_int LAPACKE_sgeqrf_work( int matrix_order, lapack_int m, lapack_int n,\n                                float* a, lapack_int lda, float* tau,\n                                float* work, lapack_int lwork );\nlapack_int LAPACKE_dgeqrf_work( int matrix_order, lapack_int m, lapack_int n,\n                                double* a, lapack_int lda, double* tau,\n                                double* work, lapack_int lwork );\nlapack_int LAPACKE_cgeqrf_work( int matrix_order, lapack_int m, lapack_int n,\n                                lapack_complex_float* a, lapack_int lda,\n                                lapack_complex_float* tau,\n                                lapack_complex_float* work, lapack_int lwork );\nlapack_int LAPACKE_zgeqrf_work( int matrix_order, lapack_int m, lapack_int n,\n                                lapack_complex_double* a, lapack_int lda,\n                                lapack_complex_double* tau,\n                                lapack_complex_double* work, lapack_int lwork );\n\nlapack_int LAPACKE_sgeqrfp_work( int matrix_order, lapack_int m, lapack_int n,\n                                 float* a, lapack_int lda, float* tau,\n                                 float* work, lapack_int lwork );\nlapack_int LAPACKE_dgeqrfp_work( int matrix_order, lapack_int m, lapack_int n,\n                                 double* a, lapack_int lda, double* tau,\n                                 double* work, lapack_int lwork );\nlapack_int LAPACKE_cgeqrfp_work( int matrix_order, lapack_int m, lapack_int n,\n                                 lapack_complex_float* a, lapack_int lda,\n                                 lapack_complex_float* tau,\n                                 lapack_complex_float* work, lapack_int lwork );\nlapack_int LAPACKE_zgeqrfp_work( int matrix_order, lapack_int m, lapack_int n,\n                                 lapack_complex_double* a, lapack_int lda,\n                                 lapack_complex_double* tau,\n   
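\n\n// Editorial usage sketch, not part of the upstream header: a QR\n// factorization through the high-level LAPACKE_dgeqrf wrapper (declared\n// elsewhere in this header). On exit R sits in the upper triangle of a,\n// while Q is encoded as elementary reflectors in the lower part plus tau;\n// LAPACKE_dorgqr (assumed also declared in this header) expands Q:\n//\n//   double a[4*3];   // filled with the 4x3 input, column-major\n//   double tau[3];\n//   lapack_int info = LAPACKE_dgeqrf( LAPACK_COL_MAJOR, 4, 3, a, 4, tau );\n//   if( info == 0 )\n//       info = LAPACKE_dorgqr( LAPACK_COL_MAJOR, 4, 3, 3, a, 4, tau );\n//   // a now holds the 4x3 orthonormal factor Q.\n\n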
                              lapack_complex_double* work,\n                                 lapack_int lwork );\n\nlapack_int LAPACKE_sgerfs_work( int matrix_order, char trans, lapack_int n,\n                                lapack_int nrhs, const float* a, lapack_int lda,\n                                const float* af, lapack_int ldaf,\n                                const lapack_int* ipiv, const float* b,\n                                lapack_int ldb, float* x, lapack_int ldx,\n                                float* ferr, float* berr, float* work,\n                                lapack_int* iwork );\nlapack_int LAPACKE_dgerfs_work( int matrix_order, char trans, lapack_int n,\n                                lapack_int nrhs, const double* a,\n                                lapack_int lda, const double* af,\n                                lapack_int ldaf, const lapack_int* ipiv,\n                                const double* b, lapack_int ldb, double* x,\n                                lapack_int ldx, double* ferr, double* berr,\n                                double* work, lapack_int* iwork );\nlapack_int LAPACKE_cgerfs_work( int matrix_order, char trans, lapack_int n,\n                                lapack_int nrhs, const lapack_complex_float* a,\n                                lapack_int lda, const lapack_complex_float* af,\n                                lapack_int ldaf, const lapack_int* ipiv,\n                                const lapack_complex_float* b, lapack_int ldb,\n                                lapack_complex_float* x, lapack_int ldx,\n                                float* ferr, float* berr,\n                                lapack_complex_float* work, float* rwork );\nlapack_int LAPACKE_zgerfs_work( int matrix_order, char trans, lapack_int n,\n                                lapack_int nrhs, const lapack_complex_double* a,\n                                lapack_int lda, const lapack_complex_double* af,\n                                lapack_int ldaf, const lapack_int* ipiv,\n                                const lapack_complex_double* b, lapack_int ldb,\n                                lapack_complex_double* x, lapack_int ldx,\n                                double* ferr, double* berr,\n                                lapack_complex_double* work, double* rwork );\n\nlapack_int LAPACKE_sgerfsx_work( int matrix_order, char trans, char equed,\n                                 lapack_int n, lapack_int nrhs, const float* a,\n                                 lapack_int lda, const float* af,\n                                 lapack_int ldaf, const lapack_int* ipiv,\n                                 const float* r, const float* c, const float* b,\n                                 lapack_int ldb, float* x, lapack_int ldx,\n                                 float* rcond, float* berr,\n                                 lapack_int n_err_bnds, float* err_bnds_norm,\n                                 float* err_bnds_comp, lapack_int nparams,\n                                 float* params, float* work,\n                                 lapack_int* iwork );\nlapack_int LAPACKE_dgerfsx_work( int matrix_order, char trans, char equed,\n                                 lapack_int n, lapack_int nrhs, const double* a,\n                                 lapack_int lda, const double* af,\n                                 lapack_int ldaf, const lapack_int* ipiv,\n                                 const double* r, const double* c,\n                                 const double* b, 
lapack_int ldb, double* x,\n                                 lapack_int ldx, double* rcond, double* berr,\n                                 lapack_int n_err_bnds, double* err_bnds_norm,\n                                 double* err_bnds_comp, lapack_int nparams,\n                                 double* params, double* work,\n                                 lapack_int* iwork );\nlapack_int LAPACKE_cgerfsx_work( int matrix_order, char trans, char equed,\n                                 lapack_int n, lapack_int nrhs,\n                                 const lapack_complex_float* a, lapack_int lda,\n                                 const lapack_complex_float* af,\n                                 lapack_int ldaf, const lapack_int* ipiv,\n                                 const float* r, const float* c,\n                                 const lapack_complex_float* b, lapack_int ldb,\n                                 lapack_complex_float* x, lapack_int ldx,\n                                 float* rcond, float* berr,\n                                 lapack_int n_err_bnds, float* err_bnds_norm,\n                                 float* err_bnds_comp, lapack_int nparams,\n                                 float* params, lapack_complex_float* work,\n                                 float* rwork );\nlapack_int LAPACKE_zgerfsx_work( int matrix_order, char trans, char equed,\n                                 lapack_int n, lapack_int nrhs,\n                                 const lapack_complex_double* a, lapack_int lda,\n                                 const lapack_complex_double* af,\n                                 lapack_int ldaf, const lapack_int* ipiv,\n                                 const double* r, const double* c,\n                                 const lapack_complex_double* b, lapack_int ldb,\n                                 lapack_complex_double* x, lapack_int ldx,\n                                 double* rcond, double* berr,\n                                 lapack_int n_err_bnds, double* err_bnds_norm,\n                                 double* err_bnds_comp, lapack_int nparams,\n                                 double* params, lapack_complex_double* work,\n                                 double* rwork );\n\nlapack_int LAPACKE_sgerqf_work( int matrix_order, lapack_int m, lapack_int n,\n                                float* a, lapack_int lda, float* tau,\n                                float* work, lapack_int lwork );\nlapack_int LAPACKE_dgerqf_work( int matrix_order, lapack_int m, lapack_int n,\n                                double* a, lapack_int lda, double* tau,\n                                double* work, lapack_int lwork );\nlapack_int LAPACKE_cgerqf_work( int matrix_order, lapack_int m, lapack_int n,\n                                lapack_complex_float* a, lapack_int lda,\n                                lapack_complex_float* tau,\n                                lapack_complex_float* work, lapack_int lwork );\nlapack_int LAPACKE_zgerqf_work( int matrix_order, lapack_int m, lapack_int n,\n                                lapack_complex_double* a, lapack_int lda,\n                                lapack_complex_double* tau,\n                                lapack_complex_double* work, lapack_int lwork );\n\nlapack_int LAPACKE_sgesdd_work( int matrix_order, char jobz, lapack_int m,\n                                lapack_int n, float* a, lapack_int lda,\n                                float* s, float* u, lapack_int ldu, float* vt,\n                                lapack_int 
ldvt, float* work, lapack_int lwork,\n                                lapack_int* iwork );\nlapack_int LAPACKE_dgesdd_work( int matrix_order, char jobz, lapack_int m,\n                                lapack_int n, double* a, lapack_int lda,\n                                double* s, double* u, lapack_int ldu,\n                                double* vt, lapack_int ldvt, double* work,\n                                lapack_int lwork, lapack_int* iwork );\nlapack_int LAPACKE_cgesdd_work( int matrix_order, char jobz, lapack_int m,\n                                lapack_int n, lapack_complex_float* a,\n                                lapack_int lda, float* s,\n                                lapack_complex_float* u, lapack_int ldu,\n                                lapack_complex_float* vt, lapack_int ldvt,\n                                lapack_complex_float* work, lapack_int lwork,\n                                float* rwork, lapack_int* iwork );\nlapack_int LAPACKE_zgesdd_work( int matrix_order, char jobz, lapack_int m,\n                                lapack_int n, lapack_complex_double* a,\n                                lapack_int lda, double* s,\n                                lapack_complex_double* u, lapack_int ldu,\n                                lapack_complex_double* vt, lapack_int ldvt,\n                                lapack_complex_double* work, lapack_int lwork,\n                                double* rwork, lapack_int* iwork );\n\nlapack_int LAPACKE_sgesv_work( int matrix_order, lapack_int n, lapack_int nrhs,\n                               float* a, lapack_int lda, lapack_int* ipiv,\n                               float* b, lapack_int ldb );\nlapack_int LAPACKE_dgesv_work( int matrix_order, lapack_int n, lapack_int nrhs,\n                               double* a, lapack_int lda, lapack_int* ipiv,\n                               double* b, lapack_int ldb );\nlapack_int LAPACKE_cgesv_work( int matrix_order, lapack_int n, lapack_int nrhs,\n                               lapack_complex_float* a, lapack_int lda,\n                               lapack_int* ipiv, lapack_complex_float* b,\n                               lapack_int ldb );\nlapack_int LAPACKE_zgesv_work( int matrix_order, lapack_int n, lapack_int nrhs,\n                               lapack_complex_double* a, lapack_int lda,\n                               lapack_int* ipiv, lapack_complex_double* b,\n                               lapack_int ldb );\nlapack_int LAPACKE_dsgesv_work( int matrix_order, lapack_int n, lapack_int nrhs,\n                                double* a, lapack_int lda, lapack_int* ipiv,\n                                double* b, lapack_int ldb, double* x,\n                                lapack_int ldx, double* work, float* swork,\n                                lapack_int* iter );\nlapack_int LAPACKE_zcgesv_work( int matrix_order, lapack_int n, lapack_int nrhs,\n                                lapack_complex_double* a, lapack_int lda,\n                                lapack_int* ipiv, lapack_complex_double* b,\n                                lapack_int ldb, lapack_complex_double* x,\n                                lapack_int ldx, lapack_complex_double* work,\n                                lapack_complex_float* swork, double* rwork,\n                                lapack_int* iter );\n\nlapack_int LAPACKE_sgesvd_work( int matrix_order, char jobu, char jobvt,\n                                lapack_int m, lapack_int n, float* a,\n                                lapack_int lda, float* 
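\n\n// Editorial usage sketch, not part of the upstream header: solving a\n// general system A*x = b with the high-level LAPACKE_dgesv driver\n// (declared elsewhere in this header), which performs the pivoted LU\n// factorization and the solve in one call:\n//\n//   double a[3*3] = { 2, 1, 1,  1, 3, 2,  1, 0, 0 };  // row-major\n//   double b[3]   = { 4, 5, 6 };\n//   lapack_int ipiv[3];\n//   lapack_int info = LAPACKE_dgesv( LAPACK_ROW_MAJOR, 3, 1,\n//                                    a, 3, ipiv, b, 1 );\n//   // info == 0: b now holds the solution x; info > 0: U is singular.\n\n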
s, float* u,\n                                lapack_int ldu, float* vt, lapack_int ldvt,\n                                float* work, lapack_int lwork );\nlapack_int LAPACKE_dgesvd_work( int matrix_order, char jobu, char jobvt,\n                                lapack_int m, lapack_int n, double* a,\n                                lapack_int lda, double* s, double* u,\n                                lapack_int ldu, double* vt, lapack_int ldvt,\n                                double* work, lapack_int lwork );\nlapack_int LAPACKE_cgesvd_work( int matrix_order, char jobu, char jobvt,\n                                lapack_int m, lapack_int n,\n                                lapack_complex_float* a, lapack_int lda,\n                                float* s, lapack_complex_float* u,\n                                lapack_int ldu, lapack_complex_float* vt,\n                                lapack_int ldvt, lapack_complex_float* work,\n                                lapack_int lwork, float* rwork );\nlapack_int LAPACKE_zgesvd_work( int matrix_order, char jobu, char jobvt,\n                                lapack_int m, lapack_int n,\n                                lapack_complex_double* a, lapack_int lda,\n                                double* s, lapack_complex_double* u,\n                                lapack_int ldu, lapack_complex_double* vt,\n                                lapack_int ldvt, lapack_complex_double* work,\n                                lapack_int lwork, double* rwork );\n\nlapack_int LAPACKE_sgesvj_work( int matrix_order, char joba, char jobu,\n                                char jobv, lapack_int m, lapack_int n, float* a,\n                                lapack_int lda, float* sva, lapack_int mv,\n                                float* v, lapack_int ldv, float* work,\n                                lapack_int lwork );\nlapack_int LAPACKE_dgesvj_work( int matrix_order, char joba, char jobu,\n                                char jobv, lapack_int m, lapack_int n,\n                                double* a, lapack_int lda, double* sva,\n                                lapack_int mv, double* v, lapack_int ldv,\n                                double* work, lapack_int lwork );\n\nlapack_int LAPACKE_sgesvx_work( int matrix_order, char fact, char trans,\n                                lapack_int n, lapack_int nrhs, float* a,\n                                lapack_int lda, float* af, lapack_int ldaf,\n                                lapack_int* ipiv, char* equed, float* r,\n                                float* c, float* b, lapack_int ldb, float* x,\n                                lapack_int ldx, float* rcond, float* ferr,\n                                float* berr, float* work, lapack_int* iwork );\nlapack_int LAPACKE_dgesvx_work( int matrix_order, char fact, char trans,\n                                lapack_int n, lapack_int nrhs, double* a,\n                                lapack_int lda, double* af, lapack_int ldaf,\n                                lapack_int* ipiv, char* equed, double* r,\n                                double* c, double* b, lapack_int ldb, double* x,\n                                lapack_int ldx, double* rcond, double* ferr,\n                                double* berr, double* work, lapack_int* iwork );\nlapack_int LAPACKE_cgesvx_work( int matrix_order, char fact, char trans,\n                                lapack_int n, lapack_int nrhs,\n                                lapack_complex_float* a, lapack_int lda,\n                          
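\n\n// Editorial usage sketch, not part of the upstream header: full SVD\n// A = U * diag(s) * V^T via the high-level LAPACKE_dgesvd wrapper\n// (declared elsewhere in this header). Unlike the *_work variant above,\n// the high-level call takes an extra superb buffer for unconverged\n// superdiagonal elements:\n//\n//   #define M 4\n//   #define N 3\n//   double a[M*N];      // input matrix, row-major; overwritten on exit\n//   double s[N];        // singular values, descending order\n//   double u[M*M], vt[N*N], superb[N-1];\n//   lapack_int info = LAPACKE_dgesvd( LAPACK_ROW_MAJOR, 'A', 'A', M, N,\n//                                     a, N, s, u, M, vt, N, superb );\n\n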
      lapack_complex_float* af, lapack_int ldaf,\n                                lapack_int* ipiv, char* equed, float* r,\n                                float* c, lapack_complex_float* b,\n                                lapack_int ldb, lapack_complex_float* x,\n                                lapack_int ldx, float* rcond, float* ferr,\n                                float* berr, lapack_complex_float* work,\n                                float* rwork );\nlapack_int LAPACKE_zgesvx_work( int matrix_order, char fact, char trans,\n                                lapack_int n, lapack_int nrhs,\n                                lapack_complex_double* a, lapack_int lda,\n                                lapack_complex_double* af, lapack_int ldaf,\n                                lapack_int* ipiv, char* equed, double* r,\n                                double* c, lapack_complex_double* b,\n                                lapack_int ldb, lapack_complex_double* x,\n                                lapack_int ldx, double* rcond, double* ferr,\n                                double* berr, lapack_complex_double* work,\n                                double* rwork );\n\nlapack_int LAPACKE_sgesvxx_work( int matrix_order, char fact, char trans,\n                                 lapack_int n, lapack_int nrhs, float* a,\n                                 lapack_int lda, float* af, lapack_int ldaf,\n                                 lapack_int* ipiv, char* equed, float* r,\n                                 float* c, float* b, lapack_int ldb, float* x,\n                                 lapack_int ldx, float* rcond, float* rpvgrw,\n                                 float* berr, lapack_int n_err_bnds,\n                                 float* err_bnds_norm, float* err_bnds_comp,\n                                 lapack_int nparams, float* params, float* work,\n                                 lapack_int* iwork );\nlapack_int LAPACKE_dgesvxx_work( int matrix_order, char fact, char trans,\n                                 lapack_int n, lapack_int nrhs, double* a,\n                                 lapack_int lda, double* af, lapack_int ldaf,\n                                 lapack_int* ipiv, char* equed, double* r,\n                                 double* c, double* b, lapack_int ldb,\n                                 double* x, lapack_int ldx, double* rcond,\n                                 double* rpvgrw, double* berr,\n                                 lapack_int n_err_bnds, double* err_bnds_norm,\n                                 double* err_bnds_comp, lapack_int nparams,\n                                 double* params, double* work,\n                                 lapack_int* iwork );\nlapack_int LAPACKE_cgesvxx_work( int matrix_order, char fact, char trans,\n                                 lapack_int n, lapack_int nrhs,\n                                 lapack_complex_float* a, lapack_int lda,\n                                 lapack_complex_float* af, lapack_int ldaf,\n                                 lapack_int* ipiv, char* equed, float* r,\n                                 float* c, lapack_complex_float* b,\n                                 lapack_int ldb, lapack_complex_float* x,\n                                 lapack_int ldx, float* rcond, float* rpvgrw,\n                                 float* berr, lapack_int n_err_bnds,\n                                 float* err_bnds_norm, float* err_bnds_comp,\n                                 lapack_int nparams, float* params,\n                          
       lapack_complex_float* work, float* rwork );\nlapack_int LAPACKE_zgesvxx_work( int matrix_order, char fact, char trans,\n                                 lapack_int n, lapack_int nrhs,\n                                 lapack_complex_double* a, lapack_int lda,\n                                 lapack_complex_double* af, lapack_int ldaf,\n                                 lapack_int* ipiv, char* equed, double* r,\n                                 double* c, lapack_complex_double* b,\n                                 lapack_int ldb, lapack_complex_double* x,\n                                 lapack_int ldx, double* rcond, double* rpvgrw,\n                                 double* berr, lapack_int n_err_bnds,\n                                 double* err_bnds_norm, double* err_bnds_comp,\n                                 lapack_int nparams, double* params,\n                                 lapack_complex_double* work, double* rwork );\n\nlapack_int LAPACKE_sgetf2_work( int matrix_order, lapack_int m, lapack_int n,\n                                float* a, lapack_int lda, lapack_int* ipiv );\nlapack_int LAPACKE_dgetf2_work( int matrix_order, lapack_int m, lapack_int n,\n                                double* a, lapack_int lda, lapack_int* ipiv );\nlapack_int LAPACKE_cgetf2_work( int matrix_order, lapack_int m, lapack_int n,\n                                lapack_complex_float* a, lapack_int lda,\n                                lapack_int* ipiv );\nlapack_int LAPACKE_zgetf2_work( int matrix_order, lapack_int m, lapack_int n,\n                                lapack_complex_double* a, lapack_int lda,\n                                lapack_int* ipiv );\n\nlapack_int LAPACKE_sgetrf_work( int matrix_order, lapack_int m, lapack_int n,\n                                float* a, lapack_int lda, lapack_int* ipiv );\nlapack_int LAPACKE_dgetrf_work( int matrix_order, lapack_int m, lapack_int n,\n                                double* a, lapack_int lda, lapack_int* ipiv );\nlapack_int LAPACKE_cgetrf_work( int matrix_order, lapack_int m, lapack_int n,\n                                lapack_complex_float* a, lapack_int lda,\n                                lapack_int* ipiv );\nlapack_int LAPACKE_zgetrf_work( int matrix_order, lapack_int m, lapack_int n,\n                                lapack_complex_double* a, lapack_int lda,\n                                lapack_int* ipiv );\n\nlapack_int LAPACKE_sgetri_work( int matrix_order, lapack_int n, float* a,\n                                lapack_int lda, const lapack_int* ipiv,\n                                float* work, lapack_int lwork );\nlapack_int LAPACKE_dgetri_work( int matrix_order, lapack_int n, double* a,\n                                lapack_int lda, const lapack_int* ipiv,\n                                double* work, lapack_int lwork );\nlapack_int LAPACKE_cgetri_work( int matrix_order, lapack_int n,\n                                lapack_complex_float* a, lapack_int lda,\n                                const lapack_int* ipiv,\n                                lapack_complex_float* work, lapack_int lwork );\nlapack_int LAPACKE_zgetri_work( int matrix_order, lapack_int n,\n                                lapack_complex_double* a, lapack_int lda,\n                                const lapack_int* ipiv,\n                                lapack_complex_double* work, lapack_int lwork );\n\nlapack_int LAPACKE_sgetrs_work( int matrix_order, char trans, lapack_int n,\n                                lapack_int nrhs, const float* a, 
lapack_int lda,\n                                const lapack_int* ipiv, float* b,\n                                lapack_int ldb );\nlapack_int LAPACKE_dgetrs_work( int matrix_order, char trans, lapack_int n,\n                                lapack_int nrhs, const double* a,\n                                lapack_int lda, const lapack_int* ipiv,\n                                double* b, lapack_int ldb );\nlapack_int LAPACKE_cgetrs_work( int matrix_order, char trans, lapack_int n,\n                                lapack_int nrhs, const lapack_complex_float* a,\n                                lapack_int lda, const lapack_int* ipiv,\n                                lapack_complex_float* b, lapack_int ldb );\nlapack_int LAPACKE_zgetrs_work( int matrix_order, char trans, lapack_int n,\n                                lapack_int nrhs, const lapack_complex_double* a,\n                                lapack_int lda, const lapack_int* ipiv,\n                                lapack_complex_double* b, lapack_int ldb );\n\nlapack_int LAPACKE_sggbak_work( int matrix_order, char job, char side,\n                                lapack_int n, lapack_int ilo, lapack_int ihi,\n                                const float* lscale, const float* rscale,\n                                lapack_int m, float* v, lapack_int ldv );\nlapack_int LAPACKE_dggbak_work( int matrix_order, char job, char side,\n                                lapack_int n, lapack_int ilo, lapack_int ihi,\n                                const double* lscale, const double* rscale,\n                                lapack_int m, double* v, lapack_int ldv );\nlapack_int LAPACKE_cggbak_work( int matrix_order, char job, char side,\n                                lapack_int n, lapack_int ilo, lapack_int ihi,\n                                const float* lscale, const float* rscale,\n                                lapack_int m, lapack_complex_float* v,\n                                lapack_int ldv );\nlapack_int LAPACKE_zggbak_work( int matrix_order, char job, char side,\n                                lapack_int n, lapack_int ilo, lapack_int ihi,\n                                const double* lscale, const double* rscale,\n                                lapack_int m, lapack_complex_double* v,\n                                lapack_int ldv );\n\nlapack_int LAPACKE_sggbal_work( int matrix_order, char job, lapack_int n,\n                                float* a, lapack_int lda, float* b,\n                                lapack_int ldb, lapack_int* ilo,\n                                lapack_int* ihi, float* lscale, float* rscale,\n                                float* work );\nlapack_int LAPACKE_dggbal_work( int matrix_order, char job, lapack_int n,\n                                double* a, lapack_int lda, double* b,\n                                lapack_int ldb, lapack_int* ilo,\n                                lapack_int* ihi, double* lscale, double* rscale,\n                                double* work );\nlapack_int LAPACKE_cggbal_work( int matrix_order, char job, lapack_int n,\n                                lapack_complex_float* a, lapack_int lda,\n                                lapack_complex_float* b, lapack_int ldb,\n                                lapack_int* ilo, lapack_int* ihi, float* lscale,\n                                float* rscale, float* work );\nlapack_int LAPACKE_zggbal_work( int matrix_order, char job, lapack_int n,\n                                lapack_complex_double* a, lapack_int lda,\n                
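\n\n// Editorial usage sketch, not part of the upstream header: factor once,\n// solve repeatedly. LAPACKE_dgetrf computes the pivoted LU factorization\n// in place, and LAPACKE_dgetrs reuses the factors and pivots for each new\n// right-hand side (both high-level wrappers declared elsewhere in this\n// header):\n//\n//   // a: n*n coefficients (col-major); b: n-vector; ipiv: n pivots\n//   lapack_int info = LAPACKE_dgetrf( LAPACK_COL_MAJOR, n, n, a, n, ipiv );\n//   if( info == 0 )\n//       info = LAPACKE_dgetrs( LAPACK_COL_MAJOR, 'N', n, 1,\n//                              a, n, ipiv, b, n );\n//   // LAPACKE_dgetri (above) builds an explicit inverse from the same\n//   // factorization, but factor-and-solve is cheaper and more accurate.\n\n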
                lapack_complex_double* b, lapack_int ldb,\n                                lapack_int* ilo, lapack_int* ihi,\n                                double* lscale, double* rscale, double* work );\n\nlapack_int LAPACKE_sgges_work( int matrix_order, char jobvsl, char jobvsr,\n                               char sort, LAPACK_S_SELECT3 selctg, lapack_int n,\n                               float* a, lapack_int lda, float* b,\n                               lapack_int ldb, lapack_int* sdim, float* alphar,\n                               float* alphai, float* beta, float* vsl,\n                               lapack_int ldvsl, float* vsr, lapack_int ldvsr,\n                               float* work, lapack_int lwork,\n                               lapack_logical* bwork );\nlapack_int LAPACKE_dgges_work( int matrix_order, char jobvsl, char jobvsr,\n                               char sort, LAPACK_D_SELECT3 selctg, lapack_int n,\n                               double* a, lapack_int lda, double* b,\n                               lapack_int ldb, lapack_int* sdim, double* alphar,\n                               double* alphai, double* beta, double* vsl,\n                               lapack_int ldvsl, double* vsr, lapack_int ldvsr,\n                               double* work, lapack_int lwork,\n                               lapack_logical* bwork );\nlapack_int LAPACKE_cgges_work( int matrix_order, char jobvsl, char jobvsr,\n                               char sort, LAPACK_C_SELECT2 selctg, lapack_int n,\n                               lapack_complex_float* a, lapack_int lda,\n                               lapack_complex_float* b, lapack_int ldb,\n                               lapack_int* sdim, lapack_complex_float* alpha,\n                               lapack_complex_float* beta,\n                               lapack_complex_float* vsl, lapack_int ldvsl,\n                               lapack_complex_float* vsr, lapack_int ldvsr,\n                               lapack_complex_float* work, lapack_int lwork,\n                               float* rwork, lapack_logical* bwork );\nlapack_int LAPACKE_zgges_work( int matrix_order, char jobvsl, char jobvsr,\n                               char sort, LAPACK_Z_SELECT2 selctg, lapack_int n,\n                               lapack_complex_double* a, lapack_int lda,\n                               lapack_complex_double* b, lapack_int ldb,\n                               lapack_int* sdim, lapack_complex_double* alpha,\n                               lapack_complex_double* beta,\n                               lapack_complex_double* vsl, lapack_int ldvsl,\n                               lapack_complex_double* vsr, lapack_int ldvsr,\n                               lapack_complex_double* work, lapack_int lwork,\n                               double* rwork, lapack_logical* bwork );\n\nlapack_int LAPACKE_sggesx_work( int matrix_order, char jobvsl, char jobvsr,\n                                char sort, LAPACK_S_SELECT3 selctg, char sense,\n                                lapack_int n, float* a, lapack_int lda,\n                                float* b, lapack_int ldb, lapack_int* sdim,\n                                float* alphar, float* alphai, float* beta,\n                                float* vsl, lapack_int ldvsl, float* vsr,\n                                lapack_int ldvsr, float* rconde, float* rcondv,\n                                float* work, lapack_int lwork,\n                                lapack_int* iwork, lapack_int liwork,\n   
                             lapack_logical* bwork );\nlapack_int LAPACKE_dggesx_work( int matrix_order, char jobvsl, char jobvsr,\n                                char sort, LAPACK_D_SELECT3 selctg, char sense,\n                                lapack_int n, double* a, lapack_int lda,\n                                double* b, lapack_int ldb, lapack_int* sdim,\n                                double* alphar, double* alphai, double* beta,\n                                double* vsl, lapack_int ldvsl, double* vsr,\n                                lapack_int ldvsr, double* rconde,\n                                double* rcondv, double* work, lapack_int lwork,\n                                lapack_int* iwork, lapack_int liwork,\n                                lapack_logical* bwork );\nlapack_int LAPACKE_cggesx_work( int matrix_order, char jobvsl, char jobvsr,\n                                char sort, LAPACK_C_SELECT2 selctg, char sense,\n                                lapack_int n, lapack_complex_float* a,\n                                lapack_int lda, lapack_complex_float* b,\n                                lapack_int ldb, lapack_int* sdim,\n                                lapack_complex_float* alpha,\n                                lapack_complex_float* beta,\n                                lapack_complex_float* vsl, lapack_int ldvsl,\n                                lapack_complex_float* vsr, lapack_int ldvsr,\n                                float* rconde, float* rcondv,\n                                lapack_complex_float* work, lapack_int lwork,\n                                float* rwork, lapack_int* iwork,\n                                lapack_int liwork, lapack_logical* bwork );\nlapack_int LAPACKE_zggesx_work( int matrix_order, char jobvsl, char jobvsr,\n                                char sort, LAPACK_Z_SELECT2 selctg, char sense,\n                                lapack_int n, lapack_complex_double* a,\n                                lapack_int lda, lapack_complex_double* b,\n                                lapack_int ldb, lapack_int* sdim,\n                                lapack_complex_double* alpha,\n                                lapack_complex_double* beta,\n                                lapack_complex_double* vsl, lapack_int ldvsl,\n                                lapack_complex_double* vsr, lapack_int ldvsr,\n                                double* rconde, double* rcondv,\n                                lapack_complex_double* work, lapack_int lwork,\n                                double* rwork, lapack_int* iwork,\n                                lapack_int liwork, lapack_logical* bwork );\n\nlapack_int LAPACKE_sggev_work( int matrix_order, char jobvl, char jobvr,\n                               lapack_int n, float* a, lapack_int lda, float* b,\n                               lapack_int ldb, float* alphar, float* alphai,\n                               float* beta, float* vl, lapack_int ldvl,\n                               float* vr, lapack_int ldvr, float* work,\n                               lapack_int lwork );\nlapack_int LAPACKE_dggev_work( int matrix_order, char jobvl, char jobvr,\n                               lapack_int n, double* a, lapack_int lda,\n                               double* b, lapack_int ldb, double* alphar,\n                               double* alphai, double* beta, double* vl,\n                               lapack_int ldvl, double* vr, lapack_int ldvr,\n                               double* work, lapack_int lwork 
);\nlapack_int LAPACKE_cggev_work( int matrix_order, char jobvl, char jobvr,\n                               lapack_int n, lapack_complex_float* a,\n                               lapack_int lda, lapack_complex_float* b,\n                               lapack_int ldb, lapack_complex_float* alpha,\n                               lapack_complex_float* beta,\n                               lapack_complex_float* vl, lapack_int ldvl,\n                               lapack_complex_float* vr, lapack_int ldvr,\n                               lapack_complex_float* work, lapack_int lwork,\n                               float* rwork );\nlapack_int LAPACKE_zggev_work( int matrix_order, char jobvl, char jobvr,\n                               lapack_int n, lapack_complex_double* a,\n                               lapack_int lda, lapack_complex_double* b,\n                               lapack_int ldb, lapack_complex_double* alpha,\n                               lapack_complex_double* beta,\n                               lapack_complex_double* vl, lapack_int ldvl,\n                               lapack_complex_double* vr, lapack_int ldvr,\n                               lapack_complex_double* work, lapack_int lwork,\n                               double* rwork );\n\nlapack_int LAPACKE_sggevx_work( int matrix_order, char balanc, char jobvl,\n                                char jobvr, char sense, lapack_int n, float* a,\n                                lapack_int lda, float* b, lapack_int ldb,\n                                float* alphar, float* alphai, float* beta,\n                                float* vl, lapack_int ldvl, float* vr,\n                                lapack_int ldvr, lapack_int* ilo,\n                                lapack_int* ihi, float* lscale, float* rscale,\n                                float* abnrm, float* bbnrm, float* rconde,\n                                float* rcondv, float* work, lapack_int lwork,\n                                lapack_int* iwork, lapack_logical* bwork );\nlapack_int LAPACKE_dggevx_work( int matrix_order, char balanc, char jobvl,\n                                char jobvr, char sense, lapack_int n, double* a,\n                                lapack_int lda, double* b, lapack_int ldb,\n                                double* alphar, double* alphai, double* beta,\n                                double* vl, lapack_int ldvl, double* vr,\n                                lapack_int ldvr, lapack_int* ilo,\n                                lapack_int* ihi, double* lscale, double* rscale,\n                                double* abnrm, double* bbnrm, double* rconde,\n                                double* rcondv, double* work, lapack_int lwork,\n                                lapack_int* iwork, lapack_logical* bwork );\nlapack_int LAPACKE_cggevx_work( int matrix_order, char balanc, char jobvl,\n                                char jobvr, char sense, lapack_int n,\n                                lapack_complex_float* a, lapack_int lda,\n                                lapack_complex_float* b, lapack_int ldb,\n                                lapack_complex_float* alpha,\n                                lapack_complex_float* beta,\n                                lapack_complex_float* vl, lapack_int ldvl,\n                                lapack_complex_float* vr, lapack_int ldvr,\n                                lapack_int* ilo, lapack_int* ihi, float* lscale,\n                                float* rscale, float* abnrm, float* bbnrm,\n                   
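\n\n// Editorial usage sketch, not part of the upstream header: generalized\n// eigenvalues of the pencil (A, B), A*v = lambda*B*v, via the high-level\n// LAPACKE_dggev wrapper (declared elsewhere in this header). Eigenvalues\n// are returned as ratios (alphar[j] + i*alphai[j]) / beta[j], so infinite\n// eigenvalues (beta[j] == 0) remain representable:\n//\n//   #define N 3\n//   double a[N*N], b[N*N];                 // input pencil, col-major\n//   double alphar[N], alphai[N], beta[N];\n//   double vr[N*N];                        // right eigenvectors\n//   lapack_int info = LAPACKE_dggev( LAPACK_COL_MAJOR, 'N', 'V', N,\n//                                    a, N, b, N, alphar, alphai, beta,\n//                                    NULL, 1, vr, N );\n\n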
             float* rconde, float* rcondv,\n                                lapack_complex_float* work, lapack_int lwork,\n                                float* rwork, lapack_int* iwork,\n                                lapack_logical* bwork );\nlapack_int LAPACKE_zggevx_work( int matrix_order, char balanc, char jobvl,\n                                char jobvr, char sense, lapack_int n,\n                                lapack_complex_double* a, lapack_int lda,\n                                lapack_complex_double* b, lapack_int ldb,\n                                lapack_complex_double* alpha,\n                                lapack_complex_double* beta,\n                                lapack_complex_double* vl, lapack_int ldvl,\n                                lapack_complex_double* vr, lapack_int ldvr,\n                                lapack_int* ilo, lapack_int* ihi,\n                                double* lscale, double* rscale, double* abnrm,\n                                double* bbnrm, double* rconde, double* rcondv,\n                                lapack_complex_double* work, lapack_int lwork,\n                                double* rwork, lapack_int* iwork,\n                                lapack_logical* bwork );\n\nlapack_int LAPACKE_sggglm_work( int matrix_order, lapack_int n, lapack_int m,\n                                lapack_int p, float* a, lapack_int lda,\n                                float* b, lapack_int ldb, float* d, float* x,\n                                float* y, float* work, lapack_int lwork );\nlapack_int LAPACKE_dggglm_work( int matrix_order, lapack_int n, lapack_int m,\n                                lapack_int p, double* a, lapack_int lda,\n                                double* b, lapack_int ldb, double* d, double* x,\n                                double* y, double* work, lapack_int lwork );\nlapack_int LAPACKE_cggglm_work( int matrix_order, lapack_int n, lapack_int m,\n                                lapack_int p, lapack_complex_float* a,\n                                lapack_int lda, lapack_complex_float* b,\n                                lapack_int ldb, lapack_complex_float* d,\n                                lapack_complex_float* x,\n                                lapack_complex_float* y,\n                                lapack_complex_float* work, lapack_int lwork );\nlapack_int LAPACKE_zggglm_work( int matrix_order, lapack_int n, lapack_int m,\n                                lapack_int p, lapack_complex_double* a,\n                                lapack_int lda, lapack_complex_double* b,\n                                lapack_int ldb, lapack_complex_double* d,\n                                lapack_complex_double* x,\n                                lapack_complex_double* y,\n                                lapack_complex_double* work, lapack_int lwork );\n\nlapack_int LAPACKE_sgghrd_work( int matrix_order, char compq, char compz,\n                                lapack_int n, lapack_int ilo, lapack_int ihi,\n                                float* a, lapack_int lda, float* b,\n                                lapack_int ldb, float* q, lapack_int ldq,\n                                float* z, lapack_int ldz );\nlapack_int LAPACKE_dgghrd_work( int matrix_order, char compq, char compz,\n                                lapack_int n, lapack_int ilo, lapack_int ihi,\n                                double* a, lapack_int lda, double* b,\n                                lapack_int ldb, double* q, lapack_int ldq,\n                  
              double* z, lapack_int ldz );\nlapack_int LAPACKE_cgghrd_work( int matrix_order, char compq, char compz,\n                                lapack_int n, lapack_int ilo, lapack_int ihi,\n                                lapack_complex_float* a, lapack_int lda,\n                                lapack_complex_float* b, lapack_int ldb,\n                                lapack_complex_float* q, lapack_int ldq,\n                                lapack_complex_float* z, lapack_int ldz );\nlapack_int LAPACKE_zgghrd_work( int matrix_order, char compq, char compz,\n                                lapack_int n, lapack_int ilo, lapack_int ihi,\n                                lapack_complex_double* a, lapack_int lda,\n                                lapack_complex_double* b, lapack_int ldb,\n                                lapack_complex_double* q, lapack_int ldq,\n                                lapack_complex_double* z, lapack_int ldz );\n\nlapack_int LAPACKE_sgglse_work( int matrix_order, lapack_int m, lapack_int n,\n                                lapack_int p, float* a, lapack_int lda,\n                                float* b, lapack_int ldb, float* c, float* d,\n                                float* x, float* work, lapack_int lwork );\nlapack_int LAPACKE_dgglse_work( int matrix_order, lapack_int m, lapack_int n,\n                                lapack_int p, double* a, lapack_int lda,\n                                double* b, lapack_int ldb, double* c, double* d,\n                                double* x, double* work, lapack_int lwork );\nlapack_int LAPACKE_cgglse_work( int matrix_order, lapack_int m, lapack_int n,\n                                lapack_int p, lapack_complex_float* a,\n                                lapack_int lda, lapack_complex_float* b,\n                                lapack_int ldb, lapack_complex_float* c,\n                                lapack_complex_float* d,\n                                lapack_complex_float* x,\n                                lapack_complex_float* work, lapack_int lwork );\nlapack_int LAPACKE_zgglse_work( int matrix_order, lapack_int m, lapack_int n,\n                                lapack_int p, lapack_complex_double* a,\n                                lapack_int lda, lapack_complex_double* b,\n                                lapack_int ldb, lapack_complex_double* c,\n                                lapack_complex_double* d,\n                                lapack_complex_double* x,\n                                lapack_complex_double* work, lapack_int lwork );\n\nlapack_int LAPACKE_sggqrf_work( int matrix_order, lapack_int n, lapack_int m,\n                                lapack_int p, float* a, lapack_int lda,\n                                float* taua, float* b, lapack_int ldb,\n                                float* taub, float* work, lapack_int lwork );\nlapack_int LAPACKE_dggqrf_work( int matrix_order, lapack_int n, lapack_int m,\n                                lapack_int p, double* a, lapack_int lda,\n                                double* taua, double* b, lapack_int ldb,\n                                double* taub, double* work, lapack_int lwork );\nlapack_int LAPACKE_cggqrf_work( int matrix_order, lapack_int n, lapack_int m,\n                                lapack_int p, lapack_complex_float* a,\n                                lapack_int lda, lapack_complex_float* taua,\n                                lapack_complex_float* b, lapack_int ldb,\n                                lapack_complex_float* taub,\n       
                         lapack_complex_float* work, lapack_int lwork );\nlapack_int LAPACKE_zggqrf_work( int matrix_order, lapack_int n, lapack_int m,\n                                lapack_int p, lapack_complex_double* a,\n                                lapack_int lda, lapack_complex_double* taua,\n                                lapack_complex_double* b, lapack_int ldb,\n                                lapack_complex_double* taub,\n                                lapack_complex_double* work, lapack_int lwork );\n\nlapack_int LAPACKE_sggrqf_work( int matrix_order, lapack_int m, lapack_int p,\n                                lapack_int n, float* a, lapack_int lda,\n                                float* taua, float* b, lapack_int ldb,\n                                float* taub, float* work, lapack_int lwork );\nlapack_int LAPACKE_dggrqf_work( int matrix_order, lapack_int m, lapack_int p,\n                                lapack_int n, double* a, lapack_int lda,\n                                double* taua, double* b, lapack_int ldb,\n                                double* taub, double* work, lapack_int lwork );\nlapack_int LAPACKE_cggrqf_work( int matrix_order, lapack_int m, lapack_int p,\n                                lapack_int n, lapack_complex_float* a,\n                                lapack_int lda, lapack_complex_float* taua,\n                                lapack_complex_float* b, lapack_int ldb,\n                                lapack_complex_float* taub,\n                                lapack_complex_float* work, lapack_int lwork );\nlapack_int LAPACKE_zggrqf_work( int matrix_order, lapack_int m, lapack_int p,\n                                lapack_int n, lapack_complex_double* a,\n                                lapack_int lda, lapack_complex_double* taua,\n                                lapack_complex_double* b, lapack_int ldb,\n                                lapack_complex_double* taub,\n                                lapack_complex_double* work, lapack_int lwork );\n\nlapack_int LAPACKE_sggsvd_work( int matrix_order, char jobu, char jobv,\n                                char jobq, lapack_int m, lapack_int n,\n                                lapack_int p, lapack_int* k, lapack_int* l,\n                                float* a, lapack_int lda, float* b,\n                                lapack_int ldb, float* alpha, float* beta,\n                                float* u, lapack_int ldu, float* v,\n                                lapack_int ldv, float* q, lapack_int ldq,\n                                float* work, lapack_int* iwork );\nlapack_int LAPACKE_dggsvd_work( int matrix_order, char jobu, char jobv,\n                                char jobq, lapack_int m, lapack_int n,\n                                lapack_int p, lapack_int* k, lapack_int* l,\n                                double* a, lapack_int lda, double* b,\n                                lapack_int ldb, double* alpha, double* beta,\n                                double* u, lapack_int ldu, double* v,\n                                lapack_int ldv, double* q, lapack_int ldq,\n                                double* work, lapack_int* iwork );\nlapack_int LAPACKE_cggsvd_work( int matrix_order, char jobu, char jobv,\n                                char jobq, lapack_int m, lapack_int n,\n                                lapack_int p, lapack_int* k, lapack_int* l,\n                                lapack_complex_float* a, lapack_int lda,\n                                lapack_complex_float* b, 
lapack_int ldb,\n                                float* alpha, float* beta,\n                                lapack_complex_float* u, lapack_int ldu,\n                                lapack_complex_float* v, lapack_int ldv,\n                                lapack_complex_float* q, lapack_int ldq,\n                                lapack_complex_float* work, float* rwork,\n                                lapack_int* iwork );\nlapack_int LAPACKE_zggsvd_work( int matrix_order, char jobu, char jobv,\n                                char jobq, lapack_int m, lapack_int n,\n                                lapack_int p, lapack_int* k, lapack_int* l,\n                                lapack_complex_double* a, lapack_int lda,\n                                lapack_complex_double* b, lapack_int ldb,\n                                double* alpha, double* beta,\n                                lapack_complex_double* u, lapack_int ldu,\n                                lapack_complex_double* v, lapack_int ldv,\n                                lapack_complex_double* q, lapack_int ldq,\n                                lapack_complex_double* work, double* rwork,\n                                lapack_int* iwork );\n\nlapack_int LAPACKE_sggsvp_work( int matrix_order, char jobu, char jobv,\n                                char jobq, lapack_int m, lapack_int p,\n                                lapack_int n, float* a, lapack_int lda,\n                                float* b, lapack_int ldb, float tola,\n                                float tolb, lapack_int* k, lapack_int* l,\n                                float* u, lapack_int ldu, float* v,\n                                lapack_int ldv, float* q, lapack_int ldq,\n                                lapack_int* iwork, float* tau, float* work );\nlapack_int LAPACKE_dggsvp_work( int matrix_order, char jobu, char jobv,\n                                char jobq, lapack_int m, lapack_int p,\n                                lapack_int n, double* a, lapack_int lda,\n                                double* b, lapack_int ldb, double tola,\n                                double tolb, lapack_int* k, lapack_int* l,\n                                double* u, lapack_int ldu, double* v,\n                                lapack_int ldv, double* q, lapack_int ldq,\n                                lapack_int* iwork, double* tau, double* work );\nlapack_int LAPACKE_cggsvp_work( int matrix_order, char jobu, char jobv,\n                                char jobq, lapack_int m, lapack_int p,\n                                lapack_int n, lapack_complex_float* a,\n                                lapack_int lda, lapack_complex_float* b,\n                                lapack_int ldb, float tola, float tolb,\n                                lapack_int* k, lapack_int* l,\n                                lapack_complex_float* u, lapack_int ldu,\n                                lapack_complex_float* v, lapack_int ldv,\n                                lapack_complex_float* q, lapack_int ldq,\n                                lapack_int* iwork, float* rwork,\n                                lapack_complex_float* tau,\n                                lapack_complex_float* work );\nlapack_int LAPACKE_zggsvp_work( int matrix_order, char jobu, char jobv,\n                                char jobq, lapack_int m, lapack_int p,\n                                lapack_int n, lapack_complex_double* a,\n                                lapack_int lda, lapack_complex_double* b,\n                     
           lapack_int ldb, double tola, double tolb,\n                                lapack_int* k, lapack_int* l,\n                                lapack_complex_double* u, lapack_int ldu,\n                                lapack_complex_double* v, lapack_int ldv,\n                                lapack_complex_double* q, lapack_int ldq,\n                                lapack_int* iwork, double* rwork,\n                                lapack_complex_double* tau,\n                                lapack_complex_double* work );\n\nlapack_int LAPACKE_sgtcon_work( char norm, lapack_int n, const float* dl,\n                                const float* d, const float* du,\n                                const float* du2, const lapack_int* ipiv,\n                                float anorm, float* rcond, float* work,\n                                lapack_int* iwork );\nlapack_int LAPACKE_dgtcon_work( char norm, lapack_int n, const double* dl,\n                                const double* d, const double* du,\n                                const double* du2, const lapack_int* ipiv,\n                                double anorm, double* rcond, double* work,\n                                lapack_int* iwork );\nlapack_int LAPACKE_cgtcon_work( char norm, lapack_int n,\n                                const lapack_complex_float* dl,\n                                const lapack_complex_float* d,\n                                const lapack_complex_float* du,\n                                const lapack_complex_float* du2,\n                                const lapack_int* ipiv, float anorm,\n                                float* rcond, lapack_complex_float* work );\nlapack_int LAPACKE_zgtcon_work( char norm, lapack_int n,\n                                const lapack_complex_double* dl,\n                                const lapack_complex_double* d,\n                                const lapack_complex_double* du,\n                                const lapack_complex_double* du2,\n                                const lapack_int* ipiv, double anorm,\n                                double* rcond, lapack_complex_double* work );\n\nlapack_int LAPACKE_sgtrfs_work( int matrix_order, char trans, lapack_int n,\n                                lapack_int nrhs, const float* dl,\n                                const float* d, const float* du,\n                                const float* dlf, const float* df,\n                                const float* duf, const float* du2,\n                                const lapack_int* ipiv, const float* b,\n                                lapack_int ldb, float* x, lapack_int ldx,\n                                float* ferr, float* berr, float* work,\n                                lapack_int* iwork );\nlapack_int LAPACKE_dgtrfs_work( int matrix_order, char trans, lapack_int n,\n                                lapack_int nrhs, const double* dl,\n                                const double* d, const double* du,\n                                const double* dlf, const double* df,\n                                const double* duf, const double* du2,\n                                const lapack_int* ipiv, const double* b,\n                                lapack_int ldb, double* x, lapack_int ldx,\n                                double* ferr, double* berr, double* work,\n                                lapack_int* iwork );\nlapack_int LAPACKE_cgtrfs_work( int matrix_order, char trans, lapack_int n,\n                                lapack_int nrhs, const 
lapack_complex_float* dl,\n                                const lapack_complex_float* d,\n                                const lapack_complex_float* du,\n                                const lapack_complex_float* dlf,\n                                const lapack_complex_float* df,\n                                const lapack_complex_float* duf,\n                                const lapack_complex_float* du2,\n                                const lapack_int* ipiv,\n                                const lapack_complex_float* b, lapack_int ldb,\n                                lapack_complex_float* x, lapack_int ldx,\n                                float* ferr, float* berr,\n                                lapack_complex_float* work, float* rwork );\nlapack_int LAPACKE_zgtrfs_work( int matrix_order, char trans, lapack_int n,\n                                lapack_int nrhs,\n                                const lapack_complex_double* dl,\n                                const lapack_complex_double* d,\n                                const lapack_complex_double* du,\n                                const lapack_complex_double* dlf,\n                                const lapack_complex_double* df,\n                                const lapack_complex_double* duf,\n                                const lapack_complex_double* du2,\n                                const lapack_int* ipiv,\n                                const lapack_complex_double* b, lapack_int ldb,\n                                lapack_complex_double* x, lapack_int ldx,\n                                double* ferr, double* berr,\n                                lapack_complex_double* work, double* rwork );\n\nlapack_int LAPACKE_sgtsv_work( int matrix_order, lapack_int n, lapack_int nrhs,\n                               float* dl, float* d, float* du, float* b,\n                               lapack_int ldb );\nlapack_int LAPACKE_dgtsv_work( int matrix_order, lapack_int n, lapack_int nrhs,\n                               double* dl, double* d, double* du, double* b,\n                               lapack_int ldb );\nlapack_int LAPACKE_cgtsv_work( int matrix_order, lapack_int n, lapack_int nrhs,\n                               lapack_complex_float* dl,\n                               lapack_complex_float* d,\n                               lapack_complex_float* du,\n                               lapack_complex_float* b, lapack_int ldb );\nlapack_int LAPACKE_zgtsv_work( int matrix_order, lapack_int n, lapack_int nrhs,\n                               lapack_complex_double* dl,\n                               lapack_complex_double* d,\n                               lapack_complex_double* du,\n                               lapack_complex_double* b, lapack_int ldb );\n\nlapack_int LAPACKE_sgtsvx_work( int matrix_order, char fact, char trans,\n                                lapack_int n, lapack_int nrhs, const float* dl,\n                                const float* d, const float* du, float* dlf,\n                                float* df, float* duf, float* du2,\n                                lapack_int* ipiv, const float* b,\n                                lapack_int ldb, float* x, lapack_int ldx,\n                                float* rcond, float* ferr, float* berr,\n                                float* work, lapack_int* iwork );\nlapack_int LAPACKE_dgtsvx_work( int matrix_order, char fact, char trans,\n                                lapack_int n, lapack_int nrhs, const double* dl,\n                          
      const double* d, const double* du, double* dlf,\n                                double* df, double* duf, double* du2,\n                                lapack_int* ipiv, const double* b,\n                                lapack_int ldb, double* x, lapack_int ldx,\n                                double* rcond, double* ferr, double* berr,\n                                double* work, lapack_int* iwork );\nlapack_int LAPACKE_cgtsvx_work( int matrix_order, char fact, char trans,\n                                lapack_int n, lapack_int nrhs,\n                                const lapack_complex_float* dl,\n                                const lapack_complex_float* d,\n                                const lapack_complex_float* du,\n                                lapack_complex_float* dlf,\n                                lapack_complex_float* df,\n                                lapack_complex_float* duf,\n                                lapack_complex_float* du2, lapack_int* ipiv,\n                                const lapack_complex_float* b, lapack_int ldb,\n                                lapack_complex_float* x, lapack_int ldx,\n                                float* rcond, float* ferr, float* berr,\n                                lapack_complex_float* work, float* rwork );\nlapack_int LAPACKE_zgtsvx_work( int matrix_order, char fact, char trans,\n                                lapack_int n, lapack_int nrhs,\n                                const lapack_complex_double* dl,\n                                const lapack_complex_double* d,\n                                const lapack_complex_double* du,\n                                lapack_complex_double* dlf,\n                                lapack_complex_double* df,\n                                lapack_complex_double* duf,\n                                lapack_complex_double* du2, lapack_int* ipiv,\n                                const lapack_complex_double* b, lapack_int ldb,\n                                lapack_complex_double* x, lapack_int ldx,\n                                double* rcond, double* ferr, double* berr,\n                                lapack_complex_double* work, double* rwork );\n\nlapack_int LAPACKE_sgttrf_work( lapack_int n, float* dl, float* d, float* du,\n                                float* du2, lapack_int* ipiv );\nlapack_int LAPACKE_dgttrf_work( lapack_int n, double* dl, double* d, double* du,\n                                double* du2, lapack_int* ipiv );\nlapack_int LAPACKE_cgttrf_work( lapack_int n, lapack_complex_float* dl,\n                                lapack_complex_float* d,\n                                lapack_complex_float* du,\n                                lapack_complex_float* du2, lapack_int* ipiv );\nlapack_int LAPACKE_zgttrf_work( lapack_int n, lapack_complex_double* dl,\n                                lapack_complex_double* d,\n                                lapack_complex_double* du,\n                                lapack_complex_double* du2, lapack_int* ipiv );\n\nlapack_int LAPACKE_sgttrs_work( int matrix_order, char trans, lapack_int n,\n                                lapack_int nrhs, const float* dl,\n                                const float* d, const float* du,\n                                const float* du2, const lapack_int* ipiv,\n                                float* b, lapack_int ldb );\nlapack_int LAPACKE_dgttrs_work( int matrix_order, char trans, lapack_int n,\n                                lapack_int nrhs, const double* dl,\n          
                      const double* d, const double* du,\n                                const double* du2, const lapack_int* ipiv,\n                                double* b, lapack_int ldb );\nlapack_int LAPACKE_cgttrs_work( int matrix_order, char trans, lapack_int n,\n                                lapack_int nrhs, const lapack_complex_float* dl,\n                                const lapack_complex_float* d,\n                                const lapack_complex_float* du,\n                                const lapack_complex_float* du2,\n                                const lapack_int* ipiv, lapack_complex_float* b,\n                                lapack_int ldb );\nlapack_int LAPACKE_zgttrs_work( int matrix_order, char trans, lapack_int n,\n                                lapack_int nrhs,\n                                const lapack_complex_double* dl,\n                                const lapack_complex_double* d,\n                                const lapack_complex_double* du,\n                                const lapack_complex_double* du2,\n                                const lapack_int* ipiv,\n                                lapack_complex_double* b, lapack_int ldb );\n\nlapack_int LAPACKE_chbev_work( int matrix_order, char jobz, char uplo,\n                               lapack_int n, lapack_int kd,\n                               lapack_complex_float* ab, lapack_int ldab,\n                               float* w, lapack_complex_float* z,\n                               lapack_int ldz, lapack_complex_float* work,\n                               float* rwork );\nlapack_int LAPACKE_zhbev_work( int matrix_order, char jobz, char uplo,\n                               lapack_int n, lapack_int kd,\n                               lapack_complex_double* ab, lapack_int ldab,\n                               double* w, lapack_complex_double* z,\n                               lapack_int ldz, lapack_complex_double* work,\n                               double* rwork );\n\nlapack_int LAPACKE_chbevd_work( int matrix_order, char jobz, char uplo,\n                                lapack_int n, lapack_int kd,\n                                lapack_complex_float* ab, lapack_int ldab,\n                                float* w, lapack_complex_float* z,\n                                lapack_int ldz, lapack_complex_float* work,\n                                lapack_int lwork, float* rwork,\n                                lapack_int lrwork, lapack_int* iwork,\n                                lapack_int liwork );\nlapack_int LAPACKE_zhbevd_work( int matrix_order, char jobz, char uplo,\n                                lapack_int n, lapack_int kd,\n                                lapack_complex_double* ab, lapack_int ldab,\n                                double* w, lapack_complex_double* z,\n                                lapack_int ldz, lapack_complex_double* work,\n                                lapack_int lwork, double* rwork,\n                                lapack_int lrwork, lapack_int* iwork,\n                                lapack_int liwork );\n\nlapack_int LAPACKE_chbevx_work( int matrix_order, char jobz, char range,\n                                char uplo, lapack_int n, lapack_int kd,\n                                lapack_complex_float* ab, lapack_int ldab,\n                                lapack_complex_float* q, lapack_int ldq,\n                                float vl, float vu, lapack_int il,\n                                lapack_int iu, float abstol, lapack_int* 
m,\n                                float* w, lapack_complex_float* z,\n                                lapack_int ldz, lapack_complex_float* work,\n                                float* rwork, lapack_int* iwork,\n                                lapack_int* ifail );\nlapack_int LAPACKE_zhbevx_work( int matrix_order, char jobz, char range,\n                                char uplo, lapack_int n, lapack_int kd,\n                                lapack_complex_double* ab, lapack_int ldab,\n                                lapack_complex_double* q, lapack_int ldq,\n                                double vl, double vu, lapack_int il,\n                                lapack_int iu, double abstol, lapack_int* m,\n                                double* w, lapack_complex_double* z,\n                                lapack_int ldz, lapack_complex_double* work,\n                                double* rwork, lapack_int* iwork,\n                                lapack_int* ifail );\n\nlapack_int LAPACKE_chbgst_work( int matrix_order, char vect, char uplo,\n                                lapack_int n, lapack_int ka, lapack_int kb,\n                                lapack_complex_float* ab, lapack_int ldab,\n                                const lapack_complex_float* bb, lapack_int ldbb,\n                                lapack_complex_float* x, lapack_int ldx,\n                                lapack_complex_float* work, float* rwork );\nlapack_int LAPACKE_zhbgst_work( int matrix_order, char vect, char uplo,\n                                lapack_int n, lapack_int ka, lapack_int kb,\n                                lapack_complex_double* ab, lapack_int ldab,\n                                const lapack_complex_double* bb,\n                                lapack_int ldbb, lapack_complex_double* x,\n                                lapack_int ldx, lapack_complex_double* work,\n                                double* rwork );\n\nlapack_int LAPACKE_chbgv_work( int matrix_order, char jobz, char uplo,\n                               lapack_int n, lapack_int ka, lapack_int kb,\n                               lapack_complex_float* ab, lapack_int ldab,\n                               lapack_complex_float* bb, lapack_int ldbb,\n                               float* w, lapack_complex_float* z,\n                               lapack_int ldz, lapack_complex_float* work,\n                               float* rwork );\nlapack_int LAPACKE_zhbgv_work( int matrix_order, char jobz, char uplo,\n                               lapack_int n, lapack_int ka, lapack_int kb,\n                               lapack_complex_double* ab, lapack_int ldab,\n                               lapack_complex_double* bb, lapack_int ldbb,\n                               double* w, lapack_complex_double* z,\n                               lapack_int ldz, lapack_complex_double* work,\n                               double* rwork );\n\nlapack_int LAPACKE_chbgvd_work( int matrix_order, char jobz, char uplo,\n                                lapack_int n, lapack_int ka, lapack_int kb,\n                                lapack_complex_float* ab, lapack_int ldab,\n                                lapack_complex_float* bb, lapack_int ldbb,\n                                float* w, lapack_complex_float* z,\n                                lapack_int ldz, lapack_complex_float* work,\n                                lapack_int lwork, float* rwork,\n                                lapack_int lrwork, lapack_int* iwork,\n                                
lapack_int liwork );\nlapack_int LAPACKE_zhbgvd_work( int matrix_order, char jobz, char uplo,\n                                lapack_int n, lapack_int ka, lapack_int kb,\n                                lapack_complex_double* ab, lapack_int ldab,\n                                lapack_complex_double* bb, lapack_int ldbb,\n                                double* w, lapack_complex_double* z,\n                                lapack_int ldz, lapack_complex_double* work,\n                                lapack_int lwork, double* rwork,\n                                lapack_int lrwork, lapack_int* iwork,\n                                lapack_int liwork );\n\nlapack_int LAPACKE_chbgvx_work( int matrix_order, char jobz, char range,\n                                char uplo, lapack_int n, lapack_int ka,\n                                lapack_int kb, lapack_complex_float* ab,\n                                lapack_int ldab, lapack_complex_float* bb,\n                                lapack_int ldbb, lapack_complex_float* q,\n                                lapack_int ldq, float vl, float vu,\n                                lapack_int il, lapack_int iu, float abstol,\n                                lapack_int* m, float* w,\n                                lapack_complex_float* z, lapack_int ldz,\n                                lapack_complex_float* work, float* rwork,\n                                lapack_int* iwork, lapack_int* ifail );\nlapack_int LAPACKE_zhbgvx_work( int matrix_order, char jobz, char range,\n                                char uplo, lapack_int n, lapack_int ka,\n                                lapack_int kb, lapack_complex_double* ab,\n                                lapack_int ldab, lapack_complex_double* bb,\n                                lapack_int ldbb, lapack_complex_double* q,\n                                lapack_int ldq, double vl, double vu,\n                                lapack_int il, lapack_int iu, double abstol,\n                                lapack_int* m, double* w,\n                                lapack_complex_double* z, lapack_int ldz,\n                                lapack_complex_double* work, double* rwork,\n                                lapack_int* iwork, lapack_int* ifail );\n\nlapack_int LAPACKE_chbtrd_work( int matrix_order, char vect, char uplo,\n                                lapack_int n, lapack_int kd,\n                                lapack_complex_float* ab, lapack_int ldab,\n                                float* d, float* e, lapack_complex_float* q,\n                                lapack_int ldq, lapack_complex_float* work );\nlapack_int LAPACKE_zhbtrd_work( int matrix_order, char vect, char uplo,\n                                lapack_int n, lapack_int kd,\n                                lapack_complex_double* ab, lapack_int ldab,\n                                double* d, double* e, lapack_complex_double* q,\n                                lapack_int ldq, lapack_complex_double* work );\n\nlapack_int LAPACKE_checon_work( int matrix_order, char uplo, lapack_int n,\n                                const lapack_complex_float* a, lapack_int lda,\n                                const lapack_int* ipiv, float anorm,\n                                float* rcond, lapack_complex_float* work );\nlapack_int LAPACKE_zhecon_work( int matrix_order, char uplo, lapack_int n,\n                                const lapack_complex_double* a, lapack_int lda,\n                                const lapack_int* ipiv, double anorm,\n      
                          double* rcond, lapack_complex_double* work );\n\nlapack_int LAPACKE_cheequb_work( int matrix_order, char uplo, lapack_int n,\n                                 const lapack_complex_float* a, lapack_int lda,\n                                 float* s, float* scond, float* amax,\n                                 lapack_complex_float* work );\nlapack_int LAPACKE_zheequb_work( int matrix_order, char uplo, lapack_int n,\n                                 const lapack_complex_double* a, lapack_int lda,\n                                 double* s, double* scond, double* amax,\n                                 lapack_complex_double* work );\n\nlapack_int LAPACKE_cheev_work( int matrix_order, char jobz, char uplo,\n                               lapack_int n, lapack_complex_float* a,\n                               lapack_int lda, float* w,\n                               lapack_complex_float* work, lapack_int lwork,\n                               float* rwork );\nlapack_int LAPACKE_zheev_work( int matrix_order, char jobz, char uplo,\n                               lapack_int n, lapack_complex_double* a,\n                               lapack_int lda, double* w,\n                               lapack_complex_double* work, lapack_int lwork,\n                               double* rwork );\n\nlapack_int LAPACKE_cheevd_work( int matrix_order, char jobz, char uplo,\n                                lapack_int n, lapack_complex_float* a,\n                                lapack_int lda, float* w,\n                                lapack_complex_float* work, lapack_int lwork,\n                                float* rwork, lapack_int lrwork,\n                                lapack_int* iwork, lapack_int liwork );\nlapack_int LAPACKE_zheevd_work( int matrix_order, char jobz, char uplo,\n                                lapack_int n, lapack_complex_double* a,\n                                lapack_int lda, double* w,\n                                lapack_complex_double* work, lapack_int lwork,\n                                double* rwork, lapack_int lrwork,\n                                lapack_int* iwork, lapack_int liwork );\n\nlapack_int LAPACKE_cheevr_work( int matrix_order, char jobz, char range,\n                                char uplo, lapack_int n,\n                                lapack_complex_float* a, lapack_int lda,\n                                float vl, float vu, lapack_int il,\n                                lapack_int iu, float abstol, lapack_int* m,\n                                float* w, lapack_complex_float* z,\n                                lapack_int ldz, lapack_int* isuppz,\n                                lapack_complex_float* work, lapack_int lwork,\n                                float* rwork, lapack_int lrwork,\n                                lapack_int* iwork, lapack_int liwork );\nlapack_int LAPACKE_zheevr_work( int matrix_order, char jobz, char range,\n                                char uplo, lapack_int n,\n                                lapack_complex_double* a, lapack_int lda,\n                                double vl, double vu, lapack_int il,\n                                lapack_int iu, double abstol, lapack_int* m,\n                                double* w, lapack_complex_double* z,\n                                lapack_int ldz, lapack_int* isuppz,\n                                lapack_complex_double* work, lapack_int lwork,\n                                double* rwork, lapack_int lrwork,\n                              
  lapack_int* iwork, lapack_int liwork );\n\nlapack_int LAPACKE_cheevx_work( int matrix_order, char jobz, char range,\n                                char uplo, lapack_int n,\n                                lapack_complex_float* a, lapack_int lda,\n                                float vl, float vu, lapack_int il,\n                                lapack_int iu, float abstol, lapack_int* m,\n                                float* w, lapack_complex_float* z,\n                                lapack_int ldz, lapack_complex_float* work,\n                                lapack_int lwork, float* rwork,\n                                lapack_int* iwork, lapack_int* ifail );\nlapack_int LAPACKE_zheevx_work( int matrix_order, char jobz, char range,\n                                char uplo, lapack_int n,\n                                lapack_complex_double* a, lapack_int lda,\n                                double vl, double vu, lapack_int il,\n                                lapack_int iu, double abstol, lapack_int* m,\n                                double* w, lapack_complex_double* z,\n                                lapack_int ldz, lapack_complex_double* work,\n                                lapack_int lwork, double* rwork,\n                                lapack_int* iwork, lapack_int* ifail );\n\nlapack_int LAPACKE_chegst_work( int matrix_order, lapack_int itype, char uplo,\n                                lapack_int n, lapack_complex_float* a,\n                                lapack_int lda, const lapack_complex_float* b,\n                                lapack_int ldb );\nlapack_int LAPACKE_zhegst_work( int matrix_order, lapack_int itype, char uplo,\n                                lapack_int n, lapack_complex_double* a,\n                                lapack_int lda, const lapack_complex_double* b,\n                                lapack_int ldb );\n\nlapack_int LAPACKE_chegv_work( int matrix_order, lapack_int itype, char jobz,\n                               char uplo, lapack_int n, lapack_complex_float* a,\n                               lapack_int lda, lapack_complex_float* b,\n                               lapack_int ldb, float* w,\n                               lapack_complex_float* work, lapack_int lwork,\n                               float* rwork );\nlapack_int LAPACKE_zhegv_work( int matrix_order, lapack_int itype, char jobz,\n                               char uplo, lapack_int n,\n                               lapack_complex_double* a, lapack_int lda,\n                               lapack_complex_double* b, lapack_int ldb,\n                               double* w, lapack_complex_double* work,\n                               lapack_int lwork, double* rwork );\n\nlapack_int LAPACKE_chegvd_work( int matrix_order, lapack_int itype, char jobz,\n                                char uplo, lapack_int n,\n                                lapack_complex_float* a, lapack_int lda,\n                                lapack_complex_float* b, lapack_int ldb,\n                                float* w, lapack_complex_float* work,\n                                lapack_int lwork, float* rwork,\n                                lapack_int lrwork, lapack_int* iwork,\n                                lapack_int liwork );\nlapack_int LAPACKE_zhegvd_work( int matrix_order, lapack_int itype, char jobz,\n                                char uplo, lapack_int n,\n                                lapack_complex_double* a, lapack_int lda,\n                                lapack_complex_double* b, 
lapack_int ldb,\n                                double* w, lapack_complex_double* work,\n                                lapack_int lwork, double* rwork,\n                                lapack_int lrwork, lapack_int* iwork,\n                                lapack_int liwork );\n\nlapack_int LAPACKE_chegvx_work( int matrix_order, lapack_int itype, char jobz,\n                                char range, char uplo, lapack_int n,\n                                lapack_complex_float* a, lapack_int lda,\n                                lapack_complex_float* b, lapack_int ldb,\n                                float vl, float vu, lapack_int il,\n                                lapack_int iu, float abstol, lapack_int* m,\n                                float* w, lapack_complex_float* z,\n                                lapack_int ldz, lapack_complex_float* work,\n                                lapack_int lwork, float* rwork,\n                                lapack_int* iwork, lapack_int* ifail );\nlapack_int LAPACKE_zhegvx_work( int matrix_order, lapack_int itype, char jobz,\n                                char range, char uplo, lapack_int n,\n                                lapack_complex_double* a, lapack_int lda,\n                                lapack_complex_double* b, lapack_int ldb,\n                                double vl, double vu, lapack_int il,\n                                lapack_int iu, double abstol, lapack_int* m,\n                                double* w, lapack_complex_double* z,\n                                lapack_int ldz, lapack_complex_double* work,\n                                lapack_int lwork, double* rwork,\n                                lapack_int* iwork, lapack_int* ifail );\n\nlapack_int LAPACKE_cherfs_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_int nrhs, const lapack_complex_float* a,\n                                lapack_int lda, const lapack_complex_float* af,\n                                lapack_int ldaf, const lapack_int* ipiv,\n                                const lapack_complex_float* b, lapack_int ldb,\n                                lapack_complex_float* x, lapack_int ldx,\n                                float* ferr, float* berr,\n                                lapack_complex_float* work, float* rwork );\nlapack_int LAPACKE_zherfs_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_int nrhs, const lapack_complex_double* a,\n                                lapack_int lda, const lapack_complex_double* af,\n                                lapack_int ldaf, const lapack_int* ipiv,\n                                const lapack_complex_double* b, lapack_int ldb,\n                                lapack_complex_double* x, lapack_int ldx,\n                                double* ferr, double* berr,\n                                lapack_complex_double* work, double* rwork );\n\nlapack_int LAPACKE_cherfsx_work( int matrix_order, char uplo, char equed,\n                                 lapack_int n, lapack_int nrhs,\n                                 const lapack_complex_float* a, lapack_int lda,\n                                 const lapack_complex_float* af,\n                                 lapack_int ldaf, const lapack_int* ipiv,\n                                 const float* s, const lapack_complex_float* b,\n                                 lapack_int ldb, lapack_complex_float* x,\n                                 lapack_int ldx, float* rcond, float* 
berr,\n                                 lapack_int n_err_bnds, float* err_bnds_norm,\n                                 float* err_bnds_comp, lapack_int nparams,\n                                 float* params, lapack_complex_float* work,\n                                 float* rwork );\nlapack_int LAPACKE_zherfsx_work( int matrix_order, char uplo, char equed,\n                                 lapack_int n, lapack_int nrhs,\n                                 const lapack_complex_double* a, lapack_int lda,\n                                 const lapack_complex_double* af,\n                                 lapack_int ldaf, const lapack_int* ipiv,\n                                 const double* s,\n                                 const lapack_complex_double* b, lapack_int ldb,\n                                 lapack_complex_double* x, lapack_int ldx,\n                                 double* rcond, double* berr,\n                                 lapack_int n_err_bnds, double* err_bnds_norm,\n                                 double* err_bnds_comp, lapack_int nparams,\n                                 double* params, lapack_complex_double* work,\n                                 double* rwork );\n\nlapack_int LAPACKE_chesv_work( int matrix_order, char uplo, lapack_int n,\n                               lapack_int nrhs, lapack_complex_float* a,\n                               lapack_int lda, lapack_int* ipiv,\n                               lapack_complex_float* b, lapack_int ldb,\n                               lapack_complex_float* work, lapack_int lwork );\nlapack_int LAPACKE_zhesv_work( int matrix_order, char uplo, lapack_int n,\n                               lapack_int nrhs, lapack_complex_double* a,\n                               lapack_int lda, lapack_int* ipiv,\n                               lapack_complex_double* b, lapack_int ldb,\n                               lapack_complex_double* work, lapack_int lwork );\n\nlapack_int LAPACKE_chesvx_work( int matrix_order, char fact, char uplo,\n                                lapack_int n, lapack_int nrhs,\n                                const lapack_complex_float* a, lapack_int lda,\n                                lapack_complex_float* af, lapack_int ldaf,\n                                lapack_int* ipiv, const lapack_complex_float* b,\n                                lapack_int ldb, lapack_complex_float* x,\n                                lapack_int ldx, float* rcond, float* ferr,\n                                float* berr, lapack_complex_float* work,\n                                lapack_int lwork, float* rwork );\nlapack_int LAPACKE_zhesvx_work( int matrix_order, char fact, char uplo,\n                                lapack_int n, lapack_int nrhs,\n                                const lapack_complex_double* a, lapack_int lda,\n                                lapack_complex_double* af, lapack_int ldaf,\n                                lapack_int* ipiv,\n                                const lapack_complex_double* b, lapack_int ldb,\n                                lapack_complex_double* x, lapack_int ldx,\n                                double* rcond, double* ferr, double* berr,\n                                lapack_complex_double* work, lapack_int lwork,\n                                double* rwork );\n\nlapack_int LAPACKE_chesvxx_work( int matrix_order, char fact, char uplo,\n                                 lapack_int n, lapack_int nrhs,\n                                 lapack_complex_float* a, lapack_int lda,\n          
                       lapack_complex_float* af, lapack_int ldaf,\n                                 lapack_int* ipiv, char* equed, float* s,\n                                 lapack_complex_float* b, lapack_int ldb,\n                                 lapack_complex_float* x, lapack_int ldx,\n                                 float* rcond, float* rpvgrw, float* berr,\n                                 lapack_int n_err_bnds, float* err_bnds_norm,\n                                 float* err_bnds_comp, lapack_int nparams,\n                                 float* params, lapack_complex_float* work,\n                                 float* rwork );\nlapack_int LAPACKE_zhesvxx_work( int matrix_order, char fact, char uplo,\n                                 lapack_int n, lapack_int nrhs,\n                                 lapack_complex_double* a, lapack_int lda,\n                                 lapack_complex_double* af, lapack_int ldaf,\n                                 lapack_int* ipiv, char* equed, double* s,\n                                 lapack_complex_double* b, lapack_int ldb,\n                                 lapack_complex_double* x, lapack_int ldx,\n                                 double* rcond, double* rpvgrw, double* berr,\n                                 lapack_int n_err_bnds, double* err_bnds_norm,\n                                 double* err_bnds_comp, lapack_int nparams,\n                                 double* params, lapack_complex_double* work,\n                                 double* rwork );\n\nlapack_int LAPACKE_chetrd_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_complex_float* a, lapack_int lda,\n                                float* d, float* e, lapack_complex_float* tau,\n                                lapack_complex_float* work, lapack_int lwork );\nlapack_int LAPACKE_zhetrd_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_complex_double* a, lapack_int lda,\n                                double* d, double* e,\n                                lapack_complex_double* tau,\n                                lapack_complex_double* work, lapack_int lwork );\n\nlapack_int LAPACKE_chetrf_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_complex_float* a, lapack_int lda,\n                                lapack_int* ipiv, lapack_complex_float* work,\n                                lapack_int lwork );\nlapack_int LAPACKE_zhetrf_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_complex_double* a, lapack_int lda,\n                                lapack_int* ipiv, lapack_complex_double* work,\n                                lapack_int lwork );\n\nlapack_int LAPACKE_chetri_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_complex_float* a, lapack_int lda,\n                                const lapack_int* ipiv,\n                                lapack_complex_float* work );\nlapack_int LAPACKE_zhetri_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_complex_double* a, lapack_int lda,\n                                const lapack_int* ipiv,\n                                lapack_complex_double* work );\n\nlapack_int LAPACKE_chetrs_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_int nrhs, const lapack_complex_float* a,\n                                lapack_int lda, const lapack_int* ipiv,\n            
                    lapack_complex_float* b, lapack_int ldb );\nlapack_int LAPACKE_zhetrs_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_int nrhs, const lapack_complex_double* a,\n                                lapack_int lda, const lapack_int* ipiv,\n                                lapack_complex_double* b, lapack_int ldb );\n\nlapack_int LAPACKE_chfrk_work( int matrix_order, char transr, char uplo,\n                               char trans, lapack_int n, lapack_int k,\n                               float alpha, const lapack_complex_float* a,\n                               lapack_int lda, float beta,\n                               lapack_complex_float* c );\nlapack_int LAPACKE_zhfrk_work( int matrix_order, char transr, char uplo,\n                               char trans, lapack_int n, lapack_int k,\n                               double alpha, const lapack_complex_double* a,\n                               lapack_int lda, double beta,\n                               lapack_complex_double* c );\n\nlapack_int LAPACKE_shgeqz_work( int matrix_order, char job, char compq,\n                                char compz, lapack_int n, lapack_int ilo,\n                                lapack_int ihi, float* h, lapack_int ldh,\n                                float* t, lapack_int ldt, float* alphar,\n                                float* alphai, float* beta, float* q,\n                                lapack_int ldq, float* z, lapack_int ldz,\n                                float* work, lapack_int lwork );\nlapack_int LAPACKE_dhgeqz_work( int matrix_order, char job, char compq,\n                                char compz, lapack_int n, lapack_int ilo,\n                                lapack_int ihi, double* h, lapack_int ldh,\n                                double* t, lapack_int ldt, double* alphar,\n                                double* alphai, double* beta, double* q,\n                                lapack_int ldq, double* z, lapack_int ldz,\n                                double* work, lapack_int lwork );\nlapack_int LAPACKE_chgeqz_work( int matrix_order, char job, char compq,\n                                char compz, lapack_int n, lapack_int ilo,\n                                lapack_int ihi, lapack_complex_float* h,\n                                lapack_int ldh, lapack_complex_float* t,\n                                lapack_int ldt, lapack_complex_float* alpha,\n                                lapack_complex_float* beta,\n                                lapack_complex_float* q, lapack_int ldq,\n                                lapack_complex_float* z, lapack_int ldz,\n                                lapack_complex_float* work, lapack_int lwork,\n                                float* rwork );\nlapack_int LAPACKE_zhgeqz_work( int matrix_order, char job, char compq,\n                                char compz, lapack_int n, lapack_int ilo,\n                                lapack_int ihi, lapack_complex_double* h,\n                                lapack_int ldh, lapack_complex_double* t,\n                                lapack_int ldt, lapack_complex_double* alpha,\n                                lapack_complex_double* beta,\n                                lapack_complex_double* q, lapack_int ldq,\n                                lapack_complex_double* z, lapack_int ldz,\n                                lapack_complex_double* work, lapack_int lwork,\n                                double* rwork );\n\nlapack_int LAPACKE_chpcon_work( 
int matrix_order, char uplo, lapack_int n,\n                                const lapack_complex_float* ap,\n                                const lapack_int* ipiv, float anorm,\n                                float* rcond, lapack_complex_float* work );\nlapack_int LAPACKE_zhpcon_work( int matrix_order, char uplo, lapack_int n,\n                                const lapack_complex_double* ap,\n                                const lapack_int* ipiv, double anorm,\n                                double* rcond, lapack_complex_double* work );\n\nlapack_int LAPACKE_chpev_work( int matrix_order, char jobz, char uplo,\n                               lapack_int n, lapack_complex_float* ap, float* w,\n                               lapack_complex_float* z, lapack_int ldz,\n                               lapack_complex_float* work, float* rwork );\nlapack_int LAPACKE_zhpev_work( int matrix_order, char jobz, char uplo,\n                               lapack_int n, lapack_complex_double* ap,\n                               double* w, lapack_complex_double* z,\n                               lapack_int ldz, lapack_complex_double* work,\n                               double* rwork );\n\nlapack_int LAPACKE_chpevd_work( int matrix_order, char jobz, char uplo,\n                                lapack_int n, lapack_complex_float* ap,\n                                float* w, lapack_complex_float* z,\n                                lapack_int ldz, lapack_complex_float* work,\n                                lapack_int lwork, float* rwork,\n                                lapack_int lrwork, lapack_int* iwork,\n                                lapack_int liwork );\nlapack_int LAPACKE_zhpevd_work( int matrix_order, char jobz, char uplo,\n                                lapack_int n, lapack_complex_double* ap,\n                                double* w, lapack_complex_double* z,\n                                lapack_int ldz, lapack_complex_double* work,\n                                lapack_int lwork, double* rwork,\n                                lapack_int lrwork, lapack_int* iwork,\n                                lapack_int liwork );\n\nlapack_int LAPACKE_chpevx_work( int matrix_order, char jobz, char range,\n                                char uplo, lapack_int n,\n                                lapack_complex_float* ap, float vl, float vu,\n                                lapack_int il, lapack_int iu, float abstol,\n                                lapack_int* m, float* w,\n                                lapack_complex_float* z, lapack_int ldz,\n                                lapack_complex_float* work, float* rwork,\n                                lapack_int* iwork, lapack_int* ifail );\nlapack_int LAPACKE_zhpevx_work( int matrix_order, char jobz, char range,\n                                char uplo, lapack_int n,\n                                lapack_complex_double* ap, double vl, double vu,\n                                lapack_int il, lapack_int iu, double abstol,\n                                lapack_int* m, double* w,\n                                lapack_complex_double* z, lapack_int ldz,\n                                lapack_complex_double* work, double* rwork,\n                                lapack_int* iwork, lapack_int* ifail );\n\nlapack_int LAPACKE_chpgst_work( int matrix_order, lapack_int itype, char uplo,\n                                lapack_int n, lapack_complex_float* ap,\n                                const lapack_complex_float* bp );\nlapack_int 
LAPACKE_zhpgst_work( int matrix_order, lapack_int itype, char uplo,\n                                lapack_int n, lapack_complex_double* ap,\n                                const lapack_complex_double* bp );\n\nlapack_int LAPACKE_chpgv_work( int matrix_order, lapack_int itype, char jobz,\n                               char uplo, lapack_int n,\n                               lapack_complex_float* ap,\n                               lapack_complex_float* bp, float* w,\n                               lapack_complex_float* z, lapack_int ldz,\n                               lapack_complex_float* work, float* rwork );\nlapack_int LAPACKE_zhpgv_work( int matrix_order, lapack_int itype, char jobz,\n                               char uplo, lapack_int n,\n                               lapack_complex_double* ap,\n                               lapack_complex_double* bp, double* w,\n                               lapack_complex_double* z, lapack_int ldz,\n                               lapack_complex_double* work, double* rwork );\n\nlapack_int LAPACKE_chpgvd_work( int matrix_order, lapack_int itype, char jobz,\n                                char uplo, lapack_int n,\n                                lapack_complex_float* ap,\n                                lapack_complex_float* bp, float* w,\n                                lapack_complex_float* z, lapack_int ldz,\n                                lapack_complex_float* work, lapack_int lwork,\n                                float* rwork, lapack_int lrwork,\n                                lapack_int* iwork, lapack_int liwork );\nlapack_int LAPACKE_zhpgvd_work( int matrix_order, lapack_int itype, char jobz,\n                                char uplo, lapack_int n,\n                                lapack_complex_double* ap,\n                                lapack_complex_double* bp, double* w,\n                                lapack_complex_double* z, lapack_int ldz,\n                                lapack_complex_double* work, lapack_int lwork,\n                                double* rwork, lapack_int lrwork,\n                                lapack_int* iwork, lapack_int liwork );\n\nlapack_int LAPACKE_chpgvx_work( int matrix_order, lapack_int itype, char jobz,\n                                char range, char uplo, lapack_int n,\n                                lapack_complex_float* ap,\n                                lapack_complex_float* bp, float vl, float vu,\n                                lapack_int il, lapack_int iu, float abstol,\n                                lapack_int* m, float* w,\n                                lapack_complex_float* z, lapack_int ldz,\n                                lapack_complex_float* work, float* rwork,\n                                lapack_int* iwork, lapack_int* ifail );\nlapack_int LAPACKE_zhpgvx_work( int matrix_order, lapack_int itype, char jobz,\n                                char range, char uplo, lapack_int n,\n                                lapack_complex_double* ap,\n                                lapack_complex_double* bp, double vl, double vu,\n                                lapack_int il, lapack_int iu, double abstol,\n                                lapack_int* m, double* w,\n                                lapack_complex_double* z, lapack_int ldz,\n                                lapack_complex_double* work, double* rwork,\n                                lapack_int* iwork, lapack_int* ifail );\n\nlapack_int LAPACKE_chprfs_work( int matrix_order, char uplo, lapack_int n,\n           
                     lapack_int nrhs, const lapack_complex_float* ap,\n                                const lapack_complex_float* afp,\n                                const lapack_int* ipiv,\n                                const lapack_complex_float* b, lapack_int ldb,\n                                lapack_complex_float* x, lapack_int ldx,\n                                float* ferr, float* berr,\n                                lapack_complex_float* work, float* rwork );\nlapack_int LAPACKE_zhprfs_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_int nrhs,\n                                const lapack_complex_double* ap,\n                                const lapack_complex_double* afp,\n                                const lapack_int* ipiv,\n                                const lapack_complex_double* b, lapack_int ldb,\n                                lapack_complex_double* x, lapack_int ldx,\n                                double* ferr, double* berr,\n                                lapack_complex_double* work, double* rwork );\n\nlapack_int LAPACKE_chpsv_work( int matrix_order, char uplo, lapack_int n,\n                               lapack_int nrhs, lapack_complex_float* ap,\n                               lapack_int* ipiv, lapack_complex_float* b,\n                               lapack_int ldb );\nlapack_int LAPACKE_zhpsv_work( int matrix_order, char uplo, lapack_int n,\n                               lapack_int nrhs, lapack_complex_double* ap,\n                               lapack_int* ipiv, lapack_complex_double* b,\n                               lapack_int ldb );\n\nlapack_int LAPACKE_chpsvx_work( int matrix_order, char fact, char uplo,\n                                lapack_int n, lapack_int nrhs,\n                                const lapack_complex_float* ap,\n                                lapack_complex_float* afp, lapack_int* ipiv,\n                                const lapack_complex_float* b, lapack_int ldb,\n                                lapack_complex_float* x, lapack_int ldx,\n                                float* rcond, float* ferr, float* berr,\n                                lapack_complex_float* work, float* rwork );\nlapack_int LAPACKE_zhpsvx_work( int matrix_order, char fact, char uplo,\n                                lapack_int n, lapack_int nrhs,\n                                const lapack_complex_double* ap,\n                                lapack_complex_double* afp, lapack_int* ipiv,\n                                const lapack_complex_double* b, lapack_int ldb,\n                                lapack_complex_double* x, lapack_int ldx,\n                                double* rcond, double* ferr, double* berr,\n                                lapack_complex_double* work, double* rwork );\n\nlapack_int LAPACKE_chptrd_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_complex_float* ap, float* d, float* e,\n                                lapack_complex_float* tau );\nlapack_int LAPACKE_zhptrd_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_complex_double* ap, double* d, double* e,\n                                lapack_complex_double* tau );\n\nlapack_int LAPACKE_chptrf_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_complex_float* ap, lapack_int* ipiv );\nlapack_int LAPACKE_zhptrf_work( int matrix_order, char uplo, lapack_int n,\n                                
lapack_complex_double* ap, lapack_int* ipiv );\n\nlapack_int LAPACKE_chptri_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_complex_float* ap,\n                                const lapack_int* ipiv,\n                                lapack_complex_float* work );\nlapack_int LAPACKE_zhptri_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_complex_double* ap,\n                                const lapack_int* ipiv,\n                                lapack_complex_double* work );\n\nlapack_int LAPACKE_chptrs_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_int nrhs, const lapack_complex_float* ap,\n                                const lapack_int* ipiv, lapack_complex_float* b,\n                                lapack_int ldb );\nlapack_int LAPACKE_zhptrs_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_int nrhs,\n                                const lapack_complex_double* ap,\n                                const lapack_int* ipiv,\n                                lapack_complex_double* b, lapack_int ldb );\n\nlapack_int LAPACKE_shsein_work( int matrix_order, char job, char eigsrc,\n                                char initv, lapack_logical* select,\n                                lapack_int n, const float* h, lapack_int ldh,\n                                float* wr, const float* wi, float* vl,\n                                lapack_int ldvl, float* vr, lapack_int ldvr,\n                                lapack_int mm, lapack_int* m, float* work,\n                                lapack_int* ifaill, lapack_int* ifailr );\nlapack_int LAPACKE_dhsein_work( int matrix_order, char job, char eigsrc,\n                                char initv, lapack_logical* select,\n                                lapack_int n, const double* h, lapack_int ldh,\n                                double* wr, const double* wi, double* vl,\n                                lapack_int ldvl, double* vr, lapack_int ldvr,\n                                lapack_int mm, lapack_int* m, double* work,\n                                lapack_int* ifaill, lapack_int* ifailr );\nlapack_int LAPACKE_chsein_work( int matrix_order, char job, char eigsrc,\n                                char initv, const lapack_logical* select,\n                                lapack_int n, const lapack_complex_float* h,\n                                lapack_int ldh, lapack_complex_float* w,\n                                lapack_complex_float* vl, lapack_int ldvl,\n                                lapack_complex_float* vr, lapack_int ldvr,\n                                lapack_int mm, lapack_int* m,\n                                lapack_complex_float* work, float* rwork,\n                                lapack_int* ifaill, lapack_int* ifailr );\nlapack_int LAPACKE_zhsein_work( int matrix_order, char job, char eigsrc,\n                                char initv, const lapack_logical* select,\n                                lapack_int n, const lapack_complex_double* h,\n                                lapack_int ldh, lapack_complex_double* w,\n                                lapack_complex_double* vl, lapack_int ldvl,\n                                lapack_complex_double* vr, lapack_int ldvr,\n                                lapack_int mm, lapack_int* m,\n                                lapack_complex_double* work, double* rwork,\n                                lapack_int* ifaill, 
lapack_int* ifailr );\n\nlapack_int LAPACKE_shseqr_work( int matrix_order, char job, char compz,\n                                lapack_int n, lapack_int ilo, lapack_int ihi,\n                                float* h, lapack_int ldh, float* wr, float* wi,\n                                float* z, lapack_int ldz, float* work,\n                                lapack_int lwork );\nlapack_int LAPACKE_dhseqr_work( int matrix_order, char job, char compz,\n                                lapack_int n, lapack_int ilo, lapack_int ihi,\n                                double* h, lapack_int ldh, double* wr,\n                                double* wi, double* z, lapack_int ldz,\n                                double* work, lapack_int lwork );\nlapack_int LAPACKE_chseqr_work( int matrix_order, char job, char compz,\n                                lapack_int n, lapack_int ilo, lapack_int ihi,\n                                lapack_complex_float* h, lapack_int ldh,\n                                lapack_complex_float* w,\n                                lapack_complex_float* z, lapack_int ldz,\n                                lapack_complex_float* work, lapack_int lwork );\nlapack_int LAPACKE_zhseqr_work( int matrix_order, char job, char compz,\n                                lapack_int n, lapack_int ilo, lapack_int ihi,\n                                lapack_complex_double* h, lapack_int ldh,\n                                lapack_complex_double* w,\n                                lapack_complex_double* z, lapack_int ldz,\n                                lapack_complex_double* work, lapack_int lwork );\n\nlapack_int LAPACKE_clacgv_work( lapack_int n, lapack_complex_float* x,\n                                lapack_int incx );\nlapack_int LAPACKE_zlacgv_work( lapack_int n, lapack_complex_double* x,\n                                lapack_int incx );\n\nlapack_int LAPACKE_slacpy_work( int matrix_order, char uplo, lapack_int m,\n                                lapack_int n, const float* a, lapack_int lda,\n                                float* b, lapack_int ldb );\nlapack_int LAPACKE_dlacpy_work( int matrix_order, char uplo, lapack_int m,\n                                lapack_int n, const double* a, lapack_int lda,\n                                double* b, lapack_int ldb );\nlapack_int LAPACKE_clacpy_work( int matrix_order, char uplo, lapack_int m,\n                                lapack_int n, const lapack_complex_float* a,\n                                lapack_int lda, lapack_complex_float* b,\n                                lapack_int ldb );\nlapack_int LAPACKE_zlacpy_work( int matrix_order, char uplo, lapack_int m,\n                                lapack_int n, const lapack_complex_double* a,\n                                lapack_int lda, lapack_complex_double* b,\n                                lapack_int ldb );\n\nlapack_int LAPACKE_zlag2c_work( int matrix_order, lapack_int m, lapack_int n,\n                                const lapack_complex_double* a, lapack_int lda,\n                                lapack_complex_float* sa, lapack_int ldsa );\n\nlapack_int LAPACKE_slag2d_work( int matrix_order, lapack_int m, lapack_int n,\n                                const float* sa, lapack_int ldsa, double* a,\n                                lapack_int lda );\n\nlapack_int LAPACKE_dlag2s_work( int matrix_order, lapack_int m, lapack_int n,\n                                const double* a, lapack_int lda, float* sa,\n                                lapack_int ldsa );\n\nlapack_int 
LAPACKE_clag2z_work( int matrix_order, lapack_int m, lapack_int n,\n                                const lapack_complex_float* sa, lapack_int ldsa,\n                                lapack_complex_double* a, lapack_int lda );\n\nlapack_int LAPACKE_slagge_work( int matrix_order, lapack_int m, lapack_int n,\n                                lapack_int kl, lapack_int ku, const float* d,\n                                float* a, lapack_int lda, lapack_int* iseed,\n                                float* work );\nlapack_int LAPACKE_dlagge_work( int matrix_order, lapack_int m, lapack_int n,\n                                lapack_int kl, lapack_int ku, const double* d,\n                                double* a, lapack_int lda, lapack_int* iseed,\n                                double* work );\nlapack_int LAPACKE_clagge_work( int matrix_order, lapack_int m, lapack_int n,\n                                lapack_int kl, lapack_int ku, const float* d,\n                                lapack_complex_float* a, lapack_int lda,\n                                lapack_int* iseed, lapack_complex_float* work );\nlapack_int LAPACKE_zlagge_work( int matrix_order, lapack_int m, lapack_int n,\n                                lapack_int kl, lapack_int ku, const double* d,\n                                lapack_complex_double* a, lapack_int lda,\n                                lapack_int* iseed,\n                                lapack_complex_double* work );\n                                \nlapack_int LAPACKE_claghe_work( int matrix_order, lapack_int n, lapack_int k,\n                                const float* d, lapack_complex_float* a,\n                                lapack_int lda, lapack_int* iseed,\n                                lapack_complex_float* work );\nlapack_int LAPACKE_zlaghe_work( int matrix_order, lapack_int n, lapack_int k,\n                                const double* d, lapack_complex_double* a,\n                                lapack_int lda, lapack_int* iseed,\n                                lapack_complex_double* work );\n\nlapack_int LAPACKE_slagsy_work( int matrix_order, lapack_int n, lapack_int k,\n                                const float* d, float* a, lapack_int lda,\n                                lapack_int* iseed, float* work );\nlapack_int LAPACKE_dlagsy_work( int matrix_order, lapack_int n, lapack_int k,\n                                const double* d, double* a, lapack_int lda,\n                                lapack_int* iseed, double* work );\nlapack_int LAPACKE_clagsy_work( int matrix_order, lapack_int n, lapack_int k,\n                                const float* d, lapack_complex_float* a,\n                                lapack_int lda, lapack_int* iseed,\n                                lapack_complex_float* work );\nlapack_int LAPACKE_zlagsy_work( int matrix_order, lapack_int n, lapack_int k,\n                                const double* d, lapack_complex_double* a,\n                                lapack_int lda, lapack_int* iseed,\n                                lapack_complex_double* work );\n\nlapack_int LAPACKE_slapmr_work( int matrix_order, lapack_logical forwrd,\n                                lapack_int m, lapack_int n, float* x,\n                                lapack_int ldx, lapack_int* k );\nlapack_int LAPACKE_dlapmr_work( int matrix_order, lapack_logical forwrd,\n                                lapack_int m, lapack_int n, double* x,\n                                lapack_int ldx, lapack_int* k );\nlapack_int LAPACKE_clapmr_work( int 
matrix_order, lapack_logical forwrd,\n                                lapack_int m, lapack_int n,\n                                lapack_complex_float* x, lapack_int ldx,\n                                lapack_int* k );\nlapack_int LAPACKE_zlapmr_work( int matrix_order, lapack_logical forwrd,\n                                lapack_int m, lapack_int n,\n                                lapack_complex_double* x, lapack_int ldx,\n                                lapack_int* k );\n\nlapack_int LAPACKE_slartgp_work( float f, float g, float* cs, float* sn,\n                                 float* r );\nlapack_int LAPACKE_dlartgp_work( double f, double g, double* cs, double* sn,\n                                 double* r );\n\nlapack_int LAPACKE_slartgs_work( float x, float y, float sigma, float* cs,\n                                 float* sn );\nlapack_int LAPACKE_dlartgs_work( double x, double y, double sigma, double* cs,\n                                 double* sn );\n                                \nfloat LAPACKE_slapy2_work( float x, float y );\ndouble LAPACKE_dlapy2_work( double x, double y );\n\nfloat LAPACKE_slapy3_work( float x, float y, float z );\ndouble LAPACKE_dlapy3_work( double x, double y, double z );\n\nfloat LAPACKE_slamch_work( char cmach );\ndouble LAPACKE_dlamch_work( char cmach );\n\nfloat LAPACKE_slange_work( int matrix_order, char norm, lapack_int m,\n                                lapack_int n, const float* a, lapack_int lda,\n                                float* work );\ndouble LAPACKE_dlange_work( int matrix_order, char norm, lapack_int m,\n                                lapack_int n, const double* a, lapack_int lda,\n                                double* work );\nfloat LAPACKE_clange_work( int matrix_order, char norm, lapack_int m,\n                                lapack_int n, const lapack_complex_float* a,\n                                lapack_int lda, float* work );\ndouble LAPACKE_zlange_work( int matrix_order, char norm, lapack_int m,\n                                lapack_int n, const lapack_complex_double* a,\n                                lapack_int lda, double* work );\n\nfloat LAPACKE_clanhe_work( int matrix_order, char norm, char uplo,\n                                lapack_int n, const lapack_complex_float* a,\n                                lapack_int lda, float* work );\ndouble LAPACKE_zlanhe_work( int matrix_order, char norm, char uplo,\n                                lapack_int n, const lapack_complex_double* a,\n                                lapack_int lda, double* work );\n\nfloat LAPACKE_slansy_work( int matrix_order, char norm, char uplo,\n                                lapack_int n, const float* a, lapack_int lda,\n                                float* work );\ndouble LAPACKE_dlansy_work( int matrix_order, char norm, char uplo,\n                                lapack_int n, const double* a, lapack_int lda,\n                                double* work );\nfloat LAPACKE_clansy_work( int matrix_order, char norm, char uplo,\n                                lapack_int n, const lapack_complex_float* a,\n                                lapack_int lda, float* work );\ndouble LAPACKE_zlansy_work( int matrix_order, char norm, char uplo,\n                                lapack_int n, const lapack_complex_double* a,\n                                lapack_int lda, double* work );\n\nfloat LAPACKE_slantr_work( int matrix_order, char norm, char uplo,\n                                char diag, lapack_int m, lapack_int n, const float* a,\n 
                               lapack_int lda, float* work );\ndouble LAPACKE_dlantr_work( int matrix_order, char norm, char uplo,\n                                char diag, lapack_int m, lapack_int n,\n                                const double* a, lapack_int lda, double* work );\nfloat LAPACKE_clantr_work( int matrix_order, char norm, char uplo,\n                                char diag, lapack_int m, lapack_int n,\n                                const lapack_complex_float* a, lapack_int lda,\n                                float* work );\ndouble LAPACKE_zlantr_work( int matrix_order, char norm, char uplo,\n                                char diag, lapack_int m, lapack_int n,\n                                const lapack_complex_double* a, lapack_int lda,\n                                double* work );\n\nlapack_int LAPACKE_slarfb_work( int matrix_order, char side, char trans,\n                                char direct, char storev, lapack_int m,\n                                lapack_int n, lapack_int k, const float* v,\n                                lapack_int ldv, const float* t, lapack_int ldt,\n                                float* c, lapack_int ldc, float* work,\n                                lapack_int ldwork );\nlapack_int LAPACKE_dlarfb_work( int matrix_order, char side, char trans,\n                                char direct, char storev, lapack_int m,\n                                lapack_int n, lapack_int k, const double* v,\n                                lapack_int ldv, const double* t, lapack_int ldt,\n                                double* c, lapack_int ldc, double* work,\n                                lapack_int ldwork );\nlapack_int LAPACKE_clarfb_work( int matrix_order, char side, char trans,\n                                char direct, char storev, lapack_int m,\n                                lapack_int n, lapack_int k,\n                                const lapack_complex_float* v, lapack_int ldv,\n                                const lapack_complex_float* t, lapack_int ldt,\n                                lapack_complex_float* c, lapack_int ldc,\n                                lapack_complex_float* work, lapack_int ldwork );\nlapack_int LAPACKE_zlarfb_work( int matrix_order, char side, char trans,\n                                char direct, char storev, lapack_int m,\n                                lapack_int n, lapack_int k,\n                                const lapack_complex_double* v, lapack_int ldv,\n                                const lapack_complex_double* t, lapack_int ldt,\n                                lapack_complex_double* c, lapack_int ldc,\n                                lapack_complex_double* work,\n                                lapack_int ldwork );\n\nlapack_int LAPACKE_slarfg_work( lapack_int n, float* alpha, float* x,\n                                lapack_int incx, float* tau );\nlapack_int LAPACKE_dlarfg_work( lapack_int n, double* alpha, double* x,\n                                lapack_int incx, double* tau );\nlapack_int LAPACKE_clarfg_work( lapack_int n, lapack_complex_float* alpha,\n                                lapack_complex_float* x, lapack_int incx,\n                                lapack_complex_float* tau );\nlapack_int LAPACKE_zlarfg_work( lapack_int n, lapack_complex_double* alpha,\n                                lapack_complex_double* x, lapack_int incx,\n                                lapack_complex_double* tau );\n\nlapack_int LAPACKE_slarft_work( int matrix_order, char direct, char 
storev,\n                                lapack_int n, lapack_int k, const float* v,\n                                lapack_int ldv, const float* tau, float* t,\n                                lapack_int ldt );\nlapack_int LAPACKE_dlarft_work( int matrix_order, char direct, char storev,\n                                lapack_int n, lapack_int k, const double* v,\n                                lapack_int ldv, const double* tau, double* t,\n                                lapack_int ldt );\nlapack_int LAPACKE_clarft_work( int matrix_order, char direct, char storev,\n                                lapack_int n, lapack_int k,\n                                const lapack_complex_float* v, lapack_int ldv,\n                                const lapack_complex_float* tau,\n                                lapack_complex_float* t, lapack_int ldt );\nlapack_int LAPACKE_zlarft_work( int matrix_order, char direct, char storev,\n                                lapack_int n, lapack_int k,\n                                const lapack_complex_double* v, lapack_int ldv,\n                                const lapack_complex_double* tau,\n                                lapack_complex_double* t, lapack_int ldt );\n\nlapack_int LAPACKE_slarfx_work( int matrix_order, char side, lapack_int m,\n                                lapack_int n, const float* v, float tau,\n                                float* c, lapack_int ldc, float* work );\nlapack_int LAPACKE_dlarfx_work( int matrix_order, char side, lapack_int m,\n                                lapack_int n, const double* v, double tau,\n                                double* c, lapack_int ldc, double* work );\nlapack_int LAPACKE_clarfx_work( int matrix_order, char side, lapack_int m,\n                                lapack_int n, const lapack_complex_float* v,\n                                lapack_complex_float tau,\n                                lapack_complex_float* c, lapack_int ldc,\n                                lapack_complex_float* work );\nlapack_int LAPACKE_zlarfx_work( int matrix_order, char side, lapack_int m,\n                                lapack_int n, const lapack_complex_double* v,\n                                lapack_complex_double tau,\n                                lapack_complex_double* c, lapack_int ldc,\n                                lapack_complex_double* work );\n\nlapack_int LAPACKE_slarnv_work( lapack_int idist, lapack_int* iseed,\n                                lapack_int n, float* x );\nlapack_int LAPACKE_dlarnv_work( lapack_int idist, lapack_int* iseed,\n                                lapack_int n, double* x );\nlapack_int LAPACKE_clarnv_work( lapack_int idist, lapack_int* iseed,\n                                lapack_int n, lapack_complex_float* x );\nlapack_int LAPACKE_zlarnv_work( lapack_int idist, lapack_int* iseed,\n                                lapack_int n, lapack_complex_double* x );\n\nlapack_int LAPACKE_slaset_work( int matrix_order, char uplo, lapack_int m,\n                                lapack_int n, float alpha, float beta, float* a,\n                                lapack_int lda );\nlapack_int LAPACKE_dlaset_work( int matrix_order, char uplo, lapack_int m,\n                                lapack_int n, double alpha, double beta,\n                                double* a, lapack_int lda );\nlapack_int LAPACKE_claset_work( int matrix_order, char uplo, lapack_int m,\n                                lapack_int n, lapack_complex_float alpha,\n                                
lapack_complex_float beta,\n                                lapack_complex_float* a, lapack_int lda );\nlapack_int LAPACKE_zlaset_work( int matrix_order, char uplo, lapack_int m,\n                                lapack_int n, lapack_complex_double alpha,\n                                lapack_complex_double beta,\n                                lapack_complex_double* a, lapack_int lda );\n\nlapack_int LAPACKE_slasrt_work( char id, lapack_int n, float* d );\nlapack_int LAPACKE_dlasrt_work( char id, lapack_int n, double* d );\n\nlapack_int LAPACKE_slaswp_work( int matrix_order, lapack_int n, float* a,\n                                lapack_int lda, lapack_int k1, lapack_int k2,\n                                const lapack_int* ipiv, lapack_int incx );\nlapack_int LAPACKE_dlaswp_work( int matrix_order, lapack_int n, double* a,\n                                lapack_int lda, lapack_int k1, lapack_int k2,\n                                const lapack_int* ipiv, lapack_int incx );\nlapack_int LAPACKE_claswp_work( int matrix_order, lapack_int n,\n                                lapack_complex_float* a, lapack_int lda,\n                                lapack_int k1, lapack_int k2,\n                                const lapack_int* ipiv, lapack_int incx );\nlapack_int LAPACKE_zlaswp_work( int matrix_order, lapack_int n,\n                                lapack_complex_double* a, lapack_int lda,\n                                lapack_int k1, lapack_int k2,\n                                const lapack_int* ipiv, lapack_int incx );\n\nlapack_int LAPACKE_slatms_work( int matrix_order, lapack_int m, lapack_int n,\n                                char dist, lapack_int* iseed, char sym,\n                                float* d, lapack_int mode, float cond,\n                                float dmax, lapack_int kl, lapack_int ku,\n                                char pack, float* a, lapack_int lda,\n                                float* work );\nlapack_int LAPACKE_dlatms_work( int matrix_order, lapack_int m, lapack_int n,\n                                char dist, lapack_int* iseed, char sym,\n                                double* d, lapack_int mode, double cond,\n                                double dmax, lapack_int kl, lapack_int ku,\n                                char pack, double* a, lapack_int lda,\n                                double* work );\nlapack_int LAPACKE_clatms_work( int matrix_order, lapack_int m, lapack_int n,\n                                char dist, lapack_int* iseed, char sym,\n                                float* d, lapack_int mode, float cond,\n                                float dmax, lapack_int kl, lapack_int ku,\n                                char pack, lapack_complex_float* a,\n                                lapack_int lda, lapack_complex_float* work );\nlapack_int LAPACKE_zlatms_work( int matrix_order, lapack_int m, lapack_int n,\n                                char dist, lapack_int* iseed, char sym,\n                                double* d, lapack_int mode, double cond,\n                                double dmax, lapack_int kl, lapack_int ku,\n                                char pack, lapack_complex_double* a,\n                                lapack_int lda, lapack_complex_double* work );\n\nlapack_int LAPACKE_slauum_work( int matrix_order, char uplo, lapack_int n,\n                                float* a, lapack_int lda );\nlapack_int LAPACKE_dlauum_work( int matrix_order, char uplo, lapack_int n,\n                                double* a, 
lapack_int lda );\nlapack_int LAPACKE_clauum_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_complex_float* a, lapack_int lda );\nlapack_int LAPACKE_zlauum_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_complex_double* a, lapack_int lda );\n\nlapack_int LAPACKE_sopgtr_work( int matrix_order, char uplo, lapack_int n,\n                                const float* ap, const float* tau, float* q,\n                                lapack_int ldq, float* work );\nlapack_int LAPACKE_dopgtr_work( int matrix_order, char uplo, lapack_int n,\n                                const double* ap, const double* tau, double* q,\n                                lapack_int ldq, double* work );\n\nlapack_int LAPACKE_sopmtr_work( int matrix_order, char side, char uplo,\n                                char trans, lapack_int m, lapack_int n,\n                                const float* ap, const float* tau, float* c,\n                                lapack_int ldc, float* work );\nlapack_int LAPACKE_dopmtr_work( int matrix_order, char side, char uplo,\n                                char trans, lapack_int m, lapack_int n,\n                                const double* ap, const double* tau, double* c,\n                                lapack_int ldc, double* work );\n\nlapack_int LAPACKE_sorgbr_work( int matrix_order, char vect, lapack_int m,\n                                lapack_int n, lapack_int k, float* a,\n                                lapack_int lda, const float* tau, float* work,\n                                lapack_int lwork );\nlapack_int LAPACKE_dorgbr_work( int matrix_order, char vect, lapack_int m,\n                                lapack_int n, lapack_int k, double* a,\n                                lapack_int lda, const double* tau, double* work,\n                                lapack_int lwork );\n\nlapack_int LAPACKE_sorghr_work( int matrix_order, lapack_int n, lapack_int ilo,\n                                lapack_int ihi, float* a, lapack_int lda,\n                                const float* tau, float* work,\n                                lapack_int lwork );\nlapack_int LAPACKE_dorghr_work( int matrix_order, lapack_int n, lapack_int ilo,\n                                lapack_int ihi, double* a, lapack_int lda,\n                                const double* tau, double* work,\n                                lapack_int lwork );\n\nlapack_int LAPACKE_sorglq_work( int matrix_order, lapack_int m, lapack_int n,\n                                lapack_int k, float* a, lapack_int lda,\n                                const float* tau, float* work,\n                                lapack_int lwork );\nlapack_int LAPACKE_dorglq_work( int matrix_order, lapack_int m, lapack_int n,\n                                lapack_int k, double* a, lapack_int lda,\n                                const double* tau, double* work,\n                                lapack_int lwork );\n\nlapack_int LAPACKE_sorgql_work( int matrix_order, lapack_int m, lapack_int n,\n                                lapack_int k, float* a, lapack_int lda,\n                                const float* tau, float* work,\n                                lapack_int lwork );\nlapack_int LAPACKE_dorgql_work( int matrix_order, lapack_int m, lapack_int n,\n                                lapack_int k, double* a, lapack_int lda,\n                                const double* tau, double* work,\n                                lapack_int lwork 
);\n\nlapack_int LAPACKE_sorgqr_work( int matrix_order, lapack_int m, lapack_int n,\n                                lapack_int k, float* a, lapack_int lda,\n                                const float* tau, float* work,\n                                lapack_int lwork );\nlapack_int LAPACKE_dorgqr_work( int matrix_order, lapack_int m, lapack_int n,\n                                lapack_int k, double* a, lapack_int lda,\n                                const double* tau, double* work,\n                                lapack_int lwork );\n\nlapack_int LAPACKE_sorgrq_work( int matrix_order, lapack_int m, lapack_int n,\n                                lapack_int k, float* a, lapack_int lda,\n                                const float* tau, float* work,\n                                lapack_int lwork );\nlapack_int LAPACKE_dorgrq_work( int matrix_order, lapack_int m, lapack_int n,\n                                lapack_int k, double* a, lapack_int lda,\n                                const double* tau, double* work,\n                                lapack_int lwork );\n\nlapack_int LAPACKE_sorgtr_work( int matrix_order, char uplo, lapack_int n,\n                                float* a, lapack_int lda, const float* tau,\n                                float* work, lapack_int lwork );\nlapack_int LAPACKE_dorgtr_work( int matrix_order, char uplo, lapack_int n,\n                                double* a, lapack_int lda, const double* tau,\n                                double* work, lapack_int lwork );\n\nlapack_int LAPACKE_sormbr_work( int matrix_order, char vect, char side,\n                                char trans, lapack_int m, lapack_int n,\n                                lapack_int k, const float* a, lapack_int lda,\n                                const float* tau, float* c, lapack_int ldc,\n                                float* work, lapack_int lwork );\nlapack_int LAPACKE_dormbr_work( int matrix_order, char vect, char side,\n                                char trans, lapack_int m, lapack_int n,\n                                lapack_int k, const double* a, lapack_int lda,\n                                const double* tau, double* c, lapack_int ldc,\n                                double* work, lapack_int lwork );\n\nlapack_int LAPACKE_sormhr_work( int matrix_order, char side, char trans,\n                                lapack_int m, lapack_int n, lapack_int ilo,\n                                lapack_int ihi, const float* a, lapack_int lda,\n                                const float* tau, float* c, lapack_int ldc,\n                                float* work, lapack_int lwork );\nlapack_int LAPACKE_dormhr_work( int matrix_order, char side, char trans,\n                                lapack_int m, lapack_int n, lapack_int ilo,\n                                lapack_int ihi, const double* a, lapack_int lda,\n                                const double* tau, double* c, lapack_int ldc,\n                                double* work, lapack_int lwork );\n\nlapack_int LAPACKE_sormlq_work( int matrix_order, char side, char trans,\n                                lapack_int m, lapack_int n, lapack_int k,\n                                const float* a, lapack_int lda,\n                                const float* tau, float* c, lapack_int ldc,\n                                float* work, lapack_int lwork );\nlapack_int LAPACKE_dormlq_work( int matrix_order, char side, char trans,\n                                lapack_int m, lapack_int n, lapack_int k,\n                
                const double* a, lapack_int lda,\n                                const double* tau, double* c, lapack_int ldc,\n                                double* work, lapack_int lwork );\n\nlapack_int LAPACKE_sormql_work( int matrix_order, char side, char trans,\n                                lapack_int m, lapack_int n, lapack_int k,\n                                const float* a, lapack_int lda,\n                                const float* tau, float* c, lapack_int ldc,\n                                float* work, lapack_int lwork );\nlapack_int LAPACKE_dormql_work( int matrix_order, char side, char trans,\n                                lapack_int m, lapack_int n, lapack_int k,\n                                const double* a, lapack_int lda,\n                                const double* tau, double* c, lapack_int ldc,\n                                double* work, lapack_int lwork );\n\nlapack_int LAPACKE_sormqr_work( int matrix_order, char side, char trans,\n                                lapack_int m, lapack_int n, lapack_int k,\n                                const float* a, lapack_int lda,\n                                const float* tau, float* c, lapack_int ldc,\n                                float* work, lapack_int lwork );\nlapack_int LAPACKE_dormqr_work( int matrix_order, char side, char trans,\n                                lapack_int m, lapack_int n, lapack_int k,\n                                const double* a, lapack_int lda,\n                                const double* tau, double* c, lapack_int ldc,\n                                double* work, lapack_int lwork );\n\nlapack_int LAPACKE_sormrq_work( int matrix_order, char side, char trans,\n                                lapack_int m, lapack_int n, lapack_int k,\n                                const float* a, lapack_int lda,\n                                const float* tau, float* c, lapack_int ldc,\n                                float* work, lapack_int lwork );\nlapack_int LAPACKE_dormrq_work( int matrix_order, char side, char trans,\n                                lapack_int m, lapack_int n, lapack_int k,\n                                const double* a, lapack_int lda,\n                                const double* tau, double* c, lapack_int ldc,\n                                double* work, lapack_int lwork );\n\nlapack_int LAPACKE_sormrz_work( int matrix_order, char side, char trans,\n                                lapack_int m, lapack_int n, lapack_int k,\n                                lapack_int l, const float* a, lapack_int lda,\n                                const float* tau, float* c, lapack_int ldc,\n                                float* work, lapack_int lwork );\nlapack_int LAPACKE_dormrz_work( int matrix_order, char side, char trans,\n                                lapack_int m, lapack_int n, lapack_int k,\n                                lapack_int l, const double* a, lapack_int lda,\n                                const double* tau, double* c, lapack_int ldc,\n                                double* work, lapack_int lwork );\n\nlapack_int LAPACKE_sormtr_work( int matrix_order, char side, char uplo,\n                                char trans, lapack_int m, lapack_int n,\n                                const float* a, lapack_int lda,\n                                const float* tau, float* c, lapack_int ldc,\n                                float* work, lapack_int lwork );\nlapack_int LAPACKE_dormtr_work( int matrix_order, char side, char uplo,\n                      
          char trans, lapack_int m, lapack_int n,\n                                const double* a, lapack_int lda,\n                                const double* tau, double* c, lapack_int ldc,\n                                double* work, lapack_int lwork );\n\nlapack_int LAPACKE_spbcon_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_int kd, const float* ab, lapack_int ldab,\n                                float anorm, float* rcond, float* work,\n                                lapack_int* iwork );\nlapack_int LAPACKE_dpbcon_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_int kd, const double* ab,\n                                lapack_int ldab, double anorm, double* rcond,\n                                double* work, lapack_int* iwork );\nlapack_int LAPACKE_cpbcon_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_int kd, const lapack_complex_float* ab,\n                                lapack_int ldab, float anorm, float* rcond,\n                                lapack_complex_float* work, float* rwork );\nlapack_int LAPACKE_zpbcon_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_int kd, const lapack_complex_double* ab,\n                                lapack_int ldab, double anorm, double* rcond,\n                                lapack_complex_double* work, double* rwork );\n\nlapack_int LAPACKE_spbequ_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_int kd, const float* ab, lapack_int ldab,\n                                float* s, float* scond, float* amax );\nlapack_int LAPACKE_dpbequ_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_int kd, const double* ab,\n                                lapack_int ldab, double* s, double* scond,\n                                double* amax );\nlapack_int LAPACKE_cpbequ_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_int kd, const lapack_complex_float* ab,\n                                lapack_int ldab, float* s, float* scond,\n                                float* amax );\nlapack_int LAPACKE_zpbequ_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_int kd, const lapack_complex_double* ab,\n                                lapack_int ldab, double* s, double* scond,\n                                double* amax );\n\nlapack_int LAPACKE_spbrfs_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_int kd, lapack_int nrhs, const float* ab,\n                                lapack_int ldab, const float* afb,\n                                lapack_int ldafb, const float* b,\n                                lapack_int ldb, float* x, lapack_int ldx,\n                                float* ferr, float* berr, float* work,\n                                lapack_int* iwork );\nlapack_int LAPACKE_dpbrfs_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_int kd, lapack_int nrhs,\n                                const double* ab, lapack_int ldab,\n                                const double* afb, lapack_int ldafb,\n                                const double* b, lapack_int ldb, double* x,\n                                lapack_int ldx, double* ferr, double* berr,\n                                double* work, lapack_int* iwork );\nlapack_int LAPACKE_cpbrfs_work( 
int matrix_order, char uplo, lapack_int n,\n                                lapack_int kd, lapack_int nrhs,\n                                const lapack_complex_float* ab, lapack_int ldab,\n                                const lapack_complex_float* afb,\n                                lapack_int ldafb, const lapack_complex_float* b,\n                                lapack_int ldb, lapack_complex_float* x,\n                                lapack_int ldx, float* ferr, float* berr,\n                                lapack_complex_float* work, float* rwork );\nlapack_int LAPACKE_zpbrfs_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_int kd, lapack_int nrhs,\n                                const lapack_complex_double* ab,\n                                lapack_int ldab,\n                                const lapack_complex_double* afb,\n                                lapack_int ldafb,\n                                const lapack_complex_double* b, lapack_int ldb,\n                                lapack_complex_double* x, lapack_int ldx,\n                                double* ferr, double* berr,\n                                lapack_complex_double* work, double* rwork );\n\nlapack_int LAPACKE_spbstf_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_int kb, float* bb, lapack_int ldbb );\nlapack_int LAPACKE_dpbstf_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_int kb, double* bb, lapack_int ldbb );\nlapack_int LAPACKE_cpbstf_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_int kb, lapack_complex_float* bb,\n                                lapack_int ldbb );\nlapack_int LAPACKE_zpbstf_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_int kb, lapack_complex_double* bb,\n                                lapack_int ldbb );\n\nlapack_int LAPACKE_spbsv_work( int matrix_order, char uplo, lapack_int n,\n                               lapack_int kd, lapack_int nrhs, float* ab,\n                               lapack_int ldab, float* b, lapack_int ldb );\nlapack_int LAPACKE_dpbsv_work( int matrix_order, char uplo, lapack_int n,\n                               lapack_int kd, lapack_int nrhs, double* ab,\n                               lapack_int ldab, double* b, lapack_int ldb );\nlapack_int LAPACKE_cpbsv_work( int matrix_order, char uplo, lapack_int n,\n                               lapack_int kd, lapack_int nrhs,\n                               lapack_complex_float* ab, lapack_int ldab,\n                               lapack_complex_float* b, lapack_int ldb );\nlapack_int LAPACKE_zpbsv_work( int matrix_order, char uplo, lapack_int n,\n                               lapack_int kd, lapack_int nrhs,\n                               lapack_complex_double* ab, lapack_int ldab,\n                               lapack_complex_double* b, lapack_int ldb );\n\nlapack_int LAPACKE_spbsvx_work( int matrix_order, char fact, char uplo,\n                                lapack_int n, lapack_int kd, lapack_int nrhs,\n                                float* ab, lapack_int ldab, float* afb,\n                                lapack_int ldafb, char* equed, float* s,\n                                float* b, lapack_int ldb, float* x,\n                                lapack_int ldx, float* rcond, float* ferr,\n                                float* berr, float* work, lapack_int* iwork );\nlapack_int LAPACKE_dpbsvx_work( int 
matrix_order, char fact, char uplo,\n                                lapack_int n, lapack_int kd, lapack_int nrhs,\n                                double* ab, lapack_int ldab, double* afb,\n                                lapack_int ldafb, char* equed, double* s,\n                                double* b, lapack_int ldb, double* x,\n                                lapack_int ldx, double* rcond, double* ferr,\n                                double* berr, double* work, lapack_int* iwork );\nlapack_int LAPACKE_cpbsvx_work( int matrix_order, char fact, char uplo,\n                                lapack_int n, lapack_int kd, lapack_int nrhs,\n                                lapack_complex_float* ab, lapack_int ldab,\n                                lapack_complex_float* afb, lapack_int ldafb,\n                                char* equed, float* s, lapack_complex_float* b,\n                                lapack_int ldb, lapack_complex_float* x,\n                                lapack_int ldx, float* rcond, float* ferr,\n                                float* berr, lapack_complex_float* work,\n                                float* rwork );\nlapack_int LAPACKE_zpbsvx_work( int matrix_order, char fact, char uplo,\n                                lapack_int n, lapack_int kd, lapack_int nrhs,\n                                lapack_complex_double* ab, lapack_int ldab,\n                                lapack_complex_double* afb, lapack_int ldafb,\n                                char* equed, double* s,\n                                lapack_complex_double* b, lapack_int ldb,\n                                lapack_complex_double* x, lapack_int ldx,\n                                double* rcond, double* ferr, double* berr,\n                                lapack_complex_double* work, double* rwork );\n\nlapack_int LAPACKE_spbtrf_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_int kd, float* ab, lapack_int ldab );\nlapack_int LAPACKE_dpbtrf_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_int kd, double* ab, lapack_int ldab );\nlapack_int LAPACKE_cpbtrf_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_int kd, lapack_complex_float* ab,\n                                lapack_int ldab );\nlapack_int LAPACKE_zpbtrf_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_int kd, lapack_complex_double* ab,\n                                lapack_int ldab );\n\nlapack_int LAPACKE_spbtrs_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_int kd, lapack_int nrhs, const float* ab,\n                                lapack_int ldab, float* b, lapack_int ldb );\nlapack_int LAPACKE_dpbtrs_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_int kd, lapack_int nrhs,\n                                const double* ab, lapack_int ldab, double* b,\n                                lapack_int ldb );\nlapack_int LAPACKE_cpbtrs_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_int kd, lapack_int nrhs,\n                                const lapack_complex_float* ab, lapack_int ldab,\n                                lapack_complex_float* b, lapack_int ldb );\nlapack_int LAPACKE_zpbtrs_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_int kd, lapack_int nrhs,\n                                const lapack_complex_double* 
ab,\n                                lapack_int ldab, lapack_complex_double* b,\n                                lapack_int ldb );\n\nlapack_int LAPACKE_spftrf_work( int matrix_order, char transr, char uplo,\n                                lapack_int n, float* a );\nlapack_int LAPACKE_dpftrf_work( int matrix_order, char transr, char uplo,\n                                lapack_int n, double* a );\nlapack_int LAPACKE_cpftrf_work( int matrix_order, char transr, char uplo,\n                                lapack_int n, lapack_complex_float* a );\nlapack_int LAPACKE_zpftrf_work( int matrix_order, char transr, char uplo,\n                                lapack_int n, lapack_complex_double* a );\n\nlapack_int LAPACKE_spftri_work( int matrix_order, char transr, char uplo,\n                                lapack_int n, float* a );\nlapack_int LAPACKE_dpftri_work( int matrix_order, char transr, char uplo,\n                                lapack_int n, double* a );\nlapack_int LAPACKE_cpftri_work( int matrix_order, char transr, char uplo,\n                                lapack_int n, lapack_complex_float* a );\nlapack_int LAPACKE_zpftri_work( int matrix_order, char transr, char uplo,\n                                lapack_int n, lapack_complex_double* a );\n\nlapack_int LAPACKE_spftrs_work( int matrix_order, char transr, char uplo,\n                                lapack_int n, lapack_int nrhs, const float* a,\n                                float* b, lapack_int ldb );\nlapack_int LAPACKE_dpftrs_work( int matrix_order, char transr, char uplo,\n                                lapack_int n, lapack_int nrhs, const double* a,\n                                double* b, lapack_int ldb );\nlapack_int LAPACKE_cpftrs_work( int matrix_order, char transr, char uplo,\n                                lapack_int n, lapack_int nrhs,\n                                const lapack_complex_float* a,\n                                lapack_complex_float* b, lapack_int ldb );\nlapack_int LAPACKE_zpftrs_work( int matrix_order, char transr, char uplo,\n                                lapack_int n, lapack_int nrhs,\n                                const lapack_complex_double* a,\n                                lapack_complex_double* b, lapack_int ldb );\n\nlapack_int LAPACKE_spocon_work( int matrix_order, char uplo, lapack_int n,\n                                const float* a, lapack_int lda, float anorm,\n                                float* rcond, float* work, lapack_int* iwork );\nlapack_int LAPACKE_dpocon_work( int matrix_order, char uplo, lapack_int n,\n                                const double* a, lapack_int lda, double anorm,\n                                double* rcond, double* work,\n                                lapack_int* iwork );\nlapack_int LAPACKE_cpocon_work( int matrix_order, char uplo, lapack_int n,\n                                const lapack_complex_float* a, lapack_int lda,\n                                float anorm, float* rcond,\n                                lapack_complex_float* work, float* rwork );\nlapack_int LAPACKE_zpocon_work( int matrix_order, char uplo, lapack_int n,\n                                const lapack_complex_double* a, lapack_int lda,\n                                double anorm, double* rcond,\n                                lapack_complex_double* work, double* rwork );\n\nlapack_int LAPACKE_spoequ_work( int matrix_order, lapack_int n, const float* a,\n                                lapack_int lda, float* s, float* scond,\n                               
 float* amax );\nlapack_int LAPACKE_dpoequ_work( int matrix_order, lapack_int n, const double* a,\n                                lapack_int lda, double* s, double* scond,\n                                double* amax );\nlapack_int LAPACKE_cpoequ_work( int matrix_order, lapack_int n,\n                                const lapack_complex_float* a, lapack_int lda,\n                                float* s, float* scond, float* amax );\nlapack_int LAPACKE_zpoequ_work( int matrix_order, lapack_int n,\n                                const lapack_complex_double* a, lapack_int lda,\n                                double* s, double* scond, double* amax );\n\nlapack_int LAPACKE_spoequb_work( int matrix_order, lapack_int n, const float* a,\n                                 lapack_int lda, float* s, float* scond,\n                                 float* amax );\nlapack_int LAPACKE_dpoequb_work( int matrix_order, lapack_int n,\n                                 const double* a, lapack_int lda, double* s,\n                                 double* scond, double* amax );\nlapack_int LAPACKE_cpoequb_work( int matrix_order, lapack_int n,\n                                 const lapack_complex_float* a, lapack_int lda,\n                                 float* s, float* scond, float* amax );\nlapack_int LAPACKE_zpoequb_work( int matrix_order, lapack_int n,\n                                 const lapack_complex_double* a, lapack_int lda,\n                                 double* s, double* scond, double* amax );\n\nlapack_int LAPACKE_sporfs_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_int nrhs, const float* a, lapack_int lda,\n                                const float* af, lapack_int ldaf,\n                                const float* b, lapack_int ldb, float* x,\n                                lapack_int ldx, float* ferr, float* berr,\n                                float* work, lapack_int* iwork );\nlapack_int LAPACKE_dporfs_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_int nrhs, const double* a,\n                                lapack_int lda, const double* af,\n                                lapack_int ldaf, const double* b,\n                                lapack_int ldb, double* x, lapack_int ldx,\n                                double* ferr, double* berr, double* work,\n                                lapack_int* iwork );\nlapack_int LAPACKE_cporfs_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_int nrhs, const lapack_complex_float* a,\n                                lapack_int lda, const lapack_complex_float* af,\n                                lapack_int ldaf, const lapack_complex_float* b,\n                                lapack_int ldb, lapack_complex_float* x,\n                                lapack_int ldx, float* ferr, float* berr,\n                                lapack_complex_float* work, float* rwork );\nlapack_int LAPACKE_zporfs_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_int nrhs, const lapack_complex_double* a,\n                                lapack_int lda, const lapack_complex_double* af,\n                                lapack_int ldaf, const lapack_complex_double* b,\n                                lapack_int ldb, lapack_complex_double* x,\n                                lapack_int ldx, double* ferr, double* berr,\n                                lapack_complex_double* work, double* rwork 
);\n\nlapack_int LAPACKE_sporfsx_work( int matrix_order, char uplo, char equed,\n                                 lapack_int n, lapack_int nrhs, const float* a,\n                                 lapack_int lda, const float* af,\n                                 lapack_int ldaf, const float* s,\n                                 const float* b, lapack_int ldb, float* x,\n                                 lapack_int ldx, float* rcond, float* berr,\n                                 lapack_int n_err_bnds, float* err_bnds_norm,\n                                 float* err_bnds_comp, lapack_int nparams,\n                                 float* params, float* work,\n                                 lapack_int* iwork );\nlapack_int LAPACKE_dporfsx_work( int matrix_order, char uplo, char equed,\n                                 lapack_int n, lapack_int nrhs, const double* a,\n                                 lapack_int lda, const double* af,\n                                 lapack_int ldaf, const double* s,\n                                 const double* b, lapack_int ldb, double* x,\n                                 lapack_int ldx, double* rcond, double* berr,\n                                 lapack_int n_err_bnds, double* err_bnds_norm,\n                                 double* err_bnds_comp, lapack_int nparams,\n                                 double* params, double* work,\n                                 lapack_int* iwork );\nlapack_int LAPACKE_cporfsx_work( int matrix_order, char uplo, char equed,\n                                 lapack_int n, lapack_int nrhs,\n                                 const lapack_complex_float* a, lapack_int lda,\n                                 const lapack_complex_float* af,\n                                 lapack_int ldaf, const float* s,\n                                 const lapack_complex_float* b, lapack_int ldb,\n                                 lapack_complex_float* x, lapack_int ldx,\n                                 float* rcond, float* berr,\n                                 lapack_int n_err_bnds, float* err_bnds_norm,\n                                 float* err_bnds_comp, lapack_int nparams,\n                                 float* params, lapack_complex_float* work,\n                                 float* rwork );\nlapack_int LAPACKE_zporfsx_work( int matrix_order, char uplo, char equed,\n                                 lapack_int n, lapack_int nrhs,\n                                 const lapack_complex_double* a, lapack_int lda,\n                                 const lapack_complex_double* af,\n                                 lapack_int ldaf, const double* s,\n                                 const lapack_complex_double* b, lapack_int ldb,\n                                 lapack_complex_double* x, lapack_int ldx,\n                                 double* rcond, double* berr,\n                                 lapack_int n_err_bnds, double* err_bnds_norm,\n                                 double* err_bnds_comp, lapack_int nparams,\n                                 double* params, lapack_complex_double* work,\n                                 double* rwork );\n\nlapack_int LAPACKE_sposv_work( int matrix_order, char uplo, lapack_int n,\n                               lapack_int nrhs, float* a, lapack_int lda,\n                               float* b, lapack_int ldb );\nlapack_int LAPACKE_dposv_work( int matrix_order, char uplo, lapack_int n,\n                               lapack_int nrhs, double* a, lapack_int lda,\n                           
    double* b, lapack_int ldb );\nlapack_int LAPACKE_cposv_work( int matrix_order, char uplo, lapack_int n,\n                               lapack_int nrhs, lapack_complex_float* a,\n                               lapack_int lda, lapack_complex_float* b,\n                               lapack_int ldb );\nlapack_int LAPACKE_zposv_work( int matrix_order, char uplo, lapack_int n,\n                               lapack_int nrhs, lapack_complex_double* a,\n                               lapack_int lda, lapack_complex_double* b,\n                               lapack_int ldb );\nlapack_int LAPACKE_dsposv_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_int nrhs, double* a, lapack_int lda,\n                                double* b, lapack_int ldb, double* x,\n                                lapack_int ldx, double* work, float* swork,\n                                lapack_int* iter );\nlapack_int LAPACKE_zcposv_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_int nrhs, lapack_complex_double* a,\n                                lapack_int lda, lapack_complex_double* b,\n                                lapack_int ldb, lapack_complex_double* x,\n                                lapack_int ldx, lapack_complex_double* work,\n                                lapack_complex_float* swork, double* rwork,\n                                lapack_int* iter );\n\nlapack_int LAPACKE_sposvx_work( int matrix_order, char fact, char uplo,\n                                lapack_int n, lapack_int nrhs, float* a,\n                                lapack_int lda, float* af, lapack_int ldaf,\n                                char* equed, float* s, float* b, lapack_int ldb,\n                                float* x, lapack_int ldx, float* rcond,\n                                float* ferr, float* berr, float* work,\n                                lapack_int* iwork );\nlapack_int LAPACKE_dposvx_work( int matrix_order, char fact, char uplo,\n                                lapack_int n, lapack_int nrhs, double* a,\n                                lapack_int lda, double* af, lapack_int ldaf,\n                                char* equed, double* s, double* b,\n                                lapack_int ldb, double* x, lapack_int ldx,\n                                double* rcond, double* ferr, double* berr,\n                                double* work, lapack_int* iwork );\nlapack_int LAPACKE_cposvx_work( int matrix_order, char fact, char uplo,\n                                lapack_int n, lapack_int nrhs,\n                                lapack_complex_float* a, lapack_int lda,\n                                lapack_complex_float* af, lapack_int ldaf,\n                                char* equed, float* s, lapack_complex_float* b,\n                                lapack_int ldb, lapack_complex_float* x,\n                                lapack_int ldx, float* rcond, float* ferr,\n                                float* berr, lapack_complex_float* work,\n                                float* rwork );\nlapack_int LAPACKE_zposvx_work( int matrix_order, char fact, char uplo,\n                                lapack_int n, lapack_int nrhs,\n                                lapack_complex_double* a, lapack_int lda,\n                                lapack_complex_double* af, lapack_int ldaf,\n                                char* equed, double* s,\n                                lapack_complex_double* b, lapack_int ldb,\n                            
    lapack_complex_double* x, lapack_int ldx,\n                                double* rcond, double* ferr, double* berr,\n                                lapack_complex_double* work, double* rwork );\n\nlapack_int LAPACKE_sposvxx_work( int matrix_order, char fact, char uplo,\n                                 lapack_int n, lapack_int nrhs, float* a,\n                                 lapack_int lda, float* af, lapack_int ldaf,\n                                 char* equed, float* s, float* b,\n                                 lapack_int ldb, float* x, lapack_int ldx,\n                                 float* rcond, float* rpvgrw, float* berr,\n                                 lapack_int n_err_bnds, float* err_bnds_norm,\n                                 float* err_bnds_comp, lapack_int nparams,\n                                 float* params, float* work,\n                                 lapack_int* iwork );\nlapack_int LAPACKE_dposvxx_work( int matrix_order, char fact, char uplo,\n                                 lapack_int n, lapack_int nrhs, double* a,\n                                 lapack_int lda, double* af, lapack_int ldaf,\n                                 char* equed, double* s, double* b,\n                                 lapack_int ldb, double* x, lapack_int ldx,\n                                 double* rcond, double* rpvgrw, double* berr,\n                                 lapack_int n_err_bnds, double* err_bnds_norm,\n                                 double* err_bnds_comp, lapack_int nparams,\n                                 double* params, double* work,\n                                 lapack_int* iwork );\nlapack_int LAPACKE_cposvxx_work( int matrix_order, char fact, char uplo,\n                                 lapack_int n, lapack_int nrhs,\n                                 lapack_complex_float* a, lapack_int lda,\n                                 lapack_complex_float* af, lapack_int ldaf,\n                                 char* equed, float* s, lapack_complex_float* b,\n                                 lapack_int ldb, lapack_complex_float* x,\n                                 lapack_int ldx, float* rcond, float* rpvgrw,\n                                 float* berr, lapack_int n_err_bnds,\n                                 float* err_bnds_norm, float* err_bnds_comp,\n                                 lapack_int nparams, float* params,\n                                 lapack_complex_float* work, float* rwork );\nlapack_int LAPACKE_zposvxx_work( int matrix_order, char fact, char uplo,\n                                 lapack_int n, lapack_int nrhs,\n                                 lapack_complex_double* a, lapack_int lda,\n                                 lapack_complex_double* af, lapack_int ldaf,\n                                 char* equed, double* s,\n                                 lapack_complex_double* b, lapack_int ldb,\n                                 lapack_complex_double* x, lapack_int ldx,\n                                 double* rcond, double* rpvgrw, double* berr,\n                                 lapack_int n_err_bnds, double* err_bnds_norm,\n                                 double* err_bnds_comp, lapack_int nparams,\n                                 double* params, lapack_complex_double* work,\n                                 double* rwork );\n\nlapack_int LAPACKE_spotrf_work( int matrix_order, char uplo, lapack_int n,\n                                float* a, lapack_int lda );\nlapack_int LAPACKE_dpotrf_work( int matrix_order, char uplo, lapack_int 
n,\n                                double* a, lapack_int lda );\nlapack_int LAPACKE_cpotrf_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_complex_float* a, lapack_int lda );\nlapack_int LAPACKE_zpotrf_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_complex_double* a, lapack_int lda );\n\nlapack_int LAPACKE_spotri_work( int matrix_order, char uplo, lapack_int n,\n                                float* a, lapack_int lda );\nlapack_int LAPACKE_dpotri_work( int matrix_order, char uplo, lapack_int n,\n                                double* a, lapack_int lda );\nlapack_int LAPACKE_cpotri_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_complex_float* a, lapack_int lda );\nlapack_int LAPACKE_zpotri_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_complex_double* a, lapack_int lda );\n\nlapack_int LAPACKE_spotrs_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_int nrhs, const float* a, lapack_int lda,\n                                float* b, lapack_int ldb );\nlapack_int LAPACKE_dpotrs_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_int nrhs, const double* a,\n                                lapack_int lda, double* b, lapack_int ldb );\nlapack_int LAPACKE_cpotrs_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_int nrhs, const lapack_complex_float* a,\n                                lapack_int lda, lapack_complex_float* b,\n                                lapack_int ldb );\nlapack_int LAPACKE_zpotrs_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_int nrhs, const lapack_complex_double* a,\n                                lapack_int lda, lapack_complex_double* b,\n                                lapack_int ldb );\n\nlapack_int LAPACKE_sppcon_work( int matrix_order, char uplo, lapack_int n,\n                                const float* ap, float anorm, float* rcond,\n                                float* work, lapack_int* iwork );\nlapack_int LAPACKE_dppcon_work( int matrix_order, char uplo, lapack_int n,\n                                const double* ap, double anorm, double* rcond,\n                                double* work, lapack_int* iwork );\nlapack_int LAPACKE_cppcon_work( int matrix_order, char uplo, lapack_int n,\n                                const lapack_complex_float* ap, float anorm,\n                                float* rcond, lapack_complex_float* work,\n                                float* rwork );\nlapack_int LAPACKE_zppcon_work( int matrix_order, char uplo, lapack_int n,\n                                const lapack_complex_double* ap, double anorm,\n                                double* rcond, lapack_complex_double* work,\n                                double* rwork );\n\nlapack_int LAPACKE_sppequ_work( int matrix_order, char uplo, lapack_int n,\n                                const float* ap, float* s, float* scond,\n                                float* amax );\nlapack_int LAPACKE_dppequ_work( int matrix_order, char uplo, lapack_int n,\n                                const double* ap, double* s, double* scond,\n                                double* amax );\nlapack_int LAPACKE_cppequ_work( int matrix_order, char uplo, lapack_int n,\n                                const lapack_complex_float* ap, float* s,\n                                
float* scond, float* amax );\nlapack_int LAPACKE_zppequ_work( int matrix_order, char uplo, lapack_int n,\n                                const lapack_complex_double* ap, double* s,\n                                double* scond, double* amax );\n\nlapack_int LAPACKE_spprfs_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_int nrhs, const float* ap,\n                                const float* afp, const float* b,\n                                lapack_int ldb, float* x, lapack_int ldx,\n                                float* ferr, float* berr, float* work,\n                                lapack_int* iwork );\nlapack_int LAPACKE_dpprfs_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_int nrhs, const double* ap,\n                                const double* afp, const double* b,\n                                lapack_int ldb, double* x, lapack_int ldx,\n                                double* ferr, double* berr, double* work,\n                                lapack_int* iwork );\nlapack_int LAPACKE_cpprfs_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_int nrhs, const lapack_complex_float* ap,\n                                const lapack_complex_float* afp,\n                                const lapack_complex_float* b, lapack_int ldb,\n                                lapack_complex_float* x, lapack_int ldx,\n                                float* ferr, float* berr,\n                                lapack_complex_float* work, float* rwork );\nlapack_int LAPACKE_zpprfs_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_int nrhs,\n                                const lapack_complex_double* ap,\n                                const lapack_complex_double* afp,\n                                const lapack_complex_double* b, lapack_int ldb,\n                                lapack_complex_double* x, lapack_int ldx,\n                                double* ferr, double* berr,\n                                lapack_complex_double* work, double* rwork );\n\nlapack_int LAPACKE_sppsv_work( int matrix_order, char uplo, lapack_int n,\n                               lapack_int nrhs, float* ap, float* b,\n                               lapack_int ldb );\nlapack_int LAPACKE_dppsv_work( int matrix_order, char uplo, lapack_int n,\n                               lapack_int nrhs, double* ap, double* b,\n                               lapack_int ldb );\nlapack_int LAPACKE_cppsv_work( int matrix_order, char uplo, lapack_int n,\n                               lapack_int nrhs, lapack_complex_float* ap,\n                               lapack_complex_float* b, lapack_int ldb );\nlapack_int LAPACKE_zppsv_work( int matrix_order, char uplo, lapack_int n,\n                               lapack_int nrhs, lapack_complex_double* ap,\n                               lapack_complex_double* b, lapack_int ldb );\n\nlapack_int LAPACKE_sppsvx_work( int matrix_order, char fact, char uplo,\n                                lapack_int n, lapack_int nrhs, float* ap,\n                                float* afp, char* equed, float* s, float* b,\n                                lapack_int ldb, float* x, lapack_int ldx,\n                                float* rcond, float* ferr, float* berr,\n                                float* work, lapack_int* iwork );\nlapack_int LAPACKE_dppsvx_work( int matrix_order, char fact, char uplo,\n                                lapack_int n, 
lapack_int nrhs, double* ap,\n                                double* afp, char* equed, double* s, double* b,\n                                lapack_int ldb, double* x, lapack_int ldx,\n                                double* rcond, double* ferr, double* berr,\n                                double* work, lapack_int* iwork );\nlapack_int LAPACKE_cppsvx_work( int matrix_order, char fact, char uplo,\n                                lapack_int n, lapack_int nrhs,\n                                lapack_complex_float* ap,\n                                lapack_complex_float* afp, char* equed,\n                                float* s, lapack_complex_float* b,\n                                lapack_int ldb, lapack_complex_float* x,\n                                lapack_int ldx, float* rcond, float* ferr,\n                                float* berr, lapack_complex_float* work,\n                                float* rwork );\nlapack_int LAPACKE_zppsvx_work( int matrix_order, char fact, char uplo,\n                                lapack_int n, lapack_int nrhs,\n                                lapack_complex_double* ap,\n                                lapack_complex_double* afp, char* equed,\n                                double* s, lapack_complex_double* b,\n                                lapack_int ldb, lapack_complex_double* x,\n                                lapack_int ldx, double* rcond, double* ferr,\n                                double* berr, lapack_complex_double* work,\n                                double* rwork );\n\nlapack_int LAPACKE_spptrf_work( int matrix_order, char uplo, lapack_int n,\n                                float* ap );\nlapack_int LAPACKE_dpptrf_work( int matrix_order, char uplo, lapack_int n,\n                                double* ap );\nlapack_int LAPACKE_cpptrf_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_complex_float* ap );\nlapack_int LAPACKE_zpptrf_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_complex_double* ap );\n\nlapack_int LAPACKE_spptri_work( int matrix_order, char uplo, lapack_int n,\n                                float* ap );\nlapack_int LAPACKE_dpptri_work( int matrix_order, char uplo, lapack_int n,\n                                double* ap );\nlapack_int LAPACKE_cpptri_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_complex_float* ap );\nlapack_int LAPACKE_zpptri_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_complex_double* ap );\n\nlapack_int LAPACKE_spptrs_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_int nrhs, const float* ap, float* b,\n                                lapack_int ldb );\nlapack_int LAPACKE_dpptrs_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_int nrhs, const double* ap, double* b,\n                                lapack_int ldb );\nlapack_int LAPACKE_cpptrs_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_int nrhs, const lapack_complex_float* ap,\n                                lapack_complex_float* b, lapack_int ldb );\nlapack_int LAPACKE_zpptrs_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_int nrhs,\n                                const lapack_complex_double* ap,\n                                lapack_complex_double* b, lapack_int ldb );\n\nlapack_int LAPACKE_spstrf_work( 
int matrix_order, char uplo, lapack_int n,\n                                float* a, lapack_int lda, lapack_int* piv,\n                                lapack_int* rank, float tol, float* work );\nlapack_int LAPACKE_dpstrf_work( int matrix_order, char uplo, lapack_int n,\n                                double* a, lapack_int lda, lapack_int* piv,\n                                lapack_int* rank, double tol, double* work );\nlapack_int LAPACKE_cpstrf_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_complex_float* a, lapack_int lda,\n                                lapack_int* piv, lapack_int* rank, float tol,\n                                float* work );\nlapack_int LAPACKE_zpstrf_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_complex_double* a, lapack_int lda,\n                                lapack_int* piv, lapack_int* rank, double tol,\n                                double* work );\n\nlapack_int LAPACKE_sptcon_work( lapack_int n, const float* d, const float* e,\n                                float anorm, float* rcond, float* work );\nlapack_int LAPACKE_dptcon_work( lapack_int n, const double* d, const double* e,\n                                double anorm, double* rcond, double* work );\nlapack_int LAPACKE_cptcon_work( lapack_int n, const float* d,\n                                const lapack_complex_float* e, float anorm,\n                                float* rcond, float* work );\nlapack_int LAPACKE_zptcon_work( lapack_int n, const double* d,\n                                const lapack_complex_double* e, double anorm,\n                                double* rcond, double* work );\n\nlapack_int LAPACKE_spteqr_work( int matrix_order, char compz, lapack_int n,\n                                float* d, float* e, float* z, lapack_int ldz,\n                                float* work );\nlapack_int LAPACKE_dpteqr_work( int matrix_order, char compz, lapack_int n,\n                                double* d, double* e, double* z, lapack_int ldz,\n                                double* work );\nlapack_int LAPACKE_cpteqr_work( int matrix_order, char compz, lapack_int n,\n                                float* d, float* e, lapack_complex_float* z,\n                                lapack_int ldz, float* work );\nlapack_int LAPACKE_zpteqr_work( int matrix_order, char compz, lapack_int n,\n                                double* d, double* e, lapack_complex_double* z,\n                                lapack_int ldz, double* work );\n\nlapack_int LAPACKE_sptrfs_work( int matrix_order, lapack_int n, lapack_int nrhs,\n                                const float* d, const float* e, const float* df,\n                                const float* ef, const float* b, lapack_int ldb,\n                                float* x, lapack_int ldx, float* ferr,\n                                float* berr, float* work );\nlapack_int LAPACKE_dptrfs_work( int matrix_order, lapack_int n, lapack_int nrhs,\n                                const double* d, const double* e,\n                                const double* df, const double* ef,\n                                const double* b, lapack_int ldb, double* x,\n                                lapack_int ldx, double* ferr, double* berr,\n                                double* work );\nlapack_int LAPACKE_cptrfs_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_int nrhs, const float* d,\n                                const 
lapack_complex_float* e, const float* df,\n                                const lapack_complex_float* ef,\n                                const lapack_complex_float* b, lapack_int ldb,\n                                lapack_complex_float* x, lapack_int ldx,\n                                float* ferr, float* berr,\n                                lapack_complex_float* work, float* rwork );\nlapack_int LAPACKE_zptrfs_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_int nrhs, const double* d,\n                                const lapack_complex_double* e,\n                                const double* df,\n                                const lapack_complex_double* ef,\n                                const lapack_complex_double* b, lapack_int ldb,\n                                lapack_complex_double* x, lapack_int ldx,\n                                double* ferr, double* berr,\n                                lapack_complex_double* work, double* rwork );\n\nlapack_int LAPACKE_sptsv_work( int matrix_order, lapack_int n, lapack_int nrhs,\n                               float* d, float* e, float* b, lapack_int ldb );\nlapack_int LAPACKE_dptsv_work( int matrix_order, lapack_int n, lapack_int nrhs,\n                               double* d, double* e, double* b,\n                               lapack_int ldb );\nlapack_int LAPACKE_cptsv_work( int matrix_order, lapack_int n, lapack_int nrhs,\n                               float* d, lapack_complex_float* e,\n                               lapack_complex_float* b, lapack_int ldb );\nlapack_int LAPACKE_zptsv_work( int matrix_order, lapack_int n, lapack_int nrhs,\n                               double* d, lapack_complex_double* e,\n                               lapack_complex_double* b, lapack_int ldb );\n\nlapack_int LAPACKE_sptsvx_work( int matrix_order, char fact, lapack_int n,\n                                lapack_int nrhs, const float* d, const float* e,\n                                float* df, float* ef, const float* b,\n                                lapack_int ldb, float* x, lapack_int ldx,\n                                float* rcond, float* ferr, float* berr,\n                                float* work );\nlapack_int LAPACKE_dptsvx_work( int matrix_order, char fact, lapack_int n,\n                                lapack_int nrhs, const double* d,\n                                const double* e, double* df, double* ef,\n                                const double* b, lapack_int ldb, double* x,\n                                lapack_int ldx, double* rcond, double* ferr,\n                                double* berr, double* work );\nlapack_int LAPACKE_cptsvx_work( int matrix_order, char fact, lapack_int n,\n                                lapack_int nrhs, const float* d,\n                                const lapack_complex_float* e, float* df,\n                                lapack_complex_float* ef,\n                                const lapack_complex_float* b, lapack_int ldb,\n                                lapack_complex_float* x, lapack_int ldx,\n                                float* rcond, float* ferr, float* berr,\n                                lapack_complex_float* work, float* rwork );\nlapack_int LAPACKE_zptsvx_work( int matrix_order, char fact, lapack_int n,\n                                lapack_int nrhs, const double* d,\n                                const lapack_complex_double* e, double* df,\n                                lapack_complex_double* ef,\n      
                          const lapack_complex_double* b, lapack_int ldb,\n                                lapack_complex_double* x, lapack_int ldx,\n                                double* rcond, double* ferr, double* berr,\n                                lapack_complex_double* work, double* rwork );\n\nlapack_int LAPACKE_spttrf_work( lapack_int n, float* d, float* e );\nlapack_int LAPACKE_dpttrf_work( lapack_int n, double* d, double* e );\nlapack_int LAPACKE_cpttrf_work( lapack_int n, float* d,\n                                lapack_complex_float* e );\nlapack_int LAPACKE_zpttrf_work( lapack_int n, double* d,\n                                lapack_complex_double* e );\n\nlapack_int LAPACKE_spttrs_work( int matrix_order, lapack_int n, lapack_int nrhs,\n                                const float* d, const float* e, float* b,\n                                lapack_int ldb );\nlapack_int LAPACKE_dpttrs_work( int matrix_order, lapack_int n, lapack_int nrhs,\n                                const double* d, const double* e, double* b,\n                                lapack_int ldb );\nlapack_int LAPACKE_cpttrs_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_int nrhs, const float* d,\n                                const lapack_complex_float* e,\n                                lapack_complex_float* b, lapack_int ldb );\nlapack_int LAPACKE_zpttrs_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_int nrhs, const double* d,\n                                const lapack_complex_double* e,\n                                lapack_complex_double* b, lapack_int ldb );\n\nlapack_int LAPACKE_ssbev_work( int matrix_order, char jobz, char uplo,\n                               lapack_int n, lapack_int kd, float* ab,\n                               lapack_int ldab, float* w, float* z,\n                               lapack_int ldz, float* work );\nlapack_int LAPACKE_dsbev_work( int matrix_order, char jobz, char uplo,\n                               lapack_int n, lapack_int kd, double* ab,\n                               lapack_int ldab, double* w, double* z,\n                               lapack_int ldz, double* work );\n\nlapack_int LAPACKE_ssbevd_work( int matrix_order, char jobz, char uplo,\n                                lapack_int n, lapack_int kd, float* ab,\n                                lapack_int ldab, float* w, float* z,\n                                lapack_int ldz, float* work, lapack_int lwork,\n                                lapack_int* iwork, lapack_int liwork );\nlapack_int LAPACKE_dsbevd_work( int matrix_order, char jobz, char uplo,\n                                lapack_int n, lapack_int kd, double* ab,\n                                lapack_int ldab, double* w, double* z,\n                                lapack_int ldz, double* work, lapack_int lwork,\n                                lapack_int* iwork, lapack_int liwork );\n\nlapack_int LAPACKE_ssbevx_work( int matrix_order, char jobz, char range,\n                                char uplo, lapack_int n, lapack_int kd,\n                                float* ab, lapack_int ldab, float* q,\n                                lapack_int ldq, float vl, float vu,\n                                lapack_int il, lapack_int iu, float abstol,\n                                lapack_int* m, float* w, float* z,\n                                lapack_int ldz, float* work, lapack_int* iwork,\n                                lapack_int* ifail );\nlapack_int 
LAPACKE_dsbevx_work( int matrix_order, char jobz, char range,\n                                char uplo, lapack_int n, lapack_int kd,\n                                double* ab, lapack_int ldab, double* q,\n                                lapack_int ldq, double vl, double vu,\n                                lapack_int il, lapack_int iu, double abstol,\n                                lapack_int* m, double* w, double* z,\n                                lapack_int ldz, double* work, lapack_int* iwork,\n                                lapack_int* ifail );\n\nlapack_int LAPACKE_ssbgst_work( int matrix_order, char vect, char uplo,\n                                lapack_int n, lapack_int ka, lapack_int kb,\n                                float* ab, lapack_int ldab, const float* bb,\n                                lapack_int ldbb, float* x, lapack_int ldx,\n                                float* work );\nlapack_int LAPACKE_dsbgst_work( int matrix_order, char vect, char uplo,\n                                lapack_int n, lapack_int ka, lapack_int kb,\n                                double* ab, lapack_int ldab, const double* bb,\n                                lapack_int ldbb, double* x, lapack_int ldx,\n                                double* work );\n\nlapack_int LAPACKE_ssbgv_work( int matrix_order, char jobz, char uplo,\n                               lapack_int n, lapack_int ka, lapack_int kb,\n                               float* ab, lapack_int ldab, float* bb,\n                               lapack_int ldbb, float* w, float* z,\n                               lapack_int ldz, float* work );\nlapack_int LAPACKE_dsbgv_work( int matrix_order, char jobz, char uplo,\n                               lapack_int n, lapack_int ka, lapack_int kb,\n                               double* ab, lapack_int ldab, double* bb,\n                               lapack_int ldbb, double* w, double* z,\n                               lapack_int ldz, double* work );\n\nlapack_int LAPACKE_ssbgvd_work( int matrix_order, char jobz, char uplo,\n                                lapack_int n, lapack_int ka, lapack_int kb,\n                                float* ab, lapack_int ldab, float* bb,\n                                lapack_int ldbb, float* w, float* z,\n                                lapack_int ldz, float* work, lapack_int lwork,\n                                lapack_int* iwork, lapack_int liwork );\nlapack_int LAPACKE_dsbgvd_work( int matrix_order, char jobz, char uplo,\n                                lapack_int n, lapack_int ka, lapack_int kb,\n                                double* ab, lapack_int ldab, double* bb,\n                                lapack_int ldbb, double* w, double* z,\n                                lapack_int ldz, double* work, lapack_int lwork,\n                                lapack_int* iwork, lapack_int liwork );\n\nlapack_int LAPACKE_ssbgvx_work( int matrix_order, char jobz, char range,\n                                char uplo, lapack_int n, lapack_int ka,\n                                lapack_int kb, float* ab, lapack_int ldab,\n                                float* bb, lapack_int ldbb, float* q,\n                                lapack_int ldq, float vl, float vu,\n                                lapack_int il, lapack_int iu, float abstol,\n                                lapack_int* m, float* w, float* z,\n                                lapack_int ldz, float* work, lapack_int* iwork,\n                                lapack_int* ifail );\nlapack_int 
LAPACKE_dsbgvx_work( int matrix_order, char jobz, char range,\n                                char uplo, lapack_int n, lapack_int ka,\n                                lapack_int kb, double* ab, lapack_int ldab,\n                                double* bb, lapack_int ldbb, double* q,\n                                lapack_int ldq, double vl, double vu,\n                                lapack_int il, lapack_int iu, double abstol,\n                                lapack_int* m, double* w, double* z,\n                                lapack_int ldz, double* work, lapack_int* iwork,\n                                lapack_int* ifail );\n\nlapack_int LAPACKE_ssbtrd_work( int matrix_order, char vect, char uplo,\n                                lapack_int n, lapack_int kd, float* ab,\n                                lapack_int ldab, float* d, float* e, float* q,\n                                lapack_int ldq, float* work );\nlapack_int LAPACKE_dsbtrd_work( int matrix_order, char vect, char uplo,\n                                lapack_int n, lapack_int kd, double* ab,\n                                lapack_int ldab, double* d, double* e,\n                                double* q, lapack_int ldq, double* work );\n\nlapack_int LAPACKE_ssfrk_work( int matrix_order, char transr, char uplo,\n                               char trans, lapack_int n, lapack_int k,\n                               float alpha, const float* a, lapack_int lda,\n                               float beta, float* c );\nlapack_int LAPACKE_dsfrk_work( int matrix_order, char transr, char uplo,\n                               char trans, lapack_int n, lapack_int k,\n                               double alpha, const double* a, lapack_int lda,\n                               double beta, double* c );\n\nlapack_int LAPACKE_sspcon_work( int matrix_order, char uplo, lapack_int n,\n                                const float* ap, const lapack_int* ipiv,\n                                float anorm, float* rcond, float* work,\n                                lapack_int* iwork );\nlapack_int LAPACKE_dspcon_work( int matrix_order, char uplo, lapack_int n,\n                                const double* ap, const lapack_int* ipiv,\n                                double anorm, double* rcond, double* work,\n                                lapack_int* iwork );\nlapack_int LAPACKE_cspcon_work( int matrix_order, char uplo, lapack_int n,\n                                const lapack_complex_float* ap,\n                                const lapack_int* ipiv, float anorm,\n                                float* rcond, lapack_complex_float* work );\nlapack_int LAPACKE_zspcon_work( int matrix_order, char uplo, lapack_int n,\n                                const lapack_complex_double* ap,\n                                const lapack_int* ipiv, double anorm,\n                                double* rcond, lapack_complex_double* work );\n\nlapack_int LAPACKE_sspev_work( int matrix_order, char jobz, char uplo,\n                               lapack_int n, float* ap, float* w, float* z,\n                               lapack_int ldz, float* work );\nlapack_int LAPACKE_dspev_work( int matrix_order, char jobz, char uplo,\n                               lapack_int n, double* ap, double* w, double* z,\n                               lapack_int ldz, double* work );\n\nlapack_int LAPACKE_sspevd_work( int matrix_order, char jobz, char uplo,\n                                lapack_int n, float* ap, float* w, float* z,\n                                
lapack_int ldz, float* work, lapack_int lwork,\n                                lapack_int* iwork, lapack_int liwork );\nlapack_int LAPACKE_dspevd_work( int matrix_order, char jobz, char uplo,\n                                lapack_int n, double* ap, double* w, double* z,\n                                lapack_int ldz, double* work, lapack_int lwork,\n                                lapack_int* iwork, lapack_int liwork );\n\nlapack_int LAPACKE_sspevx_work( int matrix_order, char jobz, char range,\n                                char uplo, lapack_int n, float* ap, float vl,\n                                float vu, lapack_int il, lapack_int iu,\n                                float abstol, lapack_int* m, float* w, float* z,\n                                lapack_int ldz, float* work, lapack_int* iwork,\n                                lapack_int* ifail );\nlapack_int LAPACKE_dspevx_work( int matrix_order, char jobz, char range,\n                                char uplo, lapack_int n, double* ap, double vl,\n                                double vu, lapack_int il, lapack_int iu,\n                                double abstol, lapack_int* m, double* w,\n                                double* z, lapack_int ldz, double* work,\n                                lapack_int* iwork, lapack_int* ifail );\n\nlapack_int LAPACKE_sspgst_work( int matrix_order, lapack_int itype, char uplo,\n                                lapack_int n, float* ap, const float* bp );\nlapack_int LAPACKE_dspgst_work( int matrix_order, lapack_int itype, char uplo,\n                                lapack_int n, double* ap, const double* bp );\n\nlapack_int LAPACKE_sspgv_work( int matrix_order, lapack_int itype, char jobz,\n                               char uplo, lapack_int n, float* ap, float* bp,\n                               float* w, float* z, lapack_int ldz,\n                               float* work );\nlapack_int LAPACKE_dspgv_work( int matrix_order, lapack_int itype, char jobz,\n                               char uplo, lapack_int n, double* ap, double* bp,\n                               double* w, double* z, lapack_int ldz,\n                               double* work );\n\nlapack_int LAPACKE_sspgvd_work( int matrix_order, lapack_int itype, char jobz,\n                                char uplo, lapack_int n, float* ap, float* bp,\n                                float* w, float* z, lapack_int ldz, float* work,\n                                lapack_int lwork, lapack_int* iwork,\n                                lapack_int liwork );\nlapack_int LAPACKE_dspgvd_work( int matrix_order, lapack_int itype, char jobz,\n                                char uplo, lapack_int n, double* ap, double* bp,\n                                double* w, double* z, lapack_int ldz,\n                                double* work, lapack_int lwork,\n                                lapack_int* iwork, lapack_int liwork );\n\nlapack_int LAPACKE_sspgvx_work( int matrix_order, lapack_int itype, char jobz,\n                                char range, char uplo, lapack_int n, float* ap,\n                                float* bp, float vl, float vu, lapack_int il,\n                                lapack_int iu, float abstol, lapack_int* m,\n                                float* w, float* z, lapack_int ldz, float* work,\n                                lapack_int* iwork, lapack_int* ifail );\nlapack_int LAPACKE_dspgvx_work( int matrix_order, lapack_int itype, char jobz,\n                                char range, char uplo, lapack_int 
n, double* ap,\n                                double* bp, double vl, double vu, lapack_int il,\n                                lapack_int iu, double abstol, lapack_int* m,\n                                double* w, double* z, lapack_int ldz,\n                                double* work, lapack_int* iwork,\n                                lapack_int* ifail );\n\nlapack_int LAPACKE_ssprfs_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_int nrhs, const float* ap,\n                                const float* afp, const lapack_int* ipiv,\n                                const float* b, lapack_int ldb, float* x,\n                                lapack_int ldx, float* ferr, float* berr,\n                                float* work, lapack_int* iwork );\nlapack_int LAPACKE_dsprfs_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_int nrhs, const double* ap,\n                                const double* afp, const lapack_int* ipiv,\n                                const double* b, lapack_int ldb, double* x,\n                                lapack_int ldx, double* ferr, double* berr,\n                                double* work, lapack_int* iwork );\nlapack_int LAPACKE_csprfs_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_int nrhs, const lapack_complex_float* ap,\n                                const lapack_complex_float* afp,\n                                const lapack_int* ipiv,\n                                const lapack_complex_float* b, lapack_int ldb,\n                                lapack_complex_float* x, lapack_int ldx,\n                                float* ferr, float* berr,\n                                lapack_complex_float* work, float* rwork );\nlapack_int LAPACKE_zsprfs_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_int nrhs,\n                                const lapack_complex_double* ap,\n                                const lapack_complex_double* afp,\n                                const lapack_int* ipiv,\n                                const lapack_complex_double* b, lapack_int ldb,\n                                lapack_complex_double* x, lapack_int ldx,\n                                double* ferr, double* berr,\n                                lapack_complex_double* work, double* rwork );\n\nlapack_int LAPACKE_sspsv_work( int matrix_order, char uplo, lapack_int n,\n                               lapack_int nrhs, float* ap, lapack_int* ipiv,\n                               float* b, lapack_int ldb );\nlapack_int LAPACKE_dspsv_work( int matrix_order, char uplo, lapack_int n,\n                               lapack_int nrhs, double* ap, lapack_int* ipiv,\n                               double* b, lapack_int ldb );\nlapack_int LAPACKE_cspsv_work( int matrix_order, char uplo, lapack_int n,\n                               lapack_int nrhs, lapack_complex_float* ap,\n                               lapack_int* ipiv, lapack_complex_float* b,\n                               lapack_int ldb );\nlapack_int LAPACKE_zspsv_work( int matrix_order, char uplo, lapack_int n,\n                               lapack_int nrhs, lapack_complex_double* ap,\n                               lapack_int* ipiv, lapack_complex_double* b,\n                               lapack_int ldb );\n\nlapack_int LAPACKE_sspsvx_work( int matrix_order, char fact, char uplo,\n                                lapack_int n, lapack_int nrhs, const 
float* ap,\n                                float* afp, lapack_int* ipiv, const float* b,\n                                lapack_int ldb, float* x, lapack_int ldx,\n                                float* rcond, float* ferr, float* berr,\n                                float* work, lapack_int* iwork );\nlapack_int LAPACKE_dspsvx_work( int matrix_order, char fact, char uplo,\n                                lapack_int n, lapack_int nrhs, const double* ap,\n                                double* afp, lapack_int* ipiv, const double* b,\n                                lapack_int ldb, double* x, lapack_int ldx,\n                                double* rcond, double* ferr, double* berr,\n                                double* work, lapack_int* iwork );\nlapack_int LAPACKE_cspsvx_work( int matrix_order, char fact, char uplo,\n                                lapack_int n, lapack_int nrhs,\n                                const lapack_complex_float* ap,\n                                lapack_complex_float* afp, lapack_int* ipiv,\n                                const lapack_complex_float* b, lapack_int ldb,\n                                lapack_complex_float* x, lapack_int ldx,\n                                float* rcond, float* ferr, float* berr,\n                                lapack_complex_float* work, float* rwork );\nlapack_int LAPACKE_zspsvx_work( int matrix_order, char fact, char uplo,\n                                lapack_int n, lapack_int nrhs,\n                                const lapack_complex_double* ap,\n                                lapack_complex_double* afp, lapack_int* ipiv,\n                                const lapack_complex_double* b, lapack_int ldb,\n                                lapack_complex_double* x, lapack_int ldx,\n                                double* rcond, double* ferr, double* berr,\n                                lapack_complex_double* work, double* rwork );\n\nlapack_int LAPACKE_ssptrd_work( int matrix_order, char uplo, lapack_int n,\n                                float* ap, float* d, float* e, float* tau );\nlapack_int LAPACKE_dsptrd_work( int matrix_order, char uplo, lapack_int n,\n                                double* ap, double* d, double* e, double* tau );\n\nlapack_int LAPACKE_ssptrf_work( int matrix_order, char uplo, lapack_int n,\n                                float* ap, lapack_int* ipiv );\nlapack_int LAPACKE_dsptrf_work( int matrix_order, char uplo, lapack_int n,\n                                double* ap, lapack_int* ipiv );\nlapack_int LAPACKE_csptrf_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_complex_float* ap, lapack_int* ipiv );\nlapack_int LAPACKE_zsptrf_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_complex_double* ap, lapack_int* ipiv );\n\nlapack_int LAPACKE_ssptri_work( int matrix_order, char uplo, lapack_int n,\n                                float* ap, const lapack_int* ipiv,\n                                float* work );\nlapack_int LAPACKE_dsptri_work( int matrix_order, char uplo, lapack_int n,\n                                double* ap, const lapack_int* ipiv,\n                                double* work );\nlapack_int LAPACKE_csptri_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_complex_float* ap,\n                                const lapack_int* ipiv,\n                                lapack_complex_float* work );\nlapack_int LAPACKE_zsptri_work( int matrix_order, char uplo, 
lapack_int n,\n                                lapack_complex_double* ap,\n                                const lapack_int* ipiv,\n                                lapack_complex_double* work );\n\nlapack_int LAPACKE_ssptrs_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_int nrhs, const float* ap,\n                                const lapack_int* ipiv, float* b,\n                                lapack_int ldb );\nlapack_int LAPACKE_dsptrs_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_int nrhs, const double* ap,\n                                const lapack_int* ipiv, double* b,\n                                lapack_int ldb );\nlapack_int LAPACKE_csptrs_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_int nrhs, const lapack_complex_float* ap,\n                                const lapack_int* ipiv, lapack_complex_float* b,\n                                lapack_int ldb );\nlapack_int LAPACKE_zsptrs_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_int nrhs,\n                                const lapack_complex_double* ap,\n                                const lapack_int* ipiv,\n                                lapack_complex_double* b, lapack_int ldb );\n\nlapack_int LAPACKE_sstebz_work( char range, char order, lapack_int n, float vl,\n                                float vu, lapack_int il, lapack_int iu,\n                                float abstol, const float* d, const float* e,\n                                lapack_int* m, lapack_int* nsplit, float* w,\n                                lapack_int* iblock, lapack_int* isplit,\n                                float* work, lapack_int* iwork );\nlapack_int LAPACKE_dstebz_work( char range, char order, lapack_int n, double vl,\n                                double vu, lapack_int il, lapack_int iu,\n                                double abstol, const double* d, const double* e,\n                                lapack_int* m, lapack_int* nsplit, double* w,\n                                lapack_int* iblock, lapack_int* isplit,\n                                double* work, lapack_int* iwork );\n\nlapack_int LAPACKE_sstedc_work( int matrix_order, char compz, lapack_int n,\n                                float* d, float* e, float* z, lapack_int ldz,\n                                float* work, lapack_int lwork,\n                                lapack_int* iwork, lapack_int liwork );\nlapack_int LAPACKE_dstedc_work( int matrix_order, char compz, lapack_int n,\n                                double* d, double* e, double* z, lapack_int ldz,\n                                double* work, lapack_int lwork,\n                                lapack_int* iwork, lapack_int liwork );\nlapack_int LAPACKE_cstedc_work( int matrix_order, char compz, lapack_int n,\n                                float* d, float* e, lapack_complex_float* z,\n                                lapack_int ldz, lapack_complex_float* work,\n                                lapack_int lwork, float* rwork,\n                                lapack_int lrwork, lapack_int* iwork,\n                                lapack_int liwork );\nlapack_int LAPACKE_zstedc_work( int matrix_order, char compz, lapack_int n,\n                                double* d, double* e, lapack_complex_double* z,\n                                lapack_int ldz, lapack_complex_double* work,\n                                lapack_int lwork, 
double* rwork,\n                                lapack_int lrwork, lapack_int* iwork,\n                                lapack_int liwork );\n\nlapack_int LAPACKE_sstegr_work( int matrix_order, char jobz, char range,\n                                lapack_int n, float* d, float* e, float vl,\n                                float vu, lapack_int il, lapack_int iu,\n                                float abstol, lapack_int* m, float* w, float* z,\n                                lapack_int ldz, lapack_int* isuppz, float* work,\n                                lapack_int lwork, lapack_int* iwork,\n                                lapack_int liwork );\nlapack_int LAPACKE_dstegr_work( int matrix_order, char jobz, char range,\n                                lapack_int n, double* d, double* e, double vl,\n                                double vu, lapack_int il, lapack_int iu,\n                                double abstol, lapack_int* m, double* w,\n                                double* z, lapack_int ldz, lapack_int* isuppz,\n                                double* work, lapack_int lwork,\n                                lapack_int* iwork, lapack_int liwork );\nlapack_int LAPACKE_cstegr_work( int matrix_order, char jobz, char range,\n                                lapack_int n, float* d, float* e, float vl,\n                                float vu, lapack_int il, lapack_int iu,\n                                float abstol, lapack_int* m, float* w,\n                                lapack_complex_float* z, lapack_int ldz,\n                                lapack_int* isuppz, float* work,\n                                lapack_int lwork, lapack_int* iwork,\n                                lapack_int liwork );\nlapack_int LAPACKE_zstegr_work( int matrix_order, char jobz, char range,\n                                lapack_int n, double* d, double* e, double vl,\n                                double vu, lapack_int il, lapack_int iu,\n                                double abstol, lapack_int* m, double* w,\n                                lapack_complex_double* z, lapack_int ldz,\n                                lapack_int* isuppz, double* work,\n                                lapack_int lwork, lapack_int* iwork,\n                                lapack_int liwork );\n\nlapack_int LAPACKE_sstein_work( int matrix_order, lapack_int n, const float* d,\n                                const float* e, lapack_int m, const float* w,\n                                const lapack_int* iblock,\n                                const lapack_int* isplit, float* z,\n                                lapack_int ldz, float* work, lapack_int* iwork,\n                                lapack_int* ifailv );\nlapack_int LAPACKE_dstein_work( int matrix_order, lapack_int n, const double* d,\n                                const double* e, lapack_int m, const double* w,\n                                const lapack_int* iblock,\n                                const lapack_int* isplit, double* z,\n                                lapack_int ldz, double* work, lapack_int* iwork,\n                                lapack_int* ifailv );\nlapack_int LAPACKE_cstein_work( int matrix_order, lapack_int n, const float* d,\n                                const float* e, lapack_int m, const float* w,\n                                const lapack_int* iblock,\n                                const lapack_int* isplit,\n                                lapack_complex_float* z, lapack_int ldz,\n                                float* work, 
lapack_int* iwork,\n                                lapack_int* ifailv );\nlapack_int LAPACKE_zstein_work( int matrix_order, lapack_int n, const double* d,\n                                const double* e, lapack_int m, const double* w,\n                                const lapack_int* iblock,\n                                const lapack_int* isplit,\n                                lapack_complex_double* z, lapack_int ldz,\n                                double* work, lapack_int* iwork,\n                                lapack_int* ifailv );\n\nlapack_int LAPACKE_sstemr_work( int matrix_order, char jobz, char range,\n                                lapack_int n, float* d, float* e, float vl,\n                                float vu, lapack_int il, lapack_int iu,\n                                lapack_int* m, float* w, float* z,\n                                lapack_int ldz, lapack_int nzc,\n                                lapack_int* isuppz, lapack_logical* tryrac,\n                                float* work, lapack_int lwork,\n                                lapack_int* iwork, lapack_int liwork );\nlapack_int LAPACKE_dstemr_work( int matrix_order, char jobz, char range,\n                                lapack_int n, double* d, double* e, double vl,\n                                double vu, lapack_int il, lapack_int iu,\n                                lapack_int* m, double* w, double* z,\n                                lapack_int ldz, lapack_int nzc,\n                                lapack_int* isuppz, lapack_logical* tryrac,\n                                double* work, lapack_int lwork,\n                                lapack_int* iwork, lapack_int liwork );\nlapack_int LAPACKE_cstemr_work( int matrix_order, char jobz, char range,\n                                lapack_int n, float* d, float* e, float vl,\n                                float vu, lapack_int il, lapack_int iu,\n                                lapack_int* m, float* w,\n                                lapack_complex_float* z, lapack_int ldz,\n                                lapack_int nzc, lapack_int* isuppz,\n                                lapack_logical* tryrac, float* work,\n                                lapack_int lwork, lapack_int* iwork,\n                                lapack_int liwork );\nlapack_int LAPACKE_zstemr_work( int matrix_order, char jobz, char range,\n                                lapack_int n, double* d, double* e, double vl,\n                                double vu, lapack_int il, lapack_int iu,\n                                lapack_int* m, double* w,\n                                lapack_complex_double* z, lapack_int ldz,\n                                lapack_int nzc, lapack_int* isuppz,\n                                lapack_logical* tryrac, double* work,\n                                lapack_int lwork, lapack_int* iwork,\n                                lapack_int liwork );\n\nlapack_int LAPACKE_ssteqr_work( int matrix_order, char compz, lapack_int n,\n                                float* d, float* e, float* z, lapack_int ldz,\n                                float* work );\nlapack_int LAPACKE_dsteqr_work( int matrix_order, char compz, lapack_int n,\n                                double* d, double* e, double* z, lapack_int ldz,\n                                double* work );\nlapack_int LAPACKE_csteqr_work( int matrix_order, char compz, lapack_int n,\n                                float* d, float* e, lapack_complex_float* z,\n                                
lapack_int ldz, float* work );\nlapack_int LAPACKE_zsteqr_work( int matrix_order, char compz, lapack_int n,\n                                double* d, double* e, lapack_complex_double* z,\n                                lapack_int ldz, double* work );\n\nlapack_int LAPACKE_ssterf_work( lapack_int n, float* d, float* e );\nlapack_int LAPACKE_dsterf_work( lapack_int n, double* d, double* e );\n\nlapack_int LAPACKE_sstev_work( int matrix_order, char jobz, lapack_int n,\n                               float* d, float* e, float* z, lapack_int ldz,\n                               float* work );\nlapack_int LAPACKE_dstev_work( int matrix_order, char jobz, lapack_int n,\n                               double* d, double* e, double* z, lapack_int ldz,\n                               double* work );\n\nlapack_int LAPACKE_sstevd_work( int matrix_order, char jobz, lapack_int n,\n                                float* d, float* e, float* z, lapack_int ldz,\n                                float* work, lapack_int lwork,\n                                lapack_int* iwork, lapack_int liwork );\nlapack_int LAPACKE_dstevd_work( int matrix_order, char jobz, lapack_int n,\n                                double* d, double* e, double* z, lapack_int ldz,\n                                double* work, lapack_int lwork,\n                                lapack_int* iwork, lapack_int liwork );\n\nlapack_int LAPACKE_sstevr_work( int matrix_order, char jobz, char range,\n                                lapack_int n, float* d, float* e, float vl,\n                                float vu, lapack_int il, lapack_int iu,\n                                float abstol, lapack_int* m, float* w, float* z,\n                                lapack_int ldz, lapack_int* isuppz, float* work,\n                                lapack_int lwork, lapack_int* iwork,\n                                lapack_int liwork );\nlapack_int LAPACKE_dstevr_work( int matrix_order, char jobz, char range,\n                                lapack_int n, double* d, double* e, double vl,\n                                double vu, lapack_int il, lapack_int iu,\n                                double abstol, lapack_int* m, double* w,\n                                double* z, lapack_int ldz, lapack_int* isuppz,\n                                double* work, lapack_int lwork,\n                                lapack_int* iwork, lapack_int liwork );\n\nlapack_int LAPACKE_sstevx_work( int matrix_order, char jobz, char range,\n                                lapack_int n, float* d, float* e, float vl,\n                                float vu, lapack_int il, lapack_int iu,\n                                float abstol, lapack_int* m, float* w, float* z,\n                                lapack_int ldz, float* work, lapack_int* iwork,\n                                lapack_int* ifail );\nlapack_int LAPACKE_dstevx_work( int matrix_order, char jobz, char range,\n                                lapack_int n, double* d, double* e, double vl,\n                                double vu, lapack_int il, lapack_int iu,\n                                double abstol, lapack_int* m, double* w,\n                                double* z, lapack_int ldz, double* work,\n                                lapack_int* iwork, lapack_int* ifail );\n\nlapack_int LAPACKE_ssycon_work( int matrix_order, char uplo, lapack_int n,\n                                const float* a, lapack_int lda,\n                                const lapack_int* ipiv, float anorm,\n                       
         float* rcond, float* work, lapack_int* iwork );\nlapack_int LAPACKE_dsycon_work( int matrix_order, char uplo, lapack_int n,\n                                const double* a, lapack_int lda,\n                                const lapack_int* ipiv, double anorm,\n                                double* rcond, double* work,\n                                lapack_int* iwork );\nlapack_int LAPACKE_csycon_work( int matrix_order, char uplo, lapack_int n,\n                                const lapack_complex_float* a, lapack_int lda,\n                                const lapack_int* ipiv, float anorm,\n                                float* rcond, lapack_complex_float* work );\nlapack_int LAPACKE_zsycon_work( int matrix_order, char uplo, lapack_int n,\n                                const lapack_complex_double* a, lapack_int lda,\n                                const lapack_int* ipiv, double anorm,\n                                double* rcond, lapack_complex_double* work );\n\nlapack_int LAPACKE_ssyequb_work( int matrix_order, char uplo, lapack_int n,\n                                 const float* a, lapack_int lda, float* s,\n                                 float* scond, float* amax, float* work );\nlapack_int LAPACKE_dsyequb_work( int matrix_order, char uplo, lapack_int n,\n                                 const double* a, lapack_int lda, double* s,\n                                 double* scond, double* amax, double* work );\nlapack_int LAPACKE_csyequb_work( int matrix_order, char uplo, lapack_int n,\n                                 const lapack_complex_float* a, lapack_int lda,\n                                 float* s, float* scond, float* amax,\n                                 lapack_complex_float* work );\nlapack_int LAPACKE_zsyequb_work( int matrix_order, char uplo, lapack_int n,\n                                 const lapack_complex_double* a, lapack_int lda,\n                                 double* s, double* scond, double* amax,\n                                 lapack_complex_double* work );\n\nlapack_int LAPACKE_ssyev_work( int matrix_order, char jobz, char uplo,\n                               lapack_int n, float* a, lapack_int lda, float* w,\n                               float* work, lapack_int lwork );\nlapack_int LAPACKE_dsyev_work( int matrix_order, char jobz, char uplo,\n                               lapack_int n, double* a, lapack_int lda,\n                               double* w, double* work, lapack_int lwork );\n\nlapack_int LAPACKE_ssyevd_work( int matrix_order, char jobz, char uplo,\n                                lapack_int n, float* a, lapack_int lda,\n                                float* w, float* work, lapack_int lwork,\n                                lapack_int* iwork, lapack_int liwork );\nlapack_int LAPACKE_dsyevd_work( int matrix_order, char jobz, char uplo,\n                                lapack_int n, double* a, lapack_int lda,\n                                double* w, double* work, lapack_int lwork,\n                                lapack_int* iwork, lapack_int liwork );\n\nlapack_int LAPACKE_ssyevr_work( int matrix_order, char jobz, char range,\n                                char uplo, lapack_int n, float* a,\n                                lapack_int lda, float vl, float vu,\n                                lapack_int il, lapack_int iu, float abstol,\n                                lapack_int* m, float* w, float* z,\n                                lapack_int ldz, lapack_int* isuppz, float* work,\n                            
    lapack_int lwork, lapack_int* iwork,\n                                lapack_int liwork );\nlapack_int LAPACKE_dsyevr_work( int matrix_order, char jobz, char range,\n                                char uplo, lapack_int n, double* a,\n                                lapack_int lda, double vl, double vu,\n                                lapack_int il, lapack_int iu, double abstol,\n                                lapack_int* m, double* w, double* z,\n                                lapack_int ldz, lapack_int* isuppz,\n                                double* work, lapack_int lwork,\n                                lapack_int* iwork, lapack_int liwork );\n\nlapack_int LAPACKE_ssyevx_work( int matrix_order, char jobz, char range,\n                                char uplo, lapack_int n, float* a,\n                                lapack_int lda, float vl, float vu,\n                                lapack_int il, lapack_int iu, float abstol,\n                                lapack_int* m, float* w, float* z,\n                                lapack_int ldz, float* work, lapack_int lwork,\n                                lapack_int* iwork, lapack_int* ifail );\nlapack_int LAPACKE_dsyevx_work( int matrix_order, char jobz, char range,\n                                char uplo, lapack_int n, double* a,\n                                lapack_int lda, double vl, double vu,\n                                lapack_int il, lapack_int iu, double abstol,\n                                lapack_int* m, double* w, double* z,\n                                lapack_int ldz, double* work, lapack_int lwork,\n                                lapack_int* iwork, lapack_int* ifail );\n\nlapack_int LAPACKE_ssygst_work( int matrix_order, lapack_int itype, char uplo,\n                                lapack_int n, float* a, lapack_int lda,\n                                const float* b, lapack_int ldb );\nlapack_int LAPACKE_dsygst_work( int matrix_order, lapack_int itype, char uplo,\n                                lapack_int n, double* a, lapack_int lda,\n                                const double* b, lapack_int ldb );\n\nlapack_int LAPACKE_ssygv_work( int matrix_order, lapack_int itype, char jobz,\n                               char uplo, lapack_int n, float* a,\n                               lapack_int lda, float* b, lapack_int ldb,\n                               float* w, float* work, lapack_int lwork );\nlapack_int LAPACKE_dsygv_work( int matrix_order, lapack_int itype, char jobz,\n                               char uplo, lapack_int n, double* a,\n                               lapack_int lda, double* b, lapack_int ldb,\n                               double* w, double* work, lapack_int lwork );\n\nlapack_int LAPACKE_ssygvd_work( int matrix_order, lapack_int itype, char jobz,\n                                char uplo, lapack_int n, float* a,\n                                lapack_int lda, float* b, lapack_int ldb,\n                                float* w, float* work, lapack_int lwork,\n                                lapack_int* iwork, lapack_int liwork );\nlapack_int LAPACKE_dsygvd_work( int matrix_order, lapack_int itype, char jobz,\n                                char uplo, lapack_int n, double* a,\n                                lapack_int lda, double* b, lapack_int ldb,\n                                double* w, double* work, lapack_int lwork,\n                                lapack_int* iwork, lapack_int liwork );\n\nlapack_int LAPACKE_ssygvx_work( int matrix_order, lapack_int itype, char 
jobz,\n                                char range, char uplo, lapack_int n, float* a,\n                                lapack_int lda, float* b, lapack_int ldb,\n                                float vl, float vu, lapack_int il,\n                                lapack_int iu, float abstol, lapack_int* m,\n                                float* w, float* z, lapack_int ldz, float* work,\n                                lapack_int lwork, lapack_int* iwork,\n                                lapack_int* ifail );\nlapack_int LAPACKE_dsygvx_work( int matrix_order, lapack_int itype, char jobz,\n                                char range, char uplo, lapack_int n, double* a,\n                                lapack_int lda, double* b, lapack_int ldb,\n                                double vl, double vu, lapack_int il,\n                                lapack_int iu, double abstol, lapack_int* m,\n                                double* w, double* z, lapack_int ldz,\n                                double* work, lapack_int lwork,\n                                lapack_int* iwork, lapack_int* ifail );\n\nlapack_int LAPACKE_ssyrfs_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_int nrhs, const float* a, lapack_int lda,\n                                const float* af, lapack_int ldaf,\n                                const lapack_int* ipiv, const float* b,\n                                lapack_int ldb, float* x, lapack_int ldx,\n                                float* ferr, float* berr, float* work,\n                                lapack_int* iwork );\nlapack_int LAPACKE_dsyrfs_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_int nrhs, const double* a,\n                                lapack_int lda, const double* af,\n                                lapack_int ldaf, const lapack_int* ipiv,\n                                const double* b, lapack_int ldb, double* x,\n                                lapack_int ldx, double* ferr, double* berr,\n                                double* work, lapack_int* iwork );\nlapack_int LAPACKE_csyrfs_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_int nrhs, const lapack_complex_float* a,\n                                lapack_int lda, const lapack_complex_float* af,\n                                lapack_int ldaf, const lapack_int* ipiv,\n                                const lapack_complex_float* b, lapack_int ldb,\n                                lapack_complex_float* x, lapack_int ldx,\n                                float* ferr, float* berr,\n                                lapack_complex_float* work, float* rwork );\nlapack_int LAPACKE_zsyrfs_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_int nrhs, const lapack_complex_double* a,\n                                lapack_int lda, const lapack_complex_double* af,\n                                lapack_int ldaf, const lapack_int* ipiv,\n                                const lapack_complex_double* b, lapack_int ldb,\n                                lapack_complex_double* x, lapack_int ldx,\n                                double* ferr, double* berr,\n                                lapack_complex_double* work, double* rwork );\n\nlapack_int LAPACKE_ssyrfsx_work( int matrix_order, char uplo, char equed,\n                                 lapack_int n, lapack_int nrhs, const float* a,\n                                 lapack_int lda, const float* af,\n    
                             lapack_int ldaf, const lapack_int* ipiv,\n                                 const float* s, const float* b, lapack_int ldb,\n                                 float* x, lapack_int ldx, float* rcond,\n                                 float* berr, lapack_int n_err_bnds,\n                                 float* err_bnds_norm, float* err_bnds_comp,\n                                 lapack_int nparams, float* params, float* work,\n                                 lapack_int* iwork );\nlapack_int LAPACKE_dsyrfsx_work( int matrix_order, char uplo, char equed,\n                                 lapack_int n, lapack_int nrhs, const double* a,\n                                 lapack_int lda, const double* af,\n                                 lapack_int ldaf, const lapack_int* ipiv,\n                                 const double* s, const double* b,\n                                 lapack_int ldb, double* x, lapack_int ldx,\n                                 double* rcond, double* berr,\n                                 lapack_int n_err_bnds, double* err_bnds_norm,\n                                 double* err_bnds_comp, lapack_int nparams,\n                                 double* params, double* work,\n                                 lapack_int* iwork );\nlapack_int LAPACKE_csyrfsx_work( int matrix_order, char uplo, char equed,\n                                 lapack_int n, lapack_int nrhs,\n                                 const lapack_complex_float* a, lapack_int lda,\n                                 const lapack_complex_float* af,\n                                 lapack_int ldaf, const lapack_int* ipiv,\n                                 const float* s, const lapack_complex_float* b,\n                                 lapack_int ldb, lapack_complex_float* x,\n                                 lapack_int ldx, float* rcond, float* berr,\n                                 lapack_int n_err_bnds, float* err_bnds_norm,\n                                 float* err_bnds_comp, lapack_int nparams,\n                                 float* params, lapack_complex_float* work,\n                                 float* rwork );\nlapack_int LAPACKE_zsyrfsx_work( int matrix_order, char uplo, char equed,\n                                 lapack_int n, lapack_int nrhs,\n                                 const lapack_complex_double* a, lapack_int lda,\n                                 const lapack_complex_double* af,\n                                 lapack_int ldaf, const lapack_int* ipiv,\n                                 const double* s,\n                                 const lapack_complex_double* b, lapack_int ldb,\n                                 lapack_complex_double* x, lapack_int ldx,\n                                 double* rcond, double* berr,\n                                 lapack_int n_err_bnds, double* err_bnds_norm,\n                                 double* err_bnds_comp, lapack_int nparams,\n                                 double* params, lapack_complex_double* work,\n                                 double* rwork );\n\nlapack_int LAPACKE_ssysv_work( int matrix_order, char uplo, lapack_int n,\n                               lapack_int nrhs, float* a, lapack_int lda,\n                               lapack_int* ipiv, float* b, lapack_int ldb,\n                               float* work, lapack_int lwork );\nlapack_int LAPACKE_dsysv_work( int matrix_order, char uplo, lapack_int n,\n                               lapack_int nrhs, double* a, lapack_int lda,\n               
                lapack_int* ipiv, double* b, lapack_int ldb,\n                               double* work, lapack_int lwork );\nlapack_int LAPACKE_csysv_work( int matrix_order, char uplo, lapack_int n,\n                               lapack_int nrhs, lapack_complex_float* a,\n                               lapack_int lda, lapack_int* ipiv,\n                               lapack_complex_float* b, lapack_int ldb,\n                               lapack_complex_float* work, lapack_int lwork );\nlapack_int LAPACKE_zsysv_work( int matrix_order, char uplo, lapack_int n,\n                               lapack_int nrhs, lapack_complex_double* a,\n                               lapack_int lda, lapack_int* ipiv,\n                               lapack_complex_double* b, lapack_int ldb,\n                               lapack_complex_double* work, lapack_int lwork );\n\nlapack_int LAPACKE_ssysvx_work( int matrix_order, char fact, char uplo,\n                                lapack_int n, lapack_int nrhs, const float* a,\n                                lapack_int lda, float* af, lapack_int ldaf,\n                                lapack_int* ipiv, const float* b,\n                                lapack_int ldb, float* x, lapack_int ldx,\n                                float* rcond, float* ferr, float* berr,\n                                float* work, lapack_int lwork,\n                                lapack_int* iwork );\nlapack_int LAPACKE_dsysvx_work( int matrix_order, char fact, char uplo,\n                                lapack_int n, lapack_int nrhs, const double* a,\n                                lapack_int lda, double* af, lapack_int ldaf,\n                                lapack_int* ipiv, const double* b,\n                                lapack_int ldb, double* x, lapack_int ldx,\n                                double* rcond, double* ferr, double* berr,\n                                double* work, lapack_int lwork,\n                                lapack_int* iwork );\nlapack_int LAPACKE_csysvx_work( int matrix_order, char fact, char uplo,\n                                lapack_int n, lapack_int nrhs,\n                                const lapack_complex_float* a, lapack_int lda,\n                                lapack_complex_float* af, lapack_int ldaf,\n                                lapack_int* ipiv, const lapack_complex_float* b,\n                                lapack_int ldb, lapack_complex_float* x,\n                                lapack_int ldx, float* rcond, float* ferr,\n                                float* berr, lapack_complex_float* work,\n                                lapack_int lwork, float* rwork );\nlapack_int LAPACKE_zsysvx_work( int matrix_order, char fact, char uplo,\n                                lapack_int n, lapack_int nrhs,\n                                const lapack_complex_double* a, lapack_int lda,\n                                lapack_complex_double* af, lapack_int ldaf,\n                                lapack_int* ipiv,\n                                const lapack_complex_double* b, lapack_int ldb,\n                                lapack_complex_double* x, lapack_int ldx,\n                                double* rcond, double* ferr, double* berr,\n                                lapack_complex_double* work, lapack_int lwork,\n                                double* rwork );\n\nlapack_int LAPACKE_ssysvxx_work( int matrix_order, char fact, char uplo,\n                                 lapack_int n, lapack_int nrhs, float* a,\n                           
      lapack_int lda, float* af, lapack_int ldaf,\n                                 lapack_int* ipiv, char* equed, float* s,\n                                 float* b, lapack_int ldb, float* x,\n                                 lapack_int ldx, float* rcond, float* rpvgrw,\n                                 float* berr, lapack_int n_err_bnds,\n                                 float* err_bnds_norm, float* err_bnds_comp,\n                                 lapack_int nparams, float* params, float* work,\n                                 lapack_int* iwork );\nlapack_int LAPACKE_dsysvxx_work( int matrix_order, char fact, char uplo,\n                                 lapack_int n, lapack_int nrhs, double* a,\n                                 lapack_int lda, double* af, lapack_int ldaf,\n                                 lapack_int* ipiv, char* equed, double* s,\n                                 double* b, lapack_int ldb, double* x,\n                                 lapack_int ldx, double* rcond, double* rpvgrw,\n                                 double* berr, lapack_int n_err_bnds,\n                                 double* err_bnds_norm, double* err_bnds_comp,\n                                 lapack_int nparams, double* params,\n                                 double* work, lapack_int* iwork );\nlapack_int LAPACKE_csysvxx_work( int matrix_order, char fact, char uplo,\n                                 lapack_int n, lapack_int nrhs,\n                                 lapack_complex_float* a, lapack_int lda,\n                                 lapack_complex_float* af, lapack_int ldaf,\n                                 lapack_int* ipiv, char* equed, float* s,\n                                 lapack_complex_float* b, lapack_int ldb,\n                                 lapack_complex_float* x, lapack_int ldx,\n                                 float* rcond, float* rpvgrw, float* berr,\n                                 lapack_int n_err_bnds, float* err_bnds_norm,\n                                 float* err_bnds_comp, lapack_int nparams,\n                                 float* params, lapack_complex_float* work,\n                                 float* rwork );\nlapack_int LAPACKE_zsysvxx_work( int matrix_order, char fact, char uplo,\n                                 lapack_int n, lapack_int nrhs,\n                                 lapack_complex_double* a, lapack_int lda,\n                                 lapack_complex_double* af, lapack_int ldaf,\n                                 lapack_int* ipiv, char* equed, double* s,\n                                 lapack_complex_double* b, lapack_int ldb,\n                                 lapack_complex_double* x, lapack_int ldx,\n                                 double* rcond, double* rpvgrw, double* berr,\n                                 lapack_int n_err_bnds, double* err_bnds_norm,\n                                 double* err_bnds_comp, lapack_int nparams,\n                                 double* params, lapack_complex_double* work,\n                                 double* rwork );\n\nlapack_int LAPACKE_ssytrd_work( int matrix_order, char uplo, lapack_int n,\n                                float* a, lapack_int lda, float* d, float* e,\n                                float* tau, float* work, lapack_int lwork );\nlapack_int LAPACKE_dsytrd_work( int matrix_order, char uplo, lapack_int n,\n                                double* a, lapack_int lda, double* d, double* e,\n                                double* tau, double* work, lapack_int lwork );\n\nlapack_int 
LAPACKE_ssytrf_work( int matrix_order, char uplo, lapack_int n,\n                                float* a, lapack_int lda, lapack_int* ipiv,\n                                float* work, lapack_int lwork );\nlapack_int LAPACKE_dsytrf_work( int matrix_order, char uplo, lapack_int n,\n                                double* a, lapack_int lda, lapack_int* ipiv,\n                                double* work, lapack_int lwork );\nlapack_int LAPACKE_csytrf_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_complex_float* a, lapack_int lda,\n                                lapack_int* ipiv, lapack_complex_float* work,\n                                lapack_int lwork );\nlapack_int LAPACKE_zsytrf_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_complex_double* a, lapack_int lda,\n                                lapack_int* ipiv, lapack_complex_double* work,\n                                lapack_int lwork );\n\nlapack_int LAPACKE_ssytri_work( int matrix_order, char uplo, lapack_int n,\n                                float* a, lapack_int lda,\n                                const lapack_int* ipiv, float* work );\nlapack_int LAPACKE_dsytri_work( int matrix_order, char uplo, lapack_int n,\n                                double* a, lapack_int lda,\n                                const lapack_int* ipiv, double* work );\nlapack_int LAPACKE_csytri_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_complex_float* a, lapack_int lda,\n                                const lapack_int* ipiv,\n                                lapack_complex_float* work );\nlapack_int LAPACKE_zsytri_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_complex_double* a, lapack_int lda,\n                                const lapack_int* ipiv,\n                                lapack_complex_double* work );\n\nlapack_int LAPACKE_ssytrs_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_int nrhs, const float* a, lapack_int lda,\n                                const lapack_int* ipiv, float* b,\n                                lapack_int ldb );\nlapack_int LAPACKE_dsytrs_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_int nrhs, const double* a,\n                                lapack_int lda, const lapack_int* ipiv,\n                                double* b, lapack_int ldb );\nlapack_int LAPACKE_csytrs_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_int nrhs, const lapack_complex_float* a,\n                                lapack_int lda, const lapack_int* ipiv,\n                                lapack_complex_float* b, lapack_int ldb );\nlapack_int LAPACKE_zsytrs_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_int nrhs, const lapack_complex_double* a,\n                                lapack_int lda, const lapack_int* ipiv,\n                                lapack_complex_double* b, lapack_int ldb );\n\nlapack_int LAPACKE_stbcon_work( int matrix_order, char norm, char uplo,\n                                char diag, lapack_int n, lapack_int kd,\n                                const float* ab, lapack_int ldab, float* rcond,\n                                float* work, lapack_int* iwork );\nlapack_int LAPACKE_dtbcon_work( int matrix_order, char norm, char uplo,\n                                char diag, lapack_int n, 
lapack_int kd,\n                                const double* ab, lapack_int ldab,\n                                double* rcond, double* work,\n                                lapack_int* iwork );\nlapack_int LAPACKE_ctbcon_work( int matrix_order, char norm, char uplo,\n                                char diag, lapack_int n, lapack_int kd,\n                                const lapack_complex_float* ab, lapack_int ldab,\n                                float* rcond, lapack_complex_float* work,\n                                float* rwork );\nlapack_int LAPACKE_ztbcon_work( int matrix_order, char norm, char uplo,\n                                char diag, lapack_int n, lapack_int kd,\n                                const lapack_complex_double* ab,\n                                lapack_int ldab, double* rcond,\n                                lapack_complex_double* work, double* rwork );\n\nlapack_int LAPACKE_stbrfs_work( int matrix_order, char uplo, char trans,\n                                char diag, lapack_int n, lapack_int kd,\n                                lapack_int nrhs, const float* ab,\n                                lapack_int ldab, const float* b, lapack_int ldb,\n                                const float* x, lapack_int ldx, float* ferr,\n                                float* berr, float* work, lapack_int* iwork );\nlapack_int LAPACKE_dtbrfs_work( int matrix_order, char uplo, char trans,\n                                char diag, lapack_int n, lapack_int kd,\n                                lapack_int nrhs, const double* ab,\n                                lapack_int ldab, const double* b,\n                                lapack_int ldb, const double* x, lapack_int ldx,\n                                double* ferr, double* berr, double* work,\n                                lapack_int* iwork );\nlapack_int LAPACKE_ctbrfs_work( int matrix_order, char uplo, char trans,\n                                char diag, lapack_int n, lapack_int kd,\n                                lapack_int nrhs, const lapack_complex_float* ab,\n                                lapack_int ldab, const lapack_complex_float* b,\n                                lapack_int ldb, const lapack_complex_float* x,\n                                lapack_int ldx, float* ferr, float* berr,\n                                lapack_complex_float* work, float* rwork );\nlapack_int LAPACKE_ztbrfs_work( int matrix_order, char uplo, char trans,\n                                char diag, lapack_int n, lapack_int kd,\n                                lapack_int nrhs,\n                                const lapack_complex_double* ab,\n                                lapack_int ldab, const lapack_complex_double* b,\n                                lapack_int ldb, const lapack_complex_double* x,\n                                lapack_int ldx, double* ferr, double* berr,\n                                lapack_complex_double* work, double* rwork );\n\nlapack_int LAPACKE_stbtrs_work( int matrix_order, char uplo, char trans,\n                                char diag, lapack_int n, lapack_int kd,\n                                lapack_int nrhs, const float* ab,\n                                lapack_int ldab, float* b, lapack_int ldb );\nlapack_int LAPACKE_dtbtrs_work( int matrix_order, char uplo, char trans,\n                                char diag, lapack_int n, lapack_int kd,\n                                lapack_int nrhs, const double* ab,\n                                lapack_int ldab, double* b, 
lapack_int ldb );\nlapack_int LAPACKE_ctbtrs_work( int matrix_order, char uplo, char trans,\n                                char diag, lapack_int n, lapack_int kd,\n                                lapack_int nrhs, const lapack_complex_float* ab,\n                                lapack_int ldab, lapack_complex_float* b,\n                                lapack_int ldb );\nlapack_int LAPACKE_ztbtrs_work( int matrix_order, char uplo, char trans,\n                                char diag, lapack_int n, lapack_int kd,\n                                lapack_int nrhs,\n                                const lapack_complex_double* ab,\n                                lapack_int ldab, lapack_complex_double* b,\n                                lapack_int ldb );\n\nlapack_int LAPACKE_stfsm_work( int matrix_order, char transr, char side,\n                               char uplo, char trans, char diag, lapack_int m,\n                               lapack_int n, float alpha, const float* a,\n                               float* b, lapack_int ldb );\nlapack_int LAPACKE_dtfsm_work( int matrix_order, char transr, char side,\n                               char uplo, char trans, char diag, lapack_int m,\n                               lapack_int n, double alpha, const double* a,\n                               double* b, lapack_int ldb );\nlapack_int LAPACKE_ctfsm_work( int matrix_order, char transr, char side,\n                               char uplo, char trans, char diag, lapack_int m,\n                               lapack_int n, lapack_complex_float alpha,\n                               const lapack_complex_float* a,\n                               lapack_complex_float* b, lapack_int ldb );\nlapack_int LAPACKE_ztfsm_work( int matrix_order, char transr, char side,\n                               char uplo, char trans, char diag, lapack_int m,\n                               lapack_int n, lapack_complex_double alpha,\n                               const lapack_complex_double* a,\n                               lapack_complex_double* b, lapack_int ldb );\n\nlapack_int LAPACKE_stftri_work( int matrix_order, char transr, char uplo,\n                                char diag, lapack_int n, float* a );\nlapack_int LAPACKE_dtftri_work( int matrix_order, char transr, char uplo,\n                                char diag, lapack_int n, double* a );\nlapack_int LAPACKE_ctftri_work( int matrix_order, char transr, char uplo,\n                                char diag, lapack_int n,\n                                lapack_complex_float* a );\nlapack_int LAPACKE_ztftri_work( int matrix_order, char transr, char uplo,\n                                char diag, lapack_int n,\n                                lapack_complex_double* a );\n\nlapack_int LAPACKE_stfttp_work( int matrix_order, char transr, char uplo,\n                                lapack_int n, const float* arf, float* ap );\nlapack_int LAPACKE_dtfttp_work( int matrix_order, char transr, char uplo,\n                                lapack_int n, const double* arf, double* ap );\nlapack_int LAPACKE_ctfttp_work( int matrix_order, char transr, char uplo,\n                                lapack_int n, const lapack_complex_float* arf,\n                                lapack_complex_float* ap );\nlapack_int LAPACKE_ztfttp_work( int matrix_order, char transr, char uplo,\n                                lapack_int n, const lapack_complex_double* arf,\n                                lapack_complex_double* ap );\n\nlapack_int LAPACKE_stfttr_work( int 
matrix_order, char transr, char uplo,\n                                lapack_int n, const float* arf, float* a,\n                                lapack_int lda );\nlapack_int LAPACKE_dtfttr_work( int matrix_order, char transr, char uplo,\n                                lapack_int n, const double* arf, double* a,\n                                lapack_int lda );\nlapack_int LAPACKE_ctfttr_work( int matrix_order, char transr, char uplo,\n                                lapack_int n, const lapack_complex_float* arf,\n                                lapack_complex_float* a, lapack_int lda );\nlapack_int LAPACKE_ztfttr_work( int matrix_order, char transr, char uplo,\n                                lapack_int n, const lapack_complex_double* arf,\n                                lapack_complex_double* a, lapack_int lda );\n\nlapack_int LAPACKE_stgevc_work( int matrix_order, char side, char howmny,\n                                const lapack_logical* select, lapack_int n,\n                                const float* s, lapack_int lds, const float* p,\n                                lapack_int ldp, float* vl, lapack_int ldvl,\n                                float* vr, lapack_int ldvr, lapack_int mm,\n                                lapack_int* m, float* work );\nlapack_int LAPACKE_dtgevc_work( int matrix_order, char side, char howmny,\n                                const lapack_logical* select, lapack_int n,\n                                const double* s, lapack_int lds,\n                                const double* p, lapack_int ldp, double* vl,\n                                lapack_int ldvl, double* vr, lapack_int ldvr,\n                                lapack_int mm, lapack_int* m, double* work );\nlapack_int LAPACKE_ctgevc_work( int matrix_order, char side, char howmny,\n                                const lapack_logical* select, lapack_int n,\n                                const lapack_complex_float* s, lapack_int lds,\n                                const lapack_complex_float* p, lapack_int ldp,\n                                lapack_complex_float* vl, lapack_int ldvl,\n                                lapack_complex_float* vr, lapack_int ldvr,\n                                lapack_int mm, lapack_int* m,\n                                lapack_complex_float* work, float* rwork );\nlapack_int LAPACKE_ztgevc_work( int matrix_order, char side, char howmny,\n                                const lapack_logical* select, lapack_int n,\n                                const lapack_complex_double* s, lapack_int lds,\n                                const lapack_complex_double* p, lapack_int ldp,\n                                lapack_complex_double* vl, lapack_int ldvl,\n                                lapack_complex_double* vr, lapack_int ldvr,\n                                lapack_int mm, lapack_int* m,\n                                lapack_complex_double* work, double* rwork );\n\nlapack_int LAPACKE_stgexc_work( int matrix_order, lapack_logical wantq,\n                                lapack_logical wantz, lapack_int n, float* a,\n                                lapack_int lda, float* b, lapack_int ldb,\n                                float* q, lapack_int ldq, float* z,\n                                lapack_int ldz, lapack_int* ifst,\n                                lapack_int* ilst, float* work,\n                                lapack_int lwork );\nlapack_int LAPACKE_dtgexc_work( int matrix_order, lapack_logical wantq,\n                                lapack_logical 
wantz, lapack_int n, double* a,\n                                lapack_int lda, double* b, lapack_int ldb,\n                                double* q, lapack_int ldq, double* z,\n                                lapack_int ldz, lapack_int* ifst,\n                                lapack_int* ilst, double* work,\n                                lapack_int lwork );\nlapack_int LAPACKE_ctgexc_work( int matrix_order, lapack_logical wantq,\n                                lapack_logical wantz, lapack_int n,\n                                lapack_complex_float* a, lapack_int lda,\n                                lapack_complex_float* b, lapack_int ldb,\n                                lapack_complex_float* q, lapack_int ldq,\n                                lapack_complex_float* z, lapack_int ldz,\n                                lapack_int ifst, lapack_int ilst );\nlapack_int LAPACKE_ztgexc_work( int matrix_order, lapack_logical wantq,\n                                lapack_logical wantz, lapack_int n,\n                                lapack_complex_double* a, lapack_int lda,\n                                lapack_complex_double* b, lapack_int ldb,\n                                lapack_complex_double* q, lapack_int ldq,\n                                lapack_complex_double* z, lapack_int ldz,\n                                lapack_int ifst, lapack_int ilst );\n\nlapack_int LAPACKE_stgsen_work( int matrix_order, lapack_int ijob,\n                                lapack_logical wantq, lapack_logical wantz,\n                                const lapack_logical* select, lapack_int n,\n                                float* a, lapack_int lda, float* b,\n                                lapack_int ldb, float* alphar, float* alphai,\n                                float* beta, float* q, lapack_int ldq, float* z,\n                                lapack_int ldz, lapack_int* m, float* pl,\n                                float* pr, float* dif, float* work,\n                                lapack_int lwork, lapack_int* iwork,\n                                lapack_int liwork );\nlapack_int LAPACKE_dtgsen_work( int matrix_order, lapack_int ijob,\n                                lapack_logical wantq, lapack_logical wantz,\n                                const lapack_logical* select, lapack_int n,\n                                double* a, lapack_int lda, double* b,\n                                lapack_int ldb, double* alphar, double* alphai,\n                                double* beta, double* q, lapack_int ldq,\n                                double* z, lapack_int ldz, lapack_int* m,\n                                double* pl, double* pr, double* dif,\n                                double* work, lapack_int lwork,\n                                lapack_int* iwork, lapack_int liwork );\nlapack_int LAPACKE_ctgsen_work( int matrix_order, lapack_int ijob,\n                                lapack_logical wantq, lapack_logical wantz,\n                                const lapack_logical* select, lapack_int n,\n                                lapack_complex_float* a, lapack_int lda,\n                                lapack_complex_float* b, lapack_int ldb,\n                                lapack_complex_float* alpha,\n                                lapack_complex_float* beta,\n                                lapack_complex_float* q, lapack_int ldq,\n                                lapack_complex_float* z, lapack_int ldz,\n                                lapack_int* m, float* pl, float* pr, float* 
dif,\n                                lapack_complex_float* work, lapack_int lwork,\n                                lapack_int* iwork, lapack_int liwork );\nlapack_int LAPACKE_ztgsen_work( int matrix_order, lapack_int ijob,\n                                lapack_logical wantq, lapack_logical wantz,\n                                const lapack_logical* select, lapack_int n,\n                                lapack_complex_double* a, lapack_int lda,\n                                lapack_complex_double* b, lapack_int ldb,\n                                lapack_complex_double* alpha,\n                                lapack_complex_double* beta,\n                                lapack_complex_double* q, lapack_int ldq,\n                                lapack_complex_double* z, lapack_int ldz,\n                                lapack_int* m, double* pl, double* pr,\n                                double* dif, lapack_complex_double* work,\n                                lapack_int lwork, lapack_int* iwork,\n                                lapack_int liwork );\n\nlapack_int LAPACKE_stgsja_work( int matrix_order, char jobu, char jobv,\n                                char jobq, lapack_int m, lapack_int p,\n                                lapack_int n, lapack_int k, lapack_int l,\n                                float* a, lapack_int lda, float* b,\n                                lapack_int ldb, float tola, float tolb,\n                                float* alpha, float* beta, float* u,\n                                lapack_int ldu, float* v, lapack_int ldv,\n                                float* q, lapack_int ldq, float* work,\n                                lapack_int* ncycle );\nlapack_int LAPACKE_dtgsja_work( int matrix_order, char jobu, char jobv,\n                                char jobq, lapack_int m, lapack_int p,\n                                lapack_int n, lapack_int k, lapack_int l,\n                                double* a, lapack_int lda, double* b,\n                                lapack_int ldb, double tola, double tolb,\n                                double* alpha, double* beta, double* u,\n                                lapack_int ldu, double* v, lapack_int ldv,\n                                double* q, lapack_int ldq, double* work,\n                                lapack_int* ncycle );\nlapack_int LAPACKE_ctgsja_work( int matrix_order, char jobu, char jobv,\n                                char jobq, lapack_int m, lapack_int p,\n                                lapack_int n, lapack_int k, lapack_int l,\n                                lapack_complex_float* a, lapack_int lda,\n                                lapack_complex_float* b, lapack_int ldb,\n                                float tola, float tolb, float* alpha,\n                                float* beta, lapack_complex_float* u,\n                                lapack_int ldu, lapack_complex_float* v,\n                                lapack_int ldv, lapack_complex_float* q,\n                                lapack_int ldq, lapack_complex_float* work,\n                                lapack_int* ncycle );\nlapack_int LAPACKE_ztgsja_work( int matrix_order, char jobu, char jobv,\n                                char jobq, lapack_int m, lapack_int p,\n                                lapack_int n, lapack_int k, lapack_int l,\n                                lapack_complex_double* a, lapack_int lda,\n                                lapack_complex_double* b, lapack_int ldb,\n                                double 
tola, double tolb, double* alpha,\n                                double* beta, lapack_complex_double* u,\n                                lapack_int ldu, lapack_complex_double* v,\n                                lapack_int ldv, lapack_complex_double* q,\n                                lapack_int ldq, lapack_complex_double* work,\n                                lapack_int* ncycle );\n\nlapack_int LAPACKE_stgsna_work( int matrix_order, char job, char howmny,\n                                const lapack_logical* select, lapack_int n,\n                                const float* a, lapack_int lda, const float* b,\n                                lapack_int ldb, const float* vl,\n                                lapack_int ldvl, const float* vr,\n                                lapack_int ldvr, float* s, float* dif,\n                                lapack_int mm, lapack_int* m, float* work,\n                                lapack_int lwork, lapack_int* iwork );\nlapack_int LAPACKE_dtgsna_work( int matrix_order, char job, char howmny,\n                                const lapack_logical* select, lapack_int n,\n                                const double* a, lapack_int lda,\n                                const double* b, lapack_int ldb,\n                                const double* vl, lapack_int ldvl,\n                                const double* vr, lapack_int ldvr, double* s,\n                                double* dif, lapack_int mm, lapack_int* m,\n                                double* work, lapack_int lwork,\n                                lapack_int* iwork );\nlapack_int LAPACKE_ctgsna_work( int matrix_order, char job, char howmny,\n                                const lapack_logical* select, lapack_int n,\n                                const lapack_complex_float* a, lapack_int lda,\n                                const lapack_complex_float* b, lapack_int ldb,\n                                const lapack_complex_float* vl, lapack_int ldvl,\n                                const lapack_complex_float* vr, lapack_int ldvr,\n                                float* s, float* dif, lapack_int mm,\n                                lapack_int* m, lapack_complex_float* work,\n                                lapack_int lwork, lapack_int* iwork );\nlapack_int LAPACKE_ztgsna_work( int matrix_order, char job, char howmny,\n                                const lapack_logical* select, lapack_int n,\n                                const lapack_complex_double* a, lapack_int lda,\n                                const lapack_complex_double* b, lapack_int ldb,\n                                const lapack_complex_double* vl,\n                                lapack_int ldvl,\n                                const lapack_complex_double* vr,\n                                lapack_int ldvr, double* s, double* dif,\n                                lapack_int mm, lapack_int* m,\n                                lapack_complex_double* work, lapack_int lwork,\n                                lapack_int* iwork );\n\nlapack_int LAPACKE_stgsyl_work( int matrix_order, char trans, lapack_int ijob,\n                                lapack_int m, lapack_int n, const float* a,\n                                lapack_int lda, const float* b, lapack_int ldb,\n                                float* c, lapack_int ldc, const float* d,\n                                lapack_int ldd, const float* e, lapack_int lde,\n                                float* f, lapack_int ldf, float* scale,\n                        
        float* dif, float* work, lapack_int lwork,\n                                lapack_int* iwork );\nlapack_int LAPACKE_dtgsyl_work( int matrix_order, char trans, lapack_int ijob,\n                                lapack_int m, lapack_int n, const double* a,\n                                lapack_int lda, const double* b, lapack_int ldb,\n                                double* c, lapack_int ldc, const double* d,\n                                lapack_int ldd, const double* e, lapack_int lde,\n                                double* f, lapack_int ldf, double* scale,\n                                double* dif, double* work, lapack_int lwork,\n                                lapack_int* iwork );\nlapack_int LAPACKE_ctgsyl_work( int matrix_order, char trans, lapack_int ijob,\n                                lapack_int m, lapack_int n,\n                                const lapack_complex_float* a, lapack_int lda,\n                                const lapack_complex_float* b, lapack_int ldb,\n                                lapack_complex_float* c, lapack_int ldc,\n                                const lapack_complex_float* d, lapack_int ldd,\n                                const lapack_complex_float* e, lapack_int lde,\n                                lapack_complex_float* f, lapack_int ldf,\n                                float* scale, float* dif,\n                                lapack_complex_float* work, lapack_int lwork,\n                                lapack_int* iwork );\nlapack_int LAPACKE_ztgsyl_work( int matrix_order, char trans, lapack_int ijob,\n                                lapack_int m, lapack_int n,\n                                const lapack_complex_double* a, lapack_int lda,\n                                const lapack_complex_double* b, lapack_int ldb,\n                                lapack_complex_double* c, lapack_int ldc,\n                                const lapack_complex_double* d, lapack_int ldd,\n                                const lapack_complex_double* e, lapack_int lde,\n                                lapack_complex_double* f, lapack_int ldf,\n                                double* scale, double* dif,\n                                lapack_complex_double* work, lapack_int lwork,\n                                lapack_int* iwork );\n\nlapack_int LAPACKE_stpcon_work( int matrix_order, char norm, char uplo,\n                                char diag, lapack_int n, const float* ap,\n                                float* rcond, float* work, lapack_int* iwork );\nlapack_int LAPACKE_dtpcon_work( int matrix_order, char norm, char uplo,\n                                char diag, lapack_int n, const double* ap,\n                                double* rcond, double* work,\n                                lapack_int* iwork );\nlapack_int LAPACKE_ctpcon_work( int matrix_order, char norm, char uplo,\n                                char diag, lapack_int n,\n                                const lapack_complex_float* ap, float* rcond,\n                                lapack_complex_float* work, float* rwork );\nlapack_int LAPACKE_ztpcon_work( int matrix_order, char norm, char uplo,\n                                char diag, lapack_int n,\n                                const lapack_complex_double* ap, double* rcond,\n                                lapack_complex_double* work, double* rwork );\n\nlapack_int LAPACKE_stprfs_work( int matrix_order, char uplo, char trans,\n                                char diag, lapack_int n, lapack_int nrhs,\n       
                         const float* ap, const float* b, lapack_int ldb,\n                                const float* x, lapack_int ldx, float* ferr,\n                                float* berr, float* work, lapack_int* iwork );\nlapack_int LAPACKE_dtprfs_work( int matrix_order, char uplo, char trans,\n                                char diag, lapack_int n, lapack_int nrhs,\n                                const double* ap, const double* b,\n                                lapack_int ldb, const double* x, lapack_int ldx,\n                                double* ferr, double* berr, double* work,\n                                lapack_int* iwork );\nlapack_int LAPACKE_ctprfs_work( int matrix_order, char uplo, char trans,\n                                char diag, lapack_int n, lapack_int nrhs,\n                                const lapack_complex_float* ap,\n                                const lapack_complex_float* b, lapack_int ldb,\n                                const lapack_complex_float* x, lapack_int ldx,\n                                float* ferr, float* berr,\n                                lapack_complex_float* work, float* rwork );\nlapack_int LAPACKE_ztprfs_work( int matrix_order, char uplo, char trans,\n                                char diag, lapack_int n, lapack_int nrhs,\n                                const lapack_complex_double* ap,\n                                const lapack_complex_double* b, lapack_int ldb,\n                                const lapack_complex_double* x, lapack_int ldx,\n                                double* ferr, double* berr,\n                                lapack_complex_double* work, double* rwork );\n\nlapack_int LAPACKE_stptri_work( int matrix_order, char uplo, char diag,\n                                lapack_int n, float* ap );\nlapack_int LAPACKE_dtptri_work( int matrix_order, char uplo, char diag,\n                                lapack_int n, double* ap );\nlapack_int LAPACKE_ctptri_work( int matrix_order, char uplo, char diag,\n                                lapack_int n, lapack_complex_float* ap );\nlapack_int LAPACKE_ztptri_work( int matrix_order, char uplo, char diag,\n                                lapack_int n, lapack_complex_double* ap );\n\nlapack_int LAPACKE_stptrs_work( int matrix_order, char uplo, char trans,\n                                char diag, lapack_int n, lapack_int nrhs,\n                                const float* ap, float* b, lapack_int ldb );\nlapack_int LAPACKE_dtptrs_work( int matrix_order, char uplo, char trans,\n                                char diag, lapack_int n, lapack_int nrhs,\n                                const double* ap, double* b, lapack_int ldb );\nlapack_int LAPACKE_ctptrs_work( int matrix_order, char uplo, char trans,\n                                char diag, lapack_int n, lapack_int nrhs,\n                                const lapack_complex_float* ap,\n                                lapack_complex_float* b, lapack_int ldb );\nlapack_int LAPACKE_ztptrs_work( int matrix_order, char uplo, char trans,\n                                char diag, lapack_int n, lapack_int nrhs,\n                                const lapack_complex_double* ap,\n                                lapack_complex_double* b, lapack_int ldb );\n\nlapack_int LAPACKE_stpttf_work( int matrix_order, char transr, char uplo,\n                                lapack_int n, const float* ap, float* arf );\nlapack_int LAPACKE_dtpttf_work( int matrix_order, char transr, char uplo,\n                               
 lapack_int n, const double* ap, double* arf );\nlapack_int LAPACKE_ctpttf_work( int matrix_order, char transr, char uplo,\n                                lapack_int n, const lapack_complex_float* ap,\n                                lapack_complex_float* arf );\nlapack_int LAPACKE_ztpttf_work( int matrix_order, char transr, char uplo,\n                                lapack_int n, const lapack_complex_double* ap,\n                                lapack_complex_double* arf );\n\nlapack_int LAPACKE_stpttr_work( int matrix_order, char uplo, lapack_int n,\n                                const float* ap, float* a, lapack_int lda );\nlapack_int LAPACKE_dtpttr_work( int matrix_order, char uplo, lapack_int n,\n                                const double* ap, double* a, lapack_int lda );\nlapack_int LAPACKE_ctpttr_work( int matrix_order, char uplo, lapack_int n,\n                                const lapack_complex_float* ap,\n                                lapack_complex_float* a, lapack_int lda );\nlapack_int LAPACKE_ztpttr_work( int matrix_order, char uplo, lapack_int n,\n                                const lapack_complex_double* ap,\n                                lapack_complex_double* a, lapack_int lda );\n\nlapack_int LAPACKE_strcon_work( int matrix_order, char norm, char uplo,\n                                char diag, lapack_int n, const float* a,\n                                lapack_int lda, float* rcond, float* work,\n                                lapack_int* iwork );\nlapack_int LAPACKE_dtrcon_work( int matrix_order, char norm, char uplo,\n                                char diag, lapack_int n, const double* a,\n                                lapack_int lda, double* rcond, double* work,\n                                lapack_int* iwork );\nlapack_int LAPACKE_ctrcon_work( int matrix_order, char norm, char uplo,\n                                char diag, lapack_int n,\n                                const lapack_complex_float* a, lapack_int lda,\n                                float* rcond, lapack_complex_float* work,\n                                float* rwork );\nlapack_int LAPACKE_ztrcon_work( int matrix_order, char norm, char uplo,\n                                char diag, lapack_int n,\n                                const lapack_complex_double* a, lapack_int lda,\n                                double* rcond, lapack_complex_double* work,\n                                double* rwork );\n\nlapack_int LAPACKE_strevc_work( int matrix_order, char side, char howmny,\n                                lapack_logical* select, lapack_int n,\n                                const float* t, lapack_int ldt, float* vl,\n                                lapack_int ldvl, float* vr, lapack_int ldvr,\n                                lapack_int mm, lapack_int* m, float* work );\nlapack_int LAPACKE_dtrevc_work( int matrix_order, char side, char howmny,\n                                lapack_logical* select, lapack_int n,\n                                const double* t, lapack_int ldt, double* vl,\n                                lapack_int ldvl, double* vr, lapack_int ldvr,\n                                lapack_int mm, lapack_int* m, double* work );\nlapack_int LAPACKE_ctrevc_work( int matrix_order, char side, char howmny,\n                                const lapack_logical* select, lapack_int n,\n                                lapack_complex_float* t, lapack_int ldt,\n                                lapack_complex_float* vl, lapack_int ldvl,\n                           
     lapack_complex_float* vr, lapack_int ldvr,\n                                lapack_int mm, lapack_int* m,\n                                lapack_complex_float* work, float* rwork );\nlapack_int LAPACKE_ztrevc_work( int matrix_order, char side, char howmny,\n                                const lapack_logical* select, lapack_int n,\n                                lapack_complex_double* t, lapack_int ldt,\n                                lapack_complex_double* vl, lapack_int ldvl,\n                                lapack_complex_double* vr, lapack_int ldvr,\n                                lapack_int mm, lapack_int* m,\n                                lapack_complex_double* work, double* rwork );\n\nlapack_int LAPACKE_strexc_work( int matrix_order, char compq, lapack_int n,\n                                float* t, lapack_int ldt, float* q,\n                                lapack_int ldq, lapack_int* ifst,\n                                lapack_int* ilst, float* work );\nlapack_int LAPACKE_dtrexc_work( int matrix_order, char compq, lapack_int n,\n                                double* t, lapack_int ldt, double* q,\n                                lapack_int ldq, lapack_int* ifst,\n                                lapack_int* ilst, double* work );\nlapack_int LAPACKE_ctrexc_work( int matrix_order, char compq, lapack_int n,\n                                lapack_complex_float* t, lapack_int ldt,\n                                lapack_complex_float* q, lapack_int ldq,\n                                lapack_int ifst, lapack_int ilst );\nlapack_int LAPACKE_ztrexc_work( int matrix_order, char compq, lapack_int n,\n                                lapack_complex_double* t, lapack_int ldt,\n                                lapack_complex_double* q, lapack_int ldq,\n                                lapack_int ifst, lapack_int ilst );\n\nlapack_int LAPACKE_strrfs_work( int matrix_order, char uplo, char trans,\n                                char diag, lapack_int n, lapack_int nrhs,\n                                const float* a, lapack_int lda, const float* b,\n                                lapack_int ldb, const float* x, lapack_int ldx,\n                                float* ferr, float* berr, float* work,\n                                lapack_int* iwork );\nlapack_int LAPACKE_dtrrfs_work( int matrix_order, char uplo, char trans,\n                                char diag, lapack_int n, lapack_int nrhs,\n                                const double* a, lapack_int lda,\n                                const double* b, lapack_int ldb,\n                                const double* x, lapack_int ldx, double* ferr,\n                                double* berr, double* work, lapack_int* iwork );\nlapack_int LAPACKE_ctrrfs_work( int matrix_order, char uplo, char trans,\n                                char diag, lapack_int n, lapack_int nrhs,\n                                const lapack_complex_float* a, lapack_int lda,\n                                const lapack_complex_float* b, lapack_int ldb,\n                                const lapack_complex_float* x, lapack_int ldx,\n                                float* ferr, float* berr,\n                                lapack_complex_float* work, float* rwork );\nlapack_int LAPACKE_ztrrfs_work( int matrix_order, char uplo, char trans,\n                                char diag, lapack_int n, lapack_int nrhs,\n                                const lapack_complex_double* a, lapack_int lda,\n                                const 
lapack_complex_double* b, lapack_int ldb,\n                                const lapack_complex_double* x, lapack_int ldx,\n                                double* ferr, double* berr,\n                                lapack_complex_double* work, double* rwork );\n\nlapack_int LAPACKE_strsen_work( int matrix_order, char job, char compq,\n                                const lapack_logical* select, lapack_int n,\n                                float* t, lapack_int ldt, float* q,\n                                lapack_int ldq, float* wr, float* wi,\n                                lapack_int* m, float* s, float* sep,\n                                float* work, lapack_int lwork,\n                                lapack_int* iwork, lapack_int liwork );\nlapack_int LAPACKE_dtrsen_work( int matrix_order, char job, char compq,\n                                const lapack_logical* select, lapack_int n,\n                                double* t, lapack_int ldt, double* q,\n                                lapack_int ldq, double* wr, double* wi,\n                                lapack_int* m, double* s, double* sep,\n                                double* work, lapack_int lwork,\n                                lapack_int* iwork, lapack_int liwork );\nlapack_int LAPACKE_ctrsen_work( int matrix_order, char job, char compq,\n                                const lapack_logical* select, lapack_int n,\n                                lapack_complex_float* t, lapack_int ldt,\n                                lapack_complex_float* q, lapack_int ldq,\n                                lapack_complex_float* w, lapack_int* m,\n                                float* s, float* sep,\n                                lapack_complex_float* work, lapack_int lwork );\nlapack_int LAPACKE_ztrsen_work( int matrix_order, char job, char compq,\n                                const lapack_logical* select, lapack_int n,\n                                lapack_complex_double* t, lapack_int ldt,\n                                lapack_complex_double* q, lapack_int ldq,\n                                lapack_complex_double* w, lapack_int* m,\n                                double* s, double* sep,\n                                lapack_complex_double* work, lapack_int lwork );\n\nlapack_int LAPACKE_strsna_work( int matrix_order, char job, char howmny,\n                                const lapack_logical* select, lapack_int n,\n                                const float* t, lapack_int ldt, const float* vl,\n                                lapack_int ldvl, const float* vr,\n                                lapack_int ldvr, float* s, float* sep,\n                                lapack_int mm, lapack_int* m, float* work,\n                                lapack_int ldwork, lapack_int* iwork );\nlapack_int LAPACKE_dtrsna_work( int matrix_order, char job, char howmny,\n                                const lapack_logical* select, lapack_int n,\n                                const double* t, lapack_int ldt,\n                                const double* vl, lapack_int ldvl,\n                                const double* vr, lapack_int ldvr, double* s,\n                                double* sep, lapack_int mm, lapack_int* m,\n                                double* work, lapack_int ldwork,\n                                lapack_int* iwork );\nlapack_int LAPACKE_ctrsna_work( int matrix_order, char job, char howmny,\n                                const lapack_logical* select, lapack_int n,\n                                
const lapack_complex_float* t, lapack_int ldt,\n                                const lapack_complex_float* vl, lapack_int ldvl,\n                                const lapack_complex_float* vr, lapack_int ldvr,\n                                float* s, float* sep, lapack_int mm,\n                                lapack_int* m, lapack_complex_float* work,\n                                lapack_int ldwork, float* rwork );\nlapack_int LAPACKE_ztrsna_work( int matrix_order, char job, char howmny,\n                                const lapack_logical* select, lapack_int n,\n                                const lapack_complex_double* t, lapack_int ldt,\n                                const lapack_complex_double* vl,\n                                lapack_int ldvl,\n                                const lapack_complex_double* vr,\n                                lapack_int ldvr, double* s, double* sep,\n                                lapack_int mm, lapack_int* m,\n                                lapack_complex_double* work, lapack_int ldwork,\n                                double* rwork );\n\nlapack_int LAPACKE_strsyl_work( int matrix_order, char trana, char tranb,\n                                lapack_int isgn, lapack_int m, lapack_int n,\n                                const float* a, lapack_int lda, const float* b,\n                                lapack_int ldb, float* c, lapack_int ldc,\n                                float* scale );\nlapack_int LAPACKE_dtrsyl_work( int matrix_order, char trana, char tranb,\n                                lapack_int isgn, lapack_int m, lapack_int n,\n                                const double* a, lapack_int lda,\n                                const double* b, lapack_int ldb, double* c,\n                                lapack_int ldc, double* scale );\nlapack_int LAPACKE_ctrsyl_work( int matrix_order, char trana, char tranb,\n                                lapack_int isgn, lapack_int m, lapack_int n,\n                                const lapack_complex_float* a, lapack_int lda,\n                                const lapack_complex_float* b, lapack_int ldb,\n                                lapack_complex_float* c, lapack_int ldc,\n                                float* scale );\nlapack_int LAPACKE_ztrsyl_work( int matrix_order, char trana, char tranb,\n                                lapack_int isgn, lapack_int m, lapack_int n,\n                                const lapack_complex_double* a, lapack_int lda,\n                                const lapack_complex_double* b, lapack_int ldb,\n                                lapack_complex_double* c, lapack_int ldc,\n                                double* scale );\n\nlapack_int LAPACKE_strtri_work( int matrix_order, char uplo, char diag,\n                                lapack_int n, float* a, lapack_int lda );\nlapack_int LAPACKE_dtrtri_work( int matrix_order, char uplo, char diag,\n                                lapack_int n, double* a, lapack_int lda );\nlapack_int LAPACKE_ctrtri_work( int matrix_order, char uplo, char diag,\n                                lapack_int n, lapack_complex_float* a,\n                                lapack_int lda );\nlapack_int LAPACKE_ztrtri_work( int matrix_order, char uplo, char diag,\n                                lapack_int n, lapack_complex_double* a,\n                                lapack_int lda );\n\nlapack_int LAPACKE_strtrs_work( int matrix_order, char uplo, char trans,\n                                char diag, lapack_int n, lapack_int nrhs,\n      
                          const float* a, lapack_int lda, float* b,\n                                lapack_int ldb );\nlapack_int LAPACKE_dtrtrs_work( int matrix_order, char uplo, char trans,\n                                char diag, lapack_int n, lapack_int nrhs,\n                                const double* a, lapack_int lda, double* b,\n                                lapack_int ldb );\nlapack_int LAPACKE_ctrtrs_work( int matrix_order, char uplo, char trans,\n                                char diag, lapack_int n, lapack_int nrhs,\n                                const lapack_complex_float* a, lapack_int lda,\n                                lapack_complex_float* b, lapack_int ldb );\nlapack_int LAPACKE_ztrtrs_work( int matrix_order, char uplo, char trans,\n                                char diag, lapack_int n, lapack_int nrhs,\n                                const lapack_complex_double* a, lapack_int lda,\n                                lapack_complex_double* b, lapack_int ldb );\n\nlapack_int LAPACKE_strttf_work( int matrix_order, char transr, char uplo,\n                                lapack_int n, const float* a, lapack_int lda,\n                                float* arf );\nlapack_int LAPACKE_dtrttf_work( int matrix_order, char transr, char uplo,\n                                lapack_int n, const double* a, lapack_int lda,\n                                double* arf );\nlapack_int LAPACKE_ctrttf_work( int matrix_order, char transr, char uplo,\n                                lapack_int n, const lapack_complex_float* a,\n                                lapack_int lda, lapack_complex_float* arf );\nlapack_int LAPACKE_ztrttf_work( int matrix_order, char transr, char uplo,\n                                lapack_int n, const lapack_complex_double* a,\n                                lapack_int lda, lapack_complex_double* arf );\n\nlapack_int LAPACKE_strttp_work( int matrix_order, char uplo, lapack_int n,\n                                const float* a, lapack_int lda, float* ap );\nlapack_int LAPACKE_dtrttp_work( int matrix_order, char uplo, lapack_int n,\n                                const double* a, lapack_int lda, double* ap );\nlapack_int LAPACKE_ctrttp_work( int matrix_order, char uplo, lapack_int n,\n                                const lapack_complex_float* a, lapack_int lda,\n                                lapack_complex_float* ap );\nlapack_int LAPACKE_ztrttp_work( int matrix_order, char uplo, lapack_int n,\n                                const lapack_complex_double* a, lapack_int lda,\n                                lapack_complex_double* ap );\n\nlapack_int LAPACKE_stzrzf_work( int matrix_order, lapack_int m, lapack_int n,\n                                float* a, lapack_int lda, float* tau,\n                                float* work, lapack_int lwork );\nlapack_int LAPACKE_dtzrzf_work( int matrix_order, lapack_int m, lapack_int n,\n                                double* a, lapack_int lda, double* tau,\n                                double* work, lapack_int lwork );\nlapack_int LAPACKE_ctzrzf_work( int matrix_order, lapack_int m, lapack_int n,\n                                lapack_complex_float* a, lapack_int lda,\n                                lapack_complex_float* tau,\n                                lapack_complex_float* work, lapack_int lwork );\nlapack_int LAPACKE_ztzrzf_work( int matrix_order, lapack_int m, lapack_int n,\n                                lapack_complex_double* a, lapack_int lda,\n                                
lapack_complex_double* tau,\n                                lapack_complex_double* work, lapack_int lwork );\n\nlapack_int LAPACKE_cungbr_work( int matrix_order, char vect, lapack_int m,\n                                lapack_int n, lapack_int k,\n                                lapack_complex_float* a, lapack_int lda,\n                                const lapack_complex_float* tau,\n                                lapack_complex_float* work, lapack_int lwork );\nlapack_int LAPACKE_zungbr_work( int matrix_order, char vect, lapack_int m,\n                                lapack_int n, lapack_int k,\n                                lapack_complex_double* a, lapack_int lda,\n                                const lapack_complex_double* tau,\n                                lapack_complex_double* work, lapack_int lwork );\n\nlapack_int LAPACKE_cunghr_work( int matrix_order, lapack_int n, lapack_int ilo,\n                                lapack_int ihi, lapack_complex_float* a,\n                                lapack_int lda, const lapack_complex_float* tau,\n                                lapack_complex_float* work, lapack_int lwork );\nlapack_int LAPACKE_zunghr_work( int matrix_order, lapack_int n, lapack_int ilo,\n                                lapack_int ihi, lapack_complex_double* a,\n                                lapack_int lda,\n                                const lapack_complex_double* tau,\n                                lapack_complex_double* work, lapack_int lwork );\n\nlapack_int LAPACKE_cunglq_work( int matrix_order, lapack_int m, lapack_int n,\n                                lapack_int k, lapack_complex_float* a,\n                                lapack_int lda, const lapack_complex_float* tau,\n                                lapack_complex_float* work, lapack_int lwork );\nlapack_int LAPACKE_zunglq_work( int matrix_order, lapack_int m, lapack_int n,\n                                lapack_int k, lapack_complex_double* a,\n                                lapack_int lda,\n                                const lapack_complex_double* tau,\n                                lapack_complex_double* work, lapack_int lwork );\n\nlapack_int LAPACKE_cungql_work( int matrix_order, lapack_int m, lapack_int n,\n                                lapack_int k, lapack_complex_float* a,\n                                lapack_int lda, const lapack_complex_float* tau,\n                                lapack_complex_float* work, lapack_int lwork );\nlapack_int LAPACKE_zungql_work( int matrix_order, lapack_int m, lapack_int n,\n                                lapack_int k, lapack_complex_double* a,\n                                lapack_int lda,\n                                const lapack_complex_double* tau,\n                                lapack_complex_double* work, lapack_int lwork );\n\nlapack_int LAPACKE_cungqr_work( int matrix_order, lapack_int m, lapack_int n,\n                                lapack_int k, lapack_complex_float* a,\n                                lapack_int lda, const lapack_complex_float* tau,\n                                lapack_complex_float* work, lapack_int lwork );\nlapack_int LAPACKE_zungqr_work( int matrix_order, lapack_int m, lapack_int n,\n                                lapack_int k, lapack_complex_double* a,\n                                lapack_int lda,\n                                const lapack_complex_double* tau,\n                                lapack_complex_double* work, lapack_int lwork );\n\nlapack_int LAPACKE_cungrq_work( int 
matrix_order, lapack_int m, lapack_int n,\n                                lapack_int k, lapack_complex_float* a,\n                                lapack_int lda, const lapack_complex_float* tau,\n                                lapack_complex_float* work, lapack_int lwork );\nlapack_int LAPACKE_zungrq_work( int matrix_order, lapack_int m, lapack_int n,\n                                lapack_int k, lapack_complex_double* a,\n                                lapack_int lda,\n                                const lapack_complex_double* tau,\n                                lapack_complex_double* work, lapack_int lwork );\n\nlapack_int LAPACKE_cungtr_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_complex_float* a, lapack_int lda,\n                                const lapack_complex_float* tau,\n                                lapack_complex_float* work, lapack_int lwork );\nlapack_int LAPACKE_zungtr_work( int matrix_order, char uplo, lapack_int n,\n                                lapack_complex_double* a, lapack_int lda,\n                                const lapack_complex_double* tau,\n                                lapack_complex_double* work, lapack_int lwork );\n\nlapack_int LAPACKE_cunmbr_work( int matrix_order, char vect, char side,\n                                char trans, lapack_int m, lapack_int n,\n                                lapack_int k, const lapack_complex_float* a,\n                                lapack_int lda, const lapack_complex_float* tau,\n                                lapack_complex_float* c, lapack_int ldc,\n                                lapack_complex_float* work, lapack_int lwork );\nlapack_int LAPACKE_zunmbr_work( int matrix_order, char vect, char side,\n                                char trans, lapack_int m, lapack_int n,\n                                lapack_int k, const lapack_complex_double* a,\n                                lapack_int lda,\n                                const lapack_complex_double* tau,\n                                lapack_complex_double* c, lapack_int ldc,\n                                lapack_complex_double* work, lapack_int lwork );\n\nlapack_int LAPACKE_cunmhr_work( int matrix_order, char side, char trans,\n                                lapack_int m, lapack_int n, lapack_int ilo,\n                                lapack_int ihi, const lapack_complex_float* a,\n                                lapack_int lda, const lapack_complex_float* tau,\n                                lapack_complex_float* c, lapack_int ldc,\n                                lapack_complex_float* work, lapack_int lwork );\nlapack_int LAPACKE_zunmhr_work( int matrix_order, char side, char trans,\n                                lapack_int m, lapack_int n, lapack_int ilo,\n                                lapack_int ihi, const lapack_complex_double* a,\n                                lapack_int lda,\n                                const lapack_complex_double* tau,\n                                lapack_complex_double* c, lapack_int ldc,\n                                lapack_complex_double* work, lapack_int lwork );\n\nlapack_int LAPACKE_cunmlq_work( int matrix_order, char side, char trans,\n                                lapack_int m, lapack_int n, lapack_int k,\n                                const lapack_complex_float* a, lapack_int lda,\n                                const lapack_complex_float* tau,\n                                lapack_complex_float* c, lapack_int ldc,\n                     
           lapack_complex_float* work, lapack_int lwork );\nlapack_int LAPACKE_zunmlq_work( int matrix_order, char side, char trans,\n                                lapack_int m, lapack_int n, lapack_int k,\n                                const lapack_complex_double* a, lapack_int lda,\n                                const lapack_complex_double* tau,\n                                lapack_complex_double* c, lapack_int ldc,\n                                lapack_complex_double* work, lapack_int lwork );\n\nlapack_int LAPACKE_cunmql_work( int matrix_order, char side, char trans,\n                                lapack_int m, lapack_int n, lapack_int k,\n                                const lapack_complex_float* a, lapack_int lda,\n                                const lapack_complex_float* tau,\n                                lapack_complex_float* c, lapack_int ldc,\n                                lapack_complex_float* work, lapack_int lwork );\nlapack_int LAPACKE_zunmql_work( int matrix_order, char side, char trans,\n                                lapack_int m, lapack_int n, lapack_int k,\n                                const lapack_complex_double* a, lapack_int lda,\n                                const lapack_complex_double* tau,\n                                lapack_complex_double* c, lapack_int ldc,\n                                lapack_complex_double* work, lapack_int lwork );\n\nlapack_int LAPACKE_cunmqr_work( int matrix_order, char side, char trans,\n                                lapack_int m, lapack_int n, lapack_int k,\n                                const lapack_complex_float* a, lapack_int lda,\n                                const lapack_complex_float* tau,\n                                lapack_complex_float* c, lapack_int ldc,\n                                lapack_complex_float* work, lapack_int lwork );\nlapack_int LAPACKE_zunmqr_work( int matrix_order, char side, char trans,\n                                lapack_int m, lapack_int n, lapack_int k,\n                                const lapack_complex_double* a, lapack_int lda,\n                                const lapack_complex_double* tau,\n                                lapack_complex_double* c, lapack_int ldc,\n                                lapack_complex_double* work, lapack_int lwork );\n\nlapack_int LAPACKE_cunmrq_work( int matrix_order, char side, char trans,\n                                lapack_int m, lapack_int n, lapack_int k,\n                                const lapack_complex_float* a, lapack_int lda,\n                                const lapack_complex_float* tau,\n                                lapack_complex_float* c, lapack_int ldc,\n                                lapack_complex_float* work, lapack_int lwork );\nlapack_int LAPACKE_zunmrq_work( int matrix_order, char side, char trans,\n                                lapack_int m, lapack_int n, lapack_int k,\n                                const lapack_complex_double* a, lapack_int lda,\n                                const lapack_complex_double* tau,\n                                lapack_complex_double* c, lapack_int ldc,\n                                lapack_complex_double* work, lapack_int lwork );\n\nlapack_int LAPACKE_cunmrz_work( int matrix_order, char side, char trans,\n                                lapack_int m, lapack_int n, lapack_int k,\n                                lapack_int l, const lapack_complex_float* a,\n                                lapack_int lda, const lapack_complex_float* tau,\n            
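/* Editorial note (not from the original header): these *_work variants pass lwork straight through to the underlying Fortran routine, so the usual two-call workspace query applies. A minimal sketch with LAPACKE_zunmqr_work, assuming m, n, k, a, lda, tau, c, ldc are already set up and that lapack_complex_double is the default C99 double _Complex (so creal from <complex.h> applies; malloc needs <stdlib.h>):\n\n     lapack_complex_double wkopt;\n     lapack_int info = LAPACKE_zunmqr_work( LAPACK_COL_MAJOR, 'L', 'N',\n                                            m, n, k, a, lda, tau, c, ldc,\n                                            &wkopt, -1 );\n     lapack_int lwork = (lapack_int)creal( wkopt );\n     lapack_complex_double* work = malloc( lwork * sizeof *work );\n     info = LAPACKE_zunmqr_work( LAPACK_COL_MAJOR, 'L', 'N', m, n, k,\n                                 a, lda, tau, c, ldc, work, lwork );\n     free( work ); */\n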
                    lapack_complex_float* c, lapack_int ldc,\n                                lapack_complex_float* work, lapack_int lwork );\nlapack_int LAPACKE_zunmrz_work( int matrix_order, char side, char trans,\n                                lapack_int m, lapack_int n, lapack_int k,\n                                lapack_int l, const lapack_complex_double* a,\n                                lapack_int lda,\n                                const lapack_complex_double* tau,\n                                lapack_complex_double* c, lapack_int ldc,\n                                lapack_complex_double* work, lapack_int lwork );\n\nlapack_int LAPACKE_cunmtr_work( int matrix_order, char side, char uplo,\n                                char trans, lapack_int m, lapack_int n,\n                                const lapack_complex_float* a, lapack_int lda,\n                                const lapack_complex_float* tau,\n                                lapack_complex_float* c, lapack_int ldc,\n                                lapack_complex_float* work, lapack_int lwork );\nlapack_int LAPACKE_zunmtr_work( int matrix_order, char side, char uplo,\n                                char trans, lapack_int m, lapack_int n,\n                                const lapack_complex_double* a, lapack_int lda,\n                                const lapack_complex_double* tau,\n                                lapack_complex_double* c, lapack_int ldc,\n                                lapack_complex_double* work, lapack_int lwork );\n\nlapack_int LAPACKE_cupgtr_work( int matrix_order, char uplo, lapack_int n,\n                                const lapack_complex_float* ap,\n                                const lapack_complex_float* tau,\n                                lapack_complex_float* q, lapack_int ldq,\n                                lapack_complex_float* work );\nlapack_int LAPACKE_zupgtr_work( int matrix_order, char uplo, lapack_int n,\n                                const lapack_complex_double* ap,\n                                const lapack_complex_double* tau,\n                                lapack_complex_double* q, lapack_int ldq,\n                                lapack_complex_double* work );\n\nlapack_int LAPACKE_cupmtr_work( int matrix_order, char side, char uplo,\n                                char trans, lapack_int m, lapack_int n,\n                                const lapack_complex_float* ap,\n                                const lapack_complex_float* tau,\n                                lapack_complex_float* c, lapack_int ldc,\n                                lapack_complex_float* work );\nlapack_int LAPACKE_zupmtr_work( int matrix_order, char side, char uplo,\n                                char trans, lapack_int m, lapack_int n,\n                                const lapack_complex_double* ap,\n                                const lapack_complex_double* tau,\n                                lapack_complex_double* c, lapack_int ldc,\n                                lapack_complex_double* work );\n\nlapack_int LAPACKE_claghe( int matrix_order, lapack_int n, lapack_int k,\n                           const float* d, lapack_complex_float* a,\n                           lapack_int lda, lapack_int* iseed );\nlapack_int LAPACKE_zlaghe( int matrix_order, lapack_int n, lapack_int k,\n                           const double* d, lapack_complex_double* a,\n                           lapack_int lda, lapack_int* iseed );\n\nlapack_int LAPACKE_slagsy( int matrix_order, lapack_int n, 
lapack_int k,\n                           const float* d, float* a, lapack_int lda,\n                           lapack_int* iseed );\nlapack_int LAPACKE_dlagsy( int matrix_order, lapack_int n, lapack_int k,\n                           const double* d, double* a, lapack_int lda,\n                           lapack_int* iseed );\nlapack_int LAPACKE_clagsy( int matrix_order, lapack_int n, lapack_int k,\n                           const float* d, lapack_complex_float* a,\n                           lapack_int lda, lapack_int* iseed );\nlapack_int LAPACKE_zlagsy( int matrix_order, lapack_int n, lapack_int k,\n                           const double* d, lapack_complex_double* a,\n                           lapack_int lda, lapack_int* iseed );\n\nlapack_int LAPACKE_slapmr( int matrix_order, lapack_logical forwrd,\n                           lapack_int m, lapack_int n, float* x, lapack_int ldx,\n                           lapack_int* k );\nlapack_int LAPACKE_dlapmr( int matrix_order, lapack_logical forwrd,\n                           lapack_int m, lapack_int n, double* x,\n                           lapack_int ldx, lapack_int* k );\nlapack_int LAPACKE_clapmr( int matrix_order, lapack_logical forwrd,\n                           lapack_int m, lapack_int n, lapack_complex_float* x,\n                           lapack_int ldx, lapack_int* k );\nlapack_int LAPACKE_zlapmr( int matrix_order, lapack_logical forwrd,\n                           lapack_int m, lapack_int n, lapack_complex_double* x,\n                           lapack_int ldx, lapack_int* k );\n\n\nfloat LAPACKE_slapy2( float x, float y );\ndouble LAPACKE_dlapy2( double x, double y );\n\nfloat LAPACKE_slapy3( float x, float y, float z );\ndouble LAPACKE_dlapy3( double x, double y, double z );\n\nlapack_int LAPACKE_slartgp( float f, float g, float* cs, float* sn, float* r );\nlapack_int LAPACKE_dlartgp( double f, double g, double* cs, double* sn,\n                            double* r );\n\nlapack_int LAPACKE_slartgs( float x, float y, float sigma, float* cs,\n                            float* sn );\nlapack_int LAPACKE_dlartgs( double x, double y, double sigma, double* cs,\n                            double* sn );\n\n\n//LAPACK 3.3.0\nlapack_int LAPACKE_cbbcsd( int matrix_order, char jobu1, char jobu2,\n                           char jobv1t, char jobv2t, char trans, lapack_int m,\n                           lapack_int p, lapack_int q, float* theta, float* phi,\n                           lapack_complex_float* u1, lapack_int ldu1,\n                           lapack_complex_float* u2, lapack_int ldu2,\n                           lapack_complex_float* v1t, lapack_int ldv1t,\n                           lapack_complex_float* v2t, lapack_int ldv2t,\n                           float* b11d, float* b11e, float* b12d, float* b12e,\n                           float* b21d, float* b21e, float* b22d, float* b22e );\nlapack_int LAPACKE_cbbcsd_work( int matrix_order, char jobu1, char jobu2,\n                                char jobv1t, char jobv2t, char trans,\n                                lapack_int m, lapack_int p, lapack_int q,\n                                float* theta, float* phi,\n                                lapack_complex_float* u1, lapack_int ldu1,\n                                lapack_complex_float* u2, lapack_int ldu2,\n                                lapack_complex_float* v1t, lapack_int ldv1t,\n                                lapack_complex_float* v2t, lapack_int ldv2t,\n                                float* b11d, float* b11e, float* 
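/* Editorial sketch (not from the original header): LAPACKE_dlapy2 and LAPACKE_dlapy3, declared above, compute sqrt(x^2 + y^2) and sqrt(x^2 + y^2 + z^2) without the intermediate overflow or underflow the naive formula can hit:\n\n     double r2  = LAPACKE_dlapy2( 3.0, 4.0 );        r2 == 5.0\n     double r3  = LAPACKE_dlapy3( 1.0, 2.0, 2.0 );   r3 == 3.0\n     double big = LAPACKE_dlapy2( 1e300, 1e300 );    finite, although\n                                                     x*x + y*y would\n                                                     overflow */\n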
b12d,\n                                float* b12e, float* b21d, float* b21e,\n                                float* b22d, float* b22e, float* rwork,\n                                lapack_int lrwork );\nlapack_int LAPACKE_cheswapr( int matrix_order, char uplo, lapack_int n,\n                             lapack_complex_float* a, lapack_int i1,\n                             lapack_int i2 );\nlapack_int LAPACKE_cheswapr_work( int matrix_order, char uplo, lapack_int n,\n                                  lapack_complex_float* a, lapack_int i1,\n                                  lapack_int i2 );\nlapack_int LAPACKE_chetri2( int matrix_order, char uplo, lapack_int n,\n                            lapack_complex_float* a, lapack_int lda,\n                            const lapack_int* ipiv );\nlapack_int LAPACKE_chetri2_work( int matrix_order, char uplo, lapack_int n,\n                                 lapack_complex_float* a, lapack_int lda,\n                                 const lapack_int* ipiv,\n                                 lapack_complex_float* work, lapack_int lwork );\nlapack_int LAPACKE_chetri2x( int matrix_order, char uplo, lapack_int n,\n                             lapack_complex_float* a, lapack_int lda,\n                             const lapack_int* ipiv, lapack_int nb );\nlapack_int LAPACKE_chetri2x_work( int matrix_order, char uplo, lapack_int n,\n                                  lapack_complex_float* a, lapack_int lda,\n                                  const lapack_int* ipiv,\n                                  lapack_complex_float* work, lapack_int nb );\nlapack_int LAPACKE_chetrs2( int matrix_order, char uplo, lapack_int n,\n                            lapack_int nrhs, const lapack_complex_float* a,\n                            lapack_int lda, const lapack_int* ipiv,\n                            lapack_complex_float* b, lapack_int ldb );\nlapack_int LAPACKE_chetrs2_work( int matrix_order, char uplo, lapack_int n,\n                                 lapack_int nrhs, const lapack_complex_float* a,\n                                 lapack_int lda, const lapack_int* ipiv,\n                                 lapack_complex_float* b, lapack_int ldb,\n                                 lapack_complex_float* work );\nlapack_int LAPACKE_csyconv( int matrix_order, char uplo, char way, lapack_int n,\n                            lapack_complex_float* a, lapack_int lda,\n                            const lapack_int* ipiv );\nlapack_int LAPACKE_csyconv_work( int matrix_order, char uplo, char way,\n                                 lapack_int n, lapack_complex_float* a,\n                                 lapack_int lda, const lapack_int* ipiv,\n                                 lapack_complex_float* work );\nlapack_int LAPACKE_csyswapr( int matrix_order, char uplo, lapack_int n,\n                             lapack_complex_float* a, lapack_int i1,\n                             lapack_int i2 );\nlapack_int LAPACKE_csyswapr_work( int matrix_order, char uplo, lapack_int n,\n                                  lapack_complex_float* a, lapack_int i1,\n                                  lapack_int i2 );\nlapack_int LAPACKE_csytri2( int matrix_order, char uplo, lapack_int n,\n                            lapack_complex_float* a, lapack_int lda,\n                            const lapack_int* ipiv );\nlapack_int LAPACKE_csytri2_work( int matrix_order, char uplo, lapack_int n,\n                                 lapack_complex_float* a, lapack_int lda,\n                                 const lapack_int* ipiv,\n  
                               lapack_complex_float* work, lapack_int lwork );\nlapack_int LAPACKE_csytri2x( int matrix_order, char uplo, lapack_int n,\n                             lapack_complex_float* a, lapack_int lda,\n                             const lapack_int* ipiv, lapack_int nb );\nlapack_int LAPACKE_csytri2x_work( int matrix_order, char uplo, lapack_int n,\n                                  lapack_complex_float* a, lapack_int lda,\n                                  const lapack_int* ipiv,\n                                  lapack_complex_float* work, lapack_int nb );\nlapack_int LAPACKE_csytrs2( int matrix_order, char uplo, lapack_int n,\n                            lapack_int nrhs, const lapack_complex_float* a,\n                            lapack_int lda, const lapack_int* ipiv,\n                            lapack_complex_float* b, lapack_int ldb );\nlapack_int LAPACKE_csytrs2_work( int matrix_order, char uplo, lapack_int n,\n                                 lapack_int nrhs, const lapack_complex_float* a,\n                                 lapack_int lda, const lapack_int* ipiv,\n                                 lapack_complex_float* b, lapack_int ldb,\n                                 lapack_complex_float* work );\nlapack_int LAPACKE_cunbdb( int matrix_order, char trans, char signs,\n                           lapack_int m, lapack_int p, lapack_int q,\n                           lapack_complex_float* x11, lapack_int ldx11,\n                           lapack_complex_float* x12, lapack_int ldx12,\n                           lapack_complex_float* x21, lapack_int ldx21,\n                           lapack_complex_float* x22, lapack_int ldx22,\n                           float* theta, float* phi,\n                           lapack_complex_float* taup1,\n                           lapack_complex_float* taup2,\n                           lapack_complex_float* tauq1,\n                           lapack_complex_float* tauq2 );\nlapack_int LAPACKE_cunbdb_work( int matrix_order, char trans, char signs,\n                                lapack_int m, lapack_int p, lapack_int q,\n                                lapack_complex_float* x11, lapack_int ldx11,\n                                lapack_complex_float* x12, lapack_int ldx12,\n                                lapack_complex_float* x21, lapack_int ldx21,\n                                lapack_complex_float* x22, lapack_int ldx22,\n                                float* theta, float* phi,\n                                lapack_complex_float* taup1,\n                                lapack_complex_float* taup2,\n                                lapack_complex_float* tauq1,\n                                lapack_complex_float* tauq2,\n                                lapack_complex_float* work, lapack_int lwork );\nlapack_int LAPACKE_cuncsd( int matrix_order, char jobu1, char jobu2,\n                           char jobv1t, char jobv2t, char trans, char signs,\n                           lapack_int m, lapack_int p, lapack_int q,\n                           lapack_complex_float* x11, lapack_int ldx11,\n                           lapack_complex_float* x12, lapack_int ldx12,\n                           lapack_complex_float* x21, lapack_int ldx21,\n                           lapack_complex_float* x22, lapack_int ldx22,\n                           float* theta, lapack_complex_float* u1,\n                           lapack_int ldu1, lapack_complex_float* u2,\n                           lapack_int ldu2, lapack_complex_float* v1t,\n                  
         lapack_int ldv1t, lapack_complex_float* v2t,\n                           lapack_int ldv2t );\nlapack_int LAPACKE_cuncsd_work( int matrix_order, char jobu1, char jobu2,\n                                char jobv1t, char jobv2t, char trans,\n                                char signs, lapack_int m, lapack_int p,\n                                lapack_int q, lapack_complex_float* x11,\n                                lapack_int ldx11, lapack_complex_float* x12,\n                                lapack_int ldx12, lapack_complex_float* x21,\n                                lapack_int ldx21, lapack_complex_float* x22,\n                                lapack_int ldx22, float* theta,\n                                lapack_complex_float* u1, lapack_int ldu1,\n                                lapack_complex_float* u2, lapack_int ldu2,\n                                lapack_complex_float* v1t, lapack_int ldv1t,\n                                lapack_complex_float* v2t, lapack_int ldv2t,\n                                lapack_complex_float* work, lapack_int lwork,\n                                float* rwork, lapack_int lrwork,\n                                lapack_int* iwork );\nlapack_int LAPACKE_dbbcsd( int matrix_order, char jobu1, char jobu2,\n                           char jobv1t, char jobv2t, char trans, lapack_int m,\n                           lapack_int p, lapack_int q, double* theta,\n                           double* phi, double* u1, lapack_int ldu1, double* u2,\n                           lapack_int ldu2, double* v1t, lapack_int ldv1t,\n                           double* v2t, lapack_int ldv2t, double* b11d,\n                           double* b11e, double* b12d, double* b12e,\n                           double* b21d, double* b21e, double* b22d,\n                           double* b22e );\nlapack_int LAPACKE_dbbcsd_work( int matrix_order, char jobu1, char jobu2,\n                                char jobv1t, char jobv2t, char trans,\n                                lapack_int m, lapack_int p, lapack_int q,\n                                double* theta, double* phi, double* u1,\n                                lapack_int ldu1, double* u2, lapack_int ldu2,\n                                double* v1t, lapack_int ldv1t, double* v2t,\n                                lapack_int ldv2t, double* b11d, double* b11e,\n                                double* b12d, double* b12e, double* b21d,\n                                double* b21e, double* b22d, double* b22e,\n                                double* work, lapack_int lwork );\nlapack_int LAPACKE_dorbdb( int matrix_order, char trans, char signs,\n                           lapack_int m, lapack_int p, lapack_int q,\n                           double* x11, lapack_int ldx11, double* x12,\n                           lapack_int ldx12, double* x21, lapack_int ldx21,\n                           double* x22, lapack_int ldx22, double* theta,\n                           double* phi, double* taup1, double* taup2,\n                           double* tauq1, double* tauq2 );\nlapack_int LAPACKE_dorbdb_work( int matrix_order, char trans, char signs,\n                                lapack_int m, lapack_int p, lapack_int q,\n                                double* x11, lapack_int ldx11, double* x12,\n                                lapack_int ldx12, double* x21, lapack_int ldx21,\n                                double* x22, lapack_int ldx22, double* theta,\n                                double* phi, double* taup1, double* taup2,\n             
                   double* tauq1, double* tauq2, double* work,\n                                lapack_int lwork );\nlapack_int LAPACKE_dorcsd( int matrix_order, char jobu1, char jobu2,\n                           char jobv1t, char jobv2t, char trans, char signs,\n                           lapack_int m, lapack_int p, lapack_int q,\n                           double* x11, lapack_int ldx11, double* x12,\n                           lapack_int ldx12, double* x21, lapack_int ldx21,\n                           double* x22, lapack_int ldx22, double* theta,\n                           double* u1, lapack_int ldu1, double* u2,\n                           lapack_int ldu2, double* v1t, lapack_int ldv1t,\n                           double* v2t, lapack_int ldv2t );\nlapack_int LAPACKE_dorcsd_work( int matrix_order, char jobu1, char jobu2,\n                                char jobv1t, char jobv2t, char trans,\n                                char signs, lapack_int m, lapack_int p,\n                                lapack_int q, double* x11, lapack_int ldx11,\n                                double* x12, lapack_int ldx12, double* x21,\n                                lapack_int ldx21, double* x22, lapack_int ldx22,\n                                double* theta, double* u1, lapack_int ldu1,\n                                double* u2, lapack_int ldu2, double* v1t,\n                                lapack_int ldv1t, double* v2t, lapack_int ldv2t,\n                                double* work, lapack_int lwork,\n                                lapack_int* iwork );\nlapack_int LAPACKE_dsyconv( int matrix_order, char uplo, char way, lapack_int n,\n                            double* a, lapack_int lda, const lapack_int* ipiv );\nlapack_int LAPACKE_dsyconv_work( int matrix_order, char uplo, char way,\n                                 lapack_int n, double* a, lapack_int lda,\n                                 const lapack_int* ipiv, double* work );\nlapack_int LAPACKE_dsyswapr( int matrix_order, char uplo, lapack_int n,\n                             double* a, lapack_int i1, lapack_int i2 );\nlapack_int LAPACKE_dsyswapr_work( int matrix_order, char uplo, lapack_int n,\n                                  double* a, lapack_int i1, lapack_int i2 );\nlapack_int LAPACKE_dsytri2( int matrix_order, char uplo, lapack_int n,\n                            double* a, lapack_int lda, const lapack_int* ipiv );\nlapack_int LAPACKE_dsytri2_work( int matrix_order, char uplo, lapack_int n,\n                                 double* a, lapack_int lda,\n                                 const lapack_int* ipiv,\n                                 double* work, lapack_int lwork );\nlapack_int LAPACKE_dsytri2x( int matrix_order, char uplo, lapack_int n,\n                             double* a, lapack_int lda, const lapack_int* ipiv,\n                             lapack_int nb );\nlapack_int LAPACKE_dsytri2x_work( int matrix_order, char uplo, lapack_int n,\n                                  double* a, lapack_int lda,\n                                  const lapack_int* ipiv, double* work,\n                                  lapack_int nb );\nlapack_int LAPACKE_dsytrs2( int matrix_order, char uplo, lapack_int n,\n                            lapack_int nrhs, const double* a, lapack_int lda,\n                            const lapack_int* ipiv, double* b, lapack_int ldb );\nlapack_int LAPACKE_dsytrs2_work( int matrix_order, char uplo, lapack_int n,\n                                 lapack_int nrhs, const double* a,\n                 
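/* Editorial sketch (not from the original header): LAPACKE_dsytrs2, declared above, solves A*X = B from the Bunch-Kaufman factorization produced by LAPACKE_dsytrf (declared elsewhere in this header). Assuming a column-major symmetric matrix a (n-by-n, leading dimension lda) and right-hand sides b (ldb >= n):\n\n     lapack_int* ipiv = malloc( n * sizeof *ipiv );\n     lapack_int info = LAPACKE_dsytrf( LAPACK_COL_MAJOR, 'L', n,\n                                       a, lda, ipiv );\n     if( info == 0 )\n         info = LAPACKE_dsytrs2( LAPACK_COL_MAJOR, 'L', n, nrhs,\n                                 a, lda, ipiv, b, ldb );\n     free( ipiv ); */\n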
                lapack_int lda, const lapack_int* ipiv,\n                                 double* b, lapack_int ldb, double* work );\nlapack_int LAPACKE_sbbcsd( int matrix_order, char jobu1, char jobu2,\n                           char jobv1t, char jobv2t, char trans, lapack_int m,\n                           lapack_int p, lapack_int q, float* theta, float* phi,\n                           float* u1, lapack_int ldu1, float* u2,\n                           lapack_int ldu2, float* v1t, lapack_int ldv1t,\n                           float* v2t, lapack_int ldv2t, float* b11d,\n                           float* b11e, float* b12d, float* b12e, float* b21d,\n                           float* b21e, float* b22d, float* b22e );\nlapack_int LAPACKE_sbbcsd_work( int matrix_order, char jobu1, char jobu2,\n                                char jobv1t, char jobv2t, char trans,\n                                lapack_int m, lapack_int p, lapack_int q,\n                                float* theta, float* phi, float* u1,\n                                lapack_int ldu1, float* u2, lapack_int ldu2,\n                                float* v1t, lapack_int ldv1t, float* v2t,\n                                lapack_int ldv2t, float* b11d, float* b11e,\n                                float* b12d, float* b12e, float* b21d,\n                                float* b21e, float* b22d, float* b22e,\n                                float* work, lapack_int lwork );\nlapack_int LAPACKE_sorbdb( int matrix_order, char trans, char signs,\n                           lapack_int m, lapack_int p, lapack_int q, float* x11,\n                           lapack_int ldx11, float* x12, lapack_int ldx12,\n                           float* x21, lapack_int ldx21, float* x22,\n                           lapack_int ldx22, float* theta, float* phi,\n                           float* taup1, float* taup2, float* tauq1,\n                           float* tauq2 );\nlapack_int LAPACKE_sorbdb_work( int matrix_order, char trans, char signs,\n                                lapack_int m, lapack_int p, lapack_int q,\n                                float* x11, lapack_int ldx11, float* x12,\n                                lapack_int ldx12, float* x21, lapack_int ldx21,\n                                float* x22, lapack_int ldx22, float* theta,\n                                float* phi, float* taup1, float* taup2,\n                                float* tauq1, float* tauq2, float* work,\n                                lapack_int lwork );\nlapack_int LAPACKE_sorcsd( int matrix_order, char jobu1, char jobu2,\n                           char jobv1t, char jobv2t, char trans, char signs,\n                           lapack_int m, lapack_int p, lapack_int q, float* x11,\n                           lapack_int ldx11, float* x12, lapack_int ldx12,\n                           float* x21, lapack_int ldx21, float* x22,\n                           lapack_int ldx22, float* theta, float* u1,\n                           lapack_int ldu1, float* u2, lapack_int ldu2,\n                           float* v1t, lapack_int ldv1t, float* v2t,\n                           lapack_int ldv2t );\nlapack_int LAPACKE_sorcsd_work( int matrix_order, char jobu1, char jobu2,\n                                char jobv1t, char jobv2t, char trans,\n                                char signs, lapack_int m, lapack_int p,\n                                lapack_int q, float* x11, lapack_int ldx11,\n                                float* x12, lapack_int ldx12, float* x21,\n                    
             lapack_int ldx21, float* x22, lapack_int ldx22,\n                                float* theta, float* u1, lapack_int ldu1,\n                                float* u2, lapack_int ldu2, float* v1t,\n                                lapack_int ldv1t, float* v2t, lapack_int ldv2t,\n                                float* work, lapack_int lwork,\n                                lapack_int* iwork );\nlapack_int LAPACKE_ssyconv( int matrix_order, char uplo, char way, lapack_int n,\n                            float* a, lapack_int lda, const lapack_int* ipiv );\nlapack_int LAPACKE_ssyconv_work( int matrix_order, char uplo, char way,\n                                 lapack_int n, float* a, lapack_int lda,\n                                 const lapack_int* ipiv, float* work );\nlapack_int LAPACKE_ssyswapr( int matrix_order, char uplo, lapack_int n,\n                             float* a, lapack_int i1, lapack_int i2 );\nlapack_int LAPACKE_ssyswapr_work( int matrix_order, char uplo, lapack_int n,\n                                  float* a, lapack_int i1, lapack_int i2 );\nlapack_int LAPACKE_ssytri2( int matrix_order, char uplo, lapack_int n, float* a,\n                            lapack_int lda, const lapack_int* ipiv );\nlapack_int LAPACKE_ssytri2_work( int matrix_order, char uplo, lapack_int n,\n                                 float* a, lapack_int lda,\n                                 const lapack_int* ipiv,\n                                 float* work, lapack_int lwork );\nlapack_int LAPACKE_ssytri2x( int matrix_order, char uplo, lapack_int n,\n                             float* a, lapack_int lda, const lapack_int* ipiv,\n                             lapack_int nb );\nlapack_int LAPACKE_ssytri2x_work( int matrix_order, char uplo, lapack_int n,\n                                  float* a, lapack_int lda,\n                                  const lapack_int* ipiv, float* work,\n                                  lapack_int nb );\nlapack_int LAPACKE_ssytrs2( int matrix_order, char uplo, lapack_int n,\n                            lapack_int nrhs, const float* a, lapack_int lda,\n                            const lapack_int* ipiv, float* b, lapack_int ldb );\nlapack_int LAPACKE_ssytrs2_work( int matrix_order, char uplo, lapack_int n,\n                                 lapack_int nrhs, const float* a,\n                                 lapack_int lda, const lapack_int* ipiv,\n                                 float* b, lapack_int ldb, float* work );\nlapack_int LAPACKE_zbbcsd( int matrix_order, char jobu1, char jobu2,\n                           char jobv1t, char jobv2t, char trans, lapack_int m,\n                           lapack_int p, lapack_int q, double* theta,\n                           double* phi, lapack_complex_double* u1,\n                           lapack_int ldu1, lapack_complex_double* u2,\n                           lapack_int ldu2, lapack_complex_double* v1t,\n                           lapack_int ldv1t, lapack_complex_double* v2t,\n                           lapack_int ldv2t, double* b11d, double* b11e,\n                           double* b12d, double* b12e, double* b21d,\n                           double* b21e, double* b22d, double* b22e );\nlapack_int LAPACKE_zbbcsd_work( int matrix_order, char jobu1, char jobu2,\n                                char jobv1t, char jobv2t, char trans,\n                                lapack_int m, lapack_int p, lapack_int q,\n                                double* theta, double* phi,\n                                
lapack_complex_double* u1, lapack_int ldu1,\n                                lapack_complex_double* u2, lapack_int ldu2,\n                                lapack_complex_double* v1t, lapack_int ldv1t,\n                                lapack_complex_double* v2t, lapack_int ldv2t,\n                                double* b11d, double* b11e, double* b12d,\n                                double* b12e, double* b21d, double* b21e,\n                                double* b22d, double* b22e, double* rwork,\n                                lapack_int lrwork );\nlapack_int LAPACKE_zheswapr( int matrix_order, char uplo, lapack_int n,\n                             lapack_complex_double* a, lapack_int i1,\n                             lapack_int i2 );\nlapack_int LAPACKE_zheswapr_work( int matrix_order, char uplo, lapack_int n,\n                                  lapack_complex_double* a, lapack_int i1,\n                                  lapack_int i2 );\nlapack_int LAPACKE_zhetri2( int matrix_order, char uplo, lapack_int n,\n                            lapack_complex_double* a, lapack_int lda,\n                            const lapack_int* ipiv );\nlapack_int LAPACKE_zhetri2_work( int matrix_order, char uplo, lapack_int n,\n                                 lapack_complex_double* a, lapack_int lda,\n                                 const lapack_int* ipiv,\n                                 lapack_complex_double* work, lapack_int lwork );\nlapack_int LAPACKE_zhetri2x( int matrix_order, char uplo, lapack_int n,\n                             lapack_complex_double* a, lapack_int lda,\n                             const lapack_int* ipiv, lapack_int nb );\nlapack_int LAPACKE_zhetri2x_work( int matrix_order, char uplo, lapack_int n,\n                                  lapack_complex_double* a, lapack_int lda,\n                                  const lapack_int* ipiv,\n                                  lapack_complex_double* work, lapack_int nb );\nlapack_int LAPACKE_zhetrs2( int matrix_order, char uplo, lapack_int n,\n                            lapack_int nrhs, const lapack_complex_double* a,\n                            lapack_int lda, const lapack_int* ipiv,\n                            lapack_complex_double* b, lapack_int ldb );\nlapack_int LAPACKE_zhetrs2_work( int matrix_order, char uplo, lapack_int n,\n                                 lapack_int nrhs, const lapack_complex_double* a,\n                                 lapack_int lda, const lapack_int* ipiv,\n                                 lapack_complex_double* b, lapack_int ldb,\n                                 lapack_complex_double* work );\nlapack_int LAPACKE_zsyconv( int matrix_order, char uplo, char way, lapack_int n,\n                            lapack_complex_double* a, lapack_int lda,\n                            const lapack_int* ipiv );\nlapack_int LAPACKE_zsyconv_work( int matrix_order, char uplo, char way,\n                                 lapack_int n, lapack_complex_double* a,\n                                 lapack_int lda, const lapack_int* ipiv,\n                                 lapack_complex_double* work );\nlapack_int LAPACKE_zsyswapr( int matrix_order, char uplo, lapack_int n,\n                             lapack_complex_double* a, lapack_int i1,\n                             lapack_int i2 );\nlapack_int LAPACKE_zsyswapr_work( int matrix_order, char uplo, lapack_int n,\n                                  lapack_complex_double* a, lapack_int i1,\n                                  lapack_int i2 );\nlapack_int LAPACKE_zsytri2( int 
matrix_order, char uplo, lapack_int n,\n                            lapack_complex_double* a, lapack_int lda,\n                            const lapack_int* ipiv );\nlapack_int LAPACKE_zsytri2_work( int matrix_order, char uplo, lapack_int n,\n                                 lapack_complex_double* a, lapack_int lda,\n                                 const lapack_int* ipiv,\n                                 lapack_complex_double* work, lapack_int lwork );\nlapack_int LAPACKE_zsytri2x( int matrix_order, char uplo, lapack_int n,\n                             lapack_complex_double* a, lapack_int lda,\n                             const lapack_int* ipiv, lapack_int nb );\nlapack_int LAPACKE_zsytri2x_work( int matrix_order, char uplo, lapack_int n,\n                                  lapack_complex_double* a, lapack_int lda,\n                                  const lapack_int* ipiv,\n                                  lapack_complex_double* work, lapack_int nb );\nlapack_int LAPACKE_zsytrs2( int matrix_order, char uplo, lapack_int n,\n                            lapack_int nrhs, const lapack_complex_double* a,\n                            lapack_int lda, const lapack_int* ipiv,\n                            lapack_complex_double* b, lapack_int ldb );\nlapack_int LAPACKE_zsytrs2_work( int matrix_order, char uplo, lapack_int n,\n                                 lapack_int nrhs, const lapack_complex_double* a,\n                                 lapack_int lda, const lapack_int* ipiv,\n                                 lapack_complex_double* b, lapack_int ldb,\n                                 lapack_complex_double* work );\nlapack_int LAPACKE_zunbdb( int matrix_order, char trans, char signs,\n                           lapack_int m, lapack_int p, lapack_int q,\n                           lapack_complex_double* x11, lapack_int ldx11,\n                           lapack_complex_double* x12, lapack_int ldx12,\n                           lapack_complex_double* x21, lapack_int ldx21,\n                           lapack_complex_double* x22, lapack_int ldx22,\n                           double* theta, double* phi,\n                           lapack_complex_double* taup1,\n                           lapack_complex_double* taup2,\n                           lapack_complex_double* tauq1,\n                           lapack_complex_double* tauq2 );\nlapack_int LAPACKE_zunbdb_work( int matrix_order, char trans, char signs,\n                                lapack_int m, lapack_int p, lapack_int q,\n                                lapack_complex_double* x11, lapack_int ldx11,\n                                lapack_complex_double* x12, lapack_int ldx12,\n                                lapack_complex_double* x21, lapack_int ldx21,\n                                lapack_complex_double* x22, lapack_int ldx22,\n                                double* theta, double* phi,\n                                lapack_complex_double* taup1,\n                                lapack_complex_double* taup2,\n                                lapack_complex_double* tauq1,\n                                lapack_complex_double* tauq2,\n                                lapack_complex_double* work, lapack_int lwork );\nlapack_int LAPACKE_zuncsd( int matrix_order, char jobu1, char jobu2,\n                           char jobv1t, char jobv2t, char trans, char signs,\n                           lapack_int m, lapack_int p, lapack_int q,\n                           lapack_complex_double* x11, lapack_int ldx11,\n                           
lapack_complex_double* x12, lapack_int ldx12,\n                           lapack_complex_double* x21, lapack_int ldx21,\n                           lapack_complex_double* x22, lapack_int ldx22,\n                           double* theta, lapack_complex_double* u1,\n                           lapack_int ldu1, lapack_complex_double* u2,\n                           lapack_int ldu2, lapack_complex_double* v1t,\n                           lapack_int ldv1t, lapack_complex_double* v2t,\n                           lapack_int ldv2t );\nlapack_int LAPACKE_zuncsd_work( int matrix_order, char jobu1, char jobu2,\n                                char jobv1t, char jobv2t, char trans,\n                                char signs, lapack_int m, lapack_int p,\n                                lapack_int q, lapack_complex_double* x11,\n                                lapack_int ldx11, lapack_complex_double* x12,\n                                lapack_int ldx12, lapack_complex_double* x21,\n                                lapack_int ldx21, lapack_complex_double* x22,\n                                lapack_int ldx22, double* theta,\n                                lapack_complex_double* u1, lapack_int ldu1,\n                                lapack_complex_double* u2, lapack_int ldu2,\n                                lapack_complex_double* v1t, lapack_int ldv1t,\n                                lapack_complex_double* v2t, lapack_int ldv2t,\n                                lapack_complex_double* work, lapack_int lwork,\n                                double* rwork, lapack_int lrwork,\n                                lapack_int* iwork );\n//LAPACK 3.4.0\nlapack_int LAPACKE_sgemqrt( int matrix_order, char side, char trans,\n                            lapack_int m, lapack_int n, lapack_int k,\n                            lapack_int nb, const float* v, lapack_int ldv,\n                            const float* t, lapack_int ldt, float* c,\n                            lapack_int ldc );\nlapack_int LAPACKE_dgemqrt( int matrix_order, char side, char trans,\n                            lapack_int m, lapack_int n, lapack_int k,\n                            lapack_int nb, const double* v, lapack_int ldv,\n                            const double* t, lapack_int ldt, double* c,\n                            lapack_int ldc );\nlapack_int LAPACKE_cgemqrt( int matrix_order, char side, char trans,\n                            lapack_int m, lapack_int n, lapack_int k,\n                            lapack_int nb, const lapack_complex_float* v,\n                            lapack_int ldv, const lapack_complex_float* t,\n                            lapack_int ldt, lapack_complex_float* c,\n                            lapack_int ldc );\nlapack_int LAPACKE_zgemqrt( int matrix_order, char side, char trans,\n                            lapack_int m, lapack_int n, lapack_int k,\n                            lapack_int nb, const lapack_complex_double* v,\n                            lapack_int ldv, const lapack_complex_double* t,\n                            lapack_int ldt, lapack_complex_double* c,\n                            lapack_int ldc );\n\nlapack_int LAPACKE_sgeqrt( int matrix_order, lapack_int m, lapack_int n,\n                           lapack_int nb, float* a, lapack_int lda, float* t,\n                           lapack_int ldt );\nlapack_int LAPACKE_dgeqrt( int matrix_order, lapack_int m, lapack_int n,\n                           lapack_int nb, double* a, lapack_int lda, double* t,\n                           lapack_int ldt 
);\nlapack_int LAPACKE_cgeqrt( int matrix_order, lapack_int m, lapack_int n,\n                           lapack_int nb, lapack_complex_float* a,\n                           lapack_int lda, lapack_complex_float* t,\n                           lapack_int ldt );\nlapack_int LAPACKE_zgeqrt( int matrix_order, lapack_int m, lapack_int n,\n                           lapack_int nb, lapack_complex_double* a,\n                           lapack_int lda, lapack_complex_double* t,\n                           lapack_int ldt );\n\nlapack_int LAPACKE_sgeqrt2( int matrix_order, lapack_int m, lapack_int n,\n                            float* a, lapack_int lda, float* t,\n                            lapack_int ldt );\nlapack_int LAPACKE_dgeqrt2( int matrix_order, lapack_int m, lapack_int n,\n                            double* a, lapack_int lda, double* t,\n                            lapack_int ldt );\nlapack_int LAPACKE_cgeqrt2( int matrix_order, lapack_int m, lapack_int n,\n                            lapack_complex_float* a, lapack_int lda,\n                            lapack_complex_float* t, lapack_int ldt );\nlapack_int LAPACKE_zgeqrt2( int matrix_order, lapack_int m, lapack_int n,\n                            lapack_complex_double* a, lapack_int lda,\n                            lapack_complex_double* t, lapack_int ldt );\n\nlapack_int LAPACKE_sgeqrt3( int matrix_order, lapack_int m, lapack_int n,\n                            float* a, lapack_int lda, float* t,\n                            lapack_int ldt );\nlapack_int LAPACKE_dgeqrt3( int matrix_order, lapack_int m, lapack_int n,\n                            double* a, lapack_int lda, double* t,\n                            lapack_int ldt );\nlapack_int LAPACKE_cgeqrt3( int matrix_order, lapack_int m, lapack_int n,\n                            lapack_complex_float* a, lapack_int lda,\n                            lapack_complex_float* t, lapack_int ldt );\nlapack_int LAPACKE_zgeqrt3( int matrix_order, lapack_int m, lapack_int n,\n                            lapack_complex_double* a, lapack_int lda,\n                            lapack_complex_double* t, lapack_int ldt );\n\nlapack_int LAPACKE_stpmqrt( int matrix_order, char side, char trans,\n                            lapack_int m, lapack_int n, lapack_int k,\n                            lapack_int l, lapack_int nb, const float* v,\n                            lapack_int ldv, const float* t, lapack_int ldt,\n                            float* a, lapack_int lda, float* b,\n                            lapack_int ldb );\nlapack_int LAPACKE_dtpmqrt( int matrix_order, char side, char trans,\n                            lapack_int m, lapack_int n, lapack_int k,\n                            lapack_int l, lapack_int nb, const double* v,\n                            lapack_int ldv, const double* t, lapack_int ldt,\n                            double* a, lapack_int lda, double* b,\n                            lapack_int ldb );\nlapack_int LAPACKE_ctpmqrt( int matrix_order, char side, char trans,\n                            lapack_int m, lapack_int n, lapack_int k,\n                            lapack_int l, lapack_int nb,\n                            const lapack_complex_float* v, lapack_int ldv,\n                            const lapack_complex_float* t, lapack_int ldt,\n                            lapack_complex_float* a, lapack_int lda,\n                            lapack_complex_float* b, lapack_int ldb );\nlapack_int LAPACKE_ztpmqrt( int matrix_order, char side, char trans,\n                            
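/* Editorial sketch (not from the original header): the LAPACK 3.4.0 blocked QR pair above. LAPACKE_dgeqrt factors a column-major m-by-n matrix a (m >= n) with block size nb (1 <= nb <= n), leaving the reflectors in a and the block T factors in t (ldt >= nb); LAPACKE_dgemqrt then applies Q**T to an m-by-p matrix c without ever forming Q. Assuming the arrays and dimensions are already set up:\n\n     lapack_int info;\n     info = LAPACKE_dgeqrt( LAPACK_COL_MAJOR, m, n, nb, a, lda, t, ldt );\n     if( info == 0 )\n         info = LAPACKE_dgemqrt( LAPACK_COL_MAJOR, 'L', 'T', m, p, n,\n                                 nb, a, lda, t, ldt, c, ldc ); */\n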
lapack_int m, lapack_int n, lapack_int k,\n                            lapack_int l, lapack_int nb,\n                            const lapack_complex_double* v, lapack_int ldv,\n                            const lapack_complex_double* t, lapack_int ldt,\n                            lapack_complex_double* a, lapack_int lda,\n                            lapack_complex_double* b, lapack_int ldb );\n\nlapack_int LAPACKE_stpqrt( int matrix_order, lapack_int m, lapack_int n,\n                           lapack_int l, lapack_int nb, float* a,\n                           lapack_int lda, float* b, lapack_int ldb, float* t,\n                           lapack_int ldt );\nlapack_int LAPACKE_dtpqrt( int matrix_order, lapack_int m, lapack_int n,\n                           lapack_int l, lapack_int nb, double* a,\n                           lapack_int lda, double* b, lapack_int ldb, double* t,\n                           lapack_int ldt );\nlapack_int LAPACKE_ctpqrt( int matrix_order, lapack_int m, lapack_int n,\n                           lapack_int l, lapack_int nb, lapack_complex_float* a,\n                           lapack_int lda, lapack_complex_float* b,\n                           lapack_int ldb, lapack_complex_float* t,\n                           lapack_int ldt );\nlapack_int LAPACKE_ztpqrt( int matrix_order, lapack_int m, lapack_int n,\n                           lapack_int l, lapack_int nb,\n                           lapack_complex_double* a, lapack_int lda,\n                           lapack_complex_double* b, lapack_int ldb,\n                           lapack_complex_double* t, lapack_int ldt );\n\nlapack_int LAPACKE_stpqrt2( int matrix_order, lapack_int m, lapack_int n,\n                            float* a, lapack_int lda, float* b, lapack_int ldb,\n                            float* t, lapack_int ldt );\nlapack_int LAPACKE_dtpqrt2( int matrix_order, lapack_int m, lapack_int n,\n                            double* a, lapack_int lda, double* b,\n                            lapack_int ldb, double* t, lapack_int ldt );\nlapack_int LAPACKE_ctpqrt2( int matrix_order, lapack_int m, lapack_int n,\n                            lapack_complex_float* a, lapack_int lda,\n                            lapack_complex_float* b, lapack_int ldb,\n                            lapack_complex_float* t, lapack_int ldt );\nlapack_int LAPACKE_ztpqrt2( int matrix_order, lapack_int m, lapack_int n,\n                            lapack_complex_double* a, lapack_int lda,\n                            lapack_complex_double* b, lapack_int ldb,\n                            lapack_complex_double* t, lapack_int ldt );\n\nlapack_int LAPACKE_stprfb( int matrix_order, char side, char trans, char direct,\n                           char storev, lapack_int m, lapack_int n,\n                           lapack_int k, lapack_int l, const float* v,\n                           lapack_int ldv, const float* t, lapack_int ldt,\n                           float* a, lapack_int lda, float* b, lapack_int ldb,\n                           lapack_int myldwork );\nlapack_int LAPACKE_dtprfb( int matrix_order, char side, char trans, char direct,\n                           char storev, lapack_int m, lapack_int n,\n                           lapack_int k, lapack_int l, const double* v,\n                           lapack_int ldv, const double* t, lapack_int ldt,\n                           double* a, lapack_int lda, double* b, lapack_int ldb,\n                           lapack_int myldwork );\nlapack_int LAPACKE_ctprfb( int matrix_order, char side, char trans, char direct,\n                           char storev, lapack_int m, lapack_int n,\n                           lapack_int k, lapack_int l,\n                           const lapack_complex_float* v, lapack_int ldv, const\n                           
lapack_complex_float* t, lapack_int ldt,\n                           lapack_complex_float* a, lapack_int lda,\n                           lapack_complex_float* b, lapack_int ldb,\n                           lapack_int myldwork );\nlapack_int LAPACKE_ztprfb( int matrix_order, char side, char trans, char direct,\n                           char storev, lapack_int m, lapack_int n,\n                           lapack_int k, lapack_int l,\n                           const lapack_complex_double* v, lapack_int ldv,\n                           const lapack_complex_double* t, lapack_int ldt,\n                           lapack_complex_double* a, lapack_int lda,\n                           lapack_complex_double* b, lapack_int ldb,\n                           lapack_int myldwork );\n\nlapack_int LAPACKE_sgemqrt_work( int matrix_order, char side, char trans,\n                                 lapack_int m, lapack_int n, lapack_int k,\n                                 lapack_int nb, const float* v, lapack_int ldv,\n                                 const float* t, lapack_int ldt, float* c,\n                                 lapack_int ldc, float* work );\nlapack_int LAPACKE_dgemqrt_work( int matrix_order, char side, char trans,\n                                 lapack_int m, lapack_int n, lapack_int k,\n                                 lapack_int nb, const double* v, lapack_int ldv,\n                                 const double* t, lapack_int ldt, double* c,\n                                 lapack_int ldc, double* work );\nlapack_int LAPACKE_cgemqrt_work( int matrix_order, char side, char trans,\n                                 lapack_int m, lapack_int n, lapack_int k,\n                                 lapack_int nb, const lapack_complex_float* v,\n                                 lapack_int ldv, const lapack_complex_float* t,\n                                 lapack_int ldt, lapack_complex_float* c,\n                                 lapack_int ldc, lapack_complex_float* work );\nlapack_int LAPACKE_zgemqrt_work( int matrix_order, char side, char trans,\n                                 lapack_int m, lapack_int n, lapack_int k,\n                                 lapack_int nb, const lapack_complex_double* v,\n                                 lapack_int ldv, const lapack_complex_double* t,\n                                 lapack_int ldt, lapack_complex_double* c,\n                                 lapack_int ldc, lapack_complex_double* work );\n\nlapack_int LAPACKE_sgeqrt_work( int matrix_order, lapack_int m, lapack_int n,\n                                lapack_int nb, float* a, lapack_int lda,\n                                float* t, lapack_int ldt, float* work );\nlapack_int LAPACKE_dgeqrt_work( int matrix_order, lapack_int m, lapack_int n,\n                                lapack_int nb, double* a, lapack_int lda,\n                                double* t, lapack_int ldt, double* work );\nlapack_int LAPACKE_cgeqrt_work( int matrix_order, lapack_int m, lapack_int n,\n                                lapack_int nb, lapack_complex_float* a,\n                                lapack_int lda, lapack_complex_float* t,\n                                lapack_int ldt, lapack_complex_float* work );\nlapack_int LAPACKE_zgeqrt_work( int matrix_order, lapack_int m, lapack_int n,\n                                lapack_int nb, lapack_complex_double* a,\n                                lapack_int lda, lapack_complex_double* t,\n                                lapack_int ldt, lapack_complex_double* work 
);\n\nlapack_int LAPACKE_sgeqrt2_work( int matrix_order, lapack_int m, lapack_int n,\n                                 float* a, lapack_int lda, float* t,\n                                 lapack_int ldt );\nlapack_int LAPACKE_dgeqrt2_work( int matrix_order, lapack_int m, lapack_int n,\n                                 double* a, lapack_int lda, double* t,\n                                 lapack_int ldt );\nlapack_int LAPACKE_cgeqrt2_work( int matrix_order, lapack_int m, lapack_int n,\n                                 lapack_complex_float* a, lapack_int lda,\n                                 lapack_complex_float* t, lapack_int ldt );\nlapack_int LAPACKE_zgeqrt2_work( int matrix_order, lapack_int m, lapack_int n,\n                                 lapack_complex_double* a, lapack_int lda,\n                                 lapack_complex_double* t, lapack_int ldt );\n\nlapack_int LAPACKE_sgeqrt3_work( int matrix_order, lapack_int m, lapack_int n,\n                                 float* a, lapack_int lda, float* t,\n                                 lapack_int ldt );\nlapack_int LAPACKE_dgeqrt3_work( int matrix_order, lapack_int m, lapack_int n,\n                                 double* a, lapack_int lda, double* t,\n                                 lapack_int ldt );\nlapack_int LAPACKE_cgeqrt3_work( int matrix_order, lapack_int m, lapack_int n,\n                                 lapack_complex_float* a, lapack_int lda,\n                                 lapack_complex_float* t, lapack_int ldt );\nlapack_int LAPACKE_zgeqrt3_work( int matrix_order, lapack_int m, lapack_int n,\n                                 lapack_complex_double* a, lapack_int lda,\n                                 lapack_complex_double* t, lapack_int ldt );\n\nlapack_int LAPACKE_stpmqrt_work( int matrix_order, char side, char trans,\n                                 lapack_int m, lapack_int n, lapack_int k,\n                                 lapack_int l, lapack_int nb, const float* v,\n                                 lapack_int ldv, const float* t, lapack_int ldt,\n                                 float* a, lapack_int lda, float* b,\n                                 lapack_int ldb, float* work );\nlapack_int LAPACKE_dtpmqrt_work( int matrix_order, char side, char trans,\n                                 lapack_int m, lapack_int n, lapack_int k,\n                                 lapack_int l, lapack_int nb, const double* v,\n                                 lapack_int ldv, const double* t,\n                                 lapack_int ldt, double* a, lapack_int lda,\n                                 double* b, lapack_int ldb, double* work );\nlapack_int LAPACKE_ctpmqrt_work( int matrix_order, char side, char trans,\n                                 lapack_int m, lapack_int n, lapack_int k,\n                                 lapack_int l, lapack_int nb,\n                                 const lapack_complex_float* v, lapack_int ldv,\n                                 const lapack_complex_float* t, lapack_int ldt,\n                                 lapack_complex_float* a, lapack_int lda,\n                                 lapack_complex_float* b, lapack_int ldb,\n                                 lapack_complex_float* work );\nlapack_int LAPACKE_ztpmqrt_work( int matrix_order, char side, char trans,\n                                 lapack_int m, lapack_int n, lapack_int k,\n                                 lapack_int l, lapack_int nb,\n                                 const lapack_complex_double* v, lapack_int ldv,\n                
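/* Editorial sketch (not from the original header): the triangular-pentagonal QR routines above extend an existing factorization. LAPACKE_dtpqrt factors the stacked matrix [ A; B ], where a holds an n-by-n upper-triangular A and b an m-by-n pentagonal B (passing l = 0 makes B a plain rectangular block), overwriting a with R, b with the Householder vectors, and t with the block T factors; LAPACKE_dtpmqrt then applies the resulting Q or Q**T without forming it. A minimal call, with sizes assumed set up:\n\n     lapack_int info = LAPACKE_dtpqrt( LAPACK_COL_MAJOR, m, n, 0, nb,\n                                       a, lda, b, ldb, t, ldt ); */\n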
                 const lapack_complex_double* t, lapack_int ldt,\n                                 lapack_complex_double* a, lapack_int lda,\n                                 lapack_complex_double* b, lapack_int ldb,\n                                 lapack_complex_double* work );\n\nlapack_int LAPACKE_dtpqrt_work( int matrix_order, lapack_int m, lapack_int n,\n                                lapack_int l, lapack_int nb, double* a,\n                                lapack_int lda, double* b, lapack_int ldb,\n                                double* t, lapack_int ldt, double* work );\nlapack_int LAPACKE_ctpqrt_work( int matrix_order, lapack_int m, lapack_int n,\n                                lapack_int l, lapack_int nb,\n                                lapack_complex_float* a, lapack_int lda,\n                                lapack_complex_float* b, lapack_int ldb,\n                                lapack_complex_float* t, lapack_int ldt,\n                                lapack_complex_float* work );\nlapack_int LAPACKE_ztpqrt_work( int matrix_order, lapack_int m, lapack_int n,\n                                lapack_int l, lapack_int nb,\n                                lapack_complex_double* a, lapack_int lda,\n                                lapack_complex_double* b, lapack_int ldb,\n                                lapack_complex_double* t, lapack_int ldt,\n                                lapack_complex_double* work );\n\nlapack_int LAPACKE_stpqrt2_work( int matrix_order, lapack_int m, lapack_int n,\n                                 float* a, lapack_int lda, float* b,\n                                 lapack_int ldb, float* t, lapack_int ldt );\nlapack_int LAPACKE_dtpqrt2_work( int matrix_order, lapack_int m, lapack_int n,\n                                 double* a, lapack_int lda, double* b,\n                                 lapack_int ldb, double* t, lapack_int ldt );\nlapack_int LAPACKE_ctpqrt2_work( int matrix_order, lapack_int m, lapack_int n,\n                                 lapack_complex_float* a, lapack_int lda,\n                                 lapack_complex_float* b, lapack_int ldb,\n                                 lapack_complex_float* t, lapack_int ldt );\nlapack_int LAPACKE_ztpqrt2_work( int matrix_order, lapack_int m, lapack_int n,\n                                 lapack_complex_double* a, lapack_int lda,\n                                 lapack_complex_double* b, lapack_int ldb,\n                                 lapack_complex_double* t, lapack_int ldt );\n\nlapack_int LAPACKE_stprfb_work( int matrix_order, char side, char trans,\n                                char direct, char storev, lapack_int m,\n                                lapack_int n, lapack_int k, lapack_int l,\n                                const float* v, lapack_int ldv, const float* t,\n                                lapack_int ldt, float* a, lapack_int lda,\n                                float* b, lapack_int ldb, const float* mywork,\n                                lapack_int myldwork );\nlapack_int LAPACKE_dtprfb_work( int matrix_order, char side, char trans,\n                                char direct, char storev, lapack_int m,\n                                lapack_int n, lapack_int k, lapack_int l,\n                                const double* v, lapack_int ldv,\n                                const double* t, lapack_int ldt, double* a,\n                                lapack_int lda, double* b, lapack_int ldb,\n                                const double* mywork, lapack_int myldwork 
);\nlapack_int LAPACKE_ctprfb_work( int matrix_order, char side, char trans,\n                                char direct, char storev, lapack_int m,\n                                lapack_int n, lapack_int k, lapack_int l,\n                                const lapack_complex_float* v, lapack_int ldv,\n                                const lapack_complex_float* t, lapack_int ldt,\n                                lapack_complex_float* a, lapack_int lda,\n                                lapack_complex_float* b, lapack_int ldb,\n                                const float* mywork, lapack_int myldwork );\nlapack_int LAPACKE_ztprfb_work( int matrix_order, char side, char trans,\n                                char direct, char storev, lapack_int m,\n                                lapack_int n, lapack_int k, lapack_int l,\n                                const lapack_complex_double* v, lapack_int ldv,\n                                const lapack_complex_double* t, lapack_int ldt,\n                                lapack_complex_double* a, lapack_int lda,\n                                lapack_complex_double* b, lapack_int ldb,\n                                const double* mywork, lapack_int myldwork );\n// LAPACK 3.X.X\nlapack_int LAPACKE_csyr( int matrix_order, char uplo, lapack_int n,\n                         lapack_complex_float alpha,\n                         const lapack_complex_float* x, lapack_int incx,\n                         lapack_complex_float* a, lapack_int lda );\nlapack_int LAPACKE_zsyr( int matrix_order, char uplo, lapack_int n,\n                         lapack_complex_double alpha,\n                         const lapack_complex_double* x, lapack_int incx,\n                         lapack_complex_double* a, lapack_int lda );\n\nlapack_int LAPACKE_csyr_work( int matrix_order, char uplo, lapack_int n,\n                              lapack_complex_float alpha,\n                              const lapack_complex_float* x,\n                              lapack_int incx, lapack_complex_float* a,\n                              lapack_int lda );\nlapack_int LAPACKE_zsyr_work( int matrix_order, char uplo, lapack_int n,\n                              lapack_complex_double alpha,\n                              const lapack_complex_double* x,\n                              lapack_int incx, lapack_complex_double* a,\n                              lapack_int lda );\n\n\n\n#define LAPACK_sgetrf LAPACK_GLOBAL(sgetrf,SGETRF)\n#define LAPACK_dgetrf LAPACK_GLOBAL(dgetrf,DGETRF)\n#define LAPACK_cgetrf LAPACK_GLOBAL(cgetrf,CGETRF)\n#define LAPACK_zgetrf LAPACK_GLOBAL(zgetrf,ZGETRF)\n#define LAPACK_sgbtrf LAPACK_GLOBAL(sgbtrf,SGBTRF)\n#define LAPACK_dgbtrf LAPACK_GLOBAL(dgbtrf,DGBTRF)\n#define LAPACK_cgbtrf LAPACK_GLOBAL(cgbtrf,CGBTRF)\n#define LAPACK_zgbtrf LAPACK_GLOBAL(zgbtrf,ZGBTRF)\n#define LAPACK_sgttrf LAPACK_GLOBAL(sgttrf,SGTTRF)\n#define LAPACK_dgttrf LAPACK_GLOBAL(dgttrf,DGTTRF)\n#define LAPACK_cgttrf LAPACK_GLOBAL(cgttrf,CGTTRF)\n#define LAPACK_zgttrf LAPACK_GLOBAL(zgttrf,ZGTTRF)\n#define LAPACK_spotrf LAPACK_GLOBAL(spotrf,SPOTRF)\n#define LAPACK_dpotrf LAPACK_GLOBAL(dpotrf,DPOTRF)\n#define LAPACK_cpotrf LAPACK_GLOBAL(cpotrf,CPOTRF)\n#define LAPACK_zpotrf LAPACK_GLOBAL(zpotrf,ZPOTRF)\n#define LAPACK_dpstrf LAPACK_GLOBAL(dpstrf,DPSTRF)\n#define LAPACK_spstrf LAPACK_GLOBAL(spstrf,SPSTRF)\n#define LAPACK_zpstrf LAPACK_GLOBAL(zpstrf,ZPSTRF)\n#define LAPACK_cpstrf LAPACK_GLOBAL(cpstrf,CPSTRF)\n#define LAPACK_dpftrf 
LAPACK_GLOBAL(dpftrf,DPFTRF)\n#define LAPACK_spftrf LAPACK_GLOBAL(spftrf,SPFTRF)\n#define LAPACK_zpftrf LAPACK_GLOBAL(zpftrf,ZPFTRF)\n#define LAPACK_cpftrf LAPACK_GLOBAL(cpftrf,CPFTRF)\n#define LAPACK_spptrf LAPACK_GLOBAL(spptrf,SPPTRF)\n#define LAPACK_dpptrf LAPACK_GLOBAL(dpptrf,DPPTRF)\n#define LAPACK_cpptrf LAPACK_GLOBAL(cpptrf,CPPTRF)\n#define LAPACK_zpptrf LAPACK_GLOBAL(zpptrf,ZPPTRF)\n#define LAPACK_spbtrf LAPACK_GLOBAL(spbtrf,SPBTRF)\n#define LAPACK_dpbtrf LAPACK_GLOBAL(dpbtrf,DPBTRF)\n#define LAPACK_cpbtrf LAPACK_GLOBAL(cpbtrf,CPBTRF)\n#define LAPACK_zpbtrf LAPACK_GLOBAL(zpbtrf,ZPBTRF)\n#define LAPACK_spttrf LAPACK_GLOBAL(spttrf,SPTTRF)\n#define LAPACK_dpttrf LAPACK_GLOBAL(dpttrf,DPTTRF)\n#define LAPACK_cpttrf LAPACK_GLOBAL(cpttrf,CPTTRF)\n#define LAPACK_zpttrf LAPACK_GLOBAL(zpttrf,ZPTTRF)\n#define LAPACK_ssytrf LAPACK_GLOBAL(ssytrf,SSYTRF)\n#define LAPACK_dsytrf LAPACK_GLOBAL(dsytrf,DSYTRF)\n#define LAPACK_csytrf LAPACK_GLOBAL(csytrf,CSYTRF)\n#define LAPACK_zsytrf LAPACK_GLOBAL(zsytrf,ZSYTRF)\n#define LAPACK_chetrf LAPACK_GLOBAL(chetrf,CHETRF)\n#define LAPACK_zhetrf LAPACK_GLOBAL(zhetrf,ZHETRF)\n#define LAPACK_ssptrf LAPACK_GLOBAL(ssptrf,SSPTRF)\n#define LAPACK_dsptrf LAPACK_GLOBAL(dsptrf,DSPTRF)\n#define LAPACK_csptrf LAPACK_GLOBAL(csptrf,CSPTRF)\n#define LAPACK_zsptrf LAPACK_GLOBAL(zsptrf,ZSPTRF)\n#define LAPACK_chptrf LAPACK_GLOBAL(chptrf,CHPTRF)\n#define LAPACK_zhptrf LAPACK_GLOBAL(zhptrf,ZHPTRF)\n#define LAPACK_sgetrs LAPACK_GLOBAL(sgetrs,SGETRS)\n#define LAPACK_dgetrs LAPACK_GLOBAL(dgetrs,DGETRS)\n#define LAPACK_cgetrs LAPACK_GLOBAL(cgetrs,CGETRS)\n#define LAPACK_zgetrs LAPACK_GLOBAL(zgetrs,ZGETRS)\n#define LAPACK_sgbtrs LAPACK_GLOBAL(sgbtrs,SGBTRS)\n#define LAPACK_dgbtrs LAPACK_GLOBAL(dgbtrs,DGBTRS)\n#define LAPACK_cgbtrs LAPACK_GLOBAL(cgbtrs,CGBTRS)\n#define LAPACK_zgbtrs LAPACK_GLOBAL(zgbtrs,ZGBTRS)\n#define LAPACK_sgttrs LAPACK_GLOBAL(sgttrs,SGTTRS)\n#define LAPACK_dgttrs LAPACK_GLOBAL(dgttrs,DGTTRS)\n#define LAPACK_cgttrs LAPACK_GLOBAL(cgttrs,CGTTRS)\n#define LAPACK_zgttrs LAPACK_GLOBAL(zgttrs,ZGTTRS)\n#define LAPACK_spotrs LAPACK_GLOBAL(spotrs,SPOTRS)\n#define LAPACK_dpotrs LAPACK_GLOBAL(dpotrs,DPOTRS)\n#define LAPACK_cpotrs LAPACK_GLOBAL(cpotrs,CPOTRS)\n#define LAPACK_zpotrs LAPACK_GLOBAL(zpotrs,ZPOTRS)\n#define LAPACK_dpftrs LAPACK_GLOBAL(dpftrs,DPFTRS)\n#define LAPACK_spftrs LAPACK_GLOBAL(spftrs,SPFTRS)\n#define LAPACK_zpftrs LAPACK_GLOBAL(zpftrs,ZPFTRS)\n#define LAPACK_cpftrs LAPACK_GLOBAL(cpftrs,CPFTRS)\n#define LAPACK_spptrs LAPACK_GLOBAL(spptrs,SPPTRS)\n#define LAPACK_dpptrs LAPACK_GLOBAL(dpptrs,DPPTRS)\n#define LAPACK_cpptrs LAPACK_GLOBAL(cpptrs,CPPTRS)\n#define LAPACK_zpptrs LAPACK_GLOBAL(zpptrs,ZPPTRS)\n#define LAPACK_spbtrs LAPACK_GLOBAL(spbtrs,SPBTRS)\n#define LAPACK_dpbtrs LAPACK_GLOBAL(dpbtrs,DPBTRS)\n#define LAPACK_cpbtrs LAPACK_GLOBAL(cpbtrs,CPBTRS)\n#define LAPACK_zpbtrs LAPACK_GLOBAL(zpbtrs,ZPBTRS)\n#define LAPACK_spttrs LAPACK_GLOBAL(spttrs,SPTTRS)\n#define LAPACK_dpttrs LAPACK_GLOBAL(dpttrs,DPTTRS)\n#define LAPACK_cpttrs LAPACK_GLOBAL(cpttrs,CPTTRS)\n#define LAPACK_zpttrs LAPACK_GLOBAL(zpttrs,ZPTTRS)\n#define LAPACK_ssytrs LAPACK_GLOBAL(ssytrs,SSYTRS)\n#define LAPACK_dsytrs LAPACK_GLOBAL(dsytrs,DSYTRS)\n#define LAPACK_csytrs LAPACK_GLOBAL(csytrs,CSYTRS)\n#define LAPACK_zsytrs LAPACK_GLOBAL(zsytrs,ZSYTRS)\n#define LAPACK_chetrs LAPACK_GLOBAL(chetrs,CHETRS)\n#define LAPACK_zhetrs LAPACK_GLOBAL(zhetrs,ZHETRS)\n#define LAPACK_ssptrs LAPACK_GLOBAL(ssptrs,SSPTRS)\n#define LAPACK_dsptrs LAPACK_GLOBAL(dsptrs,DSPTRS)\n#define LAPACK_csptrs 
LAPACK_GLOBAL(csptrs,CSPTRS)\n#define LAPACK_zsptrs LAPACK_GLOBAL(zsptrs,ZSPTRS)\n#define LAPACK_chptrs LAPACK_GLOBAL(chptrs,CHPTRS)\n#define LAPACK_zhptrs LAPACK_GLOBAL(zhptrs,ZHPTRS)\n#define LAPACK_strtrs LAPACK_GLOBAL(strtrs,STRTRS)\n#define LAPACK_dtrtrs LAPACK_GLOBAL(dtrtrs,DTRTRS)\n#define LAPACK_ctrtrs LAPACK_GLOBAL(ctrtrs,CTRTRS)\n#define LAPACK_ztrtrs LAPACK_GLOBAL(ztrtrs,ZTRTRS)\n#define LAPACK_stptrs LAPACK_GLOBAL(stptrs,STPTRS)\n#define LAPACK_dtptrs LAPACK_GLOBAL(dtptrs,DTPTRS)\n#define LAPACK_ctptrs LAPACK_GLOBAL(ctptrs,CTPTRS)\n#define LAPACK_ztptrs LAPACK_GLOBAL(ztptrs,ZTPTRS)\n#define LAPACK_stbtrs LAPACK_GLOBAL(stbtrs,STBTRS)\n#define LAPACK_dtbtrs LAPACK_GLOBAL(dtbtrs,DTBTRS)\n#define LAPACK_ctbtrs LAPACK_GLOBAL(ctbtrs,CTBTRS)\n#define LAPACK_ztbtrs LAPACK_GLOBAL(ztbtrs,ZTBTRS)\n#define LAPACK_sgecon LAPACK_GLOBAL(sgecon,SGECON)\n#define LAPACK_dgecon LAPACK_GLOBAL(dgecon,DGECON)\n#define LAPACK_cgecon LAPACK_GLOBAL(cgecon,CGECON)\n#define LAPACK_zgecon LAPACK_GLOBAL(zgecon,ZGECON)\n#define LAPACK_sgbcon LAPACK_GLOBAL(sgbcon,SGBCON)\n#define LAPACK_dgbcon LAPACK_GLOBAL(dgbcon,DGBCON)\n#define LAPACK_cgbcon LAPACK_GLOBAL(cgbcon,CGBCON)\n#define LAPACK_zgbcon LAPACK_GLOBAL(zgbcon,ZGBCON)\n#define LAPACK_sgtcon LAPACK_GLOBAL(sgtcon,SGTCON)\n#define LAPACK_dgtcon LAPACK_GLOBAL(dgtcon,DGTCON)\n#define LAPACK_cgtcon LAPACK_GLOBAL(cgtcon,CGTCON)\n#define LAPACK_zgtcon LAPACK_GLOBAL(zgtcon,ZGTCON)\n#define LAPACK_spocon LAPACK_GLOBAL(spocon,SPOCON)\n#define LAPACK_dpocon LAPACK_GLOBAL(dpocon,DPOCON)\n#define LAPACK_cpocon LAPACK_GLOBAL(cpocon,CPOCON)\n#define LAPACK_zpocon LAPACK_GLOBAL(zpocon,ZPOCON)\n#define LAPACK_sppcon LAPACK_GLOBAL(sppcon,SPPCON)\n#define LAPACK_dppcon LAPACK_GLOBAL(dppcon,DPPCON)\n#define LAPACK_cppcon LAPACK_GLOBAL(cppcon,CPPCON)\n#define LAPACK_zppcon LAPACK_GLOBAL(zppcon,ZPPCON)\n#define LAPACK_spbcon LAPACK_GLOBAL(spbcon,SPBCON)\n#define LAPACK_dpbcon LAPACK_GLOBAL(dpbcon,DPBCON)\n#define LAPACK_cpbcon LAPACK_GLOBAL(cpbcon,CPBCON)\n#define LAPACK_zpbcon LAPACK_GLOBAL(zpbcon,ZPBCON)\n#define LAPACK_sptcon LAPACK_GLOBAL(sptcon,SPTCON)\n#define LAPACK_dptcon LAPACK_GLOBAL(dptcon,DPTCON)\n#define LAPACK_cptcon LAPACK_GLOBAL(cptcon,CPTCON)\n#define LAPACK_zptcon LAPACK_GLOBAL(zptcon,ZPTCON)\n#define LAPACK_ssycon LAPACK_GLOBAL(ssycon,SSYCON)\n#define LAPACK_dsycon LAPACK_GLOBAL(dsycon,DSYCON)\n#define LAPACK_csycon LAPACK_GLOBAL(csycon,CSYCON)\n#define LAPACK_zsycon LAPACK_GLOBAL(zsycon,ZSYCON)\n#define LAPACK_checon LAPACK_GLOBAL(checon,CHECON)\n#define LAPACK_zhecon LAPACK_GLOBAL(zhecon,ZHECON)\n#define LAPACK_sspcon LAPACK_GLOBAL(sspcon,SSPCON)\n#define LAPACK_dspcon LAPACK_GLOBAL(dspcon,DSPCON)\n#define LAPACK_cspcon LAPACK_GLOBAL(cspcon,CSPCON)\n#define LAPACK_zspcon LAPACK_GLOBAL(zspcon,ZSPCON)\n#define LAPACK_chpcon LAPACK_GLOBAL(chpcon,CHPCON)\n#define LAPACK_zhpcon LAPACK_GLOBAL(zhpcon,ZHPCON)\n#define LAPACK_strcon LAPACK_GLOBAL(strcon,STRCON)\n#define LAPACK_dtrcon LAPACK_GLOBAL(dtrcon,DTRCON)\n#define LAPACK_ctrcon LAPACK_GLOBAL(ctrcon,CTRCON)\n#define LAPACK_ztrcon LAPACK_GLOBAL(ztrcon,ZTRCON)\n#define LAPACK_stpcon LAPACK_GLOBAL(stpcon,STPCON)\n#define LAPACK_dtpcon LAPACK_GLOBAL(dtpcon,DTPCON)\n#define LAPACK_ctpcon LAPACK_GLOBAL(ctpcon,CTPCON)\n#define LAPACK_ztpcon LAPACK_GLOBAL(ztpcon,ZTPCON)\n#define LAPACK_stbcon LAPACK_GLOBAL(stbcon,STBCON)\n#define LAPACK_dtbcon LAPACK_GLOBAL(dtbcon,DTBCON)\n#define LAPACK_ctbcon LAPACK_GLOBAL(ctbcon,CTBCON)\n#define LAPACK_ztbcon LAPACK_GLOBAL(ztbcon,ZTBCON)\n#define LAPACK_sgerfs 
LAPACK_GLOBAL(sgerfs,SGERFS)\n#define LAPACK_dgerfs LAPACK_GLOBAL(dgerfs,DGERFS)\n#define LAPACK_cgerfs LAPACK_GLOBAL(cgerfs,CGERFS)\n#define LAPACK_zgerfs LAPACK_GLOBAL(zgerfs,ZGERFS)\n#define LAPACK_dgerfsx LAPACK_GLOBAL(dgerfsx,DGERFSX)\n#define LAPACK_sgerfsx LAPACK_GLOBAL(sgerfsx,SGERFSX)\n#define LAPACK_zgerfsx LAPACK_GLOBAL(zgerfsx,ZGERFSX)\n#define LAPACK_cgerfsx LAPACK_GLOBAL(cgerfsx,CGERFSX)\n#define LAPACK_sgbrfs LAPACK_GLOBAL(sgbrfs,SGBRFS)\n#define LAPACK_dgbrfs LAPACK_GLOBAL(dgbrfs,DGBRFS)\n#define LAPACK_cgbrfs LAPACK_GLOBAL(cgbrfs,CGBRFS)\n#define LAPACK_zgbrfs LAPACK_GLOBAL(zgbrfs,ZGBRFS)\n#define LAPACK_dgbrfsx LAPACK_GLOBAL(dgbrfsx,DGBRFSX)\n#define LAPACK_sgbrfsx LAPACK_GLOBAL(sgbrfsx,SGBRFSX)\n#define LAPACK_zgbrfsx LAPACK_GLOBAL(zgbrfsx,ZGBRFSX)\n#define LAPACK_cgbrfsx LAPACK_GLOBAL(cgbrfsx,CGBRFSX)\n#define LAPACK_sgtrfs LAPACK_GLOBAL(sgtrfs,SGTRFS)\n#define LAPACK_dgtrfs LAPACK_GLOBAL(dgtrfs,DGTRFS)\n#define LAPACK_cgtrfs LAPACK_GLOBAL(cgtrfs,CGTRFS)\n#define LAPACK_zgtrfs LAPACK_GLOBAL(zgtrfs,ZGTRFS)\n#define LAPACK_sporfs LAPACK_GLOBAL(sporfs,SPORFS)\n#define LAPACK_dporfs LAPACK_GLOBAL(dporfs,DPORFS)\n#define LAPACK_cporfs LAPACK_GLOBAL(cporfs,CPORFS)\n#define LAPACK_zporfs LAPACK_GLOBAL(zporfs,ZPORFS)\n#define LAPACK_dporfsx LAPACK_GLOBAL(dporfsx,DPORFSX)\n#define LAPACK_sporfsx LAPACK_GLOBAL(sporfsx,SPORFSX)\n#define LAPACK_zporfsx LAPACK_GLOBAL(zporfsx,ZPORFSX)\n#define LAPACK_cporfsx LAPACK_GLOBAL(cporfsx,CPORFSX)\n#define LAPACK_spprfs LAPACK_GLOBAL(spprfs,SPPRFS)\n#define LAPACK_dpprfs LAPACK_GLOBAL(dpprfs,DPPRFS)\n#define LAPACK_cpprfs LAPACK_GLOBAL(cpprfs,CPPRFS)\n#define LAPACK_zpprfs LAPACK_GLOBAL(zpprfs,ZPPRFS)\n#define LAPACK_spbrfs LAPACK_GLOBAL(spbrfs,SPBRFS)\n#define LAPACK_dpbrfs LAPACK_GLOBAL(dpbrfs,DPBRFS)\n#define LAPACK_cpbrfs LAPACK_GLOBAL(cpbrfs,CPBRFS)\n#define LAPACK_zpbrfs LAPACK_GLOBAL(zpbrfs,ZPBRFS)\n#define LAPACK_sptrfs LAPACK_GLOBAL(sptrfs,SPTRFS)\n#define LAPACK_dptrfs LAPACK_GLOBAL(dptrfs,DPTRFS)\n#define LAPACK_cptrfs LAPACK_GLOBAL(cptrfs,CPTRFS)\n#define LAPACK_zptrfs LAPACK_GLOBAL(zptrfs,ZPTRFS)\n#define LAPACK_ssyrfs LAPACK_GLOBAL(ssyrfs,SSYRFS)\n#define LAPACK_dsyrfs LAPACK_GLOBAL(dsyrfs,DSYRFS)\n#define LAPACK_csyrfs LAPACK_GLOBAL(csyrfs,CSYRFS)\n#define LAPACK_zsyrfs LAPACK_GLOBAL(zsyrfs,ZSYRFS)\n#define LAPACK_dsyrfsx LAPACK_GLOBAL(dsyrfsx,DSYRFSX)\n#define LAPACK_ssyrfsx LAPACK_GLOBAL(ssyrfsx,SSYRFSX)\n#define LAPACK_zsyrfsx LAPACK_GLOBAL(zsyrfsx,ZSYRFSX)\n#define LAPACK_csyrfsx LAPACK_GLOBAL(csyrfsx,CSYRFSX)\n#define LAPACK_cherfs LAPACK_GLOBAL(cherfs,CHERFS)\n#define LAPACK_zherfs LAPACK_GLOBAL(zherfs,ZHERFS)\n#define LAPACK_zherfsx LAPACK_GLOBAL(zherfsx,ZHERFSX)\n#define LAPACK_cherfsx LAPACK_GLOBAL(cherfsx,CHERFSX)\n#define LAPACK_ssprfs LAPACK_GLOBAL(ssprfs,SSPRFS)\n#define LAPACK_dsprfs LAPACK_GLOBAL(dsprfs,DSPRFS)\n#define LAPACK_csprfs LAPACK_GLOBAL(csprfs,CSPRFS)\n#define LAPACK_zsprfs LAPACK_GLOBAL(zsprfs,ZSPRFS)\n#define LAPACK_chprfs LAPACK_GLOBAL(chprfs,CHPRFS)\n#define LAPACK_zhprfs LAPACK_GLOBAL(zhprfs,ZHPRFS)\n#define LAPACK_strrfs LAPACK_GLOBAL(strrfs,STRRFS)\n#define LAPACK_dtrrfs LAPACK_GLOBAL(dtrrfs,DTRRFS)\n#define LAPACK_ctrrfs LAPACK_GLOBAL(ctrrfs,CTRRFS)\n#define LAPACK_ztrrfs LAPACK_GLOBAL(ztrrfs,ZTRRFS)\n#define LAPACK_stprfs LAPACK_GLOBAL(stprfs,STPRFS)\n#define LAPACK_dtprfs LAPACK_GLOBAL(dtprfs,DTPRFS)\n#define LAPACK_ctprfs LAPACK_GLOBAL(ctprfs,CTPRFS)\n#define LAPACK_ztprfs LAPACK_GLOBAL(ztprfs,ZTPRFS)\n#define LAPACK_stbrfs LAPACK_GLOBAL(stbrfs,STBRFS)\n#define LAPACK_dtbrfs 
LAPACK_GLOBAL(dtbrfs,DTBRFS)\n#define LAPACK_ctbrfs LAPACK_GLOBAL(ctbrfs,CTBRFS)\n#define LAPACK_ztbrfs LAPACK_GLOBAL(ztbrfs,ZTBRFS)\n#define LAPACK_sgetri LAPACK_GLOBAL(sgetri,SGETRI)\n#define LAPACK_dgetri LAPACK_GLOBAL(dgetri,DGETRI)\n#define LAPACK_cgetri LAPACK_GLOBAL(cgetri,CGETRI)\n#define LAPACK_zgetri LAPACK_GLOBAL(zgetri,ZGETRI)\n#define LAPACK_spotri LAPACK_GLOBAL(spotri,SPOTRI)\n#define LAPACK_dpotri LAPACK_GLOBAL(dpotri,DPOTRI)\n#define LAPACK_cpotri LAPACK_GLOBAL(cpotri,CPOTRI)\n#define LAPACK_zpotri LAPACK_GLOBAL(zpotri,ZPOTRI)\n#define LAPACK_dpftri LAPACK_GLOBAL(dpftri,DPFTRI)\n#define LAPACK_spftri LAPACK_GLOBAL(spftri,SPFTRI)\n#define LAPACK_zpftri LAPACK_GLOBAL(zpftri,ZPFTRI)\n#define LAPACK_cpftri LAPACK_GLOBAL(cpftri,CPFTRI)\n#define LAPACK_spptri LAPACK_GLOBAL(spptri,SPPTRI)\n#define LAPACK_dpptri LAPACK_GLOBAL(dpptri,DPPTRI)\n#define LAPACK_cpptri LAPACK_GLOBAL(cpptri,CPPTRI)\n#define LAPACK_zpptri LAPACK_GLOBAL(zpptri,ZPPTRI)\n#define LAPACK_ssytri LAPACK_GLOBAL(ssytri,SSYTRI)\n#define LAPACK_dsytri LAPACK_GLOBAL(dsytri,DSYTRI)\n#define LAPACK_csytri LAPACK_GLOBAL(csytri,CSYTRI)\n#define LAPACK_zsytri LAPACK_GLOBAL(zsytri,ZSYTRI)\n#define LAPACK_chetri LAPACK_GLOBAL(chetri,CHETRI)\n#define LAPACK_zhetri LAPACK_GLOBAL(zhetri,ZHETRI)\n#define LAPACK_ssptri LAPACK_GLOBAL(ssptri,SSPTRI)\n#define LAPACK_dsptri LAPACK_GLOBAL(dsptri,DSPTRI)\n#define LAPACK_csptri LAPACK_GLOBAL(csptri,CSPTRI)\n#define LAPACK_zsptri LAPACK_GLOBAL(zsptri,ZSPTRI)\n#define LAPACK_chptri LAPACK_GLOBAL(chptri,CHPTRI)\n#define LAPACK_zhptri LAPACK_GLOBAL(zhptri,ZHPTRI)\n#define LAPACK_strtri LAPACK_GLOBAL(strtri,STRTRI)\n#define LAPACK_dtrtri LAPACK_GLOBAL(dtrtri,DTRTRI)\n#define LAPACK_ctrtri LAPACK_GLOBAL(ctrtri,CTRTRI)\n#define LAPACK_ztrtri LAPACK_GLOBAL(ztrtri,ZTRTRI)\n#define LAPACK_dtftri LAPACK_GLOBAL(dtftri,DTFTRI)\n#define LAPACK_stftri LAPACK_GLOBAL(stftri,STFTRI)\n#define LAPACK_ztftri LAPACK_GLOBAL(ztftri,ZTFTRI)\n#define LAPACK_ctftri LAPACK_GLOBAL(ctftri,CTFTRI)\n#define LAPACK_stptri LAPACK_GLOBAL(stptri,STPTRI)\n#define LAPACK_dtptri LAPACK_GLOBAL(dtptri,DTPTRI)\n#define LAPACK_ctptri LAPACK_GLOBAL(ctptri,CTPTRI)\n#define LAPACK_ztptri LAPACK_GLOBAL(ztptri,ZTPTRI)\n#define LAPACK_sgeequ LAPACK_GLOBAL(sgeequ,SGEEQU)\n#define LAPACK_dgeequ LAPACK_GLOBAL(dgeequ,DGEEQU)\n#define LAPACK_cgeequ LAPACK_GLOBAL(cgeequ,CGEEQU)\n#define LAPACK_zgeequ LAPACK_GLOBAL(zgeequ,ZGEEQU)\n#define LAPACK_dgeequb LAPACK_GLOBAL(dgeequb,DGEEQUB)\n#define LAPACK_sgeequb LAPACK_GLOBAL(sgeequb,SGEEQUB)\n#define LAPACK_zgeequb LAPACK_GLOBAL(zgeequb,ZGEEQUB)\n#define LAPACK_cgeequb LAPACK_GLOBAL(cgeequb,CGEEQUB)\n#define LAPACK_sgbequ LAPACK_GLOBAL(sgbequ,SGBEQU)\n#define LAPACK_dgbequ LAPACK_GLOBAL(dgbequ,DGBEQU)\n#define LAPACK_cgbequ LAPACK_GLOBAL(cgbequ,CGBEQU)\n#define LAPACK_zgbequ LAPACK_GLOBAL(zgbequ,ZGBEQU)\n#define LAPACK_dgbequb LAPACK_GLOBAL(dgbequb,DGBEQUB)\n#define LAPACK_sgbequb LAPACK_GLOBAL(sgbequb,SGBEQUB)\n#define LAPACK_zgbequb LAPACK_GLOBAL(zgbequb,ZGBEQUB)\n#define LAPACK_cgbequb LAPACK_GLOBAL(cgbequb,CGBEQUB)\n#define LAPACK_spoequ LAPACK_GLOBAL(spoequ,SPOEQU)\n#define LAPACK_dpoequ LAPACK_GLOBAL(dpoequ,DPOEQU)\n#define LAPACK_cpoequ LAPACK_GLOBAL(cpoequ,CPOEQU)\n#define LAPACK_zpoequ LAPACK_GLOBAL(zpoequ,ZPOEQU)\n#define LAPACK_dpoequb LAPACK_GLOBAL(dpoequb,DPOEQUB)\n#define LAPACK_spoequb LAPACK_GLOBAL(spoequb,SPOEQUB)\n#define LAPACK_zpoequb LAPACK_GLOBAL(zpoequb,ZPOEQUB)\n#define LAPACK_cpoequb LAPACK_GLOBAL(cpoequb,CPOEQUB)\n#define LAPACK_sppequ 
LAPACK_GLOBAL(sppequ,SPPEQU)\n#define LAPACK_dppequ LAPACK_GLOBAL(dppequ,DPPEQU)\n#define LAPACK_cppequ LAPACK_GLOBAL(cppequ,CPPEQU)\n#define LAPACK_zppequ LAPACK_GLOBAL(zppequ,ZPPEQU)\n#define LAPACK_spbequ LAPACK_GLOBAL(spbequ,SPBEQU)\n#define LAPACK_dpbequ LAPACK_GLOBAL(dpbequ,DPBEQU)\n#define LAPACK_cpbequ LAPACK_GLOBAL(cpbequ,CPBEQU)\n#define LAPACK_zpbequ LAPACK_GLOBAL(zpbequ,ZPBEQU)\n#define LAPACK_dsyequb LAPACK_GLOBAL(dsyequb,DSYEQUB)\n#define LAPACK_ssyequb LAPACK_GLOBAL(ssyequb,SSYEQUB)\n#define LAPACK_zsyequb LAPACK_GLOBAL(zsyequb,ZSYEQUB)\n#define LAPACK_csyequb LAPACK_GLOBAL(csyequb,CSYEQUB)\n#define LAPACK_zheequb LAPACK_GLOBAL(zheequb,ZHEEQUB)\n#define LAPACK_cheequb LAPACK_GLOBAL(cheequb,CHEEQUB)\n#define LAPACK_sgesv LAPACK_GLOBAL(sgesv,SGESV)\n#define LAPACK_dgesv LAPACK_GLOBAL(dgesv,DGESV)\n#define LAPACK_cgesv LAPACK_GLOBAL(cgesv,CGESV)\n#define LAPACK_zgesv LAPACK_GLOBAL(zgesv,ZGESV)\n#define LAPACK_dsgesv LAPACK_GLOBAL(dsgesv,DSGESV)\n#define LAPACK_zcgesv LAPACK_GLOBAL(zcgesv,ZCGESV)\n#define LAPACK_sgesvx LAPACK_GLOBAL(sgesvx,SGESVX)\n#define LAPACK_dgesvx LAPACK_GLOBAL(dgesvx,DGESVX)\n#define LAPACK_cgesvx LAPACK_GLOBAL(cgesvx,CGESVX)\n#define LAPACK_zgesvx LAPACK_GLOBAL(zgesvx,ZGESVX)\n#define LAPACK_dgesvxx LAPACK_GLOBAL(dgesvxx,DGESVXX)\n#define LAPACK_sgesvxx LAPACK_GLOBAL(sgesvxx,SGESVXX)\n#define LAPACK_zgesvxx LAPACK_GLOBAL(zgesvxx,ZGESVXX)\n#define LAPACK_cgesvxx LAPACK_GLOBAL(cgesvxx,CGESVXX)\n#define LAPACK_sgbsv LAPACK_GLOBAL(sgbsv,SGBSV)\n#define LAPACK_dgbsv LAPACK_GLOBAL(dgbsv,DGBSV)\n#define LAPACK_cgbsv LAPACK_GLOBAL(cgbsv,CGBSV)\n#define LAPACK_zgbsv LAPACK_GLOBAL(zgbsv,ZGBSV)\n#define LAPACK_sgbsvx LAPACK_GLOBAL(sgbsvx,SGBSVX)\n#define LAPACK_dgbsvx LAPACK_GLOBAL(dgbsvx,DGBSVX)\n#define LAPACK_cgbsvx LAPACK_GLOBAL(cgbsvx,CGBSVX)\n#define LAPACK_zgbsvx LAPACK_GLOBAL(zgbsvx,ZGBSVX)\n#define LAPACK_dgbsvxx LAPACK_GLOBAL(dgbsvxx,DGBSVXX)\n#define LAPACK_sgbsvxx LAPACK_GLOBAL(sgbsvxx,SGBSVXX)\n#define LAPACK_zgbsvxx LAPACK_GLOBAL(zgbsvxx,ZGBSVXX)\n#define LAPACK_cgbsvxx LAPACK_GLOBAL(cgbsvxx,CGBSVXX)\n#define LAPACK_sgtsv LAPACK_GLOBAL(sgtsv,SGTSV)\n#define LAPACK_dgtsv LAPACK_GLOBAL(dgtsv,DGTSV)\n#define LAPACK_cgtsv LAPACK_GLOBAL(cgtsv,CGTSV)\n#define LAPACK_zgtsv LAPACK_GLOBAL(zgtsv,ZGTSV)\n#define LAPACK_sgtsvx LAPACK_GLOBAL(sgtsvx,SGTSVX)\n#define LAPACK_dgtsvx LAPACK_GLOBAL(dgtsvx,DGTSVX)\n#define LAPACK_cgtsvx LAPACK_GLOBAL(cgtsvx,CGTSVX)\n#define LAPACK_zgtsvx LAPACK_GLOBAL(zgtsvx,ZGTSVX)\n#define LAPACK_sposv LAPACK_GLOBAL(sposv,SPOSV)\n#define LAPACK_dposv LAPACK_GLOBAL(dposv,DPOSV)\n#define LAPACK_cposv LAPACK_GLOBAL(cposv,CPOSV)\n#define LAPACK_zposv LAPACK_GLOBAL(zposv,ZPOSV)\n#define LAPACK_dsposv LAPACK_GLOBAL(dsposv,DSPOSV)\n#define LAPACK_zcposv LAPACK_GLOBAL(zcposv,ZCPOSV)\n#define LAPACK_sposvx LAPACK_GLOBAL(sposvx,SPOSVX)\n#define LAPACK_dposvx LAPACK_GLOBAL(dposvx,DPOSVX)\n#define LAPACK_cposvx LAPACK_GLOBAL(cposvx,CPOSVX)\n#define LAPACK_zposvx LAPACK_GLOBAL(zposvx,ZPOSVX)\n#define LAPACK_dposvxx LAPACK_GLOBAL(dposvxx,DPOSVXX)\n#define LAPACK_sposvxx LAPACK_GLOBAL(sposvxx,SPOSVXX)\n#define LAPACK_zposvxx LAPACK_GLOBAL(zposvxx,ZPOSVXX)\n#define LAPACK_cposvxx LAPACK_GLOBAL(cposvxx,CPOSVXX)\n#define LAPACK_sppsv LAPACK_GLOBAL(sppsv,SPPSV)\n#define LAPACK_dppsv LAPACK_GLOBAL(dppsv,DPPSV)\n#define LAPACK_cppsv LAPACK_GLOBAL(cppsv,CPPSV)\n#define LAPACK_zppsv LAPACK_GLOBAL(zppsv,ZPPSV)\n#define LAPACK_sppsvx LAPACK_GLOBAL(sppsvx,SPPSVX)\n#define LAPACK_dppsvx LAPACK_GLOBAL(dppsvx,DPPSVX)\n#define LAPACK_cppsvx 
LAPACK_GLOBAL(cppsvx,CPPSVX)\n#define LAPACK_zppsvx LAPACK_GLOBAL(zppsvx,ZPPSVX)\n#define LAPACK_spbsv LAPACK_GLOBAL(spbsv,SPBSV)\n#define LAPACK_dpbsv LAPACK_GLOBAL(dpbsv,DPBSV)\n#define LAPACK_cpbsv LAPACK_GLOBAL(cpbsv,CPBSV)\n#define LAPACK_zpbsv LAPACK_GLOBAL(zpbsv,ZPBSV)\n#define LAPACK_spbsvx LAPACK_GLOBAL(spbsvx,SPBSVX)\n#define LAPACK_dpbsvx LAPACK_GLOBAL(dpbsvx,DPBSVX)\n#define LAPACK_cpbsvx LAPACK_GLOBAL(cpbsvx,CPBSVX)\n#define LAPACK_zpbsvx LAPACK_GLOBAL(zpbsvx,ZPBSVX)\n#define LAPACK_sptsv LAPACK_GLOBAL(sptsv,SPTSV)\n#define LAPACK_dptsv LAPACK_GLOBAL(dptsv,DPTSV)\n#define LAPACK_cptsv LAPACK_GLOBAL(cptsv,CPTSV)\n#define LAPACK_zptsv LAPACK_GLOBAL(zptsv,ZPTSV)\n#define LAPACK_sptsvx LAPACK_GLOBAL(sptsvx,SPTSVX)\n#define LAPACK_dptsvx LAPACK_GLOBAL(dptsvx,DPTSVX)\n#define LAPACK_cptsvx LAPACK_GLOBAL(cptsvx,CPTSVX)\n#define LAPACK_zptsvx LAPACK_GLOBAL(zptsvx,ZPTSVX)\n#define LAPACK_ssysv LAPACK_GLOBAL(ssysv,SSYSV)\n#define LAPACK_dsysv LAPACK_GLOBAL(dsysv,DSYSV)\n#define LAPACK_csysv LAPACK_GLOBAL(csysv,CSYSV)\n#define LAPACK_zsysv LAPACK_GLOBAL(zsysv,ZSYSV)\n#define LAPACK_ssysvx LAPACK_GLOBAL(ssysvx,SSYSVX)\n#define LAPACK_dsysvx LAPACK_GLOBAL(dsysvx,DSYSVX)\n#define LAPACK_csysvx LAPACK_GLOBAL(csysvx,CSYSVX)\n#define LAPACK_zsysvx LAPACK_GLOBAL(zsysvx,ZSYSVX)\n#define LAPACK_dsysvxx LAPACK_GLOBAL(dsysvxx,DSYSVXX)\n#define LAPACK_ssysvxx LAPACK_GLOBAL(ssysvxx,SSYSVXX)\n#define LAPACK_zsysvxx LAPACK_GLOBAL(zsysvxx,ZSYSVXX)\n#define LAPACK_csysvxx LAPACK_GLOBAL(csysvxx,CSYSVXX)\n#define LAPACK_chesv LAPACK_GLOBAL(chesv,CHESV)\n#define LAPACK_zhesv LAPACK_GLOBAL(zhesv,ZHESV)\n#define LAPACK_chesvx LAPACK_GLOBAL(chesvx,CHESVX)\n#define LAPACK_zhesvx LAPACK_GLOBAL(zhesvx,ZHESVX)\n#define LAPACK_zhesvxx LAPACK_GLOBAL(zhesvxx,ZHESVXX)\n#define LAPACK_chesvxx LAPACK_GLOBAL(chesvxx,CHESVXX)\n#define LAPACK_sspsv LAPACK_GLOBAL(sspsv,SSPSV)\n#define LAPACK_dspsv LAPACK_GLOBAL(dspsv,DSPSV)\n#define LAPACK_cspsv LAPACK_GLOBAL(cspsv,CSPSV)\n#define LAPACK_zspsv LAPACK_GLOBAL(zspsv,ZSPSV)\n#define LAPACK_sspsvx LAPACK_GLOBAL(sspsvx,SSPSVX)\n#define LAPACK_dspsvx LAPACK_GLOBAL(dspsvx,DSPSVX)\n#define LAPACK_cspsvx LAPACK_GLOBAL(cspsvx,CSPSVX)\n#define LAPACK_zspsvx LAPACK_GLOBAL(zspsvx,ZSPSVX)\n#define LAPACK_chpsv LAPACK_GLOBAL(chpsv,CHPSV)\n#define LAPACK_zhpsv LAPACK_GLOBAL(zhpsv,ZHPSV)\n#define LAPACK_chpsvx LAPACK_GLOBAL(chpsvx,CHPSVX)\n#define LAPACK_zhpsvx LAPACK_GLOBAL(zhpsvx,ZHPSVX)\n#define LAPACK_sgeqrf LAPACK_GLOBAL(sgeqrf,SGEQRF)\n#define LAPACK_dgeqrf LAPACK_GLOBAL(dgeqrf,DGEQRF)\n#define LAPACK_cgeqrf LAPACK_GLOBAL(cgeqrf,CGEQRF)\n#define LAPACK_zgeqrf LAPACK_GLOBAL(zgeqrf,ZGEQRF)\n#define LAPACK_sgeqpf LAPACK_GLOBAL(sgeqpf,SGEQPF)\n#define LAPACK_dgeqpf LAPACK_GLOBAL(dgeqpf,DGEQPF)\n#define LAPACK_cgeqpf LAPACK_GLOBAL(cgeqpf,CGEQPF)\n#define LAPACK_zgeqpf LAPACK_GLOBAL(zgeqpf,ZGEQPF)\n#define LAPACK_sgeqp3 LAPACK_GLOBAL(sgeqp3,SGEQP3)\n#define LAPACK_dgeqp3 LAPACK_GLOBAL(dgeqp3,DGEQP3)\n#define LAPACK_cgeqp3 LAPACK_GLOBAL(cgeqp3,CGEQP3)\n#define LAPACK_zgeqp3 LAPACK_GLOBAL(zgeqp3,ZGEQP3)\n#define LAPACK_sorgqr LAPACK_GLOBAL(sorgqr,SORGQR)\n#define LAPACK_dorgqr LAPACK_GLOBAL(dorgqr,DORGQR)\n#define LAPACK_sormqr LAPACK_GLOBAL(sormqr,SORMQR)\n#define LAPACK_dormqr LAPACK_GLOBAL(dormqr,DORMQR)\n#define LAPACK_cungqr LAPACK_GLOBAL(cungqr,CUNGQR)\n#define LAPACK_zungqr LAPACK_GLOBAL(zungqr,ZUNGQR)\n#define LAPACK_cunmqr LAPACK_GLOBAL(cunmqr,CUNMQR)\n#define LAPACK_zunmqr LAPACK_GLOBAL(zunmqr,ZUNMQR)\n#define LAPACK_sgelqf LAPACK_GLOBAL(sgelqf,SGELQF)\n#define LAPACK_dgelqf 
LAPACK_GLOBAL(dgelqf,DGELQF)\n#define LAPACK_cgelqf LAPACK_GLOBAL(cgelqf,CGELQF)\n#define LAPACK_zgelqf LAPACK_GLOBAL(zgelqf,ZGELQF)\n#define LAPACK_sorglq LAPACK_GLOBAL(sorglq,SORGLQ)\n#define LAPACK_dorglq LAPACK_GLOBAL(dorglq,DORGLQ)\n#define LAPACK_sormlq LAPACK_GLOBAL(sormlq,SORMLQ)\n#define LAPACK_dormlq LAPACK_GLOBAL(dormlq,DORMLQ)\n#define LAPACK_cunglq LAPACK_GLOBAL(cunglq,CUNGLQ)\n#define LAPACK_zunglq LAPACK_GLOBAL(zunglq,ZUNGLQ)\n#define LAPACK_cunmlq LAPACK_GLOBAL(cunmlq,CUNMLQ)\n#define LAPACK_zunmlq LAPACK_GLOBAL(zunmlq,ZUNMLQ)\n#define LAPACK_sgeqlf LAPACK_GLOBAL(sgeqlf,SGEQLF)\n#define LAPACK_dgeqlf LAPACK_GLOBAL(dgeqlf,DGEQLF)\n#define LAPACK_cgeqlf LAPACK_GLOBAL(cgeqlf,CGEQLF)\n#define LAPACK_zgeqlf LAPACK_GLOBAL(zgeqlf,ZGEQLF)\n#define LAPACK_sorgql LAPACK_GLOBAL(sorgql,SORGQL)\n#define LAPACK_dorgql LAPACK_GLOBAL(dorgql,DORGQL)\n#define LAPACK_cungql LAPACK_GLOBAL(cungql,CUNGQL)\n#define LAPACK_zungql LAPACK_GLOBAL(zungql,ZUNGQL)\n#define LAPACK_sormql LAPACK_GLOBAL(sormql,SORMQL)\n#define LAPACK_dormql LAPACK_GLOBAL(dormql,DORMQL)\n#define LAPACK_cunmql LAPACK_GLOBAL(cunmql,CUNMQL)\n#define LAPACK_zunmql LAPACK_GLOBAL(zunmql,ZUNMQL)\n#define LAPACK_sgerqf LAPACK_GLOBAL(sgerqf,SGERQF)\n#define LAPACK_dgerqf LAPACK_GLOBAL(dgerqf,DGERQF)\n#define LAPACK_cgerqf LAPACK_GLOBAL(cgerqf,CGERQF)\n#define LAPACK_zgerqf LAPACK_GLOBAL(zgerqf,ZGERQF)\n#define LAPACK_sorgrq LAPACK_GLOBAL(sorgrq,SORGRQ)\n#define LAPACK_dorgrq LAPACK_GLOBAL(dorgrq,DORGRQ)\n#define LAPACK_cungrq LAPACK_GLOBAL(cungrq,CUNGRQ)\n#define LAPACK_zungrq LAPACK_GLOBAL(zungrq,ZUNGRQ)\n#define LAPACK_sormrq LAPACK_GLOBAL(sormrq,SORMRQ)\n#define LAPACK_dormrq LAPACK_GLOBAL(dormrq,DORMRQ)\n#define LAPACK_cunmrq LAPACK_GLOBAL(cunmrq,CUNMRQ)\n#define LAPACK_zunmrq LAPACK_GLOBAL(zunmrq,ZUNMRQ)\n#define LAPACK_stzrzf LAPACK_GLOBAL(stzrzf,STZRZF)\n#define LAPACK_dtzrzf LAPACK_GLOBAL(dtzrzf,DTZRZF)\n#define LAPACK_ctzrzf LAPACK_GLOBAL(ctzrzf,CTZRZF)\n#define LAPACK_ztzrzf LAPACK_GLOBAL(ztzrzf,ZTZRZF)\n#define LAPACK_sormrz LAPACK_GLOBAL(sormrz,SORMRZ)\n#define LAPACK_dormrz LAPACK_GLOBAL(dormrz,DORMRZ)\n#define LAPACK_cunmrz LAPACK_GLOBAL(cunmrz,CUNMRZ)\n#define LAPACK_zunmrz LAPACK_GLOBAL(zunmrz,ZUNMRZ)\n#define LAPACK_sggqrf LAPACK_GLOBAL(sggqrf,SGGQRF)\n#define LAPACK_dggqrf LAPACK_GLOBAL(dggqrf,DGGQRF)\n#define LAPACK_cggqrf LAPACK_GLOBAL(cggqrf,CGGQRF)\n#define LAPACK_zggqrf LAPACK_GLOBAL(zggqrf,ZGGQRF)\n#define LAPACK_sggrqf LAPACK_GLOBAL(sggrqf,SGGRQF)\n#define LAPACK_dggrqf LAPACK_GLOBAL(dggrqf,DGGRQF)\n#define LAPACK_cggrqf LAPACK_GLOBAL(cggrqf,CGGRQF)\n#define LAPACK_zggrqf LAPACK_GLOBAL(zggrqf,ZGGRQF)\n#define LAPACK_sgebrd LAPACK_GLOBAL(sgebrd,SGEBRD)\n#define LAPACK_dgebrd LAPACK_GLOBAL(dgebrd,DGEBRD)\n#define LAPACK_cgebrd LAPACK_GLOBAL(cgebrd,CGEBRD)\n#define LAPACK_zgebrd LAPACK_GLOBAL(zgebrd,ZGEBRD)\n#define LAPACK_sgbbrd LAPACK_GLOBAL(sgbbrd,SGBBRD)\n#define LAPACK_dgbbrd LAPACK_GLOBAL(dgbbrd,DGBBRD)\n#define LAPACK_cgbbrd LAPACK_GLOBAL(cgbbrd,CGBBRD)\n#define LAPACK_zgbbrd LAPACK_GLOBAL(zgbbrd,ZGBBRD)\n#define LAPACK_sorgbr LAPACK_GLOBAL(sorgbr,SORGBR)\n#define LAPACK_dorgbr LAPACK_GLOBAL(dorgbr,DORGBR)\n#define LAPACK_sormbr LAPACK_GLOBAL(sormbr,SORMBR)\n#define LAPACK_dormbr LAPACK_GLOBAL(dormbr,DORMBR)\n#define LAPACK_cungbr LAPACK_GLOBAL(cungbr,CUNGBR)\n#define LAPACK_zungbr LAPACK_GLOBAL(zungbr,ZUNGBR)\n#define LAPACK_cunmbr LAPACK_GLOBAL(cunmbr,CUNMBR)\n#define LAPACK_zunmbr LAPACK_GLOBAL(zunmbr,ZUNMBR)\n#define LAPACK_sbdsqr LAPACK_GLOBAL(sbdsqr,SBDSQR)\n#define LAPACK_dbdsqr 
LAPACK_GLOBAL(dbdsqr,DBDSQR)\n#define LAPACK_cbdsqr LAPACK_GLOBAL(cbdsqr,CBDSQR)\n#define LAPACK_zbdsqr LAPACK_GLOBAL(zbdsqr,ZBDSQR)\n#define LAPACK_sbdsdc LAPACK_GLOBAL(sbdsdc,SBDSDC)\n#define LAPACK_dbdsdc LAPACK_GLOBAL(dbdsdc,DBDSDC)\n#define LAPACK_ssytrd LAPACK_GLOBAL(ssytrd,SSYTRD)\n#define LAPACK_dsytrd LAPACK_GLOBAL(dsytrd,DSYTRD)\n#define LAPACK_sorgtr LAPACK_GLOBAL(sorgtr,SORGTR)\n#define LAPACK_dorgtr LAPACK_GLOBAL(dorgtr,DORGTR)\n#define LAPACK_sormtr LAPACK_GLOBAL(sormtr,SORMTR)\n#define LAPACK_dormtr LAPACK_GLOBAL(dormtr,DORMTR)\n#define LAPACK_chetrd LAPACK_GLOBAL(chetrd,CHETRD)\n#define LAPACK_zhetrd LAPACK_GLOBAL(zhetrd,ZHETRD)\n#define LAPACK_cungtr LAPACK_GLOBAL(cungtr,CUNGTR)\n#define LAPACK_zungtr LAPACK_GLOBAL(zungtr,ZUNGTR)\n#define LAPACK_cunmtr LAPACK_GLOBAL(cunmtr,CUNMTR)\n#define LAPACK_zunmtr LAPACK_GLOBAL(zunmtr,ZUNMTR)\n#define LAPACK_ssptrd LAPACK_GLOBAL(ssptrd,SSPTRD)\n#define LAPACK_dsptrd LAPACK_GLOBAL(dsptrd,DSPTRD)\n#define LAPACK_sopgtr LAPACK_GLOBAL(sopgtr,SOPGTR)\n#define LAPACK_dopgtr LAPACK_GLOBAL(dopgtr,DOPGTR)\n#define LAPACK_sopmtr LAPACK_GLOBAL(sopmtr,SOPMTR)\n#define LAPACK_dopmtr LAPACK_GLOBAL(dopmtr,DOPMTR)\n#define LAPACK_chptrd LAPACK_GLOBAL(chptrd,CHPTRD)\n#define LAPACK_zhptrd LAPACK_GLOBAL(zhptrd,ZHPTRD)\n#define LAPACK_cupgtr LAPACK_GLOBAL(cupgtr,CUPGTR)\n#define LAPACK_zupgtr LAPACK_GLOBAL(zupgtr,ZUPGTR)\n#define LAPACK_cupmtr LAPACK_GLOBAL(cupmtr,CUPMTR)\n#define LAPACK_zupmtr LAPACK_GLOBAL(zupmtr,ZUPMTR)\n#define LAPACK_ssbtrd LAPACK_GLOBAL(ssbtrd,SSBTRD)\n#define LAPACK_dsbtrd LAPACK_GLOBAL(dsbtrd,DSBTRD)\n#define LAPACK_chbtrd LAPACK_GLOBAL(chbtrd,CHBTRD)\n#define LAPACK_zhbtrd LAPACK_GLOBAL(zhbtrd,ZHBTRD)\n#define LAPACK_ssterf LAPACK_GLOBAL(ssterf,SSTERF)\n#define LAPACK_dsterf LAPACK_GLOBAL(dsterf,DSTERF)\n#define LAPACK_ssteqr LAPACK_GLOBAL(ssteqr,SSTEQR)\n#define LAPACK_dsteqr LAPACK_GLOBAL(dsteqr,DSTEQR)\n#define LAPACK_csteqr LAPACK_GLOBAL(csteqr,CSTEQR)\n#define LAPACK_zsteqr LAPACK_GLOBAL(zsteqr,ZSTEQR)\n#define LAPACK_sstemr LAPACK_GLOBAL(sstemr,SSTEMR)\n#define LAPACK_dstemr LAPACK_GLOBAL(dstemr,DSTEMR)\n#define LAPACK_cstemr LAPACK_GLOBAL(cstemr,CSTEMR)\n#define LAPACK_zstemr LAPACK_GLOBAL(zstemr,ZSTEMR)\n#define LAPACK_sstedc LAPACK_GLOBAL(sstedc,SSTEDC)\n#define LAPACK_dstedc LAPACK_GLOBAL(dstedc,DSTEDC)\n#define LAPACK_cstedc LAPACK_GLOBAL(cstedc,CSTEDC)\n#define LAPACK_zstedc LAPACK_GLOBAL(zstedc,ZSTEDC)\n#define LAPACK_sstegr LAPACK_GLOBAL(sstegr,SSTEGR)\n#define LAPACK_dstegr LAPACK_GLOBAL(dstegr,DSTEGR)\n#define LAPACK_cstegr LAPACK_GLOBAL(cstegr,CSTEGR)\n#define LAPACK_zstegr LAPACK_GLOBAL(zstegr,ZSTEGR)\n#define LAPACK_spteqr LAPACK_GLOBAL(spteqr,SPTEQR)\n#define LAPACK_dpteqr LAPACK_GLOBAL(dpteqr,DPTEQR)\n#define LAPACK_cpteqr LAPACK_GLOBAL(cpteqr,CPTEQR)\n#define LAPACK_zpteqr LAPACK_GLOBAL(zpteqr,ZPTEQR)\n#define LAPACK_sstebz LAPACK_GLOBAL(sstebz,SSTEBZ)\n#define LAPACK_dstebz LAPACK_GLOBAL(dstebz,DSTEBZ)\n#define LAPACK_sstein LAPACK_GLOBAL(sstein,SSTEIN)\n#define LAPACK_dstein LAPACK_GLOBAL(dstein,DSTEIN)\n#define LAPACK_cstein LAPACK_GLOBAL(cstein,CSTEIN)\n#define LAPACK_zstein LAPACK_GLOBAL(zstein,ZSTEIN)\n#define LAPACK_sdisna LAPACK_GLOBAL(sdisna,SDISNA)\n#define LAPACK_ddisna LAPACK_GLOBAL(ddisna,DDISNA)\n#define LAPACK_ssygst LAPACK_GLOBAL(ssygst,SSYGST)\n#define LAPACK_dsygst LAPACK_GLOBAL(dsygst,DSYGST)\n#define LAPACK_chegst LAPACK_GLOBAL(chegst,CHEGST)\n#define LAPACK_zhegst LAPACK_GLOBAL(zhegst,ZHEGST)\n#define LAPACK_sspgst LAPACK_GLOBAL(sspgst,SSPGST)\n#define LAPACK_dspgst 
LAPACK_GLOBAL(dspgst,DSPGST)\n#define LAPACK_chpgst LAPACK_GLOBAL(chpgst,CHPGST)\n#define LAPACK_zhpgst LAPACK_GLOBAL(zhpgst,ZHPGST)\n#define LAPACK_ssbgst LAPACK_GLOBAL(ssbgst,SSBGST)\n#define LAPACK_dsbgst LAPACK_GLOBAL(dsbgst,DSBGST)\n#define LAPACK_chbgst LAPACK_GLOBAL(chbgst,CHBGST)\n#define LAPACK_zhbgst LAPACK_GLOBAL(zhbgst,ZHBGST)\n#define LAPACK_spbstf LAPACK_GLOBAL(spbstf,SPBSTF)\n#define LAPACK_dpbstf LAPACK_GLOBAL(dpbstf,DPBSTF)\n#define LAPACK_cpbstf LAPACK_GLOBAL(cpbstf,CPBSTF)\n#define LAPACK_zpbstf LAPACK_GLOBAL(zpbstf,ZPBSTF)\n#define LAPACK_sgehrd LAPACK_GLOBAL(sgehrd,SGEHRD)\n#define LAPACK_dgehrd LAPACK_GLOBAL(dgehrd,DGEHRD)\n#define LAPACK_cgehrd LAPACK_GLOBAL(cgehrd,CGEHRD)\n#define LAPACK_zgehrd LAPACK_GLOBAL(zgehrd,ZGEHRD)\n#define LAPACK_sorghr LAPACK_GLOBAL(sorghr,SORGHR)\n#define LAPACK_dorghr LAPACK_GLOBAL(dorghr,DORGHR)\n#define LAPACK_sormhr LAPACK_GLOBAL(sormhr,SORMHR)\n#define LAPACK_dormhr LAPACK_GLOBAL(dormhr,DORMHR)\n#define LAPACK_cunghr LAPACK_GLOBAL(cunghr,CUNGHR)\n#define LAPACK_zunghr LAPACK_GLOBAL(zunghr,ZUNGHR)\n#define LAPACK_cunmhr LAPACK_GLOBAL(cunmhr,CUNMHR)\n#define LAPACK_zunmhr LAPACK_GLOBAL(zunmhr,ZUNMHR)\n#define LAPACK_sgebal LAPACK_GLOBAL(sgebal,SGEBAL)\n#define LAPACK_dgebal LAPACK_GLOBAL(dgebal,DGEBAL)\n#define LAPACK_cgebal LAPACK_GLOBAL(cgebal,CGEBAL)\n#define LAPACK_zgebal LAPACK_GLOBAL(zgebal,ZGEBAL)\n#define LAPACK_sgebak LAPACK_GLOBAL(sgebak,SGEBAK)\n#define LAPACK_dgebak LAPACK_GLOBAL(dgebak,DGEBAK)\n#define LAPACK_cgebak LAPACK_GLOBAL(cgebak,CGEBAK)\n#define LAPACK_zgebak LAPACK_GLOBAL(zgebak,ZGEBAK)\n#define LAPACK_shseqr LAPACK_GLOBAL(shseqr,SHSEQR)\n#define LAPACK_dhseqr LAPACK_GLOBAL(dhseqr,DHSEQR)\n#define LAPACK_chseqr LAPACK_GLOBAL(chseqr,CHSEQR)\n#define LAPACK_zhseqr LAPACK_GLOBAL(zhseqr,ZHSEQR)\n#define LAPACK_shsein LAPACK_GLOBAL(shsein,SHSEIN)\n#define LAPACK_dhsein LAPACK_GLOBAL(dhsein,DHSEIN)\n#define LAPACK_chsein LAPACK_GLOBAL(chsein,CHSEIN)\n#define LAPACK_zhsein LAPACK_GLOBAL(zhsein,ZHSEIN)\n#define LAPACK_strevc LAPACK_GLOBAL(strevc,STREVC)\n#define LAPACK_dtrevc LAPACK_GLOBAL(dtrevc,DTREVC)\n#define LAPACK_ctrevc LAPACK_GLOBAL(ctrevc,CTREVC)\n#define LAPACK_ztrevc LAPACK_GLOBAL(ztrevc,ZTREVC)\n#define LAPACK_strsna LAPACK_GLOBAL(strsna,STRSNA)\n#define LAPACK_dtrsna LAPACK_GLOBAL(dtrsna,DTRSNA)\n#define LAPACK_ctrsna LAPACK_GLOBAL(ctrsna,CTRSNA)\n#define LAPACK_ztrsna LAPACK_GLOBAL(ztrsna,ZTRSNA)\n#define LAPACK_strexc LAPACK_GLOBAL(strexc,STREXC)\n#define LAPACK_dtrexc LAPACK_GLOBAL(dtrexc,DTREXC)\n#define LAPACK_ctrexc LAPACK_GLOBAL(ctrexc,CTREXC)\n#define LAPACK_ztrexc LAPACK_GLOBAL(ztrexc,ZTREXC)\n#define LAPACK_strsen LAPACK_GLOBAL(strsen,STRSEN)\n#define LAPACK_dtrsen LAPACK_GLOBAL(dtrsen,DTRSEN)\n#define LAPACK_ctrsen LAPACK_GLOBAL(ctrsen,CTRSEN)\n#define LAPACK_ztrsen LAPACK_GLOBAL(ztrsen,ZTRSEN)\n#define LAPACK_strsyl LAPACK_GLOBAL(strsyl,STRSYL)\n#define LAPACK_dtrsyl LAPACK_GLOBAL(dtrsyl,DTRSYL)\n#define LAPACK_ctrsyl LAPACK_GLOBAL(ctrsyl,CTRSYL)\n#define LAPACK_ztrsyl LAPACK_GLOBAL(ztrsyl,ZTRSYL)\n#define LAPACK_sgghrd LAPACK_GLOBAL(sgghrd,SGGHRD)\n#define LAPACK_dgghrd LAPACK_GLOBAL(dgghrd,DGGHRD)\n#define LAPACK_cgghrd LAPACK_GLOBAL(cgghrd,CGGHRD)\n#define LAPACK_zgghrd LAPACK_GLOBAL(zgghrd,ZGGHRD)\n#define LAPACK_sggbal LAPACK_GLOBAL(sggbal,SGGBAL)\n#define LAPACK_dggbal LAPACK_GLOBAL(dggbal,DGGBAL)\n#define LAPACK_cggbal LAPACK_GLOBAL(cggbal,CGGBAL)\n#define LAPACK_zggbal LAPACK_GLOBAL(zggbal,ZGGBAL)\n#define LAPACK_sggbak LAPACK_GLOBAL(sggbak,SGGBAK)\n#define LAPACK_dggbak 
LAPACK_GLOBAL(dggbak,DGGBAK)\n#define LAPACK_cggbak LAPACK_GLOBAL(cggbak,CGGBAK)\n#define LAPACK_zggbak LAPACK_GLOBAL(zggbak,ZGGBAK)\n#define LAPACK_shgeqz LAPACK_GLOBAL(shgeqz,SHGEQZ)\n#define LAPACK_dhgeqz LAPACK_GLOBAL(dhgeqz,DHGEQZ)\n#define LAPACK_chgeqz LAPACK_GLOBAL(chgeqz,CHGEQZ)\n#define LAPACK_zhgeqz LAPACK_GLOBAL(zhgeqz,ZHGEQZ)\n#define LAPACK_stgevc LAPACK_GLOBAL(stgevc,STGEVC)\n#define LAPACK_dtgevc LAPACK_GLOBAL(dtgevc,DTGEVC)\n#define LAPACK_ctgevc LAPACK_GLOBAL(ctgevc,CTGEVC)\n#define LAPACK_ztgevc LAPACK_GLOBAL(ztgevc,ZTGEVC)\n#define LAPACK_stgexc LAPACK_GLOBAL(stgexc,STGEXC)\n#define LAPACK_dtgexc LAPACK_GLOBAL(dtgexc,DTGEXC)\n#define LAPACK_ctgexc LAPACK_GLOBAL(ctgexc,CTGEXC)\n#define LAPACK_ztgexc LAPACK_GLOBAL(ztgexc,ZTGEXC)\n#define LAPACK_stgsen LAPACK_GLOBAL(stgsen,STGSEN)\n#define LAPACK_dtgsen LAPACK_GLOBAL(dtgsen,DTGSEN)\n#define LAPACK_ctgsen LAPACK_GLOBAL(ctgsen,CTGSEN)\n#define LAPACK_ztgsen LAPACK_GLOBAL(ztgsen,ZTGSEN)\n#define LAPACK_stgsyl LAPACK_GLOBAL(stgsyl,STGSYL)\n#define LAPACK_dtgsyl LAPACK_GLOBAL(dtgsyl,DTGSYL)\n#define LAPACK_ctgsyl LAPACK_GLOBAL(ctgsyl,CTGSYL)\n#define LAPACK_ztgsyl LAPACK_GLOBAL(ztgsyl,ZTGSYL)\n#define LAPACK_stgsna LAPACK_GLOBAL(stgsna,STGSNA)\n#define LAPACK_dtgsna LAPACK_GLOBAL(dtgsna,DTGSNA)\n#define LAPACK_ctgsna LAPACK_GLOBAL(ctgsna,CTGSNA)\n#define LAPACK_ztgsna LAPACK_GLOBAL(ztgsna,ZTGSNA)\n#define LAPACK_sggsvp LAPACK_GLOBAL(sggsvp,SGGSVP)\n#define LAPACK_dggsvp LAPACK_GLOBAL(dggsvp,DGGSVP)\n#define LAPACK_cggsvp LAPACK_GLOBAL(cggsvp,CGGSVP)\n#define LAPACK_zggsvp LAPACK_GLOBAL(zggsvp,ZGGSVP)\n#define LAPACK_stgsja LAPACK_GLOBAL(stgsja,STGSJA)\n#define LAPACK_dtgsja LAPACK_GLOBAL(dtgsja,DTGSJA)\n#define LAPACK_ctgsja LAPACK_GLOBAL(ctgsja,CTGSJA)\n#define LAPACK_ztgsja LAPACK_GLOBAL(ztgsja,ZTGSJA)\n#define LAPACK_sgels LAPACK_GLOBAL(sgels,SGELS)\n#define LAPACK_dgels LAPACK_GLOBAL(dgels,DGELS)\n#define LAPACK_cgels LAPACK_GLOBAL(cgels,CGELS)\n#define LAPACK_zgels LAPACK_GLOBAL(zgels,ZGELS)\n#define LAPACK_sgelsy LAPACK_GLOBAL(sgelsy,SGELSY)\n#define LAPACK_dgelsy LAPACK_GLOBAL(dgelsy,DGELSY)\n#define LAPACK_cgelsy LAPACK_GLOBAL(cgelsy,CGELSY)\n#define LAPACK_zgelsy LAPACK_GLOBAL(zgelsy,ZGELSY)\n#define LAPACK_sgelss LAPACK_GLOBAL(sgelss,SGELSS)\n#define LAPACK_dgelss LAPACK_GLOBAL(dgelss,DGELSS)\n#define LAPACK_cgelss LAPACK_GLOBAL(cgelss,CGELSS)\n#define LAPACK_zgelss LAPACK_GLOBAL(zgelss,ZGELSS)\n#define LAPACK_sgelsd LAPACK_GLOBAL(sgelsd,SGELSD)\n#define LAPACK_dgelsd LAPACK_GLOBAL(dgelsd,DGELSD)\n#define LAPACK_cgelsd LAPACK_GLOBAL(cgelsd,CGELSD)\n#define LAPACK_zgelsd LAPACK_GLOBAL(zgelsd,ZGELSD)\n#define LAPACK_sgglse LAPACK_GLOBAL(sgglse,SGGLSE)\n#define LAPACK_dgglse LAPACK_GLOBAL(dgglse,DGGLSE)\n#define LAPACK_cgglse LAPACK_GLOBAL(cgglse,CGGLSE)\n#define LAPACK_zgglse LAPACK_GLOBAL(zgglse,ZGGLSE)\n#define LAPACK_sggglm LAPACK_GLOBAL(sggglm,SGGGLM)\n#define LAPACK_dggglm LAPACK_GLOBAL(dggglm,DGGGLM)\n#define LAPACK_cggglm LAPACK_GLOBAL(cggglm,CGGGLM)\n#define LAPACK_zggglm LAPACK_GLOBAL(zggglm,ZGGGLM)\n#define LAPACK_ssyev LAPACK_GLOBAL(ssyev,SSYEV)\n#define LAPACK_dsyev LAPACK_GLOBAL(dsyev,DSYEV)\n#define LAPACK_cheev LAPACK_GLOBAL(cheev,CHEEV)\n#define LAPACK_zheev LAPACK_GLOBAL(zheev,ZHEEV)\n#define LAPACK_ssyevd LAPACK_GLOBAL(ssyevd,SSYEVD)\n#define LAPACK_dsyevd LAPACK_GLOBAL(dsyevd,DSYEVD)\n#define LAPACK_cheevd LAPACK_GLOBAL(cheevd,CHEEVD)\n#define LAPACK_zheevd LAPACK_GLOBAL(zheevd,ZHEEVD)\n#define LAPACK_ssyevx LAPACK_GLOBAL(ssyevx,SSYEVX)\n#define LAPACK_dsyevx LAPACK_GLOBAL(dsyevx,DSYEVX)\n#define 
LAPACK_cheevx LAPACK_GLOBAL(cheevx,CHEEVX)\n#define LAPACK_zheevx LAPACK_GLOBAL(zheevx,ZHEEVX)\n#define LAPACK_ssyevr LAPACK_GLOBAL(ssyevr,SSYEVR)\n#define LAPACK_dsyevr LAPACK_GLOBAL(dsyevr,DSYEVR)\n#define LAPACK_cheevr LAPACK_GLOBAL(cheevr,CHEEVR)\n#define LAPACK_zheevr LAPACK_GLOBAL(zheevr,ZHEEVR)\n#define LAPACK_sspev LAPACK_GLOBAL(sspev,SSPEV)\n#define LAPACK_dspev LAPACK_GLOBAL(dspev,DSPEV)\n#define LAPACK_chpev LAPACK_GLOBAL(chpev,CHPEV)\n#define LAPACK_zhpev LAPACK_GLOBAL(zhpev,ZHPEV)\n#define LAPACK_sspevd LAPACK_GLOBAL(sspevd,SSPEVD)\n#define LAPACK_dspevd LAPACK_GLOBAL(dspevd,DSPEVD)\n#define LAPACK_chpevd LAPACK_GLOBAL(chpevd,CHPEVD)\n#define LAPACK_zhpevd LAPACK_GLOBAL(zhpevd,ZHPEVD)\n#define LAPACK_sspevx LAPACK_GLOBAL(sspevx,SSPEVX)\n#define LAPACK_dspevx LAPACK_GLOBAL(dspevx,DSPEVX)\n#define LAPACK_chpevx LAPACK_GLOBAL(chpevx,CHPEVX)\n#define LAPACK_zhpevx LAPACK_GLOBAL(zhpevx,ZHPEVX)\n#define LAPACK_ssbev LAPACK_GLOBAL(ssbev,SSBEV)\n#define LAPACK_dsbev LAPACK_GLOBAL(dsbev,DSBEV)\n#define LAPACK_chbev LAPACK_GLOBAL(chbev,CHBEV)\n#define LAPACK_zhbev LAPACK_GLOBAL(zhbev,ZHBEV)\n#define LAPACK_ssbevd LAPACK_GLOBAL(ssbevd,SSBEVD)\n#define LAPACK_dsbevd LAPACK_GLOBAL(dsbevd,DSBEVD)\n#define LAPACK_chbevd LAPACK_GLOBAL(chbevd,CHBEVD)\n#define LAPACK_zhbevd LAPACK_GLOBAL(zhbevd,ZHBEVD)\n#define LAPACK_ssbevx LAPACK_GLOBAL(ssbevx,SSBEVX)\n#define LAPACK_dsbevx LAPACK_GLOBAL(dsbevx,DSBEVX)\n#define LAPACK_chbevx LAPACK_GLOBAL(chbevx,CHBEVX)\n#define LAPACK_zhbevx LAPACK_GLOBAL(zhbevx,ZHBEVX)\n#define LAPACK_sstev LAPACK_GLOBAL(sstev,SSTEV)\n#define LAPACK_dstev LAPACK_GLOBAL(dstev,DSTEV)\n#define LAPACK_sstevd LAPACK_GLOBAL(sstevd,SSTEVD)\n#define LAPACK_dstevd LAPACK_GLOBAL(dstevd,DSTEVD)\n#define LAPACK_sstevx LAPACK_GLOBAL(sstevx,SSTEVX)\n#define LAPACK_dstevx LAPACK_GLOBAL(dstevx,DSTEVX)\n#define LAPACK_sstevr LAPACK_GLOBAL(sstevr,SSTEVR)\n#define LAPACK_dstevr LAPACK_GLOBAL(dstevr,DSTEVR)\n#define LAPACK_sgees LAPACK_GLOBAL(sgees,SGEES)\n#define LAPACK_dgees LAPACK_GLOBAL(dgees,DGEES)\n#define LAPACK_cgees LAPACK_GLOBAL(cgees,CGEES)\n#define LAPACK_zgees LAPACK_GLOBAL(zgees,ZGEES)\n#define LAPACK_sgeesx LAPACK_GLOBAL(sgeesx,SGEESX)\n#define LAPACK_dgeesx LAPACK_GLOBAL(dgeesx,DGEESX)\n#define LAPACK_cgeesx LAPACK_GLOBAL(cgeesx,CGEESX)\n#define LAPACK_zgeesx LAPACK_GLOBAL(zgeesx,ZGEESX)\n#define LAPACK_sgeev LAPACK_GLOBAL(sgeev,SGEEV)\n#define LAPACK_dgeev LAPACK_GLOBAL(dgeev,DGEEV)\n#define LAPACK_cgeev LAPACK_GLOBAL(cgeev,CGEEV)\n#define LAPACK_zgeev LAPACK_GLOBAL(zgeev,ZGEEV)\n#define LAPACK_sgeevx LAPACK_GLOBAL(sgeevx,SGEEVX)\n#define LAPACK_dgeevx LAPACK_GLOBAL(dgeevx,DGEEVX)\n#define LAPACK_cgeevx LAPACK_GLOBAL(cgeevx,CGEEVX)\n#define LAPACK_zgeevx LAPACK_GLOBAL(zgeevx,ZGEEVX)\n#define LAPACK_sgesvd LAPACK_GLOBAL(sgesvd,SGESVD)\n#define LAPACK_dgesvd LAPACK_GLOBAL(dgesvd,DGESVD)\n#define LAPACK_cgesvd LAPACK_GLOBAL(cgesvd,CGESVD)\n#define LAPACK_zgesvd LAPACK_GLOBAL(zgesvd,ZGESVD)\n#define LAPACK_sgesdd LAPACK_GLOBAL(sgesdd,SGESDD)\n#define LAPACK_dgesdd LAPACK_GLOBAL(dgesdd,DGESDD)\n#define LAPACK_cgesdd LAPACK_GLOBAL(cgesdd,CGESDD)\n#define LAPACK_zgesdd LAPACK_GLOBAL(zgesdd,ZGESDD)\n#define LAPACK_dgejsv LAPACK_GLOBAL(dgejsv,DGEJSV)\n#define LAPACK_sgejsv LAPACK_GLOBAL(sgejsv,SGEJSV)\n#define LAPACK_dgesvj LAPACK_GLOBAL(dgesvj,DGESVJ)\n#define LAPACK_sgesvj LAPACK_GLOBAL(sgesvj,SGESVJ)\n#define LAPACK_sggsvd LAPACK_GLOBAL(sggsvd,SGGSVD)\n#define LAPACK_dggsvd LAPACK_GLOBAL(dggsvd,DGGSVD)\n#define LAPACK_cggsvd LAPACK_GLOBAL(cggsvd,CGGSVD)\n#define LAPACK_zggsvd 
LAPACK_GLOBAL(zggsvd,ZGGSVD)\n#define LAPACK_ssygv LAPACK_GLOBAL(ssygv,SSYGV)\n#define LAPACK_dsygv LAPACK_GLOBAL(dsygv,DSYGV)\n#define LAPACK_chegv LAPACK_GLOBAL(chegv,CHEGV)\n#define LAPACK_zhegv LAPACK_GLOBAL(zhegv,ZHEGV)\n#define LAPACK_ssygvd LAPACK_GLOBAL(ssygvd,SSYGVD)\n#define LAPACK_dsygvd LAPACK_GLOBAL(dsygvd,DSYGVD)\n#define LAPACK_chegvd LAPACK_GLOBAL(chegvd,CHEGVD)\n#define LAPACK_zhegvd LAPACK_GLOBAL(zhegvd,ZHEGVD)\n#define LAPACK_ssygvx LAPACK_GLOBAL(ssygvx,SSYGVX)\n#define LAPACK_dsygvx LAPACK_GLOBAL(dsygvx,DSYGVX)\n#define LAPACK_chegvx LAPACK_GLOBAL(chegvx,CHEGVX)\n#define LAPACK_zhegvx LAPACK_GLOBAL(zhegvx,ZHEGVX)\n#define LAPACK_sspgv LAPACK_GLOBAL(sspgv,SSPGV)\n#define LAPACK_dspgv LAPACK_GLOBAL(dspgv,DSPGV)\n#define LAPACK_chpgv LAPACK_GLOBAL(chpgv,CHPGV)\n#define LAPACK_zhpgv LAPACK_GLOBAL(zhpgv,ZHPGV)\n#define LAPACK_sspgvd LAPACK_GLOBAL(sspgvd,SSPGVD)\n#define LAPACK_dspgvd LAPACK_GLOBAL(dspgvd,DSPGVD)\n#define LAPACK_chpgvd LAPACK_GLOBAL(chpgvd,CHPGVD)\n#define LAPACK_zhpgvd LAPACK_GLOBAL(zhpgvd,ZHPGVD)\n#define LAPACK_sspgvx LAPACK_GLOBAL(sspgvx,SSPGVX)\n#define LAPACK_dspgvx LAPACK_GLOBAL(dspgvx,DSPGVX)\n#define LAPACK_chpgvx LAPACK_GLOBAL(chpgvx,CHPGVX)\n#define LAPACK_zhpgvx LAPACK_GLOBAL(zhpgvx,ZHPGVX)\n#define LAPACK_ssbgv LAPACK_GLOBAL(ssbgv,SSBGV)\n#define LAPACK_dsbgv LAPACK_GLOBAL(dsbgv,DSBGV)\n#define LAPACK_chbgv LAPACK_GLOBAL(chbgv,CHBGV)\n#define LAPACK_zhbgv LAPACK_GLOBAL(zhbgv,ZHBGV)\n#define LAPACK_ssbgvd LAPACK_GLOBAL(ssbgvd,SSBGVD)\n#define LAPACK_dsbgvd LAPACK_GLOBAL(dsbgvd,DSBGVD)\n#define LAPACK_chbgvd LAPACK_GLOBAL(chbgvd,CHBGVD)\n#define LAPACK_zhbgvd LAPACK_GLOBAL(zhbgvd,ZHBGVD)\n#define LAPACK_ssbgvx LAPACK_GLOBAL(ssbgvx,SSBGVX)\n#define LAPACK_dsbgvx LAPACK_GLOBAL(dsbgvx,DSBGVX)\n#define LAPACK_chbgvx LAPACK_GLOBAL(chbgvx,CHBGVX)\n#define LAPACK_zhbgvx LAPACK_GLOBAL(zhbgvx,ZHBGVX)\n#define LAPACK_sgges LAPACK_GLOBAL(sgges,SGGES)\n#define LAPACK_dgges LAPACK_GLOBAL(dgges,DGGES)\n#define LAPACK_cgges LAPACK_GLOBAL(cgges,CGGES)\n#define LAPACK_zgges LAPACK_GLOBAL(zgges,ZGGES)\n#define LAPACK_sggesx LAPACK_GLOBAL(sggesx,SGGESX)\n#define LAPACK_dggesx LAPACK_GLOBAL(dggesx,DGGESX)\n#define LAPACK_cggesx LAPACK_GLOBAL(cggesx,CGGESX)\n#define LAPACK_zggesx LAPACK_GLOBAL(zggesx,ZGGESX)\n#define LAPACK_sggev LAPACK_GLOBAL(sggev,SGGEV)\n#define LAPACK_dggev LAPACK_GLOBAL(dggev,DGGEV)\n#define LAPACK_cggev LAPACK_GLOBAL(cggev,CGGEV)\n#define LAPACK_zggev LAPACK_GLOBAL(zggev,ZGGEV)\n#define LAPACK_sggevx LAPACK_GLOBAL(sggevx,SGGEVX)\n#define LAPACK_dggevx LAPACK_GLOBAL(dggevx,DGGEVX)\n#define LAPACK_cggevx LAPACK_GLOBAL(cggevx,CGGEVX)\n#define LAPACK_zggevx LAPACK_GLOBAL(zggevx,ZGGEVX)\n#define LAPACK_dsfrk LAPACK_GLOBAL(dsfrk,DSFRK)\n#define LAPACK_ssfrk LAPACK_GLOBAL(ssfrk,SSFRK)\n#define LAPACK_zhfrk LAPACK_GLOBAL(zhfrk,ZHFRK)\n#define LAPACK_chfrk LAPACK_GLOBAL(chfrk,CHFRK)\n#define LAPACK_dtfsm LAPACK_GLOBAL(dtfsm,DTFSM)\n#define LAPACK_stfsm LAPACK_GLOBAL(stfsm,STFSM)\n#define LAPACK_ztfsm LAPACK_GLOBAL(ztfsm,ZTFSM)\n#define LAPACK_ctfsm LAPACK_GLOBAL(ctfsm,CTFSM)\n#define LAPACK_dtfttp LAPACK_GLOBAL(dtfttp,DTFTTP)\n#define LAPACK_stfttp LAPACK_GLOBAL(stfttp,STFTTP)\n#define LAPACK_ztfttp LAPACK_GLOBAL(ztfttp,ZTFTTP)\n#define LAPACK_ctfttp LAPACK_GLOBAL(ctfttp,CTFTTP)\n#define LAPACK_dtfttr LAPACK_GLOBAL(dtfttr,DTFTTR)\n#define LAPACK_stfttr LAPACK_GLOBAL(stfttr,STFTTR)\n#define LAPACK_ztfttr LAPACK_GLOBAL(ztfttr,ZTFTTR)\n#define LAPACK_ctfttr LAPACK_GLOBAL(ctfttr,CTFTTR)\n#define LAPACK_dtpttf LAPACK_GLOBAL(dtpttf,DTPTTF)\n#define 
LAPACK_stpttf LAPACK_GLOBAL(stpttf,STPTTF)\n#define LAPACK_ztpttf LAPACK_GLOBAL(ztpttf,ZTPTTF)\n#define LAPACK_ctpttf LAPACK_GLOBAL(ctpttf,CTPTTF)\n#define LAPACK_dtpttr LAPACK_GLOBAL(dtpttr,DTPTTR)\n#define LAPACK_stpttr LAPACK_GLOBAL(stpttr,STPTTR)\n#define LAPACK_ztpttr LAPACK_GLOBAL(ztpttr,ZTPTTR)\n#define LAPACK_ctpttr LAPACK_GLOBAL(ctpttr,CTPTTR)\n#define LAPACK_dtrttf LAPACK_GLOBAL(dtrttf,DTRTTF)\n#define LAPACK_strttf LAPACK_GLOBAL(strttf,STRTTF)\n#define LAPACK_ztrttf LAPACK_GLOBAL(ztrttf,ZTRTTF)\n#define LAPACK_ctrttf LAPACK_GLOBAL(ctrttf,CTRTTF)\n#define LAPACK_dtrttp LAPACK_GLOBAL(dtrttp,DTRTTP)\n#define LAPACK_strttp LAPACK_GLOBAL(strttp,STRTTP)\n#define LAPACK_ztrttp LAPACK_GLOBAL(ztrttp,ZTRTTP)\n#define LAPACK_ctrttp LAPACK_GLOBAL(ctrttp,CTRTTP)\n#define LAPACK_sgeqrfp LAPACK_GLOBAL(sgeqrfp,SGEQRFP)\n#define LAPACK_dgeqrfp LAPACK_GLOBAL(dgeqrfp,DGEQRFP)\n#define LAPACK_cgeqrfp LAPACK_GLOBAL(cgeqrfp,CGEQRFP)\n#define LAPACK_zgeqrfp LAPACK_GLOBAL(zgeqrfp,ZGEQRFP)\n#define LAPACK_clacgv LAPACK_GLOBAL(clacgv,CLACGV)\n#define LAPACK_zlacgv LAPACK_GLOBAL(zlacgv,ZLACGV)\n#define LAPACK_slarnv LAPACK_GLOBAL(slarnv,SLARNV)\n#define LAPACK_dlarnv LAPACK_GLOBAL(dlarnv,DLARNV)\n#define LAPACK_clarnv LAPACK_GLOBAL(clarnv,CLARNV)\n#define LAPACK_zlarnv LAPACK_GLOBAL(zlarnv,ZLARNV)\n#define LAPACK_sgeqr2 LAPACK_GLOBAL(sgeqr2,SGEQR2)\n#define LAPACK_dgeqr2 LAPACK_GLOBAL(dgeqr2,DGEQR2)\n#define LAPACK_cgeqr2 LAPACK_GLOBAL(cgeqr2,CGEQR2)\n#define LAPACK_zgeqr2 LAPACK_GLOBAL(zgeqr2,ZGEQR2)\n#define LAPACK_slacpy LAPACK_GLOBAL(slacpy,SLACPY)\n#define LAPACK_dlacpy LAPACK_GLOBAL(dlacpy,DLACPY)\n#define LAPACK_clacpy LAPACK_GLOBAL(clacpy,CLACPY)\n#define LAPACK_zlacpy LAPACK_GLOBAL(zlacpy,ZLACPY)\n#define LAPACK_sgetf2 LAPACK_GLOBAL(sgetf2,SGETF2)\n#define LAPACK_dgetf2 LAPACK_GLOBAL(dgetf2,DGETF2)\n#define LAPACK_cgetf2 LAPACK_GLOBAL(cgetf2,CGETF2)\n#define LAPACK_zgetf2 LAPACK_GLOBAL(zgetf2,ZGETF2)\n#define LAPACK_slaswp LAPACK_GLOBAL(slaswp,SLASWP)\n#define LAPACK_dlaswp LAPACK_GLOBAL(dlaswp,DLASWP)\n#define LAPACK_claswp LAPACK_GLOBAL(claswp,CLASWP)\n#define LAPACK_zlaswp LAPACK_GLOBAL(zlaswp,ZLASWP)\n#define LAPACK_slange LAPACK_GLOBAL(slange,SLANGE)\n#define LAPACK_dlange LAPACK_GLOBAL(dlange,DLANGE)\n#define LAPACK_clange LAPACK_GLOBAL(clange,CLANGE)\n#define LAPACK_zlange LAPACK_GLOBAL(zlange,ZLANGE)\n#define LAPACK_clanhe LAPACK_GLOBAL(clanhe,CLANHE)\n#define LAPACK_zlanhe LAPACK_GLOBAL(zlanhe,ZLANHE)\n#define LAPACK_slansy LAPACK_GLOBAL(slansy,SLANSY)\n#define LAPACK_dlansy LAPACK_GLOBAL(dlansy,DLANSY)\n#define LAPACK_clansy LAPACK_GLOBAL(clansy,CLANSY)\n#define LAPACK_zlansy LAPACK_GLOBAL(zlansy,ZLANSY)\n#define LAPACK_slantr LAPACK_GLOBAL(slantr,SLANTR)\n#define LAPACK_dlantr LAPACK_GLOBAL(dlantr,DLANTR)\n#define LAPACK_clantr LAPACK_GLOBAL(clantr,CLANTR)\n#define LAPACK_zlantr LAPACK_GLOBAL(zlantr,ZLANTR)\n#define LAPACK_slamch LAPACK_GLOBAL(slamch,SLAMCH)\n#define LAPACK_dlamch LAPACK_GLOBAL(dlamch,DLAMCH)\n#define LAPACK_sgelq2 LAPACK_GLOBAL(sgelq2,SGELQ2)\n#define LAPACK_dgelq2 LAPACK_GLOBAL(dgelq2,DGELQ2)\n#define LAPACK_cgelq2 LAPACK_GLOBAL(cgelq2,CGELQ2)\n#define LAPACK_zgelq2 LAPACK_GLOBAL(zgelq2,ZGELQ2)\n#define LAPACK_slarfb LAPACK_GLOBAL(slarfb,SLARFB)\n#define LAPACK_dlarfb LAPACK_GLOBAL(dlarfb,DLARFB)\n#define LAPACK_clarfb LAPACK_GLOBAL(clarfb,CLARFB)\n#define LAPACK_zlarfb LAPACK_GLOBAL(zlarfb,ZLARFB)\n#define LAPACK_slarfg LAPACK_GLOBAL(slarfg,SLARFG)\n#define LAPACK_dlarfg LAPACK_GLOBAL(dlarfg,DLARFG)\n#define LAPACK_clarfg LAPACK_GLOBAL(clarfg,CLARFG)\n#define 
LAPACK_zlarfg LAPACK_GLOBAL(zlarfg,ZLARFG)\n#define LAPACK_slarft LAPACK_GLOBAL(slarft,SLARFT)\n#define LAPACK_dlarft LAPACK_GLOBAL(dlarft,DLARFT)\n#define LAPACK_clarft LAPACK_GLOBAL(clarft,CLARFT)\n#define LAPACK_zlarft LAPACK_GLOBAL(zlarft,ZLARFT)\n#define LAPACK_slarfx LAPACK_GLOBAL(slarfx,SLARFX)\n#define LAPACK_dlarfx LAPACK_GLOBAL(dlarfx,DLARFX)\n#define LAPACK_clarfx LAPACK_GLOBAL(clarfx,CLARFX)\n#define LAPACK_zlarfx LAPACK_GLOBAL(zlarfx,ZLARFX)\n#define LAPACK_slatms LAPACK_GLOBAL(slatms,SLATMS)\n#define LAPACK_dlatms LAPACK_GLOBAL(dlatms,DLATMS)\n#define LAPACK_clatms LAPACK_GLOBAL(clatms,CLATMS)\n#define LAPACK_zlatms LAPACK_GLOBAL(zlatms,ZLATMS)\n#define LAPACK_slag2d LAPACK_GLOBAL(slag2d,SLAG2D)\n#define LAPACK_dlag2s LAPACK_GLOBAL(dlag2s,DLAG2S)\n#define LAPACK_clag2z LAPACK_GLOBAL(clag2z,CLAG2Z)\n#define LAPACK_zlag2c LAPACK_GLOBAL(zlag2c,ZLAG2C)\n#define LAPACK_slauum LAPACK_GLOBAL(slauum,SLAUUM)\n#define LAPACK_dlauum LAPACK_GLOBAL(dlauum,DLAUUM)\n#define LAPACK_clauum LAPACK_GLOBAL(clauum,CLAUUM)\n#define LAPACK_zlauum LAPACK_GLOBAL(zlauum,ZLAUUM)\n#define LAPACK_slagge LAPACK_GLOBAL(slagge,SLAGGE)\n#define LAPACK_dlagge LAPACK_GLOBAL(dlagge,DLAGGE)\n#define LAPACK_clagge LAPACK_GLOBAL(clagge,CLAGGE)\n#define LAPACK_zlagge LAPACK_GLOBAL(zlagge,ZLAGGE)\n#define LAPACK_slaset LAPACK_GLOBAL(slaset,SLASET)\n#define LAPACK_dlaset LAPACK_GLOBAL(dlaset,DLASET)\n#define LAPACK_claset LAPACK_GLOBAL(claset,CLASET)\n#define LAPACK_zlaset LAPACK_GLOBAL(zlaset,ZLASET)\n#define LAPACK_slasrt LAPACK_GLOBAL(slasrt,SLASRT)\n#define LAPACK_dlasrt LAPACK_GLOBAL(dlasrt,DLASRT)\n#define LAPACK_slagsy LAPACK_GLOBAL(slagsy,SLAGSY)\n#define LAPACK_dlagsy LAPACK_GLOBAL(dlagsy,DLAGSY)\n#define LAPACK_clagsy LAPACK_GLOBAL(clagsy,CLAGSY)\n#define LAPACK_zlagsy LAPACK_GLOBAL(zlagsy,ZLAGSY)\n#define LAPACK_claghe LAPACK_GLOBAL(claghe,CLAGHE)\n#define LAPACK_zlaghe LAPACK_GLOBAL(zlaghe,ZLAGHE)\n#define LAPACK_slapmr LAPACK_GLOBAL(slapmr,SLAPMR)\n#define LAPACK_dlapmr LAPACK_GLOBAL(dlapmr,DLAPMR)\n#define LAPACK_clapmr LAPACK_GLOBAL(clapmr,CLAPMR)\n#define LAPACK_zlapmr LAPACK_GLOBAL(zlapmr,ZLAPMR)\n#define LAPACK_slapy2 LAPACK_GLOBAL(slapy2,SLAPY2)\n#define LAPACK_dlapy2 LAPACK_GLOBAL(dlapy2,DLAPY2)\n#define LAPACK_slapy3 LAPACK_GLOBAL(slapy3,SLAPY3)\n#define LAPACK_dlapy3 LAPACK_GLOBAL(dlapy3,DLAPY3)\n#define LAPACK_slartgp LAPACK_GLOBAL(slartgp,SLARTGP)\n#define LAPACK_dlartgp LAPACK_GLOBAL(dlartgp,DLARTGP)\n#define LAPACK_slartgs LAPACK_GLOBAL(slartgs,SLARTGS)\n#define LAPACK_dlartgs LAPACK_GLOBAL(dlartgs,DLARTGS)\n// LAPACK 3.3.0\n#define LAPACK_cbbcsd LAPACK_GLOBAL(cbbcsd,CBBCSD)\n#define LAPACK_cheswapr LAPACK_GLOBAL(cheswapr,CHESWAPR)\n#define LAPACK_chetri2 LAPACK_GLOBAL(chetri2,CHETRI2)\n#define LAPACK_chetri2x LAPACK_GLOBAL(chetri2x,CHETRI2X)\n#define LAPACK_chetrs2 LAPACK_GLOBAL(chetrs2,CHETRS2)\n#define LAPACK_csyconv LAPACK_GLOBAL(csyconv,CSYCONV)\n#define LAPACK_csyswapr LAPACK_GLOBAL(csyswapr,CSYSWAPR)\n#define LAPACK_csytri2 LAPACK_GLOBAL(csytri2,CSYTRI2)\n#define LAPACK_csytri2x LAPACK_GLOBAL(csytri2x,CSYTRI2X)\n#define LAPACK_csytrs2 LAPACK_GLOBAL(csytrs2,CSYTRS2)\n#define LAPACK_cunbdb LAPACK_GLOBAL(cunbdb,CUNBDB)\n#define LAPACK_cuncsd LAPACK_GLOBAL(cuncsd,CUNCSD)\n#define LAPACK_dbbcsd LAPACK_GLOBAL(dbbcsd,DBBCSD)\n#define LAPACK_dorbdb LAPACK_GLOBAL(dorbdb,DORBDB)\n#define LAPACK_dorcsd LAPACK_GLOBAL(dorcsd,DORCSD)\n#define LAPACK_dsyconv LAPACK_GLOBAL(dsyconv,DSYCONV)\n#define LAPACK_dsyswapr LAPACK_GLOBAL(dsyswapr,DSYSWAPR)\n#define LAPACK_dsytri2 
LAPACK_GLOBAL(dsytri2,DSYTRI2)\n#define LAPACK_dsytri2x LAPACK_GLOBAL(dsytri2x,DSYTRI2X)\n#define LAPACK_dsytrs2 LAPACK_GLOBAL(dsytrs2,DSYTRS2)\n#define LAPACK_sbbcsd LAPACK_GLOBAL(sbbcsd,SBBCSD)\n#define LAPACK_sorbdb LAPACK_GLOBAL(sorbdb,SORBDB)\n#define LAPACK_sorcsd LAPACK_GLOBAL(sorcsd,SORCSD)\n#define LAPACK_ssyconv LAPACK_GLOBAL(ssyconv,SSYCONV)\n#define LAPACK_ssyswapr LAPACK_GLOBAL(ssyswapr,SSYSWAPR)\n#define LAPACK_ssytri2 LAPACK_GLOBAL(ssytri2,SSYTRI2)\n#define LAPACK_ssytri2x LAPACK_GLOBAL(ssytri2x,SSYTRI2X)\n#define LAPACK_ssytrs2 LAPACK_GLOBAL(ssytrs2,SSYTRS2)\n#define LAPACK_zbbcsd LAPACK_GLOBAL(zbbcsd,ZBBCSD)\n#define LAPACK_zheswapr LAPACK_GLOBAL(zheswapr,ZHESWAPR)\n#define LAPACK_zhetri2 LAPACK_GLOBAL(zhetri2,ZHETRI2)\n#define LAPACK_zhetri2x LAPACK_GLOBAL(zhetri2x,ZHETRI2X)\n#define LAPACK_zhetrs2 LAPACK_GLOBAL(zhetrs2,ZHETRS2)\n#define LAPACK_zsyconv LAPACK_GLOBAL(zsyconv,ZSYCONV)\n#define LAPACK_zsyswapr LAPACK_GLOBAL(zsyswapr,ZSYSWAPR)\n#define LAPACK_zsytri2 LAPACK_GLOBAL(zsytri2,ZSYTRI2)\n#define LAPACK_zsytri2x LAPACK_GLOBAL(zsytri2x,ZSYTRI2X)\n#define LAPACK_zsytrs2 LAPACK_GLOBAL(zsytrs2,ZSYTRS2)\n#define LAPACK_zunbdb LAPACK_GLOBAL(zunbdb,ZUNBDB)\n#define LAPACK_zuncsd LAPACK_GLOBAL(zuncsd,ZUNCSD)\n// LAPACK 3.4.0\n#define LAPACK_sgemqrt LAPACK_GLOBAL(sgemqrt,SGEMQRT)\n#define LAPACK_dgemqrt LAPACK_GLOBAL(dgemqrt,DGEMQRT)\n#define LAPACK_cgemqrt LAPACK_GLOBAL(cgemqrt,CGEMQRT)\n#define LAPACK_zgemqrt LAPACK_GLOBAL(zgemqrt,ZGEMQRT)\n#define LAPACK_sgeqrt LAPACK_GLOBAL(sgeqrt,SGEQRT)\n#define LAPACK_dgeqrt LAPACK_GLOBAL(dgeqrt,DGEQRT)\n#define LAPACK_cgeqrt LAPACK_GLOBAL(cgeqrt,CGEQRT)\n#define LAPACK_zgeqrt LAPACK_GLOBAL(zgeqrt,ZGEQRT)\n#define LAPACK_sgeqrt2 LAPACK_GLOBAL(sgeqrt2,SGEQRT2)\n#define LAPACK_dgeqrt2 LAPACK_GLOBAL(dgeqrt2,DGEQRT2)\n#define LAPACK_cgeqrt2 LAPACK_GLOBAL(cgeqrt2,CGEQRT2)\n#define LAPACK_zgeqrt2 LAPACK_GLOBAL(zgeqrt2,ZGEQRT2)\n#define LAPACK_sgeqrt3 LAPACK_GLOBAL(sgeqrt3,SGEQRT3)\n#define LAPACK_dgeqrt3 LAPACK_GLOBAL(dgeqrt3,DGEQRT3)\n#define LAPACK_cgeqrt3 LAPACK_GLOBAL(cgeqrt3,CGEQRT3)\n#define LAPACK_zgeqrt3 LAPACK_GLOBAL(zgeqrt3,ZGEQRT3)\n#define LAPACK_stpmqrt LAPACK_GLOBAL(stpmqrt,STPMQRT)\n#define LAPACK_dtpmqrt LAPACK_GLOBAL(dtpmqrt,DTPMQRT)\n#define LAPACK_ctpmqrt LAPACK_GLOBAL(ctpmqrt,CTPMQRT)\n#define LAPACK_ztpmqrt LAPACK_GLOBAL(ztpmqrt,ZTPMQRT)\n#define LAPACK_dtpqrt LAPACK_GLOBAL(dtpqrt,DTPQRT)\n#define LAPACK_ctpqrt LAPACK_GLOBAL(ctpqrt,CTPQRT)\n#define LAPACK_ztpqrt LAPACK_GLOBAL(ztpqrt,ZTPQRT)\n#define LAPACK_stpqrt2 LAPACK_GLOBAL(stpqrt2,STPQRT2)\n#define LAPACK_dtpqrt2 LAPACK_GLOBAL(dtpqrt2,DTPQRT2)\n#define LAPACK_ctpqrt2 LAPACK_GLOBAL(ctpqrt2,CTPQRT2)\n#define LAPACK_ztpqrt2 LAPACK_GLOBAL(ztpqrt2,ZTPQRT2)\n#define LAPACK_stprfb LAPACK_GLOBAL(stprfb,STPRFB)\n#define LAPACK_dtprfb LAPACK_GLOBAL(dtprfb,DTPRFB)\n#define LAPACK_ctprfb LAPACK_GLOBAL(ctprfb,CTPRFB)\n#define LAPACK_ztprfb LAPACK_GLOBAL(ztprfb,ZTPRFB)\n// LAPACK 3.X.X\n#define LAPACK_csyr LAPACK_GLOBAL(csyr,CSYR)\n#define LAPACK_zsyr LAPACK_GLOBAL(zsyr,ZSYR)\n\n\nvoid LAPACK_sgetrf( lapack_int* m, lapack_int* n, float* a, lapack_int* lda,\n                    lapack_int* ipiv, lapack_int *info );\nvoid LAPACK_dgetrf( lapack_int* m, lapack_int* n, double* a, lapack_int* lda,\n                    lapack_int* ipiv, lapack_int *info );\nvoid LAPACK_cgetrf( lapack_int* m, lapack_int* n, lapack_complex_float* a,\n                    lapack_int* lda, lapack_int* ipiv, lapack_int *info );\nvoid LAPACK_zgetrf( lapack_int* m, lapack_int* n, lapack_complex_double* a,\n    
                lapack_int* lda, lapack_int* ipiv, lapack_int *info );\nvoid LAPACK_sgbtrf( lapack_int* m, lapack_int* n, lapack_int* kl,\n                    lapack_int* ku, float* ab, lapack_int* ldab,\n                    lapack_int* ipiv, lapack_int *info );\nvoid LAPACK_dgbtrf( lapack_int* m, lapack_int* n, lapack_int* kl,\n                    lapack_int* ku, double* ab, lapack_int* ldab,\n                    lapack_int* ipiv, lapack_int *info );\nvoid LAPACK_cgbtrf( lapack_int* m, lapack_int* n, lapack_int* kl,\n                    lapack_int* ku, lapack_complex_float* ab, lapack_int* ldab,\n                    lapack_int* ipiv, lapack_int *info );\nvoid LAPACK_zgbtrf( lapack_int* m, lapack_int* n, lapack_int* kl,\n                    lapack_int* ku, lapack_complex_double* ab, lapack_int* ldab,\n                    lapack_int* ipiv, lapack_int *info );\nvoid LAPACK_sgttrf( lapack_int* n, float* dl, float* d, float* du, float* du2,\n                    lapack_int* ipiv, lapack_int *info );\nvoid LAPACK_dgttrf( lapack_int* n, double* dl, double* d, double* du,\n                    double* du2, lapack_int* ipiv, lapack_int *info );\nvoid LAPACK_cgttrf( lapack_int* n, lapack_complex_float* dl,\n                    lapack_complex_float* d, lapack_complex_float* du,\n                    lapack_complex_float* du2, lapack_int* ipiv,\n                    lapack_int *info );\nvoid LAPACK_zgttrf( lapack_int* n, lapack_complex_double* dl,\n                    lapack_complex_double* d, lapack_complex_double* du,\n                    lapack_complex_double* du2, lapack_int* ipiv,\n                    lapack_int *info );\nvoid LAPACK_spotrf( char* uplo, lapack_int* n, float* a, lapack_int* lda,\n                    lapack_int *info );\nvoid LAPACK_dpotrf( char* uplo, lapack_int* n, double* a, lapack_int* lda,\n                    lapack_int *info );\nvoid LAPACK_cpotrf( char* uplo, lapack_int* n, lapack_complex_float* a,\n                    lapack_int* lda, lapack_int *info );\nvoid LAPACK_zpotrf( char* uplo, lapack_int* n, lapack_complex_double* a,\n                    lapack_int* lda, lapack_int *info );\nvoid LAPACK_dpstrf( char* uplo, lapack_int* n, double* a, lapack_int* lda,\n                    lapack_int* piv, lapack_int* rank, double* tol,\n                    double* work, lapack_int *info );\nvoid LAPACK_spstrf( char* uplo, lapack_int* n, float* a, lapack_int* lda,\n                    lapack_int* piv, lapack_int* rank, float* tol, float* work,\n                    lapack_int *info );\nvoid LAPACK_zpstrf( char* uplo, lapack_int* n, lapack_complex_double* a,\n                    lapack_int* lda, lapack_int* piv, lapack_int* rank,\n                    double* tol, double* work, lapack_int *info );\nvoid LAPACK_cpstrf( char* uplo, lapack_int* n, lapack_complex_float* a,\n                    lapack_int* lda, lapack_int* piv, lapack_int* rank,\n                    float* tol, float* work, lapack_int *info );\nvoid LAPACK_dpftrf( char* transr, char* uplo, lapack_int* n, double* a,\n                    lapack_int *info );\nvoid LAPACK_spftrf( char* transr, char* uplo, lapack_int* n, float* a,\n                    lapack_int *info );\nvoid LAPACK_zpftrf( char* transr, char* uplo, lapack_int* n,\n                    lapack_complex_double* a, lapack_int *info );\nvoid LAPACK_cpftrf( char* transr, char* uplo, lapack_int* n,\n                    lapack_complex_float* a, lapack_int *info );\nvoid LAPACK_spptrf( char* uplo, lapack_int* n, float* ap, lapack_int *info );\nvoid LAPACK_dpptrf( char* uplo, 
lapack_int* n, double* ap, lapack_int *info );\nvoid LAPACK_cpptrf( char* uplo, lapack_int* n, lapack_complex_float* ap,\n                    lapack_int *info );\nvoid LAPACK_zpptrf( char* uplo, lapack_int* n, lapack_complex_double* ap,\n                    lapack_int *info );\nvoid LAPACK_spbtrf( char* uplo, lapack_int* n, lapack_int* kd, float* ab,\n                    lapack_int* ldab, lapack_int *info );\nvoid LAPACK_dpbtrf( char* uplo, lapack_int* n, lapack_int* kd, double* ab,\n                    lapack_int* ldab, lapack_int *info );\nvoid LAPACK_cpbtrf( char* uplo, lapack_int* n, lapack_int* kd,\n                    lapack_complex_float* ab, lapack_int* ldab,\n                    lapack_int *info );\nvoid LAPACK_zpbtrf( char* uplo, lapack_int* n, lapack_int* kd,\n                    lapack_complex_double* ab, lapack_int* ldab,\n                    lapack_int *info );\nvoid LAPACK_spttrf( lapack_int* n, float* d, float* e, lapack_int *info );\nvoid LAPACK_dpttrf( lapack_int* n, double* d, double* e, lapack_int *info );\nvoid LAPACK_cpttrf( lapack_int* n, float* d, lapack_complex_float* e,\n                    lapack_int *info );\nvoid LAPACK_zpttrf( lapack_int* n, double* d, lapack_complex_double* e,\n                    lapack_int *info );\nvoid LAPACK_ssytrf( char* uplo, lapack_int* n, float* a, lapack_int* lda,\n                    lapack_int* ipiv, float* work, lapack_int* lwork,\n                    lapack_int *info );\nvoid LAPACK_dsytrf( char* uplo, lapack_int* n, double* a, lapack_int* lda,\n                    lapack_int* ipiv, double* work, lapack_int* lwork,\n                    lapack_int *info );\nvoid LAPACK_csytrf( char* uplo, lapack_int* n, lapack_complex_float* a,\n                    lapack_int* lda, lapack_int* ipiv,\n                    lapack_complex_float* work, lapack_int* lwork,\n                    lapack_int *info );\nvoid LAPACK_zsytrf( char* uplo, lapack_int* n, lapack_complex_double* a,\n                    lapack_int* lda, lapack_int* ipiv,\n                    lapack_complex_double* work, lapack_int* lwork,\n                    lapack_int *info );\nvoid LAPACK_chetrf( char* uplo, lapack_int* n, lapack_complex_float* a,\n                    lapack_int* lda, lapack_int* ipiv,\n                    lapack_complex_float* work, lapack_int* lwork,\n                    lapack_int *info );\nvoid LAPACK_zhetrf( char* uplo, lapack_int* n, lapack_complex_double* a,\n                    lapack_int* lda, lapack_int* ipiv,\n                    lapack_complex_double* work, lapack_int* lwork,\n                    lapack_int *info );\nvoid LAPACK_ssptrf( char* uplo, lapack_int* n, float* ap, lapack_int* ipiv,\n                    lapack_int *info );\nvoid LAPACK_dsptrf( char* uplo, lapack_int* n, double* ap, lapack_int* ipiv,\n                    lapack_int *info );\nvoid LAPACK_csptrf( char* uplo, lapack_int* n, lapack_complex_float* ap,\n                    lapack_int* ipiv, lapack_int *info );\nvoid LAPACK_zsptrf( char* uplo, lapack_int* n, lapack_complex_double* ap,\n                    lapack_int* ipiv, lapack_int *info );\nvoid LAPACK_chptrf( char* uplo, lapack_int* n, lapack_complex_float* ap,\n                    lapack_int* ipiv, lapack_int *info );\nvoid LAPACK_zhptrf( char* uplo, lapack_int* n, lapack_complex_double* ap,\n                    lapack_int* ipiv, lapack_int *info );\nvoid LAPACK_sgetrs( char* trans, lapack_int* n, lapack_int* nrhs,\n                    const float* a, lapack_int* lda, const lapack_int* ipiv,\n                    float* b, 
lapack_int* ldb, lapack_int *info );\nvoid LAPACK_dgetrs( char* trans, lapack_int* n, lapack_int* nrhs,\n                    const double* a, lapack_int* lda, const lapack_int* ipiv,\n                    double* b, lapack_int* ldb, lapack_int *info );\nvoid LAPACK_cgetrs( char* trans, lapack_int* n, lapack_int* nrhs,\n                    const lapack_complex_float* a, lapack_int* lda,\n                    const lapack_int* ipiv, lapack_complex_float* b,\n                    lapack_int* ldb, lapack_int *info );\nvoid LAPACK_zgetrs( char* trans, lapack_int* n, lapack_int* nrhs,\n                    const lapack_complex_double* a, lapack_int* lda,\n                    const lapack_int* ipiv, lapack_complex_double* b,\n                    lapack_int* ldb, lapack_int *info );\nvoid LAPACK_sgbtrs( char* trans, lapack_int* n, lapack_int* kl, lapack_int* ku,\n                    lapack_int* nrhs, const float* ab, lapack_int* ldab,\n                    const lapack_int* ipiv, float* b, lapack_int* ldb,\n                    lapack_int *info );\nvoid LAPACK_dgbtrs( char* trans, lapack_int* n, lapack_int* kl, lapack_int* ku,\n                    lapack_int* nrhs, const double* ab, lapack_int* ldab,\n                    const lapack_int* ipiv, double* b, lapack_int* ldb,\n                    lapack_int *info );\nvoid LAPACK_cgbtrs( char* trans, lapack_int* n, lapack_int* kl, lapack_int* ku,\n                    lapack_int* nrhs, const lapack_complex_float* ab,\n                    lapack_int* ldab, const lapack_int* ipiv,\n                    lapack_complex_float* b, lapack_int* ldb,\n                    lapack_int *info );\nvoid LAPACK_zgbtrs( char* trans, lapack_int* n, lapack_int* kl, lapack_int* ku,\n                    lapack_int* nrhs, const lapack_complex_double* ab,\n                    lapack_int* ldab, const lapack_int* ipiv,\n                    lapack_complex_double* b, lapack_int* ldb,\n                    lapack_int *info );\nvoid LAPACK_sgttrs( char* trans, lapack_int* n, lapack_int* nrhs,\n                    const float* dl, const float* d, const float* du,\n                    const float* du2, const lapack_int* ipiv, float* b,\n                    lapack_int* ldb, lapack_int *info );\nvoid LAPACK_dgttrs( char* trans, lapack_int* n, lapack_int* nrhs,\n                    const double* dl, const double* d, const double* du,\n                    const double* du2, const lapack_int* ipiv, double* b,\n                    lapack_int* ldb, lapack_int *info );\nvoid LAPACK_cgttrs( char* trans, lapack_int* n, lapack_int* nrhs,\n                    const lapack_complex_float* dl,\n                    const lapack_complex_float* d,\n                    const lapack_complex_float* du,\n                    const lapack_complex_float* du2, const lapack_int* ipiv,\n                    lapack_complex_float* b, lapack_int* ldb,\n                    lapack_int *info );\nvoid LAPACK_zgttrs( char* trans, lapack_int* n, lapack_int* nrhs,\n                    const lapack_complex_double* dl,\n                    const lapack_complex_double* d,\n                    const lapack_complex_double* du,\n                    const lapack_complex_double* du2, const lapack_int* ipiv,\n                    lapack_complex_double* b, lapack_int* ldb,\n                    lapack_int *info );\nvoid LAPACK_spotrs( char* uplo, lapack_int* n, lapack_int* nrhs, const float* a,\n                    lapack_int* lda, float* b, lapack_int* ldb,\n                    lapack_int *info );\nvoid LAPACK_dpotrs( char* uplo, lapack_int* 
n, lapack_int* nrhs,\n                    const double* a, lapack_int* lda, double* b,\n                    lapack_int* ldb, lapack_int *info );\nvoid LAPACK_cpotrs( char* uplo, lapack_int* n, lapack_int* nrhs,\n                    const lapack_complex_float* a, lapack_int* lda,\n                    lapack_complex_float* b, lapack_int* ldb,\n                    lapack_int *info );\nvoid LAPACK_zpotrs( char* uplo, lapack_int* n, lapack_int* nrhs,\n                    const lapack_complex_double* a, lapack_int* lda,\n                    lapack_complex_double* b, lapack_int* ldb,\n                    lapack_int *info );\nvoid LAPACK_dpftrs( char* transr, char* uplo, lapack_int* n, lapack_int* nrhs,\n                    const double* a, double* b, lapack_int* ldb,\n                    lapack_int *info );\nvoid LAPACK_spftrs( char* transr, char* uplo, lapack_int* n, lapack_int* nrhs,\n                    const float* a, float* b, lapack_int* ldb,\n                    lapack_int *info );\nvoid LAPACK_zpftrs( char* transr, char* uplo, lapack_int* n, lapack_int* nrhs,\n                    const lapack_complex_double* a, lapack_complex_double* b,\n                    lapack_int* ldb, lapack_int *info );\nvoid LAPACK_cpftrs( char* transr, char* uplo, lapack_int* n, lapack_int* nrhs,\n                    const lapack_complex_float* a, lapack_complex_float* b,\n                    lapack_int* ldb, lapack_int *info );\nvoid LAPACK_spptrs( char* uplo, lapack_int* n, lapack_int* nrhs,\n                    const float* ap, float* b, lapack_int* ldb,\n                    lapack_int *info );\nvoid LAPACK_dpptrs( char* uplo, lapack_int* n, lapack_int* nrhs,\n                    const double* ap, double* b, lapack_int* ldb,\n                    lapack_int *info );\nvoid LAPACK_cpptrs( char* uplo, lapack_int* n, lapack_int* nrhs,\n                    const lapack_complex_float* ap, lapack_complex_float* b,\n                    lapack_int* ldb, lapack_int *info );\nvoid LAPACK_zpptrs( char* uplo, lapack_int* n, lapack_int* nrhs,\n                    const lapack_complex_double* ap, lapack_complex_double* b,\n                    lapack_int* ldb, lapack_int *info );\nvoid LAPACK_spbtrs( char* uplo, lapack_int* n, lapack_int* kd, lapack_int* nrhs,\n                    const float* ab, lapack_int* ldab, float* b,\n                    lapack_int* ldb, lapack_int *info );\nvoid LAPACK_dpbtrs( char* uplo, lapack_int* n, lapack_int* kd, lapack_int* nrhs,\n                    const double* ab, lapack_int* ldab, double* b,\n                    lapack_int* ldb, lapack_int *info );\nvoid LAPACK_cpbtrs( char* uplo, lapack_int* n, lapack_int* kd, lapack_int* nrhs,\n                    const lapack_complex_float* ab, lapack_int* ldab,\n                    lapack_complex_float* b, lapack_int* ldb,\n                    lapack_int *info );\nvoid LAPACK_zpbtrs( char* uplo, lapack_int* n, lapack_int* kd, lapack_int* nrhs,\n                    const lapack_complex_double* ab, lapack_int* ldab,\n                    lapack_complex_double* b, lapack_int* ldb,\n                    lapack_int *info );\nvoid LAPACK_spttrs( lapack_int* n, lapack_int* nrhs, const float* d,\n                    const float* e, float* b, lapack_int* ldb,\n                    lapack_int *info );\nvoid LAPACK_dpttrs( lapack_int* n, lapack_int* nrhs, const double* d,\n                    const double* e, double* b, lapack_int* ldb,\n                    lapack_int *info );\nvoid LAPACK_cpttrs( char* uplo, lapack_int* n, lapack_int* nrhs, const float* d,\n        
            const lapack_complex_float* e, lapack_complex_float* b,\n                    lapack_int* ldb, lapack_int *info );\nvoid LAPACK_zpttrs( char* uplo, lapack_int* n, lapack_int* nrhs,\n                    const double* d, const lapack_complex_double* e,\n                    lapack_complex_double* b, lapack_int* ldb,\n                    lapack_int *info );\nvoid LAPACK_ssytrs( char* uplo, lapack_int* n, lapack_int* nrhs, const float* a,\n                    lapack_int* lda, const lapack_int* ipiv, float* b,\n                    lapack_int* ldb, lapack_int *info );\nvoid LAPACK_dsytrs( char* uplo, lapack_int* n, lapack_int* nrhs,\n                    const double* a, lapack_int* lda, const lapack_int* ipiv,\n                    double* b, lapack_int* ldb, lapack_int *info );\nvoid LAPACK_csytrs( char* uplo, lapack_int* n, lapack_int* nrhs,\n                    const lapack_complex_float* a, lapack_int* lda,\n                    const lapack_int* ipiv, lapack_complex_float* b,\n                    lapack_int* ldb, lapack_int *info );\nvoid LAPACK_zsytrs( char* uplo, lapack_int* n, lapack_int* nrhs,\n                    const lapack_complex_double* a, lapack_int* lda,\n                    const lapack_int* ipiv, lapack_complex_double* b,\n                    lapack_int* ldb, lapack_int *info );\nvoid LAPACK_chetrs( char* uplo, lapack_int* n, lapack_int* nrhs,\n                    const lapack_complex_float* a, lapack_int* lda,\n                    const lapack_int* ipiv, lapack_complex_float* b,\n                    lapack_int* ldb, lapack_int *info );\nvoid LAPACK_zhetrs( char* uplo, lapack_int* n, lapack_int* nrhs,\n                    const lapack_complex_double* a, lapack_int* lda,\n                    const lapack_int* ipiv, lapack_complex_double* b,\n                    lapack_int* ldb, lapack_int *info );\nvoid LAPACK_ssptrs( char* uplo, lapack_int* n, lapack_int* nrhs,\n                    const float* ap, const lapack_int* ipiv, float* b,\n                    lapack_int* ldb, lapack_int *info );\nvoid LAPACK_dsptrs( char* uplo, lapack_int* n, lapack_int* nrhs,\n                    const double* ap, const lapack_int* ipiv, double* b,\n                    lapack_int* ldb, lapack_int *info );\nvoid LAPACK_csptrs( char* uplo, lapack_int* n, lapack_int* nrhs,\n                    const lapack_complex_float* ap, const lapack_int* ipiv,\n                    lapack_complex_float* b, lapack_int* ldb,\n                    lapack_int *info );\nvoid LAPACK_zsptrs( char* uplo, lapack_int* n, lapack_int* nrhs,\n                    const lapack_complex_double* ap, const lapack_int* ipiv,\n                    lapack_complex_double* b, lapack_int* ldb,\n                    lapack_int *info );\nvoid LAPACK_chptrs( char* uplo, lapack_int* n, lapack_int* nrhs,\n                    const lapack_complex_float* ap, const lapack_int* ipiv,\n                    lapack_complex_float* b, lapack_int* ldb,\n                    lapack_int *info );\nvoid LAPACK_zhptrs( char* uplo, lapack_int* n, lapack_int* nrhs,\n                    const lapack_complex_double* ap, const lapack_int* ipiv,\n                    lapack_complex_double* b, lapack_int* ldb,\n                    lapack_int *info );\nvoid LAPACK_strtrs( char* uplo, char* trans, char* diag, lapack_int* n,\n                    lapack_int* nrhs, const float* a, lapack_int* lda, float* b,\n                    lapack_int* ldb, lapack_int *info );\nvoid LAPACK_dtrtrs( char* uplo, char* trans, char* diag, lapack_int* n,\n                    lapack_int* 
nrhs, const double* a, lapack_int* lda,\n                    double* b, lapack_int* ldb, lapack_int *info );\nvoid LAPACK_ctrtrs( char* uplo, char* trans, char* diag, lapack_int* n,\n                    lapack_int* nrhs, const lapack_complex_float* a,\n                    lapack_int* lda, lapack_complex_float* b, lapack_int* ldb,\n                    lapack_int *info );\nvoid LAPACK_ztrtrs( char* uplo, char* trans, char* diag, lapack_int* n,\n                    lapack_int* nrhs, const lapack_complex_double* a,\n                    lapack_int* lda, lapack_complex_double* b, lapack_int* ldb,\n                    lapack_int *info );\nvoid LAPACK_stptrs( char* uplo, char* trans, char* diag, lapack_int* n,\n                    lapack_int* nrhs, const float* ap, float* b,\n                    lapack_int* ldb, lapack_int *info );\nvoid LAPACK_dtptrs( char* uplo, char* trans, char* diag, lapack_int* n,\n                    lapack_int* nrhs, const double* ap, double* b,\n                    lapack_int* ldb, lapack_int *info );\nvoid LAPACK_ctptrs( char* uplo, char* trans, char* diag, lapack_int* n,\n                    lapack_int* nrhs, const lapack_complex_float* ap,\n                    lapack_complex_float* b, lapack_int* ldb,\n                    lapack_int *info );\nvoid LAPACK_ztptrs( char* uplo, char* trans, char* diag, lapack_int* n,\n                    lapack_int* nrhs, const lapack_complex_double* ap,\n                    lapack_complex_double* b, lapack_int* ldb,\n                    lapack_int *info );\nvoid LAPACK_stbtrs( char* uplo, char* trans, char* diag, lapack_int* n,\n                    lapack_int* kd, lapack_int* nrhs, const float* ab,\n                    lapack_int* ldab, float* b, lapack_int* ldb,\n                    lapack_int *info );\nvoid LAPACK_dtbtrs( char* uplo, char* trans, char* diag, lapack_int* n,\n                    lapack_int* kd, lapack_int* nrhs, const double* ab,\n                    lapack_int* ldab, double* b, lapack_int* ldb,\n                    lapack_int *info );\nvoid LAPACK_ctbtrs( char* uplo, char* trans, char* diag, lapack_int* n,\n                    lapack_int* kd, lapack_int* nrhs,\n                    const lapack_complex_float* ab, lapack_int* ldab,\n                    lapack_complex_float* b, lapack_int* ldb,\n                    lapack_int *info );\nvoid LAPACK_ztbtrs( char* uplo, char* trans, char* diag, lapack_int* n,\n                    lapack_int* kd, lapack_int* nrhs,\n                    const lapack_complex_double* ab, lapack_int* ldab,\n                    lapack_complex_double* b, lapack_int* ldb,\n                    lapack_int *info );\nvoid LAPACK_sgecon( char* norm, lapack_int* n, const float* a, lapack_int* lda,\n                    float* anorm, float* rcond, float* work, lapack_int* iwork,\n                    lapack_int *info );\nvoid LAPACK_dgecon( char* norm, lapack_int* n, const double* a, lapack_int* lda,\n                    double* anorm, double* rcond, double* work,\n                    lapack_int* iwork, lapack_int *info );\nvoid LAPACK_cgecon( char* norm, lapack_int* n, const lapack_complex_float* a,\n                    lapack_int* lda, float* anorm, float* rcond,\n                    lapack_complex_float* work, float* rwork,\n                    lapack_int *info );\nvoid LAPACK_zgecon( char* norm, lapack_int* n, const lapack_complex_double* a,\n                    lapack_int* lda, double* anorm, double* rcond,\n                    lapack_complex_double* work, double* rwork,\n                    lapack_int 
*info );\nvoid LAPACK_sgbcon( char* norm, lapack_int* n, lapack_int* kl, lapack_int* ku,\n                    const float* ab, lapack_int* ldab, const lapack_int* ipiv,\n                    float* anorm, float* rcond, float* work, lapack_int* iwork,\n                    lapack_int *info );\nvoid LAPACK_dgbcon( char* norm, lapack_int* n, lapack_int* kl, lapack_int* ku,\n                    const double* ab, lapack_int* ldab, const lapack_int* ipiv,\n                    double* anorm, double* rcond, double* work,\n                    lapack_int* iwork, lapack_int *info );\nvoid LAPACK_cgbcon( char* norm, lapack_int* n, lapack_int* kl, lapack_int* ku,\n                    const lapack_complex_float* ab, lapack_int* ldab,\n                    const lapack_int* ipiv, float* anorm, float* rcond,\n                    lapack_complex_float* work, float* rwork,\n                    lapack_int *info );\nvoid LAPACK_zgbcon( char* norm, lapack_int* n, lapack_int* kl, lapack_int* ku,\n                    const lapack_complex_double* ab, lapack_int* ldab,\n                    const lapack_int* ipiv, double* anorm, double* rcond,\n                    lapack_complex_double* work, double* rwork,\n                    lapack_int *info );\nvoid LAPACK_sgtcon( char* norm, lapack_int* n, const float* dl, const float* d,\n                    const float* du, const float* du2, const lapack_int* ipiv,\n                    float* anorm, float* rcond, float* work, lapack_int* iwork,\n                    lapack_int *info );\nvoid LAPACK_dgtcon( char* norm, lapack_int* n, const double* dl,\n                    const double* d, const double* du, const double* du2,\n                    const lapack_int* ipiv, double* anorm, double* rcond,\n                    double* work, lapack_int* iwork, lapack_int *info );\nvoid LAPACK_cgtcon( char* norm, lapack_int* n, const lapack_complex_float* dl,\n                    const lapack_complex_float* d,\n                    const lapack_complex_float* du,\n                    const lapack_complex_float* du2, const lapack_int* ipiv,\n                    float* anorm, float* rcond, lapack_complex_float* work,\n                    lapack_int *info );\nvoid LAPACK_zgtcon( char* norm, lapack_int* n, const lapack_complex_double* dl,\n                    const lapack_complex_double* d,\n                    const lapack_complex_double* du,\n                    const lapack_complex_double* du2, const lapack_int* ipiv,\n                    double* anorm, double* rcond, lapack_complex_double* work,\n                    lapack_int *info );\nvoid LAPACK_spocon( char* uplo, lapack_int* n, const float* a, lapack_int* lda,\n                    float* anorm, float* rcond, float* work, lapack_int* iwork,\n                    lapack_int *info );\nvoid LAPACK_dpocon( char* uplo, lapack_int* n, const double* a, lapack_int* lda,\n                    double* anorm, double* rcond, double* work,\n                    lapack_int* iwork, lapack_int *info );\nvoid LAPACK_cpocon( char* uplo, lapack_int* n, const lapack_complex_float* a,\n                    lapack_int* lda, float* anorm, float* rcond,\n                    lapack_complex_float* work, float* rwork,\n                    lapack_int *info );\nvoid LAPACK_zpocon( char* uplo, lapack_int* n, const lapack_complex_double* a,\n                    lapack_int* lda, double* anorm, double* rcond,\n                    lapack_complex_double* work, double* rwork,\n                    lapack_int *info );\nvoid LAPACK_sppcon( char* uplo, lapack_int* n, const float* 
ap, float* anorm,\n                    float* rcond, float* work, lapack_int* iwork,\n                    lapack_int *info );\nvoid LAPACK_dppcon( char* uplo, lapack_int* n, const double* ap, double* anorm,\n                    double* rcond, double* work, lapack_int* iwork,\n                    lapack_int *info );\nvoid LAPACK_cppcon( char* uplo, lapack_int* n, const lapack_complex_float* ap,\n                    float* anorm, float* rcond, lapack_complex_float* work,\n                    float* rwork, lapack_int *info );\nvoid LAPACK_zppcon( char* uplo, lapack_int* n, const lapack_complex_double* ap,\n                    double* anorm, double* rcond, lapack_complex_double* work,\n                    double* rwork, lapack_int *info );\nvoid LAPACK_spbcon( char* uplo, lapack_int* n, lapack_int* kd, const float* ab,\n                    lapack_int* ldab, float* anorm, float* rcond, float* work,\n                    lapack_int* iwork, lapack_int *info );\nvoid LAPACK_dpbcon( char* uplo, lapack_int* n, lapack_int* kd, const double* ab,\n                    lapack_int* ldab, double* anorm, double* rcond,\n                    double* work, lapack_int* iwork, lapack_int *info );\nvoid LAPACK_cpbcon( char* uplo, lapack_int* n, lapack_int* kd,\n                    const lapack_complex_float* ab, lapack_int* ldab,\n                    float* anorm, float* rcond, lapack_complex_float* work,\n                    float* rwork, lapack_int *info );\nvoid LAPACK_zpbcon( char* uplo, lapack_int* n, lapack_int* kd,\n                    const lapack_complex_double* ab, lapack_int* ldab,\n                    double* anorm, double* rcond, lapack_complex_double* work,\n                    double* rwork, lapack_int *info );\nvoid LAPACK_sptcon( lapack_int* n, const float* d, const float* e, float* anorm,\n                    float* rcond, float* work, lapack_int *info );\nvoid LAPACK_dptcon( lapack_int* n, const double* d, const double* e,\n                    double* anorm, double* rcond, double* work,\n                    lapack_int *info );\nvoid LAPACK_cptcon( lapack_int* n, const float* d,\n                    const lapack_complex_float* e, float* anorm, float* rcond,\n                    float* work, lapack_int *info );\nvoid LAPACK_zptcon( lapack_int* n, const double* d,\n                    const lapack_complex_double* e, double* anorm,\n                    double* rcond, double* work, lapack_int *info );\nvoid LAPACK_ssycon( char* uplo, lapack_int* n, const float* a, lapack_int* lda,\n                    const lapack_int* ipiv, float* anorm, float* rcond,\n                    float* work, lapack_int* iwork, lapack_int *info );\nvoid LAPACK_dsycon( char* uplo, lapack_int* n, const double* a, lapack_int* lda,\n                    const lapack_int* ipiv, double* anorm, double* rcond,\n                    double* work, lapack_int* iwork, lapack_int *info );\nvoid LAPACK_csycon( char* uplo, lapack_int* n, const lapack_complex_float* a,\n                    lapack_int* lda, const lapack_int* ipiv, float* anorm,\n                    float* rcond, lapack_complex_float* work,\n                    lapack_int *info );\nvoid LAPACK_zsycon( char* uplo, lapack_int* n, const lapack_complex_double* a,\n                    lapack_int* lda, const lapack_int* ipiv, double* anorm,\n                    double* rcond, lapack_complex_double* work,\n                    lapack_int *info );\nvoid LAPACK_checon( char* uplo, lapack_int* n, const lapack_complex_float* a,\n                    lapack_int* lda, const lapack_int* ipiv, 
float* anorm,\n                    float* rcond, lapack_complex_float* work,\n                    lapack_int *info );\nvoid LAPACK_zhecon( char* uplo, lapack_int* n, const lapack_complex_double* a,\n                    lapack_int* lda, const lapack_int* ipiv, double* anorm,\n                    double* rcond, lapack_complex_double* work,\n                    lapack_int *info );\nvoid LAPACK_sspcon( char* uplo, lapack_int* n, const float* ap,\n                    const lapack_int* ipiv, float* anorm, float* rcond,\n                    float* work, lapack_int* iwork, lapack_int *info );\nvoid LAPACK_dspcon( char* uplo, lapack_int* n, const double* ap,\n                    const lapack_int* ipiv, double* anorm, double* rcond,\n                    double* work, lapack_int* iwork, lapack_int *info );\nvoid LAPACK_cspcon( char* uplo, lapack_int* n, const lapack_complex_float* ap,\n                    const lapack_int* ipiv, float* anorm, float* rcond,\n                    lapack_complex_float* work, lapack_int *info );\nvoid LAPACK_zspcon( char* uplo, lapack_int* n, const lapack_complex_double* ap,\n                    const lapack_int* ipiv, double* anorm, double* rcond,\n                    lapack_complex_double* work, lapack_int *info );\nvoid LAPACK_chpcon( char* uplo, lapack_int* n, const lapack_complex_float* ap,\n                    const lapack_int* ipiv, float* anorm, float* rcond,\n                    lapack_complex_float* work, lapack_int *info );\nvoid LAPACK_zhpcon( char* uplo, lapack_int* n, const lapack_complex_double* ap,\n                    const lapack_int* ipiv, double* anorm, double* rcond,\n                    lapack_complex_double* work, lapack_int *info );\nvoid LAPACK_strcon( char* norm, char* uplo, char* diag, lapack_int* n,\n                    const float* a, lapack_int* lda, float* rcond, float* work,\n                    lapack_int* iwork, lapack_int *info );\nvoid LAPACK_dtrcon( char* norm, char* uplo, char* diag, lapack_int* n,\n                    const double* a, lapack_int* lda, double* rcond,\n                    double* work, lapack_int* iwork, lapack_int *info );\nvoid LAPACK_ctrcon( char* norm, char* uplo, char* diag, lapack_int* n,\n                    const lapack_complex_float* a, lapack_int* lda,\n                    float* rcond, lapack_complex_float* work, float* rwork,\n                    lapack_int *info );\nvoid LAPACK_ztrcon( char* norm, char* uplo, char* diag, lapack_int* n,\n                    const lapack_complex_double* a, lapack_int* lda,\n                    double* rcond, lapack_complex_double* work, double* rwork,\n                    lapack_int *info );\nvoid LAPACK_stpcon( char* norm, char* uplo, char* diag, lapack_int* n,\n                    const float* ap, float* rcond, float* work,\n                    lapack_int* iwork, lapack_int *info );\nvoid LAPACK_dtpcon( char* norm, char* uplo, char* diag, lapack_int* n,\n                    const double* ap, double* rcond, double* work,\n                    lapack_int* iwork, lapack_int *info );\nvoid LAPACK_ctpcon( char* norm, char* uplo, char* diag, lapack_int* n,\n                    const lapack_complex_float* ap, float* rcond,\n                    lapack_complex_float* work, float* rwork,\n                    lapack_int *info );\nvoid LAPACK_ztpcon( char* norm, char* uplo, char* diag, lapack_int* n,\n                    const lapack_complex_double* ap, double* rcond,\n                    lapack_complex_double* work, double* rwork,\n                    lapack_int *info );\nvoid 
LAPACK_stbcon( char* norm, char* uplo, char* diag, lapack_int* n,\n                    lapack_int* kd, const float* ab, lapack_int* ldab,\n                    float* rcond, float* work, lapack_int* iwork,\n                    lapack_int *info );\nvoid LAPACK_dtbcon( char* norm, char* uplo, char* diag, lapack_int* n,\n                    lapack_int* kd, const double* ab, lapack_int* ldab,\n                    double* rcond, double* work, lapack_int* iwork,\n                    lapack_int *info );\nvoid LAPACK_ctbcon( char* norm, char* uplo, char* diag, lapack_int* n,\n                    lapack_int* kd, const lapack_complex_float* ab,\n                    lapack_int* ldab, float* rcond, lapack_complex_float* work,\n                    float* rwork, lapack_int *info );\nvoid LAPACK_ztbcon( char* norm, char* uplo, char* diag, lapack_int* n,\n                    lapack_int* kd, const lapack_complex_double* ab,\n                    lapack_int* ldab, double* rcond,\n                    lapack_complex_double* work, double* rwork,\n                    lapack_int *info );\nvoid LAPACK_sgerfs( char* trans, lapack_int* n, lapack_int* nrhs,\n                    const float* a, lapack_int* lda, const float* af,\n                    lapack_int* ldaf, const lapack_int* ipiv, const float* b,\n                    lapack_int* ldb, float* x, lapack_int* ldx, float* ferr,\n                    float* berr, float* work, lapack_int* iwork,\n                    lapack_int *info );\nvoid LAPACK_dgerfs( char* trans, lapack_int* n, lapack_int* nrhs,\n                    const double* a, lapack_int* lda, const double* af,\n                    lapack_int* ldaf, const lapack_int* ipiv, const double* b,\n                    lapack_int* ldb, double* x, lapack_int* ldx, double* ferr,\n                    double* berr, double* work, lapack_int* iwork,\n                    lapack_int *info );\nvoid LAPACK_cgerfs( char* trans, lapack_int* n, lapack_int* nrhs,\n                    const lapack_complex_float* a, lapack_int* lda,\n                    const lapack_complex_float* af, lapack_int* ldaf,\n                    const lapack_int* ipiv, const lapack_complex_float* b,\n                    lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx,\n                    float* ferr, float* berr, lapack_complex_float* work,\n                    float* rwork, lapack_int *info );\nvoid LAPACK_zgerfs( char* trans, lapack_int* n, lapack_int* nrhs,\n                    const lapack_complex_double* a, lapack_int* lda,\n                    const lapack_complex_double* af, lapack_int* ldaf,\n                    const lapack_int* ipiv, const lapack_complex_double* b,\n                    lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx,\n                    double* ferr, double* berr, lapack_complex_double* work,\n                    double* rwork, lapack_int *info );\nvoid LAPACK_dgerfsx( char* trans, char* equed, lapack_int* n, lapack_int* nrhs,\n                     const double* a, lapack_int* lda, const double* af,\n                     lapack_int* ldaf, const lapack_int* ipiv, const double* r,\n                     const double* c, const double* b, lapack_int* ldb,\n                     double* x, lapack_int* ldx, double* rcond, double* berr,\n                     lapack_int* n_err_bnds, double* err_bnds_norm,\n                     double* err_bnds_comp, lapack_int* nparams, double* params,\n                     double* work, lapack_int* iwork, lapack_int *info );\nvoid LAPACK_sgerfsx( char* trans, char* equed, 
lapack_int* n, lapack_int* nrhs,\n                     const float* a, lapack_int* lda, const float* af,\n                     lapack_int* ldaf, const lapack_int* ipiv, const float* r,\n                     const float* c, const float* b, lapack_int* ldb, float* x,\n                     lapack_int* ldx, float* rcond, float* berr,\n                     lapack_int* n_err_bnds, float* err_bnds_norm,\n                     float* err_bnds_comp, lapack_int* nparams, float* params,\n                     float* work, lapack_int* iwork, lapack_int *info );\nvoid LAPACK_zgerfsx( char* trans, char* equed, lapack_int* n, lapack_int* nrhs,\n                     const lapack_complex_double* a, lapack_int* lda,\n                     const lapack_complex_double* af, lapack_int* ldaf,\n                     const lapack_int* ipiv, const double* r, const double* c,\n                     const lapack_complex_double* b, lapack_int* ldb,\n                     lapack_complex_double* x, lapack_int* ldx, double* rcond,\n                     double* berr, lapack_int* n_err_bnds,\n                     double* err_bnds_norm, double* err_bnds_comp,\n                     lapack_int* nparams, double* params,\n                     lapack_complex_double* work, double* rwork,\n                     lapack_int *info );\nvoid LAPACK_cgerfsx( char* trans, char* equed, lapack_int* n, lapack_int* nrhs,\n                     const lapack_complex_float* a, lapack_int* lda,\n                     const lapack_complex_float* af, lapack_int* ldaf,\n                     const lapack_int* ipiv, const float* r, const float* c,\n                     const lapack_complex_float* b, lapack_int* ldb,\n                     lapack_complex_float* x, lapack_int* ldx, float* rcond,\n                     float* berr, lapack_int* n_err_bnds, float* err_bnds_norm,\n                     float* err_bnds_comp, lapack_int* nparams, float* params,\n                     lapack_complex_float* work, float* rwork,\n                     lapack_int *info );\nvoid LAPACK_sgbrfs( char* trans, lapack_int* n, lapack_int* kl, lapack_int* ku,\n                    lapack_int* nrhs, const float* ab, lapack_int* ldab,\n                    const float* afb, lapack_int* ldafb, const lapack_int* ipiv,\n                    const float* b, lapack_int* ldb, float* x, lapack_int* ldx,\n                    float* ferr, float* berr, float* work, lapack_int* iwork,\n                    lapack_int *info );\nvoid LAPACK_dgbrfs( char* trans, lapack_int* n, lapack_int* kl, lapack_int* ku,\n                    lapack_int* nrhs, const double* ab, lapack_int* ldab,\n                    const double* afb, lapack_int* ldafb,\n                    const lapack_int* ipiv, const double* b, lapack_int* ldb,\n                    double* x, lapack_int* ldx, double* ferr, double* berr,\n                    double* work, lapack_int* iwork, lapack_int *info );\nvoid LAPACK_cgbrfs( char* trans, lapack_int* n, lapack_int* kl, lapack_int* ku,\n                    lapack_int* nrhs, const lapack_complex_float* ab,\n                    lapack_int* ldab, const lapack_complex_float* afb,\n                    lapack_int* ldafb, const lapack_int* ipiv,\n                    const lapack_complex_float* b, lapack_int* ldb,\n                    lapack_complex_float* x, lapack_int* ldx, float* ferr,\n                    float* berr, lapack_complex_float* work, float* rwork,\n                    lapack_int *info );\nvoid LAPACK_zgbrfs( char* trans, lapack_int* n, lapack_int* kl, lapack_int* ku,\n                   
 lapack_int* nrhs, const lapack_complex_double* ab,\n                    lapack_int* ldab, const lapack_complex_double* afb,\n                    lapack_int* ldafb, const lapack_int* ipiv,\n                    const lapack_complex_double* b, lapack_int* ldb,\n                    lapack_complex_double* x, lapack_int* ldx, double* ferr,\n                    double* berr, lapack_complex_double* work, double* rwork,\n                    lapack_int *info );\nvoid LAPACK_dgbrfsx( char* trans, char* equed, lapack_int* n, lapack_int* kl,\n                     lapack_int* ku, lapack_int* nrhs, const double* ab,\n                     lapack_int* ldab, const double* afb, lapack_int* ldafb,\n                     const lapack_int* ipiv, const double* r, const double* c,\n                     const double* b, lapack_int* ldb, double* x,\n                     lapack_int* ldx, double* rcond, double* berr,\n                     lapack_int* n_err_bnds, double* err_bnds_norm,\n                     double* err_bnds_comp, lapack_int* nparams, double* params,\n                     double* work, lapack_int* iwork, lapack_int *info );\nvoid LAPACK_sgbrfsx( char* trans, char* equed, lapack_int* n, lapack_int* kl,\n                     lapack_int* ku, lapack_int* nrhs, const float* ab,\n                     lapack_int* ldab, const float* afb, lapack_int* ldafb,\n                     const lapack_int* ipiv, const float* r, const float* c,\n                     const float* b, lapack_int* ldb, float* x, lapack_int* ldx,\n                     float* rcond, float* berr, lapack_int* n_err_bnds,\n                     float* err_bnds_norm, float* err_bnds_comp,\n                     lapack_int* nparams, float* params, float* work,\n                     lapack_int* iwork, lapack_int *info );\nvoid LAPACK_zgbrfsx( char* trans, char* equed, lapack_int* n, lapack_int* kl,\n                     lapack_int* ku, lapack_int* nrhs,\n                     const lapack_complex_double* ab, lapack_int* ldab,\n                     const lapack_complex_double* afb, lapack_int* ldafb,\n                     const lapack_int* ipiv, const double* r, const double* c,\n                     const lapack_complex_double* b, lapack_int* ldb,\n                     lapack_complex_double* x, lapack_int* ldx, double* rcond,\n                     double* berr, lapack_int* n_err_bnds,\n                     double* err_bnds_norm, double* err_bnds_comp,\n                     lapack_int* nparams, double* params,\n                     lapack_complex_double* work, double* rwork,\n                     lapack_int *info );\nvoid LAPACK_cgbrfsx( char* trans, char* equed, lapack_int* n, lapack_int* kl,\n                     lapack_int* ku, lapack_int* nrhs,\n                     const lapack_complex_float* ab, lapack_int* ldab,\n                     const lapack_complex_float* afb, lapack_int* ldafb,\n                     const lapack_int* ipiv, const float* r, const float* c,\n                     const lapack_complex_float* b, lapack_int* ldb,\n                     lapack_complex_float* x, lapack_int* ldx, float* rcond,\n                     float* berr, lapack_int* n_err_bnds, float* err_bnds_norm,\n                     float* err_bnds_comp, lapack_int* nparams, float* params,\n                     lapack_complex_float* work, float* rwork,\n                     lapack_int *info );\nvoid LAPACK_sgtrfs( char* trans, lapack_int* n, lapack_int* nrhs,\n                    const float* dl, const float* d, const float* du,\n                    const float* dlf, const 
float* df, const float* duf,\n                    const float* du2, const lapack_int* ipiv, const float* b,\n                    lapack_int* ldb, float* x, lapack_int* ldx, float* ferr,\n                    float* berr, float* work, lapack_int* iwork,\n                    lapack_int *info );\nvoid LAPACK_dgtrfs( char* trans, lapack_int* n, lapack_int* nrhs,\n                    const double* dl, const double* d, const double* du,\n                    const double* dlf, const double* df, const double* duf,\n                    const double* du2, const lapack_int* ipiv, const double* b,\n                    lapack_int* ldb, double* x, lapack_int* ldx, double* ferr,\n                    double* berr, double* work, lapack_int* iwork,\n                    lapack_int *info );\nvoid LAPACK_cgtrfs( char* trans, lapack_int* n, lapack_int* nrhs,\n                    const lapack_complex_float* dl,\n                    const lapack_complex_float* d,\n                    const lapack_complex_float* du,\n                    const lapack_complex_float* dlf,\n                    const lapack_complex_float* df,\n                    const lapack_complex_float* duf,\n                    const lapack_complex_float* du2, const lapack_int* ipiv,\n                    const lapack_complex_float* b, lapack_int* ldb,\n                    lapack_complex_float* x, lapack_int* ldx, float* ferr,\n                    float* berr, lapack_complex_float* work, float* rwork,\n                    lapack_int *info );\nvoid LAPACK_zgtrfs( char* trans, lapack_int* n, lapack_int* nrhs,\n                    const lapack_complex_double* dl,\n                    const lapack_complex_double* d,\n                    const lapack_complex_double* du,\n                    const lapack_complex_double* dlf,\n                    const lapack_complex_double* df,\n                    const lapack_complex_double* duf,\n                    const lapack_complex_double* du2, const lapack_int* ipiv,\n                    const lapack_complex_double* b, lapack_int* ldb,\n                    lapack_complex_double* x, lapack_int* ldx, double* ferr,\n                    double* berr, lapack_complex_double* work, double* rwork,\n                    lapack_int *info );\nvoid LAPACK_sporfs( char* uplo, lapack_int* n, lapack_int* nrhs, const float* a,\n                    lapack_int* lda, const float* af, lapack_int* ldaf,\n                    const float* b, lapack_int* ldb, float* x, lapack_int* ldx,\n                    float* ferr, float* berr, float* work, lapack_int* iwork,\n                    lapack_int *info );\nvoid LAPACK_dporfs( char* uplo, lapack_int* n, lapack_int* nrhs,\n                    const double* a, lapack_int* lda, const double* af,\n                    lapack_int* ldaf, const double* b, lapack_int* ldb,\n                    double* x, lapack_int* ldx, double* ferr, double* berr,\n                    double* work, lapack_int* iwork, lapack_int *info );\nvoid LAPACK_cporfs( char* uplo, lapack_int* n, lapack_int* nrhs,\n                    const lapack_complex_float* a, lapack_int* lda,\n                    const lapack_complex_float* af, lapack_int* ldaf,\n                    const lapack_complex_float* b, lapack_int* ldb,\n                    lapack_complex_float* x, lapack_int* ldx, float* ferr,\n                    float* berr, lapack_complex_float* work, float* rwork,\n                    lapack_int *info );\nvoid LAPACK_zporfs( char* uplo, lapack_int* n, lapack_int* nrhs,\n                    const lapack_complex_double* a, 
lapack_int* lda,\n                    const lapack_complex_double* af, lapack_int* ldaf,\n                    const lapack_complex_double* b, lapack_int* ldb,\n                    lapack_complex_double* x, lapack_int* ldx, double* ferr,\n                    double* berr, lapack_complex_double* work, double* rwork,\n                    lapack_int *info );\nvoid LAPACK_dporfsx( char* uplo, char* equed, lapack_int* n, lapack_int* nrhs,\n                     const double* a, lapack_int* lda, const double* af,\n                     lapack_int* ldaf, const double* s, const double* b,\n                     lapack_int* ldb, double* x, lapack_int* ldx, double* rcond,\n                     double* berr, lapack_int* n_err_bnds,\n                     double* err_bnds_norm, double* err_bnds_comp,\n                     lapack_int* nparams, double* params, double* work,\n                     lapack_int* iwork, lapack_int *info );\nvoid LAPACK_sporfsx( char* uplo, char* equed, lapack_int* n, lapack_int* nrhs,\n                     const float* a, lapack_int* lda, const float* af,\n                     lapack_int* ldaf, const float* s, const float* b,\n                     lapack_int* ldb, float* x, lapack_int* ldx, float* rcond,\n                     float* berr, lapack_int* n_err_bnds, float* err_bnds_norm,\n                     float* err_bnds_comp, lapack_int* nparams, float* params,\n                     float* work, lapack_int* iwork, lapack_int *info );\nvoid LAPACK_zporfsx( char* uplo, char* equed, lapack_int* n, lapack_int* nrhs,\n                     const lapack_complex_double* a, lapack_int* lda,\n                     const lapack_complex_double* af, lapack_int* ldaf,\n                     const double* s, const lapack_complex_double* b,\n                     lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx,\n                     double* rcond, double* berr, lapack_int* n_err_bnds,\n                     double* err_bnds_norm, double* err_bnds_comp,\n                     lapack_int* nparams, double* params,\n                     lapack_complex_double* work, double* rwork,\n                     lapack_int *info );\nvoid LAPACK_cporfsx( char* uplo, char* equed, lapack_int* n, lapack_int* nrhs,\n                     const lapack_complex_float* a, lapack_int* lda,\n                     const lapack_complex_float* af, lapack_int* ldaf,\n                     const float* s, const lapack_complex_float* b,\n                     lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx,\n                     float* rcond, float* berr, lapack_int* n_err_bnds,\n                     float* err_bnds_norm, float* err_bnds_comp,\n                     lapack_int* nparams, float* params,\n                     lapack_complex_float* work, float* rwork,\n                     lapack_int *info );\nvoid LAPACK_spprfs( char* uplo, lapack_int* n, lapack_int* nrhs,\n                    const float* ap, const float* afp, const float* b,\n                    lapack_int* ldb, float* x, lapack_int* ldx, float* ferr,\n                    float* berr, float* work, lapack_int* iwork,\n                    lapack_int *info );\nvoid LAPACK_dpprfs( char* uplo, lapack_int* n, lapack_int* nrhs,\n                    const double* ap, const double* afp, const double* b,\n                    lapack_int* ldb, double* x, lapack_int* ldx, double* ferr,\n                    double* berr, double* work, lapack_int* iwork,\n                    lapack_int *info );\nvoid LAPACK_cpprfs( char* uplo, lapack_int* n, lapack_int* nrhs,\n     
               const lapack_complex_float* ap,\n                    const lapack_complex_float* afp,\n                    const lapack_complex_float* b, lapack_int* ldb,\n                    lapack_complex_float* x, lapack_int* ldx, float* ferr,\n                    float* berr, lapack_complex_float* work, float* rwork,\n                    lapack_int *info );\nvoid LAPACK_zpprfs( char* uplo, lapack_int* n, lapack_int* nrhs,\n                    const lapack_complex_double* ap,\n                    const lapack_complex_double* afp,\n                    const lapack_complex_double* b, lapack_int* ldb,\n                    lapack_complex_double* x, lapack_int* ldx, double* ferr,\n                    double* berr, lapack_complex_double* work, double* rwork,\n                    lapack_int *info );\nvoid LAPACK_spbrfs( char* uplo, lapack_int* n, lapack_int* kd, lapack_int* nrhs,\n                    const float* ab, lapack_int* ldab, const float* afb,\n                    lapack_int* ldafb, const float* b, lapack_int* ldb,\n                    float* x, lapack_int* ldx, float* ferr, float* berr,\n                    float* work, lapack_int* iwork, lapack_int *info );\nvoid LAPACK_dpbrfs( char* uplo, lapack_int* n, lapack_int* kd, lapack_int* nrhs,\n                    const double* ab, lapack_int* ldab, const double* afb,\n                    lapack_int* ldafb, const double* b, lapack_int* ldb,\n                    double* x, lapack_int* ldx, double* ferr, double* berr,\n                    double* work, lapack_int* iwork, lapack_int *info );\nvoid LAPACK_cpbrfs( char* uplo, lapack_int* n, lapack_int* kd, lapack_int* nrhs,\n                    const lapack_complex_float* ab, lapack_int* ldab,\n                    const lapack_complex_float* afb, lapack_int* ldafb,\n                    const lapack_complex_float* b, lapack_int* ldb,\n                    lapack_complex_float* x, lapack_int* ldx, float* ferr,\n                    float* berr, lapack_complex_float* work, float* rwork,\n                    lapack_int *info );\nvoid LAPACK_zpbrfs( char* uplo, lapack_int* n, lapack_int* kd, lapack_int* nrhs,\n                    const lapack_complex_double* ab, lapack_int* ldab,\n                    const lapack_complex_double* afb, lapack_int* ldafb,\n                    const lapack_complex_double* b, lapack_int* ldb,\n                    lapack_complex_double* x, lapack_int* ldx, double* ferr,\n                    double* berr, lapack_complex_double* work, double* rwork,\n                    lapack_int *info );\nvoid LAPACK_sptrfs( lapack_int* n, lapack_int* nrhs, const float* d,\n                    const float* e, const float* df, const float* ef,\n                    const float* b, lapack_int* ldb, float* x, lapack_int* ldx,\n                    float* ferr, float* berr, float* work, lapack_int *info );\nvoid LAPACK_dptrfs( lapack_int* n, lapack_int* nrhs, const double* d,\n                    const double* e, const double* df, const double* ef,\n                    const double* b, lapack_int* ldb, double* x,\n                    lapack_int* ldx, double* ferr, double* berr, double* work,\n                    lapack_int *info );\nvoid LAPACK_cptrfs( char* uplo, lapack_int* n, lapack_int* nrhs, const float* d,\n                    const lapack_complex_float* e, const float* df,\n                    const lapack_complex_float* ef,\n                    const lapack_complex_float* b, lapack_int* ldb,\n                    lapack_complex_float* x, lapack_int* ldx, float* ferr,\n                    
float* berr, lapack_complex_float* work, float* rwork,\n                    lapack_int *info );\nvoid LAPACK_zptrfs( char* uplo, lapack_int* n, lapack_int* nrhs,\n                    const double* d, const lapack_complex_double* e,\n                    const double* df, const lapack_complex_double* ef,\n                    const lapack_complex_double* b, lapack_int* ldb,\n                    lapack_complex_double* x, lapack_int* ldx, double* ferr,\n                    double* berr, lapack_complex_double* work, double* rwork,\n                    lapack_int *info );\nvoid LAPACK_ssyrfs( char* uplo, lapack_int* n, lapack_int* nrhs, const float* a,\n                    lapack_int* lda, const float* af, lapack_int* ldaf,\n                    const lapack_int* ipiv, const float* b, lapack_int* ldb,\n                    float* x, lapack_int* ldx, float* ferr, float* berr,\n                    float* work, lapack_int* iwork, lapack_int *info );\nvoid LAPACK_dsyrfs( char* uplo, lapack_int* n, lapack_int* nrhs,\n                    const double* a, lapack_int* lda, const double* af,\n                    lapack_int* ldaf, const lapack_int* ipiv, const double* b,\n                    lapack_int* ldb, double* x, lapack_int* ldx, double* ferr,\n                    double* berr, double* work, lapack_int* iwork,\n                    lapack_int *info );\nvoid LAPACK_csyrfs( char* uplo, lapack_int* n, lapack_int* nrhs,\n                    const lapack_complex_float* a, lapack_int* lda,\n                    const lapack_complex_float* af, lapack_int* ldaf,\n                    const lapack_int* ipiv, const lapack_complex_float* b,\n                    lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx,\n                    float* ferr, float* berr, lapack_complex_float* work,\n                    float* rwork, lapack_int *info );\nvoid LAPACK_zsyrfs( char* uplo, lapack_int* n, lapack_int* nrhs,\n                    const lapack_complex_double* a, lapack_int* lda,\n                    const lapack_complex_double* af, lapack_int* ldaf,\n                    const lapack_int* ipiv, const lapack_complex_double* b,\n                    lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx,\n                    double* ferr, double* berr, lapack_complex_double* work,\n                    double* rwork, lapack_int *info );\nvoid LAPACK_dsyrfsx( char* uplo, char* equed, lapack_int* n, lapack_int* nrhs,\n                     const double* a, lapack_int* lda, const double* af,\n                     lapack_int* ldaf, const lapack_int* ipiv, const double* s,\n                     const double* b, lapack_int* ldb, double* x,\n                     lapack_int* ldx, double* rcond, double* berr,\n                     lapack_int* n_err_bnds, double* err_bnds_norm,\n                     double* err_bnds_comp, lapack_int* nparams, double* params,\n                     double* work, lapack_int* iwork, lapack_int *info );\nvoid LAPACK_ssyrfsx( char* uplo, char* equed, lapack_int* n, lapack_int* nrhs,\n                     const float* a, lapack_int* lda, const float* af,\n                     lapack_int* ldaf, const lapack_int* ipiv, const float* s,\n                     const float* b, lapack_int* ldb, float* x, lapack_int* ldx,\n                     float* rcond, float* berr, lapack_int* n_err_bnds,\n                     float* err_bnds_norm, float* err_bnds_comp,\n                     lapack_int* nparams, float* params, float* work,\n                     lapack_int* iwork, lapack_int *info );\nvoid LAPACK_zsyrfsx( 
char* uplo, char* equed, lapack_int* n, lapack_int* nrhs,\n                     const lapack_complex_double* a, lapack_int* lda,\n                     const lapack_complex_double* af, lapack_int* ldaf,\n                     const lapack_int* ipiv, const double* s,\n                     const lapack_complex_double* b, lapack_int* ldb,\n                     lapack_complex_double* x, lapack_int* ldx, double* rcond,\n                     double* berr, lapack_int* n_err_bnds,\n                     double* err_bnds_norm, double* err_bnds_comp,\n                     lapack_int* nparams, double* params,\n                     lapack_complex_double* work, double* rwork,\n                     lapack_int *info );\nvoid LAPACK_csyrfsx( char* uplo, char* equed, lapack_int* n, lapack_int* nrhs,\n                     const lapack_complex_float* a, lapack_int* lda,\n                     const lapack_complex_float* af, lapack_int* ldaf,\n                     const lapack_int* ipiv, const float* s,\n                     const lapack_complex_float* b, lapack_int* ldb,\n                     lapack_complex_float* x, lapack_int* ldx, float* rcond,\n                     float* berr, lapack_int* n_err_bnds, float* err_bnds_norm,\n                     float* err_bnds_comp, lapack_int* nparams, float* params,\n                     lapack_complex_float* work, float* rwork,\n                     lapack_int *info );\nvoid LAPACK_cherfs( char* uplo, lapack_int* n, lapack_int* nrhs,\n                    const lapack_complex_float* a, lapack_int* lda,\n                    const lapack_complex_float* af, lapack_int* ldaf,\n                    const lapack_int* ipiv, const lapack_complex_float* b,\n                    lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx,\n                    float* ferr, float* berr, lapack_complex_float* work,\n                    float* rwork, lapack_int *info );\nvoid LAPACK_zherfs( char* uplo, lapack_int* n, lapack_int* nrhs,\n                    const lapack_complex_double* a, lapack_int* lda,\n                    const lapack_complex_double* af, lapack_int* ldaf,\n                    const lapack_int* ipiv, const lapack_complex_double* b,\n                    lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx,\n                    double* ferr, double* berr, lapack_complex_double* work,\n                    double* rwork, lapack_int *info );\nvoid LAPACK_zherfsx( char* uplo, char* equed, lapack_int* n, lapack_int* nrhs,\n                     const lapack_complex_double* a, lapack_int* lda,\n                     const lapack_complex_double* af, lapack_int* ldaf,\n                     const lapack_int* ipiv, const double* s,\n                     const lapack_complex_double* b, lapack_int* ldb,\n                     lapack_complex_double* x, lapack_int* ldx, double* rcond,\n                     double* berr, lapack_int* n_err_bnds,\n                     double* err_bnds_norm, double* err_bnds_comp,\n                     lapack_int* nparams, double* params,\n                     lapack_complex_double* work, double* rwork,\n                     lapack_int *info );\nvoid LAPACK_cherfsx( char* uplo, char* equed, lapack_int* n, lapack_int* nrhs,\n                     const lapack_complex_float* a, lapack_int* lda,\n                     const lapack_complex_float* af, lapack_int* ldaf,\n                     const lapack_int* ipiv, const float* s,\n                     const lapack_complex_float* b, lapack_int* ldb,\n                     lapack_complex_float* x, lapack_int* ldx, 
float* rcond,\n                     float* berr, lapack_int* n_err_bnds, float* err_bnds_norm,\n                     float* err_bnds_comp, lapack_int* nparams, float* params,\n                     lapack_complex_float* work, float* rwork,\n                     lapack_int *info );\nvoid LAPACK_ssprfs( char* uplo, lapack_int* n, lapack_int* nrhs,\n                    const float* ap, const float* afp, const lapack_int* ipiv,\n                    const float* b, lapack_int* ldb, float* x, lapack_int* ldx,\n                    float* ferr, float* berr, float* work, lapack_int* iwork,\n                    lapack_int *info );\nvoid LAPACK_dsprfs( char* uplo, lapack_int* n, lapack_int* nrhs,\n                    const double* ap, const double* afp, const lapack_int* ipiv,\n                    const double* b, lapack_int* ldb, double* x,\n                    lapack_int* ldx, double* ferr, double* berr, double* work,\n                    lapack_int* iwork, lapack_int *info );\nvoid LAPACK_csprfs( char* uplo, lapack_int* n, lapack_int* nrhs,\n                    const lapack_complex_float* ap,\n                    const lapack_complex_float* afp, const lapack_int* ipiv,\n                    const lapack_complex_float* b, lapack_int* ldb,\n                    lapack_complex_float* x, lapack_int* ldx, float* ferr,\n                    float* berr, lapack_complex_float* work, float* rwork,\n                    lapack_int *info );\nvoid LAPACK_zsprfs( char* uplo, lapack_int* n, lapack_int* nrhs,\n                    const lapack_complex_double* ap,\n                    const lapack_complex_double* afp, const lapack_int* ipiv,\n                    const lapack_complex_double* b, lapack_int* ldb,\n                    lapack_complex_double* x, lapack_int* ldx, double* ferr,\n                    double* berr, lapack_complex_double* work, double* rwork,\n                    lapack_int *info );\nvoid LAPACK_chprfs( char* uplo, lapack_int* n, lapack_int* nrhs,\n                    const lapack_complex_float* ap,\n                    const lapack_complex_float* afp, const lapack_int* ipiv,\n                    const lapack_complex_float* b, lapack_int* ldb,\n                    lapack_complex_float* x, lapack_int* ldx, float* ferr,\n                    float* berr, lapack_complex_float* work, float* rwork,\n                    lapack_int *info );\nvoid LAPACK_zhprfs( char* uplo, lapack_int* n, lapack_int* nrhs,\n                    const lapack_complex_double* ap,\n                    const lapack_complex_double* afp, const lapack_int* ipiv,\n                    const lapack_complex_double* b, lapack_int* ldb,\n                    lapack_complex_double* x, lapack_int* ldx, double* ferr,\n                    double* berr, lapack_complex_double* work, double* rwork,\n                    lapack_int *info );\nvoid LAPACK_strrfs( char* uplo, char* trans, char* diag, lapack_int* n,\n                    lapack_int* nrhs, const float* a, lapack_int* lda,\n                    const float* b, lapack_int* ldb, const float* x,\n                    lapack_int* ldx, float* ferr, float* berr, float* work,\n                    lapack_int* iwork, lapack_int *info );\nvoid LAPACK_dtrrfs( char* uplo, char* trans, char* diag, lapack_int* n,\n                    lapack_int* nrhs, const double* a, lapack_int* lda,\n                    const double* b, lapack_int* ldb, const double* x,\n                    lapack_int* ldx, double* ferr, double* berr, double* work,\n                    lapack_int* iwork, lapack_int *info );\nvoid 
LAPACK_ctrrfs( char* uplo, char* trans, char* diag, lapack_int* n,\n                    lapack_int* nrhs, const lapack_complex_float* a,\n                    lapack_int* lda, const lapack_complex_float* b,\n                    lapack_int* ldb, const lapack_complex_float* x,\n                    lapack_int* ldx, float* ferr, float* berr,\n                    lapack_complex_float* work, float* rwork,\n                    lapack_int *info );\nvoid LAPACK_ztrrfs( char* uplo, char* trans, char* diag, lapack_int* n,\n                    lapack_int* nrhs, const lapack_complex_double* a,\n                    lapack_int* lda, const lapack_complex_double* b,\n                    lapack_int* ldb, const lapack_complex_double* x,\n                    lapack_int* ldx, double* ferr, double* berr,\n                    lapack_complex_double* work, double* rwork,\n                    lapack_int *info );\nvoid LAPACK_stprfs( char* uplo, char* trans, char* diag, lapack_int* n,\n                    lapack_int* nrhs, const float* ap, const float* b,\n                    lapack_int* ldb, const float* x, lapack_int* ldx,\n                    float* ferr, float* berr, float* work, lapack_int* iwork,\n                    lapack_int *info );\nvoid LAPACK_dtprfs( char* uplo, char* trans, char* diag, lapack_int* n,\n                    lapack_int* nrhs, const double* ap, const double* b,\n                    lapack_int* ldb, const double* x, lapack_int* ldx,\n                    double* ferr, double* berr, double* work, lapack_int* iwork,\n                    lapack_int *info );\nvoid LAPACK_ctprfs( char* uplo, char* trans, char* diag, lapack_int* n,\n                    lapack_int* nrhs, const lapack_complex_float* ap,\n                    const lapack_complex_float* b, lapack_int* ldb,\n                    const lapack_complex_float* x, lapack_int* ldx, float* ferr,\n                    float* berr, lapack_complex_float* work, float* rwork,\n                    lapack_int *info );\nvoid LAPACK_ztprfs( char* uplo, char* trans, char* diag, lapack_int* n,\n                    lapack_int* nrhs, const lapack_complex_double* ap,\n                    const lapack_complex_double* b, lapack_int* ldb,\n                    const lapack_complex_double* x, lapack_int* ldx,\n                    double* ferr, double* berr, lapack_complex_double* work,\n                    double* rwork, lapack_int *info );\nvoid LAPACK_stbrfs( char* uplo, char* trans, char* diag, lapack_int* n,\n                    lapack_int* kd, lapack_int* nrhs, const float* ab,\n                    lapack_int* ldab, const float* b, lapack_int* ldb,\n                    const float* x, lapack_int* ldx, float* ferr, float* berr,\n                    float* work, lapack_int* iwork, lapack_int *info );\nvoid LAPACK_dtbrfs( char* uplo, char* trans, char* diag, lapack_int* n,\n                    lapack_int* kd, lapack_int* nrhs, const double* ab,\n                    lapack_int* ldab, const double* b, lapack_int* ldb,\n                    const double* x, lapack_int* ldx, double* ferr,\n                    double* berr, double* work, lapack_int* iwork,\n                    lapack_int *info );\nvoid LAPACK_ctbrfs( char* uplo, char* trans, char* diag, lapack_int* n,\n                    lapack_int* kd, lapack_int* nrhs,\n                    const lapack_complex_float* ab, lapack_int* ldab,\n                    const lapack_complex_float* b, lapack_int* ldb,\n                    const lapack_complex_float* x, lapack_int* ldx, float* ferr,\n                    float* 
berr, lapack_complex_float* work, float* rwork,\n                    lapack_int *info );\nvoid LAPACK_ztbrfs( char* uplo, char* trans, char* diag, lapack_int* n,\n                    lapack_int* kd, lapack_int* nrhs,\n                    const lapack_complex_double* ab, lapack_int* ldab,\n                    const lapack_complex_double* b, lapack_int* ldb,\n                    const lapack_complex_double* x, lapack_int* ldx,\n                    double* ferr, double* berr, lapack_complex_double* work,\n                    double* rwork, lapack_int *info );\nvoid LAPACK_sgetri( lapack_int* n, float* a, lapack_int* lda,\n                    const lapack_int* ipiv, float* work, lapack_int* lwork,\n                    lapack_int *info );\nvoid LAPACK_dgetri( lapack_int* n, double* a, lapack_int* lda,\n                    const lapack_int* ipiv, double* work, lapack_int* lwork,\n                    lapack_int *info );\nvoid LAPACK_cgetri( lapack_int* n, lapack_complex_float* a, lapack_int* lda,\n                    const lapack_int* ipiv, lapack_complex_float* work,\n                    lapack_int* lwork, lapack_int *info );\nvoid LAPACK_zgetri( lapack_int* n, lapack_complex_double* a, lapack_int* lda,\n                    const lapack_int* ipiv, lapack_complex_double* work,\n                    lapack_int* lwork, lapack_int *info );\nvoid LAPACK_spotri( char* uplo, lapack_int* n, float* a, lapack_int* lda,\n                    lapack_int *info );\nvoid LAPACK_dpotri( char* uplo, lapack_int* n, double* a, lapack_int* lda,\n                    lapack_int *info );\nvoid LAPACK_cpotri( char* uplo, lapack_int* n, lapack_complex_float* a,\n                    lapack_int* lda, lapack_int *info );\nvoid LAPACK_zpotri( char* uplo, lapack_int* n, lapack_complex_double* a,\n                    lapack_int* lda, lapack_int *info );\nvoid LAPACK_dpftri( char* transr, char* uplo, lapack_int* n, double* a,\n                    lapack_int *info );\nvoid LAPACK_spftri( char* transr, char* uplo, lapack_int* n, float* a,\n                    lapack_int *info );\nvoid LAPACK_zpftri( char* transr, char* uplo, lapack_int* n,\n                    lapack_complex_double* a, lapack_int *info );\nvoid LAPACK_cpftri( char* transr, char* uplo, lapack_int* n,\n                    lapack_complex_float* a, lapack_int *info );\nvoid LAPACK_spptri( char* uplo, lapack_int* n, float* ap, lapack_int *info );\nvoid LAPACK_dpptri( char* uplo, lapack_int* n, double* ap, lapack_int *info );\nvoid LAPACK_cpptri( char* uplo, lapack_int* n, lapack_complex_float* ap,\n                    lapack_int *info );\nvoid LAPACK_zpptri( char* uplo, lapack_int* n, lapack_complex_double* ap,\n                    lapack_int *info );\nvoid LAPACK_ssytri( char* uplo, lapack_int* n, float* a, lapack_int* lda,\n                    const lapack_int* ipiv, float* work, lapack_int *info );\nvoid LAPACK_dsytri( char* uplo, lapack_int* n, double* a, lapack_int* lda,\n                    const lapack_int* ipiv, double* work, lapack_int *info );\nvoid LAPACK_csytri( char* uplo, lapack_int* n, lapack_complex_float* a,\n                    lapack_int* lda, const lapack_int* ipiv,\n                    lapack_complex_float* work, lapack_int *info );\nvoid LAPACK_zsytri( char* uplo, lapack_int* n, lapack_complex_double* a,\n                    lapack_int* lda, const lapack_int* ipiv,\n                    lapack_complex_double* work, lapack_int *info );\nvoid LAPACK_chetri( char* uplo, lapack_int* n, lapack_complex_float* a,\n                    lapack_int* lda, 
const lapack_int* ipiv,\n                    lapack_complex_float* work, lapack_int *info );\nvoid LAPACK_zhetri( char* uplo, lapack_int* n, lapack_complex_double* a,\n                    lapack_int* lda, const lapack_int* ipiv,\n                    lapack_complex_double* work, lapack_int *info );\nvoid LAPACK_ssptri( char* uplo, lapack_int* n, float* ap,\n                    const lapack_int* ipiv, float* work, lapack_int *info );\nvoid LAPACK_dsptri( char* uplo, lapack_int* n, double* ap,\n                    const lapack_int* ipiv, double* work, lapack_int *info );\nvoid LAPACK_csptri( char* uplo, lapack_int* n, lapack_complex_float* ap,\n                    const lapack_int* ipiv, lapack_complex_float* work,\n                    lapack_int *info );\nvoid LAPACK_zsptri( char* uplo, lapack_int* n, lapack_complex_double* ap,\n                    const lapack_int* ipiv, lapack_complex_double* work,\n                    lapack_int *info );\nvoid LAPACK_chptri( char* uplo, lapack_int* n, lapack_complex_float* ap,\n                    const lapack_int* ipiv, lapack_complex_float* work,\n                    lapack_int *info );\nvoid LAPACK_zhptri( char* uplo, lapack_int* n, lapack_complex_double* ap,\n                    const lapack_int* ipiv, lapack_complex_double* work,\n                    lapack_int *info );\nvoid LAPACK_strtri( char* uplo, char* diag, lapack_int* n, float* a,\n                    lapack_int* lda, lapack_int *info );\nvoid LAPACK_dtrtri( char* uplo, char* diag, lapack_int* n, double* a,\n                    lapack_int* lda, lapack_int *info );\nvoid LAPACK_ctrtri( char* uplo, char* diag, lapack_int* n,\n                    lapack_complex_float* a, lapack_int* lda,\n                    lapack_int *info );\nvoid LAPACK_ztrtri( char* uplo, char* diag, lapack_int* n,\n                    lapack_complex_double* a, lapack_int* lda,\n                    lapack_int *info );\nvoid LAPACK_dtftri( char* transr, char* uplo, char* diag, lapack_int* n,\n                    double* a, lapack_int *info );\nvoid LAPACK_stftri( char* transr, char* uplo, char* diag, lapack_int* n,\n                    float* a, lapack_int *info );\nvoid LAPACK_ztftri( char* transr, char* uplo, char* diag, lapack_int* n,\n                    lapack_complex_double* a, lapack_int *info );\nvoid LAPACK_ctftri( char* transr, char* uplo, char* diag, lapack_int* n,\n                    lapack_complex_float* a, lapack_int *info );\nvoid LAPACK_stptri( char* uplo, char* diag, lapack_int* n, float* ap,\n                    lapack_int *info );\nvoid LAPACK_dtptri( char* uplo, char* diag, lapack_int* n, double* ap,\n                    lapack_int *info );\nvoid LAPACK_ctptri( char* uplo, char* diag, lapack_int* n,\n                    lapack_complex_float* ap, lapack_int *info );\nvoid LAPACK_ztptri( char* uplo, char* diag, lapack_int* n,\n                    lapack_complex_double* ap, lapack_int *info );\nvoid LAPACK_sgeequ( lapack_int* m, lapack_int* n, const float* a,\n                    lapack_int* lda, float* r, float* c, float* rowcnd,\n                    float* colcnd, float* amax, lapack_int *info );\nvoid LAPACK_dgeequ( lapack_int* m, lapack_int* n, const double* a,\n                    lapack_int* lda, double* r, double* c, double* rowcnd,\n                    double* colcnd, double* amax, lapack_int *info );\nvoid LAPACK_cgeequ( lapack_int* m, lapack_int* n, const lapack_complex_float* a,\n                    lapack_int* lda, float* r, float* c, float* rowcnd,\n                    float* colcnd, float* 
amax, lapack_int *info );\nvoid LAPACK_zgeequ( lapack_int* m, lapack_int* n,\n                    const lapack_complex_double* a, lapack_int* lda, double* r,\n                    double* c, double* rowcnd, double* colcnd, double* amax,\n                    lapack_int *info );\nvoid LAPACK_dgeequb( lapack_int* m, lapack_int* n, const double* a,\n                     lapack_int* lda, double* r, double* c, double* rowcnd,\n                     double* colcnd, double* amax, lapack_int *info );\nvoid LAPACK_sgeequb( lapack_int* m, lapack_int* n, const float* a,\n                     lapack_int* lda, float* r, float* c, float* rowcnd,\n                     float* colcnd, float* amax, lapack_int *info );\nvoid LAPACK_zgeequb( lapack_int* m, lapack_int* n,\n                     const lapack_complex_double* a, lapack_int* lda, double* r,\n                     double* c, double* rowcnd, double* colcnd, double* amax,\n                     lapack_int *info );\nvoid LAPACK_cgeequb( lapack_int* m, lapack_int* n,\n                     const lapack_complex_float* a, lapack_int* lda, float* r,\n                     float* c, float* rowcnd, float* colcnd, float* amax,\n                     lapack_int *info );\nvoid LAPACK_sgbequ( lapack_int* m, lapack_int* n, lapack_int* kl,\n                    lapack_int* ku, const float* ab, lapack_int* ldab, float* r,\n                    float* c, float* rowcnd, float* colcnd, float* amax,\n                    lapack_int *info );\nvoid LAPACK_dgbequ( lapack_int* m, lapack_int* n, lapack_int* kl,\n                    lapack_int* ku, const double* ab, lapack_int* ldab,\n                    double* r, double* c, double* rowcnd, double* colcnd,\n                    double* amax, lapack_int *info );\nvoid LAPACK_cgbequ( lapack_int* m, lapack_int* n, lapack_int* kl,\n                    lapack_int* ku, const lapack_complex_float* ab,\n                    lapack_int* ldab, float* r, float* c, float* rowcnd,\n                    float* colcnd, float* amax, lapack_int *info );\nvoid LAPACK_zgbequ( lapack_int* m, lapack_int* n, lapack_int* kl,\n                    lapack_int* ku, const lapack_complex_double* ab,\n                    lapack_int* ldab, double* r, double* c, double* rowcnd,\n                    double* colcnd, double* amax, lapack_int *info );\nvoid LAPACK_dgbequb( lapack_int* m, lapack_int* n, lapack_int* kl,\n                     lapack_int* ku, const double* ab, lapack_int* ldab,\n                     double* r, double* c, double* rowcnd, double* colcnd,\n                     double* amax, lapack_int *info );\nvoid LAPACK_sgbequb( lapack_int* m, lapack_int* n, lapack_int* kl,\n                     lapack_int* ku, const float* ab, lapack_int* ldab,\n                     float* r, float* c, float* rowcnd, float* colcnd,\n                     float* amax, lapack_int *info );\nvoid LAPACK_zgbequb( lapack_int* m, lapack_int* n, lapack_int* kl,\n                     lapack_int* ku, const lapack_complex_double* ab,\n                     lapack_int* ldab, double* r, double* c, double* rowcnd,\n                     double* colcnd, double* amax, lapack_int *info );\nvoid LAPACK_cgbequb( lapack_int* m, lapack_int* n, lapack_int* kl,\n                     lapack_int* ku, const lapack_complex_float* ab,\n                     lapack_int* ldab, float* r, float* c, float* rowcnd,\n                     float* colcnd, float* amax, lapack_int *info );\nvoid LAPACK_spoequ( lapack_int* n, const float* a, lapack_int* lda, float* s,\n                    float* scond, float* amax, 
lapack_int *info );\nvoid LAPACK_dpoequ( lapack_int* n, const double* a, lapack_int* lda, double* s,\n                    double* scond, double* amax, lapack_int *info );\nvoid LAPACK_cpoequ( lapack_int* n, const lapack_complex_float* a,\n                    lapack_int* lda, float* s, float* scond, float* amax,\n                    lapack_int *info );\nvoid LAPACK_zpoequ( lapack_int* n, const lapack_complex_double* a,\n                    lapack_int* lda, double* s, double* scond, double* amax,\n                    lapack_int *info );\nvoid LAPACK_dpoequb( lapack_int* n, const double* a, lapack_int* lda, double* s,\n                     double* scond, double* amax, lapack_int *info );\nvoid LAPACK_spoequb( lapack_int* n, const float* a, lapack_int* lda, float* s,\n                     float* scond, float* amax, lapack_int *info );\nvoid LAPACK_zpoequb( lapack_int* n, const lapack_complex_double* a,\n                     lapack_int* lda, double* s, double* scond, double* amax,\n                     lapack_int *info );\nvoid LAPACK_cpoequb( lapack_int* n, const lapack_complex_float* a,\n                     lapack_int* lda, float* s, float* scond, float* amax,\n                     lapack_int *info );\nvoid LAPACK_sppequ( char* uplo, lapack_int* n, const float* ap, float* s,\n                    float* scond, float* amax, lapack_int *info );\nvoid LAPACK_dppequ( char* uplo, lapack_int* n, const double* ap, double* s,\n                    double* scond, double* amax, lapack_int *info );\nvoid LAPACK_cppequ( char* uplo, lapack_int* n, const lapack_complex_float* ap,\n                    float* s, float* scond, float* amax, lapack_int *info );\nvoid LAPACK_zppequ( char* uplo, lapack_int* n, const lapack_complex_double* ap,\n                    double* s, double* scond, double* amax, lapack_int *info );\nvoid LAPACK_spbequ( char* uplo, lapack_int* n, lapack_int* kd, const float* ab,\n                    lapack_int* ldab, float* s, float* scond, float* amax,\n                    lapack_int *info );\nvoid LAPACK_dpbequ( char* uplo, lapack_int* n, lapack_int* kd, const double* ab,\n                    lapack_int* ldab, double* s, double* scond, double* amax,\n                    lapack_int *info );\nvoid LAPACK_cpbequ( char* uplo, lapack_int* n, lapack_int* kd,\n                    const lapack_complex_float* ab, lapack_int* ldab, float* s,\n                    float* scond, float* amax, lapack_int *info );\nvoid LAPACK_zpbequ( char* uplo, lapack_int* n, lapack_int* kd,\n                    const lapack_complex_double* ab, lapack_int* ldab,\n                    double* s, double* scond, double* amax, lapack_int *info );\nvoid LAPACK_dsyequb( char* uplo, lapack_int* n, const double* a,\n                     lapack_int* lda, double* s, double* scond, double* amax,\n                     double* work, lapack_int *info );\nvoid LAPACK_ssyequb( char* uplo, lapack_int* n, const float* a, lapack_int* lda,\n                     float* s, float* scond, float* amax, float* work,\n                     lapack_int *info );\nvoid LAPACK_zsyequb( char* uplo, lapack_int* n, const lapack_complex_double* a,\n                     lapack_int* lda, double* s, double* scond, double* amax,\n                     lapack_complex_double* work, lapack_int *info );\nvoid LAPACK_csyequb( char* uplo, lapack_int* n, const lapack_complex_float* a,\n                     lapack_int* lda, float* s, float* scond, float* amax,\n                     lapack_complex_float* work, lapack_int *info );\nvoid LAPACK_zheequb( char* uplo, 
lapack_int* n, const lapack_complex_double* a,\n                     lapack_int* lda, double* s, double* scond, double* amax,\n                     lapack_complex_double* work, lapack_int *info );\nvoid LAPACK_cheequb( char* uplo, lapack_int* n, const lapack_complex_float* a,\n                     lapack_int* lda, float* s, float* scond, float* amax,\n                     lapack_complex_float* work, lapack_int *info );\nvoid LAPACK_sgesv( lapack_int* n, lapack_int* nrhs, float* a, lapack_int* lda,\n                   lapack_int* ipiv, float* b, lapack_int* ldb,\n                   lapack_int *info );\nvoid LAPACK_dgesv( lapack_int* n, lapack_int* nrhs, double* a, lapack_int* lda,\n                   lapack_int* ipiv, double* b, lapack_int* ldb,\n                   lapack_int *info );\nvoid LAPACK_cgesv( lapack_int* n, lapack_int* nrhs, lapack_complex_float* a,\n                   lapack_int* lda, lapack_int* ipiv, lapack_complex_float* b,\n                   lapack_int* ldb, lapack_int *info );\nvoid LAPACK_zgesv( lapack_int* n, lapack_int* nrhs, lapack_complex_double* a,\n                   lapack_int* lda, lapack_int* ipiv, lapack_complex_double* b,\n                   lapack_int* ldb, lapack_int *info );\nvoid LAPACK_dsgesv( lapack_int* n, lapack_int* nrhs, double* a, lapack_int* lda,\n                    lapack_int* ipiv, double* b, lapack_int* ldb, double* x,\n                    lapack_int* ldx, double* work, float* swork,\n                    lapack_int* iter, lapack_int *info );\nvoid LAPACK_zcgesv( lapack_int* n, lapack_int* nrhs, lapack_complex_double* a,\n                    lapack_int* lda, lapack_int* ipiv, lapack_complex_double* b,\n                    lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx,\n                    lapack_complex_double* work, lapack_complex_float* swork,\n                    double* rwork, lapack_int* iter, lapack_int *info );\nvoid LAPACK_sgesvx( char* fact, char* trans, lapack_int* n, lapack_int* nrhs,\n                    float* a, lapack_int* lda, float* af, lapack_int* ldaf,\n                    lapack_int* ipiv, char* equed, float* r, float* c, float* b,\n                    lapack_int* ldb, float* x, lapack_int* ldx, float* rcond,\n                    float* ferr, float* berr, float* work, lapack_int* iwork,\n                    lapack_int *info );\nvoid LAPACK_dgesvx( char* fact, char* trans, lapack_int* n, lapack_int* nrhs,\n                    double* a, lapack_int* lda, double* af, lapack_int* ldaf,\n                    lapack_int* ipiv, char* equed, double* r, double* c,\n                    double* b, lapack_int* ldb, double* x, lapack_int* ldx,\n                    double* rcond, double* ferr, double* berr, double* work,\n                    lapack_int* iwork, lapack_int *info );\nvoid LAPACK_cgesvx( char* fact, char* trans, lapack_int* n, lapack_int* nrhs,\n                    lapack_complex_float* a, lapack_int* lda,\n                    lapack_complex_float* af, lapack_int* ldaf,\n                    lapack_int* ipiv, char* equed, float* r, float* c,\n                    lapack_complex_float* b, lapack_int* ldb,\n                    lapack_complex_float* x, lapack_int* ldx, float* rcond,\n                    float* ferr, float* berr, lapack_complex_float* work,\n                    float* rwork, lapack_int *info );\nvoid LAPACK_zgesvx( char* fact, char* trans, lapack_int* n, lapack_int* nrhs,\n                    lapack_complex_double* a, lapack_int* lda,\n                    lapack_complex_double* af, lapack_int* ldaf,\n    
                lapack_int* ipiv, char* equed, double* r, double* c,\n                    lapack_complex_double* b, lapack_int* ldb,\n                    lapack_complex_double* x, lapack_int* ldx, double* rcond,\n                    double* ferr, double* berr, lapack_complex_double* work,\n                    double* rwork, lapack_int *info );\nvoid LAPACK_dgesvxx( char* fact, char* trans, lapack_int* n, lapack_int* nrhs,\n                     double* a, lapack_int* lda, double* af, lapack_int* ldaf,\n                     lapack_int* ipiv, char* equed, double* r, double* c,\n                     double* b, lapack_int* ldb, double* x, lapack_int* ldx,\n                     double* rcond, double* rpvgrw, double* berr,\n                     lapack_int* n_err_bnds, double* err_bnds_norm,\n                     double* err_bnds_comp, lapack_int* nparams, double* params,\n                     double* work, lapack_int* iwork, lapack_int *info );\nvoid LAPACK_sgesvxx( char* fact, char* trans, lapack_int* n, lapack_int* nrhs,\n                     float* a, lapack_int* lda, float* af, lapack_int* ldaf,\n                     lapack_int* ipiv, char* equed, float* r, float* c,\n                     float* b, lapack_int* ldb, float* x, lapack_int* ldx,\n                     float* rcond, float* rpvgrw, float* berr,\n                     lapack_int* n_err_bnds, float* err_bnds_norm,\n                     float* err_bnds_comp, lapack_int* nparams, float* params,\n                     float* work, lapack_int* iwork, lapack_int *info );\nvoid LAPACK_zgesvxx( char* fact, char* trans, lapack_int* n, lapack_int* nrhs,\n                     lapack_complex_double* a, lapack_int* lda,\n                     lapack_complex_double* af, lapack_int* ldaf,\n                     lapack_int* ipiv, char* equed, double* r, double* c,\n                     lapack_complex_double* b, lapack_int* ldb,\n                     lapack_complex_double* x, lapack_int* ldx, double* rcond,\n                     double* rpvgrw, double* berr, lapack_int* n_err_bnds,\n                     double* err_bnds_norm, double* err_bnds_comp,\n                     lapack_int* nparams, double* params,\n                     lapack_complex_double* work, double* rwork,\n                     lapack_int *info );\nvoid LAPACK_cgesvxx( char* fact, char* trans, lapack_int* n, lapack_int* nrhs,\n                     lapack_complex_float* a, lapack_int* lda,\n                     lapack_complex_float* af, lapack_int* ldaf,\n                     lapack_int* ipiv, char* equed, float* r, float* c,\n                     lapack_complex_float* b, lapack_int* ldb,\n                     lapack_complex_float* x, lapack_int* ldx, float* rcond,\n                     float* rpvgrw, float* berr, lapack_int* n_err_bnds,\n                     float* err_bnds_norm, float* err_bnds_comp,\n                     lapack_int* nparams, float* params,\n                     lapack_complex_float* work, float* rwork,\n                     lapack_int *info );\nvoid LAPACK_sgbsv( lapack_int* n, lapack_int* kl, lapack_int* ku,\n                   lapack_int* nrhs, float* ab, lapack_int* ldab,\n                   lapack_int* ipiv, float* b, lapack_int* ldb,\n                   lapack_int *info );\nvoid LAPACK_dgbsv( lapack_int* n, lapack_int* kl, lapack_int* ku,\n                   lapack_int* nrhs, double* ab, lapack_int* ldab,\n                   lapack_int* ipiv, double* b, lapack_int* ldb,\n                   lapack_int *info );\nvoid LAPACK_cgbsv( lapack_int* n, lapack_int* kl, 
lapack_int* ku,\n                   lapack_int* nrhs, lapack_complex_float* ab, lapack_int* ldab,\n                   lapack_int* ipiv, lapack_complex_float* b, lapack_int* ldb,\n                   lapack_int *info );\nvoid LAPACK_zgbsv( lapack_int* n, lapack_int* kl, lapack_int* ku,\n                   lapack_int* nrhs, lapack_complex_double* ab,\n                   lapack_int* ldab, lapack_int* ipiv, lapack_complex_double* b,\n                   lapack_int* ldb, lapack_int *info );\nvoid LAPACK_sgbsvx( char* fact, char* trans, lapack_int* n, lapack_int* kl,\n                    lapack_int* ku, lapack_int* nrhs, float* ab,\n                    lapack_int* ldab, float* afb, lapack_int* ldafb,\n                    lapack_int* ipiv, char* equed, float* r, float* c, float* b,\n                    lapack_int* ldb, float* x, lapack_int* ldx, float* rcond,\n                    float* ferr, float* berr, float* work, lapack_int* iwork,\n                    lapack_int *info );\nvoid LAPACK_dgbsvx( char* fact, char* trans, lapack_int* n, lapack_int* kl,\n                    lapack_int* ku, lapack_int* nrhs, double* ab,\n                    lapack_int* ldab, double* afb, lapack_int* ldafb,\n                    lapack_int* ipiv, char* equed, double* r, double* c,\n                    double* b, lapack_int* ldb, double* x, lapack_int* ldx,\n                    double* rcond, double* ferr, double* berr, double* work,\n                    lapack_int* iwork, lapack_int *info );\nvoid LAPACK_cgbsvx( char* fact, char* trans, lapack_int* n, lapack_int* kl,\n                    lapack_int* ku, lapack_int* nrhs, lapack_complex_float* ab,\n                    lapack_int* ldab, lapack_complex_float* afb,\n                    lapack_int* ldafb, lapack_int* ipiv, char* equed, float* r,\n                    float* c, lapack_complex_float* b, lapack_int* ldb,\n                    lapack_complex_float* x, lapack_int* ldx, float* rcond,\n                    float* ferr, float* berr, lapack_complex_float* work,\n                    float* rwork, lapack_int *info );\nvoid LAPACK_zgbsvx( char* fact, char* trans, lapack_int* n, lapack_int* kl,\n                    lapack_int* ku, lapack_int* nrhs, lapack_complex_double* ab,\n                    lapack_int* ldab, lapack_complex_double* afb,\n                    lapack_int* ldafb, lapack_int* ipiv, char* equed, double* r,\n                    double* c, lapack_complex_double* b, lapack_int* ldb,\n                    lapack_complex_double* x, lapack_int* ldx, double* rcond,\n                    double* ferr, double* berr, lapack_complex_double* work,\n                    double* rwork, lapack_int *info );\nvoid LAPACK_dgbsvxx( char* fact, char* trans, lapack_int* n, lapack_int* kl,\n                     lapack_int* ku, lapack_int* nrhs, double* ab,\n                     lapack_int* ldab, double* afb, lapack_int* ldafb,\n                     lapack_int* ipiv, char* equed, double* r, double* c,\n                     double* b, lapack_int* ldb, double* x, lapack_int* ldx,\n                     double* rcond, double* rpvgrw, double* berr,\n                     lapack_int* n_err_bnds, double* err_bnds_norm,\n                     double* err_bnds_comp, lapack_int* nparams, double* params,\n                     double* work, lapack_int* iwork, lapack_int *info );\nvoid LAPACK_sgbsvxx( char* fact, char* trans, lapack_int* n, lapack_int* kl,\n                     lapack_int* ku, lapack_int* nrhs, float* ab,\n                     lapack_int* ldab, float* afb, lapack_int* ldafb,\n      
               lapack_int* ipiv, char* equed, float* r, float* c,\n                     float* b, lapack_int* ldb, float* x, lapack_int* ldx,\n                     float* rcond, float* rpvgrw, float* berr,\n                     lapack_int* n_err_bnds, float* err_bnds_norm,\n                     float* err_bnds_comp, lapack_int* nparams, float* params,\n                     float* work, lapack_int* iwork, lapack_int *info );\nvoid LAPACK_zgbsvxx( char* fact, char* trans, lapack_int* n, lapack_int* kl,\n                     lapack_int* ku, lapack_int* nrhs,\n                     lapack_complex_double* ab, lapack_int* ldab,\n                     lapack_complex_double* afb, lapack_int* ldafb,\n                     lapack_int* ipiv, char* equed, double* r, double* c,\n                     lapack_complex_double* b, lapack_int* ldb,\n                     lapack_complex_double* x, lapack_int* ldx, double* rcond,\n                     double* rpvgrw, double* berr, lapack_int* n_err_bnds,\n                     double* err_bnds_norm, double* err_bnds_comp,\n                     lapack_int* nparams, double* params,\n                     lapack_complex_double* work, double* rwork,\n                     lapack_int *info );\nvoid LAPACK_cgbsvxx( char* fact, char* trans, lapack_int* n, lapack_int* kl,\n                     lapack_int* ku, lapack_int* nrhs, lapack_complex_float* ab,\n                     lapack_int* ldab, lapack_complex_float* afb,\n                     lapack_int* ldafb, lapack_int* ipiv, char* equed, float* r,\n                     float* c, lapack_complex_float* b, lapack_int* ldb,\n                     lapack_complex_float* x, lapack_int* ldx, float* rcond,\n                     float* rpvgrw, float* berr, lapack_int* n_err_bnds,\n                     float* err_bnds_norm, float* err_bnds_comp,\n                     lapack_int* nparams, float* params,\n                     lapack_complex_float* work, float* rwork,\n                     lapack_int *info );\nvoid LAPACK_sgtsv( lapack_int* n, lapack_int* nrhs, float* dl, float* d,\n                   float* du, float* b, lapack_int* ldb, lapack_int *info );\nvoid LAPACK_dgtsv( lapack_int* n, lapack_int* nrhs, double* dl, double* d,\n                   double* du, double* b, lapack_int* ldb, lapack_int *info );\nvoid LAPACK_cgtsv( lapack_int* n, lapack_int* nrhs, lapack_complex_float* dl,\n                   lapack_complex_float* d, lapack_complex_float* du,\n                   lapack_complex_float* b, lapack_int* ldb, lapack_int *info );\nvoid LAPACK_zgtsv( lapack_int* n, lapack_int* nrhs, lapack_complex_double* dl,\n                   lapack_complex_double* d, lapack_complex_double* du,\n                   lapack_complex_double* b, lapack_int* ldb,\n                   lapack_int *info );\nvoid LAPACK_sgtsvx( char* fact, char* trans, lapack_int* n, lapack_int* nrhs,\n                    const float* dl, const float* d, const float* du,\n                    float* dlf, float* df, float* duf, float* du2,\n                    lapack_int* ipiv, const float* b, lapack_int* ldb, float* x,\n                    lapack_int* ldx, float* rcond, float* ferr, float* berr,\n                    float* work, lapack_int* iwork, lapack_int *info );\nvoid LAPACK_dgtsvx( char* fact, char* trans, lapack_int* n, lapack_int* nrhs,\n                    const double* dl, const double* d, const double* du,\n                    double* dlf, double* df, double* duf, double* du2,\n                    lapack_int* ipiv, const double* b, lapack_int* ldb,\n               
     double* x, lapack_int* ldx, double* rcond, double* ferr,\n                    double* berr, double* work, lapack_int* iwork,\n                    lapack_int *info );\nvoid LAPACK_cgtsvx( char* fact, char* trans, lapack_int* n, lapack_int* nrhs,\n                    const lapack_complex_float* dl,\n                    const lapack_complex_float* d,\n                    const lapack_complex_float* du, lapack_complex_float* dlf,\n                    lapack_complex_float* df, lapack_complex_float* duf,\n                    lapack_complex_float* du2, lapack_int* ipiv,\n                    const lapack_complex_float* b, lapack_int* ldb,\n                    lapack_complex_float* x, lapack_int* ldx, float* rcond,\n                    float* ferr, float* berr, lapack_complex_float* work,\n                    float* rwork, lapack_int *info );\nvoid LAPACK_zgtsvx( char* fact, char* trans, lapack_int* n, lapack_int* nrhs,\n                    const lapack_complex_double* dl,\n                    const lapack_complex_double* d,\n                    const lapack_complex_double* du, lapack_complex_double* dlf,\n                    lapack_complex_double* df, lapack_complex_double* duf,\n                    lapack_complex_double* du2, lapack_int* ipiv,\n                    const lapack_complex_double* b, lapack_int* ldb,\n                    lapack_complex_double* x, lapack_int* ldx, double* rcond,\n                    double* ferr, double* berr, lapack_complex_double* work,\n                    double* rwork, lapack_int *info );\nvoid LAPACK_sposv( char* uplo, lapack_int* n, lapack_int* nrhs, float* a,\n                   lapack_int* lda, float* b, lapack_int* ldb,\n                   lapack_int *info );\nvoid LAPACK_dposv( char* uplo, lapack_int* n, lapack_int* nrhs, double* a,\n                   lapack_int* lda, double* b, lapack_int* ldb,\n                   lapack_int *info );\nvoid LAPACK_cposv( char* uplo, lapack_int* n, lapack_int* nrhs,\n                   lapack_complex_float* a, lapack_int* lda,\n                   lapack_complex_float* b, lapack_int* ldb, lapack_int *info );\nvoid LAPACK_zposv( char* uplo, lapack_int* n, lapack_int* nrhs,\n                   lapack_complex_double* a, lapack_int* lda,\n                   lapack_complex_double* b, lapack_int* ldb,\n                   lapack_int *info );\nvoid LAPACK_dsposv( char* uplo, lapack_int* n, lapack_int* nrhs, double* a,\n                    lapack_int* lda, double* b, lapack_int* ldb, double* x,\n                    lapack_int* ldx, double* work, float* swork,\n                    lapack_int* iter, lapack_int *info );\nvoid LAPACK_zcposv( char* uplo, lapack_int* n, lapack_int* nrhs,\n                    lapack_complex_double* a, lapack_int* lda,\n                    lapack_complex_double* b, lapack_int* ldb,\n                    lapack_complex_double* x, lapack_int* ldx,\n                    lapack_complex_double* work, lapack_complex_float* swork,\n                    double* rwork, lapack_int* iter, lapack_int *info );\nvoid LAPACK_sposvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs,\n                    float* a, lapack_int* lda, float* af, lapack_int* ldaf,\n                    char* equed, float* s, float* b, lapack_int* ldb, float* x,\n                    lapack_int* ldx, float* rcond, float* ferr, float* berr,\n                    float* work, lapack_int* iwork, lapack_int *info );\nvoid LAPACK_dposvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs,\n                    double* a, lapack_int* lda, 
double* af, lapack_int* ldaf,\n                    char* equed, double* s, double* b, lapack_int* ldb,\n                    double* x, lapack_int* ldx, double* rcond, double* ferr,\n                    double* berr, double* work, lapack_int* iwork,\n                    lapack_int *info );\nvoid LAPACK_cposvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs,\n                    lapack_complex_float* a, lapack_int* lda,\n                    lapack_complex_float* af, lapack_int* ldaf, char* equed,\n                    float* s, lapack_complex_float* b, lapack_int* ldb,\n                    lapack_complex_float* x, lapack_int* ldx, float* rcond,\n                    float* ferr, float* berr, lapack_complex_float* work,\n                    float* rwork, lapack_int *info );\nvoid LAPACK_zposvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs,\n                    lapack_complex_double* a, lapack_int* lda,\n                    lapack_complex_double* af, lapack_int* ldaf, char* equed,\n                    double* s, lapack_complex_double* b, lapack_int* ldb,\n                    lapack_complex_double* x, lapack_int* ldx, double* rcond,\n                    double* ferr, double* berr, lapack_complex_double* work,\n                    double* rwork, lapack_int *info );\nvoid LAPACK_dposvxx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs,\n                     double* a, lapack_int* lda, double* af, lapack_int* ldaf,\n                     char* equed, double* s, double* b, lapack_int* ldb,\n                     double* x, lapack_int* ldx, double* rcond, double* rpvgrw,\n                     double* berr, lapack_int* n_err_bnds,\n                     double* err_bnds_norm, double* err_bnds_comp,\n                     lapack_int* nparams, double* params, double* work,\n                     lapack_int* iwork, lapack_int *info );\nvoid LAPACK_sposvxx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs,\n                     float* a, lapack_int* lda, float* af, lapack_int* ldaf,\n                     char* equed, float* s, float* b, lapack_int* ldb, float* x,\n                     lapack_int* ldx, float* rcond, float* rpvgrw, float* berr,\n                     lapack_int* n_err_bnds, float* err_bnds_norm,\n                     float* err_bnds_comp, lapack_int* nparams, float* params,\n                     float* work, lapack_int* iwork, lapack_int *info );\nvoid LAPACK_zposvxx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs,\n                     lapack_complex_double* a, lapack_int* lda,\n                     lapack_complex_double* af, lapack_int* ldaf, char* equed,\n                     double* s, lapack_complex_double* b, lapack_int* ldb,\n                     lapack_complex_double* x, lapack_int* ldx, double* rcond,\n                     double* rpvgrw, double* berr, lapack_int* n_err_bnds,\n                     double* err_bnds_norm, double* err_bnds_comp,\n                     lapack_int* nparams, double* params,\n                     lapack_complex_double* work, double* rwork,\n                     lapack_int *info );\nvoid LAPACK_cposvxx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs,\n                     lapack_complex_float* a, lapack_int* lda,\n                     lapack_complex_float* af, lapack_int* ldaf, char* equed,\n                     float* s, lapack_complex_float* b, lapack_int* ldb,\n                     lapack_complex_float* x, lapack_int* ldx, float* rcond,\n                     float* rpvgrw, float* berr, lapack_int* 
n_err_bnds,\n                     float* err_bnds_norm, float* err_bnds_comp,\n                     lapack_int* nparams, float* params,\n                     lapack_complex_float* work, float* rwork,\n                     lapack_int *info );\nvoid LAPACK_sppsv( char* uplo, lapack_int* n, lapack_int* nrhs, float* ap,\n                   float* b, lapack_int* ldb, lapack_int *info );\nvoid LAPACK_dppsv( char* uplo, lapack_int* n, lapack_int* nrhs, double* ap,\n                   double* b, lapack_int* ldb, lapack_int *info );\nvoid LAPACK_cppsv( char* uplo, lapack_int* n, lapack_int* nrhs,\n                   lapack_complex_float* ap, lapack_complex_float* b,\n                   lapack_int* ldb, lapack_int *info );\nvoid LAPACK_zppsv( char* uplo, lapack_int* n, lapack_int* nrhs,\n                   lapack_complex_double* ap, lapack_complex_double* b,\n                   lapack_int* ldb, lapack_int *info );\nvoid LAPACK_sppsvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs,\n                    float* ap, float* afp, char* equed, float* s, float* b,\n                    lapack_int* ldb, float* x, lapack_int* ldx, float* rcond,\n                    float* ferr, float* berr, float* work, lapack_int* iwork,\n                    lapack_int *info );\nvoid LAPACK_dppsvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs,\n                    double* ap, double* afp, char* equed, double* s, double* b,\n                    lapack_int* ldb, double* x, lapack_int* ldx, double* rcond,\n                    double* ferr, double* berr, double* work, lapack_int* iwork,\n                    lapack_int *info );\nvoid LAPACK_cppsvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs,\n                    lapack_complex_float* ap, lapack_complex_float* afp,\n                    char* equed, float* s, lapack_complex_float* b,\n                    lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx,\n                    float* rcond, float* ferr, float* berr,\n                    lapack_complex_float* work, float* rwork,\n                    lapack_int *info );\nvoid LAPACK_zppsvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs,\n                    lapack_complex_double* ap, lapack_complex_double* afp,\n                    char* equed, double* s, lapack_complex_double* b,\n                    lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx,\n                    double* rcond, double* ferr, double* berr,\n                    lapack_complex_double* work, double* rwork,\n                    lapack_int *info );\nvoid LAPACK_spbsv( char* uplo, lapack_int* n, lapack_int* kd, lapack_int* nrhs,\n                   float* ab, lapack_int* ldab, float* b, lapack_int* ldb,\n                   lapack_int *info );\nvoid LAPACK_dpbsv( char* uplo, lapack_int* n, lapack_int* kd, lapack_int* nrhs,\n                   double* ab, lapack_int* ldab, double* b, lapack_int* ldb,\n                   lapack_int *info );\nvoid LAPACK_cpbsv( char* uplo, lapack_int* n, lapack_int* kd, lapack_int* nrhs,\n                   lapack_complex_float* ab, lapack_int* ldab,\n                   lapack_complex_float* b, lapack_int* ldb, lapack_int *info );\nvoid LAPACK_zpbsv( char* uplo, lapack_int* n, lapack_int* kd, lapack_int* nrhs,\n                   lapack_complex_double* ab, lapack_int* ldab,\n                   lapack_complex_double* b, lapack_int* ldb,\n                   lapack_int *info );\nvoid LAPACK_spbsvx( char* fact, char* uplo, lapack_int* n, lapack_int* kd,\n                    
lapack_int* nrhs, float* ab, lapack_int* ldab, float* afb,\n                    lapack_int* ldafb, char* equed, float* s, float* b,\n                    lapack_int* ldb, float* x, lapack_int* ldx, float* rcond,\n                    float* ferr, float* berr, float* work, lapack_int* iwork,\n                    lapack_int *info );\nvoid LAPACK_dpbsvx( char* fact, char* uplo, lapack_int* n, lapack_int* kd,\n                    lapack_int* nrhs, double* ab, lapack_int* ldab, double* afb,\n                    lapack_int* ldafb, char* equed, double* s, double* b,\n                    lapack_int* ldb, double* x, lapack_int* ldx, double* rcond,\n                    double* ferr, double* berr, double* work, lapack_int* iwork,\n                    lapack_int *info );\nvoid LAPACK_cpbsvx( char* fact, char* uplo, lapack_int* n, lapack_int* kd,\n                    lapack_int* nrhs, lapack_complex_float* ab,\n                    lapack_int* ldab, lapack_complex_float* afb,\n                    lapack_int* ldafb, char* equed, float* s,\n                    lapack_complex_float* b, lapack_int* ldb,\n                    lapack_complex_float* x, lapack_int* ldx, float* rcond,\n                    float* ferr, float* berr, lapack_complex_float* work,\n                    float* rwork, lapack_int *info );\nvoid LAPACK_zpbsvx( char* fact, char* uplo, lapack_int* n, lapack_int* kd,\n                    lapack_int* nrhs, lapack_complex_double* ab,\n                    lapack_int* ldab, lapack_complex_double* afb,\n                    lapack_int* ldafb, char* equed, double* s,\n                    lapack_complex_double* b, lapack_int* ldb,\n                    lapack_complex_double* x, lapack_int* ldx, double* rcond,\n                    double* ferr, double* berr, lapack_complex_double* work,\n                    double* rwork, lapack_int *info );\nvoid LAPACK_sptsv( lapack_int* n, lapack_int* nrhs, float* d, float* e,\n                   float* b, lapack_int* ldb, lapack_int *info );\nvoid LAPACK_dptsv( lapack_int* n, lapack_int* nrhs, double* d, double* e,\n                   double* b, lapack_int* ldb, lapack_int *info );\nvoid LAPACK_cptsv( lapack_int* n, lapack_int* nrhs, float* d,\n                   lapack_complex_float* e, lapack_complex_float* b,\n                   lapack_int* ldb, lapack_int *info );\nvoid LAPACK_zptsv( lapack_int* n, lapack_int* nrhs, double* d,\n                   lapack_complex_double* e, lapack_complex_double* b,\n                   lapack_int* ldb, lapack_int *info );\nvoid LAPACK_sptsvx( char* fact, lapack_int* n, lapack_int* nrhs, const float* d,\n                    const float* e, float* df, float* ef, const float* b,\n                    lapack_int* ldb, float* x, lapack_int* ldx, float* rcond,\n                    float* ferr, float* berr, float* work, lapack_int *info );\nvoid LAPACK_dptsvx( char* fact, lapack_int* n, lapack_int* nrhs,\n                    const double* d, const double* e, double* df, double* ef,\n                    const double* b, lapack_int* ldb, double* x,\n                    lapack_int* ldx, double* rcond, double* ferr, double* berr,\n                    double* work, lapack_int *info );\nvoid LAPACK_cptsvx( char* fact, lapack_int* n, lapack_int* nrhs, const float* d,\n                    const lapack_complex_float* e, float* df,\n                    lapack_complex_float* ef, const lapack_complex_float* b,\n                    lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx,\n                    float* rcond, float* ferr, float* berr,\n   
                 lapack_complex_float* work, float* rwork,\n                    lapack_int *info );\nvoid LAPACK_zptsvx( char* fact, lapack_int* n, lapack_int* nrhs,\n                    const double* d, const lapack_complex_double* e, double* df,\n                    lapack_complex_double* ef, const lapack_complex_double* b,\n                    lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx,\n                    double* rcond, double* ferr, double* berr,\n                    lapack_complex_double* work, double* rwork,\n                    lapack_int *info );\nvoid LAPACK_ssysv( char* uplo, lapack_int* n, lapack_int* nrhs, float* a,\n                   lapack_int* lda, lapack_int* ipiv, float* b, lapack_int* ldb,\n                   float* work, lapack_int* lwork, lapack_int *info );\nvoid LAPACK_dsysv( char* uplo, lapack_int* n, lapack_int* nrhs, double* a,\n                   lapack_int* lda, lapack_int* ipiv, double* b,\n                   lapack_int* ldb, double* work, lapack_int* lwork,\n                   lapack_int *info );\nvoid LAPACK_csysv( char* uplo, lapack_int* n, lapack_int* nrhs,\n                   lapack_complex_float* a, lapack_int* lda, lapack_int* ipiv,\n                   lapack_complex_float* b, lapack_int* ldb,\n                   lapack_complex_float* work, lapack_int* lwork,\n                   lapack_int *info );\nvoid LAPACK_zsysv( char* uplo, lapack_int* n, lapack_int* nrhs,\n                   lapack_complex_double* a, lapack_int* lda, lapack_int* ipiv,\n                   lapack_complex_double* b, lapack_int* ldb,\n                   lapack_complex_double* work, lapack_int* lwork,\n                   lapack_int *info );\nvoid LAPACK_ssysvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs,\n                    const float* a, lapack_int* lda, float* af,\n                    lapack_int* ldaf, lapack_int* ipiv, const float* b,\n                    lapack_int* ldb, float* x, lapack_int* ldx, float* rcond,\n                    float* ferr, float* berr, float* work, lapack_int* lwork,\n                    lapack_int* iwork, lapack_int *info );\nvoid LAPACK_dsysvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs,\n                    const double* a, lapack_int* lda, double* af,\n                    lapack_int* ldaf, lapack_int* ipiv, const double* b,\n                    lapack_int* ldb, double* x, lapack_int* ldx, double* rcond,\n                    double* ferr, double* berr, double* work, lapack_int* lwork,\n                    lapack_int* iwork, lapack_int *info );\nvoid LAPACK_csysvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs,\n                    const lapack_complex_float* a, lapack_int* lda,\n                    lapack_complex_float* af, lapack_int* ldaf,\n                    lapack_int* ipiv, const lapack_complex_float* b,\n                    lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx,\n                    float* rcond, float* ferr, float* berr,\n                    lapack_complex_float* work, lapack_int* lwork, float* rwork,\n                    lapack_int *info );\nvoid LAPACK_zsysvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs,\n                    const lapack_complex_double* a, lapack_int* lda,\n                    lapack_complex_double* af, lapack_int* ldaf,\n                    lapack_int* ipiv, const lapack_complex_double* b,\n                    lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx,\n                    double* rcond, double* ferr, double* berr,\n           
         lapack_complex_double* work, lapack_int* lwork,\n                    double* rwork, lapack_int *info );\nvoid LAPACK_dsysvxx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs,\n                     double* a, lapack_int* lda, double* af, lapack_int* ldaf,\n                     lapack_int* ipiv, char* equed, double* s, double* b,\n                     lapack_int* ldb, double* x, lapack_int* ldx, double* rcond,\n                     double* rpvgrw, double* berr, lapack_int* n_err_bnds,\n                     double* err_bnds_norm, double* err_bnds_comp,\n                     lapack_int* nparams, double* params, double* work,\n                     lapack_int* iwork, lapack_int *info );\nvoid LAPACK_ssysvxx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs,\n                     float* a, lapack_int* lda, float* af, lapack_int* ldaf,\n                     lapack_int* ipiv, char* equed, float* s, float* b,\n                     lapack_int* ldb, float* x, lapack_int* ldx, float* rcond,\n                     float* rpvgrw, float* berr, lapack_int* n_err_bnds,\n                     float* err_bnds_norm, float* err_bnds_comp,\n                     lapack_int* nparams, float* params, float* work,\n                     lapack_int* iwork, lapack_int *info );\nvoid LAPACK_zsysvxx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs,\n                     lapack_complex_double* a, lapack_int* lda,\n                     lapack_complex_double* af, lapack_int* ldaf,\n                     lapack_int* ipiv, char* equed, double* s,\n                     lapack_complex_double* b, lapack_int* ldb,\n                     lapack_complex_double* x, lapack_int* ldx, double* rcond,\n                     double* rpvgrw, double* berr, lapack_int* n_err_bnds,\n                     double* err_bnds_norm, double* err_bnds_comp,\n                     lapack_int* nparams, double* params,\n                     lapack_complex_double* work, double* rwork,\n                     lapack_int *info );\nvoid LAPACK_csysvxx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs,\n                     lapack_complex_float* a, lapack_int* lda,\n                     lapack_complex_float* af, lapack_int* ldaf,\n                     lapack_int* ipiv, char* equed, float* s,\n                     lapack_complex_float* b, lapack_int* ldb,\n                     lapack_complex_float* x, lapack_int* ldx, float* rcond,\n                     float* rpvgrw, float* berr, lapack_int* n_err_bnds,\n                     float* err_bnds_norm, float* err_bnds_comp,\n                     lapack_int* nparams, float* params,\n                     lapack_complex_float* work, float* rwork,\n                     lapack_int *info );\nvoid LAPACK_chesv( char* uplo, lapack_int* n, lapack_int* nrhs,\n                   lapack_complex_float* a, lapack_int* lda, lapack_int* ipiv,\n                   lapack_complex_float* b, lapack_int* ldb,\n                   lapack_complex_float* work, lapack_int* lwork,\n                   lapack_int *info );\nvoid LAPACK_zhesv( char* uplo, lapack_int* n, lapack_int* nrhs,\n                   lapack_complex_double* a, lapack_int* lda, lapack_int* ipiv,\n                   lapack_complex_double* b, lapack_int* ldb,\n                   lapack_complex_double* work, lapack_int* lwork,\n                   lapack_int *info );\nvoid LAPACK_chesvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs,\n                    const lapack_complex_float* a, lapack_int* lda,\n                    
lapack_complex_float* af, lapack_int* ldaf,\n                    lapack_int* ipiv, const lapack_complex_float* b,\n                    lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx,\n                    float* rcond, float* ferr, float* berr,\n                    lapack_complex_float* work, lapack_int* lwork, float* rwork,\n                    lapack_int *info );\nvoid LAPACK_zhesvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs,\n                    const lapack_complex_double* a, lapack_int* lda,\n                    lapack_complex_double* af, lapack_int* ldaf,\n                    lapack_int* ipiv, const lapack_complex_double* b,\n                    lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx,\n                    double* rcond, double* ferr, double* berr,\n                    lapack_complex_double* work, lapack_int* lwork,\n                    double* rwork, lapack_int *info );\nvoid LAPACK_zhesvxx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs,\n                     lapack_complex_double* a, lapack_int* lda,\n                     lapack_complex_double* af, lapack_int* ldaf,\n                     lapack_int* ipiv, char* equed, double* s,\n                     lapack_complex_double* b, lapack_int* ldb,\n                     lapack_complex_double* x, lapack_int* ldx, double* rcond,\n                     double* rpvgrw, double* berr, lapack_int* n_err_bnds,\n                     double* err_bnds_norm, double* err_bnds_comp,\n                     lapack_int* nparams, double* params,\n                     lapack_complex_double* work, double* rwork,\n                     lapack_int *info );\nvoid LAPACK_chesvxx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs,\n                     lapack_complex_float* a, lapack_int* lda,\n                     lapack_complex_float* af, lapack_int* ldaf,\n                     lapack_int* ipiv, char* equed, float* s,\n                     lapack_complex_float* b, lapack_int* ldb,\n                     lapack_complex_float* x, lapack_int* ldx, float* rcond,\n                     float* rpvgrw, float* berr, lapack_int* n_err_bnds,\n                     float* err_bnds_norm, float* err_bnds_comp,\n                     lapack_int* nparams, float* params,\n                     lapack_complex_float* work, float* rwork,\n                     lapack_int *info );\nvoid LAPACK_sspsv( char* uplo, lapack_int* n, lapack_int* nrhs, float* ap,\n                   lapack_int* ipiv, float* b, lapack_int* ldb,\n                   lapack_int *info );\nvoid LAPACK_dspsv( char* uplo, lapack_int* n, lapack_int* nrhs, double* ap,\n                   lapack_int* ipiv, double* b, lapack_int* ldb,\n                   lapack_int *info );\nvoid LAPACK_cspsv( char* uplo, lapack_int* n, lapack_int* nrhs,\n                   lapack_complex_float* ap, lapack_int* ipiv,\n                   lapack_complex_float* b, lapack_int* ldb, lapack_int *info );\nvoid LAPACK_zspsv( char* uplo, lapack_int* n, lapack_int* nrhs,\n                   lapack_complex_double* ap, lapack_int* ipiv,\n                   lapack_complex_double* b, lapack_int* ldb,\n                   lapack_int *info );\nvoid LAPACK_sspsvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs,\n                    const float* ap, float* afp, lapack_int* ipiv,\n                    const float* b, lapack_int* ldb, float* x, lapack_int* ldx,\n                    float* rcond, float* ferr, float* berr, float* work,\n                    lapack_int* iwork, lapack_int *info 
);\nvoid LAPACK_dspsvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs,\n                    const double* ap, double* afp, lapack_int* ipiv,\n                    const double* b, lapack_int* ldb, double* x,\n                    lapack_int* ldx, double* rcond, double* ferr, double* berr,\n                    double* work, lapack_int* iwork, lapack_int *info );\nvoid LAPACK_cspsvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs,\n                    const lapack_complex_float* ap, lapack_complex_float* afp,\n                    lapack_int* ipiv, const lapack_complex_float* b,\n                    lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx,\n                    float* rcond, float* ferr, float* berr,\n                    lapack_complex_float* work, float* rwork,\n                    lapack_int *info );\nvoid LAPACK_zspsvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs,\n                    const lapack_complex_double* ap, lapack_complex_double* afp,\n                    lapack_int* ipiv, const lapack_complex_double* b,\n                    lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx,\n                    double* rcond, double* ferr, double* berr,\n                    lapack_complex_double* work, double* rwork,\n                    lapack_int *info );\nvoid LAPACK_chpsv( char* uplo, lapack_int* n, lapack_int* nrhs,\n                   lapack_complex_float* ap, lapack_int* ipiv,\n                   lapack_complex_float* b, lapack_int* ldb, lapack_int *info );\nvoid LAPACK_zhpsv( char* uplo, lapack_int* n, lapack_int* nrhs,\n                   lapack_complex_double* ap, lapack_int* ipiv,\n                   lapack_complex_double* b, lapack_int* ldb,\n                   lapack_int *info );\nvoid LAPACK_chpsvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs,\n                    const lapack_complex_float* ap, lapack_complex_float* afp,\n                    lapack_int* ipiv, const lapack_complex_float* b,\n                    lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx,\n                    float* rcond, float* ferr, float* berr,\n                    lapack_complex_float* work, float* rwork,\n                    lapack_int *info );\nvoid LAPACK_zhpsvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs,\n                    const lapack_complex_double* ap, lapack_complex_double* afp,\n                    lapack_int* ipiv, const lapack_complex_double* b,\n                    lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx,\n                    double* rcond, double* ferr, double* berr,\n                    lapack_complex_double* work, double* rwork,\n                    lapack_int *info );\nvoid LAPACK_sgeqrf( lapack_int* m, lapack_int* n, float* a, lapack_int* lda,\n                    float* tau, float* work, lapack_int* lwork,\n                    lapack_int *info );\nvoid LAPACK_dgeqrf( lapack_int* m, lapack_int* n, double* a, lapack_int* lda,\n                    double* tau, double* work, lapack_int* lwork,\n                    lapack_int *info );\nvoid LAPACK_cgeqrf( lapack_int* m, lapack_int* n, lapack_complex_float* a,\n                    lapack_int* lda, lapack_complex_float* tau,\n                    lapack_complex_float* work, lapack_int* lwork,\n                    lapack_int *info );\nvoid LAPACK_zgeqrf( lapack_int* m, lapack_int* n, lapack_complex_double* a,\n                    lapack_int* lda, lapack_complex_double* tau,\n                    lapack_complex_double* work, lapack_int* 
lwork,\n                    lapack_int *info );\nvoid LAPACK_sgeqpf( lapack_int* m, lapack_int* n, float* a, lapack_int* lda,\n                    lapack_int* jpvt, float* tau, float* work,\n                    lapack_int *info );\nvoid LAPACK_dgeqpf( lapack_int* m, lapack_int* n, double* a, lapack_int* lda,\n                    lapack_int* jpvt, double* tau, double* work,\n                    lapack_int *info );\nvoid LAPACK_cgeqpf( lapack_int* m, lapack_int* n, lapack_complex_float* a,\n                    lapack_int* lda, lapack_int* jpvt,\n                    lapack_complex_float* tau, lapack_complex_float* work,\n                    float* rwork, lapack_int *info );\nvoid LAPACK_zgeqpf( lapack_int* m, lapack_int* n, lapack_complex_double* a,\n                    lapack_int* lda, lapack_int* jpvt,\n                    lapack_complex_double* tau, lapack_complex_double* work,\n                    double* rwork, lapack_int *info );\nvoid LAPACK_sgeqp3( lapack_int* m, lapack_int* n, float* a, lapack_int* lda,\n                    lapack_int* jpvt, float* tau, float* work,\n                    lapack_int* lwork, lapack_int *info );\nvoid LAPACK_dgeqp3( lapack_int* m, lapack_int* n, double* a, lapack_int* lda,\n                    lapack_int* jpvt, double* tau, double* work,\n                    lapack_int* lwork, lapack_int *info );\nvoid LAPACK_cgeqp3( lapack_int* m, lapack_int* n, lapack_complex_float* a,\n                    lapack_int* lda, lapack_int* jpvt,\n                    lapack_complex_float* tau, lapack_complex_float* work,\n                    lapack_int* lwork, float* rwork, lapack_int *info );\nvoid LAPACK_zgeqp3( lapack_int* m, lapack_int* n, lapack_complex_double* a,\n                    lapack_int* lda, lapack_int* jpvt,\n                    lapack_complex_double* tau, lapack_complex_double* work,\n                    lapack_int* lwork, double* rwork, lapack_int *info );\nvoid LAPACK_sorgqr( lapack_int* m, lapack_int* n, lapack_int* k, float* a,\n                    lapack_int* lda, const float* tau, float* work,\n                    lapack_int* lwork, lapack_int *info );\nvoid LAPACK_dorgqr( lapack_int* m, lapack_int* n, lapack_int* k, double* a,\n                    lapack_int* lda, const double* tau, double* work,\n                    lapack_int* lwork, lapack_int *info );\nvoid LAPACK_sormqr( char* side, char* trans, lapack_int* m, lapack_int* n,\n                    lapack_int* k, const float* a, lapack_int* lda,\n                    const float* tau, float* c, lapack_int* ldc, float* work,\n                    lapack_int* lwork, lapack_int *info );\nvoid LAPACK_dormqr( char* side, char* trans, lapack_int* m, lapack_int* n,\n                    lapack_int* k, const double* a, lapack_int* lda,\n                    const double* tau, double* c, lapack_int* ldc, double* work,\n                    lapack_int* lwork, lapack_int *info );\nvoid LAPACK_cungqr( lapack_int* m, lapack_int* n, lapack_int* k,\n                    lapack_complex_float* a, lapack_int* lda,\n                    const lapack_complex_float* tau, lapack_complex_float* work,\n                    lapack_int* lwork, lapack_int *info );\nvoid LAPACK_zungqr( lapack_int* m, lapack_int* n, lapack_int* k,\n                    lapack_complex_double* a, lapack_int* lda,\n                    const lapack_complex_double* tau,\n                    lapack_complex_double* work, lapack_int* lwork,\n                    lapack_int *info );\nvoid LAPACK_cunmqr( char* side, char* trans, lapack_int* m, lapack_int* n,\n     
               lapack_int* k, const lapack_complex_float* a,\n                    lapack_int* lda, const lapack_complex_float* tau,\n                    lapack_complex_float* c, lapack_int* ldc,\n                    lapack_complex_float* work, lapack_int* lwork,\n                    lapack_int *info );\nvoid LAPACK_zunmqr( char* side, char* trans, lapack_int* m, lapack_int* n,\n                    lapack_int* k, const lapack_complex_double* a,\n                    lapack_int* lda, const lapack_complex_double* tau,\n                    lapack_complex_double* c, lapack_int* ldc,\n                    lapack_complex_double* work, lapack_int* lwork,\n                    lapack_int *info );\nvoid LAPACK_sgelqf( lapack_int* m, lapack_int* n, float* a, lapack_int* lda,\n                    float* tau, float* work, lapack_int* lwork,\n                    lapack_int *info );\nvoid LAPACK_dgelqf( lapack_int* m, lapack_int* n, double* a, lapack_int* lda,\n                    double* tau, double* work, lapack_int* lwork,\n                    lapack_int *info );\nvoid LAPACK_cgelqf( lapack_int* m, lapack_int* n, lapack_complex_float* a,\n                    lapack_int* lda, lapack_complex_float* tau,\n                    lapack_complex_float* work, lapack_int* lwork,\n                    lapack_int *info );\nvoid LAPACK_zgelqf( lapack_int* m, lapack_int* n, lapack_complex_double* a,\n                    lapack_int* lda, lapack_complex_double* tau,\n                    lapack_complex_double* work, lapack_int* lwork,\n                    lapack_int *info );\nvoid LAPACK_sorglq( lapack_int* m, lapack_int* n, lapack_int* k, float* a,\n                    lapack_int* lda, const float* tau, float* work,\n                    lapack_int* lwork, lapack_int *info );\nvoid LAPACK_dorglq( lapack_int* m, lapack_int* n, lapack_int* k, double* a,\n                    lapack_int* lda, const double* tau, double* work,\n                    lapack_int* lwork, lapack_int *info );\nvoid LAPACK_sormlq( char* side, char* trans, lapack_int* m, lapack_int* n,\n                    lapack_int* k, const float* a, lapack_int* lda,\n                    const float* tau, float* c, lapack_int* ldc, float* work,\n                    lapack_int* lwork, lapack_int *info );\nvoid LAPACK_dormlq( char* side, char* trans, lapack_int* m, lapack_int* n,\n                    lapack_int* k, const double* a, lapack_int* lda,\n                    const double* tau, double* c, lapack_int* ldc, double* work,\n                    lapack_int* lwork, lapack_int *info );\nvoid LAPACK_cunglq( lapack_int* m, lapack_int* n, lapack_int* k,\n                    lapack_complex_float* a, lapack_int* lda,\n                    const lapack_complex_float* tau, lapack_complex_float* work,\n                    lapack_int* lwork, lapack_int *info );\nvoid LAPACK_zunglq( lapack_int* m, lapack_int* n, lapack_int* k,\n                    lapack_complex_double* a, lapack_int* lda,\n                    const lapack_complex_double* tau,\n                    lapack_complex_double* work, lapack_int* lwork,\n                    lapack_int *info );\nvoid LAPACK_cunmlq( char* side, char* trans, lapack_int* m, lapack_int* n,\n                    lapack_int* k, const lapack_complex_float* a,\n                    lapack_int* lda, const lapack_complex_float* tau,\n                    lapack_complex_float* c, lapack_int* ldc,\n                    lapack_complex_float* work, lapack_int* lwork,\n                    lapack_int *info );\nvoid LAPACK_zunmlq( char* side, char* trans, 
lapack_int* m, lapack_int* n,\n                    lapack_int* k, const lapack_complex_double* a,\n                    lapack_int* lda, const lapack_complex_double* tau,\n                    lapack_complex_double* c, lapack_int* ldc,\n                    lapack_complex_double* work, lapack_int* lwork,\n                    lapack_int *info );\nvoid LAPACK_sgeqlf( lapack_int* m, lapack_int* n, float* a, lapack_int* lda,\n                    float* tau, float* work, lapack_int* lwork,\n                    lapack_int *info );\nvoid LAPACK_dgeqlf( lapack_int* m, lapack_int* n, double* a, lapack_int* lda,\n                    double* tau, double* work, lapack_int* lwork,\n                    lapack_int *info );\nvoid LAPACK_cgeqlf( lapack_int* m, lapack_int* n, lapack_complex_float* a,\n                    lapack_int* lda, lapack_complex_float* tau,\n                    lapack_complex_float* work, lapack_int* lwork,\n                    lapack_int *info );\nvoid LAPACK_zgeqlf( lapack_int* m, lapack_int* n, lapack_complex_double* a,\n                    lapack_int* lda, lapack_complex_double* tau,\n                    lapack_complex_double* work, lapack_int* lwork,\n                    lapack_int *info );\nvoid LAPACK_sorgql( lapack_int* m, lapack_int* n, lapack_int* k, float* a,\n                    lapack_int* lda, const float* tau, float* work,\n                    lapack_int* lwork, lapack_int *info );\nvoid LAPACK_dorgql( lapack_int* m, lapack_int* n, lapack_int* k, double* a,\n                    lapack_int* lda, const double* tau, double* work,\n                    lapack_int* lwork, lapack_int *info );\nvoid LAPACK_cungql( lapack_int* m, lapack_int* n, lapack_int* k,\n                    lapack_complex_float* a, lapack_int* lda,\n                    const lapack_complex_float* tau, lapack_complex_float* work,\n                    lapack_int* lwork, lapack_int *info );\nvoid LAPACK_zungql( lapack_int* m, lapack_int* n, lapack_int* k,\n                    lapack_complex_double* a, lapack_int* lda,\n                    const lapack_complex_double* tau,\n                    lapack_complex_double* work, lapack_int* lwork,\n                    lapack_int *info );\nvoid LAPACK_sormql( char* side, char* trans, lapack_int* m, lapack_int* n,\n                    lapack_int* k, const float* a, lapack_int* lda,\n                    const float* tau, float* c, lapack_int* ldc, float* work,\n                    lapack_int* lwork, lapack_int *info );\nvoid LAPACK_dormql( char* side, char* trans, lapack_int* m, lapack_int* n,\n                    lapack_int* k, const double* a, lapack_int* lda,\n                    const double* tau, double* c, lapack_int* ldc, double* work,\n                    lapack_int* lwork, lapack_int *info );\nvoid LAPACK_cunmql( char* side, char* trans, lapack_int* m, lapack_int* n,\n                    lapack_int* k, const lapack_complex_float* a,\n                    lapack_int* lda, const lapack_complex_float* tau,\n                    lapack_complex_float* c, lapack_int* ldc,\n                    lapack_complex_float* work, lapack_int* lwork,\n                    lapack_int *info );\nvoid LAPACK_zunmql( char* side, char* trans, lapack_int* m, lapack_int* n,\n                    lapack_int* k, const lapack_complex_double* a,\n                    lapack_int* lda, const lapack_complex_double* tau,\n                    lapack_complex_double* c, lapack_int* ldc,\n                    lapack_complex_double* work, lapack_int* lwork,\n                    lapack_int *info );\nvoid 
LAPACK_sgerqf( lapack_int* m, lapack_int* n, float* a, lapack_int* lda,\n                    float* tau, float* work, lapack_int* lwork,\n                    lapack_int *info );\nvoid LAPACK_dgerqf( lapack_int* m, lapack_int* n, double* a, lapack_int* lda,\n                    double* tau, double* work, lapack_int* lwork,\n                    lapack_int *info );\nvoid LAPACK_cgerqf( lapack_int* m, lapack_int* n, lapack_complex_float* a,\n                    lapack_int* lda, lapack_complex_float* tau,\n                    lapack_complex_float* work, lapack_int* lwork,\n                    lapack_int *info );\nvoid LAPACK_zgerqf( lapack_int* m, lapack_int* n, lapack_complex_double* a,\n                    lapack_int* lda, lapack_complex_double* tau,\n                    lapack_complex_double* work, lapack_int* lwork,\n                    lapack_int *info );\nvoid LAPACK_sorgrq( lapack_int* m, lapack_int* n, lapack_int* k, float* a,\n                    lapack_int* lda, const float* tau, float* work,\n                    lapack_int* lwork, lapack_int *info );\nvoid LAPACK_dorgrq( lapack_int* m, lapack_int* n, lapack_int* k, double* a,\n                    lapack_int* lda, const double* tau, double* work,\n                    lapack_int* lwork, lapack_int *info );\nvoid LAPACK_cungrq( lapack_int* m, lapack_int* n, lapack_int* k,\n                    lapack_complex_float* a, lapack_int* lda,\n                    const lapack_complex_float* tau, lapack_complex_float* work,\n                    lapack_int* lwork, lapack_int *info );\nvoid LAPACK_zungrq( lapack_int* m, lapack_int* n, lapack_int* k,\n                    lapack_complex_double* a, lapack_int* lda,\n                    const lapack_complex_double* tau,\n                    lapack_complex_double* work, lapack_int* lwork,\n                    lapack_int *info );\nvoid LAPACK_sormrq( char* side, char* trans, lapack_int* m, lapack_int* n,\n                    lapack_int* k, const float* a, lapack_int* lda,\n                    const float* tau, float* c, lapack_int* ldc, float* work,\n                    lapack_int* lwork, lapack_int *info );\nvoid LAPACK_dormrq( char* side, char* trans, lapack_int* m, lapack_int* n,\n                    lapack_int* k, const double* a, lapack_int* lda,\n                    const double* tau, double* c, lapack_int* ldc, double* work,\n                    lapack_int* lwork, lapack_int *info );\nvoid LAPACK_cunmrq( char* side, char* trans, lapack_int* m, lapack_int* n,\n                    lapack_int* k, const lapack_complex_float* a,\n                    lapack_int* lda, const lapack_complex_float* tau,\n                    lapack_complex_float* c, lapack_int* ldc,\n                    lapack_complex_float* work, lapack_int* lwork,\n                    lapack_int *info );\nvoid LAPACK_zunmrq( char* side, char* trans, lapack_int* m, lapack_int* n,\n                    lapack_int* k, const lapack_complex_double* a,\n                    lapack_int* lda, const lapack_complex_double* tau,\n                    lapack_complex_double* c, lapack_int* ldc,\n                    lapack_complex_double* work, lapack_int* lwork,\n                    lapack_int *info );\nvoid LAPACK_stzrzf( lapack_int* m, lapack_int* n, float* a, lapack_int* lda,\n                    float* tau, float* work, lapack_int* lwork,\n                    lapack_int *info );\nvoid LAPACK_dtzrzf( lapack_int* m, lapack_int* n, double* a, lapack_int* lda,\n                    double* tau, double* work, lapack_int* lwork,\n                    
lapack_int *info );\nvoid LAPACK_ctzrzf( lapack_int* m, lapack_int* n, lapack_complex_float* a,\n                    lapack_int* lda, lapack_complex_float* tau,\n                    lapack_complex_float* work, lapack_int* lwork,\n                    lapack_int *info );\nvoid LAPACK_ztzrzf( lapack_int* m, lapack_int* n, lapack_complex_double* a,\n                    lapack_int* lda, lapack_complex_double* tau,\n                    lapack_complex_double* work, lapack_int* lwork,\n                    lapack_int *info );\nvoid LAPACK_sormrz( char* side, char* trans, lapack_int* m, lapack_int* n,\n                    lapack_int* k, lapack_int* l, const float* a,\n                    lapack_int* lda, const float* tau, float* c,\n                    lapack_int* ldc, float* work, lapack_int* lwork,\n                    lapack_int *info );\nvoid LAPACK_dormrz( char* side, char* trans, lapack_int* m, lapack_int* n,\n                    lapack_int* k, lapack_int* l, const double* a,\n                    lapack_int* lda, const double* tau, double* c,\n                    lapack_int* ldc, double* work, lapack_int* lwork,\n                    lapack_int *info );\nvoid LAPACK_cunmrz( char* side, char* trans, lapack_int* m, lapack_int* n,\n                    lapack_int* k, lapack_int* l, const lapack_complex_float* a,\n                    lapack_int* lda, const lapack_complex_float* tau,\n                    lapack_complex_float* c, lapack_int* ldc,\n                    lapack_complex_float* work, lapack_int* lwork,\n                    lapack_int *info );\nvoid LAPACK_zunmrz( char* side, char* trans, lapack_int* m, lapack_int* n,\n                    lapack_int* k, lapack_int* l,\n                    const lapack_complex_double* a, lapack_int* lda,\n                    const lapack_complex_double* tau, lapack_complex_double* c,\n                    lapack_int* ldc, lapack_complex_double* work,\n                    lapack_int* lwork, lapack_int *info );\nvoid LAPACK_sggqrf( lapack_int* n, lapack_int* m, lapack_int* p, float* a,\n                    lapack_int* lda, float* taua, float* b, lapack_int* ldb,\n                    float* taub, float* work, lapack_int* lwork,\n                    lapack_int *info );\nvoid LAPACK_dggqrf( lapack_int* n, lapack_int* m, lapack_int* p, double* a,\n                    lapack_int* lda, double* taua, double* b, lapack_int* ldb,\n                    double* taub, double* work, lapack_int* lwork,\n                    lapack_int *info );\nvoid LAPACK_cggqrf( lapack_int* n, lapack_int* m, lapack_int* p,\n                    lapack_complex_float* a, lapack_int* lda,\n                    lapack_complex_float* taua, lapack_complex_float* b,\n                    lapack_int* ldb, lapack_complex_float* taub,\n                    lapack_complex_float* work, lapack_int* lwork,\n                    lapack_int *info );\nvoid LAPACK_zggqrf( lapack_int* n, lapack_int* m, lapack_int* p,\n                    lapack_complex_double* a, lapack_int* lda,\n                    lapack_complex_double* taua, lapack_complex_double* b,\n                    lapack_int* ldb, lapack_complex_double* taub,\n                    lapack_complex_double* work, lapack_int* lwork,\n                    lapack_int *info );\nvoid LAPACK_sggrqf( lapack_int* m, lapack_int* p, lapack_int* n, float* a,\n                    lapack_int* lda, float* taua, float* b, lapack_int* ldb,\n                    float* taub, float* work, lapack_int* lwork,\n                    lapack_int *info );\nvoid LAPACK_dggrqf( lapack_int* 
m, lapack_int* p, lapack_int* n, double* a,\n                    lapack_int* lda, double* taua, double* b, lapack_int* ldb,\n                    double* taub, double* work, lapack_int* lwork,\n                    lapack_int *info );\nvoid LAPACK_cggrqf( lapack_int* m, lapack_int* p, lapack_int* n,\n                    lapack_complex_float* a, lapack_int* lda,\n                    lapack_complex_float* taua, lapack_complex_float* b,\n                    lapack_int* ldb, lapack_complex_float* taub,\n                    lapack_complex_float* work, lapack_int* lwork,\n                    lapack_int *info );\nvoid LAPACK_zggrqf( lapack_int* m, lapack_int* p, lapack_int* n,\n                    lapack_complex_double* a, lapack_int* lda,\n                    lapack_complex_double* taua, lapack_complex_double* b,\n                    lapack_int* ldb, lapack_complex_double* taub,\n                    lapack_complex_double* work, lapack_int* lwork,\n                    lapack_int *info );\nvoid LAPACK_sgebrd( lapack_int* m, lapack_int* n, float* a, lapack_int* lda,\n                    float* d, float* e, float* tauq, float* taup, float* work,\n                    lapack_int* lwork, lapack_int *info );\nvoid LAPACK_dgebrd( lapack_int* m, lapack_int* n, double* a, lapack_int* lda,\n                    double* d, double* e, double* tauq, double* taup,\n                    double* work, lapack_int* lwork, lapack_int *info );\nvoid LAPACK_cgebrd( lapack_int* m, lapack_int* n, lapack_complex_float* a,\n                    lapack_int* lda, float* d, float* e,\n                    lapack_complex_float* tauq, lapack_complex_float* taup,\n                    lapack_complex_float* work, lapack_int* lwork,\n                    lapack_int *info );\nvoid LAPACK_zgebrd( lapack_int* m, lapack_int* n, lapack_complex_double* a,\n                    lapack_int* lda, double* d, double* e,\n                    lapack_complex_double* tauq, lapack_complex_double* taup,\n                    lapack_complex_double* work, lapack_int* lwork,\n                    lapack_int *info );\nvoid LAPACK_sgbbrd( char* vect, lapack_int* m, lapack_int* n, lapack_int* ncc,\n                    lapack_int* kl, lapack_int* ku, float* ab, lapack_int* ldab,\n                    float* d, float* e, float* q, lapack_int* ldq, float* pt,\n                    lapack_int* ldpt, float* c, lapack_int* ldc, float* work,\n                    lapack_int *info );\nvoid LAPACK_dgbbrd( char* vect, lapack_int* m, lapack_int* n, lapack_int* ncc,\n                    lapack_int* kl, lapack_int* ku, double* ab,\n                    lapack_int* ldab, double* d, double* e, double* q,\n                    lapack_int* ldq, double* pt, lapack_int* ldpt, double* c,\n                    lapack_int* ldc, double* work, lapack_int *info );\nvoid LAPACK_cgbbrd( char* vect, lapack_int* m, lapack_int* n, lapack_int* ncc,\n                    lapack_int* kl, lapack_int* ku, lapack_complex_float* ab,\n                    lapack_int* ldab, float* d, float* e,\n                    lapack_complex_float* q, lapack_int* ldq,\n                    lapack_complex_float* pt, lapack_int* ldpt,\n                    lapack_complex_float* c, lapack_int* ldc,\n                    lapack_complex_float* work, float* rwork,\n                    lapack_int *info );\nvoid LAPACK_zgbbrd( char* vect, lapack_int* m, lapack_int* n, lapack_int* ncc,\n                    lapack_int* kl, lapack_int* ku, lapack_complex_double* ab,\n                    lapack_int* ldab, double* d, double* e,\n         
           lapack_complex_double* q, lapack_int* ldq,\n                    lapack_complex_double* pt, lapack_int* ldpt,\n                    lapack_complex_double* c, lapack_int* ldc,\n                    lapack_complex_double* work, double* rwork,\n                    lapack_int *info );\nvoid LAPACK_sorgbr( char* vect, lapack_int* m, lapack_int* n, lapack_int* k,\n                    float* a, lapack_int* lda, const float* tau, float* work,\n                    lapack_int* lwork, lapack_int *info );\nvoid LAPACK_dorgbr( char* vect, lapack_int* m, lapack_int* n, lapack_int* k,\n                    double* a, lapack_int* lda, const double* tau, double* work,\n                    lapack_int* lwork, lapack_int *info );\nvoid LAPACK_sormbr( char* vect, char* side, char* trans, lapack_int* m,\n                    lapack_int* n, lapack_int* k, const float* a,\n                    lapack_int* lda, const float* tau, float* c,\n                    lapack_int* ldc, float* work, lapack_int* lwork,\n                    lapack_int *info );\nvoid LAPACK_dormbr( char* vect, char* side, char* trans, lapack_int* m,\n                    lapack_int* n, lapack_int* k, const double* a,\n                    lapack_int* lda, const double* tau, double* c,\n                    lapack_int* ldc, double* work, lapack_int* lwork,\n                    lapack_int *info );\nvoid LAPACK_cungbr( char* vect, lapack_int* m, lapack_int* n, lapack_int* k,\n                    lapack_complex_float* a, lapack_int* lda,\n                    const lapack_complex_float* tau, lapack_complex_float* work,\n                    lapack_int* lwork, lapack_int *info );\nvoid LAPACK_zungbr( char* vect, lapack_int* m, lapack_int* n, lapack_int* k,\n                    lapack_complex_double* a, lapack_int* lda,\n                    const lapack_complex_double* tau,\n                    lapack_complex_double* work, lapack_int* lwork,\n                    lapack_int *info );\nvoid LAPACK_cunmbr( char* vect, char* side, char* trans, lapack_int* m,\n                    lapack_int* n, lapack_int* k, const lapack_complex_float* a,\n                    lapack_int* lda, const lapack_complex_float* tau,\n                    lapack_complex_float* c, lapack_int* ldc,\n                    lapack_complex_float* work, lapack_int* lwork,\n                    lapack_int *info );\nvoid LAPACK_zunmbr( char* vect, char* side, char* trans, lapack_int* m,\n                    lapack_int* n, lapack_int* k,\n                    const lapack_complex_double* a, lapack_int* lda,\n                    const lapack_complex_double* tau, lapack_complex_double* c,\n                    lapack_int* ldc, lapack_complex_double* work,\n                    lapack_int* lwork, lapack_int *info );\nvoid LAPACK_sbdsqr( char* uplo, lapack_int* n, lapack_int* ncvt,\n                    lapack_int* nru, lapack_int* ncc, float* d, float* e,\n                    float* vt, lapack_int* ldvt, float* u, lapack_int* ldu,\n                    float* c, lapack_int* ldc, float* work, lapack_int *info );\nvoid LAPACK_dbdsqr( char* uplo, lapack_int* n, lapack_int* ncvt,\n                    lapack_int* nru, lapack_int* ncc, double* d, double* e,\n                    double* vt, lapack_int* ldvt, double* u, lapack_int* ldu,\n                    double* c, lapack_int* ldc, double* work,\n                    lapack_int *info );\nvoid LAPACK_cbdsqr( char* uplo, lapack_int* n, lapack_int* ncvt,\n                    lapack_int* nru, lapack_int* ncc, float* d, float* e,\n                    
lapack_complex_float* vt, lapack_int* ldvt,\n                    lapack_complex_float* u, lapack_int* ldu,\n                    lapack_complex_float* c, lapack_int* ldc, float* work,\n                    lapack_int *info );\nvoid LAPACK_zbdsqr( char* uplo, lapack_int* n, lapack_int* ncvt,\n                    lapack_int* nru, lapack_int* ncc, double* d, double* e,\n                    lapack_complex_double* vt, lapack_int* ldvt,\n                    lapack_complex_double* u, lapack_int* ldu,\n                    lapack_complex_double* c, lapack_int* ldc, double* work,\n                    lapack_int *info );\nvoid LAPACK_sbdsdc( char* uplo, char* compq, lapack_int* n, float* d, float* e,\n                    float* u, lapack_int* ldu, float* vt, lapack_int* ldvt,\n                    float* q, lapack_int* iq, float* work, lapack_int* iwork,\n                    lapack_int *info );\nvoid LAPACK_dbdsdc( char* uplo, char* compq, lapack_int* n, double* d,\n                    double* e, double* u, lapack_int* ldu, double* vt,\n                    lapack_int* ldvt, double* q, lapack_int* iq, double* work,\n                    lapack_int* iwork, lapack_int *info );\nvoid LAPACK_ssytrd( char* uplo, lapack_int* n, float* a, lapack_int* lda,\n                    float* d, float* e, float* tau, float* work,\n                    lapack_int* lwork, lapack_int *info );\nvoid LAPACK_dsytrd( char* uplo, lapack_int* n, double* a, lapack_int* lda,\n                    double* d, double* e, double* tau, double* work,\n                    lapack_int* lwork, lapack_int *info );\nvoid LAPACK_sorgtr( char* uplo, lapack_int* n, float* a, lapack_int* lda,\n                    const float* tau, float* work, lapack_int* lwork,\n                    lapack_int *info );\nvoid LAPACK_dorgtr( char* uplo, lapack_int* n, double* a, lapack_int* lda,\n                    const double* tau, double* work, lapack_int* lwork,\n                    lapack_int *info );\nvoid LAPACK_sormtr( char* side, char* uplo, char* trans, lapack_int* m,\n                    lapack_int* n, const float* a, lapack_int* lda,\n                    const float* tau, float* c, lapack_int* ldc, float* work,\n                    lapack_int* lwork, lapack_int *info );\nvoid LAPACK_dormtr( char* side, char* uplo, char* trans, lapack_int* m,\n                    lapack_int* n, const double* a, lapack_int* lda,\n                    const double* tau, double* c, lapack_int* ldc, double* work,\n                    lapack_int* lwork, lapack_int *info );\nvoid LAPACK_chetrd( char* uplo, lapack_int* n, lapack_complex_float* a,\n                    lapack_int* lda, float* d, float* e,\n                    lapack_complex_float* tau, lapack_complex_float* work,\n                    lapack_int* lwork, lapack_int *info );\nvoid LAPACK_zhetrd( char* uplo, lapack_int* n, lapack_complex_double* a,\n                    lapack_int* lda, double* d, double* e,\n                    lapack_complex_double* tau, lapack_complex_double* work,\n                    lapack_int* lwork, lapack_int *info );\nvoid LAPACK_cungtr( char* uplo, lapack_int* n, lapack_complex_float* a,\n                    lapack_int* lda, const lapack_complex_float* tau,\n                    lapack_complex_float* work, lapack_int* lwork,\n                    lapack_int *info );\nvoid LAPACK_zungtr( char* uplo, lapack_int* n, lapack_complex_double* a,\n                    lapack_int* lda, const lapack_complex_double* tau,\n                    lapack_complex_double* work, lapack_int* lwork,\n                   
 lapack_int *info );\nvoid LAPACK_cunmtr( char* side, char* uplo, char* trans, lapack_int* m,\n                    lapack_int* n, const lapack_complex_float* a,\n                    lapack_int* lda, const lapack_complex_float* tau,\n                    lapack_complex_float* c, lapack_int* ldc,\n                    lapack_complex_float* work, lapack_int* lwork,\n                    lapack_int *info );\nvoid LAPACK_zunmtr( char* side, char* uplo, char* trans, lapack_int* m,\n                    lapack_int* n, const lapack_complex_double* a,\n                    lapack_int* lda, const lapack_complex_double* tau,\n                    lapack_complex_double* c, lapack_int* ldc,\n                    lapack_complex_double* work, lapack_int* lwork,\n                    lapack_int *info );\nvoid LAPACK_ssptrd( char* uplo, lapack_int* n, float* ap, float* d, float* e,\n                    float* tau, lapack_int *info );\nvoid LAPACK_dsptrd( char* uplo, lapack_int* n, double* ap, double* d, double* e,\n                    double* tau, lapack_int *info );\nvoid LAPACK_sopgtr( char* uplo, lapack_int* n, const float* ap,\n                    const float* tau, float* q, lapack_int* ldq, float* work,\n                    lapack_int *info );\nvoid LAPACK_dopgtr( char* uplo, lapack_int* n, const double* ap,\n                    const double* tau, double* q, lapack_int* ldq, double* work,\n                    lapack_int *info );\nvoid LAPACK_sopmtr( char* side, char* uplo, char* trans, lapack_int* m,\n                    lapack_int* n, const float* ap, const float* tau, float* c,\n                    lapack_int* ldc, float* work, lapack_int *info );\nvoid LAPACK_dopmtr( char* side, char* uplo, char* trans, lapack_int* m,\n                    lapack_int* n, const double* ap, const double* tau,\n                    double* c, lapack_int* ldc, double* work,\n                    lapack_int *info );\nvoid LAPACK_chptrd( char* uplo, lapack_int* n, lapack_complex_float* ap,\n                    float* d, float* e, lapack_complex_float* tau,\n                    lapack_int *info );\nvoid LAPACK_zhptrd( char* uplo, lapack_int* n, lapack_complex_double* ap,\n                    double* d, double* e, lapack_complex_double* tau,\n                    lapack_int *info );\nvoid LAPACK_cupgtr( char* uplo, lapack_int* n, const lapack_complex_float* ap,\n                    const lapack_complex_float* tau, lapack_complex_float* q,\n                    lapack_int* ldq, lapack_complex_float* work,\n                    lapack_int *info );\nvoid LAPACK_zupgtr( char* uplo, lapack_int* n, const lapack_complex_double* ap,\n                    const lapack_complex_double* tau, lapack_complex_double* q,\n                    lapack_int* ldq, lapack_complex_double* work,\n                    lapack_int *info );\nvoid LAPACK_cupmtr( char* side, char* uplo, char* trans, lapack_int* m,\n                    lapack_int* n, const lapack_complex_float* ap,\n                    const lapack_complex_float* tau, lapack_complex_float* c,\n                    lapack_int* ldc, lapack_complex_float* work,\n                    lapack_int *info );\nvoid LAPACK_zupmtr( char* side, char* uplo, char* trans, lapack_int* m,\n                    lapack_int* n, const lapack_complex_double* ap,\n                    const lapack_complex_double* tau, lapack_complex_double* c,\n                    lapack_int* ldc, lapack_complex_double* work,\n                    lapack_int *info );\nvoid LAPACK_ssbtrd( char* vect, char* uplo, lapack_int* n, lapack_int* kd,\n      
              float* ab, lapack_int* ldab, float* d, float* e, float* q,\n                    lapack_int* ldq, float* work, lapack_int *info );\nvoid LAPACK_dsbtrd( char* vect, char* uplo, lapack_int* n, lapack_int* kd,\n                    double* ab, lapack_int* ldab, double* d, double* e,\n                    double* q, lapack_int* ldq, double* work,\n                    lapack_int *info );\nvoid LAPACK_chbtrd( char* vect, char* uplo, lapack_int* n, lapack_int* kd,\n                    lapack_complex_float* ab, lapack_int* ldab, float* d,\n                    float* e, lapack_complex_float* q, lapack_int* ldq,\n                    lapack_complex_float* work, lapack_int *info );\nvoid LAPACK_zhbtrd( char* vect, char* uplo, lapack_int* n, lapack_int* kd,\n                    lapack_complex_double* ab, lapack_int* ldab, double* d,\n                    double* e, lapack_complex_double* q, lapack_int* ldq,\n                    lapack_complex_double* work, lapack_int *info );\nvoid LAPACK_ssterf( lapack_int* n, float* d, float* e, lapack_int *info );\nvoid LAPACK_dsterf( lapack_int* n, double* d, double* e, lapack_int *info );\nvoid LAPACK_ssteqr( char* compz, lapack_int* n, float* d, float* e, float* z,\n                    lapack_int* ldz, float* work, lapack_int *info );\nvoid LAPACK_dsteqr( char* compz, lapack_int* n, double* d, double* e, double* z,\n                    lapack_int* ldz, double* work, lapack_int *info );\nvoid LAPACK_csteqr( char* compz, lapack_int* n, float* d, float* e,\n                    lapack_complex_float* z, lapack_int* ldz, float* work,\n                    lapack_int *info );\nvoid LAPACK_zsteqr( char* compz, lapack_int* n, double* d, double* e,\n                    lapack_complex_double* z, lapack_int* ldz, double* work,\n                    lapack_int *info );\nvoid LAPACK_sstemr( char* jobz, char* range, lapack_int* n, float* d, float* e,\n                    float* vl, float* vu, lapack_int* il, lapack_int* iu,\n                    lapack_int* m, float* w, float* z, lapack_int* ldz,\n                    lapack_int* nzc, lapack_int* isuppz, lapack_logical* tryrac,\n                    float* work, lapack_int* lwork, lapack_int* iwork,\n                    lapack_int* liwork, lapack_int *info );\nvoid LAPACK_dstemr( char* jobz, char* range, lapack_int* n, double* d,\n                    double* e, double* vl, double* vu, lapack_int* il,\n                    lapack_int* iu, lapack_int* m, double* w, double* z,\n                    lapack_int* ldz, lapack_int* nzc, lapack_int* isuppz,\n                    lapack_logical* tryrac, double* work, lapack_int* lwork,\n                    lapack_int* iwork, lapack_int* liwork, lapack_int *info );\nvoid LAPACK_cstemr( char* jobz, char* range, lapack_int* n, float* d, float* e,\n                    float* vl, float* vu, lapack_int* il, lapack_int* iu,\n                    lapack_int* m, float* w, lapack_complex_float* z,\n                    lapack_int* ldz, lapack_int* nzc, lapack_int* isuppz,\n                    lapack_logical* tryrac, float* work, lapack_int* lwork,\n                    lapack_int* iwork, lapack_int* liwork, lapack_int *info );\nvoid LAPACK_zstemr( char* jobz, char* range, lapack_int* n, double* d,\n                    double* e, double* vl, double* vu, lapack_int* il,\n                    lapack_int* iu, lapack_int* m, double* w,\n                    lapack_complex_double* z, lapack_int* ldz, lapack_int* nzc,\n                    lapack_int* isuppz, lapack_logical* tryrac, double* work,\n           
         lapack_int* lwork, lapack_int* iwork, lapack_int* liwork,\n                    lapack_int *info );\nvoid LAPACK_sstedc( char* compz, lapack_int* n, float* d, float* e, float* z,\n                    lapack_int* ldz, float* work, lapack_int* lwork,\n                    lapack_int* iwork, lapack_int* liwork, lapack_int *info );\nvoid LAPACK_dstedc( char* compz, lapack_int* n, double* d, double* e, double* z,\n                    lapack_int* ldz, double* work, lapack_int* lwork,\n                    lapack_int* iwork, lapack_int* liwork, lapack_int *info );\nvoid LAPACK_cstedc( char* compz, lapack_int* n, float* d, float* e,\n                    lapack_complex_float* z, lapack_int* ldz,\n                    lapack_complex_float* work, lapack_int* lwork, float* rwork,\n                    lapack_int* lrwork, lapack_int* iwork, lapack_int* liwork,\n                    lapack_int *info );\nvoid LAPACK_zstedc( char* compz, lapack_int* n, double* d, double* e,\n                    lapack_complex_double* z, lapack_int* ldz,\n                    lapack_complex_double* work, lapack_int* lwork,\n                    double* rwork, lapack_int* lrwork, lapack_int* iwork,\n                    lapack_int* liwork, lapack_int *info );\nvoid LAPACK_sstegr( char* jobz, char* range, lapack_int* n, float* d, float* e,\n                    float* vl, float* vu, lapack_int* il, lapack_int* iu,\n                    float* abstol, lapack_int* m, float* w, float* z,\n                    lapack_int* ldz, lapack_int* isuppz, float* work,\n                    lapack_int* lwork, lapack_int* iwork, lapack_int* liwork,\n                    lapack_int *info );\nvoid LAPACK_dstegr( char* jobz, char* range, lapack_int* n, double* d,\n                    double* e, double* vl, double* vu, lapack_int* il,\n                    lapack_int* iu, double* abstol, lapack_int* m, double* w,\n                    double* z, lapack_int* ldz, lapack_int* isuppz,\n                    double* work, lapack_int* lwork, lapack_int* iwork,\n                    lapack_int* liwork, lapack_int *info );\nvoid LAPACK_cstegr( char* jobz, char* range, lapack_int* n, float* d, float* e,\n                    float* vl, float* vu, lapack_int* il, lapack_int* iu,\n                    float* abstol, lapack_int* m, float* w,\n                    lapack_complex_float* z, lapack_int* ldz,\n                    lapack_int* isuppz, float* work, lapack_int* lwork,\n                    lapack_int* iwork, lapack_int* liwork, lapack_int *info );\nvoid LAPACK_zstegr( char* jobz, char* range, lapack_int* n, double* d,\n                    double* e, double* vl, double* vu, lapack_int* il,\n                    lapack_int* iu, double* abstol, lapack_int* m, double* w,\n                    lapack_complex_double* z, lapack_int* ldz,\n                    lapack_int* isuppz, double* work, lapack_int* lwork,\n                    lapack_int* iwork, lapack_int* liwork, lapack_int *info );\nvoid LAPACK_spteqr( char* compz, lapack_int* n, float* d, float* e, float* z,\n                    lapack_int* ldz, float* work, lapack_int *info );\nvoid LAPACK_dpteqr( char* compz, lapack_int* n, double* d, double* e, double* z,\n                    lapack_int* ldz, double* work, lapack_int *info );\nvoid LAPACK_cpteqr( char* compz, lapack_int* n, float* d, float* e,\n                    lapack_complex_float* z, lapack_int* ldz, float* work,\n                    lapack_int *info );\nvoid LAPACK_zpteqr( char* compz, lapack_int* n, double* d, double* e,\n                    
lapack_complex_double* z, lapack_int* ldz, double* work,\n                    lapack_int *info );\nvoid LAPACK_sstebz( char* range, char* order, lapack_int* n, float* vl,\n                    float* vu, lapack_int* il, lapack_int* iu, float* abstol,\n                    const float* d, const float* e, lapack_int* m,\n                    lapack_int* nsplit, float* w, lapack_int* iblock,\n                    lapack_int* isplit, float* work, lapack_int* iwork,\n                    lapack_int *info );\nvoid LAPACK_dstebz( char* range, char* order, lapack_int* n, double* vl,\n                    double* vu, lapack_int* il, lapack_int* iu, double* abstol,\n                    const double* d, const double* e, lapack_int* m,\n                    lapack_int* nsplit, double* w, lapack_int* iblock,\n                    lapack_int* isplit, double* work, lapack_int* iwork,\n                    lapack_int *info );\nvoid LAPACK_sstein( lapack_int* n, const float* d, const float* e,\n                    lapack_int* m, const float* w, const lapack_int* iblock,\n                    const lapack_int* isplit, float* z, lapack_int* ldz,\n                    float* work, lapack_int* iwork, lapack_int* ifailv,\n                    lapack_int *info );\nvoid LAPACK_dstein( lapack_int* n, const double* d, const double* e,\n                    lapack_int* m, const double* w, const lapack_int* iblock,\n                    const lapack_int* isplit, double* z, lapack_int* ldz,\n                    double* work, lapack_int* iwork, lapack_int* ifailv,\n                    lapack_int *info );\nvoid LAPACK_cstein( lapack_int* n, const float* d, const float* e,\n                    lapack_int* m, const float* w, const lapack_int* iblock,\n                    const lapack_int* isplit, lapack_complex_float* z,\n                    lapack_int* ldz, float* work, lapack_int* iwork,\n                    lapack_int* ifailv, lapack_int *info );\nvoid LAPACK_zstein( lapack_int* n, const double* d, const double* e,\n                    lapack_int* m, const double* w, const lapack_int* iblock,\n                    const lapack_int* isplit, lapack_complex_double* z,\n                    lapack_int* ldz, double* work, lapack_int* iwork,\n                    lapack_int* ifailv, lapack_int *info );\nvoid LAPACK_sdisna( char* job, lapack_int* m, lapack_int* n, const float* d,\n                    float* sep, lapack_int *info );\nvoid LAPACK_ddisna( char* job, lapack_int* m, lapack_int* n, const double* d,\n                    double* sep, lapack_int *info );\nvoid LAPACK_ssygst( lapack_int* itype, char* uplo, lapack_int* n, float* a,\n                    lapack_int* lda, const float* b, lapack_int* ldb,\n                    lapack_int *info );\nvoid LAPACK_dsygst( lapack_int* itype, char* uplo, lapack_int* n, double* a,\n                    lapack_int* lda, const double* b, lapack_int* ldb,\n                    lapack_int *info );\nvoid LAPACK_chegst( lapack_int* itype, char* uplo, lapack_int* n,\n                    lapack_complex_float* a, lapack_int* lda,\n                    const lapack_complex_float* b, lapack_int* ldb,\n                    lapack_int *info );\nvoid LAPACK_zhegst( lapack_int* itype, char* uplo, lapack_int* n,\n                    lapack_complex_double* a, lapack_int* lda,\n                    const lapack_complex_double* b, lapack_int* ldb,\n                    lapack_int *info );\nvoid LAPACK_sspgst( lapack_int* itype, char* uplo, lapack_int* n, float* ap,\n                    const float* bp, lapack_int *info 
);\nvoid LAPACK_dspgst( lapack_int* itype, char* uplo, lapack_int* n, double* ap,\n                    const double* bp, lapack_int *info );\nvoid LAPACK_chpgst( lapack_int* itype, char* uplo, lapack_int* n,\n                    lapack_complex_float* ap, const lapack_complex_float* bp,\n                    lapack_int *info );\nvoid LAPACK_zhpgst( lapack_int* itype, char* uplo, lapack_int* n,\n                    lapack_complex_double* ap, const lapack_complex_double* bp,\n                    lapack_int *info );\nvoid LAPACK_ssbgst( char* vect, char* uplo, lapack_int* n, lapack_int* ka,\n                    lapack_int* kb, float* ab, lapack_int* ldab,\n                    const float* bb, lapack_int* ldbb, float* x,\n                    lapack_int* ldx, float* work, lapack_int *info );\nvoid LAPACK_dsbgst( char* vect, char* uplo, lapack_int* n, lapack_int* ka,\n                    lapack_int* kb, double* ab, lapack_int* ldab,\n                    const double* bb, lapack_int* ldbb, double* x,\n                    lapack_int* ldx, double* work, lapack_int *info );\nvoid LAPACK_chbgst( char* vect, char* uplo, lapack_int* n, lapack_int* ka,\n                    lapack_int* kb, lapack_complex_float* ab, lapack_int* ldab,\n                    const lapack_complex_float* bb, lapack_int* ldbb,\n                    lapack_complex_float* x, lapack_int* ldx,\n                    lapack_complex_float* work, float* rwork,\n                    lapack_int *info );\nvoid LAPACK_zhbgst( char* vect, char* uplo, lapack_int* n, lapack_int* ka,\n                    lapack_int* kb, lapack_complex_double* ab, lapack_int* ldab,\n                    const lapack_complex_double* bb, lapack_int* ldbb,\n                    lapack_complex_double* x, lapack_int* ldx,\n                    lapack_complex_double* work, double* rwork,\n                    lapack_int *info );\nvoid LAPACK_spbstf( char* uplo, lapack_int* n, lapack_int* kb, float* bb,\n                    lapack_int* ldbb, lapack_int *info );\nvoid LAPACK_dpbstf( char* uplo, lapack_int* n, lapack_int* kb, double* bb,\n                    lapack_int* ldbb, lapack_int *info );\nvoid LAPACK_cpbstf( char* uplo, lapack_int* n, lapack_int* kb,\n                    lapack_complex_float* bb, lapack_int* ldbb,\n                    lapack_int *info );\nvoid LAPACK_zpbstf( char* uplo, lapack_int* n, lapack_int* kb,\n                    lapack_complex_double* bb, lapack_int* ldbb,\n                    lapack_int *info );\nvoid LAPACK_sgehrd( lapack_int* n, lapack_int* ilo, lapack_int* ihi, float* a,\n                    lapack_int* lda, float* tau, float* work, lapack_int* lwork,\n                    lapack_int *info );\nvoid LAPACK_dgehrd( lapack_int* n, lapack_int* ilo, lapack_int* ihi, double* a,\n                    lapack_int* lda, double* tau, double* work,\n                    lapack_int* lwork, lapack_int *info );\nvoid LAPACK_cgehrd( lapack_int* n, lapack_int* ilo, lapack_int* ihi,\n                    lapack_complex_float* a, lapack_int* lda,\n                    lapack_complex_float* tau, lapack_complex_float* work,\n                    lapack_int* lwork, lapack_int *info );\nvoid LAPACK_zgehrd( lapack_int* n, lapack_int* ilo, lapack_int* ihi,\n                    lapack_complex_double* a, lapack_int* lda,\n                    lapack_complex_double* tau, lapack_complex_double* work,\n                    lapack_int* lwork, lapack_int *info );\nvoid LAPACK_sorghr( lapack_int* n, lapack_int* ilo, lapack_int* ihi, float* a,\n                    lapack_int* lda, 
const float* tau, float* work,\n                    lapack_int* lwork, lapack_int *info );\nvoid LAPACK_dorghr( lapack_int* n, lapack_int* ilo, lapack_int* ihi, double* a,\n                    lapack_int* lda, const double* tau, double* work,\n                    lapack_int* lwork, lapack_int *info );\nvoid LAPACK_sormhr( char* side, char* trans, lapack_int* m, lapack_int* n,\n                    lapack_int* ilo, lapack_int* ihi, const float* a,\n                    lapack_int* lda, const float* tau, float* c,\n                    lapack_int* ldc, float* work, lapack_int* lwork,\n                    lapack_int *info );\nvoid LAPACK_dormhr( char* side, char* trans, lapack_int* m, lapack_int* n,\n                    lapack_int* ilo, lapack_int* ihi, const double* a,\n                    lapack_int* lda, const double* tau, double* c,\n                    lapack_int* ldc, double* work, lapack_int* lwork,\n                    lapack_int *info );\nvoid LAPACK_cunghr( lapack_int* n, lapack_int* ilo, lapack_int* ihi,\n                    lapack_complex_float* a, lapack_int* lda,\n                    const lapack_complex_float* tau, lapack_complex_float* work,\n                    lapack_int* lwork, lapack_int *info );\nvoid LAPACK_zunghr( lapack_int* n, lapack_int* ilo, lapack_int* ihi,\n                    lapack_complex_double* a, lapack_int* lda,\n                    const lapack_complex_double* tau,\n                    lapack_complex_double* work, lapack_int* lwork,\n                    lapack_int *info );\nvoid LAPACK_cunmhr( char* side, char* trans, lapack_int* m, lapack_int* n,\n                    lapack_int* ilo, lapack_int* ihi,\n                    const lapack_complex_float* a, lapack_int* lda,\n                    const lapack_complex_float* tau, lapack_complex_float* c,\n                    lapack_int* ldc, lapack_complex_float* work,\n                    lapack_int* lwork, lapack_int *info );\nvoid LAPACK_zunmhr( char* side, char* trans, lapack_int* m, lapack_int* n,\n                    lapack_int* ilo, lapack_int* ihi,\n                    const lapack_complex_double* a, lapack_int* lda,\n                    const lapack_complex_double* tau, lapack_complex_double* c,\n                    lapack_int* ldc, lapack_complex_double* work,\n                    lapack_int* lwork, lapack_int *info );\nvoid LAPACK_sgebal( char* job, lapack_int* n, float* a, lapack_int* lda,\n                    lapack_int* ilo, lapack_int* ihi, float* scale,\n                    lapack_int *info );\nvoid LAPACK_dgebal( char* job, lapack_int* n, double* a, lapack_int* lda,\n                    lapack_int* ilo, lapack_int* ihi, double* scale,\n                    lapack_int *info );\nvoid LAPACK_cgebal( char* job, lapack_int* n, lapack_complex_float* a,\n                    lapack_int* lda, lapack_int* ilo, lapack_int* ihi,\n                    float* scale, lapack_int *info );\nvoid LAPACK_zgebal( char* job, lapack_int* n, lapack_complex_double* a,\n                    lapack_int* lda, lapack_int* ilo, lapack_int* ihi,\n                    double* scale, lapack_int *info );\nvoid LAPACK_sgebak( char* job, char* side, lapack_int* n, lapack_int* ilo,\n                    lapack_int* ihi, const float* scale, lapack_int* m,\n                    float* v, lapack_int* ldv, lapack_int *info );\nvoid LAPACK_dgebak( char* job, char* side, lapack_int* n, lapack_int* ilo,\n                    lapack_int* ihi, const double* scale, lapack_int* m,\n                    double* v, lapack_int* ldv, lapack_int *info );\nvoid 
LAPACK_cgebak( char* job, char* side, lapack_int* n, lapack_int* ilo,\n                    lapack_int* ihi, const float* scale, lapack_int* m,\n                    lapack_complex_float* v, lapack_int* ldv,\n                    lapack_int *info );\nvoid LAPACK_zgebak( char* job, char* side, lapack_int* n, lapack_int* ilo,\n                    lapack_int* ihi, const double* scale, lapack_int* m,\n                    lapack_complex_double* v, lapack_int* ldv,\n                    lapack_int *info );\nvoid LAPACK_shseqr( char* job, char* compz, lapack_int* n, lapack_int* ilo,\n                    lapack_int* ihi, float* h, lapack_int* ldh, float* wr,\n                    float* wi, float* z, lapack_int* ldz, float* work,\n                    lapack_int* lwork, lapack_int *info );\nvoid LAPACK_dhseqr( char* job, char* compz, lapack_int* n, lapack_int* ilo,\n                    lapack_int* ihi, double* h, lapack_int* ldh, double* wr,\n                    double* wi, double* z, lapack_int* ldz, double* work,\n                    lapack_int* lwork, lapack_int *info );\nvoid LAPACK_chseqr( char* job, char* compz, lapack_int* n, lapack_int* ilo,\n                    lapack_int* ihi, lapack_complex_float* h, lapack_int* ldh,\n                    lapack_complex_float* w, lapack_complex_float* z,\n                    lapack_int* ldz, lapack_complex_float* work,\n                    lapack_int* lwork, lapack_int *info );\nvoid LAPACK_zhseqr( char* job, char* compz, lapack_int* n, lapack_int* ilo,\n                    lapack_int* ihi, lapack_complex_double* h, lapack_int* ldh,\n                    lapack_complex_double* w, lapack_complex_double* z,\n                    lapack_int* ldz, lapack_complex_double* work,\n                    lapack_int* lwork, lapack_int *info );\nvoid LAPACK_shsein( char* job, char* eigsrc, char* initv,\n                    lapack_logical* select, lapack_int* n, const float* h,\n                    lapack_int* ldh, float* wr, const float* wi, float* vl,\n                    lapack_int* ldvl, float* vr, lapack_int* ldvr,\n                    lapack_int* mm, lapack_int* m, float* work,\n                    lapack_int* ifaill, lapack_int* ifailr, lapack_int *info );\nvoid LAPACK_dhsein( char* job, char* eigsrc, char* initv,\n                    lapack_logical* select, lapack_int* n, const double* h,\n                    lapack_int* ldh, double* wr, const double* wi, double* vl,\n                    lapack_int* ldvl, double* vr, lapack_int* ldvr,\n                    lapack_int* mm, lapack_int* m, double* work,\n                    lapack_int* ifaill, lapack_int* ifailr, lapack_int *info );\nvoid LAPACK_chsein( char* job, char* eigsrc, char* initv,\n                    const lapack_logical* select, lapack_int* n,\n                    const lapack_complex_float* h, lapack_int* ldh,\n                    lapack_complex_float* w, lapack_complex_float* vl,\n                    lapack_int* ldvl, lapack_complex_float* vr,\n                    lapack_int* ldvr, lapack_int* mm, lapack_int* m,\n                    lapack_complex_float* work, float* rwork,\n                    lapack_int* ifaill, lapack_int* ifailr, lapack_int *info );\nvoid LAPACK_zhsein( char* job, char* eigsrc, char* initv,\n                    const lapack_logical* select, lapack_int* n,\n                    const lapack_complex_double* h, lapack_int* ldh,\n                    lapack_complex_double* w, lapack_complex_double* vl,\n                    lapack_int* ldvl, lapack_complex_double* vr,\n                    
lapack_int* ldvr, lapack_int* mm, lapack_int* m,\n                    lapack_complex_double* work, double* rwork,\n                    lapack_int* ifaill, lapack_int* ifailr, lapack_int *info );\nvoid LAPACK_strevc( char* side, char* howmny, lapack_logical* select,\n                    lapack_int* n, const float* t, lapack_int* ldt, float* vl,\n                    lapack_int* ldvl, float* vr, lapack_int* ldvr,\n                    lapack_int* mm, lapack_int* m, float* work,\n                    lapack_int *info );\nvoid LAPACK_dtrevc( char* side, char* howmny, lapack_logical* select,\n                    lapack_int* n, const double* t, lapack_int* ldt, double* vl,\n                    lapack_int* ldvl, double* vr, lapack_int* ldvr,\n                    lapack_int* mm, lapack_int* m, double* work,\n                    lapack_int *info );\nvoid LAPACK_ctrevc( char* side, char* howmny, const lapack_logical* select,\n                    lapack_int* n, lapack_complex_float* t, lapack_int* ldt,\n                    lapack_complex_float* vl, lapack_int* ldvl,\n                    lapack_complex_float* vr, lapack_int* ldvr, lapack_int* mm,\n                    lapack_int* m, lapack_complex_float* work, float* rwork,\n                    lapack_int *info );\nvoid LAPACK_ztrevc( char* side, char* howmny, const lapack_logical* select,\n                    lapack_int* n, lapack_complex_double* t, lapack_int* ldt,\n                    lapack_complex_double* vl, lapack_int* ldvl,\n                    lapack_complex_double* vr, lapack_int* ldvr, lapack_int* mm,\n                    lapack_int* m, lapack_complex_double* work, double* rwork,\n                    lapack_int *info );\nvoid LAPACK_strsna( char* job, char* howmny, const lapack_logical* select,\n                    lapack_int* n, const float* t, lapack_int* ldt,\n                    const float* vl, lapack_int* ldvl, const float* vr,\n                    lapack_int* ldvr, float* s, float* sep, lapack_int* mm,\n                    lapack_int* m, float* work, lapack_int* ldwork,\n                    lapack_int* iwork, lapack_int *info );\nvoid LAPACK_dtrsna( char* job, char* howmny, const lapack_logical* select,\n                    lapack_int* n, const double* t, lapack_int* ldt,\n                    const double* vl, lapack_int* ldvl, const double* vr,\n                    lapack_int* ldvr, double* s, double* sep, lapack_int* mm,\n                    lapack_int* m, double* work, lapack_int* ldwork,\n                    lapack_int* iwork, lapack_int *info );\nvoid LAPACK_ctrsna( char* job, char* howmny, const lapack_logical* select,\n                    lapack_int* n, const lapack_complex_float* t,\n                    lapack_int* ldt, const lapack_complex_float* vl,\n                    lapack_int* ldvl, const lapack_complex_float* vr,\n                    lapack_int* ldvr, float* s, float* sep, lapack_int* mm,\n                    lapack_int* m, lapack_complex_float* work,\n                    lapack_int* ldwork, float* rwork, lapack_int *info );\nvoid LAPACK_ztrsna( char* job, char* howmny, const lapack_logical* select,\n                    lapack_int* n, const lapack_complex_double* t,\n                    lapack_int* ldt, const lapack_complex_double* vl,\n                    lapack_int* ldvl, const lapack_complex_double* vr,\n                    lapack_int* ldvr, double* s, double* sep, lapack_int* mm,\n                    lapack_int* m, lapack_complex_double* work,\n                    lapack_int* ldwork, double* rwork, lapack_int *info 
);\nvoid LAPACK_strexc( char* compq, lapack_int* n, float* t, lapack_int* ldt,\n                    float* q, lapack_int* ldq, lapack_int* ifst,\n                    lapack_int* ilst, float* work, lapack_int *info );\nvoid LAPACK_dtrexc( char* compq, lapack_int* n, double* t, lapack_int* ldt,\n                    double* q, lapack_int* ldq, lapack_int* ifst,\n                    lapack_int* ilst, double* work, lapack_int *info );\nvoid LAPACK_ctrexc( char* compq, lapack_int* n, lapack_complex_float* t,\n                    lapack_int* ldt, lapack_complex_float* q, lapack_int* ldq,\n                    lapack_int* ifst, lapack_int* ilst, lapack_int *info );\nvoid LAPACK_ztrexc( char* compq, lapack_int* n, lapack_complex_double* t,\n                    lapack_int* ldt, lapack_complex_double* q, lapack_int* ldq,\n                    lapack_int* ifst, lapack_int* ilst, lapack_int *info );\nvoid LAPACK_strsen( char* job, char* compq, const lapack_logical* select,\n                    lapack_int* n, float* t, lapack_int* ldt, float* q,\n                    lapack_int* ldq, float* wr, float* wi, lapack_int* m,\n                    float* s, float* sep, float* work, lapack_int* lwork,\n                    lapack_int* iwork, lapack_int* liwork, lapack_int *info );\nvoid LAPACK_dtrsen( char* job, char* compq, const lapack_logical* select,\n                    lapack_int* n, double* t, lapack_int* ldt, double* q,\n                    lapack_int* ldq, double* wr, double* wi, lapack_int* m,\n                    double* s, double* sep, double* work, lapack_int* lwork,\n                    lapack_int* iwork, lapack_int* liwork, lapack_int *info );\nvoid LAPACK_ctrsen( char* job, char* compq, const lapack_logical* select,\n                    lapack_int* n, lapack_complex_float* t, lapack_int* ldt,\n                    lapack_complex_float* q, lapack_int* ldq,\n                    lapack_complex_float* w, lapack_int* m, float* s,\n                    float* sep, lapack_complex_float* work, lapack_int* lwork,\n                    lapack_int *info );\nvoid LAPACK_ztrsen( char* job, char* compq, const lapack_logical* select,\n                    lapack_int* n, lapack_complex_double* t, lapack_int* ldt,\n                    lapack_complex_double* q, lapack_int* ldq,\n                    lapack_complex_double* w, lapack_int* m, double* s,\n                    double* sep, lapack_complex_double* work, lapack_int* lwork,\n                    lapack_int *info );\nvoid LAPACK_strsyl( char* trana, char* tranb, lapack_int* isgn, lapack_int* m,\n                    lapack_int* n, const float* a, lapack_int* lda,\n                    const float* b, lapack_int* ldb, float* c, lapack_int* ldc,\n                    float* scale, lapack_int *info );\nvoid LAPACK_dtrsyl( char* trana, char* tranb, lapack_int* isgn, lapack_int* m,\n                    lapack_int* n, const double* a, lapack_int* lda,\n                    const double* b, lapack_int* ldb, double* c,\n                    lapack_int* ldc, double* scale, lapack_int *info );\nvoid LAPACK_ctrsyl( char* trana, char* tranb, lapack_int* isgn, lapack_int* m,\n                    lapack_int* n, const lapack_complex_float* a,\n                    lapack_int* lda, const lapack_complex_float* b,\n                    lapack_int* ldb, lapack_complex_float* c, lapack_int* ldc,\n                    float* scale, lapack_int *info );\nvoid LAPACK_ztrsyl( char* trana, char* tranb, lapack_int* isgn, lapack_int* m,\n                    lapack_int* n, const lapack_complex_double* 
a,\n                    lapack_int* lda, const lapack_complex_double* b,\n                    lapack_int* ldb, lapack_complex_double* c, lapack_int* ldc,\n                    double* scale, lapack_int *info );\nvoid LAPACK_sgghrd( char* compq, char* compz, lapack_int* n, lapack_int* ilo,\n                    lapack_int* ihi, float* a, lapack_int* lda, float* b,\n                    lapack_int* ldb, float* q, lapack_int* ldq, float* z,\n                    lapack_int* ldz, lapack_int *info );\nvoid LAPACK_dgghrd( char* compq, char* compz, lapack_int* n, lapack_int* ilo,\n                    lapack_int* ihi, double* a, lapack_int* lda, double* b,\n                    lapack_int* ldb, double* q, lapack_int* ldq, double* z,\n                    lapack_int* ldz, lapack_int *info );\nvoid LAPACK_cgghrd( char* compq, char* compz, lapack_int* n, lapack_int* ilo,\n                    lapack_int* ihi, lapack_complex_float* a, lapack_int* lda,\n                    lapack_complex_float* b, lapack_int* ldb,\n                    lapack_complex_float* q, lapack_int* ldq,\n                    lapack_complex_float* z, lapack_int* ldz,\n                    lapack_int *info );\nvoid LAPACK_zgghrd( char* compq, char* compz, lapack_int* n, lapack_int* ilo,\n                    lapack_int* ihi, lapack_complex_double* a, lapack_int* lda,\n                    lapack_complex_double* b, lapack_int* ldb,\n                    lapack_complex_double* q, lapack_int* ldq,\n                    lapack_complex_double* z, lapack_int* ldz,\n                    lapack_int *info );\nvoid LAPACK_sggbal( char* job, lapack_int* n, float* a, lapack_int* lda,\n                    float* b, lapack_int* ldb, lapack_int* ilo, lapack_int* ihi,\n                    float* lscale, float* rscale, float* work,\n                    lapack_int *info );\nvoid LAPACK_dggbal( char* job, lapack_int* n, double* a, lapack_int* lda,\n                    double* b, lapack_int* ldb, lapack_int* ilo,\n                    lapack_int* ihi, double* lscale, double* rscale,\n                    double* work, lapack_int *info );\nvoid LAPACK_cggbal( char* job, lapack_int* n, lapack_complex_float* a,\n                    lapack_int* lda, lapack_complex_float* b, lapack_int* ldb,\n                    lapack_int* ilo, lapack_int* ihi, float* lscale,\n                    float* rscale, float* work, lapack_int *info );\nvoid LAPACK_zggbal( char* job, lapack_int* n, lapack_complex_double* a,\n                    lapack_int* lda, lapack_complex_double* b, lapack_int* ldb,\n                    lapack_int* ilo, lapack_int* ihi, double* lscale,\n                    double* rscale, double* work, lapack_int *info );\nvoid LAPACK_sggbak( char* job, char* side, lapack_int* n, lapack_int* ilo,\n                    lapack_int* ihi, const float* lscale, const float* rscale,\n                    lapack_int* m, float* v, lapack_int* ldv,\n                    lapack_int *info );\nvoid LAPACK_dggbak( char* job, char* side, lapack_int* n, lapack_int* ilo,\n                    lapack_int* ihi, const double* lscale, const double* rscale,\n                    lapack_int* m, double* v, lapack_int* ldv,\n                    lapack_int *info );\nvoid LAPACK_cggbak( char* job, char* side, lapack_int* n, lapack_int* ilo,\n                    lapack_int* ihi, const float* lscale, const float* rscale,\n                    lapack_int* m, lapack_complex_float* v, lapack_int* ldv,\n                    lapack_int *info );\nvoid LAPACK_zggbak( char* job, char* side, lapack_int* n, lapack_int* 
ilo,\n                    lapack_int* ihi, const double* lscale, const double* rscale,\n                    lapack_int* m, lapack_complex_double* v, lapack_int* ldv,\n                    lapack_int *info );\nvoid LAPACK_shgeqz( char* job, char* compq, char* compz, lapack_int* n,\n                    lapack_int* ilo, lapack_int* ihi, float* h, lapack_int* ldh,\n                    float* t, lapack_int* ldt, float* alphar, float* alphai,\n                    float* beta, float* q, lapack_int* ldq, float* z,\n                    lapack_int* ldz, float* work, lapack_int* lwork,\n                    lapack_int *info );\nvoid LAPACK_dhgeqz( char* job, char* compq, char* compz, lapack_int* n,\n                    lapack_int* ilo, lapack_int* ihi, double* h,\n                    lapack_int* ldh, double* t, lapack_int* ldt, double* alphar,\n                    double* alphai, double* beta, double* q, lapack_int* ldq,\n                    double* z, lapack_int* ldz, double* work, lapack_int* lwork,\n                    lapack_int *info );\nvoid LAPACK_chgeqz( char* job, char* compq, char* compz, lapack_int* n,\n                    lapack_int* ilo, lapack_int* ihi, lapack_complex_float* h,\n                    lapack_int* ldh, lapack_complex_float* t, lapack_int* ldt,\n                    lapack_complex_float* alpha, lapack_complex_float* beta,\n                    lapack_complex_float* q, lapack_int* ldq,\n                    lapack_complex_float* z, lapack_int* ldz,\n                    lapack_complex_float* work, lapack_int* lwork, float* rwork,\n                    lapack_int *info );\nvoid LAPACK_zhgeqz( char* job, char* compq, char* compz, lapack_int* n,\n                    lapack_int* ilo, lapack_int* ihi, lapack_complex_double* h,\n                    lapack_int* ldh, lapack_complex_double* t, lapack_int* ldt,\n                    lapack_complex_double* alpha, lapack_complex_double* beta,\n                    lapack_complex_double* q, lapack_int* ldq,\n                    lapack_complex_double* z, lapack_int* ldz,\n                    lapack_complex_double* work, lapack_int* lwork,\n                    double* rwork, lapack_int *info );\nvoid LAPACK_stgevc( char* side, char* howmny, const lapack_logical* select,\n                    lapack_int* n, const float* s, lapack_int* lds,\n                    const float* p, lapack_int* ldp, float* vl,\n                    lapack_int* ldvl, float* vr, lapack_int* ldvr,\n                    lapack_int* mm, lapack_int* m, float* work,\n                    lapack_int *info );\nvoid LAPACK_dtgevc( char* side, char* howmny, const lapack_logical* select,\n                    lapack_int* n, const double* s, lapack_int* lds,\n                    const double* p, lapack_int* ldp, double* vl,\n                    lapack_int* ldvl, double* vr, lapack_int* ldvr,\n                    lapack_int* mm, lapack_int* m, double* work,\n                    lapack_int *info );\nvoid LAPACK_ctgevc( char* side, char* howmny, const lapack_logical* select,\n                    lapack_int* n, const lapack_complex_float* s,\n                    lapack_int* lds, const lapack_complex_float* p,\n                    lapack_int* ldp, lapack_complex_float* vl, lapack_int* ldvl,\n                    lapack_complex_float* vr, lapack_int* ldvr, lapack_int* mm,\n                    lapack_int* m, lapack_complex_float* work, float* rwork,\n                    lapack_int *info );\nvoid LAPACK_ztgevc( char* side, char* howmny, const lapack_logical* select,\n                    lapack_int* 
n, const lapack_complex_double* s,\n                    lapack_int* lds, const lapack_complex_double* p,\n                    lapack_int* ldp, lapack_complex_double* vl,\n                    lapack_int* ldvl, lapack_complex_double* vr,\n                    lapack_int* ldvr, lapack_int* mm, lapack_int* m,\n                    lapack_complex_double* work, double* rwork,\n                    lapack_int *info );\nvoid LAPACK_stgexc( lapack_logical* wantq, lapack_logical* wantz, lapack_int* n,\n                    float* a, lapack_int* lda, float* b, lapack_int* ldb,\n                    float* q, lapack_int* ldq, float* z, lapack_int* ldz,\n                    lapack_int* ifst, lapack_int* ilst, float* work,\n                    lapack_int* lwork, lapack_int *info );\nvoid LAPACK_dtgexc( lapack_logical* wantq, lapack_logical* wantz, lapack_int* n,\n                    double* a, lapack_int* lda, double* b, lapack_int* ldb,\n                    double* q, lapack_int* ldq, double* z, lapack_int* ldz,\n                    lapack_int* ifst, lapack_int* ilst, double* work,\n                    lapack_int* lwork, lapack_int *info );\nvoid LAPACK_ctgexc( lapack_logical* wantq, lapack_logical* wantz, lapack_int* n,\n                    lapack_complex_float* a, lapack_int* lda,\n                    lapack_complex_float* b, lapack_int* ldb,\n                    lapack_complex_float* q, lapack_int* ldq,\n                    lapack_complex_float* z, lapack_int* ldz, lapack_int* ifst,\n                    lapack_int* ilst, lapack_int *info );\nvoid LAPACK_ztgexc( lapack_logical* wantq, lapack_logical* wantz, lapack_int* n,\n                    lapack_complex_double* a, lapack_int* lda,\n                    lapack_complex_double* b, lapack_int* ldb,\n                    lapack_complex_double* q, lapack_int* ldq,\n                    lapack_complex_double* z, lapack_int* ldz, lapack_int* ifst,\n                    lapack_int* ilst, lapack_int *info );\nvoid LAPACK_stgsen( lapack_int* ijob, lapack_logical* wantq,\n                    lapack_logical* wantz, const lapack_logical* select,\n                    lapack_int* n, float* a, lapack_int* lda, float* b,\n                    lapack_int* ldb, float* alphar, float* alphai, float* beta,\n                    float* q, lapack_int* ldq, float* z, lapack_int* ldz,\n                    lapack_int* m, float* pl, float* pr, float* dif,\n                    float* work, lapack_int* lwork, lapack_int* iwork,\n                    lapack_int* liwork, lapack_int *info );\nvoid LAPACK_dtgsen( lapack_int* ijob, lapack_logical* wantq,\n                    lapack_logical* wantz, const lapack_logical* select,\n                    lapack_int* n, double* a, lapack_int* lda, double* b,\n                    lapack_int* ldb, double* alphar, double* alphai,\n                    double* beta, double* q, lapack_int* ldq, double* z,\n                    lapack_int* ldz, lapack_int* m, double* pl, double* pr,\n                    double* dif, double* work, lapack_int* lwork,\n                    lapack_int* iwork, lapack_int* liwork, lapack_int *info );\nvoid LAPACK_ctgsen( lapack_int* ijob, lapack_logical* wantq,\n                    lapack_logical* wantz, const lapack_logical* select,\n                    lapack_int* n, lapack_complex_float* a, lapack_int* lda,\n                    lapack_complex_float* b, lapack_int* ldb,\n                    lapack_complex_float* alpha, lapack_complex_float* beta,\n                    lapack_complex_float* q, lapack_int* ldq,\n                    
lapack_complex_float* z, lapack_int* ldz, lapack_int* m,\n                    float* pl, float* pr, float* dif,\n                    lapack_complex_float* work, lapack_int* lwork,\n                    lapack_int* iwork, lapack_int* liwork, lapack_int *info );\nvoid LAPACK_ztgsen( lapack_int* ijob, lapack_logical* wantq,\n                    lapack_logical* wantz, const lapack_logical* select,\n                    lapack_int* n, lapack_complex_double* a, lapack_int* lda,\n                    lapack_complex_double* b, lapack_int* ldb,\n                    lapack_complex_double* alpha, lapack_complex_double* beta,\n                    lapack_complex_double* q, lapack_int* ldq,\n                    lapack_complex_double* z, lapack_int* ldz, lapack_int* m,\n                    double* pl, double* pr, double* dif,\n                    lapack_complex_double* work, lapack_int* lwork,\n                    lapack_int* iwork, lapack_int* liwork, lapack_int *info );\nvoid LAPACK_stgsyl( char* trans, lapack_int* ijob, lapack_int* m, lapack_int* n,\n                    const float* a, lapack_int* lda, const float* b,\n                    lapack_int* ldb, float* c, lapack_int* ldc, const float* d,\n                    lapack_int* ldd, const float* e, lapack_int* lde, float* f,\n                    lapack_int* ldf, float* scale, float* dif, float* work,\n                    lapack_int* lwork, lapack_int* iwork, lapack_int *info );\nvoid LAPACK_dtgsyl( char* trans, lapack_int* ijob, lapack_int* m, lapack_int* n,\n                    const double* a, lapack_int* lda, const double* b,\n                    lapack_int* ldb, double* c, lapack_int* ldc,\n                    const double* d, lapack_int* ldd, const double* e,\n                    lapack_int* lde, double* f, lapack_int* ldf, double* scale,\n                    double* dif, double* work, lapack_int* lwork,\n                    lapack_int* iwork, lapack_int *info );\nvoid LAPACK_ctgsyl( char* trans, lapack_int* ijob, lapack_int* m, lapack_int* n,\n                    const lapack_complex_float* a, lapack_int* lda,\n                    const lapack_complex_float* b, lapack_int* ldb,\n                    lapack_complex_float* c, lapack_int* ldc,\n                    const lapack_complex_float* d, lapack_int* ldd,\n                    const lapack_complex_float* e, lapack_int* lde,\n                    lapack_complex_float* f, lapack_int* ldf, float* scale,\n                    float* dif, lapack_complex_float* work, lapack_int* lwork,\n                    lapack_int* iwork, lapack_int *info );\nvoid LAPACK_ztgsyl( char* trans, lapack_int* ijob, lapack_int* m, lapack_int* n,\n                    const lapack_complex_double* a, lapack_int* lda,\n                    const lapack_complex_double* b, lapack_int* ldb,\n                    lapack_complex_double* c, lapack_int* ldc,\n                    const lapack_complex_double* d, lapack_int* ldd,\n                    const lapack_complex_double* e, lapack_int* lde,\n                    lapack_complex_double* f, lapack_int* ldf, double* scale,\n                    double* dif, lapack_complex_double* work, lapack_int* lwork,\n                    lapack_int* iwork, lapack_int *info );\nvoid LAPACK_stgsna( char* job, char* howmny, const lapack_logical* select,\n                    lapack_int* n, const float* a, lapack_int* lda,\n                    const float* b, lapack_int* ldb, const float* vl,\n                    lapack_int* ldvl, const float* vr, lapack_int* ldvr,\n                    float* s, float* 
dif, lapack_int* mm, lapack_int* m,\n                    float* work, lapack_int* lwork, lapack_int* iwork,\n                    lapack_int *info );\nvoid LAPACK_dtgsna( char* job, char* howmny, const lapack_logical* select,\n                    lapack_int* n, const double* a, lapack_int* lda,\n                    const double* b, lapack_int* ldb, const double* vl,\n                    lapack_int* ldvl, const double* vr, lapack_int* ldvr,\n                    double* s, double* dif, lapack_int* mm, lapack_int* m,\n                    double* work, lapack_int* lwork, lapack_int* iwork,\n                    lapack_int *info );\nvoid LAPACK_ctgsna( char* job, char* howmny, const lapack_logical* select,\n                    lapack_int* n, const lapack_complex_float* a,\n                    lapack_int* lda, const lapack_complex_float* b,\n                    lapack_int* ldb, const lapack_complex_float* vl,\n                    lapack_int* ldvl, const lapack_complex_float* vr,\n                    lapack_int* ldvr, float* s, float* dif, lapack_int* mm,\n                    lapack_int* m, lapack_complex_float* work,\n                    lapack_int* lwork, lapack_int* iwork, lapack_int *info );\nvoid LAPACK_ztgsna( char* job, char* howmny, const lapack_logical* select,\n                    lapack_int* n, const lapack_complex_double* a,\n                    lapack_int* lda, const lapack_complex_double* b,\n                    lapack_int* ldb, const lapack_complex_double* vl,\n                    lapack_int* ldvl, const lapack_complex_double* vr,\n                    lapack_int* ldvr, double* s, double* dif, lapack_int* mm,\n                    lapack_int* m, lapack_complex_double* work,\n                    lapack_int* lwork, lapack_int* iwork, lapack_int *info );\nvoid LAPACK_sggsvp( char* jobu, char* jobv, char* jobq, lapack_int* m,\n                    lapack_int* p, lapack_int* n, float* a, lapack_int* lda,\n                    float* b, lapack_int* ldb, float* tola, float* tolb,\n                    lapack_int* k, lapack_int* l, float* u, lapack_int* ldu,\n                    float* v, lapack_int* ldv, float* q, lapack_int* ldq,\n                    lapack_int* iwork, float* tau, float* work,\n                    lapack_int *info );\nvoid LAPACK_dggsvp( char* jobu, char* jobv, char* jobq, lapack_int* m,\n                    lapack_int* p, lapack_int* n, double* a, lapack_int* lda,\n                    double* b, lapack_int* ldb, double* tola, double* tolb,\n                    lapack_int* k, lapack_int* l, double* u, lapack_int* ldu,\n                    double* v, lapack_int* ldv, double* q, lapack_int* ldq,\n                    lapack_int* iwork, double* tau, double* work,\n                    lapack_int *info );\nvoid LAPACK_cggsvp( char* jobu, char* jobv, char* jobq, lapack_int* m,\n                    lapack_int* p, lapack_int* n, lapack_complex_float* a,\n                    lapack_int* lda, lapack_complex_float* b, lapack_int* ldb,\n                    float* tola, float* tolb, lapack_int* k, lapack_int* l,\n                    lapack_complex_float* u, lapack_int* ldu,\n                    lapack_complex_float* v, lapack_int* ldv,\n                    lapack_complex_float* q, lapack_int* ldq, lapack_int* iwork,\n                    float* rwork, lapack_complex_float* tau,\n                    lapack_complex_float* work, lapack_int *info );\nvoid LAPACK_zggsvp( char* jobu, char* jobv, char* jobq, lapack_int* m,\n                    lapack_int* p, lapack_int* n, lapack_complex_double* 
a,\n                    lapack_int* lda, lapack_complex_double* b, lapack_int* ldb,\n                    double* tola, double* tolb, lapack_int* k, lapack_int* l,\n                    lapack_complex_double* u, lapack_int* ldu,\n                    lapack_complex_double* v, lapack_int* ldv,\n                    lapack_complex_double* q, lapack_int* ldq,\n                    lapack_int* iwork, double* rwork,\n                    lapack_complex_double* tau, lapack_complex_double* work,\n                    lapack_int *info );\nvoid LAPACK_stgsja( char* jobu, char* jobv, char* jobq, lapack_int* m,\n                    lapack_int* p, lapack_int* n, lapack_int* k, lapack_int* l,\n                    float* a, lapack_int* lda, float* b, lapack_int* ldb,\n                    float* tola, float* tolb, float* alpha, float* beta,\n                    float* u, lapack_int* ldu, float* v, lapack_int* ldv,\n                    float* q, lapack_int* ldq, float* work, lapack_int* ncycle,\n                    lapack_int *info );\nvoid LAPACK_dtgsja( char* jobu, char* jobv, char* jobq, lapack_int* m,\n                    lapack_int* p, lapack_int* n, lapack_int* k, lapack_int* l,\n                    double* a, lapack_int* lda, double* b, lapack_int* ldb,\n                    double* tola, double* tolb, double* alpha, double* beta,\n                    double* u, lapack_int* ldu, double* v, lapack_int* ldv,\n                    double* q, lapack_int* ldq, double* work,\n                    lapack_int* ncycle, lapack_int *info );\nvoid LAPACK_ctgsja( char* jobu, char* jobv, char* jobq, lapack_int* m,\n                    lapack_int* p, lapack_int* n, lapack_int* k, lapack_int* l,\n                    lapack_complex_float* a, lapack_int* lda,\n                    lapack_complex_float* b, lapack_int* ldb, float* tola,\n                    float* tolb, float* alpha, float* beta,\n                    lapack_complex_float* u, lapack_int* ldu,\n                    lapack_complex_float* v, lapack_int* ldv,\n                    lapack_complex_float* q, lapack_int* ldq,\n                    lapack_complex_float* work, lapack_int* ncycle,\n                    lapack_int *info );\nvoid LAPACK_ztgsja( char* jobu, char* jobv, char* jobq, lapack_int* m,\n                    lapack_int* p, lapack_int* n, lapack_int* k, lapack_int* l,\n                    lapack_complex_double* a, lapack_int* lda,\n                    lapack_complex_double* b, lapack_int* ldb, double* tola,\n                    double* tolb, double* alpha, double* beta,\n                    lapack_complex_double* u, lapack_int* ldu,\n                    lapack_complex_double* v, lapack_int* ldv,\n                    lapack_complex_double* q, lapack_int* ldq,\n                    lapack_complex_double* work, lapack_int* ncycle,\n                    lapack_int *info );\nvoid LAPACK_sgels( char* trans, lapack_int* m, lapack_int* n, lapack_int* nrhs,\n                   float* a, lapack_int* lda, float* b, lapack_int* ldb,\n                   float* work, lapack_int* lwork, lapack_int *info );\nvoid LAPACK_dgels( char* trans, lapack_int* m, lapack_int* n, lapack_int* nrhs,\n                   double* a, lapack_int* lda, double* b, lapack_int* ldb,\n                   double* work, lapack_int* lwork, lapack_int *info );\nvoid LAPACK_cgels( char* trans, lapack_int* m, lapack_int* n, lapack_int* nrhs,\n                   lapack_complex_float* a, lapack_int* lda,\n                   lapack_complex_float* b, lapack_int* ldb,\n                   lapack_complex_float* 
work, lapack_int* lwork,\n                   lapack_int *info );\nvoid LAPACK_zgels( char* trans, lapack_int* m, lapack_int* n, lapack_int* nrhs,\n                   lapack_complex_double* a, lapack_int* lda,\n                   lapack_complex_double* b, lapack_int* ldb,\n                   lapack_complex_double* work, lapack_int* lwork,\n                   lapack_int *info );\nvoid LAPACK_sgelsy( lapack_int* m, lapack_int* n, lapack_int* nrhs, float* a,\n                    lapack_int* lda, float* b, lapack_int* ldb,\n                    lapack_int* jpvt, float* rcond, lapack_int* rank,\n                    float* work, lapack_int* lwork, lapack_int *info );\nvoid LAPACK_dgelsy( lapack_int* m, lapack_int* n, lapack_int* nrhs, double* a,\n                    lapack_int* lda, double* b, lapack_int* ldb,\n                    lapack_int* jpvt, double* rcond, lapack_int* rank,\n                    double* work, lapack_int* lwork, lapack_int *info );\nvoid LAPACK_cgelsy( lapack_int* m, lapack_int* n, lapack_int* nrhs,\n                    lapack_complex_float* a, lapack_int* lda,\n                    lapack_complex_float* b, lapack_int* ldb, lapack_int* jpvt,\n                    float* rcond, lapack_int* rank, lapack_complex_float* work,\n                    lapack_int* lwork, float* rwork, lapack_int *info );\nvoid LAPACK_zgelsy( lapack_int* m, lapack_int* n, lapack_int* nrhs,\n                    lapack_complex_double* a, lapack_int* lda,\n                    lapack_complex_double* b, lapack_int* ldb, lapack_int* jpvt,\n                    double* rcond, lapack_int* rank,\n                    lapack_complex_double* work, lapack_int* lwork,\n                    double* rwork, lapack_int *info );\nvoid LAPACK_sgelss( lapack_int* m, lapack_int* n, lapack_int* nrhs, float* a,\n                    lapack_int* lda, float* b, lapack_int* ldb, float* s,\n                    float* rcond, lapack_int* rank, float* work,\n                    lapack_int* lwork, lapack_int *info );\nvoid LAPACK_dgelss( lapack_int* m, lapack_int* n, lapack_int* nrhs, double* a,\n                    lapack_int* lda, double* b, lapack_int* ldb, double* s,\n                    double* rcond, lapack_int* rank, double* work,\n                    lapack_int* lwork, lapack_int *info );\nvoid LAPACK_cgelss( lapack_int* m, lapack_int* n, lapack_int* nrhs,\n                    lapack_complex_float* a, lapack_int* lda,\n                    lapack_complex_float* b, lapack_int* ldb, float* s,\n                    float* rcond, lapack_int* rank, lapack_complex_float* work,\n                    lapack_int* lwork, float* rwork, lapack_int *info );\nvoid LAPACK_zgelss( lapack_int* m, lapack_int* n, lapack_int* nrhs,\n                    lapack_complex_double* a, lapack_int* lda,\n                    lapack_complex_double* b, lapack_int* ldb, double* s,\n                    double* rcond, lapack_int* rank,\n                    lapack_complex_double* work, lapack_int* lwork,\n                    double* rwork, lapack_int *info );\nvoid LAPACK_sgelsd( lapack_int* m, lapack_int* n, lapack_int* nrhs, float* a,\n                    lapack_int* lda, float* b, lapack_int* ldb, float* s,\n                    float* rcond, lapack_int* rank, float* work,\n                    lapack_int* lwork, lapack_int* iwork, lapack_int *info );\nvoid LAPACK_dgelsd( lapack_int* m, lapack_int* n, lapack_int* nrhs, double* a,\n                    lapack_int* lda, double* b, lapack_int* ldb, double* s,\n                    double* rcond, lapack_int* rank, double* 
work,\n                    lapack_int* lwork, lapack_int* iwork, lapack_int *info );\nvoid LAPACK_cgelsd( lapack_int* m, lapack_int* n, lapack_int* nrhs,\n                    lapack_complex_float* a, lapack_int* lda,\n                    lapack_complex_float* b, lapack_int* ldb, float* s,\n                    float* rcond, lapack_int* rank, lapack_complex_float* work,\n                    lapack_int* lwork, float* rwork, lapack_int* iwork,\n                    lapack_int *info );\nvoid LAPACK_zgelsd( lapack_int* m, lapack_int* n, lapack_int* nrhs,\n                    lapack_complex_double* a, lapack_int* lda,\n                    lapack_complex_double* b, lapack_int* ldb, double* s,\n                    double* rcond, lapack_int* rank,\n                    lapack_complex_double* work, lapack_int* lwork,\n                    double* rwork, lapack_int* iwork, lapack_int *info );\nvoid LAPACK_sgglse( lapack_int* m, lapack_int* n, lapack_int* p, float* a,\n                    lapack_int* lda, float* b, lapack_int* ldb, float* c,\n                    float* d, float* x, float* work, lapack_int* lwork,\n                    lapack_int *info );\nvoid LAPACK_dgglse( lapack_int* m, lapack_int* n, lapack_int* p, double* a,\n                    lapack_int* lda, double* b, lapack_int* ldb, double* c,\n                    double* d, double* x, double* work, lapack_int* lwork,\n                    lapack_int *info );\nvoid LAPACK_cgglse( lapack_int* m, lapack_int* n, lapack_int* p,\n                    lapack_complex_float* a, lapack_int* lda,\n                    lapack_complex_float* b, lapack_int* ldb,\n                    lapack_complex_float* c, lapack_complex_float* d,\n                    lapack_complex_float* x, lapack_complex_float* work,\n                    lapack_int* lwork, lapack_int *info );\nvoid LAPACK_zgglse( lapack_int* m, lapack_int* n, lapack_int* p,\n                    lapack_complex_double* a, lapack_int* lda,\n                    lapack_complex_double* b, lapack_int* ldb,\n                    lapack_complex_double* c, lapack_complex_double* d,\n                    lapack_complex_double* x, lapack_complex_double* work,\n                    lapack_int* lwork, lapack_int *info );\nvoid LAPACK_sggglm( lapack_int* n, lapack_int* m, lapack_int* p, float* a,\n                    lapack_int* lda, float* b, lapack_int* ldb, float* d,\n                    float* x, float* y, float* work, lapack_int* lwork,\n                    lapack_int *info );\nvoid LAPACK_dggglm( lapack_int* n, lapack_int* m, lapack_int* p, double* a,\n                    lapack_int* lda, double* b, lapack_int* ldb, double* d,\n                    double* x, double* y, double* work, lapack_int* lwork,\n                    lapack_int *info );\nvoid LAPACK_cggglm( lapack_int* n, lapack_int* m, lapack_int* p,\n                    lapack_complex_float* a, lapack_int* lda,\n                    lapack_complex_float* b, lapack_int* ldb,\n                    lapack_complex_float* d, lapack_complex_float* x,\n                    lapack_complex_float* y, lapack_complex_float* work,\n                    lapack_int* lwork, lapack_int *info );\nvoid LAPACK_zggglm( lapack_int* n, lapack_int* m, lapack_int* p,\n                    lapack_complex_double* a, lapack_int* lda,\n                    lapack_complex_double* b, lapack_int* ldb,\n                    lapack_complex_double* d, lapack_complex_double* x,\n                    lapack_complex_double* y, lapack_complex_double* work,\n                    lapack_int* lwork, lapack_int 
*info );\nvoid LAPACK_ssyev( char* jobz, char* uplo, lapack_int* n, float* a,\n                   lapack_int* lda, float* w, float* work, lapack_int* lwork,\n                   lapack_int *info );\nvoid LAPACK_dsyev( char* jobz, char* uplo, lapack_int* n, double* a,\n                   lapack_int* lda, double* w, double* work, lapack_int* lwork,\n                   lapack_int *info );\nvoid LAPACK_cheev( char* jobz, char* uplo, lapack_int* n,\n                   lapack_complex_float* a, lapack_int* lda, float* w,\n                   lapack_complex_float* work, lapack_int* lwork, float* rwork,\n                   lapack_int *info );\nvoid LAPACK_zheev( char* jobz, char* uplo, lapack_int* n,\n                   lapack_complex_double* a, lapack_int* lda, double* w,\n                   lapack_complex_double* work, lapack_int* lwork,\n                   double* rwork, lapack_int *info );\nvoid LAPACK_ssyevd( char* jobz, char* uplo, lapack_int* n, float* a,\n                    lapack_int* lda, float* w, float* work, lapack_int* lwork,\n                    lapack_int* iwork, lapack_int* liwork, lapack_int *info );\nvoid LAPACK_dsyevd( char* jobz, char* uplo, lapack_int* n, double* a,\n                    lapack_int* lda, double* w, double* work, lapack_int* lwork,\n                    lapack_int* iwork, lapack_int* liwork, lapack_int *info );\nvoid LAPACK_cheevd( char* jobz, char* uplo, lapack_int* n,\n                    lapack_complex_float* a, lapack_int* lda, float* w,\n                    lapack_complex_float* work, lapack_int* lwork, float* rwork,\n                    lapack_int* lrwork, lapack_int* iwork, lapack_int* liwork,\n                    lapack_int *info );\nvoid LAPACK_zheevd( char* jobz, char* uplo, lapack_int* n,\n                    lapack_complex_double* a, lapack_int* lda, double* w,\n                    lapack_complex_double* work, lapack_int* lwork,\n                    double* rwork, lapack_int* lrwork, lapack_int* iwork,\n                    lapack_int* liwork, lapack_int *info );\nvoid LAPACK_ssyevx( char* jobz, char* range, char* uplo, lapack_int* n,\n                    float* a, lapack_int* lda, float* vl, float* vu,\n                    lapack_int* il, lapack_int* iu, float* abstol,\n                    lapack_int* m, float* w, float* z, lapack_int* ldz,\n                    float* work, lapack_int* lwork, lapack_int* iwork,\n                    lapack_int* ifail, lapack_int *info );\nvoid LAPACK_dsyevx( char* jobz, char* range, char* uplo, lapack_int* n,\n                    double* a, lapack_int* lda, double* vl, double* vu,\n                    lapack_int* il, lapack_int* iu, double* abstol,\n                    lapack_int* m, double* w, double* z, lapack_int* ldz,\n                    double* work, lapack_int* lwork, lapack_int* iwork,\n                    lapack_int* ifail, lapack_int *info );\nvoid LAPACK_cheevx( char* jobz, char* range, char* uplo, lapack_int* n,\n                    lapack_complex_float* a, lapack_int* lda, float* vl,\n                    float* vu, lapack_int* il, lapack_int* iu, float* abstol,\n                    lapack_int* m, float* w, lapack_complex_float* z,\n                    lapack_int* ldz, lapack_complex_float* work,\n                    lapack_int* lwork, float* rwork, lapack_int* iwork,\n                    lapack_int* ifail, lapack_int *info );\nvoid LAPACK_zheevx( char* jobz, char* range, char* uplo, lapack_int* n,\n                    lapack_complex_double* a, lapack_int* lda, double* vl,\n                    double* vu, 
lapack_int* il, lapack_int* iu, double* abstol,\n                    lapack_int* m, double* w, lapack_complex_double* z,\n                    lapack_int* ldz, lapack_complex_double* work,\n                    lapack_int* lwork, double* rwork, lapack_int* iwork,\n                    lapack_int* ifail, lapack_int *info );\nvoid LAPACK_ssyevr( char* jobz, char* range, char* uplo, lapack_int* n,\n                    float* a, lapack_int* lda, float* vl, float* vu,\n                    lapack_int* il, lapack_int* iu, float* abstol,\n                    lapack_int* m, float* w, float* z, lapack_int* ldz,\n                    lapack_int* isuppz, float* work, lapack_int* lwork,\n                    lapack_int* iwork, lapack_int* liwork, lapack_int *info );\nvoid LAPACK_dsyevr( char* jobz, char* range, char* uplo, lapack_int* n,\n                    double* a, lapack_int* lda, double* vl, double* vu,\n                    lapack_int* il, lapack_int* iu, double* abstol,\n                    lapack_int* m, double* w, double* z, lapack_int* ldz,\n                    lapack_int* isuppz, double* work, lapack_int* lwork,\n                    lapack_int* iwork, lapack_int* liwork, lapack_int *info );\nvoid LAPACK_cheevr( char* jobz, char* range, char* uplo, lapack_int* n,\n                    lapack_complex_float* a, lapack_int* lda, float* vl,\n                    float* vu, lapack_int* il, lapack_int* iu, float* abstol,\n                    lapack_int* m, float* w, lapack_complex_float* z,\n                    lapack_int* ldz, lapack_int* isuppz,\n                    lapack_complex_float* work, lapack_int* lwork, float* rwork,\n                    lapack_int* lrwork, lapack_int* iwork, lapack_int* liwork,\n                    lapack_int *info );\nvoid LAPACK_zheevr( char* jobz, char* range, char* uplo, lapack_int* n,\n                    lapack_complex_double* a, lapack_int* lda, double* vl,\n                    double* vu, lapack_int* il, lapack_int* iu, double* abstol,\n                    lapack_int* m, double* w, lapack_complex_double* z,\n                    lapack_int* ldz, lapack_int* isuppz,\n                    lapack_complex_double* work, lapack_int* lwork,\n                    double* rwork, lapack_int* lrwork, lapack_int* iwork,\n                    lapack_int* liwork, lapack_int *info );\nvoid LAPACK_sspev( char* jobz, char* uplo, lapack_int* n, float* ap, float* w,\n                   float* z, lapack_int* ldz, float* work, lapack_int *info );\nvoid LAPACK_dspev( char* jobz, char* uplo, lapack_int* n, double* ap, double* w,\n                   double* z, lapack_int* ldz, double* work, lapack_int *info );\nvoid LAPACK_chpev( char* jobz, char* uplo, lapack_int* n,\n                   lapack_complex_float* ap, float* w, lapack_complex_float* z,\n                   lapack_int* ldz, lapack_complex_float* work, float* rwork,\n                   lapack_int *info );\nvoid LAPACK_zhpev( char* jobz, char* uplo, lapack_int* n,\n                   lapack_complex_double* ap, double* w,\n                   lapack_complex_double* z, lapack_int* ldz,\n                   lapack_complex_double* work, double* rwork,\n                   lapack_int *info );\nvoid LAPACK_sspevd( char* jobz, char* uplo, lapack_int* n, float* ap, float* w,\n                    float* z, lapack_int* ldz, float* work, lapack_int* lwork,\n                    lapack_int* iwork, lapack_int* liwork, lapack_int *info );\nvoid LAPACK_dspevd( char* jobz, char* uplo, lapack_int* n, double* ap,\n                    double* w, double* z, 
lapack_int* ldz, double* work,\n                    lapack_int* lwork, lapack_int* iwork, lapack_int* liwork,\n                    lapack_int *info );\nvoid LAPACK_chpevd( char* jobz, char* uplo, lapack_int* n,\n                    lapack_complex_float* ap, float* w, lapack_complex_float* z,\n                    lapack_int* ldz, lapack_complex_float* work,\n                    lapack_int* lwork, float* rwork, lapack_int* lrwork,\n                    lapack_int* iwork, lapack_int* liwork, lapack_int *info );\nvoid LAPACK_zhpevd( char* jobz, char* uplo, lapack_int* n,\n                    lapack_complex_double* ap, double* w,\n                    lapack_complex_double* z, lapack_int* ldz,\n                    lapack_complex_double* work, lapack_int* lwork,\n                    double* rwork, lapack_int* lrwork, lapack_int* iwork,\n                    lapack_int* liwork, lapack_int *info );\nvoid LAPACK_sspevx( char* jobz, char* range, char* uplo, lapack_int* n,\n                    float* ap, float* vl, float* vu, lapack_int* il,\n                    lapack_int* iu, float* abstol, lapack_int* m, float* w,\n                    float* z, lapack_int* ldz, float* work, lapack_int* iwork,\n                    lapack_int* ifail, lapack_int *info );\nvoid LAPACK_dspevx( char* jobz, char* range, char* uplo, lapack_int* n,\n                    double* ap, double* vl, double* vu, lapack_int* il,\n                    lapack_int* iu, double* abstol, lapack_int* m, double* w,\n                    double* z, lapack_int* ldz, double* work, lapack_int* iwork,\n                    lapack_int* ifail, lapack_int *info );\nvoid LAPACK_chpevx( char* jobz, char* range, char* uplo, lapack_int* n,\n                    lapack_complex_float* ap, float* vl, float* vu,\n                    lapack_int* il, lapack_int* iu, float* abstol,\n                    lapack_int* m, float* w, lapack_complex_float* z,\n                    lapack_int* ldz, lapack_complex_float* work, float* rwork,\n                    lapack_int* iwork, lapack_int* ifail, lapack_int *info );\nvoid LAPACK_zhpevx( char* jobz, char* range, char* uplo, lapack_int* n,\n                    lapack_complex_double* ap, double* vl, double* vu,\n                    lapack_int* il, lapack_int* iu, double* abstol,\n                    lapack_int* m, double* w, lapack_complex_double* z,\n                    lapack_int* ldz, lapack_complex_double* work, double* rwork,\n                    lapack_int* iwork, lapack_int* ifail, lapack_int *info );\nvoid LAPACK_ssbev( char* jobz, char* uplo, lapack_int* n, lapack_int* kd,\n                   float* ab, lapack_int* ldab, float* w, float* z,\n                   lapack_int* ldz, float* work, lapack_int *info );\nvoid LAPACK_dsbev( char* jobz, char* uplo, lapack_int* n, lapack_int* kd,\n                   double* ab, lapack_int* ldab, double* w, double* z,\n                   lapack_int* ldz, double* work, lapack_int *info );\nvoid LAPACK_chbev( char* jobz, char* uplo, lapack_int* n, lapack_int* kd,\n                   lapack_complex_float* ab, lapack_int* ldab, float* w,\n                   lapack_complex_float* z, lapack_int* ldz,\n                   lapack_complex_float* work, float* rwork, lapack_int *info );\nvoid LAPACK_zhbev( char* jobz, char* uplo, lapack_int* n, lapack_int* kd,\n                   lapack_complex_double* ab, lapack_int* ldab, double* w,\n                   lapack_complex_double* z, lapack_int* ldz,\n                   lapack_complex_double* work, double* rwork,\n                   lapack_int 
*info );\nvoid LAPACK_ssbevd( char* jobz, char* uplo, lapack_int* n, lapack_int* kd,\n                    float* ab, lapack_int* ldab, float* w, float* z,\n                    lapack_int* ldz, float* work, lapack_int* lwork,\n                    lapack_int* iwork, lapack_int* liwork, lapack_int *info );\nvoid LAPACK_dsbevd( char* jobz, char* uplo, lapack_int* n, lapack_int* kd,\n                    double* ab, lapack_int* ldab, double* w, double* z,\n                    lapack_int* ldz, double* work, lapack_int* lwork,\n                    lapack_int* iwork, lapack_int* liwork, lapack_int *info );\nvoid LAPACK_chbevd( char* jobz, char* uplo, lapack_int* n, lapack_int* kd,\n                    lapack_complex_float* ab, lapack_int* ldab, float* w,\n                    lapack_complex_float* z, lapack_int* ldz,\n                    lapack_complex_float* work, lapack_int* lwork, float* rwork,\n                    lapack_int* lrwork, lapack_int* iwork, lapack_int* liwork,\n                    lapack_int *info );\nvoid LAPACK_zhbevd( char* jobz, char* uplo, lapack_int* n, lapack_int* kd,\n                    lapack_complex_double* ab, lapack_int* ldab, double* w,\n                    lapack_complex_double* z, lapack_int* ldz,\n                    lapack_complex_double* work, lapack_int* lwork,\n                    double* rwork, lapack_int* lrwork, lapack_int* iwork,\n                    lapack_int* liwork, lapack_int *info );\nvoid LAPACK_ssbevx( char* jobz, char* range, char* uplo, lapack_int* n,\n                    lapack_int* kd, float* ab, lapack_int* ldab, float* q,\n                    lapack_int* ldq, float* vl, float* vu, lapack_int* il,\n                    lapack_int* iu, float* abstol, lapack_int* m, float* w,\n                    float* z, lapack_int* ldz, float* work, lapack_int* iwork,\n                    lapack_int* ifail, lapack_int *info );\nvoid LAPACK_dsbevx( char* jobz, char* range, char* uplo, lapack_int* n,\n                    lapack_int* kd, double* ab, lapack_int* ldab, double* q,\n                    lapack_int* ldq, double* vl, double* vu, lapack_int* il,\n                    lapack_int* iu, double* abstol, lapack_int* m, double* w,\n                    double* z, lapack_int* ldz, double* work, lapack_int* iwork,\n                    lapack_int* ifail, lapack_int *info );\nvoid LAPACK_chbevx( char* jobz, char* range, char* uplo, lapack_int* n,\n                    lapack_int* kd, lapack_complex_float* ab, lapack_int* ldab,\n                    lapack_complex_float* q, lapack_int* ldq, float* vl,\n                    float* vu, lapack_int* il, lapack_int* iu, float* abstol,\n                    lapack_int* m, float* w, lapack_complex_float* z,\n                    lapack_int* ldz, lapack_complex_float* work, float* rwork,\n                    lapack_int* iwork, lapack_int* ifail, lapack_int *info );\nvoid LAPACK_zhbevx( char* jobz, char* range, char* uplo, lapack_int* n,\n                    lapack_int* kd, lapack_complex_double* ab, lapack_int* ldab,\n                    lapack_complex_double* q, lapack_int* ldq, double* vl,\n                    double* vu, lapack_int* il, lapack_int* iu, double* abstol,\n                    lapack_int* m, double* w, lapack_complex_double* z,\n                    lapack_int* ldz, lapack_complex_double* work, double* rwork,\n                    lapack_int* iwork, lapack_int* ifail, lapack_int *info );\nvoid LAPACK_sstev( char* jobz, lapack_int* n, float* d, float* e, float* z,\n                   lapack_int* ldz, float* work, 
lapack_int *info );\nvoid LAPACK_dstev( char* jobz, lapack_int* n, double* d, double* e, double* z,\n                   lapack_int* ldz, double* work, lapack_int *info );\nvoid LAPACK_sstevd( char* jobz, lapack_int* n, float* d, float* e, float* z,\n                    lapack_int* ldz, float* work, lapack_int* lwork,\n                    lapack_int* iwork, lapack_int* liwork, lapack_int *info );\nvoid LAPACK_dstevd( char* jobz, lapack_int* n, double* d, double* e, double* z,\n                    lapack_int* ldz, double* work, lapack_int* lwork,\n                    lapack_int* iwork, lapack_int* liwork, lapack_int *info );\nvoid LAPACK_sstevx( char* jobz, char* range, lapack_int* n, float* d, float* e,\n                    float* vl, float* vu, lapack_int* il, lapack_int* iu,\n                    float* abstol, lapack_int* m, float* w, float* z,\n                    lapack_int* ldz, float* work, lapack_int* iwork,\n                    lapack_int* ifail, lapack_int *info );\nvoid LAPACK_dstevx( char* jobz, char* range, lapack_int* n, double* d,\n                    double* e, double* vl, double* vu, lapack_int* il,\n                    lapack_int* iu, double* abstol, lapack_int* m, double* w,\n                    double* z, lapack_int* ldz, double* work, lapack_int* iwork,\n                    lapack_int* ifail, lapack_int *info );\nvoid LAPACK_sstevr( char* jobz, char* range, lapack_int* n, float* d, float* e,\n                    float* vl, float* vu, lapack_int* il, lapack_int* iu,\n                    float* abstol, lapack_int* m, float* w, float* z,\n                    lapack_int* ldz, lapack_int* isuppz, float* work,\n                    lapack_int* lwork, lapack_int* iwork, lapack_int* liwork,\n                    lapack_int *info );\nvoid LAPACK_dstevr( char* jobz, char* range, lapack_int* n, double* d,\n                    double* e, double* vl, double* vu, lapack_int* il,\n                    lapack_int* iu, double* abstol, lapack_int* m, double* w,\n                    double* z, lapack_int* ldz, lapack_int* isuppz,\n                    double* work, lapack_int* lwork, lapack_int* iwork,\n                    lapack_int* liwork, lapack_int *info );\nvoid LAPACK_sgees( char* jobvs, char* sort, LAPACK_S_SELECT2 select,\n                   lapack_int* n, float* a, lapack_int* lda, lapack_int* sdim,\n                   float* wr, float* wi, float* vs, lapack_int* ldvs,\n                   float* work, lapack_int* lwork, lapack_logical* bwork,\n                   lapack_int *info );\nvoid LAPACK_dgees( char* jobvs, char* sort, LAPACK_D_SELECT2 select,\n                   lapack_int* n, double* a, lapack_int* lda, lapack_int* sdim,\n                   double* wr, double* wi, double* vs, lapack_int* ldvs,\n                   double* work, lapack_int* lwork, lapack_logical* bwork,\n                   lapack_int *info );\nvoid LAPACK_cgees( char* jobvs, char* sort, LAPACK_C_SELECT1 select,\n                   lapack_int* n, lapack_complex_float* a, lapack_int* lda,\n                   lapack_int* sdim, lapack_complex_float* w,\n                   lapack_complex_float* vs, lapack_int* ldvs,\n                   lapack_complex_float* work, lapack_int* lwork, float* rwork,\n                   lapack_logical* bwork, lapack_int *info );\nvoid LAPACK_zgees( char* jobvs, char* sort, LAPACK_Z_SELECT1 select,\n                   lapack_int* n, lapack_complex_double* a, lapack_int* lda,\n                   lapack_int* sdim, lapack_complex_double* w,\n                   lapack_complex_double* vs, 
lapack_int* ldvs,\n                   lapack_complex_double* work, lapack_int* lwork,\n                   double* rwork, lapack_logical* bwork, lapack_int *info );\nvoid LAPACK_sgeesx( char* jobvs, char* sort, LAPACK_S_SELECT2 select,\n                    char* sense, lapack_int* n, float* a, lapack_int* lda,\n                    lapack_int* sdim, float* wr, float* wi, float* vs,\n                    lapack_int* ldvs, float* rconde, float* rcondv, float* work,\n                    lapack_int* lwork, lapack_int* iwork, lapack_int* liwork,\n                    lapack_logical* bwork, lapack_int *info );\nvoid LAPACK_dgeesx( char* jobvs, char* sort, LAPACK_D_SELECT2 select,\n                    char* sense, lapack_int* n, double* a, lapack_int* lda,\n                    lapack_int* sdim, double* wr, double* wi, double* vs,\n                    lapack_int* ldvs, double* rconde, double* rcondv,\n                    double* work, lapack_int* lwork, lapack_int* iwork,\n                    lapack_int* liwork, lapack_logical* bwork,\n                    lapack_int *info );\nvoid LAPACK_cgeesx( char* jobvs, char* sort, LAPACK_C_SELECT1 select,\n                    char* sense, lapack_int* n, lapack_complex_float* a,\n                    lapack_int* lda, lapack_int* sdim, lapack_complex_float* w,\n                    lapack_complex_float* vs, lapack_int* ldvs, float* rconde,\n                    float* rcondv, lapack_complex_float* work,\n                    lapack_int* lwork, float* rwork, lapack_logical* bwork,\n                    lapack_int *info );\nvoid LAPACK_zgeesx( char* jobvs, char* sort, LAPACK_Z_SELECT1 select,\n                    char* sense, lapack_int* n, lapack_complex_double* a,\n                    lapack_int* lda, lapack_int* sdim, lapack_complex_double* w,\n                    lapack_complex_double* vs, lapack_int* ldvs, double* rconde,\n                    double* rcondv, lapack_complex_double* work,\n                    lapack_int* lwork, double* rwork, lapack_logical* bwork,\n                    lapack_int *info );\nvoid LAPACK_sgeev( char* jobvl, char* jobvr, lapack_int* n, float* a,\n                   lapack_int* lda, float* wr, float* wi, float* vl,\n                   lapack_int* ldvl, float* vr, lapack_int* ldvr, float* work,\n                   lapack_int* lwork, lapack_int *info );\nvoid LAPACK_dgeev( char* jobvl, char* jobvr, lapack_int* n, double* a,\n                   lapack_int* lda, double* wr, double* wi, double* vl,\n                   lapack_int* ldvl, double* vr, lapack_int* ldvr, double* work,\n                   lapack_int* lwork, lapack_int *info );\nvoid LAPACK_cgeev( char* jobvl, char* jobvr, lapack_int* n,\n                   lapack_complex_float* a, lapack_int* lda,\n                   lapack_complex_float* w, lapack_complex_float* vl,\n                   lapack_int* ldvl, lapack_complex_float* vr, lapack_int* ldvr,\n                   lapack_complex_float* work, lapack_int* lwork, float* rwork,\n                   lapack_int *info );\nvoid LAPACK_zgeev( char* jobvl, char* jobvr, lapack_int* n,\n                   lapack_complex_double* a, lapack_int* lda,\n                   lapack_complex_double* w, lapack_complex_double* vl,\n                   lapack_int* ldvl, lapack_complex_double* vr,\n                   lapack_int* ldvr, lapack_complex_double* work,\n                   lapack_int* lwork, double* rwork, lapack_int *info );\nvoid LAPACK_sgeevx( char* balanc, char* jobvl, char* jobvr, char* sense,\n                    lapack_int* n, float* a, 
lapack_int* lda, float* wr,\n                    float* wi, float* vl, lapack_int* ldvl, float* vr,\n                    lapack_int* ldvr, lapack_int* ilo, lapack_int* ihi,\n                    float* scale, float* abnrm, float* rconde, float* rcondv,\n                    float* work, lapack_int* lwork, lapack_int* iwork,\n                    lapack_int *info );\nvoid LAPACK_dgeevx( char* balanc, char* jobvl, char* jobvr, char* sense,\n                    lapack_int* n, double* a, lapack_int* lda, double* wr,\n                    double* wi, double* vl, lapack_int* ldvl, double* vr,\n                    lapack_int* ldvr, lapack_int* ilo, lapack_int* ihi,\n                    double* scale, double* abnrm, double* rconde,\n                    double* rcondv, double* work, lapack_int* lwork,\n                    lapack_int* iwork, lapack_int *info );\nvoid LAPACK_cgeevx( char* balanc, char* jobvl, char* jobvr, char* sense,\n                    lapack_int* n, lapack_complex_float* a, lapack_int* lda,\n                    lapack_complex_float* w, lapack_complex_float* vl,\n                    lapack_int* ldvl, lapack_complex_float* vr,\n                    lapack_int* ldvr, lapack_int* ilo, lapack_int* ihi,\n                    float* scale, float* abnrm, float* rconde, float* rcondv,\n                    lapack_complex_float* work, lapack_int* lwork, float* rwork,\n                    lapack_int *info );\nvoid LAPACK_zgeevx( char* balanc, char* jobvl, char* jobvr, char* sense,\n                    lapack_int* n, lapack_complex_double* a, lapack_int* lda,\n                    lapack_complex_double* w, lapack_complex_double* vl,\n                    lapack_int* ldvl, lapack_complex_double* vr,\n                    lapack_int* ldvr, lapack_int* ilo, lapack_int* ihi,\n                    double* scale, double* abnrm, double* rconde,\n                    double* rcondv, lapack_complex_double* work,\n                    lapack_int* lwork, double* rwork, lapack_int *info );\nvoid LAPACK_sgesvd( char* jobu, char* jobvt, lapack_int* m, lapack_int* n,\n                    float* a, lapack_int* lda, float* s, float* u,\n                    lapack_int* ldu, float* vt, lapack_int* ldvt, float* work,\n                    lapack_int* lwork, lapack_int *info );\nvoid LAPACK_dgesvd( char* jobu, char* jobvt, lapack_int* m, lapack_int* n,\n                    double* a, lapack_int* lda, double* s, double* u,\n                    lapack_int* ldu, double* vt, lapack_int* ldvt, double* work,\n                    lapack_int* lwork, lapack_int *info );\nvoid LAPACK_cgesvd( char* jobu, char* jobvt, lapack_int* m, lapack_int* n,\n                    lapack_complex_float* a, lapack_int* lda, float* s,\n                    lapack_complex_float* u, lapack_int* ldu,\n                    lapack_complex_float* vt, lapack_int* ldvt,\n                    lapack_complex_float* work, lapack_int* lwork, float* rwork,\n                    lapack_int *info );\nvoid LAPACK_zgesvd( char* jobu, char* jobvt, lapack_int* m, lapack_int* n,\n                    lapack_complex_double* a, lapack_int* lda, double* s,\n                    lapack_complex_double* u, lapack_int* ldu,\n                    lapack_complex_double* vt, lapack_int* ldvt,\n                    lapack_complex_double* work, lapack_int* lwork,\n                    double* rwork, lapack_int *info );\nvoid LAPACK_sgesdd( char* jobz, lapack_int* m, lapack_int* n, float* a,\n                    lapack_int* lda, float* s, float* u, lapack_int* ldu,\n                    float* 
vt, lapack_int* ldvt, float* work, lapack_int* lwork,\n                    lapack_int* iwork, lapack_int *info );\nvoid LAPACK_dgesdd( char* jobz, lapack_int* m, lapack_int* n, double* a,\n                    lapack_int* lda, double* s, double* u, lapack_int* ldu,\n                    double* vt, lapack_int* ldvt, double* work,\n                    lapack_int* lwork, lapack_int* iwork, lapack_int *info );\nvoid LAPACK_cgesdd( char* jobz, lapack_int* m, lapack_int* n,\n                    lapack_complex_float* a, lapack_int* lda, float* s,\n                    lapack_complex_float* u, lapack_int* ldu,\n                    lapack_complex_float* vt, lapack_int* ldvt,\n                    lapack_complex_float* work, lapack_int* lwork, float* rwork,\n                    lapack_int* iwork, lapack_int *info );\nvoid LAPACK_zgesdd( char* jobz, lapack_int* m, lapack_int* n,\n                    lapack_complex_double* a, lapack_int* lda, double* s,\n                    lapack_complex_double* u, lapack_int* ldu,\n                    lapack_complex_double* vt, lapack_int* ldvt,\n                    lapack_complex_double* work, lapack_int* lwork,\n                    double* rwork, lapack_int* iwork, lapack_int *info );\nvoid LAPACK_dgejsv( char* joba, char* jobu, char* jobv, char* jobr, char* jobt,\n                    char* jobp, lapack_int* m, lapack_int* n, double* a,\n                    lapack_int* lda, double* sva, double* u, lapack_int* ldu,\n                    double* v, lapack_int* ldv, double* work, lapack_int* lwork,\n                    lapack_int* iwork, lapack_int *info );\nvoid LAPACK_sgejsv( char* joba, char* jobu, char* jobv, char* jobr, char* jobt,\n                    char* jobp, lapack_int* m, lapack_int* n, float* a,\n                    lapack_int* lda, float* sva, float* u, lapack_int* ldu,\n                    float* v, lapack_int* ldv, float* work, lapack_int* lwork,\n                    lapack_int* iwork, lapack_int *info );\nvoid LAPACK_dgesvj( char* joba, char* jobu, char* jobv, lapack_int* m,\n                    lapack_int* n, double* a, lapack_int* lda, double* sva,\n                    lapack_int* mv, double* v, lapack_int* ldv, double* work,\n                    lapack_int* lwork, lapack_int *info );\nvoid LAPACK_sgesvj( char* joba, char* jobu, char* jobv, lapack_int* m,\n                    lapack_int* n, float* a, lapack_int* lda, float* sva,\n                    lapack_int* mv, float* v, lapack_int* ldv, float* work,\n                    lapack_int* lwork, lapack_int *info );\nvoid LAPACK_sggsvd( char* jobu, char* jobv, char* jobq, lapack_int* m,\n                    lapack_int* n, lapack_int* p, lapack_int* k, lapack_int* l,\n                    float* a, lapack_int* lda, float* b, lapack_int* ldb,\n                    float* alpha, float* beta, float* u, lapack_int* ldu,\n                    float* v, lapack_int* ldv, float* q, lapack_int* ldq,\n                    float* work, lapack_int* iwork, lapack_int *info );\nvoid LAPACK_dggsvd( char* jobu, char* jobv, char* jobq, lapack_int* m,\n                    lapack_int* n, lapack_int* p, lapack_int* k, lapack_int* l,\n                    double* a, lapack_int* lda, double* b, lapack_int* ldb,\n                    double* alpha, double* beta, double* u, lapack_int* ldu,\n                    double* v, lapack_int* ldv, double* q, lapack_int* ldq,\n                    double* work, lapack_int* iwork, lapack_int *info );\nvoid LAPACK_cggsvd( char* jobu, char* jobv, char* jobq, lapack_int* m,\n                    
lapack_int* n, lapack_int* p, lapack_int* k, lapack_int* l,\n                    lapack_complex_float* a, lapack_int* lda,\n                    lapack_complex_float* b, lapack_int* ldb, float* alpha,\n                    float* beta, lapack_complex_float* u, lapack_int* ldu,\n                    lapack_complex_float* v, lapack_int* ldv,\n                    lapack_complex_float* q, lapack_int* ldq,\n                    lapack_complex_float* work, float* rwork, lapack_int* iwork,\n                    lapack_int *info );\nvoid LAPACK_zggsvd( char* jobu, char* jobv, char* jobq, lapack_int* m,\n                    lapack_int* n, lapack_int* p, lapack_int* k, lapack_int* l,\n                    lapack_complex_double* a, lapack_int* lda,\n                    lapack_complex_double* b, lapack_int* ldb, double* alpha,\n                    double* beta, lapack_complex_double* u, lapack_int* ldu,\n                    lapack_complex_double* v, lapack_int* ldv,\n                    lapack_complex_double* q, lapack_int* ldq,\n                    lapack_complex_double* work, double* rwork,\n                    lapack_int* iwork, lapack_int *info );\nvoid LAPACK_ssygv( lapack_int* itype, char* jobz, char* uplo, lapack_int* n,\n                   float* a, lapack_int* lda, float* b, lapack_int* ldb,\n                   float* w, float* work, lapack_int* lwork, lapack_int *info );\nvoid LAPACK_dsygv( lapack_int* itype, char* jobz, char* uplo, lapack_int* n,\n                   double* a, lapack_int* lda, double* b, lapack_int* ldb,\n                   double* w, double* work, lapack_int* lwork,\n                   lapack_int *info );\nvoid LAPACK_chegv( lapack_int* itype, char* jobz, char* uplo, lapack_int* n,\n                   lapack_complex_float* a, lapack_int* lda,\n                   lapack_complex_float* b, lapack_int* ldb, float* w,\n                   lapack_complex_float* work, lapack_int* lwork, float* rwork,\n                   lapack_int *info );\nvoid LAPACK_zhegv( lapack_int* itype, char* jobz, char* uplo, lapack_int* n,\n                   lapack_complex_double* a, lapack_int* lda,\n                   lapack_complex_double* b, lapack_int* ldb, double* w,\n                   lapack_complex_double* work, lapack_int* lwork,\n                   double* rwork, lapack_int *info );\nvoid LAPACK_ssygvd( lapack_int* itype, char* jobz, char* uplo, lapack_int* n,\n                    float* a, lapack_int* lda, float* b, lapack_int* ldb,\n                    float* w, float* work, lapack_int* lwork, lapack_int* iwork,\n                    lapack_int* liwork, lapack_int *info );\nvoid LAPACK_dsygvd( lapack_int* itype, char* jobz, char* uplo, lapack_int* n,\n                    double* a, lapack_int* lda, double* b, lapack_int* ldb,\n                    double* w, double* work, lapack_int* lwork,\n                    lapack_int* iwork, lapack_int* liwork, lapack_int *info );\nvoid LAPACK_chegvd( lapack_int* itype, char* jobz, char* uplo, lapack_int* n,\n                    lapack_complex_float* a, lapack_int* lda,\n                    lapack_complex_float* b, lapack_int* ldb, float* w,\n                    lapack_complex_float* work, lapack_int* lwork, float* rwork,\n                    lapack_int* lrwork, lapack_int* iwork, lapack_int* liwork,\n                    lapack_int *info );\nvoid LAPACK_zhegvd( lapack_int* itype, char* jobz, char* uplo, lapack_int* n,\n                    lapack_complex_double* a, lapack_int* lda,\n                    lapack_complex_double* b, lapack_int* ldb, double* w,\n     
               lapack_complex_double* work, lapack_int* lwork,\n                    double* rwork, lapack_int* lrwork, lapack_int* iwork,\n                    lapack_int* liwork, lapack_int *info );\nvoid LAPACK_ssygvx( lapack_int* itype, char* jobz, char* range, char* uplo,\n                    lapack_int* n, float* a, lapack_int* lda, float* b,\n                    lapack_int* ldb, float* vl, float* vu, lapack_int* il,\n                    lapack_int* iu, float* abstol, lapack_int* m, float* w,\n                    float* z, lapack_int* ldz, float* work, lapack_int* lwork,\n                    lapack_int* iwork, lapack_int* ifail, lapack_int *info );\nvoid LAPACK_dsygvx( lapack_int* itype, char* jobz, char* range, char* uplo,\n                    lapack_int* n, double* a, lapack_int* lda, double* b,\n                    lapack_int* ldb, double* vl, double* vu, lapack_int* il,\n                    lapack_int* iu, double* abstol, lapack_int* m, double* w,\n                    double* z, lapack_int* ldz, double* work, lapack_int* lwork,\n                    lapack_int* iwork, lapack_int* ifail, lapack_int *info );\nvoid LAPACK_chegvx( lapack_int* itype, char* jobz, char* range, char* uplo,\n                    lapack_int* n, lapack_complex_float* a, lapack_int* lda,\n                    lapack_complex_float* b, lapack_int* ldb, float* vl,\n                    float* vu, lapack_int* il, lapack_int* iu, float* abstol,\n                    lapack_int* m, float* w, lapack_complex_float* z,\n                    lapack_int* ldz, lapack_complex_float* work,\n                    lapack_int* lwork, float* rwork, lapack_int* iwork,\n                    lapack_int* ifail, lapack_int *info );\nvoid LAPACK_zhegvx( lapack_int* itype, char* jobz, char* range, char* uplo,\n                    lapack_int* n, lapack_complex_double* a, lapack_int* lda,\n                    lapack_complex_double* b, lapack_int* ldb, double* vl,\n                    double* vu, lapack_int* il, lapack_int* iu, double* abstol,\n                    lapack_int* m, double* w, lapack_complex_double* z,\n                    lapack_int* ldz, lapack_complex_double* work,\n                    lapack_int* lwork, double* rwork, lapack_int* iwork,\n                    lapack_int* ifail, lapack_int *info );\nvoid LAPACK_sspgv( lapack_int* itype, char* jobz, char* uplo, lapack_int* n,\n                   float* ap, float* bp, float* w, float* z, lapack_int* ldz,\n                   float* work, lapack_int *info );\nvoid LAPACK_dspgv( lapack_int* itype, char* jobz, char* uplo, lapack_int* n,\n                   double* ap, double* bp, double* w, double* z,\n                   lapack_int* ldz, double* work, lapack_int *info );\nvoid LAPACK_chpgv( lapack_int* itype, char* jobz, char* uplo, lapack_int* n,\n                   lapack_complex_float* ap, lapack_complex_float* bp, float* w,\n                   lapack_complex_float* z, lapack_int* ldz,\n                   lapack_complex_float* work, float* rwork, lapack_int *info );\nvoid LAPACK_zhpgv( lapack_int* itype, char* jobz, char* uplo, lapack_int* n,\n                   lapack_complex_double* ap, lapack_complex_double* bp,\n                   double* w, lapack_complex_double* z, lapack_int* ldz,\n                   lapack_complex_double* work, double* rwork,\n                   lapack_int *info );\nvoid LAPACK_sspgvd( lapack_int* itype, char* jobz, char* uplo, lapack_int* n,\n                    float* ap, float* bp, float* w, float* z, lapack_int* ldz,\n                    float* work, 
lapack_int* lwork, lapack_int* iwork,\n                    lapack_int* liwork, lapack_int *info );\nvoid LAPACK_dspgvd( lapack_int* itype, char* jobz, char* uplo, lapack_int* n,\n                    double* ap, double* bp, double* w, double* z,\n                    lapack_int* ldz, double* work, lapack_int* lwork,\n                    lapack_int* iwork, lapack_int* liwork, lapack_int *info );\nvoid LAPACK_chpgvd( lapack_int* itype, char* jobz, char* uplo, lapack_int* n,\n                    lapack_complex_float* ap, lapack_complex_float* bp,\n                    float* w, lapack_complex_float* z, lapack_int* ldz,\n                    lapack_complex_float* work, lapack_int* lwork, float* rwork,\n                    lapack_int* lrwork, lapack_int* iwork, lapack_int* liwork,\n                    lapack_int *info );\nvoid LAPACK_zhpgvd( lapack_int* itype, char* jobz, char* uplo, lapack_int* n,\n                    lapack_complex_double* ap, lapack_complex_double* bp,\n                    double* w, lapack_complex_double* z, lapack_int* ldz,\n                    lapack_complex_double* work, lapack_int* lwork,\n                    double* rwork, lapack_int* lrwork, lapack_int* iwork,\n                    lapack_int* liwork, lapack_int *info );\nvoid LAPACK_sspgvx( lapack_int* itype, char* jobz, char* range, char* uplo,\n                    lapack_int* n, float* ap, float* bp, float* vl, float* vu,\n                    lapack_int* il, lapack_int* iu, float* abstol,\n                    lapack_int* m, float* w, float* z, lapack_int* ldz,\n                    float* work, lapack_int* iwork, lapack_int* ifail,\n                    lapack_int *info );\nvoid LAPACK_dspgvx( lapack_int* itype, char* jobz, char* range, char* uplo,\n                    lapack_int* n, double* ap, double* bp, double* vl,\n                    double* vu, lapack_int* il, lapack_int* iu, double* abstol,\n                    lapack_int* m, double* w, double* z, lapack_int* ldz,\n                    double* work, lapack_int* iwork, lapack_int* ifail,\n                    lapack_int *info );\nvoid LAPACK_chpgvx( lapack_int* itype, char* jobz, char* range, char* uplo,\n                    lapack_int* n, lapack_complex_float* ap,\n                    lapack_complex_float* bp, float* vl, float* vu,\n                    lapack_int* il, lapack_int* iu, float* abstol,\n                    lapack_int* m, float* w, lapack_complex_float* z,\n                    lapack_int* ldz, lapack_complex_float* work, float* rwork,\n                    lapack_int* iwork, lapack_int* ifail, lapack_int *info );\nvoid LAPACK_zhpgvx( lapack_int* itype, char* jobz, char* range, char* uplo,\n                    lapack_int* n, lapack_complex_double* ap,\n                    lapack_complex_double* bp, double* vl, double* vu,\n                    lapack_int* il, lapack_int* iu, double* abstol,\n                    lapack_int* m, double* w, lapack_complex_double* z,\n                    lapack_int* ldz, lapack_complex_double* work, double* rwork,\n                    lapack_int* iwork, lapack_int* ifail, lapack_int *info );\nvoid LAPACK_ssbgv( char* jobz, char* uplo, lapack_int* n, lapack_int* ka,\n                   lapack_int* kb, float* ab, lapack_int* ldab, float* bb,\n                   lapack_int* ldbb, float* w, float* z, lapack_int* ldz,\n                   float* work, lapack_int *info );\nvoid LAPACK_dsbgv( char* jobz, char* uplo, lapack_int* n, lapack_int* ka,\n                   lapack_int* kb, double* ab, lapack_int* ldab, double* bb,\n             
      lapack_int* ldbb, double* w, double* z, lapack_int* ldz,\n                   double* work, lapack_int *info );\nvoid LAPACK_chbgv( char* jobz, char* uplo, lapack_int* n, lapack_int* ka,\n                   lapack_int* kb, lapack_complex_float* ab, lapack_int* ldab,\n                   lapack_complex_float* bb, lapack_int* ldbb, float* w,\n                   lapack_complex_float* z, lapack_int* ldz,\n                   lapack_complex_float* work, float* rwork, lapack_int *info );\nvoid LAPACK_zhbgv( char* jobz, char* uplo, lapack_int* n, lapack_int* ka,\n                   lapack_int* kb, lapack_complex_double* ab, lapack_int* ldab,\n                   lapack_complex_double* bb, lapack_int* ldbb, double* w,\n                   lapack_complex_double* z, lapack_int* ldz,\n                   lapack_complex_double* work, double* rwork,\n                   lapack_int *info );\nvoid LAPACK_ssbgvd( char* jobz, char* uplo, lapack_int* n, lapack_int* ka,\n                    lapack_int* kb, float* ab, lapack_int* ldab, float* bb,\n                    lapack_int* ldbb, float* w, float* z, lapack_int* ldz,\n                    float* work, lapack_int* lwork, lapack_int* iwork,\n                    lapack_int* liwork, lapack_int *info );\nvoid LAPACK_dsbgvd( char* jobz, char* uplo, lapack_int* n, lapack_int* ka,\n                    lapack_int* kb, double* ab, lapack_int* ldab, double* bb,\n                    lapack_int* ldbb, double* w, double* z, lapack_int* ldz,\n                    double* work, lapack_int* lwork, lapack_int* iwork,\n                    lapack_int* liwork, lapack_int *info );\nvoid LAPACK_chbgvd( char* jobz, char* uplo, lapack_int* n, lapack_int* ka,\n                    lapack_int* kb, lapack_complex_float* ab, lapack_int* ldab,\n                    lapack_complex_float* bb, lapack_int* ldbb, float* w,\n                    lapack_complex_float* z, lapack_int* ldz,\n                    lapack_complex_float* work, lapack_int* lwork, float* rwork,\n                    lapack_int* lrwork, lapack_int* iwork, lapack_int* liwork,\n                    lapack_int *info );\nvoid LAPACK_zhbgvd( char* jobz, char* uplo, lapack_int* n, lapack_int* ka,\n                    lapack_int* kb, lapack_complex_double* ab, lapack_int* ldab,\n                    lapack_complex_double* bb, lapack_int* ldbb, double* w,\n                    lapack_complex_double* z, lapack_int* ldz,\n                    lapack_complex_double* work, lapack_int* lwork,\n                    double* rwork, lapack_int* lrwork, lapack_int* iwork,\n                    lapack_int* liwork, lapack_int *info );\nvoid LAPACK_ssbgvx( char* jobz, char* range, char* uplo, lapack_int* n,\n                    lapack_int* ka, lapack_int* kb, float* ab, lapack_int* ldab,\n                    float* bb, lapack_int* ldbb, float* q, lapack_int* ldq,\n                    float* vl, float* vu, lapack_int* il, lapack_int* iu,\n                    float* abstol, lapack_int* m, float* w, float* z,\n                    lapack_int* ldz, float* work, lapack_int* iwork,\n                    lapack_int* ifail, lapack_int *info );\nvoid LAPACK_dsbgvx( char* jobz, char* range, char* uplo, lapack_int* n,\n                    lapack_int* ka, lapack_int* kb, double* ab,\n                    lapack_int* ldab, double* bb, lapack_int* ldbb, double* q,\n                    lapack_int* ldq, double* vl, double* vu, lapack_int* il,\n                    lapack_int* iu, double* abstol, lapack_int* m, double* w,\n                    double* z, lapack_int* ldz, 
double* work, lapack_int* iwork,\n                    lapack_int* ifail, lapack_int *info );\nvoid LAPACK_chbgvx( char* jobz, char* range, char* uplo, lapack_int* n,\n                    lapack_int* ka, lapack_int* kb, lapack_complex_float* ab,\n                    lapack_int* ldab, lapack_complex_float* bb,\n                    lapack_int* ldbb, lapack_complex_float* q, lapack_int* ldq,\n                    float* vl, float* vu, lapack_int* il, lapack_int* iu,\n                    float* abstol, lapack_int* m, float* w,\n                    lapack_complex_float* z, lapack_int* ldz,\n                    lapack_complex_float* work, float* rwork, lapack_int* iwork,\n                    lapack_int* ifail, lapack_int *info );\nvoid LAPACK_zhbgvx( char* jobz, char* range, char* uplo, lapack_int* n,\n                    lapack_int* ka, lapack_int* kb, lapack_complex_double* ab,\n                    lapack_int* ldab, lapack_complex_double* bb,\n                    lapack_int* ldbb, lapack_complex_double* q, lapack_int* ldq,\n                    double* vl, double* vu, lapack_int* il, lapack_int* iu,\n                    double* abstol, lapack_int* m, double* w,\n                    lapack_complex_double* z, lapack_int* ldz,\n                    lapack_complex_double* work, double* rwork,\n                    lapack_int* iwork, lapack_int* ifail, lapack_int *info );\nvoid LAPACK_sgges( char* jobvsl, char* jobvsr, char* sort,\n                   LAPACK_S_SELECT3 selctg, lapack_int* n, float* a,\n                   lapack_int* lda, float* b, lapack_int* ldb, lapack_int* sdim,\n                   float* alphar, float* alphai, float* beta, float* vsl,\n                   lapack_int* ldvsl, float* vsr, lapack_int* ldvsr,\n                   float* work, lapack_int* lwork, lapack_logical* bwork,\n                   lapack_int *info );\nvoid LAPACK_dgges( char* jobvsl, char* jobvsr, char* sort,\n                   LAPACK_D_SELECT3 selctg, lapack_int* n, double* a,\n                   lapack_int* lda, double* b, lapack_int* ldb,\n                   lapack_int* sdim, double* alphar, double* alphai,\n                   double* beta, double* vsl, lapack_int* ldvsl, double* vsr,\n                   lapack_int* ldvsr, double* work, lapack_int* lwork,\n                   lapack_logical* bwork, lapack_int *info );\nvoid LAPACK_cgges( char* jobvsl, char* jobvsr, char* sort,\n                   LAPACK_C_SELECT2 selctg, lapack_int* n,\n                   lapack_complex_float* a, lapack_int* lda,\n                   lapack_complex_float* b, lapack_int* ldb, lapack_int* sdim,\n                   lapack_complex_float* alpha, lapack_complex_float* beta,\n                   lapack_complex_float* vsl, lapack_int* ldvsl,\n                   lapack_complex_float* vsr, lapack_int* ldvsr,\n                   lapack_complex_float* work, lapack_int* lwork, float* rwork,\n                   lapack_logical* bwork, lapack_int *info );\nvoid LAPACK_zgges( char* jobvsl, char* jobvsr, char* sort,\n                   LAPACK_Z_SELECT2 selctg, lapack_int* n,\n                   lapack_complex_double* a, lapack_int* lda,\n                   lapack_complex_double* b, lapack_int* ldb, lapack_int* sdim,\n                   lapack_complex_double* alpha, lapack_complex_double* beta,\n                   lapack_complex_double* vsl, lapack_int* ldvsl,\n                   lapack_complex_double* vsr, lapack_int* ldvsr,\n                   lapack_complex_double* work, lapack_int* lwork,\n                   double* rwork, lapack_logical* bwork, 
lapack_int *info );\nvoid LAPACK_sggesx( char* jobvsl, char* jobvsr, char* sort,\n                    LAPACK_S_SELECT3 selctg, char* sense, lapack_int* n,\n                    float* a, lapack_int* lda, float* b, lapack_int* ldb,\n                    lapack_int* sdim, float* alphar, float* alphai, float* beta,\n                    float* vsl, lapack_int* ldvsl, float* vsr,\n                    lapack_int* ldvsr, float* rconde, float* rcondv,\n                    float* work, lapack_int* lwork, lapack_int* iwork,\n                    lapack_int* liwork, lapack_logical* bwork,\n                    lapack_int *info );\nvoid LAPACK_dggesx( char* jobvsl, char* jobvsr, char* sort,\n                    LAPACK_D_SELECT3 selctg, char* sense, lapack_int* n,\n                    double* a, lapack_int* lda, double* b, lapack_int* ldb,\n                    lapack_int* sdim, double* alphar, double* alphai,\n                    double* beta, double* vsl, lapack_int* ldvsl, double* vsr,\n                    lapack_int* ldvsr, double* rconde, double* rcondv,\n                    double* work, lapack_int* lwork, lapack_int* iwork,\n                    lapack_int* liwork, lapack_logical* bwork,\n                    lapack_int *info );\nvoid LAPACK_cggesx( char* jobvsl, char* jobvsr, char* sort,\n                    LAPACK_C_SELECT2 selctg, char* sense, lapack_int* n,\n                    lapack_complex_float* a, lapack_int* lda,\n                    lapack_complex_float* b, lapack_int* ldb, lapack_int* sdim,\n                    lapack_complex_float* alpha, lapack_complex_float* beta,\n                    lapack_complex_float* vsl, lapack_int* ldvsl,\n                    lapack_complex_float* vsr, lapack_int* ldvsr, float* rconde,\n                    float* rcondv, lapack_complex_float* work,\n                    lapack_int* lwork, float* rwork, lapack_int* iwork,\n                    lapack_int* liwork, lapack_logical* bwork,\n                    lapack_int *info );\nvoid LAPACK_zggesx( char* jobvsl, char* jobvsr, char* sort,\n                    LAPACK_Z_SELECT2 selctg, char* sense, lapack_int* n,\n                    lapack_complex_double* a, lapack_int* lda,\n                    lapack_complex_double* b, lapack_int* ldb, lapack_int* sdim,\n                    lapack_complex_double* alpha, lapack_complex_double* beta,\n                    lapack_complex_double* vsl, lapack_int* ldvsl,\n                    lapack_complex_double* vsr, lapack_int* ldvsr,\n                    double* rconde, double* rcondv, lapack_complex_double* work,\n                    lapack_int* lwork, double* rwork, lapack_int* iwork,\n                    lapack_int* liwork, lapack_logical* bwork,\n                    lapack_int *info );\nvoid LAPACK_sggev( char* jobvl, char* jobvr, lapack_int* n, float* a,\n                   lapack_int* lda, float* b, lapack_int* ldb, float* alphar,\n                   float* alphai, float* beta, float* vl, lapack_int* ldvl,\n                   float* vr, lapack_int* ldvr, float* work, lapack_int* lwork,\n                   lapack_int *info );\nvoid LAPACK_dggev( char* jobvl, char* jobvr, lapack_int* n, double* a,\n                   lapack_int* lda, double* b, lapack_int* ldb, double* alphar,\n                   double* alphai, double* beta, double* vl, lapack_int* ldvl,\n                   double* vr, lapack_int* ldvr, double* work,\n                   lapack_int* lwork, lapack_int *info );\nvoid LAPACK_cggev( char* jobvl, char* jobvr, lapack_int* n,\n                   lapack_complex_float* a, 
lapack_int* lda,\n                   lapack_complex_float* b, lapack_int* ldb,\n                   lapack_complex_float* alpha, lapack_complex_float* beta,\n                   lapack_complex_float* vl, lapack_int* ldvl,\n                   lapack_complex_float* vr, lapack_int* ldvr,\n                   lapack_complex_float* work, lapack_int* lwork, float* rwork,\n                   lapack_int *info );\nvoid LAPACK_zggev( char* jobvl, char* jobvr, lapack_int* n,\n                   lapack_complex_double* a, lapack_int* lda,\n                   lapack_complex_double* b, lapack_int* ldb,\n                   lapack_complex_double* alpha, lapack_complex_double* beta,\n                   lapack_complex_double* vl, lapack_int* ldvl,\n                   lapack_complex_double* vr, lapack_int* ldvr,\n                   lapack_complex_double* work, lapack_int* lwork,\n                   double* rwork, lapack_int *info );\nvoid LAPACK_sggevx( char* balanc, char* jobvl, char* jobvr, char* sense,\n                    lapack_int* n, float* a, lapack_int* lda, float* b,\n                    lapack_int* ldb, float* alphar, float* alphai, float* beta,\n                    float* vl, lapack_int* ldvl, float* vr, lapack_int* ldvr,\n                    lapack_int* ilo, lapack_int* ihi, float* lscale,\n                    float* rscale, float* abnrm, float* bbnrm, float* rconde,\n                    float* rcondv, float* work, lapack_int* lwork,\n                    lapack_int* iwork, lapack_logical* bwork,\n                    lapack_int *info );\nvoid LAPACK_dggevx( char* balanc, char* jobvl, char* jobvr, char* sense,\n                    lapack_int* n, double* a, lapack_int* lda, double* b,\n                    lapack_int* ldb, double* alphar, double* alphai,\n                    double* beta, double* vl, lapack_int* ldvl, double* vr,\n                    lapack_int* ldvr, lapack_int* ilo, lapack_int* ihi,\n                    double* lscale, double* rscale, double* abnrm,\n                    double* bbnrm, double* rconde, double* rcondv, double* work,\n                    lapack_int* lwork, lapack_int* iwork, lapack_logical* bwork,\n                    lapack_int *info );\nvoid LAPACK_cggevx( char* balanc, char* jobvl, char* jobvr, char* sense,\n                    lapack_int* n, lapack_complex_float* a, lapack_int* lda,\n                    lapack_complex_float* b, lapack_int* ldb,\n                    lapack_complex_float* alpha, lapack_complex_float* beta,\n                    lapack_complex_float* vl, lapack_int* ldvl,\n                    lapack_complex_float* vr, lapack_int* ldvr, lapack_int* ilo,\n                    lapack_int* ihi, float* lscale, float* rscale, float* abnrm,\n                    float* bbnrm, float* rconde, float* rcondv,\n                    lapack_complex_float* work, lapack_int* lwork, float* rwork,\n                    lapack_int* iwork, lapack_logical* bwork,\n                    lapack_int *info );\nvoid LAPACK_zggevx( char* balanc, char* jobvl, char* jobvr, char* sense,\n                    lapack_int* n, lapack_complex_double* a, lapack_int* lda,\n                    lapack_complex_double* b, lapack_int* ldb,\n                    lapack_complex_double* alpha, lapack_complex_double* beta,\n                    lapack_complex_double* vl, lapack_int* ldvl,\n                    lapack_complex_double* vr, lapack_int* ldvr,\n                    lapack_int* ilo, lapack_int* ihi, double* lscale,\n                    double* rscale, double* abnrm, double* bbnrm,\n                    
double* rconde, double* rcondv, lapack_complex_double* work,\n                    lapack_int* lwork, double* rwork, lapack_int* iwork,\n                    lapack_logical* bwork, lapack_int *info );\nvoid LAPACK_dsfrk( char* transr, char* uplo, char* trans, lapack_int* n,\n                   lapack_int* k, double* alpha, const double* a,\n                   lapack_int* lda, double* beta, double* c );\nvoid LAPACK_ssfrk( char* transr, char* uplo, char* trans, lapack_int* n,\n                   lapack_int* k, float* alpha, const float* a, lapack_int* lda,\n                   float* beta, float* c );\nvoid LAPACK_zhfrk( char* transr, char* uplo, char* trans, lapack_int* n,\n                   lapack_int* k, double* alpha, const lapack_complex_double* a,\n                   lapack_int* lda, double* beta, lapack_complex_double* c );\nvoid LAPACK_chfrk( char* transr, char* uplo, char* trans, lapack_int* n,\n                   lapack_int* k, float* alpha, const lapack_complex_float* a,\n                   lapack_int* lda, float* beta, lapack_complex_float* c );\nvoid LAPACK_dtfsm( char* transr, char* side, char* uplo, char* trans,\n                   char* diag, lapack_int* m, lapack_int* n, double* alpha,\n                   const double* a, double* b, lapack_int* ldb );\nvoid LAPACK_stfsm( char* transr, char* side, char* uplo, char* trans,\n                   char* diag, lapack_int* m, lapack_int* n, float* alpha,\n                   const float* a, float* b, lapack_int* ldb );\nvoid LAPACK_ztfsm( char* transr, char* side, char* uplo, char* trans,\n                   char* diag, lapack_int* m, lapack_int* n,\n                   lapack_complex_double* alpha, const lapack_complex_double* a,\n                   lapack_complex_double* b, lapack_int* ldb );\nvoid LAPACK_ctfsm( char* transr, char* side, char* uplo, char* trans,\n                   char* diag, lapack_int* m, lapack_int* n,\n                   lapack_complex_float* alpha, const lapack_complex_float* a,\n                   lapack_complex_float* b, lapack_int* ldb );\nvoid LAPACK_dtfttp( char* transr, char* uplo, lapack_int* n, const double* arf,\n                    double* ap, lapack_int *info );\nvoid LAPACK_stfttp( char* transr, char* uplo, lapack_int* n, const float* arf,\n                    float* ap, lapack_int *info );\nvoid LAPACK_ztfttp( char* transr, char* uplo, lapack_int* n,\n                    const lapack_complex_double* arf, lapack_complex_double* ap,\n                    lapack_int *info );\nvoid LAPACK_ctfttp( char* transr, char* uplo, lapack_int* n,\n                    const lapack_complex_float* arf, lapack_complex_float* ap,\n                    lapack_int *info );\nvoid LAPACK_dtfttr( char* transr, char* uplo, lapack_int* n, const double* arf,\n                    double* a, lapack_int* lda, lapack_int *info );\nvoid LAPACK_stfttr( char* transr, char* uplo, lapack_int* n, const float* arf,\n                    float* a, lapack_int* lda, lapack_int *info );\nvoid LAPACK_ztfttr( char* transr, char* uplo, lapack_int* n,\n                    const lapack_complex_double* arf, lapack_complex_double* a,\n                    lapack_int* lda, lapack_int *info );\nvoid LAPACK_ctfttr( char* transr, char* uplo, lapack_int* n,\n                    const lapack_complex_float* arf, lapack_complex_float* a,\n                    lapack_int* lda, lapack_int *info );\nvoid LAPACK_dtpttf( char* transr, char* uplo, lapack_int* n, const double* ap,\n                    double* arf, lapack_int *info );\nvoid LAPACK_stpttf( char* 
transr, char* uplo, lapack_int* n, const float* ap,\n                    float* arf, lapack_int *info );\nvoid LAPACK_ztpttf( char* transr, char* uplo, lapack_int* n,\n                    const lapack_complex_double* ap, lapack_complex_double* arf,\n                    lapack_int *info );\nvoid LAPACK_ctpttf( char* transr, char* uplo, lapack_int* n,\n                    const lapack_complex_float* ap, lapack_complex_float* arf,\n                    lapack_int *info );\nvoid LAPACK_dtpttr( char* uplo, lapack_int* n, const double* ap, double* a,\n                    lapack_int* lda, lapack_int *info );\nvoid LAPACK_stpttr( char* uplo, lapack_int* n, const float* ap, float* a,\n                    lapack_int* lda, lapack_int *info );\nvoid LAPACK_ztpttr( char* uplo, lapack_int* n, const lapack_complex_double* ap,\n                    lapack_complex_double* a, lapack_int* lda,\n                    lapack_int *info );\nvoid LAPACK_ctpttr( char* uplo, lapack_int* n, const lapack_complex_float* ap,\n                    lapack_complex_float* a, lapack_int* lda,\n                    lapack_int *info );\nvoid LAPACK_dtrttf( char* transr, char* uplo, lapack_int* n, const double* a,\n                    lapack_int* lda, double* arf, lapack_int *info );\nvoid LAPACK_strttf( char* transr, char* uplo, lapack_int* n, const float* a,\n                    lapack_int* lda, float* arf, lapack_int *info );\nvoid LAPACK_ztrttf( char* transr, char* uplo, lapack_int* n,\n                    const lapack_complex_double* a, lapack_int* lda,\n                    lapack_complex_double* arf, lapack_int *info );\nvoid LAPACK_ctrttf( char* transr, char* uplo, lapack_int* n,\n                    const lapack_complex_float* a, lapack_int* lda,\n                    lapack_complex_float* arf, lapack_int *info );\nvoid LAPACK_dtrttp( char* uplo, lapack_int* n, const double* a, lapack_int* lda,\n                    double* ap, lapack_int *info );\nvoid LAPACK_strttp( char* uplo, lapack_int* n, const float* a, lapack_int* lda,\n                    float* ap, lapack_int *info );\nvoid LAPACK_ztrttp( char* uplo, lapack_int* n, const lapack_complex_double* a,\n                    lapack_int* lda, lapack_complex_double* ap,\n                    lapack_int *info );\nvoid LAPACK_ctrttp( char* uplo, lapack_int* n, const lapack_complex_float* a,\n                    lapack_int* lda, lapack_complex_float* ap,\n                    lapack_int *info );\nvoid LAPACK_sgeqrfp( lapack_int* m, lapack_int* n, float* a, lapack_int* lda,\n                     float* tau, float* work, lapack_int* lwork,\n                     lapack_int *info );\nvoid LAPACK_dgeqrfp( lapack_int* m, lapack_int* n, double* a, lapack_int* lda,\n                     double* tau, double* work, lapack_int* lwork,\n                     lapack_int *info );\nvoid LAPACK_cgeqrfp( lapack_int* m, lapack_int* n, lapack_complex_float* a,\n                     lapack_int* lda, lapack_complex_float* tau,\n                     lapack_complex_float* work, lapack_int* lwork,\n                     lapack_int *info );\nvoid LAPACK_zgeqrfp( lapack_int* m, lapack_int* n, lapack_complex_double* a,\n                     lapack_int* lda, lapack_complex_double* tau,\n                     lapack_complex_double* work, lapack_int* lwork,\n                     lapack_int *info );\nvoid LAPACK_clacgv( lapack_int* n, lapack_complex_float* x, lapack_int* incx );\nvoid LAPACK_zlacgv( lapack_int* n, lapack_complex_double* x, lapack_int* incx );\nvoid LAPACK_slarnv( lapack_int* idist, lapack_int* 
iseed, lapack_int* n,\n                    float* x );\nvoid LAPACK_dlarnv( lapack_int* idist, lapack_int* iseed, lapack_int* n,\n                    double* x );\nvoid LAPACK_clarnv( lapack_int* idist, lapack_int* iseed, lapack_int* n,\n                    lapack_complex_float* x );\nvoid LAPACK_zlarnv( lapack_int* idist, lapack_int* iseed, lapack_int* n,\n                    lapack_complex_double* x );\nvoid LAPACK_sgeqr2( lapack_int* m, lapack_int* n, float* a, lapack_int* lda,\n                    float* tau, float* work, lapack_int *info );\nvoid LAPACK_dgeqr2( lapack_int* m, lapack_int* n, double* a, lapack_int* lda,\n                    double* tau, double* work, lapack_int *info );\nvoid LAPACK_cgeqr2( lapack_int* m, lapack_int* n, lapack_complex_float* a,\n                    lapack_int* lda, lapack_complex_float* tau,\n                    lapack_complex_float* work, lapack_int *info );\nvoid LAPACK_zgeqr2( lapack_int* m, lapack_int* n, lapack_complex_double* a,\n                    lapack_int* lda, lapack_complex_double* tau,\n                    lapack_complex_double* work, lapack_int *info );\nvoid LAPACK_slacpy( char* uplo, lapack_int* m, lapack_int* n, const float* a,\n                    lapack_int* lda, float* b, lapack_int* ldb );\nvoid LAPACK_dlacpy( char* uplo, lapack_int* m, lapack_int* n, const double* a,\n                    lapack_int* lda, double* b, lapack_int* ldb );\nvoid LAPACK_clacpy( char* uplo, lapack_int* m, lapack_int* n,\n                    const lapack_complex_float* a, lapack_int* lda,\n                    lapack_complex_float* b, lapack_int* ldb );\nvoid LAPACK_zlacpy( char* uplo, lapack_int* m, lapack_int* n,\n                    const lapack_complex_double* a, lapack_int* lda,\n                    lapack_complex_double* b, lapack_int* ldb );\nvoid LAPACK_sgetf2( lapack_int* m, lapack_int* n, float* a, lapack_int* lda,\n                    lapack_int* ipiv, lapack_int *info );\nvoid LAPACK_dgetf2( lapack_int* m, lapack_int* n, double* a, lapack_int* lda,\n                    lapack_int* ipiv, lapack_int *info );\nvoid LAPACK_cgetf2( lapack_int* m, lapack_int* n, lapack_complex_float* a,\n                    lapack_int* lda, lapack_int* ipiv, lapack_int *info );\nvoid LAPACK_zgetf2( lapack_int* m, lapack_int* n, lapack_complex_double* a,\n                    lapack_int* lda, lapack_int* ipiv, lapack_int *info );\nvoid LAPACK_slaswp( lapack_int* n, float* a, lapack_int* lda, lapack_int* k1,\n                    lapack_int* k2, const lapack_int* ipiv, lapack_int* incx );\nvoid LAPACK_dlaswp( lapack_int* n, double* a, lapack_int* lda, lapack_int* k1,\n                    lapack_int* k2, const lapack_int* ipiv, lapack_int* incx );\nvoid LAPACK_claswp( lapack_int* n, lapack_complex_float* a, lapack_int* lda,\n                    lapack_int* k1, lapack_int* k2, const lapack_int* ipiv,\n                    lapack_int* incx );\nvoid LAPACK_zlaswp( lapack_int* n, lapack_complex_double* a, lapack_int* lda,\n                    lapack_int* k1, lapack_int* k2, const lapack_int* ipiv,\n                    lapack_int* incx );\nfloat LAPACK_slange( char* norm, lapack_int* m, lapack_int* n, const float* a,\n                    lapack_int* lda, float* work );\ndouble LAPACK_dlange( char* norm, lapack_int* m, lapack_int* n, const double* a,\n                    lapack_int* lda, double* work );\nfloat LAPACK_clange( char* norm, lapack_int* m, lapack_int* n,\n                    const lapack_complex_float* a, lapack_int* lda, float* work );\ndouble LAPACK_zlange( char* 
norm, lapack_int* m, lapack_int* n,\n                    const lapack_complex_double* a, lapack_int* lda, double* work );\nfloat LAPACK_clanhe( char* norm, char* uplo, lapack_int* n,\n                    const lapack_complex_float* a, lapack_int* lda, float* work );\ndouble LAPACK_zlanhe( char* norm, char* uplo, lapack_int* n,\n                    const lapack_complex_double* a, lapack_int* lda, double* work );\nfloat LAPACK_slansy( char* norm, char* uplo, lapack_int* n, const float* a,\n                    lapack_int* lda, float* work );\ndouble LAPACK_dlansy( char* norm, char* uplo, lapack_int* n, const double* a,\n                    lapack_int* lda, double* work );\nfloat LAPACK_clansy( char* norm, char* uplo, lapack_int* n,\n                    const lapack_complex_float* a, lapack_int* lda, float* work );\ndouble LAPACK_zlansy( char* norm, char* uplo, lapack_int* n,\n                    const lapack_complex_double* a, lapack_int* lda, double* work );\nfloat LAPACK_slantr( char* norm, char* uplo, char* diag, lapack_int* m,\n                    lapack_int* n, const float* a, lapack_int* lda, float* work );\ndouble LAPACK_dlantr( char* norm, char* uplo, char* diag, lapack_int* m,\n                    lapack_int* n, const double* a, lapack_int* lda, double* work );\nfloat LAPACK_clantr( char* norm, char* uplo, char* diag, lapack_int* m,\n                    lapack_int* n, const lapack_complex_float* a, lapack_int* lda,\n                    float* work );\ndouble LAPACK_zlantr( char* norm, char* uplo, char* diag, lapack_int* m,\n                    lapack_int* n, const lapack_complex_double* a, lapack_int* lda,\n                    double* work );\nfloat LAPACK_slamch( char* cmach );\ndouble LAPACK_dlamch( char* cmach );\nvoid LAPACK_sgelq2( lapack_int* m, lapack_int* n, float* a, lapack_int* lda,\n                    float* tau, float* work, lapack_int *info );\nvoid LAPACK_dgelq2( lapack_int* m, lapack_int* n, double* a, lapack_int* lda,\n                    double* tau, double* work, lapack_int *info );\nvoid LAPACK_cgelq2( lapack_int* m, lapack_int* n, lapack_complex_float* a,\n                    lapack_int* lda, lapack_complex_float* tau,\n                    lapack_complex_float* work, lapack_int *info );\nvoid LAPACK_zgelq2( lapack_int* m, lapack_int* n, lapack_complex_double* a,\n                    lapack_int* lda, lapack_complex_double* tau,\n                    lapack_complex_double* work, lapack_int *info );\nvoid LAPACK_slarfb( char* side, char* trans, char* direct, char* storev,\n                    lapack_int* m, lapack_int* n, lapack_int* k, const float* v,\n                    lapack_int* ldv, const float* t, lapack_int* ldt, float* c,\n                    lapack_int* ldc, float* work, lapack_int* ldwork );\nvoid LAPACK_dlarfb( char* side, char* trans, char* direct, char* storev,\n                    lapack_int* m, lapack_int* n, lapack_int* k,\n                    const double* v, lapack_int* ldv, const double* t,\n                    lapack_int* ldt, double* c, lapack_int* ldc, double* work,\n                    lapack_int* ldwork );\nvoid LAPACK_clarfb( char* side, char* trans, char* direct, char* storev,\n                    lapack_int* m, lapack_int* n, lapack_int* k,\n                    const lapack_complex_float* v, lapack_int* ldv,\n                    const lapack_complex_float* t, lapack_int* ldt,\n                    lapack_complex_float* c, lapack_int* ldc,\n                    lapack_complex_float* work, lapack_int* ldwork );\nvoid LAPACK_zlarfb( char* 
side, char* trans, char* direct, char* storev,\n                    lapack_int* m, lapack_int* n, lapack_int* k,\n                    const lapack_complex_double* v, lapack_int* ldv,\n                    const lapack_complex_double* t, lapack_int* ldt,\n                    lapack_complex_double* c, lapack_int* ldc,\n                    lapack_complex_double* work, lapack_int* ldwork );\nvoid LAPACK_slarfg( lapack_int* n, float* alpha, float* x, lapack_int* incx,\n                    float* tau );\nvoid LAPACK_dlarfg( lapack_int* n, double* alpha, double* x, lapack_int* incx,\n                    double* tau );\nvoid LAPACK_clarfg( lapack_int* n, lapack_complex_float* alpha,\n                    lapack_complex_float* x, lapack_int* incx,\n                    lapack_complex_float* tau );\nvoid LAPACK_zlarfg( lapack_int* n, lapack_complex_double* alpha,\n                    lapack_complex_double* x, lapack_int* incx,\n                    lapack_complex_double* tau );\nvoid LAPACK_slarft( char* direct, char* storev, lapack_int* n, lapack_int* k,\n                    const float* v, lapack_int* ldv, const float* tau, float* t,\n                    lapack_int* ldt );\nvoid LAPACK_dlarft( char* direct, char* storev, lapack_int* n, lapack_int* k,\n                    const double* v, lapack_int* ldv, const double* tau,\n                    double* t, lapack_int* ldt );\nvoid LAPACK_clarft( char* direct, char* storev, lapack_int* n, lapack_int* k,\n                    const lapack_complex_float* v, lapack_int* ldv,\n                    const lapack_complex_float* tau, lapack_complex_float* t,\n                    lapack_int* ldt );\nvoid LAPACK_zlarft( char* direct, char* storev, lapack_int* n, lapack_int* k,\n                    const lapack_complex_double* v, lapack_int* ldv,\n                    const lapack_complex_double* tau, lapack_complex_double* t,\n                    lapack_int* ldt );\nvoid LAPACK_slarfx( char* side, lapack_int* m, lapack_int* n, const float* v,\n                    float* tau, float* c, lapack_int* ldc, float* work );\nvoid LAPACK_dlarfx( char* side, lapack_int* m, lapack_int* n, const double* v,\n                    double* tau, double* c, lapack_int* ldc, double* work );\nvoid LAPACK_clarfx( char* side, lapack_int* m, lapack_int* n,\n                    const lapack_complex_float* v, lapack_complex_float* tau,\n                    lapack_complex_float* c, lapack_int* ldc,\n                    lapack_complex_float* work );\nvoid LAPACK_zlarfx( char* side, lapack_int* m, lapack_int* n,\n                    const lapack_complex_double* v, lapack_complex_double* tau,\n                    lapack_complex_double* c, lapack_int* ldc,\n                    lapack_complex_double* work );\nvoid LAPACK_slatms( lapack_int* m, lapack_int* n, char* dist, lapack_int* iseed,\n                    char* sym, float* d, lapack_int* mode, float* cond,\n                    float* dmax, lapack_int* kl, lapack_int* ku, char* pack,\n                    float* a, lapack_int* lda, float* work, lapack_int *info );\nvoid LAPACK_dlatms( lapack_int* m, lapack_int* n, char* dist, lapack_int* iseed,\n                    char* sym, double* d, lapack_int* mode, double* cond,\n                    double* dmax, lapack_int* kl, lapack_int* ku, char* pack,\n                    double* a, lapack_int* lda, double* work,\n                    lapack_int *info );\nvoid LAPACK_clatms( lapack_int* m, lapack_int* n, char* dist, lapack_int* iseed,\n                    char* sym, float* d, lapack_int* mode, float* 
cond,\n                    float* dmax, lapack_int* kl, lapack_int* ku, char* pack,\n                    lapack_complex_float* a, lapack_int* lda,\n                    lapack_complex_float* work, lapack_int *info );\nvoid LAPACK_zlatms( lapack_int* m, lapack_int* n, char* dist, lapack_int* iseed,\n                    char* sym, double* d, lapack_int* mode, double* cond,\n                    double* dmax, lapack_int* kl, lapack_int* ku, char* pack,\n                    lapack_complex_double* a, lapack_int* lda,\n                    lapack_complex_double* work, lapack_int *info );\nvoid LAPACK_slag2d( lapack_int* m, lapack_int* n, const float* sa,\n                    lapack_int* ldsa, double* a, lapack_int* lda,\n                    lapack_int *info );\nvoid LAPACK_dlag2s( lapack_int* m, lapack_int* n, const double* a,\n                    lapack_int* lda, float* sa, lapack_int* ldsa,\n                    lapack_int *info );\nvoid LAPACK_clag2z( lapack_int* m, lapack_int* n,\n                    const lapack_complex_float* sa, lapack_int* ldsa,\n                    lapack_complex_double* a, lapack_int* lda,\n                    lapack_int *info );\nvoid LAPACK_zlag2c( lapack_int* m, lapack_int* n,\n                    const lapack_complex_double* a, lapack_int* lda,\n                    lapack_complex_float* sa, lapack_int* ldsa,\n                    lapack_int *info );\nvoid LAPACK_slauum( char* uplo, lapack_int* n, float* a, lapack_int* lda,\n                    lapack_int *info );\nvoid LAPACK_dlauum( char* uplo, lapack_int* n, double* a, lapack_int* lda,\n                    lapack_int *info );\nvoid LAPACK_clauum( char* uplo, lapack_int* n, lapack_complex_float* a,\n                    lapack_int* lda, lapack_int *info );\nvoid LAPACK_zlauum( char* uplo, lapack_int* n, lapack_complex_double* a,\n                    lapack_int* lda, lapack_int *info );\nvoid LAPACK_slagge( lapack_int* m, lapack_int* n, lapack_int* kl,\n                    lapack_int* ku, const float* d, float* a, lapack_int* lda,\n                    lapack_int* iseed, float* work, lapack_int *info );\nvoid LAPACK_dlagge( lapack_int* m, lapack_int* n, lapack_int* kl,\n                    lapack_int* ku, const double* d, double* a, lapack_int* lda,\n                    lapack_int* iseed, double* work, lapack_int *info );\nvoid LAPACK_clagge( lapack_int* m, lapack_int* n, lapack_int* kl,\n                    lapack_int* ku, const float* d, lapack_complex_float* a,\n                    lapack_int* lda, lapack_int* iseed,\n                    lapack_complex_float* work, lapack_int *info );\nvoid LAPACK_zlagge( lapack_int* m, lapack_int* n, lapack_int* kl,\n                    lapack_int* ku, const double* d, lapack_complex_double* a,\n                    lapack_int* lda, lapack_int* iseed,\n                    lapack_complex_double* work, lapack_int *info );\nvoid LAPACK_slaset( char* uplo, lapack_int* m, lapack_int* n, float* alpha,\n                    float* beta, float* a, lapack_int* lda );\nvoid LAPACK_dlaset( char* uplo, lapack_int* m, lapack_int* n, double* alpha,\n                    double* beta, double* a, lapack_int* lda );\nvoid LAPACK_claset( char* uplo, lapack_int* m, lapack_int* n,\n                    lapack_complex_float* alpha, lapack_complex_float* beta,\n                    lapack_complex_float* a, lapack_int* lda );\nvoid LAPACK_zlaset( char* uplo, lapack_int* m, lapack_int* n,\n                    lapack_complex_double* alpha, lapack_complex_double* beta,\n                    lapack_complex_double* a, 
lapack_int* lda );\nvoid LAPACK_slasrt( char* id, lapack_int* n, float* d, lapack_int *info );\nvoid LAPACK_dlasrt( char* id, lapack_int* n, double* d, lapack_int *info );\nvoid LAPACK_claghe( lapack_int* n, lapack_int* k, const float* d,\n                    lapack_complex_float* a, lapack_int* lda, lapack_int* iseed,\n                    lapack_complex_float* work, lapack_int *info );\nvoid LAPACK_zlaghe( lapack_int* n, lapack_int* k, const double* d,\n                    lapack_complex_double* a, lapack_int* lda,\n                    lapack_int* iseed, lapack_complex_double* work,\n                    lapack_int *info );\nvoid LAPACK_slagsy( lapack_int* n, lapack_int* k, const float* d, float* a,\n                    lapack_int* lda, lapack_int* iseed, float* work,\n                    lapack_int *info );\nvoid LAPACK_dlagsy( lapack_int* n, lapack_int* k, const double* d, double* a,\n                    lapack_int* lda, lapack_int* iseed, double* work,\n                    lapack_int *info );\nvoid LAPACK_clagsy( lapack_int* n, lapack_int* k, const float* d,\n                    lapack_complex_float* a, lapack_int* lda, lapack_int* iseed,\n                    lapack_complex_float* work, lapack_int *info );\nvoid LAPACK_zlagsy( lapack_int* n, lapack_int* k, const double* d,\n                    lapack_complex_double* a, lapack_int* lda,\n                    lapack_int* iseed, lapack_complex_double* work,\n                    lapack_int *info );\nvoid LAPACK_slapmr( lapack_logical* forwrd, lapack_int* m, lapack_int* n,\n                    float* x, lapack_int* ldx, lapack_int* k );\nvoid LAPACK_dlapmr( lapack_logical* forwrd, lapack_int* m, lapack_int* n,\n                    double* x, lapack_int* ldx, lapack_int* k );\nvoid LAPACK_clapmr( lapack_logical* forwrd, lapack_int* m, lapack_int* n,\n                    lapack_complex_float* x, lapack_int* ldx, lapack_int* k );\nvoid LAPACK_zlapmr( lapack_logical* forwrd, lapack_int* m, lapack_int* n,\n                    lapack_complex_double* x, lapack_int* ldx, lapack_int* k );\nfloat LAPACK_slapy2( float* x, float* y );\ndouble LAPACK_dlapy2( double* x, double* y );\nfloat LAPACK_slapy3( float* x, float* y, float* z );\ndouble LAPACK_dlapy3( double* x, double* y, double* z );\nvoid LAPACK_slartgp( float* f, float* g, float* cs, float* sn, float* r );\nvoid LAPACK_dlartgp( double* f, double* g, double* cs, double* sn, double* r );\nvoid LAPACK_slartgs( float* x, float* y, float* sigma, float* cs, float* sn );\nvoid LAPACK_dlartgs( double* x, double* y, double* sigma, double* cs,\n                     double* sn );\n// LAPACK 3.3.0\nvoid LAPACK_cbbcsd( char* jobu1, char* jobu2,\n                    char* jobv1t, char* jobv2t, char* trans,\n                    lapack_int* m, lapack_int* p, lapack_int* q,\n                    float* theta, float* phi,\n                    lapack_complex_float* u1, lapack_int* ldu1,\n                    lapack_complex_float* u2, lapack_int* ldu2,\n                    lapack_complex_float* v1t, lapack_int* ldv1t,\n                    lapack_complex_float* v2t, lapack_int* ldv2t,\n                    float* b11d, float* b11e, float* b12d,\n                    float* b12e, float* b21d, float* b21e,\n                    float* b22d, float* b22e, float* rwork,\n                    lapack_int* lrwork , lapack_int *info );\nvoid LAPACK_cheswapr( char* uplo, lapack_int* n,\n                      lapack_complex_float* a, lapack_int* i1,\n                      lapack_int* i2 );\nvoid LAPACK_chetri2( char* uplo, 
lapack_int* n,\n                     lapack_complex_float* a, lapack_int* lda,\n                     const lapack_int* ipiv,\n                     lapack_complex_float* work, lapack_int* lwork , lapack_int *info );\nvoid LAPACK_chetri2x( char* uplo, lapack_int* n,\n                      lapack_complex_float* a, lapack_int* lda,\n                      const lapack_int* ipiv,\n                      lapack_complex_float* work, lapack_int* nb , lapack_int *info );\nvoid LAPACK_chetrs2( char* uplo, lapack_int* n,\n                     lapack_int* nrhs, const lapack_complex_float* a,\n                     lapack_int* lda, const lapack_int* ipiv,\n                     lapack_complex_float* b, lapack_int* ldb,\n                     lapack_complex_float* work , lapack_int *info );\nvoid LAPACK_csyconv( char* uplo, char* way,\n                     lapack_int* n, lapack_complex_float* a,\n                     lapack_int* lda, const lapack_int* ipiv,\n                     lapack_complex_float* work , lapack_int *info );\nvoid LAPACK_csyswapr( char* uplo, lapack_int* n,\n                      lapack_complex_float* a, lapack_int* i1,\n                      lapack_int* i2 );\nvoid LAPACK_csytri2( char* uplo, lapack_int* n,\n                     lapack_complex_float* a, lapack_int* lda,\n                     const lapack_int* ipiv,\n                     lapack_complex_float* work, lapack_int* lwork , lapack_int *info );\nvoid LAPACK_csytri2x( char* uplo, lapack_int* n,\n                      lapack_complex_float* a, lapack_int* lda,\n                      const lapack_int* ipiv,\n                      lapack_complex_float* work, lapack_int* nb , lapack_int *info );\nvoid LAPACK_csytrs2( char* uplo, lapack_int* n,\n                     lapack_int* nrhs, const lapack_complex_float* a,\n                     lapack_int* lda, const lapack_int* ipiv,\n                     lapack_complex_float* b, lapack_int* ldb,\n                     lapack_complex_float* work , lapack_int *info );\nvoid LAPACK_cunbdb( char* trans, char* signs,\n                    lapack_int* m, lapack_int* p, lapack_int* q,\n                    lapack_complex_float* x11, lapack_int* ldx11,\n                    lapack_complex_float* x12, lapack_int* ldx12,\n                    lapack_complex_float* x21, lapack_int* ldx21,\n                    lapack_complex_float* x22, lapack_int* ldx22,\n                    float* theta, float* phi,\n                    lapack_complex_float* taup1,\n                    lapack_complex_float* taup2,\n                    lapack_complex_float* tauq1,\n                    lapack_complex_float* tauq2,\n                    lapack_complex_float* work, lapack_int* lwork , lapack_int *info );\nvoid LAPACK_cuncsd( char* jobu1, char* jobu2,\n                    char* jobv1t, char* jobv2t, char* trans,\n                    char* signs, lapack_int* m, lapack_int* p,\n                    lapack_int* q, lapack_complex_float* x11,\n                    lapack_int* ldx11, lapack_complex_float* x12,\n                    lapack_int* ldx12, lapack_complex_float* x21,\n                    lapack_int* ldx21, lapack_complex_float* x22,\n                    lapack_int* ldx22, float* theta,\n                    lapack_complex_float* u1, lapack_int* ldu1,\n                    lapack_complex_float* u2, lapack_int* ldu2,\n                    lapack_complex_float* v1t, lapack_int* ldv1t,\n                    lapack_complex_float* v2t, lapack_int* ldv2t,\n                    lapack_complex_float* work, lapack_int* lwork,\n                  
  float* rwork, lapack_int* lrwork,\n                    lapack_int* iwork , lapack_int *info );\nvoid LAPACK_dbbcsd( char* jobu1, char* jobu2,\n                    char* jobv1t, char* jobv2t, char* trans,\n                    lapack_int* m, lapack_int* p, lapack_int* q,\n                    double* theta, double* phi, double* u1,\n                    lapack_int* ldu1, double* u2, lapack_int* ldu2,\n                    double* v1t, lapack_int* ldv1t, double* v2t,\n                    lapack_int* ldv2t, double* b11d, double* b11e,\n                    double* b12d, double* b12e, double* b21d,\n                    double* b21e, double* b22d, double* b22e,\n                    double* work, lapack_int* lwork , lapack_int *info );\nvoid LAPACK_dorbdb( char* trans, char* signs,\n                    lapack_int* m, lapack_int* p, lapack_int* q,\n                    double* x11, lapack_int* ldx11, double* x12,\n                    lapack_int* ldx12, double* x21, lapack_int* ldx21,\n                    double* x22, lapack_int* ldx22, double* theta,\n                    double* phi, double* taup1, double* taup2,\n                    double* tauq1, double* tauq2, double* work,\n                    lapack_int* lwork , lapack_int *info );\nvoid LAPACK_dorcsd( char* jobu1, char* jobu2,\n                    char* jobv1t, char* jobv2t, char* trans,\n                    char* signs, lapack_int* m, lapack_int* p,\n                    lapack_int* q, double* x11, lapack_int* ldx11,\n                    double* x12, lapack_int* ldx12, double* x21,\n                    lapack_int* ldx21, double* x22, lapack_int* ldx22,\n                    double* theta, double* u1, lapack_int* ldu1,\n                    double* u2, lapack_int* ldu2, double* v1t,\n                    lapack_int* ldv1t, double* v2t, lapack_int* ldv2t,\n                    double* work, lapack_int* lwork,\n                    lapack_int* iwork , lapack_int *info );\nvoid LAPACK_dsyconv( char* uplo, char* way,\n                     lapack_int* n, double* a, lapack_int* lda,\n                     const lapack_int* ipiv, double* work , lapack_int *info );\nvoid LAPACK_dsyswapr( char* uplo, lapack_int* n,\n                      double* a, lapack_int* i1, lapack_int* i2 );\nvoid LAPACK_dsytri2( char* uplo, lapack_int* n,\n                     double* a, lapack_int* lda,\n                     const lapack_int* ipiv,\n                     lapack_complex_double* work, lapack_int* lwork , lapack_int *info );\nvoid LAPACK_dsytri2x( char* uplo, lapack_int* n,\n                      double* a, lapack_int* lda,\n                      const lapack_int* ipiv, double* work,\n                      lapack_int* nb , lapack_int *info );\nvoid LAPACK_dsytrs2( char* uplo, lapack_int* n,\n                     lapack_int* nrhs, const double* a,\n                     lapack_int* lda, const lapack_int* ipiv,\n                     double* b, lapack_int* ldb, double* work , lapack_int *info );\nvoid LAPACK_sbbcsd( char* jobu1, char* jobu2,\n                    char* jobv1t, char* jobv2t, char* trans,\n                    lapack_int* m, lapack_int* p, lapack_int* q,\n                    float* theta, float* phi, float* u1,\n                    lapack_int* ldu1, float* u2, lapack_int* ldu2,\n                    float* v1t, lapack_int* ldv1t, float* v2t,\n                    lapack_int* ldv2t, float* b11d, float* b11e,\n                    float* b12d, float* b12e, float* b21d,\n                    float* b21e, float* b22d, float* b22e,\n                    float* work, 
lapack_int* lwork , lapack_int *info );\nvoid LAPACK_sorbdb( char* trans, char* signs,\n                    lapack_int* m, lapack_int* p, lapack_int* q,\n                    float* x11, lapack_int* ldx11, float* x12,\n                    lapack_int* ldx12, float* x21, lapack_int* ldx21,\n                    float* x22, lapack_int* ldx22, float* theta,\n                    float* phi, float* taup1, float* taup2,\n                    float* tauq1, float* tauq2, float* work,\n                    lapack_int* lwork , lapack_int *info );\nvoid LAPACK_sorcsd( char* jobu1, char* jobu2,\n                    char* jobv1t, char* jobv2t, char* trans,\n                    char* signs, lapack_int* m, lapack_int* p,\n                    lapack_int* q, float* x11, lapack_int* ldx11,\n                    float* x12, lapack_int* ldx12, float* x21,\n                    lapack_int* ldx21, float* x22, lapack_int* ldx22,\n                    float* theta, float* u1, lapack_int* ldu1,\n                    float* u2, lapack_int* ldu2, float* v1t,\n                    lapack_int* ldv1t, float* v2t, lapack_int* ldv2t,\n                    float* work, lapack_int* lwork,\n                    lapack_int* iwork , lapack_int *info );\nvoid LAPACK_ssyconv( char* uplo, char* way,\n                     lapack_int* n, float* a, lapack_int* lda,\n                     const lapack_int* ipiv, float* work , lapack_int *info );\nvoid LAPACK_ssyswapr( char* uplo, lapack_int* n,\n                      float* a, lapack_int* i1, lapack_int* i2 );\nvoid LAPACK_ssytri2( char* uplo, lapack_int* n,\n                     float* a, lapack_int* lda,\n                     const lapack_int* ipiv,\n                     lapack_complex_float* work, lapack_int* lwork , lapack_int *info );\nvoid LAPACK_ssytri2x( char* uplo, lapack_int* n,\n                      float* a, lapack_int* lda,\n                      const lapack_int* ipiv, float* work,\n                      lapack_int* nb , lapack_int *info );\nvoid LAPACK_ssytrs2( char* uplo, lapack_int* n,\n                     lapack_int* nrhs, const float* a,\n                     lapack_int* lda, const lapack_int* ipiv,\n                     float* b, lapack_int* ldb, float* work , lapack_int *info );\nvoid LAPACK_zbbcsd( char* jobu1, char* jobu2,\n                    char* jobv1t, char* jobv2t, char* trans,\n                    lapack_int* m, lapack_int* p, lapack_int* q,\n                    double* theta, double* phi,\n                    lapack_complex_double* u1, lapack_int* ldu1,\n                    lapack_complex_double* u2, lapack_int* ldu2,\n                    lapack_complex_double* v1t, lapack_int* ldv1t,\n                    lapack_complex_double* v2t, lapack_int* ldv2t,\n                    double* b11d, double* b11e, double* b12d,\n                    double* b12e, double* b21d, double* b21e,\n                    double* b22d, double* b22e, double* rwork,\n                    lapack_int* lrwork , lapack_int *info );\nvoid LAPACK_zheswapr( char* uplo, lapack_int* n,\n                      lapack_complex_double* a, lapack_int* i1,\n                      lapack_int* i2 );\nvoid LAPACK_zhetri2( char* uplo, lapack_int* n,\n                     lapack_complex_double* a, lapack_int* lda,\n                     const lapack_int* ipiv,\n                     lapack_complex_double* work, lapack_int* lwork , lapack_int *info );\nvoid LAPACK_zhetri2x( char* uplo, lapack_int* n,\n                      lapack_complex_double* a, lapack_int* lda,\n                      const lapack_int* ipiv,\n     
                 lapack_complex_double* work, lapack_int* nb , lapack_int *info );\nvoid LAPACK_zhetrs2( char* uplo, lapack_int* n,\n                     lapack_int* nrhs,\n                     const lapack_complex_double* a, lapack_int* lda,\n                     const lapack_int* ipiv,\n                     lapack_complex_double* b, lapack_int* ldb,\n                     lapack_complex_double* work , lapack_int *info );\nvoid LAPACK_zsyconv( char* uplo, char* way,\n                     lapack_int* n, lapack_complex_double* a,\n                     lapack_int* lda, const lapack_int* ipiv,\n                     lapack_complex_double* work , lapack_int *info );\nvoid LAPACK_zsyswapr( char* uplo, lapack_int* n,\n                      lapack_complex_double* a, lapack_int* i1,\n                      lapack_int* i2 );\nvoid LAPACK_zsytri2( char* uplo, lapack_int* n,\n                     lapack_complex_double* a, lapack_int* lda,\n                     const lapack_int* ipiv,\n                     lapack_complex_double* work, lapack_int* lwork , lapack_int *info );\nvoid LAPACK_zsytri2x( char* uplo, lapack_int* n,\n                      lapack_complex_double* a, lapack_int* lda,\n                      const lapack_int* ipiv,\n                      lapack_complex_double* work, lapack_int* nb , lapack_int *info );\nvoid LAPACK_zsytrs2( char* uplo, lapack_int* n,\n                     lapack_int* nrhs,\n                     const lapack_complex_double* a, lapack_int* lda,\n                     const lapack_int* ipiv,\n                     lapack_complex_double* b, lapack_int* ldb,\n                     lapack_complex_double* work , lapack_int *info );\nvoid LAPACK_zunbdb( char* trans, char* signs,\n                    lapack_int* m, lapack_int* p, lapack_int* q,\n                    lapack_complex_double* x11, lapack_int* ldx11,\n                    lapack_complex_double* x12, lapack_int* ldx12,\n                    lapack_complex_double* x21, lapack_int* ldx21,\n                    lapack_complex_double* x22, lapack_int* ldx22,\n                    double* theta, double* phi,\n                    lapack_complex_double* taup1,\n                    lapack_complex_double* taup2,\n                    lapack_complex_double* tauq1,\n                    lapack_complex_double* tauq2,\n                    lapack_complex_double* work, lapack_int* lwork , lapack_int *info );\nvoid LAPACK_zuncsd( char* jobu1, char* jobu2,\n                    char* jobv1t, char* jobv2t, char* trans,\n                    char* signs, lapack_int* m, lapack_int* p,\n                    lapack_int* q, lapack_complex_double* x11,\n                    lapack_int* ldx11, lapack_complex_double* x12,\n                    lapack_int* ldx12, lapack_complex_double* x21,\n                    lapack_int* ldx21, lapack_complex_double* x22,\n                    lapack_int* ldx22, double* theta,\n                    lapack_complex_double* u1, lapack_int* ldu1,\n                    lapack_complex_double* u2, lapack_int* ldu2,\n                    lapack_complex_double* v1t, lapack_int* ldv1t,\n                    lapack_complex_double* v2t, lapack_int* ldv2t,\n                    lapack_complex_double* work, lapack_int* lwork,\n                    double* rwork, lapack_int* lrwork,\n                    lapack_int* iwork , lapack_int *info );\n// LAPACK 3.4.0\nvoid LAPACK_sgemqrt( char* side, char* trans, lapack_int* m, lapack_int* n,\n                     lapack_int* k, lapack_int* nb, const float* v,\n                     lapack_int* ldv, 
const float* t, lapack_int* ldt, float* c,\n                     lapack_int* ldc, float* work, lapack_int *info );\nvoid LAPACK_dgemqrt( char* side, char* trans, lapack_int* m, lapack_int* n,\n                     lapack_int* k, lapack_int* nb, const double* v,\n                     lapack_int* ldv, const double* t, lapack_int* ldt,\n                     double* c, lapack_int* ldc, double* work,\n                     lapack_int *info );\nvoid LAPACK_cgemqrt( char* side, char* trans, lapack_int* m, lapack_int* n,\n                     lapack_int* k, lapack_int* nb,\n                     const lapack_complex_float* v, lapack_int* ldv,\n                     const lapack_complex_float* t, lapack_int* ldt,\n                     lapack_complex_float* c, lapack_int* ldc,\n                     lapack_complex_float* work, lapack_int *info );\nvoid LAPACK_zgemqrt( char* side, char* trans, lapack_int* m, lapack_int* n,\n                     lapack_int* k, lapack_int* nb,\n                     const lapack_complex_double* v, lapack_int* ldv,\n                     const lapack_complex_double* t, lapack_int* ldt,\n                     lapack_complex_double* c, lapack_int* ldc,\n                     lapack_complex_double* work, lapack_int *info );\nvoid LAPACK_sgeqrt( lapack_int* m, lapack_int* n, lapack_int* nb, float* a,\n                    lapack_int* lda, float* t, lapack_int* ldt, float* work,\n                    lapack_int *info );\nvoid LAPACK_dgeqrt( lapack_int* m, lapack_int* n, lapack_int* nb, double* a,\n                    lapack_int* lda, double* t, lapack_int* ldt, double* work,\n                    lapack_int *info );\nvoid LAPACK_cgeqrt( lapack_int* m, lapack_int* n, lapack_int* nb,\n                    lapack_complex_float* a, lapack_int* lda,\n                    lapack_complex_float* t, lapack_int* ldt,\n                    lapack_complex_float* work, lapack_int *info );\nvoid LAPACK_zgeqrt( lapack_int* m, lapack_int* n, lapack_int* nb,\n                    lapack_complex_double* a, lapack_int* lda,\n                    lapack_complex_double* t, lapack_int* ldt,\n                    lapack_complex_double* work, lapack_int *info );\nvoid LAPACK_sgeqrt2( lapack_int* m, lapack_int* n, float* a, lapack_int* lda,\n                     float* t, lapack_int* ldt, lapack_int *info );\nvoid LAPACK_dgeqrt2( lapack_int* m, lapack_int* n, double* a, lapack_int* lda,\n                     double* t, lapack_int* ldt, lapack_int *info );\nvoid LAPACK_cgeqrt2( lapack_int* m, lapack_int* n, lapack_complex_float* a,\n                     lapack_int* lda, lapack_complex_float* t, lapack_int* ldt,\n                     lapack_int *info );\nvoid LAPACK_zgeqrt2( lapack_int* m, lapack_int* n, lapack_complex_double* a,\n                     lapack_int* lda, lapack_complex_double* t, lapack_int* ldt,\n                     lapack_int *info );\nvoid LAPACK_sgeqrt3( lapack_int* m, lapack_int* n, float* a, lapack_int* lda,\n                     float* t, lapack_int* ldt, lapack_int *info );\nvoid LAPACK_dgeqrt3( lapack_int* m, lapack_int* n, double* a, lapack_int* lda,\n                     double* t, lapack_int* ldt, lapack_int *info );\nvoid LAPACK_cgeqrt3( lapack_int* m, lapack_int* n, lapack_complex_float* a,\n                     lapack_int* lda, lapack_complex_float* t, lapack_int* ldt,\n                     lapack_int *info );\nvoid LAPACK_zgeqrt3( lapack_int* m, lapack_int* n, lapack_complex_double* a,\n                     lapack_int* lda, lapack_complex_double* t, lapack_int* ldt,\n                     
lapack_int *info );\nvoid LAPACK_stpmqrt( char* side, char* trans, lapack_int* m, lapack_int* n,\n                     lapack_int* k, lapack_int* l, lapack_int* nb,\n                     const float* v, lapack_int* ldv, const float* t,\n                     lapack_int* ldt, float* a, lapack_int* lda, float* b,\n                     lapack_int* ldb, float* work, lapack_int *info );\nvoid LAPACK_dtpmqrt( char* side, char* trans, lapack_int* m, lapack_int* n,\n                     lapack_int* k, lapack_int* l, lapack_int* nb,\n                     const double* v, lapack_int* ldv, const double* t,\n                     lapack_int* ldt, double* a, lapack_int* lda, double* b,\n                     lapack_int* ldb, double* work, lapack_int *info );\nvoid LAPACK_ctpmqrt( char* side, char* trans, lapack_int* m, lapack_int* n,\n                     lapack_int* k, lapack_int* l, lapack_int* nb,\n                     const lapack_complex_float* v, lapack_int* ldv,\n                     const lapack_complex_float* t, lapack_int* ldt,\n                     lapack_complex_float* a, lapack_int* lda,\n                     lapack_complex_float* b, lapack_int* ldb,\n                     lapack_complex_float* work, lapack_int *info );\nvoid LAPACK_ztpmqrt( char* side, char* trans, lapack_int* m, lapack_int* n,\n                     lapack_int* k, lapack_int* l, lapack_int* nb,\n                     const lapack_complex_double* v, lapack_int* ldv,\n                     const lapack_complex_double* t, lapack_int* ldt,\n                     lapack_complex_double* a, lapack_int* lda,\n                     lapack_complex_double* b, lapack_int* ldb,\n                     lapack_complex_double* work, lapack_int *info );\nvoid LAPACK_stpqrt( lapack_int* m, lapack_int* n, lapack_int* l, lapack_int* nb,\n                    float* a, lapack_int* lda, float* b, lapack_int* ldb,\n                    float* t, lapack_int* ldt, float* work,\n                    lapack_int *info );\nvoid LAPACK_dtpqrt( lapack_int* m, lapack_int* n, lapack_int* l, lapack_int* nb,\n                    double* a, lapack_int* lda, double* b, lapack_int* ldb,\n                    double* t, lapack_int* ldt, double* work,\n                    lapack_int *info );\nvoid LAPACK_ctpqrt( lapack_int* m, lapack_int* n, lapack_int* l, lapack_int* nb,\n                    lapack_complex_float* a, lapack_int* lda,\n                    lapack_complex_float* b, lapack_int* ldb,\n                    lapack_complex_float* t, lapack_int* ldt,\n                    lapack_complex_float* work, lapack_int *info );\nvoid LAPACK_ztpqrt( lapack_int* m, lapack_int* n, lapack_int* l, lapack_int* nb,\n                    lapack_complex_double* a, lapack_int* lda,\n                    lapack_complex_double* b, lapack_int* ldb,\n                    lapack_complex_double* t, lapack_int* ldt,\n                    lapack_complex_double* work, lapack_int *info );\nvoid LAPACK_stpqrt2( lapack_int* m, lapack_int* n, float* a, lapack_int* lda,\n                     float* b, lapack_int* ldb, float* t, lapack_int* ldt,\n                     lapack_int *info );\nvoid LAPACK_dtpqrt2( lapack_int* m, lapack_int* n, double* a, lapack_int* lda,\n                     double* b, lapack_int* ldb, double* t, lapack_int* ldt,\n                     lapack_int *info );\nvoid LAPACK_ctpqrt2( lapack_int* m, lapack_int* n, lapack_complex_float* a,\n                     lapack_int* lda, lapack_complex_float* b, lapack_int* ldb,\n                     lapack_complex_float* t, lapack_int* ldt,\n                     lapack_int *info );\nvoid LAPACK_ztpqrt2( lapack_int* m, lapack_int* n, lapack_complex_double* a,\n                     lapack_int* lda, lapack_complex_double* b, lapack_int* ldb,\n                     lapack_complex_double* t, lapack_int* 
ldt,\n                     lapack_int *info );\nvoid LAPACK_stprfb( char* side, char* trans, char* direct, char* storev,\n                    lapack_int* m, lapack_int* n, lapack_int* k, lapack_int* l,\n                    const float* v, lapack_int* ldv, const float* t,\n                    lapack_int* ldt, float* a, lapack_int* lda, float* b,\n                    lapack_int* ldb, float* work,\n                    lapack_int* ldwork );\nvoid LAPACK_dtprfb( char* side, char* trans, char* direct, char* storev,\n                    lapack_int* m, lapack_int* n, lapack_int* k, lapack_int* l,\n                    const double* v, lapack_int* ldv, const double* t,\n                    lapack_int* ldt, double* a, lapack_int* lda, double* b,\n                    lapack_int* ldb, double* work,\n                    lapack_int* ldwork );\nvoid LAPACK_ctprfb( char* side, char* trans, char* direct, char* storev,\n                    lapack_int* m, lapack_int* n, lapack_int* k, lapack_int* l,\n                    const lapack_complex_float* v, lapack_int* ldv,\n                    const lapack_complex_float* t, lapack_int* ldt,\n                    lapack_complex_float* a, lapack_int* lda,\n                    lapack_complex_float* b, lapack_int* ldb,\n                    lapack_complex_float* work, lapack_int* ldwork );\nvoid LAPACK_ztprfb( char* side, char* trans, char* direct, char* storev,\n                    lapack_int* m, lapack_int* n, lapack_int* k, lapack_int* l,\n                    const lapack_complex_double* v, lapack_int* ldv,\n                    const lapack_complex_double* t, lapack_int* ldt,\n                    lapack_complex_double* a, lapack_int* lda,\n                    lapack_complex_double* b, lapack_int* ldb,\n                    lapack_complex_double* work, lapack_int* ldwork );\n// LAPACK 3.X.X\nvoid LAPACK_csyr( char* uplo, lapack_int* n, lapack_complex_float* alpha,\n                      const lapack_complex_float* x, lapack_int* incx,\n                      lapack_complex_float* a, lapack_int* lda );\nvoid LAPACK_zsyr( char* uplo, lapack_int* n, lapack_complex_double* alpha,\n                      const lapack_complex_double* x, lapack_int* incx,\n                      lapack_complex_double* a, lapack_int* lda );\n\n#ifdef __cplusplus\n}\n#endif /* __cplusplus */\n\n#endif /* _LAPACKE_H_ */\n\n#endif /* _MKL_LAPACKE_H_ */\n"
  },
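The prototypes above follow the raw Fortran calling convention: every argument, including scalars such as m and lda, is passed by address, matrices are stored column-major, and errors are reported through the trailing info pointer rather than a return value. Below is a minimal sketch of a call through one of these declarations, assuming the vendored header is reachable as "mkl_lapacke.h" and a LAPACK implementation such as MKL is linked; the include name and build setup are assumptions, not part of the repository.

#include <cstdio>
#include "mkl_lapacke.h"  // assumed include name for the vendored header above

int main() {
  lapack_int m = 3, n = 2, lda = 3, ldt = 2, info = 0;
  // 3x2 matrix in column-major order: first column (1,1,1), second (0,1,2).
  float a[6] = { 1.f, 1.f, 1.f,  0.f, 1.f, 2.f };
  float t[4] = { 0.f, 0.f, 0.f, 0.f };  // n-by-n upper-triangular factor T
  // Compact-WY QR factorization of a; note that every argument is a pointer.
  LAPACK_sgeqrt2(&m, &n, a, &lda, t, &ldt, &info);
  std::printf("info = %d\n", (int)info);  // 0 on success, <0 flags a bad argument
  return (int)info;
}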
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/misc/lapacke_mangling.h",
    "content": "#ifndef LAPACK_HEADER_INCLUDED\n#define LAPACK_HEADER_INCLUDED\n\n#ifndef LAPACK_GLOBAL\n#if defined(LAPACK_GLOBAL_PATTERN_LC) || defined(ADD_)\n#define LAPACK_GLOBAL(lcname,UCNAME)  lcname##_\n#elif defined(LAPACK_GLOBAL_PATTERN_UC) || defined(UPPER)\n#define LAPACK_GLOBAL(lcname,UCNAME)  UCNAME\n#elif defined(LAPACK_GLOBAL_PATTERN_MC) || defined(NOCHANGE)\n#define LAPACK_GLOBAL(lcname,UCNAME)  lcname\n#else\n#define LAPACK_GLOBAL(lcname,UCNAME)  lcname##_\n#endif\n#endif\n\n#endif\n\n"
  },
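This mangling header lets C and C++ code spell a LAPACK routine once and still link against Fortran builds that export lowercase, uppercase, or underscore-suffixed symbols. A short sketch of how the macro is typically used follows; the dgetrf declaration is illustrative only, and the macro itself is the only thing taken from the file above.

#include "lapacke_mangling.h"

// LAPACK_GLOBAL picks the linker-visible name at preprocessing time:
//   default, LAPACK_GLOBAL_PATTERN_LC, or ADD_  ->  dgetrf_
//   LAPACK_GLOBAL_PATTERN_UC or UPPER           ->  DGETRF
//   LAPACK_GLOBAL_PATTERN_MC or NOCHANGE        ->  dgetrf
#define LAPACK_dgetrf LAPACK_GLOBAL(dgetrf, DGETRF)
extern "C" void LAPACK_dgetrf(int* m, int* n, double* a, int* lda,
                              int* ipiv, int* info);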
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/plugins/ArrayCwiseBinaryOps.h",
    "content": "\n/** \\returns an expression of the coefficient wise product of \\c *this and \\a other\n  *\n  * \\sa MatrixBase::cwiseProduct\n  */\ntemplate<typename OtherDerived>\nEIGEN_DEVICE_FUNC\nEIGEN_STRONG_INLINE const EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,product)\noperator*(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const\n{\n  return EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,product)(derived(), other.derived());\n}\n\n/** \\returns an expression of the coefficient wise quotient of \\c *this and \\a other\n  *\n  * \\sa MatrixBase::cwiseQuotient\n  */\ntemplate<typename OtherDerived>\nEIGEN_DEVICE_FUNC\nEIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_quotient_op<Scalar,typename OtherDerived::Scalar>, const Derived, const OtherDerived>\noperator/(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const\n{\n  return CwiseBinaryOp<internal::scalar_quotient_op<Scalar,typename OtherDerived::Scalar>, const Derived, const OtherDerived>(derived(), other.derived());\n}\n\n/** \\returns an expression of the coefficient-wise min of \\c *this and \\a other\n  *\n  * Example: \\include Cwise_min.cpp\n  * Output: \\verbinclude Cwise_min.out\n  *\n  * \\sa max()\n  */\nEIGEN_MAKE_CWISE_BINARY_OP(min,min)\n\n/** \\returns an expression of the coefficient-wise min of \\c *this and scalar \\a other\n  *\n  * \\sa max()\n  */\nEIGEN_DEVICE_FUNC\nEIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_min_op<Scalar,Scalar>, const Derived,\n                                        const CwiseNullaryOp<internal::scalar_constant_op<Scalar>, PlainObject> >\n#ifdef EIGEN_PARSED_BY_DOXYGEN\nmin\n#else\n(min)\n#endif\n(const Scalar &other) const\n{\n  return (min)(Derived::PlainObject::Constant(rows(), cols(), other));\n}\n\n/** \\returns an expression of the coefficient-wise max of \\c *this and \\a other\n  *\n  * Example: \\include Cwise_max.cpp\n  * Output: \\verbinclude Cwise_max.out\n  *\n  * \\sa min()\n  */\nEIGEN_MAKE_CWISE_BINARY_OP(max,max)\n\n/** \\returns an expression of the coefficient-wise max of \\c *this and scalar \\a other\n  *\n  * \\sa min()\n  */\nEIGEN_DEVICE_FUNC\nEIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_max_op<Scalar,Scalar>, const Derived,\n                                        const CwiseNullaryOp<internal::scalar_constant_op<Scalar>, PlainObject> >\n#ifdef EIGEN_PARSED_BY_DOXYGEN\nmax\n#else\n(max)\n#endif\n(const Scalar &other) const\n{\n  return (max)(Derived::PlainObject::Constant(rows(), cols(), other));\n}\n\n/** \\returns an expression of the coefficient-wise absdiff of \\c *this and \\a other\n  *\n  * Example: \\include Cwise_absolute_difference.cpp\n  * Output: \\verbinclude Cwise_absolute_difference.out\n  *\n  * \\sa absolute_difference()\n  */\nEIGEN_MAKE_CWISE_BINARY_OP(absolute_difference,absolute_difference)\n\n/** \\returns an expression of the coefficient-wise absolute_difference of \\c *this and scalar \\a other\n  *\n  * \\sa absolute_difference()\n  */\nEIGEN_DEVICE_FUNC\nEIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_absolute_difference_op<Scalar,Scalar>, const Derived,\n                                        const CwiseNullaryOp<internal::scalar_constant_op<Scalar>, PlainObject> >\n#ifdef EIGEN_PARSED_BY_DOXYGEN\nabsolute_difference\n#else\n(absolute_difference)\n#endif\n(const Scalar &other) const\n{\n  return (absolute_difference)(Derived::PlainObject::Constant(rows(), cols(), other));\n}\n\n/** \\returns an expression of the coefficient-wise power of \\c *this 
to the given array of \\a exponents.\n  *\n  * This function computes the coefficient-wise power.\n  *\n  * Example: \\include Cwise_array_power_array.cpp\n  * Output: \\verbinclude Cwise_array_power_array.out\n  */\nEIGEN_MAKE_CWISE_BINARY_OP(pow,pow)\n\n#ifndef EIGEN_PARSED_BY_DOXYGEN\nEIGEN_MAKE_SCALAR_BINARY_OP_ONTHERIGHT(pow,pow)\n#else\n/** \\returns an expression of the coefficients of \\c *this raised to the constant power \\a exponent\n  *\n  * \\tparam T is the scalar type of \\a exponent. It must be compatible with the scalar type of the given expression.\n  *\n  * This function computes the coefficient-wise power. The function MatrixBase::pow() in the\n  * unsupported module MatrixFunctions computes the matrix power.\n  *\n  * Example: \\include Cwise_pow.cpp\n  * Output: \\verbinclude Cwise_pow.out\n  *\n  * \\sa ArrayBase::pow(ArrayBase), square(), cube(), exp(), log()\n  */\ntemplate<typename T>\nconst CwiseBinaryOp<internal::scalar_pow_op<Scalar,T>,Derived,Constant<T> > pow(const T& exponent) const;\n#endif\n\n\n// TODO code generating macros could be moved to Macros.h and could include generation of documentation\n#define EIGEN_MAKE_CWISE_COMP_OP(OP, COMPARATOR) \\\ntemplate<typename OtherDerived> \\\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_cmp_op<Scalar, typename OtherDerived::Scalar, internal::cmp_ ## COMPARATOR>, const Derived, const OtherDerived> \\\nOP(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const \\\n{ \\\n  return CwiseBinaryOp<internal::scalar_cmp_op<Scalar, typename OtherDerived::Scalar, internal::cmp_ ## COMPARATOR>, const Derived, const OtherDerived>(derived(), other.derived()); \\\n}\\\ntypedef CwiseBinaryOp<internal::scalar_cmp_op<Scalar,Scalar, internal::cmp_ ## COMPARATOR>, const Derived, const CwiseNullaryOp<internal::scalar_constant_op<Scalar>, PlainObject> > Cmp ## COMPARATOR ## ReturnType; \\\ntypedef CwiseBinaryOp<internal::scalar_cmp_op<Scalar,Scalar, internal::cmp_ ## COMPARATOR>, const CwiseNullaryOp<internal::scalar_constant_op<Scalar>, PlainObject>, const Derived > RCmp ## COMPARATOR ## ReturnType; \\\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Cmp ## COMPARATOR ## ReturnType \\\nOP(const Scalar& s) const { \\\n  return this->OP(Derived::PlainObject::Constant(rows(), cols(), s)); \\\n} \\\nEIGEN_DEVICE_FUNC friend EIGEN_STRONG_INLINE const RCmp ## COMPARATOR ## ReturnType \\\nOP(const Scalar& s, const EIGEN_CURRENT_STORAGE_BASE_CLASS<Derived>& d) { \\\n  return Derived::PlainObject::Constant(d.rows(), d.cols(), s).OP(d); \\\n}\n\n#define EIGEN_MAKE_CWISE_COMP_R_OP(OP, R_OP, RCOMPARATOR) \\\ntemplate<typename OtherDerived> \\\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_cmp_op<typename OtherDerived::Scalar, Scalar, internal::cmp_##RCOMPARATOR>, const OtherDerived, const Derived> \\\nOP(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const \\\n{ \\\n  return CwiseBinaryOp<internal::scalar_cmp_op<typename OtherDerived::Scalar, Scalar, internal::cmp_##RCOMPARATOR>, const OtherDerived, const Derived>(other.derived(), derived()); \\\n} \\\nEIGEN_DEVICE_FUNC \\\ninline const RCmp ## RCOMPARATOR ## ReturnType \\\nOP(const Scalar& s) const { \\\n  return Derived::PlainObject::Constant(rows(), cols(), s).R_OP(*this); \\\n} \\\nfriend inline const Cmp ## RCOMPARATOR ## ReturnType \\\nOP(const Scalar& s, const Derived& d) { \\\n  return d.R_OP(Derived::PlainObject::Constant(d.rows(), d.cols(), s)); \\\n}\n\n\n\n/** \\returns an expression of the 
coefficient-wise \\< operator of *this and \\a other\n  *\n  * Example: \\include Cwise_less.cpp\n  * Output: \\verbinclude Cwise_less.out\n  *\n  * \\sa all(), any(), operator>(), operator<=()\n  */\nEIGEN_MAKE_CWISE_COMP_OP(operator<, LT)\n\n/** \\returns an expression of the coefficient-wise \\<= operator of *this and \\a other\n  *\n  * Example: \\include Cwise_less_equal.cpp\n  * Output: \\verbinclude Cwise_less_equal.out\n  *\n  * \\sa all(), any(), operator>=(), operator<()\n  */\nEIGEN_MAKE_CWISE_COMP_OP(operator<=, LE)\n\n/** \\returns an expression of the coefficient-wise \\> operator of *this and \\a other\n  *\n  * Example: \\include Cwise_greater.cpp\n  * Output: \\verbinclude Cwise_greater.out\n  *\n  * \\sa all(), any(), operator>=(), operator<()\n  */\nEIGEN_MAKE_CWISE_COMP_R_OP(operator>, operator<, LT)\n\n/** \\returns an expression of the coefficient-wise \\>= operator of *this and \\a other\n  *\n  * Example: \\include Cwise_greater_equal.cpp\n  * Output: \\verbinclude Cwise_greater_equal.out\n  *\n  * \\sa all(), any(), operator>(), operator<=()\n  */\nEIGEN_MAKE_CWISE_COMP_R_OP(operator>=, operator<=, LE)\n\n/** \\returns an expression of the coefficient-wise == operator of *this and \\a other\n  *\n  * \\warning this performs an exact comparison, which is generally a bad idea with floating-point types.\n  * In order to check for equality between two vectors or matrices with floating-point coefficients, it is\n  * generally a far better idea to use a fuzzy comparison as provided by isApprox() and\n  * isMuchSmallerThan().\n  *\n  * Example: \\include Cwise_equal_equal.cpp\n  * Output: \\verbinclude Cwise_equal_equal.out\n  *\n  * \\sa all(), any(), isApprox(), isMuchSmallerThan()\n  */\nEIGEN_MAKE_CWISE_COMP_OP(operator==, EQ)\n\n/** \\returns an expression of the coefficient-wise != operator of *this and \\a other\n  *\n  * \\warning this performs an exact comparison, which is generally a bad idea with floating-point types.\n  * In order to check for equality between two vectors or matrices with floating-point coefficients, it is\n  * generally a far better idea to use a fuzzy comparison as provided by isApprox() and\n  * isMuchSmallerThan().\n  *\n  * Example: \\include Cwise_not_equal.cpp\n  * Output: \\verbinclude Cwise_not_equal.out\n  *\n  * \\sa all(), any(), isApprox(), isMuchSmallerThan()\n  */\nEIGEN_MAKE_CWISE_COMP_OP(operator!=, NEQ)\n\n\n#undef EIGEN_MAKE_CWISE_COMP_OP\n#undef EIGEN_MAKE_CWISE_COMP_R_OP\n\n// scalar addition\n#ifndef EIGEN_PARSED_BY_DOXYGEN\nEIGEN_MAKE_SCALAR_BINARY_OP(operator+,sum)\n#else\n/** \\returns an expression of \\c *this with each coeff incremented by the constant \\a scalar\n  *\n  * \\tparam T is the scalar type of \\a scalar. It must be compatible with the scalar type of the given expression.\n  *\n  * Example: \\include Cwise_plus.cpp\n  * Output: \\verbinclude Cwise_plus.out\n  *\n  * \\sa operator+=(), operator-()\n  */\ntemplate<typename T>\nconst CwiseBinaryOp<internal::scalar_sum_op<Scalar,T>,Derived,Constant<T> > operator+(const T& scalar) const;\n/** \\returns an expression of \\a expr with each coeff incremented by the constant \\a scalar\n  *\n  * \\tparam T is the scalar type of \\a scalar. 
It must be compatible with the scalar type of the given expression.\n  */\ntemplate<typename T> friend\nconst CwiseBinaryOp<internal::scalar_sum_op<T,Scalar>,Constant<T>,Derived> operator+(const T& scalar, const StorageBaseType& expr);\n#endif\n\n#ifndef EIGEN_PARSED_BY_DOXYGEN\nEIGEN_MAKE_SCALAR_BINARY_OP(operator-,difference)\n#else\n/** \\returns an expression of \\c *this with each coeff decremented by the constant \\a scalar\n  *\n  * \\tparam T is the scalar type of \\a scalar. It must be compatible with the scalar type of the given expression.\n  *\n  * Example: \\include Cwise_minus.cpp\n  * Output: \\verbinclude Cwise_minus.out\n  *\n  * \\sa operator+=(), operator-()\n  */\ntemplate<typename T>\nconst CwiseBinaryOp<internal::scalar_difference_op<Scalar,T>,Derived,Constant<T> > operator-(const T& scalar) const;\n/** \\returns an expression of the constant matrix of value \\a scalar decremented by the coefficients of \\a expr\n  *\n  * \\tparam T is the scalar type of \\a scalar. It must be compatible with the scalar type of the given expression.\n  */\ntemplate<typename T> friend\nconst CwiseBinaryOp<internal::scalar_difference_op<T,Scalar>,Constant<T>,Derived> operator-(const T& scalar, const StorageBaseType& expr);\n#endif\n\n\n#ifndef EIGEN_PARSED_BY_DOXYGEN\n  EIGEN_MAKE_SCALAR_BINARY_OP_ONTHELEFT(operator/,quotient)\n#else\n  /**\n    * \\brief Component-wise division of the scalar \\a s by array elements of \\a a.\n    *\n    * \\tparam T is the scalar type of \\a s. It must be compatible with the scalar type of the given array expression (\\c Derived::Scalar).\n    */\n  template<typename T> friend\n  inline const CwiseBinaryOp<internal::scalar_quotient_op<T,Scalar>,Constant<T>,Derived>\n  operator/(const T& s,const StorageBaseType& a);\n#endif\n\n/** \\returns an expression of the coefficient-wise ^ operator of *this and \\a other\n *\n * \\warning this operator is for expression of bool only.\n *\n * Example: \\include Cwise_boolean_xor.cpp\n * Output: \\verbinclude Cwise_boolean_xor.out\n *\n * \\sa operator&&(), select()\n */\ntemplate<typename OtherDerived>\nEIGEN_DEVICE_FUNC\ninline const CwiseBinaryOp<internal::scalar_boolean_xor_op, const Derived, const OtherDerived>\noperator^(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const\n{\n  EIGEN_STATIC_ASSERT((internal::is_same<bool,Scalar>::value && internal::is_same<bool,typename OtherDerived::Scalar>::value),\n                      THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_OF_BOOL);\n  return CwiseBinaryOp<internal::scalar_boolean_xor_op, const Derived, const OtherDerived>(derived(),other.derived());\n}\n\n// NOTE disabled until we agree on argument order\n#if 0\n/** \\cpp11 \\returns an expression of the coefficient-wise polygamma function.\n  *\n  * \\specialfunctions_module\n  *\n  * It returns the \\a n -th derivative of the digamma(psi) evaluated at \\c *this.\n  *\n  * \\warning Be careful with the order of the parameters: x.polygamma(n) is equivalent to polygamma(n,x)\n  *\n  * \\sa Eigen::polygamma()\n  */\ntemplate<typename DerivedN>\ninline const CwiseBinaryOp<internal::scalar_polygamma_op<Scalar>, const DerivedN, const Derived>\npolygamma(const EIGEN_CURRENT_STORAGE_BASE_CLASS<DerivedN> &n) const\n{\n  return CwiseBinaryOp<internal::scalar_polygamma_op<Scalar>, const DerivedN, const Derived>(n.derived(), this->derived());\n}\n#endif\n\n/** \\returns an expression of the coefficient-wise zeta function.\n  *\n  * \\specialfunctions_module\n  *\n  * It returns the Riemann zeta function of two 
arguments \\c *this and \\a q:\n  *\n  * \\param q is the shift, it must be > 0\n  *\n  * \\note *this is the exponent, it must be > 1.\n  * \\note This function supports only float and double scalar types. To support other scalar types, the user has\n  * to provide implementations of zeta(T,T) for any scalar type T to be supported.\n  *\n  * This method is an alias for zeta(*this,q);\n  *\n  * \\sa Eigen::zeta()\n  */\ntemplate<typename DerivedQ>\ninline const CwiseBinaryOp<internal::scalar_zeta_op<Scalar>, const Derived, const DerivedQ>\nzeta(const EIGEN_CURRENT_STORAGE_BASE_CLASS<DerivedQ> &q) const\n{\n  return CwiseBinaryOp<internal::scalar_zeta_op<Scalar>, const Derived, const DerivedQ>(this->derived(), q.derived());\n}\n"
  },
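Taken together, the operators and macros in this plugin are what give Eigen's Array types their coefficient-wise algebra. A brief usage sketch follows, assuming only that the vendored Eigen is on the include path.

#include <iostream>
#include <Eigen/Dense>

int main() {
  Eigen::ArrayXf x(4), e(4);
  x << -2.f, -0.5f, 0.5f, 2.f;
  e <<  2.f,  1.f,  1.f,  3.f;

  Eigen::ArrayXf clipped = x.min(1.f).max(-1.f);          // scalar min/max: clamp to [-1, 1]
  Eigen::Array<bool, Eigen::Dynamic, 1> pos = (x > 0.f);  // comparison yields a bool array
  Eigen::ArrayXf p = x.pow(e);                            // cwise power with array exponents
  Eigen::ArrayXf shifted = x + 10.f;                      // scalar addition

  std::cout << clipped.transpose() << "\n" << pos.transpose() << "\n"
            << p.transpose() << "\n" << shifted.transpose() << "\n";
}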
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/plugins/ArrayCwiseUnaryOps.h",
    "content": "\n\ntypedef CwiseUnaryOp<internal::scalar_abs_op<Scalar>, const Derived> AbsReturnType;\ntypedef CwiseUnaryOp<internal::scalar_arg_op<Scalar>, const Derived> ArgReturnType;\ntypedef CwiseUnaryOp<internal::scalar_abs2_op<Scalar>, const Derived> Abs2ReturnType;\ntypedef CwiseUnaryOp<internal::scalar_sqrt_op<Scalar>, const Derived> SqrtReturnType;\ntypedef CwiseUnaryOp<internal::scalar_rsqrt_op<Scalar>, const Derived> RsqrtReturnType;\ntypedef CwiseUnaryOp<internal::scalar_sign_op<Scalar>, const Derived> SignReturnType;\ntypedef CwiseUnaryOp<internal::scalar_inverse_op<Scalar>, const Derived> InverseReturnType;\ntypedef CwiseUnaryOp<internal::scalar_boolean_not_op<Scalar>, const Derived> BooleanNotReturnType;\n\ntypedef CwiseUnaryOp<internal::scalar_exp_op<Scalar>, const Derived> ExpReturnType;\ntypedef CwiseUnaryOp<internal::scalar_expm1_op<Scalar>, const Derived> Expm1ReturnType;\ntypedef CwiseUnaryOp<internal::scalar_log_op<Scalar>, const Derived> LogReturnType;\ntypedef CwiseUnaryOp<internal::scalar_log1p_op<Scalar>, const Derived> Log1pReturnType;\ntypedef CwiseUnaryOp<internal::scalar_log10_op<Scalar>, const Derived> Log10ReturnType;\ntypedef CwiseUnaryOp<internal::scalar_log2_op<Scalar>, const Derived> Log2ReturnType;\ntypedef CwiseUnaryOp<internal::scalar_cos_op<Scalar>, const Derived> CosReturnType;\ntypedef CwiseUnaryOp<internal::scalar_sin_op<Scalar>, const Derived> SinReturnType;\ntypedef CwiseUnaryOp<internal::scalar_tan_op<Scalar>, const Derived> TanReturnType;\ntypedef CwiseUnaryOp<internal::scalar_acos_op<Scalar>, const Derived> AcosReturnType;\ntypedef CwiseUnaryOp<internal::scalar_asin_op<Scalar>, const Derived> AsinReturnType;\ntypedef CwiseUnaryOp<internal::scalar_atan_op<Scalar>, const Derived> AtanReturnType;\ntypedef CwiseUnaryOp<internal::scalar_tanh_op<Scalar>, const Derived> TanhReturnType;\ntypedef CwiseUnaryOp<internal::scalar_logistic_op<Scalar>, const Derived> LogisticReturnType;\ntypedef CwiseUnaryOp<internal::scalar_sinh_op<Scalar>, const Derived> SinhReturnType;\n#if EIGEN_HAS_CXX11_MATH\ntypedef CwiseUnaryOp<internal::scalar_atanh_op<Scalar>, const Derived> AtanhReturnType;\ntypedef CwiseUnaryOp<internal::scalar_asinh_op<Scalar>, const Derived> AsinhReturnType;\ntypedef CwiseUnaryOp<internal::scalar_acosh_op<Scalar>, const Derived> AcoshReturnType;\n#endif\ntypedef CwiseUnaryOp<internal::scalar_cosh_op<Scalar>, const Derived> CoshReturnType;\ntypedef CwiseUnaryOp<internal::scalar_square_op<Scalar>, const Derived> SquareReturnType;\ntypedef CwiseUnaryOp<internal::scalar_cube_op<Scalar>, const Derived> CubeReturnType;\ntypedef CwiseUnaryOp<internal::scalar_round_op<Scalar>, const Derived> RoundReturnType;\ntypedef CwiseUnaryOp<internal::scalar_rint_op<Scalar>, const Derived> RintReturnType;\ntypedef CwiseUnaryOp<internal::scalar_floor_op<Scalar>, const Derived> FloorReturnType;\ntypedef CwiseUnaryOp<internal::scalar_ceil_op<Scalar>, const Derived> CeilReturnType;\ntypedef CwiseUnaryOp<internal::scalar_isnan_op<Scalar>, const Derived> IsNaNReturnType;\ntypedef CwiseUnaryOp<internal::scalar_isinf_op<Scalar>, const Derived> IsInfReturnType;\ntypedef CwiseUnaryOp<internal::scalar_isfinite_op<Scalar>, const Derived> IsFiniteReturnType;\n\n/** \\returns an expression of the coefficient-wise absolute value of \\c *this\n  *\n  * Example: \\include Cwise_abs.cpp\n  * Output: \\verbinclude Cwise_abs.out\n  *\n  * \\sa <a href=\"group__CoeffwiseMathFunctions.html#cwisetable_abs\">Math functions</a>, abs2()\n  */\nEIGEN_DEVICE_FUNC\nEIGEN_STRONG_INLINE 
const AbsReturnType\nabs() const\n{\n  return AbsReturnType(derived());\n}\n\n/** \\returns an expression of the coefficient-wise phase angle of \\c *this\n  *\n  * Example: \\include Cwise_arg.cpp\n  * Output: \\verbinclude Cwise_arg.out\n  *\n  * \\sa abs()\n  */\nEIGEN_DEVICE_FUNC\nEIGEN_STRONG_INLINE const ArgReturnType\narg() const\n{\n  return ArgReturnType(derived());\n}\n\n/** \\returns an expression of the coefficient-wise squared absolute value of \\c *this\n  *\n  * Example: \\include Cwise_abs2.cpp\n  * Output: \\verbinclude Cwise_abs2.out\n  *\n  * \\sa <a href=\"group__CoeffwiseMathFunctions.html#cwisetable_abs2\">Math functions</a>, abs(), square()\n  */\nEIGEN_DEVICE_FUNC\nEIGEN_STRONG_INLINE const Abs2ReturnType\nabs2() const\n{\n  return Abs2ReturnType(derived());\n}\n\n/** \\returns an expression of the coefficient-wise exponential of *this.\n  *\n  * This function computes the coefficient-wise exponential. The function MatrixBase::exp() in the\n  * unsupported module MatrixFunctions computes the matrix exponential.\n  *\n  * Example: \\include Cwise_exp.cpp\n  * Output: \\verbinclude Cwise_exp.out\n  *\n  * \\sa <a href=\"group__CoeffwiseMathFunctions.html#cwisetable_exp\">Math functions</a>, pow(), log(), sin(), cos()\n  */\nEIGEN_DEVICE_FUNC\ninline const ExpReturnType\nexp() const\n{\n  return ExpReturnType(derived());\n}\n\n/** \\returns an expression of the coefficient-wise exponential of *this minus 1.\n  *\n  * In exact arithmetic, \\c x.expm1() is equivalent to \\c x.exp() - 1,\n  * however, with finite precision, this function is much more accurate when \\c x is close to zero.\n  *\n  * \\sa <a href=\"group__CoeffwiseMathFunctions.html#cwisetable_expm1\">Math functions</a>, exp()\n  */\nEIGEN_DEVICE_FUNC\ninline const Expm1ReturnType\nexpm1() const\n{\n  return Expm1ReturnType(derived());\n}\n\n/** \\returns an expression of the coefficient-wise logarithm of *this.\n  *\n  * This function computes the coefficient-wise logarithm. 
The function MatrixBase::log() in the\n  * unsupported module MatrixFunctions computes the matrix logarithm.\n  *\n  * Example: \\include Cwise_log.cpp\n  * Output: \\verbinclude Cwise_log.out\n  *\n  * \\sa <a href=\"group__CoeffwiseMathFunctions.html#cwisetable_log\">Math functions</a>, log()\n  */\nEIGEN_DEVICE_FUNC\ninline const LogReturnType\nlog() const\n{\n  return LogReturnType(derived());\n}\n\n/** \\returns an expression of the coefficient-wise logarithm of 1 plus \\c *this.\n  *\n  * In exact arithmetic, \\c x.log1p() is equivalent to \\c (x+1).log(),\n  * however, with finite precision, this function is much more accurate when \\c x is close to zero.\n  *\n  * \\sa <a href=\"group__CoeffwiseMathFunctions.html#cwisetable_log1p\">Math functions</a>, log()\n  */\nEIGEN_DEVICE_FUNC\ninline const Log1pReturnType\nlog1p() const\n{\n  return Log1pReturnType(derived());\n}\n\n/** \\returns an expression of the coefficient-wise base-10 logarithm of *this.\n  *\n  * This function computes the coefficient-wise base-10 logarithm.\n  *\n  * Example: \\include Cwise_log10.cpp\n  * Output: \\verbinclude Cwise_log10.out\n  *\n  * \\sa <a href=\"group__CoeffwiseMathFunctions.html#cwisetable_log10\">Math functions</a>, log()\n  */\nEIGEN_DEVICE_FUNC\ninline const Log10ReturnType\nlog10() const\n{\n  return Log10ReturnType(derived());\n}\n\n/** \\returns an expression of the coefficient-wise base-2 logarithm of *this.\n  *\n  * This function computes the coefficient-wise base-2 logarithm.\n  *\n  */\nEIGEN_DEVICE_FUNC\ninline const Log2ReturnType\nlog2() const\n{\n  return Log2ReturnType(derived());\n}\n\n/** \\returns an expression of the coefficient-wise square root of *this.\n  *\n  * This function computes the coefficient-wise square root. The function MatrixBase::sqrt() in the\n  * unsupported module MatrixFunctions computes the matrix square root.\n  *\n  * Example: \\include Cwise_sqrt.cpp\n  * Output: \\verbinclude Cwise_sqrt.out\n  *\n  * \\sa <a href=\"group__CoeffwiseMathFunctions.html#cwisetable_sqrt\">Math functions</a>, pow(), square()\n  */\nEIGEN_DEVICE_FUNC\ninline const SqrtReturnType\nsqrt() const\n{\n  return SqrtReturnType(derived());\n}\n\n/** \\returns an expression of the coefficient-wise inverse square root of *this.\n  *\n  * This function computes the coefficient-wise inverse square root.\n  *\n  * Example: \\include Cwise_sqrt.cpp\n  * Output: \\verbinclude Cwise_sqrt.out\n  *\n  * \\sa pow(), square()\n  */\nEIGEN_DEVICE_FUNC\ninline const RsqrtReturnType\nrsqrt() const\n{\n  return RsqrtReturnType(derived());\n}\n\n/** \\returns an expression of the coefficient-wise signum of *this.\n  *\n  * This function computes the coefficient-wise signum.\n  *\n  * Example: \\include Cwise_sign.cpp\n  * Output: \\verbinclude Cwise_sign.out\n  *\n  * \\sa pow(), square()\n  */\nEIGEN_DEVICE_FUNC\ninline const SignReturnType\nsign() const\n{\n  return SignReturnType(derived());\n}\n\n\n/** \\returns an expression of the coefficient-wise cosine of *this.\n  *\n  * This function computes the coefficient-wise cosine. 
The function MatrixBase::cos() in the\n  * unsupported module MatrixFunctions computes the matrix cosine.\n  *\n  * Example: \\include Cwise_cos.cpp\n  * Output: \\verbinclude Cwise_cos.out\n  *\n  * \\sa <a href=\"group__CoeffwiseMathFunctions.html#cwisetable_cos\">Math functions</a>, sin(), acos()\n  */\nEIGEN_DEVICE_FUNC\ninline const CosReturnType\ncos() const\n{\n  return CosReturnType(derived());\n}\n\n\n/** \\returns an expression of the coefficient-wise sine of *this.\n  *\n  * This function computes the coefficient-wise sine. The function MatrixBase::sin() in the\n  * unsupported module MatrixFunctions computes the matrix sine.\n  *\n  * Example: \\include Cwise_sin.cpp\n  * Output: \\verbinclude Cwise_sin.out\n  *\n  * \\sa <a href=\"group__CoeffwiseMathFunctions.html#cwisetable_sin\">Math functions</a>, cos(), asin()\n  */\nEIGEN_DEVICE_FUNC\ninline const SinReturnType\nsin() const\n{\n  return SinReturnType(derived());\n}\n\n/** \\returns an expression of the coefficient-wise tan of *this.\n  *\n  * Example: \\include Cwise_tan.cpp\n  * Output: \\verbinclude Cwise_tan.out\n  *\n  * \\sa <a href=\"group__CoeffwiseMathFunctions.html#cwisetable_tan\">Math functions</a>, cos(), sin()\n  */\nEIGEN_DEVICE_FUNC\ninline const TanReturnType\ntan() const\n{\n  return TanReturnType(derived());\n}\n\n/** \\returns an expression of the coefficient-wise arc tan of *this.\n  *\n  * Example: \\include Cwise_atan.cpp\n  * Output: \\verbinclude Cwise_atan.out\n  *\n  * \\sa <a href=\"group__CoeffwiseMathFunctions.html#cwisetable_atan\">Math functions</a>, tan(), asin(), acos()\n  */\nEIGEN_DEVICE_FUNC\ninline const AtanReturnType\natan() const\n{\n  return AtanReturnType(derived());\n}\n\n/** \\returns an expression of the coefficient-wise arc cosine of *this.\n  *\n  * Example: \\include Cwise_acos.cpp\n  * Output: \\verbinclude Cwise_acos.out\n  *\n  * \\sa <a href=\"group__CoeffwiseMathFunctions.html#cwisetable_acos\">Math functions</a>, cos(), asin()\n  */\nEIGEN_DEVICE_FUNC\ninline const AcosReturnType\nacos() const\n{\n  return AcosReturnType(derived());\n}\n\n/** \\returns an expression of the coefficient-wise arc sine of *this.\n  *\n  * Example: \\include Cwise_asin.cpp\n  * Output: \\verbinclude Cwise_asin.out\n  *\n  * \\sa <a href=\"group__CoeffwiseMathFunctions.html#cwisetable_asin\">Math functions</a>, sin(), acos()\n  */\nEIGEN_DEVICE_FUNC\ninline const AsinReturnType\nasin() const\n{\n  return AsinReturnType(derived());\n}\n\n/** \\returns an expression of the coefficient-wise hyperbolic tan of *this.\n  *\n  * Example: \\include Cwise_tanh.cpp\n  * Output: \\verbinclude Cwise_tanh.out\n  *\n  * \\sa <a href=\"group__CoeffwiseMathFunctions.html#cwisetable_tanh\">Math functions</a>, tan(), sinh(), cosh()\n  */\nEIGEN_DEVICE_FUNC\ninline const TanhReturnType\ntanh() const\n{\n  return TanhReturnType(derived());\n}\n\n/** \\returns an expression of the coefficient-wise hyperbolic sin of *this.\n  *\n  * Example: \\include Cwise_sinh.cpp\n  * Output: \\verbinclude Cwise_sinh.out\n  *\n  * \\sa <a href=\"group__CoeffwiseMathFunctions.html#cwisetable_sinh\">Math functions</a>, sin(), tanh(), cosh()\n  */\nEIGEN_DEVICE_FUNC\ninline const SinhReturnType\nsinh() const\n{\n  return SinhReturnType(derived());\n}\n\n/** \\returns an expression of the coefficient-wise hyperbolic cos of *this.\n  *\n  * Example: \\include Cwise_cosh.cpp\n  * Output: \\verbinclude Cwise_cosh.out\n  *\n  * \\sa <a href=\"group__CoeffwiseMathFunctions.html#cwisetable_cosh\">Math functions</a>, tanh(), sinh(), 
cosh()\n  */\nEIGEN_DEVICE_FUNC\ninline const CoshReturnType\ncosh() const\n{\n  return CoshReturnType(derived());\n}\n\n#if EIGEN_HAS_CXX11_MATH\n/** \\returns an expression of the coefficient-wise inverse hyperbolic tan of *this.\n  *\n  * \\sa <a href=\"group__CoeffwiseMathFunctions.html#cwisetable_atanh\">Math functions</a>, atanh(), asinh(), acosh()\n  */\nEIGEN_DEVICE_FUNC\ninline const AtanhReturnType\natanh() const\n{\n  return AtanhReturnType(derived());\n}\n\n/** \\returns an expression of the coefficient-wise inverse hyperbolic sin of *this.\n  *\n  * \\sa <a href=\"group__CoeffwiseMathFunctions.html#cwisetable_asinh\">Math functions</a>, atanh(), asinh(), acosh()\n  */\nEIGEN_DEVICE_FUNC\ninline const AsinhReturnType\nasinh() const\n{\n  return AsinhReturnType(derived());\n}\n\n/** \\returns an expression of the coefficient-wise inverse hyperbolic cos of *this.\n  *\n  * \\sa <a href=\"group__CoeffwiseMathFunctions.html#cwisetable_acosh\">Math functions</a>, atanh(), asinh(), acosh()\n  */\nEIGEN_DEVICE_FUNC\ninline const AcoshReturnType\nacosh() const\n{\n  return AcoshReturnType(derived());\n}\n#endif\n\n/** \\returns an expression of the coefficient-wise logistic of *this.\n  */\nEIGEN_DEVICE_FUNC\ninline const LogisticReturnType\nlogistic() const\n{\n  return LogisticReturnType(derived());\n}\n\n/** \\returns an expression of the coefficient-wise inverse of *this.\n  *\n  * Example: \\include Cwise_inverse.cpp\n  * Output: \\verbinclude Cwise_inverse.out\n  *\n  * \\sa operator/(), operator*()\n  */\nEIGEN_DEVICE_FUNC\ninline const InverseReturnType\ninverse() const\n{\n  return InverseReturnType(derived());\n}\n\n/** \\returns an expression of the coefficient-wise square of *this.\n  *\n  * Example: \\include Cwise_square.cpp\n  * Output: \\verbinclude Cwise_square.out\n  *\n  * \\sa <a href=\"group__CoeffwiseMathFunctions.html#cwisetable_squareE\">Math functions</a>, abs2(), cube(), pow()\n  */\nEIGEN_DEVICE_FUNC\ninline const SquareReturnType\nsquare() const\n{\n  return SquareReturnType(derived());\n}\n\n/** \\returns an expression of the coefficient-wise cube of *this.\n  *\n  * Example: \\include Cwise_cube.cpp\n  * Output: \\verbinclude Cwise_cube.out\n  *\n  * \\sa <a href=\"group__CoeffwiseMathFunctions.html#cwisetable_cube\">Math functions</a>, square(), pow()\n  */\nEIGEN_DEVICE_FUNC\ninline const CubeReturnType\ncube() const\n{\n  return CubeReturnType(derived());\n}\n\n/** \\returns an expression of the coefficient-wise rint of *this.\n  *\n  * Example: \\include Cwise_rint.cpp\n  * Output: \\verbinclude Cwise_rint.out\n  *\n  * \\sa <a href=\"group__CoeffwiseMathFunctions.html#cwisetable_rint\">Math functions</a>, ceil(), floor()\n  */\nEIGEN_DEVICE_FUNC\ninline const RintReturnType\nrint() const\n{\n  return RintReturnType(derived());\n}\n\n/** \\returns an expression of the coefficient-wise round of *this.\n  *\n  * Example: \\include Cwise_round.cpp\n  * Output: \\verbinclude Cwise_round.out\n  *\n  * \\sa <a href=\"group__CoeffwiseMathFunctions.html#cwisetable_round\">Math functions</a>, ceil(), floor()\n  */\nEIGEN_DEVICE_FUNC\ninline const RoundReturnType\nround() const\n{\n  return RoundReturnType(derived());\n}\n\n/** \\returns an expression of the coefficient-wise floor of *this.\n  *\n  * Example: \\include Cwise_floor.cpp\n  * Output: \\verbinclude Cwise_floor.out\n  *\n  * \\sa <a href=\"group__CoeffwiseMathFunctions.html#cwisetable_floor\">Math functions</a>, ceil(), round()\n  */\nEIGEN_DEVICE_FUNC\ninline const FloorReturnType\nfloor() 
const\n{\n  return FloorReturnType(derived());\n}\n\n/** \\returns an expression of the coefficient-wise ceil of *this.\n  *\n  * Example: \\include Cwise_ceil.cpp\n  * Output: \\verbinclude Cwise_ceil.out\n  *\n  * \\sa <a href=\"group__CoeffwiseMathFunctions.html#cwisetable_ceil\">Math functions</a>, floor(), round()\n  */\nEIGEN_DEVICE_FUNC\ninline const CeilReturnType\nceil() const\n{\n  return CeilReturnType(derived());\n}\n\ntemplate<int N> struct ShiftRightXpr {\n  typedef CwiseUnaryOp<internal::scalar_shift_right_op<Scalar, N>, const Derived> Type;\n};\n\n/** \\returns an expression of \\c *this with the \\a Scalar type arithmetically\n  * shifted right by \\a N bit positions.\n  *\n  * The template parameter \\a N specifies the number of bit positions to shift.\n  * \n  * \\sa shiftLeft()\n  */\ntemplate<int N>\nEIGEN_DEVICE_FUNC\ntypename ShiftRightXpr<N>::Type\nshiftRight() const\n{\n  return typename ShiftRightXpr<N>::Type(derived());\n}\n\n\ntemplate<int N> struct ShiftLeftXpr {\n  typedef CwiseUnaryOp<internal::scalar_shift_left_op<Scalar, N>, const Derived> Type;\n};\n\n/** \\returns an expression of \\c *this with the \\a Scalar type logically\n  * shifted left by \\a N bit positions.\n  *\n  * The template parameter \\a N specifies the number of bit positions to shift.\n  *\n  * \\sa shiftRight()\n  */\ntemplate<int N>\nEIGEN_DEVICE_FUNC\ntypename ShiftLeftXpr<N>::Type\nshiftLeft() const\n{\n  return typename ShiftLeftXpr<N>::Type(derived());\n}\n\n/** \\returns an expression of the coefficient-wise isnan of *this.\n  *\n  * Example: \\include Cwise_isNaN.cpp\n  * Output: \\verbinclude Cwise_isNaN.out\n  *\n  * \\sa isfinite(), isinf()\n  */\nEIGEN_DEVICE_FUNC\ninline const IsNaNReturnType\nisNaN() const\n{\n  return IsNaNReturnType(derived());\n}\n\n/** \\returns an expression of the coefficient-wise isinf of *this.\n  *\n  * Example: \\include Cwise_isInf.cpp\n  * Output: \\verbinclude Cwise_isInf.out\n  *\n  * \\sa isnan(), isfinite()\n  */\nEIGEN_DEVICE_FUNC\ninline const IsInfReturnType\nisInf() const\n{\n  return IsInfReturnType(derived());\n}\n\n/** \\returns an expression of the coefficient-wise isfinite of *this.\n  *\n  * Example: \\include Cwise_isFinite.cpp\n  * Output: \\verbinclude Cwise_isFinite.out\n  *\n  * \\sa isnan(), isinf()\n  */\nEIGEN_DEVICE_FUNC\ninline const IsFiniteReturnType\nisFinite() const\n{\n  return IsFiniteReturnType(derived());\n}\n\n/** \\returns an expression of the coefficient-wise ! 
operator of *this\n  *\n  * \\warning this operator is for expression of bool only.\n  *\n  * Example: \\include Cwise_boolean_not.cpp\n  * Output: \\verbinclude Cwise_boolean_not.out\n  *\n  * \\sa operator!=()\n  */\nEIGEN_DEVICE_FUNC\ninline const BooleanNotReturnType\noperator!() const\n{\n  EIGEN_STATIC_ASSERT((internal::is_same<bool,Scalar>::value),\n                      THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_OF_BOOL);\n  return BooleanNotReturnType(derived());\n}\n\n\n// --- SpecialFunctions module ---\n\ntypedef CwiseUnaryOp<internal::scalar_lgamma_op<Scalar>, const Derived> LgammaReturnType;\ntypedef CwiseUnaryOp<internal::scalar_digamma_op<Scalar>, const Derived> DigammaReturnType;\ntypedef CwiseUnaryOp<internal::scalar_erf_op<Scalar>, const Derived> ErfReturnType;\ntypedef CwiseUnaryOp<internal::scalar_erfc_op<Scalar>, const Derived> ErfcReturnType;\ntypedef CwiseUnaryOp<internal::scalar_ndtri_op<Scalar>, const Derived> NdtriReturnType;\n\n/** \\cpp11 \\returns an expression of the coefficient-wise ln(|gamma(*this)|).\n  *\n  * \\specialfunctions_module\n  *\n  * \\note This function supports only float and double scalar types in c++11 mode. To support other scalar types,\n  * or float/double in non c++11 mode, the user has to provide implementations of lgamma(T) for any scalar\n  * type T to be supported.\n  *\n  * \\sa <a href=\"group__CoeffwiseMathFunctions.html#cwisetable_lgamma\">Math functions</a>, digamma()\n  */\nEIGEN_DEVICE_FUNC\ninline const LgammaReturnType\nlgamma() const\n{\n  return LgammaReturnType(derived());\n}\n\n/** \\returns an expression of the coefficient-wise digamma (psi, derivative of lgamma).\n  *\n  * \\specialfunctions_module\n  *\n  * \\note This function supports only float and double scalar types. To support other scalar types,\n  * the user has to provide implementations of digamma(T) for any scalar\n  * type T to be supported.\n  *\n  * \\sa <a href=\"group__CoeffwiseMathFunctions.html#cwisetable_digamma\">Math functions</a>, Eigen::digamma(), Eigen::polygamma(), lgamma()\n  */\nEIGEN_DEVICE_FUNC\ninline const DigammaReturnType\ndigamma() const\n{\n  return DigammaReturnType(derived());\n}\n\n/** \\cpp11 \\returns an expression of the coefficient-wise Gauss error\n  * function of *this.\n  *\n  * \\specialfunctions_module\n  *\n  * \\note This function supports only float and double scalar types in c++11 mode. To support other scalar types,\n  * or float/double in non c++11 mode, the user has to provide implementations of erf(T) for any scalar\n  * type T to be supported.\n  *\n  * \\sa <a href=\"group__CoeffwiseMathFunctions.html#cwisetable_erf\">Math functions</a>, erfc()\n  */\nEIGEN_DEVICE_FUNC\ninline const ErfReturnType\nerf() const\n{\n  return ErfReturnType(derived());\n}\n\n/** \\cpp11 \\returns an expression of the coefficient-wise Complementary error\n  * function of *this.\n  *\n  * \\specialfunctions_module\n  *\n  * \\note This function supports only float and double scalar types in c++11 mode. 
To support other scalar types,\n  * or float/double in non c++11 mode, the user has to provide implementations of erfc(T) for any scalar\n  * type T to be supported.\n  *\n  * \\sa <a href=\"group__CoeffwiseMathFunctions.html#cwisetable_erfc\">Math functions</a>, erf()\n  */\nEIGEN_DEVICE_FUNC\ninline const ErfcReturnType\nerfc() const\n{\n  return ErfcReturnType(derived());\n}\n\n/** \\returns an expression of the coefficient-wise inverse of the CDF of the Normal distribution\n  * function of *this.\n  *\n  * \\specialfunctions_module\n  * \n  * In other words, considering `x = ndtri(y)`, it returns the argument, x, for which the area under the\n  * Gaussian probability density function (integrated from minus infinity to x) is equal to y.\n  *\n  * \\note This function supports only float and double scalar types. To support other scalar types,\n  * the user has to provide implementations of ndtri(T) for any scalar type T to be supported.\n  *\n  * \\sa <a href=\"group__CoeffwiseMathFunctions.html#cwisetable_ndtri\">Math functions</a>\n  */\nEIGEN_DEVICE_FUNC\ninline const NdtriReturnType\nndtri() const\n{\n  return NdtriReturnType(derived());\n}\n"
  },
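Each method in the unary plugin above simply wraps the array in a lazy CwiseUnaryOp, so chains of these calls fuse into a single pass over the data. A brief sketch, again assuming only the vendored Eigen on the include path.

#include <iostream>
#include <limits>
#include <Eigen/Dense>

int main() {
  Eigen::ArrayXd x(4);
  x << 1e-12, 0.25, 2.25, std::numeric_limits<double>::quiet_NaN();

  std::cout << x.log1p().transpose()    << "\n";  // stays accurate near zero, unlike (1 + x).log()
  std::cout << x.sqrt().transpose()     << "\n";
  std::cout << x.round().transpose()    << "\n";
  std::cout << x.isNaN().transpose()    << "\n";  // prints 0 0 0 1
  std::cout << x.isFinite().transpose() << "\n";  // prints 1 1 1 0
}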
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/plugins/BlockMethods.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>\n// Copyright (C) 2006-2010 Benoit Jacob <jacob.benoit.1@gmail.com>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#ifndef EIGEN_PARSED_BY_DOXYGEN\n\n/// \\internal expression type of a column */\ntypedef Block<Derived, internal::traits<Derived>::RowsAtCompileTime, 1, !IsRowMajor> ColXpr;\ntypedef const Block<const Derived, internal::traits<Derived>::RowsAtCompileTime, 1, !IsRowMajor> ConstColXpr;\n/// \\internal expression type of a row */\ntypedef Block<Derived, 1, internal::traits<Derived>::ColsAtCompileTime, IsRowMajor> RowXpr;\ntypedef const Block<const Derived, 1, internal::traits<Derived>::ColsAtCompileTime, IsRowMajor> ConstRowXpr;\n/// \\internal expression type of a block of whole columns */\ntypedef Block<Derived, internal::traits<Derived>::RowsAtCompileTime, Dynamic, !IsRowMajor> ColsBlockXpr;\ntypedef const Block<const Derived, internal::traits<Derived>::RowsAtCompileTime, Dynamic, !IsRowMajor> ConstColsBlockXpr;\n/// \\internal expression type of a block of whole rows */\ntypedef Block<Derived, Dynamic, internal::traits<Derived>::ColsAtCompileTime, IsRowMajor> RowsBlockXpr;\ntypedef const Block<const Derived, Dynamic, internal::traits<Derived>::ColsAtCompileTime, IsRowMajor> ConstRowsBlockXpr;\n/// \\internal expression type of a block of whole columns */\ntemplate<int N> struct NColsBlockXpr { typedef Block<Derived, internal::traits<Derived>::RowsAtCompileTime, N, !IsRowMajor> Type; };\ntemplate<int N> struct ConstNColsBlockXpr { typedef const Block<const Derived, internal::traits<Derived>::RowsAtCompileTime, N, !IsRowMajor> Type; };\n/// \\internal expression type of a block of whole rows */\ntemplate<int N> struct NRowsBlockXpr { typedef Block<Derived, N, internal::traits<Derived>::ColsAtCompileTime, IsRowMajor> Type; };\ntemplate<int N> struct ConstNRowsBlockXpr { typedef const Block<const Derived, N, internal::traits<Derived>::ColsAtCompileTime, IsRowMajor> Type; };\n/// \\internal expression of a block */\ntypedef Block<Derived> BlockXpr;\ntypedef const Block<const Derived> ConstBlockXpr;\n/// \\internal expression of a block of fixed sizes */\ntemplate<int Rows, int Cols> struct FixedBlockXpr { typedef Block<Derived,Rows,Cols> Type; };\ntemplate<int Rows, int Cols> struct ConstFixedBlockXpr { typedef Block<const Derived,Rows,Cols> Type; };\n\ntypedef VectorBlock<Derived> SegmentReturnType;\ntypedef const VectorBlock<const Derived> ConstSegmentReturnType;\ntemplate<int Size> struct FixedSegmentReturnType { typedef VectorBlock<Derived, Size> Type; };\ntemplate<int Size> struct ConstFixedSegmentReturnType { typedef const VectorBlock<const Derived, Size> Type; };\n\n/// \\internal inner-vector\ntypedef Block<Derived,IsRowMajor?1:Dynamic,IsRowMajor?Dynamic:1,true>       InnerVectorReturnType;\ntypedef Block<const Derived,IsRowMajor?1:Dynamic,IsRowMajor?Dynamic:1,true> ConstInnerVectorReturnType;\n\n/// \\internal set of inner-vectors\ntypedef Block<Derived,Dynamic,Dynamic,true> InnerVectorsReturnType;\ntypedef Block<const Derived,Dynamic,Dynamic,true> ConstInnerVectorsReturnType;\n\n#endif // not EIGEN_PARSED_BY_DOXYGEN\n\n/// \\returns an expression of a block in \\c *this with either dynamic or fixed sizes.\n///\n/// \\param  
startRow  the first row in the block\n/// \\param  startCol  the first column in the block\n/// \\param  blockRows number of rows in the block, specified at either run-time or compile-time\n/// \\param  blockCols number of columns in the block, specified at either run-time or compile-time\n/// \\tparam NRowsType the type of the value handling the number of rows in the block, typically Index.\n/// \\tparam NColsType the type of the value handling the number of columns in the block, typically Index.\n///\n/// Example using runtime (aka dynamic) sizes: \\include MatrixBase_block_int_int_int_int.cpp\n/// Output: \\verbinclude MatrixBase_block_int_int_int_int.out\n///\n/// \\newin{3.4}:\n///\n/// The number of rows \\a blockRows and columns \\a blockCols can also be specified at compile-time by passing Eigen::fix<N>,\n/// or Eigen::fix<N>(n) as arguments. In the latter case, \\c n plays the role of a runtime fallback value in case \\c N equals Eigen::Dynamic.\n/// Here is an example with a fixed number of rows \\c NRows and dynamic number of columns \\c cols:\n/// \\code\n/// mat.block(i,j,fix<NRows>,cols)\n/// \\endcode\n///\n/// This function thus fully covers the features offered by the overloads block<NRows,NCols>(Index, Index)\n/// and block<NRows,NCols>(Index, Index, Index, Index), which are now obsolete: this generic version avoids\n/// redundancy, preserves the argument order, seamlessly enables hybrid fixed/dynamic sizes, and removes the\n/// need to rely on the template keyword in templated code.\n///\n/// \\note Even when the returned expression has dynamic size, if it is applied to a\n/// fixed-size matrix it inherits a fixed maximal size,\n/// which means that evaluating it does not cause a dynamic memory allocation.\n///\nEIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL\n///\n/// \\sa class Block, fix, fix<N>(int)\n///\ntemplate<typename NRowsType, typename NColsType>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n#ifndef EIGEN_PARSED_BY_DOXYGEN\ntypename FixedBlockXpr<internal::get_fixed_value<NRowsType>::value,internal::get_fixed_value<NColsType>::value>::Type\n#else\ntypename FixedBlockXpr<...,...>::Type\n#endif\nblock(Index startRow, Index startCol, NRowsType blockRows, NColsType blockCols)\n{\n  return typename FixedBlockXpr<internal::get_fixed_value<NRowsType>::value,internal::get_fixed_value<NColsType>::value>::Type(\n            derived(), startRow, startCol, internal::get_runtime_value(blockRows), internal::get_runtime_value(blockCols));\n}\n\n/// This is the const version of block(Index,Index,NRowsType,NColsType)\ntemplate<typename NRowsType, typename NColsType>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n#ifndef EIGEN_PARSED_BY_DOXYGEN\nconst typename ConstFixedBlockXpr<internal::get_fixed_value<NRowsType>::value,internal::get_fixed_value<NColsType>::value>::Type\n#else\nconst typename ConstFixedBlockXpr<...,...>::Type\n#endif\nblock(Index startRow, Index startCol, NRowsType blockRows, NColsType blockCols) const\n{\n  return typename ConstFixedBlockXpr<internal::get_fixed_value<NRowsType>::value,internal::get_fixed_value<NColsType>::value>::Type(\n            derived(), startRow, startCol, internal::get_runtime_value(blockRows), internal::get_runtime_value(blockCols));\n}\n\n\n\n/// \\returns an expression of a top-right corner of \\c *this with either dynamic or fixed sizes.\n///\n/// \\param cRows the number of rows in the corner\n/// \\param 
cCols the number of columns in the corner\n/// \\tparam NRowsType the type of the value handling the number of rows in the block, typically Index.\n/// \\tparam NColsType the type of the value handling the number of columns in the block, typically Index.\n///\n/// Example with dynamic sizes: \\include MatrixBase_topRightCorner_int_int.cpp\n/// Output: \\verbinclude MatrixBase_topRightCorner_int_int.out\n///\n/// The number of rows \\a blockRows and columns \\a blockCols can also be specified at compile-time by passing Eigen::fix<N>,\n/// or Eigen::fix<N>(n) as arguments. See \\link block(Index,Index,NRowsType,NColsType) block() \\endlink for the details.\n///\nEIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL\n///\n/// \\sa block(Index,Index,NRowsType,NColsType), class Block\n///\ntemplate<typename NRowsType, typename NColsType>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n#ifndef EIGEN_PARSED_BY_DOXYGEN\ntypename FixedBlockXpr<internal::get_fixed_value<NRowsType>::value,internal::get_fixed_value<NColsType>::value>::Type\n#else\ntypename FixedBlockXpr<...,...>::Type\n#endif\ntopRightCorner(NRowsType cRows, NColsType cCols)\n{\n  return typename FixedBlockXpr<internal::get_fixed_value<NRowsType>::value,internal::get_fixed_value<NColsType>::value>::Type\n            (derived(), 0, cols() - internal::get_runtime_value(cCols), internal::get_runtime_value(cRows), internal::get_runtime_value(cCols));\n}\n\n/// This is the const version of topRightCorner(NRowsType, NColsType).\ntemplate<typename NRowsType, typename NColsType>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n#ifndef EIGEN_PARSED_BY_DOXYGEN\nconst typename ConstFixedBlockXpr<internal::get_fixed_value<NRowsType>::value,internal::get_fixed_value<NColsType>::value>::Type\n#else\nconst typename ConstFixedBlockXpr<...,...>::Type\n#endif\ntopRightCorner(NRowsType cRows, NColsType cCols) const\n{\n  return typename ConstFixedBlockXpr<internal::get_fixed_value<NRowsType>::value,internal::get_fixed_value<NColsType>::value>::Type\n            (derived(), 0, cols() - internal::get_runtime_value(cCols), internal::get_runtime_value(cRows), internal::get_runtime_value(cCols));\n}\n\n/// \\returns an expression of a fixed-size top-right corner of \\c *this.\n///\n/// \\tparam CRows the number of rows in the corner\n/// \\tparam CCols the number of columns in the corner\n///\n/// Example: \\include MatrixBase_template_int_int_topRightCorner.cpp\n/// Output: \\verbinclude MatrixBase_template_int_int_topRightCorner.out\n///\nEIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL\n///\n/// \\sa class Block, block<int,int>(Index,Index)\n///\ntemplate<int CRows, int CCols>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\ntypename FixedBlockXpr<CRows,CCols>::Type topRightCorner()\n{\n  return typename FixedBlockXpr<CRows,CCols>::Type(derived(), 0, cols() - CCols);\n}\n\n/// This is the const version of topRightCorner<int, int>().\ntemplate<int CRows, int CCols>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\nconst typename ConstFixedBlockXpr<CRows,CCols>::Type topRightCorner() const\n{\n  return typename ConstFixedBlockXpr<CRows,CCols>::Type(derived(), 0, cols() - CCols);\n}\n\n/// \\returns an expression of a top-right corner of \\c *this.\n///\n/// \\tparam CRows number of rows in corner as specified at compile-time\n/// \\tparam CCols number of columns in corner as specified at compile-time\n/// \\param  cRows number of rows in corner as specified at run-time\n/// \\param  cCols number of columns in corner as specified at run-time\n///\n/// This function is mainly useful for corners where the number of rows 
is specified at compile-time\n/// and the number of columns is specified at run-time, or vice versa. The compile-time and run-time\n/// information should not contradict. In other words, \\a cRows should equal \\a CRows unless\n/// \\a CRows is \\a Dynamic, and the same for the number of columns.\n///\n/// Example: \\include MatrixBase_template_int_int_topRightCorner_int_int.cpp\n/// Output: \\verbinclude MatrixBase_template_int_int_topRightCorner_int_int.out\n///\nEIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL\n///\n/// \\sa class Block\n///\ntemplate<int CRows, int CCols>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\ntypename FixedBlockXpr<CRows,CCols>::Type topRightCorner(Index cRows, Index cCols)\n{\n  return typename FixedBlockXpr<CRows,CCols>::Type(derived(), 0, cols() - cCols, cRows, cCols);\n}\n\n/// This is the const version of topRightCorner<int, int>(Index, Index).\ntemplate<int CRows, int CCols>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\nconst typename ConstFixedBlockXpr<CRows,CCols>::Type topRightCorner(Index cRows, Index cCols) const\n{\n  return typename ConstFixedBlockXpr<CRows,CCols>::Type(derived(), 0, cols() - cCols, cRows, cCols);\n}\n\n\n\n/// \\returns an expression of a top-left corner of \\c *this  with either dynamic or fixed sizes.\n///\n/// \\param cRows the number of rows in the corner\n/// \\param cCols the number of columns in the corner\n/// \\tparam NRowsType the type of the value handling the number of rows in the block, typically Index.\n/// \\tparam NColsType the type of the value handling the number of columns in the block, typically Index.\n///\n/// Example: \\include MatrixBase_topLeftCorner_int_int.cpp\n/// Output: \\verbinclude MatrixBase_topLeftCorner_int_int.out\n///\n/// The number of rows \\a blockRows and columns \\a blockCols can also be specified at compile-time by passing Eigen::fix<N>,\n/// or Eigen::fix<N>(n) as arguments. 
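For instance, given any dense matrix expression \\c mat (the name is illustrative only):\n/// \\code\n/// mat.topLeftCorner(fix<2>, n);       // two rows fixed at compile-time, n columns set at run-time\n/// mat.topLeftCorner(fix<2>, fix<3>);  // fully fixed-size 2x3 corner\n/// \\endcode\n/// 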
See \\link block(Index,Index,NRowsType,NColsType) block() \\endlink for the details.\n///\nEIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL\n///\n/// \\sa block(Index,Index,NRowsType,NColsType), class Block\n///\ntemplate<typename NRowsType, typename NColsType>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n#ifndef EIGEN_PARSED_BY_DOXYGEN\ntypename FixedBlockXpr<internal::get_fixed_value<NRowsType>::value,internal::get_fixed_value<NColsType>::value>::Type\n#else\ntypename FixedBlockXpr<...,...>::Type\n#endif\ntopLeftCorner(NRowsType cRows, NColsType cCols)\n{\n  return typename FixedBlockXpr<internal::get_fixed_value<NRowsType>::value,internal::get_fixed_value<NColsType>::value>::Type\n            (derived(), 0, 0, internal::get_runtime_value(cRows), internal::get_runtime_value(cCols));\n}\n\n/// This is the const version of topLeftCorner(Index, Index).\ntemplate<typename NRowsType, typename NColsType>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n#ifndef EIGEN_PARSED_BY_DOXYGEN\nconst typename ConstFixedBlockXpr<internal::get_fixed_value<NRowsType>::value,internal::get_fixed_value<NColsType>::value>::Type\n#else\nconst typename ConstFixedBlockXpr<...,...>::Type\n#endif\ntopLeftCorner(NRowsType cRows, NColsType cCols) const\n{\n  return typename ConstFixedBlockXpr<internal::get_fixed_value<NRowsType>::value,internal::get_fixed_value<NColsType>::value>::Type\n            (derived(), 0, 0, internal::get_runtime_value(cRows), internal::get_runtime_value(cCols));\n}\n\n/// \\returns an expression of a fixed-size top-left corner of \\c *this.\n///\n/// The template parameters CRows and CCols are the number of rows and columns in the corner.\n///\n/// Example: \\include MatrixBase_template_int_int_topLeftCorner.cpp\n/// Output: \\verbinclude MatrixBase_template_int_int_topLeftCorner.out\n///\nEIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL\n///\n/// \\sa block(Index,Index,NRowsType,NColsType), class Block\n///\ntemplate<int CRows, int CCols>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\ntypename FixedBlockXpr<CRows,CCols>::Type topLeftCorner()\n{\n  return typename FixedBlockXpr<CRows,CCols>::Type(derived(), 0, 0);\n}\n\n/// This is the const version of topLeftCorner<int, int>().\ntemplate<int CRows, int CCols>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\nconst typename ConstFixedBlockXpr<CRows,CCols>::Type topLeftCorner() const\n{\n  return typename ConstFixedBlockXpr<CRows,CCols>::Type(derived(), 0, 0);\n}\n\n/// \\returns an expression of a top-left corner of \\c *this.\n///\n/// \\tparam CRows number of rows in corner as specified at compile-time\n/// \\tparam CCols number of columns in corner as specified at compile-time\n/// \\param  cRows number of rows in corner as specified at run-time\n/// \\param  cCols number of columns in corner as specified at run-time\n///\n/// This function is mainly useful for corners where the number of rows is specified at compile-time\n/// and the number of columns is specified at run-time, or vice versa. The compile-time and run-time\n/// information should not contradict. 
In other words, \\a cRows should equal \\a CRows unless\n/// \\a CRows is \\a Dynamic, and the same for the number of columns.\n///\n/// Example: \\include MatrixBase_template_int_int_topLeftCorner_int_int.cpp\n/// Output: \\verbinclude MatrixBase_template_int_int_topLeftCorner_int_int.out\n///\nEIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL\n///\n/// \\sa class Block\n///\ntemplate<int CRows, int CCols>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\ntypename FixedBlockXpr<CRows,CCols>::Type topLeftCorner(Index cRows, Index cCols)\n{\n  return typename FixedBlockXpr<CRows,CCols>::Type(derived(), 0, 0, cRows, cCols);\n}\n\n/// This is the const version of topLeftCorner<int, int>(Index, Index).\ntemplate<int CRows, int CCols>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\nconst typename ConstFixedBlockXpr<CRows,CCols>::Type topLeftCorner(Index cRows, Index cCols) const\n{\n  return typename ConstFixedBlockXpr<CRows,CCols>::Type(derived(), 0, 0, cRows, cCols);\n}\n\n\n\n/// \\returns an expression of a bottom-right corner of \\c *this  with either dynamic or fixed sizes.\n///\n/// \\param cRows the number of rows in the corner\n/// \\param cCols the number of columns in the corner\n/// \\tparam NRowsType the type of the value handling the number of rows in the block, typically Index.\n/// \\tparam NColsType the type of the value handling the number of columns in the block, typically Index.\n///\n/// Example: \\include MatrixBase_bottomRightCorner_int_int.cpp\n/// Output: \\verbinclude MatrixBase_bottomRightCorner_int_int.out\n///\n/// The number of rows \\a blockRows and columns \\a blockCols can also be specified at compile-time by passing Eigen::fix<N>,\n/// or Eigen::fix<N>(n) as arguments. See \\link block(Index,Index,NRowsType,NColsType) block() \\endlink for the details.\n///\nEIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL\n///\n/// \\sa block(Index,Index,NRowsType,NColsType), class Block\n///\ntemplate<typename NRowsType, typename NColsType>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n#ifndef EIGEN_PARSED_BY_DOXYGEN\ntypename FixedBlockXpr<internal::get_fixed_value<NRowsType>::value,internal::get_fixed_value<NColsType>::value>::Type\n#else\ntypename FixedBlockXpr<...,...>::Type\n#endif\nbottomRightCorner(NRowsType cRows, NColsType cCols)\n{\n  return typename FixedBlockXpr<internal::get_fixed_value<NRowsType>::value,internal::get_fixed_value<NColsType>::value>::Type\n            (derived(), rows() - internal::get_runtime_value(cRows), cols() - internal::get_runtime_value(cCols),\n                        internal::get_runtime_value(cRows), internal::get_runtime_value(cCols));\n}\n\n/// This is the const version of bottomRightCorner(NRowsType, NColsType).\ntemplate<typename NRowsType, typename NColsType>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n#ifndef EIGEN_PARSED_BY_DOXYGEN\nconst typename ConstFixedBlockXpr<internal::get_fixed_value<NRowsType>::value,internal::get_fixed_value<NColsType>::value>::Type\n#else\nconst typename ConstFixedBlockXpr<...,...>::Type\n#endif\nbottomRightCorner(NRowsType cRows, NColsType cCols) const\n{\n  return typename ConstFixedBlockXpr<internal::get_fixed_value<NRowsType>::value,internal::get_fixed_value<NColsType>::value>::Type\n            (derived(), rows() - internal::get_runtime_value(cRows), cols() - internal::get_runtime_value(cCols),\n                        internal::get_runtime_value(cRows), internal::get_runtime_value(cCols));\n}\n\n/// \\returns an expression of a fixed-size bottom-right corner of \\c *this.\n///\n/// The template parameters CRows and CCols are the number of rows 
and columns in the corner.\n///\n/// Example: \\include MatrixBase_template_int_int_bottomRightCorner.cpp\n/// Output: \\verbinclude MatrixBase_template_int_int_bottomRightCorner.out\n///\nEIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL\n///\n/// \\sa block(Index,Index,NRowsType,NColsType), class Block\n///\ntemplate<int CRows, int CCols>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\ntypename FixedBlockXpr<CRows,CCols>::Type bottomRightCorner()\n{\n  return typename FixedBlockXpr<CRows,CCols>::Type(derived(), rows() - CRows, cols() - CCols);\n}\n\n/// This is the const version of bottomRightCorner<int, int>().\ntemplate<int CRows, int CCols>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\nconst typename ConstFixedBlockXpr<CRows,CCols>::Type bottomRightCorner() const\n{\n  return typename ConstFixedBlockXpr<CRows,CCols>::Type(derived(), rows() - CRows, cols() - CCols);\n}\n\n/// \\returns an expression of a bottom-right corner of \\c *this.\n///\n/// \\tparam CRows number of rows in corner as specified at compile-time\n/// \\tparam CCols number of columns in corner as specified at compile-time\n/// \\param  cRows number of rows in corner as specified at run-time\n/// \\param  cCols number of columns in corner as specified at run-time\n///\n/// This function is mainly useful for corners where the number of rows is specified at compile-time\n/// and the number of columns is specified at run-time, or vice versa. The compile-time and run-time\n/// information should not contradict. In other words, \\a cRows should equal \\a CRows unless\n/// \\a CRows is \\a Dynamic, and the same for the number of columns.\n///\n/// Example: \\include MatrixBase_template_int_int_bottomRightCorner_int_int.cpp\n/// Output: \\verbinclude MatrixBase_template_int_int_bottomRightCorner_int_int.out\n///\nEIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL\n///\n/// \\sa class Block\n///\ntemplate<int CRows, int CCols>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\ntypename FixedBlockXpr<CRows,CCols>::Type bottomRightCorner(Index cRows, Index cCols)\n{\n  return typename FixedBlockXpr<CRows,CCols>::Type(derived(), rows() - cRows, cols() - cCols, cRows, cCols);\n}\n\n/// This is the const version of bottomRightCorner<int, int>(Index, Index).\ntemplate<int CRows, int CCols>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\nconst typename ConstFixedBlockXpr<CRows,CCols>::Type bottomRightCorner(Index cRows, Index cCols) const\n{\n  return typename ConstFixedBlockXpr<CRows,CCols>::Type(derived(), rows() - cRows, cols() - cCols, cRows, cCols);\n}\n\n\n\n/// \\returns an expression of a bottom-left corner of \\c *this  with either dynamic or fixed sizes.\n///\n/// \\param cRows the number of rows in the corner\n/// \\param cCols the number of columns in the corner\n/// \\tparam NRowsType the type of the value handling the number of rows in the block, typically Index.\n/// \\tparam NColsType the type of the value handling the number of columns in the block, typically Index.\n///\n/// Example: \\include MatrixBase_bottomLeftCorner_int_int.cpp\n/// Output: \\verbinclude MatrixBase_bottomLeftCorner_int_int.out\n///\n/// The number of rows \\a blockRows and columns \\a blockCols can also be specified at compile-time by passing Eigen::fix<N>,\n/// or Eigen::fix<N>(n) as arguments. 
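For instance, with \\c mat, \\c N, \\c nr and \\c nc as illustrative placeholders:\n/// \\code\n/// mat.bottomLeftCorner(fix<N>(nr), nc); // N rows at compile-time (nr is the run-time fallback when N==Dynamic), nc columns\n/// \\endcode\n/// 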
See \\link block(Index,Index,NRowsType,NColsType) block() \\endlink for the details.\n///\nEIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL\n///\n/// \\sa block(Index,Index,NRowsType,NColsType), class Block\n///\ntemplate<typename NRowsType, typename NColsType>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n#ifndef EIGEN_PARSED_BY_DOXYGEN\ntypename FixedBlockXpr<internal::get_fixed_value<NRowsType>::value,internal::get_fixed_value<NColsType>::value>::Type\n#else\ntypename FixedBlockXpr<...,...>::Type\n#endif\nbottomLeftCorner(NRowsType cRows, NColsType cCols)\n{\n  return typename FixedBlockXpr<internal::get_fixed_value<NRowsType>::value,internal::get_fixed_value<NColsType>::value>::Type\n            (derived(), rows() - internal::get_runtime_value(cRows), 0,\n                        internal::get_runtime_value(cRows), internal::get_runtime_value(cCols));\n}\n\n/// This is the const version of bottomLeftCorner(NRowsType, NColsType).\ntemplate<typename NRowsType, typename NColsType>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n#ifndef EIGEN_PARSED_BY_DOXYGEN\ntypename ConstFixedBlockXpr<internal::get_fixed_value<NRowsType>::value,internal::get_fixed_value<NColsType>::value>::Type\n#else\ntypename ConstFixedBlockXpr<...,...>::Type\n#endif\nbottomLeftCorner(NRowsType cRows, NColsType cCols) const\n{\n  return typename ConstFixedBlockXpr<internal::get_fixed_value<NRowsType>::value,internal::get_fixed_value<NColsType>::value>::Type\n            (derived(), rows() - internal::get_runtime_value(cRows), 0,\n                        internal::get_runtime_value(cRows), internal::get_runtime_value(cCols));\n}\n\n/// \\returns an expression of a fixed-size bottom-left corner of \\c *this.\n///\n/// The template parameters CRows and CCols are the number of rows and columns in the corner.\n///\n/// Example: \\include MatrixBase_template_int_int_bottomLeftCorner.cpp\n/// Output: \\verbinclude MatrixBase_template_int_int_bottomLeftCorner.out\n///\nEIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL\n///\n/// \\sa block(Index,Index,NRowsType,NColsType), class Block\n///\ntemplate<int CRows, int CCols>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\ntypename FixedBlockXpr<CRows,CCols>::Type bottomLeftCorner()\n{\n  return typename FixedBlockXpr<CRows,CCols>::Type(derived(), rows() - CRows, 0);\n}\n\n/// This is the const version of bottomLeftCorner<int, int>().\ntemplate<int CRows, int CCols>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\nconst typename ConstFixedBlockXpr<CRows,CCols>::Type bottomLeftCorner() const\n{\n  return typename ConstFixedBlockXpr<CRows,CCols>::Type(derived(), rows() - CRows, 0);\n}\n\n/// \\returns an expression of a bottom-left corner of \\c *this.\n///\n/// \\tparam CRows number of rows in corner as specified at compile-time\n/// \\tparam CCols number of columns in corner as specified at compile-time\n/// \\param  cRows number of rows in corner as specified at run-time\n/// \\param  cCols number of columns in corner as specified at run-time\n///\n/// This function is mainly useful for corners where the number of rows is specified at compile-time\n/// and the number of columns is specified at run-time, or vice versa. The compile-time and run-time\n/// information should not contradict. 
In other words, \\a cRows should equal \\a CRows unless\n/// \\a CRows is \\a Dynamic, and the same for the number of columns.\n///\n/// Example: \\include MatrixBase_template_int_int_bottomLeftCorner_int_int.cpp\n/// Output: \\verbinclude MatrixBase_template_int_int_bottomLeftCorner_int_int.out\n///\nEIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL\n///\n/// \\sa class Block\n///\ntemplate<int CRows, int CCols>\nEIGEN_STRONG_INLINE\ntypename FixedBlockXpr<CRows,CCols>::Type bottomLeftCorner(Index cRows, Index cCols)\n{\n  return typename FixedBlockXpr<CRows,CCols>::Type(derived(), rows() - cRows, 0, cRows, cCols);\n}\n\n/// This is the const version of bottomLeftCorner<int, int>(Index, Index).\ntemplate<int CRows, int CCols>\nEIGEN_STRONG_INLINE\nconst typename ConstFixedBlockXpr<CRows,CCols>::Type bottomLeftCorner(Index cRows, Index cCols) const\n{\n  return typename ConstFixedBlockXpr<CRows,CCols>::Type(derived(), rows() - cRows, 0, cRows, cCols);\n}\n\n\n\n/// \\returns a block consisting of the top rows of \\c *this.\n///\n/// \\param n the number of rows in the block\n/// \\tparam NRowsType the type of the value handling the number of rows in the block, typically Index.\n///\n/// Example: \\include MatrixBase_topRows_int.cpp\n/// Output: \\verbinclude MatrixBase_topRows_int.out\n///\n/// The number of rows \\a n can also be specified at compile-time by passing Eigen::fix<N>,\n/// or Eigen::fix<N>(n) as arguments.\n/// See \\link block(Index,Index,NRowsType,NColsType) block() \\endlink for the details.\n///\nEIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(row-major)\n///\n/// \\sa block(Index,Index,NRowsType,NColsType), class Block\n///\ntemplate<typename NRowsType>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n#ifndef EIGEN_PARSED_BY_DOXYGEN\ntypename NRowsBlockXpr<internal::get_fixed_value<NRowsType>::value>::Type\n#else\ntypename NRowsBlockXpr<...>::Type\n#endif\ntopRows(NRowsType n)\n{\n  return typename NRowsBlockXpr<internal::get_fixed_value<NRowsType>::value>::Type\n            (derived(), 0, 0, internal::get_runtime_value(n), cols());\n}\n\n/// This is the const version of topRows(NRowsType).\ntemplate<typename NRowsType>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n#ifndef EIGEN_PARSED_BY_DOXYGEN\nconst typename ConstNRowsBlockXpr<internal::get_fixed_value<NRowsType>::value>::Type\n#else\nconst typename ConstNRowsBlockXpr<...>::Type\n#endif\ntopRows(NRowsType n) const\n{\n  return typename ConstNRowsBlockXpr<internal::get_fixed_value<NRowsType>::value>::Type\n            (derived(), 0, 0, internal::get_runtime_value(n), cols());\n}\n\n/// \\returns a block consisting of the top rows of \\c *this.\n///\n/// \\tparam N the number of rows in the block as specified at compile-time\n/// \\param n the number of rows in the block as specified at run-time\n///\n/// The compile-time and run-time information should not contradict. 
In other words,\n/// \\a n should equal \\a N unless \\a N is \\a Dynamic.\n///\n/// Example: \\include MatrixBase_template_int_topRows.cpp\n/// Output: \\verbinclude MatrixBase_template_int_topRows.out\n///\nEIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(row-major)\n///\n/// \\sa block(Index,Index,NRowsType,NColsType), class Block\n///\ntemplate<int N>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\ntypename NRowsBlockXpr<N>::Type topRows(Index n = N)\n{\n  return typename NRowsBlockXpr<N>::Type(derived(), 0, 0, n, cols());\n}\n\n/// This is the const version of topRows<int>().\ntemplate<int N>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\ntypename ConstNRowsBlockXpr<N>::Type topRows(Index n = N) const\n{\n  return typename ConstNRowsBlockXpr<N>::Type(derived(), 0, 0, n, cols());\n}\n\n\n\n/// \\returns a block consisting of the bottom rows of \\c *this.\n///\n/// \\param n the number of rows in the block\n/// \\tparam NRowsType the type of the value handling the number of rows in the block, typically Index.\n///\n/// Example: \\include MatrixBase_bottomRows_int.cpp\n/// Output: \\verbinclude MatrixBase_bottomRows_int.out\n///\n/// The number of rows \\a n can also be specified at compile-time by passing Eigen::fix<N>,\n/// or Eigen::fix<N>(n) as arguments.\n/// See \\link block(Index,Index,NRowsType,NColsType) block() \\endlink for the details.\n///\nEIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(row-major)\n///\n/// \\sa block(Index,Index,NRowsType,NColsType), class Block\n///\ntemplate<typename NRowsType>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n#ifndef EIGEN_PARSED_BY_DOXYGEN\ntypename NRowsBlockXpr<internal::get_fixed_value<NRowsType>::value>::Type\n#else\ntypename NRowsBlockXpr<...>::Type\n#endif\nbottomRows(NRowsType n)\n{\n  return typename NRowsBlockXpr<internal::get_fixed_value<NRowsType>::value>::Type\n            (derived(), rows() - internal::get_runtime_value(n), 0, internal::get_runtime_value(n), cols());\n}\n\n/// This is the const version of bottomRows(NRowsType).\ntemplate<typename NRowsType>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n#ifndef EIGEN_PARSED_BY_DOXYGEN\nconst typename ConstNRowsBlockXpr<internal::get_fixed_value<NRowsType>::value>::Type\n#else\nconst typename ConstNRowsBlockXpr<...>::Type\n#endif\nbottomRows(NRowsType n) const\n{\n  return typename ConstNRowsBlockXpr<internal::get_fixed_value<NRowsType>::value>::Type\n            (derived(), rows() - internal::get_runtime_value(n), 0, internal::get_runtime_value(n), cols());\n}\n\n/// \\returns a block consisting of the bottom rows of \\c *this.\n///\n/// \\tparam N the number of rows in the block as specified at compile-time\n/// \\param n the number of rows in the block as specified at run-time\n///\n/// The compile-time and run-time information should not contradict. 
In other words,\n/// \\a n should equal \\a N unless \\a N is \\a Dynamic.\n///\n/// Example: \\include MatrixBase_template_int_bottomRows.cpp\n/// Output: \\verbinclude MatrixBase_template_int_bottomRows.out\n///\nEIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(row-major)\n///\n/// \\sa block(Index,Index,NRowsType,NColsType), class Block\n///\ntemplate<int N>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\ntypename NRowsBlockXpr<N>::Type bottomRows(Index n = N)\n{\n  return typename NRowsBlockXpr<N>::Type(derived(), rows() - n, 0, n, cols());\n}\n\n/// This is the const version of bottomRows<int>().\ntemplate<int N>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\ntypename ConstNRowsBlockXpr<N>::Type bottomRows(Index n = N) const\n{\n  return typename ConstNRowsBlockXpr<N>::Type(derived(), rows() - n, 0, n, cols());\n}\n\n\n\n/// \\returns a block consisting of a range of rows of \\c *this.\n///\n/// \\param startRow the index of the first row in the block\n/// \\param n the number of rows in the block\n/// \\tparam NRowsType the type of the value handling the number of rows in the block, typically Index.\n///\n/// Example: \\include DenseBase_middleRows_int.cpp\n/// Output: \\verbinclude DenseBase_middleRows_int.out\n///\n/// The number of rows \\a n can also be specified at compile-time by passing Eigen::fix<N>,\n/// or Eigen::fix<N>(n) as arguments.\n/// See \\link block(Index,Index,NRowsType,NColsType) block() \\endlink for the details.\n///\nEIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(row-major)\n///\n/// \\sa block(Index,Index,NRowsType,NColsType), class Block\n///\ntemplate<typename NRowsType>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n#ifndef EIGEN_PARSED_BY_DOXYGEN\ntypename NRowsBlockXpr<internal::get_fixed_value<NRowsType>::value>::Type\n#else\ntypename NRowsBlockXpr<...>::Type\n#endif\nmiddleRows(Index startRow, NRowsType n)\n{\n  return typename NRowsBlockXpr<internal::get_fixed_value<NRowsType>::value>::Type\n            (derived(), startRow, 0, internal::get_runtime_value(n), cols());\n}\n\n/// This is the const version of middleRows(Index,NRowsType).\ntemplate<typename NRowsType>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n#ifndef EIGEN_PARSED_BY_DOXYGEN\nconst typename ConstNRowsBlockXpr<internal::get_fixed_value<NRowsType>::value>::Type\n#else\nconst typename ConstNRowsBlockXpr<...>::Type\n#endif\nmiddleRows(Index startRow, NRowsType n) const\n{\n  return typename ConstNRowsBlockXpr<internal::get_fixed_value<NRowsType>::value>::Type\n            (derived(), startRow, 0, internal::get_runtime_value(n), cols());\n}\n\n/// \\returns a block consisting of a range of rows of \\c *this.\n///\n/// \\tparam N the number of rows in the block as specified at compile-time\n/// \\param startRow the index of the first row in the block\n/// \\param n the number of rows in the block as specified at run-time\n///\n/// The compile-time and run-time information should not contradict. 
In other words,\n/// \\a n should equal \\a N unless \\a N is \\a Dynamic.\n///\n/// Example: \\include DenseBase_template_int_middleRows.cpp\n/// Output: \\verbinclude DenseBase_template_int_middleRows.out\n///\nEIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(row-major)\n///\n/// \\sa block(Index,Index,NRowsType,NColsType), class Block\n///\ntemplate<int N>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\ntypename NRowsBlockXpr<N>::Type middleRows(Index startRow, Index n = N)\n{\n  return typename NRowsBlockXpr<N>::Type(derived(), startRow, 0, n, cols());\n}\n\n/// This is the const version of middleRows<int>().\ntemplate<int N>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\ntypename ConstNRowsBlockXpr<N>::Type middleRows(Index startRow, Index n = N) const\n{\n  return typename ConstNRowsBlockXpr<N>::Type(derived(), startRow, 0, n, cols());\n}\n\n\n\n/// \\returns a block consisting of the left columns of \\c *this.\n///\n/// \\param n the number of columns in the block\n/// \\tparam NColsType the type of the value handling the number of columns in the block, typically Index.\n///\n/// Example: \\include MatrixBase_leftCols_int.cpp\n/// Output: \\verbinclude MatrixBase_leftCols_int.out\n///\n/// The number of columns \\a n can also be specified at compile-time by passing Eigen::fix<N>,\n/// or Eigen::fix<N>(n) as arguments.\n/// See \\link block(Index,Index,NRowsType,NColsType) block() \\endlink for the details.\n///\nEIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(column-major)\n///\n/// \\sa block(Index,Index,NRowsType,NColsType), class Block\n///\ntemplate<typename NColsType>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n#ifndef EIGEN_PARSED_BY_DOXYGEN\ntypename NColsBlockXpr<internal::get_fixed_value<NColsType>::value>::Type\n#else\ntypename NColsBlockXpr<...>::Type\n#endif\nleftCols(NColsType n)\n{\n  return typename NColsBlockXpr<internal::get_fixed_value<NColsType>::value>::Type\n            (derived(), 0, 0, rows(), internal::get_runtime_value(n));\n}\n\n/// This is the const version of leftCols(NColsType).\ntemplate<typename NColsType>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n#ifndef EIGEN_PARSED_BY_DOXYGEN\nconst typename ConstNColsBlockXpr<internal::get_fixed_value<NColsType>::value>::Type\n#else\nconst typename ConstNColsBlockXpr<...>::Type\n#endif\nleftCols(NColsType n) const\n{\n  return typename ConstNColsBlockXpr<internal::get_fixed_value<NColsType>::value>::Type\n            (derived(), 0, 0, rows(), internal::get_runtime_value(n));\n}\n\n/// \\returns a block consisting of the left columns of \\c *this.\n///\n/// \\tparam N the number of columns in the block as specified at compile-time\n/// \\param n the number of columns in the block as specified at run-time\n///\n/// The compile-time and run-time information should not contradict. 
In other words,\n/// \\a n should equal \\a N unless \\a N is \\a Dynamic.\n///\n/// Example: \\include MatrixBase_template_int_leftCols.cpp\n/// Output: \\verbinclude MatrixBase_template_int_leftCols.out\n///\nEIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(column-major)\n///\n/// \\sa block(Index,Index,NRowsType,NColsType), class Block\n///\ntemplate<int N>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\ntypename NColsBlockXpr<N>::Type leftCols(Index n = N)\n{\n  return typename NColsBlockXpr<N>::Type(derived(), 0, 0, rows(), n);\n}\n\n/// This is the const version of leftCols<int>().\ntemplate<int N>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\ntypename ConstNColsBlockXpr<N>::Type leftCols(Index n = N) const\n{\n  return typename ConstNColsBlockXpr<N>::Type(derived(), 0, 0, rows(), n);\n}\n\n\n\n/// \\returns a block consisting of the right columns of \\c *this.\n///\n/// \\param n the number of columns in the block\n/// \\tparam NColsType the type of the value handling the number of columns in the block, typically Index.\n///\n/// Example: \\include MatrixBase_rightCols_int.cpp\n/// Output: \\verbinclude MatrixBase_rightCols_int.out\n///\n/// The number of columns \\a n can also be specified at compile-time by passing Eigen::fix<N>,\n/// or Eigen::fix<N>(n) as arguments.\n/// See \\link block(Index,Index,NRowsType,NColsType) block() \\endlink for the details.\n///\nEIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(column-major)\n///\n/// \\sa block(Index,Index,NRowsType,NColsType), class Block\n///\ntemplate<typename NColsType>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n#ifndef EIGEN_PARSED_BY_DOXYGEN\ntypename NColsBlockXpr<internal::get_fixed_value<NColsType>::value>::Type\n#else\ntypename NColsBlockXpr<...>::Type\n#endif\nrightCols(NColsType n)\n{\n  return typename NColsBlockXpr<internal::get_fixed_value<NColsType>::value>::Type\n            (derived(), 0, cols() - internal::get_runtime_value(n), rows(), internal::get_runtime_value(n));\n}\n\n/// This is the const version of rightCols(NColsType).\ntemplate<typename NColsType>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n#ifndef EIGEN_PARSED_BY_DOXYGEN\nconst typename ConstNColsBlockXpr<internal::get_fixed_value<NColsType>::value>::Type\n#else\nconst typename ConstNColsBlockXpr<...>::Type\n#endif\nrightCols(NColsType n) const\n{\n  return typename ConstNColsBlockXpr<internal::get_fixed_value<NColsType>::value>::Type\n            (derived(), 0, cols() - internal::get_runtime_value(n), rows(), internal::get_runtime_value(n));\n}\n\n/// \\returns a block consisting of the right columns of \\c *this.\n///\n/// \\tparam N the number of columns in the block as specified at compile-time\n/// \\param n the number of columns in the block as specified at run-time\n///\n/// The compile-time and run-time information should not contradict. 
In other words,\n/// \\a n should equal \\a N unless \\a N is \\a Dynamic.\n///\n/// Example: \\include MatrixBase_template_int_rightCols.cpp\n/// Output: \\verbinclude MatrixBase_template_int_rightCols.out\n///\nEIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(column-major)\n///\n/// \\sa block(Index,Index,NRowsType,NColsType), class Block\n///\ntemplate<int N>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\ntypename NColsBlockXpr<N>::Type rightCols(Index n = N)\n{\n  return typename NColsBlockXpr<N>::Type(derived(), 0, cols() - n, rows(), n);\n}\n\n/// This is the const version of rightCols<int>().\ntemplate<int N>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\ntypename ConstNColsBlockXpr<N>::Type rightCols(Index n = N) const\n{\n  return typename ConstNColsBlockXpr<N>::Type(derived(), 0, cols() - n, rows(), n);\n}\n\n\n\n/// \\returns a block consisting of a range of columns of \\c *this.\n///\n/// \\param startCol the index of the first column in the block\n/// \\param numCols the number of columns in the block\n/// \\tparam NColsType the type of the value handling the number of columns in the block, typically Index.\n///\n/// Example: \\include DenseBase_middleCols_int.cpp\n/// Output: \\verbinclude DenseBase_middleCols_int.out\n///\n/// The number of columns \\a n can also be specified at compile-time by passing Eigen::fix<N>,\n/// or Eigen::fix<N>(n) as arguments.\n/// See \\link block(Index,Index,NRowsType,NColsType) block() \\endlink for the details.\n///\nEIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(column-major)\n///\n/// \\sa block(Index,Index,NRowsType,NColsType), class Block\n///\ntemplate<typename NColsType>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n#ifndef EIGEN_PARSED_BY_DOXYGEN\ntypename NColsBlockXpr<internal::get_fixed_value<NColsType>::value>::Type\n#else\ntypename NColsBlockXpr<...>::Type\n#endif\nmiddleCols(Index startCol, NColsType numCols)\n{\n  return typename NColsBlockXpr<internal::get_fixed_value<NColsType>::value>::Type\n            (derived(), 0, startCol, rows(), internal::get_runtime_value(numCols));\n}\n\n/// This is the const version of middleCols(Index,NColsType).\ntemplate<typename NColsType>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n#ifndef EIGEN_PARSED_BY_DOXYGEN\nconst typename ConstNColsBlockXpr<internal::get_fixed_value<NColsType>::value>::Type\n#else\nconst typename ConstNColsBlockXpr<...>::Type\n#endif\nmiddleCols(Index startCol, NColsType numCols) const\n{\n  return typename ConstNColsBlockXpr<internal::get_fixed_value<NColsType>::value>::Type\n            (derived(), 0, startCol, rows(), internal::get_runtime_value(numCols));\n}\n\n/// \\returns a block consisting of a range of columns of \\c *this.\n///\n/// \\tparam N the number of columns in the block as specified at compile-time\n/// \\param startCol the index of the first column in the block\n/// \\param n the number of columns in the block as specified at run-time\n///\n/// The compile-time and run-time information should not contradict. 
In other words,\n/// \\a n should equal \\a N unless \\a N is \\a Dynamic.\n///\n/// Example: \\include DenseBase_template_int_middleCols.cpp\n/// Output: \\verbinclude DenseBase_template_int_middleCols.out\n///\nEIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(column-major)\n///\n/// \\sa block(Index,Index,NRowsType,NColsType), class Block\n///\ntemplate<int N>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\ntypename NColsBlockXpr<N>::Type middleCols(Index startCol, Index n = N)\n{\n  return typename NColsBlockXpr<N>::Type(derived(), 0, startCol, rows(), n);\n}\n\n/// This is the const version of middleCols<int>().\ntemplate<int N>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\ntypename ConstNColsBlockXpr<N>::Type middleCols(Index startCol, Index n = N) const\n{\n  return typename ConstNColsBlockXpr<N>::Type(derived(), 0, startCol, rows(), n);\n}\n\n\n\n/// \\returns a fixed-size expression of a block of \\c *this.\n///\n/// The template parameters \\a NRows and \\a NCols are the number of\n/// rows and columns in the block.\n///\n/// \\param startRow the first row in the block\n/// \\param startCol the first column in the block\n///\n/// Example: \\include MatrixBase_block_int_int.cpp\n/// Output: \\verbinclude MatrixBase_block_int_int.out\n///\n/// \\note The use of this overload is discouraged as of %Eigen 3.4; it is better to use the generic\n/// block(Index,Index,NRowsType,NColsType). Here is the one-to-one equivalence:\n/// \\code\n/// mat.template block<NRows,NCols>(i,j)  <-->  mat.block(i,j,fix<NRows>,fix<NCols>)\n/// \\endcode\n///\n/// \\note Since block is a templated member, the keyword template has to be used\n/// if the matrix type is also a template parameter: \\code m.template block<3,3>(1,1); \\endcode\n///\nEIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL\n///\n/// \\sa block(Index,Index,NRowsType,NColsType), class Block\n///\ntemplate<int NRows, int NCols>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\ntypename FixedBlockXpr<NRows,NCols>::Type block(Index startRow, Index startCol)\n{\n  return typename FixedBlockXpr<NRows,NCols>::Type(derived(), startRow, startCol);\n}\n\n/// This is the const version of block<>(Index, Index).\ntemplate<int NRows, int NCols>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\nconst typename ConstFixedBlockXpr<NRows,NCols>::Type block(Index startRow, Index startCol) const\n{\n  return typename ConstFixedBlockXpr<NRows,NCols>::Type(derived(), startRow, startCol);\n}\n\n/// \\returns an expression of a block of \\c *this.\n///\n/// \\tparam NRows number of rows in block as specified at compile-time\n/// \\tparam NCols number of columns in block as specified at compile-time\n/// \\param  startRow  the first row in the block\n/// \\param  startCol  the first column in the block\n/// \\param  blockRows number of rows in block as specified at run-time\n/// \\param  blockCols number of columns in block as specified at run-time\n///\n/// This function is mainly useful for blocks where the number of rows is specified at compile-time\n/// and the number of columns is specified at run-time, or vice versa. The compile-time and run-time\n/// information should not contradict. In other words, \\a blockRows should equal \\a NRows unless\n/// \\a NRows is \\a Dynamic, and the same for the number of columns.\n///\n/// Example: \\include MatrixBase_template_int_int_block_int_int_int_int.cpp\n/// Output: \\verbinclude MatrixBase_template_int_int_block_int_int_int_int.out\n///\n/// \\note The use of this overload is discouraged as of %Eigen 3.4; it is better to use the generic\n/// block(Index,Index,NRowsType,NColsType). Here is the one-to-one complete equivalence:\n/// \\code\n/// mat.template block<NRows,NCols>(i,j,rows,cols)     <-->  mat.block(i,j,fix<NRows>(rows),fix<NCols>(cols))\n/// \\endcode\n/// If we know that, e.g., NRows==Dynamic and NCols!=Dynamic, then the equivalence becomes:\n/// \\code\n/// mat.template block<Dynamic,NCols>(i,j,rows,NCols)  <-->  mat.block(i,j,rows,fix<NCols>)\n/// \\endcode\n///\nEIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL\n///\n/// \\sa block(Index,Index,NRowsType,NColsType), class Block\n///\ntemplate<int NRows, int NCols>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\ntypename FixedBlockXpr<NRows,NCols>::Type block(Index startRow, Index startCol,\n                                                  Index blockRows, Index blockCols)\n{\n  return typename FixedBlockXpr<NRows,NCols>::Type(derived(), startRow, startCol, blockRows, blockCols);\n}\n\n/// This is the const version of block<>(Index, Index, Index, Index).\ntemplate<int NRows, int NCols>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\nconst typename ConstFixedBlockXpr<NRows,NCols>::Type block(Index startRow, Index startCol,\n                                                              Index blockRows, Index blockCols) const\n{\n  return typename ConstFixedBlockXpr<NRows,NCols>::Type(derived(), startRow, startCol, blockRows, blockCols);\n}\n\n/// \\returns an expression of the \\a i-th column of \\c *this. Note that the numbering starts at 0.\n///\n/// Example: \\include MatrixBase_col.cpp\n/// Output: \\verbinclude MatrixBase_col.out\n///\nEIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(column-major)\n/**\n  * \\sa row(), class Block */\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\nColXpr col(Index i)\n{\n  return ColXpr(derived(), i);\n}\n\n/// This is the const version of col().\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\nConstColXpr col(Index i) const\n{\n  return ConstColXpr(derived(), i);\n}\n\n/// \\returns an expression of the \\a i-th row of \\c *this. Note that the numbering starts at 0.\n///\n/// Example: \\include MatrixBase_row.cpp\n/// Output: \\verbinclude MatrixBase_row.out\n///\nEIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(row-major)\n/**\n  * \\sa col(), class Block */\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\nRowXpr row(Index i)\n{\n  return RowXpr(derived(), i);\n}\n\n/// This is the const version of row().\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\nConstRowXpr row(Index i) const\n{\n  return ConstRowXpr(derived(), i);\n}\n\n/// \\returns an expression of a segment (i.e. 
a vector block) in \\c *this with either dynamic or fixed sizes.\n///\n/// \\only_for_vectors\n///\n/// \\param start the first coefficient in the segment\n/// \\param n the number of coefficients in the segment\n/// \\tparam NType the type of the value handling the number of coefficients in the segment, typically Index.\n///\n/// Example: \\include MatrixBase_segment_int_int.cpp\n/// Output: \\verbinclude MatrixBase_segment_int_int.out\n///\n/// The number of coefficients \\a n can also be specified at compile-time by passing Eigen::fix<N>,\n/// or Eigen::fix<N>(n) as arguments.\n/// See \\link block(Index,Index,NRowsType,NColsType) block() \\endlink for the details.\n///\n/// \\note Even in the case that the returned expression has dynamic size, in the case\n/// when it is applied to a fixed-size vector, it inherits a fixed maximal size,\n/// which means that evaluating it does not cause a dynamic memory allocation.\n///\n/// \\sa block(Index,Index,NRowsType,NColsType), fix<N>, fix<N>(int), class Block\n///\ntemplate<typename NType>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n#ifndef EIGEN_PARSED_BY_DOXYGEN\ntypename FixedSegmentReturnType<internal::get_fixed_value<NType>::value>::Type\n#else\ntypename FixedSegmentReturnType<...>::Type\n#endif\nsegment(Index start, NType n)\n{\n  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)\n  return typename FixedSegmentReturnType<internal::get_fixed_value<NType>::value>::Type\n            (derived(), start, internal::get_runtime_value(n));\n}\n\n\n/// This is the const version of segment(Index,NType).\ntemplate<typename NType>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n#ifndef EIGEN_PARSED_BY_DOXYGEN\nconst typename ConstFixedSegmentReturnType<internal::get_fixed_value<NType>::value>::Type\n#else\nconst typename ConstFixedSegmentReturnType<...>::Type\n#endif\nsegment(Index start, NType n) const\n{\n  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)\n  return typename ConstFixedSegmentReturnType<internal::get_fixed_value<NType>::value>::Type\n            (derived(), start, internal::get_runtime_value(n));\n}\n\n/// \\returns an expression of the first coefficients of \\c *this with either dynamic or fixed sizes.\n///\n/// \\only_for_vectors\n///\n/// \\param n the number of coefficients in the segment\n/// \\tparam NType the type of the value handling the number of coefficients in the segment, typically Index.\n///\n/// Example: \\include MatrixBase_start_int.cpp\n/// Output: \\verbinclude MatrixBase_start_int.out\n///\n/// The number of coefficients \\a n can also be specified at compile-time by passing Eigen::fix<N>,\n/// or Eigen::fix<N>(n) as arguments.\n/// See \\link block(Index,Index,NRowsType,NColsType) block() \\endlink for the details.\n///\n/// \\note Even in the case that the returned expression has dynamic size, in the case\n/// when it is applied to a fixed-size vector, it inherits a fixed maximal size,\n/// which means that evaluating it does not cause a dynamic memory allocation.\n///\n/// \\sa class Block, block(Index,Index)\n///\ntemplate<typename NType>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n#ifndef EIGEN_PARSED_BY_DOXYGEN\ntypename FixedSegmentReturnType<internal::get_fixed_value<NType>::value>::Type\n#else\ntypename FixedSegmentReturnType<...>::Type\n#endif\nhead(NType n)\n{\n  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)\n  return typename FixedSegmentReturnType<internal::get_fixed_value<NType>::value>::Type\n              (derived(), 0, internal::get_runtime_value(n));\n}\n\n/// This is the const version of head(NType).\ntemplate<typename 
NType>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n#ifndef EIGEN_PARSED_BY_DOXYGEN\nconst typename ConstFixedSegmentReturnType<internal::get_fixed_value<NType>::value>::Type\n#else\nconst typename ConstFixedSegmentReturnType<...>::Type\n#endif\nhead(NType n) const\n{\n  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)\n  return typename ConstFixedSegmentReturnType<internal::get_fixed_value<NType>::value>::Type\n            (derived(), 0, internal::get_runtime_value(n));\n}\n\n/// \\returns an expression of the last coefficients of \\c *this with either dynamic or fixed sizes.\n///\n/// \\only_for_vectors\n///\n/// \\param n the number of coefficients in the segment\n/// \\tparam NType the type of the value handling the number of coefficients in the segment, typically Index.\n///\n/// Example: \\include MatrixBase_end_int.cpp\n/// Output: \\verbinclude MatrixBase_end_int.out\n///\n/// The number of coefficients \\a n can also be specified at compile-time by passing Eigen::fix<N>,\n/// or Eigen::fix<N>(n) as arguments.\n/// See \\link block(Index,Index,NRowsType,NColsType) block() \\endlink for the details.\n///\n/// \\note Even in the case that the returned expression has dynamic size, in the case\n/// when it is applied to a fixed-size vector, it inherits a fixed maximal size,\n/// which means that evaluating it does not cause a dynamic memory allocation.\n///\n/// \\sa class Block, block(Index,Index)\n///\ntemplate<typename NType>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n#ifndef EIGEN_PARSED_BY_DOXYGEN\ntypename FixedSegmentReturnType<internal::get_fixed_value<NType>::value>::Type\n#else\ntypename FixedSegmentReturnType<...>::Type\n#endif\ntail(NType n)\n{\n  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)\n  return typename FixedSegmentReturnType<internal::get_fixed_value<NType>::value>::Type\n            (derived(), this->size() - internal::get_runtime_value(n), internal::get_runtime_value(n));\n}\n\n/// This is the const version of tail(NType).\ntemplate<typename NType>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\n#ifndef EIGEN_PARSED_BY_DOXYGEN\nconst typename ConstFixedSegmentReturnType<internal::get_fixed_value<NType>::value>::Type\n#else\nconst typename ConstFixedSegmentReturnType<...>::Type\n#endif\ntail(NType n) const\n{\n  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)\n  return typename ConstFixedSegmentReturnType<internal::get_fixed_value<NType>::value>::Type\n            (derived(), this->size() - internal::get_runtime_value(n), internal::get_runtime_value(n));\n}\n\n/// \\returns a fixed-size expression of a segment (i.e. a vector block) in \\c *this.\n///\n/// \\only_for_vectors\n///\n/// \\tparam N the number of coefficients in the segment as specified at compile-time\n/// \\param start the index of the first element in the segment\n/// \\param n the number of coefficients in the segment as specified at run-time\n///\n/// The compile-time and run-time information should not contradict. In other words,\n/// \\a n should equal \\a N unless \\a N is \\a Dynamic.\n///\n/// Example: \\include MatrixBase_template_int_segment.cpp\n/// Output: \\verbinclude MatrixBase_template_int_segment.out\n///\n/// \\sa segment(Index,NType), class Block\n///\ntemplate<int N>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\ntypename FixedSegmentReturnType<N>::Type segment(Index start, Index n = N)\n{\n  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)\n  return typename FixedSegmentReturnType<N>::Type(derived(), start, n);\n}\n\n/// This is the const version of segment<int>().\ntemplate<int N>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\ntypename ConstFixedSegmentReturnType<N>::Type segment(Index start, Index n = N) const\n{\n  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)\n  return typename ConstFixedSegmentReturnType<N>::Type(derived(), start, n);\n}\n\n/// \\returns a fixed-size expression of the first coefficients of \\c *this.\n///\n/// \\only_for_vectors\n///\n/// \\tparam N the number of coefficients in the segment as specified at compile-time\n/// \\param  n the number of coefficients in the segment as specified at run-time\n///\n/// The compile-time and run-time information should not contradict. In other words,\n/// \\a n should equal \\a N unless \\a N is \\a Dynamic.\n///\n/// Example: \\include MatrixBase_template_int_start.cpp\n/// Output: \\verbinclude MatrixBase_template_int_start.out\n///\n/// \\sa head(NType), class Block\n///\ntemplate<int N>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\ntypename FixedSegmentReturnType<N>::Type head(Index n = N)\n{\n  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)\n  return typename FixedSegmentReturnType<N>::Type(derived(), 0, n);\n}\n\n/// This is the const version of head<int>().\ntemplate<int N>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\ntypename ConstFixedSegmentReturnType<N>::Type head(Index n = N) const\n{\n  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)\n  return typename ConstFixedSegmentReturnType<N>::Type(derived(), 0, n);\n}\n\n/// \\returns a fixed-size expression of the last coefficients of \\c *this.\n///\n/// \\only_for_vectors\n///\n/// \\tparam N the number of coefficients in the segment as specified at compile-time\n/// \\param  n the number of coefficients in the segment as specified at run-time\n///\n/// The compile-time and run-time information should not contradict. In other words,\n/// \\a n should equal \\a N unless \\a N is \\a Dynamic.\n///\n/// Example: \\include MatrixBase_template_int_end.cpp\n/// Output: \\verbinclude MatrixBase_template_int_end.out\n///\n/// \\sa tail(NType), class Block\n///\ntemplate<int N>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\ntypename FixedSegmentReturnType<N>::Type tail(Index n = N)\n{\n  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)\n  // pass the run-time size as well so that N==Dynamic is handled, mirroring head<int>() above\n  return typename FixedSegmentReturnType<N>::Type(derived(), size() - n, n);\n}\n\n/// This is the const version of tail<int>().\ntemplate<int N>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\ntypename ConstFixedSegmentReturnType<N>::Type tail(Index n = N) const\n{\n  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)\n  return typename ConstFixedSegmentReturnType<N>::Type(derived(), size() - n, n);\n}\n\n/// \\returns the \\a outer -th column (resp. row) of the matrix \\c *this if \\c *this\n/// is col-major (resp. row-major).\n///\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\nInnerVectorReturnType innerVector(Index outer)\n{ return InnerVectorReturnType(derived(), outer); }\n\n/// \\returns the \\a outer -th column (resp. row) of the matrix \\c *this if \\c *this\n/// is col-major (resp. row-major). 
Read-only.\n///\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\nconst ConstInnerVectorReturnType innerVector(Index outer) const\n{ return ConstInnerVectorReturnType(derived(), outer); }\n\n/// \\returns the \\a outer -th column (resp. row) of the matrix \\c *this if \\c *this\n/// is col-major (resp. row-major).\n///\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\nInnerVectorsReturnType\ninnerVectors(Index outerStart, Index outerSize)\n{\n  return Block<Derived,Dynamic,Dynamic,true>(derived(),\n                                             IsRowMajor ? outerStart : 0, IsRowMajor ? 0 : outerStart,\n                                             IsRowMajor ? outerSize : rows(), IsRowMajor ? cols() : outerSize);\n\n}\n\n/// \\returns the \\a outer -th column (resp. row) of the matrix \\c *this if \\c *this\n/// is col-major (resp. row-major). Read-only.\n///\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\nconst ConstInnerVectorsReturnType\ninnerVectors(Index outerStart, Index outerSize) const\n{\n  return Block<const Derived,Dynamic,Dynamic,true>(derived(),\n                                                  IsRowMajor ? outerStart : 0, IsRowMajor ? 0 : outerStart,\n                                                  IsRowMajor ? outerSize : rows(), IsRowMajor ? cols() : outerSize);\n\n}\n\n/** \\returns the i-th subvector (column or vector) according to the \\c Direction\n  * \\sa subVectors()\n  */\ntemplate<DirectionType Direction>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\ntypename internal::conditional<Direction==Vertical,ColXpr,RowXpr>::type\nsubVector(Index i)\n{\n  return typename internal::conditional<Direction==Vertical,ColXpr,RowXpr>::type(derived(),i);\n}\n\n/** This is the const version of subVector(Index) */\ntemplate<DirectionType Direction>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE\ntypename internal::conditional<Direction==Vertical,ConstColXpr,ConstRowXpr>::type\nsubVector(Index i) const\n{\n  return typename internal::conditional<Direction==Vertical,ConstColXpr,ConstRowXpr>::type(derived(),i);\n}\n\n/** \\returns the number of subvectors (rows or columns) in the direction \\c Direction\n  * \\sa subVector(Index)\n  */\ntemplate<DirectionType Direction>\nEIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR\nIndex subVectors() const\n{ return (Direction==Vertical)?cols():rows(); }\n"
  },
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/plugins/CommonCwiseBinaryOps.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008-2016 Gael Guennebaud <gael.guennebaud@inria.fr>\n// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n// This file is a base class plugin containing common coefficient wise functions.\n\n/** \\returns an expression of the difference of \\c *this and \\a other\n  *\n  * \\note If you want to substract a given scalar from all coefficients, see Cwise::operator-().\n  *\n  * \\sa class CwiseBinaryOp, operator-=()\n  */\nEIGEN_MAKE_CWISE_BINARY_OP(operator-,difference)\n\n/** \\returns an expression of the sum of \\c *this and \\a other\n  *\n  * \\note If you want to add a given scalar to all coefficients, see Cwise::operator+().\n  *\n  * \\sa class CwiseBinaryOp, operator+=()\n  */\nEIGEN_MAKE_CWISE_BINARY_OP(operator+,sum)\n\n/** \\returns an expression of a custom coefficient-wise operator \\a func of *this and \\a other\n  *\n  * The template parameter \\a CustomBinaryOp is the type of the functor\n  * of the custom operator (see class CwiseBinaryOp for an example)\n  *\n  * Here is an example illustrating the use of custom functors:\n  * \\include class_CwiseBinaryOp.cpp\n  * Output: \\verbinclude class_CwiseBinaryOp.out\n  *\n  * \\sa class CwiseBinaryOp, operator+(), operator-(), cwiseProduct()\n  */\ntemplate<typename CustomBinaryOp, typename OtherDerived>\nEIGEN_DEVICE_FUNC\nEIGEN_STRONG_INLINE const CwiseBinaryOp<CustomBinaryOp, const Derived, const OtherDerived>\nbinaryExpr(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other, const CustomBinaryOp& func = CustomBinaryOp()) const\n{\n  return CwiseBinaryOp<CustomBinaryOp, const Derived, const OtherDerived>(derived(), other.derived(), func);\n}\n\n\n#ifndef EIGEN_PARSED_BY_DOXYGEN\nEIGEN_MAKE_SCALAR_BINARY_OP(operator*,product)\n#else\n/** \\returns an expression of \\c *this scaled by the scalar factor \\a scalar\n  *\n  * \\tparam T is the scalar type of \\a scalar. It must be compatible with the scalar type of the given expression.\n  */\ntemplate<typename T>\nconst CwiseBinaryOp<internal::scalar_product_op<Scalar,T>,Derived,Constant<T> > operator*(const T& scalar) const;\n/** \\returns an expression of \\a expr scaled by the scalar factor \\a scalar\n  *\n  * \\tparam T is the scalar type of \\a scalar. It must be compatible with the scalar type of the given expression.\n  */\ntemplate<typename T> friend\nconst CwiseBinaryOp<internal::scalar_product_op<T,Scalar>,Constant<T>,Derived> operator*(const T& scalar, const StorageBaseType& expr);\n#endif\n\n\n\n#ifndef EIGEN_PARSED_BY_DOXYGEN\nEIGEN_MAKE_SCALAR_BINARY_OP_ONTHERIGHT(operator/,quotient)\n#else\n/** \\returns an expression of \\c *this divided by the scalar value \\a scalar\n  *\n  * \\tparam T is the scalar type of \\a scalar. 
It must be compatible with the scalar type of the given expression.\n  */\ntemplate<typename T>\nconst CwiseBinaryOp<internal::scalar_quotient_op<Scalar,T>,Derived,Constant<T> > operator/(const T& scalar) const;\n#endif\n\n/** \\returns an expression of the coefficient-wise boolean \\b and operator of \\c *this and \\a other\n  *\n  * \\warning this operator is for expression of bool only.\n  *\n  * Example: \\include Cwise_boolean_and.cpp\n  * Output: \\verbinclude Cwise_boolean_and.out\n  *\n  * \\sa operator||(), select()\n  */\ntemplate<typename OtherDerived>\nEIGEN_DEVICE_FUNC\ninline const CwiseBinaryOp<internal::scalar_boolean_and_op, const Derived, const OtherDerived>\noperator&&(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const\n{\n  EIGEN_STATIC_ASSERT((internal::is_same<bool,Scalar>::value && internal::is_same<bool,typename OtherDerived::Scalar>::value),\n                      THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_OF_BOOL);\n  return CwiseBinaryOp<internal::scalar_boolean_and_op, const Derived, const OtherDerived>(derived(),other.derived());\n}\n\n/** \\returns an expression of the coefficient-wise boolean \\b or operator of \\c *this and \\a other\n  *\n  * \\warning this operator is for expression of bool only.\n  *\n  * Example: \\include Cwise_boolean_or.cpp\n  * Output: \\verbinclude Cwise_boolean_or.out\n  *\n  * \\sa operator&&(), select()\n  */\ntemplate<typename OtherDerived>\nEIGEN_DEVICE_FUNC\ninline const CwiseBinaryOp<internal::scalar_boolean_or_op, const Derived, const OtherDerived>\noperator||(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const\n{\n  EIGEN_STATIC_ASSERT((internal::is_same<bool,Scalar>::value && internal::is_same<bool,typename OtherDerived::Scalar>::value),\n                      THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_OF_BOOL);\n  return CwiseBinaryOp<internal::scalar_boolean_or_op, const Derived, const OtherDerived>(derived(),other.derived());\n}\n"
  },
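EIGEN_MAKE_CWISE_BINARY_OP and the scalar-op macros above expand into the familiar operator+, operator-, and scalar * and / overloads, while binaryExpr is the generic escape hatch for arbitrary functors. A small sketch assuming Eigen 3.4; HypotOp and the values are made up for illustration:

#include <cmath>
#include <iostream>
#include <Eigen/Dense>

// Hypothetical coefficient-wise functor for binaryExpr.
struct HypotOp {
  double operator()(double a, double b) const { return std::sqrt(a * a + b * b); }
};

int main() {
  Eigen::Matrix2d a, b;
  a << 3, 0, 1, 2;
  b << 4, 1, 1, 2;

  // operator+ / operator- come from EIGEN_MAKE_CWISE_BINARY_OP.
  std::cout << (a + b) - b << "\n";       // recovers a

  // Scalar product and quotient come from the scalar binary-op macros.
  std::cout << (a * 2.0) / 2.0 << "\n";   // recovers a

  // binaryExpr applies a custom functor coefficient-wise.
  std::cout << a.binaryExpr(b, HypotOp()) << "\n";  // entry (0,0) is 5
  return 0;
}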
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/plugins/CommonCwiseUnaryOps.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>\n// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n// This file is a base class plugin containing common coefficient wise functions.\n\n#ifndef EIGEN_PARSED_BY_DOXYGEN\n\n/** \\internal the return type of conjugate() */\ntypedef typename internal::conditional<NumTraits<Scalar>::IsComplex,\n                    const CwiseUnaryOp<internal::scalar_conjugate_op<Scalar>, const Derived>,\n                    const Derived&\n                  >::type ConjugateReturnType;\n/** \\internal the return type of real() const */\ntypedef typename internal::conditional<NumTraits<Scalar>::IsComplex,\n                    const CwiseUnaryOp<internal::scalar_real_op<Scalar>, const Derived>,\n                    const Derived&\n                  >::type RealReturnType;\n/** \\internal the return type of real() */\ntypedef typename internal::conditional<NumTraits<Scalar>::IsComplex,\n                    CwiseUnaryView<internal::scalar_real_ref_op<Scalar>, Derived>,\n                    Derived&\n                  >::type NonConstRealReturnType;\n/** \\internal the return type of imag() const */\ntypedef CwiseUnaryOp<internal::scalar_imag_op<Scalar>, const Derived> ImagReturnType;\n/** \\internal the return type of imag() */\ntypedef CwiseUnaryView<internal::scalar_imag_ref_op<Scalar>, Derived> NonConstImagReturnType;\n\ntypedef CwiseUnaryOp<internal::scalar_opposite_op<Scalar>, const Derived> NegativeReturnType;\n\n#endif // not EIGEN_PARSED_BY_DOXYGEN\n\n/// \\returns an expression of the opposite of \\c *this\n///\nEIGEN_DOC_UNARY_ADDONS(operator-,opposite)\n///\nEIGEN_DEVICE_FUNC\ninline const NegativeReturnType\noperator-() const { return NegativeReturnType(derived()); }\n\n\ntemplate<class NewType> struct CastXpr { typedef typename internal::cast_return_type<Derived,const CwiseUnaryOp<internal::scalar_cast_op<Scalar, NewType>, const Derived> >::type Type; };\n\n/// \\returns an expression of \\c *this with the \\a Scalar type casted to\n/// \\a NewScalar.\n///\n/// The template parameter \\a NewScalar is the type we are casting the scalars to.\n///\nEIGEN_DOC_UNARY_ADDONS(cast,conversion function)\n///\n/// \\sa class CwiseUnaryOp\n///\ntemplate<typename NewType>\nEIGEN_DEVICE_FUNC\ntypename CastXpr<NewType>::Type\ncast() const\n{\n  return typename CastXpr<NewType>::Type(derived());\n}\n\n/// \\returns an expression of the complex conjugate of \\c *this.\n///\nEIGEN_DOC_UNARY_ADDONS(conjugate,complex conjugate)\n///\n/// \\sa <a href=\"group__CoeffwiseMathFunctions.html#cwisetable_conj\">Math functions</a>, MatrixBase::adjoint()\nEIGEN_DEVICE_FUNC\ninline ConjugateReturnType\nconjugate() const\n{\n  return ConjugateReturnType(derived());\n}\n\n/// \\returns an expression of the complex conjugate of \\c *this if Cond==true, returns derived() otherwise.\n///\nEIGEN_DOC_UNARY_ADDONS(conjugate,complex conjugate)\n///\n/// \\sa conjugate()\ntemplate<bool Cond>\nEIGEN_DEVICE_FUNC\ninline typename internal::conditional<Cond,ConjugateReturnType,const Derived&>::type\nconjugateIf() const\n{\n  typedef typename internal::conditional<Cond,ConjugateReturnType,const Derived&>::type ReturnType;\n  return 
ReturnType(derived());\n}\n\n/// \\returns a read-only expression of the real part of \\c *this.\n///\nEIGEN_DOC_UNARY_ADDONS(real,real part function)\n///\n/// \\sa imag()\nEIGEN_DEVICE_FUNC\ninline RealReturnType\nreal() const { return RealReturnType(derived()); }\n\n/// \\returns an read-only expression of the imaginary part of \\c *this.\n///\nEIGEN_DOC_UNARY_ADDONS(imag,imaginary part function)\n///\n/// \\sa real()\nEIGEN_DEVICE_FUNC\ninline const ImagReturnType\nimag() const { return ImagReturnType(derived()); }\n\n/// \\brief Apply a unary operator coefficient-wise\n/// \\param[in]  func  Functor implementing the unary operator\n/// \\tparam  CustomUnaryOp Type of \\a func\n/// \\returns An expression of a custom coefficient-wise unary operator \\a func of *this\n///\n/// The function \\c ptr_fun() from the C++ standard library can be used to make functors out of normal functions.\n///\n/// Example:\n/// \\include class_CwiseUnaryOp_ptrfun.cpp\n/// Output: \\verbinclude class_CwiseUnaryOp_ptrfun.out\n///\n/// Genuine functors allow for more possibilities, for instance it may contain a state.\n///\n/// Example:\n/// \\include class_CwiseUnaryOp.cpp\n/// Output: \\verbinclude class_CwiseUnaryOp.out\n///\nEIGEN_DOC_UNARY_ADDONS(unaryExpr,unary function)\n///\n/// \\sa unaryViewExpr, binaryExpr, class CwiseUnaryOp\n///\ntemplate<typename CustomUnaryOp>\nEIGEN_DEVICE_FUNC\ninline const CwiseUnaryOp<CustomUnaryOp, const Derived>\nunaryExpr(const CustomUnaryOp& func = CustomUnaryOp()) const\n{\n  return CwiseUnaryOp<CustomUnaryOp, const Derived>(derived(), func);\n}\n\n/// \\returns an expression of a custom coefficient-wise unary operator \\a func of *this\n///\n/// The template parameter \\a CustomUnaryOp is the type of the functor\n/// of the custom unary operator.\n///\n/// Example:\n/// \\include class_CwiseUnaryOp.cpp\n/// Output: \\verbinclude class_CwiseUnaryOp.out\n///\nEIGEN_DOC_UNARY_ADDONS(unaryViewExpr,unary function)\n///\n/// \\sa unaryExpr, binaryExpr class CwiseUnaryOp\n///\ntemplate<typename CustomViewOp>\nEIGEN_DEVICE_FUNC\ninline const CwiseUnaryView<CustomViewOp, const Derived>\nunaryViewExpr(const CustomViewOp& func = CustomViewOp()) const\n{\n  return CwiseUnaryView<CustomViewOp, const Derived>(derived(), func);\n}\n\n/// \\returns a non const expression of the real part of \\c *this.\n///\nEIGEN_DOC_UNARY_ADDONS(real,real part function)\n///\n/// \\sa imag()\nEIGEN_DEVICE_FUNC\ninline NonConstRealReturnType\nreal() { return NonConstRealReturnType(derived()); }\n\n/// \\returns a non const expression of the imaginary part of \\c *this.\n///\nEIGEN_DOC_UNARY_ADDONS(imag,imaginary part function)\n///\n/// \\sa real()\nEIGEN_DEVICE_FUNC\ninline NonConstImagReturnType\nimag() { return NonConstImagReturnType(derived()); }\n"
  },
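The unary plugin above supplies negation, cast<>, conjugate(), the real()/imag() views (writable in their non-const forms), and the unaryExpr escape hatch. A brief sketch, again assuming Eigen 3.4, with illustrative values:

#include <cmath>
#include <complex>
#include <iostream>
#include <Eigen/Dense>

int main() {
  Eigen::Matrix2cd c;
  c << std::complex<double>(1, 2), std::complex<double>(0, -1),
       std::complex<double>(3, 0), std::complex<double>(-2, 5);

  std::cout << (-c) << "\n" << c.conjugate() << "\n";

  // The non-const imag() view (NonConstImagReturnType) is writable.
  c.imag().setZero();
  std::cout << c.real() << "\n";

  // unaryExpr applies a custom functor; cast<NewType>() converts the scalar type.
  Eigen::Matrix2d d;
  d << 1.9, -2.1, 0.4, 3.7;
  Eigen::Matrix2i m = d.unaryExpr([](double x) { return std::floor(x); }).cast<int>();
  std::cout << m << "\n";
  return 0;
}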
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/plugins/IndexedViewMethods.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2017 Gael Guennebaud <gael.guennebaud@inria.fr>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n#if !defined(EIGEN_PARSED_BY_DOXYGEN)\n\n// This file is automatically included twice to generate const and non-const versions\n\n#ifndef EIGEN_INDEXED_VIEW_METHOD_2ND_PASS\n#define EIGEN_INDEXED_VIEW_METHOD_CONST const\n#define EIGEN_INDEXED_VIEW_METHOD_TYPE  ConstIndexedViewType\n#else\n#define EIGEN_INDEXED_VIEW_METHOD_CONST\n#define EIGEN_INDEXED_VIEW_METHOD_TYPE IndexedViewType\n#endif\n\n#ifndef EIGEN_INDEXED_VIEW_METHOD_2ND_PASS\nprotected:\n\n// define some aliases to ease readability\n\ntemplate<typename Indices>\nstruct IvcRowType : public internal::IndexedViewCompatibleType<Indices,RowsAtCompileTime> {};\n\ntemplate<typename Indices>\nstruct IvcColType : public internal::IndexedViewCompatibleType<Indices,ColsAtCompileTime> {};\n\ntemplate<typename Indices>\nstruct IvcType : public internal::IndexedViewCompatibleType<Indices,SizeAtCompileTime> {};\n\ntypedef typename internal::IndexedViewCompatibleType<Index,1>::type IvcIndex;\n\ntemplate<typename Indices>\ntypename IvcRowType<Indices>::type\nivcRow(const Indices& indices) const {\n  return internal::makeIndexedViewCompatible(indices, internal::variable_if_dynamic<Index,RowsAtCompileTime>(derived().rows()),Specialized);\n}\n\ntemplate<typename Indices>\ntypename IvcColType<Indices>::type\nivcCol(const Indices& indices) const {\n  return internal::makeIndexedViewCompatible(indices, internal::variable_if_dynamic<Index,ColsAtCompileTime>(derived().cols()),Specialized);\n}\n\ntemplate<typename Indices>\ntypename IvcColType<Indices>::type\nivcSize(const Indices& indices) const {\n  return internal::makeIndexedViewCompatible(indices, internal::variable_if_dynamic<Index,SizeAtCompileTime>(derived().size()),Specialized);\n}\n\npublic:\n\n#endif\n\ntemplate<typename RowIndices, typename ColIndices>\nstruct EIGEN_INDEXED_VIEW_METHOD_TYPE {\n  typedef IndexedView<EIGEN_INDEXED_VIEW_METHOD_CONST Derived,\n                      typename IvcRowType<RowIndices>::type,\n                      typename IvcColType<ColIndices>::type> type;\n};\n\n// This is the generic version\n\ntemplate<typename RowIndices, typename ColIndices>\ntypename internal::enable_if<internal::valid_indexed_view_overload<RowIndices,ColIndices>::value\n  && internal::traits<typename EIGEN_INDEXED_VIEW_METHOD_TYPE<RowIndices,ColIndices>::type>::ReturnAsIndexedView,\n  typename EIGEN_INDEXED_VIEW_METHOD_TYPE<RowIndices,ColIndices>::type >::type\noperator()(const RowIndices& rowIndices, const ColIndices& colIndices) EIGEN_INDEXED_VIEW_METHOD_CONST\n{\n  return typename EIGEN_INDEXED_VIEW_METHOD_TYPE<RowIndices,ColIndices>::type\n            (derived(), ivcRow(rowIndices), ivcCol(colIndices));\n}\n\n// The following overload returns a Block<> object\n\ntemplate<typename RowIndices, typename ColIndices>\ntypename internal::enable_if<internal::valid_indexed_view_overload<RowIndices,ColIndices>::value\n  && internal::traits<typename EIGEN_INDEXED_VIEW_METHOD_TYPE<RowIndices,ColIndices>::type>::ReturnAsBlock,\n  typename internal::traits<typename EIGEN_INDEXED_VIEW_METHOD_TYPE<RowIndices,ColIndices>::type>::BlockType>::type\noperator()(const RowIndices& rowIndices, const ColIndices& colIndices) 
EIGEN_INDEXED_VIEW_METHOD_CONST\n{\n  typedef typename internal::traits<typename EIGEN_INDEXED_VIEW_METHOD_TYPE<RowIndices,ColIndices>::type>::BlockType BlockType;\n  typename IvcRowType<RowIndices>::type actualRowIndices = ivcRow(rowIndices);\n  typename IvcColType<ColIndices>::type actualColIndices = ivcCol(colIndices);\n  return BlockType(derived(),\n                   internal::first(actualRowIndices),\n                   internal::first(actualColIndices),\n                   internal::size(actualRowIndices),\n                   internal::size(actualColIndices));\n}\n\n// The following overload returns a Scalar\n\ntemplate<typename RowIndices, typename ColIndices>\ntypename internal::enable_if<internal::valid_indexed_view_overload<RowIndices,ColIndices>::value\n  && internal::traits<typename EIGEN_INDEXED_VIEW_METHOD_TYPE<RowIndices,ColIndices>::type>::ReturnAsScalar,\n  CoeffReturnType >::type\noperator()(const RowIndices& rowIndices, const ColIndices& colIndices) EIGEN_INDEXED_VIEW_METHOD_CONST\n{\n  return Base::operator()(internal::eval_expr_given_size(rowIndices,rows()),internal::eval_expr_given_size(colIndices,cols()));\n}\n\n#if EIGEN_HAS_STATIC_ARRAY_TEMPLATE\n\n// The following three overloads are needed to handle raw Index[N] arrays.\n\ntemplate<typename RowIndicesT, std::size_t RowIndicesN, typename ColIndices>\nIndexedView<EIGEN_INDEXED_VIEW_METHOD_CONST Derived,const RowIndicesT (&)[RowIndicesN],typename IvcColType<ColIndices>::type>\noperator()(const RowIndicesT (&rowIndices)[RowIndicesN], const ColIndices& colIndices) EIGEN_INDEXED_VIEW_METHOD_CONST\n{\n  return IndexedView<EIGEN_INDEXED_VIEW_METHOD_CONST Derived,const RowIndicesT (&)[RowIndicesN],typename IvcColType<ColIndices>::type>\n                    (derived(), rowIndices, ivcCol(colIndices));\n}\n\ntemplate<typename RowIndices, typename ColIndicesT, std::size_t ColIndicesN>\nIndexedView<EIGEN_INDEXED_VIEW_METHOD_CONST Derived,typename IvcRowType<RowIndices>::type, const ColIndicesT (&)[ColIndicesN]>\noperator()(const RowIndices& rowIndices, const ColIndicesT (&colIndices)[ColIndicesN]) EIGEN_INDEXED_VIEW_METHOD_CONST\n{\n  return IndexedView<EIGEN_INDEXED_VIEW_METHOD_CONST Derived,typename IvcRowType<RowIndices>::type,const ColIndicesT (&)[ColIndicesN]>\n                    (derived(), ivcRow(rowIndices), colIndices);\n}\n\ntemplate<typename RowIndicesT, std::size_t RowIndicesN, typename ColIndicesT, std::size_t ColIndicesN>\nIndexedView<EIGEN_INDEXED_VIEW_METHOD_CONST Derived,const RowIndicesT (&)[RowIndicesN], const ColIndicesT (&)[ColIndicesN]>\noperator()(const RowIndicesT (&rowIndices)[RowIndicesN], const ColIndicesT (&colIndices)[ColIndicesN]) EIGEN_INDEXED_VIEW_METHOD_CONST\n{\n  return IndexedView<EIGEN_INDEXED_VIEW_METHOD_CONST Derived,const RowIndicesT (&)[RowIndicesN],const ColIndicesT (&)[ColIndicesN]>\n                    (derived(), rowIndices, colIndices);\n}\n\n#endif // EIGEN_HAS_STATIC_ARRAY_TEMPLATE\n\n// Overloads for 1D vectors/arrays\n\ntemplate<typename Indices>\ntypename internal::enable_if<\n  IsRowMajor && (!(internal::get_compile_time_incr<typename IvcType<Indices>::type>::value==1 || internal::is_valid_index_type<Indices>::value)),\n  IndexedView<EIGEN_INDEXED_VIEW_METHOD_CONST Derived,IvcIndex,typename IvcType<Indices>::type> >::type\noperator()(const Indices& indices) EIGEN_INDEXED_VIEW_METHOD_CONST\n{\n  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)\n  return IndexedView<EIGEN_INDEXED_VIEW_METHOD_CONST Derived,IvcIndex,typename IvcType<Indices>::type>\n            (derived(), 
IvcIndex(0), ivcCol(indices));\n}\n\ntemplate<typename Indices>\ntypename internal::enable_if<\n  (!IsRowMajor) && (!(internal::get_compile_time_incr<typename IvcType<Indices>::type>::value==1 || internal::is_valid_index_type<Indices>::value)),\n  IndexedView<EIGEN_INDEXED_VIEW_METHOD_CONST Derived,typename IvcType<Indices>::type,IvcIndex> >::type\noperator()(const Indices& indices) EIGEN_INDEXED_VIEW_METHOD_CONST\n{\n  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)\n  return IndexedView<EIGEN_INDEXED_VIEW_METHOD_CONST Derived,typename IvcType<Indices>::type,IvcIndex>\n            (derived(), ivcRow(indices), IvcIndex(0));\n}\n\ntemplate<typename Indices>\ntypename internal::enable_if<\n  (internal::get_compile_time_incr<typename IvcType<Indices>::type>::value==1) && (!internal::is_valid_index_type<Indices>::value) && (!symbolic::is_symbolic<Indices>::value),\n  VectorBlock<EIGEN_INDEXED_VIEW_METHOD_CONST Derived,internal::array_size<Indices>::value> >::type\noperator()(const Indices& indices) EIGEN_INDEXED_VIEW_METHOD_CONST\n{\n  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)\n  typename IvcType<Indices>::type actualIndices = ivcSize(indices);\n  return VectorBlock<EIGEN_INDEXED_VIEW_METHOD_CONST Derived,internal::array_size<Indices>::value>\n            (derived(), internal::first(actualIndices), internal::size(actualIndices));\n}\n\ntemplate<typename IndexType>\ntypename internal::enable_if<symbolic::is_symbolic<IndexType>::value, CoeffReturnType >::type\noperator()(const IndexType& id) EIGEN_INDEXED_VIEW_METHOD_CONST\n{\n  return Base::operator()(internal::eval_expr_given_size(id,size()));\n}\n\n#if EIGEN_HAS_STATIC_ARRAY_TEMPLATE\n\ntemplate<typename IndicesT, std::size_t IndicesN>\ntypename internal::enable_if<IsRowMajor,\n  IndexedView<EIGEN_INDEXED_VIEW_METHOD_CONST Derived,IvcIndex,const IndicesT (&)[IndicesN]> >::type\noperator()(const IndicesT (&indices)[IndicesN]) EIGEN_INDEXED_VIEW_METHOD_CONST\n{\n  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)\n  return IndexedView<EIGEN_INDEXED_VIEW_METHOD_CONST Derived,IvcIndex,const IndicesT (&)[IndicesN]>\n            (derived(), IvcIndex(0), indices);\n}\n\ntemplate<typename IndicesT, std::size_t IndicesN>\ntypename internal::enable_if<!IsRowMajor,\n  IndexedView<EIGEN_INDEXED_VIEW_METHOD_CONST Derived,const IndicesT (&)[IndicesN],IvcIndex> >::type\noperator()(const IndicesT (&indices)[IndicesN]) EIGEN_INDEXED_VIEW_METHOD_CONST\n{\n  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)\n  return IndexedView<EIGEN_INDEXED_VIEW_METHOD_CONST Derived,const IndicesT (&)[IndicesN],IvcIndex>\n            (derived(), indices, IvcIndex(0));\n}\n\n#endif // EIGEN_HAS_STATIC_ARRAY_TEMPLATE\n\n#undef EIGEN_INDEXED_VIEW_METHOD_CONST\n#undef EIGEN_INDEXED_VIEW_METHOD_TYPE\n\n#ifndef EIGEN_INDEXED_VIEW_METHOD_2ND_PASS\n#define EIGEN_INDEXED_VIEW_METHOD_2ND_PASS\n#include \"IndexedViewMethods.h\"\n#undef EIGEN_INDEXED_VIEW_METHOD_2ND_PASS\n#endif\n\n#else // EIGEN_PARSED_BY_DOXYGEN\n\n/**\n  * \\returns a generic submatrix view defined by the rows and columns indexed \\a rowIndices and \\a colIndices respectively.\n  *\n  * Each parameter must either be:\n  *  - An integer indexing a single row or column\n  *  - Eigen::all indexing the full set of respective rows or columns in increasing order\n  *  - An ArithmeticSequence as returned by the Eigen::seq and Eigen::seqN functions\n  *  - Any %Eigen's vector/array of integers or expressions\n  *  - Plain C arrays: \\c int[N]\n  *  - And more generally any type exposing the following two member functions:\n  * \\code\n  * <integral 
type> operator[](<integral type>) const;\n  * <integral type> size() const;\n  * \\endcode\n  * where \\c <integral \\c type>  stands for any integer type compatible with Eigen::Index (i.e. \\c std::ptrdiff_t).\n  *\n  * The last statement implies compatibility with \\c std::vector, \\c std::valarray, \\c std::array, many of the Range-v3's ranges, etc.\n  *\n  * If the submatrix can be represented using a starting position \\c (i,j) and positive sizes \\c (rows,columns), then this\n  * method will return a Block object after extraction of the relevant information from the passed arguments. This is the case\n  * when all arguments are either:\n  *  - An integer\n  *  - Eigen::all\n  *  - An ArithmeticSequence with compile-time increment strictly equal to 1, as returned by Eigen::seq(a,b), and Eigen::seqN(a,N).\n  *\n  * Otherwise a more general IndexedView<Derived,RowIndices',ColIndices'> object will be returned, after conversion of the inputs\n  * to more suitable types \\c RowIndices' and \\c ColIndices'.\n  *\n  * For 1D vectors and arrays, it is better to use the operator()(const Indices&) overload, which behaves the same way but takes a single parameter.\n  *\n  * See also this <a href=\"https://stackoverflow.com/questions/46110917/eigen-replicate-items-along-one-dimension-without-useless-allocations\">question</a> and its answer for an example of how to duplicate coefficients.\n  *\n  * \\sa operator()(const Indices&), class Block, class IndexedView, DenseBase::block(Index,Index,Index,Index)\n  */\ntemplate<typename RowIndices, typename ColIndices>\nIndexedView_or_Block\noperator()(const RowIndices& rowIndices, const ColIndices& colIndices);\n\n/** This is an overload of operator()(const RowIndices&, const ColIndices&) for 1D vectors or arrays\n  *\n  * \\only_for_vectors\n  */\ntemplate<typename Indices>\nIndexedView_or_VectorBlock\noperator()(const Indices& indices);\n\n#endif  // EIGEN_PARSED_BY_DOXYGEN\n"
  },
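As the doxygen block above explains, operator()(rowIndices, colIndices) dispatches to a Block, an IndexedView, or a plain scalar depending on the argument types. A compact sketch assuming Eigen 3.4; the index sets are arbitrary examples:

#include <iostream>
#include <vector>
#include <Eigen/Dense>

int main() {
  using Eigen::all;
  using Eigen::seq;

  Eigen::MatrixXd A = Eigen::MatrixXd::Random(5, 5);

  // Unit-increment sequence + all -> the Block-returning overload.
  auto blk = A(seq(1, 3), all);
  std::cout << blk.rows() << "x" << blk.cols() << "\n";   // 3x5

  // Arbitrary index containers -> the generic IndexedView overload.
  std::vector<int> rows = {4, 0, 2};
  std::cout << A(rows, seq(0, Eigen::last, 2)) << "\n";   // rows 4,0,2; columns 0,2,4

  // The 1D overload on vectors also accepts raw Index arrays.
  Eigen::VectorXd v = Eigen::VectorXd::LinSpaced(6, 0, 5);
  int idx[] = {5, 1, 3};
  std::cout << v(idx).transpose() << "\n";                // 5 1 3
  return 0;
}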
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/plugins/MatrixCwiseBinaryOps.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>\n// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n// This file is a base class plugin containing matrix specifics coefficient wise functions.\n\n/** \\returns an expression of the Schur product (coefficient wise product) of *this and \\a other\n  *\n  * Example: \\include MatrixBase_cwiseProduct.cpp\n  * Output: \\verbinclude MatrixBase_cwiseProduct.out\n  *\n  * \\sa class CwiseBinaryOp, cwiseAbs2\n  */\ntemplate<typename OtherDerived>\nEIGEN_DEVICE_FUNC\nEIGEN_STRONG_INLINE const EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,product)\ncwiseProduct(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const\n{\n  return EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,product)(derived(), other.derived());\n}\n\n/** \\returns an expression of the coefficient-wise == operator of *this and \\a other\n  *\n  * \\warning this performs an exact comparison, which is generally a bad idea with floating-point types.\n  * In order to check for equality between two vectors or matrices with floating-point coefficients, it is\n  * generally a far better idea to use a fuzzy comparison as provided by isApprox() and\n  * isMuchSmallerThan().\n  *\n  * Example: \\include MatrixBase_cwiseEqual.cpp\n  * Output: \\verbinclude MatrixBase_cwiseEqual.out\n  *\n  * \\sa cwiseNotEqual(), isApprox(), isMuchSmallerThan()\n  */\ntemplate<typename OtherDerived>\nEIGEN_DEVICE_FUNC\ninline const CwiseBinaryOp<numext::equal_to<Scalar>, const Derived, const OtherDerived>\ncwiseEqual(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const\n{\n  return CwiseBinaryOp<numext::equal_to<Scalar>, const Derived, const OtherDerived>(derived(), other.derived());\n}\n\n/** \\returns an expression of the coefficient-wise != operator of *this and \\a other\n  *\n  * \\warning this performs an exact comparison, which is generally a bad idea with floating-point types.\n  * In order to check for equality between two vectors or matrices with floating-point coefficients, it is\n  * generally a far better idea to use a fuzzy comparison as provided by isApprox() and\n  * isMuchSmallerThan().\n  *\n  * Example: \\include MatrixBase_cwiseNotEqual.cpp\n  * Output: \\verbinclude MatrixBase_cwiseNotEqual.out\n  *\n  * \\sa cwiseEqual(), isApprox(), isMuchSmallerThan()\n  */\ntemplate<typename OtherDerived>\nEIGEN_DEVICE_FUNC\ninline const CwiseBinaryOp<numext::not_equal_to<Scalar>, const Derived, const OtherDerived>\ncwiseNotEqual(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const\n{\n  return CwiseBinaryOp<numext::not_equal_to<Scalar>, const Derived, const OtherDerived>(derived(), other.derived());\n}\n\n/** \\returns an expression of the coefficient-wise min of *this and \\a other\n  *\n  * Example: \\include MatrixBase_cwiseMin.cpp\n  * Output: \\verbinclude MatrixBase_cwiseMin.out\n  *\n  * \\sa class CwiseBinaryOp, max()\n  */\ntemplate<typename OtherDerived>\nEIGEN_DEVICE_FUNC\nEIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_min_op<Scalar,Scalar>, const Derived, const OtherDerived>\ncwiseMin(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const\n{\n  return 
CwiseBinaryOp<internal::scalar_min_op<Scalar,Scalar>, const Derived, const OtherDerived>(derived(), other.derived());\n}\n\n/** \\returns an expression of the coefficient-wise min of *this and scalar \\a other\n  *\n  * \\sa class CwiseBinaryOp, min()\n  */\nEIGEN_DEVICE_FUNC\nEIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_min_op<Scalar,Scalar>, const Derived, const ConstantReturnType>\ncwiseMin(const Scalar &other) const\n{\n  return cwiseMin(Derived::Constant(rows(), cols(), other));\n}\n\n/** \\returns an expression of the coefficient-wise max of *this and \\a other\n  *\n  * Example: \\include MatrixBase_cwiseMax.cpp\n  * Output: \\verbinclude MatrixBase_cwiseMax.out\n  *\n  * \\sa class CwiseBinaryOp, min()\n  */\ntemplate<typename OtherDerived>\nEIGEN_DEVICE_FUNC\nEIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_max_op<Scalar,Scalar>, const Derived, const OtherDerived>\ncwiseMax(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const\n{\n  return CwiseBinaryOp<internal::scalar_max_op<Scalar,Scalar>, const Derived, const OtherDerived>(derived(), other.derived());\n}\n\n/** \\returns an expression of the coefficient-wise max of *this and scalar \\a other\n  *\n  * \\sa class CwiseBinaryOp, min()\n  */\nEIGEN_DEVICE_FUNC\nEIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_max_op<Scalar,Scalar>, const Derived, const ConstantReturnType>\ncwiseMax(const Scalar &other) const\n{\n  return cwiseMax(Derived::Constant(rows(), cols(), other));\n}\n\n\n/** \\returns an expression of the coefficient-wise quotient of *this and \\a other\n  *\n  * Example: \\include MatrixBase_cwiseQuotient.cpp\n  * Output: \\verbinclude MatrixBase_cwiseQuotient.out\n  *\n  * \\sa class CwiseBinaryOp, cwiseProduct(), cwiseInverse()\n  */\ntemplate<typename OtherDerived>\nEIGEN_DEVICE_FUNC\nEIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_quotient_op<Scalar>, const Derived, const OtherDerived>\ncwiseQuotient(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const\n{\n  return CwiseBinaryOp<internal::scalar_quotient_op<Scalar>, const Derived, const OtherDerived>(derived(), other.derived());\n}\n\ntypedef CwiseBinaryOp<internal::scalar_cmp_op<Scalar,Scalar,internal::cmp_EQ>, const Derived, const ConstantReturnType> CwiseScalarEqualReturnType;\n\n/** \\returns an expression of the coefficient-wise == operator of \\c *this and a scalar \\a s\n  *\n  * \\warning this performs an exact comparison, which is generally a bad idea with floating-point types.\n  * In order to check for equality between two vectors or matrices with floating-point coefficients, it is\n  * generally a far better idea to use a fuzzy comparison as provided by isApprox() and\n  * isMuchSmallerThan().\n  *\n  * \\sa cwiseEqual(const MatrixBase<OtherDerived> &) const\n  */\nEIGEN_DEVICE_FUNC\ninline const CwiseScalarEqualReturnType\ncwiseEqual(const Scalar& s) const\n{\n  return CwiseScalarEqualReturnType(derived(), Derived::Constant(rows(), cols(), s), internal::scalar_cmp_op<Scalar,Scalar,internal::cmp_EQ>());\n}\n"
  },
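A short sketch of the matrix-specific coefficient-wise binaries above (cwiseProduct, cwiseQuotient, cwiseMin/cwiseMax with both matrix and scalar arguments, and the exact comparison cwiseEqual), assuming Eigen 3.4 and illustrative values:

#include <iostream>
#include <Eigen/Dense>

int main() {
  Eigen::Matrix2d a, b;
  a << 1, 2, 3, 4;
  b << 4, 3, 2, 1;

  std::cout << a.cwiseProduct(b) << "\n";   // Schur product
  std::cout << a.cwiseQuotient(b) << "\n";  // coefficient-wise division
  std::cout << a.cwiseMin(b) << "\n";       // coefficient-wise minimum
  std::cout << a.cwiseMax(2.5) << "\n";     // scalar overload clamps from below

  // cwiseEqual is an exact comparison (prefer isApprox for floating point);
  // count() tallies the coefficients that compare equal.
  std::cout << a.cwiseEqual(b.transpose()).count() << "\n";  // 2
  return 0;
}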
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/plugins/MatrixCwiseUnaryOps.h",
    "content": "// This file is part of Eigen, a lightweight C++ template library\n// for linear algebra.\n//\n// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>\n// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>\n//\n// This Source Code Form is subject to the terms of the Mozilla\n// Public License v. 2.0. If a copy of the MPL was not distributed\n// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n// This file is included into the body of the base classes supporting matrix specific coefficient-wise functions.\n// This include MatrixBase and SparseMatrixBase.\n\n\ntypedef CwiseUnaryOp<internal::scalar_abs_op<Scalar>, const Derived> CwiseAbsReturnType;\ntypedef CwiseUnaryOp<internal::scalar_abs2_op<Scalar>, const Derived> CwiseAbs2ReturnType;\ntypedef CwiseUnaryOp<internal::scalar_arg_op<Scalar>, const Derived> CwiseArgReturnType;\ntypedef CwiseUnaryOp<internal::scalar_sqrt_op<Scalar>, const Derived> CwiseSqrtReturnType;\ntypedef CwiseUnaryOp<internal::scalar_sign_op<Scalar>, const Derived> CwiseSignReturnType;\ntypedef CwiseUnaryOp<internal::scalar_inverse_op<Scalar>, const Derived> CwiseInverseReturnType;\n\n/// \\returns an expression of the coefficient-wise absolute value of \\c *this\n///\n/// Example: \\include MatrixBase_cwiseAbs.cpp\n/// Output: \\verbinclude MatrixBase_cwiseAbs.out\n///\nEIGEN_DOC_UNARY_ADDONS(cwiseAbs,absolute value)\n///\n/// \\sa cwiseAbs2()\n///\nEIGEN_DEVICE_FUNC\nEIGEN_STRONG_INLINE const CwiseAbsReturnType\ncwiseAbs() const { return CwiseAbsReturnType(derived()); }\n\n/// \\returns an expression of the coefficient-wise squared absolute value of \\c *this\n///\n/// Example: \\include MatrixBase_cwiseAbs2.cpp\n/// Output: \\verbinclude MatrixBase_cwiseAbs2.out\n///\nEIGEN_DOC_UNARY_ADDONS(cwiseAbs2,squared absolute value)\n///\n/// \\sa cwiseAbs()\n///\nEIGEN_DEVICE_FUNC\nEIGEN_STRONG_INLINE const CwiseAbs2ReturnType\ncwiseAbs2() const { return CwiseAbs2ReturnType(derived()); }\n\n/// \\returns an expression of the coefficient-wise square root of *this.\n///\n/// Example: \\include MatrixBase_cwiseSqrt.cpp\n/// Output: \\verbinclude MatrixBase_cwiseSqrt.out\n///\nEIGEN_DOC_UNARY_ADDONS(cwiseSqrt,square-root)\n///\n/// \\sa cwisePow(), cwiseSquare()\n///\nEIGEN_DEVICE_FUNC\ninline const CwiseSqrtReturnType\ncwiseSqrt() const { return CwiseSqrtReturnType(derived()); }\n\n/// \\returns an expression of the coefficient-wise signum of *this.\n///\n/// Example: \\include MatrixBase_cwiseSign.cpp\n/// Output: \\verbinclude MatrixBase_cwiseSign.out\n///\nEIGEN_DOC_UNARY_ADDONS(cwiseSign,sign function)\n///\nEIGEN_DEVICE_FUNC\ninline const CwiseSignReturnType\ncwiseSign() const { return CwiseSignReturnType(derived()); }\n\n\n/// \\returns an expression of the coefficient-wise inverse of *this.\n///\n/// Example: \\include MatrixBase_cwiseInverse.cpp\n/// Output: \\verbinclude MatrixBase_cwiseInverse.out\n///\nEIGEN_DOC_UNARY_ADDONS(cwiseInverse,inverse)\n///\n/// \\sa cwiseProduct()\n///\nEIGEN_DEVICE_FUNC\ninline const CwiseInverseReturnType\ncwiseInverse() const { return CwiseInverseReturnType(derived()); }\n\n/// \\returns an expression of the coefficient-wise phase angle of \\c *this\n///\n/// Example: \\include MatrixBase_cwiseArg.cpp\n/// Output: \\verbinclude MatrixBase_cwiseArg.out\n///\nEIGEN_DOC_UNARY_ADDONS(cwiseArg,arg)\n\nEIGEN_DEVICE_FUNC\ninline const CwiseArgReturnType\ncwiseArg() const { return CwiseArgReturnType(derived()); }\n"
  },
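The unary counterparts above are equally mechanical; a minimal sketch with illustrative values, assuming Eigen 3.4:

#include <iostream>
#include <Eigen/Dense>

int main() {
  Eigen::Vector3d v(-4.0, 9.0, -0.25);

  std::cout << v.cwiseAbs().transpose()     << "\n";  // 4 9 0.25
  std::cout << v.cwiseAbs2().transpose()    << "\n";  // 16 81 0.0625
  std::cout << v.cwiseSign().transpose()    << "\n";  // -1 1 -1
  std::cout << v.cwiseInverse().transpose() << "\n";  // -0.25 0.111... -4

  // cwiseSqrt of a negative double is NaN, so take the absolute value first.
  std::cout << v.cwiseAbs().cwiseSqrt().transpose() << "\n";  // 2 3 0.5
  return 0;
}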
  {
    "path": "external_libs/eigen-3.4.0/Eigen/src/plugins/ReshapedMethods.h",
    "content": "\n#ifdef EIGEN_PARSED_BY_DOXYGEN\n\n/// \\returns an expression of \\c *this with reshaped sizes.\n///\n/// \\param nRows the number of rows in the reshaped expression, specified at either run-time or compile-time, or AutoSize\n/// \\param nCols the number of columns in the reshaped expression, specified at either run-time or compile-time, or AutoSize\n/// \\tparam Order specifies whether the coefficients should be processed in column-major-order (ColMajor), in row-major-order (RowMajor),\n///               or follows the \\em natural order of the nested expression (AutoOrder). The default is ColMajor.\n/// \\tparam NRowsType the type of the value handling the number of rows, typically Index.\n/// \\tparam NColsType the type of the value handling the number of columns, typically Index.\n///\n/// Dynamic size example: \\include MatrixBase_reshaped_int_int.cpp\n/// Output: \\verbinclude MatrixBase_reshaped_int_int.out\n///\n/// The number of rows \\a nRows and columns \\a nCols can also be specified at compile-time by passing Eigen::fix<N>,\n/// or Eigen::fix<N>(n) as arguments. In the later case, \\c n plays the role of a runtime fallback value in case \\c N equals Eigen::Dynamic.\n/// Here is an example with a fixed number of rows and columns:\n/// \\include MatrixBase_reshaped_fixed.cpp\n/// Output: \\verbinclude MatrixBase_reshaped_fixed.out\n///\n/// Finally, one of the sizes parameter can be automatically deduced from the other one by passing AutoSize as in the following example:\n/// \\include MatrixBase_reshaped_auto.cpp\n/// Output: \\verbinclude MatrixBase_reshaped_auto.out\n/// AutoSize does preserve compile-time sizes when possible, i.e., when the sizes of the input are known at compile time \\b and\n/// that the other size is passed at compile-time using Eigen::fix<N> as above.\n///\n/// \\sa class Reshaped, fix, fix<N>(int)\n///\ntemplate<int Order = ColMajor, typename NRowsType, typename NColsType>\nEIGEN_DEVICE_FUNC\ninline Reshaped<Derived,...>\nreshaped(NRowsType nRows, NColsType nCols);\n\n/// This is the const version of reshaped(NRowsType,NColsType).\ntemplate<int Order = ColMajor, typename NRowsType, typename NColsType>\nEIGEN_DEVICE_FUNC\ninline const Reshaped<const Derived,...>\nreshaped(NRowsType nRows, NColsType nCols) const;\n\n/// \\returns an expression of \\c *this with columns (or rows) stacked to a linear column vector\n///\n/// \\tparam Order specifies whether the coefficients should be processed in column-major-order (ColMajor), in row-major-order (RowMajor),\n///               or follows the \\em natural order of the nested expression (AutoOrder). 
The default is ColMajor.\n///\n/// This overloads is essentially a shortcut for `A.reshaped<Order>(AutoSize,fix<1>)`.\n///\n/// - If `Order==ColMajor` (the default), then it returns a column-vector from the stacked columns of \\c *this.\n/// - If `Order==RowMajor`, then it returns a column-vector from the stacked rows of \\c *this.\n/// - If `Order==AutoOrder`, then it returns a column-vector with elements stacked following the storage order of \\c *this.\n///   This mode is the recommended one when the particular ordering of the element is not relevant.\n///\n/// Example:\n/// \\include MatrixBase_reshaped_to_vector.cpp\n/// Output: \\verbinclude MatrixBase_reshaped_to_vector.out\n///\n/// If you want more control, you can still fall back to reshaped(NRowsType,NColsType).\n///\n/// \\sa reshaped(NRowsType,NColsType), class Reshaped\n///\ntemplate<int Order = ColMajor>\nEIGEN_DEVICE_FUNC\ninline Reshaped<Derived,...>\nreshaped();\n\n/// This is the const version of reshaped().\ntemplate<int Order = ColMajor>\nEIGEN_DEVICE_FUNC\ninline const Reshaped<const Derived,...>\nreshaped() const;\n\n#else\n\n// This file is automatically included twice to generate const and non-const versions\n\n#ifndef EIGEN_RESHAPED_METHOD_2ND_PASS\n#define EIGEN_RESHAPED_METHOD_CONST const\n#else\n#define EIGEN_RESHAPED_METHOD_CONST\n#endif\n\n#ifndef EIGEN_RESHAPED_METHOD_2ND_PASS\n\n// This part is included once\n\n#endif\n\ntemplate<typename NRowsType, typename NColsType>\nEIGEN_DEVICE_FUNC\ninline Reshaped<EIGEN_RESHAPED_METHOD_CONST Derived,\n                internal::get_compiletime_reshape_size<NRowsType,NColsType,SizeAtCompileTime>::value,\n                internal::get_compiletime_reshape_size<NColsType,NRowsType,SizeAtCompileTime>::value>\nreshaped(NRowsType nRows, NColsType nCols) EIGEN_RESHAPED_METHOD_CONST\n{\n  return Reshaped<EIGEN_RESHAPED_METHOD_CONST Derived,\n                  internal::get_compiletime_reshape_size<NRowsType,NColsType,SizeAtCompileTime>::value,\n                  internal::get_compiletime_reshape_size<NColsType,NRowsType,SizeAtCompileTime>::value>\n                (derived(),\n                 internal::get_runtime_reshape_size(nRows,internal::get_runtime_value(nCols),size()),\n                 internal::get_runtime_reshape_size(nCols,internal::get_runtime_value(nRows),size()));\n}\n\ntemplate<int Order, typename NRowsType, typename NColsType>\nEIGEN_DEVICE_FUNC\ninline Reshaped<EIGEN_RESHAPED_METHOD_CONST Derived,\n                internal::get_compiletime_reshape_size<NRowsType,NColsType,SizeAtCompileTime>::value,\n                internal::get_compiletime_reshape_size<NColsType,NRowsType,SizeAtCompileTime>::value,\n                internal::get_compiletime_reshape_order<Flags,Order>::value>\nreshaped(NRowsType nRows, NColsType nCols) EIGEN_RESHAPED_METHOD_CONST\n{\n  return Reshaped<EIGEN_RESHAPED_METHOD_CONST Derived,\n                  internal::get_compiletime_reshape_size<NRowsType,NColsType,SizeAtCompileTime>::value,\n                  internal::get_compiletime_reshape_size<NColsType,NRowsType,SizeAtCompileTime>::value,\n                  internal::get_compiletime_reshape_order<Flags,Order>::value>\n                (derived(),\n                 internal::get_runtime_reshape_size(nRows,internal::get_runtime_value(nCols),size()),\n                 internal::get_runtime_reshape_size(nCols,internal::get_runtime_value(nRows),size()));\n}\n\n// Views as linear vectors\n\nEIGEN_DEVICE_FUNC\ninline Reshaped<EIGEN_RESHAPED_METHOD_CONST Derived,SizeAtCompileTime,1>\nreshaped() 
EIGEN_RESHAPED_METHOD_CONST\n{\n  return Reshaped<EIGEN_RESHAPED_METHOD_CONST Derived,SizeAtCompileTime,1>(derived(),size(),1);\n}\n\ntemplate<int Order>\nEIGEN_DEVICE_FUNC\ninline Reshaped<EIGEN_RESHAPED_METHOD_CONST Derived, SizeAtCompileTime, 1,\n                internal::get_compiletime_reshape_order<Flags,Order>::value>\nreshaped() EIGEN_RESHAPED_METHOD_CONST\n{\n  EIGEN_STATIC_ASSERT(Order==RowMajor || Order==ColMajor || Order==AutoOrder, INVALID_TEMPLATE_PARAMETER);\n  return Reshaped<EIGEN_RESHAPED_METHOD_CONST Derived, SizeAtCompileTime, 1,\n                  internal::get_compiletime_reshape_order<Flags,Order>::value>\n                (derived(), size(), 1);\n}\n\n#undef EIGEN_RESHAPED_METHOD_CONST\n\n#ifndef EIGEN_RESHAPED_METHOD_2ND_PASS\n#define EIGEN_RESHAPED_METHOD_2ND_PASS\n#include \"ReshapedMethods.h\"\n#undef EIGEN_RESHAPED_METHOD_2ND_PASS\n#endif\n\n#endif // EIGEN_PARSED_BY_DOXYGEN\n"
  },
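Putting the reshaped() overloads above together: run-time sizes, a deduced size via AutoSize, compile-time sizes via Eigen::fix<N>, and the parameter-free linearizing form. A sketch assuming Eigen 3.4 (using Eigen::fix<N> as a value requires C++14 or later):

#include <iostream>
#include <Eigen/Dense>

int main() {
  Eigen::MatrixXd A(2, 6);
  A << 1, 2, 3, 4,  5,  6,
       7, 8, 9, 10, 11, 12;

  // Run-time sizes: view the 2x6 matrix as 3x4 (column-major traversal by default).
  std::cout << A.reshaped(3, 4) << "\n";

  // One size deduced via AutoSize; the Order template argument picks the traversal.
  std::cout << A.reshaped<Eigen::RowMajor>(Eigen::AutoSize, 3) << "\n";

  // The parameter-free overload linearizes into a column vector,
  // equivalent to A.reshaped<Order>(AutoSize, fix<1>).
  std::cout << A.reshaped().transpose() << "\n";

  // Compile-time sizes with Eigen::fix<N>.
  auto R = A.reshaped(Eigen::fix<4>, Eigen::fix<3>);
  std::cout << R.rows() << "x" << R.cols() << "\n";   // 4x3
  return 0;
}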
  {
    "path": "external_libs/mvtnorm/Makefile",
    "content": "FLAG  =-O2 -DNDEBUG -fPIC\nAR   ?= ar\nCXX  ?= g++\nCC   ?= gcc\nFC   ?= gfortran\n\nUNAME_S := $(shell uname -s)\nifeq ($(UNAME_S),Darwin)\n FLAG += -arch x86_64\nendif\n\nlibMvtnorm.a: mvtnorm.o mvt.o randomF77.o\n\t${AR} rcs $@ $^ \n\nmvtnorm.o: mvtnorm.cpp mvtnorm.h\n\t${CXX} $(FLAG) -c mvtnorm.cpp -o mvtnorm.o\n\nmvt.o: mvt.f\n\t${FC} $(FLAG) -c mvt.f\n\nrandomF77.o: randomF77.c\n\t${CC} $(FLAG) -c randomF77.c\n\nclean:\n\trm -f *.o *.a\n"
  },
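libMvtnorm.a built by this Makefile bundles the C++ wrapper (mvtnorm.o) together with Genz's Fortran routine MVTDST (mvt.o, documented in mvt.f below) and the RNG glue in randomF77.o. For illustration only, here is a hedged sketch of calling MVTDST directly from C++: the mvtdst_ symbol name and the all-pointers signature assume gfortran's default Fortran 77 name mangling and are not part of any public header.

#include <iostream>
#include <vector>

// Assumed C binding for SUBROUTINE MVTDST in mvt.f (gfortran mangling);
// every Fortran 77 dummy argument is passed by pointer.
extern "C" void mvtdst_(int* n, int* nu,
                        double* lower, double* upper, int* infin,
                        double* correl, double* delta,
                        int* maxpts, double* abseps, double* releps,
                        double* error, double* value, int* inform);

int main() {
  // P(Z1 <= 1, Z2 <= 1) for a bivariate normal (NU < 1 selects MVN) with rho = 0.5.
  int n = 2, nu = 0, maxpts = 25000, inform = 0;
  std::vector<double> lower = {0.0, 0.0}, upper = {1.0, 1.0}, delta = {0.0, 0.0};
  std::vector<int> infin = {0, 0};      // 0: I-th limits are (-infinity, UPPER(I)]
  double correl = 0.5;                  // packed lower triangle; one entry when N = 2
  double abseps = 1e-6, releps = 0.0, error = 0.0, value = 0.0;

  mvtdst_(&n, &nu, lower.data(), upper.data(), infin.data(),
          &correl, delta.data(), &maxpts, &abseps, &releps,
          &error, &value, &inform);
  std::cout << "P = " << value << " +/- " << error
            << " (inform = " << inform << ")\n";
  return 0;
}

Something like `g++ example.cpp libMvtnorm.a -lgfortran` should link it, though the exact Fortran runtime library depends on the toolchain in use.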
  {
    "path": "external_libs/mvtnorm/mvt.f",
    "content": "*\n*    $Id: mvt.f 231 2011-11-07 13:48:07Z thothorn $\n*\n      SUBROUTINE MVTDST( N, NU, LOWER, UPPER, INFIN, CORREL, DELTA, \n     &                   MAXPTS, ABSEPS, RELEPS, ERROR, VALUE, INFORM )       \n*\n*     A subroutine for computing non-central multivariate t probabilities.\n*     This subroutine uses an algorithm (QRSVN) described in the paper\n*     \"Comparison of Methods for the Computation of Multivariate \n*         t-Probabilities\", by Alan Genz and Frank Bretz\n*         J. Comp. Graph. Stat. 11 (2002), pp. 950-971.\n*\n*          Alan Genz \n*          Department of Mathematics\n*          Washington State University \n*          Pullman, WA 99164-3113\n*          Email : AlanGenz@wsu.edu\n*\n*\tOriginal source available from\n*\thttp://www.math.wsu.edu/faculty/genz/software/fort77/mvtdstpack.f\n*\n*\tThis is version 7/10 with better support for 100 < dimension < 1000\n*\n*  Parameters\n*\n*     N      INTEGER, the number of variables.    \n*     NU     INTEGER, the number of degrees of freedom.\n*            If NU < 1, then an MVN probability is computed.\n*     LOWER  DOUBLE PRECISION, array of lower integration limits.\n*     UPPER  DOUBLE PRECISION, array of upper integration limits.\n*     INFIN  INTEGER, array of integration limits flags:\n*             if INFIN(I) < 0, Ith limits are (-infinity, infinity);\n*             if INFIN(I) = 0, Ith limits are (-infinity, UPPER(I)];\n*             if INFIN(I) = 1, Ith limits are [LOWER(I), infinity);\n*             if INFIN(I) = 2, Ith limits are [LOWER(I), UPPER(I)].\n*     CORREL DOUBLE PRECISION, array of correlation coefficients; \n*            the correlation coefficient in row I column J of the \n*            correlation matrixshould be stored in \n*               CORREL( J + ((I-2)*(I-1))/2 ), for J < I.\n*            The correlation matrix must be positive semi-definite.\n*     DELTA  DOUBLE PRECISION, array of non-centrality parameters.\n*     MAXPTS INTEGER, maximum number of function values allowed. This \n*            parameter can be used to limit the time. A sensible \n*            strategy is to start with MAXPTS = 1000*N, and then\n*            increase MAXPTS if ERROR is too large.\n*     ABSEPS DOUBLE PRECISION absolute error tolerance.\n*     RELEPS DOUBLE PRECISION relative error tolerance.\n*     ERROR  DOUBLE PRECISION estimated absolute error, \n*            with 99% confidence level.\n*     VALUE  DOUBLE PRECISION estimated value for the integral\n*     INFORM INTEGER, termination status parameter:\n*            if INFORM = 0, normal completion with ERROR < EPS;\n*            if INFORM = 1, completion with ERROR > EPS and MAXPTS \n*                           function vaules used; increase MAXPTS to \n*                           decrease ERROR;\n*            if INFORM = 2, N > 1000 or N < 1.\n*            if INFORM = 3, correlation matrix not positive semi-definite.\n*\n      EXTERNAL MVSUBR\n      INTEGER N, ND, NU, INFIN(*), MAXPTS, INFORM, IVLS\n      DOUBLE PRECISION CORREL(*), LOWER(*), UPPER(*), DELTA(*), RELEPS, \n     &                 ABSEPS, ERROR, VALUE, E(1), V(1)\n      COMMON /PTBLCK/IVLS\n      IVLS = 0\n\n      CALL rndstart()\n\n      IF ( N .GT. 1000 .OR. N .LT. 1 ) THEN\n         VALUE = 0\n         ERROR = 1\n         INFORM = 2\n      ELSE\n         CALL MVINTS( N, NU, CORREL, LOWER, UPPER, DELTA, INFIN,\n     &                   ND, VALUE, ERROR, INFORM )\n         IF ( INFORM .EQ. 0 .AND. ND .GT. 
0 ) THEN\n*\n*           Call the lattice rule integration subroutine\n*\n            CALL MVKBRV( ND, IVLS, MAXPTS, 1, MVSUBR, ABSEPS, RELEPS, \n     &                    E(1), V, INFORM )\n            ERROR = E(1)\n            VALUE = V(1)\n         ENDIF\n      ENDIF\n      \n      CALL rndend()\n      \n      END\n*\n      SUBROUTINE MVSUBR( N, W, NF, F )\n*     \n*     Integrand subroutine\n*\n      INTEGER N, NF, NUIN, INFIN(*), NL\n      DOUBLE PRECISION W(*),F(*), LOWER(*),UPPER(*), CORREL(*), DELTA(*)\n      PARAMETER ( NL = 1000 )\n      INTEGER INFI(NL), NU, ND, INFORM, NY \n      DOUBLE PRECISION COV(NL*(NL+1)/2), A(NL), B(NL), DL(NL), Y(NL)\n      DOUBLE PRECISION MVCHNV, SNU, R, VL, ER, DI, EI\n      SAVE NU, SNU, INFI, A, B, DL, COV\n      IF ( NU .LE. 0 ) THEN\n         R = 1\n         CALL MVVLSB( N+1, W, R, DL,INFI,A,B,COV, Y, DI,EI, NY, F(1) )\n      ELSE\n         R = MVCHNV( NU, W(N) )/SNU\n         CALL MVVLSB( N  , W, R, DL,INFI,A,B,COV, Y, DI,EI, NY, F(1) )\n      END IF\n      RETURN\n*\n*     Entry point for intialization.\n*\n      ENTRY MVINTS( N, NUIN, CORREL, LOWER, UPPER, DELTA, INFIN, \n     &     ND, VL, ER, INFORM )\n*\n*     Initialization and computation of covariance Cholesky factor.\n*\n      CALL MVSORT( N, LOWER, UPPER, DELTA, CORREL, INFIN, Y, .TRUE.,\n     &            ND,     A,     B,    DL,    COV,  INFI, INFORM )\n      NU = NUIN\n      CALL MVSPCL( ND, NU, A, B, DL, COV, INFI, SNU, VL, ER, INFORM )\n      END\n*\n      SUBROUTINE MVSPCL( ND, NU, A,B,DL, COV, INFI, SNU, VL,ER, INFORM )\n*\n*     Special cases subroutine\n*\n      DOUBLE PRECISION COV(*), A(*), B(*), DL(*), SNU, R, VL, ER\n      INTEGER ND, NU, INFI(*), INFORM\n      DOUBLE PRECISION MVBVT, MVSTDT\n      IF ( INFORM .GT. 0 ) THEN\n         VL = 0\n         ER = 1\n      ELSE\n*     \n*        Special cases\n*\n         IF ( ND .EQ. 0 ) THEN\n            ER = 0\n*  Code added to fix ND = 0 bug, 24/03/2009 ->\n            VL = 1\n*  <- Code added to fix ND = 0 bug, 24/03/2009\n\n         ELSE IF ( ND.EQ.1 .AND. ( NU.LT.1 .OR. ABS(DL(1)).EQ.0 ) ) THEN\n*     \n*           1-d case for normal or central t\n*\n            VL = 1\n            IF ( INFI(1) .NE. 1 ) VL = MVSTDT( NU, B(1) - DL(1) ) \n            IF ( INFI(1) .NE. 0 ) VL = VL - MVSTDT( NU, A(1) - DL(1) ) \n            IF ( VL .LT. 0 ) VL = 0\n            ER = 2D-16\n            ND = 0\n         ELSE IF ( ND .EQ. 2 .AND. \n     &            ( NU .LT. 1 .OR. ABS(DL(1))+ABS(DL(2)) .EQ. 0 ) ) THEN\n*     \n*           2-d case for normal or central t\n*\n            IF ( INFI(1) .NE. 0 ) A(1) = A(1) - DL(1)\n            IF ( INFI(1) .NE. 1 ) B(1) = B(1) - DL(1)\n            IF ( INFI(2) .NE. 0 ) A(2) = A(2) - DL(2)\n            IF ( INFI(2) .NE. 1 ) B(2) = B(2) - DL(2)\n            IF ( ABS( COV(3) ) .GT. 0 ) THEN\n*     \n*              2-d nonsingular case\n*\n               R = SQRT( 1 + COV(2)**2 )\n               IF ( INFI(2) .NE. 0 ) A(2) = A(2)/R\n               IF ( INFI(2) .NE. 1 ) B(2) = B(2)/R\n               COV(2) = COV(2)/R\n               VL = MVBVT( NU, A, B, INFI, COV(2) )\n               ER = 1D-15\n            ELSE\n*     \n*              2-d singular case\n*\n               IF ( INFI(1) .NE. 0 ) THEN\n                  IF ( INFI(2) .NE. 0 ) A(1) = MAX( A(1), A(2) )\n               ELSE\n                  IF ( INFI(2) .NE. 0 ) A(1) = A(2)\n               END IF\n               IF ( INFI(1) .NE. 1 ) THEN\n                  IF ( INFI(2) .NE. 
1 ) B(1) = MIN( B(1), B(2) ) \n               ELSE\n                  IF ( INFI(2) .NE. 1 ) B(1) = B(2)\n               END IF\n               IF ( INFI(1) .NE. INFI(2) ) INFI(1) = 2\n               VL = 1\n               ! IF ( INFI(1) .NE. 1 ) VL = MVSTDT( NU, B(1)-DL(1) ) \n               ! IF ( INFI(1) .NE. 0 ) VL = VL - MVSTDT( NU, A(1)-DL(1) )      \n*  A(1), B(1) Bug Fixed, 28/05/2013\n               IF ( INFI(1) .NE. 1 ) VL = MVSTDT( NU, B(1) ) \n               IF ( INFI(1) .NE. 0 ) VL = VL - MVSTDT( NU, A(1) ) \n               IF ( VL .LT. 0 ) VL = 0\n               ER = 2D-16\n            END IF\n            ND = 0\n         ELSE\n            IF ( NU .GT. 0 ) THEN\n               SNU = SQRT( DBLE(NU) ) \n            ELSE \n               ND = ND - 1\n            END IF\n         END IF\n      END IF\n      END\n*\n      SUBROUTINE MVVLSB( N,W,R,DL,INFI, A,B,COV, Y, DI,EI, ND, VALUE )      \n*     \n*     Integrand subroutine\n*\n      INTEGER N, INFI(*), ND\n      DOUBLE PRECISION W(*), R, DL(*), A(*), B(*), COV(*), Y(*)\n      INTEGER I, J, IJ, INFA, INFB\n      DOUBLE PRECISION SUM, AI, BI, DI, EI, MVPHNV, VALUE\n      VALUE = 1\n      INFA = 0\n      INFB = 0\n      ND = 0\n      IJ = 0\n      DO I = 1, N\n         SUM = DL(I)\n         DO J = 1, I-1\n            IJ = IJ + 1\n            IF ( J .LE. ND ) SUM = SUM + COV(IJ)*Y(J)\n         END DO\n         IF ( INFI(I) .NE. 0 ) THEN\n            IF ( INFA .EQ. 1 ) THEN\n               AI = MAX( AI, R*A(I) - SUM )\n            ELSE\n               AI = R*A(I) - SUM \n               INFA = 1\n            END IF\n         END IF\n         IF ( INFI(I) .NE. 1 ) THEN\n            IF ( INFB .EQ. 1 ) THEN\n               BI = MIN( BI, R*B(I) - SUM )\n            ELSE\n               BI = R*B(I) - SUM \n               INFB = 1\n            END IF\n         END IF\n         IJ = IJ + 1\n         IF ( I .EQ. N .OR. COV(IJ+ND+2) .GT. 0 ) THEN \n            CALL MVLIMS( AI, BI, INFA + INFA + INFB - 1, DI, EI )\n            IF ( DI .GE. EI ) THEN\n               VALUE = 0\n               RETURN\n            ELSE\n               VALUE = VALUE*( EI - DI )\n               ND = ND + 1\n               IF ( I .LT. N ) Y(ND) = MVPHNV( DI + W(ND)*( EI - DI ) )\n               INFA = 0\n               INFB = 0\n            END IF\n         END IF\n      END DO\n      END\n*\n      SUBROUTINE MVSORT( N, LOWER, UPPER, DELTA, CORREL, INFIN, Y,PIVOT,\n     &                  ND,     A,     B,    DL,    COV,  INFI, INFORM )\n*\n*     Subroutine to sort integration limits and determine Cholesky factor.\n*\n      INTEGER N, ND, INFIN(*), INFI(*), INFORM\n      LOGICAL PIVOT\n      DOUBLE PRECISION     A(*),     B(*),    DL(*),    COV(*), \n     &                 LOWER(*), UPPER(*), DELTA(*), CORREL(*), Y(*)\n      INTEGER I, J, K, L, M, II, IJ, IL, JL, JMIN\n      DOUBLE PRECISION SUMSQ, AJ, BJ, SUM, EPS, EPSI, D, E\n      DOUBLE PRECISION CVDIAG, AMIN, BMIN, DEMIN, MVTDNS\n      PARAMETER ( EPS = 1D-10 )\n      INFORM = 0\n      IJ = 0\n      II = 0\n      ND = N\n      DO I = 1, N\n         A(I) = 0\n         B(I) = 0\n         DL(I) = 0\n         INFI(I) = INFIN(I) \n         IF ( INFI(I) .LT. 0 ) THEN\n            ND = ND - 1\n         ELSE \n            IF ( INFI(I) .NE. 0 ) A(I) = LOWER(I)\n            IF ( INFI(I) .NE. 
1 ) B(I) = UPPER(I)\n            DL(I) = DELTA(I)\n         ENDIF\n         DO J = 1, I-1\n            IJ = IJ + 1\n            II = II + 1\n            COV(IJ) = CORREL(II)\n         END DO\n         IJ = IJ + 1\n         COV(IJ) = 1\n      END DO\n*\n*     First move any doubly infinite limits to innermost positions.\n*\n      IF ( ND .GT. 0 ) THEN\n         DO I = N, ND + 1, -1\n            IF ( INFI(I) .GE. 0 ) THEN \n               DO J = 1, I-1\n                  IF ( INFI(J) .LT. 0 ) THEN\n                     CALL MVSWAP( J, I, A, B, DL, INFI, N, COV )\n                     GO TO 10\n                  ENDIF\n               END DO\n            ENDIF\n 10         CONTINUE\n         END DO\n*\n*     Sort remaining limits and determine Cholesky factor.\n*\n         II = 0\n         JL = ND\n         DO I = 1, ND\n*\n*        Determine the integration limits for variable with minimum\n*        expected probability and interchange that variable with Ith.\n*\n            DEMIN = 1\n            JMIN = I\n            CVDIAG = 0\n            IJ = II\n            EPSI = EPS*I\n            IF ( .NOT. PIVOT ) JL = I\n            DO J = I, JL\n               IF ( COV(IJ+J) .GT. EPSI ) THEN\n                  SUMSQ = SQRT( COV(IJ+J) )\n                  SUM = DL(J) \n                  DO K = 1, I-1\n                     SUM = SUM + COV(IJ+K)*Y(K)\n                  END DO\n                  AJ = ( A(J) - SUM )/SUMSQ\n                  BJ = ( B(J) - SUM )/SUMSQ\n                  CALL MVLIMS( AJ, BJ, INFI(J), D, E )\n                  IF ( DEMIN .GE. E - D ) THEN\n                     JMIN = J\n                     AMIN = AJ\n                     BMIN = BJ\n                     DEMIN = E - D\n                     CVDIAG = SUMSQ\n                  ENDIF\n               ENDIF\n               IJ = IJ + J \n            END DO\n            IF ( JMIN .GT. I ) THEN\n               CALL MVSWAP( I, JMIN, A, B, DL, INFI, N, COV )\n            END IF\n            IF ( COV(II+I) .LT. -EPSI ) THEN\n               INFORM = 3\n            END IF\n            COV(II+I) = CVDIAG\n*\n*        Compute Ith column of Cholesky factor.\n*        Compute expected value for Ith integration variable and\n*         scale Ith covariance matrix row and limits.\n*\n            IF ( CVDIAG .GT. 0 ) THEN\n               IL = II + I\n               DO L = I+1, ND\n                  COV(IL+I) = COV(IL+I)/CVDIAG\n                  IJ = II + I\n                  DO J = I+1, L\n                     COV(IL+J) = COV(IL+J) - COV(IL+I)*COV(IJ+I)\n                     IJ = IJ + J\n                  END DO\n                  IL = IL + L\n               END DO\n* \n*              Expected Y = -( density(b) - density(a) )/( b - a )\n* \n               IF ( DEMIN .GT. EPSI ) THEN\n                  Y(I) = 0\n                  IF ( INFI(I) .NE. 0 ) Y(I) =        MVTDNS( 0, AMIN )        \n                  IF ( INFI(I) .NE. 1 ) Y(I) = Y(I) - MVTDNS( 0, BMIN )        \n                  Y(I) = Y(I)/DEMIN\n               ELSE\n                  IF ( INFI(I) .EQ. 0 ) Y(I) = BMIN\n                  IF ( INFI(I) .EQ. 1 ) Y(I) = AMIN\n                  IF ( INFI(I) .EQ. 
2 ) Y(I) = ( AMIN + BMIN )/2\n               END IF\n               DO J = 1, I\n                  II = II + 1\n                  COV(II) = COV(II)/CVDIAG\n               END DO\n                A(I) =  A(I)/CVDIAG\n                B(I) =  B(I)/CVDIAG\n               DL(I) = DL(I)/CVDIAG\n            ELSE\n               IL = II + I\n               DO L = I+1, ND\n                  COV(IL+I) = 0\n                  IL = IL + L\n               END DO\n*\n*        If the covariance matrix diagonal entry is zero, \n*         permute limits and rows, if necessary.\n*\n*\n               DO J = I-1, 1, -1\n                  IF ( ABS( COV(II+J) ) .GT. EPSI ) THEN\n                      A(I) =  A(I)/COV(II+J)\n                      B(I) =  B(I)/COV(II+J)\n                     DL(I) = DL(I)/COV(II+J)\n                     IF ( COV(II+J) .LT. 0 ) THEN\n                        CALL MVSSWP( A(I), B(I) ) \n                        IF ( INFI(I) .NE. 2 ) INFI(I) = 1 - INFI(I)\n                     END IF\n                     DO L = 1, J\n                        COV(II+L) = COV(II+L)/COV(II+J)\n                     END DO\n                     DO L = J+1, I-1 \n                        IF( COV((L-1)*L/2+J+1) .GT. 0 ) THEN\n                           IJ = II\n                           DO K = I-1, L, -1 \n                              DO M = 1, K\n                                 CALL MVSSWP( COV(IJ-K+M), COV(IJ+M) )\n                              END DO\n                              CALL MVSSWP(  A(K),  A(K+1) ) \n                              CALL MVSSWP(  B(K),  B(K+1) ) \n                              CALL MVSSWP( DL(K), DL(K+1) ) \n                              M = INFI(K)\n                              INFI(K) = INFI(K+1)\n                              INFI(K+1) = M\n                              IJ = IJ - K \n                           END DO\n                           GO TO 20\n                        END IF\n                     END DO\n                     GO TO 20\n                  END IF\n                  COV(II+J) = 0\n               END DO\n 20            II = II + I\n               Y(I) = 0\n            END IF\n         END DO\n      ENDIF\n      END\n*\n      DOUBLE PRECISION FUNCTION MVTDNS( NU, X )\n      INTEGER NU, I\n      DOUBLE PRECISION X, PROD, PI, SQTWPI\n      PARAMETER (     PI = 3.141592653589793D0 )\n      PARAMETER ( SQTWPI = 2.506628274631001D0 )\n      MVTDNS = 0\n      IF ( NU .GT. 0 ) THEN\n         PROD = 1/SQRT( DBLE(NU) )\n         DO I = NU - 2, 1, -2\n            PROD = PROD*( I + 1 )/I\n         END DO\n         IF ( MOD( NU, 2 ) .EQ. 0 ) THEN\n            PROD = PROD/2\n         ELSE\n            PROD = PROD/PI\n         END IF\n         MVTDNS = PROD/SQRT( 1 + X*X/NU )**( NU + 1 )\n      ELSE\n        IF ( ABS(X) .LT. 10 ) MVTDNS = EXP( -X*X/2 )/SQTWPI\n      END IF\n      END\n*\n      SUBROUTINE MVLIMS( A, B, INFIN, LOWER, UPPER )\n      DOUBLE PRECISION A, B, LOWER, UPPER, MVPHI\n      INTEGER INFIN\n      LOWER = 0\n      UPPER = 1\n      IF ( INFIN .GE. 0 ) THEN\n         IF ( INFIN .NE. 0 ) LOWER = MVPHI(A)\n         IF ( INFIN .NE. 
1 ) UPPER = MVPHI(B)\n      ENDIF\n      UPPER = MAX( UPPER, LOWER )\n      END      \n*\n      SUBROUTINE MVSSWP( X, Y )\n      DOUBLE PRECISION X, Y, T\n      T = X\n      X = Y\n      Y = T\n      END\n*\n      SUBROUTINE MVSWAP( P, Q, A, B, D, INFIN, N, C )\n*\n*     Swaps rows and columns P and Q in situ, with P <= Q.\n*\n      DOUBLE PRECISION A(*), B(*), C(*), D(*)\n      INTEGER INFIN(*), P, Q, N, I, J, II, JJ\n      CALL MVSSWP( A(P), A(Q) )\n      CALL MVSSWP( B(P), B(Q) )\n      CALL MVSSWP( D(P), D(Q) )\n      J = INFIN(P)\n      INFIN(P) = INFIN(Q)\n      INFIN(Q) = J\n      JJ = ( P*( P - 1 ) )/2\n      II = ( Q*( Q - 1 ) )/2\n      CALL MVSSWP( C(JJ+P), C(II+Q) )\n      DO J = 1, P-1\n         CALL MVSSWP( C(JJ+J), C(II+J) )\n      END DO\n      JJ = JJ + P\n      DO I = P+1, Q-1\n         CALL MVSSWP( C(JJ+P), C(II+I) )\n         JJ = JJ + I\n      END DO\n      II = II + Q\n      DO I = Q+1, N\n         CALL MVSSWP( C(II+P), C(II+Q) )\n         II = II + I\n      END DO\n      END\n*\n      DOUBLE PRECISION FUNCTION MVPHI(Z)\n*     \n*     Normal distribution probabilities accurate to 1d-15.\n*     Reference: J.L. Schonfelder, Math Comp 32(1978), pp 1232-1240. \n*     \n      INTEGER I, IM\n      DOUBLE PRECISION A(0:43), BM, B, BP, P, RTWO, T, XA, Z\n      PARAMETER( RTWO = 1.414213562373095048801688724209D0, IM = 24 )\n      SAVE A\n      DATA ( A(I), I = 0, 43 )/\n     &    6.10143081923200417926465815756D-1,\n     &   -4.34841272712577471828182820888D-1,\n     &    1.76351193643605501125840298123D-1,\n     &   -6.0710795609249414860051215825D-2,\n     &    1.7712068995694114486147141191D-2,\n     &   -4.321119385567293818599864968D-3, \n     &    8.54216676887098678819832055D-4, \n     &   -1.27155090609162742628893940D-4,\n     &    1.1248167243671189468847072D-5, 3.13063885421820972630152D-7,      \n     &   -2.70988068537762022009086D-7, 3.0737622701407688440959D-8,\n     &    2.515620384817622937314D-9, -1.028929921320319127590D-9,\n     &    2.9944052119949939363D-11, 2.6051789687266936290D-11,\n     &   -2.634839924171969386D-12, -6.43404509890636443D-13,\n     &    1.12457401801663447D-13, 1.7281533389986098D-14, \n     &   -4.264101694942375D-15, -5.45371977880191D-16,\n     &    1.58697607761671D-16, 2.0899837844334D-17, \n     &   -5.900526869409D-18, -9.41893387554D-19, 2.14977356470D-19, \n     &    4.6660985008D-20, -7.243011862D-21, -2.387966824D-21, \n     &    1.91177535D-22, 1.20482568D-22, -6.72377D-25, -5.747997D-24,\n     &   -4.28493D-25, 2.44856D-25, 4.3793D-26, -8.151D-27, -3.089D-27, \n     &    9.3D-29, 1.74D-28, 1.6D-29, -8.0D-30, -2.0D-30 /       \n*     \n      XA = ABS(Z)/RTWO\n      IF ( XA .GT. 100 ) THEN\n         P = 0\n      ELSE\n         T = ( 8*XA - 30 ) / ( 4*XA + 15 )\n         BM = 0\n         B  = 0\n         DO I = IM, 0, -1 \n            BP = B\n            B  = BM\n            BM = T*B - BP  + A(I)\n         END DO\n         P = EXP( -XA*XA )*( BM - BP )/4\n      END IF\n      IF ( Z .GT. 0 ) P = 1 - P\n      MVPHI = P\n      END\n*\n      DOUBLE PRECISION FUNCTION MVPHNV(P)\n*\n*\tALGORITHM AS241  APPL. STATIST. (1988) VOL. 37, NO. 3\n*\n*\tProduces the normal deviate Z corresponding to a given lower\n*\ttail area of P.\n*\n*\tThe hash sums below are the sums of the mantissas of the\n*\tcoefficients.   
They are included for use in checking\n*\ttranscription.\n*\n      DOUBLE PRECISION SPLIT1, SPLIT2, CONST1, CONST2, \n     *     A0, A1, A2, A3, A4, A5, A6, A7, B1, B2, B3, B4, B5, B6, B7, \n     *     C0, C1, C2, C3, C4, C5, C6, C7, D1, D2, D3, D4, D5, D6, D7, \n     *     E0, E1, E2, E3, E4, E5, E6, E7, F1, F2, F3, F4, F5, F6, F7, \n     *     P, Q, R\n      PARAMETER ( SPLIT1 = 0.425, SPLIT2 = 5,\n     *            CONST1 = 0.180625D0, CONST2 = 1.6D0 )\n*     \n*     Coefficients for P close to 0.5\n*     \n      PARAMETER (\n     *     A0 = 3.38713 28727 96366 6080D0,\n     *     A1 = 1.33141 66789 17843 7745D+2,\n     *     A2 = 1.97159 09503 06551 4427D+3,\n     *     A3 = 1.37316 93765 50946 1125D+4,\n     *     A4 = 4.59219 53931 54987 1457D+4,\n     *     A5 = 6.72657 70927 00870 0853D+4,\n     *     A6 = 3.34305 75583 58812 8105D+4,\n     *     A7 = 2.50908 09287 30122 6727D+3,\n     *     B1 = 4.23133 30701 60091 1252D+1,\n     *     B2 = 6.87187 00749 20579 0830D+2,\n     *     B3 = 5.39419 60214 24751 1077D+3,\n     *     B4 = 2.12137 94301 58659 5867D+4,\n     *     B5 = 3.93078 95800 09271 0610D+4,\n     *     B6 = 2.87290 85735 72194 2674D+4,\n     *     B7 = 5.22649 52788 52854 5610D+3 )\n*     HASH SUM AB    55.88319 28806 14901 4439\n*     \n*     Coefficients for P not close to 0, 0.5 or 1.\n*     \n      PARAMETER (\n     *     C0 = 1.42343 71107 49683 57734D0,\n     *     C1 = 4.63033 78461 56545 29590D0,\n     *     C2 = 5.76949 72214 60691 40550D0,\n     *     C3 = 3.64784 83247 63204 60504D0,\n     *     C4 = 1.27045 82524 52368 38258D0,\n     *     C5 = 2.41780 72517 74506 11770D-1,\n     *     C6 = 2.27238 44989 26918 45833D-2,\n     *     C7 = 7.74545 01427 83414 07640D-4,\n     *     D1 = 2.05319 16266 37758 82187D0,\n     *     D2 = 1.67638 48301 83803 84940D0,\n     *     D3 = 6.89767 33498 51000 04550D-1,\n     *     D4 = 1.48103 97642 74800 74590D-1,\n     *     D5 = 1.51986 66563 61645 71966D-2,\n     *     D6 = 5.47593 80849 95344 94600D-4,\n     *     D7 = 1.05075 00716 44416 84324D-9 )\n*     HASH SUM CD    49.33206 50330 16102 89036\n*\n*\tCoefficients for P near 0 or 1.\n*\n      PARAMETER (\n     *     E0 = 6.65790 46435 01103 77720D0,\n     *     E1 = 5.46378 49111 64114 36990D0,\n     *     E2 = 1.78482 65399 17291 33580D0,\n     *     E3 = 2.96560 57182 85048 91230D-1,\n     *     E4 = 2.65321 89526 57612 30930D-2,\n     *     E5 = 1.24266 09473 88078 43860D-3,\n     *     E6 = 2.71155 55687 43487 57815D-5,\n     *     E7 = 2.01033 43992 92288 13265D-7,\n     *     F1 = 5.99832 20655 58879 37690D-1,\n     *     F2 = 1.36929 88092 27358 05310D-1,\n     *     F3 = 1.48753 61290 85061 48525D-2,\n     *     F4 = 7.86869 13114 56132 59100D-4,\n     *     F5 = 1.84631 83175 10054 68180D-5,\n     *     F6 = 1.42151 17583 16445 88870D-7,\n     *     F7 = 2.04426 31033 89939 78564D-15 )\n*     HASH SUM EF    47.52583 31754 92896 71629\n*     \n      Q = ( 2*P - 1 )/2\n      IF ( ABS(Q) .LE. SPLIT1 ) THEN\n         R = CONST1 - Q*Q\n         MVPHNV = Q*( ( ( ((((A7*R + A6)*R + A5)*R + A4)*R + A3)\n     *                  *R + A2 )*R + A1 )*R + A0 )\n     *            /( ( ( ((((B7*R + B6)*R + B5)*R + B4)*R + B3)\n     *                  *R + B2 )*R + B1 )*R + 1 )\n      ELSE\n         R = MIN( P, 1 - P )\n         IF ( R .GT. 0 ) THEN\n            R = SQRT( -LOG(R) )\n            IF ( R .LE. 
SPLIT2 ) THEN\n               R = R - CONST2\n               MVPHNV = ( ( ( ((((C7*R + C6)*R + C5)*R + C4)*R + C3)\n     *                      *R + C2 )*R + C1 )*R + C0 ) \n     *                /( ( ( ((((D7*R + D6)*R + D5)*R + D4)*R + D3)\n     *                      *R + D2 )*R + D1 )*R + 1 )\n            ELSE\n               R = R - SPLIT2\n               MVPHNV = ( ( ( ((((E7*R + E6)*R + E5)*R + E4)*R + E3)\n     *                      *R + E2 )*R + E1 )*R + E0 )\n     *                /( ( ( ((((F7*R + F6)*R + F5)*R + F4)*R + F3)\n     *                      *R + F2 )*R + F1 )*R + 1 )\n            END IF\n         ELSE\n            MVPHNV = 9\n         END IF\n         IF ( Q .LT. 0 ) MVPHNV = - MVPHNV\n      END IF\n      END\n      DOUBLE PRECISION FUNCTION MVBVN( LOWER, UPPER, INFIN, CORREL )\n*\n*     A function for computing bivariate normal probabilities.\n*\n*  Parameters\n*\n*     LOWER  REAL, array of lower integration limits.\n*     UPPER  REAL, array of upper integration limits.\n*     INFIN  INTEGER, array of integration limits flags:\n*            if INFIN(I) = 0, Ith limits are (-infinity, UPPER(I)];\n*            if INFIN(I) = 1, Ith limits are [LOWER(I), infinity);\n*            if INFIN(I) = 2, Ith limits are [LOWER(I), UPPER(I)].\n*     CORREL REAL, correlation coefficient.\n*\n      DOUBLE PRECISION LOWER(*), UPPER(*), CORREL, MVBVU\n      INTEGER INFIN(*)\n      IF ( INFIN(1) .EQ. 2  .AND. INFIN(2) .EQ. 2 ) THEN\n         MVBVN =  MVBVU ( LOWER(1), LOWER(2), CORREL )\n     +           - MVBVU ( UPPER(1), LOWER(2), CORREL )\n     +           - MVBVU ( LOWER(1), UPPER(2), CORREL )\n     +           + MVBVU ( UPPER(1), UPPER(2), CORREL )\n      ELSE IF ( INFIN(1) .EQ. 2  .AND. INFIN(2) .EQ. 1 ) THEN\n         MVBVN =  MVBVU ( LOWER(1), LOWER(2), CORREL )\n     +           - MVBVU ( UPPER(1), LOWER(2), CORREL )\n      ELSE IF ( INFIN(1) .EQ. 1  .AND. INFIN(2) .EQ. 2 ) THEN\n         MVBVN =  MVBVU ( LOWER(1), LOWER(2), CORREL )\n     +           - MVBVU ( LOWER(1), UPPER(2), CORREL )\n      ELSE IF ( INFIN(1) .EQ. 2  .AND. INFIN(2) .EQ. 0 ) THEN\n         MVBVN =  MVBVU ( -UPPER(1), -UPPER(2), CORREL )\n     +           - MVBVU ( -LOWER(1), -UPPER(2), CORREL )\n      ELSE IF ( INFIN(1) .EQ. 0  .AND. INFIN(2) .EQ. 2 ) THEN\n         MVBVN =  MVBVU ( -UPPER(1), -UPPER(2), CORREL )\n     +           - MVBVU ( -UPPER(1), -LOWER(2), CORREL )\n      ELSE IF ( INFIN(1) .EQ. 1  .AND. INFIN(2) .EQ. 0 ) THEN\n         MVBVN =  MVBVU ( LOWER(1), -UPPER(2), -CORREL )\n      ELSE IF ( INFIN(1) .EQ. 0  .AND. INFIN(2) .EQ. 1 ) THEN\n         MVBVN =  MVBVU ( -UPPER(1), LOWER(2), -CORREL )\n      ELSE IF ( INFIN(1) .EQ. 1  .AND. INFIN(2) .EQ. 1 ) THEN\n         MVBVN =  MVBVU ( LOWER(1), LOWER(2), CORREL )\n      ELSE IF ( INFIN(1) .EQ. 0  .AND. INFIN(2) .EQ. 0 ) THEN\n         MVBVN =  MVBVU ( -UPPER(1), -UPPER(2), CORREL )\n      ELSE\n         MVBVN = 1\n      END IF\n      END \n      DOUBLE PRECISION FUNCTION MVBVU( SH, SK, R )\n*\n*     A function for computing bivariate normal probabilities;\n*       developed using \n*         Drezner, Z. and Wesolowsky, G. O. (1989),\n*         On the Computation of the Bivariate Normal Integral,\n*         J. Stat. Comput. Simul.. 35 pp. 
101-107.\n*       with extensive modications for double precisions by    \n*         Alan Genz and Yihong Ge\n*         Department of Mathematics\n*         Washington State University\n*         Pullman, WA 99164-3113\n*         Email : alangenz@wsu.edu\n*\n* BVN - calculate the probability that X is larger than SH and Y is\n*       larger than SK.\n*\n* Parameters\n*\n*   SH  REAL, integration limit\n*   SK  REAL, integration limit\n*   R   REAL, correlation coefficient\n*   LG  INTEGER, number of Gauss Rule Points and Weights\n*\n      DOUBLE PRECISION BVN, SH, SK, R, ZERO, TWOPI \n      INTEGER I, LG, NG\n      PARAMETER ( ZERO = 0, TWOPI = 6.283185307179586D0 ) \n      DOUBLE PRECISION X(10,3), W(10,3), AS, A, B, C, D, RS, XS\n      DOUBLE PRECISION MVPHI, SN, ASR, H, K, BS, HS, HK\n      SAVE X, W\n*     Gauss Legendre Points and Weights, N =  6\n      DATA ( W(I,1), X(I,1), I = 1, 3 ) /\n     *  0.1713244923791705D+00,-0.9324695142031522D+00,\n     *  0.3607615730481384D+00,-0.6612093864662647D+00,\n     *  0.4679139345726904D+00,-0.2386191860831970D+00/\n*     Gauss Legendre Points and Weights, N = 12\n      DATA ( W(I,2), X(I,2), I = 1, 6 ) /\n     *  0.4717533638651177D-01,-0.9815606342467191D+00,\n     *  0.1069393259953183D+00,-0.9041172563704750D+00,\n     *  0.1600783285433464D+00,-0.7699026741943050D+00,\n     *  0.2031674267230659D+00,-0.5873179542866171D+00,\n     *  0.2334925365383547D+00,-0.3678314989981802D+00,\n     *  0.2491470458134029D+00,-0.1252334085114692D+00/\n*     Gauss Legendre Points and Weights, N = 20\n      DATA ( W(I,3), X(I,3), I = 1, 10 ) /\n     *  0.1761400713915212D-01,-0.9931285991850949D+00,\n     *  0.4060142980038694D-01,-0.9639719272779138D+00,\n     *  0.6267204833410906D-01,-0.9122344282513259D+00,\n     *  0.8327674157670475D-01,-0.8391169718222188D+00,\n     *  0.1019301198172404D+00,-0.7463319064601508D+00,\n     *  0.1181945319615184D+00,-0.6360536807265150D+00,\n     *  0.1316886384491766D+00,-0.5108670019508271D+00,\n     *  0.1420961093183821D+00,-0.3737060887154196D+00,\n     *  0.1491729864726037D+00,-0.2277858511416451D+00,\n     *  0.1527533871307259D+00,-0.7652652113349733D-01/\n      IF ( ABS(R) .LT. 0.3 ) THEN\n         NG = 1\n         LG = 3\n      ELSE IF ( ABS(R) .LT. 0.75 ) THEN\n         NG = 2\n         LG = 6\n      ELSE \n         NG = 3\n         LG = 10\n      ENDIF\n      H = SH\n      K = SK \n      HK = H*K\n      BVN = 0\n      IF ( ABS(R) .LT. 0.925 ) THEN\n         HS = ( H*H + K*K )/2\n         ASR = ASIN(R)\n         DO I = 1, LG\n            SN = SIN(ASR*( X(I,NG)+1 )/2)\n            BVN = BVN + W(I,NG)*EXP( ( SN*HK - HS )/( 1 - SN*SN ) )\n            SN = SIN(ASR*(-X(I,NG)+1 )/2)\n            BVN = BVN + W(I,NG)*EXP( ( SN*HK - HS )/( 1 - SN*SN ) )\n         END DO\n         BVN = BVN*ASR/(2*TWOPI) + MVPHI(-H)*MVPHI(-K) \n      ELSE\n         IF ( R .LT. 0 ) THEN\n            K = -K\n            HK = -HK\n         ENDIF\n         IF ( ABS(R) .LT. 1 ) THEN\n            AS = ( 1 - R )*( 1 + R )\n            A = SQRT(AS)\n            BS = ( H - K )**2\n            C = ( 4 - HK )/8 \n            D = ( 12 - HK )/16\n            BVN = A*EXP( -(BS/AS + HK)/2 )\n     +             *( 1 - C*(BS - AS)*(1 - D*BS/5)/3 + C*D*AS*AS/5 )\n            IF ( HK .GT. 
-160 ) THEN\n               B = SQRT(BS)\n               BVN = BVN - EXP(-HK/2)*SQRT(TWOPI)*MVPHI(-B/A)*B\n     +                    *( 1 - C*BS*( 1 - D*BS/5 )/3 ) \n            ENDIF\n            A = A/2\n            DO I = 1, LG\n               XS = ( A*(X(I,NG)+1) )**2\n               RS = SQRT( 1 - XS )\n               BVN = BVN + A*W(I,NG)*\n     +              ( EXP( -BS/(2*XS) - HK/(1+RS) )/RS \n     +              - EXP( -(BS/XS+HK)/2 )*( 1 + C*XS*( 1 + D*XS ) ) )\n               XS = AS*(-X(I,NG)+1)**2/4\n               RS = SQRT( 1 - XS )\n               BVN = BVN + A*W(I,NG)*EXP( -(BS/XS + HK)/2 )\n     +                    *( EXP( -HK*(1-RS)/(2*(1+RS)) )/RS \n     +                       - ( 1 + C*XS*( 1 + D*XS ) ) )\n            END DO\n            BVN = -BVN/TWOPI\n         ENDIF\n         IF ( R .GT. 0 ) BVN =  BVN + MVPHI( -MAX( H, K ) )\n         IF ( R .LT. 0 ) BVN = -BVN + MAX( ZERO, MVPHI(-H) - MVPHI(-K) )     \n      ENDIF\n      MVBVU = BVN\n      END\n*\n      DOUBLE PRECISION FUNCTION MVSTDT( NU, T )\n*\n*     Student t Distribution Function\n*\n*                       T\n*         TSTDNT = C   I  ( 1 + y*y/NU )**( -(NU+1)/2 ) dy\n*                   NU -INF\n*\n      INTEGER NU, J\n      DOUBLE PRECISION MVPHI, T, CSTHE, SNTHE, POLYN, TT, TS, RN, PI\n      PARAMETER ( PI = 3.141592653589793D0 )\n      IF ( NU .LT. 1 ) THEN\n         MVSTDT = MVPHI( T )\n      ELSE IF ( NU .EQ. 1 ) THEN\n         MVSTDT = ( 1 + 2*ATAN( T )/PI )/2\n      ELSE IF ( NU .EQ. 2) THEN\n         MVSTDT = ( 1 + T/SQRT( 2 + T*T ))/2\n      ELSE \n         TT = T*T\n         CSTHE = NU/( NU + TT )\n         POLYN = 1\n         DO J = NU - 2, 2, -2\n            POLYN = 1 + ( J - 1 )*CSTHE*POLYN/J\n         END DO\n         IF ( MOD( NU, 2 ) .EQ. 1 ) THEN\n            RN = NU\n            TS = T/SQRT(RN)\n            MVSTDT = ( 1 + 2*( ATAN( TS ) + TS*CSTHE*POLYN )/PI )/2\n         ELSE\n            SNTHE = T/SQRT( NU + TT )\n            MVSTDT = ( 1 + SNTHE*POLYN )/2\n         END IF\n         IF ( MVSTDT .LT. 0 ) MVSTDT = 0\n      ENDIF\n      END\n*\n      DOUBLE PRECISION FUNCTION MVBVT( NU, LOWER, UPPER, INFIN, CORREL )      \n*\n*     A function for computing bivariate normal and t probabilities.\n*\n*  Parameters\n*\n*     NU     INTEGER degrees of freedom parameter; NU < 1 gives normal case.\n*     LOWER  REAL, array of lower integration limits.\n*     UPPER  REAL, array of upper integration limits.\n*     INFIN  INTEGER, array of integration limits flags:\n*            if INFIN(I) = 0, Ith limits are (-infinity, UPPER(I)];\n*            if INFIN(I) = 1, Ith limits are [LOWER(I), infinity);\n*            if INFIN(I) = 2, Ith limits are [LOWER(I), UPPER(I)].\n*     CORREL REAL, correlation coefficient.\n*\n      DOUBLE PRECISION LOWER(*), UPPER(*), CORREL, MVBVN, MVBVTL\n      INTEGER NU, INFIN(*)\n      IF ( NU .LT. 1 ) THEN\n            MVBVT =  MVBVN ( LOWER, UPPER, INFIN, CORREL )\n      ELSE\n         IF ( INFIN(1) .EQ. 2  .AND. INFIN(2) .EQ. 2 ) THEN\n            MVBVT =  MVBVTL ( NU, UPPER(1), UPPER(2), CORREL )\n     +           - MVBVTL ( NU, UPPER(1), LOWER(2), CORREL )\n     +           - MVBVTL ( NU, LOWER(1), UPPER(2), CORREL )\n     +           + MVBVTL ( NU, LOWER(1), LOWER(2), CORREL )\n         ELSE IF ( INFIN(1) .EQ. 2  .AND. INFIN(2) .EQ. 1 ) THEN\n            MVBVT =  MVBVTL ( NU, -LOWER(1), -LOWER(2), CORREL )\n     +           - MVBVTL ( NU, -UPPER(1), -LOWER(2), CORREL )\n         ELSE IF ( INFIN(1) .EQ. 1  .AND. INFIN(2) .EQ. 
2 ) THEN\n            MVBVT =  MVBVTL ( NU, -LOWER(1), -LOWER(2), CORREL )\n     +           - MVBVTL ( NU, -LOWER(1), -UPPER(2), CORREL )\n         ELSE IF ( INFIN(1) .EQ. 2  .AND. INFIN(2) .EQ. 0 ) THEN\n            MVBVT =  MVBVTL ( NU, UPPER(1), UPPER(2), CORREL )\n     +           - MVBVTL ( NU, LOWER(1), UPPER(2), CORREL )\n         ELSE IF ( INFIN(1) .EQ. 0  .AND. INFIN(2) .EQ. 2 ) THEN\n            MVBVT =  MVBVTL ( NU, UPPER(1), UPPER(2), CORREL )\n     +           - MVBVTL ( NU, UPPER(1), LOWER(2), CORREL )\n         ELSE IF ( INFIN(1) .EQ. 1  .AND. INFIN(2) .EQ. 0 ) THEN\n            MVBVT =  MVBVTL ( NU, -LOWER(1), UPPER(2), -CORREL )\n         ELSE IF ( INFIN(1) .EQ. 0  .AND. INFIN(2) .EQ. 1 ) THEN\n            MVBVT =  MVBVTL ( NU, UPPER(1), -LOWER(2), -CORREL )\n         ELSE IF ( INFIN(1) .EQ. 1  .AND. INFIN(2) .EQ. 1 ) THEN\n            MVBVT =  MVBVTL ( NU, -LOWER(1), -LOWER(2), CORREL )\n         ELSE IF ( INFIN(1) .EQ. 0  .AND. INFIN(2) .EQ. 0 ) THEN\n            MVBVT =  MVBVTL ( NU, UPPER(1), UPPER(2), CORREL )\n         ELSE\n            MVBVT = 1\n         END IF\n      END IF\n      END\n*\n      DOUBLE PRECISION FUNCTION MVBVTC( NU, L, U, INFIN, RHO )      \n*\n*     A function for computing complementary bivariate normal and t \n*       probabilities.\n*\n*  Parameters\n*\n*     NU     INTEGER degrees of freedom parameter.\n*     L      REAL, array of lower integration limits.\n*     U      REAL, array of upper integration limits.\n*     INFIN  INTEGER, array of integration limits flags:\n*            if INFIN(1) INFIN(2),        then MVBVTC computes\n*                 0         0              P( X>U(1), Y>U(2) )\n*                 1         0              P( X<L(1), Y>U(2) )\n*                 0         1              P( X>U(1), Y<L(2) )\n*                 1         1              P( X<L(1), Y<L(2) )\n*                 2         0      P( X>U(1), Y>U(2) ) + P( X<L(1), Y>U(2) )\n*                 2         1      P( X>U(1), Y<L(2) ) + P( X<L(1), Y<L(2) )\n*                 0         2      P( X>U(1), Y>U(2) ) + P( X>U(1), Y<L(2) )\n*                 1         2      P( X<L(1), Y>U(2) ) + P( X<L(1), Y<L(2) )\n*                 2         2      P( X>U(1), Y<L(2) ) + P( X<L(1), Y<L(2) )\n*                               +  P( X>U(1), Y>U(2) ) + P( X<L(1), Y>U(2) )\n*\n*     RHO    REAL, correlation coefficient.\n*\n      DOUBLE PRECISION L(*), U(*), LW(2), UP(2), B, RHO, MVBVT\n      INTEGER I, NU, INFIN(*), INF(2)\n*\n      DO I = 1, 2\n         IF ( MOD( INFIN(I), 2 ) .EQ. 0 ) THEN\n            INF(I) = 1\n            LW(I) = U(I) \n         ELSE\n            INF(I) = 0\n            UP(I) = L(I) \n         END IF\n      END DO\n      B = MVBVT( NU, LW, UP, INF, RHO )\n      DO I = 1, 2\n         IF ( INFIN(I) .EQ. 2 ) THEN\n            INF(I) = 0\n            UP(I) = L(I) \n            B = B + MVBVT( NU, LW, UP, INF, RHO )\n         END IF\n      END DO\n      IF ( INFIN(1) .EQ. 2 .AND. INFIN(2) .EQ. 2 ) THEN\n         INF(1) = 1\n         LW(1) = U(1) \n         B = B + MVBVT( NU, LW, UP, INF, RHO )\n      END IF\n      MVBVTC = B\n      END\n*\n      double precision function mvbvtl( nu, dh, dk, r )\n*\n*     a function for computing bivariate t probabilities.\n*\n*       Alan Genz\n*       Department of Mathematics\n*       Washington State University\n*       Pullman, Wa 99164-3113\n*       Email : alangenz@wsu.edu\n*\n*    this function is based on the method described by \n*        Dunnett, C.W. and M. 
Sobel, (1954),\n*        A bivariate generalization of Student's t-distribution\n*        with tables for certain special cases,\n*        Biometrika 41, pp. 153-169.\n*\n* mvbvtl - calculate the probability that x < dh and y < dk. \n*\n* parameters\n*\n*   nu number of degrees of freedom\n*   dh 1st lower integration limit\n*   dk 2nd lower integration limit\n*   r   correlation coefficient\n*\n      integer nu, j, hs, ks\n      double precision dh, dk, r\n      double precision tpi, pi, ors, hrk, krh, bvt, snu \n      double precision gmph, gmpk, xnkh, xnhk, qhrk, hkn, hpk, hkrn\n      double precision btnckh, btnchk, btpdkh, btpdhk, one\n      parameter ( pi = 3.14159265358979323844d0, tpi = 2*pi, one = 1 )\n      snu = sqrt( dble(nu) )\n      ors = 1 - r*r  \n      hrk = dh - r*dk  \n      krh = dk - r*dh  \n      if ( abs(hrk) + ors .gt. 0 ) then\n         xnhk = hrk**2/( hrk**2 + ors*( nu + dk**2 ) ) \n         xnkh = krh**2/( krh**2 + ors*( nu + dh**2 ) ) \n      else\n         xnhk = 0\n         xnkh = 0  \n      end if\n      hs = sign( one, dh - r*dk )  \n      ks = sign( one, dk - r*dh ) \n      if ( mod( nu, 2 ) .eq. 0 ) then\n         bvt = atan2( sqrt(ors), -r )/tpi \n         gmph = dh/sqrt( 16*( nu + dh**2 ) )  \n         gmpk = dk/sqrt( 16*( nu + dk**2 ) )  \n         btnckh = 2*atan2( sqrt( xnkh ), sqrt( 1 - xnkh ) )/pi  \n         btpdkh = 2*sqrt( xnkh*( 1 - xnkh ) )/pi \n         btnchk = 2*atan2( sqrt( xnhk ), sqrt( 1 - xnhk ) )/pi  \n         btpdhk = 2*sqrt( xnhk*( 1 - xnhk ) )/pi \n         do j = 1, nu/2\n            bvt = bvt + gmph*( 1 + ks*btnckh ) \n            bvt = bvt + gmpk*( 1 + hs*btnchk ) \n            btnckh = btnckh + btpdkh  \n            btpdkh = 2*j*btpdkh*( 1 - xnkh )/( 2*j + 1 )  \n            btnchk = btnchk + btpdhk  \n            btpdhk = 2*j*btpdhk*( 1 - xnhk )/( 2*j + 1 )  \n            gmph = gmph*( 2*j - 1 )/( 2*j*( 1 + dh**2/nu ) ) \n            gmpk = gmpk*( 2*j - 1 )/( 2*j*( 1 + dk**2/nu ) ) \n         end do\n      else\n         qhrk = sqrt( dh**2 + dk**2 - 2*r*dh*dk + nu*ors )  \n         hkrn = dh*dk + r*nu  \n         hkn = dh*dk - nu  \n         hpk = dh + dk \n         bvt = atan2(-snu*(hkn*qhrk+hpk*hkrn),hkn*hkrn-nu*hpk*qhrk)/tpi  \n         if ( bvt .lt. -1d-15 ) bvt = bvt + 1\n         gmph = dh/( tpi*snu*( 1 + dh**2/nu ) )  \n         gmpk = dk/( tpi*snu*( 1 + dk**2/nu ) )  \n         btnckh = sqrt( xnkh )  \n         btpdkh = btnckh \n         btnchk = sqrt( xnhk )  \n         btpdhk = btnchk  \n         do j = 1, ( nu - 1 )/2\n            bvt = bvt + gmph*( 1 + ks*btnckh ) \n            bvt = bvt + gmpk*( 1 + hs*btnchk ) \n            btpdkh = ( 2*j - 1 )*btpdkh*( 1 - xnkh )/( 2*j )  \n            btnckh = btnckh + btpdkh  \n            btpdhk = ( 2*j - 1 )*btpdhk*( 1 - xnhk )/( 2*j )  \n            btnchk = btnchk + btpdhk  \n            gmph = 2*j*gmph/( ( 2*j + 1 )*( 1 + dh**2/nu ) ) \n            gmpk = 2*j*gmpk/( ( 2*j + 1 )*( 1 + dk**2/nu ) ) \n         end do\n      end if\n      mvbvtl = bvt \n*\n*     end mvbvtl\n*\n      end\n*\n      DOUBLE PRECISION FUNCTION MVCHNV( N, P )\n*\n*                  MVCHNV\n*     P =  1 - K  I     exp(-t*t/2) t**(N-1) dt, for N >= 1.\n*               N  0\n*\n      INTEGER I, N, NO\n      DOUBLE PRECISION P, TWO, R, RO, LRP, LKN, MVPHNV, MVCHNC\n      PARAMETER ( LRP = -.22579135264472743235D0, TWO = 2 )\n*                 LRP =   LOG( SQRT( 2/PI ) )\n      SAVE NO, LKN\n      DATA NO / 0 /\n      IF ( N .LE. 1 ) THEN\n         R = -MVPHNV( P/2 )\n      ELSE IF ( P .LT. 
1 ) THEN\n         IF ( N .EQ. 2 ) THEN\n            R = SQRT( -2*LOG(P) )\n         ELSE\n            IF ( N .NE. NO ) THEN\n               NO = N\n               LKN = 0\n               DO I = N-2, 2, -2\n                  LKN = LKN - LOG( DBLE(I) )\n               END DO\n               IF ( MOD( N, 2 ) .EQ. 1 ) LKN = LKN + LRP\n            END IF\n            IF ( N .GE. -5*LOG(1-P)/4 ) THEN\n               R = TWO/( 9*N )\n               R = N*( -MVPHNV(P)*SQRT(R) + 1 - R )**3\n               IF ( R .GT. 2*N+6 ) THEN\n                  R = 2*( LKN - LOG(P) ) + ( N - 2 )*LOG(R)\n               END IF\n            ELSE\n               R = EXP( ( LOG( (1-P)*N ) - LKN )*TWO/N )\n            END IF\n            R = SQRT(R)\n            RO = R\n            R = MVCHNC( LKN, N, P, R )\n            IF ( ABS( R - RO ) .GT. 1D-6 ) THEN\n               RO = R\n               R = MVCHNC( LKN, N, P, R )\n               IF ( ABS( R - RO ) .GT. 1D-6 ) R = MVCHNC( LKN, N, P, R )\n            END IF\n         END IF\n      ELSE\n         R = 0\n      END IF\n      MVCHNV = R\n      END\n*\n      DOUBLE PRECISION FUNCTION MVCHNC( LKN, N, P, R )\n*\n*     Third order Schroeder correction to R for MVCHNV\n*\n      INTEGER I, N\n      DOUBLE PRECISION P, R, LKN, DF, RR, RN, CHI, MVPHI\n      DOUBLE PRECISION LRP, TWO, AL, DL, AI, BI, CI, DI, EPS\n      PARAMETER ( LRP = -.22579135264472743235D0, TWO = 2, EPS = 1D-14 )\n*                 LRP =   LOG( SQRT( 2/PI ) )\n      RR = R*R\n      IF ( N .LT. 2 ) THEN\n         CHI = 2*MVPHI(-R)\n      ELSE IF ( N .LT. 100 ) THEN\n*\n*        Use standard Chi series\n*\n         RN = 1\n         DO I = N - 2, 2, -2\n            RN = 1 + RR*RN/I\n         END DO\n         RR = RR/2\n         IF ( MOD( N, 2 ) .EQ. 0 ) THEN\n            CHI = EXP(       LOG(   RN ) - RR )\n         ELSE\n            CHI = EXP( LRP + LOG( R*RN ) - RR ) + 2*MVPHI(-R)\n         ENDIF\n      ELSE\n         RR = RR/2\n         AL = N/TWO\n         CHI = EXP( -RR + AL*LOG(RR) + LKN + LOG(TWO)*( N - 2 )/2 )\n         IF ( RR .LT. AL + 1 ) THEN \n*\n*           Use Incomplete Gamma series\n*\n            DL = CHI\n            DO I = 1, 1000\n               DL = DL*RR/( AL + I ) \n               CHI = CHI + DL\n               IF ( ABS( DL*RR/( AL + I + 1 - RR ) ) .LT. EPS ) GO TO 10\n            END DO\n 10         CHI = 1 - CHI/AL\n         ELSE\n*\n*           Use Incomplete Gamma continued fraction\n*\n            BI = RR + 1 - AL\n            CI = 1/EPS\n            DI = BI\n            CHI = CHI/BI \n            DO I = 1, 250\n               AI = I*( AL - I )\n               BI = BI + 2\n               CI = BI + AI/CI\n               IF ( CI .EQ. 0 ) CI = EPS \n               DI = BI + AI/DI\n               IF ( DI .EQ. 0 ) DI = EPS \n               DL = CI/DI\n               CHI = CHI*DL\n               IF ( ABS( DL - 1 ) .LT. 
EPS ) GO TO 20\n            END DO\n         END IF\n      END IF\n 20   DF =  ( P - CHI )/EXP( LKN + ( N - 1 )*LOG(R) - RR )\n      MVCHNC = R - DF*( 1 - DF*( R - ( N - 1 )/R )/2 )   \n      END\n*\n      SUBROUTINE MVKBRV( NDIM, MINVLS, MAXVLS, NF, FUNSUB, \n     &                   ABSEPS, RELEPS, ABSERR, FINEST, INFORM )\n*\n*  Automatic Multidimensional Integration Subroutine\n*               \n*         AUTHOR: Alan Genz\n*                 Department of Mathematics\n*                 Washington State University\n*                 Pulman, WA 99164-3113\n*                 Email: AlanGenz@wsu.edu\n*\n*         Last Change: 12/15/00\n*\n*  MVKBRV computes an approximation to the integral\n*\n*      1  1     1\n*     I  I ... I       F(X)  dx(NDIM)...dx(2)dx(1)\n*      0  0     0\n*\n*    F(X) is a real NF-vector of integrands.\n*\n*  It uses randomized Korobov rules. The primary references are\n*   \"Randomization of Number Theoretic Methods for Multiple Integration\"\n*    R. Cranley and T.N.L. Patterson, SIAM J Numer Anal, 13, pp. 904-14,\n*  and \n*   \"Optimal Parameters for Multidimensional Integration\", \n*    P. Keast, SIAM J Numer Anal, 10, pp.831-838.\n*  If there are more than 100 variables, the remaining variables are\n*  integrated using the rules described in the reference\n*   \"On a Number-Theoretical Integration Method\"\n*   H. Niederreiter, Aequationes Mathematicae, 8(1972), pp. 304-11.\n*\n***************  Parameters ********************************************\n****** Input parameters\n*  NDIM    Number of variables, must exceed 1, but not exceed 100\n*  MINVLS  Integer minimum number of function evaluations allowed.\n*          MINVLS must not exceed MAXVLS.  If MINVLS < 0 then the\n*          routine assumes a previous call has been made with \n*          the same integrands and continues that calculation.\n*  MAXVLS  Integer maximum number of function evaluations allowed.\n*  NF      Number of integrands, must exceed 1, but not exceed 5000\n*  FUNSUB  EXTERNALly declared user defined integrand subroutine.\n*          It must have parameters ( NDIM, Z, NF, FUNVLS ), where \n*          Z is a real NDIM-vector and FUNVLS is a real NF-vector.\n*                                     \n*  ABSEPS  Required absolute accuracy.\n*  RELEPS  Required relative accuracy.\n****** Output parameters\n*  MINVLS  Actual number of function evaluations used.\n*  ABSERR  Maximum norm of estimated absolute accuracy of FINEST.\n*  FINEST  Estimated NF-vector of values of the integrals.\n*  INFORM  INFORM = 0 for normal exit, when \n*                     ABSERR <= MAX(ABSEPS, RELEPS*||FINEST||)\n*                  and \n*                     INTVLS <= MAXCLS.\n*          INFORM = 1 If MAXVLS was too small to obtain the required \n*          accuracy. 
In this case a value FINEST is returned with \n*          estimated absolute accuracy ABSERR.\n************************************************************************\n      EXTERNAL FUNSUB\n      DOUBLE PRECISION ABSEPS, RELEPS, FINEST(*), ABSERR, ONE\n      INTEGER NDIM, NF, MINVLS, MAXVLS, INFORM, NP, PLIM, KLIM,\n     &        NLIM, FLIM, SAMPLS, I, K, INTVLS, MINSMP, KMX\n      PARAMETER ( PLIM = 28, NLIM = 1000, KLIM = 100, FLIM = 5000 )\n      PARAMETER ( MINSMP = 8 )\n      INTEGER P(PLIM), C(PLIM,KLIM-1), PR(NLIM) \n      DOUBLE PRECISION DIFINT, FINVAL(FLIM), VARSQR(FLIM), VAREST(FLIM), \n     &     VARPRD, X(NLIM), R(NLIM), VK(NLIM), VALUES(FLIM), FS(FLIM)\n      PARAMETER ( ONE = 1 )\n      SAVE P, C, SAMPLS, NP, VAREST\n      INFORM = 1\n      INTVLS = 0\n      VARPRD = 0\n      IF ( MINVLS .GE. 0 ) THEN\n         DO K = 1, NF\n            FINEST(K) = 0\n            VAREST(K) = 0\n         END DO\n         SAMPLS = MINSMP \n         DO I = MIN( NDIM, 10 ), PLIM\n            NP = I\n            IF ( MINVLS .LT. 2*SAMPLS*P(I) ) GO TO 10\n         END DO\n         SAMPLS = MAX( MINSMP, MINVLS/( 2*P(NP) ) )\n      ENDIF\n 10   VK(1) = ONE/P(NP)\n      K = 1\n      DO I = 2, NDIM\n         IF ( I .LE. KLIM ) THEN\n            K = MOD( C(NP, MIN(NDIM-1,KLIM-1))*DBLE(K), DBLE(P(NP)) )\n            VK(I) = K*VK(1)\n         ELSE\n            VK(I) = INT( P(NP)*2**( DBLE(I-KLIM)/(NDIM-KLIM+1) ) )\n            VK(I) = MOD( VK(I)/P(NP), ONE )\n         END IF\n      END DO\n      DO K = 1, NF\n         FINVAL(K) = 0\n         VARSQR(K) = 0\n      END DO\n*\n      DO I = 1, SAMPLS\n         CALL MVKRSV( NDIM,KLIM,VALUES, P(NP),VK, NF,FUNSUB, X,R,PR,FS )\n         DO K = 1, NF\n            DIFINT = ( VALUES(K) - FINVAL(K) )/I\n            FINVAL(K) = FINVAL(K) + DIFINT\n            VARSQR(K) = ( I - 2 )*VARSQR(K)/I + DIFINT**2\n         END DO\n      END DO\n*\n      INTVLS = INTVLS + 2*SAMPLS*P(NP)\n      KMX = 1\n      DO K = 1, NF\n         VARPRD = VAREST(K)*VARSQR(K)\n         FINEST(K) = FINEST(K) + ( FINVAL(K) - FINEST(K) )/( 1+VARPRD )      \n         IF ( VARSQR(K) .GT. 0 ) VAREST(K) = ( 1 + VARPRD )/VARSQR(K)\n         IF ( ABS(FINEST(K)) .GT. ABS(FINEST(KMX)) ) KMX = K\n      END DO\n      ABSERR = 7*SQRT( VARSQR(KMX)/( 1 + VARPRD ) )/2\n      IF ( ABSERR .GT. MAX( ABSEPS, ABS(FINEST(KMX))*RELEPS ) ) THEN\n         IF ( NP .LT. PLIM ) THEN\n            NP = NP + 1\n         ELSE\n            SAMPLS = MIN( 3*SAMPLS/2, ( MAXVLS - INTVLS )/( 2*P(NP) ) ) \n            SAMPLS = MAX( MINSMP, SAMPLS )\n         ENDIF\n         IF ( INTVLS + 2*SAMPLS*P(NP) .LE. 
MAXVLS ) GO TO 10\n      ELSE\n         INFORM = 0\n      ENDIF\n      MINVLS = INTVLS\n*\n*    Optimal Parameters for Lattice Rules\n*\n      DATA P( 1),(C( 1,I),I = 1,99)/     31, 12, 2*9, 13, 8*12, 3*3, 12,\n     & 2*7, 9*12, 3*3, 12, 2*7, 9*12, 3*3, 12, 2*7, 9*12, 3*3, 12, 2*7,\n     & 8*12, 7, 3*3, 3*7, 21*3/\n      DATA P( 2),(C( 2,I),I = 1,99)/    47, 13, 11, 17, 10, 6*15,\n     & 22, 2*15, 3*6, 2*15, 9, 13, 3*2, 13, 2*11, 10, 9*15, 3*6, 2*15,\n     & 9, 13, 3*2, 13, 2*11, 10, 9*15, 3*6, 2*15, 9, 13, 3*2, 13, 2*11,\n     & 2*10, 8*15, 6, 2, 3, 2, 3, 12*2/\n      DATA P( 3),(C( 3,I),I = 1,99)/    73, 27, 28, 10, 2*11, 20,\n     & 2*11, 28, 2*13, 28, 3*13, 16*14, 2*31, 3*5, 31, 13, 6*11, 7*13,\n     & 16*14, 2*31, 3*5, 11, 13, 7*11, 2*13, 11, 13, 4*5, 14, 13, 8*5/\n      DATA P( 4),(C( 4,I),I = 1,99)/   113, 35, 2*27, 36, 22, 2*29,\n     & 20, 45, 3*5, 16*21, 29, 10*17, 12*23, 21, 27, 3*3, 24, 2*27,\n     & 17, 3*29, 17, 4*5, 16*21, 3*17, 6, 2*17, 6, 3, 2*6, 5*3/\n      DATA P( 5),(C( 5,I),I = 1,99)/   173, 64, 66, 2*28, 2*44, 55,\n     & 67, 6*10, 2*38, 5*10, 12*49, 2*38, 31, 2*4, 31, 64, 3*4, 64,\n     & 6*45, 19*66, 11, 9*66, 45, 11, 7, 3, 3*2, 27, 5, 2*3, 2*5, 7*2/\n      DATA P( 6),(C( 6,I),I = 1,99)/   263, 111, 42, 54, 118, 20,\n     & 2*31, 72, 17, 94, 2*14, 11, 3*14, 94, 4*10, 7*14, 3*11, 7*8,\n     & 5*18, 113, 2*62, 2*45, 17*113, 2*63, 53, 63, 15*67, 5*51, 12,\n     & 51, 12, 51, 5, 2*3, 2*2, 5/\n      DATA P( 7),(C( 7,I),I = 1,99)/   397, 163, 154, 83, 43, 82,\n     & 92, 150, 59, 2*76, 47, 2*11, 100, 131, 6*116, 9*138, 21*101,\n     & 6*116, 5*100, 5*138, 19*101, 8*38, 5*3/\n      DATA P( 8),(C( 8,I),I = 1,99)/   593, 246, 189, 242, 102,\n     & 2*250, 102, 250, 280, 118, 196, 118, 191, 215, 2*121,\n     & 12*49, 34*171, 8*161, 17*14, 6*10, 103, 4*10, 5/\n      DATA P( 9),(C( 9,I),I = 1,99)/   907, 347, 402, 322, 418,\n     & 215, 220, 3*339, 337, 218, 4*315, 4*167, 361, 201, 11*124,\n     & 2*231, 14*90, 4*48, 23*90, 10*243, 9*283, 16, 283, 16, 2*283/\n      DATA P(10),(C(10,I),I = 1,99)/  1361, 505, 220, 601, 644,\n     & 612, 160, 3*206, 422, 134, 518, 2*134, 518, 652, 382,\n     & 206, 158, 441, 179, 441, 56, 2*559, 14*56, 2*101, 56,\n     & 8*101, 7*193, 21*101, 17*122, 4*101/\n      DATA P(11),(C(11,I),I = 1,99)/  2053, 794, 325, 960, 528,\n     & 2*247, 338, 366, 847, 2*753, 236, 2*334, 461, 711, 652,\n     & 3*381, 652, 7*381, 226, 7*326, 126, 10*326, 2*195, 19*55,\n     & 7*195, 11*132, 13*387/\n      DATA P(12),(C(12,I),I = 1,99)/  3079, 1189, 888, 259, 1082, 725,      \n     & 811, 636, 965, 2*497, 2*1490, 392, 1291, 2*508, 2*1291, 508,\n     & 1291, 2*508, 4*867, 934, 7*867, 9*1284, 4*563, 3*1010, 208,\n     & 838, 3*563, 2*759, 564, 2*759, 4*801, 5*759, 8*563, 22*226/\n      DATA P(13),(C(13,I),I = 1,99)/  4621, 1763, 1018, 1500, 432,\n     & 1332, 2203, 126, 2240, 1719, 1284, 878, 1983, 4*266,\n     & 2*747, 2*127, 2074, 127, 2074, 1400, 10*1383, 1400, 7*1383,\n     & 507, 4*1073, 5*1990, 9*507, 17*1073, 6*22, 1073, 6*452, 318,\n     & 4*301, 2*86, 15/\n      DATA P(14),(C(14,I),I = 1,99)/  6947, 2872, 3233, 1534, 2941,\n     & 2910, 393, 1796, 919, 446, 2*919, 1117, 7*103, 2311, 3117, 1101,\n     & 2*3117, 5*1101, 8*2503, 7*429, 3*1702, 5*184, 34*105, 13*784/\n      DATA P(15),(C(15,I),I = 1,99)/ 10427, 4309, 3758, 4034, 1963,\n     & 730, 642, 1502, 2246, 3834, 1511, 2*1102, 2*1522, 2*3427,\n     & 3928, 2*915, 4*3818, 3*4782, 3818, 4782, 2*3818, 7*1327, 9*1387,\n     & 13*2339, 18*3148, 3*1776, 3*3354, 925, 2*3354, 5*925, 8*2133/\n      DATA 
P(16),(C(16,I),I = 1,99)/ 15641, 6610, 6977, 1686, 3819,\n     & 2314, 5647, 3953, 3614, 5115, 2*423, 5408, 7426, 2*423,\n     & 487, 6227, 2660, 6227, 1221, 3811, 197, 4367, 351,\n     & 1281, 1221, 3*351, 7245, 1984, 6*2999, 3995, 4*2063, 1644,\n     & 2063, 2077, 3*2512, 4*2077, 19*754, 2*1097, 4*754, 248, 754,\n     & 4*1097, 4*222, 754,11*1982/\n      DATA P(17),(C(17,I),I = 1,99)/ 23473, 9861, 3647, 4073, 2535,\n     & 3430, 9865, 2830, 9328, 4320, 5913, 10365, 8272, 3706, 6186,\n     & 3*7806, 8610, 2563, 2*11558, 9421, 1181, 9421, 3*1181, 9421,\n     & 2*1181, 2*10574, 5*3534, 3*2898, 3450, 7*2141, 15*7055, 2831,\n     & 24*8204, 3*4688, 8*2831/\n      DATA P(18),(C(18,I),I = 1,99)/ 35221, 10327, 7582, 7124, 8214,\n     & 9600, 10271, 10193, 10800, 9086, 2365, 4409, 13812,\n     & 5661, 2*9344, 10362, 2*9344, 8585, 11114, 3*13080, 6949,\n     & 3*3436, 13213, 2*6130, 2*8159, 11595, 8159, 3436, 18*7096,\n     & 4377, 7096, 5*4377, 2*5410, 32*4377, 2*440, 3*1199/\n      DATA P(19),(C(19,I),I = 1,99)/ 52837, 19540, 19926, 11582,\n     & 11113, 24585, 8726, 17218, 419, 3*4918, 15701, 17710,\n     & 2*4037, 15808, 11401, 19398, 2*25950, 4454, 24987, 11719,\n     & 8697, 5*1452, 2*8697, 6436, 21475, 6436, 22913, 6434, 18497,\n     & 4*11089, 2*3036, 4*14208, 8*12906, 4*7614, 6*5021, 24*10145,\n     & 6*4544, 4*8394/    \n      DATA P(20),(C(20,I),I = 1,99)/ 79259, 34566, 9579, 12654,\n     & 26856, 37873, 38806, 29501, 17271, 3663, 10763, 18955,\n     & 1298, 26560, 2*17132, 2*4753, 8713, 18624, 13082, 6791,\n     & 1122, 19363, 34695, 4*18770, 15628, 4*18770, 33766, 6*20837,\n     & 5*6545, 14*12138, 5*30483, 19*12138, 9305, 13*11107, 2*9305/\n      DATA P(21),(C(21,I),I = 1,99)/118891, 31929, 49367, 10982, 3527,\n     & 27066, 13226, 56010, 18911, 40574, 2*20767, 9686, 2*47603, \n     & 2*11736, 41601, 12888, 32948, 30801, 44243, 2*53351, 16016, \n     & 2*35086, 32581, 2*2464, 49554, 2*2464, 2*49554, 2464, 81, 27260, \n     & 10681, 7*2185, 5*18086, 2*17631, 3*18086, 37335, 3*37774, \n     & 13*26401, 12982, 6*40398, 3*3518, 9*37799, 4*4721, 4*7067/\n      DATA P(22),(C(22,I),I = 1,99)/178349, 40701, 69087, 77576, 64590, \n     & 39397, 33179, 10858, 38935, 43129, 2*35468, 5279, 2*61518, 27945,\n     & 2*70975, 2*86478, 2*20514, 2*73178, 2*43098, 4701,\n     & 2*59979, 58556, 69916, 2*15170, 2*4832, 43064, 71685, 4832,\n     & 3*15170, 3*27679, 2*60826, 2*6187, 5*4264, 45567, 4*32269,\n     & 9*62060, 13*1803, 12*51108, 2*55315, 5*54140, 13134/\n      DATA P(23),(C(23,I),I = 1,99)/267523, 103650, 125480, 59978,\n     & 46875, 77172, 83021, 126904, 14541, 56299, 43636, 11655,\n     & 52680, 88549, 29804, 101894, 113675, 48040, 113675,\n     & 34987, 48308, 97926, 5475, 49449, 6850, 2*62545, 9440,\n     & 33242, 9440, 33242, 9440, 33242, 9440, 62850, 3*9440,\n     & 3*90308, 9*47904, 7*41143, 5*36114, 24997, 14*65162, 7*47650,\n     & 7*40586, 4*38725, 5*88329/\n      DATA P(24),(C(24,I),I = 1,99)/401287, 165843, 90647, 59925,\n     & 189541, 67647, 74795, 68365, 167485, 143918, 74912,\n     & 167289, 75517, 8148, 172106, 126159,3*35867, 121694,\n     & 52171, 95354, 2*113969, 76304, 2*123709, 144615, 123709,\n     & 2*64958, 32377, 2*193002, 25023, 40017, 141605, 2*189165,\n     & 141605, 2*189165, 3*141605, 189165, 20*127047, 10*127785,\n     & 6*80822, 16*131661, 7114, 131661/\n      DATA P(25),(C(25,I),I = 1,99)/601943, 130365, 236711, 110235,\n     & 125699, 56483, 93735, 234469, 60549, 1291, 93937,\n     & 245291, 196061, 258647, 162489, 176631, 204895, 73353,\n     & 172319, 
28881, 136787,2*122081, 275993, 64673, 3*211587,\n     & 2*282859, 211587, 242821, 3*256865, 122203, 291915, 122203,\n     & 2*291915, 122203, 2*25639, 291803, 245397, 284047,\n     & 7*245397, 94241, 2*66575, 19*217673, 10*210249, 15*94453/\n      DATA P(26),(C(26,I),I = 1,99)/902933, 333459, 375354, 102417,            \n     & 383544, 292630, 41147, 374614, 48032, 435453, 281493, 358168, \n     & 114121, 346892, 238990, 317313, 164158, 35497, 2*70530, 434839,  \n     & 3*24754, 393656, 2*118711, 148227, 271087, 355831, 91034, \n     & 2*417029, 2*91034, 417029, 91034, 2*299843, 2*413548, 308300,  \n     & 3*413548, 3*308300, 413548, 5*308300, 4*15311, 2*176255, 6*23613, \n     & 172210, 4* 204328, 5*121626, 5*200187, 2*121551, 12*248492, \n     & 5*13942/\n      DATA P(27), (C(27,I), I = 1,99)/ 1354471, 500884, 566009, 399251,\n     & 652979, 355008, 430235, 328722, 670680, 2*405585, 424646, \n     & 2*670180, 641587, 215580, 59048, 633320, 81010, 20789, 2*389250,  \n     & 2*638764, 2*389250, 398094, 80846, 2*147776, 296177, 2*398094,  \n     & 2*147776, 396313, 3*578233, 19482, 620706, 187095, 620706, \n     & 187095, 126467, 12*241663, 321632, 2*23210, 3*394484, 3*78101, \n     & 19*542095, 3*277743, 12*457259/\n      DATA P(28), (C(28,I), I = 1, 99)/ 2031713, 858339, 918142, 501970, \n     & 234813, 460565, 31996, 753018, 256150, 199809, 993599, 245149,      \n     & 794183, 121349, 150619, 376952, 2*809123, 804319, 67352, 969594, \n     & 434796, 969594, 804319, 391368, 761041, 754049, 466264, 2*754049,\n     & 466264, 2*754049, 282852, 429907, 390017, 276645, 994856, 250142, \n     & 144595, 907454, 689648, 4*687580, 978368, 687580, 552742, 105195, \n     & 942843, 768249, 4*307142, 7*880619, 11*117185, 11*60731,  \n     & 4*178309, 8*74373, 3*214965/\n*\n      END\n*\n      SUBROUTINE MVKRSV( NDIM,KL,VALUES,PRIME,VK, NF,FUNSUB, X,R,PR,FS )\n*\n*     For lattice rule sums\n*\n      INTEGER NDIM, NF, PRIME, KL, K, J, JP, PR(*)\n      DOUBLE PRECISION VALUES(*), VK(*), FS(*), X(*), R(*), MVUNI\n      DO J = 1, NF\n         VALUES(J) = 0\n      END DO\n*\n*     Determine random shifts for each variable; scramble lattice rule\n*\n      DO J = 1, NDIM\n         R(J) = MVUNI()\n         IF ( J .LT. KL ) THEN\n            JP = 1 + J*R(J)\n            IF ( JP .LT. J ) PR(J) = PR(JP)\n            PR(JP) = J\n         ELSE \n            PR(J) = J\n         END IF\n      END DO\n*\n*     Compute latice rule sums\n*\n      DO K = 1, PRIME\n         DO J = 1, NDIM\n            R(J) = R(J) + VK(PR(J))\n            IF ( R(J) .GT. 1 ) R(J) = R(J) - 1\n            X(J) = ABS( 2*R(J) - 1 )\n         END DO\n         CALL FUNSUB( NDIM, X, NF, FS )\n         DO J = 1, NF\n            VALUES(J) = VALUES(J) + ( FS(J) - VALUES(J) )/( 2*K-1 )      \n         END DO\n         DO J = 1, NDIM\n            X(J) = 1 - X(J)\n         END DO\n         CALL FUNSUB( NDIM, X, NF, FS )\n         DO J = 1, NF\n            VALUES(J) = VALUES(J) + ( FS(J) - VALUES(J) )/( 2*K )      \n         END DO\n      END DO\n*\n      END\n*\n      DOUBLE PRECISION FUNCTION MVUNI()\n*\n*     Uniform (0,1) random number generator\n*\n*     use R's random number generator directly\n*     the way `Writing R extentions' advertises.\n*\n      DOUBLE PRECISION unifrnd, x\n\n      x = unifrnd()\n      MVUNI = x\n      END\n\n"
  },
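  {
    "path": "external_libs/mvtnorm/mvphi_example.cpp",
    "content": "// Hypothetical check driver for the univariate helpers in the bundled mvtnorm\n// Fortran sources (illustration only; this file is not part of the upstream\n// distribution). It assumes the usual gfortran conventions: a trailing\n// underscore on external symbol names and all arguments passed by reference.\n// Assumed build sketch (Fortran sources compiled to mvt.o):\n//   gfortran -c mvt.f && gcc -c randomF77.c && g++ mvphi_example.cpp mvt.o randomF77.o -lgfortran\n#include <cstdio>\n\nextern \"C\" {\n  // MVPHI(Z): standard normal CDF, accurate to ~1e-15 (Schonfelder series)\n  double mvphi_(double* z);\n  // MVPHNV(P): inverse normal CDF (algorithm AS 241)\n  double mvphnv_(double* p);\n}\n\nint main() {\n  double z = 1.959963984540054;  // ~97.5% quantile of N(0,1)\n  std::printf(\"MVPHI(%.6f)  = %.12f (expect ~0.975)\\n\", z, mvphi_(&z));\n  double p = 0.975;\n  std::printf(\"MVPHNV(0.975) = %.12f (expect ~1.96)\\n\", mvphnv_(&p));\n  return 0;\n}\n"
  },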
  {
    "path": "external_libs/mvtnorm/mvtnorm.cpp",
    "content": "\n#include <stdlib.h>\n#include <stdio.h>\n#include \"mvtnorm.h\"\n\n// infinity bounds\nconst static int INFIN_BOUND_NORMAL = 2;        // (..., ...)\nconst static int INFIN_BOUND_UPPER = 1;         // (..., inf)\nconst static int INFIN_BOUND_LOWER = 0;         // (-inf, ..)\nconst static int INFIN_BOUND_LOWER_UPPER = -1;  // (-inf, inf)\n\n// returns the probability between 0 and 1 \n// returns a negative value if errors\n// -1 = completed with error > abseps\n// -2 = n greater 1000 or n < 1\n// -3 = matrix not positive semidefinite\ndouble pmvnorm(int* n, int* nu, double* lower, double* upper,\n    int* infin,  double* correl,  double* delta,   \n    int* maxpts, double* abseps,  double* releps,  \n    double* error, double* value, int* inform)\n{\n  mvtdst_ (n, nu,\n      lower, upper, infin, correl, delta,\n      maxpts, abseps, releps,\n      error, value, inform);\n  /* printf (\"error = %g, value = %g, inform = %d, abseps = %g \\n\", *error, *value, *inform, *abseps); */\n\n  switch (*inform) {\n    case 0:\n      return *value;\n    case 1: \n      return -1.0;\n    case 2:\n      return -2.0;\n    case 3:\n      return -3.0;\n  };\n\n  return *value;\n};\n\n// The complement CDF of X ~ MVN(0, Correlation Matrix)\ndouble pmvnorm_complement(int n,\n    int maxpts, double abseps,\n    double* bound, // all zeros if CDF = P(X <= 0)\n    double* cmat, // correlation matrix with only lower-diagonal entires stored: (2,1), (3,1), (3,2)...\n    double* error)\n{\n  int nu_ = 0;\n  int maxpts_ = maxpts;\n  double abseps_ = abseps;\n  double releps_ = 0;\n\n  double* upper = new double[n];\n  int* infin = new int[n];\n  double* delta = new double[n];\n\n  for (size_t i = 0; i < n; i++) {\n    infin[i] = 1; // (-inf, bound]\n    upper[i] = 0.0;\n    delta[i] = 0.0;\n  }\n\n  // values to return\n  double value_ = 0.0;\n  int inform_ = 0.0;\n\n  double ret = pmvnorm(&n, &nu_, \n      bound, upper, infin, cmat, delta, \n      &maxpts_, &abseps_, &releps_, error, &value_, &inform_);\n\n  delete[] (upper);\n  delete[] (infin);\n  delete[] (delta);\n\n  return ret;\n}\n"
  },
  {
    "path": "external_libs/mvtnorm/mvtnorm.h",
    "content": "#ifndef _MVT_H_\n#define _MVT_H_\n\n#ifdef __cplusplus\nextern \"C\" {\n\n  extern void mvtdst_(int* n, int* nu, double* lower, double* upper, int* infin,\n      double* correl, double* delta, int* maxpts, double* abseps, double* releps,\n      double* error, double* value, int* inform);\n\n#endif\n\n  double pmvnorm(int* n, int* nu, double* lower, double* upper,\n      int* infin,  double* correl,  double* delta,   \n      int* maxpts, double* abseps,  double* releps,  \n      double* error, double* value, int* inform);    \n\n  double pmvnorm_complement(int n, int maxpts, double abseps, double* bound,\n      double* cmat, double* error);\n\n#ifdef __cplusplus\n}\n#endif\n#endif /* _MVT_H_ */\n"
  },
  {
    "path": "external_libs/mvtnorm/randomF77.c",
    "content": "/* $Id: randomF77.c 95 2002-11-22 13:24:41Z hothorn $\n*\n*  wrapper for calling R's random number generator from\n*  the original FORTRAN code\n*\n*/\n\n/* #include <R.h> */\n\n/* void F77_SUB(rndstart)(void) { GetRNGstate(); } */\n/* void F77_SUB(rndend)(void) { PutRNGstate(); } */\n/* double F77_SUB(unifrnd)(void) { return unif_rand(); } */\n\n#include <stdlib.h>\n\nvoid rndstart_(void) {};\nvoid rndend_(void) {};\ndouble unifrnd_(void) { return 1.0 * rand()/RAND_MAX; };\n"
  },
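  {
    "path": "external_libs/mvtnorm/pmvnorm_example.cpp",
    "content": "// Hypothetical usage sketch for the pmvnorm_complement() wrapper (illustration\n// only; this file is not part of the upstream mvtnorm distribution). For two\n// standard normals with correlation rho, the upper-orthant probability has the\n// closed form P(X >= 0, Y >= 0) = 1/4 + asin(rho)/(2*pi), which makes the\n// numerical result easy to check.\n// Assumed build sketch (Fortran sources compiled to mvt.o):\n//   gfortran -c mvt.f && gcc -c randomF77.c && g++ pmvnorm_example.cpp mvtnorm.cpp mvt.o randomF77.o -lgfortran\n#include <cstdio>\n#include <cmath>\n#include \"mvtnorm.h\"\n\nint main() {\n  const int n = 2;               // number of variables\n  const int maxpts = 25000;      // cap on integrand evaluations\n  const double abseps = 1e-6;    // requested absolute accuracy\n  double bound[2] = {0.0, 0.0};  // compute P(X1 >= 0, X2 >= 0)\n  const double rho = 0.5;\n  double cmat[1] = {rho};        // lower-triangle correlations: (2,1)\n  double error = 0.0;\n  // a negative return value is an error code (see mvtnorm.cpp)\n  double prob = pmvnorm_complement(n, maxpts, abseps, bound, cmat, &error);\n  const double pi = 3.141592653589793;\n  const double expected = 0.25 + std::asin(rho)/(2.0*pi);\n  std::printf(\"pmvnorm_complement = %.8f (error bound %.2e)\\n\", prob, error);\n  std::printf(\"closed form        = %.8f\\n\", expected);\n  return 0;\n}\n"
  },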
  {
    "path": "external_libs/pgenlib/LICENSE",
    "content": "                   GNU LESSER GENERAL PUBLIC LICENSE\n                       Version 3, 29 June 2007\n\n Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>\n Everyone is permitted to copy and distribute verbatim copies\n of this license document, but changing it is not allowed.\n\n\n  This version of the GNU Lesser General Public License incorporates\nthe terms and conditions of version 3 of the GNU General Public\nLicense, supplemented by the additional permissions listed below.\n\n  0. Additional Definitions.\n\n  As used herein, \"this License\" refers to version 3 of the GNU Lesser\nGeneral Public License, and the \"GNU GPL\" refers to version 3 of the GNU\nGeneral Public License.\n\n  \"The Library\" refers to a covered work governed by this License,\nother than an Application or a Combined Work as defined below.\n\n  An \"Application\" is any work that makes use of an interface provided\nby the Library, but which is not otherwise based on the Library.\nDefining a subclass of a class defined by the Library is deemed a mode\nof using an interface provided by the Library.\n\n  A \"Combined Work\" is a work produced by combining or linking an\nApplication with the Library.  The particular version of the Library\nwith which the Combined Work was made is also called the \"Linked\nVersion\".\n\n  The \"Minimal Corresponding Source\" for a Combined Work means the\nCorresponding Source for the Combined Work, excluding any source code\nfor portions of the Combined Work that, considered in isolation, are\nbased on the Application, and not on the Linked Version.\n\n  The \"Corresponding Application Code\" for a Combined Work means the\nobject code and/or source code for the Application, including any data\nand utility programs needed for reproducing the Combined Work from the\nApplication, but excluding the System Libraries of the Combined Work.\n\n  1. Exception to Section 3 of the GNU GPL.\n\n  You may convey a covered work under sections 3 and 4 of this License\nwithout being bound by section 3 of the GNU GPL.\n\n  2. Conveying Modified Versions.\n\n  If you modify a copy of the Library, and, in your modifications, a\nfacility refers to a function or data to be supplied by an Application\nthat uses the facility (other than as an argument passed when the\nfacility is invoked), then you may convey a copy of the modified\nversion:\n\n   a) under this License, provided that you make a good faith effort to\n   ensure that, in the event an Application does not supply the\n   function or data, the facility still operates, and performs\n   whatever part of its purpose remains meaningful, or\n\n   b) under the GNU GPL, with none of the additional permissions of\n   this License applicable to that copy.\n\n  3. Object Code Incorporating Material from Library Header Files.\n\n  The object code form of an Application may incorporate material from\na header file that is part of the Library.  You may convey such object\ncode under terms of your choice, provided that, if the incorporated\nmaterial is not limited to numerical parameters, data structure\nlayouts and accessors, or small macros, inline functions and templates\n(ten or fewer lines in length), you do both of the following:\n\n   a) Give prominent notice with each copy of the object code that the\n   Library is used in it and that the Library and its use are\n   covered by this License.\n\n   b) Accompany the object code with a copy of the GNU GPL and this license\n   document.\n\n  4. 
Combined Works.\n\n  You may convey a Combined Work under terms of your choice that,\ntaken together, effectively do not restrict modification of the\nportions of the Library contained in the Combined Work and reverse\nengineering for debugging such modifications, if you also do each of\nthe following:\n\n   a) Give prominent notice with each copy of the Combined Work that\n   the Library is used in it and that the Library and its use are\n   covered by this License.\n\n   b) Accompany the Combined Work with a copy of the GNU GPL and this license\n   document.\n\n   c) For a Combined Work that displays copyright notices during\n   execution, include the copyright notice for the Library among\n   these notices, as well as a reference directing the user to the\n   copies of the GNU GPL and this license document.\n\n   d) Do one of the following:\n\n       0) Convey the Minimal Corresponding Source under the terms of this\n       License, and the Corresponding Application Code in a form\n       suitable for, and under terms that permit, the user to\n       recombine or relink the Application with a modified version of\n       the Linked Version to produce a modified Combined Work, in the\n       manner specified by section 6 of the GNU GPL for conveying\n       Corresponding Source.\n\n       1) Use a suitable shared library mechanism for linking with the\n       Library.  A suitable mechanism is one that (a) uses at run time\n       a copy of the Library already present on the user's computer\n       system, and (b) will operate properly with a modified version\n       of the Library that is interface-compatible with the Linked\n       Version.\n\n   e) Provide Installation Information, but only if you would otherwise\n   be required to provide such information under section 6 of the\n   GNU GPL, and only to the extent that such information is\n   necessary to install and execute a modified version of the\n   Combined Work produced by recombining or relinking the\n   Application with a modified version of the Linked Version. (If\n   you use option 4d0, the Installation Information must accompany\n   the Minimal Corresponding Source and Corresponding Application\n   Code. If you use option 4d1, you must provide the Installation\n   Information in the manner specified by section 6 of the GNU GPL\n   for conveying Corresponding Source.)\n\n  5. Combined Libraries.\n\n  You may place library facilities that are a work based on the\nLibrary side by side in a single library together with other library\nfacilities that are not Applications and are not covered by this\nLicense, and convey such a combined library under terms of your\nchoice, if you do both of the following:\n\n   a) Accompany the combined library with a copy of the same work based\n   on the Library, uncombined with any other library facilities,\n   conveyed under the terms of this License.\n\n   b) Give prominent notice with the combined library that part of it\n   is a work based on the Library, and explaining where to find the\n   accompanying uncombined form of the same work.\n\n  6. Revised Versions of the GNU Lesser General Public License.\n\n  The Free Software Foundation may publish revised and/or new versions\nof the GNU Lesser General Public License from time to time. Such new\nversions will be similar in spirit to the present version, but may\ndiffer in detail to address new problems or concerns.\n\n  Each version is given a distinguishing version number. 
If the\nLibrary as you received it specifies that a certain numbered version\nof the GNU Lesser General Public License \"or any later version\"\napplies to it, you have the option of following the terms and\nconditions either of that published version or of any later version\npublished by the Free Software Foundation. If the Library as you\nreceived it does not specify a version number of the GNU Lesser\nGeneral Public License, you may choose any version of the GNU Lesser\nGeneral Public License ever published by the Free Software Foundation.\n\n  If the Library as you received it specifies that a proxy can decide\nwhether future versions of the GNU Lesser General Public License shall\napply, that proxy's public statement of acceptance of any version is\npermanent authorization for you to choose that version for the\nLibrary.\n"
  },
  {
    "path": "external_libs/pgenlib/Makefile",
    "content": "#\n# Makefile to compile PGEN library\n#\nAR         ?= ar\nCXX        ?= g++\nCXXFLAGS    = -O3 -Wall -std=c++11\nCFLAGS      = \n\nUNAME_S := $(shell uname -s)\nifeq ($(UNAME_S),Darwin)\n CXXFLAGS += -arch x86_64\nendif\n\nOBJECTS       = $(patsubst %.cc,%.o,$(wildcard ./include/*.cc)) $(patsubst %.cpp,%.o,$(wildcard *.cpp))  $(patsubst %.cc,%.o,$(wildcard *.cc))\nINC           =  -I./simde/ -I./include/\n\npgenlib.a: ${OBJECTS}\n\t${AR} rcs $@ $^ \n\n%.o: %.cpp\n\t${CXX} ${CXXFLAGS} -o $@ -c $< ${INC}\n\n%.o: %.cc\n\t${CXX} ${CXXFLAGS} -o $@ -c $< ${INC}\n\nclean:\n\trm -f *.o *.a ./include/*.o\n"
  },
  {
    "path": "external_libs/pgenlib/include/pgenlib_misc.cc",
    "content": "// This library is part of PLINK 2.0, copyright (C) 2005-2024 Shaun Purcell,\n// Christopher Chang.\n//\n// This library is free software: you can redistribute it and/or modify it\n// under the terms of the GNU Lesser General Public License as published by the\n// Free Software Foundation; either version 3 of the License, or (at your\n// option) any later version.\n//\n// This library is distributed in the hope that it will be useful, but WITHOUT\n// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or\n// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License\n// for more details.\n//\n// You should have received a copy of the GNU Lesser General Public License\n// along with this library.  If not, see <http://www.gnu.org/licenses/>.\n\n\n#include \"pgenlib_misc.h\"\n\n#ifdef __cplusplus\nnamespace plink2 {\n#endif\n\n#ifdef USE_AVX2\nvoid CopyNyparrNonemptySubset(const uintptr_t* __restrict raw_nyparr, const uintptr_t* __restrict subset_mask, uint32_t raw_nyparr_entry_ct, uint32_t subset_entry_ct, uintptr_t* __restrict output_nyparr) {\n  if (subset_entry_ct == raw_nyparr_entry_ct) {\n    memcpy(output_nyparr, raw_nyparr, DivUp(subset_entry_ct, kBitsPerWordD2) * sizeof(intptr_t));\n    ZeroTrailingNyps(subset_entry_ct, output_nyparr);\n    return;\n  }\n  assert(subset_entry_ct);\n  uintptr_t cur_output_word = 0;\n\n  uintptr_t* output_nyparr_iter = output_nyparr;\n\n  uintptr_t* output_nyparr_last = &(output_nyparr[subset_entry_ct / kBitsPerWordD2]);\n  const uint32_t word_write_shift_end = 2 * (subset_entry_ct % kBitsPerWordD2);\n  uint32_t word_write_shift = 0;\n  for (uint32_t subset_mask_widx = 0; ; ++subset_mask_widx) {\n    const uintptr_t cur_include_word = subset_mask[subset_mask_widx];\n    if (cur_include_word) {\n      uint32_t cur_include_halfword = S_CAST(Halfword, cur_include_word);\n      for (uint32_t wordhalf_idx = 0; ; ++wordhalf_idx) {\n        if (cur_include_halfword) {\n          uintptr_t extracted_bits = raw_nyparr[subset_mask_widx * 2 + wordhalf_idx];\n          uint32_t set_bit_ct = kBitsPerWord;\n          if (cur_include_halfword != UINT32_MAX) {\n            const uintptr_t pext_mask = 3 * UnpackHalfwordToWord(cur_include_halfword);\n            extracted_bits = _pext_u64(extracted_bits, pext_mask);\n            set_bit_ct = PopcountWord(pext_mask);\n          }\n          cur_output_word |= extracted_bits << word_write_shift;\n          word_write_shift += set_bit_ct;\n          if (word_write_shift >= kBitsPerWord) {\n            *output_nyparr_iter++ = cur_output_word;\n            word_write_shift -= kBitsPerWord;\n            cur_output_word = 0;\n            if (word_write_shift) {\n              cur_output_word = extracted_bits >> (set_bit_ct - word_write_shift);\n            }\n          }\n        }\n        if (wordhalf_idx) {\n          break;\n        }\n        cur_include_halfword = cur_include_word >> kBitsPerWordD2;\n      }\n      if (output_nyparr_iter == output_nyparr_last) {\n        if (word_write_shift == word_write_shift_end) {\n          if (word_write_shift_end) {\n            *output_nyparr_last = cur_output_word;\n          }\n          return;\n        }\n      }\n    }\n  }\n}\n\n// bit_idx_start assumed to be < kBitsPerWord\nvoid CopyGenomatchSubset(const uintptr_t* __restrict raw_bitarr, const uintptr_t* __restrict genoarr, uintptr_t match_word, uint32_t write_bit_idx_start, uint32_t bit_ct, void* __restrict output) {\n  const uint32_t bit_idx_end = bit_ct + 
write_bit_idx_start;\n  const uint32_t bit_idx_end_lowbits = bit_idx_end % kBitsPerWord;\n  const Halfword* raw_bitarr_alias = DowncastKWToHW(raw_bitarr);\n  unsigned char* output_biter = S_CAST(unsigned char*, output);\n  unsigned char* output_last = &(output_biter[(bit_idx_end / kBitsPerWord) * kBytesPerWord]);\n  uintptr_t cur_output_word = 0;\n  uint32_t read_widx = UINT32_MAX;  // deliberate overflow\n  uint32_t write_idx_lowbits = write_bit_idx_start;\n  while ((output_biter != output_last) || (write_idx_lowbits != bit_idx_end_lowbits)) {\n    uintptr_t cur_mask_word;\n    // sparse genoarr optimization\n    // guaranteed to terminate since there's at least one more set bit\n    do {\n      // todo: try reading two genoarr words at a time.  would need to be very\n      // careful with the possible trailing word, though.\n      // more important to optimize this function now that regular phased-call\n      // handling code is using it.\n      cur_mask_word = genoarr[++read_widx] ^ match_word;\n      cur_mask_word = (~(cur_mask_word | (cur_mask_word >> 1))) & kMask5555;\n    } while (!cur_mask_word);\n    uintptr_t extracted_bits = raw_bitarr_alias[read_widx];\n    uint32_t set_bit_ct = kBitsPerWordD2;\n    if (cur_mask_word != kMask5555) {\n      const uintptr_t cur_mask_hw = PackWordToHalfword(cur_mask_word);\n      set_bit_ct = PopcountWord(cur_mask_word);\n      extracted_bits = _pext_u64(extracted_bits, cur_mask_hw);\n    }\n    cur_output_word |= extracted_bits << write_idx_lowbits;\n    const uint32_t new_write_idx_lowbits = write_idx_lowbits + set_bit_ct;\n    if (new_write_idx_lowbits >= kBitsPerWord) {\n      AppendW(cur_output_word, &output_biter);\n      // ...and these are the bits that fell off\n      // impossible for write_idx_lowbits to be zero here\n      cur_output_word = extracted_bits >> (kBitsPerWord - write_idx_lowbits);\n    }\n    write_idx_lowbits = new_write_idx_lowbits % kBitsPerWord;\n  }\n  if (write_idx_lowbits) {\n    CopyToUnalignedW(output_biter, &cur_output_word);\n  }\n}\n\n// Variant of ExpandBytearr() which is based off a target 2-bit value instead\n// of single expand_mask bits.  
expand_size must be the number of instances of\n// the target value in genovec.\nvoid ExpandBytearrFromGenoarr(const void* __restrict compact_bitarr, const uintptr_t* __restrict genoarr, uintptr_t match_word, uint32_t genoword_ct, uint32_t expand_size, uint32_t read_start_bit, uintptr_t* __restrict target) {\n  const uint32_t expand_sizex_m1 = expand_size + read_start_bit - 1;\n  const uint32_t leading_byte_ct = 1 + (expand_sizex_m1 % kBitsPerWord) / CHAR_BIT;\n  const uint32_t genoword_ct_m1 = genoword_ct - 1;\n  uintptr_t compact_word = SubwordLoad(compact_bitarr, leading_byte_ct) >> read_start_bit;\n  const unsigned char* compact_bitarr_biter = &(S_CAST(const unsigned char*, compact_bitarr)[leading_byte_ct]);\n  uint32_t compact_idx_lowbits = read_start_bit + CHAR_BIT * (sizeof(intptr_t) - leading_byte_ct);\n  for (uint32_t widx = 0; ; widx += 2) {\n    uintptr_t mask_word;\n    if (widx >= genoword_ct_m1) {\n      if (widx > genoword_ct_m1) {\n        return;\n      }\n      mask_word = 0;\n    } else {\n      const uintptr_t geno_word1 = genoarr[widx + 1] ^ match_word;\n      mask_word = PackWordToHalfwordMask5555(~(geno_word1 | (geno_word1 >> 1)));\n      mask_word = mask_word << 32;\n    }\n    const uintptr_t geno_word0 = genoarr[widx] ^ match_word;\n    mask_word |= PackWordToHalfwordMask5555(~(geno_word0 | (geno_word0 >> 1)));\n    uintptr_t write_word = 0;\n    if (mask_word) {\n      const uint32_t mask_set_ct = PopcountWord(mask_word);\n      uint32_t next_compact_idx_lowbits = compact_idx_lowbits + mask_set_ct;\n      if (next_compact_idx_lowbits <= kBitsPerWord) {\n        write_word = _pdep_u64(compact_word, mask_word);\n        if (mask_set_ct != kBitsPerWord) {\n          compact_word = compact_word >> mask_set_ct;\n        } else {\n          // avoid nasal demons\n          compact_word = 0;\n        }\n      } else {\n        uintptr_t next_compact_word;\n        CopyFromUnalignedIncrW(&next_compact_word, &compact_bitarr_biter);\n        next_compact_idx_lowbits -= kBitsPerWord;\n        compact_word |= next_compact_word << (kBitsPerWord - compact_idx_lowbits);\n        write_word = _pdep_u64(compact_word, mask_word);\n        if (next_compact_idx_lowbits != kBitsPerWord) {\n          compact_word = next_compact_word >> next_compact_idx_lowbits;\n        } else {\n          compact_word = 0;\n        }\n      }\n      compact_idx_lowbits = next_compact_idx_lowbits;\n    }\n    target[widx / 2] = write_word;\n  }\n}\n#else  // !USE_AVX2\nvoid CopyNyparrNonemptySubset(const uintptr_t* __restrict raw_nyparr, const uintptr_t* __restrict subset_mask, uint32_t raw_nyparr_entry_ct, uint32_t subset_entry_ct, uintptr_t* __restrict output_nyparr) {\n  if (subset_entry_ct == raw_nyparr_entry_ct) {\n    // subset_mask may be nullptr in this case\n    memcpy(output_nyparr, raw_nyparr, DivUp(subset_entry_ct, kBitsPerWordD2) * sizeof(intptr_t));\n    ZeroTrailingNyps(subset_entry_ct, output_nyparr);\n    return;\n  }\n  assert(subset_entry_ct);\n  assert(raw_nyparr_entry_ct >= subset_entry_ct);\n  uintptr_t cur_output_word = 0;\n\n  uintptr_t* output_nyparr_iter = output_nyparr;\n\n  uintptr_t* output_nyparr_last = &(output_nyparr[subset_entry_ct / kBitsPerWordD2]);\n  const uint32_t word_write_halfshift_end = subset_entry_ct % kBitsPerWordD2;\n  uint32_t word_write_halfshift = 0;\n  // if <= 2/3-filled, use sparse copy algorithm\n  // (tried CopyBitarrSubset() approach, that actually worsened things)\n  if (subset_entry_ct * (3 * k1LU) <= raw_nyparr_entry_ct * (2 * k1LU)) {\n    for 
(uint32_t subset_mask_widx = 0; ; ++subset_mask_widx) {\n      const uintptr_t cur_include_word = subset_mask[subset_mask_widx];\n      if (cur_include_word) {\n        uint32_t cur_include_halfword = S_CAST(Halfword, cur_include_word);\n        for (uint32_t wordhalf_idx = 0; ; ++wordhalf_idx) {\n          if (cur_include_halfword) {\n            uintptr_t raw_nyparr_word = raw_nyparr[subset_mask_widx * 2 + wordhalf_idx];\n            do {\n              uint32_t rqa_idx_lowbits = ctzu32(cur_include_halfword);\n              cur_output_word |= ((raw_nyparr_word >> (rqa_idx_lowbits * 2)) & 3) << (word_write_halfshift * 2);\n              if (++word_write_halfshift == kBitsPerWordD2) {\n                *output_nyparr_iter++ = cur_output_word;\n                word_write_halfshift = 0;\n                cur_output_word = 0;\n              }\n              cur_include_halfword &= cur_include_halfword - 1;\n            } while (cur_include_halfword);\n          }\n          if (wordhalf_idx) {\n            break;\n          }\n          cur_include_halfword = cur_include_word >> kBitsPerWordD2;\n        }\n        if (output_nyparr_iter == output_nyparr_last) {\n          if (word_write_halfshift == word_write_halfshift_end) {\n            if (word_write_halfshift_end) {\n              *output_nyparr_last = cur_output_word;\n            }\n            return;\n          }\n        }\n      }\n    }\n  }\n  // blocked copy\n  const uintptr_t* raw_nyparr_iter = raw_nyparr;\n  for (; ; ++subset_mask) {\n    const uintptr_t cur_include_word = *subset_mask;\n    uintptr_t cur_include_halfword = S_CAST(Halfword, cur_include_word);\n    for (uint32_t wordhalf_idx = 0; ; ++wordhalf_idx) {\n      uintptr_t raw_nyparr_word = *raw_nyparr_iter++;\n      while (cur_include_halfword) {\n        const uint32_t rqa_idx_lowbits = ctzw(cur_include_halfword);\n\n        // TAOCP, 7.1.3, (43).\n        const uintptr_t bottom_block_remover = (cur_include_halfword | (cur_include_halfword - 1)) + 1;\n\n        const uintptr_t raw_nyparr_curblock_unmasked = raw_nyparr_word >> (rqa_idx_lowbits * 2);\n        const uint32_t rqa_block_len = ctzw(bottom_block_remover) - rqa_idx_lowbits;\n        const uint32_t block_len_limit = kBitsPerWordD2 - word_write_halfshift;\n        cur_output_word |= raw_nyparr_curblock_unmasked << (2 * word_write_halfshift);\n        if (rqa_block_len < block_len_limit) {\n          word_write_halfshift += rqa_block_len;\n          cur_output_word = bzhi(cur_output_word, word_write_halfshift * 2);\n        } else {\n          // no need to mask, extra bits vanish off the high end\n          *output_nyparr_iter++ = cur_output_word;\n          word_write_halfshift = rqa_block_len - block_len_limit;\n          if (word_write_halfshift) {\n            cur_output_word = bzhi(raw_nyparr_curblock_unmasked >> (2 * block_len_limit), 2 * word_write_halfshift);\n          } else {\n            // avoid potential right-shift-[word length]\n            cur_output_word = 0;\n          }\n        }\n        cur_include_halfword &= bottom_block_remover;\n      }\n      if (wordhalf_idx) {\n        break;\n      }\n      cur_include_halfword = cur_include_word >> kBitsPerWordD2;\n    }\n    if (output_nyparr_iter == output_nyparr_last) {\n      if (word_write_halfshift == word_write_halfshift_end) {\n        if (word_write_halfshift_end) {\n          *output_nyparr_last = cur_output_word;\n        }\n        return;\n      }\n    }\n  }\n}\n\nvoid CopyGenomatchSubset(const uintptr_t* __restrict raw_bitarr, 
const uintptr_t* __restrict genovec, uintptr_t match_word, uint32_t write_bit_idx_start, uint32_t bit_ct, void* __restrict output) {\n  const uint32_t bit_idx_end = write_bit_idx_start + bit_ct;\n  const uint32_t bit_idx_end_lowbits = bit_idx_end % kBitsPerWord;\n  const Halfword* raw_bitarr_alias = DowncastKWToHW(raw_bitarr);\n  unsigned char* output_biter = S_CAST(unsigned char*, output);\n  unsigned char* output_last = &(output_biter[(bit_idx_end / kBitsPerWord) * kBytesPerWord]);\n  uintptr_t cur_output_word = 0;\n  uint32_t read_widx = UINT32_MAX;  // deliberate overflow\n  uint32_t write_idx_lowbits = write_bit_idx_start;\n  while ((output_biter != output_last) || (write_idx_lowbits != bit_idx_end_lowbits)) {\n    uintptr_t geno_word;\n    // sparse genovec optimization\n    // guaranteed to terminate since there's at least one more set bit\n    do {\n      geno_word = genovec[++read_widx] ^ match_word;\n      geno_word = (~(geno_word | (geno_word >> 1))) & kMask5555;\n    } while (!geno_word);\n    // screw it, just iterate over set bits\n    const uint32_t cur_halfword = raw_bitarr_alias[read_widx];\n    do {\n      const uint32_t sample_idx_lowbits = ctzw(geno_word) / 2;\n      cur_output_word |= S_CAST(uintptr_t, (cur_halfword >> sample_idx_lowbits) & k1LU) << write_idx_lowbits;\n      if (++write_idx_lowbits == kBitsPerWord) {\n        AppendW(cur_output_word, &output_biter);\n        cur_output_word = 0;\n        write_idx_lowbits = 0;\n      }\n      geno_word &= geno_word - 1;\n    } while (geno_word);\n  }\n  if (write_idx_lowbits) {\n    CopyToUnalignedW(output_biter, &cur_output_word);\n  }\n}\n\nvoid ExpandBytearrFromGenoarr(const void* __restrict compact_bitarr, const uintptr_t* __restrict genoarr, uintptr_t match_word, uint32_t genoword_ct, uint32_t expand_size, uint32_t read_start_bit, uintptr_t* __restrict target) {\n  Halfword* target_alias = DowncastWToHW(target);\n  ZeroHwArr(RoundUpPow2(genoword_ct, 2), target_alias);\n  const unsigned char* compact_bitarr_alias = S_CAST(const unsigned char*, compact_bitarr);\n  const uint32_t expand_sizex_m1 = expand_size + read_start_bit - 1;\n  const uint32_t compact_widx_last = expand_sizex_m1 / kBitsPerWord;\n  uint32_t compact_idx_lowbits = read_start_bit;\n  uint32_t loop_len = kBitsPerWord;\n  uintptr_t write_hwidx = 0;\n  uintptr_t genomatch_bits = genoarr[0] ^ match_word;\n  genomatch_bits = (~(genomatch_bits | (genomatch_bits >> 1))) & kMask5555;\n  for (uint32_t compact_widx = 0; ; ++compact_widx) {\n    uintptr_t compact_word;\n    if (compact_widx >= compact_widx_last) {\n      if (compact_widx > compact_widx_last) {\n        return;\n      }\n      loop_len = 1 + (expand_sizex_m1 % kBitsPerWord);\n      // avoid possible segfault\n      compact_word = SubwordLoad(&(compact_bitarr_alias[compact_widx * kBytesPerWord]), DivUp(loop_len, CHAR_BIT));\n    } else {\n      CopyFromUnalignedOffsetW(&compact_word, compact_bitarr_alias, compact_widx);\n    }\n    for (; compact_idx_lowbits != loop_len; ++compact_idx_lowbits) {\n      while (!genomatch_bits) {\n        genomatch_bits = genoarr[++write_hwidx] ^ match_word;\n        genomatch_bits = (~(genomatch_bits | (genomatch_bits >> 1))) & kMask5555;\n      }\n      if (compact_word & (k1LU << compact_idx_lowbits)) {\n        const uint32_t lowbit_idx = ctzw(genomatch_bits);\n        target_alias[write_hwidx] |= 1U << (lowbit_idx / 2);\n      }\n      genomatch_bits &= genomatch_bits - 1;\n    }\n    compact_idx_lowbits = 0;\n  }\n}\n#endif\n\n// Harley-Seal algorithm only 
works for bitarrays, not nyparrays, so don't\n// add an AVX2 specialization here.\n// ...unless something like the interleaved_vec strategy is used?  hmm.  should\n// test this on basic frequency counter.\n/*\nvoid Count2FreqVec3(const VecW* geno_vvec, uint32_t vec_ct, uint32_t* __restrict alt1_plus_bothset_ctp, uint32_t* __restrict bothset_ctp) {\n  assert(!(vec_ct % 3));\n  // Increments bothset_ct by the number of 0b11 in the current block, and\n  // alt1_ct by twice the number of 0b10 plus the number of 0b01.\n  const VecW m1 = VCONST_W(kMask5555);\n  const VecW m2 = VCONST_W(kMask3333);\n  const VecW m4 = VCONST_W(kMask0F0F);\n  const VecW* geno_vvec_iter = geno_vvec;\n  uint32_t alt1_plus_bothset_ct = 0;\n  uint32_t bothset_ct = 0;\n\n  while (1) {\n    UniVec acc_alt1_plus_bothset;\n    UniVec acc_bothset;\n    acc_alt1_plus_bothset.vw = vecw_setzero();\n    acc_bothset.vw = vecw_setzero();\n    const VecW* geno_vvec_stop;\n    if (vec_ct < 30) {\n      if (!vec_ct) {\n        *alt1_plus_bothset_ctp = alt1_plus_bothset_ct;\n        *bothset_ctp = bothset_ct;\n        return;\n      }\n      geno_vvec_stop = &(geno_vvec_iter[vec_ct]);\n      vec_ct = 0;\n    } else {\n      geno_vvec_stop = &(geno_vvec_iter[30]);\n      vec_ct -= 30;\n    }\n    do {\n      VecW cur_geno_vword1 = *geno_vvec_iter++;\n      // process first two vwords simultaneously to minimize linear dependence\n      VecW cur_geno_vword2 = *geno_vvec_iter++;\n      VecW cur_geno_vword_low_lshifted1 = vecw_slli(cur_geno_vword1 & m1, 1);\n      VecW cur_geno_vword_low_lshifted2 = vecw_slli(cur_geno_vword2 & m1, 1);\n\n      // 00 -> 00; 01 -> 01; 10 -> 10; 11 -> 01\n      // note that _mm_andnot_si128 flips the *first* argument before the AND\n      // operation.\n      VecW alt1_plus_bothset1 = vecw_and_notfirst(cur_geno_vword_low_lshifted1, cur_geno_vword1);\n      VecW alt1_plus_bothset2 = vecw_and_notfirst(cur_geno_vword_low_lshifted2, cur_geno_vword2);\n\n      VecW bothset1 = vecw_srli(cur_geno_vword_low_lshifted1 & cur_geno_vword1, 1);\n      VecW bothset2 = vecw_srli(cur_geno_vword_low_lshifted2 & cur_geno_vword2, 1);\n\n      cur_geno_vword1 = *geno_vvec_iter++;\n      alt1_plus_bothset1 = (alt1_plus_bothset1 & m2) + (vecw_srli(alt1_plus_bothset1, 2) & m2);\n      bothset2 = bothset1 + bothset2;\n      alt1_plus_bothset2 = (alt1_plus_bothset2 & m2) + (vecw_srli(alt1_plus_bothset2, 2) & m2);\n      cur_geno_vword_low_lshifted1 = vecw_slli(cur_geno_vword1 & m1, 1);\n\n      alt1_plus_bothset2 = alt1_plus_bothset1 + alt1_plus_bothset2;\n      // alt1_plus_bothset2 now contains 4-bit values from 0-8, while bothset2\n      // contains 2-bit values from 0-2\n      // (todo: check whether this is faster if we use double_bothsetx\n      // variables instead of bothset1/bothset2)\n      bothset1 = vecw_srli(cur_geno_vword_low_lshifted1 & cur_geno_vword1, 1);\n      alt1_plus_bothset1 = vecw_and_notfirst(cur_geno_vword_low_lshifted1, cur_geno_vword1);\n      bothset2 = bothset1 + bothset2;\n      alt1_plus_bothset1 = (alt1_plus_bothset1 & m2) + (vecw_srli(alt1_plus_bothset1, 2) & m2);\n\n      bothset2 = (bothset2 & m2) + (vecw_srli(bothset2, 2) & m2);\n      alt1_plus_bothset2 = alt1_plus_bothset1 + alt1_plus_bothset2;\n      // alt1_plus_bothset2 now contains 4-bit values from 0-12, while bothset2\n      // contains 4-bit values from 0-6.  
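one more accumulation round could\n      // overflow a 4-bit lane, so\n      // 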
aggregate both into 8-bit values.\n      bothset2 = (bothset2 & m4) + (vecw_srli(bothset2, 4) & m4);\n      alt1_plus_bothset2 = (alt1_plus_bothset2 & m4) + (vecw_srli(alt1_plus_bothset2, 4) & m4);\n\n      acc_bothset.vw = acc_bothset.vw + bothset2;\n      acc_alt1_plus_bothset.vw = acc_alt1_plus_bothset.vw + alt1_plus_bothset2;\n    } while (geno_vvec_iter < geno_vvec_stop);\n    const VecW m8 = VCONST_W(kMask00FF);\n    acc_bothset.vw = (acc_bothset.vw + vecw_srli(acc_bothset.vw, 8)) & m8;\n    acc_alt1_plus_bothset.vw = (acc_alt1_plus_bothset.vw & m8) + (vecw_srli(acc_alt1_plus_bothset.vw, 8) & m8);\n    bothset_ct += UniVecHsum16(acc_bothset);\n    alt1_plus_bothset_ct += UniVecHsum16(acc_alt1_plus_bothset);\n  }\n}\n*/\n\n// todo: benchmark against general-purpose counter\nvoid Count12Vec6(const VecW* geno_vvec, uint32_t vec_ct, uint32_t* __restrict raw_01_ctp, uint32_t* __restrict raw_both_ctp) {\n  assert(!(vec_ct % 6));\n  const VecW m1 = VCONST_W(kMask5555);\n  const VecW m2 = VCONST_W(kMask3333);\n  const VecW m4 = VCONST_W(kMask0F0F);\n  const VecW* geno_vvec_iter = geno_vvec;\n  VecW acc_01 = vecw_setzero();\n  VecW acc_both = vecw_setzero();\n  uintptr_t cur_incr = 60;\n  for (; ; vec_ct -= cur_incr) {\n    if (vec_ct < 60) {\n      if (!vec_ct) {\n        *raw_01_ctp = HsumW(acc_01);\n        *raw_both_ctp = HsumW(acc_both);\n        return;\n      }\n      cur_incr = vec_ct;\n    }\n    VecW inner_acc_01 = vecw_setzero();\n    VecW inner_acc_both = vecw_setzero();\n    const VecW* geno_vvec_stop = &(geno_vvec_iter[cur_incr]);\n    do {\n      VecW cur_vvec = *geno_vvec_iter++;\n      VecW vvec_rshift = vecw_srli(cur_vvec, 1);\n      VecW nyp_both = m1 & (cur_vvec ^ vvec_rshift);\n      VecW nyp_01 = nyp_both & cur_vvec;\n\n      cur_vvec = *geno_vvec_iter++;\n      vvec_rshift = vecw_srli(cur_vvec, 1);\n      VecW vvec_both = m1 & (cur_vvec ^ vvec_rshift);\n      nyp_01 = nyp_01 + (vvec_both & cur_vvec);\n      nyp_both = nyp_both + vvec_both;\n\n      cur_vvec = *geno_vvec_iter++;\n      vvec_rshift = vecw_srli(cur_vvec, 1);\n      vvec_both = m1 & (cur_vvec ^ vvec_rshift);\n      nyp_01 = nyp_01 + (vvec_both & cur_vvec);\n      nyp_both = nyp_both + vvec_both;\n\n      VecW nybble_01 = (nyp_01 & m2) + (vecw_srli(nyp_01, 2) & m2);\n      VecW nybble_both = (nyp_both & m2) + (vecw_srli(nyp_both, 2) & m2);\n\n      cur_vvec = *geno_vvec_iter++;\n      vvec_rshift = vecw_srli(cur_vvec, 1);\n      nyp_both = m1 & (cur_vvec ^ vvec_rshift);\n      nyp_01 = nyp_both & cur_vvec;\n\n      cur_vvec = *geno_vvec_iter++;\n      vvec_rshift = vecw_srli(cur_vvec, 1);\n      vvec_both = m1 & (cur_vvec ^ vvec_rshift);\n      nyp_01 = nyp_01 + (vvec_both & cur_vvec);\n      nyp_both = nyp_both + vvec_both;\n\n      cur_vvec = *geno_vvec_iter++;\n      vvec_rshift = vecw_srli(cur_vvec, 1);\n      vvec_both = m1 & (cur_vvec ^ vvec_rshift);\n      nyp_01 = nyp_01 + (vvec_both & cur_vvec);\n      nyp_both = nyp_both + vvec_both;\n\n      nybble_01 = nybble_01 + (nyp_01 & m2) + (vecw_srli(nyp_01, 2) & m2);\n      nybble_both = nybble_both + (nyp_both & m2) + (vecw_srli(nyp_both, 2) & m2);\n\n      inner_acc_01 = inner_acc_01 + (nybble_01 & m4) + (vecw_srli(nybble_01, 4) & m4);\n      inner_acc_both = inner_acc_both + (nybble_both & m4) + (vecw_srli(nybble_both, 4) & m4);\n    } while (geno_vvec_iter < geno_vvec_stop);\n    const VecW m0 = vecw_setzero();\n    acc_01 = acc_01 + vecw_bytesum(inner_acc_01, m0);\n    acc_both = acc_both + vecw_bytesum(inner_acc_both, m0);\n  }\n}\n\nvoid 
GenovecCount12Unsafe(const uintptr_t* genovec, uint32_t sample_ct, uint32_t* __restrict raw_01_ctp, uint32_t* __restrict raw_10_ctp) {\n  // assumes trailing bits of last genovec word are zeroed out.\n  // sample_ct == 0 ok.\n  const uint32_t sample_ctl2 = NypCtToWordCt(sample_ct);\n  uint32_t raw_01_ct;\n  uint32_t raw_both_ct;\n  uint32_t word_idx = sample_ctl2 - (sample_ctl2 % (6 * kWordsPerVec));\n  assert(IsVecAligned(genovec));\n  Count12Vec6(R_CAST(const VecW*, genovec), word_idx / kWordsPerVec, &raw_01_ct, &raw_both_ct);\n  for (; word_idx != sample_ctl2; ++word_idx) {\n    const uintptr_t cur_geno_word = genovec[word_idx];\n    const uintptr_t cur_rshift = cur_geno_word >> 1;\n    const uintptr_t cur_both = kMask5555 & (cur_geno_word ^ cur_rshift);\n    raw_01_ct += Popcount01Word(cur_both & cur_geno_word);\n    raw_both_ct += Popcount01Word(cur_both);\n  }\n  *raw_01_ctp = raw_01_ct;\n  *raw_10_ctp = raw_both_ct - raw_01_ct;\n}\n\nvoid Count3FreqVec6(const void* genoarr, uint32_t vec_ct, uint32_t* __restrict even_ctp, uint32_t* __restrict odd_ctp, uint32_t* __restrict bothset_ctp) {\n  assert(!(vec_ct % 6));\n  // Sets even_ct to the number of set low bits in the current block, odd_ct to\n  // the number of set high bits, and bothset_ct to the number of 0b11s.\n  const VecW m1 = VCONST_W(kMask5555);\n  const VecW m2 = VCONST_W(kMask3333);\n  const VecW m4 = VCONST_W(kMask0F0F);\n  const unsigned char* geno_vvec_biter = S_CAST(const unsigned char*, genoarr);\n  VecW acc_even = vecw_setzero();\n  VecW acc_odd = vecw_setzero();\n  VecW acc_bothset = vecw_setzero();\n  uintptr_t cur_incr = 60;\n  for (; ; vec_ct -= cur_incr) {\n    if (vec_ct < 60) {\n      if (!vec_ct) {\n        *even_ctp = HsumW(acc_even);\n        *odd_ctp = HsumW(acc_odd);\n        *bothset_ctp = HsumW(acc_bothset);\n        return;\n      }\n      cur_incr = vec_ct;\n    }\n    VecW inner_acc_even = vecw_setzero();\n    VecW inner_acc_odd = vecw_setzero();\n    VecW inner_acc_bothset = vecw_setzero();\n    const unsigned char* geno_vvec_stop = &(geno_vvec_biter[cur_incr * kBytesPerVec]);\n    do {\n      // hmm, this seems to have more linear dependence than I'd want, but the\n      // reorderings I tried just made the code harder to read without helping,\n      // so I'll leave this alone\n      VecW cur_geno_vword = vecw_loadu(geno_vvec_biter);\n      geno_vvec_biter += kBytesPerVec;\n      VecW odd1 = m1 & vecw_srli(cur_geno_vword, 1);\n      VecW even1 = m1 & cur_geno_vword;\n      VecW bothset1 = odd1 & cur_geno_vword;\n\n      cur_geno_vword = vecw_loadu(geno_vvec_biter);\n      geno_vvec_biter += kBytesPerVec;\n      VecW cur_geno_vword_high = m1 & vecw_srli(cur_geno_vword, 1);\n      even1 = even1 + (m1 & cur_geno_vword);\n      odd1 = odd1 + cur_geno_vword_high;\n      bothset1 = bothset1 + (cur_geno_vword_high & cur_geno_vword);\n\n      cur_geno_vword = vecw_loadu(geno_vvec_biter);\n      geno_vvec_biter += kBytesPerVec;\n      cur_geno_vword_high = m1 & vecw_srli(cur_geno_vword, 1);\n      even1 = even1 + (m1 & cur_geno_vword);\n      odd1 = odd1 + cur_geno_vword_high;\n      bothset1 = bothset1 + (cur_geno_vword_high & cur_geno_vword);\n\n      even1 = (even1 & m2) + (vecw_srli(even1, 2) & m2);\n      odd1 = (odd1 & m2) + (vecw_srli(odd1, 2) & m2);\n      bothset1 = (bothset1 & m2) + (vecw_srli(bothset1, 2) & m2);\n\n      cur_geno_vword = vecw_loadu(geno_vvec_biter);\n      geno_vvec_biter += kBytesPerVec;\n      VecW odd2 = m1 & vecw_srli(cur_geno_vword, 1);\n      VecW even2 = m1 & 
cur_geno_vword;\n      VecW bothset2 = odd2 & cur_geno_vword;\n\n      cur_geno_vword = vecw_loadu(geno_vvec_biter);\n      geno_vvec_biter += kBytesPerVec;\n      cur_geno_vword_high = m1 & vecw_srli(cur_geno_vword, 1);\n      even2 = even2 + (m1 & cur_geno_vword);\n      odd2 = odd2 + cur_geno_vword_high;\n      bothset2 = bothset2 + (cur_geno_vword_high & cur_geno_vword);\n\n      cur_geno_vword = vecw_loadu(geno_vvec_biter);\n      geno_vvec_biter += kBytesPerVec;\n      cur_geno_vword_high = m1 & vecw_srli(cur_geno_vword, 1);\n      even2 = even2 + (m1 & cur_geno_vword);\n      odd2 = odd2 + cur_geno_vword_high;\n      bothset2 = bothset2 + (cur_geno_vword_high & cur_geno_vword);\n\n      even1 = even1 + (even2 & m2) + (vecw_srli(even2, 2) & m2);\n      odd1 = odd1 + (odd2 & m2) + (vecw_srli(odd2, 2) & m2);\n      bothset1 = bothset1 + (bothset2 & m2) + (vecw_srli(bothset2, 2) & m2);\n      // these now contain 4-bit values from 0-12\n\n      inner_acc_even = inner_acc_even + (even1 & m4) + (vecw_srli(even1, 4) & m4);\n      inner_acc_odd = inner_acc_odd + (odd1 & m4) + (vecw_srli(odd1, 4) & m4);\n      inner_acc_bothset = inner_acc_bothset + (bothset1 & m4) + (vecw_srli(bothset1, 4) & m4);\n    } while (geno_vvec_biter < geno_vvec_stop);\n    const VecW m0 = vecw_setzero();\n    acc_even = acc_even + vecw_bytesum(inner_acc_even, m0);\n    acc_odd = acc_odd + vecw_bytesum(inner_acc_odd, m0);\n    acc_bothset = acc_bothset + vecw_bytesum(inner_acc_bothset, m0);\n  }\n}\n\nvoid FillInterleavedMaskVec(const uintptr_t* __restrict subset_mask, uint32_t base_vec_ct, uintptr_t* interleaved_mask_vec) {\n#ifdef USE_SSE2\n  // This is a cousin of github.com/KWillets/simd_interleave , which was\n  // written in response to\n  //   https://lemire.me/blog/2018/01/09/how-fast-can-you-bit-interleave-32-bit-integers-simd-edition/\n  // AVX2 implementation takes ~40% less time than before, and SSE4.2 takes\n  // ~65% less.  
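(The low half of each input vector's bits is spread to the even bit\n  // positions and the high half to the odd positions; the non-SSE2\n  // word-at-a-time fallback below does the same per word.)\n  // 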
This also avoids the Ryzen-screwing _pdep_u64()/_pext_u64()\n  // operations.\n  const VecW m4 = VCONST_W(kMask0F0F);\n#  ifdef USE_SHUFFLE8\n  const VecW lookup0 = vecw_setr8(\n      0, 1, 4, 5, 16, 17, 20, 21,\n      64, 65, 68, 69, 80, 81, 84, 85);\n  const VecW lookup1 = vecw_slli(lookup0, 1);\n#  else\n  const VecW m1 = VCONST_W(kMask5555);\n  const VecW m2 = VCONST_W(kMask3333);\n#  endif\n  const VecW* subset_mask_valias = R_CAST(const VecW*, subset_mask);\n  VecW* interleaved_mask_valias = R_CAST(VecW*, interleaved_mask_vec);\n\n  for (uint32_t vidx = 0; vidx != base_vec_ct; ++vidx) {\n    // I'll assume the compiler can handle this register allocation job.\n    VecW vec_lo;\n    VecW vec_hi;\n    vecw_lo_and_hi_nybbles(subset_mask_valias[vidx], m4, &vec_lo, &vec_hi);\n#  ifdef USE_SHUFFLE8\n    vec_lo = vecw_shuffle8(lookup0, vec_lo);\n    vec_hi = vecw_shuffle8(lookup1, vec_hi);\n#  else\n    vec_lo = (vec_lo | vecw_slli(vec_lo, 2)) & m2;\n    vec_hi = (vec_hi | vecw_slli(vec_hi, 2)) & m2;\n    vec_lo = (vec_lo | vecw_slli(vec_lo, 1)) & m1;\n    vec_hi = (vec_hi | vecw_slli(vec_hi, 1)) & m1;\n    vec_hi = vecw_slli(vec_hi, 1);\n#  endif\n    interleaved_mask_valias[vidx] = vec_lo | vec_hi;\n  }\n#else  // !USE_SSE2\n  for (uint32_t widx = 0; widx != base_vec_ct; ++widx) {\n    const uintptr_t orig_word = subset_mask[widx];\n    uintptr_t ww_even = S_CAST(Halfword, orig_word);\n    uintptr_t ww_odd = orig_word >> kBitsPerWordD2;\n    ww_even = UnpackHalfwordToWord(ww_even);\n    ww_odd = UnpackHalfwordToWord(ww_odd);\n    interleaved_mask_vec[widx] = ww_even | (ww_odd << 1);\n  }\n#endif\n}\n\n/*\nvoid GenovecAlleleCtsUnsafe(const uintptr_t* genovec, uint32_t sample_ct, uint32_t* __restrict allele_cts, uint32_t* __restrict bothset_ctp) {\n  // assumes trailing bits of last genovec word are zeroed out.\n  // sets allele_cts[0] to the number of observed ref alleles, and\n  // allele_cts[1] to the number of observed alt1s.\n  const uint32_t sample_ctl2 = NypCtToWordCt(sample_ct);\n  uint32_t word_idx = sample_ctl2 - (sample_ctl2 % (3 * kWordsPerVec));\n  uint32_t alt1_plus_bothset_ct;\n  uint32_t bothset_ct;\n  assert(IsVecAligned(genovec));\n  Count2FreqVec3(R_CAST(const VecW*, genovec), word_idx / kWordsPerVec, &alt1_plus_bothset_ct, &bothset_ct);\n  for (; word_idx != sample_ctl2; ++word_idx) {\n    const uintptr_t cur_geno_word = genovec[word_idx];\n    const uintptr_t cur_geno_word_low_lshifted = (cur_geno_word & kMask5555) << 1;\n    alt1_plus_bothset_ct += NypsumWord((~cur_geno_word_low_lshifted) & cur_geno_word);\n    bothset_ct += NypsumWord(cur_geno_word_low_lshifted & cur_geno_word);\n  }\n  const uint32_t alt1_ct = alt1_plus_bothset_ct - bothset_ct;\n  allele_cts[0] = (sample_ct - bothset_ct) * 2 - alt1_ct;\n  allele_cts[1] = alt1_ct;\n  *bothset_ctp = bothset_ct;\n}\n*/\n\nvoid GenoarrCountFreqsUnsafe(const uintptr_t* genoarr, uint32_t sample_ct, STD_ARRAY_REF(uint32_t, 4) genocounts) {\n  // fills genocounts[0] with the number of 00s, genocounts[1] with the number\n  // of 01s, etc.\n  // assumes trailing bits of last genoarr word are zeroed out.\n  // sample_ct == 0 ok.\n  // no longer requires vector-alignment.\n  const uint32_t sample_ctl2 = NypCtToWordCt(sample_ct);\n  uint32_t even_ct;\n  uint32_t odd_ct;\n  uint32_t bothset_ct;\n  uint32_t word_idx = sample_ctl2 - (sample_ctl2 % (6 * kWordsPerVec));\n  Count3FreqVec6(genoarr, word_idx / kWordsPerVec, &even_ct, &odd_ct, &bothset_ct);\n  for (; word_idx != sample_ctl2; ++word_idx) {\n    const uintptr_t 
cur_geno_word = genoarr[word_idx];\n    const uintptr_t cur_geno_word_high = kMask5555 & (cur_geno_word >> 1);\n    even_ct += Popcount01Word(cur_geno_word & kMask5555);\n    odd_ct += Popcount01Word(cur_geno_word_high);\n    bothset_ct += Popcount01Word(cur_geno_word & cur_geno_word_high);\n  }\n  genocounts[0] = sample_ct + bothset_ct - even_ct - odd_ct;\n  genocounts[1] = even_ct - bothset_ct;\n  genocounts[2] = odd_ct - bothset_ct;\n  genocounts[3] = bothset_ct;\n}\n\nuintptr_t MostCommonGenoUnsafe(const uintptr_t* genoarr, uint32_t sample_ct) {\n  STD_ARRAY_DECL(uint32_t, 4, genocounts);\n  GenoarrCountFreqsUnsafe(genoarr, sample_ct, genocounts);\n  uint32_t most_common_geno_ct = genocounts[0];\n  if (most_common_geno_ct * 2 >= sample_ct) {\n    return 0;\n  }\n  uintptr_t most_common_geno = 0;\n  for (uintptr_t cur_geno = 1; cur_geno != 4; ++cur_geno) {\n    if (most_common_geno_ct < genocounts[cur_geno]) {\n      most_common_geno = cur_geno;\n      most_common_geno_ct = genocounts[cur_geno];\n    }\n  }\n  return most_common_geno;\n}\n\n// genoarr now allowed to be unaligned.\nvoid CountSubset3FreqVec6(const void* __restrict genoarr, const VecW* __restrict interleaved_mask_vvec, uint32_t vec_ct, uint32_t* __restrict even_ctp, uint32_t* __restrict odd_ctp, uint32_t* __restrict bothset_ctp) {\n  assert(!(vec_ct % 6));\n  // Sets even_ct to the number of set low bits in the current block after\n  // subsetting, odd_ct to the number of set high bits, and bothset_ct to the\n  // number of 0b11s.\n  const VecW m1 = VCONST_W(kMask5555);\n  const VecW m2 = VCONST_W(kMask3333);\n  const VecW m4 = VCONST_W(kMask0F0F);\n  const unsigned char* geno_vvec_biter = S_CAST(const unsigned char*, genoarr);\n  const VecW* interleaved_mask_vvec_iter = interleaved_mask_vvec;\n  VecW acc_even = vecw_setzero();\n  VecW acc_odd = vecw_setzero();\n  VecW acc_bothset = vecw_setzero();\n  uintptr_t cur_incr = 60;\n  for (; ; vec_ct -= cur_incr) {\n    if (vec_ct < 60) {\n      if (!vec_ct) {\n        *even_ctp = HsumW(acc_even);\n        *odd_ctp = HsumW(acc_odd);\n        *bothset_ctp = HsumW(acc_bothset);\n        return;\n      }\n      cur_incr = vec_ct;\n    }\n    VecW inner_acc_even = vecw_setzero();\n    VecW inner_acc_odd = vecw_setzero();\n    VecW inner_acc_bothset = vecw_setzero();\n    const unsigned char* geno_vvec_stop = &(geno_vvec_biter[cur_incr * kBytesPerVec]);\n    do {\n      VecW interleaved_mask_vword = *interleaved_mask_vvec_iter++;\n      VecW cur_geno_vword = vecw_loadu(geno_vvec_biter);\n      geno_vvec_biter += kBytesPerVec;\n      VecW cur_mask = interleaved_mask_vword & m1;\n      VecW odd1 = cur_mask & vecw_srli(cur_geno_vword, 1);\n      VecW even1 = cur_mask & cur_geno_vword;\n      VecW bothset1 = odd1 & cur_geno_vword;\n\n      cur_mask = vecw_srli(interleaved_mask_vword, 1) & m1;\n      cur_geno_vword = vecw_loadu(geno_vvec_biter);\n      geno_vvec_biter += kBytesPerVec;\n      VecW cur_geno_vword_high_masked = cur_mask & vecw_srli(cur_geno_vword, 1);\n      even1 = even1 + (cur_mask & cur_geno_vword);\n      odd1 = odd1 + cur_geno_vword_high_masked;\n      bothset1 = bothset1 + (cur_geno_vword_high_masked & cur_geno_vword);\n\n      interleaved_mask_vword = *interleaved_mask_vvec_iter++;\n      cur_mask = interleaved_mask_vword & m1;\n      cur_geno_vword = vecw_loadu(geno_vvec_biter);\n      geno_vvec_biter += kBytesPerVec;\n      cur_geno_vword_high_masked = cur_mask & vecw_srli(cur_geno_vword, 1);\n      even1 = even1 + (cur_mask & cur_geno_vword);\n      odd1 = odd1 + 
cur_geno_vword_high_masked;\n      bothset1 = bothset1 + (cur_geno_vword_high_masked & cur_geno_vword);\n\n      even1 = (even1 & m2) + (vecw_srli(even1, 2) & m2);\n      odd1 = (odd1 & m2) + (vecw_srli(odd1, 2) & m2);\n      bothset1 = (bothset1 & m2) + (vecw_srli(bothset1, 2) & m2);\n\n      cur_mask = vecw_srli(interleaved_mask_vword, 1) & m1;\n      cur_geno_vword = vecw_loadu(geno_vvec_biter);\n      geno_vvec_biter += kBytesPerVec;\n      VecW odd2 = cur_mask & vecw_srli(cur_geno_vword, 1);\n      VecW even2 = cur_mask & cur_geno_vword;\n      VecW bothset2 = odd2 & cur_geno_vword;\n\n      interleaved_mask_vword = *interleaved_mask_vvec_iter++;\n      cur_mask = interleaved_mask_vword & m1;\n      cur_geno_vword = vecw_loadu(geno_vvec_biter);\n      geno_vvec_biter += kBytesPerVec;\n      cur_geno_vword_high_masked = cur_mask & vecw_srli(cur_geno_vword, 1);\n      even2 = even2 + (cur_mask & cur_geno_vword);\n      odd2 = odd2 + cur_geno_vword_high_masked;\n      bothset2 = bothset2 + (cur_geno_vword_high_masked & cur_geno_vword);\n\n      cur_mask = vecw_srli(interleaved_mask_vword, 1) & m1;\n      cur_geno_vword = vecw_loadu(geno_vvec_biter);\n      geno_vvec_biter += kBytesPerVec;\n      cur_geno_vword_high_masked = cur_mask & vecw_srli(cur_geno_vword, 1);\n      even2 = even2 + (cur_mask & cur_geno_vword);\n      odd2 = odd2 + cur_geno_vword_high_masked;\n      bothset2 = bothset2 + (cur_geno_vword_high_masked & cur_geno_vword);\n\n      even1 = even1 + (even2 & m2) + (vecw_srli(even2, 2) & m2);\n      odd1 = odd1 + (odd2 & m2) + (vecw_srli(odd2, 2) & m2);\n      bothset1 = bothset1 + (bothset2 & m2) + (vecw_srli(bothset2, 2) & m2);\n      // these now contain 4-bit values from 0-12\n\n      inner_acc_even = inner_acc_even + (even1 & m4) + (vecw_srli(even1, 4) & m4);\n      inner_acc_odd = inner_acc_odd + (odd1 & m4) + (vecw_srli(odd1, 4) & m4);\n      inner_acc_bothset = inner_acc_bothset + (bothset1 & m4) + (vecw_srli(bothset1, 4) & m4);\n    } while (geno_vvec_biter < geno_vvec_stop);\n    const VecW m0 = vecw_setzero();\n    acc_even = acc_even + vecw_bytesum(inner_acc_even, m0);\n    acc_odd = acc_odd + vecw_bytesum(inner_acc_odd, m0);\n    acc_bothset = acc_bothset + vecw_bytesum(inner_acc_bothset, m0);\n  }\n}\n\nvoid GenoarrCountSubsetFreqs(const uintptr_t* __restrict genoarr, const uintptr_t* __restrict sample_include_interleaved_vec, uint32_t raw_sample_ct, uint32_t sample_ct, STD_ARRAY_REF(uint32_t, 4) genocounts) {\n  // fills genocounts[0] with the number of 00s, genocounts[1] with the number\n  // of 01s, etc.\n  // {raw_}sample_ct == 0 ok.\n  const uint32_t raw_sample_ctv2 = NypCtToVecCt(raw_sample_ct);\n  uint32_t even_ct;\n  uint32_t odd_ct;\n  uint32_t bothset_ct;\n#ifdef USE_SSE2\n  uint32_t vec_idx = raw_sample_ctv2 - (raw_sample_ctv2 % 6);\n  CountSubset3FreqVec6(genoarr, R_CAST(const VecW*, sample_include_interleaved_vec), vec_idx, &even_ct, &odd_ct, &bothset_ct);\n  const uintptr_t* genoarr_iter = &(genoarr[kWordsPerVec * vec_idx]);\n  const uintptr_t* interleaved_mask_iter = &(sample_include_interleaved_vec[(kWordsPerVec / 2) * vec_idx]);\n#  ifdef USE_AVX2\n  uintptr_t mask_base1 = 0;\n  uintptr_t mask_base2 = 0;\n  uintptr_t mask_base3 = 0;\n  uintptr_t mask_base4 = 0;\n  for (; vec_idx != raw_sample_ctv2; ++vec_idx) {\n    uintptr_t mask_word1;\n    uintptr_t mask_word2;\n    uintptr_t mask_word3;\n    uintptr_t mask_word4;\n    if (!(vec_idx % 2)) {\n      mask_base1 = *interleaved_mask_iter++;\n      mask_base2 = *interleaved_mask_iter++;\n      
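// mask_base3/mask_base4 feed the second vechalf_idx pass below\n      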
mask_base3 = *interleaved_mask_iter++;\n      mask_base4 = *interleaved_mask_iter++;\n      mask_word1 = mask_base1 & kMask5555;\n      mask_word2 = mask_base2 & kMask5555;\n      mask_word3 = mask_base3 & kMask5555;\n      mask_word4 = mask_base4 & kMask5555;\n    } else {\n      mask_word1 = (mask_base1 >> 1) & kMask5555;\n      mask_word2 = (mask_base2 >> 1) & kMask5555;\n      mask_word3 = (mask_base3 >> 1) & kMask5555;\n      mask_word4 = (mask_base4 >> 1) & kMask5555;\n    }\n    for (uint32_t vechalf_idx = 0; ; ++vechalf_idx) {\n      const uintptr_t cur_geno_word1 = *genoarr_iter++;\n      const uintptr_t cur_geno_word2 = *genoarr_iter++;\n      const uintptr_t cur_geno_word1_high_masked = mask_word1 & (cur_geno_word1 >> 1);\n      const uintptr_t cur_geno_word2_high_masked = mask_word2 & (cur_geno_word2 >> 1);\n      even_ct += PopcountWord(((cur_geno_word1 & mask_word1) << 1) | (cur_geno_word2 & mask_word2));\n      odd_ct += PopcountWord((cur_geno_word1_high_masked << 1) | cur_geno_word2_high_masked);\n      bothset_ct += PopcountWord(((cur_geno_word1 & cur_geno_word1_high_masked) << 1) | (cur_geno_word2 & cur_geno_word2_high_masked));\n      if (vechalf_idx) {\n        break;\n      }\n      mask_word1 = mask_word3;\n      mask_word2 = mask_word4;\n    }\n  }\n#  else  // not USE_AVX2\n  uintptr_t mask_base1 = 0;\n  uintptr_t mask_base2 = 0;\n  for (; vec_idx != raw_sample_ctv2; ++vec_idx) {\n    uintptr_t mask_word1;\n    uintptr_t mask_word2;\n    if (!(vec_idx % 2)) {\n      mask_base1 = *interleaved_mask_iter++;\n      mask_base2 = *interleaved_mask_iter++;\n      mask_word1 = mask_base1 & kMask5555;\n      mask_word2 = mask_base2 & kMask5555;\n    } else {\n      mask_word1 = (mask_base1 >> 1) & kMask5555;\n      mask_word2 = (mask_base2 >> 1) & kMask5555;\n    }\n    const uintptr_t cur_geno_word1 = *genoarr_iter++;\n    const uintptr_t cur_geno_word2 = *genoarr_iter++;\n    const uintptr_t cur_geno_word1_high_masked = mask_word1 & (cur_geno_word1 >> 1);\n    const uintptr_t cur_geno_word2_high_masked = mask_word2 & (cur_geno_word2 >> 1);\n#    ifdef USE_SSE42\n    even_ct += PopcountWord(((cur_geno_word1 & mask_word1) << 1) | (cur_geno_word2 & mask_word2));\n    odd_ct += PopcountWord((cur_geno_word1_high_masked << 1) | cur_geno_word2_high_masked);\n    bothset_ct += PopcountWord(((cur_geno_word1 & cur_geno_word1_high_masked) << 1) | (cur_geno_word2 & cur_geno_word2_high_masked));\n#    else\n    even_ct += NypsumWord((cur_geno_word1 & mask_word1) + (cur_geno_word2 & mask_word2));\n    odd_ct += NypsumWord(cur_geno_word1_high_masked + cur_geno_word2_high_masked);\n    bothset_ct += NypsumWord((cur_geno_word1 & cur_geno_word1_high_masked) + (cur_geno_word2 & cur_geno_word2_high_masked));\n#    endif\n  }\n#  endif  // not USE_AVX2\n#else  // not USE_SSE2\n  uint32_t word_idx = raw_sample_ctv2 - (raw_sample_ctv2 % 6);\n  CountSubset3FreqVec6(genoarr, R_CAST(const VecW*, sample_include_interleaved_vec), word_idx, &even_ct, &odd_ct, &bothset_ct);\n  const uintptr_t* interleaved_mask_iter = &(sample_include_interleaved_vec[word_idx / 2]);\n  uintptr_t mask_base = 0;\n  for (; word_idx != raw_sample_ctv2; ++word_idx) {\n    uintptr_t mask_word;\n    if (!(word_idx % 2)) {\n      mask_base = *interleaved_mask_iter++;\n      mask_word = mask_base & kMask5555;\n    } else {\n      mask_word = (mask_base >> 1) & kMask5555;\n    }\n    const uintptr_t cur_geno_word = genoarr[word_idx];\n    const uintptr_t cur_geno_word_high_masked = mask_word & (cur_geno_word >> 1);\n    even_ct 
+= Popcount01Word(cur_geno_word & mask_word);\n    odd_ct += Popcount01Word(cur_geno_word_high_masked);\n    bothset_ct += Popcount01Word(cur_geno_word & cur_geno_word_high_masked);\n  }\n#endif\n  genocounts[0] = sample_ct + bothset_ct - even_ct - odd_ct;\n  genocounts[1] = even_ct - bothset_ct;\n  genocounts[2] = odd_ct - bothset_ct;\n  genocounts[3] = bothset_ct;\n}\n\nvoid GenoarrCountSubsetFreqs2(const uintptr_t* __restrict genoarr, const uintptr_t* __restrict sample_include, uint32_t raw_sample_ct, uint32_t sample_ct, STD_ARRAY_REF(uint32_t, 4) genocounts) {\n  // slower GenoarrCountSubsetFreqs() which does not require\n  // sample_include_interleaved_vec to be precomputed.\n  // {raw_}sample_ct == 0 ok.\n  const uint32_t raw_sample_ctl2 = NypCtToWordCt(raw_sample_ct);\n  const uint32_t fullword_ct = raw_sample_ctl2 / 2;\n  uint32_t even_ct = 0;\n  uint32_t odd_ct = 0;\n  uint32_t bothset_ct = 0;\n  for (uint32_t widx = 0; widx != fullword_ct; ++widx) {\n    // possible todo: try vectorizing this loop in SSE4.2+ high-sample-ct case\n    // with shuffle-based dynamic unpacking of sample_include?\n    const uintptr_t mask_word = sample_include[widx];\n    if (mask_word) {\n      uintptr_t geno_word = genoarr[2 * widx];\n      uintptr_t geno_even = PackWordToHalfwordMask5555(geno_word);\n      uintptr_t geno_odd = PackWordToHalfwordMaskAAAA(geno_word);\n      geno_word = genoarr[2 * widx + 1];\n      geno_even |= S_CAST(uintptr_t, PackWordToHalfwordMask5555(geno_word)) << kBitsPerWordD2;\n      geno_odd |= S_CAST(uintptr_t, PackWordToHalfwordMaskAAAA(geno_word)) << kBitsPerWordD2;\n      const uintptr_t geno_even_masked = geno_even & mask_word;\n      even_ct += PopcountWord(geno_even_masked);\n      odd_ct += PopcountWord(geno_odd & mask_word);\n      bothset_ct += PopcountWord(geno_odd & geno_even_masked);\n    }\n  }\n  if (raw_sample_ctl2 % 2) {\n    const uintptr_t mask_hw = sample_include[fullword_ct];\n    if (mask_hw) {\n      const uintptr_t geno_word = genoarr[2 * fullword_ct];\n      // todo: benchmark main loop unpack vs. pack\n      const uintptr_t mask_word = UnpackHalfwordToWord(mask_hw);\n      const uintptr_t geno_word_shifted = geno_word >> 1;\n      const uintptr_t geno_word_masked = geno_word & mask_word;\n      even_ct += Popcount01Word(geno_word_masked);\n      odd_ct += Popcount01Word(geno_word_shifted & mask_word);\n      bothset_ct += Popcount01Word(geno_word_masked & geno_word_shifted);\n    }\n  }\n  genocounts[0] = sample_ct + bothset_ct - even_ct - odd_ct;\n  genocounts[1] = even_ct - bothset_ct;\n  genocounts[2] = odd_ct - bothset_ct;\n  genocounts[3] = bothset_ct;\n}\n\nvoid GenoarrCountInvsubsetFreqs2(const uintptr_t* __restrict genoarr, const uintptr_t* __restrict sample_exclude, uint32_t raw_sample_ct, uint32_t sample_ct, STD_ARRAY_REF(uint32_t, 4) genocounts) {\n  // {raw_}sample_ct == 0 ok.\n  // ugh, 'fullword' is overloaded.  
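(In this function, a 'bodyword' is a\n  // sample_exclude word fully covered by raw_sample_ct; the trailing partial\n  // word is handled separately after the main loop.)\n  // 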
probable todo: keep halfword/fullword,\n  // switch more common use case to bodyword/trailword.\n  const uint32_t bodyword_ct = raw_sample_ct / kBitsPerWord;\n  uint32_t even_ct = 0;\n  uint32_t odd_ct = 0;\n  uint32_t bothset_ct = 0;\n  for (uint32_t widx = 0; widx != bodyword_ct; ++widx) {\n    // possible todo: try vectorizing this loop in SSE4.2+ high-sample-ct case\n    // with shuffle-based dynamic unpacking of sample_exclude?\n    const uintptr_t mask_word = ~sample_exclude[widx];\n    if (mask_word) {\n      uintptr_t geno_word = genoarr[2 * widx];\n      uintptr_t geno_even = PackWordToHalfwordMask5555(geno_word);\n      uintptr_t geno_odd = PackWordToHalfwordMaskAAAA(geno_word);\n      geno_word = genoarr[2 * widx + 1];\n      geno_even |= S_CAST(uintptr_t, PackWordToHalfwordMask5555(geno_word)) << kBitsPerWordD2;\n      geno_odd |= S_CAST(uintptr_t, PackWordToHalfwordMaskAAAA(geno_word)) << kBitsPerWordD2;\n      const uintptr_t geno_even_masked = geno_even & mask_word;\n      even_ct += PopcountWord(geno_even_masked);\n      odd_ct += PopcountWord(geno_odd & mask_word);\n      bothset_ct += PopcountWord(geno_odd & geno_even_masked);\n    }\n  }\n  const uint32_t remainder = raw_sample_ct % kBitsPerWord;\n  if (remainder) {\n    const uintptr_t mask_word = bzhi(~sample_exclude[bodyword_ct], remainder);\n    if (mask_word) {\n      uintptr_t geno_word = genoarr[2 * bodyword_ct];\n      uintptr_t geno_even = PackWordToHalfwordMask5555(geno_word);\n      uintptr_t geno_odd = PackWordToHalfwordMaskAAAA(geno_word);\n      if (remainder > kBitsPerWordD2) {\n        geno_word = genoarr[2 * bodyword_ct + 1];\n        geno_even |= S_CAST(uintptr_t, PackWordToHalfwordMask5555(geno_word)) << kBitsPerWordD2;\n        geno_odd |= S_CAST(uintptr_t, PackWordToHalfwordMaskAAAA(geno_word)) << kBitsPerWordD2;\n      }\n      const uintptr_t geno_even_masked = geno_even & mask_word;\n      even_ct += PopcountWord(geno_even_masked);\n      odd_ct += PopcountWord(geno_odd & mask_word);\n      bothset_ct += PopcountWord(geno_odd & geno_even_masked);\n    }\n  }\n  genocounts[0] = sample_ct + bothset_ct - even_ct - odd_ct;\n  genocounts[1] = even_ct - bothset_ct;\n  genocounts[2] = odd_ct - bothset_ct;\n  genocounts[3] = bothset_ct;\n}\n\nvoid GenoarrCountSubsetIntersectFreqs(const uintptr_t* __restrict genoarr, const uintptr_t* __restrict subset1, const uintptr_t* __restrict subset2, uint32_t raw_sample_ct, STD_ARRAY_REF(uint32_t, 4) genocounts) {\n  // {raw_}sample_ct == 0 ok.\n  const uint32_t raw_sample_ctl2 = NypCtToWordCt(raw_sample_ct);\n  const uint32_t fullword_ct = raw_sample_ctl2 / 2;\n  uint32_t subset_intersect_ct = 0;\n  uint32_t even_ct = 0;\n  uint32_t odd_ct = 0;\n  uint32_t bothset_ct = 0;\n  for (uint32_t widx = 0; widx != fullword_ct; ++widx) {\n    // hmm, there may be little point to vectorizing this\n    const uintptr_t mask_word = subset1[widx] & subset2[widx];\n    if (mask_word) {\n      uintptr_t geno_word = genoarr[2 * widx];\n      uintptr_t geno_even = PackWordToHalfwordMask5555(geno_word);\n      uintptr_t geno_odd = PackWordToHalfwordMaskAAAA(geno_word);\n      geno_word = genoarr[2 * widx + 1];\n      geno_even |= S_CAST(uintptr_t, PackWordToHalfwordMask5555(geno_word)) << kBitsPerWordD2;\n      geno_odd |= S_CAST(uintptr_t, PackWordToHalfwordMaskAAAA(geno_word)) << kBitsPerWordD2;\n      const uintptr_t geno_even_masked = geno_even & mask_word;\n      subset_intersect_ct += PopcountWord(mask_word);\n      even_ct += PopcountWord(geno_even_masked);\n      odd_ct += 
PopcountWord(geno_odd & mask_word);\n      bothset_ct += PopcountWord(geno_odd & geno_even_masked);\n    }\n  }\n  if (raw_sample_ctl2 % 2) {\n    const uintptr_t mask_hw = subset1[fullword_ct] & subset2[fullword_ct];\n    if (mask_hw) {\n      const uintptr_t geno_word = genoarr[fullword_ct * 2];\n      const uintptr_t mask_word = UnpackHalfwordToWord(mask_hw);\n      const uintptr_t geno_word_shifted = geno_word >> 1;\n      const uintptr_t geno_word_masked = geno_word & mask_word;\n      subset_intersect_ct += Popcount01Word(mask_word);\n      even_ct += Popcount01Word(geno_word_masked);\n      odd_ct += Popcount01Word(geno_word_shifted & mask_word);\n      bothset_ct += Popcount01Word(geno_word_masked & geno_word_shifted);\n    }\n  }\n  genocounts[0] = subset_intersect_ct + bothset_ct - even_ct - odd_ct;\n  genocounts[1] = even_ct - bothset_ct;\n  genocounts[2] = odd_ct - bothset_ct;\n  genocounts[3] = bothset_ct;\n}\n\nvoid GenovecInvertUnsafe(uint32_t sample_ct, uintptr_t* genovec) {\n  // flips 0 to 2 and vice versa.\n  // \"unsafe\" because trailing bits are not zeroed out.\n  const uint32_t vec_ct = NypCtToVecCt(sample_ct);\n  assert(IsVecAligned(genovec));\n  const VecW not_m1 = VCONST_W(kMaskAAAA);\n  VecW* vptr = R_CAST(VecW*, genovec);\n  for (uint32_t vidx = 0; vidx != vec_ct; ++vidx) {\n    VecW cur_vec = vptr[vidx];\n    // flip high bit iff low bit is unset\n    vptr[vidx] = cur_vec ^ vecw_and_notfirst(vecw_slli(cur_vec, 1), not_m1);\n  }\n}\n\nvoid DifflistCountSubsetFreqs(const uintptr_t* __restrict sample_include, const uintptr_t* __restrict raregeno, const uint32_t* __restrict difflist_sample_ids, uint32_t common_geno, uint32_t difflist_len, uint32_t sample_ct, STD_ARRAY_REF(uint32_t, 4) genocounts) {\n  STD_ARRAY_REF_FILL0(4, genocounts);\n  for (uint32_t difflist_idx = 0; difflist_idx != difflist_len; ++difflist_idx) {\n    const uint32_t raw_sample_idx = difflist_sample_ids[difflist_idx];\n    if (IsSet(sample_include, raw_sample_idx)) {\n      genocounts[GetNyparrEntry(raregeno, difflist_idx)] += 1;\n    }\n  }\n  genocounts[common_geno] = sample_ct - genocounts[0] - genocounts[1] - genocounts[2] - genocounts[3];\n}\n\n\n#ifdef USE_SSE2\nstatic_assert(kPglNypTransposeBatch == S_CAST(uint32_t, kNypsPerCacheline), \"TransposeNypblock64() needs to be updated.\");\n#  ifdef CACHELINE64\nvoid TransposeNypblock64(const uintptr_t* read_iter, uint32_t read_ul_stride, uint32_t write_ul_stride, uint32_t read_batch_size, uint32_t write_batch_size, uintptr_t* __restrict write_iter, unsigned char* __restrict buf0, unsigned char* __restrict buf1) {\n  // buf0 and buf1 must each be vector-aligned and have size 16k\n  // Tried using previous AVX2 small-buffer approach, but that was a bit\n  // worse... though maybe it should be revisited?\n\n  // Each input row has 256 nyps, across 8 words.\n  // First word of each row goes into first buf0 row, etc.\n  const uint32_t buf0_row_ct = DivUp(write_batch_size, 32);\n  {\n    // Fold the first 3 shuffles into the ingestion loop.\n    // Can fold 2 additional shuffles here by ingesting uint16s instead.  
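(That is, 16 bits per input\n    // row per pass instead of a full 64-bit word.)\n    // 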
That\n    // removes the need for the second loop, halving the workspace requirement.\n    // Benchmark results of that approach are slightly but consistently worse,\n    // though.\n    const uintptr_t* initial_read_iter = read_iter;\n    const uintptr_t* initial_read_end = &(initial_read_iter[buf0_row_ct]);\n    uintptr_t* initial_target_iter = R_CAST(uintptr_t*, buf0);\n    const uint32_t read_batch_rem = kNypsPerCacheline - read_batch_size;\n    // Tried fancier vector-based ingestion, didn't help.\n    for (; initial_read_iter != initial_read_end; ++initial_read_iter) {\n      const uintptr_t* read_iter_tmp = initial_read_iter;\n      for (uint32_t ujj = 0; ujj != read_batch_size; ++ujj) {\n        *initial_target_iter++ = *read_iter_tmp;\n        read_iter_tmp = &(read_iter_tmp[read_ul_stride]);\n      }\n      ZeroWArr(read_batch_rem, initial_target_iter);\n      initial_target_iter = &(initial_target_iter[read_batch_rem]);\n    }\n  }\n\n  // shuffle from 64 -> 16\n  const uint32_t buf1_row_ct = DivUp(write_batch_size, 8);\n  {\n    // full buf0 row is 256 * 8 bytes\n    // full buf1 row is 512 bytes\n    const VecW* buf0_read_iter = R_CAST(const VecW*, buf0);\n    __m128i* write_iter0 = R_CAST(__m128i*, buf1);\n    const uint32_t buf0_row_clwidth = DivUp(read_batch_size, 8);\n#    ifdef USE_SHUFFLE8\n    const VecW gather_u32s = vecw_setr8(0, 1, 8, 9, 2, 3, 10, 11,\n                                        4, 5, 12, 13, 6, 7, 14, 15);\n#    else\n    const VecW m16 = VCONST_W(kMask0000FFFF);\n#    endif\n    for (uint32_t bidx = 0; bidx != buf0_row_ct; ++bidx) {\n      __m128i* write_iter1 = &(write_iter0[32]);\n      __m128i* write_iter2 = &(write_iter1[32]);\n      __m128i* write_iter3 = &(write_iter2[32]);\n      for (uint32_t clidx = 0; clidx != buf0_row_clwidth; ++clidx) {\n#    ifdef USE_AVX2\n        VecW loader0 = buf0_read_iter[clidx * 2];\n        VecW loader1 = buf0_read_iter[clidx * 2 + 1];\n        loader0 = vecw_shuffle8(loader0, gather_u32s);\n        loader1 = vecw_shuffle8(loader1, gather_u32s);\n        //    (0,0) (0,1) (1,0) ... (7,1) (0,2) ... (7,3) (8,0) ... (31,3)\n        //      (0,4) ... (31,7)\n        // -> (0,0) ... (7,1) (0,2) ... (7,3) (8,0) ... (15,3) (0,4) ... (15,7)\n        //      (16,0) ... (31,3) (16,4) ...\n        loader0 = vecw_permute0xd8_if_avx2(loader0);\n        loader1 = vecw_permute0xd8_if_avx2(loader1);\n        // -> (0,0) ... (7,1) (0,2) ... (7,3) (0,4) ... (7,7) (8,0) ... (15,7)\n        //      (16,0) ... (23,7) (24,0) ... 
(31,7)\n        const __m256i vec_lo = _mm256_shuffle_epi32(WToVec(loader0), 0xd8);\n        const __m256i vec_hi = _mm256_shuffle_epi32(WToVec(loader1), 0xd8);\n        const __m256i final0145 = _mm256_unpacklo_epi64(vec_lo, vec_hi);\n        const __m256i final2367 = _mm256_unpackhi_epi64(vec_lo, vec_hi);\n        // GCC doesn't support _mm256_storeu_si128i as of this writing.\n        write_iter0[clidx] = _mm256_castsi256_si128(final0145);\n        write_iter1[clidx] = _mm256_castsi256_si128(final2367);\n        write_iter2[clidx] = _mm256_extracti128_si256(final0145, 1);\n        write_iter3[clidx] = _mm256_extracti128_si256(final2367, 1);\n#    else\n        VecW loader0 = buf0_read_iter[clidx * 4];\n        VecW loader1 = buf0_read_iter[clidx * 4 + 1];\n        VecW loader2 = buf0_read_iter[clidx * 4 + 2];\n        VecW loader3 = buf0_read_iter[clidx * 4 + 3];\n#      ifdef USE_SHUFFLE8\n        loader0 = vecw_shuffle8(loader0, gather_u32s);\n        loader1 = vecw_shuffle8(loader1, gather_u32s);\n        loader2 = vecw_shuffle8(loader2, gather_u32s);\n        loader3 = vecw_shuffle8(loader3, gather_u32s);\n#      else\n        VecW tmp_lo = vecw_unpacklo16(loader0, loader1);\n        VecW tmp_hi = vecw_unpackhi16(loader0, loader1);\n        loader0 = vecw_blendv(vecw_slli(tmp_hi, 16), tmp_lo, m16);\n        loader1 = vecw_blendv(tmp_hi, vecw_srli(tmp_lo, 16), m16);\n        tmp_lo = vecw_unpacklo16(loader2, loader3);\n        tmp_hi = vecw_unpackhi16(loader2, loader3);\n        loader2 = vecw_blendv(vecw_slli(tmp_hi, 16), tmp_lo, m16);\n        loader3 = vecw_blendv(tmp_hi, vecw_srli(tmp_lo, 16), m16);\n#      endif\n        //    (0,0) (0,1) (1,0) ... (7,1) (0,2) ... (7,3) (8,0) ... (31,3)\n        //  + (0,4) ... (31,7)\n        // -> (0,0) ... (7,3) (0,4) ... (7,7) (8,0) ... 
(15,7)\n        const VecW lo_0_15 = vecw_unpacklo32(loader0, loader1);\n        const VecW lo_16_31 = vecw_unpackhi32(loader0, loader1);\n        const VecW hi_0_15 = vecw_unpacklo32(loader2, loader3);\n        const VecW hi_16_31 = vecw_unpackhi32(loader2, loader3);\n        write_iter0[clidx] = WToVec(vecw_unpacklo64(lo_0_15, hi_0_15));\n        write_iter1[clidx] = WToVec(vecw_unpackhi64(lo_0_15, hi_0_15));\n        write_iter2[clidx] = WToVec(vecw_unpacklo64(lo_16_31, hi_16_31));\n        write_iter3[clidx] = WToVec(vecw_unpackhi64(lo_16_31, hi_16_31));\n#    endif\n      }\n      buf0_read_iter = &(buf0_read_iter[2048 / kBytesPerVec]);\n      write_iter0 = &(write_iter3[32]);\n    }\n  }\n\n  // movemask from 16 -> 2\n  const VecW* source_iter = R_CAST(VecW*, buf1);\n  const VecW m8 = VCONST_W(kMask00FF);\n\n  // Take advantage of current function contract.\n  const uint32_t buf1_fullrow_ct = (write_batch_size + 3) / 8;\n\n  const uint32_t write_v8ui_stride = kVec8thUintPerWord * write_ul_stride;\n  const uint32_t vec_ct = DivUp(read_batch_size, (kBytesPerVec / 2));\n  Vec8thUint* target_iter0 = DowncastWToV8(write_iter);\n  for (uint32_t uii = 0; uii != buf1_fullrow_ct; ++uii) {\n    Vec8thUint* target_iter1 = &(target_iter0[write_v8ui_stride]);\n    Vec8thUint* target_iter2 = &(target_iter1[write_v8ui_stride]);\n    Vec8thUint* target_iter3 = &(target_iter2[write_v8ui_stride]);\n    Vec8thUint* target_iter4 = &(target_iter3[write_v8ui_stride]);\n    Vec8thUint* target_iter5 = &(target_iter4[write_v8ui_stride]);\n    Vec8thUint* target_iter6 = &(target_iter5[write_v8ui_stride]);\n    Vec8thUint* target_iter7 = &(target_iter6[write_v8ui_stride]);\n    for (uint32_t vidx = 0; vidx != vec_ct; ++vidx) {\n      const VecW loader = source_iter[vidx];\n      // Using goal bit-coordinates, where '_' indicates irrelevant content, we\n      // set target_0123 to\n      //   _ (0, 0) _ (1, 0) _ (2, 0) _ (3, 0) _ (0, 1) _ (1, 1) _ (2, 1) ...\n      // and target_4567 to\n      //   _ (4, 0) _ (5, 0) _ (6, 0) _ (7, 0) _ (4, 1) _ (5, 1) _ (6, 1) ...\n      // This is perfectly arranged for movemask.\n      VecW target_4567 = vecw_blendv(loader, vecw_srli(loader, 7), m8);\n      target_iter7[vidx] = vecw_movemask(target_4567);\n      target_4567 = vecw_slli(target_4567, 2);\n      target_iter6[vidx] = vecw_movemask(target_4567);\n      target_4567 = vecw_slli(target_4567, 2);\n      target_iter5[vidx] = vecw_movemask(target_4567);\n      target_4567 = vecw_slli(target_4567, 2);\n      target_iter4[vidx] = vecw_movemask(target_4567);\n      VecW target_0123 = vecw_blendv(vecw_slli(loader, 8), vecw_slli(loader, 1), m8);\n      target_iter3[vidx] = vecw_movemask(target_0123);\n      target_0123 = vecw_slli(target_0123, 2);\n      target_iter2[vidx] = vecw_movemask(target_0123);\n      target_0123 = vecw_slli(target_0123, 2);\n      target_iter1[vidx] = vecw_movemask(target_0123);\n      target_0123 = vecw_slli(target_0123, 2);\n      target_iter0[vidx] = vecw_movemask(target_0123);\n    }\n    source_iter = &(source_iter[(2 * kPglNypTransposeBatch) / kBytesPerVec]);\n    target_iter0 = &(target_iter7[write_v8ui_stride]);\n  }\n  if (buf1_fullrow_ct == buf1_row_ct) {\n    return;\n  }\n  Vec8thUint* target_iter1 = &(target_iter0[write_v8ui_stride]);\n  Vec8thUint* target_iter2 = &(target_iter1[write_v8ui_stride]);\n  Vec8thUint* target_iter3 = &(target_iter2[write_v8ui_stride]);\n  for (uint32_t vidx = 0; vidx != vec_ct; ++vidx) {\n    const VecW loader = source_iter[vidx];\n    VecW target_0123 = 
vecw_blendv(vecw_slli(loader, 8), vecw_slli(loader, 1), m8);\n    target_iter3[vidx] = vecw_movemask(target_0123);\n    target_0123 = vecw_slli(target_0123, 2);\n    target_iter2[vidx] = vecw_movemask(target_0123);\n    target_0123 = vecw_slli(target_0123, 2);\n    target_iter1[vidx] = vecw_movemask(target_0123);\n    target_0123 = vecw_slli(target_0123, 2);\n    target_iter0[vidx] = vecw_movemask(target_0123);\n  }\n}\n#  else\n#    ifndef CACHELINE128\n#      error \"CACHELINE64 or CACHELINE128 expected.\"\n#    endif\n// assumes USE_SHUFFLE8, !USE_AVX2\nvoid TransposeNypblock64(const uintptr_t* read_iter, uint32_t read_ul_stride, uint32_t write_ul_stride, uint32_t read_batch_size, uint32_t write_batch_size, uintptr_t* __restrict write_iter, unsigned char* __restrict buf0, unsigned char* __restrict buf1) {\n  // buf0 and buf1 must each be vector-aligned and have size 64k\n  // Each input row has 512 nyps, across 16 words.\n  // First word of each row goes into first buf0 row, etc.\n  const uint32_t buf0_row_ct = DivUp(write_batch_size, 32);\n  {\n    // Fold the first 4 shuffles into the ingestion loop.\n    const uintptr_t* initial_read_iter = read_iter;\n    const uintptr_t* initial_read_end = &(initial_read_iter[buf0_row_ct]);\n    uintptr_t* initial_target_iter = R_CAST(uintptr_t*, buf0);\n    const uint32_t read_batch_rem = kNypsPerCacheline - read_batch_size;\n    for (; initial_read_iter != initial_read_end; ++initial_read_iter) {\n      const uintptr_t* read_iter_tmp = initial_read_iter;\n      for (uint32_t ujj = 0; ujj != read_batch_size; ++ujj) {\n        *initial_target_iter++ = *read_iter_tmp;\n        read_iter_tmp = &(read_iter_tmp[read_ul_stride]);\n      }\n      ZeroWArr(read_batch_rem, initial_target_iter);\n      initial_target_iter = &(initial_target_iter[read_batch_rem]);\n    }\n  }\n\n  // First buf0 row now corresponds to a 512x32 nyp matrix (512 * 8 bytes) that\n  // we wish to transpose.  
We split this into eight 512x4 matrices.\n  // (ARMv8 doesn't have efficient movemask, so this should be better than four\n  // 512x8 matrices.)\n  // This is nearly identical to the middle step in TransposeBitblock64().\n  {\n    const VecW* buf0_read_iter = R_CAST(const VecW*, buf0);\n    uintptr_t* write_iter0 = R_CAST(uintptr_t*, buf1);\n    const VecW gather_u16s = vecw_setr8(0, 8, 1, 9, 2, 10, 3, 11,\n                                        4, 12, 5, 13, 6, 14, 7, 15);\n    const uint32_t buf0_row_b64width = DivUp(read_batch_size, 8);\n    for (uint32_t ridx = 0; ridx != buf0_row_ct; ++ridx) {\n      uintptr_t* write_iter1 = &(write_iter0[64]);\n      uintptr_t* write_iter2 = &(write_iter1[64]);\n      uintptr_t* write_iter3 = &(write_iter2[64]);\n      uintptr_t* write_iter4 = &(write_iter3[64]);\n      uintptr_t* write_iter5 = &(write_iter4[64]);\n      uintptr_t* write_iter6 = &(write_iter5[64]);\n      uintptr_t* write_iter7 = &(write_iter6[64]);\n      for (uint32_t b64idx = 0; b64idx != buf0_row_b64width; ++b64idx) {\n        VecW loader0 = buf0_read_iter[b64idx * 4];\n        VecW loader1 = buf0_read_iter[b64idx * 4 + 1];\n        VecW loader2 = buf0_read_iter[b64idx * 4 + 2];\n        VecW loader3 = buf0_read_iter[b64idx * 4 + 3];\n        loader0 = vecw_shuffle8(loader0, gather_u16s);\n        loader1 = vecw_shuffle8(loader1, gather_u16s);\n        loader2 = vecw_shuffle8(loader2, gather_u16s);\n        loader3 = vecw_shuffle8(loader3, gather_u16s);\n        const VecW lo_0123 = vecw_unpacklo16(loader0, loader1);\n        const VecW lo_4567 = vecw_unpackhi16(loader0, loader1);\n        const VecW hi_0123 = vecw_unpacklo16(loader2, loader3);\n        const VecW hi_4567 = vecw_unpackhi16(loader2, loader3);\n\n        const VecW final01 = vecw_unpacklo32(lo_0123, hi_0123);\n        const VecW final23 = vecw_unpackhi32(lo_0123, hi_0123);\n        const VecW final45 = vecw_unpacklo32(lo_4567, hi_4567);\n        const VecW final67 = vecw_unpackhi32(lo_4567, hi_4567);\n        write_iter0[b64idx] = vecw_extract64_0(final01);\n        write_iter1[b64idx] = vecw_extract64_1(final01);\n        write_iter2[b64idx] = vecw_extract64_0(final23);\n        write_iter3[b64idx] = vecw_extract64_1(final23);\n        write_iter4[b64idx] = vecw_extract64_0(final45);\n        write_iter5[b64idx] = vecw_extract64_1(final45);\n        write_iter6[b64idx] = vecw_extract64_0(final67);\n        write_iter7[b64idx] = vecw_extract64_1(final67);\n      }\n      buf0_read_iter = &(buf0_read_iter[256]);\n      write_iter0 = &(write_iter7[64]);\n    }\n  }\n\n  // 8 -> 2\n  // This is similar to the main TransposeNybbleblock() loop.\n  const VecW* source_iter = R_CAST(VecW*, buf1);\n  const VecW m2 = VCONST_W(kMask3333);\n  const VecW m4 = VCONST_W(kMask0F0F);\n  const VecW m8 = VCONST_W(kMask00FF);\n  const VecW gather_even = vecw_setr8(0, 2, 4, 6, 8, 10, 12, 14,\n                                      -1, -1, -1, -1, -1, -1, -1, -1);\n  // Take advantage of current function contract.\n  const uint32_t buf1_row_ct = (write_batch_size + 3) / 4;\n\n  const uint32_t fourword_ct = DivUp(read_batch_size, 32);\n  uintptr_t* target_iter0 = write_iter;\n  for (uint32_t ridx = 0; ridx != buf1_row_ct; ++ridx) {\n    uintptr_t* target_iter1 = &(target_iter0[write_ul_stride]);\n    uintptr_t* target_iter2 = &(target_iter1[write_ul_stride]);\n    uintptr_t* target_iter3 = &(target_iter2[write_ul_stride]);\n    for (uint32_t dvidx = 0; dvidx != fourword_ct; ++dvidx) {\n      const VecW loader0 = source_iter[dvidx * 2];\n 
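     // loader0/loader1 together yield one 64-bit word for each of the four\n      // target rows below\n 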
     const VecW loader1 = source_iter[dvidx * 2 + 1];\n\n      VecW even_nyps0 = loader0 & m2;\n      VecW even_nyps1 = loader1 & m2;\n      VecW odd_nyps0 = vecw_srli(loader0, 2) & m2;\n      VecW odd_nyps1 = vecw_srli(loader1, 2) & m2;\n      even_nyps0 = even_nyps0 | vecw_srli(even_nyps0, 6);\n      even_nyps1 = even_nyps1 | vecw_srli(even_nyps1, 6);\n      odd_nyps0 = odd_nyps0 | vecw_srli(odd_nyps0, 6);\n      odd_nyps1 = odd_nyps1 | vecw_srli(odd_nyps1, 6);\n      // Low four bits of even_nyps{0,1}[0], [2], ..., [14] are destined for\n      // target_iter0; high four bits of those bytes are destined for\n      // target_iter2.\n      const VecW even_nyps = vecw_gather_even(even_nyps0, even_nyps1, m8);\n      const VecW odd_nyps = vecw_gather_even(odd_nyps0, odd_nyps1, m8);\n\n      VecW mod0_nyps = even_nyps & m4;\n      VecW mod1_nyps = odd_nyps & m4;\n      VecW mod2_nyps = vecw_srli(even_nyps, 4) & m4;\n      VecW mod3_nyps = vecw_srli(odd_nyps, 4) & m4;\n      mod0_nyps = mod0_nyps | vecw_srli(mod0_nyps, 4);\n      mod1_nyps = mod1_nyps | vecw_srli(mod1_nyps, 4);\n      mod2_nyps = mod2_nyps | vecw_srli(mod2_nyps, 4);\n      mod3_nyps = mod3_nyps | vecw_srli(mod3_nyps, 4);\n      mod0_nyps = vecw_shuffle8(mod0_nyps, gather_even);\n      mod1_nyps = vecw_shuffle8(mod1_nyps, gather_even);\n      mod2_nyps = vecw_shuffle8(mod2_nyps, gather_even);\n      mod3_nyps = vecw_shuffle8(mod3_nyps, gather_even);\n      target_iter0[dvidx] = vecw_extract64_0(mod0_nyps);\n      target_iter1[dvidx] = vecw_extract64_0(mod1_nyps);\n      target_iter2[dvidx] = vecw_extract64_0(mod2_nyps);\n      target_iter3[dvidx] = vecw_extract64_0(mod3_nyps);\n    }\n    source_iter = &(source_iter[32]);\n    target_iter0 = &(target_iter3[write_ul_stride]);\n  }\n}\n#  endif\n#else  // !USE_SSE2\n#  ifdef __LP64__\nstatic_assert(kWordsPerVec == 1, \"TransposeNypblock64() needs to be updated.\");\nvoid TransposeNypblock64(const uintptr_t* read_iter, uint32_t read_ul_stride, uint32_t write_ul_stride, uint32_t read_batch_size, uint32_t write_batch_size, uintptr_t* __restrict write_iter, unsigned char* __restrict buf0, unsigned char* __restrict buf1) {\n  // buf0 and buf1 must each be vector-aligned and have size 16k\n  // defining them as unsigned char* might prevent a strict-aliasing issue?\n  // (might need to go through greater contortions to actually be safe?)\n  const uint32_t buf_row_ct = NypCtToByteCt(write_batch_size);\n  // fold the first 6 shuffles into the initial ingestion loop\n  const unsigned char* initial_read_iter = DowncastKToUc(read_iter);\n  const unsigned char* initial_read_end = &(initial_read_iter[buf_row_ct]);\n  unsigned char* initial_target_iter = buf0;\n  const uint32_t read_byte_stride = read_ul_stride * kBytesPerWord;\n  const uint32_t read_batch_rem = kNypsPerCacheline - read_batch_size;\n  for (; initial_read_iter != initial_read_end; ++initial_read_iter) {\n    const unsigned char* read_iter_tmp = initial_read_iter;\n    for (uint32_t ujj = 0; ujj != read_batch_size; ++ujj) {\n      *initial_target_iter++ = *read_iter_tmp;\n      read_iter_tmp = &(read_iter_tmp[read_byte_stride]);\n    }\n    initial_target_iter = memsetua(initial_target_iter, 0, read_batch_rem);\n  }\n\n  // second-to-last shuffle, 8 bit spacing -> 4\n  const uintptr_t* source_iter = R_CAST(uintptr_t*, buf0);\n  uintptr_t* target_iter0 = R_CAST(uintptr_t*, buf1);\n  const uint32_t write_word_ct = NypCtToWordCt(read_batch_size);\n  const uint32_t penult_inner_loop_iter_ct = 2 * write_word_ct;\n  const uint32_t 
cur_write_skip = 2 * kWordsPerCacheline - penult_inner_loop_iter_ct;\n  for (uint32_t uii = 0; uii != buf_row_ct; ++uii) {\n    uintptr_t* target_iter1 = &(target_iter0[kWordsPerCacheline * 2]);\n    for (uint32_t ujj = 0; ujj != penult_inner_loop_iter_ct; ++ujj) {\n      const uintptr_t source_word_lo = *source_iter++;\n      const uintptr_t source_word_hi = *source_iter++;\n      *target_iter0++ = PackTwo0F0FMask(source_word_lo, source_word_hi);\n      *target_iter1++ = PackTwo0F0FMask(source_word_lo >> 4, source_word_hi >> 4);\n    }\n    source_iter = &(source_iter[2 * cur_write_skip]);\n    target_iter0 = &(target_iter1[cur_write_skip]);\n  }\n\n  // last shuffle, 4 bit spacing -> 2\n  source_iter = R_CAST(uintptr_t*, buf1);\n  target_iter0 = write_iter;\n  const uint32_t last_loop_iter_ct = DivUp(write_batch_size, 2);\n  for (uint32_t uii = 0; uii != last_loop_iter_ct; ++uii) {\n    uintptr_t* target_iter1 = &(target_iter0[write_ul_stride]);\n    for (uint32_t ujj = 0; ujj != write_word_ct; ++ujj) {\n      const uintptr_t source_word_lo = *source_iter++;\n      const uintptr_t source_word_hi = *source_iter++;\n      target_iter0[ujj] = PackTwo3333Mask(source_word_lo, source_word_hi);\n      target_iter1[ujj] = PackTwo3333Mask(source_word_lo >> 2, source_word_hi >> 2);\n    }\n    source_iter = &(source_iter[2 * (kWordsPerCacheline - write_word_ct)]);\n    target_iter0 = &(target_iter1[write_ul_stride]);\n  }\n}\n#  else\nstatic_assert(kWordsPerVec == 1, \"TransposeNypblock32() needs to be updated.\");\nvoid TransposeNypblock32(const uintptr_t* read_iter, uint32_t read_ul_stride, uint32_t write_ul_stride, uint32_t read_batch_size, uint32_t write_batch_size, uintptr_t* __restrict write_iter, unsigned char* __restrict buf0, unsigned char* __restrict buf1) {\n  // buf0 and buf1 must each be vector-aligned and have size 16k\n  // defining them as unsigned char* might prevent a strict-aliasing issue?\n  // (might need to go through greater contortions to actually be safe?)\n  const uint32_t buf_row_ct = NypCtToByteCt(write_batch_size);\n  // fold the first 6 shuffles into the initial ingestion loop\n  const unsigned char* initial_read_iter = DowncastKToUc(read_iter);\n  const unsigned char* initial_read_end = &(initial_read_iter[buf_row_ct]);\n  unsigned char* initial_target_iter = buf0;\n  const uint32_t read_byte_stride = read_ul_stride * kBytesPerWord;\n  const uint32_t read_batch_rem = kNypsPerCacheline - read_batch_size;\n  for (; initial_read_iter != initial_read_end; ++initial_read_iter) {\n    const unsigned char* read_iter_tmp = initial_read_iter;\n    for (uint32_t ujj = 0; ujj != read_batch_size; ++ujj) {\n      *initial_target_iter++ = *read_iter_tmp;\n      read_iter_tmp = &(read_iter_tmp[read_byte_stride]);\n    }\n    initial_target_iter = memsetua(initial_target_iter, 0, read_batch_rem);\n  }\n\n  // second-to-last shuffle, 8 bit spacing -> 4\n  const uintptr_t* source_iter = R_CAST(uintptr_t*, buf0);\n  uintptr_t* target_iter0 = R_CAST(uintptr_t*, buf1);\n  const uint32_t write_word_ct = NypCtToWordCt(read_batch_size);\n  const uint32_t penult_inner_loop_iter_ct = 2 * write_word_ct;\n  const uint32_t cur_write_skip = 2 * kWordsPerCacheline - penult_inner_loop_iter_ct;\n  for (uint32_t uii = 0; uii != buf_row_ct; ++uii) {\n    uintptr_t* target_iter1 = &(target_iter0[kWordsPerCacheline * 2]);\n    for (uint32_t ujj = 0; ujj != penult_inner_loop_iter_ct; ++ujj) {\n      const uintptr_t source_word_lo = *source_iter++;\n      const uintptr_t source_word_hi = *source_iter++;\n 
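      // PackTwo0F0FMask() gathers the kMask0F0F-selected (low) nibbles of its\n      // two arguments into a single word; the >> 4 calls below gather the high\n      // nibbles instead, which is how 8-bit spacing becomes 4-bit here.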
     *target_iter0++ = PackTwo0F0FMask(source_word_lo, source_word_hi);\n      *target_iter1++ = PackTwo0F0FMask(source_word_lo >> 4, source_word_hi >> 4);\n    }\n    source_iter = &(source_iter[2 * cur_write_skip]);\n    target_iter0 = &(target_iter1[cur_write_skip]);\n  }\n\n  // last shuffle, 4 bit spacing -> 2\n  source_iter = R_CAST(uintptr_t*, buf1);\n  target_iter0 = write_iter;\n  const uint32_t last_loop_iter_ct = DivUp(write_batch_size, 2);\n  for (uint32_t uii = 0; uii != last_loop_iter_ct; ++uii) {\n    uintptr_t* target_iter1 = &(target_iter0[write_ul_stride]);\n    for (uint32_t ujj = 0; ujj != write_word_ct; ++ujj) {\n      const uintptr_t source_word_lo = *source_iter++;\n      const uintptr_t source_word_hi = *source_iter++;\n      target_iter0[ujj] = PackTwo3333Mask(source_word_lo, source_word_hi);\n      target_iter1[ujj] = PackTwo3333Mask(source_word_lo >> 2, source_word_hi >> 2);\n    }\n    source_iter = &(source_iter[2 * (kWordsPerCacheline - write_word_ct)]);\n    target_iter0 = &(target_iter1[write_ul_stride]);\n  }\n}\n#  endif  // !__LP64__\n#endif  // !USE_SSE2\n\nvoid BiallelicDosage16Invert(uint32_t dosage_ct, uint16_t* dosage_main) {\n  // replace each x with (32768 - x).\n  // compiler is smart enough to vectorize this.\n  for (uint32_t uii = 0; uii != dosage_ct; ++uii) {\n    dosage_main[uii] = 32768 - dosage_main[uii];\n  }\n}\n\nvoid BiallelicDphase16Invert(uint32_t dphase_ct, int16_t* dphase_delta) {\n  for (uint32_t uii = 0; uii != dphase_ct; ++uii) {\n    dphase_delta[uii] = -dphase_delta[uii];\n  }\n}\n\n#if defined(USE_SSE2) && !defined(USE_AVX2)\nvoid PackWordsToHalfwordsInvmatch(const uintptr_t* __restrict genoarr, uintptr_t inv_match_word, uint32_t inword_ct, uintptr_t* __restrict dst) {\n  // In shuffle8 case, this takes ~30% less time than a\n  // PackWordToHalfwordMask5555 loop.\n  const uint32_t out_fullvec_ct = inword_ct / (kWordsPerVec * 2);\n  const VecW xor_vec = vecw_set1(inv_match_word);\n  const VecW m1 = VCONST_W(kMask5555);\n#  ifdef USE_SHUFFLE8\n  const VecW swap12 = vecw_setr8(\n      0, 1, 4, 5, 2, 3, 6, 7,\n      8, 9, 12, 13, 10, 11, 14, 15);\n#  else\n  const VecW m2 = VCONST_W(kMask3333);\n#  endif\n  const VecW m4 = VCONST_W(kMask0F0F);\n  const VecW m8 = VCONST_W(kMask00FF);\n  for (uintptr_t vidx = 0; vidx != out_fullvec_ct; ++vidx) {\n    VecW vec_lo = vecw_loadu(&(genoarr[2 * kWordsPerVec * vidx])) ^ xor_vec;\n    VecW vec_hi = vecw_loadu(&(genoarr[2 * kWordsPerVec * vidx + kWordsPerVec])) ^ xor_vec;\n    vec_lo = vec_lo & vecw_srli(vec_lo, 1) & m1;\n    vec_hi = vec_hi & vecw_srli(vec_hi, 1) & m1;\n#  ifdef USE_SHUFFLE8\n    vec_lo = (vec_lo | vecw_srli(vec_lo, 3)) & m4;\n    vec_hi = (vec_hi | vecw_srli(vec_hi, 3)) & m4;\n    vec_lo = vecw_shuffle8(swap12, vec_lo);\n    vec_hi = vecw_shuffle8(swap12, vec_hi);\n#  else\n    vec_lo = (vec_lo | vecw_srli(vec_lo, 1)) & m2;\n    vec_hi = (vec_hi | vecw_srli(vec_hi, 1)) & m2;\n    vec_lo = (vec_lo | vecw_srli(vec_lo, 2)) & m4;\n    vec_hi = (vec_hi | vecw_srli(vec_hi, 2)) & m4;\n#  endif\n    vec_lo = vec_lo | vecw_srli(vec_lo, 4);\n    vec_hi = vec_hi | vecw_srli(vec_hi, 4);\n    const VecW vec_packed = vecw_gather_even(vec_lo, vec_hi, m8);\n    vecw_storeu(&(dst[kWordsPerVec * vidx]), vec_packed);\n  }\n  Halfword* dst_alias = DowncastWToHW(dst);\n  uint32_t widx = RoundDownPow2(inword_ct, kWordsPerVec * 2);\n  for (; widx != inword_ct; ++widx) {\n    const uintptr_t cur_word = genoarr[widx] ^ inv_match_word;\n    const Halfword hw = PackWordToHalfwordMask5555(cur_word & 
(cur_word >> 1));\n    dst_alias[widx] = hw;\n  }\n}\n\nvoid PackWordsToHalfwordsMismatch(const uintptr_t* __restrict genoarr, uintptr_t mismatch_word, uint32_t inword_ct, uintptr_t* __restrict dst) {\n  const uint32_t out_fullvec_ct = inword_ct / (kWordsPerVec * 2);\n  const VecW xor_vec = vecw_set1(mismatch_word);\n  const VecW m1 = VCONST_W(kMask5555);\n#  ifdef USE_SHUFFLE8\n  const VecW swap12 = vecw_setr8(\n      0, 1, 4, 5, 2, 3, 6, 7,\n      8, 9, 12, 13, 10, 11, 14, 15);\n#  else\n  const VecW m2 = VCONST_W(kMask3333);\n#  endif\n  const VecW m4 = VCONST_W(kMask0F0F);\n  const VecW m8 = VCONST_W(kMask00FF);\n  for (uintptr_t vidx = 0; vidx != out_fullvec_ct; ++vidx) {\n    VecW vec_lo = vecw_loadu(&(genoarr[2 * kWordsPerVec * vidx])) ^ xor_vec;\n    VecW vec_hi = vecw_loadu(&(genoarr[2 * kWordsPerVec * vidx + kWordsPerVec])) ^ xor_vec;\n    vec_lo = (vec_lo | vecw_srli(vec_lo, 1)) & m1;\n    vec_hi = (vec_hi | vecw_srli(vec_hi, 1)) & m1;\n#  ifdef USE_SHUFFLE8\n    vec_lo = (vec_lo | vecw_srli(vec_lo, 3)) & m4;\n    vec_hi = (vec_hi | vecw_srli(vec_hi, 3)) & m4;\n    vec_lo = vecw_shuffle8(swap12, vec_lo);\n    vec_hi = vecw_shuffle8(swap12, vec_hi);\n#  else\n    vec_lo = (vec_lo | vecw_srli(vec_lo, 1)) & m2;\n    vec_hi = (vec_hi | vecw_srli(vec_hi, 1)) & m2;\n    vec_lo = (vec_lo | vecw_srli(vec_lo, 2)) & m4;\n    vec_hi = (vec_hi | vecw_srli(vec_hi, 2)) & m4;\n#  endif\n    vec_lo = vec_lo | vecw_srli(vec_lo, 4);\n    vec_hi = vec_hi | vecw_srli(vec_hi, 4);\n    const VecW vec_packed = vecw_gather_even(vec_lo, vec_hi, m8);\n    vecw_storeu(&(dst[kWordsPerVec * vidx]), vec_packed);\n  }\n  Halfword* dst_alias = DowncastWToHW(dst);\n  uint32_t widx = RoundDownPow2(inword_ct, kWordsPerVec * 2);\n  for (; widx != inword_ct; ++widx) {\n    const uintptr_t cur_word = genoarr[widx] ^ mismatch_word;\n    const Halfword hw = PackWordToHalfwordMask5555(cur_word | (cur_word >> 1));\n    dst_alias[widx] = hw;\n  }\n}\n\nvoid MaskWordsToHalfwordsInvmatch(const uintptr_t* __restrict genoarr, uintptr_t inv_match_word, uint32_t inword_ct, uintptr_t* src, uintptr_t* dst) {\n  const uint32_t out_fullvec_ct = inword_ct / (kWordsPerVec * 2);\n  const VecW xor_vec = vecw_set1(inv_match_word);\n  const VecW m1 = VCONST_W(kMask5555);\n#  ifdef USE_SHUFFLE8\n  const VecW swap12 = vecw_setr8(\n      0, 1, 4, 5, 2, 3, 6, 7,\n      8, 9, 12, 13, 10, 11, 14, 15);\n#  else\n  const VecW m2 = VCONST_W(kMask3333);\n#  endif\n  const VecW m4 = VCONST_W(kMask0F0F);\n  const VecW m8 = VCONST_W(kMask00FF);\n  for (uintptr_t vidx = 0; vidx != out_fullvec_ct; ++vidx) {\n    VecW vec_lo = vecw_loadu(&(genoarr[2 * kWordsPerVec * vidx])) ^ xor_vec;\n    VecW vec_hi = vecw_loadu(&(genoarr[2 * kWordsPerVec * vidx + kWordsPerVec])) ^ xor_vec;\n    VecW src_vec = vecw_loadu(&(src[kWordsPerVec * vidx]));\n    vec_lo = vec_lo & vecw_srli(vec_lo, 1) & m1;\n    vec_hi = vec_hi & vecw_srli(vec_hi, 1) & m1;\n#  ifdef USE_SHUFFLE8\n    vec_lo = (vec_lo | vecw_srli(vec_lo, 3)) & m4;\n    vec_hi = (vec_hi | vecw_srli(vec_hi, 3)) & m4;\n    vec_lo = vecw_shuffle8(swap12, vec_lo);\n    vec_hi = vecw_shuffle8(swap12, vec_hi);\n#  else\n    vec_lo = (vec_lo | vecw_srli(vec_lo, 1)) & m2;\n    vec_hi = (vec_hi | vecw_srli(vec_hi, 1)) & m2;\n    vec_lo = (vec_lo | vecw_srli(vec_lo, 2)) & m4;\n    vec_hi = (vec_hi | vecw_srli(vec_hi, 2)) & m4;\n#  endif\n    vec_lo = vec_lo | vecw_srli(vec_lo, 4);\n    vec_hi = vec_hi | vecw_srli(vec_hi, 4);\n    const VecW vec_packed = vecw_gather_even(vec_lo, vec_hi, m8);\n    
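    // Same pack as PackWordsToHalfwordsInvmatch() above, but the result is\n    // ANDed with the caller-provided src bits before being stored.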
vecw_storeu(&(dst[kWordsPerVec * vidx]), src_vec & vec_packed);\n  }\n  Halfword* src_alias = DowncastWToHW(src);\n  Halfword* dst_alias = DowncastWToHW(dst);\n  uint32_t widx = RoundDownPow2(inword_ct, kWordsPerVec * 2);\n  for (; widx != inword_ct; ++widx) {\n    const uintptr_t cur_word = genoarr[widx] ^ inv_match_word;\n    const Halfword hw = PackWordToHalfwordMask5555(cur_word & (cur_word >> 1));\n    dst_alias[widx] = src_alias[widx] & hw;\n  }\n}\n#else\nvoid PackWordsToHalfwordsInvmatch(const uintptr_t* __restrict genoarr, uintptr_t inv_match_word, uint32_t inword_ct, uintptr_t* __restrict dst) {\n  Halfword* dst_alias = DowncastWToHW(dst);\n  for (uint32_t widx = 0; widx != inword_ct; ++widx) {\n    const uintptr_t cur_word = genoarr[widx] ^ inv_match_word;\n    const Halfword hw = PackWordToHalfwordMask5555(cur_word & (cur_word >> 1));\n    dst_alias[widx] = hw;\n  }\n}\n\nvoid PackWordsToHalfwordsMismatch(const uintptr_t* __restrict genoarr, uintptr_t mismatch_word, uint32_t inword_ct, uintptr_t* __restrict dst) {\n  Halfword* dst_alias = DowncastWToHW(dst);\n  for (uint32_t widx = 0; widx != inword_ct; ++widx) {\n    const uintptr_t cur_word = genoarr[widx] ^ mismatch_word;\n    const Halfword hw = PackWordToHalfwordMask5555(cur_word | (cur_word >> 1));\n    dst_alias[widx] = hw;\n  }\n}\n\nvoid MaskWordsToHalfwordsInvmatch(const uintptr_t* __restrict genoarr, uintptr_t inv_match_word, uint32_t inword_ct, uintptr_t* src, uintptr_t* dst) {\n  Halfword* src_alias = DowncastWToHW(src);\n  Halfword* dst_alias = DowncastWToHW(dst);\n  for (uint32_t widx = 0; widx != inword_ct; ++widx) {\n    const uintptr_t cur_word = genoarr[widx] ^ inv_match_word;\n    const Halfword hw = PackWordToHalfwordMask5555(cur_word & (cur_word >> 1));\n    dst_alias[widx] = src_alias[widx] & hw;\n  }\n}\n#endif\n\nvoid SparseToMissingness(const uintptr_t* __restrict raregeno, const uint32_t* difflist_sample_ids, uint32_t sample_ct, uint32_t difflist_common_geno, uint32_t difflist_len, uintptr_t* __restrict missingness) {\n  if (difflist_common_geno != 3) {\n    const uint32_t sample_ctl = BitCtToWordCt(sample_ct);\n    ZeroWArr(sample_ctl, missingness);\n    if (!difflist_len) {\n      return;\n    }\n    const uint32_t raregeno_word_ct = NypCtToWordCt(difflist_len);\n    for (uint32_t widx = 0; widx != raregeno_word_ct; ++widx) {\n      const uintptr_t raregeno_word = raregeno[widx];\n      uintptr_t raregeno_11 = raregeno_word & (raregeno_word >> 1) & kMask5555;\n      if (raregeno_11) {\n        const uint32_t* cur_difflist_sample_ids = &(difflist_sample_ids[widx * kBitsPerWordD2]);\n        do {\n          const uint32_t sample_idx_lowbits = ctzw(raregeno_11) / 2;\n          const uint32_t cur_sample_id = cur_difflist_sample_ids[sample_idx_lowbits];\n          SetBit(cur_sample_id, missingness);\n          raregeno_11 &= raregeno_11 - 1;\n        } while (raregeno_11);\n      }\n    }\n  } else {\n    SetAllBits(sample_ct, missingness);\n    // Don't need to look at raregeno, all cases are nonmissing.\n    for (uint32_t uii = 0; uii != difflist_len; ++uii) {\n      const uint32_t cur_sample_id = difflist_sample_ids[uii];\n      ClearBit(cur_sample_id, missingness);\n    }\n  }\n}\n\n#if defined(USE_SSE2) && !defined(USE_AVX2)\nvoid SplitHomRef2hetUnsafeW(const uintptr_t* __restrict genoarr, uint32_t inword_ct, uintptr_t* __restrict hom_buf, uintptr_t* __restrict ref2het_buf) {\n  const uint32_t out_fullvec_ct = inword_ct / (kWordsPerVec * 2);\n  // In shuffle8 case, this takes ~55% less time than 
unvectorized loop.\n  // homozygous: geno = 0 or 2\n  // ref2het: geno = 0 or 1\n  const VecW m1 = VCONST_W(kMask5555);\n#  ifdef USE_SHUFFLE8\n  const VecW swap12 = vecw_setr8(\n      0, 1, 4, 5, 2, 3, 6, 7,\n      8, 9, 12, 13, 10, 11, 14, 15);\n#  else\n  const VecW m2 = VCONST_W(kMask3333);\n#  endif\n  const VecW m4 = VCONST_W(kMask0F0F);\n  const VecW m8 = VCONST_W(kMask00FF);\n  for (uintptr_t vidx = 0; vidx != out_fullvec_ct; ++vidx) {\n    const VecW vec_lo = vecw_loadu(&(genoarr[2 * kWordsPerVec * vidx]));\n    const VecW vec_hi = vecw_loadu(&(genoarr[2 * kWordsPerVec * vidx + kWordsPerVec]));\n    VecW hom_lo = vecw_and_notfirst(vec_lo, m1);\n    VecW r2h_lo = vecw_and_notfirst(vecw_srli(vec_lo, 1), m1);\n    VecW hom_hi = vecw_and_notfirst(vec_hi, m1);\n    VecW r2h_hi = vecw_and_notfirst(vecw_srli(vec_hi, 1), m1);\n#  ifdef USE_SHUFFLE8\n    hom_lo = (hom_lo | vecw_srli(hom_lo, 3)) & m4;\n    r2h_lo = (r2h_lo | vecw_srli(r2h_lo, 3)) & m4;\n    hom_hi = (hom_hi | vecw_srli(hom_hi, 3)) & m4;\n    r2h_hi = (r2h_hi | vecw_srli(r2h_hi, 3)) & m4;\n    hom_lo = vecw_shuffle8(swap12, hom_lo);\n    r2h_lo = vecw_shuffle8(swap12, r2h_lo);\n    hom_hi = vecw_shuffle8(swap12, hom_hi);\n    r2h_hi = vecw_shuffle8(swap12, r2h_hi);\n#  else\n    hom_lo = (hom_lo | vecw_srli(hom_lo, 1)) & m2;\n    r2h_lo = (r2h_lo | vecw_srli(r2h_lo, 1)) & m2;\n    hom_hi = (hom_hi | vecw_srli(hom_hi, 1)) & m2;\n    r2h_hi = (r2h_hi | vecw_srli(r2h_hi, 1)) & m2;\n    hom_lo = (hom_lo | vecw_srli(hom_lo, 2)) & m4;\n    r2h_lo = (r2h_lo | vecw_srli(r2h_lo, 2)) & m4;\n    hom_hi = (hom_hi | vecw_srli(hom_hi, 2)) & m4;\n    r2h_hi = (r2h_hi | vecw_srli(r2h_hi, 2)) & m4;\n#  endif\n    hom_lo = hom_lo | vecw_srli(hom_lo, 4);\n    r2h_lo = r2h_lo | vecw_srli(r2h_lo, 4);\n    hom_hi = hom_hi | vecw_srli(hom_hi, 4);\n    r2h_hi = r2h_hi | vecw_srli(r2h_hi, 4);\n    const VecW hom_packed = vecw_gather_even(hom_lo, hom_hi, m8);\n    const VecW r2h_packed = vecw_gather_even(r2h_lo, r2h_hi, m8);\n    vecw_storeu(&(hom_buf[kWordsPerVec * vidx]), hom_packed);\n    vecw_storeu(&(ref2het_buf[kWordsPerVec * vidx]), r2h_packed);\n  }\n  Halfword* hom_alias = DowncastWToHW(hom_buf);\n  Halfword* r2h_alias = DowncastWToHW(ref2het_buf);\n  uint32_t widx = RoundDownPow2(inword_ct, kWordsPerVec * 2);\n  for (; widx != inword_ct; ++widx) {\n    const uintptr_t inv_geno_word = ~genoarr[widx];\n    hom_alias[widx] = PackWordToHalfwordMask5555(inv_geno_word);\n    r2h_alias[widx] = PackWordToHalfwordMaskAAAA(inv_geno_word);\n  }\n}\n#else\nvoid SplitHomRef2hetUnsafeW(const uintptr_t* genoarr, uint32_t inword_ct, uintptr_t* __restrict hom_buf, uintptr_t* __restrict ref2het_buf) {\n  Halfword* hom_alias = DowncastWToHW(hom_buf);\n  Halfword* ref2het_alias = DowncastWToHW(ref2het_buf);\n  for (uint32_t widx = 0; widx != inword_ct; ++widx) {\n    const uintptr_t inv_geno_word = ~genoarr[widx];\n    hom_alias[widx] = PackWordToHalfwordMask5555(inv_geno_word);\n    ref2het_alias[widx] = PackWordToHalfwordMaskAAAA(inv_geno_word);\n  }\n}\n#endif\n\nvoid SplitHomRef2het(const uintptr_t* genoarr, uint32_t sample_ct, uintptr_t* __restrict hom_buf, uintptr_t* __restrict ref2het_buf) {\n  const uint32_t full_outword_ct = sample_ct / kBitsPerWord;\n  SplitHomRef2hetUnsafeW(genoarr, full_outword_ct * 2, hom_buf, ref2het_buf);\n  const uint32_t remainder = sample_ct % kBitsPerWord;\n  if (remainder) {\n    uintptr_t geno_word = genoarr[full_outword_ct * 2];\n    uintptr_t hom_word = PackWordToHalfwordMask5555(~geno_word);\n    uintptr_t ref2het_word 
= PackWordToHalfwordMaskAAAA(~geno_word);\n    if (remainder > kBitsPerWordD2) {\n      geno_word = genoarr[full_outword_ct * 2 + 1];\n      hom_word |= S_CAST(uintptr_t, PackWordToHalfwordMask5555(~geno_word)) << kBitsPerWordD2;\n      ref2het_word |= S_CAST(uintptr_t, PackWordToHalfwordMaskAAAA(~geno_word)) << kBitsPerWordD2;\n    }\n    const uintptr_t cur_mask = (k1LU << remainder) - 1;\n    hom_buf[full_outword_ct] = hom_word & cur_mask;\n    ref2het_buf[full_outword_ct] = ref2het_word & cur_mask;\n  }\n}\n\nBoolErr HapsplitMustPhased(const uintptr_t* genoarr, const uintptr_t* phasepresent, const uintptr_t* phaseinfo, uint32_t sample_ct, uint32_t phase_exists, uintptr_t* hap_arr, uintptr_t* nm_arr) {\n  const uint32_t sample_ctl2 = NypCtToWordCt(sample_ct);\n  uintptr_t detect_unphased = 0;\n  if (!phase_exists) {\n    // error out if het encountered\n    for (uint32_t widx = 0; widx != sample_ctl2; ++widx) {\n      const uintptr_t geno_word = genoarr[widx];\n      const uintptr_t nm_word = 3 * (kMask5555 & (~(geno_word & (geno_word >> 1))));\n      const uintptr_t geno_nm = geno_word & nm_word;\n      // geno_nm is now {00, 01, 10, 00}.  In particular, if we ever see 01, we\n      // error out.\n      const uintptr_t geno_nm_hi = (geno_nm >> 1) & kMask5555;\n      nm_arr[widx] = nm_word;\n      hap_arr[widx] = geno_nm | geno_nm_hi;\n      detect_unphased |= geno_nm;\n    }\n    detect_unphased &= kMask5555;\n  } else {\n    // error out if het encountered, and not covered by phasepresent\n    const Halfword* phasepresent_alias = DowncastKWToHW(phasepresent);\n    const Halfword* phaseinfo_alias = DowncastKWToHW(phaseinfo);\n    for (uint32_t widx = 0; widx != sample_ctl2; ++widx) {\n      const uintptr_t geno_word = genoarr[widx];\n      const uintptr_t geno_rshift = geno_word >> 1;\n      const uintptr_t nm_word = 3 * (kMask5555 & (~(geno_word & geno_rshift)));\n      const uintptr_t geno_nm = geno_word & nm_word;\n      const uintptr_t geno_nm_hi = (geno_nm >> 1) & kMask5555;\n      const uintptr_t het_word = geno_nm & kMask5555;\n      const uintptr_t phasepresent_word = UnpackHalfwordToWord(phasepresent_alias[widx]);\n      const uintptr_t phaseinfo_word = phasepresent_word & UnpackHalfwordToWord(phaseinfo_alias[widx]);\n      nm_arr[widx] = nm_word;\n      hap_arr[widx] = geno_nm + geno_nm_hi + phaseinfo_word;\n      detect_unphased |= het_word & (~phasepresent_word);\n    }\n  }\n  const uint32_t trailing_nyp_ct = sample_ct % kBitsPerWordD2;\n  if (trailing_nyp_ct) {\n    const uint32_t trailing_bit_ct = trailing_nyp_ct * 2;\n    const uint32_t last_word_idx = sample_ctl2 - 1;\n    nm_arr[last_word_idx] = bzhi(nm_arr[last_word_idx], trailing_bit_ct);\n    hap_arr[last_word_idx] = bzhi(hap_arr[last_word_idx], trailing_bit_ct);\n  }\n  return (detect_unphased != 0);\n}\n\n#if defined(USE_SSE2) && !defined(USE_AVX2)\nvoid HapsplitHaploid(const uintptr_t* __restrict genoarr, uint32_t sample_ct, uintptr_t* __restrict hap_arr, uintptr_t* __restrict nm_arr) {\n  // In shuffle8 case, this takes ~40% less time than unvectorized loop.\n  const uint32_t sample_ctl = BitCtToWordCt(sample_ct);\n  const uint32_t sample_ctl2_is_odd = NypCtToWordCt(sample_ct) & 1;\n  const uint32_t wordpair_ct = sample_ctl - sample_ctl2_is_odd;\n  const uint32_t out_fullvec_ct = wordpair_ct / 2;\n  const VecW m1 = VCONST_W(kMask5555);\n#  ifdef USE_SHUFFLE8\n  const VecW swap12 = vecw_setr8(\n      0, 1, 4, 5, 2, 3, 6, 7,\n      8, 9, 12, 13, 10, 11, 14, 15);\n#  else\n  const VecW m2 = VCONST_W(kMask3333);\n# 
 endif\n  const VecW m4 = VCONST_W(kMask0F0F);\n  const VecW m8 = VCONST_W(kMask00FF);\n  for (uintptr_t vidx = 0; vidx != out_fullvec_ct; ++vidx) {\n    const VecW vec_lo = vecw_loadu(&(genoarr[2 * kWordsPerVec * vidx]));\n    const VecW vec_hi = vecw_loadu(&(genoarr[2 * kWordsPerVec * vidx + kWordsPerVec]));\n    VecW nm_lo = vecw_and_notfirst(vec_lo, m1);\n    VecW nm_hi = vecw_and_notfirst(vec_hi, m1);\n    VecW hap_lo = nm_lo & vecw_srli(vec_lo, 1);\n    VecW hap_hi = nm_hi & vecw_srli(vec_hi, 1);\n#  ifdef USE_SHUFFLE8\n    nm_lo = (nm_lo | vecw_srli(nm_lo, 3)) & m4;\n    nm_hi = (nm_hi | vecw_srli(nm_hi, 3)) & m4;\n    hap_lo = (hap_lo | vecw_srli(hap_lo, 3)) & m4;\n    hap_hi = (hap_hi | vecw_srli(hap_hi, 3)) & m4;\n    nm_lo = vecw_shuffle8(swap12, nm_lo);\n    nm_hi = vecw_shuffle8(swap12, nm_hi);\n    hap_lo = vecw_shuffle8(swap12, hap_lo);\n    hap_hi = vecw_shuffle8(swap12, hap_hi);\n#  else\n    nm_lo = (nm_lo | vecw_srli(nm_lo, 1)) & m2;\n    nm_hi = (nm_hi | vecw_srli(nm_hi, 1)) & m2;\n    hap_lo = (hap_lo | vecw_srli(hap_lo, 1)) & m2;\n    hap_hi = (hap_hi | vecw_srli(hap_hi, 1)) & m2;\n    nm_lo = (nm_lo | vecw_srli(nm_lo, 2)) & m4;\n    nm_hi = (nm_hi | vecw_srli(nm_hi, 2)) & m4;\n    hap_lo = (hap_lo | vecw_srli(hap_lo, 2)) & m4;\n    hap_hi = (hap_hi | vecw_srli(hap_hi, 2)) & m4;\n#  endif\n    nm_lo = nm_lo | vecw_srli(nm_lo, 4);\n    nm_hi = nm_hi | vecw_srli(nm_hi, 4);\n    hap_lo = hap_lo | vecw_srli(hap_lo, 4);\n    hap_hi = hap_hi | vecw_srli(hap_hi, 4);\n    const VecW nm_packed = vecw_gather_even(nm_lo, nm_hi, m8);\n    const VecW hap_packed = vecw_gather_even(hap_lo, hap_hi, m8);\n    vecw_storeu(&(nm_arr[kWordsPerVec * vidx]), nm_packed);\n    vecw_storeu(&(hap_arr[kWordsPerVec * vidx]), hap_packed);\n  }\n  if (wordpair_ct % 2) {\n    const uint32_t widx = wordpair_ct - 1;\n    const uintptr_t geno_word0 = genoarr[widx * 2];\n    const uintptr_t geno_word1 = genoarr[widx * 2 + 1];\n    const uintptr_t nm_word0 = ~geno_word0;\n    const uintptr_t nm_word1 = ~geno_word1;\n    const uintptr_t hap_word0 = nm_word0 & (geno_word0 >> 1);\n    const uintptr_t hap_word1 = nm_word1 & (geno_word1 >> 1);\n    nm_arr[widx] = PackTwo5555Mask(nm_word0, nm_word1);\n    hap_arr[widx] = PackTwo5555Mask(hap_word0, hap_word1);\n  }\n  if (sample_ctl2_is_odd) {\n    const uintptr_t geno_word0 = genoarr[wordpair_ct * 2];\n    const uintptr_t nm_word0 = ~geno_word0;\n    const uintptr_t hap_word0 = nm_word0 & (geno_word0 >> 1);\n    nm_arr[wordpair_ct] = PackWordToHalfwordMask5555(nm_word0);\n    hap_arr[wordpair_ct] = PackWordToHalfwordMask5555(hap_word0);\n  }\n  const uint32_t trailing_bit_ct = sample_ct % kBitsPerWord;\n  if (trailing_bit_ct) {\n    const uint32_t last_word_idx = sample_ctl - 1;\n    nm_arr[last_word_idx] = bzhi(nm_arr[last_word_idx], trailing_bit_ct);\n    hap_arr[last_word_idx] = bzhi(hap_arr[last_word_idx], trailing_bit_ct);\n  }\n}\n#else\nvoid HapsplitHaploid(const uintptr_t* __restrict genoarr, uint32_t sample_ct, uintptr_t* __restrict hap_arr, uintptr_t* __restrict nm_arr) {\n  const uint32_t sample_ctl = BitCtToWordCt(sample_ct);\n  const uint32_t sample_ctl2_is_odd = NypCtToWordCt(sample_ct) & 1;\n  const uint32_t wordpair_ct = sample_ctl - sample_ctl2_is_odd;\n  for (uint32_t widx = 0; widx != wordpair_ct; ++widx) {\n    const uintptr_t geno_word0 = genoarr[widx * 2];\n    const uintptr_t geno_word1 = genoarr[widx * 2 + 1];\n    const uintptr_t nm_word0 = ~geno_word0;\n    const uintptr_t nm_word1 = ~geno_word1;\n    const uintptr_t hap_word0 = 
nm_word0 & (geno_word0 >> 1);\n    const uintptr_t hap_word1 = nm_word1 & (geno_word1 >> 1);\n    nm_arr[widx] = PackTwo5555Mask(nm_word0, nm_word1);\n    hap_arr[widx] = PackTwo5555Mask(hap_word0, hap_word1);\n  }\n  if (sample_ctl2_is_odd) {\n    const uintptr_t geno_word0 = genoarr[wordpair_ct * 2];\n    const uintptr_t nm_word0 = ~geno_word0;\n    const uintptr_t hap_word0 = nm_word0 & (geno_word0 >> 1);\n    nm_arr[wordpair_ct] = PackWordToHalfwordMask5555(nm_word0);\n    hap_arr[wordpair_ct] = PackWordToHalfwordMask5555(hap_word0);\n  }\n  const uint32_t trailing_bit_ct = sample_ct % kBitsPerWord;\n  if (trailing_bit_ct) {\n    const uint32_t last_word_idx = sample_ctl - 1;\n    nm_arr[last_word_idx] = bzhi(nm_arr[last_word_idx], trailing_bit_ct);\n    hap_arr[last_word_idx] = bzhi(hap_arr[last_word_idx], trailing_bit_ct);\n  }\n}\n#endif\n\n#ifdef USE_SHUFFLE8\n// ~70% less time than per-byte lookup.\nvoid GenoarrLookup256x1bx4(const uintptr_t* genoarr, const void* table256x1bx4, uint32_t sample_ct, void* __restrict result) {\n  const uint32_t* table_alias = S_CAST(const uint32_t*, table256x1bx4);\n  const unsigned char* genoarr_alias = DowncastKToUc(genoarr);\n  unsigned char* resultb = S_CAST(unsigned char*, result);\n  const uint32_t full_byte_ct = sample_ct / 4;\n  if (full_byte_ct >= kBytesPerVec) {\n    const uint32_t last_genoarr_offset = full_byte_ct - kBytesPerVec;\n    const VecW lookup_even = vecw_loadu(&(table_alias[256]));\n    const VecW lookup_odd = vecw_loadu(&(table_alias[256 + kInt32PerVec]));\n    const VecW m4 = VCONST_W(kMask0F0F);\n    for (uint32_t genoarr_offset = 0; ; genoarr_offset += kBytesPerVec) {\n      if (genoarr_offset >= last_genoarr_offset) {\n        if (genoarr_offset == full_byte_ct) {\n          break;\n        }\n        genoarr_offset = last_genoarr_offset;\n      }\n      VecW cur_vec = vecw_loadu(&(genoarr_alias[genoarr_offset]));\n      VecW vec_lo;\n      VecW vec_hi;\n      vecw_lo_and_hi_nybbles(cur_vec, m4, &vec_lo, &vec_hi);\n      VecW result_lo_even = vecw_shuffle8(lookup_even, vec_lo);\n      VecW result_hi_even = vecw_shuffle8(lookup_even, vec_hi);\n      VecW result_lo_odd = vecw_shuffle8(lookup_odd, vec_lo);\n      VecW result_hi_odd = vecw_shuffle8(lookup_odd, vec_hi);\n      // In AVX2 case, result_lo_even has (0, 2, 4, ..., 62), and result_lo_odd\n      // has (1, 3, 5, ..., 63).\n      //   even -> (0, 2, 4, ..., 14, 32, ..., 46, 16, ..., 30, 48, ..., 62)\n      //   odd ->  (1, 3, 5, ..., 15, 33, ..., 47, 17, ..., 31, 49, ..., 63)\n      // Then unpacklo yields (0, 1, 2, ..., 31).\n      result_lo_even = vecw_permute0xd8_if_avx2(result_lo_even);\n      result_lo_odd = vecw_permute0xd8_if_avx2(result_lo_odd);\n      result_hi_even = vecw_permute0xd8_if_avx2(result_hi_even);\n      result_hi_odd = vecw_permute0xd8_if_avx2(result_hi_odd);\n      const VecW result0 = vecw_unpacklo8(result_lo_even, result_lo_odd);\n      const VecW result1 = vecw_unpackhi8(result_lo_even, result_lo_odd);\n      const VecW result2 = vecw_unpacklo8(result_hi_even, result_hi_odd);\n      const VecW result3 = vecw_unpackhi8(result_hi_even, result_hi_odd);\n      unsigned char* cur_resultb = &(resultb[genoarr_offset * 4]);\n      vecw_storeu(cur_resultb, result0);\n      vecw_storeu(&(cur_resultb[kBytesPerVec]), result1);\n      vecw_storeu(&(cur_resultb[2 * kBytesPerVec]), result2);\n      vecw_storeu(&(cur_resultb[3 * kBytesPerVec]), result3);\n    }\n  } else {\n    for (uint32_t byte_idx = 0; byte_idx != full_byte_ct; ++byte_idx) {\n      
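      // Short-array fallback: each genotype byte (4 samples) selects one\n      // precomputed 4-byte expansion from the 256-entry table.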
CopyToUnalignedOffsetU32(resultb, &(table_alias[genoarr_alias[byte_idx]]), byte_idx);\n    }\n  }\n  const uint32_t remainder = sample_ct % 4;\n  if (remainder) {\n    unsigned char* result_last = &(resultb[full_byte_ct * 4]);\n    uintptr_t geno_byte = genoarr_alias[full_byte_ct];\n    for (uint32_t uii = 0; uii != remainder; ++uii) {\n      result_last[uii] = table_alias[geno_byte & 3];\n      geno_byte >>= 2;\n    }\n  }\n}\n#else\nvoid GenoarrLookup256x1bx4(const uintptr_t* genoarr, const void* table256x1bx4, uint32_t sample_ct, void* __restrict result) {\n  const uint32_t* table_alias = S_CAST(const uint32_t*, table256x1bx4);\n  const unsigned char* genoarr_alias = DowncastKToUc(genoarr);\n  unsigned char* resultb = S_CAST(unsigned char*, result);\n  const uint32_t full_byte_ct = sample_ct / 4;\n  for (uint32_t byte_idx = 0; byte_idx != full_byte_ct; ++byte_idx) {\n    CopyToUnalignedOffsetU32(resultb, &(table_alias[genoarr_alias[byte_idx]]), byte_idx);\n  }\n  const uint32_t remainder = sample_ct % 4;\n  if (remainder) {\n    unsigned char* result_last = &(resultb[full_byte_ct * 4]);\n    uintptr_t geno_byte = genoarr_alias[full_byte_ct];\n    for (uint32_t uii = 0; uii != remainder; ++uii) {\n      result_last[uii] = table_alias[geno_byte & 3];\n      geno_byte >>= 2;\n    }\n  }\n}\n#endif\n\n#ifndef NO_UNALIGNED\nvoid GenoarrLookup16x4bx2(const uintptr_t* genoarr, const void* table16x4bx2, uint32_t sample_ct, void* __restrict result) {\n  const uint64_t* table_alias = S_CAST(const uint64_t*, table16x4bx2);\n  unsigned char* result_biter = S_CAST(unsigned char*, result);\n  const uint32_t sample_ctl2m1 = (sample_ct - 1) / kBitsPerWordD2;\n  uint32_t loop_len = kBitsPerWordD4;\n  uintptr_t geno_word = 0;\n  for (uint32_t widx = 0; ; ++widx) {\n    if (widx >= sample_ctl2m1) {\n      if (widx > sample_ctl2m1) {\n        if (sample_ct % 2) {\n          // not a regular CopyToUnaligned, we're only copying the bottom half\n          // of the element\n          memcpy(result_biter, &(table_alias[geno_word & 3]), 4);\n        }\n        return;\n      }\n      loop_len = ModNz(sample_ct, kBitsPerWordD2) / 2;\n    }\n    geno_word = genoarr[widx];\n    for (uint32_t uii = 0; uii != loop_len; ++uii) {\n      const uintptr_t cur_2geno = geno_word & 15;\n      AppendU64(table_alias[cur_2geno], &result_biter);\n      geno_word >>= 4;\n    }\n  }\n}\n#else\n// plink2_glm_logistic does not guarantee 8 byte table alignment.\nvoid GenoarrLookup16x4bx2(const uintptr_t* genoarr, const void* table16x4bx2, uint32_t sample_ct, void* __restrict result) {\n  const uint32_t* table_u32 = S_CAST(const uint32_t*, table16x4bx2);\n  unsigned char* result_biter = S_CAST(unsigned char*, result);\n  const uint32_t sample_ctl2m1 = (sample_ct - 1) / kBitsPerWordD2;\n  uint32_t loop_len = kBitsPerWordD4;\n  uintptr_t geno_word = 0;\n  for (uint32_t widx = 0; ; ++widx) {\n    if (widx >= sample_ctl2m1) {\n      if (widx > sample_ctl2m1) {\n        if (sample_ct % 2) {\n          // not a regular CopyToUnaligned, we're only copying the bottom half\n          // of the element\n          memcpy(result_biter, &(table_u32[(geno_word & 3) * 2]), 4);\n        }\n        return;\n      }\n      loop_len = ModNz(sample_ct, kBitsPerWordD2) / 2;\n    }\n    geno_word = genoarr[widx];\n    for (uint32_t uii = 0; uii != loop_len; ++uii) {\n      const uintptr_t cur_2geno = geno_word & 15;\n      memcpy(result_biter, &(table_u32[cur_2geno * 2]), 8);\n      result_biter += 8;\n      geno_word >>= 4;\n    }\n  }\n}\n#endif\n\n// 
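Illustrative sketch, not part of the original library: one way a caller\n// might drive the 16x4bx2 lookup pair above.  The PGENLIB_LOOKUP_EXAMPLE\n// guard and the uint32_t payload values are assumptions for demonstration\n// only.\n#ifdef PGENLIB_LOOKUP_EXAMPLE\nstatic void LookupExample16x4bx2(const uintptr_t* genoarr, uint32_t sample_ct, uint32_t* __restrict dst) {\n  uint64_t table[16];  // 16 entries, two 4-byte payloads each\n  uint32_t* seed = R_CAST(uint32_t*, table);\n  // seed entry k's first payload for each genotype k in 0..3; the rest of the\n  // table is derived from these four values\n  seed[0] = 0;   // genotype 0 (hom ref)\n  seed[2] = 1;   // genotype 1 (het)\n  seed[4] = 2;   // genotype 2 (hom alt)\n  seed[6] = 99;  // genotype 3 (missing)\n  InitLookup16x4bx2(table);\n  // expands two genotypes per table probe\n  GenoarrLookup16x4bx2(genoarr, table, sample_ct, dst);\n}\n#endif\n\n// 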
this might be important for genovec -> AlleleCode expansion\nvoid GenoarrLookup256x2bx4(const uintptr_t* genoarr, const void* table256x2bx4, uint32_t sample_ct, void* __restrict result) {\n  const uint64_t* table_alias = S_CAST(const uint64_t*, table256x2bx4);\n  const unsigned char* genoarr_alias = DowncastKToUc(genoarr);\n  unsigned char* resultb = S_CAST(unsigned char*, result);\n  const uint32_t full_byte_ct = sample_ct / 4;\n  for (uint32_t byte_idx = 0; byte_idx != full_byte_ct; ++byte_idx) {\n    CopyToUnalignedOffsetU64(resultb, &(table_alias[genoarr_alias[byte_idx]]), byte_idx);\n  }\n  const uint32_t remainder = sample_ct % 4;\n  if (remainder) {\n    unsigned char* result_last = &(resultb[full_byte_ct * sizeof(int64_t)]);\n    uintptr_t geno_byte = genoarr_alias[full_byte_ct];\n    for (uint32_t uii = 0; uii != remainder; ++uii) {\n      CopyToUnalignedOffsetU16(result_last, DowncastKU64ToU16(&(table_alias[geno_byte & 3])), uii);\n      geno_byte >>= 2;\n    }\n  }\n}\n\n#ifdef USE_SSE2\nvoid GenoarrLookup4x16b(const uintptr_t* genoarr, const void* table4x16b, uint32_t sample_ct, void* result) {\n  const __m128i* table_alias = S_CAST(const __m128i*, table4x16b);\n  unsigned char* result_biter = S_CAST(unsigned char*, result);\n  const uint32_t sample_ctl2m1 = (sample_ct - 1) / kBitsPerWordD2;\n  uint32_t loop_len = kBitsPerWordD2;\n  uintptr_t geno_word = 0;\n  for (uint32_t widx = 0; ; ++widx) {\n    if (widx >= sample_ctl2m1) {\n      if (widx > sample_ctl2m1) {\n        return;\n      }\n      loop_len = ModNz(sample_ct, kBitsPerWordD2);\n    }\n    geno_word = genoarr[widx];\n    for (uint32_t uii = 0; uii != loop_len; ++uii) {\n      _mm_storeu_si128(R_CAST(__m128i*, result_biter), table_alias[geno_word & 3]);\n      result_biter += 16;\n      geno_word >>= 2;\n    }\n  }\n}\n\nvoid GenoarrLookup16x8bx2(const uintptr_t* genoarr, const void* table16x8bx2, uint32_t sample_ct, void* __restrict result) {\n  const __m128i* table_alias = S_CAST(const __m128i*, table16x8bx2);\n  unsigned char* result_biter = S_CAST(unsigned char*, result);\n  const uint32_t sample_ctl2m1 = (sample_ct - 1) / kBitsPerWordD2;\n  uint32_t loop_len = kBitsPerWordD4;\n  uintptr_t geno_word = 0;\n  for (uint32_t widx = 0; ; ++widx) {\n    if (widx >= sample_ctl2m1) {\n      if (widx > sample_ctl2m1) {\n        if (sample_ct % 2) {\n          memcpy(result_biter, &(table_alias[geno_word & 3]), 8);\n        }\n        return;\n      }\n      loop_len = ModNz(sample_ct, kBitsPerWordD2) / 2;\n    }\n    geno_word = genoarr[widx];\n    for (uint32_t uii = 0; uii != loop_len; ++uii) {\n      const uintptr_t cur_2geno = geno_word & 15;\n      _mm_storeu_si128(R_CAST(__m128i*, result_biter), table_alias[cur_2geno]);\n      result_biter += 16;\n      geno_word >>= 4;\n    }\n  }\n}\n\nvoid GenoarrLookup256x4bx4(const uintptr_t* genoarr, const void* table256x4bx4, uint32_t sample_ct, void* __restrict result) {\n  const __m128i* table_alias = S_CAST(const __m128i*, table256x4bx4);\n  const unsigned char* genoarr_alias = DowncastKToUc(genoarr);\n  unsigned char* resultb = S_CAST(unsigned char*, result);\n  const uint32_t full_byte_ct = sample_ct / 4;\n  for (uint32_t byte_idx = 0; byte_idx != full_byte_ct; ++byte_idx) {\n    _mm_storeu_si128(R_CAST(__m128i*, &(resultb[byte_idx * 16])), table_alias[genoarr_alias[byte_idx]]);\n  }\n  const uint32_t remainder = sample_ct % 4;\n  if (remainder) {\n    unsigned char* result_last = &(resultb[full_byte_ct * 16]);\n    uintptr_t geno_byte = genoarr_alias[full_byte_ct];\n    
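    // 1-3 trailing samples: copy the first 4-byte payload of each 16-byte\n    // table entry, one sample at a time.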
for (uint32_t uii = 0; uii != remainder; ++uii) {\n      CopyToUnalignedOffsetU32(result_last, R_CAST(const uint32_t*, &(table_alias[geno_byte & 3])), uii);\n      geno_byte >>= 2;\n    }\n  }\n}\n#else // !USE_SSE2\nvoid GenoarrLookup4x16b(const uintptr_t* genoarr, const void* table4x16b, uint32_t sample_ct, void* result) {\n  const uint64_t* table_alias = S_CAST(const uint64_t*, table4x16b);\n  unsigned char* result_biter = S_CAST(unsigned char*, result);\n  const uint32_t sample_ctl2m1 = (sample_ct - 1) / kBitsPerWordD2;\n  uint32_t loop_len = kBitsPerWordD2;\n  uintptr_t geno_word = 0;\n  for (uint32_t widx = 0; ; ++widx) {\n    if (widx >= sample_ctl2m1) {\n      if (widx > sample_ctl2m1) {\n        return;\n      }\n      loop_len = ModNz(sample_ct, kBitsPerWordD2);\n    }\n    geno_word = genoarr[widx];\n    for (uint32_t uii = 0; uii != loop_len; ++uii) {\n      memcpy(result_biter, &(table_alias[(geno_word & 3) * 2]), 16);\n      result_biter += 16;\n      geno_word >>= 2;\n    }\n  }\n}\n\nvoid GenoarrLookup16x8bx2(const uintptr_t* genoarr, const void* table16x8bx2, uint32_t sample_ct, void* __restrict result) {\n  const uint64_t* table_alias = S_CAST(const uint64_t*, table16x8bx2);\n  unsigned char* result_biter = S_CAST(unsigned char*, result);\n  const uint32_t sample_ctl2m1 = (sample_ct - 1) / kBitsPerWordD2;\n  uint32_t loop_len = kBitsPerWordD4;\n  uintptr_t geno_word = 0;\n  for (uint32_t widx = 0; ; ++widx) {\n    if (widx >= sample_ctl2m1) {\n      if (widx > sample_ctl2m1) {\n        if (sample_ct % 2) {\n          CopyToUnalignedU64(result_biter, &(table_alias[(geno_word & 3) * 2]));\n        }\n        return;\n      }\n      loop_len = ModNz(sample_ct, kBitsPerWordD2) / 2;\n    }\n    geno_word = genoarr[widx];\n    for (uint32_t uii = 0; uii != loop_len; ++uii) {\n      const uintptr_t cur_2geno = geno_word & 15;\n      memcpy(result_biter, &(table_alias[cur_2geno * 2]), 16);\n      result_biter += 16;\n      geno_word >>= 4;\n    }\n  }\n}\n\nvoid GenoarrLookup256x4bx4(const uintptr_t* genoarr, const void* table256x4bx4, uint32_t sample_ct, void* __restrict result) {\n  const uint32_t* table_alias = S_CAST(const uint32_t*, table256x4bx4);\n  const unsigned char* genoarr_alias = DowncastKToUc(genoarr);\n  unsigned char* resultb = S_CAST(unsigned char*, result);\n  const uint32_t full_byte_ct = sample_ct / 4;\n  for (uint32_t byte_idx = 0; byte_idx != full_byte_ct; ++byte_idx) {\n    memcpy(&(resultb[byte_idx * 16]), &(table_alias[genoarr_alias[byte_idx] * 4]), 16);\n  }\n  const uint32_t remainder = sample_ct % 4;\n  if (remainder) {\n    unsigned char* result_last = &(resultb[full_byte_ct * 16]);\n    uintptr_t geno_byte = genoarr_alias[full_byte_ct];\n    for (uint32_t uii = 0; uii != remainder; ++uii) {\n      CopyToUnalignedOffsetU32(result_last, &(table_alias[(geno_byte & 3) * 4]), uii);\n      geno_byte >>= 2;\n    }\n  }\n}\n#endif\n\nvoid InitLookup16x4bx2(void* table16x4bx2) {\n  uint32_t* table_iter = S_CAST(uint32_t*, table16x4bx2);\n  uint32_t vals[4];\n  vals[0] = table_iter[0];\n  table_iter[1] = vals[0];\n  vals[1] = table_iter[2];\n  table_iter[3] = vals[0];\n  vals[2] = table_iter[4];\n  table_iter[5] = vals[0];\n  vals[3] = table_iter[6];\n  table_iter[7] = vals[0];\n  table_iter = &(table_iter[8]);\n  for (uint32_t high_idx = 1; high_idx != 4; ++high_idx) {\n    const uint32_t cur_high = vals[high_idx];\n    for (uint32_t low_idx = 0; low_idx != 4; ++low_idx) {\n      *table_iter++ = vals[low_idx];\n      *table_iter++ = cur_high;\n    }\n  
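  // entries (high_idx * 4)..(high_idx * 4 + 3) now pair each vals[low_idx]\n  // with vals[high_idx]; entries 0..3 were finished before this loop.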
}\n}\n\nvoid InitLookup16x8bx2(void* table16x8bx2) {\n  uint64_t* table_iter = S_CAST(uint64_t*, table16x8bx2);\n  uint64_t vals[4];\n  vals[0] = table_iter[0];\n  table_iter[1] = vals[0];\n  vals[1] = table_iter[2];\n  table_iter[3] = vals[0];\n  vals[2] = table_iter[4];\n  table_iter[5] = vals[0];\n  vals[3] = table_iter[6];\n  table_iter[7] = vals[0];\n  table_iter = &(table_iter[8]);\n  for (uint32_t high_idx = 1; high_idx != 4; ++high_idx) {\n    // bugfix (20 Jun 2018): cur_high needs to be a uint64_t, not a uint32_t\n    const uint64_t cur_high = vals[high_idx];\n    for (uint32_t low_idx = 0; low_idx != 4; ++low_idx) {\n      *table_iter++ = vals[low_idx];\n      *table_iter++ = cur_high;\n    }\n  }\n}\n\nvoid InitLookup256x1bx4(void* table256x1bx4) {\n  unsigned char* table_iter = S_CAST(unsigned char*, table256x1bx4);\n  unsigned char vals[4];\n  vals[0] = table_iter[0];\n  vals[1] = table_iter[4];\n  vals[2] = table_iter[8];\n  vals[3] = table_iter[12];\n  for (uint32_t high_idx = 0; high_idx != 4; ++high_idx) {\n    const uint32_t cur_high = vals[high_idx];\n    for (uint32_t second_idx = 0; second_idx != 4; ++second_idx) {\n      const uint32_t cur_second = vals[second_idx];\n      for (uint32_t third_idx = 0; third_idx != 4; ++third_idx) {\n        const uint32_t cur_third = vals[third_idx];\n        for (uint32_t low_idx = 0; low_idx != 4; ++low_idx) {\n          *table_iter++ = vals[low_idx];\n          *table_iter++ = cur_third;\n          *table_iter++ = cur_second;\n          *table_iter++ = cur_high;\n        }\n      }\n    }\n  }\n#ifdef USE_SHUFFLE8\n  // first vector: 0, 1, 2, 3\n  // second vector: 0, 4, 8, 12\n  uint32_t* final_u32s = R_CAST(uint32_t*, table_iter);\n  uint32_t val0123;\n  memcpy(&val0123, vals, 4);\n  for (uint32_t uii = 0; uii != kInt32PerVec; ++uii) {\n    final_u32s[uii] = val0123;\n  }\n  final_u32s[kInt32PerVec] = S_CAST(uint32_t, vals[0]) * 0x1010101;\n  final_u32s[kInt32PerVec + 1] = S_CAST(uint32_t, vals[1]) * 0x1010101;\n  final_u32s[kInt32PerVec + 2] = S_CAST(uint32_t, vals[2]) * 0x1010101;\n  final_u32s[kInt32PerVec + 3] = S_CAST(uint32_t, vals[3]) * 0x1010101;\n#  ifdef USE_AVX2\n  memcpy(&(final_u32s[12]), &(final_u32s[8]), 16);\n#  endif\n#endif\n}\n\nvoid InitLookup256x2bx4(void* table256x2bx4) {\n  uint16_t* table_iter = S_CAST(uint16_t*, table256x2bx4);\n  uint16_t vals[4];\n  vals[0] = table_iter[0];\n  vals[1] = table_iter[4];\n  vals[2] = table_iter[8];\n  vals[3] = table_iter[12];\n  for (uint32_t high_idx = 0; high_idx != 4; ++high_idx) {\n    const uint32_t cur_high = vals[high_idx];\n    for (uint32_t second_idx = 0; second_idx != 4; ++second_idx) {\n      const uint32_t cur_second = vals[second_idx];\n      for (uint32_t third_idx = 0; third_idx != 4; ++third_idx) {\n        const uint32_t cur_third = vals[third_idx];\n        for (uint32_t low_idx = 0; low_idx != 4; ++low_idx) {\n          *table_iter++ = vals[low_idx];\n          *table_iter++ = cur_third;\n          *table_iter++ = cur_second;\n          *table_iter++ = cur_high;\n        }\n      }\n    }\n  }\n}\n\nvoid InitLookup256x4bx4(void* table256x4bx4) {\n  uint32_t* table_iter = S_CAST(uint32_t*, table256x4bx4);\n  uint32_t vals[4];\n  vals[0] = table_iter[0];\n  vals[1] = table_iter[4];\n  vals[2] = table_iter[8];\n  vals[3] = table_iter[12];\n  for (uint32_t high_idx = 0; high_idx != 4; ++high_idx) {\n    const uint32_t cur_high = vals[high_idx];\n    for (uint32_t second_idx = 0; second_idx != 4; ++second_idx) {\n      const uint32_t cur_second = 
vals[second_idx];\n      for (uint32_t third_idx = 0; third_idx != 4; ++third_idx) {\n        const uint32_t cur_third = vals[third_idx];\n        for (uint32_t low_idx = 0; low_idx != 4; ++low_idx) {\n          *table_iter++ = vals[low_idx];\n          *table_iter++ = cur_third;\n          *table_iter++ = cur_second;\n          *table_iter++ = cur_high;\n        }\n      }\n    }\n  }\n}\n\nvoid PhaseLookup4b(const uintptr_t* genoarr, const uintptr_t* phasepresent, const uintptr_t* phaseinfo, const void* table56x4bx2, uint32_t sample_ct, void* __restrict result) {\n  const uint64_t* table_alias = S_CAST(const uint64_t*, table56x4bx2);\n  const uint32_t sample_ctl2_m1 = (sample_ct - 1) / kBitsPerWordD2;\n  const Halfword* phasepresent_alias = DowncastKWToHW(phasepresent);\n  const Halfword* phaseinfo_alias = DowncastKWToHW(phaseinfo);\n  unsigned char* result_biter = S_CAST(unsigned char*, result);\n  uint32_t loop_len = kBitsPerWordD4;\n  uintptr_t geno_word = 0;\n  uintptr_t phasepresent_hw_shifted = 0;\n  uintptr_t phaseinfo_hw_shifted = 0;\n  for (uint32_t widx = 0; ; ++widx) {\n    if (widx >= sample_ctl2_m1) {\n      if (widx > sample_ctl2_m1) {\n        if (sample_ct % 2) {\n          uintptr_t cur_idx = (geno_word & 3);\n          // assume trailing bits of phasepresent/phaseinfo clear\n          // phaseinfo_hw_shifted not necessarily updated, so need if-statement\n          // bugfix (25 Jun 2018): must only consider active bit of\n          // phasepresent_hw_shifted, not the already-processed ones\n          if (phasepresent_hw_shifted & 16) {\n            cur_idx ^= 16 | (phaseinfo_hw_shifted & 2);\n          }\n          memcpy(result_biter, &(table_alias[cur_idx]), 4);\n        }\n        return;\n      }\n      loop_len = ModNz(sample_ct, kBitsPerWordD2) / 2;\n    }\n    geno_word = genoarr[widx];\n    phasepresent_hw_shifted = phasepresent_alias[widx];\n    if (!phasepresent_hw_shifted) {\n      for (uint32_t uii = 0; uii != loop_len; ++uii) {\n        AppendU64(table_alias[geno_word & 15], &result_biter);\n        geno_word >>= 4;\n      }\n    } else {\n      phasepresent_hw_shifted = phasepresent_hw_shifted << 4;\n      phaseinfo_hw_shifted = phaseinfo_alias[widx];\n\n      // note that this must be on a separate line (or we have to static_cast)\n      phaseinfo_hw_shifted = phaseinfo_hw_shifted << 1;\n\n      for (uint32_t uii = 0; uii != loop_len; ++uii) {\n        const uintptr_t cur_idx = ((geno_word & 15) | (phasepresent_hw_shifted & 48)) ^ (phaseinfo_hw_shifted & 6);\n        AppendU64(table_alias[cur_idx], &result_biter);\n        geno_word >>= 4;\n        phasepresent_hw_shifted >>= 2;\n        phaseinfo_hw_shifted >>= 2;\n      }\n    }\n  }\n}\n\nvoid InitPhaseLookup4b(void* table56x4bx2) {\n  uint32_t* table_iter = S_CAST(uint32_t*, table56x4bx2);\n  uint32_t vals[4];\n  vals[0] = table_iter[0];\n  table_iter[1] = vals[0];\n  vals[1] = table_iter[2];\n  table_iter[3] = vals[0];\n  vals[2] = table_iter[4];\n  table_iter[5] = vals[0];\n  vals[3] = table_iter[6];\n  table_iter[7] = vals[0];\n  table_iter = &(table_iter[8]);\n  for (uint32_t high_idx = 1; high_idx != 4; ++high_idx) {\n    const uint32_t cur_high = vals[high_idx];\n    for (uint32_t low_idx = 0; low_idx != 4; ++low_idx) {\n      *table_iter++ = vals[low_idx];\n      *table_iter++ = cur_high;\n    }\n  }\n  // [16][0]..[31][1]: bit 4 is set\n  // low bits must be 01 or 11\n  const uint32_t val_phaseinfo0 = table_iter[2];\n  table_iter[3] = vals[0];\n  const uint32_t val_phaseinfo1 = 
table_iter[6];\n  table_iter[7] = vals[0];\n  table_iter = &(table_iter[8]);\n  for (uint32_t high_idx = 1; high_idx != 4; ++high_idx) {\n    const uint32_t cur_high = vals[high_idx];\n    table_iter[2] = val_phaseinfo0;\n    table_iter[3] = cur_high;\n    table_iter[6] = val_phaseinfo1;\n    table_iter[7] = cur_high;\n    table_iter = &(table_iter[8]);\n  }\n  // [32][0]..[39][1]: bit 5 set, bit 4 unset\n  // high bits must be 00 or 01\n  for (uint32_t high_idx = 0; high_idx != 2; ++high_idx) {\n    const uint32_t cur_high = high_idx? val_phaseinfo0 : val_phaseinfo1;\n    for (uint32_t low_idx = 0; low_idx != 4; ++low_idx) {\n      *table_iter++ = vals[low_idx];\n      *table_iter++ = cur_high;\n    }\n  }\n  table_iter = &(table_iter[16]);\n  // [48][0]..[55][1]: bits 4 and 5 set\n  for (uint32_t high_idx = 0; high_idx != 2; ++high_idx) {\n    const uint32_t cur_high = high_idx? val_phaseinfo0 : val_phaseinfo1;\n    table_iter[2] = val_phaseinfo0;\n    table_iter[3] = cur_high;\n    table_iter[6] = val_phaseinfo1;\n    table_iter[7] = cur_high;\n    table_iter = &(table_iter[8]);\n  }\n}\n\n#ifdef USE_SSE2\nvoid PhaseLookup8b(const uintptr_t* genoarr, const uintptr_t* phasepresent, const uintptr_t* phaseinfo, const void* table56x8bx2, uint32_t sample_ct, void* __restrict result) {\n  const __m128i* table_alias = S_CAST(const __m128i*, table56x8bx2);\n  const uint32_t sample_ctl2_m1 = (sample_ct - 1) / kBitsPerWordD2;\n  const Halfword* phasepresent_alias = DowncastKWToHW(phasepresent);\n  const Halfword* phaseinfo_alias = DowncastKWToHW(phaseinfo);\n  unsigned char* result_biter = S_CAST(unsigned char*, result);\n  uint32_t loop_len = kBitsPerWordD4;\n  uintptr_t geno_word = 0;\n  uintptr_t phasepresent_hw_shifted = 0;\n  uintptr_t phaseinfo_hw_shifted = 0;\n  for (uint32_t widx = 0; ; ++widx) {\n    if (widx >= sample_ctl2_m1) {\n      if (widx > sample_ctl2_m1) {\n        if (sample_ct % 2) {\n          uintptr_t cur_idx = (geno_word & 3);\n          if (phasepresent_hw_shifted & 16) {\n            cur_idx ^= 16 | (phaseinfo_hw_shifted & 2);\n          }\n          memcpy(result_biter, &(table_alias[cur_idx]), 8);\n        }\n        return;\n      }\n      loop_len = ModNz(sample_ct, kBitsPerWordD2) / 2;\n    }\n    geno_word = genoarr[widx];\n    phasepresent_hw_shifted = phasepresent_alias[widx];\n    if (!phasepresent_hw_shifted) {\n      for (uint32_t uii = 0; uii != loop_len; ++uii) {\n        _mm_storeu_si128(R_CAST(__m128i*, result_biter), table_alias[geno_word & 15]);\n        result_biter += 16;\n        geno_word >>= 4;\n      }\n    } else {\n      phasepresent_hw_shifted = phasepresent_hw_shifted << 4;\n      phaseinfo_hw_shifted = phaseinfo_alias[widx];\n      phaseinfo_hw_shifted = phaseinfo_hw_shifted << 1;\n      for (uint32_t uii = 0; uii != loop_len; ++uii) {\n        const uintptr_t cur_idx = ((geno_word & 15) | (phasepresent_hw_shifted & 48)) ^ (phaseinfo_hw_shifted & 6);\n        _mm_storeu_si128(R_CAST(__m128i*, result_biter), table_alias[cur_idx]);\n        result_biter += 16;\n        geno_word >>= 4;\n        phasepresent_hw_shifted >>= 2;\n        phaseinfo_hw_shifted >>= 2;\n      }\n    }\n  }\n}\n#else // !USE_SSE2\nvoid PhaseLookup8b(const uintptr_t* genoarr, const uintptr_t* phasepresent, const uintptr_t* phaseinfo, const void* table56x8bx2, uint32_t sample_ct, void* __restrict result) {\n  const uint64_t* table_alias = S_CAST(const uint64_t*, table56x8bx2);\n  const uint32_t sample_ctl2_m1 = (sample_ct - 1) / kBitsPerWordD2;\n  const Halfword* 
phasepresent_alias = DowncastKWToHW(phasepresent);\n  const Halfword* phaseinfo_alias = DowncastKWToHW(phaseinfo);\n  unsigned char* result_biter = S_CAST(unsigned char*, result);\n  uint32_t loop_len = kBitsPerWordD4;\n  uintptr_t geno_word = 0;\n  uintptr_t phasepresent_hw_shifted = 0;\n  uintptr_t phaseinfo_hw_shifted = 0;\n  for (uint32_t widx = 0; ; ++widx) {\n    if (widx >= sample_ctl2_m1) {\n      if (widx > sample_ctl2_m1) {\n        if (sample_ct % 2) {\n          uintptr_t cur_idx = (geno_word & 3);\n          if (phasepresent_hw_shifted & 16) {\n            cur_idx ^= 16 | (phaseinfo_hw_shifted & 2);\n          }\n          CopyToUnalignedU64(result_biter, &(table_alias[cur_idx * 2]));\n        }\n        return;\n      }\n      loop_len = ModNz(sample_ct, kBitsPerWordD2) / 2;\n    }\n    geno_word = genoarr[widx];\n    phasepresent_hw_shifted = phasepresent_alias[widx];\n    if (!phasepresent_hw_shifted) {\n      for (uint32_t uii = 0; uii != loop_len; ++uii) {\n        memcpy(result_biter, &(table_alias[(geno_word & 15) * 2]), 16);\n        result_biter = &(result_biter[16]);\n        geno_word >>= 4;\n      }\n    } else {\n      phasepresent_hw_shifted = phasepresent_hw_shifted << 4;\n      phaseinfo_hw_shifted = phaseinfo_alias[widx];\n      phaseinfo_hw_shifted = phaseinfo_hw_shifted << 1;\n      for (uint32_t uii = 0; uii != loop_len; ++uii) {\n        const uintptr_t cur_idx = ((geno_word & 15) | (phasepresent_hw_shifted & 48)) ^ (phaseinfo_hw_shifted & 6);\n        memcpy(result_biter, &(table_alias[cur_idx * 2]), 16);\n        // bugfix (12 Jun 2023): forgot this\n        result_biter = &(result_biter[16]);\n\n        geno_word >>= 4;\n        phasepresent_hw_shifted >>= 2;\n        phaseinfo_hw_shifted >>= 2;\n      }\n    }\n  }\n}\n#endif\n\nvoid InitPhaseLookup8b(void* table56x8bx2) {\n  uint64_t* table_iter = S_CAST(uint64_t*, table56x8bx2);\n  uint64_t vals[4];\n  vals[0] = table_iter[0];\n  table_iter[1] = vals[0];\n  vals[1] = table_iter[2];\n  table_iter[3] = vals[0];\n  vals[2] = table_iter[4];\n  table_iter[5] = vals[0];\n  vals[3] = table_iter[6];\n  table_iter[7] = vals[0];\n  table_iter = &(table_iter[8]);\n  for (uint32_t high_idx = 1; high_idx != 4; ++high_idx) {\n    const uint64_t cur_high = vals[high_idx];\n    for (uint32_t low_idx = 0; low_idx != 4; ++low_idx) {\n      *table_iter++ = vals[low_idx];\n      *table_iter++ = cur_high;\n    }\n  }\n  // [16][0]..[31][1]: bit 4 is set\n  // low bits must be 01 or 11\n  const uint64_t val_phaseinfo0 = table_iter[2];\n  table_iter[3] = vals[0];\n  const uint64_t val_phaseinfo1 = table_iter[6];\n  table_iter[7] = vals[0];\n  table_iter = &(table_iter[8]);\n  for (uint32_t high_idx = 1; high_idx != 4; ++high_idx) {\n    const uint64_t cur_high = vals[high_idx];\n    table_iter[2] = val_phaseinfo0;\n    table_iter[3] = cur_high;\n    table_iter[6] = val_phaseinfo1;\n    table_iter[7] = cur_high;\n    table_iter = &(table_iter[8]);\n  }\n  // [32][0]..[39][1]: bit 5 set, bit 4 unset\n  // high bits must be 00 or 01\n  for (uint32_t high_idx = 0; high_idx != 2; ++high_idx) {\n    const uint64_t cur_high = high_idx? val_phaseinfo0 : val_phaseinfo1;\n    for (uint32_t low_idx = 0; low_idx != 4; ++low_idx) {\n      *table_iter++ = vals[low_idx];\n      *table_iter++ = cur_high;\n    }\n  }\n  table_iter = &(table_iter[16]);\n  // [48][0]..[55][1]: bits 4 and 5 set\n  for (uint32_t high_idx = 0; high_idx != 2; ++high_idx) {\n    const uint64_t cur_high = high_idx? 
val_phaseinfo0 : val_phaseinfo1;\n    table_iter[2] = val_phaseinfo0;\n    table_iter[3] = cur_high;\n    table_iter[6] = val_phaseinfo1;\n    table_iter[7] = cur_high;\n    table_iter = &(table_iter[8]);\n  }\n}\n\n// bits 0..3: two genotypes\n// bits 4..5: two (phasepresent | sex_male) bits\n// bits 1,3: unpacked phaseinfo xor\nvoid PhaseXNohhLookup4b(const uintptr_t* genoarr, const uintptr_t* phasepresent, const uintptr_t* phaseinfo, const uintptr_t* sex_male, const void* table64x4bx2, uint32_t sample_ct, void* result) {\n  const uint64_t* table_alias = S_CAST(const uint64_t*, table64x4bx2);\n  const uint32_t sample_ctl2_m1 = (sample_ct - 1) / kBitsPerWordD2;\n  const Halfword* phasepresent_alias = DowncastKWToHW(phasepresent);\n  const Halfword* phaseinfo_alias = DowncastKWToHW(phaseinfo);\n  const Halfword* sex_male_alias = DowncastKWToHW(sex_male);\n  unsigned char* result_biter = S_CAST(unsigned char*, result);\n  uint32_t loop_len = kBitsPerWordD4;\n  uintptr_t geno_word_xored = 0;\n  uintptr_t male_or_phasepresent_hw_shifted = 0;\n  for (uint32_t widx = 0; ; ++widx) {\n    if (widx >= sample_ctl2_m1) {\n      if (widx > sample_ctl2_m1) {\n        if (sample_ct % 2) {\n          uintptr_t cur_idx = (geno_word_xored & 3) | (male_or_phasepresent_hw_shifted & 16);\n          memcpy(result_biter, &(table_alias[cur_idx]), 4);\n        }\n        return;\n      }\n      loop_len = ModNz(sample_ct, kBitsPerWordD2) / 2;\n    }\n    geno_word_xored = genoarr[widx];\n    male_or_phasepresent_hw_shifted = sex_male_alias[widx];\n    const uintptr_t phasepresent_hw = phasepresent_alias[widx];\n    male_or_phasepresent_hw_shifted |= phasepresent_hw;\n    male_or_phasepresent_hw_shifted <<= 4;\n    if (!phasepresent_hw) {\n      for (uint32_t uii = 0; uii != loop_len; ++uii) {\n        AppendU64(table_alias[(geno_word_xored & 15) | (male_or_phasepresent_hw_shifted & 48)], &result_biter);\n        geno_word_xored >>= 4;\n        male_or_phasepresent_hw_shifted >>= 2;\n      }\n    } else {\n      geno_word_xored ^= UnpackHalfwordToWordShift1(phaseinfo_alias[widx]);\n      for (uint32_t uii = 0; uii != loop_len; ++uii) {\n        const uintptr_t cur_idx = (geno_word_xored & 15) | (male_or_phasepresent_hw_shifted & 48);\n        AppendU64(table_alias[cur_idx], &result_biter);\n        geno_word_xored >>= 4;\n        male_or_phasepresent_hw_shifted >>= 2;\n      }\n    }\n  }\n}\n\nvoid InitPhaseXNohhLookup4b(void* table64x4bx2) {\n  uint32_t* table_iter = S_CAST(uint32_t*, table64x4bx2);\n  uint32_t vals[4];\n  vals[0] = table_iter[0];\n  table_iter[1] = vals[0];\n  vals[1] = table_iter[2];\n  table_iter[3] = vals[0];\n  vals[2] = table_iter[4];\n  table_iter[5] = vals[0];\n  vals[3] = table_iter[6];\n  table_iter[7] = vals[0];\n  table_iter = &(table_iter[8]);\n  for (uint32_t high_idx = 1; high_idx != 4; ++high_idx) {\n    const uint32_t cur_high = vals[high_idx];\n    for (uint32_t low_idx = 0; low_idx != 4; ++low_idx) {\n      *table_iter++ = vals[low_idx];\n      *table_iter++ = cur_high;\n    }\n  }\n  // [16][0]..[31][1]: bit 4 is set\n  uint32_t male_or_phasepresent_vals[4];\n  for (uint32_t low_idx = 0; low_idx != 4; ++low_idx) {\n    male_or_phasepresent_vals[low_idx] = *table_iter++;\n    *table_iter++ = vals[0];\n  }\n  for (uint32_t high_idx = 1; high_idx != 4; ++high_idx) {\n    const uint32_t cur_high = vals[high_idx];\n    for (uint32_t low_idx = 0; low_idx != 4; ++low_idx) {\n      *table_iter++ = male_or_phasepresent_vals[low_idx];\n      *table_iter++ = cur_high;\n    }\n  }\n  // 
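Unlike the 56-entry phase tables above, any genotype can accompany a set\n  // bit 4/5 here (sex_male bits aren't limited to hets), so the remaining\n  // sections fill all 16 genotype combinations apiece.\n  // 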
[32][0]..[47][1]: bit 5 set, bit 4 unset\n  for (uint32_t high_idx = 0; high_idx != 4; ++high_idx) {\n    const uint32_t cur_high = male_or_phasepresent_vals[high_idx];\n    for (uint32_t low_idx = 0; low_idx != 4; ++low_idx) {\n      *table_iter++ = vals[low_idx];\n      *table_iter++ = cur_high;\n    }\n  }\n  // [48][0]..[63][1]: bits 4 and 5 set\n  for (uint32_t high_idx = 0; high_idx != 4; ++high_idx) {\n    const uint32_t cur_high = male_or_phasepresent_vals[high_idx];\n    for (uint32_t low_idx = 0; low_idx != 4; ++low_idx) {\n      *table_iter++ = male_or_phasepresent_vals[low_idx];\n      *table_iter++ = cur_high;\n    }\n  }\n}\n\nvoid GenoarrSexLookup4b(const uintptr_t* genoarr, const uintptr_t* sex_male, const void* table64x4bx2, uint32_t sample_ct, void* result) {\n  const uint64_t* table_alias = S_CAST(const uint64_t*, table64x4bx2);\n  const uint32_t sample_ctl2_m1 = (sample_ct - 1) / kBitsPerWordD2;\n  const Halfword* sex_male_alias = DowncastKWToHW(sex_male);\n  unsigned char* result_biter = S_CAST(unsigned char*, result);\n  uint32_t loop_len = kBitsPerWordD4;\n  uintptr_t geno_word = 0;\n  uintptr_t male_hw_shifted = 0;\n  for (uint32_t widx = 0; ; ++widx) {\n    if (widx >= sample_ctl2_m1) {\n      if (widx > sample_ctl2_m1) {\n        if (sample_ct % 2) {\n          uintptr_t cur_idx = (geno_word & 3) | (male_hw_shifted & 16);\n          memcpy(result_biter, &(table_alias[cur_idx]), 4);\n        }\n        return;\n      }\n      loop_len = ModNz(sample_ct, kBitsPerWordD2) / 2;\n    }\n    geno_word = genoarr[widx];\n    male_hw_shifted = sex_male_alias[widx];\n    male_hw_shifted <<= 4;\n    for (uint32_t uii = 0; uii != loop_len; ++uii) {\n      AppendU64(table_alias[(geno_word & 15) | (male_hw_shifted & 48)], &result_biter);\n      geno_word >>= 4;\n      male_hw_shifted >>= 2;\n    }\n  }\n}\n\nvoid InitPhaseXNohhLookup8b(void* table64x8bx2) {\n  uint64_t* table_iter = S_CAST(uint64_t*, table64x8bx2);\n  uint64_t vals[4];\n  vals[0] = table_iter[0];\n  table_iter[1] = vals[0];\n  vals[1] = table_iter[2];\n  table_iter[3] = vals[0];\n  vals[2] = table_iter[4];\n  table_iter[5] = vals[0];\n  vals[3] = table_iter[6];\n  table_iter[7] = vals[0];\n  table_iter = &(table_iter[8]);\n  for (uint32_t high_idx = 1; high_idx != 4; ++high_idx) {\n    const uint64_t cur_high = vals[high_idx];\n    for (uint32_t low_idx = 0; low_idx != 4; ++low_idx) {\n      *table_iter++ = vals[low_idx];\n      *table_iter++ = cur_high;\n    }\n  }\n  // [16][0]..[31][1]: bit 4 is set\n  uint64_t male_or_phasepresent_vals[4];\n  for (uint32_t low_idx = 0; low_idx != 4; ++low_idx) {\n    male_or_phasepresent_vals[low_idx] = *table_iter++;\n    *table_iter++ = vals[0];\n  }\n  for (uint32_t high_idx = 1; high_idx != 4; ++high_idx) {\n    const uint64_t cur_high = vals[high_idx];\n    for (uint32_t low_idx = 0; low_idx != 4; ++low_idx) {\n      *table_iter++ = male_or_phasepresent_vals[low_idx];\n      *table_iter++ = cur_high;\n    }\n  }\n  // [32][0]..[47][1]: bit 5 set, bit 4 unset\n  for (uint32_t high_idx = 0; high_idx != 4; ++high_idx) {\n    const uint64_t cur_high = male_or_phasepresent_vals[high_idx];\n    for (uint32_t low_idx = 0; low_idx != 4; ++low_idx) {\n      *table_iter++ = vals[low_idx];\n      *table_iter++ = cur_high;\n    }\n  }\n  // [48][0]..[63][1]: bits 4 and 5 set\n  for (uint32_t high_idx = 0; high_idx != 4; ++high_idx) {\n    const uint64_t cur_high = male_or_phasepresent_vals[high_idx];\n    for (uint32_t low_idx = 0; low_idx != 4; ++low_idx) {\n      *table_iter++ = 
male_or_phasepresent_vals[low_idx];\n      *table_iter++ = cur_high;\n    }\n  }\n}\n\n#ifdef USE_SSE2\nvoid GenoarrSexLookup8b(const uintptr_t* genoarr, const uintptr_t* sex_male, const void* table64x8bx2, uint32_t sample_ct, void* result) {\n  const __m128i* table_alias = S_CAST(const __m128i*, table64x8bx2);\n  const uint32_t sample_ctl2_m1 = (sample_ct - 1) / kBitsPerWordD2;\n  const Halfword* sex_male_alias = DowncastKWToHW(sex_male);\n  unsigned char* result_biter = S_CAST(unsigned char*, result);\n  uint32_t loop_len = kBitsPerWordD4;\n  uintptr_t geno_word = 0;\n  uintptr_t male_hw_shifted = 0;\n  for (uint32_t widx = 0; ; ++widx) {\n    if (widx >= sample_ctl2_m1) {\n      if (widx > sample_ctl2_m1) {\n        if (sample_ct % 2) {\n          uintptr_t cur_idx = (geno_word & 3) | (male_hw_shifted & 16);\n          memcpy(result_biter, &(table_alias[cur_idx]), 8);\n        }\n        return;\n      }\n      loop_len = ModNz(sample_ct, kBitsPerWordD2) / 2;\n    }\n    geno_word = genoarr[widx];\n    male_hw_shifted = sex_male_alias[widx];\n    male_hw_shifted <<= 4;\n    for (uint32_t uii = 0; uii != loop_len; ++uii) {\n      _mm_storeu_si128(R_CAST(__m128i*, result_biter), table_alias[(geno_word & 15) | (male_hw_shifted & 48)]);\n      result_biter += 16;\n      geno_word >>= 4;\n      male_hw_shifted >>= 2;\n    }\n  }\n}\n#else // !USE_SSE2\nvoid GenoarrSexLookup8b(const uintptr_t* genoarr, const uintptr_t* sex_male, const void* table64x8bx2, uint32_t sample_ct, void* result) {\n  const uint64_t* table_alias = S_CAST(const uint64_t*, table64x8bx2);\n  const uint32_t sample_ctl2_m1 = (sample_ct - 1) / kBitsPerWordD2;\n  const Halfword* sex_male_alias = DowncastKWToHW(sex_male);\n  unsigned char* result_biter = S_CAST(unsigned char*, result);\n  uint32_t loop_len = kBitsPerWordD4;\n  uintptr_t geno_word = 0;\n  uintptr_t male_hw_shifted = 0;\n  for (uint32_t widx = 0; ; ++widx) {\n    if (widx >= sample_ctl2_m1) {\n      if (widx > sample_ctl2_m1) {\n        if (sample_ct % 2) {\n          const uintptr_t cur_idx = (geno_word & 3) | (male_hw_shifted & 16);\n          memcpy(result_biter, &(table_alias[cur_idx * 2]), 8);\n        }\n        return;\n      }\n      loop_len = ModNz(sample_ct, kBitsPerWordD2) / 2;\n    }\n    geno_word = genoarr[widx];\n    male_hw_shifted = sex_male_alias[widx];\n    male_hw_shifted <<= 4;\n    for (uint32_t uii = 0; uii != loop_len; ++uii) {\n      memcpy(result_biter, &(table_alias[((geno_word & 15) | (male_hw_shifted & 48)) * 2]), 16);\n      result_biter = &(result_biter[16]);\n      geno_word >>= 4;\n      male_hw_shifted >>= 2;\n    }\n  }\n}\n#endif\n\nvoid VcfPhaseLookup4b(const uintptr_t* genoarr, const uintptr_t* cur_phased, const uintptr_t* phaseinfo, const void* table246x4bx2, uint32_t sample_ct, void* __restrict result) {\n  const uint64_t* table_alias = S_CAST(const uint64_t*, table246x4bx2);\n  const uint32_t sample_ctl2_m1 = (sample_ct - 1) / kBitsPerWordD2;\n  const Halfword* cur_phased_alias = DowncastKWToHW(cur_phased);\n  const Halfword* phaseinfo_alias = DowncastKWToHW(phaseinfo);\n  unsigned char* result_biter = S_CAST(unsigned char*, result);\n  uint32_t loop_len = kBitsPerWordD4;\n  uintptr_t geno_word = 0;\n  uintptr_t cur_phased_hw_shifted = 0;\n  uintptr_t phaseinfo_hw_shifted = 0;\n  for (uint32_t widx = 0; ; ++widx) {\n    if (widx >= sample_ctl2_m1) {\n      if (widx > sample_ctl2_m1) {\n        if (sample_ct % 2) {\n          uintptr_t cur_idx = (geno_word & 3) | (cur_phased_hw_shifted & 16) | (phaseinfo_hw_shifted & 
64);\n          memcpy(result_biter, &(table_alias[cur_idx]), 4);\n        }\n        return;\n      }\n      loop_len = ModNz(sample_ct, kBitsPerWordD2) / 2;\n    }\n    geno_word = genoarr[widx];\n    cur_phased_hw_shifted = cur_phased_alias[widx];\n    if (!cur_phased_hw_shifted) {\n      for (uint32_t uii = 0; uii != loop_len; ++uii) {\n        AppendU64(table_alias[geno_word & 15], &result_biter);\n        geno_word >>= 4;\n      }\n    } else {\n      cur_phased_hw_shifted = cur_phased_hw_shifted << 4;\n      phaseinfo_hw_shifted = phaseinfo_alias[widx];\n\n      // note that this must be on a separate line (or we have to static_cast)\n      phaseinfo_hw_shifted = phaseinfo_hw_shifted << 6;\n\n      for (uint32_t uii = 0; uii != loop_len; ++uii) {\n        const uintptr_t cur_idx = (geno_word & 15) | (cur_phased_hw_shifted & 48) | (phaseinfo_hw_shifted & 192);\n        AppendU64(table_alias[cur_idx], &result_biter);\n        geno_word >>= 4;\n        cur_phased_hw_shifted >>= 2;\n        phaseinfo_hw_shifted >>= 2;\n      }\n    }\n  }\n}\n\nvoid InitVcfPhaseLookup4b(void* table246x4bx2) {\n  uint32_t* table_iter = S_CAST(uint32_t*, table246x4bx2);\n  uint32_t unphased_vals[4];\n  unphased_vals[0] = table_iter[0];\n  table_iter[1] = unphased_vals[0];\n  unphased_vals[1] = table_iter[2];\n  table_iter[3] = unphased_vals[0];\n  unphased_vals[2] = table_iter[4];\n  table_iter[5] = unphased_vals[0];\n  unphased_vals[3] = table_iter[6];\n  table_iter[7] = unphased_vals[0];\n  table_iter = &(table_iter[8]);\n  for (uint32_t high_idx = 1; high_idx != 4; ++high_idx) {\n    const uint32_t cur_high = unphased_vals[high_idx];\n    for (uint32_t low_idx = 0; low_idx != 4; ++low_idx) {\n      *table_iter++ = unphased_vals[low_idx];\n      *table_iter++ = cur_high;\n    }\n  }\n  // [16][0]..[31][1]: first entry is phased and unflipped, second is unphased\n  uint32_t phased_unflipped_vals[4];\n  phased_unflipped_vals[0] = table_iter[0];\n  table_iter[1] = unphased_vals[0];\n  phased_unflipped_vals[1] = table_iter[2];\n  table_iter[3] = unphased_vals[0];\n  phased_unflipped_vals[2] = table_iter[4];\n  table_iter[5] = unphased_vals[0];\n  phased_unflipped_vals[3] = table_iter[6];\n  table_iter[7] = unphased_vals[0];\n  table_iter = &(table_iter[8]);\n  for (uint32_t high_idx = 1; high_idx != 4; ++high_idx) {\n    const uint32_t cur_high = unphased_vals[high_idx];\n    for (uint32_t low_idx = 0; low_idx != 4; ++low_idx) {\n      *table_iter++ = phased_unflipped_vals[low_idx];\n      *table_iter++ = cur_high;\n    }\n  }\n  // [32][0]..[63][1]: second entry is phased and unflipped\n  for (uint32_t high_idx = 0; high_idx != 4; ++high_idx) {\n    const uint32_t cur_high = phased_unflipped_vals[high_idx];\n    for (uint32_t low_idx = 0; low_idx != 4; ++low_idx) {\n      *table_iter++ = unphased_vals[low_idx];\n      *table_iter++ = cur_high;\n    }\n  }\n  for (uint32_t high_idx = 0; high_idx != 4; ++high_idx) {\n    const uint32_t cur_high = phased_unflipped_vals[high_idx];\n    for (uint32_t low_idx = 0; low_idx != 4; ++low_idx) {\n      *table_iter++ = phased_unflipped_vals[low_idx];\n      *table_iter++ = cur_high;\n    }\n  }\n  // [64][0]..[79][1] should be impossible\n  table_iter = &(table_iter[32]);\n  // [80][0]..[95][1]: first entry is phased and flipped, second is unphased\n  // genotype must be 01\n  const uint32_t phased_flipped_01 = table_iter[2];\n  for (uint32_t high_idx = 0; high_idx != 4; ++high_idx) {\n    table_iter[2] = phased_flipped_01;\n    table_iter[3] = 
unphased_vals[high_idx];\n    table_iter = &(table_iter[8]);\n  }\n  // [96][0]..[111][1] should be impossible\n  table_iter = &(table_iter[32]);\n  // [112][0]..[127][1]: first entry phased-flipped, second phased-unflipped\n  for (uint32_t high_idx = 0; high_idx != 4; ++high_idx) {\n    table_iter[2] = phased_flipped_01;\n    table_iter[3] = phased_unflipped_vals[high_idx];\n    table_iter = &(table_iter[8]);\n  }\n  // [128][0]..[163][1] should be impossible\n  table_iter = &(table_iter[72]);\n  // [164][0]..[167][1]: second entry phased-flipped, first entry unphased\n  for (uint32_t low_idx = 0; low_idx != 4; ++low_idx) {\n    *table_iter++ = unphased_vals[low_idx];\n    *table_iter++ = phased_flipped_01;\n  }\n  // [168][0]..[179][1] should be impossible\n  table_iter = &(table_iter[24]);\n  // [180][0]..[183][1]: second entry phased-flipped, first phased-unflipped\n  for (uint32_t low_idx = 0; low_idx != 4; ++low_idx) {\n    *table_iter++ = phased_unflipped_vals[low_idx];\n    *table_iter++ = phased_flipped_01;\n  }\n  // [184][0]..[244][1] should be impossible\n  // [245][0]..[245][1]: both phased-flipped\n  table_iter[122] = phased_flipped_01;\n  table_iter[123] = phased_flipped_01;\n}\n\nvoid VcfPhaseLookup2b(const uintptr_t* genoarr, const uintptr_t* cur_phased, const uintptr_t* phaseinfo, const void* table246x2bx2, uint32_t sample_ct, void* __restrict result) {\n  const uint32_t* table_alias = S_CAST(const uint32_t*, table246x2bx2);\n  const uint32_t sample_ctl2_m1 = (sample_ct - 1) / kBitsPerWordD2;\n  const Halfword* cur_phased_alias = DowncastKWToHW(cur_phased);\n  const Halfword* phaseinfo_alias = DowncastKWToHW(phaseinfo);\n  unsigned char* result_biter = S_CAST(unsigned char*, result);\n  uint32_t loop_len = kBitsPerWordD4;\n  uintptr_t geno_word = 0;\n  uintptr_t cur_phased_hw_shifted = 0;\n  uintptr_t phaseinfo_hw_shifted = 0;\n  for (uint32_t widx = 0; ; ++widx) {\n    if (widx >= sample_ctl2_m1) {\n      if (widx > sample_ctl2_m1) {\n        if (sample_ct % 2) {\n          uintptr_t cur_idx = (geno_word & 3) | (cur_phased_hw_shifted & 16) | (phaseinfo_hw_shifted & 64);\n          memcpy(result_biter, &(table_alias[cur_idx]), 2);\n        }\n        return;\n      }\n      loop_len = ModNz(sample_ct, kBitsPerWordD2) / 2;\n    }\n    geno_word = genoarr[widx];\n    cur_phased_hw_shifted = cur_phased_alias[widx];\n    if (!cur_phased_hw_shifted) {\n      for (uint32_t uii = 0; uii != loop_len; ++uii) {\n        AppendU32(table_alias[geno_word & 15], &result_biter);\n        geno_word >>= 4;\n      }\n    } else {\n      cur_phased_hw_shifted = cur_phased_hw_shifted << 4;\n      phaseinfo_hw_shifted = phaseinfo_alias[widx];\n\n      // note that this must be on a separate line (or we have to static_cast)\n      phaseinfo_hw_shifted = phaseinfo_hw_shifted << 6;\n\n      for (uint32_t uii = 0; uii != loop_len; ++uii) {\n        const uintptr_t cur_idx = (geno_word & 15) | (cur_phased_hw_shifted & 48) | (phaseinfo_hw_shifted & 192);\n        AppendU32(table_alias[cur_idx], &result_biter);\n        geno_word >>= 4;\n        cur_phased_hw_shifted >>= 2;\n        phaseinfo_hw_shifted >>= 2;\n      }\n    }\n  }\n}\n\nvoid InitVcfPhaseLookup2b(void* table246x2bx2) {\n  uint16_t* table_iter = S_CAST(uint16_t*, table246x2bx2);\n  uint16_t unphased_vals[4];\n  unphased_vals[0] = table_iter[0];\n  table_iter[1] = unphased_vals[0];\n  unphased_vals[1] = table_iter[2];\n  table_iter[3] = unphased_vals[0];\n  unphased_vals[2] = table_iter[4];\n  table_iter[5] = unphased_vals[0];\n  
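// (same pattern as the 4b initializer above: the even slots were preset by\n  // the caller to the unphased f(0)..f(3) values, and each odd slot gets f(0)\n  // because table rows 0-3 pair the low genotype with an unphased second\n  // genotype of 0)\n  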
unphased_vals[3] = table_iter[6];\n  table_iter[7] = unphased_vals[0];\n  table_iter = &(table_iter[8]);\n  for (uint32_t high_idx = 1; high_idx != 4; ++high_idx) {\n    const uint32_t cur_high = unphased_vals[high_idx];\n    for (uint32_t low_idx = 0; low_idx != 4; ++low_idx) {\n      *table_iter++ = unphased_vals[low_idx];\n      *table_iter++ = cur_high;\n    }\n  }\n  // [16][0]..[31][1]: first entry is phased and unflipped, second is unphased\n  uint16_t phased_unflipped_vals[4];\n  phased_unflipped_vals[0] = table_iter[0];\n  table_iter[1] = unphased_vals[0];\n  phased_unflipped_vals[1] = table_iter[2];\n  table_iter[3] = unphased_vals[0];\n  phased_unflipped_vals[2] = table_iter[4];\n  table_iter[5] = unphased_vals[0];\n  phased_unflipped_vals[3] = table_iter[6];\n  table_iter[7] = unphased_vals[0];\n  table_iter = &(table_iter[8]);\n  for (uint32_t high_idx = 1; high_idx != 4; ++high_idx) {\n    const uint32_t cur_high = unphased_vals[high_idx];\n    for (uint32_t low_idx = 0; low_idx != 4; ++low_idx) {\n      *table_iter++ = phased_unflipped_vals[low_idx];\n      *table_iter++ = cur_high;\n    }\n  }\n  // [32][0]..[63][1]: second entry is phased and unflipped\n  for (uint32_t high_idx = 0; high_idx != 4; ++high_idx) {\n    const uint32_t cur_high = phased_unflipped_vals[high_idx];\n    for (uint32_t low_idx = 0; low_idx != 4; ++low_idx) {\n      *table_iter++ = unphased_vals[low_idx];\n      *table_iter++ = cur_high;\n    }\n  }\n  for (uint32_t high_idx = 0; high_idx != 4; ++high_idx) {\n    const uint32_t cur_high = phased_unflipped_vals[high_idx];\n    for (uint32_t low_idx = 0; low_idx != 4; ++low_idx) {\n      *table_iter++ = phased_unflipped_vals[low_idx];\n      *table_iter++ = cur_high;\n    }\n  }\n  // [64][0]..[79][1] should be impossible\n  table_iter = &(table_iter[32]);\n  // [80][0]..[95][1]: first entry is phased and flipped, second is unphased\n  // genotype must be 01\n  const uint32_t phased_flipped_01 = table_iter[2];\n  for (uint32_t high_idx = 0; high_idx != 4; ++high_idx) {\n    table_iter[2] = phased_flipped_01;\n    table_iter[3] = unphased_vals[high_idx];\n    table_iter = &(table_iter[8]);\n  }\n  // [96][0]..[111][1] should be impossible\n  table_iter = &(table_iter[32]);\n  // [112][0]..[127][1]: first entry phased-flipped, second phased-unflipped\n  for (uint32_t high_idx = 0; high_idx != 4; ++high_idx) {\n    table_iter[2] = phased_flipped_01;\n    table_iter[3] = phased_unflipped_vals[high_idx];\n    table_iter = &(table_iter[8]);\n  }\n  // [128][0]..[163][1] should be impossible\n  table_iter = &(table_iter[72]);\n  // [164][0]..[167][1]: second entry phased-flipped, first entry unphased\n  for (uint32_t low_idx = 0; low_idx != 4; ++low_idx) {\n    *table_iter++ = unphased_vals[low_idx];\n    *table_iter++ = phased_flipped_01;\n  }\n  // [168][0]..[179][1] should be impossible\n  table_iter = &(table_iter[24]);\n  // [180][0]..[183][1]: second entry phased-flipped, first phased-unflipped\n  for (uint32_t low_idx = 0; low_idx != 4; ++low_idx) {\n    *table_iter++ = phased_unflipped_vals[low_idx];\n    *table_iter++ = phased_flipped_01;\n  }\n  // [184][0]..[244][1] should be impossible\n  // [245][0]..[245][1]: both phased-flipped\n  table_iter[122] = phased_flipped_01;\n  table_iter[123] = phased_flipped_01;\n}\n\n\nvoid ClearGenoarrMissing1bit8Unsafe(const uintptr_t* __restrict genoarr, uint32_t* subset_sizep, uintptr_t* __restrict subset, void* __restrict sparse_vals) {\n  const uint32_t orig_subset_size = *subset_sizep;\n  Halfword* 
subset_alias = DowncastWToHW(subset);\n  uint32_t read_idx = 0;\n  // deliberate overflow\n  for (uint32_t read_widx = UINT32_MAX; ; ) {\n    uint32_t subset_bits;\n    do {\n      subset_bits = subset_alias[++read_widx];\n    } while (!subset_bits);\n    uintptr_t detect_11 = genoarr[read_widx];\n    detect_11 = detect_11 & (detect_11 >> 1) & kMask5555;\n    if (detect_11) {\n      uint32_t detect_11_hw = PackWordToHalfword(detect_11);\n      const uint32_t joint_u32 = subset_bits & detect_11_hw;\n      if (joint_u32) {\n        uintptr_t lowbit = joint_u32 & (-joint_u32);\n        uint32_t write_idx = read_idx + PopcountWord(subset_bits & (lowbit - 1));\n        read_idx = write_idx + 1;\n        uint32_t subset_bits_write = subset_bits ^ lowbit;\n        unsigned char* sparse_vals_uc = S_CAST(unsigned char*, sparse_vals);\n        subset_bits &= -(2 * lowbit);\n        for (; read_idx != orig_subset_size; ++read_idx) {\n#ifdef USE_AVX2\n          if (!subset_bits) {\n            subset_alias[read_widx] = subset_bits_write;\n            do {\n              subset_bits = subset_alias[++read_widx];\n            } while (!subset_bits);\n            subset_bits_write = subset_bits;\n            detect_11 = genoarr[read_widx];\n            detect_11 = detect_11 & (detect_11 >> 1);\n            detect_11_hw = PackWordToHalfwordMask5555(detect_11);\n          }\n          lowbit = subset_bits & (-subset_bits);\n          subset_bits ^= lowbit;\n          if (lowbit & detect_11_hw) {\n            subset_bits_write ^= lowbit;\n            continue;\n          }\n#else\n          if (!subset_bits) {\n            subset_alias[read_widx] = subset_bits_write;\n            do {\n              subset_bits = subset_alias[++read_widx];\n            } while (!subset_bits);\n            subset_bits_write = subset_bits;\n            detect_11 = genoarr[read_widx];\n            detect_11 = detect_11 & (detect_11 >> 1);\n          }\n          lowbit = subset_bits & (-subset_bits);\n          subset_bits ^= lowbit;\n          if ((lowbit * lowbit) & detect_11) {\n            subset_bits_write ^= lowbit;\n            continue;\n          }\n#endif\n          sparse_vals_uc[write_idx++] = sparse_vals_uc[read_idx];\n        }\n        subset_alias[read_widx] = subset_bits_write;\n        *subset_sizep = write_idx;\n        return;\n      }\n    }\n    read_idx += PopcountWord(subset_bits);\n    if (read_idx == orig_subset_size) {\n      return;\n    }\n  }\n}\n\nvoid ClearGenoarrMissing1bit16Unsafe(const uintptr_t* __restrict genoarr, uint32_t* subset_sizep, uintptr_t* __restrict subset, void* __restrict sparse_vals) {\n  const uint32_t orig_subset_size = *subset_sizep;\n  Halfword* subset_alias = DowncastWToHW(subset);\n  uint32_t read_idx = 0;\n  // deliberate overflow\n  for (uint32_t read_widx = UINT32_MAX; ; ) {\n    uint32_t subset_bits;\n    do {\n      subset_bits = subset_alias[++read_widx];\n    } while (!subset_bits);\n    uintptr_t detect_11 = genoarr[read_widx];\n    detect_11 = detect_11 & (detect_11 >> 1) & kMask5555;\n    if (detect_11) {\n      uint32_t detect_11_hw = PackWordToHalfword(detect_11);\n      const uint32_t joint_u32 = subset_bits & detect_11_hw;\n      if (joint_u32) {\n        uintptr_t lowbit = joint_u32 & (-joint_u32);\n        uint32_t write_idx = read_idx + PopcountWord(subset_bits & (lowbit - 1));\n        read_idx = write_idx + 1;\n        uint32_t subset_bits_write = subset_bits ^ lowbit;\n        uint16_t* sparse_vals_u16 = S_CAST(uint16_t*, sparse_vals);\n        
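// -(2 * lowbit) == ~(2 * lowbit - 1), so this clears lowbit and all bits\n        // below it; the loop below then rescans starting just past the first\n        // missing entry.\n        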
subset_bits &= -(2 * lowbit);\n        for (; read_idx != orig_subset_size; ++read_idx) {\n#ifdef USE_AVX2\n          if (!subset_bits) {\n            subset_alias[read_widx] = subset_bits_write;\n            do {\n              subset_bits = subset_alias[++read_widx];\n            } while (!subset_bits);\n            subset_bits_write = subset_bits;\n            detect_11 = genoarr[read_widx];\n            detect_11 = detect_11 & (detect_11 >> 1);\n            detect_11_hw = PackWordToHalfwordMask5555(detect_11);\n          }\n          lowbit = subset_bits & (-subset_bits);\n          subset_bits ^= lowbit;\n          if (lowbit & detect_11_hw) {\n            subset_bits_write ^= lowbit;\n            continue;\n          }\n#else\n          if (!subset_bits) {\n            subset_alias[read_widx] = subset_bits_write;\n            do {\n              subset_bits = subset_alias[++read_widx];\n            } while (!subset_bits);\n            subset_bits_write = subset_bits;\n            detect_11 = genoarr[read_widx];\n            detect_11 = detect_11 & (detect_11 >> 1);\n          }\n          lowbit = subset_bits & (-subset_bits);\n          subset_bits ^= lowbit;\n          if ((lowbit * lowbit) & detect_11) {\n            subset_bits_write ^= lowbit;\n            continue;\n          }\n#endif\n          sparse_vals_u16[write_idx++] = sparse_vals_u16[read_idx];\n        }\n        subset_alias[read_widx] = subset_bits_write;\n        *subset_sizep = write_idx;\n        return;\n      }\n    }\n    read_idx += PopcountWord(subset_bits);\n    if (read_idx == orig_subset_size) {\n      return;\n    }\n  }\n}\n\ndouble u127prod_diff_d(uint64_t plus_term0, uint64_t plus_term1, uint64_t minus_term0, uint64_t minus_term1) {\n  uint64_t plus_hi;\n  const uint64_t plus_lo = multiply64to128(plus_term0, plus_term1, &plus_hi);\n  uint64_t minus_hi;\n  const uint64_t minus_lo = multiply64to128(minus_term0, minus_term1, &minus_hi);\n  const uint64_t result_lo = plus_lo - minus_lo;\n  const uint64_t result_hi = plus_hi - minus_hi - (plus_lo < minus_lo);\n  return u127tod(result_hi, result_lo);\n}\n\ndouble i127prod_diff_d(uint64_t plus_term0, uint64_t plus_term1, uint64_t minus_term0, uint64_t minus_term1) {\n  uint64_t plus_hi;\n  const uint64_t plus_lo = multiply64to128(plus_term0, plus_term1, &plus_hi);\n  uint64_t minus_hi;\n  const uint64_t minus_lo = multiply64to128(minus_term0, minus_term1, &minus_hi);\n  const uint64_t result_lo = plus_lo - minus_lo;\n  const uint64_t result_hi = plus_hi - minus_hi - (plus_lo < minus_lo);\n  if (!(result_hi & (1LLU << 63))) {\n    return u127tod(result_hi, result_lo);\n  } else {\n    const uint64_t minus_result_lo = -result_lo;\n    const uint64_t minus_result_hi = (-result_hi) - (result_lo != 0);\n    return -u127tod(minus_result_hi, minus_result_lo);\n  }\n}\n\ndouble MultiallelicDiploidMinimac3R2(const uint64_t* __restrict sums, const uint64_t* __restrict hap_ssqs_x2, uint32_t nm_sample_ct, uint32_t allele_ct, uint32_t extra_phased_het_ct) {\n  // sums[k] == sum_i [left_dosage_{ik} + right_dosage_{ik}]\n  // hap_ssqs_x2[k] ==\n  //   2 * sum_i [(left_dosage_{ik})^2 + (right_dosage_{ik})^2]\n  //   This may be odd, since it's computed as\n  //     (left + right)^2 + (left - right)^2\n  //   and the parities of the two integers can be different.\n  // For phased hardcalls, it is fine for the hap_ssqs_x2[k] values to\n  // correspond to unphased hardcalls iff extra_phased_het_ct is the number of\n  // phased-hets that weren't accounted for in hap_ssqs_x2[]; 
this makes it\n  // straightforward for GetMultiallelicCountsAndDosage16s to stick to the\n  // custom internal multiallelic-count functions.\n  if (!nm_sample_ct) {\n    return (0.0 / 0.0);\n  }\n  // Allelic phased-dosages are on a (k-1)-dimensional simplex; embed this in\n  // R^k as the (1, 0, ..., 0), (0, 1, ..., 0), ..., (0, 0, ..., 1) polytope.\n  // Define\n  //   m_k := (1/2n) * sum_i [left_dosage_{ik} + right_dosage_{ik}]\n  // Minimac3-r2 is defined as empirical phased-dosage variance divided by\n  // expected-under-allele-frequencies variance.\n  // Expected sum-of-squared-Euclidean-distance with perfect imputation is\n  //   2n(\"1\"^2 - sum_k ((m_k)^2))\n  // and observed sum-of-squared-distance is\n  //   sum_k (sum_i [(left_dosage_{ik})^2 + (right_dosage_{ik})^2] -\n  //          2n((m_k)^2))\n\n  // ssq_sum_x2 can be as large as 2^31 * nm_sample_ct; meansq_sum can cancel\n  // as little as (1 / allele_ct) of it\n  if (nm_sample_ct < 92682) {\n    uint64_t ssq_sum_x2 = extra_phased_het_ct * 0x20000000LLU;\n    uint64_t meansq_sum = 0;\n    for (uint32_t allele_idx = 0; allele_idx != allele_ct; ++allele_idx) {\n      const uint64_t cur_allele_dosage = sums[allele_idx];\n      ssq_sum_x2 += hap_ssqs_x2[allele_idx];\n      // cur_allele_dosage == 2n * m_k\n      // -> meansq_sum becomes 2n * sum_k [2n((m_k)^2)]\n      meansq_sum += cur_allele_dosage * cur_allele_dosage;\n    }\n    const uint64_t observed_variance_times_2n = ssq_sum_x2 * nm_sample_ct - meansq_sum;\n    // \"1\"^2 -> 16384^2 in our units.  So 2n * 2n * \"1\"^2 is equal to\n    //   n * n * 16384^2 * 4.\n    const uint64_t expected_variance_times_2n = nm_sample_ct * 0x40000000LLU * nm_sample_ct - meansq_sum;\n    // mach_r2 == 1 test cases:\n    // - AA, AB, BB: 1, 4, 4\n    //   sums[0] = 6 * 2^14\n    //   sums[1] = 12 * 2^14\n    //   ssqs[0] = 8 * 2^28\n    //   ssqs[1] = 20 * 2^28\n    //   ssq_sum = (8 + 20) * 2^28\n    //   meansq_sum = (6 * 6 + 12 * 12) * 2^28\n    //   observed_variance = 28 * 9 * 2^28 - 180 * 2^28\n    //   expected_variance = (9 * 9 * 4 * 2^28 - 180 * 2^28) / 2\n    // - AA, AB, BB, AC, BC, CC: 1, 4, 4, 6, 12, 9\n    //   sums[0] = 12 * 2^14\n    //   sums[1] = 24 * 2^14\n    //   sums[2] = 36 * 2^14\n    //   ssqs[0] = 14 * 2^28\n    //   ssqs[1] = 32 * 2^28\n    //   ssqs[2] = 54 * 2^28\n    //   ssq_sum = (14 + 32 + 54) * 2^28\n    //   meansq_sum = (12 * 12 + 24 * 24 + 36 * 36) * 2^28\n    //   observed_variance = 100 * 36 * 2^28 - 56 * 36 * 2^28\n    //   expected_variance = (36 * 36 * 4 * 2^28 - 56 * 36 * 2^28) / 2\n    return S_CAST(double, observed_variance_times_2n) / S_CAST(double, expected_variance_times_2n);\n  }\n  uint64_t ssq_sum_x2 = extra_phased_het_ct * 0x20000000LLU;\n  uint64_t meansq_sum_lo = 0;\n  uint64_t meansq_sum_hi = 0;\n  for (uint32_t allele_idx = 0; allele_idx != allele_ct; ++allele_idx) {\n    const uint64_t cur_allele_dosage = sums[allele_idx];\n    const uint64_t cur_ssq_x2 = hap_ssqs_x2[allele_idx];\n    ssq_sum_x2 += cur_ssq_x2;\n    uint64_t incr_hi;\n    uint64_t incr_lo = multiply64to128(cur_allele_dosage, cur_allele_dosage, &incr_hi);\n    meansq_sum_lo += incr_lo;\n    meansq_sum_hi += incr_hi + (meansq_sum_lo < incr_lo);\n  }\n  uint64_t prod_hi;\n  uint64_t prod_lo = multiply64to128(ssq_sum_x2, nm_sample_ct, &prod_hi);\n  const uint64_t observed_variance_times_2n_lo = prod_lo - meansq_sum_lo;\n  const uint64_t observed_variance_times_2n_hi = prod_hi - meansq_sum_hi - (prod_lo < meansq_sum_lo);\n  const double observed_variance_times_2n = 
u127tod(observed_variance_times_2n_hi, observed_variance_times_2n_lo);\n\n  prod_lo = multiply64to128(nm_sample_ct * 0x40000000LLU, nm_sample_ct, &prod_hi);\n  const uint64_t expected_variance_times_2n_lo = prod_lo - meansq_sum_lo;\n  const uint64_t expected_variance_times_2n_hi = prod_hi - meansq_sum_hi - (prod_lo < meansq_sum_lo);\n  const double expected_variance_times_2n = u127tod(expected_variance_times_2n_hi, expected_variance_times_2n_lo);\n  return observed_variance_times_2n / expected_variance_times_2n;\n}\n\nvoid PgrDifflistToGenovecUnsafe(const uintptr_t* __restrict raregeno, const uint32_t* difflist_sample_ids, uintptr_t difflist_common_geno, uint32_t sample_ct, uint32_t difflist_len, uintptr_t* __restrict genovec) {\n  // Ok for trailing bits of raregeno to be nonzero.  Does not zero out\n  // trailing bits of genovec.\n  const uint32_t vec_ct = NypCtToVecCt(sample_ct);\n  // could just memset up to word boundary; this should be a bit more\n  // vector-instruction-friendly, though\n  vecset(genovec, difflist_common_geno * kMask5555, vec_ct);\n  const uintptr_t* raregeno_incr = raregeno;\n  uint32_t difflist_idx = 0;\n  uint32_t difflist_idx_stop = kBitsPerWordD2;\n  if (!difflist_common_geno) {\n    // faster inner loop since there's no existing value to mask out\n    // todo: check if this should just be deleted since the code bloat causes\n    // too many more cache misses\n    for (; ; difflist_idx_stop += kBitsPerWordD2) {\n      if (difflist_idx_stop > difflist_len) {\n        if (difflist_idx == difflist_len) {\n          return;\n        }\n        difflist_idx_stop = difflist_len;\n      }\n      uintptr_t raregeno_word = *raregeno_incr++;\n      for (; difflist_idx != difflist_idx_stop; ++difflist_idx) {\n        const uint32_t cur_sample_idx = difflist_sample_ids[difflist_idx];\n        genovec[cur_sample_idx / kBitsPerWordD2] |= (raregeno_word & 3) << (2 * (cur_sample_idx % kBitsPerWordD2));\n        raregeno_word >>= 2;\n      }\n    }\n  }\n  for (; ; difflist_idx_stop += kBitsPerWordD2) {\n    if (difflist_idx_stop > difflist_len) {\n      if (difflist_idx == difflist_len) {\n        return;\n      }\n      difflist_idx_stop = difflist_len;\n    }\n    uintptr_t raregeno_word = *raregeno_incr++;\n    for (; difflist_idx != difflist_idx_stop; ++difflist_idx) {\n      const uint32_t cur_sample_idx = difflist_sample_ids[difflist_idx];\n      AssignNyparrEntry(cur_sample_idx, raregeno_word & 3, genovec);\n      raregeno_word >>= 2;\n    }\n  }\n}\n\nconst uint16_t kHcToAlleleCodes[1024] = QUAD_TABLE256(0, 0x100, 0x101, 0xffff);\n\nstatic_assert(sizeof(AlleleCode) == 1, \"PglMultiallelicSparseToDenseMiss() needs to be updated.\");\nvoid PglMultiallelicSparseToDenseMiss(const PgenVariant* pgvp, uint32_t sample_ct, AlleleCode* __restrict wide_codes) {\n  GenoarrLookup256x2bx4(pgvp->genovec, kHcToAlleleCodes, sample_ct, wide_codes);\n  const uint32_t patch_01_ct = pgvp->patch_01_ct;\n  if (patch_01_ct) {\n    const uintptr_t* patch_01_set = pgvp->patch_01_set;\n    uintptr_t sample_idx_base = 0;\n    uintptr_t cur_bits = patch_01_set[0];\n    const AlleleCode* patch_01_vals = pgvp->patch_01_vals;\n    AlleleCode* wide_codes1 = &(wide_codes[1]);\n    for (uint32_t uii = 0; uii != patch_01_ct; ++uii) {\n      const uintptr_t sample_idx = BitIter1(patch_01_set, &sample_idx_base, &cur_bits);\n      wide_codes1[2 * sample_idx] = patch_01_vals[uii];\n    }\n  }\n  const uint32_t patch_10_ct = pgvp->patch_10_ct;\n  if (patch_10_ct) {\n    const uintptr_t* patch_10_set = 
pgvp->patch_10_set;\n    uintptr_t sample_idx_base = 0;\n    uintptr_t cur_bits = patch_10_set[0];\n    const DoubleAlleleCode* patch_10_vals_alias = R_CAST(const DoubleAlleleCode*, pgvp->patch_10_vals);\n    DoubleAlleleCode* wide_codes_alias = R_CAST(DoubleAlleleCode*, wide_codes);\n    for (uint32_t uii = 0; uii != patch_10_ct; ++uii) {\n      const uintptr_t sample_idx = BitIter1(patch_10_set, &sample_idx_base, &cur_bits);\n      wide_codes_alias[sample_idx] = patch_10_vals_alias[uii];\n    }\n  }\n}\n\nuintptr_t PglComputeMaxAlleleCt(const uintptr_t* allele_idx_offsets, uint32_t variant_ct) {\n  if ((!allele_idx_offsets) || (allele_idx_offsets[variant_ct] == 2 * variant_ct)) {\n    return 2;\n  }\n  // todo: try vectorizing this\n  uintptr_t max_allele_ct = 2;\n  uintptr_t prev_offset = allele_idx_offsets[0];\n  const uintptr_t* shifted_offsets = &(allele_idx_offsets[1]);\n  for (uintptr_t uii = 0; uii != variant_ct; ++uii) {\n    const uintptr_t cur_offset = shifted_offsets[uii];\n    const uintptr_t cur_allele_ct = cur_offset - prev_offset;\n    if (cur_allele_ct > max_allele_ct) {\n      max_allele_ct = cur_allele_ct;\n    }\n    prev_offset = cur_offset;\n  }\n  return max_allele_ct;\n}\n\n// Ok for nybble_vvec to be unaligned.\nuint32_t CountNybbleVec(const unsigned char* nybble_vvec_biter, uintptr_t nybble_word, uint32_t vec_ct) {\n  const VecW m0 = vecw_setzero();\n  const VecW alld15 = VCONST_W(kMask1111);\n  const VecW m4 = VCONST_W(kMask0F0F);\n  const VecW xor_vvec = vecw_set1(nybble_word);\n  VecW prev_sad_result = vecw_setzero();\n  VecW acc = vecw_setzero();\n  uintptr_t cur_incr = 15;\n  for (; ; vec_ct -= cur_incr) {\n    if (vec_ct < 15) {\n      if (!vec_ct) {\n        acc = acc + prev_sad_result;\n        return HsumW(acc);\n      }\n      cur_incr = vec_ct;\n    }\n    VecW inner_acc = vecw_setzero();\n    const unsigned char* nybble_vvec_stop = &(nybble_vvec_biter[cur_incr * kBytesPerVec]);\n    do {\n      VecW loader = vecw_loadu(nybble_vvec_biter) ^ xor_vvec;\n      nybble_vvec_biter += kBytesPerVec;\n      // DetectAllZeroNybbles() followed by right-shift-3 is the same number of\n      // operations, can see if that's any faster in practice\n      loader = vecw_srli(loader, 1) | loader;\n      loader = vecw_srli(loader, 2) | loader;\n      inner_acc = inner_acc + vecw_and_notfirst(loader, alld15);\n    } while (nybble_vvec_biter < nybble_vvec_stop);\n    inner_acc = (inner_acc & m4) + (vecw_srli(inner_acc, 4) & m4);\n    acc = acc + prev_sad_result;\n    prev_sad_result = vecw_bytesum(inner_acc, m0);\n  }\n}\n\nuint32_t CountNybble(const void* nybblearr, uintptr_t nybble_word, uintptr_t nybble_ct) {\n  const unsigned char* nybblearr_uc = S_CAST(const unsigned char*, nybblearr);\n  const uint32_t fullword_ct = nybble_ct / kBitsPerWordD4;\n  uint32_t tot = CountNybbleVec(nybblearr_uc, nybble_word, fullword_ct / kWordsPerVec);\n#ifdef __LP64__\n  for (uint32_t word_idx = RoundDownPow2(fullword_ct, kWordsPerVec); word_idx != fullword_ct; ++word_idx) {\n    uintptr_t cur_word;\n    CopyFromUnalignedOffsetW(&cur_word, nybblearr_uc, word_idx);\n    cur_word ^= nybble_word;\n    cur_word = cur_word | (cur_word >> 1);\n    cur_word = cur_word | (cur_word >> 2);\n    tot += Popcount0001Word((~cur_word) & kMask1111);\n  }\n#endif\n  const uint32_t trailing_nybble_ct = nybble_ct % kBitsPerWordD4;\n  if (trailing_nybble_ct) {\n    const uint32_t trailing_byte_ct = DivUp(trailing_nybble_ct, (CHAR_BIT / 4));\n    uintptr_t cur_word = SubwordLoad(&(nybblearr_uc[fullword_ct * 
kBytesPerWord]), trailing_byte_ct) ^ nybble_word;\n    cur_word = cur_word | (cur_word >> 1);\n    cur_word = cur_word | (cur_word >> 2);\n    cur_word = bzhi((~cur_word) & kMask1111, trailing_nybble_ct * 4);\n#if defined(USE_SSE42) || !defined(__LP64__)\n    tot += Popcount0001Word(cur_word);\n#else\n    // minor optimization, can't overflow\n    tot += (cur_word * kMask1111) >> 60;\n#endif\n  }\n  return tot;\n}\n\nuint64_t PglHeaderBaseEndOffset(uint32_t variant_ct, uintptr_t vrec_len_byte_ct, uint32_t phase_or_dosage_present, uint32_t explicit_nonref_flags) {\n  const uint32_t vblock_ct = DivUp(variant_ct, kPglVblockSize);\n  uint64_t offset = 12 + vblock_ct * sizeof(int64_t) + variant_ct * vrec_len_byte_ct;\n  if (phase_or_dosage_present) {\n    // 8-bit vrtypes\n    offset += variant_ct;\n  } else {\n    // 4-bit vrtypes\n    offset += DivUp(variant_ct, 2);\n  }\n  if (explicit_nonref_flags) {\n    offset += DivUp(variant_ct, CHAR_BIT);\n  }\n  return offset;\n}\n\n#ifdef __cplusplus\n}  // namespace plink2\n#endif\n"
  },
  {
    "path": "external_libs/pgenlib/include/pgenlib_misc.h",
    "content": "#ifndef __PGENLIB_MISC_H__\n#define __PGENLIB_MISC_H__\n\n// This library is part of PLINK 2.0, copyright (C) 2005-2024 Shaun Purcell,\n// Christopher Chang.\n//\n// This library is free software: you can redistribute it and/or modify it\n// under the terms of the GNU Lesser General Public License as published by the\n// Free Software Foundation; either version 3 of the License, or (at your\n// option) any later version.\n//\n// This library is distributed in the hope that it will be useful, but WITHOUT\n// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or\n// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License\n// for more details.\n//\n// You should have received a copy of the GNU Lesser General Public License\n// along with this library.  If not, see <http://www.gnu.org/licenses/>.\n\n\n// Low-level C99/C++03/C++11 library for reading .pgen (PLINK 2.0 binary) files\n// (designed to produce good lowest-common-denominator binaries across\n// Windows/OS X/Linux).\n//\n// File format design:\n// - With the header loaded, it is possible to efficiently access a variant by\n//   its index.  Since records can now be variable-length, this sometimes\n//   requires storage of record lengths.\n// - Due to the power of LD-based compression, we permit a variant record to\n//   just store a list of differences from an earlier, fully stored variant.\n//   However, only short-range dependence is permitted; sequential processing\n//   of the file only requires caching of the most recent explicitly stored\n//   variant.\n// - Like the plink1 format, this is balanced for relatively easy reading and\n//   writing; in particular, the mode-0x10/0x11 header is not read-optimized,\n//   it passes up some obvious compression opportunities which would make it\n//   more difficult to write e.g. an efficient file merger.  This isn't a big\n//   deal if we don't have a huge number of one-sample .pgen files sharing a\n//   single .bim file (or equivalent).  (If they don't share the same .bim\n//   file, .bim overhead > .pgen overhead.)  If we ever do, we can define an\n//   additional mode to handle that case more efficiently.\n// - Building blocks are arrays of 1-bit, 2-bit, 4-bit, 1-byte, 2-byte, 3-byte,\n//   and 4-byte values.  3/5/6/7(/9...)-bit values don't play well with\n//   bitwise operations, and when it's important, there's usually a natural way\n//   to split them into power-of-2-bit components.\n//   (unsigned integers known to be smaller than 2^24, but not known to be\n//   smaller than 2^16, are stored as 3-byte values on disk and \"decompressed\"\n//   to uint32_t during loading.)\n// - Missing value is usually all-1s.  (Only exceptions right now: plink1\n//   backward compatibility mode; presence/absence of rare alts for variants\n//   with >2 alt alleles is an array of 1-bit values, where absence = 0; and\n//   presence/absence of phasing info is similar.)  Time to move away from 01\n//   nonsense.\n// - Individual variant records are prohibited from being >= 4GiB, to reduce\n//   integer overflow issues.  
(This may be reduced to 2GiB later, but I'll\n//   attempt to handle the 2-4GiB range properly for now since it's conceivable\n//   for multiallelic records in very large datasets to reach that size.)\n// - (later todo: include stuff like file creation command in .pvar header;\n//   that doesn't really belong in a binary file.)\n// See the bottom of this header file, and the pgen_spec/ subdirectory of\n// plink-ng on GitHub, for details.\n\n// Additional parameter conventions:\n// - \"nyparr\" indicates a word-aligned, packed array of 2-bit values, while\n//   \"nypvec\" is the vector-aligned equivalent.  \"nybblearr\" marks the much\n//   rarer case of a packed array of 4-bit values.\n// - \"nypvec_01\" indicates a packed, vector-aligned array of 2-bit values where\n//   each value is zero or one.  This data structure was used quite a bit by\n//   plink 1.9 for operating on a subset of a 2-bit-genotype array.\n// - \"genoarr\"/\"genovec\" indicates a nyparr/nypvec containing genotype\n//   information.\n// - \"interleaved_vec\" is plink 2.0's preferred alternative to nypvec_01: we\n//   basically stack pairs of adjacent vectors on top of each other and unpack\n//   on the fly, since that tends to be faster than having to access twice as\n//   much memory.\n\n#include \"plink2_bits.h\"\n\n// 10000 * major + 100 * minor + patch\n// Exception to CONSTI32, since we want the preprocessor to have access to this\n// value.  Named with all caps as a consequence.\n#define PGENLIB_INTERNAL_VERNUM 2003\n\n#ifdef __cplusplus\nnamespace plink2 {\n#endif\n\n// other configuration-ish values needed by plink2_common subset\ntypedef unsigned char AlleleCode;\ntypedef uint16_t DoubleAlleleCode;\nstatic_assert(sizeof(DoubleAlleleCode) == 2 * sizeof(AlleleCode), \"Inconsistent AlleleCode and DoubleAlleleCode definitions.\");\n// Set this to 65534 if AlleleCode is uint16_t, 2^24 - 1 if uint32_t.\nCONSTI32(kPglMaxAltAlleleCt, 254);\n\nCONSTI32(kPglMaxAlleleCt, kPglMaxAltAlleleCt + 1);\n#define PGL_MAX_ALT_ALLELE_CT_STR \"254\"\n#define PGL_MAX_ALLELE_CT_STR \"255\"\n#ifdef __cplusplus\n#  define kMissingAlleleCode S_CAST(plink2::AlleleCode, -1)\n#  define kMissingDoubleAlleleCode S_CAST(plink2::DoubleAlleleCode, -1)\n#else\n#  define kMissingAlleleCode S_CAST(AlleleCode, -1)\n#  define kMissingDoubleAlleleCode S_CAST(DoubleAlleleCode, -1)\n#endif\nCONSTI32(kAlleleCodesPerVec, kBytesPerVec / sizeof(AlleleCode));\n\nHEADER_INLINE uintptr_t AlleleCodeCtToVecCt(uintptr_t val) {\n  return DivUp(val, kAlleleCodesPerVec);\n}\n\nHEADER_INLINE uintptr_t AlleleCodeCtToAlignedWordCt(uintptr_t val) {\n  return kWordsPerVec * AlleleCodeCtToVecCt(val);\n}\n\nHEADER_INLINE AlleleCode* DowncastWToAC(uintptr_t* pp) {\n  return R_CAST(AlleleCode*, pp);\n}\n\nHEADER_INLINE void AlignACToVec(AlleleCode** pp) {\n  const uintptr_t addr = R_CAST(uintptr_t, *pp);\n  *pp = R_CAST(AlleleCode*, RoundUpPow2(addr, kBytesPerVec));\n}\n\n// returns a word with low bit in each pair set at each 00.\nHEADER_INLINE uintptr_t Word00(uintptr_t ww) {\n  return (~(ww | (ww >> 1))) & kMask5555;\n}\n\nHEADER_INLINE uintptr_t Word01(uintptr_t ww) {\n  return ww & (~(ww >> 1)) & kMask5555;\n}\n\n// returns a word with *low* bit in each pair set at each 10.\nHEADER_INLINE uintptr_t Word10(uintptr_t ww) {\n  return (~ww) & (ww >> 1) & kMask5555;\n}\n\nHEADER_INLINE uintptr_t Word11(uintptr_t ww) {\n  return ww & (ww >> 1) & kMask5555;\n}\n\nHEADER_INLINE Halfword Pack00ToHalfword(uintptr_t ww) {\n  return PackWordToHalfwordMask5555(~(ww | (ww >> 
1)));\n}\n\nHEADER_INLINE Halfword Pack01ToHalfword(uintptr_t ww) {\n  return PackWordToHalfwordMask5555(ww & (~(ww >> 1)));\n}\n\nHEADER_INLINE Halfword Pack11ToHalfword(uintptr_t ww) {\n  return PackWordToHalfwordMask5555(ww & (ww >> 1));\n}\n\n#ifdef USE_SSE42\nHEADER_INLINE uint32_t Popcount01Word(uintptr_t val) {\n  return PopcountWord(val);\n}\n\nHEADER_INLINE uint32_t Popcount0001Word(uintptr_t val) {\n  return PopcountWord(val);\n}\n#else\nHEADER_INLINE uint32_t Popcount01Word(uintptr_t val) {\n  return NypsumWord(val);\n}\n\nHEADER_INLINE uint32_t Popcount0001Word(uintptr_t val) {\n#  ifdef __LP64__\n  // (val * kMask1111) >> 60 can barely overflow, sigh\n  const uintptr_t val0 = val & 1;\n  return (((val - val0) * kMask1111) >> 60) + val0;\n#  else\n  return (val * kMask1111) >> 28;\n#  endif\n}\n#endif\n\n// assumes subset_mask has trailing zeroes up to the next vector boundary\nvoid FillInterleavedMaskVec(const uintptr_t* __restrict subset_mask, uint32_t base_vec_ct, uintptr_t* interleaved_mask_vec);\n\nHEADER_INLINE void CopyNyparr(const uintptr_t* __restrict source_nyparr, uint32_t nyparr_entry_ct, uintptr_t* __restrict target_nyparr) {\n  memcpy(target_nyparr, source_nyparr, NypCtToWordCt(nyparr_entry_ct) * kBytesPerWord);\n}\n\n// may want bit past the end of subset_mask (i.e. position\n// raw_nyparr_entry_ct) to always be allocated and unset.  This removes the\n// need for some explicit end-of-bitarray checks.\nvoid CopyNyparrNonemptySubset(const uintptr_t* __restrict raw_nyparr, const uintptr_t* __restrict subset_mask, uint32_t raw_nyparr_entry_ct, uint32_t subset_entry_ct, uintptr_t* __restrict output_nyparr);\n\n// Copies a bit from raw_bitarr for each genoarr entry matching match_word.\n// (match_word must be a multiple of kMask5555.)\nvoid CopyGenomatchSubset(const uintptr_t* __restrict raw_bitarr, const uintptr_t* __restrict genoarr, uintptr_t match_word, uint32_t write_bit_idx_start, uint32_t bit_ct, void* __restrict output);\n\nvoid ExpandBytearrFromGenoarr(const void* __restrict compact_bitarr, const uintptr_t* __restrict genoarr, uintptr_t match_word, uint32_t genoword_ct, uint32_t expand_size, uint32_t read_start_bit, uintptr_t* __restrict target);\n\n\n// These functions are \"unsafe\" since they assume trailing bits of last\n// genovec/genoarr word are zeroed out.\nvoid GenovecCount12Unsafe(const uintptr_t* genovec, uint32_t sample_ct, uint32_t* __restrict raw_01_ctp, uint32_t* __restrict raw_10_ctp);\n\nvoid Count3FreqVec6(const void* geno_vvec, uint32_t vec_ct, uint32_t* __restrict even_ctp, uint32_t* __restrict odd_ctp, uint32_t* __restrict bothset_ctp);\n\n// vector-alignment preferred.\nvoid GenoarrCountFreqsUnsafe(const uintptr_t* genoarr, uint32_t sample_ct, STD_ARRAY_REF(uint32_t, 4) genocounts);\n\n// GenoarrCountFreqsUnsafe() wrapper that returns most common genotype,\n// breaking ties in favor of the lower value.\nuintptr_t MostCommonGenoUnsafe(const uintptr_t* genoarr, uint32_t sample_ct);\n\nvoid CountSubset3FreqVec6(const void* __restrict genoarr, const VecW* __restrict interleaved_mask_vvec, uint32_t vec_ct, uint32_t* __restrict even_ctp, uint32_t* __restrict odd_ctp, uint32_t* __restrict bothset_ctp);\n\n// genoarr vector-alignment preferred.\nvoid GenoarrCountSubsetFreqs(const uintptr_t* __restrict genoarr, const uintptr_t* __restrict sample_include_interleaved_vec, uint32_t raw_sample_ct, uint32_t sample_ct, STD_ARRAY_REF(uint32_t, 4) genocounts);\n\n// slower GenoarrCountSubsetFreqs() which does not require\n// sample_include_interleaved_vec 
to be precomputed\nvoid GenoarrCountSubsetFreqs2(const uintptr_t* __restrict genoarr, const uintptr_t* __restrict sample_include, uint32_t raw_sample_ct, uint32_t sample_ct, STD_ARRAY_REF(uint32_t, 4) genocounts);\n\nvoid GenoarrCountInvsubsetFreqs2(const uintptr_t* __restrict genoarr, const uintptr_t* __restrict sample_exclude, uint32_t raw_sample_ct, uint32_t sample_ct, STD_ARRAY_REF(uint32_t, 4) genocounts);\n\nvoid GenoarrCountSubsetIntersectFreqs(const uintptr_t* __restrict genoarr, const uintptr_t* __restrict subset1, const uintptr_t* __restrict subset2, uint32_t raw_sample_ct, STD_ARRAY_REF(uint32_t, 4) genocounts);\n\nvoid GenovecInvertUnsafe(uint32_t sample_ct, uintptr_t* genovec);\n\nHEADER_INLINE uintptr_t InvertGenoWordUnsafe(uintptr_t geno_word) {\n  return (geno_word ^ ((~(geno_word << 1)) & kMaskAAAA));\n}\n\n// too easy to forget to multiply by 2\nHEADER_INLINE void ZeroTrailingNyps(uintptr_t nyp_ct, uintptr_t* bitarr) {\n  ZeroTrailingBits(nyp_ct * 2, bitarr);\n}\n\nHEADER_INLINE void SetTrailingNyps(uintptr_t nyp_ct, uintptr_t* bitarr) {\n  const uintptr_t trail_ct = nyp_ct % kBitsPerWordD2;\n  if (trail_ct) {\n    // shift count must be based on trail_ct, not nyp_ct: a nyp_ct-based shift\n    // can reach or exceed kBitsPerWord, which is undefined behavior\n    bitarr[nyp_ct / kBitsPerWordD2] |= (~k0LU) << (trail_ct * 2);\n  }\n}\n\n// GetVint31 and Vint32Append moved to plink2_base.\n\n// Input must be validated.\nHEADER_INLINE uint32_t GetVint32Unsafe(const unsigned char** buf_iterp) {\n  uint32_t vint32 = *(*buf_iterp)++;\n  if (vint32 <= 127) {\n    return vint32;\n  }\n  vint32 &= 127;\n  for (uint32_t shift = 7; ; shift += 7) {\n    uint32_t uii = *(*buf_iterp)++;\n    vint32 |= (uii & 127) << shift;\n    if (uii <= 127) {\n      return vint32;\n    }\n  }\n}\n\nHEADER_INLINE void SkipVintUnsafe(const unsigned char** buf_iterp) {\n  uint32_t cur_byte;\n  do {\n    cur_byte = *(*buf_iterp)++;\n  } while (cur_byte & 128);\n}\n\n// Does not update buf_iter.\nHEADER_INLINE uint32_t PeekVint31(const unsigned char* buf_iter, const unsigned char* buf_end) {\n  if (likely(buf_end > buf_iter)) {\n    uint32_t vint32 = *buf_iter++;\n    if (vint32 <= 127) {\n      return vint32;\n    }\n    vint32 &= 127;\n    uint32_t shift = 7;\n    while (likely(buf_end > buf_iter)) {\n      uint32_t uii = *buf_iter++;\n      vint32 |= (uii & 127) << shift;\n      if (uii <= 127) {\n        return vint32;\n      }\n      shift += 7;\n    }\n  }\n  return 0x80000000U;\n}\n\n/*\nHEADER_INLINE void FPutVint31(uint32_t uii, FILE* ff) {\n  // caller's responsibility to periodically check ferror\n  while (uii > 127) {\n    putc_unlocked((uii & 127) + 128, ff);\n    uii >>= 7;\n  }\n  putc_unlocked(uii, ff);\n}\n*/\n\nHEADER_INLINE BoolErr FSkipVint(FILE* ff) {\n  while (1) {\n    const uint32_t cur_byte = getc_unlocked(ff);\n    if (cur_byte <= 127) {\n      return 0;\n    }\n    if (unlikely(cur_byte > 255)) {\n      return 1;\n    }\n  }\n}\n\nHEADER_INLINE uint64_t FGetVint63(FILE* ff) {\n  // Can't be used when multiple threads are reading from ff.\n  uint64_t vint64 = getc_unlocked(ff);\n  if (vint64 <= 127) {\n    return vint64;\n  }\n  if (unlikely(vint64 > 255)) {\n    return (1LLU << 63);\n  }\n  vint64 &= 127;\n  for (uint32_t shift = 7; ; shift += 7) {\n    const uint64_t ullii = getc_unlocked(ff);\n    vint64 |= (ullii & 127) << shift;\n    if (ullii <= 127) {\n      return vint64;\n    }\n    if (unlikely((ullii > 255) || (shift == 56))) {\n      return (1LLU << 63);\n    }\n  }\n}\n\n// Need this for sparse multiallelic dosage.\nHEADER_INLINE unsigned char* Vint64Append(uint64_t ullii, unsigned char* buf) {\n  while (ullii > 127) 
{\n    *buf++ = (ullii & 127) + 128;\n    ullii >>= 7;\n  }\n  *buf++ = ullii;\n  return buf;\n}\n\nHEADER_INLINE uint64_t GetVint64Unsafe(const unsigned char** buf_iterp) {\n  uint64_t vint64 = *(*buf_iterp)++;\n  if (vint64 <= 127) {\n    return vint64;\n  }\n  vint64 &= 127;\n  for (uint32_t shift = 7; ; shift += 7) {\n    uint64_t ullii = *(*buf_iterp)++;\n    vint64 |= (ullii & 127) << shift;\n    if (ullii <= 127) {\n      return vint64;\n    }\n  }\n}\n\nHEADER_INLINE void FPutVint64(uint64_t ullii, FILE* ff) {\n  // caller's responsibility to periodically check ferror\n  while (ullii > 127) {\n    putc_unlocked((ullii & 127) + 128, ff);\n    ullii >>= 7;\n  }\n  putc_unlocked(ullii, ff);\n}\n\n// TODO: make this work properly with kCacheline == 128, then fix other\n// transpose functions, etc.\n\n// main batch size\nCONSTI32(kPglNypTransposeBatch, kNypsPerCacheline);\n\n// word width of each matrix row\nCONSTI32(kPglNypTransposeWords, kWordsPerCacheline);\n\n#ifdef __LP64__\nCONSTI32(kPglNypTransposeBufbytes, (kPglNypTransposeBatch * kPglNypTransposeBatch) / 2);\n\n// buf0 and buf1 assumed to be vector-aligned.\nvoid TransposeNypblock64(const uintptr_t* read_iter, uint32_t read_ul_stride, uint32_t write_ul_stride, uint32_t read_batch_size, uint32_t write_batch_size, uintptr_t* __restrict write_iter, unsigned char* __restrict buf0, unsigned char* __restrict buf1);\n#else  // !__LP64__\nCONSTI32(kPglNypTransposeBufbytes, (kPglNypTransposeBatch * kPglNypTransposeBatch) / 2);\n\nvoid TransposeNypblock32(const uintptr_t* read_iter, uint32_t read_ul_stride, uint32_t write_ul_stride, uint32_t read_batch_size, uint32_t write_batch_size, uintptr_t* __restrict write_iter, unsigned char* __restrict buf0, unsigned char* __restrict buf1);\n#endif\nCONSTI32(kPglNypTransposeBufwords, kPglNypTransposeBufbytes / kBytesPerWord);\n\n// - single block is up to 256x256 (CACHELINE64) or 512x512 (CACHELINE128)\n// - vecaligned_buf must have size 32k (CACHELINE64) or 128k (CACHELINE128)\n// - does NOT zero out trailing bits, because main application is ind-major-bed\n//   <-> plink2 format conversion, where the zeroing would be undone...\n// - important: write_iter must be allocated up to at least\n//   RoundUpPow2(write_batch_size, 4) rows (may want to remove this\n//   requirement)\n\nHEADER_INLINE void TransposeNypblock(const uintptr_t* read_iter, uint32_t read_ul_stride, uint32_t write_ul_stride, uint32_t read_batch_size, uint32_t write_batch_size, uintptr_t* write_iter, VecW* vecaligned_buf) {\n#ifdef __LP64__\n  // assert(!(write_ul_stride % 2));\n  TransposeNypblock64(read_iter, read_ul_stride, write_ul_stride, read_batch_size, write_batch_size, write_iter, DowncastToUc(vecaligned_buf), &(DowncastToUc(vecaligned_buf)[kPglNypTransposeBufbytes / 2]));\n#else\n  TransposeNypblock32(read_iter, read_ul_stride, write_ul_stride, read_batch_size, write_batch_size, write_iter, DowncastToUc(vecaligned_buf), &(DowncastToUc(vecaligned_buf)[kPglNypTransposeBufbytes / 2]));\n#endif\n}\n\n\n// replaces each x with (32768 - x)\n// okay for dosage_main to be nullptr if dosage_ct == 0\nvoid BiallelicDosage16Invert(uint32_t dosage_ct, uint16_t* dosage_main);\n\n// replaces each x with -x\nvoid BiallelicDphase16Invert(uint32_t dphase_ct, int16_t* dphase_delta);\n\nvoid PackWordsToHalfwordsInvmatch(const uintptr_t* __restrict genoarr, uintptr_t inv_match_word, uint32_t inword_ct, uintptr_t* __restrict dst);\n\nvoid PackWordsToHalfwordsMismatch(const uintptr_t* __restrict genoarr, uintptr_t mismatch_word, uint32_t 
inword_ct, uintptr_t* __restrict dst);\n\n// src and dst allowed to be identical; that's why src is not marked const\n// despite not being directly written to.\nvoid MaskWordsToHalfwordsInvmatch(const uintptr_t* __restrict genoarr, uintptr_t inv_match_word, uint32_t inword_ct, uintptr_t* src, uintptr_t* dst);\n\n// Unsafe since it assumes trailing genoarr bits are cleared.  But if they are,\n// trailing missingness bits will be clear.\nHEADER_INLINE void GenoarrToMissingnessUnsafe(const uintptr_t* __restrict genoarr, uint32_t sample_ct, uintptr_t* __restrict missingness) {\n  const uint32_t sample_ctl2 = NypCtToWordCt(sample_ct);\n  PackWordsToHalfwordsInvmatch(genoarr, 0, sample_ctl2, missingness);\n  if (sample_ctl2 % 2) {\n    Halfword* __attribute__((may_alias)) missingness_alias = DowncastWToHW(missingness);\n    missingness_alias[sample_ctl2] = 0;\n  }\n}\n\nHEADER_INLINE void GenoarrToNonmissing(const uintptr_t* __restrict genoarr, uint32_t sample_ct, uintptr_t* __restrict nonmissingness) {\n  const uint32_t sample_ctl2 = NypCtToWordCt(sample_ct);\n  PackWordsToHalfwordsMismatch(genoarr, ~k0LU, sample_ctl2, nonmissingness);\n  ZeroTrailingBits(sample_ct, nonmissingness);\n}\n\nvoid SparseToMissingness(const uintptr_t* __restrict raregeno, const uint32_t* difflist_sample_ids, uint32_t sample_ct, uint32_t difflist_common_geno, uint32_t difflist_len, uintptr_t* __restrict missingness);\n\n// hom_buf gets set bits when genoarr value is 0 or 2.\n// ref2het_buf gets set bits when genoarr value is 0 or 1.\n// N.B. assumes trailing bits of loadbuf have been filled with 1s, not 0s\n// Also takes genoarr word count instead of sample count.\nvoid SplitHomRef2hetUnsafeW(const uintptr_t* genoarr, uint32_t inword_ct, uintptr_t* __restrict hom_buf, uintptr_t* __restrict ref2het_buf);\n\nvoid SplitHomRef2het(const uintptr_t* genoarr, uint32_t sample_ct, uintptr_t* __restrict hom_buf, uintptr_t* __restrict ref2het_buf);\n\n\n// Support for 1-bit-per-haplotype representation.\n// Ok for genoarr to have garbage trailing bits.\nBoolErr HapsplitMustPhased(const uintptr_t* genoarr, const uintptr_t* phasepresent, const uintptr_t* phaseinfo, uint32_t sample_ct, uint32_t phasepresent_ct, uintptr_t* hap_arr, uintptr_t* nm_arr);\n\n// Only 1 haplotype per genotype, rather than 2; het treated as missing.\nvoid HapsplitHaploid(const uintptr_t* __restrict genoarr, uint32_t sample_ct, uintptr_t* __restrict hap_arr, uintptr_t* __restrict nm_arr);\n\n\n// These functions use 16- or 256-element lookup tables to apply functions of\n// the form\n//   f: {0,1,2,3} -> x\n// to genoarr, saving the output to result[].\n// 256-element tables result in a substantially faster inner loop, but they are\n// more expensive to set up and consume a non-negligible fraction of L1 cache,\n// so they aren't always the right choice.\n// When lookup table rows are 16 bytes, they are assumed to be 16-byte aligned\n// in 64-bit builds.  
result[] is not assumed to be aligned.\nvoid GenoarrLookup256x1bx4(const uintptr_t* genoarr, const void* table256x1bx4, uint32_t sample_ct, void* __restrict result);\n\nvoid GenoarrLookup16x4bx2(const uintptr_t* genoarr, const void* table16x4bx2, uint32_t sample_ct, void* result);\n\nvoid GenoarrLookup256x2bx4(const uintptr_t* genoarr, const void* table256x2bx4, uint32_t sample_ct, void* result);\n\nvoid GenoarrLookup4x16b(const uintptr_t* genoarr, const void* table4x16b, uint32_t sample_ct, void* result);\n\n#define PAIR_TABLE16(a, b, c, d) \\\n  {(a), (a), (b), (a), (c), (a), (d), (a), \\\n  (a), (b), (b), (b), (c), (b), (d), (b), \\\n  (a), (c), (b), (c), (c), (c), (d), (c), \\\n  (a), (d), (b), (d), (c), (d), (d), (d)}\n\nvoid GenoarrLookup16x8bx2(const uintptr_t* genoarr, const void* table16x8bx2, uint32_t sample_ct, void* result);\n\n#define QUAD_TABLE256_INTERNAL2(a, b, c, d, f2, f3, f4) \\\n  (a), (f2), (f3), (f4), \\\n  (b), (f2), (f3), (f4), \\\n  (c), (f2), (f3), (f4), \\\n  (d), (f2), (f3), (f4)\n#define QUAD_TABLE256_INTERNAL3(a, b, c, d, f3, f4) \\\n  QUAD_TABLE256_INTERNAL2((a), (b), (c), (d), (a), (f3), (f4)), \\\n  QUAD_TABLE256_INTERNAL2((a), (b), (c), (d), (b), (f3), (f4)), \\\n  QUAD_TABLE256_INTERNAL2((a), (b), (c), (d), (c), (f3), (f4)), \\\n  QUAD_TABLE256_INTERNAL2((a), (b), (c), (d), (d), (f3), (f4))\n#define QUAD_TABLE256_INTERNAL4(a, b, c, d, f4) \\\n  QUAD_TABLE256_INTERNAL3((a), (b), (c), (d), (a), (f4)), \\\n  QUAD_TABLE256_INTERNAL3((a), (b), (c), (d), (b), (f4)), \\\n  QUAD_TABLE256_INTERNAL3((a), (b), (c), (d), (c), (f4)), \\\n  QUAD_TABLE256_INTERNAL3((a), (b), (c), (d), (d), (f4))\n#define QUAD_TABLE256(a, b, c, d) \\\n  {QUAD_TABLE256_INTERNAL4((a), (b), (c), (d), (a)), \\\n   QUAD_TABLE256_INTERNAL4((a), (b), (c), (d), (b)), \\\n   QUAD_TABLE256_INTERNAL4((a), (b), (c), (d), (c)), \\\n   QUAD_TABLE256_INTERNAL4((a), (b), (c), (d), (d))}\n\nvoid GenoarrLookup256x4bx4(const uintptr_t* genoarr, const void* table256x4bx4, uint32_t sample_ct, void* result);\n\n// Lookup table initialization functions.  table[0][0], [1][0], [2][0], and\n// [3][0] must be initialized to f(0), f(1), f(2), and f(3) respectively.\nvoid InitLookup16x4bx2(void* table16x4bx2);\n\nvoid InitLookup16x8bx2(void* table16x8bx2);\n\n#ifdef USE_SHUFFLE8\n// in bytes\nCONSTI32(kLookup256x1bx4Size, 1024 + 2 * kBytesPerVec);\n#else\nCONSTI32(kLookup256x1bx4Size, 1024);\n#endif\n\nvoid InitLookup256x1bx4(void* table256x1bx4);\n\nvoid InitLookup256x2bx4(void* table256x2bx4);\n\nvoid InitLookup256x4bx4(void* table256x4bx4);\n\nvoid PhaseLookup4b(const uintptr_t* genoarr, const uintptr_t* phasepresent, const uintptr_t* phaseinfo, const void* table56x4bx2, uint32_t sample_ct, void* result);\n\n// [0][0]..[3][0], [17][0], and [19][0] should contain the relevant values\nvoid InitPhaseLookup4b(void* table56x4bx2);\n\nvoid PhaseLookup8b(const uintptr_t* genoarr, const uintptr_t* phasepresent, const uintptr_t* phaseinfo, const void* table56x8bx2, uint32_t sample_ct, void* result);\n\nvoid InitPhaseLookup8b(void* table56x8bx2);\n\n// het-haploid prohibited.  
64-entry table suffices: we use the same bits for\n// phasepresent and sex_male since they can't be true simultaneously.\n// phaseinfo is xor'd with bits 1 and 3 instead of 1 and 2.\nvoid PhaseXNohhLookup4b(const uintptr_t* genoarr, const uintptr_t* phasepresent, const uintptr_t* phaseinfo, const uintptr_t* sex_male, const void* table64x4bx2, uint32_t sample_ct, void* result);\n\n// [0][0]..[3][0], [16][0]..[19][0]\nvoid InitPhaseXNohhLookup4b(void* table64x4bx2);\n\n// uses same table as PhaseXNohhLookup\nvoid GenoarrSexLookup4b(const uintptr_t* genoarr, const uintptr_t* sex_male, const void* table64x4bx2, uint32_t sample_ct, void* result);\n\nvoid InitPhaseXNohhLookup8b(void* table64x8bx2);\n\nvoid GenoarrSexLookup8b(const uintptr_t* genoarr, const uintptr_t* sex_male, const void* table64x8bx2, uint32_t sample_ct, void* result);\n\n// Unlike PhaseLookup4b(), this allows the cur_phased bit to be set when the\n// genoarr entry is not 01 (het).\nvoid VcfPhaseLookup4b(const uintptr_t* genoarr, const uintptr_t* cur_phased, const uintptr_t* phaseinfo, const void* table246x4bx2, uint32_t sample_ct, void* __restrict result);\n\n// Precondition:\n//   [0], [2], [4], [6] initialized with unphased entries\n//   [32], [34], [36], [38] initialized with phased-unflipped entries\n//   [162] initialized with phased-flipped case\nvoid InitVcfPhaseLookup4b(void* table246x4bx2);\n\nvoid VcfPhaseLookup2b(const uintptr_t* genoarr, const uintptr_t* cur_phased, const uintptr_t* phaseinfo, const void* table246x2bx2, uint32_t sample_ct, void* __restrict result);\n\nvoid InitVcfPhaseLookup2b(void* table246x2bx2);\n\n\n// Analogue of BitIter1x.\nHEADER_INLINE uint32_t GenoIter1x(const uintptr_t* __restrict genoarr, uintptr_t match_word, uintptr_t* __restrict widxp, uintptr_t* __restrict cur_bitsp) {\n  uintptr_t cur_bits = *cur_bitsp;\n  while (!cur_bits) {\n    cur_bits = genoarr[++(*widxp)] ^ match_word;\n    cur_bits = (~(cur_bits | (cur_bits >> 1))) & kMask5555;\n  }\n  *cur_bitsp = cur_bits & (cur_bits - 1);\n  return ctzw(cur_bits);\n}\n\n// For every missing entry in genoarr, clear the corresponding subset and\n// sparse_vals entries.\nvoid ClearGenoarrMissing1bit8Unsafe(const uintptr_t* __restrict genoarr, uint32_t* subset_sizep, uintptr_t* __restrict subset, void* __restrict sparse_vals);\n\nvoid ClearGenoarrMissing1bit16Unsafe(const uintptr_t* __restrict genoarr, uint32_t* subset_sizep, uintptr_t* __restrict subset, void* __restrict sparse_vals);\n\n// See EasyasPi's answer to\n//   https://stackoverflow.com/questions/25095741/how-can-i-multiply-64-bit-operands-and-get-128-bit-result-portably\nHEADER_INLINE uint64_t multiply64to128(uint64_t lhs, uint64_t rhs, uint64_t* high) {\n  // GCC and Clang usually provide __uint128_t on 64-bit targets, although\n  // Clang also defines it on WASM despite having to use builtins for most\n  // purposes -- including multiplication.\n#if defined(__SIZEOF_INT128__) && !defined(__wasm__)\n  __uint128_t product = S_CAST(__uint128_t, lhs) * S_CAST(__uint128_t, rhs);\n  *high = S_CAST(uint64_t, product >> 64);\n  return S_CAST(uint64_t, product & 0xffffffffffffffffLLU);\n#else\n  // Fast yet simple grade school multiply that avoids 64-bit carries with the\n  // properties of multiplying by 11 and takes advantage of UMAAL on ARMv6 to\n  // only need 4 calculations.\n\n  // First calculate all of the cross products.\n  uint64_t lo_lo = (lhs & 0xffffffff) * (rhs & 0xffffffff);\n  uint64_t hi_lo = (lhs >> 32) * (rhs & 0xffffffff);\n  uint64_t lo_hi = (lhs & 0xffffffff) * (rhs 
>> 32);\n  uint64_t hi_hi = (lhs >> 32) * (rhs >> 32);\n  // Now add the products together.  These will never overflow.\n  uint64_t cross = (lo_lo >> 32) + (hi_lo & 0xffffffff) + lo_hi;\n  uint64_t upper = (hi_lo >> 32) + (cross >> 32) + hi_hi;\n\n  *high = upper;\n  return (cross << 32) | (lo_lo & 0xffffffff);\n#endif\n}\n\nHEADER_INLINE double u127tod(uint64_t hi, uint64_t lo) {\n  return u63tod(hi) * 18446744073709551616.0 + S_CAST(double, lo);\n}\n\n// plus_term0 * plus_term1 - minus_term0 * minus_term1\ndouble u127prod_diff_d(uint64_t plus_term0, uint64_t plus_term1, uint64_t minus_term0, uint64_t minus_term1);\n\ndouble i127prod_diff_d(uint64_t plus_term0, uint64_t plus_term1, uint64_t minus_term0, uint64_t minus_term1);\n\ndouble MultiallelicDiploidMinimac3R2(const uint64_t* __restrict sums, const uint64_t* __restrict hap_ssqs_x2, uint32_t nm_sample_ct, uint32_t allele_ct, uint32_t extra_phased_het_ct);\n\nHEADER_INLINE double MultiallelicDiploidMachR2(const uint64_t* __restrict sums, const uint64_t* __restrict ssqs, uint32_t nm_sample_ct, uint32_t allele_ct) {\n  return 2 * MultiallelicDiploidMinimac3R2(sums, ssqs, nm_sample_ct, allele_ct, 0);\n}\n\n// ----- end plink2_common subset -----\n\n// other configuration-ish values\n// this part of the specification is set in stone.\n\nCONSTI32(kPglVblockSize, 65536);\n\n// kPglDifflistGroupSize defined in plink2_base\n\n// Currently chosen so that it plus kPglFwriteBlockSize + kCacheline - 2 is\n// < 2^32, so DivUp(kPglMaxBytesPerVariant + kPglFwriteBlockSize - 1,\n// kCacheline) doesn't overflow.\nstatic const uint32_t kPglMaxBytesPerVariant = 0xfffdffc0U;\n// CONSTI32(kPglMaxBytesPerDataTrack, 0x7ffff000);\n// static_assert(kMaxBytesPerIO >= (int32_t)kPglMaxBytesPerDataTrack, \"pgenlib assumes a single variant data track always fits in one fread/fwrite operation.\");\n\nFLAGSET_DEF_START()\n  kfPgenGlobal0,\n  kfPgenGlobalLdCompressionPresent = (1 << 0),\n  kfPgenGlobalDifflistOrLdPresent = (1 << 1),\n\n  // Only guaranteed to be set when present if phase or dosage also present.\n  kfPgenGlobalMultiallelicHardcallFound = (1 << 2),\n\n  kfPgenGlobalHardcallPhasePresent = (1 << 3),\n  kfPgenGlobalDosagePresent = (1 << 4),\n  kfPgenGlobalDosagePhasePresent = (1 << 5),\n  kfPgenGlobalAllNonref = (1 << 6)\nFLAGSET_DEF_END(PgenGlobalFlags);\n\n// difflist/LD compression must not involve more than\n//   raw_sample_ct / kPglMaxDifflistLenDivisor\n// entries.  (however, returned difflists can have up to twice as many entries,\n// when a variant is LD-compressed and the reference variant is\n// difflist-compressed.)\n// This value can be considered set in stone.\nCONSTI32(kPglMaxDifflistLenDivisor, 8);\n\n// Threshold for using a deltalist to represent a bitarray on disk (currently\n// relevant for dosage data).  This is a tunable parameter, but must be >=\n// kPglMaxDifflistLenDivisor.\nCONSTI32(kPglMaxDeltalistLenDivisor, 9);\n\nvoid PgrDifflistToGenovecUnsafe(const uintptr_t* __restrict raregeno, const uint32_t* difflist_sample_ids, uintptr_t difflist_common_geno, uint32_t sample_ct, uint32_t difflist_len, uintptr_t* __restrict genovec);\n\n// This covers all the possibilities.  Todo: switch all functions exporting\n// multiallelic codes and/or phased dosage to use this struct.  
(Biallelic\n// phased hardcalls and unphased dosages are simple enough for this to be\n// overkill, though.)\ntypedef struct PgenVariantStruct {\n  uintptr_t* genovec;\n  uintptr_t* patch_01_set;\n  AlleleCode* patch_01_vals;\n  uintptr_t* patch_10_set;\n  AlleleCode* patch_10_vals;\n  uintptr_t* phasepresent;\n  uintptr_t* phaseinfo;\n  uintptr_t* dosage_present;\n  uint16_t* dosage_main;\n  uintptr_t* multidosage_present;\n  unsigned char* multidosage_cts;\n  AlleleCode* multidosage_codes;\n  uint16_t* multidosage_vals;\n  uintptr_t* dphase_present;\n  int16_t* dphase_delta;\n  uintptr_t* multidphase_present;\n  unsigned char* multidphase_cts;\n  AlleleCode* multidphase_codes;\n  int16_t* multidphase_delta;\n\n  uint32_t patch_01_ct;\n  uint32_t patch_10_ct;\n  uint32_t phasepresent_ct;\n  uint32_t dosage_ct;\n  uint32_t multidosage_sample_ct;\n  uint32_t dphase_ct;\n  uint32_t multidphase_sample_ct;\n} PgenVariant;\n\nHEADER_INLINE uintptr_t GetAux1bAlleleEntryByteCt(uint32_t allele_ct, uint32_t rare10_ct) {\n  assert(allele_ct >= 3);\n  if (allele_ct == 3) {\n    return DivUp(rare10_ct, 8);\n  }\n  if (allele_ct < 6) {\n    return DivUp(rare10_ct, 2);\n  }\n  // one byte per entry for allele_ct <= 17, two bytes for 18..256\n  return ((allele_ct >= 18) + 1) * rare10_ct;\n  // todo: allele_ct > 257\n}\n\nextern const uint16_t kHcToAlleleCodes[1024];\n\n// Permits missing codes, does not remap.\nvoid PglMultiallelicSparseToDenseMiss(const PgenVariant* pgvp, uint32_t sample_ct, AlleleCode* __restrict wide_codes);\n\nuintptr_t PglComputeMaxAlleleCt(const uintptr_t* allele_idx_offsets, uint32_t variant_ct);\n\nHEADER_INLINE AlleleCode GetAidx(const uintptr_t* allele_idx_offsets, uint32_t variant_uidx, uintptr_t allele_idx) {\n  if (!allele_idx_offsets) {\n    return allele_idx - 2 * variant_uidx;\n  }\n  return allele_idx - allele_idx_offsets[variant_uidx];\n}\n\nuint32_t CountNybble(const void* nybblearr, uintptr_t nybble_word, uintptr_t nybble_ct);\n\n// The actual format:\n// 1. 2 magic bytes 0x6c 0x1b.\n//\n// 2. Mode byte.\n//      0x01 = plink1 variant-major.\n//      0x02 = plink2 basic variant-major.  variant/sample counts in header,\n//             00 = hom ref, 01 = het, 10 = hom alt, 11 = missing.  (vrtype 0)\n//      0x03 = plink2 basic unphased dosage (vrtype 0x40)\n//      0x04 = plink2 basic phased dosage (vrtype 0xc0)\n//      These are designed to be easy to write.  Note that the dosage formats\n//      require hardcalls to be stored as well; however, you can just set them\n//      to all-missing and then use\n//        plink2 --hard-call-threshold <...> --make-pgen\n//      to populate them.\n//\n//      0x10 = variable-type and/or variable-length records present.\n//      0x11 = mode 0x10, but with phase set information at the end of the\n//             file.\n//      0x05..0x0f and 0x12..0x7f are reserved for possible use by future\n//      versions of the PGEN specification, and 0 is off-limits (PLINK 1\n//      sample-major .bed).\n//      0x80..0xff can be safely used by developers for their own purposes.\n//\n// 3. If not plink1-format,\n//    a. 4-byte # of variants; call this M.\n//    b. 4-byte # of samples, call this N.\n//    c. Additional 1-byte header 'control' value (PgenHeaderCtrl).  
May be\n//       extended in the future.\n//       bits 0-3: Indicates vrtype and variant record length storage widths.\n//         If bit 3 is unset, bits 0-1 store (vrec_len_byte_ct - 1), while bit\n//         2 is set iff phase or dosage info is present (requiring 8 bits\n//         instead of 4 bits for vrtypes).\n//         If bit 3 is set, a specialized encoding is used which combines the\n//         two pieces of information (reducing the overhead for files with few\n//         samples).  The following encodings are now defined (note that there\n//         was a change of plans in Mar 2019):\n//           8: 1 bit per fused vrtype-length.  Unset = vrtype 5, set = vrtype\n//              0.\n//           9: 2 bits, multiallelic.  0 = vrtype 5, 1 = vrtype 0, 2-3 = vrtype\n//              8 with that many more bytes than vrtype 0.  Note that this is\n//              limited to 16 ALT alleles.\n//          10: 2 bits, phased.  0 = vrtype 5, 1 = vrtype 0, 2-3 = vrtype 16\n//              with that many minus 1 bytes beyond vrtype 0.  While this is\n//              also aimed at the single-sample use case, it technically\n//              supports up to 15 always-phased or 7 partially-phased samples.\n//          11: 4 bits, multiallelic + phased.  0 = vrtype 5, 1 = vrtype 0, 2-7\n//              = vrtype 8 with that many bytes beyond vrtype 0, 9 = vrtype 16\n//              phase info requiring just 1 byte, 10-15 = vrtype 24 with (x-7)\n//              extra bytes required between multiallelic and phased tracks.\n//          12: 2 bits, dosage, must be single-sample.  0 = vrtype 5, 1 =\n//              vrtype 0, 2 = vrtype 0x45 with 2 bytes, 3 = vrtype 0x40 with 3\n//              total bytes.\n//          13: reserved for single-sample multiallelic + dosage.\n//          14: 4 bits, phased + dosage, must be single-sample.  0 and 1 as\n//              usual, 3 = vrtype 16 with 1 phaseinfo byte, 4 = vrtype 0x45\n//              with 2 bytes, 5 = vrtype 0x40 with 3 total bytes, 12 = vrtype\n//              0xc5 with 4 total bytes, 13 = vrtype 0xc0 with 5 total bytes,\n//              15 = vrtype 0xe0 with 6 total bytes\n//          15: reserved for single-sample multiallelic + phased dosage.\n//       bits 4-5: allele count storage (00 = unstored, 01-11 = bytes per ct)\n//       bits 6-7: nonref flags info (00 = unstored, 01 = all ref/alt, 10 =\n//                 never ref/alt, 11 = explicitly stored)\n//       Bits 0-5 do not apply to the fixed-length modes (currently 0x02-0x04)\n//       and should be zeroed out in that case.\n//\n// 4. If mode 0x10/0x11,\n//    a. Array of 8-byte fpos values for the first variant in each vblock.\n//       (Note that this suggests a way to support in-place insertions: some\n//       unused space can be left between the vblocks.)\n//    b. Sequence of header blocks, each containing information about\n//       kPglVblockSize variants (except the last may be shorter).  All values\n//       are known-width, to allow e.g. plink2 --make-pgen/--pmerge to compress\n//       all variant records first, then fseek to the beginning of the output\n//       file and write the header.\n//         i. array of 4-bit or 1-byte vrtypes.\n//        ii. array of variant record lengths (each occupying vrec_len_byte_ct\n//            bytes, or 2-4 bits).\n//       iii. if bits 4-5 of {3c} aren't 00, array of alt allele counts.\n//        iv. 
nonref flags info, if explicitly stored\n//      (this representation allows more efficient random access)\n//    If mode 0x02-0x04, and nonref flags info explicitly stored, just that\n//    bitarray.\n//\n// 5. The variant records.  See below for details.\n\n// Difflist format:\n//   a. <difflist_len VINT>\n//   If difflist_len is zero, that's it.  Otherwise, the difflist is organized\n//   into 64-element groups (the last group will usually be smaller), to make\n//   extraction of e.g. a single sample less painful.  Note that with 20k\n//   samples, a difflist is space-saving even with MAF 5%:\n//     ~1/400 hom alt + ~38/400 het = (~39/400) * 20k\n//                                  = ~1950 sample IDs.\n//     that's 31 groups, requiring about 2 + 62 + 30 + 488 + 1919 = 2501 bytes\n//     (can be slightly higher since a few ID deltas may be larger than 127);\n//     uncompressed storage requires 5000 bytes.\n//   b. <array of group start sample IDs, each of sample_id_byte_ct>\n//   c. <array of 1-byte <delta segment lengths minus 63>, with last entry\n//      omitted>\n//   d. <optional payload of fixed-width genotype values>\n//      (in retrospect, it might have been better to position this after (e)\n//      to avoid entanglement with the packed-bitarray definition, oh well...)\n//   e. one \"delta segment\"/group: <array of <group size - 1> VINT values,\n//      each indicating the difference between the current and previous sample\n//      IDs; i.e. value is 1 for two consecutive samples>\n\n// Variant record type ('vrtype') coding:\n// bits 0-2:\n//   000 = Simple 2-bit encoding.\n//   100, 110, 111 = Simple difflist.  Low two bits store the base value.  (101\n//                   isn't here since Hardy-Weinberg equilibrium prevents\n//                   all het ref/alt from arising much in practice, outside of\n//                   alignment/variant-calling technical artifacts that should\n//                   be removed.)\n//   010 = Differences-from-earlier-variant encoding (\"LD compression\").  The\n//         last variant without this type of encoding is the base.\n//         To simplify random access logic, the first variant in each vblock is\n//         prohibited from using this encoding.\n//   011 = Inverted differences-from-earlier-variant encoding.  (This covers\n//         the case where a reference allele is 'wrong'.)  When decoding, the\n//         difflist should be processed first, then the entire genovec should\n//         be flipped.\n//   001 = 1-bit + difflist representation.  Suppose most calls are hom ref or\n//         het (e.g. a 20% MAF variant with ~4% hom alt1, ~36% het ref/alt1,\n//         ~64% hom ref), then the main datatrack has just the low bits of the\n//         usual 2-bit codes.  This is followed by a difflist containing the\n//         hom alt1 and missing genotypes.\n//         The main datatrack is preceded by a single byte indicating what\n//         the two common values are: 2 low bits = <set value - unset value>,\n//         next 2 bits = unset value (6 possibilities).  Top 4 bits are\n//         reserved.\n//   101 = All alleles are reference, no missing data.  The main datatrack is\n//         empty in this case.  Although this saves only 1 byte per variant\n//         over vrtype 100, this actually makes a huge difference for\n//         single-sample files.\n//         Since this was not defined until mid-2019, the standard plink2\n//         alpha-test binaries will not use this encoding.  
However,\n//         alpha-2-final and later binaries interpret this encoding correctly.\n//         If your workflow makes heavy use of single-sample .pgen files, you\n//         can add -DFUTURE_ENCODER during compilation to unlock this feature.\n//\n// bit 3: multiallelic hardcalls present with alt2/alt3/... present?  If yes,\n//        auxiliary data track #1 disambiguates the 0b01 (ref/altx) and 0b10\n//        (altx/alty, x may equal y) hardcalls.  This contains a format byte,\n//        followed by a list of ref/altx patches, then a list of altx/alty\n//        patches.  All unpatched genotypes are ref/alt1 or alt1/alt1.\n//        The bottom 4 bits of the format byte describe how the ref/altx patch\n//        set is stored.\n//   0 = Starts with a bitarray with <total ref/altx count> bits (divide by 8\n//       and round up to get byte count of this component; any trailing bits in\n//       the last byte must be 0), where each set bit corresponds to presence\n//       of a rarealt (i.e. alt2/alt3/...).\n//       ExpandBytearr(aux1_first_quarter, all_01, raw_sample_ctl, ct_01, 0,\n//                     patch_01);\n//       can be used to convert this into a set of sample IDs, though we may\n//       want to avoid an intermediate unpacking step in practice.  Note that\n//       when we're passing in sample_ct < raw_sample_ct and the main datatrack\n//       is LD-compressed, we'd like ldbase_raw_genovec to be cached.\n//       This is followed by a packed array of fixed-width <allele idx - 2>\n//       values, where the width depends on the total number of alt alleles.\n//       2 alts: width ZERO.  All set bits in the first bitarray correspond\n//               to ref/alt2.\n//       3 alts: width 1 bit.  Set bits correspond to ref/alt3, clear bits\n//               correspond to ref/alt2.\n//       4-5 alts: width 2 bits.  0b00 corresponds to ref/alt2, 0b01 =\n//                 ref/alt3, 0b10 = ref/alt4, etc.\n//       6-17 alts: width 4 bits.\n//       18-257 alts: width 8 bits.\n//       258-65537 alts: width 16 bits.\n//       65538-16777215 alts: width 24 bits.  Reasonable to prohibit more than\n//                            2^24 - 1 = 16777215, since variant records are\n//                            limited to 4 GiB.  I can imagine some\n//                            applications of >65534 in highly variable\n//                            regions, though, and it doesn't actually cost us\n//                            anything to define a way to represent it.  (A\n//                            plink2 binary compiled with AlleleCode typedef'd\n//                            as uint32_t will run more slowly, of course, but\n//                            most binaries will not be compiled that way.)\n//   1 = Same as mode 0, except the initial bitarray is replaced by a difflist\n//       with sample IDs.  (We could make that piece somewhat smaller by\n//       storing 0-based ref/altx indexes instead, but I'm pretty sure that\n//       isn't worth the performance penalty of requiring all_01 and more\n//       complicated unpacking.  Though we'll need to peek at aux1[0] before\n//       decompressing the main datatrack to exploit this.)\n//   15 = Empty patch set.  Might remove this (storing this as mode 1 just\n//        takes 1 more byte), but it may enable some relevant performance\n//        optimizations.\n//   2-14 are reserved for future use.  We don't define an efficient way to\n//   represent a variant that e.g. 
has more alt2s than alt1s for now, since alt\n//   alleles will usually be sorted in order of decreasing frequency, but maybe\n//   this will matter in the future.\n//\n//   The top 4 bits describe how the altx/alty patch set is stored.  0/1/15\n//   have the same meaning as they do for the ref/altx patch set; the only\n//   thing that changes is the format of the packed array of values at the end.\n//   2 alts: width 1.  This is treated as a special case.  Set bits correspond\n//           to alt2/alt2, clear = alt1/alt2.\n//   3-4 alts: width 2+2 bits.  Each stores <allele idx - 1>, with the smaller\n//             number in the lower bits.  E.g. alt1/alt2 is stored as 0b0100;\n//             alt3/alt3 is stored as 0b1010.\n//   5-16 alts: width 4+4 bits.\n//   17-256 alts: width 8+8 bits.\n//   257-65536 alts: width 16+16 bits.\n//   65537-16777215 alts: width 24+24 bits.\n//\n// bit 4: hardcall phased?  If yes, auxiliary data track #2 contains phasing\n//        information for heterozygous calls.\n//        The first *bit* of the track indicates whether an explicit\n//        \"phasepresent\" bitarray is stored.  If it's set, the next het_ct bits\n//        are 1-bit values, where 0 = no phasing info known, and 1 = phasing\n//        info present.  If it's unset, phasing info is present for every het\n//        call.\n//        This is followed by a \"phaseinfo\" bitarray, where 0 = unswapped,\n//        1 = swapped (e.g. \"1|0\" in VCF).\n//        This track is normally unpacked into fixed-size bitarrays when\n//        loaded, but a raw mode is also provided (which doesn't support\n//        subsetting).\n//        By default, entire chromosomes/contigs are assumed to be phased\n//        together.  (Todo: support contiguous phase sets.)\n//\n// bits 5-6:\n//   00 = no dosage data.\n//   01 = dosage list.  Auxiliary data track #3 contains a delta-encoded list\n//        of sample IDs (like a difflist, but with no genotypes).  Track #4\n//        contains a 16-bit (0..2^15; 65535 missing value is only permitted in\n//        unconditional-dosage case) value expressing the sum of all alt allele\n//        dosages.  (Yes, making this the ref allele dosage would have been a\n//        bit cleaner, but it's too late now.)\n//        If the variant is multiallelic, nonzero alt2/alt3/... dosages are\n//        likely to be sparse.  So,\n//        - track #5 contains a delta-encoded list describing which\n//          <sample_uidx x rarealt dosage> entries are nonzero, where rarealt\n//          index is in the lowest bits and sample_uidx can be computed via\n//          right-shift (to avoid integer-division headaches, especially\n//          important since indexes in this list can be larger than 2^32).\n//          We use sample_uidx here to make subsetting less painful.\n//          Since each nonzero dosage value requires 16 bits, and\n//          delta-encoding of a dense list adds less than 9 bits per entry,\n//          there isn't much point in supporting a dense bitarray mode here.\n//          To keep memory requirements sane for biobank-scale datasets when\n//          the number of alt alleles is very large, each sample is prohibited\n//          from having more than 255 nonzero allele dosages (this makes no\n//          difference when sizeof(AlleleCode) == 1, but it may matter later).\n//        - track #6 contains the rarealt nonzero dosage values.\n//        Note that this and the other dosage modes are in ADDITION to\n//        hardcalls.  
This increases filesize by up to 12.5%, but makes the\n//        reader substantially simpler; --hard-call-threshold logic is nicely\n//        compartmentalized.\n//   10 = unconditional dosage (just track #4).\n//   11 = dosage bitarray.  In this case, auxiliary data track #3 contains an\n//        array of 1-bit values indicating which samples have dosages.  If the\n//        variant is multiallelic, tracks #5 and 6 are as described above.\n//   bgen 1.2 format no longer permits fractional missingness, so no good\n//   reason for us to support it.\n//   Considered putting *all* dosage data at the end of the file (like I will\n//   do for phase set info); this could actually be worthwhile for\n//   unconditional dosages, but it doesn't work well when only some samples\n//   have dosage data.\n// bit 7: some dosages explicitly phased?  If yes, and dosages are not\n//        unconditionally present, auxiliary data track #7 is a bitarray of\n//        length dosage_ct indicating whether dphase_delta exists for that\n//        sample.  Note that this is technically independent of bit 4; either\n//        can be set without the other.  (However, in practice, bit 4 is almost\n//        always set when bit 7 is, since that enables more efficient storage\n//        of 0|0.99, 1|0.02, and similar pairs.)\n//        When phased dosages are present, track #8 contains values\n//        representing <(hap1 alt prob) - (hap2 alt prob)>, etc., where the\n//        underlying values are represented in [0..16384] (so the signed\n//        difference is in [-16384..16384]).  Track #4 contains the\n//        corresponding sums; parity does NOT need to match (necessary to allow\n//        this since we treat omitted values as zero; and since we are allowing\n//        it, no point in making parity match in other situations either).  In\n//        fixed-width case, -32768 should be stored in track #8 when the entire\n//        call is missing, while 0 and missing-phase are considered synonymous.\n//        In the biallelic case, if a hardcall is phased, a dosage is present,\n//        and no explicit dosage-phase is, we define it to mean the unique\n//        dphase_delta sequence with maximal absolute value, and --make-pgen\n//        takes advantage of it.  This definition technically works for\n//        triallelic variants as well, but it breaks down with 4 alleles, so we\n//        prohibit hardcall-phase + dosage + no-dosage-phase with more than 2\n//        alleles.\n//        In the multiallelic case, tracks #9 and #10 are analogous to #5 and\n//        #6.\n//\n// Representation of variable ploidy (MT) was considered, but rejected since\n// dosages should be at least as appropriate for MT.\n// Oxford/VCF-style storage of separate probabilities for every possible\n// genotype (e.g. P(AA), P(AB), P(BB) instead of just 2P(AA) + P(AB) and\n// 2P(BB) + P(AB)) is tentatively rejected due to (i) lack of relevance to\n// PLINK's analysis functions and (ii) high storage cost where we can afford it\n// least.  
In principle, this is subject to reevaluation if (i) changes, but\n// given the poor interaction with phased dosages, it's probably better to just\n// think of them as permanently outside PLINK's scope.\n\n// maximum prime < 2^32 is 4294967291; quadratic hashing guarantee breaks down\n// past that divided by 2.\nCONSTI32(kPglMaxVariantCt, 0x7ffffffd);\n\nCONSTI32(kPglMaxSampleCt, 0x7ffffffe);\n\nuint64_t PglHeaderBaseEndOffset(uint32_t variant_ct, uintptr_t vrec_len_byte_ct, uint32_t phase_or_dosage_present, uint32_t explicit_nonref_flags);\n\n// Current pgen-extension API assumes .pgen extension bodies fit comfortably in\n// memory.\n// It's easy to imagine a useful extension that breaks this assumption, e.g.\n// storage of VCF FORMAT/GQ and FORMAT/DP.  There's an escape hatch --\n// extensions are allowed to refer to additional files.  We'll see whether that\n// proves to be enough.\ntypedef struct PgenExtensionLlStruct {\n  struct PgenExtensionLlStruct* next;\n  uint64_t size;\n  unsigned char* contents;\n  uint8_t type_idx;\n} PgenExtensionLl;\n\n#ifdef __cplusplus\n}  // namespace plink2\n#endif\n\n#endif  // __PGENLIB_MISC_H__\n"
  },
  {
    "path": "external_libs/pgenlib/include/pgenlib_read.cc",
    "content": "// This library is part of PLINK 2.0, copyright (C) 2005-2024 Shaun Purcell,\n// Christopher Chang.\n//\n// This library is free software: you can redistribute it and/or modify it\n// under the terms of the GNU Lesser General Public License as published by the\n// Free Software Foundation; either version 3 of the License, or (at your\n// option) any later version.\n//\n// This library is distributed in the hope that it will be useful, but WITHOUT\n// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or\n// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License\n// for more details.\n//\n// You should have received a copy of the GNU Lesser General Public License\n// along with this library.  If not, see <http://www.gnu.org/licenses/>.\n\n\n#include \"pgenlib_read.h\"\n\n#include <errno.h>\n\n#ifdef __cplusplus\nnamespace plink2 {\n#endif\n\nstatic inline PgenReaderMain* GetPgrp(PgenReader* pgr_ptr) {\n  return &GET_PRIVATE(*pgr_ptr, m);\n}\n\nstatic inline const uint32_t* GetSicp(PgrSampleSubsetIndex pssi) {\n  return GET_PRIVATE(pssi, cumulative_popcounts);\n}\n\n// The subset of this codebase used by pgenlibr (which is compiled with\n// -DNO_UNALIGNED and -DPGENLIB_NOPRINT) is now subject to additional\n// restrictions.  In particular:\n// - No unaligned loads/stores/pointers, or other undefined behavior.\n// - The PRI{d,u}PTR and PRI{d,u}64 printf format-specifiers cannot be used.  I\n//   hope to remove this restriction later, but for now they make Windows\n//   compilation a clusterfuck.\n// - No exit(), or printing to stdout/stderr.\n// - No flexible array members.\n// - Not a restriction yet, but we want to properly distinguish between\n//   __LP64__ and USE_SSE2.\n#ifdef USE_SSE2\nvoid SmallGenoarrCount3FreqIncr(const unsigned char* genoarrb_iter, uint32_t byte_ct, uint32_t* even_ctp, uint32_t* odd_ctp, uint32_t* bothset_ctp) {\n  for (uint32_t bytes_left = byte_ct; ; ) {\n    uintptr_t cur_geno_word;\n    if (bytes_left < kBytesPerWord) {\n      if (!bytes_left) {\n        return;\n      }\n      cur_geno_word = ProperSubwordLoad(genoarrb_iter, bytes_left);\n      bytes_left = 0;\n    } else {\n      CopyFromUnalignedIncrW(&cur_geno_word, &genoarrb_iter);\n      bytes_left -= kBytesPerWord;\n    }\n    const uintptr_t cur_geno_word_high = kMask5555 & (cur_geno_word >> 1);\n    *even_ctp += Popcount01Word(cur_geno_word & kMask5555);\n    *odd_ctp += Popcount01Word(cur_geno_word_high);\n    *bothset_ctp += Popcount01Word(cur_geno_word & cur_geno_word_high);\n  }\n}\n#endif\n\nvoid GenoarrbCountFreqs(const unsigned char* genoarrb, uint32_t sample_ct, STD_ARRAY_REF(uint32_t, 4) genocounts) {\n  // does not read past the end of genoarrb\n  uint32_t even_ct = 0;\n  uint32_t odd_ct = 0;\n  uint32_t bothset_ct = 0;\n#ifndef USE_SSE2\n  const uintptr_t* geno_firstw_start;\n  const uint32_t lead_byte_ct = AlignKToW(genoarrb, &geno_firstw_start);\n  uint32_t byte_ct = NypCtToByteCt(sample_ct);\n  uintptr_t cur_geno_word;\n  uint32_t fullword_ct;\n  uint32_t trail_byte_ct;\n  if (lead_byte_ct) {\n    if (lead_byte_ct >= byte_ct) {\n      cur_geno_word = ProperSubwordLoad(genoarrb, byte_ct);\n      goto GenoarrbCountFreqsLastWord;\n    }\n    cur_geno_word = ProperSubwordLoad(genoarrb, lead_byte_ct);\n    const uintptr_t cur_geno_word_high = kMask5555 & (cur_geno_word >> 1);\n    even_ct += Popcount01Word(cur_geno_word & kMask5555);\n    odd_ct += Popcount01Word(cur_geno_word_high);\n    bothset_ct += Popcount01Word(cur_geno_word & 
cur_geno_word_high);\n    byte_ct -= lead_byte_ct;\n  }\n  fullword_ct = byte_ct / kBytesPerWord;\n  for (uint32_t widx = 0; widx != fullword_ct; ++widx) {\n    cur_geno_word = geno_firstw_start[widx];\n    const uintptr_t cur_geno_word_high = kMask5555 & (cur_geno_word >> 1);\n    even_ct += Popcount01Word(cur_geno_word & kMask5555);\n    odd_ct += Popcount01Word(cur_geno_word_high);\n    bothset_ct += Popcount01Word(cur_geno_word & cur_geno_word_high);\n  }\n  trail_byte_ct = byte_ct % kBytesPerWord;\n  if (trail_byte_ct) {\n    cur_geno_word = ProperSubwordLoad(&(geno_firstw_start[fullword_ct]), trail_byte_ct);\n  GenoarrbCountFreqsLastWord:\n    const uintptr_t cur_geno_word_high = kMask5555 & (cur_geno_word >> 1);\n    even_ct += Popcount01Word(cur_geno_word & kMask5555);\n    odd_ct += Popcount01Word(cur_geno_word_high);\n    bothset_ct += Popcount01Word(cur_geno_word & cur_geno_word_high);\n  }\n#else // USE_SSE2\n  const uint32_t lead_byte_ct = (-R_CAST(uintptr_t, genoarrb)) % kBytesPerVec;\n  const unsigned char* genoarrb_iter;\n  uint32_t trail_ct;\n  if (sample_ct >= lead_byte_ct * 4 + (6 * kNypsPerVec)) {\n    // Only enter this branch if we can execute at least one iteration of the\n    // main vectorized loop.\n    const uint32_t remaining_sample_ct = sample_ct - 4 * lead_byte_ct;\n    SmallGenoarrCount3FreqIncr(genoarrb, lead_byte_ct, &even_ct, &odd_ct, &bothset_ct);\n    genoarrb_iter = &(genoarrb[lead_byte_ct]); // now vector-aligned\n    const uint32_t remaining_full_vec_ct = remaining_sample_ct / kNypsPerVec;\n    uint32_t even_ct_incr;\n    uint32_t odd_ct_incr;\n    uint32_t bothset_ct_incr;\n    const uint32_t vec_ct = remaining_full_vec_ct - (remaining_full_vec_ct % 6);\n    Count3FreqVec6(genoarrb_iter, vec_ct, &even_ct_incr, &odd_ct_incr, &bothset_ct_incr);\n    even_ct += even_ct_incr;\n    odd_ct += odd_ct_incr;\n    bothset_ct += bothset_ct_incr;\n    genoarrb_iter = &(genoarrb_iter[kBytesPerVec * vec_ct]);\n    trail_ct = remaining_sample_ct - (vec_ct * kNypsPerVec);\n  } else {\n    genoarrb_iter = genoarrb;\n    trail_ct = sample_ct;\n  }\n  const uint32_t trail_byte_ct = NypCtToByteCt(trail_ct);\n  SmallGenoarrCount3FreqIncr(genoarrb_iter, trail_byte_ct, &even_ct, &odd_ct, &bothset_ct);\n#endif\n  genocounts[0] = sample_ct + bothset_ct - even_ct - odd_ct;\n  genocounts[1] = even_ct - bothset_ct;\n  genocounts[2] = odd_ct - bothset_ct;\n  genocounts[3] = bothset_ct;\n}\n\nvoid GenoarrbCountSubsetFreqs(const unsigned char* genoarrb, const uintptr_t* __restrict sample_include_interleaved_vec, uint32_t raw_sample_ct, uint32_t sample_ct, STD_ARRAY_REF(uint32_t, 4) genocounts) {\n  // does not read past the end of genoarrb\n  const uint32_t raw_sample_ctv2 = NypCtToVecCt(raw_sample_ct);\n  uint32_t even_ct;\n  uint32_t odd_ct;\n  uint32_t bothset_ct;\n  uint32_t vec_idx = raw_sample_ctv2 - (raw_sample_ctv2 % 6);\n  CountSubset3FreqVec6(genoarrb, R_CAST(const VecW*, sample_include_interleaved_vec), vec_idx, &even_ct, &odd_ct, &bothset_ct);\n  const unsigned char* genoarrb_iter = &(genoarrb[kBytesPerVec * vec_idx]);\n#ifdef __LP64__\n  const uintptr_t* interleaved_mask_iter = &(sample_include_interleaved_vec[vec_idx * (kWordsPerVec / 2)]);\n#else\n  // bugfix (19 Jul 2018): (kWordsPerVec / 2) doesn't work in 32-bit case\n  const uintptr_t* interleaved_mask_iter = &(sample_include_interleaved_vec[(vec_idx * kWordsPerVec) / 2]);\n#endif\n#ifdef USE_AVX2\n  const uint32_t halfvec_idx_trail = (raw_sample_ct + 3) / (kBitsPerVec / 4);\n  uintptr_t mask_base1 = 0;\n  
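// Added commentary (not in upstream): each interleaved-mask word is consumed\n  // over two consecutive vec_idx iterations -- even bits on the even\n  // iteration, odd bits (via the >> 1 in the else-branch) on the odd one --\n  // so mask_base1..4 cache the four mask words read per even iteration.\n  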
uintptr_t mask_base2 = 0;\n  uintptr_t mask_base3 = 0;\n  uintptr_t mask_base4 = 0;\n  for (; vec_idx != raw_sample_ctv2; ++vec_idx) {\n    uintptr_t mask_word1;\n    uintptr_t mask_word2;\n    uintptr_t mask_word3;\n    uintptr_t mask_word4;\n    if (!(vec_idx % 2)) {\n      mask_base1 = *interleaved_mask_iter++;\n      mask_base2 = *interleaved_mask_iter++;\n      mask_base3 = *interleaved_mask_iter++;\n      mask_base4 = *interleaved_mask_iter++;\n      mask_word1 = mask_base1 & kMask5555;\n      mask_word2 = mask_base2 & kMask5555;\n      mask_word3 = mask_base3 & kMask5555;\n      mask_word4 = mask_base4 & kMask5555;\n    } else {\n      mask_word1 = (mask_base1 >> 1) & kMask5555;\n      mask_word2 = (mask_base2 >> 1) & kMask5555;\n      mask_word3 = (mask_base3 >> 1) & kMask5555;\n      mask_word4 = (mask_base4 >> 1) & kMask5555;\n    }\n    uint32_t vechalf_idx = 0;\n    while (1) {\n      uintptr_t cur_geno_word1;\n      uintptr_t cur_geno_word2;\n      if (2 * vec_idx + vechalf_idx < halfvec_idx_trail) {\n        CopyFromUnalignedIncrW(&cur_geno_word1, &genoarrb_iter);\n        CopyFromUnalignedIncrW(&cur_geno_word2, &genoarrb_iter);\n      } else {\n        // bugfix (19 May 2022): this was in 0..31 when it needed to be in\n        // 0..15\n        const uint32_t remaining_byte_ct = NypCtToByteCt(raw_sample_ct) % (kBytesPerVec / 2);\n        // todo: check if this harms usual-case loop efficiency\n        vechalf_idx = 1;\n        if (remaining_byte_ct < kBytesPerWord) {\n          cur_geno_word1 = ProperSubwordLoad(genoarrb_iter, remaining_byte_ct);\n          cur_geno_word2 = 0;\n        } else {\n          CopyFromUnalignedIncrW(&cur_geno_word1, &genoarrb_iter);\n          cur_geno_word2 = ProperSubwordLoad(genoarrb_iter, remaining_byte_ct - kBytesPerWord);\n        }\n      }\n      const uintptr_t cur_geno_word1_high_masked = mask_word1 & (cur_geno_word1 >> 1);\n      const uintptr_t cur_geno_word2_high_masked = mask_word2 & (cur_geno_word2 >> 1);\n      even_ct += PopcountWord(((cur_geno_word1 & mask_word1) << 1) | (cur_geno_word2 & mask_word2));\n      odd_ct += PopcountWord((cur_geno_word1_high_masked << 1) | cur_geno_word2_high_masked);\n      bothset_ct += PopcountWord(((cur_geno_word1 & cur_geno_word1_high_masked) << 1) | (cur_geno_word2 & cur_geno_word2_high_masked));\n      if (vechalf_idx) {\n        break;\n      }\n      ++vechalf_idx;\n      mask_word1 = mask_word3;\n      mask_word2 = mask_word4;\n    }\n  }\n#else  // not USE_AVX2\n  const uint32_t vec_idx_trail = (raw_sample_ct + 3) / kNypsPerVec;\n#  ifdef USE_SSE2\n  uintptr_t mask_base1 = 0;\n  uintptr_t mask_base2 = 0;\n  for (; vec_idx != raw_sample_ctv2; ++vec_idx) {\n    uintptr_t mask_word1;\n    uintptr_t mask_word2;\n    if (!(vec_idx % 2)) {\n      mask_base1 = *interleaved_mask_iter++;\n      mask_base2 = *interleaved_mask_iter++;\n      mask_word1 = mask_base1 & kMask5555;\n      mask_word2 = mask_base2 & kMask5555;\n    } else {\n      mask_word1 = (mask_base1 >> 1) & kMask5555;\n      mask_word2 = (mask_base2 >> 1) & kMask5555;\n    }\n    uintptr_t cur_geno_word1;\n    uintptr_t cur_geno_word2;\n    if (vec_idx < vec_idx_trail) {\n      CopyFromUnalignedIncrW(&cur_geno_word1, &genoarrb_iter);\n      CopyFromUnalignedIncrW(&cur_geno_word2, &genoarrb_iter);\n    } else {\n      const uint32_t remaining_byte_ct = NypCtToByteCt(raw_sample_ct) % kBytesPerVec;\n      if (remaining_byte_ct < kBytesPerWord) {\n        cur_geno_word1 = ProperSubwordLoad(genoarrb_iter, remaining_byte_ct);\n        
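// Added commentary: fewer than kBytesPerWord trailing bytes remain, so the\n        // second word covers no samples; zeroing it keeps the masked\n        // popcounts below correct.\n        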
cur_geno_word2 = 0;\n      } else {\n        CopyFromUnalignedIncrW(&cur_geno_word1, &genoarrb_iter);\n        cur_geno_word2 = ProperSubwordLoad(genoarrb_iter, remaining_byte_ct - kBytesPerWord);\n      }\n    }\n    const uintptr_t cur_geno_word1_high_masked = mask_word1 & (cur_geno_word1 >> 1);\n    const uintptr_t cur_geno_word2_high_masked = mask_word2 & (cur_geno_word2 >> 1);\n#    ifdef USE_SSE42\n    even_ct += PopcountWord(((cur_geno_word1 & mask_word1) << 1) | (cur_geno_word2 & mask_word2));\n    odd_ct += PopcountWord((cur_geno_word1_high_masked << 1) | cur_geno_word2_high_masked);\n    bothset_ct += PopcountWord(((cur_geno_word1 & cur_geno_word1_high_masked) << 1) | (cur_geno_word2 & cur_geno_word2_high_masked));\n#    else\n    even_ct += NypsumWord((cur_geno_word1 & mask_word1) + (cur_geno_word2 & mask_word2));\n    odd_ct += NypsumWord(cur_geno_word1_high_masked + cur_geno_word2_high_masked);\n    bothset_ct += NypsumWord((cur_geno_word1 & cur_geno_word1_high_masked) + (cur_geno_word2 & cur_geno_word2_high_masked));\n#      endif\n  }\n#  else  // not USE_SSE2\n  uintptr_t mask_base = 0;\n  for (; vec_idx != raw_sample_ctv2; ++vec_idx) {\n    uintptr_t mask_word;\n    if (!(vec_idx % 2)) {\n      mask_base = *interleaved_mask_iter++;\n      mask_word = mask_base & kMask5555;\n    } else {\n      mask_word = (mask_base >> 1) & kMask5555;\n    }\n    uintptr_t cur_geno_word;\n    if (vec_idx < vec_idx_trail) {\n      CopyFromUnalignedIncrW(&cur_geno_word, &genoarrb_iter);\n    } else {\n      const uint32_t remaining_byte_ct = NypCtToByteCt(raw_sample_ct) % kBytesPerVec;\n      cur_geno_word = ProperSubwordLoad(genoarrb_iter, remaining_byte_ct);\n    }\n    const uintptr_t cur_geno_word_high_masked = mask_word & (cur_geno_word >> 1);\n    even_ct += Popcount01Word(cur_geno_word & mask_word);\n    odd_ct += Popcount01Word(cur_geno_word_high_masked);\n    bothset_ct += Popcount01Word(cur_geno_word & cur_geno_word_high_masked);\n  }\n#  endif  // not USE_SSE2\n#endif  // not USE_AVX2\n  genocounts[0] = sample_ct + bothset_ct - even_ct - odd_ct;\n  genocounts[1] = even_ct - bothset_ct;\n  genocounts[2] = odd_ct - bothset_ct;\n  genocounts[3] = bothset_ct;\n}\n\nvoid GenoarrCountFreqs(const uintptr_t* genoarr, uint32_t sample_ct, STD_ARRAY_REF(uint32_t, 4) genocounts) {\n  // this masks out trailing genoarr bits\n  const uint32_t sample_ct_remainder = sample_ct % kBitsPerWordD2;\n  GenoarrCountFreqsUnsafe(genoarr, sample_ct - sample_ct_remainder, genocounts);\n  if (sample_ct_remainder) {\n    uintptr_t cur_geno_word = bzhi(genoarr[sample_ct / kBitsPerWordD2], 2 * sample_ct_remainder);\n    const uintptr_t cur_geno_word_high = kMask5555 & (cur_geno_word >> 1);\n    const uint32_t even_ct = Popcount01Word(cur_geno_word & kMask5555);\n    const uint32_t odd_ct = Popcount01Word(cur_geno_word_high);\n    const uint32_t bothset_ct = Popcount01Word(cur_geno_word & cur_geno_word_high);\n    genocounts[0] += sample_ct_remainder + bothset_ct - even_ct - odd_ct;\n    genocounts[1] += even_ct - bothset_ct;\n    genocounts[2] += odd_ct - bothset_ct;\n    genocounts[3] += bothset_ct;\n  }\n}\n\nvoid GenovecNonmissingToZeroUnsafe(uint32_t sample_ct, uintptr_t* genovec) {\n  // sets 1 and 2 to zero; leaves 3s untouched.\n  const uint32_t vec_ct = NypCtToVecCt(sample_ct);\n  assert(IsVecAligned(genovec));\n  const VecW m1 = VCONST_W(kMask5555);\n  VecW* vptr = R_CAST(VecW*, genovec);\n  for (uint32_t vidx = 0; vidx != vec_ct; ++vidx) {\n    VecW cur_vec = vptr[vidx];\n    const VecW 
cur_vec_rshifted = vecw_srli(cur_vec, 1);\n    cur_vec = cur_vec & m1;\n    cur_vec = cur_vec & cur_vec_rshifted;\n    vptr[vidx] = cur_vec | vecw_slli(cur_vec, 1);\n  }\n}\n\nvoid GenovecNonzeroToMissingUnsafe(uint32_t sample_ct, uintptr_t* genovec) {\n  // converts 1s and 2s to 3s, leaves zeroes untouched.\n  const uint32_t vec_ct = NypCtToVecCt(sample_ct);\n  assert(IsVecAligned(genovec));\n  const VecW m1 = VCONST_W(kMask5555);\n  VecW* vptr = R_CAST(VecW*, genovec);\n  for (uint32_t vidx = 0; vidx != vec_ct; ++vidx) {\n    VecW cur_vec = vptr[vidx];\n    const VecW cur_vec_rshifted = vecw_srli(cur_vec, 1);\n    cur_vec = cur_vec | cur_vec_rshifted;\n    cur_vec = cur_vec & m1;\n    vptr[vidx] = cur_vec | vecw_slli(cur_vec, 1);\n  }\n}\n\nvoid GenovecNontwoToMissingUnsafe(uint32_t sample_ct, uintptr_t* genovec) {\n  // 0 -> 3, 1 -> 3.\n  const uint32_t vec_ct = NypCtToVecCt(sample_ct);\n  assert(IsVecAligned(genovec));\n  const VecW not_m1 = VCONST_W(kMaskAAAA);\n  VecW* vptr = R_CAST(VecW*, genovec);\n  for (uint32_t vidx = 0; vidx != vec_ct; ++vidx) {\n    const VecW cur_vec = vptr[vidx];\n    const VecW cur_vec_hi = vecw_and_notfirst(cur_vec, not_m1);\n    const VecW cur_or = cur_vec_hi | vecw_srli(cur_vec_hi, 1);\n    vptr[vidx] = cur_vec | cur_or;\n  }\n}\n\nvoid GenovecNonzeroToMissingThenInvertUnsafe(uint32_t sample_ct, uintptr_t* genovec) {\n  // 0 -> 2, 1 -> 3, 2 -> 3\n  const uint32_t vec_ct = NypCtToVecCt(sample_ct);\n  assert(IsVecAligned(genovec));\n  const VecW not_m1 = VCONST_W(kMaskAAAA);\n  VecW* vptr = R_CAST(VecW*, genovec);\n  for (uint32_t vidx = 0; vidx != vec_ct; ++vidx) {\n    const VecW cur_vec = vptr[vidx];\n    vptr[vidx] = cur_vec | vecw_srli(cur_vec, 1) | not_m1;\n  }\n}\n\nvoid GenovecInvertThenNonzeroToMissingUnsafe(uint32_t sample_ct, uintptr_t* genovec) {\n  // 0 -> 3, 1 -> 3, 2 -> 0\n  const uint32_t vec_ct = NypCtToVecCt(sample_ct);\n  assert(IsVecAligned(genovec));\n  const VecW m1 = VCONST_W(kMask5555);\n  VecW* vptr = R_CAST(VecW*, genovec);\n  for (uint32_t vidx = 0; vidx != vec_ct; ++vidx) {\n    const VecW cur_vec = vptr[vidx];\n    const VecW cur_vec_rshifted = vecw_srli(cur_vec, 1);\n    const VecW not2 = vecw_and_notfirst(vecw_and_notfirst(cur_vec, cur_vec_rshifted), m1);\n    vptr[vidx] = not2 | vecw_slli(not2, 1);\n  }\n}\n\ndouble BiallelicDiploidMinimac3R2(uint64_t alt1_dosage, uint64_t hap_alt1_ssq_x2, uint32_t nm_sample_ct) {\n  if (!nm_sample_ct) {\n    return (0.0 / 0.0);\n  }\n\n  const uint64_t nm_sample_ct_x32768 = nm_sample_ct * 0x8000LLU;\n  if (nm_sample_ct < 131072) {\n    const uint64_t alt1_dosage_sq = alt1_dosage * alt1_dosage;\n    const uint64_t observed_variance_times_2n = hap_alt1_ssq_x2 * nm_sample_ct - alt1_dosage * alt1_dosage;\n    const uint64_t expected_variance_times_2n = nm_sample_ct_x32768 * alt1_dosage - alt1_dosage_sq;\n    return S_CAST(double, observed_variance_times_2n) / S_CAST(double, expected_variance_times_2n);\n  }\n  // Need to avoid catastrophic cancellation here.\n  const double alt1_dosaged = u63tod(alt1_dosage);\n  const double expected_variance_times_2n = alt1_dosaged * u63tod(nm_sample_ct_x32768 - alt1_dosage);\n  const uint64_t hap_alt1_ssq_x2_hi = hap_alt1_ssq_x2 >> 32;\n  uint64_t left_lo = (hap_alt1_ssq_x2 & 0xffffffffLLU) * nm_sample_ct;\n  const uint64_t left_hi = (left_lo >> 32) + hap_alt1_ssq_x2_hi * nm_sample_ct;\n  left_lo &= 0xffffffffU;\n  const uint64_t alt1_dosage_lo = alt1_dosage & 0xffffffffLLU;\n  const uint64_t alt1_dosage_hi = alt1_dosage >> 32;\n  uint64_t right_lo = 
alt1_dosage_lo * alt1_dosage_lo;\n  const uint64_t right_hi = (right_lo >> 32) + (alt1_dosage_lo + alt1_dosage) * alt1_dosage_hi;\n  right_lo &= 0xffffffffU;\n  const double observed_variance_times_2n_hi = u63tod(left_hi - right_hi);\n  const int64_t observed_variance_times_2n_lo = S_CAST(int64_t, left_lo) - S_CAST(int64_t, right_lo);\n  const double observed_variance_times_2n = (observed_variance_times_2n_hi * 4294967296.0) + observed_variance_times_2n_lo;\n  return observed_variance_times_2n / expected_variance_times_2n;\n}\n\n// These two functions do not overread, but may write extra bytes up to the\n// word boundary.\n// They are likely to be moved to plink2_bits.\n// bugfix (30 Oct 2023): given how we use this function, we have to drop the\n// vector-alignment requirement for dst.\nvoid Expand2bitTo8(const void* __restrict bytearr, uint32_t input_nyp_ct, uint32_t incr, void* __restrict dst) {\n  // Tried adding incr == 0 fast path, negligible performance difference in\n  // benchmark.\n  // GenoarrLookup256x1bx4 takes ~3-4x as long.\n  uint32_t input_byte_ct = DivUp(input_nyp_ct, 4);\n#ifdef USE_SSE2\n  const unsigned char* src_iter = S_CAST(const unsigned char*, bytearr);\n  const uint32_t input_vec_ct = input_byte_ct / kBytesPerVec;\n  uintptr_t* dst_iter = S_CAST(uintptr_t*, dst);\n  if (input_vec_ct) {\n    const VecW mincr = VecUcToW(vecuc_set1(incr));\n    const VecW m03 = VCONST_W(kMask0303);\n    for (uint32_t vec_idx = 0; vec_idx != input_vec_ct; ++vec_idx) {\n      VecW cur_vec = vecw_loadu(src_iter);\n      src_iter = &(src_iter[kBytesPerVec]);\n#  ifdef USE_AVX2\n      // (todo: benchmark against just reading 8 bytes at a time and\n      // broadcasting.)\n      // midswapped_vec contains {0-1-2-3, 4-5-6-7, ..., 12-13-14-15,\n      //                          32-33-34-35, ..., 44-45-46-47,\n      //                          16-17-18-19, ..., 28-29-30-31,\n      //                          48-49-50-51, ..., 60-61-62-63,\n      //                          64-65-66-67, ..., 76-77-78-79,\n      //                          96-97-98-99, ..., 108-109-110-111,\n      //                          80-81-82-83, ..., 92-93-94-95,\n      //                          112-113-114-115, ..., 124-125-126-127}\n      // 0xd8: {0, 2, 1, 3}\n      const __m256i midswapped_vec = _mm256_shuffle_epi32(WToVec(cur_vec), 0xd8);\n      // This operation is also used in FillInterleavedMaskVec().\n      // cur_vec now contains {0-1-2-3, 4-5-6-7, 8-9-10-11, 12-13-14-15,\n      //                       32-33-34-35, ..., 44-45-46-47,\n      //                       64-65-66-67, ..., 76-77-78-79,\n      //                       96-97-98-99, ..., 108-109-110-111,\n      //                       16-17-18-19, ..., 28-29-30-31,\n      //                       48-49-50-51, ..., 60-61-62-63,\n      //                       80-81-82-83, ..., 92-93-94-95,\n      //                       112-113-114-115, ..., 124-125-126-127}\n      cur_vec = vecw_permute0xd8_if_avx2(VecToW(midswapped_vec));\n#  endif\n      // AVX2:\n      //   vec_even contains {0-1, 4-5, 8-9, 12-13, 32-33, ..., 44-45,\n      //                      64-65, ..., 76-77, 96-97, ..., 108-109,\n      //                      16-17, ..., 28-29, 48-49, ..., 60-61,\n      //                      80-81, ..., 92-93, 112-113, ..., 124-125}\n      //   vec_odd contains {2-3, 6-7, 10-11, 14-15, 34-35, ..., 46-47,\n      //                     66-67, ..., 78-79, 98-99, ..., 110-111,\n      //                     18-19, ..., 30-31, 50-51, ..., 62-63,\n      //    
                 82-83, ..., 94-95, 114-115, ..., 126-127}\n      // SSE2:\n      //   vec_even contains {0-1, 4-5, 8-9, ..., 60-61}\n      //   vec_odd contains {2-3, 6-7, 10-11, ..., 62-63}\n      const VecW vec_even = cur_vec;\n      const VecW vec_odd = vecw_srli(cur_vec, 4);\n\n      // AVX2:\n      //   vec01 contains {0-1, 2-3, 4-5, ..., 14-15, 32-33, ..., 46-47,\n      //                   16-17, ..., 30-31, 48-49, ..., 62-63}\n      //   vec23 contains {64-65, 66-67, ..., 78-79, 96-97, ..., 110-111,\n      //                   80-81, ..., 94-95, 112-113, ..., 126-127}\n      // SSE2:\n      //   vec01 contains {0-1, 2-3, 4-5, 6-7, ..., 30-31}\n      //   vec23 contains {32-33, 34-35, 36-37, 38-39, ..., 62-63}\n      // There's no m4 masking here, so we don't use vecw_lo_and_hi_nybbles.\n      const VecW vec01 = vecw_unpacklo8(vec_even, vec_odd);\n      const VecW vec23 = vecw_unpackhi8(vec_even, vec_odd);\n\n      // AVX2:\n      //   vec01_even contains {0, 2, 4, ..., 14, 32, 34, ..., 46,\n      //                        16, 18, ..., 30, 48, 50, ..., 62}\n      //   vec01_odd contains {1, 3, 5, ..., 15, 33, 35, ..., 47,\n      //                       17, 19, ..., 31, 49, 51, ..., 63}\n      // SSE2:\n      //   vec01_even contains {0, 2, 4, 6, ..., 30}\n      //   vec01_odd contains {1, 3, 5, 7, ..., 31}\n      const VecW vec01_even = vec01 & m03;\n      const VecW vec01_odd = vecw_srli(vec01, 2) & m03;\n\n      // AVX2:\n      //   vecw_unpacklo8() contains {0, 1, ..., 15, 16, ..., 31}\n      //   vecw_unpackhi8() contains {32, 33, ..., 47, 48, ..., 63}\n      // SSE2:\n      //   vecw_unpacklo8() contains {0, 1, ..., 15}\n      //   vecw_unpackhi8() contains {16, 17, ..., 31}\n      vecw_storeu(dst_iter, mincr + vecw_unpacklo8(vec01_even, vec01_odd));\n      dst_iter = &(dst_iter[kWordsPerVec]);\n      vecw_storeu(dst_iter, mincr + vecw_unpackhi8(vec01_even, vec01_odd));\n      dst_iter = &(dst_iter[kWordsPerVec]);\n      const VecW vec23_odd = vecw_srli(vec23, 2) & m03;\n      const VecW vec23_even = vec23 & m03;\n      vecw_storeu(dst_iter, mincr + vecw_unpacklo8(vec23_even, vec23_odd));\n      dst_iter = &(dst_iter[kWordsPerVec]);\n      vecw_storeu(dst_iter, mincr + vecw_unpackhi8(vec23_even, vec23_odd));\n      dst_iter = &(dst_iter[kWordsPerVec]);\n    }\n  }\n  input_byte_ct = input_byte_ct % kBytesPerVec;\n  if (!input_byte_ct) {\n    return;\n  }\n  const unsigned char* src_uc = src_iter;\n  uintptr_t* dstw = dst_iter;\n#else  // !USE_SSE2\n  const unsigned char* src_uc = S_CAST(const unsigned char*, bytearr);\n  uintptr_t* dstw = S_CAST(uintptr_t*, dst);\n#endif\n  const uint32_t full_qw_ct = input_byte_ct / sizeof(Quarterword);\n  const uintptr_t incr_word = kMask0101 * incr;\n  for (uint32_t uii = 0; uii != full_qw_ct; ++uii) {\n    Quarterword cur_qw;\n    CopyFromUnalignedOffsetQW(&cur_qw, src_uc, uii);\n    dstw[uii] = incr_word + Unpack0303(cur_qw);\n  }\n#ifdef __LP64__\n  if (input_byte_ct % 2) {\n    uintptr_t cur_byte = src_uc[input_byte_ct - 1];\n#  ifdef USE_AVX2\n    cur_byte = _pdep_u64(cur_byte, kMask0303);\n#  else\n    cur_byte = cur_byte | (cur_byte << 12);\n    cur_byte = (cur_byte | (cur_byte << 6)) & kMask0303;\n#  endif\n    dstw[full_qw_ct] = incr_word + cur_byte;\n  }\n#endif\n}\n\nvoid Expand4bitTo8(const void* __restrict bytearr, uint32_t input_nybble_ct, uint32_t incr, void* __restrict dst) {\n  // Tried adding incr == 0 fast path, negligible performance difference in\n  // benchmark.\n#ifdef USE_SSE2\n  uint32_t input_byte_ct = 
DivUp(input_nybble_ct, 2);\n  const unsigned char* src_iter = S_CAST(const unsigned char*, bytearr);\n  const uint32_t input_vec_ct = input_byte_ct / kBytesPerVec;\n  uintptr_t* dst_iter = S_CAST(uintptr_t*, dst);\n  if (input_vec_ct) {\n    const VecW mincr = VecUcToW(vecuc_set1(incr));\n    const VecW m4 = VCONST_W(kMask0F0F);\n    for (uint32_t vec_idx = 0; vec_idx != input_vec_ct; ++vec_idx) {\n      const VecW cur_vec = vecw_loadu(src_iter);\n      src_iter = &(src_iter[kBytesPerVec]);\n      VecW vec_lo;\n      VecW vec_hi;\n      vecw_lo_and_hi_nybbles(cur_vec, m4, &vec_lo, &vec_hi);\n      vecw_storeu(dst_iter, mincr + vec_lo);\n      dst_iter = &(dst_iter[kWordsPerVec]);\n      vecw_storeu(dst_iter, mincr + vec_hi);\n      dst_iter = &(dst_iter[kWordsPerVec]);\n    }\n  }\n  input_byte_ct = input_byte_ct % kBytesPerVec;\n  if (!input_byte_ct) {\n    return;\n  }\n  const unsigned char* src_uc = src_iter;\n  uintptr_t* dstw = dst_iter;\n#else\n  if (!input_nybble_ct) {\n    return;\n  }\n  const unsigned char* src_uc = S_CAST(const unsigned char*, bytearr);\n  uintptr_t* dstw = S_CAST(uintptr_t*, dst);\n  const uint32_t input_byte_ct = DivUp(input_nybble_ct, 2);\n#endif\n  const uint32_t hw_ct_m1 = (input_byte_ct - 1) / sizeof(Halfword);\n  const uintptr_t incr_word = kMask0101 * incr;\n  for (uint32_t hwidx = 0; ; ++hwidx) {\n    Halfword cur_hw;\n    if (hwidx >= hw_ct_m1) {\n      if (hwidx > hw_ct_m1) {\n        break;\n      }\n      cur_hw = SubHWLoad(&(src_uc[hwidx * sizeof(Halfword)]), ModNz(input_byte_ct, sizeof(Halfword)));\n    } else {\n      CopyFromUnalignedOffsetHW(&cur_hw, src_uc, hwidx);\n    }\n    dstw[hwidx] = incr_word + Unpack0F0F(cur_hw);\n  }\n}\n\nvoid PreinitPgfi(PgenFileInfo* pgfip) {\n  pgfip->shared_ff = nullptr;\n  pgfip->pgi_ff = nullptr;\n  pgfip->block_base = nullptr;\n  // we want this for proper handling of e.g. sites-only VCFs\n  pgfip->nonref_flags = nullptr;\n}\n\nuintptr_t CountPgfiAllocCachelinesRequired(uint32_t raw_variant_ct) {\n  // assumes variable-width variant records, otherwise pgfi.vrtypes and\n  // pgfi.vr_fpos can just be nullptr.\n\n  // vrtypes: 1 byte per entry, (raw_variant_ct + 1) entries\n  uintptr_t cachelines_required = 1 + (raw_variant_ct / kCacheline);\n\n  // var_fpos: 8 bytes per entry, (raw_variant_ct + 1) entries\n  cachelines_required += 1 + (raw_variant_ct / kInt64PerCacheline);\n  return cachelines_required;\n}\n\nuintptr_t CountPgrAllocCachelinesRequired(uint32_t raw_sample_ct, PgenGlobalFlags gflags, uint32_t max_allele_ct, uint32_t fread_buf_byte_ct) {\n  // ldbase_raw_genovec: always needed, 2 bits per entry, up to raw_sample_ct\n  // entries\n  const uint32_t genovec_cacheline_req = NypCtToCachelineCt(raw_sample_ct);\n  const uint32_t bitvec_cacheline_req = BitCtToCachelineCt(raw_sample_ct);\n  uintptr_t cachelines_required = genovec_cacheline_req;\n  // fread_buf.  
DivUp() won't overflow since fread_buf_byte_ct requirement\n  // can't exceed kPglMaxBytesPerVariant, which is sufficiently far from 2^32.\n  cachelines_required += DivUp(fread_buf_byte_ct, kCacheline);\n\n  const uint32_t ld_compression_present = (gflags / kfPgenGlobalLdCompressionPresent) & 1;\n  const uint32_t max_difflist_entry_ct_base = (raw_sample_ct / kPglMaxDifflistLenDivisor);\n  if ((gflags & kfPgenGlobalDifflistOrLdPresent) || (max_allele_ct > 2)) {\n    // workspace_difflist_sample_ids\n    // bugfix: must add 1 since several routines add a terminator element\n    cachelines_required += 1 + (max_difflist_entry_ct_base / kInt32PerCacheline);\n  }\n  if (gflags & kfPgenGlobalDifflistOrLdPresent) {\n    // const uint32_t max_difflist_entry_ct = max_difflist_entry_ct_base * (1 + ld_compression_present);\n    // workspace_raregeno_vec\n    cachelines_required += NypCtToCachelineCt(max_difflist_entry_ct_base);\n\n    // workspace_raregeno_tmp_loadbuf\n    cachelines_required += NypCtToCachelineCt(max_difflist_entry_ct_base);\n\n    if (ld_compression_present) {\n      // ldbase_genovec\n      cachelines_required += genovec_cacheline_req;\n\n      // ldbase_raregeno\n      cachelines_required += NypCtToCachelineCt(max_difflist_entry_ct_base);\n\n      // ldbase_difflist_sample_ids\n      cachelines_required += 1 + (max_difflist_entry_ct_base / kInt32PerCacheline);\n    }\n  }\n  const PgenGlobalFlags gflags_hphase_dosage = gflags & (kfPgenGlobalHardcallPhasePresent | kfPgenGlobalDosagePresent);\n  if ((max_allele_ct > 2) || gflags_hphase_dosage) {\n    cachelines_required += genovec_cacheline_req;  // workspace_vec\n    if (max_allele_ct > 2) {\n      // workspace_aux1x_present\n      cachelines_required += bitvec_cacheline_req;\n      // workspace_imp_r2\n      cachelines_required += Int64CtToCachelineCt(2 * max_allele_ct);\n    }\n    if (gflags & kfPgenGlobalHardcallPhasePresent) {\n      // workspace_all_hets, workspace_subset\n      cachelines_required += bitvec_cacheline_req * 2;\n    }\n    if (gflags & kfPgenGlobalDosagePresent) {\n      // aux track #3: usually bitarray tracking which samples have dosage info\n      // (may be stored on disk as a dosage list)\n      cachelines_required += bitvec_cacheline_req;\n      if (gflags & kfPgenGlobalDosagePhasePresent) {\n        // aux track #7: bitarray tracking which dosage entries are phased\n        cachelines_required += bitvec_cacheline_req;\n\n        // phased aux tracks #4,8: 2 bytes per sample\n        // There may be overflow risk here in the future.\n        // (commented out since caller always provides this buffer for now)\n        // cachelines_required += DivUp(2 * k1LU * raw_sample_ct, kCacheline);\n      }\n      // unphased aux track #4: 2 bytes per sample\n      // cachelines_required += DivUp(2 * k1LU * raw_sample_ct, kCacheline);\n\n      // may need deltalist64 workspace in multiallelic dosage case\n    }\n  }\n  return cachelines_required;\n}\n\nstatic_assert(kPglMaxAlleleCt == 255, \"Need to update PgfiInitPhase1().\");\nPglErr PgfiInitPhase1(const char* fname, const char* pgi_fname, uint32_t raw_variant_ct, uint32_t raw_sample_ct, PgenHeaderCtrl* header_ctrl_ptr, PgenFileInfo* pgfip, uintptr_t* pgfi_alloc_cacheline_ct_ptr, char* errstr_buf) {\n  pgfip->var_fpos = nullptr;\n  pgfip->vrtypes = nullptr;\n  pgfip->allele_idx_offsets = nullptr;\n  pgfip->nonref_flags = nullptr;\n\n  // Caller is currently expected to reset max_allele_ct if allele_idx_offsets\n  // is preloaded... 
need to fix this interface.\n  pgfip->max_allele_ct = 2;\n  // pgfip->max_dosage_allele_ct = 0;\n  pgfip->extensions_present = 0;\n\n  pgfip->block_base = nullptr;\n  // this should force overflow when value is uninitialized.\n  pgfip->block_offset = 1LLU << 63;\n\n  uint64_t fsize;\n  const unsigned char* fread_ptr;\n  unsigned char small_readbuf[3];\n  FILE* shared_ff = fopen(fname, FOPEN_RB);\n  pgfip->shared_ff = shared_ff;\n  if (unlikely(!shared_ff)) {\n    snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Failed to open %s : %s.\\n\", fname, strerror(errno));\n    return kPglRetOpenFail;\n  }\n  if (unlikely(fseeko(shared_ff, 0, SEEK_END))) {\n    snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: %s read failure: %s.\\n\", fname, strerror(errno));\n    return kPglRetReadFail;\n  }\n  fsize = ftello(shared_ff);\n  if (unlikely(fsize < 4)) {\n    snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: %s is too small to be a valid .pgen file.\\n\", fname);\n    return kPglRetMalformedInput;\n  }\n  rewind(shared_ff);\n  if (unlikely(!fread_unlocked(small_readbuf, 3, 1, shared_ff))) {\n    snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: %s read failure: %s.\\n\", fname, strerror(errno));\n    return kPglRetReadFail;\n  }\n  fread_ptr = small_readbuf;\n  // deliberate underflow\n  if (unlikely(((raw_variant_ct - 1) > (kPglMaxVariantCt - 1)) && (raw_variant_ct != UINT32_MAX))) {\n    snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Invalid raw_variant_ct function parameter.\\n\");\n    return kPglRetImproperFunctionCall;\n  }\n  if (unlikely(((raw_sample_ct - 1) > (kPglMaxSampleCt - 1)) && (raw_sample_ct != UINT32_MAX))) {\n    snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Invalid raw_sample_ct function parameter.\\n\");\n    return kPglRetImproperFunctionCall;\n  }\n  if (unlikely(!memequal_k(fread_ptr, \"l\\x1b\", 2))) {\n    snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: %s is not a .pgen file (first two bytes don't match the magic number).\\n\", fname);\n    return kPglRetMalformedInput;\n  }\n  const uint32_t file_type_code = fread_ptr[2];\n  *header_ctrl_ptr = 0;\n  if (file_type_code < 2) {\n    // plink 1 binary\n    if (unlikely(!file_type_code)) {\n      // sample-major.  
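// (Added illustration, not part of the upstream pgenlib source: a\n      // hypothetical variable-width .pgen could begin with these 12 header\n      // bytes, which the reads in this function consume in order:\n      //   6c 1b 10  e8 03 00 00  f4 01 00 00  40\n      //   magic number, mode byte 0x10, raw_variant_ct 1000 (little-endian),\n      //   raw_sample_ct 500 (little-endian), header_ctrl 0x40.)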
validate file size here so we don't have to recheck it\n      if ((raw_sample_ct != UINT32_MAX) && (raw_variant_ct != UINT32_MAX)) {\n        const uint64_t fsize_expected = 3 + S_CAST(uint64_t, raw_sample_ct) * NypCtToByteCt(raw_variant_ct);\n        if (fsize != fsize_expected) {\n          char* write_iter = strcpya_k(errstr_buf, \"Error: Unexpected PLINK 1 sample-major .bed file size (\");\n          write_iter = i64toa(fsize_expected, write_iter);\n          strcpy_k(write_iter, \" bytes expected).\\n\");\n          return kPglRetMalformedInput;\n        }\n      }\n      snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: pgenlib does not directly support sample-major PLINK 1 .bed files.\\n(However, PLINK 2 automatically transposes and compresses them for you.)\\n\");\n      return kPglRetSampleMajorBed;\n    }\n    if (unlikely(raw_sample_ct == UINT32_MAX)) {\n      // either .fam must be loaded first, or user must provide sample count\n      snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: PgfiInitPhase1() must be called with an accurate raw_sample_ct value, since %s is a PLINK 1 .bed file.\\n\", fname);\n      return kPglRetImproperFunctionCall;\n    }\n    const uint32_t const_vrec_width = NypCtToByteCt(raw_sample_ct);\n    if (raw_variant_ct == UINT32_MAX) {\n      // allow raw_variant_ct to be inferred\n      uint64_t quotient = (fsize - 3) / const_vrec_width;\n      if (unlikely((quotient > kPglMaxVariantCt) || (quotient * const_vrec_width + 3 != fsize))) {\n        snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Unexpected PLINK 1 .bed file size (since raw_sample_ct was %u, [file size - 3] should be divisible by %u and the quotient should be smaller than 2^31 - 2).\\n\", raw_sample_ct, const_vrec_width);\n        return kPglRetMalformedInput;\n      }\n      raw_variant_ct = quotient;\n    } else {\n      if (unlikely(S_CAST(uint64_t, raw_variant_ct) * const_vrec_width + 3 != fsize)) {\n        char* write_iter = strcpya_k(errstr_buf, \"Error: Unexpected PLINK 1 .bed file size (expected \");\n        write_iter = i64toa(S_CAST(uint64_t, raw_variant_ct) * const_vrec_width + 3, write_iter);\n        strcpy_k(write_iter, \" bytes).\\n\");\n        return kPglRetMalformedInput;\n      }\n    }\n    pgfip->raw_variant_ct = raw_variant_ct;\n    pgfip->raw_sample_ct = raw_sample_ct;\n    pgfip->const_fpos_offset = 3;\n\n    pgfip->const_vrtype = kPglVrtypePlink1;\n    pgfip->const_vrec_width = const_vrec_width;\n    pgfip->gflags = kfPgenGlobalAllNonref;\n    *pgfi_alloc_cacheline_ct_ptr = 0;\n    return kPglRetSuccess;\n  }\n\n  FILE* header_ff = shared_ff;\n  const char* header_fname = fname;\n\n  // Must declare here, rather than inside else{} block, since the buffer would\n  // otherwise be implicitly popped off the stack as soon as the else-block is\n  // exited, but we may still try to print this filename in an error message.\n  // This isn't Golang.\n  char pgi_fname_buf[kPglFnamesize];\n\n  if ((file_type_code & 0xfe) != 0x20) {\n    if (unlikely((file_type_code & 0xfe) == 0x30)) {\n      snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: %s is a .pgen.pgi index file, rather than a .pgen file.\\n\", fname);\n      return kPglRetMalformedInput;\n    }\n    if (unlikely(fsize < 12)) {\n      snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: %s is too small to be a valid .pgen file.\\n\", fname);\n      return kPglRetMalformedInput;\n    }\n  } else {\n    header_fname = pgi_fname;\n    if (!header_fname) {\n      const uint32_t fname_slen = strlen(fname);\n      // need space to 
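// (Added worked example, not part of the upstream pgenlib source, for\n      // the .bed variant-count inference above: with raw_sample_ct == 100,\n      // const_vrec_width == NypCtToByteCt(100) == 25 bytes per variant, so a\n      // 25003-byte file gives quotient == (25003 - 3) / 25 == 1000; the\n      // division is exact, so raw_variant_ct is inferred to be 1000.)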
append \".pgi\" plus null-terminator\n      if (unlikely(fname_slen > kPglFnamesize - 5)) {\n        snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: .pgen filename too long.\\n\");\n        return kPglRetMalformedInput;\n      }\n      char* fname_iter = memcpya(pgi_fname_buf, fname, fname_slen);\n      strcpy_k(fname_iter, \".pgi\");\n      header_fname = pgi_fname_buf;\n    }\n    header_ff = fopen(header_fname, FOPEN_RB);\n    pgfip->pgi_ff = header_ff;\n    if (unlikely(!header_ff)) {\n      snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Failed to open %s : %s.\\n\", header_fname, strerror(errno));\n      return kPglRetOpenFail;\n    }\n    if (unlikely(!fread_unlocked(small_readbuf, 3, 1, header_ff))) {\n      snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: %s read failure: %s.\\n\", header_fname, strerror(errno));\n      return kPglRetReadFail;\n    }\n    if (unlikely((!memequal_k(small_readbuf, \"l\\x1b\", 2)) || ((small_readbuf[2] & 0xfe) != 0x30))) {\n      snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: %s is not a .pgen.pgi file (first three bytes don't match the magic number).\\n\", header_fname);\n      return kPglRetMalformedInput;\n    }\n  }\n  if (unlikely((!fread_unlocked(&(pgfip->raw_variant_ct), sizeof(int32_t), 1, header_ff)) ||\n               (!fread_unlocked(&(pgfip->raw_sample_ct), sizeof(int32_t), 1, header_ff)) ||\n               (!fread_unlocked(header_ctrl_ptr, 1, 1, header_ff)))) {\n    snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: %s read failure: %s.\\n\", header_fname, strerror(errno));\n    return kPglRetReadFail;\n  }\n  const PgenHeaderCtrl header_ctrl = *header_ctrl_ptr;\n  if (raw_variant_ct == UINT32_MAX) {\n    raw_variant_ct = pgfip->raw_variant_ct;\n    // deliberate underflow\n    if (unlikely((raw_variant_ct - 1) > (kPglMaxVariantCt - 1))) {\n      snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Invalid variant count in .pgen%s file.\\n\", pgfip->pgi_ff? \".pgi\" : \"\");\n      return kPglRetMalformedInput;\n    }\n  } else if (unlikely(raw_variant_ct != pgfip->raw_variant_ct)) {\n    snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: PgfiInitPhase1() was called with raw_variant_ct == %u, but %s contains %u variant%s.\\n\", raw_variant_ct, fname, pgfip->raw_variant_ct, (pgfip->raw_variant_ct == 1)? \"\" : \"s\");\n    return kPglRetInconsistentInput;\n  }\n  if (raw_sample_ct == UINT32_MAX) {\n    raw_sample_ct = pgfip->raw_sample_ct;\n    // deliberate underflow\n    if (unlikely((raw_sample_ct - 1) > (kPglMaxSampleCt - 1))) {\n      snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Invalid sample count in .pgen%s file.\\n\", pgfip->pgi_ff? \".pgi\" : \"\");\n      return kPglRetMalformedInput;\n    }\n  } else if (unlikely(raw_sample_ct != pgfip->raw_sample_ct)) {\n    snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: PgfiInitPhase1() was called with raw_sample_ct == %u, but %s contains %u sample%s.\\n\", raw_sample_ct, fname, pgfip->raw_sample_ct, (pgfip->raw_sample_ct == 1)? 
\"\" : \"s\");\n    return kPglRetInconsistentInput;\n  }\n  pgfip->gflags = kfPgenGlobal0;\n\n  // explicit storage of \"is this reference allele untrusted?\"\n  // need caller to allocate this\n  uint32_t nonref_flags_storage = header_ctrl >> 6;\n  if (nonref_flags_storage == 2) {\n    pgfip->gflags |= kfPgenGlobalAllNonref;\n  }\n\n  if (file_type_code < 16) {\n    // plink 2 binary, single constant-width vrtype\n    pgfip->const_fpos_offset = 12;\n    if (nonref_flags_storage == 3) {\n      pgfip->const_fpos_offset += DivUp(raw_variant_ct, CHAR_BIT);\n    }\n    if (unlikely(file_type_code > 4)) {\n      snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Third byte of %s does not correspond to a storage mode supported by this version of pgenlib.\\n\", fname);\n      return kPglRetNotYetSupported;\n    }\n    if (unlikely(header_ctrl & 63)) {\n      snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Third byte of %s corresponds to a fixed-width storage mode, but twelfth byte is only consistent with a variable-width mode.\\n\", fname);\n      return kPglRetMalformedInput;\n    }\n    uint32_t vrtype = 0;\n    uintptr_t const_vrec_width = NypCtToByteCt(raw_sample_ct);\n    if (file_type_code == 3) {\n      vrtype = 0x40;\n      const_vrec_width += raw_sample_ct * 2;\n      pgfip->gflags |= kfPgenGlobalDosagePresent;\n    } else if (file_type_code == 4) {\n      vrtype = 0xc0;\n      const_vrec_width += raw_sample_ct * 4;\n      pgfip->gflags |= kfPgenGlobalDosagePresent | kfPgenGlobalDosagePhasePresent;\n    }\n    if (unlikely(S_CAST(uint64_t, raw_variant_ct) * const_vrec_width + pgfip->const_fpos_offset != fsize)) {\n      char* write_iter = strcpya_k(errstr_buf, \"Error: Unexpected .pgen file size (expected \");\n      write_iter = i64toa(S_CAST(uint64_t, raw_variant_ct) * const_vrec_width + pgfip->const_fpos_offset, write_iter);\n      strcpy_k(write_iter, \" bytes).\\n\");\n      return kPglRetMalformedInput;\n    }\n    pgfip->const_vrtype = vrtype;\n    pgfip->const_vrec_width = const_vrec_width;\n    *pgfi_alloc_cacheline_ct_ptr = 0;\n    return kPglRetSuccess;\n  }\n  if (unlikely((file_type_code >= 0x12) && ((file_type_code & 0xfe) != 0x20))) {\n    snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Third byte of %s does not correspond to a storage mode supported by this version of pgenlib.\\n\", fname);\n    return kPglRetNotYetSupported;\n  }\n  // plink 2 binary, general-purpose\n  pgfip->extensions_present = file_type_code & 1;\n  pgfip->const_fpos_offset = 0;\n  pgfip->const_vrtype = UINT32_MAX;\n  pgfip->const_vrec_width = 0;\n  const uintptr_t alt_allele_ct_byte_ct = (header_ctrl >> 4) & 3;\n  if (unlikely(alt_allele_ct_byte_ct > 1)) {\n    snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: This version of pgenlib does not support >254 alternate alleles for a single variant.\\n\");\n    return kPglRetNotYetSupported;\n  }\n\n  if (header_ctrl & 8) {\n    // Special header_ctrl modes:\n    //   8: 1 bit per fused vrtype-length.  Unset = vrtype 5, set = vrtype 0.\n    //   9: 2 bits, multiallelic.  0 = vrtype 5, 1 = vrtype 0, 2-3 = vrtype\n    //      8 with that many more bytes than vrtype 0.  Note that this is\n    //      limited to 16 ALT alleles.\n    //   10: 2 bits, phased.  0 = vrtype 5, 1 = vrtype 0, 2-3 = vrtype 16\n    //       with that many minus 1 bytes beyond vrtype 0.  
While this is also\n    //       aimed at the single-sample use case, it technically supports up to\n    //       15 always-phased or 7 partially-phased samples.\n    //   11: 4 bits, multiallelic + phased.  0 = vrtype 5, 1 = vrtype 0,\n    //       2-7 = vrtype 8 with that many bytes beyond vrtype 0, 9 = vrtype 16\n    //       phase info requiring just 1 byte, 10-15 = vrtype 24 with (x-7)\n    //       extra bytes required between multiallelic and phased tracks.\n    //   12: 2 bits, dosage, must be single-sample.  0 = vrtype 5,\n    //       1 = vrtype 0, 2 = vrtype 0x45 with 2 bytes, 3 = vrtype 0x40 with 3\n    //       total bytes.\n    //   13: reserved for single-sample multiallelic + dosage.\n    //   14: 4 bits, phased + dosage, must be single-sample.  0 and 1 as usual,\n    //       3 = vrtype 16 with 1 phaseinfo byte, 4 = vrtype 0x45 with 2 bytes,\n    //       5 = vrtype 0x40 with 3 total bytes, 12 = vrtype 0xc5 with 4 total\n    //       bytes, 13 = vrtype 0xc0 with 5 total bytes, 15 = vrtype 0xe0 with\n    //       6 total bytes\n    //   15: reserved for single-sample multiallelic + phased dosage.\n    const uint32_t header_ctrl_low3 = header_ctrl & 7;\n    // this can be a table lookup once 13/15 are implemented\n    if (unlikely((header_ctrl_low3 == 5) || (header_ctrl_low3 == 7))) {\n      snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Twelfth byte of %s does not correspond to a format supported by this version of pgenlib.\\n\", header_fname);\n      return kPglRetNotYetSupported;\n    }\n  }\n  *pgfi_alloc_cacheline_ct_ptr = CountPgfiAllocCachelinesRequired(raw_variant_ct);\n  return kPglRetSuccess;\n}\n\nvoid FillPgenHeaderReadErrstrFromNzErrno(uint32_t is_pgi, char* errstr_buf) {\n  snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: .pgen%s read failure: %s.\\n\", is_pgi? \".pgi\" : \"\", strerror(errno));\n}\n\nvoid FillPgenReadErrstrFromNzErrno(char* errstr_buf) {\n  return FillPgenHeaderReadErrstrFromNzErrno(0, errstr_buf);\n}\n\nvoid FillPgenHeaderReadErrstrFromErrno(uint32_t is_pgi, char* errstr_buf) {\n  if (errno) {\n    FillPgenHeaderReadErrstrFromNzErrno(is_pgi, errstr_buf);\n  } else {\n    snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: .pgen%s read failure: File appears to be corrupted.\\n\", is_pgi? \".pgi\" : \"\");\n  }\n}\n\nvoid FillPgenReadErrstrFromErrno(char* errstr_buf) {\n  return FillPgenHeaderReadErrstrFromErrno(0, errstr_buf);\n}\n\nvoid FillPgenHeaderReadErrstr(FILE* ff, uint32_t is_pgi, char* errstr_buf) {\n  if (feof_unlocked(ff)) {\n    errno = 0;\n  }\n  FillPgenHeaderReadErrstrFromErrno(is_pgi, errstr_buf);\n}\n\nvoid FillPgenReadErrstr(FILE* ff, char* errstr_buf) {\n  FillPgenHeaderReadErrstr(ff, 0, errstr_buf);\n}\n\n// Assumes ff points to first byte of appropriate extension-set varint.\n// For each exts_iter entry corresponding to a present extension, size is\n// initialized to 0-based relative sequence number of extension.  size is\n// initialized to ~0LLU for each absent extension.\n// If preprocessing header_exts, footer_fpos_ptr must be nullptr, and ff points\n// to first byte of footer-extension-set varint on successful exit.  If\n// preprocessing footer_exts, footer_fpos_ptr must be non-null, will be filled\n// if footer exists, and ff will be advanced past that on successful exit.\nPglErr PgfiInitPhase2PreprocessExts(uint32_t is_pgi, FILE* ff, PgenExtensionLl* exts_iter, uint64_t* footer_fpos_ptr, char* errstr_buf) {\n  uint32_t cur_type_idx = exts_iter? 
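// (Added walk-through, not part of the upstream pgenlib source, of the\n  // extension-set encoding parsed below: each byte carries 7 type-presence\n  // bits, with bit 7 as a continuation flag.  For example, the byte pair\n  // 0x83 0x01 declares extension types 0 and 1 (first byte, bits 0-1) and\n  // type 7 (second byte, bit 0) present; the relative sequence numbers\n  // assigned are 0, 1, and 2, matching prev_ct + PopcountByte(...) below.)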
exts_iter->type_idx : UINT32_MAX;\n  uint32_t type_idx_start = 0;\n  uint32_t prev_ct = 0;\n  while (1) {\n    const uint32_t type_idx_stop = type_idx_start + 7;\n    const int32_t ii = getc_unlocked(ff);\n    if (unlikely(ii == EOF)) {\n      if (ferror_unlocked(ff)) {\n        FillPgenHeaderReadErrstrFromNzErrno(is_pgi, errstr_buf);\n        return kPglRetReadFail;\n      }\n      snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Invalid .pgen%s.\\n\", is_pgi? \".pgi file\" : \" header\");\n      return kPglRetMalformedInput;\n    }\n    const uint32_t cur_byte = ii;\n    while (cur_type_idx < type_idx_stop) {\n      const uint32_t shifted_bit = 1 << (cur_type_idx - type_idx_start);\n      if (cur_byte & shifted_bit) {\n        exts_iter->size = prev_ct + PopcountByte(cur_byte & (shifted_bit - 1));\n      } else {\n        exts_iter->size = ~0LLU;\n      }\n      exts_iter = exts_iter->next;\n      const uint32_t next_type_idx = exts_iter? exts_iter->type_idx : UINT32_MAX;\n      if (unlikely(next_type_idx <= cur_type_idx)) {\n        snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: PgfiInitPhase2Ex() extension linked-lists must be ordered by increasing type_idx.\\n\");\n        return kPglRetImproperFunctionCall;\n      }\n      cur_type_idx = next_type_idx;\n    }\n    if (unlikely((type_idx_start == 252) && (cur_byte & 0xf0))) {\n      snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Invalid .pgen%s.\\n\", is_pgi? \".pgi file\" : \" header\");\n      return kPglRetMalformedInput;\n    }\n    if (!(cur_byte & 128)) {\n      if (footer_fpos_ptr) {\n        if (cur_byte || prev_ct) {\n          if (unlikely(!fread_unlocked(footer_fpos_ptr, sizeof(int64_t), 1, ff))) {\n            FillPgenHeaderReadErrstr(ff, is_pgi, errstr_buf);\n            return kPglRetReadFail;\n          }\n        }\n      }\n      break;\n    }\n    type_idx_start = type_idx_stop;\n    prev_ct += PopcountByte(cur_byte) - 1;\n  }\n  if (exts_iter) {\n    while (1) {\n      exts_iter->size = ~0LLU;\n      exts_iter = exts_iter->next;\n      if (!exts_iter) {\n        break;\n      }\n      const uint32_t next_type_idx = exts_iter->type_idx;\n      if (next_type_idx <= cur_type_idx) {\n        snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: PgfiInitPhase2Ex() extension linked-lists must be ordered by increasing type_idx.\\n\");\n        return kPglRetImproperFunctionCall;\n      }\n      cur_type_idx = next_type_idx;\n    }\n  }\n  return kPglRetSuccess;\n}\n\nPglErr PgfiInitPhase2FillExtSizes(uint32_t is_pgi, FILE* ff, PgenExtensionLl* exts_iter, char* errstr_buf) {\n  uint32_t next_seq_idx = 0;\n  for (; exts_iter; exts_iter = exts_iter->next) {\n    if (exts_iter->size == ~0LLU) {\n      continue;\n    }\n    const uint32_t cur_seq_idx = exts_iter->size;\n    for (; next_seq_idx < cur_seq_idx; ++next_seq_idx) {\n      if (unlikely(FSkipVint(ff))) {\n        goto PgfiInitPhase2FillExtSizes_error_or_eof;\n      }\n    }\n    const uint64_t cur_size = FGetVint63(ff);\n    if (unlikely(cur_size == (1LLU << 63))) {\n      goto PgfiInitPhase2FillExtSizes_error_or_eof;\n    }\n    exts_iter->size = cur_size;\n    next_seq_idx = cur_seq_idx + 1;\n  }\n  return kPglRetSuccess;\n PgfiInitPhase2FillExtSizes_error_or_eof:\n  if (ferror_unlocked(ff)) {\n    FillPgenHeaderReadErrstrFromNzErrno(is_pgi, errstr_buf);\n    return kPglRetReadFail;\n  }\n  snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Invalid .pgen%s.\\n\", is_pgi? 
\".pgi file\" : \" header\");\n  return kPglRetMalformedInput;\n}\n\nstatic_assert(kPglMaxAlleleCt == 255, \"Need to update PgfiInitPhase2Ex().\");\nPglErr PgfiInitPhase2Ex(PgenHeaderCtrl header_ctrl, uint32_t allele_cts_already_loaded, uint32_t nonref_flags_already_loaded, uint32_t use_blockload, uint32_t vblock_idx_start, uint32_t vidx_end, uint32_t* max_vrec_width_ptr, PgenFileInfo* pgfip, unsigned char* pgfi_alloc, PgenExtensionLl* header_exts, PgenExtensionLl* footer_exts, uintptr_t* pgr_alloc_cacheline_ct_ptr, char* errstr_buf) {\n  // *max_vrec_width_ptr technically only needs to be set in single-variant\n  // fread() mode, but its computation is not currently optimized out in the\n  // other two modes.\n\n  // possible todo: add option to skip validation when allele_cts/nonref_flags\n  // are already loaded.  but let's play it safe for now.\n  const uint32_t raw_variant_ct = pgfip->raw_variant_ct;\n  const uint32_t const_vrec_width = pgfip->const_vrec_width;\n  *pgr_alloc_cacheline_ct_ptr = 0;\n\n  // Note that this is a rather hefty stack allocation.\n  unsigned char loadbuf[kPglVblockSize * 4];\n\n  uintptr_t* allele_idx_offsets_iter = pgfip->allele_idx_offsets;\n  uintptr_t prev_allele_idx_offset = 0;\n  if (allele_idx_offsets_iter) {\n    if (!allele_cts_already_loaded) {\n      *allele_idx_offsets_iter = 0;\n    } else {\n      prev_allele_idx_offset = *allele_idx_offsets_iter;\n    }\n    ++allele_idx_offsets_iter;\n  }\n  if (!raw_variant_ct) {\n    return kPglRetSuccess;\n  }\n  const uint32_t nonref_flags_stored = ((header_ctrl >> 6) == 3);\n  unsigned char* nonref_flags_iter = DowncastToUc(pgfip->nonref_flags);\n  const unsigned char* fread_ptr = nullptr;  // maybe-uninitialized warning\n  FILE* header_ff = pgfip->pgi_ff;\n  const uint32_t is_pgi = (header_ff != nullptr);\n  if (!is_pgi) {\n    header_ff = pgfip->shared_ff;\n    assert(header_ff);\n  }\n  if (const_vrec_width) {\n    assert(!is_pgi);\n    // no allele counts to verify if fixed-width\n    // always need ldbase_raw_genovec\n    *pgr_alloc_cacheline_ct_ptr = NypCtToCachelineCt(pgfip->raw_sample_ct);\n    *max_vrec_width_ptr = const_vrec_width;\n    if (!use_blockload) {\n      // using fread() single-variant-at-a-time, need pgr.fread_buf\n      *pgr_alloc_cacheline_ct_ptr += DivUp(const_vrec_width, kCacheline);\n    }\n    if ((!(header_ctrl & 192)) || (pgfip->const_vrtype == kPglVrtypePlink1)) {\n      return kPglRetSuccess;\n    }\n    if ((header_ctrl >> 6) == 1) {\n      // all ref\n      if (nonref_flags_already_loaded) {\n        if (unlikely(!AllWordsAreZero(pgfip->nonref_flags, BitCtToWordCt(raw_variant_ct)))) {\n          snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Loaded nonref_flags do not match values in .pgen file.\\n\");\n          return kPglRetInconsistentInput;\n        }\n      }\n      return kPglRetSuccess;\n    }\n    if ((header_ctrl >> 6) == 2) {\n      // all nonref\n      if (nonref_flags_already_loaded) {\n        if (unlikely(!AllBitsAreOne(pgfip->nonref_flags, raw_variant_ct))) {\n          snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Loaded nonref_flags do not match values in .pgen file.\\n\");\n          return kPglRetInconsistentInput;\n        }\n      }\n      return kPglRetSuccess;\n    }\n    // _last more useful than _end iff we just refer to the number of elements\n    // in the block and have no use for a _stop pointer\n    unsigned char* nonref_flags_last = &(nonref_flags_iter[((raw_variant_ct - 1) / (kPglVblockSize * 32)) * (kPglVblockSize * 4)]);\n    
uint32_t cur_byte_ct = kPglVblockSize * 4;\n    for (; ; nonref_flags_iter = &(nonref_flags_iter[cur_byte_ct])) {\n      if (nonref_flags_iter >= nonref_flags_last) {\n        if (nonref_flags_iter > nonref_flags_last) {\n          break;\n        }\n        cur_byte_ct = 1 + ((raw_variant_ct - 1) % (kPglVblockSize * 32)) / CHAR_BIT;\n      }\n      unsigned char* loadptr = nonref_flags_already_loaded? loadbuf : nonref_flags_iter;\n      if (unlikely(!fread_unlocked(loadptr, cur_byte_ct, 1, header_ff))) {\n        FillPgenHeaderReadErrstr(header_ff, is_pgi, errstr_buf);\n        return kPglRetReadFail;\n      }\n      if (nonref_flags_already_loaded) {\n        if (unlikely(!memequal(nonref_flags_iter, loadbuf, cur_byte_ct))) {\n          snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Loaded nonref_flags do not match values in .pgen file.\\n\");\n          return kPglRetInconsistentInput;\n        }\n      }\n    }\n    return kPglRetSuccess;\n  }\n\n  const uint32_t raw_sample_ct = pgfip->raw_sample_ct;\n  unsigned char* vrtypes_iter = pgfi_alloc;\n  pgfip->vrtypes = vrtypes_iter;\n  uint64_t* var_fpos_iter = R_CAST(uint64_t*, &(vrtypes_iter[RoundUpPow2(raw_variant_ct + 1, kCacheline)]));\n  pgfip->var_fpos = var_fpos_iter;\n  uint32_t vblock_ct_m1 = (raw_variant_ct - 1) / kPglVblockSize;\n  uint32_t max_vrec_width = 0;\n  uint64_t variant_fpos;\n  if (vblock_idx_start) {\n    if (unlikely(fseeko(header_ff, vblock_idx_start * sizeof(int64_t), SEEK_CUR))) {\n      FillPgenHeaderReadErrstrFromNzErrno(is_pgi, errstr_buf);\n      return kPglRetReadFail;\n    }\n  }\n  if (unlikely(!fread_unlocked(&variant_fpos, sizeof(int64_t), 1, header_ff))) {\n    FillPgenHeaderReadErrstr(header_ff, is_pgi, errstr_buf);\n    return kPglRetReadFail;\n  }\n  // May also need to load the rest of these values in the future, if we want\n  // to support dynamic insertion into a memory-mapped file.  
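// (Added sketch, not part of the upstream pgenlib source, of the\n  // variable-width header layout being traversed here: after the 12 fixed\n  // bytes comes one 8-byte file offset per vblock of kPglVblockSize\n  // variants, followed by per-vblock groups of vrtypes, variant-record\n  // lengths, optional alt-allele counts, and optional nonref flags; the\n  // fseeko() calls below skip to the vblock_idx_start portion of each.)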
But skip them\n  // for now.\n  if (unlikely(fseeko(header_ff, (vblock_ct_m1 - vblock_idx_start) * sizeof(int64_t), SEEK_CUR))) {\n    FillPgenHeaderReadErrstrFromNzErrno(is_pgi, errstr_buf);\n    return kPglRetReadFail;\n  }\n  const uint32_t vrtype_and_fpos_storage = header_ctrl & 15;\n  const uint32_t alt_allele_ct_byte_ct = (header_ctrl >> 4) & 3;\n  if (alt_allele_ct_byte_ct) {\n    assert(alt_allele_ct_byte_ct == 1);\n    if (unlikely(!allele_idx_offsets_iter)) {\n      snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: pgfip->allele_idx_offsets must be allocated before PgfiInitPhase2Ex() is called.\\n\");\n      return kPglRetImproperFunctionCall;\n    }\n  }\n  uint32_t vblock_idx = vblock_idx_start;\n  vblock_ct_m1 = (vidx_end - 1) / kPglVblockSize;\n  if (vblock_idx) {\n    uintptr_t header_vblock_byte_ct = kPglVblockSize * alt_allele_ct_byte_ct;\n    if (nonref_flags_stored) {\n      header_vblock_byte_ct += kPglVblockSize / CHAR_BIT;\n    }\n    if (vrtype_and_fpos_storage & 8) {\n      // fused vrtype-length encodings use 1 bit per variant in mode 8,\n      // 2 bits in modes 9-10 and 12, and 4 bits in modes 11 and 14; this\n      // must match the log2_entry_bit_width assignments in the decoding\n      // loop below\n      uint32_t log2_entry_bit_width = 1;\n      if (vrtype_and_fpos_storage == 8) {\n        log2_entry_bit_width = 0;\n      } else if ((vrtype_and_fpos_storage == 11) || (vrtype_and_fpos_storage == 14)) {\n        log2_entry_bit_width = 2;\n      }\n      header_vblock_byte_ct += kPglVblockSize >> (3 - log2_entry_bit_width);\n    } else {\n      if (!(vrtype_and_fpos_storage & 4)) {\n        header_vblock_byte_ct += kPglVblockSize / 2;\n      } else {\n        header_vblock_byte_ct += kPglVblockSize;\n      }\n      header_vblock_byte_ct += kPglVblockSize * (1 + (vrtype_and_fpos_storage & 3));\n    }\n    if (unlikely(fseeko(header_ff, header_vblock_byte_ct * S_CAST(uint64_t, vblock_idx), SEEK_CUR))) {\n      FillPgenHeaderReadErrstrFromNzErrno(is_pgi, errstr_buf);\n      return kPglRetReadFail;\n    }\n  }\n  uint32_t cur_vblock_variant_ct = kPglVblockSize;\n  uint32_t max_allele_ct = pgfip->max_allele_ct;\n  for (; ; ++vblock_idx) {\n    if (vblock_idx >= vblock_ct_m1) {\n      if (vblock_idx > vblock_ct_m1) {\n        break;\n      }\n      cur_vblock_variant_ct = ModNz(vidx_end, kPglVblockSize);\n    }\n    // 1. 
handle vrtypes and var_fpos.\n    if (vrtype_and_fpos_storage >= 8) {\n      // Special encodings.\n      uint32_t log2_entry_bit_width = 1;\n      unsigned char vrtype_table[16];\n      uint32_t vrec_len_table[16];\n      vrtype_table[0] = 5;\n      vrtype_table[1] = 0;\n      vrec_len_table[0] = 0;\n      const uint32_t raw_sample_ct4 = NypCtToByteCt(raw_sample_ct);\n      vrec_len_table[1] = raw_sample_ct4;\n      if (vrtype_and_fpos_storage == 8) {\n        log2_entry_bit_width = 0;\n      } else if (vrtype_and_fpos_storage == 9) {\n        vrtype_table[2] = 8;\n        vrtype_table[3] = 8;\n        vrec_len_table[2] = raw_sample_ct4 + 2;\n        vrec_len_table[3] = raw_sample_ct4 + 3;\n      } else if (vrtype_and_fpos_storage == 10) {\n        vrtype_table[2] = 16;\n        vrtype_table[3] = 16;\n        vrec_len_table[2] = raw_sample_ct4 + 1;\n        vrec_len_table[3] = raw_sample_ct4 + 2;\n      } else if (vrtype_and_fpos_storage == 11) {\n        log2_entry_bit_width = 2;\n        vrtype_table[2] = 8;\n        vrtype_table[3] = 8;\n        vrtype_table[4] = 8;\n        vrtype_table[5] = 8;\n        vrtype_table[6] = 8;\n        vrtype_table[7] = 8;\n        // 8 invalid\n        vrtype_table[9] = 16;\n        vrtype_table[10] = 24;\n        vrtype_table[11] = 24;\n        vrtype_table[12] = 24;\n        vrtype_table[13] = 24;\n        vrtype_table[14] = 24;\n        vrtype_table[15] = 24;\n        vrec_len_table[9] = raw_sample_ct4 + 1;\n        for (uint32_t uii = 2; uii < 8; ++uii) {\n          vrec_len_table[uii] = raw_sample_ct4 + uii;\n          vrec_len_table[uii + 8] = raw_sample_ct4 + 1 + uii;\n        }\n      } else if (vrtype_and_fpos_storage == 12) {\n        assert(raw_sample_ct == 1);\n        vrtype_table[2] = 0x45;\n        vrtype_table[3] = 0x40;\n        vrec_len_table[2] = 2;\n        vrec_len_table[3] = 3;\n      } else {\n        // 14 is only remaining possibility for now\n        assert(raw_sample_ct == 1);\n        log2_entry_bit_width = 2;\n        vrtype_table[3] = 0x10;\n        vrtype_table[4] = 0x45;\n        vrtype_table[5] = 0x40;\n        vrtype_table[12] = 0xc5;\n        vrtype_table[13] = 0xc0;\n        vrtype_table[15] = 0xe0;\n        vrec_len_table[3] = 2;\n        vrec_len_table[4] = 2;\n        vrec_len_table[5] = 3;\n        vrec_len_table[12] = 4;\n        vrec_len_table[13] = 5;\n        vrec_len_table[15] = 6;\n      }\n      const uint32_t entry_bit_width = 1 << log2_entry_bit_width;\n      const uint32_t entry_mask = (1 << entry_bit_width) - 1;\n      const uint32_t cur_byte_ct = 1 + ((cur_vblock_variant_ct - 1) >> (3 - log2_entry_bit_width));\n      const unsigned char* loadbuf_biter;\n      if (unlikely(!fread_unlocked(loadbuf, cur_byte_ct, 1, header_ff))) {\n        FillPgenHeaderReadErrstr(header_ff, is_pgi, errstr_buf);\n        return kPglRetReadFail;\n      }\n      loadbuf_biter = loadbuf;\n      const uint32_t log2_entries_per_word = kBitsPerWordLog2 - log2_entry_bit_width;\n      const uint32_t block_len = 1 << log2_entries_per_word;\n      uint32_t cur_vblock_idx = 0;\n      uint32_t cur_vblock_idx_stop = block_len;\n      for (; ; cur_vblock_idx_stop += block_len) {\n        if (cur_vblock_idx_stop > cur_vblock_variant_ct) {\n          if (cur_vblock_idx == cur_vblock_variant_ct) {\n            break;\n          }\n          cur_vblock_idx_stop = cur_vblock_variant_ct;\n        }\n        uintptr_t input_word;\n        CopyFromUnalignedIncrW(&input_word, &loadbuf_biter);\n        for (; cur_vblock_idx != 
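// (Added decode example, not part of the upstream pgenlib source: under\n        // vrtype_and_fpos_storage == 9, entries are 2 bits each, so input\n        // byte 0x61 == 0b01100001 holds entries 1, 0, 2, 1 from low to high:\n        //   1 -> vrtype 0, vrec_len raw_sample_ct4\n        //   0 -> vrtype 5, vrec_len 0\n        //   2 -> vrtype 8, vrec_len raw_sample_ct4 + 2\n        //   1 -> vrtype 0, vrec_len raw_sample_ct4\n        // matching the vrtype_table[] / vrec_len_table[] entries above.)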
cur_vblock_idx_stop; ++cur_vblock_idx) {\n          const uint32_t input_word_masked = input_word & entry_mask;\n          *vrtypes_iter++ = vrtype_table[input_word_masked];\n          *var_fpos_iter++ = variant_fpos;\n          variant_fpos += vrec_len_table[input_word_masked];\n          input_word >>= entry_bit_width;\n        }\n      }\n    } else {\n      if (vrtype_and_fpos_storage < 4) {\n        // no phase or dosage present, 4-bit vrtypes\n        const uint32_t cur_byte_ct = DivUp(cur_vblock_variant_ct, 2);\n        if (unlikely(!fread_unlocked(loadbuf, cur_byte_ct, 1, header_ff))) {\n          FillPgenHeaderReadErrstr(header_ff, is_pgi, errstr_buf);\n          return kPglRetReadFail;\n        }\n        fread_ptr = loadbuf;\n        Expand4bitTo8(fread_ptr, cur_vblock_variant_ct, 0, vrtypes_iter);\n        vrtypes_iter = &(vrtypes_iter[cur_vblock_variant_ct]);\n      } else {\n        // phase and dosage\n        if (unlikely(!fread_unlocked(vrtypes_iter, cur_vblock_variant_ct, 1, header_ff))) {\n          FillPgenHeaderReadErrstr(header_ff, is_pgi, errstr_buf);\n          return kPglRetReadFail;\n        }\n        vrtypes_iter = &(vrtypes_iter[cur_vblock_variant_ct]);\n      }\n      const uint32_t vrec_len_byte_ct = 1 + (vrtype_and_fpos_storage & 3);\n      const uint32_t cur_byte_ct = cur_vblock_variant_ct * vrec_len_byte_ct;\n      if (unlikely(!fread_unlocked(loadbuf, cur_byte_ct, 1, header_ff))) {\n        FillPgenHeaderReadErrstr(header_ff, is_pgi, errstr_buf);\n        return kPglRetReadFail;\n      }\n      fread_ptr = loadbuf;\n      if (vrec_len_byte_ct == 1) {\n        for (uint32_t cur_vblock_vidx = 0; cur_vblock_vidx != cur_vblock_variant_ct; ++cur_vblock_vidx) {\n          var_fpos_iter[cur_vblock_vidx] = variant_fpos;\n          uint32_t cur_vrec_len = fread_ptr[cur_vblock_vidx];\n          variant_fpos += cur_vrec_len;\n          // no need for correct max_vrec_width\n        }\n      } else if (vrec_len_byte_ct == 2) {\n        for (uint32_t cur_vblock_vidx = 0; cur_vblock_vidx != cur_vblock_variant_ct; ++cur_vblock_vidx) {\n          var_fpos_iter[cur_vblock_vidx] = variant_fpos;\n          uint16_t cur_vrec_len;\n          CopyFromUnalignedU16(&cur_vrec_len, &(fread_ptr[cur_vblock_vidx * 2]));\n          variant_fpos += cur_vrec_len;\n          if (cur_vrec_len > max_vrec_width) {\n            // todo: check whether we're better off just assuming 2^16 - 1\n            max_vrec_width = cur_vrec_len;\n          }\n        }\n      } else if (vrec_len_byte_ct == 3) {\n        for (uint32_t cur_vblock_vidx = 0; cur_vblock_vidx != cur_vblock_variant_ct; ++cur_vblock_vidx) {\n          var_fpos_iter[cur_vblock_vidx] = variant_fpos;\n          uint32_t cur_vrec_len;\n          // safe to read a byte past the end, since that's in loadbuf\n          CopyFromUnalignedU32(&cur_vrec_len, &(fread_ptr[cur_vblock_vidx * 3]));\n          cur_vrec_len &= 0xffffff;\n          variant_fpos += cur_vrec_len;\n          if (cur_vrec_len > max_vrec_width) {\n            max_vrec_width = cur_vrec_len;\n          }\n        }\n      } else {\n        for (uint32_t cur_vblock_vidx = 0; cur_vblock_vidx != cur_vblock_variant_ct; ++cur_vblock_vidx) {\n          var_fpos_iter[cur_vblock_vidx] = variant_fpos;\n          uint32_t cur_vrec_len;\n          CopyFromUnalignedU32(&cur_vrec_len, &(fread_ptr[cur_vblock_vidx * 4]));\n          variant_fpos += cur_vrec_len;\n          if (cur_vrec_len > max_vrec_width) {\n            max_vrec_width = cur_vrec_len;\n          }\n        }\n#ifdef 
__LP64__\n        if (unlikely(max_vrec_width > kPglMaxBytesPerVariant)) {\n          snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Invalid .pgen%s.\\n\", is_pgi? \".pgi file\" : \" header\");\n          return kPglRetMalformedInput;\n        }\n#else\n        if (unlikely(max_vrec_width > kMaxBytesPerIO)) {\n          snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Variant records too large for 32-bit pgenlib.\\n\");\n          return kPglRetNomem;\n        }\n#endif\n      }\n      var_fpos_iter = &(var_fpos_iter[cur_vblock_variant_ct]);\n    }\n    // 2. allele counts?\n    if (alt_allele_ct_byte_ct) {\n      assert(alt_allele_ct_byte_ct == 1);\n      if (unlikely(!fread_unlocked(loadbuf, cur_vblock_variant_ct * alt_allele_ct_byte_ct, 1, header_ff))) {\n        FillPgenHeaderReadErrstr(header_ff, is_pgi, errstr_buf);\n        return kPglRetReadFail;\n      }\n      fread_ptr = loadbuf;\n      // max_allele_ct scan can probably be sped up with _mm{256}_max_epu8()?\n      // probably can't do much for main loop (at least in sizeof(AlleleCode)\n      // == 1 case)\n      if (allele_cts_already_loaded) {\n        // todo: update this for multibyte AlleleCode\n        for (uint32_t cur_vblock_vidx = 0; cur_vblock_vidx != cur_vblock_variant_ct; ++cur_vblock_vidx) {\n          const uintptr_t cur_allele_idx_offset = allele_idx_offsets_iter[cur_vblock_vidx];\n          const uint32_t cur_allele_ct = fread_ptr[cur_vblock_vidx];\n          if (unlikely((cur_allele_idx_offset - prev_allele_idx_offset) != cur_allele_ct)) {\n            snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Loaded allele_idx_offsets do not match values in .pgen%s file.\\n\", is_pgi? \".pgi\" : \"\");\n            return kPglRetInconsistentInput;\n          }\n          prev_allele_idx_offset = cur_allele_idx_offset;\n          if (cur_allele_ct > max_allele_ct) {\n            max_allele_ct = cur_allele_ct;\n          }\n        }\n      } else {\n        for (uint32_t cur_vblock_vidx = 0; cur_vblock_vidx != cur_vblock_variant_ct; ++cur_vblock_vidx) {\n          const uint32_t cur_allele_ct = fread_ptr[cur_vblock_vidx];\n          prev_allele_idx_offset += cur_allele_ct;\n          allele_idx_offsets_iter[cur_vblock_vidx] = prev_allele_idx_offset;\n          if (cur_allele_ct > max_allele_ct) {\n            max_allele_ct = cur_allele_ct;\n          }\n        }\n      }\n      allele_idx_offsets_iter = &(allele_idx_offsets_iter[cur_vblock_variant_ct]);\n    }\n    // 3. nonref flags?\n    if (nonref_flags_stored) {\n      const uint32_t cur_byte_ct = DivUp(cur_vblock_variant_ct, CHAR_BIT);\n      unsigned char* loadptr = nonref_flags_already_loaded? loadbuf : nonref_flags_iter;\n      if (unlikely(!fread_unlocked(loadptr, cur_byte_ct, 1, header_ff))) {\n        FillPgenHeaderReadErrstr(header_ff, is_pgi, errstr_buf);\n        return kPglRetReadFail;\n      }\n      if (nonref_flags_already_loaded) {\n        if (unlikely(!memequal(nonref_flags_iter, loadbuf, cur_byte_ct))) {\n          snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Loaded nonref_flags do not match values in .pgen%s file.\\n\", is_pgi? 
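// (Added worked example, not part of the upstream pgenlib source, for\n          // the allele-count pass above: per-variant allele counts 2, 3, 2, 4\n          // yield allele_idx_offsets 0, 2, 5, 7, 11, where entry v+1 is the\n          // running total through variant v, and max_allele_ct becomes 4.)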
\".pgi\" : \"\");\n          return kPglRetInconsistentInput;\n        }\n      }\n      nonref_flags_iter = &(nonref_flags_iter[cur_byte_ct]);\n    }\n  }\n\n  const uint32_t last_word_byte_ct = cur_vblock_variant_ct % kBytesPerWord;\n  if (last_word_byte_ct) {\n    ProperSubwordStore(0, kBytesPerWord - last_word_byte_ct, vrtypes_iter);\n  } else {\n    // must guarantee a trailing zero for is_ldbase check to work\n    vrtypes_iter[0] = 0;\n  }\n\n  uint64_t footer_extensions_fpos = 0;\n  if (pgfip->extensions_present && (header_exts || footer_exts)) {\n    if (vidx_end < raw_variant_ct) {\n      const uint32_t vrec_len_byte_ct = 1 + (vrtype_and_fpos_storage & 3);\n      const uint32_t phase_or_dosage_present = (vrtype_and_fpos_storage >= 4);\n      const uint64_t ext_fpos = PglHeaderBaseEndOffset(raw_variant_ct, vrec_len_byte_ct, phase_or_dosage_present, nonref_flags_stored);\n      if (unlikely(fseeko(header_ff, ext_fpos, SEEK_SET))) {\n        FillPgenHeaderReadErrstrFromNzErrno(is_pgi, errstr_buf);\n        return kPglRetReadFail;\n      }\n    }\n    PglErr reterr = PgfiInitPhase2PreprocessExts(is_pgi, header_ff, header_exts, nullptr, errstr_buf);\n    if (unlikely(reterr)) {\n      return reterr;\n    }\n    reterr = PgfiInitPhase2PreprocessExts(is_pgi, header_ff, footer_exts, &footer_extensions_fpos, errstr_buf);\n    if (unlikely(reterr)) {\n      return reterr;\n    }\n    reterr = PgfiInitPhase2FillExtSizes(is_pgi, header_ff, header_exts, errstr_buf);\n    if (unlikely(reterr)) {\n      return reterr;\n    }\n  }\n\n  if (is_pgi && (!header_exts) && (!footer_exts)) {\n    if (unlikely(fclose_null(&pgfip->pgi_ff))) {\n      FillPgenHeaderReadErrstrFromNzErrno(1, errstr_buf);\n      return kPglRetReadFail;\n    }\n  }\n\n  const uint64_t actual_fpos = ftello(pgfip->shared_ff);\n  if (actual_fpos != pgfip->var_fpos[0]) {\n    // now > instead of != to allow additional information to be stored between\n    // header and first variant record (e.g. mode 0x11).\n    if (unlikely(actual_fpos > pgfip->var_fpos[0])) {\n      snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Invalid .pgen%s.\\n\", is_pgi? 
\".pgi file\" : \" header\");\n      return kPglRetMalformedInput;\n    }\n    if (unlikely(fseeko(pgfip->shared_ff, pgfip->var_fpos[0], SEEK_SET))) {\n      FillPgenReadErrstrFromNzErrno(errstr_buf);\n      return kPglRetReadFail;\n    }\n  }\n  if (footer_extensions_fpos && footer_exts) {\n    if (unlikely(fseeko(pgfip->shared_ff, footer_extensions_fpos, SEEK_SET))) {\n      FillPgenReadErrstrFromNzErrno(errstr_buf);\n      return kPglRetReadFail;\n    }\n    PglErr reterr = PgfiInitPhase2FillExtSizes(0, pgfip->shared_ff, footer_exts, errstr_buf);\n    if (unlikely(reterr)) {\n      return reterr;\n    }\n    if (unlikely(fseeko(pgfip->shared_ff, pgfip->var_fpos[0], SEEK_SET))) {\n      FillPgenReadErrstrFromNzErrno(errstr_buf);\n      return kPglRetReadFail;\n    }\n  }\n  pgfip->var_fpos[vidx_end] = variant_fpos;\n  pgfip->max_allele_ct = max_allele_ct;\n  // if difflist/LD might be present, scan for them in a way that's likely to\n  // terminate quickly\n  PgenGlobalFlags new_gflags = kfPgenGlobal0;\n  if (vrtype_and_fpos_storage != 8) {\n    const uint32_t trailing_byte_ct = vidx_end & (kBytesPerVec - 1);\n    if (trailing_byte_ct) {\n      memset(&(pgfip->vrtypes[vidx_end]), 0, kBytesPerVec - trailing_byte_ct);\n    }\n    const VecW* vrtypes_alias_start = R_CAST(VecW*, pgfip->vrtypes);\n    const VecW* vrtypes_alias_end = &(vrtypes_alias_start[DivUp(vidx_end, kBytesPerVec)]);\n    if (vblock_idx_start) {\n      vrtypes_alias_start = &(vrtypes_alias_start[vblock_idx_start * (kPglVblockSize / kBytesPerVec)]);\n    }\n    const VecW* vrtypes_alias_iter = vrtypes_alias_start;\n    if (vrtype_and_fpos_storage < 8) {\n      for (; vrtypes_alias_iter != vrtypes_alias_end; ++vrtypes_alias_iter) {\n        const VecW cur_vvec = *vrtypes_alias_iter;\n#ifdef __LP64__\n        const VecW cur_vvec_bit2 = vecw_slli(cur_vvec, 5);\n        const VecW cur_vvec_bit1 = vecw_slli(cur_vvec, 6);\n        // check if any vrtype has bit 1 set and bit 2 clear\n        if (vecw_movemask(vecw_and_notfirst(cur_vvec_bit2, cur_vvec_bit1))) {\n          new_gflags |= kfPgenGlobalLdCompressionPresent | kfPgenGlobalDifflistOrLdPresent;\n          break;\n        }\n        const VecW cur_vvec_bit0 = vecw_slli(cur_vvec, 7);\n        if (vecw_movemask(cur_vvec_bit0 | cur_vvec_bit2)) {\n          // this catches onebit\n          new_gflags |= kfPgenGlobalDifflistOrLdPresent;\n        }\n#else\n        const uintptr_t cur_vvec_shifted = cur_vvec >> 1;\n        // check if any vrtype has bit 1 set and bit 2 clear\n        if (vecw_and_notfirst(cur_vvec_shifted, cur_vvec) & (2 * kMask0101)) {\n          new_gflags |= kfPgenGlobalLdCompressionPresent | kfPgenGlobalDifflistOrLdPresent;\n          break;\n        }\n        if (cur_vvec & (5 * kMask0101)) {\n          // this catches onebit\n          new_gflags |= kfPgenGlobalDifflistOrLdPresent;\n        }\n#endif\n      }\n    }\n    if (vrtype_and_fpos_storage >= 4) {\n      // Likely for one of {hphase, dosage} to be present without the\n      // other; make this scan faster in that case, at the cost of\n      // failing to early-exit when both are present.\n      // This is also suboptimal for the vrtype_and_fpos_storage > 8\n      // special encodings.\n      VecW or_vvec = vecw_setzero();\n      for (vrtypes_alias_iter = vrtypes_alias_start; vrtypes_alias_iter != vrtypes_alias_end; ++vrtypes_alias_iter) {\n        or_vvec |= *vrtypes_alias_iter;\n      }\n#ifdef __LP64__\n      const VecW or_vvec_bit3 = vecw_slli(or_vvec, 4);\n      if 
(vecw_movemask(or_vvec_bit3)) {\n        // note that, if no phase or dosage data is present, we don't\n        // look for multiallelic hardcalls.\n        new_gflags |= kfPgenGlobalMultiallelicHardcallFound;\n      }\n      const VecW or_vvec_bit4 = vecw_slli(or_vvec, 3);\n      if (vecw_movemask(or_vvec_bit4)) {\n        new_gflags |= kfPgenGlobalHardcallPhasePresent;\n      }\n      const VecW or_vvec_bit5 = vecw_slli(or_vvec, 2);\n      const VecW or_vvec_bit6 = vecw_slli(or_vvec, 1);\n      if (vecw_movemask(or_vvec_bit5 | or_vvec_bit6)) {\n        new_gflags |= kfPgenGlobalDosagePresent;\n        if (vecw_movemask(or_vvec)) {\n          new_gflags |= kfPgenGlobalDosagePhasePresent;\n        }\n      }\n#else\n      if (or_vvec & (8 * kMask0101)) {\n        new_gflags |= kfPgenGlobalMultiallelicHardcallFound;\n      }\n      if (or_vvec & (0x10 * kMask0101)) {\n        new_gflags |= kfPgenGlobalHardcallPhasePresent;\n      }\n      if (or_vvec & (0x60 * kMask0101)) {\n        new_gflags |= kfPgenGlobalDosagePresent;\n        if (or_vvec & (0x80 * kMask0101)) {\n          new_gflags |= kfPgenGlobalDosagePhasePresent;\n        }\n      }\n#endif\n    }\n    if (vrtype_and_fpos_storage > 8) {\n      if (vrtype_and_fpos_storage == 12) {\n        max_vrec_width = 3;\n      } else if (vrtype_and_fpos_storage == 14) {\n        max_vrec_width = 6;\n      } else {\n        max_vrec_width = NypCtToByteCt(raw_sample_ct);\n        if (vrtype_and_fpos_storage == 9) {\n          max_vrec_width += 3;\n        } else if (vrtype_and_fpos_storage == 10) {\n          max_vrec_width += 2;\n        } else {\n          // 11\n          max_vrec_width += 8;\n        }\n        // 13 and 15 not specified yet\n      }\n    } else if (!(vrtype_and_fpos_storage & 3)) {\n      // 1 byte per vrec_len entry, don't bother to determine true\n      // maximum\n      max_vrec_width = 255;\n    }\n    pgfip->gflags |= new_gflags;\n  } else {\n    // vrtype_and_fpos_storage == 8.\n    max_vrec_width = NypCtToByteCt(raw_sample_ct);\n  }\n  *pgr_alloc_cacheline_ct_ptr = CountPgrAllocCachelinesRequired(raw_sample_ct, new_gflags, max_allele_ct, use_blockload? 0 : max_vrec_width);\n  *max_vrec_width_ptr = max_vrec_width;\n  return kPglRetSuccess;\n}\n\nPglErr PgfiInitReloadExtSet(uint32_t is_pgi, FILE* ff, uintptr_t* ext_bitarr, uint32_t* ext_bitarr_cumulative_popcounts, uint64_t* footer_fpos_ptr, uint32_t* word_ct_ptr, char* errstr_buf) {\n  uintptr_t cur_output_word = 0;\n  uintptr_t nonzero_present = 0;\n  uint32_t write_idx_lowbits = 0;\n  uint32_t widx = 0;\n  ext_bitarr_cumulative_popcounts[0] = 0;\n  while (1) {\n    const int32_t ii = getc_unlocked(ff);\n    if (unlikely(ii == EOF)) {\n      if (ferror_unlocked(ff)) {\n        FillPgenHeaderReadErrstrFromNzErrno(is_pgi, errstr_buf);\n        return kPglRetReadFail;\n      }\n      snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Invalid .pgen%s.\\n\", is_pgi? 
\".pgi file\" : \" header\");\n      return kPglRetMalformedInput;\n    }\n    const uintptr_t cur_masked_bits = ii & 127;\n    cur_output_word |= cur_masked_bits << write_idx_lowbits;\n    uint32_t new_write_idx_lowbits = write_idx_lowbits + 7;\n    if (new_write_idx_lowbits >= kBitsPerWord) {\n      ext_bitarr[widx] = cur_output_word;\n      nonzero_present |= cur_output_word;\n      ++widx;\n      ext_bitarr_cumulative_popcounts[widx] = PopcountWord(cur_output_word);\n      cur_output_word = cur_masked_bits >> (kBitsPerWord - write_idx_lowbits);\n      new_write_idx_lowbits -= kBitsPerWord;\n    }\n    write_idx_lowbits = new_write_idx_lowbits;\n    if (!(ii & 128)) {\n      break;\n    }\n    if (unlikely(widx == (256 / kBitsPerWord))) {\n      snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Invalid .pgen%s.\\n\", is_pgi? \".pgi file\" : \" header\");\n      return kPglRetMalformedInput;\n    }\n  }\n  ext_bitarr[widx] = cur_output_word;\n  nonzero_present |= cur_output_word;\n  *word_ct_ptr = widx + 1;\n  if (footer_fpos_ptr && nonzero_present) {\n    if (unlikely(!fread_unlocked(footer_fpos_ptr, sizeof(int64_t), 1, ff))) {\n      FillPgenHeaderReadErrstr(ff, is_pgi, errstr_buf);\n      return kPglRetReadFail;\n    }\n  }\n  return kPglRetSuccess;\n}\n\nPglErr PgfiInitFillExts(const uintptr_t* ext_bitarr, const uint32_t* ext_cumulative_popcounts, uint32_t word_ct, uint32_t is_pgi, FILE* ff, PgenExtensionLl* exts, char* errstr_buf) {\n  {\n    const uint32_t ext_ct = ext_cumulative_popcounts[word_ct - 1] + PopcountWord(ext_bitarr[word_ct - 1]);\n    uint64_t sizes[256];\n    for (uint32_t seq_idx = 0; seq_idx != ext_ct; ++seq_idx) {\n      const uint64_t cur_size = FGetVint63(ff);\n      if (unlikely(cur_size == (1LLU << 63))) {\n        goto PgfiInitFillExts_error_or_eof;\n      }\n      sizes[seq_idx] = cur_size;\n    }\n    const uint32_t type_idx_limit = word_ct * kBitsPerWord;\n    uint32_t next_seq_idx = 0;\n    for (PgenExtensionLl* exts_iter = exts; exts_iter; exts_iter = exts_iter->next) {\n      const uint32_t type_idx = exts_iter->type_idx;\n      if (type_idx >= type_idx_limit) {\n        break;\n      }\n      if ((exts_iter->size == ~0LLU) || (!IsSet(ext_bitarr, type_idx))) {\n        continue;\n      }\n      const uint32_t seq_idx = RawToSubsettedPos(ext_bitarr, ext_cumulative_popcounts, type_idx);\n      const uint64_t cur_size = sizes[seq_idx];\n      if (unlikely(cur_size != exts_iter->size)) {\n        snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: PgfiInitLoadExts() extension byte-size mismatch.\\n\");\n        return kPglRetImproperFunctionCall;\n      }\n      if (seq_idx > next_seq_idx) {\n        uint64_t bytes_to_skip = 0;\n        for (uint32_t uii = next_seq_idx; uii != seq_idx; ++uii) {\n          bytes_to_skip += sizes[uii];\n        }\n        if (unlikely(fseeko(ff, bytes_to_skip, SEEK_CUR))) {\n          FillPgenHeaderReadErrstrFromNzErrno(is_pgi, errstr_buf);\n          return kPglRetReadFail;\n        }\n      }\n      if (unlikely(fread_checked(exts_iter->contents, cur_size, ff))) {\n        FillPgenHeaderReadErrstr(ff, is_pgi, errstr_buf);\n        return kPglRetReadFail;\n      }\n      next_seq_idx = seq_idx + 1;\n    }\n    return kPglRetSuccess;\n  }\n PgfiInitFillExts_error_or_eof:\n  if (ferror_unlocked(ff)) {\n    FillPgenHeaderReadErrstrFromNzErrno(is_pgi, errstr_buf);\n    return kPglRetReadFail;\n  }\n  snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Invalid .pgen%s.\\n\", is_pgi? 
\".pgi file\" : \" header\");\n  return kPglRetMalformedInput;\n}\n\nPglErr PgfiInitLoadExts(PgenHeaderCtrl header_ctrl, PgenFileInfo* pgfip, PgenExtensionLl* header_exts, PgenExtensionLl* footer_exts, char* errstr_buf) {\n  const uint64_t starting_fpos = ftello(pgfip->shared_ff);\n  FILE* header_ff = pgfip->pgi_ff;\n  const uint32_t is_pgi = (header_ff != nullptr);\n  if (!is_pgi) {\n    header_ff = pgfip->shared_ff;\n    assert(header_ff);\n  }\n  {\n    const uint32_t vrtype_and_fpos_storage = header_ctrl & 15;\n    const uint32_t vrec_len_byte_ct = 1 + (vrtype_and_fpos_storage & 3);\n    const uint32_t phase_or_dosage_present = (vrtype_and_fpos_storage >= 4);\n    const uint32_t nonref_flags_stored = ((header_ctrl >> 6) == 3);\n    const uint64_t ext_fpos = PglHeaderBaseEndOffset(pgfip->raw_variant_ct, vrec_len_byte_ct, phase_or_dosage_present, nonref_flags_stored);\n    if (unlikely(fseeko(header_ff, ext_fpos, SEEK_SET))) {\n      FillPgenHeaderReadErrstrFromNzErrno(is_pgi, errstr_buf);\n      return kPglRetReadFail;\n    }\n  }\n  uintptr_t header_bitarr[(256 / kBitsPerWord) + 1];\n  uint32_t header_bitarr_cumulative_popcounts[(256 / kBitsPerWord) + 1];\n  uint32_t header_ext_word_ct;\n  PglErr reterr = PgfiInitReloadExtSet(is_pgi, header_ff, header_bitarr, header_bitarr_cumulative_popcounts, nullptr, &header_ext_word_ct, errstr_buf);\n  if (unlikely(reterr)) {\n    return reterr;\n  }\n  uintptr_t footer_bitarr[(256 / kBitsPerWord) + 1];\n  uint32_t footer_bitarr_cumulative_popcounts[(256 / kBitsPerWord) + 1];\n  uint32_t footer_ext_word_ct;\n  uint64_t footer_extensions_fpos;\n  reterr = PgfiInitReloadExtSet(is_pgi, header_ff, footer_bitarr, footer_bitarr_cumulative_popcounts, &footer_extensions_fpos, &footer_ext_word_ct, errstr_buf);\n  if (unlikely(reterr)) {\n    return reterr;\n  }\n  if (header_exts) {\n    reterr = PgfiInitFillExts(header_bitarr, header_bitarr_cumulative_popcounts, header_ext_word_ct, is_pgi, header_ff, header_exts, errstr_buf);\n    if (unlikely(reterr)) {\n      return reterr;\n    }\n  }\n  if (is_pgi) {\n    if (unlikely(fclose_null(&pgfip->pgi_ff))) {\n      FillPgenHeaderReadErrstrFromNzErrno(1, errstr_buf);\n      return kPglRetReadFail;\n    }\n  }\n  if (footer_exts) {\n    if (unlikely(fseeko(pgfip->shared_ff, footer_extensions_fpos, SEEK_SET))) {\n      FillPgenReadErrstrFromNzErrno(errstr_buf);\n      return kPglRetReadFail;\n    }\n    reterr = PgfiInitFillExts(footer_bitarr, footer_bitarr_cumulative_popcounts, footer_ext_word_ct, 0, pgfip->shared_ff, footer_exts, errstr_buf);\n    if (unlikely(reterr)) {\n      return reterr;\n    }\n  }\n  if (unlikely(fseeko(pgfip->shared_ff, starting_fpos, SEEK_SET))) {\n    FillPgenReadErrstrFromNzErrno(errstr_buf);\n    return kPglRetReadFail;\n  }\n  return kPglRetSuccess;\n}\n\nuint32_t GetLdbaseVidx(const unsigned char* vrtypes, uint32_t cur_vidx) {\n#ifdef __LP64__\n  const VecW* vrtypes_valias = R_CAST(const VecW*, vrtypes);\n  const uint32_t cur_vidx_orig_remainder = cur_vidx % kBytesPerVec;\n  uint32_t vidx_vec_idx = cur_vidx / kBytesPerVec;\n  Vec8thUint v8ui = 0;\n  if (cur_vidx_orig_remainder) {\n    const VecW cur_vvec = vrtypes_valias[vidx_vec_idx];\n    // non-ld: ((bit 2) OR (NOT bit 1))\n    const VecW cur_vvec_bit2 = vecw_slli(cur_vvec, 5);\n    const VecW inv_cur_vvec_bit1 = ~vecw_slli(cur_vvec, 6);\n    v8ui = vecw_movemask(cur_vvec_bit2 | inv_cur_vvec_bit1);\n    v8ui = bzhi(v8ui, cur_vidx_orig_remainder);\n  }\n  while (!v8ui) {\n    const VecW cur_vvec = 
vrtypes_valias[--vidx_vec_idx];\n    const VecW cur_vvec_bit2 = vecw_slli(cur_vvec, 5);\n    const VecW inv_cur_vvec_bit1 = ~vecw_slli(cur_vvec, 6);\n    v8ui = vecw_movemask(cur_vvec_bit2 | inv_cur_vvec_bit1);\n  }\n  return (vidx_vec_idx * kBytesPerVec) + bsru32(v8ui);\n#else\n  const uintptr_t* vrtypes_walias = R_CAST(const uintptr_t*, vrtypes);\n  const uint32_t cur_vidx_orig_remainder = cur_vidx % kBytesPerWord;\n  uint32_t vidx_word_idx = (cur_vidx - 1) / kBytesPerWord;\n  uintptr_t cur_vrtypes_word = vrtypes_walias[vidx_word_idx];\n  if (cur_vidx_orig_remainder) {\n    // make sure we don't detect a byte after the current position.\n    cur_vrtypes_word = bzhi(cur_vrtypes_word, CHAR_BIT * cur_vidx_orig_remainder);\n    cur_vrtypes_word |= (kMask0101 * 2) << (CHAR_BIT * cur_vidx_orig_remainder);\n  }\n  while (1) {\n    // ((bit 2) OR (NOT bit 1)) for each byte.  (possible experiment: see if\n    // the same assembly is generated if this expression is rewritten to use\n    // ands/nots.)\n    const uintptr_t detect_non_ld_word = ((cur_vrtypes_word >> 1) | (~cur_vrtypes_word)) & (kMask0101 * 2);\n    if (detect_non_ld_word) {\n      // find the highest-order set bit in detect_non_ld_word; this corresponds\n      // to the last non-LD-compressed byte (assuming little-endian).\n      const uint32_t new_ldbase_vidx_loworder = bsrw(detect_non_ld_word) / CHAR_BIT;\n      return (vidx_word_idx * kBytesPerWord) + new_ldbase_vidx_loworder;\n    }\n    // everything LD-compressed in the current block.  move back 8 bytes in the\n    // array (or 4-bytes for 32-bit build).\n    cur_vrtypes_word = vrtypes_walias[--vidx_word_idx];\n  }\n#endif\n}\n\nuint64_t GetPgfiLdbaseFpos(const PgenFileInfo* pgfip, uintptr_t vidx) {\n  if (!pgfip->var_fpos) {\n    return pgfip->const_fpos_offset + pgfip->const_vrec_width * S_CAST(uint64_t, vidx);\n  }\n  if (pgfip->vrtypes && ((pgfip->vrtypes[vidx] & 6) == 2)) {\n    vidx = GetLdbaseVidx(pgfip->vrtypes, vidx);\n  }\n  return pgfip->var_fpos[vidx];\n}\n\nuint64_t PgfiMultireadGetCachelineReq(const uintptr_t* variant_include, const PgenFileInfo* pgfip, uint32_t variant_ct, uint32_t block_size) {\n  // if block_size < kPglVblockSize, it's ideal for it to be a power of 2 (to\n  // avoid unnecessary vblock crossing), but that's not required.\n  const uint32_t raw_variant_ct = pgfip->raw_variant_ct;\n  if (variant_ct == raw_variant_ct) {\n    variant_include = nullptr;\n  }\n  uint32_t block_ct_m1 = 0;\n  if (raw_variant_ct < block_size) {\n    block_size = raw_variant_ct;\n  } else {\n    block_ct_m1 = (raw_variant_ct - 1) / block_size;\n  }\n  const uint64_t* var_fpos = pgfip->var_fpos;\n  if ((!variant_include) && (!var_fpos)) {\n    return DivUpU64(S_CAST(uint64_t, pgfip->const_vrec_width) * block_size, kCacheline);\n  }\n  uint64_t max_block_byte_ct = 0;\n  uint32_t max_block_variant_ct = 0;\n  for (uint32_t block_idx = 0; ; ++block_idx) {\n    uint32_t variant_uidx_start = block_idx * block_size;\n    uint32_t variant_uidx_end = variant_uidx_start + block_size;\n    if (block_idx >= block_ct_m1) {\n      if (block_idx > block_ct_m1) {\n        break;\n      }\n      variant_uidx_end = raw_variant_ct;\n    }\n    if (variant_include) {\n      variant_uidx_start = AdvBoundedTo1Bit(variant_include, variant_uidx_start, variant_uidx_end);\n      if (variant_uidx_start == variant_uidx_end) {\n        continue;\n      }\n      variant_uidx_end = 1 + FindLast1BitBefore(variant_include, variant_uidx_end);\n    }\n    if (var_fpos) {\n      if (pgfip->vrtypes && 
((pgfip->vrtypes[variant_uidx_start] & 6) == 2)) {\n        // need to start loading from LD-buddy\n        variant_uidx_start = GetLdbaseVidx(pgfip->vrtypes, variant_uidx_start);\n      }\n      uint64_t cur_block_byte_ct = var_fpos[variant_uidx_end] - var_fpos[variant_uidx_start];\n      if (cur_block_byte_ct > max_block_byte_ct) {\n        max_block_byte_ct = cur_block_byte_ct;\n      }\n    } else {\n      // no LD compression here\n      const uint32_t cur_block_variant_ct = variant_uidx_end - variant_uidx_start;\n      if (cur_block_variant_ct > max_block_variant_ct) {\n        max_block_variant_ct = cur_block_variant_ct;\n        if (cur_block_variant_ct == block_size) {\n          // no larger value possible, terminate search\n          break;\n        }\n      }\n    }\n  }\n  if (!var_fpos) {\n    max_block_byte_ct = max_block_variant_ct * S_CAST(uint64_t, pgfip->const_vrec_width);\n  }\n  return DivUpU64(max_block_byte_ct, kCacheline);\n}\n\nPglErr PgfiMultiread(const uintptr_t* variant_include, uint32_t variant_uidx_start, uint32_t variant_uidx_end, uint32_t load_variant_ct, PgenFileInfo* pgfip) {\n  // we could permit 0, but that encourages lots of unnecessary thread wakeups\n  assert(load_variant_ct);\n  if (variant_include) {\n    variant_uidx_start = AdvTo1Bit(variant_include, variant_uidx_start);\n  }\n  assert(variant_uidx_start < pgfip->raw_variant_ct);\n  uint64_t block_offset;\n  if (pgfip->vrtypes && ((pgfip->vrtypes[variant_uidx_start] & 6) == 2)) {\n    // need to start loading from LD-buddy\n    // assume for now that we can't skip any variants between the LD-buddy and\n    // the actual first variant; should remove this assumption later\n    block_offset = pgfip->var_fpos[GetLdbaseVidx(pgfip->vrtypes, variant_uidx_start)];\n  } else {\n    block_offset = GetPgfiFpos(pgfip, variant_uidx_start);\n  }\n  pgfip->block_offset = block_offset;\n  uint64_t next_read_start_fpos = block_offset;\n  // break this up into multiple freads whenever this lets us skip an entire\n  // disk block\n  // (possible todo: make the disk block size a parameter of this function)\n  do {\n    const uint64_t cur_read_start_fpos = next_read_start_fpos;\n    uint32_t cur_read_uidx_end;\n    uint64_t cur_read_end_fpos;\n    while (1) {\n      cur_read_uidx_end = variant_uidx_end;\n      if (cur_read_uidx_end - variant_uidx_start == load_variant_ct) {\n        cur_read_end_fpos = GetPgfiFpos(pgfip, cur_read_uidx_end);\n        load_variant_ct = 0;\n        break;\n      }\n      cur_read_uidx_end = AdvTo0Bit(variant_include, variant_uidx_start);\n      cur_read_end_fpos = GetPgfiFpos(pgfip, cur_read_uidx_end);\n      load_variant_ct -= cur_read_uidx_end - variant_uidx_start;\n      if (!load_variant_ct) {\n        break;\n      }\n      variant_uidx_start = AdvTo1Bit(variant_include, cur_read_uidx_end);\n      next_read_start_fpos = GetPgfiFpos(pgfip, variant_uidx_start);\n      if (pgfip->vrtypes && ((pgfip->vrtypes[variant_uidx_start] & 6) == 2)) {\n        const uint32_t variant_read_uidx_start = GetLdbaseVidx(pgfip->vrtypes, variant_uidx_start);\n        if (variant_read_uidx_start <= cur_read_uidx_end) {\n          continue;\n        }\n        next_read_start_fpos = pgfip->var_fpos[variant_read_uidx_start];\n      }\n      // bugfix: can't use do..while, since previous \"continue\" needs to skip\n      // this check\n      if (RoundDownPow2U64(cur_read_end_fpos + kDiskBlockSize - 1LLU, kDiskBlockSize) < RoundDownPow2U64(next_read_start_fpos, kDiskBlockSize)) {\n        // minor bugfix (7 
Jul 2017): break, not continue\n        break;\n      }\n    }\n    if (unlikely(fseeko(pgfip->shared_ff, cur_read_start_fpos, SEEK_SET))) {\n      return kPglRetReadFail;\n    }\n    uintptr_t len = cur_read_end_fpos - cur_read_start_fpos;\n    if (unlikely(fread_checked(K_CAST(unsigned char*, &(pgfip->block_base[cur_read_start_fpos - block_offset])), len, pgfip->shared_ff))) {\n      if (feof_unlocked(pgfip->shared_ff)) {\n        errno = 0;\n      }\n      return kPglRetReadFail;\n    }\n  } while (load_variant_ct);\n  return kPglRetSuccess;\n}\n\n\nvoid PreinitPgr(PgenReader* pgr_ptr) {\n  PgenReaderMain* pgrp = GetPgrp(pgr_ptr);\n  pgrp->ff = nullptr;\n}\n\nPglErr PgrInit(const char* fname, uint32_t max_vrec_width, PgenFileInfo* pgfip, PgenReader* pgr_ptr, unsigned char* pgr_alloc) {\n  PgenReaderMain* pgrp = GetPgrp(pgr_ptr);\n  // See CountPgrAllocCachelinesRequired().\n  // Could add a debug mode.\n\n  // Mode 1 (block-fread): block_base initialized, shared_ff != nullptr.  fname\n  //   must be nullptr.\n  // Mode 2 (per-variant fread): block_base == nullptr.  fname must be\n  //   non-null, though it isn't actually referenced during the first\n  //   PgenReader initialization (instead shared_ff is moved).\n  unsigned char* pgr_alloc_iter = pgr_alloc;\n  if (pgfip->block_base != nullptr) {\n    if (unlikely(fname != nullptr)) {\n      return kPglRetImproperFunctionCall;\n    }\n    pgrp->ff = nullptr;  // make sure CleanupPgr() doesn't break\n  } else {\n    if (pgfip->shared_ff != nullptr) {\n      if (unlikely(fname == nullptr)) {\n        return kPglRetImproperFunctionCall;\n      }\n      // move instead of close/reopen.\n      pgrp->ff = pgfip->shared_ff;\n      pgfip->shared_ff = nullptr;\n    } else {\n      pgrp->ff = fopen(fname, FOPEN_RB);\n      if (unlikely(!pgrp->ff)) {\n        return kPglRetOpenFail;\n      }\n    }\n    // now that arbitrary info can be stored between header and first variant\n    // record, always seek.\n    uint64_t seek_pos;\n    if (pgfip->var_fpos) {\n      seek_pos = pgfip->var_fpos[0];\n    } else {\n      seek_pos = pgfip->const_fpos_offset;\n    }\n    if (unlikely(fseeko(pgrp->ff, seek_pos, SEEK_SET))) {\n      return kPglRetReadFail;\n    }\n  }\n  pgrp->fi = *pgfip;  // struct copy\n  if (fname) {\n    // Mode 2 per-reader load buffer\n    pgrp->fread_buf = pgr_alloc_iter;\n    pgr_alloc_iter = &(pgr_alloc_iter[RoundUpPow2(max_vrec_width, kCacheline)]);\n  }\n  pgrp->fp_vidx = 0;\n  pgrp->ldbase_vidx = UINT32_MAX;\n  pgrp->ldbase_stypes = kfPgrLdcache0;\n  pgrp->ldbase_genovec = nullptr;\n  pgrp->ldbase_raregeno = nullptr;\n  pgrp->ldbase_difflist_sample_ids = nullptr;\n\n  const PgenGlobalFlags gflags = pgrp->fi.gflags;\n  const uint32_t raw_sample_ct = pgrp->fi.raw_sample_ct;\n  const uint32_t genovec_bytes_req = NypCtToCachelineCt(raw_sample_ct) * kCacheline;\n  pgrp->ldbase_raw_genovec = S_CAST(uintptr_t*, arena_alloc_raw(genovec_bytes_req, &pgr_alloc_iter));\n  const uint32_t bitvec_bytes_req = BitCtToCachelineCt(raw_sample_ct) * kCacheline;\n  const uint32_t ld_compression_present = (gflags / kfPgenGlobalLdCompressionPresent) & 1;\n  const uint32_t max_difflist_entry_ct_base = (raw_sample_ct / kPglMaxDifflistLenDivisor);\n  const uint32_t max_allele_ct = pgrp->fi.max_allele_ct;\n  pgrp->workspace_difflist_sample_ids = nullptr;\n  if ((gflags & kfPgenGlobalDifflistOrLdPresent) || (max_allele_ct > 2)) {\n    pgrp->workspace_difflist_sample_ids = S_CAST(uint32_t*, arena_alloc_raw_rd((max_difflist_entry_ct_base + 1) * sizeof(int32_t), 
&pgr_alloc_iter));\n  }\n  if (gflags & kfPgenGlobalDifflistOrLdPresent) {\n    // const uint32_t max_difflist_entry_ct = max_difflist_entry_ct_base * (1 + ld_compression_present);\n\n    const uintptr_t raregeno_bytes_req = NypCtToCachelineCt(max_difflist_entry_ct_base) * kCacheline;\n    pgrp->workspace_raregeno_vec = S_CAST(uintptr_t*, arena_alloc_raw(raregeno_bytes_req, &pgr_alloc_iter));\n    pgrp->workspace_raregeno_tmp_loadbuf = S_CAST(uintptr_t*, arena_alloc_raw(raregeno_bytes_req, &pgr_alloc_iter));\n\n    if (ld_compression_present) {\n      pgrp->ldbase_genovec = S_CAST(uintptr_t*, arena_alloc_raw(genovec_bytes_req, &pgr_alloc_iter));\n\n      pgrp->ldbase_raregeno = S_CAST(uintptr_t*, arena_alloc_raw(raregeno_bytes_req, &pgr_alloc_iter));\n\n      pgrp->ldbase_difflist_sample_ids = S_CAST(uint32_t*, arena_alloc_raw_rd((max_difflist_entry_ct_base + 1) * sizeof(int32_t), &pgr_alloc_iter));\n    }\n  } else {\n    pgrp->workspace_raregeno_vec = nullptr;\n    pgrp->workspace_raregeno_tmp_loadbuf = nullptr;\n  }\n  pgrp->workspace_vec = nullptr;\n  pgrp->workspace_aux1x_present = nullptr;\n  pgrp->workspace_imp_r2 = nullptr;\n  pgrp->workspace_all_hets = nullptr;\n  pgrp->workspace_subset = nullptr;\n  const PgenGlobalFlags gflags_hphase_dosage = gflags & (kfPgenGlobalHardcallPhasePresent | kfPgenGlobalDosagePresent);\n  if ((max_allele_ct > 2) || gflags_hphase_dosage) {\n    pgrp->workspace_vec = S_CAST(uintptr_t*, arena_alloc_raw(genovec_bytes_req, &pgr_alloc_iter));\n    if (max_allele_ct > 2) {\n      pgrp->workspace_aux1x_present = S_CAST(uintptr_t*, arena_alloc_raw(bitvec_bytes_req, &pgr_alloc_iter));\n      pgrp->workspace_imp_r2 = S_CAST(uint64_t*, arena_alloc_raw_rd(2 * max_allele_ct * sizeof(int64_t), &pgr_alloc_iter));\n    }\n    if (gflags & kfPgenGlobalHardcallPhasePresent) {\n      pgrp->workspace_all_hets = S_CAST(uintptr_t*, arena_alloc_raw(bitvec_bytes_req, &pgr_alloc_iter));\n      pgrp->workspace_subset = S_CAST(uintptr_t*, arena_alloc_raw(bitvec_bytes_req, &pgr_alloc_iter));\n    }\n    pgrp->workspace_dosage_present = nullptr;\n    pgrp->workspace_dphase_present = nullptr;\n    if (gflags & kfPgenGlobalDosagePresent) {\n      pgrp->workspace_dosage_present = S_CAST(uintptr_t*, arena_alloc_raw(bitvec_bytes_req, &pgr_alloc_iter));\n      if (gflags & kfPgenGlobalDosagePhasePresent) {\n        pgrp->workspace_dphase_present = S_CAST(uintptr_t*, arena_alloc_raw(bitvec_bytes_req, &pgr_alloc_iter));\n      }\n    }\n  }\n  return kPglRetSuccess;\n}\n\nvoid PgrPlink1ToPlink2InplaceUnsafe(uint32_t sample_ct, uintptr_t* genovec) {\n  // 00 -> 10, 01 -> 11, 10 -> 01, 11 -> 00\n  // new low bit  = [old low] ^ [old high]\n  // new high bit = ~[old high]\n  // \"unsafe\" because trailing bits are not zeroed out.\n  const uint32_t vec_ct = NypCtToVecCt(sample_ct);\n  const VecW m1 = VCONST_W(kMask5555);\n  const VecW not_m1 = VCONST_W(kMaskAAAA);\n  VecW* vptr = R_CAST(VecW*, genovec);\n  for (uint32_t vidx = 0; vidx != vec_ct; vidx++) {\n    const VecW not_cur_vec_high = vecw_and_notfirst(vptr[vidx], not_m1);\n    vptr[vidx] = (vecw_and_notfirst(vptr[vidx], m1) ^ vecw_srli(not_cur_vec_high, 1)) | not_cur_vec_high;\n  }\n}\n\nvoid PgrPlink2ToPlink1InplaceUnsafe(uint32_t sample_ct, uintptr_t* genovec) {\n  // 00 -> 11, 01 -> 10, 10 -> 00, 11 -> 01\n  // new low bit  = [old low] ^ (~[old high])\n  // new high bit = ~[old high]\n  const uint32_t vec_ct = NypCtToVecCt(sample_ct);\n  const VecW not_m1 = VCONST_W(kMaskAAAA);\n  VecW* vptr = R_CAST(VecW*, genovec);\n  for (uint32_t 
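/* (~low)^(~high) == low^high, hence the and_notfirst forms in the loop body */ 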
vidx = 0; vidx != vec_ct; vidx++) {\n    VecW cur_vec = vptr[vidx];\n    VecW not_cur_vec_high = vecw_and_notfirst(cur_vec, not_m1);\n    vptr[vidx] = (vecw_and_notfirst(not_m1, cur_vec) ^ vecw_srli(not_cur_vec_high, 1)) | not_cur_vec_high;\n  }\n}\n\nPglErr ParseDifflistHeader(const unsigned char* fread_end, uint32_t raw_sample_ct, const unsigned char** fread_pp, uintptr_t* raregeno_buf, const unsigned char** difflist_group_info_ptr, uint32_t* difflist_len_ptr) {\n  // Can be used for deltalists as well: pass raregeno_buf == nullptr.\n  // Trailing bits of raregeno may not be zeroed out.\n  // Will need a separate 64-bit version of this for multiallelic dosages.\n  const uint32_t difflist_len = GetVint31(fread_end, fread_pp);\n  // moved here to address maybe-uninitialized warnings\n  *difflist_group_info_ptr = *fread_pp;\n  *difflist_len_ptr = difflist_len;\n  if (!difflist_len) {\n    return kPglRetSuccess;\n  }\n  if (unlikely(difflist_len > raw_sample_ct / kPglMaxDifflistLenDivisor)) {\n    // automatically catches GetVint31() failure\n    return kPglRetMalformedInput;\n  }\n  const uint32_t group_ct = DivUp(difflist_len, kPglDifflistGroupSize);\n  const uint32_t sample_id_byte_ct = BytesToRepresentNzU32(raw_sample_ct);\n  const uint32_t difflist_index_byte_ct = group_ct * (sample_id_byte_ct + 1) - 1;\n  if (PtrAddCk(fread_end, difflist_index_byte_ct, fread_pp)) {\n    return kPglRetMalformedInput;\n  }\n  if (!raregeno_buf) {\n    // for sample ID lists without 2-bit genotype info, used for sparse dosage\n    return kPglRetSuccess;\n  }\n  const uint32_t raregeno_byte_ct = NypCtToByteCt(difflist_len);\n  const unsigned char* raregeno_start = *fread_pp;\n  if (PtrAddCk(fread_end, raregeno_byte_ct, fread_pp)) {\n    return kPglRetMalformedInput;\n  }\n  // possible todo: just return raregeno_start, and let the caller perform this\n  // copy\n  memcpy(raregeno_buf, raregeno_start, raregeno_byte_ct);\n  return kPglRetSuccess;\n}\n\nPglErr ParseAndSaveDifflist(const unsigned char* fread_end, uint32_t raw_sample_ct, const unsigned char** fread_pp, uintptr_t* __restrict raregeno, uint32_t* __restrict difflist_sample_ids, uint32_t* __restrict difflist_len_ptr) {\n  // Appropriate when we need to iterate through the difflist multiple times.\n  // Other functions are more efficient if we only need to process the list\n  // once.\n  // Trailing bits of raregeno may not be zeroed out.\n  const unsigned char* group_info_iter;\n  PglErr reterr = ParseDifflistHeader(fread_end, raw_sample_ct, fread_pp, raregeno, &group_info_iter, difflist_len_ptr);\n  uint32_t difflist_len = *difflist_len_ptr;\n  // todo: check if difflist_len == 0 early exit is a net positive or negative\n  // on a few test datasets\n  if (reterr || (!difflist_len)) {\n    return reterr;\n  }\n  const uint32_t sample_id_byte_ct = BytesToRepresentNzU32(raw_sample_ct);\n  uint32_t* difflist_sample_ids_iter = difflist_sample_ids;\n  for (uint32_t difflist_remaining = difflist_len; ; ) {\n    const uint32_t* difflist_sample_ids_stop;\n    if (difflist_remaining < kPglDifflistGroupSize) {\n      if (!difflist_remaining) {\n        return kPglRetSuccess;\n      }\n      difflist_sample_ids_stop = &(difflist_sample_ids_iter[difflist_remaining]);\n      difflist_remaining = 0;\n    } else {\n      difflist_sample_ids_stop = &(difflist_sample_ids_iter[kPglDifflistGroupSize]);\n      difflist_remaining -= kPglDifflistGroupSize;\n    }\n    // can't use uint32_t assignment trick for now since there's a corner case\n    // where that would 
read past the end of the mapped address range\n    uintptr_t raw_sample_idx = SubU32Load(group_info_iter, sample_id_byte_ct);\n    group_info_iter = &(group_info_iter[sample_id_byte_ct]);\n    while (1) {\n#ifndef __LP64__\n      // perform more frequent checks in 32-bit build since raw_sample_idx may\n      // overflow\n      // misses \"small negative\" malformed input, but it'll catch data\n      // corruption with very high probability\n      if (unlikely(raw_sample_idx >= raw_sample_ct)) {\n        return kPglRetMalformedInput;\n      }\n#endif\n      *difflist_sample_ids_iter++ = raw_sample_idx;\n      if (difflist_sample_ids_iter == difflist_sample_ids_stop) {\n        break;\n      }\n      raw_sample_idx += GetVint31(fread_end, fread_pp);\n    }\n#ifdef __LP64__\n    if (unlikely(raw_sample_idx >= raw_sample_ct)) {\n      return kPglRetMalformedInput;\n    }\n#endif\n  }\n  return kPglRetSuccess;\n}\n\nPglErr ParseAndSaveDifflistProperSubset(const unsigned char* fread_end, const uintptr_t* __restrict sample_include, const uint32_t* __restrict sample_include_cumulative_popcounts, uint32_t raw_sample_ct, const unsigned char** fread_pp, uintptr_t* __restrict raregeno, uint32_t* __restrict difflist_sample_ids, uint32_t* __restrict difflist_len_ptr, uintptr_t* __restrict raregeno_workspace) {\n  // Requires a PROPER subset.  Might want to just merge this with\n  // ParseAndSaveDifflist() and rename appropriately.\n  // Trailing bits of raregeno are zeroed out.\n  uint32_t raw_difflist_len;\n  const unsigned char* group_info_iter;\n  PglErr reterr = ParseDifflistHeader(fread_end, raw_sample_ct, fread_pp, raregeno_workspace, &group_info_iter, &raw_difflist_len);\n  if (reterr || (!raw_difflist_len)) {\n    *difflist_len_ptr = 0;\n    return reterr;\n  }\n  const uint32_t subgroup_idx_last = (raw_difflist_len - 1) / kBitsPerWordD2;\n  const uint32_t sample_id_byte_ct = BytesToRepresentNzU32(raw_sample_ct);\n  uintptr_t* raregeno_workspace_iter = raregeno_workspace;\n  uintptr_t* raregeno_iter = raregeno;\n  uint32_t* difflist_sample_ids_iter = difflist_sample_ids;\n\n  // technically doesn't need to be initialized, but I have principles\n  uintptr_t raw_sample_idx = 0;\n\n  uintptr_t raregeno_word = 0;\n  uint32_t subgroup_len_m1 = kBitsPerWordD2 - 1;\n  uint32_t difflist_len_lowbits = 0;\n  for (uint32_t subgroup_idx = 0; ; ++subgroup_idx) {\n    if (subgroup_idx >= subgroup_idx_last) {\n      if (subgroup_idx > subgroup_idx_last) {\n        if (difflist_len_lowbits) {\n          *raregeno_iter = raregeno_word;\n        }\n        *difflist_len_ptr = S_CAST(uintptr_t, difflist_sample_ids_iter - difflist_sample_ids) + difflist_len_lowbits;\n        return kPglRetSuccess;\n      }\n      subgroup_len_m1 &= raw_difflist_len - 1;\n    }\n    // We need to consume a new rare genotype word every 32 entries, and pull a\n    // raw sample index from the difflist header every 64 entries.  
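(On a 64-bit build,\n    // kBitsPerWordD2 == 32 and kPglDifflistGroupSize == 64, hence those two\n    // numbers.)  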
So it's\n    // best to make the inner loop have a period of 32 (call this a 'subgroup',\n    // where 'group' refers to a set of 64 entries).\n    if (!(subgroup_idx % (kPglDifflistGroupSize / kBitsPerWordD2))) {\n#ifdef __LP64__\n      if (unlikely(raw_sample_idx >= raw_sample_ct)) {\n        return kPglRetMalformedInput;\n      }\n#endif\n      raw_sample_idx = SubU32Load(group_info_iter, sample_id_byte_ct);\n      group_info_iter = &(group_info_iter[sample_id_byte_ct]);\n    } else {\n      raw_sample_idx += GetVint31(fread_end, fread_pp);\n    }\n    uintptr_t raregeno_workspace_word = *raregeno_workspace_iter++;\n    for (uint32_t raw_difflist_idx_lowbits = 0; ; ++raw_difflist_idx_lowbits) {\n#ifndef __LP64__\n      if (unlikely(raw_sample_idx >= raw_sample_ct)) {\n        return kPglRetMalformedInput;\n      }\n#endif\n      if (IsSet(sample_include, raw_sample_idx)) {\n        raregeno_word |= ((raregeno_workspace_word >> (2 * raw_difflist_idx_lowbits)) & 3) << (difflist_len_lowbits * 2);\n        difflist_sample_ids_iter[difflist_len_lowbits] = RawToSubsettedPos(sample_include, sample_include_cumulative_popcounts, raw_sample_idx);\n        if (difflist_len_lowbits++ == (kBitsPerWordD2 - 1)) {\n          *raregeno_iter++ = raregeno_word;\n          raregeno_word = 0;\n          difflist_len_lowbits = 0;\n          difflist_sample_ids_iter = &(difflist_sample_ids_iter[kBitsPerWordD2]);\n        }\n      }\n      if (raw_difflist_idx_lowbits == subgroup_len_m1) {\n        break;\n      }\n      raw_sample_idx += GetVint31(fread_end, fread_pp);\n    }\n  }\n}\n\nPglErr ParseLdAndMergeDifflistSubset(const unsigned char* fread_end, const uintptr_t* __restrict sample_include, const uint32_t* __restrict sample_include_cumulative_popcounts, const uintptr_t* __restrict ldbase_raregeno, const uint32_t* __restrict ldbase_difflist_sample_ids, uint32_t ldbase_difflist_len, uintptr_t ldbase_common_geno, uint32_t raw_sample_ct, uint32_t sample_ct, const unsigned char** fread_pp, uintptr_t* __restrict merged_raregeno, uint32_t* __restrict merged_difflist_sample_ids, uint32_t* __restrict merged_difflist_len_ptr, uintptr_t* __restrict diff_from_ldbase_raregeno_iter) {\n  // Used when the ldbase variant was saved as a difflist, and it's useful to\n  // process the current variant as a difflist.\n  // * Assumes ldbase_difflist_sample_ids[ldbase_difflist_len]==sample_ct.\n  // * Assumes sample_include == nullptr if no subsetting needed.  
(Otherwise,\n  //   it'll still work, but performance will be worse.)\n  // Trailing bits of merged_raregeno may not be zeroed out.\n  // Caller is responsible for inverting ldbase_common_geno and merged_raregeno\n  // afterward if necessary.\n  assert(ldbase_difflist_sample_ids[ldbase_difflist_len] == sample_ct);\n  uint32_t diff_from_ldbase_len;\n  const unsigned char* group_info_iter;\n  PglErr reterr = ParseDifflistHeader(fread_end, raw_sample_ct, fread_pp, diff_from_ldbase_raregeno_iter, &group_info_iter, &diff_from_ldbase_len);\n  if (unlikely(reterr)) {\n    return reterr;\n  }\n  if (!diff_from_ldbase_len) {\n    memcpy(merged_difflist_sample_ids, ldbase_difflist_sample_ids, ldbase_difflist_len * sizeof(int32_t));\n    *merged_difflist_len_ptr = ldbase_difflist_len;\n    CopyNyparr(ldbase_raregeno, ldbase_difflist_len, merged_raregeno);\n    return kPglRetSuccess;\n  }\n  const uint32_t subgroup_idx_last = (diff_from_ldbase_len - 1) / kBitsPerWordD2;\n  const uint32_t sample_id_byte_ct = BytesToRepresentNzU32(raw_sample_ct);\n  uintptr_t* merged_raregeno_iter = merged_raregeno;\n  uint32_t* merged_difflist_sample_ids_iter = merged_difflist_sample_ids;\n  uintptr_t merged_raregeno_word = 0;\n  uintptr_t ldbase_raregeno_word = 0;\n  uintptr_t diff_from_ldbase_raregeno_word = 0;\n  uint32_t ldbase_sample_idx = ldbase_difflist_sample_ids[0];\n  uintptr_t raw_sample_idx = 0;\n  uintptr_t cur_geno = 0;\n  uint32_t sample_idx = 0;\n  uint32_t ldbase_difflist_idx = 0;\n  uint32_t done = 0;\n  uint32_t subgroup_len_m1 = kBitsPerWordD2 - 1;\n  uint32_t merge_idx_lowbits = 0;\n  for (uint32_t subgroup_idx = 0; ; ++subgroup_idx) {\n    uint32_t diff_from_ldbase_idx_lowbits = 0;\n    if (subgroup_idx >= subgroup_idx_last) {\n      if (subgroup_idx > subgroup_idx_last) {\n        done = 1;\n        sample_idx = sample_ct;\n        goto ParseLdAndMergeDifflistSubset_finish;\n      }\n      subgroup_len_m1 &= diff_from_ldbase_len - 1;\n    }\n    if (!(subgroup_idx % (kPglDifflistGroupSize / kBitsPerWordD2))) {\n      raw_sample_idx = SubU32Load(group_info_iter, sample_id_byte_ct);\n      group_info_iter = &(group_info_iter[sample_id_byte_ct]);\n    } else {\n      raw_sample_idx += GetVint31(fread_end, fread_pp);\n    }\n    diff_from_ldbase_raregeno_word = *diff_from_ldbase_raregeno_iter++;\n    for (; ; ++diff_from_ldbase_idx_lowbits) {\n      if (unlikely(raw_sample_idx >= raw_sample_ct)) {\n        return kPglRetMalformedInput;\n      }\n      cur_geno = diff_from_ldbase_raregeno_word & 3;\n      if ((!sample_include) || IsSet(sample_include, raw_sample_idx)) {\n        sample_idx = sample_include? 
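/* translate raw index into subsetted coordinates */ 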
RawToSubsettedPos(sample_include, sample_include_cumulative_popcounts, raw_sample_idx) : raw_sample_idx;\n      ParseLdAndMergeDifflistSubset_finish:\n        while (ldbase_sample_idx < sample_idx) {\n          // replace with blocked copy?\n          if (!(ldbase_difflist_idx % kBitsPerWordD2)) {\n            ldbase_raregeno_word = ldbase_raregeno[ldbase_difflist_idx / kBitsPerWordD2];\n          }\n          *merged_difflist_sample_ids_iter++ = ldbase_sample_idx;\n          merged_raregeno_word |= (ldbase_raregeno_word & 3) << (2 * merge_idx_lowbits);\n          if (merge_idx_lowbits++ == (kBitsPerWordD2 - 1)) {\n            *merged_raregeno_iter++ = merged_raregeno_word;\n            merged_raregeno_word = 0;\n            merge_idx_lowbits = 0;\n          }\n          ++ldbase_difflist_idx;\n          ldbase_raregeno_word >>= 2;\n          ldbase_sample_idx = ldbase_difflist_sample_ids[ldbase_difflist_idx];\n        }\n        if (ldbase_sample_idx == sample_idx) {\n          if (done) {\n            if (merge_idx_lowbits) {\n              *merged_raregeno_iter = merged_raregeno_word;\n            }\n            *merged_difflist_len_ptr = merged_difflist_sample_ids_iter - merged_difflist_sample_ids;\n            return kPglRetSuccess;\n          }\n          if (!(ldbase_difflist_idx % kBitsPerWordD2)) {\n            ldbase_raregeno_word = ldbase_raregeno[ldbase_difflist_idx / kBitsPerWordD2];\n          }\n          ++ldbase_difflist_idx;\n          ldbase_raregeno_word >>= 2;\n          ldbase_sample_idx = ldbase_difflist_sample_ids[ldbase_difflist_idx];\n        }\n        if (cur_geno != ldbase_common_geno) {\n          *merged_difflist_sample_ids_iter++ = sample_idx;\n          merged_raregeno_word |= cur_geno << (2 * merge_idx_lowbits);\n          if (merge_idx_lowbits++ == (kBitsPerWordD2 - 1)) {\n            *merged_raregeno_iter++ = merged_raregeno_word;\n            merged_raregeno_word = 0;\n            merge_idx_lowbits = 0;\n          }\n        }\n      }\n      if (diff_from_ldbase_idx_lowbits == subgroup_len_m1) {\n        break;\n      }\n      raw_sample_idx += GetVint31(fread_end, fread_pp);\n      diff_from_ldbase_raregeno_word >>= 2;\n    }\n  }\n}\n\n/*\nvoid PrunedDifflistToGenovecSubsetUnsafe(const uintptr_t* __restrict sample_include, const uint32_t* __restrict sample_include_cumulative_popcounts, const uintptr_t* __restrict raregeno, const uint32_t* __restrict difflist_sample_ids, uint32_t sample_ct, uint32_t difflist_common_geno, uint32_t difflist_len, uintptr_t* __restrict genovec) {\n  // Designed to be used after genovec subsetting.  Assumes all difflist\n  // entries are valid.  Ok for trailing bits of raregeno to be nonzero.  
Does\n  // not zero out trailing bits of genovec.\n  const uint32_t vec_ct = NypCtToVecCt(sample_ct);\n  vecset(genovec, difflist_common_geno * kMask5555, vec_ct);\n  if (!difflist_len) {\n    return;\n  }\n  const uintptr_t* raregeno_incr = raregeno;\n  const uint32_t* difflist_sample_ids_iter = difflist_sample_ids;\n  const uint32_t* difflist_sample_ids_end = &(difflist_sample_ids[difflist_len]);\n  // don't think there's a point to separating out the\n  // difflist_common_geno == 0 case here, since the RawToSubsettedPos\n  // operation is a bit expensive\n  while (1) {\n    // er, get rid of this undefined behavior if we uncomment this function\n    const uint32_t* difflist_sample_ids_stop = &(difflist_sample_ids_iter[kBitsPerWordD2]);\n    uintptr_t raregeno_word = *raregeno_incr++;\n    if (difflist_sample_ids_stop > difflist_sample_ids_end) {\n      if (difflist_sample_ids_iter == difflist_sample_ids_end) {\n        return;\n      }\n      difflist_sample_ids_stop = difflist_sample_ids_end;\n    }\n    while (1) {\n      const uint32_t cur_sample_idx = *difflist_sample_ids_iter;\n      const uint32_t cur_subsetted_pos = RawToSubsettedPos(sample_include, sample_include_cumulative_popcounts, cur_sample_idx);\n      AssignNyparrEntry(cur_subsetted_pos, raregeno_word & 3, genovec);\n      if (difflist_sample_ids_iter++ == difflist_sample_ids_stop) {\n        break;\n      }\n      raregeno_word >>= 2;\n    }\n  }\n}\n*/\n\nPglErr ParseAndApplyDifflist(const unsigned char* fread_end, const unsigned char** fread_pp, PgenReaderMain* pgrp, uintptr_t* __restrict genoarr) {\n  // Side effects: uses pgr.workspace_raregeno_tmp_loadbuf.\n  // Cannot occur after genoarr subsetting since the difflist sample indexes\n  // will be incorrect.\n  const uint32_t raw_sample_ct = pgrp->fi.raw_sample_ct;\n  uintptr_t* cur_raregeno_iter = pgrp->workspace_raregeno_tmp_loadbuf;\n  const unsigned char* group_info_iter;\n  uint32_t difflist_len;\n  PglErr reterr = ParseDifflistHeader(fread_end, raw_sample_ct, fread_pp, cur_raregeno_iter, &group_info_iter, &difflist_len);\n  if (reterr || (!difflist_len)) {\n    return reterr;\n  }\n  const uint32_t sample_id_byte_ct = BytesToRepresentNzU32(raw_sample_ct);\n  const uint32_t subgroup_idx_last = (difflist_len - 1) / kBitsPerWordD2;\n  uintptr_t raw_sample_idx = 0;\n  for (uint32_t subgroup_idx = 0; ; ++subgroup_idx) {\n    uint32_t remaining_deltas_in_subgroup = kBitsPerWordD2 - 1;\n    if (subgroup_idx >= subgroup_idx_last) {\n      if (subgroup_idx > subgroup_idx_last) {\n        return kPglRetSuccess;\n      }\n      remaining_deltas_in_subgroup &= difflist_len - 1;\n    }\n    if (!(subgroup_idx % (kPglDifflistGroupSize / kBitsPerWordD2))) {\n      raw_sample_idx = SubU32Load(group_info_iter, sample_id_byte_ct);\n      group_info_iter = &(group_info_iter[sample_id_byte_ct]);\n    } else {\n      raw_sample_idx += GetVint31(fread_end, fread_pp);\n    }\n    uintptr_t cur_raregeno_word = *cur_raregeno_iter++;\n    // This loop tends to be the decompression bottleneck.  
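(Each iteration peels one 2-bit\n    // genotype off cur_raregeno_word, writes it at raw_sample_idx, then\n    // advances raw_sample_idx by the next vint-encoded delta.)  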
Tried to modify it\n    // to process 4 entries at a time, but that didn't end up helping.\n    for (; ; --remaining_deltas_in_subgroup) {\n      // always check, since otherwise AssignNyparrEntry() can scribble\n      // over arbitrary memory\n      if (unlikely(raw_sample_idx >= raw_sample_ct)) {\n        return kPglRetMalformedInput;\n      }\n      const uintptr_t cur_geno = cur_raregeno_word & 3;\n      AssignNyparrEntry(raw_sample_idx, cur_geno, genoarr);\n      if (!remaining_deltas_in_subgroup) {\n        break;\n      }\n      raw_sample_idx += GetVint31(fread_end, fread_pp);\n      cur_raregeno_word >>= 2;\n    }\n  }\n}\n\n// could merge ParseAndApplyDifflist() with this?\nPglErr ParseAndApplyDifflistSubset(const unsigned char* fread_end, const uintptr_t* __restrict sample_include, const uint32_t* __restrict sample_include_cumulative_popcounts, uint32_t sample_ct, const unsigned char** fread_pp, PgenReaderMain* pgrp, uintptr_t* __restrict genoarr) {\n  // Side effects: uses pgr.workspace_raregeno_tmp_loadbuf.\n  const uint32_t raw_sample_ct = pgrp->fi.raw_sample_ct;\n  if (sample_ct == raw_sample_ct) {\n    return ParseAndApplyDifflist(fread_end, fread_pp, pgrp, genoarr);\n  }\n  uintptr_t* cur_raregeno_iter = pgrp->workspace_raregeno_tmp_loadbuf;\n  const unsigned char* group_info_iter;\n  uint32_t difflist_len;\n  PglErr reterr = ParseDifflistHeader(fread_end, raw_sample_ct, fread_pp, cur_raregeno_iter, &group_info_iter, &difflist_len);\n  if (reterr || (!difflist_len)) {\n    return reterr;\n  }\n  const uint32_t sample_id_byte_ct = BytesToRepresentNzU32(raw_sample_ct);\n  const uint32_t subgroup_idx_last = (difflist_len - 1) / kBitsPerWordD2;\n  uintptr_t raw_sample_idx = 0;\n  for (uint32_t subgroup_idx = 0; ; ++subgroup_idx) {\n    uint32_t remaining_deltas_in_subgroup = kBitsPerWordD2 - 1;\n    if (subgroup_idx >= subgroup_idx_last) {\n      if (subgroup_idx > subgroup_idx_last) {\n        return kPglRetSuccess;\n      }\n      remaining_deltas_in_subgroup &= difflist_len - 1;\n    }\n    if (!(subgroup_idx % (kPglDifflistGroupSize / kBitsPerWordD2))) {\n      raw_sample_idx = SubU32Load(group_info_iter, sample_id_byte_ct);\n      group_info_iter = &(group_info_iter[sample_id_byte_ct]);\n    } else {\n      raw_sample_idx += GetVint31(fread_end, fread_pp);\n    }\n    uintptr_t cur_raregeno_word = *cur_raregeno_iter++;\n    // This loop tends to be the decompression bottleneck.  
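(Same decode loop as in\n    // ParseAndApplyDifflist(), except entries are filtered with IsSet() and\n    // translated with RawToSubsettedPos() before the write.)  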
Tried to modify it\n    // to process 4 entries at a time, but that didn't end up helping.\n    for (; ; --remaining_deltas_in_subgroup) {\n      // always check, since otherwise AssignNyparrEntry() can scribble\n      // over arbitrary memory\n      if (unlikely(raw_sample_idx >= raw_sample_ct)) {\n        return kPglRetMalformedInput;\n      }\n      if (IsSet(sample_include, raw_sample_idx)) {\n        const uintptr_t cur_geno = cur_raregeno_word & 3;\n        AssignNyparrEntry(RawToSubsettedPos(sample_include, sample_include_cumulative_popcounts, raw_sample_idx), cur_geno, genoarr);\n      }\n      if (!remaining_deltas_in_subgroup) {\n        break;\n      }\n      raw_sample_idx += GetVint31(fread_end, fread_pp);\n      cur_raregeno_word >>= 2;\n    }\n  }\n}\n\n// vector-alignment preferred\nPglErr ParseOnebitUnsafe(const unsigned char* fread_end, const unsigned char** fread_pp, PgenReaderMain* pgrp, uintptr_t* __restrict genoarr) {\n  // doesn't zero out trailing genoarr bits\n  const uint32_t raw_sample_ct = pgrp->fi.raw_sample_ct;\n  const uint32_t common2_and_bitarray_byte_ct = (raw_sample_ct + 15) / CHAR_BIT;\n  const unsigned char* onebit_main_iter = *fread_pp;\n  if (PtrAddCk(fread_end, common2_and_bitarray_byte_ct, fread_pp)) {\n    return kPglRetMalformedInput;\n  }\n  const uintptr_t common2_code = *onebit_main_iter++;\n  const uintptr_t word_base = (common2_code / 4) * kMask5555;\n  const uintptr_t common_code_delta = common2_code & 3;\n  uint32_t genoarr_widx = 0;\n#if defined(__LP64__) && !defined(USE_AVX2)\n  // this is slower in AVX2 case\n  const uint32_t read_hw_ct = raw_sample_ct / kBitsPerWordD2;\n  if (read_hw_ct >= 2 * kWordsPerVec) {\n    const uint32_t read_vec_ct = raw_sample_ct / kBitsPerVec;\n    const VecW m4 = VCONST_W(kMask0F0F);\n#  ifdef USE_SHUFFLE8\n    // 0, 1, 4, 5, 16, 17, 20, 21, 64, 65, 68, 69, 80, 81, 84, 85 if the codes\n    // are 0 and 1\n    const VecW lookup = {word_base + common_code_delta * 0x1514111005040100LLU,\n                         word_base + common_code_delta * 0x5554515045444140LLU};\n#  else\n    const VecW m1 = VCONST_W(kMask5555);\n    const VecW m2 = VCONST_W(kMask3333);\n    const VecW vec_base = VCONST_W(word_base);\n    const VecW vec_delta = VCONST_W(common_code_delta * kMask5555);\n#  endif\n    for (uint32_t vidx = 0; vidx != read_vec_ct; ++vidx) {\n      const VecW cur_vec = vecw_loadu(&(onebit_main_iter[vidx * kBytesPerVec]));\n      VecW vec_lo;\n      VecW vec_hi;\n      vecw_lo_and_hi_nybbles(cur_vec, m4, &vec_lo, &vec_hi);\n#  ifdef USE_SHUFFLE8\n      vec_lo = vecw_shuffle8(lookup, vec_lo);\n      vec_hi = vecw_shuffle8(lookup, vec_hi);\n#  else\n      // unpack bytes, then use as mask for vec_add.\n      vec_lo = (vec_lo | vecw_slli(vec_lo, 2)) & m2;\n      vec_hi = (vec_hi | vecw_slli(vec_hi, 2)) & m2;\n      vec_lo = (vec_lo | vecw_slli(vec_lo, 1)) & m1;\n      vec_hi = (vec_hi | vecw_slli(vec_hi, 1)) & m1;\n      vec_lo = vec_lo | vecw_slli(vec_lo, 1);\n      vec_hi = vec_hi | vecw_slli(vec_hi, 1);\n      vec_lo = vec_base + (vec_delta & vec_lo);\n      vec_hi = vec_base + (vec_delta & vec_hi);\n#  endif\n      vecw_storeu(&(genoarr[kWordsPerVec * 2 * vidx]), vec_lo);\n      vecw_storeu(&(genoarr[kWordsPerVec * (2 * vidx + 1)]), vec_hi);\n    }\n    genoarr_widx = read_vec_ct * (2 * kWordsPerVec);\n  }\n#endif\n  const uint32_t genoarr_widx_trail = (raw_sample_ct + 7) / kBitsPerWordD2;\n  const uint32_t genoarr_widx_end = NypCtToWordCt(raw_sample_ct);\n  for (; ; ++genoarr_widx) {\n    uintptr_t ww;\n    if 
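/* trailing region: a full halfword load could overrun the record, so ProperSubwordLoad is used below */ 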
(genoarr_widx >= genoarr_widx_trail) {\n      // might want to modify to not go here if last read is an entire halfword\n      if (genoarr_widx == genoarr_widx_end) {\n        break;\n      }\n      ww = ProperSubwordLoad(&(onebit_main_iter[genoarr_widx_trail * sizeof(Halfword)]), 1 + (((raw_sample_ct - 1) % kBitsPerWordD2) / CHAR_BIT));\n    } else {\n      Halfword hw;\n      CopyFromUnalignedOffsetHW(&hw, onebit_main_iter, genoarr_widx);\n      ww = hw;\n    }\n    // apply middle-out operation\n    // 64-bit:\n    //   const uintptr_t middle_out_result = (ww | (ww << 31)) & kMask5555;\n    // 32-bit:\n    //   *genoarr_iter++ = word_base + (ww & kMask5555) * common_code_delta;\n    //   *genoarr_iter++ = word_base + ((ww >> 1) & kMask5555) * common_code_delta;\n    // (scrapped since the time savings don't seem to be worth the extra\n    // end-of-vector corner cases, apparently the extra operations here are\n    // sufficiently cheap, or even negative-cost in AVX2 case)\n\n    ww = UnpackHalfwordToWord(ww);\n    genoarr[genoarr_widx] = word_base + ww * common_code_delta;\n  }\n  return ParseAndApplyDifflist(fread_end, fread_pp, pgrp, genoarr);\n}\n\n// vector-alignment preferred\nPglErr Parse1or2bitGenoarrUnsafe(const unsigned char* fread_end, uint32_t vrtype, const unsigned char** fread_pp, PgenReaderMain* pgrp, uintptr_t* __restrict genoarr) {\n  // Side effect: may use pgrp->workspace_raregeno_tmp_loadbuf.\n  // Does not update fp_vidx, does not rotate plink1-formatted data (since it's\n  // better to do that post-subsetting)\n  if (vrtype & 3) {\n    return ParseOnebitUnsafe(fread_end, fread_pp, pgrp, genoarr);\n  }\n  // uncompressed storage\n  const uint32_t raw_sample_ct = pgrp->fi.raw_sample_ct;\n  const uint32_t genoarr_byte_ct = NypCtToByteCt(raw_sample_ct);\n  const unsigned char* src_genodata = *fread_pp;\n  if (PtrAddCk(fread_end, genoarr_byte_ct, fread_pp)) {\n    return kPglRetMalformedInput;\n  }\n  memcpy(genoarr, src_genodata, genoarr_byte_ct);\n  return kPglRetSuccess;\n}\n\nPglErr ParseNonLdGenovecSubsetUnsafe(const unsigned char* fread_end, const uintptr_t* __restrict sample_include, const uint32_t* __restrict sample_include_cumulative_popcounts, uint32_t sample_ct, uint32_t vrtype, const unsigned char** fread_pp, PgenReaderMain* pgrp, uintptr_t* __restrict genovec) {\n  // Side effects:\n  //   may use pgrp->workspace_raregeno_tmp_loadbuf\n  //   fills pgrp->ldbase_raw_genovec iff (!(vrtype & 4)) and\n  //     subsetting_required (does not update ldbase_stypes, caller's\n  //     responsibility to care)\n  // See comments on Parse1or2bitGenoarrUnsafe().\n  const uint32_t raw_sample_ct = pgrp->fi.raw_sample_ct;\n  if (!(vrtype & 4)) {\n    const uint32_t subsetting_required = (sample_ct != raw_sample_ct);\n    uintptr_t* raw_genovec = subsetting_required? pgrp->ldbase_raw_genovec : genovec;\n    PglErr reterr = Parse1or2bitGenoarrUnsafe(fread_end, vrtype, fread_pp, pgrp, raw_genovec);\n    if ((!subsetting_required) || reterr) {\n      return reterr;\n    }\n    CopyNyparrNonemptySubset(raw_genovec, sample_include, raw_sample_ct, sample_ct, genovec);\n    return kPglRetSuccess;\n  }\n  const uint32_t vrtype_low2 = vrtype & 3;\n  if (vrtype_low2 != 1) {\n    const uint32_t vec_ct = NypCtToVecCt(sample_ct);\n\n    // This memset is frequently the limiting operation.  
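(vrtype_low2 * kMask5555\n    // replicates the 2-bit common genotype code across every entry, so only\n    // the difflist entries need to be patched afterwards.)  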
This suggests that we\n    // should eventually make more use of the DifflistOrGenovec interface.\n    vecset(genovec, vrtype_low2 * kMask5555, vec_ct);\n    return ParseAndApplyDifflistSubset(fread_end, sample_include, sample_include_cumulative_popcounts, sample_ct, fread_pp, pgrp, genovec);\n  }\n  // all homozygous-ref special case\n  ZeroWArr(NypCtToWordCt(sample_ct), genovec);\n  return kPglRetSuccess;\n}\n\nuint32_t LdLoadNecessary(uint32_t cur_vidx, PgenReaderMain* pgrp) {\n  // Determines whether LD base variant needs to be loaded (in addition to the\n  // current variant), assuming we need (possibly subsetted) hardcalls.\n  // Important: this updates pgrp->ldbase_vidx when necessary, as a side\n  // effect.\n  // bugfix (22 May 2018): this only checked whether ldbase_stypes was nonzero;\n  // there was an AllHets + cache-clear edge case where that's not good enough.\n  // now that AllHets has been removed, though, it should be safe again.\n  if (pgrp->ldbase_stypes && (cur_vidx == pgrp->fp_vidx)) {\n    // ldbase variant guaranteed to be up-to-date if we didn't skip the last\n    // variant, and cache wasn't cleared\n    return 0;\n  }\n  // Find the last vrtypes[] value before vrtypes[cur_vidx] with bit 1 unset or\n  // bit 2 set.\n  const uint32_t old_ldbase_vidx = pgrp->ldbase_vidx;\n  pgrp->ldbase_vidx = GetLdbaseVidx(pgrp->fi.vrtypes, cur_vidx);\n  return (pgrp->ldbase_vidx != old_ldbase_vidx);\n}\n\nBoolErr InitReadPtrs(uint32_t vidx, PgenReaderMain* pgrp, const unsigned char** fread_pp, const unsigned char** fread_endp) {\n  const unsigned char* block_base = pgrp->fi.block_base;\n  if (block_base != nullptr) {\n    // possible todo: special handling of end of vblock\n    const uint64_t block_offset = pgrp->fi.block_offset;\n    *fread_pp = &(block_base[GetPgfiFpos(&(pgrp->fi), vidx) - block_offset]);\n    *fread_endp = &(block_base[GetPgfiFpos(&(pgrp->fi), vidx + 1) - block_offset]);\n\n    // still a useful hint to LdLoadNecessary()\n    pgrp->fp_vidx = vidx + 1;\n\n    return 0;\n  }\n  if (pgrp->fp_vidx != vidx) {\n    if (unlikely(fseeko(pgrp->ff, GetPgfiFpos(&(pgrp->fi), vidx), SEEK_SET))) {\n      return 1;\n    }\n  }\n  const uintptr_t cur_vrec_width = GetPgfiVrecWidth(&(pgrp->fi), vidx);\n#ifdef __LP64__\n  if (unlikely(fread_checked(pgrp->fread_buf, cur_vrec_width, pgrp->ff))) {\n    if (feof_unlocked(pgrp->ff)) {\n      errno = 0;\n    }\n    return 1;\n  }\n#else\n  // cur_vrec_width < 2^31 since otherwise we error out on initialization\n  if (unlikely(!fread_unlocked(pgrp->fread_buf, cur_vrec_width, 1, pgrp->ff))) {\n    if (feof_unlocked(pgrp->ff)) {\n      errno = 0;\n    }\n    return 1;\n  }\n#endif\n  *fread_pp = pgrp->fread_buf;\n  *fread_endp = &(pgrp->fread_buf[cur_vrec_width]);\n  pgrp->fp_vidx = vidx + 1;\n  return 0;\n}\n\n// Fills dest with subsetted ldbase contents, and ensures ldcache is filled so\n// no explicit reload of ldbase is needed for next variant if we're extracting\n// the same sample subset.  
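ldbase_stypes tracks which cached forms\n// of the base variant are valid; the forms touched here are a subsetted\n// genovec (kfPgrLdcacheNyp), an unsubsetted genovec (kfPgrLdcacheRawNyp), and\n// a difflist (kfPgrLdcacheDifflist).  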
(Reload is occasionally needed if next variant is\n// multiallelic or phased, we only prevent that when convenient.)\nPglErr LdLoadAndCopyGenovecSubset(const uintptr_t* __restrict sample_include, const uint32_t* __restrict sample_include_cumulative_popcounts, uint32_t sample_ct, uint32_t vidx, PgenReaderMain* pgrp, uintptr_t* dest) {\n  const uint32_t raw_sample_ct = pgrp->fi.raw_sample_ct;\n  if (LdLoadNecessary(vidx, pgrp)) {\n    const uint32_t ldbase_vidx = pgrp->ldbase_vidx;\n    const unsigned char* fread_ptr;\n    const unsigned char* fread_end;\n    if (unlikely(InitReadPtrs(ldbase_vidx, pgrp, &fread_ptr, &fread_end))) {\n      return kPglRetReadFail;\n    }\n    const uint32_t vrtype = pgrp->fi.vrtypes[ldbase_vidx];\n    PglErr reterr = ParseNonLdGenovecSubsetUnsafe(fread_end, sample_include, sample_include_cumulative_popcounts, sample_ct, vrtype, &fread_ptr, pgrp, dest);\n    pgrp->ldbase_stypes = ((sample_ct != raw_sample_ct) && (!(vrtype & 4)))? (kfPgrLdcacheNyp | kfPgrLdcacheRawNyp) : kfPgrLdcacheNyp;\n    CopyNyparr(dest, sample_ct, pgrp->ldbase_genovec);\n    return reterr;\n  }\n  if (pgrp->ldbase_stypes & kfPgrLdcacheNyp) {\n    CopyNyparr(pgrp->ldbase_genovec, sample_ct, dest);\n  } else {\n    if ((pgrp->ldbase_stypes & kfPgrLdcacheRawNyp) && (sample_ct == raw_sample_ct)) {\n      CopyNyparr(pgrp->ldbase_raw_genovec, sample_ct, dest);\n    } else if (pgrp->ldbase_stypes & kfPgrLdcacheDifflist) {\n      // rematerialize-from-difflist is cheap.\n      PgrDifflistToGenovecUnsafe(pgrp->ldbase_raregeno, pgrp->ldbase_difflist_sample_ids, pgrp->fi.vrtypes[pgrp->ldbase_vidx] & 3, sample_ct, pgrp->ldbase_difflist_len, dest);\n    } else {\n      CopyNyparrNonemptySubset(pgrp->ldbase_raw_genovec, sample_include, pgrp->fi.raw_sample_ct, sample_ct, dest);\n      CopyNyparr(dest, sample_ct, pgrp->ldbase_genovec);\n      pgrp->ldbase_stypes |= kfPgrLdcacheNyp;\n    }\n  }\n  return kPglRetSuccess;\n}\n\n// fread_pp should be non-null iff this is being called by an internal function\n// as part of a more complex read.\n// in multiallelic case:\n//   hom-ref = 0\n//   het-ref = 1\n//   two nonref = 2\n//   missing = 3\nPglErr ReadGenovecSubsetUnsafe(const uintptr_t* __restrict sample_include, const uint32_t* __restrict sample_include_cumulative_popcounts, uint32_t sample_ct, uint32_t vidx, PgenReaderMain* pgrp, const unsigned char** fread_pp, const unsigned char** fread_endp, uintptr_t* __restrict genovec) {\n  // Side effects:\n  //   may use pgr.workspace_raregeno_tmp_loadbuf (any difflist)\n  const uint32_t vrtype = GetPgfiVrtype(&(pgrp->fi), vidx);\n  const uint32_t maintrack_vrtype = vrtype & 7;\n  if (VrtypeLdCompressed(maintrack_vrtype)) {\n    // LD compression\n    PglErr reterr = LdLoadAndCopyGenovecSubset(sample_include, sample_include_cumulative_popcounts, sample_ct, vidx, pgrp, genovec);\n    if (unlikely(reterr)) {\n      return reterr;\n    }\n    const unsigned char* fread_ptr;\n    const unsigned char* fread_end;\n    if (unlikely(InitReadPtrs(vidx, pgrp, &fread_ptr, &fread_end))) {\n      return kPglRetReadFail;\n    }\n    reterr = ParseAndApplyDifflistSubset(fread_end, sample_include, sample_include_cumulative_popcounts, sample_ct, &fread_ptr, pgrp, genovec);\n    if (unlikely(reterr)) {\n      return reterr;\n    }\n    if (maintrack_vrtype == 3) {\n      GenovecInvertUnsafe(sample_ct, genovec);\n    }\n    if (fread_pp) {\n      *fread_pp = fread_ptr;\n      *fread_endp = fread_end;\n    }\n    return kPglRetSuccess;\n  }\n  const unsigned char* fread_ptr;\n  const 
unsigned char* fread_end = nullptr;  // maybe-uninitialized warning\n  // tried inserting special-case code for the plink1 case to avoid a copy, and\n  // it was actually slower\n  if (unlikely(InitReadPtrs(vidx, pgrp, &fread_ptr, &fread_end))) {\n    return kPglRetReadFail;\n  }\n  // tried to add more sophisticated caching, but turns out it isn't worth it\n  PglErr reterr = ParseNonLdGenovecSubsetUnsafe(fread_end, sample_include, sample_include_cumulative_popcounts, sample_ct, maintrack_vrtype, &fread_ptr, pgrp, genovec);\n  if (unlikely(reterr)) {\n    return reterr;\n  }\n  if (vrtype == kPglVrtypePlink1) {\n    PgrPlink1ToPlink2InplaceUnsafe(sample_ct, genovec);\n  } else {\n    const uint32_t is_ldbase = pgrp->fi.vrtypes && VrtypeLdCompressed(pgrp->fi.vrtypes[vidx + 1]);\n    const uint32_t ldbase_raw_genovec_saved = (sample_ct != pgrp->fi.raw_sample_ct) && (!(maintrack_vrtype & 4));\n    if (is_ldbase) {\n      CopyNyparr(genovec, sample_ct, pgrp->ldbase_genovec);\n      pgrp->ldbase_vidx = vidx;\n      // may be better to just always set to kfPgrLdcacheNyp?  this depends\n      // on multiallelic code\n      pgrp->ldbase_stypes = ldbase_raw_genovec_saved? (kfPgrLdcacheNyp | kfPgrLdcacheRawNyp) : kfPgrLdcacheNyp;\n    } else if (ldbase_raw_genovec_saved) {\n      // bugfix (22 Sep 2018): when accessing variants out of order, need to\n      // note that we just clobbered the cache\n      pgrp->ldbase_stypes &= ~kfPgrLdcacheRawNyp;\n    }\n  }\n  if (fread_pp) {\n    *fread_pp = fread_ptr;\n    *fread_endp = fread_end;\n  }\n  return kPglRetSuccess;\n}\n\nPglErr PgrGet(const uintptr_t* __restrict sample_include, PgrSampleSubsetIndex pssi, uint32_t sample_ct, uint32_t vidx, PgenReader* pgr_ptr, uintptr_t* __restrict genovec) {\n  if (!sample_ct) {\n    return kPglRetSuccess;\n  }\n  PgenReaderMain* pgrp = GetPgrp(pgr_ptr);\n  assert(vidx < pgrp->fi.raw_variant_ct);\n  return ReadGenovecSubsetUnsafe(sample_include, GetSicp(pssi), sample_ct, vidx, pgrp, nullptr, nullptr, genovec);\n}\n\n// Fills dest with ldbase contents, and ensures ldcache is filled so no\n// explicit reload of ldbase is needed for next variant.\nPglErr LdLoadAndCopyRawGenovec(uint32_t subsetting_required, uint32_t vidx, PgenReaderMain* pgrp, uintptr_t* dest) {\n  const uint32_t genovec_byte_ct = NypCtToVecCt(pgrp->fi.raw_sample_ct) * kBytesPerVec;\n  if (LdLoadNecessary(vidx, pgrp) || (subsetting_required && (!(pgrp->ldbase_stypes & kfPgrLdcacheRawNyp)))) {\n    const uint32_t ldbase_vidx = pgrp->ldbase_vidx;\n    const unsigned char* fread_ptr;\n    const unsigned char* fread_end;\n    if (unlikely(InitReadPtrs(ldbase_vidx, pgrp, &fread_ptr, &fread_end))) {\n      return kPglRetReadFail;\n    }\n    const uint32_t vrtype = pgrp->fi.vrtypes[ldbase_vidx];\n    pgrp->ldbase_stypes = kfPgrLdcacheRawNyp;\n    assert((vrtype & 7) != 5); // all-hom-ref can't be ldbase\n    uintptr_t* raw_genovec = pgrp->ldbase_raw_genovec;\n    PglErr reterr;\n    if (!(vrtype & 4)) {\n      reterr = Parse1or2bitGenoarrUnsafe(fread_end, vrtype, &fread_ptr, pgrp, raw_genovec);\n    } else {\n      const uint32_t vrtype_low2 = vrtype & 3;\n      vecset(raw_genovec, vrtype_low2 * kMask5555, DivUp(genovec_byte_ct, kBytesPerVec));\n      reterr = ParseAndApplyDifflist(fread_end, &fread_ptr, pgrp, raw_genovec);\n    }\n    memcpy(dest, raw_genovec, genovec_byte_ct);\n    return reterr;\n  }\n  if (pgrp->ldbase_stypes & kfPgrLdcacheRawNyp) {\n    memcpy(dest, pgrp->ldbase_raw_genovec, genovec_byte_ct);\n  } else {\n    // no subsetting, can use 
regular Ldcache entries\n    const uint32_t raw_sample_ct = pgrp->fi.raw_sample_ct;\n    if (pgrp->ldbase_stypes & kfPgrLdcacheNyp) {\n      memcpy(dest, pgrp->ldbase_genovec, genovec_byte_ct);\n    } else {\n      PgrDifflistToGenovecUnsafe(pgrp->ldbase_raregeno, pgrp->ldbase_difflist_sample_ids, pgrp->fi.vrtypes[pgrp->ldbase_vidx] & 3, raw_sample_ct, pgrp->ldbase_difflist_len, dest);\n    }\n  }\n  return kPglRetSuccess;\n}\n\n// Does not zero out trailing bits.\n// Requires fread_pp and fread_endp to be non-null for now.\nPglErr ReadRawGenovec(uint32_t subsetting_required, uint32_t vidx, PgenReaderMain* pgrp, const unsigned char** fread_pp, const unsigned char** fread_endp, uintptr_t* raw_genovec) {\n  const uint32_t vrtype = GetPgfiVrtype(&(pgrp->fi), vidx);\n  const uint32_t maintrack_vrtype = vrtype & 7;\n  const uint32_t raw_sample_ct = pgrp->fi.raw_sample_ct;\n  if (VrtypeLdCompressed(maintrack_vrtype)) {\n    // LD compression\n    PglErr reterr = LdLoadAndCopyRawGenovec(subsetting_required, vidx, pgrp, raw_genovec);\n    if (unlikely(reterr)) {\n      return reterr;\n    }\n    if (unlikely(InitReadPtrs(vidx, pgrp, fread_pp, fread_endp))) {\n      return kPglRetReadFail;\n    }\n    reterr = ParseAndApplyDifflist(*fread_endp, fread_pp, pgrp, raw_genovec);\n    if (unlikely(reterr)) {\n      return reterr;\n    }\n    if (maintrack_vrtype == 3) {\n      GenovecInvertUnsafe(raw_sample_ct, raw_genovec);\n    }\n    return kPglRetSuccess;\n  }\n  if (unlikely(InitReadPtrs(vidx, pgrp, fread_pp, fread_endp))) {\n    return kPglRetReadFail;\n  }\n  const unsigned char* fread_end = *fread_endp;\n  PglErr reterr;\n  if (!(vrtype & 4)) {\n    reterr = Parse1or2bitGenoarrUnsafe(fread_end, vrtype, fread_pp, pgrp, raw_genovec);\n  } else {\n    const uint32_t vrtype_low2 = vrtype & 3;\n    if (vrtype_low2 == 1) {\n      ZeroWArr(NypCtToWordCt(raw_sample_ct), raw_genovec);\n      // all-hom-ref can't be ldbase\n      return kPglRetSuccess;\n    }\n    const uint32_t vec_ct = NypCtToVecCt(raw_sample_ct);\n    vecset(raw_genovec, vrtype_low2 * kMask5555, vec_ct);\n    reterr = ParseAndApplyDifflist(fread_end, fread_pp, pgrp, raw_genovec);\n  }\n  if (vrtype == kPglVrtypePlink1) {\n    PgrPlink1ToPlink2InplaceUnsafe(raw_sample_ct, raw_genovec);\n  } else {\n    const uint32_t is_ldbase = pgrp->fi.vrtypes && VrtypeLdCompressed(pgrp->fi.vrtypes[vidx + 1]);\n    if (is_ldbase) {\n      CopyNyparr(raw_genovec, raw_sample_ct, pgrp->ldbase_raw_genovec);\n      pgrp->ldbase_vidx = vidx;\n      pgrp->ldbase_stypes = kfPgrLdcacheRawNyp;\n    }\n  }\n  return reterr;\n}\n/*\nvoid CopyAndSubsetDifflist(const uintptr_t* __restrict sample_include, const uint32_t* __restrict sample_include_cumulative_popcounts, const uintptr_t* __restrict raw_raregeno, const uint32_t* __restrict raw_difflist_sample_ids, uint32_t raw_difflist_len, uintptr_t* __restrict new_raregeno, uint32_t* __restrict new_difflist_sample_ids, uint32_t* __restrict new_difflist_len_ptr) {\n  // Trailing bits of new_raregeno are zeroed out.\n  if (!raw_difflist_len) {\n    *new_difflist_len_ptr = 0;\n    return;\n  }\n  const uintptr_t* raw_raregeno_incr = raw_raregeno;\n  const uint32_t* raw_difflist_sample_ids_iter = raw_difflist_sample_ids;\n  const uint32_t* raw_difflist_sample_ids_last = &(raw_difflist_sample_ids[RoundDownPow2(raw_difflist_len - 1, kBitsPerWordD2)]);\n  uintptr_t* new_raregeno_incr = new_raregeno;\n  uintptr_t new_raregeno_word = 0;\n  uint32_t new_difflist_len = 0;\n  uint32_t block_len_m1 = kBitsPerWordD2 - 1;\n  while 
(1) {\n    if (raw_difflist_sample_ids_iter >= raw_difflist_sample_ids_last) {\n      if (raw_difflist_sample_ids_iter > raw_difflist_sample_ids_last) {\n        if (new_difflist_len % kBitsPerWordD2) {\n          *new_raregeno_incr = new_raregeno_word;\n        }\n        *new_difflist_len_ptr = new_difflist_len;\n        return;\n      }\n      block_len_m1 &= raw_difflist_len - 1;\n    }\n    uintptr_t raw_raregeno_word = *raw_raregeno_incr++;\n    uint32_t raw_difflist_idx_lowbits = 0;\n    while (1) {\n      const uint32_t raw_sample_idx = raw_difflist_sample_ids_iter[raw_difflist_idx_lowbits];\n      if (IsSet(sample_include, raw_sample_idx)) {\n        new_difflist_sample_ids[new_difflist_len] = RawToSubsettedPos(sample_include, sample_include_cumulative_popcounts, raw_sample_idx);\n        new_raregeno_word |= ((raw_raregeno_word >> (2 * raw_difflist_idx_lowbits)) & 3) << (2 * (new_difflist_len % kBitsPerWordD2));\n        ++new_difflist_len;\n        if (!(new_difflist_len % kBitsPerWordD2)) {\n          *new_raregeno_incr++ = new_raregeno_word;\n          new_raregeno_word = 0;\n        }\n      }\n      if (raw_difflist_idx_lowbits == block_len_m1) {\n        break;\n      }\n      ++raw_difflist_idx_lowbits;\n    }\n    raw_difflist_sample_ids_iter = &(raw_difflist_sample_ids_iter[kBitsPerWordD2]);\n  }\n}\n*/\n\n// Populates pgrp->ldbase_genovec or\n// pgrp->ldbase_{raregeno,difflist_sample_ids,difflist_len}, depending on\n// storage type.\n// Currently just called by ReadDifflistOrGenovecSubsetUnsafe().\nPglErr LdLoadMinimalSubsetIfNecessary(const uintptr_t* __restrict sample_include, const uint32_t* __restrict sample_include_cumulative_popcounts, uint32_t sample_ct, uint32_t vidx, PgenReaderMain* pgrp) {\n  if (!LdLoadNecessary(vidx, pgrp)) {\n    return kPglRetSuccess;\n  }\n  const uint32_t ldbase_vidx = pgrp->ldbase_vidx;\n  const uint64_t cur_vidx_fpos = pgrp->fi.var_fpos[ldbase_vidx];\n  const uint32_t ldbase_vrtype = pgrp->fi.vrtypes[ldbase_vidx];\n  const uint32_t raw_sample_ct = pgrp->fi.raw_sample_ct;\n  const uint32_t subsetting_required = (sample_ct != raw_sample_ct);\n  uintptr_t* raw_genovec = subsetting_required? pgrp->ldbase_raw_genovec : pgrp->ldbase_genovec;\n  const unsigned char* fread_ptr;\n  const unsigned char* fread_end;\n  const unsigned char* block_base = pgrp->fi.block_base;\n  PglErr reterr = kPglRetSuccess;\n  if (block_base != nullptr) {\n    {\n      const uint64_t block_offset = pgrp->fi.block_offset;\n      fread_ptr = &(block_base[cur_vidx_fpos - block_offset]);\n      fread_end = &(block_base[pgrp->fi.var_fpos[ldbase_vidx + 1] - block_offset]);\n    }\n    if (!(ldbase_vrtype & 4)) {\n      reterr = Parse1or2bitGenoarrUnsafe(fread_end, ldbase_vrtype, &fread_ptr, pgrp, raw_genovec);\n    LdLoadMinimalSubsetIfNecessary_genovec_finish:\n      pgrp->ldbase_stypes = subsetting_required? 
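/* when subsetting, raw_genovec aliases ldbase_raw_genovec, so both cached forms are valid */ 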
(kfPgrLdcacheNyp | kfPgrLdcacheRawNyp) : kfPgrLdcacheNyp;\n      if ((!subsetting_required) || reterr) {\n        return reterr;\n      }\n      CopyNyparrNonemptySubset(raw_genovec, sample_include, raw_sample_ct, sample_ct, pgrp->ldbase_genovec);\n      return kPglRetSuccess;\n    }\n    pgrp->fp_vidx = ldbase_vidx + 1;\n  } else {\n    if (unlikely(fseeko(pgrp->ff, pgrp->fi.var_fpos[ldbase_vidx], SEEK_SET))) {\n      return kPglRetReadFail;\n    }\n    const uintptr_t cur_vrec_width = pgrp->fi.var_fpos[ldbase_vidx + 1] - cur_vidx_fpos;\n    pgrp->fp_vidx = ldbase_vidx + 1;\n    if (!(ldbase_vrtype & 7)) {\n      // don't actually need to fread the whole record in this case\n      const uint32_t raw_sample_ct4 = NypCtToByteCt(raw_sample_ct);\n      if (unlikely(!fread_unlocked(raw_genovec, raw_sample_ct4, 1, pgrp->ff))) {\n        if (feof_unlocked(pgrp->ff)) {\n          errno = 0;\n        }\n        return kPglRetReadFail;\n      }\n      if (raw_sample_ct4 != cur_vrec_width) {\n        // ensure this doesn't match\n        pgrp->fp_vidx = 0;\n      }\n      goto LdLoadMinimalSubsetIfNecessary_genovec_finish;\n    }\n    if (unlikely(!fread_unlocked(pgrp->fread_buf, cur_vrec_width, 1, pgrp->ff))) {\n      if (feof_unlocked(pgrp->ff)) {\n        errno = 0;\n      }\n      return kPglRetReadFail;\n    }\n    fread_ptr = pgrp->fread_buf;\n    fread_end = &(pgrp->fread_buf[cur_vrec_width]);\n    if (!(ldbase_vrtype & 4)) {\n      reterr = ParseOnebitUnsafe(fread_end, &fread_ptr, pgrp, raw_genovec);\n      goto LdLoadMinimalSubsetIfNecessary_genovec_finish;\n    }\n  }\n  uint32_t ldbase_difflist_len;\n  if (!subsetting_required) {\n    reterr = ParseAndSaveDifflist(fread_end, raw_sample_ct, &fread_ptr, pgrp->ldbase_raregeno, pgrp->ldbase_difflist_sample_ids, &ldbase_difflist_len);\n  } else {\n    reterr = ParseAndSaveDifflistProperSubset(fread_end, sample_include, sample_include_cumulative_popcounts, raw_sample_ct, &fread_ptr, pgrp->ldbase_raregeno, pgrp->ldbase_difflist_sample_ids, &ldbase_difflist_len, pgrp->workspace_raregeno_tmp_loadbuf);\n  }\n  if (unlikely(reterr)) {\n    return reterr;\n  }\n  pgrp->ldbase_difflist_len = ldbase_difflist_len;\n  pgrp->ldbase_difflist_sample_ids[ldbase_difflist_len] = sample_ct;\n  pgrp->ldbase_stypes = kfPgrLdcacheDifflist;\n  return kPglRetSuccess;\n}\n\nPglErr ReadDifflistOrGenovecSubsetUnsafe(const uintptr_t* __restrict sample_include, const uint32_t* __restrict sample_include_cumulative_popcounts, uint32_t sample_ct, uint32_t max_simple_difflist_len, uint32_t vidx, PgenReaderMain* pgrp, const unsigned char** fread_pp, const unsigned char** fread_endp, uintptr_t* __restrict genovec, uint32_t* difflist_common_geno_ptr, uintptr_t* __restrict main_raregeno, uint32_t* __restrict difflist_sample_ids, uint32_t* __restrict difflist_len_ptr) {\n  assert(vidx < pgrp->fi.raw_variant_ct);\n  assert(sample_ct);\n  assert(max_simple_difflist_len < sample_ct);\n  // Side effects:\n  //   may use pgr.workspace_raregeno_tmp_loadbuf\n  // Trailing bits of genovec/main_raregeno may not be zeroed out.\n  const uint32_t vrtype = GetPgfiVrtype(&(pgrp->fi), vidx);\n  const uint32_t maintrack_vrtype = vrtype & 7;\n  const uint32_t raw_sample_ct = pgrp->fi.raw_sample_ct;\n  const uint32_t subsetting_required = (sample_ct != raw_sample_ct);\n  // const uint32_t multiallelic_hc_present = fread_pp && VrtypeMultiallelic(vrtype);\n  if (VrtypeLdCompressed(maintrack_vrtype)) {\n    // LD compression\n\n    // note that this can currently load a difflist longer than\n    // 
max_simple_difflist_len\n    PglErr reterr = LdLoadMinimalSubsetIfNecessary(sample_include, sample_include_cumulative_popcounts, sample_ct, vidx, pgrp);\n    if (unlikely(reterr)) {\n      return reterr;\n    }\n    const unsigned char* fread_ptr;\n    const unsigned char* fread_end;\n    if (unlikely(InitReadPtrs(vidx, pgrp, &fread_ptr, &fread_end))) {\n      return kPglRetReadFail;\n    }\n    const uint32_t ld_invert = (maintrack_vrtype == 3);\n    if (pgrp->ldbase_stypes & kfPgrLdcacheDifflist) {\n      const uint32_t ldbase_common_geno = pgrp->fi.vrtypes[pgrp->ldbase_vidx] & 3;\n      // unnecessary for this to branch on LD difflist length, since that's\n      // limited to 3/4 of the ldbase difflist length.\n      *difflist_common_geno_ptr = ldbase_common_geno;\n      reterr = ParseLdAndMergeDifflistSubset(fread_end, subsetting_required? sample_include : nullptr, sample_include_cumulative_popcounts, pgrp->ldbase_raregeno, pgrp->ldbase_difflist_sample_ids, pgrp->ldbase_difflist_len, ldbase_common_geno, raw_sample_ct, sample_ct, &fread_ptr, main_raregeno, difflist_sample_ids, difflist_len_ptr, pgrp->workspace_raregeno_tmp_loadbuf);\n      if (unlikely(reterr)) {\n        return reterr;\n      }\n      if (ld_invert) {\n        *difflist_common_geno_ptr = (6 - ldbase_common_geno) & 3;\n        GenovecInvertUnsafe(*difflist_len_ptr, main_raregeno);\n      }\n      return kPglRetSuccess;\n    }\n    if (pgrp->ldbase_stypes & kfPgrLdcacheNyp) {\n      CopyNyparr(pgrp->ldbase_genovec, sample_ct, genovec);\n    } else {\n      assert(pgrp->ldbase_stypes & kfPgrLdcacheRawNyp);\n      CopyNyparrNonemptySubset(pgrp->ldbase_raw_genovec, sample_include, raw_sample_ct, sample_ct, genovec);\n      CopyNyparr(genovec, sample_ct, pgrp->ldbase_genovec);\n      pgrp->ldbase_stypes |= kfPgrLdcacheNyp;\n    }\n    *difflist_common_geno_ptr = UINT32_MAX;\n    reterr = ParseAndApplyDifflistSubset(fread_end, sample_include, sample_include_cumulative_popcounts, sample_ct, &fread_ptr, pgrp, genovec);\n    if (unlikely(reterr)) {\n      return reterr;\n    }\n    if (ld_invert) {\n      GenovecInvertUnsafe(sample_ct, genovec);\n    }\n    if (fread_pp) {\n      *fread_pp = fread_ptr;\n      *fread_endp = fread_end;\n    }\n    return kPglRetSuccess;\n  }\n  const unsigned char* fread_ptr;\n  const unsigned char* fread_end = nullptr;  // maybe-uninitialized warning\n  if (unlikely(InitReadPtrs(vidx, pgrp, &fread_ptr, &fread_end))) {\n    return kPglRetReadFail;\n  }\n  const uint32_t is_ldbase = pgrp->fi.vrtypes && VrtypeLdCompressed(pgrp->fi.vrtypes[vidx + 1]);\n  const uint32_t saved_difflist_len = VrtypeDifflist(vrtype)? PeekVint31(fread_ptr, fread_end) : raw_sample_ct;\n  pgrp->ldbase_vidx = vidx;\n  // no limit is slightly better than /16 but substantially worse than /32 on\n  // the large test dataset (/64 is slightly worse than /32)\n  // no limit is best on the small test dataset\n  if (saved_difflist_len > max_simple_difflist_len) {\n    *difflist_common_geno_ptr = UINT32_MAX;\n    PglErr reterr = ParseNonLdGenovecSubsetUnsafe(fread_end, sample_include, sample_include_cumulative_popcounts, sample_ct, vrtype, &fread_ptr, pgrp, genovec);\n    if (unlikely(reterr)) {\n      return reterr;\n    }\n    const uint32_t ldbase_raw_genovec_saved = (subsetting_required && (!(vrtype & 4)));\n    if (is_ldbase) {\n      CopyNyparr(genovec, sample_ct, pgrp->ldbase_genovec);\n      pgrp->ldbase_stypes = ldbase_raw_genovec_saved? 
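/* raw genovec was cached by ParseNonLdGenovecSubsetUnsafe() iff this is set */ 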
(kfPgrLdcacheNyp | kfPgrLdcacheRawNyp) : kfPgrLdcacheNyp;\n    } else if (ldbase_raw_genovec_saved) {\n      // bugfix (22 Sep 2018)\n      pgrp->ldbase_stypes &= ~kfPgrLdcacheRawNyp;\n    }\n    if (vrtype == kPglVrtypePlink1) {\n      PgrPlink1ToPlink2InplaceUnsafe(sample_ct, genovec);\n    }\n    if (fread_pp) {\n      *fread_pp = fread_ptr;\n      *fread_endp = fread_end;\n    }\n    return kPglRetSuccess;\n  }\n  *difflist_common_geno_ptr = vrtype & 3;\n  PglErr reterr;\n  if (!subsetting_required) {\n    reterr = ParseAndSaveDifflist(fread_end, raw_sample_ct, &fread_ptr, main_raregeno, difflist_sample_ids, difflist_len_ptr);\n  } else {\n    reterr = ParseAndSaveDifflistProperSubset(fread_end, sample_include, sample_include_cumulative_popcounts, raw_sample_ct, &fread_ptr, main_raregeno, difflist_sample_ids, difflist_len_ptr, pgrp->workspace_raregeno_tmp_loadbuf);\n  }\n  if (unlikely(reterr)) {\n    return kPglRetMalformedInput;\n  }\n  if (is_ldbase) {\n    const uint32_t difflist_len = *difflist_len_ptr;\n    pgrp->ldbase_stypes = kfPgrLdcacheDifflist;\n    pgrp->ldbase_difflist_len = difflist_len;\n    CopyNyparr(main_raregeno, difflist_len, pgrp->ldbase_raregeno);\n    memcpy(pgrp->ldbase_difflist_sample_ids, difflist_sample_ids, difflist_len * sizeof(int32_t));\n    pgrp->ldbase_difflist_sample_ids[difflist_len] = sample_ct;\n  }\n  if (fread_pp) {\n    *fread_pp = fread_ptr;\n    *fread_endp = fread_end;\n  }\n  return kPglRetSuccess;\n}\n\nPglErr PgrGetDifflistOrGenovec(const uintptr_t* __restrict sample_include, PgrSampleSubsetIndex pssi, uint32_t sample_ct, uint32_t max_simple_difflist_len, uint32_t vidx, PgenReader* pgr_ptr, uintptr_t* __restrict genovec, uint32_t* __restrict difflist_common_geno_ptr, uintptr_t* __restrict main_raregeno, uint32_t* __restrict difflist_sample_ids, uint32_t* __restrict difflist_len_ptr) {\n  if (!sample_ct) {\n    *difflist_common_geno_ptr = UINT32_MAX;\n    return kPglRetSuccess;\n  }\n  PgenReaderMain* pgrp = GetPgrp(pgr_ptr);\n  assert(vidx < pgrp->fi.raw_variant_ct);\n  return ReadDifflistOrGenovecSubsetUnsafe(sample_include, GetSicp(pssi), sample_ct, max_simple_difflist_len, vidx, pgrp, nullptr, nullptr, genovec, difflist_common_geno_ptr, main_raregeno, difflist_sample_ids, difflist_len_ptr);\n}\n\nPglErr LdSubsetAdjustGenocounts(const unsigned char* fread_end, const uintptr_t* __restrict sample_include, const uint32_t* __restrict sample_include_cumulative_popcounts, const uintptr_t* __restrict ldbase_genovec, uint32_t raw_sample_ct, const unsigned char** fread_pp, STD_ARRAY_REF(uint32_t, 4) genocounts, uintptr_t* __restrict raregeno_workspace) {\n  // * sample_include assumed to be nullptr if no subsetting required\n  // * Assumes genocounts[] is initialized to the proper values for the LD\n  //   reference variant (including subsetting).\n  // * Tried a hybrid implementation which allowed the base variant to be saved\n  //   as a difflist; turns out it's practically always better to unpack to a\n  //   genovec first.\n  // * There are two modes:\n  //   1. If sample_include is nullptr, we're not selecting a sample subset.\n  //   2. 
If sample_include and sample_include_cumulative_popcounts are both\n  //      non-null, we're computing counts over a sample subset, and\n  //      ldbase_genovec is assumed to be subsetted.\n  //   Experimented with a third mode where ldbase_genovec was replaced with\n  //   ldbase_raw_genovec in the subsetted case, but that didn't seem to pay\n  //   off.\n  // * This is the main frequency-counting bottleneck.\n  uint32_t raw_difflist_len;\n  const unsigned char* group_info_iter;\n  PglErr reterr = ParseDifflistHeader(fread_end, raw_sample_ct, fread_pp, raregeno_workspace, &group_info_iter, &raw_difflist_len);\n  if (reterr || (!raw_difflist_len)) {\n    return reterr;\n  }\n  const uint32_t subgroup_idx_last = (raw_difflist_len - 1) / kBitsPerWordD2;\n  const uint32_t sample_id_byte_ct = BytesToRepresentNzU32(raw_sample_ct);\n  uintptr_t* raregeno_workspace_iter = raregeno_workspace;\n  uintptr_t raw_sample_idx = 0;\n  STD_ARRAY_DECL(uint32_t, 16, delta_counts);\n  STD_ARRAY_FILL0(delta_counts);\n  for (uint32_t subgroup_idx = 0; ; ++subgroup_idx) {\n    uint32_t remaining_deltas_in_subgroup = kBitsPerWordD2 - 1;\n    if (subgroup_idx >= subgroup_idx_last) {\n      if (subgroup_idx > subgroup_idx_last) {\n        // delta_counts[] is indexed by (new_geno * 4 + ldbase_geno); the net\n        // change for genotype g is (# of entries entering g) minus (# of\n        // entries leaving g)\n        const int32_t incr0 = delta_counts[1] + delta_counts[2] + delta_counts[3] - delta_counts[4] - delta_counts[8] - delta_counts[12];\n        const int32_t incr1 = delta_counts[4] + delta_counts[6] + delta_counts[7] - delta_counts[1] - delta_counts[9] - delta_counts[13];\n        const int32_t incr2 = delta_counts[8] + delta_counts[9] + delta_counts[11] - delta_counts[2] - delta_counts[6] - delta_counts[14];\n        genocounts[0] += incr0;\n        genocounts[1] += incr1;\n        genocounts[2] += incr2;\n        genocounts[3] -= incr0 + incr1 + incr2;\n        return kPglRetSuccess;\n      }\n      remaining_deltas_in_subgroup &= raw_difflist_len - 1;\n    }\n    if (!(subgroup_idx % (kPglDifflistGroupSize / kBitsPerWordD2))) {\n#ifdef __LP64__\n      if (unlikely(raw_sample_idx >= raw_sample_ct)) {\n        return kPglRetMalformedInput;\n      }\n#endif\n      raw_sample_idx = SubU32Load(group_info_iter, sample_id_byte_ct);\n      group_info_iter = &(group_info_iter[sample_id_byte_ct]);\n    } else {\n      raw_sample_idx += GetVint31(fread_end, fread_pp);\n    }\n    uintptr_t cur_raregeno_word = *raregeno_workspace_iter++;\n    if (!sample_include) {\n      for (; ; --remaining_deltas_in_subgroup) {\n#ifndef __LP64__\n        if (unlikely(raw_sample_idx >= raw_sample_ct)) {\n          return kPglRetMalformedInput;\n        }\n#endif\n        const uintptr_t cur_geno = cur_raregeno_word & 3;\n        delta_counts[cur_geno * 4 + GetNyparrEntry(ldbase_genovec, raw_sample_idx)] += 1;\n        if (!remaining_deltas_in_subgroup) {\n          break;\n        }\n        raw_sample_idx += GetVint31(fread_end, fread_pp);\n        cur_raregeno_word >>= 2;\n      }\n    } else {\n      for (; ; --remaining_deltas_in_subgroup) {\n#ifndef __LP64__\n        if (unlikely(raw_sample_idx >= raw_sample_ct)) {\n          return kPglRetMalformedInput;\n        }\n#endif\n        if (IsSet(sample_include, raw_sample_idx)) {\n          const uintptr_t cur_geno = cur_raregeno_word & 3;\n          const uint32_t sample_idx = RawToSubsettedPos(sample_include, sample_include_cumulative_popcounts, raw_sample_idx);\n          delta_counts[cur_geno * 4 + GetNyparrEntry(ldbase_genovec, sample_idx)] += 1;\n        }\n        if (!remaining_deltas_in_subgroup) {\n          break;\n        }\n        
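// advance to the next difflist entry; deltas between successive raw\n        // sample indexes are stored as variable-width vints (7 payload bits\n        // per byte, final byte with its high bit clear)\n        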
raw_sample_idx += GetVint31(fread_end, fread_pp);\n        cur_raregeno_word >>= 2;\n      }\n    }\n  }\n}\n\nPglErr SkipDeltalistIds(const unsigned char* fread_end, const unsigned char* group_info, uint32_t difflist_len, uint32_t raw_sample_ct, uint32_t has_genotypes, const unsigned char** fread_pp) {\n  assert(difflist_len);\n  // fread_pp is a pure output parameter here\n  const uint32_t group_ct = DivUp(difflist_len, kPglDifflistGroupSize);\n  const uint32_t sample_id_byte_ct = BytesToRepresentNzU32(raw_sample_ct);\n  const unsigned char* extra_byte_cts = &(group_info[group_ct * sample_id_byte_ct]);\n  const uint32_t extra_byte_tot = BytesumArr(extra_byte_cts, group_ct - 1);\n\n  // (group_ct - 1) for extra_byte_cts\n  // (difflist_len + 3) / 4 for raregeno\n  // (group_ct - 1) * (kPglDifflistGroupSize - 1) + extra_byte_tot for\n  //   all but last ID block\n  // total = (group_ct - 1) * kPglDifflistGroupSize + extra_byte_tot +\n  //         (difflist_len + 3) / 4\n  const unsigned char* iddiff_start = &(extra_byte_cts[(group_ct - 1) * kPglDifflistGroupSize + extra_byte_tot]);\n  if (has_genotypes) {\n    iddiff_start = &(iddiff_start[NypCtToByteCt(difflist_len)]);\n  }\n  const unsigned char* fread_ptr = iddiff_start;\n  const unsigned char* fread_loop_stop = &(fread_end[-S_CAST(int32_t, kBytesPerWord)]);\n  uint32_t remaining_id_ct = (difflist_len - 1) % kPglDifflistGroupSize;\n#ifdef USE_SSE2\n  while (remaining_id_ct >= kBytesPerVec) {\n    if (unlikely(fread_ptr > fread_loop_stop)) {\n      return kPglRetMalformedInput;\n    }\n    const VecW vv = vecw_loadu(fread_ptr);\n    fread_ptr = &(fread_ptr[kBytesPerVec]);\n    const uint32_t highbits = vecw_movemask(vv);\n    remaining_id_ct -= kBytesPerVec - PopcountVec8thUint(highbits);\n  }\n#endif\n  while (remaining_id_ct >= kBytesPerWord) {\n    // scan a word at a time, count number of high bits set\n    if (unlikely(fread_ptr > fread_loop_stop)) {\n      return kPglRetMalformedInput;\n    }\n#ifdef USE_SSE42\n    uintptr_t ww;\n    CopyFromUnalignedIncrW(&ww, &fread_ptr);\n    ww &= 0x80 * kMask0101;\n    remaining_id_ct -= kBytesPerWord - PopcountWord(ww);\n#else\n    uintptr_t ww;\n    CopyFromUnalignedIncrW(&ww, &fread_ptr);\n    ww = (ww >> 7) & kMask0101;\n    remaining_id_ct -= kBytesPerWord - ((ww * kMask0101) >> (kBitsPerWord - 8));\n#endif\n  }\n  if (!remaining_id_ct) {\n    *fread_pp = fread_ptr;\n    return kPglRetSuccess;\n  }\n  --remaining_id_ct;\n  while (likely(fread_ptr < fread_end)) {\n    if ((*fread_ptr++) <= 127) {\n      if (!remaining_id_ct) {\n        *fread_pp = fread_ptr;\n        return kPglRetSuccess;\n      }\n      --remaining_id_ct;\n    }\n  }\n  return kPglRetMalformedInput;\n}\n\nPglErr CountparseDifflistSubset(const unsigned char* fread_end, const uintptr_t* __restrict sample_include, uint32_t common_geno, uint32_t raw_sample_ct, uint32_t sample_ct, const unsigned char** fread_pp, STD_ARRAY_REF(uint32_t, 4) genocounts, uintptr_t* __restrict raregeno_workspace) {\n  const unsigned char* group_info_iter;\n  uint32_t difflist_len;\n  PglErr reterr = ParseDifflistHeader(fread_end, raw_sample_ct, fread_pp, raregeno_workspace, &group_info_iter, &difflist_len);\n  STD_ARRAY_REF_FILL0(4, genocounts);\n  if (reterr || (!difflist_len)) {\n    genocounts[common_geno] = sample_ct;\n    return reterr;\n  }\n  if (raw_sample_ct == sample_ct) {\n    ZeroTrailingNyps(difflist_len, raregeno_workspace);\n    GenoarrCountFreqsUnsafe(raregeno_workspace, difflist_len, genocounts);\n    genocounts[common_geno] = 
sample_ct - difflist_len;\n    // bugfix (26 Mar 2019): forgot to advance fread_pp\n    return SkipDeltalistIds(fread_end, group_info_iter, difflist_len, raw_sample_ct, 1, fread_pp);\n  }\n  const uint32_t subgroup_idx_last = (difflist_len - 1) / kBitsPerWordD2;\n  const uint32_t sample_id_byte_ct = BytesToRepresentNzU32(raw_sample_ct);\n  uintptr_t* raregeno_workspace_iter = raregeno_workspace;\n  uintptr_t raw_sample_idx = 0;\n  for (uint32_t subgroup_idx = 0; ; ++subgroup_idx) {\n    uint32_t remaining_deltas_in_subgroup = kBitsPerWordD2 - 1;\n    if (subgroup_idx >= subgroup_idx_last) {\n      if (subgroup_idx > subgroup_idx_last) {\n        genocounts[common_geno] = sample_ct - genocounts[0] - genocounts[1] - genocounts[2] - genocounts[3];\n        return kPglRetSuccess;\n      }\n      remaining_deltas_in_subgroup &= difflist_len - 1;\n    }\n    if (!(subgroup_idx % (kPglDifflistGroupSize / kBitsPerWordD2))) {\n#ifdef __LP64__\n      if (unlikely(raw_sample_idx >= raw_sample_ct)) {\n        return kPglRetMalformedInput;\n      }\n#endif\n      raw_sample_idx = SubU32Load(group_info_iter, sample_id_byte_ct);\n      group_info_iter = &(group_info_iter[sample_id_byte_ct]);\n    } else {\n      raw_sample_idx += GetVint31(fread_end, fread_pp);\n    }\n    uintptr_t cur_raregeno_word = *raregeno_workspace_iter++;\n    for (; ; --remaining_deltas_in_subgroup) {\n#ifndef __LP64__\n      if (unlikely(raw_sample_idx >= raw_sample_ct)) {\n        return kPglRetMalformedInput;\n      }\n#endif\n      if (IsSet(sample_include, raw_sample_idx)) {\n        const uintptr_t cur_geno = cur_raregeno_word & 3;\n        genocounts[cur_geno] += 1;\n      }\n      if (!remaining_deltas_in_subgroup) {\n        break;\n      }\n      raw_sample_idx += GetVint31(fread_end, fread_pp);\n      cur_raregeno_word >>= 2;\n    }\n  }\n}\n\n// 1-bit, unsubsetted: count 1-bit array, then count raregeno\n// 1-bit, subsetted: count [1-bit array AND sample_include], iterate through\n//   difflist\nPglErr CountparseOnebitSubset(const unsigned char* fread_end, const uintptr_t* __restrict sample_include, uint32_t raw_sample_ct, uint32_t sample_ct, const unsigned char** fread_pp, STD_ARRAY_REF(uint32_t, 4) genocounts, uintptr_t* __restrict raregeno_workspace) {\n  const uint32_t initial_bitarray_byte_ct = DivUp(raw_sample_ct, CHAR_BIT);\n  const unsigned char* onebit_main_iter = *fread_pp;\n  if (PtrAddCk(fread_end, initial_bitarray_byte_ct + 1, fread_pp)) {\n    return kPglRetMalformedInput;\n  }\n  const uint32_t common2_code = *onebit_main_iter++;\n  const uint32_t geno_code_low = common2_code / 4;\n  const uint32_t geno_code_high = (common2_code & 3) + geno_code_low;\n  uint32_t high_geno_ct;\n  if (raw_sample_ct == sample_ct) {\n    high_geno_ct = PopcountBytes(onebit_main_iter, initial_bitarray_byte_ct);\n  } else {\n    high_geno_ct = PopcountBytesMasked(onebit_main_iter, sample_include, initial_bitarray_byte_ct);\n  }\n  const unsigned char* group_info_iter;\n  uint32_t difflist_len;\n  PglErr reterr = ParseDifflistHeader(fread_end, raw_sample_ct, fread_pp, raregeno_workspace, &group_info_iter, &difflist_len);\n  STD_ARRAY_REF_FILL0(4, genocounts);\n  if (reterr || (!difflist_len)) {\n    genocounts[geno_code_low] = sample_ct - high_geno_ct;\n    genocounts[geno_code_high] = high_geno_ct;\n    return reterr;\n  }\n  if (raw_sample_ct == sample_ct) {\n    ZeroTrailingNyps(difflist_len, raregeno_workspace);\n    GenoarrCountFreqsUnsafe(raregeno_workspace, difflist_len, genocounts);\n    genocounts[geno_code_low] = 
sample_ct - difflist_len - high_geno_ct;\n    genocounts[geno_code_high] = high_geno_ct;\n    // bugfix (26 Mar 2019): forgot to advance fread_pp\n    return SkipDeltalistIds(fread_end, group_info_iter, difflist_len, raw_sample_ct, 1, fread_pp);\n  }\n  const uint32_t subgroup_idx_last = (difflist_len - 1) / kBitsPerWordD2;\n  const uint32_t sample_id_byte_ct = BytesToRepresentNzU32(raw_sample_ct);\n  uintptr_t* raregeno_workspace_iter = raregeno_workspace;\n  uintptr_t raw_sample_idx = 0;\n  for (uint32_t subgroup_idx = 0; ; ++subgroup_idx) {\n    uint32_t remaining_deltas_in_subgroup = kBitsPerWordD2 - 1;\n    if (subgroup_idx >= subgroup_idx_last) {\n      if (subgroup_idx > subgroup_idx_last) {\n        // avoid read-after-write dependency?\n        genocounts[geno_code_low] = sample_ct - high_geno_ct - genocounts[0] - genocounts[1] - genocounts[2] - genocounts[3];\n        genocounts[geno_code_high] = high_geno_ct;\n        return kPglRetSuccess;\n      }\n      remaining_deltas_in_subgroup &= difflist_len - 1;\n    }\n    if (!(subgroup_idx % (kPglDifflistGroupSize / kBitsPerWordD2))) {\n#ifdef __LP64__\n      if (unlikely(raw_sample_idx >= raw_sample_ct)) {\n        return kPglRetMalformedInput;\n      }\n#endif\n      raw_sample_idx = SubU32Load(group_info_iter, sample_id_byte_ct);\n      group_info_iter = &(group_info_iter[sample_id_byte_ct]);\n    } else {\n      raw_sample_idx += GetVint31(fread_end, fread_pp);\n    }\n    uintptr_t cur_raregeno_word = *raregeno_workspace_iter++;\n    for (; ; --remaining_deltas_in_subgroup) {\n#ifndef __LP64__\n      if (unlikely(raw_sample_idx >= raw_sample_ct)) {\n        return kPglRetMalformedInput;\n      }\n#endif\n      if (IsSet(sample_include, raw_sample_idx)) {\n        const uintptr_t cur_geno = cur_raregeno_word & 3;\n        genocounts[cur_geno] += 1;\n        high_geno_ct -= IsSetUnaligned(onebit_main_iter, raw_sample_idx);\n      }\n      if (!remaining_deltas_in_subgroup) {\n        break;\n      }\n      raw_sample_idx += GetVint31(fread_end, fread_pp);\n      cur_raregeno_word >>= 2;\n    }\n  }\n}\n\n// loads ldbase variant if necessary, guarantees pgrp->ldbase_genovec is filled\n// on return\n// only called by GetBasicGenotypeCounts(), usually LdLoadAndCopy... is better\nPglErr LdLoadGenovecSubsetIfNecessary(const uintptr_t* __restrict sample_include, const uint32_t* __restrict sample_include_cumulative_popcounts, uint32_t sample_ct, uint32_t vidx, PgenReaderMain* pgrp) {\n  if (LdLoadNecessary(vidx, pgrp)) {\n    const uint32_t ldbase_vidx = pgrp->ldbase_vidx;\n    const unsigned char* fread_ptr;\n    const unsigned char* fread_end;\n    if (unlikely(InitReadPtrs(ldbase_vidx, pgrp, &fread_ptr, &fread_end))) {\n      return kPglRetReadFail;\n    }\n    const uint32_t vrtype = pgrp->fi.vrtypes[ldbase_vidx];\n    // bugfix (6 Mar 2019): ldbase_raw_genovec is only filled in (!difflist) &&\n    //   subsetting_required case; (!difflist) isn't enough\n    pgrp->ldbase_stypes = ((vrtype & 4) || (sample_ct == pgrp->fi.raw_sample_ct))? 
kfPgrLdcacheNyp : (kfPgrLdcacheNyp | kfPgrLdcacheRawNyp);\n    return ParseNonLdGenovecSubsetUnsafe(fread_end, sample_include, sample_include_cumulative_popcounts, sample_ct, vrtype, &fread_ptr, pgrp, pgrp->ldbase_genovec);\n  }\n  if (!(pgrp->ldbase_stypes & kfPgrLdcacheNyp)) {\n    if (pgrp->ldbase_stypes & kfPgrLdcacheDifflist) {\n      PgrDifflistToGenovecUnsafe(pgrp->ldbase_raregeno, pgrp->ldbase_difflist_sample_ids, pgrp->fi.vrtypes[pgrp->ldbase_vidx] & 3, sample_ct, pgrp->ldbase_difflist_len, pgrp->ldbase_genovec);\n    } else {\n      assert(pgrp->ldbase_stypes & kfPgrLdcacheRawNyp);\n      CopyNyparrNonemptySubset(pgrp->ldbase_raw_genovec, sample_include, pgrp->fi.raw_sample_ct, sample_ct, pgrp->ldbase_genovec);\n    }\n    pgrp->ldbase_stypes |= kfPgrLdcacheNyp;\n  }\n  return kPglRetSuccess;\n}\n\nPglErr GetBasicGenotypeCounts(const uintptr_t* __restrict sample_include, const uintptr_t* __restrict sample_include_interleaved_vec, const uint32_t* __restrict sample_include_cumulative_popcounts, uint32_t sample_ct, uint32_t vidx, PgenReaderMain* pgrp, uint32_t* unphased_het_ctp, STD_ARRAY_REF(uint32_t, 4) genocounts) {\n  // genocounts[0] := ref/ref, genocounts[1] := ref/altx,\n  // genocounts[2] := altx/alty, genocounts[3] := missing\n  // If unphased_het_ctp is non-null, this assumes multiallelic hardcalls are\n  // not present, phased hardcalls are present, we aren't subsetting, and\n  // unphased_het_ct is initialized to zero.\n  assert(vidx < pgrp->fi.raw_variant_ct);\n  assert(sample_ct);\n  const uint32_t vrtype = GetPgfiVrtype(&(pgrp->fi), vidx);\n  const uint32_t raw_sample_ct = pgrp->fi.raw_sample_ct;\n  const uint32_t subsetting_required = (sample_ct != raw_sample_ct);\n  const unsigned char* fread_ptr;\n  const unsigned char* fread_end = nullptr;  // maybe-uninitialized warning\n  PglErr reterr;\n  if (VrtypeLdCompressed(vrtype)) {\n    // LD compression\n    reterr = LdLoadGenovecSubsetIfNecessary(sample_include, sample_include_cumulative_popcounts, sample_ct, vidx, pgrp);\n    if (unlikely(reterr)) {\n      return reterr;\n    }\n    if (unlikely(InitReadPtrs(vidx, pgrp, &fread_ptr, &fread_end))) {\n      return kPglRetReadFail;\n    }\n    if (!(pgrp->ldbase_stypes & kfPgrLdcacheBasicGenocounts)) {\n      ZeroTrailingNyps(sample_ct, pgrp->ldbase_genovec);\n      GenoarrCountFreqsUnsafe(pgrp->ldbase_genovec, sample_ct, pgrp->ldbase_basic_genocounts);\n      assert(pgrp->ldbase_stypes);\n      pgrp->ldbase_stypes |= kfPgrLdcacheBasicGenocounts;\n    }\n    STD_ARRAY_COPY(pgrp->ldbase_basic_genocounts, 4, genocounts);\n    reterr = LdSubsetAdjustGenocounts(fread_end, subsetting_required? 
sample_include : nullptr, sample_include_cumulative_popcounts, pgrp->ldbase_genovec, raw_sample_ct, &fread_ptr, genocounts, pgrp->workspace_raregeno_tmp_loadbuf);\n    if (vrtype & 1) {\n      // inverted\n      const uint32_t tmpval = genocounts[0];\n      genocounts[0] = genocounts[2];\n      genocounts[2] = tmpval;\n    }\n  } else {\n    if (unlikely(InitReadPtrs(vidx, pgrp, &fread_ptr, &fread_end))) {\n      return kPglRetReadFail;\n    }\n    const uint32_t is_ldbase = pgrp->fi.vrtypes && VrtypeLdCompressed(pgrp->fi.vrtypes[vidx + 1]);\n    if (is_ldbase) {\n      // In principle, difflists are very efficient to count directly when not\n      // subsetting (since the sample IDs can be ignored entirely), but in\n      // practice unpack-first is almost always faster.\n      pgrp->ldbase_vidx = vidx;\n      // this may be slowed down by the LD caching change.\n      reterr = ParseNonLdGenovecSubsetUnsafe(fread_end, sample_include, sample_include_cumulative_popcounts, sample_ct, vrtype, &fread_ptr, pgrp, pgrp->ldbase_genovec);\n      ZeroTrailingNyps(sample_ct, pgrp->ldbase_genovec);\n      GenoarrCountFreqsUnsafe(pgrp->ldbase_genovec, sample_ct, genocounts);\n      STD_ARRAY_COPY(genocounts, 4, pgrp->ldbase_basic_genocounts);\n      pgrp->ldbase_stypes = (subsetting_required && (!(vrtype & 4)))? (kfPgrLdcacheNyp | kfPgrLdcacheRawNyp | kfPgrLdcacheBasicGenocounts) : (kfPgrLdcacheNyp | kfPgrLdcacheBasicGenocounts);\n    } else if (vrtype & 4) {\n      const uint32_t vrtype_low2 = vrtype & 3;\n      if (vrtype_low2 != 1) {\n        reterr = CountparseDifflistSubset(fread_end, sample_include, vrtype & 3, raw_sample_ct, sample_ct, &fread_ptr, genocounts, pgrp->workspace_raregeno_tmp_loadbuf);\n      } else {\n        genocounts[0] = sample_ct;\n        genocounts[1] = 0;\n        genocounts[2] = 0;\n        genocounts[3] = 0;\n        reterr = kPglRetSuccess;\n      }\n    } else if (vrtype & 1) {\n      reterr = CountparseOnebitSubset(fread_end, sample_include, raw_sample_ct, sample_ct, &fread_ptr, genocounts, pgrp->workspace_raregeno_tmp_loadbuf);\n    } else {\n      const uint32_t genovec_byte_ct = NypCtToByteCt(raw_sample_ct);\n      const unsigned char* genoarrb = fread_ptr;\n      if (PtrAddCk(fread_end, genovec_byte_ct, &fread_ptr)) {\n        return kPglRetMalformedInput;\n      }\n      if (!subsetting_required) {\n        const uint32_t genoarrb_is_unaligned = R_CAST(uintptr_t, genoarrb) % kBytesPerVec;\n        if (genoarrb_is_unaligned) {\n          GenoarrbCountFreqs(genoarrb, raw_sample_ct, genocounts);\n        } else {\n          GenoarrCountFreqs(R_CAST(const uintptr_t*, genoarrb), raw_sample_ct, genocounts);\n        }\n      } else {\n        GenoarrbCountSubsetFreqs(genoarrb, sample_include_interleaved_vec, raw_sample_ct, sample_ct, genocounts);\n      }\n      if (vrtype == kPglVrtypePlink1) {\n        // [3] -> [0]\n        // [2] -> [1]\n        // [1] -> [3]\n        // [0] -> [2]\n        const uint32_t save2 = genocounts[0];\n        const uint32_t save3 = genocounts[1];\n        genocounts[0] = genocounts[3];\n        genocounts[1] = genocounts[2];\n        genocounts[2] = save2;\n        genocounts[3] = save3;\n      }\n      reterr = kPglRetSuccess;\n    }\n  }\n  if ((!unphased_het_ctp) || reterr) {\n    return reterr;\n  }\n  assert((!subsetting_required) && ((vrtype & 0x18) == 0x10));\n  const uint32_t het_ct = genocounts[1];\n  const uint32_t 
aux2_first_part_byte_ct = 1 + (het_ct / CHAR_BIT);\n  if (PtrCheck(fread_end, fread_ptr, aux2_first_part_byte_ct)) {\n    return kPglRetMalformedInput;\n  }\n  const uint32_t explicit_phasepresent = fread_ptr[0] & 1;\n  if (explicit_phasepresent) {\n    // otherwise the initial value of 0 is correct\n    *unphased_het_ctp = het_ct + 1 - PopcountBytes(fread_ptr, aux2_first_part_byte_ct);\n  }\n  return kPglRetSuccess;\n}\n\nPglErr PgrGetCounts(const uintptr_t* __restrict sample_include, const uintptr_t* __restrict sample_include_interleaved_vec, PgrSampleSubsetIndex pssi, uint32_t sample_ct, uint32_t vidx, PgenReader* pgr_ptr, STD_ARRAY_REF(uint32_t, 4) genocounts) {\n  if (!sample_ct) {\n    STD_ARRAY_REF_FILL0(4, genocounts);\n    return kPglRetSuccess;\n  }\n  PgenReaderMain* pgrp = GetPgrp(pgr_ptr);\n  assert(vidx < pgrp->fi.raw_variant_ct);\n  return GetBasicGenotypeCounts(sample_include, sample_include_interleaved_vec, GetSicp(pssi), sample_ct, vidx, pgrp, nullptr, genocounts);\n}\n\nuint32_t CountNypVec6(const unsigned char* nyp_vvec_biter, uintptr_t nyp_word, uint32_t vec_ct) {\n  assert(!(vec_ct % 6));\n  const VecW m0 = vecw_setzero();\n  const VecW m1 = VCONST_W(kMask5555);\n  const VecW m2 = VCONST_W(kMask3333);\n  const VecW m4 = VCONST_W(kMask0F0F);\n  const VecW xor_vvec = vecw_set1(nyp_word);\n  VecW prev_sad_result = vecw_setzero();\n  VecW acc = vecw_setzero();\n  uintptr_t cur_incr = 60;\n  for (; ; vec_ct -= cur_incr) {\n    if (vec_ct < 60) {\n      if (!vec_ct) {\n        acc = acc + prev_sad_result;\n        return HsumW(acc);\n      }\n      cur_incr = vec_ct;\n    }\n    VecW inner_acc = vecw_setzero();\n    const unsigned char* nyp_vvec_stop = &(nyp_vvec_biter[cur_incr * kBytesPerVec]);\n    do {\n      VecW loader1 = vecw_loadu(nyp_vvec_biter) ^ xor_vvec;\n      nyp_vvec_biter += kBytesPerVec;\n      VecW loader2 = vecw_loadu(nyp_vvec_biter) ^ xor_vvec;\n      nyp_vvec_biter += kBytesPerVec;\n      VecW count1 = vecw_and_notfirst(vecw_srli(loader1, 1) | loader1, m1);\n      VecW count2 = vecw_and_notfirst(vecw_srli(loader2, 1) | loader2, m1);\n\n      loader1 = vecw_loadu(nyp_vvec_biter) ^ xor_vvec;\n      nyp_vvec_biter += kBytesPerVec;\n      loader2 = vecw_loadu(nyp_vvec_biter) ^ xor_vvec;\n      nyp_vvec_biter += kBytesPerVec;\n      count1 = count1 + vecw_and_notfirst(vecw_srli(loader1, 1) | loader1, m1);\n      count2 = count2 + vecw_and_notfirst(vecw_srli(loader2, 1) | loader2, m1);\n\n      loader1 = vecw_loadu(nyp_vvec_biter) ^ xor_vvec;\n      nyp_vvec_biter += kBytesPerVec;\n      loader2 = vecw_loadu(nyp_vvec_biter) ^ xor_vvec;\n      nyp_vvec_biter += kBytesPerVec;\n      count1 = count1 + vecw_and_notfirst(vecw_srli(loader1, 1) | loader1, m1);\n      count2 = count2 + vecw_and_notfirst(vecw_srli(loader2, 1) | loader2, m1);\n\n      count1 = (count1 & m2) + (vecw_srli(count1, 2) & m2);\n      count1 = count1 + (count2 & m2) + (vecw_srli(count2, 2) & m2);\n      inner_acc = inner_acc + (count1 & m4) + (vecw_srli(count1, 4) & m4);\n    } while (nyp_vvec_biter < nyp_vvec_stop);\n    acc = acc + prev_sad_result;\n    prev_sad_result = vecw_bytesum(inner_acc, m0);\n  }\n}\n\n// Ok for nyparr to be unaligned.  
Ok if unsafe to read trailing bytes of\n// nyparr.\nuint32_t CountNyp(const void* nyparr, uintptr_t nyp_word, uint32_t nyp_ct) {\n  const unsigned char* nyparr_uc = S_CAST(const unsigned char*, nyparr);\n  const uint32_t fullword_ct = nyp_ct / kBitsPerWordD2;\n  uint32_t word_idx = fullword_ct - (fullword_ct % (6 * kWordsPerVec));\n  uint32_t tot = CountNypVec6(nyparr_uc, nyp_word, word_idx / kWordsPerVec);\n  for (; word_idx != fullword_ct; ++word_idx) {\n    uintptr_t cur_word;\n    CopyFromUnalignedOffsetW(&cur_word, nyparr_uc, word_idx);\n    tot += Popcount01Word(Word00(cur_word ^ nyp_word));\n  }\n  const uint32_t trailing_nyp_ct = nyp_ct % kBitsPerWordD2;\n  if (trailing_nyp_ct) {\n    const uint32_t trailing_byte_ct = DivUp(trailing_nyp_ct, (CHAR_BIT / 2));\n    uintptr_t cur_word = SubwordLoad(&(nyparr_uc[fullword_ct * kBytesPerWord]), trailing_byte_ct) ^ nyp_word;\n    cur_word = bzhi(Word00(cur_word), trailing_nyp_ct * 2);\n    tot += Popcount01Word(cur_word);\n  }\n  return tot;\n}\n\n/*\nuint32_t CountNypSubsetVec6(const VecW* __restrict nyp_vvec, const VecW* __restrict interleaved_mask_vvec, uintptr_t nyp_word, uint32_t vec_ct) {\n  assert(!(vec_ct % 6));\n  const VecW m0 = vecw_setzero();\n  const VecW m1 = VCONST_W(kMask5555);\n  const VecW m2 = VCONST_W(kMask3333);\n  const VecW m4 = VCONST_W(kMask0F0F);\n  const VecW xor_vvec = vecw_set1(nyp_word);\n  const VecW* nyp_vvec_iter = nyp_vvec;\n  const VecW* interleaved_mask_vvec_iter = interleaved_mask_vvec;\n  VecW prev_sad_result = vecw_setzero();\n  VecW acc = vecw_setzero();\n  uintptr_t cur_incr = 60;\n  while (1) {\n    if (vec_ct < 60) {\n      if (!vec_ct) {\n        acc = acc + prev_sad_result;\n        return HsumW(acc);\n      }\n      cur_incr = vec_ct;\n    }\n    VecW inner_acc = vecw_setzero();\n    const VecW* nyp_vvec_stop = &(nyp_vvec_iter[cur_incr]);\n    vec_ct -= cur_incr;\n    do {\n      VecW mask1 = *interleaved_mask_vvec_iter++;\n      VecW loader1 = vecw_loadu(nyp_vvec_iter++) ^ xor_vvec;\n      VecW mask2 = vecw_srli(mask1, 1) & m1;\n      VecW loader2 = vecw_loadu(nyp_vvec_iter++) ^ xor_vvec;\n      mask1 = mask1 & m1;\n      VecW count1 = vecw_and_notfirst(vecw_srli(loader1, 1) | loader1, mask1);\n      VecW count2 = vecw_and_notfirst(vecw_srli(loader2, 1) | loader2, mask2);\n\n      mask1 = *interleaved_mask_vvec_iter++;\n      loader1 = vecw_loadu(nyp_vvec_iter++) ^ xor_vvec;\n      mask2 = vecw_srli(mask1, 1) & m1;\n      loader2 = vecw_loadu(nyp_vvec_iter++) ^ xor_vvec;\n      mask1 = mask1 & m1;\n      count1 = count1 + vecw_and_notfirst(vecw_srli(loader1, 1) | loader1, mask1);\n      count2 = count2 + vecw_and_notfirst(vecw_srli(loader2, 1) | loader2, mask2);\n\n      mask1 = *interleaved_mask_vvec_iter++;\n      loader1 = vecw_loadu(nyp_vvec_iter++) ^ xor_vvec;\n      mask2 = vecw_srli(mask1, 1) & m1;\n      loader2 = vecw_loadu(nyp_vvec_iter++) ^ xor_vvec;\n      mask1 = mask1 & m1;\n      count1 = count1 + vecw_and_notfirst(vecw_srli(loader1, 1) | loader1, mask1);\n      count2 = count2 + vecw_and_notfirst(vecw_srli(loader2, 1) | loader2, mask2);\n\n      count1 = (count1 & m2) + (vecw_srli(count1, 2) & m2);\n      count1 = count1 + (count2 & m2) + (vecw_srli(count2, 2) & m2);\n      inner_acc = inner_acc + (count1 & m4) + (vecw_srli(count1, 4) & m4);\n    } while (nyp_vvec_iter < nyp_vvec_stop);\n    acc = acc + prev_sad_result;\n    prev_sad_result = vecw_bytesum(inner_acc, m0);\n  }\n}\n\nuint32_t CountNypSubset(const uintptr_t* __restrict nypvec, const uintptr_t* __restrict 
interleaved_vec, uintptr_t nyp_word, uint32_t raw_nyp_ct) {\n  // simplified GenoarrCountSubsetFreqs()\n  const uint32_t raw_nyp_ctv2 = NypCtToVecCt(raw_nyp_ct);\n#ifdef __LP64__\n  uint32_t vec_idx = raw_nyp_ctv2 - (raw_nyp_ctv2 % 6);\n  uint32_t tot = CountNypSubsetVec6(R_CAST(const VecW*, nypvec), R_CAST(const VecW*, interleaved_vec), nyp_word, vec_idx);\n  const uintptr_t* nypvec_iter = &(nypvec[kWordsPerVec * vec_idx]);\n  const uintptr_t* interleaved_mask_iter = &(interleaved_vec[(kWordsPerVec / 2) * vec_idx]);\n#  ifdef USE_AVX2\n  uintptr_t mask_base1 = 0;\n  uintptr_t mask_base2 = 0;\n  uintptr_t mask_base3 = 0;\n  uintptr_t mask_base4 = 0;\n  for (; vec_idx != raw_nyp_ctv2; ++vec_idx) {\n    uintptr_t mask_word1;\n    uintptr_t mask_word2;\n    uintptr_t mask_word3;\n    uintptr_t mask_word4;\n    if (!(vec_idx % 2)) {\n      mask_base1 = *interleaved_mask_iter++;\n      mask_base2 = *interleaved_mask_iter++;\n      mask_base3 = *interleaved_mask_iter++;\n      mask_base4 = *interleaved_mask_iter++;\n      mask_word1 = mask_base1 & kMask5555;\n      mask_word2 = mask_base2 & kMask5555;\n      mask_word3 = mask_base3 & kMask5555;\n      mask_word4 = mask_base4 & kMask5555;\n    } else {\n      mask_word1 = (mask_base1 >> 1) & kMask5555;\n      mask_word2 = (mask_base2 >> 1) & kMask5555;\n      mask_word3 = (mask_base3 >> 1) & kMask5555;\n      mask_word4 = (mask_base4 >> 1) & kMask5555;\n    }\n    uint32_t uii = 0;\n    while (1) {\n      const uintptr_t cur_geno_word1 = (*nypvec_iter++) ^ nyp_word;\n      const uintptr_t cur_geno_word2 = (*nypvec_iter++) ^ nyp_word;\n      const uintptr_t masked1 = mask_word1 & (~(cur_geno_word1 | (cur_geno_word1 >> 1)));\n      const uintptr_t masked2 = mask_word2 & (~(cur_geno_word2 | (cur_geno_word2 >> 1)));\n      tot += PopcountWord((masked1 << 1) | masked2);\n      if (uii) {\n        break;\n      }\n      ++uii;\n      mask_word1 = mask_word3;\n      mask_word2 = mask_word4;\n    }\n  }\n#  else  // not USE_AVX2\n  uintptr_t mask_base1 = 0;\n  uintptr_t mask_base2 = 0;\n  for (; vec_idx != raw_nyp_ctv2; ++vec_idx) {\n    uintptr_t mask_word1;\n    uintptr_t mask_word2;\n    if (!(vec_idx % 2)) {\n      mask_base1 = *interleaved_mask_iter++;\n      mask_base2 = *interleaved_mask_iter++;\n      mask_word1 = mask_base1 & kMask5555;\n      mask_word2 = mask_base2 & kMask5555;\n    } else {\n      mask_word1 = (mask_base1 >> 1) & kMask5555;\n      mask_word2 = (mask_base2 >> 1) & kMask5555;\n    }\n    const uintptr_t cur_geno_word1 = (*nypvec_iter++) ^ nyp_word;\n    const uintptr_t cur_geno_word2 = (*nypvec_iter++) ^ nyp_word;\n    const uintptr_t masked1 = mask_word1 & (~(cur_geno_word1 | (cur_geno_word1 >> 1)));\n    const uintptr_t masked2 = mask_word2 & (~(cur_geno_word2 | (cur_geno_word2 >> 1)));\n#    ifdef USE_SSE42\n    tot += PopcountWord((masked1 << 1) | masked2);\n#    else\n    tot += NypsumWord(masked1 + masked2);\n#    endif\n  }\n#  endif  // not USE_AVX2\n#else  // not __LP64__\n  uint32_t word_idx = raw_nyp_ctv2 - (raw_nyp_ctv2 % 6);\n  uint32_t tot = CountNypSubsetVec6(R_CAST(const VecW*, nypvec), R_CAST(const VecW*, interleaved_vec), nyp_word, word_idx);\n  const uintptr_t* interleaved_mask_iter = &(interleaved_vec[word_idx / 2]);\n  uintptr_t mask_base = 0;\n  for (; word_idx != raw_nyp_ctv2; ++word_idx) {\n    uintptr_t mask_word;\n    if (!(word_idx % 2)) {\n      mask_base = *interleaved_mask_iter++;\n      mask_word = mask_base & kMask5555;\n    } else {\n      mask_word = (mask_base >> 1) & kMask5555;\n    }\n    
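// XOR with nyp_word turns matching 2-bit entries into 00; the SWAR step\n    // below sets the low bit of each 00 entry selected by mask_word, then\n    // popcounts\n    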
const uintptr_t cur_geno_word = nypvec[word_idx] ^ nyp_word;\n    const uintptr_t masked = mask_word & (~(cur_geno_word | (cur_geno_word >> 1)));\n    tot += Popcount01Word(masked);\n  }\n#endif\n  return tot;\n}\n*/\n\n// similar to ParseAndSaveDifflist()\nPglErr ParseAndSaveDeltalist(const unsigned char* fread_end, uint32_t raw_sample_ct, const unsigned char** fread_pp, uint32_t* __restrict deltalist, uint32_t* __restrict deltalist_len_ptr) {\n  const unsigned char* group_info_iter;\n  PglErr reterr = ParseDifflistHeader(fread_end, raw_sample_ct, fread_pp, nullptr, &group_info_iter, deltalist_len_ptr);\n  const uint32_t deltalist_len = *deltalist_len_ptr;\n  if (reterr || (!deltalist_len)) {\n    return reterr;\n  }\n  const uint32_t sample_id_byte_ct = BytesToRepresentNzU32(raw_sample_ct);\n  const uint32_t group_idx_last = (deltalist_len - 1) / kPglDifflistGroupSize;\n  uint32_t* deltalist_iter = deltalist;\n  uint32_t group_len_m1 = kPglDifflistGroupSize - 1;\n  for (uint32_t group_idx = 0; ; ++group_idx) {\n    if (group_idx >= group_idx_last) {\n      if (group_idx > group_idx_last) {\n        return kPglRetSuccess;\n      }\n      group_len_m1 &= deltalist_len - 1;\n    }\n    uintptr_t raw_sample_idx = SubU32Load(group_info_iter, sample_id_byte_ct);\n    group_info_iter = &(group_info_iter[sample_id_byte_ct]);\n    for (uint32_t raw_deltalist_idx_lowbits = 0; ; ++raw_deltalist_idx_lowbits) {\n      // always check, otherwise we may scribble over arbitrary memory\n      if (unlikely(raw_sample_idx >= raw_sample_ct)) {\n        return kPglRetMalformedInput;\n      }\n      deltalist_iter[raw_deltalist_idx_lowbits] = raw_sample_idx;\n      if (raw_deltalist_idx_lowbits == group_len_m1) {\n        break;\n      }\n      raw_sample_idx += GetVint31(fread_end, fread_pp);\n    }\n    deltalist_iter = &(deltalist_iter[group_len_m1 + 1]);\n  }\n}\n\nPglErr CountDeltalistIntersect(const unsigned char* fread_end, const uintptr_t* __restrict sample_include, uint32_t raw_sample_ct, const unsigned char** fread_pp, uint32_t* __restrict intersect_ctp, uint32_t* __restrict raw_deltalist_len_ptr) {\n  // Requires a PROPER subset.\n  const unsigned char* group_info_iter;\n  PglErr reterr = ParseDifflistHeader(fread_end, raw_sample_ct, fread_pp, nullptr, &group_info_iter, raw_deltalist_len_ptr);\n  const uint32_t raw_deltalist_len = *raw_deltalist_len_ptr;\n  if (reterr || (!raw_deltalist_len)) {\n    *intersect_ctp = 0;\n    return reterr;\n  }\n  const uint32_t group_idx_last = (raw_deltalist_len - 1) / kPglDifflistGroupSize;\n  const uint32_t sample_id_byte_ct = BytesToRepresentNzU32(raw_sample_ct);\n  uintptr_t intersect_ct = 0;\n\n  // technically doesn't need to be initialized, but I have principles\n  uintptr_t raw_sample_idx = 0;\n\n  uint32_t group_len_m1 = kPglDifflistGroupSize - 1;\n  for (uint32_t group_idx = 0; ; ++group_idx) {\n    if (group_idx >= group_idx_last) {\n      if (group_idx > group_idx_last) {\n        *intersect_ctp = intersect_ct;\n        return kPglRetSuccess;\n      }\n      group_len_m1 &= raw_deltalist_len - 1;\n    }\n    // We need to pull a raw sample index from the deltalist header every 64\n    // entries.\n#ifdef __LP64__\n    if (unlikely(raw_sample_idx >= raw_sample_ct)) {\n      return kPglRetMalformedInput;\n    }\n#endif\n    raw_sample_idx = SubU32Load(group_info_iter, sample_id_byte_ct);\n    group_info_iter = &(group_info_iter[sample_id_byte_ct]);\n    for (uint32_t raw_deltalist_idx_lowbits = 0; ; ++raw_deltalist_idx_lowbits) {\n#ifndef __LP64__\n      
if (unlikely(raw_sample_idx >= raw_sample_ct)) {\n        return kPglRetMalformedInput;\n      }\n#endif\n      intersect_ct += IsSet(sample_include, raw_sample_idx);\n      if (raw_deltalist_idx_lowbits == group_len_m1) {\n        break;\n      }\n      raw_sample_idx += GetVint31(fread_end, fread_pp);\n    }\n  }\n}\n\nuint32_t CountAux1aDense(const unsigned char* patch_01_fvals, uint32_t allele_ct, uint32_t allele_idx, uint32_t raw_01_ct, uint32_t rare01_ct) {\n  // The 'f' in patch_01_fset/patch_01_fvals is to distinguish the in-file\n  // representation from the returned AlleleCode*-based representation.\n  if (allele_idx == 1) {\n    // safe to ignore allele codes\n    return raw_01_ct - rare01_ct;\n  }\n  if (allele_ct < 5) {\n    if (allele_ct == 3) {\n      return rare01_ct;\n    }\n    // need to count matches\n    const uint32_t allele_code_byte_ct = DivUp(rare01_ct, 8);\n    const uint32_t alt3_ct = PopcountBytes(patch_01_fvals, allele_code_byte_ct);\n    if (allele_idx == 3) {\n      return alt3_ct;\n    }\n    return rare01_ct - alt3_ct;\n  }\n  if (allele_ct < 19) {\n    if (allele_ct < 7) {\n      return CountNyp(patch_01_fvals, (allele_idx - 2) * kMask5555, rare01_ct);\n    }\n    return CountNybble(patch_01_fvals, (allele_idx - 2) * kMask1111, rare01_ct);\n  }\n  return CountByte(patch_01_fvals, allele_idx - 2, rare01_ct);\n}\n\nuint32_t GetAux1aWidth(uint32_t allele_ct) {\n  if (allele_ct < 7) {\n    if (allele_ct < 5) {\n      return allele_ct - 3;\n    }\n    return 2;\n  }\n  if (allele_ct < 19) {\n    return 4;\n  }\n  return 8;\n}\n\n// Returns allele_code_width.  Other return values are inaccurate for allele_ct\n// == 3, since it's assumed that they're unused in that case.\nuint32_t GetAux1aConsts(uint32_t allele_ct, uintptr_t* detect_mask_hi_ptr, uintptr_t* detect_mask_lo_ptr, uint32_t* allele_code_logwidth_ptr) {\n  if (allele_ct < 7) {\n    if (allele_ct < 5) {\n      *detect_mask_hi_ptr = ~k0LU;\n      *detect_mask_lo_ptr = ~k0LU;\n      *allele_code_logwidth_ptr = 0;\n      return allele_ct - 3;\n    }\n    *detect_mask_hi_ptr = kMaskAAAA;\n    *detect_mask_lo_ptr = kMask5555;\n    *allele_code_logwidth_ptr = 1;\n    return 2;\n  }\n  if (allele_ct < 19) {\n    *detect_mask_hi_ptr = kMask1111 * 8;\n    *detect_mask_lo_ptr = kMask1111;\n    *allele_code_logwidth_ptr = 2;\n    return 4;\n  }\n  *detect_mask_hi_ptr = kMask0101 * 0x80;\n  *detect_mask_lo_ptr = kMask0101;\n  *allele_code_logwidth_ptr = 3;\n  return 8;\n}\n\n// Advances *fread_pp past aux1a, and sets *het_ctp to the number of ref-altx\n// hets where x == allele_idx in sample_include.  
(If allele_idx == 1, *het_ctp\n// is raw_01_ct - [# of aux1a entries] when there's no subsetting.)\n// Note that raw_01_ct must be an un-subsetted count.\n// Ok for subsetted_01_ct to be uninitialized if not subsetting, or allele_idx\n// != 1.\n// sample_include assumed to be nullptr if no subsetting required\nPglErr CountAux1a(const unsigned char* fread_end, const uintptr_t* __restrict sample_include, const uintptr_t* __restrict raw_genoarr, uint32_t aux1a_mode, uint32_t raw_sample_ct, uint32_t allele_ct, uint32_t allele_idx, uint32_t raw_01_ct, uint32_t subsetted_01_ct, const unsigned char** fread_pp, uint32_t* __restrict het_ctp, uint32_t* __restrict deltalist_workspace) {\n  if (aux1a_mode == 15) {\n    if (allele_idx == 1) {\n      if (sample_include) {\n        *het_ctp = subsetted_01_ct;\n      } else {\n        *het_ctp = raw_01_ct;\n      }\n    } else {\n      *het_ctp = 0;\n    }\n    return kPglRetSuccess;\n  }\n  const uint32_t ignore_01_fvals = (allele_idx == 1) || (allele_ct == 3);\n  uintptr_t detect_mask_hi;\n  uintptr_t detect_mask_lo;\n  uint32_t allele_code_logwidth;\n  const uint32_t allele_code_width = GetAux1aConsts(allele_ct, &detect_mask_hi, &detect_mask_lo, &allele_code_logwidth);\n  const uintptr_t xor_word = (allele_idx - 2) * detect_mask_lo;\n  if (!aux1a_mode) {\n    // 01-collapsed bitarray\n    const uint32_t fset_byte_ct = DivUp(raw_01_ct, CHAR_BIT);\n    const uint32_t rare01_ct = PopcountBytes(*fread_pp, fset_byte_ct);\n    const unsigned char* patch_01_fset = *fread_pp;\n    *fread_pp += fset_byte_ct;\n    const unsigned char* patch_01_fvals = *fread_pp;\n    const uint32_t fvals_byte_ct = DivUpU64(S_CAST(uint64_t, rare01_ct) * allele_code_width, 8);\n    if (PtrAddCk(fread_end, fvals_byte_ct, fread_pp)) {\n      return kPglRetMalformedInput;\n    }\n    if (!sample_include) {\n      *het_ctp = CountAux1aDense(patch_01_fvals, allele_ct, allele_idx, raw_01_ct, rare01_ct);\n      return kPglRetSuccess;\n    }\n    const Halfword* sample_include_hw = DowncastKWToHW(sample_include);\n    uintptr_t sample_hwidx = 0;\n    uintptr_t cur_raw_genoarr_hets = Word01(raw_genoarr[0]);\n    const uint32_t fset_word_ct_m1 = (fset_byte_ct - 1) / kBytesPerWord;\n    const uint32_t fvals_word_ct_m1 = (fvals_byte_ct - 1) / kBytesPerWord;\n    uintptr_t fvals_bits = 0;\n    uint32_t fvals_widx = 0;\n    uint32_t subsetted_hetx_ct = 0;\n    uint32_t loop_len = kBitsPerWord;\n    uint32_t rare01_lowbits = kBitsPerWord;\n    for (uint32_t fset_widx = 0; ; ++fset_widx) {\n      uintptr_t fset_bits;\n      if (fset_widx >= fset_word_ct_m1) {\n        if (fset_widx > fset_word_ct_m1) {\n          break;\n        }\n        fset_bits = SubwordLoad(&(patch_01_fset[fset_word_ct_m1 * kBytesPerWord]), ModNz(fset_byte_ct, kBytesPerWord));\n        loop_len = ModNz(raw_01_ct, kBitsPerWord);\n      } else {\n        CopyFromUnalignedOffsetW(&fset_bits, patch_01_fset, fset_widx);\n      }\n      // format 0, sample_include non-null\n      if (ignore_01_fvals) {\n        for (uint32_t uii = 0; uii != loop_len; ++uii) {\n          while (!cur_raw_genoarr_hets) {\n            cur_raw_genoarr_hets = Word01(raw_genoarr[++sample_hwidx]);\n          }\n          if (fset_bits & 1) {\n            // Considered replacing cur_raw_genoarr_hets with the result of\n            // two PackWordToHalfword() operations, since that keeps all\n            // the sample word-indexes aligned.  
Couldn't justify it given\n            // the expected sparsity of this case, though.\n            const uint32_t sample_uidx_lowbits = ctzw(cur_raw_genoarr_hets) / 2;\n            subsetted_hetx_ct += (sample_include_hw[sample_hwidx] >> sample_uidx_lowbits) & 1;\n          }\n          cur_raw_genoarr_hets &= cur_raw_genoarr_hets - 1;\n          fset_bits = fset_bits >> 1;\n        }\n      } else {\n        for (uint32_t uii = 0; uii != loop_len; ++uii) {\n          while (!cur_raw_genoarr_hets) {\n            cur_raw_genoarr_hets = Word01(raw_genoarr[++sample_hwidx]);\n          }\n          if (fset_bits & 1) {\n            if (rare01_lowbits == kBitsPerWord) {\n              if (fvals_widx == fvals_word_ct_m1) {\n                fvals_bits = SubwordLoad(&(patch_01_fvals[fvals_widx * kBytesPerWord]), ModNz(fvals_byte_ct, kBytesPerWord));\n              } else {\n                CopyFromUnalignedOffsetW(&fvals_bits, patch_01_fvals, fvals_widx);\n              }\n              fvals_bits = fvals_bits ^ xor_word;\n              fvals_bits = (detect_mask_hi & (~(fvals_bits | ((fvals_bits | detect_mask_hi) - detect_mask_lo)))) >> (allele_code_width - 1);\n              // unnecessary to apply bzhi here\n              ++fvals_widx;\n              rare01_lowbits = 0;\n            }\n            if (fvals_bits & (k1LU << rare01_lowbits)) {\n              const uint32_t sample_uidx_lowbits = ctzw(cur_raw_genoarr_hets) / 2;\n              subsetted_hetx_ct += (sample_include_hw[sample_hwidx] >> sample_uidx_lowbits) & 1;\n            }\n            rare01_lowbits += allele_code_width;\n          }\n          cur_raw_genoarr_hets &= cur_raw_genoarr_hets - 1;\n          fset_bits = fset_bits >> 1;\n        }\n      }\n    }\n    if (allele_idx == 1) {\n      *het_ctp = subsetted_01_ct - subsetted_hetx_ct;\n    } else {\n      *het_ctp = subsetted_hetx_ct;\n    }\n    return kPglRetSuccess;\n  }\n  // mode 1: difflist.\n  if (!sample_include) {\n    const unsigned char* group_info_iter;\n    uint32_t rare01_ct;\n    PglErr reterr = ParseDifflistHeader(fread_end, raw_sample_ct, fread_pp, nullptr, &group_info_iter, &rare01_ct);\n    // rare01_ct == 0 should be impossible\n    if (unlikely(reterr)) {\n      return reterr;\n    }\n    reterr = SkipDeltalistIds(fread_end, group_info_iter, rare01_ct, raw_sample_ct, 1, fread_pp);\n    if (unlikely(reterr)) {\n      return reterr;\n    }\n    const unsigned char* patch_01_fvals = *fread_pp;\n    const uint32_t fvals_byte_ct = DivUpU64(S_CAST(uint64_t, rare01_ct) * allele_code_width, 8);\n    if (PtrAddCk(fread_end, fvals_byte_ct, fread_pp)) {\n      return kPglRetMalformedInput;\n    }\n\n    *het_ctp = CountAux1aDense(patch_01_fvals, allele_ct, allele_idx, raw_01_ct, rare01_ct);\n    return kPglRetSuccess;\n  }\n  if (ignore_01_fvals) {\n    // Don't need to save deltalist contents in this case.\n    uint32_t subsetted_hetx_ct;\n    uint32_t rare01_ct;\n    PglErr reterr = CountDeltalistIntersect(fread_end, sample_include, raw_sample_ct, fread_pp, &subsetted_hetx_ct, &rare01_ct);\n    if (unlikely(reterr)) {\n      return reterr;\n    }\n    if (allele_idx == 1) {\n      *het_ctp = subsetted_01_ct - subsetted_hetx_ct;\n      const uint32_t fvals_byte_ct = DivUpU64(S_CAST(uint64_t, rare01_ct) * allele_code_width, 8);\n      if (PtrAddCk(fread_end, fvals_byte_ct, fread_pp)) {\n        return kPglRetMalformedInput;\n      }\n    } else {\n      *het_ctp = subsetted_hetx_ct;\n    }\n    return kPglRetSuccess;\n  }\n  // Save deltalist elements, iterate.\n  
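// The fvals scan below works a word at a time: xor_word zeroes exactly\n  // the allele-code fields equal to (allele_idx - 2), and\n  //   detect_mask_hi & ~(x | ((x | detect_mask_hi) - detect_mask_lo))\n  // is the usual SWAR zero-field test, leaving the high bit set in each\n  // matching field.  (E.g. with 4-bit codes, detect_mask_lo == kMask1111\n  // and detect_mask_hi == kMask1111 * 8.)\n  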
uint32_t rare01_ct;\n  PglErr reterr = ParseAndSaveDeltalist(fread_end, raw_sample_ct, fread_pp, deltalist_workspace, &rare01_ct);\n  if (unlikely(reterr)) {\n    return reterr;\n  }\n  const unsigned char* patch_01_fvals = *fread_pp;\n  const uint32_t fvals_byte_ct = DivUpU64(S_CAST(uint64_t, rare01_ct) * allele_code_width, 8);\n  if (PtrAddCk(fread_end, fvals_byte_ct, fread_pp)) {\n    return kPglRetMalformedInput;\n  }\n  const uint32_t fvals_word_ct_m1 = (fvals_byte_ct - 1) / kBytesPerWord;\n  uint32_t subsetted_hetx_ct = 0;\n  for (uint32_t fvals_widx = 0; ; ++fvals_widx) {\n    uintptr_t fvals_bits;\n    if (fvals_widx >= fvals_word_ct_m1) {\n      if (fvals_widx > fvals_word_ct_m1) {\n        break;\n      }\n      fvals_bits = SubwordLoad(&(patch_01_fvals[fvals_widx * kBytesPerWord]), ModNz(fvals_byte_ct, kBytesPerWord));\n    } else {\n      CopyFromUnalignedOffsetW(&fvals_bits, patch_01_fvals, fvals_widx);\n    }\n    fvals_bits = fvals_bits ^ xor_word;\n    fvals_bits = detect_mask_hi & (~(fvals_bits | ((fvals_bits | detect_mask_hi) - detect_mask_lo)));\n    if (fvals_widx == fvals_word_ct_m1) {\n      fvals_bits = bzhi_max(fvals_bits, ModNz(rare01_ct << allele_code_logwidth, kBitsPerWord));\n    }\n    if (!fvals_bits) {\n      continue;\n    }\n    const uint32_t* cur_deltalist_base = &(deltalist_workspace[fvals_widx << (kBitsPerWordLog2 - allele_code_logwidth)]);\n    do {\n      const uint32_t rare01_idx_lowbits = ctzw(fvals_bits) >> allele_code_logwidth;\n      const uint32_t sample_uidx = cur_deltalist_base[rare01_idx_lowbits];\n      subsetted_hetx_ct += IsSet(sample_include, sample_uidx);\n      fvals_bits &= fvals_bits - 1;\n    } while (fvals_bits);\n  }\n  *het_ctp = subsetted_hetx_ct;\n  return kPglRetSuccess;\n}\n\nvoid CountAux1bDense(const unsigned char* patch_10_fvals, uint32_t allele_ct, uint32_t allele_idx_m1, uint32_t raw_10_ct, uint32_t rare10_ct, uint32_t* __restrict het_ctp, uint32_t* __restrict hom_ctp) {\n  uint32_t matching_hom_ct = 0;\n  uint32_t het_incr;\n  if (allele_ct < 6) {\n    if (allele_ct == 3) {\n      const uint32_t allele_code_byte_ct = DivUp(rare10_ct, 8);\n      matching_hom_ct = PopcountBytes(patch_10_fvals, allele_code_byte_ct);\n      het_incr = rare10_ct - matching_hom_ct;\n    } else {\n      // 2+2 bits\n      het_incr = CountNyp(patch_10_fvals, allele_idx_m1 * kMask5555, rare10_ct * 2);\n      if (allele_idx_m1) {\n        matching_hom_ct = CountNybble(patch_10_fvals, allele_idx_m1 * kMask5555, rare10_ct);\n      }\n    }\n  } else {\n    if (allele_ct < 18) {\n      // 4+4 bits\n      het_incr = CountNybble(patch_10_fvals, allele_idx_m1 * kMask1111, rare10_ct * 2);\n      if (allele_idx_m1) {\n        matching_hom_ct = CountByte(patch_10_fvals, allele_idx_m1 * 0x11, rare10_ct);\n      }\n    } else {\n      // 8+8 bits\n      het_incr = CountByte(patch_10_fvals, allele_idx_m1 * 0x11, rare10_ct * 2);\n      if (allele_idx_m1) {\n        matching_hom_ct = CountU16(patch_10_fvals, allele_idx_m1 * 0x1111, rare10_ct);\n      }\n    }\n  }\n  if (!allele_idx_m1) {\n    *hom_ctp = raw_10_ct - rare10_ct;\n  } else {\n    het_incr -= 2 * matching_hom_ct;\n    *hom_ctp = matching_hom_ct;\n  }\n  *het_ctp += het_incr;\n}\n\n// Returns allele_code_logwidth.\nuint32_t GetAux1bConsts(uint32_t allele_ct, uintptr_t* detect_hom_mask_lo_ptr) {\n  if (allele_ct < 6) {\n    if (allele_ct == 3) {\n      *detect_hom_mask_lo_ptr = ~k0LU;\n      return 0;\n    }\n    *detect_hom_mask_lo_ptr = kMask1111;\n    return 1;\n  }\n  if (allele_ct < 18) {\n    
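// 4-bit allele codes: each rarealt genotype is stored as an 8-bit code\n    // pair, and the mask selects the low bit of each pair\n    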
*detect_hom_mask_lo_ptr = kMask0101;\n    return 2;\n  }\n  *detect_hom_mask_lo_ptr = kMask0001;\n  return 3;\n}\n\n// Advances *fread_pp past aux1b; increments *het_ctp by the number of\n// altx-alty genotypes in aux1b and sample_include with one allele ==\n// allele_idx; and sets *hom_ctp to the number of such hom-allele_idx genotypes\n// present.  (For allele_idx == 1, *hom_ctp is equal to raw_10_ct -\n// <# of aux1b entries> when there's no subsetting.)\n// Trailing bits of raw_genoarr must be cleared.\n// Ok for subsetted_10_ct to be uninitialized if not subsetting, or allele_idx\n// != 1.\n// sample_include assumed to be nullptr if no subsetting required\nPglErr CountAux1b(const unsigned char* fread_end, const uintptr_t* __restrict sample_include, const uintptr_t* __restrict raw_genoarr, uint32_t aux1b_mode, uint32_t raw_sample_ct, uint32_t allele_ct, uint32_t allele_idx, uint32_t raw_10_ct, uint32_t subsetted_10_ct, const unsigned char** fread_pp, uint32_t* __restrict het_ctp, uint32_t* __restrict hom_ctp, uint32_t* __restrict deltalist_workspace) {\n  if (aux1b_mode == 15) {\n    if (allele_idx == 1) {\n      if (sample_include) {\n        *hom_ctp = subsetted_10_ct;\n      } else {\n        *hom_ctp = raw_10_ct;\n      }\n    } else {\n      *hom_ctp = 0;\n    }\n    return kPglRetSuccess;\n  }\n  uintptr_t detect_hom_mask_lo;\n  const uint32_t allele_code_logwidth = GetAux1bConsts(allele_ct, &detect_hom_mask_lo);\n  const uint32_t allele_code_width = 1U << allele_code_logwidth;\n  const uint32_t code10_logwidth = allele_code_logwidth + (allele_code_logwidth != 0);\n  const uint32_t code10_width = 1U << code10_logwidth;\n  const uint32_t allele_idx_m1 = allele_idx - 1;\n  uint32_t rare10_lowbits = kBitsPerWord;\n  if (!aux1b_mode) {\n    // 10-collapsed bitarray\n    const uint32_t fset_byte_ct = DivUp(raw_10_ct, CHAR_BIT);\n    const uint32_t rare10_ct = PopcountBytes(*fread_pp, fset_byte_ct);\n    const unsigned char* patch_10_fset = *fread_pp;\n    *fread_pp += fset_byte_ct;\n    const unsigned char* patch_10_fvals = *fread_pp;\n    const uint32_t fvals_byte_ct = DivUpU64(S_CAST(uint64_t, rare10_ct) * code10_width, 8);\n    if (PtrAddCk(fread_end, fvals_byte_ct, fread_pp)) {\n      return kPglRetMalformedInput;\n    }\n    if (!sample_include) {\n      CountAux1bDense(patch_10_fvals, allele_ct, allele_idx_m1, raw_10_ct, rare10_ct, het_ctp, hom_ctp);\n      return kPglRetSuccess;\n    }\n    const Halfword* sample_include_hw = DowncastKWToHW(sample_include);\n    uintptr_t sample_hwidx = 0;\n    uintptr_t cur_raw_genoarr_xys = Word10(raw_genoarr[0]);\n    const uint32_t fset_word_ct_m1 = (fset_byte_ct - 1) / kBytesPerWord;\n    const uint32_t fvals_word_ct_m1 = (fvals_byte_ct - 1) / kBytesPerWord;\n    uintptr_t fvals_bits = 0;\n    uint32_t fvals_widx = 0;\n    uint32_t loop_len = kBitsPerWord;\n    if ((!allele_idx_m1) || (allele_ct == 3)) {\n      // bugfix (29 Dec 2019)\n      const uintptr_t detect_alt1_mask_hi = detect_hom_mask_lo << (allele_code_width - 1);\n      uint32_t subsetted_rare10_ct = 0;\n      uint32_t het_1x_ct = 0;\n      for (uint32_t fset_widx = 0; ; ++fset_widx) {\n        uintptr_t fset_bits;\n        if (fset_widx >= fset_word_ct_m1) {\n          if (fset_widx > fset_word_ct_m1) {\n            break;\n          }\n          fset_bits = SubwordLoad(&(patch_10_fset[fset_word_ct_m1 * kBytesPerWord]), ModNz(fset_byte_ct, kBytesPerWord));\n          loop_len = ModNz(raw_10_ct, kBitsPerWord);\n        } else {\n          CopyFromUnalignedOffsetW(&fset_bits, 
patch_10_fset, fset_widx);\n        }\n        for (uint32_t uii = 0; uii != loop_len; ++uii) {\n          while (!cur_raw_genoarr_xys) {\n            cur_raw_genoarr_xys = Word10(raw_genoarr[++sample_hwidx]);\n          }\n          if (fset_bits & 1) {\n            if (rare10_lowbits == kBitsPerWord) {\n              if (fvals_widx == fvals_word_ct_m1) {\n                fvals_bits = SubwordLoad(&(patch_10_fvals[fvals_widx * kBytesPerWord]), ModNz(fvals_byte_ct, kBytesPerWord));\n              } else {\n                CopyFromUnalignedOffsetW(&fvals_bits, patch_10_fvals, fvals_widx);\n              }\n              // This sets each fvals_bits entry to 1 iff the patch genotype is\n              // ALT1-ALTx, i.e. the original low bits were zero.\n              fvals_bits = (detect_alt1_mask_hi & (~(fvals_bits | ((fvals_bits | detect_alt1_mask_hi) - detect_hom_mask_lo)))) >> (allele_code_width - 1);\n              // unnecessary to apply bzhi here\n              ++fvals_widx;\n              rare10_lowbits = 0;\n            }\n            const uint32_t sample_uidx_lowbits = ctzw(cur_raw_genoarr_xys) / 2;\n            if (sample_include_hw[sample_hwidx] & (1U << sample_uidx_lowbits)) {\n              ++subsetted_rare10_ct;\n              het_1x_ct += (fvals_bits >> rare10_lowbits) & 1;\n            }\n            rare10_lowbits += code10_width;\n          }\n          cur_raw_genoarr_xys &= cur_raw_genoarr_xys - 1;\n          fset_bits = fset_bits >> 1;\n        }\n      }\n      if (allele_ct == 3) {\n        if (allele_idx_m1) {\n          *hom_ctp = subsetted_rare10_ct - het_1x_ct;\n          *het_ctp += het_1x_ct;\n          return kPglRetSuccess;\n        }\n      }\n      *hom_ctp = subsetted_10_ct - subsetted_rare10_ct;\n      *het_ctp += het_1x_ct;\n      return kPglRetSuccess;\n    }\n    // allele_idx > 1, allele_ct > 3\n    const uintptr_t detect_all_mask_lo = detect_hom_mask_lo | (detect_hom_mask_lo << allele_code_width);\n    const uintptr_t detect_all_mask_hi = detect_all_mask_lo << (allele_code_width - 1);\n    const uintptr_t xor_word = allele_idx_m1 * detect_all_mask_lo;\n    uint32_t matching_allele_ct = 0;  // 2x hom + 1x het\n    uint32_t matching_het_or_hom_ct = 0;\n    for (uint32_t fset_widx = 0; ; ++fset_widx) {\n      uintptr_t fset_bits;\n      if (fset_widx >= fset_word_ct_m1) {\n        if (fset_widx > fset_word_ct_m1) {\n          break;\n        }\n        fset_bits = SubwordLoad(&(patch_10_fset[fset_word_ct_m1 * kBytesPerWord]), ModNz(fset_byte_ct, kBytesPerWord));\n        loop_len = ModNz(raw_10_ct, kBitsPerWord);\n      } else {\n        CopyFromUnalignedOffsetW(&fset_bits, patch_10_fset, fset_widx);\n      }\n      for (uint32_t uii = 0; uii != loop_len; ++uii) {\n        while (!cur_raw_genoarr_xys) {\n          cur_raw_genoarr_xys = Word10(raw_genoarr[++sample_hwidx]);\n        }\n        if (fset_bits & 1) {\n          if (rare10_lowbits == kBitsPerWord) {\n            if (fvals_widx == fvals_word_ct_m1) {\n              fvals_bits = SubwordLoad(&(patch_10_fvals[fvals_widx * kBytesPerWord]), ModNz(fvals_byte_ct, kBytesPerWord));\n            } else {\n              CopyFromUnalignedOffsetW(&fvals_bits, patch_10_fvals, fvals_widx);\n            }\n            fvals_bits ^= xor_word;\n            fvals_bits = (detect_all_mask_hi & (~(fvals_bits | ((fvals_bits | detect_all_mask_hi) - detect_all_mask_lo)))) >> (allele_code_width - 1);\n            // unnecessary to apply bzhi or detect_hom_mask_lo here\n            fvals_bits = fvals_bits + (fvals_bits >> 
allele_code_width);\n            ++fvals_widx;\n            rare10_lowbits = 0;\n          }\n          const uintptr_t cur_hit_ct = (fvals_bits >> rare10_lowbits) & 3;\n          rare10_lowbits += code10_width;\n          if (cur_hit_ct) {\n            const uint32_t sample_uidx_lowbits = ctzw(cur_raw_genoarr_xys) / 2;\n            if (sample_include_hw[sample_hwidx] & (1U << sample_uidx_lowbits)) {\n              ++matching_het_or_hom_ct;\n              matching_allele_ct += cur_hit_ct;\n            }\n          }\n        }\n        cur_raw_genoarr_xys &= cur_raw_genoarr_xys - 1;\n        fset_bits = fset_bits >> 1;\n      }\n    }\n    const uint32_t matching_hom_ct = matching_allele_ct - matching_het_or_hom_ct;\n    *hom_ctp = matching_hom_ct;\n    *het_ctp += matching_het_or_hom_ct - matching_hom_ct;\n    return kPglRetSuccess;\n  }\n  // mode 1: difflist.\n  if (!sample_include) {\n    const unsigned char* group_info_iter;\n    uint32_t rare10_ct;\n    PglErr reterr = ParseDifflistHeader(fread_end, raw_sample_ct, fread_pp, nullptr, &group_info_iter, &rare10_ct);\n    // rare10_ct == 0 should be impossible\n    if (unlikely(reterr)) {\n      return reterr;\n    }\n    reterr = SkipDeltalistIds(fread_end, group_info_iter, rare10_ct, raw_sample_ct, 0, fread_pp);\n    if (unlikely(reterr)) {\n      return reterr;\n    }\n    const unsigned char* patch_10_fvals = *fread_pp;\n    const uint32_t fvals_byte_ct = DivUpU64(S_CAST(uint64_t, rare10_ct) << code10_logwidth, CHAR_BIT);\n    if (PtrAddCk(fread_end, fvals_byte_ct, fread_pp)) {\n      return kPglRetMalformedInput;\n    }\n    CountAux1bDense(patch_10_fvals, allele_ct, allele_idx_m1, raw_10_ct, rare10_ct, het_ctp, hom_ctp);\n    return kPglRetSuccess;\n  }\n  // Save deltalist elements, iterate.\n  uint32_t rare10_ct;\n  PglErr reterr = ParseAndSaveDeltalist(fread_end, raw_sample_ct, fread_pp, deltalist_workspace, &rare10_ct);\n  if (unlikely(reterr)) {\n    return reterr;\n  }\n  const unsigned char* patch_10_fvals = *fread_pp;\n  const uint32_t fvals_byte_ct = DivUpU64(S_CAST(uint64_t, rare10_ct) << code10_logwidth, CHAR_BIT);\n  if (PtrAddCk(fread_end, fvals_byte_ct, fread_pp)) {\n    return kPglRetMalformedInput;\n  }\n  const uint32_t fvals_word_ct_m1 = (fvals_byte_ct - 1) / kBytesPerWord;\n  if ((!allele_idx_m1) || (allele_ct == 3)) {\n    const uintptr_t detect_alt1_mask_hi = detect_hom_mask_lo << (allele_code_width - 1);\n    uint32_t subsetted_rare10_ct = 0;\n    uint32_t het_1x_ct = 0;\n    uint32_t loop_len = kBitsPerWord >> code10_logwidth;\n    for (uint32_t fvals_widx = 0; ; ++fvals_widx) {\n      uintptr_t fvals_bits;\n      if (fvals_widx >= fvals_word_ct_m1) {\n        if (fvals_widx > fvals_word_ct_m1) {\n          break;\n        }\n        fvals_bits = SubwordLoad(&(patch_10_fvals[fvals_widx * kBytesPerWord]), ModNz(fvals_byte_ct, kBytesPerWord));\n        loop_len = 1 + ((rare10_ct - 1) & ((kBitsPerWord >> code10_logwidth) - 1));\n      } else {\n        CopyFromUnalignedOffsetW(&fvals_bits, patch_10_fvals, fvals_widx);\n      }\n      fvals_bits = (detect_alt1_mask_hi & (~(fvals_bits | ((fvals_bits | detect_alt1_mask_hi) - detect_hom_mask_lo)))) >> (allele_code_width - 1);\n      const uint32_t* cur_deltalist_base = &(deltalist_workspace[fvals_widx << (kBitsPerWordLog2 - code10_logwidth)]);\n      for (uint32_t uii = 0; uii != loop_len; ++uii) {\n        const uint32_t sample_uidx = cur_deltalist_base[uii];\n        if (IsSet(sample_include, sample_uidx)) {\n          ++subsetted_rare10_ct;\n          het_1x_ct 
+= (fvals_bits >> (uii << code10_logwidth)) & 1;\n        }\n      }\n    }\n    if (allele_ct == 3) {\n      if (allele_idx_m1) {\n        *hom_ctp = subsetted_rare10_ct - het_1x_ct;\n        *het_ctp += het_1x_ct;\n        return kPglRetSuccess;\n      }\n    }\n    *hom_ctp = subsetted_10_ct - subsetted_rare10_ct;\n    *het_ctp += het_1x_ct;\n    return kPglRetSuccess;\n  }\n  // allele_idx > 1, allele_ct > 3\n  const uintptr_t detect_all_mask_lo = detect_hom_mask_lo | (detect_hom_mask_lo << allele_code_width);\n  const uintptr_t detect_all_mask_hi = detect_all_mask_lo << (allele_code_width - 1);\n  detect_hom_mask_lo = detect_hom_mask_lo * 3;\n  const uintptr_t xor_word = allele_idx_m1 * detect_all_mask_lo;\n  uint32_t matching_het_or_hom_ct = 0;\n  uint32_t matching_hom_ct = 0;\n  for (uint32_t fvals_widx = 0; ; ++fvals_widx) {\n    uintptr_t fvals_bits;\n    if (fvals_widx >= fvals_word_ct_m1) {\n      if (fvals_widx > fvals_word_ct_m1) {\n        break;\n      }\n      fvals_bits = SubwordLoad(&(patch_10_fvals[fvals_widx * kBytesPerWord]), ModNz(fvals_byte_ct, kBytesPerWord));\n    } else {\n      CopyFromUnalignedOffsetW(&fvals_bits, patch_10_fvals, fvals_widx);\n    }\n    fvals_bits = fvals_bits ^ xor_word;\n    fvals_bits = detect_all_mask_hi & (~(fvals_bits | ((fvals_bits | detect_all_mask_hi) - detect_all_mask_lo)));\n    if (fvals_widx == fvals_word_ct_m1) {\n      fvals_bits = bzhi_max(fvals_bits, ModNz(rare10_ct << code10_logwidth, kBitsPerWord));\n    }\n    if (!fvals_bits) {\n      continue;\n    }\n    fvals_bits = fvals_bits >> (allele_code_width - 1);\n    fvals_bits = (fvals_bits + (fvals_bits >> allele_code_width)) & detect_hom_mask_lo;\n    const uint32_t* cur_deltalist_base = &(deltalist_workspace[fvals_widx << (kBitsPerWordLog2 - code10_logwidth)]);\n    do {\n      const uint32_t bit_idx = ctzw(fvals_bits);\n      const uint32_t sample_uidx = cur_deltalist_base[bit_idx >> code10_logwidth];\n      if (IsSet(sample_include, sample_uidx)) {\n        ++matching_het_or_hom_ct;\n        matching_hom_ct += bit_idx & 1;\n      }\n      fvals_bits &= fvals_bits - 1;\n    } while (fvals_bits);\n  }\n  *hom_ctp = matching_hom_ct;\n  *het_ctp += matching_het_or_hom_ct - matching_hom_ct;\n  return kPglRetSuccess;\n}\n\nPglErr PgrGetInv1Counts(const uintptr_t* __restrict sample_include, const uintptr_t* __restrict sample_include_interleaved_vec, PgrSampleSubsetIndex pssi, uint32_t sample_ct, uint32_t vidx, uint32_t allele_idx, PgenReader* pgr_ptr, STD_ARRAY_REF(uint32_t, 4) genocounts) {\n  // May use workspace_vec and workspace_difflist_sample_ids.\n  if (!sample_ct) {\n    STD_ARRAY_REF_FILL0(4, genocounts);\n    return kPglRetSuccess;\n  }\n  PgenReaderMain* pgrp = GetPgrp(pgr_ptr);\n  const uint32_t* sample_include_cumulative_popcounts = GetSicp(pssi);\n  const uintptr_t* allele_idx_offsets = pgrp->fi.allele_idx_offsets;\n  PglErr reterr;\n  if ((!allele_idx) || (!allele_idx_offsets)) {\n  PgrGetInv1Counts_biallelic:\n    reterr = GetBasicGenotypeCounts(sample_include, sample_include_interleaved_vec, sample_include_cumulative_popcounts, sample_ct, vidx, pgrp, nullptr, genocounts);\n    if (allele_idx) {\n      const uint32_t homref_ct = genocounts[0];\n      genocounts[0] = genocounts[2];\n      genocounts[2] = homref_ct;\n    }\n    return reterr;\n  }\n  const uint32_t allele_ct = allele_idx_offsets[vidx + 1] - allele_idx_offsets[vidx];\n  if (allele_ct == 2) {\n    goto PgrGetInv1Counts_biallelic;\n  }\n  const uint32_t raw_sample_ct = pgrp->fi.raw_sample_ct;\n  const 
uint32_t subsetting_required = (sample_ct != raw_sample_ct);\n  uintptr_t* tmp_genovec = pgrp->workspace_vec;\n  const unsigned char* fread_ptr;\n  const unsigned char* fread_end;\n  reterr = ReadRawGenovec(subsetting_required, vidx, pgrp, &fread_ptr, &fread_end, tmp_genovec);\n  if (unlikely(reterr)) {\n    return reterr;\n  }\n  ZeroTrailingNyps(raw_sample_ct, tmp_genovec);\n  const uint32_t aux1_first_byte = *fread_ptr++;\n  const uint32_t aux1a_mode = aux1_first_byte & 15;\n  const uint32_t aux1b_mode = aux1_first_byte >> 4;\n  // raw_01_ct not needed when aux1a uses difflist form and subsetting is\n  // occurring; same applies to raw_10_ct.\n  uint32_t raw_01_ct = 0;\n  uint32_t raw_10_ct = 0;\n  if ((!subsetting_required) || (!aux1a_mode) || (!aux1b_mode)) {\n    GenoarrCountFreqsUnsafe(tmp_genovec, raw_sample_ct, genocounts);\n    raw_01_ct = genocounts[1];\n    raw_10_ct = genocounts[2];\n  }\n  uint32_t subsetted_01_ct = 0;\n  uint32_t subsetted_10_ct = 0;\n  if (subsetting_required) {\n    // need accurate subsetted missing count for allele_idx > 1 case\n    GenoarrCountSubsetFreqs(tmp_genovec, sample_include_interleaved_vec, raw_sample_ct, sample_ct, genocounts);\n    subsetted_01_ct = genocounts[1];\n    subsetted_10_ct = genocounts[2];\n  } else {\n    sample_include = nullptr;\n  }\n  uint32_t het_ct;\n  reterr = CountAux1a(fread_end, sample_include, tmp_genovec, aux1a_mode, raw_sample_ct, allele_ct, allele_idx, raw_01_ct, subsetted_01_ct, &fread_ptr, &het_ct, pgrp->workspace_difflist_sample_ids);\n  if (unlikely(reterr)) {\n    return reterr;\n  }\n  uint32_t hom_ct;\n  reterr = CountAux1b(fread_end, sample_include, tmp_genovec, aux1b_mode, raw_sample_ct, allele_ct, allele_idx, raw_10_ct, subsetted_10_ct, &fread_ptr, &het_ct, &hom_ct, pgrp->workspace_difflist_sample_ids);\n  genocounts[0] = hom_ct;\n  genocounts[1] = het_ct;\n  genocounts[2] = sample_ct - genocounts[3] - hom_ct - het_ct;\n  return reterr;\n}\n\n// sample_include assumed to be nullptr if no subsetting required\nPglErr GenoarrAux1aUpdate(const unsigned char* fread_end, const uintptr_t* __restrict sample_include, const uint32_t* __restrict sample_include_cumulative_popcounts, const uintptr_t* __restrict raw_genoarr, uint32_t aux1a_mode, uint32_t raw_sample_ct, uint32_t allele_ct, uint32_t allele_idx, uintptr_t lshifted_bit, uint32_t raw_01_ct, const unsigned char** fread_pp, uintptr_t* __restrict target_genoarr, uint32_t* __restrict deltalist_workspace) {\n  if (aux1a_mode == 15) {\n    return kPglRetSuccess;\n  }\n  const uint32_t ignore_01_fvals = (allele_idx == 1) || (allele_ct == 3);\n  uintptr_t detect_mask_hi;\n  uintptr_t detect_mask_lo;\n  uint32_t allele_code_logwidth;\n  const uint32_t allele_code_width = GetAux1aConsts(allele_ct, &detect_mask_hi, &detect_mask_lo, &allele_code_logwidth);\n  const uintptr_t xor_word = (allele_idx - 2) * detect_mask_lo;\n  if (!aux1a_mode) {\n    const unsigned char* patch_01_fset = *fread_pp;\n    const uint32_t fset_byte_ct = DivUp(raw_01_ct, 8);\n    uint32_t rare01_ct = 0;\n    if (allele_ct > 3) {\n      rare01_ct = PopcountBytes(*fread_pp, fset_byte_ct);\n    }\n    *fread_pp += fset_byte_ct;\n    const unsigned char* patch_01_fvals = *fread_pp;\n    uintptr_t sample_hwidx = 0;\n    uintptr_t cur_raw_genoarr_hets = Word01(raw_genoarr[0]);\n    uint32_t loop_len = kBitsPerWord;\n    const uintptr_t fvals_byte_ct = DivUpU64(S_CAST(uint64_t, rare01_ct) * allele_code_width, 8);\n    if (PtrAddCk(fread_end, fvals_byte_ct, fread_pp)) {\n      return 
kPglRetMalformedInput;\n    }\n    const uint32_t fset_word_ct_m1 = (fset_byte_ct - 1) / kBytesPerWord;\n    const uint32_t fvals_word_ct_m1 = (fvals_byte_ct - 1) / kBytesPerWord;\n    const uint32_t lshift = lshifted_bit - 1;\n    uintptr_t fvals_bits = 0;\n    uint32_t fvals_widx = 0;\n    uint32_t rare01_lowbits = kBitsPerWord;\n    for (uint32_t fset_widx = 0; ; ++fset_widx) {\n      uintptr_t fset_bits;\n      if (fset_widx >= fset_word_ct_m1) {\n        if (fset_widx > fset_word_ct_m1) {\n          return kPglRetSuccess;\n        }\n        fset_bits = SubwordLoad(&(patch_01_fset[fset_word_ct_m1 * kBytesPerWord]), ModNz(fset_byte_ct, kBytesPerWord));\n        loop_len = ModNz(raw_01_ct, kBitsPerWord);\n      } else {\n        CopyFromUnalignedOffsetW(&fset_bits, patch_01_fset, fset_widx);\n      }\n      if (!sample_include) {\n        if (ignore_01_fvals) {\n          for (uint32_t uii = 0; uii != loop_len; ++uii) {\n            while (!cur_raw_genoarr_hets) {\n              cur_raw_genoarr_hets = Word01(raw_genoarr[++sample_hwidx]);\n            }\n            if (fset_bits & 1) {\n              // ref/altx present for x>1.  Change genovec entry from 01 to 11\n              // (or 11 -> 01 in allele_idx == 2, allele_ct == 3 case; same xor\n              // operation works for that)\n              const uintptr_t lowbit = cur_raw_genoarr_hets & (-cur_raw_genoarr_hets);\n              target_genoarr[sample_hwidx] ^= lowbit << lshift;\n            }\n            cur_raw_genoarr_hets &= cur_raw_genoarr_hets - 1;\n            fset_bits = fset_bits >> 1;\n          }\n        } else {\n          for (uint32_t uii = 0; uii != loop_len; ++uii) {\n            while (!cur_raw_genoarr_hets) {\n              cur_raw_genoarr_hets = Word01(raw_genoarr[++sample_hwidx]);\n            }\n            if (fset_bits & 1) {\n              if (rare01_lowbits == kBitsPerWord) {\n                if (fvals_widx == fvals_word_ct_m1) {\n                  fvals_bits = SubwordLoad(&(patch_01_fvals[fvals_widx * kBytesPerWord]), ModNz(fvals_byte_ct, kBytesPerWord));\n                } else {\n                  CopyFromUnalignedOffsetW(&fvals_bits, patch_01_fvals, fvals_widx);\n                }\n                fvals_bits = fvals_bits ^ xor_word;\n                fvals_bits = (detect_mask_hi & (~(fvals_bits | ((fvals_bits | detect_mask_hi) - detect_mask_lo)))) >> (allele_code_width - 1);\n                // unnecessary to apply bzhi here\n                ++fvals_widx;\n                rare01_lowbits = 0;\n              }\n              if (fvals_bits & (k1LU << rare01_lowbits)) {\n                const uintptr_t lowbit = cur_raw_genoarr_hets & (-cur_raw_genoarr_hets);\n                target_genoarr[sample_hwidx] ^= lowbit << lshift;\n              }\n              rare01_lowbits += allele_code_width;\n            }\n            cur_raw_genoarr_hets &= cur_raw_genoarr_hets - 1;\n            fset_bits = fset_bits >> 1;\n          }\n        }\n      } else {\n        // format 0, sample_include non-null\n        if (ignore_01_fvals) {\n          for (uint32_t uii = 0; uii != loop_len; ++uii) {\n            while (!cur_raw_genoarr_hets) {\n              cur_raw_genoarr_hets = Word01(raw_genoarr[++sample_hwidx]);\n            }\n            if (fset_bits & 1) {\n              // Considered replacing cur_raw_genoarr_hets with the result of\n              // two PackWordToHalfword() operations, since that keeps all\n              // the sample word-indexes aligned.  
Couldn't justify it given\n              // the expected sparsity of this case, though.\n              const uint32_t sample_uidx_lowbits = ctzw(cur_raw_genoarr_hets) / 2;\n              if ((DowncastKWToHW(sample_include)[sample_hwidx]) & (1U << sample_uidx_lowbits)) {\n                const uint32_t sample_idx = RawToSubsettedPos(sample_include, sample_include_cumulative_popcounts, sample_hwidx * kBitsPerWordD2 + sample_uidx_lowbits);\n                target_genoarr[sample_idx / kBitsPerWordD2] ^= lshifted_bit << (2 * (sample_idx % kBitsPerWordD2));\n              }\n            }\n            cur_raw_genoarr_hets &= cur_raw_genoarr_hets - 1;\n            fset_bits = fset_bits >> 1;\n          }\n        } else {\n          for (uint32_t uii = 0; uii != loop_len; ++uii) {\n            while (!cur_raw_genoarr_hets) {\n              cur_raw_genoarr_hets = Word01(raw_genoarr[++sample_hwidx]);\n            }\n            if (fset_bits & 1) {\n              if (rare01_lowbits == kBitsPerWord) {\n                if (fvals_widx == fvals_word_ct_m1) {\n                  fvals_bits = SubwordLoad(&(patch_01_fvals[fvals_widx * kBytesPerWord]), ModNz(fvals_byte_ct, kBytesPerWord));\n                } else {\n                  CopyFromUnalignedOffsetW(&fvals_bits, patch_01_fvals, fvals_widx);\n                }\n                fvals_bits = fvals_bits ^ xor_word;\n                fvals_bits = (detect_mask_hi & (~(fvals_bits | ((fvals_bits | detect_mask_hi) - detect_mask_lo)))) >> (allele_code_width - 1);\n                // unnecessary to apply bzhi here\n                ++fvals_widx;\n                rare01_lowbits = 0;\n              }\n              if (fvals_bits & (k1LU << rare01_lowbits)) {\n                const uint32_t sample_uidx_lowbits = ctzw(cur_raw_genoarr_hets) / 2;\n                if ((DowncastKWToHW(sample_include)[sample_hwidx]) & (1U << sample_uidx_lowbits)) {\n                  const uint32_t sample_idx = RawToSubsettedPos(sample_include, sample_include_cumulative_popcounts, sample_hwidx * kBitsPerWordD2 + sample_uidx_lowbits);\n                  target_genoarr[sample_idx / kBitsPerWordD2] ^= lshifted_bit << (2 * (sample_idx % kBitsPerWordD2));\n                }\n              }\n              rare01_lowbits += allele_code_width;\n            }\n            cur_raw_genoarr_hets &= cur_raw_genoarr_hets - 1;\n            fset_bits = fset_bits >> 1;\n          }\n        }\n      }\n    }\n  }\n  // aux1a_mode == 1\n  uint32_t rare01_ct;\n  // Might hardcode the ParseAndSaveDeltalist logic later, but let's get\n  // this working first.\n  PglErr reterr = ParseAndSaveDeltalist(fread_end, raw_sample_ct, fread_pp, deltalist_workspace, &rare01_ct);\n  if (unlikely(reterr)) {\n    return reterr;\n  }\n  const unsigned char* patch_01_fvals = *fread_pp;\n  const uintptr_t fvals_byte_ct = DivUpU64(S_CAST(uint64_t, rare01_ct) * allele_code_width, 8);\n  if (PtrAddCk(fread_end, fvals_byte_ct, fread_pp)) {\n    return kPglRetMalformedInput;\n  }\n  if (ignore_01_fvals) {\n    if (!sample_include) {\n      for (uint32_t rare01_idx = 0; rare01_idx != rare01_ct; ++rare01_idx) {\n        const uint32_t sample_uidx = deltalist_workspace[rare01_idx];\n        // todo: benchmark against k1LU << (lshift + ...)\n        target_genoarr[sample_uidx / kBitsPerWordD2] ^= lshifted_bit << (2 * (sample_uidx % kBitsPerWordD2));\n      }\n      return kPglRetSuccess;\n    }\n    for (uint32_t rare01_idx = 0; rare01_idx != rare01_ct; ++rare01_idx) {\n      const uint32_t sample_uidx = 
deltalist_workspace[rare01_idx];\n      // could wrap this boilerplate\n      const uint32_t sample_widx = sample_uidx / kBitsPerWord;\n      const uintptr_t lowbit = k1LU << (sample_uidx % kBitsPerWord);\n      const uintptr_t sample_include_word = sample_include[sample_widx];\n      if (sample_include_word & lowbit) {\n        const uint32_t sample_idx = sample_include_cumulative_popcounts[sample_widx] + PopcountWord(sample_include_word & (lowbit - 1));\n        target_genoarr[sample_idx / kBitsPerWordD2] ^= lshifted_bit << (2 * (sample_idx % kBitsPerWordD2));\n      }\n    }\n    return kPglRetSuccess;\n  }\n  const uint32_t fvals_word_ct_m1 = (fvals_byte_ct - 1) / kBytesPerWord;\n  for (uint32_t fvals_widx = 0; ; ++fvals_widx) {\n    uintptr_t fvals_bits;\n    if (fvals_widx >= fvals_word_ct_m1) {\n      if (fvals_widx > fvals_word_ct_m1) {\n        return kPglRetSuccess;\n      }\n      fvals_bits = SubwordLoad(&(patch_01_fvals[fvals_widx * kBytesPerWord]), ModNz(fvals_byte_ct, kBytesPerWord));\n    } else {\n      CopyFromUnalignedOffsetW(&fvals_bits, patch_01_fvals, fvals_widx);\n    }\n    fvals_bits = fvals_bits ^ xor_word;\n    fvals_bits = detect_mask_hi & (~(fvals_bits | ((fvals_bits | detect_mask_hi) - detect_mask_lo)));\n    if (fvals_widx == fvals_word_ct_m1) {\n      fvals_bits = bzhi_max(fvals_bits, ModNz(rare01_ct << allele_code_logwidth, kBitsPerWord));\n    }\n    if (!fvals_bits) {\n      continue;\n    }\n    const uint32_t* cur_deltalist_base = &(deltalist_workspace[fvals_widx << (kBitsPerWordLog2 - allele_code_logwidth)]);\n    if (!sample_include) {\n      do {\n        const uint32_t rare01_idx_lowbits = ctzw(fvals_bits) >> allele_code_logwidth;\n        const uint32_t sample_uidx = cur_deltalist_base[rare01_idx_lowbits];\n        target_genoarr[sample_uidx / kBitsPerWordD2] ^= lshifted_bit << (2 * (sample_uidx % kBitsPerWordD2));\n        fvals_bits &= fvals_bits - 1;\n      } while (fvals_bits);\n    } else {\n      do {\n        const uint32_t rare01_idx_lowbits = ctzw(fvals_bits) >> allele_code_logwidth;\n        const uint32_t sample_uidx = cur_deltalist_base[rare01_idx_lowbits];\n        const uint32_t sample_widx = sample_uidx / kBitsPerWord;\n        const uintptr_t lowbit = k1LU << (sample_uidx % kBitsPerWord);\n        const uintptr_t sample_include_word = sample_include[sample_widx];\n        if (sample_include_word & lowbit) {\n          const uint32_t sample_idx = sample_include_cumulative_popcounts[sample_widx] + PopcountWord(sample_include_word & (lowbit - 1));\n          target_genoarr[sample_idx / kBitsPerWordD2] ^= lshifted_bit << (2 * (sample_idx % kBitsPerWordD2));\n        }\n        fvals_bits &= fvals_bits - 1;\n      } while (fvals_bits);\n    }\n  }\n}\n\n// sample_include assumed to be nullptr if no subsetting required\nPglErr GenoarrAux1bStandardUpdate(const unsigned char* fread_end, const uintptr_t* __restrict sample_include, const uint32_t* __restrict sample_include_cumulative_popcounts, const uintptr_t* __restrict raw_genoarr, uint32_t aux1b_mode, uint32_t raw_sample_ct, uint32_t allele_ct, uint32_t allele_idx, uint32_t raw_10_ct, const unsigned char** fread_pp, uintptr_t* __restrict target_genoarr, uint32_t* __restrict deltalist_workspace) {\n  if (aux1b_mode == 15) {\n    return kPglRetSuccess;\n  }\n  const uint32_t allele_idx_m1 = allele_idx - 1;\n  uintptr_t detect_hom_mask_lo;\n  const uint32_t allele_code_logwidth = GetAux1bConsts(allele_ct, &detect_hom_mask_lo);\n  const uint32_t allele_code_width = 1U << 
allele_code_logwidth;\n  const uint32_t code10_logwidth = allele_code_logwidth + (allele_code_logwidth != 0);\n  const uint32_t code10_width = 1U << code10_logwidth;\n  uint32_t rare10_lowbits = kBitsPerWord;\n  if (!aux1b_mode) {\n    const unsigned char* patch_10_fset = *fread_pp;\n    const uint32_t fset_byte_ct = DivUp(raw_10_ct, 8);\n    const uint32_t fset_word_ct_m1 = (fset_byte_ct - 1) / kBytesPerWord;\n    const uint32_t rare10_ct = PopcountBytes(*fread_pp, fset_byte_ct);\n    *fread_pp += fset_byte_ct;\n    uintptr_t sample_hwidx = 0;\n    uintptr_t cur_raw_genoarr_xys = Word10(raw_genoarr[0]);\n    const unsigned char* patch_10_fvals = *fread_pp;\n    const uint32_t fvals_byte_ct = DivUpU64(S_CAST(uint64_t, rare10_ct) * code10_width, CHAR_BIT);\n    if (PtrAddCk(fread_end, fvals_byte_ct, fread_pp)) {\n      return kPglRetMalformedInput;\n    }\n    const uint32_t fvals_word_ct_m1 = (fvals_byte_ct - 1) / kBytesPerWord;\n    uintptr_t fvals_bits = 0;\n    uint32_t fvals_widx = 0;\n    uint32_t loop_len = kBitsPerWord;\n    if ((!allele_idx_m1) || (allele_ct == 3)) {\n      // bugfix (29 Dec 2019)\n      const uintptr_t detect_alt1_mask_hi = detect_hom_mask_lo << (allele_code_width - 1);\n      // If allele_ct == 3:\n      //   code10_width = 1\n      //   0 -> 1/2, 1 -> 2/2\n      //   if allele_idx == 1:\n      //     we want to convert 2 -> 1 for 1/2 genotypes, and 2 -> 0 for 2/2.\n      //   if allele_idx == 2:\n      //     we want to convert 0 -> 1 for 1/2 genotypes, and 0 -> 2 for 2/2.\n      // If allele_ct == 4 (allele_idx == 1 forced):\n      //   allele_code_width = 2\n      //   code10_width = 4\n      //   we want to convert 2 -> 1 for 1/x genotypes, and 2 -> 0 otherwise.\n      const uint32_t lowcode_add = 2 - allele_idx_m1;\n      for (uint32_t fset_widx = 0; ; ++fset_widx) {\n        uintptr_t fset_bits;\n        if (fset_widx >= fset_word_ct_m1) {\n          if (fset_widx > fset_word_ct_m1) {\n            break;\n          }\n          fset_bits = SubwordLoad(&(patch_10_fset[fset_word_ct_m1 * kBytesPerWord]), ModNz(fset_byte_ct, kBytesPerWord));\n          loop_len = ModNz(raw_10_ct, kBitsPerWord);\n        } else {\n          CopyFromUnalignedOffsetW(&fset_bits, patch_10_fset, fset_widx);\n        }\n        if (!sample_include) {\n          for (uint32_t uii = 0; uii != loop_len; ++uii) {\n            while (!cur_raw_genoarr_xys) {\n              cur_raw_genoarr_xys = Word10(raw_genoarr[++sample_hwidx]);\n            }\n            if (fset_bits & 1) {\n              if (rare10_lowbits == kBitsPerWord) {\n                if (fvals_widx == fvals_word_ct_m1) {\n                  fvals_bits = SubwordLoad(&(patch_10_fvals[fvals_widx * kBytesPerWord]), ModNz(fvals_byte_ct, kBytesPerWord));\n                } else {\n                  CopyFromUnalignedOffsetW(&fvals_bits, patch_10_fvals, fvals_widx);\n                }\n                // modify to het 1/x = 1, otherwise 0, except in allele_idx ==\n                // 2 special case.\n                if (!allele_idx_m1) {\n                  fvals_bits = (detect_alt1_mask_hi & (~(fvals_bits | ((fvals_bits | detect_alt1_mask_hi) - detect_hom_mask_lo)))) >> (allele_code_width - 1);\n                }\n                // unnecessary to apply bzhi here\n                ++fvals_widx;\n                rare10_lowbits = 0;\n              }\n              const uint32_t cur_lowcode0 = (fvals_bits >> rare10_lowbits) & 1;\n              rare10_lowbits += code10_width;\n              const uintptr_t lowbit = cur_raw_genoarr_xys & 
(-cur_raw_genoarr_xys);\n              target_genoarr[sample_hwidx] ^= lowbit * (lowcode_add + cur_lowcode0);\n            }\n            cur_raw_genoarr_xys &= cur_raw_genoarr_xys - 1;\n            fset_bits = fset_bits >> 1;\n          }\n        } else {\n          // sample_include non-null\n          for (uint32_t uii = 0; uii != loop_len; ++uii) {\n            while (!cur_raw_genoarr_xys) {\n              cur_raw_genoarr_xys = Word10(raw_genoarr[++sample_hwidx]);\n            }\n            if (fset_bits & 1) {\n              if (rare10_lowbits == kBitsPerWord) {\n                if (fvals_widx == fvals_word_ct_m1) {\n                  fvals_bits = SubwordLoad(&(patch_10_fvals[fvals_widx * kBytesPerWord]), ModNz(fvals_byte_ct, kBytesPerWord));\n                } else {\n                  CopyFromUnalignedOffsetW(&fvals_bits, patch_10_fvals, fvals_widx);\n                }\n                // modify to het 1/x = 1, otherwise 0, except in allele_idx ==\n                // 2 special case\n                if (!allele_idx_m1) {\n                  fvals_bits = (detect_alt1_mask_hi & (~(fvals_bits | ((fvals_bits | detect_alt1_mask_hi) - detect_hom_mask_lo)))) >> (allele_code_width - 1);\n                }\n                // unnecessary to apply bzhi here\n                ++fvals_widx;\n                rare10_lowbits = 0;\n              }\n              const uint32_t sample_uidx_lowbits = ctzw(cur_raw_genoarr_xys) / 2;\n              if ((DowncastKWToHW(sample_include)[sample_hwidx]) & (1U << sample_uidx_lowbits)) {\n                const uint32_t sample_idx = RawToSubsettedPos(sample_include, sample_include_cumulative_popcounts, sample_hwidx * kBitsPerWordD2 + sample_uidx_lowbits);\n                const uintptr_t cur_lowcode0 = (fvals_bits >> rare10_lowbits) & 1;\n                const uintptr_t shifted_xor_mult = (lowcode_add + cur_lowcode0) << (2 * (sample_idx % kBitsPerWordD2));\n                target_genoarr[sample_idx / kBitsPerWordD2] ^= shifted_xor_mult;\n              }\n              rare10_lowbits += code10_width;\n            }\n            cur_raw_genoarr_xys &= cur_raw_genoarr_xys - 1;\n            fset_bits = fset_bits >> 1;\n          }\n        }\n      }\n      return kPglRetSuccess;\n    }\n    // allele_idx > 1, allele_ct > 3\n    const uintptr_t detect_all_mask_lo = detect_hom_mask_lo | (detect_hom_mask_lo << allele_code_width);\n    const uintptr_t detect_all_mask_hi = detect_all_mask_lo << (allele_code_width - 1);\n    const uintptr_t xor_word = allele_idx_m1 * detect_all_mask_lo;\n    for (uint32_t fset_widx = 0; ; ++fset_widx) {\n      uintptr_t fset_bits;\n      if (fset_widx >= fset_word_ct_m1) {\n        if (fset_widx > fset_word_ct_m1) {\n          break;\n        }\n        fset_bits = SubwordLoad(&(patch_10_fset[fset_word_ct_m1 * kBytesPerWord]), ModNz(fset_byte_ct, kBytesPerWord));\n        loop_len = ModNz(raw_10_ct, kBitsPerWord);\n      } else {\n        CopyFromUnalignedOffsetW(&fset_bits, patch_10_fset, fset_widx);\n      }\n      if (!sample_include) {\n        for (uint32_t uii = 0; uii != loop_len; ++uii) {\n          while (!cur_raw_genoarr_xys) {\n            cur_raw_genoarr_xys = Word10(raw_genoarr[++sample_hwidx]);\n          }\n          if (fset_bits & 1) {\n            if (rare10_lowbits == kBitsPerWord) {\n              if (fvals_widx == fvals_word_ct_m1) {\n                fvals_bits = SubwordLoad(&(patch_10_fvals[fvals_widx * kBytesPerWord]), ModNz(fvals_byte_ct, kBytesPerWord));\n              } else {\n                
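// The xor/mask lines below are the standard SWAR equal-to-target scan:\n                // after xoring with xor_word, an allele_code_width-wide subfield is\n                // all-zero iff it equals allele_idx_m1, and the mask expression sets\n                // each subfield's high bit iff that subfield is zero.  E.g. with\n                // allele_code_width == 4 and allele_idx_m1 == 3, a hom byte 0x33\n                // xors to 0x00, both nibble flags fire (0x88), the shift yields\n                // 0x11, and the nibble-sum leaves 2 in the low bits; a het byte\n                // such as 0x53 leaves 1, and a non-matching byte leaves 0.\n                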
CopyFromUnalignedOffsetW(&fvals_bits, patch_10_fvals, fvals_widx);\n              }\n              // modify to hom = 2, het = 1, neither = 0\n              fvals_bits = fvals_bits ^ xor_word;\n              fvals_bits = (detect_all_mask_hi & (~(fvals_bits | ((fvals_bits | detect_all_mask_hi) - detect_all_mask_lo)))) >> (allele_code_width - 1);\n              // unnecessary to apply bzhi or detect_hom_mask_lo here\n              fvals_bits = fvals_bits + (fvals_bits >> allele_code_width);\n              ++fvals_widx;\n              rare10_lowbits = 0;\n            }\n            const uintptr_t cur_hit_ct = (fvals_bits >> rare10_lowbits) & 3;\n            rare10_lowbits += code10_width;\n            if (cur_hit_ct) {\n              const uintptr_t lowbit = cur_raw_genoarr_xys & (-cur_raw_genoarr_xys);\n              target_genoarr[sample_hwidx] ^= lowbit * cur_hit_ct;\n            }\n          }\n          cur_raw_genoarr_xys &= cur_raw_genoarr_xys - 1;\n          fset_bits = fset_bits >> 1;\n        }\n      } else {\n        for (uint32_t uii = 0; uii != loop_len; ++uii) {\n          while (!cur_raw_genoarr_xys) {\n            cur_raw_genoarr_xys = Word10(raw_genoarr[++sample_hwidx]);\n          }\n          if (fset_bits & 1) {\n            if (rare10_lowbits == kBitsPerWord) {\n              if (fvals_widx == fvals_word_ct_m1) {\n                fvals_bits = SubwordLoad(&(patch_10_fvals[fvals_widx * kBytesPerWord]), ModNz(fvals_byte_ct, kBytesPerWord));\n              } else {\n                CopyFromUnalignedOffsetW(&fvals_bits, patch_10_fvals, fvals_widx);\n              }\n              // modify to hom = 2, het = 1, neither = 0\n              fvals_bits = fvals_bits ^ xor_word;\n              fvals_bits = (detect_all_mask_hi & (~(fvals_bits | ((fvals_bits | detect_all_mask_hi) - detect_all_mask_lo)))) >> (allele_code_width - 1);\n              if (fvals_widx == fvals_word_ct_m1) {\n                fvals_bits = bzhi_max(fvals_bits, ModNz(rare10_ct * code10_width, kBitsPerWord));\n              }\n              fvals_bits = fvals_bits + (fvals_bits >> allele_code_width);\n              ++fvals_widx;\n              rare10_lowbits = 0;\n            }\n            const uintptr_t cur_hit_ct = (fvals_bits >> rare10_lowbits) & 3;\n            rare10_lowbits += code10_width;\n            if (cur_hit_ct) {\n              const uint32_t sample_uidx_lowbits = ctzw(cur_raw_genoarr_xys) / 2;\n              if ((DowncastKWToHW(sample_include)[sample_hwidx]) & (1U << sample_uidx_lowbits)) {\n                const uint32_t sample_idx = RawToSubsettedPos(sample_include, sample_include_cumulative_popcounts, sample_hwidx * kBitsPerWordD2 + sample_uidx_lowbits);\n                target_genoarr[sample_idx / kBitsPerWordD2] ^= cur_hit_ct << (2 * (sample_idx % kBitsPerWordD2));\n              }\n            }\n          }\n          cur_raw_genoarr_xys &= cur_raw_genoarr_xys - 1;\n          fset_bits = fset_bits >> 1;\n        }\n      }\n    }\n    return kPglRetSuccess;\n  }\n  // aux1b_mode == 1\n  uint32_t rare10_ct;\n  PglErr reterr = ParseAndSaveDeltalist(fread_end, raw_sample_ct, fread_pp, deltalist_workspace, &rare10_ct);\n  if (unlikely(reterr)) {\n    return reterr;\n  }\n  const unsigned char* patch_10_fvals = *fread_pp;\n  const uint32_t fvals_byte_ct = DivUpU64(S_CAST(uint64_t, rare10_ct) << code10_logwidth, CHAR_BIT);\n  if (PtrAddCk(fread_end, fvals_byte_ct, fread_pp)) {\n    return kPglRetMalformedInput;\n  }\n  const uint32_t fvals_word_ct_m1 = (fvals_byte_ct - 1) / kBytesPerWord;\n  if 
((!allele_idx_m1) || (allele_ct == 3)) {\n    // bugfix (29 Dec 2019)\n    const uintptr_t detect_alt1_mask_hi = detect_hom_mask_lo << (allele_code_width - 1);\n    const uintptr_t lowcode_add = 2 - allele_idx_m1;\n    uint32_t loop_len = kBitsPerWord >> code10_logwidth;\n    for (uint32_t fvals_widx = 0; ; ++fvals_widx) {\n      uintptr_t fvals_bits;\n      if (fvals_widx >= fvals_word_ct_m1) {\n        if (fvals_widx > fvals_word_ct_m1) {\n          break;\n        }\n        fvals_bits = SubwordLoad(&(patch_10_fvals[fvals_widx * kBytesPerWord]), ModNz(fvals_byte_ct, kBytesPerWord));\n        loop_len = 1 + ((rare10_ct - 1) & ((kBitsPerWord >> code10_logwidth) - 1));\n      } else {\n        CopyFromUnalignedOffsetW(&fvals_bits, patch_10_fvals, fvals_widx);\n      }\n      if (!allele_idx_m1) {\n        fvals_bits = (detect_alt1_mask_hi & (~(fvals_bits | ((fvals_bits | detect_alt1_mask_hi) - detect_hom_mask_lo)))) >> (allele_code_width - 1);\n      }\n      const uint32_t* cur_deltalist_base = &(deltalist_workspace[fvals_widx << (kBitsPerWordLog2 - code10_logwidth)]);\n      if (!sample_include) {\n        for (uint32_t uii = 0; uii != loop_len; ++uii) {\n          const uint32_t sample_uidx = cur_deltalist_base[uii];\n          const uintptr_t cur_lowcode0 = fvals_bits & 1;\n          const uintptr_t shifted_xor_mult = (lowcode_add + cur_lowcode0) << (2 * (sample_uidx % kBitsPerWordD2));\n          target_genoarr[sample_uidx / kBitsPerWordD2] ^= shifted_xor_mult;\n          fvals_bits = fvals_bits >> code10_width;\n        }\n      } else {\n        for (uint32_t uii = 0; uii != loop_len; ++uii) {\n          const uint32_t sample_uidx = cur_deltalist_base[uii];\n          const uint32_t sample_widx = sample_uidx / kBitsPerWord;\n          const uintptr_t lowbit = k1LU << (sample_uidx % kBitsPerWord);\n          const uintptr_t sample_include_word = sample_include[sample_widx];\n          if (sample_include_word & lowbit) {\n            const uint32_t sample_idx = sample_include_cumulative_popcounts[sample_widx] + PopcountWord(sample_include_word & (lowbit - 1));\n            const uintptr_t cur_lowcode0 = fvals_bits & 1;\n            const uintptr_t shifted_xor_mult = (lowcode_add + cur_lowcode0) << (2 * (sample_idx % kBitsPerWordD2));\n            target_genoarr[sample_idx / kBitsPerWordD2] ^= shifted_xor_mult;\n          }\n          fvals_bits = fvals_bits >> code10_width;\n        }\n      }\n    }\n    return kPglRetSuccess;\n  }\n  // allele_idx > 1, allele_ct > 3\n  const uintptr_t detect_all_mask_lo = detect_hom_mask_lo | (detect_hom_mask_lo << allele_code_width);\n  const uintptr_t detect_all_mask_hi = detect_all_mask_lo << (allele_code_width - 1);\n  detect_hom_mask_lo = detect_hom_mask_lo * 3;\n  const uintptr_t xor_word = allele_idx_m1 * detect_all_mask_lo;\n  for (uint32_t fvals_widx = 0; ; ++fvals_widx) {\n    uintptr_t fvals_bits;\n    if (fvals_widx >= fvals_word_ct_m1) {\n      if (fvals_widx > fvals_word_ct_m1) {\n        break;\n      }\n      fvals_bits = SubwordLoad(&(patch_10_fvals[fvals_widx * kBytesPerWord]), ModNz(fvals_byte_ct, kBytesPerWord));\n    } else {\n      CopyFromUnalignedOffsetW(&fvals_bits, patch_10_fvals, fvals_widx);\n    }\n    fvals_bits = fvals_bits ^ xor_word;\n    fvals_bits = detect_all_mask_hi & (~(fvals_bits | ((fvals_bits | detect_all_mask_hi) - detect_all_mask_lo)));\n    if (fvals_widx == fvals_word_ct_m1) {\n      fvals_bits = bzhi_max(fvals_bits, ModNz(rare10_ct << code10_logwidth, kBitsPerWord));\n    }\n    if (!fvals_bits) {\n      
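// fvals_bits now holds one match flag per allele-code subfield; if the\n      // whole word is zero, none of the corresponding deltalist entries\n      // involve the target allele, so they can be skipped wholesale.\n      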
continue;\n    }\n    fvals_bits = fvals_bits >> (allele_code_width - 1);\n    fvals_bits = (fvals_bits + (fvals_bits >> allele_code_width)) & detect_hom_mask_lo;\n    const uint32_t* cur_deltalist_base = &(deltalist_workspace[fvals_widx << (kBitsPerWordLog2 - code10_logwidth)]);\n    if (!sample_include) {\n      do {\n        const uint32_t bit_idx = ctzw(fvals_bits);\n        const uint32_t sample_uidx = cur_deltalist_base[bit_idx >> code10_logwidth];\n        target_genoarr[sample_uidx / kBitsPerWordD2] ^= k1LU << ((bit_idx % 2) + 2 * (sample_uidx % kBitsPerWordD2));\n        fvals_bits &= fvals_bits - 1;\n      } while (fvals_bits);\n    } else {\n      do {\n        const uint32_t bit_idx = ctzw(fvals_bits);\n        const uint32_t sample_uidx = cur_deltalist_base[bit_idx >> code10_logwidth];\n        const uint32_t sample_widx = sample_uidx / kBitsPerWord;\n        const uintptr_t lowbit = k1LU << (sample_uidx % kBitsPerWord);\n        const uintptr_t sample_include_word = sample_include[sample_widx];\n        if (sample_include_word & lowbit) {\n          const uint32_t sample_idx = sample_include_cumulative_popcounts[sample_widx] + PopcountWord(sample_include_word & (lowbit - 1));\n          target_genoarr[sample_idx / kBitsPerWordD2] ^= k1LU << ((bit_idx % 2) + 2 * (sample_idx % kBitsPerWordD2));\n        }\n        fvals_bits &= fvals_bits - 1;\n      } while (fvals_bits);\n    }\n  }\n  return kPglRetSuccess;\n}\n\n// if aux1b_het_present is true, aux1b_hets becomes a 1-bit-per-sample bitarray\n// with the positions of altx/alty hets in aux1b.\nPglErr GetAux1bHets(const unsigned char* fread_end, const uintptr_t* __restrict raw_genoarr, uint32_t aux1b_mode, uint32_t raw_sample_ct, uint32_t allele_ct, uint32_t raw_10_ct, const unsigned char** fread_pp, uintptr_t* __restrict aux1b_hets, uint32_t* __restrict aux1b_het_presentp, uint32_t* __restrict deltalist_workspace) {\n  if (aux1b_mode == 15) {\n    *aux1b_het_presentp = 0;\n    return kPglRetSuccess;\n  }\n  uintptr_t detect_hom_mask_lo;\n  const uint32_t allele_code_logwidth = GetAux1bConsts(allele_ct, &detect_hom_mask_lo);\n  const uint32_t code10_logwidth = allele_code_logwidth + (allele_code_logwidth != 0);\n  const uint32_t code10_width = 1U << code10_logwidth;\n  const uint32_t allele_code_width = 1U << allele_code_logwidth;\n  const uintptr_t detect_all_mask_lo = detect_hom_mask_lo | (detect_hom_mask_lo << allele_code_width);\n  const uintptr_t detect_all_mask_hi = detect_all_mask_lo << (allele_code_width - 1);\n  Halfword* aux1b_hets_alias = DowncastWToHW(aux1b_hets);\n  uint32_t rare10_lowbits = kBitsPerWord;\n  uint32_t aux1b_het_present = 0;\n  if (!aux1b_mode) {\n    const unsigned char* patch_10_fset = *fread_pp;\n    const uint32_t fset_byte_ct = DivUp(raw_10_ct, 8);\n    const uint32_t fset_word_ct_m1 = (fset_byte_ct - 1) / kBytesPerWord;\n    const uint32_t rare10_ct = PopcountBytes(*fread_pp, fset_byte_ct);\n    *fread_pp += fset_byte_ct;\n    uintptr_t sample_hwidx = 0;\n    uintptr_t cur_raw_genoarr_xys = Word10(raw_genoarr[0]);\n    const unsigned char* patch_10_fvals = *fread_pp;\n    const uint32_t fvals_byte_ct = DivUpU64(S_CAST(uint64_t, rare10_ct) * code10_width, CHAR_BIT);\n    if (PtrAddCk(fread_end, fvals_byte_ct, fread_pp)) {\n      return kPglRetMalformedInput;\n    }\n    const uint32_t fvals_word_ct_m1 = (fvals_byte_ct - 1) / kBytesPerWord;\n    uintptr_t fvals_bits = 0;\n    uint32_t fvals_widx = 0;\n    uint32_t loop_len = kBitsPerWord;\n    for (uint32_t fset_widx = 0; ; ++fset_widx) {\n      
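// One fset bit per altx/alty genotype in the raw data; a set bit means\n      // that genotype has an entry in patch_10_fvals.\n      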
uintptr_t fset_bits;\n      if (fset_widx >= fset_word_ct_m1) {\n        if (fset_widx > fset_word_ct_m1) {\n          break;\n        }\n        fset_bits = SubwordLoad(&(patch_10_fset[fset_word_ct_m1 * kBytesPerWord]), ModNz(fset_byte_ct, kBytesPerWord));\n        loop_len = ModNz(raw_10_ct, kBitsPerWord);\n      } else {\n        CopyFromUnalignedOffsetW(&fset_bits, patch_10_fset, fset_widx);\n      }\n      for (uint32_t uii = 0; uii != loop_len; ++uii) {\n        while (!cur_raw_genoarr_xys) {\n          cur_raw_genoarr_xys = Word10(raw_genoarr[++sample_hwidx]);\n        }\n        if (fset_bits & 1) {\n          if (rare10_lowbits == kBitsPerWord) {\n            if (fvals_widx == fvals_word_ct_m1) {\n              fvals_bits = SubwordLoad(&(patch_10_fvals[fvals_widx * kBytesPerWord]), ModNz(fvals_byte_ct, kBytesPerWord));\n            } else {\n              CopyFromUnalignedOffsetW(&fvals_bits, patch_10_fvals, fvals_widx);\n            }\n            // allele_ct == 3: just invert raw fvals_bits\n            // allele_ct > 3: shift by allele_code_width, xor with self so that\n            // 0 == hom, detect nonzero by inverting the usual check\n            if (allele_ct == 3) {\n              fvals_bits = ~fvals_bits;\n            } else {\n              fvals_bits = fvals_bits ^ (fvals_bits << allele_code_width);\n              // conveniently, removing a ~ here is equivalent to inverting the\n              // relevant bits of the final result\n              fvals_bits = detect_hom_mask_lo & ((fvals_bits | ((fvals_bits | detect_all_mask_hi) - detect_all_mask_lo)) >> (code10_width - 1));\n            }\n            // bzhi only relevant for detecting if there are any hets at all\n            if (!aux1b_het_present) {\n              if (fvals_widx == fvals_word_ct_m1) {\n                fvals_bits = bzhi_max(fvals_bits, ModNz(rare10_ct * code10_width, kBitsPerWord));\n              }\n              if (fvals_bits) {\n                // lazy-initialize\n                aux1b_het_present = 1;\n                ZeroHwArr(2 * BitCtToWordCt(raw_sample_ct), aux1b_hets_alias);\n              }\n            }\n            ++fvals_widx;\n            rare10_lowbits = 0;\n          }\n          if (fvals_bits & (k1LU << rare10_lowbits)) {\n            const uint32_t bit_idx = ctzw(cur_raw_genoarr_xys) / 2;\n            aux1b_hets_alias[sample_hwidx] |= 1U << bit_idx;\n          }\n          rare10_lowbits += code10_width;\n        }\n        cur_raw_genoarr_xys &= cur_raw_genoarr_xys - 1;\n        fset_bits = fset_bits >> 1;\n      }\n    }\n    *aux1b_het_presentp = aux1b_het_present;\n    return kPglRetSuccess;\n  }\n  // aux1b_mode == 1\n  uint32_t rare10_ct;\n  PglErr reterr = ParseAndSaveDeltalist(fread_end, raw_sample_ct, fread_pp, deltalist_workspace, &rare10_ct);\n  if (unlikely(reterr)) {\n    return reterr;\n  }\n  const unsigned char* patch_10_fvals = *fread_pp;\n  const uint32_t fvals_byte_ct = DivUpU64(S_CAST(uint64_t, rare10_ct) << code10_logwidth, CHAR_BIT);\n  if (PtrAddCk(fread_end, fvals_byte_ct, fread_pp)) {\n    return kPglRetMalformedInput;\n  }\n  const uint32_t fvals_word_ct_m1 = (fvals_byte_ct - 1) / kBytesPerWord;\n  for (uint32_t fvals_widx = 0; ; ++fvals_widx) {\n    uintptr_t fvals_bits;\n    if (fvals_widx >= fvals_word_ct_m1) {\n      if (fvals_widx > fvals_word_ct_m1) {\n        break;\n      }\n      fvals_bits = SubwordLoad(&(patch_10_fvals[fvals_widx * kBytesPerWord]), ModNz(fvals_byte_ct, kBytesPerWord));\n    } else {\n      
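// Full interior word.  The transform below mirrors the dense-mode loop\n      // above: invert for allele_ct == 3 (a clear bit encodes a 1/2 het),\n      // otherwise xor each field's two allele codes so hom fields become\n      // zero, then flag the nonzero (het) fields.\n      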
CopyFromUnalignedOffsetW(&fvals_bits, patch_10_fvals, fvals_widx);\n    }\n    if (allele_ct == 3) {\n      fvals_bits = ~fvals_bits;\n    } else {\n      fvals_bits = fvals_bits ^ (fvals_bits << allele_code_width);\n      fvals_bits = detect_hom_mask_lo & ((fvals_bits | ((fvals_bits | detect_all_mask_hi) - detect_all_mask_lo)) >> (code10_width - 1));\n    }\n    if (fvals_widx == fvals_word_ct_m1) {\n      fvals_bits = bzhi_max(fvals_bits, ModNz(rare10_ct << code10_logwidth, kBitsPerWord));\n    }\n    if (!fvals_bits) {\n      continue;\n    }\n    if (!aux1b_het_present) {\n      aux1b_het_present = 1;\n      ZeroHwArr(2 * BitCtToWordCt(raw_sample_ct), aux1b_hets_alias);\n    }\n    const uint32_t* cur_deltalist_base = &(deltalist_workspace[fvals_widx << (kBitsPerWordLog2 - code10_logwidth)]);\n    do {\n      const uint32_t bit_idx = ctzw(fvals_bits);\n      const uint32_t sample_uidx = cur_deltalist_base[bit_idx >> code10_logwidth];\n      aux1b_hets_alias[sample_uidx / kBitsPerWordD2] |= 1U << (sample_uidx % kBitsPerWordD2);\n      fvals_bits &= fvals_bits - 1;\n    } while (fvals_bits);\n  }\n  *aux1b_het_presentp = aux1b_het_present;\n  return kPglRetSuccess;\n}\n\nstatic inline void SuppressHets00(const uintptr_t* allele_countvec, uintptr_t* subsetted_all_hets, uint32_t sample_ct, uintptr_t* subsetted_suppressed_hets) {\n  const uint32_t sample_ctl2 = NypCtToWordCt(sample_ct);\n  MaskWordsToHalfwordsInvmatch(allele_countvec, ~k0LU, sample_ctl2, subsetted_all_hets, subsetted_suppressed_hets);\n}\n\nPglErr Get1Multiallelic(const uintptr_t* __restrict sample_include, const uint32_t* __restrict sample_include_cumulative_popcounts, uint32_t sample_ct, uint32_t vidx, uint32_t allele_idx, PgenReaderMain* pgrp, const unsigned char** fread_pp, const unsigned char** fread_endp, uintptr_t* __restrict all_hets, uintptr_t* __restrict allele_countvec, uintptr_t** subsetted_suppressed_hetp) {\n  // sample_ct > 0; either allele_idx > 1 or ((allele_idx == 1) &&\n  // multiallelic_hc_present)\n  // subsetted_suppressed_het assumed to be initialized to nullptr, if present\n  // at all\n  const uint32_t raw_sample_ct = pgrp->fi.raw_sample_ct;\n  const uint32_t subsetting_required = (sample_ct != raw_sample_ct);\n  uintptr_t* raw_genovec = pgrp->workspace_vec;\n  const unsigned char* fread_ptr;\n  const unsigned char* fread_end;\n  PglErr reterr = ReadRawGenovec(subsetting_required, vidx, pgrp, &fread_ptr, &fread_end, raw_genovec);\n  if (unlikely(reterr)) {\n    return reterr;\n  }\n\n  const uint32_t vrtype = pgrp->fi.vrtypes[vidx];\n  const uint32_t multiallelic_hc_present = VrtypeMultiallelicHc(vrtype);\n  CopyNyparrNonemptySubset(raw_genovec, sample_include, raw_sample_ct, sample_ct, allele_countvec);\n  ZeroTrailingNyps(raw_sample_ct, raw_genovec);\n  if (fread_pp) {\n    *fread_endp = fread_end;\n    if (all_hets) {\n      PgrDetectGenoarrHets(raw_genovec, raw_sample_ct, all_hets);\n    }\n  }\n  if (allele_idx != 1) {\n    GenovecNonmissingToZeroUnsafe(sample_ct, allele_countvec);\n    if (!multiallelic_hc_present) {\n      if (fread_pp) {\n        *fread_pp = fread_ptr;\n      }\n      return kPglRetSuccess;\n    }\n  }\n  const uint32_t aux1_first_byte = *fread_ptr++;\n  const uint32_t aux1a_mode = aux1_first_byte & 15;\n  const uint32_t aux1b_mode = aux1_first_byte >> 4;\n  // only need to initialize these in dense modes\n  uint32_t raw_01_ct = 0;\n  uint32_t raw_10_ct = 0;\n  if ((!aux1a_mode) || (!aux1b_mode)) {\n    GenovecCount12Unsafe(raw_genovec, raw_sample_ct, &raw_01_ct, 
&raw_10_ct);\n  }\n\n  const uintptr_t* allele_idx_offsets = pgrp->fi.allele_idx_offsets;\n  const uint32_t allele_ct = allele_idx_offsets[vidx + 1] - allele_idx_offsets[vidx];\n  if (!subsetting_required) {\n    sample_include = nullptr;\n  }\n  // allele_idx == 1 case:\n  //   allele_countvec currently contains ALT counts; we want to reduce them to\n  //   ALT1 counts.  This can be done with the following steps:\n  //   1. For every element of patch_01_fset, reduce the value from 1 to 0.  We\n  //      don't actually need to look at patch_01_fvals.\n  //   2. For every element of patch_10_fset, reduce the value from 2 depending\n  //      on the low bit(s) of the patch_10_fvals entry (reduce to 0 unless low\n  //      bit(s) are all zero).\n  // allele_idx > 1 case:\n  //   1. For every element of patch_01_fset, set a 1 for each matching value\n  //      of patch_01_fvals.\n  //   2. For every element of patch_10_fset, set a 1 for each het-matching\n  //      value of patch_10_fvals, and a 2 for each hom-match.\n  uint32_t* deltalist_workspace = pgrp->workspace_difflist_sample_ids;\n  // Two cases:\n  // - If allele_idx == 1, convert all aux1a entries from 01 to 00.\n  // - Otherwise, for each matching aux1a entry, convert from 00 to 01.\n  reterr = GenoarrAux1aUpdate(fread_end, sample_include, sample_include_cumulative_popcounts, raw_genovec, aux1a_mode, raw_sample_ct, allele_ct, allele_idx, 1, raw_01_ct, &fread_ptr, allele_countvec, deltalist_workspace);\n  if (unlikely(reterr)) {\n    return reterr;\n  }\n  const unsigned char* aux1b_start = fread_ptr;\n  reterr = GenoarrAux1bStandardUpdate(fread_end, sample_include, sample_include_cumulative_popcounts, raw_genovec, aux1b_mode, raw_sample_ct, allele_ct, allele_idx, raw_10_ct, &fread_ptr, allele_countvec, deltalist_workspace);\n  if ((!fread_pp) || reterr) {\n    return reterr;\n  }\n  *fread_pp = fread_ptr;\n  if (all_hets) {\n    // can merge this with GenoarrAux1bStandardUpdate if this is ever a\n    // significant bottleneck\n    uintptr_t* aux1b_hets = pgrp->workspace_aux1x_present;\n    uint32_t aux1b_het_present;\n    reterr = GetAux1bHets(fread_end, raw_genovec, aux1b_mode, raw_sample_ct, allele_ct, raw_10_ct, &aux1b_start, aux1b_hets, &aux1b_het_present, deltalist_workspace);\n    if (unlikely(reterr)) {\n      return reterr;\n    }\n    if (aux1b_het_present) {\n      BitvecOr(aux1b_hets, BitCtToWordCt(raw_sample_ct), all_hets);\n    }\n    // We now want to make subsetted_suppressed_het flag all hets where neither\n    // allele is equal to allele_idx, i.e. 
the allele_countvec value is 0 yet\n    // the all_hets bit is set.\n    // This was done incorrectly before 17 Aug 2023.\n    if ((allele_idx > 1) || aux1b_het_present) {\n      // We can now clobber the contents of pgrp->workspace_vec (raw_genovec)\n      // and pgrp->workspace_aux1x_present (aux1b_hets).\n      // We use the former as the subsetted_suppressed_het return buffer.\n      uintptr_t* all_hets_subsetted = all_hets;\n      if (subsetting_required) {\n        all_hets_subsetted = aux1b_hets;\n        CopyBitarrSubset(all_hets, sample_include, sample_ct, all_hets_subsetted);\n      }\n      *subsetted_suppressed_hetp = raw_genovec;\n      SuppressHets00(allele_countvec, all_hets_subsetted, sample_ct, raw_genovec);\n    }\n  }\n  return kPglRetSuccess;\n}\n\nPglErr IMPLPgrGet1(const uintptr_t* __restrict sample_include, const uint32_t* __restrict sample_include_cumulative_popcounts, uint32_t sample_ct, uint32_t vidx, uint32_t allele_idx, PgenReaderMain* pgrp, uintptr_t* __restrict allele_countvec) {\n  if (!sample_ct) {\n    return kPglRetSuccess;\n  }\n  const uint32_t vrtype = GetPgfiVrtype(&(pgrp->fi), vidx);\n  const uint32_t multiallelic_hc_present = VrtypeMultiallelicHc(vrtype);\n  if ((!allele_idx) || ((allele_idx == 1) && (!multiallelic_hc_present))) {\n    PglErr reterr = ReadGenovecSubsetUnsafe(sample_include, sample_include_cumulative_popcounts, sample_ct, vidx, pgrp, nullptr, nullptr, allele_countvec);\n    if (unlikely(reterr)) {\n      return reterr;\n    }\n    if (!allele_idx) {\n      GenovecInvertUnsafe(sample_ct, allele_countvec);\n    }\n    return kPglRetSuccess;\n  }\n  return Get1Multiallelic(sample_include, sample_include_cumulative_popcounts, sample_ct, vidx, allele_idx, pgrp, nullptr, nullptr, nullptr, allele_countvec, nullptr);\n}\n\nPglErr IMPLPgrGetInv1(const uintptr_t* __restrict sample_include, const uint32_t* __restrict sample_include_cumulative_popcounts, uint32_t sample_ct, uint32_t vidx, uint32_t allele_idx, PgenReaderMain* pgrp, uintptr_t* __restrict allele_invcountvec) {\n  if (!sample_ct) {\n    return kPglRetSuccess;\n  }\n  const uint32_t vrtype = GetPgfiVrtype(&(pgrp->fi), vidx);\n  const uint32_t multiallelic_hc_present = VrtypeMultiallelicHc(vrtype);\n  if ((!allele_idx) || ((allele_idx == 1) && (!multiallelic_hc_present))) {\n    PglErr reterr = ReadGenovecSubsetUnsafe(sample_include, sample_include_cumulative_popcounts, sample_ct, vidx, pgrp, nullptr, nullptr, allele_invcountvec);\n    if (unlikely(reterr)) {\n      return reterr;\n    }\n    if (allele_idx) {\n      GenovecInvertUnsafe(sample_ct, allele_invcountvec);\n    }\n    return kPglRetSuccess;\n  }\n  PglErr reterr = Get1Multiallelic(sample_include, sample_include_cumulative_popcounts, sample_ct, vidx, allele_idx, pgrp, nullptr, nullptr, nullptr, allele_invcountvec, nullptr);\n  GenovecInvertUnsafe(sample_ct, allele_invcountvec);\n  return reterr;\n}\n\nPglErr IMPLPgrGetInv1DifflistOrGenovec(const uintptr_t* __restrict sample_include, const uint32_t* __restrict sample_include_cumulative_popcounts, uint32_t sample_ct, uint32_t max_simple_difflist_len, uint32_t vidx, uint32_t allele_idx, PgenReaderMain* pgrp, uintptr_t* __restrict allele_invcountvec, uint32_t* __restrict difflist_common_geno_ptr, uintptr_t* __restrict main_raregeno, uint32_t* __restrict difflist_sample_ids, uint32_t* __restrict difflist_len_ptr) {\n  if (!sample_ct) {\n    *difflist_common_geno_ptr = UINT32_MAX;\n    return kPglRetSuccess;\n  }\n  const uint32_t vrtype = GetPgfiVrtype(&(pgrp->fi), vidx);\n  
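// allele_idx 0, or allele_idx 1 with no multiallelic hardcalls: the\n  // generic difflist/genovec reader applies, and its result only needs a\n  // genotype-code inversion when allele_idx == 1.\n  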
const uint32_t multiallelic_hc_present = VrtypeMultiallelicHc(vrtype);\n  if ((!allele_idx) || ((allele_idx == 1) && (!multiallelic_hc_present))) {\n    PglErr reterr = ReadDifflistOrGenovecSubsetUnsafe(sample_include, sample_include_cumulative_popcounts, sample_ct, max_simple_difflist_len, vidx, pgrp, nullptr, nullptr, allele_invcountvec, difflist_common_geno_ptr, main_raregeno, difflist_sample_ids, difflist_len_ptr);\n    if (unlikely(reterr)) {\n      return reterr;\n    }\n    if (allele_idx) {\n      if (*difflist_common_geno_ptr == UINT32_MAX) {\n        GenovecInvertUnsafe(sample_ct, allele_invcountvec);\n      } else {\n        const uint32_t orig_common_geno = *difflist_common_geno_ptr;\n        GenovecInvertUnsafe(*difflist_len_ptr, main_raregeno);\n        if (orig_common_geno != 3) {\n          *difflist_common_geno_ptr = 2 - orig_common_geno;\n        }\n      }\n    }\n    return kPglRetSuccess;\n  }\n  *difflist_common_geno_ptr = UINT32_MAX;\n  PglErr reterr = Get1Multiallelic(sample_include, sample_include_cumulative_popcounts, sample_ct, vidx, allele_idx, pgrp, nullptr, nullptr, nullptr, allele_invcountvec, nullptr);\n  GenovecInvertUnsafe(sample_ct, allele_invcountvec);\n  return reterr;\n}\n\n// Assumes allele_idx0 < allele_idx1, and allele_idx0 < 2.  Rotates hardcalls\n// such that, if no multiallelic hardcalls are present, 0 = 0/0, 1 = 0/1,\n// 2 = 1/1, and 3 = anything else.\nvoid Rotate2(uint32_t allele_idx0, uint32_t allele_idx1, uint32_t sample_ct, uintptr_t* genovec) {\n  if (!allele_idx0) {\n    if (allele_idx1 > 1) {\n      GenovecNonzeroToMissingUnsafe(sample_ct, genovec);\n    }\n  } else {\n    GenovecInvertThenNonzeroToMissingUnsafe(sample_ct, genovec);\n  }\n}\n\nPglErr SkipAux1a(const unsigned char* fread_end, uint32_t aux1a_mode, uint32_t raw_sample_ct, uint32_t allele_ct, uint32_t raw_01_ct, const unsigned char** fread_pp) {\n  if (aux1a_mode == 15) {\n    return kPglRetSuccess;\n  }\n  uint32_t rare01_ct;\n  if (!aux1a_mode) {\n    const uint32_t fset_byte_ct = DivUp(raw_01_ct, CHAR_BIT);\n    rare01_ct = PopcountBytes(*fread_pp, fset_byte_ct);\n    *fread_pp += fset_byte_ct;\n  } else {\n    const unsigned char* group_info_iter;\n    PglErr reterr = ParseDifflistHeader(fread_end, raw_sample_ct, fread_pp, nullptr, &group_info_iter, &rare01_ct);\n    if (unlikely(reterr)) {\n      return reterr;\n    }\n    reterr = SkipDeltalistIds(fread_end, group_info_iter, rare01_ct, raw_sample_ct, 0, fread_pp);\n    if (unlikely(reterr)) {\n      return reterr;\n    }\n  }\n  const uint32_t fvals_byte_ct = GetAux1aAlleleEntryByteCt(allele_ct, rare01_ct);\n  if (PtrAddCk(fread_end, fvals_byte_ct, fread_pp)) {\n    return kPglRetMalformedInput;\n  }\n  return kPglRetSuccess;\n}\n\n// sample_include assumed to be nullptr if no subsetting required\nPglErr GenoarrAux1bUpdate2(const unsigned char* fread_end, const uintptr_t* __restrict sample_include, const uint32_t* __restrict sample_include_cumulative_popcounts, const uintptr_t* __restrict raw_genoarr, uint32_t aux1b_mode, uint32_t raw_sample_ct, uint32_t allele_ct, uint32_t allele_idx0, uint32_t allele_idx1, uint32_t raw_10_ct, const unsigned char** fread_pp, uintptr_t* __restrict target_genoarr, uint32_t* __restrict deltalist_workspace) {\n  // Possible aux1b updates:\n  // - allele_idx0 == 0:\n  //     allele_idx1 == 1: all altx/alty including a rarealt from 10 to 11\n  //     allele_idx1 > 1: set one rarealtx/rarealtx from 11 to 10\n  //\n  // - allele_idx0 == 1: change all alt1/rarealtx from 00 to 01,\n  //   
rarealtx/rarealtx from 00 to 10, and all other aux1b entries to missing.\n  //   This can use the same driver as Get1Multiallelic.\n  //\n  // - allele_idx0 > 1: change all rarealtx/rarealtx from missing to 00,\n  //   rarealtx/rarealty to 01, and rarealty/rarealty to 10.\n  if (aux1b_mode == 15) {\n    return kPglRetSuccess;\n  }\n  if (allele_idx0 == 1) {\n    return GenoarrAux1bStandardUpdate(fread_end, sample_include, sample_include_cumulative_popcounts, raw_genoarr, aux1b_mode, raw_sample_ct, allele_ct, allele_idx1, raw_10_ct, fread_pp, target_genoarr, deltalist_workspace);\n  }\n  uintptr_t detect_hom_mask_lo;\n  const uint32_t allele_code_logwidth = GetAux1bConsts(allele_ct, &detect_hom_mask_lo);\n  const uint32_t code10_logwidth = allele_code_logwidth + (allele_code_logwidth != 0);\n  const uint32_t code10_width = 1U << code10_logwidth;\n  const uintptr_t detect_hom_mask_hi = detect_hom_mask_lo << (code10_width - 1);\n  uintptr_t xor_word2 = allele_idx1 - 1;\n  // fortunately, this sequence of operations happens to work for allele_ct ==\n  // 3\n  xor_word2 = xor_word2 | (xor_word2 << (code10_width / 2));\n  xor_word2 = xor_word2 * detect_hom_mask_lo;\n  uint32_t rare10_lowbits = kBitsPerWord;\n  if (!aux1b_mode) {\n    const unsigned char* patch_10_fset = *fread_pp;\n    const uint32_t fset_byte_ct = DivUp(raw_10_ct, 8);\n    const uint32_t fset_word_ct_m1 = (fset_byte_ct - 1) / kBytesPerWord;\n    const uint32_t rare10_ct = PopcountBytes(*fread_pp, fset_byte_ct);\n    *fread_pp += fset_byte_ct;\n    uintptr_t sample_hwidx = 0;\n    uintptr_t cur_raw_genoarr_xys = Word10(raw_genoarr[0]);\n    const unsigned char* patch_10_fvals = *fread_pp;\n    const uint32_t fvals_byte_ct = DivUpU64(S_CAST(uint64_t, rare10_ct) * code10_width, CHAR_BIT);\n    if (PtrAddCk(fread_end, fvals_byte_ct, fread_pp)) {\n      return kPglRetMalformedInput;\n    }\n    const uint32_t fvals_word_ct_m1 = (fvals_byte_ct - 1) / kBytesPerWord;\n    uintptr_t fvals_bits = 0;\n    uint32_t fvals_widx = 0;\n    uint32_t loop_len = kBitsPerWord;\n    if (!allele_idx0) {\n      for (uint32_t fset_widx = 0; ; ++fset_widx) {\n        uintptr_t fset_bits;\n        if (fset_widx >= fset_word_ct_m1) {\n          if (fset_widx > fset_word_ct_m1) {\n            return kPglRetSuccess;\n          }\n          fset_bits = SubwordLoad(&(patch_10_fset[fset_word_ct_m1 * kBytesPerWord]), ModNz(fset_byte_ct, kBytesPerWord));\n          loop_len = ModNz(raw_10_ct, kBitsPerWord);\n        } else {\n          CopyFromUnalignedOffsetW(&fset_bits, patch_10_fset, fset_widx);\n        }\n        if (!sample_include) {\n          if (allele_idx1 == 1) {\n            // All aux1b 10 -> 11.  
Ignore aux1b_fvals.\n            for (uint32_t uii = 0; uii != loop_len; ++uii) {\n              while (!cur_raw_genoarr_xys) {\n                cur_raw_genoarr_xys = Word10(raw_genoarr[++sample_hwidx]);\n              }\n              if (fset_bits & 1) {\n                const uintptr_t lowbit = cur_raw_genoarr_xys & (-cur_raw_genoarr_xys);\n                target_genoarr[sample_hwidx] ^= lowbit;\n              }\n              cur_raw_genoarr_xys &= cur_raw_genoarr_xys - 1;\n              fset_bits = fset_bits >> 1;\n            }\n          } else {\n            // hom-altx 11 -> 10.\n            for (uint32_t uii = 0; uii != loop_len; ++uii) {\n              while (!cur_raw_genoarr_xys) {\n                cur_raw_genoarr_xys = Word10(raw_genoarr[++sample_hwidx]);\n              }\n              if (fset_bits & 1) {\n                if (rare10_lowbits == kBitsPerWord) {\n                  if (fvals_widx == fvals_word_ct_m1) {\n                    fvals_bits = SubwordLoad(&(patch_10_fvals[fvals_widx * kBytesPerWord]), ModNz(fvals_byte_ct, kBytesPerWord));\n                  } else {\n                    CopyFromUnalignedOffsetW(&fvals_bits, patch_10_fvals, fvals_widx);\n                  }\n                  fvals_bits = fvals_bits ^ xor_word2;\n                  fvals_bits = (detect_hom_mask_hi & (~(fvals_bits | ((fvals_bits | detect_hom_mask_hi) - detect_hom_mask_lo)))) >> (code10_width - 1);\n                  // unnecessary to apply bzhi here\n                  ++fvals_widx;\n                  rare10_lowbits = 0;\n                }\n                if (fvals_bits & (k1LU << rare10_lowbits)) {\n                  const uintptr_t lowbit = cur_raw_genoarr_xys & (-cur_raw_genoarr_xys);\n                  target_genoarr[sample_hwidx] ^= lowbit;\n                }\n                rare10_lowbits += code10_width;\n              }\n              cur_raw_genoarr_xys &= cur_raw_genoarr_xys - 1;\n              fset_bits = fset_bits >> 1;\n            }\n          }\n        } else {\n          // sample_include non-null\n          if (allele_idx1 == 1) {\n            for (uint32_t uii = 0; uii != loop_len; ++uii) {\n              while (!cur_raw_genoarr_xys) {\n                cur_raw_genoarr_xys = Word10(raw_genoarr[++sample_hwidx]);\n              }\n              if (fset_bits & 1) {\n                const uint32_t sample_uidx_lowbits = ctzw(cur_raw_genoarr_xys) / 2;\n                if ((DowncastKWToHW(sample_include)[sample_hwidx]) & (1U << sample_uidx_lowbits)) {\n                  const uint32_t sample_idx = RawToSubsettedPos(sample_include, sample_include_cumulative_popcounts, sample_hwidx * kBitsPerWordD2 + sample_uidx_lowbits);\n                  target_genoarr[sample_idx / kBitsPerWordD2] ^= k1LU << (2 * (sample_idx % kBitsPerWordD2));\n                }\n                rare10_lowbits += code10_width;\n              }\n              cur_raw_genoarr_xys &= cur_raw_genoarr_xys - 1;\n              fset_bits = fset_bits >> 1;\n            }\n          } else {\n            for (uint32_t uii = 0; uii != loop_len; ++uii) {\n              while (!cur_raw_genoarr_xys) {\n                cur_raw_genoarr_xys = Word10(raw_genoarr[++sample_hwidx]);\n              }\n              if (fset_bits & 1) {\n                if (rare10_lowbits == kBitsPerWord) {\n                  if (fvals_widx == fvals_word_ct_m1) {\n                    fvals_bits = SubwordLoad(&(patch_10_fvals[fvals_widx * kBytesPerWord]), ModNz(fvals_byte_ct, kBytesPerWord));\n                  } else {\n                    
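// not the final fvals word, so a full-word unaligned load stays in bounds\n                    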
CopyFromUnalignedOffsetW(&fvals_bits, patch_10_fvals, fvals_widx);\n                  }\n                  fvals_bits = fvals_bits ^ xor_word2;\n                  fvals_bits = (detect_hom_mask_hi & (~(fvals_bits | ((fvals_bits | detect_hom_mask_hi) - detect_hom_mask_lo)))) >> (code10_width - 1);\n                  // unnecessary to apply bzhi here\n                  ++fvals_widx;\n                  rare10_lowbits = 0;\n                }\n                if (fvals_bits & (k1LU << rare10_lowbits)) {\n                  const uint32_t sample_uidx_lowbits = ctzw(cur_raw_genoarr_xys) / 2;\n                  if ((DowncastKWToHW(sample_include)[sample_hwidx]) & (1U << sample_uidx_lowbits)) {\n                    const uint32_t sample_idx = RawToSubsettedPos(sample_include, sample_include_cumulative_popcounts, sample_hwidx * kBitsPerWordD2 + sample_uidx_lowbits);\n                    target_genoarr[sample_idx / kBitsPerWordD2] ^= k1LU << (2 * (sample_idx % kBitsPerWordD2));\n                  }\n                }\n                rare10_lowbits += code10_width;\n              }\n              cur_raw_genoarr_xys &= cur_raw_genoarr_xys - 1;\n              fset_bits = fset_bits >> 1;\n            }\n          }\n        }\n      }\n    }\n    // 2 <= allele_idx0 < allele_idx1 (so allele_ct > 3 guaranteed)\n    uintptr_t xor_word1 = allele_idx1 - 1;\n    uintptr_t xor_word0 = allele_idx0 - 1;\n    xor_word1 = xor_word0 | (xor_word1 << (code10_width / 2));\n    xor_word0 = xor_word0 | (xor_word0 << (code10_width / 2));\n    xor_word1 *= detect_hom_mask_lo;\n    xor_word0 *= detect_hom_mask_lo;\n    for (uint32_t fset_widx = 0; ; ++fset_widx) {\n      uintptr_t fset_bits;\n      if (fset_widx >= fset_word_ct_m1) {\n        if (fset_widx > fset_word_ct_m1) {\n          return kPglRetSuccess;\n        }\n        fset_bits = SubwordLoad(&(patch_10_fset[fset_word_ct_m1 * kBytesPerWord]), ModNz(fset_byte_ct, kBytesPerWord));\n        loop_len = ModNz(raw_10_ct, kBitsPerWord);\n      } else {\n        CopyFromUnalignedOffsetW(&fset_bits, patch_10_fset, fset_widx);\n      }\n      if (!sample_include) {\n        for (uint32_t uii = 0; uii != loop_len; ++uii) {\n          while (!cur_raw_genoarr_xys) {\n            cur_raw_genoarr_xys = Word10(raw_genoarr[++sample_hwidx]);\n          }\n          if (fset_bits & 1) {\n            if (rare10_lowbits == kBitsPerWord) {\n              if (fvals_widx == fvals_word_ct_m1) {\n                fvals_bits = SubwordLoad(&(patch_10_fvals[fvals_widx * kBytesPerWord]), ModNz(fvals_byte_ct, kBytesPerWord));\n              } else {\n                CopyFromUnalignedOffsetW(&fvals_bits, patch_10_fvals, fvals_widx);\n              }\n              uintptr_t match0 = fvals_bits ^ xor_word0;\n              uintptr_t match1 = fvals_bits ^ xor_word1;\n              uintptr_t match2 = fvals_bits ^ xor_word2;\n              match0 = detect_hom_mask_hi & (~(match0 | ((match0 | detect_hom_mask_hi) - detect_hom_mask_lo)));\n              match1 = detect_hom_mask_hi & (~(match1 | ((match1 | detect_hom_mask_hi) - detect_hom_mask_lo)));\n              match2 = detect_hom_mask_hi & (~(match2 | ((match2 | detect_hom_mask_hi) - detect_hom_mask_lo)));\n              // Now want match0 -> 11, match1 -> 10, and match2 -> 01.\n              fvals_bits = ((match0 | match1) >> (code10_width - 2)) | ((match0 | match2) >> (code10_width - 1));\n              // unnecessary to apply bzhi here\n              ++fvals_widx;\n              rare10_lowbits = 0;\n            }\n            const uintptr_t 
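/* 2-bit xor pattern: 3 for match0, 2 for match1, 1 for match2 */ 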
xor_val = (fvals_bits >> rare10_lowbits) & 3;\n            if (xor_val) {\n              const uintptr_t lowbit = cur_raw_genoarr_xys & (-cur_raw_genoarr_xys);\n              target_genoarr[sample_hwidx] ^= lowbit * xor_val;\n            }\n            rare10_lowbits += code10_width;\n          }\n          cur_raw_genoarr_xys &= cur_raw_genoarr_xys - 1;\n          fset_bits = fset_bits >> 1;\n        }\n      } else {\n        // sample_include non-null\n        for (uint32_t uii = 0; uii != loop_len; ++uii) {\n          while (!cur_raw_genoarr_xys) {\n            cur_raw_genoarr_xys = Word10(raw_genoarr[++sample_hwidx]);\n          }\n          if (fset_bits & 1) {\n            if (rare10_lowbits == kBitsPerWord) {\n              if (fvals_widx == fvals_word_ct_m1) {\n                fvals_bits = SubwordLoad(&(patch_10_fvals[fvals_widx * kBytesPerWord]), ModNz(fvals_byte_ct, kBytesPerWord));\n              } else {\n                CopyFromUnalignedOffsetW(&fvals_bits, patch_10_fvals, fvals_widx);\n              }\n              uintptr_t match0 = fvals_bits ^ xor_word0;\n              uintptr_t match1 = fvals_bits ^ xor_word1;\n              uintptr_t match2 = fvals_bits ^ xor_word2;\n              match0 = detect_hom_mask_hi & (~(match0 | ((match0 | detect_hom_mask_hi) - detect_hom_mask_lo)));\n              match1 = detect_hom_mask_hi & (~(match1 | ((match1 | detect_hom_mask_hi) - detect_hom_mask_lo)));\n              match2 = detect_hom_mask_hi & (~(match2 | ((match2 | detect_hom_mask_hi) - detect_hom_mask_lo)));\n              fvals_bits = ((match0 | match1) >> (code10_width - 2)) | ((match0 | match2) >> (code10_width - 1));\n              // unnecessary to apply bzhi here\n              ++fvals_widx;\n              rare10_lowbits = 0;\n            }\n            const uintptr_t xor_val = (fvals_bits >> rare10_lowbits) & 3;\n            if (xor_val) {\n              const uint32_t sample_uidx_lowbits = ctzw(cur_raw_genoarr_xys) / 2;\n              if ((DowncastKWToHW(sample_include)[sample_hwidx]) & (1U << sample_uidx_lowbits)) {\n                const uint32_t sample_idx = RawToSubsettedPos(sample_include, sample_include_cumulative_popcounts, sample_hwidx * kBitsPerWordD2 + sample_uidx_lowbits);\n                target_genoarr[sample_idx / kBitsPerWordD2] ^= xor_val << (2 * (sample_idx % kBitsPerWordD2));\n              }\n            }\n            rare10_lowbits += code10_width;\n          }\n          cur_raw_genoarr_xys &= cur_raw_genoarr_xys - 1;\n          fset_bits = fset_bits >> 1;\n        }\n      }\n    }\n  }\n  // aux1b_mode == 1\n  uint32_t rare10_ct;\n  PglErr reterr = ParseAndSaveDeltalist(fread_end, raw_sample_ct, fread_pp, deltalist_workspace, &rare10_ct);\n  if (unlikely(reterr)) {\n    return reterr;\n  }\n  const unsigned char* patch_10_fvals = *fread_pp;\n  const uint32_t fvals_byte_ct = DivUpU64(S_CAST(uint64_t, rare10_ct) << code10_logwidth, CHAR_BIT);\n  if (PtrAddCk(fread_end, fvals_byte_ct, fread_pp)) {\n    return kPglRetMalformedInput;\n  }\n  if (allele_idx1 == 1) {\n    if (!sample_include) {\n      for (uint32_t rare10_idx = 0; rare10_idx != rare10_ct; ++rare10_idx) {\n        const uint32_t sample_uidx = deltalist_workspace[rare10_idx];\n        target_genoarr[sample_uidx / kBitsPerWordD2] ^= k1LU << (2 * (sample_uidx % kBitsPerWordD2));\n      }\n      return kPglRetSuccess;\n    }\n    for (uint32_t rare10_idx = 0; rare10_idx != rare10_ct; ++rare10_idx) {\n      const uint32_t sample_uidx = deltalist_workspace[rare10_idx];\n      const uint32_t 
sample_widx = sample_uidx / kBitsPerWord;\n      const uintptr_t lowbit = k1LU << (sample_uidx % kBitsPerWord);\n      const uintptr_t sample_include_word = sample_include[sample_widx];\n      if (sample_include_word & lowbit) {\n        const uint32_t sample_idx = sample_include_cumulative_popcounts[sample_widx] + PopcountWord(sample_include_word & (lowbit - 1));\n        target_genoarr[sample_idx / kBitsPerWordD2] ^= k1LU << (2 * (sample_idx % kBitsPerWordD2));\n      }\n    }\n    return kPglRetSuccess;\n  }\n  const uint32_t fvals_word_ct_m1 = (fvals_byte_ct - 1) / kBytesPerWord;\n  if (!allele_idx0) {\n    for (uint32_t fvals_widx = 0; ; ++fvals_widx) {\n      uintptr_t fvals_bits;\n      if (fvals_widx >= fvals_word_ct_m1) {\n        if (fvals_widx > fvals_word_ct_m1) {\n          return kPglRetSuccess;\n        }\n        fvals_bits = SubwordLoad(&(patch_10_fvals[fvals_widx * kBytesPerWord]), ModNz(fvals_byte_ct, kBytesPerWord));\n      } else {\n        CopyFromUnalignedOffsetW(&fvals_bits, patch_10_fvals, fvals_widx);\n      }\n      fvals_bits = fvals_bits ^ xor_word2;\n      fvals_bits = detect_hom_mask_hi & (~(fvals_bits | ((fvals_bits | detect_hom_mask_hi) - detect_hom_mask_lo)));\n      if (fvals_widx == fvals_word_ct_m1) {\n        fvals_bits = bzhi_max(fvals_bits, ModNz(rare10_ct << code10_logwidth, kBitsPerWord));\n      }\n      if (!fvals_bits) {\n        continue;\n      }\n      const uint32_t* cur_deltalist_base = &(deltalist_workspace[fvals_widx << (kBitsPerWordLog2 - code10_logwidth)]);\n      if (!sample_include) {\n        do {\n          const uint32_t bit_idx = ctzw(fvals_bits);\n          const uint32_t sample_uidx = cur_deltalist_base[bit_idx >> code10_logwidth];\n          target_genoarr[sample_uidx / kBitsPerWordD2] ^= k1LU << (2 * (sample_uidx % kBitsPerWordD2));\n          fvals_bits &= fvals_bits - 1;\n        } while (fvals_bits);\n      } else {\n        do {\n          const uint32_t bit_idx = ctzw(fvals_bits);\n          const uint32_t sample_uidx = cur_deltalist_base[bit_idx >> code10_logwidth];\n          const uint32_t sample_widx = sample_uidx / kBitsPerWord;\n          const uintptr_t lowbit = k1LU << (sample_uidx % kBitsPerWord);\n          const uintptr_t sample_include_word = sample_include[sample_widx];\n          if (sample_include_word & lowbit) {\n            const uint32_t sample_idx = sample_include_cumulative_popcounts[sample_widx] + PopcountWord(sample_include_word & (lowbit - 1));\n            target_genoarr[sample_idx / kBitsPerWordD2] ^= k1LU << (2 * (sample_idx % kBitsPerWordD2));\n          }\n          fvals_bits &= fvals_bits - 1;\n        } while (fvals_bits);\n      }\n    }\n  }\n  // 2 <= allele_idx0 < allele_idx1\n  uintptr_t xor_word1 = allele_idx1 - 1;\n  uintptr_t xor_word0 = allele_idx0 - 1;\n  xor_word1 = xor_word0 | (xor_word1 << (code10_width / 2));\n  xor_word0 = xor_word0 | (xor_word0 << (code10_width / 2));\n  xor_word1 *= detect_hom_mask_lo;\n  xor_word0 *= detect_hom_mask_lo;\n  for (uint32_t fvals_widx = 0; ; ++fvals_widx) {\n    uintptr_t fvals_bits;\n    if (fvals_widx >= fvals_word_ct_m1) {\n      if (fvals_widx > fvals_word_ct_m1) {\n        return kPglRetSuccess;\n      }\n      fvals_bits = SubwordLoad(&(patch_10_fvals[fvals_widx * kBytesPerWord]), ModNz(fvals_byte_ct, kBytesPerWord));\n    } else {\n      CopyFromUnalignedOffsetW(&fvals_bits, patch_10_fvals, fvals_widx);\n    }\n    uintptr_t match0 = fvals_bits ^ xor_word0;\n    uintptr_t match1 = fvals_bits ^ xor_word1;\n    uintptr_t match2 = 
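/* fields matching their xor pattern become zero; the three masking lines below set a field's high bit iff that field is zero */ 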
fvals_bits ^ xor_word2;\n    match0 = detect_hom_mask_hi & (~(match0 | ((match0 | detect_hom_mask_hi) - detect_hom_mask_lo)));\n    match1 = detect_hom_mask_hi & (~(match1 | ((match1 | detect_hom_mask_hi) - detect_hom_mask_lo)));\n    match2 = detect_hom_mask_hi & (~(match2 | ((match2 | detect_hom_mask_hi) - detect_hom_mask_lo)));\n    // since code10_width >= 4, we can use match0 == 3 (mod 4), match1 == 2\n    // (mod 4), match2 == 1 (mod 4) representation.\n    fvals_bits = (match0 >> (code10_width - 4)) | (match1 >> (code10_width - 3)) | (match2 >> (code10_width - 2));\n    if (fvals_widx == fvals_word_ct_m1) {\n      fvals_bits = bzhi_max(fvals_bits, ModNz(rare10_ct << code10_logwidth, kBitsPerWord));\n    }\n    if (!fvals_bits) {\n      continue;\n    }\n    const uint32_t* cur_deltalist_base = &(deltalist_workspace[fvals_widx << (kBitsPerWordLog2 - code10_logwidth)]);\n    if (!sample_include) {\n      do {\n        const uintptr_t bit_idx = ctzw(fvals_bits);\n        const uint32_t sample_uidx = cur_deltalist_base[bit_idx >> code10_logwidth];\n        target_genoarr[sample_uidx / kBitsPerWordD2] ^= (bit_idx & 3) << (2 * (sample_uidx % kBitsPerWordD2));\n        fvals_bits &= fvals_bits - 1;\n      } while (fvals_bits);\n    } else {\n      do {\n        const uintptr_t bit_idx = ctzw(fvals_bits);\n        const uint32_t sample_uidx = cur_deltalist_base[bit_idx >> code10_logwidth];\n        const uint32_t sample_widx = sample_uidx / kBitsPerWord;\n        const uintptr_t lowbit = k1LU << (sample_uidx % kBitsPerWord);\n        const uintptr_t sample_include_word = sample_include[sample_widx];\n        if (sample_include_word & lowbit) {\n          const uint32_t sample_idx = sample_include_cumulative_popcounts[sample_widx] + PopcountWord(sample_include_word & (lowbit - 1));\n          target_genoarr[sample_idx / kBitsPerWordD2] ^= (bit_idx & 3) << (2 * (sample_idx % kBitsPerWordD2));\n        }\n        fvals_bits &= fvals_bits - 1;\n      } while (fvals_bits);\n    }\n  }\n}\n\nPglErr IMPLPgrGet2(const uintptr_t* __restrict sample_include, const uint32_t* __restrict sample_include_cumulative_popcounts, uint32_t sample_ct, uint32_t vidx, uint32_t allele_idx0, uint32_t allele_idx1, PgenReaderMain* pgrp, uintptr_t* __restrict genovec) {\n  assert(allele_idx0 != allele_idx1);\n  if (!sample_ct) {\n    return kPglRetSuccess;\n  }\n  const uint32_t raw_sample_ct = pgrp->fi.raw_sample_ct;\n  const uint32_t subsetting_required = (sample_ct != raw_sample_ct);\n  const uint32_t vrtype = GetPgfiVrtype(&(pgrp->fi), vidx);\n  const uint32_t multiallelic_hc_present = VrtypeMultiallelicHc(vrtype);\n  if (!multiallelic_hc_present) {\n    if ((allele_idx0 > 1) && (allele_idx1 > 1)) {\n      // Trivial all-missing case.\n      SetAllBits(2 * sample_ct, genovec);\n      return kPglRetSuccess;\n    }\n    PglErr reterr = ReadGenovecSubsetUnsafe(sample_include, sample_include_cumulative_popcounts, sample_ct, vidx, pgrp, nullptr, nullptr, genovec);\n    if (unlikely(reterr)) {\n      return reterr;\n    }\n    if (allele_idx0 < allele_idx1) {\n      Rotate2(allele_idx0, allele_idx1, sample_ct, genovec);\n      return kPglRetSuccess;\n    }\n    if (allele_idx0 == 1) {\n      GenovecInvertUnsafe(sample_ct, genovec);\n      return kPglRetSuccess;\n    }\n    if (!allele_idx1) {\n      GenovecNonzeroToMissingThenInvertUnsafe(sample_ct, genovec);\n      return kPglRetSuccess;\n    }\n    GenovecNontwoToMissingUnsafe(sample_ct, genovec);\n    return kPglRetSuccess;\n  }\n  uintptr_t* raw_genovec = 
pgrp->workspace_vec;\n  const unsigned char* fread_ptr;\n  const unsigned char* fread_end;\n  PglErr reterr = ReadRawGenovec(subsetting_required, vidx, pgrp, &fread_ptr, &fread_end, raw_genovec);\n  if (unlikely(reterr)) {\n    return reterr;\n  }\n  ZeroTrailingNyps(raw_sample_ct, raw_genovec);\n\n  uint32_t invert = 0;\n  if (allele_idx0 > allele_idx1) {\n    const uint32_t swap = allele_idx0;\n    allele_idx0 = allele_idx1;\n    allele_idx1 = swap;\n    invert = 1;\n  }\n  if (allele_idx0 > 1) {\n    SetAllBits(2 * sample_ct, genovec);\n  } else {\n    CopyNyparrNonemptySubset(raw_genovec, sample_include, raw_sample_ct, sample_ct, genovec);\n    Rotate2(allele_idx0, allele_idx1, sample_ct, genovec);\n  }\n  const uint32_t aux1_first_byte = *fread_ptr++;\n  const uint32_t aux1a_mode = aux1_first_byte & 15;\n  const uint32_t aux1b_mode = aux1_first_byte >> 4;\n  uint32_t raw_01_ct = 0;\n  uint32_t raw_10_ct = 0;\n  if ((!aux1a_mode) || (!aux1b_mode)) {\n    GenovecCount12Unsafe(raw_genovec, raw_sample_ct, &raw_01_ct, &raw_10_ct);\n  }\n  if (!subsetting_required) {\n    sample_include = nullptr;\n  }\n  const uintptr_t* allele_idx_offsets = pgrp->fi.allele_idx_offsets;\n  const uint32_t allele_ct = allele_idx_offsets[vidx + 1] - allele_idx_offsets[vidx];\n  uint32_t* deltalist_workspace = pgrp->workspace_difflist_sample_ids;\n  if (!allele_idx0) {\n    // Two cases:\n    // - If allele_idx == 1, convert all aux1a entries from 01 to 11.\n    // - Otherwise, for each matching aux1a entry, convert from 11 to 01.\n    reterr = GenoarrAux1aUpdate(fread_end, sample_include, sample_include_cumulative_popcounts, raw_genovec, aux1a_mode, raw_sample_ct, allele_ct, allele_idx1, 2, raw_01_ct, &fread_ptr, genovec, deltalist_workspace);\n  } else {\n    reterr = SkipAux1a(fread_end, aux1a_mode, raw_sample_ct, allele_ct, raw_01_ct, &fread_ptr);\n  }\n  if (unlikely(reterr)) {\n    return reterr;\n  }\n  reterr = GenoarrAux1bUpdate2(fread_end, sample_include, sample_include_cumulative_popcounts, raw_genovec, aux1b_mode, raw_sample_ct, allele_ct, allele_idx0, allele_idx1, raw_10_ct, &fread_ptr, genovec, deltalist_workspace);\n  if (unlikely(reterr)) {\n    return reterr;\n  }\n  if (invert) {\n    GenovecInvertUnsafe(sample_ct, genovec);\n  }\n  return kPglRetSuccess;\n}\n\nvoid PreinitPgv(PgenVariant* pgvp) {\n  pgvp->genovec = nullptr;\n  pgvp->patch_01_set = nullptr;\n  pgvp->patch_01_vals = nullptr;\n  pgvp->patch_10_set = nullptr;\n  pgvp->patch_10_vals = nullptr;\n  pgvp->phasepresent = nullptr;\n  pgvp->phaseinfo = nullptr;\n  pgvp->dosage_present = nullptr;\n  pgvp->dosage_main = nullptr;\n  pgvp->multidosage_present = nullptr;\n  pgvp->multidosage_cts = nullptr;\n  pgvp->multidosage_codes = nullptr;\n  pgvp->multidosage_vals = nullptr;\n  pgvp->dphase_present = nullptr;\n  pgvp->dphase_delta = nullptr;\n  pgvp->multidphase_present = nullptr;\n  pgvp->multidphase_cts = nullptr;\n  pgvp->multidphase_codes = nullptr;\n  pgvp->multidphase_delta = nullptr;\n\n  pgvp->patch_01_ct = 0;\n  pgvp->patch_10_ct = 0;\n  pgvp->phasepresent_ct = 0;\n  pgvp->dosage_ct = 0;\n  pgvp->multidosage_sample_ct = 0;\n  pgvp->dphase_ct = 0;\n  pgvp->multidphase_sample_ct = 0;\n}\n\n// similar to ParseAndSaveDifflist()\nPglErr ParseAndSaveDeltalistAsBitarr(const unsigned char* fread_end, uint32_t raw_sample_ct, const unsigned char** fread_pp, uintptr_t* deltalist_include, uint32_t* deltalist_len_ptr) {\n  const unsigned char* group_info_iter;\n  PglErr reterr = ParseDifflistHeader(fread_end, raw_sample_ct, fread_pp, 
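/* unlike a difflist, a deltalist has no genotype values to copy */ 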
nullptr, &group_info_iter, deltalist_len_ptr);\n  const uint32_t deltalist_len = *deltalist_len_ptr;\n  if (reterr || (!deltalist_len)) {\n    return reterr;\n  }\n  const uint32_t sample_id_byte_ct = BytesToRepresentNzU32(raw_sample_ct);\n  const uint32_t raw_sample_ctl = BitCtToWordCt(raw_sample_ct);\n  const uint32_t group_idx_last = (deltalist_len - 1) / kPglDifflistGroupSize;\n  ZeroWArr(raw_sample_ctl, deltalist_include);\n  uint32_t group_len_m1 = kPglDifflistGroupSize - 1;\n  for (uint32_t group_idx = 0; ; ++group_idx) {\n    if (group_idx >= group_idx_last) {\n      if (group_idx > group_idx_last) {\n        return kPglRetSuccess;\n      }\n      group_len_m1 &= deltalist_len - 1;\n    }\n    uintptr_t raw_sample_idx = SubU32Load(group_info_iter, sample_id_byte_ct);\n    group_info_iter = &(group_info_iter[sample_id_byte_ct]);\n    for (uint32_t raw_deltalist_idx_lowbits = 0; ; ++raw_deltalist_idx_lowbits) {\n      // always check, otherwise we may scribble over arbitrary memory\n      if (unlikely(raw_sample_idx >= raw_sample_ct)) {\n        return kPglRetMalformedInput;\n      }\n      SetBit(raw_sample_idx, deltalist_include);\n      if (raw_deltalist_idx_lowbits == group_len_m1) {\n        break;\n      }\n      raw_sample_idx += GetVint31(fread_end, fread_pp);\n    }\n  }\n}\n\nstatic_assert(sizeof(AlleleCode) == 1, \"GetAux1aCodes() must be updated.\");\n// read-buffer patch_01_vals, patch_10_vals, etc. assumed to be word-aligned.\nPglErr GetAux1aCodes(const unsigned char* fread_end, uint32_t rare01_ct, uint32_t allele_ct, const unsigned char** fread_pp, AlleleCode* __restrict patch_01_vals) {\n  if (allele_ct == 3) {\n    memset(patch_01_vals, 2, rare01_ct);\n    return kPglRetSuccess;\n  }\n  const unsigned char* patch_01_fvals = *fread_pp;\n  if (allele_ct == 4) {\n    const uint32_t patch_01_fvals_byte_ct = DivUp(rare01_ct, CHAR_BIT);\n    if (PtrAddCk(fread_end, patch_01_fvals_byte_ct, fread_pp)) {\n      return kPglRetMalformedInput;\n    }\n    Expand1bitTo8(patch_01_fvals, rare01_ct, 2, patch_01_vals);\n    return kPglRetSuccess;\n  }\n  if (allele_ct < 7) {\n    const uint32_t patch_01_fvals_byte_ct = DivUp(rare01_ct, 4);\n    if (PtrAddCk(fread_end, patch_01_fvals_byte_ct, fread_pp)) {\n      return kPglRetMalformedInput;\n    }\n    Expand2bitTo8(patch_01_fvals, rare01_ct, 2, patch_01_vals);\n    return kPglRetSuccess;\n  }\n  if (allele_ct < 19) {\n    const uint32_t patch_01_fvals_byte_ct = DivUp(rare01_ct, 2);\n    if (PtrAddCk(fread_end, patch_01_fvals_byte_ct, fread_pp)) {\n      return kPglRetMalformedInput;\n    }\n    Expand4bitTo8(patch_01_fvals, rare01_ct, 2, patch_01_vals);\n    return kPglRetSuccess;\n  }\n  if (PtrAddCk(fread_end, rare01_ct, fread_pp)) {\n    return kPglRetMalformedInput;\n  }\n  // todo: verify the compiler recognizes this\n  for (uint32_t uii = 0; uii < rare01_ct; ++uii) {\n    patch_01_vals[uii] = patch_01_fvals[uii] + 2;\n  }\n  return kPglRetSuccess;\n}\n\n// Assumes aux1a_mode != 15.\nPglErr ExportAux1a(const unsigned char* fread_end, const uintptr_t* __restrict raw_genoarr, uint32_t aux1a_mode, uint32_t raw_sample_ct, uint32_t allele_ct, uint32_t raw_01_ct, const unsigned char** fread_pp, uintptr_t* __restrict patch_01_set, AlleleCode* __restrict patch_01_vals, uint32_t* __restrict rare01_ctp) {\n  uint32_t rare01_ct;\n  if (!aux1a_mode) {\n    const unsigned char* patch_01_fset = *fread_pp;\n    const uint32_t fset_byte_ct = DivUp(raw_01_ct, CHAR_BIT);\n    if (PtrAddCk(fread_end, fset_byte_ct, fread_pp)) {\n      return 
kPglRetMalformedInput;\n    }\n    rare01_ct = PopcountBytes(patch_01_fset, fset_byte_ct);\n    ExpandBytearrFromGenoarr(patch_01_fset, raw_genoarr, kMask5555, NypCtToWordCt(raw_sample_ct), raw_01_ct, 0, patch_01_set);\n  } else {\n    if (unlikely(ParseAndSaveDeltalistAsBitarr(fread_end, raw_sample_ct, fread_pp, patch_01_set, &rare01_ct))) {\n      return kPglRetMalformedInput;\n    }\n  }\n  *rare01_ctp = rare01_ct;\n  return GetAux1aCodes(fread_end, rare01_ct, allele_ct, fread_pp, patch_01_vals);\n}\n\nPglErr ExportAux1aProperSubset(const unsigned char* fread_end, const uintptr_t* __restrict sample_include, const uint32_t* __restrict sample_include_cumulative_popcounts, const uintptr_t* __restrict raw_genoarr, uint32_t aux1a_mode, uint32_t raw_sample_ct, uint32_t sample_ct, uint32_t allele_ct, uint32_t raw_01_ct, const unsigned char** fread_pp, uintptr_t* __restrict dst_01_set, AlleleCode* __restrict dst_01_vals, uint32_t* __restrict rare01_ctp, uint32_t* __restrict deltalist_workspace) {\n  const uint32_t allele_code_width = GetAux1aWidth(allele_ct);\n  const uintptr_t allele_code_mask = (1U << allele_code_width) - 1;\n  memset(dst_01_set, 0, BitCtToWordCt(sample_ct) * sizeof(intptr_t));\n  AlleleCode* dst_01_vals_iter = dst_01_vals;\n  if (!aux1a_mode) {\n    // similar to GenoarrAux1aUpdate()\n    const unsigned char* patch_01_fset = *fread_pp;\n    const uint32_t fset_byte_ct = DivUp(raw_01_ct, CHAR_BIT);\n    const uint32_t rare01_ct = PopcountBytes(patch_01_fset, fset_byte_ct);\n    if (PtrAddCk(fread_end, fset_byte_ct, fread_pp)) {\n      return kPglRetMalformedInput;\n    }\n    const unsigned char* patch_01_fvals = *fread_pp;\n    uintptr_t sample_hwidx = 0;\n    uintptr_t cur_raw_genoarr_hets = Word01(raw_genoarr[0]);\n    uint32_t loop_len = kBitsPerWord;\n    const uintptr_t fvals_byte_ct = DivUpU64(S_CAST(uint64_t, rare01_ct) * allele_code_width, 8);\n    if (PtrAddCk(fread_end, fvals_byte_ct, fread_pp)) {\n      return kPglRetMalformedInput;\n    }\n    const uint32_t fset_word_ct_m1 = (fset_byte_ct - 1) / kBytesPerWord;\n    const uint32_t fvals_word_ct_m1 = (fvals_byte_ct - 1) / kBytesPerWord;\n    uintptr_t fvals_bits = 0;\n    uint32_t fvals_widx = 0;\n    uint32_t rare01_lowbits = kBitsPerWord;\n    for (uint32_t fset_widx = 0; ; ++fset_widx) {\n      uintptr_t fset_bits;\n      if (fset_widx >= fset_word_ct_m1) {\n        if (fset_widx > fset_word_ct_m1) {\n          break;\n        }\n        fset_bits = SubwordLoad(&(patch_01_fset[fset_word_ct_m1 * kBytesPerWord]), ModNz(fset_byte_ct, kBytesPerWord));\n        loop_len = ModNz(raw_01_ct, kBitsPerWord);\n      } else {\n        CopyFromUnalignedOffsetW(&fset_bits, patch_01_fset, fset_widx);\n      }\n      if (allele_ct == 3) {\n        for (uint32_t uii = 0; uii != loop_len; ++uii) {\n          while (!cur_raw_genoarr_hets) {\n            cur_raw_genoarr_hets = Word01(raw_genoarr[++sample_hwidx]);\n          }\n          if (fset_bits & 1) {\n            const uint32_t sample_uidx_lowbits = ctzw(cur_raw_genoarr_hets) / 2;\n            if ((DowncastKWToHW(sample_include)[sample_hwidx]) & (1U << sample_uidx_lowbits)) {\n              const uint32_t sample_idx = RawToSubsettedPos(sample_include, sample_include_cumulative_popcounts, sample_hwidx * kBitsPerWordD2 + sample_uidx_lowbits);\n              SetBit(sample_idx, dst_01_set);\n              *dst_01_vals_iter++ = 2;\n            }\n          }\n          cur_raw_genoarr_hets &= cur_raw_genoarr_hets - 1;\n          fset_bits = fset_bits >> 1;\n        }\n      } 
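\n      // allele_ct > 3: each kept het's allele code is read from the packed\n      // fvals stream; stored code c decodes to allele c + 2\n      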
else {\n        for (uint32_t uii = 0; uii != loop_len; ++uii) {\n          while (!cur_raw_genoarr_hets) {\n            cur_raw_genoarr_hets = Word01(raw_genoarr[++sample_hwidx]);\n          }\n          if (fset_bits & 1) {\n            if (rare01_lowbits == kBitsPerWord) {\n              if (fvals_widx == fvals_word_ct_m1) {\n                fvals_bits = SubwordLoad(&(patch_01_fvals[fvals_widx * kBytesPerWord]), ModNz(fvals_byte_ct, kBytesPerWord));\n              } else {\n                CopyFromUnalignedOffsetW(&fvals_bits, patch_01_fvals, fvals_widx);\n              }\n              // unnecessary to apply bzhi here\n              ++fvals_widx;\n              rare01_lowbits = 0;\n            }\n            const uint32_t sample_uidx_lowbits = ctzw(cur_raw_genoarr_hets) / 2;\n            if ((DowncastKWToHW(sample_include)[sample_hwidx]) & (1U << sample_uidx_lowbits)) {\n              const uint32_t sample_idx = RawToSubsettedPos(sample_include, sample_include_cumulative_popcounts, sample_hwidx * kBitsPerWordD2 + sample_uidx_lowbits);\n              SetBit(sample_idx, dst_01_set);\n              *dst_01_vals_iter++ = 2 + ((fvals_bits >> rare01_lowbits) & allele_code_mask);\n            }\n            rare01_lowbits += allele_code_width;\n          }\n          cur_raw_genoarr_hets &= cur_raw_genoarr_hets - 1;\n          fset_bits = fset_bits >> 1;\n        }\n      }\n    }\n    *rare01_ctp = dst_01_vals_iter - dst_01_vals;\n    return kPglRetSuccess;\n  }\n  // aux1a_mode == 1\n  uint32_t rare01_ct;\n  PglErr reterr = ParseAndSaveDeltalist(fread_end, raw_sample_ct, fread_pp, deltalist_workspace, &rare01_ct);\n  if (unlikely(reterr)) {\n    return reterr;\n  }\n  const unsigned char* patch_01_fvals = *fread_pp;\n  const uintptr_t fvals_byte_ct = DivUpU64(S_CAST(uint64_t, rare01_ct) * allele_code_width, 8);\n  if (PtrAddCk(fread_end, fvals_byte_ct, fread_pp)) {\n    return kPglRetMalformedInput;\n  }\n  if (allele_ct == 3) {\n    for (uint32_t rare01_idx = 0; rare01_idx != rare01_ct; ++rare01_idx) {\n      const uint32_t sample_uidx = deltalist_workspace[rare01_idx];\n      // could wrap this boilerplate\n      const uint32_t sample_widx = sample_uidx / kBitsPerWord;\n      const uintptr_t lowbit = k1LU << (sample_uidx % kBitsPerWord);\n      const uintptr_t sample_include_word = sample_include[sample_widx];\n      if (sample_include_word & lowbit) {\n        const uint32_t sample_idx = sample_include_cumulative_popcounts[sample_widx] + PopcountWord(sample_include_word & (lowbit - 1));\n        SetBit(sample_idx, dst_01_set);\n        *dst_01_vals_iter++ = 2;\n      }\n    }\n    *rare01_ctp = dst_01_vals_iter - dst_01_vals;\n    return kPglRetSuccess;\n  }\n  const uint32_t fvals_word_ct_m1 = (fvals_byte_ct - 1) / kBytesPerWord;\n  const uint32_t allele_code_logwidth = ctzu32(allele_code_width);\n  uint32_t loop_len = kBitsPerWord >> allele_code_logwidth;\n  for (uint32_t fvals_widx = 0; ; ++fvals_widx) {\n    uintptr_t fvals_bits;\n    if (fvals_widx >= fvals_word_ct_m1) {\n      if (fvals_widx > fvals_word_ct_m1) {\n        break;\n      }\n      fvals_bits = SubwordLoad(&(patch_01_fvals[fvals_widx * kBytesPerWord]), ModNz(fvals_byte_ct, kBytesPerWord));\n      loop_len = 1 + ((rare01_ct - 1) & (loop_len - 1));\n    } else {\n      CopyFromUnalignedOffsetW(&fvals_bits, patch_01_fvals, fvals_widx);\n    }\n    const uint32_t* cur_deltalist_base = &(deltalist_workspace[fvals_widx << (kBitsPerWordLog2 - allele_code_logwidth)]);\n    for (uint32_t uii = 0; uii != loop_len; ++uii) {\n   
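// deltalist entries are raw sample indices; keep only those in\n      // sample_include, remapped to subsetted positions\n   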
   const uint32_t sample_uidx = cur_deltalist_base[uii];\n      const uint32_t sample_widx = sample_uidx / kBitsPerWord;\n      const uintptr_t lowbit = k1LU << (sample_uidx % kBitsPerWord);\n      const uintptr_t sample_include_word = sample_include[sample_widx];\n      if (sample_include_word & lowbit) {\n        const uint32_t sample_idx = sample_include_cumulative_popcounts[sample_widx] + PopcountWord(sample_include_word & (lowbit - 1));\n        SetBit(sample_idx, dst_01_set);\n        *dst_01_vals_iter++ = 2 + ((fvals_bits >> (uii << allele_code_logwidth)) & allele_code_mask);\n      }\n    }\n  }\n  *rare01_ctp = dst_01_vals_iter - dst_01_vals;\n  return kPglRetSuccess;\n}\n\nstatic_assert(sizeof(AlleleCode) == 1, \"GetAux1bCodes() must be updated.\");\nPglErr GetAux1bCodes(const unsigned char* fread_end, uint32_t rare10_ct, uint32_t allele_ct, const unsigned char** fread_pp, AlleleCode* __restrict patch_10_vals) {\n  const unsigned char* patch_10_fvals = *fread_pp;\n  if (allele_ct == 3) {\n    // 1 bit, distinguishes between 0x0201 and 0x0202\n    const uint32_t patch_10_fvals_byte_ct = DivUp(rare10_ct, CHAR_BIT);\n    if (PtrAddCk(fread_end, patch_10_fvals_byte_ct, fread_pp)) {\n      return kPglRetMalformedInput;\n    }\n    Expand1bitTo16(patch_10_fvals, rare10_ct, 0x0201, patch_10_vals);\n    return kPglRetSuccess;\n  }\n  const uint32_t rare10_ct_x2 = rare10_ct * 2;\n  if (allele_ct < 6) {\n    // 2+2 bits, add 1\n    const uint32_t patch_10_fvals_byte_ct = DivUp(rare10_ct, 2);\n    if (PtrAddCk(fread_end, patch_10_fvals_byte_ct, fread_pp)) {\n      return kPglRetMalformedInput;\n    }\n    Expand2bitTo8(patch_10_fvals, rare10_ct_x2, 1, patch_10_vals);\n    return kPglRetSuccess;\n  }\n  if (allele_ct < 18) {\n    // 4+4 bits\n    if (PtrAddCk(fread_end, rare10_ct, fread_pp)) {\n      return kPglRetMalformedInput;\n    }\n    Expand4bitTo8(patch_10_fvals, rare10_ct_x2, 1, patch_10_vals);\n    return kPglRetSuccess;\n  }\n  if (PtrAddCk(fread_end, rare10_ct_x2, fread_pp)) {\n    return kPglRetMalformedInput;\n  }\n  // todo: verify the compiler recognizes this\n  for (uint32_t uii = 0; uii < rare10_ct_x2; ++uii) {\n    patch_10_vals[uii] = patch_10_fvals[uii] + 1;\n  }\n  return kPglRetSuccess;\n}\n\n// Assumes aux1b_mode != 15.\nPglErr ExportAux1b(const unsigned char* fread_end, const uintptr_t* __restrict raw_genoarr, uint32_t aux1b_mode, uint32_t raw_sample_ct, uint32_t allele_ct, uint32_t raw_10_ct, const unsigned char** fread_pp, uintptr_t* __restrict patch_10_set, AlleleCode* __restrict patch_10_vals, uint32_t* __restrict rare10_ctp) {\n  uint32_t rare10_ct;\n  if (!aux1b_mode) {\n    const unsigned char* patch_10_fset = *fread_pp;\n    const uint32_t fset_byte_ct = DivUp(raw_10_ct, CHAR_BIT);\n    if (PtrAddCk(fread_end, fset_byte_ct, fread_pp)) {\n      return kPglRetMalformedInput;\n    }\n    rare10_ct = PopcountBytes(patch_10_fset, fset_byte_ct);\n    ExpandBytearrFromGenoarr(patch_10_fset, raw_genoarr, kMaskAAAA, NypCtToWordCt(raw_sample_ct), raw_10_ct, 0, patch_10_set);\n  } else {\n    if (unlikely(ParseAndSaveDeltalistAsBitarr(fread_end, raw_sample_ct, fread_pp, patch_10_set, &rare10_ct))) {\n      return kPglRetMalformedInput;\n    }\n  }\n  *rare10_ctp = rare10_ct;\n  return GetAux1bCodes(fread_end, rare10_ct, allele_ct, fread_pp, patch_10_vals);\n}\n\nPglErr ExportAux1bProperSubset(const unsigned char* fread_end, const uintptr_t* __restrict sample_include, const uint32_t* __restrict sample_include_cumulative_popcounts, const uintptr_t* __restrict raw_genoarr, 
uint32_t aux1b_mode, uint32_t raw_sample_ct, uint32_t sample_ct, uint32_t allele_ct, uint32_t raw_10_ct, const unsigned char** fread_pp, uintptr_t* __restrict dst_10_set, AlleleCode* __restrict dst_10_vals, uint32_t* __restrict rare10_ctp, uint32_t* __restrict deltalist_workspace) {\n  uintptr_t detect_hom_mask_lo;  // unused\n  const uint32_t allele_code_logwidth = GetAux1bConsts(allele_ct, &detect_hom_mask_lo);\n  const uint32_t allele_code_width = 1U << allele_code_logwidth;\n  const uintptr_t allele_code_mask = (1U << allele_code_width) - 1;\n  const uint32_t code10_logwidth = allele_code_logwidth + (allele_code_logwidth != 0);\n  const uint32_t code10_width = 1U << code10_logwidth;\n  memset(dst_10_set, 0, BitCtToWordCt(sample_ct) * sizeof(intptr_t));\n  AlleleCode* dst_10_vals_iter = dst_10_vals;\n  if (!aux1b_mode) {\n    const unsigned char* patch_10_fset = *fread_pp;\n    const uint32_t fset_byte_ct = DivUp(raw_10_ct, CHAR_BIT);\n    const uint32_t rare10_ct = PopcountBytes(patch_10_fset, fset_byte_ct);\n    if (PtrAddCk(fread_end, fset_byte_ct, fread_pp)) {\n      return kPglRetMalformedInput;\n    }\n    const unsigned char* patch_10_fvals = *fread_pp;\n    uintptr_t sample_hwidx = 0;\n    uintptr_t cur_raw_genoarr_xys = Word10(raw_genoarr[0]);\n    uint32_t loop_len = kBitsPerWord;\n    const uintptr_t fvals_byte_ct = DivUpU64(S_CAST(uint64_t, rare10_ct) * code10_width, 8);\n    if (PtrAddCk(fread_end, fvals_byte_ct, fread_pp)) {\n      return kPglRetMalformedInput;\n    }\n    const uint32_t fset_word_ct_m1 = (fset_byte_ct - 1) / kBytesPerWord;\n    const uint32_t fvals_word_ct_m1 = (fvals_byte_ct - 1) / kBytesPerWord;\n    uintptr_t fvals_bits = 0;\n    uint32_t fvals_widx = 0;\n    uint32_t rare10_lowbits = kBitsPerWord;\n    for (uint32_t fset_widx = 0; ; ++fset_widx) {\n      uintptr_t fset_bits;\n      if (fset_widx >= fset_word_ct_m1) {\n        if (fset_widx > fset_word_ct_m1) {\n          break;\n        }\n        fset_bits = SubwordLoad(&(patch_10_fset[fset_word_ct_m1 * kBytesPerWord]), ModNz(fset_byte_ct, kBytesPerWord));\n        loop_len = ModNz(raw_10_ct, kBitsPerWord);\n      } else {\n        CopyFromUnalignedOffsetW(&fset_bits, patch_10_fset, fset_widx);\n      }\n      if (allele_ct == 3) {\n        for (uint32_t uii = 0; uii != loop_len; ++uii) {\n          while (!cur_raw_genoarr_xys) {\n            cur_raw_genoarr_xys = Word10(raw_genoarr[++sample_hwidx]);\n          }\n          if (fset_bits & 1) {\n            if (rare10_lowbits == kBitsPerWord) {\n              if (fvals_widx == fvals_word_ct_m1) {\n                fvals_bits = SubwordLoad(&(patch_10_fvals[fvals_widx * kBytesPerWord]), ModNz(fvals_byte_ct, kBytesPerWord));\n              } else {\n                CopyFromUnalignedOffsetW(&fvals_bits, patch_10_fvals, fvals_widx);\n              }\n              // unnecessary to apply bzhi here\n              ++fvals_widx;\n              rare10_lowbits = 0;\n            }\n            const uint32_t sample_uidx_lowbits = ctzw(cur_raw_genoarr_xys) / 2;\n            if ((DowncastKWToHW(sample_include)[sample_hwidx]) & (1U << sample_uidx_lowbits)) {\n              const uint32_t sample_idx = RawToSubsettedPos(sample_include, sample_include_cumulative_popcounts, sample_hwidx * kBitsPerWordD2 + sample_uidx_lowbits);\n              SetBit(sample_idx, dst_10_set);\n              *dst_10_vals_iter++ = 1 + ((fvals_bits >> rare10_lowbits) & 1);\n              *dst_10_vals_iter++ = 2;\n            }\n            ++rare10_lowbits;\n          }\n          
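// clear the lowest set bit: advance to the next genotype-2 (alt/alt) hardcall\n          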
cur_raw_genoarr_xys &= cur_raw_genoarr_xys - 1;\n          fset_bits = fset_bits >> 1;\n        }\n      } else {\n        for (uint32_t uii = 0; uii != loop_len; ++uii) {\n          while (!cur_raw_genoarr_xys) {\n            cur_raw_genoarr_xys = Word10(raw_genoarr[++sample_hwidx]);\n          }\n          if (fset_bits & 1) {\n            if (rare10_lowbits == kBitsPerWord) {\n              if (fvals_widx == fvals_word_ct_m1) {\n                fvals_bits = SubwordLoad(&(patch_10_fvals[fvals_widx * kBytesPerWord]), ModNz(fvals_byte_ct, kBytesPerWord));\n              } else {\n                CopyFromUnalignedOffsetW(&fvals_bits, patch_10_fvals, fvals_widx);\n              }\n              // unnecessary to apply bzhi here\n              ++fvals_widx;\n              rare10_lowbits = 0;\n            }\n            const uint32_t sample_uidx_lowbits = ctzw(cur_raw_genoarr_xys) / 2;\n            if ((DowncastKWToHW(sample_include)[sample_hwidx]) & (1U << sample_uidx_lowbits)) {\n              const uint32_t sample_idx = RawToSubsettedPos(sample_include, sample_include_cumulative_popcounts, sample_hwidx * kBitsPerWordD2 + sample_uidx_lowbits);\n              SetBit(sample_idx, dst_10_set);\n              const uintptr_t cur_code_pair = fvals_bits >> rare10_lowbits;\n              const uint32_t cur_code_hi = (cur_code_pair >> allele_code_width) & allele_code_mask;\n              const uint32_t cur_code_lo = cur_code_pair & allele_code_mask;\n              *dst_10_vals_iter++ = 1 + cur_code_lo;\n              *dst_10_vals_iter++ = 1 + cur_code_hi;\n            }\n            rare10_lowbits += code10_width;\n          }\n          cur_raw_genoarr_xys &= cur_raw_genoarr_xys - 1;\n          fset_bits = fset_bits >> 1;\n        }\n      }\n    }\n    *rare10_ctp = S_CAST(uintptr_t, dst_10_vals_iter - dst_10_vals) / 2;\n    return kPglRetSuccess;\n  }\n  // aux1b_mode == 1\n  uint32_t rare10_ct;\n  PglErr reterr = ParseAndSaveDeltalist(fread_end, raw_sample_ct, fread_pp, deltalist_workspace, &rare10_ct);\n  if (unlikely(reterr)) {\n    return reterr;\n  }\n  const unsigned char* patch_10_fvals = *fread_pp;\n  const uintptr_t fvals_byte_ct = DivUpU64(S_CAST(uint64_t, rare10_ct) << code10_logwidth, 8);\n  if (PtrAddCk(fread_end, fvals_byte_ct, fread_pp)) {\n    return kPglRetMalformedInput;\n  }\n  const uint32_t fvals_word_ct_m1 = (fvals_byte_ct - 1) / kBytesPerWord;\n  uint32_t loop_len = kBitsPerWord >> code10_logwidth;\n  for (uint32_t fvals_widx = 0; ; ++fvals_widx) {\n    uintptr_t fvals_bits;\n    if (fvals_widx >= fvals_word_ct_m1) {\n      if (fvals_widx > fvals_word_ct_m1) {\n        break;\n      }\n      fvals_bits = SubwordLoad(&(patch_10_fvals[fvals_widx * kBytesPerWord]), ModNz(fvals_byte_ct, kBytesPerWord));\n      loop_len = 1 + ((rare10_ct - 1) & (loop_len - 1));\n    } else {\n      CopyFromUnalignedOffsetW(&fvals_bits, patch_10_fvals, fvals_widx);\n    }\n    const uint32_t* cur_deltalist_base = &(deltalist_workspace[fvals_widx << (kBitsPerWordLog2 - code10_logwidth)]);\n    if (allele_ct == 3) {\n      for (uint32_t uii = 0; uii != loop_len; ++uii) {\n        const uint32_t sample_uidx = cur_deltalist_base[uii];\n        const uint32_t sample_widx = sample_uidx / kBitsPerWord;\n        const uintptr_t lowbit = k1LU << (sample_uidx % kBitsPerWord);\n        const uintptr_t sample_include_word = sample_include[sample_widx];\n        if (sample_include_word & lowbit) {\n          const uint32_t sample_idx = sample_include_cumulative_popcounts[sample_widx] + 
PopcountWord(sample_include_word & (lowbit - 1));\n          SetBit(sample_idx, dst_10_set);\n          *dst_10_vals_iter++ = 1 + ((fvals_bits >> uii) & 1);\n          *dst_10_vals_iter++ = 2;\n        }\n      }\n    } else {\n      for (uint32_t uii = 0; uii != loop_len; ++uii) {\n        const uint32_t sample_uidx = cur_deltalist_base[uii];\n        const uint32_t sample_widx = sample_uidx / kBitsPerWord;\n        const uintptr_t lowbit = k1LU << (sample_uidx % kBitsPerWord);\n        const uintptr_t sample_include_word = sample_include[sample_widx];\n        if (sample_include_word & lowbit) {\n          const uint32_t sample_idx = sample_include_cumulative_popcounts[sample_widx] + PopcountWord(sample_include_word & (lowbit - 1));\n          SetBit(sample_idx, dst_10_set);\n          const uintptr_t cur_code_pair = fvals_bits >> (uii << code10_logwidth);\n          const uint32_t cur_code_hi = (cur_code_pair >> allele_code_width) & allele_code_mask;\n          const uint32_t cur_code_lo = cur_code_pair & allele_code_mask;\n          *dst_10_vals_iter++ = 1 + cur_code_lo;\n          *dst_10_vals_iter++ = 1 + cur_code_hi;\n        }\n      }\n    }\n  }\n  *rare10_ctp = S_CAST(uintptr_t, dst_10_vals_iter - dst_10_vals) / 2;\n  return kPglRetSuccess;\n}\n\n// Assumes sample_ct > 0, multiallelic-hc track is present, and patch_01_ct and\n// patch_10_ct are zero-initialized.\nPglErr GetMultiallelicCodes(const uintptr_t* __restrict sample_include, const uint32_t* __restrict sample_include_cumulative_popcounts, uint32_t sample_ct, uint32_t vidx, PgenReaderMain* pgrp, const unsigned char** fread_pp, const unsigned char** fread_endp, uintptr_t* __restrict all_hets, PgenVariant* pgvp) {\n  const uint32_t raw_sample_ct = pgrp->fi.raw_sample_ct;\n  uint32_t subsetting_required = (sample_ct != raw_sample_ct);\n  uintptr_t* raw_genovec = pgrp->workspace_vec;\n  const unsigned char* fread_ptr;\n  const unsigned char* fread_end;\n  PglErr reterr = ReadRawGenovec(subsetting_required, vidx, pgrp, &fread_ptr, &fread_end, raw_genovec);\n  if (unlikely(reterr)) {\n    return reterr;\n  }\n  CopyNyparrNonemptySubset(raw_genovec, sample_include, raw_sample_ct, sample_ct, pgvp->genovec);\n  ZeroTrailingNyps(raw_sample_ct, raw_genovec);\n  const uint32_t aux1_first_byte = *fread_ptr++;\n  const uint32_t aux1a_mode = aux1_first_byte & 15;\n  const uint32_t aux1b_mode = aux1_first_byte >> 4;\n  uint32_t raw_01_ct = 0;\n  uint32_t raw_10_ct = 0;\n  if ((!aux1a_mode) || (!aux1b_mode)) {\n    GenovecCount12Unsafe(raw_genovec, raw_sample_ct, &raw_01_ct, &raw_10_ct);\n  }\n  const uintptr_t* allele_idx_offsets = pgrp->fi.allele_idx_offsets;\n  const uint32_t allele_ct = allele_idx_offsets[vidx + 1] - allele_idx_offsets[vidx];\n  uint32_t* deltalist_workspace = pgrp->workspace_difflist_sample_ids;\n  if (aux1a_mode != 15) {\n    if (!subsetting_required) {\n      reterr = ExportAux1a(fread_end, raw_genovec, aux1a_mode, raw_sample_ct, allele_ct, raw_01_ct, &fread_ptr, pgvp->patch_01_set, pgvp->patch_01_vals, &(pgvp->patch_01_ct));\n    } else {\n      reterr = ExportAux1aProperSubset(fread_end, sample_include, sample_include_cumulative_popcounts, raw_genovec, aux1a_mode, raw_sample_ct, sample_ct, allele_ct, raw_01_ct, &fread_ptr, pgvp->patch_01_set, pgvp->patch_01_vals, &(pgvp->patch_01_ct), deltalist_workspace);\n    }\n    if (unlikely(reterr)) {\n      return reterr;\n    }\n  }\n  const unsigned char* aux1b_start = fread_ptr;\n  if (aux1b_mode != 15) {\n    if (!subsetting_required) {\n      reterr = 
ExportAux1b(fread_end, raw_genovec, aux1b_mode, raw_sample_ct, allele_ct, raw_10_ct, &fread_ptr, pgvp->patch_10_set, pgvp->patch_10_vals, &(pgvp->patch_10_ct));\n    } else {\n      reterr = ExportAux1bProperSubset(fread_end, sample_include, sample_include_cumulative_popcounts, raw_genovec, aux1b_mode, raw_sample_ct, sample_ct, allele_ct, raw_10_ct, &fread_ptr, pgvp->patch_10_set, pgvp->patch_10_vals, &(pgvp->patch_10_ct), deltalist_workspace);\n    }\n    if (unlikely(reterr)) {\n      return reterr;\n    }\n  }\n  if (fread_pp) {\n    *fread_pp = fread_ptr;\n    *fread_endp = fread_end;\n    if (all_hets) {\n      PgrDetectGenoarrHets(raw_genovec, raw_sample_ct, all_hets);\n      if (aux1b_mode != 15) {\n        // can merge this with ExportAux1b functions later\n        uintptr_t* aux1b_hets = pgrp->workspace_aux1x_present;\n        uint32_t aux1b_het_present;\n        reterr = GetAux1bHets(fread_end, raw_genovec, aux1b_mode, raw_sample_ct, allele_ct, raw_10_ct, &aux1b_start, aux1b_hets, &aux1b_het_present, deltalist_workspace);\n        if (unlikely(reterr)) {\n          return reterr;\n        }\n        if (aux1b_het_present) {\n          BitvecOr(aux1b_hets, BitCtToWordCt(raw_sample_ct), all_hets);\n        }\n      }\n    }\n  }\n  return kPglRetSuccess;\n}\n\nPglErr PgrGetM(const uintptr_t* __restrict sample_include, PgrSampleSubsetIndex pssi, uint32_t sample_ct, uint32_t vidx, PgenReader* pgr_ptr, PgenVariant* pgvp) {\n  pgvp->patch_01_ct = 0;\n  pgvp->patch_10_ct = 0;\n  if (!sample_ct) {\n    return kPglRetSuccess;\n  }\n  PgenReaderMain* pgrp = GetPgrp(pgr_ptr);\n  const uint32_t* sample_include_cumulative_popcounts = GetSicp(pssi);\n  const uint32_t vrtype = GetPgfiVrtype(&(pgrp->fi), vidx);\n  const uint32_t multiallelic_hc_present = VrtypeMultiallelicHc(vrtype);\n  if (!multiallelic_hc_present) {\n    return ReadGenovecSubsetUnsafe(sample_include, sample_include_cumulative_popcounts, sample_ct, vidx, pgrp, nullptr, nullptr, pgvp->genovec);\n  }\n  return GetMultiallelicCodes(sample_include, sample_include_cumulative_popcounts, sample_ct, vidx, pgrp, nullptr, nullptr, nullptr, pgvp);\n}\n\nvoid PgrDetectGenoarrHetsMultiallelic(const uintptr_t* __restrict genoarr, const uintptr_t* __restrict patch_10_set, const AlleleCode* __restrict patch_10_vals, uint32_t raw_sample_ct, uintptr_t* __restrict all_hets) {\n  const Halfword* patch_10_set_alias = DowncastKWToHW(patch_10_set);\n  const AlleleCode* patch_10_vals_iter = patch_10_vals;\n  const uint32_t word_ct_m1 = (raw_sample_ct - 1) / kBitsPerWordD2;\n  Halfword* all_hets_hw = DowncastWToHW(all_hets);\n  for (uint32_t widx = 0; ; ++widx) {\n    uintptr_t cur_geno_word;\n    if (widx >= word_ct_m1) {\n      if (widx > word_ct_m1) {\n        if (widx % 2) {\n          all_hets_hw[widx] = 0;\n        }\n        return;\n      }\n      const uint32_t final_ct = ModNz(raw_sample_ct, kBitsPerWordD2);\n      cur_geno_word = bzhi_max(genoarr[widx], 2 * final_ct);\n    } else {\n      cur_geno_word = genoarr[widx];\n    }\n    uint32_t patch_10_hw = patch_10_set_alias[widx];\n    uint32_t cur_hets = Pack01ToHalfword(cur_geno_word);\n    while (patch_10_hw) {\n      const AlleleCode code1 = *patch_10_vals_iter++;\n      const AlleleCode code2 = *patch_10_vals_iter++;\n      const uint32_t lowbit = patch_10_hw & (-patch_10_hw);\n      if (code1 != code2) {\n        cur_hets |= lowbit;\n      }\n      patch_10_hw ^= lowbit;\n    }\n    all_hets_hw[widx] = cur_hets;\n  }\n}\n\nPglErr SkipAux1b(const unsigned char* fread_end, uint32_t 
aux1b_mode, uint32_t raw_sample_ct, uint32_t allele_ct, uint32_t raw_10_ct, const unsigned char** fread_pp) {\n  if (aux1b_mode == 15) {\n    return kPglRetSuccess;\n  }\n  uint32_t rare10_ct;\n  if (!aux1b_mode) {\n    const uint32_t fset_byte_ct = DivUp(raw_10_ct, CHAR_BIT);\n    rare10_ct = PopcountBytes(*fread_pp, fset_byte_ct);\n    *fread_pp += fset_byte_ct;\n  } else {\n    const unsigned char* group_info_iter;\n    PglErr reterr = ParseDifflistHeader(fread_end, raw_sample_ct, fread_pp, nullptr, &group_info_iter, &rare10_ct);\n    if (unlikely(reterr)) {\n      return reterr;\n    }\n    reterr = SkipDeltalistIds(fread_end, group_info_iter, rare10_ct, raw_sample_ct, 0, fread_pp);\n    if (unlikely(reterr)) {\n      return reterr;\n    }\n  }\n  const uint32_t fvals_byte_ct = GetAux1bAlleleEntryByteCt(allele_ct, rare10_ct);\n  if (PtrAddCk(fread_end, fvals_byte_ct, fread_pp)) {\n    return kPglRetMalformedInput;\n  }\n  return kPglRetSuccess;\n}\n\nPglErr SkipAux1(const unsigned char* fread_end, const uintptr_t* __restrict raw_genovec, uint32_t raw_sample_ct, uint32_t allele_ct, const unsigned char** fread_pp) {\n  const uint32_t aux1_first_byte = **fread_pp;\n  (*fread_pp) += 1;\n  const uint32_t aux1a_mode = aux1_first_byte & 15;\n  const uint32_t aux1b_mode = aux1_first_byte >> 4;\n  uint32_t raw_01_ct = 0;\n  uint32_t raw_10_ct = 0;\n  if ((!aux1a_mode) || (!aux1b_mode)) {\n    GenovecCount12Unsafe(raw_genovec, raw_sample_ct, &raw_01_ct, &raw_10_ct);\n  }\n  PglErr reterr = SkipAux1a(fread_end, aux1a_mode, raw_sample_ct, allele_ct, raw_01_ct, fread_pp);\n  if (unlikely(reterr)) {\n    return reterr;\n  }\n  return SkipAux1b(fread_end, aux1b_mode, raw_sample_ct, allele_ct, raw_10_ct, fread_pp);\n}\n\n// sample_include assumed to be nullptr if no subsetting required\n// subsetted_suppressed_het should only be provided when you explicitly want to\n// exclude those phase entries\n// set phasepresent == phaseinfo == nullptr if you want to skip the entire\n// track; ok for phasepresent_ct_ptr to be nullptr too in that case\n// (also see SkipAux2() and GetPhasepresentAndSkipPhaseinfo() below)\nPglErr ParseAux2Subset(const unsigned char* fread_end, const uintptr_t* __restrict sample_include, const uintptr_t* __restrict all_hets, const uintptr_t* __restrict subsetted_suppressed_het, uint32_t raw_sample_ct, uint32_t sample_ct, const unsigned char** fread_pp, uintptr_t* __restrict phasepresent, uintptr_t* __restrict phaseinfo, uint32_t* __restrict phasepresent_ct_ptr, uintptr_t* __restrict workspace_subset) {\n  const uint32_t raw_sample_ctl = BitCtToWordCt(raw_sample_ct);\n  const uint32_t het_ct = PopcountWords(all_hets, raw_sample_ctl);\n  if (unlikely(!het_ct)) {\n    // there shouldn't be a hphase track at all in this case, het_ct is not\n    // computed off a subset\n    return kPglRetMalformedInput;\n  }\n  const uint32_t sample_ctl = BitCtToWordCt(sample_ct);\n  const unsigned char* aux2_start = *fread_pp;\n  if (!(aux2_start[0] & 1)) {\n    // phase always present\n    if (PtrAddCk(fread_end, 1 + (het_ct / CHAR_BIT), fread_pp)) {\n      return kPglRetMalformedInput;\n    }\n    if (!phaseinfo) {\n      // for internal callers which just want to skip aux2\n      return kPglRetSuccess;\n    }\n    if (!sample_include) {\n      memcpy(phasepresent, all_hets, raw_sample_ctl * kBytesPerWord);\n      ExpandBytearr(aux2_start, all_hets, raw_sample_ctl, het_ct, 1, phaseinfo);\n      if (!subsetted_suppressed_het) {\n        *phasepresent_ct_ptr = het_ct;\n        return kPglRetSuccess;\n   
   }\n    } else {\n      CopyBitarrSubset(all_hets, sample_include, sample_ct, phasepresent);\n      if (AllWordsAreZero(phasepresent, sample_ctl)) {\n        *phasepresent_ct_ptr = 0;\n        // bugfix (7 Dec 2017): clear sample_ctl words here, not raw_sample_ctl\n        ZeroWArr(sample_ctl, phaseinfo);\n        return kPglRetSuccess;\n      }\n      ExpandThenSubsetBytearr(aux2_start, all_hets, sample_include, het_ct, sample_ct, 1, phaseinfo);\n    }\n    // bugfix (25 Feb 2020): forgot to mask out subsetted_suppressed_het here\n  } else {\n    const uint32_t het_ctdl = het_ct / kBitsPerWord;\n\n    // explicit phasepresent\n    uintptr_t* aux2_first_part_copy = workspace_subset;\n    aux2_first_part_copy[het_ctdl] = 0;\n    memcpy(aux2_first_part_copy, aux2_start, 1 + (het_ct / CHAR_BIT));\n    const uint32_t raw_phasepresent_ct = PopcountWords(aux2_first_part_copy, het_ctdl + 1) - 1;\n    if (unlikely(!raw_phasepresent_ct)) {\n      // there shouldn't be a hphase track at all in this case\n      return kPglRetMalformedInput;\n    }\n    const unsigned char* aux2_second_part = &(aux2_start[1 + (het_ct / CHAR_BIT)]);\n    *fread_pp = aux2_second_part;\n    if (PtrAddCk(fread_end, DivUp(raw_phasepresent_ct, CHAR_BIT), fread_pp)) {\n      return kPglRetMalformedInput;\n    }\n    if (!phaseinfo) {\n      return kPglRetSuccess;\n    }\n    if (!sample_include) {\n      ExpandBytearrNested(aux2_second_part, aux2_first_part_copy, all_hets, sample_ctl, raw_phasepresent_ct, 1, phasepresent, phaseinfo);\n      if (!subsetted_suppressed_het) {\n        *phasepresent_ct_ptr = raw_phasepresent_ct;\n        return kPglRetSuccess;\n      }\n    } else {\n      // could skip if intersection of phasepresent with sample_include is\n      // empty, but this function call should be fast enough there anyway?\n      ExpandThenSubsetBytearrNested(aux2_second_part, aux2_first_part_copy, all_hets, sample_include, sample_ct, raw_phasepresent_ct, 1, phasepresent, phaseinfo);\n    }\n  }\n  if (subsetted_suppressed_het) {\n    BitvecInvmask(subsetted_suppressed_het, sample_ctl, phasepresent);\n  }\n  *phasepresent_ct_ptr = PopcountWords(phasepresent, sample_ctl);\n  return kPglRetSuccess;\n}\n\nPglErr SkipAux2(const unsigned char* fread_end, uint32_t het_ct, const unsigned char** fread_pp, uint32_t* __restrict phasepresent_ctp) {\n  const unsigned char* aux2_start = *fread_pp;\n  const uint32_t aux2_first_part_byte_ct = 1 + (het_ct / CHAR_BIT);\n  if (PtrAddCk(fread_end, aux2_first_part_byte_ct, fread_pp)) {\n    return kPglRetMalformedInput;\n  }\n  if (!(aux2_start[0] & 1)) {\n    if (phasepresent_ctp) {\n      *phasepresent_ctp = het_ct;\n    }\n    return kPglRetSuccess;\n  }\n  const uint32_t phasepresent_ct = PopcountBytes(aux2_start, aux2_first_part_byte_ct) - 1;\n  if (phasepresent_ctp) {\n    *phasepresent_ctp = phasepresent_ct;\n  }\n  if (PtrAddCk(fread_end, DivUp(phasepresent_ct, CHAR_BIT), fread_pp)) {\n    return kPglRetMalformedInput;\n  }\n  return kPglRetSuccess;\n}\n\n// If fread_pp/fread_endp are non-null, this always moves fread_ptr to the end\n// of aux2.  Set phasepresent/phaseinfo to nullptr when you don't actually care\n// about the contents of aux2.\n// In multiallelic case, this guarantees phasepresent bits are only set at\n// ref/altx hets, not at altx/alty hets.  (We don't currently guarantee this\n// for phaseinfo, since popcounts on that array are meaningless.)  
Yes, this is\n// mildly annoying, but the code would be messier if the ordering of\n// multiallelic-hardcall and hardcall-phase info were swapped.\nPglErr ReadGenovecHphaseSubsetUnsafe(const uintptr_t* __restrict sample_include, const uint32_t* __restrict sample_include_cumulative_popcounts, uint32_t sample_ct, uint32_t vidx, PgenReaderMain* pgrp, const unsigned char** fread_pp, const unsigned char** fread_endp, uintptr_t* __restrict genovec, uintptr_t* __restrict phasepresent, uintptr_t* __restrict phaseinfo, uint32_t* phasepresent_ct_ptr) {\n  const uint32_t vrtype = GetPgfiVrtype(&(pgrp->fi), vidx);\n  if ((!(vrtype & 0x18)) || ((!fread_pp) && (!VrtypeHphase(vrtype)))) {\n    *phasepresent_ct_ptr = 0;\n    return ReadGenovecSubsetUnsafe(sample_include, sample_include_cumulative_popcounts, sample_ct, vidx, pgrp, fread_pp, fread_endp, genovec);\n  }\n  // Either hphase track is present; or if it's absent, multiallelic track is\n  // present and we were asked to advance fread_ptr to the end of aux2.\n  const uint32_t raw_sample_ct = pgrp->fi.raw_sample_ct;\n  const uint32_t subsetting_required = (sample_ct != raw_sample_ct);\n  uintptr_t* raw_genovec = (subsetting_required || VrtypeMultiallelicHc(vrtype))? pgrp->workspace_vec : genovec;\n  const unsigned char* fread_ptr;\n  const unsigned char* fread_end;\n  PglErr reterr = ReadRawGenovec(subsetting_required, vidx, pgrp, &fread_ptr, &fread_end, raw_genovec);\n  if (unlikely(reterr)) {\n    return reterr;\n  }\n  ZeroTrailingNyps(raw_sample_ct, raw_genovec);\n  const uintptr_t* allele_idx_offsets = pgrp->fi.allele_idx_offsets;\n  const uint32_t allele_ct = allele_idx_offsets? (allele_idx_offsets[vidx + 1] - allele_idx_offsets[vidx]) : 2;\n  if (raw_genovec != genovec) {\n    CopyNyparrNonemptySubset(raw_genovec, sample_include, raw_sample_ct, sample_ct, genovec);\n    if (!VrtypeHphase(vrtype)) {\n      // only possible if multiallelic track present and fread_ptr must be\n      // advanced to end of aux2\n      *fread_pp = fread_ptr;\n      *fread_endp = fread_end;\n      return SkipAux1(fread_end, raw_genovec, raw_sample_ct, allele_ct, fread_pp);\n    }\n  }\n  uintptr_t* all_hets = pgrp->workspace_all_hets;\n  PgrDetectGenoarrHets(raw_genovec, raw_sample_ct, all_hets);\n  uintptr_t* subsetted_suppressed_het = nullptr;\n  if (VrtypeMultiallelicHc(vrtype)) {\n    const uint32_t aux1_first_byte = *fread_ptr++;\n    const uint32_t aux1a_mode = aux1_first_byte & 15;\n    const uint32_t aux1b_mode = aux1_first_byte >> 4;\n    uint32_t raw_01_ct = 0;\n    uint32_t raw_10_ct = 0;\n    if ((!aux1a_mode) || (!aux1b_mode)) {\n      GenovecCount12Unsafe(raw_genovec, raw_sample_ct, &raw_01_ct, &raw_10_ct);\n    }\n    reterr = SkipAux1a(fread_end, aux1a_mode, raw_sample_ct, allele_ct, raw_01_ct, &fread_ptr);\n    if (unlikely(reterr)) {\n      return reterr;\n    }\n    // 1. fill workspace_aux1x_present with aux1b\n    // 2. clear bit for each hom-altx call in aux1b\n    // 3. bitvec-or to set new workspace_all_hets bits\n    // 4. 
if not subsetting, set subsetted_suppressed_het := workspace_all_hets\n    //    if subsetting, copy-subset to pgrp->workspace_vec and set to that\n    //    if AllWordsAreZero, keep as nullptr\n    uintptr_t* aux1b_hets = pgrp->workspace_aux1x_present;\n    uint32_t* deltalist_workspace = pgrp->workspace_difflist_sample_ids;\n    uint32_t aux1b_het_present;\n    reterr = GetAux1bHets(fread_end, raw_genovec, aux1b_mode, raw_sample_ct, allele_ct, raw_10_ct, &fread_ptr, aux1b_hets, &aux1b_het_present, deltalist_workspace);\n    if (unlikely(reterr)) {\n      return reterr;\n    }\n    if (aux1b_het_present) {\n      BitvecOr(aux1b_hets, BitCtToWordCt(raw_sample_ct), all_hets);\n      if (!subsetting_required) {\n        subsetted_suppressed_het = aux1b_hets;\n      } else {\n        // Don't need raw_genovec any more.\n        CopyBitarrSubset(aux1b_hets, sample_include, sample_ct, raw_genovec);\n        subsetted_suppressed_het = raw_genovec;\n      }\n    }\n  }\n  reterr = ParseAux2Subset(fread_end, subsetting_required? sample_include : nullptr, all_hets, subsetted_suppressed_het, raw_sample_ct, sample_ct, &fread_ptr, phasepresent, phaseinfo, phasepresent_ct_ptr, pgrp->workspace_subset);\n  if (fread_pp) {\n    *fread_pp = fread_ptr;\n    *fread_endp = fread_end;\n  }\n  return reterr;\n}\n\nPglErr PgrGetP(const uintptr_t* __restrict sample_include, PgrSampleSubsetIndex pssi, uint32_t sample_ct, uint32_t vidx, PgenReader* pgr_ptr, uintptr_t* __restrict genovec, uintptr_t* __restrict phasepresent, uintptr_t* __restrict phaseinfo, uint32_t* __restrict phasepresent_ct_ptr) {\n  if (!sample_ct) {\n    *phasepresent_ct_ptr = 0;\n    return kPglRetSuccess;\n  }\n  PgenReaderMain* pgrp = GetPgrp(pgr_ptr);\n  assert(vidx < pgrp->fi.raw_variant_ct);\n  return ReadGenovecHphaseSubsetUnsafe(sample_include, GetSicp(pssi), sample_ct, vidx, pgrp, nullptr, nullptr, genovec, phasepresent, phaseinfo, phasepresent_ct_ptr);\n}\n\n// eventually want to return fread_ptr/fread_end, but not relevant until\n// multiallelic dosage working\nPglErr Get1MP(const uintptr_t* __restrict sample_include, const uint32_t* __restrict sample_include_cumulative_popcounts, uint32_t sample_ct, uint32_t vidx, uint32_t allele_idx, PgenReaderMain* pgrp, uintptr_t* __restrict allele_countvec, uintptr_t* __restrict phasepresent, uintptr_t* __restrict phaseinfo, uint32_t* __restrict phasepresent_ct_ptr) {\n  // sample_ct > 0; either allele_idx > 1 or ((allele_idx == 1) &&\n  // multiallelic_hc_present)\n  const uint32_t vrtype = pgrp->fi.vrtypes[vidx];\n  if (!VrtypeHphase(vrtype)) {\n    *phasepresent_ct_ptr = 0;\n    return IMPLPgrGet1(sample_include, sample_include_cumulative_popcounts, sample_ct, vidx, allele_idx, pgrp, allele_countvec);\n  }\n  uintptr_t* all_hets = pgrp->workspace_all_hets;\n  uintptr_t* subsetted_suppressed_het = nullptr;\n  const unsigned char* fread_ptr;\n  const unsigned char* fread_end;\n  PglErr reterr = Get1Multiallelic(sample_include, sample_include_cumulative_popcounts, sample_ct, vidx, allele_idx, pgrp, &fread_ptr, &fread_end, all_hets, allele_countvec, &subsetted_suppressed_het);\n  if (unlikely(reterr)) {\n    return reterr;\n  }\n  const uint32_t raw_sample_ct = pgrp->fi.raw_sample_ct;\n  reterr = ParseAux2Subset(fread_end, (sample_ct != raw_sample_ct)? 
sample_include : nullptr, all_hets, subsetted_suppressed_het, raw_sample_ct, sample_ct, &fread_ptr, phasepresent, phaseinfo, phasepresent_ct_ptr, pgrp->workspace_subset);\n  // bugfix (7 Sep 2018): Need to postprocess phasepresent when collapsing\n  // multiple alleles.\n  if (reterr || (!(*phasepresent_ct_ptr))) {\n    return reterr;\n  }\n\n  // Might want to make this its own function.\n  const uint32_t sample_ctl2 = NypCtToWordCt(sample_ct);\n  MaskWordsToHalfwordsInvmatch(allele_countvec, kMaskAAAA, sample_ctl2, phasepresent, phasepresent);\n  *phasepresent_ct_ptr = PopcountWords(phasepresent, BitCtToWordCt(sample_ct));\n\n  return kPglRetSuccess;\n}\n\nPglErr PgrGet1P(const uintptr_t* __restrict sample_include, PgrSampleSubsetIndex pssi, uint32_t sample_ct, uint32_t vidx, uint32_t allele_idx, PgenReader* pgr_ptr, uintptr_t* __restrict allele_countvec, uintptr_t* __restrict phasepresent, uintptr_t* __restrict phaseinfo, uint32_t* __restrict phasepresent_ct_ptr) {\n  if (!sample_ct) {\n    *phasepresent_ct_ptr = 0;\n    return kPglRetSuccess;\n  }\n  PgenReaderMain* pgrp = GetPgrp(pgr_ptr);\n  const uint32_t* sample_include_cumulative_popcounts = GetSicp(pssi);\n  const uint32_t vrtype = GetPgfiVrtype(&(pgrp->fi), vidx);\n  const uint32_t multiallelic_hc_present = VrtypeMultiallelicHc(vrtype);\n  if ((!allele_idx) || ((allele_idx == 1) && (!multiallelic_hc_present))) {\n    PglErr reterr = ReadGenovecHphaseSubsetUnsafe(sample_include, sample_include_cumulative_popcounts, sample_ct, vidx, pgrp, nullptr, nullptr, allele_countvec, phasepresent, phaseinfo, phasepresent_ct_ptr);\n    if (allele_idx) {\n      GenovecInvertUnsafe(sample_ct, allele_countvec);\n      if (*phasepresent_ct_ptr) {\n        BitvecInvert(BitCtToWordCt(sample_ct), phaseinfo);\n      }\n    }\n    return reterr;\n  }\n  return Get1MP(sample_include, sample_include_cumulative_popcounts, sample_ct, vidx, allele_idx, pgrp, allele_countvec, phasepresent, phaseinfo, phasepresent_ct_ptr);\n}\n\nPglErr IMPLPgrGetInv1P(const uintptr_t* __restrict sample_include, const uint32_t* __restrict sample_include_cumulative_popcounts, uint32_t sample_ct, uint32_t vidx, uint32_t allele_idx, PgenReaderMain* pgrp, uintptr_t* __restrict allele_invcountvec, uintptr_t* __restrict phasepresent, uintptr_t* __restrict phaseinfo, uint32_t* __restrict phasepresent_ct_ptr) {\n  if (!sample_ct) {\n    *phasepresent_ct_ptr = 0;\n    return kPglRetSuccess;\n  }\n  const uint32_t vrtype = GetPgfiVrtype(&(pgrp->fi), vidx);\n  const uint32_t multiallelic_hc_present = VrtypeMultiallelicHc(vrtype);\n  if ((!allele_idx) || ((allele_idx == 1) && (!multiallelic_hc_present))) {\n    PglErr reterr = ReadGenovecHphaseSubsetUnsafe(sample_include, sample_include_cumulative_popcounts, sample_ct, vidx, pgrp, nullptr, nullptr, allele_invcountvec, phasepresent, phaseinfo, phasepresent_ct_ptr);\n    if (!allele_idx) {\n      GenovecInvertUnsafe(sample_ct, allele_invcountvec);\n      if (*phasepresent_ct_ptr) {\n        BitvecInvert(BitCtToWordCt(sample_ct), phaseinfo);\n      }\n    }\n    return reterr;\n  }\n  PglErr reterr = Get1MP(sample_include, sample_include_cumulative_popcounts, sample_ct, vidx, allele_idx, pgrp, allele_invcountvec, phasepresent, phaseinfo, phasepresent_ct_ptr);\n  if (unlikely(reterr)) {\n    return reterr;\n  }\n  GenovecInvertUnsafe(sample_ct, allele_invcountvec);\n  if (*phasepresent_ct_ptr) {\n    BitvecInvert(BitCtToWordCt(sample_ct), phaseinfo);\n  }\n  return kPglRetSuccess;\n}\n\nvoid SuppressHets11(const uintptr_t* genovec, 
uintptr_t* subsetted_all_hets, uint32_t sample_ct, uintptr_t* subsetted_suppressed_hets) {\n  const uint32_t sample_ctl2 = NypCtToWordCt(sample_ct);\n  MaskWordsToHalfwordsInvmatch(genovec, 0, sample_ctl2, subsetted_all_hets, subsetted_suppressed_hets);\n}\n\nPglErr PgrGet2P(const uintptr_t* __restrict sample_include, PgrSampleSubsetIndex pssi, uint32_t sample_ct, uint32_t vidx, uint32_t allele_idx0, uint32_t allele_idx1, PgenReader* pgr_ptr, uintptr_t* __restrict genovec, uintptr_t* __restrict phasepresent, uintptr_t* __restrict phaseinfo, uint32_t* __restrict phasepresent_ct_ptr) {\n  PgenReaderMain* pgrp = GetPgrp(pgr_ptr);\n  const uint32_t* sample_include_cumulative_popcounts = GetSicp(pssi);\n  const uint32_t vrtype = GetPgfiVrtype(&(pgrp->fi), vidx);\n  if (!VrtypeHphase(vrtype)) {\n    *phasepresent_ct_ptr = 0;\n    return IMPLPgrGet2(sample_include, sample_include_cumulative_popcounts, sample_ct, vidx, allele_idx0, allele_idx1, pgrp, genovec);\n  }\n  if (!sample_ct) {\n    *phasepresent_ct_ptr = 0;\n    return kPglRetSuccess;\n  }\n  if (allele_idx0 + allele_idx1 == 1) {\n    PglErr reterr = ReadGenovecHphaseSubsetUnsafe(sample_include, sample_include_cumulative_popcounts, sample_ct, vidx, pgrp, nullptr, nullptr, genovec, phasepresent, phaseinfo, phasepresent_ct_ptr);\n    if (allele_idx0) {\n      GenovecInvertUnsafe(sample_ct, genovec);\n      if (*phasepresent_ct_ptr) {\n        BitvecInvert(BitCtToWordCt(sample_ct), phaseinfo);\n      }\n    }\n    return reterr;\n  }\n  const uint32_t raw_sample_ct = pgrp->fi.raw_sample_ct;\n  const uint32_t subsetting_required = (sample_ct != raw_sample_ct);\n  uintptr_t* raw_genovec = pgrp->workspace_vec;\n  const unsigned char* fread_ptr;\n  const unsigned char* fread_end;\n  PglErr reterr = ReadRawGenovec(subsetting_required, vidx, pgrp, &fread_ptr, &fread_end, raw_genovec);\n  if (unlikely(reterr)) {\n    return reterr;\n  }\n  ZeroTrailingNyps(raw_sample_ct, raw_genovec);\n\n  uint32_t invert = 0;\n  if (allele_idx0 > allele_idx1) {\n    const uint32_t swap = allele_idx0;\n    allele_idx0 = allele_idx1;\n    allele_idx1 = swap;\n    invert = 1;\n  }\n  if (allele_idx0 > 1) {\n    SetAllBits(2 * sample_ct, genovec);\n  } else {\n    CopyNyparrNonemptySubset(raw_genovec, sample_include, raw_sample_ct, sample_ct, genovec);\n    // allele_idx1 > 1 guaranteed\n    if (!allele_idx0) {\n      GenovecNonzeroToMissingUnsafe(sample_ct, genovec);\n    } else {\n      GenovecInvertThenNonzeroToMissingUnsafe(sample_ct, genovec);\n    }\n  }\n  uintptr_t* all_hets = pgrp->workspace_all_hets;\n  PgrDetectGenoarrHets(raw_genovec, raw_sample_ct, all_hets);\n  uintptr_t* subsetted_suppressed_het = nullptr;\n  if (!subsetting_required) {\n    sample_include = nullptr;\n  }\n\n  const uintptr_t* allele_idx_offsets = pgrp->fi.allele_idx_offsets;\n  const uint32_t allele_ct = allele_idx_offsets[vidx + 1] - allele_idx_offsets[vidx];\n  if (VrtypeMultiallelicHc(vrtype)) {\n    // This combines ReadGenovecHphaseSubsetUnsafe() and Get2()'s logic.\n    const uint32_t aux1_first_byte = *fread_ptr++;\n    const uint32_t aux1a_mode = aux1_first_byte & 15;\n    const uint32_t aux1b_mode = aux1_first_byte >> 4;\n    uint32_t raw_01_ct = 0;\n    uint32_t raw_10_ct = 0;\n    if ((!aux1a_mode) || (!aux1b_mode)) {\n      GenovecCount12Unsafe(raw_genovec, raw_sample_ct, &raw_01_ct, &raw_10_ct);\n    }\n    uint32_t* deltalist_workspace = pgrp->workspace_difflist_sample_ids;\n    if (!allele_idx0) {\n      // Two cases:\n      // - If allele_idx == 1, convert all aux1a 
entries from 01 to 11.\n      // - Otherwise, for each matching aux1a entry, convert from 11 to 01.\n      reterr = GenoarrAux1aUpdate(fread_end, sample_include, sample_include_cumulative_popcounts, raw_genovec, aux1a_mode, raw_sample_ct, allele_ct, allele_idx1, 2, raw_01_ct, &fread_ptr, genovec, deltalist_workspace);\n    } else {\n      reterr = SkipAux1a(fread_end, aux1a_mode, raw_sample_ct, allele_ct, raw_01_ct, &fread_ptr);\n    }\n    if (unlikely(reterr)) {\n      return reterr;\n    }\n    const unsigned char* aux1b_start = fread_ptr;\n    reterr = GenoarrAux1bUpdate2(fread_end, sample_include, sample_include_cumulative_popcounts, raw_genovec, aux1b_mode, raw_sample_ct, allele_ct, allele_idx0, allele_idx1, raw_10_ct, &fread_ptr, genovec, deltalist_workspace);\n    if (unlikely(reterr)) {\n      return reterr;\n    }\n    // Can have a modified version of GenoarrAux1bUpdate2() which only requires\n    // one pass, but let's keep the logic simpler for now since I don't expect\n    // this function to be used frequently.\n    uintptr_t* aux1b_hets = pgrp->workspace_aux1x_present;\n    uint32_t aux1b_het_present;\n    reterr = GetAux1bHets(fread_end, raw_genovec, aux1b_mode, raw_sample_ct, allele_ct, raw_10_ct, &aux1b_start, aux1b_hets, &aux1b_het_present, deltalist_workspace);\n    if (unlikely(reterr)) {\n      return reterr;\n    }\n    if (aux1b_het_present) {\n      BitvecOr(aux1b_hets, BitCtToWordCt(raw_sample_ct), all_hets);\n    }\n    if ((allele_idx0 + allele_idx1 != 1) || aux1b_het_present) {\n      // We can now clobber the contents of pgrp->workspace_vec (raw_genovec)\n      // and pgrp->workspace_aux1x_present (aux1b_hets).\n      // We use the former as the subsetted_suppressed_het return buffer.\n      uintptr_t* all_hets_subsetted = all_hets;\n      if (sample_include) {\n        all_hets_subsetted = aux1b_hets;\n        CopyBitarrSubset(all_hets, sample_include, sample_ct, all_hets_subsetted);\n      }\n      subsetted_suppressed_het = raw_genovec;\n      SuppressHets11(genovec, all_hets_subsetted, sample_ct, raw_genovec);\n    }\n  }\n  reterr = ParseAux2Subset(fread_end, sample_include, all_hets, subsetted_suppressed_het, raw_sample_ct, sample_ct, &fread_ptr, phasepresent, phaseinfo, phasepresent_ct_ptr, pgrp->workspace_subset);\n  if (unlikely(reterr)) {\n    return reterr;\n  }\n  if (VrtypeMultiallelicHc(vrtype) && (*phasepresent_ct_ptr)) {\n    const uint32_t sample_ctl2 = NypCtToWordCt(sample_ct);\n    MaskWordsToHalfwordsInvmatch(genovec, kMaskAAAA, sample_ctl2, phasepresent, phasepresent);\n    *phasepresent_ct_ptr = PopcountWords(phasepresent, BitCtToWordCt(sample_ct));\n  }\n  if (invert) {\n    GenovecInvertUnsafe(sample_ct, genovec);\n    if (*phasepresent_ct_ptr) {\n      BitvecInvert(BitCtToWordCt(sample_ct), phaseinfo);\n    }\n  }\n  return kPglRetSuccess;\n}\n\nPglErr PgrGetMP(const uintptr_t* __restrict sample_include, PgrSampleSubsetIndex pssi, uint32_t sample_ct, uint32_t vidx, PgenReader* pgr_ptr, PgenVariant* pgvp) {\n  pgvp->patch_01_ct = 0;\n  pgvp->patch_10_ct = 0;\n  if (!sample_ct) {\n    pgvp->phasepresent_ct = 0;\n    return kPglRetSuccess;\n  }\n  PgenReaderMain* pgrp = GetPgrp(pgr_ptr);\n  const uint32_t* sample_include_cumulative_popcounts = GetSicp(pssi);\n  const uint32_t vrtype = GetPgfiVrtype(&(pgrp->fi), vidx);\n  const uint32_t multiallelic_hc_present = VrtypeMultiallelicHc(vrtype);\n  if (!multiallelic_hc_present) {\n    return ReadGenovecHphaseSubsetUnsafe(sample_include, sample_include_cumulative_popcounts, sample_ct, vidx, 
pgrp, nullptr, nullptr, pgvp->genovec, pgvp->phasepresent, pgvp->phaseinfo, &(pgvp->phasepresent_ct));\n  }\n  const unsigned char* fread_ptr;\n  const unsigned char* fread_end;\n  uintptr_t* all_hets = VrtypeHphase(vrtype)? pgrp->workspace_all_hets : nullptr;\n  PglErr reterr = GetMultiallelicCodes(sample_include, sample_include_cumulative_popcounts, sample_ct, vidx, pgrp, all_hets? (&fread_ptr) : nullptr, all_hets? (&fread_end) : nullptr, all_hets, pgvp);\n  if (reterr || (!all_hets)) {\n    // bugfix (17 Apr 2023): need to zero out phasepresent_ct in this case\n    pgvp->phasepresent_ct = 0;\n    return reterr;\n  }\n  const uint32_t raw_sample_ct = pgrp->fi.raw_sample_ct;\n  return ParseAux2Subset(fread_end, (sample_ct != raw_sample_ct)? sample_include : nullptr, all_hets, nullptr, raw_sample_ct, sample_ct, &fread_ptr, pgvp->phasepresent, pgvp->phaseinfo, &(pgvp->phasepresent_ct), pgrp->workspace_subset);\n}\n\n// ok for sample_include to be nullptr if not subsetting, though this is not\n// required\nPglErr ParseDosage16(const unsigned char* fread_ptr, const unsigned char* fread_end, const uintptr_t* __restrict sample_include, uint32_t sample_ct, uint32_t vidx, uint32_t allele_ct, PgenReaderMain* pgrp, uint32_t* __restrict dosage_ct_ptr, uintptr_t* __restrict dphase_present, int16_t* dphase_delta, uint32_t* __restrict dphase_ct_ptr, uintptr_t* __restrict dosage_present, uint16_t* dosage_main) {\n  // Side effect: may use pgrp->workspace_dosage_present and\n  // pgrp->workspace_dphase_present\n  const uint32_t raw_sample_ct = pgrp->fi.raw_sample_ct;\n  const uint32_t raw_sample_ctl = BitCtToWordCt(raw_sample_ct);\n  const uint32_t subsetting_required = (sample_ct != raw_sample_ct);\n  uintptr_t* raw_dosage_present = subsetting_required? pgrp->workspace_dosage_present : dosage_present;\n  const uint32_t vrtype = GetPgfiVrtype(&(pgrp->fi), vidx);\n  const uint32_t is_unconditional_dosage = ((vrtype & 0x60) == 0x40);\n  uint32_t raw_dosage_ct;\n  if ((vrtype & 0x60) == 0x20) {\n    // case 1: dosage list\n    if (unlikely(ParseAndSaveDeltalistAsBitarr(fread_end, raw_sample_ct, &fread_ptr, raw_dosage_present, &raw_dosage_ct))) {\n      return kPglRetMalformedInput;\n    }\n    if ((!raw_dosage_ct) && (!dosage_ct_ptr)) {\n      // bugfix (7 Oct 2024)\n      ZeroWArr(raw_sample_ctl, raw_dosage_present);\n    }\n  } else if (is_unconditional_dosage) {\n    // case 2: unconditional dosage.  
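\n    // (In this chain, (vrtype & 0x60) == 0x20 selects a difflist of dosage\n    // carriers, 0x40 an unconditional per-sample track, and 0x60 a bitarray.)\n    // 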
handle separately from other two cases\n    // since missing values may be present.\n    SetAllBits(raw_sample_ct, raw_dosage_present);\n    raw_dosage_ct = raw_sample_ct;\n  } else {\n    // case 3: dosage bitarray\n    raw_dosage_present[raw_sample_ctl - 1] = 0;\n    const uint32_t raw_sample_ctb = DivUp(raw_sample_ct, CHAR_BIT);\n    memcpy(raw_dosage_present, fread_ptr, raw_sample_ctb);\n    fread_ptr = &(fread_ptr[raw_sample_ctb]);\n    raw_dosage_ct = PopcountWords(raw_dosage_present, raw_sample_ctl);\n  }\n  const uint32_t sample_ctl = BitCtToWordCt(sample_ct);\n  uint32_t dosage_ct;\n  if (subsetting_required) {\n    CopyBitarrSubset(raw_dosage_present, sample_include, sample_ct, dosage_present);\n    dosage_ct = PopcountWords(dosage_present, sample_ctl);\n  } else {\n    dosage_ct = raw_dosage_ct;\n  }\n  if (dosage_ct_ptr) {\n    *dosage_ct_ptr = dosage_ct;\n  }\n  if (!dosage_ct) {\n    if (dphase_ct_ptr) {\n      *dphase_ct_ptr = 0;\n    }\n    return kPglRetSuccess;\n  }\n  const unsigned char* dosage_main_read_biter = fread_ptr;\n  uint16_t* dosage_main_write_iter = dosage_main;\n  uint32_t raw_dphase_ct = 0;\n  uint32_t dphase_ct = 0;\n  uintptr_t* raw_dphase_present = nullptr;\n  if (dphase_present && (vrtype & 0x80)) {\n    fread_ptr = &(fread_ptr[raw_dosage_ct * 2]);\n    if (!is_unconditional_dosage) {\n      const unsigned char* file_dphase_present = fread_ptr;\n      fread_ptr = &(fread_ptr[DivUp(raw_dosage_ct, CHAR_BIT)]);\n      raw_dphase_present = subsetting_required? pgrp->workspace_dphase_present : dphase_present;\n      ExpandBytearr(file_dphase_present, raw_dosage_present, raw_sample_ctl, raw_dosage_ct, 0, raw_dphase_present);\n      raw_dphase_ct = PopcountWords(raw_dphase_present, raw_sample_ctl);\n      dphase_ct = raw_dphase_ct;\n      if (subsetting_required) {\n        CopyBitarrSubset(raw_dphase_present, sample_include, sample_ct, dphase_present);\n        dphase_ct = PopcountWords(dphase_present, sample_ctl);\n      }\n    } else {\n      // raw_dphase_present = raw_dosage_present;\n      dphase_ct = dosage_ct;\n      SetAllBits(sample_ct, dphase_present);\n    }\n  }\n  if (!dphase_ct) {\n    if (allele_ct == 2) {\n      if (!is_unconditional_dosage) {\n        if (dosage_ct == raw_dosage_ct) {\n          memcpy(dosage_main_write_iter, dosage_main_read_biter, dosage_ct * sizeof(int16_t));\n        } else {\n          // bugfix (22 May 2017): dosage_entry_idx needs to iterate up to\n          // raw_dosage_ct, not dosage_ct\n          uintptr_t widx = ~k0LU;\n          uint32_t dosage_entry_idx = 0;\n          do {\n            uintptr_t cur_bits;\n            do {\n              cur_bits = raw_dosage_present[++widx];\n            } while (!cur_bits);\n            const uintptr_t sample_include_word = sample_include[widx];\n            do {\n              const uintptr_t low_bit = cur_bits & (-cur_bits);\n              if (sample_include_word & low_bit) {\n                CopyFromUnalignedOffsetU16(dosage_main_write_iter, dosage_main_read_biter, dosage_entry_idx);\n                ++dosage_main_write_iter;\n              }\n              ++dosage_entry_idx;\n              cur_bits ^= low_bit;\n            } while (cur_bits);\n          } while (dosage_entry_idx != raw_dosage_ct);\n        }\n      } else {\n        if (!subsetting_required) {\n          for (uint32_t sample_idx = 0; sample_idx != sample_ct; ++sample_idx) {\n            uint16_t cur_dosage;\n            CopyFromUnalignedIncrU16(&cur_dosage, &dosage_main_read_biter);\n            if 
(cur_dosage != 65535) {\n              *dosage_main_write_iter++ = cur_dosage;\n            } else {\n              ClearBit(sample_idx, dosage_present);\n            }\n          }\n        } else {\n          uintptr_t widx = ~k0LU;\n          uint32_t sample_idx = 0;\n          do {\n            uintptr_t cur_bits;\n            do {\n              cur_bits = sample_include[++widx];\n            } while (!cur_bits);\n            const uintptr_t sample_uidx_base = widx * kBitsPerWord;\n            const unsigned char* dosage_main_readp = &(dosage_main_read_biter[sample_uidx_base * sizeof(int16_t)]);\n            do {\n              const uint32_t sample_uidx_lowbits = ctzw(cur_bits);\n              uint16_t cur_dosage;\n              CopyFromUnalignedOffsetU16(&cur_dosage, dosage_main_readp, sample_uidx_lowbits);\n              if (cur_dosage != 65535) {\n                *dosage_main_write_iter++ = cur_dosage;\n              } else {\n                ClearBit(sample_idx, dosage_present);\n              }\n              ++sample_idx;\n              cur_bits &= cur_bits - 1;\n            } while (cur_bits);\n          } while (sample_idx != sample_ct);\n        }\n        if (dosage_ct_ptr) {\n          *dosage_ct_ptr = dosage_main_write_iter - dosage_main;\n        }\n      }\n    } else {\n      // todo: multiallelic dosage\n      // need to support downcode to ref/nonref as well as raw load\n      // (dosage_ct_ptr should be nullptr iff we're doing a raw load)\n#ifndef PGENLIB_NOPRINT\n      fputs(\"multiallelic variants not yet supported by ParseDosage16()\\n\", stderr);\n#endif\n      return kPglRetNotYetSupported;\n    }\n    if (dphase_ct_ptr) {\n      *dphase_ct_ptr = 0;\n    }\n  } else {\n    // phased dosage\n    if (allele_ct == 2) {\n      if (!is_unconditional_dosage) {\n        // bugfix (15 Sep 2023): dphase_ct == raw_dphase_ct doesn't guarantee\n        // dosage_ct == raw_dosage_ct\n        if (dosage_ct == raw_dosage_ct) {\n          memcpy(dosage_main_write_iter, dosage_main_read_biter, dosage_ct * sizeof(int16_t));\n        } else {\n          uintptr_t widx = ~k0LU;\n          uint32_t dosage_entry_idx = 0;\n          do {\n            uintptr_t cur_bits;\n            do {\n              cur_bits = raw_dosage_present[++widx];\n            } while (!cur_bits);\n            const uintptr_t sample_include_word = sample_include[widx];\n            do {\n              const uintptr_t low_bit = cur_bits & (-cur_bits);\n              if (sample_include_word & low_bit) {\n                CopyFromUnalignedOffsetU16(dosage_main_write_iter, dosage_main_read_biter, dosage_entry_idx);\n                ++dosage_main_write_iter;\n              }\n              ++dosage_entry_idx;\n              cur_bits ^= low_bit;\n            } while (cur_bits);\n          } while (dosage_entry_idx != raw_dosage_ct);\n        }\n        if (dphase_ct == raw_dphase_ct) {\n          memcpy(dphase_delta, fread_ptr, dphase_ct * sizeof(int16_t));\n          if (dphase_ct_ptr) {\n            *dphase_ct_ptr = dphase_ct;\n          }\n        } else {\n          uintptr_t widx = ~k0LU;\n          uint32_t dphase_entry_idx = 0;\n          const unsigned char* dphase_delta_read = fread_ptr;\n          int16_t* dphase_delta_write_iter = dphase_delta;\n          do {\n            uintptr_t cur_bits;\n            do {\n              cur_bits = raw_dphase_present[++widx];\n            } while (!cur_bits);\n            const uintptr_t sample_include_word = sample_include[widx];\n            do {\n              
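// Isolate the lowest set bit of this raw_dphase_present word; the\n              // matching dphase_delta entry is copied only when its sample\n              // survives subsetting, while dphase_entry_idx always advances.\n              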
const uintptr_t low_bit = cur_bits & (-cur_bits);\n              if (sample_include_word & low_bit) {\n                CopyFromUnalignedOffsetI16(dphase_delta_write_iter, dphase_delta_read, dphase_entry_idx);\n                ++dphase_delta_write_iter;\n              }\n              ++dphase_entry_idx;\n              cur_bits ^= low_bit;\n            } while (cur_bits);\n          } while (dphase_entry_idx != raw_dphase_ct);\n          if (dphase_ct_ptr) {\n            *dphase_ct_ptr = dphase_delta_write_iter - dphase_delta;\n          }\n        }\n      } else {\n        const unsigned char* dphase_delta_read = fread_ptr;\n        int16_t* dphase_delta_write_iter = dphase_delta;\n        if (!subsetting_required) {\n          for (uint32_t sample_idx = 0; sample_idx != sample_ct; ++sample_idx) {\n            uint16_t cur_dosage;\n            CopyFromUnalignedIncrU16(&cur_dosage, &dosage_main_read_biter);\n            if (cur_dosage != 65535) {\n              *dosage_main_write_iter++ = cur_dosage;\n              int16_t dphase_delta_val;\n              CopyFromUnalignedOffsetI16(&dphase_delta_val, dphase_delta_read, sample_idx);\n              if (dphase_delta_val) {\n                *dphase_delta_write_iter++ = dphase_delta_val;\n              } else {\n                ClearBit(sample_idx, dphase_present);\n              }\n            } else {\n              // assert(dphase_delta_read[sample_idx] == -32768);\n              ClearBit(sample_idx, dosage_present);\n            }\n          }\n        } else {\n          uintptr_t sample_uidx_base = 0;\n          uintptr_t sample_include_bits = sample_include[0];\n          for (uint32_t sample_idx = 0; sample_idx != sample_ct; ++sample_idx) {\n            const uintptr_t sample_uidx = BitIter1(sample_include, &sample_uidx_base, &sample_include_bits);\n            uint16_t cur_dosage;\n            CopyFromUnalignedOffsetU16(&cur_dosage, dosage_main_read_biter, sample_uidx);\n            if (cur_dosage != 65535) {\n              *dosage_main_write_iter++ = cur_dosage;\n              int16_t dphase_delta_val;\n              CopyFromUnalignedOffsetI16(&dphase_delta_val, dphase_delta_read, sample_uidx);\n              if (dphase_delta_val) {\n                *dphase_delta_write_iter++ = dphase_delta_val;\n              } else {\n                ClearBit(sample_idx, dphase_present);\n              }\n            } else {\n              // assert(dphase_delta_read[sample_uidx] == -32768);\n              ClearBit(sample_idx, dosage_present);\n            }\n          }\n        }\n        dosage_ct = dosage_main_write_iter - dosage_main;\n        if (dosage_ct != sample_ct) {\n          BitvecAnd(dosage_present, sample_ctl, dphase_present);\n        }\n        if (dosage_ct_ptr) {\n          *dosage_ct_ptr = dosage_ct;\n        }\n        if (dphase_ct_ptr) {\n          *dphase_ct_ptr = dphase_delta_write_iter - dphase_delta;\n        }\n      }\n    } else {\n      // multiallelic subcase\n#ifndef PGENLIB_NOPRINT\n      fputs(\"multiallelic variants not yet supported by ParseDosage16()\\n\", stderr);\n#endif\n      return kPglRetNotYetSupported;\n    }\n  }\n  return kPglRetSuccess;\n}\n\nPglErr IMPLPgrGetD(const uintptr_t* __restrict sample_include, const uint32_t* __restrict sample_include_cumulative_popcounts, uint32_t sample_ct, uint32_t vidx, PgenReaderMain* pgrp, uintptr_t* __restrict genovec, uintptr_t* __restrict dosage_present, uint16_t* dosage_main, uint32_t* dosage_ct_ptr) {\n  assert(vidx < pgrp->fi.raw_variant_ct);\n  if 
(!sample_ct) {\n    *dosage_ct_ptr = 0;\n    return kPglRetSuccess;\n  }\n  const uint32_t vrtype = GetPgfiVrtype(&(pgrp->fi), vidx);\n  if ((!VrtypeDosage(vrtype)) || (!dosage_present)) {\n    *dosage_ct_ptr = 0;\n    return ReadGenovecSubsetUnsafe(sample_include, sample_include_cumulative_popcounts, sample_ct, vidx, pgrp, nullptr, nullptr, genovec);\n  }\n  const unsigned char* fread_ptr = nullptr;\n  const unsigned char* fread_end = nullptr;\n  uint32_t phasepresent_ct;\n  PglErr reterr = ReadGenovecHphaseSubsetUnsafe(sample_include, sample_include_cumulative_popcounts, sample_ct, vidx, pgrp, &fread_ptr, &fread_end, genovec, nullptr, nullptr, &phasepresent_ct);\n  if (unlikely(reterr)) {\n    return reterr;\n  }\n  const uintptr_t* allele_idx_offsets = pgrp->fi.allele_idx_offsets;\n  const uint32_t allele_ct = allele_idx_offsets? (allele_idx_offsets[vidx + 1] - allele_idx_offsets[vidx]) : 2;\n  return ParseDosage16(fread_ptr, fread_end, sample_include, sample_ct, vidx, allele_ct, pgrp, dosage_ct_ptr, nullptr, nullptr, nullptr, dosage_present, dosage_main);\n}\n\nPglErr PgrGet1D(const uintptr_t* __restrict sample_include, PgrSampleSubsetIndex pssi, uint32_t sample_ct, uint32_t vidx, AlleleCode allele_idx, PgenReader* pgr_ptr, uintptr_t* __restrict allele_countvec, uintptr_t* __restrict dosage_present, uint16_t* dosage_main, uint32_t* dosage_ct_ptr) {\n  PgenReaderMain* pgrp = GetPgrp(pgr_ptr);\n  const uint32_t* sample_include_cumulative_popcounts = GetSicp(pssi);\n  const uintptr_t* allele_idx_offsets = pgrp->fi.allele_idx_offsets;\n  const uint32_t allele_ct = allele_idx_offsets? (allele_idx_offsets[vidx + 1] - allele_idx_offsets[vidx]) : 2;\n  if ((allele_ct == 2) || (!allele_idx)) {\n    uint32_t dosage_ct;\n    PglErr reterr = IMPLPgrGetD(sample_include, sample_include_cumulative_popcounts, sample_ct, vidx, pgrp, allele_countvec, dosage_present, dosage_main, &dosage_ct);\n    if (!allele_idx) {\n      GenovecInvertUnsafe(sample_ct, allele_countvec);\n      if (dosage_ct) {\n        BiallelicDosage16Invert(dosage_ct, dosage_main);\n      }\n    }\n    *dosage_ct_ptr = dosage_ct;\n    return reterr;\n  }\n  const uint32_t vrtype = pgrp->fi.vrtypes[vidx];\n  if (!VrtypeDosage(vrtype)) {\n    *dosage_ct_ptr = 0;\n    return IMPLPgrGet1(sample_include, sample_include_cumulative_popcounts, sample_ct, vidx, allele_idx, pgrp, allele_countvec);\n  }\n#ifndef PGENLIB_NOPRINT\n  fputs(\"multiallelic variants not yet supported by PgrGet1D()\\n\", stderr);\n#endif\n  return kPglRetNotYetSupported;\n}\n\nPglErr PgrGetInv1D(const uintptr_t* __restrict sample_include, PgrSampleSubsetIndex pssi, uint32_t sample_ct, uint32_t vidx, AlleleCode allele_idx, PgenReader* pgr_ptr, uintptr_t* __restrict allele_invcountvec, uintptr_t* __restrict dosage_present, uint16_t* dosage_main, uint32_t* dosage_ct_ptr) {\n  PgenReaderMain* pgrp = GetPgrp(pgr_ptr);\n  const uint32_t* sample_include_cumulative_popcounts = GetSicp(pssi);\n  const uintptr_t* allele_idx_offsets = pgrp->fi.allele_idx_offsets;\n  const uint32_t allele_ct = allele_idx_offsets? 
(allele_idx_offsets[vidx + 1] - allele_idx_offsets[vidx]) : 2;\n  if ((allele_ct == 2) || (!allele_idx)) {\n    uint32_t dosage_ct;\n    PglErr reterr = IMPLPgrGetD(sample_include, sample_include_cumulative_popcounts, sample_ct, vidx, pgrp, allele_invcountvec, dosage_present, dosage_main, &dosage_ct);\n    if (allele_idx) {\n      GenovecInvertUnsafe(sample_ct, allele_invcountvec);\n      if (dosage_ct) {\n        BiallelicDosage16Invert(dosage_ct, dosage_main);\n      }\n    }\n    *dosage_ct_ptr = dosage_ct;\n    return reterr;\n  }\n  const uint32_t vrtype = pgrp->fi.vrtypes[vidx];\n  if (!VrtypeDosage(vrtype)) {\n    *dosage_ct_ptr = 0;\n    return IMPLPgrGetInv1(sample_include, sample_include_cumulative_popcounts, sample_ct, vidx, allele_idx, pgrp, allele_invcountvec);\n  }\n#ifndef PGENLIB_NOPRINT\n  fputs(\"multiallelic variants not yet supported by PgrGetInv1D()\\n\", stderr);\n#endif\n  return kPglRetNotYetSupported;\n}\n\nPglErr GetAux1bHetIncr(const unsigned char* fread_end, uint32_t aux1b_mode, uint32_t raw_sample_ct, uint32_t allele_ct, uint32_t raw_10_ct, const unsigned char** fread_pp, uint32_t* __restrict raw_het_ctp) {\n  if (aux1b_mode == 15) {\n    return kPglRetSuccess;\n  }\n  uint32_t rare10_ct;\n  if (!aux1b_mode) {\n    const uint32_t fset_byte_ct = DivUp(raw_10_ct, 8);\n    rare10_ct = PopcountBytes(*fread_pp, fset_byte_ct);\n    *fread_pp += fset_byte_ct;\n  } else {\n    // aux1b_mode == 1\n    const unsigned char* group_info_iter;\n    PglErr reterr = ParseDifflistHeader(fread_end, raw_sample_ct, fread_pp, nullptr, &group_info_iter, &rare10_ct);\n    if (unlikely(reterr)) {\n      return reterr;\n    }\n    reterr = SkipDeltalistIds(fread_end, group_info_iter, rare10_ct, raw_sample_ct, 0, fread_pp);\n    if (unlikely(reterr)) {\n      return reterr;\n    }\n  }\n  uintptr_t detect_hom_mask_lo;\n  const uint32_t allele_code_logwidth = GetAux1bConsts(allele_ct, &detect_hom_mask_lo);\n  const uint32_t code10_logwidth = allele_code_logwidth + (allele_code_logwidth != 0);\n  const unsigned char* patch_10_fvals = *fread_pp;\n  const uint32_t fvals_byte_ct = DivUpU64(S_CAST(uint64_t, rare10_ct) << code10_logwidth, CHAR_BIT);\n  if (PtrAddCk(fread_end, fvals_byte_ct, fread_pp)) {\n    return kPglRetMalformedInput;\n  }\n  if (allele_ct == 3) {\n    const uint32_t hom22_ct = PopcountBytes(patch_10_fvals, fvals_byte_ct);\n    *raw_het_ctp += rare10_ct - hom22_ct;\n    return kPglRetSuccess;\n  }\n  // possible todo: vectorized het-counter, analogous to CountAux1bDense()\n  const uint32_t code10_width = 1U << code10_logwidth;\n  const uint32_t allele_code_width = 1U << allele_code_logwidth;\n  const uintptr_t detect_all_mask_lo = detect_hom_mask_lo | (detect_hom_mask_lo << allele_code_width);\n  const uintptr_t detect_all_mask_hi = detect_all_mask_lo << (allele_code_width - 1);\n  const uintptr_t detect_hom_mask_hi = detect_hom_mask_lo << (code10_width - 1);\n  const uint32_t fvals_word_ct_m1 = (fvals_byte_ct - 1) / kBytesPerWord;\n  uint32_t het_incr = 0;\n  for (uint32_t fvals_widx = 0; ; ++fvals_widx) {\n    uintptr_t fvals_bits;\n    if (fvals_widx >= fvals_word_ct_m1) {\n      if (fvals_widx > fvals_word_ct_m1) {\n        break;\n      }\n      fvals_bits = SubwordLoad(&(patch_10_fvals[fvals_widx * kBytesPerWord]), ModNz(fvals_byte_ct, kBytesPerWord));\n    } else {\n      CopyFromUnalignedOffsetW(&fvals_bits, patch_10_fvals, fvals_widx);\n    }\n    // allele_ct > 3 guaranteed\n    fvals_bits = fvals_bits ^ (fvals_bits << allele_code_width);\n    fvals_bits = 
detect_hom_mask_hi & (fvals_bits | ((fvals_bits | detect_all_mask_hi) - detect_all_mask_lo));\n    if (fvals_widx == fvals_word_ct_m1) {\n      fvals_bits = bzhi_max(fvals_bits, ModNz(rare10_ct << code10_logwidth, kBitsPerWord));\n    }\n    het_incr += PopcountWord(fvals_bits);\n  }\n  *raw_het_ctp += het_incr;\n  return kPglRetSuccess;\n}\n\nuint64_t U16VecSum(const uint16_t* __restrict uint16_vec, uint32_t entry_ct) {\n#ifdef USE_SSE2\n  // UniVecHsum32() could overflow once we exceed this\n  const uint32_t max_loop_len = (131072 / kInt32PerVec) - 1;\n\n  const VecW m16 = VCONST_W(kMask0000FFFF);\n  const VecW* uint16_vvec_iter = R_CAST(const VecW*, uint16_vec);\n  uint64_t sum = 0;\n  for (uint32_t full_vecs_remaining = entry_ct / (kBytesPerVec / sizeof(int16_t)); ; ) {\n    UniVec acc_even;\n    UniVec acc_odd;\n    acc_even.vw = vecw_setzero();\n    acc_odd.vw = vecw_setzero();\n    const VecW* uint16_vvec_stop;\n    if (full_vecs_remaining < max_loop_len) {\n      if (!full_vecs_remaining) {\n        const uint32_t trail_ct = entry_ct % (kBytesPerVec / sizeof(int16_t));\n        uint16_vec = DowncastKVecWToU16(uint16_vvec_iter);\n        for (uint32_t uii = 0; uii != trail_ct; ++uii) {\n          sum += uint16_vec[uii];\n        }\n        return sum;\n      }\n      uint16_vvec_stop = &(uint16_vvec_iter[full_vecs_remaining]);\n      full_vecs_remaining = 0;\n    } else {\n      uint16_vvec_stop = &(uint16_vvec_iter[max_loop_len]);\n      full_vecs_remaining -= max_loop_len;\n    }\n    do {\n      const VecW cur_vec = *uint16_vvec_iter++;\n      acc_even.vw = acc_even.vw + (cur_vec & m16);\n      acc_odd.vw = acc_odd.vw + (vecw_srli(cur_vec, 16) & m16);\n    } while (uint16_vvec_iter < uint16_vvec_stop);\n    sum += UniVecHsum32(acc_even);\n    sum += UniVecHsum32(acc_odd);\n  }\n#else\n  uint64_t sum = 0;\n  for (uint32_t uii = 0; uii != entry_ct; ++uii) {\n    sum += uint16_vec[uii];\n  }\n  return sum;\n#endif\n}\n\nPglErr GetPhasepresentAndSkipPhaseinfo(const unsigned char* fread_end, const uintptr_t* __restrict all_hets, uint32_t raw_sample_ct, uint32_t het_ct, const unsigned char** fread_pp, uintptr_t* __restrict phasepresent, uint32_t* __restrict phasepresent_ctp) {\n  const unsigned char* aux2_start = *fread_pp;\n  const uint32_t aux2_first_part_byte_ct = 1 + (het_ct / CHAR_BIT);\n  if (PtrAddCk(fread_end, aux2_first_part_byte_ct, fread_pp)) {\n    return kPglRetMalformedInput;\n  }\n  const uint32_t raw_sample_ctl = BitCtToWordCt(raw_sample_ct);\n  if (!(aux2_start[0] & 1)) {\n    memcpy(phasepresent, all_hets, raw_sample_ctl * kBytesPerWord);\n    *phasepresent_ctp = het_ct;\n    return kPglRetSuccess;\n  }\n  const uint32_t phasepresent_ct = PopcountBytes(aux2_start, aux2_first_part_byte_ct) - 1;\n  if (PtrAddCk(fread_end, DivUp(phasepresent_ct, CHAR_BIT), fread_pp)) {\n    return kPglRetMalformedInput;\n  }\n  *phasepresent_ctp = phasepresent_ct;\n  ExpandBytearr(aux2_start, all_hets, raw_sample_ctl, het_ct, 1, phasepresent);\n  return kPglRetSuccess;\n}\n\nPglErr GetUnphasedBiallelicHetCt(const uintptr_t* __restrict sample_include, const uintptr_t* raw_genoarr, const unsigned char* fread_ptr, const unsigned char* fread_end, uint32_t subsetted_het_ct, PgenReaderMain* pgrp, uint32_t* unphased_het_ctp) {\n  const uint32_t raw_sample_ct = pgrp->fi.raw_sample_ct;\n  uint32_t raw_het_ct;\n  if (!sample_include) {\n    raw_het_ct = subsetted_het_ct;\n  } else {\n    raw_het_ct = CountNyp(raw_genoarr, kMask5555, raw_sample_ct);\n  }\n  const uint32_t aux2_first_part_byte_ct = 
1 + (raw_het_ct / CHAR_BIT);\n  if (PtrCheck(fread_end, fread_ptr, aux2_first_part_byte_ct)) {\n    return kPglRetMalformedInput;\n  }\n  const uint32_t explicit_phasepresent = fread_ptr[0] & 1;\n  if (!explicit_phasepresent) {\n    // initial value of 0 is correct\n    return kPglRetSuccess;\n  }\n  if (raw_het_ct == subsetted_het_ct) {\n    *unphased_het_ctp = raw_het_ct + 1 - PopcountBytes(fread_ptr, aux2_first_part_byte_ct);\n    return kPglRetSuccess;\n  }\n  // A dedicated counting function would be faster, but this case\n  // should rarely come up.\n  uintptr_t* all_hets = pgrp->workspace_all_hets;\n  PgrDetectGenoarrHets(raw_genoarr, raw_sample_ct, all_hets);\n  uintptr_t* raw_phasepresent = pgrp->workspace_subset;\n  const uint32_t raw_sample_ctl = BitCtToWordCt(raw_sample_ct);\n  // todo: compare against ExpandThenSubsetBytearr followed by simple popcount\n  ExpandBytearr(fread_ptr, all_hets, raw_sample_ctl, raw_het_ct, 1, raw_phasepresent);\n  *unphased_het_ctp = subsetted_het_ct - PopcountWordsIntersect(raw_phasepresent, sample_include, raw_sample_ctl);\n  return kPglRetSuccess;\n}\n\nPglErr GetPhasedBiallelicGenotypeSubsetCounts(const uintptr_t* __restrict sample_include, const uintptr_t* __restrict sample_include_interleaved_vec, uint32_t sample_ct, uint32_t vidx, PgenReaderMain* pgrp, uint32_t* unphased_het_ctp, STD_ARRAY_REF(uint32_t, 4) genocounts) {\n  // Currently much less optimized than the other count functions.  (This case\n  // shouldn't come up much, the user has to be computing minimac-r2 on a file\n  // with no dosages...)\n  uintptr_t* raw_genovec = pgrp->workspace_vec;\n  const unsigned char* fread_ptr;\n  const unsigned char* fread_end;\n  PglErr reterr = ReadRawGenovec(1, vidx, pgrp, &fread_ptr, &fread_end, raw_genovec);\n  if (unlikely(reterr)) {\n    return reterr;\n  }\n  const uint32_t raw_sample_ct = pgrp->fi.raw_sample_ct;\n  ZeroTrailingNyps(raw_sample_ct, raw_genovec);\n  GenoarrCountSubsetFreqs(raw_genovec, sample_include_interleaved_vec, raw_sample_ct, sample_ct, genocounts);\n  return GetUnphasedBiallelicHetCt(sample_include, raw_genovec, fread_ptr, fread_end, genocounts[1], pgrp, unphased_het_ctp);\n}\n\n// Imputation r^2 computation:\n// * This function assumes the biallelic diploid case.  Divide by two to get\n//   the biallelic haploid value, for whatever that's worth.\n// * chrX requires sex information, so that's handled directly in\n//   LoadAlleleAndGenoCountsThread()... er, actually, we just give up on that\n//   for now.\n// * See PgrGetMDCounts() support functions below for multiallelic-diploid\n//   notes.\nPglErr GetBasicGenotypeCountsAndDosage16s(const uintptr_t* __restrict sample_include, const uintptr_t* __restrict sample_include_interleaved_vec, const uint32_t* __restrict sample_include_cumulative_popcounts, uint32_t sample_ct, uint32_t vidx, uint32_t is_minimac3_r2, PgenReaderMain* pgrp, double* imp_r2_ptr, STD_ARRAY_REF(uint32_t, 4) genocounts, uint64_t* __restrict all_dosages) {\n  // genocounts[0] := ref/ref, genocounts[1] := ref/altx,\n  // genocounts[2] := altx/alty, genocounts[3] := missing\n  const uint32_t vrtype = GetPgfiVrtype(&(pgrp->fi), vidx);\n  const uint32_t raw_sample_ct = pgrp->fi.raw_sample_ct;\n  const uint32_t subsetting_required = (sample_ct != raw_sample_ct);\n  uint32_t unphased_het_ct = 0;\n  // To avoid LD cache thrashing, we try to either always keep a subsetted\n  // cache, or never do so.  
(Always, when only hardcalls are present;\n  // otherwise never.)\n  if ((!(pgrp->fi.gflags & kfPgenGlobalDosagePresent)) ||\n      ((!(vrtype & 0x60)) && (!subsetting_required))) {\n    {\n      const uint32_t need_unphased_het_ct = is_minimac3_r2 && VrtypeHphase(vrtype);\n      PglErr reterr;\n      if (!(subsetting_required && need_unphased_het_ct)) {\n        reterr = GetBasicGenotypeCounts(sample_include, sample_include_interleaved_vec, sample_include_cumulative_popcounts, sample_ct, vidx, pgrp, need_unphased_het_ct? (&unphased_het_ct) : nullptr, genocounts);\n      } else {\n        reterr = GetPhasedBiallelicGenotypeSubsetCounts(sample_include, sample_include_interleaved_vec, sample_ct, vidx, pgrp, &unphased_het_ct, genocounts);\n      }\n      if (unlikely(reterr)) {\n        return reterr;\n      }\n    }\n  GetBasicGenotypeCountsAndDosage16s_basic_finish:\n    all_dosages[0] = (genocounts[0] * 2 + genocounts[1]) * 16384LLU;\n    all_dosages[1] = (genocounts[2] * 2 + genocounts[1]) * 16384LLU;\n    if (!imp_r2_ptr) {\n      return kPglRetSuccess;\n    }\n    // yeah, it's sinful to implement imputation r2 here...\n    const uint32_t nm_sample_ct = sample_ct - genocounts[3];\n    const uint64_t alt1_dosage = genocounts[2] * 0x8000LLU + genocounts[1] * 0x4000LLU;\n    uint64_t hap_alt1_ssq_x2 = genocounts[2] * 0x40000000LLU + genocounts[1] * 0x10000000LLU;\n    if (is_minimac3_r2) {\n      if (!VrtypeHphase(vrtype)) {\n        unphased_het_ct = genocounts[1];\n      }\n      hap_alt1_ssq_x2 += (genocounts[1] - unphased_het_ct) * 0x10000000LLU;\n    }\n    *imp_r2_ptr = BiallelicDiploidMinimac3R2(alt1_dosage, hap_alt1_ssq_x2, nm_sample_ct);\n    if (!is_minimac3_r2) {\n      *imp_r2_ptr *= 2;\n    }\n    return kPglRetSuccess;\n  }\n  uintptr_t* raw_genovec = pgrp->workspace_vec;\n  const unsigned char* fread_ptr;\n  const unsigned char* fread_end;\n  PglErr reterr = ReadRawGenovec(subsetting_required, vidx, pgrp, &fread_ptr, &fread_end, raw_genovec);\n  if (unlikely(reterr)) {\n    return reterr;\n  }\n  ZeroTrailingNyps(raw_sample_ct, raw_genovec);\n  if (!subsetting_required) {\n    GenoarrCountFreqsUnsafe(raw_genovec, raw_sample_ct, genocounts);\n  } else {\n    GenoarrCountSubsetFreqs(raw_genovec, sample_include_interleaved_vec, raw_sample_ct, sample_ct, genocounts);\n  }\n  if (!(vrtype & 0x60)) {\n    if (is_minimac3_r2 && VrtypeHphase(vrtype)) {\n      assert(!VrtypeMultiallelicHc(vrtype));\n      reterr = GetUnphasedBiallelicHetCt(subsetting_required? sample_include : nullptr, raw_genovec, fread_ptr, fread_end, genocounts[1], pgrp, &unphased_het_ct);\n      if (unlikely(reterr)) {\n        return reterr;\n      }\n    }\n    goto GetBasicGenotypeCountsAndDosage16s_basic_finish;\n  }\n  const uintptr_t* allele_idx_offsets = pgrp->fi.allele_idx_offsets;\n  const uint32_t allele_ct = allele_idx_offsets? 
(allele_idx_offsets[vidx + 1] - allele_idx_offsets[vidx]) : 2;\n  const uint32_t raw_sample_ctl = BitCtToWordCt(raw_sample_ct);\n  uintptr_t* raw_phasepresent = pgrp->workspace_subset;\n  uint32_t raw_phasepresent_ct = 0;\n  if (VrtypeHphase(vrtype)) {\n    uint32_t raw_het_ct = genocounts[1];  // inaccurate if subsetting_required\n    if (!is_minimac3_r2) {\n      if (VrtypeMultiallelicHc(vrtype)) {\n        const uint32_t aux1_first_byte = *fread_ptr++;\n        const uint32_t aux1a_mode = aux1_first_byte & 15;\n        const uint32_t aux1b_mode = aux1_first_byte >> 4;\n        uint32_t raw_10_ct = 0;\n        if ((!aux1a_mode) || (!aux1b_mode) || subsetting_required) {\n          GenovecCount12Unsafe(raw_genovec, raw_sample_ct, &raw_het_ct, &raw_10_ct);\n        }\n        reterr = SkipAux1a(fread_end, aux1a_mode, raw_sample_ct, allele_ct, raw_het_ct, &fread_ptr);\n        if (unlikely(reterr)) {\n          return reterr;\n        }\n        reterr = GetAux1bHetIncr(fread_end, aux1b_mode, raw_sample_ct, allele_ct, raw_10_ct, &fread_ptr, &raw_het_ct);\n        if (unlikely(reterr)) {\n          return reterr;\n        }\n      } else if (subsetting_required) {\n        raw_het_ct = CountNyp(raw_genovec, kMask5555, raw_sample_ct);\n      }\n      reterr = SkipAux2(fread_end, raw_het_ct, &fread_ptr, nullptr);\n      if (unlikely(reterr)) {\n        return reterr;\n      }\n    } else {\n      assert(!VrtypeMultiallelicHc(vrtype));\n      uintptr_t* all_hets = pgrp->workspace_all_hets;\n      PgrDetectGenoarrHets(raw_genovec, raw_sample_ct, all_hets);\n      if (subsetting_required) {\n        raw_het_ct = PopcountWords(all_hets, raw_sample_ctl);\n      }\n      const uint32_t first_half_byte_ct = 1 + (raw_het_ct / CHAR_BIT);\n      const uint32_t explicit_phasepresent = fread_ptr[0] & 1;\n      if (explicit_phasepresent) {\n        ExpandBytearr(fread_ptr, all_hets, raw_sample_ctl, raw_het_ct, 1, raw_phasepresent);\n        raw_phasepresent_ct = PopcountBytes(fread_ptr, first_half_byte_ct) - 1;\n        const uint32_t second_half_byte_ct = DivUp(raw_phasepresent_ct, CHAR_BIT);\n        fread_ptr = &(fread_ptr[first_half_byte_ct + second_half_byte_ct]);\n      } else {\n        raw_phasepresent_ct = raw_het_ct;\n        memcpy(raw_phasepresent, all_hets, raw_sample_ctl * sizeof(intptr_t));\n        fread_ptr = &(fread_ptr[first_half_byte_ct]);\n      }\n    }\n  } else if (VrtypeMultiallelicHc(vrtype)) {\n    reterr = SkipAux1(fread_end, raw_genovec, raw_sample_ct, allele_ct, &fread_ptr);\n    if (unlikely(reterr)) {\n      return reterr;\n    }\n  }\n  if (allele_ct != 2) {\n    // Maybe make this an invalid function call?  
If that happens, the\n    // VrtypeMultiallelicHc() branch above can be removed.\n#ifndef PGENLIB_NOPRINT\n    fputs(\"multiallelic dosages not yet supported by GetBasicGenotypeCountsAndDosage16s()\\n\", stderr);\n#endif\n    return kPglRetNotYetSupported;\n  }\n\n  const uint32_t is_unconditional_dosage = ((vrtype & 0x60) == 0x40);\n  uint64_t alt1_dosage = 0;\n  uint32_t dosage_ct = 0;\n  STD_ARRAY_DECL(uint32_t, 4, replaced_genocounts);\n  if ((!is_minimac3_r2) || (!(vrtype & 0x90))) {\n    uint64_t alt1_dosage_sq_sum = 0;\n    if (is_unconditional_dosage) {\n      // needs to be handled separately from the other cases due to possible\n      // presence of missing values.\n      // note that this code will also need to be adjusted when multiallelic\n      // support is added.\n      STD_ARRAY_FILL0(replaced_genocounts);\n      const unsigned char* dosage_main_read = fread_ptr;\n      if (PtrAddCk(fread_end, raw_sample_ct * sizeof(int16_t), &fread_ptr)) {\n        return kPglRetMalformedInput;\n      }\n      if (subsetting_required) {\n        uintptr_t sample_uidx_base = 0;\n        uintptr_t sample_include_bits = sample_include[0];\n        for (uint32_t sample_idx = 0; sample_idx != sample_ct; ++sample_idx) {\n          const uintptr_t sample_uidx = BitIter1(sample_include, &sample_uidx_base, &sample_include_bits);\n          const uintptr_t cur_dosage_val = CopyFromUnalignedOffsetU16ZX(dosage_main_read, sample_uidx);\n          if (cur_dosage_val != 65535) {\n            alt1_dosage += cur_dosage_val;\n\n            // todo: check if this is slow enough to justify removing it from\n            // the main loop\n            alt1_dosage_sq_sum += cur_dosage_val * cur_dosage_val;\n            ++dosage_ct;\n          }\n        }\n      } else {\n        for (uint32_t sample_uidx = 0; sample_uidx != sample_ct; ++sample_uidx) {\n          const uintptr_t cur_dosage_val = CopyFromUnalignedOffsetU16ZX(dosage_main_read, sample_uidx);\n          if (cur_dosage_val != 65535) {\n            alt1_dosage += cur_dosage_val;\n            alt1_dosage_sq_sum += cur_dosage_val * cur_dosage_val;\n            ++dosage_ct;\n          }\n        }\n      }\n      // update (20 Mar 2019): .pgen specification tightened to remove the need\n      // to update replaced_genocounts in the main loops above.\n      STD_ARRAY_COPY(genocounts, 4, replaced_genocounts);\n      replaced_genocounts[3] = replaced_genocounts[3] + dosage_ct - sample_ct;\n    } else {\n      uintptr_t* raw_dosage_present = pgrp->workspace_dosage_present;\n      uint32_t raw_dosage_ct;\n      if (!(vrtype & 0x40)) {\n        // dosage list\n        if (unlikely(ParseAndSaveDeltalistAsBitarr(fread_end, raw_sample_ct, &fread_ptr, raw_dosage_present, &raw_dosage_ct))) {\n          return kPglRetMalformedInput;\n        }\n      } else {\n        // dosage bitarray\n        raw_dosage_present[raw_sample_ctl - 1] = 0;\n        const uint32_t raw_sample_ctb = DivUp(raw_sample_ct, CHAR_BIT);\n        memcpy(raw_dosage_present, fread_ptr, raw_sample_ctb);\n        fread_ptr = &(fread_ptr[raw_sample_ctb]);\n        raw_dosage_ct = PopcountWords(raw_dosage_present, raw_sample_ctl);\n      }\n      const unsigned char* dosage_main_read = fread_ptr;\n      if (PtrAddCk(fread_end, raw_dosage_ct * sizeof(int16_t), &fread_ptr)) {\n        return kPglRetMalformedInput;\n      }\n      if (subsetting_required) {\n        uintptr_t sample_widx = 0;\n        uintptr_t dosage_present_bits = raw_dosage_present[0];\n        for (uint32_t dosage_idx = 0; 
dosage_idx != raw_dosage_ct; ++dosage_idx) {\n          const uintptr_t lowbit = BitIter1y(raw_dosage_present, &sample_widx, &dosage_present_bits);\n          if (sample_include[sample_widx] & lowbit) {\n            const uintptr_t cur_dosage_val = CopyFromUnalignedOffsetU16ZX(dosage_main_read, dosage_idx);\n            alt1_dosage += cur_dosage_val;\n            alt1_dosage_sq_sum += cur_dosage_val * cur_dosage_val;\n            ++dosage_ct;\n          }\n        }\n        GenoarrCountSubsetIntersectFreqs(raw_genovec, raw_dosage_present, sample_include, raw_sample_ct, replaced_genocounts);\n      } else {\n        if (!imp_r2_ptr) {\n          for (uint32_t dosage_idx = 0; dosage_idx != raw_dosage_ct; ++dosage_idx) {\n            uint16_t cur_dosage_u16;\n            CopyFromUnalignedOffsetU16(&cur_dosage_u16, dosage_main_read, dosage_idx);\n            alt1_dosage += cur_dosage_u16;\n          }\n        } else {\n          for (uint32_t dosage_idx = 0; dosage_idx != raw_dosage_ct; ++dosage_idx) {\n            const uintptr_t cur_dosage_val = CopyFromUnalignedOffsetU16ZX(dosage_main_read, dosage_idx);\n            alt1_dosage += cur_dosage_val;\n            alt1_dosage_sq_sum += cur_dosage_val * cur_dosage_val;\n          }\n        }\n        dosage_ct = raw_dosage_ct;\n        GenoarrCountSubsetFreqs2(raw_genovec, raw_dosage_present, raw_sample_ct, raw_dosage_ct, replaced_genocounts);\n      }\n    }\n    const uint32_t replaced_ct = replaced_genocounts[0] + replaced_genocounts[1] + replaced_genocounts[2];\n    const uint32_t remaining_het_ct = genocounts[1] - replaced_genocounts[1];\n    const uint32_t remaining_hom_alt_ct = genocounts[2] - replaced_genocounts[2];\n    const uint32_t alt1_ct = 2 * remaining_hom_alt_ct + remaining_het_ct;\n    alt1_dosage += alt1_ct * 16384LLU;\n    all_dosages[1] = alt1_dosage;\n    const uint32_t nondosage_nm_ct = sample_ct - genocounts[3] - replaced_ct;\n    const uint32_t new_sample_nm_ct = dosage_ct + nondosage_nm_ct;\n    all_dosages[0] = new_sample_nm_ct * 32768LLU - alt1_dosage;\n    if (!imp_r2_ptr) {\n      return kPglRetSuccess;\n    }\n    // possible todo: also move all-hardcall-phase-present, no-dosage\n    // is_minimac3_r2 case under this branch, since we can just set imp_r2 to\n    // NaN or 1.\n    // 16384^2, 32768^2\n    alt1_dosage_sq_sum += remaining_het_ct * 0x10000000LLU + remaining_hom_alt_ct * 0x40000000LLU;\n    *imp_r2_ptr = BiallelicDiploidMinimac3R2(alt1_dosage, alt1_dosage_sq_sum, new_sample_nm_ct);\n    if (!is_minimac3_r2) {\n      *imp_r2_ptr *= 2;\n    }\n    return kPglRetSuccess;\n  }\n  // Need to deal with implicitly phased dosages.  Best to have raw_genovec,\n  // raw_phasepresent, dosage_present, and dosage_main all available, then loop\n  // over everything at once.\n  // (phaseinfo is irrelevant since only absolute value of (left - right)\n  // matters.)\n\n  // We have the following 2x2x3 cases to deal with:\n  // - Subsetted vs. un-subsetted.  Un-subsetted comes up a lot, so we have an\n  //   optimized code path for it.\n  // - Unconditional vs. conditional dosage.  Unconditional should not come up\n  //   much, so we just mock up raw_dosage_present... er, actually, that\n  //   doesn't work because dosage_main would also need to be collapsed.  Sigh.\n  //   Ok, it's still handled separately.\n  // - Only hardcall-phase, vs. only dosage-phase, vs. both.  
At least we can\n  //   merge the \"only dosage-phase\" and \"both\" cases.\n  // So we end up with 8 primary code paths.\n  // This is kind of a nightmare; it would obviously be nicer to move this\n  // out of pgenlib_internal, and that may eventually happen.  But we don't\n  // want users to be discouraged from running --minimac3-r2-filter when it's\n  // appropriate just because it's a lot slower than other standard filters;\n  // and this also serves as a testing ground for efficient phased-dosage\n  // handling strategies.\n  if (!VrtypeHphase(vrtype)) {\n    ZeroWArr(raw_sample_ctl, raw_phasepresent);\n  }\n  uintptr_t* raw_dosage_present = nullptr;\n  const unsigned char* dosage_main_read;\n  uint32_t raw_dosage_ct = 0;\n  if (is_unconditional_dosage) {\n    dosage_main_read = fread_ptr;\n    if (PtrAddCk(fread_end, raw_sample_ct * sizeof(int16_t), &fread_ptr)) {\n      return kPglRetMalformedInput;\n    }\n    // raw_dosage_ct unused in this case.\n  } else {\n    // could move some duplicate code before the big branch\n    raw_dosage_present = pgrp->workspace_dosage_present;\n    if (!(vrtype & 0x40)) {\n      // dosage list\n      if (unlikely(ParseAndSaveDeltalistAsBitarr(fread_end, raw_sample_ct, &fread_ptr, raw_dosage_present, &raw_dosage_ct))) {\n        return kPglRetMalformedInput;\n      }\n    } else {\n      // dosage bitarray\n      raw_dosage_present[raw_sample_ctl - 1] = 0;\n      const uint32_t raw_sample_ctb = DivUp(raw_sample_ct, CHAR_BIT);\n      memcpy(raw_dosage_present, fread_ptr, raw_sample_ctb);\n      fread_ptr = &(fread_ptr[raw_sample_ctb]);\n      raw_dosage_ct = PopcountWords(raw_dosage_present, raw_sample_ctl);\n    }\n    dosage_main_read = fread_ptr;\n    if (PtrAddCk(fread_end, raw_dosage_ct * sizeof(int16_t), &fread_ptr)) {\n      return kPglRetMalformedInput;\n    }\n  }\n  const unsigned char* dosage_main_biter = dosage_main_read;\n  uint64_t hap_ssq_x2 = 0;\n  uint32_t phased_hc_het_ct = 0;\n  if (!(vrtype & 0x80)) {\n    if (is_unconditional_dosage) {\n      if (!subsetting_required) {\n        const uint32_t raw_sample_ctl_m1 = raw_sample_ctl - 1;\n        uint32_t loop_len = kBitsPerWord;\n        for (uint32_t widx = 0; ; ++widx) {\n          if (widx >= raw_sample_ctl_m1) {\n            if (widx > raw_sample_ctl_m1) {\n              break;\n            }\n            loop_len = ModNz(raw_sample_ct, kBitsPerWord);\n          }\n          uintptr_t phasepresent_word = raw_phasepresent[widx];\n          for (uint32_t uii = 0; uii != loop_len; ++uii) {\n            const uintptr_t cur_dosage_val = CopyFromUnalignedIncrU16ZX(&dosage_main_biter);\n            if (cur_dosage_val != 65535) {\n              alt1_dosage += cur_dosage_val;\n              hap_ssq_x2 += cur_dosage_val * cur_dosage_val;\n              ++dosage_ct;\n              if (phasepresent_word & 1) {\n                // For each dosage, when phasepresent bit is set, implicit\n                // dphase_delta value is 16384 - |16384 - x|.\n                const uintptr_t homdist = 16384 - abs_i32(16384 - cur_dosage_val);\n                hap_ssq_x2 += homdist * homdist;\n              }\n            }\n            phasepresent_word = phasepresent_word >> 1;\n          }\n        }\n      } else {\n        for (uint32_t widx = 0; widx != raw_sample_ctl; ++widx) {\n          uintptr_t sample_include_word = sample_include[widx];\n          if (!sample_include_word) {\n            continue;\n          }\n          const uintptr_t phasepresent_word = raw_phasepresent[widx];\n          
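// The unconditional track stores one 16-bit entry per raw sample, so\n          // this word's entries start at raw-sample offset widx * kBitsPerWord.\n          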
const unsigned char* cur_dosage_main = &(dosage_main_read[widx * kBitsPerWord * sizeof(int16_t)]);\n          do {\n            const uint32_t sample_idx_lowbits = ctzw(sample_include_word);\n            const uintptr_t cur_dosage_val = CopyFromUnalignedOffsetU16ZX(cur_dosage_main, sample_idx_lowbits);\n            const uintptr_t lowbit = sample_include_word & (-sample_include_word);\n            if (cur_dosage_val != 65535) {\n              alt1_dosage += cur_dosage_val;\n              hap_ssq_x2 += cur_dosage_val * cur_dosage_val;\n              ++dosage_ct;\n              if (lowbit & phasepresent_word) {\n                const uintptr_t homdist = 16384 - abs_i32(16384 - cur_dosage_val);\n                hap_ssq_x2 += homdist * homdist;\n              }\n            }\n            sample_include_word ^= lowbit;\n          } while (sample_include_word);\n        }\n      }\n      STD_ARRAY_COPY(genocounts, 4, replaced_genocounts);\n      replaced_genocounts[3] = replaced_genocounts[3] + dosage_ct - sample_ct;\n    } else {  // !is_unconditional_dosage\n      if (!subsetting_required) {\n        // phased_hc_het_ct := popcount(phasepresent & (~dosage_present))\n        phased_hc_het_ct = raw_phasepresent_ct - PopcountWordsIntersect(raw_phasepresent, raw_dosage_present, raw_sample_ctl);\n\n        for (uint32_t widx = 0; widx != raw_sample_ctl; ++widx) {\n          uintptr_t dosage_present_word = raw_dosage_present[widx];\n          if (dosage_present_word) {\n            const uintptr_t phasepresent_word = raw_phasepresent[widx];\n            do {\n              const uintptr_t cur_dosage_val = CopyFromUnalignedIncrU16ZX(&dosage_main_biter);\n              alt1_dosage += cur_dosage_val;\n              const uintptr_t lowbit = dosage_present_word & (-dosage_present_word);\n              hap_ssq_x2 += cur_dosage_val * cur_dosage_val;\n              if (lowbit & phasepresent_word) {\n                const uintptr_t homdist = 16384 - abs_i32(16384 - cur_dosage_val);\n                hap_ssq_x2 += homdist * homdist;\n              }\n              dosage_present_word ^= lowbit;\n            } while (dosage_present_word);\n          }\n        }\n        dosage_ct = raw_dosage_ct;\n        GenoarrCountSubsetFreqs2(raw_genovec, raw_dosage_present, raw_sample_ct, raw_dosage_ct, replaced_genocounts);\n      } else {\n        for (uint32_t widx = 0; widx != raw_sample_ctl; ++widx) {\n          const uintptr_t sample_include_word = sample_include[widx];\n          uintptr_t dosage_present_word = raw_dosage_present[widx];\n          if (!sample_include_word) {\n            dosage_main_biter = &(dosage_main_biter[PopcountWord(dosage_present_word) * sizeof(int16_t)]);\n            continue;\n          }\n          const uintptr_t phasepresent_word = raw_phasepresent[widx];\n          phased_hc_het_ct += PopcountWord(sample_include_word & phasepresent_word & (~dosage_present_word));\n          while (dosage_present_word) {\n            const uintptr_t lowbit = dosage_present_word & (-dosage_present_word);\n            if (lowbit & sample_include_word) {\n              const uintptr_t cur_dosage_val = CopyFromUnalignedU16ZX(dosage_main_biter);\n              alt1_dosage += cur_dosage_val;\n              hap_ssq_x2 += cur_dosage_val * cur_dosage_val;\n              ++dosage_ct;\n              if (lowbit & phasepresent_word) {\n                const uintptr_t homdist = 16384 - abs_i32(16384 - cur_dosage_val);\n                hap_ssq_x2 += homdist * homdist;\n              }\n            }\n            
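// Advance the raw-entry iterator even when this sample was filtered\n            // out: dosage_main holds one entry per raw dosage_present bit.\n            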
dosage_present_word ^= lowbit;\n            dosage_main_biter += sizeof(int16_t);\n          }\n        }\n        GenoarrCountSubsetIntersectFreqs(raw_genovec, raw_dosage_present, sample_include, raw_sample_ct, replaced_genocounts);\n      }\n    }\n  } else {\n    if (is_unconditional_dosage) {\n      if (PtrCheck(fread_end, fread_ptr, raw_sample_ct * sizeof(int16_t))) {\n        return kPglRetMalformedInput;\n      }\n      const unsigned char* dphase_delta_read = fread_ptr;\n      if (!subsetting_required) {\n        for (uint32_t sample_uidx = 0; sample_uidx != raw_sample_ct; ++sample_uidx) {\n          const uintptr_t cur_dosage_val = CopyFromUnalignedOffsetU16ZX(dosage_main_read, sample_uidx);\n          if (cur_dosage_val != 65535) {\n            alt1_dosage += cur_dosage_val;\n            hap_ssq_x2 += cur_dosage_val * cur_dosage_val;\n            ++dosage_ct;\n            // .pgen specification now requires this value to never be missing.\n            const intptr_t dphase_delta_val = CopyFromUnalignedOffsetI16ZX(dphase_delta_read, sample_uidx);\n            hap_ssq_x2 += dphase_delta_val * dphase_delta_val;\n          }\n        }\n      } else {\n        for (uint32_t widx = 0; widx != raw_sample_ctl; ++widx) {\n          uintptr_t sample_include_word = sample_include[widx];\n          if (!sample_include_word) {\n            continue;\n          }\n          const unsigned char* cur_dosage_main = &(dosage_main_read[widx * kBitsPerWord * sizeof(int16_t)]);\n          const unsigned char* cur_dphase_delta = &(dphase_delta_read[widx * kBitsPerWord * sizeof(int16_t)]);\n          do {\n            const uint32_t sample_idx_lowbits = ctzw(sample_include_word);\n            const uintptr_t cur_dosage_val = CopyFromUnalignedOffsetU16ZX(cur_dosage_main, sample_idx_lowbits);\n            if (cur_dosage_val != 65535) {\n              alt1_dosage += cur_dosage_val;\n              hap_ssq_x2 += cur_dosage_val * cur_dosage_val;\n              ++dosage_ct;\n              const intptr_t dphase_delta_val = CopyFromUnalignedOffsetI16ZX(cur_dphase_delta, sample_idx_lowbits);\n              hap_ssq_x2 += dphase_delta_val * dphase_delta_val;\n            }\n            sample_include_word &= sample_include_word - 1;\n          } while (sample_include_word);\n        }\n      }\n      STD_ARRAY_COPY(genocounts, 4, replaced_genocounts);\n      replaced_genocounts[3] = replaced_genocounts[3] + dosage_ct - sample_ct;\n    } else {\n      const unsigned char* file_dphase_present = fread_ptr;\n      const uint32_t raw_dosage_ctb = DivUp(raw_dosage_ct, CHAR_BIT);\n      if (PtrAddCk(fread_end, raw_dosage_ctb, &fread_ptr)) {\n        return kPglRetMalformedInput;\n      }\n      const uint32_t raw_dphase_ct = PopcountBytes(file_dphase_present, raw_dosage_ctb);\n      if (PtrCheck(fread_end, fread_ptr, raw_dphase_ct * sizeof(int16_t))) {\n        return kPglRetMalformedInput;\n      }\n      uintptr_t* raw_dphase_present = pgrp->workspace_dphase_present;\n      ExpandBytearr(file_dphase_present, raw_dosage_present, raw_sample_ctl, raw_dosage_ct, 0, raw_dphase_present);\n      const unsigned char* dphase_delta_biter = fread_ptr;\n      if (!subsetting_required) {\n        phased_hc_het_ct = raw_phasepresent_ct - PopcountWordsIntersect(raw_phasepresent, raw_dosage_present, raw_sample_ctl);\n\n        for (uint32_t widx = 0; widx != raw_sample_ctl; ++widx) {\n          uintptr_t dosage_present_word = raw_dosage_present[widx];\n          if (dosage_present_word) {\n            const uintptr_t 
phasepresent_word = raw_phasepresent[widx];\n            const uintptr_t dphase_present_word = raw_dphase_present[widx];\n            do {\n              const uintptr_t cur_dosage_val = CopyFromUnalignedIncrU16ZX(&dosage_main_biter);\n              alt1_dosage += cur_dosage_val;\n              const uintptr_t lowbit = dosage_present_word & (-dosage_present_word);\n              hap_ssq_x2 += cur_dosage_val * cur_dosage_val;\n              if (lowbit & dphase_present_word) {\n                const intptr_t dphase_delta_val = CopyFromUnalignedIncrI16ZX(&dphase_delta_biter);\n                hap_ssq_x2 += dphase_delta_val * dphase_delta_val;\n              } else if (lowbit & phasepresent_word) {\n                const uintptr_t homdist = 16384 - abs_i32(16384 - cur_dosage_val);\n                hap_ssq_x2 += homdist * homdist;\n              }\n              dosage_present_word ^= lowbit;\n            } while (dosage_present_word);\n          }\n        }\n        dosage_ct = raw_dosage_ct;\n        GenoarrCountSubsetFreqs2(raw_genovec, raw_dosage_present, raw_sample_ct, raw_dosage_ct, replaced_genocounts);\n      } else {\n        for (uint32_t widx = 0; widx != raw_sample_ctl; ++widx) {\n          const uintptr_t sample_include_word = sample_include[widx];\n          const uintptr_t dphase_present_word = raw_dphase_present[widx];\n          uintptr_t dosage_present_word = raw_dosage_present[widx];\n          if (!sample_include_word) {\n            dosage_main_biter = &(dosage_main_biter[PopcountWord(dosage_present_word) * sizeof(int16_t)]);\n            dphase_delta_biter = &(dphase_delta_biter[PopcountWord(dphase_present_word) * sizeof(int16_t)]);\n            continue;\n          }\n          const uintptr_t phasepresent_word = raw_phasepresent[widx];\n          phased_hc_het_ct += PopcountWord(sample_include_word & phasepresent_word & (~dosage_present_word));\n          while (dosage_present_word) {\n            const uintptr_t lowbit = dosage_present_word & (-dosage_present_word);\n            const uintptr_t dphase_here = lowbit & dphase_present_word;\n            if (lowbit & sample_include_word) {\n              const uintptr_t cur_dosage_val = CopyFromUnalignedU16ZX(dosage_main_biter);\n              alt1_dosage += cur_dosage_val;\n              hap_ssq_x2 += cur_dosage_val * cur_dosage_val;\n              ++dosage_ct;\n              if (dphase_here) {\n                const intptr_t dphase_delta_val = CopyFromUnalignedI16ZX(dphase_delta_biter);\n                hap_ssq_x2 += dphase_delta_val * dphase_delta_val;\n              } else if (lowbit & phasepresent_word) {\n                const uintptr_t homdist = 16384 - abs_i32(16384 - cur_dosage_val);\n                hap_ssq_x2 += homdist * homdist;\n              }\n            }\n            dphase_delta_biter += (dphase_here != 0) * sizeof(int16_t);\n            dosage_present_word ^= lowbit;\n            dosage_main_biter += sizeof(int16_t);\n          }\n        }\n        GenoarrCountSubsetIntersectFreqs(raw_genovec, raw_dosage_present, sample_include, raw_sample_ct, replaced_genocounts);\n      }\n    }\n  }\n  const uint32_t replaced_ct = replaced_genocounts[0] + replaced_genocounts[1] + replaced_genocounts[2];\n  const uint32_t remaining_het_ct = genocounts[1] - replaced_genocounts[1];\n  const uint32_t remaining_hom_alt_ct = genocounts[2] - replaced_genocounts[2];\n  const uint32_t alt1_ct = 2 * remaining_hom_alt_ct + remaining_het_ct;\n  alt1_dosage += alt1_ct * 16384LLU;\n  all_dosages[1] = alt1_dosage;\n  const 
uint32_t nondosage_nm_ct = sample_ct - genocounts[3] - replaced_ct;\n  const uint32_t new_sample_nm_ct = dosage_ct + nondosage_nm_ct;\n  all_dosages[0] = new_sample_nm_ct * 32768LLU - alt1_dosage;\n  hap_ssq_x2 += (remaining_het_ct + phased_hc_het_ct) * 0x10000000LLU + remaining_hom_alt_ct * 0x40000000LLU;\n  *imp_r2_ptr = BiallelicDiploidMinimac3R2(alt1_dosage, hap_ssq_x2, new_sample_nm_ct);\n  return kPglRetSuccess;\n}\n\nPglErr PgrGetDCounts(const uintptr_t* __restrict sample_include, const uintptr_t* __restrict sample_include_interleaved_vec, PgrSampleSubsetIndex pssi, uint32_t sample_ct, uint32_t vidx, uint32_t is_minimac3_r2, PgenReader* pgr_ptr, double* imp_r2_ptr, STD_ARRAY_REF(uint32_t, 4) genocounts, uint64_t* __restrict all_dosages) {\n  if (!sample_ct) {\n    STD_ARRAY_REF_FILL0(4, genocounts);\n    all_dosages[0] = 0;\n    all_dosages[1] = 0;\n    if (imp_r2_ptr) {\n      *imp_r2_ptr = 0.0 / 0.0;\n    }\n    return kPglRetSuccess;\n  }\n  PgenReaderMain* pgrp = GetPgrp(pgr_ptr);\n  assert(vidx < pgrp->fi.raw_variant_ct);\n  return GetBasicGenotypeCountsAndDosage16s(sample_include, sample_include_interleaved_vec, GetSicp(pssi), sample_ct, vidx, is_minimac3_r2, pgrp, imp_r2_ptr, genocounts, all_dosages);\n}\n\n// Does not zero-initialize results[].\nvoid CountAllBytes64(const void* bytearr, uintptr_t byte_ct, uint64_t* __restrict results) {\n  const unsigned char* bytearr_uc = S_CAST(const unsigned char*, bytearr);\n  for (uintptr_t ulii = 0; ulii != byte_ct; ++ulii) {\n    results[bytearr_uc[ulii]] += 1;\n  }\n}\n\n// Does not zero-initialize results[].\nvoid CountAllNybbles64(const void* nybblearr, uintptr_t nybble_ct, uint64_t* __restrict results) {\n  // possible todo: for sufficiently large nybble_ct, use CountAllBytes and\n  // then postprocess\n  const uintptr_t fullbyte_ct = nybble_ct / 2;\n  const unsigned char* nybblearr_uc = S_CAST(const unsigned char*, nybblearr);\n  for (uintptr_t ulii = 0; ulii != fullbyte_ct; ++ulii) {\n    const uint32_t uii = nybblearr_uc[ulii];\n    results[uii & 15] += 1;\n    results[uii >> 4] += 1;\n  }\n  if (nybble_ct % 2) {\n    results[nybblearr_uc[fullbyte_ct] & 15] += 1;\n  }\n}\n\nvoid CountAllAux1aDense(const void* patch_01_fvals, uint32_t allele_ct, uint32_t rare01_ct, uint64_t* __restrict one_cts) {\n  one_cts[1] -= rare01_ct;\n  if (allele_ct < 5) {\n    if (allele_ct == 3) {\n      // all entries are 0/1 -> 0/2\n      one_cts[2] = rare01_ct;\n      return;\n    }\n    const uint32_t allele_code_byte_ct = DivUp(rare01_ct, 8);\n    const uint32_t alt3_ct = PopcountBytes(patch_01_fvals, allele_code_byte_ct);\n    one_cts[2] = rare01_ct - alt3_ct;\n    one_cts[3] = alt3_ct;\n    return;\n  }\n  if (allele_ct < 19) {\n    if (allele_ct < 7) {\n      STD_ARRAY_DECL(uint32_t, 4, rare0het_counts);\n#ifndef NO_UNALIGNED\n      GenoarrCountFreqs(S_CAST(const uintptr_t*, patch_01_fvals), rare01_ct, rare0het_counts);\n#else\n      GenoarrbCountFreqs(S_CAST(const unsigned char*, patch_01_fvals), rare01_ct, rare0het_counts);\n#endif\n      for (uint32_t allele_idx_p2 = 2; allele_idx_p2 != allele_ct; ++allele_idx_p2) {\n        one_cts[allele_idx_p2] = rare0het_counts[allele_idx_p2 - 2];\n      }\n      return;\n    }\n    CountAllNybbles64(patch_01_fvals, rare01_ct, &(one_cts[2]));\n    return;\n  }\n  CountAllBytes64(patch_01_fvals, rare01_ct, &(one_cts[2]));\n}\n\n// assumes one_cts[1] initialized to genocounts[1]\n// sample_include should be nullptr if we aren't subsetting\nPglErr CountAllAux1a(const unsigned char* fread_end, const 
uintptr_t* __restrict sample_include, const uintptr_t* __restrict raw_genoarr, uint32_t aux1a_mode, uint32_t raw_sample_ct, uint32_t allele_ct, uint32_t raw_01_ct, const unsigned char** fread_pp, uint64_t* __restrict one_cts, uint32_t* __restrict deltalist_workspace) {\n  if (aux1a_mode == 15) {\n    return kPglRetSuccess;\n  }\n  if (!sample_include) {\n    uint32_t rare01_ct;\n    if (!aux1a_mode) {\n      const uint32_t fset_byte_ct = DivUp(raw_01_ct, CHAR_BIT);\n      rare01_ct = PopcountBytes(*fread_pp, fset_byte_ct);\n      *fread_pp += fset_byte_ct;\n    } else {\n      const unsigned char* group_info_iter;\n      PglErr reterr = ParseDifflistHeader(fread_end, raw_sample_ct, fread_pp, nullptr, &group_info_iter, &rare01_ct);\n      if (unlikely(reterr)) {\n        return reterr;\n      }\n      reterr = SkipDeltalistIds(fread_end, group_info_iter, rare01_ct, raw_sample_ct, 0, fread_pp);\n      if (unlikely(reterr)) {\n        return reterr;\n      }\n    }\n    const unsigned char* patch_01_fvals = *fread_pp;\n    const uint32_t fvals_byte_ct = GetAux1aAlleleEntryByteCt(allele_ct, rare01_ct);\n    if (PtrAddCk(fread_end, fvals_byte_ct, fread_pp)) {\n      return kPglRetMalformedInput;\n    }\n    CountAllAux1aDense(patch_01_fvals, allele_ct, rare01_ct, one_cts);\n    return kPglRetSuccess;\n  }\n  const uint32_t allele_code_width = GetAux1aWidth(allele_ct);\n  const uintptr_t allele_code_mask = (1U << allele_code_width) - 1;\n  uint64_t* one_cts_offset2 = &(one_cts[2]);\n  if (!aux1a_mode) {\n    const uint32_t fset_byte_ct = DivUp(raw_01_ct, CHAR_BIT);\n    const uint32_t rare01_ct = PopcountBytes(*fread_pp, fset_byte_ct);\n    const unsigned char* patch_01_fset = *fread_pp;\n    *fread_pp += fset_byte_ct;\n    const unsigned char* patch_01_fvals = *fread_pp;\n    const uint32_t fvals_byte_ct = DivUpU64(S_CAST(uint64_t, rare01_ct) * allele_code_width, 8);\n    if (PtrAddCk(fread_end, fvals_byte_ct, fread_pp)) {\n      return kPglRetMalformedInput;\n    }\n    const Halfword* sample_include_hw = DowncastKWToHW(sample_include);\n    uintptr_t sample_hwidx = 0;\n    uintptr_t cur_raw_genoarr_hets = Word01(raw_genoarr[0]);\n    const uint32_t fset_word_ct_m1 = (fset_byte_ct - 1) / kBytesPerWord;\n    const uint32_t fvals_word_ct_m1 = (fvals_byte_ct - 1) / kBytesPerWord;\n    uintptr_t fvals_bits = 0;\n    uint32_t fvals_widx = 0;\n    uint32_t subsetted_rare01_ct = 0;\n    uint32_t loop_len = kBitsPerWord;\n    uint32_t rare01_lowbits = kBitsPerWord;\n    for (uint32_t fset_widx = 0; ; ++fset_widx) {\n      uintptr_t fset_bits;\n      if (fset_widx >= fset_word_ct_m1) {\n        if (fset_widx > fset_word_ct_m1) {\n          break;\n        }\n        fset_bits = SubwordLoad(&(patch_01_fset[fset_word_ct_m1 * kBytesPerWord]), ModNz(fset_byte_ct, kBytesPerWord));\n        loop_len = ModNz(raw_01_ct, kBitsPerWord);\n      } else {\n        CopyFromUnalignedOffsetW(&fset_bits, patch_01_fset, fset_widx);\n      }\n      if (allele_ct == 3) {\n        for (uint32_t uii = 0; uii != loop_len; ++uii) {\n          while (!cur_raw_genoarr_hets) {\n            cur_raw_genoarr_hets = Word01(raw_genoarr[++sample_hwidx]);\n          }\n          if (fset_bits & 1) {\n            const uint32_t sample_uidx_lowbits = ctzw(cur_raw_genoarr_hets) / 2;\n            subsetted_rare01_ct += (sample_include_hw[sample_hwidx] >> sample_uidx_lowbits) & 1;\n          }\n          cur_raw_genoarr_hets &= cur_raw_genoarr_hets - 1;\n          fset_bits = fset_bits >> 1;\n        }\n      } else {\n        for (uint32_t 
uii = 0; uii != loop_len; ++uii) {\n          while (!cur_raw_genoarr_hets) {\n            cur_raw_genoarr_hets = Word01(raw_genoarr[++sample_hwidx]);\n          }\n          if (fset_bits & 1) {\n            if (rare01_lowbits == kBitsPerWord) {\n              if (fvals_widx == fvals_word_ct_m1) {\n                fvals_bits = SubwordLoad(&(patch_01_fvals[fvals_widx * kBytesPerWord]), ModNz(fvals_byte_ct, kBytesPerWord));\n              } else {\n                CopyFromUnalignedOffsetW(&fvals_bits, patch_01_fvals, fvals_widx);\n              }\n              // unnecessary to apply bzhi here\n              ++fvals_widx;\n              rare01_lowbits = 0;\n            }\n            const uint32_t sample_uidx_lowbits = ctzw(cur_raw_genoarr_hets) / 2;\n            if (sample_include_hw[sample_hwidx] & (1U << sample_uidx_lowbits)) {\n              ++subsetted_rare01_ct;\n              one_cts_offset2[(fvals_bits >> rare01_lowbits) & allele_code_mask] += 1;\n            }\n            rare01_lowbits += allele_code_width;\n          }\n          cur_raw_genoarr_hets &= cur_raw_genoarr_hets - 1;\n          fset_bits = fset_bits >> 1;\n        }\n      }\n    }\n    one_cts_offset2[-1] -= subsetted_rare01_ct;\n    if (allele_ct == 3) {\n      one_cts_offset2[0] = subsetted_rare01_ct;\n    }\n    return kPglRetSuccess;\n  }\n  // mode 1: difflist.\n  if (allele_ct == 3) {\n    // Use CountDeltalistIntersect shortcut here.\n    uint32_t subsetted_02_ct;\n    uint32_t rare01_ct;\n    PglErr reterr = CountDeltalistIntersect(fread_end, sample_include, raw_sample_ct, fread_pp, &subsetted_02_ct, &rare01_ct);\n    if (unlikely(reterr)) {\n      return reterr;\n    }\n    one_cts_offset2[-1] -= subsetted_02_ct;\n    one_cts_offset2[0] = subsetted_02_ct;\n    return kPglRetSuccess;\n  }\n  // Save deltalist elements, iterate.\n  uint32_t rare01_ct;\n  PglErr reterr = ParseAndSaveDeltalist(fread_end, raw_sample_ct, fread_pp, deltalist_workspace, &rare01_ct);\n  if (unlikely(reterr)) {\n    return reterr;\n  }\n  const unsigned char* patch_01_fvals = *fread_pp;\n  const uint32_t fvals_byte_ct = DivUpU64(S_CAST(uint64_t, rare01_ct) * allele_code_width, 8);\n  if (PtrAddCk(fread_end, fvals_byte_ct, fread_pp)) {\n    return kPglRetMalformedInput;\n  }\n  const uint32_t fvals_word_ct_m1 = (fvals_byte_ct - 1) / kBytesPerWord;\n  const uint32_t allele_code_logwidth = ctzu32(allele_code_width);\n  uint32_t subsetted_rare01_ct = 0;\n  uint32_t loop_len = kBitsPerWord >> allele_code_logwidth;\n  for (uint32_t fvals_widx = 0; ; ++fvals_widx) {\n    uintptr_t fvals_bits;\n    if (fvals_widx >= fvals_word_ct_m1) {\n      if (fvals_widx > fvals_word_ct_m1) {\n        break;\n      }\n      fvals_bits = SubwordLoad(&(patch_01_fvals[fvals_widx * kBytesPerWord]), ModNz(fvals_byte_ct, kBytesPerWord));\n      loop_len = 1 + ((rare01_ct - 1) & (loop_len - 1));\n    } else {\n      CopyFromUnalignedOffsetW(&fvals_bits, patch_01_fvals, fvals_widx);\n    }\n    const uint32_t* cur_deltalist_base = &(deltalist_workspace[fvals_widx << (kBitsPerWordLog2 - allele_code_logwidth)]);\n    for (uint32_t uii = 0; uii != loop_len; ++uii) {\n      const uint32_t sample_uidx = cur_deltalist_base[uii];\n      if (IsSet(sample_include, sample_uidx)) {\n        ++subsetted_rare01_ct;\n        one_cts_offset2[(fvals_bits >> (uii << allele_code_logwidth)) & allele_code_mask] += 1;\n      }\n    }\n  }\n  one_cts_offset2[-1] -= subsetted_rare01_ct;\n  return kPglRetSuccess;\n}\n\nvoid CountAllAux1bDense(const void* __restrict patch_10_fvals, 
uint32_t allele_ct, uint32_t rare10_ct, uint64_t* __restrict one_cts_offset1, uint64_t* __restrict two_cts_offset1) {\n  // probable todo: faster path if two_cts_offset1 == nullptr\n  const uint32_t allele_ct_m1 = allele_ct - 1;\n  two_cts_offset1[0] -= rare10_ct;\n  if (allele_ct_m1 < 5) {\n    if (allele_ct_m1 == 2) {\n      const uint32_t allele_code_byte_ct = DivUp(rare10_ct, 8);\n      const uint32_t hom22_ct = PopcountBytes(patch_10_fvals, allele_code_byte_ct);\n      const uint32_t het12_ct = rare10_ct - hom22_ct;\n      one_cts_offset1[0] += het12_ct;\n      one_cts_offset1[1] += het12_ct;\n      two_cts_offset1[1] = hom22_ct;\n      return;\n    }\n    STD_ARRAY_DECL(uint32_t, 4, alt_counts);\n#ifndef NO_UNALIGNED\n    GenoarrCountFreqs(S_CAST(const uintptr_t*, patch_10_fvals), rare10_ct * 2, alt_counts);\n#else\n    GenoarrbCountFreqs(S_CAST(const unsigned char*, patch_10_fvals), rare10_ct * 2, alt_counts);\n#endif\n    one_cts_offset1[0] += alt_counts[0];\n    for (uint32_t allele_idx_m1 = 1; allele_idx_m1 != allele_ct_m1; ++allele_idx_m1) {\n      const uint32_t homxx_ct = CountNybble(S_CAST(const unsigned char*, patch_10_fvals), allele_idx_m1 * kMask5555, rare10_ct);\n      one_cts_offset1[allele_idx_m1] += alt_counts[allele_idx_m1] - 2 * homxx_ct;\n      two_cts_offset1[allele_idx_m1] = homxx_ct;\n    }\n    return;\n  }\n  const unsigned char* patch_10_fvals_uc = S_CAST(const unsigned char*, patch_10_fvals);\n  if (allele_ct_m1 < 17) {\n    // for larger rare10_ct, this should use a byte counter\n    for (uint32_t uii = 0; uii != rare10_ct; ++uii) {\n      const uint32_t cur_byte = patch_10_fvals_uc[uii];\n      const uint32_t cur_byte_hi = cur_byte >> 4;\n      const uint32_t cur_byte_lo = cur_byte & 15;\n      if (cur_byte_hi == cur_byte_lo) {\n        two_cts_offset1[cur_byte_lo] += 1;\n      } else {\n        one_cts_offset1[cur_byte_lo] += 1;\n        one_cts_offset1[cur_byte_hi] += 1;\n      }\n    }\n    return;\n  }\n  for (uint32_t uii = 0; uii != rare10_ct; ++uii) {\n    const uint32_t cur_byte_lo = patch_10_fvals_uc[2 * uii];\n    const uint32_t cur_byte_hi = patch_10_fvals_uc[2 * uii + 1];\n    if (cur_byte_hi == cur_byte_lo) {\n      two_cts_offset1[cur_byte_lo] += 1;\n    } else {\n      one_cts_offset1[cur_byte_lo] += 1;\n      one_cts_offset1[cur_byte_hi] += 1;\n    }\n  }\n}\n\nPglErr CountAllAux1b(const unsigned char* fread_end, const uintptr_t* __restrict sample_include, const uintptr_t* __restrict raw_genoarr, uint32_t aux1b_mode, uint32_t raw_sample_ct, uint32_t allele_ct, uint32_t raw_10_ct, const unsigned char** fread_pp, uint64_t* __restrict one_cts, uint64_t* __restrict two_cts, uint32_t* __restrict deltalist_workspace) {\n  if (aux1b_mode == 15) {\n    return kPglRetSuccess;\n  }\n  uint64_t* one_cts_offset1 = &(one_cts[1]);\n  uint64_t* two_cts_offset1 = &(two_cts[1]);\n  if (!sample_include) {\n    uint32_t rare10_ct;\n    if (!aux1b_mode) {\n      const uint32_t fset_byte_ct = DivUp(raw_10_ct, CHAR_BIT);\n      rare10_ct = PopcountBytes(*fread_pp, fset_byte_ct);\n      *fread_pp += fset_byte_ct;\n    } else {\n      const unsigned char* group_info_iter;\n      PglErr reterr = ParseDifflistHeader(fread_end, raw_sample_ct, fread_pp, nullptr, &group_info_iter, &rare10_ct);\n      if (unlikely(reterr)) {\n        return reterr;\n      }\n      reterr = SkipDeltalistIds(fread_end, group_info_iter, rare10_ct, raw_sample_ct, 0, fread_pp);\n      if (unlikely(reterr)) {\n        return reterr;\n      }\n    }\n    const unsigned char* patch_10_fvals = 
*fread_pp;\n    const uint32_t fvals_byte_ct = GetAux1bAlleleEntryByteCt(allele_ct, rare10_ct);\n    if (PtrAddCk(fread_end, fvals_byte_ct, fread_pp)) {\n      return kPglRetMalformedInput;\n    }\n    CountAllAux1bDense(patch_10_fvals, allele_ct, rare10_ct, one_cts_offset1, two_cts_offset1);\n    return kPglRetSuccess;\n  }\n  uintptr_t detect_hom_mask_lo;  // unused\n  const uint32_t allele_code_logwidth = GetAux1bConsts(allele_ct, &detect_hom_mask_lo);\n  const uint32_t code10_logwidth = allele_code_logwidth + (allele_code_logwidth != 0);\n  const uint32_t allele_code_width = 1U << allele_code_logwidth;\n  const uint32_t allele_code_mask = (1U << allele_code_width) - 1;\n  const uint32_t allele_ct_m1 = allele_ct - 1;\n  uint32_t rare10_lowbits = kBitsPerWord;\n  // probable todo: faster paths when two_cts_offset1 == nullptr\n  if (!aux1b_mode) {\n    const uint32_t fset_byte_ct = DivUp(raw_10_ct, CHAR_BIT);\n    const uint32_t rare10_ct = PopcountBytes(*fread_pp, fset_byte_ct);\n    const unsigned char* patch_10_fset = *fread_pp;\n    *fread_pp += fset_byte_ct;\n    const unsigned char* patch_10_fvals = *fread_pp;\n    const uint32_t fvals_byte_ct = DivUpU64(S_CAST(uint64_t, rare10_ct) << code10_logwidth, 8);\n    if (PtrAddCk(fread_end, fvals_byte_ct, fread_pp)) {\n      return kPglRetMalformedInput;\n    }\n    const Halfword* sample_include_hw = DowncastKWToHW(sample_include);\n    uintptr_t sample_hwidx = 0;\n    uintptr_t cur_raw_genoarr_xys = Word10(raw_genoarr[0]);\n    const uint32_t fset_word_ct_m1 = (fset_byte_ct - 1) / kBytesPerWord;\n    const uint32_t fvals_word_ct_m1 = (fvals_byte_ct - 1) / kBytesPerWord;\n    const uint32_t code10_width = 1U << code10_logwidth;\n    uintptr_t fvals_bits = 0;\n    uint32_t fvals_widx = 0;\n    uint32_t subsetted_rare10_ct = 0;\n    uint32_t loop_len = kBitsPerWord;\n    for (uint32_t fset_widx = 0; ; ++fset_widx) {\n      uintptr_t fset_bits;\n      if (fset_widx >= fset_word_ct_m1) {\n        if (fset_widx > fset_word_ct_m1) {\n          break;\n        }\n        fset_bits = SubwordLoad(&(patch_10_fset[fset_word_ct_m1 * kBytesPerWord]), ModNz(fset_byte_ct, kBytesPerWord));\n        loop_len = ModNz(raw_10_ct, kBitsPerWord);\n      } else {\n        CopyFromUnalignedOffsetW(&fset_bits, patch_10_fset, fset_widx);\n      }\n      if (allele_ct_m1 == 2) {\n        for (uint32_t uii = 0; uii != loop_len; ++uii) {\n          while (!cur_raw_genoarr_xys) {\n            cur_raw_genoarr_xys = Word10(raw_genoarr[++sample_hwidx]);\n          }\n          if (fset_bits & 1) {\n            if (rare10_lowbits == kBitsPerWord) {\n              if (fvals_widx == fvals_word_ct_m1) {\n                fvals_bits = SubwordLoad(&(patch_10_fvals[fvals_widx * kBytesPerWord]), ModNz(fvals_byte_ct, kBytesPerWord));\n              } else {\n                CopyFromUnalignedOffsetW(&fvals_bits, patch_10_fvals, fvals_widx);\n              }\n              // unnecessary to apply bzhi here\n              ++fvals_widx;\n              rare10_lowbits = 0;\n            }\n            const uint32_t sample_uidx_lowbits = ctzw(cur_raw_genoarr_xys) / 2;\n            if (sample_include_hw[sample_hwidx] & (1U << sample_uidx_lowbits)) {\n              ++subsetted_rare10_ct;\n              two_cts_offset1[1] += (fvals_bits >> rare10_lowbits) & 1;\n            }\n            ++rare10_lowbits;\n          }\n          cur_raw_genoarr_xys &= cur_raw_genoarr_xys - 1;\n          fset_bits = fset_bits >> 1;\n        }\n      } else {\n        for (uint32_t uii = 0; uii != loop_len; 
++uii) {\n          while (!cur_raw_genoarr_xys) {\n            cur_raw_genoarr_xys = Word10(raw_genoarr[++sample_hwidx]);\n          }\n          if (fset_bits & 1) {\n            if (rare10_lowbits == kBitsPerWord) {\n              if (fvals_widx == fvals_word_ct_m1) {\n                fvals_bits = SubwordLoad(&(patch_10_fvals[fvals_widx * kBytesPerWord]), ModNz(fvals_byte_ct, kBytesPerWord));\n              } else {\n                CopyFromUnalignedOffsetW(&fvals_bits, patch_10_fvals, fvals_widx);\n              }\n              // unnecessary to apply bzhi here\n              ++fvals_widx;\n              rare10_lowbits = 0;\n            }\n            const uint32_t sample_uidx_lowbits = ctzw(cur_raw_genoarr_xys) / 2;\n            if (sample_include_hw[sample_hwidx] & (1U << sample_uidx_lowbits)) {\n              ++subsetted_rare10_ct;\n              const uintptr_t cur_code_pair = fvals_bits >> rare10_lowbits;\n              const uint32_t cur_code_hi = (cur_code_pair >> allele_code_width) & allele_code_mask;\n              const uint32_t cur_code_lo = cur_code_pair & allele_code_mask;\n              if (cur_code_hi == cur_code_lo) {\n                two_cts_offset1[cur_code_lo] += 1;\n              } else {\n                one_cts_offset1[cur_code_lo] += 1;\n                one_cts_offset1[cur_code_hi] += 1;\n              }\n            }\n            rare10_lowbits += code10_width;\n          }\n          cur_raw_genoarr_xys &= cur_raw_genoarr_xys - 1;\n          fset_bits = fset_bits >> 1;\n        }\n      }\n    }\n    two_cts_offset1[0] -= subsetted_rare10_ct;\n    if (allele_ct == 3) {\n      const uint32_t subsetted_het12_ct = subsetted_rare10_ct - two_cts_offset1[1];\n      one_cts_offset1[0] += subsetted_het12_ct;\n      one_cts_offset1[1] += subsetted_het12_ct;\n    }\n    return kPglRetSuccess;\n  }\n  // Save deltalist elements, iterate.\n  uint32_t rare10_ct;\n  PglErr reterr = ParseAndSaveDeltalist(fread_end, raw_sample_ct, fread_pp, deltalist_workspace, &rare10_ct);\n  if (unlikely(reterr)) {\n    return reterr;\n  }\n  const unsigned char* patch_10_fvals = *fread_pp;\n  const uint32_t fvals_byte_ct = DivUpU64(S_CAST(uint64_t, rare10_ct) << code10_logwidth, 8);\n  if (PtrAddCk(fread_end, fvals_byte_ct, fread_pp)) {\n    return kPglRetMalformedInput;\n  }\n  const uint32_t fvals_word_ct_m1 = (fvals_byte_ct - 1) / kBytesPerWord;\n  uint32_t subsetted_rare10_ct = 0;\n  uint32_t loop_len = kBitsPerWord >> code10_logwidth;\n  for (uint32_t fvals_widx = 0; ; ++fvals_widx) {\n    uintptr_t fvals_bits;\n    if (fvals_widx >= fvals_word_ct_m1) {\n      if (fvals_widx > fvals_word_ct_m1) {\n        break;\n      }\n      fvals_bits = SubwordLoad(&(patch_10_fvals[fvals_widx * kBytesPerWord]), ModNz(fvals_byte_ct, kBytesPerWord));\n      loop_len = 1 + ((rare10_ct - 1) & (loop_len - 1));\n    } else {\n      CopyFromUnalignedOffsetW(&fvals_bits, patch_10_fvals, fvals_widx);\n    }\n    const uint32_t* cur_deltalist_base = &(deltalist_workspace[fvals_widx << (kBitsPerWordLog2 - code10_logwidth)]);\n    if (allele_ct == 3) {\n      for (uint32_t uii = 0; uii != loop_len; ++uii) {\n        const uint32_t sample_uidx = cur_deltalist_base[uii];\n        if (IsSet(sample_include, sample_uidx)) {\n          ++subsetted_rare10_ct;\n          two_cts_offset1[1] += (fvals_bits >> uii) & 1;\n        }\n      }\n    } else {\n      for (uint32_t uii = 0; uii != loop_len; ++uii) {\n        const uint32_t sample_uidx = cur_deltalist_base[uii];\n        if (IsSet(sample_include, sample_uidx)) 
{\n          ++subsetted_rare10_ct;\n          const uintptr_t cur_code_pair = fvals_bits >> (uii << code10_logwidth);\n          const uint32_t cur_code_hi = (cur_code_pair >> allele_code_width) & allele_code_mask;\n          const uint32_t cur_code_lo = cur_code_pair & allele_code_mask;\n          if (cur_code_hi == cur_code_lo) {\n            two_cts_offset1[cur_code_lo] += 1;\n          } else {\n            one_cts_offset1[cur_code_lo] += 1;\n            one_cts_offset1[cur_code_hi] += 1;\n          }\n        }\n      }\n    }\n  }\n  two_cts_offset1[0] -= subsetted_rare10_ct;\n  if (allele_ct == 3) {\n    const uint32_t subsetted_het12_ct = subsetted_rare10_ct - two_cts_offset1[1];\n    one_cts_offset1[0] += subsetted_het12_ct;\n    one_cts_offset1[1] += subsetted_het12_ct;\n  }\n  return kPglRetSuccess;\n}\n\nPglErr GetMultiallelicCountsAndDosage16s(const uintptr_t* __restrict sample_include, const uintptr_t* __restrict sample_include_interleaved_vec, uint32_t sample_ct, uint32_t vidx, uint32_t allele_ct, __maybe_unused uint32_t is_minimac3_r2, PgenReaderMain* pgrp, double* __restrict imp_r2_ptr, uint32_t* __restrict het_ctp, STD_ARRAY_REF(uint32_t, 4) genocounts, uint64_t* all_dosages) {\n  // only called on multiallelic variants\n  // no dosages for now\n  const uint32_t vrtype = GetPgfiVrtype(&(pgrp->fi), vidx);\n  const uint32_t raw_sample_ct = pgrp->fi.raw_sample_ct;\n  const uint32_t subsetting_required = (sample_ct != raw_sample_ct);\n  uintptr_t* raw_genovec = pgrp->workspace_vec;\n  const unsigned char* fread_ptr;\n  const unsigned char* fread_end;\n  PglErr reterr = ReadRawGenovec(subsetting_required, vidx, pgrp, &fread_ptr, &fread_end, raw_genovec);\n  if (unlikely(reterr)) {\n    return reterr;\n  }\n  ZeroTrailingNyps(raw_sample_ct, raw_genovec);\n  if (!subsetting_required) {\n    GenoarrCountFreqsUnsafe(raw_genovec, raw_sample_ct, genocounts);\n    sample_include = nullptr;\n  } else {\n    GenoarrCountSubsetFreqs(raw_genovec, sample_include_interleaved_vec, raw_sample_ct, sample_ct, genocounts);\n  }\n  uint64_t* one_cts = pgrp->workspace_imp_r2;\n  uint64_t* two_cts = &(one_cts[allele_ct]);\n  one_cts[0] = genocounts[1];\n  one_cts[1] = genocounts[1];\n  ZeroU64Arr(allele_ct - 2, &(one_cts[2]));\n  two_cts[0] = genocounts[0];\n  two_cts[1] = genocounts[2];\n  ZeroU64Arr(allele_ct - 2, &(two_cts[2]));\n  // Cases:\n  // - No hardcall-phase present.  Then we don't need to know raw_het_ct.\n  // - No multiallelic dosages present, not computing minimac3-r2.  
Then we\n  //   still don't need to know raw_het_ct.\n  // - Otherwise, we need to know raw_het_ct, either for the minimac3-r2\n  //   computation or to locate the beginning of aux3/aux4.\n  //   If we're computing minimac3-r2, AND\n  //     (i) we're subsetting, or\n  //     (ii) multiallelic dosages are present,\n  //   it's also necessary to compute all_hets, either to compute correct\n  //   subsetted minimac3-r2 or to know how many phased-hardcalls are\n  //   overridden by phased dosages.\n  const uint32_t raw_het_ct_needed = VrtypeHphase(vrtype) && (is_minimac3_r2 || (vrtype & 0x60));\n  uintptr_t* all_hets = nullptr;\n  const uint32_t raw_sample_ctl = BitCtToWordCt(raw_sample_ct);\n  uint32_t raw_het_ct = genocounts[1]; // inaccurate, corrected later if needed\n  if (VrtypeMultiallelicHc(vrtype)) {\n    const uint32_t aux1_first_byte = *fread_ptr++;\n    const uint32_t aux1a_mode = aux1_first_byte & 15;\n    const uint32_t aux1b_mode = aux1_first_byte >> 4;\n    uint32_t raw_10_ct = 0;\n    if ((!aux1a_mode) || (!aux1b_mode) || sample_include) {\n      GenovecCount12Unsafe(raw_genovec, raw_sample_ct, &raw_het_ct, &raw_10_ct);\n    }\n    uint32_t* deltalist_workspace = pgrp->workspace_difflist_sample_ids;\n    reterr = CountAllAux1a(fread_end, sample_include, raw_genovec, aux1a_mode, raw_sample_ct, allele_ct, raw_het_ct, &fread_ptr, one_cts, deltalist_workspace);\n    if (unlikely(reterr)) {\n      return reterr;\n    }\n    const unsigned char* aux1b_start = fread_ptr;\n    reterr = CountAllAux1b(fread_end, sample_include, raw_genovec, aux1b_mode, raw_sample_ct, allele_ct, raw_10_ct, &fread_ptr, one_cts, two_cts, deltalist_workspace);\n    if (unlikely(reterr)) {\n      return reterr;\n    }\n    if (raw_het_ct_needed) {\n      if (!sample_include) {\n        raw_het_ct += genocounts[2];\n        for (uint32_t aidx = 1; aidx != allele_ct; ++aidx) {\n          raw_het_ct -= two_cts[aidx];\n        }\n      }\n      if (sample_include || (is_minimac3_r2 && (vrtype & 0x60))) {\n        all_hets = pgrp->workspace_all_hets;\n        PgrDetectGenoarrHets(raw_genovec, raw_sample_ct, all_hets);\n        if (aux1b_mode != 15) {\n          uintptr_t* aux1b_hets = pgrp->workspace_aux1x_present;\n          uint32_t aux1b_het_present;\n          reterr = GetAux1bHets(fread_end, raw_genovec, aux1b_mode, raw_sample_ct, allele_ct, raw_10_ct, &aux1b_start, aux1b_hets, &aux1b_het_present, deltalist_workspace);\n          if (unlikely(reterr)) {\n            return reterr;\n          }\n          if (aux1b_het_present) {\n            BitvecOr(aux1b_hets, raw_sample_ctl, all_hets);\n          }\n        }\n        if (sample_include) {\n          raw_het_ct = PopcountWords(all_hets, raw_sample_ctl);\n        }\n      }\n    }\n  }\n  uintptr_t* raw_phasepresent = nullptr;\n  uint32_t extra_phased_het_ct = 0;\n  if (raw_het_ct_needed) {\n    if (!all_hets) {\n      reterr = SkipAux2(fread_end, raw_het_ct, &fread_ptr, is_minimac3_r2? 
(&extra_phased_het_ct) : nullptr);\n      if (unlikely(reterr)) {\n        return reterr;\n      }\n    } else {\n      raw_phasepresent = pgrp->workspace_subset;\n      reterr = GetPhasepresentAndSkipPhaseinfo(fread_end, all_hets, raw_sample_ct, raw_het_ct, &fread_ptr, raw_phasepresent, &extra_phased_het_ct);\n      if (unlikely(reterr)) {\n        return reterr;\n      }\n      if (sample_include) {\n        extra_phased_het_ct = PopcountWordsIntersect(raw_phasepresent, sample_include, raw_sample_ctl);\n      }\n    }\n  }\n  if (!(vrtype & 0x60)) {\n    uint32_t hom_hc_ct = 0;\n    for (uint32_t allele_idx = 0; allele_idx != allele_ct; ++allele_idx) {\n      const uint64_t cur_hom_ct = two_cts[allele_idx];\n      hom_hc_ct += cur_hom_ct;\n      const uint64_t two_dosage = cur_hom_ct * 0x8000LLU;\n      const uint64_t dosage_sum = one_cts[allele_idx] * 0x4000LLU + two_dosage;\n      all_dosages[allele_idx] = dosage_sum;\n      // Repurpose two_cts[] to store ssqs.\n      two_cts[allele_idx] = (dosage_sum + two_dosage) * 0x4000LLU;\n    }\n    const uint32_t nm_sample_ct = sample_ct - genocounts[3];\n    *het_ctp = nm_sample_ct - hom_hc_ct;\n    if (!imp_r2_ptr) {\n      return kPglRetSuccess;\n    }\n    *imp_r2_ptr = MultiallelicDiploidMinimac3R2(all_dosages, two_cts, nm_sample_ct, allele_ct, extra_phased_het_ct);\n    if (!is_minimac3_r2) {\n      *imp_r2_ptr *= 2;\n    }\n    return kPglRetSuccess;\n  }\n#ifndef PGENLIB_NOPRINT\n  fputs(\"dosages not yet supported by GetMultiallelicCountsAndDosage16s()\\n\", stderr);\n#endif\n  return kPglRetNotYetSupported;\n}\n\nPglErr PgrGetMDCounts(const uintptr_t* __restrict sample_include, const uintptr_t* __restrict sample_include_interleaved_vec, PgrSampleSubsetIndex pssi, uint32_t sample_ct, uint32_t vidx, uint32_t is_minimac3_r2, PgenReader* pgr_ptr, double* __restrict imp_r2_ptr, uint32_t* __restrict het_ctp, STD_ARRAY_REF(uint32_t, 4) genocounts, uint64_t* __restrict all_dosages) {\n  PgenReaderMain* pgrp = GetPgrp(pgr_ptr);\n  assert(vidx < pgrp->fi.raw_variant_ct);\n  const uintptr_t* allele_idx_offsets = pgrp->fi.allele_idx_offsets;\n  const uint32_t allele_ct = allele_idx_offsets? 
(allele_idx_offsets[vidx + 1] - allele_idx_offsets[vidx]) : 2;\n  if (!sample_ct) {\n    STD_ARRAY_REF_FILL0(4, genocounts);\n    ZeroU64Arr(allele_ct, all_dosages);\n    if (imp_r2_ptr) {\n      *imp_r2_ptr = 0.0 / 0.0;\n    }\n    return kPglRetSuccess;\n  }\n  const uint32_t* sample_include_cumulative_popcounts = GetSicp(pssi);\n  const uint32_t vrtype = GetPgfiVrtype(&(pgrp->fi), vidx);\n  if ((allele_ct == 2) || (!(vrtype & 0x68))) {\n    PglErr reterr = GetBasicGenotypeCountsAndDosage16s(sample_include, sample_include_interleaved_vec, sample_include_cumulative_popcounts, sample_ct, vidx, is_minimac3_r2, pgrp, imp_r2_ptr, genocounts, all_dosages);\n    *het_ctp = genocounts[1];\n    ZeroU64Arr(allele_ct - 2, &(all_dosages[2]));\n    return reterr;\n  }\n  return GetMultiallelicCountsAndDosage16s(sample_include, sample_include_interleaved_vec, sample_ct, vidx, allele_ct, is_minimac3_r2, pgrp, imp_r2_ptr, het_ctp, genocounts, all_dosages);\n}\n\nPglErr PgrGetMD(const uintptr_t* __restrict sample_include, PgrSampleSubsetIndex pssi, uint32_t sample_ct, uint32_t vidx, PgenReader* pgr_ptr, PgenVariant* pgvp) {\n  pgvp->patch_01_ct = 0;\n  pgvp->patch_10_ct = 0;\n  pgvp->dosage_ct = 0;\n  pgvp->multidosage_sample_ct = 0;\n  if (!sample_ct) {\n    return kPglRetSuccess;\n  }\n  PgenReaderMain* pgrp = GetPgrp(pgr_ptr);\n  const uint32_t* sample_include_cumulative_popcounts = GetSicp(pssi);\n  const uintptr_t* allele_idx_offsets = pgrp->fi.allele_idx_offsets;\n  const uint32_t allele_ct = allele_idx_offsets? (allele_idx_offsets[vidx + 1] - allele_idx_offsets[vidx]) : 2;\n  const uint32_t vrtype = GetPgfiVrtype(&(pgrp->fi), vidx);\n  if ((allele_ct == 2) || (!(vrtype & 0x68))) {\n    return IMPLPgrGetD(sample_include, sample_include_cumulative_popcounts, sample_ct, vidx, pgrp, pgvp->genovec, pgvp->dosage_present, pgvp->dosage_main, &(pgvp->dosage_ct));\n  }\n  const unsigned char* fread_ptr;\n  const unsigned char* fread_end;\n  uintptr_t* all_hets = VrtypeHphase(vrtype)? pgrp->workspace_all_hets : nullptr;\n  if (VrtypeMultiallelicHc(vrtype)) {\n    PglErr reterr = GetMultiallelicCodes(sample_include, sample_include_cumulative_popcounts, sample_ct, vidx, pgrp, all_hets? (&fread_ptr) : nullptr, all_hets? (&fread_end) : nullptr, all_hets, pgvp);\n    if (!(vrtype & 0x60)) {\n      return reterr;\n    }\n  } else {\n    // todo: ReadRawGenovec, etc.\n  }\n#ifndef PGENLIB_NOPRINT\n  fputs(\"true multiallelic dosages not yet supported by PgrGetMD()\\n\", stderr);\n#endif\n  return kPglRetNotYetSupported;\n}\n\nPglErr IMPLPgrGetDp(const uintptr_t* __restrict sample_include, const uint32_t* __restrict sample_include_cumulative_popcounts, uint32_t sample_ct, uint32_t vidx, PgenReaderMain* pgrp, PgenVariant* pgvp) {\n  assert(vidx < pgrp->fi.raw_variant_ct);\n  if (!sample_ct) {\n    pgvp->phasepresent_ct = 0;\n    pgvp->dosage_ct = 0;\n    pgvp->dphase_ct = 0;\n    return kPglRetSuccess;\n  }\n  const unsigned char* fread_ptr = nullptr;\n  const unsigned char* fread_end = nullptr;\n  const uint32_t vrtype = GetPgfiVrtype(&(pgrp->fi), vidx);\n  const uint32_t dosage_is_present = VrtypeDosage(vrtype);\n  PglErr reterr = ReadGenovecHphaseSubsetUnsafe(sample_include, sample_include_cumulative_popcounts, sample_ct, vidx, pgrp, dosage_is_present? (&fread_ptr) : nullptr, dosage_is_present? 
(&fread_end) : nullptr, pgvp->genovec, pgvp->phasepresent, pgvp->phaseinfo, &(pgvp->phasepresent_ct));\n  if (reterr || (!dosage_is_present)) {\n    pgvp->dosage_ct = 0;\n    pgvp->dphase_ct = 0;\n    return reterr;\n  }\n  const uintptr_t* allele_idx_offsets = pgrp->fi.allele_idx_offsets;\n  const uint32_t allele_ct = allele_idx_offsets? (allele_idx_offsets[vidx + 1] - allele_idx_offsets[vidx]) : 2;\n  return ParseDosage16(fread_ptr, fread_end, sample_include, sample_ct, vidx, allele_ct, pgrp, &(pgvp->dosage_ct), pgvp->dphase_present, pgvp->dphase_delta, &(pgvp->dphase_ct), pgvp->dosage_present, pgvp->dosage_main);\n}\n\nPglErr PgrGetInv1Dp(const uintptr_t* __restrict sample_include, PgrSampleSubsetIndex pssi, uint32_t sample_ct, uint32_t vidx, AlleleCode allele_idx, PgenReader* pgr_ptr, PgenVariant* pgvp) {\n  PgenReaderMain* pgrp = GetPgrp(pgr_ptr);\n  const uint32_t* sample_include_cumulative_popcounts = GetSicp(pssi);\n  const uintptr_t* allele_idx_offsets = pgrp->fi.allele_idx_offsets;\n  const uint32_t allele_ct = allele_idx_offsets? (allele_idx_offsets[vidx + 1] - allele_idx_offsets[vidx]) : 2;\n  if ((allele_ct == 2) || (!allele_idx)) {\n    PglErr reterr = IMPLPgrGetDp(sample_include, sample_include_cumulative_popcounts, sample_ct, vidx, pgrp, pgvp);\n    if (allele_idx) {\n      GenovecInvertUnsafe(sample_ct, pgvp->genovec);\n      if (pgvp->phasepresent_ct) {\n        BitvecInvert(BitCtToWordCt(sample_ct), pgvp->phaseinfo);\n      }\n      if (pgvp->dosage_ct) {\n        BiallelicDosage16Invert(pgvp->dosage_ct, pgvp->dosage_main);\n        if (pgvp->dphase_ct) {\n          BiallelicDphase16Invert(pgvp->dphase_ct, pgvp->dphase_delta);\n        }\n      }\n    }\n    return reterr;\n  }\n  const uint32_t vrtype = pgrp->fi.vrtypes[vidx];\n  if (!VrtypeDosage(vrtype)) {\n    pgvp->dosage_ct = 0;\n    pgvp->dphase_ct = 0;\n    return IMPLPgrGetInv1P(sample_include, sample_include_cumulative_popcounts, sample_ct, vidx, allele_idx, pgrp, pgvp->genovec, pgvp->phasepresent, pgvp->phaseinfo, &(pgvp->phasepresent_ct));\n  }\n#ifndef PGENLIB_NOPRINT\n  fputs(\"multiallelic dosage not yet supported by GetInv1Dp()\\n\", stderr);\n#endif\n  return kPglRetNotYetSupported;\n}\n\nPglErr PgrGetMDp(const uintptr_t* __restrict sample_include, PgrSampleSubsetIndex pssi, uint32_t sample_ct, uint32_t vidx, PgenReader* pgr_ptr, PgenVariant* pgvp) {\n  PgenReaderMain* pgrp = GetPgrp(pgr_ptr);\n  const uint32_t* sample_include_cumulative_popcounts = GetSicp(pssi);\n  pgvp->patch_01_ct = 0;\n  pgvp->patch_10_ct = 0;\n  pgvp->phasepresent_ct = 0;\n  pgvp->dosage_ct = 0;\n  pgvp->multidosage_sample_ct = 0;\n  pgvp->dphase_ct = 0;\n  pgvp->multidphase_sample_ct = 0;\n  if (!sample_ct) {\n    return kPglRetSuccess;\n  }\n  const uintptr_t* allele_idx_offsets = pgrp->fi.allele_idx_offsets;\n  const uint32_t allele_ct = allele_idx_offsets? (allele_idx_offsets[vidx + 1] - allele_idx_offsets[vidx]) : 2;\n  const uint32_t vrtype = GetPgfiVrtype(&(pgrp->fi), vidx);\n  if ((allele_ct == 2) || (!(vrtype & 0x68))) {\n    return IMPLPgrGetDp(sample_include, sample_include_cumulative_popcounts, sample_ct, vidx, pgrp, pgvp);\n  }\n  const unsigned char* fread_ptr;\n  const unsigned char* fread_end;\n  uintptr_t* all_hets = VrtypeHphase(vrtype)? pgrp->workspace_all_hets : nullptr;\n  if (VrtypeMultiallelicHc(vrtype)) {\n    PglErr reterr = GetMultiallelicCodes(sample_include, sample_include_cumulative_popcounts, sample_ct, vidx, pgrp, all_hets? (&fread_ptr) : nullptr, all_hets? 
(&fread_end) : nullptr, all_hets, pgvp);\n    if (reterr || (!all_hets)) {\n      pgvp->phasepresent_ct = 0;\n      return reterr;\n    }\n    if (!(vrtype & 0x60)) {\n      const uint32_t raw_sample_ct = pgrp->fi.raw_sample_ct;\n      return ParseAux2Subset(fread_end, (sample_ct != raw_sample_ct)? sample_include : nullptr, all_hets, nullptr, raw_sample_ct, sample_ct, &fread_ptr, pgvp->phasepresent, pgvp->phaseinfo, &(pgvp->phasepresent_ct), pgrp->workspace_subset);\n    }\n  } else {\n    // todo: ReadRawGenovec, etc.\n  }\n#ifndef PGENLIB_NOPRINT\n  fputs(\"true multiallelic dosages not yet supported by PgrGetMDp()\\n\", stderr);\n#endif\n  return kPglRetNotYetSupported;\n}\n\nstatic_assert(sizeof(AlleleCode) == 1, \"CountAux1bHets() must be updated.\");\nuintptr_t CountAux1bHets(const AlleleCode* patch_10_vals, uintptr_t rare10_ct) {\n  // Similar to CountByte().\n  uintptr_t byte_ct = rare10_ct * 2;\n#ifdef __LP64__\n  if (byte_ct < kBytesPerVec) {\n#endif\n    uintptr_t tot = 0;\n    for (uintptr_t offset = 0; offset < byte_ct; offset += 2) {\n      tot += (patch_10_vals[offset] != patch_10_vals[offset + 1]);\n    }\n    return tot;\n#ifdef __LP64__\n  }\n  const unsigned char* bytearr_uc_iter = DowncastKToUc(patch_10_vals);\n  const VecW m0 = vecw_setzero();\n  const VecW m8 = VCONST_W(kMask00FF);\n  VecW acc = vecw_setzero();\n  while (byte_ct > 255 * kBytesPerVec) {\n    VecUc inner_acc = vecuc_setzero();\n    for (uint32_t uii = 0; uii != 255; ++uii) {\n      const VecUc cur_vvec = vecuc_loadu(bytearr_uc_iter);\n      bytearr_uc_iter = &(bytearr_uc_iter[kBytesPerVec]);\n      const VecUc shifted_vvec = VecWToUc(vecw_srli(VecUcToW(cur_vvec), 8));\n      inner_acc = inner_acc - (cur_vvec == shifted_vvec);\n    }\n    const VecW partial_sums = VecUcToW(inner_acc) & m8;\n    acc = acc + vecw_sad(partial_sums, m0);\n    byte_ct -= 255 * kBytesPerVec;\n  }\n  const unsigned char* bytearr_uc_final = &(bytearr_uc_iter[byte_ct - kBytesPerVec]);\n  VecUc inner_acc = vecuc_setzero();\n  while (bytearr_uc_iter < bytearr_uc_final) {\n    const VecUc cur_vvec = vecuc_loadu(bytearr_uc_iter);\n    bytearr_uc_iter = &(bytearr_uc_iter[kBytesPerVec]);\n    const VecUc shifted_vvec = VecWToUc(vecw_srli(VecUcToW(cur_vvec), 8));\n    inner_acc = inner_acc - (cur_vvec == shifted_vvec);\n  }\n  VecUc cur_vvec = vecuc_loadu(bytearr_uc_final);\n  const uintptr_t overlap_byte_ct = bytearr_uc_iter - bytearr_uc_final;\n  const VecUc shifted_vvec = VecWToUc(vecw_srli(VecUcToW(cur_vvec), 8));\n  const VecUc mask_vvec = vecuc_loadu(&(kLeadMask[kBytesPerVec - overlap_byte_ct]));\n  cur_vvec = (cur_vvec == shifted_vvec) & mask_vvec;\n  inner_acc = inner_acc - cur_vvec;\n  const VecW partial_sums = VecUcToW(inner_acc) & m8;\n  acc = acc + vecw_sad(partial_sums, m0);\n  const uintptr_t tot = HsumW(acc);\n  return rare10_ct - tot;\n#endif\n}\n\nPglErr PgrGetRaw(uint32_t vidx, PgenGlobalFlags read_gflags, PgenReader* pgr_ptr, uintptr_t** loadbuf_iter_ptr, unsigned char* loaded_vrtype_ptr) {\n  PgenReaderMain* pgrp = GetPgrp(pgr_ptr);\n  // currently handles multiallelic hardcalls, hardcall phase, and biallelic\n  // dosage (both unphased and phased)\n  // todo: multiallelic dosage\n  const uint32_t raw_sample_ct = pgrp->fi.raw_sample_ct;\n  const uint32_t vrtype = GetPgfiVrtype(&(pgrp->fi), vidx);\n  uintptr_t* genovec = (*loadbuf_iter_ptr);\n  uintptr_t* loadbuf_iter = &(genovec[NypCtToAlignedWordCt(raw_sample_ct)]);\n  const uint32_t multiallelic_hc_present = (vrtype / 8) & 1;\n  const uint32_t save_multiallelic_hc 
= multiallelic_hc_present && (read_gflags & kfPgenGlobalMultiallelicHardcallFound);\n  const uint32_t hphase_is_present = (vrtype / 0x10) & 1;\n  const uint32_t save_hphase = hphase_is_present && (read_gflags & kfPgenGlobalHardcallPhasePresent);\n  const uint32_t dosage_is_present = (vrtype & 0x60)? 1 : 0;\n  const uint32_t save_dosage = dosage_is_present && (read_gflags & kfPgenGlobalDosagePresent);\n\n  const uint32_t save_dphase = (vrtype & 0x80) && (read_gflags & kfPgenGlobalDosagePhasePresent);\n  assert(save_dosage || (!save_dphase));\n\n  if (loaded_vrtype_ptr) {\n    *loaded_vrtype_ptr = save_multiallelic_hc * 8 + save_hphase * 0x10 + save_dosage * 0x60 + save_dphase * 0x80;\n  }\n  const unsigned char* fread_ptr;\n  const unsigned char* fread_end;\n  PglErr reterr = ReadRawGenovec(0, vidx, pgrp, &fread_ptr, &fread_end, genovec);\n  if ((!(multiallelic_hc_present || save_hphase || save_dosage)) || reterr) {\n    *loadbuf_iter_ptr = loadbuf_iter;\n    return reterr;\n  }\n\n  const uint32_t raw_sample_ctl = BitCtToWordCt(raw_sample_ct);\n  ZeroTrailingNyps(raw_sample_ct, genovec);\n  const uintptr_t* allele_idx_offsets = pgrp->fi.allele_idx_offsets;\n  const uint32_t allele_ct = allele_idx_offsets? (allele_idx_offsets[vidx + 1] - allele_idx_offsets[vidx]) : 2;\n  uint32_t het_ct = 0;\n  if (multiallelic_hc_present) {\n    if (!save_multiallelic_hc) {\n      // todo: erase-alt2+ fast path\n      // mostly mirror PgrGet2P(0, 1), but a bit of extra logic is needed to\n      // throw out phased-10het entries\n      return kPglRetNotYetSupported;\n    }\n    // assume we always save multiallelic info\n    // raw format:\n    //   rare01_ct, padded out to a word\n    //   rare10_ct, padded out to a word\n    //   [round up to vector boundary, for patch_01_set]\n    //   aux1a, if not mode 15:\n    //     patch_01_set as bitarray, raw_sample_ctl words\n    //     patch_01_vals, round up to word boundary\n    //     [round up to vector boundary, for patch_10_set]\n    //   aux1b, if not mode 15:\n    //     patch_10_set as bitarray, raw_sample_ctl words\n    //     patch_10_vals, round up to word boundary\n    // round up to vector boundary at end\n    const uint32_t aux1_first_byte = *fread_ptr++;\n    const uint32_t aux1a_mode = aux1_first_byte & 15;\n    const uint32_t aux1b_mode = aux1_first_byte >> 4;\n    uint32_t raw_10_ct = 0;\n    if ((!aux1a_mode) || hphase_is_present) {\n      if (!aux1b_mode) {\n        GenovecCount12Unsafe(genovec, raw_sample_ct, &het_ct, &raw_10_ct);\n      } else {\n        het_ct = CountNyp(genovec, kMask5555, raw_sample_ct);\n      }\n    } else if (!aux1b_mode) {\n      raw_10_ct = CountNyp(genovec, kMaskAAAA, raw_sample_ct);\n    }\n    uintptr_t* multihc_raw = loadbuf_iter;\n    loadbuf_iter = &(loadbuf_iter[RoundUpPow2(2, kWordsPerVec)]);\n    uint32_t rare01_ct = 0;\n    if (aux1a_mode != 15) {\n      uintptr_t* patch_01_set = loadbuf_iter;\n      loadbuf_iter = &(loadbuf_iter[raw_sample_ctl]);\n      // (could decide to vector-align patch_01_vals later)\n      AlleleCode* patch_01_vals = DowncastWToAC(loadbuf_iter);\n      reterr = ExportAux1a(fread_end, genovec, aux1a_mode, raw_sample_ct, allele_ct, het_ct, &fread_ptr, patch_01_set, patch_01_vals, &rare01_ct);\n      if (unlikely(reterr)) {\n        return reterr;\n      }\n      loadbuf_iter = &(loadbuf_iter[DivUp(rare01_ct, kBytesPerWord / sizeof(AlleleCode))]);\n      AlignWToVec(&loadbuf_iter);\n    }\n    uint32_t rare10_ct = 0;\n    if (aux1b_mode != 15) {\n      uintptr_t* patch_10_set = 
loadbuf_iter;\n      loadbuf_iter = &(loadbuf_iter[raw_sample_ctl]);\n      AlleleCode* patch_10_vals = DowncastWToAC(loadbuf_iter);\n      reterr = ExportAux1b(fread_end, genovec, aux1b_mode, raw_sample_ct, allele_ct, raw_10_ct, &fread_ptr, patch_10_set, patch_10_vals, &rare10_ct);\n      if (unlikely(reterr)) {\n        return reterr;\n      }\n      loadbuf_iter = &(loadbuf_iter[DivUp(rare10_ct, kBytesPerWord / (2 * sizeof(AlleleCode)))]);\n      AlignWToVec(&loadbuf_iter);\n      if (hphase_is_present) {\n        het_ct += CountAux1bHets(patch_10_vals, rare10_ct);\n      }\n    }\n    multihc_raw[0] = rare01_ct;\n    multihc_raw[1] = rare10_ct;\n  } else if (hphase_is_present) {\n    het_ct = CountNyp(genovec, kMask5555, raw_sample_ct);\n  }\n\n  if (hphase_is_present) {\n    if (unlikely(!het_ct)) {\n      // there shouldn't be a hphase track at all in this case\n      return kPglRetMalformedInput;\n    }\n    const uint32_t het_ctdl = het_ct / kBitsPerWord;\n    uintptr_t* phaseraw = loadbuf_iter;\n    const uint32_t first_half_byte_ct = 1 + (het_ct / CHAR_BIT);\n    if (save_hphase) {\n      // this needs to be synced with MakePgenThread()\n#ifdef __LP64__\n      // save het_ct later so we can use PopcountWords() below\n      phaseraw[0] = 0;\n#else\n      phaseraw[0] = het_ct;\n      phaseraw[1] = 0;\n#endif\n      loadbuf_iter = &(loadbuf_iter[8 / kBytesPerWord]);\n      loadbuf_iter[het_ctdl] = 0;\n      memcpy(loadbuf_iter, fread_ptr, first_half_byte_ct);\n      loadbuf_iter = &(loadbuf_iter[1 + het_ctdl]);\n    }\n    const uint32_t explicit_phasepresent = fread_ptr[0] & 1;\n    const unsigned char* aux2_start = fread_ptr;\n    fread_ptr = &(fread_ptr[first_half_byte_ct]);\n    if (explicit_phasepresent) {\n      uint32_t raw_phasepresent_ct;\n      if (save_hphase) {\n#ifdef __LP64__\n        raw_phasepresent_ct = PopcountWords(phaseraw, het_ctdl + 2);\n#else\n        raw_phasepresent_ct = PopcountWords(&(phaseraw[2]), het_ctdl + 1);\n#endif\n      } else {\n        // bugfix (11 Apr 2018): not copied to phaseraw in this case\n        raw_phasepresent_ct = PopcountBytes(aux2_start, first_half_byte_ct);\n      }\n      --raw_phasepresent_ct;\n      if (unlikely(!raw_phasepresent_ct)) {\n        // there shouldn't be a hphase track at all in this case, either\n        return kPglRetMalformedInput;\n      }\n      const uint32_t second_half_byte_ct = DivUp(raw_phasepresent_ct, CHAR_BIT);\n      if (save_hphase) {\n#ifdef __LP64__\n        phaseraw[0] = het_ct | (S_CAST(uint64_t, raw_phasepresent_ct) << 32);\n#else\n        phaseraw[1] = raw_phasepresent_ct;\n#endif\n        memcpy(loadbuf_iter, fread_ptr, second_half_byte_ct);\n        loadbuf_iter = &(loadbuf_iter[BitCtToWordCt(raw_phasepresent_ct)]);\n      }\n      fread_ptr = &(fread_ptr[second_half_byte_ct]);\n    }\n#ifdef __LP64__\n    if (save_hphase) {\n      if (!explicit_phasepresent) {\n        phaseraw[0] = het_ct;\n      }\n      AlignWToVec(&loadbuf_iter);\n    }\n#endif\n  }\n  if (!save_dosage) {\n    *loadbuf_iter_ptr = loadbuf_iter;\n    return kPglRetSuccess;\n  }\n  uintptr_t* dosage_present = loadbuf_iter;\n  const uint32_t raw_sample_ctaw = BitCtToAlignedWordCt(raw_sample_ct);\n  loadbuf_iter = &(loadbuf_iter[raw_sample_ctaw]);\n  uint16_t* dosage_main = DowncastWToU16(loadbuf_iter);\n  // probable todo: pack this more tightly in the future\n  const uintptr_t dosage_main_aligned_wordct = kWordsPerVec * DivUp(raw_sample_ct, (kBytesPerVec / sizeof(int16_t)));\n  loadbuf_iter = 
&(loadbuf_iter[dosage_main_aligned_wordct]);\n  uintptr_t* dphase_present = nullptr;\n  int16_t* dphase_delta = nullptr;\n  if (save_dphase) {\n    dphase_present = loadbuf_iter;\n    loadbuf_iter = &(loadbuf_iter[raw_sample_ctaw]);\n    dphase_delta = DowncastWToI16(loadbuf_iter);\n    loadbuf_iter = &(loadbuf_iter[dosage_main_aligned_wordct]);\n  }\n  *loadbuf_iter_ptr = loadbuf_iter;\n  reterr = ParseDosage16(fread_ptr, fread_end, nullptr, raw_sample_ct, vidx, allele_ct, pgrp, nullptr, dphase_present, dphase_delta, nullptr, dosage_present, dosage_main);\n  return reterr;\n}\n\n\n// Currently assumes no phase or multiallelic hardcalls.\n// tried to have more custom code, turned out to not be worth it\nPglErr ReadMissingness(const uintptr_t* __restrict sample_include, const uint32_t* __restrict sample_include_cumulative_popcounts, uint32_t sample_ct, uint32_t vidx, PgenReaderMain* pgrp, const unsigned char** fread_pp, const unsigned char** fread_endp, uintptr_t* __restrict missingness, uintptr_t* __restrict hets, uintptr_t* __restrict genovec_buf) {\n  const unsigned char* fread_ptr;\n  const unsigned char* fread_end;\n  PglErr reterr = ReadGenovecSubsetUnsafe(sample_include, sample_include_cumulative_popcounts, sample_ct, vidx, pgrp, &fread_ptr, &fread_end, genovec_buf);\n  ZeroTrailingNyps(sample_ct, genovec_buf);\n  GenoarrToMissingnessUnsafe(genovec_buf, sample_ct, missingness);\n  if (hets) {\n    PgrDetectGenoarrHetsUnsafe(genovec_buf, NypCtToWordCt(sample_ct), hets);\n  }\n  if (fread_pp) {\n    *fread_pp = fread_ptr;\n    *fread_endp = fread_end;\n  }\n  return reterr;\n}\n\nPglErr PgrGetMissingness(const uintptr_t* __restrict sample_include, PgrSampleSubsetIndex pssi, uint32_t sample_ct, uint32_t vidx, PgenReader* pgr_ptr, uintptr_t* __restrict missingness, uintptr_t* __restrict genovec_buf) {\n  if (!sample_ct) {\n    return kPglRetSuccess;\n  }\n  PgenReaderMain* pgrp = GetPgrp(pgr_ptr);\n  // may as well add a hets parameter?\n  assert(vidx < pgrp->fi.raw_variant_ct);\n  return ReadMissingness(sample_include, GetSicp(pssi), sample_ct, vidx, pgrp, nullptr, nullptr, missingness, nullptr, genovec_buf);\n}\n\nPglErr PgrGetMissingnessD(const uintptr_t* __restrict sample_include, PgrSampleSubsetIndex pssi, uint32_t sample_ct, uint32_t vidx, PgenReader* pgr_ptr, uintptr_t* __restrict missingness_hc, uintptr_t* __restrict missingness_dosage, uintptr_t* __restrict hets, uintptr_t* __restrict genovec_buf) {\n  if (!sample_ct) {\n    return kPglRetSuccess;\n  }\n  PgenReaderMain* pgrp = GetPgrp(pgr_ptr);\n  // sample_include can't be null\n  // either missingness_hc or missingness_dosage must be non-null\n  assert(vidx < pgrp->fi.raw_variant_ct);\n  const uint32_t* sample_include_cumulative_popcounts = GetSicp(pssi);\n  const uint32_t vrtype = GetPgfiVrtype(&(pgrp->fi), vidx);\n  const uint32_t dosage_is_relevant = missingness_dosage && VrtypeDosage(vrtype);\n  const uint32_t need_to_skip_aux1or2 = dosage_is_relevant && (vrtype & 0x18);\n  const uint32_t raw_sample_ct = pgrp->fi.raw_sample_ct;\n  const uint32_t raw_sample_ctl = BitCtToWordCt(raw_sample_ct);\n  const uint32_t subsetting_required = (sample_ct != raw_sample_ct);\n  const unsigned char* fread_ptr = nullptr;\n  const unsigned char* fread_end = nullptr;\n  uintptr_t* missingness_base = missingness_hc? missingness_hc : missingness_dosage;\n  if (!need_to_skip_aux1or2) {\n    PglErr reterr = ReadMissingness(sample_include, sample_include_cumulative_popcounts, sample_ct, vidx, pgrp, dosage_is_relevant? 
(&fread_ptr) : nullptr, dosage_is_relevant? (&fread_end) : nullptr, missingness_base, hets, genovec_buf);\n    if (missingness_dosage && missingness_hc) {\n      memcpy(missingness_dosage, missingness_hc, BitCtToWordCt(sample_ct) * sizeof(intptr_t));\n    }\n    if (reterr || (!dosage_is_relevant)) {\n      return reterr;\n    }\n  } else {\n    PglErr reterr = ReadRawGenovec(subsetting_required, vidx, pgrp, &fread_ptr, &fread_end, genovec_buf);\n    if (unlikely(reterr)) {\n      return reterr;\n    }\n    ZeroTrailingNyps(raw_sample_ct, genovec_buf);\n    uintptr_t* subsetted_genovec = pgrp->workspace_vec;\n    CopyNyparrNonemptySubset(genovec_buf, sample_include, raw_sample_ct, sample_ct, subsetted_genovec);\n    GenoarrToMissingnessUnsafe(subsetted_genovec, sample_ct, missingness_base);\n    if (missingness_hc) {\n      memcpy(missingness_dosage, missingness_hc, BitCtToWordCt(sample_ct) * sizeof(intptr_t));\n    }\n\n    const uintptr_t* allele_idx_offsets = pgrp->fi.allele_idx_offsets;\n    const uint32_t allele_ct = allele_idx_offsets? (allele_idx_offsets[vidx + 1] - allele_idx_offsets[vidx]) : 2;\n    if (VrtypeHphase(vrtype) || hets) {\n      uintptr_t* all_hets = pgrp->workspace_all_hets;\n      PgrDetectGenoarrHets(genovec_buf, raw_sample_ct, all_hets);\n      if (VrtypeMultiallelicHc(vrtype)) {\n        // see analogous branch in ReadGenovecHphaseSubsetUnsafe()\n        // probable todo: make this a separate function\n        const uint32_t aux1_first_byte = *fread_ptr++;\n        const uint32_t aux1a_mode = aux1_first_byte & 15;\n        const uint32_t aux1b_mode = aux1_first_byte >> 4;\n        uint32_t raw_01_ct = 0;\n        uint32_t raw_10_ct = 0;\n        if ((!aux1a_mode) || (!aux1b_mode)) {\n          GenovecCount12Unsafe(genovec_buf, raw_sample_ct, &raw_01_ct, &raw_10_ct);\n        }\n        reterr = SkipAux1a(fread_end, aux1a_mode, raw_sample_ct, allele_ct, raw_01_ct, &fread_ptr);\n        if (unlikely(reterr)) {\n          return reterr;\n        }\n        uintptr_t* aux1b_hets = pgrp->workspace_aux1x_present;\n        uint32_t* deltalist_workspace = pgrp->workspace_difflist_sample_ids;\n        uint32_t aux1b_het_present;\n        reterr = GetAux1bHets(fread_end, genovec_buf, aux1b_mode, raw_sample_ct, allele_ct, raw_10_ct, &fread_ptr, aux1b_hets, &aux1b_het_present, deltalist_workspace);\n        if (unlikely(reterr)) {\n          return reterr;\n        }\n        if (aux1b_het_present) {\n          BitvecOr(aux1b_hets, raw_sample_ctl, all_hets);\n        }\n      }\n      if (hets) {\n        CopyBitarrSubset(all_hets, sample_include, sample_ct, hets);\n      }\n      if (VrtypeHphase(vrtype)) {\n        reterr = SkipAux2(fread_end, PopcountWords(all_hets, raw_sample_ctl), &fread_ptr, nullptr);\n        if (unlikely(reterr)) {\n          return reterr;\n        }\n      }\n    } else {\n      SkipAux1(fread_end, genovec_buf, raw_sample_ct, allele_ct, &fread_ptr);\n    }\n  }\n  // now perform bitwise andnot with dosage_present\n  if ((vrtype & 0x60) == 0x40) {\n    // unconditional dosage.  
spot-check the appropriate entries for equality\n    // to 65535.\n    const unsigned char* dosage_main_read = fread_ptr;\n    // bugfix (18 Feb 2019): sample_include is permitted to be nullptr here\n    if (!subsetting_required) {\n      // probable todo: faster iteration over set bits\n      for (uint32_t widx = 0; widx != raw_sample_ctl; ++widx) {\n        uintptr_t missing_dosage_bits = missingness_dosage[widx];\n        if (missing_dosage_bits) {\n          const unsigned char* cur_dosage_main = &(dosage_main_read[widx * kBitsPerWord * sizeof(int16_t)]);\n          do {\n            uint32_t sample_idx_lowbits = ctzw(missing_dosage_bits);\n            uint16_t cur_dosage_u16;\n            CopyFromUnalignedOffsetU16(&cur_dosage_u16, cur_dosage_main, sample_idx_lowbits);\n            if (cur_dosage_u16 != 65535) {\n              missingness_dosage[widx] ^= missing_dosage_bits & (-missing_dosage_bits);\n            }\n            missing_dosage_bits &= missing_dosage_bits - 1;\n          } while (missing_dosage_bits);\n        }\n      }\n    } else {\n      uintptr_t sample_uidx_base = 0;\n      uintptr_t sample_include_bits = sample_include[0];\n      for (uint32_t sample_idx = 0; sample_idx != sample_ct; ++sample_idx) {\n        const uintptr_t sample_uidx = BitIter1(sample_include, &sample_uidx_base, &sample_include_bits);\n        if (!IsSet(missingness_dosage, sample_idx)) {\n          continue;\n        }\n        uint16_t cur_dosage_u16;\n        CopyFromUnalignedOffsetU16(&cur_dosage_u16, dosage_main_read, sample_uidx);\n        if (cur_dosage_u16 != 65535) {\n          ClearBit(sample_idx, missingness_dosage);\n        }\n      }\n    }\n    return kPglRetSuccess;\n  }\n  uintptr_t* dosage_present = pgrp->workspace_dosage_present;\n  if ((vrtype & 0x60) == 0x20) {\n    // dosage list\n    uint32_t dummy;\n    if (unlikely(ParseAndSaveDeltalistAsBitarr(fread_end, raw_sample_ct, &fread_ptr, dosage_present, &dummy))) {\n      return kPglRetMalformedInput;\n    }\n  } else {\n    // dosage bitarray\n    dosage_present[raw_sample_ctl - 1] = 0;\n    const uint32_t raw_sample_ctb = DivUp(raw_sample_ct, CHAR_BIT);\n    memcpy(dosage_present, fread_ptr, raw_sample_ctb);\n  }\n  if (subsetting_required) {\n    CopyBitarrSubset(dosage_present, sample_include, sample_ct, pgrp->workspace_vec);\n    dosage_present = pgrp->workspace_vec;\n  }\n  BitvecInvmask(dosage_present, BitCtToWordCt(sample_ct), missingness_dosage);\n  return kPglRetSuccess;\n}\n\nstatic inline BoolErr ValidateVint31(const unsigned char* buf_end, const unsigned char** bufpp, uint32_t* val_ptr) {\n  if (unlikely(buf_end <= (*bufpp))) {\n    return 1;\n  }\n  uint32_t vint32 = *((*bufpp)++);\n  if (vint32 <= 127) {\n    *val_ptr = vint32;\n    return 0;\n  }\n  vint32 &= 127;\n  for (uint32_t shift = 7; shift != 28; shift += 7) {\n    if (unlikely(buf_end == (*bufpp))) {\n      return 1;\n    }\n    uint32_t uii = *((*bufpp)++);\n    vint32 |= (uii & 127) << shift;\n    if (uii <= 127) {\n      *val_ptr = vint32;\n      return 0;\n    }\n  }\n  if (unlikely(buf_end == (*bufpp))) {\n    return 1;\n  }\n  uint32_t uii = *((*bufpp)++);\n  if (unlikely(uii > 7)) {\n    return 1;\n  }\n  vint32 |= uii << 28;\n  *val_ptr = vint32;\n  return 0;\n}\n\nBoolErr ValidateDifflistHeader(const unsigned char* fread_end, uint32_t sample_ct, const unsigned char** fread_pp, uintptr_t* raregeno_buf, const unsigned char** difflist_group_info_ptr, uint32_t* difflist_len_ptr) {\n  // can be used for deltalists: pass raregeno_buf == nullptr.\n  
if (unlikely(ValidateVint31(fread_end, fread_pp, difflist_len_ptr))) {\n    // todo: ensure fread_pp points to a problematic byte whenever a validate_\n    // function returns an error, so the error message can provide an accurate\n    // byte offset.\n    return 1;\n  }\n  const uint32_t difflist_len = *difflist_len_ptr;\n  *difflist_group_info_ptr = *fread_pp;\n  if (!difflist_len) {\n    return 0;\n  }\n  if (unlikely(difflist_len > sample_ct / kPglMaxDifflistLenDivisor)) {\n    return 1;\n  }\n  const uint32_t group_ct = DivUp(difflist_len, kPglDifflistGroupSize);\n  const uint32_t sample_id_byte_ct = BytesToRepresentNzU32(sample_ct);\n  const uint32_t difflist_index_byte_ct = group_ct * (sample_id_byte_ct + 1) - 1;\n  if (PtrAddCk(fread_end, difflist_index_byte_ct, fread_pp)) {\n    return 1;\n  }\n  if (!raregeno_buf) {\n    return 0;\n  }\n  const uint32_t raregeno_byte_ct = NypCtToByteCt(difflist_len);\n  const unsigned char* raregeno_start = *fread_pp;\n  if (PtrAddCk(fread_end, raregeno_byte_ct, fread_pp)) {\n    return 1;\n  }\n  memcpy(raregeno_buf, raregeno_start, raregeno_byte_ct);\n  const uint32_t difflist_len_mod4 = difflist_len % 4;\n  if (difflist_len_mod4) {\n    const uint32_t last_raregeno_byte = (*fread_pp)[-1];\n    if (unlikely(last_raregeno_byte >> (2 * difflist_len_mod4))) {\n      return 1;\n    }\n  }\n  return 0;\n}\n\nBoolErr ValidateAndApplyDifflist(const unsigned char* fread_end, uint32_t common2_code, const unsigned char** fread_pp, PgenReaderMain* pgrp, uintptr_t* __restrict genoarr) {\n  // Side effects: uses pgr.workspace_raregeno_tmp_loadbuf.\n  // Similar to ParseAndApplyDifflist(), but with exhaustive input\n  // validation.\n  const uint32_t sample_ct = pgrp->fi.raw_sample_ct;\n  uintptr_t* cur_raregeno_iter = pgrp->workspace_raregeno_tmp_loadbuf;\n  const unsigned char* group_info_iter;\n  uint32_t difflist_len;\n  if (unlikely(ValidateDifflistHeader(fread_end, sample_ct, fread_pp, cur_raregeno_iter, &group_info_iter, &difflist_len))) {\n    return 1;\n  }\n  if (!difflist_len) {\n    return 0;\n  }\n  const uint32_t subgroup_idx_last = (difflist_len - 1) / kBitsPerWordD2;\n  if (common2_code) {\n    // 1-bit format + list of exceptions.  
In this case,\n    //   (i) the length of the exception list must be < (sample_ct / 16)\n    //   (ii) every raregeno entry must either be one of the two rare genotype\n    //        values, or involve a rare alt allele.\n    if (unlikely(difflist_len >= (sample_ct / (2 * kPglMaxDifflistLenDivisor)))) {\n      return 1;\n    }\n    const uintptr_t common_code_delta = common2_code & 3;\n    const uintptr_t inv_common_word1 = (3 - common2_code / 4) * kMask5555;\n    const uintptr_t inv_common_word2 = inv_common_word1 - (common_code_delta * kMask5555);\n    for (uint32_t subgroup_idx = 0; ; ++subgroup_idx) {\n      uintptr_t cur_raregeno_word = cur_raregeno_iter[subgroup_idx];\n      const uintptr_t match1 = Word11(cur_raregeno_word ^ inv_common_word1);\n      const uintptr_t match2 = Word11(cur_raregeno_word ^ inv_common_word2);\n      if (subgroup_idx == subgroup_idx_last) {\n        // ignore trailing bits\n        const uint32_t lshift = ((-difflist_len) % kBitsPerWordD2) * 2;\n        if (unlikely((match1 << lshift) || (match2 << lshift))) {\n          return 1;\n        }\n        break;\n      }\n      if (unlikely(match1 || match2)) {\n        // todo: if (multiallelic_hc_present && (!inv_common_word2)), record\n        // might be fine; but we need to verify these are actually rare alt\n        // alleles.\n        // (er, above comment is obsolete)\n        return 1;\n      }\n    }\n  }\n  const uint32_t sample_id_byte_ct = BytesToRepresentNzU32(sample_ct);\n  const unsigned char* group_byte_cts_iter = &(group_info_iter[DivUp(difflist_len, kPglDifflistGroupSize) * sample_id_byte_ct]);\n  const unsigned char* prev_group_start = *fread_pp;\n\n  uintptr_t sample_idx = 0;\n  for (uint32_t subgroup_idx = 0; ; ++subgroup_idx) {\n    uint32_t remaining_deltas_in_subgroup = kBitsPerWordD2 - 1;\n    if (subgroup_idx >= subgroup_idx_last) {\n      if (subgroup_idx > subgroup_idx_last) {\n        return 0;\n      }\n      remaining_deltas_in_subgroup &= difflist_len - 1;\n    }\n    if (!(subgroup_idx % (kPglDifflistGroupSize / kBitsPerWordD2))) {\n      uintptr_t new_sample_idx_start = SubU32Load(group_info_iter, sample_id_byte_ct);\n      if (subgroup_idx) {\n        if (unlikely(sample_idx >= new_sample_idx_start)) {\n          return 1;\n        }\n        const uint32_t group_byte_ct = S_CAST(uint32_t, *group_byte_cts_iter++) + 63;\n        if (unlikely(S_CAST(uintptr_t, (*fread_pp) - prev_group_start) != group_byte_ct)) {\n          return 1;\n        }\n        prev_group_start = *fread_pp;\n      }\n      sample_idx = new_sample_idx_start;\n      group_info_iter = &(group_info_iter[sample_id_byte_ct]);\n    } else {\n      uint32_t sample_idx_incr;\n      if (unlikely(ValidateVint31(fread_end, fread_pp, &sample_idx_incr) || (!sample_idx_incr))) {\n        return 1;\n      }\n      sample_idx += sample_idx_incr;\n    }\n    uintptr_t cur_raregeno_word = *cur_raregeno_iter++;\n    for (; ; --remaining_deltas_in_subgroup) {\n      if (unlikely(sample_idx >= sample_ct)) {\n        return 1;\n      }\n      const uintptr_t cur_geno = cur_raregeno_word & 3;\n      AssignNyparrEntry(sample_idx, cur_geno, genoarr);\n      if (!remaining_deltas_in_subgroup) {\n        break;\n      }\n      uint32_t sample_idx_incr;\n      if (unlikely(ValidateVint31(fread_end, fread_pp, &sample_idx_incr) || (!sample_idx_incr))) {\n        return 1;\n      }\n      sample_idx += sample_idx_incr;\n      cur_raregeno_word >>= 2;\n    }\n  }\n}\n\nBoolErr ValidateOnebit(const unsigned char* fread_end, const 
unsigned char** fread_pp, PgenReaderMain* pgrp, uintptr_t* __restrict genoarr) {\n  // ParseOnebitUnsafe() with exhaustive input validation.\n  const uint32_t sample_ct = pgrp->fi.raw_sample_ct;\n  const uint32_t common2_and_bitarray_byte_ct = (sample_ct + 15) / CHAR_BIT;\n  const unsigned char* onebit_main_iter = *fread_pp;\n  if (PtrAddCk(fread_end, common2_and_bitarray_byte_ct, fread_pp)) {\n    return 1;\n  }\n  const uintptr_t common2_code = *onebit_main_iter++;\n  const uintptr_t common_code_delta = common2_code & 3;\n  uintptr_t word_base = common2_code / 4;\n  if (unlikely((!common_code_delta) || (word_base + common_code_delta > 3))) {\n    return 1;\n  }\n  word_base *= kMask5555;\n  const uint32_t genoarr_widx_trail = (sample_ct + 7) / kBitsPerWordD2;\n  const uint32_t genoarr_widx_end = NypCtToWordCt(sample_ct);\n  for (uint32_t genoarr_widx = 0; ; ++genoarr_widx) {\n    uintptr_t ww;\n    if (genoarr_widx >= genoarr_widx_trail) {\n      if (genoarr_widx == genoarr_widx_end) {\n        break;\n      }\n      const uint32_t nontrail_byte_ct = ((sample_ct - 1) % kBitsPerWordD2) / CHAR_BIT;\n      ww = ProperSubwordLoad(&(onebit_main_iter[genoarr_widx_trail * sizeof(Halfword)]), 1 + nontrail_byte_ct);\n      const uint32_t sample_ct_mod8 = sample_ct % 8;\n      if (sample_ct_mod8) {\n        if (unlikely(ww >> (nontrail_byte_ct * 8 + sample_ct_mod8))) {\n          return 1;\n        }\n      }\n    } else {\n      Halfword hw;\n      CopyFromUnalignedOffsetHW(&hw, onebit_main_iter, genoarr_widx);\n      ww = hw;\n    }\n    ww = UnpackHalfwordToWord(ww);\n    genoarr[genoarr_widx] = word_base + ww * common_code_delta;\n  }\n  return ValidateAndApplyDifflist(fread_end, common2_code, fread_pp, pgrp, genoarr);\n}\n\n// assumes that we aren't dealing with the trivial fixed-width case.\n// saves main genotype array to genovec.  
does not zero out trailing bits.\nBoolErr ValidateGeno(const unsigned char* fread_end, uint32_t vidx, PgenReaderMain* pgrp, const unsigned char** fread_pp, uintptr_t* genovec, char* errstr_buf) {\n  const uint32_t vrtype = pgrp->fi.vrtypes[vidx];\n  const uint32_t sample_ct = pgrp->fi.raw_sample_ct;\n  if (VrtypeLdCompressed(vrtype)) {\n    CopyNyparr(pgrp->ldbase_genovec, sample_ct, genovec);\n    if (unlikely(ValidateAndApplyDifflist(fread_end, 0, fread_pp, pgrp, genovec))) {\n      snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Invalid LD difflist for (0-based) variant #%u.\\n\", vidx);\n      return 1;\n    }\n    if (vrtype & 1) {\n      GenovecInvertUnsafe(sample_ct, genovec);\n    }\n    return 0;\n  }\n  const uint32_t is_ldbase = VrtypeLdCompressed(pgrp->fi.vrtypes[vidx + 1]);\n  if (!(vrtype & 4)) {\n    if (vrtype & 1) {\n      if (unlikely(ValidateOnebit(fread_end, fread_pp, pgrp, genovec))) {\n        snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Invalid 1-bit genotype record for (0-based) variant #%u.\\n\", vidx);\n        return 1;\n      }\n    } else {\n      const uint32_t genovec_byte_ct = DivUp(sample_ct, 4);\n      const unsigned char* src_genodata = *fread_pp;\n      if (PtrAddCk(fread_end, genovec_byte_ct, fread_pp)) {\n        snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Invalid 2-bit genotype record for (0-based) variant #%u\\n\", vidx);\n        return 1;\n      }\n      memcpy(genovec, src_genodata, genovec_byte_ct);\n      const uint32_t sample_ct_mod4 = sample_ct % 4;\n      if (sample_ct_mod4) {\n        const uint32_t last_geno_byte = (*fread_pp)[-1];\n        if (unlikely(last_geno_byte >> (2 * sample_ct_mod4))) {\n          snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Last genotype byte for (0-based) variant #%u has nonzero trailing bits.\\n\", vidx);\n          return 1;\n        }\n      }\n    }\n  } else {\n    const uint32_t vrtype_low2 = vrtype & 3;\n    if (vrtype_low2 != 1) {\n      const uint32_t vec_ct = NypCtToVecCt(sample_ct);\n      vecset(genovec, vrtype_low2 * kMask5555, vec_ct);\n      if (unlikely(ValidateAndApplyDifflist(fread_end, 0, fread_pp, pgrp, genovec))) {\n        snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Invalid genotype difflist for (0-based) variant #%u.\\n\", vidx);\n        return 1;\n      }\n    } else {\n      if (is_ldbase) {\n        snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Invalid LD back-reference from variant #%u to all-hom-ref variant #%u.\\n\", vidx + 1, vidx);\n        return 1;\n      }\n      ZeroWArr(NypCtToWordCt(sample_ct), genovec);\n    }\n  }\n  if (is_ldbase) {\n    CopyNyparr(genovec, sample_ct, pgrp->ldbase_genovec);\n  }\n  return 0;\n}\n\nBoolErr ValidateAndCountDeltalist(const unsigned char* fread_end, uint32_t sample_ct, const unsigned char** fread_pp, uint32_t* __restrict deltalist, uint32_t* deltalist_len_ptr) {\n  // pass deltalist == nullptr when actual bit positions aren't needed\n  const unsigned char* group_info_iter;\n  if (unlikely(ValidateDifflistHeader(fread_end, sample_ct, fread_pp, nullptr, &group_info_iter, deltalist_len_ptr))) {\n    return 1;\n  }\n  const uint32_t deltalist_len = *deltalist_len_ptr;\n  if (!deltalist_len) {\n    return 0;\n  }\n  const uint32_t sample_id_byte_ct = BytesToRepresentNzU32(sample_ct);\n  const uint32_t group_idx_last = (deltalist_len - 1) / kPglDifflistGroupSize;\n  const unsigned char* group_byte_cts_iter = &(group_info_iter[DivUp(deltalist_len, kPglDifflistGroupSize) * sample_id_byte_ct]);\n  const unsigned char* 
prev_group_start = *fread_pp;\n  uint32_t* deltalist_iter = deltalist;\n  uint32_t group_len_m1 = kPglDifflistGroupSize - 1;\n  uintptr_t sample_idx = 0;\n  for (uint32_t group_idx = 0; ; ++group_idx) {\n    if (group_idx >= group_idx_last) {\n      if (group_idx > group_idx_last) {\n        return 0;\n      }\n      group_len_m1 &= deltalist_len - 1;\n    }\n    uintptr_t new_sample_idx = SubU32Load(group_info_iter, sample_id_byte_ct);\n    if (group_idx) {\n      if (unlikely(sample_idx >= new_sample_idx)) {\n        return 1;\n      }\n      const uint32_t group_byte_ct = S_CAST(uint32_t, *group_byte_cts_iter++) + 63;\n      if (unlikely(S_CAST(uintptr_t, (*fread_pp) - prev_group_start) != group_byte_ct)) {\n        return 1;\n      }\n      prev_group_start = *fread_pp;\n    }\n    sample_idx = new_sample_idx;\n    group_info_iter = &(group_info_iter[sample_id_byte_ct]);\n    for (uint32_t deltalist_idx_lowbits = 0; ; ++deltalist_idx_lowbits) {\n      if (unlikely(sample_idx >= sample_ct)) {\n        return 1;\n      }\n      if (deltalist_iter) {\n        *deltalist_iter++ = sample_idx;\n      }\n      if (deltalist_idx_lowbits == group_len_m1) {\n        break;\n      }\n      uint32_t sample_idx_incr;\n      if (unlikely(ValidateVint31(fread_end, fread_pp, &sample_idx_incr) || (!sample_idx_incr))) {\n        return 1;\n      }\n      sample_idx += sample_idx_incr;\n    }\n  }\n}\n\nBoolErr ValidateMultiallelicHc(const unsigned char* fread_end, const uintptr_t* __restrict raw_genovec, uint32_t vidx, uint32_t allele_ct, PgenReaderMain* pgrp, const unsigned char** fread_pp, uint32_t* __restrict het_ctp, char* __restrict errstr_buf) {\n  if (unlikely(allele_ct <= 2)) {\n    snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Multiallelic hardcall track present for (0-based) variant #%u, but it apparently has only %u allele%s.\\n\", vidx, allele_ct, (allele_ct == 1)? 
\"\" : \"s\");\n    return 1;\n  }\n  const uint32_t sample_ct = pgrp->fi.raw_sample_ct;\n  const uint32_t aux1_first_byte = **fread_pp;\n  *fread_pp += 1;\n  if (unlikely(aux1_first_byte &&\n               (aux1_first_byte != 1) &&\n               (aux1_first_byte != 15) &&\n               (aux1_first_byte != 16) &&\n               (aux1_first_byte != 17) &&\n               (aux1_first_byte != 31) &&\n               (aux1_first_byte != 240) &&\n               (aux1_first_byte != 241))) {\n    snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Invalid multiallelic hardcall track mode byte (%u; must be in {0, 1, 15, 16, 17, 31, 240, 241}) in (0-based) variant #%u.\\n\", aux1_first_byte, vidx);\n    return 1;\n  }\n  const uint32_t aux1a_mode = aux1_first_byte & 15;\n  const uint32_t aux1b_mode = aux1_first_byte >> 4;\n  uint32_t raw_01_ct;\n  uint32_t raw_10_ct;\n  GenovecCount12Unsafe(raw_genovec, sample_ct, &raw_01_ct, &raw_10_ct);\n  uint32_t* deltalist_workspace = pgrp->workspace_difflist_sample_ids;\n  if (aux1a_mode != 15) {\n    if (unlikely(!raw_01_ct)) {\n      snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Multiallelic het-ref hardcall track present for (0-based) variant #%u, but no het-ref calls exist.\\n\", vidx);\n      return 1;\n    }\n    uint32_t rare01_ct;\n    if (!aux1a_mode) {\n      const uint32_t subset_byte_ct = DivUp(raw_01_ct, CHAR_BIT);\n      if (PtrCheck(fread_end, *fread_pp, subset_byte_ct)) {\n        snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Invalid multiallelic het-ref hardcall bitarray-subset for (0-based) variant #%u.\\n\", vidx);\n        return 1;\n      }\n      rare01_ct = PopcountBytes(*fread_pp, subset_byte_ct);\n      if (unlikely(!rare01_ct)) {\n        snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Empty multiallelic het-ref hardcall bitarray-subset for (0-based) variant #%u.\\n\", vidx);\n        return 1;\n      }\n      *fread_pp += subset_byte_ct;\n      const uint32_t raw_01_ct_mod8 = raw_01_ct % 8;\n      if (raw_01_ct_mod8) {\n        if (unlikely((*fread_pp)[-1] >> raw_01_ct_mod8)) {\n          snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Multiallelic het-ref hardcall bitarray-subset for (0-based) variant #%u has nonzero trailing bits.\\n\", vidx);\n          return 1;\n        }\n      }\n    } else {\n      if (unlikely(ValidateAndCountDeltalist(fread_end, sample_ct, fread_pp, deltalist_workspace, &rare01_ct) || (!rare01_ct))) {\n        snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Invalid multiallelic het-ref hardcall deltalist-subset for (0-based) variant #%u.\\n\", vidx);\n        return 1;\n      }\n      for (uint32_t uii = 0; uii != rare01_ct; ++uii) {\n        if (unlikely(GetNyparrEntry(raw_genovec, deltalist_workspace[uii]) != 1)) {\n          snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Invalid multiallelic het-ref hardcall deltalist-subset for (0-based) variant #%u (an index doesn't correspond to a het-ref call).\\n\", vidx);\n          return 1;\n        }\n      }\n    }\n    if (allele_ct < 5) {\n      // Nothing to do for allele_ct == 3.\n      if (allele_ct == 4) {\n        // 1-bit entries.  
Contents must be in range, so just validate trailing\n        // bits.\n        const uint32_t fvals_byte_ct = DivUp(rare01_ct, 8);\n        if (PtrAddCk(fread_end, fvals_byte_ct, fread_pp)) {\n          snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Invalid multiallelic het-ref hardcall track for (0-based) variant #%u (shorter than expected).\\n\", vidx);\n          return 1;\n        }\n        const uint32_t rare01_ct_mod8 = rare01_ct % 8;\n        if (rare01_ct_mod8) {\n          if (unlikely((*fread_pp)[-1] >> rare01_ct_mod8)) {\n            snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Invalid multiallelic het-ref hardcall track for (0-based) variant #%u (nonzero trailing bits).\\n\", vidx);\n            return 1;\n          }\n        }\n      }\n    } else {\n      const unsigned char* fvals = *fread_pp;\n      if (allele_ct < 19) {\n        if (allele_ct < 7) {\n          // 2-bit entries.\n          const uint32_t fvals_byte_ct = DivUp(rare01_ct, 4);\n          if (PtrAddCk(fread_end, fvals_byte_ct, fread_pp)) {\n            snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Invalid multiallelic het-ref hardcall track for (0-based) variant #%u (shorter than expected).\\n\", vidx);\n            return 1;\n          }\n          if (allele_ct == 5) {\n            // Contents may be out-of-range.\n            const uint32_t fullword_ct = fvals_byte_ct / kBytesPerWord;\n            uint32_t widx = 0;\n            if (fullword_ct) {\n              for (; widx != fullword_ct; ++widx) {\n                uintptr_t cur_word;\n                CopyFromUnalignedOffsetW(&cur_word, fvals, widx);\n                if (unlikely(cur_word & (cur_word >> 1) & kMask5555)) {\n                  snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Invalid multiallelic het-ref hardcall track for (0-based) variant #%u (out-of-range allele code).\\n\", vidx);\n                  return 1;\n                }\n              }\n            }\n            for (uint32_t uii = widx * kBytesPerWord; uii != fvals_byte_ct; ++uii) {\n              const uint32_t cur_byte = fvals[uii];\n              if (unlikely(cur_byte & (cur_byte >> 1) & 0x55)) {\n                snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Invalid multiallelic het-ref hardcall track for (0-based) variant #%u (out-of-range allele code).\\n\", vidx);\n                return 1;\n              }\n            }\n          }\n          // Validate trailing bits.\n          const uint32_t rare01_ct_mod4 = rare01_ct % 4;\n          if (rare01_ct_mod4) {\n            if (unlikely((*fread_pp)[-1] >> (2 * rare01_ct_mod4))) {\n              snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Invalid multiallelic het-ref hardcall track for (0-based) variant #%u (nonzero trailing bits).\\n\", vidx);\n              return 1;\n            }\n          }\n        } else {\n          // 4-bit entries.\n          const uint32_t fvals_byte_ct = DivUp(rare01_ct, 2);\n          if (PtrAddCk(fread_end, fvals_byte_ct, fread_pp)) {\n            snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Invalid multiallelic het-ref hardcall track for (0-based) variant #%u (shorter than expected).\\n\", vidx);\n            return 1;\n          }\n          if (allele_ct != 18) {\n            // Contents may be out-of-range.\n            // (Can optimize this loop later.)\n            const uint32_t max_code = allele_ct - 3;\n            for (uint32_t uii = 0; uii != fvals_byte_ct; ++uii) {\n              const uint32_t cur_byte = fvals[uii];\n              if (unlikely(((cur_byte & 
15) > max_code) || ((cur_byte >> 4) > max_code))) {\n                snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Invalid multiallelic het-ref hardcall track for (0-based) variant #%u (out-of-range allele code).\\n\", vidx);\n                return 1;\n              }\n            }\n          }\n          // Validate trailing bits.\n          const uint32_t rare01_ct_mod2 = rare01_ct % 2;\n          if (rare01_ct_mod2) {\n            if (unlikely((*fread_pp)[-1] >> 4)) {\n              snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Invalid multiallelic het-ref hardcall track for (0-based) variant #%u (nonzero trailing bits).\\n\", vidx);\n              return 1;\n            }\n          }\n        }\n      } else {\n        // 8-bit entries.\n        if (PtrAddCk(fread_end, rare01_ct, fread_pp)) {\n          snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Invalid multiallelic het-ref hardcall track for (0-based) variant #%u (shorter than expected).\\n\", vidx);\n          return 1;\n        }\n        // Can optimize this loop later.\n        const uint32_t max_code = allele_ct - 3;\n        for (uint32_t uii = 0; uii != rare01_ct; ++uii) {\n          const uint32_t cur_byte = fvals[uii];\n          if (unlikely(cur_byte > max_code)) {\n            snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Invalid multiallelic het-ref hardcall track for (0-based) variant #%u (out-of-range allele code).\\n\", vidx);\n            return 1;\n          }\n        }\n      }\n    }\n  }\n  if (aux1b_mode != 15) {\n    if (unlikely(!raw_10_ct)) {\n      snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Multiallelic altxy hardcall track present for (0-based) variant #%u, but no altxy calls exist.\\n\", vidx);\n      return 1;\n    }\n    uint32_t rare10_ct;\n    if (!aux1b_mode) {\n      const uint32_t subset_byte_ct = DivUp(raw_10_ct, CHAR_BIT);\n      if (PtrCheck(fread_end, *fread_pp, subset_byte_ct)) {\n        snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Invalid multiallelic altxy hardcall bitarray-subset for (0-based) variant #%u.\\n\", vidx);\n        return 1;\n      }\n      rare10_ct = PopcountBytes(*fread_pp, subset_byte_ct);\n      if (unlikely(!rare10_ct)) {\n        snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Empty multiallelic altxy hardcall bitarray-subset for (0-based) variant #%u.\\n\", vidx);\n        return 1;\n      }\n      *fread_pp += subset_byte_ct;\n      const uint32_t raw_10_ct_mod8 = raw_10_ct % 8;\n      if (raw_10_ct_mod8) {\n        if (unlikely((*fread_pp)[-1] >> raw_10_ct_mod8)) {\n          snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Multiallelic altxy hardcall bitarray-subset for (0-based) variant #%u has nonzero trailing bits.\\n\", vidx);\n          return 1;\n        }\n      }\n    } else {\n      if (unlikely(ValidateAndCountDeltalist(fread_end, sample_ct, fread_pp, deltalist_workspace, &rare10_ct) || (!rare10_ct))) {\n        snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Invalid multiallelic altxy hardcall deltalist-subset for (0-based) variant #%u.\\n\", vidx);\n        return 1;\n      }\n      for (uint32_t uii = 0; uii != rare10_ct; ++uii) {\n        if (unlikely(GetNyparrEntry(raw_genovec, deltalist_workspace[uii]) != 2)) {\n          snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Invalid multiallelic altxy hardcall deltalist-subset for (0-based) variant #%u (an index doesn't correspond to an altxy call).\\n\", vidx);\n          return 1;\n        }\n      }\n    }\n    const unsigned char* fvals = *fread_pp;\n    uint32_t 
het_incr;\n    if (allele_ct < 6) {\n      if (allele_ct == 3) {\n        // 1-bit entries.\n        const uint32_t fvals_byte_ct = DivUp(rare10_ct, 8);\n        if (PtrAddCk(fread_end, fvals_byte_ct, fread_pp)) {\n          snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Invalid multiallelic altxy hardcall track for (0-based) variant #%u (shorter than expected).\\n\", vidx);\n          return 1;\n        }\n        const uint32_t rare10_ct_mod8 = rare10_ct % 8;\n        if (rare10_ct_mod8) {\n          if (unlikely((*fread_pp)[-1] >> rare10_ct_mod8)) {\n            snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Invalid multiallelic altxy hardcall track for (0-based) variant #%u (nonzero trailing bits).\\n\", vidx);\n            return 1;\n          }\n        }\n        het_incr = rare10_ct - PopcountBytes(fvals, fvals_byte_ct);\n      } else {\n        // 2+2 bit entries.\n        const uint32_t fvals_byte_ct = DivUp(rare10_ct, 2);\n        if (PtrAddCk(fread_end, fvals_byte_ct, fread_pp)) {\n          snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Invalid multiallelic altxy hardcall track for (0-based) variant #%u (shorter than expected).\\n\", vidx);\n          return 1;\n        }\n        // Can optimize this later.\n        uint64_t nybble_cts[16];\n        ZeroU64Arr(16, nybble_cts);\n        CountAllNybbles64(fvals, rare10_ct, nybble_cts);\n        // 1/1 is invalid here\n        if (nybble_cts[0]) {\n          snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Invalid multiallelic altxy hardcall track for (0-based) variant #%u (out-of-range allele code pair).\\n\", vidx);\n          return 1;\n        }\n        const uint32_t max_code = allele_ct - 2;\n        for (uint32_t hi_code = 0; hi_code != 4; ++hi_code) {\n          uint32_t lo_code = hi_code + 1;\n          if (hi_code > max_code) {\n            lo_code = 0;\n          }\n          const uint64_t* nybble_cts_offset = &(nybble_cts[hi_code * 4]);\n          for (; lo_code != 4; ++lo_code) {\n            if (nybble_cts_offset[lo_code]) {\n              snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Invalid multiallelic altxy hardcall track for (0-based) variant #%u (out-of-range allele code pair).\\n\", vidx);\n              return 1;\n            }\n          }\n        }\n        const uintptr_t rarehom_ct = nybble_cts[5] + nybble_cts[10] + nybble_cts[15];\n        het_incr = rare10_ct - rarehom_ct;\n        const uint32_t rare10_ct_mod2 = rare10_ct % 2;\n        if (rare10_ct_mod2) {\n          if (unlikely((*fread_pp)[-1] >> 4)) {\n            snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Invalid multiallelic altxy hardcall track for (0-based) variant #%u (nonzero trailing bits).\\n\", vidx);\n            return 1;\n          }\n        }\n      }\n    } else {\n      if (allele_ct < 18) {\n        // 4+4 bit entries.\n        if (PtrAddCk(fread_end, rare10_ct, fread_pp)) {\n          snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Invalid multiallelic altxy hardcall track for (0-based) variant #%u (shorter than expected).\\n\", vidx);\n          return 1;\n        }\n        const uint32_t max_code = allele_ct - 2;\n        het_incr = 0;\n        for (uint32_t uii = 0; uii != rare10_ct; ++uii) {\n          const uint32_t cur_byte = fvals[uii];\n          const uint32_t lo_code = cur_byte & 15;\n          const uint32_t hi_code = cur_byte >> 4;\n          if (unlikely((!hi_code) || (hi_code > max_code) || (lo_code > hi_code))) {\n            snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Invalid multiallelic 
altxy hardcall track for (0-based) variant #%u (out-of-range or misordered allele code pair).\\n\", vidx);\n            return 1;\n          }\n          het_incr += (lo_code != hi_code);\n        }\n      } else {\n        // 8+8 bit entries\n        if (PtrAddCk(fread_end, 2 * rare10_ct, fread_pp)) {\n          snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Invalid multiallelic altxy hardcall track for (0-based) variant #%u (shorter than expected).\\n\", vidx);\n          return 1;\n        }\n        const uint32_t max_code = allele_ct - 2;\n        het_incr = 0;\n        for (uint32_t uii = 0; uii != rare10_ct; ++uii) {\n          const AlleleCode lo_code = fvals[2 * uii];\n          const AlleleCode hi_code = fvals[2 * uii + 1];\n          if (unlikely((!hi_code) || (hi_code > max_code) || (lo_code > hi_code))) {\n            snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Invalid multiallelic altxy hardcall track for (0-based) variant #%u (out-of-range or misordered allele code pair).\\n\", vidx);\n            return 1;\n          }\n          het_incr += (lo_code != hi_code);\n        }\n      }\n    }\n    *het_ctp += het_incr;\n  }\n  return 0;\n}\n\nBoolErr ValidateHphase(const unsigned char* fread_end, uint32_t vidx, uint32_t het_ct, const unsigned char** fread_pp, char* errstr_buf) {\n  if (unlikely(!het_ct)) {\n    snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Hardcall phase track present for (0-based) variant #%u, but there were no heterozygous calls.\\n\", vidx);\n    return 1;\n  }\n  const uint32_t aux2_first_part_byte_ct = 1 + (het_ct / CHAR_BIT);\n  const unsigned char* aux2_first_part = *fread_pp;\n  if (PtrAddCk(fread_end, aux2_first_part_byte_ct, fread_pp)) {\n    snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Invalid hardcall phase track present for (0-based) variant #%u.\\n\", vidx);\n    return 1;\n  }\n  const uint32_t het_ct_p1_mod8 = (het_ct + 1) % CHAR_BIT;\n  if (het_ct_p1_mod8) {\n    // verify trailing bits are zero\n    if (unlikely((*fread_pp)[-1] >> het_ct_p1_mod8)) {\n      snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Hardcall phase track for (0-based) variant #%u has nonzero trailing bits.\\n\", vidx);\n      return 1;\n    }\n  }\n  if (!((*aux2_first_part) & 1)) {\n    // phase always present, \"first part\" is only part\n    return 0;\n  }\n  const uint32_t phasepresent_ct = PopcountBytes(aux2_first_part, aux2_first_part_byte_ct) - 1;\n  if (unlikely(!phasepresent_ct)) {\n    snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Hardcall phase track for (0-based) variant #%u does not have any actual phase information.\\n\", vidx);\n    return 1;\n  }\n  const uint32_t phaseinfo_byte_ct = DivUp(phasepresent_ct, CHAR_BIT);\n  if (PtrAddCk(fread_end, phaseinfo_byte_ct, fread_pp)) {\n    snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Invalid hardcall phase track present for (0-based) variant #%u.\\n\", vidx);\n    return 1;\n  }\n  const uint32_t phasepresent_ct_mod8 = phasepresent_ct % 8;\n  if (phasepresent_ct_mod8) {\n    if (unlikely((*fread_pp)[-1] >> phasepresent_ct_mod8)) {\n      snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Hardcall phase track for (0-based) variant #%u has nonzero trailing bits.\\n\", vidx);\n      return 1;\n    }\n  }\n  return 0;\n}\n\nPglErr ValidateDosage16(const unsigned char* fread_end, uint32_t vidx, PgenReaderMain* pgrp, const unsigned char** fread_pp, char* errstr_buf) {\n  // similar to ParseDosage16().  
doesn't support multiallelic data yet.\n  const uint32_t vrtype = pgrp->fi.vrtypes[vidx];\n  const uint32_t sample_ct = pgrp->fi.raw_sample_ct;\n  if ((vrtype & 0x60) == 0x40) {\n    // unconditional dosage.  handle separately from other two cases since\n    // 65535 is valid.\n    const unsigned char* dosage_main_read = *fread_pp;\n    if (PtrAddCk(fread_end, sample_ct * sizeof(int16_t), fread_pp)) {\n      snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Invalid unconditional dosage track for (0-based) variant #%u.\\n\", vidx);\n      return kPglRetMalformedInput;\n    }\n    // todo: verify genotype and dosage are consistent\n    for (uint32_t sample_idx = 0; sample_idx != sample_ct; ++sample_idx) {\n      uint16_t cur_dosage_val_p1;\n      CopyFromUnalignedOffsetU16(&cur_dosage_val_p1, dosage_main_read, sample_idx);\n      cur_dosage_val_p1 += 1;  // intentional overflow on 65535\n      if (unlikely(cur_dosage_val_p1 > 32769)) {\n        snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Invalid unconditional dosage track for (0-based) variant #%u (dosage is greater than 2).\\n\", vidx);\n        return kPglRetMalformedInput;\n      }\n    }\n    if (vrtype & 0x80) {\n      const unsigned char* dphase_delta_read = *fread_pp;\n      if (PtrAddCk(fread_end, sample_ct * sizeof(int16_t), fread_pp)) {\n        snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Invalid unconditional phased-dosages for (0-based) variant #%u.\\n\", vidx);\n        return kPglRetMalformedInput;\n      }\n      for (uint32_t sample_idx = 0; sample_idx != sample_ct; ++sample_idx) {\n        uint16_t dosage_val;\n        CopyFromUnalignedOffsetU16(&dosage_val, dosage_main_read, sample_idx);\n        int16_t dphase_delta_val;\n        CopyFromUnalignedOffsetI16(&dphase_delta_val, dphase_delta_read, sample_idx);\n        const uint16_t dpiece0_x2 = dosage_val + dphase_delta_val;\n        const uint16_t dpiece1_x2 = dosage_val - dphase_delta_val;\n        // Update (11 May 2018): parity condition removed.\n        if ((dpiece0_x2 > 32768) || (dpiece1_x2 > 32768)) {\n          if (unlikely((dphase_delta_val != -32768) || (dosage_val != 65535))) {\n            snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Invalid unconditional phased-dosages for (0-based) variant #%u.\\n\", vidx);\n            return kPglRetMalformedInput;\n          }\n        }\n      }\n    }\n    return kPglRetSuccess;\n  }\n  uint32_t dosage_ct;\n  if ((vrtype & 0x60) == 0x20) {\n    // dosage list\n    if (unlikely(ValidateAndCountDeltalist(fread_end, sample_ct, fread_pp, nullptr, &dosage_ct))) {\n      snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Invalid dosage list for (0-based) variant #%u.\\n\", vidx);\n      return kPglRetMalformedInput;\n    }\n  } else {\n    const uint32_t sample_ctb = DivUp(sample_ct, CHAR_BIT);\n    if (PtrCheck(fread_end, *fread_pp, sample_ctb)) {\n      snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Invalid dosage subset for (0-based) variant #%u.\\n\", vidx);\n      return kPglRetMalformedInput;\n    }\n    dosage_ct = PopcountBytes(*fread_pp, sample_ctb);\n    *fread_pp += sample_ctb;\n    const uint32_t sample_ct_mod8 = sample_ct % 8;\n    if (sample_ct_mod8) {\n      if (unlikely((*fread_pp)[-1] >> sample_ct_mod8)) {\n        snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Dosage subset bitarray for (0-based) variant #%u has nonzero trailing bits.\\n\", vidx);\n        return kPglRetMalformedInput;\n      }\n    }\n  }\n  const unsigned char* dosage_main_read = *fread_pp;\n  if (PtrAddCk(fread_end, dosage_ct 
* sizeof(int16_t), fread_pp)) {\n    snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Invalid dosage track for (0-based) variant #%u.\\n\", vidx);\n    return kPglRetMalformedInput;\n  }\n  for (uint32_t dosage_idx = 0; dosage_idx != dosage_ct; ++dosage_idx) {\n    uint16_t cur_dosage_u16;\n    CopyFromUnalignedOffsetU16(&cur_dosage_u16, dosage_main_read, dosage_idx);\n    if (unlikely(cur_dosage_u16 > 32768)) {\n      snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Invalid dosage track for (0-based) variant #%u (dosage is greater than 2).\\n\", vidx);\n      return kPglRetMalformedInput;\n    }\n  }\n  if (vrtype & 0x80) {\n    const unsigned char* file_dphase_present = *fread_pp;\n    const uint32_t dphase_present_byte_ct = DivUp(dosage_ct, CHAR_BIT);\n    if (PtrAddCk(fread_end, dphase_present_byte_ct, fread_pp)) {\n      snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Invalid phased-dosage track for (0-based) variant #%u.\\n\", vidx);\n      return kPglRetMalformedInput;\n    }\n    const uint32_t trailing_bit_ct = dosage_ct % CHAR_BIT;\n    if (unlikely(trailing_bit_ct && ((*fread_pp)[-1] & (255 << trailing_bit_ct)))) {\n      snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Invalid phased-dosage track for (0-based) variant #%u.\\n\", vidx);\n      return kPglRetMalformedInput;\n    }\n    const unsigned char* dosage_main_read_biter = dosage_main_read;\n    const unsigned char* dphase_delta_read_biter = *fread_pp;\n    const uint32_t dphase_widx_last = (dphase_present_byte_ct - 1) / kBytesPerWord;\n    uint32_t loop_end = kBitsPerWord;\n    for (uint32_t dphase_widx = 0; ; ++dphase_widx) {\n      uintptr_t ww;\n      if (dphase_widx >= dphase_widx_last) {\n        if (dphase_widx > dphase_widx_last) {\n          break;\n        }\n        loop_end = 1 + ((dosage_ct - 1) % kBitsPerWord);\n        const uint32_t final_byte_ct = DivUp(loop_end, CHAR_BIT);\n        ww = SubwordLoad(&(file_dphase_present[dphase_widx * kBytesPerWord]), final_byte_ct);\n      } else {\n        CopyFromUnalignedOffsetW(&ww, file_dphase_present, dphase_widx);\n      }\n      for (uint32_t dphase_lowbits = 0; dphase_lowbits != loop_end; ++dphase_lowbits, dosage_main_read_biter += sizeof(int16_t)) {\n        if (!((ww >> dphase_lowbits) & 1)) {\n          continue;\n        }\n        uint16_t dosage_val;\n        CopyFromUnalignedU16(&dosage_val, dosage_main_read_biter);\n        int16_t dphase_delta_val;\n        CopyFromUnalignedIncrI16(&dphase_delta_val, &dphase_delta_read_biter);\n        const uint16_t dpiece0_x2 = dosage_val + dphase_delta_val;\n        const uint16_t dpiece1_x2 = dosage_val - dphase_delta_val;\n        if (unlikely((dpiece0_x2 > 32768) || (dpiece1_x2 > 32768))) {\n          snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Invalid phased-dosage track for (0-based) variant #%u.\\n\", vidx);\n          return kPglRetMalformedInput;\n        }\n      }\n    }\n    if (unlikely(dphase_delta_read_biter == *fread_pp)) {\n      snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Invalid phased-dosage track for (0-based) variant #%u.\\n\", vidx);\n      return kPglRetMalformedInput;\n    }\n    *fread_pp = dphase_delta_read_biter;\n    if (PtrCheck(fread_end, *fread_pp, 0)) {\n      snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Invalid phased-dosage track for (0-based) variant #%u.\\n\", vidx);\n      return kPglRetMalformedInput;\n    }\n  }\n  return kPglRetSuccess;\n}\n\nstatic_assert(kPglVblockSize == 65536, \"PgrValidate() needs to have an error message updated.\");\nPglErr 
PgrValidate(PgenReader* pgr_ptr, uintptr_t* genovec_buf, char* errstr_buf) {\n  PgenReaderMain* pgrp = GetPgrp(pgr_ptr);\n  // Performs all validation which isn't done by pgfi_init_phase{1,2}() and\n  // PgrInit().\n  const uintptr_t* allele_idx_offsets = pgrp->fi.allele_idx_offsets;\n  const uint32_t variant_ct = pgrp->fi.raw_variant_ct;\n  const uint32_t sample_ct = pgrp->fi.raw_sample_ct;\n  const uint32_t const_vrtype = pgrp->fi.const_vrtype;\n  if (const_vrtype != UINT32_MAX) {\n    if (unlikely(allele_idx_offsets && (allele_idx_offsets[variant_ct] != 2 * variant_ct))) {\n      snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: .pvar file contains multiallelic variant(s), but .%s file does not.\\n\", (const_vrtype == kPglVrtypePlink1)? \"bed\" : \"pgen\");\n      return kPglRetInconsistentInput;\n    }\n    // const uintptr_t const_vrec_width = pgrp->fi.const_vrec_width;\n    if ((!const_vrtype) || (const_vrtype == kPglVrtypePlink1)) {\n      // only thing that can go wrong is nonzero trailing bits\n      const uint32_t dbl_sample_ct_mod4 = 2 * (sample_ct % 4);\n      if (!dbl_sample_ct_mod4) {\n        return kPglRetSuccess;\n      }\n      for (uint32_t vidx = 0; vidx != variant_ct; ++vidx) {\n        const unsigned char* fread_ptr;\n        const unsigned char* fread_end = nullptr;\n        if (unlikely(InitReadPtrs(vidx, pgrp, &fread_ptr, &fread_end))) {\n          FillPgenReadErrstrFromErrno(errstr_buf);\n          return kPglRetReadFail;\n        }\n        const uint32_t last_byte_in_record = fread_end[-1];\n        if (unlikely(last_byte_in_record >> dbl_sample_ct_mod4)) {\n          snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Last byte of (0-based) variant #%u has nonzero trailing bits.\\n\", vidx);\n          return kPglRetMalformedInput;\n        }\n      }\n      return kPglRetSuccess;\n    }\n    // todo: 16-bit dosage entries can't be in [32769,65534]\n    snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Validation of fixed-width dosage formats is not implemented yet.\\n\");\n    return kPglRetNotYetSupported;\n  }\n  const unsigned char* vrtypes = pgrp->fi.vrtypes;\n  for (uint32_t vidx = 0; vidx < variant_ct; vidx += kPglVblockSize) {\n    if (unlikely(VrtypeLdCompressed(vrtypes[vidx]))) {\n      snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: (0-based) variant #%u is LD-compressed; this is prohibited when the variant index is a multiple of 65536.\\n\", vidx);\n      return kPglRetMalformedInput;\n    }\n  }\n  // file size may not be validated yet.\n  uint64_t fsize;\n  FILE* ff = pgrp->ff;\n  if (unlikely(fseeko(ff, 0, SEEK_END))) {\n    FillPgenReadErrstrFromNzErrno(errstr_buf);\n    return kPglRetReadFail;\n  }\n  fsize = ftello(ff);\n  pgrp->fp_vidx = 1;  // force fseek when loading first variant\n  // todo: verify equality if no mode-0x11 footer; and if there is a footer,\n  // validate it\n  const uint64_t expected_fsize_min = pgrp->fi.var_fpos[variant_ct];\n  if (unlikely(expected_fsize_min > fsize)) {\n    char* write_iter = strcpya_k(errstr_buf, \"Error: .pgen header indicates that file size should be at least \");\n    write_iter = i64toa(expected_fsize_min, write_iter);\n    write_iter = strcpya_k(write_iter, \" bytes, but actual file size is \");\n    write_iter = i64toa(fsize, write_iter);\n    strcpy_k(write_iter, \" bytes.\\n\");\n    return kPglRetMalformedInput;\n  }\n  const uint32_t vblock_ct = DivUp(variant_ct, kPglVblockSize);\n  uint32_t header_ctrl = 0;\n  if (unlikely(fseeko(ff, 11, SEEK_SET))) {\n    
FillPgenReadErrstrFromNzErrno(errstr_buf);\n    return kPglRetReadFail;\n  }\n  header_ctrl = getc_unlocked(ff);\n  if (unlikely(header_ctrl > 255)) {\n    FillPgenReadErrstr(ff, errstr_buf);\n    return kPglRetReadFail;\n  }\n  for (uint32_t vblock_idx = 0; vblock_idx != vblock_ct; ++vblock_idx) {\n    uint64_t vblock_start_fpos;\n    if (unlikely(!fread_unlocked(&vblock_start_fpos, sizeof(int64_t), 1, ff))) {\n      FillPgenReadErrstr(ff, errstr_buf);\n      return kPglRetReadFail;\n    }\n    if (unlikely(vblock_start_fpos != pgrp->fi.var_fpos[vblock_idx * kPglVblockSize])) {\n      snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: .pgen header vblock-start index is inconsistent with variant record length index.\\n\");\n      return kPglRetMalformedInput;\n    }\n  }\n  const uint32_t vrtype_and_fpos_storage = header_ctrl & 15;\n  const uint32_t alt_allele_ct_byte_ct = (header_ctrl >> 4) & 3;\n  const uint32_t nonref_flags_stored = ((header_ctrl >> 6) == 3);\n\n  // does not include vrtypes yet\n  uint64_t vblock_index_byte_ct = kPglVblockSize * (1 + (vrtype_and_fpos_storage & 3) + alt_allele_ct_byte_ct);\n  if (nonref_flags_stored) {\n    vblock_index_byte_ct += kPglVblockSize / CHAR_BIT;\n  }\n  uint64_t last_vrtype_byte_offset = 0;\n  uint32_t trailing_shift = 4;\n  if (vrtype_and_fpos_storage & 8) {\n    vblock_index_byte_ct += kPglVblockSize >> (10 - vrtype_and_fpos_storage);\n    if (vrtype_and_fpos_storage == 8) {\n      const uint32_t variant_ct_mod4 = variant_ct % 4;\n      if (variant_ct_mod4) {\n        last_vrtype_byte_offset = 20 + (vblock_ct - 1) * (vblock_index_byte_ct + sizeof(int64_t)) + ((variant_ct % kPglVblockSize) / 4);\n        trailing_shift = variant_ct_mod4 * 2;\n      }\n    } else {\n      assert(vrtype_and_fpos_storage == 9);\n      if (variant_ct % 2) {\n        last_vrtype_byte_offset = 20 + (vblock_ct - 1) * (vblock_index_byte_ct + sizeof(int64_t)) + ((variant_ct % kPglVblockSize) / 2);\n      }\n    }\n  } else if (!(vrtype_and_fpos_storage & 4)) {\n    vblock_index_byte_ct += kPglVblockSize / 2;\n    if (variant_ct % 2) {\n      // bugfix (22 Nov 2017): forgot to add offset in last block\n      last_vrtype_byte_offset = 20 + (vblock_ct - 1) * (vblock_index_byte_ct + sizeof(int64_t)) + ((variant_ct % kPglVblockSize) / 2);\n    }\n    /*\n  } else {\n    vblock_index_byte_ct += kPglVblockSize;\n    */\n  }\n  if (last_vrtype_byte_offset) {\n    uint32_t last_vrtype_byte = 0;\n    if (unlikely(fseeko(ff, last_vrtype_byte_offset, SEEK_SET))) {\n      FillPgenReadErrstrFromNzErrno(errstr_buf);\n      return kPglRetReadFail;\n    }\n    last_vrtype_byte = getc_unlocked(ff);\n    if (unlikely(last_vrtype_byte > 255)) {\n      FillPgenReadErrstr(ff, errstr_buf);\n      return kPglRetReadFail;\n    }\n    if (unlikely(last_vrtype_byte >> trailing_shift)) {\n      snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Nonzero trailing bits in last vrtype index byte.\\n\");\n      return kPglRetMalformedInput;\n    }\n  }\n  const uintptr_t* nonref_flags = pgrp->fi.nonref_flags;\n  if (nonref_flags) {\n    const uint32_t variant_ct_modl = variant_ct % kBitsPerWord;\n    if (variant_ct % CHAR_BIT) {\n      if (unlikely(nonref_flags[variant_ct / kBitsPerWord] >> variant_ct_modl)) {\n        snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Nonzero trailing bits in last nonref_flags byte.\\n\");\n        return kPglRetMalformedInput;\n      }\n    }\n  }\n\n  // could move most of this into plink2_common and make it multithreaded, if\n  // speed is ever an issue.\n  uint32_t 
allele_ct = 2;\n  for (uint32_t vidx = 0; vidx != variant_ct; ++vidx) {\n    const unsigned char* fread_ptr;\n    const unsigned char* fread_end;\n    if (unlikely(InitReadPtrs(vidx, pgrp, &fread_ptr, &fread_end))) {\n      FillPgenReadErrstrFromErrno(errstr_buf);\n      return kPglRetReadFail;\n    }\n    const unsigned char* fread_ptr_start = fread_ptr;\n    if (unlikely(ValidateGeno(fread_end, vidx, pgrp, &fread_ptr, genovec_buf, errstr_buf))) {\n      return kPglRetMalformedInput;\n    }\n    ZeroTrailingNyps(sample_ct, genovec_buf);\n    const uint32_t vrtype = vrtypes[vidx];\n    uint32_t het_ct = CountNyp(genovec_buf, kMask5555, sample_ct);\n    if (allele_idx_offsets) {\n      allele_ct = allele_idx_offsets[vidx + 1] - allele_idx_offsets[vidx];\n    }\n    if (VrtypeMultiallelicHc(vrtype)) {\n      if (unlikely(ValidateMultiallelicHc(fread_end, genovec_buf, vidx, allele_ct, pgrp, &fread_ptr, &het_ct, errstr_buf))) {\n        return kPglRetMalformedInput;\n      }\n    }\n    // don't need genovec_buf to store main genotypes past this point.\n    if (VrtypeHphase(vrtype)) {\n      if (unlikely(ValidateHphase(fread_end, vidx, het_ct, &fread_ptr, errstr_buf))) {\n        return kPglRetMalformedInput;\n      }\n    }\n    if (vrtype & 0xe0) {\n      if (unlikely((vrtype & 0xe0) == 0x80)) {\n        snprintf(errstr_buf, kPglErrstrBufBlen, \"Error: Invalid record type for (0-based) variant #%u (phased dosage bit set, but main dosage bits unset).\\n\", vidx);\n        return kPglRetMalformedInput;\n      }\n      PglErr reterr = ValidateDosage16(fread_end, vidx, pgrp, &fread_ptr, errstr_buf);\n      if (unlikely(reterr)) {\n        return reterr;\n      }\n    }\n    if (unlikely(fread_ptr != fread_end)) {\n      // possible todo: tolerate this at the end of a vblock.\n      char* write_iter = strcpya_k(errstr_buf, \"Error: Extra byte(s) in (0-based) variant record #\");\n      write_iter = u32toa(vidx, write_iter);\n      write_iter = strcpya_k(write_iter, \". (record type = \");\n      write_iter = u32toa(vrtype, write_iter);\n      write_iter = strcpya_k(write_iter, \"; expected length = \");\n      write_iter = wtoa(S_CAST(uintptr_t, fread_ptr - fread_ptr_start), write_iter);\n      write_iter = strcpya_k(write_iter, \", actual = \");\n      write_iter = wtoa(S_CAST(uintptr_t, fread_end - fread_ptr_start), write_iter);\n      memcpy_k(write_iter, \")\\n\\0\", 4);\n      return kPglRetMalformedInput;\n    }\n  }\n  return kPglRetSuccess;\n}\n\n\nBoolErr CleanupPgfi(PgenFileInfo* pgfip, PglErr* reterrp) {\n  // memory is the responsibility of the caller\n  if (pgfip->shared_ff) {\n    BoolErr pgi_fclose_err = 0;\n    if (pgfip->pgi_ff) {\n      pgi_fclose_err = fclose_null(&pgfip->pgi_ff);\n    }\n    if (unlikely(fclose_null(&pgfip->shared_ff) || pgi_fclose_err)) {\n      if (*reterrp == kPglRetSuccess) {\n        *reterrp = kPglRetReadFail;\n        return 1;\n      }\n    }\n  } else {\n    assert(!pgfip->pgi_ff);\n  }\n  return 0;\n}\n\nBoolErr CleanupPgr(PgenReader* pgr_ptr, PglErr* reterrp) {\n  PgenReaderMain* pgrp = GetPgrp(pgr_ptr);\n  // assume file is open if pgr.ff is not null\n  // memory is the responsibility of the caller for now\n  if (!pgrp->ff) {\n    return 0;\n  }\n  if (fclose_null(&(pgrp->ff))) {\n    if (*reterrp == kPglRetSuccess) {\n      *reterrp = kPglRetReadFail;\n      return 1;\n    }\n  }\n  return 0;\n}\n\n#ifdef __cplusplus\n}  // namespace plink2\n#endif\n"
  },
  {
    "path": "external_libs/pgenlib/include/pgenlib_read.h",
    "content": "#ifndef __PGENLIB_READ_H__\n#define __PGENLIB_READ_H__\n\n// This library is part of PLINK 2.0, copyright (C) 2005-2024 Shaun Purcell,\n// Christopher Chang.\n//\n// This library is free software: you can redistribute it and/or modify it\n// under the terms of the GNU Lesser General Public License as published by the\n// Free Software Foundation; either version 3 of the License, or (at your\n// option) any later version.\n//\n// This library is distributed in the hope that it will be useful, but WITHOUT\n// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or\n// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License\n// for more details.\n//\n// You should have received a copy of the GNU Lesser General Public License\n// along with this library.  If not, see <http://www.gnu.org/licenses/>.\n\n\n// pgenlib_read contains reader-specific code.\n\n#include \"pgenlib_misc.h\"\n\n#ifdef __cplusplus\nnamespace plink2 {\n#endif\n\nFLAGSET_DEF_START()\n  kfPgrLdcache0,\n  kfPgrLdcacheNyp = (1 << 0),\n  kfPgrLdcacheDifflist = (1 << 1),\n  kfPgrLdcacheRawNyp = (1 << 2),\n  // may also want RawDifflist\n  kfPgrLdcacheBasicGenocounts = (1 << 3)\nFLAGSET_DEF_END(PgrLdcacheFlags);\n\n// PgenFileInfo and PgenReader are the main exported \"classes\".\n// Exported functions involving these data structure should all have\n// \"pgfi\"/\"pgr\" in their names.\n\n// Note that this can be default-copied.\ntypedef struct PgenFileInfoStruct {\n  // ----- Header information, constant after initialization -----\n  uint32_t raw_variant_ct;\n  uint32_t raw_sample_ct;\n\n  // 0 if variant records aren't all the same length.\n  // If they are (e.g. PLINK 1 encoding; or vrtype bits 0-5 unset), we just\n  // fseek to\n  //   const_fpos_offset + const_vrec_width * ((uint64_t)variant_idx).\n  uint64_t const_fpos_offset;\n\n  uint32_t const_vrec_width;\n\n  // see below.  positioned here instead of slightly later due to struct\n  // packing behavior.\n  uint32_t const_vrtype;  // 256 for plink 1 encoding, UINT32_MAX for nonconst\n\n  // size (raw_variant_ct + 1), so that the number of bytes of (zero-based)\n  // variant n is var_fpos[n+1] - var_fpos[n].  nullptr if\n  // const_vrec_width is nonzero.\n  // It's not difficult to save some memory here (e.g. unless we're dealing\n  // with >256 TB files, it's trivial to go from 8 bytes down to 6 bytes per\n  // entry), but I doubt that's worth the trouble; let's worry about\n  // O(mn)-or-worse stuff, and on-disk stuff, first.\n  uint64_t* var_fpos;\n\n  // Variant record type codes.\n  // base pointer is null if mode is 0x01-0x04 (const_vrtype != UINT32_MAX).\n  // if not nullptr, required to be length >=\n  //   max(raw_variant_ct + 1, RoundUpPow2(raw_variant_ct, kBytesPerWord))\n  unsigned char* vrtypes;\n\n  // alt allele counts.\n\n  // This can be nullptr if all alt allele counts are 1.\n  // (actually, we store the allele index offsets, so\n  // (allele_idx_offsets[n+1] - allele_idx_offsets[n]) is the number of alleles\n  // for variant n.  
Otherwise, we'd need another data structure to support\n  // fast allele name lookup.)\n  uintptr_t* allele_idx_offsets;\n\n  uintptr_t* nonref_flags;\n\n  // If pgr.nonref_flags is nullptr and kfPgenGlobalAllNonref is unset, all\n  // reference alleles are assumed to be correct.\n  PgenGlobalFlags gflags;\n\n  uint32_t max_allele_ct;\n  // uint32_t max_dosage_allele_ct;  // might need this later\n\n  uint32_t extensions_present;\n\n  // if using per-variant fread(), this is non-null during PgenFileInfo\n  // initialization, but it's then \"moved\" to the first Pgen_reader and set to\n  // nullptr.\n  FILE* shared_ff;\n\n  // can only be non-null after PgfiInitPhase1 and before PgfiInitPhase2 /\n  // PgfiInitLoadExts, and only if the external-index-file representation is\n  // used.\n  FILE* pgi_ff;\n\n  const unsigned char* block_base;  // nullptr if using per-variant fread()\n  uint64_t block_offset;\n} PgenFileInfo;\n\ntypedef struct PgenReaderMainStruct {\n  MOVABLE_BUT_NONCOPYABLE(PgenReaderMainStruct);\n  // would like to make this const, but that makes initialization really\n  // annoying in C99\n  struct PgenFileInfoStruct fi;\n\n  // ----- Mutable state -----\n  // If we don't fseek, what's the next variant we'd read?\n  uint32_t fp_vidx;\n\n  // ** per-variant fread()-only **\n  FILE* ff;\n  unsigned char* fread_buf;\n  // ** end per-variant fread()-only **\n\n  // if LD compression is present, cache the last non-LD-compressed variant\n  uint32_t ldbase_vidx;\n\n  // flags indicating which base_variant buffers are populated\n  PgrLdcacheFlags ldbase_stypes;\n\n  uint32_t ldbase_difflist_len;\n\n  // these should be treated as private after initial allocation.\n  // not currently guaranteed to have trailing zeroes.\n  uintptr_t* ldbase_raw_genovec;  // now allocated even with no LD compression\n  uintptr_t* ldbase_genovec;\n  uintptr_t* ldbase_raregeno;\n\n  // when ldbase_difflist_sample_ids[] is initialized, element\n  // [ldbase_difflist_len] must be set to sample_ct.\n  uint32_t* ldbase_difflist_sample_ids;\n\n  // common genotype can be looked up from vrtypes[]\n\n  STD_ARRAY_DECL(uint32_t, 4, ldbase_basic_genocounts);\n\n  // now only allocated if multiallelic variants, phase, and/or dosage present\n  // most commonly used for unsubsetted genovec; all_hets can be computed from\n  // this and patch_10_{set,vals}, and then aux2 can be interpreted.\n  // can also be used for other purposes after we're done processing aux2.\n  uintptr_t* workspace_vec;\n\n  // currently must hold (raw_sample_ct / kPglMaxDifflistLenDivisor)\n  // entries; may need to double the sizes later\n  // some top-level interface functions use these, so several lower-level\n  // functions cannot\n  uintptr_t* workspace_raregeno_vec;\n  uint32_t* workspace_difflist_sample_ids;\n\n  // must hold (raw_sample_ct / kPglMaxDifflistLenDivisor) entries\n  uintptr_t* workspace_raregeno_tmp_loadbuf;\n\n  uintptr_t* workspace_aux1x_present;\n  uint64_t* workspace_imp_r2;  // needed in multiallelic case\n\n  uintptr_t* workspace_all_hets;\n  uintptr_t* workspace_subset;  // currently used for hphase decoding\n\n  uintptr_t* workspace_dosage_present;\n  uintptr_t* workspace_dphase_present;\n\n  // phase set loading (footer track in mode 0x11) unimplemented for now;\n  // should be a sequence of (sample ID, [uint32_t phase set begin, set end),\n  // [set begin, set end), ...).\n} PgenReaderMain;\n\ntypedef struct PgenReaderStruct {\n#ifdef __cplusplus\n  PgenReaderMain& GET_PRIVATE_m() { return m; }\n  PgenReaderMain 
const& GET_PRIVATE_m() const { return m; }\n private:\n#endif\n  PgenReaderMain m;\n} PgenReader;\n\nCONSTI32(kPglVrtypePlink1, 256);\n\nHEADER_INLINE uint32_t GetPgfiVrtype(const PgenFileInfo* pgfip, uint32_t vidx) {\n  if (pgfip->vrtypes) {\n    return pgfip->vrtypes[vidx];\n  }\n  return pgfip->const_vrtype;\n}\n\nHEADER_INLINE uint64_t GetPgfiFpos(const PgenFileInfo* pgfip, uintptr_t vidx) {\n  if (pgfip->var_fpos) {\n    return pgfip->var_fpos[vidx];\n  }\n  return pgfip->const_fpos_offset + pgfip->const_vrec_width * S_CAST(uint64_t, vidx);\n}\n\nHEADER_INLINE uint32_t GetPgfiVrecWidth(const PgenFileInfo* pgfip, uint32_t vidx) {\n  if (pgfip->var_fpos) {\n    return pgfip->var_fpos[vidx + 1] - pgfip->var_fpos[vidx];\n  }\n  return pgfip->const_vrec_width;\n}\n\nHEADER_INLINE uint32_t PgfiIsSimpleFormat(const PgenFileInfo* pgfip) {\n  return (pgfip->const_vrtype != UINT32_MAX);\n}\n\nHEADER_INLINE uint32_t VrtypeDifflist(uint32_t vrtype) {\n  return (vrtype & 4) && ((vrtype & 3) != 1);\n}\n\nHEADER_INLINE uint32_t VrtypeLdCompressed(uint32_t vrtype) {\n  return (vrtype & 6) == 2;\n}\n\n// Only checks for rarealt-containing hardcall.  Multiallelic dosage may still\n// be present when this returns zero.\nHEADER_INLINE uint32_t VrtypeMultiallelicHc(uint32_t vrtype) {\n  return (vrtype & 8);\n}\n\nHEADER_INLINE uint32_t VrtypeHphase(uint32_t vrtype) {\n  return (vrtype & 0x10);\n}\n\nHEADER_INLINE uint32_t VrtypeAuxTracksPresent(uint32_t vrtype) {\n  return (vrtype & 0x78);\n}\n\nHEADER_INLINE uint32_t VrtypeVariableWidth(uint32_t vrtype) {\n  return (vrtype & 0x3e);\n}\n\nHEADER_INLINE uint32_t VrtypeDosage(uint32_t vrtype) {\n  return (vrtype & 0x60);\n}\n\nstatic_assert(kPglMaxAltAlleleCt <= 254, \"GetAux1xAlleleEntryByteCt() needs to be updated.\");\nHEADER_INLINE uintptr_t GetAux1aAlleleEntryByteCt(uint32_t allele_ct, uint32_t rare01_ct) {\n  assert(allele_ct >= 3);\n  if (allele_ct == 3) {\n    return 0;\n  }\n  if (allele_ct == 4) {\n    return DivUp(rare01_ct, 8);\n  }\n  if (allele_ct <= 6) {\n    return DivUp(rare01_ct, 4);\n  }\n  if (allele_ct <= 18) {\n    return DivUp(rare01_ct, 2);\n  }\n  return rare01_ct;\n}\n\nHEADER_INLINE unsigned char* PgrGetFreadBuf(PgenReader* pgr_ptr) {\n  PgenReaderMain* pgrp = &GET_PRIVATE(*pgr_ptr, m);\n  return pgrp->fread_buf;\n}\n\nHEADER_INLINE unsigned char* PgrGetVrtypes(PgenReader* pgr_ptr) {\n  PgenReaderMain* pgrp = &GET_PRIVATE(*pgr_ptr, m);\n  return pgrp->fi.vrtypes;\n}\n\nHEADER_INLINE uint32_t PgrGetVrtype(const PgenReader* pgr_ptr, uint32_t vidx) {\n  const PgenReaderMain* pgrp = &GET_PRIVATE(*pgr_ptr, m);\n  if (pgrp->fi.vrtypes) {\n    return pgrp->fi.vrtypes[vidx];\n  }\n  return pgrp->fi.const_vrtype;\n}\n\nHEADER_INLINE uintptr_t* PgrGetNonrefFlags(PgenReader* pgr_ptr) {\n  PgenReaderMain* pgrp = &GET_PRIVATE(*pgr_ptr, m);\n  return pgrp->fi.nonref_flags;\n}\n\nHEADER_INLINE PgenGlobalFlags PgrGetGflags(const PgenReader* pgr_ptr) {\n  const PgenReaderMain* pgrp = &GET_PRIVATE(*pgr_ptr, m);\n  return pgrp->fi.gflags;\n}\n\nHEADER_INLINE uint32_t PgrGetMaxAlleleCt(const PgenReader* pgr_ptr) {\n  const PgenReaderMain* pgrp = &GET_PRIVATE(*pgr_ptr, m);\n  return pgrp->fi.max_allele_ct;\n}\n\nHEADER_INLINE void PgrSetFreadBuf(unsigned char* fread_buf, PgenReader* pgr_ptr) {\n  PgenReaderMain* pgrp = &GET_PRIVATE(*pgr_ptr, m);\n  pgrp->fread_buf = fread_buf;\n}\n\nHEADER_INLINE void PgrCopyBaseAndOffset(const PgenFileInfo* pgfip, uint32_t thread_ct, PgenReader** pgr_ptr_arr) {\n  for (uint32_t tidx = 0; tidx != thread_ct; ++tidx) 
{\n    PgenReaderMain* pgrp = &GET_PRIVATE(*(pgr_ptr_arr[tidx]), m);\n    pgrp->fi.block_base = pgfip->block_base;\n    pgrp->fi.block_offset = pgfip->block_offset;\n  }\n}\n\n// This is necessary when changing sample_include, unless the new query is\n// iterating from the first variant.  (Which can almost never be assumed in\n// plink2 since variant_include[] may not include the first variant.)\nHEADER_INLINE void PgrClearLdCache(PgenReader* pgr_ptr) {\n  PgenReaderMain* pgrp = &GET_PRIVATE(*pgr_ptr, m);\n  pgrp->ldbase_stypes &= kfPgrLdcacheRawNyp;\n\n  // bugfix, LdLoadNecessary() was otherwise claiming that reload wasn't\n  // necessary in certain cases\n  pgrp->ldbase_vidx = 0x80000000U;\n}\n\n// Design change (30 Nov 2019): It is easy to forget to call PgrClearLdCache\n// when changing sample_include.  However, each sample_include change must be\n// accompanied by a sample_include_cumulative_popcounts update.  So, if we\n// define a sample_include_cumulative_popcounts wrapper-type which can only be\n// initialized by a function that also clears a PgenReader LD cache, and modify\n// all PgrGet... functions to require this wrapper-type, the frequency of\n// foot-shooting should go down.\n//\n// The key usage rule is: only use this as a local variable type, and define\n// only one of these per function (unless you're using multiple PgenReaders\n// simultaneously, anyway).  If you're changing the sample-subset when entering\n// and exiting chrY, call PgrSetSampleSubsetIndex on your single\n// PgrSampleSubsetIndex at the time you're crossing a chrY boundary.  Don't\n// define two preinitialized PgrSampleSubsetIndexes...\n// (possible todo: if compiling as C++ and NDEBUG isn't defined, add a counter\n// field to PgenReader which is initialized to zero, asserted to be zero and\n// then incremented by PgrSetSampleSubsetIndex, and decremented by the\n// PgrSampleSubsetIndex destructor.)\ntypedef struct PgrSampleSubsetIndexStruct {\n#ifdef __cplusplus\n  const uint32_t*& GET_PRIVATE_cumulative_popcounts() { return cumulative_popcounts; }\n  const uint32_t* const& GET_PRIVATE_cumulative_popcounts() const { return cumulative_popcounts; }\n private:\n#endif\n  const uint32_t* cumulative_popcounts;\n} PgrSampleSubsetIndex;\n\nHEADER_INLINE void PgrSetSampleSubsetIndex(const uint32_t* sample_include_cumulative_popcounts, PgenReader* pgr_ptr, PgrSampleSubsetIndex* pssi_ptr) {\n  GET_PRIVATE(*pssi_ptr, cumulative_popcounts) = sample_include_cumulative_popcounts;\n  PgrClearLdCache(pgr_ptr);\n}\n\nHEADER_INLINE void PgrClearSampleSubsetIndex(PgenReader* pgr_ptr, PgrSampleSubsetIndex* pssi_ptr) {\n  GET_PRIVATE(*pssi_ptr, cumulative_popcounts) = nullptr;\n  if (pgr_ptr) {\n    PgrClearLdCache(pgr_ptr);\n  }\n}\n\nHEADER_INLINE void PgrSetBaseAndOffset0(unsigned char* block_base, uint32_t thread_ct, PgenReader** pgr_ptr_arr) {\n  for (uint32_t tidx = 0; tidx != thread_ct; ++tidx) {\n    PgenReader* pgr_ptr = pgr_ptr_arr[tidx];\n    PgrClearLdCache(pgr_ptr);\n    PgenReaderMain* pgrp = &GET_PRIVATE(*pgr_ptr, m);\n    pgrp->fi.block_base = block_base;\n    pgrp->fi.block_offset = 0;\n  }\n}\n\n// PgenFileInfo initialization is split into two phases, to decouple\n// plink2's arena allocator from this library.  
(obvious todo: provide a simple\n// malloc-using PgenReader constructor for anyone who doesn't want to worry\n// about these details.)\n//\n// Phase 1: Open the .pgen (and .pgen.pgi, if relevant); verify that the\n//   initial bytes are consistent with the file format; load/verify sample and\n//   variant counts, initialize pgfi.const_vrtype, pgfi.const_vrec_width, and\n//   pgfi.const_fpos_offset; determine initial memory allocation requirement.\n//   pgfi_alloc_cacheline_ct does not include allele counts and nonref flags,\n//   since it may be more appropriate to allocate those arrays earlier (during\n//   loading of a .bim-like file).\n//\n//   pgfi.var_fpos is set to nullptr if pgfi.const_vrec_width is nonzero.\n//   pgfi.vrtypes/var_allele_cts are set to nullptr in the plink1-format case.\n//\n//   raw_sample_ct and raw_variant_ct should be UINT32_MAX if not previously\n//   known.\n//\n// Intermission: Caller obtains a block of pgfi_alloc_cacheline_ct * 64 bytes,\n//   64-byte aligned.  The cachealigned_malloc() function can be used for this\n//   purpose.  If necessary, pgfi.allele_idx_offsets and pgfi.nonref_flags\n//   should be pointed at already-loaded data, or allocated so they can be\n//   loaded during phase 2.\n//\n// Phase 2: Initialize most pointers in the PgenReader struct to appropriate\n//   positions in first_alloc.  For modes 0x10-0x11, load pgfi.var_fpos and\n//   pgfi.vrtypes, load/validate pgfi.allele_idx_offsets and pgfi.nonref_flags\n//   if appropriate, and initialize pgfi.gflags, pgfi.max_allele_ct, and\n//   pgfi.max_dosage_allele_ct.\n//\n// Finally, if block-fread mode is being used, pgfi.block_base must be\n//   initialized to point to a memory buffer large enough to handle the largest\n//   pgfi_block_read() operation that will be attempted.\n//   pgfi_blockload_get_cacheline_req() can be used to determine the necessary\n//   buffer size.\n\n// This type may change if we introduce a more read-optimized format in the\n// future.  Right now it just tracks the presence/absence of two optional\n// pieces of information: allele counts and nonref flags.\ntypedef uint32_t PgenHeaderCtrl;\n\nvoid PreinitPgfi(PgenFileInfo* pgfip);\n\n// There are two modes of operation:\n// 1. fread block-load.  Block-load operations are single-threaded, while\n//    decompression/counting is multithreaded.  Appropriate for whole-genome\n//    queries, since even with an SSD, reading from multiple parts of a file\n//    simultaneously doesn't work well.\n// 2. fread single-variant-at-a-time.  Simpler interface than block-load, and\n//    doesn't share its inability to handle multiple queries at a time, but\n//    less performant for CPU-heavy operations on the whole genome.\n// First mode corresponds to use_blockload == 1 in phase2, and second mode\n// corresponds to use_blockload == 0.\n
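//\n// Illustrative caller-side sketch of the phase 1 / phase 2 protocol in the\n// second mode (not upstream documentation; error handling and cleanup are\n// elided, \"test.pgen\" is a hypothetical filename, and cachealigned_malloc()\n// is assumed to have its usual (size, &ptr) form):\n//\n//   PgenFileInfo pgfi;\n//   PgenHeaderCtrl header_ctrl;\n//   uintptr_t pgfi_alloc_cacheline_ct;\n//   char errstr_buf[kPglErrstrBufBlen];\n//   PreinitPgfi(&pgfi);\n//   PglErr reterr = PgfiInitPhase1(\"test.pgen\", nullptr, UINT32_MAX,\n//       UINT32_MAX, &header_ctrl, &pgfi, &pgfi_alloc_cacheline_ct,\n//       errstr_buf);\n//   // Intermission: obtain the 64-byte-aligned block.\n//   unsigned char* pgfi_alloc;\n//   cachealigned_malloc(pgfi_alloc_cacheline_ct * 64, &pgfi_alloc);\n//   uint32_t max_vrec_width;\n//   uintptr_t pgr_alloc_cacheline_ct;\n//   reterr = PgfiInitPhase2(header_ctrl, 0, 0, 0, 0, pgfi.raw_variant_ct,\n//       &max_vrec_width, &pgfi, pgfi_alloc, &pgr_alloc_cacheline_ct,\n//       errstr_buf);\n//   // A block of pgr_alloc_cacheline_ct * 64 bytes (64-byte aligned) can now\n//   // be handed to PgrInit() for each reader.\n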
//\n// There was originally a third mmap-based mode, which was removed on 14 Mar\n// 2022.  If you are interested in building e.g. a webserver backend that can\n// address multiple queries in parallel, refer to plink-ng commit c470317,\n// which captures the state of the codebase immediately preceding removal of\n// the mmap mode.\n//\n// Other notes:\n// - If pgi_fname is nullptr but the .pgen has an external index file, the\n//   index file name is assumed to be the .pgen filename with .pgi appended.\n// - pgi_fname is ignored if the .pgen does not have an external index file.\n// - raw_variant_ct must be in [1, 2^31 - 3], and raw_sample_ct must be in [1,\n//   2^31 - 2].\nPglErr PgfiInitPhase1(const char* fname, const char* pgi_fname, uint32_t raw_variant_ct, uint32_t raw_sample_ct, PgenHeaderCtrl* header_ctrl_ptr, PgenFileInfo* pgfip, uintptr_t* pgfi_alloc_cacheline_ct_ptr, char* errstr_buf);\n\n// If allele_cts_already_loaded is set, but they're present in the file,\n// they'll be validated; similarly for nonref_flags_already_loaded.\n//\n// If caller is interested in extensions, they should pass in header_exts\n// and/or footer_exts entries with type_idx set to those of the extensions of\n// interest, and type_idx values in increasing order.  On return, .size values\n// of each entry will be filled when the extension is present, and set to ~0LLU\n// when the extension is absent.\nPglErr PgfiInitPhase2Ex(PgenHeaderCtrl header_ctrl, uint32_t allele_cts_already_loaded, uint32_t nonref_flags_already_loaded, uint32_t use_blockload, uint32_t vblock_idx_start, uint32_t vidx_end, uint32_t* max_vrec_width_ptr, PgenFileInfo* pgfip, unsigned char* pgfi_alloc, PgenExtensionLl* header_exts, PgenExtensionLl* footer_exts, uintptr_t* pgr_alloc_cacheline_ct_ptr, char* errstr_buf);\n\nHEADER_INLINE PglErr PgfiInitPhase2(PgenHeaderCtrl header_ctrl, uint32_t allele_cts_already_loaded, uint32_t nonref_flags_already_loaded, uint32_t use_blockload, uint32_t vblock_idx_start, uint32_t vidx_end, uint32_t* max_vrec_width_ptr, PgenFileInfo* pgfip, unsigned char* pgfi_alloc, uintptr_t* pgr_alloc_cacheline_ct_ptr, char* errstr_buf) {\n  return PgfiInitPhase2Ex(header_ctrl, allele_cts_already_loaded, nonref_flags_already_loaded, use_blockload, vblock_idx_start, vidx_end, max_vrec_width_ptr, pgfip, pgfi_alloc, nullptr, nullptr, pgr_alloc_cacheline_ct_ptr, errstr_buf);\n}\n\n// Expected to be called right after PgfiInitPhase2Ex(), after memory buffers\n// are provided for header_exts / footer_exts entries.\nPglErr PgfiInitLoadExts(PgenHeaderCtrl header_ctrl, PgenFileInfo* pgfip, PgenExtensionLl* header_exts, PgenExtensionLl* footer_exts, char* errstr_buf);\n\nuint64_t GetPgfiLdbaseFpos(const PgenFileInfo* pgfip, uintptr_t vidx);\n\nuint64_t PgfiMultireadGetCachelineReq(const uintptr_t* variant_include, const PgenFileInfo* pgfip, uint32_t variant_ct, uint32_t block_size);\n\n// variant_include can be nullptr; in that case, we simply load all the\n// variants (load_variant_ct must be variant_uidx_end - variant_uidx_start).\n// IMPORTANT: pgfi.block_offset must be manually copied to each reader for now.\n//   (todo: probably replace pgr.fi with a pointer.  
when doing that, need to\n//   ensure multiple per-variant readers still work.)\nPglErr PgfiMultiread(const uintptr_t* variant_include, uint32_t variant_uidx_start, uint32_t variant_uidx_end, uint32_t load_variant_ct, PgenFileInfo* pgfip);\n\n\nvoid PreinitPgr(PgenReader* pgr_ptr);\n\n// Before PgrInit() is called, the caller must obtain a block of\n// pgr_alloc_cacheline_ct * 64 bytes (this value is returned by\n// pgfi_init_phase2), 64-byte aligned; this is the pgr_alloc parameter.\n//\n// There's also a modal usage difference:\n//\n// * Mode 1 (block-fread): There is one PgenFileInfo per file which doesn't\n//   belong to any reader.  After it's initialized, multiple PgenReaders can be\n//   based off of it.  When the PgenFileInfo is destroyed, those PgenReaders\n//   are invalidated and should be destroyed if that hasn't already happened.\n//\n//   fname parameter must be nullptr.\n//\n// * Mode 2 (per-variant fread): Destruction of the original PgenFileInfo\n//   struct does not invalidate any extant PgenReader instances (at least\n//   from pgenlib_read's perspective).  Instead, destruction of the\n//   corresponding memory block or allele_idx_offsets/nonref_flags invalidates\n//   the associated PgenReaders.\n//\n//   The only difference between the first reader and later readers of the same\n//   file is that the first reader steals the shared_ff used to read the\n//   header.\n//\n//   fname parameter must be non-null.\n\n// max_vrec_width ignored when using mode 1.\nPglErr PgrInit(const char* fname, uint32_t max_vrec_width, PgenFileInfo* pgfip, PgenReader* pgr_ptr, unsigned char* pgr_alloc);\n\n// practically all these functions require genovec to be allocated up to\n// vector, not word, boundary\nvoid PgrPlink1ToPlink2InplaceUnsafe(uint32_t sample_ct, uintptr_t* genovec);\n\nvoid PgrPlink2ToPlink1InplaceUnsafe(uint32_t sample_ct, uintptr_t* genovec);\n\n// Function names for the main reader functions were getting ridiculous.\n// New naming scheme:\n// * PgrGet() is the basic two-bit genovec loader.  All ALT alleles are treated\n//   as equivalent.  (00 = hom ref, 01 = het ref, 10 = two alt alleles, 11 =\n//   missing.)\n// * PgrGetInv1() is similar, except that the allele index to treat as REF can\n//   be changed.\n// * PgrGet1() only counts the specified allele.  To minimize inversion costs,\n//   GetInv1() should be called on major alleles and Get1() should be called on\n//   minor ones.\n// * PgrGetM() is the multiallelic loader which doesn't collapse multiple\n//   alleles into one.  This retrieves a sparse form identical to what\n//   PwcAppendMultiallelicSparse takes.\n//   Multiallelic-dosage read functions (PgrReadRaw() included) will probably\n//   fill a 3-part data structure of the following form:\n//   1. Bitarray indicating which samples have at least one rarealt dosage.\n//   2. unsigned char array where, if bits a, b, and c are the only set ones in\n//      the first array, the first three elements of the second array are\n//      rarealt dosage counts (1..255) for those three samples.  (Could also\n//      put those in positions [a], [b], and [c], but that produces worse\n//      memory access locality, and it makes sense to treat multiallelic\n//      dosages as fundamentally sparse.)\n//   3. Let R := MINV(255, allele_ct - 2).\n//      a. Length-(sample_ct x R) array of AlleleCodes.\n//      b. 
Length-(sample_ct x R) array of uint16_t dosage (or int16_t dphase)\n//         values.\n//      Again we use the sparse representation, with payload values packed at\n//      the beginning.\n//   (--indiv-sort algorithm: initialize an array of uintptr_ts of length\n//   sample_ct where [k] has that sample's start index in the payload arrays.)\n// * PgrGetDifflistOrGenovec() opportunistically returns the sparse genotype\n//   representation ('difflist'), for functions capable of taking advantage of\n//   it.  See SampleCountsThread() in plink2_misc for a usage example.\n// * PgrGetCounts() is equivalent to calling PgrGet() and then counting the\n//   number of 00s, 01s, 10s, and 11s, without the overhead of fully expanding\n//   the compressed data, etc.\n// * P suffix = also returns hardcall-phase information.\n// * D suffix = also returns dosage information.\n// * Dp suffix = also returns hardcall-phase, dosage and phased-dosage\n//   information.\n// * PgrGet2() and PgrGet2P() load biallelic (possibly phased) hardcalls from\n//   a possibly-multiallelic variant.  Any hardcall where either allele is not\n//   one of the specified two alleles is set to missing.\n//   There is no dosage-supporting version of this because rescaling sucks.\n\n// This will normally extract only the genotype indexes corresponding to set\n// bits in sample_include.  Set sample_ct == raw_sample_ct if you don't want\n// any subsetting to occur (in this case sample_include is ignored, can be\n// nullptr).\n// sample_ct cannot be zero.  Trailing bits of genovec are not zeroed out.\n// Ok if genovec only has space for sample_ct values.\nPglErr PgrGet(const uintptr_t* __restrict sample_include, PgrSampleSubsetIndex pssi, uint32_t sample_ct, uint32_t vidx, PgenReader* pgr_ptr, uintptr_t* __restrict genovec);\n
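\n// Hedged usage sketch (not part of the upstream header; pgr, genovec, vidx,\n// raw_sample_ct, and s are assumed to have been set up by the caller, with\n// genovec allocated to a vector boundary as noted above, and kBitsPerWordD2\n// (bits per word / 2) assumed from plink2_base):\n//\n//   PgrSampleSubsetIndex pssi;\n//   PgrClearSampleSubsetIndex(&pgr, &pssi);  // no sample-subsetting\n//   PglErr reterr = PgrGet(nullptr, pssi, raw_sample_ct, vidx, &pgr, genovec);\n//   // Two-bit codes: 0 = hom ref, 1 = het ref, 2 = two alt alleles,\n//   // 3 = missing.\n//   const uint32_t geno =\n//       (genovec[s / kBitsPerWordD2] >> (2 * (s % kBitsPerWordD2))) & 3;\n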
\n// Loads the specified variant as a difflist if that's more efficient, setting\n// difflist_common_geno to the common genotype value in that case.  Otherwise,\n// genovec is populated and difflist_common_geno is set to UINT32_MAX.\n//\n// max_simple_difflist_len must be smaller than sample_ct.\n//\n// Note that the returned difflist_len can be much larger than\n// max_simple_difflist_len when the variant is LD-encoded; it's bounded by\n//   2 * (raw_sample_ct / kPglMaxDifflistLenDivisor).\n// (probable todo: this interface has... rather sharp edges, even relative to\n// the rest of this low-level library.  Maybe it shouldn't be deleted, but it\n// would be better if there was a function that took a max_difflist_len\n// parameter, and it was safe for difflist_sample_ids to only be allocated up\n// to that length.)\nPglErr PgrGetDifflistOrGenovec(const uintptr_t* __restrict sample_include, PgrSampleSubsetIndex pssi, uint32_t sample_ct, uint32_t max_simple_difflist_len, uint32_t vidx, PgenReader* pgr_ptr, uintptr_t* __restrict genovec, uint32_t* __restrict difflist_common_geno_ptr, uintptr_t* __restrict main_raregeno, uint32_t* __restrict difflist_sample_ids, uint32_t* __restrict difflist_len_ptr);\n\n// genocounts[0] = # hom ref, [1] = # het ref, [2] = two alts, [3] = missing\nPglErr PgrGetCounts(const uintptr_t* __restrict sample_include, const uintptr_t* __restrict sample_include_interleaved_vec, PgrSampleSubsetIndex pssi, uint32_t sample_ct, uint32_t vidx, PgenReader* pgr_ptr, STD_ARRAY_REF(uint32_t, 4) genocounts);\n\n// genocounts[0] = # of hardcalls with two copies of specified allele\n// genocounts[1] = # of hardcalls with exactly one copy of specified allele\n// genocounts[2] = # of hardcalls with no copies\n// genocounts[3] = missing\nPglErr PgrGetInv1Counts(const uintptr_t* __restrict sample_include, const uintptr_t* __restrict sample_include_interleaved_vec, PgrSampleSubsetIndex pssi, uint32_t sample_ct, uint32_t vidx, uint32_t allele_idx, PgenReader* pgr_ptr, STD_ARRAY_REF(uint32_t, 4) genocounts);\n\nPglErr IMPLPgrGet1(const uintptr_t* __restrict sample_include, const uint32_t* __restrict sample_include_cumulative_popcounts, uint32_t sample_ct, uint32_t vidx, uint32_t allele_idx, PgenReaderMain* pgrp, uintptr_t* __restrict allele_countvec);\n\n// Loads a nypvec with counts of a single allele (allele_idx 0 corresponds to\n// the reference allele, allele_idx 1 corresponds to alt1, etc.).  
0b11 ==\n// missing call.\n// Note that calling this with allele_idx == 0 is similar to a plink1 load\n// (except with missing == 0b11, of course).\n// todo: provide a difflist interface once anyone wants it.\nHEADER_INLINE PglErr PgrGet1(const uintptr_t* __restrict sample_include, PgrSampleSubsetIndex pssi, uint32_t sample_ct, uint32_t vidx, uint32_t allele_idx, PgenReader* pgr_ptr, uintptr_t* __restrict allele_countvec) {\n  PgenReaderMain* pgrp = &GET_PRIVATE(*pgr_ptr, m);\n  const uint32_t* sample_include_cumulative_popcounts = GET_PRIVATE(pssi, cumulative_popcounts);\n  return IMPLPgrGet1(sample_include, sample_include_cumulative_popcounts, sample_ct, vidx, allele_idx, pgrp, allele_countvec);\n}\n\nPglErr IMPLPgrGetInv1(const uintptr_t* __restrict sample_include, const uint32_t* __restrict sample_include_cumulative_popcounts, uint32_t sample_ct, uint32_t vidx, uint32_t allele_idx, PgenReaderMain* pgrp, uintptr_t* __restrict allele_invcountvec);\n\nHEADER_INLINE PglErr PgrGetInv1(const uintptr_t* __restrict sample_include, PgrSampleSubsetIndex pssi, uint32_t sample_ct, uint32_t vidx, uint32_t allele_idx, PgenReader* pgr_ptr, uintptr_t* __restrict allele_invcountvec) {\n  PgenReaderMain* pgrp = &GET_PRIVATE(*pgr_ptr, m);\n  const uint32_t* sample_include_cumulative_popcounts = GET_PRIVATE(pssi, cumulative_popcounts);\n  return IMPLPgrGetInv1(sample_include, sample_include_cumulative_popcounts, sample_ct, vidx, allele_idx, pgrp, allele_invcountvec);\n}\n\nPglErr IMPLPgrGetInv1DifflistOrGenovec(const uintptr_t* __restrict sample_include, const uint32_t* __restrict sample_include_cumulative_popcounts, uint32_t sample_ct, uint32_t max_simple_difflist_len, uint32_t vidx, uint32_t allele_idx, PgenReaderMain* pgrp, uintptr_t* __restrict allele_invcountvec, uint32_t* __restrict difflist_common_geno_ptr, uintptr_t* __restrict main_raregeno, uint32_t* __restrict difflist_sample_ids, uint32_t* __restrict difflist_len_ptr);\n\nHEADER_INLINE PglErr PgrGetInv1DifflistOrGenovec(const uintptr_t* __restrict sample_include, PgrSampleSubsetIndex pssi, uint32_t sample_ct, uint32_t max_simple_difflist_len, uint32_t vidx, uint32_t allele_idx, PgenReader* pgr_ptr, uintptr_t* __restrict allele_invcountvec, uint32_t* __restrict difflist_common_geno_ptr, uintptr_t* __restrict main_raregeno, uint32_t* __restrict difflist_sample_ids, uint32_t* __restrict difflist_len_ptr) {\n  PgenReaderMain* pgrp = &GET_PRIVATE(*pgr_ptr, m);\n  const uint32_t* sample_include_cumulative_popcounts = GET_PRIVATE(pssi, cumulative_popcounts);\n  return IMPLPgrGetInv1DifflistOrGenovec(sample_include, sample_include_cumulative_popcounts, sample_ct, max_simple_difflist_len, vidx, allele_idx, pgrp, allele_invcountvec, difflist_common_geno_ptr, main_raregeno, difflist_sample_ids, difflist_len_ptr);\n}\n\nPglErr IMPLPgrGet2(const uintptr_t* __restrict sample_include, const uint32_t* __restrict sample_include_cumulative_popcounts, uint32_t sample_ct, uint32_t vidx, uint32_t allele_idx0, uint32_t allele_idx1, PgenReaderMain* pgrp, uintptr_t* __restrict genovec);\n\nHEADER_INLINE PglErr PgrGet2(const uintptr_t* __restrict sample_include, PgrSampleSubsetIndex pssi, uint32_t sample_ct, uint32_t vidx, uint32_t allele_idx0, uint32_t allele_idx1, PgenReader* pgr_ptr, uintptr_t* __restrict genovec) {\n  PgenReaderMain* pgrp = &GET_PRIVATE(*pgr_ptr, m);\n  const uint32_t* sample_include_cumulative_popcounts = GET_PRIVATE(pssi, cumulative_popcounts);\n  return IMPLPgrGet2(sample_include, sample_include_cumulative_popcounts, sample_ct, vidx, 
allele_idx0, allele_idx1, pgrp, genovec);\n}\n\nvoid PreinitPgv(PgenVariant* pgvp);\n\nPglErr PgrGetM(const uintptr_t* __restrict sample_include, PgrSampleSubsetIndex pssi, uint32_t sample_ct, uint32_t vidx, PgenReader* pgr_ptr, PgenVariant* pgvp);\n\n// possible todo: add functions which directly support MAF-based queries.  Note\n// that when the difflist representation is used, we can disqualify some\n// low-MAF variants without actually loading the genotype data, since the size\n// of the record puts an upper bound on the alt allele frequency.\n\n// requires trailing bits of genoarr to be zeroed out.\nHEADER_INLINE void PgrDetectGenoarrHetsUnsafe(const uintptr_t*__restrict genoarr, uint32_t raw_sample_ctl2, uintptr_t* __restrict all_hets) {\n  PackWordsToHalfwordsInvmatch(genoarr, kMaskAAAA, raw_sample_ctl2, all_hets);\n  if (raw_sample_ctl2 % 2) {\n    Halfword* __attribute__((may_alias)) all_hets_alias = DowncastWToHW(all_hets);\n    all_hets_alias[raw_sample_ctl2] = 0;\n  }\n}\n\nHEADER_INLINE void PgrDetectGenoarrHets(const uintptr_t* __restrict genoarr, uint32_t raw_sample_ct, uintptr_t* __restrict all_hets) {\n  PackWordsToHalfwordsInvmatch(genoarr, kMaskAAAA, NypCtToWordCt(raw_sample_ct), all_hets);\n  ZeroTrailingBits(raw_sample_ct, all_hets);\n}\n\n// sample_ct > 0.  ok for trailing bits of genoarr to not be zeroed out.\nvoid PgrDetectGenoarrHetsMultiallelic(const uintptr_t* __restrict genoarr, const uintptr_t* __restrict patch_10_set, const AlleleCode* __restrict patch_10_vals, uint32_t raw_sample_ct, uintptr_t* __restrict all_hets);\n\n// cannot assume phaseinfo bit is clear when phasepresent is clear.\nPglErr PgrGetP(const uintptr_t* __restrict sample_include, PgrSampleSubsetIndex pssi, uint32_t sample_ct, uint32_t vidx, PgenReader* pgr_ptr, uintptr_t* __restrict genovec, uintptr_t* __restrict phasepresent, uintptr_t* __restrict phaseinfo, uint32_t* __restrict phasepresent_ct_ptr);\n\nPglErr PgrGet1P(const uintptr_t* __restrict sample_include, PgrSampleSubsetIndex pssi, uint32_t sample_ct, uint32_t vidx, uint32_t allele_idx, PgenReader* pgr_ptr, uintptr_t* __restrict allele_countvec, uintptr_t* __restrict phasepresent, uintptr_t* __restrict phaseinfo, uint32_t* __restrict phasepresent_ct_ptr);\n\nPglErr IMPLPgrGetInv1P(const uintptr_t* __restrict sample_include, const uint32_t* __restrict sample_include_cumulative_popcounts, uint32_t sample_ct, uint32_t vidx, uint32_t allele_idx, PgenReaderMain* pgrp, uintptr_t* __restrict allele_invcountvec, uintptr_t* __restrict phasepresent, uintptr_t* __restrict phaseinfo, uint32_t* __restrict phasepresent_ct_ptr);\n\nHEADER_INLINE PglErr PgrGetInv1P(const uintptr_t* __restrict sample_include, PgrSampleSubsetIndex pssi, uint32_t sample_ct, uint32_t vidx, uint32_t allele_idx, PgenReader* pgr_ptr, uintptr_t* __restrict allele_invcountvec, uintptr_t* __restrict phasepresent, uintptr_t* __restrict phaseinfo, uint32_t* __restrict phasepresent_ct_ptr) {\n  PgenReaderMain* pgrp = &GET_PRIVATE(*pgr_ptr, m);\n  const uint32_t* sample_include_cumulative_popcounts = GET_PRIVATE(pssi, cumulative_popcounts);\n  return IMPLPgrGetInv1P(sample_include, sample_include_cumulative_popcounts, sample_ct, vidx, allele_idx, pgrp, allele_invcountvec, phasepresent, phaseinfo, phasepresent_ct_ptr);\n}\n\nPglErr PgrGet2P(const uintptr_t* __restrict sample_include, PgrSampleSubsetIndex pssi, uint32_t sample_ct, uint32_t vidx, uint32_t allele_idx0, uint32_t allele_idx1, PgenReader* pgr_ptr, uintptr_t* __restrict genovec, uintptr_t* __restrict phasepresent, 
uintptr_t* __restrict phaseinfo, uint32_t* __restrict phasepresent_ct_ptr);\n\nPglErr PgrGetMP(const uintptr_t* __restrict sample_include, PgrSampleSubsetIndex pssi, uint32_t sample_ct, uint32_t vidx, PgenReader* pgr_ptr, PgenVariant* pgvp);\n\nPglErr IMPLPgrGetD(const uintptr_t* __restrict sample_include, const uint32_t* __restrict sample_include_cumulative_popcounts, uint32_t sample_ct, uint32_t vidx, PgenReaderMain* pgrp, uintptr_t* __restrict genovec, uintptr_t* __restrict dosage_present, uint16_t* dosage_main, uint32_t* dosage_ct_ptr);\n\n// if dosage_present and dosage_main are nullptr, dosage data is ignored\nHEADER_INLINE PglErr PgrGetD(const uintptr_t* __restrict sample_include, PgrSampleSubsetIndex pssi, uint32_t sample_ct, uint32_t vidx, PgenReader* pgr_ptr, uintptr_t* __restrict genovec, uintptr_t* __restrict dosage_present, uint16_t* dosage_main, uint32_t* dosage_ct_ptr) {\n  PgenReaderMain* pgrp = &GET_PRIVATE(*pgr_ptr, m);\n  const uint32_t* sample_include_cumulative_popcounts = GET_PRIVATE(pssi, cumulative_popcounts);\n  return IMPLPgrGetD(sample_include, sample_include_cumulative_popcounts, sample_ct, vidx, pgrp, genovec, dosage_present, dosage_main, dosage_ct_ptr);\n}\n\nPglErr PgrGet1D(const uintptr_t* __restrict sample_include, PgrSampleSubsetIndex pssi, uint32_t sample_ct, uint32_t vidx, AlleleCode allele_idx, PgenReader* pgr_ptr, uintptr_t* __restrict allele_countvec, uintptr_t* __restrict dosage_present, uint16_t* dosage_main, uint32_t* dosage_ct_ptr);\n\nPglErr PgrGetInv1D(const uintptr_t* __restrict sample_include, PgrSampleSubsetIndex pssi, uint32_t sample_ct, uint32_t vidx, AlleleCode allele_idx, PgenReader* pgr_ptr, uintptr_t* __restrict allele_invcountvec, uintptr_t* __restrict dosage_present, uint16_t* dosage_main, uint32_t* dosage_ct_ptr);\n\n// When computing either form of imputation-r2, this function requires the\n// variant to be biallelic; PgrGetMDCounts must be called in that multiallelic\n// case.\n// imp_r2_ptr must be non-null when is_minimac3_r2 is set.\nPglErr PgrGetDCounts(const uintptr_t* __restrict sample_include, const uintptr_t* __restrict sample_include_interleaved_vec, PgrSampleSubsetIndex pssi, uint32_t sample_ct, uint32_t vidx, uint32_t is_minimac3_r2, PgenReader* pgr_ptr, double* imp_r2_ptr, STD_ARRAY_REF(uint32_t, 4) genocounts, uint64_t* __restrict all_dosages);\n\nPglErr PgrGetMDCounts(const uintptr_t* __restrict sample_include, const uintptr_t* __restrict sample_include_interleaved_vec, PgrSampleSubsetIndex pssi, uint32_t sample_ct, uint32_t vidx, uint32_t is_minimac3_r2, PgenReader* pgr_ptr, double* __restrict imp_r2_ptr, uint32_t* __restrict het_ctp, STD_ARRAY_REF(uint32_t, 4) genocounts, uint64_t* __restrict all_dosages);\n\nPglErr PgrGetMD(const uintptr_t* __restrict sample_include, PgrSampleSubsetIndex pssi, uint32_t sample_ct, uint32_t vidx, PgenReader* pgr_ptr, PgenVariant* pgvp);\n\nPglErr IMPLPgrGetDp(const uintptr_t* __restrict sample_include, const uint32_t* __restrict sample_include_cumulative_popcounts, uint32_t sample_ct, uint32_t vidx, PgenReaderMain* pgrp, PgenVariant* pgvp);\n\n// ok for both dosage_present and dosage_main to be nullptr when no dosage data\n// is present\n// ok for dphase_present/dphase_delta to be nullptr; dphase_ct always set to 0\n// in that case\nHEADER_INLINE PglErr PgrGetDp(const uintptr_t* __restrict sample_include, PgrSampleSubsetIndex pssi, uint32_t sample_ct, uint32_t vidx, PgenReader* pgr_ptr, PgenVariant* pgvp) {\n  PgenReaderMain* pgrp = &GET_PRIVATE(*pgr_ptr, m);\n  const uint32_t* 
sample_include_cumulative_popcounts = GET_PRIVATE(pssi, cumulative_popcounts);\n  return IMPLPgrGetDp(sample_include, sample_include_cumulative_popcounts, sample_ct, vidx, pgrp, pgvp);\n}\n\n// pgvp->genovec filled with inverse-counts for specified allele\nPglErr PgrGetInv1Dp(const uintptr_t* __restrict sample_include, PgrSampleSubsetIndex pssi, uint32_t sample_ct, uint32_t vidx, AlleleCode allele_idx, PgenReader* pgr_ptr, PgenVariant* pgvp);\n\nPglErr PgrGetMDp(const uintptr_t* __restrict sample_include, PgrSampleSubsetIndex pssi, uint32_t sample_ct, uint32_t vidx, PgenReader* pgr_ptr, PgenVariant* pgvp);\n\n// interface used by --make-pgen, just performs basic LD/difflist decompression\n// to maximize parallelism\nPglErr PgrGetRaw(uint32_t vidx, PgenGlobalFlags read_gflags, PgenReader* pgr_ptr, uintptr_t** loadbuf_iter_ptr, unsigned char* loaded_vrtype_ptr);\n\nPglErr PgrValidate(PgenReader* pgr_ptr, uintptr_t* genovec_buf, char* errstr_buf);\n\n// missingness bit is set iff hardcall is not present (even if dosage info *is*\n// present)\nPglErr PgrGetMissingness(const uintptr_t* __restrict sample_include, PgrSampleSubsetIndex pssi, uint32_t sample_ct, uint32_t vidx, PgenReader* pgr_ptr, uintptr_t* __restrict missingness, uintptr_t* __restrict genovec_buf);\n\n// either missingness_hc (hardcall) or missingness_dosage must be non-null for\n// now\n// missingness_dosage must be vector-aligned\nPglErr PgrGetMissingnessD(const uintptr_t* __restrict sample_include, PgrSampleSubsetIndex pssi, uint32_t sample_ct, uint32_t vidx, PgenReader* pgr_ptr, uintptr_t* __restrict missingness_hc, uintptr_t* __restrict missingness_dosage, uintptr_t* __restrict hets, uintptr_t* __restrict genovec_buf);\n\n\n// error-return iff reterr was success and was changed to kPglRetReadFail (i.e.\n// an error message should be printed).\nBoolErr CleanupPgfi(PgenFileInfo* pgfip, PglErr* reterrp);\n\nBoolErr CleanupPgr(PgenReader* pgr_ptr, PglErr* reterrp);\n\n#ifdef __cplusplus\n}  // namespace plink2\n#endif\n\n#endif  // __PGENLIB_READ_H__\n"
  },
  {
    "path": "external_libs/pgenlib/include/plink2_base.cc",
    "content": "// This library is part of PLINK 2.0, copyright (C) 2005-2024 Shaun Purcell,\n// Christopher Chang.\n//\n// This library is free software: you can redistribute it and/or modify it\n// under the terms of the GNU Lesser General Public License as published by the\n// Free Software Foundation; either version 3 of the License, or (at your\n// option) any later version.\n//\n// This library is distributed in the hope that it will be useful, but WITHOUT\n// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or\n// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License\n// for more details.\n//\n// You should have received a copy of the GNU Lesser General Public License\n// along with this library.  If not, see <http://www.gnu.org/licenses/>.\n\n\n#include \"plink2_base.h\"\n\n#ifdef __cplusplus\nnamespace plink2 {\n#endif\n\nuint64_t g_failed_alloc_attempt_size = 0;\n\n#if (((__GNUC__ == 4) && (__GNUC_MINOR__ < 7)) || (__GNUC__ >= 11)) && !defined(__APPLE__)\nBoolErr pgl_malloc(uintptr_t size, void* pp) {\n  *S_CAST(unsigned char**, pp) = S_CAST(unsigned char*, malloc(size));\n  if (likely(*S_CAST(unsigned char**, pp))) {\n    return 0;\n  }\n  g_failed_alloc_attempt_size = size;\n  return 1;\n}\n#endif\n\nBoolErr fwrite_checked(const void* buf, uintptr_t len, FILE* outfile) {\n  while (len > kMaxBytesPerIO) {\n    // OS X fwrite() doesn't support 2GiB+ writes\n    // typical disk block size is 4kb, so 0x7ffff000 is the largest sensible\n    // write size\n    // bugfix (9 Mar 2018): forgot a 'not' here...\n    if (unlikely(!fwrite_unlocked(buf, kMaxBytesPerIO, 1, outfile))) {\n      return 1;\n    }\n    buf = &(S_CAST(const unsigned char*, buf)[kMaxBytesPerIO]);\n    len -= kMaxBytesPerIO;\n  }\n  uintptr_t written_byte_ct = fwrite_unlocked(buf, 1, len, outfile);\n  // must do the right thing when len == 0\n  return (written_byte_ct != len);\n}\n\n/*\nIntErr fread_checked2(void* buf, uintptr_t len, FILE* infile, uintptr_t* bytes_read_ptr) {\n  uintptr_t bytes_read = 0;\n  while (len > kMaxBytesPerIO) {\n    const uintptr_t cur_bytes_read = fread_unlocked(buf, 1, kMaxBytesPerIO, infile);\n    bytes_read += cur_bytes_read;\n    if (cur_bytes_read != kMaxBytesPerIO) {\n      *bytes_read_ptr = bytes_read;\n      return ferror_unlocked(infile);\n    }\n    buf = &(((char*)buf)[kMaxBytesPerIO]);\n    len -= kMaxBytesPerIO;\n  }\n  bytes_read += fread_unlocked(buf, 1, len, infile);\n  *bytes_read_ptr = bytes_read;\n  // could skip ferror_unlocked call if bytes_read == original len\n  return ferror_unlocked(infile);\n}\n*/\n\nBoolErr fread_checked(void* buf, uintptr_t len, FILE* infile) {\n  while (len > kMaxBytesPerIO) {\n    const uintptr_t cur_bytes_read = fread_unlocked(buf, 1, kMaxBytesPerIO, infile);\n    if (unlikely(cur_bytes_read != kMaxBytesPerIO)) {\n      return 1;\n    }\n    buf = &(S_CAST(unsigned char*, buf)[kMaxBytesPerIO]);\n    len -= kMaxBytesPerIO;\n  }\n  const uintptr_t cur_bytes_read = fread_unlocked(buf, 1, len, infile);\n  return (cur_bytes_read != len);\n}\n\n#ifdef __LP64__\nstatic inline BoolErr ScanUintCappedFinish(const char* str_iter, uint64_t cap, uint32_t* valp) {\n  uint64_t val = *valp;\n  while (1) {\n    // a little bit of unrolling seems to help\n    const uint64_t cur_digit = ctou64(*str_iter++) - 48;\n    if (cur_digit >= 10) {\n      break;\n    }\n    // val = val * 10 + cur_digit;\n    const uint64_t cur_digit2 = ctou64(*str_iter++) - 48;\n    if (cur_digit2 >= 10) {\n      val = val * 10 + cur_digit;\n    
  if (unlikely(val > cap)) {\n        return 1;\n      }\n      break;\n    }\n    val = val * 100 + cur_digit * 10 + cur_digit2;\n    if (unlikely(val > cap)) {\n      return 1;\n    }\n  }\n  *valp = val;\n  return 0;\n}\n\nBoolErr ScanPosintCapped(const char* str_iter, uint64_t cap, uint32_t* valp) {\n  // '0' has ascii code 48\n  assert(ctou32(str_iter[0]) > 32);\n  *valp = ctou32(*str_iter++) - 48;\n  if (*valp >= 10) {\n    // permit leading '+' (ascii 43), but not '++' or '+-'\n    // reasonable to use unlikely() here since these functions aren't used for\n    // numeric vs. non-numeric classification anyway due to erroring out on\n    // overflow\n    if (unlikely(*valp != 0xfffffffbU)) {\n      return 1;\n    }\n    *valp = ctou32(*str_iter++) - 48;\n    if (unlikely(*valp >= 10)) {\n      return 1;\n    }\n  }\n  while (!(*valp)) {\n    *valp = ctou32(*str_iter++) - 48;\n    if (unlikely((*valp) >= 10)) {\n      return 1;\n    }\n  }\n  return ScanUintCappedFinish(str_iter, cap, valp);\n}\n\n// Note that NumericRangeListToBitarr() can call this in an ignore-overflow\n// mode.  If similar logic ever goes into an inner loop, remove all unlikely()\n// annotations in this function and its children.\nBoolErr ScanUintCapped(const char* str_iter, uint64_t cap, uint32_t* valp) {\n  // Reads an integer in [0, cap].  Assumes first character is nonspace.\n  assert(ctou32(str_iter[0]) > 32);\n  uint32_t val = ctou32(*str_iter++) - 48;\n  if (val >= 10) {\n    if (val != 0xfffffffbU) {\n      // '-' has ascii code 45, so unsigned 45 - 48 = 0xfffffffdU\n      if (unlikely((val != 0xfffffffdU) || (*str_iter != '0'))) {\n        return 1;\n      }\n      // accept \"-0\", \"-00\", etc.\n      while (*(++str_iter) == '0');\n      *valp = 0;\n      return (ctou32(*str_iter) - 48) < 10;\n    }\n    // accept leading '+'\n    val = ctou32(*str_iter++) - 48;\n    if (unlikely(val >= 10)) {\n      return 1;\n    }\n  }\n  *valp = val;\n  return ScanUintCappedFinish(str_iter, cap, valp);\n}\n
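\n// Usage sketch (illustrative, not from upstream): parse a base-10 token,\n// erroring out on malformed input or any value above the cap.\n//\n//   uint32_t val;\n//   if (ScanUintCapped(\"12345\", 100000, &val)) {\n//     // malformed token, or value > 100000\n//   }\n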
\nBoolErr ScanIntAbsBounded(const char* str_iter, uint64_t bound, int32_t* valp) {\n  // Reads an integer in [-bound, bound].  Assumes first character is nonspace.\n  assert(ctou32(str_iter[0]) > 32);\n  *valp = ctou32(*str_iter++) - 48;\n  int32_t sign = 1;\n  if (ctou32(*valp) >= 10) {\n    if (*valp == -3) {\n      sign = -1;\n    } else if (unlikely(*valp != -5)) {\n      return 1;\n    }\n    *valp = ctou32(*str_iter++) - 48;\n    if (unlikely(*valp >= 10)) {\n      return 1;\n    }\n  }\n  if (unlikely(ScanUintCappedFinish(str_iter, bound, I32ToU32(valp)))) {\n    return 1;\n  }\n  *valp *= sign;\n  return 0;\n}\n#else  // not __LP64__\nBoolErr ScanPosintCapped32(const char* str_iter, uint32_t cap_div_10, uint32_t cap_mod_10, uint32_t* valp) {\n  // '0' has ascii code 48\n  assert(ctou32(str_iter[0]) > 32);\n  uint32_t val = ctou32(*str_iter++) - 48;\n  if (val >= 10) {\n    if (unlikely(val != 0xfffffffbU)) {\n      return 1;\n    }\n    val = ctou32(*str_iter++) - 48;\n    if (unlikely(val >= 10)) {\n      return 1;\n    }\n  }\n  while (!val) {\n    val = ctou32(*str_iter++) - 48;\n    if (unlikely(val >= 10)) {\n      return 1;\n    }\n  }\n  for (; ; ++str_iter) {\n    const uint32_t cur_digit = ctou32(*str_iter) - 48;\n    if (cur_digit >= 10) {\n      *valp = val;\n      return 0;\n    }\n    // avoid integer overflow in middle of computation\n    if (unlikely((val >= cap_div_10) && ((val > cap_div_10) || (cur_digit > cap_mod_10)))) {\n      return 1;\n    }\n    val = val * 10 + cur_digit;\n  }\n}\n\nBoolErr ScanUintCapped32(const char* str_iter, uint32_t cap_div_10, uint32_t cap_mod_10, uint32_t* valp) {\n  // Reads an integer in [0, cap].  Assumes first character is nonspace.\n  assert(ctou32(str_iter[0]) > 32);\n  uint32_t val = ctou32(*str_iter++) - 48;\n  if (val >= 10) {\n    if (val != 0xfffffffbU) {\n      if (unlikely((val != 0xfffffffdU) || (*str_iter != '0'))) {\n        return 1;\n      }\n      while (*(++str_iter) == '0');\n      *valp = 0;\n      return (ctou32(*str_iter) - 48) < 10;\n    }\n    val = ctou32(*str_iter++) - 48;\n    if (unlikely(val >= 10)) {\n      return 1;\n    }\n  }\n  for (; ; ++str_iter) {\n    const uint32_t cur_digit = ctou32(*str_iter) - 48;\n    if (cur_digit >= 10) {\n      *valp = val;\n      return 0;\n    }\n    if (unlikely((val >= cap_div_10) && ((val > cap_div_10) || (cur_digit > cap_mod_10)))) {\n      return 1;\n    }\n    val = val * 10 + cur_digit;\n  }\n}\n\nBoolErr ScanIntAbsBounded32(const char* str_iter, uint32_t bound_div_10, uint32_t bound_mod_10, int32_t* valp) {\n  // Reads an integer in [-bound, bound].  
Assumes first character is nonspace.\n  assert(ctou32(str_iter[0]) > 32);\n  uint32_t val = ctou32(*str_iter++) - 48;\n  int32_t sign = 1;\n  if (val >= 10) {\n    if (val == 0xfffffffdU) {\n      sign = -1;\n    } else if (unlikely(val != 0xfffffffbU)) {\n      return 1;\n    }\n    val = ctou32(*str_iter++) - 48;\n    if (unlikely(val >= 10)) {\n      return 1;\n    }\n  }\n  for (; ; ++str_iter) {\n    const uint32_t cur_digit = ctou32(*str_iter) - 48;\n    if (cur_digit >= 10) {\n      *valp = sign * S_CAST(int32_t, val);\n      return 0;\n    }\n    if (unlikely((val >= bound_div_10) && ((val > bound_div_10) || (cur_digit > bound_mod_10)))) {\n      return 1;\n    }\n    val = val * 10 + cur_digit;\n  }\n}\n#endif\n\nBoolErr aligned_malloc(uintptr_t size, uintptr_t alignment, void* aligned_pp) {\n  // Assumes malloc returns word-aligned addresses.\n  assert(alignment);\n  assert(!(alignment % kBytesPerWord));\n  uintptr_t malloc_addr;\n  if (unlikely(pgl_malloc(size + alignment, &malloc_addr))) {\n    return 1;\n  }\n  assert(!(malloc_addr % kBytesPerWord));\n  uintptr_t** casted_aligned_pp = S_CAST(uintptr_t**, aligned_pp);\n  *casted_aligned_pp = R_CAST(uintptr_t*, RoundDownPow2(malloc_addr + alignment, alignment));\n  (*casted_aligned_pp)[-1] = malloc_addr;\n  return 0;\n}\n\n#if defined(USE_SSE2) && !defined(NO_UNALIGNED)\nint32_t memequal(const void* m1, const void* m2, uintptr_t byte_ct) {\n  const unsigned char* m1_uc = S_CAST(const unsigned char*, m1);\n  const unsigned char* m2_uc = S_CAST(const unsigned char*, m2);\n  if (byte_ct < 16 + (kBytesPerVec / 2)) {\n    if (byte_ct < kBytesPerWord) {\n      if (byte_ct < 4) {\n        if (byte_ct < 2) {\n          return (!byte_ct) || (m1_uc[0] == m2_uc[0]);\n        }\n        if ((*S_CAST(const uint16_t*, m1)) != (*S_CAST(const uint16_t*, m2))) {\n          return 0;\n        }\n        if ((byte_ct == 3) && (m1_uc[2] != m2_uc[2])) {\n          return 0;\n        }\n        return 1;\n      }\n      if ((*R_CAST(const uint32_t*, m1_uc)) != (*R_CAST(const uint32_t*, m2_uc))) {\n        return 0;\n      }\n      if (byte_ct > 4) {\n        const uintptr_t final_offset = byte_ct - 4;\n        if ((*R_CAST(const uint32_t*, &(m1_uc[final_offset]))) != (*R_CAST(const uint32_t*, &(m2_uc[final_offset])))) {\n          return 0;\n        }\n      }\n      return 1;\n    }\n    const uintptr_t* m1_alias = R_CAST(const uintptr_t*, m1_uc);\n    const uintptr_t* m2_alias = R_CAST(const uintptr_t*, m2_uc);\n    if (m1_alias[0] != m2_alias[0]) {\n      return 0;\n    }\n    if (byte_ct >= 16) {\n      if (m1_alias[1] != m2_alias[1]) {\n        return 0;\n      }\n#  ifdef USE_AVX2\n      if (byte_ct >= 24) {\n        if (m1_alias[2] != m2_alias[2]) {\n          return 0;\n        }\n      }\n#  endif\n    }\n    if (byte_ct % kBytesPerWord) {\n      const uintptr_t final_offset = byte_ct - kBytesPerWord;\n      if ((*R_CAST(const uintptr_t*, &(m1_uc[final_offset]))) != (*R_CAST(const uintptr_t*, &(m2_uc[final_offset])))) {\n        return 0;\n      }\n    }\n    return 1;\n  }\n  // Don't use VecW since _mm_cmpeq_epi64() not defined until SSE4.1.\n  const VecUc* m1_alias = S_CAST(const VecUc*, m1);\n  const VecUc* m2_alias = S_CAST(const VecUc*, m2);\n  const uintptr_t vec_ct = byte_ct / kBytesPerVec;\n  for (uintptr_t vidx = 0; vidx != vec_ct; ++vidx) {\n    // tried unrolling this, doesn't make a difference\n    const VecUc v1 = vecuc_loadu(&(m1_alias[vidx]));\n    const VecUc v2 = vecuc_loadu(&(m2_alias[vidx]));\n    if (vecuc_movemask(v1 == 
v2) != kVec8thUintMax) {\n      return 0;\n    }\n  }\n  if (byte_ct % kBytesPerVec) {\n    // put this last instead of first, for better behavior when inputs are\n    // aligned\n    const uintptr_t final_offset = byte_ct - kBytesPerVec;\n    const VecUc v1 = vecuc_loadu(&(m1_uc[final_offset]));\n    const VecUc v2 = vecuc_loadu(&(m2_uc[final_offset]));\n    if (vecuc_movemask(v1 == v2) != kVec8thUintMax) {\n      return 0;\n    }\n  }\n  return 1;\n}\n\n// clang/gcc memcmp is not that well-optimized for the short strings we usually\n// compare.\nint32_t Memcmp(const void* m1, const void* m2, uintptr_t byte_ct) {\n  const unsigned char* m1_uc = S_CAST(const unsigned char*, m1);\n  const unsigned char* m2_uc = S_CAST(const unsigned char*, m2);\n  // tried larger crossover threshold, doesn't help\n  if (byte_ct < kBytesPerVec) {\n    if (byte_ct < kBytesPerWord) {\n      if (byte_ct < 4) {\n        for (uintptr_t pos = 0; pos != byte_ct; ++pos) {\n          const unsigned char ucc1 = m1_uc[pos];\n          const unsigned char ucc2 = m2_uc[pos];\n          if (ucc1 != ucc2) {\n            return (ucc1 < ucc2)? -1 : 1;\n          }\n        }\n        return 0;\n      }\n      uint32_t m1_u32 = *S_CAST(const uint32_t*, m1);\n      uint32_t m2_u32 = *S_CAST(const uint32_t*, m2);\n      if (m1_u32 != m2_u32) {\n        return (__builtin_bswap32(m1_u32) < __builtin_bswap32(m2_u32))? -1 : 1;\n      }\n      if (byte_ct > 4) {\n        const uintptr_t final_offset = byte_ct - 4;\n        m1_u32 = *R_CAST(const uint32_t*, &(m1_uc[final_offset]));\n        m2_u32 = *R_CAST(const uint32_t*, &(m2_uc[final_offset]));\n        if (m1_u32 != m2_u32) {\n          return (__builtin_bswap32(m1_u32) < __builtin_bswap32(m2_u32))? -1 : 1;\n        }\n      }\n      return 0;\n    }\n    const uintptr_t* m1_alias = R_CAST(const uintptr_t*, m1_uc);\n    const uintptr_t* m2_alias = R_CAST(const uintptr_t*, m2_uc);\n    uintptr_t m1_word = m1_alias[0];\n    uintptr_t m2_word = m2_alias[0];\n    if (m1_word != m2_word) {\n      return (__builtin_bswap64(m1_word) < __builtin_bswap64(m2_word))? -1 : 1;\n    }\n#  ifdef USE_AVX2\n    if (byte_ct >= 16) {\n      m1_word = m1_alias[1];\n      m2_word = m2_alias[1];\n      if (m1_word != m2_word) {\n        return (__builtin_bswap64(m1_word) < __builtin_bswap64(m2_word))? -1 : 1;\n      }\n      if (byte_ct >= 24) {\n        m1_word = m1_alias[2];\n        m2_word = m2_alias[2];\n        if (m1_word != m2_word) {\n          return (__builtin_bswap64(m1_word) < __builtin_bswap64(m2_word))? -1 : 1;\n        }\n      }\n    }\n#  endif\n    if (byte_ct % kBytesPerWord) {\n      const uintptr_t final_offset = byte_ct - kBytesPerWord;\n      m1_word = *R_CAST(const uintptr_t*, &(m1_uc[final_offset]));\n      m2_word = *R_CAST(const uintptr_t*, &(m2_uc[final_offset]));\n      if (m1_word != m2_word) {\n        return (__builtin_bswap64(m1_word) < __builtin_bswap64(m2_word))? -1 : 1;\n      }\n    }\n    return 0;\n  }\n  const VecUc* m1_alias = S_CAST(const VecUc*, m1);\n  const VecUc* m2_alias = S_CAST(const VecUc*, m2);\n  const uintptr_t fullvec_ct = byte_ct / kBytesPerVec;\n  // uh, clang/LLVM -O2 optimizes this better when comparison is != instead of\n  // <?  
ugh, time to change all of the for loops...\n  // (and yes, both -O3 configurations generate worse code here)\n  // at least for loop is better than do-while loop even when 1 iteration is\n  // guaranteed...\n  for (uintptr_t vidx = 0; vidx != fullvec_ct; ++vidx) {\n    const VecUc v1 = vecuc_loadu(&(m1_alias[vidx]));\n    const VecUc v2 = vecuc_loadu(&(m2_alias[vidx]));\n    // is this even worthwhile now in non-AVX2 case?\n    const uint32_t movemask_result = vecuc_movemask(v1 == v2);\n    if (movemask_result != kVec8thUintMax) {\n      const uintptr_t diff_pos = vidx * kBytesPerVec + ctzu32(~movemask_result);\n      return (m1_uc[diff_pos] < m2_uc[diff_pos])? -1 : 1;\n    }\n  }\n  if (byte_ct % kBytesPerVec) {\n    const uintptr_t final_offset = byte_ct - kBytesPerVec;\n    const VecUc v1 = vecuc_loadu(&(m1_uc[final_offset]));\n    const VecUc v2 = vecuc_loadu(&(m2_uc[final_offset]));\n    const uint32_t movemask_result = vecuc_movemask(v1 == v2);\n    if (movemask_result != kVec8thUintMax) {\n      const uintptr_t diff_pos = final_offset + ctzu32(~movemask_result);\n      return (m1_uc[diff_pos] < m2_uc[diff_pos])? -1 : 1;\n    }\n  }\n  return 0;\n}\n#endif // defined(USE_SSE2) && !defined(NO_UNALIGNED)\n\nconst uint16_t kDigitPair[] = {\n  0x3030, 0x3130, 0x3230, 0x3330, 0x3430, 0x3530, 0x3630, 0x3730, 0x3830, 0x3930,\n  0x3031, 0x3131, 0x3231, 0x3331, 0x3431, 0x3531, 0x3631, 0x3731, 0x3831, 0x3931,\n  0x3032, 0x3132, 0x3232, 0x3332, 0x3432, 0x3532, 0x3632, 0x3732, 0x3832, 0x3932,\n  0x3033, 0x3133, 0x3233, 0x3333, 0x3433, 0x3533, 0x3633, 0x3733, 0x3833, 0x3933,\n  0x3034, 0x3134, 0x3234, 0x3334, 0x3434, 0x3534, 0x3634, 0x3734, 0x3834, 0x3934,\n  0x3035, 0x3135, 0x3235, 0x3335, 0x3435, 0x3535, 0x3635, 0x3735, 0x3835, 0x3935,\n  0x3036, 0x3136, 0x3236, 0x3336, 0x3436, 0x3536, 0x3636, 0x3736, 0x3836, 0x3936,\n  0x3037, 0x3137, 0x3237, 0x3337, 0x3437, 0x3537, 0x3637, 0x3737, 0x3837, 0x3937,\n  0x3038, 0x3138, 0x3238, 0x3338, 0x3438, 0x3538, 0x3638, 0x3738, 0x3838, 0x3938,\n  0x3039, 0x3139, 0x3239, 0x3339, 0x3439, 0x3539, 0x3639, 0x3739, 0x3839, 0x3939};\n\nchar* u32toa(uint32_t uii, char* start) {\n  // Memory-efficient fast integer writer.  (You can do a bit better sometimes\n  // by using a larger lookup table, but on average I doubt that pays off.)\n  // Returns a pointer to the end of the integer (not null-terminated).\n  //\n  // Nearly identical to 'branchlut' from\n  // https://github.com/miloyip/itoa-benchmark , except that the hardcoded\n  // binary search is more balanced (start by comparing 6+ digits vs. <6,\n  // instead of 9+ digits vs. <8).  This tends to be slightly better unless the\n  // integers are almost uniformly distributed over [0, 2^32).\n  //\n  // Todo: compare against an_itoa in https://github.com/appnexus/acf/ .\n  //\n  // (Making the first comparison 7+ digits vs. 
<7 would seem to make sense,\n  // but it seems to benchmark slightly worse on my Mac?)\n  //\n  // (Since we want to execute different code depending on the number of\n  // digits, the UintSlen() approach doesn't pay off.)\n  uint32_t quotient;\n  if (uii < 100000) {\n    if (uii < 100) {\n      if (uii >= 10) {\n        goto u32toa_just2;\n      }\n      *start++ = '0' + uii;\n      return start;\n    }\n    if (uii < 10000) {\n      if (uii >= 1000) {\n        goto u32toa_just4;\n      }\n      quotient = uii / 100;\n      *start++ = '0' + quotient;\n      goto u32toa_2left;\n    }\n    quotient = uii / 10000;\n    *start++ = '0' + quotient;\n    goto u32toa_4left;\n  }\n  if (uii < 100000000) {\n    if (uii < 1000000) {\n      goto u32toa_just6;\n    }\n    if (uii >= 10000000) {\n      goto u32toa_just8;\n    }\n    quotient = uii / 1000000;\n    *start++ = '0' + quotient;\n    goto u32toa_6left;\n  }\n  quotient = uii / 100000000;\n  if (uii < 1000000000) {\n    *start++ = '0' + quotient;\n  } else {\n    start = memcpya_k(start, &(kDigitPair[quotient]), 2);\n  }\n  uii -= quotient * 100000000;\n u32toa_just8:\n  quotient = uii / 1000000;\n  start = memcpya_k(start, &(kDigitPair[quotient]), 2);\n u32toa_6left:\n  uii -= quotient * 1000000;\n u32toa_just6:\n  quotient = uii / 10000;\n  start = memcpya_k(start, &(kDigitPair[quotient]), 2);\n u32toa_4left:\n  uii -= quotient * 10000;\n u32toa_just4:\n  quotient = uii / 100;\n  start = memcpya_k(start, &(kDigitPair[quotient]), 2);\n u32toa_2left:\n  uii -= quotient * 100;\n u32toa_just2:\n  return memcpya_k(start, &(kDigitPair[uii]), 2);\n}\n\nchar* i64toa(int64_t llii, char* start) {\n  uint64_t ullii = llii;\n  uint64_t top_digits;\n  uint32_t bottom_eight;\n  uint32_t middle_eight;\n  if (llii < 0) {\n    *start++ = '-';\n    ullii = -ullii;\n  }\n  if (ullii <= 0xffffffffLLU) {\n    return u32toa(S_CAST(uint32_t, ullii), start);\n  }\n  top_digits = ullii / 100000000;\n  bottom_eight = S_CAST(uint32_t, ullii - (top_digits * 100000000));\n  if (top_digits <= 0xffffffffLLU) {\n    start = u32toa(S_CAST(uint32_t, top_digits), start);\n    return uitoa_z8(bottom_eight, start);\n  }\n  ullii = top_digits / 100000000;\n  middle_eight = S_CAST(uint32_t, top_digits - (ullii * 100000000));\n  start = u32toa(S_CAST(uint32_t, ullii), start);\n  start = uitoa_z8(middle_eight, start);\n  return uitoa_z8(bottom_eight, start);\n}\n\n#if defined(USE_SSE2) && !defined(NO_UNALIGNED)\nuintptr_t FirstUnequal4(const void* arr1, const void* arr2, uintptr_t nbytes) {\n  // Similar to memequal().\n  if (nbytes < kBytesPerVec) {\n    if (nbytes < kBytesPerWord) {\n      uint32_t xor_result = (*S_CAST(const uint32_t*, arr1)) ^ (*S_CAST(const uint32_t*, arr2));\n      if (xor_result) {\n        return ctzu32(xor_result) / CHAR_BIT;\n      }\n      if (nbytes > 4) {\n        const uintptr_t final_offset = nbytes - 4;\n        const char* s1 = S_CAST(const char*, arr1);\n        const char* s2 = S_CAST(const char*, arr2);\n        xor_result = (*R_CAST(const uint32_t*, &(s1[final_offset]))) ^ (*R_CAST(const uint32_t*, &(s2[final_offset])));\n        if (xor_result) {\n          return final_offset + ctzu32(xor_result) / CHAR_BIT;\n        }\n      }\n      return nbytes;\n    }\n    const uintptr_t* arr1_alias = S_CAST(const uintptr_t*, arr1);\n    const uintptr_t* arr2_alias = S_CAST(const uintptr_t*, arr2);\n    const uintptr_t word_ct = nbytes / kBytesPerWord;\n    for (uint32_t widx = 0; widx != word_ct; ++widx) {\n      const uintptr_t xor_result = 
arr1_alias[widx] ^ arr2_alias[widx];\n      if (xor_result) {\n        return widx * kBytesPerWord + ctzw(xor_result) / CHAR_BIT;\n      }\n    }\n    if (nbytes % kBytesPerWord) {\n      const uintptr_t final_offset = nbytes - kBytesPerWord;\n      const char* s1 = S_CAST(const char*, arr1);\n      const char* s2 = S_CAST(const char*, arr2);\n      const uintptr_t xor_result = (*R_CAST(const uintptr_t*, &(s1[final_offset]))) ^ (*R_CAST(const uintptr_t*, &(s2[final_offset])));\n      if (xor_result) {\n        return final_offset + ctzw(xor_result) / CHAR_BIT;\n      }\n    }\n    return nbytes;\n  }\n  const VecUc* arr1_alias = S_CAST(const VecUc*, arr1);\n  const VecUc* arr2_alias = S_CAST(const VecUc*, arr2);\n  const uintptr_t vec_ct = nbytes / kBytesPerVec;\n  for (uintptr_t vidx = 0; vidx != vec_ct; ++vidx) {\n    const VecUc v1 = vecuc_loadu(&(arr1_alias[vidx]));\n    const VecUc v2 = vecuc_loadu(&(arr2_alias[vidx]));\n    const uint32_t eq_result = vecuc_movemask(v1 == v2);\n    if (eq_result != kVec8thUintMax) {\n      return vidx * kBytesPerVec + ctzu32(~eq_result);\n    }\n  }\n  if (nbytes % kBytesPerVec) {\n    const uintptr_t final_offset = nbytes - kBytesPerVec;\n    const char* s1 = S_CAST(const char*, arr1);\n    const char* s2 = S_CAST(const char*, arr2);\n    // use byte-granularity loads/movemask here, like the main loop above;\n    // word-granularity compares would only locate the first differing word.\n    const VecUc v1 = vecuc_loadu(&(s1[final_offset]));\n    const VecUc v2 = vecuc_loadu(&(s2[final_offset]));\n    const uint32_t eq_result = vecuc_movemask(v1 == v2);\n    if (eq_result != kVec8thUintMax) {\n      return final_offset + ctzu32(~eq_result);\n    }\n  }\n  return nbytes;\n}\n#else // !(defined(USE_SSE2) && !defined(NO_UNALIGNED))\nuintptr_t FirstUnequalW(const void* arr1, const void* arr2, uintptr_t nbytes) {\n  const unsigned char* arr1b = S_CAST(const unsigned char*, arr1);\n  const unsigned char* arr2b = S_CAST(const unsigned char*, arr2);\n  const uintptr_t word_ct = nbytes / kBytesPerWord;\n  for (uintptr_t widx = 0; widx != word_ct; ++widx) {\n    uintptr_t arr1_word;\n    uintptr_t arr2_word;\n    CopyFromUnalignedOffsetW(&arr1_word, arr1b, widx);\n    CopyFromUnalignedOffsetW(&arr2_word, arr2b, widx);\n    const uintptr_t xor_result = arr1_word ^ arr2_word;\n    if (xor_result) {\n      return widx * kBytesPerWord + ctzw(xor_result) / CHAR_BIT;\n    }\n  }\n  if (nbytes % kBytesPerWord) {\n    const uintptr_t final_offset = nbytes - kBytesPerWord;\n    uintptr_t arr1_word;\n    uintptr_t arr2_word;\n    CopyFromUnalignedW(&arr1_word, &(arr1b[final_offset]));\n    CopyFromUnalignedW(&arr2_word, &(arr2b[final_offset]));\n    const uintptr_t xor_result = arr1_word ^ arr2_word;\n    if (xor_result) {\n      return final_offset + ctzw(xor_result) / CHAR_BIT;\n    }\n  }\n  return nbytes;\n}\n#endif\n\n#ifdef __LP64__\nuintptr_t CountVintsNonempty(const unsigned char* buf, const unsigned char* buf_end) {\n  const uintptr_t starting_addr = R_CAST(uintptr_t, buf);\n  const VecUc* buf_viter = R_CAST(const VecUc*, RoundDownPow2(starting_addr, kBytesPerVec));\n  const uintptr_t ending_addr = R_CAST(uintptr_t, buf_end);\n  const VecUc* buf_vlast = R_CAST(const VecUc*, RoundDownPow2(ending_addr - 1, kBytesPerVec));\n  const uint32_t leading_byte_ct = starting_addr - R_CAST(uintptr_t, buf_viter);\n  Vec8thUint vint_ends = (UINT32_MAX << leading_byte_ct) & (~vecuc_movemask(*buf_viter));\n  uintptr_t total = 0;\n  while (buf_viter != buf_vlast) {\n    total += PopcountVec8thUint(vint_ends);\n    ++buf_viter;\n    vint_ends = ~vecuc_movemask(*buf_viter);\n  }\n  const uint32_t trailing_byte_ct = ending_addr - 
R_CAST(uintptr_t, buf_vlast);\n  vint_ends &= (k1LU << trailing_byte_ct) - 1;\n  total += PopcountVec8thUint(vint_ends);\n  return total;\n}\n#else\nuintptr_t CountVintsNonempty(const unsigned char* buf, const unsigned char* buf_end) {\n  // Could check one word at a time.\n  const uintptr_t len = buf_end - buf;\n  uintptr_t inv_result = 0;\n  for (uintptr_t ulii = 0; ulii != len; ++ulii) {\n    inv_result += buf[ulii] >> 7;\n  }\n  return len - inv_result;\n}\n#endif\n\n#ifdef __cplusplus\n}  // namespace plink2\n#endif\n"
  },
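  {
    "path": "example/pgenlib_techniques_sketch.c",
    "content": "// Illustrative sketch only -- this file is NOT part of upstream plink2 or\n// pgenlib.  It is a minimal, self-contained demo of two techniques used by\n// the vendored pgenlib code in this repository: (1) two-digits-at-a-time\n// integer printing with a 100-entry ASCII lookup table, as in\n// kDigitPair/u32toa, and (2) finding the first differing byte of two\n// buffers with word-wide XOR plus count-trailing-zeros, as in\n// FirstUnequalW.  All names below are invented for this demo; build with\n// e.g. gcc -O2 -o sketch example/pgenlib_techniques_sketch.c\n#include <limits.h>\n#include <stdint.h>\n#include <stdio.h>\n#include <string.h>\n\n// 100 precomputed ASCII digit pairs (\"00\"..\"99\").  pgenlib stores these as\n// little-endian uint16_t values; plain byte pairs are used here to stay\n// endian-neutral.\nstatic unsigned char g_digit_pair[100][2];\n\nstatic void InitDigitPairs(void) {\n  for (int ii = 0; ii != 100; ++ii) {\n    g_digit_pair[ii][0] = (unsigned char)('0' + ii / 10);\n    g_digit_pair[ii][1] = (unsigned char)('0' + ii % 10);\n  }\n}\n\n// Writes uii in decimal and returns a pointer to the end (not\n// null-terminated, same convention as u32toa), emitting two digits per\n// division.  The real u32toa avoids this back-to-front buffer via a\n// hardcoded binary search on the digit count.\nstatic char* U32ToAPairs(uint32_t uii, char* start) {\n  char buf[10];\n  char* pp = &buf[10];\n  while (uii >= 100) {\n    const uint32_t quotient = uii / 100;\n    pp -= 2;\n    memcpy(pp, g_digit_pair[uii - quotient * 100], 2);\n    uii = quotient;\n  }\n  if (uii >= 10) {\n    pp -= 2;\n    memcpy(pp, g_digit_pair[uii], 2);\n  } else {\n    *(--pp) = (char)('0' + uii);\n  }\n  const size_t len = (size_t)(&buf[10] - pp);\n  memcpy(start, pp, len);\n  return &start[len];\n}\n\n// Returns the index of the first differing byte, or nbytes if none;\n// word-at-a-time XOR + ctz, as in pgenlib's FirstUnequalW.  Assumes a\n// little-endian target (low bits of the XOR correspond to lower-addressed\n// bytes) and gcc/clang for __builtin_ctzll; memcpy keeps the unaligned\n// loads within defined behavior.\nstatic size_t FirstUnequalSketch(const unsigned char* b1, const unsigned char* b2, size_t nbytes) {\n  const size_t word_ct = nbytes / sizeof(uint64_t);\n  for (size_t widx = 0; widx != word_ct; ++widx) {\n    uint64_t w1;\n    uint64_t w2;\n    memcpy(&w1, &b1[widx * sizeof(uint64_t)], sizeof(uint64_t));\n    memcpy(&w2, &b2[widx * sizeof(uint64_t)], sizeof(uint64_t));\n    const uint64_t xor_result = w1 ^ w2;\n    if (xor_result) {\n      return widx * sizeof(uint64_t) + __builtin_ctzll(xor_result) / CHAR_BIT;\n    }\n  }\n  for (size_t ulii = word_ct * sizeof(uint64_t); ulii != nbytes; ++ulii) {\n    if (b1[ulii] != b2[ulii]) {\n      return ulii;\n    }\n  }\n  return nbytes;\n}\n\nint main(void) {\n  InitDigitPairs();\n  char buf[11];\n  char* end = U32ToAPairs(4294967295U, buf);\n  *end = '\\0';\n  printf(\"%s\\n\", buf);  // 4294967295\n  const unsigned char s1[] = \"common prefix, then A\";\n  const unsigned char s2[] = \"common prefix, then B\";\n  printf(\"%zu\\n\", FirstUnequalSketch(s1, s2, sizeof(s1)));  // prints 20\n  return 0;\n}\n"
  },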
  {
    "path": "external_libs/pgenlib/include/plink2_base.h",
    "content": "#ifndef __PLINK2_BASE_H__\n#define __PLINK2_BASE_H__\n\n// This library is part of PLINK 2.0, copyright (C) 2005-2024 Shaun Purcell,\n// Christopher Chang.\n//\n// This library is free software: you can redistribute it and/or modify it\n// under the terms of the GNU Lesser General Public License as published by the\n// Free Software Foundation; either version 3 of the License, or (at your\n// option) any later version.\n//\n// This library is distributed in the hope that it will be useful, but WITHOUT\n// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or\n// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License\n// for more details.\n//\n// You should have received a copy of the GNU Lesser General Public License\n// along with this library.  If not, see <http://www.gnu.org/licenses/>.\n\n\n// Low-level C99/C++03/C++11 library covering basic I/O, SWAR/SIMD, and\n// Windows/OS X/Linux portability.  We try to benefit from as much C++ type\n// safety as we can without either breaking compatibility with C-only codebases\n// or making extension of pgenlib/plink2 code more difficult than the old\n// type-unsafe style.\n//\n// Parameter conventions:\n// - Input parameters, then in/out, then pure outputs, then temporary buffers.\n//   Reference-style input parameters tend to go in the very front, to make it\n//   more obvious that they aren't in/out.\n// - \"bitarr\" indicates a word-aligned, packed array of bits, while \"bitvec\"\n//   indicates vector-alignment in 64-bit builds.  (\"vector\" always means SIMD\n//   inputs/outputs here; C++ std::vector is not used in this codebase.)\n// - Most pointers are stationary; moving pointers have an _iter suffix.\n//\n// Type-choice guidelines:\n// - Integers are unsigned by default, signed only when necessary.\n//   It's necessary to choose one or the other to avoid drowning in a sea of\n//   casts and unexpected behavior.  Each choice has its own surprising\n//   pitfalls that the developer had better be aware of; and I definitely do\n//   not take the position that unsigned is the better default *in all C and/or\n//   C++ code*.  However, for this codebase, the extremely high frequency of\n//   bitwise operations makes unsigned-default the only sane choice.\n//   Some consequences of this choice:\n//   - All pointer differences that are part of a larger arithmetic or\n//     comparison expression are explicitly casted to uintptr_t.\n//   - Since uint64_t -> double conversion is frequently slower than int64_t ->\n//     double conversion, u63tod() should be used when the integer is known to\n//     be less than 2^63.  If we also know it's less than 2^31, u31tod() can\n//     provide a performance improvement on Win32.\n// - Integers that can be >= 2^32 in some of the largest existing datasets, but\n//   are usually smaller, should be defined as uintptr_t, to strike a good\n//   balance between 32-bit performance and 64-bit scaling.  Exhaustive\n//   overflow checking in the 32-bit build is a non-goal; but I do aim for very\n//   high statistical reliability, by inserting checks whenever it occurs to me\n//   that overflow is especially likely (e.g. when multiplying two potentially\n//   large 32-bit numbers).\n// - Bitarrays and 'nyparrays' (packed arrays of 2-bit elements, such as a row\n//   of a plink 1.x .bed file) are usually uintptr_t*, to encourage\n//   word-at-a-time iteration without requiring vector-alignment.  Quite a few\n//   low-level library functions cast them to VecW*.  
As mentioned above, the\n//   affected function parameter names must end in 'vec' when this creates an\n//   alignment requirement.\n// - A buffer/iterator expected to contain only UTF-8 text should be char*.\n//   unsigned char* should be reserved for byte-array buffers and iterators\n//   which are expected to interact with some non-text bytes, and generic\n//   memory-location pointers which will be subject to pointer arithmetic.\n//   (Note that this creates some clutter in low-level parsing code: since the\n//   signedness of char is platform-dependent, it becomes necessary to use e.g.\n//   ctou32() a fair bit.)\n// - unsigned char is an acceptable argument type for functions intended to\n//   process a single text character, thanks to implicit char -> unsigned char\n//   conversion; it's just unsigned char* that should be avoided.\n// - void* return values should be restricted to generic pointers which are\n//   *not* expected to be subject to pointer arithmetic.  void* as input\n//   parameter type should only be used when there are at least two equally\n//   valid input types, NOT counting VecW*.\n\n\n// gcc 8.3.0 has been miscompiling the ParseOnebitUnsafe() function in\n// pgenlib_read.cc for the last several years.  gcc 8.4 does not have this\n// problem, and neither does any other gcc major version I've tested to date.\n#ifndef __clang__\n#  if (__GNUC__ == 8) && (__GNUC_MINOR__ < 4)\n#    error \"gcc 8.3 is known to have a miscompilation bug that was fixed in 8.4.\"\n#  endif\n#endif\n\n#if (__GNUC__ < 4)\n// may eventually add MSVC support to gain access to MKL on Windows, but can't\n// justify doing that before all major features are implemented.\n#  error \"gcc 4.x+ or clang equivalent required.\"\n#endif\n\n// The -Wshorten-64-to-32 diagnostic forces the code to be cluttered with\n// meaningless uintptr_t -> uint32_t static casts (number known to be < 2^32,\n// just stored in a uintptr_t because there's no speed penalty and we generally\n// want to think in terms of word-based operations).  The code is more readable\n// if S_CAST(uint32_t, <potentially wider value>) is reserved for situations\n// where a higher bit may actually be set.  This pragma can always be commented\n// out on the few occasions where inappropriate silent truncation is suspected.\n#ifdef __clang__\n#  pragma clang diagnostic ignored \"-Wshorten-64-to-32\"\n#endif\n\n// 10000 * major + 100 * minor + patch\n// Exception to CONSTI32, since we want the preprocessor to have access\n// to this value.  Named with all caps as a consequence.\n#define PLINK2_BASE_VERNUM 815\n\n\n#define _FILE_OFFSET_BITS 64\n\n#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n#include <stddef.h>  // offsetof()\n#include <stdint.h>\n#ifndef __STDC_FORMAT_MACROS\n#  define __STDC_FORMAT_MACROS 1\n#endif\n#include <inttypes.h>\n#include <limits.h>  // CHAR_BIT, PATH_MAX\n\n// #define NDEBUG\n#include <assert.h>\n\n#ifdef _WIN32\n  // needed for EnterCriticalSection, etc.\n#  ifndef _WIN64\n#    define WINVER 0x0501\n#  else\n#    define __LP64__\n#  endif\n#  ifndef WIN32_LEAN_AND_MEAN\n#    define WIN32_LEAN_AND_MEAN\n#  endif\n#  include <windows.h>\n#endif\n\n#if __cplusplus >= 201103L\n#  include <array>\n#endif\n\n#ifdef __LP64__\n// TODO: working no-SSE2 fallback on 64-bit little-endian platforms unsupported\n// by simde.  
Can perform early test by compiling on M1/M2 without simde.\n#  define USE_SSE2\n#  ifdef __x86_64__\n#    include <emmintrin.h>\n#  else\n#    define SIMDE_ENABLE_NATIVE_ALIASES\n// Since e.g. an old zstd system header breaks the build, and plink2 is\n// expected to remain under active development for the next few years, we\n// currently default to using vendored copies of zstd/libdeflate/simde, which\n// are manually updated as necessary.\n// To use system headers, define IGNORE_BUNDLED_{ZSTD,LIBDEFLATE,SIMDE}.\n#    ifdef IGNORE_BUNDLED_SIMDE\n#      include <simde/x86/sse2.h>\n#    else\n#      include \"../simde/x86/sse2.h\"\n#    endif\n#    ifdef SIMDE_ARM_NEON_A64V8_NATIVE\n// For Apple M1, we effectively use SSE2 + constrained _mm_shuffle_epi8().\n// - We don't want to use simde's emulated _mm_shuffle_epi8 since it has an\n//   extra and-with-0x8f step that we never need.\n//   In the event the and-with-0x8f is actually needed, we'll define\n//   vec..._x86_shuffle8() helper functions.\n// - M1 also doesn't have efficient word-popcount.\n#      define USE_SHUFFLE8\n#    endif\n#  endif\n#  ifdef __SSE4_2__\n#    define USE_SSE42\n#    define USE_SHUFFLE8\n#    include <smmintrin.h>\n#    ifdef __AVX2__\n#      if defined(__BMI__) && defined(__BMI2__) && defined(__LZCNT__)\n#        include <immintrin.h>\n#        define USE_AVX2\n#      else\n// Graceful downgrade, in case -march=native misfires on a VM.  See\n// https://github.com/chrchang/plink-ng/issues/155 .\n#        warning \"AVX2 builds require -mbmi, -mbmi2, and -mlzcnt as well.  Downgrading to SSE4.2 build.\"\n#        undef USE_AVX2\n#      endif\n#    endif\n#  endif\n#  define ALIGNV16 __attribute__ ((aligned (16)))\n#else\n#  define ALIGNV16\n#endif\n\n// done with #includes, can start C++ namespace...\n#ifdef __cplusplus\nnamespace plink2 {\n#endif\n\n// ...though a bunch of symbols remain to be #defined; try to reduce the number\n// over time.\n\n#ifndef UINT32_MAX\n  // can theoretically be undefined in C++03\n#  define UINT32_MAX 0xffffffffU\n#endif\n\n#define UINT32_MAXM1 0xfffffffeU\n\n#ifndef UINT64_MAX\n#  define UINT64_MAX 0xffffffffffffffffLLU\n#endif\n\n#ifdef __cplusplus\n#  define HEADER_INLINE inline\n// Previously went on a wild constexpr spree, but now these are mostly unused.\n// Reserve for cases where (i) there's a clear constant-initialization use case\n// for an imaginable downstream program (I'm looking at you, DivUp() and\n// RoundUpPow2()...), or (ii) it allows a useful static_assert to be inserted\n// for a hardcoded constant.\n#  if __cplusplus >= 201103L\n#    define HEADER_CINLINE constexpr\n#    define CSINLINE static constexpr\n#    if __cplusplus > 201103L\n#      define HEADER_CINLINE2 constexpr\n#      define CSINLINE2 static constexpr\n#    else\n#      define HEADER_CINLINE2 inline\n#      define CSINLINE2 static inline\n#    endif\n#  else\n#    define HEADER_CINLINE inline\n#    define HEADER_CINLINE2 inline\n#    define CSINLINE static inline\n#    define CSINLINE2 static inline\n#  endif\n#  if __cplusplus <= 199711L\n    // this may be defined anyway, at least on OS X\n#    ifndef static_assert\n      // todo: check other cases\n#      define static_assert(cond, msg)\n#    endif\n#  endif\n#else\n#  define HEADER_INLINE static inline\n#  define HEADER_CINLINE static inline\n#  define HEADER_CINLINE2 static inline\n#  define CSINLINE static inline\n#  define CSINLINE2 static inline\n  // _Static_assert() should work in gcc 4.6+\n#  if (__GNUC__ == 4) && (__GNUC_MINOR__ < 6)\n#    
if defined(__clang__) && defined(__has_feature) && defined(__has_extension)\n#      if __has_feature(c_static_assert) || __has_extension(c_static_assert)\n#        define static_assert _Static_assert\n#      else\n#        define static_assert(cond, msg)\n#      endif\n#    else\n#      define static_assert(cond, msg)\n#    endif\n#  else\n#    define static_assert _Static_assert\n#  endif\n#endif\n\n#define __maybe_unused __attribute__((unused))\n\n// Rule of thumb: Use these macros if, and only if, the condition would always\n// trigger exit-from-program.  As a side effect, this makes it more\n// straightforward, if still tedious, to make global changes to error-handling\n// strategy (always dump backtrace and exit immediately?), though provision\n// must still be made for sometimes-error-sometimes-not return paths which\n// don't get an unlikely annotation.\n#ifndef likely\n#  define likely(expr) __builtin_expect(!!(expr), 1)\n#  define unlikely(expr) __builtin_expect(!!(expr), 0)\n#endif\n\n#ifdef __cplusplus\n#  define K_CAST(type, val) (const_cast<type>(val))\n#  define R_CAST(type, val) (reinterpret_cast<type>(val))\n#  define S_CAST(type, val) (static_cast<type>(val))\n#else\n#  define K_CAST(type, val) ((type)(val))\n#  define R_CAST(type, val) ((type)(val))\n#  define S_CAST(type, val) ((type)(val))\n#endif\n\n// (from Linux kernel)\n// container_of - cast a member of a structure out to the containing structure\n// @ptr: the pointer to the member.\n// @type: the type of the container struct this is embedded in.\n// @member: the name of the member within the struct.\n#define container_of(ptr, type, member) \\\n  (R_CAST(type*, R_CAST(char*, ptr) - offsetof(type, member)))\n\n// original macro doesn't work in C++ when e.g. ptr is a const char*, and the\n// quick workaround of casting away the const is unsafe.\n#define const_container_of(ptr, type, member) \\\n  (R_CAST(const type*, R_CAST(const char*, ptr) - offsetof(type, member)))\n\nHEADER_INLINE double u31tod(uint32_t uii) {\n  const int32_t ii = uii;\n  assert(ii >= 0);\n  return S_CAST(double, ii);\n}\n\nHEADER_INLINE double swtod(intptr_t lii) {\n  return S_CAST(double, lii);\n}\n\nHEADER_INLINE double u63tod(uint64_t ullii) {\n  const int64_t llii = ullii;\n  assert(llii >= 0);\n  return S_CAST(double, llii);\n}\n\nHEADER_INLINE float u31tof(uint32_t uii) {\n  const int32_t ii = uii;\n  assert(ii >= 0);\n  return S_CAST(float, ii);\n}\n\nHEADER_INLINE uint32_t ctou32(char cc) {\n  return S_CAST(unsigned char, cc);\n}\n\nHEADER_INLINE uintptr_t ctow(char cc) {\n  return S_CAST(unsigned char, cc);\n}\n\nHEADER_INLINE uint64_t ctou64(char cc) {\n  return S_CAST(unsigned char, cc);\n}\n\nHEADER_INLINE unsigned char* CToUc(char* pp) {\n  return R_CAST(unsigned char*, pp);\n}\n\n// Error return types.  All of these evaluate to true on error and false on\n// success, but otherwise they have slightly different semantics:\n// * PglErr is the general-purpose enum.  Unlike an enum, implicit conversion\n//   *to* int, not just from int, is prevented by the C++11 compiler (and the\n//   C++11-compiler-validated code still works under C99).  (To achieve this\n//   additional safety, we engage in a bit of code duplication which would be\n//   unreasonable for flagsets.)\n//   (Previously, explicit cast to uint32_t, but not int32_t, was supported, to\n//   reflect the fact that all error codes are positive.  
This was deemed\n//   silly.)\n// * BoolErr allows implicit conversion from int, but conversion back to\n//   uint32_t requires an explicit cast.  (It should always be 0/1-valued, but\n//   this isn't enforced by the compiler.)\n// * IntErr allows implicit conversion from int, but conversion back to\n//   int32_t requires an explicit cast.  It mainly serves as a holding pen for\n//   C standard library error return values, which can be negative.\n#if __cplusplus >= 201103L\nstruct PglErr {\n  enum class ec\n#else\ntypedef enum\n#endif\n  {\n  kPglRetSuccess,\n  kPglRetSkipped,\n  kPglRetNomem,\n  kPglRetOpenFail,\n  kPglRetReadFail,\n  kPglRetWriteFail,\n  // MalformedInput should be returned on low-level file format violations,\n  // while InconsistentInput should be returned for higher-level logical\n  // problems like mismatched files (generally solvable by fixing the command\n  // line), and DegenerateData for properly-formatted-and-matched files that\n  // yield degenerate computational results due to e.g. divide by zero or\n  // insufficient rank.\n  kPglRetMalformedInput,\n  kPglRetInconsistentInput,\n  kPglRetInvalidCmdline,\n  kPglRetThreadCreateFail,\n  kPglRetNetworkFail,\n  kPglRetVarRecordTooLarge,\n  kPglRetUnsupportedInstructions,\n  kPglRetDegenerateData,\n  kPglRetDecompressFail, // also distinguish this from MalformedInput\n  kPglRetRewindFail,\n  kPglRetGpuFail,\n  kPglRetInternalUse1,\n  kPglRetSampleMajorBed = 32,\n  kPglRetNomemCustomMsg = 59,\n  kPglRetInternalError = 60,\n  kPglRetWarningErrcode = 61,\n  kPglRetImproperFunctionCall = 62,\n  kPglRetNotYetSupported = 63,\n\n  // These are only for internal use.  If any of these reach the top level\n  // instead of being handled or converted to another error code, that's a bug,\n  // and plink2 prints a message to that effect.\n  kPglRetRetry = 124,\n  kPglRetHelp = 125,\n  kPglRetLongLine = 126,\n  kPglRetEof = 127}\n#if __cplusplus >= 201103L\n  ;\n\n  PglErr() {}\n\n  PglErr(const PglErr& source) : value_(source.value_) {}\n\n  PglErr(ec source) : value_(source) {}\n\n  // Allow explicit conversion from uint64_t, and NOT uint32_t, to this error\n  // type, to support reproducible multithreaded error reporting (where\n  // multiple threads may atomically attempt to modify a single uint64_t with\n  // the error code in the low 32 bits and a priority number in the high bits).\n  explicit PglErr(uint64_t source) : value_(static_cast<ec>(source)) {}\n\n  PglErr& operator=(const PglErr&) = default;\n\n  operator ec() const {\n    return value_;\n  }\n\n  explicit operator uint32_t() const {\n    return static_cast<uint32_t>(value_);\n  }\n\n  explicit operator int32_t() const {\n    return static_cast<int32_t>(value_);\n  }\n\n  explicit operator bool() const {\n    return (static_cast<uint32_t>(value_) != 0);\n  }\n\nprivate:\n  ec value_;\n};\n\nconst PglErr kPglRetSuccess = PglErr::ec::kPglRetSuccess;\nconst PglErr kPglRetSkipped = PglErr::ec::kPglRetSkipped;\nconst PglErr kPglRetNomem = PglErr::ec::kPglRetNomem;\nconst PglErr kPglRetOpenFail = PglErr::ec::kPglRetOpenFail;\nconst PglErr kPglRetReadFail = PglErr::ec::kPglRetReadFail;\nconst PglErr kPglRetWriteFail = PglErr::ec::kPglRetWriteFail;\nconst PglErr kPglRetMalformedInput = PglErr::ec::kPglRetMalformedInput;\nconst PglErr kPglRetInconsistentInput = PglErr::ec::kPglRetInconsistentInput;\nconst PglErr kPglRetInvalidCmdline = PglErr::ec::kPglRetInvalidCmdline;\nconst PglErr kPglRetRetry = PglErr::ec::kPglRetRetry;\nconst PglErr kPglRetHelp = 
PglErr::ec::kPglRetHelp;\nconst PglErr kPglRetThreadCreateFail = PglErr::ec::kPglRetThreadCreateFail;\nconst PglErr kPglRetNetworkFail = PglErr::ec::kPglRetNetworkFail;\nconst PglErr kPglRetVarRecordTooLarge = PglErr::ec::kPglRetVarRecordTooLarge;\nconst PglErr kPglRetUnsupportedInstructions = PglErr::ec::kPglRetUnsupportedInstructions;\nconst PglErr kPglRetDegenerateData = PglErr::ec::kPglRetDegenerateData;\nconst PglErr kPglRetDecompressFail = PglErr::ec::kPglRetDecompressFail;\nconst PglErr kPglRetRewindFail = PglErr::ec::kPglRetRewindFail;\nconst PglErr kPglRetGpuFail = PglErr::ec::kPglRetGpuFail;\nconst PglErr kPglRetInternalUse1 = PglErr::ec::kPglRetInternalUse1;\nconst PglErr kPglRetSampleMajorBed = PglErr::ec::kPglRetSampleMajorBed;\nconst PglErr kPglRetWarningErrcode = PglErr::ec::kPglRetWarningErrcode;\nconst PglErr kPglRetNomemCustomMsg = PglErr::ec::kPglRetNomemCustomMsg;\nconst PglErr kPglRetInternalError = PglErr::ec::kPglRetInternalError;\nconst PglErr kPglRetImproperFunctionCall = PglErr::ec::kPglRetImproperFunctionCall;\nconst PglErr kPglRetNotYetSupported = PglErr::ec::kPglRetNotYetSupported;\nconst PglErr kPglRetLongLine = PglErr::ec::kPglRetLongLine;\nconst PglErr kPglRetEof = PglErr::ec::kPglRetEof;\n#else\n  PglErr;\n#endif\n\n#if __cplusplus >= 201103L\n// allow efficient arithmetic on these, but force them to require explicit\n// int32_t/uint32_t casts; only permit implicit assignment from\n// int32_t/uint32_t by default.\n// built-in bool type does too many things we don't want...\n\n// expected to be integer-valued, but not necessarily 0/1 or positive\nstruct IntErr {\n  IntErr() {}\n\n  IntErr(int32_t source) : value_(source) {}\n\n  explicit operator int32_t() const {\n    return value_;\n  }\n\n  explicit operator bool() const {\n    return (value_ != 0);\n  }\n\nprivate:\n  int32_t value_;\n};\n\n// expected to be 0/1-valued\nstruct BoolErr {\n  BoolErr() {}\n\n  BoolErr(uint32_t source) : value_(source) {}\n\n  explicit operator uint32_t() const {\n    return value_;\n  }\n\n  explicit operator bool() const {\n    return (value_ != 0);\n  }\n\nprivate:\n  uint32_t value_;\n};\n#else\ntypedef int32_t IntErr;\ntypedef uint32_t BoolErr;\n#endif\n\n// make this work on 32-bit as well as 64-bit systems, across\n// Windows/OS X/Linux\n// (todo: clean this up a bit.  
it's inherently a baling-wire-and-duct-tape\n// sort of thing, though...)\n#ifdef _WIN32\n  // must compile with -std=gnu++11, not c++11, on 32-bit Windows since\n  // otherwise fseeko64 not defined...\n#  define fseeko fseeko64\n#  define ftello ftello64\n#  define FOPEN_RB \"rb\"\n#  define FOPEN_WB \"wb\"\n#  define FOPEN_AB \"ab\"\n#  define ferror_unlocked ferror\n#  define feof_unlocked feof\n#  ifdef __LP64__\n#    define getc_unlocked _fgetc_nolock\n#    define putc_unlocked _fputc_nolock\n    // todo: find mingw-w64 build which properly links _fread_nolock, and\n    // conditional-compile\n#    define fread_unlocked fread\n#    define fwrite_unlocked fwrite\n#  else\n#    define getc_unlocked getc\n#    define putc_unlocked putc\n#    define fread_unlocked fread\n#    define fwrite_unlocked fwrite\n#  endif\n#  if __cplusplus < 201103L\n#    define uint64_t unsigned long long\n#    define int64_t long long\n#  endif\n#else  // Linux or OS X\n#  define FOPEN_RB \"r\"\n#  define FOPEN_WB \"w\"\n#  define FOPEN_AB \"a\"\n#  if defined(__APPLE__) || defined(__FreeBSD__) || defined(__NetBSD__)\n#    define fread_unlocked fread\n#    define fwrite_unlocked fwrite\n#  endif\n#  if defined(__NetBSD__)\n#    define ferror_unlocked ferror\n#    define feof_unlocked feof\n#  endif\n#endif\n\n#ifdef _WIN32\n#  undef PRId64\n#  undef PRIu64\n#  define PRId64 \"I64d\"\n#  define PRIu64 \"I64u\"\n#else\n#  ifdef __cplusplus\n#    ifndef PRId64\n#      define PRId64 \"lld\"\n#    endif\n#  endif\n#endif\n\n// These are useful for defending against base-pointer integer overflow on bad\n// input.\nHEADER_INLINE BoolErr PtrAddCk(const unsigned char* end, intptr_t incr, const unsigned char** basep) {\n  *basep += incr;\n  return unlikely((end - (*basep)) < 0);\n}\n\n// 'W' for writable\nHEADER_INLINE BoolErr PtrWSubCk(unsigned char* base, intptr_t decr, unsigned char** endp) {\n  *endp -= decr;\n  return unlikely(((*endp) - base) < 0);\n}\n\nHEADER_INLINE BoolErr PtrCheck(const void* end, const void* base, intptr_t req) {\n  const unsigned char* end_uc = S_CAST(const unsigned char*, end);\n  const unsigned char* base_uc = S_CAST(const unsigned char*, base);\n  return unlikely((end_uc - base_uc) < req);\n}\n\n// We want this to return an uint32_t, not an int32_t.\nHEADER_INLINE uint32_t ctzu32(uint32_t uii) {\n  return __builtin_ctz(uii);\n}\n\n// this should always compile down to bsr.\nHEADER_INLINE uint32_t bsru32(uint32_t uii) {\n  return 31 - __builtin_clz(uii);\n}\n\n#ifdef _WIN64\nHEADER_INLINE uint32_t ctzw(unsigned long long ullii) {\n  return __builtin_ctzll(ullii);\n}\n\nHEADER_INLINE uint32_t bsrw(unsigned long long ullii) {\n  // Note that this actually compiles to a single instruction on x86; it's\n  // naked __builtin_clzll which requires an additional subtraction.\n  return 63 - __builtin_clzll(ullii);\n}\n#else\nHEADER_INLINE uint32_t ctzw(unsigned long ulii) {\n  return __builtin_ctzl(ulii);\n}\n\nHEADER_INLINE uint32_t bsrw(unsigned long ulii) {\n  return (8 * sizeof(intptr_t) - 1) - __builtin_clzl(ulii);\n}\n#  ifndef __LP64__\n    // needed to prevent GCC 6 build failure\n#    if (__GNUC__ == 4) && (__GNUC_MINOR__ < 8)\n#      if (__cplusplus < 201103L) && !defined(__APPLE__)\n#        ifndef uintptr_t\n#          define uintptr_t unsigned long\n#        endif\n#        ifndef intptr_t\n#          define intptr_t long\n#        endif\n#      endif\n#    endif\n#  endif\n#endif\n\n#ifdef __LP64__\n#  ifdef _WIN32 // i.e. 
Win64\n\n#    undef PRIuPTR\n#    undef PRIdPTR\n#    define PRIuPTR PRIu64\n#    define PRIdPTR PRId64\n#    define PRIxPTR2 \"016I64x\"\n\n#  else  // not _WIN32\n\n#    ifndef PRIuPTR\n#      define PRIuPTR \"lu\"\n#    endif\n#    ifndef PRIdPTR\n#      define PRIdPTR \"ld\"\n#    endif\n#    define PRIxPTR2 \"016lx\"\n\n#  endif  // Win64\n\n#else  // not __LP64__\n\n  // without this, we get ridiculous warning spew...\n  // not 100% sure this is the right cutoff, but this has been tested on 4.7\n  // and 4.8 build machines, so it plausibly is.\n#  if (__GNUC__ == 4) && (__GNUC_MINOR__ < 8) && (__cplusplus < 201103L)\n#    undef PRIuPTR\n#    undef PRIdPTR\n#    define PRIuPTR \"lu\"\n#    define PRIdPTR \"ld\"\n#  endif\n\n#  define PRIxPTR2 \"08lx\"\n\n#endif\n\n#ifndef HAVE_NULLPTR\n#  ifndef __cplusplus\n#    define nullptr NULL\n#  else\n#    if __cplusplus <= 199711L\n#      ifndef nullptr\n#        define nullptr NULL\n#      endif\n#    endif\n#  endif\n#endif\n\n// Checked a bunch of alternatives to #define constants.  For integer constants\n// in [-2^31, 2^31), enum {} avoids macro expansion issues that actually\n// matter, and that more than cancels out any tiny increase in binary size due\n// to additional debugger information (which has value, anyway).  However, we\n// don't want to use this under C++ due to enumeral/non-enumeral conditional\n// expression warnings, so this isn't one-size-fits-all; and plain old const\n// int has all the functionality we want under C++ (including internal linkage,\n// so it's fine to define them in header files).  Thus we wrap the\n// implementation in a macro.\n//\n// Otherwise, the macro expansion thing is still annoying but we suck it up due\n// to the need for too much duplicate C vs. C++ code (\"initializer element is\n// not constant\" when using const <type> in C99...)\n//\n// We start most plink2- and pgenlib-specific numeric constant names here with\n// \"kPgl\", which should have a vanishingly small chance of colliding with\n// anything in C99.  Note that stuff like kBytesPerWord is not considered\n// library-specific, so it's exempt from having \"Pgl\" in the name.  Also, the\n// few string literals here are of the FOPEN_WB sort, which have similar usage\n// patterns to e.g. PRIuPTR which shouldn't be renamed, so those remain\n// all-caps.\n//\n// (Update, May 2018: CONSTU31 was renamed to CONSTI32 and changed to type\n// int32_t, to prevent C vs. C++ differences.  This almost never makes a\n// difference, since when int32_t and uint32_t are mixed in the same\n// expression, the former gets converted to unsigned.  However, unpleasant\n// surprises are occasionally possible when mixing these constants with\n// uint16_t or unsigned char values, since then the unsigned values are\n// promoted to int32_t.  Also, this essentially forces use of -Wno-sign-compare\n// when using gcc 4.4.\n//\n// Biggest thing to watch out for is mixing of Halfword with these constants in\n// 32-bit builds.  Dosage and Vec8thUint are also relevant.)\n#ifdef __cplusplus\n#  define CONSTI32(name, expr) const int32_t name = (expr)\n#else\n#  define CONSTI32(name, expr) enum {name = (expr)}\n#endif\n\n// useful because of its bitwise complement: ~k0LU is a word with all 1 bits,\n// while ~0 is always 32 1 bits.\n// LLU is used over ULL for searchability (no conflict with NULL).\nstatic const uintptr_t k0LU = S_CAST(uintptr_t, 0);\n\n// mainly useful for bitshifts: (k1LU << 32) works in 64-bit builds, while\n// (1 << 32) is undefined.  
also used as a quicker-to-type way of casting\n// numbers/expressions to uintptr_t (via multiplication).\nstatic const uintptr_t k1LU = S_CAST(uintptr_t, 1);\n\n#ifdef __LP64__\nCONSTI32(kBitsPerWord, 64);\nCONSTI32(kBitsPerWordLog2, 6);\n\ntypedef uint32_t Halfword;\ntypedef uint16_t Quarterword;\n#else\nCONSTI32(kBitsPerWord, 32);\nCONSTI32(kBitsPerWordLog2, 5);\n\ntypedef uint16_t Halfword;\ntypedef uint8_t Quarterword;\n#endif\n\n#ifdef USE_SSE2\n#  ifdef USE_AVX2\nCONSTI32(kBytesPerVec, 32);\n\n// 16 still seems to noticeably outperform 32 on my Mac test machine, and\n// is about equal on my Linux test machine, probably due to reduced clock\n// frequency when 32-byte floating point vector operations are used (as in, ALL\n// operations, sometimes on ALL cores, become slower when a single core\n// performs a 32-byte fp vector operation).\n// However, processor power management, numeric libraries, and my AVX2 code\n// should improve over time.  There will probably come a time where switching\n// to 32-byte fp is worthwhile.\n#    define FVEC_32\n\n// bleah, have to define these here, vector_size doesn't see enum values\ntypedef uintptr_t VecW __attribute__ ((vector_size (32)));\ntypedef uint32_t VecU32 __attribute__ ((vector_size (32)));\ntypedef int32_t VecI32 __attribute__ ((vector_size (32)));\ntypedef unsigned short VecU16 __attribute__ ((vector_size (32)));\ntypedef short VecI16 __attribute__ ((vector_size (32)));\n// documentation says 'char', but int8_t works fine under gcc 4.4 and conveys\n// intent better (char not guaranteed to be signed)\ntypedef int8_t VecI8 __attribute__ ((vector_size (32)));\ntypedef unsigned char VecUc __attribute__ ((vector_size (32)));\n\nHEADER_INLINE VecW VecToW(__m256i vv) {\n  return R_CAST(VecW, vv);\n}\n\nHEADER_INLINE VecU32 VecToU32(__m256i vv) {\n  return R_CAST(VecU32, vv);\n}\n\nHEADER_INLINE VecI32 VecToI32(__m256i vv) {\n  return R_CAST(VecI32, vv);\n}\n\nHEADER_INLINE VecU16 VecToU16(__m256i vv) {\n  return R_CAST(VecU16, vv);\n}\n\nHEADER_INLINE VecI16 VecToI16(__m256i vv) {\n  return R_CAST(VecI16, vv);\n}\n\nHEADER_INLINE VecUc VecToUc(__m256i vv) {\n  return R_CAST(VecUc, vv);\n}\n\nHEADER_INLINE VecI8 VecToI8(__m256i vv) {\n  return R_CAST(VecI8, vv);\n}\n\nHEADER_INLINE __m256i WToVec(VecW vv) {\n  return R_CAST(__m256i, vv);\n}\n\nHEADER_INLINE __m256i U32ToVec(VecU32 vv) {\n  return R_CAST(__m256i, vv);\n}\n\nHEADER_INLINE __m256i I32ToVec(VecI32 vv) {\n  return R_CAST(__m256i, vv);\n}\n\nHEADER_INLINE __m256i U16ToVec(VecU16 vv) {\n  return R_CAST(__m256i, vv);\n}\n\nHEADER_INLINE __m256i I16ToVec(VecI16 vv) {\n  return R_CAST(__m256i, vv);\n}\n\nHEADER_INLINE __m256i UcToVec(VecUc vv) {\n  return R_CAST(__m256i, vv);\n}\n\nHEADER_INLINE __m256i I8ToVec(VecI8 vv) {\n  return R_CAST(__m256i, vv);\n}\n\n// _mm256_set1_... 
seems to have the same performance; could use that instead.\n#    define VCONST_W(xx) {xx, xx, xx, xx}\n#    define VCONST_S(xx) {xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx}\n#    define VCONST_C(xx) {xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx}\n#    define VCONST_UC VCONST_C\n\n// vv = VCONST_W(k0LU) doesn't work (only ok for initialization)\nHEADER_INLINE VecW vecw_setzero() {\n  return VecToW(_mm256_setzero_si256());\n}\n\nHEADER_INLINE VecU32 vecu32_setzero() {\n  return VecToU32(_mm256_setzero_si256());\n}\n\nHEADER_INLINE VecU16 vecu16_setzero() {\n  return VecToU16(_mm256_setzero_si256());\n}\n\nHEADER_INLINE VecI16 veci16_setzero() {\n  return VecToI16(_mm256_setzero_si256());\n}\n\nHEADER_INLINE VecUc vecuc_setzero() {\n  return VecToUc(_mm256_setzero_si256());\n}\n\nHEADER_INLINE VecI8 veci8_setzero() {\n  return VecToI8(_mm256_setzero_si256());\n}\n\n// \"vv >> ct\" doesn't work, and Scientific Linux gcc 4.4 might not optimize\n// VCONST_W shift properly (todo: test this)\nHEADER_INLINE VecW vecw_srli(VecW vv, uint32_t ct) {\n  return VecToW(_mm256_srli_epi64(WToVec(vv), ct));\n}\n\nHEADER_INLINE VecW vecw_slli(VecW vv, uint32_t ct) {\n  return VecToW(_mm256_slli_epi64(WToVec(vv), ct));\n}\n\nHEADER_INLINE VecU32 vecu32_srli(VecU32 vv, uint32_t ct) {\n  return VecToU32(_mm256_srli_epi32(U32ToVec(vv), ct));\n}\n\nHEADER_INLINE VecU32 vecu32_slli(VecU32 vv, uint32_t ct) {\n  return VecToU32(_mm256_slli_epi32(U32ToVec(vv), ct));\n}\n\nHEADER_INLINE VecU16 vecu16_srli(VecU16 vv, uint32_t ct) {\n  return VecToU16(_mm256_srli_epi16(U16ToVec(vv), ct));\n}\n\nHEADER_INLINE VecU16 vecu16_slli(VecU16 vv, uint32_t ct) {\n  return VecToU16(_mm256_slli_epi16(U16ToVec(vv), ct));\n}\n\n// Compiler still doesn't seem to be smart enough to use andnot properly.\nHEADER_INLINE VecW vecw_and_notfirst(VecW excl, VecW main) {\n  return VecToW(_mm256_andnot_si256(WToVec(excl), WToVec(main)));\n}\n\nHEADER_INLINE VecU32 vecu32_and_notfirst(VecU32 excl, VecU32 main) {\n  return VecToU32(_mm256_andnot_si256(U32ToVec(excl), U32ToVec(main)));\n}\n\nHEADER_INLINE VecI32 veci32_and_notfirst(VecI32 excl, VecI32 main) {\n  return VecToI32(_mm256_andnot_si256(I32ToVec(excl), I32ToVec(main)));\n}\n\nHEADER_INLINE VecU16 vecu16_and_notfirst(VecU16 excl, VecU16 main) {\n  return VecToU16(_mm256_andnot_si256(U16ToVec(excl), U16ToVec(main)));\n}\n\nHEADER_INLINE VecI16 veci16_and_notfirst(VecI16 excl, VecI16 main) {\n  return VecToI16(_mm256_andnot_si256(I16ToVec(excl), I16ToVec(main)));\n}\n\nHEADER_INLINE VecUc vecuc_and_notfirst(VecUc excl, VecUc main) {\n  return VecToUc(_mm256_andnot_si256(UcToVec(excl), UcToVec(main)));\n}\n\nHEADER_INLINE VecI8 veci8_and_notfirst(VecI8 excl, VecI8 main) {\n  return VecToI8(_mm256_andnot_si256(I8ToVec(excl), I8ToVec(main)));\n}\n\nHEADER_INLINE VecW vecw_set1(uintptr_t ulii) {\n  return VecToW(_mm256_set1_epi64x(ulii));\n}\n\nHEADER_INLINE VecU32 vecu32_set1(uint32_t uii) {\n  return VecToU32(_mm256_set1_epi32(uii));\n}\n\nHEADER_INLINE VecI32 veci32_set1(int32_t ii) {\n  return VecToI32(_mm256_set1_epi32(ii));\n}\n\nHEADER_INLINE VecU16 vecu16_set1(unsigned short usi) {\n  return VecToU16(_mm256_set1_epi16(usi));\n}\n\nHEADER_INLINE VecI16 veci16_set1(short si) {\n  return VecToI16(_mm256_set1_epi16(si));\n}\n\nHEADER_INLINE VecUc vecuc_set1_epi16(unsigned short usi) {\n  return VecToUc(_mm256_set1_epi16(usi));\n}\n\nHEADER_INLINE VecUc vecuc_set1(unsigned char ucc) {\n  
return VecToUc(_mm256_set1_epi8(ucc));\n}\n\nHEADER_INLINE VecI8 veci8_set1(char cc) {\n  return VecToI8( _mm256_set1_epi8(cc));\n}\n\n// TODO: on ARM, replace most movemask uses:\n// https://community.arm.com/arm-community-blogs/b/infrastructure-solutions-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon\nHEADER_INLINE uint32_t vecw_movemask(VecW vv) {\n  return _mm256_movemask_epi8(WToVec(vv));\n}\n\nHEADER_INLINE uint32_t vecu32_movemask(VecU32 vv) {\n  return _mm256_movemask_epi8(U32ToVec(vv));\n}\n\nHEADER_INLINE uint32_t veci32_movemask(VecI32 vv) {\n  return _mm256_movemask_epi8(I32ToVec(vv));\n}\n\nHEADER_INLINE uint32_t vecu16_movemask(VecU16 vv) {\n  return _mm256_movemask_epi8(U16ToVec(vv));\n}\n\nHEADER_INLINE uint32_t veci16_movemask(VecI16 vv) {\n  return _mm256_movemask_epi8(I16ToVec(vv));\n}\n\nHEADER_INLINE uint32_t veci8_movemask(VecI8 vv) {\n  return _mm256_movemask_epi8(I8ToVec(vv));\n}\n\nHEADER_INLINE uint32_t vecuc_movemask(VecUc vv) {\n  return _mm256_movemask_epi8(UcToVec(vv));\n}\n\n// Repeats elements in second lane in AVX2 case.\nHEADER_INLINE VecW vecw_setr8(char e15, char e14, char e13, char e12, char e11, char e10, char e9, char e8, char e7, char e6, char e5, char e4, char e3, char e2, char e1, char e0) {\n  return VecToW(_mm256_setr_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0));\n}\n\nHEADER_INLINE VecU16 vecu16_setr8(char e15, char e14, char e13, char e12, char e11, char e10, char e9, char e8, char e7, char e6, char e5, char e4, char e3, char e2, char e1, char e0) {\n  return VecToU16(_mm256_setr_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0));\n}\n\nHEADER_INLINE VecUc vecuc_setr8(char e15, char e14, char e13, char e12, char e11, char e10, char e9, char e8, char e7, char e6, char e5, char e4, char e3, char e2, char e1, char e0) {\n  return VecToUc(_mm256_setr_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0));\n}\n\n// Discards last 16 arguments in SSE2/SSE4.2 case.\nHEADER_INLINE VecW vecw_setr8x(char e31, char e30, char e29, char e28, char e27, char e26, char e25, char e24, char e23, char e22, char e21, char e20, char e19, char e18, char e17, char e16, char e15, char e14, char e13, char e12, char e11, char e10, char e9, char e8, char e7, char e6, char e5, char e4, char e3, char e2, char e1, char e0) {\n  return VecToW(_mm256_setr_epi8(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0));\n}\n\nHEADER_INLINE VecUc vecuc_setr8x(char e31, char e30, char e29, char e28, char e27, char e26, char e25, char e24, char e23, char e22, char e21, char e20, char e19, char e18, char e17, char e16, char e15, char e14, char e13, char e12, char e11, char e10, char e9, char e8, char e7, char e6, char e5, char e4, char e3, char e2, char e1, char e0) {\n  return VecToUc(_mm256_setr_epi8(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0));\n}\n\nHEADER_INLINE VecW vecw_setr32(uint32_t e3, uint32_t e2, uint32_t e1, uint32_t e0) {\n  return VecToW(_mm256_setr_epi32(e3, e2, e1, e0, e3, e2, e1, e0));\n}\n\n\nHEADER_INLINE VecW vecw_unpacklo8(VecW evens, VecW odds) {\n  return 
VecToW(_mm256_unpacklo_epi8(WToVec(evens), WToVec(odds)));\n}\n\nHEADER_INLINE VecW vecw_unpackhi8(VecW evens, VecW odds) {\n  return VecToW(_mm256_unpackhi_epi8(WToVec(evens), WToVec(odds)));\n}\n\nHEADER_INLINE VecI8 veci8_unpacklo8(VecI8 evens, VecI8 odds) {\n  return VecToI8(_mm256_unpacklo_epi8(I8ToVec(evens), I8ToVec(odds)));\n}\n\nHEADER_INLINE VecI8 veci8_unpackhi8(VecI8 evens, VecI8 odds) {\n  return VecToI8(_mm256_unpackhi_epi8(I8ToVec(evens), I8ToVec(odds)));\n}\n\nHEADER_INLINE VecUc vecuc_unpacklo8(VecUc evens, VecUc odds) {\n  return VecToUc(_mm256_unpacklo_epi8(UcToVec(evens), UcToVec(odds)));\n}\n\nHEADER_INLINE VecUc vecuc_unpackhi8(VecUc evens, VecUc odds) {\n  return VecToUc(_mm256_unpackhi_epi8(UcToVec(evens), UcToVec(odds)));\n}\n\nHEADER_INLINE VecW vecw_unpacklo16(VecW evens, VecW odds) {\n  return VecToW(_mm256_unpacklo_epi16(WToVec(evens), WToVec(odds)));\n}\n\nHEADER_INLINE VecW vecw_unpackhi16(VecW evens, VecW odds) {\n  return VecToW(_mm256_unpackhi_epi16(WToVec(evens), WToVec(odds)));\n}\n\nHEADER_INLINE VecW vecw_unpacklo32(VecW evens, VecW odds) {\n  return VecToW(_mm256_unpacklo_epi32(WToVec(evens), WToVec(odds)));\n}\n\nHEADER_INLINE VecW vecw_unpackhi32(VecW evens, VecW odds) {\n  return VecToW(_mm256_unpackhi_epi32(WToVec(evens), WToVec(odds)));\n}\n\nHEADER_INLINE VecW vecw_permute0xd8_if_avx2(VecW vv) {\n  return VecToW(_mm256_permute4x64_epi64(WToVec(vv), 0xd8));\n}\n\nHEADER_INLINE VecI8 veci8_permute0xd8_if_avx2(VecI8 vv) {\n  return VecToI8(_mm256_permute4x64_epi64(I8ToVec(vv), 0xd8));\n}\n\nHEADER_INLINE VecUc vecuc_permute0xd8_if_avx2(VecUc vv) {\n  return VecToUc(_mm256_permute4x64_epi64(UcToVec(vv), 0xd8));\n}\n\n// Could have a single-src gather_even function, but that should wait until\n// there is a clear SSE2 use case.\nHEADER_INLINE VecW vecw_gather_even(VecW src_lo, VecW src_hi, VecW m8) {\n  const VecW gathered_laneswapped = VecToW(_mm256_packus_epi16(WToVec(src_lo & m8), WToVec(src_hi & m8)));\n  return vecw_permute0xd8_if_avx2(gathered_laneswapped);\n}\n\nHEADER_INLINE VecUc vecuc_gather_even(VecUc src_lo, VecUc src_hi, VecUc m8) {\n  const VecUc gathered_laneswapped = VecToUc(_mm256_packus_epi16(UcToVec(src_lo & m8), UcToVec(src_hi & m8)));\n  return vecuc_permute0xd8_if_avx2(gathered_laneswapped);\n}\n\nHEADER_INLINE VecUc vecuc_gather_odd(VecUc src_lo, VecUc src_hi) {\n  const VecUc gathered_laneswapped = VecToUc(_mm256_packus_epi16(_mm256_srli_epi16(UcToVec(src_lo), 8), _mm256_srli_epi16(UcToVec(src_hi), 8)));\n  return vecuc_permute0xd8_if_avx2(gathered_laneswapped);\n}\n\nHEADER_INLINE VecW vecw_shuffle8(VecW table, VecW indexes) {\n  return VecToW(_mm256_shuffle_epi8(WToVec(table), WToVec(indexes)));\n}\n\nHEADER_INLINE VecU16 vecu16_shuffle8(VecU16 table, VecU16 indexes) {\n  return VecToU16(_mm256_shuffle_epi8(U16ToVec(table), U16ToVec(indexes)));\n}\n\nHEADER_INLINE VecUc vecuc_shuffle8(VecUc table, VecUc indexes) {\n  return VecToUc(_mm256_shuffle_epi8(UcToVec(table), UcToVec(indexes)));\n}\n\nHEADER_INLINE uintptr_t vecw_extract64_0(VecW vv) {\n  return _mm256_extract_epi64(WToVec(vv), 0);\n}\n\nHEADER_INLINE uintptr_t vecw_extract64_1(VecW vv) {\n  return _mm256_extract_epi64(WToVec(vv), 1);\n}\n\n// *** AVX2-only section ***\nHEADER_INLINE uintptr_t vecw_extract64_2(VecW vv) {\n  return _mm256_extract_epi64(WToVec(vv), 2);\n}\n\nHEADER_INLINE uintptr_t vecw_extract64_3(VecW vv) {\n  return _mm256_extract_epi64(WToVec(vv), 3);\n}\n\n// todo: permute\n\n// *** end AVX2-only section ***\n\n#    define kVec8thUintMax 
UINT32_MAX\n\ntypedef uint16_t Vec16thUint;\ntypedef uint32_t Vec8thUint;\ntypedef uint64_t Vec4thUint;\n\nHEADER_INLINE VecW vecw_load(const void* mem_addr) {\n  return VecToW(_mm256_load_si256(S_CAST(const __m256i*, mem_addr)));\n}\n\n// There may be some value in adding a 4-consecutive-vector load function when\n// addresses are expected to be unaligned: see\n//   https://www.agner.org/optimize/blog/read.php?i=627&v=t\n\nHEADER_INLINE VecW vecw_loadu(const void* mem_addr) {\n  return VecToW(_mm256_loadu_si256(S_CAST(const __m256i*, mem_addr)));\n}\n\nHEADER_INLINE VecU32 vecu32_loadu(const void* mem_addr) {\n  return VecToU32(_mm256_loadu_si256(S_CAST(const __m256i*, mem_addr)));\n}\n\nHEADER_INLINE VecI32 veci32_loadu(const void* mem_addr) {\n  return VecToI32(_mm256_loadu_si256(S_CAST(const __m256i*, mem_addr)));\n}\n\nHEADER_INLINE VecU16 vecu16_loadu(const void* mem_addr) {\n  return VecToU16(_mm256_loadu_si256(S_CAST(const __m256i*, mem_addr)));\n}\n\nHEADER_INLINE VecI16 veci16_loadu(const void* mem_addr) {\n  return VecToI16(_mm256_loadu_si256(S_CAST(const __m256i*, mem_addr)));\n}\n\nHEADER_INLINE VecUc vecuc_loadu(const void* mem_addr) {\n  return VecToUc(_mm256_loadu_si256(S_CAST(const __m256i*, mem_addr)));\n}\n\nHEADER_INLINE VecI8 veci8_loadu(const void* mem_addr) {\n  return VecToI8(_mm256_loadu_si256(S_CAST(const __m256i*, mem_addr)));\n}\n\nHEADER_INLINE void vec_storeu(void* mem_addr, __m256i vv) {\n  _mm256_storeu_si256(S_CAST(__m256i*, mem_addr), vv);\n}\n\nHEADER_INLINE VecI32 veci32_max(VecI32 v1, VecI32 v2) {\n  return VecToI32(_mm256_max_epi32(I32ToVec(v1), I32ToVec(v2)));\n}\n\nHEADER_INLINE VecI16 veci16_max(VecI16 v1, VecI16 v2) {\n  return VecToI16(_mm256_max_epi16(I16ToVec(v1), I16ToVec(v2)));\n}\n\nHEADER_INLINE VecW vecw_sad(VecW v1, VecW v2) {\n  return VecToW(_mm256_sad_epu8(WToVec(v1), WToVec(v2)));\n}\n\nHEADER_INLINE VecUc vecuc_adds(VecUc v1, VecUc v2) {\n  return VecToUc(_mm256_adds_epu8(UcToVec(v1), UcToVec(v2)));\n}\n\nHEADER_INLINE VecU16 vecu16_min8(VecU16 v1, VecU16 v2) {\n  return VecToU16(_mm256_min_epu8(U16ToVec(v1), U16ToVec(v2)));\n}\n\nHEADER_INLINE VecUc vecuc_min(VecUc v1, VecUc v2) {\n  return VecToUc(_mm256_min_epu8(UcToVec(v1), UcToVec(v2)));\n}\n\nHEADER_INLINE VecW vecw_blendv(VecW aa, VecW bb, VecW mask) {\n  return VecToW(_mm256_blendv_epi8(WToVec(aa), WToVec(bb), WToVec(mask)));\n}\n\nHEADER_INLINE VecU32 vecu32_blendv(VecU32 aa, VecU32 bb, VecU32 mask) {\n  return VecToU32(_mm256_blendv_epi8(U32ToVec(aa), U32ToVec(bb), U32ToVec(mask)));\n}\n\nHEADER_INLINE VecU16 vecu16_blendv(VecU16 aa, VecU16 bb, VecU16 mask) {\n  return VecToU16(_mm256_blendv_epi8(U16ToVec(aa), U16ToVec(bb), U16ToVec(mask)));\n}\n\nHEADER_INLINE VecUc vecuc_blendv(VecUc aa, VecUc bb, VecUc mask) {\n  return VecToUc(_mm256_blendv_epi8(UcToVec(aa), UcToVec(bb), UcToVec(mask)));\n}\n\n#  else  // USE_SSE2, !USE_AVX2\n\nCONSTI32(kBytesPerVec, 16);\ntypedef uintptr_t VecW __attribute__ ((vector_size (16)));\ntypedef uint32_t VecU32 __attribute ((vector_size (16)));\ntypedef int32_t VecI32 __attribute ((vector_size (16)));\ntypedef unsigned short VecU16 __attribute__ ((vector_size (16)));\ntypedef short VecI16 __attribute__ ((vector_size (16)));\ntypedef int8_t VecI8 __attribute__ ((vector_size (16)));\ntypedef unsigned char VecUc __attribute__ ((vector_size (16)));\n\nHEADER_INLINE VecW VecToW(__m128i vv) {\n  return R_CAST(VecW, vv);\n}\n\nHEADER_INLINE VecU32 VecToU32(__m128i vv) {\n  return R_CAST(VecU32, vv);\n}\n\nHEADER_INLINE VecI32 VecToI32(__m128i vv) {\n 
 return R_CAST(VecI32, vv);\n}\n\nHEADER_INLINE VecU16 VecToU16(__m128i vv) {\n  return R_CAST(VecU16, vv);\n}\n\nHEADER_INLINE VecI16 VecToI16(__m128i vv) {\n  return R_CAST(VecI16, vv);\n}\n\nHEADER_INLINE VecUc VecToUc(__m128i vv) {\n  return R_CAST(VecUc, vv);\n}\n\nHEADER_INLINE VecI8 VecToI8(__m128i vv) {\n  return R_CAST(VecI8, vv);\n}\n\nHEADER_INLINE __m128i WToVec(VecW vv) {\n  return R_CAST(__m128i, vv);\n}\n\nHEADER_INLINE __m128i U32ToVec(VecU32 vv) {\n  return R_CAST(__m128i, vv);\n}\n\nHEADER_INLINE __m128i I32ToVec(VecI32 vv) {\n  return R_CAST(__m128i, vv);\n}\n\nHEADER_INLINE __m128i U16ToVec(VecU16 vv) {\n  return R_CAST(__m128i, vv);\n}\n\nHEADER_INLINE __m128i I16ToVec(VecI16 vv) {\n  return R_CAST(__m128i, vv);\n}\n\nHEADER_INLINE __m128i UcToVec(VecUc vv) {\n  return R_CAST(__m128i, vv);\n}\n\nHEADER_INLINE __m128i I8ToVec(VecI8 vv) {\n  return R_CAST(__m128i, vv);\n}\n\n#    define VCONST_W(xx) {xx, xx}\n#    define VCONST_S(xx) {xx, xx, xx, xx, xx, xx, xx, xx}\n#    define VCONST_C(xx) {xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx}\n#    define VCONST_UC VCONST_C\n\nHEADER_INLINE VecW vecw_setzero() {\n  return VecToW(_mm_setzero_si128());\n}\n\nHEADER_INLINE VecU32 vecu32_setzero() {\n  return VecToU32(_mm_setzero_si128());\n}\n\nHEADER_INLINE VecU16 vecu16_setzero() {\n  return VecToU16(_mm_setzero_si128());\n}\n\nHEADER_INLINE VecI16 veci16_setzero() {\n  return VecToI16(_mm_setzero_si128());\n}\n\nHEADER_INLINE VecUc vecuc_setzero() {\n  return VecToUc(_mm_setzero_si128());\n}\n\nHEADER_INLINE VecI8 veci8_setzero() {\n  return VecToI8(_mm_setzero_si128());\n}\n\n// simde is incompatible with defining these as inline functions\n#    define vecw_srli(vv, ct) VecToW(_mm_srli_epi64(WToVec(vv), ct))\n\n#    define vecw_slli(vv, ct) VecToW(_mm_slli_epi64(WToVec(vv), ct))\n\n#    define vecu32_srli(vv, ct) VecToU32(_mm_srli_epi32(U32ToVec(vv), ct))\n\n#    define vecu32_slli(vv, ct) VecToU32(_mm_slli_epi32(U32ToVec(vv), ct))\n\n#    define vecu16_srli(vv, ct) VecToU16(_mm_srli_epi16(U16ToVec(vv), ct))\n\n#    define vecu16_slli(vv, ct) VecToU16(_mm_slli_epi16(U16ToVec(vv), ct))\n\nHEADER_INLINE VecW vecw_and_notfirst(VecW excl, VecW main) {\n  return VecToW(_mm_andnot_si128(WToVec(excl), WToVec(main)));\n}\n\nHEADER_INLINE VecU32 vecu32_and_notfirst(VecU32 excl, VecU32 main) {\n  return VecToU32(_mm_andnot_si128(U32ToVec(excl), U32ToVec(main)));\n}\n\nHEADER_INLINE VecI32 veci32_and_notfirst(VecI32 excl, VecI32 main) {\n  return VecToI32(_mm_andnot_si128(I32ToVec(excl), I32ToVec(main)));\n}\n\nHEADER_INLINE VecU16 vecu16_and_notfirst(VecU16 excl, VecU16 main) {\n  return VecToU16(_mm_andnot_si128(U16ToVec(excl), U16ToVec(main)));\n}\n\nHEADER_INLINE VecI16 veci16_and_notfirst(VecI16 excl, VecI16 main) {\n  return VecToI16(_mm_andnot_si128(I16ToVec(excl), I16ToVec(main)));\n}\n\nHEADER_INLINE VecUc vecuc_and_notfirst(VecUc excl, VecUc main) {\n  return VecToUc(_mm_andnot_si128(UcToVec(excl), UcToVec(main)));\n}\n\nHEADER_INLINE VecI8 veci8_and_notfirst(VecI8 excl, VecI8 main) {\n  return VecToI8(_mm_andnot_si128(I8ToVec(excl), I8ToVec(main)));\n}\n\nHEADER_INLINE VecW vecw_set1(uintptr_t ulii) {\n  return VecToW(_mm_set1_epi64x(ulii));\n}\n\nHEADER_INLINE VecU32 vecu32_set1(uint32_t uii) {\n  return VecToU32(_mm_set1_epi32(uii));\n}\n\nHEADER_INLINE VecI32 veci32_set1(int32_t ii) {\n  return VecToI32(_mm_set1_epi32(ii));\n}\n\nHEADER_INLINE VecU16 vecu16_set1(unsigned short usi) {\n  return VecToU16(_mm_set1_epi16(usi));\n}\n\nHEADER_INLINE VecI16 
veci16_set1(short si) {\n  return VecToI16(_mm_set1_epi16(si));\n}\n\nHEADER_INLINE VecUc vecuc_set1_epi16(unsigned short usi) {\n  return VecToUc(_mm_set1_epi16(usi));\n}\n\nHEADER_INLINE VecUc vecuc_set1(unsigned char ucc) {\n  return VecToUc(_mm_set1_epi8(ucc));\n}\n\nHEADER_INLINE VecI8 veci8_set1(char cc) {\n  return VecToI8(_mm_set1_epi8(cc));\n}\n\nHEADER_INLINE uint32_t vecw_movemask(VecW vv) {\n  return _mm_movemask_epi8(WToVec(vv));\n}\n\nHEADER_INLINE uint32_t vecu32_movemask(VecU32 vv) {\n  return _mm_movemask_epi8(U32ToVec(vv));\n}\n\nHEADER_INLINE uint32_t veci32_movemask(VecI32 vv) {\n  return _mm_movemask_epi8(I32ToVec(vv));\n}\n\nHEADER_INLINE uint32_t vecu16_movemask(VecU16 vv) {\n  return _mm_movemask_epi8(U16ToVec(vv));\n}\n\nHEADER_INLINE uint32_t veci16_movemask(VecI16 vv) {\n  return _mm_movemask_epi8(I16ToVec(vv));\n}\n\nHEADER_INLINE uint32_t veci8_movemask(VecI8 vv) {\n  return _mm_movemask_epi8(I8ToVec(vv));\n}\n\nHEADER_INLINE uint32_t vecuc_movemask(VecUc vv) {\n  return _mm_movemask_epi8(UcToVec(vv));\n}\n\nCONSTI32(kVec8thUintMax, 65535);\n\n// #    define kVec8thUintMax 65535\n\ntypedef unsigned char Vec16thUint;\ntypedef uint16_t Vec8thUint;\ntypedef uint32_t Vec4thUint;\n\nHEADER_INLINE VecW vecw_load(const void* mem_addr) {\n  return VecToW(_mm_load_si128(S_CAST(const __m128i*, mem_addr)));\n}\n\nHEADER_INLINE VecW vecw_loadu(const void* mem_addr) {\n  return VecToW(_mm_loadu_si128(S_CAST(const __m128i*, mem_addr)));\n}\n\nHEADER_INLINE VecU32 vecu32_loadu(const void* mem_addr) {\n  return VecToU32(_mm_loadu_si128(S_CAST(const __m128i*, mem_addr)));\n}\n\nHEADER_INLINE VecI32 veci32_loadu(const void* mem_addr) {\n  return VecToI32(_mm_loadu_si128(S_CAST(const __m128i*, mem_addr)));\n}\n\nHEADER_INLINE VecU16 vecu16_loadu(const void* mem_addr) {\n  return VecToU16(_mm_loadu_si128(S_CAST(const __m128i*, mem_addr)));\n}\n\nHEADER_INLINE VecI16 veci16_loadu(const void* mem_addr) {\n  return VecToI16(_mm_loadu_si128(S_CAST(const __m128i*, mem_addr)));\n}\n\nHEADER_INLINE VecUc vecuc_loadu(const void* mem_addr) {\n  return VecToUc(_mm_loadu_si128(S_CAST(const __m128i*, mem_addr)));\n}\n\nHEADER_INLINE VecI8 veci8_loadu(const void* mem_addr) {\n  return VecToI8(_mm_loadu_si128(S_CAST(const __m128i*, mem_addr)));\n}\n\nHEADER_INLINE void vec_storeu(void* mem_addr, __m128i vv) {\n  _mm_storeu_si128(S_CAST(__m128i*, mem_addr), vv);\n}\n\n// Repeats arguments in AVX2 case.\nHEADER_INLINE VecW vecw_setr8(char e15, char e14, char e13, char e12, char e11, char e10, char e9, char e8, char e7, char e6, char e5, char e4, char e3, char e2, char e1, char e0) {\n  return VecToW(_mm_setr_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0));\n}\n\nHEADER_INLINE VecU16 vecu16_setr8(char e15, char e14, char e13, char e12, char e11, char e10, char e9, char e8, char e7, char e6, char e5, char e4, char e3, char e2, char e1, char e0) {\n  return VecToU16(_mm_setr_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0));\n}\n\nHEADER_INLINE VecUc vecuc_setr8(char e15, char e14, char e13, char e12, char e11, char e10, char e9, char e8, char e7, char e6, char e5, char e4, char e3, char e2, char e1, char e0) {\n  return VecToUc(_mm_setr_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0));\n}\n\n// Discards last 16 arguments in SSE2/SSE4.2 case.\nHEADER_INLINE VecW vecw_setr8x(\n    char e31, char e30, char e29, char e28,\n    char e27, char e26, char e25, char e24,\n    char e23, char e22, char e21, char e20,\n    char 
e19, char e18, char e17, char e16,\n    __maybe_unused char e15, __maybe_unused char e14,\n    __maybe_unused char e13, __maybe_unused char e12,\n    __maybe_unused char e11, __maybe_unused char e10,\n    __maybe_unused char e9, __maybe_unused char e8,\n    __maybe_unused char e7, __maybe_unused char e6,\n    __maybe_unused char e5, __maybe_unused char e4,\n    __maybe_unused char e3, __maybe_unused char e2,\n    __maybe_unused char e1, __maybe_unused char e0) {\n  return VecToW(_mm_setr_epi8(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16));\n}\n\nHEADER_INLINE VecUc vecuc_setr8x(\n    char e31, char e30, char e29, char e28,\n    char e27, char e26, char e25, char e24,\n    char e23, char e22, char e21, char e20,\n    char e19, char e18, char e17, char e16,\n    __maybe_unused char e15, __maybe_unused char e14,\n    __maybe_unused char e13, __maybe_unused char e12,\n    __maybe_unused char e11, __maybe_unused char e10,\n    __maybe_unused char e9, __maybe_unused char e8,\n    __maybe_unused char e7, __maybe_unused char e6,\n    __maybe_unused char e5, __maybe_unused char e4,\n    __maybe_unused char e3, __maybe_unused char e2,\n    __maybe_unused char e1, __maybe_unused char e0) {\n  return VecToUc(_mm_setr_epi8(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16));\n}\n\nHEADER_INLINE VecW vecw_setr32(uint32_t e3, uint32_t e2, uint32_t e1, uint32_t e0) {\n  return VecToW(_mm_setr_epi32(e3, e2, e1, e0));\n}\n\nHEADER_INLINE VecW vecw_unpacklo8(VecW evens, VecW odds) {\n  return VecToW(_mm_unpacklo_epi8(WToVec(evens), WToVec(odds)));\n}\n\nHEADER_INLINE VecW vecw_unpackhi8(VecW evens, VecW odds) {\n  return VecToW(_mm_unpackhi_epi8(WToVec(evens), WToVec(odds)));\n}\n\nHEADER_INLINE VecI8 veci8_unpacklo8(VecI8 evens, VecI8 odds) {\n  return VecToI8(_mm_unpacklo_epi8(I8ToVec(evens), I8ToVec(odds)));\n}\n\nHEADER_INLINE VecI8 veci8_unpackhi8(VecI8 evens, VecI8 odds) {\n  return VecToI8(_mm_unpackhi_epi8(I8ToVec(evens), I8ToVec(odds)));\n}\n\nHEADER_INLINE VecUc vecuc_unpacklo8(VecUc evens, VecUc odds) {\n  return VecToUc(_mm_unpacklo_epi8(UcToVec(evens), UcToVec(odds)));\n}\n\nHEADER_INLINE VecUc vecuc_unpackhi8(VecUc evens, VecUc odds) {\n  return VecToUc(_mm_unpackhi_epi8(UcToVec(evens), UcToVec(odds)));\n}\n\nHEADER_INLINE VecW vecw_unpacklo16(VecW evens, VecW odds) {\n  return VecToW(_mm_unpacklo_epi16(WToVec(evens), WToVec(odds)));\n}\n\nHEADER_INLINE VecW vecw_unpackhi16(VecW evens, VecW odds) {\n  return VecToW(_mm_unpackhi_epi16(WToVec(evens), WToVec(odds)));\n}\n\nHEADER_INLINE VecW vecw_unpacklo32(VecW evens, VecW odds) {\n  return VecToW(_mm_unpacklo_epi32(WToVec(evens), WToVec(odds)));\n}\n\nHEADER_INLINE VecW vecw_unpackhi32(VecW evens, VecW odds) {\n  return VecToW(_mm_unpackhi_epi32(WToVec(evens), WToVec(odds)));\n}\n\nHEADER_INLINE VecW vecw_unpacklo64(VecW evens, VecW odds) {\n  return VecToW(_mm_unpacklo_epi64(WToVec(evens), WToVec(odds)));\n}\n\nHEADER_INLINE VecW vecw_unpackhi64(VecW evens, VecW odds) {\n  return VecToW(_mm_unpackhi_epi64(WToVec(evens), WToVec(odds)));\n}\n\nHEADER_INLINE VecW vecw_permute0xd8_if_avx2(VecW vv) {\n  return vv;\n}\n\nHEADER_INLINE VecI8 veci8_permute0xd8_if_avx2(VecI8 vv) {\n  return vv;\n}\n\nHEADER_INLINE VecUc vecuc_permute0xd8_if_avx2(VecUc vv) {\n  return vv;\n}\n\nHEADER_INLINE VecW vecw_gather_even(VecW src_lo, VecW src_hi, VecW m8) {\n  return VecToW(_mm_packus_epi16(WToVec(src_lo & m8), WToVec(src_hi & m8)));\n}\n\nHEADER_INLINE VecUc vecuc_gather_even(VecUc src_lo, VecUc 
src_hi, VecUc m8) {\n  return VecToUc(_mm_packus_epi16(UcToVec(src_lo & m8), UcToVec(src_hi & m8)));\n}\n\nHEADER_INLINE VecUc vecuc_gather_odd(VecUc src_lo, VecUc src_hi) {\n  return VecToUc(_mm_packus_epi16(_mm_srli_epi16(UcToVec(src_lo), 8), _mm_srli_epi16(UcToVec(src_hi), 8)));\n}\n\n#    ifdef USE_SHUFFLE8\n#      ifdef SIMDE_ARM_NEON_A64V8_NATIVE\n// See simde_mm_shuffle_epi8().\n// In the future, this may need to be written more carefully in the\n// IGNORE_BUNDLED_SIMDE case.  But this is compatible with simde v0.7.x and\n// v0.8.x.\nSIMDE_FUNCTION_ATTRIBUTES simde__m128i _mm_shuffle_epi8(simde__m128i a, simde__m128i b) {\n  simde__m128i_private a_ = simde__m128i_to_private(a);\n  simde__m128i_private b_ = simde__m128i_to_private(b);\n  simde__m128i_private r_;\n  r_.neon_i8 = vqtbl1q_s8(a_.neon_i8, b_.neon_u8);\n  return simde__m128i_from_private(r_);\n}\n#      endif\nHEADER_INLINE VecW vecw_shuffle8(VecW table, VecW indexes) {\n  return VecToW(_mm_shuffle_epi8(WToVec(table), WToVec(indexes)));\n}\n\nHEADER_INLINE VecU16 vecu16_shuffle8(VecU16 table, VecU16 indexes) {\n  return VecToU16(_mm_shuffle_epi8(U16ToVec(table), U16ToVec(indexes)));\n}\n\nHEADER_INLINE VecUc vecuc_shuffle8(VecUc table, VecUc indexes) {\n  return VecToUc(_mm_shuffle_epi8(UcToVec(table), UcToVec(indexes)));\n}\n#    endif\n#    ifdef USE_SSE42\nHEADER_INLINE VecI32 veci32_max(VecI32 v1, VecI32 v2) {\n  return VecToI32(_mm_max_epi32(I32ToVec(v1), I32ToVec(v2)));\n}\n\nHEADER_INLINE uintptr_t vecw_extract64_0(VecW vv) {\n  return _mm_extract_epi64(WToVec(vv), 0);\n}\n\nHEADER_INLINE uintptr_t vecw_extract64_1(VecW vv) {\n  return _mm_extract_epi64(WToVec(vv), 1);\n}\n\nHEADER_INLINE VecW vecw_blendv(VecW aa, VecW bb, VecW mask) {\n  return VecToW(_mm_blendv_epi8(WToVec(aa), WToVec(bb), WToVec(mask)));\n}\n\nHEADER_INLINE VecU32 vecu32_blendv(VecU32 aa, VecU32 bb, VecU32 mask) {\n  return VecToU32(_mm_blendv_epi8(U32ToVec(aa), U32ToVec(bb), U32ToVec(mask)));\n}\n\nHEADER_INLINE VecU16 vecu16_blendv(VecU16 aa, VecU16 bb, VecU16 mask) {\n  return VecToU16(_mm_blendv_epi8(U16ToVec(aa), U16ToVec(bb), U16ToVec(mask)));\n}\n\nHEADER_INLINE VecUc vecuc_blendv(VecUc aa, VecUc bb, VecUc mask) {\n  return VecToUc(_mm_blendv_epi8(UcToVec(aa), UcToVec(bb), UcToVec(mask)));\n}\n#    else // USE_SSE2, !USE_SSE42\nHEADER_INLINE uintptr_t vecw_extract64_0(VecW vv) {\n  return R_CAST(uintptr_t, _mm_movepi64_pi64(WToVec(vv)));\n}\n\n// compiler recognizes this on ARMv8\nHEADER_INLINE uintptr_t vecw_extract64_1(VecW vv) {\n  const __m128i v0 = _mm_srli_si128(WToVec(vv), 8);\n  return R_CAST(uintptr_t, _mm_movepi64_pi64(v0));\n}\n\n// N.B. 
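the blendv fallbacks below emulate _mm_blendv_epi8 with plain bitwise\n// ops: result = (~mask & aa) | (mask & bb), i.e. bb is selected wherever\n// mask bits are set.  A hedged byte-level example (values illustrative,\n// not from the original source):\n//   aa = 0x11, bb = 0x22, mask = 0xff  ->  result byte 0x22\n//   aa = 0x11, bb = 0x22, mask = 0x00  ->  result byte 0x11\n// N.B. 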
we do *not* enforce the low bits of each mask byte matching the high\n// bit.\nHEADER_INLINE VecW vecw_blendv(VecW aa, VecW bb, VecW mask) {\n  return vecw_and_notfirst(mask, aa) | (mask & bb);\n}\n\nHEADER_INLINE VecU32 vecu32_blendv(VecU32 aa, VecU32 bb, VecU32 mask) {\n  return vecu32_and_notfirst(mask, aa) | (mask & bb);\n}\n\nHEADER_INLINE VecU16 vecu16_blendv(VecU16 aa, VecU16 bb, VecU16 mask) {\n  return vecu16_and_notfirst(mask, aa) | (mask & bb);\n}\n\nHEADER_INLINE VecUc vecuc_blendv(VecUc aa, VecUc bb, VecUc mask) {\n  return vecuc_and_notfirst(mask, aa) | (mask & bb);\n}\n#    endif\n\nHEADER_INLINE VecI16 veci16_max(VecI16 v1, VecI16 v2) {\n  return VecToI16(_mm_max_epi16(I16ToVec(v1), I16ToVec(v2)));\n}\n\nHEADER_INLINE VecW vecw_sad(VecW v1, VecW v2) {\n  return VecToW(_mm_sad_epu8(WToVec(v1), WToVec(v2)));\n}\n\nHEADER_INLINE VecUc vecuc_adds(VecUc v1, VecUc v2) {\n  return VecToUc(_mm_adds_epu8(UcToVec(v1), UcToVec(v2)));\n}\n\nHEADER_INLINE VecU16 vecu16_min8(VecU16 v1, VecU16 v2) {\n  return VecToU16(_mm_min_epu8(U16ToVec(v1), U16ToVec(v2)));\n}\n\nHEADER_INLINE VecUc vecuc_min(VecUc v1, VecUc v2) {\n  return VecToUc(_mm_min_epu8(UcToVec(v1), UcToVec(v2)));\n}\n#  endif  // USE_SSE2, !USE_AVX2\n\nHEADER_INLINE void vecw_storeu(void* mem_addr, VecW vv) {\n  vec_storeu(mem_addr, WToVec(vv));\n}\n\nHEADER_INLINE void vecu32_storeu(void* mem_addr, VecU32 vv) {\n  vec_storeu(mem_addr, U32ToVec(vv));\n}\n\nHEADER_INLINE void veci32_storeu(void* mem_addr, VecI32 vv) {\n  vec_storeu(mem_addr, I32ToVec(vv));\n}\n\nHEADER_INLINE void vecu16_storeu(void* mem_addr, VecU16 vv) {\n  vec_storeu(mem_addr, U16ToVec(vv));\n}\n\nHEADER_INLINE void veci16_storeu(void* mem_addr, VecI16 vv) {\n  vec_storeu(mem_addr, I16ToVec(vv));\n}\n\nHEADER_INLINE void vecuc_storeu(void* mem_addr, VecUc vv) {\n  vec_storeu(mem_addr, UcToVec(vv));\n}\n\nHEADER_INLINE VecW vecw_bytesum(VecW src, VecW m0) {\n  return vecw_sad(src, m0);\n}\n\nCONSTI32(kVec8thUintPerWord, sizeof(intptr_t) / sizeof(Vec8thUint));\n\n#  ifdef FVEC_32\n\n#    ifndef __FMA__\n#      error \"32-byte-float-vector builds require FMA3 as well.\"\n#    endif\n\nCONSTI32(kBytesPerFVec, 32);\nCONSTI32(kBytesPerDVec, 32);\ntypedef float VecF __attribute__ ((vector_size (32)));\ntypedef double VecD __attribute__ ((vector_size (32)));\n\n#    define VCONST_F(xx) {xx, xx, xx, xx, xx, xx, xx, xx}\n#    define VCONST_D(xx) {xx, xx, xx, xx}\n\nHEADER_INLINE VecF VecToF(__m256 xxv) {\n  return R_CAST(VecF, xxv);\n}\n\nHEADER_INLINE VecD VecToD(__m256d xxv) {\n  return R_CAST(VecD, xxv);\n}\n\nHEADER_INLINE __m256 FToVec(VecF xxv) {\n  return R_CAST(__m256, xxv);\n}\n\nHEADER_INLINE __m256d DToVec(VecD xxv) {\n  return R_CAST(__m256d, xxv);\n}\n\nHEADER_INLINE VecF vecf_setzero() {\n  return VecToF(_mm256_setzero_ps());\n}\n\nHEADER_INLINE VecD vecd_setzero() {\n  return VecToD(_mm256_setzero_pd());\n}\n\n#  else  // !FVEC_32\n\nCONSTI32(kBytesPerFVec, 16);\nCONSTI32(kBytesPerDVec, 16);\ntypedef float VecF __attribute__ ((vector_size (16)));\ntypedef double VecD __attribute__ ((vector_size (16)));\n\n#    define VCONST_F(xx) {xx, xx, xx, xx}\n#    define VCONST_D(xx) {xx, xx}\n\nHEADER_INLINE VecF VecToF(__m128 xxv) {\n  return R_CAST(VecF, xxv);\n}\n\nHEADER_INLINE VecD VecToD(__m128d xxv) {\n  return R_CAST(VecD, xxv);\n}\n\nHEADER_INLINE __m128 FToVec(VecF xxv) {\n  return R_CAST(__m128, xxv);\n}\n\nHEADER_INLINE __m128d DToVec(VecD xxv) {\n  return R_CAST(__m128d, xxv);\n}\n\nHEADER_INLINE VecF vecf_setzero() {\n  return 
VecToF(_mm_setzero_ps());\n}\n\nHEADER_INLINE VecD vecd_setzero() {\n  return VecToD(_mm_setzero_pd());\n}\n\n#  endif  // !FVEC_32\n\nHEADER_INLINE VecUc VecWToUc(VecW vv) {\n  return R_CAST(VecUc, vv);\n}\n\nHEADER_INLINE VecW VecU16ToW(VecU16 vv) {\n  return R_CAST(VecW, vv);\n}\n\nHEADER_INLINE VecW VecUcToW(VecUc vv) {\n  return R_CAST(VecW, vv);\n}\n\nHEADER_INLINE void vecw_lo_and_hi_nybbles(VecW cur_vec, VecW m4, VecW* vec_lo_ptr, VecW* vec_hi_ptr) {\n  // Assumes m4 is VCONST_W(kMask0F0F).\n  // Returned vec_lo and vec_hi have top nybble of each byte zeroed out.\n  cur_vec = vecw_permute0xd8_if_avx2(cur_vec);\n  // AVX2:\n  //   vec_even contains {0, 2, 4, ..., 14, 32, 34, ..., 46,\n  //                      16, 18, ..., 30, 48, ... 62}\n  //   vec_odd contains {1, 3, 5, ..., 15, 33, 35, ..., 47,\n  //                     17, 19, ..., 31, 49, ..., 63}\n  // SSE2:\n  //   vec_even contains {0, 2, 4, ..., 30}\n  //   vec_odd contains {1, 3, 5, ..., 31}\n  const VecW vec_even = cur_vec & m4;\n  const VecW vec_odd = vecw_srli(cur_vec, 4) & m4;\n\n  // AVX2:\n  //   vec_lo contains {0, 1, 2, ..., 31}\n  //   vec_hi contains {32, 33, 34, ..., 63}\n  // SSE2:\n  //   vec_lo contains {0, 1, 2, ..., 15}\n  //   vec_hi contains {16, 17, 18, ..., 31}\n  *vec_lo_ptr = vecw_unpacklo8(vec_even, vec_odd);\n  *vec_hi_ptr = vecw_unpackhi8(vec_even, vec_odd);\n}\n#else  // !USE_SSE2\n#  ifdef __LP64__\nCONSTI32(kBytesPerVec, 8);\n#  else\nCONSTI32(kBytesPerVec, 4);\n#  endif\nCONSTI32(kBytesPerFVec, 4);\nCONSTI32(kBytesPerDVec, 8);\n\ntypedef uintptr_t VecW;\ntypedef uint32_t VecU32;\ntypedef float VecF;\ntypedef double VecD;\n// VecI16 and VecI8 aren't worth the trouble of scaling down to 32-bit\n\n#  define VCONST_W(xx) (xx)\n\nHEADER_INLINE VecW vecw_setzero() {\n  return k0LU;\n}\n\nHEADER_INLINE VecW vecw_srli(VecW vv, uint32_t ct) {\n  return vv >> ct;\n}\n\nHEADER_INLINE VecW vecw_slli(VecW vv, uint32_t ct) {\n  return vv << ct;\n}\n\nHEADER_INLINE VecW vecw_set1(uintptr_t ulii) {\n  return ulii;\n}\n\nHEADER_INLINE VecW vecw_loadu(const void* mem_addr) {\n  return *S_CAST(const VecW*, mem_addr);\n}\n\n#  ifdef __LP64__\nHEADER_INLINE VecW vecw_bytesum(VecW src, __maybe_unused VecW m0) {\n  src = (src & 0x00ff00ff00ff00ffLLU) + ((src >> 8) & 0x00ff00ff00ff00ffLLU);\n  return (src * 0x1000100010001LLU) >> 48;\n}\n#  else\nHEADER_INLINE VecW vecw_bytesum(VecW src, __maybe_unused VecW m0) {\n  src = (src & 0x00ff00ff) + ((src >> 8) & 0x00ff00ff);\n  return (src & 0xffff) + (src >> 16);\n}\n#  endif\n\nHEADER_INLINE VecW vecw_and_notfirst(VecW excl, VecW main) {\n  return (~excl) & main;\n}\n\nHEADER_INLINE VecU32 vecu32_and_notfirst(VecU32 excl, VecU32 main) {\n  return (~excl) & main;\n}\n#endif  // !USE_SSE2\n\nHEADER_INLINE uint32_t* I32ToU32(int32_t* pp) {\n  return R_CAST(uint32_t*, pp);\n}\n\n// Unfortunately, we need to spell out S_CAST(uintptr_t, 0) instead of just\n// typing k0LU in C99.\nstatic const uintptr_t kMask5555 = (~S_CAST(uintptr_t, 0)) / 3;\nstatic const uintptr_t kMaskAAAA = ((~S_CAST(uintptr_t, 0)) / 3) * 2;\nstatic const uintptr_t kMask3333 = (~S_CAST(uintptr_t, 0)) / 5;\nstatic const uintptr_t kMask1111 = (~S_CAST(uintptr_t, 0)) / 15;\nstatic const uintptr_t kMask0F0F = (~S_CAST(uintptr_t, 0)) / 17;\nstatic const uintptr_t kMask0101 = (~S_CAST(uintptr_t, 0)) / 255;\nstatic const uintptr_t kMask00FF = (~S_CAST(uintptr_t, 0)) / 257;\nstatic const uintptr_t kMask0001 = (~S_CAST(uintptr_t, 0)) / 65535;\nstatic const uintptr_t kMask0000FFFF = (~S_CAST(uintptr_t, 0)) / 
65537;\nstatic const uintptr_t kMask00000001 = (~S_CAST(uintptr_t, 0)) / 4294967295U;\n\nstatic const uintptr_t kMask000000FF = (~S_CAST(uintptr_t, 0)) / 16843009;\nstatic const uintptr_t kMask000F = (~S_CAST(uintptr_t, 0)) / 4369;\nstatic const uintptr_t kMask0303 = (~S_CAST(uintptr_t, 0)) / 85;\n\nCONSTI32(kBitsPerVec, kBytesPerVec * CHAR_BIT);\n\n// We now use Knuth's Nyp/Nybble vocabulary for 2-bit and 4-bit elements,\n// respectively.\nCONSTI32(kNypsPerVec, kBytesPerVec * 4);\nCONSTI32(kNybblesPerVec, kBytesPerVec * 2);\n\nCONSTI32(kBitsPerWordD2, kBitsPerWord / 2);\nCONSTI32(kBitsPerWordD4, kBitsPerWord / 4);\n\n// number of bytes in a word\nCONSTI32(kBytesPerWord, kBitsPerWord / CHAR_BIT);\n\nCONSTI32(kInt16PerWord, kBytesPerWord / 2);\n\nstatic_assert(CHAR_BIT == 8, \"plink2_base requires CHAR_BIT == 8.\");\nstatic_assert(sizeof(int8_t) == 1, \"plink2_base requires sizeof(int8_t) == 1.\");\nstatic_assert(sizeof(int16_t) == 2, \"plink2_base requires sizeof(int16_t) == 2.\");\nstatic_assert(sizeof(int32_t) == 4, \"plink2_base requires sizeof(int32_t) == 4.\");\nstatic_assert(sizeof(int) >= 4, \"plink2_base requires sizeof(int) >= 4.\");\nstatic_assert(sizeof(intptr_t) == kBytesPerWord, \"plink2_base requires sizeof(intptr_t) == kBytesPerWord.\");\nstatic_assert(sizeof(int64_t) == 8, \"plink2_base requires sizeof(int64_t) == 8.\");\n\nCONSTI32(kWordsPerVec, kBytesPerVec / kBytesPerWord);\nCONSTI32(kInt32PerVec, kBytesPerVec / 4);\nCONSTI32(kInt16PerVec, kBytesPerVec / 2);\n\nCONSTI32(kFloatPerFVec, kBytesPerFVec / 4);\nCONSTI32(kDoublePerDVec, kBytesPerDVec / 8);\n\n#if defined(__APPLE__) && defined(__LP64__) && !defined(__x86_64__)\n// TODO: make this 128 once that stops breaking code\n#  define CACHELINE128\nCONSTI32(kCacheline, 128);\n#else\n#  define CACHELINE64\nCONSTI32(kCacheline, 64);\n#endif\n\nCONSTI32(kBitsPerCacheline, kCacheline * CHAR_BIT);\nCONSTI32(kNypsPerCacheline, kCacheline * 4);\nCONSTI32(kInt16PerCacheline, kCacheline / sizeof(int16_t));\nCONSTI32(kInt32PerCacheline, kCacheline / sizeof(int32_t));\nCONSTI32(kInt64PerCacheline, kCacheline / sizeof(int64_t));\nCONSTI32(kWordsPerCacheline, kCacheline / kBytesPerWord);\nCONSTI32(kDoublesPerCacheline, kCacheline / sizeof(double));\nCONSTI32(kVecsPerCacheline, kCacheline / kBytesPerVec);\n\n// could use ioctl, etc. to dynamically determine this later, and pass it as a\n// parameter to e.g. PgfiMultiread()\nCONSTI32(kDiskBlockSize, 4096);\n\nCONSTI32(kPglFwriteBlockSize, 131072);\n\n// unsafe to fread or fwrite more bytes than this on e.g. OS X\nCONSTI32(kMaxBytesPerIO, 0x7ffff000);\n\n// Maximum size of \"dynamically\" allocated line load buffer.  (This is the\n// limit that applies to .vcf and similar files.)  Inconvenient to go higher\n// since fgets() takes an int32_t size argument.\n#if defined(__APPLE__) && defined(__LP64__) && !defined(__x86_64__)\nCONSTI32(kMaxLongLine, 0x7fffff80);\n#else\nCONSTI32(kMaxLongLine, 0x7fffffc0);\n#endif\nstatic_assert(!(kMaxLongLine % kCacheline), \"kMaxLongLine must be a multiple of kCacheline.\");\n\n#ifdef __APPLE__\n// OS X is limited to 256?\nCONSTI32(kMaxOpenFiles, 252);\n#else\n// Can't assume more than 512 are allowed on Windows, with current compilation\n// settings.\nCONSTI32(kMaxOpenFiles, 504);\n#endif\n\n// note that this is NOT foolproof: see e.g.\n// http://insanecoding.blogspot.com/2007/11/pathmax-simply-isnt.html .  (This\n// is why I haven't bothered with OS-based #ifdefs here.)  But it should be\n// good enough in practice.  
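A hedged usage sketch\n// (hypothetical caller, not part of this header):\n//   char fname[kPglFnamesize];  // kPglFnamesize is defined just below\n// 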
And PATH_MAX itself is still relevant due to use\n// of realpath().\nCONSTI32(kPglFnamesize, 4096);\n#if defined(PATH_MAX) && !defined(_WIN32)\nstatic_assert(kPglFnamesize >= PATH_MAX, \"plink2_base assumes PATH_MAX <= 4096.  (Safe to increase kPglFnamesize to address this, up to 131072.)\");\n#endif\n\n// safe errstr_buf size for PgenInitPhase{1,2}(), PgrValidate(),\n// BitmapReaderInitPhase{1,2}()\nCONSTI32(kPglErrstrBufBlen, kPglFnamesize + 256);\n\n// shared between .pgen and plink-bitmap formats\n// currently must be power of 2, and multiple of (kBitsPerWord / 2)\nCONSTI32(kPglDifflistGroupSize, 64);\n\n// debug\nHEADER_INLINE void PrintVec(const void* vv) {\n  const unsigned char* vv_alias = S_CAST(const unsigned char*, vv);\n  for (uint32_t uii = 0; uii != kBytesPerVec; ++uii) {\n    printf(\"%u \", vv_alias[uii]);\n  }\n  printf(\"\\n\");\n}\n\nHEADER_INLINE void PrintVecD(const VecD* vv_ptr, const char* preprint) {\n  fputs(preprint, stdout);\n  const double* vv_alias = R_CAST(const double*, vv_ptr);\n  for (uint32_t uii = 0; uii != kDoublePerDVec; ++uii) {\n    printf(\" %g\", vv_alias[uii]);\n  }\n  fputs(\"\\n\", stdout);\n}\n\n#if __cplusplus >= 201103L\n// Main application of std::array in this codebase is enforcing length when\n// passing references between functions.  Conversely, if the array type has\n// different lengths in different functions (e.g. col_skips[]/col_types[]), we\n// actively want to avoid &arr[0] clutter.\n// When neither applies, it doesn't really matter whether we use this or not;\n// I normally won't use it unless it plausibly makes sense to pass\n// fixed-length-array-references in the future.\n#  define STD_ARRAY_DECL(tt, nn, vv) std::array<tt, nn> vv\n#  define STD_ARRAY_REF(tt, nn) std::array<tt, nn>&\n\n// necessary if tt is a pointer type, otherwise optional\n#  define STD_ARRAY_KREF(tt, nn) const std::array<tt, nn>&\n\n#  define STD_ARRAY_COPY(src, nn, dst) static_assert(sizeof(dst) == sizeof((dst)[0]) * nn, \"Invalid STD_ARRAY_COPY() invocation.\"); (dst) = (src)\n\n#  define STD_ARRAY_PTR_TYPE(tt, nn) std::array<tt, nn>*\n#  define STD_ARRAY_PTR_DECL(tt, nn, vv) std::array<tt, nn>* vv\n\n// argh, need double-braces for C++11 std::array and single-braces for C\n#  define STD_ARRAY_INIT_START() {\n#  define STD_ARRAY_INIT_END() }\n\ntemplate <class T, std::size_t N> void STD_ARRAY_FILL0(std::array<T, N>& arr) {\n  arr.fill(0);\n}\n\n// plain STD_ARRAY_FILL0() can't be used on array-references due to fallback\n// code.\n// this macro ensures that we *only* use it with uint32_t array-references\n#  define STD_ARRAY_REF_FILL0(ct, aref) static_assert(ct * sizeof(aref[0]) == sizeof(aref), \"invalid STD_ARRAY_REF_FILL0() invocation\"); aref.fill(0)\n\n#  define NONCOPYABLE(struct_name) \\\n  struct_name() = default; \\\n  struct_name(const struct_name&) = delete; \\\n  struct_name& operator=(const struct_name&) = delete\n\n#  define MOVABLE_BUT_NONCOPYABLE(struct_name) \\\n  struct_name() = default; \\\n  struct_name(const struct_name&) = delete; \\\n  struct_name& operator=(const struct_name&) = delete; \\\n  struct_name(struct_name&&) = default; \\\n  struct_name& operator=(struct_name&&) = default\n\n#else\n#  define STD_ARRAY_DECL(tt, nn, vv) tt vv[nn]\n#  define STD_ARRAY_REF(tt, nn) tt* const\n#  define STD_ARRAY_KREF(tt, nn) tt const* const\n#  define STD_ARRAY_COPY(src, nn, dst) memcpy(dst, src, nn * sizeof(dst[0]));\n#  define STD_ARRAY_PTR_TYPE(tt, nn) tt(*)[nn]\n#  define STD_ARRAY_PTR_DECL(tt, nn, vv) tt(*vv)[nn]\n#  define 
STD_ARRAY_INIT_START()\n#  define STD_ARRAY_INIT_END()\n#  define STD_ARRAY_FILL0(arr) memset(arr, 0, sizeof(arr))\n#  define STD_ARRAY_REF_FILL0(ct, aref) memset(aref, 0, ct * sizeof(*aref))\n\n#  define NONCOPYABLE(struct_name)\n#  define MOVABLE_BUT_NONCOPYABLE(struct_name)\n#endif\n\ntypedef union {\n  VecW vw;\n\n  STD_ARRAY_DECL(uintptr_t, kWordsPerVec, w);\n\n  STD_ARRAY_DECL(uint32_t, kInt32PerVec, u32);\n} UniVec;\n\ntypedef union {\n  VecF vf;\n  STD_ARRAY_DECL(float, kFloatPerFVec, f4);\n} UniVecF;\n\ntypedef union {\n  VecD vd;\n  STD_ARRAY_DECL(double, kDoublePerDVec, d8);\n} UniVecD;\n\n// sum must fit in 16 bits\nHEADER_INLINE uintptr_t UniVecHsum16(UniVec uv) {\n#ifdef __LP64__\n#  ifdef USE_AVX2\n  return ((uv.w[0] + uv.w[1] + uv.w[2] + uv.w[3]) * kMask0001) >> 48;\n#  else\n  return ((uv.w[0] + uv.w[1]) * kMask0001) >> 48;\n#  endif\n#else\n  return (uv.w[0] * kMask0001) >> 16;\n#endif\n}\n\n// sum must fit in 32 bits\nHEADER_INLINE uintptr_t UniVecHsum32(UniVec uv) {\n#ifdef __LP64__\n#  ifdef USE_AVX2\n  return ((uv.w[0] + uv.w[1] + uv.w[2] + uv.w[3]) * kMask00000001) >> 32;\n#  else\n  return ((uv.w[0] + uv.w[1]) * kMask00000001) >> 32;\n#  endif\n#else\n  return uv.w[0];\n#endif\n}\n\nHEADER_INLINE float VecFHsum(VecF vecf) {\n  UniVecF uvf;\n  uvf.vf = vecf;\n#ifdef __LP64__\n#  ifdef FVEC_32\n  // tested various uses of _mm256_hadd_ps, couldn't get them to be faster\n  return uvf.f4[0] + uvf.f4[1] + uvf.f4[2] + uvf.f4[3] + uvf.f4[4] + uvf.f4[5] + uvf.f4[6] + uvf.f4[7];\n#  else\n  return uvf.f4[0] + uvf.f4[1] + uvf.f4[2] + uvf.f4[3];\n#  endif\n#else\n  return uvf.f4[0];\n#endif\n}\n\nHEADER_INLINE double VecDHsum(VecD vecd) {\n  UniVecD uvd;\n  uvd.vd = vecd;\n#ifdef __LP64__\n#  ifdef FVEC_32\n  return uvd.d8[0] + uvd.d8[1] + uvd.d8[2] + uvd.d8[3];\n#  else\n  return uvd.d8[0] + uvd.d8[1];\n#  endif\n#else\n  return uvd.d8[0];\n#endif\n}\n\n#ifdef USE_AVX2\nHEADER_INLINE uintptr_t UnpackHalfwordToWord(uintptr_t hw) {\n  return _pdep_u64(hw, kMask5555);\n}\n\nHEADER_INLINE uintptr_t UnpackHalfwordToWordShift1(uintptr_t hw) {\n  return _pdep_u64(hw, kMaskAAAA);\n}\n\nHEADER_INLINE Vec4thUint UnpackVec8thUintTo4th(Vec8thUint hw) {\n  return _pdep_u64(hw, kMask5555);\n}\n\nHEADER_INLINE Halfword PackWordToHalfword(uintptr_t ww) {\n  return _pext_u64(ww, kMask5555);\n}\n\nHEADER_INLINE Halfword PackWordToHalfwordMask5555(uintptr_t ww) {\n  return _pext_u64(ww, kMask5555);\n}\n\nHEADER_INLINE Halfword PackWordToHalfwordMaskAAAA(uintptr_t ww) {\n  return _pext_u64(ww, kMaskAAAA);\n}\n\nHEADER_INLINE uintptr_t PackTwo5555Mask(uintptr_t lo, uintptr_t hi) {\n  const Halfword lo_packed = PackWordToHalfwordMask5555(lo);\n  const uintptr_t hi_packed = _pext_u64(hi, kMask5555);\n  return lo_packed | (hi_packed << kBitsPerWordD2);\n}\n\nHEADER_INLINE Vec8thUint PackVec4thUintTo8th(Vec4thUint ww) {\n  return _pext_u64(ww, kMask5555);\n}\n\nHEADER_INLINE Vec16thUint PackVec8thUintTo16th(Vec8thUint ww) {\n  return _pext_u64(ww, kMask5555);\n}\n\nHEADER_INLINE Halfword Pack3333(uintptr_t ww) {\n  return _pext_u64(ww, kMask3333);\n}\n\nHEADER_INLINE Halfword Pack3333Mask(uintptr_t ww) {\n  return _pext_u64(ww, kMask3333);\n}\n\nHEADER_INLINE uintptr_t PackTwo3333Mask(uintptr_t lo, uintptr_t hi) {\n  const Halfword lo_packed = Pack3333Mask(lo);\n  const uintptr_t hi_packed = _pext_u64(hi, kMask3333);\n  return lo_packed | (hi_packed << kBitsPerWordD2);\n}\n\nHEADER_INLINE uintptr_t Unpack0F0F(uintptr_t hw) {\n  return _pdep_u64(hw, kMask0F0F);\n}\n\nHEADER_INLINE Halfword 
Pack0F0F(uintptr_t ww) {\n  return _pext_u64(ww, kMask0F0F);\n}\n\nHEADER_INLINE Halfword Pack0F0FMask(uintptr_t ww) {\n  return _pext_u64(ww, kMask0F0F);\n}\n\nHEADER_INLINE uintptr_t PackTwo0F0F(uintptr_t lo, uintptr_t hi) {\n  const Halfword lo_packed = _pext_u64(lo, kMask0F0F);\n  const uintptr_t hi_packed = _pext_u64(hi, kMask0F0F);\n  return lo_packed | (hi_packed << kBitsPerWordD2);\n}\n\nHEADER_INLINE uintptr_t PackTwo0F0FMask(uintptr_t lo, uintptr_t hi) {\n  return PackTwo0F0F(lo, hi);\n}\n\nHEADER_INLINE uintptr_t Unpack0303(uintptr_t qw) {\n  return _pdep_u64(qw, kMask0303);\n}\n\nHEADER_INLINE Quarterword Pack0303(uintptr_t ww) {\n  return _pext_u64(ww, kMask0303);\n}\n\nHEADER_INLINE Quarterword Pack0303Mask(uintptr_t ww) {\n  return _pext_u64(ww, kMask0303);\n}\n\n// See https://stackoverflow.com/questions/21622212/how-to-perform-the-inverse-of-mm256-movemask-epi8-vpmovmskb .\nHEADER_INLINE VecUc InverseMovemaskFF(Vec8thUint mask) {\n  __m256i vmask = _mm256_set1_epi32(mask);\n  const __m256i byte_gather = _mm256_setr_epi64x(0, kMask0101, 2 * kMask0101, 3 * kMask0101);\n  vmask = _mm256_shuffle_epi8(vmask, byte_gather);\n  const __m256i bit_mask = _mm256_set1_epi64x(0x7fbfdfeff7fbfdfeLL);\n  vmask = _mm256_or_si256(vmask, bit_mask);\n  return R_CAST(VecUc, _mm256_cmpeq_epi8(vmask, _mm256_set1_epi64x(-1)));\n}\n\n// If we're only interested in the even bits of mask.  No need to mask out odd\n// bits before calling.\nHEADER_INLINE VecUc InverseMovespreadmaskFF(Vec4thUint mask) {\n  __m256i vmask = _mm256_set1_epi64x(mask);\n  const __m256i byte_gather = _mm256_setr_epi32(0, 0x01010101, 0x02020202, 0x03030303, 0x04040404, 0x05050505, 0x06060606, 0x07070707);\n  vmask = _mm256_shuffle_epi8(vmask, byte_gather);\n  const __m256i bit_mask = _mm256_set1_epi32(0xbfeffbfeU);\n  vmask = _mm256_or_si256(vmask, bit_mask);\n  return R_CAST(VecUc, _mm256_cmpeq_epi8(vmask, _mm256_set1_epi64x(-1)));\n}\n\n#else  // !USE_AVX2\nHEADER_INLINE uintptr_t UnpackHalfwordToWord(uintptr_t hw) {\n#  ifdef __LP64__\n  hw = (hw | (hw << 16)) & kMask0000FFFF;\n#  endif\n  hw = (hw | (hw << 8)) & kMask00FF;\n  hw = (hw | (hw << 4)) & kMask0F0F;\n  hw = (hw | (hw << 2)) & kMask3333;\n  return ((hw | (hw << 1)) & kMask5555);\n}\n\nHEADER_INLINE uintptr_t UnpackHalfwordToWordShift1(uintptr_t hw) {\n  return UnpackHalfwordToWord(hw) << 1;\n}\n\nHEADER_INLINE Halfword PackWordToHalfword(uintptr_t ww) {\n  // assumes only even bits of ww can be set\n  ww = (ww | (ww >> 1)) & kMask3333;\n  ww = (ww | (ww >> 2)) & kMask0F0F;\n  ww = (ww | (ww >> 4)) & kMask00FF;\n#  ifdef __LP64__\n  ww = (ww | (ww >> 8)) & kMask0000FFFF;\n#  endif\n  return S_CAST(Halfword, ww | (ww >> kBitsPerWordD4));\n}\n\nHEADER_INLINE Halfword PackWordToHalfwordMask5555(uintptr_t ww) {\n  return PackWordToHalfword(ww & kMask5555);\n}\n\nHEADER_INLINE Halfword PackWordToHalfwordMaskAAAA(uintptr_t ww) {\n  return PackWordToHalfword((ww >> 1) & kMask5555);\n}\n\nHEADER_INLINE uintptr_t PackTwo5555Mask(uintptr_t lo, uintptr_t hi) {\n  const Halfword lo_packed = PackWordToHalfwordMask5555(lo);\n\n  // Avoid Halfword cast here.\n  hi = hi & kMask5555;\n  hi = (hi | (hi >> 1)) & kMask3333;\n  hi = (hi | (hi >> 2)) & kMask0F0F;\n  hi = (hi | (hi >> 4)) & kMask00FF;\n#  ifdef __LP64__\n  hi = (hi | (hi >> 8)) & kMask0000FFFF;\n#  endif\n  const uintptr_t hi_packed = hi | (hi >> kBitsPerWordD4);\n  return lo_packed | (hi_packed << kBitsPerWordD2);\n}\n\nHEADER_INLINE Halfword Pack3333(uintptr_t ww) {\n  ww = (ww | (ww >> 2)) & kMask0F0F;\n  ww = (ww | 
(ww >> 4)) & kMask00FF;\n#  ifdef __LP64__\n  ww = (ww | (ww >> 8)) & kMask0000FFFF;\n#  endif\n  return S_CAST(Halfword, ww | (ww >> kBitsPerWordD4));\n}\n\nHEADER_INLINE Halfword Pack3333Mask(uintptr_t ww) {\n  return Pack3333(ww & kMask3333);\n}\n\nHEADER_INLINE uintptr_t PackTwo3333Mask(uintptr_t lo, uintptr_t hi) {\n  const Halfword lo_packed = Pack3333Mask(lo);\n\n  // Avoid Halfword cast here.\n  hi = hi & kMask3333;\n  hi = (hi | (hi >> 2)) & kMask0F0F;\n  hi = (hi | (hi >> 4)) & kMask00FF;\n#  ifdef __LP64__\n  hi = (hi | (hi >> 8)) & kMask0000FFFF;\n#  endif\n  const uintptr_t hi_packed = hi | (hi >> kBitsPerWordD4);\n  return lo_packed | (hi_packed << kBitsPerWordD2);\n}\n\nHEADER_INLINE uintptr_t Unpack0F0F(uintptr_t hw) {\n#  ifdef __LP64__\n  hw = (hw | (hw << 16)) & kMask0000FFFF;\n#  endif\n  hw = (hw | (hw << 8)) & kMask00FF;\n  return ((hw | (hw << 4)) & kMask0F0F);\n}\n\nHEADER_INLINE Halfword Pack0F0F(uintptr_t ww) {\n  ww = (ww | (ww >> 4)) & kMask00FF;\n#  ifdef __LP64__\n  ww = (ww | (ww >> 8)) & kMask0000FFFF;\n#  endif\n  return S_CAST(Halfword, ww | (ww >> kBitsPerWordD4));\n}\n\nHEADER_INLINE Halfword Pack0F0FMask(uintptr_t ww) {\n  return Pack0F0F(ww & kMask0F0F);\n}\n\nHEADER_INLINE uintptr_t PackTwo0F0F(uintptr_t lo, uintptr_t hi) {\n  const Halfword lo_packed = Pack0F0F(lo);\n\n  // Avoid Halfword cast here.\n  hi = (hi | (hi >> 4)) & kMask00FF;\n#  ifdef __LP64__\n  hi = (hi | (hi >> 8)) & kMask0000FFFF;\n#  endif\n  const uintptr_t hi_packed = hi | (hi >> kBitsPerWordD4);\n  return lo_packed | (hi_packed << kBitsPerWordD2);\n}\n\nHEADER_INLINE uintptr_t PackTwo0F0FMask(uintptr_t lo, uintptr_t hi) {\n  const Halfword lo_packed = Pack0F0FMask(lo);\n\n  // Avoid Halfword cast here.\n  hi = hi & kMask0F0F;\n  hi = (hi | (hi >> 4)) & kMask00FF;\n#  ifdef __LP64__\n  hi = (hi | (hi >> 8)) & kMask0000FFFF;\n#  endif\n  const uintptr_t hi_packed = hi | (hi >> kBitsPerWordD4);\n  return lo_packed | (hi_packed << kBitsPerWordD2);\n}\n\n\nHEADER_INLINE uintptr_t Unpack0303(uintptr_t qw) {\n  // ................................................fedcba9876543210\n#  ifdef __LP64__\n  qw = (qw | (qw << 24)) & kMask000000FF;\n  // ........................fedcba98........................76543210\n#  endif\n  qw = qw | (qw << 12);\n  // ............fedcba98....fedcba98............76543210....76543210\n\n  qw = qw | (qw << 6);\n  // ......fedcbaXXdcbaXXdcbaXXdcba98......765432XX5432XX5432XX543210\n\n  return (qw & kMask0303);\n  // ......fe......dc......ba......98......76......54......32......10\n}\n\nHEADER_INLINE Quarterword Pack0303(uintptr_t ww) {\n  // ......fe......dc......ba......98......76......54......32......10\n\n  ww = ww | (ww >> 6);\n  // ......fe....fedc....dcba....ba98....9876....7654....5432....3210\n\n  ww = ww | (ww >> 12);\n  // ......fe....fedc..fedcbafedcba98dcba9876ba9876549876543276543210\n\n#  ifdef __LP64__\n  ww = ww & kMask000000FF;\n  // ........................fedcba98........................76543210\n\n  return S_CAST(Quarterword, ww | (ww >> 24));\n#  else\n  return S_CAST(Quarterword, ww);\n#  endif\n}\n\nHEADER_INLINE uintptr_t Pack0303Mask(uintptr_t ww) {\n  return Pack0303(ww & kMask0303);\n}\n\n#  ifdef USE_SSE2\nHEADER_INLINE Vec4thUint UnpackVec8thUintTo4th(Vec8thUint hw) {\n  hw = (hw | (hw << 8)) & 0x00ff00ffU;\n  hw = (hw | (hw << 4)) & 0x0f0f0f0fU;\n  hw = (hw | (hw << 2)) & 0x33333333U;\n  return (hw | (hw << 1)) & 0x55555555U;\n}\n\nHEADER_INLINE Vec8thUint PackVec4thUintTo8th(Vec4thUint ww) {\n  ww = (ww | (ww >> 1)) & kMask3333;\n 
 ww = (ww | (ww >> 2)) & kMask0F0F;\n  ww = (ww | (ww >> 4)) & kMask00FF;\n  return S_CAST(Vec8thUint, ww | (ww >> 8));\n}\n\nHEADER_INLINE Vec16thUint PackVec8thUintTo16th(Vec8thUint ww) {\n  ww = (ww | (ww >> 1)) & 0x3333;\n  ww = (ww | (ww >> 2)) & 0x0f0f;\n  return S_CAST(Vec16thUint, ww | (ww >> 4));\n}\n\n#    ifdef USE_SSE42\nHEADER_INLINE VecUc InverseMovemaskFF(Vec8thUint mask) {\n  __m128i vmask = _mm_set1_epi16(mask);\n  const __m128i byte_gather = _mm_setr_epi32(0, 0, 0x01010101, 0x01010101);\n  vmask = _mm_shuffle_epi8(vmask, byte_gather);\n  const __m128i bit_mask = _mm_set1_epi64x(0x7fbfdfeff7fbfdfeLL);\n  vmask = _mm_or_si128(vmask, bit_mask);\n  return R_CAST(VecUc, _mm_cmpeq_epi8(vmask, _mm_set1_epi64x(-1)));\n}\n\nHEADER_INLINE VecUc InverseMovespreadmaskFF(Vec4thUint mask) {\n  __m128i vmask = _mm_set1_epi32(mask);\n  const __m128i byte_gather = _mm_setr_epi32(0, 0x01010101, 0x02020202, 0x03030303);\n  vmask = _mm_shuffle_epi8(vmask, byte_gather);\n  const __m128i bit_mask = _mm_set1_epi32(0xbfeffbfeU);\n  vmask = _mm_or_si128(vmask, bit_mask);\n  return R_CAST(VecUc, _mm_cmpeq_epi8(vmask, _mm_set1_epi64x(-1)));\n}\n#    endif\n\n#  endif  // USE_SSE2\n#endif  // !USE_AVX2\n\n// alignment must be a power of 2\n// tried splitting out RoundDownPow2U32() and RoundUpPow2U32() functions, no\n// practical difference\nHEADER_CINLINE uintptr_t RoundDownPow2(uintptr_t val, uintptr_t alignment) {\n  return val & (~(alignment - 1));\n}\n\nHEADER_CINLINE uint64_t RoundDownPow2U64(uint64_t val, uint64_t alignment) {\n  return val & (~(alignment - 1));\n}\n\nHEADER_CINLINE uintptr_t RoundUpPow2(uintptr_t val, uintptr_t alignment) {\n  return (val + alignment - 1) & (~(alignment - 1));\n}\n\nHEADER_CINLINE uint64_t RoundUpPow2U64(uint64_t val, uint64_t alignment) {\n  return (val + alignment - 1) & (~(alignment - 1));\n}\n\n\n// This is best when the divisor is constant (so (divisor - 1) can be\n// collapsed), and handles val == 0 properly.  If the divisor isn't constant\n// and val is guaranteed to be nonzero, go with explicit\n// \"1 + (val - 1) / divisor\".\n//\n// Note that this fails if (val + divisor - 1) overflows the widest integer\n// type on the left.\n//\n// Since forced-uint32_t RoundDownPow2 was pointless, it stands to reason that\n// the same applies to DivUp.  With that said, we may as well make divisor a\n// uint32_t just in case this ever gets used on a not-known-at-compile-time\n// divisor, since 64/64 can be slower than 64/32.\nHEADER_CINLINE uintptr_t DivUp(uintptr_t val, uint32_t divisor) {\n  return (val + divisor - 1) / divisor;\n}\n\nHEADER_CINLINE uint64_t DivUpU64(uint64_t val, uint32_t divisor) {\n  return (val + divisor - 1) / divisor;\n}\n\n// \"Nz\" means nonzero in two ways:\n// * result is in [1, modulus], not [0, modulus - 1]\n// * val should not be zero (though this expression still works if val is zero\n//   and modulus is a hardcoded power of 2)\nHEADER_INLINE uint32_t ModNz(uintptr_t val, uint32_t modulus) {\n  return (1 + ((val - 1) % modulus));\n}\n\n// No need for ModNzU64 in practice, since high bits don't affect result when\n// modulus is a power of 2.\n\n// Equivalent to (static_cast<int32_t>(uii) < 0).  Most frequently used on\n// possibly-error chromosome indexes.\nHEADER_INLINE uint32_t IsI32Neg(uint32_t uii) {\n  return uii >> 31;\n}\n\nHEADER_INLINE uint32_t abs_i32(int32_t ii) {\n  // Arithmetic right shift.  
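(Right-shifting a negative signed value is\n  // implementation-defined in C99, but every compiler we support\n  // sign-extends.)  A hedged worked example, not from the original source:\n  //   ii = -5  ->  S_CAST(uint32_t, ii) = 0xfffffffbU, neg_sign_bit =\n  //   0xffffffffU; (0xfffffffbU ^ 0xffffffffU) - 0xffffffffU = 4 + 1 = 5\n  //   (subtracting 0xffffffffU adds 1 mod 2^32).\n  // The shift yields 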
0xffffffffU when ii is negative, 0 otherwise.\n  const uint32_t neg_sign_bit = S_CAST(uint32_t, ii >> 31);\n\n  return (S_CAST(uint32_t, ii) ^ neg_sign_bit) - neg_sign_bit;\n}\n\nextern uint64_t g_failed_alloc_attempt_size;\n// with NDEBUG undefined, may want to define a bunch of macros so that line\n// number is printed as well; see e.g.\n//   https://stackoverflow.com/questions/15884793/how-to-get-the-name-or-file-and-line-of-caller-method\n\n#if (((__GNUC__ == 4) && (__GNUC_MINOR__ < 7)) || (__GNUC__ >= 11)) && !defined(__APPLE__)\n// putting this in the header file caused a bunch of gcc 4.4 strict-aliasing\n// warnings, while not doing so seems to inhibit some malloc-related compiler\n// optimizations, bleah\n// compromise: header-inline iff gcc version >= 4.7 (might not be the right\n// cutoff?)\n// update (18 Feb 2022): looks like inlined pgl_malloc is not compiled as\n// intended by gcc 11, due to new ipa-modref pass?  Open to suggestions on how\n// to fix this; maybe it's now necessary to define type-specific malloc\n// wrappers, ugh...\nBoolErr pgl_malloc(uintptr_t size, void* pp);\n#else\n// Unfortunately, defining the second parameter to be of type void** doesn't do\n// the right thing.\nHEADER_INLINE BoolErr pgl_malloc(uintptr_t size, void* pp) {\n  *S_CAST(unsigned char**, pp) = S_CAST(unsigned char*, malloc(size));\n  if (likely(*S_CAST(unsigned char**, pp))) {\n    return 0;\n  }\n  g_failed_alloc_attempt_size = size;\n  return 1;\n}\n#endif\n\n// This must be used for all fwrite() calls where len could be >= 2^31, since\n// OS X raw fwrite() doesn't work in that case.\nstatic_assert(sizeof(size_t) == sizeof(intptr_t), \"plink2_base assumes size_t and intptr_t are synonymous.\");\nBoolErr fwrite_checked(const void* buf, uintptr_t len, FILE* outfile);\n\nHEADER_INLINE IntErr putc_checked(int32_t ii, FILE* outfile) {\n  putc_unlocked(ii, outfile);\n  return ferror_unlocked(outfile);\n}\n\n// Only use this if loading < len bytes is not an error.\n// IntErr fread_checked2(void* buf, uintptr_t len, FILE* infile, uintptr_t* bytes_read_ptr);\n\nBoolErr fread_checked(void* buf, uintptr_t len, FILE* infile);\n\nHEADER_INLINE BoolErr fclose_null(FILE** fptr_ptr) {\n  int32_t ii = ferror_unlocked(*fptr_ptr);\n  int32_t jj = fclose(*fptr_ptr);\n  *fptr_ptr = nullptr;\n  return ii || jj;\n}\n\n\n#ifdef __LP64__\n// Reads an integer in [1, cap].\n// * Errors out unless first character is a digit, or is '+' followed by a\n//   digit.  Initial whitespace is not permitted.\n// * Like atoi(), this considers the number to be terminated by *any*\n//   nondigit character.  E.g. \"1000genomes\" is treated as a valid instance of\n//   1000 rather than a nonnumeric token, and \"98.6\" is treated as 98.  (See\n//   ScanmovPosintCapped(), ScanmovUintCapped(), etc. in plink2_string if\n//   you want strtol-like semantics, where the pointer is moved.)\n// * Errors out on overflow.  
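A hedged usage sketch\n//   (hypothetical caller, not from this header):\n//     uint32_t val;\n//     if (ScanPosintCapped(\"99999999999\", 100000, &val)) {\n//       // fails here instead of silently wrapping like atoi()\n//     }\n//   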
This may be the biggest advantage over atoi().\nBoolErr ScanPosintCapped(const char* str_iter, uint64_t cap, uint32_t* valp);\n\n// [0, cap]\nBoolErr ScanUintCapped(const char* str_iter, uint64_t cap, uint32_t* valp);\n\n// [-bound, bound]\nBoolErr ScanIntAbsBounded(const char* str_iter, uint64_t bound, int32_t* valp);\n#else  // not __LP64__\n// Need to be more careful in 32-bit case due to overflow.\n// A funny-looking div_10/mod_10 interface is used since the cap will usually\n// be a constant, and we want the integer division/modulus to occur at compile\n// time.\nBoolErr ScanPosintCapped32(const char* str_iter, uint32_t cap_div_10, uint32_t cap_mod_10, uint32_t* valp);\n\nBoolErr ScanUintCapped32(const char* str_iter, uint32_t cap_div_10, uint32_t cap_mod_10, uint32_t* valp);\n\nBoolErr ScanIntAbsBounded32(const char* str_iter, uint32_t bound_div_10, uint32_t bound_mod_10, int32_t* valp);\n\nHEADER_INLINE BoolErr ScanPosintCapped(const char* str, uint32_t cap, uint32_t* valp) {\n  return ScanPosintCapped32(str, cap / 10, cap % 10, valp);\n}\n\nHEADER_INLINE BoolErr ScanUintCapped(const char* str, uint32_t cap, uint32_t* valp) {\n  return ScanUintCapped32(str, cap / 10, cap % 10, valp);\n}\n\nHEADER_INLINE BoolErr ScanIntAbsBounded(const char* str, uint32_t bound, int32_t* valp) {\n  return ScanIntAbsBounded32(str, bound / 10, bound % 10, valp);\n}\n#endif\n\n\n// intentionally rejects -2^31 for now\n// (that's a reason why this doesn't have the shorter name 'ScanI32')\nHEADER_INLINE BoolErr ScanInt32(const char* str, int32_t* valp) {\n  return ScanIntAbsBounded(str, 0x7fffffff, valp);\n}\n\n// default cap = 0x7ffffffe\nHEADER_INLINE BoolErr ScanPosintDefcap(const char* str, uint32_t* valp) {\n  return ScanPosintCapped(str, 0x7ffffffe, valp);\n}\n\nHEADER_INLINE BoolErr ScanUintDefcap(const char* str, uint32_t* valp) {\n  return ScanUintCapped(str, 0x7ffffffe, valp);\n}\n\nHEADER_INLINE BoolErr ScanIntAbsDefcap(const char* str, int32_t* valp) {\n  return ScanIntAbsBounded(str, 0x7ffffffe, valp);\n}\n\nHEADER_INLINE BoolErr ScanUintIcap(const char* str, uint32_t* valp) {\n  return ScanUintCapped(str, 0x7fffffff, valp);\n}\n\n\n// memcpya() tends to be used to copy known-length text strings, while\n// memseta() has more mixed usage but char* type is also at least as common as\n// unsigned char*; append comes up less when working with raw byte arrays.  
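A hedged sketch of\n// the append idiom these enable (hypothetical buffer-building code):\n//   char* write_iter = buf;\n//   write_iter = memcpya(write_iter, \"chr\", 3);\n//   write_iter = memseta(write_iter, '0', 2);  // now points past \"chr00\"\n// 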
So\n// give the shortest-name forms char* return types.\nHEADER_INLINE char* memseta(void* target, unsigned char val, uintptr_t ct) {\n  memset(target, val, ct);\n  return &(S_CAST(char*, target)[ct]);\n}\n\nHEADER_INLINE unsigned char* memsetua(void* target, unsigned char val, uintptr_t ct) {\n  memset(target, val, ct);\n  return &(S_CAST(unsigned char*, target)[ct]);\n}\n\nHEADER_CINLINE uintptr_t BitCtToVecCt(uintptr_t val) {\n  return DivUp(val, kBitsPerVec);\n}\n\nHEADER_CINLINE uintptr_t BitCtToWordCt(uintptr_t val) {\n  return DivUp(val, kBitsPerWord);\n}\n\nHEADER_CINLINE uintptr_t BitCtToAlignedWordCt(uintptr_t val) {\n  return kWordsPerVec * BitCtToVecCt(val);\n}\n\nHEADER_CINLINE uintptr_t BitCtToCachelineCt(uintptr_t val) {\n  return DivUp(val, kBitsPerCacheline);\n}\n\nHEADER_CINLINE uintptr_t Int32CtToVecCt(uintptr_t val) {\n  return DivUp(val, kInt32PerVec);\n}\n\nHEADER_CINLINE uintptr_t Int32CtToCachelineCt(uintptr_t val) {\n  return DivUp(val, kInt32PerCacheline);\n}\n\nHEADER_CINLINE uintptr_t WordCtToVecCt(uintptr_t val) {\n  return DivUp(val, kWordsPerVec);\n}\n\nHEADER_CINLINE uintptr_t WordCtToCachelineCt(uintptr_t val) {\n  return DivUp(val, kWordsPerCacheline);\n}\n\nHEADER_CINLINE uint64_t WordCtToCachelineCtU64(uint64_t val) {\n  return DivUpU64(val, kWordsPerCacheline);\n}\n\n#ifdef __LP64__\nHEADER_CINLINE uintptr_t Int64CtToVecCt(uintptr_t val) {\n  return DivUp(val, kBytesPerVec / 8);\n}\n#else\nHEADER_CINLINE uintptr_t Int64CtToVecCt(uintptr_t val) {\n  return val * 2;\n}\n#endif\n\nHEADER_CINLINE uintptr_t Int64CtToCachelineCt(uintptr_t val) {\n  return DivUp(val, kInt64PerCacheline);\n}\n\nHEADER_CINLINE uintptr_t DblCtToVecCt(uintptr_t val) {\n  return Int64CtToVecCt(val);\n}\n\nHEADER_CINLINE uintptr_t VecCtToCachelineCt(uintptr_t val) {\n  return DivUp(val, kVecsPerCacheline);\n}\n\nHEADER_CINLINE uint64_t VecCtToCachelineCtU64(uint64_t val) {\n  return DivUpU64(val, kVecsPerCacheline);\n}\n\n// C++11 standard guarantees std::min and std::max return leftmost minimum in\n// case of equality; best to adhere to that\n// We don't actually use std::min/max since casting one argument when comparing\n// e.g. a uint32_t with a uintptr_t is pointlessly verbose.  Compiler will\n// still warn against comparison of signed with unsigned.\n#define MAXV(aa, bb) (((bb) > (aa))? (bb) : (aa))\n#define MINV(aa, bb) (((bb) < (aa))? (bb) : (aa))\n\n\n// don't use PglErr here since there's only one failure mode, it's\n// obvious what it is, and stacking multiple aligned_mallocs in a single\n// if-statement is useful.\nBoolErr aligned_malloc(uintptr_t size, uintptr_t alignment, void* aligned_pp);\n\n#ifdef USE_SSE42\nHEADER_CINLINE uint32_t NypsumWord(uintptr_t val) {\n  return __builtin_popcountll(val) + __builtin_popcountll(val & kMaskAAAA);\n}\n#else\nHEADER_CINLINE2 uint32_t NypsumWord(uintptr_t val) {\n  val = (val & kMask3333) + ((val >> 2) & kMask3333);\n  return (((val + (val >> 4)) & kMask0F0F) * kMask0101) >> (kBitsPerWord - 8);\n}\n#endif\n\n// the simple version, good enough for all non-time-critical stuff\n// (without SSE4.2, PopcountWords() tends to be >3x as fast on arrays.  with\n// SSE4.2 but no AVX2, there's no noticeable difference.  
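A hedged sketch of the\n// kind of non-time-critical loop PopcountWord() is meant for:\n//   uintptr_t tot = 0;\n//   for (uintptr_t widx = 0; widx != word_ct; ++widx) {\n//     tot += PopcountWord(bitarr[widx]);\n//   }\n// And 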
with AVX2,\n// PopcountWords() gains another factor of 1.5-2x.)\n#ifdef USE_SSE42\nHEADER_CINLINE uint32_t PopcountWord(uintptr_t val) {\n  return __builtin_popcountll(val);\n}\n#else\nHEADER_CINLINE2 uint32_t PopcountWord(uintptr_t val) {\n  // Sadly, this was still faster than the LLVM implementation of the intrinsic\n  // as of 2016.\n  return NypsumWord(val - ((val >> 1) & kMask5555));\n}\n#endif\n\n#ifdef USE_SSE42\nHEADER_INLINE uint32_t Popcount2Words(uintptr_t val0, uintptr_t val1) {\n  return __builtin_popcountll(val0) + __builtin_popcountll(val1);\n}\n#else\nHEADER_INLINE uint32_t Popcount2Words(uintptr_t val0, uintptr_t val1) {\n  val0 -= (val0 >> 1) & kMask5555;\n  val1 -= (val1 >> 1) & kMask5555;\n  const uintptr_t four_bit = (val0 & kMask3333) + ((val0 >> 2) & kMask3333) + (val1 & kMask3333) + ((val1 >> 2) & kMask3333);\n  // up to 16 values in 0..12; sum fits in 8 bits\n  return (((four_bit & kMask0F0F) + ((four_bit >> 4) & kMask0F0F)) * kMask0101) >> (kBitsPerWord - 8);\n}\n#endif\n\n#ifndef __LP64__\nHEADER_INLINE uint32_t Popcount4Words(uintptr_t val0, uintptr_t val1, uintptr_t val2, uintptr_t val3) {\n  val0 -= (val0 >> 1) & kMask5555;\n  val1 -= (val1 >> 1) & kMask5555;\n  val2 -= (val2 >> 1) & kMask5555;\n  val3 -= (val3 >> 1) & kMask5555;\n  const uintptr_t four_bit_0 = (val0 & kMask3333) + ((val0 >> 2) & kMask3333) + (val1 & kMask3333) + ((val1 >> 2) & kMask3333);\n  const uintptr_t four_bit_1 = (val2 & kMask3333) + ((val2 >> 2) & kMask3333) + (val3 & kMask3333) + ((val3 >> 2) & kMask3333);\n  return (((four_bit_0 & kMask0F0F) + ((four_bit_0 >> 4) & kMask0F0F) + (four_bit_1 & kMask0F0F) + ((four_bit_1 >> 4) & kMask0F0F)) * kMask0101) >> (kBitsPerWord - 8);\n}\n#endif\n\n#ifdef USE_SSE42\nHEADER_INLINE uint32_t PopcountHW(uint32_t val) {\n  return __builtin_popcount(val);\n}\n\nHEADER_INLINE uint32_t PopcountByte(uint32_t val) {\n  return __builtin_popcount(val);\n}\n#else\n#  ifdef __LP64__\nHEADER_INLINE uint32_t PopcountHW(uint32_t val) {\n  val = val - ((val >> 1) & 0x55555555);\n  val = (val & 0x33333333) + ((val >> 2) & 0x33333333);\n  val = (val + (val >> 4)) & 0x0f0f0f0f;\n  return (val * 0x1010101) >> 24;\n}\n#  else\nHEADER_INLINE uint32_t PopcountHW(uint32_t val) {\n  val = val - ((val >> 1) & 0x5555);\n  val = (val & 0x3333) + ((val >> 2) & 0x3333);\n  val = (val + (val >> 4)) & 0x0f0f;\n  return (val + (val >> 8)) & 0xff;\n}\n#  endif\n\nHEADER_INLINE uint32_t PopcountByte(uint32_t val) {\n  val = val - ((val >> 1) & 0x55);\n  val = (val & 0x33) + ((val >> 2) & 0x33);\n  return (val + (val >> 4)) & 0xf;\n}\n#endif\n\n#ifdef USE_SSE2\n#  ifdef USE_SSE42\nHEADER_INLINE uint32_t PopcountVec8thUint(uint32_t val) {\n  return __builtin_popcount(val);\n}\n#  else\nHEADER_INLINE uint32_t PopcountVec8thUint(uint32_t val) {\n  // May as well exploit the fact that only the low 16 bits may be set.\n  val = val - ((val >> 1) & 0x5555);\n  val = (val & 0x3333) + ((val >> 2) & 0x3333);\n  val = (val + (val >> 4)) & 0x0f0f;\n  return (val + (val >> 8)) & 0xff;\n}\n#  endif\n#endif\n\n// Downcasts don't risk alignment issues.\nHEADER_INLINE unsigned char* DowncastToUc(void* pp) {\n  return S_CAST(unsigned char*, pp);\n}\n\nHEADER_INLINE char* DowncastToC(void* pp) {\n  return S_CAST(char*, pp);\n}\n\nHEADER_INLINE const unsigned char* DowncastKToUc(const void* pp) {\n  return S_CAST(const unsigned char*, pp);\n}\n\nHEADER_INLINE const char* DowncastKToC(const void* pp) {\n  return S_CAST(const char*, pp);\n}\n\nHEADER_INLINE uintptr_t* DowncastVecWToW(VecW* pp) {\n  
return R_CAST(uintptr_t*, pp);\n}\n\nHEADER_INLINE uint32_t* DowncastVecWToU32(VecW* pp) {\n  return R_CAST(uint32_t*, pp);\n}\n\nHEADER_INLINE Halfword* DowncastWToHW(uintptr_t* pp) {\n  return R_CAST(Halfword*, pp);\n}\n\nHEADER_INLINE uint32_t* DowncastWToU32(uintptr_t* pp) {\n  return R_CAST(uint32_t*, pp);\n}\n\nHEADER_INLINE uint16_t* DowncastWToU16(uintptr_t* pp) {\n  return R_CAST(uint16_t*, pp);\n}\n\nHEADER_INLINE int16_t* DowncastWToI16(uintptr_t* pp) {\n  return R_CAST(int16_t*, pp);\n}\n\n#ifdef USE_SSE2\nHEADER_INLINE Vec8thUint* DowncastWToV8(uintptr_t* pp) {\n  return R_CAST(Vec8thUint*, pp);\n}\n#endif\n\nHEADER_INLINE uint16_t* DowncastU32ToU16(uint32_t* pp) {\n  return R_CAST(uint16_t*, pp);\n}\n\nHEADER_INLINE const uintptr_t* DowncastKVecWToW(const VecW* pp) {\n  return R_CAST(const uintptr_t*, pp);\n}\n\nHEADER_INLINE const uint16_t* DowncastKVecWToU16(const VecW* pp) {\n  return R_CAST(const uint16_t*, pp);\n}\n\nHEADER_INLINE const Halfword* DowncastKWToHW(const uintptr_t* pp) {\n  return R_CAST(const Halfword*, pp);\n}\n\nHEADER_INLINE const uint32_t* DowncastKWToU32(const uintptr_t* pp) {\n  return R_CAST(const uint32_t*, pp);\n}\n\nHEADER_INLINE const uint16_t* DowncastKWToU16(const uintptr_t* pp) {\n  return R_CAST(const uint16_t*, pp);\n}\n\nHEADER_INLINE const uint16_t* DowncastKU64ToU16(const uint64_t* pp) {\n  return R_CAST(const uint16_t*, pp);\n}\n\n\nHEADER_INLINE uint32_t IsVecAligned(const void* ptr) {\n  return !(R_CAST(uintptr_t, ptr) % kBytesPerVec);\n}\n\n#ifdef USE_SSE2\nHEADER_INLINE void AlignWToVec(uintptr_t** pp) {\n  const uintptr_t addr = R_CAST(uintptr_t, *pp);\n  *pp = R_CAST(uintptr_t*, RoundUpPow2(addr, kBytesPerVec));\n}\n#else\nHEADER_INLINE void AlignWToVec(__maybe_unused uintptr_t** pp) {\n}\n#endif\n\nHEADER_INLINE void AlignKUcToVec(const unsigned char** pp) {\n  const uintptr_t addr = R_CAST(uintptr_t, *pp);\n  *pp = R_CAST(const unsigned char*, RoundUpPow2(addr, kBytesPerVec));\n}\n\n/*\nHEADER_INLINE uint32_t AlignToVecW(void* prestart, VecW** result_ptr) {\n  unsigned char* prestart_uc = S_CAST(unsigned char*, prestart);\n  const uint32_t lead_byte_ct = (-R_CAST(uintptr_t, prestart_uc)) % kBytesPerVec;\n  *result_ptr = R_CAST(VecW*, &(prestart_uc[lead_byte_ct]));\n  return lead_byte_ct;\n}\n*/\n\nHEADER_INLINE uint32_t AlignKToAW(const void* prestart, const uintptr_t** result_ptr) {\n  const unsigned char* prestart_uc = S_CAST(const unsigned char*, prestart);\n  const uint32_t lead_byte_ct = (-R_CAST(uintptr_t, prestart_uc)) % kBytesPerVec;\n  *result_ptr = R_CAST(const uintptr_t*, &(prestart_uc[lead_byte_ct]));\n  return lead_byte_ct;\n}\n\nHEADER_INLINE uint32_t AlignKToW(const void* prestart, const uintptr_t** result_ptr) {\n  const unsigned char* prestart_uc = S_CAST(const unsigned char*, prestart);\n  const uint32_t lead_byte_ct = (-R_CAST(uintptr_t, prestart_uc)) % kBytesPerWord;\n  *result_ptr = R_CAST(const uintptr_t*, &(prestart_uc[lead_byte_ct]));\n  return lead_byte_ct;\n}\n\n\n// Turns out memcpy(&cur_word, bytearr, ct) can't be trusted to be fast when ct\n// isn't known at compile time.\n//\n// ct must be less than sizeof(intptr_t).  
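A hedged little-endian example\n// (illustrative values, not from the original source):\n//   ProperSubwordLoad(\"abc\", 3) == 0x636261  // bytes packed low to high\n// 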
ct == 0 handled correctly, albeit\n// inefficiently.\n#ifndef NO_UNALIGNED\nHEADER_INLINE uintptr_t ProperSubwordLoad(const void* bytearr, uint32_t ct) {\n  const unsigned char* bytearr_uc = S_CAST(const unsigned char*, bytearr);\n#  ifdef __LP64__\n  if (ct >= 4) {\n    const uint32_t remainder = ct - 4;\n    bytearr_uc = &(bytearr_uc[remainder]);\n    uintptr_t cur_word = *R_CAST(const uint32_t*, bytearr_uc);\n    if (remainder) {\n      cur_word <<= remainder * CHAR_BIT;\n      cur_word |= *S_CAST(const uint32_t*, bytearr);\n    }\n    return cur_word;\n  }\n#  endif\n  if (ct >= 2) {\n    const uint32_t remainder = ct & 1;\n    uintptr_t cur_word = *R_CAST(const uint16_t*, &(bytearr_uc[remainder]));\n    if (remainder) {\n      cur_word <<= 8;\n      cur_word |= bytearr_uc[0];\n    }\n    return cur_word;\n  }\n  return ct? bytearr_uc[0] : 0;\n}\n\nHEADER_INLINE uintptr_t SubwordLoad(const void* bytearr, uint32_t ct) {\n  if (ct == S_CAST(uint32_t, kBytesPerWord)) {\n    return *S_CAST(const uintptr_t*, bytearr);\n  }\n  return ProperSubwordLoad(bytearr, ct);\n}\n\n// ct must be in 1..4.\nHEADER_INLINE uint32_t SubU32Load(const void* bytearr, uint32_t ct) {\n  if (ct & 1) {\n    const unsigned char* bytearr_iter = S_CAST(const unsigned char*, bytearr);\n    uint32_t cur_uint = *bytearr_iter;\n    if (ct == 3) {\n      ++bytearr_iter;\n      cur_uint |= S_CAST(uint32_t, *R_CAST(const uint16_t*, bytearr_iter)) << 8;\n    }\n    return cur_uint;\n  }\n  if (ct == 2) {\n    return *S_CAST(const uint16_t*, bytearr);\n  }\n  return *S_CAST(const uint32_t*, bytearr);\n}\n\n// ct must be 1 or 2.\nHEADER_INLINE uint16_t SubU16Load(const void* bytearr, uint32_t ct) {\n  if (ct == 1) {\n    const unsigned char* bytearr_uc = S_CAST(const unsigned char*, bytearr);\n    return bytearr_uc[0];\n  }\n  return *S_CAST(const uint16_t*, bytearr);\n}\n\n// ct must be in 1..sizeof(Halfword).\n#ifdef __LP64__\nHEADER_INLINE Halfword SubHWLoad(const void* bytearr, uint32_t ct) {\n  return SubU32Load(bytearr, ct);\n}\n#else\nHEADER_INLINE Halfword SubHWLoad(const void* bytearr, uint32_t ct) {\n  return SubU16Load(bytearr, ct);\n}\n#endif\n\n// tried making this non-inline, loop took more than 50% longer\nHEADER_INLINE void ProperSubwordStore(uintptr_t cur_word, uint32_t byte_ct, void* target) {\n  unsigned char* target_iter = S_CAST(unsigned char*, target);\n#  ifdef __LP64__\n  if (byte_ct >= 4) {\n    *R_CAST(uint32_t*, target_iter) = cur_word;\n    if (byte_ct == 4) {\n      return;\n    }\n    const uint32_t remainder = byte_ct - 4;\n    target_iter = &(target_iter[remainder]);\n    cur_word >>= remainder * CHAR_BIT;\n    *R_CAST(uint32_t*, target_iter) = cur_word;\n    return;\n  }\n#  endif\n  if (byte_ct & 1) {\n    *target_iter++ = cur_word;\n    cur_word >>= 8;\n  }\n  if (byte_ct & 2) {\n    *R_CAST(uint16_t*, target_iter) = cur_word;\n  }\n}\n\nHEADER_INLINE void SubwordStore(uintptr_t cur_word, uint32_t byte_ct, void* target) {\n  if (byte_ct == kBytesPerWord) {\n    *S_CAST(uintptr_t*, target) = cur_word;\n    return;\n  }\n  ProperSubwordStore(cur_word, byte_ct, target);\n}\n\n// byte_ct must be in 1..4.\nHEADER_INLINE void SubU32Store(uint32_t cur_uint, uint32_t byte_ct, void* target) {\n  if (byte_ct & 1) {\n    unsigned char* target_iter = S_CAST(unsigned char*, target);\n    *target_iter = cur_uint;\n    if (byte_ct == 3) {\n      ++target_iter;\n      *R_CAST(uint16_t*, target_iter) = cur_uint >> 8;\n    }\n    return;\n  }\n  if (byte_ct == 2) {\n    *S_CAST(uint16_t*, target) = cur_uint;\n 
   return;\n  }\n  *S_CAST(uint32_t*, target) = cur_uint;\n  return;\n}\n#else  // NO_UNALIGNED\nHEADER_INLINE uintptr_t ProperSubwordLoad(const void* bytearr, uint32_t ct) {\n  uintptr_t cur_word = 0;\n  memcpy(&cur_word, bytearr, ct);\n  return cur_word;\n}\n\nHEADER_INLINE uintptr_t SubwordLoad(const void* bytearr, uint32_t ct) {\n  uintptr_t cur_word = 0;\n  memcpy(&cur_word, bytearr, ct);\n  return cur_word;\n}\n\nHEADER_INLINE uint32_t SubU32Load(const void* bytearr, uint32_t ct) {\n  uint32_t cur_uint = 0;\n  memcpy(&cur_uint, bytearr, ct);\n  return cur_uint;\n}\n\nHEADER_INLINE Halfword SubHWLoad(const void* bytearr, uint32_t ct) {\n  Halfword cur_uint = 0;\n  memcpy(&cur_uint, bytearr, ct);\n  return cur_uint;\n}\n\nHEADER_INLINE void ProperSubwordStore(uintptr_t cur_word, uint32_t byte_ct, void* target) {\n  memcpy(target, &cur_word, byte_ct);\n}\n\nHEADER_INLINE void SubwordStore(uintptr_t cur_word, uint32_t byte_ct, void* target) {\n  memcpy(target, &cur_word, byte_ct);\n}\n\nHEADER_INLINE void SubU32Store(uint32_t cur_uint, uint32_t byte_ct, void* target) {\n  memcpy(target, &cur_uint, byte_ct);\n}\n#endif // NO_UNALIGNED\n\nHEADER_INLINE uint64_t SubU64Load(const void* bytearr, uint32_t ct) {\n#ifdef __LP64__\n  return SubwordLoad(bytearr, ct);\n#else\n  uint64_t cur_u64 = 0;\n  memcpy(&cur_u64, bytearr, ct);\n  return cur_u64;\n#endif\n}\n\nHEADER_INLINE void ProperSubwordStoreMov(uintptr_t cur_word, uint32_t byte_ct, unsigned char** targetp) {\n  ProperSubwordStore(cur_word, byte_ct, *targetp);\n  *targetp += byte_ct;\n}\n\nHEADER_INLINE void SubwordStoreMov(uintptr_t cur_word, uint32_t byte_ct, unsigned char** targetp) {\n  SubwordStore(cur_word, byte_ct, *targetp);\n  *targetp += byte_ct;\n}\n\nHEADER_INLINE void SubU32StoreMov(uint32_t cur_uint, uint32_t byte_ct, unsigned char** targetp) {\n  SubU32Store(cur_uint, byte_ct, *targetp);\n  *targetp += byte_ct;\n}\n\n#ifndef NO_UNALIGNED\n#  ifdef __LP64__\nHEADER_INLINE void SubU64StoreMov(uint64_t cur_u64, uint32_t byte_ct, unsigned char** targetp) {\n  return SubwordStoreMov(cur_u64, byte_ct, targetp);\n}\n#  else\nHEADER_INLINE void SubU64StoreMov(uint64_t cur_u64, uint32_t byte_ct, unsigned char** targetp) {\n  if (byte_ct > 4) {\n    *R_CAST(uint32_t*, *targetp) = cur_u64;\n    *targetp += 4;\n    byte_ct -= 4;\n    cur_u64 >>= 32;\n  }\n  return SubU32StoreMov(cur_u64, byte_ct, targetp);\n}\n#  endif\n#else\nHEADER_INLINE void SubU64StoreMov(uint64_t cur_u64, uint32_t byte_ct, unsigned char** targetp) {\n  memcpy(*targetp, &cur_u64, byte_ct);\n  *targetp += byte_ct;\n}\n#endif\n\n\nHEADER_INLINE BoolErr vecaligned_malloc(uintptr_t size, void* aligned_pp) {\n#ifdef USE_AVX2\n  return aligned_malloc(size, kBytesPerVec, aligned_pp);\n#else\n#  if defined(__APPLE__) || !defined(__LP64__)\n  const BoolErr ret_boolerr = pgl_malloc(size, aligned_pp);\n  assert(IsVecAligned(*S_CAST(uintptr_t**, aligned_pp)));\n  return ret_boolerr;\n#  else\n  return aligned_malloc(size, kBytesPerVec, aligned_pp);\n#  endif\n#endif\n}\n\nHEADER_INLINE BoolErr cachealigned_malloc(uintptr_t size, void* aligned_pp) {\n  return aligned_malloc(size, kCacheline, aligned_pp);\n}\n\nHEADER_INLINE void aligned_free(void* aligned_ptr) {\n  free(R_CAST(void*, S_CAST(uintptr_t*, aligned_ptr)[-1]));\n}\n\nHEADER_INLINE void aligned_free_cond(void* aligned_ptr) {\n  if (aligned_ptr) {\n    free(R_CAST(void*, S_CAST(uintptr_t*, aligned_ptr)[-1]));\n  }\n}\n\n// C spec is slightly broken here\nHEADER_INLINE void free_const(const void* memptr) {\n  
free(K_CAST(void*, memptr));\n}\n\nHEADER_INLINE void free_cond(const void* memptr) {\n  if (memptr) {\n    free_const(memptr);\n  }\n}\n\n#ifdef USE_AVX2\nHEADER_INLINE void vecaligned_free(void* aligned_ptr) {\n  aligned_free(aligned_ptr);\n}\n\nHEADER_INLINE void vecaligned_free_cond(void* aligned_ptr) {\n  aligned_free_cond(aligned_ptr);\n}\n#else\n#  if defined(__APPLE__) || !defined(__LP64__)\nHEADER_INLINE void vecaligned_free(void* aligned_ptr) {\n  free(aligned_ptr);\n}\n\nHEADER_INLINE void vecaligned_free_cond(void* aligned_ptr) {\n  free_cond(aligned_ptr);\n}\n#  else\nHEADER_INLINE void vecaligned_free(void* aligned_ptr) {\n  aligned_free(aligned_ptr);\n}\n\nHEADER_INLINE void vecaligned_free_cond(void* aligned_ptr) {\n  aligned_free_cond(aligned_ptr);\n}\n#  endif\n#endif\n\n\n#if defined(USE_SSE2) && !defined(NO_UNALIGNED)\nint32_t memequal(const void* m1, const void* m2, uintptr_t byte_ct);\n\n// This is also better than the June 2018 OS X/LLVM stock implementation,\n// especially for small values of ct.\n// (gcc 7.1 and clang 6.0.0 should have better stock implementations;\n// re-benchmark this once Linux build machine is upgraded to Ubuntu 18.04.)\nint32_t Memcmp(const void* m1, const void* m2, uintptr_t ct);\n#else\nHEADER_INLINE int32_t memequal(const void* m1, const void* m2, uintptr_t byte_ct) {\n  return !memcmp(m1, m2, byte_ct);\n}\n\nHEADER_INLINE int32_t Memcmp(const void* m1, const void* m2, uintptr_t ct) {\n  return memcmp(m1, m2, ct);\n}\n#endif\n\n\nHEADER_INLINE char* memcpya(void* __restrict target, const void* __restrict source, uintptr_t ct) {\n  memcpy(target, source, ct);\n  return &(S_CAST(char*, target)[ct]);\n}\n\nHEADER_INLINE unsigned char* memcpyua(void* __restrict target, const void* __restrict source, uintptr_t ct) {\n  memcpy(target, source, ct);\n  return &(S_CAST(unsigned char*, target)[ct]);\n}\n\n// Tried beating memcpy for usually-small strings not known to have length <=\n// 8, gave up.\n\n#if defined(USE_SSE2) && defined(__cplusplus) && !defined(NO_UNALIGNED)\n// See https://stackoverflow.com/questions/9510514/integer-range-based-template-specialisation .\n\ntemplate <bool> struct TRange;\n\n// This makes MemequalKImpl<byte_ct> expand to\n// MemequalKImpl<byte_ct, TRange<true> >.\n// If a later single-parameter template defines the same thing, that takes\n// precedence.\ntemplate <uint32_t N, typename = TRange<true> > struct MemequalKImpl {\n  static int32_t MemequalK(const void* m1, const void* m2) {\n    return memequal(m1, m2, N);\n  }\n};\n\ntemplate <> struct MemequalKImpl<1> {\n  static int32_t MemequalK(const void* m1, const void* m2) {\n    const unsigned char* m1_uc = S_CAST(const unsigned char*, m1);\n    const unsigned char* m2_uc = S_CAST(const unsigned char*, m2);\n    return (m1_uc[0] == m2_uc[0]);\n  }\n};\n\ntemplate <> struct MemequalKImpl<2> {\n  static int32_t MemequalK(const void* m1, const void* m2) {\n    return ((*R_CAST(const uint16_t*, m1)) == (*R_CAST(const uint16_t*, m2)));\n  }\n};\n\ntemplate <> struct MemequalKImpl<3> {\n  static int32_t MemequalK(const void* m1, const void* m2) {\n    const unsigned char* m1_uc = S_CAST(const unsigned char*, m1);\n    const unsigned char* m2_uc = S_CAST(const unsigned char*, m2);\n    return\n      ((*R_CAST(const uint16_t*, m1)) == (*R_CAST(const uint16_t*, m2))) &&\n      (m1_uc[2] == m2_uc[2]);\n  }\n};\n\ntemplate <> struct MemequalKImpl<4> {\n  static int32_t MemequalK(const void* m1, const void* m2) {\n    return ((*R_CAST(const uint32_t*, m1)) == (*R_CAST(const 
uint32_t*, m2)));\n  }\n};\n\ntemplate <uint32_t N> struct MemequalKImpl<N, TRange<(5 <= N) && (N <= 7)> > {\n  static int32_t MemequalK(const void* m1, const void* m2) {\n    const unsigned char* m1_uc = S_CAST(const unsigned char*, m1);\n    const unsigned char* m2_uc = S_CAST(const unsigned char*, m2);\n    return\n      ((*R_CAST(const uint32_t*, m1)) == (*R_CAST(const uint32_t*, m2))) &&\n      ((*R_CAST(const uint32_t*, &(m1_uc[N - 4]))) == (*R_CAST(const uint32_t*, &(m2_uc[N - 4]))));\n  }\n};\n\ntemplate <> struct MemequalKImpl<8> {\n  static int32_t MemequalK(const void* m1, const void* m2) {\n    return ((*R_CAST(const uint64_t*, m1)) == (*R_CAST(const uint64_t*, m2)));\n  }\n};\n\ntemplate <uint32_t N> struct MemequalKImpl<N, TRange<(9 <= N) && (N <= 15)> > {\n  static int32_t MemequalK(const void* m1, const void* m2) {\n    const unsigned char* m1_uc = S_CAST(const unsigned char*, m1);\n    const unsigned char* m2_uc = S_CAST(const unsigned char*, m2);\n    return\n      ((*R_CAST(const uint64_t*, m1)) == (*R_CAST(const uint64_t*, m2))) &&\n      ((*R_CAST(const uint64_t*, &(m1_uc[N - 8]))) == (*R_CAST(const uint64_t*, &(m2_uc[N - 8]))));\n  }\n};\n\ntemplate <> struct MemequalKImpl<16> {\n  static int32_t MemequalK(const void* m1, const void* m2) {\n    const __m128i v1 = _mm_loadu_si128(S_CAST(const __m128i*, m1));\n    const __m128i v2 = _mm_loadu_si128(S_CAST(const __m128i*, m2));\n    return (_mm_movemask_epi8(_mm_cmpeq_epi8(v1, v2)) == 65535);\n  }\n};\n\ntemplate <uint32_t N> struct MemequalKImpl<N, TRange<(17 <= N) && (N <= 24)> > {\n  static int32_t MemequalK(const void* m1, const void* m2) {\n    const unsigned char* m1_uc = S_CAST(const unsigned char*, m1);\n    const unsigned char* m2_uc = S_CAST(const unsigned char*, m2);\n    const __m128i v1 = _mm_loadu_si128(S_CAST(const __m128i*, m1));\n    const __m128i v2 = _mm_loadu_si128(S_CAST(const __m128i*, m2));\n    return\n      (_mm_movemask_epi8(_mm_cmpeq_epi8(v1, v2)) == 65535) &&\n      ((*R_CAST(const uint64_t*, &(m1_uc[N - 8]))) == (*R_CAST(const uint64_t*, &(m2_uc[N - 8]))));\n  }\n};\n\ntemplate <uint32_t N> struct MemequalKImpl<N, TRange<(25 <= N) && (N <= 31)> > {\n  static int32_t MemequalK(const void* m1, const void* m2) {\n    __m128i v1 = _mm_loadu_si128(S_CAST(const __m128i*, m1));\n    __m128i v2 = _mm_loadu_si128(S_CAST(const __m128i*, m2));\n    if (_mm_movemask_epi8(_mm_cmpeq_epi8(v1, v2)) != 65535) {\n      return 0;\n    }\n    const unsigned char* m1_uc = S_CAST(const unsigned char*, m1);\n    const unsigned char* m2_uc = S_CAST(const unsigned char*, m2);\n    v1 = _mm_loadu_si128(R_CAST(const __m128i*, &(m1_uc[N - 16])));\n    v2 = _mm_loadu_si128(R_CAST(const __m128i*, &(m2_uc[N - 16])));\n    return (_mm_movemask_epi8(_mm_cmpeq_epi8(v1, v2)) == 65535);\n  }\n};\n\n#  define memequal_k(m1, m2, byte_ct) plink2::MemequalKImpl<byte_ct>::MemequalK(m1, m2)\n\ntemplate <uint32_t N, typename = TRange<true> > struct MemcpyKImpl {\n  static void MemcpyK(void* __restrict dst, const void* __restrict src) {\n    memcpy(dst, src, N);\n  }\n};\n\n// Patch a bunch of cases where some commonly-used gcc and clang versions\n// generate suboptimal code.  
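E.g. the MemcpyKImpl<6>\n// specialization below compiles to one 4-byte store plus one 2-byte store,\n// and MemcpyKImpl<7> to two overlapping 4-byte stores, rather than a\n// memcpy() call; as with memequal_k() above, the length must be a\n// compile-time constant.\n// 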
(Since this code is shamelessly x86-specific, we\n// don't worry about the formal undefinedness of unaligned pointer dereferences\n// here.)\n// (todo: check if/when this has been fixed, and remove this bloat once all\n// production build machines have sufficiently new compilers.)\ntemplate <> struct MemcpyKImpl<2> {\n  static void MemcpyK(void* __restrict dst, const void* __restrict src) {\n    *S_CAST(uint16_t*, dst) = *S_CAST(const uint16_t*, src);\n  }\n};\n\ntemplate <> struct MemcpyKImpl<3> {\n  static void MemcpyK(void* __restrict dst, const void* __restrict src) {\n    unsigned char* dst_uc = S_CAST(unsigned char*, dst);\n    const unsigned char* src_uc = S_CAST(const unsigned char*, src);\n    *S_CAST(uint16_t*, dst) = *S_CAST(const uint16_t*, src);\n    dst_uc[2] = src_uc[2];\n  }\n};\n\ntemplate <> struct MemcpyKImpl<5> {\n  static void MemcpyK(void* __restrict dst, const void* __restrict src) {\n    unsigned char* dst_uc = S_CAST(unsigned char*, dst);\n    const unsigned char* src_uc = S_CAST(const unsigned char*, src);\n    *S_CAST(uint32_t*, dst) = *S_CAST(const uint32_t*, src);\n    dst_uc[4] = src_uc[4];\n  }\n};\n\ntemplate <> struct MemcpyKImpl<6> {\n  static void MemcpyK(void* __restrict dst, const void* __restrict src) {\n    uint16_t* dst_u16 = S_CAST(uint16_t*, dst);\n    const uint16_t* src_u16 = S_CAST(const uint16_t*, src);\n    *S_CAST(uint32_t*, dst) = *S_CAST(const uint32_t*, src);\n    dst_u16[2] = src_u16[2];\n  }\n};\n\ntemplate <> struct MemcpyKImpl<7> {\n  static void MemcpyK(void* __restrict dst, const void* __restrict src) {\n    unsigned char* dst_uc = S_CAST(unsigned char*, dst);\n    const unsigned char* src_uc = S_CAST(const unsigned char*, src);\n    *S_CAST(uint32_t*, dst) = *S_CAST(const uint32_t*, src);\n    *R_CAST(uint32_t*, &(dst_uc[3])) = *R_CAST(const uint32_t*, &(src_uc[3]));\n  }\n};\n\ntemplate <> struct MemcpyKImpl<9> {\n  static void MemcpyK(void* __restrict dst, const void* __restrict src) {\n    unsigned char* dst_uc = S_CAST(unsigned char*, dst);\n    const unsigned char* src_uc = S_CAST(const unsigned char*, src);\n    *S_CAST(uint64_t*, dst) = *S_CAST(const uint64_t*, src);\n    dst_uc[8] = src_uc[8];\n  }\n};\n\ntemplate <> struct MemcpyKImpl<10> {\n  static void MemcpyK(void* __restrict dst, const void* __restrict src) {\n    uint16_t* dst_u16 = S_CAST(uint16_t*, dst);\n    const uint16_t* src_u16 = S_CAST(const uint16_t*, src);\n    *S_CAST(uint64_t*, dst) = *S_CAST(const uint64_t*, src);\n    dst_u16[4] = src_u16[4];\n  }\n};\n\ntemplate <uint32_t N> struct MemcpyKImpl<N, TRange<(11 <= N) && (N <= 12)> > {\n  static void MemcpyK(void* __restrict dst, const void* __restrict src) {\n    unsigned char* dst_uc = S_CAST(unsigned char*, dst);\n    const unsigned char* src_uc = S_CAST(const unsigned char*, src);\n    *S_CAST(uint64_t*, dst) = *S_CAST(const uint64_t*, src);\n    *R_CAST(uint32_t*, &(dst_uc[N - 4])) = *R_CAST(const uint32_t*, &(src_uc[N - 4]));\n  }\n};\n\ntemplate <uint32_t N> struct MemcpyKImpl<N, TRange<(13 <= N) && (N <= 15)> > {\n  static void MemcpyK(void* __restrict dst, const void* __restrict src) {\n    unsigned char* dst_uc = S_CAST(unsigned char*, dst);\n    const unsigned char* src_uc = S_CAST(const unsigned char*, src);\n    *S_CAST(uint64_t*, dst) = *S_CAST(const uint64_t*, src);\n    *R_CAST(uint64_t*, &(dst_uc[N - 8])) = *R_CAST(const uint64_t*, &(src_uc[N - 8]));\n  }\n};\n\ntemplate <> struct MemcpyKImpl<17> {\n  static void MemcpyK(void* __restrict dst, const void* __restrict src) {\n    unsigned char* 
dst_uc = S_CAST(unsigned char*, dst);\n    const unsigned char* src_uc = S_CAST(const unsigned char*, src);\n    const __m128i vv = _mm_loadu_si128(S_CAST(const __m128i*, src));\n    _mm_storeu_si128(S_CAST(__m128i*, dst), vv);\n    dst_uc[16] = src_uc[16];\n  }\n};\n\ntemplate <> struct MemcpyKImpl<18> {\n  static void MemcpyK(void* __restrict dst, const void* __restrict src) {\n    uint16_t* dst_u16 = S_CAST(uint16_t*, dst);\n    const uint16_t* src_u16 = S_CAST(const uint16_t*, src);\n    const __m128i vv = _mm_loadu_si128(S_CAST(const __m128i*, src));\n    _mm_storeu_si128(S_CAST(__m128i*, dst), vv);\n    dst_u16[8] = src_u16[8];\n  }\n};\n\ntemplate <uint32_t N> struct MemcpyKImpl<N, TRange<(19 <= N) && (N <= 20)> > {\n  static void MemcpyK(void* __restrict dst, const void* __restrict src) {\n    unsigned char* dst_uc = S_CAST(unsigned char*, dst);\n    const unsigned char* src_uc = S_CAST(const unsigned char*, src);\n    const __m128i vv = _mm_loadu_si128(S_CAST(const __m128i*, src));\n    _mm_storeu_si128(S_CAST(__m128i*, dst), vv);\n    *R_CAST(uint32_t*, &(dst_uc[N - 4])) = *R_CAST(const uint32_t*, &(src_uc[N - 4]));\n  }\n};\n\ntemplate <uint32_t N> struct MemcpyKImpl<N, TRange<(21 <= N) && (N <= 24)> > {\n  static void MemcpyK(void* __restrict dst, const void* __restrict src) {\n    unsigned char* dst_uc = S_CAST(unsigned char*, dst);\n    const unsigned char* src_uc = S_CAST(const unsigned char*, src);\n    const __m128i vv = _mm_loadu_si128(S_CAST(const __m128i*, src));\n    _mm_storeu_si128(S_CAST(__m128i*, dst), vv);\n    *R_CAST(uint64_t*, &(dst_uc[N - 8])) = *R_CAST(const uint64_t*, &(src_uc[N - 8]));\n  }\n};\n\ntemplate <uint32_t N> struct MemcpyKImpl<N, TRange<(25 <= N) && (N <= 31)> > {\n  static void MemcpyK(void* __restrict dst, const void* __restrict src) {\n    unsigned char* dst_uc = S_CAST(unsigned char*, dst);\n    const unsigned char* src_uc = S_CAST(const unsigned char*, src);\n    const __m128i v1 = _mm_loadu_si128(S_CAST(const __m128i*, src));\n    const __m128i v2 = _mm_loadu_si128(R_CAST(const __m128i*, &(src_uc[N - 16])));\n    _mm_storeu_si128(S_CAST(__m128i*, dst), v1);\n    _mm_storeu_si128(R_CAST(__m128i*, &(dst_uc[N - 16])), v2);\n  }\n};\n\n// Note that there's no difference between memcpy() and memcpy_k() for common\n// 'well-behaved' sizes like 1, 4, 8, and 16.  
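\n// (Illustrative aside, not from upstream: given a compile-time count,\n// memcpya_k(write_iter, \"chr\", 3) expands to plink2::MemcpyaK<3>(write_iter,\n// \"chr\"), whose MemcpyKImpl<3> specialization issues one uint16_t store plus\n// one byte store instead of a generic memcpy call; memequal_k dispatches the\n// same way through MemequalKImpl.)\n// 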
It's the funny numbers in\n// between, which often arise with constant strings, which this template is\n// targeting.\n#  define memcpy_k(dst, src, ct) plink2::MemcpyKImpl<ct>::MemcpyK(dst, src)\n\ntemplate <uint32_t N> char* MemcpyaK(void* __restrict dst, const void* __restrict src) {\n  MemcpyKImpl<N>::MemcpyK(dst, src);\n  char* dst_c = S_CAST(char*, dst);\n  return &(dst_c[N]);\n}\n\n#  define memcpya_k(dst, src, ct) plink2::MemcpyaK<ct>(dst, src)\n#  define memcpyua_k(dst, src, ct) DowncastToUc(plink2::MemcpyaK<ct>(dst, src))\n\ntemplate <uint32_t N> struct MemcpyoKImpl {\n  static void MemcpyoK(void* __restrict dst, const void* __restrict src) {\n    MemcpyKImpl<N>::MemcpyK(dst, src);\n  }\n};\n\ntemplate <> struct MemcpyoKImpl<3> {\n  static void MemcpyoK(void* __restrict dst, const void* __restrict src) {\n    *S_CAST(uint32_t*, dst) = *S_CAST(const uint32_t*, src);\n  }\n};\n\ntemplate <> struct MemcpyoKImpl<7> {\n  static void MemcpyoK(void* __restrict dst, const void* __restrict src) {\n    *S_CAST(uint64_t*, dst) = *S_CAST(const uint64_t*, src);\n  }\n};\n\ntemplate <> struct MemcpyoKImpl<15> {\n  static void MemcpyoK(void* __restrict dst, const void* __restrict src) {\n    const __m128i vv = _mm_loadu_si128(S_CAST(const __m128i*, src));\n    _mm_storeu_si128(S_CAST(__m128i*, dst), vv);\n  }\n};\n\n// interestingly, __m256i copy does not seem to be better in 31 byte case\n\n#  define memcpyo_k(dst, src, ct) plink2::MemcpyoKImpl<ct>::MemcpyoK(dst, src)\n\ntemplate <uint32_t N> char* MemcpyaoK(void* __restrict dst, const void* __restrict src) {\n  MemcpyoKImpl<N>::MemcpyoK(dst, src);\n  char* dst_c = S_CAST(char*, dst);\n  return &(dst_c[N]);\n}\n\n#  define memcpyao_k(dst, src, ct) plink2::MemcpyaoK<ct>(dst, src)\n#  define memcpyuao_k(dst, src, ct) DowncastToUc(plink2::MemcpyaoK<ct>(dst, src))\n\n#  else  // !(defined(USE_SSE2) && defined(__cplusplus) && !defined(NO_UNALIGNED))\n\nHEADER_INLINE int32_t memequal_k(const void* m1, const void* m2, uintptr_t ct) {\n  return !memcmp(m1, m2, ct);\n}\n\nHEADER_INLINE void memcpy_k(void* __restrict dst, const void* __restrict src, uintptr_t ct) {\n  memcpy(dst, src, ct);\n}\n\nHEADER_INLINE char* memcpya_k(void* __restrict dst, const void* __restrict src, uintptr_t ct) {\n  return memcpya(dst, src, ct);\n}\n\nHEADER_INLINE unsigned char* memcpyua_k(void* __restrict dst, const void* __restrict src, uintptr_t ct) {\n  return memcpyua(dst, src, ct);\n}\n\nHEADER_INLINE void memcpyo_k(void* __restrict dst, const void* __restrict src, uintptr_t ct) {\n  memcpy(dst, src, ct);\n}\n\nHEADER_INLINE char* memcpyao_k(void* __restrict dst, const void* __restrict src, uintptr_t ct) {\n  return memcpya(dst, src, ct);\n}\n\nHEADER_INLINE unsigned char* memcpyuao_k(void* __restrict dst, const void* __restrict src, uintptr_t ct) {\n  return memcpyua(dst, src, ct);\n}\n\n#endif\n\nHEADER_INLINE char* strcpya(char* __restrict dst, const void* __restrict src) {\n  const uintptr_t slen = strlen(S_CAST(const char*, src));\n  return memcpya(dst, src, slen);\n}\n\n#if defined(__LP64__) && (__cplusplus >= 201103L)\nconstexpr uint32_t CompileTimeSlen(const char* k_str) {\n  return k_str[0]? 
(1 + CompileTimeSlen(&(k_str[1]))) : 0;\n}\n#endif\n\n#if defined(USE_SSE2) && (__cplusplus >= 201103L) && !defined(NO_UNALIGNED)\n\n#  define strcpy_k(dst, src) plink2::MemcpyKImpl<plink2::CompileTimeSlen(src) + 1>::MemcpyK(dst, src);\n\n#  define strcpya_k(dst, src) plink2::MemcpyaoK<plink2::CompileTimeSlen(src)>(dst, src);\n\n#else\n\nHEADER_INLINE void strcpy_k(char* __restrict dst, const void* __restrict src) {\n  strcpy(dst, S_CAST(const char*, src));\n}\n\nHEADER_INLINE char* strcpya_k(char* __restrict dst, const void* __restrict src) {\n  return strcpya(dst, src);\n}\n\n#endif\n\n// A few more string-rendering functions that would normally live in\n// plink2_string, to work around PRI{d,u}PTR and PRI{d,u}64 warning on CRAN\n// Windows builds.\n\nextern const uint16_t kDigitPair[];\n\nchar* u32toa(uint32_t uii, char* start);\n\nHEADER_INLINE char* uitoa_z4(uint32_t uii, char* start) {\n  uint32_t quotient = uii / 100;\n  assert(quotient < 100);\n  uii -= 100 * quotient;\n  start = memcpya_k(start, &(kDigitPair[quotient]), 2);\n  return memcpya_k(start, &(kDigitPair[uii]), 2);\n}\n\nHEADER_INLINE char* u32toa_z6(uint32_t uii, char* start) {\n  uint32_t quotient = uii / 10000;\n  start = memcpya_k(start, &(kDigitPair[quotient]), 2);\n  return uitoa_z4(uii - 10000 * quotient, start);\n}\n\nHEADER_INLINE char* uitoa_z8(uint32_t uii, char* start) {\n  uint32_t quotient = uii / 1000000;\n  start = memcpya_k(start, &(kDigitPair[quotient]), 2);\n  return u32toa_z6(uii - 1000000 * quotient, start);\n}\n\nchar* i64toa(int64_t llii, char* start);\n\n#ifdef __LP64__\n// really just for printing line numbers\n// must be less than 2^63\nHEADER_INLINE char* wtoa(uintptr_t ulii, char* start) {\n  return i64toa(ulii, start);\n}\n#else\nHEADER_INLINE char* wtoa(uintptr_t ulii, char* start) {\n  return u32toa(ulii, start);\n}\n#endif\n\nHEADER_INLINE void CopyFromUnalignedW(uintptr_t* dst, const unsigned char* src) {\n  memcpy_k(dst, src, kBytesPerWord);\n}\n\nHEADER_INLINE void CopyFromUnalignedU32(uint32_t* dst, const unsigned char* src) {\n  memcpy_k(dst, src, sizeof(int32_t));\n}\n\nHEADER_INLINE void CopyFromUnalignedU16(uint16_t* dst, const unsigned char* src) {\n  memcpy_k(dst, src, sizeof(int16_t));\n}\n\nHEADER_INLINE void CopyFromUnalignedI16(int16_t* dst, const unsigned char* src) {\n  memcpy_k(dst, src, sizeof(int16_t));\n}\n\nHEADER_INLINE void CopyFromUnalignedF(float* dst, const unsigned char* src) {\n  memcpy_k(dst, src, sizeof(float));\n}\n\n// [u]int16_t (and unsigned char) arithmetic is slower than uint32_t/uint64_t\n// arithmetic on some CPUs.\n// todo: check whether uint32_t vs. 
uintptr_t return value matters\nHEADER_INLINE uint32_t CopyFromUnalignedU16ZX(const unsigned char* src) {\n#ifndef NO_UNALIGNED\n  return *R_CAST(const uint16_t*, src);\n#else\n  uint16_t cur_u16;\n  memcpy_k(&cur_u16, src, sizeof(int16_t));\n  return cur_u16;\n#endif\n}\n\nHEADER_INLINE int32_t CopyFromUnalignedI16ZX(const unsigned char* src) {\n#ifndef NO_UNALIGNED\n  return *R_CAST(const int16_t*, src);\n#else\n  int16_t cur_i16;\n  memcpy_k(&cur_i16, src, sizeof(int16_t));\n  return cur_i16;\n#endif\n}\n\nHEADER_INLINE void CopyToUnalignedW(unsigned char* dst, const uintptr_t* src) {\n  memcpy_k(dst, src, kBytesPerWord);\n}\n\nHEADER_INLINE void CopyToUnalignedU64(unsigned char* dst, const uint64_t* src) {\n  memcpy_k(dst, src, sizeof(int64_t));\n}\n\nHEADER_INLINE void CopyToUnalignedU32(unsigned char* dst, const uint32_t* src) {\n  memcpy_k(dst, src, sizeof(int32_t));\n}\n\nHEADER_INLINE void CopyToUnalignedF(unsigned char* dst, const float* src) {\n  memcpy_k(dst, src, sizeof(float));\n}\n\nHEADER_INLINE void CopyFromUnalignedOffsetW(uintptr_t* dst, const unsigned char* src, uintptr_t offset) {\n  memcpy_k(dst, &(src[offset * kBytesPerWord]), kBytesPerWord);\n}\n\nHEADER_INLINE void CopyFromUnalignedOffsetHW(Halfword* dst, const unsigned char* src, uintptr_t offset) {\n  memcpy_k(dst, &(src[offset * sizeof(Halfword)]), sizeof(Halfword));\n}\n\nHEADER_INLINE void CopyFromUnalignedOffsetQW(Quarterword* dst, const unsigned char* src, uintptr_t offset) {\n  memcpy_k(dst, &(src[offset * sizeof(Quarterword)]), sizeof(Quarterword));\n}\n\nHEADER_INLINE void CopyFromUnalignedOffsetU64(uint64_t* dst, const unsigned char* src, uintptr_t offset) {\n  memcpy_k(dst, &(src[offset * sizeof(int64_t)]), sizeof(int64_t));\n}\n\nHEADER_INLINE void CopyFromUnalignedOffsetU32(uint32_t* dst, const unsigned char* src, uintptr_t offset) {\n  memcpy_k(dst, &(src[offset * sizeof(int32_t)]), sizeof(int32_t));\n}\n\nHEADER_INLINE void CopyFromUnalignedOffsetI32(int32_t* dst, const unsigned char* src, uintptr_t offset) {\n  memcpy_k(dst, &(src[offset * sizeof(int32_t)]), sizeof(int32_t));\n}\n\nHEADER_INLINE void CopyFromUnalignedOffsetU16(uint16_t* dst, const unsigned char* src, uintptr_t offset) {\n  memcpy_k(dst, &(src[offset * sizeof(int16_t)]), sizeof(int16_t));\n}\n\nHEADER_INLINE void CopyFromUnalignedOffsetI16(int16_t* dst, const unsigned char* src, uintptr_t offset) {\n  memcpy_k(dst, &(src[offset * sizeof(int16_t)]), sizeof(int16_t));\n}\n\nHEADER_INLINE void CopyFromUnalignedOffsetF(float* dst, const unsigned char* src, uintptr_t offset) {\n  memcpy_k(dst, &(src[offset * sizeof(float)]), sizeof(float));\n}\n\nHEADER_INLINE uint32_t CopyFromUnalignedOffsetU16ZX(const unsigned char* src, uintptr_t offset) {\n#ifndef NO_UNALIGNED\n  return R_CAST(const uint16_t*, src)[offset];\n#else\n  uint16_t cur_u16;\n  memcpy_k(&cur_u16, &(src[offset * sizeof(int16_t)]), sizeof(int16_t));\n  return cur_u16;\n#endif\n}\n\nHEADER_INLINE int32_t CopyFromUnalignedOffsetI16ZX(const unsigned char* src, uintptr_t offset) {\n#ifndef NO_UNALIGNED\n  return R_CAST(const int16_t*, src)[offset];\n#else\n  int16_t cur_i16;\n  memcpy_k(&cur_i16, &(src[offset * sizeof(int16_t)]), sizeof(int16_t));\n  return cur_i16;\n#endif\n}\n\nHEADER_INLINE void CopyToUnalignedOffsetW(unsigned char* dst, const uintptr_t* src, uintptr_t offset) {\n  memcpy_k(&(dst[offset * sizeof(intptr_t)]), src, sizeof(intptr_t));\n}\n\nHEADER_INLINE void CopyToUnalignedOffsetHW(unsigned char* dst, const Halfword* src, uintptr_t offset) {\n  
memcpy_k(&(dst[offset * sizeof(Halfword)]), src, sizeof(Halfword));\n}\n\nHEADER_INLINE void CopyToUnalignedOffsetU64(unsigned char* dst, const uint64_t* src, uintptr_t offset) {\n  memcpy_k(&(dst[offset * sizeof(int64_t)]), src, sizeof(int64_t));\n}\n\nHEADER_INLINE void CopyToUnalignedOffsetU32(unsigned char* dst, const uint32_t* src, uintptr_t offset) {\n  memcpy_k(&(dst[offset * sizeof(int32_t)]), src, sizeof(int32_t));\n}\n\nHEADER_INLINE void CopyToUnalignedOffsetU16(unsigned char* dst, const uint16_t* src, uintptr_t offset) {\n  memcpy_k(&(dst[offset * sizeof(int16_t)]), src, sizeof(int16_t));\n}\n\nHEADER_INLINE void CopyToUnalignedOffsetI16(unsigned char* dst, const int16_t* src, uintptr_t offset) {\n  memcpy_k(&(dst[offset * sizeof(int16_t)]), src, sizeof(int16_t));\n}\n\nHEADER_INLINE void CopyToUnalignedOffsetF(unsigned char* dst, const float* src, uintptr_t offset) {\n  memcpy_k(&(dst[offset * sizeof(float)]), src, sizeof(float));\n}\n\n#ifdef USE_SSE2\nHEADER_INLINE void CopyToUnalignedOffsetV8(unsigned char* dst, const Vec8thUint* src, uintptr_t offset) {\n  memcpy_k(&(dst[offset * sizeof(Vec8thUint)]), src, sizeof(Vec8thUint));\n}\n\nHEADER_INLINE void CopyToUnalignedOffsetV16(unsigned char* dst, const Vec16thUint* src, uintptr_t offset) {\n  memcpy_k(&(dst[offset * sizeof(Vec16thUint)]), src, sizeof(Vec16thUint));\n}\n#endif\n\nHEADER_INLINE void CopyFromUnalignedIncrW(uintptr_t* dst, const unsigned char** srcp) {\n  memcpy_k(dst, *srcp, kBytesPerWord);\n  *srcp += kBytesPerWord;\n}\n\nHEADER_INLINE void CopyFromUnalignedIncrU32(uint32_t* dst, const unsigned char** srcp) {\n  memcpy_k(dst, *srcp, sizeof(int32_t));\n  *srcp += sizeof(int32_t);\n}\n\nHEADER_INLINE void CopyFromUnalignedIncrU16(uint16_t* dst, const unsigned char** srcp) {\n  memcpy_k(dst, *srcp, sizeof(int16_t));\n  *srcp += sizeof(int16_t);\n}\n\nHEADER_INLINE void CopyFromUnalignedIncrI16(int16_t* dst, const unsigned char** srcp) {\n  memcpy_k(dst, *srcp, sizeof(int16_t));\n  *srcp += sizeof(int16_t);\n}\n\nHEADER_INLINE uint32_t CopyFromUnalignedIncrU16ZX(const unsigned char** srcp) {\n#ifndef NO_UNALIGNED\n  const uint32_t result = *R_CAST(const uint16_t*, *srcp);\n#else\n  uint16_t result;\n  memcpy_k(&result, *srcp, sizeof(int16_t));\n#endif\n  *srcp += sizeof(int16_t);\n  return result;\n}\n\nHEADER_INLINE int32_t CopyFromUnalignedIncrI16ZX(const unsigned char** srcp) {\n#ifndef NO_UNALIGNED\n  const int32_t result = *R_CAST(const int16_t*, *srcp);\n#else\n  int16_t result;\n  memcpy_k(&result, *srcp, sizeof(int16_t));\n#endif\n  *srcp += sizeof(int16_t);\n  return result;\n}\n\n// no need to spell out 'CopyToUnalignedIncr' here; and we append constants\n// often enough to justify no src pointer\n\nHEADER_INLINE void AppendW(uintptr_t ulii, unsigned char** targetp) {\n  memcpy_k(*targetp, &ulii, kBytesPerWord);\n  *targetp += kBytesPerWord;\n}\n\nHEADER_INLINE void AppendU64(uint64_t ullii, unsigned char** targetp) {\n  memcpy_k(*targetp, &ullii, sizeof(int64_t));\n  *targetp += sizeof(int64_t);\n}\n\nHEADER_INLINE void AppendU32(uint32_t uii, unsigned char** targetp) {\n  memcpy_k(*targetp, &uii, sizeof(int32_t));\n  *targetp += sizeof(int32_t);\n}\n\nHEADER_INLINE void AppendU16(uint16_t usii, unsigned char** targetp) {\n  memcpy_k(*targetp, &usii, sizeof(int16_t));\n  *targetp += sizeof(int16_t);\n}\n\nHEADER_INLINE void CAppendW(uintptr_t ulii, char** targetp) {\n  memcpy_k(*targetp, &ulii, kBytesPerWord);\n  *targetp += kBytesPerWord;\n}\n\nHEADER_INLINE void CAppendU32(uint32_t uii, char** 
targetp) {\n  memcpy_k(*targetp, &uii, sizeof(int32_t));\n  *targetp += sizeof(int32_t);\n}\n\nHEADER_INLINE void CAppendU16(uint16_t usii, char** targetp) {\n  memcpy_k(*targetp, &usii, sizeof(int16_t));\n  *targetp += sizeof(int16_t);\n}\n\n\n// now compiling with gcc >= 4.4 (or clang equivalent) on all platforms, so\n// safe to use memset everywhere\nHEADER_INLINE void ZeroU32Arr(uintptr_t entry_ct, uint32_t* u32arr) {\n  memset(u32arr, 0, entry_ct * sizeof(int32_t));\n}\n\nHEADER_INLINE void ZeroWArr(uintptr_t entry_ct, uintptr_t* warr) {\n  memset(warr, 0, entry_ct * sizeof(intptr_t));\n}\n\nHEADER_INLINE void ZeroU64Arr(uintptr_t entry_ct, uint64_t* u64arr) {\n  memset(u64arr, 0, entry_ct * sizeof(int64_t));\n}\n\nHEADER_INLINE void ZeroPtrArr(uintptr_t entry_ct, void* pp) {\n  memset(pp, 0, entry_ct * sizeof(intptr_t));\n}\n\nHEADER_INLINE void ZeroHwArr(uintptr_t entry_ct, Halfword* hwarr) {\n  memset(hwarr, 0, entry_ct * sizeof(Halfword));\n}\n\nHEADER_INLINE void SetAllWArr(uintptr_t entry_ct, uintptr_t* warr) {\n  // todo: test this against vecset()\n  for (uintptr_t idx = 0; idx != entry_ct; ++idx) {\n    warr[idx] = ~k0LU;\n  }\n}\n\n\n// tried _bzhi_u64() in AVX2 case, it was actually worse on my Mac (more\n// opaque to compiler?)\n// todo: check gcc behavior since it may be different: see\n// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82298 .\n//\n// This is undefined if idx == kBitsPerWord.\nHEADER_INLINE uintptr_t bzhi(uintptr_t ww, uint32_t idx) {\n  return ww & ((k1LU << idx) - k1LU);\n}\n\n// This is undefined if idx == 0.\nHEADER_INLINE uintptr_t bzhi_max(uintptr_t ww, uint32_t idx) {\n  return ww & ((~k0LU) >> (kBitsPerWord - idx));\n}\n\n// Don't bother defining blsr(), compiler should automatically use the\n// instruction under -mbmi and regular code is more readable?  (again, should\n// verify this is true for gcc)\n\nHEADER_INLINE uint32_t BytesToRepresentNzU32(uint32_t uii) {\n  return 1 + (bsru32(uii) / CHAR_BIT);\n}\n\n// analogous to memset()\n// this can be slightly slower if e.g. system supports AVX2 but non-AVX2 plink2\n// build is in use; fine to pay that price given the small-array advantage for\n// now.  
Should revisit this after next build-machine Ubuntu upgrade, though.\nHEADER_INLINE void vecset(void* target_vec, uintptr_t ww, uintptr_t vec_ct) {\n  VecW* target_vec_iter = S_CAST(VecW*, target_vec);\n#ifdef USE_SSE2\n  const VecW payload = VCONST_W(ww);\n  for (uintptr_t vec_idx = 0; vec_idx != vec_ct; ++vec_idx) {\n    *target_vec_iter++ = payload;\n  }\n#else\n  for (uintptr_t vec_idx = 0; vec_idx != vec_ct; ++vec_idx) {\n    *target_vec_iter++ = ww;\n  }\n#endif\n}\n\n// todo: make sure these are efficient for small ct\nHEADER_INLINE void u16set(void* dst, uint16_t usii, uintptr_t ct) {\n  uint16_t* dst_u16 = S_CAST(uint16_t*, dst);\n  for (uintptr_t ulii = 0; ulii != ct; ++ulii) {\n    dst_u16[ulii] = usii;\n  }\n}\n\nHEADER_INLINE char* u16setsa(char* dst, uint16_t usii, uintptr_t ct) {\n  u16set(dst, usii, ct);\n  return &(dst[ct * 2]);\n}\n\nHEADER_INLINE uintptr_t ClearBottomSetBits(uint32_t ct, uintptr_t ulii) {\n#ifdef USE_AVX2\n  return _pdep_u64((~k0LU) << ct, ulii);\n#else\n  for (uint32_t uii = 0; uii != ct; ++uii) {\n    ulii &= ulii - 1;\n  }\n  return ulii;\n#endif\n}\n\nHEADER_INLINE uint32_t WordBitIdxToUidx(uintptr_t ulii, uint32_t bit_idx) {\n  return ctzw(ClearBottomSetBits(bit_idx, ulii));\n}\n\nCONSTI32(kNybblesPerWord, 2 * kBytesPerWord);\nCONSTI32(kNybblesPerCacheline, 2 * kCacheline);\n\nHEADER_CINLINE uintptr_t NybbleCtToByteCt(uintptr_t val) {\n  return DivUp(val, 2);\n}\n\nHEADER_CINLINE uintptr_t NybbleCtToWordCt(uintptr_t val) {\n  return DivUp(val, kNybblesPerWord);\n}\n\nHEADER_INLINE uintptr_t GetNybbleArrEntry(const uintptr_t* nybblearr, uint32_t idx) {\n  return (nybblearr[idx / kBitsPerWordD4] >> (4 * (idx % kBitsPerWordD4))) & 15;\n}\n\n// Returns zero when ww has no zero bytes, and a word where the lowest set bit\n// is at position 8x + 7 when the first zero byte is [8x .. 8x+7].\nHEADER_INLINE uintptr_t DetectFirstZeroByte(uintptr_t ww) {\n  return (ww - kMask0101) & (~ww) & (kMask0101 * 0x80);\n}\n\n// From TAOCP 4a, 7.1.3, (91).\n// Position 8x + 7 is always set iff byte x is zero.  
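\n// (Equivalently: the return value is nonzero iff ww contains at least one\n// zero byte.  Illustrative worked constant, not from upstream: in a 64-bit\n// build, DetectAllZeroBytes(0x0012003400560078) == 0x8000800080008000,\n// flagging the zero bytes at little-endian byte indexes 1, 3, 5, and 7.)\n// 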
All other bits are always\n// zero.\nHEADER_INLINE uintptr_t DetectAllZeroBytes(uintptr_t ww) {\n  return (kMask0101 * 0x80) & (~(ww | ((ww | (kMask0101 * 0x80)) - kMask0101)));\n}\n\nHEADER_INLINE uintptr_t DetectAllZeroNybbles(uintptr_t ww) {\n  return (kMask1111 * 8) & (~(ww | ((ww | (kMask1111 * 8)) - kMask1111)));\n}\n\n#if defined(USE_SSE2) && !defined(NO_UNALIGNED)\n// This requires nbytes >= 4.\nuintptr_t FirstUnequal4(const void* arr1, const void* arr2, uintptr_t nbytes);\n\nHEADER_INLINE uintptr_t FirstUnequal(const void* arr1, const void* arr2, uintptr_t nbytes) {\n  // Returns position of first byte mismatch, or nbytes if none was found.\n  if (nbytes >= 4) {\n    return FirstUnequal4(arr1, arr2, nbytes);\n  }\n  const char* s1 = S_CAST(const char*, arr1);\n  const char* s2 = S_CAST(const char*, arr2);\n  for (uintptr_t pos = 0; pos != nbytes; ++pos) {\n    if (s1[pos] != s2[pos]) {\n      return pos;\n    }\n  }\n  return nbytes;\n}\n#else // !(defined(USE_SSE2) && !defined(NO_UNALIGNED))\n// This requires nbytes >= kBytesPerWord.\nuintptr_t FirstUnequalW(const void* arr1, const void* arr2, uintptr_t nbytes);\n\nHEADER_INLINE uintptr_t FirstUnequal(const void* arr1, const void* arr2, uintptr_t nbytes) {\n  // Returns position of first byte mismatch, or nbytes if none was found.\n  if (nbytes >= kBytesPerWord) {\n    return FirstUnequalW(arr1, arr2, nbytes);\n  }\n  const char* s1 = S_CAST(const char*, arr1);\n  const char* s2 = S_CAST(const char*, arr2);\n  for (uintptr_t pos = 0; pos != nbytes; ++pos) {\n    if (s1[pos] != s2[pos]) {\n      return pos;\n    }\n  }\n  return nbytes;\n}\n#endif\n\n\nHEADER_INLINE uintptr_t FirstUnequalFrom(const void* arr1, const void* arr2, uintptr_t start, uintptr_t nbytes) {\n  const char* s1 = S_CAST(const char*, arr1);\n  const char* s2 = S_CAST(const char*, arr2);\n  return start + FirstUnequal(&(s1[start]), &(s2[start]), nbytes - start);\n}\n\n\nHEADER_INLINE void* arena_alloc_raw(uintptr_t size, unsigned char** arena_bottom_ptr) {\n  assert(!(size % kCacheline));\n  unsigned char* alloc_ptr = *arena_bottom_ptr;\n  *arena_bottom_ptr = &(alloc_ptr[size]);\n  return alloc_ptr;\n}\n\nHEADER_INLINE void* arena_alloc_raw_rd(uintptr_t size, unsigned char** arena_bottom_ptr) {\n  unsigned char* alloc_ptr = *arena_bottom_ptr;\n  *arena_bottom_ptr = &(alloc_ptr[RoundUpPow2(size, kCacheline)]);\n  return alloc_ptr;\n}\n\n// A VINT is a sequence of bytes where each byte stores just 7 bits of\n// an integer, and the high bit is set when the integer has more nonzero bits.\n// See e.g.\n//   https://developers.google.com/protocol-buffers/docs/encoding#varints\n// (Note that protocol buffers used \"group varints\" at one point, but then\n// abandoned them.  I suspect they'd be simultaneously slower and less\n// compact here.)\n\nHEADER_INLINE unsigned char* Vint32Append(uint32_t uii, unsigned char* buf) {\n  while (uii > 127) {\n    *buf++ = (uii & 127) + 128;\n    uii >>= 7;\n  }\n  *buf++ = uii;\n  return buf;\n}\n\n// Returns 0x80000000U on read-past-end instead of UINT32_MAX so overflow check\n// works properly in 32-bit build.  
Named \"GetVint31\" to make it more obvious\n// that a 2^31 return value can't be legitimate.\nHEADER_INLINE uint32_t GetVint31(const unsigned char* buf_end, const unsigned char** buf_iterp) {\n  if (likely(buf_end > (*buf_iterp))) {\n    uint32_t vint32 = *((*buf_iterp)++);\n    if (vint32 <= 127) {\n      return vint32;\n    }\n    vint32 &= 127;\n    uint32_t shift = 7;\n    while (likely(buf_end > (*buf_iterp))) {\n      uint32_t uii = *((*buf_iterp)++);\n      vint32 |= (uii & 127) << shift;\n      if (uii <= 127) {\n        return vint32;\n      }\n      shift += 7;\n      // currently don't check for shift >= 32 (that's what ValidateVint31()\n      // is for).\n    }\n  }\n  return 0x80000000U;\n}\n\n#ifdef __LP64__\nuintptr_t CountVintsNonempty(const unsigned char* buf, const unsigned char* buf_end);\n\nHEADER_INLINE uintptr_t CountVints(const unsigned char* buf, const unsigned char* buf_end) {\n  if (buf == buf_end) {\n    return 0;\n  }\n  return CountVintsNonempty(buf, buf_end);\n}\n#else\nuintptr_t CountVints(const unsigned char* buf, const unsigned char* buf_end);\n\nHEADER_INLINE uintptr_t CountVintsNonempty(const unsigned char* buf, const unsigned char* buf_end) {\n  return CountVints(buf, buf_end);\n}\n#endif\n\n// Number of bytes required to encode ulii as a varint.\nHEADER_INLINE uint32_t VintBytect(uintptr_t ulii) {\n  if (ulii < 128) {\n    // bsrw(0) is undefined.\n    return 1;\n  }\n  return 1 + (bsrw(ulii) / 7);\n}\n\n// Flagset conventions:\n// * Each 32-bit and 64-bit flagset has its own type, which is guaranteed to be\n//   the appropriate width.  (Todo: verify that bit 31 works properly in 32-bit\n//   case.)\n// * Constant flag names start with \"kf[CamelCase description]\", followed by a\n//   description that shouldn't suck too badly.  The zero flagset is always\n//   named kf[CamelCase description]0.\n// * The type name is always of the form [snake_case description]_flags_t.\n// * To gain the desired level of type-checking under C++11 without pointless\n//   verbosity, &, |, ^, ~, &=, |=, and ^= operations are defined; [my_flags_t\n//   variable] |= [another my_flags_t variable] & [a my_flags_t constant] works\n//   without an explicit cast.  (Defining \"struct my_flags_t\" separately from\n//   the enum global-scope-constants container is necessary to make |= work\n//   without a cast.  inline is needed due to duplicate operator definitions\n//   across multiple files.)\n// * To slightly reduce the chance of breakage under C99/C++03, the enum is\n//   nameless; the flagset type is just a uint32_t/uint64_t alias.  
This is\n//   because the C99 and C++03 specs do not provide enough control over the\n//   enum base type to make it safe for the enum to serve as the flagset type.\n// * Implicit conversion to int is not prevented for now, since I'm trying to\n//   keep PglErr-style code duplication to a minimum.\n#if __cplusplus >= 201103L\n\n  // could avoid the typedef here, but that leads to a bit more verbosity.\n#  define FLAGSET_DEF_START() typedef enum : uint32_t {\n#  define FLAGSET_DEF_END(tname) } tname ## _PLINK2_BASE_DO_NOT_USE__ ; \\\n  \\\ninline tname ## _PLINK2_BASE_DO_NOT_USE__ operator|(tname ## _PLINK2_BASE_DO_NOT_USE__ aa, tname ## _PLINK2_BASE_DO_NOT_USE__ bb) { \\\n  return static_cast<tname ## _PLINK2_BASE_DO_NOT_USE__>(static_cast<uint32_t>(aa) | static_cast<uint32_t>(bb)); \\\n} \\\n  \\\ninline tname ## _PLINK2_BASE_DO_NOT_USE__ operator&(tname ## _PLINK2_BASE_DO_NOT_USE__ aa, tname ## _PLINK2_BASE_DO_NOT_USE__ bb) { \\\n  return static_cast<tname ## _PLINK2_BASE_DO_NOT_USE__>(static_cast<uint32_t>(aa) & static_cast<uint32_t>(bb)); \\\n} \\\n  \\\ninline tname ## _PLINK2_BASE_DO_NOT_USE__ operator^(tname ## _PLINK2_BASE_DO_NOT_USE__ aa, tname ## _PLINK2_BASE_DO_NOT_USE__ bb) { \\\n  return static_cast<tname ## _PLINK2_BASE_DO_NOT_USE__>(static_cast<uint32_t>(aa) ^ static_cast<uint32_t>(bb)); \\\n} \\\n  \\\ninline tname ## _PLINK2_BASE_DO_NOT_USE__ operator~(tname ## _PLINK2_BASE_DO_NOT_USE__ aa) { \\\n  return static_cast<tname ## _PLINK2_BASE_DO_NOT_USE__>(~static_cast<uint32_t>(aa)); \\\n} \\\n  \\\nstruct tname { \\\n  tname() {} \\\n  \\\n  tname(const tname& source) : value_(source.value_) {} \\\n  \\\n  tname(const tname ## _PLINK2_BASE_DO_NOT_USE__ source) : value_(static_cast<uint32_t>(source)) {} \\\n  \\\n  explicit tname(uint32_t source) : value_(source) {} \\\n  \\\n  operator tname ## _PLINK2_BASE_DO_NOT_USE__() const { \\\n    return static_cast<tname ## _PLINK2_BASE_DO_NOT_USE__>(value_); \\\n  } \\\n  \\\n  tname& operator|=(const tname ## _PLINK2_BASE_DO_NOT_USE__ rhs) { \\\n    value_ |= rhs; \\\n    return *this; \\\n  } \\\n  \\\n  tname& operator&=(const tname ## _PLINK2_BASE_DO_NOT_USE__ rhs) { \\\n    value_ &= rhs; \\\n    return *this; \\\n  } \\\n  \\\n  tname& operator^=(const tname ## _PLINK2_BASE_DO_NOT_USE__ rhs) { \\\n    value_ ^= rhs; \\\n    return *this; \\\n  } \\\n  \\\n  tname& operator=(const tname& rhs) = default; \\\n  \\\nprivate: \\\n  uint32_t value_; \\\n}\n\n#  define FLAGSET64_DEF_START() typedef enum : uint64_t {\n#  define FLAGSET64_DEF_END(tname) } tname ## _PLINK2_BASE_DO_NOT_USE__ ; \\\n  \\\ninline tname ## _PLINK2_BASE_DO_NOT_USE__ operator|(tname ## _PLINK2_BASE_DO_NOT_USE__ aa, tname ## _PLINK2_BASE_DO_NOT_USE__ bb) { \\\n  return static_cast<tname ## _PLINK2_BASE_DO_NOT_USE__>(static_cast<uint64_t>(aa) | static_cast<uint64_t>(bb)); \\\n} \\\n  \\\ninline tname ## _PLINK2_BASE_DO_NOT_USE__ operator&(tname ## _PLINK2_BASE_DO_NOT_USE__ aa, tname ## _PLINK2_BASE_DO_NOT_USE__ bb) { \\\n  return static_cast<tname ## _PLINK2_BASE_DO_NOT_USE__>(static_cast<uint64_t>(aa) & static_cast<uint64_t>(bb)); \\\n} \\\n  \\\ninline tname ## _PLINK2_BASE_DO_NOT_USE__ operator^(tname ## _PLINK2_BASE_DO_NOT_USE__ aa, tname ## _PLINK2_BASE_DO_NOT_USE__ bb) { \\\n  return static_cast<tname ## _PLINK2_BASE_DO_NOT_USE__>(static_cast<uint64_t>(aa) ^ static_cast<uint64_t>(bb)); \\\n} \\\n  \\\ninline tname ## _PLINK2_BASE_DO_NOT_USE__ operator~(tname ## _PLINK2_BASE_DO_NOT_USE__ aa) { \\\n  return static_cast<tname ## 
_PLINK2_BASE_DO_NOT_USE__>(~static_cast<uint64_t>(aa)); \\\n} \\\n  \\\nstruct tname { \\\n  tname() {} \\\n  \\\n  tname(const tname& source) : value_(source.value_) {} \\\n  \\\n  tname(const tname ## _PLINK2_BASE_DO_NOT_USE__ source) : value_(static_cast<uint64_t>(source)) {} \\\n  \\\n  explicit tname(uint64_t source) : value_(source) {} \\\n  \\\n  operator tname ## _PLINK2_BASE_DO_NOT_USE__() const { \\\n    return static_cast<tname ## _PLINK2_BASE_DO_NOT_USE__>(value_); \\\n  } \\\n  \\\n  tname& operator|=(const tname ## _PLINK2_BASE_DO_NOT_USE__ rhs) { \\\n    value_ |= rhs; \\\n    return *this; \\\n  } \\\n  \\\n  tname& operator&=(const tname ## _PLINK2_BASE_DO_NOT_USE__ rhs) { \\\n    value_ &= rhs; \\\n    return *this; \\\n  } \\\n  \\\n  tname& operator^=(const tname ## _PLINK2_BASE_DO_NOT_USE__ rhs) { \\\n    value_ ^= rhs; \\\n    return *this; \\\n  } \\\n  \\\n  tname& operator=(const tname& rhs) = default; \\\n  \\\nprivate: \\\n  uint64_t value_; \\\n}\n\n#  define ENUM_U31_DEF_START() typedef enum : uint32_t {\n#  define ENUM_U31_DEF_END(tname) } tname\n\n#else  // !__cplusplus >= 201103L\n\n#  define FLAGSET_DEF_START() enum {\n#  define FLAGSET_DEF_END(tname) } ; \\\ntypedef uint32_t tname\n\n  // don't use a nameless enum here, since we want to be able to static_assert\n  // the enum size.\n  // best to artificially add an element to the end for now to force width to\n  // 64-bit, otherwise gcc actually shrinks it even when the constants are\n  // defined with LLU.\n#  define FLAGSET64_DEF_START() typedef enum {\n#  define FLAGSET64_DEF_END(tname) , \\\n  tname ## PLINK2_BASE_DO_NOT_USE__ALL_64_SET__ = ~(0LLU) } tname ## _PLINK2_BASE_DO_NOT_USE__ ; \\\nstatic_assert(sizeof(tname ## _PLINK2_BASE_DO_NOT_USE__) == 8, \"64-bit flagset constants are not actually uint64_ts.\"); \\\ntypedef uint64_t tname\n\n#  define ENUM_U31_DEF_START() typedef enum {\n#  define ENUM_U31_DEF_END(tname) } tname ## _PLINK2_BASE_DO_NOT_USE__ ; \\\ntypedef uint32_t tname\n\n#endif\n\n// This supports private struct members in code that still compiles as C.\n//\n// Internal code should access these members with GET_PRIVATE(), and define a\n// pair of public C++-only GET_PRIVATE_...() member functions (one const and\n// one non-const) that each return a reference to the member; see plink2_thread\n// for examples.  In addition, .cc API code should define a small number of\n// standard GET_PRIVATE() accessors at the top of the file, and practically all\n// private-member access should occur through those file-scope accessors; this\n// keeps the surface area under control.\n//\n// (Tried to define a DECLARE_PRIVATE(typ, member) macro as well, but didn't\n// get that to work.  This is already pretty painless if it's restricted to key\n// APIs, though.)\n//\n// probable todo: see if the intended effect can be achieved in a simpler\n// manner with well-chosen explicit and implicit type conversions.\n#ifdef __cplusplus\n#  define GET_PRIVATE(par, member) (par).GET_PRIVATE_ ## member()\n#else\n#  define GET_PRIVATE(par, member) (par).member\n#endif\n\nstatic const double kLn2 = 0.6931471805599453;\n\n#ifdef __cplusplus\n}  // namespace plink2\n#endif\n\n#endif  // __PLINK2_BASE_H__\n"
  },
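  {
    "path": "external_libs/pgenlib/example/plink2_base_usage_example.cc",
    "content": "// NOTE: Illustrative usage sketch added alongside this vendored copy of\n// pgenlib; it is NOT part of upstream PLINK 2, and the regenie build does\n// not compile it.  It only exercises HEADER_INLINE primitives declared in\n// plink2_base.h, so it should build as a standalone translation unit, e.g.\n// (hypothetical invocation):\n//   g++ -std=c++14 -I../include plink2_base_usage_example.cc\n\n#include <stdio.h>\n\n#include \"plink2_base.h\"\n\nusing namespace plink2;\n\n// Flagset sketch, following the conventions documented in plink2_base.h:\n// under C++11 this yields a uint32_t-width enum with type-checked |, &, ^, ~.\nFLAGSET_DEF_START()\n  kfDemo0,\n  kfDemoRead = (1 << 0),\n  kfDemoWrite = (1 << 1)\nFLAGSET_DEF_END(DemoFlags);\n\nint main() {\n  // 1. VINT round-trip: each byte carries 7 payload bits, and the high bit\n  //    flags continuation; VintBytect(128) == 2, for example.\n  const uint32_t test_vals[4] = {0, 127, 128, 300000000U};\n  unsigned char buf[20];\n  unsigned char* write_uc = buf;\n  for (uint32_t uii = 0; uii != 4; ++uii) {\n    write_uc = Vint32Append(test_vals[uii], write_uc);\n  }\n  const unsigned char* buf_end = write_uc;\n  const unsigned char* read_iter = buf;\n  for (uint32_t uii = 0; uii != 4; ++uii) {\n    // 0x80000000U would signal read-past-end.\n    if (GetVint31(buf_end, &read_iter) != test_vals[uii]) {\n      fputs(\"VINT round-trip failed\\n\", stderr);\n      return 1;\n    }\n  }\n  // 2. bzhi(): keep the low idx bits of a word (idx < kBitsPerWord).\n  if (bzhi(0xff, 4) != 0xf) {\n    fputs(\"bzhi failed\\n\", stderr);\n    return 1;\n  }\n  // 3. memcpya()/memcpya_k(): copies returning the advanced write cursor, so\n  //    consecutive appends chain without manual offset bookkeeping; the _k\n  //    form is for compile-time counts.\n  char render_buf[16] = {0};\n  char* write_iter = render_buf;\n  write_iter = memcpya_k(write_iter, \"chr\", 3);\n  write_iter = memcpya(write_iter, \"21\", 2);\n  *write_iter = '\\0';\n#ifdef __LP64__\n  // 4. DetectFirstZeroByte(): nonzero iff the word contains a zero byte;\n  //    ctzw() recovers the byte index from the marker bit at position 8x+7.\n  uintptr_t ww;\n  CopyFromUnalignedW(&ww, R_CAST(const unsigned char*, render_buf));\n  const uintptr_t zcheck = DetectFirstZeroByte(ww);\n  if ((!zcheck) || ((ctzw(zcheck) / CHAR_BIT) != 5)) {\n    fputs(\"DetectFirstZeroByte failed\\n\", stderr);\n    return 1;\n  }\n#endif\n  // 5. Flagset usage: |= and & work without explicit casts.\n  DemoFlags flags = kfDemo0;\n  flags |= kfDemoRead;\n  if ((flags & kfDemoRead) && (!(flags & kfDemoWrite))) {\n    printf(\"all checks passed: %s\\n\", render_buf);\n    return 0;\n  }\n  return 1;\n}\n"
  },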
  {
    "path": "external_libs/pgenlib/include/plink2_bits.cc",
    "content": "// This library is part of PLINK 2, copyright (C) 2005-2024 Shaun Purcell,\n// Christopher Chang.\n//\n// This library is free software: you can redistribute it and/or modify it\n// under the terms of the GNU Lesser General Public License as published by the\n// Free Software Foundation; either version 3 of the License, or (at your\n// option) any later version.\n//\n// This library is distributed in the hope that it will be useful, but WITHOUT\n// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or\n// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License\n// for more details.\n//\n// You should have received a copy of the GNU Lesser General Public License\n// along with this library.  If not, see <http://www.gnu.org/licenses/>.\n\n\n#include \"plink2_bits.h\"\n\n#ifdef __cplusplus\nnamespace plink2 {\n#endif\n\n#if defined(USE_SSE2) && !defined(USE_AVX2)\nvoid Pack32bTo16bMask(const void* words_vec, uintptr_t ct_32b, void* dest) {\n  // This is also competitive in the AVX2 case, but never quite beats the\n  // simple loop.  (We'd want to enable a similar function for Ryzen,\n  // processing one 32-byte vector instead of two 16-byte vectors at a time in\n  // the main loop since _mm256_packus_epi16() doesn't do what we want.)\n  const VecW m1 = VCONST_W(kMask5555);\n#  ifdef USE_SHUFFLE8\n  const VecW swap12 = vecw_setr8(\n      0, 1, 4, 5, 2, 3, 6, 7,\n      8, 9, 12, 13, 10, 11, 14, 15);\n#  else\n  const VecW m2 = VCONST_W(kMask3333);\n#  endif\n  const VecW m4 = VCONST_W(kMask0F0F);\n  const VecW m8 = VCONST_W(kMask00FF);\n  const VecW* words_valias = S_CAST(const VecW*, words_vec);\n  unsigned char* dest_uc = S_CAST(unsigned char*, dest);\n  for (uintptr_t vidx = 0; vidx != ct_32b; ++vidx) {\n    VecW vec_lo = vecw_loadu(&(words_valias[2 * vidx])) & m1;\n    VecW vec_hi = vecw_loadu(&(words_valias[2 * vidx + 1])) & m1;\n#  ifdef USE_SHUFFLE8\n    // this right-shift-3 + shuffle shortcut saves two operations.\n    vec_lo = (vec_lo | vecw_srli(vec_lo, 3)) & m4;\n    vec_hi = (vec_hi | vecw_srli(vec_hi, 3)) & m4;\n    vec_lo = vecw_shuffle8(swap12, vec_lo);\n    vec_hi = vecw_shuffle8(swap12, vec_hi);\n#  else\n    vec_lo = (vec_lo | vecw_srli(vec_lo, 1)) & m2;\n    vec_hi = (vec_hi | vecw_srli(vec_hi, 1)) & m2;\n    vec_lo = (vec_lo | vecw_srli(vec_lo, 2)) & m4;\n    vec_hi = (vec_hi | vecw_srli(vec_hi, 2)) & m4;\n#  endif\n    vec_lo = vec_lo | vecw_srli(vec_lo, 4);\n    vec_hi = vec_hi | vecw_srli(vec_hi, 4);\n    const VecW vec_packed = vecw_gather_even(vec_lo, vec_hi, m8);\n    vecw_storeu(&(dest_uc[vidx * 16]), vec_packed);\n  }\n}\n#endif\n\n#ifdef __x86_64__\nVecW vecw_slli_variable_ct(VecW vv, uint32_t ct) {\n  return vecw_slli(vv, ct);\n}\n#else\n// Using a lookup table because NEON bit shift functions can only be called\n// with compile-time constants\n// https://eigen.tuxfamily.org/bz/show_bug.cgi?id=1631\n// https://github.com/VectorCamp/vectorscan/issues/21\nVecW vecw_slli_variable_ct(VecW vv, uint32_t ct) {\n  switch(ct) {\n    default: return vv;\n    case 1: return vecw_slli(vv, 1);\n    case 2: return vecw_slli(vv, 2);\n    case 3: return vecw_slli(vv, 3);\n    case 4: return vecw_slli(vv, 4);\n    case 5: return vecw_slli(vv, 5);\n    case 6: return vecw_slli(vv, 6);\n    case 7: return vecw_slli(vv, 7);\n  }\n}\n#endif\n\nvoid SetAllBits(uintptr_t ct, uintptr_t* bitarr) {\n  // leaves bits beyond the end unset\n  // ok for ct == 0\n  uintptr_t quotient = ct / kBitsPerWord;\n  uintptr_t remainder = ct % 
kBitsPerWord;\n  SetAllWArr(quotient, bitarr);\n  if (remainder) {\n    bitarr[quotient] = (k1LU << remainder) - k1LU;\n  }\n}\n\nvoid FillBitsNz(uintptr_t start_idx, uintptr_t end_idx, uintptr_t* bitarr) {\n  assert(end_idx > start_idx);\n  uintptr_t maj_start = start_idx / kBitsPerWord;\n  uintptr_t maj_end = end_idx / kBitsPerWord;\n  uintptr_t minor;\n  if (maj_start == maj_end) {\n    bitarr[maj_start] |= (k1LU << (end_idx % kBitsPerWord)) - (k1LU << (start_idx % kBitsPerWord));\n  } else {\n    bitarr[maj_start] |= ~((k1LU << (start_idx % kBitsPerWord)) - k1LU);\n    SetAllWArr(maj_end - maj_start - 1, &(bitarr[maj_start + 1]));\n    minor = end_idx % kBitsPerWord;\n    if (minor) {\n      bitarr[maj_end] |= (k1LU << minor) - k1LU;\n    }\n  }\n}\n\nvoid ClearBitsNz(uintptr_t start_idx, uintptr_t end_idx, uintptr_t* bitarr) {\n  assert(end_idx > start_idx);\n  uintptr_t maj_start = start_idx / kBitsPerWord;\n  uintptr_t maj_end = end_idx / kBitsPerWord;\n  uintptr_t minor;\n  if (maj_start == maj_end) {\n    bitarr[maj_start] &= ~((k1LU << (end_idx % kBitsPerWord)) - (k1LU << (start_idx % kBitsPerWord)));\n  } else {\n    bitarr[maj_start] = bzhi(bitarr[maj_start], start_idx % kBitsPerWord);\n    ZeroWArr(maj_end - maj_start - 1, &(bitarr[maj_start + 1]));\n    minor = end_idx % kBitsPerWord;\n    if (minor) {\n      bitarr[maj_end] &= ~((k1LU << minor) - k1LU);\n    }\n  }\n}\n\nvoid BitvecAnd(const uintptr_t* __restrict arg_bitvec, uintptr_t word_ct, uintptr_t* __restrict main_bitvec) {\n  // main_bitvec := main_bitvec AND arg_bitvec\n#ifdef USE_SSE2\n  VecW* main_bitvvec_iter = R_CAST(VecW*, main_bitvec);\n  const VecW* arg_bitvvec_iter = R_CAST(const VecW*, arg_bitvec);\n  const uintptr_t full_vec_ct = word_ct / kWordsPerVec;\n  // ok, retested this explicit unroll (Jun 2018) and it's still noticeably\n  // faster for small cases than the simple loop.  
sigh.\n  if (full_vec_ct & 1) {\n    *main_bitvvec_iter++ &= *arg_bitvvec_iter++;\n  }\n  if (full_vec_ct & 2) {\n    *main_bitvvec_iter++ &= *arg_bitvvec_iter++;\n    *main_bitvvec_iter++ &= *arg_bitvvec_iter++;\n  }\n  for (uintptr_t ulii = 3; ulii < full_vec_ct; ulii += 4) {\n    *main_bitvvec_iter++ &= *arg_bitvvec_iter++;\n    *main_bitvvec_iter++ &= *arg_bitvvec_iter++;\n    *main_bitvvec_iter++ &= *arg_bitvvec_iter++;\n    *main_bitvvec_iter++ &= *arg_bitvvec_iter++;\n  }\n#  ifdef USE_AVX2\n  if (word_ct & 2) {\n    const uintptr_t base_idx = full_vec_ct * kWordsPerVec;\n    main_bitvec[base_idx] &= arg_bitvec[base_idx];\n    main_bitvec[base_idx + 1] &= arg_bitvec[base_idx + 1];\n  }\n#  endif\n  if (word_ct & 1) {\n    main_bitvec[word_ct - 1] &= arg_bitvec[word_ct - 1];\n  }\n#else\n  for (uintptr_t widx = 0; widx != word_ct; ++widx) {\n    main_bitvec[widx] &= arg_bitvec[widx];\n  }\n#endif\n}\n\nvoid BitvecInvmask(const uintptr_t* __restrict exclude_bitvec, uintptr_t word_ct, uintptr_t* __restrict main_bitvec) {\n  // main_bitvec := main_bitvec ANDNOT exclude_bitvec\n  // note that this is the reverse of the _mm_andnot() operand order\n#ifdef USE_SSE2\n  VecW* main_bitvvec_iter = R_CAST(VecW*, main_bitvec);\n  const VecW* exclude_bitvvec_iter = R_CAST(const VecW*, exclude_bitvec);\n  const uintptr_t full_vec_ct = word_ct / kWordsPerVec;\n  if (full_vec_ct & 1) {\n    *main_bitvvec_iter = vecw_and_notfirst(*exclude_bitvvec_iter++, *main_bitvvec_iter);\n    ++main_bitvvec_iter;\n  }\n  if (full_vec_ct & 2) {\n    *main_bitvvec_iter = vecw_and_notfirst(*exclude_bitvvec_iter++, *main_bitvvec_iter);\n    ++main_bitvvec_iter;\n    *main_bitvvec_iter = vecw_and_notfirst(*exclude_bitvvec_iter++, *main_bitvvec_iter);\n    ++main_bitvvec_iter;\n  }\n  for (uintptr_t ulii = 3; ulii < full_vec_ct; ulii += 4) {\n    *main_bitvvec_iter = vecw_and_notfirst(*exclude_bitvvec_iter++, *main_bitvvec_iter);\n    ++main_bitvvec_iter;\n    *main_bitvvec_iter = vecw_and_notfirst(*exclude_bitvvec_iter++, *main_bitvvec_iter);\n    ++main_bitvvec_iter;\n    *main_bitvvec_iter = vecw_and_notfirst(*exclude_bitvvec_iter++, *main_bitvvec_iter);\n    ++main_bitvvec_iter;\n    *main_bitvvec_iter = vecw_and_notfirst(*exclude_bitvvec_iter++, *main_bitvvec_iter);\n    ++main_bitvvec_iter;\n  }\n#  ifdef USE_AVX2\n  if (word_ct & 2) {\n    const uintptr_t base_idx = full_vec_ct * kWordsPerVec;\n    main_bitvec[base_idx] &= ~exclude_bitvec[base_idx];\n    main_bitvec[base_idx + 1] &= ~exclude_bitvec[base_idx + 1];\n  }\n#  endif\n  if (word_ct & 1) {\n    main_bitvec[word_ct - 1] &= ~exclude_bitvec[word_ct - 1];\n  }\n#else\n  for (uintptr_t widx = 0; widx != word_ct; ++widx) {\n    main_bitvec[widx] &= ~exclude_bitvec[widx];\n  }\n#endif\n}\n\nvoid BitvecOr(const uintptr_t* __restrict arg_bitvec, uintptr_t word_ct, uintptr_t* main_bitvec) {\n  // main_bitvec := main_bitvec OR arg_bitvec\n#ifdef USE_SSE2\n  VecW* main_bitvvec_iter = R_CAST(VecW*, main_bitvec);\n  const VecW* arg_bitvvec_iter = R_CAST(const VecW*, arg_bitvec);\n  const uintptr_t full_vec_ct = word_ct / kWordsPerVec;\n  if (full_vec_ct & 1) {\n    *main_bitvvec_iter++ |= (*arg_bitvvec_iter++);\n  }\n  if (full_vec_ct & 2) {\n    *main_bitvvec_iter++ |= (*arg_bitvvec_iter++);\n    *main_bitvvec_iter++ |= (*arg_bitvvec_iter++);\n  }\n  for (uintptr_t ulii = 3; ulii < full_vec_ct; ulii += 4) {\n    *main_bitvvec_iter++ |= (*arg_bitvvec_iter++);\n    *main_bitvvec_iter++ |= (*arg_bitvvec_iter++);\n    *main_bitvvec_iter++ |= (*arg_bitvvec_iter++);\n    
*main_bitvvec_iter++ |= (*arg_bitvvec_iter++);\n  }\n#  ifdef USE_AVX2\n  if (word_ct & 2) {\n    const uintptr_t base_idx = full_vec_ct * kWordsPerVec;\n    main_bitvec[base_idx] |= arg_bitvec[base_idx];\n    main_bitvec[base_idx + 1] |= arg_bitvec[base_idx + 1];\n  }\n#  endif\n  if (word_ct & 1) {\n    main_bitvec[word_ct - 1] |= arg_bitvec[word_ct - 1];\n  }\n#else\n  for (uintptr_t widx = 0; widx != word_ct; ++widx) {\n    main_bitvec[widx] |= arg_bitvec[widx];\n  }\n#endif\n}\n\nvoid BitvecInvert(uintptr_t word_ct, uintptr_t* main_bitvec) {\n#ifdef USE_SSE2\n  VecW* main_bitvvec_iter = R_CAST(VecW*, main_bitvec);\n  const uintptr_t full_vec_ct = word_ct / kWordsPerVec;\n  const VecW all1 = VCONST_W(~k0LU);\n  if (full_vec_ct & 1) {\n    *main_bitvvec_iter++ ^= all1;\n  }\n  if (full_vec_ct & 2) {\n    *main_bitvvec_iter++ ^= all1;\n    *main_bitvvec_iter++ ^= all1;\n  }\n  for (uintptr_t ulii = 3; ulii < full_vec_ct; ulii += 4) {\n    *main_bitvvec_iter++ ^= all1;\n    *main_bitvvec_iter++ ^= all1;\n    *main_bitvvec_iter++ ^= all1;\n    *main_bitvvec_iter++ ^= all1;\n  }\n#  ifdef USE_AVX2\n  if (word_ct & 2) {\n    const uintptr_t base_idx = full_vec_ct * kWordsPerVec;\n    main_bitvec[base_idx] ^= ~k0LU;\n    main_bitvec[base_idx + 1] ^= ~k0LU;\n  }\n#  endif\n  if (word_ct & 1) {\n    main_bitvec[word_ct - 1] ^= ~k0LU;\n  }\n#else\n  for (uintptr_t widx = 0; widx != word_ct; ++widx) {\n    main_bitvec[widx] ^= ~k0LU;\n  }\n#endif\n}\n\nvoid BitvecXorCopy(const uintptr_t* __restrict source1_bitvec, const uintptr_t* __restrict source2_bitvec, uintptr_t word_ct, uintptr_t* target_bitvec) {\n#ifdef USE_SSE2\n  VecW* target_bitvvec = R_CAST(VecW*, target_bitvec);\n  const VecW* source1_bitvvec = R_CAST(const VecW*, source1_bitvec);\n  const VecW* source2_bitvvec = R_CAST(const VecW*, source2_bitvec);\n  const uintptr_t full_vec_ct = word_ct / kWordsPerVec;\n  for (uintptr_t ulii = 0; ulii != full_vec_ct; ++ulii) {\n    target_bitvvec[ulii] = source1_bitvvec[ulii] ^ source2_bitvvec[ulii];\n  }\n#  ifdef USE_AVX2\n  if (word_ct & 2) {\n    const uintptr_t base_idx = full_vec_ct * kWordsPerVec;\n    target_bitvec[base_idx] = source1_bitvec[base_idx] ^ source2_bitvec[base_idx];\n    target_bitvec[base_idx + 1] = source1_bitvec[base_idx + 1] ^ source2_bitvec[base_idx + 1];\n  }\n#  endif\n  if (word_ct & 1) {\n    target_bitvec[word_ct - 1] = source1_bitvec[word_ct - 1] ^ source2_bitvec[word_ct - 1];\n  }\n#else\n  for (uintptr_t widx = 0; widx != word_ct; ++widx) {\n    target_bitvec[widx] = source1_bitvec[widx] ^ source2_bitvec[widx];\n  }\n#endif\n}\n\nvoid BitvecInvertCopy(const uintptr_t* __restrict source_bitvec, uintptr_t word_ct, uintptr_t* __restrict target_bitvec) {\n#ifdef USE_SSE2\n  const VecW* source_bitvvec_iter = R_CAST(const VecW*, source_bitvec);\n  VecW* target_bitvvec_iter = R_CAST(VecW*, target_bitvec);\n  const uintptr_t full_vec_ct = word_ct / kWordsPerVec;\n  const VecW all1 = VCONST_W(~k0LU);\n  // As of Apple clang 11, this manual unroll is no longer relevant.  
todo:\n  // check Linux performance, and remove all of these unrolls if perf is good\n  // enough without them.\n  if (full_vec_ct & 1) {\n    *target_bitvvec_iter++ = (*source_bitvvec_iter++) ^ all1;\n  }\n  if (full_vec_ct & 2) {\n    *target_bitvvec_iter++ = (*source_bitvvec_iter++) ^ all1;\n    *target_bitvvec_iter++ = (*source_bitvvec_iter++) ^ all1;\n  }\n  for (uintptr_t ulii = 3; ulii < full_vec_ct; ulii += 4) {\n    *target_bitvvec_iter++ = (*source_bitvvec_iter++) ^ all1;\n    *target_bitvvec_iter++ = (*source_bitvvec_iter++) ^ all1;\n    *target_bitvvec_iter++ = (*source_bitvvec_iter++) ^ all1;\n    *target_bitvvec_iter++ = (*source_bitvvec_iter++) ^ all1;\n  }\n#  ifdef USE_AVX2\n  if (word_ct & 2) {\n    const uintptr_t base_idx = full_vec_ct * kWordsPerVec;\n    target_bitvec[base_idx] = ~source_bitvec[base_idx];\n    target_bitvec[base_idx + 1] = ~source_bitvec[base_idx + 1];\n  }\n#  endif\n  if (word_ct & 1) {\n    target_bitvec[word_ct - 1] = ~source_bitvec[word_ct - 1];\n  }\n#else\n  for (uintptr_t widx = 0; widx != word_ct; ++widx) {\n    target_bitvec[widx] = ~source_bitvec[widx];\n  }\n#endif\n}\n\nuintptr_t AdvTo1Bit(const uintptr_t* bitarr, uintptr_t loc) {\n  const uintptr_t* bitarr_iter = &(bitarr[loc / kBitsPerWord]);\n  uintptr_t ulii = (*bitarr_iter) >> (loc % kBitsPerWord);\n  if (ulii) {\n    return loc + ctzw(ulii);\n  }\n  do {\n    ulii = *(++bitarr_iter);\n  } while (!ulii);\n  return S_CAST(uintptr_t, bitarr_iter - bitarr) * kBitsPerWord + ctzw(ulii);\n}\n\nuintptr_t AdvTo0Bit(const uintptr_t* bitarr, uintptr_t loc) {\n  const uintptr_t* bitarr_iter = &(bitarr[loc / kBitsPerWord]);\n  uintptr_t ulii = (~(*bitarr_iter)) >> (loc % kBitsPerWord);\n  if (ulii) {\n    return loc + ctzw(ulii);\n  }\n  do {\n    ulii = *(++bitarr_iter);\n  } while (ulii == ~k0LU);\n  return S_CAST(uintptr_t, bitarr_iter - bitarr) * kBitsPerWord + ctzw(~ulii);\n}\n\n/*\nuintptr_t NextNonmissingUnsafe(const uintptr_t* genoarr, uintptr_t loc) {\n  const uintptr_t* genoarr_iter = &(genoarr[loc / kBitsPerWordD2]);\n  uintptr_t ulii = (~(*genoarr_iter)) >> (2 * (loc % kBitsPerWordD2));\n  if (ulii) {\n    return loc + (ctzw(ulii) / 2);\n  }\n  do {\n    ulii = *(++genoarr_iter);\n  } while (ulii == ~k0LU);\n  return S_CAST(uintptr_t, genoarr_iter - genoarr) * kBitsPerWordD2 + (ctzw(~ulii) / 2);\n}\n*/\n\nuint32_t AdvBoundedTo1Bit(const uintptr_t* bitarr, uint32_t loc, uint32_t ceil) {\n  // safe version.\n  const uintptr_t* bitarr_iter = &(bitarr[loc / kBitsPerWord]);\n  uintptr_t ulii = (*bitarr_iter) >> (loc % kBitsPerWord);\n  if (ulii) {\n    const uint32_t rval = loc + ctzw(ulii);\n    return MINV(rval, ceil);\n  }\n  const uintptr_t* bitarr_last = &(bitarr[(ceil - 1) / kBitsPerWord]);\n  do {\n    if (bitarr_iter >= bitarr_last) {\n      return ceil;\n    }\n    ulii = *(++bitarr_iter);\n  } while (!ulii);\n  const uint32_t rval = S_CAST(uintptr_t, bitarr_iter - bitarr) * kBitsPerWord + ctzw(ulii);\n  return MINV(rval, ceil);\n}\n\nuintptr_t AdvBoundedTo0Bit(const uintptr_t* bitarr, uintptr_t loc, uintptr_t ceil) {\n  assert(ceil >= 1);\n  const uintptr_t* bitarr_ptr = &(bitarr[loc / kBitsPerWord]);\n  uintptr_t ulii = (~(*bitarr_ptr)) >> (loc % kBitsPerWord);\n  if (ulii) {\n    loc += ctzw(ulii);\n    return MINV(loc, ceil);\n  }\n  const uintptr_t* bitarr_last = &(bitarr[(ceil - 1) / kBitsPerWord]);\n  do {\n    if (bitarr_ptr >= bitarr_last) {\n      return ceil;\n    }\n    ulii = *(++bitarr_ptr);\n  } while (ulii == ~k0LU);\n  loc = S_CAST(uintptr_t, bitarr_ptr - bitarr) 
* kBitsPerWord + ctzw(~ulii);\n  return MINV(loc, ceil);\n}\n\nuintptr_t FindLast1BitBefore(const uintptr_t* bitarr, uintptr_t loc) {\n  // unlike the next_{un}set family, this always returns a STRICTLY earlier\n  // position\n  const uintptr_t* bitarr_iter = &(bitarr[loc / kBitsPerWord]);\n  const uint32_t remainder = loc % kBitsPerWord;\n  uintptr_t ulii;\n  if (remainder) {\n    ulii = bzhi(*bitarr_iter, remainder);\n    if (ulii) {\n      return loc - remainder + bsrw(ulii);\n    }\n  }\n  do {\n    ulii = *(--bitarr_iter);\n  } while (!ulii);\n  return S_CAST(uintptr_t, bitarr_iter - bitarr) * kBitsPerWord + bsrw(ulii);\n}\n\n#ifndef NO_UNALIGNED\nuint32_t AllBytesAreX(const unsigned char* bytes, unsigned char match, uintptr_t byte_ct) {\n  if (byte_ct < kBytesPerWord) {\n    for (uint32_t uii = 0; uii != byte_ct; ++uii) {\n      if (bytes[uii] != match) {\n        return 0;\n      }\n    }\n    return 1;\n  }\n  const uintptr_t* bytes_alias = R_CAST(const uintptr_t*, bytes);\n  const uintptr_t word_match = S_CAST(uintptr_t, match) * kMask0101;\n  uintptr_t word_ct_m1 = (byte_ct - 1) / kBytesPerWord;\n  // todo: try movemask in AVX2 case\n  for (uintptr_t widx = 0; widx != word_ct_m1; ++widx) {\n    if (bytes_alias[widx] != word_match) {\n      return 0;\n    }\n  }\n  const uintptr_t last_word = *R_CAST(const uintptr_t*, &(bytes[byte_ct - kBytesPerWord]));\n  if (last_word != word_match) {\n    return 0;\n  }\n  return 1;\n}\n#else // NO_UNALIGNED\nuint32_t AllBytesAreX(const unsigned char* bytes, unsigned char match, uintptr_t byte_ct) {\n  if (byte_ct < 2 * kBytesPerWord - 1) {\n    // use simple loop instead of main algorithm unless byte_ct guarantees\n    // fullword_ct >= 1\n    for (uint32_t uii = 0; uii != byte_ct; ++uii) {\n      if (bytes[uii] != match) {\n        return 0;\n      }\n    }\n    return 1;\n  }\n  // todo: try movemask in AVX2 case\n  const uintptr_t* bytes_alias;\n  const uint32_t lead_byte_ct = AlignKToW(bytes, &bytes_alias);\n  for (uint32_t uii = 0; uii != lead_byte_ct; ++uii) {\n    if (bytes[uii] != match) {\n      return 0;\n    }\n  }\n  bytes = &(bytes[lead_byte_ct]);\n  byte_ct -= lead_byte_ct;\n  const uintptr_t fullword_ct = byte_ct / kBytesPerWord;\n  const uintptr_t word_match = S_CAST(uintptr_t, match) * kMask0101;\n  for (uintptr_t widx = 0; widx != fullword_ct; ++widx) {\n    if (bytes_alias[widx] != word_match) {\n      return 0;\n    }\n  }\n  for (uintptr_t ulii = fullword_ct * kBytesPerWord; ulii != byte_ct; ++ulii) {\n    if (bytes[ulii] != match) {\n      return 0;\n    }\n  }\n  return 1;\n}\n#endif\n\n#ifdef USE_AVX2\n// void CopyBitarrSubsetEx(const uintptr_t* __restrict raw_bitarr, const uintptr_t* __restrict subset_mask, uint32_t bit_idx_start, uint32_t output_bit_idx_end, uintptr_t* __restrict output_bitarr) {\nvoid CopyBitarrSubset(const uintptr_t* __restrict raw_bitarr, const uintptr_t* __restrict subset_mask, uint32_t output_bit_idx_end, uintptr_t* __restrict output_bitarr) {\n  const uint32_t output_bit_idx_end_lowbits = output_bit_idx_end % kBitsPerWord;\n  uintptr_t* output_bitarr_iter = output_bitarr;\n  uintptr_t* output_bitarr_last = &(output_bitarr[output_bit_idx_end / kBitsPerWord]);\n  uintptr_t cur_output_word = 0;\n  uint32_t read_widx = UINT32_MAX;  // deliberate overflow\n  uint32_t write_idx_lowbits = 0;\n  while ((output_bitarr_iter != output_bitarr_last) || (write_idx_lowbits != output_bit_idx_end_lowbits)) {\n    uintptr_t cur_mask_word;\n    // sparse subset_mask optimization\n    // guaranteed to terminate 
since there's at least one more set bit\n    do {\n      cur_mask_word = subset_mask[++read_widx];\n    } while (!cur_mask_word);\n    uintptr_t extracted_bits = raw_bitarr[read_widx];\n    uint32_t set_bit_ct = kBitsPerWord;\n    if (cur_mask_word != ~k0LU) {\n      extracted_bits = _pext_u64(extracted_bits, cur_mask_word);\n      set_bit_ct = PopcountWord(cur_mask_word);\n    }\n    cur_output_word |= extracted_bits << write_idx_lowbits;\n    const uint32_t new_write_idx_lowbits = write_idx_lowbits + set_bit_ct;\n    if (new_write_idx_lowbits >= kBitsPerWord) {\n      *output_bitarr_iter++ = cur_output_word;\n      // ...and these are the bits that fell off\n      // bugfix: unsafe to right-shift 64\n      if (write_idx_lowbits) {\n        cur_output_word = extracted_bits >> (kBitsPerWord - write_idx_lowbits);\n      } else {\n        cur_output_word = 0;\n      }\n    }\n    write_idx_lowbits = new_write_idx_lowbits % kBitsPerWord;\n  }\n  if (write_idx_lowbits) {\n    *output_bitarr_iter = cur_output_word;\n  }\n}\n\n#  ifdef NO_UNALIGNED\nvoid CopyBitarrSubsetToUnaligned(const uintptr_t* __restrict raw_bitarr, const uintptr_t* __restrict subset_mask, uint32_t output_bit_idx_end, void* __restrict output_bitarr) {\n  const uint32_t output_bit_idx_end_lowbits = output_bit_idx_end % kBitsPerWord;\n  unsigned char* output_bitarr_iter = S_CAST(unsigned char*, output_bitarr);\n  unsigned char* output_bitarr_last = &(output_bitarr_iter[(output_bit_idx_end / kBitsPerWord) * kBytesPerWord]);\n  uintptr_t cur_output_word = 0;\n  uint32_t read_widx = UINT32_MAX;  // deliberate overflow\n  uint32_t write_idx_lowbits = 0;\n  while ((output_bitarr_iter != output_bitarr_last) || (write_idx_lowbits != output_bit_idx_end_lowbits)) {\n    uintptr_t cur_mask_word;\n    // sparse subset_mask optimization\n    // guaranteed to terminate since there's at least one more set bit\n    do {\n      cur_mask_word = subset_mask[++read_widx];\n    } while (!cur_mask_word);\n    uintptr_t extracted_bits = raw_bitarr[read_widx];\n    uint32_t set_bit_ct = kBitsPerWord;\n    if (cur_mask_word != ~k0LU) {\n      extracted_bits = _pext_u64(extracted_bits, cur_mask_word);\n      set_bit_ct = PopcountWord(cur_mask_word);\n    }\n    cur_output_word |= extracted_bits << write_idx_lowbits;\n    const uint32_t new_write_idx_lowbits = write_idx_lowbits + set_bit_ct;\n    if (new_write_idx_lowbits >= kBitsPerWord) {\n      AppendW(cur_output_word, &output_bitarr_iter);\n      // ...and these are the bits that fell off\n      // bugfix: unsafe to right-shift 64\n      if (write_idx_lowbits) {\n        cur_output_word = extracted_bits >> (kBitsPerWord - write_idx_lowbits);\n      } else {\n        cur_output_word = 0;\n      }\n    }\n    write_idx_lowbits = new_write_idx_lowbits % kBitsPerWord;\n  }\n  if (write_idx_lowbits) {\n    CopyToUnalignedW(output_bitarr_iter, &cur_output_word);\n  }\n}\n#  endif\n\nuintptr_t PopcountVecsAvx2(const VecW* bit_vvec, uintptr_t vec_ct) {\n  // See popcnt_avx2() in libpopcnt.\n  VecW cnt = vecw_setzero();\n  VecW ones = vecw_setzero();\n  VecW twos = vecw_setzero();\n  VecW fours = vecw_setzero();\n  VecW eights = vecw_setzero();\n  VecW prev_sad_result = vecw_setzero();\n  const uintptr_t vec_ct_a16 = RoundDownPow2(vec_ct, 16);\n  for (uintptr_t vec_idx = 0; vec_idx != vec_ct_a16; vec_idx += 16) {\n    VecW twos_a = Csa256(bit_vvec[vec_idx + 0], bit_vvec[vec_idx + 1], &ones);\n    VecW twos_b = Csa256(bit_vvec[vec_idx + 2], bit_vvec[vec_idx + 3], &ones);\n    VecW fours_a = Csa256(twos_a, 
twos_b, &twos);\n\n    twos_a = Csa256(bit_vvec[vec_idx + 4], bit_vvec[vec_idx + 5], &ones);\n    twos_b = Csa256(bit_vvec[vec_idx + 6], bit_vvec[vec_idx + 7], &ones);\n    VecW fours_b = Csa256(twos_a, twos_b, &twos);\n    const VecW eights_a = Csa256(fours_a, fours_b, &fours);\n\n    twos_a = Csa256(bit_vvec[vec_idx + 8], bit_vvec[vec_idx + 9], &ones);\n    twos_b = Csa256(bit_vvec[vec_idx + 10], bit_vvec[vec_idx + 11], &ones);\n    fours_a = Csa256(twos_a, twos_b, &twos);\n\n    twos_a = Csa256(bit_vvec[vec_idx + 12], bit_vvec[vec_idx + 13], &ones);\n    twos_b = Csa256(bit_vvec[vec_idx + 14], bit_vvec[vec_idx + 15], &ones);\n    fours_b = Csa256(twos_a, twos_b, &twos);\n    const VecW eights_b = Csa256(fours_a, fours_b, &fours);\n    const VecW sixteens = Csa256(eights_a, eights_b, &eights);\n    cnt = cnt + prev_sad_result;\n    // work around high SAD latency\n    prev_sad_result = PopcountVecAvx2(sixteens);\n  }\n  bit_vvec = &(bit_vvec[vec_ct_a16]);\n  const uintptr_t remainder = vec_ct % 16;\n  cnt = cnt + prev_sad_result;\n  if (remainder < 12) {\n    cnt = vecw_slli(cnt, 4);\n    if (remainder) {\n      VecW popcnt1_acc = vecw_setzero();\n      VecW popcnt2_acc = vecw_setzero();\n      const VecW lookup1 = vecw_setr8(4, 5, 5, 6, 5, 6, 6, 7,\n                                      5, 6, 6, 7, 6, 7, 7, 8);\n      const VecW lookup2 = vecw_setr8(4, 3, 3, 2, 3, 2, 2, 1,\n                                      3, 2, 2, 1, 2, 1, 1, 0);\n\n      const VecW m4 = VCONST_W(kMask0F0F);\n      for (uintptr_t vec_idx = 0; vec_idx != remainder; ++vec_idx) {\n        const VecW vv = bit_vvec[vec_idx];\n        const VecW lo = vv & m4;\n        const VecW hi = vecw_srli(vv, 4) & m4;\n        popcnt1_acc = popcnt1_acc + vecw_shuffle8(lookup1, lo);\n        popcnt2_acc = popcnt2_acc + vecw_shuffle8(lookup2, hi);\n      }\n      cnt = cnt + vecw_sad(popcnt1_acc, popcnt2_acc);\n    }\n  } else {\n    VecW twos_a = Csa256(bit_vvec[0], bit_vvec[1], &ones);\n    VecW twos_b = Csa256(bit_vvec[2], bit_vvec[3], &ones);\n    VecW fours_a = Csa256(twos_a, twos_b, &twos);\n    twos_a = Csa256(bit_vvec[4], bit_vvec[5], &ones);\n    twos_b = Csa256(bit_vvec[6], bit_vvec[7], &ones);\n    VecW fours_b = Csa256(twos_a, twos_b, &twos);\n    const VecW eights_a = Csa256(fours_a, fours_b, &fours);\n    twos_a = Csa256(bit_vvec[8], bit_vvec[9], &ones);\n    twos_b = Csa256(bit_vvec[10], bit_vvec[11], &ones);\n    fours_a = Csa256(twos_a, twos_b, &twos);\n    twos_a = vecw_setzero();\n    if (remainder & 2) {\n      twos_a = Csa256(bit_vvec[12], bit_vvec[13], &ones);\n    }\n    twos_b = vecw_setzero();\n    if (remainder & 1) {\n      twos_b = CsaOne256(bit_vvec[remainder - 1], &ones);\n    }\n    fours_b = Csa256(twos_a, twos_b, &twos);\n    const VecW eights_b = Csa256(fours_a, fours_b, &fours);\n    const VecW sixteens = Csa256(eights_a, eights_b, &eights);\n    cnt = cnt + PopcountVecAvx2(sixteens);\n    cnt = vecw_slli(cnt, 4);\n  }\n  // Appears to be counterproductive to put multiple SAD instructions in\n  // flight.\n  // Compiler is smart enough that it's pointless to manually inline\n  // PopcountVecAvx2.  
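\n  // (The leftover carry-save accumulators hold bits of weight 2^k: ones=1,\n  // twos=2, fours=4, eights=8, so each popcount below is shifted left by k\n  // before being added.  cnt itself accumulated counts of weight-16\n  // "sixteens" units, which the vecw_slli(cnt, 4) above converts to a true\n  // bit count.)\n  // 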
(Tried combining the 4 SAD calls into one, didn't help.)\n  cnt = cnt + vecw_slli(PopcountVecAvx2(eights), 3);\n  cnt = cnt + vecw_slli(PopcountVecAvx2(fours), 2);\n  cnt = cnt + vecw_slli(PopcountVecAvx2(twos), 1);\n  cnt = cnt + PopcountVecAvx2(ones);\n  return HsumW(cnt);\n}\n\nuintptr_t PopcountVecsAvx2Intersect(const VecW* __restrict vvec1_iter, const VecW* __restrict vvec2_iter, uintptr_t vec_ct) {\n  // See popcnt_avx2() in libpopcnt.  vec_ct must be a multiple of 16.\n  VecW cnt = vecw_setzero();\n  VecW ones = vecw_setzero();\n  VecW twos = vecw_setzero();\n  VecW fours = vecw_setzero();\n  VecW eights = vecw_setzero();\n  for (uintptr_t vec_idx = 0; vec_idx < vec_ct; vec_idx += 16) {\n    VecW twos_a = Csa256(vvec1_iter[vec_idx + 0] & vvec2_iter[vec_idx + 0], vvec1_iter[vec_idx + 1] & vvec2_iter[vec_idx + 1], &ones);\n    VecW twos_b = Csa256(vvec1_iter[vec_idx + 2] & vvec2_iter[vec_idx + 2], vvec1_iter[vec_idx + 3] & vvec2_iter[vec_idx + 3], &ones);\n    VecW fours_a = Csa256(twos_a, twos_b, &twos);\n\n    twos_a = Csa256(vvec1_iter[vec_idx + 4] & vvec2_iter[vec_idx + 4], vvec1_iter[vec_idx + 5] & vvec2_iter[vec_idx + 5], &ones);\n    twos_b = Csa256(vvec1_iter[vec_idx + 6] & vvec2_iter[vec_idx + 6], vvec1_iter[vec_idx + 7] & vvec2_iter[vec_idx + 7], &ones);\n    VecW fours_b = Csa256(twos_a, twos_b, &twos);\n    const VecW eights_a = Csa256(fours_a, fours_b, &fours);\n\n    twos_a = Csa256(vvec1_iter[vec_idx + 8] & vvec2_iter[vec_idx + 8], vvec1_iter[vec_idx + 9] & vvec2_iter[vec_idx + 9], &ones);\n    twos_b = Csa256(vvec1_iter[vec_idx + 10] & vvec2_iter[vec_idx + 10], vvec1_iter[vec_idx + 11] & vvec2_iter[vec_idx + 11], &ones);\n    fours_a = Csa256(twos_a, twos_b, &twos);\n\n    twos_a = Csa256(vvec1_iter[vec_idx + 12] & vvec2_iter[vec_idx + 12], vvec1_iter[vec_idx + 13] & vvec2_iter[vec_idx + 13], &ones);\n    twos_b = Csa256(vvec1_iter[vec_idx + 14] & vvec2_iter[vec_idx + 14], vvec1_iter[vec_idx + 15] & vvec2_iter[vec_idx + 15], &ones);\n    fours_b = Csa256(twos_a, twos_b, &twos);\n    const VecW eights_b = Csa256(fours_a, fours_b, &fours);\n    const VecW sixteens = Csa256(eights_a, eights_b, &eights);\n    cnt = cnt + PopcountVecAvx2(sixteens);\n  }\n  cnt = vecw_slli(cnt, 4);\n  cnt = cnt + vecw_slli(PopcountVecAvx2(eights), 3);\n  cnt = cnt + vecw_slli(PopcountVecAvx2(fours), 2);\n  cnt = cnt + vecw_slli(PopcountVecAvx2(twos), 1);\n  cnt = cnt + PopcountVecAvx2(ones);\n  return HsumW(cnt);\n}\n\nuintptr_t PopcountWordsIntersect(const uintptr_t* __restrict bitvec1_iter, const uintptr_t* __restrict bitvec2_iter, uintptr_t word_ct) {\n  const uintptr_t* bitvec1_end = &(bitvec1_iter[word_ct]);\n  const uintptr_t block_ct = word_ct / (16 * kWordsPerVec);\n  uintptr_t tot = 0;\n  if (block_ct) {\n    tot = PopcountVecsAvx2Intersect(R_CAST(const VecW*, bitvec1_iter), R_CAST(const VecW*, bitvec2_iter), block_ct * 16);\n    bitvec1_iter = &(bitvec1_iter[block_ct * (16 * kWordsPerVec)]);\n    bitvec2_iter = &(bitvec2_iter[block_ct * (16 * kWordsPerVec)]);\n  }\n  while (bitvec1_iter < bitvec1_end) {\n    tot += PopcountWord((*bitvec1_iter++) & (*bitvec2_iter++));\n  }\n  return tot;\n}\n\nuintptr_t PopcountVecsAvx2Xor(const VecW* __restrict vvec1_iter, const VecW* __restrict vvec2_iter, uintptr_t vec_ct) {\n  // vec_ct must be a multiple of 16.\n  VecW cnt = vecw_setzero();\n  VecW ones = vecw_setzero();\n  VecW twos = vecw_setzero();\n  VecW fours = vecw_setzero();\n  VecW eights = vecw_setzero();\n  for (uintptr_t vec_idx = 0; vec_idx < vec_ct; vec_idx += 16) {\n    VecW 
twos_a = Csa256(vvec1_iter[vec_idx + 0] ^ vvec2_iter[vec_idx + 0], vvec1_iter[vec_idx + 1] ^ vvec2_iter[vec_idx + 1], &ones);\n    VecW twos_b = Csa256(vvec1_iter[vec_idx + 2] ^ vvec2_iter[vec_idx + 2], vvec1_iter[vec_idx + 3] ^ vvec2_iter[vec_idx + 3], &ones);\n    VecW fours_a = Csa256(twos_a, twos_b, &twos);\n\n    twos_a = Csa256(vvec1_iter[vec_idx + 4] ^ vvec2_iter[vec_idx + 4], vvec1_iter[vec_idx + 5] ^ vvec2_iter[vec_idx + 5], &ones);\n    twos_b = Csa256(vvec1_iter[vec_idx + 6] ^ vvec2_iter[vec_idx + 6], vvec1_iter[vec_idx + 7] ^ vvec2_iter[vec_idx + 7], &ones);\n    VecW fours_b = Csa256(twos_a, twos_b, &twos);\n    const VecW eights_a = Csa256(fours_a, fours_b, &fours);\n\n    twos_a = Csa256(vvec1_iter[vec_idx + 8] ^ vvec2_iter[vec_idx + 8], vvec1_iter[vec_idx + 9] ^ vvec2_iter[vec_idx + 9], &ones);\n    twos_b = Csa256(vvec1_iter[vec_idx + 10] ^ vvec2_iter[vec_idx + 10], vvec1_iter[vec_idx + 11] ^ vvec2_iter[vec_idx + 11], &ones);\n    fours_a = Csa256(twos_a, twos_b, &twos);\n\n    twos_a = Csa256(vvec1_iter[vec_idx + 12] ^ vvec2_iter[vec_idx + 12], vvec1_iter[vec_idx + 13] ^ vvec2_iter[vec_idx + 13], &ones);\n    twos_b = Csa256(vvec1_iter[vec_idx + 14] ^ vvec2_iter[vec_idx + 14], vvec1_iter[vec_idx + 15] ^ vvec2_iter[vec_idx + 15], &ones);\n    fours_b = Csa256(twos_a, twos_b, &twos);\n    const VecW eights_b = Csa256(fours_a, fours_b, &fours);\n    const VecW sixteens = Csa256(eights_a, eights_b, &eights);\n    cnt = cnt + PopcountVecAvx2(sixteens);\n  }\n  cnt = vecw_slli(cnt, 4);\n  cnt = cnt + vecw_slli(PopcountVecAvx2(eights), 3);\n  cnt = cnt + vecw_slli(PopcountVecAvx2(fours), 2);\n  cnt = cnt + vecw_slli(PopcountVecAvx2(twos), 1);\n  cnt = cnt + PopcountVecAvx2(ones);\n  return HsumW(cnt);\n}\n\nuintptr_t PopcountWordsXor(const uintptr_t* __restrict bitvec1_iter, const uintptr_t* __restrict bitvec2_iter, uintptr_t word_ct) {\n  const uintptr_t* bitvec1_end = &(bitvec1_iter[word_ct]);\n  const uintptr_t block_ct = word_ct / (16 * kWordsPerVec);\n  uintptr_t tot = 0;\n  if (block_ct) {\n    tot = PopcountVecsAvx2Xor(R_CAST(const VecW*, bitvec1_iter), R_CAST(const VecW*, bitvec2_iter), block_ct * 16);\n    bitvec1_iter = &(bitvec1_iter[block_ct * (16 * kWordsPerVec)]);\n    bitvec2_iter = &(bitvec2_iter[block_ct * (16 * kWordsPerVec)]);\n  }\n  while (bitvec1_iter < bitvec1_end) {\n    tot += PopcountWord((*bitvec1_iter++) ^ (*bitvec2_iter++));\n  }\n  return tot;\n}\n\n/*\nuintptr_t PopcountVecsAvx2Intersect3(const VecW* __restrict vvec1_iter, const VecW* __restrict vvec2_iter, const VecW* __restrict vvec3_iter, uintptr_t vec_ct) {\n  VecW cnt = vecw_setzero();\n  VecW ones = vecw_setzero();\n  VecW twos = vecw_setzero();\n  VecW fours = vecw_setzero();\n  VecW eights = vecw_setzero();\n  for (uintptr_t vec_idx = 0; vec_idx < vec_ct; vec_idx += 16) {\n    VecW twos_a = Csa256(vvec1_iter[vec_idx + 0] & vvec2_iter[vec_idx + 0] & vvec3_iter[vec_idx + 0], vvec1_iter[vec_idx + 1] & vvec2_iter[vec_idx + 1] & vvec3_iter[vec_idx + 1], &ones);\n    VecW twos_b = Csa256(vvec1_iter[vec_idx + 2] & vvec2_iter[vec_idx + 2] & vvec3_iter[vec_idx + 2], vvec1_iter[vec_idx + 3] & vvec2_iter[vec_idx + 3] & vvec3_iter[vec_idx + 3], &ones);\n    VecW fours_a = Csa256(twos_a, twos_b, &twos);\n\n    twos_a = Csa256(vvec1_iter[vec_idx + 4] & vvec2_iter[vec_idx + 4] & vvec3_iter[vec_idx + 4], vvec1_iter[vec_idx + 5] & vvec2_iter[vec_idx + 5] & vvec3_iter[vec_idx + 5], &ones);\n    twos_b = Csa256(vvec1_iter[vec_idx + 6] & vvec2_iter[vec_idx + 6] & vvec3_iter[vec_idx + 6], vvec1_iter[vec_idx + 
7] & vvec2_iter[vec_idx + 7] & vvec3_iter[vec_idx + 7], &ones);\n    VecW fours_b = Csa256(twos_a, twos_b, &twos);\n    const VecW eights_a = Csa256(fours_a, fours_b, &fours);\n\n    twos_a = Csa256(vvec1_iter[vec_idx + 8] & vvec2_iter[vec_idx + 8] & vvec3_iter[vec_idx + 8], vvec1_iter[vec_idx + 9] & vvec2_iter[vec_idx + 9] & vvec3_iter[vec_idx + 9], &ones);\n    twos_b = Csa256(vvec1_iter[vec_idx + 10] & vvec2_iter[vec_idx + 10] & vvec3_iter[vec_idx + 10], vvec1_iter[vec_idx + 11] & vvec2_iter[vec_idx + 11] & vvec3_iter[vec_idx + 11], &ones);\n    fours_a = Csa256(twos_a, twos_b, &twos);\n\n    twos_a = Csa256(vvec1_iter[vec_idx + 12] & vvec2_iter[vec_idx + 12] & vvec3_iter[vec_idx + 12], vvec1_iter[vec_idx + 13] & vvec2_iter[vec_idx + 13] & vvec3_iter[vec_idx + 13], &ones);\n    twos_b = Csa256(vvec1_iter[vec_idx + 14] & vvec2_iter[vec_idx + 14] & vvec3_iter[vec_idx + 14], vvec1_iter[vec_idx + 15] & vvec2_iter[vec_idx + 15] & vvec3_iter[vec_idx + 15], &ones);\n    fours_b = Csa256(twos_a, twos_b, &twos);\n    const VecW eights_b = Csa256(fours_a, fours_b, &fours);\n    const VecW sixteens = Csa256(eights_a, eights_b, &eights);\n    cnt = cnt + PopcountVecAvx2(sixteens);\n  }\n  cnt = vecw_slli(cnt, 4);\n  cnt = cnt + vecw_slli(PopcountVecAvx2(eights), 3);\n  cnt = cnt + vecw_slli(PopcountVecAvx2(fours), 2);\n  cnt = cnt + vecw_slli(PopcountVecAvx2(twos), 1);\n  cnt = cnt + PopcountVecAvx2(ones);\n  return HsumW(cnt);\n}\n\nuintptr_t PopcountWordsIntersect3(const uintptr_t* __restrict bitvec1_iter, const uintptr_t* __restrict bitvec2_iter, const uintptr_t* __restrict bitvec3_iter, uintptr_t word_ct) {\n  const uintptr_t* bitvec1_end = &(bitvec1_iter[word_ct]);\n  const uintptr_t block_ct = word_ct / (16 * kWordsPerVec);\n  uintptr_t tot = 0;\n  if (block_ct) {\n    tot = PopcountVecsAvx2Intersect3(R_CAST(const VecW*, bitvec1_iter), R_CAST(const VecW*, bitvec2_iter), R_CAST(const VecW*, bitvec3_iter), block_ct * 16);\n    bitvec1_iter = &(bitvec1_iter[block_ct * (16 * kWordsPerVec)]);\n    bitvec2_iter = &(bitvec2_iter[block_ct * (16 * kWordsPerVec)]);\n    bitvec3_iter = &(bitvec3_iter[block_ct * (16 * kWordsPerVec)]);\n  }\n  while (bitvec1_iter < bitvec1_end) {\n    tot += PopcountWord((*bitvec1_iter++) & (*bitvec2_iter++) & (*bitvec3_iter++));\n  }\n  return tot;\n}\n*/\n\nvoid ExpandBytearr(const void* __restrict compact_bitarr, const uintptr_t* __restrict expand_mask, uint32_t word_ct, uint32_t expand_size, uint32_t read_start_bit, uintptr_t* __restrict target) {\n  const uint32_t expand_sizex_m1 = expand_size + read_start_bit - 1;\n  const uint32_t leading_byte_ct = 1 + (expand_sizex_m1 % kBitsPerWord) / CHAR_BIT;\n  uintptr_t compact_word = SubwordLoad(compact_bitarr, leading_byte_ct) >> read_start_bit;\n  const unsigned char* compact_bitarr_biter = &(S_CAST(const unsigned char*, compact_bitarr)[leading_byte_ct]);\n  uint32_t compact_idx_lowbits = read_start_bit + CHAR_BIT * (sizeof(intptr_t) - leading_byte_ct);\n  for (uint32_t widx = 0; widx != word_ct; ++widx) {\n    const uintptr_t mask_word = expand_mask[widx];\n    uintptr_t write_word = 0;\n    if (mask_word) {\n      const uint32_t mask_set_ct = PopcountWord(mask_word);\n      uint32_t next_compact_idx_lowbits = compact_idx_lowbits + mask_set_ct;\n      if (next_compact_idx_lowbits <= kBitsPerWord) {\n        write_word = _pdep_u64(compact_word, mask_word);\n        if (mask_set_ct != kBitsPerWord) {\n          compact_word = compact_word >> mask_set_ct;\n        } else {\n          // avoid nasal demons\n          
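// (right-shifting a word by kBitsPerWord is undefined behavior, so zero\n          // explicitly instead)\n          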
compact_word = 0;\n        }\n      } else {\n        uintptr_t next_compact_word;\n        CopyFromUnalignedIncrW(&next_compact_word, &compact_bitarr_biter);\n        next_compact_idx_lowbits -= kBitsPerWord;\n        compact_word |= next_compact_word << (kBitsPerWord - compact_idx_lowbits);\n        write_word = _pdep_u64(compact_word, mask_word);\n        if (next_compact_idx_lowbits != kBitsPerWord) {\n          compact_word = next_compact_word >> next_compact_idx_lowbits;\n        } else {\n          compact_word = 0;\n        }\n      }\n      compact_idx_lowbits = next_compact_idx_lowbits;\n    }\n    target[widx] = write_word;\n  }\n}\n\nvoid ExpandThenSubsetBytearr(const void* __restrict compact_bitarr, const uintptr_t* __restrict expand_mask, const uintptr_t* __restrict subset_mask, uint32_t expand_size, uint32_t subset_size, uint32_t read_start_bit, uintptr_t* __restrict target) {\n  const uint32_t expand_sizex_m1 = expand_size + read_start_bit - 1;\n  const uint32_t leading_byte_ct = 1 + (expand_sizex_m1 % kBitsPerWord) / CHAR_BIT;\n  uintptr_t compact_word = SubwordLoad(compact_bitarr, leading_byte_ct) >> read_start_bit;\n  const unsigned char* compact_bitarrb = &(S_CAST(const unsigned char*, compact_bitarr)[leading_byte_ct]);\n  uint32_t compact_widx = UINT32_MAX;  // deliberate overflow\n  uint32_t compact_idx_lowbits = read_start_bit + CHAR_BIT * (sizeof(uintptr_t) - leading_byte_ct);\n  const uint32_t subset_size_lowbits = subset_size % kBitsPerWord;\n  uintptr_t* target_iter = target;\n  uintptr_t* target_last = &(target[subset_size / kBitsPerWord]);\n  uintptr_t cur_output_word = 0;\n  uint32_t read_widx = UINT32_MAX;  // deliberate overflow\n  uint32_t write_idx_lowbits = 0;\n\n  // bugfix (5 Feb 2018): missed a case in sparse subset_mask optimization\n  uint32_t expand_bit_ct_skip = 0;\n  while ((target_iter != target_last) || (write_idx_lowbits != subset_size_lowbits)) {\n    uintptr_t expand_word;\n    uintptr_t subset_word;\n    uint32_t expand_bit_ct;\n    while (1) {\n      ++read_widx;\n      expand_word = expand_mask[read_widx];\n      subset_word = subset_mask[read_widx];\n      expand_bit_ct = PopcountWord(expand_word);\n      if (subset_word) {\n        break;\n      }\n      expand_bit_ct_skip += expand_bit_ct;\n    }\n    uintptr_t extracted_bits = 0;\n    const uint32_t set_bit_ct = PopcountWord(subset_word);\n    if (expand_word & subset_word) {\n      // lazy load\n      compact_idx_lowbits += expand_bit_ct_skip;\n      if (compact_idx_lowbits >= kBitsPerWord) {\n        compact_widx += compact_idx_lowbits / kBitsPerWord;\n        compact_idx_lowbits = compact_idx_lowbits % kBitsPerWord;\n        CopyFromUnalignedOffsetW(&compact_word, compact_bitarrb, compact_widx);\n        compact_word >>= compact_idx_lowbits;\n      } else {\n        compact_word = compact_word >> expand_bit_ct_skip;\n      }\n      uint32_t next_compact_idx_lowbits = compact_idx_lowbits + expand_bit_ct;\n      uintptr_t expanded_bits;\n      if (next_compact_idx_lowbits <= kBitsPerWord) {\n        expanded_bits = _pdep_u64(compact_word, expand_word);\n        if (expand_bit_ct != kBitsPerWord) {\n          compact_word = compact_word >> expand_bit_ct;\n        }\n      } else {\n        ++compact_widx;\n        uintptr_t next_compact_word;\n        CopyFromUnalignedOffsetW(&next_compact_word, compact_bitarrb, compact_widx);\n        next_compact_idx_lowbits -= kBitsPerWord;\n        compact_word |= next_compact_word << (kBitsPerWord - compact_idx_lowbits);\n        expanded_bits = 
_pdep_u64(compact_word, expand_word);\n        if (next_compact_idx_lowbits != kBitsPerWord) {\n          compact_word = next_compact_word >> next_compact_idx_lowbits;\n        }\n      }\n      extracted_bits = _pext_u64(expanded_bits, subset_word);\n      compact_idx_lowbits = next_compact_idx_lowbits;\n      cur_output_word |= extracted_bits << write_idx_lowbits;\n      expand_bit_ct_skip = 0;\n    } else {\n      expand_bit_ct_skip += expand_bit_ct;\n    }\n    const uint32_t new_write_idx_lowbits = write_idx_lowbits + set_bit_ct;\n    if (new_write_idx_lowbits >= kBitsPerWord) {\n      *target_iter++ = cur_output_word;\n      // ...and these are the bits that fell off\n      if (write_idx_lowbits) {\n        cur_output_word = extracted_bits >> (kBitsPerWord - write_idx_lowbits);\n      } else {\n        cur_output_word = 0;\n      }\n    }\n    write_idx_lowbits = new_write_idx_lowbits % kBitsPerWord;\n  }\n  if (write_idx_lowbits) {\n    *target_iter = cur_output_word;\n  }\n}\n\nvoid ExpandBytearrNested(const void* __restrict compact_bitarr, const uintptr_t* __restrict mid_bitarr, const uintptr_t* __restrict top_expand_mask, uint32_t word_ct, uint32_t mid_popcount, uint32_t mid_start_bit, uintptr_t* __restrict mid_target, uintptr_t* __restrict compact_target) {\n  assert(mid_popcount);\n  const uint32_t leading_byte_ct = 1 + ((mid_popcount - 1) % kBitsPerWord) / CHAR_BIT;\n  uintptr_t compact_read_word = SubwordLoad(compact_bitarr, leading_byte_ct);\n  uint32_t compact_idx_lowbits = CHAR_BIT * (sizeof(intptr_t) - leading_byte_ct);\n  const unsigned char* compact_bitarr_biter = &(S_CAST(const unsigned char*, compact_bitarr)[leading_byte_ct]);\n  const uintptr_t* mid_bitarr_iter = mid_bitarr;\n  uint32_t mid_idx_lowbits = mid_start_bit;\n  uintptr_t mid_read_word = (*mid_bitarr_iter) >> mid_start_bit;\n  for (uint32_t widx = 0; widx != word_ct; ++widx) {\n    const uintptr_t top_word = top_expand_mask[widx];\n    uintptr_t mid_write_word = 0;\n    uintptr_t compact_write_word = 0;\n    if (top_word) {\n      const uint32_t top_set_ct = PopcountWord(top_word);\n      uint32_t next_mid_idx_lowbits = mid_idx_lowbits + top_set_ct;\n      if (next_mid_idx_lowbits <= kBitsPerWord) {\n        mid_write_word = _pdep_u64(mid_read_word, top_word);\n        if (top_set_ct != kBitsPerWord) {\n          mid_read_word = mid_read_word >> top_set_ct;\n        } else {\n          // avoid nasal demons\n          mid_read_word = 0;\n        }\n      } else {\n        uintptr_t next_mid_read_word = *(++mid_bitarr_iter);\n        next_mid_idx_lowbits -= kBitsPerWord;\n        mid_read_word |= next_mid_read_word << (kBitsPerWord - mid_idx_lowbits);\n        mid_write_word = _pdep_u64(mid_read_word, top_word);\n        if (next_mid_idx_lowbits != kBitsPerWord) {\n          mid_read_word = next_mid_read_word >> next_mid_idx_lowbits;\n        } else {\n          mid_read_word = 0;\n        }\n      }\n      mid_idx_lowbits = next_mid_idx_lowbits;\n      if (mid_write_word) {\n        const uint32_t mid_set_ct = PopcountWord(mid_write_word);\n        uint32_t next_compact_idx_lowbits = compact_idx_lowbits + mid_set_ct;\n        if (next_compact_idx_lowbits <= kBitsPerWord) {\n          compact_write_word = _pdep_u64(compact_read_word, mid_write_word);\n          if (mid_set_ct != kBitsPerWord) {\n            compact_read_word = compact_read_word >> mid_set_ct;\n          } else {\n            compact_read_word = 0;\n          }\n        } else {\n          uintptr_t next_compact_word;\n          
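// The compact bits straddle a word boundary: load the next word and\n          // stitch the two pieces together.\n          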
CopyFromUnalignedIncrW(&next_compact_word, &compact_bitarr_biter);\n          next_compact_idx_lowbits -= kBitsPerWord;\n          compact_read_word |= next_compact_word << (kBitsPerWord - compact_idx_lowbits);\n          compact_write_word = _pdep_u64(compact_read_word, mid_write_word);\n          if (next_compact_idx_lowbits != kBitsPerWord) {\n            compact_read_word = next_compact_word >> next_compact_idx_lowbits;\n          } else {\n            compact_read_word = 0;\n          }\n        }\n        compact_idx_lowbits = next_compact_idx_lowbits;\n      }\n    }\n    mid_target[widx] = mid_write_word;\n    compact_target[widx] = compact_write_word;\n  }\n}\n\nvoid ExpandThenSubsetBytearrNested(const void* __restrict compact_bitarr, const uintptr_t* __restrict mid_bitarr, const uintptr_t* __restrict top_expand_mask, const uintptr_t* __restrict subset_mask, uint32_t subset_size, uint32_t mid_popcount, uint32_t mid_start_bit, uintptr_t* __restrict mid_target, uintptr_t* __restrict compact_target) {\n  assert(mid_popcount);\n  const uint32_t leading_byte_ct = 1 + ((mid_popcount - 1) % kBitsPerWord) / CHAR_BIT;\n  uintptr_t compact_read_word = SubwordLoad(compact_bitarr, leading_byte_ct);\n  uint32_t compact_idx_lowbits = CHAR_BIT * (sizeof(intptr_t) - leading_byte_ct);\n  const unsigned char* compact_bitarrb = &(S_CAST(const unsigned char*, compact_bitarr)[leading_byte_ct]);\n  const uintptr_t* mid_bitarr_iter = mid_bitarr;\n  const uint32_t subset_size_lowbits = subset_size % kBitsPerWord;\n  const uint32_t write_widx_last = subset_size / kBitsPerWord;\n  uintptr_t mid_read_word = (*mid_bitarr_iter) >> mid_start_bit;\n  uintptr_t mid_output_word = 0;\n  uintptr_t compact_output_word = 0;\n  uint32_t mid_idx_lowbits = mid_start_bit;\n  uint32_t compact_widx = UINT32_MAX;  // deliberate overflow\n  uint32_t read_widx = UINT32_MAX;  // deliberate overflow\n  uint32_t write_idx_lowbits = 0;\n  uint32_t write_widx = 0;\n\n  // bugfix (5 Feb 2018): missed a case in sparse subset_mask optimization\n  uint32_t mid_set_skip = 0;\n  while ((write_widx != write_widx_last) || (write_idx_lowbits != subset_size_lowbits)) {\n    uintptr_t subset_word;\n    uintptr_t mid_expanded_bits;\n    uint32_t mid_set_ct;\n    while (1) {\n      ++read_widx;\n      uintptr_t top_word = top_expand_mask[read_widx];\n      subset_word = subset_mask[read_widx];\n      mid_expanded_bits = 0;\n      if (top_word) {\n        uint32_t top_set_ct = PopcountWord(top_word);\n        uint32_t next_mid_idx_lowbits = mid_idx_lowbits + top_set_ct;\n        if (next_mid_idx_lowbits <= kBitsPerWord) {\n          mid_expanded_bits = _pdep_u64(mid_read_word, top_word);\n          if (top_set_ct != kBitsPerWord) {\n            mid_read_word = mid_read_word >> top_set_ct;\n          } else {\n            // avoid nasal demons\n            mid_read_word = 0;\n          }\n        } else {\n          uintptr_t next_mid_read_word = *(++mid_bitarr_iter);\n          next_mid_idx_lowbits -= kBitsPerWord;\n          mid_read_word |= next_mid_read_word << (kBitsPerWord - mid_idx_lowbits);\n          mid_expanded_bits = _pdep_u64(mid_read_word, top_word);\n          if (next_mid_idx_lowbits != kBitsPerWord) {\n            mid_read_word = next_mid_read_word >> next_mid_idx_lowbits;\n          } else {\n            mid_read_word = 0;\n          }\n        }\n        mid_idx_lowbits = next_mid_idx_lowbits;\n      }\n      mid_set_ct = PopcountWord(mid_expanded_bits);\n      if (subset_word) {\n        break;\n      }\n      mid_set_skip += 
mid_set_ct;\n    }\n\n    uintptr_t mid_extracted_bits = 0;\n    uintptr_t compact_extracted_bits = 0;\n    uint32_t set_bit_ct = PopcountWord(subset_word);\n    if (mid_expanded_bits & subset_word) {\n      // lazy load\n      compact_idx_lowbits += mid_set_skip;\n      if (compact_idx_lowbits >= kBitsPerWord) {\n        compact_widx += compact_idx_lowbits / kBitsPerWord;\n        compact_idx_lowbits = compact_idx_lowbits % kBitsPerWord;\n        CopyFromUnalignedOffsetW(&compact_read_word, compact_bitarrb, compact_widx);\n        compact_read_word >>= compact_idx_lowbits;\n      } else {\n        compact_read_word = compact_read_word >> mid_set_skip;\n      }\n      uint32_t next_compact_idx_lowbits = compact_idx_lowbits + mid_set_ct;\n      uintptr_t compact_expanded_bits;\n      if (next_compact_idx_lowbits <= kBitsPerWord) {\n        compact_expanded_bits = _pdep_u64(compact_read_word, mid_expanded_bits);\n        if (mid_set_ct != kBitsPerWord) {\n          compact_read_word = compact_read_word >> mid_set_ct;\n        }\n      } else {\n        ++compact_widx;\n        uintptr_t next_compact_word;\n        CopyFromUnalignedOffsetW(&next_compact_word, compact_bitarrb, compact_widx);\n        next_compact_idx_lowbits -= kBitsPerWord;\n        compact_read_word |= next_compact_word << (kBitsPerWord - compact_idx_lowbits);\n        compact_expanded_bits = _pdep_u64(compact_read_word, mid_expanded_bits);\n        if (next_compact_idx_lowbits != kBitsPerWord) {\n          compact_read_word = next_compact_word >> next_compact_idx_lowbits;\n        }\n      }\n      compact_extracted_bits = _pext_u64(compact_expanded_bits, subset_word);\n      mid_extracted_bits = _pext_u64(mid_expanded_bits, subset_word);\n      compact_idx_lowbits = next_compact_idx_lowbits;\n      compact_output_word |= compact_extracted_bits << write_idx_lowbits;\n      mid_output_word |= mid_extracted_bits << write_idx_lowbits;\n      mid_set_skip = 0;\n    } else {\n      mid_set_skip += mid_set_ct;\n    }\n    const uint32_t new_write_idx_lowbits = write_idx_lowbits + set_bit_ct;\n    if (new_write_idx_lowbits >= kBitsPerWord) {\n      mid_target[write_widx] = mid_output_word;\n      compact_target[write_widx] = compact_output_word;\n      ++write_widx;\n      if (write_idx_lowbits) {\n        mid_output_word = mid_extracted_bits >> (kBitsPerWord - write_idx_lowbits);\n        compact_output_word = compact_extracted_bits >> (kBitsPerWord - write_idx_lowbits);\n      } else {\n        mid_output_word = 0;\n        compact_output_word = 0;\n      }\n    }\n    write_idx_lowbits = new_write_idx_lowbits % kBitsPerWord;\n  }\n  if (write_idx_lowbits) {\n    mid_target[write_widx] = mid_output_word;\n    compact_target[write_widx] = compact_output_word;\n  }\n}\n#else  // !USE_AVX2\nvoid CopyBitarrSubset(const uintptr_t* __restrict raw_bitarr, const uintptr_t* __restrict subset_mask, uint32_t output_bit_idx_end, uintptr_t* __restrict output_bitarr) {\n  const uint32_t output_bit_idx_end_lowbits = output_bit_idx_end % kBitsPerWord;\n  uintptr_t* output_bitarr_iter = output_bitarr;\n  uintptr_t* output_bitarr_last = &(output_bitarr[output_bit_idx_end / kBitsPerWord]);\n  uintptr_t cur_output_word = 0;\n  uint32_t read_widx = UINT32_MAX;  // deliberate overflow\n  uint32_t write_idx_lowbits = 0;\n  while ((output_bitarr_iter != output_bitarr_last) || (write_idx_lowbits != output_bit_idx_end_lowbits)) {\n    uintptr_t cur_mask_word;\n    // sparse subset_mask optimization\n    // guaranteed to terminate since there's at least 
one more set bit\n    do {\n      cur_mask_word = subset_mask[++read_widx];\n    } while (!cur_mask_word);\n    uintptr_t cur_masked_input_word = raw_bitarr[read_widx] & cur_mask_word;\n    const uint32_t cur_mask_popcount = PopcountWord(cur_mask_word);\n    uintptr_t subsetted_input_word = 0;\n    while (cur_masked_input_word) {\n      const uintptr_t mask_word_high = (cur_mask_word | (cur_masked_input_word ^ (cur_masked_input_word - 1))) + 1;\n      if (!mask_word_high) {\n        subsetted_input_word |= cur_masked_input_word >> (kBitsPerWord - cur_mask_popcount);\n        break;\n      }\n      const uint32_t cur_read_end = ctzw(mask_word_high);\n      const uintptr_t bits_to_copy = cur_masked_input_word & (~mask_word_high);\n      cur_masked_input_word ^= bits_to_copy;\n      const uint32_t cur_write_end = PopcountWord(cur_mask_word & (~mask_word_high));\n      subsetted_input_word |= bits_to_copy >> (cur_read_end - cur_write_end);\n    }\n    cur_output_word |= subsetted_input_word << write_idx_lowbits;\n    const uint32_t new_write_idx_lowbits = write_idx_lowbits + cur_mask_popcount;\n    if (new_write_idx_lowbits >= kBitsPerWord) {\n      *output_bitarr_iter++ = cur_output_word;\n      // ...and these are the bits that fell off\n      // bugfix: unsafe to right-shift 64\n      if (write_idx_lowbits) {\n        cur_output_word = subsetted_input_word >> (kBitsPerWord - write_idx_lowbits);\n      } else {\n        cur_output_word = 0;\n      }\n    }\n    write_idx_lowbits = new_write_idx_lowbits % kBitsPerWord;\n  }\n  if (write_idx_lowbits) {\n    *output_bitarr_iter = cur_output_word;\n  }\n}\n\n#  ifdef NO_UNALIGNED\nvoid CopyBitarrSubsetToUnaligned(const uintptr_t* __restrict raw_bitarr, const uintptr_t* __restrict subset_mask, uint32_t output_bit_idx_end, void* __restrict output_bitarr) {\n  const uint32_t output_bit_idx_end_lowbits = output_bit_idx_end % kBitsPerWord;\n  unsigned char* output_bitarr_iter = S_CAST(unsigned char*, output_bitarr);\n  unsigned char* output_bitarr_last = &(output_bitarr_iter[(output_bit_idx_end / kBitsPerWord) * kBytesPerWord]);\n  uintptr_t cur_output_word = 0;\n  uint32_t read_widx = UINT32_MAX;  // deliberate overflow\n  uint32_t write_idx_lowbits = 0;\n  while ((output_bitarr_iter != output_bitarr_last) || (write_idx_lowbits != output_bit_idx_end_lowbits)) {\n    uintptr_t cur_mask_word;\n    // sparse subset_mask optimization\n    // guaranteed to terminate since there's at least one more set bit\n    do {\n      cur_mask_word = subset_mask[++read_widx];\n    } while (!cur_mask_word);\n    uintptr_t cur_masked_input_word = raw_bitarr[read_widx] & cur_mask_word;\n    const uint32_t cur_mask_popcount = PopcountWord(cur_mask_word);\n    uintptr_t subsetted_input_word = 0;\n    while (cur_masked_input_word) {\n      const uintptr_t mask_word_high = (cur_mask_word | (cur_masked_input_word ^ (cur_masked_input_word - 1))) + 1;\n      if (!mask_word_high) {\n        subsetted_input_word |= cur_masked_input_word >> (kBitsPerWord - cur_mask_popcount);\n        break;\n      }\n      const uint32_t cur_read_end = ctzw(mask_word_high);\n      const uintptr_t bits_to_copy = cur_masked_input_word & (~mask_word_high);\n      cur_masked_input_word ^= bits_to_copy;\n      const uint32_t cur_write_end = PopcountWord(cur_mask_word & (~mask_word_high));\n      subsetted_input_word |= bits_to_copy >> (cur_read_end - cur_write_end);\n    }\n    cur_output_word |= subsetted_input_word << write_idx_lowbits;\n    const uint32_t new_write_idx_lowbits = write_idx_lowbits 
+ cur_mask_popcount;\n    if (new_write_idx_lowbits >= kBitsPerWord) {\n      AppendW(cur_output_word, &output_bitarr_iter);\n      // ...and these are the bits that fell off\n      // bugfix: unsafe to right-shift 64\n      if (write_idx_lowbits) {\n        cur_output_word = subsetted_input_word >> (kBitsPerWord - write_idx_lowbits);\n      } else {\n        cur_output_word = 0;\n      }\n    }\n    write_idx_lowbits = new_write_idx_lowbits % kBitsPerWord;\n  }\n  if (write_idx_lowbits) {\n    CopyToUnalignedW(output_bitarr_iter, &cur_output_word);\n  }\n}\n#  endif\n\n// Basic SSE2 implementation of Lauradoux/Walisch popcount.\nuintptr_t PopcountVecsNoAvx2(const VecW* bit_vvec, uintptr_t vec_ct) {\n  // popcounts vptr[0..(vec_ct-1)].  Assumes vec_ct is a multiple of 3 (0 ok).\n  assert(!(vec_ct % 3));\n  const VecW m0 = vecw_setzero();\n  const VecW m1 = VCONST_W(kMask5555);\n  const VecW m2 = VCONST_W(kMask3333);\n  const VecW m4 = VCONST_W(kMask0F0F);\n  const VecW* bit_vvec_iter = bit_vvec;\n  VecW prev_sad_result = vecw_setzero();\n  VecW acc = vecw_setzero();\n  uintptr_t cur_incr = 30;\n  for (; ; vec_ct -= cur_incr) {\n    if (vec_ct < 30) {\n      if (!vec_ct) {\n        acc = acc + prev_sad_result;\n        return HsumW(acc);\n      }\n      cur_incr = vec_ct;\n    }\n    VecW inner_acc = vecw_setzero();\n    const VecW* bit_vvec_stop = &(bit_vvec_iter[cur_incr]);\n    do {\n      VecW count1 = *bit_vvec_iter++;\n      VecW count2 = *bit_vvec_iter++;\n      VecW half1 = *bit_vvec_iter++;\n      VecW half2 = vecw_srli(half1, 1) & m1;\n      half1 = half1 & m1;\n      // Two bits can represent values from 0-3, so make each pair in count1\n      // count2 store a partial bitcount covering themselves AND another bit\n      // from elsewhere.\n      count1 = count1 - (vecw_srli(count1, 1) & m1);\n      count2 = count2 - (vecw_srli(count2, 1) & m1);\n      count1 = count1 + half1;\n      count2 = count2 + half2;\n      // Four bits represent 0-15, so we can safely add four 0-3 partial\n      // bitcounts together.\n      count1 = (count1 & m2) + (vecw_srli(count1, 2) & m2);\n      count1 = count1 + (count2 & m2) + (vecw_srli(count2, 2) & m2);\n      // Accumulator stores sixteen 0-255 counts in parallel.\n      // (32 in AVX2 case, 4 in 32-bit case)\n      inner_acc = inner_acc + (count1 & m4) + (vecw_srli(count1, 4) & m4);\n    } while (bit_vvec_iter < bit_vvec_stop);\n    // _mm_sad_epu8() has better throughput than the previous method of\n    // horizontal-summing the bytes in inner_acc, by enough to compensate for\n    // the loop length being reduced from 30 to 15 vectors, but it has high\n    // latency.  We work around that by waiting till the end of the next full\n    // loop iteration to actually use the SAD result.\n    acc = acc + prev_sad_result;\n    prev_sad_result = vecw_bytesum(inner_acc, m0);\n  }\n}\n\nstatic inline uintptr_t PopcountVecsNoAvx2Intersect(const VecW* __restrict vvec1_iter, const VecW* __restrict vvec2_iter, uintptr_t vec_ct) {\n  // popcounts vvec1 AND vvec2[0..(ct-1)].  
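\n  // Same 3-vector bit-slicing kernel as PopcountVecsNoAvx2 above, with the\n  // AND fused into the loads.  The 30-vector cap on the inner loop keeps\n  // each byte lane of inner_acc at or below 240 (a 3-vector group feeds at\n  // most 24 bits into each lane, and there are at most 10 groups), so the\n  // byte accumulators cannot overflow.\n  // 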
ct is a multiple of 3.\n  assert(!(vec_ct % 3));\n  const VecW m0 = vecw_setzero();\n  const VecW m1 = VCONST_W(kMask5555);\n  const VecW m2 = VCONST_W(kMask3333);\n  const VecW m4 = VCONST_W(kMask0F0F);\n  VecW prev_sad_result = vecw_setzero();\n  VecW acc = vecw_setzero();\n  uintptr_t cur_incr = 30;\n  for (; ; vec_ct -= cur_incr) {\n    if (vec_ct < 30) {\n      if (!vec_ct) {\n        acc = acc + prev_sad_result;\n        return HsumW(acc);\n      }\n      cur_incr = vec_ct;\n    }\n    VecW inner_acc = vecw_setzero();\n    const VecW* vvec1_stop = &(vvec1_iter[cur_incr]);\n    do {\n      VecW count1 = (*vvec1_iter++) & (*vvec2_iter++);\n      VecW count2 = (*vvec1_iter++) & (*vvec2_iter++);\n      VecW half1 = (*vvec1_iter++) & (*vvec2_iter++);\n      const VecW half2 = vecw_srli(half1, 1) & m1;\n      half1 = half1 & m1;\n      count1 = count1 - (vecw_srli(count1, 1) & m1);\n      count2 = count2 - (vecw_srli(count2, 1) & m1);\n      count1 = count1 + half1;\n      count2 = count2 + half2;\n      count1 = (count1 & m2) + (vecw_srli(count1, 2) & m2);\n      count1 = count1 + (count2 & m2) + (vecw_srli(count2, 2) & m2);\n      inner_acc = inner_acc + (count1 & m4) + (vecw_srli(count1, 4) & m4);\n    } while (vvec1_iter < vvec1_stop);\n    acc = acc + prev_sad_result;\n    prev_sad_result = vecw_bytesum(inner_acc, m0);\n  }\n}\n\nuintptr_t PopcountWordsIntersect(const uintptr_t* __restrict bitvec1_iter, const uintptr_t* __restrict bitvec2_iter, uintptr_t word_ct) {\n  uintptr_t tot = 0;\n  const uintptr_t* bitvec1_end = &(bitvec1_iter[word_ct]);\n  const uintptr_t trivec_ct = word_ct / (3 * kWordsPerVec);\n  tot += PopcountVecsNoAvx2Intersect(R_CAST(const VecW*, bitvec1_iter), R_CAST(const VecW*, bitvec2_iter), trivec_ct * 3);\n  bitvec1_iter = &(bitvec1_iter[trivec_ct * (3 * kWordsPerVec)]);\n  bitvec2_iter = &(bitvec2_iter[trivec_ct * (3 * kWordsPerVec)]);\n  while (bitvec1_iter < bitvec1_end) {\n    tot += PopcountWord((*bitvec1_iter++) & (*bitvec2_iter++));\n  }\n  return tot;\n}\n\nstatic inline uintptr_t PopcountVecsNoAvx2Xor(const VecW* __restrict vvec1_iter, const VecW* __restrict vvec2_iter, uintptr_t vec_ct) {\n  // popcounts vvec1 XOR vvec2[0..(ct-1)].  
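\n  // Identical to PopcountVecsNoAvx2Intersect above, with the AND replaced by\n  // XOR; i.e. this returns the Hamming distance between the two bitvectors.\n  // 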
ct is a multiple of 3.\n  assert(!(vec_ct % 3));\n  const VecW m0 = vecw_setzero();\n  const VecW m1 = VCONST_W(kMask5555);\n  const VecW m2 = VCONST_W(kMask3333);\n  const VecW m4 = VCONST_W(kMask0F0F);\n  VecW prev_sad_result = vecw_setzero();\n  VecW acc = vecw_setzero();\n  uintptr_t cur_incr = 30;\n  for (; ; vec_ct -= cur_incr) {\n    if (vec_ct < 30) {\n      if (!vec_ct) {\n        acc = acc + prev_sad_result;\n        return HsumW(acc);\n      }\n      cur_incr = vec_ct;\n    }\n    VecW inner_acc = vecw_setzero();\n    const VecW* vvec1_stop = &(vvec1_iter[cur_incr]);\n    do {\n      VecW count1 = (*vvec1_iter++) ^ (*vvec2_iter++);\n      VecW count2 = (*vvec1_iter++) ^ (*vvec2_iter++);\n      VecW half1 = (*vvec1_iter++) ^ (*vvec2_iter++);\n      const VecW half2 = vecw_srli(half1, 1) & m1;\n      half1 = half1 & m1;\n      count1 = count1 - (vecw_srli(count1, 1) & m1);\n      count2 = count2 - (vecw_srli(count2, 1) & m1);\n      count1 = count1 + half1;\n      count2 = count2 + half2;\n      count1 = (count1 & m2) + (vecw_srli(count1, 2) & m2);\n      count1 = count1 + (count2 & m2) + (vecw_srli(count2, 2) & m2);\n      inner_acc = inner_acc + (count1 & m4) + (vecw_srli(count1, 4) & m4);\n    } while (vvec1_iter < vvec1_stop);\n    acc = acc + prev_sad_result;\n    prev_sad_result = vecw_bytesum(inner_acc, m0);\n  }\n}\n\nuintptr_t PopcountWordsXor(const uintptr_t* __restrict bitvec1_iter, const uintptr_t* __restrict bitvec2_iter, uintptr_t word_ct) {\n  uintptr_t tot = 0;\n  const uintptr_t* bitvec1_end = &(bitvec1_iter[word_ct]);\n  const uintptr_t trivec_ct = word_ct / (3 * kWordsPerVec);\n  tot += PopcountVecsNoAvx2Xor(R_CAST(const VecW*, bitvec1_iter), R_CAST(const VecW*, bitvec2_iter), trivec_ct * 3);\n  bitvec1_iter = &(bitvec1_iter[trivec_ct * (3 * kWordsPerVec)]);\n  bitvec2_iter = &(bitvec2_iter[trivec_ct * (3 * kWordsPerVec)]);\n  while (bitvec1_iter < bitvec1_end) {\n    tot += PopcountWord((*bitvec1_iter++) ^ (*bitvec2_iter++));\n  }\n  return tot;\n}\n\n/*\nstatic inline uintptr_t PopcountVecsNoAvx2Intersect3(const VecW* __restrict vvec1_iter, const VecW* __restrict vvec2_iter, const VecW* __restrict vvec3_iter, uintptr_t vec_ct) {\n  assert(!(vec_ct % 3));\n  const VecW m0 = vecw_setzero();\n  const VecW m1 = VCONST_W(kMask5555);\n  const VecW m2 = VCONST_W(kMask3333);\n  const VecW m4 = VCONST_W(kMask0F0F);\n  VecW prev_sad_result = vecw_setzero();\n  VecW acc = vecw_setzero();\n  uintptr_t cur_incr = 30;\n  for (; ; vec_ct -= cur_incr) {\n    if (vec_ct < 30) {\n      if (!vec_ct) {\n        acc = acc + prev_sad_result;\n        return HsumW(acc);\n      }\n      cur_incr = vec_ct;\n    }\n    VecW inner_acc = vecw_setzero();\n    const VecW* vvec1_stop = &(vvec1_iter[cur_incr]);\n    do {\n      VecW count1 = (*vvec1_iter++) & (*vvec2_iter++) & (*vvec3_iter++);\n      VecW count2 = (*vvec1_iter++) & (*vvec2_iter++) & (*vvec3_iter++);\n      VecW half1 = (*vvec1_iter++) & (*vvec2_iter++) & (*vvec3_iter++);\n      const VecW half2 = vecw_srli(half1, 1) & m1;\n      half1 = half1 & m1;\n      count1 = count1 - (vecw_srli(count1, 1) & m1);\n      count2 = count2 - (vecw_srli(count2, 1) & m1);\n      count1 = count1 + half1;\n      count2 = count2 + half2;\n      count1 = (count1 & m2) + (vecw_srli(count1, 2) & m2);\n      count1 = count1 + (count2 & m2) + (vecw_srli(count2, 2) & m2);\n      inner_acc = inner_acc + (count1 & m4) + (vecw_srli(count1, 4) & m4);\n    } while (vvec1_iter < vvec1_stop);\n    acc = acc + prev_sad_result;\n    prev_sad_result = 
vecw_bytesum(inner_acc, m0);\n  }\n}\n\nuintptr_t PopcountWordsIntersect3(const uintptr_t* __restrict bitvec1_iter, const uintptr_t* __restrict bitvec2_iter, const uintptr_t* __restrict bitvec3_iter, uintptr_t word_ct) {\n  uintptr_t tot = 0;\n  const uintptr_t* bitvec1_end = &(bitvec1_iter[word_ct]);\n  const uintptr_t trivec_ct = word_ct / (3 * kWordsPerVec);\n  tot += PopcountVecsNoAvx2Intersect3(R_CAST(const VecW*, bitvec1_iter), R_CAST(const VecW*, bitvec2_iter), R_CAST(const VecW*, bitvec3_iter), trivec_ct * 3);\n  bitvec1_iter = &(bitvec1_iter[trivec_ct * (3 * kWordsPerVec)]);\n  bitvec2_iter = &(bitvec2_iter[trivec_ct * (3 * kWordsPerVec)]);\n  bitvec3_iter = &(bitvec3_iter[trivec_ct * (3 * kWordsPerVec)]);\n  while (bitvec1_iter < bitvec1_end) {\n    tot += PopcountWord((*bitvec1_iter++) & (*bitvec2_iter++) & (*bitvec3_iter++));\n  }\n  return tot;\n}\n*/\n\nvoid ExpandBytearr(const void* __restrict compact_bitarr, const uintptr_t* __restrict expand_mask, uint32_t word_ct, uint32_t expand_size, uint32_t read_start_bit, uintptr_t* __restrict target) {\n  ZeroWArr(word_ct, target);\n  const unsigned char* compact_bitarrb = S_CAST(const unsigned char*, compact_bitarr);\n  const uint32_t expand_sizex_m1 = expand_size + read_start_bit - 1;\n  const uint32_t compact_widx_last = expand_sizex_m1 / kBitsPerWord;\n  uint32_t compact_idx_lowbits = read_start_bit;\n  uint32_t loop_len = kBitsPerWord;\n  uintptr_t write_widx = 0;\n  uintptr_t expand_mask_bits = expand_mask[0];\n  for (uint32_t compact_widx = 0; ; ++compact_widx) {\n    uintptr_t compact_word;\n    if (compact_widx >= compact_widx_last) {\n      if (compact_widx > compact_widx_last) {\n        return;\n      }\n      loop_len = 1 + (expand_sizex_m1 % kBitsPerWord);\n      // avoid possible segfault\n      compact_word = SubwordLoad(&(compact_bitarrb[compact_widx * kBytesPerWord]), DivUp(loop_len, CHAR_BIT));\n    } else {\n      CopyFromUnalignedOffsetW(&compact_word, compact_bitarrb, compact_widx);\n    }\n    for (; compact_idx_lowbits != loop_len; ++compact_idx_lowbits) {\n      const uintptr_t lowbit = BitIter1y(expand_mask, &write_widx, &expand_mask_bits);\n      // bugfix: can't just use (compact_word & 1) and compact_word >>= 1,\n      // since we may skip the first bit on the first loop iteration\n      if ((compact_word >> compact_idx_lowbits) & 1) {\n        target[write_widx] |= lowbit;\n      }\n    }\n    compact_idx_lowbits = 0;\n  }\n}\n\nvoid ExpandThenSubsetBytearr(const void* __restrict compact_bitarr, const uintptr_t* __restrict expand_mask, const uintptr_t* __restrict subset_mask, uint32_t expand_size, uint32_t subset_size, uint32_t read_start_bit, uintptr_t* __restrict target) {\n  const uint32_t expand_sizex_m1 = expand_size + read_start_bit - 1;\n  const uint32_t leading_byte_ct = 1 + (expand_sizex_m1 % kBitsPerWord) / CHAR_BIT;\n  uint32_t read_idx_lowbits = CHAR_BIT * (sizeof(intptr_t) - leading_byte_ct);\n  uintptr_t compact_read_word = SubwordLoad(compact_bitarr, leading_byte_ct) << read_idx_lowbits;\n  read_idx_lowbits += read_start_bit;\n  const unsigned char* compact_bitarr_biter = &(S_CAST(const unsigned char*, compact_bitarr)[leading_byte_ct]);\n  const uint32_t subset_size_lowbits = subset_size % kBitsPerWord;\n  uintptr_t* target_iter = target;\n  uintptr_t* target_last = &(target[subset_size / kBitsPerWord]);\n  uintptr_t compact_write_word = 0;\n  uint32_t read_widx = 0;\n  // further improvement is probably possible (e.g. 
use AVX2 lazy-load), but\n  // I'll postpone for now\n  uint32_t write_idx_lowbits = 0;\n  while ((target_iter != target_last) || (write_idx_lowbits != subset_size_lowbits)) {\n    const uintptr_t subset_word = subset_mask[read_widx];\n    const uintptr_t expand_word = expand_mask[read_widx];\n    ++read_widx;\n    uintptr_t tmp_compact_write_word = 0;\n    if (expand_word) {\n      const uint32_t expand_bit_ct = PopcountWord(expand_word);\n      uint32_t read_idx_lowbits_end = read_idx_lowbits + expand_bit_ct;\n      uintptr_t tmp_compact_read_word = 0;\n      if (read_idx_lowbits != kBitsPerWord) {\n        tmp_compact_read_word = compact_read_word >> read_idx_lowbits;\n      }\n      if (read_idx_lowbits_end > kBitsPerWord) {\n        CopyFromUnalignedIncrW(&compact_read_word, &compact_bitarr_biter);\n        tmp_compact_read_word |= compact_read_word << (kBitsPerWord - read_idx_lowbits);\n        read_idx_lowbits_end -= kBitsPerWord;\n      }\n      tmp_compact_read_word = bzhi_max(tmp_compact_read_word, expand_bit_ct);\n      read_idx_lowbits = read_idx_lowbits_end;\n      if (tmp_compact_read_word) {\n        uintptr_t cur_intersect = subset_word & expand_word;\n        while (cur_intersect) {\n          const uintptr_t cur_intersect_and_arg = cur_intersect - k1LU;\n          const uintptr_t lowmask = (cur_intersect ^ cur_intersect_and_arg) >> 1;\n          const uint32_t read_idx_offset = PopcountWord(expand_word & lowmask);\n          uintptr_t shifted_compact_read_word = tmp_compact_read_word >> read_idx_offset;\n          if (shifted_compact_read_word & 1) {\n            tmp_compact_write_word |= (k1LU << PopcountWord(subset_word & lowmask));\n            if (shifted_compact_read_word == 1) {\n              break;\n            }\n          }\n          cur_intersect &= cur_intersect_and_arg;\n        }\n      }\n      compact_write_word |= tmp_compact_write_word << write_idx_lowbits;\n    }\n    const uint32_t write_idx_lowbits_end = write_idx_lowbits + PopcountWord(subset_word);\n    if (write_idx_lowbits_end >= kBitsPerWord) {\n      *target_iter++ = compact_write_word;\n      if (write_idx_lowbits) {\n        compact_write_word = tmp_compact_write_word >> (kBitsPerWord - write_idx_lowbits);\n      } else {\n        compact_write_word = 0;\n      }\n    }\n    write_idx_lowbits = write_idx_lowbits_end % kBitsPerWord;\n  }\n  if (write_idx_lowbits) {\n    *target_iter = compact_write_word;\n  }\n}\n\n// compact_bitarr := phaseinfo\n// mid_bitarr := phasepresent, [1 + het_ct]\n// top_expand_mask := all_hets, [raw_sample_ct]\nvoid ExpandBytearrNested(const void* __restrict compact_bitarr, const uintptr_t* __restrict mid_bitarr, const uintptr_t* __restrict top_expand_mask, uint32_t word_ct, uint32_t mid_popcount, uint32_t mid_start_bit, uintptr_t* __restrict mid_target, uintptr_t* __restrict compact_target) {\n  ZeroWArr(word_ct, mid_target);\n  ZeroWArr(word_ct, compact_target);\n  const unsigned char* compact_bitarrb = S_CAST(const unsigned char*, compact_bitarr);\n  const uint32_t mid_popcount_m1 = mid_popcount - 1;\n  const uint32_t compact_widx_last = mid_popcount_m1 / kBitsPerWord;\n  uint32_t mid_idx = mid_start_bit;\n  // can allow compact_idx_lowbits to be initialized to nonzero\n  uint32_t loop_len = kBitsPerWord;\n  uintptr_t write_widx = 0;\n  uintptr_t top_expand_mask_bits = top_expand_mask[0];\n  for (uint32_t compact_widx = 0; ; ++compact_widx) {\n    uintptr_t compact_word;\n    if (compact_widx >= compact_widx_last) {\n      if (compact_widx > compact_widx_last) 
{\n        return;\n      }\n      loop_len = 1 + (mid_popcount_m1 % kBitsPerWord);\n      // avoid possible segfault\n      compact_word = SubwordLoad(&(compact_bitarrb[compact_widx * kBytesPerWord]), DivUp(loop_len, CHAR_BIT));\n    } else {\n      CopyFromUnalignedOffsetW(&compact_word, compact_bitarrb, compact_widx);\n    }\n    for (uint32_t compact_idx_lowbits = 0; compact_idx_lowbits != loop_len; ++mid_idx) {\n      const uintptr_t lowbit = BitIter1y(top_expand_mask, &write_widx, &top_expand_mask_bits);\n      if (IsSet(mid_bitarr, mid_idx)) {\n        mid_target[write_widx] |= lowbit;\n        compact_target[write_widx] |= lowbit * (compact_word & 1);\n        compact_word >>= 1;\n        ++compact_idx_lowbits;\n      }\n    }\n  }\n}\n\nvoid ExpandThenSubsetBytearrNested(const void* __restrict compact_bitarr, const uintptr_t* __restrict mid_bitarr, const uintptr_t* __restrict top_expand_mask, const uintptr_t* __restrict subset_mask, uint32_t subset_size, uint32_t mid_popcount, uint32_t mid_start_bit, uintptr_t* __restrict mid_target, uintptr_t* __restrict compact_target) {\n  assert(mid_popcount);\n  const uint32_t leading_byte_ct = 1 + ((mid_popcount - 1) % kBitsPerWord) / CHAR_BIT;\n  uint32_t compact_idx_lowbits = CHAR_BIT * (sizeof(intptr_t) - leading_byte_ct);\n  uintptr_t compact_read_word = SubwordLoad(compact_bitarr, leading_byte_ct) << compact_idx_lowbits;\n  const unsigned char* compact_bitarr_biter = &(S_CAST(const unsigned char*, compact_bitarr)[leading_byte_ct]);\n  // bugfix (12 Apr 2018): need to round down here\n  const uint32_t subset_size_dl = subset_size / kBitsPerWord;\n  const uint32_t subset_size_lowbits = subset_size % kBitsPerWord;\n  const uintptr_t* mid_read_iter = mid_bitarr;\n  uintptr_t mid_read_word = *mid_read_iter++;\n  uintptr_t mid_write_word = 0;\n  uintptr_t compact_write_word = 0;\n  uint32_t mid_idx_lowbits = mid_start_bit;\n  uint32_t write_idx_lowbits = 0;\n  uint32_t write_widx = 0;\n  uint32_t read_widx = 0;\n  while ((write_widx != subset_size_dl) || (write_idx_lowbits != subset_size_lowbits)) {\n    const uintptr_t subset_word = subset_mask[read_widx];\n    const uintptr_t top_word = top_expand_mask[read_widx];\n    ++read_widx;\n    uintptr_t tmp_mid_write_word = 0;\n    uintptr_t tmp_compact_write_word = 0;\n    if (top_word) {\n      const uint32_t top_set_ct = PopcountWord(top_word);\n      uint32_t mid_idx_lowbits_end = mid_idx_lowbits + top_set_ct;\n      uintptr_t tmp_mid_read_word = 0;\n      if (mid_idx_lowbits != kBitsPerWord) {\n        tmp_mid_read_word = mid_read_word >> mid_idx_lowbits;\n      }\n      if (mid_idx_lowbits_end > kBitsPerWord) {\n        // be paranoid for now re: reading an extra word off the end of\n        // mid_bitarr\n        mid_read_word = *mid_read_iter++;\n        tmp_mid_read_word |= mid_read_word << (kBitsPerWord - mid_idx_lowbits);\n        mid_idx_lowbits_end -= kBitsPerWord;\n      }\n      tmp_mid_read_word = bzhi_max(tmp_mid_read_word, top_set_ct);\n      mid_idx_lowbits = mid_idx_lowbits_end;\n      if (tmp_mid_read_word) {\n        const uint32_t mid_set_ct = PopcountWord(tmp_mid_read_word);\n        uintptr_t tmp_compact_read_word;\n        if (compact_idx_lowbits != kBitsPerWord) {\n          const uint32_t compact_idx_lowbits_end = compact_idx_lowbits + mid_set_ct;\n          tmp_compact_read_word = compact_read_word >> compact_idx_lowbits;\n          // avoid reading off end of compact_bitarr here\n          if (compact_idx_lowbits_end <= kBitsPerWord) {\n            compact_idx_lowbits 
= compact_idx_lowbits_end;\n          } else {\n            CopyFromUnalignedIncrW(&compact_read_word, &compact_bitarr_biter);\n            tmp_compact_read_word |= compact_read_word << (kBitsPerWord - compact_idx_lowbits);\n            compact_idx_lowbits = compact_idx_lowbits_end - kBitsPerWord;\n          }\n        } else {\n          // special case, can't right-shift 64\n          CopyFromUnalignedIncrW(&compact_read_word, &compact_bitarr_biter);\n          compact_idx_lowbits = mid_set_ct;\n          tmp_compact_read_word = compact_read_word;\n        }\n        tmp_compact_read_word = bzhi_max(tmp_compact_read_word, mid_set_ct);\n\n        uintptr_t cur_masked_top = subset_word & top_word;\n        while (cur_masked_top) {\n          const uintptr_t cur_masked_top_and_arg = cur_masked_top - k1LU;\n          const uintptr_t lowmask = (cur_masked_top ^ cur_masked_top_and_arg) >> 1;\n          const uint32_t read_idx_offset = PopcountWord(top_word & lowmask);\n          uintptr_t shifted_mid_read_word = tmp_mid_read_word >> read_idx_offset;\n          if (shifted_mid_read_word & 1) {\n            // bugfix (7 Sep 2017): forgot the \"k1LU << \" part of this\n            const uintptr_t cur_bit = k1LU << PopcountWord(subset_word & lowmask);\n            tmp_mid_write_word |= cur_bit;\n            tmp_compact_write_word += cur_bit * ((tmp_compact_read_word >> (mid_set_ct - PopcountWord(shifted_mid_read_word))) & 1);\n            if (shifted_mid_read_word == 1) {\n              break;\n            }\n          }\n          cur_masked_top &= cur_masked_top_and_arg;\n        }\n      }\n      mid_write_word |= tmp_mid_write_word << write_idx_lowbits;\n      compact_write_word |= tmp_compact_write_word << write_idx_lowbits;\n    }\n    const uint32_t write_idx_lowbits_end = write_idx_lowbits + PopcountWord(subset_word);\n    if (write_idx_lowbits_end >= kBitsPerWord) {\n      mid_target[write_widx] = mid_write_word;\n      compact_target[write_widx] = compact_write_word;\n      ++write_widx;\n      if (write_idx_lowbits) {\n        const uint32_t rshift = kBitsPerWord - write_idx_lowbits;\n        mid_write_word = tmp_mid_write_word >> rshift;\n        compact_write_word = tmp_compact_write_word >> rshift;\n      } else {\n        mid_write_word = 0;\n        compact_write_word = 0;\n      }\n    }\n    write_idx_lowbits = write_idx_lowbits_end % kBitsPerWord;\n  }\n  if (write_idx_lowbits) {\n    mid_target[write_widx] = mid_write_word;\n    compact_target[write_widx] = compact_write_word;\n  }\n}\n#endif\nuintptr_t PopcountBytes(const void* bitarr, uintptr_t byte_ct) {\n  const unsigned char* bitarr_uc = S_CAST(const unsigned char*, bitarr);\n  const uintptr_t* bitvec;\n  const uint32_t lead_byte_ct = AlignKToAW(bitarr_uc, &bitvec);\n  uintptr_t tot = 0;\n  uint32_t trail_byte_ct;\n  // bugfix: had wrong condition here\n  if (byte_ct >= lead_byte_ct) {\n#ifdef USE_SSE2\n    const uint32_t word_rem = lead_byte_ct % kBytesPerWord;\n    if (word_rem) {\n      tot = PopcountWord(ProperSubwordLoad(bitarr_uc, word_rem));\n    }\n    if (lead_byte_ct >= kBytesPerWord) {\n      tot += PopcountWord(bitvec[-1]);\n#  ifdef USE_AVX2\n      if (lead_byte_ct >= 2 * kBytesPerWord) {\n        tot += PopcountWord(bitvec[-2]);\n        if (lead_byte_ct >= 3 * kBytesPerWord) {\n          tot += PopcountWord(bitvec[-3]);\n        }\n      }\n#  endif\n    }\n#else\n    if (lead_byte_ct) {\n      tot = PopcountWord(ProperSubwordLoad(bitarr_uc, lead_byte_ct));\n    }\n#endif\n    byte_ct -= lead_byte_ct;\n    
const uintptr_t word_ct = byte_ct / kBytesPerWord;\n    // vec-alignment required here\n    tot += PopcountWords(bitvec, word_ct);\n    bitarr_uc = DowncastKToUc(&(bitvec[word_ct]));\n    trail_byte_ct = byte_ct % kBytesPerWord;\n  } else {\n    // this may still be >= kBytesPerWord in USE_SSE2 case, so can't remove\n    // loop\n    trail_byte_ct = byte_ct;\n  }\n  for (uint32_t bytes_remaining = trail_byte_ct; ; ) {\n    uintptr_t cur_word;\n    if (bytes_remaining < kBytesPerWord) {\n      if (!bytes_remaining) {\n        return tot;\n      }\n      cur_word = ProperSubwordLoad(bitarr_uc, bytes_remaining);\n      bytes_remaining = 0;\n    } else {\n      CopyFromUnalignedIncrW(&cur_word, &bitarr_uc);\n      bytes_remaining -= kBytesPerWord;\n    }\n    tot += PopcountWord(cur_word);\n  }\n}\n\nuintptr_t PopcountBytesMasked(const void* bitarr, const uintptr_t* mask_arr, uintptr_t byte_ct) {\n  // todo: try modifying PopcountWordsIntersect() to use unaligned load\n  // instructions; then, if there is no performance penalty, try modifying this\n  // main loop to call it.\n  const uintptr_t word_ct = byte_ct / kBytesPerWord;\n#ifdef USE_SSE42\n  const unsigned char* bitarr_uc = S_CAST(const unsigned char*, bitarr);\n  uintptr_t tot = 0;\n  for (uintptr_t widx = 0; widx != word_ct; ++widx) {\n    uintptr_t cur_word;\n    CopyFromUnalignedOffsetW(&cur_word, bitarr_uc, widx);\n    tot += PopcountWord(cur_word & mask_arr[widx]);\n  }\n  const uint32_t trail_byte_ct = byte_ct % kBytesPerWord;\n  if (trail_byte_ct) {\n    const uintptr_t cur_word = ProperSubwordLoad(&(bitarr_uc[word_ct * kBytesPerWord]), trail_byte_ct);\n    tot += PopcountWord(cur_word & mask_arr[word_ct]);\n  }\n  return tot;\n#else\n  const unsigned char* bitarr_biter = S_CAST(const unsigned char*, bitarr);\n  const uintptr_t mainblock_word_ct = word_ct - (word_ct % (24 / kBytesPerWord));\n  const unsigned char* bitarr_24b_end = &(bitarr_biter[mainblock_word_ct * kBytesPerWord]);\n  const uintptr_t* mask_arr_iter = mask_arr;\n  uintptr_t tot = 0;\n  while (bitarr_biter < bitarr_24b_end) {\n    uintptr_t loader;\n    CopyFromUnalignedIncrW(&loader, &bitarr_biter);\n    loader &= (*mask_arr_iter++);\n    uintptr_t ulii = loader - ((loader >> 1) & kMask5555);\n    CopyFromUnalignedIncrW(&loader, &bitarr_biter);\n    loader &= (*mask_arr_iter++);\n    uintptr_t uljj = loader - ((loader >> 1) & kMask5555);\n    CopyFromUnalignedIncrW(&loader, &bitarr_biter);\n    loader &= (*mask_arr_iter++);\n    ulii += (loader >> 1) & kMask5555;\n    uljj += loader & kMask5555;\n    ulii = (ulii & kMask3333) + ((ulii >> 2) & kMask3333);\n    ulii += (uljj & kMask3333) + ((uljj >> 2) & kMask3333);\n    uintptr_t tmp_stor = (ulii & kMask0F0F) + ((ulii >> 4) & kMask0F0F);\n\n#  ifndef __LP64__\n    CopyFromUnalignedIncrW(&loader, &bitarr_biter);\n    loader &= (*mask_arr_iter++);\n    ulii = loader - ((loader >> 1) & kMask5555);\n    CopyFromUnalignedIncrW(&loader, &bitarr_biter);\n    loader &= (*mask_arr_iter++);\n    uljj = loader - ((loader >> 1) & kMask5555);\n    CopyFromUnalignedIncrW(&loader, &bitarr_biter);\n    loader &= (*mask_arr_iter++);\n    ulii += (loader >> 1) & kMask5555;\n    uljj += loader & kMask5555;\n    ulii = (ulii & kMask3333) + ((ulii >> 2) & kMask3333);\n    ulii += (uljj & kMask3333) + ((uljj >> 2) & kMask3333);\n    tmp_stor += (ulii & kMask0F0F) + ((ulii >> 4) & kMask0F0F);\n#  endif\n\n    // 32-bit case: each 8-bit slot stores a number in 0..48.  
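\n    // (Each 3-word pass feeds at most 24 bits into a slot, and the 32-bit\n    // build makes two passes per loop iteration, hence 48.)\n    // 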
Multiplying by\n    // 0x01010101 is equivalent to the left-shifts and adds we need to sum\n    // those four 8-bit numbers in the high-order slot.\n    // 64-bit case: each 8-bit slot stores a number in 0..24.\n    tot += (tmp_stor * kMask0101) >> (kBitsPerWord - 8);\n  }\n  for (uint32_t trail_byte_ct = byte_ct - (mainblock_word_ct * kBytesPerWord); ; ) {\n    uintptr_t cur_word;\n    if (trail_byte_ct < kBytesPerWord) {\n      if (!trail_byte_ct) {\n        return tot;\n      }\n      cur_word = ProperSubwordLoad(bitarr_biter, trail_byte_ct);\n      trail_byte_ct = 0;\n    } else {\n      CopyFromUnalignedIncrW(&cur_word, &bitarr_biter);\n      trail_byte_ct -= kBytesPerWord;\n    }\n    tot += PopcountWord(cur_word & (*mask_arr_iter++));\n  }\n#endif\n}\n\nvoid FillCumulativePopcounts(const uintptr_t* subset_mask, uint32_t word_ct, uint32_t* cumulative_popcounts) {\n  assert(word_ct);\n  const uint32_t word_ct_m1 = word_ct - 1;\n  uint32_t cur_sum = 0;\n  for (uint32_t widx = 0; widx != word_ct_m1; ++widx) {\n    cumulative_popcounts[widx] = cur_sum;\n    cur_sum += PopcountWord(subset_mask[widx]);\n  }\n  cumulative_popcounts[word_ct_m1] = cur_sum;\n}\n\nvoid FillCumulativePopcountsW(const uintptr_t* subset_mask, uintptr_t word_ct, uintptr_t* cumulative_popcounts_w) {\n  assert(word_ct);\n  const uintptr_t word_ct_m1 = word_ct - 1;\n  uintptr_t cur_sum = 0;\n  for (uintptr_t widx = 0; widx != word_ct_m1; ++widx) {\n    cumulative_popcounts_w[widx] = cur_sum;\n    cur_sum += PopcountWord(subset_mask[widx]);\n  }\n  cumulative_popcounts_w[word_ct_m1] = cur_sum;\n}\n\nvoid UidxsToIdxs(const uintptr_t* subset_mask, const uint32_t* subset_cumulative_popcounts, const uintptr_t idx_list_len, uint32_t* idx_list) {\n  uint32_t* idx_list_end = &(idx_list[idx_list_len]);\n  for (uint32_t* idx_list_iter = idx_list; idx_list_iter != idx_list_end; ++idx_list_iter) {\n    *idx_list_iter = RawToSubsettedPos(subset_mask, subset_cumulative_popcounts, *idx_list_iter);\n  }\n}\n\nvoid Expand1bitTo8(const void* __restrict bytearr, uint32_t input_bit_ct, uint32_t incr, void* __restrict dst) {\n  const unsigned char* bytearr_uc = S_CAST(const unsigned char*, bytearr);\n  const uint32_t input_bit_ct_plus = input_bit_ct + kBytesPerWord - 1;\n#if defined(USE_SHUFFLE8) && !defined(NO_UNALIGNED)\n  const uint32_t input_byte_ct = input_bit_ct_plus / 8;\n  const uint32_t fullvec_ct = input_byte_ct / (kBytesPerVec / 8);\n  uint32_t byte_idx = 0;\n  if (fullvec_ct) {\n    const Vec8thUint* bytearr_alias = R_CAST(const Vec8thUint*, bytearr_uc);\n#  ifdef USE_AVX2\n    const VecUc byte_gather = VecToUc(_mm256_setr_epi64x(0, kMask0101, 2 * kMask0101, 3 * kMask0101));\n    const VecUc bit_mask = VecToUc(_mm256_set1_epi64x(0x7fbfdfeff7fbfdfeLL));\n#  else\n    const VecUc byte_gather = VecToUc(_mm_setr_epi32(0, 0, 0x01010101, 0x01010101));\n    const VecUc bit_mask = VecToUc(_mm_set1_epi64x(0x7fbfdfeff7fbfdfeLL));\n#  endif\n    const VecUc all1 = vecuc_set1(255);\n    const VecUc subfrom = vecuc_set1(incr);\n    VecUc* dst_alias = S_CAST(VecUc*, dst);\n    for (uint32_t vec_idx = 0; vec_idx != fullvec_ct; ++vec_idx) {\n#  ifdef USE_AVX2\n      VecUc vmask = VecToUc(_mm256_set1_epi32(bytearr_alias[vec_idx]));\n#  else\n      VecUc vmask = VecToUc(_mm_set1_epi16(bytearr_alias[vec_idx]));\n#  endif\n      vmask = vecuc_shuffle8(vmask, byte_gather);\n      vmask = vmask | bit_mask;\n      vmask = (vmask == all1);\n      const VecUc result = subfrom - vmask;\n      vecuc_storeu(&(dst_alias[vec_idx]), result);\n    }\n    
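// Each vector iteration above consumed kBytesPerVec / 8 input bytes; the\n    // scalar loop below resumes at that offset.\n    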
byte_idx = fullvec_ct * (kBytesPerVec / 8);\n  }\n  const uintptr_t incr_word = incr * kMask0101;\n  uintptr_t* dst_w = S_CAST(uintptr_t*, dst);\n  for (; byte_idx != input_byte_ct; ++byte_idx) {\n    const uintptr_t input_byte = bytearr_uc[byte_idx];\n#  ifdef USE_AVX2\n    const uintptr_t input_byte_scatter = _pdep_u64(input_byte, kMask0101);\n#  else\n    const uintptr_t input_byte_scatter = (((input_byte & 0xfe) * 0x2040810204080LLU) & kMask0101) | (input_byte & 1);\n#  endif\n    dst_w[byte_idx] = incr_word + input_byte_scatter;\n  }\n#else // NO_UNALIGNED || (!USE_SHUFFLE8)\n  const uintptr_t incr_word = incr * kMask0101;\n  uintptr_t* dst_w = S_CAST(uintptr_t*, dst);\n#  ifdef __LP64__\n  const uint32_t input_byte_ct = input_bit_ct_plus / 8;\n  for (uint32_t uii = 0; uii != input_byte_ct; ++uii) {\n    // this operation maps binary hgfedcba to h0000000g0000000f...\n    //                                        ^       ^       ^\n    //                                        |       |       |\n    //                                       56      48      40\n    // 1. (cur_variant_include_word & 0xfe) gives us hgfedcb0; necessary to\n    //    avoid carryover.\n    // 2. multiply by the number with bits 7, 14, 21, ..., 49 set, to get\n    //    hgfedcbhgfedcbhgf...\n    //    ^       ^       ^\n    //    |       |       |\n    //   56      48      40\n    // 3. mask out all but bits 8, 16, 24, ..., 56\n    // todo: test if this actually beats the per-character loop...\n    const uintptr_t input_byte = bytearr_uc[uii];\n    const uintptr_t input_byte_scatter = (((input_byte & 0xfe) * 0x2040810204080LLU) & kMask0101) | (input_byte & 1);\n    dst_w[uii] = incr_word + input_byte_scatter;\n  }\n#  else\n  const uint32_t fullbyte_ct = input_bit_ct_plus / 8;\n  for (uint32_t uii = 0; uii != fullbyte_ct; ++uii) {\n    // dcba -> d0000000c0000000b0000000a\n    const uintptr_t input_byte = bytearr_uc[uii];\n    uintptr_t input_byte_scatter = ((input_byte & 0xf) * 0x204081) & kMask0101;\n    dst_w[2 * uii] = incr_word + input_byte_scatter;\n    input_byte_scatter = ((input_byte >> 4) * 0x204081) & kMask0101;\n    dst_w[2 * uii + 1] = incr_word + input_byte_scatter;\n  }\n  if (input_bit_ct_plus & 4) {\n    uintptr_t input_byte = bytearr_uc[fullbyte_ct];\n    // input_bit_ct mod 8 in 1..4, so high bits zeroed out\n    uintptr_t input_byte_scatter = (input_byte * 0x204081) & kMask0101;\n    dst_w[2 * fullbyte_ct] = incr_word + input_byte_scatter;\n  }\n#  endif\n#endif\n}\n\nvoid Expand1bitTo16(const void* __restrict bytearr, uint32_t input_bit_ct, uint32_t incr, void* __restrict dst) {\n  const unsigned char* bytearr_uc = S_CAST(const unsigned char*, bytearr);\n#if defined(USE_SHUFFLE8) && (!(defined(USE_AVX2) && defined(NO_UNALIGNED)))\n  const uint32_t input_nybble_ct = DivUp(input_bit_ct, 4);\n  const uint32_t fullvec_ct = input_nybble_ct / (kBytesPerVec / 8);\n  uint32_t byte_idx = 0;\n  if (fullvec_ct) {\n    const Vec16thUint* bytearr_alias = R_CAST(const Vec16thUint*, bytearr_uc);\n#  ifdef USE_AVX2\n    const VecU16 byte_gather = VecToU16(_mm256_setr_epi64x(0, 0, kMask0101, kMask0101));\n    const VecU16 bit_mask = VecToU16(_mm256_set_epi32(0xff7fffbfU, 0xffdfffefU, 0xfff7fffbU, 0xfffdfffeU, 0xff7fffbfU, 0xffdfffefU, 0xfff7fffbU, 0xfffdfffeU));\n#  else\n    const VecU16 bit_mask = VecToU16(_mm_set_epi32(0xff7fffbfU, 0xffdfffefU, 0xfff7fffbU, 0xfffdfffeU));\n#  endif\n    const VecU16 all1 = VCONST_S(0xffff);\n    const VecU16 subfrom = vecu16_set1(incr);\n    VecU16* dst_alias = 
S_CAST(VecU16*, dst);\n    // todo: check whether this is actually any better than the non-vectorized\n    // loop\n    for (uint32_t vec_idx = 0; vec_idx != fullvec_ct; ++vec_idx) {\n#  ifdef USE_AVX2\n      VecU16 vmask = VecToU16(_mm256_set1_epi16(bytearr_alias[vec_idx]));\n      vmask = vecu16_shuffle8(vmask, byte_gather);\n#  else\n      VecU16 vmask = VecToU16(_mm_set1_epi8(bytearr_alias[vec_idx]));\n#  endif\n      vmask = vmask | bit_mask;\n      vmask = (vmask == all1);\n      const VecU16 result = subfrom - vmask;\n      vecu16_storeu(&(dst_alias[vec_idx]), result);\n    }\n    byte_idx = fullvec_ct * (kBytesPerVec / 16);\n  }\n  const uintptr_t incr_word = incr * kMask0001;\n  const uint32_t fullbyte_ct = input_nybble_ct / 2;\n  uintptr_t* dst_w = S_CAST(uintptr_t*, dst);\n  for (; byte_idx != fullbyte_ct; ++byte_idx) {\n    const uintptr_t input_byte = bytearr_uc[byte_idx];\n    const uintptr_t input_byte_scatter = input_byte * 0x200040008001LLU;\n    const uintptr_t write0 = input_byte_scatter & kMask0001;\n    const uintptr_t write1 = (input_byte_scatter >> 4) & kMask0001;\n    dst_w[2 * byte_idx] = incr_word + write0;\n    dst_w[2 * byte_idx + 1] = incr_word + write1;\n  }\n  if (input_nybble_ct % 2) {\n    const uintptr_t input_byte = bytearr_uc[byte_idx];\n    const uintptr_t write0 = (input_byte * 0x200040008001LLU) & kMask0001;\n    dst_w[input_nybble_ct - 1] = incr_word + write0;\n  }\n#else // (!USE_SHUFFLE8) || (NO_UNALIGNED && USE_AVX2)\n  const uintptr_t incr_word = incr * kMask0001;\n  uintptr_t* dst_w = S_CAST(uintptr_t*, dst);\n#  ifdef __LP64__\n  const uint32_t input_nybble_ct = DivUp(input_bit_ct, 4);\n  const uint32_t fullbyte_ct = input_nybble_ct / 2;\n  for (uint32_t uii = 0; uii != fullbyte_ct; ++uii) {\n    const uintptr_t input_byte = bytearr_uc[uii];\n    const uintptr_t input_byte_scatter = input_byte * 0x200040008001LLU;\n    const uintptr_t write0 = input_byte_scatter & kMask0001;\n    const uintptr_t write1 = (input_byte_scatter >> 4) & kMask0001;\n    dst_w[2 * uii] = incr_word + write0;\n    dst_w[2 * uii + 1] = incr_word + write1;\n  }\n  if (input_nybble_ct % 2) {\n    const uintptr_t input_byte = bytearr_uc[fullbyte_ct];\n    const uintptr_t write0 = (input_byte * 0x200040008001LLU) & kMask0001;\n    dst_w[input_nybble_ct - 1] = incr_word + write0;\n  }\n#  else // !__LP64__\n  const uint32_t fullbyte_ct = input_bit_ct / 8;\n  for (uint32_t uii = 0; uii != fullbyte_ct; ++uii) {\n    uintptr_t input_byte = bytearr_uc[uii];\n    const uintptr_t input_byte_scatter = input_byte * 0x8001;\n    dst_w[4 * uii] = (input_byte_scatter & kMask0001) + incr_word;\n    dst_w[4 * uii + 1] = ((input_byte_scatter >> 2) & kMask0001) + incr_word;\n    dst_w[4 * uii + 2] = ((input_byte_scatter >> 4) & kMask0001) + incr_word;\n    dst_w[4 * uii + 3] = ((input_byte_scatter >> 6) & kMask0001) + incr_word;\n  }\n  const uint32_t remainder = input_bit_ct % 8;\n  if (remainder) {\n    uintptr_t input_byte = bytearr_uc[fullbyte_ct];\n    uint16_t* dst_u16 = S_CAST(uint16_t*, dst);\n    uint16_t* dst_u16_last = &(dst_u16[8 * fullbyte_ct]);\n    for (uint32_t uii = 0; uii < remainder; ++uii) {\n      dst_u16_last[uii] = (input_byte & 1) + incr;\n      input_byte = input_byte >> 1;\n    }\n  }\n#  endif\n#endif\n}\n\n#ifdef USE_SSE2\nstatic_assert(kPglBitTransposeBatch == S_CAST(uint32_t, kBitsPerCacheline), \"TransposeBitblock64() needs to be updated.\");\nvoid TransposeBitblock64(const uintptr_t* read_iter, uintptr_t read_ul_stride, uintptr_t write_ul_stride, uint32_t 
read_row_ct, uint32_t write_row_ct, uintptr_t* write_iter, VecW* __restrict buf0, VecW* __restrict buf1) {\n  // We need to perform the equivalent of 9-10 shuffles (assuming a full-size\n  // 512x512 or 1024x1024 bitblock).\n  // The first shuffles are performed by the ingestion loop: we write the first\n  // word from every row to buf0, then the second word from every row, etc.,\n  // yielding\n  //   (0,0) ...   (0,63)  (1,0) ...   (1,63)  (2,0) ...   (511,63)\n  //   (0,64) ...  (0,127) (1,64) ...  (1,127) (2,64) ...  (511,127)\n  //   ...\n  //   (0,448) ... (0,511) (1,448) ... (1,511) (2,448) ... (511,511)\n  // in terms of the original bit positions when kCacheline==64.\n  // Since each input row has 8-16 words, this amounts to 3-4 shuffles.\n  //\n  // The second step writes\n  //   (0,0) (0,1) ... (0,7)   (1,0) (1,1) ... (1,7) ...   (511,7)\n  //   (0,8) (0,9) ... (0,15)  (1,8) (1,9) ... (1,15) ...  (511,15)\n  //   ...\n  //   (0,504) ...     (0,511) (1,504) ...     (1,511) ... (511,511)\n  // to buf1, performing the equivalent of 3 shuffles, and the third step\n  // finishes the transpose using movemask.\n  //\n  // buf0 and buf1 must both be 32KiB vector-aligned buffers when\n  // kCacheline==64, and 128KiB when kCacheline==128.\n\n  const uint32_t buf0_row_ct = DivUp(write_row_ct, 64);\n  {\n    uintptr_t* buf0_ul = DowncastVecWToW(buf0);\n    const uint32_t zfill_ct = (-read_row_ct) & (kBitsPerWord - 1);\n    for (uint32_t ridx = 0; ridx != buf0_row_ct; ++ridx) {\n      const uintptr_t* read_iter_tmp = &(read_iter[ridx]);\n      uintptr_t* buf0_row_start = &(buf0_ul[kPglBitTransposeBatch * ridx]);\n      for (uint32_t uii = 0; uii != read_row_ct; ++uii) {\n        buf0_row_start[uii] = *read_iter_tmp;\n        read_iter_tmp = &(read_iter_tmp[read_ul_stride]);\n      }\n      // This is a simple way of fulfilling the trailing-zero part of the\n      // function contract.\n      // (   buf0 rows zeroed out to 512 bytes\n      //  -> buf1 rows zeroed out to 64 bytes\n      //  -> output rows zeroed out to 8 bytes)\n      ZeroWArr(zfill_ct, &(buf0_row_start[read_row_ct]));\n    }\n  }\n  const uint32_t write_word_width = DivUp(read_row_ct, 64);\n  {\n    const VecW* buf0_read_iter = buf0;\n    uintptr_t* write_iter0 = DowncastVecWToW(buf1);\n#  ifdef USE_SHUFFLE8\n    const VecW gather_u16s = vecw_setr8(0, 8, 1, 9, 2, 10, 3, 11,\n                                        4, 12, 5, 13, 6, 14, 7, 15);\n#    ifdef USE_AVX2\n    const VecW gather_u32s = vecw_setr8(0, 1, 8, 9, 2, 3, 10, 11,\n                                        4, 5, 12, 13, 6, 7, 14, 15);\n#    endif\n#  else\n    const VecW m8 = VCONST_W(kMask00FF);\n#  endif\n    const uint32_t buf0_row_b64width = write_word_width * 8;\n    for (uint32_t ridx = 0; ridx != buf0_row_ct; ++ridx) {\n      uintptr_t* write_iter1 = &(write_iter0[kCacheline]);\n      uintptr_t* write_iter2 = &(write_iter1[kCacheline]);\n      uintptr_t* write_iter3 = &(write_iter2[kCacheline]);\n      uintptr_t* write_iter4 = &(write_iter3[kCacheline]);\n      uintptr_t* write_iter5 = &(write_iter4[kCacheline]);\n      uintptr_t* write_iter6 = &(write_iter5[kCacheline]);\n      uintptr_t* write_iter7 = &(write_iter6[kCacheline]);\n      for (uint32_t b64idx = 0; b64idx != buf0_row_b64width; ++b64idx) {\n#  ifdef USE_AVX2\n        VecW loader0 = buf0_read_iter[b64idx * 2];\n        VecW loader1 = buf0_read_iter[b64idx * 2 + 1];\n        //    (0,0) (0,1) ... (0,7) (1,0) (1,1) ... (1,7) (2,0) ... (3,7)\n        // -> (0,0) (1,0) (0,1) (1,1) (0,2) .... 
(1,7) (2,0) (3,0) (2,1) ...\n        loader0 = vecw_shuffle8(loader0, gather_u16s);\n        loader1 = vecw_shuffle8(loader1, gather_u16s);\n        // -> (0,0) (1,0) (0,1) (1,1) (0,2) (1,2) (0,3) (1,3) (2,0) (3,0) ...\n        VecW vec_lo = vecw_permute0xd8_if_avx2(loader0);\n        VecW vec_hi = vecw_permute0xd8_if_avx2(loader1);\n        // -> (0,0) (1,0) (2,0) (3,0) (0,1) (1,1) (2,1) (3,1) (0,2) ...\n        vec_lo = vecw_shuffle8(vec_lo, gather_u32s);\n        // -> (4,0) (5,0) (6,0) (7,0) (4,1) (5,1) (6,1) (7,1) (4,2) ...\n        vec_hi = vecw_shuffle8(vec_hi, gather_u32s);\n        const VecW final0145 = vecw_unpacklo32(vec_lo, vec_hi);\n        const VecW final2367 = vecw_unpackhi32(vec_lo, vec_hi);\n        write_iter0[b64idx] = vecw_extract64_0(final0145);\n        write_iter1[b64idx] = vecw_extract64_1(final0145);\n        write_iter2[b64idx] = vecw_extract64_0(final2367);\n        write_iter3[b64idx] = vecw_extract64_1(final2367);\n        write_iter4[b64idx] = vecw_extract64_2(final0145);\n        write_iter5[b64idx] = vecw_extract64_3(final0145);\n        write_iter6[b64idx] = vecw_extract64_2(final2367);\n        write_iter7[b64idx] = vecw_extract64_3(final2367);\n#  else  // !USE_AVX2\n        VecW loader0 = buf0_read_iter[b64idx * 4];\n        VecW loader1 = buf0_read_iter[b64idx * 4 + 1];\n        VecW loader2 = buf0_read_iter[b64idx * 4 + 2];\n        VecW loader3 = buf0_read_iter[b64idx * 4 + 3];\n        //    (0,0) (0,1) ... (0,7) (1,0) (1,1) ... (1,7)\n        // -> (0,0) (1,0) (0,1) (1,1) (0,2) ... (1,7)\n#    ifdef USE_SHUFFLE8\n        loader0 = vecw_shuffle8(loader0, gather_u16s);\n        loader1 = vecw_shuffle8(loader1, gather_u16s);\n        loader2 = vecw_shuffle8(loader2, gather_u16s);\n        loader3 = vecw_shuffle8(loader3, gather_u16s);\n#    else\n        VecW tmp_lo = vecw_unpacklo8(loader0, loader1);\n        VecW tmp_hi = vecw_unpackhi8(loader0, loader1);\n        loader0 = vecw_blendv(vecw_slli(tmp_hi, 8), tmp_lo, m8);\n        loader1 = vecw_blendv(tmp_hi, vecw_srli(tmp_lo, 8), m8);\n        tmp_lo = vecw_unpacklo8(loader2, loader3);\n        tmp_hi = vecw_unpackhi8(loader2, loader3);\n        loader2 = vecw_blendv(vecw_slli(tmp_hi, 8), tmp_lo, m8);\n        loader3 = vecw_blendv(tmp_hi, vecw_srli(tmp_lo, 8), m8);\n#    endif\n        // -> (0,0) (1,0) (2,0) (3,0) (0,1) (1,1) (2,1) (3,1) (0,2) ...\n        const VecW lo_0123 = vecw_unpacklo16(loader0, loader1);\n        // -> (0,4) (1,4) (2,4) (3,4) (0,5) (1,5) (2,5) (3,5) (0,6) ...\n        const VecW lo_4567 = vecw_unpackhi16(loader0, loader1);\n        const VecW hi_0123 = vecw_unpacklo16(loader2, loader3);\n        const VecW hi_4567 = vecw_unpackhi16(loader2, loader3);\n\n        VecW final01 = vecw_unpacklo32(lo_0123, hi_0123);\n        VecW final23 = vecw_unpackhi32(lo_0123, hi_0123);\n        VecW final45 = vecw_unpacklo32(lo_4567, hi_4567);\n        VecW final67 = vecw_unpackhi32(lo_4567, hi_4567);\n        write_iter0[b64idx] = vecw_extract64_0(final01);\n        write_iter1[b64idx] = vecw_extract64_1(final01);\n        write_iter2[b64idx] = vecw_extract64_0(final23);\n        write_iter3[b64idx] = vecw_extract64_1(final23);\n        write_iter4[b64idx] = vecw_extract64_0(final45);\n        write_iter5[b64idx] = vecw_extract64_1(final45);\n        write_iter6[b64idx] = vecw_extract64_0(final67);\n        write_iter7[b64idx] = vecw_extract64_1(final67);\n#  endif  // !USE_AVX2\n      }\n      buf0_read_iter = &(buf0_read_iter[kPglBitTransposeBatch / kWordsPerVec]);\n      write_iter0 = 
&(write_iter7[kCacheline]);\n    }\n  }\n  const VecW* buf1_read_iter = buf1;\n  const uint32_t write_v8ui_stride = kVec8thUintPerWord * write_ul_stride;\n  const uint32_t buf1_fullrow_ct = write_row_ct / 8;\n  const uint32_t buf1_row_vecwidth = write_word_width * (8 / kWordsPerVec);\n  Vec8thUint* write_iter0 = DowncastWToV8(write_iter);\n  for (uint32_t ridx = 0; ridx != buf1_fullrow_ct; ++ridx) {\n    Vec8thUint* write_iter1 = &(write_iter0[write_v8ui_stride]);\n    Vec8thUint* write_iter2 = &(write_iter1[write_v8ui_stride]);\n    Vec8thUint* write_iter3 = &(write_iter2[write_v8ui_stride]);\n    Vec8thUint* write_iter4 = &(write_iter3[write_v8ui_stride]);\n    Vec8thUint* write_iter5 = &(write_iter4[write_v8ui_stride]);\n    Vec8thUint* write_iter6 = &(write_iter5[write_v8ui_stride]);\n    Vec8thUint* write_iter7 = &(write_iter6[write_v8ui_stride]);\n    for (uint32_t vidx = 0; vidx != buf1_row_vecwidth; ++vidx) {\n      VecW loader = buf1_read_iter[vidx];\n      write_iter7[vidx] = vecw_movemask(loader);\n      loader = vecw_slli(loader, 1);\n      write_iter6[vidx] = vecw_movemask(loader);\n      loader = vecw_slli(loader, 1);\n      write_iter5[vidx] = vecw_movemask(loader);\n      loader = vecw_slli(loader, 1);\n      write_iter4[vidx] = vecw_movemask(loader);\n      loader = vecw_slli(loader, 1);\n      write_iter3[vidx] = vecw_movemask(loader);\n      loader = vecw_slli(loader, 1);\n      write_iter2[vidx] = vecw_movemask(loader);\n      loader = vecw_slli(loader, 1);\n      write_iter1[vidx] = vecw_movemask(loader);\n      loader = vecw_slli(loader, 1);\n      write_iter0[vidx] = vecw_movemask(loader);\n    }\n    buf1_read_iter = &(buf1_read_iter[kCacheline / kWordsPerVec]);\n    write_iter0 = &(write_iter7[write_v8ui_stride]);\n  }\n  const uint32_t row_ct_rem = write_row_ct % 8;\n  if (!row_ct_rem) {\n    return;\n  }\n  const uint32_t lshift = 8 - row_ct_rem;\n  Vec8thUint* write_iter_last = &(write_iter0[write_v8ui_stride * (row_ct_rem - 1)]);\n  for (uint32_t vidx = 0; vidx != buf1_row_vecwidth; ++vidx) {\n    VecW loader = buf1_read_iter[vidx];\n    loader = vecw_slli_variable_ct(loader, lshift);\n    Vec8thUint* inner_write_iter = &(write_iter_last[vidx]);\n    for (uint32_t uii = 0; uii != row_ct_rem; ++uii) {\n      *inner_write_iter = vecw_movemask(loader);\n      loader = vecw_slli(loader, 1);\n      inner_write_iter -= write_v8ui_stride;\n    }\n  }\n}\n#else  // !USE_SSE2\n#  ifdef __LP64__\nstatic_assert(kWordsPerVec == 1, \"TransposeBitblock64() needs to be updated.\");\nvoid TransposeBitblock64(const uintptr_t* read_iter, uintptr_t read_ul_stride, uintptr_t write_ul_stride, uint32_t read_row_ct, uint32_t write_row_ct, uintptr_t* write_iter, VecW* __restrict buf0, VecW* __restrict buf1) {\n  // buf must be vector-aligned and have size 64k\n  const uint32_t initial_read_byte_ct = DivUp(write_row_ct, CHAR_BIT);\n  // fold the first 6 shuffles into the initial ingestion loop\n  const unsigned char* initial_read_iter = DowncastKToUc(read_iter);\n  const unsigned char* initial_read_end = &(initial_read_iter[initial_read_byte_ct]);\n  unsigned char* initial_target_iter = DowncastToUc(buf0);\n  const uint32_t read_byte_stride = read_ul_stride * kBytesPerWord;\n  const uint32_t read_batch_rem = kBitsPerCacheline - read_row_ct;\n  for (; initial_read_iter != initial_read_end; ++initial_read_iter) {\n    const unsigned char* read_iter_tmp = initial_read_iter;\n    for (uint32_t ujj = 0; ujj != read_row_ct; ++ujj) {\n      *initial_target_iter++ = *read_iter_tmp;\n      
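// Step down to the same byte column of the next input row.\n      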
read_iter_tmp = &(read_iter_tmp[read_byte_stride]);\n    }\n    initial_target_iter = memsetua(initial_target_iter, 0, read_batch_rem);\n  }\n\n  // third-to-last shuffle, 8 bit spacing -> 4\n  const VecW* source_iter = buf0;\n  uintptr_t* target_iter0 = buf1;\n  const uint32_t write_word_ct = BitCtToWordCt(read_row_ct);\n  const uint32_t first_inner_loop_iter_ct = 4 * write_word_ct;\n  uint32_t cur_write_skip = 4 * kWordsPerCacheline - first_inner_loop_iter_ct;\n  // coincidentally, this also needs to run DivUp(write_row_ct, CHAR_BIT)\n  // times\n  for (uint32_t uii = 0; uii != initial_read_byte_ct; ++uii) {\n    uintptr_t* target_iter1 = &(target_iter0[kWordsPerCacheline * 4]);\n    for (uint32_t ujj = 0; ujj != first_inner_loop_iter_ct; ++ujj) {\n      const uintptr_t source_word_lo = *source_iter++;\n      const uintptr_t source_word_hi = *source_iter++;\n      *target_iter0++ = PackTwo0F0FMask(source_word_lo, source_word_hi);\n      *target_iter1++ = PackTwo0F0FMask(source_word_lo >> 4, source_word_hi >> 4);\n    }\n    source_iter = &(source_iter[2 * cur_write_skip]);\n    target_iter0 = &(target_iter1[cur_write_skip]);\n  }\n\n  // second-to-last shuffle, 4 bit spacing -> 2\n  source_iter = buf1;\n  target_iter0 = buf0;\n  const uint32_t second_outer_loop_iter_ct = DivUp(write_row_ct, 4);\n  const uint32_t second_inner_loop_iter_ct = 2 * write_word_ct;\n  cur_write_skip = 2 * kWordsPerCacheline - second_inner_loop_iter_ct;\n  for (uint32_t uii = 0; uii != second_outer_loop_iter_ct; ++uii) {\n    uintptr_t* target_iter1 = &(target_iter0[kWordsPerCacheline * 2]);\n    for (uint32_t ujj = 0; ujj != second_inner_loop_iter_ct; ++ujj) {\n      const uintptr_t source_word_lo = *source_iter++;\n      const uintptr_t source_word_hi = *source_iter++;\n      *target_iter0++ = PackTwo3333Mask(source_word_lo, source_word_hi);\n      *target_iter1++ = PackTwo3333Mask(source_word_lo >> 2, source_word_hi >> 2);\n    }\n    source_iter = &(source_iter[2 * cur_write_skip]);\n    target_iter0 = &(target_iter1[cur_write_skip]);\n  }\n  // last shuffle, 2 bit spacing -> 1\n  source_iter = buf0;\n  target_iter0 = write_iter;\n  const uint32_t last_loop_iter_ct = DivUp(write_row_ct, 2);\n  for (uint32_t uii = 0; uii != last_loop_iter_ct; ++uii) {\n    uintptr_t* target_iter1 = &(target_iter0[write_ul_stride]);\n    for (uint32_t ujj = 0; ujj != write_word_ct; ++ujj) {\n      const uintptr_t source_word_lo = S_CAST(uintptr_t, *source_iter++);\n      const uintptr_t source_word_hi = S_CAST(uintptr_t, *source_iter++);\n      target_iter0[ujj] = PackTwo5555Mask(source_word_lo, source_word_hi);\n      target_iter1[ujj] = PackTwo5555Mask(source_word_lo >> 1, source_word_hi >> 1);\n    }\n    source_iter = &(source_iter[2 * (kWordsPerCacheline - write_word_ct)]);\n    target_iter0 = &(target_iter1[write_ul_stride]);\n  }\n}\n#  else\nstatic_assert(kWordsPerVec == 1, \"TransposeBitblock32() needs to be updated.\");\nvoid TransposeBitblock32(const uintptr_t* read_iter, uintptr_t read_ul_stride, uintptr_t write_ul_stride, uint32_t read_batch_size, uint32_t write_batch_size, uintptr_t* write_iter, VecW* __restrict buf0, VecW* __restrict buf1) {\n  // buf must be vector-aligned and have size 64k\n  const uint32_t initial_read_byte_ct = DivUp(write_batch_size, CHAR_BIT);\n  // fold the first 6 shuffles into the initial ingestion loop\n  const unsigned char* initial_read_iter = DowncastKToUc(read_iter);\n  const unsigned char* initial_read_end = &(initial_read_iter[initial_read_byte_ct]);\n  unsigned char* 
initial_target_iter = DowncastToUc(buf0);\n  const uint32_t read_byte_stride = read_ul_stride * kBytesPerWord;\n  const uint32_t read_batch_rem = kBitsPerCacheline - read_batch_size;\n  for (; initial_read_iter != initial_read_end; ++initial_read_iter) {\n    const unsigned char* read_iter_tmp = initial_read_iter;\n    for (uint32_t ujj = 0; ujj != read_batch_size; ++ujj) {\n      *initial_target_iter++ = *read_iter_tmp;\n      read_iter_tmp = &(read_iter_tmp[read_byte_stride]);\n    }\n    initial_target_iter = memsetua(initial_target_iter, 0, read_batch_rem);\n  }\n\n  // third-to-last shuffle, 8 bit spacing -> 4\n  const VecW* source_iter = buf0;\n  uintptr_t* target_iter0 = buf1;\n  const uint32_t write_word_ct = BitCtToWordCt(read_batch_size);\n  const uint32_t first_inner_loop_iter_ct = 4 * write_word_ct;\n  uint32_t cur_write_skip = 4 * kWordsPerCacheline - first_inner_loop_iter_ct;\n  // coincidentally, this also needs to run DivUp(write_batch_size, CHAR_BIT)\n  // times\n  for (uint32_t uii = 0; uii != initial_read_byte_ct; ++uii) {\n    uintptr_t* target_iter1 = &(target_iter0[kWordsPerCacheline * 4]);\n    for (uint32_t ujj = 0; ujj != first_inner_loop_iter_ct; ++ujj) {\n      const uintptr_t source_word_lo = *source_iter++;\n      const uintptr_t source_word_hi = *source_iter++;\n      *target_iter0++ = PackTwo0F0FMask(source_word_lo, source_word_hi);\n      *target_iter1++ = PackTwo0F0FMask(source_word_lo >> 4, source_word_hi >> 4);\n    }\n    source_iter = &(source_iter[2 * cur_write_skip]);\n    target_iter0 = &(target_iter1[cur_write_skip]);\n  }\n\n  // second-to-last shuffle, 4 bit spacing -> 2\n  source_iter = buf1;\n  target_iter0 = buf0;\n  const uint32_t second_outer_loop_iter_ct = DivUp(write_batch_size, 4);\n  const uint32_t second_inner_loop_iter_ct = 2 * write_word_ct;\n  cur_write_skip = 2 * kWordsPerCacheline - second_inner_loop_iter_ct;\n  for (uint32_t uii = 0; uii != second_outer_loop_iter_ct; ++uii) {\n    uintptr_t* target_iter1 = &(target_iter0[kWordsPerCacheline * 2]);\n    for (uint32_t ujj = 0; ujj != second_inner_loop_iter_ct; ++ujj) {\n      const uintptr_t source_word_lo = *source_iter++;\n      const uintptr_t source_word_hi = *source_iter++;\n      *target_iter0++ = PackTwo3333Mask(source_word_lo, source_word_hi);\n      *target_iter1++ = PackTwo3333Mask(source_word_lo >> 2, source_word_hi >> 2);\n    }\n    source_iter = &(source_iter[2 * cur_write_skip]);\n    target_iter0 = &(target_iter1[cur_write_skip]);\n  }\n  // last shuffle, 2 bit spacing -> 1\n  source_iter = buf0;\n  target_iter0 = write_iter;\n  const uint32_t last_loop_iter_ct = DivUp(write_batch_size, 2);\n  for (uint32_t uii = 0; uii != last_loop_iter_ct; ++uii) {\n    uintptr_t* target_iter1 = &(target_iter0[write_ul_stride]);\n    for (uint32_t ujj = 0; ujj != write_word_ct; ++ujj) {\n      const uintptr_t source_word_lo = S_CAST(uintptr_t, *source_iter++);\n      const uintptr_t source_word_hi = S_CAST(uintptr_t, *source_iter++);\n      target_iter0[ujj] = PackTwo5555Mask(source_word_lo, source_word_hi);\n      target_iter1[ujj] = PackTwo5555Mask(source_word_lo >> 1, source_word_hi >> 1);\n    }\n    source_iter = &(source_iter[2 * (kWordsPerCacheline - write_word_ct)]);\n    target_iter0 = &(target_iter1[write_ul_stride]);\n  }\n}\n#  endif\n#endif  // !USE_SSE2\n\n#ifdef USE_SSE2\nvoid TransposeNybbleblock(const uintptr_t* read_iter, uint32_t read_ul_stride, uint32_t write_ul_stride, uint32_t read_batch_size, uint32_t write_batch_size, uintptr_t* __restrict write_iter, VecW* 
vecaligned_buf) {\n  // Very similar to TransposeNypblock64() in pgenlib_internal.\n  // vecaligned_buf must be vector-aligned and have size 8k if kCacheline==64,\n  // 32k if kCacheline==128\n  const uint32_t buf_row_ct = DivUp(write_batch_size, 8);\n  // fold the first 4-5 shuffles into the initial ingestion loop\n  const uint32_t* initial_read_iter = DowncastKWToU32(read_iter);\n  const uint32_t* initial_read_end = &(initial_read_iter[buf_row_ct]);\n  uint32_t* initial_target_iter = DowncastVecWToU32(vecaligned_buf);\n  const uint32_t read_u32_stride = read_ul_stride * (kBytesPerWord / 4);\n  const uint32_t read_batch_rem = kNybblesPerCacheline - read_batch_size;\n  for (; initial_read_iter != initial_read_end; ++initial_read_iter) {\n    const uint32_t* read_iter_tmp = initial_read_iter;\n    for (uint32_t ujj = 0; ujj != read_batch_size; ++ujj) {\n      *initial_target_iter++ = *read_iter_tmp;\n      read_iter_tmp = &(read_iter_tmp[read_u32_stride]);\n    }\n    if (!read_batch_rem) {\n      continue;\n    }\n    memset(initial_target_iter, 0, read_batch_rem * 4);\n    initial_target_iter = &(initial_target_iter[read_batch_rem]);\n  }\n\n  // 32 bit spacing -> 4\n  const VecW* source_iter = vecaligned_buf;\n  const VecW m4 = VCONST_W(kMask0F0F);\n  const uint32_t buf_fullrow_ct = write_batch_size / 8;\n  const uint32_t b64width = DivUp(read_batch_size, 16);\n  uintptr_t* target_iter0 = write_iter;\n  uint32_t cur_dst_row_ct = 8;\n#  ifdef USE_SHUFFLE8\n  const VecW gather_u16s = vecw_setr8(0, 8, 1, 9, 2, 10, 3, 11,\n                                      4, 12, 5, 13, 6, 14, 7, 15);\n#  else\n  const VecW m8 = VCONST_W(kMask00FF);\n#  endif\n#  ifdef USE_AVX2\n  // movemask is slower even in AVX2 case\n  const VecW gather_u32s = vecw_setr8(0, 1, 8, 9, 2, 3, 10, 11,\n                                      4, 5, 12, 13, 6, 7, 14, 15);\n  for (uint32_t buf_row_idx = 0; ; ++buf_row_idx) {\n    if (buf_row_idx >= buf_fullrow_ct) {\n      if (buf_row_idx == buf_row_ct) {\n        return;\n      }\n      cur_dst_row_ct = write_batch_size % 8;\n    }\n    uintptr_t* target_iter1 = &(target_iter0[write_ul_stride]);\n    uintptr_t* target_iter2 = &(target_iter1[write_ul_stride]);\n    uintptr_t* target_iter3 = &(target_iter2[write_ul_stride]);\n    uintptr_t* target_iter4 = &(target_iter3[write_ul_stride]);\n    uintptr_t* target_iter5 = &(target_iter4[write_ul_stride]);\n    uintptr_t* target_iter6 = &(target_iter5[write_ul_stride]);\n    uintptr_t* target_iter7 = &(target_iter6[write_ul_stride]);\n    for (uint32_t b64idx = 0; b64idx != b64width; ++b64idx) {\n      const VecW loader0 = source_iter[b64idx * 2];\n      const VecW loader1 = source_iter[b64idx * 2 + 1];\n      VecW even_nybbles0 = loader0 & m4;\n      VecW odd_nybbles0 = vecw_and_notfirst(m4, loader0);\n      VecW even_nybbles1 = loader1 & m4;\n      VecW odd_nybbles1 = vecw_and_notfirst(m4, loader1);\n      even_nybbles0 = even_nybbles0 | vecw_srli(even_nybbles0, 28);\n      odd_nybbles0 = vecw_slli(odd_nybbles0, 28) | odd_nybbles0;\n      even_nybbles1 = even_nybbles1 | vecw_srli(even_nybbles1, 28);\n      odd_nybbles1 = vecw_slli(odd_nybbles1, 28) | odd_nybbles1;\n      // Label the bytes in even_nybbles0 (0, 1, 2, ..., 31), and the bytes in\n      // even_nybbles1 (32, 33, ..., 63).  
We wish to generate the following\n      // lane-and-vector-crossing permutation:\n      //   (0, 8, 16, 24, 32, 40, 48, 56, 1, 9, 17, 25, 33, 41, 49, 57)\n      //   (2, 10, 18, 26, 34, 42, 50, 58, 3, 11, 19, 27, 35, 43, 51, 59)\n\n      // first shuffle:\n      //   (0, 8, 1, 9, 2, 10, 3, 11, _, _, _, _, _, _, _, _,\n      //    16, 24, 17, 25, 18, 26, 19, 27, _, _, _, _, _, _, _, _)\n      //\n      //   (32, 40, 33, 41, 34, 42, 35, 43, _, _, _, _, _, _, _, _,\n      //    48, 56, 49, 57, 50, 58, 51, 59, _, _, _, _, _, _, _, _)\n      //\n      // _mm256_unpacklo_epi16:\n      //   (0, 8, 32, 40, 1, 9, 33, 41, 2, 10, 34, 42, 3, 11, 35, 43,\n      //    16, 24, 48, 56, 17, 25, 49, 57, 18, 26, 50, 58, 19, 27, 51, 59)\n      //\n      // {0, 2, 1, 3} permute:\n      //   (0, 8, 32, 40, 1, 9, 33, 41, 16, 24, 48, 56, 17, 25, 49, 57,\n      //    2, 10, 34, 42, 3, 11, 35, 43, 18, 26, 50, 58, 19, 27, 51, 59)\n      //\n      // final shuffle gives us what we want.\n      even_nybbles0 = vecw_shuffle8(even_nybbles0, gather_u16s);\n      odd_nybbles0 = vecw_shuffle8(odd_nybbles0, gather_u16s);\n      even_nybbles1 = vecw_shuffle8(even_nybbles1, gather_u16s);\n      odd_nybbles1 = vecw_shuffle8(odd_nybbles1, gather_u16s);\n\n      VecW target_even = vecw_unpacklo16(even_nybbles0, even_nybbles1);\n      VecW target_odd = vecw_unpackhi16(odd_nybbles0, odd_nybbles1);\n\n      target_even = vecw_permute0xd8_if_avx2(target_even);\n      target_odd = vecw_permute0xd8_if_avx2(target_odd);\n\n      target_even = vecw_shuffle8(target_even, gather_u32s);\n      target_odd = vecw_shuffle8(target_odd, gather_u32s);\n\n      // tried using _mm_stream_si64 here, that totally sucked\n      switch (cur_dst_row_ct) {\n        case 8:\n          target_iter7[b64idx] = vecw_extract64_3(target_odd);\n          // fall through\n        case 7:\n          target_iter6[b64idx] = vecw_extract64_3(target_even);\n          // fall through\n        case 6:\n          target_iter5[b64idx] = vecw_extract64_2(target_odd);\n          // fall through\n        case 5:\n          target_iter4[b64idx] = vecw_extract64_2(target_even);\n          // fall through\n        case 4:\n          target_iter3[b64idx] = vecw_extract64_1(target_odd);\n          // fall through\n        case 3:\n          target_iter2[b64idx] = vecw_extract64_1(target_even);\n          // fall through\n        case 2:\n          target_iter1[b64idx] = vecw_extract64_0(target_odd);\n          // fall through\n        default:\n          target_iter0[b64idx] = vecw_extract64_0(target_even);\n      }\n    }\n    source_iter = &(source_iter[(4 * kPglNybbleTransposeBatch) / kBytesPerVec]);\n    target_iter0 = &(target_iter7[write_ul_stride]);\n  }\n#  else  // !USE_AVX2\n  for (uint32_t buf_row_idx = 0; ; ++buf_row_idx) {\n    if (buf_row_idx >= buf_fullrow_ct) {\n      if (buf_row_idx == buf_row_ct) {\n        return;\n      }\n      cur_dst_row_ct = write_batch_size % 8;\n    }\n    uintptr_t* target_iter1 = &(target_iter0[write_ul_stride]);\n    uintptr_t* target_iter2 = &(target_iter1[write_ul_stride]);\n    uintptr_t* target_iter3 = &(target_iter2[write_ul_stride]);\n    uintptr_t* target_iter4 = &(target_iter3[write_ul_stride]);\n    uintptr_t* target_iter5 = &(target_iter4[write_ul_stride]);\n    uintptr_t* target_iter6 = &(target_iter5[write_ul_stride]);\n    uintptr_t* target_iter7 = &(target_iter6[write_ul_stride]);\n    for (uint32_t b64idx = 0; b64idx != b64width; ++b64idx) {\n      const VecW loader0 = source_iter[b64idx * 4];\n      const VecW 
loader1 = source_iter[b64idx * 4 + 1];\n      const VecW loader2 = source_iter[b64idx * 4 + 2];\n      const VecW loader3 = source_iter[b64idx * 4 + 3];\n      VecW even_nybbles0 = loader0 & m4;\n      VecW odd_nybbles0 = vecw_and_notfirst(m4, loader0);\n      VecW even_nybbles1 = loader1 & m4;\n      VecW odd_nybbles1 = vecw_and_notfirst(m4, loader1);\n      VecW even_nybbles2 = loader2 & m4;\n      VecW odd_nybbles2 = vecw_and_notfirst(m4, loader2);\n      VecW even_nybbles3 = loader3 & m4;\n      VecW odd_nybbles3 = vecw_and_notfirst(m4, loader3);\n      even_nybbles0 = even_nybbles0 | vecw_srli(even_nybbles0, 28);\n      odd_nybbles0 = vecw_slli(odd_nybbles0, 28) | odd_nybbles0;\n      even_nybbles1 = even_nybbles1 | vecw_srli(even_nybbles1, 28);\n      odd_nybbles1 = vecw_slli(odd_nybbles1, 28) | odd_nybbles1;\n      even_nybbles2 = even_nybbles2 | vecw_srli(even_nybbles2, 28);\n      odd_nybbles2 = vecw_slli(odd_nybbles2, 28) | odd_nybbles2;\n      even_nybbles3 = even_nybbles3 | vecw_srli(even_nybbles3, 28);\n      odd_nybbles3 = vecw_slli(odd_nybbles3, 28) | odd_nybbles3;\n      // Label the bytes in even_nybbles0 (0, 1, 2, ..., 15), the bytes in\n      // even_nybbles1 (16, 17, ..., 31), ..., up to even_nybbles3 being (48,\n      // 49, ..., 63).  We wish to generate the following vector-crossing\n      // permutation:\n      //   (0, 8, 16, 24, 32, 40, 48, 56, 1, 9, 17, 25, 33, 41, 49, 57)\n      //   (2, 10, 18, 26, 34, 42, 50, 58, 3, 11, 19, 27, 35, 43, 51, 59)\n\n      // first shuffle:\n      //   (0, 8, 1, 9, 2, 10, 3, 11, _, _, _, _, _, _, _, _)\n      //   (16, 24, 17, 25, 18, 26, 19, 27, _, _, _, _, _, _, _, _)\n      //   (32, 40, 33, 41, 34, 42, 35, 43, _, _, _, _, _, _, _, _)\n      //   (48, 56, 49, 57, 50, 58, 51, 59, _, _, _, _, _, _, _, _)\n\n      // _mm_unpacklo_epi16:\n      //   (0, 8, 16, 24, 1, 9, 17, 25, 2, 10, 18, 26, 3, 11, 19, 27)\n      //   (32, 40, 48, 56, 33, 41, 49, 57, 34, 42, 50, 58, 35, 43, 51, 59)\n      //\n      // finish with _mm_unpack{lo,hi}_epi32\n#    ifdef USE_SHUFFLE8\n      even_nybbles0 = vecw_shuffle8(even_nybbles0, gather_u16s);\n      odd_nybbles0 = vecw_shuffle8(odd_nybbles0, gather_u16s);\n      even_nybbles1 = vecw_shuffle8(even_nybbles1, gather_u16s);\n      odd_nybbles1 = vecw_shuffle8(odd_nybbles1, gather_u16s);\n      even_nybbles2 = vecw_shuffle8(even_nybbles2, gather_u16s);\n      odd_nybbles2 = vecw_shuffle8(odd_nybbles2, gather_u16s);\n      even_nybbles3 = vecw_shuffle8(even_nybbles3, gather_u16s);\n      odd_nybbles3 = vecw_shuffle8(odd_nybbles3, gather_u16s);\n#    else\n      VecW tmp_lo = vecw_unpacklo8(even_nybbles0, odd_nybbles0);\n      VecW tmp_hi = vecw_unpackhi8(even_nybbles0, odd_nybbles0);\n      even_nybbles0 = vecw_blendv(vecw_slli(tmp_hi, 8), tmp_lo, m8);\n      odd_nybbles0 = vecw_blendv(tmp_hi, vecw_srli(tmp_lo, 8), m8);\n      tmp_lo = vecw_unpacklo8(even_nybbles1, odd_nybbles1);\n      tmp_hi = vecw_unpackhi8(even_nybbles1, odd_nybbles1);\n      even_nybbles1 = vecw_blendv(vecw_slli(tmp_hi, 8), tmp_lo, m8);\n      odd_nybbles1 = vecw_blendv(tmp_hi, vecw_srli(tmp_lo, 8), m8);\n      tmp_lo = vecw_unpacklo8(even_nybbles2, odd_nybbles2);\n      tmp_hi = vecw_unpackhi8(even_nybbles2, odd_nybbles2);\n      even_nybbles2 = vecw_blendv(vecw_slli(tmp_hi, 8), tmp_lo, m8);\n      odd_nybbles2 = vecw_blendv(tmp_hi, vecw_srli(tmp_lo, 8), m8);\n      tmp_lo = vecw_unpacklo8(even_nybbles3, odd_nybbles3);\n      tmp_hi = vecw_unpackhi8(even_nybbles3, odd_nybbles3);\n      even_nybbles3 = vecw_blendv(vecw_slli(tmp_hi, 
8), tmp_lo, m8);\n      odd_nybbles3 = vecw_blendv(tmp_hi, vecw_srli(tmp_lo, 8), m8);\n#    endif\n\n      const VecW even_lo = vecw_unpacklo16(even_nybbles0, even_nybbles1);\n      const VecW odd_lo = vecw_unpackhi16(odd_nybbles0, odd_nybbles1);\n      const VecW even_hi = vecw_unpacklo16(even_nybbles2, even_nybbles3);\n      const VecW odd_hi = vecw_unpackhi16(odd_nybbles2, odd_nybbles3);\n\n      const VecW final02 = vecw_unpacklo32(even_lo, even_hi);\n      const VecW final13 = vecw_unpacklo32(odd_lo, odd_hi);\n      const VecW final46 = vecw_unpackhi32(even_lo, even_hi);\n      const VecW final57 = vecw_unpackhi32(odd_lo, odd_hi);\n      switch (cur_dst_row_ct) {\n        case 8:\n          target_iter7[b64idx] = vecw_extract64_1(final57);\n          // fall through\n        case 7:\n          target_iter6[b64idx] = vecw_extract64_1(final46);\n          // fall through\n        case 6:\n          target_iter5[b64idx] = vecw_extract64_0(final57);\n          // fall through\n        case 5:\n          target_iter4[b64idx] = vecw_extract64_0(final46);\n          // fall through\n        case 4:\n          target_iter3[b64idx] = vecw_extract64_1(final13);\n          // fall through\n        case 3:\n          target_iter2[b64idx] = vecw_extract64_1(final02);\n          // fall through\n        case 2:\n          target_iter1[b64idx] = vecw_extract64_0(final13);\n          // fall through\n        default:\n          target_iter0[b64idx] = vecw_extract64_0(final02);\n      }\n    }\n    source_iter = &(source_iter[(4 * kPglNybbleTransposeBatch) / kBytesPerVec]);\n    target_iter0 = &(target_iter7[write_ul_stride]);\n  }\n#  endif  // !USE_AVX2\n}\n#else  // !USE_SSE2\nstatic_assert(kWordsPerVec == 1, \"TransposeNybbleblock() needs to be updated.\");\nvoid TransposeNybbleblock(const uintptr_t* read_iter, uint32_t read_ul_stride, uint32_t write_ul_stride, uint32_t read_batch_size, uint32_t write_batch_size, uintptr_t* __restrict write_iter, VecW* vecaligned_buf) {\n  // Very similar to unvectorized TransposeNypblock{32,64}() in pgenlib_misc.\n  // vecaligned_buf must have size 8k\n  const uint32_t buf_row_ct = NybbleCtToByteCt(write_batch_size);\n  // fold the first 6 shuffles into the initial ingestion loop\n  const unsigned char* initial_read_iter = DowncastKToUc(read_iter);\n  const unsigned char* initial_read_end = &(initial_read_iter[buf_row_ct]);\n  unsigned char* initial_target_iter = DowncastToUc(vecaligned_buf);\n  const uint32_t read_byte_stride = read_ul_stride * kBytesPerWord;\n  const uint32_t read_batch_rem = kNybblesPerCacheline - read_batch_size;\n  for (; initial_read_iter != initial_read_end; ++initial_read_iter) {\n    const unsigned char* read_iter_tmp = initial_read_iter;\n    for (uint32_t ujj = 0; ujj != read_batch_size; ++ujj) {\n      *initial_target_iter++ = *read_iter_tmp;\n      read_iter_tmp = &(read_iter_tmp[read_byte_stride]);\n    }\n    initial_target_iter = memsetua(initial_target_iter, 0, read_batch_rem);\n  }\n\n  // 8 bit spacing -> 4\n  const VecW* source_iter = vecaligned_buf;\n  uintptr_t* target_iter0 = write_iter;\n  const uint32_t buf_fullrow_ct = write_batch_size / 2;\n  const uint32_t write_word_ct = NybbleCtToWordCt(read_batch_size);\n  for (uint32_t uii = 0; uii != buf_fullrow_ct; ++uii) {\n    uintptr_t* target_iter1 = &(target_iter0[write_ul_stride]);\n    for (uint32_t ujj = 0; ujj != write_word_ct; ++ujj) {\n      const uintptr_t source_word_lo = *source_iter++;\n      const uintptr_t source_word_hi = *source_iter++;\n      target_iter0[ujj] 
= PackTwo0F0FMask(source_word_lo, source_word_hi);\n      target_iter1[ujj] = PackTwo0F0FMask(source_word_lo >> 4, source_word_hi >> 4);\n    }\n    source_iter = &(source_iter[2 * (kWordsPerCacheline - write_word_ct)]);\n    target_iter0 = &(target_iter1[write_ul_stride]);\n  }\n  const uint32_t remainder = write_batch_size % 2;\n  if (!remainder) {\n    return;\n  }\n  for (uint32_t ujj = 0; ujj != write_word_ct; ++ujj) {\n    const uintptr_t source_word_lo = *source_iter++;\n    const uintptr_t source_word_hi = *source_iter++;\n    target_iter0[ujj] = PackTwo0F0FMask(source_word_lo, source_word_hi);\n  }\n}\n#endif  // !USE_SSE2\n\n#ifdef USE_SSE2\n#  ifdef USE_AVX2\nconst unsigned char kLeadMask[2 * kBytesPerVec] __attribute__ ((aligned (64))) =\n  {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n   255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,\n   255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255};\n#  else\nconst unsigned char kLeadMask[2 * kBytesPerVec] __attribute__ ((aligned (32))) =\n  {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n   255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255};\n#  endif\n\nuintptr_t BytesumArr(const void* bytearr, uintptr_t byte_ct) {\n  uintptr_t tot = 0;\n  if (byte_ct < kBytesPerVec) {\n    const unsigned char* bytearr_uc = S_CAST(const unsigned char*, bytearr);\n    for (uintptr_t ulii = 0; ulii != byte_ct; ++ulii) {\n      tot += bytearr_uc[ulii];\n    }\n    return tot;\n  }\n  const unsigned char* bytearr_uc_iter = S_CAST(const unsigned char*, bytearr);\n  const unsigned char* bytearr_uc_final = &(bytearr_uc_iter[byte_ct - kBytesPerVec]);\n  const VecW m0 = vecw_setzero();\n  VecW acc = vecw_setzero();\n  while (bytearr_uc_iter < bytearr_uc_final) {\n    const VecW cur_vec = vecw_loadu(bytearr_uc_iter);\n    acc = acc + vecw_sad(cur_vec, m0);\n    bytearr_uc_iter = &(bytearr_uc_iter[kBytesPerVec]);\n  }\n  VecW cur_vec = vecw_loadu(bytearr_uc_final);\n  const uintptr_t overlap_byte_ct = bytearr_uc_iter - bytearr_uc_final;\n  const VecW mask_vec = vecw_loadu(&(kLeadMask[kBytesPerVec - overlap_byte_ct]));\n  cur_vec = cur_vec & mask_vec;\n  acc = acc + vecw_sad(cur_vec, m0);\n  return HsumW(acc);\n}\n\n#else // !USE_SSE2\nuintptr_t BytesumArr(const void* bytearr, uintptr_t byte_ct) {\n  // Assumes sum is representable by uintptr_t.\n  const unsigned char* bytearr_uc = S_CAST(const unsigned char*, bytearr);\n  const uintptr_t* bytearr_alias_iter;\n  uint32_t lead_byte_ct = AlignKToAW(bytearr_uc, &bytearr_alias_iter);\n  uintptr_t tot = 0;\n  if (lead_byte_ct) {\n    if (lead_byte_ct > byte_ct) {\n      lead_byte_ct = byte_ct;\n    }\n    for (uint32_t uii = 0; uii != lead_byte_ct; ++uii) {\n      tot += bytearr_uc[uii];\n    }\n    byte_ct -= lead_byte_ct;\n    if (!byte_ct) {\n      return tot;\n    }\n  }\n  const uint32_t fullword_ct = byte_ct / kBytesPerWord;\n  const uint32_t wordblock_idx_trail = fullword_ct / 256;\n  const uint32_t wordblock_idx_end = DivUp(fullword_ct, 256);\n  uint32_t wordblock_len = 256;\n  for (uint32_t wordblock_idx = 0; ; ++wordblock_idx) {\n    if (wordblock_idx >= wordblock_idx_trail) {\n      if (wordblock_idx == wordblock_idx_end) {\n        byte_ct = byte_ct % kBytesPerWord;\n        const unsigned char* last_word_start = DowncastKToUc(bytearr_alias_iter);\n        for (uint32_t uii = 0; uii != byte_ct; ++uii) {\n          tot += last_word_start[uii];\n        }\n       
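// Trailing partial word summed byte by byte; the total is complete.\n      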
 return tot;\n      }\n      wordblock_len = fullword_ct % 256;\n    }\n    const uintptr_t* bytearr_alias_stop = &(bytearr_alias_iter[wordblock_len]);\n    uintptr_t acc_even = 0;\n    uintptr_t acc_odd = 0;\n    do {\n      uintptr_t cur_word = *bytearr_alias_iter++;\n      acc_even += cur_word & kMask00FF;\n      acc_odd += (cur_word >> 8) & kMask00FF;\n    } while (bytearr_alias_iter < bytearr_alias_stop);\n    acc_even = S_CAST(Halfword, acc_even) + (acc_even >> kBitsPerWordD2);\n    acc_odd = S_CAST(Halfword, acc_odd) + (acc_odd >> kBitsPerWordD2);\n    tot += acc_even + acc_odd;\n  }\n}\n#endif  // !USE_SSE2\n\nuintptr_t CountByte(const void* bytearr, unsigned char ucc, uintptr_t byte_ct) {\n  const unsigned char* bytearr_uc_iter = S_CAST(const unsigned char*, bytearr);\n#ifdef USE_SSE2\n  if (byte_ct < kBytesPerVec) {\n#endif\n    uintptr_t tot = 0;\n    for (uintptr_t ulii = 0; ulii != byte_ct; ++ulii) {\n      tot += (bytearr_uc_iter[ulii] == ucc);\n    }\n    return tot;\n#ifdef USE_SSE2\n  }\n  const VecW m0 = vecw_setzero();\n  const VecUc match_vvec = vecuc_set1(ucc);\n  VecW acc = vecw_setzero();\n  while (byte_ct > 255 * kBytesPerVec) {\n    VecUc inner_acc = vecuc_setzero();\n    for (uint32_t uii = 0; uii != 255; ++uii) {\n      const VecUc cur_vvec = vecuc_loadu(bytearr_uc_iter);\n      bytearr_uc_iter = &(bytearr_uc_iter[kBytesPerVec]);\n      inner_acc = inner_acc - (cur_vvec == match_vvec);\n    }\n    acc = acc + vecw_sad(VecUcToW(inner_acc), m0);\n    byte_ct -= 255 * kBytesPerVec;\n  }\n  const unsigned char* bytearr_uc_final = &(bytearr_uc_iter[byte_ct - kBytesPerVec]);\n  VecUc inner_acc = vecuc_setzero();\n  while (bytearr_uc_iter < bytearr_uc_final) {\n    const VecUc cur_vvec = vecuc_loadu(bytearr_uc_iter);\n    bytearr_uc_iter = &(bytearr_uc_iter[kBytesPerVec]);\n    inner_acc = inner_acc - (cur_vvec == match_vvec);\n  }\n  VecUc cur_vvec = vecuc_loadu(bytearr_uc_final);\n  const uintptr_t overlap_byte_ct = bytearr_uc_iter - bytearr_uc_final;\n  const VecUc mask_vvec = vecuc_loadu(&(kLeadMask[kBytesPerVec - overlap_byte_ct]));\n  cur_vvec = (cur_vvec == match_vvec) & mask_vvec;\n  inner_acc = inner_acc - cur_vvec;\n  acc = acc + vecw_sad(VecUcToW(inner_acc), m0);\n  return HsumW(acc);\n#endif  // USE_SSE2\n}\n\nuintptr_t CountU16(const void* u16arr, uint16_t usii, uintptr_t u16_ct) {\n  const unsigned char* u16arr_biter = S_CAST(const unsigned char*, u16arr);\n#ifdef USE_SSE2\n  if (u16_ct < (kBytesPerVec / 2)) {\n#endif\n    // todo: benchmark this vs. 
zero-extend in a non-SSE2 setting\n    uintptr_t tot = 0;\n    for (uintptr_t ulii = 0; ulii != u16_ct; ++ulii) {\n      uint16_t cur_u16;\n      CopyFromUnalignedOffsetU16(&cur_u16, u16arr_biter, ulii);\n      tot += (cur_u16 == usii);\n    }\n    return tot;\n#ifdef USE_SSE2\n  }\n  const VecW m0 = vecw_setzero();\n  const VecU16 match_vvec = vecu16_set1(usii);\n  VecW acc = vecw_setzero();\n  // can also use a larger loop and a slightly different accumulation algorithm,\n  // but it should make practically no difference; let's keep these loops as\n  // similar as possible for now.\n  while (u16_ct > 255 * (kBytesPerVec / 2)) {\n    VecU16 inner_acc = vecu16_setzero();\n    for (uint32_t uii = 0; uii != 255; ++uii) {\n      const VecU16 cur_vvec = vecu16_loadu(u16arr_biter);\n      u16arr_biter = &(u16arr_biter[kBytesPerVec]);\n      inner_acc = inner_acc - (cur_vvec == match_vvec);\n    }\n    acc = acc + vecw_sad(VecU16ToW(inner_acc), m0);\n    u16_ct -= 255 * (kBytesPerVec / 2);\n  }\n  const unsigned char* u16arr_final = &(u16arr_biter[u16_ct * sizeof(int16_t) - kBytesPerVec]);\n  VecU16 inner_acc = vecu16_setzero();\n  while (u16arr_biter < u16arr_final) {\n    const VecU16 cur_vvec = vecu16_loadu(u16arr_biter);\n    u16arr_biter = &(u16arr_biter[kBytesPerVec]);\n    inner_acc = inner_acc - (cur_vvec == match_vvec);\n  }\n  VecU16 cur_vvec = vecu16_loadu(u16arr_final);\n  const uintptr_t overlap_byte_ct = u16arr_biter - u16arr_final;\n  const VecU16 mask_vvec = vecu16_loadu(&(kLeadMask[kBytesPerVec - overlap_byte_ct]));\n  cur_vvec = (cur_vvec == match_vvec) & mask_vvec;\n  inner_acc = inner_acc - cur_vvec;\n  acc = acc + vecw_sad(VecU16ToW(inner_acc), m0);\n  return HsumW(acc);\n#endif  // USE_SSE2\n}\n\nuint32_t Copy1bit8Subset(const uintptr_t* __restrict src_subset, const void* __restrict src_vals, const uintptr_t* __restrict sample_include, uint32_t src_subset_size, uint32_t sample_ct, uintptr_t* __restrict dst_subset, void* __restrict dst_vals) {\n  if (!src_subset_size) {\n    return 0;\n  }\n  CopyBitarrSubset(src_subset, sample_include, sample_ct, dst_subset);\n  const unsigned char* src_vals_uc = S_CAST(const unsigned char*, src_vals);\n  unsigned char* dst_vals_uc = S_CAST(unsigned char*, dst_vals);\n  unsigned char* dst_vals_iter = dst_vals_uc;\n  uintptr_t sample_widx = 0;\n  uintptr_t src_subset_bits = src_subset[0];\n  for (uint32_t src_idx = 0; src_idx != src_subset_size; ++src_idx) {\n    const uintptr_t lowbit = BitIter1y(src_subset, &sample_widx, &src_subset_bits);\n    if (sample_include[sample_widx] & lowbit) {\n      *dst_vals_iter++ = src_vals_uc[src_idx];\n    }\n  }\n  return dst_vals_iter - dst_vals_uc;\n}\n\nuint32_t Copy1bit16Subset(const uintptr_t* __restrict src_subset, const void* __restrict src_vals, const uintptr_t* __restrict sample_include, uint32_t src_subset_size, uint32_t sample_ct, uintptr_t* __restrict dst_subset, void* __restrict dst_vals) {\n  if (!src_subset_size) {\n    return 0;\n  }\n  CopyBitarrSubset(src_subset, sample_include, sample_ct, dst_subset);\n  const uint16_t* src_vals_u16 = S_CAST(const uint16_t*, src_vals);\n  uint16_t* dst_vals_u16 = S_CAST(uint16_t*, dst_vals);\n  uint16_t* dst_vals_iter = dst_vals_u16;\n  uintptr_t sample_widx = 0;\n  uintptr_t src_subset_bits = src_subset[0];\n  for (uint32_t src_idx = 0; src_idx != src_subset_size; ++src_idx) {\n    const uintptr_t lowbit = BitIter1y(src_subset, &sample_widx, &src_subset_bits);\n    if (sample_include[sample_widx] & lowbit) {\n      *dst_vals_iter++ = 
src_vals_u16[src_idx];\n    }\n  }\n  return dst_vals_iter - dst_vals_u16;\n}\n\n// 'Unsafe' because it assumes high bits of every byte are 0.\nvoid Reduce8to4bitInplaceUnsafe(uintptr_t entry_ct, uintptr_t* mainvec) {\n#ifdef USE_SSE2\n  const uintptr_t fullvec_ct = entry_ct / (kBytesPerVec * 2);\n  const VecW m8 = VCONST_W(kMask00FF);\n  VecW* vmainvec = R_CAST(VecW*, mainvec);\n  for (uintptr_t write_vidx = 0; write_vidx != fullvec_ct; ++write_vidx) {\n    VecW v0 = vmainvec[write_vidx * 2];\n    VecW v1 = vmainvec[write_vidx * 2 + 1];\n    v0 = v0 | vecw_srli(v0, 4);\n    v1 = v1 | vecw_srli(v1, 4);\n    vmainvec[write_vidx] = vecw_gather_even(v0, v1, m8);\n  }\n  uintptr_t write_idx = fullvec_ct * kWordsPerVec;\n  // Early exit iff the vectorized loop already wrote every output word.\n  if (write_idx == DivUp(entry_ct, kBytesPerWord * 2)) {\n    return;\n  }\n#else\n  uintptr_t write_idx = 0;\n#endif\n  // Read two words at a time and write one.\n  // We could instead read one word and write a Halfword at a time, but I'd\n  // rather not worry about the strict-aliasing issues involved there.\n  const uintptr_t write_idx_last = (entry_ct - 1) / (kBytesPerWord * 2);\n  uintptr_t write_word;\n  for (; ; ++write_idx) {\n    const uintptr_t inword0 = mainvec[2 * write_idx];\n    const uintptr_t inword1 = mainvec[2 * write_idx + 1];\n    write_word = PackTwo0F0F(inword0, inword1);\n    if (write_idx == write_idx_last) {\n      break;\n    }\n    mainvec[write_idx] = write_word;\n  }\n  const uint32_t remaining_entry_ct = ModNz(entry_ct, kBytesPerWord * 2);\n  mainvec[write_idx] = bzhi_max(write_word, remaining_entry_ct * 4);\n}\n\n#ifdef __cplusplus\n}  // namespace plink2\n#endif\n"
  },
  {
    "path": "external_libs/pgenlib/include/plink2_bits.h",
    "content": "#ifndef __PLINK2_BITS_H__\n#define __PLINK2_BITS_H__\n\n// This library is part of PLINK 2.0, copyright (C) 2005-2024 Shaun Purcell,\n// Christopher Chang.\n//\n// This library is free software: you can redistribute it and/or modify it\n// under the terms of the GNU Lesser General Public License as published by the\n// Free Software Foundation; either version 3 of the License, or (at your\n// option) any later version.\n//\n// This library is distributed in the hope that it will be useful, but WITHOUT\n// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or\n// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License\n// for more details.\n//\n// You should have received a copy of the GNU Lesser General Public License\n// along with this library.  If not, see <http://www.gnu.org/licenses/>.\n\n\n// Bitarray support.  (Inline single-word operations are in plink2_base.h.)\n\n#include \"plink2_base.h\"\n\n#ifdef __cplusplus\nnamespace plink2 {\n#endif\n\n#if defined(USE_SSE2) && !defined(USE_AVX2)\n// may also want a version which doesn't always apply kMask5555\nvoid Pack32bTo16bMask(const void* words_vec, uintptr_t ct_32b, void* dest);\n\nHEADER_INLINE void PackWordsToHalfwordsMask(const uintptr_t* words_vec, uintptr_t word_ct, void* dest) {\n  uintptr_t widx = 0;\n  if (word_ct >= (32 / kBytesPerWord)) {\n    const uintptr_t ct_32b = word_ct / (32 / kBytesPerWord);\n    Pack32bTo16bMask(words_vec, ct_32b, dest);\n    widx = ct_32b * (32 / kBytesPerWord);\n  }\n  unsigned char* dest_uc = S_CAST(unsigned char*, dest);\n  for (; widx != word_ct; ++widx) {\n    const Halfword hw = PackWordToHalfwordMask5555(words_vec[widx]);\n    CopyToUnalignedOffsetHW(dest_uc, &hw, widx);\n  }\n}\n#else\nHEADER_INLINE void PackWordsToHalfwordsMask(const uintptr_t* words_vec, uintptr_t word_ct, void* dest) {\n  unsigned char* dest_uc = S_CAST(unsigned char*, dest);\n  for (uintptr_t widx = 0; widx != word_ct; ++widx) {\n    const Halfword hw = PackWordToHalfwordMask5555(words_vec[widx]);\n    CopyToUnalignedOffsetHW(dest_uc, &hw, widx);\n  }\n}\n\n#endif\n\n// ok for ct == 0\nvoid SetAllBits(uintptr_t ct, uintptr_t* bitarr);\n\n// \"Nz\" added to names to make it obvious these require positive len\nvoid FillBitsNz(uintptr_t start_idx, uintptr_t end_idx, uintptr_t* bitarr);\nvoid ClearBitsNz(uintptr_t start_idx, uintptr_t end_idx, uintptr_t* bitarr);\n\nvoid BitvecAnd(const uintptr_t* __restrict arg_bitvec, uintptr_t word_ct, uintptr_t* __restrict main_bitvec);\n\nvoid BitvecInvmask(const uintptr_t* __restrict exclude_bitvec, uintptr_t word_ct, uintptr_t* __restrict main_bitvec);\n\nvoid BitvecOr(const uintptr_t* __restrict arg_bitvec, uintptr_t word_ct, uintptr_t* main_bitvec);\n\nvoid BitvecInvert(uintptr_t word_ct, uintptr_t* main_bitvec);\n\nvoid BitvecXorCopy(const uintptr_t* __restrict source1_bitvec, const uintptr_t* __restrict source2_bitvec, uintptr_t word_ct, uintptr_t* target_bitvec);\n\nvoid BitvecInvertCopy(const uintptr_t* __restrict source_bitvec, uintptr_t word_ct, uintptr_t* __restrict target_bitvec);\n\n// These ensure the trailing bits are zeroed out.\n// 'AlignedBitarr' instead of Bitvec since this takes bit_ct instead of word_ct\n// as the size argument, and zeroes trailing bits.\nHEADER_INLINE void AlignedBitarrInvert(uintptr_t bit_ct, uintptr_t* main_bitvec) {\n  const uintptr_t fullword_ct = bit_ct / kBitsPerWord;\n  BitvecInvert(fullword_ct, main_bitvec);\n  const uint32_t trail_ct = bit_ct % kBitsPerWord;\n  if (trail_ct) {\n    
main_bitvec[fullword_ct] = bzhi(~main_bitvec[fullword_ct], trail_ct);\n  }\n}\n\nHEADER_INLINE void AlignedBitarrInvertCopy(const uintptr_t* __restrict source_bitvec, uintptr_t bit_ct, uintptr_t* __restrict target_bitvec) {\n  const uintptr_t fullword_ct = bit_ct / kBitsPerWord;\n  BitvecInvertCopy(source_bitvec, fullword_ct, target_bitvec);\n  const uint32_t trail_ct = bit_ct % kBitsPerWord;\n  if (trail_ct) {\n    target_bitvec[fullword_ct] = bzhi(~source_bitvec[fullword_ct], trail_ct);\n  }\n}\n\n// Functions with \"adv\" in the name generally take an index or char-pointer as\n// an argument and return its new value, while \"mov\" functions take a\n// pointer-to-index or pointer-to-char-pointer and move it.\n\n// These return the current index if the corresponding bit satisfies the\n// condition.\nuintptr_t AdvTo1Bit(const uintptr_t* bitarr, uintptr_t loc);\n\nuintptr_t AdvTo0Bit(const uintptr_t* bitarr, uintptr_t loc);\n\n// uintptr_t NextNonmissingUnsafe(const uintptr_t* genoarr, uintptr_t loc);\n\nuint32_t AdvBoundedTo1Bit(const uintptr_t* bitarr, uint32_t loc, uint32_t ceil);\n\nuintptr_t AdvBoundedTo0Bit(const uintptr_t* bitarr, uintptr_t loc, uintptr_t ceil);\n\nuintptr_t FindLast1BitBefore(const uintptr_t* bitarr, uintptr_t loc);\n\n// possible todo: check if movemask-based solution is better in AVX2 case\nHEADER_INLINE uint32_t AllWordsAreZero(const uintptr_t* word_arr, uintptr_t word_ct) {\n  while (word_ct--) {\n    if (*word_arr++) {\n      return 0;\n    }\n  }\n  return 1;\n}\n\nHEADER_INLINE uint32_t AllBitsAreOne(const uintptr_t* bitarr, uintptr_t bit_ct) {\n  const uintptr_t fullword_ct = bit_ct / kBitsPerWord;\n  for (uintptr_t widx = 0; widx != fullword_ct; ++widx) {\n    if (~(bitarr[widx])) {\n      return 0;\n    }\n  }\n  const uint32_t trailing_bit_ct = bit_ct % kBitsPerWord;\n  // All bits of the final partial word below trailing_bit_ct must be set,\n  // i.e. the inverted word must be zero once its irrelevant high bits are\n  // shifted out.\n  return (!trailing_bit_ct) || (!((~(bitarr[fullword_ct])) << (kBitsPerWord - trailing_bit_ct)));\n}\n\nuint32_t AllBytesAreX(const unsigned char* bytes, unsigned char match, uintptr_t byte_ct);\n\n// Updated PopcountWords() code is based on\n// https://github.com/kimwalisch/libpopcnt .  libpopcnt license text follows.\n\n/*\n * libpopcnt.h - C/C++ library for counting the number of 1 bits (bit\n * population count) in an array as quickly as possible using\n * specialized CPU instructions i.e. POPCNT, AVX2, AVX512, NEON.\n *\n * Copyright (c) 2016 - 2017, Kim Walisch\n * Copyright (c) 2016 - 2017, Wojciech Mula\n *\n * All rights reserved.\n *\n * Redistribution and use in source and binary forms, with or without\n * modification, are permitted provided that the following conditions are met:\n *\n * 1. Redistributions of source code must retain the above copyright notice, this\n *    list of conditions and the following disclaimer.\n * 2. Redistributions in binary form must reproduce the above copyright notice,\n *    this list of conditions and the following disclaimer in the documentation\n *    and/or other materials provided with the distribution.\n *\n * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\" AND\n * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED\n * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE\n * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR\n * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES\n * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;\n * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND\n * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\n * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n */\n\n#ifdef USE_AVX2\n// 'Csa' = carry, save, add\n// If bb, cc, and *lp are bitvectors, this returns the carry bitvector and sets\n// *lp to contain the low-order bits of the sums.  I.e. for each position:\n//   if none of bb, cc, and *lp are set, *lp bit is zero and carry bit is zero\n//   if exactly 1 is set, *lp bit becomes one and carry bit is zero\n//   if exactly 2 are set, *lp bit becomes zero and carry bit is one\n//   if all 3 are set, *lp bit becomes one and carry bit is one\nHEADER_INLINE VecW Csa256(VecW bb, VecW cc, VecW* lp) {\n  const VecW aa = *lp;\n  const VecW uu = aa ^ bb;\n  *lp = uu ^ cc;\n  return (aa & bb) | (uu & cc);\n}\n\nHEADER_INLINE VecW CsaOne256(VecW bb, VecW* lp) {\n  const VecW aa = *lp;\n  *lp = aa ^ bb;\n  return aa & bb;\n}\n\nHEADER_INLINE VecW PopcountVecAvx2(VecW vv) {\n  const VecW lookup1 = vecw_setr8(4, 5, 5, 6, 5, 6, 6, 7,\n                                  5, 6, 6, 7, 6, 7, 7, 8);\n  const VecW lookup2 = vecw_setr8(4, 3, 3, 2, 3, 2, 2, 1,\n                                  3, 2, 2, 1, 2, 1, 1, 0);\n\n  const VecW m4 = VCONST_W(kMask0F0F);\n  const VecW lo = vv & m4;\n  const VecW hi = vecw_srli(vv, 4) & m4;\n  const VecW popcnt1 = vecw_shuffle8(lookup1, lo);\n  const VecW popcnt2 = vecw_shuffle8(lookup2, hi);\n  return vecw_sad(popcnt1, popcnt2);\n}\n\nHEADER_INLINE uintptr_t HsumW(VecW vv) {\n  UniVec vu;\n  vu.vw = vv;\n  return vu.w[0] + vu.w[1] + vu.w[2] + vu.w[3];\n  // _mm256_extract_epi64() only worth it if we don't need to extract all the\n  // values.\n  // (also, I wouldn't be surprised if the compiler recognized the pattern\n  // above)\n}\n\n// This no longer has any restrictions on vec_ct, though it isn't worth the\n// overhead for vec_ct < 16.\nuintptr_t PopcountVecsAvx2(const VecW* bit_vvec, uintptr_t vec_ct);\n\nHEADER_INLINE uintptr_t PopcountWords(const uintptr_t* bitvec, uintptr_t word_ct) {\n  // Efficiently popcounts bitvec[0..(word_ct - 1)].  
In the 64-bit case,\n  // bitvec[] must be 16-byte aligned.\n  // The PopcountWordsNzbase() wrapper takes care of starting from a later\n  // index.\n  uintptr_t tot = 0;\n  if (word_ct >= 76) {\n    assert(IsVecAligned(bitvec));\n    const uintptr_t remainder = word_ct % kWordsPerVec;\n    const uintptr_t main_block_word_ct = word_ct - remainder;\n    tot = PopcountVecsAvx2(R_CAST(const VecW*, bitvec), main_block_word_ct / kWordsPerVec);\n    bitvec = &(bitvec[main_block_word_ct]);\n    word_ct = remainder;\n  }\n  // note that recent clang versions automatically expand this to a\n  // full-service routine; takes ~50% longer than PopcountVecsAvx2 on >1kb\n  // arrays, but way better than the naive loop\n  for (uintptr_t widx = 0; widx != word_ct; ++widx) {\n    tot += PopcountWord(bitvec[widx]);\n  }\n  return tot;\n}\n#else  // !USE_AVX2\n#  ifdef USE_SSE2\nHEADER_INLINE uintptr_t HsumW(VecW vv) {\n  UniVec vu;\n  vu.vw = vv;\n  return vu.w[0] + vu.w[1];\n}\n#  else\nHEADER_INLINE uintptr_t HsumW(VecW vv) {\n  return vv;\n}\n#  endif\n\n// assumes vec_ct is a multiple of 3\nuintptr_t PopcountVecsNoAvx2(const VecW* bit_vvec, uintptr_t vec_ct);\n\nHEADER_INLINE uintptr_t PopcountWords(const uintptr_t* bitvec, uintptr_t word_ct) {\n  uintptr_t tot = 0;\n#ifndef USE_SSE42\n  if (word_ct >= (3 * kWordsPerVec)) {\n    // This has an asymptotic ~10% advantage in the SSE4.2 case, but word_ct\n    // needs to be in the hundreds before the initial comparison even starts to\n    // pay for itself.\n    assert(IsVecAligned(bitvec));\n    const uintptr_t remainder = word_ct % (3 * kWordsPerVec);\n    const uintptr_t main_block_word_ct = word_ct - remainder;\n    tot = PopcountVecsNoAvx2(R_CAST(const VecW*, bitvec), main_block_word_ct / kWordsPerVec);\n    word_ct = remainder;\n    bitvec = &(bitvec[main_block_word_ct]);\n  }\n#endif\n  for (uintptr_t trailing_word_idx = 0; trailing_word_idx != word_ct; ++trailing_word_idx) {\n    tot += PopcountWord(bitvec[trailing_word_idx]);\n  }\n  return tot;\n}\n#endif  // !USE_AVX2\n\nuintptr_t PopcountWordsIntersect(const uintptr_t* __restrict bitvec1_iter, const uintptr_t* __restrict bitvec2_iter, uintptr_t word_ct);\n\nuintptr_t PopcountWordsXor(const uintptr_t* __restrict bitvec1_iter, const uintptr_t* __restrict bitvec2_iter, uintptr_t word_ct);\n\n// uintptr_t PopcountWordsIntersect3(const uintptr_t* __restrict bitvec1_iter, const uintptr_t* __restrict bitvec2_iter, const uintptr_t* __restrict bitvec3_iter, uintptr_t word_ct);\n\n// requires positive word_ct\n// stay agnostic a bit longer re: word_ct := DIV_UP(entry_ct, kBitsPerWord)\n// vs. 
word_ct := 1 + (entry_ct / kBitsPerWord)\n// (this is a source of bugs, though; interface should probably be changed to\n// use entry_ct once multiallelic/dosage implementation is done)\nvoid FillCumulativePopcounts(const uintptr_t* subset_mask, uint32_t word_ct, uint32_t* cumulative_popcounts);\n\nvoid FillCumulativePopcountsW(const uintptr_t* subset_mask, uintptr_t word_ct, uintptr_t* cumulative_popcounts_w);\n\n// If idx_list is a list of valid unfiltered indexes, this converts them\n// in-place to corresponding filtered indexes.\nvoid UidxsToIdxs(const uintptr_t* subset_mask, const uint32_t* subset_cumulative_popcounts, const uintptr_t idx_list_len, uint32_t* idx_list);\n\n// These functions do not overread, but may write extra bytes up to the word\n// boundary.\nvoid Expand1bitTo8(const void* __restrict bytearr, uint32_t input_bit_ct, uint32_t incr, void* __restrict dst);\n\nvoid Expand1bitTo16(const void* __restrict bytearr, uint32_t input_bit_ct, uint32_t incr, void* __restrict dst);\n\n\n// might rename this to IsSet01 (guaranteeing 0/1 return value), and change\n// IsSet() to bitarr[idx / kBitsPerWord] & (k1LU << (idx % kBitsPerWord)) since\n// I'd expect that to play better with out-of-order execution.  but need to\n// benchmark first.\nHEADER_INLINE uintptr_t IsSet(const uintptr_t* bitarr, uintptr_t idx) {\n  return (bitarr[idx / kBitsPerWord] >> (idx % kBitsPerWord)) & 1;\n}\n\nHEADER_INLINE uintptr_t IsSetUnaligned(const void* bitarr, uintptr_t idx) {\n  const unsigned char* bitarr_uc = S_CAST(const unsigned char*, bitarr);\n  return (bitarr_uc[idx / CHAR_BIT] >> (idx % CHAR_BIT)) & 1;\n}\n\nHEADER_INLINE void SetBit(uintptr_t idx, uintptr_t* bitarr) {\n  bitarr[idx / kBitsPerWord] |= k1LU << (idx % kBitsPerWord);\n}\n\nHEADER_INLINE void ClearBit(uintptr_t idx, uintptr_t* bitarr) {\n  bitarr[idx / kBitsPerWord] &= ~(k1LU << (idx % kBitsPerWord));\n}\n\nHEADER_INLINE void AssignBit(uintptr_t idx, uintptr_t newbit, uintptr_t* bitarr) {\n  const uintptr_t inv_mask = k1LU << (idx % kBitsPerWord);\n  uintptr_t* cur_word_ptr = &(bitarr[idx / kBitsPerWord]);\n  *cur_word_ptr = ((*cur_word_ptr) & (~inv_mask)) | (inv_mask * newbit);\n}\n\n/*\nHEADER_INLINE uintptr_t BitInnerIter1(uintptr_t uidx_base, uintptr_t* cur_bitsp, uintptr_t* cur_uidx_stopp) {\n  const uintptr_t cur_bits = *cur_bitsp;\n  const uint32_t uidx_start_lowbits = ctzw(*cur_bitsp);\n  // Key idea is to iterate over sub-blocks of set bits in a single word, in\n  // essentially the same manner as non-AVX2 CopyBitarrSubset() was doing.\n  // This particular expression 'finds' the end of the current sub-block.\n  const uintptr_t cur_bits_lfill_p1 = (cur_bits | (cur_bits - 1)) + 1;\n  *cur_bitsp = cur_bits & cur_bits_lfill_p1;\n  uint32_t uidx_stop_lowbits = kBitsPerWord;\n  if (cur_bits_lfill_p1) {\n    uidx_stop_lowbits = ctzw(cur_bits_lfill_p1);\n  }\n  *cur_uidx_stopp = uidx_base + uidx_stop_lowbits;\n  return uidx_base + uidx_start_lowbits;\n}\n*/\n\nHEADER_INLINE uintptr_t BitIter1(const uintptr_t* __restrict bitarr, uintptr_t* __restrict uidx_basep, uintptr_t* __restrict cur_bitsp) {\n  uintptr_t cur_bits = *cur_bitsp;\n  if (!cur_bits) {\n    uintptr_t widx = (*uidx_basep) / kBitsPerWord;\n    do {\n      cur_bits = bitarr[++widx];\n    } while (!cur_bits);\n    *uidx_basep = widx * kBitsPerWord;\n  }\n  *cur_bitsp = cur_bits & (cur_bits - 1);\n  return (*uidx_basep) + ctzw(cur_bits);\n}\n\n// Returns lowbit index instead of the full index.\nHEADER_INLINE uint32_t BitIter1x(const uintptr_t* __restrict bitarr, 
uintptr_t* __restrict widxp, uintptr_t* __restrict cur_bitsp) {\n  uintptr_t cur_bits = *cur_bitsp;\n  while (!cur_bits) {\n    cur_bits = bitarr[++(*widxp)];\n  }\n  *cur_bitsp = cur_bits & (cur_bits - 1);\n  return ctzw(cur_bits);\n}\n\n// Returns isolated lowbit.\nHEADER_INLINE uintptr_t BitIter1y(const uintptr_t* __restrict bitarr, uintptr_t* __restrict widxp, uintptr_t* __restrict cur_bitsp) {\n  uintptr_t cur_bits = *cur_bitsp;\n  while (!cur_bits) {\n    cur_bits = bitarr[++(*widxp)];\n  }\n  const uintptr_t shifted_bit = cur_bits & (-cur_bits);\n  *cur_bitsp = cur_bits ^ shifted_bit;\n  return shifted_bit;\n}\n\nHEADER_INLINE void BitIter1Start(const uintptr_t* __restrict bitarr, uintptr_t restart_uidx, uintptr_t* __restrict uidx_basep, uintptr_t* __restrict cur_bitsp) {\n  const uintptr_t widx = restart_uidx / kBitsPerWord;\n  *cur_bitsp = bitarr[widx] & ((~k0LU) << (restart_uidx % kBitsPerWord));\n  *uidx_basep = widx * kBitsPerWord;\n}\n\nHEADER_INLINE uintptr_t BitIter1NoAdv(const uintptr_t* __restrict bitarr, uintptr_t* __restrict uidx_basep, uintptr_t* __restrict cur_bitsp) {\n  uintptr_t cur_bits = *cur_bitsp;\n  if (!cur_bits) {\n    uintptr_t widx = (*uidx_basep) / kBitsPerWord;\n    do {\n      cur_bits = bitarr[++widx];\n    } while (!cur_bits);\n    *uidx_basep = widx * kBitsPerWord;\n    *cur_bitsp = cur_bits;\n  }\n  return (*uidx_basep) + ctzw(cur_bits);\n}\n\nHEADER_INLINE uintptr_t BitIter0(const uintptr_t* __restrict bitarr, uintptr_t* __restrict uidx_basep, uintptr_t* __restrict cur_inv_bitsp) {\n  uintptr_t cur_inv_bits = *cur_inv_bitsp;\n  if (!cur_inv_bits) {\n    uintptr_t widx = (*uidx_basep) / kBitsPerWord;\n    do {\n      cur_inv_bits = ~bitarr[++widx];\n    } while (!cur_inv_bits);\n    *uidx_basep = widx * kBitsPerWord;\n  }\n  *cur_inv_bitsp = cur_inv_bits & (cur_inv_bits - 1);\n  return (*uidx_basep) + ctzw(cur_inv_bits);\n}\n\nHEADER_INLINE void BitIter0Start(const uintptr_t* __restrict bitarr, uintptr_t restart_uidx, uintptr_t* __restrict uidx_basep, uintptr_t* __restrict cur_inv_bitsp) {\n  const uintptr_t widx = restart_uidx / kBitsPerWord;\n  *cur_inv_bitsp = (~bitarr[widx]) & ((~k0LU) << (restart_uidx % kBitsPerWord));\n  *uidx_basep = widx * kBitsPerWord;\n}\n\nHEADER_INLINE uintptr_t BitIter0NoAdv(const uintptr_t* __restrict bitarr, uintptr_t* __restrict uidx_basep, uintptr_t* __restrict cur_inv_bitsp) {\n  uintptr_t cur_inv_bits = *cur_inv_bitsp;\n  if (!cur_inv_bits) {\n    uintptr_t widx = (*uidx_basep) / kBitsPerWord;\n    do {\n      cur_inv_bits = ~bitarr[++widx];\n    } while (!cur_inv_bits);\n    *uidx_basep = widx * kBitsPerWord;\n    *cur_inv_bitsp = cur_inv_bits;\n  }\n  return (*uidx_basep) + ctzw(cur_inv_bits);\n}\n\n// todo: test this against extracting a nonmissing bitarr first\n/*\nHEADER_INLINE void NextNonmissingUnsafeCk32(const uintptr_t* __restrict genoarr, uint32_t* __restrict loc_ptr) {\n  if (GetNyparrEntry(genoarr, *loc_ptr) == 3) {\n    *loc_ptr = NextNonmissingUnsafe(genoarr, *loc_ptr);\n  }\n}\n*/\n\n// Equivalent to PopcountBitRange(subset_mask, 0, raw_idx).\nHEADER_INLINE uint32_t RawToSubsettedPos(const uintptr_t* subset_mask, const uint32_t* subset_cumulative_popcounts, uintptr_t raw_idx) {\n  // this should be much better than keeping a uidx_to_idx array!\n  // (update: there are more compact indexes, but postpone for now, this is\n  // nice and simple and gets us most of what we need.)\n  const uintptr_t raw_widx = raw_idx / kBitsPerWord;\n  return subset_cumulative_popcounts[raw_widx] + 
PopcountWord(bzhi(subset_mask[raw_widx], raw_idx % kBitsPerWord));\n}\n\nHEADER_INLINE uintptr_t RawToSubsettedPosW(const uintptr_t* subset_mask, const uintptr_t* subset_cumulative_popcounts, uintptr_t raw_idx) {\n  const uintptr_t raw_widx = raw_idx / kBitsPerWord;\n  return subset_cumulative_popcounts[raw_widx] + PopcountWord(bzhi(subset_mask[raw_widx], raw_idx % kBitsPerWord));\n}\n\nHEADER_INLINE void ZeroTrailingBits(uintptr_t bit_ct, uintptr_t* bitarr) {\n  const uint32_t trail_ct = bit_ct % kBitsPerWord;\n  if (trail_ct) {\n    bitarr[bit_ct / kBitsPerWord] = bzhi(bitarr[bit_ct / kBitsPerWord], trail_ct);\n  }\n}\n\n#ifdef USE_SSE2\nHEADER_INLINE void ZeroTrailingWords(uint32_t word_ct, uintptr_t* bitvec) {\n  const uint32_t remainder = word_ct % kWordsPerVec;\n  if (remainder) {\n    ZeroWArr(kWordsPerVec - remainder, &(bitvec[word_ct]));\n  }\n}\n#else\nHEADER_INLINE void ZeroTrailingWords(__maybe_unused uint32_t word_ct, __maybe_unused uintptr_t* bitvec) {\n}\n#endif\n\nHEADER_INLINE void CopyBitarr(const uintptr_t* __restrict src, uintptr_t bit_ct, uintptr_t* __restrict dst) {\n  memcpy(dst, src, BitCtToWordCt(bit_ct) * kBytesPerWord);\n}\n\n// output_bit_idx_end is practically always subset_size\n// if not, it currently must correspond to PopcountWords(subset_mask, word_ct)\n// for some word_ct\nvoid CopyBitarrSubset(const uintptr_t* __restrict raw_bitarr, const uintptr_t* __restrict subset_mask, uint32_t output_bit_idx_end, uintptr_t* __restrict output_bitarr);\n\n#ifndef NO_UNALIGNED\nHEADER_INLINE void CopyBitarrSubsetToUnaligned(const uintptr_t* __restrict raw_bitarr, const uintptr_t* __restrict subset_mask, uint32_t output_bit_idx_end, void* __restrict output_bitarr) {\n  CopyBitarrSubset(raw_bitarr, subset_mask, output_bit_idx_end, S_CAST(uintptr_t*, output_bitarr));\n}\n#else\nvoid CopyBitarrSubsetToUnaligned(const uintptr_t* __restrict raw_bitarr, const uintptr_t* __restrict subset_mask, uint32_t output_bit_idx_end, void* __restrict output_bitarr);\n#endif\n\n// expand_size + read_start_bit must be positive.\nvoid ExpandBytearr(const void* __restrict compact_bitarr, const uintptr_t* __restrict expand_mask, uint32_t word_ct, uint32_t expand_size, uint32_t read_start_bit, uintptr_t* __restrict target);\n\n// equivalent to calling ExpandBytearr() followed by CopyBitarrSubset()\nvoid ExpandThenSubsetBytearr(const void* __restrict compact_bitarr, const uintptr_t* __restrict expand_mask, const uintptr_t* __restrict subset_mask, uint32_t expand_size, uint32_t subset_size, uint32_t read_start_bit, uintptr_t* __restrict target);\n\n// mid_popcount must be positive\nvoid ExpandBytearrNested(const void* __restrict compact_bitarr, const uintptr_t* __restrict mid_bitarr, const uintptr_t* __restrict top_expand_mask, uint32_t word_ct, uint32_t mid_popcount, uint32_t mid_start_bit, uintptr_t* __restrict mid_target, uintptr_t* __restrict compact_target);\n\n// mid_popcount must be positive\n// if mid_start_bit == 1, mid_popcount should not include that bit\nvoid ExpandThenSubsetBytearrNested(const void* __restrict compact_bitarr, const uintptr_t* __restrict mid_bitarr, const uintptr_t* __restrict top_expand_mask, const uintptr_t* __restrict subset_mask, uint32_t subset_size, uint32_t mid_popcount, uint32_t mid_start_bit, uintptr_t* __restrict mid_target, uintptr_t* __restrict compact_target);\n\n// these don't read past the end of bitarr\nuintptr_t PopcountBytes(const void* bitarr, uintptr_t byte_ct);\nuintptr_t PopcountBytesMasked(const void* bitarr, const uintptr_t* mask_arr, 
uintptr_t byte_ct);\n\n\n// TransposeNypblock(), which is more plink-specific, is in pgenlib_misc\nCONSTI32(kPglBitTransposeBatch, kBitsPerCacheline);\nCONSTI32(kPglBitTransposeWords, kWordsPerCacheline);\n// * Up to 512x512 (CACHELINE64) or 1024x1024 (CACHELINE128)\n// * vecaligned_buf must have size 64k (CACHELINE64) or 256k (CACHELINE128)\n// * write_iter must be allocated up to at least\n//   RoundUpPow2(write_batch_size, 2) rows\n// * We use pointers with different types to read from and write to buf0/buf1,\n//   so defining the base type as unsigned char* is theoretically necessary to\n//   avoid breaking strict-aliasing rules, while the restrict qualifiers should\n//   tell the compiler it doesn't need to be paranoid about writes to one of\n//   the buffers screwing with reads from the other.\nCONSTI32(kPglBitTransposeBufbytes, (kPglBitTransposeBatch * kPglBitTransposeBatch) / (CHAR_BIT / 2));\n#ifdef __LP64__\nvoid TransposeBitblock64(const uintptr_t* read_iter, uintptr_t read_ul_stride, uintptr_t write_ul_stride, uint32_t read_row_ct, uint32_t write_row_ct, uintptr_t* write_iter, VecW* __restrict buf0, VecW* __restrict buf1);\n\nHEADER_INLINE void TransposeBitblock(const uintptr_t* read_iter, uintptr_t read_ul_stride, uintptr_t write_ul_stride, uint32_t read_batch_size, uint32_t write_batch_size, uintptr_t* write_iter, VecW* vecaligned_buf) {\n  TransposeBitblock64(read_iter, read_ul_stride, write_ul_stride, read_batch_size, write_batch_size, write_iter, vecaligned_buf, &(vecaligned_buf[kPglBitTransposeBufbytes / (2 * kBytesPerVec)]));\n}\n\n#else  // !__LP64__\nvoid TransposeBitblock32(const uintptr_t* read_iter, uintptr_t read_ul_stride, uintptr_t write_ul_stride, uint32_t read_batch_size, uint32_t write_batch_size, uintptr_t* write_iter, VecW* __restrict buf0, VecW* __restrict buf1);\n\n// If this ever needs to be called on an input byte array, read_iter could be\n// changed to const void*; in that case, read_ul_stride should be changed to a\n// byte count.\nHEADER_INLINE void TransposeBitblock(const uintptr_t* read_iter, uintptr_t read_ul_stride, uintptr_t write_ul_stride, uint32_t read_batch_size, uint32_t write_batch_size, uintptr_t* write_iter, VecW* vecaligned_buf) {\n  TransposeBitblock32(read_iter, read_ul_stride, write_ul_stride, read_batch_size, write_batch_size, write_iter, vecaligned_buf, &(vecaligned_buf[kPglBitTransposeBufbytes / (2 * kBytesPerVec)]));\n}\n#endif\n\nCONSTI32(kPglBitTransposeBufwords, kPglBitTransposeBufbytes / kBytesPerWord);\nCONSTI32(kPglBitTransposeBufvecs, kPglBitTransposeBufbytes / kBytesPerVec);\n\nCONSTI32(kPglNybbleTransposeBatch, kNybblesPerCacheline);\nCONSTI32(kPglNybbleTransposeWords, kWordsPerCacheline);\n\nCONSTI32(kPglNybbleTransposeBufbytes, (kPglNybbleTransposeBatch * kPglNybbleTransposeBatch) / 2);\n\n// * Up to 128x128 (CACHELINE64) or 256x256 (CACHELINE128)\n// * vecaligned_buf must have size 8k (CACHELINE64) or 32k (CACHELINE128)\n// * Now ok for write_iter to not be padded when write_batch_size odd\nvoid TransposeNybbleblock(const uintptr_t* read_iter, uint32_t read_ul_stride, uint32_t write_ul_stride, uint32_t read_batch_size, uint32_t write_batch_size, uintptr_t* __restrict write_iter, VecW* vecaligned_buf);\n\n#ifdef USE_SSE2\n#  ifdef USE_AVX2\nextern const unsigned char kLeadMask[2 * kBytesPerVec] __attribute__ ((aligned (64)));\n#  else\nextern const unsigned char kLeadMask[2 * kBytesPerVec] __attribute__ ((aligned (32)));\n#  endif\n#endif\n\nuintptr_t BytesumArr(const void* bytearr, uintptr_t byte_ct);\n\nuintptr_t 
CountByte(const void* bytearr, unsigned char ucc, uintptr_t byte_ct);\n\nuintptr_t CountU16(const void* u16arr, uint16_t usii, uintptr_t u16_ct);\n\n\n// Applies sample_include to {src_subset, src_vals}.\nuint32_t Copy1bit8Subset(const uintptr_t* __restrict src_subset, const void* __restrict src_vals, const uintptr_t* __restrict sample_include, uint32_t src_subset_size, uint32_t sample_ct, uintptr_t* __restrict dst_subset, void* __restrict dst_vals);\n\nuint32_t Copy1bit16Subset(const uintptr_t* __restrict src_subset, const void* __restrict src_vals, const uintptr_t* __restrict sample_include, uint32_t src_subset_size, uint32_t sample_ct, uintptr_t* __restrict dst_subset, void* __restrict dst_vals);\n\n// more verbose than (val + 3) / 4, but may as well make semantic meaning\n// obvious; any explicit DivUp(val, 4) expressions should have a different\n// meaning\n// (not needed for bitct -> bytect, DivUp(val, CHAR_BIT) is clear enough)\nHEADER_CINLINE uintptr_t NypCtToByteCt(uintptr_t val) {\n  return DivUp(val, 4);\n}\n\nHEADER_CINLINE uintptr_t NypCtToVecCt(uintptr_t val) {\n  return DivUp(val, kNypsPerVec);\n}\n\nHEADER_CINLINE uintptr_t NypCtToWordCt(uintptr_t val) {\n  return DivUp(val, kBitsPerWordD2);\n}\n\nHEADER_CINLINE uintptr_t NypCtToAlignedWordCt(uintptr_t val) {\n  return kWordsPerVec * NypCtToVecCt(val);\n}\n\nHEADER_CINLINE uintptr_t NypCtToCachelineCt(uintptr_t val) {\n  return DivUp(val, kNypsPerCacheline);\n}\n\nHEADER_INLINE uintptr_t GetNyparrEntry(const uintptr_t* nyparr, uint32_t idx) {\n  return (nyparr[idx / kBitsPerWordD2] >> (2 * (idx % kBitsPerWordD2))) & 3;\n}\n\n// todo: check if this optimizes newval=0 out\nHEADER_INLINE void AssignNyparrEntry(uint32_t idx, uintptr_t newval, uintptr_t* nyparr) {\n  const uint32_t bit_shift_ct = 2 * (idx % kBitsPerWordD2);\n  uintptr_t* wordp = &(nyparr[idx / kBitsPerWordD2]);\n  *wordp = ((*wordp) & (~((3 * k1LU) << bit_shift_ct))) | (newval << bit_shift_ct);\n}\n\nHEADER_INLINE void ClearNyparrEntry(uint32_t idx, uintptr_t* nyparr) {\n  // Each nyp spans 2 bits, so the shift count is twice the entry offset\n  // (mirroring GetNyparrEntry()/AssignNyparrEntry() above).\n  nyparr[idx / kBitsPerWordD2] &= ~((3 * k1LU) << (2 * (idx % kBitsPerWordD2)));\n}\n\nHEADER_CINLINE uintptr_t NybbleCtToVecCt(uintptr_t val) {\n  return DivUp(val, kNybblesPerVec);\n}\n\nHEADER_CINLINE uintptr_t NybbleCtToAlignedWordCt(uintptr_t val) {\n  return kWordsPerVec * NybbleCtToVecCt(val);\n}\n\nHEADER_CINLINE uintptr_t NybbleCtToCachelineCt(uintptr_t val) {\n  return DivUp(val, kNybblesPerCacheline);\n}\n\nHEADER_INLINE void AssignNybblearrEntry(uint32_t idx, uintptr_t newval, uintptr_t* nybblearr) {\n  const uint32_t bit_shift_ct = 4 * (idx % kBitsPerWordD4);\n  uintptr_t* wordp = &(nybblearr[idx / kBitsPerWordD4]);\n  *wordp = ((*wordp) & (~((15 * k1LU) << bit_shift_ct))) | (newval << bit_shift_ct);\n}\n\n// 'Unsafe' because it assumes high bits of every byte are 0 and entry_ct is\n// positive.\nvoid Reduce8to4bitInplaceUnsafe(uintptr_t entry_ct, uintptr_t* mainvec);\n\n#ifdef __cplusplus\n}  // namespace plink2\n#endif\n\n#endif  // __PLINK2_BITS_H__\n"
  },
  {
    "path": "external_libs/pgenlib/pgenlib_ffi_support.cpp",
    "content": "// This library is part of PLINK 2.0, copyright (C) 2005-2022 Shaun Purcell,\n// Christopher Chang.\n//\n// This library is free software: you can redistribute it and/or modify it\n// under the terms of the GNU Lesser General Public License as published by the\n// Free Software Foundation; either version 3 of the License, or (at your\n// option) any later version.\n//\n// This library is distributed in the hope that it will be useful, but WITHOUT\n// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or\n// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License\n// for more details.\n//\n// You should have received a copy of the GNU Lesser General Public License\n// along with this library.  If not, see <http://www.gnu.org/licenses/>.\n\n#include \"pgenlib_ffi_support.h\"\n\n#ifdef __cplusplus\nnamespace plink2 {\n#endif\n\nvoid GenoarrToBytesMinus9(const uintptr_t* genoarr, uint32_t sample_ct, int8_t* genobytes) {\n  const uint32_t word_ct_m1 = (sample_ct - 1) / kBytesPerWord;\n  const Quarterword* read_alias = R_CAST(const Quarterword*, genoarr);\n  unsigned char* genobytes_uc = DowncastToUc(genobytes);\n  for (uint32_t widx = 0; ; ++widx) {\n    uintptr_t qw = Unpack0303(read_alias[widx]);\n    // now each byte is in {0, 1, 2, 3}.  Convert the 3s to -9s in a branchless\n    // manner.\n    // (-9) - 3 = -12, which is represented as 244 in a uint8_t\n    const uintptr_t geno_missing = qw & (qw >> 1) & kMask0101;\n    qw += geno_missing * 244;\n    if (widx == word_ct_m1) {\n      SubwordStore(qw, ModNz(sample_ct, kBytesPerWord), &(genobytes_uc[widx * kBytesPerWord]));\n      return;\n    }\n    CopyToUnalignedOffsetW(genobytes_uc, &qw, widx);\n  }\n}\n\nstatic const int32_t kGenoInt32Quads[1024] ALIGNV16 = QUAD_TABLE256(0, 1, 2, -9);\n\nvoid GenoarrToInt32sMinus9(const uintptr_t* genoarr, uint32_t sample_ct, int32_t* geno_int32) {\n  GenoarrLookup256x4bx4(genoarr, kGenoInt32Quads, sample_ct, geno_int32);\n}\n\n// todo: use GenoarrLookup16x8bx2()\nstatic const int64_t kGenoToInt64[4] = {0, 1, 2, -9};\n\nvoid GenoarrToInt64sMinus9(const uintptr_t* genoarr, uint32_t sample_ct, int64_t* geno_int64) {\n  const uint32_t word_ct_m1 = (sample_ct - 1) / kBitsPerWordD2;\n  int64_t* write_iter = geno_int64;\n  uint32_t subgroup_len = kBitsPerWordD2;\n  for (uint32_t widx = 0; ; ++widx) {\n    if (widx >= word_ct_m1) {\n      if (widx > word_ct_m1) {\n        return;\n      }\n      subgroup_len = ModNz(sample_ct, kBitsPerWordD2);\n    }\n    uintptr_t geno_word = genoarr[widx];\n    for (uint32_t uii = 0; uii != subgroup_len; ++uii) {\n      *write_iter++ = kGenoToInt64[geno_word & 3];\n      geno_word >>= 2;\n    }\n  }\n}\n\n// missing = -9\nconst double kGenoDoublePairs[32] ALIGNV16 = PAIR_TABLE16(0.0, 1.0, 2.0, -9.0);\n\nconst uint64_t kGenoToIntcodeDPairs[32] ALIGNV16 = PAIR_TABLE16(0, 0x100000000LLU, 0x100000001LLU, 0xfffffff7fffffff7LLU);\n\nvoid GenoarrPhasedToAlleleCodes(const uint64_t* genoarr_to_intcode_dpair_table, const uintptr_t* genoarr, const uintptr_t* phasepresent, const uintptr_t* phaseinfo, uint32_t sample_ct, uint32_t phasepresent_ct, unsigned char* phasebytes, int32_t* allele_codes) {\n  // phasebytes can be nullptr, phasepresent cannot\n  GenoarrToAlleleCodes(genoarr_to_intcode_dpair_table, genoarr, sample_ct, allele_codes);\n  // should be safe to assume allele_codes are 8-byte aligned?\n  uint64_t* allele_codes_alias64 = R_CAST(uint64_t*, allele_codes);\n  uintptr_t sample_uidx_base = 0;\n  uintptr_t cur_bits = 
phasepresent[0];\n  if (!phasebytes) {\n    for (uint32_t phased_idx = 0; phased_idx != phasepresent_ct; ++phased_idx) {\n      const uintptr_t sample_uidx = BitIter1(phasepresent, &sample_uidx_base, &cur_bits);\n      if (IsSet(phaseinfo, sample_uidx)) {\n        // 1|0\n        allele_codes_alias64[sample_uidx] = 1;\n      }\n    }\n    return;\n  }\n  // 0 and 2 = homozygous, automatically phased; otherwise patch in from\n  // phaseinfo if phasepresent_ct is nonzero\n  // so, start off by extracting low bit from each pair and flipping it\n  const uint32_t word_ct_m1 = (sample_ct - 1) / kBytesPerWord;\n  const Quarterword* read_alias = R_CAST(const Quarterword*, genoarr);\n  uintptr_t* write_walias = R_CAST(uintptr_t*, phasebytes);\n  for (uint32_t widx = 0; ; ++widx) {\n    uintptr_t qw = Unpack0303(read_alias[widx]);\n    qw = (~qw) & kMask0101;\n    if (widx == word_ct_m1) {\n      SubwordStore(qw, ModNz(sample_ct, kBytesPerWord), &(write_walias[widx]));\n      break;\n    }\n    write_walias[widx] = qw;\n  }\n  for (uint32_t phased_idx = 0; phased_idx != phasepresent_ct; ++phased_idx) {\n    const uintptr_t sample_uidx = BitIter1(phasepresent, &sample_uidx_base, &cur_bits);\n    phasebytes[sample_uidx] = 1;\n    if (IsSet(phaseinfo, sample_uidx)) {\n      allele_codes_alias64[sample_uidx] = 1;\n    }\n  }\n}\n\nvoid GenoarrMPToAlleleCodes(const uint64_t* geno_to_intcode_dpair_table, const PgenVariant* pgv, uint32_t sample_ct, unsigned char* phasebytes, int32_t* allele_codes) {\n  // phasebytes can be nullptr, phasepresent cannot\n  const uintptr_t* genoarr = pgv->genovec;\n  const uintptr_t* phasepresent = pgv->phasepresent;\n  const uintptr_t* phaseinfo = pgv->phaseinfo;\n  const uint32_t phasepresent_ct = pgv->phasepresent_ct;\n  const uint32_t patch_01_ct = pgv->patch_01_ct;\n  const uint32_t patch_10_ct = pgv->patch_10_ct;\n  if ((!patch_01_ct) && (!patch_10_ct)) {\n    GenoarrPhasedToAlleleCodes(geno_to_intcode_dpair_table, genoarr, phasepresent, phaseinfo, sample_ct, phasepresent_ct, phasebytes, allele_codes);\n    return;\n  }\n  GenoarrToAlleleCodes(geno_to_intcode_dpair_table, genoarr, sample_ct, allele_codes);\n  // See e.g. 
PglMultiallelicSparseToDense().\n  if (patch_01_ct) {\n    const uintptr_t* patch_01_set = pgv->patch_01_set;\n    const AlleleCode* patch_01_vals = pgv->patch_01_vals;\n    uintptr_t sample_idx_base = 0;\n    uintptr_t cur_bits = patch_01_set[0];\n    int32_t* allele_codes1 = &(allele_codes[1]);\n    for (uint32_t uii = 0; uii != patch_01_ct; ++uii) {\n      const uintptr_t sample_idx = BitIter1(patch_01_set, &sample_idx_base, &cur_bits);\n      allele_codes1[2 * sample_idx] = patch_01_vals[uii];\n    }\n  }\n  if (phasebytes) {\n    // Initialize 0/2 phasebytes before processing patch_10.\n    const uint32_t word_ct_m1 = (sample_ct - 1) / kBytesPerWord;\n    const Quarterword* read_alias = R_CAST(const Quarterword*, genoarr);\n    for (uint32_t widx = 0; ; ++widx) {\n      uintptr_t qw = Unpack0303(read_alias[widx]);\n      qw = (~qw) & kMask0101;\n      if (widx == word_ct_m1) {\n        SubwordStore(qw, ModNz(sample_ct, kBytesPerWord), &(phasebytes[widx * kBytesPerWord]));\n        break;\n      }\n      CopyToUnalignedOffsetW(phasebytes, &qw, widx);\n    }\n  }\n  if (patch_10_ct) {\n    const uintptr_t* patch_10_set = pgv->patch_10_set;\n    const AlleleCode* patch_10_vals = pgv->patch_10_vals;\n    uintptr_t sample_idx_base = 0;\n    uintptr_t cur_bits = patch_10_set[0];\n    if (!phasebytes) {\n      for (uint32_t uii = 0; uii != patch_10_ct; ++uii) {\n        const uintptr_t sample_idx = BitIter1(patch_10_set, &sample_idx_base, &cur_bits);\n        allele_codes[2 * sample_idx] = patch_10_vals[2 * uii];\n        allele_codes[2 * sample_idx + 1] = patch_10_vals[2 * uii + 1];\n      }\n    } else {\n      for (uint32_t uii = 0; uii != patch_10_ct; ++uii) {\n        const uintptr_t sample_idx = BitIter1(patch_10_set, &sample_idx_base, &cur_bits);\n        const AlleleCode ac0 = patch_10_vals[2 * uii];\n        const AlleleCode ac1 = patch_10_vals[2 * uii + 1];\n        allele_codes[2 * sample_idx] = ac0;\n        allele_codes[2 * sample_idx + 1] = ac1;\n        if (ac0 != ac1) {\n          phasebytes[sample_idx] = 0;\n          // When phasepresent bit is set, we'll fix this up later.\n        }\n      }\n    }\n  }\n  uintptr_t sample_uidx_base = 0;\n  uintptr_t cur_bits = phasepresent[0];\n  if (!phasebytes) {\n    for (uint32_t phased_idx = 0; phased_idx != phasepresent_ct; ++phased_idx) {\n      const uintptr_t sample_uidx = BitIter1(phasepresent, &sample_uidx_base, &cur_bits);\n      if (IsSet(phaseinfo, sample_uidx)) {\n        const int32_t tmp_code = allele_codes[2 * sample_uidx];\n        allele_codes[2 * sample_uidx] = allele_codes[2 * sample_uidx + 1];\n        allele_codes[2 * sample_uidx + 1] = tmp_code;\n      }\n    }\n    return;\n  }\n  for (uint32_t phased_idx = 0; phased_idx != phasepresent_ct; ++phased_idx) {\n    const uintptr_t sample_uidx = BitIter1(phasepresent, &sample_uidx_base, &cur_bits);\n    phasebytes[sample_uidx] = 1;\n    if (IsSet(phaseinfo, sample_uidx)) {\n      const int32_t tmp_code = allele_codes[2 * sample_uidx];\n      allele_codes[2 * sample_uidx] = allele_codes[2 * sample_uidx + 1];\n      allele_codes[2 * sample_uidx + 1] = tmp_code;\n    }\n  }\n}\n\n// missing = -9\n// may want a double-lookup function for this\nstatic const int32_t kGenoToHap0Code[6] = {0, 0, 1, -9, 0, 1};\nstatic const int32_t kGenoToHap1Code[6] = {0, 1, 1, -9, 0, 0};\n\n// todo: write version of this which fills phasebytes\nvoid GenoarrPhasedToHapCodes(const uintptr_t* genoarr, const uintptr_t* phaseinfo, uint32_t variant_batch_size, int32_t* hap0_codes_iter, int32_t* 
hap1_codes_iter) {\n  // assumes genoarr and phaseinfo have already been transposed\n  const uint32_t word_ct_m1 = (variant_batch_size - 1) / kBitsPerWordD2;\n  const Halfword* phaseinfo_alias = R_CAST(const Halfword*, phaseinfo);\n  uint32_t subgroup_len = kBitsPerWordD2;\n  for (uint32_t widx = 0; ; ++widx) {\n    if (widx >= word_ct_m1) {\n      if (widx > word_ct_m1) {\n        return;\n      }\n      subgroup_len = ModNz(variant_batch_size, kBitsPerWordD2);\n    }\n    uintptr_t geno_word = genoarr[widx];\n    uintptr_t phaseinfo_hw = phaseinfo_alias[widx];\n    for (uint32_t uii = 0; uii != subgroup_len; ++uii) {\n      const uintptr_t cur_pgeno_code = (geno_word & 3) + 4 * (phaseinfo_hw & 1);\n      *hap0_codes_iter++ = kGenoToHap0Code[cur_pgeno_code];\n      *hap1_codes_iter++ = kGenoToHap1Code[cur_pgeno_code];\n      geno_word >>= 2;\n      phaseinfo_hw >>= 1;\n    }\n  }\n}\n\n// todo: use GenoarrLookup256x4bx4()\nstatic const float kGenoToFloat[4] = {0.0, 1.0, 2.0, -9.0};\n\nvoid Dosage16ToFloatsMinus9(const uintptr_t* genoarr, const uintptr_t* dosage_present, const uint16_t* dosage_main, uint32_t sample_ct, uint32_t dosage_ct, float* geno_float) {\n  const uint32_t word_ct_m1 = (sample_ct - 1) / kBitsPerWordD2;\n  float* write_iter = geno_float;\n  uint32_t subgroup_len = kBitsPerWordD2;\n  for (uint32_t widx = 0; ; ++widx) {\n    if (widx >= word_ct_m1) {\n      if (widx > word_ct_m1) {\n        break;\n      }\n      subgroup_len = ModNz(sample_ct, kBitsPerWordD2);\n    }\n    uintptr_t geno_word = genoarr[widx];\n    for (uint32_t uii = 0; uii != subgroup_len; ++uii) {\n      *write_iter++ = kGenoToFloat[geno_word & 3];\n      geno_word >>= 2;\n    }\n  }\n  if (dosage_ct) {\n    const uint16_t* dosage_main_iter = dosage_main;\n    uintptr_t sample_uidx_base = 0;\n    uintptr_t cur_bits = dosage_present[0];\n    for (uint32_t dosage_idx = 0; dosage_idx != dosage_ct; ++dosage_idx) {\n      const uintptr_t sample_uidx = BitIter1(dosage_present, &sample_uidx_base, &cur_bits);\n      // multiply by 2^{-14}\n      geno_float[sample_uidx] = S_CAST(float, *dosage_main_iter++) * S_CAST(float, 0.00006103515625);\n    }\n  }\n}\n\nvoid Dosage16ToDoubles(const double* geno_double_pair_table, const uintptr_t* genoarr, const uintptr_t* dosage_present, const uint16_t* dosage_main, uint32_t sample_ct, uint32_t dosage_ct, double* geno_double) {\n  GenoarrLookup16x8bx2(genoarr, geno_double_pair_table, sample_ct, geno_double);\n  if (dosage_ct) {\n    const uint16_t* dosage_main_iter = dosage_main;\n    uintptr_t sample_uidx_base = 0;\n    uintptr_t cur_bits = dosage_present[0];\n    for (uint32_t dosage_idx = 0; dosage_idx != dosage_ct; ++dosage_idx) {\n      const uintptr_t sample_uidx = BitIter1(dosage_present, &sample_uidx_base, &cur_bits);\n      geno_double[sample_uidx] = S_CAST(double, *dosage_main_iter++) * 0.00006103515625;\n    }\n  }\n}\n\nBoolErr Dosage16ToDoublesMeanimpute(const uintptr_t* genoarr, const uintptr_t* dosage_present, const uint16_t* dosage_main, uint32_t sample_ct, uint32_t dosage_ct, double* geno_double) {\n  STD_ARRAY_DECL(uint32_t, 4, genocounts);\n  double geno_double_pair_buf[32];\n  if (!dosage_ct) {\n    GenoarrCountFreqsUnsafe(genoarr, sample_ct, genocounts);\n    const double* geno_double_pair_table = kGenoDoublePairs;\n    if (genocounts[3]) {\n      const uint32_t denom = sample_ct - genocounts[3];\n      if (!denom) {\n        return 1;\n      }\n      const uint32_t numer = genocounts[1] + 2 * genocounts[2];\n      const double missing_val = 
u63tod(numer) / u31tod(denom);\n      geno_double_pair_buf[0] = 0.0;\n      geno_double_pair_buf[2] = 1.0;\n      geno_double_pair_buf[4] = 2.0;\n      geno_double_pair_buf[6] = missing_val;\n      InitLookup16x8bx2(geno_double_pair_buf);\n      geno_double_pair_table = geno_double_pair_buf;\n    }\n    GenoarrLookup16x8bx2(genoarr, geno_double_pair_table, sample_ct, geno_double);\n    return 0;\n  }\n  // In the generic case, it may be faster to check for the existence of a\n  // missing value before calling GenoarrCountInvsubsetFreqs2() (since if there\n  // are no missing values, we don't need to count at all).  However, we assume\n  // the caller is using this function over Dosage16ToDoubles() for a reason.\n  GenoarrCountInvsubsetFreqs2(genoarr, dosage_present, sample_ct, sample_ct - dosage_ct, genocounts);\n  const double* geno_double_pair_table = kGenoDoublePairs;\n  if (genocounts[3]) {\n    uint64_t denom = sample_ct - genocounts[3];\n    if (!denom) {\n      return 1;\n    }\n    denom *= 16384LLU;\n    uint64_t numer = 0;\n    for (uint32_t dosage_idx = 0; dosage_idx != dosage_ct; ++dosage_idx) {\n      numer += dosage_main[dosage_idx];\n    }\n    numer += 16384LLU * (genocounts[1] + 2 * genocounts[2]);\n    const double missing_val = u63tod(numer) / u63tod(denom);\n    geno_double_pair_buf[0] = 0.0;\n    geno_double_pair_buf[2] = 1.0;\n    geno_double_pair_buf[4] = 2.0;\n    geno_double_pair_buf[6] = missing_val;\n    InitLookup16x8bx2(geno_double_pair_buf);\n    geno_double_pair_table = geno_double_pair_buf;\n  }\n  GenoarrLookup16x8bx2(genoarr, geno_double_pair_table, sample_ct, geno_double);\n  uintptr_t sample_uidx_base = 0;\n  uintptr_t cur_bits = dosage_present[0];\n  for (uint32_t dosage_idx = 0; dosage_idx != dosage_ct; ++dosage_idx) {\n    const uintptr_t sample_uidx = BitIter1(dosage_present, &sample_uidx_base, &cur_bits);\n    geno_double[sample_uidx] = S_CAST(double, dosage_main[dosage_idx]) * 0.00006103515625;\n  }\n  return 0;\n}\n\ndouble LinearCombinationMeanimpute(const double* weights, const uintptr_t* genoarr, const uintptr_t* dosage_present, const uint16_t* dosage_main, uint32_t sample_ct, uint32_t dosage_ct) {\n  const uint32_t word_ct = DivUp(sample_ct, kBitsPerWordD2);\n  double result = 0.0;\n  double result2 = 0.0;\n  double miss_weight = 0.0;\n  if (!dosage_ct) {\n    for (uint32_t widx = 0; widx != word_ct; ++widx) {\n      const uintptr_t geno_word = genoarr[widx];\n      if (!geno_word) {\n        continue;\n      }\n      const double* cur_weights = &(weights[widx * kBitsPerWordD2]);\n      uintptr_t geno_word1 = geno_word & kMask5555;\n      uintptr_t geno_word2 = (geno_word >> 1) & kMask5555;\n      uintptr_t geno_missing_word = geno_word1 & geno_word2;\n      geno_word1 ^= geno_missing_word;\n      while (geno_word1) {\n        const uint32_t sample_idx_lowbits = ctzw(geno_word1) / 2;\n        result += cur_weights[sample_idx_lowbits];\n        geno_word1 &= geno_word1 - 1;\n      }\n      geno_word2 ^= geno_missing_word;\n      while (geno_word2) {\n        const uint32_t sample_idx_lowbits = ctzw(geno_word2) / 2;\n        result2 += cur_weights[sample_idx_lowbits];\n        geno_word2 &= geno_word2 - 1;\n      }\n      while (geno_missing_word) {\n        const uint32_t sample_idx_lowbits = ctzw(geno_missing_word) / 2;\n        miss_weight += cur_weights[sample_idx_lowbits];\n        geno_missing_word &= geno_missing_word - 1;\n      }\n    }\n    result += 2 * result2;\n    if (miss_weight != 0.0) {\n      // bugfix (29 Oct 2019): previous 
mean-imputation formula was based on\n      // *weighted* MAF, which was obviously nonsense when negative weights\n      // were present.\n      STD_ARRAY_DECL(uint32_t, 4, genocounts);\n      GenoarrCountFreqsUnsafe(genoarr, sample_ct, genocounts);\n      const double numer = u63tod(genocounts[1] + 2 * genocounts[2]);\n      const double denom = u31tod(sample_ct - genocounts[3]);\n      result += miss_weight * (numer / denom);\n    }\n    return result;\n  }\n  const Halfword* dosage_present_hws = R_CAST(const Halfword*, dosage_present);\n  uint32_t onealt_ct = 0;\n  uint32_t twoalt_ct = 0;\n  uint32_t missing_ct = 0;\n  for (uint32_t widx = 0; widx != word_ct; ++widx) {\n    const uintptr_t geno_word = genoarr[widx];\n    if (geno_word) {\n      const double* cur_weights = &(weights[widx * kBitsPerWordD2]);\n      uintptr_t geno_word1 = geno_word & kMask5555;\n      uintptr_t geno_word2 = (geno_word >> 1) & kMask5555;\n      uintptr_t geno_missing_word = geno_word1 & geno_word2;\n      const uintptr_t mask_word = ~(geno_missing_word | UnpackHalfwordToWord(dosage_present_hws[widx]));\n      geno_word1 &= mask_word;\n      while (geno_word1) {\n        const uint32_t sample_idx_lowbits = ctzw(geno_word1) / 2;\n        result += cur_weights[sample_idx_lowbits];\n        // probably sparse enough that this is faster than popcount?\n        ++onealt_ct;\n        geno_word1 &= geno_word1 - 1;\n      }\n      geno_word2 &= mask_word;\n      while (geno_word2) {\n        const uint32_t sample_idx_lowbits = ctzw(geno_word2) / 2;\n        result2 += cur_weights[sample_idx_lowbits];\n        ++twoalt_ct;\n        geno_word2 &= geno_word2 - 1;\n      }\n      while (geno_missing_word) {\n        const uint32_t sample_idx_lowbits = ctzw(geno_missing_word) / 2;\n        miss_weight += cur_weights[sample_idx_lowbits];\n        ++missing_ct;\n        geno_missing_word &= geno_missing_word - 1;\n      }\n    }\n  }\n  result += result2 * 2;\n  const uint16_t* dosage_main_iter = dosage_main;\n  double resultx = 0.0;\n  uintptr_t sample_uidx_base = 0;\n  uintptr_t cur_bits = dosage_present[0];\n  if (miss_weight == 0.0) {\n    for (uint32_t dosage_idx = 0; dosage_idx != dosage_ct; ++dosage_idx) {\n      const uintptr_t sample_uidx = BitIter1(dosage_present, &sample_uidx_base, &cur_bits);\n      resultx += S_CAST(double, *dosage_main_iter++) * weights[sample_uidx];\n    }\n    result += 0.00006103515625 * resultx;\n    return result;\n  }\n  // Also need to track dosage-sum for mean-imputation.\n  uint64_t dosage_sum = 0;\n  for (uint32_t dosage_idx = 0; dosage_idx != dosage_ct; ++dosage_idx) {\n    const uintptr_t sample_uidx = BitIter1(dosage_present, &sample_uidx_base, &cur_bits);\n    const uint32_t cur_dosage = *dosage_main_iter++;\n    dosage_sum += cur_dosage;\n    resultx += u31tod(cur_dosage) * weights[sample_uidx];\n  }\n  result += 0.00006103515625 * resultx;\n  const double numer = u63tod(16384 * S_CAST(uint64_t, onealt_ct + 2 * twoalt_ct) + dosage_sum);\n  const double denom = 16384 * u31tod(sample_ct - missing_ct);\n  result += miss_weight * (numer / denom);\n  return result;\n}\n\nvoid BytesToBitsUnsafe(const uint8_t* boolbytes, uint32_t sample_ct, uintptr_t* bitarr) {\n  const uint32_t ull_ct_m1 = (sample_ct - 1) / 8;\n  const unsigned char* boolbytes_uc = DowncastKToUc(boolbytes);\n  unsigned char* write_alias = DowncastToUc(bitarr);\n  for (uint32_t ullidx = 0; ; ++ullidx) {\n    uint64_t cur_ull;\n    if (ullidx >= ull_ct_m1) {\n      if (ullidx > ull_ct_m1) {\n        return;\n      }\n  
    cur_ull = SubU64Load(&(boolbytes_uc[ullidx * sizeof(int64_t)]), ModNz(sample_ct, 8));\n    } else {\n      CopyFromUnalignedOffsetU64(&cur_ull, boolbytes_uc, ullidx);\n    }\n    // assuming boolbytes is 0/1-valued, this multiply-and-shift maps binary\n    //  h0000000g0000000f... to binary hgfedcba.\n    //  ^       ^       ^\n    //  |       |       |\n    // 56      48      40\n    // (the constant has bits 0, 7, 14, 21, 28, 35, 42, and 49 set)\n    // (can also use _pext_u64() in AVX2 case)\n    write_alias[ullidx] = S_CAST(unsigned char, (cur_ull * 0x2040810204081LLU) >> 49);\n  }\n}\n\nvoid BytesToGenoarrUnsafe(const int8_t* genobytes, uint32_t sample_ct, uintptr_t* genoarr) {\n  const uint32_t word_ct_m1 = (sample_ct - 1) / kBytesPerWord;\n  const unsigned char* genobytes_uc = DowncastKToUc(genobytes);\n  Quarterword* write_alias = R_CAST(Quarterword*, genoarr);\n  for (uint32_t widx = 0; ; ++widx) {\n    uintptr_t ww;\n    if (widx >= word_ct_m1) {\n      if (widx > word_ct_m1) {\n        return;\n      }\n      ww = SubwordLoad(&(genobytes_uc[widx * kBytesPerWord]), ModNz(sample_ct, kBytesPerWord));\n    } else {\n      CopyFromUnalignedOffsetW(&ww, genobytes_uc, widx);\n    }\n    write_alias[widx] = Pack0303Mask(ww);\n  }\n}\n\nvoid AlleleCodesToGenoarrUnsafe(const int32_t* allele_codes, const unsigned char* phasepresent_bytes, uint32_t sample_ct, uintptr_t* genoarr, uintptr_t* phasepresent, uintptr_t* phaseinfo) {\n  // - If phasepresent_bytes is nullptr, phasepresent is not updated.  In this\n  //   case, phaseinfo is updated iff it's not nullptr.  It's okay for both\n  //   phasepresent and phaseinfo to be nullptr here.\n  // - Otherwise, phasepresent and phaseinfo are always updated; neither can be\n  //   nullptr.\n  // - Trailing bits of phasepresent/phaseinfo may not be zeroed out.\n  const uint32_t word_ct_m1 = (sample_ct - 1) / kBitsPerWordD2;\n  uint32_t subgroup_len = kBitsPerWordD2;\n  const uint32_t* read_alias = R_CAST(const uint32_t*, allele_codes);\n  Halfword* phaseinfo_alias = R_CAST(Halfword*, phaseinfo);\n  if (!phasepresent_bytes) {\n    for (uint32_t widx = 0; ; ++widx) {\n      if (widx >= word_ct_m1) {\n        if (widx > word_ct_m1) {\n          return;\n        }\n        subgroup_len = ModNz(sample_ct, kBitsPerWordD2);\n      }\n      uintptr_t geno_write_word = 0;\n      if (!phaseinfo) {\n        for (uint32_t uii = 0; uii != subgroup_len; ++uii) {\n          // 0,0 -> 0\n          // 0,1 or 1,0 -> 1\n          // 1,1 -> 2\n          // -9,-9 -> 3\n          // undefined behavior on e.g. 
0,2\n          const uint32_t first_code = *read_alias++;\n          const uint32_t second_code = *read_alias++;\n          uintptr_t cur_geno;\n          if (first_code <= 1) {\n            cur_geno = first_code + second_code;\n          } else {\n            // todo: test whether branchless is better\n            // (in practice, this will usually be predictable?)\n            cur_geno = 3;\n          }\n          geno_write_word |= (cur_geno << (uii * 2));\n        }\n      } else {\n        Halfword phaseinfo_write_hw = 0;\n        for (uint32_t uii = 0; uii != subgroup_len; ++uii) {\n          // set phaseinfo_write_hw bit iff 1,0\n          const uint32_t first_code = *read_alias++;\n          const uint32_t second_code = *read_alias++;\n          uintptr_t cur_geno;\n          if (first_code <= 1) {\n            cur_geno = first_code + second_code;\n            phaseinfo_write_hw |= (cur_geno & first_code) << uii;\n          } else {\n            // todo: test whether branchless is better\n            // (in practice, this will usually be predictable?)\n            cur_geno = 3;\n          }\n          geno_write_word |= (cur_geno << (uii * 2));\n        }\n        phaseinfo_alias[widx] = phaseinfo_write_hw;\n      }\n      genoarr[widx] = geno_write_word;\n    }\n  }\n  const unsigned char* phasepresent_bytes_iter = phasepresent_bytes;\n  Halfword* phasepresent_alias = R_CAST(Halfword*, phasepresent);\n  for (uint32_t widx = 0; ; ++widx) {\n    if (widx >= word_ct_m1) {\n      if (widx > word_ct_m1) {\n        return;\n      }\n      subgroup_len = ModNz(sample_ct, kBitsPerWordD2);\n    }\n    uintptr_t geno_write_word = 0;\n    Halfword phasepresent_write_hw = 0;\n    Halfword phaseinfo_write_hw = 0;\n    for (uint32_t uii = 0; uii != subgroup_len; ++uii) {\n      const uint32_t first_code = *read_alias++;\n      const uint32_t second_code = *read_alias++;\n      uintptr_t cur_geno;\n      if (first_code <= 1) {\n        cur_geno = first_code + second_code;\n        const uint32_t cur_phasepresent = cur_geno & phasepresent_bytes_iter[uii];\n        phasepresent_write_hw |= cur_phasepresent << uii;\n        phaseinfo_write_hw |= (cur_phasepresent & first_code) << uii;\n      } else {\n        cur_geno = 3;\n      }\n      geno_write_word |= (cur_geno << (uii * 2));\n    }\n    phasepresent_bytes_iter = &(phasepresent_bytes_iter[subgroup_len]);\n    phasepresent_alias[widx] = phasepresent_write_hw;\n    phaseinfo_alias[widx] = phaseinfo_write_hw;\n    genoarr[widx] = geno_write_word;\n  }\n}\n\n// Does not clear trailing bits of genovec, phasepresent, or phaseinfo.\n// Returns max(2, 1 + max allele code) if allele_codes is valid, -1 if invalid.\nint32_t ConvertMultiAlleleCodesUnsafe(const int32_t* allele_codes, const unsigned char* phasepresent_bytes, uint32_t sample_ct, uintptr_t* genoarr, uintptr_t* patch_01_set, AlleleCode* patch_01_vals, uintptr_t* patch_10_set, AlleleCode* patch_10_vals, uint32_t* patch_01_ctp, uint32_t* patch_10_ctp, uintptr_t* phasepresent, uintptr_t* phaseinfo) {\n  const uint32_t sample_ctl = DivUp(sample_ct, kBitsPerWord);\n  const uint32_t word_ct_m1 = (sample_ct - 1) / kBitsPerWordD2;\n  uint32_t subgroup_len = kBitsPerWordD2;\n  const uint32_t* read_alias = R_CAST(const uint32_t*, allele_codes);\n  if (phasepresent_bytes) {\n    BytesToBitsUnsafe(phasepresent_bytes, sample_ct, phasepresent);\n  }\n  Halfword* phasepresent_alias = R_CAST(Halfword*, phasepresent);\n  Halfword* phaseinfo_alias = R_CAST(Halfword*, phaseinfo);\n  // todo: try scanning 
allele_codes for its maximum value upfront, instead of\n  // checking in inner loops; then mirror PglMultiallelicDenseToSparse() in\n  // valid multiallelic case, and call AlleleCodesToGenoarrUnsafe() otherwise.\n  ZeroWArr(sample_ctl, patch_01_set);\n  ZeroWArr(sample_ctl, patch_10_set);\n  uint32_t max_allele_code = 1;\n  Halfword* patch_01_set_alias = R_CAST(Halfword*, patch_01_set);\n  Halfword* patch_10_set_alias = R_CAST(Halfword*, patch_10_set);\n  AlleleCode* patch_01_iter = patch_01_vals;\n  AlleleCode* patch_10_iter = patch_10_vals;\n  for (uint32_t widx = 0; ; ++widx) {\n    if (widx >= word_ct_m1) {\n      if (widx > word_ct_m1) {\n        if (max_allele_code >= kPglMaxAlleleCt) {\n          return -1;\n        }\n        *patch_01_ctp = patch_01_iter - patch_01_vals;\n        *patch_10_ctp = (patch_10_iter - patch_10_vals) >> 1;\n        return S_CAST(int32_t, max_allele_code + 1);\n      }\n      subgroup_len = ModNz(sample_ct, kBitsPerWordD2);\n    }\n    uintptr_t geno_write_word = 0;\n    Halfword phaseinfo_write_hw = 0;\n    Halfword het_2_hw = 0;\n    for (uint32_t uii = 0; uii != subgroup_len; ++uii) {\n      // 0,0 -> 0\n      // 0,x or x,0 -> 1\n      // x,y -> 2\n      // -9,-9 -> 3\n      const uint32_t first_code = *read_alias++;\n      const uint32_t second_code = *read_alias++;\n      uintptr_t cur_geno = 0;\n      if (first_code == 0) {\n        if (second_code != 0) {\n          cur_geno = 1;\n          if (second_code > 1) {\n            if (second_code > max_allele_code) {\n              max_allele_code = second_code;\n            }\n            patch_01_set_alias[widx] |= 1U << uii;\n            // If second_code is actually out-of-range, harmlessly truncate\n            // here, and error out before function return.\n            // (this code is correct without the static-cast, but may as well\n            // be explicit about where truncation could happen.)\n            *patch_01_iter++ = S_CAST(AlleleCode, second_code);\n          }\n        }\n      } else if (first_code == 0xfffffff7U) {\n        if (second_code != 0xfffffff7U) {\n          return -1;\n        }\n        cur_geno = 3;\n      } else {\n        // first_code >= 1\n        if (second_code == 0) {\n          cur_geno = 1;\n          phaseinfo_write_hw |= 1U << uii;\n          if (first_code > 1) {\n            if (first_code > max_allele_code) {\n              max_allele_code = first_code;\n            }\n            patch_01_set_alias[widx] |= 1U << uii;\n            *patch_01_iter++ = S_CAST(AlleleCode, first_code);\n          }\n        } else {\n          cur_geno = 2;\n          if (first_code <= second_code) {\n            if (second_code > 1) {\n              if (second_code > max_allele_code) {\n                max_allele_code = second_code;\n              }\n              patch_10_set_alias[widx] |= 1U << uii;\n              *patch_10_iter++ = S_CAST(AlleleCode, first_code);\n              *patch_10_iter++ = S_CAST(AlleleCode, second_code);\n              if (first_code != second_code) {\n                het_2_hw |= 1U << uii;\n              }\n            }\n          } else {\n            // first_code > second_code\n            if (first_code > max_allele_code) {\n              max_allele_code = first_code;\n            }\n            phaseinfo_write_hw |= 1U << uii;\n            patch_10_set_alias[widx] |= 1U << uii;\n            het_2_hw |= 1U << uii;\n            *patch_10_iter++ = S_CAST(AlleleCode, second_code);\n            *patch_10_iter++ = S_CAST(AlleleCode, 
first_code);\n          }\n        }\n      }\n      geno_write_word |= (cur_geno << (uii * 2));\n    }\n    genoarr[widx] = geno_write_word;\n    if (phasepresent_bytes) {\n      const uintptr_t het_1_word = geno_write_word & (~(geno_write_word >> 1)) & kMask5555;\n      Halfword het_hw = het_2_hw | PackWordToHalfword(het_1_word);\n      phasepresent_alias[widx] &= het_hw;\n    }\n    if (phaseinfo_alias) {\n      phaseinfo_alias[widx] = phaseinfo_write_hw;\n    }\n  }\n}\n\nstatic inline uint32_t BiallelicDosage16Halfdist(uint32_t dosage_int) {\n  const uint32_t dosage_int_rem = dosage_int & 16383;\n  return abs_i32(S_CAST(int32_t, dosage_int_rem) - 8192);\n}\n\nvoid FloatsToDosage16(const float* floatarr, uint32_t sample_ct, uint32_t hard_call_halfdist, uintptr_t* genoarr, uintptr_t* dosage_present, uint16_t* dosage_main, uint32_t* dosage_ct_ptr) {\n  const uint32_t word_ct_m1 = (sample_ct - 1) / kBitsPerWordD2;\n  const float* read_iter = floatarr;\n  Halfword* dosage_present_alias = R_CAST(Halfword*, dosage_present);\n  uint16_t* dosage_main_iter = dosage_main;\n  uint32_t subgroup_len = kBitsPerWordD2;\n  for (uint32_t widx = 0; ; ++widx) {\n    if (widx >= word_ct_m1) {\n      if (widx > word_ct_m1) {\n        if (widx % 2) {\n          dosage_present_alias[widx] = 0;\n        }\n        break;\n      }\n      subgroup_len = ModNz(sample_ct, kBitsPerWordD2);\n    }\n    uintptr_t geno_word = 0;\n    uint32_t dosage_present_hw = 0;\n    for (uint32_t sample_idx_lowbits = 0; sample_idx_lowbits != subgroup_len; ++sample_idx_lowbits) {\n      // 0..2 -> 0..32768\n      const float fxx = (*read_iter++) * 16384 + 0.5;\n      uintptr_t cur_geno = 3;\n      if ((fxx >= 0.0) && (fxx < 32769)) {\n        uint32_t dosage_int = S_CAST(int32_t, fxx);\n        const uint32_t cur_halfdist = BiallelicDosage16Halfdist(dosage_int);\n        if (cur_halfdist >= hard_call_halfdist) {\n          cur_geno = (dosage_int + (8192 * k1LU)) / 16384;\n        }\n        if (cur_halfdist != 8192) {\n          dosage_present_hw |= 1U << sample_idx_lowbits;\n          *dosage_main_iter++ = dosage_int;\n        }\n      }\n      geno_word |= cur_geno << (2 * sample_idx_lowbits);\n    }\n    genoarr[widx] = geno_word;\n    dosage_present_alias[widx] = dosage_present_hw;\n  }\n  *dosage_ct_ptr = dosage_main_iter - dosage_main;\n}\n\nvoid DoublesToDosage16(const double* doublearr, uint32_t sample_ct, uint32_t hard_call_halfdist, uintptr_t* genoarr, uintptr_t* dosage_present, uint16_t* dosage_main, uint32_t* dosage_ct_ptr) {\n  const uint32_t word_ct_m1 = (sample_ct - 1) / kBitsPerWordD2;\n  const double* read_iter = doublearr;\n  Halfword* dosage_present_alias = R_CAST(Halfword*, dosage_present);\n  uint16_t* dosage_main_iter = dosage_main;\n  uint32_t subgroup_len = kBitsPerWordD2;\n  for (uint32_t widx = 0; ; ++widx) {\n    if (widx >= word_ct_m1) {\n      if (widx > word_ct_m1) {\n        if (widx % 2) {\n          dosage_present_alias[widx] = 0;\n        }\n        break;\n      }\n      subgroup_len = ModNz(sample_ct, kBitsPerWordD2);\n    }\n    uintptr_t geno_word = 0;\n    uint32_t dosage_present_hw = 0;\n    for (uint32_t sample_idx_lowbits = 0; sample_idx_lowbits != subgroup_len; ++sample_idx_lowbits) {\n      // 0..2 -> 0..32768\n      const double dxx = (*read_iter++) * 16384 + 0.5;\n      uintptr_t cur_geno = 3;\n      if ((dxx >= 0.0) && (dxx < 32769)) {\n        uint32_t dosage_int = S_CAST(int32_t, dxx);\n        const uint32_t cur_halfdist = BiallelicDosage16Halfdist(dosage_int);\n        if 
(cur_halfdist >= hard_call_halfdist) {\n          cur_geno = (dosage_int + (8192 * k1LU)) / 16384;\n        }\n        if (cur_halfdist != 8192) {\n          dosage_present_hw |= 1U << sample_idx_lowbits;\n          *dosage_main_iter++ = dosage_int;\n        }\n      }\n      geno_word |= cur_geno << (2 * sample_idx_lowbits);\n    }\n    genoarr[widx] = geno_word;\n    dosage_present_alias[widx] = dosage_present_hw;\n  }\n  *dosage_ct_ptr = dosage_main_iter - dosage_main;\n}\n\n#ifdef __cplusplus\n}  // namespace plink2\n#endif\n"
  },
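  {
    "path": "example/pgenlib_dosage16_sketch.cpp",
    "content": "// Hedged usage sketch (added for illustration; not a file from upstream\n// pgenlib or regenie): drives plink2::FloatsToDosage16() from\n// pgenlib_ffi_support.cc.  The 0.1 hard-call threshold, the buffer sizing, and\n// this file's path are assumptions made for the example.\n\n#include <cstdio>\n#include <vector>\n\n#include \"../external_libs/pgenlib/pgenlib_ffi_support.h\"\n\nint main() {\n  // Dosages on the usual 0..2 scale; out-of-range values (like -9) become the\n  // missing genotype.\n  const float dosages[] = {0.02f, 1.0f, 1.95f, -9.0f};\n  const uint32_t sample_ct = 4;\n  // 2-bit hardcalls packed into words; dosage_present gets one spare word so\n  // the trailing-halfword zeroing inside FloatsToDosage16() stays in bounds.\n  std::vector<uintptr_t> genoarr(plink2::DivUp(sample_ct, plink2::kBitsPerWordD2));\n  std::vector<uintptr_t> dosage_present(plink2::DivUp(sample_ct, plink2::kBitsPerWord) + 1, 0);\n  std::vector<uint16_t> dosage_main(sample_ct);\n  uint32_t dosage_ct;\n  // halfdist cutoff for a ~0.1 hard-call threshold: 8192 - 0.1 * 16384 = 6554.\n  plink2::FloatsToDosage16(dosages, sample_ct, 6554, genoarr.data(), dosage_present.data(), dosage_main.data(), &dosage_ct);\n  // Expect 2: the exact hardcall 1.0 and the missing -9 store no dosage.\n  printf(\"explicit dosages kept: %u\\n\", dosage_ct);\n  return 0;\n}\n"
  },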
  {
    "path": "external_libs/pgenlib/pgenlib_ffi_support.h",
    "content": "#ifndef __PGENLIB_FFI_SUPPORT_H__\n#define __PGENLIB_FFI_SUPPORT_H__\n\n// This library is part of PLINK 2.0, copyright (C) 2005-2024 Shaun Purcell,\n// Christopher Chang.\n//\n// This library is free software: you can redistribute it and/or modify it\n// under the terms of the GNU Lesser General Public License as published by the\n// Free Software Foundation; either version 3 of the License, or (at your\n// option) any later version.\n//\n// This library is distributed in the hope that it will be useful, but WITHOUT\n// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or\n// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License\n// for more details.\n//\n// You should have received a copy of the GNU Lesser General Public License\n// along with this library.  If not, see <http://www.gnu.org/licenses/>.\n\n#include \"include/pgenlib_misc.h\"\n\n#ifdef __cplusplus\nnamespace plink2 {\n#endif\n\n// Could define a slightly-more-efficient version of this function which uses a\n// missing code of 3 instead of -9.  But let's play well with existing scripts\n// first.\nvoid GenoarrToBytesMinus9(const uintptr_t* genoarr, uint32_t sample_ct, int8_t* genobytes);\n\nvoid GenoarrToInt32sMinus9(const uintptr_t* genoarr, uint32_t sample_ct, int32_t* geno_int32);\n\nvoid GenoarrToInt64sMinus9(const uintptr_t* genoarr, uint32_t sample_ct, int64_t* geno_int64);\n\n// May want to use STD_ARRAY_INIT_{START,END}... though it may not be worth the\n// additional compilation headaches here.\nextern const double kGenoDoublePairs[32];\n\nHEADER_INLINE void GenoarrToDoublesMinus9(const uintptr_t* genoarr, uint32_t sample_ct, double* geno_double) {\n  GenoarrLookup16x8bx2(genoarr, kGenoDoublePairs, sample_ct, geno_double);\n}\n\nHEADER_INLINE void GenoarrToAlleleCodes(const uint64_t* geno_to_intcode_pair_table, const uintptr_t* genoarr, uint32_t sample_ct, int32_t* allele_codes) {\n  GenoarrLookup16x8bx2(genoarr, geno_to_intcode_pair_table, sample_ct, allele_codes);\n}\n\nextern const uint64_t kGenoToIntcodeDPairs[32];\n\n// For FFI, allele_codes is always int32_t.  
Python/R programmers should not\n// need to worry about whether pgenlib was compiled with 1-, 2-, or 4-byte\n// AlleleCode.\n//\n// phasebytes can be nullptr; if it isn't, entry is 1 iff genotype is an\n// explicitly phased het, OR genotype is homozygous\n// phasepresent cannot be nullptr\nvoid GenoarrPhasedToAlleleCodes(const uint64_t* geno_to_intcode_dpair_table, const uintptr_t* genoarr, const uintptr_t* phasepresent, const uintptr_t* phaseinfo, uint32_t sample_ct, uint32_t phasepresent_ct, unsigned char* phasebytes, int32_t* allele_codes);\n\nHEADER_INLINE void GenoarrPhasedToAlleleCodesMinus9(const uintptr_t* genoarr, const uintptr_t* phasepresent, const uintptr_t* phaseinfo, uint32_t sample_ct, uint32_t phasepresent_ct, unsigned char* phasebytes, int32_t* allele_codes) {\n  GenoarrPhasedToAlleleCodes(kGenoToIntcodeDPairs, genoarr, phasepresent, phaseinfo, sample_ct, phasepresent_ct, phasebytes, allele_codes);\n}\n\nvoid GenoarrMPToAlleleCodes(const uint64_t* geno_to_intcode_dpair_table, const PgenVariant* pgv, uint32_t sample_ct, unsigned char* phasebytes, int32_t* allele_codes);\n\nHEADER_INLINE void GenoarrMPToAlleleCodesMinus9(const PgenVariant* pgv, uint32_t sample_ct, unsigned char* phasebytes, int32_t* allele_codes) {\n  GenoarrMPToAlleleCodes(kGenoToIntcodeDPairs, pgv, sample_ct, phasebytes, allele_codes);\n}\n\n// assumes transposed genoarr, phaseinfo\nvoid GenoarrPhasedToHapCodes(const uintptr_t* genoarr, const uintptr_t* phaseinfo, uint32_t variant_batch_size, int32_t* hap0_codes_iter, int32_t* hap1_codes_iter);\n\nvoid Dosage16ToFloatsMinus9(const uintptr_t* genoarr, const uintptr_t* dosage_present, const uint16_t* dosage_main, uint32_t sample_ct, uint32_t dosage_ct, float* geno_float);\n\nvoid Dosage16ToDoubles(const double* geno_double_pair_table, const uintptr_t* genoarr, const uintptr_t* dosage_present, const uint16_t* dosage_main, uint32_t sample_ct, uint32_t dosage_ct, double* geno_double);\n\n// If all samples are missing, this errors out.\nBoolErr Dosage16ToDoublesMeanimpute(const uintptr_t* genoarr, const uintptr_t* dosage_present, const uint16_t* dosage_main, uint32_t sample_ct, uint32_t dosage_ct, double* geno_double);\n\n// Currently requires trailing bits of genoarr to be zeroed out.\ndouble LinearCombinationMeanimpute(const double* weights, const uintptr_t* genoarr, const uintptr_t* dosage_present, const uint16_t* dosage_main, uint32_t sample_ct, uint32_t dosage_ct);\n\nHEADER_INLINE void Dosage16ToDoublesMinus9(const uintptr_t* genoarr, const uintptr_t* dosage_present, const uint16_t* dosage_main, uint32_t sample_ct, uint32_t dosage_ct, double* geno_double) {\n  Dosage16ToDoubles(kGenoDoublePairs, genoarr, dosage_present, dosage_main, sample_ct, dosage_ct, geno_double);\n}\n\n// Does not zero out trailing bits of bitarr.\nvoid BytesToBitsUnsafe(const uint8_t* boolbytes, uint32_t sample_ct, uintptr_t* bitarr);\n\n// Bottom 2 bits are extracted from every byte.  Conveniently, -9 and 3 are\n// treated identically.\n// Does not zero out trailing bits of genoarr.\nvoid BytesToGenoarrUnsafe(const int8_t* genobytes, uint32_t sample_ct, uintptr_t* genoarr);\n\n// - Assumes biallelic variant, does not validate that.\n// - Low bit of each element of phasepresent_bytes is significant.\n// - If phasepresent_bytes is nullptr, phasepresent is not updated.  In this\n//   case, phaseinfo is updated iff it's not nullptr.  
It's okay for both\n//   phasepresent and phaseinfo to be nullptr here.\n// - Otherwise, phasepresent and phaseinfo are always updated; neither can be\n//   nullptr.\n// - Trailing bits of phasepresent/phaseinfo may not be zeroed out.\nvoid AlleleCodesToGenoarrUnsafe(const int32_t* allele_codes, const unsigned char* phasepresent_bytes, uint32_t sample_ct, uintptr_t* genoarr, uintptr_t* phasepresent, uintptr_t* phaseinfo);\n\n// Does not clear trailing bits of genovec, phasepresent, or phaseinfo.\n// Returns min(2, 1 + max allele code) if allele_codes is valid, -1 if invalid.\nint32_t ConvertMultiAlleleCodesUnsafe(const int32_t* allele_codes, const unsigned char* phasepresent_bytes, uint32_t sample_ct, uintptr_t* genoarr, uintptr_t* patch_01_set, AlleleCode* patch_01_vals, uintptr_t* patch_10_set, AlleleCode* patch_10_vals, uint32_t* patch_01_ctp, uint32_t* patch_10_ctp, uintptr_t* phasepresent, uintptr_t* phaseinfo);\n\nvoid FloatsToDosage16(const float* floatarr, uint32_t sample_ct, uint32_t hard_call_halfdist, uintptr_t* genoarr, uintptr_t* dosage_present, uint16_t* dosage_main, uint32_t* dosage_ct_ptr);\n\nvoid DoublesToDosage16(const double* doublearr, uint32_t sample_ct, uint32_t hard_call_halfdist, uintptr_t* genoarr, uintptr_t* dosage_present, uint16_t* dosage_main, uint32_t* dosage_ct_ptr);\n\n#ifdef __cplusplus\n}  // namespace plink2\n#endif\n\n#endif  // __PGENLIB_FFI_SUPPORT_H__\n"
  },
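  {
    "path": "example/pgenlib_allele_codes_sketch.cpp",
    "content": "// Hedged round-trip sketch (added for illustration; not a file from upstream\n// pgenlib or regenie): packs per-sample allele-code pairs with\n// plink2::AlleleCodesToGenoarrUnsafe(), then expands the 2-bit genotypes back\n// out with plink2::GenoarrToBytesMinus9().  ConvertMultiAlleleCodesUnsafe()\n// consumes the same pair layout.  The path and padding are assumptions made\n// for the example.\n\n#include <cstdio>\n#include <vector>\n\n#include \"../external_libs/pgenlib/pgenlib_ffi_support.h\"\n\nint main() {\n  // Two int32 codes per sample: hom-ref, het, hom-alt, missing (-9,-9).\n  const int32_t allele_codes[] = {0, 0, 0, 1, 1, 1, -9, -9};\n  const uint32_t sample_ct = 4;\n  std::vector<uintptr_t> genoarr(plink2::DivUp(sample_ct, plink2::kBitsPerWordD2));\n  // phasepresent_bytes == nullptr, so the phase output buffers may also be\n  // nullptr (see the header comment).\n  plink2::AlleleCodesToGenoarrUnsafe(allele_codes, nullptr, sample_ct, genoarr.data(), nullptr, nullptr);\n  // Pad the byte buffer in case the lookup writes in small fixed-size blocks.\n  std::vector<int8_t> genobytes(plink2::RoundUpPow2(sample_ct, plink2::kBytesPerVec));\n  plink2::GenoarrToBytesMinus9(genoarr.data(), sample_ct, genobytes.data());\n  for (uint32_t i = 0; i != sample_ct; ++i) {\n    printf(\"%d \", genobytes[i]);  // expect: 0 1 2 -9\n  }\n  printf(\"\\n\");\n  return 0;\n}\n"
  },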
  {
    "path": "external_libs/pgenlib/pgenlibr.cpp",
    "content": "/*\n *\n * File obtained from pgenlibr R library:\n * https://github.com/chrchang/plink-ng/tree/master/2.0/pgenlibr\n *\n * License info obtained from DESCRIPTION file:\n * https://github.com/chrchang/plink-ng/blob/master/2.0/pgenlibr/DESCRIPTION\n * -----------------------------------------------------\n    Package: pgenlibr\n    Type: Package\n    Title: PLINK 2 Binary (.pgen) Reader\n    Version: 0.2\n    Date: 2019-07-10\n    Author: Christopher Chang\n    Maintainer: Christopher Chang <chrchang@alumni.caltech.edu>\n    Description: A thin wrapper over PLINK 2's core libraries which provides an R\n    interface for reading .pgen files.  A minimal .pvar loader is also included.\n    License: LGPL (>= 3)\n    Imports: Rcpp (>= 1.0.1)\n    LinkingTo: Rcpp\n * -----------------------------------------------------\n\n *  Modified by Joelle Mbatchou - June 29 2020\n *  - removed functions that were for R\n *  - split file to header (added link to several standard C++ libraries)\n *  - modified remaining functions to be fully C/C++ compatible \n *  - multithreaded reading of pgen file (04/13/2021)\n *\n * This file remains under LGPL v3 license (license is in same directory as this file)\n */\n\n#include \"pgenlibr.h\"\n\nPgenReader::PgenReader() : _info_ptr(nullptr),\n                             //_allele_idx_offsetsp(nullptr),\n                             _nonref_flagsp(nullptr)\n                             //_state_ptr(nullptr) \n                             {\n}\n\nPgenReader::~PgenReader() {\n  Close();\n}\n\nvoid PgenReader::Load(std::string filename, uint32_t cur_sample_ct, std::vector<int> sample_subset_1based, int nthr) {\n  if (_info_ptr) {\n    Close();\n  }\n  _info_ptr = static_cast<plink2::PgenFileInfo*>(malloc(sizeof(plink2::PgenFileInfo)));\n  if (!_info_ptr) {\n    fprintf(stderr,\"Out of memory\");\n    exit(-1);\n  }\n  plink2::PreinitPgfi(_info_ptr);\n  uint32_t cur_variant_ct = UINT32_MAX;\n  const char* fname = filename.c_str();\n  plink2::PgenHeaderCtrl header_ctrl;\n  uintptr_t pgfi_alloc_cacheline_ct;\n  char errstr_buf[plink2::kPglErrstrBufBlen];\n  if (PgfiInitPhase1(fname, nullptr, cur_variant_ct, cur_sample_ct, &header_ctrl, _info_ptr, &pgfi_alloc_cacheline_ct, errstr_buf) != plink2::kPglRetSuccess) {\n    fprintf(stderr, \"%s\\n\", &(errstr_buf[7]));\n    exit(-1);\n  }\n  const uint32_t raw_variant_ct = _info_ptr->raw_variant_ct;\n  if (header_ctrl & 0x30) {\n    fprintf(stderr,\"Storing of allele count information is not supported (only bi-allelic variants should be present).\");\n    exit(-1);\n    // no need to zero-initialize this\n    //_allele_idx_offsetsp = plink2::CreateRefcountedWptr(raw_variant_ct + 1);\n    //_info_ptr->allele_idx_offsets = _allele_idx_offsetsp->p;\n    // _info_ptr->max_allele_ct updated by PgfiInitPhase2() in this case\n  }\n  _info_ptr->max_allele_ct = 2;\n  if ((header_ctrl & 0xc0) == 0xc0) {\n    // todo: load this in pvar, to enable consistency check.  
we use a\n    // (manually implemented) shared_ptr in preparation for this.\n    const uintptr_t raw_variant_ctl = plink2::DivUp(raw_variant_ct, plink2::kBitsPerWord);\n    // no need to zero-initialize this\n    _nonref_flagsp = plink2::CreateRefcountedWptr(raw_variant_ctl + 1);\n    _info_ptr->nonref_flags = _nonref_flagsp->p;\n  }\n  const uint32_t file_sample_ct = _info_ptr->raw_sample_ct;\n  unsigned char* pgfi_alloc = nullptr;\n  if (plink2::cachealigned_malloc(pgfi_alloc_cacheline_ct * plink2::kCacheline, &pgfi_alloc)) {\n    fprintf(stderr,\"Out of memory\");\n    exit(-1);\n  }\n  uint32_t max_vrec_width;\n  uintptr_t pgr_alloc_cacheline_ct;\n  if (PgfiInitPhase2(header_ctrl, 1, 0, 0, 0, raw_variant_ct, &max_vrec_width, _info_ptr, pgfi_alloc, &pgr_alloc_cacheline_ct, errstr_buf)) {\n    if (pgfi_alloc && (!_info_ptr->vrtypes)) {\n      plink2::aligned_free(pgfi_alloc);\n    }\n    fprintf(stderr,\"%s\\n\", &(errstr_buf[7]));\n    exit(-1);\n  }\n  if ((!_allele_idx_offsetsp) && (_info_ptr->gflags & 4)) {\n    // Note that it's safe to be ignorant of multiallelic variants when\n    // phase and dosage info aren't present; GetAlleleCt() then always returns\n    // 2 when that isn't actually true, and all ALTs are treated as if they\n    // were ALT1, but otherwise everything works properly.\n    fprintf(stderr,\"Multiallelic variants and phase/dosage info simultaneously present; pvar required in this case\");\n    exit(-1);\n  }\n\n  _state_ptr.resize(nthr);\n  _subset_index.resize(nthr);\n  _pgv.resize(nthr);\n  _subset_include_interleaved_vec.resize(nthr);\n  _subset_cumulative_popcounts.resize(nthr);\n  _subset_size.resize(nthr);\n  _subset_include_vec.resize(nthr);\n\n  for(int i = 0; i < nthr; i++) {\n    _state_ptr[i] = static_cast<plink2::PgenReader*>(malloc(sizeof(plink2::PgenReader)));\n    if (!_state_ptr[i]) {\n      fprintf(stderr,\"Out of memory\");\n      exit(-1);\n    }\n    plink2::PreinitPgr(_state_ptr[i]);\n    plink2::PgrSetFreadBuf(nullptr, _state_ptr[i]);\n  }\n\n  const uintptr_t pgr_alloc_main_byte_ct = pgr_alloc_cacheline_ct * plink2::kCacheline;\n  const uintptr_t sample_subset_byte_ct = plink2::DivUp(file_sample_ct, plink2::kBitsPerVec) * plink2::kBytesPerVec;\n  const uintptr_t cumulative_popcounts_byte_ct = plink2::DivUp(file_sample_ct, plink2::kBitsPerWord * plink2::kInt32PerVec) * plink2::kBytesPerVec;\n  const uintptr_t genovec_byte_ct = plink2::DivUp(file_sample_ct, plink2::kNypsPerVec) * plink2::kBytesPerVec;\n  //const uintptr_t ac_byte_ct = plink2::RoundUpPow2(file_sample_ct * sizeof(plink2::AlleleCode), plink2::kBytesPerVec);\n  //const uintptr_t ac2_byte_ct = plink2::RoundUpPow2(file_sample_ct * 2 * sizeof(plink2::AlleleCode), plink2::kBytesPerVec);\n  uintptr_t multiallelic_hc_byte_ct = 0;\n  if (_info_ptr->max_allele_ct != 2) {\n    fprintf(stderr,\"no multiallelic variants allowed\");\n    exit(-1);\n    //multiallelic_hc_byte_ct = 2 * sample_subset_byte_ct + ac_byte_ct + ac2_byte_ct;\n  }\n  const uintptr_t dosage_main_byte_ct = plink2::DivUp(file_sample_ct, (2 * plink2::kInt32PerVec)) * plink2::kBytesPerVec;\n\n\n  for(int i = 0; i < nthr; i++) {\n    unsigned char* pgr_alloc;\n    if (plink2::cachealigned_malloc(pgr_alloc_main_byte_ct + (2 * plink2::kPglNypTransposeBatch + 5) * sample_subset_byte_ct + cumulative_popcounts_byte_ct + (1 + plink2::kPglNypTransposeBatch) * genovec_byte_ct + multiallelic_hc_byte_ct + dosage_main_byte_ct + plink2::kPglBitTransposeBufbytes + 4 * (plink2::kPglNypTransposeBatch * plink2::kPglNypTransposeBatch / 
8), &pgr_alloc)) {\n      fprintf(stderr,\"Out of memory\");\n      exit(-1);\n    }\n    plink2::PglErr reterr = PgrInit(fname, max_vrec_width, _info_ptr, _state_ptr[i], pgr_alloc);\n    if (reterr != plink2::kPglRetSuccess) {\n      if (!plink2::PgrGetFreadBuf(_state_ptr[i])) {\n        plink2::aligned_free(pgr_alloc);\n      }\n      sprintf(errstr_buf, \"PgrInit() error %d\", static_cast<int>(reterr));\n      fprintf(stderr,\"%s\\n\", errstr_buf);\n      exit(-1);\n    }\n    unsigned char* pgr_alloc_iter = &(pgr_alloc[pgr_alloc_main_byte_ct]);\n    _subset_include_vec[i] = reinterpret_cast<uintptr_t*>(pgr_alloc_iter);\n    pgr_alloc_iter = &(pgr_alloc_iter[sample_subset_byte_ct]);\n    _subset_include_interleaved_vec[i] = reinterpret_cast<uintptr_t*>(pgr_alloc_iter);\n    pgr_alloc_iter = &(pgr_alloc_iter[sample_subset_byte_ct]);\n\n#ifdef USE_AVX2\n    _subset_include_interleaved_vec[i][-3] = 0;\n    _subset_include_interleaved_vec[i][-2] = 0;\n#endif\n    _subset_include_interleaved_vec[i][-1] = 0;\n\n    _subset_cumulative_popcounts[i] = reinterpret_cast<uint32_t*>(pgr_alloc_iter);\n\n    pgr_alloc_iter = &(pgr_alloc_iter[cumulative_popcounts_byte_ct]);\n\n    _pgv[i] = std::make_shared<plink2::PgenVariant>();\n    _pgv[i]->genovec = reinterpret_cast<uintptr_t*>(pgr_alloc_iter);\n    pgr_alloc_iter = &(pgr_alloc_iter[genovec_byte_ct]);\n    /*\n       if (multiallelic_hc_byte_ct) {\n       _pgv.patch_01_set = reinterpret_cast<uintptr_t*>(pgr_alloc_iter);\n       pgr_alloc_iter = &(pgr_alloc_iter[sample_subset_byte_ct]);\n       _pgv.patch_01_vals = reinterpret_cast<plink2::AlleleCode*>(pgr_alloc_iter);\n       pgr_alloc_iter = &(pgr_alloc_iter[ac_byte_ct]);\n       _pgv.patch_10_set = reinterpret_cast<uintptr_t*>(pgr_alloc_iter);\n       pgr_alloc_iter = &(pgr_alloc_iter[sample_subset_byte_ct]);\n       _pgv.patch_10_vals = reinterpret_cast<plink2::AlleleCode*>(pgr_alloc_iter);\n       pgr_alloc_iter = &(pgr_alloc_iter[ac2_byte_ct]);\n       } else {\n       */\n    _pgv[i]->patch_01_set = nullptr;\n    _pgv[i]->patch_01_vals = nullptr;\n    _pgv[i]->patch_10_set = nullptr;\n    _pgv[i]->patch_10_vals = nullptr;\n\n    _pgv[i]->phasepresent = reinterpret_cast<uintptr_t*>(pgr_alloc_iter);\n\n    // }\n    pgr_alloc_iter = &(pgr_alloc_iter[sample_subset_byte_ct]);\n    _pgv[i]->phaseinfo = reinterpret_cast<uintptr_t*>(pgr_alloc_iter);\n    pgr_alloc_iter = &(pgr_alloc_iter[sample_subset_byte_ct]);\n    _pgv[i]->dosage_present = reinterpret_cast<uintptr_t*>(pgr_alloc_iter);\n    pgr_alloc_iter = &(pgr_alloc_iter[sample_subset_byte_ct]);\n    _pgv[i]->dosage_main = reinterpret_cast<uint16_t*>(pgr_alloc_iter);\n    pgr_alloc_iter = &(pgr_alloc_iter[dosage_main_byte_ct]);\n\n\n    if (sample_subset_1based.size() > 0) {\n      SetSampleSubsetInternal(sample_subset_1based, i);\n    } else {\n      _subset_size[i] = file_sample_ct;\n    }\n  }\n\n  /*\n  pgr_alloc_iter = &(pgr_alloc_iter[plink2::kPglBitTransposeBufbytes]);\n  _multivar_vmaj_geno_buf = reinterpret_cast<uintptr_t*>(pgr_alloc_iter);\n  pgr_alloc_iter = &(pgr_alloc_iter[plink2::kPglNypTransposeBatch * genovec_byte_ct]);\n  _multivar_vmaj_phasepresent_buf = reinterpret_cast<uintptr_t*>(pgr_alloc_iter);\n  pgr_alloc_iter = &(pgr_alloc_iter[plink2::kPglNypTransposeBatch * sample_subset_byte_ct]);\n  _multivar_vmaj_phaseinfo_buf = reinterpret_cast<uintptr_t*>(pgr_alloc_iter);\n  pgr_alloc_iter = &(pgr_alloc_iter[plink2::kPglNypTransposeBatch * sample_subset_byte_ct]);\n  _multivar_smaj_geno_batch_buf = 
reinterpret_cast<uintptr_t*>(pgr_alloc_iter);\n  pgr_alloc_iter = &(pgr_alloc_iter[plink2::kPglNypTransposeBatch * plink2::kPglNypTransposeBatch / 4]);\n  _multivar_smaj_phaseinfo_batch_buf = reinterpret_cast<uintptr_t*>(pgr_alloc_iter);\n  pgr_alloc_iter = &(pgr_alloc_iter[plink2::kPglNypTransposeBatch * plink2::kPglNypTransposeBatch / 8]);\n  _multivar_smaj_phasepresent_batch_buf = reinterpret_cast<uintptr_t*>(pgr_alloc_iter);\n   pgr_alloc_iter = &(pgr_alloc_iter[plink2::kPglNypTransposeBatch * plink2::kPglNypTransposeBatch / 8]);\n   */\n\n}\n\nuint32_t PgenReader::GetRawSampleCt() const {\n  if (!_info_ptr) {\n    fprintf(stderr,\"pgen is closed\");\n    exit(-1);\n  }\n  return _info_ptr->raw_sample_ct;\n}\n\nuint32_t PgenReader::GetSubsetSize() const {\n  return _subset_size[0];\n}\n\nuint32_t PgenReader::GetVariantCt() const {\n  if (!_info_ptr) {\n    fprintf(stderr,\"pgen is closed\");\n    exit(-1);\n  }\n  return _info_ptr->raw_variant_ct;\n}\n\nuint32_t PgenReader::GetAlleleCt(uint32_t variant_idx) const {\n  if (!_info_ptr) {\n    fprintf(stderr,\"pgen is closed\");\n    exit(-1);\n  }\n  if (variant_idx >= _info_ptr->raw_variant_ct) {\n    char errstr_buf[256];\n    sprintf(errstr_buf, \"variant_num out of range (%d; must be 1..%u)\", variant_idx + 1, _info_ptr->raw_variant_ct);\n    fprintf(stderr,\"%s\\n\", errstr_buf);\n    exit(-1);\n  }\n  if (!_allele_idx_offsetsp) {\n    return 2;\n  }\n  fprintf(stderr,\"Error: only bi-allelic variants are supported\");\n    exit(-1);\n  //const uintptr_t* allele_idx_offsets = _allele_idx_offsetsp->p;\n  //return allele_idx_offsets[variant_idx + 1] - allele_idx_offsets[variant_idx];\n}\n\nuint32_t PgenReader::GetMaxAlleleCt() const {\n  if (!_info_ptr) {\n    fprintf(stderr,\"pgen is closed\");\n    exit(-1);\n  }\n  return _info_ptr->max_allele_ct;\n}\n\nbool PgenReader::HardcallPhasePresent() const {\n  if (!_info_ptr) {\n    fprintf(stderr,\"pgen is closed\");\n    exit(-1);\n  }\n  return ((_info_ptr->gflags & plink2::kfPgenGlobalHardcallPhasePresent) != 0);\n}\n\n// added by J.Mbatchou (09/22/20) to check if dosages are present in PGEN file\nbool PgenReader::DosagePresent() const {\n  if (!_info_ptr) {\n    fprintf(stderr,\"pgen is closed\");\n    exit(-1);\n  }\n  return ((_info_ptr->gflags & plink2::kfPgenGlobalDosagePresent) != 0);\n}\n\n//static const int32_t kGenoRInt32Quads[1024] ALIGNV16 = QUAD_TABLE256(0, 1, 2, -3);\n\nstatic const double kGenoRDoublePairs[32] ALIGNV16 = PAIR_TABLE16(0.0, 1.0, 2.0, -3.0);\n\nvoid PgenReader::ReadHardcalls(double* buf, size_t const& n, int const& thr, int variant_idx, int allele_idx) {\n  if (!_info_ptr) {\n    fprintf(stderr,\"pgen is closed\");\n    exit(-1);\n  }\n  if (static_cast<uint32_t>(variant_idx) >= _info_ptr->raw_variant_ct) {\n    char errstr_buf[256];\n    sprintf(errstr_buf, \"variant_num out of range (%d; must be 1..%u)\", variant_idx + 1, _info_ptr->raw_variant_ct);\n    fprintf(stderr,\"%s\\n\", errstr_buf);\n    exit(-1);\n  }\n  if (n != _subset_size[thr]) {\n    char errstr_buf[256];\n    sprintf(errstr_buf, \"buf has wrong length (%\" PRIdPTR \"; %u expected)\", n, _subset_size[thr]);\n    fprintf(stderr,\"%s\\n\", errstr_buf);\n    exit(-1);\n  }\n  plink2::PglErr reterr = PgrGet1(_subset_include_vec[thr], _subset_index[thr], _subset_size[thr], variant_idx, allele_idx, _state_ptr[thr], _pgv[thr]->genovec);\n  if (reterr != plink2::kPglRetSuccess) {\n    char errstr_buf[256];\n    sprintf(errstr_buf, \"PgrGet1() error %d\", static_cast<int>(reterr));\n    
fprintf(stderr,\"%s\\n\", errstr_buf);\n    exit(-1);\n  }\n  plink2::GenoarrLookup16x8bx2(_pgv[thr]->genovec, kGenoRDoublePairs, _subset_size[thr], buf);\n}\n\nvoid PgenReader::Read(double* buf, size_t const& n, int const& thr, int variant_idx, int allele_idx) {\n  if (!_info_ptr) {\n    fprintf(stderr,\"pgen is closed\");\n    exit(-1);\n  }\n  if (static_cast<uint32_t>(variant_idx) >= _info_ptr->raw_variant_ct) {\n    char errstr_buf[256];\n    sprintf(errstr_buf, \"variant_num out of range (%d; must be 1..%u)\", variant_idx + 1, _info_ptr->raw_variant_ct);\n    fprintf(stderr,\"%s\\n\", errstr_buf);\n    exit(-1);\n  }\n  if (n != _subset_size[thr]) {\n    char errstr_buf[256];\n    sprintf(errstr_buf, \"buf has wrong length (%\" PRIdPTR \"; %u expected)\", n, _subset_size[thr]);\n    fprintf(stderr,\"%s\\n\", errstr_buf);\n    exit(-1);\n  }\n  uint32_t dosage_ct;\n  plink2::PglErr reterr = PgrGet1D(_subset_include_vec[thr], _subset_index[thr], _subset_size[thr], variant_idx, allele_idx, _state_ptr[thr], _pgv[thr]->genovec, _pgv[thr]->dosage_present, _pgv[thr]->dosage_main, &dosage_ct);\n  if (reterr != plink2::kPglRetSuccess) {\n    char errstr_buf[256];\n    sprintf(errstr_buf, \"PgrGet1D() error %d\", static_cast<int>(reterr));\n    fprintf(stderr,\"%s\\n\", errstr_buf);\n    exit(-1);\n  }\n  plink2::Dosage16ToDoubles(kGenoRDoublePairs, _pgv[thr]->genovec, _pgv[thr]->dosage_present, _pgv[thr]->dosage_main, _subset_size[thr], dosage_ct, buf);\n}\n\nvoid PgenReader::Close() {\n  // don't bother propagating file close errors for now\n  if (_info_ptr) {\n    //CondReleaseRefcountedWptr(&_allele_idx_offsetsp);\n    CondReleaseRefcountedWptr(&_nonref_flagsp);\n    if (_info_ptr->vrtypes) {\n      plink2::aligned_free(_info_ptr->vrtypes);\n    }\n    plink2::PglErr reterr = plink2::kPglRetSuccess;\n    plink2::CleanupPgfi(_info_ptr, &reterr);\n    free(_info_ptr);\n    _info_ptr = nullptr;\n  }\n\n  for(size_t i = 0; i < _state_ptr.size(); i++) {\n    if (_state_ptr[i]) {\n      plink2::PglErr reterr = plink2::kPglRetSuccess;\n      plink2::CleanupPgr(_state_ptr[i], &reterr);\n      if (PgrGetFreadBuf(_state_ptr[i])) {\n        plink2::aligned_free(PgrGetFreadBuf(_state_ptr[i]));\n      }\n      free(_state_ptr[i]);\n      _state_ptr[i] = nullptr;\n    }\n    _subset_size[i] = 0;\n  }\n}\n\nvoid PgenReader::SetSampleSubsetInternal(std::vector<int>& sample_subset_1based, int const& thr) {\n  const uint32_t raw_sample_ct = _info_ptr->raw_sample_ct;\n  const uint32_t raw_sample_ctv = plink2::DivUp(raw_sample_ct, plink2::kBitsPerVec);\n  const uint32_t raw_sample_ctaw = raw_sample_ctv * plink2::kWordsPerVec;\n  uintptr_t* sample_include = _subset_include_vec[thr];\n  plink2::ZeroWArr(raw_sample_ctaw, sample_include);\n  const uint32_t subset_size = sample_subset_1based.size();\n  if (subset_size == 0) {\n    fprintf(stderr,\"Empty sample_subset is not currently permitted\");\n    exit(-1);\n  }\n  uint32_t sample_uidx = sample_subset_1based[0] - 1;\n  uint32_t idx = 0;\n  uint32_t next_uidx;\n  while (1) {\n    if (sample_uidx >= raw_sample_ct) {\n      char errstr_buf[256];\n      sprintf(errstr_buf, \"sample number out of range (%d; must be 1..%u)\", static_cast<int>(sample_uidx + 1), raw_sample_ct);\n      fprintf(stderr,\"%s\\n\", errstr_buf);\n      exit(-1);\n    }\n    plink2::SetBit(sample_uidx, sample_include);\n    if (++idx == subset_size) {\n      break;\n    }\n    next_uidx = sample_subset_1based[idx] - 1;\n\n    // prohibit this since it implies that the caller expects 
genotypes to be\n    // returned in a different order\n    if (next_uidx <= sample_uidx) {\n      fprintf(stderr,\"sample_subset is not in strictly increasing order\");\n      exit(-1);\n    }\n    sample_uidx = next_uidx;\n  }\n\n  plink2::FillInterleavedMaskVec(sample_include, raw_sample_ctv, _subset_include_interleaved_vec[thr]);\n  const uint32_t raw_sample_ctl = plink2::DivUp(raw_sample_ct, plink2::kBitsPerWord);\n  plink2::FillCumulativePopcounts(sample_include, raw_sample_ctl, _subset_cumulative_popcounts[thr]);\n  plink2::PgrSetSampleSubsetIndex(_subset_cumulative_popcounts[thr], _state_ptr[thr], &_subset_index[thr]);\n  _subset_size[thr] = subset_size;\n}\n\n"
  },
  {
    "path": "external_libs/pgenlib/pgenlibr.h",
    "content": "/*\n *\n * File derived from pgenlibr R library:\n * https://github.com/chrchang/plink-ng/tree/master/2.0/pgenlibr\n *\n * License info obtained from DESCRIPTION file:\n * https://github.com/chrchang/plink-ng/blob/master/2.0/pgenlibr/DESCRIPTION\n * -----------------------------------------------------\n    Package: pgenlibr\n    Type: Package\n    Title: PLINK 2 Binary (.pgen) Reader\n    Version: 0.2\n    Date: 2019-07-10\n    Author: Christopher Chang\n    Maintainer: Christopher Chang <chrchang@alumni.caltech.edu>\n    Description: A thin wrapper over PLINK 2's core libraries which provides an R\n    interface for reading .pgen files.  A minimal .pvar loader is also included.\n    License: LGPL (>= 3)\n    Imports: Rcpp (>= 1.0.1)\n    LinkingTo: Rcpp\n * -----------------------------------------------------\n\n *  Modified by Joelle Mbatchou - June 29 2020\n *  - removed functions that were for R\n *  - split file to header (added link to several standard C++ libraries)\n *  - modified remaining functions to be fully C/C++ compatible \n *\n * This file remains under LGPL v3 license (license is in same directory as this file)\n */\n\n\n#include <vector>\n#include <string>\n#include <stdio.h>\n#include <stdlib.h>\n#include <memory>\n#include \"pvar_ffi_support.h\"\n#include \"pgenlib_ffi_support.h\"\n#include \"include/pgenlib_read.h\"\n\n\nclass PgenReader {\npublic:\n  PgenReader();\n\n  void Load(std::string filename, uint32_t cur_sample_ct, std::vector<int> sample_subset_1based, int nthr);\n\n  uint32_t GetRawSampleCt() const;\n\n  uint32_t GetSubsetSize() const;\n\n  uint32_t GetVariantCt() const;\n\n  uint32_t GetAlleleCt(uint32_t variant_idx) const;\n\n  uint32_t GetMaxAlleleCt() const;\n\n  bool HardcallPhasePresent() const;\n  \n  bool DosagePresent() const;\n\n  void ReadIntHardcalls(std::vector<int>& buf, int variant_idx, int allele_idx);\n\n  void ReadHardcalls(double* buf, size_t const& n, int const& thr, int variant_idx, int allele_idx);\n\n  void Read(double* buf, size_t const& n, int const& thr, int variant_idx, int allele_idx);\n\n  void Close();\n\n  ~PgenReader();\n\nprivate:\n  plink2::PgenFileInfo* _info_ptr;\n  uintptr_t* _allele_idx_offsetsp = nullptr;\n  //plink2::RefcountedWptr* _allele_idx_offsetsp;\n  plink2::RefcountedWptr* _nonref_flagsp;\n\n  // have all below be threads specific\n  std::vector<plink2::PgenReader*> _state_ptr;\n  std::vector<plink2::PgrSampleSubsetIndex> _subset_index;\n  std::vector<std::shared_ptr<plink2::PgenVariant>> _pgv;\n  std::vector<uintptr_t*> _subset_include_interleaved_vec;\n  std::vector<uint32_t*> _subset_cumulative_popcounts;\n  std::vector<uint32_t> _subset_size;\n  std::vector<uintptr_t*> _subset_include_vec;\n\n  /*\n  // kPglNypTransposeBatch (= 256) variants at a time, and then transpose\n  uintptr_t* _multivar_vmaj_geno_buf;\n  uintptr_t* _multivar_vmaj_phasepresent_buf;\n  uintptr_t* _multivar_vmaj_phaseinfo_buf;\n  uintptr_t* _multivar_smaj_geno_batch_buf;\n  uintptr_t* _multivar_smaj_phaseinfo_batch_buf;\n  uintptr_t* _multivar_smaj_phasepresent_batch_buf;\n*/\n\n  void SetSampleSubsetInternal(std::vector<int>& sample_subset_1based, int const& thr);\n  void ReadAllelesPhasedInternal(int variant_idx);\n};\n\n"
  },
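  {
    "path": "example/pgenlibr_reader_sketch.cpp",
    "content": "// Hedged usage sketch (added for illustration; not a file from upstream\n// pgenlibr or regenie): exercises the multithread-aware PgenReader wrapper in\n// pgenlibr.cpp from a single thread.  The .pgen filename is a placeholder, and\n// passing UINT32_MAX as the expected sample count is assumed to mean 'take it\n// from the .pgen header'.\n\n#include <cstdint>\n#include <cstdio>\n#include <vector>\n\n#include \"../external_libs/pgenlib/pgenlibr.h\"\n\nint main() {\n  PgenReader reader;\n  std::vector<int> all_samples;  // empty subset => keep every sample\n  reader.Load(\"example.pgen\", UINT32_MAX, all_samples, /*nthr=*/1);\n  const uint32_t n = reader.GetRawSampleCt();\n  std::vector<double> geno(n);\n  // Thread slot 0, first variant; allele_idx 1 requests ALT-allele counts\n  // (0/1/2, with missing mapped to -3.0 by kGenoRDoublePairs).\n  reader.ReadHardcalls(geno.data(), n, 0, /*variant_idx=*/0, /*allele_idx=*/1);\n  if (reader.DosagePresent()) {\n    // Same call shape, but stored dosages overwrite the hardcalls.\n    reader.Read(geno.data(), n, 0, 0, 1);\n  }\n  printf(\"first sample: %g\\n\", geno[0]);\n  reader.Close();\n  return 0;\n}\n"
  },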
  {
    "path": "external_libs/pgenlib/pvar_ffi_support.cc",
    "content": "// This library is part of PLINK 2.00, copyright (C) 2005-2020 Shaun Purcell,\n// Christopher Chang.\n//\n// This library is free software: you can redistribute it and/or modify it\n// under the terms of the GNU Lesser General Public License as published by the\n// Free Software Foundation; either version 3 of the License, or (at your\n// option) any later version.\n//\n// This library is distributed in the hope that it will be useful, but WITHOUT\n// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or\n// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License\n// for more details.\n//\n// You should have received a copy of the GNU Lesser General Public License\n// along with this library.  If not, see <http://www.gnu.org/licenses/>.\n//\n//\n// *  Modified by Joelle Mbatchou - Apr 27 2021\n// *  - kept only needed functions and modify to recude needed headers\n\n#include \"pvar_ffi_support.h\"\n\nnamespace plink2 {\n\nRefcountedWptr* CreateRefcountedWptr(uintptr_t size) {\n  RefcountedWptr* rwp = static_cast<RefcountedWptr*>(malloc(sizeof(RefcountedWptr)));\n  if (!rwp) {\n    return nullptr;\n  }\n  rwp->ref_ct = 1;\n  rwp->p = static_cast<uintptr_t*>(malloc(sizeof(uintptr_t) * size));\n\n  return rwp;\n}\n\nvoid CondReleaseRefcountedWptr(RefcountedWptr** rwpp) {\n  RefcountedWptr* rwp = *rwpp;\n  if (!rwp) {\n    return;\n  }\n  --rwp->ref_ct;\n  if (!rwp->ref_ct) {\n    free(rwp->p);\n    free(rwp);\n  }\n  *rwpp = nullptr;\n}\n\n}  // namespace plink2\n"
  },
  {
    "path": "external_libs/pgenlib/pvar_ffi_support.h",
    "content": "#ifndef __PVAR_FFI_SUPPORT_H__\n#define __PVAR_FFI_SUPPORT_H__\n\n// This library is part of PLINK 2.00, copyright (C) 2005-2020 Shaun Purcell,\n// Christopher Chang.\n//\n// This library is free software: you can redistribute it and/or modify it\n// under the terms of the GNU Lesser General Public License as published by the\n// Free Software Foundation; either version 3 of the License, or (at your\n// option) any later version.\n//\n// This library is distributed in the hope that it will be useful, but WITHOUT\n// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or\n// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License\n// for more details.\n//\n// You should have received a copy of the GNU Lesser General Public License\n// along with this library.  If not, see <http://www.gnu.org/licenses/>.\n//\n// *  Modified by Joelle Mbatchou - Apr 27 2021\n// *  - kept only needed functions\n\n#include \"include/pgenlib_misc.h\"\n\n#ifdef __cplusplus\nnamespace plink2 {\n#endif\n\nstruct RefcountedWptrStruct {\n  uintptr_t ref_ct;\n  // flexible member array is not C++ compatible\n  //uintptr_t p[];\n  uintptr_t* p;\n};\n\ntypedef struct RefcountedWptrStruct RefcountedWptr;\n\nRefcountedWptr* CreateRefcountedWptr(uintptr_t size);\n\nvoid CondReleaseRefcountedWptr(RefcountedWptr** rwpp);\n\n\n#ifdef __cplusplus\n}  // namespace plink2\n#endif\n\n#endif  // __PVAR_FFI_SUPPORT_H__\n"
  },
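  {
    "path": "example/refcounted_wptr_sketch.cpp",
    "content": "// Hedged lifecycle sketch (added for illustration; not a file from upstream\n// plink2): shows the manual ref-counting convention behind RefcountedWptr in\n// pvar_ffi_support.h.  The owner1/owner2 names are invented for the example.\n\n#include \"../external_libs/pgenlib/pvar_ffi_support.h\"\n\nint main() {\n  // Fresh 64-word buffer; ref_ct starts at 1 (nullptr returned on OOM).\n  plink2::RefcountedWptr* owner1 = plink2::CreateRefcountedWptr(64);\n  if (!owner1) {\n    return 1;\n  }\n  owner1->p[0] = 42;  // payload is owner1->p, an uninitialized uintptr_t[64]\n  // Co-ownership is manual: a second owner bumps ref_ct itself.\n  plink2::RefcountedWptr* owner2 = owner1;\n  ++owner2->ref_ct;\n  plink2::CondReleaseRefcountedWptr(&owner1);  // ref_ct 2 -> 1; owner1 = nullptr\n  plink2::CondReleaseRefcountedWptr(&owner2);  // ref_ct 1 -> 0; frees p + struct\n  return 0;\n}\n"
  },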
  {
    "path": "external_libs/pgenlib/simde/check.h",
    "content": "/* Check (assertions)\n * Portable Snippets - https://github.com/nemequ/portable-snippets\n * Created by Evan Nemerson <evan@nemerson.com>\n *\n *   To the extent possible under law, the authors have waived all\n *   copyright and related or neighboring rights to this code.  For\n *   details, see the Creative Commons Zero 1.0 Universal license at\n *   https://creativecommons.org/publicdomain/zero/1.0/\n *\n * SPDX-License-Identifier: CC0-1.0\n */\n\n#if !defined(SIMDE_CHECK_H)\n#define SIMDE_CHECK_H\n\n#if !defined(SIMDE_NDEBUG) && !defined(SIMDE_DEBUG)\n#  define SIMDE_NDEBUG 1\n#endif\n\n#include \"hedley.h\"\n#include \"simde-diagnostic.h\"\n#include <stdint.h>\n\n#if !defined(_WIN32)\n#  define SIMDE_SIZE_MODIFIER \"z\"\n#  define SIMDE_CHAR_MODIFIER \"hh\"\n#  define SIMDE_SHORT_MODIFIER \"h\"\n#else\n#  if defined(_M_X64) || defined(__amd64__)\n#    define SIMDE_SIZE_MODIFIER \"I64\"\n#  else\n#    define SIMDE_SIZE_MODIFIER \"\"\n#  endif\n#  define SIMDE_CHAR_MODIFIER \"\"\n#  define SIMDE_SHORT_MODIFIER \"\"\n#endif\n\n#if defined(_MSC_VER) &&  (_MSC_VER >= 1500)\n#  define SIMDE_PUSH_DISABLE_MSVC_C4127_ __pragma(warning(push)) __pragma(warning(disable:4127))\n#  define SIMDE_POP_DISABLE_MSVC_C4127_ __pragma(warning(pop))\n#else\n#  define SIMDE_PUSH_DISABLE_MSVC_C4127_\n#  define SIMDE_POP_DISABLE_MSVC_C4127_\n#endif\n\n#if !defined(simde_errorf)\n#  if defined(__has_include)\n#    if __has_include(<stdio.h>)\n#      include <stdio.h>\n#    endif\n#  elif defined(SIMDE_STDC_HOSTED)\n#    if SIMDE_STDC_HOSTED == 1\n#      include <stdio.h>\n#    endif\n#  elif defined(__STDC_HOSTED__)\n#    if __STDC_HOSTETD__ == 1\n#      include <stdio.h>\n#    endif\n#  endif\n\n#  include \"debug-trap.h\"\n\n   HEDLEY_DIAGNOSTIC_PUSH\n   SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_\n#  if defined(EOF)\n#    define simde_errorf(format, ...) (fprintf(stderr, format, __VA_ARGS__), abort())\n#  else\n#    define simde_errorf(format, ...) (simde_trap())\n#  endif\n   HEDLEY_DIAGNOSTIC_POP\n#endif\n\n#define simde_error(msg) simde_errorf(\"%s\", msg)\n\n#if defined(SIMDE_NDEBUG) || \\\n    (defined(__cplusplus) && (__cplusplus < 201103L)) || \\\n    (defined(__STDC__) && (__STDC__ < 199901L))\n#  if defined(SIMDE_CHECK_FAIL_DEFINED)\n#    define simde_assert(expr)\n#  else\n#    if defined(HEDLEY_ASSUME)\n#      define simde_assert(expr) HEDLEY_ASSUME(expr)\n#    elif HEDLEY_GCC_VERSION_CHECK(4,5,0)\n#      define simde_assert(expr) ((void) (!!(expr) ? 
1 : (__builtin_unreachable(), 1)))\n#    elif HEDLEY_MSVC_VERSION_CHECK(13,10,0)\n#      define simde_assert(expr) __assume(expr)\n#    else\n#      define simde_assert(expr)\n#    endif\n#  endif\n#  define simde_assert_true(expr) simde_assert(expr)\n#  define simde_assert_false(expr) simde_assert(!(expr))\n#  define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) simde_assert(((a) op (b)))\n#  define simde_assert_double_equal(a, b, precision)\n#  define simde_assert_string_equal(a, b)\n#  define simde_assert_string_not_equal(a, b)\n#  define simde_assert_memory_equal(size, a, b)\n#  define simde_assert_memory_not_equal(size, a, b)\n#else\n#  define simde_assert(expr) \\\n    do { \\\n      if (!HEDLEY_LIKELY(expr)) { \\\n        simde_error(\"assertion failed: \" #expr \"\\n\"); \\\n      } \\\n      SIMDE_PUSH_DISABLE_MSVC_C4127_ \\\n    } while (0) \\\n    SIMDE_POP_DISABLE_MSVC_C4127_\n\n#  define simde_assert_true(expr) \\\n    do { \\\n      if (!HEDLEY_LIKELY(expr)) { \\\n        simde_error(\"assertion failed: \" #expr \" is not true\\n\"); \\\n      } \\\n      SIMDE_PUSH_DISABLE_MSVC_C4127_ \\\n    } while (0) \\\n    SIMDE_POP_DISABLE_MSVC_C4127_\n\n#  define simde_assert_false(expr) \\\n    do { \\\n      if (!HEDLEY_LIKELY(!(expr))) { \\\n        simde_error(\"assertion failed: \" #expr \" is not false\\n\"); \\\n      } \\\n      SIMDE_PUSH_DISABLE_MSVC_C4127_ \\\n    } while (0) \\\n    SIMDE_POP_DISABLE_MSVC_C4127_\n\n#  define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b)   \\\n    do { \\\n      T simde_tmp_a_ = (a); \\\n      T simde_tmp_b_ = (b); \\\n      if (!(simde_tmp_a_ op simde_tmp_b_)) { \\\n        simde_errorf(\"assertion failed: %s %s %s (\" prefix \"%\" fmt suffix \" %s \" prefix \"%\" fmt suffix \")\\n\", \\\n                     #a, #op, #b, simde_tmp_a_, #op, simde_tmp_b_); \\\n      } \\\n      SIMDE_PUSH_DISABLE_MSVC_C4127_ \\\n    } while (0) \\\n    SIMDE_POP_DISABLE_MSVC_C4127_\n\n#  define simde_assert_double_equal(a, b, precision) \\\n    do { \\\n      const double simde_tmp_a_ = (a); \\\n      const double simde_tmp_b_ = (b); \\\n      const double simde_tmp_diff_ = ((simde_tmp_a_ - simde_tmp_b_) < 0) ? 
\\\n        -(simde_tmp_a_ - simde_tmp_b_) : \\\n        (simde_tmp_a_ - simde_tmp_b_); \\\n      if (HEDLEY_UNLIKELY(simde_tmp_diff_ > 1e-##precision)) { \\\n        simde_errorf(\"assertion failed: %s == %s (%0.\" #precision \"g == %0.\" #precision \"g)\\n\", \\\n                     #a, #b, simde_tmp_a_, simde_tmp_b_); \\\n      } \\\n      SIMDE_PUSH_DISABLE_MSVC_C4127_ \\\n    } while (0) \\\n    SIMDE_POP_DISABLE_MSVC_C4127_\n\n#  include <string.h>\n#  define simde_assert_string_equal(a, b) \\\n    do { \\\n      const char* simde_tmp_a_ = a; \\\n      const char* simde_tmp_b_ = b; \\\n      if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) != 0)) { \\\n        simde_errorf(\"assertion failed: string %s == %s (\\\"%s\\\" == \\\"%s\\\")\\n\", \\\n                     #a, #b, simde_tmp_a_, simde_tmp_b_); \\\n      } \\\n      SIMDE_PUSH_DISABLE_MSVC_C4127_ \\\n    } while (0) \\\n    SIMDE_POP_DISABLE_MSVC_C4127_\n\n#  define simde_assert_string_not_equal(a, b) \\\n    do { \\\n      const char* simde_tmp_a_ = a; \\\n      const char* simde_tmp_b_ = b; \\\n      if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) == 0)) { \\\n        simde_errorf(\"assertion failed: string %s != %s (\\\"%s\\\" == \\\"%s\\\")\\n\", \\\n                     #a, #b, simde_tmp_a_, simde_tmp_b_); \\\n      } \\\n      SIMDE_PUSH_DISABLE_MSVC_C4127_ \\\n    } while (0) \\\n    SIMDE_POP_DISABLE_MSVC_C4127_\n\n#  define simde_assert_memory_equal(size, a, b) \\\n    do { \\\n      const unsigned char* simde_tmp_a_ = (const unsigned char*) (a); \\\n      const unsigned char* simde_tmp_b_ = (const unsigned char*) (b); \\\n      const size_t simde_tmp_size_ = (size); \\\n      if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, simde_tmp_size_)) != 0) { \\\n        size_t simde_tmp_pos_; \\\n        for (simde_tmp_pos_ = 0 ; simde_tmp_pos_ < simde_tmp_size_ ; simde_tmp_pos_++) { \\\n          if (simde_tmp_a_[simde_tmp_pos_] != simde_tmp_b_[simde_tmp_pos_]) { \\\n            simde_errorf(\"assertion failed: memory %s == %s, at offset %\" SIMDE_SIZE_MODIFIER \"u\\n\", \\\n                         #a, #b, simde_tmp_pos_); \\\n            break; \\\n          } \\\n        } \\\n      } \\\n      SIMDE_PUSH_DISABLE_MSVC_C4127_ \\\n    } while (0) \\\n    SIMDE_POP_DISABLE_MSVC_C4127_\n\n#  define simde_assert_memory_not_equal(size, a, b) \\\n    do { \\\n      const unsigned char* simde_tmp_a_ = (const unsigned char*) (a); \\\n      const unsigned char* simde_tmp_b_ = (const unsigned char*) (b); \\\n      const size_t simde_tmp_size_ = (size); \\\n      if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, simde_tmp_size_)) == 0) { \\\n        simde_errorf(\"assertion failed: memory %s != %s (%\" SIMDE_SIZE_MODIFIER \"u bytes)\\n\", \\\n                     #a, #b, simde_tmp_size_); \\\n      } \\\n      SIMDE_PUSH_DISABLE_MSVC_C4127_ \\\n    } while (0) \\\n    SIMDE_POP_DISABLE_MSVC_C4127_\n#endif\n\n#define simde_assert_type(T, fmt, a, op, b) \\\n  simde_assert_type_full(\"\", \"\", T, fmt, a, op, b)\n\n#define simde_assert_char(a, op, b) \\\n  simde_assert_type_full(\"'\\\\x\", \"'\", char, \"02\" SIMDE_CHAR_MODIFIER \"x\", a, op, b)\n#define simde_assert_uchar(a, op, b) \\\n  simde_assert_type_full(\"'\\\\x\", \"'\", unsigned char, \"02\" SIMDE_CHAR_MODIFIER \"x\", a, op, b)\n#define simde_assert_short(a, op, b) \\\n  simde_assert_type(short, SIMDE_SHORT_MODIFIER \"d\", a, op, b)\n#define simde_assert_ushort(a, op, b) \\\n  simde_assert_type(unsigned short, SIMDE_SHORT_MODIFIER \"u\", a, op, 
b)\n#define simde_assert_int(a, op, b) \\\n  simde_assert_type(int, \"d\", a, op, b)\n#define simde_assert_uint(a, op, b) \\\n  simde_assert_type(unsigned int, \"u\", a, op, b)\n#define simde_assert_long(a, op, b) \\\n  simde_assert_type(long int, \"ld\", a, op, b)\n#define simde_assert_ulong(a, op, b) \\\n  simde_assert_type(unsigned long int, \"lu\", a, op, b)\n#define simde_assert_llong(a, op, b) \\\n  simde_assert_type(long long int, \"lld\", a, op, b)\n#define simde_assert_ullong(a, op, b) \\\n  simde_assert_type(unsigned long long int, \"llu\", a, op, b)\n\n#define simde_assert_size(a, op, b) \\\n  simde_assert_type(size_t, SIMDE_SIZE_MODIFIER \"u\", a, op, b)\n\n#define simde_assert_float(a, op, b) \\\n  simde_assert_type(float, \"f\", a, op, b)\n#define simde_assert_double(a, op, b) \\\n  simde_assert_type(double, \"g\", a, op, b)\n#define simde_assert_ptr(a, op, b) \\\n  simde_assert_type(const void*, \"p\", a, op, b)\n\n#define simde_assert_int8(a, op, b) \\\n  simde_assert_type(int8_t, PRIi8, a, op, b)\n#define simde_assert_uint8(a, op, b) \\\n  simde_assert_type(uint8_t, PRIu8, a, op, b)\n#define simde_assert_int16(a, op, b) \\\n  simde_assert_type(int16_t, PRIi16, a, op, b)\n#define simde_assert_uint16(a, op, b) \\\n  simde_assert_type(uint16_t, PRIu16, a, op, b)\n#define simde_assert_int32(a, op, b) \\\n  simde_assert_type(int32_t, PRIi32, a, op, b)\n#define simde_assert_uint32(a, op, b) \\\n  simde_assert_type(uint32_t, PRIu32, a, op, b)\n#define simde_assert_int64(a, op, b) \\\n  simde_assert_type(int64_t, PRIi64, a, op, b)\n#define simde_assert_uint64(a, op, b) \\\n  simde_assert_type(uint64_t, PRIu64, a, op, b)\n\n#define simde_assert_ptr_equal(a, b) \\\n  simde_assert_ptr(a, ==, b)\n#define simde_assert_ptr_not_equal(a, b) \\\n  simde_assert_ptr(a, !=, b)\n#define simde_assert_null(ptr) \\\n  simde_assert_ptr(ptr, ==, NULL)\n#define simde_assert_not_null(ptr) \\\n  simde_assert_ptr(ptr, !=, NULL)\n#define simde_assert_ptr_null(ptr) \\\n  simde_assert_ptr(ptr, ==, NULL)\n#define simde_assert_ptr_not_null(ptr) \\\n  simde_assert_ptr(ptr, !=, NULL)\n\n#endif /* !defined(SIMDE_CHECK_H) */\n"
  },
  {
    "path": "external_libs/pgenlib/simde/debug-trap.h",
    "content": "/* Debugging assertions and traps\n * Portable Snippets - https://github.com/nemequ/portable-snippets\n * Created by Evan Nemerson <evan@nemerson.com>\n *\n *   To the extent possible under law, the authors have waived all\n *   copyright and related or neighboring rights to this code.  For\n *   details, see the Creative Commons Zero 1.0 Universal license at\n *   https://creativecommons.org/publicdomain/zero/1.0/\n *\n * SPDX-License-Identifier: CC0-1.0\n */\n\n#if !defined(SIMDE_DEBUG_TRAP_H)\n#define SIMDE_DEBUG_TRAP_H\n\n#if !defined(SIMDE_NDEBUG) && defined(NDEBUG) && !defined(SIMDE_DEBUG)\n#  define SIMDE_NDEBUG 1\n#endif\n\n#if defined(__has_builtin) && !defined(__ibmxl__)\n#  if __has_builtin(__builtin_debugtrap)\n#    define simde_trap() __builtin_debugtrap()\n#  elif __has_builtin(__debugbreak)\n#    define simde_trap() __debugbreak()\n#  endif\n#endif\n#if !defined(simde_trap)\n#  if defined(_MSC_VER) || defined(__INTEL_COMPILER)\n#    define simde_trap() __debugbreak()\n#  elif defined(__ARMCC_VERSION)\n#    define simde_trap() __breakpoint(42)\n#  elif defined(__ibmxl__) || defined(__xlC__)\n#    include <builtins.h>\n#    define simde_trap() __trap(42)\n#  elif defined(__DMC__) && defined(_M_IX86)\n     static inline void simde_trap(void) { __asm int 3h; }\n#  elif defined(__i386__) || defined(__x86_64__)\n     static inline void simde_trap(void) { __asm__ __volatile__(\"int $03\"); }\n#  elif defined(__thumb__)\n     static inline void simde_trap(void) { __asm__ __volatile__(\".inst 0xde01\"); }\n#  elif defined(__aarch64__)\n     static inline void simde_trap(void) { __asm__ __volatile__(\".inst 0xd4200000\"); }\n#  elif defined(__arm__)\n     static inline void simde_trap(void) { __asm__ __volatile__(\".inst 0xe7f001f0\"); }\n#  elif defined (__alpha__) && !defined(__osf__)\n     static inline void simde_trap(void) { __asm__ __volatile__(\"bpt\"); }\n#  elif defined(_54_)\n     static inline void simde_trap(void) { __asm__ __volatile__(\"ESTOP\"); }\n#  elif defined(_55_)\n     static inline void simde_trap(void) { __asm__ __volatile__(\";\\n .if (.MNEMONIC)\\n ESTOP_1\\n .else\\n ESTOP_1()\\n .endif\\n NOP\"); }\n#  elif defined(_64P_)\n     static inline void simde_trap(void) { __asm__ __volatile__(\"SWBP 0\"); }\n#  elif defined(_6x_)\n     static inline void simde_trap(void) { __asm__ __volatile__(\"NOP\\n .word 0x10000000\"); }\n#  elif defined(__STDC_HOSTED__) && (__STDC_HOSTED__ == 0) && defined(__GNUC__)\n#    define simde_trap() __builtin_trap()\n#  else\n#    include <signal.h>\n#    if defined(SIGTRAP)\n#      define simde_trap() raise(SIGTRAP)\n#    else\n#      define simde_trap() raise(SIGABRT)\n#    endif\n#  endif\n#endif\n\n#if defined(HEDLEY_LIKELY)\n#  define SIMDE_DBG_LIKELY(expr) HEDLEY_LIKELY(expr)\n#elif defined(__GNUC__) && (__GNUC__ >= 3)\n#  define SIMDE_DBG_LIKELY(expr) __builtin_expect(!!(expr), 1)\n#else\n#  define SIMDE_DBG_LIKELY(expr) (!!(expr))\n#endif\n\n#if !defined(SIMDE_NDEBUG) || (SIMDE_NDEBUG == 0)\n#  define simde_dbg_assert(expr) do { \\\n    if (!SIMDE_DBG_LIKELY(expr)) { \\\n      simde_trap(); \\\n    } \\\n  } while (0)\n#else\n#  define simde_dbg_assert(expr)\n#endif\n\n#endif /* !defined(SIMDE_DEBUG_TRAP_H) */\n"
  },
  {
    "path": "external_libs/pgenlib/simde/hedley.h",
    "content": "/* Hedley - https://nemequ.github.io/hedley\n * Created by Evan Nemerson <evan@nemerson.com>\n *\n * To the extent possible under law, the author(s) have dedicated all\n * copyright and related and neighboring rights to this software to\n * the public domain worldwide. This software is distributed without\n * any warranty.\n *\n * For details, see <http://creativecommons.org/publicdomain/zero/1.0/>.\n * SPDX-License-Identifier: CC0-1.0\n */\n\n#if !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < 16)\n#if defined(HEDLEY_VERSION)\n#  undef HEDLEY_VERSION\n#endif\n#define HEDLEY_VERSION 16\n\n#if defined(HEDLEY_STRINGIFY_EX)\n#  undef HEDLEY_STRINGIFY_EX\n#endif\n#define HEDLEY_STRINGIFY_EX(x) #x\n\n#if defined(HEDLEY_STRINGIFY)\n#  undef HEDLEY_STRINGIFY\n#endif\n#define HEDLEY_STRINGIFY(x) HEDLEY_STRINGIFY_EX(x)\n\n#if defined(HEDLEY_CONCAT_EX)\n#  undef HEDLEY_CONCAT_EX\n#endif\n#define HEDLEY_CONCAT_EX(a,b) a##b\n\n#if defined(HEDLEY_CONCAT)\n#  undef HEDLEY_CONCAT\n#endif\n#define HEDLEY_CONCAT(a,b) HEDLEY_CONCAT_EX(a,b)\n\n#if defined(HEDLEY_CONCAT3_EX)\n#  undef HEDLEY_CONCAT3_EX\n#endif\n#define HEDLEY_CONCAT3_EX(a,b,c) a##b##c\n\n#if defined(HEDLEY_CONCAT3)\n#  undef HEDLEY_CONCAT3\n#endif\n#define HEDLEY_CONCAT3(a,b,c) HEDLEY_CONCAT3_EX(a,b,c)\n\n#if defined(HEDLEY_VERSION_ENCODE)\n#  undef HEDLEY_VERSION_ENCODE\n#endif\n#define HEDLEY_VERSION_ENCODE(major,minor,revision) (((major) * 1000000) + ((minor) * 1000) + (revision))\n\n#if defined(HEDLEY_VERSION_DECODE_MAJOR)\n#  undef HEDLEY_VERSION_DECODE_MAJOR\n#endif\n#define HEDLEY_VERSION_DECODE_MAJOR(version) ((version) / 1000000)\n\n#if defined(HEDLEY_VERSION_DECODE_MINOR)\n#  undef HEDLEY_VERSION_DECODE_MINOR\n#endif\n#define HEDLEY_VERSION_DECODE_MINOR(version) (((version) % 1000000) / 1000)\n\n#if defined(HEDLEY_VERSION_DECODE_REVISION)\n#  undef HEDLEY_VERSION_DECODE_REVISION\n#endif\n#define HEDLEY_VERSION_DECODE_REVISION(version) ((version) % 1000)\n\n#if defined(HEDLEY_GNUC_VERSION)\n#  undef HEDLEY_GNUC_VERSION\n#endif\n#if defined(__GNUC__) && defined(__GNUC_PATCHLEVEL__)\n#  define HEDLEY_GNUC_VERSION HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__)\n#elif defined(__GNUC__)\n#  define HEDLEY_GNUC_VERSION HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, 0)\n#endif\n\n#if defined(HEDLEY_GNUC_VERSION_CHECK)\n#  undef HEDLEY_GNUC_VERSION_CHECK\n#endif\n#if defined(HEDLEY_GNUC_VERSION)\n#  define HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (HEDLEY_GNUC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch))\n#else\n#  define HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (0)\n#endif\n\n#if defined(HEDLEY_MSVC_VERSION)\n#  undef HEDLEY_MSVC_VERSION\n#endif\n#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 140000000) && !defined(__ICL)\n#  define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 10000000, (_MSC_FULL_VER % 10000000) / 100000, (_MSC_FULL_VER % 100000) / 100)\n#elif defined(_MSC_FULL_VER) && !defined(__ICL)\n#  define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 1000000, (_MSC_FULL_VER % 1000000) / 10000, (_MSC_FULL_VER % 10000) / 10)\n#elif defined(_MSC_VER) && !defined(__ICL)\n#  define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_VER / 100, _MSC_VER % 100, 0)\n#endif\n\n#if defined(HEDLEY_MSVC_VERSION_CHECK)\n#  undef HEDLEY_MSVC_VERSION_CHECK\n#endif\n#if !defined(HEDLEY_MSVC_VERSION)\n#  define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (0)\n#elif defined(_MSC_VER) && (_MSC_VER >= 1400)\n#  define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER 
>= ((major * 10000000) + (minor * 100000) + (patch)))\n#elif defined(_MSC_VER) && (_MSC_VER >= 1200)\n#  define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 1000000) + (minor * 10000) + (patch)))\n#else\n#  define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_VER >= ((major * 100) + (minor)))\n#endif\n\n#if defined(HEDLEY_INTEL_VERSION)\n#  undef HEDLEY_INTEL_VERSION\n#endif\n#if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE) && !defined(__ICL)\n#  define HEDLEY_INTEL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, __INTEL_COMPILER_UPDATE)\n#elif defined(__INTEL_COMPILER) && !defined(__ICL)\n#  define HEDLEY_INTEL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, 0)\n#endif\n\n#if defined(HEDLEY_INTEL_VERSION_CHECK)\n#  undef HEDLEY_INTEL_VERSION_CHECK\n#endif\n#if defined(HEDLEY_INTEL_VERSION)\n#  define HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (HEDLEY_INTEL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch))\n#else\n#  define HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (0)\n#endif\n\n#if defined(HEDLEY_INTEL_CL_VERSION)\n#  undef HEDLEY_INTEL_CL_VERSION\n#endif\n#if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE) && defined(__ICL)\n#  define HEDLEY_INTEL_CL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER, __INTEL_COMPILER_UPDATE, 0)\n#endif\n\n#if defined(HEDLEY_INTEL_CL_VERSION_CHECK)\n#  undef HEDLEY_INTEL_CL_VERSION_CHECK\n#endif\n#if defined(HEDLEY_INTEL_CL_VERSION)\n#  define HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (HEDLEY_INTEL_CL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch))\n#else\n#  define HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (0)\n#endif\n\n#if defined(HEDLEY_PGI_VERSION)\n#  undef HEDLEY_PGI_VERSION\n#endif\n#if defined(__PGI) && defined(__PGIC__) && defined(__PGIC_MINOR__) && defined(__PGIC_PATCHLEVEL__)\n#  define HEDLEY_PGI_VERSION HEDLEY_VERSION_ENCODE(__PGIC__, __PGIC_MINOR__, __PGIC_PATCHLEVEL__)\n#endif\n\n#if defined(HEDLEY_PGI_VERSION_CHECK)\n#  undef HEDLEY_PGI_VERSION_CHECK\n#endif\n#if defined(HEDLEY_PGI_VERSION)\n#  define HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (HEDLEY_PGI_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch))\n#else\n#  define HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (0)\n#endif\n\n#if defined(HEDLEY_SUNPRO_VERSION)\n#  undef HEDLEY_SUNPRO_VERSION\n#endif\n#if defined(__SUNPRO_C) && (__SUNPRO_C > 0x1000)\n#  define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((((__SUNPRO_C >> 16) & 0xf) * 10) + ((__SUNPRO_C >> 12) & 0xf), (((__SUNPRO_C >> 8) & 0xf) * 10) + ((__SUNPRO_C >> 4) & 0xf), (__SUNPRO_C & 0xf) * 10)\n#elif defined(__SUNPRO_C)\n#  define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((__SUNPRO_C >> 8) & 0xf, (__SUNPRO_C >> 4) & 0xf, (__SUNPRO_C) & 0xf)\n#elif defined(__SUNPRO_CC) && (__SUNPRO_CC > 0x1000)\n#  define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((((__SUNPRO_CC >> 16) & 0xf) * 10) + ((__SUNPRO_CC >> 12) & 0xf), (((__SUNPRO_CC >> 8) & 0xf) * 10) + ((__SUNPRO_CC >> 4) & 0xf), (__SUNPRO_CC & 0xf) * 10)\n#elif defined(__SUNPRO_CC)\n#  define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((__SUNPRO_CC >> 8) & 0xf, (__SUNPRO_CC >> 4) & 0xf, (__SUNPRO_CC) & 0xf)\n#endif\n\n#if defined(HEDLEY_SUNPRO_VERSION_CHECK)\n#  undef HEDLEY_SUNPRO_VERSION_CHECK\n#endif\n#if defined(HEDLEY_SUNPRO_VERSION)\n#  define HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (HEDLEY_SUNPRO_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch))\n#else\n#  define 
HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (0)\n#endif\n\n#if defined(HEDLEY_EMSCRIPTEN_VERSION)\n#  undef HEDLEY_EMSCRIPTEN_VERSION\n#endif\n#if defined(__EMSCRIPTEN__)\n#  include <emscripten.h>\n#  define HEDLEY_EMSCRIPTEN_VERSION HEDLEY_VERSION_ENCODE(__EMSCRIPTEN_major__, __EMSCRIPTEN_minor__, __EMSCRIPTEN_tiny__)\n#endif\n\n#if defined(HEDLEY_EMSCRIPTEN_VERSION_CHECK)\n#  undef HEDLEY_EMSCRIPTEN_VERSION_CHECK\n#endif\n#if defined(HEDLEY_EMSCRIPTEN_VERSION)\n#  define HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) (HEDLEY_EMSCRIPTEN_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch))\n#else\n#  define HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) (0)\n#endif\n\n#if defined(HEDLEY_ARM_VERSION)\n#  undef HEDLEY_ARM_VERSION\n#endif\n#if defined(__CC_ARM) && defined(__ARMCOMPILER_VERSION)\n#  define HEDLEY_ARM_VERSION HEDLEY_VERSION_ENCODE(__ARMCOMPILER_VERSION / 1000000, (__ARMCOMPILER_VERSION % 1000000) / 10000, (__ARMCOMPILER_VERSION % 10000) / 100)\n#elif defined(__CC_ARM) && defined(__ARMCC_VERSION)\n#  define HEDLEY_ARM_VERSION HEDLEY_VERSION_ENCODE(__ARMCC_VERSION / 1000000, (__ARMCC_VERSION % 1000000) / 10000, (__ARMCC_VERSION % 10000) / 100)\n#endif\n\n#if defined(HEDLEY_ARM_VERSION_CHECK)\n#  undef HEDLEY_ARM_VERSION_CHECK\n#endif\n#if defined(HEDLEY_ARM_VERSION)\n#  define HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (HEDLEY_ARM_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch))\n#else\n#  define HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (0)\n#endif\n\n#if defined(HEDLEY_IBM_VERSION)\n#  undef HEDLEY_IBM_VERSION\n#endif\n#if defined(__ibmxl__)\n#  define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__ibmxl_version__, __ibmxl_release__, __ibmxl_modification__)\n#elif defined(__xlC__) && defined(__xlC_ver__)\n#  define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, (__xlC_ver__ >> 8) & 0xff)\n#elif defined(__xlC__)\n#  define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, 0)\n#endif\n\n#if defined(HEDLEY_IBM_VERSION_CHECK)\n#  undef HEDLEY_IBM_VERSION_CHECK\n#endif\n#if defined(HEDLEY_IBM_VERSION)\n#  define HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (HEDLEY_IBM_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch))\n#else\n#  define HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (0)\n#endif\n\n#if defined(HEDLEY_TI_VERSION)\n#  undef HEDLEY_TI_VERSION\n#endif\n#if \\\n    defined(__TI_COMPILER_VERSION__) && \\\n    ( \\\n      defined(__TMS470__) || defined(__TI_ARM__) || \\\n      defined(__MSP430__) || \\\n      defined(__TMS320C2000__) \\\n    )\n#  if (__TI_COMPILER_VERSION__ >= 16000000)\n#    define HEDLEY_TI_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000))\n#  endif\n#endif\n\n#if defined(HEDLEY_TI_VERSION_CHECK)\n#  undef HEDLEY_TI_VERSION_CHECK\n#endif\n#if defined(HEDLEY_TI_VERSION)\n#  define HEDLEY_TI_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch))\n#else\n#  define HEDLEY_TI_VERSION_CHECK(major,minor,patch) (0)\n#endif\n\n#if defined(HEDLEY_TI_CL2000_VERSION)\n#  undef HEDLEY_TI_CL2000_VERSION\n#endif\n#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C2000__)\n#  define HEDLEY_TI_CL2000_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000))\n#endif\n\n#if defined(HEDLEY_TI_CL2000_VERSION_CHECK)\n#  undef HEDLEY_TI_CL2000_VERSION_CHECK\n#endif\n#if 
defined(HEDLEY_TI_CL2000_VERSION)\n#  define HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL2000_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch))\n#else\n#  define HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (0)\n#endif\n\n#if defined(HEDLEY_TI_CL430_VERSION)\n#  undef HEDLEY_TI_CL430_VERSION\n#endif\n#if defined(__TI_COMPILER_VERSION__) && defined(__MSP430__)\n#  define HEDLEY_TI_CL430_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000))\n#endif\n\n#if defined(HEDLEY_TI_CL430_VERSION_CHECK)\n#  undef HEDLEY_TI_CL430_VERSION_CHECK\n#endif\n#if defined(HEDLEY_TI_CL430_VERSION)\n#  define HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL430_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch))\n#else\n#  define HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (0)\n#endif\n\n#if defined(HEDLEY_TI_ARMCL_VERSION)\n#  undef HEDLEY_TI_ARMCL_VERSION\n#endif\n#if defined(__TI_COMPILER_VERSION__) && (defined(__TMS470__) || defined(__TI_ARM__))\n#  define HEDLEY_TI_ARMCL_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000))\n#endif\n\n#if defined(HEDLEY_TI_ARMCL_VERSION_CHECK)\n#  undef HEDLEY_TI_ARMCL_VERSION_CHECK\n#endif\n#if defined(HEDLEY_TI_ARMCL_VERSION)\n#  define HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_ARMCL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch))\n#else\n#  define HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (0)\n#endif\n\n#if defined(HEDLEY_TI_CL6X_VERSION)\n#  undef HEDLEY_TI_CL6X_VERSION\n#endif\n#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C6X__)\n#  define HEDLEY_TI_CL6X_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000))\n#endif\n\n#if defined(HEDLEY_TI_CL6X_VERSION_CHECK)\n#  undef HEDLEY_TI_CL6X_VERSION_CHECK\n#endif\n#if defined(HEDLEY_TI_CL6X_VERSION)\n#  define HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL6X_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch))\n#else\n#  define HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (0)\n#endif\n\n#if defined(HEDLEY_TI_CL7X_VERSION)\n#  undef HEDLEY_TI_CL7X_VERSION\n#endif\n#if defined(__TI_COMPILER_VERSION__) && defined(__C7000__)\n#  define HEDLEY_TI_CL7X_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000))\n#endif\n\n#if defined(HEDLEY_TI_CL7X_VERSION_CHECK)\n#  undef HEDLEY_TI_CL7X_VERSION_CHECK\n#endif\n#if defined(HEDLEY_TI_CL7X_VERSION)\n#  define HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL7X_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch))\n#else\n#  define HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (0)\n#endif\n\n#if defined(HEDLEY_TI_CLPRU_VERSION)\n#  undef HEDLEY_TI_CLPRU_VERSION\n#endif\n#if defined(__TI_COMPILER_VERSION__) && defined(__PRU__)\n#  define HEDLEY_TI_CLPRU_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000))\n#endif\n\n#if defined(HEDLEY_TI_CLPRU_VERSION_CHECK)\n#  undef HEDLEY_TI_CLPRU_VERSION_CHECK\n#endif\n#if defined(HEDLEY_TI_CLPRU_VERSION)\n#  define HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CLPRU_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch))\n#else\n#  define 
HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (0)\n#endif\n\n#if defined(HEDLEY_CRAY_VERSION)\n#  undef HEDLEY_CRAY_VERSION\n#endif\n#if defined(_CRAYC)\n#  if defined(_RELEASE_PATCHLEVEL)\n#    define HEDLEY_CRAY_VERSION HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, _RELEASE_PATCHLEVEL)\n#  else\n#    define HEDLEY_CRAY_VERSION HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, 0)\n#  endif\n#endif\n\n#if defined(HEDLEY_CRAY_VERSION_CHECK)\n#  undef HEDLEY_CRAY_VERSION_CHECK\n#endif\n#if defined(HEDLEY_CRAY_VERSION)\n#  define HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (HEDLEY_CRAY_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch))\n#else\n#  define HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (0)\n#endif\n\n#if defined(HEDLEY_IAR_VERSION)\n#  undef HEDLEY_IAR_VERSION\n#endif\n#if defined(__IAR_SYSTEMS_ICC__)\n#  if __VER__ > 1000\n#    define HEDLEY_IAR_VERSION HEDLEY_VERSION_ENCODE((__VER__ / 1000000), ((__VER__ / 1000) % 1000), (__VER__ % 1000))\n#  else\n#    define HEDLEY_IAR_VERSION HEDLEY_VERSION_ENCODE(__VER__ / 100, __VER__ % 100, 0)\n#  endif\n#endif\n\n#if defined(HEDLEY_IAR_VERSION_CHECK)\n#  undef HEDLEY_IAR_VERSION_CHECK\n#endif\n#if defined(HEDLEY_IAR_VERSION)\n#  define HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (HEDLEY_IAR_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch))\n#else\n#  define HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (0)\n#endif\n\n#if defined(HEDLEY_TINYC_VERSION)\n#  undef HEDLEY_TINYC_VERSION\n#endif\n#if defined(__TINYC__)\n#  define HEDLEY_TINYC_VERSION HEDLEY_VERSION_ENCODE(__TINYC__ / 1000, (__TINYC__ / 100) % 10, __TINYC__ % 100)\n#endif\n\n#if defined(HEDLEY_TINYC_VERSION_CHECK)\n#  undef HEDLEY_TINYC_VERSION_CHECK\n#endif\n#if defined(HEDLEY_TINYC_VERSION)\n#  define HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (HEDLEY_TINYC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch))\n#else\n#  define HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (0)\n#endif\n\n#if defined(HEDLEY_DMC_VERSION)\n#  undef HEDLEY_DMC_VERSION\n#endif\n#if defined(__DMC__)\n#  define HEDLEY_DMC_VERSION HEDLEY_VERSION_ENCODE(__DMC__ >> 8, (__DMC__ >> 4) & 0xf, __DMC__ & 0xf)\n#endif\n\n#if defined(HEDLEY_DMC_VERSION_CHECK)\n#  undef HEDLEY_DMC_VERSION_CHECK\n#endif\n#if defined(HEDLEY_DMC_VERSION)\n#  define HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (HEDLEY_DMC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch))\n#else\n#  define HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (0)\n#endif\n\n#if defined(HEDLEY_COMPCERT_VERSION)\n#  undef HEDLEY_COMPCERT_VERSION\n#endif\n#if defined(__COMPCERT_VERSION__)\n#  define HEDLEY_COMPCERT_VERSION HEDLEY_VERSION_ENCODE(__COMPCERT_VERSION__ / 10000, (__COMPCERT_VERSION__ / 100) % 100, __COMPCERT_VERSION__ % 100)\n#endif\n\n#if defined(HEDLEY_COMPCERT_VERSION_CHECK)\n#  undef HEDLEY_COMPCERT_VERSION_CHECK\n#endif\n#if defined(HEDLEY_COMPCERT_VERSION)\n#  define HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (HEDLEY_COMPCERT_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch))\n#else\n#  define HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (0)\n#endif\n\n#if defined(HEDLEY_PELLES_VERSION)\n#  undef HEDLEY_PELLES_VERSION\n#endif\n#if defined(__POCC__)\n#  define HEDLEY_PELLES_VERSION HEDLEY_VERSION_ENCODE(__POCC__ / 100, __POCC__ % 100, 0)\n#endif\n\n#if defined(HEDLEY_PELLES_VERSION_CHECK)\n#  undef HEDLEY_PELLES_VERSION_CHECK\n#endif\n#if defined(HEDLEY_PELLES_VERSION)\n#  define HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (HEDLEY_PELLES_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, 
patch))\n#else\n#  define HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (0)\n#endif\n\n#if defined(HEDLEY_MCST_LCC_VERSION)\n#  undef HEDLEY_MCST_LCC_VERSION\n#endif\n#if defined(__LCC__) && defined(__LCC_MINOR__)\n#  define HEDLEY_MCST_LCC_VERSION HEDLEY_VERSION_ENCODE(__LCC__ / 100, __LCC__ % 100, __LCC_MINOR__)\n#endif\n\n#if defined(HEDLEY_MCST_LCC_VERSION_CHECK)\n#  undef HEDLEY_MCST_LCC_VERSION_CHECK\n#endif\n#if defined(HEDLEY_MCST_LCC_VERSION)\n#  define HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (HEDLEY_MCST_LCC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch))\n#else\n#  define HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (0)\n#endif\n\n#if defined(HEDLEY_GCC_VERSION)\n#  undef HEDLEY_GCC_VERSION\n#endif\n#if \\\n  defined(HEDLEY_GNUC_VERSION) && \\\n  !defined(__clang__) && \\\n  !defined(HEDLEY_INTEL_VERSION) && \\\n  !defined(HEDLEY_PGI_VERSION) && \\\n  !defined(HEDLEY_ARM_VERSION) && \\\n  !defined(HEDLEY_CRAY_VERSION) && \\\n  !defined(HEDLEY_TI_VERSION) && \\\n  !defined(HEDLEY_TI_ARMCL_VERSION) && \\\n  !defined(HEDLEY_TI_CL430_VERSION) && \\\n  !defined(HEDLEY_TI_CL2000_VERSION) && \\\n  !defined(HEDLEY_TI_CL6X_VERSION) && \\\n  !defined(HEDLEY_TI_CL7X_VERSION) && \\\n  !defined(HEDLEY_TI_CLPRU_VERSION) && \\\n  !defined(__COMPCERT__) && \\\n  !defined(HEDLEY_MCST_LCC_VERSION)\n#  define HEDLEY_GCC_VERSION HEDLEY_GNUC_VERSION\n#endif\n\n#if defined(HEDLEY_GCC_VERSION_CHECK)\n#  undef HEDLEY_GCC_VERSION_CHECK\n#endif\n#if defined(HEDLEY_GCC_VERSION)\n#  define HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (HEDLEY_GCC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch))\n#else\n#  define HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (0)\n#endif\n\n#if defined(HEDLEY_HAS_ATTRIBUTE)\n#  undef HEDLEY_HAS_ATTRIBUTE\n#endif\n#if \\\n  defined(__has_attribute) && \\\n  ( \\\n    (!defined(HEDLEY_IAR_VERSION) || HEDLEY_IAR_VERSION_CHECK(8,5,9)) \\\n  )\n#  define HEDLEY_HAS_ATTRIBUTE(attribute) __has_attribute(attribute)\n#else\n#  define HEDLEY_HAS_ATTRIBUTE(attribute) (0)\n#endif\n\n#if defined(HEDLEY_GNUC_HAS_ATTRIBUTE)\n#  undef HEDLEY_GNUC_HAS_ATTRIBUTE\n#endif\n#if defined(__has_attribute)\n#  define HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_HAS_ATTRIBUTE(attribute)\n#else\n#  define HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch)\n#endif\n\n#if defined(HEDLEY_GCC_HAS_ATTRIBUTE)\n#  undef HEDLEY_GCC_HAS_ATTRIBUTE\n#endif\n#if defined(__has_attribute)\n#  define HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_HAS_ATTRIBUTE(attribute)\n#else\n#  define HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch)\n#endif\n\n#if defined(HEDLEY_HAS_CPP_ATTRIBUTE)\n#  undef HEDLEY_HAS_CPP_ATTRIBUTE\n#endif\n#if \\\n  defined(__has_cpp_attribute) && \\\n  defined(__cplusplus) && \\\n  (!defined(HEDLEY_SUNPRO_VERSION) || HEDLEY_SUNPRO_VERSION_CHECK(5,15,0))\n#  define HEDLEY_HAS_CPP_ATTRIBUTE(attribute) __has_cpp_attribute(attribute)\n#else\n#  define HEDLEY_HAS_CPP_ATTRIBUTE(attribute) (0)\n#endif\n\n#if defined(HEDLEY_HAS_CPP_ATTRIBUTE_NS)\n#  undef HEDLEY_HAS_CPP_ATTRIBUTE_NS\n#endif\n#if !defined(__cplusplus) || !defined(__has_cpp_attribute)\n#  define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0)\n#elif \\\n  !defined(HEDLEY_PGI_VERSION) && \\\n  !defined(HEDLEY_IAR_VERSION) && \\\n  (!defined(HEDLEY_SUNPRO_VERSION) || HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) && \\\n  (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0))\n#  define 
HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) HEDLEY_HAS_CPP_ATTRIBUTE(ns::attribute)\n#else\n#  define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0)\n#endif\n\n#if defined(HEDLEY_GNUC_HAS_CPP_ATTRIBUTE)\n#  undef HEDLEY_GNUC_HAS_CPP_ATTRIBUTE\n#endif\n#if defined(__has_cpp_attribute) && defined(__cplusplus)\n#  define HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute)\n#else\n#  define HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch)\n#endif\n\n#if defined(HEDLEY_GCC_HAS_CPP_ATTRIBUTE)\n#  undef HEDLEY_GCC_HAS_CPP_ATTRIBUTE\n#endif\n#if defined(__has_cpp_attribute) && defined(__cplusplus)\n#  define HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute)\n#else\n#  define HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch)\n#endif\n\n#if defined(HEDLEY_HAS_BUILTIN)\n#  undef HEDLEY_HAS_BUILTIN\n#endif\n#if defined(__has_builtin)\n#  define HEDLEY_HAS_BUILTIN(builtin) __has_builtin(builtin)\n#else\n#  define HEDLEY_HAS_BUILTIN(builtin) (0)\n#endif\n\n#if defined(HEDLEY_GNUC_HAS_BUILTIN)\n#  undef HEDLEY_GNUC_HAS_BUILTIN\n#endif\n#if defined(__has_builtin)\n#  define HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin)\n#else\n#  define HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch)\n#endif\n\n#if defined(HEDLEY_GCC_HAS_BUILTIN)\n#  undef HEDLEY_GCC_HAS_BUILTIN\n#endif\n#if defined(__has_builtin)\n#  define HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin)\n#else\n#  define HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch)\n#endif\n\n#if defined(HEDLEY_HAS_FEATURE)\n#  undef HEDLEY_HAS_FEATURE\n#endif\n#if defined(__has_feature)\n#  define HEDLEY_HAS_FEATURE(feature) __has_feature(feature)\n#else\n#  define HEDLEY_HAS_FEATURE(feature) (0)\n#endif\n\n#if defined(HEDLEY_GNUC_HAS_FEATURE)\n#  undef HEDLEY_GNUC_HAS_FEATURE\n#endif\n#if defined(__has_feature)\n#  define HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature)\n#else\n#  define HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch)\n#endif\n\n#if defined(HEDLEY_GCC_HAS_FEATURE)\n#  undef HEDLEY_GCC_HAS_FEATURE\n#endif\n#if defined(__has_feature)\n#  define HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature)\n#else\n#  define HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch)\n#endif\n\n#if defined(HEDLEY_HAS_EXTENSION)\n#  undef HEDLEY_HAS_EXTENSION\n#endif\n#if defined(__has_extension)\n#  define HEDLEY_HAS_EXTENSION(extension) __has_extension(extension)\n#else\n#  define HEDLEY_HAS_EXTENSION(extension) (0)\n#endif\n\n#if defined(HEDLEY_GNUC_HAS_EXTENSION)\n#  undef HEDLEY_GNUC_HAS_EXTENSION\n#endif\n#if defined(__has_extension)\n#  define HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension)\n#else\n#  define HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch)\n#endif\n\n#if defined(HEDLEY_GCC_HAS_EXTENSION)\n#  undef HEDLEY_GCC_HAS_EXTENSION\n#endif\n#if defined(__has_extension)\n#  define HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension)\n#else\n#  define HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch)\n#endif\n\n#if 
defined(HEDLEY_HAS_DECLSPEC_ATTRIBUTE)\n#  undef HEDLEY_HAS_DECLSPEC_ATTRIBUTE\n#endif\n#if defined(__has_declspec_attribute)\n#  define HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) __has_declspec_attribute(attribute)\n#else\n#  define HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) (0)\n#endif\n\n#if defined(HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE)\n#  undef HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE\n#endif\n#if defined(__has_declspec_attribute)\n#  define HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute)\n#else\n#  define HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch)\n#endif\n\n#if defined(HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE)\n#  undef HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE\n#endif\n#if defined(__has_declspec_attribute)\n#  define HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute)\n#else\n#  define HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch)\n#endif\n\n#if defined(HEDLEY_HAS_WARNING)\n#  undef HEDLEY_HAS_WARNING\n#endif\n#if defined(__has_warning)\n#  define HEDLEY_HAS_WARNING(warning) __has_warning(warning)\n#else\n#  define HEDLEY_HAS_WARNING(warning) (0)\n#endif\n\n#if defined(HEDLEY_GNUC_HAS_WARNING)\n#  undef HEDLEY_GNUC_HAS_WARNING\n#endif\n#if defined(__has_warning)\n#  define HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning)\n#else\n#  define HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch)\n#endif\n\n#if defined(HEDLEY_GCC_HAS_WARNING)\n#  undef HEDLEY_GCC_HAS_WARNING\n#endif\n#if defined(__has_warning)\n#  define HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning)\n#else\n#  define HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch)\n#endif\n\n#if \\\n  (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \\\n  defined(__clang__) || \\\n  HEDLEY_GCC_VERSION_CHECK(3,0,0) || \\\n  HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \\\n  HEDLEY_IAR_VERSION_CHECK(8,0,0) || \\\n  HEDLEY_PGI_VERSION_CHECK(18,4,0) || \\\n  HEDLEY_ARM_VERSION_CHECK(4,1,0) || \\\n  HEDLEY_TI_VERSION_CHECK(15,12,0) || \\\n  HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \\\n  HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \\\n  HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \\\n  HEDLEY_TI_CL6X_VERSION_CHECK(7,0,0) || \\\n  HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \\\n  HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \\\n  HEDLEY_CRAY_VERSION_CHECK(5,0,0) || \\\n  HEDLEY_TINYC_VERSION_CHECK(0,9,17) || \\\n  HEDLEY_SUNPRO_VERSION_CHECK(8,0,0) || \\\n  (HEDLEY_IBM_VERSION_CHECK(10,1,0) && defined(__C99_PRAGMA_OPERATOR))\n#  define HEDLEY_PRAGMA(value) _Pragma(#value)\n#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0)\n#  define HEDLEY_PRAGMA(value) __pragma(value)\n#else\n#  define HEDLEY_PRAGMA(value)\n#endif\n\n#if defined(HEDLEY_DIAGNOSTIC_PUSH)\n#  undef HEDLEY_DIAGNOSTIC_PUSH\n#endif\n#if defined(HEDLEY_DIAGNOSTIC_POP)\n#  undef HEDLEY_DIAGNOSTIC_POP\n#endif\n#if defined(__clang__)\n#  define HEDLEY_DIAGNOSTIC_PUSH _Pragma(\"clang diagnostic push\")\n#  define HEDLEY_DIAGNOSTIC_POP _Pragma(\"clang diagnostic pop\")\n#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0)\n#  define HEDLEY_DIAGNOSTIC_PUSH _Pragma(\"warning(push)\")\n#  define HEDLEY_DIAGNOSTIC_POP _Pragma(\"warning(pop)\")\n#elif HEDLEY_GCC_VERSION_CHECK(4,6,0)\n#  define HEDLEY_DIAGNOSTIC_PUSH _Pragma(\"GCC diagnostic push\")\n#  define HEDLEY_DIAGNOSTIC_POP 
_Pragma(\"GCC diagnostic pop\")\n#elif \\\n  HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \\\n  HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0)\n#  define HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(push))\n#  define HEDLEY_DIAGNOSTIC_POP __pragma(warning(pop))\n#elif HEDLEY_ARM_VERSION_CHECK(5,6,0)\n#  define HEDLEY_DIAGNOSTIC_PUSH _Pragma(\"push\")\n#  define HEDLEY_DIAGNOSTIC_POP _Pragma(\"pop\")\n#elif \\\n    HEDLEY_TI_VERSION_CHECK(15,12,0) || \\\n    HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \\\n    HEDLEY_TI_CL430_VERSION_CHECK(4,4,0) || \\\n    HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \\\n    HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \\\n    HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0)\n#  define HEDLEY_DIAGNOSTIC_PUSH _Pragma(\"diag_push\")\n#  define HEDLEY_DIAGNOSTIC_POP _Pragma(\"diag_pop\")\n#elif HEDLEY_PELLES_VERSION_CHECK(2,90,0)\n#  define HEDLEY_DIAGNOSTIC_PUSH _Pragma(\"warning(push)\")\n#  define HEDLEY_DIAGNOSTIC_POP _Pragma(\"warning(pop)\")\n#else\n#  define HEDLEY_DIAGNOSTIC_PUSH\n#  define HEDLEY_DIAGNOSTIC_POP\n#endif\n\n/* HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ is for\n   HEDLEY INTERNAL USE ONLY.  API subject to change without notice. */\n#if defined(HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_)\n#  undef HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_\n#endif\n#if defined(__cplusplus)\n#  if HEDLEY_HAS_WARNING(\"-Wc++98-compat\")\n#    if HEDLEY_HAS_WARNING(\"-Wc++17-extensions\")\n#      if HEDLEY_HAS_WARNING(\"-Wc++1z-extensions\")\n#        define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \\\n           HEDLEY_DIAGNOSTIC_PUSH \\\n           _Pragma(\"clang diagnostic ignored \\\"-Wc++98-compat\\\"\") \\\n           _Pragma(\"clang diagnostic ignored \\\"-Wc++17-extensions\\\"\") \\\n           _Pragma(\"clang diagnostic ignored \\\"-Wc++1z-extensions\\\"\") \\\n           xpr \\\n           HEDLEY_DIAGNOSTIC_POP\n#      else\n#        define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \\\n           HEDLEY_DIAGNOSTIC_PUSH \\\n           _Pragma(\"clang diagnostic ignored \\\"-Wc++98-compat\\\"\") \\\n           _Pragma(\"clang diagnostic ignored \\\"-Wc++17-extensions\\\"\") \\\n           xpr \\\n           HEDLEY_DIAGNOSTIC_POP\n#      endif\n#    else\n#      define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \\\n         HEDLEY_DIAGNOSTIC_PUSH \\\n         _Pragma(\"clang diagnostic ignored \\\"-Wc++98-compat\\\"\") \\\n         xpr \\\n         HEDLEY_DIAGNOSTIC_POP\n#    endif\n#  endif\n#endif\n#if !defined(HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_)\n#  define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(x) x\n#endif\n\n#if defined(HEDLEY_CONST_CAST)\n#  undef HEDLEY_CONST_CAST\n#endif\n#if defined(__cplusplus)\n#  define HEDLEY_CONST_CAST(T, expr) (const_cast<T>(expr))\n#elif \\\n  HEDLEY_HAS_WARNING(\"-Wcast-qual\") || \\\n  HEDLEY_GCC_VERSION_CHECK(4,6,0) || \\\n  HEDLEY_INTEL_VERSION_CHECK(13,0,0)\n#  define HEDLEY_CONST_CAST(T, expr) (__extension__ ({ \\\n      HEDLEY_DIAGNOSTIC_PUSH \\\n      HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \\\n      ((T) (expr)); \\\n      HEDLEY_DIAGNOSTIC_POP \\\n    }))\n#else\n#  define HEDLEY_CONST_CAST(T, expr) ((T) (expr))\n#endif\n\n#if defined(HEDLEY_REINTERPRET_CAST)\n#  undef HEDLEY_REINTERPRET_CAST\n#endif\n#if defined(__cplusplus)\n#  define HEDLEY_REINTERPRET_CAST(T, expr) (reinterpret_cast<T>(expr))\n#else\n#  define HEDLEY_REINTERPRET_CAST(T, expr) ((T) (expr))\n#endif\n\n#if defined(HEDLEY_STATIC_CAST)\n#  undef HEDLEY_STATIC_CAST\n#endif\n#if defined(__cplusplus)\n#  define HEDLEY_STATIC_CAST(T, expr) 
(static_cast<T>(expr))\n#else\n#  define HEDLEY_STATIC_CAST(T, expr) ((T) (expr))\n#endif\n\n#if defined(HEDLEY_CPP_CAST)\n#  undef HEDLEY_CPP_CAST\n#endif\n#if defined(__cplusplus)\n#  if HEDLEY_HAS_WARNING(\"-Wold-style-cast\")\n#    define HEDLEY_CPP_CAST(T, expr) \\\n       HEDLEY_DIAGNOSTIC_PUSH \\\n       _Pragma(\"clang diagnostic ignored \\\"-Wold-style-cast\\\"\") \\\n       ((T) (expr)) \\\n       HEDLEY_DIAGNOSTIC_POP\n#  elif HEDLEY_IAR_VERSION_CHECK(8,3,0)\n#    define HEDLEY_CPP_CAST(T, expr) \\\n       HEDLEY_DIAGNOSTIC_PUSH \\\n       _Pragma(\"diag_suppress=Pe137\") \\\n       HEDLEY_DIAGNOSTIC_POP\n#  else\n#    define HEDLEY_CPP_CAST(T, expr) ((T) (expr))\n#  endif\n#else\n#  define HEDLEY_CPP_CAST(T, expr) (expr)\n#endif\n\n#if defined(HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED)\n#  undef HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED\n#endif\n#if HEDLEY_HAS_WARNING(\"-Wdeprecated-declarations\")\n#  define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma(\"clang diagnostic ignored \\\"-Wdeprecated-declarations\\\"\")\n#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0)\n#  define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma(\"warning(disable:1478 1786)\")\n#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0)\n#  define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:1478 1786))\n#elif HEDLEY_PGI_VERSION_CHECK(20,7,0)\n#  define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma(\"diag_suppress 1215,1216,1444,1445\")\n#elif HEDLEY_PGI_VERSION_CHECK(17,10,0)\n#  define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma(\"diag_suppress 1215,1444\")\n#elif HEDLEY_GCC_VERSION_CHECK(4,3,0)\n#  define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma(\"GCC diagnostic ignored \\\"-Wdeprecated-declarations\\\"\")\n#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0)\n#  define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:4996))\n#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10)\n#  define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma(\"diag_suppress 1215,1444\")\n#elif \\\n    HEDLEY_TI_VERSION_CHECK(15,12,0) || \\\n    (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \\\n    HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \\\n    (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \\\n    HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \\\n    (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \\\n    HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \\\n    (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \\\n    HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \\\n    HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \\\n    HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0)\n#  define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma(\"diag_suppress 1291,1718\")\n#elif HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && !defined(__cplusplus)\n#  define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma(\"error_messages(off,E_DEPRECATED_ATT,E_DEPRECATED_ATT_MESS)\")\n#elif HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && defined(__cplusplus)\n#  define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma(\"error_messages(off,symdeprecated,symdeprecated2)\")\n#elif HEDLEY_IAR_VERSION_CHECK(8,0,0)\n#  define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma(\"diag_suppress=Pe1444,Pe1215\")\n#elif HEDLEY_PELLES_VERSION_CHECK(2,90,0)\n#  define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma(\"warn(disable:2241)\")\n#else\n#  define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED\n#endif\n\n#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS)\n#  undef HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS\n#endif\n#if 
HEDLEY_HAS_WARNING(\"-Wunknown-pragmas\")\n#  define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma(\"clang diagnostic ignored \\\"-Wunknown-pragmas\\\"\")\n#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0)\n#  define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma(\"warning(disable:161)\")\n#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0)\n#  define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:161))\n#elif HEDLEY_PGI_VERSION_CHECK(17,10,0)\n#  define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma(\"diag_suppress 1675\")\n#elif HEDLEY_GCC_VERSION_CHECK(4,3,0)\n#  define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma(\"GCC diagnostic ignored \\\"-Wunknown-pragmas\\\"\")\n#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0)\n#  define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:4068))\n#elif \\\n    HEDLEY_TI_VERSION_CHECK(16,9,0) || \\\n    HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \\\n    HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \\\n    HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0)\n#  define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma(\"diag_suppress 163\")\n#elif HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0)\n#  define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma(\"diag_suppress 163\")\n#elif HEDLEY_IAR_VERSION_CHECK(8,0,0)\n#  define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma(\"diag_suppress=Pe161\")\n#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10)\n#  define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma(\"diag_suppress 161\")\n#else\n#  define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS\n#endif\n\n#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES)\n#  undef HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES\n#endif\n#if HEDLEY_HAS_WARNING(\"-Wunknown-attributes\")\n#  define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma(\"clang diagnostic ignored \\\"-Wunknown-attributes\\\"\")\n#elif HEDLEY_GCC_VERSION_CHECK(4,6,0)\n#  define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma(\"GCC diagnostic ignored \\\"-Wdeprecated-declarations\\\"\")\n#elif HEDLEY_INTEL_VERSION_CHECK(17,0,0)\n#  define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma(\"warning(disable:1292)\")\n#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0)\n#  define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:1292))\n#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0)\n#  define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:5030))\n#elif HEDLEY_PGI_VERSION_CHECK(20,7,0)\n#  define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma(\"diag_suppress 1097,1098\")\n#elif HEDLEY_PGI_VERSION_CHECK(17,10,0)\n#  define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma(\"diag_suppress 1097\")\n#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus)\n#  define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma(\"error_messages(off,attrskipunsup)\")\n#elif \\\n    HEDLEY_TI_VERSION_CHECK(18,1,0) || \\\n    HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \\\n    HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0)\n#  define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma(\"diag_suppress 1173\")\n#elif HEDLEY_IAR_VERSION_CHECK(8,0,0)\n#  define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma(\"diag_suppress=Pe1097\")\n#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10)\n#  define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma(\"diag_suppress 1097\")\n#else\n#  define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES\n#endif\n\n#if defined(HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL)\n#  undef 
HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL\n#endif\n#if HEDLEY_HAS_WARNING(\"-Wcast-qual\")\n#  define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma(\"clang diagnostic ignored \\\"-Wcast-qual\\\"\")\n#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0)\n#  define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma(\"warning(disable:2203 2331)\")\n#elif HEDLEY_GCC_VERSION_CHECK(3,0,0)\n#  define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma(\"GCC diagnostic ignored \\\"-Wcast-qual\\\"\")\n#else\n#  define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL\n#endif\n\n#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION)\n#  undef HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION\n#endif\n#if HEDLEY_HAS_WARNING(\"-Wunused-function\")\n#  define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma(\"clang diagnostic ignored \\\"-Wunused-function\\\"\")\n#elif HEDLEY_GCC_VERSION_CHECK(3,4,0)\n#  define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma(\"GCC diagnostic ignored \\\"-Wunused-function\\\"\")\n#elif HEDLEY_MSVC_VERSION_CHECK(1,0,0)\n#  define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION __pragma(warning(disable:4505))\n#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10)\n#  define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma(\"diag_suppress 3142\")\n#else\n#  define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION\n#endif\n\n#if defined(HEDLEY_DEPRECATED)\n#  undef HEDLEY_DEPRECATED\n#endif\n#if defined(HEDLEY_DEPRECATED_FOR)\n#  undef HEDLEY_DEPRECATED_FOR\n#endif\n#if \\\n  HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \\\n  HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0)\n#  define HEDLEY_DEPRECATED(since) __declspec(deprecated(\"Since \" # since))\n#  define HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated(\"Since \" #since \"; use \" #replacement))\n#elif \\\n  (HEDLEY_HAS_EXTENSION(attribute_deprecated_with_message) && !defined(HEDLEY_IAR_VERSION)) || \\\n  HEDLEY_GCC_VERSION_CHECK(4,5,0) || \\\n  HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \\\n  HEDLEY_ARM_VERSION_CHECK(5,6,0) || \\\n  HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \\\n  HEDLEY_PGI_VERSION_CHECK(17,10,0) || \\\n  HEDLEY_TI_VERSION_CHECK(18,1,0) || \\\n  HEDLEY_TI_ARMCL_VERSION_CHECK(18,1,0) || \\\n  HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \\\n  HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \\\n  HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) || \\\n  HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10)\n#  define HEDLEY_DEPRECATED(since) __attribute__((__deprecated__(\"Since \" #since)))\n#  define HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__(\"Since \" #since \"; use \" #replacement)))\n#elif defined(__cplusplus) && (__cplusplus >= 201402L)\n#  define HEDLEY_DEPRECATED(since) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated(\"Since \" #since)]])\n#  define HEDLEY_DEPRECATED_FOR(since, replacement) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated(\"Since \" #since \"; use \" #replacement)]])\n#elif \\\n  HEDLEY_HAS_ATTRIBUTE(deprecated) || \\\n  HEDLEY_GCC_VERSION_CHECK(3,1,0) || \\\n  HEDLEY_ARM_VERSION_CHECK(4,1,0) || \\\n  HEDLEY_TI_VERSION_CHECK(15,12,0) || \\\n  (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \\\n  HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \\\n  (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \\\n  HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \\\n  (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \\\n  HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \\\n  (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \\\n  
HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \\\n  HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \\\n  HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \\\n  HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \\\n  HEDLEY_IAR_VERSION_CHECK(8,10,0)\n#  define HEDLEY_DEPRECATED(since) __attribute__((__deprecated__))\n#  define HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__))\n#elif \\\n  HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \\\n  HEDLEY_PELLES_VERSION_CHECK(6,50,0) || \\\n  HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0)\n#  define HEDLEY_DEPRECATED(since) __declspec(deprecated)\n#  define HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated)\n#elif HEDLEY_IAR_VERSION_CHECK(8,0,0)\n#  define HEDLEY_DEPRECATED(since) _Pragma(\"deprecated\")\n#  define HEDLEY_DEPRECATED_FOR(since, replacement) _Pragma(\"deprecated\")\n#else\n#  define HEDLEY_DEPRECATED(since)\n#  define HEDLEY_DEPRECATED_FOR(since, replacement)\n#endif\n\n#if defined(HEDLEY_UNAVAILABLE)\n#  undef HEDLEY_UNAVAILABLE\n#endif\n#if \\\n  HEDLEY_HAS_ATTRIBUTE(warning) || \\\n  HEDLEY_GCC_VERSION_CHECK(4,3,0) || \\\n  HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \\\n  HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10)\n#  define HEDLEY_UNAVAILABLE(available_since) __attribute__((__warning__(\"Not available until \" #available_since)))\n#else\n#  define HEDLEY_UNAVAILABLE(available_since)\n#endif\n\n#if defined(HEDLEY_WARN_UNUSED_RESULT)\n#  undef HEDLEY_WARN_UNUSED_RESULT\n#endif\n#if defined(HEDLEY_WARN_UNUSED_RESULT_MSG)\n#  undef HEDLEY_WARN_UNUSED_RESULT_MSG\n#endif\n#if \\\n  HEDLEY_HAS_ATTRIBUTE(warn_unused_result) || \\\n  HEDLEY_GCC_VERSION_CHECK(3,4,0) || \\\n  HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \\\n  HEDLEY_TI_VERSION_CHECK(15,12,0) || \\\n  (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \\\n  HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \\\n  (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \\\n  HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \\\n  (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \\\n  HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \\\n  (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \\\n  HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \\\n  HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \\\n  HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \\\n  (HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \\\n  HEDLEY_PGI_VERSION_CHECK(17,10,0) || \\\n  HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10)\n#  define HEDLEY_WARN_UNUSED_RESULT __attribute__((__warn_unused_result__))\n#  define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) __attribute__((__warn_unused_result__))\n#elif (HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) >= 201907L)\n#  define HEDLEY_WARN_UNUSED_RESULT HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]])\n#  define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard(msg)]])\n#elif HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard)\n#  define HEDLEY_WARN_UNUSED_RESULT HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]])\n#  define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]])\n#elif defined(_Check_return_) /* SAL */\n#  define HEDLEY_WARN_UNUSED_RESULT _Check_return_\n#  define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) _Check_return_\n#else\n#  define HEDLEY_WARN_UNUSED_RESULT\n#  define HEDLEY_WARN_UNUSED_RESULT_MSG(msg)\n#endif\n\n#if defined(HEDLEY_SENTINEL)\n#  undef HEDLEY_SENTINEL\n#endif\n#if \\\n  HEDLEY_HAS_ATTRIBUTE(sentinel) || \\\n  
HEDLEY_GCC_VERSION_CHECK(4,0,0) || \\\n  HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \\\n  HEDLEY_ARM_VERSION_CHECK(5,4,0) || \\\n  HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10)\n#  define HEDLEY_SENTINEL(position) __attribute__((__sentinel__(position)))\n#else\n#  define HEDLEY_SENTINEL(position)\n#endif\n\n#if defined(HEDLEY_NO_RETURN)\n#  undef HEDLEY_NO_RETURN\n#endif\n#if HEDLEY_IAR_VERSION_CHECK(8,0,0)\n#  define HEDLEY_NO_RETURN __noreturn\n#elif \\\n  HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \\\n  HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10)\n#  define HEDLEY_NO_RETURN __attribute__((__noreturn__))\n#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L\n#  define HEDLEY_NO_RETURN _Noreturn\n#elif defined(__cplusplus) && (__cplusplus >= 201103L)\n#  define HEDLEY_NO_RETURN HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[noreturn]])\n#elif \\\n  HEDLEY_HAS_ATTRIBUTE(noreturn) || \\\n  HEDLEY_GCC_VERSION_CHECK(3,2,0) || \\\n  HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \\\n  HEDLEY_ARM_VERSION_CHECK(4,1,0) || \\\n  HEDLEY_IBM_VERSION_CHECK(10,1,0) || \\\n  HEDLEY_TI_VERSION_CHECK(15,12,0) || \\\n  (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \\\n  HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \\\n  (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \\\n  HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \\\n  (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \\\n  HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \\\n  (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \\\n  HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \\\n  HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \\\n  HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \\\n  HEDLEY_IAR_VERSION_CHECK(8,10,0)\n#  define HEDLEY_NO_RETURN __attribute__((__noreturn__))\n#elif HEDLEY_SUNPRO_VERSION_CHECK(5,10,0)\n#  define HEDLEY_NO_RETURN _Pragma(\"does_not_return\")\n#elif \\\n  HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \\\n  HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0)\n#  define HEDLEY_NO_RETURN __declspec(noreturn)\n#elif HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus)\n#  define HEDLEY_NO_RETURN _Pragma(\"FUNC_NEVER_RETURNS;\")\n#elif HEDLEY_COMPCERT_VERSION_CHECK(3,2,0)\n#  define HEDLEY_NO_RETURN __attribute((noreturn))\n#elif HEDLEY_PELLES_VERSION_CHECK(9,0,0)\n#  define HEDLEY_NO_RETURN __declspec(noreturn)\n#else\n#  define HEDLEY_NO_RETURN\n#endif\n\n#if defined(HEDLEY_NO_ESCAPE)\n#  undef HEDLEY_NO_ESCAPE\n#endif\n#if HEDLEY_HAS_ATTRIBUTE(noescape)\n#  define HEDLEY_NO_ESCAPE __attribute__((__noescape__))\n#else\n#  define HEDLEY_NO_ESCAPE\n#endif\n\n#if defined(HEDLEY_UNREACHABLE)\n#  undef HEDLEY_UNREACHABLE\n#endif\n#if defined(HEDLEY_UNREACHABLE_RETURN)\n#  undef HEDLEY_UNREACHABLE_RETURN\n#endif\n#if defined(HEDLEY_ASSUME)\n#  undef HEDLEY_ASSUME\n#endif\n#if \\\n  HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \\\n  HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \\\n  HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0)\n#  define HEDLEY_ASSUME(expr) __assume(expr)\n#elif HEDLEY_HAS_BUILTIN(__builtin_assume)\n#  define HEDLEY_ASSUME(expr) __builtin_assume(expr)\n#elif \\\n    HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \\\n    HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0)\n#  if defined(__cplusplus)\n#    define HEDLEY_ASSUME(expr) std::_nassert(expr)\n#  else\n#    define HEDLEY_ASSUME(expr) _nassert(expr)\n#  endif\n#endif\n#if \\\n  (HEDLEY_HAS_BUILTIN(__builtin_unreachable) && (!defined(HEDLEY_ARM_VERSION))) || \\\n  HEDLEY_GCC_VERSION_CHECK(4,5,0) || \\\n  
HEDLEY_PGI_VERSION_CHECK(18,10,0) || \\\n  HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \\\n  HEDLEY_IBM_VERSION_CHECK(13,1,5) || \\\n  HEDLEY_CRAY_VERSION_CHECK(10,0,0) || \\\n  HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10)\n#  define HEDLEY_UNREACHABLE() __builtin_unreachable()\n#elif defined(HEDLEY_ASSUME)\n#  define HEDLEY_UNREACHABLE() HEDLEY_ASSUME(0)\n#endif\n#if !defined(HEDLEY_ASSUME)\n#  if defined(HEDLEY_UNREACHABLE)\n#    define HEDLEY_ASSUME(expr) HEDLEY_STATIC_CAST(void, ((expr) ? 1 : (HEDLEY_UNREACHABLE(), 1)))\n#  else\n#    define HEDLEY_ASSUME(expr) HEDLEY_STATIC_CAST(void, expr)\n#  endif\n#endif\n#if defined(HEDLEY_UNREACHABLE)\n#  if  \\\n      HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \\\n      HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0)\n#    define HEDLEY_UNREACHABLE_RETURN(value) return (HEDLEY_STATIC_CAST(void, HEDLEY_ASSUME(0)), (value))\n#  else\n#    define HEDLEY_UNREACHABLE_RETURN(value) HEDLEY_UNREACHABLE()\n#  endif\n#else\n#  define HEDLEY_UNREACHABLE_RETURN(value) return (value)\n#endif\n#if !defined(HEDLEY_UNREACHABLE)\n#  define HEDLEY_UNREACHABLE() HEDLEY_ASSUME(0)\n#endif\n\nHEDLEY_DIAGNOSTIC_PUSH\n#if HEDLEY_HAS_WARNING(\"-Wpedantic\")\n#  pragma clang diagnostic ignored \"-Wpedantic\"\n#endif\n#if HEDLEY_HAS_WARNING(\"-Wc++98-compat-pedantic\") && defined(__cplusplus)\n#  pragma clang diagnostic ignored \"-Wc++98-compat-pedantic\"\n#endif\n#if HEDLEY_GCC_HAS_WARNING(\"-Wvariadic-macros\",4,0,0)\n#  if defined(__clang__)\n#    pragma clang diagnostic ignored \"-Wvariadic-macros\"\n#  elif defined(HEDLEY_GCC_VERSION)\n#    pragma GCC diagnostic ignored \"-Wvariadic-macros\"\n#  endif\n#endif\n#if defined(HEDLEY_NON_NULL)\n#  undef HEDLEY_NON_NULL\n#endif\n#if \\\n  HEDLEY_HAS_ATTRIBUTE(nonnull) || \\\n  HEDLEY_GCC_VERSION_CHECK(3,3,0) || \\\n  HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \\\n  HEDLEY_ARM_VERSION_CHECK(4,1,0)\n#  define HEDLEY_NON_NULL(...) 
__attribute__((__nonnull__(__VA_ARGS__)))\n#else\n#  define HEDLEY_NON_NULL(...)\n#endif\nHEDLEY_DIAGNOSTIC_POP\n\n#if defined(HEDLEY_PRINTF_FORMAT)\n#  undef HEDLEY_PRINTF_FORMAT\n#endif\n#if defined(__MINGW32__) && HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && !defined(__USE_MINGW_ANSI_STDIO)\n#  define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(ms_printf, string_idx, first_to_check)))\n#elif defined(__MINGW32__) && HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && defined(__USE_MINGW_ANSI_STDIO)\n#  define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(gnu_printf, string_idx, first_to_check)))\n#elif \\\n  HEDLEY_HAS_ATTRIBUTE(format) || \\\n  HEDLEY_GCC_VERSION_CHECK(3,1,0) || \\\n  HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \\\n  HEDLEY_ARM_VERSION_CHECK(5,6,0) || \\\n  HEDLEY_IBM_VERSION_CHECK(10,1,0) || \\\n  HEDLEY_TI_VERSION_CHECK(15,12,0) || \\\n  (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \\\n  HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \\\n  (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \\\n  HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \\\n  (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \\\n  HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \\\n  (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \\\n  HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \\\n  HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \\\n  HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \\\n  HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10)\n#  define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(__printf__, string_idx, first_to_check)))\n#elif HEDLEY_PELLES_VERSION_CHECK(6,0,0)\n#  define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __declspec(vaformat(printf,string_idx,first_to_check))\n#else\n#  define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check)\n#endif\n\n#if defined(HEDLEY_CONSTEXPR)\n#  undef HEDLEY_CONSTEXPR\n#endif\n#if defined(__cplusplus)\n#  if __cplusplus >= 201103L\n#    define HEDLEY_CONSTEXPR HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(constexpr)\n#  endif\n#endif\n#if !defined(HEDLEY_CONSTEXPR)\n#  define HEDLEY_CONSTEXPR\n#endif\n\n#if defined(HEDLEY_PREDICT)\n#  undef HEDLEY_PREDICT\n#endif\n#if defined(HEDLEY_LIKELY)\n#  undef HEDLEY_LIKELY\n#endif\n#if defined(HEDLEY_UNLIKELY)\n#  undef HEDLEY_UNLIKELY\n#endif\n#if defined(HEDLEY_UNPREDICTABLE)\n#  undef HEDLEY_UNPREDICTABLE\n#endif\n#if HEDLEY_HAS_BUILTIN(__builtin_unpredictable)\n#  define HEDLEY_UNPREDICTABLE(expr) __builtin_unpredictable((expr))\n#endif\n#if \\\n  (HEDLEY_HAS_BUILTIN(__builtin_expect_with_probability) && !defined(HEDLEY_PGI_VERSION) && !defined(HEDLEY_INTEL_VERSION)) || \\\n  HEDLEY_GCC_VERSION_CHECK(9,0,0) || \\\n  HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10)\n#  define HEDLEY_PREDICT(expr, value, probability) __builtin_expect_with_probability(  (expr), (value), (probability))\n#  define HEDLEY_PREDICT_TRUE(expr, probability)   __builtin_expect_with_probability(!!(expr),    1   , (probability))\n#  define HEDLEY_PREDICT_FALSE(expr, probability)  __builtin_expect_with_probability(!!(expr),    0   , (probability))\n#  define HEDLEY_LIKELY(expr)                      __builtin_expect                 (!!(expr),    1                  )\n#  define HEDLEY_UNLIKELY(expr)                    __builtin_expect                 (!!(expr),    0                  )\n#elif \\\n  (HEDLEY_HAS_BUILTIN(__builtin_expect) && !defined(HEDLEY_INTEL_CL_VERSION)) || \\\n  
HEDLEY_GCC_VERSION_CHECK(3,0,0) || \\\n  HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \\\n  (HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \\\n  HEDLEY_ARM_VERSION_CHECK(4,1,0) || \\\n  HEDLEY_IBM_VERSION_CHECK(10,1,0) || \\\n  HEDLEY_TI_VERSION_CHECK(15,12,0) || \\\n  HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \\\n  HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \\\n  HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \\\n  HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \\\n  HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \\\n  HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \\\n  HEDLEY_TINYC_VERSION_CHECK(0,9,27) || \\\n  HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \\\n  HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10)\n#  define HEDLEY_PREDICT(expr, expected, probability) \\\n     (((probability) >= 0.9) ? __builtin_expect((expr), (expected)) : (HEDLEY_STATIC_CAST(void, expected), (expr)))\n#  define HEDLEY_PREDICT_TRUE(expr, probability) \\\n     (__extension__ ({ \\\n       double hedley_probability_ = (probability); \\\n       ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 1) : ((hedley_probability_ <= 0.1) ? __builtin_expect(!!(expr), 0) : !!(expr))); \\\n     }))\n#  define HEDLEY_PREDICT_FALSE(expr, probability) \\\n     (__extension__ ({ \\\n       double hedley_probability_ = (probability); \\\n       ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 0) : ((hedley_probability_ <= 0.1) ? __builtin_expect(!!(expr), 1) : !!(expr))); \\\n     }))\n#  define HEDLEY_LIKELY(expr)   __builtin_expect(!!(expr), 1)\n#  define HEDLEY_UNLIKELY(expr) __builtin_expect(!!(expr), 0)\n#else\n#  define HEDLEY_PREDICT(expr, expected, probability) (HEDLEY_STATIC_CAST(void, expected), (expr))\n#  define HEDLEY_PREDICT_TRUE(expr, probability) (!!(expr))\n#  define HEDLEY_PREDICT_FALSE(expr, probability) (!!(expr))\n#  define HEDLEY_LIKELY(expr) (!!(expr))\n#  define HEDLEY_UNLIKELY(expr) (!!(expr))\n#endif\n#if !defined(HEDLEY_UNPREDICTABLE)\n#  define HEDLEY_UNPREDICTABLE(expr) HEDLEY_PREDICT(expr, 1, 0.5)\n#endif\n\n#if defined(HEDLEY_MALLOC)\n#  undef HEDLEY_MALLOC\n#endif\n#if \\\n  HEDLEY_HAS_ATTRIBUTE(malloc) || \\\n  HEDLEY_GCC_VERSION_CHECK(3,1,0) || \\\n  HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \\\n  HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \\\n  HEDLEY_ARM_VERSION_CHECK(4,1,0) || \\\n  HEDLEY_IBM_VERSION_CHECK(12,1,0) || \\\n  HEDLEY_TI_VERSION_CHECK(15,12,0) || \\\n  (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \\\n  HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \\\n  (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \\\n  HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \\\n  (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \\\n  HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \\\n  (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \\\n  HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \\\n  HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \\\n  HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \\\n  HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10)\n#  define HEDLEY_MALLOC __attribute__((__malloc__))\n#elif HEDLEY_SUNPRO_VERSION_CHECK(5,10,0)\n#  define HEDLEY_MALLOC _Pragma(\"returns_new_memory\")\n#elif \\\n  HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \\\n  HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0)\n#  define HEDLEY_MALLOC __declspec(restrict)\n#else\n#  define HEDLEY_MALLOC\n#endif\n\n#if defined(HEDLEY_PURE)\n#  undef HEDLEY_PURE\n#endif\n#if \\\n  HEDLEY_HAS_ATTRIBUTE(pure) || \\\n  HEDLEY_GCC_VERSION_CHECK(2,96,0) || \\\n  
HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \\\n  HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \\\n  HEDLEY_ARM_VERSION_CHECK(4,1,0) || \\\n  HEDLEY_IBM_VERSION_CHECK(10,1,0) || \\\n  HEDLEY_TI_VERSION_CHECK(15,12,0) || \\\n  (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \\\n  HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \\\n  (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \\\n  HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \\\n  (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \\\n  HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \\\n  (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \\\n  HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \\\n  HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \\\n  HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \\\n  HEDLEY_PGI_VERSION_CHECK(17,10,0) || \\\n  HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10)\n#  define HEDLEY_PURE __attribute__((__pure__))\n#elif HEDLEY_SUNPRO_VERSION_CHECK(5,10,0)\n#  define HEDLEY_PURE _Pragma(\"does_not_write_global_data\")\n#elif defined(__cplusplus) && \\\n    ( \\\n      HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \\\n      HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) || \\\n      HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) \\\n    )\n#  define HEDLEY_PURE _Pragma(\"FUNC_IS_PURE;\")\n#else\n#  define HEDLEY_PURE\n#endif\n\n#if defined(HEDLEY_CONST)\n#  undef HEDLEY_CONST\n#endif\n#if \\\n  HEDLEY_HAS_ATTRIBUTE(const) || \\\n  HEDLEY_GCC_VERSION_CHECK(2,5,0) || \\\n  HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \\\n  HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \\\n  HEDLEY_ARM_VERSION_CHECK(4,1,0) || \\\n  HEDLEY_IBM_VERSION_CHECK(10,1,0) || \\\n  HEDLEY_TI_VERSION_CHECK(15,12,0) || \\\n  (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \\\n  HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \\\n  (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \\\n  HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \\\n  (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \\\n  HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \\\n  (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \\\n  HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \\\n  HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \\\n  HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \\\n  HEDLEY_PGI_VERSION_CHECK(17,10,0) || \\\n  HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10)\n#  define HEDLEY_CONST __attribute__((__const__))\n#elif \\\n  HEDLEY_SUNPRO_VERSION_CHECK(5,10,0)\n#  define HEDLEY_CONST _Pragma(\"no_side_effect\")\n#else\n#  define HEDLEY_CONST HEDLEY_PURE\n#endif\n\n#if defined(HEDLEY_RESTRICT)\n#  undef HEDLEY_RESTRICT\n#endif\n#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && !defined(__cplusplus)\n#  define HEDLEY_RESTRICT restrict\n#elif \\\n  HEDLEY_GCC_VERSION_CHECK(3,1,0) || \\\n  HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \\\n  HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \\\n  HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \\\n  HEDLEY_ARM_VERSION_CHECK(4,1,0) || \\\n  HEDLEY_IBM_VERSION_CHECK(10,1,0) || \\\n  HEDLEY_PGI_VERSION_CHECK(17,10,0) || \\\n  HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \\\n  HEDLEY_TI_CL2000_VERSION_CHECK(6,2,4) || \\\n  HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \\\n  HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \\\n  (HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus)) || \\\n  HEDLEY_IAR_VERSION_CHECK(8,0,0) || \\\n  defined(__clang__) || \\\n  HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10)\n#  define HEDLEY_RESTRICT 
__restrict\n#elif HEDLEY_SUNPRO_VERSION_CHECK(5,3,0) && !defined(__cplusplus)\n#  define HEDLEY_RESTRICT _Restrict\n#else\n#  define HEDLEY_RESTRICT\n#endif\n\n#if defined(HEDLEY_INLINE)\n#  undef HEDLEY_INLINE\n#endif\n#if \\\n  (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \\\n  (defined(__cplusplus) && (__cplusplus >= 199711L))\n#  define HEDLEY_INLINE inline\n#elif \\\n  defined(HEDLEY_GCC_VERSION) || \\\n  HEDLEY_ARM_VERSION_CHECK(6,2,0)\n#  define HEDLEY_INLINE __inline__\n#elif \\\n  HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \\\n  HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \\\n  HEDLEY_ARM_VERSION_CHECK(4,1,0) || \\\n  HEDLEY_TI_ARMCL_VERSION_CHECK(5,1,0) || \\\n  HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \\\n  HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \\\n  HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \\\n  HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \\\n  HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \\\n  HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10)\n#  define HEDLEY_INLINE __inline\n#else\n#  define HEDLEY_INLINE\n#endif\n\n#if defined(HEDLEY_ALWAYS_INLINE)\n#  undef HEDLEY_ALWAYS_INLINE\n#endif\n#if \\\n  HEDLEY_HAS_ATTRIBUTE(always_inline) || \\\n  HEDLEY_GCC_VERSION_CHECK(4,0,0) || \\\n  HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \\\n  HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \\\n  HEDLEY_ARM_VERSION_CHECK(4,1,0) || \\\n  HEDLEY_IBM_VERSION_CHECK(10,1,0) || \\\n  HEDLEY_TI_VERSION_CHECK(15,12,0) || \\\n  (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \\\n  HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \\\n  (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \\\n  HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \\\n  (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \\\n  HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \\\n  (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \\\n  HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \\\n  HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \\\n  HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \\\n  HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \\\n  HEDLEY_IAR_VERSION_CHECK(8,10,0)\n#  define HEDLEY_ALWAYS_INLINE __attribute__((__always_inline__)) HEDLEY_INLINE\n#elif \\\n  HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \\\n  HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0)\n#  define HEDLEY_ALWAYS_INLINE __forceinline\n#elif defined(__cplusplus) && \\\n    ( \\\n      HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \\\n      HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \\\n      HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \\\n      HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \\\n      HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \\\n      HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) \\\n    )\n#  define HEDLEY_ALWAYS_INLINE _Pragma(\"FUNC_ALWAYS_INLINE;\")\n#elif HEDLEY_IAR_VERSION_CHECK(8,0,0)\n#  define HEDLEY_ALWAYS_INLINE _Pragma(\"inline=forced\")\n#else\n#  define HEDLEY_ALWAYS_INLINE HEDLEY_INLINE\n#endif\n\n#if defined(HEDLEY_NEVER_INLINE)\n#  undef HEDLEY_NEVER_INLINE\n#endif\n#if \\\n  HEDLEY_HAS_ATTRIBUTE(noinline) || \\\n  HEDLEY_GCC_VERSION_CHECK(4,0,0) || \\\n  HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \\\n  HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \\\n  HEDLEY_ARM_VERSION_CHECK(4,1,0) || \\\n  HEDLEY_IBM_VERSION_CHECK(10,1,0) || \\\n  HEDLEY_TI_VERSION_CHECK(15,12,0) || \\\n  (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \\\n  HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \\\n  (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \\\n  
HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \\\n  (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \\\n  HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \\\n  (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \\\n  HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \\\n  HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \\\n  HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \\\n  HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \\\n  HEDLEY_IAR_VERSION_CHECK(8,10,0)\n#  define HEDLEY_NEVER_INLINE __attribute__((__noinline__))\n#elif \\\n  HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \\\n  HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0)\n#  define HEDLEY_NEVER_INLINE __declspec(noinline)\n#elif HEDLEY_PGI_VERSION_CHECK(10,2,0)\n#  define HEDLEY_NEVER_INLINE _Pragma(\"noinline\")\n#elif HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus)\n#  define HEDLEY_NEVER_INLINE _Pragma(\"FUNC_CANNOT_INLINE;\")\n#elif HEDLEY_IAR_VERSION_CHECK(8,0,0)\n#  define HEDLEY_NEVER_INLINE _Pragma(\"inline=never\")\n#elif HEDLEY_COMPCERT_VERSION_CHECK(3,2,0)\n#  define HEDLEY_NEVER_INLINE __attribute((noinline))\n#elif HEDLEY_PELLES_VERSION_CHECK(9,0,0)\n#  define HEDLEY_NEVER_INLINE __declspec(noinline)\n#else\n#  define HEDLEY_NEVER_INLINE\n#endif\n\n#if defined(HEDLEY_PRIVATE)\n#  undef HEDLEY_PRIVATE\n#endif\n#if defined(HEDLEY_PUBLIC)\n#  undef HEDLEY_PUBLIC\n#endif\n#if defined(HEDLEY_IMPORT)\n#  undef HEDLEY_IMPORT\n#endif\n#if defined(_WIN32) || defined(__CYGWIN__)\n#  define HEDLEY_PRIVATE\n#  define HEDLEY_PUBLIC   __declspec(dllexport)\n#  define HEDLEY_IMPORT   __declspec(dllimport)\n#else\n#  if \\\n    HEDLEY_HAS_ATTRIBUTE(visibility) || \\\n    HEDLEY_GCC_VERSION_CHECK(3,3,0) || \\\n    HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \\\n    HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \\\n    HEDLEY_ARM_VERSION_CHECK(4,1,0) || \\\n    HEDLEY_IBM_VERSION_CHECK(13,1,0) || \\\n    ( \\\n      defined(__TI_EABI__) && \\\n      ( \\\n        (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \\\n        HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) \\\n      ) \\\n    ) || \\\n    HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10)\n#    define HEDLEY_PRIVATE __attribute__((__visibility__(\"hidden\")))\n#    define HEDLEY_PUBLIC  __attribute__((__visibility__(\"default\")))\n#  else\n#    define HEDLEY_PRIVATE\n#    define HEDLEY_PUBLIC\n#  endif\n#  define HEDLEY_IMPORT    extern\n#endif\n\n#if defined(HEDLEY_NO_THROW)\n#  undef HEDLEY_NO_THROW\n#endif\n#if \\\n  HEDLEY_HAS_ATTRIBUTE(nothrow) || \\\n  HEDLEY_GCC_VERSION_CHECK(3,3,0) || \\\n  HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \\\n  HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10)\n#  define HEDLEY_NO_THROW __attribute__((__nothrow__))\n#elif \\\n  HEDLEY_MSVC_VERSION_CHECK(13,1,0) || \\\n  HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \\\n  HEDLEY_ARM_VERSION_CHECK(4,1,0)\n#  define HEDLEY_NO_THROW __declspec(nothrow)\n#else\n#  define HEDLEY_NO_THROW\n#endif\n\n#if defined(HEDLEY_FALL_THROUGH)\n# undef HEDLEY_FALL_THROUGH\n#endif\n#if defined(HEDLEY_INTEL_VERSION)\n#  define HEDLEY_FALL_THROUGH\n#elif \\\n  HEDLEY_HAS_ATTRIBUTE(fallthrough) || \\\n  HEDLEY_GCC_VERSION_CHECK(7,0,0) || \\\n  HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10)\n#  define HEDLEY_FALL_THROUGH __attribute__((__fallthrough__))\n#elif HEDLEY_HAS_CPP_ATTRIBUTE_NS(clang,fallthrough)\n#  define HEDLEY_FALL_THROUGH HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[clang::fallthrough]])\n#elif HEDLEY_HAS_CPP_ATTRIBUTE(fallthrough)\n#  define HEDLEY_FALL_THROUGH 
HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[fallthrough]])\n#elif defined(__fallthrough) /* SAL */\n#  define HEDLEY_FALL_THROUGH __fallthrough\n#else\n#  define HEDLEY_FALL_THROUGH\n#endif\n\n#if defined(HEDLEY_RETURNS_NON_NULL)\n#  undef HEDLEY_RETURNS_NON_NULL\n#endif\n#if \\\n  HEDLEY_HAS_ATTRIBUTE(returns_nonnull) || \\\n  HEDLEY_GCC_VERSION_CHECK(4,9,0) || \\\n  HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10)\n#  define HEDLEY_RETURNS_NON_NULL __attribute__((__returns_nonnull__))\n#elif defined(_Ret_notnull_) /* SAL */\n#  define HEDLEY_RETURNS_NON_NULL _Ret_notnull_\n#else\n#  define HEDLEY_RETURNS_NON_NULL\n#endif\n\n#if defined(HEDLEY_ARRAY_PARAM)\n#  undef HEDLEY_ARRAY_PARAM\n#endif\n#if \\\n  defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \\\n  !defined(__STDC_NO_VLA__) && \\\n  !defined(__cplusplus) && \\\n  !defined(HEDLEY_PGI_VERSION) && \\\n  !defined(HEDLEY_TINYC_VERSION)\n#  define HEDLEY_ARRAY_PARAM(name) (name)\n#else\n#  define HEDLEY_ARRAY_PARAM(name)\n#endif\n\n#if defined(HEDLEY_IS_CONSTANT)\n#  undef HEDLEY_IS_CONSTANT\n#endif\n#if defined(HEDLEY_REQUIRE_CONSTEXPR)\n#  undef HEDLEY_REQUIRE_CONSTEXPR\n#endif\n/* HEDLEY_IS_CONSTEXPR_ is for\n   HEDLEY INTERNAL USE ONLY.  API subject to change without notice. */\n#if defined(HEDLEY_IS_CONSTEXPR_)\n#  undef HEDLEY_IS_CONSTEXPR_\n#endif\n#if \\\n  HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \\\n  HEDLEY_GCC_VERSION_CHECK(3,4,0) || \\\n  HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \\\n  HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \\\n  HEDLEY_ARM_VERSION_CHECK(4,1,0) || \\\n  HEDLEY_IBM_VERSION_CHECK(13,1,0) || \\\n  HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \\\n  (HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \\\n  HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \\\n  HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10)\n#  define HEDLEY_IS_CONSTANT(expr) __builtin_constant_p(expr)\n#endif\n#if !defined(__cplusplus)\n#  if \\\n       HEDLEY_HAS_BUILTIN(__builtin_types_compatible_p) || \\\n       HEDLEY_GCC_VERSION_CHECK(3,4,0) || \\\n       HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \\\n       HEDLEY_IBM_VERSION_CHECK(13,1,0) || \\\n       HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \\\n       HEDLEY_ARM_VERSION_CHECK(5,4,0) || \\\n       HEDLEY_TINYC_VERSION_CHECK(0,9,24)\n#    if defined(__INTPTR_TYPE__)\n#      define HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0)), int*)\n#    else\n#      include <stdint.h>\n#      define HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((intptr_t) ((expr) * 0)) : (int*) 0)), int*)\n#    endif\n#  elif \\\n       ( \\\n          defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && \\\n          !defined(HEDLEY_SUNPRO_VERSION) && \\\n          !defined(HEDLEY_PGI_VERSION) && \\\n          !defined(HEDLEY_IAR_VERSION)) || \\\n       (HEDLEY_HAS_EXTENSION(c_generic_selections) && !defined(HEDLEY_IAR_VERSION)) || \\\n       HEDLEY_GCC_VERSION_CHECK(4,9,0) || \\\n       HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \\\n       HEDLEY_IBM_VERSION_CHECK(12,1,0) || \\\n       HEDLEY_ARM_VERSION_CHECK(5,3,0)\n#    if defined(__INTPTR_TYPE__)\n#      define HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0), int*: 1, void*: 0)\n#    else\n#      include <stdint.h>\n#      define HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? 
(void*) ((intptr_t) ((expr) * 0)) : (int*) 0), int*: 1, void*: 0)\n#    endif\n#  elif \\\n       defined(HEDLEY_GCC_VERSION) || \\\n       defined(HEDLEY_INTEL_VERSION) || \\\n       defined(HEDLEY_TINYC_VERSION) || \\\n       defined(HEDLEY_TI_ARMCL_VERSION) || \\\n       HEDLEY_TI_CL430_VERSION_CHECK(18,12,0) || \\\n       defined(HEDLEY_TI_CL2000_VERSION) || \\\n       defined(HEDLEY_TI_CL6X_VERSION) || \\\n       defined(HEDLEY_TI_CL7X_VERSION) || \\\n       defined(HEDLEY_TI_CLPRU_VERSION) || \\\n       defined(__clang__)\n#    define HEDLEY_IS_CONSTEXPR_(expr) ( \\\n         sizeof(void) != \\\n         sizeof(*( \\\n           1 ? \\\n             ((void*) ((expr) * 0L) ) : \\\n             ((struct { char v[sizeof(void) * 2]; } *) 1) \\\n           ) \\\n         ) \\\n       )\n#  endif\n#endif\n#if defined(HEDLEY_IS_CONSTEXPR_)\n#  if !defined(HEDLEY_IS_CONSTANT)\n#    define HEDLEY_IS_CONSTANT(expr) HEDLEY_IS_CONSTEXPR_(expr)\n#  endif\n#  define HEDLEY_REQUIRE_CONSTEXPR(expr) (HEDLEY_IS_CONSTEXPR_(expr) ? (expr) : (-1))\n#else\n#  if !defined(HEDLEY_IS_CONSTANT)\n#    define HEDLEY_IS_CONSTANT(expr) (0)\n#  endif\n#  define HEDLEY_REQUIRE_CONSTEXPR(expr) (expr)\n#endif\n\n#if defined(HEDLEY_BEGIN_C_DECLS)\n#  undef HEDLEY_BEGIN_C_DECLS\n#endif\n#if defined(HEDLEY_END_C_DECLS)\n#  undef HEDLEY_END_C_DECLS\n#endif\n#if defined(HEDLEY_C_DECL)\n#  undef HEDLEY_C_DECL\n#endif\n#if defined(__cplusplus)\n#  define HEDLEY_BEGIN_C_DECLS extern \"C\" {\n#  define HEDLEY_END_C_DECLS }\n#  define HEDLEY_C_DECL extern \"C\"\n#else\n#  define HEDLEY_BEGIN_C_DECLS\n#  define HEDLEY_END_C_DECLS\n#  define HEDLEY_C_DECL\n#endif\n\n#if defined(HEDLEY_STATIC_ASSERT)\n#  undef HEDLEY_STATIC_ASSERT\n#endif\n#if \\\n  !defined(__cplusplus) && ( \\\n      (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \\\n      (HEDLEY_HAS_FEATURE(c_static_assert) && !defined(HEDLEY_INTEL_CL_VERSION)) || \\\n      HEDLEY_GCC_VERSION_CHECK(6,0,0) || \\\n      HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \\\n      defined(_Static_assert) \\\n    )\n#  define HEDLEY_STATIC_ASSERT(expr, message) _Static_assert(expr, message)\n#elif \\\n  (defined(__cplusplus) && (__cplusplus >= 201103L)) || \\\n  HEDLEY_MSVC_VERSION_CHECK(16,0,0) || \\\n  HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0)\n#  define HEDLEY_STATIC_ASSERT(expr, message) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message))\n#else\n#  define HEDLEY_STATIC_ASSERT(expr, message)\n#endif\n\n#if defined(HEDLEY_NULL)\n#  undef HEDLEY_NULL\n#endif\n#if defined(__cplusplus)\n#  if __cplusplus >= 201103L\n#    define HEDLEY_NULL HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(nullptr)\n#  elif defined(NULL)\n#    define HEDLEY_NULL NULL\n#  else\n#    define HEDLEY_NULL HEDLEY_STATIC_CAST(void*, 0)\n#  endif\n#elif defined(NULL)\n#  define HEDLEY_NULL NULL\n#else\n#  define HEDLEY_NULL ((void*) 0)\n#endif\n\n#if defined(HEDLEY_MESSAGE)\n#  undef HEDLEY_MESSAGE\n#endif\n#if HEDLEY_HAS_WARNING(\"-Wunknown-pragmas\")\n#  define HEDLEY_MESSAGE(msg) \\\n  HEDLEY_DIAGNOSTIC_PUSH \\\n  HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \\\n  HEDLEY_PRAGMA(message msg) \\\n  HEDLEY_DIAGNOSTIC_POP\n#elif \\\n  HEDLEY_GCC_VERSION_CHECK(4,4,0) || \\\n  HEDLEY_INTEL_VERSION_CHECK(13,0,0)\n#  define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message msg)\n#elif HEDLEY_CRAY_VERSION_CHECK(5,0,0)\n#  define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(_CRI message msg)\n#elif HEDLEY_IAR_VERSION_CHECK(8,0,0)\n#  define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message(msg))\n#elif 
HEDLEY_PELLES_VERSION_CHECK(2,0,0)\n#  define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message(msg))\n#else\n#  define HEDLEY_MESSAGE(msg)\n#endif\n\n#if defined(HEDLEY_WARNING)\n#  undef HEDLEY_WARNING\n#endif\n#if HEDLEY_HAS_WARNING(\"-Wunknown-pragmas\")\n#  define HEDLEY_WARNING(msg) \\\n  HEDLEY_DIAGNOSTIC_PUSH \\\n  HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \\\n  HEDLEY_PRAGMA(clang warning msg) \\\n  HEDLEY_DIAGNOSTIC_POP\n#elif \\\n  HEDLEY_GCC_VERSION_CHECK(4,8,0) || \\\n  HEDLEY_PGI_VERSION_CHECK(18,4,0) || \\\n  HEDLEY_INTEL_VERSION_CHECK(13,0,0)\n#  define HEDLEY_WARNING(msg) HEDLEY_PRAGMA(GCC warning msg)\n#elif \\\n  HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \\\n  HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0)\n#  define HEDLEY_WARNING(msg) HEDLEY_PRAGMA(message(msg))\n#else\n#  define HEDLEY_WARNING(msg) HEDLEY_MESSAGE(msg)\n#endif\n\n#if defined(HEDLEY_REQUIRE)\n#  undef HEDLEY_REQUIRE\n#endif\n#if defined(HEDLEY_REQUIRE_MSG)\n#  undef HEDLEY_REQUIRE_MSG\n#endif\n#if HEDLEY_HAS_ATTRIBUTE(diagnose_if)\n#  if HEDLEY_HAS_WARNING(\"-Wgcc-compat\")\n#    define HEDLEY_REQUIRE(expr) \\\n       HEDLEY_DIAGNOSTIC_PUSH \\\n       _Pragma(\"clang diagnostic ignored \\\"-Wgcc-compat\\\"\") \\\n       __attribute__((diagnose_if(!(expr), #expr, \"error\"))) \\\n       HEDLEY_DIAGNOSTIC_POP\n#    define HEDLEY_REQUIRE_MSG(expr,msg) \\\n       HEDLEY_DIAGNOSTIC_PUSH \\\n       _Pragma(\"clang diagnostic ignored \\\"-Wgcc-compat\\\"\") \\\n       __attribute__((diagnose_if(!(expr), msg, \"error\"))) \\\n       HEDLEY_DIAGNOSTIC_POP\n#  else\n#    define HEDLEY_REQUIRE(expr) __attribute__((diagnose_if(!(expr), #expr, \"error\")))\n#    define HEDLEY_REQUIRE_MSG(expr,msg) __attribute__((diagnose_if(!(expr), msg, \"error\")))\n#  endif\n#else\n#  define HEDLEY_REQUIRE(expr)\n#  define HEDLEY_REQUIRE_MSG(expr,msg)\n#endif\n\n#if defined(HEDLEY_FLAGS)\n#  undef HEDLEY_FLAGS\n#endif\n#if HEDLEY_HAS_ATTRIBUTE(flag_enum) && (!defined(__cplusplus) || HEDLEY_HAS_WARNING(\"-Wbitfield-enum-conversion\"))\n#  define HEDLEY_FLAGS __attribute__((__flag_enum__))\n#else\n#  define HEDLEY_FLAGS\n#endif\n\n#if defined(HEDLEY_FLAGS_CAST)\n#  undef HEDLEY_FLAGS_CAST\n#endif\n#if HEDLEY_INTEL_VERSION_CHECK(19,0,0)\n#  define HEDLEY_FLAGS_CAST(T, expr) (__extension__ ({ \\\n  HEDLEY_DIAGNOSTIC_PUSH \\\n      _Pragma(\"warning(disable:188)\") \\\n      ((T) (expr)); \\\n      HEDLEY_DIAGNOSTIC_POP \\\n    }))\n#else\n#  define HEDLEY_FLAGS_CAST(T, expr) HEDLEY_STATIC_CAST(T, expr)\n#endif\n\n#if defined(HEDLEY_EMPTY_BASES)\n#  undef HEDLEY_EMPTY_BASES\n#endif\n#if \\\n  (HEDLEY_MSVC_VERSION_CHECK(19,0,23918) && !HEDLEY_MSVC_VERSION_CHECK(20,0,0)) || \\\n  HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0)\n#  define HEDLEY_EMPTY_BASES __declspec(empty_bases)\n#else\n#  define HEDLEY_EMPTY_BASES\n#endif\n\n/* Remaining macros are deprecated. 
*/\n\n#if defined(HEDLEY_GCC_NOT_CLANG_VERSION_CHECK)\n#  undef HEDLEY_GCC_NOT_CLANG_VERSION_CHECK\n#endif\n#if defined(__clang__)\n#  define HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) (0)\n#else\n#  define HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch)\n#endif\n\n#if defined(HEDLEY_CLANG_HAS_ATTRIBUTE)\n#  undef HEDLEY_CLANG_HAS_ATTRIBUTE\n#endif\n#define HEDLEY_CLANG_HAS_ATTRIBUTE(attribute) HEDLEY_HAS_ATTRIBUTE(attribute)\n\n#if defined(HEDLEY_CLANG_HAS_CPP_ATTRIBUTE)\n#  undef HEDLEY_CLANG_HAS_CPP_ATTRIBUTE\n#endif\n#define HEDLEY_CLANG_HAS_CPP_ATTRIBUTE(attribute) HEDLEY_HAS_CPP_ATTRIBUTE(attribute)\n\n#if defined(HEDLEY_CLANG_HAS_BUILTIN)\n#  undef HEDLEY_CLANG_HAS_BUILTIN\n#endif\n#define HEDLEY_CLANG_HAS_BUILTIN(builtin) HEDLEY_HAS_BUILTIN(builtin)\n\n#if defined(HEDLEY_CLANG_HAS_FEATURE)\n#  undef HEDLEY_CLANG_HAS_FEATURE\n#endif\n#define HEDLEY_CLANG_HAS_FEATURE(feature) HEDLEY_HAS_FEATURE(feature)\n\n#if defined(HEDLEY_CLANG_HAS_EXTENSION)\n#  undef HEDLEY_CLANG_HAS_EXTENSION\n#endif\n#define HEDLEY_CLANG_HAS_EXTENSION(extension) HEDLEY_HAS_EXTENSION(extension)\n\n#if defined(HEDLEY_CLANG_HAS_DECLSPEC_ATTRIBUTE)\n#  undef HEDLEY_CLANG_HAS_DECLSPEC_ATTRIBUTE\n#endif\n#define HEDLEY_CLANG_HAS_DECLSPEC_ATTRIBUTE(attribute) HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute)\n\n#if defined(HEDLEY_CLANG_HAS_WARNING)\n#  undef HEDLEY_CLANG_HAS_WARNING\n#endif\n#define HEDLEY_CLANG_HAS_WARNING(warning) HEDLEY_HAS_WARNING(warning)\n\n#endif /* !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < X) */\n"
  },
  {
    "path": "external_libs/pgenlib/simde/simde-aes.h",
    "content": "/* SPDX-License-Identifier: MIT\n *\n * Permission is hereby granted, free of charge, to any person\n * obtaining a copy of this software and associated documentation\n * files (the \"Software\"), to deal in the Software without\n * restriction, including without limitation the rights to use, copy,\n * modify, merge, publish, distribute, sublicense, and/or sell copies\n * of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be\n * included in all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND,\n * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\n * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\n * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\n * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\n * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\n * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n * SOFTWARE.\n *\n * Copyright:\n *   2023      Yi-Yen Chung <eric681@andestech.com> (Copyright owned by Andes Technology)\n */\n\n#if !defined(SIMDE_AES_H)\n#define SIMDE_AES_H\n\n#include \"simde-features.h\"\n\nHEDLEY_DIAGNOSTIC_PUSH\nSIMDE_DISABLE_UNWANTED_DIAGNOSTICS\n\n#if !(defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARCH_ARM_AES) && \\\n      defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARCH_ARM_CRYPTO))\n\n/*\n * Number of columns (32-bit words) comprising the State. For this\n * standard, Nb = 4.\n */\n#define simde_x_aes_Nb 4\n\nstatic uint8_t simde_x_aes_gmult_lookup_table[8][256] = {\n{ // gmult(0x02, b);\n  0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e,\n  0x20, 0x22, 0x24, 0x26, 0x28, 0x2a, 0x2c, 0x2e, 0x30, 0x32, 0x34, 0x36, 0x38, 0x3a, 0x3c, 0x3e,\n  0x40, 0x42, 0x44, 0x46, 0x48, 0x4a, 0x4c, 0x4e, 0x50, 0x52, 0x54, 0x56, 0x58, 0x5a, 0x5c, 0x5e,\n  0x60, 0x62, 0x64, 0x66, 0x68, 0x6a, 0x6c, 0x6e, 0x70, 0x72, 0x74, 0x76, 0x78, 0x7a, 0x7c, 0x7e,\n  0x80, 0x82, 0x84, 0x86, 0x88, 0x8a, 0x8c, 0x8e, 0x90, 0x92, 0x94, 0x96, 0x98, 0x9a, 0x9c, 0x9e,\n  0xa0, 0xa2, 0xa4, 0xa6, 0xa8, 0xaa, 0xac, 0xae, 0xb0, 0xb2, 0xb4, 0xb6, 0xb8, 0xba, 0xbc, 0xbe,\n  0xc0, 0xc2, 0xc4, 0xc6, 0xc8, 0xca, 0xcc, 0xce, 0xd0, 0xd2, 0xd4, 0xd6, 0xd8, 0xda, 0xdc, 0xde,\n  0xe0, 0xe2, 0xe4, 0xe6, 0xe8, 0xea, 0xec, 0xee, 0xf0, 0xf2, 0xf4, 0xf6, 0xf8, 0xfa, 0xfc, 0xfe,\n  0x1b, 0x19, 0x1f, 0x1d, 0x13, 0x11, 0x17, 0x15, 0x0b, 0x09, 0x0f, 0x0d, 0x03, 0x01, 0x07, 0x05,\n  0x3b, 0x39, 0x3f, 0x3d, 0x33, 0x31, 0x37, 0x35, 0x2b, 0x29, 0x2f, 0x2d, 0x23, 0x21, 0x27, 0x25,\n  0x5b, 0x59, 0x5f, 0x5d, 0x53, 0x51, 0x57, 0x55, 0x4b, 0x49, 0x4f, 0x4d, 0x43, 0x41, 0x47, 0x45,\n  0x7b, 0x79, 0x7f, 0x7d, 0x73, 0x71, 0x77, 0x75, 0x6b, 0x69, 0x6f, 0x6d, 0x63, 0x61, 0x67, 0x65,\n  0x9b, 0x99, 0x9f, 0x9d, 0x93, 0x91, 0x97, 0x95, 0x8b, 0x89, 0x8f, 0x8d, 0x83, 0x81, 0x87, 0x85,\n  0xbb, 0xb9, 0xbf, 0xbd, 0xb3, 0xb1, 0xb7, 0xb5, 0xab, 0xa9, 0xaf, 0xad, 0xa3, 0xa1, 0xa7, 0xa5,\n  0xdb, 0xd9, 0xdf, 0xdd, 0xd3, 0xd1, 0xd7, 0xd5, 0xcb, 0xc9, 0xcf, 0xcd, 0xc3, 0xc1, 0xc7, 0xc5,\n  0xfb, 0xf9, 0xff, 0xfd, 0xf3, 0xf1, 0xf7, 0xf5, 0xeb, 0xe9, 0xef, 0xed, 0xe3, 0xe1, 0xe7, 0xe5\n},\n{ // gmult(0x01, b);\n  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,\n  0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 
0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,\n  0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,\n  0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,\n  0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,\n  0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,\n  0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,\n  0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,\n  0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,\n  0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,\n  0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,\n  0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,\n  0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,\n  0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,\n  0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,\n  0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,\n},\n{ // gmult(0x01, b);\n  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,\n  0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,\n  0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,\n  0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,\n  0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,\n  0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,\n  0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,\n  0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,\n  0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,\n  0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,\n  0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,\n  0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,\n  0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,\n  0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,\n  0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,\n  0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,\n},\n{ // gmult(0x03, b);\n  0x00, 0x03, 0x06, 0x05, 0x0c, 0x0f, 0x0a, 0x09, 0x18, 0x1b, 0x1e, 0x1d, 0x14, 0x17, 0x12, 0x11,\n  0x30, 0x33, 0x36, 0x35, 0x3c, 0x3f, 0x3a, 0x39, 0x28, 0x2b, 0x2e, 0x2d, 0x24, 0x27, 0x22, 0x21,\n  0x60, 0x63, 0x66, 0x65, 0x6c, 0x6f, 0x6a, 0x69, 0x78, 0x7b, 0x7e, 0x7d, 0x74, 0x77, 0x72, 0x71,\n  0x50, 0x53, 0x56, 0x55, 0x5c, 0x5f, 0x5a, 0x59, 0x48, 0x4b, 0x4e, 0x4d, 0x44, 0x47, 0x42, 0x41,\n  0xc0, 0xc3, 0xc6, 0xc5, 0xcc, 0xcf, 0xca, 0xc9, 0xd8, 0xdb, 0xde, 0xdd, 0xd4, 0xd7, 0xd2, 
0xd1,\n  0xf0, 0xf3, 0xf6, 0xf5, 0xfc, 0xff, 0xfa, 0xf9, 0xe8, 0xeb, 0xee, 0xed, 0xe4, 0xe7, 0xe2, 0xe1,\n  0xa0, 0xa3, 0xa6, 0xa5, 0xac, 0xaf, 0xaa, 0xa9, 0xb8, 0xbb, 0xbe, 0xbd, 0xb4, 0xb7, 0xb2, 0xb1,\n  0x90, 0x93, 0x96, 0x95, 0x9c, 0x9f, 0x9a, 0x99, 0x88, 0x8b, 0x8e, 0x8d, 0x84, 0x87, 0x82, 0x81,\n  0x9b, 0x98, 0x9d, 0x9e, 0x97, 0x94, 0x91, 0x92, 0x83, 0x80, 0x85, 0x86, 0x8f, 0x8c, 0x89, 0x8a,\n  0xab, 0xa8, 0xad, 0xae, 0xa7, 0xa4, 0xa1, 0xa2, 0xb3, 0xb0, 0xb5, 0xb6, 0xbf, 0xbc, 0xb9, 0xba,\n  0xfb, 0xf8, 0xfd, 0xfe, 0xf7, 0xf4, 0xf1, 0xf2, 0xe3, 0xe0, 0xe5, 0xe6, 0xef, 0xec, 0xe9, 0xea,\n  0xcb, 0xc8, 0xcd, 0xce, 0xc7, 0xc4, 0xc1, 0xc2, 0xd3, 0xd0, 0xd5, 0xd6, 0xdf, 0xdc, 0xd9, 0xda,\n  0x5b, 0x58, 0x5d, 0x5e, 0x57, 0x54, 0x51, 0x52, 0x43, 0x40, 0x45, 0x46, 0x4f, 0x4c, 0x49, 0x4a,\n  0x6b, 0x68, 0x6d, 0x6e, 0x67, 0x64, 0x61, 0x62, 0x73, 0x70, 0x75, 0x76, 0x7f, 0x7c, 0x79, 0x7a,\n  0x3b, 0x38, 0x3d, 0x3e, 0x37, 0x34, 0x31, 0x32, 0x23, 0x20, 0x25, 0x26, 0x2f, 0x2c, 0x29, 0x2a,\n  0x0b, 0x08, 0x0d, 0x0e, 0x07, 0x04, 0x01, 0x02, 0x13, 0x10, 0x15, 0x16, 0x1f, 0x1c, 0x19, 0x1a,\n},\n{ // gmult(0x0e, b);\n  0x00, 0x0e, 0x1c, 0x12, 0x38, 0x36, 0x24, 0x2a, 0x70, 0x7e, 0x6c, 0x62, 0x48, 0x46, 0x54, 0x5a,\n  0xe0, 0xee, 0xfc, 0xf2, 0xd8, 0xd6, 0xc4, 0xca, 0x90, 0x9e, 0x8c, 0x82, 0xa8, 0xa6, 0xb4, 0xba,\n  0xdb, 0xd5, 0xc7, 0xc9, 0xe3, 0xed, 0xff, 0xf1, 0xab, 0xa5, 0xb7, 0xb9, 0x93, 0x9d, 0x8f, 0x81,\n  0x3b, 0x35, 0x27, 0x29, 0x03, 0x0d, 0x1f, 0x11, 0x4b, 0x45, 0x57, 0x59, 0x73, 0x7d, 0x6f, 0x61,\n  0xad, 0xa3, 0xb1, 0xbf, 0x95, 0x9b, 0x89, 0x87, 0xdd, 0xd3, 0xc1, 0xcf, 0xe5, 0xeb, 0xf9, 0xf7,\n  0x4d, 0x43, 0x51, 0x5f, 0x75, 0x7b, 0x69, 0x67, 0x3d, 0x33, 0x21, 0x2f, 0x05, 0x0b, 0x19, 0x17,\n  0x76, 0x78, 0x6a, 0x64, 0x4e, 0x40, 0x52, 0x5c, 0x06, 0x08, 0x1a, 0x14, 0x3e, 0x30, 0x22, 0x2c,\n  0x96, 0x98, 0x8a, 0x84, 0xae, 0xa0, 0xb2, 0xbc, 0xe6, 0xe8, 0xfa, 0xf4, 0xde, 0xd0, 0xc2, 0xcc,\n  0x41, 0x4f, 0x5d, 0x53, 0x79, 0x77, 0x65, 0x6b, 0x31, 0x3f, 0x2d, 0x23, 0x09, 0x07, 0x15, 0x1b,\n  0xa1, 0xaf, 0xbd, 0xb3, 0x99, 0x97, 0x85, 0x8b, 0xd1, 0xdf, 0xcd, 0xc3, 0xe9, 0xe7, 0xf5, 0xfb,\n  0x9a, 0x94, 0x86, 0x88, 0xa2, 0xac, 0xbe, 0xb0, 0xea, 0xe4, 0xf6, 0xf8, 0xd2, 0xdc, 0xce, 0xc0,\n  0x7a, 0x74, 0x66, 0x68, 0x42, 0x4c, 0x5e, 0x50, 0x0a, 0x04, 0x16, 0x18, 0x32, 0x3c, 0x2e, 0x20,\n  0xec, 0xe2, 0xf0, 0xfe, 0xd4, 0xda, 0xc8, 0xc6, 0x9c, 0x92, 0x80, 0x8e, 0xa4, 0xaa, 0xb8, 0xb6,\n  0x0c, 0x02, 0x10, 0x1e, 0x34, 0x3a, 0x28, 0x26, 0x7c, 0x72, 0x60, 0x6e, 0x44, 0x4a, 0x58, 0x56,\n  0x37, 0x39, 0x2b, 0x25, 0x0f, 0x01, 0x13, 0x1d, 0x47, 0x49, 0x5b, 0x55, 0x7f, 0x71, 0x63, 0x6d,\n  0xd7, 0xd9, 0xcb, 0xc5, 0xef, 0xe1, 0xf3, 0xfd, 0xa7, 0xa9, 0xbb, 0xb5, 0x9f, 0x91, 0x83, 0x8d,\n},\n{ // gmult(0x09, b);\n  0x00, 0x09, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f, 0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77,\n  0x90, 0x99, 0x82, 0x8b, 0xb4, 0xbd, 0xa6, 0xaf, 0xd8, 0xd1, 0xca, 0xc3, 0xfc, 0xf5, 0xee, 0xe7,\n  0x3b, 0x32, 0x29, 0x20, 0x1f, 0x16, 0x0d, 0x04, 0x73, 0x7a, 0x61, 0x68, 0x57, 0x5e, 0x45, 0x4c,\n  0xab, 0xa2, 0xb9, 0xb0, 0x8f, 0x86, 0x9d, 0x94, 0xe3, 0xea, 0xf1, 0xf8, 0xc7, 0xce, 0xd5, 0xdc,\n  0x76, 0x7f, 0x64, 0x6d, 0x52, 0x5b, 0x40, 0x49, 0x3e, 0x37, 0x2c, 0x25, 0x1a, 0x13, 0x08, 0x01,\n  0xe6, 0xef, 0xf4, 0xfd, 0xc2, 0xcb, 0xd0, 0xd9, 0xae, 0xa7, 0xbc, 0xb5, 0x8a, 0x83, 0x98, 0x91,\n  0x4d, 0x44, 0x5f, 0x56, 0x69, 0x60, 0x7b, 0x72, 0x05, 0x0c, 0x17, 0x1e, 0x21, 0x28, 0x33, 0x3a,\n  0xdd, 0xd4, 0xcf, 0xc6, 0xf9, 0xf0, 0xeb, 0xe2, 0x95, 0x9c, 0x87, 0x8e, 0xb1, 0xb8, 0xa3, 0xaa,\n  0xec, 0xe5, 0xfe, 0xf7, 
0xc8, 0xc1, 0xda, 0xd3, 0xa4, 0xad, 0xb6, 0xbf, 0x80, 0x89, 0x92, 0x9b,\n  0x7c, 0x75, 0x6e, 0x67, 0x58, 0x51, 0x4a, 0x43, 0x34, 0x3d, 0x26, 0x2f, 0x10, 0x19, 0x02, 0x0b,\n  0xd7, 0xde, 0xc5, 0xcc, 0xf3, 0xfa, 0xe1, 0xe8, 0x9f, 0x96, 0x8d, 0x84, 0xbb, 0xb2, 0xa9, 0xa0,\n  0x47, 0x4e, 0x55, 0x5c, 0x63, 0x6a, 0x71, 0x78, 0x0f, 0x06, 0x1d, 0x14, 0x2b, 0x22, 0x39, 0x30,\n  0x9a, 0x93, 0x88, 0x81, 0xbe, 0xb7, 0xac, 0xa5, 0xd2, 0xdb, 0xc0, 0xc9, 0xf6, 0xff, 0xe4, 0xed,\n  0x0a, 0x03, 0x18, 0x11, 0x2e, 0x27, 0x3c, 0x35, 0x42, 0x4b, 0x50, 0x59, 0x66, 0x6f, 0x74, 0x7d,\n  0xa1, 0xa8, 0xb3, 0xba, 0x85, 0x8c, 0x97, 0x9e, 0xe9, 0xe0, 0xfb, 0xf2, 0xcd, 0xc4, 0xdf, 0xd6,\n  0x31, 0x38, 0x23, 0x2a, 0x15, 0x1c, 0x07, 0x0e, 0x79, 0x70, 0x6b, 0x62, 0x5d, 0x54, 0x4f, 0x46,\n\n},\n{ // gmult(0x0d, b);\n  0x00, 0x0d, 0x1a, 0x17, 0x34, 0x39, 0x2e, 0x23, 0x68, 0x65, 0x72, 0x7f, 0x5c, 0x51, 0x46, 0x4b,\n  0xd0, 0xdd, 0xca, 0xc7, 0xe4, 0xe9, 0xfe, 0xf3, 0xb8, 0xb5, 0xa2, 0xaf, 0x8c, 0x81, 0x96, 0x9b,\n  0xbb, 0xb6, 0xa1, 0xac, 0x8f, 0x82, 0x95, 0x98, 0xd3, 0xde, 0xc9, 0xc4, 0xe7, 0xea, 0xfd, 0xf0,\n  0x6b, 0x66, 0x71, 0x7c, 0x5f, 0x52, 0x45, 0x48, 0x03, 0x0e, 0x19, 0x14, 0x37, 0x3a, 0x2d, 0x20,\n  0x6d, 0x60, 0x77, 0x7a, 0x59, 0x54, 0x43, 0x4e, 0x05, 0x08, 0x1f, 0x12, 0x31, 0x3c, 0x2b, 0x26,\n  0xbd, 0xb0, 0xa7, 0xaa, 0x89, 0x84, 0x93, 0x9e, 0xd5, 0xd8, 0xcf, 0xc2, 0xe1, 0xec, 0xfb, 0xf6,\n  0xd6, 0xdb, 0xcc, 0xc1, 0xe2, 0xef, 0xf8, 0xf5, 0xbe, 0xb3, 0xa4, 0xa9, 0x8a, 0x87, 0x90, 0x9d,\n  0x06, 0x0b, 0x1c, 0x11, 0x32, 0x3f, 0x28, 0x25, 0x6e, 0x63, 0x74, 0x79, 0x5a, 0x57, 0x40, 0x4d,\n  0xda, 0xd7, 0xc0, 0xcd, 0xee, 0xe3, 0xf4, 0xf9, 0xb2, 0xbf, 0xa8, 0xa5, 0x86, 0x8b, 0x9c, 0x91,\n  0x0a, 0x07, 0x10, 0x1d, 0x3e, 0x33, 0x24, 0x29, 0x62, 0x6f, 0x78, 0x75, 0x56, 0x5b, 0x4c, 0x41,\n  0x61, 0x6c, 0x7b, 0x76, 0x55, 0x58, 0x4f, 0x42, 0x09, 0x04, 0x13, 0x1e, 0x3d, 0x30, 0x27, 0x2a,\n  0xb1, 0xbc, 0xab, 0xa6, 0x85, 0x88, 0x9f, 0x92, 0xd9, 0xd4, 0xc3, 0xce, 0xed, 0xe0, 0xf7, 0xfa,\n  0xb7, 0xba, 0xad, 0xa0, 0x83, 0x8e, 0x99, 0x94, 0xdf, 0xd2, 0xc5, 0xc8, 0xeb, 0xe6, 0xf1, 0xfc,\n  0x67, 0x6a, 0x7d, 0x70, 0x53, 0x5e, 0x49, 0x44, 0x0f, 0x02, 0x15, 0x18, 0x3b, 0x36, 0x21, 0x2c,\n  0x0c, 0x01, 0x16, 0x1b, 0x38, 0x35, 0x22, 0x2f, 0x64, 0x69, 0x7e, 0x73, 0x50, 0x5d, 0x4a, 0x47,\n  0xdc, 0xd1, 0xc6, 0xcb, 0xe8, 0xe5, 0xf2, 0xff, 0xb4, 0xb9, 0xae, 0xa3, 0x80, 0x8d, 0x9a, 0x97,\n},\n{ // gmult(0x0b, b);\n  0x00, 0x0b, 0x16, 0x1d, 0x2c, 0x27, 0x3a, 0x31, 0x58, 0x53, 0x4e, 0x45, 0x74, 0x7f, 0x62, 0x69,\n  0xb0, 0xbb, 0xa6, 0xad, 0x9c, 0x97, 0x8a, 0x81, 0xe8, 0xe3, 0xfe, 0xf5, 0xc4, 0xcf, 0xd2, 0xd9,\n  0x7b, 0x70, 0x6d, 0x66, 0x57, 0x5c, 0x41, 0x4a, 0x23, 0x28, 0x35, 0x3e, 0x0f, 0x04, 0x19, 0x12,\n  0xcb, 0xc0, 0xdd, 0xd6, 0xe7, 0xec, 0xf1, 0xfa, 0x93, 0x98, 0x85, 0x8e, 0xbf, 0xb4, 0xa9, 0xa2,\n  0xf6, 0xfd, 0xe0, 0xeb, 0xda, 0xd1, 0xcc, 0xc7, 0xae, 0xa5, 0xb8, 0xb3, 0x82, 0x89, 0x94, 0x9f,\n  0x46, 0x4d, 0x50, 0x5b, 0x6a, 0x61, 0x7c, 0x77, 0x1e, 0x15, 0x08, 0x03, 0x32, 0x39, 0x24, 0x2f,\n  0x8d, 0x86, 0x9b, 0x90, 0xa1, 0xaa, 0xb7, 0xbc, 0xd5, 0xde, 0xc3, 0xc8, 0xf9, 0xf2, 0xef, 0xe4,\n  0x3d, 0x36, 0x2b, 0x20, 0x11, 0x1a, 0x07, 0x0c, 0x65, 0x6e, 0x73, 0x78, 0x49, 0x42, 0x5f, 0x54,\n  0xf7, 0xfc, 0xe1, 0xea, 0xdb, 0xd0, 0xcd, 0xc6, 0xaf, 0xa4, 0xb9, 0xb2, 0x83, 0x88, 0x95, 0x9e,\n  0x47, 0x4c, 0x51, 0x5a, 0x6b, 0x60, 0x7d, 0x76, 0x1f, 0x14, 0x09, 0x02, 0x33, 0x38, 0x25, 0x2e,\n  0x8c, 0x87, 0x9a, 0x91, 0xa0, 0xab, 0xb6, 0xbd, 0xd4, 0xdf, 0xc2, 0xc9, 0xf8, 0xf3, 0xee, 0xe5,\n  0x3c, 0x37, 0x2a, 0x21, 0x10, 0x1b, 0x06, 0x0d, 0x64, 0x6f, 
0x72, 0x79, 0x48, 0x43, 0x5e, 0x55,\n  0x01, 0x0a, 0x17, 0x1c, 0x2d, 0x26, 0x3b, 0x30, 0x59, 0x52, 0x4f, 0x44, 0x75, 0x7e, 0x63, 0x68,\n  0xb1, 0xba, 0xa7, 0xac, 0x9d, 0x96, 0x8b, 0x80, 0xe9, 0xe2, 0xff, 0xf4, 0xc5, 0xce, 0xd3, 0xd8,\n  0x7a, 0x71, 0x6c, 0x67, 0x56, 0x5d, 0x40, 0x4b, 0x22, 0x29, 0x34, 0x3f, 0x0e, 0x05, 0x18, 0x13,\n  0xca, 0xc1, 0xdc, 0xd7, 0xe6, 0xed, 0xf0, 0xfb, 0x92, 0x99, 0x84, 0x8f, 0xbe, 0xb5, 0xa8, 0xa3,\n}\n};\n\n/*\n * S-box transformation table\n */\nstatic uint8_t simde_x_aes_s_box[256] = {\n  // 0     1     2     3     4     5     6     7     8     9     a     b     c     d     e     f\n  0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76, // 0\n  0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0, // 1\n  0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15, // 2\n  0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75, // 3\n  0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84, // 4\n  0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf, // 5\n  0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8, // 6\n  0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2, // 7\n  0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73, // 8\n  0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb, // 9\n  0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79, // a\n  0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08, // b\n  0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a, // c\n  0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e, // d\n  0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf, // e\n  0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16};// f\n\n/*\n * Inverse S-box transformation table\n */\nstatic uint8_t simde_x_aes_inv_s_box[256] = {\n  // 0     1     2     3     4     5     6     7     8     9     a     b     c     d     e     f\n  0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb, // 0\n  0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb, // 1\n  0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e, // 2\n  0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25, // 3\n  0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92, // 4\n  0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84, // 5\n  0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06, // 6\n  0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b, // 7\n  0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73, // 8\n  0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e, // 9\n  0x47, 0xf1, 0x1a, 0x71, 0x1d, 
0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b, // a\n  0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4, // b\n  0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f, // c\n  0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef, // d\n  0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61, // e\n  0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d};// f\n\n/*\n * Multiplication of 4 byte words\n * m(x) = x4+1\n\nSIMDE_FUNCTION_ATTRIBUTES\nvoid coef_mult(uint8_t *a, uint8_t *b, uint8_t *d) {\n\n  d[0] = gmult(a[0],b[0])^gmult(a[3],b[1])^gmult(a[2],b[2])^gmult(a[1],b[3]);\n  d[1] = gmult(a[1],b[0])^gmult(a[0],b[1])^gmult(a[3],b[2])^gmult(a[2],b[3]);\n  d[2] = gmult(a[2],b[0])^gmult(a[1],b[1])^gmult(a[0],b[2])^gmult(a[3],b[3]);\n  d[3] = gmult(a[3],b[0])^gmult(a[2],b[1])^gmult(a[1],b[2])^gmult(a[0],b[3]);\n}\n*/\n\nSIMDE_FUNCTION_ATTRIBUTES\nvoid simde_x_aes_coef_mult_lookup(int lookup_table_offset, uint8_t *b, uint8_t *d) {\n  int o = lookup_table_offset;\n\n  #define gmultl(o,b) simde_x_aes_gmult_lookup_table[o][b]\n  d[0] = gmultl(o+0,b[0])^gmultl(o+3,b[1])^gmultl(o+2,b[2])^gmultl(o+1,b[3]);\n  d[1] = gmultl(o+1,b[0])^gmultl(o+0,b[1])^gmultl(o+3,b[2])^gmultl(o+2,b[3]);\n  d[2] = gmultl(o+2,b[0])^gmultl(o+1,b[1])^gmultl(o+0,b[2])^gmultl(o+3,b[3]);\n  d[3] = gmultl(o+3,b[0])^gmultl(o+2,b[1])^gmultl(o+1,b[2])^gmultl(o+0,b[3]);\n  #undef gmultl\n}\n\n#endif\n\nHEDLEY_DIAGNOSTIC_POP\n\n#endif /* !defined(SIMDE_AES_H) */\n"
  },
  {
    "path": "external_libs/pgenlib/simde/simde-align.h",
    "content": "/* Alignment\n * Created by Evan Nemerson <evan@nemerson.com>\n *\n *   To the extent possible under law, the authors have waived all\n *   copyright and related or neighboring rights to this code.  For\n *   details, see the Creative Commons Zero 1.0 Universal license at\n *   <https://creativecommons.org/publicdomain/zero/1.0/>\n *\n * SPDX-License-Identifier: CC0-1.0\n *\n **********************************************************************\n *\n * This is portability layer which should help iron out some\n * differences across various compilers, as well as various versions of\n * C and C++.\n *\n * It was originally developed for SIMD Everywhere\n * (<https://github.com/simd-everywhere/simde>), but since its only\n * dependency is Hedley (<https://nemequ.github.io/hedley>, also CC0)\n * it can easily be used in other projects, so please feel free to do\n * so.\n *\n * If you do use this in your project, please keep a link to SIMDe in\n * your code to remind you where to report any bugs and/or check for\n * updated versions.\n *\n * # API Overview\n *\n * The API has several parts, and most macros have a few variations.\n * There are APIs for declaring aligned fields/variables, optimization\n * hints, and run-time alignment checks.\n *\n * Briefly, macros ending with \"_TO\" take numeric values and are great\n * when you know the value you would like to use.  Macros ending with\n * \"_LIKE\", on the other hand, accept a type and are used when you want\n * to use the alignment of a type instead of hardcoding a value.\n *\n * Documentation for each section of the API is inline.\n *\n * True to form, MSVC is the main problem and imposes several\n * limitations on the effectiveness of the APIs.  Detailed descriptions\n * of the limitations of each macro are inline, but in general:\n *\n *  * On C11+ or C++11+ code written using this API will work.  The\n *    ASSUME macros may or may not generate a hint to the compiler, but\n *    that is only an optimization issue and will not actually cause\n *    failures.\n *  * If you're using pretty much any compiler other than MSVC,\n *    everything should basically work as well as in C11/C++11.\n */\n\n#if !defined(SIMDE_ALIGN_H)\n#define SIMDE_ALIGN_H\n\n#include \"hedley.h\"\n\n/* I know this seems a little silly, but some non-hosted compilers\n * don't have stddef.h, so we try to accommodate them. 
*/\n#if !defined(SIMDE_ALIGN_SIZE_T_)\n  #if defined(__SIZE_TYPE__)\n    #define SIMDE_ALIGN_SIZE_T_ __SIZE_TYPE__\n  #elif defined(__SIZE_T_TYPE__)\n    #define SIMDE_ALIGN_SIZE_T_ __SIZE_T_TYPE__\n  #elif defined(__cplusplus)\n    #include <cstddef>\n    #define SIMDE_ALIGN_SIZE_T_ size_t\n  #else\n    #include <stddef.h>\n    #define SIMDE_ALIGN_SIZE_T_ size_t\n  #endif\n#endif\n\n#if !defined(SIMDE_ALIGN_INTPTR_T_)\n  #if defined(__INTPTR_TYPE__)\n    #define SIMDE_ALIGN_INTPTR_T_ __INTPTR_TYPE__\n  #elif defined(__PTRDIFF_TYPE__)\n    #define SIMDE_ALIGN_INTPTR_T_ __PTRDIFF_TYPE__\n  #elif defined(__PTRDIFF_T_TYPE__)\n    #define SIMDE_ALIGN_INTPTR_T_ __PTRDIFF_T_TYPE__\n  #elif defined(__cplusplus)\n    #include <cstddef>\n    #define SIMDE_ALIGN_INTPTR_T_ ptrdiff_t\n  #else\n    #include <stddef.h>\n    #define SIMDE_ALIGN_INTPTR_T_ ptrdiff_t\n  #endif\n#endif\n\n#if defined(SIMDE_ALIGN_DEBUG)\n  #if defined(__cplusplus)\n    #include <cstdio>\n  #else\n    #include <stdio.h>\n  #endif\n#endif\n\n/* SIMDE_ALIGN_OF(Type)\n *\n * The SIMDE_ALIGN_OF macro works like alignof, or _Alignof, or\n * __alignof, or __alignof__, or __ALIGNOF__, depending on the compiler.\n * It isn't defined everywhere (only when the compiler has some alignof-\n * like feature we can use to implement it), but it should work in most\n * modern compilers, as well as C11 and C++11.\n *\n * If we can't find an implementation for SIMDE_ALIGN_OF then the macro\n * will not be defined, so if you can handle that situation sensibly\n * you may need to sprinkle some ifdefs into your code.\n */\n#if \\\n    (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \\\n    (0 && HEDLEY_HAS_FEATURE(c_alignof))\n  #define SIMDE_ALIGN_OF(Type) _Alignof(Type)\n#elif \\\n    (defined(__cplusplus) && (__cplusplus >= 201103L)) || \\\n    (0 && HEDLEY_HAS_FEATURE(cxx_alignof))\n  #define SIMDE_ALIGN_OF(Type) alignof(Type)\n#elif \\\n    HEDLEY_GCC_VERSION_CHECK(2,95,0) || \\\n    HEDLEY_ARM_VERSION_CHECK(4,1,0) || \\\n    HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \\\n    HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \\\n    HEDLEY_TINYC_VERSION_CHECK(0,9,24) || \\\n    HEDLEY_PGI_VERSION_CHECK(19,10,0) || \\\n    HEDLEY_CRAY_VERSION_CHECK(10,0,0) || \\\n    HEDLEY_TI_ARMCL_VERSION_CHECK(16,9,0) || \\\n    HEDLEY_TI_CL2000_VERSION_CHECK(16,9,0) || \\\n    HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \\\n    HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \\\n    HEDLEY_TI_CL430_VERSION_CHECK(16,9,0) || \\\n    HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) || \\\n    HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \\\n    defined(__IBM__ALIGNOF__) || \\\n    defined(__clang__)\n  #define SIMDE_ALIGN_OF(Type) __alignof__(Type)\n#elif \\\n  HEDLEY_IAR_VERSION_CHECK(8,40,0)\n  #define SIMDE_ALIGN_OF(Type) __ALIGNOF__(Type)\n#elif \\\n  HEDLEY_MSVC_VERSION_CHECK(19,0,0)\n  /* Probably goes back much further, but MS takes down their old docs.\n   * If you can verify that this works in earlier versions please let\n   * me know! */\n  #define SIMDE_ALIGN_OF(Type) __alignof(Type)\n#endif\n\n/* SIMDE_ALIGN_MAXIMUM:\n *\n * This is the maximum alignment that the compiler supports.  You can\n * define the value prior to including SIMDe if necessary, but in that\n * case *please* submit an issue so we can add the platform to the\n * detection code.\n *\n * Most compilers are okay with types which are aligned beyond what\n * they think is the maximum, as long as the alignment is a power\n * of two.  
Older versions of MSVC are the exception, so we need to cap\n * the alignment requests at values that the implementation supports.\n *\n * XL C/C++ will accept values larger than 16 (which is the alignment\n * of an AltiVec vector), but will not reliably align to the larger\n * value, so we cap the value at 16 there.\n *\n * If the compiler accepts any power-of-two value within reason then\n * this macro should be left undefined, and the SIMDE_ALIGN_CAP\n * macro will just return the value passed to it. */\n#if !defined(SIMDE_ALIGN_MAXIMUM)\n  #if defined(HEDLEY_MSVC_VERSION)\n    #if HEDLEY_MSVC_VERSION_CHECK(19, 16, 0)\n      // Visual studio 2017 and newer does not need a max\n    #else\n      #if defined(_M_IX86) || defined(_M_AMD64)\n        #if HEDLEY_MSVC_VERSION_CHECK(19,14,0)\n          #define SIMDE_ALIGN_PLATFORM_MAXIMUM 64\n        #elif HEDLEY_MSVC_VERSION_CHECK(16,0,0)\n          /* VS 2010 is really a guess based on Wikipedia; if anyone can\n           * test with old VS versions I'd really appreciate it. */\n          #define SIMDE_ALIGN_PLATFORM_MAXIMUM 32\n        #else\n          #define SIMDE_ALIGN_PLATFORM_MAXIMUM 16\n        #endif\n      #elif defined(_M_ARM) || defined(_M_ARM64)\n        #define SIMDE_ALIGN_PLATFORM_MAXIMUM 8\n      #endif\n    #endif\n  #elif defined(HEDLEY_IBM_VERSION)\n    #define SIMDE_ALIGN_PLATFORM_MAXIMUM 16\n  #endif\n#endif\n\n/* You can mostly ignore these; they're intended for internal use.\n * If you do need to use them please let me know; if they fulfill\n * a common use case I'll probably drop the trailing underscore\n * and make them part of the public API. */\n#if defined(SIMDE_ALIGN_PLATFORM_MAXIMUM)\n  #if SIMDE_ALIGN_PLATFORM_MAXIMUM >= 64\n    #define SIMDE_ALIGN_64_ 64\n    #define SIMDE_ALIGN_32_ 32\n    #define SIMDE_ALIGN_16_ 16\n    #define SIMDE_ALIGN_8_ 8\n  #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 32\n    #define SIMDE_ALIGN_64_ 32\n    #define SIMDE_ALIGN_32_ 32\n    #define SIMDE_ALIGN_16_ 16\n    #define SIMDE_ALIGN_8_ 8\n  #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 16\n    #define SIMDE_ALIGN_64_ 16\n    #define SIMDE_ALIGN_32_ 16\n    #define SIMDE_ALIGN_16_ 16\n    #define SIMDE_ALIGN_8_ 8\n  #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 8\n    #define SIMDE_ALIGN_64_ 8\n    #define SIMDE_ALIGN_32_ 8\n    #define SIMDE_ALIGN_16_ 8\n    #define SIMDE_ALIGN_8_ 8\n  #else\n    #error Max alignment expected to be >= 8\n  #endif\n#else\n  #define SIMDE_ALIGN_64_ 64\n  #define SIMDE_ALIGN_32_ 32\n  #define SIMDE_ALIGN_16_ 16\n  #define SIMDE_ALIGN_8_ 8\n#endif\n\n/**\n * SIMDE_ALIGN_CAP(Alignment)\n *\n * Returns the minimum of Alignment or SIMDE_ALIGN_MAXIMUM.\n */\n#if defined(SIMDE_ALIGN_MAXIMUM)\n  #define SIMDE_ALIGN_CAP(Alignment) (((Alignment) < (SIMDE_ALIGN_PLATFORM_MAXIMUM)) ? (Alignment) : (SIMDE_ALIGN_PLATFORM_MAXIMUM))\n#else\n  #define SIMDE_ALIGN_CAP(Alignment) (Alignment)\n#endif\n\n/* SIMDE_ALIGN_TO(Alignment)\n *\n * SIMDE_ALIGN_TO is used to declare types or variables.  It basically\n * maps to the align attribute in most compilers, the align declspec\n * in MSVC, or _Alignas/alignas in C11/C++11.\n *\n * Example:\n *\n *   struct i32x4 {\n *     SIMDE_ALIGN_TO(16) int32_t values[4];\n *   }\n *\n * Limitations:\n *\n * MSVC requires that the Alignment parameter be numeric; you can't do\n * something like `SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(int))`.  
This is\n * unfortunate because that's really how the LIKE macros are\n * implemented, and I am not aware of a way to get anything like this\n * to work without using the C11/C++11 keywords.\n *\n * It also means that we can't use SIMDE_ALIGN_CAP to limit the\n * alignment to the value specified, which MSVC also requires, so on\n * MSVC you should use the `SIMDE_ALIGN_TO_8/16/32/64` macros instead.\n * They work like `SIMDE_ALIGN_TO(SIMDE_ALIGN_CAP(Alignment))` would,\n * but should be safe to use on MSVC.\n *\n * All this is to say that, if you want your code to work on MSVC, you\n * should use the SIMDE_ALIGN_TO_8/16/32/64 macros below instead of\n * SIMDE_ALIGN_TO(8/16/32/64).\n */\n#if \\\n    HEDLEY_HAS_ATTRIBUTE(aligned) || \\\n    HEDLEY_GCC_VERSION_CHECK(2,95,0) || \\\n    HEDLEY_CRAY_VERSION_CHECK(8,4,0) || \\\n    HEDLEY_IBM_VERSION_CHECK(11,1,0) || \\\n    HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \\\n    HEDLEY_PGI_VERSION_CHECK(19,4,0) || \\\n    HEDLEY_ARM_VERSION_CHECK(4,1,0) || \\\n    HEDLEY_TINYC_VERSION_CHECK(0,9,24) || \\\n    HEDLEY_TI_ARMCL_VERSION_CHECK(16,9,0) || \\\n    HEDLEY_TI_CL2000_VERSION_CHECK(16,9,0) || \\\n    HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \\\n    HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \\\n    HEDLEY_TI_CL430_VERSION_CHECK(16,9,0) || \\\n    HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2)\n  #define SIMDE_ALIGN_TO(Alignment) __attribute__((__aligned__(SIMDE_ALIGN_CAP(Alignment))))\n#elif \\\n    (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L))\n  #define SIMDE_ALIGN_TO(Alignment) _Alignas(SIMDE_ALIGN_CAP(Alignment))\n#elif \\\n    (defined(__cplusplus) && (__cplusplus >= 201103L))\n  #define SIMDE_ALIGN_TO(Alignment) alignas(SIMDE_ALIGN_CAP(Alignment))\n#elif \\\n    defined(HEDLEY_MSVC_VERSION)\n  #define SIMDE_ALIGN_TO(Alignment) __declspec(align(Alignment))\n  /* Unfortunately MSVC can't handle __declspec(align(__alignof(Type)));\n   * the alignment passed to the declspec has to be an integer. */\n  #define SIMDE_ALIGN_OF_UNUSABLE_FOR_LIKE\n#endif\n#define SIMDE_ALIGN_TO_64 SIMDE_ALIGN_TO(SIMDE_ALIGN_64_)\n#define SIMDE_ALIGN_TO_32 SIMDE_ALIGN_TO(SIMDE_ALIGN_32_)\n#define SIMDE_ALIGN_TO_16 SIMDE_ALIGN_TO(SIMDE_ALIGN_16_)\n#define SIMDE_ALIGN_TO_8 SIMDE_ALIGN_TO(SIMDE_ALIGN_8_)\n\n/* SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment)\n *\n * SIMDE_ALIGN_ASSUME_TO is semantically similar to C++20's\n * std::assume_aligned, or __builtin_assume_aligned.  It tells the\n * compiler to assume that the provided pointer is aligned to an\n * `Alignment`-byte boundary.\n *\n * If you define SIMDE_ALIGN_DEBUG prior to including this header then\n * SIMDE_ALIGN_ASSUME_TO will turn into a runtime check.   
We don't\n * integrate with NDEBUG in this header, but it may be a good idea to\n * put something like this in your code:\n *\n *   #if !defined(NDEBUG)\n *     #define SIMDE_ALIGN_DEBUG\n *   #endif\n *   #include <.../simde-align.h>\n */\n#if \\\n    HEDLEY_HAS_BUILTIN(__builtin_assume_aligned) || \\\n    HEDLEY_GCC_VERSION_CHECK(4,7,0)\n  #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) \\\n    HEDLEY_REINTERPRET_CAST(__typeof__(Pointer), __builtin_assume_aligned(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), Alignment))\n#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0)\n  #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) (__extension__ ({ \\\n      __typeof__(Pointer) simde_assume_aligned_t_ = (Pointer); \\\n      __assume_aligned(simde_assume_aligned_t_, Alignment); \\\n      simde_assume_aligned_t_; \\\n    }))\n#elif defined(__cplusplus) && (__cplusplus > 201703L)\n  #include <memory>\n  #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) std::assume_aligned<Alignment>(Pointer)\n#else\n  #if defined(__cplusplus)\n    template<typename T> HEDLEY_ALWAYS_INLINE static T* simde_align_assume_to_unchecked(T* ptr, const size_t alignment)\n  #else\n    HEDLEY_ALWAYS_INLINE static void* simde_align_assume_to_unchecked(void* ptr, const size_t alignment)\n  #endif\n  {\n    HEDLEY_ASSUME((HEDLEY_REINTERPRET_CAST(size_t, (ptr)) % SIMDE_ALIGN_CAP(alignment)) == 0);\n    return ptr;\n  }\n  #if defined(__cplusplus)\n    #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) simde_align_assume_to_unchecked((Pointer), (Alignment))\n  #else\n    #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) simde_align_assume_to_unchecked(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), (Alignment))\n  #endif\n#endif\n\n#if !defined(SIMDE_ALIGN_DEBUG)\n  #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment)\n#else\n  #include <stdio.h>\n  #if defined(__cplusplus)\n    template<typename T>\n    static HEDLEY_ALWAYS_INLINE\n    T*\n    simde_align_assume_to_checked_uncapped(T* ptr, const size_t alignment, const char* file, int line, const char* ptrname)\n  #else\n    static HEDLEY_ALWAYS_INLINE\n    void*\n    simde_align_assume_to_checked_uncapped(void* ptr, const size_t alignment, const char* file, int line, const char* ptrname)\n  #endif\n  {\n    if (HEDLEY_UNLIKELY((HEDLEY_REINTERPRET_CAST(SIMDE_ALIGN_INTPTR_T_, (ptr)) % HEDLEY_STATIC_CAST(SIMDE_ALIGN_INTPTR_T_, SIMDE_ALIGN_CAP(alignment))) != 0)) {\n      fprintf(stderr, \"%s:%d: alignment check failed for `%s' (%p %% %u == %u)\\n\",\n        file, line, ptrname, HEDLEY_REINTERPRET_CAST(const void*, ptr),\n        HEDLEY_STATIC_CAST(unsigned int, SIMDE_ALIGN_CAP(alignment)),\n        HEDLEY_STATIC_CAST(unsigned int, HEDLEY_REINTERPRET_CAST(SIMDE_ALIGN_INTPTR_T_, (ptr)) % HEDLEY_STATIC_CAST(SIMDE_ALIGN_INTPTR_T_, SIMDE_ALIGN_CAP(alignment))));\n    }\n\n    return ptr;\n  }\n\n  #if defined(__cplusplus)\n    #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) simde_align_assume_to_checked_uncapped((Pointer), (Alignment), __FILE__, __LINE__, #Pointer)\n  #else\n    #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) simde_align_assume_to_checked_uncapped(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), (Alignment), __FILE__, __LINE__, #Pointer)\n  #endif\n#endif\n\n/* SIMDE_ALIGN_LIKE(Type)\n * SIMDE_ALIGN_LIKE_#(Type)\n *\n * The SIMDE_ALIGN_LIKE macros are similar to the SIMDE_ALIGN_TO macros\n * except instead 
of an integer they take a type; basically, it's just\n * a more convenient way to do something like:\n *\n *   SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(Type))\n *\n * The versions with a numeric suffix will fall back on using a numeric\n * value in the event we can't use SIMDE_ALIGN_OF(Type).  This is\n * mainly for MSVC, where __declspec(align()) can't handle anything\n * other than hard-coded numeric values.\n */\n#if defined(SIMDE_ALIGN_OF) && defined(SIMDE_ALIGN_TO) && !defined(SIMDE_ALIGN_OF_UNUSABLE_FOR_LIKE)\n  #define SIMDE_ALIGN_LIKE(Type) SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(Type))\n  #define SIMDE_ALIGN_LIKE_64(Type) SIMDE_ALIGN_LIKE(Type)\n  #define SIMDE_ALIGN_LIKE_32(Type) SIMDE_ALIGN_LIKE(Type)\n  #define SIMDE_ALIGN_LIKE_16(Type) SIMDE_ALIGN_LIKE(Type)\n  #define SIMDE_ALIGN_LIKE_8(Type) SIMDE_ALIGN_LIKE(Type)\n#else\n  #define SIMDE_ALIGN_LIKE_64(Type) SIMDE_ALIGN_TO_64\n  #define SIMDE_ALIGN_LIKE_32(Type) SIMDE_ALIGN_TO_32\n  #define SIMDE_ALIGN_LIKE_16(Type) SIMDE_ALIGN_TO_16\n  #define SIMDE_ALIGN_LIKE_8(Type) SIMDE_ALIGN_TO_8\n#endif\n\n/* SIMDE_ALIGN_ASSUME_LIKE(Pointer, Type)\n *\n * This is similar to SIMDE_ALIGN_ASSUME_TO, except that it takes a\n * type instead of a numeric value. */\n#if defined(SIMDE_ALIGN_OF) && defined(SIMDE_ALIGN_ASSUME_TO)\n  #define SIMDE_ALIGN_ASSUME_LIKE(Pointer, Type) SIMDE_ALIGN_ASSUME_TO(Pointer, SIMDE_ALIGN_OF(Type))\n#endif\n\n/* SIMDE_ALIGN_CAST(Type, Pointer)\n *\n * SIMDE_ALIGN_CAST is like C++'s reinterpret_cast, but it will try\n * to silence warnings that some compilers may produce if you try\n * to assign to a type with increased alignment requirements.\n *\n * Note that it does *not* actually attempt to tell the compiler that\n * the pointer is aligned like the destination should be; that's the\n * job of the next macro.  This macro is necessary for stupid APIs\n * like _mm_loadu_si128 where the input is a __m128i* but the function\n * is specifically for data which isn't necessarily aligned to\n * _Alignof(__m128i).\n */\n#if HEDLEY_HAS_WARNING(\"-Wcast-align\") || defined(__clang__) || HEDLEY_GCC_VERSION_CHECK(3,4,0)\n  #define SIMDE_ALIGN_CAST(Type, Pointer) (__extension__({ \\\n      HEDLEY_DIAGNOSTIC_PUSH \\\n      _Pragma(\"GCC diagnostic ignored \\\"-Wcast-align\\\"\") \\\n      Type simde_r_ = HEDLEY_REINTERPRET_CAST(Type, Pointer); \\\n      HEDLEY_DIAGNOSTIC_POP \\\n      simde_r_; \\\n    }))\n#else\n  #define SIMDE_ALIGN_CAST(Type, Pointer) HEDLEY_REINTERPRET_CAST(Type, Pointer)\n#endif\n\n/* SIMDE_ALIGN_ASSUME_CAST(Type, Pointer)\n *\n * This is sort of like a combination of a reinterpret_cast and a\n * SIMDE_ALIGN_ASSUME_LIKE.  It uses SIMDE_ALIGN_ASSUME_LIKE to tell\n * the compiler that the pointer is aligned like the specified type\n * and casts the pointer to the specified type while suppressing any\n * warnings from the compiler about casting to a type with greater\n * alignment requirements.\n */\n#define SIMDE_ALIGN_ASSUME_CAST(Type, Pointer) SIMDE_ALIGN_ASSUME_LIKE(SIMDE_ALIGN_CAST(Type, Pointer), Type)\n\n#endif /* !defined(SIMDE_ALIGN_H) */\n"
  },
  {
    "path": "external_libs/pgenlib/simde/simde-arch.h",
    "content": "/* Architecture detection\n * Created by Evan Nemerson <evan@nemerson.com>\n *\n *   To the extent possible under law, the authors have waived all\n *   copyright and related or neighboring rights to this code.  For\n *   details, see the Creative Commons Zero 1.0 Universal license at\n *   <https://creativecommons.org/publicdomain/zero/1.0/>\n *\n * SPDX-License-Identifier: CC0-1.0\n *\n * Different compilers define different preprocessor macros for the\n * same architecture.  This is an attempt to provide a single\n * interface which is usable on any compiler.\n *\n * In general, a macro named SIMDE_ARCH_* is defined for each\n * architecture the CPU supports.  When there are multiple possible\n * versions, we try to define the macro to the target version.  For\n * example, if you want to check for i586+, you could do something\n * like:\n *\n *   #if defined(SIMDE_ARCH_X86) && (SIMDE_ARCH_X86 >= 5)\n *   ...\n *   #endif\n *\n * You could also just check that SIMDE_ARCH_X86 >= 5 without checking\n * if it's defined first, but some compilers may emit a warning about\n * an undefined macro being used (e.g., GCC with -Wundef).\n *\n * This was originally created for SIMDe\n * <https://github.com/simd-everywhere/simde> (hence the prefix), but this\n * header has no dependencies and may be used anywhere.  It is\n * originally based on information from\n * <https://sourceforge.net/p/predef/wiki/Architectures/>, though it\n * has been enhanced with additional information.\n *\n * If you improve this file, or find a bug, please file the issue at\n * <https://github.com/simd-everywhere/simde/issues>.  If you copy this into\n * your project, even if you change the prefix, please keep the links\n * to SIMDe intact so others know where to report issues, submit\n * enhancements, and find the latest version. 
*/\n\n#if !defined(SIMDE_ARCH_H)\n#define SIMDE_ARCH_H\n\n#include \"hedley.h\"\n\n/* Alpha\n   <https://en.wikipedia.org/wiki/DEC_Alpha> */\n#if defined(__alpha__) || defined(__alpha) || defined(_M_ALPHA)\n#  if defined(__alpha_ev6__)\n#    define SIMDE_ARCH_ALPHA 6\n#  elif defined(__alpha_ev5__)\n#    define SIMDE_ARCH_ALPHA 5\n#  elif defined(__alpha_ev4__)\n#    define SIMDE_ARCH_ALPHA 4\n#  else\n#    define SIMDE_ARCH_ALPHA 1\n#  endif\n#endif\n#if defined(SIMDE_ARCH_ALPHA)\n#  define SIMDE_ARCH_ALPHA_CHECK(version) ((version) <= SIMDE_ARCH_ALPHA)\n#else\n#  define SIMDE_ARCH_ALPHA_CHECK(version) (0)\n#endif\n\n/* Atmel AVR\n   <https://en.wikipedia.org/wiki/Atmel_AVR> */\n#if defined(__AVR_ARCH__)\n#  define SIMDE_ARCH_AVR __AVR_ARCH__\n#endif\n\n/* AMD64 / x86_64\n   <https://en.wikipedia.org/wiki/X86-64> */\n#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64)\n#  if !defined(_M_ARM64EC)\n#     define SIMDE_ARCH_AMD64 1000\n#  endif\n#endif\n\n/* ARM\n   <https://en.wikipedia.org/wiki/ARM_architecture> */\n#if defined(__ARM_ARCH)\n#  if __ARM_ARCH > 100\n#    define SIMDE_ARCH_ARM (__ARM_ARCH)\n#  else\n#    define SIMDE_ARCH_ARM (__ARM_ARCH * 100)\n#  endif\n#elif defined(_M_ARM)\n#  if _M_ARM > 100\n#    define SIMDE_ARCH_ARM (_M_ARM)\n#  else\n#    define SIMDE_ARCH_ARM (_M_ARM * 100)\n#  endif\n#elif defined(_M_ARM64) || defined(_M_ARM64EC)\n#  define SIMDE_ARCH_ARM 800\n#elif defined(__arm__) || defined(__thumb__) || defined(__TARGET_ARCH_ARM) || defined(_ARM) || defined(_M_ARM)\n#  define SIMDE_ARCH_ARM 1\n#endif\n#if defined(SIMDE_ARCH_ARM)\n#  define SIMDE_ARCH_ARM_CHECK(major, minor) (((major * 100) + (minor)) <= SIMDE_ARCH_ARM)\n#else\n#  define SIMDE_ARCH_ARM_CHECK(major, minor) (0)\n#endif\n\n/* AArch64\n   <https://en.wikipedia.org/wiki/ARM_architecture> */\n#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)\n#  define SIMDE_ARCH_AARCH64 1000\n#endif\n#if defined(SIMDE_ARCH_AARCH64)\n#  define SIMDE_ARCH_AARCH64_CHECK(version) ((version) <= SIMDE_ARCH_AARCH64)\n#else\n#  define SIMDE_ARCH_AARCH64_CHECK(version) (0)\n#endif\n\n/* ARM SIMD ISA extensions */\n#if defined(__ARM_NEON) || defined(SIMDE_ARCH_AARCH64)\n#  if defined(SIMDE_ARCH_AARCH64)\n#    define SIMDE_ARCH_ARM_NEON SIMDE_ARCH_AARCH64\n#  elif defined(SIMDE_ARCH_ARM)\n#    define SIMDE_ARCH_ARM_NEON SIMDE_ARCH_ARM\n#  endif\n#endif\n#if defined(__ARM_FEATURE_AES) && __ARM_FEATURE_AES\n#  define SIMDE_ARCH_ARM_AES\n#endif\n#if defined(__ARM_FEATURE_COMPLEX) && __ARM_FEATURE_COMPLEX\n#  define SIMDE_ARCH_ARM_COMPLEX\n#endif\n#if defined(__ARM_FEATURE_CRYPTO) && __ARM_FEATURE_CRYPTO\n#  define SIMDE_ARCH_ARM_CRYPTO\n#endif\n#if defined(__ARM_FEATURE_DOTPROD) && __ARM_FEATURE_DOTPROD\n#  define SIMDE_ARCH_ARM_DOTPROD\n#endif\n#if defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA\n#  define SIMDE_ARCH_ARM_FMA\n#endif\n#if defined(__ARM_FEATURE_FP16_FML) && __ARM_FEATURE_FP16_FML\n#  define SIMDE_ARCH_ARM_FP16_FML\n#endif\n#if defined(__ARM_FEATURE_FRINT) && __ARM_FEATURE_FRINT\n#  define SIMDE_ARCH_ARM_FRINT\n#endif\n#if defined(__ARM_FEATURE_MATMUL_INT8) && __ARM_FEATURE_MATMUL_INT8\n#  define SIMDE_ARCH_ARM_MATMUL_INT8\n#endif\n#if defined(__ARM_FEATURE_SHA2) && __ARM_FEATURE_SHA2 && !defined(__APPLE_CC__)\n#  define SIMDE_ARCH_ARM_SHA2\n#endif\n#if defined(__ARM_FEATURE_SHA3) && __ARM_FEATURE_SHA3\n#  define SIMDE_ARCH_ARM_SHA3\n#endif\n#if defined(__ARM_FEATURE_SHA512) && __ARM_FEATURE_SHA512\n#  
define SIMDE_ARCH_ARM_SHA512\n#endif\n#if defined(__ARM_FEATURE_SM3) && __ARM_FEATURE_SM3\n#  define SIMDE_ARCH_ARM_SM3\n#endif\n#if defined(__ARM_FEATURE_SM4) && __ARM_FEATURE_SM4\n#  define SIMDE_ARCH_ARM_SM4\n#endif\n#if defined(__ARM_FEATURE_SVE) && __ARM_FEATURE_SVE\n#  define SIMDE_ARCH_ARM_SVE\n#endif\n#if defined(__ARM_FEATURE_QRDMX) && __ARM_FEATURE_QRDMX\n#  define SIMDE_ARCH_ARM_QRDMX\n#endif\n\n/* Blackfin\n   <https://en.wikipedia.org/wiki/Blackfin> */\n#if defined(__bfin) || defined(__BFIN__) || defined(__bfin__)\n#  define SIMDE_ARCH_BLACKFIN 1\n#endif\n\n/* CRIS\n   <https://en.wikipedia.org/wiki/ETRAX_CRIS> */\n#if defined(__CRIS_arch_version)\n#  define SIMDE_ARCH_CRIS __CRIS_arch_version\n#elif defined(__cris__) || defined(__cris) || defined(__CRIS) || defined(__CRIS__)\n#  define SIMDE_ARCH_CRIS 1\n#endif\n\n/* Convex\n   <https://en.wikipedia.org/wiki/Convex_Computer> */\n#if defined(__convex_c38__)\n#  define SIMDE_ARCH_CONVEX 38\n#elif defined(__convex_c34__)\n#  define SIMDE_ARCH_CONVEX 34\n#elif defined(__convex_c32__)\n#  define SIMDE_ARCH_CONVEX 32\n#elif defined(__convex_c2__)\n#  define SIMDE_ARCH_CONVEX 2\n#elif defined(__convex__)\n#  define SIMDE_ARCH_CONVEX 1\n#endif\n#if defined(SIMDE_ARCH_CONVEX)\n#  define SIMDE_ARCH_CONVEX_CHECK(version) ((version) <= SIMDE_ARCH_CONVEX)\n#else\n#  define SIMDE_ARCH_CONVEX_CHECK(version) (0)\n#endif\n\n/* Adapteva Epiphany\n   <https://en.wikipedia.org/wiki/Adapteva_Epiphany> */\n#if defined(__epiphany__)\n#  define SIMDE_ARCH_EPIPHANY 1\n#endif\n\n/* Fujitsu FR-V\n   <https://en.wikipedia.org/wiki/FR-V_(microprocessor)> */\n#if defined(__frv__)\n#  define SIMDE_ARCH_FRV 1\n#endif\n\n/* H8/300\n   <https://en.wikipedia.org/wiki/H8_Family> */\n#if defined(__H8300__)\n#  define SIMDE_ARCH_H8300\n#endif\n\n/* Elbrus (8S, 8SV and successors)\n   <https://en.wikipedia.org/wiki/Elbrus-8S> */\n#if defined(__e2k__)\n#  define SIMDE_ARCH_E2K\n#endif\n\n/* HP/PA / PA-RISC\n   <https://en.wikipedia.org/wiki/PA-RISC> */\n#if defined(__PA8000__) || defined(__HPPA20__) || defined(__RISC2_0__) || defined(_PA_RISC2_0)\n#  define SIMDE_ARCH_HPPA 20\n#elif defined(__PA7100__) || defined(__HPPA11__) || defined(_PA_RISC1_1)\n#  define SIMDE_ARCH_HPPA 11\n#elif defined(_PA_RISC1_0)\n#  define SIMDE_ARCH_HPPA 10\n#elif defined(__hppa__) || defined(__HPPA__) || defined(__hppa)\n#  define SIMDE_ARCH_HPPA 1\n#endif\n#if defined(SIMDE_ARCH_HPPA)\n#  define SIMDE_ARCH_HPPA_CHECK(version) ((version) <= SIMDE_ARCH_HPPA)\n#else\n#  define SIMDE_ARCH_HPPA_CHECK(version) (0)\n#endif\n\n/* x86\n   <https://en.wikipedia.org/wiki/X86> */\n#if defined(_M_IX86)\n#  define SIMDE_ARCH_X86 (_M_IX86 / 100)\n#elif defined(__I86__)\n#  define SIMDE_ARCH_X86 __I86__\n#elif defined(i686) || defined(__i686) || defined(__i686__)\n#  define SIMDE_ARCH_X86 6\n#elif defined(i586) || defined(__i586) || defined(__i586__)\n#  define SIMDE_ARCH_X86 5\n#elif defined(i486) || defined(__i486) || defined(__i486__)\n#  define SIMDE_ARCH_X86 4\n#elif defined(i386) || defined(__i386) || defined(__i386__)\n#  define SIMDE_ARCH_X86 3\n#elif defined(_X86_) || defined(__X86__) || defined(__THW_INTEL__)\n#  define SIMDE_ARCH_X86 3\n#endif\n#if defined(SIMDE_ARCH_X86)\n#  define SIMDE_ARCH_X86_CHECK(version) ((version) <= SIMDE_ARCH_X86)\n#else\n#  define SIMDE_ARCH_X86_CHECK(version) (0)\n#endif\n\n/* SIMD ISA extensions for x86/x86_64 and Elbrus */\n#if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) || defined(SIMDE_ARCH_E2K)\n#  if defined(_M_IX86_FP)\n#    define 
SIMDE_ARCH_X86_MMX\n#    if (_M_IX86_FP >= 1)\n#      define SIMDE_ARCH_X86_SSE 1\n#    endif\n#    if (_M_IX86_FP >= 2)\n#      define SIMDE_ARCH_X86_SSE2 1\n#    endif\n#  elif defined(_M_X64)\n#    define SIMDE_ARCH_X86_SSE 1\n#    define SIMDE_ARCH_X86_SSE2 1\n#  else\n#    if defined(__MMX__)\n#      define SIMDE_ARCH_X86_MMX 1\n#    endif\n#    if defined(__SSE__)\n#      define SIMDE_ARCH_X86_SSE 1\n#    endif\n#    if defined(__SSE2__)\n#      define SIMDE_ARCH_X86_SSE2 1\n#    endif\n#  endif\n#  if defined(__SSE3__)\n#    define SIMDE_ARCH_X86_SSE3 1\n#  endif\n#  if defined(__SSSE3__)\n#    define SIMDE_ARCH_X86_SSSE3 1\n#  endif\n#  if defined(__SSE4_1__)\n#    define SIMDE_ARCH_X86_SSE4_1 1\n#  endif\n#  if defined(__SSE4_2__)\n#    define SIMDE_ARCH_X86_SSE4_2 1\n#  endif\n#  if defined(__XOP__)\n#    define SIMDE_ARCH_X86_XOP 1\n#  endif\n#  if defined(__AVX__)\n#    define SIMDE_ARCH_X86_AVX 1\n#    if !defined(SIMDE_ARCH_X86_SSE3)\n#      define SIMDE_ARCH_X86_SSE3 1\n#    endif\n#    if !defined(SIMDE_ARCH_X86_SSE4_1)\n#      define SIMDE_ARCH_X86_SSE4_1 1\n#    endif\n#    if !defined(SIMDE_ARCH_X86_SSE4_2)\n#      define SIMDE_ARCH_X86_SSE4_2 1\n#    endif\n#  endif\n#  if defined(__AVX2__)\n#    define SIMDE_ARCH_X86_AVX2 1\n#    if defined(_MSC_VER)\n#      define SIMDE_ARCH_X86_FMA 1\n#    endif\n#  endif\n#  if defined(__FMA__)\n#    define SIMDE_ARCH_X86_FMA 1\n#    if !defined(SIMDE_ARCH_X86_AVX)\n#      define SIMDE_ARCH_X86_AVX 1\n#    endif\n#  endif\n#  if defined(__AVX512VP2INTERSECT__)\n#    define SIMDE_ARCH_X86_AVX512VP2INTERSECT 1\n#  endif\n#  if defined(__AVX512BITALG__)\n#    define SIMDE_ARCH_X86_AVX512BITALG 1\n#  endif\n#  if defined(__AVX512VPOPCNTDQ__)\n#    define SIMDE_ARCH_X86_AVX512VPOPCNTDQ 1\n#  endif\n#  if defined(__AVX512VBMI__)\n#    define SIMDE_ARCH_X86_AVX512VBMI 1\n#  endif\n#  if defined(__AVX512VBMI2__)\n#    define SIMDE_ARCH_X86_AVX512VBMI2 1\n#  endif\n#  if defined(__AVX512VNNI__)\n#    define SIMDE_ARCH_X86_AVX512VNNI 1\n#  endif\n#  if defined(__AVX5124VNNIW__)\n#    define SIMDE_ARCH_X86_AVX5124VNNIW 1\n#  endif\n#  if defined(__AVX512BW__)\n#    define SIMDE_ARCH_X86_AVX512BW 1\n#  endif\n#  if defined(__AVX512BF16__)\n#    define SIMDE_ARCH_X86_AVX512BF16 1\n#  endif\n#  if defined(__AVX512CD__)\n#    define SIMDE_ARCH_X86_AVX512CD 1\n#  endif\n#  if defined(__AVX512DQ__)\n#    define SIMDE_ARCH_X86_AVX512DQ 1\n#  endif\n#  if defined(__AVX512F__)\n#    define SIMDE_ARCH_X86_AVX512F 1\n#  endif\n#  if defined(__AVX512VL__)\n#    define SIMDE_ARCH_X86_AVX512VL 1\n#  endif\n#  if defined(__AVX512FP16__)\n#    define SIMDE_ARCH_X86_AVX512FP16 1\n#  endif\n#  if defined(__GFNI__)\n#    define SIMDE_ARCH_X86_GFNI 1\n#  endif\n#  if defined(__PCLMUL__)\n#    define SIMDE_ARCH_X86_PCLMUL 1\n#  endif\n#  if defined(__VPCLMULQDQ__)\n#    define SIMDE_ARCH_X86_VPCLMULQDQ 1\n#  endif\n#  if defined(__F16C__) || (defined(HEDLEY_MSVC_VERSION) && HEDLEY_MSVC_VERSION_CHECK(19,30,0) && defined(SIMDE_ARCH_X86_AVX2) )\n#    define SIMDE_ARCH_X86_F16C 1\n#  endif\n#  if defined(__AES__)\n#    define SIMDE_ARCH_X86_AES 1\n#  endif\n#endif\n\n/* Itanium\n   <https://en.wikipedia.org/wiki/Itanium> */\n#if defined(__ia64__) || defined(_IA64) || defined(__IA64__) || defined(__ia64) || defined(_M_IA64) || defined(__itanium__)\n#  define SIMDE_ARCH_IA64 1\n#endif\n\n/* Renesas M32R\n   <https://en.wikipedia.org/wiki/M32R> */\n#if defined(__m32r__) || defined(__M32R__)\n#  define SIMDE_ARCH_M32R\n#endif\n\n/* Motorola 68000\n   
<https://en.wikipedia.org/wiki/Motorola_68000> */\n#if defined(__mc68060__) || defined(__MC68060__)\n#  define SIMDE_ARCH_M68K 68060\n#elif defined(__mc68040__) || defined(__MC68040__)\n#  define SIMDE_ARCH_M68K 68040\n#elif defined(__mc68030__) || defined(__MC68030__)\n#  define SIMDE_ARCH_M68K 68030\n#elif defined(__mc68020__) || defined(__MC68020__)\n#  define SIMDE_ARCH_M68K 68020\n#elif defined(__mc68010__) || defined(__MC68010__)\n#  define SIMDE_ARCH_M68K 68010\n#elif defined(__mc68000__) || defined(__MC68000__)\n#  define SIMDE_ARCH_M68K 68000\n#endif\n#if defined(SIMDE_ARCH_M68K)\n#  define SIMDE_ARCH_M68K_CHECK(version) ((version) <= SIMDE_ARCH_M68K)\n#else\n#  define SIMDE_ARCH_M68K_CHECK(version) (0)\n#endif\n\n/* Xilinx MicroBlaze\n   <https://en.wikipedia.org/wiki/MicroBlaze> */\n#if defined(__MICROBLAZE__) || defined(__microblaze__)\n#  define SIMDE_ARCH_MICROBLAZE\n#endif\n\n/* MIPS\n   <https://en.wikipedia.org/wiki/MIPS_architecture> */\n#if defined(_MIPS_ISA_MIPS64R2)\n#  define SIMDE_ARCH_MIPS 642\n#elif defined(_MIPS_ISA_MIPS64)\n#  define SIMDE_ARCH_MIPS 640\n#elif defined(_MIPS_ISA_MIPS32R2)\n#  define SIMDE_ARCH_MIPS 322\n#elif defined(_MIPS_ISA_MIPS32)\n#  define SIMDE_ARCH_MIPS 320\n#elif defined(_MIPS_ISA_MIPS4)\n#  define SIMDE_ARCH_MIPS 4\n#elif defined(_MIPS_ISA_MIPS3)\n#  define SIMDE_ARCH_MIPS 3\n#elif defined(_MIPS_ISA_MIPS2)\n#  define SIMDE_ARCH_MIPS 2\n#elif defined(_MIPS_ISA_MIPS1)\n#  define SIMDE_ARCH_MIPS 1\n#elif defined(_MIPS_ISA_MIPS) || defined(__mips) || defined(__MIPS__)\n#  define SIMDE_ARCH_MIPS 1\n#endif\n#if defined(SIMDE_ARCH_MIPS)\n#  define SIMDE_ARCH_MIPS_CHECK(version) ((version) <= SIMDE_ARCH_MIPS)\n#else\n#  define SIMDE_ARCH_MIPS_CHECK(version) (0)\n#endif\n\n#if defined(__mips_loongson_mmi)\n#  define SIMDE_ARCH_MIPS_LOONGSON_MMI 1\n#endif\n\n#if defined(__mips_msa)\n#  define SIMDE_ARCH_MIPS_MSA 1\n#endif\n\n/* Matsushita MN10300\n   <https://en.wikipedia.org/wiki/MN103> */\n#if defined(__MN10300__) || defined(__mn10300__)\n#  define SIMDE_ARCH_MN10300 1\n#endif\n\n/* POWER\n   <https://en.wikipedia.org/wiki/IBM_POWER_Instruction_Set_Architecture> */\n#if defined(_M_PPC)\n#  define SIMDE_ARCH_POWER _M_PPC\n#elif defined(_ARCH_PWR9)\n#  define SIMDE_ARCH_POWER 900\n#elif defined(_ARCH_PWR8)\n#  define SIMDE_ARCH_POWER 800\n#elif defined(_ARCH_PWR7)\n#  define SIMDE_ARCH_POWER 700\n#elif defined(_ARCH_PWR6)\n#  define SIMDE_ARCH_POWER 600\n#elif defined(_ARCH_PWR5)\n#  define SIMDE_ARCH_POWER 500\n#elif defined(_ARCH_PWR4)\n#  define SIMDE_ARCH_POWER 400\n#elif defined(_ARCH_440) || defined(__ppc440__)\n#  define SIMDE_ARCH_POWER 440\n#elif defined(_ARCH_450) || defined(__ppc450__)\n#  define SIMDE_ARCH_POWER 450\n#elif defined(_ARCH_601) || defined(__ppc601__)\n#  define SIMDE_ARCH_POWER 601\n#elif defined(_ARCH_603) || defined(__ppc603__)\n#  define SIMDE_ARCH_POWER 603\n#elif defined(_ARCH_604) || defined(__ppc604__)\n#  define SIMDE_ARCH_POWER 604\n#elif defined(_ARCH_605) || defined(__ppc605__)\n#  define SIMDE_ARCH_POWER 605\n#elif defined(_ARCH_620) || defined(__ppc620__)\n#  define SIMDE_ARCH_POWER 620\n#elif defined(__powerpc) || defined(__powerpc__) || defined(__POWERPC__) || defined(__ppc__) || defined(__PPC__) || defined(_ARCH_PPC) || defined(__ppc)\n#  define SIMDE_ARCH_POWER 1\n#endif\n#if defined(SIMDE_ARCH_POWER)\n  #define SIMDE_ARCH_POWER_CHECK(version) ((version) <= SIMDE_ARCH_POWER)\n#else\n  #define SIMDE_ARCH_POWER_CHECK(version) (0)\n#endif\n\n#if defined(__ALTIVEC__)\n#  define SIMDE_ARCH_POWER_ALTIVEC 
SIMDE_ARCH_POWER\n  #define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) ((version) <= SIMDE_ARCH_POWER)\n#else\n  #define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) (0)\n#endif\n\n/* RISC-V\n   <https://en.wikipedia.org/wiki/RISC-V> */\n#if defined(__riscv) || defined(__riscv__)\n#  if __riscv_xlen == 64\n#     define SIMDE_ARCH_RISCV64\n#  elif __riscv_xlen == 32\n#     define SIMDE_ARCH_RISCV32\n#  endif\n#endif\n\n/* RISC-V SIMD ISA extensions */\n#if defined(__riscv_zve32x)\n#  define SIMDE_ARCH_RISCV_ZVE32X 1\n#endif\n#if defined(__riscv_zve32f)\n#  define SIMDE_ARCH_RISCV_ZVE32F 1\n#endif\n#if defined(__riscv_zve64x)\n#  define SIMDE_ARCH_RISCV_ZVE64X 1\n#endif\n#if defined(__riscv_zve64f)\n#  define SIMDE_ARCH_RISCV_ZVE64F 1\n#endif\n#if defined(__riscv_zve64d)\n#  define SIMDE_ARCH_RISCV_ZVE64D 1\n#endif\n#if defined(__riscv_v)\n#  define SIMDE_ARCH_RISCV_V 1\n#endif\n#if defined(__riscv_zvfh)\n#  define SIMDE_ARCH_RISCV_ZVFH 1\n#endif\n#if defined(__riscv_zvfhmin)\n#  define SIMDE_ARCH_RISCV_ZVFHMIN 1\n#endif\n\n/* SPARC\n   <https://en.wikipedia.org/wiki/SPARC> */\n#if defined(__sparc_v9__) || defined(__sparcv9)\n#  define SIMDE_ARCH_SPARC 9\n#elif defined(__sparc_v8__) || defined(__sparcv8)\n#  define SIMDE_ARCH_SPARC 8\n#elif defined(__sparc_v7__) || defined(__sparcv7)\n#  define SIMDE_ARCH_SPARC 7\n#elif defined(__sparc_v6__) || defined(__sparcv6)\n#  define SIMDE_ARCH_SPARC 6\n#elif defined(__sparc_v5__) || defined(__sparcv5)\n#  define SIMDE_ARCH_SPARC 5\n#elif defined(__sparc_v4__) || defined(__sparcv4)\n#  define SIMDE_ARCH_SPARC 4\n#elif defined(__sparc_v3__) || defined(__sparcv3)\n#  define SIMDE_ARCH_SPARC 3\n#elif defined(__sparc_v2__) || defined(__sparcv2)\n#  define SIMDE_ARCH_SPARC 2\n#elif defined(__sparc_v1__) || defined(__sparcv1)\n#  define SIMDE_ARCH_SPARC 1\n#elif defined(__sparc__) || defined(__sparc)\n#  define SIMDE_ARCH_SPARC 1\n#endif\n#if defined(SIMDE_ARCH_SPARC)\n  #define SIMDE_ARCH_SPARC_CHECK(version) ((version) <= SIMDE_ARCH_SPARC)\n#else\n  #define SIMDE_ARCH_SPARC_CHECK(version) (0)\n#endif\n\n/* SuperH\n   <https://en.wikipedia.org/wiki/SuperH> */\n#if defined(__sh5__) || defined(__SH5__)\n#  define SIMDE_ARCH_SUPERH 5\n#elif defined(__sh4__) || defined(__SH4__)\n#  define SIMDE_ARCH_SUPERH 4\n#elif defined(__sh3__) || defined(__SH3__)\n#  define SIMDE_ARCH_SUPERH 3\n#elif defined(__sh2__) || defined(__SH2__)\n#  define SIMDE_ARCH_SUPERH 2\n#elif defined(__sh1__) || defined(__SH1__)\n#  define SIMDE_ARCH_SUPERH 1\n#elif defined(__sh__) || defined(__SH__)\n#  define SIMDE_ARCH_SUPERH 1\n#endif\n\n/* IBM System z\n   <https://en.wikipedia.org/wiki/IBM_System_z> */\n#if defined(__370__) || defined(__THW_370__) || defined(__s390__) || defined(__s390x__) || defined(__zarch__) || defined(__SYSC_ZARCH__)\n#  define SIMDE_ARCH_ZARCH __ARCH__\n#endif\n#if defined(SIMDE_ARCH_ZARCH)\n  #define SIMDE_ARCH_ZARCH_CHECK(version) ((version) <= SIMDE_ARCH_ZARCH)\n#else\n  #define SIMDE_ARCH_ZARCH_CHECK(version) (0)\n#endif\n\n#if defined(SIMDE_ARCH_ZARCH) && defined(__VEC__)\n  #define SIMDE_ARCH_ZARCH_ZVECTOR SIMDE_ARCH_ZARCH\n#endif\n\n/* TMS320 DSP\n   <https://en.wikipedia.org/wiki/Texas_Instruments_TMS320> */\n#if defined(_TMS320C6740) || defined(__TMS320C6740__)\n#  define SIMDE_ARCH_TMS320 6740\n#elif defined(_TMS320C6700_PLUS) || defined(__TMS320C6700_PLUS__)\n#  define SIMDE_ARCH_TMS320 6701\n#elif defined(_TMS320C6700) || defined(__TMS320C6700__)\n#  define SIMDE_ARCH_TMS320 6700\n#elif defined(_TMS320C6600) || defined(__TMS320C6600__)\n#  define 
SIMDE_ARCH_TMS320 6600\n#elif defined(_TMS320C6400_PLUS) || defined(__TMS320C6400_PLUS__)\n#  define SIMDE_ARCH_TMS320 6401\n#elif defined(_TMS320C6400) || defined(__TMS320C6400__)\n#  define SIMDE_ARCH_TMS320 6400\n#elif defined(_TMS320C6200) || defined(__TMS320C6200__)\n#  define SIMDE_ARCH_TMS320 6200\n#elif defined(_TMS320C55X) || defined(__TMS320C55X__)\n#  define SIMDE_ARCH_TMS320 550\n#elif defined(_TMS320C54X) || defined(__TMS320C54X__)\n#  define SIMDE_ARCH_TMS320 540\n#elif defined(_TMS320C28X) || defined(__TMS320C28X__)\n#  define SIMDE_ARCH_TMS320 280\n#endif\n#if defined(SIMDE_ARCH_TMS320)\n  #define SIMDE_ARCH_TMS320_CHECK(version) ((version) <= SIMDE_ARCH_TMS320)\n#else\n  #define SIMDE_ARCH_TMS320_CHECK(version) (0)\n#endif\n\n/* WebAssembly */\n#if defined(__wasm__)\n#  define SIMDE_ARCH_WASM 1\n#endif\n\n#if defined(SIMDE_ARCH_WASM) && defined(__wasm_simd128__)\n#  define SIMDE_ARCH_WASM_SIMD128\n#endif\n\n#if defined(SIMDE_ARCH_WASM) && defined(__wasm_relaxed_simd__)\n#  define SIMDE_ARCH_WASM_RELAXED_SIMD\n#endif\n\n/* Xtensa\n   <https://en.wikipedia.org/wiki/> */\n#if defined(__xtensa__) || defined(__XTENSA__)\n#  define SIMDE_ARCH_XTENSA 1\n#endif\n\n/* Availability of 16-bit floating-point arithmetic intrinsics */\n#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)\n#  define SIMDE_ARCH_ARM_NEON_FP16\n#endif\n\n/* Availability of 16-bit brain floating-point arithmetic intrinsics */\n#if defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC)\n#  define SIMDE_ARCH_ARM_NEON_BF16\n#endif\n\n/* LoongArch\n   <https://en.wikipedia.org/wiki/Loongson#LoongArch> */\n#if defined(__loongarch32)\n#  define SIMDE_ARCH_LOONGARCH 1\n#elif defined(__loongarch64)\n#  define SIMDE_ARCH_LOONGARCH 2\n#endif\n\n/* LSX: LoongArch 128-bits SIMD extension */\n#if defined(__loongarch_sx)\n#  define SIMDE_ARCH_LOONGARCH_LSX 1\n#endif\n\n/* LASX: LoongArch 256-bits SIMD extension */\n#if defined(__loongarch_asx)\n#  define SIMDE_ARCH_LOONGARCH_LASX 2\n#endif\n\n#endif /* !defined(SIMDE_ARCH_H) */\n"
  },
  {
    "path": "external_libs/pgenlib/simde/simde-bf16.h",
    "content": "/* SPDX-License-Identifier: MIT\n *\n * Permission is hereby granted, free of charge, to any person\n * obtaining a copy of this software and associated documentation\n * files (the \"Software\"), to deal in the Software without\n * restriction, including without limitation the rights to use, copy,\n * modify, merge, publish, distribute, sublicense, and/or sell copies\n * of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be\n * included in all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND,\n * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\n * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\n * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\n * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\n * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\n * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n * SOFTWARE.\n *\n * Copyright:\n *   2023      Yi-Yen Chung <eric681@andestech.com> (Copyright owned by Andes Technology)\n */\n\n#include \"hedley.h\"\n#include \"simde-common.h\"\n#include \"simde-detect-clang.h\"\n\n#if !defined(SIMDE_BFLOAT16_H)\n#define SIMDE_BFLOAT16_H\n\nHEDLEY_DIAGNOSTIC_PUSH\nSIMDE_DISABLE_UNWANTED_DIAGNOSTICS\nSIMDE_BEGIN_DECLS_\n\n/* This implementations is based upon simde-f16.h */\n\n/* Portable version which should work on pretty much any compiler.\n * Obviously you can't rely on compiler support for things like\n * conversion to/from 32-bit floats, so make sure you always use the\n * functions and macros in this file!\n */\n#define SIMDE_BFLOAT16_API_PORTABLE 1\n\n#define SIMDE_BFLOAT16_API_BF16 2\n\n#if !defined(SIMDE_BFLOAT16_API)\n  #if defined(SIMDE_ARM_NEON_BF16)\n    #define SIMDE_BFLOAT16_API SIMDE_BFLOAT16_API_BF16\n  #else\n    #define SIMDE_BFLOAT16_API SIMDE_BFLOAT16_API_PORTABLE\n  #endif\n#endif\n\n#if SIMDE_BFLOAT16_API == SIMDE_BFLOAT16_API_BF16\n  #include <arm_bf16.h>\n  typedef __bf16 simde_bfloat16;\n#elif SIMDE_BFLOAT16_API == SIMDE_BFLOAT16_API_PORTABLE\n  typedef struct { uint16_t value; } simde_bfloat16;\n#else\n  #error No 16-bit floating point API.\n#endif\n\n/* Conversion -- convert between single-precision and brain half-precision\n * floats. 
*/\nstatic HEDLEY_ALWAYS_INLINE HEDLEY_CONST\nsimde_bfloat16\nsimde_bfloat16_from_float32 (simde_float32 value) {\n#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_BF16)\n  return vcvth_bf16_f32(value);\n#else\n  simde_bfloat16 res;\n  char* src = HEDLEY_REINTERPRET_CAST(char*, &value);\n  // rounding to nearest bfloat16\n  // If the 17th bit of value is 1, set the rounding to 1.\n  uint8_t rounding = 0;\n\n  #if SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE\n    if (src[1] & UINT8_C(0x80)) rounding = 1;\n    src[2] = HEDLEY_STATIC_CAST(char, (HEDLEY_STATIC_CAST(uint8_t, src[2]) + rounding));\n    simde_memcpy(&res, src+2, sizeof(res));\n  #else\n    if (src[2] & UINT8_C(0x80)) rounding = 1;\n    src[1] = HEDLEY_STATIC_CAST(char, (HEDLEY_STATIC_CAST(uint8_t, src[1]) + rounding));\n    simde_memcpy(&res, src, sizeof(res));\n  #endif\n\n  return res;\n#endif\n}\n\nstatic HEDLEY_ALWAYS_INLINE HEDLEY_CONST\nsimde_float32\nsimde_bfloat16_to_float32 (simde_bfloat16 value) {\n#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_BF16)\n  return vcvtah_f32_bf16(value);\n#else\n  simde_float32 res = 0.0;\n  char* _res = HEDLEY_REINTERPRET_CAST(char*, &res);\n\n  #if SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE\n    simde_memcpy(_res+2, &value, sizeof(value));\n  #else\n    simde_memcpy(_res, &value, sizeof(value));\n  #endif\n\n  return res;\n#endif\n}\n\nSIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint16_as_bfloat16, simde_bfloat16,      uint16_t)\n\n#define SIMDE_NANBF simde_uint16_as_bfloat16(0xFFC1) // a quiet Not-a-Number\n#define SIMDE_INFINITYBF simde_uint16_as_bfloat16(0x7F80)\n#define SIMDE_NINFINITYBF simde_uint16_as_bfloat16(0xFF80)\n\n#define SIMDE_BFLOAT16_VALUE(value) simde_bfloat16_from_float32(SIMDE_FLOAT32_C(value))\n\n#if !defined(simde_isinfbf) && defined(simde_math_isinff)\n  #define simde_isinfbf(a) simde_math_isinff(simde_bfloat16_to_float32(a))\n#endif\n#if !defined(simde_isnanbf) && defined(simde_math_isnanf)\n  #define simde_isnanbf(a) simde_math_isnanf(simde_bfloat16_to_float32(a))\n#endif\n\nSIMDE_END_DECLS_\nHEDLEY_DIAGNOSTIC_POP\n\n#endif /* !defined(SIMDE_BFLOAT16_H) */\n"
  },
  {
    "path": "external_libs/pgenlib/simde/simde-common.h",
    "content": "/* SPDX-License-Identifier: MIT\n *\n * Permission is hereby granted, free of charge, to any person\n * obtaining a copy of this software and associated documentation\n * files (the \"Software\"), to deal in the Software without\n * restriction, including without limitation the rights to use, copy,\n * modify, merge, publish, distribute, sublicense, and/or sell copies\n * of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be\n * included in all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND,\n * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\n * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\n * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\n * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\n * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\n * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n * SOFTWARE.\n *\n * Copyright:\n *   2017-2020 Evan Nemerson <evan@nemerson.com>\n *   2023      Yi-Yen Chung <eric681@andestech.com> (Copyright owned by Andes Technology)\n *   2023      Ju-Hung Li <jhlee@pllab.cs.nthu.edu.tw> (Copyright owned by NTHU pllab)\n */\n\n#if !defined(SIMDE_COMMON_H)\n#define SIMDE_COMMON_H\n\n#include \"hedley.h\"\n\n#define SIMDE_VERSION_MAJOR 0\n#define SIMDE_VERSION_MINOR 8\n#define SIMDE_VERSION_MICRO 2\n#define SIMDE_VERSION HEDLEY_VERSION_ENCODE(SIMDE_VERSION_MAJOR, SIMDE_VERSION_MINOR, SIMDE_VERSION_MICRO)\n// Also update meson.build in the root directory of the repository\n\n#include <stddef.h>\n#include <stdint.h>\n\n#include \"simde-detect-clang.h\"\n#include \"simde-arch.h\"\n#include \"simde-features.h\"\n#include \"simde-diagnostic.h\"\n#include \"simde-math.h\"\n#include \"simde-constify.h\"\n#include \"simde-align.h\"\n\n/* In some situations, SIMDe has to make large performance sacrifices\n * for small increases in how faithfully it reproduces an API, but\n * only a relatively small number of users will actually need the API\n * to be completely accurate.  The SIMDE_FAST_* options can be used to\n * disable these trade-offs.\n *\n * They can be enabled by passing -DSIMDE_FAST_MATH to the compiler, or\n * the individual defines (e.g., -DSIMDE_FAST_NANS) if you only want to\n * enable some optimizations.  Using -ffast-math and/or\n * -ffinite-math-only will also enable the relevant options.  If you\n * don't want that you can pass -DSIMDE_NO_FAST_* to disable them. */\n\n/* Most programs avoid NaNs by never passing values which can result in\n * a NaN; for example, if you only pass non-negative values to the sqrt\n * functions, it won't generate a NaN.  On some platforms, similar\n * functions handle NaNs differently; for example, the _mm_min_ps SSE\n * function will return 0.0 if you pass it (0.0, NaN), but the NEON\n * vminq_f32 function will return NaN.  
Making them behave like one\n * another is expensive; it requires generating a mask of all lanes\n * with NaNs, then performing the operation (e.g., vminq_f32), then\n * blending together the result with another vector using the mask.\n *\n * If you don't want SIMDe to worry about the differences between how\n * NaNs are handled on the two platforms, define this (or pass\n * -ffinite-math-only) */\n#if !defined(SIMDE_FAST_MATH) && !defined(SIMDE_NO_FAST_MATH) && defined(__FAST_MATH__)\n  #define SIMDE_FAST_MATH\n#endif\n\n#if !defined(SIMDE_FAST_NANS) && !defined(SIMDE_NO_FAST_NANS)\n  #if defined(SIMDE_FAST_MATH)\n    #define SIMDE_FAST_NANS\n  #elif defined(__FINITE_MATH_ONLY__)\n    #if __FINITE_MATH_ONLY__\n      #define SIMDE_FAST_NANS\n    #endif\n  #endif\n#endif\n\n/* Many functions are defined as using the current rounding mode\n * (i.e., the SIMD version of fegetround()) when converting to\n * an integer.  For example, _mm_cvtpd_epi32.  Unfortunately,\n * on some platforms (such as ARMv8+ where round-to-nearest is\n * always used, regardless of the FPSCR register) this means we\n * have to first query the current rounding mode, then choose\n * the proper function (round, ceil, floor, etc.) */\n#if !defined(SIMDE_FAST_ROUND_MODE) && !defined(SIMDE_NO_FAST_ROUND_MODE) && defined(SIMDE_FAST_MATH)\n  #define SIMDE_FAST_ROUND_MODE\n#endif\n\n/* This controls how ties are rounded.  For example, does 10.5 round to\n * 10 or 11?  IEEE 754 specifies round-towards-even, but ARMv7 (for\n * example) doesn't support it and it must be emulated (which is rather\n * slow).  If you're okay with just using the default for whatever arch\n * you're on, you should definitely define this.\n *\n * Note that we don't use this macro to avoid correct implementations\n * in functions which are explicitly about rounding (such as vrnd* on\n * NEON, _mm_round_* on x86, etc.); it is only used for code where\n * rounding is a component in another function, and even then it isn't\n * usually a problem since such functions will use the current rounding\n * mode. */\n#if !defined(SIMDE_FAST_ROUND_TIES) && !defined(SIMDE_NO_FAST_ROUND_TIES) && defined(SIMDE_FAST_MATH)\n  #define SIMDE_FAST_ROUND_TIES\n#endif\n\n/* For functions which convert from one type to another (mostly from\n * floating point to integer types), sometimes we need to do a range\n * check and potentially return a different result if the value\n * falls outside that range.  Skipping this check can provide a\n * performance boost, at the expense of faithfulness to the API we're\n * emulating. */\n#if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_NO_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_MATH)\n  #define SIMDE_FAST_CONVERSION_RANGE\n#endif\n\n/* Due to differences across platforms, sometimes it can be much\n * faster for us to allow spurious floating point exceptions,\n * or to not generate them when we should. 
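*/\n\n/* Illustrative sketch added in this vendored copy (not part of upstream\n * SIMDe): the faithful-but-slow pattern described for SIMDE_FAST_NANS above,\n * in scalar form.  A hardware-style min is computed first, then a NaN\n * mask/blend step restores the NaN behaviour of the emulated ISA; defining\n * SIMDE_FAST_NANS lets SIMDe skip that second step.  simde_math_isnanf() and\n * SIMDE_MATH_NANF are assumed to come from simde-math.h, and the\n * simde_example_* name is hypothetical; guarded with #if 0 so it is never\n * compiled. */\n#if 0\nstatic simde_float32\nsimde_example_min_keep_nan_(simde_float32 a, simde_float32 b) {\n  simde_float32 fast = (a < b) ? a : b;               /* what a hardware min returns */\n  if (simde_math_isnanf(a) || simde_math_isnanf(b)) { /* the extra mask/blend step   */\n    return SIMDE_MATH_NANF;\n  }\n  return fast;\n}\n#endif\n/* SIMDE_FAST_EXCEPTIONS, described above: 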
*/\n#if !defined(SIMDE_FAST_EXCEPTIONS) && !defined(SIMDE_NO_FAST_EXCEPTIONS) && defined(SIMDE_FAST_MATH)\n  #define SIMDE_FAST_EXCEPTIONS\n#endif\n\n#if \\\n    HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \\\n    HEDLEY_GCC_VERSION_CHECK(3,4,0) || \\\n    HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \\\n    HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \\\n    HEDLEY_ARM_VERSION_CHECK(4,1,0) || \\\n    HEDLEY_IBM_VERSION_CHECK(13,1,0) || \\\n    HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \\\n    (HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \\\n    HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \\\n    HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10)\n  #define SIMDE_CHECK_CONSTANT_(expr) (__builtin_constant_p(expr))\n#elif defined(__cplusplus) && (__cplusplus > 201703L)\n  #include <type_traits>\n  #define SIMDE_CHECK_CONSTANT_(expr) (std::is_constant_evaluated())\n#endif\n\n#if !defined(SIMDE_NO_CHECK_IMMEDIATE_CONSTANT)\n  #if defined(SIMDE_CHECK_CONSTANT_) && \\\n      SIMDE_DETECT_CLANG_VERSION_CHECK(9,0,0) && \\\n      (!defined(__apple_build_version__) || ((__apple_build_version__ < 11000000) || (__apple_build_version__ >= 12000000)))\n    #define SIMDE_REQUIRE_CONSTANT(arg) HEDLEY_REQUIRE_MSG(SIMDE_CHECK_CONSTANT_(arg), \"`\" #arg \"' must be constant\")\n  #else\n    #define SIMDE_REQUIRE_CONSTANT(arg)\n  #endif\n#else\n  #define SIMDE_REQUIRE_CONSTANT(arg)\n#endif\n\n#define SIMDE_REQUIRE_RANGE(arg, min, max) \\\n  HEDLEY_REQUIRE_MSG((((arg) >= (min)) && ((arg) <= (max))), \"'\" #arg \"' must be in [\" #min \", \" #max \"]\")\n\n#define SIMDE_REQUIRE_CONSTANT_RANGE(arg, min, max) \\\n  SIMDE_REQUIRE_CONSTANT(arg) \\\n  SIMDE_REQUIRE_RANGE(arg, min, max)\n\n/* A copy of HEDLEY_STATIC_ASSERT, except we don't define an empty\n * fallback if we can't find an implementation; instead we have to\n * check if SIMDE_STATIC_ASSERT is defined before using it. */\n#if \\\n  !defined(__cplusplus) && ( \\\n      (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \\\n      HEDLEY_HAS_FEATURE(c_static_assert) || \\\n      HEDLEY_GCC_VERSION_CHECK(6,0,0) || \\\n      HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \\\n      defined(_Static_assert) \\\n    )\n  /* Sometimes _Static_assert is defined (in cdefs.h) using a symbol which\n   * starts with a double-underscore. This is a system header so we have no\n   * control over it, but since it's a macro it will emit a diagnostic which\n   * prevents compilation with -Werror. 
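*/\n\n  /* Illustrative sketch added in this vendored copy (not part of upstream\n   * SIMDe): how SIMDE_STATIC_ASSERT and the SIMDE_REQUIRE_* annotations\n   * defined above are typically used.  The simde_example_* name is\n   * hypothetical; guarded with #if 0 so it is never compiled. */\n  #if 0\n  static int\n  simde_example_shl_(int v, const int count)\n      SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 31) {\n    SIMDE_STATIC_ASSERT(sizeof(int) >= 4, \"int is assumed to be at least 32 bits\");\n    return v << count;\n  }\n  #endif\n  /* Hence the pragma dance below: 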
*/\n  #if HEDLEY_HAS_WARNING(\"-Wreserved-identifier\")\n    #define SIMDE_STATIC_ASSERT(expr, message) (__extension__({ \\\n      HEDLEY_DIAGNOSTIC_PUSH \\\n      _Pragma(\"clang diagnostic ignored \\\"-Wreserved-identifier\\\"\") \\\n      _Static_assert(expr, message); \\\n      HEDLEY_DIAGNOSTIC_POP \\\n    }))\n  #else\n    #define SIMDE_STATIC_ASSERT(expr, message) _Static_assert(expr, message)\n  #endif\n#elif \\\n  (defined(__cplusplus) && (__cplusplus >= 201103L)) || \\\n  HEDLEY_MSVC_VERSION_CHECK(16,0,0)\n  #define SIMDE_STATIC_ASSERT(expr, message) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message))\n#endif\n\n/* Statement exprs */\n#if \\\n    HEDLEY_GNUC_VERSION_CHECK(2,95,0) || \\\n    HEDLEY_TINYC_VERSION_CHECK(0,9,26) || \\\n    HEDLEY_INTEL_VERSION_CHECK(9,0,0) || \\\n    HEDLEY_PGI_VERSION_CHECK(18,10,0) || \\\n    HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) || \\\n    HEDLEY_IBM_VERSION_CHECK(11,1,0) || \\\n    HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10)\n  #define SIMDE_STATEMENT_EXPR_(expr) (__extension__ expr)\n#endif\n\n/* This is just a convenience macro to make it easy to call a single\n * function with a specific diagnostic disabled. */\n#if defined(SIMDE_STATEMENT_EXPR_)\n  #define SIMDE_DISABLE_DIAGNOSTIC_EXPR_(diagnostic, expr) \\\n    SIMDE_STATEMENT_EXPR_(({ \\\n      HEDLEY_DIAGNOSTIC_PUSH \\\n      diagnostic \\\n      (expr); \\\n      HEDLEY_DIAGNOSTIC_POP \\\n    }))\n#endif\n\n#if defined(SIMDE_CHECK_CONSTANT_) && defined(SIMDE_STATIC_ASSERT)\n  #define SIMDE_ASSERT_CONSTANT_(v) SIMDE_STATIC_ASSERT(SIMDE_CHECK_CONSTANT_(v), #v \" must be constant.\")\n#endif\n\n#if \\\n  (HEDLEY_HAS_ATTRIBUTE(may_alias) && !defined(HEDLEY_SUNPRO_VERSION)) || \\\n  HEDLEY_GCC_VERSION_CHECK(3,3,0) || \\\n  HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \\\n  HEDLEY_IBM_VERSION_CHECK(13,1,0)\n#  define SIMDE_MAY_ALIAS __attribute__((__may_alias__))\n#else\n#  define SIMDE_MAY_ALIAS\n#endif\n\n/*  Lots of compilers support GCC-style vector extensions, but many\n    don't support all the features.  Define different macros depending\n    on support for\n\n    * SIMDE_VECTOR - Declaring a vector.\n    * SIMDE_VECTOR_OPS - basic operations (binary and unary).\n    * SIMDE_VECTOR_NEGATE - negating a vector\n    * SIMDE_VECTOR_SCALAR - For binary operators, the second argument\n        can be a scalar, in which case the result is as if that scalar\n        had been broadcast to all lanes of a vector.\n    * SIMDE_VECTOR_SUBSCRIPT - Supports array subscript notation for\n        extracting/inserting a single element.\n\n    SIMDE_VECTOR can be assumed if any others are defined, the\n    others are independent. 
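*/\n\n/* Illustrative sketch added in this vendored copy (not part of upstream\n * SIMDe): what each of the capability macros above permits on a compiler\n * (e.g. GCC or clang) that defines all of them.  The simde_example_* names\n * are hypothetical; guarded with #if 0 so it is never compiled. */\n#if 0\ntypedef int32_t simde_example_i32x4_ SIMDE_VECTOR(16); /* SIMDE_VECTOR: 16 bytes = 4 lanes */\nstatic simde_example_i32x4_\nsimde_example_vector_ops_(simde_example_i32x4_ a, simde_example_i32x4_ b) {\n  simde_example_i32x4_ r = a + b; /* SIMDE_VECTOR_OPS                       */\n  r = -r;                         /* SIMDE_VECTOR_NEGATE                    */\n  r = r * 2;                      /* SIMDE_VECTOR_SCALAR (scalar broadcast) */\n  r[0] = 0;                       /* SIMDE_VECTOR_SUBSCRIPT                 */\n  return r;\n}\n#endif\n/* Feature detection: 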
*/\n#if !defined(SIMDE_NO_VECTOR)\n#  if \\\n    HEDLEY_GCC_VERSION_CHECK(4,8,0)\n#    define SIMDE_VECTOR(size) __attribute__((__vector_size__(size)))\n#    define SIMDE_VECTOR_OPS\n#    define SIMDE_VECTOR_NEGATE\n#    define SIMDE_VECTOR_SCALAR\n#    define SIMDE_VECTOR_SUBSCRIPT\n#  elif HEDLEY_INTEL_VERSION_CHECK(16,0,0)\n#    define SIMDE_VECTOR(size) __attribute__((__vector_size__(size)))\n#    define SIMDE_VECTOR_OPS\n#    define SIMDE_VECTOR_NEGATE\n/* ICC only supports SIMDE_VECTOR_SCALAR for constants */\n#    define SIMDE_VECTOR_SUBSCRIPT\n#  elif \\\n    HEDLEY_GCC_VERSION_CHECK(4,1,0) || \\\n    HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \\\n    HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10)\n#    define SIMDE_VECTOR(size) __attribute__((__vector_size__(size)))\n#    define SIMDE_VECTOR_OPS\n#  elif HEDLEY_SUNPRO_VERSION_CHECK(5,12,0)\n#    define SIMDE_VECTOR(size) __attribute__((__vector_size__(size)))\n#  elif HEDLEY_HAS_ATTRIBUTE(vector_size)\n#    define SIMDE_VECTOR(size) __attribute__((__vector_size__(size)))\n#    define SIMDE_VECTOR_OPS\n#    define SIMDE_VECTOR_NEGATE\n#    define SIMDE_VECTOR_SUBSCRIPT\n#    if SIMDE_DETECT_CLANG_VERSION_CHECK(5,0,0)\n#      define SIMDE_VECTOR_SCALAR\n#    endif\n#  endif\n\n/* GCC and clang have built-in functions to handle shuffling and\n   converting of vectors, but the implementations are slightly\n   different.  This macro is just an abstraction over them.  Note that\n   elem_size is in bits but vec_size is in bytes. */\n#  if !defined(SIMDE_NO_SHUFFLE_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT)\n     HEDLEY_DIAGNOSTIC_PUSH\n     /* We don't care about -Wvariadic-macros; all compilers that support\n      * shufflevector/shuffle support them. */\n#    if HEDLEY_HAS_WARNING(\"-Wc++98-compat-pedantic\")\n#      pragma clang diagnostic ignored \"-Wc++98-compat-pedantic\"\n#    endif\n#    if HEDLEY_HAS_WARNING(\"-Wvariadic-macros\") || HEDLEY_GCC_VERSION_CHECK(4,0,0)\n#      pragma GCC diagnostic ignored \"-Wvariadic-macros\"\n#    endif\n\n#    if HEDLEY_HAS_BUILTIN(__builtin_shufflevector)\n#      define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) __builtin_shufflevector(a, b, __VA_ARGS__)\n#    elif HEDLEY_GCC_HAS_BUILTIN(__builtin_shuffle,4,7,0) && !defined(__INTEL_COMPILER)\n#      define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) (__extension__ ({ \\\n         int##elem_size##_t SIMDE_VECTOR(vec_size) simde_shuffle_ = { __VA_ARGS__ }; \\\n           __builtin_shuffle(a, b, simde_shuffle_); \\\n         }))\n#    endif\n     HEDLEY_DIAGNOSTIC_POP\n#  endif\n\n/* TODO: this actually works on XL C/C++ without SIMDE_VECTOR_SUBSCRIPT\n   but the code needs to be refactored a bit to take advantage. 
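*/\n\n/* Illustrative sketch added in this vendored copy (not part of upstream\n * SIMDe): SIMDE_SHUFFLE_VECTOR_, defined above, takes elem_size in BITS but\n * vec_size in BYTES; indices 0..3 select lanes of the first argument and\n * 4..7 lanes of the second, so this reverses a 4-lane vector.  The\n * simde_example_* names are hypothetical; guarded with #if 0 so it is never\n * compiled. */\n#  if 0\ntypedef int32_t simde_example_rev_i32x4_ SIMDE_VECTOR(16);\nstatic simde_example_rev_i32x4_\nsimde_example_reverse_(simde_example_rev_i32x4_ a) {\n  return SIMDE_SHUFFLE_VECTOR_(32, 16, a, a, 3, 2, 1, 0);\n}\n#  endif\n/* (The TODO above concerns the conversion support below.) 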
*/\n#  if !defined(SIMDE_NO_CONVERT_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT)\n#    if HEDLEY_HAS_BUILTIN(__builtin_convertvector) || HEDLEY_GCC_VERSION_CHECK(9,0,0)\n#      if HEDLEY_GCC_VERSION_CHECK(9,0,0) && !HEDLEY_GCC_VERSION_CHECK(9,3,0)\n         /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93557 */\n#        define SIMDE_CONVERT_VECTOR_(to, from) ((to) = (__extension__({ \\\n             __typeof__(from) from_ = (from); \\\n             ((void) from_); \\\n             __builtin_convertvector(from_, __typeof__(to)); \\\n           })))\n#      else\n#        define SIMDE_CONVERT_VECTOR_(to, from) ((to) = __builtin_convertvector((from), __typeof__(to)))\n#      endif\n#    endif\n#  endif\n#endif\n\n/* Since we currently require SUBSCRIPT before using a vector in a\n   union, we define these as dependencies of SUBSCRIPT.  They are\n   likely to disappear in the future, once SIMDe learns how to make\n   use of vectors without using the union members.  Do not use them\n   in your code unless you're okay with it breaking when SIMDe\n   changes. */\n#if defined(SIMDE_VECTOR_SUBSCRIPT)\n#  if defined(SIMDE_VECTOR_OPS)\n#    define SIMDE_VECTOR_SUBSCRIPT_OPS\n#  endif\n#  if defined(SIMDE_VECTOR_SCALAR)\n#    define SIMDE_VECTOR_SUBSCRIPT_SCALAR\n#  endif\n#endif\n\n#if !defined(SIMDE_DISABLE_OPENMP)\n  #if !defined(SIMDE_ENABLE_OPENMP) && ((defined(_OPENMP) && (_OPENMP >= 201307L)) || (defined(_OPENMP_SIMD) && (_OPENMP_SIMD >= 201307L))) || defined(HEDLEY_MCST_LCC_VERSION)\n    #define SIMDE_ENABLE_OPENMP\n  #endif\n#endif\n\n#if !defined(SIMDE_ENABLE_CILKPLUS) && (defined(__cilk) || defined(HEDLEY_INTEL_VERSION))\n#  define SIMDE_ENABLE_CILKPLUS\n#endif\n\n#if defined(SIMDE_ENABLE_OPENMP)\n#  define SIMDE_VECTORIZE HEDLEY_PRAGMA(omp simd)\n#  define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(omp simd safelen(l))\n#  if defined(__clang__)\n#    define SIMDE_VECTORIZE_REDUCTION(r) \\\n        HEDLEY_DIAGNOSTIC_PUSH \\\n        _Pragma(\"clang diagnostic ignored \\\"-Wsign-conversion\\\"\") \\\n        HEDLEY_PRAGMA(omp simd reduction(r)) \\\n        HEDLEY_DIAGNOSTIC_POP\n#  else\n#    define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(omp simd reduction(r))\n#  endif\n#  if !defined(HEDLEY_MCST_LCC_VERSION)\n#    define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd aligned(a))\n#  else\n#    define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd)\n#  endif\n#elif defined(SIMDE_ENABLE_CILKPLUS)\n#  define SIMDE_VECTORIZE HEDLEY_PRAGMA(simd)\n#  define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(simd vectorlength(l))\n#  define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(simd reduction(r))\n#  define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(simd aligned(a))\n#elif defined(__clang__) && !defined(HEDLEY_IBM_VERSION)\n#  define SIMDE_VECTORIZE HEDLEY_PRAGMA(clang loop vectorize(enable))\n#  define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(clang loop vectorize_width(l))\n#  define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE\n#  define SIMDE_VECTORIZE_ALIGNED(a)\n#elif HEDLEY_GCC_VERSION_CHECK(4,9,0)\n#  define SIMDE_VECTORIZE HEDLEY_PRAGMA(GCC ivdep)\n#  define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE\n#  define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE\n#  define SIMDE_VECTORIZE_ALIGNED(a)\n#elif HEDLEY_CRAY_VERSION_CHECK(5,0,0)\n#  define SIMDE_VECTORIZE HEDLEY_PRAGMA(_CRI ivdep)\n#  define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE\n#  define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE\n#  define SIMDE_VECTORIZE_ALIGNED(a)\n#else\n#  define SIMDE_VECTORIZE\n#  define 
SIMDE_VECTORIZE_SAFELEN(l)\n#  define SIMDE_VECTORIZE_REDUCTION(r)\n#  define SIMDE_VECTORIZE_ALIGNED(a)\n#endif\n\n#define SIMDE_MASK_NZ_(v, mask) (((v) & (mask)) | !((v) & (mask)))\n\n/* Intended for checking coverage, you should never use this in\n   production. */\n#if defined(SIMDE_NO_INLINE)\n#  define SIMDE_FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE static\n#else\n#  define SIMDE_FUNCTION_ATTRIBUTES HEDLEY_ALWAYS_INLINE static\n#endif\n\n#if defined(SIMDE_NO_INLINE)\n#  define SIMDE_HUGE_FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE static\n#elif defined(SIMDE_CONSTRAINED_COMPILATION)\n#  define SIMDE_HUGE_FUNCTION_ATTRIBUTES static\n#else\n#  define SIMDE_HUGE_FUNCTION_ATTRIBUTES HEDLEY_ALWAYS_INLINE static\n#endif\n\n#if \\\n    HEDLEY_HAS_ATTRIBUTE(unused) || \\\n    HEDLEY_GCC_VERSION_CHECK(2,95,0)\n#  define SIMDE_FUNCTION_POSSIBLY_UNUSED_ __attribute__((__unused__))\n#else\n#  define SIMDE_FUNCTION_POSSIBLY_UNUSED_\n#endif\n\nHEDLEY_DIAGNOSTIC_PUSH\nSIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_\n\n#if defined(_MSC_VER)\n#  define SIMDE_BEGIN_DECLS_ HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(disable:4996 4204)) HEDLEY_BEGIN_C_DECLS\n#  define SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP HEDLEY_END_C_DECLS\n#else\n#  define SIMDE_BEGIN_DECLS_ \\\n     HEDLEY_DIAGNOSTIC_PUSH \\\n     SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ \\\n     HEDLEY_BEGIN_C_DECLS\n#  define SIMDE_END_DECLS_ \\\n     HEDLEY_END_C_DECLS \\\n     HEDLEY_DIAGNOSTIC_POP\n#endif\n\n#if defined(__SIZEOF_INT128__)\n#  define SIMDE_HAVE_INT128_\nHEDLEY_DIAGNOSTIC_PUSH\nSIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_\ntypedef __int128 simde_int128;\ntypedef unsigned __int128 simde_uint128;\nHEDLEY_DIAGNOSTIC_POP\n#endif\n\n#if !defined(SIMDE_ENDIAN_LITTLE)\n#  define SIMDE_ENDIAN_LITTLE 1234\n#endif\n#if !defined(SIMDE_ENDIAN_BIG)\n#  define SIMDE_ENDIAN_BIG 4321\n#endif\n\n#if !defined(SIMDE_ENDIAN_ORDER)\n/* GCC (and compilers masquerading as GCC) define  __BYTE_ORDER__. */\n#  if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)\n#    define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE\n#  elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)\n#    define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG\n/* TI defines _BIG_ENDIAN or _LITTLE_ENDIAN */\n#  elif defined(_BIG_ENDIAN)\n#    define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG\n#  elif defined(_LITTLE_ENDIAN)\n#    define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE\n/* We know the endianness of some common architectures.  Common\n * architectures not listed (ARM, POWER, MIPS, etc.) here are\n * bi-endian. */\n#  elif defined(__amd64) || defined(_M_X64) || defined(__i386) || defined(_M_IX86)\n#    define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE\n#  elif defined(__s390x__) || defined(__zarch__)\n#    define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG\n/* Looks like we'll have to rely on the platform.  If we're missing a\n * platform, please let us know. 
*/\n#  elif defined(_WIN32)\n#    define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE\n#  elif defined(sun) || defined(__sun) /* Solaris */\n#    include <sys/byteorder.h>\n#    if defined(_LITTLE_ENDIAN)\n#      define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE\n#    elif defined(_BIG_ENDIAN)\n#      define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG\n#    endif\n#  elif defined(__APPLE__)\n#    include <libkern/OSByteOrder.h>\n#    if defined(__LITTLE_ENDIAN__)\n#      define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE\n#    elif defined(__BIG_ENDIAN__)\n#      define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG\n#    endif\n#  elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__bsdi__) || defined(__DragonFly__) || defined(BSD)\n#    include <machine/endian.h>\n#    if defined(__BYTE_ORDER) && (__BYTE_ORDER == __LITTLE_ENDIAN)\n#      define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE\n#    elif defined(__BYTE_ORDER) && (__BYTE_ORDER == __BIG_ENDIAN)\n#      define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG\n#    endif\n#  elif defined(__linux__) || defined(__linux) || defined(__gnu_linux__)\n#    include <endian.h>\n#    if defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && (__BYTE_ORDER == __LITTLE_ENDIAN)\n#      define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE\n#    elif defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && (__BYTE_ORDER == __BIG_ENDIAN)\n#      define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG\n#    endif\n#  endif\n#endif\n\n#if \\\n    HEDLEY_HAS_BUILTIN(__builtin_bswap64) || \\\n    HEDLEY_GCC_VERSION_CHECK(4,3,0) || \\\n    HEDLEY_IBM_VERSION_CHECK(13,1,0) || \\\n    HEDLEY_INTEL_VERSION_CHECK(13,0,0)\n  #define simde_bswap64(v) __builtin_bswap64(v)\n#elif HEDLEY_MSVC_VERSION_CHECK(13,10,0)\n  #define simde_bswap64(v) _byteswap_uint64(v)\n#else\n  SIMDE_FUNCTION_ATTRIBUTES\n  uint64_t\n  simde_bswap64(uint64_t v) {\n    return\n      ((v & (((uint64_t) 0xff) << 56)) >> 56) |\n      ((v & (((uint64_t) 0xff) << 48)) >> 40) |\n      ((v & (((uint64_t) 0xff) << 40)) >> 24) |\n      ((v & (((uint64_t) 0xff) << 32)) >>  8) |\n      ((v & (((uint64_t) 0xff) << 24)) <<  8) |\n      ((v & (((uint64_t) 0xff) << 16)) << 24) |\n      ((v & (((uint64_t) 0xff) <<  8)) << 40) |\n      ((v & (((uint64_t) 0xff)      )) << 56);\n  }\n#endif\n\n#if !defined(SIMDE_ENDIAN_ORDER)\n#  error Unknown byte order; please file a bug\n#else\n#  if SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE\n#    define simde_endian_bswap64_be(value) simde_bswap64(value)\n#    define simde_endian_bswap64_le(value) (value)\n#  elif SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG\n#    define simde_endian_bswap64_be(value) (value)\n#    define simde_endian_bswap64_le(value) simde_bswap64(value)\n#  endif\n#endif\n\n/* TODO: we should at least make an attempt to detect the correct\n   types for simde_float32/float64 instead of just assuming float and\n   double. 
*/\n\n#if !defined(SIMDE_FLOAT32_TYPE)\n#  define SIMDE_FLOAT32_TYPE float\n#  define SIMDE_FLOAT32_C(value) value##f\n#else\n#  define SIMDE_FLOAT32_C(value) ((SIMDE_FLOAT32_TYPE) value)\n#endif\ntypedef SIMDE_FLOAT32_TYPE simde_float32;\n\n#if !defined(SIMDE_FLOAT64_TYPE)\n#  define SIMDE_FLOAT64_TYPE double\n#  define SIMDE_FLOAT64_C(value) value\n#else\n#  define SIMDE_FLOAT64_C(value) ((SIMDE_FLOAT64_TYPE) value)\n#endif\ntypedef SIMDE_FLOAT64_TYPE simde_float64;\n\n#if defined(SIMDE_POLY8_TYPE)\n#  undef SIMDE_POLY8_TYPE\n#endif\n#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n#  define SIMDE_POLY8_TYPE poly8_t\n#  define SIMDE_POLY8_C(value) (HEDLEY_STATIC_CAST(poly8_t, value))\n#else\n#  define SIMDE_POLY8_TYPE uint8_t\n#  define SIMDE_POLY8_C(value) (HEDLEY_STATIC_CAST(uint8_t, value))\n#endif\ntypedef SIMDE_POLY8_TYPE simde_poly8;\n\n#if defined(SIMDE_POLY16_TYPE)\n#  undef SIMDE_POLY16_TYPE\n#endif\n#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n#  define SIMDE_POLY16_TYPE poly16_t\n#  define SIMDE_POLY16_C(value) (HEDLEY_STATIC_CAST(poly16_t, value))\n#else\n#  define SIMDE_POLY16_TYPE uint16_t\n#  define SIMDE_POLY16_C(value) (HEDLEY_STATIC_CAST(uint16_t, value))\n#endif\ntypedef SIMDE_POLY16_TYPE simde_poly16;\n\n#if defined(SIMDE_POLY64_TYPE)\n#  undef SIMDE_POLY64_TYPE\n#endif\n#if defined(SIMDE_ARM_NEON_A32V8_NATIVE)\n#  define SIMDE_POLY64_TYPE poly64_t\n#  define SIMDE_POLY64_C(value) (HEDLEY_STATIC_CAST(poly64_t, value ## ull))\n#else\n#  define SIMDE_POLY64_TYPE uint64_t\n#  define SIMDE_POLY64_C(value) value ## ull\n#endif\ntypedef SIMDE_POLY64_TYPE simde_poly64;\n\n#if defined(SIMDE_POLY128_TYPE)\n#  undef SIMDE_POLY128_TYPE\n#endif\n#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARCH_ARM_CRYPTO)\n#  define SIMDE_POLY128_TYPE poly128_t\n#  define SIMDE_POLY128_C(value) value\n#elif defined(__SIZEOF_INT128__)\n#  define SIMDE_POLY128_TYPE __int128\n#  define SIMDE_POLY128_C(value) (HEDLEY_STATIC_CAST(__int128, value))\n#else\n#  define SIMDE_POLY128_TYPE uint64_t\n#  define SIMDE_TARGET_NOT_SUPPORT_INT128_TYPE 1\n#endif\ntypedef SIMDE_POLY128_TYPE simde_poly128;\n\n#if defined(__cplusplus)\n  typedef bool simde_bool;\n#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)\n  typedef _Bool simde_bool;\n#elif defined(bool)\n  typedef bool simde_bool;\n#else\n  #include <stdbool.h>\n  typedef bool simde_bool;\n#endif\n\n#if HEDLEY_HAS_WARNING(\"-Wbad-function-cast\")\n#  define SIMDE_CONVERT_FTOI(T,v) \\\n    HEDLEY_DIAGNOSTIC_PUSH \\\n    _Pragma(\"clang diagnostic ignored \\\"-Wbad-function-cast\\\"\") \\\n    HEDLEY_STATIC_CAST(T, (v)) \\\n    HEDLEY_DIAGNOSTIC_POP\n#else\n#  define SIMDE_CONVERT_FTOI(T,v) ((T) (v))\n#endif\n\n/* TODO: detect compilers which support this outside of C11 mode */\n#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)\n  #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) _Generic((value), to: (value), default: (_Generic((value), from: ((to) (value)))))\n  #define SIMDE_CHECKED_STATIC_CAST(to, from, value) _Generic((value), to: (value), default: (_Generic((value), from: ((to) (value)))))\n#else\n  #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) HEDLEY_REINTERPRET_CAST(to, value)\n  #define SIMDE_CHECKED_STATIC_CAST(to, from, value) HEDLEY_STATIC_CAST(to, value)\n#endif\n\n#if HEDLEY_HAS_WARNING(\"-Wfloat-equal\")\n#  define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL _Pragma(\"clang diagnostic ignored \\\"-Wfloat-equal\\\"\")\n#elif HEDLEY_GCC_VERSION_CHECK(3,0,0)\n#  define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL 
_Pragma(\"GCC diagnostic ignored \\\"-Wfloat-equal\\\"\")\n#else\n#  define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL\n#endif\n\n/* Some functions can trade accuracy for speed.  For those functions\n   you can control the trade-off using this macro.  Possible values:\n\n   0: prefer speed\n   1: reasonable trade-offs\n   2: prefer accuracy */\n#if !defined(SIMDE_ACCURACY_PREFERENCE)\n#  define SIMDE_ACCURACY_PREFERENCE 1\n#endif\n\n#if defined(__STDC_HOSTED__)\n#  define SIMDE_STDC_HOSTED __STDC_HOSTED__\n#else\n#  if \\\n     defined(HEDLEY_PGI_VERSION) || \\\n     defined(HEDLEY_MSVC_VERSION)\n#    define SIMDE_STDC_HOSTED 1\n#  else\n#    define SIMDE_STDC_HOSTED 0\n#  endif\n#endif\n\n/* Try to deal with environments without a standard library. */\n#if !defined(simde_memcpy)\n  #if HEDLEY_HAS_BUILTIN(__builtin_memcpy)\n    #define simde_memcpy(dest, src, n) __builtin_memcpy(dest, src, n)\n  #endif\n#endif\n#if !defined(simde_memset)\n  #if HEDLEY_HAS_BUILTIN(__builtin_memset)\n    #define simde_memset(s, c, n) __builtin_memset(s, c, n)\n  #endif\n#endif\n#if !defined(simde_memcmp)\n  #if HEDLEY_HAS_BUILTIN(__builtin_memcmp)\n    #define simde_memcmp(s1, s2, n) __builtin_memcmp(s1, s2, n)\n  #endif\n#endif\n\n#if !defined(simde_memcpy) || !defined(simde_memset) || !defined(simde_memcmp)\n  #if !defined(SIMDE_NO_STRING_H)\n    #if defined(__has_include)\n      #if !__has_include(<string.h>)\n        #define SIMDE_NO_STRING_H\n      #endif\n    #elif (SIMDE_STDC_HOSTED == 0)\n      #define SIMDE_NO_STRING_H\n    #endif\n  #endif\n\n  #if !defined(SIMDE_NO_STRING_H)\n    #include <string.h>\n    #if !defined(simde_memcpy)\n      #define simde_memcpy(dest, src, n) memcpy(dest, src, n)\n    #endif\n    #if !defined(simde_memset)\n      #define simde_memset(s, c, n) memset(s, c, n)\n    #endif\n    #if !defined(simde_memcmp)\n      #define simde_memcmp(s1, s2, n) memcmp(s1, s2, n)\n    #endif\n  #else\n    /* These are meant to be portable, not fast.  If you're hitting them you\n     * should think about providing your own (by defining the simde_memcpy\n     * macro prior to including any SIMDe files) or submitting a patch to\n     * SIMDe so we can detect your system-provided memcpy/memset, like by\n     * adding your compiler to the checks for __builtin_memcpy and/or\n     * __builtin_memset. 
*/\n    #if !defined(simde_memcpy)\n      SIMDE_FUNCTION_ATTRIBUTES\n      void\n      simde_memcpy_(void* dest, const void* src, size_t len) {\n        char* dest_ = HEDLEY_STATIC_CAST(char*, dest);\n        const char* src_ = HEDLEY_STATIC_CAST(const char*, src);\n        for (size_t i = 0 ; i < len ; i++) {\n          dest_[i] = src_[i];\n        }\n      }\n      #define simde_memcpy(dest, src, n) simde_memcpy_(dest, src, n)\n    #endif\n\n    #if !defined(simde_memset)\n      SIMDE_FUNCTION_ATTRIBUTES\n      void\n      simde_memset_(void* s, int c, size_t len) {\n        char* s_ = HEDLEY_STATIC_CAST(char*, s);\n        char c_ = HEDLEY_STATIC_CAST(char, c);\n        for (size_t i = 0 ; i < len ; i++) {\n          s_[i] = c_;\n        }\n      }\n      #define simde_memset(s, c, n) simde_memset_(s, c, n)\n    #endif\n\n    #if !defined(simde_memcmp)\n      SIMDE_FUNCTION_ATTRIBUTES\n      int\n      simde_memcmp_(const void *s1, const void *s2, size_t n) {\n        const unsigned char* s1_ = HEDLEY_STATIC_CAST(const unsigned char*, s1);\n        const unsigned char* s2_ = HEDLEY_STATIC_CAST(const unsigned char*, s2);\n        for (size_t i = 0 ; i < n ; i++) {\n          if (s1_[i] != s2_[i]) {\n            return (int) (s1_[i] - s2_[i]);\n          }\n        }\n        return 0;\n      }\n    #define simde_memcmp(s1, s2, n) simde_memcmp_(s1, s2, n)\n    #endif\n  #endif\n#endif\n\n/*** Functions that quiet a signaling NaN ***/\n\nstatic HEDLEY_INLINE\ndouble\nsimde_math_quiet(double x) {\n  uint64_t tmp, mask;\n  if (!simde_math_isnan(x)) {\n    return x;\n  }\n  simde_memcpy(&tmp, &x, 8);\n  mask = 0x7ff80000;\n  mask <<= 32;\n  tmp |= mask;\n  simde_memcpy(&x, &tmp, 8);\n  return x;\n}\n\nstatic HEDLEY_INLINE\nfloat\nsimde_math_quietf(float x) {\n  uint32_t tmp;\n  if (!simde_math_isnanf(x)) {\n    return x;\n  }\n  simde_memcpy(&tmp, &x, 4);\n  tmp |= 0x7fc00000lu;\n  simde_memcpy(&x, &tmp, 4);\n  return x;\n}\n\n#if defined(FE_ALL_EXCEPT)\n  #define SIMDE_HAVE_FENV_H\n#elif defined(__has_include)\n  #if __has_include(<fenv.h>)\n    #include <fenv.h>\n    #define SIMDE_HAVE_FENV_H\n  #endif\n#elif SIMDE_STDC_HOSTED == 1\n  #include <fenv.h>\n  #define SIMDE_HAVE_FENV_H\n#endif\n\n#if defined(EXIT_FAILURE)\n  #define SIMDE_HAVE_STDLIB_H\n#elif defined(__has_include)\n  #if __has_include(<stdlib.h>)\n    #include <stdlib.h>\n    #define SIMDE_HAVE_STDLIB_H\n  #endif\n#elif SIMDE_STDC_HOSTED == 1\n  #include <stdlib.h>\n  #define SIMDE_HAVE_STDLIB_H\n#endif\n\n#if defined(__has_include)\n#  if defined(__cplusplus) && (__cplusplus >= 201103L) && __has_include(<cfenv>)\n#    include <cfenv>\n#  elif __has_include(<fenv.h>)\n#    include <fenv.h>\n#  endif\n#  if __has_include(<stdlib.h>)\n#    include <stdlib.h>\n#  endif\n#elif SIMDE_STDC_HOSTED == 1\n#  include <stdlib.h>\n#  include <fenv.h>\n#endif\n\n#define SIMDE_DEFINE_CONVERSION_FUNCTION_(Name, T_To, T_From) \\\n  static HEDLEY_ALWAYS_INLINE HEDLEY_CONST SIMDE_FUNCTION_POSSIBLY_UNUSED_ \\\n  T_To \\\n  Name (T_From value) { \\\n    T_To r; \\\n    simde_memcpy(&r, &value, sizeof(r)); \\\n    return r; \\\n  }\n\nSIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float32_as_uint32,      uint32_t, simde_float32)\nSIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint32_as_float32, simde_float32, uint32_t)\nSIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float64_as_uint64,      uint64_t, simde_float64)\nSIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint64_as_float64, simde_float64, uint64_t)\n\n#include \"check.h\"\n\n/* GCC/clang have a bunch of functionality in builtins which we would\n 
* like to access, but the suffixes indicate whether they operate on\n * int, long, or long long, not fixed width types (e.g., int32_t).\n * We use these macros to attempt to map from fixed-width to the\n * names GCC uses.  Note that you should still cast the input(s) and\n * return values (to/from SIMDE_BUILTIN_TYPE_*_) since often even if\n * types are the same size they may not be compatible according to the\n * compiler.  For example, on x86 long and long long are generally\n * both 64 bits, but platforms vary on whether an int64_t is mapped\n * to a long or long long. */\n\n#include <limits.h>\n\nHEDLEY_DIAGNOSTIC_PUSH\nSIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_\n\n#if (INT8_MAX == INT_MAX) && (INT8_MIN == INT_MIN)\n  #define SIMDE_BUILTIN_SUFFIX_8_\n  #define SIMDE_BUILTIN_TYPE_8_ int\n#elif (INT8_MAX == LONG_MAX) && (INT8_MIN == LONG_MIN)\n  #define SIMDE_BUILTIN_SUFFIX_8_ l\n  #define SIMDE_BUILTIN_TYPE_8_ long\n#elif (INT8_MAX == LLONG_MAX) && (INT8_MIN == LLONG_MIN)\n  #define SIMDE_BUILTIN_SUFFIX_8_ ll\n  #define SIMDE_BUILTIN_TYPE_8_ long long\n#endif\n\n#if (INT16_MAX == INT_MAX) && (INT16_MIN == INT_MIN)\n  #define SIMDE_BUILTIN_SUFFIX_16_\n  #define SIMDE_BUILTIN_TYPE_16_ int\n#elif (INT16_MAX == LONG_MAX) && (INT16_MIN == LONG_MIN)\n  #define SIMDE_BUILTIN_SUFFIX_16_ l\n  #define SIMDE_BUILTIN_TYPE_16_ long\n#elif (INT16_MAX == LLONG_MAX) && (INT16_MIN == LLONG_MIN)\n  #define SIMDE_BUILTIN_SUFFIX_16_ ll\n  #define SIMDE_BUILTIN_TYPE_16_ long long\n#endif\n\n#if (INT32_MAX == INT_MAX) && (INT32_MIN == INT_MIN)\n  #define SIMDE_BUILTIN_SUFFIX_32_\n  #define SIMDE_BUILTIN_TYPE_32_ int\n#elif (INT32_MAX == LONG_MAX) && (INT32_MIN == LONG_MIN)\n  #define SIMDE_BUILTIN_SUFFIX_32_ l\n  #define SIMDE_BUILTIN_TYPE_32_ long\n#elif (INT32_MAX == LLONG_MAX) && (INT32_MIN == LLONG_MIN)\n  #define SIMDE_BUILTIN_SUFFIX_32_ ll\n  #define SIMDE_BUILTIN_TYPE_32_ long long\n#endif\n\n#if (INT64_MAX == INT_MAX) && (INT64_MIN == INT_MIN)\n  #define SIMDE_BUILTIN_SUFFIX_64_\n  #define SIMDE_BUILTIN_TYPE_64_ int\n#elif (INT64_MAX == LONG_MAX) && (INT64_MIN == LONG_MIN)\n  #define SIMDE_BUILTIN_SUFFIX_64_ l\n  #define SIMDE_BUILTIN_TYPE_64_ long\n#elif (INT64_MAX == LLONG_MAX) && (INT64_MIN == LLONG_MIN)\n  #define SIMDE_BUILTIN_SUFFIX_64_ ll\n  #define SIMDE_BUILTIN_TYPE_64_ long long\n#endif\n\n/* SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ */\nHEDLEY_DIAGNOSTIC_POP\n\n#if defined(SIMDE_BUILTIN_SUFFIX_8_)\n  #define SIMDE_BUILTIN_8_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_)\n  #define SIMDE_BUILTIN_HAS_8_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_))\n#else\n  #define SIMDE_BUILTIN_HAS_8_(name) 0\n#endif\n#if defined(SIMDE_BUILTIN_SUFFIX_16_)\n  #define SIMDE_BUILTIN_16_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_)\n  #define SIMDE_BUILTIN_HAS_16_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_))\n#else\n  #define SIMDE_BUILTIN_HAS_16_(name) 0\n#endif\n#if defined(SIMDE_BUILTIN_SUFFIX_32_)\n  #define SIMDE_BUILTIN_32_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_)\n  #define SIMDE_BUILTIN_HAS_32_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_))\n#else\n  #define SIMDE_BUILTIN_HAS_32_(name) 0\n#endif\n#if defined(SIMDE_BUILTIN_SUFFIX_64_)\n  #define SIMDE_BUILTIN_64_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_)\n  #define SIMDE_BUILTIN_HAS_64_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, 
SIMDE_BUILTIN_SUFFIX_64_))\n#else\n  #define SIMDE_BUILTIN_HAS_64_(name) 0\n#endif\n\n#if !defined(__cplusplus)\n  #if defined(__clang__)\n    #if HEDLEY_HAS_WARNING(\"-Wc11-extensions\")\n      #define SIMDE_GENERIC_(...) (__extension__ ({ \\\n          HEDLEY_DIAGNOSTIC_PUSH \\\n          _Pragma(\"clang diagnostic ignored \\\"-Wc11-extensions\\\"\") \\\n          _Generic(__VA_ARGS__); \\\n          HEDLEY_DIAGNOSTIC_POP \\\n        }))\n    #elif HEDLEY_HAS_WARNING(\"-Wc1x-extensions\")\n      #define SIMDE_GENERIC_(...) (__extension__ ({ \\\n          HEDLEY_DIAGNOSTIC_PUSH \\\n          _Pragma(\"clang diagnostic ignored \\\"-Wc1x-extensions\\\"\") \\\n          _Generic(__VA_ARGS__); \\\n          HEDLEY_DIAGNOSTIC_POP \\\n        }))\n    #endif\n  #elif \\\n      defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) || \\\n      HEDLEY_HAS_EXTENSION(c_generic_selections) || \\\n      HEDLEY_GCC_VERSION_CHECK(4,9,0) || \\\n      HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \\\n      HEDLEY_IBM_VERSION_CHECK(12,1,0) || \\\n      HEDLEY_ARM_VERSION_CHECK(5,3,0)\n    #define SIMDE_GENERIC_(...) _Generic(__VA_ARGS__)\n  #endif\n#endif\n\n/* Sometimes we run into problems with specific versions of compilers\n   which make the native versions unusable for us.  Often this is due\n   to missing functions, sometimes buggy implementations, etc.  These\n   macros are how we check for specific bugs.  As they are fixed we'll\n   start only defining them for problematic compiler versions. */\n\n#if !defined(SIMDE_IGNORE_COMPILER_BUGS)\n#  if defined(HEDLEY_GCC_VERSION)\n#    if !HEDLEY_GCC_VERSION_CHECK(4,9,0)\n#      define SIMDE_BUG_GCC_REV_208793\n#    endif\n#    if !HEDLEY_GCC_VERSION_CHECK(5,0,0)\n#      define SIMDE_BUG_GCC_BAD_MM_SRA_EPI32 /* TODO: find relevant bug or commit */\n#    endif\n#    if !HEDLEY_GCC_VERSION_CHECK(6,0,0)\n#      define SIMDE_BUG_GCC_SIZEOF_IMMEDIATE\n#    endif\n#    if !HEDLEY_GCC_VERSION_CHECK(4,6,0)\n#      define SIMDE_BUG_GCC_BAD_MM_EXTRACT_EPI8 /* TODO: find relevant bug or commit */\n#    endif\n#    if !HEDLEY_GCC_VERSION_CHECK(7,4,0) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && !HEDLEY_GCC_VERSION_CHECK(8,3,0))\n#      define SIMDE_BUG_GCC_87467\n#    endif\n#    if !HEDLEY_GCC_VERSION_CHECK(8,0,0)\n#      define SIMDE_BUG_GCC_REV_247851\n#    endif\n#    if !HEDLEY_GCC_VERSION_CHECK(10,0,0)\n#      define SIMDE_BUG_GCC_REV_274313\n#      define SIMDE_BUG_GCC_91341\n#      define SIMDE_BUG_GCC_92035\n#    endif\n#    if !HEDLEY_GCC_VERSION_CHECK(9,0,0) && defined(SIMDE_ARCH_AARCH64)\n#      define SIMDE_BUG_GCC_ARM_SHIFT_SCALAR\n#    endif\n#    if !HEDLEY_GCC_VERSION_CHECK(9,0,0) && defined(SIMDE_ARCH_AARCH64)\n#      define SIMDE_BUG_GCC_BAD_VEXT_REV32\n#    endif\n#    if !(HEDLEY_GCC_VERSION_CHECK(9,4,0) \\\n          || (HEDLEY_GCC_VERSION_CHECK(8,5,0) && !HEDLEY_GCC_VERSION_CHECK(9,0,0)) \\\n         ) && defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)\n#      define SIMDE_BUG_GCC_94482\n#    endif\n#    if (defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) || defined(SIMDE_ARCH_ZARCH)\n#      define SIMDE_BUG_GCC_53784\n#    endif\n#    if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64)\n#      if HEDLEY_GCC_VERSION_CHECK(4,3,0) /* -Wsign-conversion */\n#        define SIMDE_BUG_GCC_95144\n#      endif\n#      if !HEDLEY_GCC_VERSION_CHECK(11,2,0)\n#        define SIMDE_BUG_GCC_95483\n#      endif\n#      if defined(__OPTIMIZE__)\n#        define SIMDE_BUG_GCC_100927\n#      endif\n#      if !(HEDLEY_GCC_VERSION_CHECK(10,3,0))\n#        
define SIMDE_BUG_GCC_98521\n#      endif\n#    endif\n#    if !HEDLEY_GCC_VERSION_CHECK(9,4,0) && defined(SIMDE_ARCH_AARCH64)\n#      define SIMDE_BUG_GCC_94488\n#    endif\n#    if !HEDLEY_GCC_VERSION_CHECK(9,1,0) && defined(SIMDE_ARCH_AARCH64)\n#      define SIMDE_BUG_GCC_REV_264019\n#    endif\n#    if (!HEDLEY_GCC_VERSION_CHECK(9,0,0) && !defined(SIMDE_ARCH_AARCH64)) || (!defined(SIMDE_ARCH_AARCH64) && defined(SIMDE_ARCH_ARM))\n#      define SIMDE_BUG_GCC_REV_260989\n#    endif\n#    if defined(SIMDE_ARCH_ARM) && !defined(SIMDE_ARCH_AARCH64)\n#      define SIMDE_BUG_GCC_95399\n#      define SIMDE_BUG_GCC_95471\n#      define SIMDE_BUG_GCC_111609\n#      if SIMDE_ARCH_ARM_CHECK(8,0)\n#        define SIMDE_BUG_GCC_113065\n#      endif\n#    endif\n#    if defined(SIMDE_ARCH_POWER)\n#      define SIMDE_BUG_GCC_95227\n#      define SIMDE_BUG_GCC_95782\n#      if !HEDLEY_GCC_VERSION_CHECK(12,0,0)\n#        define SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS\n#      endif\n#    endif\n#    if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64)\n#      if !HEDLEY_GCC_VERSION_CHECK(10,2,0) && !defined(__OPTIMIZE__)\n#        define SIMDE_BUG_GCC_96174\n#      endif\n#    endif\n#    if defined(SIMDE_ARCH_ZARCH)\n#      define SIMDE_BUG_GCC_95782\n#      if HEDLEY_GCC_VERSION_CHECK(10,0,0)\n#        define SIMDE_BUG_GCC_101614\n#      endif\n#    endif\n#    if defined(SIMDE_ARCH_MIPS_MSA)\n#      define SIMDE_BUG_GCC_97248\n#      if !HEDLEY_GCC_VERSION_CHECK(12,1,0)\n#        define SIMDE_BUG_GCC_100760\n#        define SIMDE_BUG_GCC_100761\n#        define SIMDE_BUG_GCC_100762\n#      endif\n#    endif\n#    if !defined(__OPTIMIZE__) && !(\\\n       HEDLEY_GCC_VERSION_CHECK(11,4,0) \\\n       || (HEDLEY_GCC_VERSION_CHECK(10,4,0) && !(HEDLEY_GCC_VERSION_CHECK(11,0,0))) \\\n       || (HEDLEY_GCC_VERSION_CHECK(9,5,0) && !(HEDLEY_GCC_VERSION_CHECK(10,0,0))))\n#      define SIMDE_BUG_GCC_105339\n#    endif\n#  elif defined(__clang__)\n#    if defined(SIMDE_ARCH_AARCH64)\n#      define SIMDE_BUG_CLANG_48257  // https://github.com/llvm/llvm-project/issues/47601\n#      define SIMDE_BUG_CLANG_71362  // https://github.com/llvm/llvm-project/issues/71362\n#      define SIMDE_BUG_CLANG_71365  // https://github.com/llvm/llvm-project/issues/71365\n#      define SIMDE_BUG_CLANG_71751  // https://github.com/llvm/llvm-project/issues/71751\n#      if !SIMDE_DETECT_CLANG_VERSION_CHECK(15,0,0)\n#        define SIMDE_BUG_CLANG_45541\n#      endif\n#      if !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)\n#        define SIMDE_BUG_CLANG_46840\n#        define SIMDE_BUG_CLANG_46844\n#      endif\n#      if SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0)\n#        define SIMDE_BUG_CLANG_BAD_VI64_OPS\n#      endif\n#      if SIMDE_DETECT_CLANG_VERSION_NOT(9,0,0)\n#        define SIMDE_BUG_CLANG_GIT_4EC445B8\n#        define SIMDE_BUG_CLANG_REV_365298 /* 0464e07c8f6e3310c28eb210a4513bc2243c2a7e */\n#      endif\n#    endif\n#    if defined(SIMDE_ARCH_ARM)\n#      if !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0)\n#        define SIMDE_BUG_CLANG_BAD_VGET_SET_LANE_TYPES\n#      endif\n#      if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NATIVE)\n#        define SIMDE_BUG_CLANG_71763  // https://github.com/llvm/llvm-project/issues/71763\n#      endif\n#    endif\n#    if defined(SIMDE_ARCH_POWER) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)\n#      define SIMDE_BUG_CLANG_46770\n#    endif\n#    if defined(SIMDE_ARCH_POWER) && (SIMDE_ARCH_POWER == 700) && 
(SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0))\n#      if !SIMDE_DETECT_CLANG_VERSION_CHECK(13,0,0)\n#        define SIMDE_BUG_CLANG_50893\n#        define SIMDE_BUG_CLANG_50901\n#      endif\n#    endif\n#    if defined(_ARCH_PWR9) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) && !defined(__OPTIMIZE__)\n#      define SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT\n#    endif\n#    if defined(SIMDE_ARCH_POWER)\n#      if !SIMDE_DETECT_CLANG_VERSION_CHECK(14,0,0)\n#        define SIMDE_BUG_CLANG_50932\n#      endif\n#      if !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)\n#        define SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS\n#      endif\n#    endif\n#    if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64)\n#      if SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0)\n#        define SIMDE_BUG_CLANG_REV_298042 /* 6afc436a7817a52e78ae7bcdc3faafd460124cac */\n#      endif\n#      if SIMDE_DETECT_CLANG_VERSION_NOT(3,7,0)\n#        define SIMDE_BUG_CLANG_REV_234560 /* b929ad7b1726a32650a8051f69a747fb6836c540 */\n#      endif\n#      if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) && SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0)\n#        define SIMDE_BUG_CLANG_BAD_MADD\n#      endif\n#      if SIMDE_DETECT_CLANG_VERSION_CHECK(4,0,0) && SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0)\n#        define SIMDE_BUG_CLANG_REV_299346 /* ac9959eb533a58482ea4da6c4db1e635a98de384 */\n#      endif\n#      if SIMDE_DETECT_CLANG_VERSION_NOT(8,0,0)\n#        define SIMDE_BUG_CLANG_REV_344862 /* eae26bf73715994c2bd145f9b6dc3836aa4ffd4f */\n#      endif\n#      if HEDLEY_HAS_WARNING(\"-Wsign-conversion\") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0)\n#        define SIMDE_BUG_CLANG_45931\n#      endif\n#      if HEDLEY_HAS_WARNING(\"-Wvector-conversion\") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0)\n#        define SIMDE_BUG_CLANG_44589\n#      endif\n#      define SIMDE_BUG_CLANG_48673  // https://github.com/llvm/llvm-project/issues/48017\n#    endif\n#    define SIMDE_BUG_CLANG_45959  // https://github.com/llvm/llvm-project/issues/45304\n#    if defined(SIMDE_ARCH_WASM_SIMD128) && !SIMDE_DETECT_CLANG_VERSION_CHECK(17,0,0)\n#      define SIMDE_BUG_CLANG_60655\n#    endif\n#  elif defined(HEDLEY_MSVC_VERSION)\n#    if defined(SIMDE_ARCH_X86)\n#      define SIMDE_BUG_MSVC_ROUND_EXTRACT\n#    endif\n#  elif defined(HEDLEY_INTEL_VERSION)\n#    define SIMDE_BUG_INTEL_857088\n#  elif defined(HEDLEY_MCST_LCC_VERSION)\n#    define SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS\n#    define SIMDE_BUG_MCST_LCC_MISSING_CMOV_M256\n#    define SIMDE_BUG_MCST_LCC_FMA_WRONG_RESULT\n#  elif defined(HEDLEY_PGI_VERSION)\n#    define SIMDE_BUG_PGI_30104\n#    define SIMDE_BUG_PGI_30107\n#    define SIMDE_BUG_PGI_30106\n#  endif\n#endif\n\n/* GCC and Clang both have the same issue:\n * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95144\n * https://bugs.llvm.org/show_bug.cgi?id=45931\n * This is just an easy way to work around it.\n */\n#if \\\n    (HEDLEY_HAS_WARNING(\"-Wsign-conversion\") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0)) || \\\n    HEDLEY_GCC_VERSION_CHECK(4,3,0)\n#  define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (__extension__ ({ \\\n       HEDLEY_DIAGNOSTIC_PUSH  \\\n       _Pragma(\"GCC diagnostic ignored \\\"-Wsign-conversion\\\"\") \\\n       __typeof__(expr) simde_bug_ignore_sign_conversion_v_= (expr); \\\n       HEDLEY_DIAGNOSTIC_POP  \\\n       simde_bug_ignore_sign_conversion_v_; \\\n     }))\n#else\n#  define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (expr)\n#endif\n\n/* Usually the shift count is signed (for example, NEON or SSE).\n * OTOH, unsigned is good for 
PPC (vec_srl uses unsigned), and the only option for E2K.\n * Further info: https://github.com/simd-everywhere/simde/pull/700\n */\n#if defined(SIMDE_ARCH_E2K) || defined(SIMDE_ARCH_POWER)\n  #define SIMDE_CAST_VECTOR_SHIFT_COUNT(width, value) HEDLEY_STATIC_CAST(uint##width##_t, (value))\n#else\n  #define SIMDE_CAST_VECTOR_SHIFT_COUNT(width, value) HEDLEY_STATIC_CAST(int##width##_t, (value))\n#endif\n\n/* Initial support for RISCV V extensions based on ZVE64D. */\n#if defined(SIMDE_ARCH_RISCV_ZVE64D) && SIMDE_NATURAL_VECTOR_SIZE >= 64\n  #define RVV_FIXED_TYPE_DEF(name, lmul) \\\n    typedef vint8##name##_t  fixed_vint8##name##_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * lmul))); \\\n    typedef vint16##name##_t fixed_vint16##name##_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * lmul))); \\\n    typedef vint32##name##_t fixed_vint32##name##_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * lmul))); \\\n    typedef vuint8##name##_t fixed_vuint8##name##_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * lmul))); \\\n    typedef vuint16##name##_t fixed_vuint16##name##_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * lmul))); \\\n    typedef vuint32##name##_t fixed_vuint32##name##_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * lmul))); \\\n    typedef vfloat32##name##_t fixed_vfloat32##name##_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * lmul)));\n    RVV_FIXED_TYPE_DEF(mf2, 1/2);\n    RVV_FIXED_TYPE_DEF(m1, 1);\n    RVV_FIXED_TYPE_DEF(m2, 2);\n  #define RVV_FIXED_TYPE_DEF_64B(name, lmul) \\\n    typedef vint64##name##_t fixed_vint64##name##_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * lmul))); \\\n    typedef vuint64##name##_t fixed_vuint64##name##_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * lmul))); \\\n    typedef vfloat64##name##_t fixed_vfloat64##name##_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * lmul)));\n    RVV_FIXED_TYPE_DEF_64B(m1, 1);\n    RVV_FIXED_TYPE_DEF_64B(m2, 2);\n  #if defined(SIMDE_ARCH_RISCV_ZVFH)\n    #define RVV_FIXED_TYPE_DEF_16F(name, lmul) \\\n      typedef vfloat16##name##_t fixed_vfloat16##name##_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * lmul)));\n    RVV_FIXED_TYPE_DEF_16F(mf2, 1/2);\n    RVV_FIXED_TYPE_DEF_16F(m1, 1);\n    RVV_FIXED_TYPE_DEF_16F(m2, 2);\n  #endif\n#endif\n\n/* SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ */\nHEDLEY_DIAGNOSTIC_POP\n\n#endif /* !defined(SIMDE_COMMON_H) */\n"
  },
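The SIMDE_GENERIC_ fallback chain near the end of simde-common.h above ultimately wraps C11 generic selection (`_Generic`), with pragmas to silence clang's extension warnings in older language modes. As a minimal standalone sketch of the underlying feature (the `demo_*` names are hypothetical and not part of SIMDe or this repository), type-based dispatch looks like this:

```c
/* C11 generic selection: what SIMDE_GENERIC_(...) expands to as
 * _Generic(__VA_ARGS__) on supported compilers.  Build with -std=c11. */
#include <stdio.h>

static float  demo_absf(float x)  { return x < 0.0f ? -x : x; }
static double demo_absd(double x) { return x < 0.0  ? -x : x; }

/* _Generic selects an expression from the static type of its
 * controlling argument, here picking the right abs flavor. */
#define demo_abs(x) _Generic((x), \
    float:  demo_absf,            \
    double: demo_absd)(x)

int main(void) {
  printf("%f\n", demo_abs(-1.5f)); /* float  -> demo_absf */
  printf("%f\n", demo_abs(-2.5));  /* double -> demo_absd */
  return 0;
}
```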
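SIMDE_BUG_IGNORE_SIGN_CONVERSION in the same file shows another recurring SIMDe idiom: a GNU statement expression that pushes the diagnostic state, evaluates the expression with one warning disabled, and pops the state so only that expression is exempt. A rough self-contained equivalent (GCC/clang only; the `demo_*` names are hypothetical):

```c
#include <stdint.h>
#include <stdio.h>

static uint32_t demo_take_unsigned(uint32_t x) { return x + 1u; }

/* Evaluate `expr` once with -Wsign-conversion silenced, then restore
 * the caller's diagnostic state; same shape as the SIMDe macro. */
#define DEMO_IGNORE_SIGN_CONVERSION(expr) (__extension__ ({ \
    _Pragma("GCC diagnostic push") \
    _Pragma("GCC diagnostic ignored \"-Wsign-conversion\"") \
    __typeof__(expr) demo_v_ = (expr); \
    _Pragma("GCC diagnostic pop") \
    demo_v_; \
  }))

int main(void) {
  int32_t s = 41;
  /* The implicit signed->unsigned argument conversion inside the
   * expression would normally trip -Wsign-conversion. */
  uint32_t u = DEMO_IGNORE_SIGN_CONVERSION(demo_take_unsigned(s));
  printf("%u\n", u); /* prints 42 */
  return 0;
}
```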
  {
    "path": "external_libs/pgenlib/simde/simde-complex.h",
    "content": "/* SPDX-License-Identifier: MIT\n *\n * Permission is hereby granted, free of charge, to any person\n * obtaining a copy of this software and associated documentation\n * files (the \"Software\"), to deal in the Software without\n * restriction, including without limitation the rights to use, copy,\n * modify, merge, publish, distribute, sublicense, and/or sell copies\n * of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be\n * included in all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND,\n * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\n * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\n * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\n * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\n * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\n * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n * SOFTWARE.\n *\n * Copyright:\n *   2020-2021 Evan Nemerson <evan@nemerson.com>\n */\n\n/* Support for complex math.\n *\n * We try to avoid including <complex> (in C++ mode) since it pulls in\n * a *lot* of code.  Unfortunately this only works for GNU modes (i.e.,\n * -std=gnu++14 not -std=c++14) unless you pass -fext-numeric-literals,\n * but there is no way (AFAICT) to detect that flag so we have to rely\n * on __STRICT_ANSI__ to instead detect GNU mode.\n *\n * This header is separate from simde-math.h since there is a good\n * chance it will pull in <complex>, and most of the time we don't need\n * complex math (on x86 only SVML uses it). 
*/\n\n#if !defined(SIMDE_COMPLEX_H)\n#define SIMDE_COMPLEX_H 1\n\n#include \"simde-math.h\"\n\n#if ( \\\n      HEDLEY_HAS_BUILTIN(__builtin_creal) || \\\n      HEDLEY_GCC_VERSION_CHECK(4,7,0) || \\\n      HEDLEY_INTEL_VERSION_CHECK(13,0,0) \\\n    ) && (!defined(__cplusplus) && !defined(__STRICT_ANSI__))\n  HEDLEY_DIAGNOSTIC_PUSH\n  SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_\n    typedef __complex__ float simde_cfloat32;\n    typedef __complex__ double simde_cfloat64;\n  HEDLEY_DIAGNOSTIC_POP\n  #define SIMDE_MATH_CMPLX(x, y) (HEDLEY_STATIC_CAST(double, x) + HEDLEY_STATIC_CAST(double, y) * (__extension__ 1.0j))\n  #define SIMDE_MATH_CMPLXF(x, y) (HEDLEY_STATIC_CAST(float, x) + HEDLEY_STATIC_CAST(float, y) * (__extension__ 1.0fj))\n\n  #if !defined(simde_math_creal)\n    #define simde_math_creal(z) __builtin_creal(z)\n  #endif\n  #if !defined(simde_math_crealf)\n    #define simde_math_crealf(z) __builtin_crealf(z)\n  #endif\n  #if !defined(simde_math_cimag)\n    #define simde_math_cimag(z) __builtin_cimag(z)\n  #endif\n  #if !defined(simde_math_cimagf)\n    #define simde_math_cimagf(z) __builtin_cimagf(z)\n  #endif\n  #if !defined(simde_math_cexp)\n    #define simde_math_cexp(z) __builtin_cexp(z)\n  #endif\n  #if !defined(simde_math_cexpf)\n    #define simde_math_cexpf(z) __builtin_cexpf(z)\n  #endif\n#elif !defined(__cplusplus)\n  #include <complex.h>\n\n  #if !defined(HEDLEY_MSVC_VERSION)\n    typedef float _Complex simde_cfloat32;\n    typedef double _Complex simde_cfloat64;\n  #else\n    typedef _Fcomplex simde_cfloat32;\n    typedef _Dcomplex simde_cfloat64;\n  #endif\n\n  #if defined(HEDLEY_MSVC_VERSION)\n    #define SIMDE_MATH_CMPLX(x, y) ((simde_cfloat64) { (x), (y) })\n    #define SIMDE_MATH_CMPLXF(x, y) ((simde_cfloat32) { (x), (y) })\n  #elif defined(CMPLX) && defined(CMPLXF)\n    #define SIMDE_MATH_CMPLX(x, y) CMPLX(x, y)\n    #define SIMDE_MATH_CMPLXF(x, y) CMPLXF(x, y)\n  #else\n    #define SIMDE_MATH_CMPLX(x, y) (HEDLEY_STATIC_CAST(double, x) + HEDLEY_STATIC_CAST(double, y) * I)\n    #define SIMDE_MATH_CMPLXF(x, y) (HEDLEY_STATIC_CAST(float, x) + HEDLEY_STATIC_CAST(float, y) * I)\n  #endif\n\n  #if !defined(simde_math_creal)\n    #define simde_math_creal(z) creal(z)\n  #endif\n  #if !defined(simde_math_crealf)\n    #define simde_math_crealf(z) crealf(z)\n  #endif\n  #if !defined(simde_math_cimag)\n    #define simde_math_cimag(z) cimag(z)\n  #endif\n  #if !defined(simde_math_cimagf)\n    #define simde_math_cimagf(z) cimagf(z)\n  #endif\n  #if !defined(simde_math_cexp)\n    #define simde_math_cexp(z) cexp(z)\n  #endif\n  #if !defined(simde_math_cexpf)\n    #define simde_math_cexpf(z) cexpf(z)\n  #endif\n#else\n  HEDLEY_DIAGNOSTIC_PUSH\n  #if defined(HEDLEY_MSVC_VERSION)\n    #pragma warning(disable:4530)\n  #endif\n  #include <complex>\n  HEDLEY_DIAGNOSTIC_POP\n\n  typedef std::complex<float> simde_cfloat32;\n  typedef std::complex<double> simde_cfloat64;\n  #define SIMDE_MATH_CMPLX(x, y) (std::complex<double>(x, y))\n  #define SIMDE_MATH_CMPLXF(x, y) (std::complex<float>(x, y))\n\n  #if !defined(simde_math_creal)\n    #define simde_math_creal(z) ((z).real())\n  #endif\n  #if !defined(simde_math_crealf)\n    #define simde_math_crealf(z) ((z).real())\n  #endif\n  #if !defined(simde_math_cimag)\n    #define simde_math_cimag(z) ((z).imag())\n  #endif\n  #if !defined(simde_math_cimagf)\n    #define simde_math_cimagf(z) ((z).imag())\n  #endif\n  #if !defined(simde_math_cexp)\n    #define simde_math_cexp(z) std::exp(z)\n  #endif\n  #if !defined(simde_math_cexpf)\n    #define 
simde_math_cexpf(z) std::exp(z)\n  #endif\n#endif\n\n#endif /* !defined(SIMDE_COMPLEX_H) */\n"
  },
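On a non-MSVC C compiler, the C99 branch of simde-complex.h above maps simde_cfloat64 to `double _Complex`, SIMDE_MATH_CMPLX to either CMPLX or the `x + y * I` fallback, and simde_math_cexp to plain cexp. A small standalone program exercising that underlying API (plain C99 plus libm, no SIMDe headers; illustrative only):

```c
#include <complex.h>
#include <stdio.h>

int main(void) {
  /* Same construction as the fallback SIMDE_MATH_CMPLX branch:
   * real part plus imaginary part times I. */
  double _Complex z = 0.0 + 3.141592653589793 * I;
  double _Complex e = cexp(z); /* e^(i*pi) */
  /* Expect roughly -1.000000 + 0.000000i (Euler's identity). */
  printf("%f + %fi\n", creal(e), cimag(e));
  return 0;
}
```

Link with -lm on platforms where the complex math functions live in libm.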
  {
    "path": "external_libs/pgenlib/simde/simde-constify.h",
    "content": "/* SPDX-License-Identifier: MIT\n *\n * Permission is hereby granted, free of charge, to any person\n * obtaining a copy of this software and associated documentation\n * files (the \"Software\"), to deal in the Software without\n * restriction, including without limitation the rights to use, copy,\n * modify, merge, publish, distribute, sublicense, and/or sell copies\n * of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be\n * included in all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND,\n * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\n * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\n * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\n * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\n * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\n * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n * SOFTWARE.\n *\n * Copyright:\n *   2020      Evan Nemerson <evan@nemerson.com>\n */\n\n/* Constify macros.  For internal use only.\n *\n * These are used to make it possible to call a function which takes\n * an Integer Constant Expression (ICE) using a compile time constant.\n * Technically it would also be possible to use a value not trivially\n * known by the compiler, but there would be a siginficant performance\n * hit (a switch switch is used).\n *\n * The basic idea is pretty simple; we just emit a do while loop which\n * contains a switch with a case for every possible value of the\n * constant.\n *\n * As long as the value you pass to the function in constant, pretty\n * much any copmiler shouldn't have a problem generating exactly the\n * same code as if you had used an ICE.\n *\n * This is intended to be used in the SIMDe implementations of\n * functions the compilers require to be an ICE, but the other benefit\n * is that if we also disable the warnings from\n * SIMDE_REQUIRE_CONSTANT_RANGE we can actually just allow the tests\n * to use non-ICE parameters\n */\n\n#if !defined(SIMDE_CONSTIFY_H)\n#define SIMDE_CONSTIFY_H\n\n#include \"simde-diagnostic.h\"\n\nHEDLEY_DIAGNOSTIC_PUSH\nSIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_\nSIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_\n\n#define SIMDE_CONSTIFY_2_(func_name, result, default_case, imm, ...) \\\n  do { \\\n    switch(imm) { \\\n      case 0: result = func_name(__VA_ARGS__, 0); break; \\\n      case 1: result = func_name(__VA_ARGS__, 1); break; \\\n      default: result = default_case; break; \\\n    } \\\n  } while (0)\n\n#define SIMDE_CONSTIFY_4_(func_name, result, default_case, imm, ...) \\\n  do { \\\n    switch(imm) { \\\n      case 0: result = func_name(__VA_ARGS__, 0); break; \\\n      case 1: result = func_name(__VA_ARGS__, 1); break; \\\n      case 2: result = func_name(__VA_ARGS__, 2); break; \\\n      case 3: result = func_name(__VA_ARGS__, 3); break; \\\n      default: result = default_case; break; \\\n    } \\\n  } while (0)\n\n#define SIMDE_CONSTIFY_8_(func_name, result, default_case, imm, ...) 
\\\n  do { \\\n    switch(imm) { \\\n      case 0: result = func_name(__VA_ARGS__, 0); break; \\\n      case 1: result = func_name(__VA_ARGS__, 1); break; \\\n      case 2: result = func_name(__VA_ARGS__, 2); break; \\\n      case 3: result = func_name(__VA_ARGS__, 3); break; \\\n      case 4: result = func_name(__VA_ARGS__, 4); break; \\\n      case 5: result = func_name(__VA_ARGS__, 5); break; \\\n      case 6: result = func_name(__VA_ARGS__, 6); break; \\\n      case 7: result = func_name(__VA_ARGS__, 7); break; \\\n      default: result = default_case; break; \\\n    } \\\n  } while (0)\n\n#define SIMDE_CONSTIFY_16_(func_name, result, default_case, imm, ...) \\\n  do { \\\n    switch(imm) { \\\n      case  0: result = func_name(__VA_ARGS__,  0); break; \\\n      case  1: result = func_name(__VA_ARGS__,  1); break; \\\n      case  2: result = func_name(__VA_ARGS__,  2); break; \\\n      case  3: result = func_name(__VA_ARGS__,  3); break; \\\n      case  4: result = func_name(__VA_ARGS__,  4); break; \\\n      case  5: result = func_name(__VA_ARGS__,  5); break; \\\n      case  6: result = func_name(__VA_ARGS__,  6); break; \\\n      case  7: result = func_name(__VA_ARGS__,  7); break; \\\n      case  8: result = func_name(__VA_ARGS__,  8); break; \\\n      case  9: result = func_name(__VA_ARGS__,  9); break; \\\n      case 10: result = func_name(__VA_ARGS__, 10); break; \\\n      case 11: result = func_name(__VA_ARGS__, 11); break; \\\n      case 12: result = func_name(__VA_ARGS__, 12); break; \\\n      case 13: result = func_name(__VA_ARGS__, 13); break; \\\n      case 14: result = func_name(__VA_ARGS__, 14); break; \\\n      case 15: result = func_name(__VA_ARGS__, 15); break; \\\n      default: result = default_case; break; \\\n    } \\\n  } while (0)\n\n#define SIMDE_CONSTIFY_32_(func_name, result, default_case, imm, ...) 
\\\n  do { \\\n    switch(imm) { \\\n      case  0: result = func_name(__VA_ARGS__,  0); break; \\\n      case  1: result = func_name(__VA_ARGS__,  1); break; \\\n      case  2: result = func_name(__VA_ARGS__,  2); break; \\\n      case  3: result = func_name(__VA_ARGS__,  3); break; \\\n      case  4: result = func_name(__VA_ARGS__,  4); break; \\\n      case  5: result = func_name(__VA_ARGS__,  5); break; \\\n      case  6: result = func_name(__VA_ARGS__,  6); break; \\\n      case  7: result = func_name(__VA_ARGS__,  7); break; \\\n      case  8: result = func_name(__VA_ARGS__,  8); break; \\\n      case  9: result = func_name(__VA_ARGS__,  9); break; \\\n      case 10: result = func_name(__VA_ARGS__, 10); break; \\\n      case 11: result = func_name(__VA_ARGS__, 11); break; \\\n      case 12: result = func_name(__VA_ARGS__, 12); break; \\\n      case 13: result = func_name(__VA_ARGS__, 13); break; \\\n      case 14: result = func_name(__VA_ARGS__, 14); break; \\\n      case 15: result = func_name(__VA_ARGS__, 15); break; \\\n      case 16: result = func_name(__VA_ARGS__, 16); break; \\\n      case 17: result = func_name(__VA_ARGS__, 17); break; \\\n      case 18: result = func_name(__VA_ARGS__, 18); break; \\\n      case 19: result = func_name(__VA_ARGS__, 19); break; \\\n      case 20: result = func_name(__VA_ARGS__, 20); break; \\\n      case 21: result = func_name(__VA_ARGS__, 21); break; \\\n      case 22: result = func_name(__VA_ARGS__, 22); break; \\\n      case 23: result = func_name(__VA_ARGS__, 23); break; \\\n      case 24: result = func_name(__VA_ARGS__, 24); break; \\\n      case 25: result = func_name(__VA_ARGS__, 25); break; \\\n      case 26: result = func_name(__VA_ARGS__, 26); break; \\\n      case 27: result = func_name(__VA_ARGS__, 27); break; \\\n      case 28: result = func_name(__VA_ARGS__, 28); break; \\\n      case 29: result = func_name(__VA_ARGS__, 29); break; \\\n      case 30: result = func_name(__VA_ARGS__, 30); break; \\\n      case 31: result = func_name(__VA_ARGS__, 31); break; \\\n      default: result = default_case; break; \\\n    } \\\n  } while (0)\n\n#define SIMDE_CONSTIFY_64_(func_name, result, default_case, imm, ...) 
\\\n  do { \\\n    switch(imm) { \\\n      case  0: result = func_name(__VA_ARGS__,  0); break; \\\n      case  1: result = func_name(__VA_ARGS__,  1); break; \\\n      case  2: result = func_name(__VA_ARGS__,  2); break; \\\n      case  3: result = func_name(__VA_ARGS__,  3); break; \\\n      case  4: result = func_name(__VA_ARGS__,  4); break; \\\n      case  5: result = func_name(__VA_ARGS__,  5); break; \\\n      case  6: result = func_name(__VA_ARGS__,  6); break; \\\n      case  7: result = func_name(__VA_ARGS__,  7); break; \\\n      case  8: result = func_name(__VA_ARGS__,  8); break; \\\n      case  9: result = func_name(__VA_ARGS__,  9); break; \\\n      case 10: result = func_name(__VA_ARGS__, 10); break; \\\n      case 11: result = func_name(__VA_ARGS__, 11); break; \\\n      case 12: result = func_name(__VA_ARGS__, 12); break; \\\n      case 13: result = func_name(__VA_ARGS__, 13); break; \\\n      case 14: result = func_name(__VA_ARGS__, 14); break; \\\n      case 15: result = func_name(__VA_ARGS__, 15); break; \\\n      case 16: result = func_name(__VA_ARGS__, 16); break; \\\n      case 17: result = func_name(__VA_ARGS__, 17); break; \\\n      case 18: result = func_name(__VA_ARGS__, 18); break; \\\n      case 19: result = func_name(__VA_ARGS__, 19); break; \\\n      case 20: result = func_name(__VA_ARGS__, 20); break; \\\n      case 21: result = func_name(__VA_ARGS__, 21); break; \\\n      case 22: result = func_name(__VA_ARGS__, 22); break; \\\n      case 23: result = func_name(__VA_ARGS__, 23); break; \\\n      case 24: result = func_name(__VA_ARGS__, 24); break; \\\n      case 25: result = func_name(__VA_ARGS__, 25); break; \\\n      case 26: result = func_name(__VA_ARGS__, 26); break; \\\n      case 27: result = func_name(__VA_ARGS__, 27); break; \\\n      case 28: result = func_name(__VA_ARGS__, 28); break; \\\n      case 29: result = func_name(__VA_ARGS__, 29); break; \\\n      case 30: result = func_name(__VA_ARGS__, 30); break; \\\n      case 31: result = func_name(__VA_ARGS__, 31); break; \\\n      case 32: result = func_name(__VA_ARGS__, 32); break; \\\n      case 33: result = func_name(__VA_ARGS__, 33); break; \\\n      case 34: result = func_name(__VA_ARGS__, 34); break; \\\n      case 35: result = func_name(__VA_ARGS__, 35); break; \\\n      case 36: result = func_name(__VA_ARGS__, 36); break; \\\n      case 37: result = func_name(__VA_ARGS__, 37); break; \\\n      case 38: result = func_name(__VA_ARGS__, 38); break; \\\n      case 39: result = func_name(__VA_ARGS__, 39); break; \\\n      case 40: result = func_name(__VA_ARGS__, 40); break; \\\n      case 41: result = func_name(__VA_ARGS__, 41); break; \\\n      case 42: result = func_name(__VA_ARGS__, 42); break; \\\n      case 43: result = func_name(__VA_ARGS__, 43); break; \\\n      case 44: result = func_name(__VA_ARGS__, 44); break; \\\n      case 45: result = func_name(__VA_ARGS__, 45); break; \\\n      case 46: result = func_name(__VA_ARGS__, 46); break; \\\n      case 47: result = func_name(__VA_ARGS__, 47); break; \\\n      case 48: result = func_name(__VA_ARGS__, 48); break; \\\n      case 49: result = func_name(__VA_ARGS__, 49); break; \\\n      case 50: result = func_name(__VA_ARGS__, 50); break; \\\n      case 51: result = func_name(__VA_ARGS__, 51); break; \\\n      case 52: result = func_name(__VA_ARGS__, 52); break; \\\n      case 53: result = func_name(__VA_ARGS__, 53); break; \\\n      case 54: result = func_name(__VA_ARGS__, 54); break; \\\n      case 55: result = func_name(__VA_ARGS__, 55); 
break; \\\n      case 56: result = func_name(__VA_ARGS__, 56); break; \\\n      case 57: result = func_name(__VA_ARGS__, 57); break; \\\n      case 58: result = func_name(__VA_ARGS__, 58); break; \\\n      case 59: result = func_name(__VA_ARGS__, 59); break; \\\n      case 60: result = func_name(__VA_ARGS__, 60); break; \\\n      case 61: result = func_name(__VA_ARGS__, 61); break; \\\n      case 62: result = func_name(__VA_ARGS__, 62); break; \\\n      case 63: result = func_name(__VA_ARGS__, 63); break; \\\n      default: result = default_case; break; \\\n    } \\\n  } while (0)\n\n#define SIMDE_CONSTIFY_2_NO_RESULT_(func_name, default_case, imm, ...) \\\n  do { \\\n    switch(imm) { \\\n      case 0: func_name(__VA_ARGS__, 0); break; \\\n      case 1: func_name(__VA_ARGS__, 1); break; \\\n      default: default_case; break; \\\n    } \\\n  } while (0)\n\n#define SIMDE_CONSTIFY_4_NO_RESULT_(func_name, default_case, imm, ...) \\\n  do { \\\n    switch(imm) { \\\n      case 0: func_name(__VA_ARGS__, 0); break; \\\n      case 1: func_name(__VA_ARGS__, 1); break; \\\n      case 2: func_name(__VA_ARGS__, 2); break; \\\n      case 3: func_name(__VA_ARGS__, 3); break; \\\n      default: default_case; break; \\\n    } \\\n  } while (0)\n\n#define SIMDE_CONSTIFY_8_NO_RESULT_(func_name, default_case, imm, ...) \\\n  do { \\\n    switch(imm) { \\\n      case 0: func_name(__VA_ARGS__, 0); break; \\\n      case 1: func_name(__VA_ARGS__, 1); break; \\\n      case 2: func_name(__VA_ARGS__, 2); break; \\\n      case 3: func_name(__VA_ARGS__, 3); break; \\\n      case 4: func_name(__VA_ARGS__, 4); break; \\\n      case 5: func_name(__VA_ARGS__, 5); break; \\\n      case 6: func_name(__VA_ARGS__, 6); break; \\\n      case 7: func_name(__VA_ARGS__, 7); break; \\\n      default: default_case; break; \\\n    } \\\n  } while (0)\n\n#define SIMDE_CONSTIFY_16_NO_RESULT_(func_name, default_case, imm, ...) \\\n  do { \\\n    switch(imm) { \\\n      case  0: func_name(__VA_ARGS__,  0); break; \\\n      case  1: func_name(__VA_ARGS__,  1); break; \\\n      case  2: func_name(__VA_ARGS__,  2); break; \\\n      case  3: func_name(__VA_ARGS__,  3); break; \\\n      case  4: func_name(__VA_ARGS__,  4); break; \\\n      case  5: func_name(__VA_ARGS__,  5); break; \\\n      case  6: func_name(__VA_ARGS__,  6); break; \\\n      case  7: func_name(__VA_ARGS__,  7); break; \\\n      case  8: func_name(__VA_ARGS__,  8); break; \\\n      case  9: func_name(__VA_ARGS__,  9); break; \\\n      case 10: func_name(__VA_ARGS__, 10); break; \\\n      case 11: func_name(__VA_ARGS__, 11); break; \\\n      case 12: func_name(__VA_ARGS__, 12); break; \\\n      case 13: func_name(__VA_ARGS__, 13); break; \\\n      case 14: func_name(__VA_ARGS__, 14); break; \\\n      case 15: func_name(__VA_ARGS__, 15); break; \\\n      default: default_case; break; \\\n    } \\\n  } while (0)\n\n#define SIMDE_CONSTIFY_32_NO_RESULT_(func_name, default_case, imm, ...) 
\\\n  do { \\\n    switch(imm) { \\\n      case  0: func_name(__VA_ARGS__,  0); break; \\\n      case  1: func_name(__VA_ARGS__,  1); break; \\\n      case  2: func_name(__VA_ARGS__,  2); break; \\\n      case  3: func_name(__VA_ARGS__,  3); break; \\\n      case  4: func_name(__VA_ARGS__,  4); break; \\\n      case  5: func_name(__VA_ARGS__,  5); break; \\\n      case  6: func_name(__VA_ARGS__,  6); break; \\\n      case  7: func_name(__VA_ARGS__,  7); break; \\\n      case  8: func_name(__VA_ARGS__,  8); break; \\\n      case  9: func_name(__VA_ARGS__,  9); break; \\\n      case 10: func_name(__VA_ARGS__, 10); break; \\\n      case 11: func_name(__VA_ARGS__, 11); break; \\\n      case 12: func_name(__VA_ARGS__, 12); break; \\\n      case 13: func_name(__VA_ARGS__, 13); break; \\\n      case 14: func_name(__VA_ARGS__, 14); break; \\\n      case 15: func_name(__VA_ARGS__, 15); break; \\\n      case 16: func_name(__VA_ARGS__, 16); break; \\\n      case 17: func_name(__VA_ARGS__, 17); break; \\\n      case 18: func_name(__VA_ARGS__, 18); break; \\\n      case 19: func_name(__VA_ARGS__, 19); break; \\\n      case 20: func_name(__VA_ARGS__, 20); break; \\\n      case 21: func_name(__VA_ARGS__, 21); break; \\\n      case 22: func_name(__VA_ARGS__, 22); break; \\\n      case 23: func_name(__VA_ARGS__, 23); break; \\\n      case 24: func_name(__VA_ARGS__, 24); break; \\\n      case 25: func_name(__VA_ARGS__, 25); break; \\\n      case 26: func_name(__VA_ARGS__, 26); break; \\\n      case 27: func_name(__VA_ARGS__, 27); break; \\\n      case 28: func_name(__VA_ARGS__, 28); break; \\\n      case 29: func_name(__VA_ARGS__, 29); break; \\\n      case 30: func_name(__VA_ARGS__, 30); break; \\\n      case 31: func_name(__VA_ARGS__, 31); break; \\\n      default: default_case; break; \\\n    } \\\n  } while (0)\n\n#define SIMDE_CONSTIFY_64_NO_RESULT_(func_name, default_case, imm, ...) 
\\\n  do { \\\n    switch(imm) { \\\n      case  0: func_name(__VA_ARGS__,  0); break; \\\n      case  1: func_name(__VA_ARGS__,  1); break; \\\n      case  2: func_name(__VA_ARGS__,  2); break; \\\n      case  3: func_name(__VA_ARGS__,  3); break; \\\n      case  4: func_name(__VA_ARGS__,  4); break; \\\n      case  5: func_name(__VA_ARGS__,  5); break; \\\n      case  6: func_name(__VA_ARGS__,  6); break; \\\n      case  7: func_name(__VA_ARGS__,  7); break; \\\n      case  8: func_name(__VA_ARGS__,  8); break; \\\n      case  9: func_name(__VA_ARGS__,  9); break; \\\n      case 10: func_name(__VA_ARGS__, 10); break; \\\n      case 11: func_name(__VA_ARGS__, 11); break; \\\n      case 12: func_name(__VA_ARGS__, 12); break; \\\n      case 13: func_name(__VA_ARGS__, 13); break; \\\n      case 14: func_name(__VA_ARGS__, 14); break; \\\n      case 15: func_name(__VA_ARGS__, 15); break; \\\n      case 16: func_name(__VA_ARGS__, 16); break; \\\n      case 17: func_name(__VA_ARGS__, 17); break; \\\n      case 18: func_name(__VA_ARGS__, 18); break; \\\n      case 19: func_name(__VA_ARGS__, 19); break; \\\n      case 20: func_name(__VA_ARGS__, 20); break; \\\n      case 21: func_name(__VA_ARGS__, 21); break; \\\n      case 22: func_name(__VA_ARGS__, 22); break; \\\n      case 23: func_name(__VA_ARGS__, 23); break; \\\n      case 24: func_name(__VA_ARGS__, 24); break; \\\n      case 25: func_name(__VA_ARGS__, 25); break; \\\n      case 26: func_name(__VA_ARGS__, 26); break; \\\n      case 27: func_name(__VA_ARGS__, 27); break; \\\n      case 28: func_name(__VA_ARGS__, 28); break; \\\n      case 29: func_name(__VA_ARGS__, 29); break; \\\n      case 30: func_name(__VA_ARGS__, 30); break; \\\n      case 31: func_name(__VA_ARGS__, 31); break; \\\n      case 32: func_name(__VA_ARGS__, 32); break; \\\n      case 33: func_name(__VA_ARGS__, 33); break; \\\n      case 34: func_name(__VA_ARGS__, 34); break; \\\n      case 35: func_name(__VA_ARGS__, 35); break; \\\n      case 36: func_name(__VA_ARGS__, 36); break; \\\n      case 37: func_name(__VA_ARGS__, 37); break; \\\n      case 38: func_name(__VA_ARGS__, 38); break; \\\n      case 39: func_name(__VA_ARGS__, 39); break; \\\n      case 40: func_name(__VA_ARGS__, 40); break; \\\n      case 41: func_name(__VA_ARGS__, 41); break; \\\n      case 42: func_name(__VA_ARGS__, 42); break; \\\n      case 43: func_name(__VA_ARGS__, 43); break; \\\n      case 44: func_name(__VA_ARGS__, 44); break; \\\n      case 45: func_name(__VA_ARGS__, 45); break; \\\n      case 46: func_name(__VA_ARGS__, 46); break; \\\n      case 47: func_name(__VA_ARGS__, 47); break; \\\n      case 48: func_name(__VA_ARGS__, 48); break; \\\n      case 49: func_name(__VA_ARGS__, 49); break; \\\n      case 50: func_name(__VA_ARGS__, 50); break; \\\n      case 51: func_name(__VA_ARGS__, 51); break; \\\n      case 52: func_name(__VA_ARGS__, 52); break; \\\n      case 53: func_name(__VA_ARGS__, 53); break; \\\n      case 54: func_name(__VA_ARGS__, 54); break; \\\n      case 55: func_name(__VA_ARGS__, 55); break; \\\n      case 56: func_name(__VA_ARGS__, 56); break; \\\n      case 57: func_name(__VA_ARGS__, 57); break; \\\n      case 58: func_name(__VA_ARGS__, 58); break; \\\n      case 59: func_name(__VA_ARGS__, 59); break; \\\n      case 60: func_name(__VA_ARGS__, 60); break; \\\n      case 61: func_name(__VA_ARGS__, 61); break; \\\n      case 62: func_name(__VA_ARGS__, 62); break; \\\n      case 63: func_name(__VA_ARGS__, 63); break; \\\n      default: default_case; break; \\\n    } \\\n  } while 
(0)\n\nHEDLEY_DIAGNOSTIC_POP\n\n#endif\n"
  },
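The constify trick documented above is easiest to see in miniature: the switch gives the compiler a literal in every `case`, so a function (or intrinsic) whose last argument must be an integer constant expression can still be driven by a runtime value. A sketch with a hypothetical `demo_shift` standing in for something like an SSE shift intrinsic whose count must be an ICE:

```c
#include <stdint.h>
#include <stdio.h>

static uint32_t demo_shift(uint32_t v, int imm) { return v >> imm; }

/* Same shape as SIMDE_CONSTIFY_4_: every reachable value of imm gets
 * its own case, each passing a literal as the final argument. */
#define DEMO_CONSTIFY_4_(func, result, default_case, imm, ...) \
  do { \
    switch (imm) { \
      case 0: result = func(__VA_ARGS__, 0); break; \
      case 1: result = func(__VA_ARGS__, 1); break; \
      case 2: result = func(__VA_ARGS__, 2); break; \
      case 3: result = func(__VA_ARGS__, 3); break; \
      default: result = default_case; break; \
    } \
  } while (0)

int main(void) {
  uint32_t r;
  for (int imm = 0; imm < 4; imm++) {             /* imm is a runtime value... */
    DEMO_CONSTIFY_4_(demo_shift, r, 0, imm, 32u); /* ...each case is constant */
    printf("32 >> %d = %u\n", imm, r);
  }
  return 0;
}
```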
  {
    "path": "external_libs/pgenlib/simde/simde-detect-clang.h",
    "content": "/* Detect Clang Version\n * Created by Evan Nemerson <evan@nemerson.com>\n *\n * To the extent possible under law, the author(s) have dedicated all\n * copyright and related and neighboring rights to this software to\n * the public domain worldwide. This software is distributed without\n * any warranty.\n *\n * For details, see <http://creativecommons.org/publicdomain/zero/1.0/>.\n * SPDX-License-Identifier: CC0-1.0\n */\n\n/* This file was originally part of SIMDe\n * (<https://github.com/simd-everywhere/simde>).  You're free to do with it as\n * you please, but I do have a few small requests:\n *\n *  * If you make improvements, please submit them back to SIMDe\n *    (at <https://github.com/simd-everywhere/simde/issues>) so others can\n *    benefit from them.\n *  * Please keep a link to SIMDe intact so people know where to submit\n *    improvements.\n *  * If you expose it publicly, please change the SIMDE_ prefix to\n *    something specific to your project.\n *\n * The version numbers clang exposes (in the ___clang_major__,\n * __clang_minor__, and __clang_patchlevel__ macros) are unreliable.\n * Vendors such as Apple will define these values to their version\n * numbers; for example, \"Apple Clang 4.0\" is really clang 3.1, but\n * __clang_major__ and __clang_minor__ are defined to 4 and 0\n * respectively, instead of 3 and 1.\n *\n * The solution is *usually* to use clang's feature detection macros\n * (<https://clang.llvm.org/docs/LanguageExtensions.html#feature-checking-macros>)\n * to determine if the feature you're interested in is available.  This\n * generally works well, and it should probably be the first thing you\n * try.  Unfortunately, it's not possible to check for everything.  In\n * particular, compiler bugs.\n *\n * This file just uses the feature checking macros to detect features\n * added in specific versions of clang to identify which version of\n * clang the compiler is based on.\n *\n * Right now it only goes back to 3.6, but I'm happy to accept patches\n * to go back further.  And, of course, newer versions are welcome if\n * they're not already present, and if you find a way to detect a point\n * release that would be great, too!\n */\n\n#if !defined(SIMDE_DETECT_CLANG_H)\n#define SIMDE_DETECT_CLANG_H 1\n\n/* Attempt to detect the upstream clang version number.  I usually only\n * worry about major version numbers (at least for 4.0+), but if you\n * need more resolution I'm happy to accept patches that are able to\n * detect minor versions as well.  That said, you'll probably have a\n * hard time with detection since AFAIK most minor releases don't add\n * anything we can detect. 
Updated based on\n * https://github.com/google/highway/blob/438c705a295176b96a50336527bb3e7ea365ffac/hwy/detect_compiler_arch.h#L73\n * - would welcome patches/updates there as well.\n */\n\n#if defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION)\n#  if __has_warning(\"-Wmissing-designated-field-initializers\")\n#    define SIMDE_DETECT_CLANG_VERSION 190000\n#  elif __has_warning(\"-Woverriding-option\")\n#    define SIMDE_DETECT_CLANG_VERSION 180000\n#  elif __has_attribute(unsafe_buffer_usage)  // no new warnings in 17.0\n#    define SIMDE_DETECT_CLANG_VERSION 170000\n#  elif __has_attribute(nouwtable)  // no new warnings in 16.0\n#    define SIMDE_DETECT_CLANG_VERSION 160000\n#  elif __has_warning(\"-Warray-parameter\")\n#    define SIMDE_DETECT_CLANG_VERSION 150000\n#  elif __has_warning(\"-Wbitwise-instead-of-logical\")\n#    define SIMDE_DETECT_CLANG_VERSION 140000\n#  elif __has_warning(\"-Waix-compat\")\n#    define SIMDE_DETECT_CLANG_VERSION 130000\n#  elif __has_warning(\"-Wformat-insufficient-args\")\n#    define SIMDE_DETECT_CLANG_VERSION 120000\n#  elif __has_warning(\"-Wimplicit-const-int-float-conversion\")\n#    define SIMDE_DETECT_CLANG_VERSION 110000\n#  elif __has_warning(\"-Wmisleading-indentation\")\n#    define SIMDE_DETECT_CLANG_VERSION 100000\n#  elif defined(__FILE_NAME__)\n#    define SIMDE_DETECT_CLANG_VERSION 90000\n#  elif __has_warning(\"-Wextra-semi-stmt\") || __has_builtin(__builtin_rotateleft32)\n#    define SIMDE_DETECT_CLANG_VERSION 80000\n// For reasons unknown, Xcode 10.3 (Apple LLVM version 10.0.1) is apparently\n// based on Clang 7, but does not support the warning we test.\n// See https://en.wikipedia.org/wiki/Xcode#Toolchain_versions and\n// https://trac.macports.org/wiki/XcodeVersionInfo.\n#  elif __has_warning(\"-Wc++98-compat-extra-semi\") || \\\n      (defined(__apple_build_version__) && __apple_build_version__ >= 10010000)\n#    define SIMDE_DETECT_CLANG_VERSION 70000\n#  elif __has_warning(\"-Wpragma-pack\")\n#    define SIMDE_DETECT_CLANG_VERSION 60000\n#  elif __has_warning(\"-Wbitfield-enum-conversion\")\n#    define SIMDE_DETECT_CLANG_VERSION 50000\n#  elif __has_attribute(diagnose_if)\n#    define SIMDE_DETECT_CLANG_VERSION 40000\n#  elif __has_warning(\"-Wcomma\")\n#    define SIMDE_DETECT_CLANG_VERSION 39000\n#  elif __has_warning(\"-Wdouble-promotion\")\n#    define SIMDE_DETECT_CLANG_VERSION 38000\n#  elif __has_warning(\"-Wshift-negative-value\")\n#    define SIMDE_DETECT_CLANG_VERSION 37000\n#  elif __has_warning(\"-Wambiguous-ellipsis\")\n#    define SIMDE_DETECT_CLANG_VERSION 36000\n#  else\n#    define SIMDE_DETECT_CLANG_VERSION 1\n#  endif\n#endif /* defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION) */\n\n/* The SIMDE_DETECT_CLANG_VERSION_CHECK macro is pretty\n * straightforward; it returns true if the compiler is a derivative\n * of clang >= the specified version.\n *\n * Since this file is often (primarily?) useful for working around bugs\n * it is also helpful to have a macro which returns true if only if the\n * compiler is a version of clang *older* than the specified version to\n * make it a bit easier to ifdef regions to add code for older versions,\n * such as pragmas to disable a specific warning. 
*/\n\n#if defined(SIMDE_DETECT_CLANG_VERSION)\n#  define SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) (SIMDE_DETECT_CLANG_VERSION >= ((major * 10000) + (minor * 1000) + (revision)))\n#  define SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) (SIMDE_DETECT_CLANG_VERSION < ((major * 10000) + (minor * 1000) + (revision)))\n#else\n#  define SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) (0)\n#  define SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) (0)\n#endif\n\n#endif /* !defined(SIMDE_DETECT_CLANG_H) */\n"
  },
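The check macros above encode versions as major*10000 + minor*1000 + revision, so clang 11.0.0 becomes 110000 and the ladder's 39000/38000/37000 values correspond to 3.9/3.8/3.7. A hedged usage sketch, assuming simde-detect-clang.h is on the include path:

```c
#include <stdio.h>
#include "simde-detect-clang.h"

int main(void) {
#if defined(SIMDE_DETECT_CLANG_VERSION)
  /* e.g. 110000 means "upstream clang 11.x", regardless of how the
   * vendor (Apple, etc.) renumbered __clang_major__. */
  printf("detected upstream clang encoding: %d\n", SIMDE_DETECT_CLANG_VERSION);
#else
  printf("not a clang-derived compiler\n");
#endif
#if defined(__clang__) && SIMDE_DETECT_CLANG_VERSION_NOT(11, 0, 0)
  /* This is where a bug workaround gated on "older than 11.0.0"
   * would be enabled. */
  printf("pre-11 clang: workarounds apply\n");
#endif
  return 0;
}
```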
  {
    "path": "external_libs/pgenlib/simde/simde-diagnostic.h",
    "content": "/* SPDX-License-Identifier: MIT\n *\n * Permission is hereby granted, free of charge, to any person\n * obtaining a copy of this software and associated documentation\n * files (the \"Software\"), to deal in the Software without\n * restriction, including without limitation the rights to use, copy,\n * modify, merge, publish, distribute, sublicense, and/or sell copies\n * of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be\n * included in all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND,\n * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\n * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\n * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\n * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\n * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\n * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n * SOFTWARE.\n *\n * Copyright:\n *   2017-2020 Evan Nemerson <evan@nemerson.com>\n */\n\n/* SIMDe targets a very wide range of standards and compilers, and our\n * goal is to compile cleanly even with extremely aggressive warnings\n * (i.e., -Weverything in clang, -Wextra in GCC, /W4 for MSVC, etc.)\n * treated as errors.\n *\n * While our preference is to resolve the underlying issue a given\n * diagnostic is warning us about, sometimes that's not possible.\n * Fixing a warning in one compiler may cause problems in another.\n * Sometimes a warning doesn't really apply to us (false positives),\n * and sometimes adhering to a warning would mean dropping a feature\n * we *know* the compiler supports since we have tested specifically\n * for the compiler or feature.\n *\n * When practical, warnings are only disabled for specific code.  For\n * a list of warnings which are enabled by default in all SIMDe code,\n * see SIMDE_DISABLE_UNWANTED_DIAGNOSTICS.  Note that we restore the\n * warning stack when SIMDe is done parsing, so code which includes\n * SIMDe is not deprived of these warnings.\n */\n\n#if !defined(SIMDE_DIAGNOSTIC_H)\n#define SIMDE_DIAGNOSTIC_H\n\n#include \"hedley.h\"\n#include \"simde-detect-clang.h\"\n#include \"simde-arch.h\"\n\n/* This is only to help us implement functions like _mm_undefined_ps. 
*/\n#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_)\n  #undef SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_\n#endif\n#if HEDLEY_HAS_WARNING(\"-Wuninitialized\")\n  #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma(\"clang diagnostic ignored \\\"-Wuninitialized\\\"\")\n#elif HEDLEY_GCC_VERSION_CHECK(4,2,0)\n  #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma(\"GCC diagnostic ignored \\\"-Wuninitialized\\\"\")\n#elif HEDLEY_PGI_VERSION_CHECK(19,10,0)\n  #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma(\"diag_suppress 549\")\n#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus)\n  #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma(\"error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE,unassigned)\")\n#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0)\n  #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma(\"error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE)\")\n#elif HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) && defined(__cplusplus)\n  #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma(\"error_messages(off,unassigned)\")\n#elif \\\n     HEDLEY_TI_VERSION_CHECK(16,9,9) || \\\n     HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \\\n     HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \\\n     HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2)\n  #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma(\"diag_suppress 551\")\n#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0)\n  #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma(\"warning(disable:592)\")\n#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) && !defined(__MSVC_RUNTIME_CHECKS)\n  #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ __pragma(warning(disable:4700))\n#endif\n\n/* GCC emits a lot of \"notes\" about the ABI being different for things\n * in newer versions of GCC.  We don't really care because all our\n * functions are inlined and don't generate ABI. */\n#if HEDLEY_GCC_VERSION_CHECK(7,0,0)\n  #define SIMDE_DIAGNOSTIC_DISABLE_PSABI_ _Pragma(\"GCC diagnostic ignored \\\"-Wpsabi\\\"\")\n#else\n  #define SIMDE_DIAGNOSTIC_DISABLE_PSABI_\n#endif\n\n/* Since MMX uses x87 FP registers, you're supposed to call _mm_empty()\n * after each MMX function before any floating point instructions.\n * Some compilers warn about functions which use MMX functions but\n * don't call _mm_empty().  However, since SIMDe is implementing the\n * MMX API we shouldn't be calling _mm_empty(); we leave it to the\n * caller to invoke simde_mm_empty(). */\n#if HEDLEY_INTEL_VERSION_CHECK(19,0,0)\n  #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ _Pragma(\"warning(disable:13200 13203)\")\n#elif defined(HEDLEY_MSVC_VERSION)\n  #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ __pragma(warning(disable:4799))\n#else\n  #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_\n#endif\n\n/* Intel is pushing people to use OpenMP SIMD instead of Cilk+, so they\n * emit a diagnostic if you use #pragma simd instead of\n * #pragma omp simd.  SIMDe supports OpenMP SIMD, you just need to\n * compile with -qopenmp or -qopenmp-simd and define\n * SIMDE_ENABLE_OPENMP.  Cilk+ is just a fallback. */\n#if HEDLEY_INTEL_VERSION_CHECK(18,0,0)\n  #define SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ _Pragma(\"warning(disable:3948)\")\n#else\n  #define SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_\n#endif\n\n/* MSVC emits a diagnostic when we call a function (like\n * simde_mm_set_epi32) while initializing a struct.  We currently do\n * this a *lot* in the tests. 
*/\n#if \\\n  defined(HEDLEY_MSVC_VERSION)\n  #define SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ __pragma(warning(disable:4204))\n#else\n  #define SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_\n#endif\n\n/* This warning needs a lot of work.  It is triggered if all you do is\n * pass the value to memcpy/__builtin_memcpy, or if you initialize a\n * member of the union, even if that member takes up the entire union.\n * Last tested with clang-10, hopefully things will improve in the\n * future; if clang fixes this I'd love to enable it. */\n#if \\\n  HEDLEY_HAS_WARNING(\"-Wconditional-uninitialized\")\n  #define SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ _Pragma(\"clang diagnostic ignored \\\"-Wconditional-uninitialized\\\"\")\n#else\n  #define SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_\n#endif\n\n/* This warning is meant to catch things like `0.3 + 0.4 == 0.7`, which\n * is false.  However, SIMDe uses these operations exclusively\n * for things like _mm_cmpeq_ps, for which we really do want to check\n * for equality (or inequality).\n *\n * If someone wants to put together a SIMDE_FLOAT_EQUAL(a, op, b) macro\n * which just wraps a check in some code to disable this diagnostic I'd\n * be happy to accept it. */\n#if \\\n  HEDLEY_HAS_WARNING(\"-Wfloat-equal\") || \\\n  HEDLEY_GCC_VERSION_CHECK(3,0,0)\n  #define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ _Pragma(\"GCC diagnostic ignored \\\"-Wfloat-equal\\\"\")\n#else\n  #define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_\n#endif\n\n/* This is because we use HEDLEY_STATIC_ASSERT for static assertions.\n * If Hedley can't find an implementation it will preprocess to\n * nothing, which means there will be a trailing semi-colon. */\n#if HEDLEY_HAS_WARNING(\"-Wextra-semi\")\n  #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ _Pragma(\"clang diagnostic ignored \\\"-Wextra-semi\\\"\")\n#elif HEDLEY_GCC_VERSION_CHECK(8,1,0) && defined(__cplusplus)\n  #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ _Pragma(\"GCC diagnostic ignored \\\"-Wextra-semi\\\"\")\n#else\n  #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_\n#endif\n\n/* We do use a few variadic macros, which technically aren't available\n * until C99 and C++11, but every compiler I'm aware of has supported\n * them for much longer.  That said, usage is isolated to the test\n * suite and compilers known to support them. */\n#if HEDLEY_HAS_WARNING(\"-Wvariadic-macros\") || HEDLEY_GCC_VERSION_CHECK(4,0,0)\n  #if HEDLEY_HAS_WARNING(\"-Wc++98-compat-pedantic\")\n    #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ \\\n      _Pragma(\"clang diagnostic ignored \\\"-Wvariadic-macros\\\"\") \\\n      _Pragma(\"clang diagnostic ignored \\\"-Wc++98-compat-pedantic\\\"\")\n  #else\n    #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ _Pragma(\"GCC diagnostic ignored \\\"-Wvariadic-macros\\\"\")\n  #endif\n#else\n  #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_\n#endif\n\n/* emscripten requires us to use a __wasm_unimplemented_simd128__ macro\n * before we can access certain SIMD intrinsics, but this diagnostic\n * warns about it being a reserved name.  It is a reserved name, but\n * it's reserved for the compiler and we are using it to convey\n * information to the compiler.\n *\n * This is also used when enabling native aliases since we don't get to\n * choose the macro names. 
*/\n#if HEDLEY_HAS_WARNING(\"-Wreserved-id-macro\")\n  #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ _Pragma(\"clang diagnostic ignored \\\"-Wreserved-id-macro\\\"\")\n#else\n  #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_\n#endif\n\n/* Similar to above; types like simde__m128i are reserved due to the\n * double underscore, but we didn't choose them, Intel did. */\n#if HEDLEY_HAS_WARNING(\"-Wreserved-identifier\")\n  #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ _Pragma(\"clang diagnostic ignored \\\"-Wreserved-identifier\\\"\")\n#else\n  #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_\n#endif\n\n/* clang 3.8 warns about the packed attribute being unnecessary when\n * used in the _mm_loadu_* functions.  That *may* be true for version\n * 3.8, but for later versions it is crucial in order to make unaligned\n * access safe. */\n#if HEDLEY_HAS_WARNING(\"-Wpacked\")\n  #define SIMDE_DIAGNOSTIC_DISABLE_PACKED_ _Pragma(\"clang diagnostic ignored \\\"-Wpacked\\\"\")\n#else\n  #define SIMDE_DIAGNOSTIC_DISABLE_PACKED_\n#endif\n\n/* Triggered when assigning a float to a double implicitly.  We use\n * explicit casts in SIMDe, this is only used in the test suite. */\n#if HEDLEY_HAS_WARNING(\"-Wdouble-promotion\")\n  #define SIMDE_DIAGNOSTIC_DISABLE_DOUBLE_PROMOTION_ _Pragma(\"clang diagnostic ignored \\\"-Wdouble-promotion\\\"\")\n#else\n  #define SIMDE_DIAGNOSTIC_DISABLE_DOUBLE_PROMOTION_\n#endif\n\n/* Several compilers treat conformant array parameters as VLAs.  We\n * test to make sure we're in C mode (C++ doesn't support CAPs), and\n * that the version of the standard supports CAPs.  We also reject\n * some buggy compilers like MSVC (the logic is in Hedley if you want\n * to take a look), but with certain warnings enabled some compilers\n * still like to emit a diagnostic. */\n#if HEDLEY_HAS_WARNING(\"-Wvla\")\n  #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ _Pragma(\"clang diagnostic ignored \\\"-Wvla\\\"\")\n#elif HEDLEY_GCC_VERSION_CHECK(4,3,0)\n  #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ _Pragma(\"GCC diagnostic ignored \\\"-Wvla\\\"\")\n#else\n  #define SIMDE_DIAGNOSTIC_DISABLE_VLA_\n#endif\n\n/* If you add an unused attribute to a function and don't use it, clang\n * may emit this. 
*/\n#if HEDLEY_HAS_WARNING(\"-Wused-but-marked-unused\")\n  #define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ _Pragma(\"clang diagnostic ignored \\\"-Wused-but-marked-unused\\\"\")\n#else\n  #define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_\n#endif\n\n#if HEDLEY_HAS_WARNING(\"-Wpass-failed\")\n  #define SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ _Pragma(\"clang diagnostic ignored \\\"-Wpass-failed\\\"\")\n#else\n  #define SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_\n#endif\n\n#if HEDLEY_HAS_WARNING(\"-Wpadded\")\n  #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ _Pragma(\"clang diagnostic ignored \\\"-Wpadded\\\"\")\n#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) /* Likely goes back further */\n  #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ __pragma(warning(disable:4324))\n#else\n  #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_\n#endif\n\n#if HEDLEY_HAS_WARNING(\"-Wzero-as-null-pointer-constant\")\n  #define SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_ _Pragma(\"clang diagnostic ignored \\\"-Wzero-as-null-pointer-constant\\\"\")\n#else\n  #define SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_\n#endif\n\n#if HEDLEY_HAS_WARNING(\"-Wold-style-cast\")\n  #define SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_ _Pragma(\"clang diagnostic ignored \\\"-Wold-style-cast\\\"\")\n#else\n  #define SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_\n#endif\n\n#if HEDLEY_HAS_WARNING(\"-Wcast-function-type\") || HEDLEY_GCC_VERSION_CHECK(8,0,0)\n  #define SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_ _Pragma(\"GCC diagnostic ignored \\\"-Wcast-function-type\\\"\")\n#else\n  #define SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_\n#endif\n\n/* clang will emit this warning when we use C99 extensions when not in\n * C99 mode, even though it does support this.  In such cases we check\n * the compiler and version first, so we know it's not a problem. */\n#if HEDLEY_HAS_WARNING(\"-Wc99-extensions\")\n  #define SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ _Pragma(\"clang diagnostic ignored \\\"-Wc99-extensions\\\"\")\n#else\n  #define SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_\n#endif\n\n/* Similar problm as above; we rely on some basic C99 support, but clang\n * has started warning obut this even in C17 mode with -Weverything. */\n#if HEDLEY_HAS_WARNING(\"-Wdeclaration-after-statement\")\n  #define SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ _Pragma(\"clang diagnostic ignored \\\"-Wdeclaration-after-statement\\\"\")\n#else\n  #define SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_\n#endif\n\n/* https://github.com/simd-everywhere/simde/issues/277 */\n#if defined(HEDLEY_GCC_VERSION) && HEDLEY_GCC_VERSION_CHECK(4,6,0) && !HEDLEY_GCC_VERSION_CHECK(6,4,0) && defined(__cplusplus)\n  #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ _Pragma(\"GCC diagnostic ignored \\\"-Wunused-but-set-variable\\\"\")\n#else\n  #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_\n#endif\n\n/* This is the warning that you normally define _CRT_SECURE_NO_WARNINGS\n * to silence, but you have to do that before including anything and\n * that would require reordering includes. */\n#if defined(_MSC_VER)\n  #define SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_ __pragma(warning(disable:4996))\n#else\n  #define SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_\n#endif\n\n/* Some compilers, such as clang, may use `long long` for 64-bit\n * integers, but `long long` triggers a diagnostic with\n * -Wc++98-compat-pedantic which says 'long long' is incompatible with\n * C++98. 
*/\n#if HEDLEY_HAS_WARNING(\"-Wc++98-compat-pedantic\")\n  #if HEDLEY_HAS_WARNING(\"-Wc++11-long-long\")\n    #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ \\\n      _Pragma(\"clang diagnostic ignored \\\"-Wc++98-compat-pedantic\\\"\") \\\n      _Pragma(\"clang diagnostic ignored \\\"-Wc++11-long-long\\\"\")\n  #else\n    #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ _Pragma(\"clang diagnostic ignored \\\"-Wc++98-compat-pedantic\\\"\")\n  #endif\n#else\n  #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_\n#endif\n\n/* Some problem as above */\n#if HEDLEY_HAS_WARNING(\"-Wc++11-long-long\")\n  #define SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ _Pragma(\"clang diagnostic ignored \\\"-Wc++11-long-long\\\"\")\n#else\n  #define SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_\n#endif\n\n/* emscripten emits this whenever stdin/stdout/stderr is used in a\n * macro. */\n#if HEDLEY_HAS_WARNING(\"-Wdisabled-macro-expansion\")\n  #define SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_ _Pragma(\"clang diagnostic ignored \\\"-Wdisabled-macro-expansion\\\"\")\n#else\n  #define SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_\n#endif\n\n/* Clang uses C11 generic selections to implement some AltiVec\n * functions, which triggers this diagnostic when not compiling\n * in C11 mode */\n#if HEDLEY_HAS_WARNING(\"-Wc11-extensions\")\n  #define SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ _Pragma(\"clang diagnostic ignored \\\"-Wc11-extensions\\\"\")\n#else\n  #define SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_\n#endif\n\n/* Clang sometimes triggers this warning in macros in the AltiVec and\n * NEON headers, or due to missing functions. */\n#if HEDLEY_HAS_WARNING(\"-Wvector-conversion\")\n  #define SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ _Pragma(\"clang diagnostic ignored \\\"-Wvector-conversion\\\"\")\n  /* For NEON, the situation with -Wvector-conversion in clang < 10 is\n   * bad enough that we just disable the warning altogether.  On x86,\n   * clang has similar issues on several sse4.2+ intrinsics before 3.8. */\n  #if \\\n      (defined(SIMDE_ARCH_ARM) && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0)) || \\\n      SIMDE_DETECT_CLANG_VERSION_NOT(3,8,0)\n    #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_\n  #endif\n#else\n  #define SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_\n#endif\n#if !defined(SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_)\n  #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_\n#endif\n\n/* Prior to 5.0, clang didn't support disabling diagnostics in\n * statement exprs.  As a result, some macros we use don't\n * properly silence warnings. 
*/\n#if SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING(\"-Wcast-qual\") && HEDLEY_HAS_WARNING(\"-Wcast-align\")\n  #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma(\"clang diagnostic ignored \\\"-Wcast-qual\\\"\") _Pragma(\"clang diagnostic ignored \\\"-Wcast-align\\\"\")\n#elif SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING(\"-Wcast-qual\")\n  #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma(\"clang diagnostic ignored \\\"-Wcast-qual\\\"\")\n#elif SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING(\"-Wcast-align\")\n  #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma(\"clang diagnostic ignored \\\"-Wcast-align\\\"\")\n#else\n  #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_\n#endif\n\n/* SLEEF triggers this a *lot* in their headers */\n#if HEDLEY_HAS_WARNING(\"-Wignored-qualifiers\")\n  #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ _Pragma(\"clang diagnostic ignored \\\"-Wignored-qualifiers\\\"\")\n#elif HEDLEY_GCC_VERSION_CHECK(4,3,0)\n  #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ _Pragma(\"GCC diagnostic ignored \\\"-Wignored-qualifiers\\\"\")\n#else\n  #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_\n#endif\n\n/* GCC emits this under some circumstances when using __int128 */\n#if HEDLEY_GCC_VERSION_CHECK(4,8,0)\n  #define SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ _Pragma(\"GCC diagnostic ignored \\\"-Wpedantic\\\"\")\n#else\n  #define SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_\n#endif\n\n/* MSVC doesn't like (__assume(0), code) and will warn about code being\n * unreachable, but we want it there because not all compilers\n * understand the unreachable macro and will complain if it is missing.\n * I'm planning on adding a new macro to Hedley to handle this a bit\n * more elegantly, but until then... */\n#if defined(HEDLEY_MSVC_VERSION)\n  #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ __pragma(warning(disable:4702))\n#elif defined(__clang__)\n  #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ HEDLEY_PRAGMA(clang diagnostic ignored \"-Wunreachable-code\")\n#else\n  #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_\n#endif\n\n/* This is a false positive from GCC in a few places. */\n#if HEDLEY_GCC_VERSION_CHECK(4,7,0)\n  #define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ _Pragma(\"GCC diagnostic ignored \\\"-Wmaybe-uninitialized\\\"\")\n#else\n  #define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_\n#endif\n\n#if defined(SIMDE_ENABLE_NATIVE_ALIASES)\n  #define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ \\\n    SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_\n#else\n  #define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_\n#endif\n\n/* Some native functions on E2K with instruction set < v6 are declared\n * as deprecated due to inefficiency. Still they are more efficient\n * than SIMDe implementation. So we're using them, and switching off\n * these deprecation warnings. 
*/\n#if defined(HEDLEY_MCST_LCC_VERSION)\n#  define SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS _Pragma(\"diag_suppress 1215,1444\")\n#  define SIMDE_LCC_REVERT_DEPRECATED_WARNINGS _Pragma(\"diag_default 1215,1444\")\n#else\n#  define SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS\n#  define SIMDE_LCC_REVERT_DEPRECATED_WARNINGS\n#endif\n\n#define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS \\\n  HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION \\\n  SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ \\\n  SIMDE_DIAGNOSTIC_DISABLE_PSABI_ \\\n  SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ \\\n  SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ \\\n  SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ \\\n  SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ \\\n  SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ \\\n  SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ \\\n  SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ \\\n  SIMDE_DIAGNOSTIC_DISABLE_VLA_ \\\n  SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ \\\n  SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ \\\n  SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ \\\n  SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ \\\n  SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ \\\n  SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ \\\n  SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ \\\n  SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_\n\n#endif /* !defined(SIMDE_DIAGNOSTIC_H) */\n"
  },
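The suppression macros collected in simde-diagnostic.h only take effect between HEDLEY_DIAGNOSTIC_PUSH and HEDLEY_DIAGNOSTIC_POP, which keeps them scoped to SIMDe's own headers. Below is a minimal standalone sketch of that push/ignore/pop pattern, with the pragmas written out by hand instead of through the Hedley macro names they expand to; the function and the choice of -Wfloat-equal are illustrative, not part of SIMDe.

/* Hypothetical illustration of the scoped-suppression pattern used above:
 * save the diagnostic state, silence one warning for a region of code,
 * then restore the caller's settings so the suppression does not leak. */
#if defined(__clang__)
  #pragma clang diagnostic push
  #pragma clang diagnostic ignored "-Wfloat-equal"
#elif defined(__GNUC__)
  #pragma GCC diagnostic push
  #pragma GCC diagnostic ignored "-Wfloat-equal"
#endif

static int example_is_zero(double x) {
  return x == 0.0; /* would normally trigger -Wfloat-equal */
}

#if defined(__clang__)
  #pragma clang diagnostic pop
#elif defined(__GNUC__)
  #pragma GCC diagnostic pop
#endif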
  {
    "path": "external_libs/pgenlib/simde/simde-f16.h",
    "content": "/* SPDX-License-Identifier: MIT\n *\n * Permission is hereby granted, free of charge, to any person\n * obtaining a copy of this software and associated documentation\n * files (the \"Software\"), to deal in the Software without\n * restriction, including without limitation the rights to use, copy,\n * modify, merge, publish, distribute, sublicense, and/or sell copies\n * of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be\n * included in all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND,\n * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\n * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\n * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\n * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\n * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\n * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n * SOFTWARE.\n *\n * Copyright:\n *   2021      Evan Nemerson <evan@nemerson.com>\n *   2023      Ju-Hung Li <jhlee@pllab.cs.nthu.edu.tw> (Copyright owned by NTHU pllab)\n */\n\n#include \"hedley.h\"\n#include \"simde-common.h\"\n#include \"simde-detect-clang.h\"\n\n#if !defined(SIMDE_FLOAT16_H)\n#define SIMDE_FLOAT16_H\n\nHEDLEY_DIAGNOSTIC_PUSH\nSIMDE_DISABLE_UNWANTED_DIAGNOSTICS\nSIMDE_BEGIN_DECLS_\n\n/* Portable version which should work on pretty much any compiler.\n * Obviously you can't rely on compiler support for things like\n * conversion to/from 32-bit floats, so make sure you always use the\n * functions and macros in this file!\n *\n * The portable implementations are (heavily) based on CC0 code by\n * Fabian Giesen: <https://gist.github.com/rygorous/2156668> (see also\n * <https://fgiesen.wordpress.com/2012/03/28/half-to-float-done-quic/>).\n * I have basically just modified it to get rid of some UB (lots of\n * aliasing, right shifting a negative value), use fixed-width types,\n * and work in C. */\n#define SIMDE_FLOAT16_API_PORTABLE 1\n/* _Float16, per C standard (TS 18661-3;\n * <http://www.open-std.org/jtc1/sc22/wg14/www/docs/n1945.pdf>). */\n#define SIMDE_FLOAT16_API_FLOAT16 2\n/* clang >= 6.0 supports __fp16 as an interchange format on all\n * targets, but only allows you to use them for arguments and return\n * values on targets which have defined an ABI.  We get around the\n * restriction by wrapping the __fp16 in a struct, but we can't do\n * that on Arm since it would break compatibility with the NEON F16\n * functions. */\n#define SIMDE_FLOAT16_API_FP16_NO_ABI 3\n/* This is basically __fp16 as specified by Arm, where arguments and\n * return values are raw __fp16 values not structs. */\n#define SIMDE_FLOAT16_API_FP16 4\n\n/* Choosing an implementation.  This is a bit rough, but I don't have\n * any ideas on how to improve it.  If you do, patches are definitely\n * welcome. 
*/\n#if !defined(SIMDE_FLOAT16_API)\n  #if defined(__ARM_FP16_FORMAT_IEEE) && (defined(SIMDE_ARM_NEON_FP16) || defined(__ARM_FP16_ARGS))\n    #define SIMDE_FLOAT16_API SIMDE_FLOAT16_API_FP16\n  #elif !defined(__EMSCRIPTEN__) && !(defined(__clang__) && defined(SIMDE_ARCH_POWER)) && \\\n    !(defined(HEDLEY_MSVC_VERSION) && defined(__clang__)) && \\\n    !(defined(SIMDE_ARCH_MIPS) && defined(__clang__)) && \\\n    !(defined(__clang__) && defined(SIMDE_ARCH_RISCV64)) && ( \\\n      defined(SIMDE_X86_AVX512FP16_NATIVE) || \\\n      (defined(SIMDE_ARCH_X86_SSE2) && HEDLEY_GCC_VERSION_CHECK(12,0,0)) || \\\n      (defined(SIMDE_ARCH_AARCH64) && HEDLEY_GCC_VERSION_CHECK(7,0,0) && !defined(__cplusplus)) || \\\n      ((defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64)) && SIMDE_DETECT_CLANG_VERSION_CHECK(15,0,0)) || \\\n      (!(defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64)) && SIMDE_DETECT_CLANG_VERSION_CHECK(6,0,0))) || \\\n      defined(SIMDE_ARCH_RISCV_ZVFH)\n    /* We haven't found a better way to detect this.  It seems like defining\n    * __STDC_WANT_IEC_60559_TYPES_EXT__, then including float.h, then\n    * checking for defined(FLT16_MAX) should work, but both gcc and\n    * clang will define the constants even if _Float16 is not\n    * supported.  Ideas welcome. */\n    #define SIMDE_FLOAT16_API SIMDE_FLOAT16_API_FLOAT16\n  #elif defined(__FLT16_MIN__) && \\\n      (defined(__clang__) && \\\n      (!defined(SIMDE_ARCH_AARCH64) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) \\\n      && !defined(SIMDE_ARCH_RISCV64))\n    #define SIMDE_FLOAT16_API SIMDE_FLOAT16_API_FP16_NO_ABI\n  #else\n    #define SIMDE_FLOAT16_API SIMDE_FLOAT16_API_PORTABLE\n  #endif\n#endif\n\n#if SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16\n  typedef _Float16 simde_float16;\n  #define SIMDE_FLOAT16_IS_SCALAR 1\n  #if !defined(__cplusplus)\n    #define SIMDE_FLOAT16_C(value) value##f16\n  #else\n    #define SIMDE_FLOAT16_C(value) HEDLEY_STATIC_CAST(_Float16, (value))\n  #endif\n#elif SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16_NO_ABI\n  typedef struct { __fp16 value; } simde_float16;\n  #if defined(SIMDE_STATEMENT_EXPR_) && !defined(SIMDE_TESTS_H)\n    #define SIMDE_FLOAT16_C(value) (__extension__({ ((simde_float16) { HEDLEY_DIAGNOSTIC_PUSH SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ HEDLEY_STATIC_CAST(__fp16, (value)) }); HEDLEY_DIAGNOSTIC_POP }))\n  #else\n    #define SIMDE_FLOAT16_C(value) ((simde_float16) { HEDLEY_STATIC_CAST(__fp16, (value)) })\n    #define SIMDE_FLOAT16_IS_SCALAR 1\n  #endif\n#elif SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16\n  typedef __fp16 simde_float16;\n  #define SIMDE_FLOAT16_IS_SCALAR 1\n  #define SIMDE_FLOAT16_C(value) HEDLEY_STATIC_CAST(__fp16, (value))\n#elif SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_PORTABLE\n  typedef struct { uint16_t value; } simde_float16;\n#else\n  #error No 16-bit floating point API.\n#endif\n\n#if \\\n    defined(SIMDE_VECTOR_OPS) && \\\n    (SIMDE_FLOAT16_API != SIMDE_FLOAT16_API_PORTABLE) && \\\n    (SIMDE_FLOAT16_API != SIMDE_FLOAT16_API_FP16_NO_ABI)\n  #define SIMDE_FLOAT16_VECTOR\n#endif\n\n/* Reinterpret -- you *generally* shouldn't need these, they're really\n * intended for internal use.  However, on x86 half-precision floats\n * get stuffed into a __m128i/__m256i, so it may be useful. 
*/\n\nSIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float16_as_uint16,      uint16_t, simde_float16)\nSIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint16_as_float16, simde_float16,      uint16_t)\n\n#if SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_PORTABLE\n  #define SIMDE_NANHF simde_uint16_as_float16(0x7E00) // a quiet Not-a-Number\n  #define SIMDE_INFINITYHF simde_uint16_as_float16(0x7C00)\n  #define SIMDE_NINFINITYHF simde_uint16_as_float16(0xFC00)\n#else\n  #if SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16_NO_ABI\n    #if SIMDE_MATH_BUILTIN_LIBM(nanf16)\n      #define SIMDE_NANHF SIMDE_FLOAT16_C(__builtin_nanf16(\"\"))\n    #elif defined(SIMDE_MATH_NAN)\n      #define SIMDE_NANHF SIMDE_FLOAT16_C(SIMDE_MATH_NAN)\n    #endif\n    #if SIMDE_MATH_BUILTIN_LIBM(inf16)\n      #define SIMDE_INFINITYHF SIMDE_FLOAT16_C(__builtin_inf16())\n      #define SIMDE_NINFINITYHF SIMDE_FLOAT16_C(-__builtin_inf16())\n    #else\n      #define SIMDE_INFINITYHF SIMDE_FLOAT16_C(SIMDE_MATH_INFINITY)\n      #define SIMDE_NINFINITYHF SIMDE_FLOAT16_C(-SIMDE_MATH_INFINITY)\n    #endif\n  #else\n    #if SIMDE_MATH_BUILTIN_LIBM(nanf16)\n      #define SIMDE_NANHF  __builtin_nanf16(\"\")\n    #elif defined(SIMDE_MATH_NAN)\n      #define SIMDE_NANHF SIMDE_MATH_NAN\n    #endif\n    #if SIMDE_MATH_BUILTIN_LIBM(inf16)\n      #define SIMDE_INFINITYHF __builtin_inf16()\n      #define SIMDE_NINFINITYHF -(__builtin_inf16())\n    #else\n      #define SIMDE_INFINITYHF HEDLEY_STATIC_CAST(simde_float16, SIMDE_MATH_INFINITY)\n      #define SIMDE_NINFINITYHF HEDLEY_STATIC_CAST(simde_float16, -SIMDE_MATH_INFINITY)\n    #endif\n  #endif\n#endif\n\n/* Conversion -- convert between single-precision and half-precision\n * floats. */\nstatic HEDLEY_ALWAYS_INLINE HEDLEY_CONST\nsimde_float16\nsimde_float16_from_float32 (simde_float32 value) {\n  simde_float16 res;\n\n  #if \\\n      (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16) || \\\n      (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16)\n    res = HEDLEY_STATIC_CAST(simde_float16, value);\n  #elif (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16_NO_ABI)\n    res.value = HEDLEY_STATIC_CAST(__fp16, value);\n  #else\n    /* This code is CC0, based heavily on code by Fabian Giesen. */\n    uint32_t f32u = simde_float32_as_uint32(value);\n    static const uint32_t f32u_infty = UINT32_C(255) << 23;\n    static const uint32_t f16u_max = (UINT32_C(127) + UINT32_C(16)) << 23;\n    static const uint32_t denorm_magic =\n      ((UINT32_C(127) - UINT32_C(15)) + (UINT32_C(23) - UINT32_C(10)) + UINT32_C(1)) << 23;\n    uint16_t f16u;\n\n    uint32_t sign = f32u & (UINT32_C(1) << 31);\n    f32u ^= sign;\n\n   /* NOTE all the integer compares in this function cast the operands\n    * to signed values to help compilers vectorize to SSE2, which lacks\n    * unsigned comparison instructions.  This is fine since all\n    * operands are below 0x80000000 (we clear the sign bit). */\n\n    if (f32u > f16u_max) { /* result is Inf or NaN (all exponent bits set) */\n      f16u = (f32u > f32u_infty) ?  UINT32_C(0x7e00) : UINT32_C(0x7c00); /* NaN->qNaN and Inf->Inf */\n    } else { /* (De)normalized number or zero */\n      if (f32u < (UINT32_C(113) << 23)) { /* resulting FP16 is subnormal or zero */\n        /* use a magic value to align our 10 mantissa bits at the bottom of\n        * the float. as long as FP addition is round-to-nearest-even this\n        * just works. 
*/\n        f32u = simde_float32_as_uint32(simde_uint32_as_float32(f32u) + simde_uint32_as_float32(denorm_magic));\n\n        /* and one integer subtract of the bias later, we have our final float! */\n        f16u = HEDLEY_STATIC_CAST(uint16_t, f32u - denorm_magic);\n      } else {\n        uint32_t mant_odd = (f32u >> 13) & 1;\n\n        /* update exponent, rounding bias part 1 */\n        f32u += (HEDLEY_STATIC_CAST(uint32_t, 15 - 127) << 23) + UINT32_C(0xfff);\n        /* rounding bias part 2 */\n        f32u += mant_odd;\n        /* take the bits! */\n        f16u = HEDLEY_STATIC_CAST(uint16_t, f32u >> 13);\n      }\n    }\n\n    f16u |= sign >> 16;\n    res = simde_uint16_as_float16(f16u);\n  #endif\n\n  return res;\n}\n\nstatic HEDLEY_ALWAYS_INLINE HEDLEY_CONST\nsimde_float32\nsimde_float16_to_float32 (simde_float16 value) {\n  simde_float32 res;\n\n  #if (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16)\n    res = HEDLEY_STATIC_CAST(simde_float32, value);\n  #else\n    /* This code is CC0, based heavily on code by Fabian Giesen. */\n    uint16_t half = simde_float16_as_uint16(value);\n    const simde_float32 denorm_magic = simde_uint32_as_float32((UINT32_C(113) << 23));\n    const uint32_t shifted_exp = UINT32_C(0x7c00) << 13; /* exponent mask after shift */\n    uint32_t f32u;\n\n    f32u = (half & UINT32_C(0x7fff)) << 13; /* exponent/mantissa bits */\n    uint32_t exp = shifted_exp & f32u; /* just the exponent */\n    f32u += (UINT32_C(127) - UINT32_C(15)) << 23; /* exponent adjust */\n\n    /* handle exponent special cases */\n    if (exp == shifted_exp) /* Inf/NaN? */\n      f32u += (UINT32_C(128) - UINT32_C(16)) << 23; /* extra exp adjust */\n    else if (exp == 0) { /* Zero/Denormal? */\n      f32u += (1) << 23; /* extra exp adjust */\n      f32u = simde_float32_as_uint32(simde_uint32_as_float32(f32u) - denorm_magic); /* renormalize */\n    }\n\n    f32u |= (half & UINT32_C(0x8000)) << 16; /* sign bit */\n    res = simde_uint32_as_float32(f32u);\n  #endif\n\n  return res;\n}\n\n#ifdef SIMDE_FLOAT16_C\n  #define SIMDE_FLOAT16_VALUE(value) SIMDE_FLOAT16_C(value)\n#else\n  #define SIMDE_FLOAT16_VALUE(value) simde_float16_from_float32(SIMDE_FLOAT32_C(value))\n#endif\n\n#if !defined(simde_isinfhf) && defined(simde_math_isinff)\n  #define simde_isinfhf(a) simde_math_isinff(simde_float16_to_float32(a))\n#endif\n#if !defined(simde_isnanhf) && defined(simde_math_isnanf)\n  #define simde_isnanhf(a) simde_math_isnanf(simde_float16_to_float32(a))\n#endif\n#if !defined(simde_isnormalhf) && defined(simde_math_isnormalf)\n  #define simde_isnormalhf(a) simde_math_isnormalf(simde_float16_to_float32(a))\n#endif\n#if !defined(simde_issubnormalhf) && defined(simde_math_issubnormalf)\n  #define simde_issubnormalhf(a) simde_math_issubnormalf(simde_float16_to_float32(a))\n#endif\n\n#define simde_fpclassifyhf(a) simde_math_fpclassifyf(simde_float16_to_float32(a))\n\nstatic HEDLEY_INLINE\nuint8_t\nsimde_fpclasshf(simde_float16 v, const int imm8) {\n  uint16_t bits = simde_float16_as_uint16(v);\n  uint8_t negative = (bits >> 15) & 1;\n  uint16_t const ExpMask = 0x7C00; // [14:10]\n  uint16_t const MantMask = 0x03FF; // [9:0]\n  uint8_t exponent_all_ones = ((bits & ExpMask) == ExpMask);\n  uint8_t exponent_all_zeros = ((bits & ExpMask) == 0);\n  uint8_t mantissa_all_zeros = ((bits & MantMask) == 0);\n  uint8_t zero = exponent_all_zeros & mantissa_all_zeros;\n  uint8_t signaling_bit = (bits >> 9) & 1;\n\n  uint8_t result = 0;\n  uint8_t snan = exponent_all_ones & (!mantissa_all_zeros) & 
(!signaling_bit);\n  uint8_t qnan = exponent_all_ones & (!mantissa_all_zeros) & signaling_bit;\n  uint8_t positive_zero = (!negative) & zero;\n  uint8_t negative_zero = negative & zero;\n  uint8_t positive_infinity = (!negative) & exponent_all_ones & mantissa_all_zeros;\n  uint8_t negative_infinity = negative & exponent_all_ones & mantissa_all_zeros;\n  uint8_t denormal = exponent_all_zeros & (!mantissa_all_zeros);\n  uint8_t finite_negative = negative & (!exponent_all_ones) & (!zero);\n  result = (((imm8 >> 0) & qnan)              | \\\n            ((imm8 >> 1) & positive_zero)     | \\\n            ((imm8 >> 2) & negative_zero)     | \\\n            ((imm8 >> 3) & positive_infinity) | \\\n            ((imm8 >> 4) & negative_infinity) | \\\n            ((imm8 >> 5) & denormal)          | \\\n            ((imm8 >> 6) & finite_negative)   | \\\n            ((imm8 >> 7) & snan));\n  return result;\n}\n\nSIMDE_END_DECLS_\nHEDLEY_DIAGNOSTIC_POP\n\n#endif /* !defined(SIMDE_FLOAT16_H) */\n"
  },
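Because the SIMDE_FLOAT16_API_PORTABLE representation is a struct holding raw binary16 bits, conversions must go through simde_float16_from_float32 and simde_float16_to_float32 rather than casts, exactly as the header comment warns. A short usage sketch follows; the main function and input value are illustrative only, and binary16 keeps just 10 mantissa bits, so the round trip is lossy.

/* Hypothetical round-trip example for the conversion helpers above. */
#include <stdio.h>
#include "simde-f16.h"

int main(void) {
  simde_float16 h = simde_float16_from_float32(3.14159f);
  float back = simde_float16_to_float32(h);
  printf("%f\n", back); /* expect ~3.140625, the nearest binary16 value */
  return 0;
}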
  {
    "path": "external_libs/pgenlib/simde/simde-features.h",
    "content": "/* SPDX-License-Identifier: MIT\n *\n * Permission is hereby granted, free of charge, to any person\n * obtaining a copy of this software and associated documentation\n * files (the \"Software\"), to deal in the Software without\n * restriction, including without limitation the rights to use, copy,\n * modify, merge, publish, distribute, sublicense, and/or sell copies\n * of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be\n * included in all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND,\n * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\n * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\n * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\n * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\n * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\n * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n * SOFTWARE.\n *\n * Copyright:\n *   2020      Evan Nemerson <evan@nemerson.com>\n *   2023      Ju-Hung Li <jhlee@pllab.cs.nthu.edu.tw> (Copyright owned by NTHU pllab)\n */\n\n/* simde-arch.h is used to determine which features are available according\n   to the compiler.  However, we want to make it possible to forcibly enable\n   or disable APIs */\n\n#if !defined(SIMDE_FEATURES_H)\n#define SIMDE_FEATURES_H\n\n#include \"simde-arch.h\"\n#include \"simde-diagnostic.h\"\n\n#if !defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_SVML_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)\n  #if defined(SIMDE_ARCH_X86_SVML)\n    #define SIMDE_X86_SVML_NATIVE\n  #endif\n#endif\n\n#if !defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && !defined(SIMDE_X86_AVX512VP2INTERSECT_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)\n  #if defined(SIMDE_ARCH_X86_AVX512VP2INTERSECT)\n    #define SIMDE_X86_AVX512VP2INTERSECT_NATIVE\n  #endif\n#endif\n#if defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE)\n  #define SIMDE_X86_AVX512F_NATIVE\n#endif\n\n#if !defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && !defined(SIMDE_X86_AVX512VPOPCNTDQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)\n  #if defined(SIMDE_ARCH_X86_AVX512VPOPCNTDQ)\n    #define SIMDE_X86_AVX512VPOPCNTDQ_NATIVE\n  #endif\n#endif\n#if defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE)\n  #define SIMDE_X86_AVX512F_NATIVE\n#endif\n\n#if !defined(SIMDE_X86_AVX512BITALG_NATIVE) && !defined(SIMDE_X86_AVX512BITALG_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)\n  #if defined(SIMDE_ARCH_X86_AVX512BITALG)\n    #define SIMDE_X86_AVX512BITALG_NATIVE\n  #endif\n#endif\n#if defined(SIMDE_X86_AVX512BITALG_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE)\n  #define SIMDE_X86_AVX512F_NATIVE\n#endif\n\n#if !defined(SIMDE_X86_AVX512VBMI_NATIVE) && !defined(SIMDE_X86_AVX512VBMI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)\n  #if defined(SIMDE_ARCH_X86_AVX512VBMI)\n    #define SIMDE_X86_AVX512VBMI_NATIVE\n  #endif\n#endif\n#if defined(SIMDE_X86_AVX512VBMI_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE)\n  #define SIMDE_X86_AVX512F_NATIVE\n#endif\n\n#if !defined(SIMDE_X86_AVX512VBMI2_NATIVE) && !defined(SIMDE_X86_AVX512VBMI2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)\n  #if defined(SIMDE_ARCH_X86_AVX512VBMI2)\n    #define SIMDE_X86_AVX512VBMI2_NATIVE\n  #endif\n#endif\n#if defined(SIMDE_X86_AVX512VBMI2_NATIVE) && 
!defined(SIMDE_X86_AVX512F_NATIVE)\n  #define SIMDE_X86_AVX512F_NATIVE\n#endif\n\n#if !defined(SIMDE_X86_AVX512VNNI_NATIVE) && !defined(SIMDE_X86_AVX512VNNI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)\n  #if defined(SIMDE_ARCH_X86_AVX512VNNI)\n    #define SIMDE_X86_AVX512VNNI_NATIVE\n  #endif\n#endif\n#if defined(SIMDE_X86_AVX512VNNI_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE)\n  #define SIMDE_X86_AVX512F_NATIVE\n#endif\n\n#if !defined(SIMDE_X86_AVX5124VNNIW_NATIVE) && !defined(SIMDE_X86_AVX5124VNNIW_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)\n  #if defined(SIMDE_ARCH_X86_AVX5124VNNIW)\n    #define SIMDE_X86_AVX5124VNNIW_NATIVE\n  #endif\n#endif\n#if defined(SIMDE_X86_AVX5124VNNIW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE)\n  #define SIMDE_X86_AVX512F_NATIVE\n#endif\n\n#if !defined(SIMDE_X86_AVX512CD_NATIVE) && !defined(SIMDE_X86_AVX512CD_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)\n  #if defined(SIMDE_ARCH_X86_AVX512CD)\n    #define SIMDE_X86_AVX512CD_NATIVE\n  #endif\n#endif\n#if defined(SIMDE_X86_AVX512CD_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE)\n  #define SIMDE_X86_AVX512F_NATIVE\n#endif\n\n#if !defined(SIMDE_X86_AVX512DQ_NATIVE) && !defined(SIMDE_X86_AVX512DQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)\n  #if defined(SIMDE_ARCH_X86_AVX512DQ)\n    #define SIMDE_X86_AVX512DQ_NATIVE\n  #endif\n#endif\n#if defined(SIMDE_X86_AVX512DQ_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE)\n  #define SIMDE_X86_AVX512F_NATIVE\n#endif\n\n#if !defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_X86_AVX512VL_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)\n  #if defined(SIMDE_ARCH_X86_AVX512VL)\n    #define SIMDE_X86_AVX512VL_NATIVE\n  #endif\n#endif\n#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE)\n  #define SIMDE_X86_AVX512F_NATIVE\n#endif\n\n#if !defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512BW_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)\n  #if defined(SIMDE_ARCH_X86_AVX512BW)\n    #define SIMDE_X86_AVX512BW_NATIVE\n  #endif\n#endif\n#if defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE)\n  #define SIMDE_X86_AVX512F_NATIVE\n#endif\n\n#if !defined(SIMDE_X86_AVX512FP16_NATIVE) && !defined(SIMDE_X86_AVX512FP16_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)\n  #if defined(SIMDE_ARCH_X86_AVX512FP16)\n    #define SIMDE_X86_AVX512FP16_NATIVE\n  #endif\n#endif\n#if defined(SIMDE_X86_AVX512FP16_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE)\n  #define SIMDE_X86_AVX512F_NATIVE\n#endif\n\n#if !defined(SIMDE_X86_AVX512BF16_NATIVE) && !defined(SIMDE_X86_AVX512BF16_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)\n  #if defined(SIMDE_ARCH_X86_AVX512BF16)\n    #define SIMDE_X86_AVX512BF16_NATIVE\n  #endif\n#endif\n#if defined(SIMDE_X86_AVX512BF16_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE)\n  #define SIMDE_X86_AVX512F_NATIVE\n#endif\n\n#if !defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_X86_AVX512F_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)\n  #if defined(SIMDE_ARCH_X86_AVX512F)\n    #define SIMDE_X86_AVX512F_NATIVE\n  #endif\n#endif\n#if defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_X86_AVX2_NATIVE)\n  #define SIMDE_X86_AVX2_NATIVE\n#endif\n\n#if !defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_X86_FMA_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)\n  #if defined(SIMDE_ARCH_X86_FMA)\n    #define SIMDE_X86_FMA_NATIVE\n  #endif\n#endif\n#if defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_X86_AVX_NATIVE)\n  #define SIMDE_X86_AVX_NATIVE\n#endif\n\n#if !defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_X86_AVX2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)\n  #if 
defined(SIMDE_ARCH_X86_AVX2)\n    #define SIMDE_X86_AVX2_NATIVE\n  #endif\n#endif\n#if defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_X86_AVX_NATIVE)\n  #define SIMDE_X86_AVX_NATIVE\n#endif\n\n#if !defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_X86_AVX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)\n  #if defined(SIMDE_ARCH_X86_AVX)\n    #define SIMDE_X86_AVX_NATIVE\n  #endif\n#endif\n#if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_X86_SSE4_2_NATIVE)\n  #define SIMDE_X86_SSE4_2_NATIVE\n#endif\n\n#if !defined(SIMDE_X86_XOP_NATIVE) && !defined(SIMDE_X86_XOP_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)\n  #if defined(SIMDE_ARCH_X86_XOP)\n    #define SIMDE_X86_XOP_NATIVE\n  #endif\n#endif\n#if defined(SIMDE_X86_XOP_NATIVE) && !defined(SIMDE_X86_SSE4_2_NATIVE)\n  #define SIMDE_X86_SSE4_2_NATIVE\n#endif\n\n#if !defined(SIMDE_X86_SSE4_2_NATIVE) && !defined(SIMDE_X86_SSE4_2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)\n  #if defined(SIMDE_ARCH_X86_SSE4_2)\n    #define SIMDE_X86_SSE4_2_NATIVE\n  #endif\n#endif\n#if defined(SIMDE_X86_SSE4_2_NATIVE) && !defined(SIMDE_X86_SSE4_1_NATIVE)\n  #define SIMDE_X86_SSE4_1_NATIVE\n#endif\n\n#if !defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_X86_SSE4_1_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)\n  #if defined(SIMDE_ARCH_X86_SSE4_1)\n    #define SIMDE_X86_SSE4_1_NATIVE\n  #endif\n#endif\n#if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_X86_SSSE3_NATIVE)\n  #define SIMDE_X86_SSSE3_NATIVE\n#endif\n\n#if !defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_X86_SSSE3_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)\n  #if defined(SIMDE_ARCH_X86_SSSE3)\n    #define SIMDE_X86_SSSE3_NATIVE\n  #endif\n#endif\n#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_X86_SSE3_NATIVE)\n  #define SIMDE_X86_SSE3_NATIVE\n#endif\n\n#if !defined(SIMDE_X86_SSE3_NATIVE) && !defined(SIMDE_X86_SSE3_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)\n  #if defined(SIMDE_ARCH_X86_SSE3)\n    #define SIMDE_X86_SSE3_NATIVE\n  #endif\n#endif\n#if defined(SIMDE_X86_SSE3_NATIVE) && !defined(SIMDE_X86_SSE2_NATIVE)\n  #define SIMDE_X86_SSE2_NATIVE\n#endif\n\n#if !defined(SIMDE_X86_AES_NATIVE) && !defined(SIMDE_X86_AES_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)\n  #if defined(SIMDE_ARCH_X86_AES)\n    #define SIMDE_X86_AES_NATIVE\n  #endif\n#endif\n#if defined(SIMDE_X86_AES_NATIVE) && !defined(SIMDE_X86_SSE2_NATIVE)\n  #define SIMDE_X86_SSE2_NATIVE\n#endif\n\n#if !defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_X86_SSE2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)\n  #if defined(SIMDE_ARCH_X86_SSE2)\n    #define SIMDE_X86_SSE2_NATIVE\n  #endif\n#endif\n#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_X86_SSE_NATIVE)\n  #define SIMDE_X86_SSE_NATIVE\n#endif\n\n#if !defined(SIMDE_X86_SSE_NATIVE) && !defined(SIMDE_X86_SSE_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)\n  #if defined(SIMDE_ARCH_X86_SSE)\n    #define SIMDE_X86_SSE_NATIVE\n  #endif\n#endif\n\n#if !defined(SIMDE_X86_MMX_NATIVE) && !defined(SIMDE_X86_MMX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)\n  #if defined(SIMDE_ARCH_X86_MMX)\n    #define SIMDE_X86_MMX_NATIVE\n  #endif\n#endif\n\n#if !defined(SIMDE_X86_GFNI_NATIVE) && !defined(SIMDE_X86_GFNI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)\n  #if defined(SIMDE_ARCH_X86_GFNI)\n    #define SIMDE_X86_GFNI_NATIVE\n  #endif\n#endif\n\n#if !defined(SIMDE_X86_PCLMUL_NATIVE) && !defined(SIMDE_X86_PCLMUL_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)\n  #if defined(SIMDE_ARCH_X86_PCLMUL)\n    #define SIMDE_X86_PCLMUL_NATIVE\n  #endif\n#endif\n\n#if !defined(SIMDE_X86_VPCLMULQDQ_NATIVE) && !defined(SIMDE_X86_VPCLMULQDQ_NO_NATIVE) && 
!defined(SIMDE_NO_NATIVE)\n  #if defined(SIMDE_ARCH_X86_VPCLMULQDQ)\n    #define SIMDE_X86_VPCLMULQDQ_NATIVE\n  #endif\n#endif\n\n#if !defined(SIMDE_X86_F16C_NATIVE) && !defined(SIMDE_X86_F16C_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)\n  #if defined(SIMDE_ARCH_X86_F16C)\n    #define SIMDE_X86_F16C_NATIVE\n  #endif\n#endif\n\n#if !defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_SVML_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)\n  #if defined(SIMDE_ARCH_X86) && (defined(__INTEL_COMPILER) || (HEDLEY_MSVC_VERSION_CHECK(14, 20, 0) && !defined(__clang__)))\n    #define SIMDE_X86_SVML_NATIVE\n  #endif\n#endif\n\n#if defined(HEDLEY_MSVC_VERSION)\n  #pragma warning(push)\n  #pragma warning(disable:4799)\n#endif\n\n#if \\\n    defined(SIMDE_X86_AVX_NATIVE) || defined(SIMDE_X86_GFNI_NATIVE) || defined(SIMDE_X86_SVML_NATIVE)\n  #include <immintrin.h>\n#elif defined(SIMDE_X86_SSE4_2_NATIVE)\n  #include <nmmintrin.h>\n#elif defined(SIMDE_X86_SSE4_1_NATIVE)\n  #include <smmintrin.h>\n#elif defined(SIMDE_X86_SSSE3_NATIVE)\n  #include <tmmintrin.h>\n#elif defined(SIMDE_X86_SSE3_NATIVE)\n  #include <pmmintrin.h>\n#elif defined(SIMDE_X86_SSE2_NATIVE)\n  #include <emmintrin.h>\n#elif defined(SIMDE_X86_SSE_NATIVE)\n  #include <xmmintrin.h>\n#elif defined(SIMDE_X86_MMX_NATIVE)\n  #include <mmintrin.h>\n#endif\n\n#if defined(SIMDE_X86_XOP_NATIVE)\n  #if defined(_MSC_VER)\n    #include <intrin.h>\n  #else\n    #include <x86intrin.h>\n  #endif\n#endif\n\n#if defined(SIMDE_X86_AES_NATIVE)\n  #include <wmmintrin.h>\n#endif\n\n#if defined(HEDLEY_MSVC_VERSION)\n  #pragma warning(pop)\n#endif\n\n#if !defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_ARM_NEON_A64V8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)\n  #if defined(SIMDE_ARCH_ARM_NEON) && defined(SIMDE_ARCH_AARCH64) && SIMDE_ARCH_ARM_CHECK(8,0)\n    #define SIMDE_ARM_NEON_A64V8_NATIVE\n  #endif\n#endif\n#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NATIVE)\n  #define SIMDE_ARM_NEON_A32V8_NATIVE\n#endif\n\n#if !defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)\n  #if defined(SIMDE_ARCH_ARM_NEON) && SIMDE_ARCH_ARM_CHECK(8,0) && (__ARM_NEON_FP & 0x02)\n    #define SIMDE_ARM_NEON_A32V8_NATIVE\n  #endif\n#endif\n#if defined(__ARM_ACLE)\n  #include <arm_acle.h>\n#endif\n#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n  #define SIMDE_ARM_NEON_A32V7_NATIVE\n#endif\n\n#if !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_ARM_NEON_A32V7_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)\n  #if defined(SIMDE_ARCH_ARM_NEON) && SIMDE_ARCH_ARM_CHECK(7,0)\n    #define SIMDE_ARM_NEON_A32V7_NATIVE\n  #endif\n#endif\n#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n  #include <arm_neon.h>\n  #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)\n    #include <arm_fp16.h>\n  #endif\n#endif\n\n#if !defined(SIMDE_ARM_SVE_NATIVE) && !defined(SIMDE_ARM_SVE_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)\n  #if defined(SIMDE_ARCH_ARM_SVE)\n    #define SIMDE_ARM_SVE_NATIVE\n    #include <arm_sve.h>\n  #endif\n#endif\n\n#if !defined(SIMDE_RISCV_V_NATIVE) && !defined(SIMDE_RISCV_V_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)\n  #if defined(SIMDE_ARCH_RISCV_V)\n    #define SIMDE_RISCV_V_NATIVE\n  #endif\n#endif\n#if defined(SIMDE_RISCV_V_NATIVE)\n  #include <riscv_vector.h>\n#endif\n\n#if !defined(SIMDE_WASM_SIMD128_NATIVE) && !defined(SIMDE_WASM_SIMD128_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)\n  #if defined(SIMDE_ARCH_WASM_SIMD128)\n    #define SIMDE_WASM_SIMD128_NATIVE\n  #endif\n#endif\n\n#if 
!defined(SIMDE_WASM_RELAXED_SIMD_NATIVE) && !defined(SIMDE_WASM_RELAXED_SIMD_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)\n  #if defined(SIMDE_ARCH_WASM_RELAXED_SIMD)\n    #define SIMDE_WASM_RELAXED_SIMD_NATIVE\n  #endif\n#endif\n#if defined(SIMDE_WASM_SIMD128_NATIVE) || defined(SIMDE_WASM_RELAXED_SIMD_NATIVE)\n  #include <wasm_simd128.h>\n#endif\n\n#if !defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P9_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)\n  #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(900)\n    #define SIMDE_POWER_ALTIVEC_P9_NATIVE\n  #endif\n#endif\n#if defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P8)\n  #define SIMDE_POWER_ALTIVEC_P8_NATIVE\n#endif\n\n#if !defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)\n  #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(800)\n    #define SIMDE_POWER_ALTIVEC_P8_NATIVE\n  #endif\n#endif\n#if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P7)\n  #define SIMDE_POWER_ALTIVEC_P7_NATIVE\n#endif\n\n#if !defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P7_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)\n  #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(700)\n    #define SIMDE_POWER_ALTIVEC_P7_NATIVE\n  #endif\n#endif\n#if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P6)\n  #define SIMDE_POWER_ALTIVEC_P6_NATIVE\n#endif\n\n#if !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P6_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)\n  #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(600)\n    #define SIMDE_POWER_ALTIVEC_P6_NATIVE\n  #endif\n#endif\n#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P5)\n  #define SIMDE_POWER_ALTIVEC_P5_NATIVE\n#endif\n\n#if !defined(SIMDE_POWER_ALTIVEC_P5_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P5_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)\n  #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(500)\n    #define SIMDE_POWER_ALTIVEC_P5_NATIVE\n  #endif\n#endif\n\n#if !defined(SIMDE_ZARCH_ZVECTOR_15_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_15_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)\n  #if SIMDE_ARCH_ZARCH_CHECK(13) && defined(SIMDE_ARCH_ZARCH_ZVECTOR)\n    #define SIMDE_ZARCH_ZVECTOR_15_NATIVE\n  #endif\n#endif\n\n#if !defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_14_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)\n  #if SIMDE_ARCH_ZARCH_CHECK(12) && defined(SIMDE_ARCH_ZARCH_ZVECTOR)\n    #define SIMDE_ZARCH_ZVECTOR_14_NATIVE\n  #endif\n#endif\n\n#if !defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_13_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)\n  #if SIMDE_ARCH_ZARCH_CHECK(11) && defined(SIMDE_ARCH_ZARCH_ZVECTOR)\n    #define SIMDE_ZARCH_ZVECTOR_13_NATIVE\n  #endif\n#endif\n\n#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)\n  /* AltiVec conflicts with lots of stuff.  The bool keyword conflicts\n   * with the bool keyword in C++ and the bool macro in C99+ (defined\n   * in stdbool.h).  The vector keyword conflicts with std::vector in\n   * C++ if you are `using std;`.\n   *\n   * Luckily AltiVec allows you to use `__vector`/`__bool`/`__pixel`\n   * instead, but altivec.h will unconditionally define\n   * `vector`/`bool`/`pixel` so we need to work around that.\n   *\n   * Unfortunately this means that if your code uses AltiVec directly\n   * it may break.  If this is the case you'll want to define\n   * `SIMDE_POWER_ALTIVEC_NO_UNDEF` before including SIMDe.  
Or, even\n   * better, port your code to use the double-underscore versions. */\n  #if defined(bool)\n    #undef bool\n  #endif\n\n  #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)\n    #include <altivec.h>\n\n    #if !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF)\n      #if defined(vector)\n        #undef vector\n      #endif\n      #if defined(pixel)\n        #undef pixel\n      #endif\n      #if defined(bool)\n        #undef bool\n      #endif\n    #endif /* !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) */\n  #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)\n    #include <vecintrin.h>\n  #endif\n\n  /* Use these instead of vector/pixel/bool in SIMDe. */\n  #define SIMDE_POWER_ALTIVEC_VECTOR(T) __vector T\n  #define SIMDE_POWER_ALTIVEC_PIXEL __pixel\n  #define SIMDE_POWER_ALTIVEC_BOOL __bool\n\n  /* Re-define bool if we're using stdbool.h */\n  #if !defined(__cplusplus) && defined(__bool_true_false_are_defined) && !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF)\n    #define bool _Bool\n  #endif\n#endif\n\n#if !defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) && !defined(SIMDE_MIPS_LOONGSON_MMI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)\n  #if defined(SIMDE_ARCH_MIPS_LOONGSON_MMI)\n    #define SIMDE_MIPS_LOONGSON_MMI_NATIVE  1\n  #endif\n#endif\n#if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)\n  #include <loongson-mmiintrin.h>\n#endif\n\n#if !defined(SIMDE_MIPS_MSA_NATIVE) && !defined(SIMDE_MIPS_MSA_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)\n  #if defined(SIMDE_ARCH_MIPS_MSA)\n    #define SIMDE_MIPS_MSA_NATIVE  1\n  #endif\n#endif\n#if defined(SIMDE_MIPS_MSA_NATIVE)\n  #include <msa.h>\n#endif\n\n/* This is used to determine whether or not to fall back on a vector\n * function in an earlier ISA extension, as well as whether\n * we expect any attempts at vectorization to be fruitful or if we\n * expect to always be running serial code.\n *\n * Note that, for some architectures (okay, *one* architecture) there\n * can be a split where some types are supported for one vector length\n * but others only for a shorter length.  Therefore, it is possible to\n * provide separate values for float/int/double types. 
*/\n\n#if !defined(SIMDE_NATURAL_VECTOR_SIZE)\n  #if defined(SIMDE_X86_AVX512F_NATIVE)\n    #define SIMDE_NATURAL_VECTOR_SIZE (512)\n  #elif defined(SIMDE_X86_AVX2_NATIVE)\n    #define SIMDE_NATURAL_VECTOR_SIZE (256)\n  #elif defined(SIMDE_X86_AVX_NATIVE)\n    #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE (256)\n    #define SIMDE_NATURAL_INT_VECTOR_SIZE (128)\n    #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE (128)\n  #elif \\\n      defined(SIMDE_X86_SSE2_NATIVE) || \\\n      defined(SIMDE_ARM_NEON_A32V7_NATIVE) || \\\n      defined(SIMDE_WASM_SIMD128_NATIVE) || \\\n      defined(SIMDE_POWER_ALTIVEC_P5_NATIVE) || \\\n      defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) || \\\n      defined(SIMDE_MIPS_MSA_NATIVE)\n    #define SIMDE_NATURAL_VECTOR_SIZE (128)\n  #elif defined(SIMDE_X86_SSE_NATIVE)\n    #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE (128)\n    #define SIMDE_NATURAL_INT_VECTOR_SIZE (64)\n    #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE (0)\n  #elif defined(SIMDE_RISCV_V_NATIVE) && defined(__riscv_v_fixed_vlen)\n        //FIXME : SIMDE_NATURAL_VECTOR_SIZE == __riscv_v_fixed_vlen\n        #define SIMDE_NATURAL_VECTOR_SIZE (128)\n  #endif\n\n  #if !defined(SIMDE_NATURAL_VECTOR_SIZE)\n    #if defined(SIMDE_NATURAL_FLOAT_VECTOR_SIZE)\n      #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_FLOAT_VECTOR_SIZE\n    #elif defined(SIMDE_NATURAL_INT_VECTOR_SIZE)\n      #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_INT_VECTOR_SIZE\n    #elif defined(SIMDE_NATURAL_DOUBLE_VECTOR_SIZE)\n      #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_DOUBLE_VECTOR_SIZE\n    #else\n      #define SIMDE_NATURAL_VECTOR_SIZE (0)\n    #endif\n  #endif\n\n  #if !defined(SIMDE_NATURAL_FLOAT_VECTOR_SIZE)\n    #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE\n  #endif\n  #if !defined(SIMDE_NATURAL_INT_VECTOR_SIZE)\n    #define SIMDE_NATURAL_INT_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE\n  #endif\n  #if !defined(SIMDE_NATURAL_DOUBLE_VECTOR_SIZE)\n    #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE\n  #endif\n#endif\n\n#define SIMDE_NATURAL_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_VECTOR_SIZE > 0) && (SIMDE_NATURAL_VECTOR_SIZE <= (x)))\n#define SIMDE_NATURAL_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_VECTOR_SIZE > 0) && (SIMDE_NATURAL_VECTOR_SIZE >= (x)))\n#define SIMDE_NATURAL_FLOAT_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_FLOAT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_FLOAT_VECTOR_SIZE <= (x)))\n#define SIMDE_NATURAL_FLOAT_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_FLOAT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_FLOAT_VECTOR_SIZE >= (x)))\n#define SIMDE_NATURAL_INT_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_INT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_INT_VECTOR_SIZE <= (x)))\n#define SIMDE_NATURAL_INT_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_INT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_INT_VECTOR_SIZE >= (x)))\n#define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_DOUBLE_VECTOR_SIZE > 0) && (SIMDE_NATURAL_DOUBLE_VECTOR_SIZE <= (x)))\n#define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_DOUBLE_VECTOR_SIZE > 0) && (SIMDE_NATURAL_DOUBLE_VECTOR_SIZE >= (x)))\n\n/* Native aliases */\n#if defined(SIMDE_ENABLE_NATIVE_ALIASES)\n  #if !defined(SIMDE_X86_MMX_NATIVE)\n    #define SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES\n  #endif\n  #if !defined(SIMDE_X86_SSE_NATIVE)\n    #define SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES\n  #endif\n  #if !defined(SIMDE_X86_SSE2_NATIVE)\n    #define SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES\n  #endif\n  #if !defined(SIMDE_X86_SSE3_NATIVE)\n    #define SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES\n  #endif\n  #if !defined(SIMDE_X86_SSSE3_NATIVE)\n    #define 
SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES\n  #endif\n  #if !defined(SIMDE_X86_SSE4_1_NATIVE)\n    #define SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES\n  #endif\n  #if !defined(SIMDE_X86_SSE4_2_NATIVE)\n    #define SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES\n  #endif\n  #if !defined(SIMDE_X86_AVX_NATIVE)\n    #define SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES\n  #endif\n  #if !defined(SIMDE_X86_AVX2_NATIVE)\n    #define SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES\n  #endif\n  #if !defined(SIMDE_X86_FMA_NATIVE)\n    #define SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES\n  #endif\n  #if !defined(SIMDE_X86_AVX512F_NATIVE)\n    #define SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES\n  #endif\n  #if !defined(SIMDE_X86_AVX512VL_NATIVE)\n    #define SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES\n  #endif\n  #if !defined(SIMDE_X86_AVX512VBMI_NATIVE)\n    #define SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES\n  #endif\n  #if !defined(SIMDE_X86_AVX512VBMI2_NATIVE)\n    #define SIMDE_X86_AVX512VBMI2_ENABLE_NATIVE_ALIASES\n  #endif\n  #if !defined(SIMDE_X86_AVX512BW_NATIVE)\n    #define SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES\n  #endif\n  #if !defined(SIMDE_X86_AVX512VNNI_NATIVE)\n    #define SIMDE_X86_AVX512VNNI_ENABLE_NATIVE_ALIASES\n  #endif\n  #if !defined(SIMDE_X86_AVX5124VNNIW_NATIVE)\n    #define SIMDE_X86_AVX5124VNNIW_ENABLE_NATIVE_ALIASES\n  #endif\n  #if !defined(SIMDE_X86_AVX512BF16_NATIVE)\n    #define SIMDE_X86_AVX512BF16_ENABLE_NATIVE_ALIASES\n  #endif\n  #if !defined(SIMDE_X86_AVX512BITALG_NATIVE)\n    #define SIMDE_X86_AVX512BITALG_ENABLE_NATIVE_ALIASES\n  #endif\n  #if !defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE)\n    #define SIMDE_X86_AVX512VPOPCNTDQ_ENABLE_NATIVE_ALIASES\n  #endif\n  #if !defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE)\n    #define SIMDE_X86_AVX512VP2INTERSECT_ENABLE_NATIVE_ALIASES\n  #endif\n  #if !defined(SIMDE_X86_AVX512DQ_NATIVE)\n    #define SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES\n  #endif\n  #if !defined(SIMDE_X86_AVX512CD_NATIVE)\n    #define SIMDE_X86_AVX512CD_ENABLE_NATIVE_ALIASES\n  #endif\n  #if !defined(SIMDE_X86_AVX512FP16_NATIVE)\n    #define SIMDE_X86_AVX512FP16_ENABLE_NATIVE_ALIASES\n  #endif\n  #if !defined(SIMDE_X86_GFNI_NATIVE)\n    #define SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES\n  #endif\n  #if !defined(SIMDE_X86_PCLMUL_NATIVE)\n    #define SIMDE_X86_PCLMUL_ENABLE_NATIVE_ALIASES\n  #endif\n  #if !defined(SIMDE_X86_VPCLMULQDQ_NATIVE)\n    #define SIMDE_X86_VPCLMULQDQ_ENABLE_NATIVE_ALIASES\n  #endif\n  #if !defined(SIMDE_X86_F16C_NATIVE)\n    #define SIMDE_X86_F16C_ENABLE_NATIVE_ALIASES\n  #endif\n  #if !defined(SIMDE_X86_AES_NATIVE)\n    #define SIMDE_X86_AES_ENABLE_NATIVE_ALIASES\n  #endif\n  #if !defined(SIMDE_X86_SVML_NATIVE)\n    #define SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES\n  #endif\n\n  #if !defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n    #define SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES\n  #endif\n  #if !defined(SIMDE_ARM_NEON_A32V8_NATIVE)\n    #define SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES\n  #endif\n  #if !defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n    #define SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES\n  #endif\n\n  #if !defined(SIMDE_ARM_SVE_NATIVE)\n    #define SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES\n  #endif\n\n  #if !defined(SIMDE_RISCV_V_NATIVE)\n    #define SIMDE_RISCV_V_ENABLE_NATIVE_ALIASES\n  #endif\n\n  #if !defined(SIMDE_MIPS_MSA_NATIVE)\n    #define SIMDE_MIPS_MSA_ENABLE_NATIVE_ALIASES\n  #endif\n\n  #if !defined(SIMDE_WASM_SIMD128_NATIVE)\n    #define SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES\n  #endif\n#endif\n\n/* Are floating point values stored using IEEE 754?  
Knowing\n * this during preprocessing is a bit tricky, mostly because what\n * we're curious about is how values are stored and not whether the\n * implementation is fully conformant in terms of rounding, NaN\n * handling, etc.\n *\n * For example, if you use -ffast-math or -Ofast on\n * GCC or clang, IEEE 754 isn't strictly followed, therefore IEEE 754\n * support is not advertised (by defining __STDC_IEC_559__).\n *\n * However, what we care about is whether it is safe to assume that\n * floating point values are stored in IEEE 754 format, in which case\n * we can provide faster implementations of some functions.\n *\n * Luckily every vaguely modern architecture I'm aware of uses IEEE 754,\n * so we just assume IEEE 754 for now.  There is a test which verifies\n * this; if that test fails somewhere please let us know and we'll add\n * an exception for that platform.  Meanwhile, you can define\n * SIMDE_NO_IEEE754_STORAGE. */\n#if !defined(SIMDE_IEEE754_STORAGE) && !defined(SIMDE_NO_IEEE754_STORAGE)\n  #define SIMDE_IEEE754_STORAGE\n#endif\n\n#if defined(SIMDE_ARCH_ARM_NEON_FP16)\n  #define SIMDE_ARM_NEON_FP16\n#endif\n\n#if defined(SIMDE_ARCH_ARM_NEON_BF16)\n  #define SIMDE_ARM_NEON_BF16\n#endif\n\n#if !defined(SIMDE_LOONGARCH_LASX_NATIVE) && !defined(SIMDE_LOONGARCH_LASX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)\n  #if defined(SIMDE_ARCH_LOONGARCH_LASX)\n    #define SIMDE_LOONGARCH_LASX_NATIVE\n  #endif\n#endif\n\n#if !defined(SIMDE_LOONGARCH_LSX_NATIVE) && !defined(SIMDE_LOONGARCH_LSX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)\n  #if defined(SIMDE_ARCH_LOONGARCH_LSX)\n    #define SIMDE_LOONGARCH_LSX_NATIVE\n  #endif\n#endif\n\n#if defined(SIMDE_LOONGARCH_LASX_NATIVE)\n  #include <lasxintrin.h>\n#endif\n#if defined(SIMDE_LOONGARCH_LSX_NATIVE)\n  #include <lsxintrin.h>\n#endif\n\n#endif /* !defined(SIMDE_FEATURES_H) */\n"
  },
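Downstream code typically keys compile-time dispatch off the *_NATIVE macros and the SIMDE_NATURAL_VECTOR_SIZE family that simde-features.h defines. A hedged sketch of the idiom follows; EXAMPLE_UNROLL and the particular width-to-unroll mapping are illustrative only, not SIMDe API.

/* Hypothetical example: derive an unroll factor for a float loop from the
 * natural vector width reported by simde-features.h. */
#include "simde-features.h"

#if SIMDE_NATURAL_FLOAT_VECTOR_SIZE_GE(256)
  #define EXAMPLE_UNROLL 8  /* 256-bit registers hold 8 floats */
#elif SIMDE_NATURAL_FLOAT_VECTOR_SIZE_GE(128)
  #define EXAMPLE_UNROLL 4  /* 128-bit registers hold 4 floats */
#else
  #define EXAMPLE_UNROLL 1  /* no usable vector ISA: stay scalar */
#endif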
  {
    "path": "external_libs/pgenlib/simde/simde-math.h",
    "content": "/* SPDX-License-Identifier: MIT\n *\n * Permission is hereby granted, free of charge, to any person\n * obtaining a copy of this software and associated documentation\n * files (the \"Software\"), to deal in the Software without\n * restriction, including without limitation the rights to use, copy,\n * modify, merge, publish, distribute, sublicense, and/or sell copies\n * of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be\n * included in all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND,\n * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\n * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\n * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\n * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\n * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\n * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n * SOFTWARE.\n *\n * Copyright:\n *   2017-2020 Evan Nemerson <evan@nemerson.com>\n *   2023      Yi-Yen Chung <eric681@andestech.com> (Copyright owned by Andes Technology)\n */\n\n/* Attempt to find math functions.  Functions may be in <cmath>,\n * <math.h>, compiler built-ins/intrinsics, or platform/architecture\n * specific headers.  In some cases, especially those not built in to\n * libm, we may need to define our own implementations. */\n\n#if !defined(SIMDE_MATH_H)\n#define SIMDE_MATH_H 1\n\n#include \"hedley.h\"\n#include \"simde-features.h\"\n\n#include <stdint.h>\n#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n  #include <arm_neon.h>\n#endif\n\nHEDLEY_DIAGNOSTIC_PUSH\nSIMDE_DISABLE_UNWANTED_DIAGNOSTICS\n\n/* SLEEF support\n * https://sleef.org/\n *\n * If you include <sleef.h> prior to including SIMDe, SIMDe will use\n * SLEEF.  You can also define SIMDE_MATH_SLEEF_ENABLE prior to\n * including SIMDe to force the issue.\n *\n * Note that SLEEF does requires linking to libsleef.\n *\n * By default, SIMDe will use the 1 ULP functions, but if you use\n * SIMDE_ACCURACY_PREFERENCE of 0 we will use up to 4 ULP.  This is\n * only the case for the simde_math_* functions; for code in other\n * SIMDe headers which calls SLEEF directly we may use functions with\n * greater error if the API we're implementing is less precise (for\n * example, SVML guarantees 4 ULP, so we will generally use the 3.5\n * ULP functions from SLEEF). 
*/\n#if !defined(SIMDE_MATH_SLEEF_DISABLE)\n  #if defined(__SLEEF_H__)\n    #define SIMDE_MATH_SLEEF_ENABLE\n  #endif\n#endif\n\n#if defined(SIMDE_MATH_SLEEF_ENABLE) && !defined(__SLEEF_H__)\n  HEDLEY_DIAGNOSTIC_PUSH\n  SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_\n  #include <sleef.h>\n  HEDLEY_DIAGNOSTIC_POP\n#endif\n\n#if defined(SIMDE_MATH_SLEEF_ENABLE) && defined(__SLEEF_H__)\n  #if defined(SLEEF_VERSION_MAJOR)\n    #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (HEDLEY_VERSION_ENCODE(SLEEF_VERSION_MAJOR, SLEEF_VERSION_MINOR, SLEEF_VERSION_PATCHLEVEL) >= HEDLEY_VERSION_ENCODE(major, minor, patch))\n  #else\n    #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (HEDLEY_VERSION_ENCODE(3,0,0) >= HEDLEY_VERSION_ENCODE(major, minor, patch))\n  #endif\n#else\n  #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (0)\n#endif\n\n#if defined(__has_builtin)\n  #define SIMDE_MATH_BUILTIN_LIBM(func) __has_builtin(__builtin_##func)\n#elif \\\n    HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \\\n    HEDLEY_ARM_VERSION_CHECK(4,1,0) || \\\n    HEDLEY_GCC_VERSION_CHECK(4,4,0)\n  #define SIMDE_MATH_BUILTIN_LIBM(func) (1)\n#else\n  #define SIMDE_MATH_BUILTIN_LIBM(func) (0)\n#endif\n\n#if defined(HUGE_VAL)\n  /* Looks like <math.h> or <cmath> has already been included. */\n\n  /* The math.h from libc++ (yes, the C header from the C++ standard\n   * library) will define an isnan function, but not an isnan macro\n   * like the C standard requires.  So we detect the header guards\n   * macro libc++ uses. */\n  #if defined(isnan) || (defined(_LIBCPP_MATH_H) && !defined(_LIBCPP_CMATH))\n    #define SIMDE_MATH_HAVE_MATH_H\n  #elif defined(__cplusplus)\n    #define SIMDE_MATH_HAVE_CMATH\n  #endif\n#elif defined(__has_include)\n  #if defined(__cplusplus) && (__cplusplus >= 201103L) && __has_include(<cmath>)\n    #define SIMDE_MATH_HAVE_CMATH\n    #include <cmath>\n  #elif __has_include(<math.h>)\n    #define SIMDE_MATH_HAVE_MATH_H\n    #include <math.h>\n  #elif !defined(SIMDE_MATH_NO_LIBM)\n    #define SIMDE_MATH_NO_LIBM\n  #endif\n#elif !defined(SIMDE_MATH_NO_LIBM)\n  #if defined(__cplusplus) && (__cplusplus >= 201103L)\n    #define SIMDE_MATH_HAVE_CMATH\n    HEDLEY_DIAGNOSTIC_PUSH\n    #if defined(HEDLEY_MSVC_VERSION)\n      /* VS 14 emits this diagnostic about noexcept being used on a\n       * <cmath> function, which we can't do anything about. 
*/\n      #pragma warning(disable:4996)\n    #endif\n    #include <cmath>\n    HEDLEY_DIAGNOSTIC_POP\n  #else\n    #define SIMDE_MATH_HAVE_MATH_H\n    #include <math.h>\n  #endif\n#endif\n\n#if !defined(SIMDE_MATH_INFINITY)\n  #if \\\n      HEDLEY_HAS_BUILTIN(__builtin_inf) || \\\n      HEDLEY_GCC_VERSION_CHECK(3,3,0) || \\\n      HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \\\n      HEDLEY_ARM_VERSION_CHECK(4,1,0) || \\\n      HEDLEY_CRAY_VERSION_CHECK(8,1,0)\n    #define SIMDE_MATH_INFINITY (__builtin_inf())\n  #elif defined(INFINITY)\n    #define SIMDE_MATH_INFINITY INFINITY\n  #endif\n#endif\n\n#if !defined(SIMDE_INFINITYF)\n  #if \\\n      HEDLEY_HAS_BUILTIN(__builtin_inff) || \\\n      HEDLEY_GCC_VERSION_CHECK(3,3,0) || \\\n      HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \\\n      HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \\\n      HEDLEY_IBM_VERSION_CHECK(13,1,0)\n    #define SIMDE_MATH_INFINITYF (__builtin_inff())\n  #elif defined(INFINITYF)\n    #define SIMDE_MATH_INFINITYF INFINITYF\n  #elif defined(SIMDE_MATH_INFINITY)\n    #define SIMDE_MATH_INFINITYF HEDLEY_STATIC_CAST(float, SIMDE_MATH_INFINITY)\n  #endif\n#endif\n\n#if !defined(SIMDE_MATH_NAN)\n  #if \\\n      HEDLEY_HAS_BUILTIN(__builtin_nan) || \\\n      HEDLEY_GCC_VERSION_CHECK(3,3,0) || \\\n      HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \\\n      HEDLEY_ARM_VERSION_CHECK(4,1,0) || \\\n      HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \\\n      HEDLEY_IBM_VERSION_CHECK(13,1,0)\n    #define SIMDE_MATH_NAN (__builtin_nan(\"\"))\n  #elif defined(NAN)\n    #define SIMDE_MATH_NAN NAN\n  #endif\n#endif\n\n#if !defined(SIMDE_NANF)\n  #if \\\n      HEDLEY_HAS_BUILTIN(__builtin_nanf) || \\\n      HEDLEY_GCC_VERSION_CHECK(3,3,0) || \\\n      HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \\\n      HEDLEY_ARM_VERSION_CHECK(4,1,0) || \\\n      HEDLEY_CRAY_VERSION_CHECK(8,1,0)\n    #define SIMDE_MATH_NANF (__builtin_nanf(\"\"))\n  #elif defined(NANF)\n    #define SIMDE_MATH_NANF NANF\n  #elif defined(SIMDE_MATH_NAN)\n    #define SIMDE_MATH_NANF HEDLEY_STATIC_CAST(float, SIMDE_MATH_NAN)\n  #endif\n#endif\n\n#if !defined(SIMDE_MATH_PI)\n  #if defined(M_PI)\n    #define SIMDE_MATH_PI M_PI\n  #else\n    #define SIMDE_MATH_PI 3.14159265358979323846\n  #endif\n#endif\n\n#if !defined(SIMDE_MATH_PIF)\n  #if defined(M_PI)\n    #define SIMDE_MATH_PIF HEDLEY_STATIC_CAST(float, M_PI)\n  #else\n    #define SIMDE_MATH_PIF 3.14159265358979323846f\n  #endif\n#endif\n\n#if !defined(SIMDE_MATH_PI_OVER_180)\n  #define SIMDE_MATH_PI_OVER_180 0.0174532925199432957692369076848861271344287188854172545609719144\n#endif\n\n#if !defined(SIMDE_MATH_PI_OVER_180F)\n  #define SIMDE_MATH_PI_OVER_180F 0.0174532925199432957692369076848861271344287188854172545609719144f\n#endif\n\n#if !defined(SIMDE_MATH_180_OVER_PI)\n  #define SIMDE_MATH_180_OVER_PI 57.295779513082320876798154814105170332405472466564321549160243861\n#endif\n\n#if !defined(SIMDE_MATH_180_OVER_PIF)\n  #define SIMDE_MATH_180_OVER_PIF 57.295779513082320876798154814105170332405472466564321549160243861f\n#endif\n\n#if !defined(SIMDE_MATH_FLT_MIN)\n  #if defined(__FLT_MIN__)\n    #define SIMDE_MATH_FLT_MIN __FLT_MIN__\n  #else\n    #if !defined(FLT_MIN)\n      #if defined(__cplusplus)\n        #include <cfloat>\n      #else\n        #include <float.h>\n      #endif\n    #endif\n    #define SIMDE_MATH_FLT_MIN FLT_MIN\n  #endif\n#endif\n\n#if !defined(SIMDE_MATH_FLT_MAX)\n  #if defined(__FLT_MAX__)\n    #define SIMDE_MATH_FLT_MAX __FLT_MAX__\n  #else\n    #if !defined(FLT_MAX)\n      #if defined(__cplusplus)\n        #include <cfloat>\n      
#else\n        #include <float.h>\n      #endif\n    #endif\n    #define SIMDE_MATH_FLT_MAX FLT_MAX\n  #endif\n#endif\n\n#if !defined(SIMDE_MATH_DBL_MIN)\n  #if defined(__DBL_MIN__)\n    #define SIMDE_MATH_DBL_MIN __DBL_MIN__\n  #else\n    #if !defined(DBL_MIN)\n      #if defined(__cplusplus)\n        #include <cfloat>\n      #else\n        #include <float.h>\n      #endif\n    #endif\n    #define SIMDE_MATH_DBL_MIN DBL_MIN\n  #endif\n#endif\n\n#if !defined(SIMDE_MATH_DBL_MAX)\n  #if defined(__DBL_MAX__)\n    #define SIMDE_MATH_DBL_MAX __DBL_MAX__\n  #else\n    #if !defined(DBL_MAX)\n      #if defined(__cplusplus)\n        #include <cfloat>\n      #else\n        #include <float.h>\n      #endif\n    #endif\n    #define SIMDE_MATH_DBL_MAX DBL_MAX\n  #endif\n#endif\n\n/*** Classification macros from C99 ***/\n\n#if !defined(simde_math_isinf)\n  #if SIMDE_MATH_BUILTIN_LIBM(isinf)\n    #define simde_math_isinf(v) __builtin_isinf(v)\n  #elif defined(isinf) || defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_isinf(v) isinf(v)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_isinf(v) std::isinf(v)\n  #endif\n#endif\n\n#if !defined(simde_math_isinff)\n  #if HEDLEY_HAS_BUILTIN(__builtin_isinff) || \\\n      HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \\\n      HEDLEY_ARM_VERSION_CHECK(4,1,0)\n    #define simde_math_isinff(v) __builtin_isinff(v)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_isinff(v) std::isinf(v)\n  #elif defined(simde_math_isinf)\n    #define simde_math_isinff(v) simde_math_isinf(HEDLEY_STATIC_CAST(double, v))\n  #endif\n#endif\n\n#if !defined(simde_math_isnan)\n  #if SIMDE_MATH_BUILTIN_LIBM(isnan)\n    #define simde_math_isnan(v) __builtin_isnan(v)\n  #elif defined(isnan) || defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_isnan(v) isnan(v)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_isnan(v) std::isnan(v)\n  #endif\n#endif\n\n#if !defined(simde_math_isnanf)\n  #if HEDLEY_HAS_BUILTIN(__builtin_isnanf) || \\\n      HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \\\n      HEDLEY_ARM_VERSION_CHECK(4,1,0)\n    /* XL C/C++ has __builtin_isnan but not __builtin_isnanf */\n    #define simde_math_isnanf(v) __builtin_isnanf(v)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_isnanf(v) std::isnan(v)\n  #elif defined(simde_math_isnan)\n    #define simde_math_isnanf(v) simde_math_isnan(HEDLEY_STATIC_CAST(double, v))\n  #endif\n#endif\n\n#if !defined(simde_math_isnormal)\n  #if SIMDE_MATH_BUILTIN_LIBM(isnormal)\n    #define simde_math_isnormal(v) __builtin_isnormal(v)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_isnormal(v) isnormal(v)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_isnormal(v) std::isnormal(v)\n  #endif\n#endif\n\n#if !defined(simde_math_isnormalf)\n  #if HEDLEY_HAS_BUILTIN(__builtin_isnormalf)\n    #define simde_math_isnormalf(v) __builtin_isnormalf(v)\n  #elif SIMDE_MATH_BUILTIN_LIBM(isnormal)\n    #define simde_math_isnormalf(v) __builtin_isnormal(v)\n  #elif defined(isnormalf)\n    #define simde_math_isnormalf(v) isnormalf(v)\n  #elif defined(isnormal) || defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_isnormalf(v) isnormal(v)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_isnormalf(v) std::isnormal(v)\n  #elif defined(simde_math_isnormal)\n    #define simde_math_isnormalf(v) simde_math_isnormal(v)\n  #endif\n#endif\n\n#if !defined(simde_math_issubnormalf)\n  #if SIMDE_MATH_BUILTIN_LIBM(fpclassify)\n    #define simde_math_issubnormalf(v) 
__builtin_fpclassify(0, 0, 0, 1, 0, v)\n  #elif defined(fpclassify)\n    #define simde_math_issubnormalf(v) (fpclassify(v) == FP_SUBNORMAL)\n  #elif defined(SIMDE_IEEE754_STORAGE)\n    #define simde_math_issubnormalf(v) (((simde_float32_as_uint32(v) & UINT32_C(0x7F800000)) == UINT32_C(0)) && ((simde_float32_as_uint32(v) & UINT32_C(0x007FFFFF)) != UINT32_C(0)))\n  #endif\n#endif\n\n#if !defined(simde_math_issubnormal)\n  #if SIMDE_MATH_BUILTIN_LIBM(fpclassify)\n    #define simde_math_issubnormal(v) __builtin_fpclassify(0, 0, 0, 1, 0, v)\n  #elif defined(fpclassify)\n    #define simde_math_issubnormal(v) (fpclassify(v) == FP_SUBNORMAL)\n  #elif defined(SIMDE_IEEE754_STORAGE)\n    #define simde_math_issubnormal(v) (((simde_float64_as_uint64(v) & UINT64_C(0x7FF0000000000000)) == UINT64_C(0)) && ((simde_float64_as_uint64(v) & UINT64_C(0x00FFFFFFFFFFFFF)) != UINT64_C(0)))\n  #endif\n#endif\n\n#if defined(FP_NAN)\n  #define SIMDE_MATH_FP_NAN FP_NAN\n#else\n  #define SIMDE_MATH_FP_NAN 0\n#endif\n#if defined(FP_INFINITE)\n  #define SIMDE_MATH_FP_INFINITE FP_INFINITE\n#else\n  #define SIMDE_MATH_FP_INFINITE 1\n#endif\n#if defined(FP_ZERO)\n  #define SIMDE_MATH_FP_ZERO FP_ZERO\n#else\n  #define SIMDE_MATH_FP_ZERO 2\n#endif\n#if defined(FP_SUBNORMAL)\n  #define SIMDE_MATH_FP_SUBNORMAL FP_SUBNORMAL\n#else\n  #define SIMDE_MATH_FP_SUBNORMAL 3\n#endif\n#if defined(FP_NORMAL)\n  #define SIMDE_MATH_FP_NORMAL FP_NORMAL\n#else\n  #define SIMDE_MATH_FP_NORMAL 4\n#endif\n\nstatic HEDLEY_INLINE\nint\nsimde_math_fpclassifyf(float v) {\n  #if SIMDE_MATH_BUILTIN_LIBM(fpclassify)\n    return __builtin_fpclassify(SIMDE_MATH_FP_NAN, SIMDE_MATH_FP_INFINITE, SIMDE_MATH_FP_NORMAL, SIMDE_MATH_FP_SUBNORMAL, SIMDE_MATH_FP_ZERO, v);\n  #elif defined(fpclassify)\n    return fpclassify(v);\n  #else\n    return\n      simde_math_isnormalf(v) ? SIMDE_MATH_FP_NORMAL    :\n      (v == 0.0f)             ? SIMDE_MATH_FP_ZERO      :\n      simde_math_isnanf(v)    ? SIMDE_MATH_FP_NAN       :\n      simde_math_isinff(v)    ? SIMDE_MATH_FP_INFINITE  :\n                                SIMDE_MATH_FP_SUBNORMAL;\n  #endif\n}\n\nstatic HEDLEY_INLINE\nint\nsimde_math_fpclassify(double v) {\n  #if SIMDE_MATH_BUILTIN_LIBM(fpclassify)\n    return __builtin_fpclassify(SIMDE_MATH_FP_NAN, SIMDE_MATH_FP_INFINITE, SIMDE_MATH_FP_NORMAL, SIMDE_MATH_FP_SUBNORMAL, SIMDE_MATH_FP_ZERO, v);\n  #elif defined(fpclassify)\n    return fpclassify(v);\n  #else\n    return\n      simde_math_isnormal(v) ? SIMDE_MATH_FP_NORMAL    :\n      (v == 0.0)             ? SIMDE_MATH_FP_ZERO      :\n      simde_math_isnan(v)    ? SIMDE_MATH_FP_NAN       :\n      simde_math_isinf(v)    ? 
SIMDE_MATH_FP_INFINITE  :\n                               SIMDE_MATH_FP_SUBNORMAL;\n  #endif\n}\n\n#define SIMDE_MATH_FP_QNAN      0x01\n#define SIMDE_MATH_FP_PZERO     0x02\n#define SIMDE_MATH_FP_NZERO     0x04\n#define SIMDE_MATH_FP_PINF      0x08\n#define SIMDE_MATH_FP_NINF      0x10\n#define SIMDE_MATH_FP_DENORMAL  0x20\n#define SIMDE_MATH_FP_NEGATIVE  0x40\n#define SIMDE_MATH_FP_SNAN      0x80\n\nstatic HEDLEY_INLINE\nuint8_t\nsimde_math_fpclassf(float v, const int imm8) {\n  union {\n    float f;\n    uint32_t u;\n  } fu;\n  fu.f = v;\n  uint32_t bits = fu.u;\n  uint8_t NegNum = (bits >> 31) & 1;\n  uint32_t const ExpMask = 0x7F800000; // [30:23]\n  uint32_t const MantMask = 0x007FFFFF; // [22:0]\n  uint8_t ExpAllOnes = ((bits & ExpMask) == ExpMask);\n  uint8_t ExpAllZeros = ((bits & ExpMask) == 0);\n  uint8_t MantAllZeros = ((bits & MantMask) == 0);\n  uint8_t ZeroNumber = ExpAllZeros & MantAllZeros;\n  uint8_t SignalingBit = (bits >> 22) & 1;\n\n  uint8_t result = 0;\n  uint8_t qNaN_res = ExpAllOnes & (!MantAllZeros) & SignalingBit;\n  uint8_t Pzero_res = (!NegNum) & ExpAllZeros & MantAllZeros;\n  uint8_t Nzero_res = NegNum & ExpAllZeros & MantAllZeros;\n  uint8_t Pinf_res = (!NegNum) & ExpAllOnes & MantAllZeros;\n  uint8_t Ninf_res = NegNum & ExpAllOnes & MantAllZeros;\n  uint8_t Denorm_res = ExpAllZeros & (!MantAllZeros);\n  uint8_t FinNeg_res = NegNum & (!ExpAllOnes) & (!ZeroNumber);\n  uint8_t sNaN_res = ExpAllOnes & (!MantAllZeros) & (!SignalingBit);\n  result = (((imm8 >> 0) & qNaN_res)   | \\n            ((imm8 >> 1) & Pzero_res)  | \\n            ((imm8 >> 2) & Nzero_res)  | \\n            ((imm8 >> 3) & Pinf_res)   | \\n            ((imm8 >> 4) & Ninf_res)   | \\n            ((imm8 >> 5) & Denorm_res) | \\n            ((imm8 >> 6) & FinNeg_res) | \\n            ((imm8 >> 7) & sNaN_res));\n  return result;\n}\n\nstatic HEDLEY_INLINE\nuint8_t\nsimde_math_fpclass(double v, const int imm8) {\n  union {\n    double d;\n    uint64_t u;\n  } du;\n  du.d = v;\n  uint64_t bits = du.u;\n  uint8_t NegNum = (bits >> 63) & 1;\n  uint64_t const ExpMask =  0x7FF0000000000000; // [62:52]\n  uint64_t const MantMask = 0x000FFFFFFFFFFFFF; // [51:0]\n  uint8_t ExpAllOnes = ((bits & ExpMask) == ExpMask);\n  uint8_t ExpAllZeros = ((bits & ExpMask) == 0);\n  uint8_t MantAllZeros = ((bits & MantMask) == 0);\n  uint8_t ZeroNumber = ExpAllZeros & MantAllZeros;\n  uint8_t SignalingBit = (bits >> 51) & 1;\n\n  uint8_t result = 0;\n  uint8_t qNaN_res = ExpAllOnes & (!MantAllZeros) & SignalingBit;\n  uint8_t Pzero_res = (!NegNum) & ExpAllZeros & MantAllZeros;\n  uint8_t Nzero_res = NegNum & ExpAllZeros & MantAllZeros;\n  uint8_t Pinf_res = (!NegNum) & ExpAllOnes & MantAllZeros;\n  uint8_t Ninf_res = NegNum & ExpAllOnes & MantAllZeros;\n  uint8_t Denorm_res = ExpAllZeros & (!MantAllZeros);\n  uint8_t FinNeg_res = NegNum & (!ExpAllOnes) & (!ZeroNumber);\n  uint8_t sNaN_res = ExpAllOnes & (!MantAllZeros) & (!SignalingBit);\n  result = (((imm8 >> 0) & qNaN_res)   | \\n            ((imm8 >> 1) & Pzero_res)  | \\n            ((imm8 >> 2) & Nzero_res)  | \\n            ((imm8 >> 3) & Pinf_res)   | \\n            ((imm8 >> 4) & Ninf_res)   | \\n            ((imm8 >> 5) & Denorm_res) | \\n            ((imm8 >> 6) & FinNeg_res) | \\n            ((imm8 >> 7) & sNaN_res));\n  return result;\n}\n\n/*** Manipulation functions ***/\n\n#if !defined(simde_math_nextafter)\n  #if \\n      (HEDLEY_HAS_BUILTIN(__builtin_nextafter) && !defined(HEDLEY_IBM_VERSION)) || \\n      HEDLEY_ARM_VERSION_CHECK(4,1,0) 
|| \\\n      HEDLEY_GCC_VERSION_CHECK(3,4,0) || \\\n      HEDLEY_INTEL_VERSION_CHECK(13,0,0)\n    #define simde_math_nextafter(x, y) __builtin_nextafter(x, y)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_nextafter(x, y) std::nextafter(x, y)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_nextafter(x, y) nextafter(x, y)\n  #endif\n#endif\n\n#if !defined(simde_math_nextafterf)\n  #if \\\n      (HEDLEY_HAS_BUILTIN(__builtin_nextafterf) && !defined(HEDLEY_IBM_VERSION)) || \\\n      HEDLEY_ARM_VERSION_CHECK(4,1,0) || \\\n      HEDLEY_GCC_VERSION_CHECK(3,4,0) || \\\n      HEDLEY_INTEL_VERSION_CHECK(13,0,0)\n    #define simde_math_nextafterf(x, y) __builtin_nextafterf(x, y)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_nextafterf(x, y) std::nextafter(x, y)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_nextafterf(x, y) nextafterf(x, y)\n  #endif\n#endif\n\n/*** Functions from C99 ***/\n\n#if !defined(simde_math_abs)\n  #if SIMDE_MATH_BUILTIN_LIBM(abs)\n    #define simde_math_abs(v) __builtin_abs(v)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_abs(v) std::abs(v)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_abs(v) abs(v)\n  #endif\n#endif\n\n#if !defined(simde_math_labs)\n  #if SIMDE_MATH_BUILTIN_LIBM(labs)\n    #define simde_math_labs(v) __builtin_labs(v)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_labs(v) std::labs(v)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_labs(v) labs(v)\n  #endif\n#endif\n\n#if !defined(simde_math_llabs)\n  #if SIMDE_MATH_BUILTIN_LIBM(llabs)\n    #define simde_math_llabs(v) __builtin_llabs(v)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_llabs(v) std::llabs(v)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_llabs(v) llabs(v)\n  #endif\n#endif\n\n#if !defined(simde_math_fabsf)\n  #if SIMDE_MATH_BUILTIN_LIBM(fabsf)\n    #define simde_math_fabsf(v) __builtin_fabsf(v)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_fabsf(v) std::abs(v)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_fabsf(v) fabsf(v)\n  #endif\n#endif\n\n#if !defined(simde_math_acos)\n  #if SIMDE_MATH_BUILTIN_LIBM(acos)\n    #define simde_math_acos(v) __builtin_acos(v)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_acos(v) std::acos(v)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_acos(v) acos(v)\n  #endif\n#endif\n\n#if !defined(simde_math_acosf)\n  #if SIMDE_MATH_BUILTIN_LIBM(acosf)\n    #define simde_math_acosf(v) __builtin_acosf(v)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_acosf(v) std::acos(v)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_acosf(v) acosf(v)\n  #endif\n#endif\n\n#if !defined(simde_math_acosh)\n  #if SIMDE_MATH_BUILTIN_LIBM(acosh)\n    #define simde_math_acosh(v) __builtin_acosh(v)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_acosh(v) std::acosh(v)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_acosh(v) acosh(v)\n  #endif\n#endif\n\n#if !defined(simde_math_acoshf)\n  #if SIMDE_MATH_BUILTIN_LIBM(acoshf)\n    #define simde_math_acoshf(v) __builtin_acoshf(v)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_acoshf(v) std::acosh(v)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_acoshf(v) acoshf(v)\n  #endif\n#endif\n\n#if !defined(simde_math_asin)\n  #if SIMDE_MATH_BUILTIN_LIBM(asin)\n    #define simde_math_asin(v) __builtin_asin(v)\n  
#elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_asin(v) std::asin(v)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_asin(v) asin(v)\n  #endif\n#endif\n\n#if !defined(simde_math_asinf)\n  #if SIMDE_MATH_BUILTIN_LIBM(asinf)\n    #define simde_math_asinf(v) __builtin_asinf(v)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_asinf(v) std::asin(v)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_asinf(v) asinf(v)\n  #endif\n#endif\n\n#if !defined(simde_math_asinh)\n  #if SIMDE_MATH_BUILTIN_LIBM(asinh)\n    #define simde_math_asinh(v) __builtin_asinh(v)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_asinh(v) std::asinh(v)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_asinh(v) asinh(v)\n  #endif\n#endif\n\n#if !defined(simde_math_asinhf)\n  #if SIMDE_MATH_BUILTIN_LIBM(asinhf)\n    #define simde_math_asinhf(v) __builtin_asinhf(v)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_asinhf(v) std::asinh(v)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_asinhf(v) asinhf(v)\n  #endif\n#endif\n\n#if !defined(simde_math_atan)\n  #if SIMDE_MATH_BUILTIN_LIBM(atan)\n    #define simde_math_atan(v) __builtin_atan(v)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_atan(v) std::atan(v)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_atan(v) atan(v)\n  #endif\n#endif\n\n#if !defined(simde_math_atan2)\n  #if SIMDE_MATH_BUILTIN_LIBM(atan2)\n    #define simde_math_atan2(y, x) __builtin_atan2(y, x)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_atan2(y, x) std::atan2(y, x)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_atan2(y, x) atan2(y, x)\n  #endif\n#endif\n\n#if !defined(simde_math_atan2f)\n  #if SIMDE_MATH_BUILTIN_LIBM(atan2f)\n    #define simde_math_atan2f(y, x) __builtin_atan2f(y, x)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_atan2f(y, x) std::atan2(y, x)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_atan2f(y, x) atan2f(y, x)\n  #endif\n#endif\n\n#if !defined(simde_math_atanf)\n  #if SIMDE_MATH_BUILTIN_LIBM(atanf)\n    #define simde_math_atanf(v) __builtin_atanf(v)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_atanf(v) std::atan(v)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_atanf(v) atanf(v)\n  #endif\n#endif\n\n#if !defined(simde_math_atanh)\n  #if SIMDE_MATH_BUILTIN_LIBM(atanh)\n    #define simde_math_atanh(v) __builtin_atanh(v)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_atanh(v) std::atanh(v)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_atanh(v) atanh(v)\n  #endif\n#endif\n\n#if !defined(simde_math_atanhf)\n  #if SIMDE_MATH_BUILTIN_LIBM(atanhf)\n    #define simde_math_atanhf(v) __builtin_atanhf(v)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_atanhf(v) std::atanh(v)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_atanhf(v) atanhf(v)\n  #endif\n#endif\n\n#if !defined(simde_math_cbrt)\n  #if SIMDE_MATH_BUILTIN_LIBM(cbrt)\n    #define simde_math_cbrt(v) __builtin_cbrt(v)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_cbrt(v) std::cbrt(v)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_cbrt(v) cbrt(v)\n  #endif\n#endif\n\n#if !defined(simde_math_cbrtf)\n  #if SIMDE_MATH_BUILTIN_LIBM(cbrtf)\n    #define simde_math_cbrtf(v) __builtin_cbrtf(v)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_cbrtf(v) std::cbrt(v)\n  
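  /* The unsuffixed std:: name is deliberate: C++11 <cmath> declares float\n       overloads of these functions, so std::cbrt(v) with a float argument\n       resolves to the float overload and no f-suffix is needed. */\n  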
#elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_cbrtf(v) cbrtf(v)\n  #endif\n#endif\n\n#if !defined(simde_math_ceil)\n  #if SIMDE_MATH_BUILTIN_LIBM(ceil)\n    #define simde_math_ceil(v) __builtin_ceil(v)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_ceil(v) std::ceil(v)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_ceil(v) ceil(v)\n  #endif\n#endif\n\n#if !defined(simde_math_ceilf)\n  #if SIMDE_MATH_BUILTIN_LIBM(ceilf)\n    #define simde_math_ceilf(v) __builtin_ceilf(v)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_ceilf(v) std::ceil(v)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_ceilf(v) ceilf(v)\n  #endif\n#endif\n\n#if !defined(simde_math_copysign)\n  #if SIMDE_MATH_BUILTIN_LIBM(copysign)\n    #define simde_math_copysign(x, y) __builtin_copysign(x, y)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_copysign(x, y) std::copysign(x, y)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_copysign(x, y) copysign(x, y)\n  #endif\n#endif\n\n#if !defined(simde_math_copysignf)\n  #if SIMDE_MATH_BUILTIN_LIBM(copysignf)\n    #define simde_math_copysignf(x, y) __builtin_copysignf(x, y)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_copysignf(x, y) std::copysignf(x, y)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_copysignf(x, y) copysignf(x, y)\n  #endif\n#endif\n\n#if !defined(simde_math_signbit)\n  #if SIMDE_MATH_BUILTIN_LIBM(signbit)\n    #if (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0))\n      #define simde_math_signbit(x) __builtin_signbit(x)\n    #else\n      #define simde_math_signbit(x) __builtin_signbit(HEDLEY_STATIC_CAST(double, (x)))\n    #endif\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_signbit(x) std::signbit(x)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_signbit(x) signbit(x)\n  #endif\n#endif\n\n#if !defined(simde_math_cos)\n  #if SIMDE_MATH_BUILTIN_LIBM(cos)\n    #define simde_math_cos(v) __builtin_cos(v)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_cos(v) std::cos(v)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_cos(v) cos(v)\n  #endif\n#endif\n\n#if !defined(simde_math_cosf)\n  #if defined(SIMDE_MATH_SLEEF_ENABLE)\n    #if SIMDE_ACCURACY_PREFERENCE < 1\n      #define simde_math_cosf(v) Sleef_cosf_u35(v)\n    #else\n      #define simde_math_cosf(v) Sleef_cosf_u10(v)\n    #endif\n  #elif SIMDE_MATH_BUILTIN_LIBM(cosf)\n    #define simde_math_cosf(v) __builtin_cosf(v)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_cosf(v) std::cos(v)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_cosf(v) cosf(v)\n  #endif\n#endif\n\n#if !defined(simde_math_cosh)\n  #if SIMDE_MATH_BUILTIN_LIBM(cosh)\n    #define simde_math_cosh(v) __builtin_cosh(v)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_cosh(v) std::cosh(v)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_cosh(v) cosh(v)\n  #endif\n#endif\n\n#if !defined(simde_math_coshf)\n  #if SIMDE_MATH_BUILTIN_LIBM(coshf)\n    #define simde_math_coshf(v) __builtin_coshf(v)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_coshf(v) std::cosh(v)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_coshf(v) coshf(v)\n  #endif\n#endif\n\n#if !defined(simde_math_erf)\n  #if SIMDE_MATH_BUILTIN_LIBM(erf)\n    #define simde_math_erf(v) __builtin_erf(v)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_erf(v) 
std::erf(v)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_erf(v) erf(v)\n  #endif\n#endif\n\n#if !defined(simde_math_erff)\n  #if SIMDE_MATH_BUILTIN_LIBM(erff)\n    #define simde_math_erff(v) __builtin_erff(v)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_erff(v) std::erf(v)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_erff(v) erff(v)\n  #endif\n#endif\n\n#if !defined(simde_math_erfc)\n  #if SIMDE_MATH_BUILTIN_LIBM(erfc)\n    #define simde_math_erfc(v) __builtin_erfc(v)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_erfc(v) std::erfc(v)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_erfc(v) erfc(v)\n  #endif\n#endif\n\n#if !defined(simde_math_erfcf)\n  #if SIMDE_MATH_BUILTIN_LIBM(erfcf)\n    #define simde_math_erfcf(v) __builtin_erfcf(v)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_erfcf(v) std::erfc(v)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_erfcf(v) erfcf(v)\n  #endif\n#endif\n\n#if !defined(simde_math_exp)\n  #if SIMDE_MATH_BUILTIN_LIBM(exp)\n    #define simde_math_exp(v) __builtin_exp(v)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_exp(v) std::exp(v)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_exp(v) exp(v)\n  #endif\n#endif\n\n#if !defined(simde_math_expf)\n  #if SIMDE_MATH_BUILTIN_LIBM(expf)\n    #define simde_math_expf(v) __builtin_expf(v)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_expf(v) std::exp(v)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_expf(v) expf(v)\n  #endif\n#endif\n\n#if !defined(simde_math_expm1)\n  #if SIMDE_MATH_BUILTIN_LIBM(expm1)\n    #define simde_math_expm1(v) __builtin_expm1(v)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_expm1(v) std::expm1(v)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_expm1(v) expm1(v)\n  #endif\n#endif\n\n#if !defined(simde_math_expm1f)\n  #if SIMDE_MATH_BUILTIN_LIBM(expm1f)\n    #define simde_math_expm1f(v) __builtin_expm1f(v)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_expm1f(v) std::expm1(v)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_expm1f(v) expm1f(v)\n  #endif\n#endif\n\n#if !defined(simde_math_exp2)\n  #if SIMDE_MATH_BUILTIN_LIBM(exp2)\n    #define simde_math_exp2(v) __builtin_exp2(v)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_exp2(v) std::exp2(v)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_exp2(v) exp2(v)\n  #endif\n#endif\n\n#if !defined(simde_math_exp2f)\n  #if SIMDE_MATH_BUILTIN_LIBM(exp2f)\n    #define simde_math_exp2f(v) __builtin_exp2f(v)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_exp2f(v) std::exp2(v)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_exp2f(v) exp2f(v)\n  #endif\n#endif\n\n#if HEDLEY_HAS_BUILTIN(__builtin_exp10) ||  HEDLEY_GCC_VERSION_CHECK(3,4,0)\n  #  define simde_math_exp10(v) __builtin_exp10(v)\n#else\n#  define simde_math_exp10(v) pow(10.0, (v))\n#endif\n\n#if HEDLEY_HAS_BUILTIN(__builtin_exp10f) ||  HEDLEY_GCC_VERSION_CHECK(3,4,0)\n  #  define simde_math_exp10f(v) __builtin_exp10f(v)\n#else\n#  define simde_math_exp10f(v) powf(10.0f, (v))\n#endif\n\n#if !defined(simde_math_fabs)\n  #if SIMDE_MATH_BUILTIN_LIBM(fabs)\n    #define simde_math_fabs(v) __builtin_fabs(v)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_fabs(v) std::fabs(v)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_fabs(v) fabs(v)\n  
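/* Usage note: this three-tier fallback (compiler builtin, then C++ <cmath>,\n     then C <math.h>) is the dispatch pattern used by every wrapper in this\n     header; e.g. simde_math_fabs(-1.5) typically expands to __builtin_fabs(-1.5)\n     under GCC or Clang. */\n  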
#endif\n#endif\n\n#if !defined(simde_math_fabsf)\n  #if SIMDE_MATH_BUILTIN_LIBM(fabsf)\n    #define simde_math_fabsf(v) __builtin_fabsf(v)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_fabsf(v) std::fabs(v)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_fabsf(v) fabsf(v)\n  #endif\n#endif\n\n#if !defined(simde_math_floor)\n  #if SIMDE_MATH_BUILTIN_LIBM(floor)\n    #define simde_math_floor(v) __builtin_floor(v)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_floor(v) std::floor(v)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_floor(v) floor(v)\n  #endif\n#endif\n\n#if !defined(simde_math_floorf)\n  #if SIMDE_MATH_BUILTIN_LIBM(floorf)\n    #define simde_math_floorf(v) __builtin_floorf(v)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_floorf(v) std::floor(v)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_floorf(v) floorf(v)\n  #endif\n#endif\n\n#if !defined(simde_math_fma)\n  #if SIMDE_MATH_BUILTIN_LIBM(fma)\n    #define simde_math_fma(x, y, z) __builtin_fma(x, y, z)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_fma(x, y, z) std::fma(x, y, z)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_fma(x, y, z) fma(x, y, z)\n  #endif\n#endif\n\n#if !defined(simde_math_fmaf)\n  #if SIMDE_MATH_BUILTIN_LIBM(fmaf)\n    #define simde_math_fmaf(x, y, z) __builtin_fmaf(x, y, z)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_fmaf(x, y, z) std::fma(x, y, z)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_fmaf(x, y, z) fmaf(x, y, z)\n  #endif\n#endif\n\n#if !defined(simde_math_fmax)\n  #if SIMDE_MATH_BUILTIN_LIBM(fmax)\n    #define simde_math_fmax(x, y) __builtin_fmax(x, y)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_fmax(x, y) std::fmax(x, y)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_fmax(x, y) fmax(x, y)\n  #endif\n#endif\n\n#if !defined(simde_math_fmaxf)\n  #if SIMDE_MATH_BUILTIN_LIBM(fmaxf)\n    #define simde_math_fmaxf(x, y) __builtin_fmaxf(x, y)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_fmaxf(x, y) std::fmax(x, y)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_fmaxf(x, y) fmaxf(x, y)\n  #endif\n#endif\n\n#if !defined(simde_math_hypot)\n  #if SIMDE_MATH_BUILTIN_LIBM(hypot)\n    #define simde_math_hypot(y, x) __builtin_hypot(y, x)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_hypot(y, x) std::hypot(y, x)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_hypot(y, x) hypot(y, x)\n  #endif\n#endif\n\n#if !defined(simde_math_hypotf)\n  #if SIMDE_MATH_BUILTIN_LIBM(hypotf)\n    #define simde_math_hypotf(y, x) __builtin_hypotf(y, x)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_hypotf(y, x) std::hypot(y, x)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_hypotf(y, x) hypotf(y, x)\n  #endif\n#endif\n\n#if !defined(simde_math_log)\n  #if SIMDE_MATH_BUILTIN_LIBM(log)\n    #define simde_math_log(v) __builtin_log(v)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_log(v) std::log(v)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_log(v) log(v)\n  #endif\n#endif\n\n#if !defined(simde_math_logf)\n  #if SIMDE_MATH_BUILTIN_LIBM(logf)\n    #define simde_math_logf(v) __builtin_logf(v)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_logf(v) std::log(v)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_logf(v) logf(v)\n  #endif\n#endif\n\n#if 
!defined(simde_math_logb)\n  #if SIMDE_MATH_BUILTIN_LIBM(logb)\n    #define simde_math_logb(v) __builtin_logb(v)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_logb(v) std::logb(v)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_logb(v) logb(v)\n  #endif\n#endif\n\n#if !defined(simde_math_logbf)\n  #if SIMDE_MATH_BUILTIN_LIBM(logbf)\n    #define simde_math_logbf(v) __builtin_logbf(v)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_logbf(v) std::logb(v)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_logbf(v) logbf(v)\n  #endif\n#endif\n\n#if !defined(simde_math_log1p)\n  #if SIMDE_MATH_BUILTIN_LIBM(log1p)\n    #define simde_math_log1p(v) __builtin_log1p(v)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_log1p(v) std::log1p(v)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_log1p(v) log1p(v)\n  #endif\n#endif\n\n#if !defined(simde_math_log1pf)\n  #if SIMDE_MATH_BUILTIN_LIBM(log1pf)\n    #define simde_math_log1pf(v) __builtin_log1pf(v)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_log1pf(v) std::log1p(v)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_log1pf(v) log1pf(v)\n  #endif\n#endif\n\n#if !defined(simde_math_log2)\n  #if SIMDE_MATH_BUILTIN_LIBM(log2)\n    #define simde_math_log2(v) __builtin_log2(v)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_log2(v) std::log2(v)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_log2(v) log2(v)\n  #endif\n#endif\n\n#if !defined(simde_math_log2f)\n  #if SIMDE_MATH_BUILTIN_LIBM(log2f)\n    #define simde_math_log2f(v) __builtin_log2f(v)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_log2f(v) std::log2(v)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_log2f(v) log2f(v)\n  #endif\n#endif\n\n#if !defined(simde_math_log10)\n  #if SIMDE_MATH_BUILTIN_LIBM(log10)\n    #define simde_math_log10(v) __builtin_log10(v)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_log10(v) std::log10(v)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_log10(v) log10(v)\n  #endif\n#endif\n\n#if !defined(simde_math_log10f)\n  #if SIMDE_MATH_BUILTIN_LIBM(log10f)\n    #define simde_math_log10f(v) __builtin_log10f(v)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_log10f(v) std::log10(v)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_log10f(v) log10f(v)\n  #endif\n#endif\n\n#if !defined(simde_math_modf)\n  #if SIMDE_MATH_BUILTIN_LIBM(modf)\n    #define simde_math_modf(x, iptr) __builtin_modf(x, iptr)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_modf(x, iptr) std::modf(x, iptr)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_modf(x, iptr) modf(x, iptr)\n  #endif\n#endif\n\n#if !defined(simde_math_modff)\n  #if SIMDE_MATH_BUILTIN_LIBM(modff)\n    #define simde_math_modff(x, iptr) __builtin_modff(x, iptr)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_modff(x, iptr) std::modf(x, iptr)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_modff(x, iptr) modff(x, iptr)\n  #endif\n#endif\n\n#if !defined(simde_math_nearbyint)\n  #if SIMDE_MATH_BUILTIN_LIBM(nearbyint)\n    #define simde_math_nearbyint(v) __builtin_nearbyint(v)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_nearbyint(v) std::nearbyint(v)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_nearbyint(v) nearbyint(v)\n  #endif\n#endif\n\n#if 
!defined(simde_math_nearbyintf)\n  #if SIMDE_MATH_BUILTIN_LIBM(nearbyintf)\n    #define simde_math_nearbyintf(v) __builtin_nearbyintf(v)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_nearbyintf(v) std::nearbyint(v)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_nearbyintf(v) nearbyintf(v)\n  #endif\n#endif\n\n#if !defined(simde_math_pow)\n  #if SIMDE_MATH_BUILTIN_LIBM(pow)\n    #define simde_math_pow(y, x) __builtin_pow(y, x)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_pow(y, x) std::pow(y, x)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_pow(y, x) pow(y, x)\n  #endif\n#endif\n\n#if !defined(simde_math_powf)\n  #if SIMDE_MATH_BUILTIN_LIBM(powf)\n    #define simde_math_powf(y, x) __builtin_powf(y, x)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_powf(y, x) std::pow(y, x)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_powf(y, x) powf(y, x)\n  #endif\n#endif\n\n#if !defined(simde_math_rint)\n  #if SIMDE_MATH_BUILTIN_LIBM(rint)\n    #define simde_math_rint(v) __builtin_rint(v)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_rint(v) std::rint(v)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_rint(v) rint(v)\n  #endif\n#endif\n\n#if !defined(simde_math_rintf)\n  #if SIMDE_MATH_BUILTIN_LIBM(rintf)\n    #define simde_math_rintf(v) __builtin_rintf(v)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_rintf(v) std::rint(v)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_rintf(v) rintf(v)\n  #endif\n#endif\n\n#if !defined(simde_math_round)\n  #if SIMDE_MATH_BUILTIN_LIBM(round)\n    #define simde_math_round(v) __builtin_round(v)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_round(v) std::round(v)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_round(v) round(v)\n  #endif\n#endif\n\n#if !defined(simde_math_roundf)\n  #if SIMDE_MATH_BUILTIN_LIBM(roundf)\n    #define simde_math_roundf(v) __builtin_roundf(v)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_roundf(v) std::round(v)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_roundf(v) roundf(v)\n  #endif\n#endif\n\n#if !defined(simde_math_roundeven)\n  #if \\\n     ((!defined(HEDLEY_EMSCRIPTEN_VERSION) || HEDLEY_EMSCRIPTEN_VERSION_CHECK(3, 1, 43)) && HEDLEY_HAS_BUILTIN(__builtin_roundeven)) || \\\n      HEDLEY_GCC_VERSION_CHECK(10,0,0)\n    #define simde_math_roundeven(v) __builtin_roundeven(v)\n  #elif defined(simde_math_round) && defined(simde_math_fabs)\n    static HEDLEY_INLINE\n    double\n    simde_math_roundeven(double v) {\n      double rounded = simde_math_round(v);\n      double diff = rounded - v;\n      if (HEDLEY_UNLIKELY(simde_math_fabs(diff) == 0.5) && (HEDLEY_STATIC_CAST(int64_t, rounded) & 1)) {\n        rounded = v - diff;\n      }\n      return rounded;\n    }\n    #define simde_math_roundeven simde_math_roundeven\n  #endif\n#endif\n\n#if !defined(simde_math_roundevenf)\n  #if \\\n     ((!defined(HEDLEY_EMSCRIPTEN_VERSION) || HEDLEY_EMSCRIPTEN_VERSION_CHECK(3, 1, 43)) && HEDLEY_HAS_BUILTIN(__builtin_roundevenf)) || \\\n      HEDLEY_GCC_VERSION_CHECK(10,0,0)\n    #define simde_math_roundevenf(v) __builtin_roundevenf(v)\n  #elif defined(simde_math_roundf) && defined(simde_math_fabsf)\n    static HEDLEY_INLINE\n    float\n    simde_math_roundevenf(float v) {\n      float rounded = simde_math_roundf(v);\n      float diff = rounded - v;\n      if (HEDLEY_UNLIKELY(simde_math_fabsf(diff) == 0.5f) && 
(HEDLEY_STATIC_CAST(int32_t, rounded) & 1)) {\n        rounded = v - diff;\n      }\n      return rounded;\n    }\n    #define simde_math_roundevenf simde_math_roundevenf\n  #endif\n#endif\n\n#if !defined(simde_math_sin)\n  #if SIMDE_MATH_BUILTIN_LIBM(sin)\n    #define simde_math_sin(v) __builtin_sin(v)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_sin(v) std::sin(v)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_sin(v) sin(v)\n  #endif\n#endif\n\n#if !defined(simde_math_sinf)\n  #if SIMDE_MATH_BUILTIN_LIBM(sinf)\n    #define simde_math_sinf(v) __builtin_sinf(v)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_sinf(v) std::sin(v)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_sinf(v) sinf(v)\n  #endif\n#endif\n\n#if !defined(simde_math_sinh)\n  #if SIMDE_MATH_BUILTIN_LIBM(sinh)\n    #define simde_math_sinh(v) __builtin_sinh(v)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_sinh(v) std::sinh(v)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_sinh(v) sinh(v)\n  #endif\n#endif\n\n#if !defined(simde_math_sinhf)\n  #if SIMDE_MATH_BUILTIN_LIBM(sinhf)\n    #define simde_math_sinhf(v) __builtin_sinhf(v)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_sinhf(v) std::sinh(v)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_sinhf(v) sinhf(v)\n  #endif\n#endif\n\n#if !defined(simde_math_sqrt)\n  #if SIMDE_MATH_BUILTIN_LIBM(sqrt)\n    #define simde_math_sqrt(v) __builtin_sqrt(v)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_sqrt(v) std::sqrt(v)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_sqrt(v) sqrt(v)\n  #endif\n#endif\n\n#if !defined(simde_math_sqrtf)\n  #if SIMDE_MATH_BUILTIN_LIBM(sqrtf)\n    #define simde_math_sqrtf(v) __builtin_sqrtf(v)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_sqrtf(v) std::sqrt(v)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_sqrtf(v) sqrtf(v)\n  #endif\n#endif\n\n#if !defined(simde_math_sqrtl)\n  #if SIMDE_MATH_BUILTIN_LIBM(sqrtl)\n    #define simde_math_sqrtl(v) __builtin_sqrtl(v)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_sqrtl(v) std::sqrt(v)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_sqrtl(v) sqrtl(v)\n  #endif\n#endif\n\n#if !defined(simde_math_tan)\n  #if SIMDE_MATH_BUILTIN_LIBM(tan)\n    #define simde_math_tan(v) __builtin_tan(v)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_tan(v) std::tan(v)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_tan(v) tan(v)\n  #endif\n#endif\n\n#if !defined(simde_math_tanf)\n  #if SIMDE_MATH_BUILTIN_LIBM(tanf)\n    #define simde_math_tanf(v) __builtin_tanf(v)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_tanf(v) std::tan(v)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_tanf(v) tanf(v)\n  #endif\n#endif\n\n#if !defined(simde_math_tanh)\n  #if SIMDE_MATH_BUILTIN_LIBM(tanh)\n    #define simde_math_tanh(v) __builtin_tanh(v)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_tanh(v) std::tanh(v)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_tanh(v) tanh(v)\n  #endif\n#endif\n\n#if !defined(simde_math_tanhf)\n  #if SIMDE_MATH_BUILTIN_LIBM(tanhf)\n    #define simde_math_tanhf(v) __builtin_tanhf(v)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_tanhf(v) std::tanh(v)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_tanhf(v) tanhf(v)\n  #endif\n#endif\n\n#if 
!defined(simde_math_trunc)\n  #if SIMDE_MATH_BUILTIN_LIBM(trunc)\n    #define simde_math_trunc(v) __builtin_trunc(v)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_trunc(v) std::trunc(v)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_trunc(v) trunc(v)\n  #endif\n#endif\n\n#if !defined(simde_math_truncf)\n  #if SIMDE_MATH_BUILTIN_LIBM(truncf)\n    #define simde_math_truncf(v) __builtin_truncf(v)\n  #elif defined(SIMDE_MATH_HAVE_CMATH)\n    #define simde_math_truncf(v) std::trunc(v)\n  #elif defined(SIMDE_MATH_HAVE_MATH_H)\n    #define simde_math_truncf(v) truncf(v)\n  #endif\n#endif\n\n/*** Comparison macros (which don't raise invalid errors) ***/\n\n#if defined(isunordered)\n  #define simde_math_isunordered(x, y) isunordered(x, y)\n#elif HEDLEY_HAS_BUILTIN(__builtin_isunordered)\n  #define simde_math_isunordered(x, y) __builtin_isunordered(x, y)\n#else\n  static HEDLEY_INLINE\n  int simde_math_isunordered(double x, double y) {\n    return (x != y) && (x != x || y != y);\n  }\n  #define simde_math_isunordered simde_math_isunordered\n\n  static HEDLEY_INLINE\n  int simde_math_isunorderedf(float x, float y) {\n    return (x != y) && (x != x || y != y);\n  }\n  #define simde_math_isunorderedf simde_math_isunorderedf\n#endif\n#if !defined(simde_math_isunorderedf)\n  #define simde_math_isunorderedf simde_math_isunordered\n#endif\n\n/*** Additional functions not in libm ***/\n\n#if defined(simde_math_fabs) && defined(simde_math_sqrt) && defined(simde_math_exp)\n  static HEDLEY_INLINE\n  double\n  simde_math_cdfnorm(double x) {\n    /* https://www.johndcook.com/blog/cpp_phi/\n    * Public Domain */\n    static const double a1 =  0.254829592;\n    static const double a2 = -0.284496736;\n    static const double a3 =  1.421413741;\n    static const double a4 = -1.453152027;\n    static const double a5 =  1.061405429;\n    static const double p  =  0.3275911;\n\n    const int sign = x < 0;\n    x = simde_math_fabs(x) / simde_math_sqrt(2.0);\n\n    /* A&S formula 7.1.26 */\n    double t = 1.0 / (1.0 + p * x);\n    double y = 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * simde_math_exp(-x * x);\n\n    return 0.5 * (1.0 + (sign ? -y : y));\n  }\n  #define simde_math_cdfnorm simde_math_cdfnorm\n#endif\n\n#if defined(simde_math_fabsf) && defined(simde_math_sqrtf) && defined(simde_math_expf)\n  static HEDLEY_INLINE\n  float\n  simde_math_cdfnormf(float x) {\n    /* https://www.johndcook.com/blog/cpp_phi/\n    * Public Domain */\n    static const float a1 =  0.254829592f;\n    static const float a2 = -0.284496736f;\n    static const float a3 =  1.421413741f;\n    static const float a4 = -1.453152027f;\n    static const float a5 =  1.061405429f;\n    static const float p  =  0.3275911f;\n\n    const int sign = x < 0;\n    x = simde_math_fabsf(x) / simde_math_sqrtf(2.0f);\n\n    /* A&S formula 7.1.26 */\n    float t = 1.0f / (1.0f + p * x);\n    float y = 1.0f - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * simde_math_expf(-x * x);\n\n    return 0.5f * (1.0f + (sign ? 
-y : y));\n  }\n  #define simde_math_cdfnormf simde_math_cdfnormf\n#endif\n\n#if !defined(simde_math_cdfnorminv) && defined(simde_math_log) && defined(simde_math_sqrt)\n  /*https://web.archive.org/web/20150910081113/http://home.online.no/~pjacklam/notes/invnorm/impl/sprouse/ltqnorm.c*/\n  static HEDLEY_INLINE\n  double\n  simde_math_cdfnorminv(double p) {\n    static const double a[6] = {\n      -3.969683028665376e+01,\n       2.209460984245205e+02,\n      -2.759285104469687e+02,\n       1.383577518672690e+02,\n      -3.066479806614716e+01,\n       2.506628277459239e+00\n    };\n\n    static const double b[5] = {\n      -5.447609879822406e+01,\n       1.615858368580409e+02,\n      -1.556989798598866e+02,\n       6.680131188771972e+01,\n      -1.328068155288572e+01\n    };\n\n    static const double c[6] = {\n      -7.784894002430293e-03,\n      -3.223964580411365e-01,\n      -2.400758277161838e+00,\n      -2.549732539343734e+00,\n       4.374664141464968e+00,\n       2.938163982698783e+00\n    };\n\n    static const double d[4] = {\n      7.784695709041462e-03,\n      3.224671290700398e-01,\n      2.445134137142996e+00,\n      3.754408661907416e+00\n    };\n\n    static const double low  = 0.02425;\n    static const double high = 0.97575;\n    double q, r;\n\n    if (p < 0 || p > 1) {\n      return 0.0;\n    } else if (p == 0) {\n      return -SIMDE_MATH_INFINITY;\n    } else if (p == 1) {\n      return SIMDE_MATH_INFINITY;\n    } else if (p < low) {\n      q = simde_math_sqrt(-2.0 * simde_math_log(p));\n      return\n        (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) /\n        (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1));\n    } else if (p > high) {\n      q = simde_math_sqrt(-2.0 * simde_math_log(1.0 - p));\n      return\n        -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) /\n         (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1));\n    } else {\n      q = p - 0.5;\n      r = q * q;\n      return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) *\n        q / (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1);\n    }\n}\n#define simde_math_cdfnorminv simde_math_cdfnorminv\n#endif\n\n#if !defined(simde_math_cdfnorminvf) && defined(simde_math_logf) && defined(simde_math_sqrtf)\n  static HEDLEY_INLINE\n  float\n  simde_math_cdfnorminvf(float p) {\n    static const float a[6] = {\n      -3.969683028665376e+01f,\n       2.209460984245205e+02f,\n      -2.759285104469687e+02f,\n       1.383577518672690e+02f,\n      -3.066479806614716e+01f,\n       2.506628277459239e+00f\n    };\n    static const float b[5] = {\n      -5.447609879822406e+01f,\n       1.615858368580409e+02f,\n      -1.556989798598866e+02f,\n       6.680131188771972e+01f,\n      -1.328068155288572e+01f\n    };\n    static const float c[6] = {\n      -7.784894002430293e-03f,\n      -3.223964580411365e-01f,\n      -2.400758277161838e+00f,\n      -2.549732539343734e+00f,\n       4.374664141464968e+00f,\n       2.938163982698783e+00f\n    };\n    static const float d[4] = {\n      7.784695709041462e-03f,\n      3.224671290700398e-01f,\n      2.445134137142996e+00f,\n      3.754408661907416e+00f\n    };\n    static const float low  = 0.02425f;\n    static const float high = 0.97575f;\n    float q, r;\n\n    if (p < 0 || p > 1) {\n      return 0.0f;\n    } else if (p == 0) {\n      return -SIMDE_MATH_INFINITYF;\n    } else if (p == 1) {\n      return SIMDE_MATH_INFINITYF;\n    } else if (p < low) {\n      q = 
simde_math_sqrtf(-2.0f * simde_math_logf(p));\n      return\n        (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) /\n        (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1));\n    } else if (p > high) {\n      q = simde_math_sqrtf(-2.0f * simde_math_logf(1.0f - p));\n      return\n        -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) /\n         (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1));\n    } else {\n      q = p - 0.5f;\n      r = q * q;\n      return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) *\n         q / (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1);\n    }\n  }\n  #define simde_math_cdfnorminvf simde_math_cdfnorminvf\n#endif\n\n#if !defined(simde_math_erfinv) && defined(simde_math_log) && defined(simde_math_copysign) && defined(simde_math_sqrt)\n  static HEDLEY_INLINE\n  double\n  simde_math_erfinv(double x) {\n    /* https://stackoverflow.com/questions/27229371/inverse-error-function-in-c\n     *\n     * The original answer on SO uses a constant of 0.147, but in my\n     * testing 0.14829094707965850830078125 gives a lower average absolute error\n     * (0.0001410958211636170744895935 vs. 0.0001465479290345683693885803).\n     * That said, if your goal is to minimize the *maximum* absolute\n     * error, 0.15449436008930206298828125 provides significantly better\n     * results; 0.0009250640869140625000000000 vs ~ 0.005. */\n    double tt1, tt2, lnx;\n    double sgn = simde_math_copysign(1.0, x);\n\n    x = (1.0 - x) * (1.0 + x);\n    lnx = simde_math_log(x);\n\n    tt1 = 2.0 / (SIMDE_MATH_PI * 0.14829094707965850830078125) + 0.5 * lnx;\n    tt2 = (1.0 / 0.14829094707965850830078125) * lnx;\n\n    return sgn * simde_math_sqrt(-tt1 + simde_math_sqrt(tt1 * tt1 - tt2));\n  }\n  #define simde_math_erfinv simde_math_erfinv\n#endif\n\n#if !defined(simde_math_erfinvf) && defined(simde_math_logf) && defined(simde_math_copysignf) && defined(simde_math_sqrtf)\n  static HEDLEY_INLINE\n  float\n  simde_math_erfinvf(float x) {\n    float tt1, tt2, lnx;\n    float sgn = simde_math_copysignf(1.0f, x);\n\n    x = (1.0f - x) * (1.0f + x);\n    lnx = simde_math_logf(x);\n\n    tt1 = 2.0f / (SIMDE_MATH_PIF * 0.14829094707965850830078125f) + 0.5f * lnx;\n    tt2 = (1.0f / 0.14829094707965850830078125f) * lnx;\n\n    return sgn * simde_math_sqrtf(-tt1 + simde_math_sqrtf(tt1 * tt1 - tt2));\n  }\n  #define simde_math_erfinvf simde_math_erfinvf\n#endif\n\n#if !defined(simde_math_erfcinv) && defined(simde_math_erfinv) && defined(simde_math_log) && defined(simde_math_sqrt)\n  static HEDLEY_INLINE\n  double\n  simde_math_erfcinv(double x) {\n    if(x >= 0.0625 && x < 2.0) {\n      return simde_math_erfinv(1.0 - x);\n    } else if (x < 0.0625 && x >= 1.0e-100) {\n      static const double p[6] = {\n        0.1550470003116,\n        1.382719649631,\n        0.690969348887,\n        -1.128081391617,\n        0.680544246825,\n        -0.16444156791\n      };\n      static const double q[3] = {\n        0.155024849822,\n        1.385228141995,\n        1.000000000000\n      };\n\n      const double t = 1.0 / simde_math_sqrt(-simde_math_log(x));\n      return (p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) /\n            (q[0] + t * (q[1] + t * (q[2])));\n    } else if (x < 1.0e-100 && x >= SIMDE_MATH_DBL_MIN) {\n      static const double p[4] = {\n        0.00980456202915,\n        0.363667889171,\n        0.97302949837,\n        -0.5374947401\n      };\n      static const 
double q[3] = {\n        0.00980451277802,\n        0.363699971544,\n        1.000000000000\n      };\n\n      const double t = 1.0 / simde_math_sqrt(-simde_math_log(x));\n      return (p[0] / t + p[1] + t * (p[2] + t * p[3])) /\n             (q[0] + t * (q[1] + t * (q[2])));\n    } else if (!simde_math_isnormal(x)) {\n      return SIMDE_MATH_INFINITY;\n    } else {\n      return -SIMDE_MATH_INFINITY;\n    }\n  }\n\n  #define simde_math_erfcinv simde_math_erfcinv\n#endif\n\n#if !defined(simde_math_erfcinvf) && defined(simde_math_erfinvf) && defined(simde_math_logf) && defined(simde_math_sqrtf)\n  static HEDLEY_INLINE\n  float\n  simde_math_erfcinvf(float x) {\n    if(x >= 0.0625f && x < 2.0f) {\n      return simde_math_erfinvf(1.0f - x);\n    } else if (x < 0.0625f && x >= SIMDE_MATH_FLT_MIN) {\n      static const float p[6] = {\n         0.1550470003116f,\n         1.382719649631f,\n         0.690969348887f,\n        -1.128081391617f,\n         0.680544246825f,\n        -0.164441567910f\n      };\n      static const float q[3] = {\n        0.155024849822f,\n        1.385228141995f,\n        1.000000000000f\n      };\n\n      const float t = 1.0f / simde_math_sqrtf(-simde_math_logf(x));\n      return (p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) /\n             (q[0] + t * (q[1] + t * (q[2])));\n    } else if (x < SIMDE_MATH_FLT_MIN && simde_math_isnormalf(x)) {\n      static const float p[4] = {\n        0.00980456202915f,\n        0.36366788917100f,\n        0.97302949837000f,\n        -0.5374947401000f\n      };\n      static const float q[3] = {\n        0.00980451277802f,\n        0.36369997154400f,\n        1.00000000000000f\n      };\n\n      const float t = 1.0f / simde_math_sqrtf(-simde_math_logf(x));\n      return (p[0] / t + p[1] + t * (p[2] + t * p[3])) /\n             (q[0] + t * (q[1] + t * (q[2])));\n    } else {\n      return simde_math_isnormalf(x) ? 
-SIMDE_MATH_INFINITYF : SIMDE_MATH_INFINITYF;\n    }\n  }\n\n  #define simde_math_erfcinvf simde_math_erfcinvf\n#endif\n\nstatic HEDLEY_INLINE\ndouble\nsimde_math_rad2deg(double radians) {\n return radians * SIMDE_MATH_180_OVER_PI;\n}\n\nstatic HEDLEY_INLINE\nfloat\nsimde_math_rad2degf(float radians) {\n    return radians * SIMDE_MATH_180_OVER_PIF;\n}\n\nstatic HEDLEY_INLINE\ndouble\nsimde_math_deg2rad(double degrees) {\n  return degrees * SIMDE_MATH_PI_OVER_180;\n}\n\nstatic HEDLEY_INLINE\nfloat\nsimde_math_deg2radf(float degrees) {\n    return degrees * (SIMDE_MATH_PI_OVER_180F);\n}\n\n/***  Saturated arithmetic ***/\n\nstatic HEDLEY_INLINE\nint8_t\nsimde_math_adds_i8(int8_t a, int8_t b) {\n  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n    return vqaddb_s8(a, b);\n  #else\n    uint8_t a_ = HEDLEY_STATIC_CAST(uint8_t, a);\n    uint8_t b_ = HEDLEY_STATIC_CAST(uint8_t, b);\n    uint8_t r_ = a_ + b_;\n\n    a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT8_MAX;\n    if (HEDLEY_STATIC_CAST(int8_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) {\n      r_ = a_;\n    }\n\n    return HEDLEY_STATIC_CAST(int8_t, r_);\n  #endif\n}\n\nstatic HEDLEY_INLINE\nint16_t\nsimde_math_adds_i16(int16_t a, int16_t b) {\n  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n    return vqaddh_s16(a, b);\n  #else\n    uint16_t a_ = HEDLEY_STATIC_CAST(uint16_t, a);\n    uint16_t b_ = HEDLEY_STATIC_CAST(uint16_t, b);\n    uint16_t r_ = a_ + b_;\n\n    a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT16_MAX;\n    if (HEDLEY_STATIC_CAST(int16_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) {\n      r_ = a_;\n    }\n\n    return HEDLEY_STATIC_CAST(int16_t, r_);\n  #endif\n}\n\nstatic HEDLEY_INLINE\nint32_t\nsimde_math_adds_i32(int32_t a, int32_t b) {\n  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n    return vqadds_s32(a, b);\n  #else\n    uint32_t a_ = HEDLEY_STATIC_CAST(uint32_t, a);\n    uint32_t b_ = HEDLEY_STATIC_CAST(uint32_t, b);\n    uint32_t r_ = a_ + b_;\n\n    a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT32_MAX;\n    if (HEDLEY_STATIC_CAST(int32_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) {\n      r_ = a_;\n    }\n\n    return HEDLEY_STATIC_CAST(int32_t, r_);\n  #endif\n}\n\nstatic HEDLEY_INLINE\nint64_t\nsimde_math_adds_i64(int64_t a, int64_t b) {\n  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n    return vqaddd_s64(a, b);\n  #else\n    uint64_t a_ = HEDLEY_STATIC_CAST(uint64_t, a);\n    uint64_t b_ = HEDLEY_STATIC_CAST(uint64_t, b);\n    uint64_t r_ = a_ + b_;\n\n    a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT64_MAX;\n    if (HEDLEY_STATIC_CAST(int64_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) {\n      r_ = a_;\n    }\n\n    return HEDLEY_STATIC_CAST(int64_t, r_);\n  #endif\n}\n\nstatic HEDLEY_INLINE\nuint8_t\nsimde_math_adds_u8(uint8_t a, uint8_t b) {\n  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n    return vqaddb_u8(a, b);\n  #else\n    uint8_t r = a + b;\n    r |= -(r < a);\n    return r;\n  #endif\n}\n\nstatic HEDLEY_INLINE\nuint16_t\nsimde_math_adds_u16(uint16_t a, uint16_t b) {\n  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n    return vqaddh_u16(a, b);\n  #else\n    uint16_t r = a + b;\n    r |= -(r < a);\n    return r;\n  #endif\n}\n\nstatic HEDLEY_INLINE\nuint32_t\nsimde_math_adds_u32(uint32_t a, uint32_t b) {\n  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n    return vqadds_u32(a, b);\n  #else\n    uint32_t r = a + b;\n    r |= -(r < a);\n    return r;\n  #endif\n}\n\nstatic HEDLEY_INLINE\nuint64_t\nsimde_math_adds_u64(uint64_t a, uint64_t b) {\n  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n    return vqaddd_u64(a, b);\n  #else\n    uint64_t r = a + b;\n    r |= -(r < a);\n    return 
r;\n  #endif\n}\n\nstatic HEDLEY_INLINE\nint8_t\nsimde_math_subs_i8(int8_t a, int8_t b) {\n  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n    return vqsubb_s8(a, b);\n  #else\n    uint8_t a_ = HEDLEY_STATIC_CAST(uint8_t, a);\n    uint8_t b_ = HEDLEY_STATIC_CAST(uint8_t, b);\n    uint8_t r_ = a_ - b_;\n\n    a_ = (a_ >> 7) + INT8_MAX;\n\n    if (HEDLEY_STATIC_CAST(int8_t, (a_ ^ b_) & (a_ ^ r_)) < 0) {\n      r_ = a_;\n    }\n\n    return HEDLEY_STATIC_CAST(int8_t, r_);\n  #endif\n}\n\nstatic HEDLEY_INLINE\nint16_t\nsimde_math_subs_i16(int16_t a, int16_t b) {\n  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n    return vqsubh_s16(a, b);\n  #else\n    uint16_t a_ = HEDLEY_STATIC_CAST(uint16_t, a);\n    uint16_t b_ = HEDLEY_STATIC_CAST(uint16_t, b);\n    uint16_t r_ = a_ - b_;\n\n    a_ = (a_ >> 15) + INT16_MAX;\n\n    if (HEDLEY_STATIC_CAST(int16_t, (a_ ^ b_) & (a_ ^ r_)) < 0) {\n      r_ = a_;\n    }\n\n    return HEDLEY_STATIC_CAST(int16_t, r_);\n  #endif\n}\n\nstatic HEDLEY_INLINE\nint32_t\nsimde_math_subs_i32(int32_t a, int32_t b) {\n  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n    return vqsubs_s32(a, b);\n  #else\n    uint32_t a_ = HEDLEY_STATIC_CAST(uint32_t, a);\n    uint32_t b_ = HEDLEY_STATIC_CAST(uint32_t, b);\n    uint32_t r_ = a_ - b_;\n\n    a_ = (a_ >> 31) + INT32_MAX;\n\n    if (HEDLEY_STATIC_CAST(int32_t, (a_ ^ b_) & (a_ ^ r_)) < 0) {\n      r_ = a_;\n    }\n\n    return HEDLEY_STATIC_CAST(int32_t, r_);\n  #endif\n}\n\nstatic HEDLEY_INLINE\nint64_t\nsimde_math_subs_i64(int64_t a, int64_t b) {\n  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n    return vqsubd_s64(a, b);\n  #else\n    uint64_t a_ = HEDLEY_STATIC_CAST(uint64_t, a);\n    uint64_t b_ = HEDLEY_STATIC_CAST(uint64_t, b);\n    uint64_t r_ = a_ - b_;\n\n    a_ = (a_ >> 63) + INT64_MAX;\n\n    if (HEDLEY_STATIC_CAST(int64_t, (a_ ^ b_) & (a_ ^ r_)) < 0) {\n      r_ = a_;\n    }\n\n    return HEDLEY_STATIC_CAST(int64_t, r_);\n  #endif\n}\n\nstatic HEDLEY_INLINE\nuint8_t\nsimde_math_subs_u8(uint8_t a, uint8_t b) {\n  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n    return vqsubb_u8(a, b);\n  #else\n    uint8_t res = a - b;\n    res &= -(res <= a);\n    return res;\n  #endif\n}\n\nstatic HEDLEY_INLINE\nuint16_t\nsimde_math_subs_u16(uint16_t a, uint16_t b) {\n  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n    return vqsubh_u16(a, b);\n  #else\n    uint16_t res = a - b;\n    res &= -(res <= a);\n    return res;\n  #endif\n}\n\nstatic HEDLEY_INLINE\nuint32_t\nsimde_math_subs_u32(uint32_t a, uint32_t b) {\n  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n    return vqsubs_u32(a, b);\n  #else\n    uint32_t res = a - b;\n    res &= -(res <= a);\n    return res;\n  #endif\n}\n\nstatic HEDLEY_INLINE\nuint64_t\nsimde_math_subs_u64(uint64_t a, uint64_t b) {\n  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n    return vqsubd_u64(a, b);\n  #else\n    uint64_t res = a - b;\n    res &= -(res <= a);\n    return res;\n  #endif\n}\n\nHEDLEY_DIAGNOSTIC_POP\n\n#endif /* !defined(SIMDE_MATH_H) */\n"
  },
  {
    "path": "external_libs/pgenlib/simde/x86/aes.h",
    "content": "/* MIT License\n *\n * Permission is hereby granted, free of charge, to any person\n * obtaining a copy of this software and associated documentation\n * files (the \"Software\"), to deal in the Software without\n * restriction, including without limitation the rights to use, copy,\n * modify, merge, publish, distribute, sublicense, and/or sell copies\n * of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be\n * included in all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND,\n * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\n * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\n * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\n * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\n * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\n * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n * SOFTWARE.\n *\n */\n\n#if !defined(SIMDE_X86_AES_H)\n#define SIMDE_X86_AES_H\n\n/*\n * Advanced Encryption Standard\n * @author Dani Huertas\n * @email huertas.dani@gmail.com\n *\n * Based on the document FIPS PUB 197\n */\n\n#include \"sse2.h\"\n\n/*\n * Multiplication in GF(2^8)\n * http://en.wikipedia.org/wiki/Finite_field_arithmetic\n * Irreducible polynomial m(x) = x8 + x4 + x3 + x + 1\n *\n * NOTE: This function can be easily replaced with a look up table for a speed\n *       boost, at the expense of an increase in memory size.\n\nSIMDE_FUNCTION_ATTRIBUTES\nuint8_t gmult(uint8_t a, uint8_t b) {\n  uint8_t p = 0, i = 0, hbs = 0;\n\n  for (i = 0; i < 8; i++) {\n    if (b & 1) {\n      p ^= a;\n    }\n\n    hbs = a & 0x80;\n    a <<= 1;\n    if (hbs) a ^= 0x1b; // 0000 0001 0001 1011\n    b >>= 1;\n  }\n\n  return (uint8_t)p;\n}\n */\n\n#if !(defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARCH_ARM_CRYPTO))\n\n#include \"../simde-aes.h\"\n\n/*\n * Transformation in the Cipher and Inverse Cipher in which a Round\n * Key is added to the State using an XOR operation. 
The length of a\n * Round Key equals the size of the State (i.e., for Nb = 4, the Round\n * Key length equals 128 bits/16 bytes).\n */\nSIMDE_FUNCTION_ATTRIBUTES\nvoid simde_x_aes_add_round_key(uint8_t *state, simde__m128i_private w, uint8_t r) {\n\n  int Nb = simde_x_aes_Nb;\n  uint8_t c;\n\n  for (c = 0; c < Nb; c++) {\n    state[Nb*0+c] = state[Nb*0+c]^w.u8[4*Nb*r+4*c+0];\n    state[Nb*1+c] = state[Nb*1+c]^w.u8[4*Nb*r+4*c+1];\n    state[Nb*2+c] = state[Nb*2+c]^w.u8[4*Nb*r+4*c+2];\n    state[Nb*3+c] = state[Nb*3+c]^w.u8[4*Nb*r+4*c+3];\n  }\n}\n\n/*\n * Transformation in the Cipher that takes all of the columns of the\n * State and mixes their data (independently of one another) to\n * produce new columns.\n */\nSIMDE_FUNCTION_ATTRIBUTES\nvoid simde_x_aes_mix_columns(uint8_t *state) {\n\n  int Nb = simde_x_aes_Nb;\n  // uint8_t k[] = {0x02, 0x01, 0x01, 0x03}; // a(x) = {02} + {01}x + {01}x2 + {03}x3\n  uint8_t i, j, col[4], res[4];\n\n  for (j = 0; j < Nb; j++) {\n    for (i = 0; i < 4; i++) {\n      col[i] = state[Nb*i+j];\n    }\n\n    //coef_mult(k, col, res);\n    simde_x_aes_coef_mult_lookup(0, col, res);\n\n    for (i = 0; i < 4; i++) {\n      state[Nb*i+j] = res[i];\n    }\n  }\n}\n\n/*\n * Transformation in the Inverse Cipher that is the inverse of\n * MixColumns().\n */\nSIMDE_FUNCTION_ATTRIBUTES\nvoid simde_x_aes_inv_mix_columns(uint8_t *state) {\n\n  int Nb = simde_x_aes_Nb;\n  // uint8_t k[] = {0x0e, 0x09, 0x0d, 0x0b}; // a(x) = {0e} + {09}x + {0d}x2 + {0b}x3\n  uint8_t i, j, col[4], res[4];\n\n  for (j = 0; j < Nb; j++) {\n    for (i = 0; i < 4; i++) {\n      col[i] = state[Nb*i+j];\n    }\n\n    //coef_mult(k, col, res);\n    simde_x_aes_coef_mult_lookup(4, col, res);\n\n    for (i = 0; i < 4; i++) {\n      state[Nb*i+j] = res[i];\n    }\n  }\n}\n\n/*\n * Transformation in the Cipher that processes the State by cyclically\n * shifting the last three rows of the State by different offsets.\n */\nSIMDE_FUNCTION_ATTRIBUTES\nvoid simde_x_aes_shift_rows(uint8_t *state) {\n\n  int Nb = simde_x_aes_Nb;\n  uint8_t i, k, s, tmp;\n\n  for (i = 1; i < 4; i++) {\n    // shift(1,4)=1; shift(2,4)=2; shift(3,4)=3\n    // shift(r, 4) = r;\n    s = 0;\n    while (s < i) {\n      tmp = state[Nb*i+0];\n\n      for (k = 1; k < Nb; k++) {\n        state[Nb*i+k-1] = state[Nb*i+k];\n      }\n\n      state[Nb*i+Nb-1] = tmp;\n      s++;\n    }\n  }\n}\n\n/*\n * Transformation in the Inverse Cipher that is the inverse of\n * ShiftRows().\n */\nSIMDE_FUNCTION_ATTRIBUTES\nvoid simde_x_aes_inv_shift_rows(uint8_t *state) {\n\n  uint8_t Nb = simde_x_aes_Nb;\n  uint8_t i, k, s, tmp;\n\n  for (i = 1; i < 4; i++) {\n    s = 0;\n    while (s < i) {\n      tmp = state[Nb*i+Nb-1];\n\n      for (k = Nb-1; k > 0; k--) {\n        state[Nb*i+k] = state[Nb*i+k-1];\n      }\n\n      state[Nb*i+0] = tmp;\n      s++;\n    }\n  }\n}\n\n/*\n * Transformation in the Cipher that processes the State using a non\n * linear byte substitution table (S-box) that operates on each of the\n * State bytes independently.\n */\nSIMDE_FUNCTION_ATTRIBUTES\nvoid simde_x_aes_sub_bytes(uint8_t *state) {\n\n  int Nb = simde_x_aes_Nb;\n  uint8_t i, j;\n\n  for (i = 0; i < 4; i++) {\n    for (j = 0; j < Nb; j++) {\n      // s_box row: yyyy ----\n      // s_box col: ---- xxxx\n      // s_box[16*(yyyy) + xxxx] == s_box[yyyyxxxx]\n      state[Nb*i+j] = simde_x_aes_s_box[state[Nb*i+j]];\n    }\n  }\n}\n\n/*\n * Transformation in the Inverse Cipher that is the inverse of\n * SubBytes().\n */\nSIMDE_FUNCTION_ATTRIBUTES\nvoid 
simde_x_aes_inv_sub_bytes(uint8_t *state) {\n\n  int Nb = simde_x_aes_Nb;\n  uint8_t i, j;\n\n  for (i = 0; i < 4; i++) {\n    for (j = 0; j < Nb; j++) {\n      state[Nb*i+j] = simde_x_aes_inv_s_box[state[Nb*i+j]];\n    }\n  }\n}\n\n/*\n * Performs the AES cipher operation\n */\nSIMDE_FUNCTION_ATTRIBUTES\nvoid simde_x_aes_enc(simde__m128i_private in, simde__m128i_private *out, simde__m128i_private w, int is_last) {\n\n  int Nb = simde_x_aes_Nb;\n  uint8_t state[4*simde_x_aes_Nb];\n  uint8_t r = 0, i, j;\n\n  for (i = 0; i < 4; i++) {\n    for (j = 0; j < Nb; j++) {\n      state[Nb*i+j] = in.u8[i+4*j];\n    }\n  }\n\n  simde_x_aes_sub_bytes(state);\n  simde_x_aes_shift_rows(state);\n\n  if (!is_last)\n    simde_x_aes_mix_columns(state);\n\n  simde_x_aes_add_round_key(state, w, r);\n\n  for (i = 0; i < 4; i++) {\n    for (j = 0; j < Nb; j++) {\n      out->u8[i+4*j] = state[Nb*i+j];\n    }\n  }\n}\n\n/*\n * Performs the AES inverse cipher operation\n */\nSIMDE_FUNCTION_ATTRIBUTES\nvoid simde_x_aes_dec(simde__m128i_private in, simde__m128i_private *out, simde__m128i_private w, int is_last) {\n\n  int Nb = simde_x_aes_Nb;\n  uint8_t state[4*simde_x_aes_Nb];\n  uint8_t r = 0, i, j;\n\n  for (i = 0; i < 4; i++) {\n    for (j = 0; j < Nb; j++) {\n      state[Nb*i+j] = in.u8[i+4*j];\n    }\n  }\n\n  simde_x_aes_inv_shift_rows(state);\n  simde_x_aes_inv_sub_bytes(state);\n\n  if (!is_last)\n    simde_x_aes_inv_mix_columns(state);\n\n  simde_x_aes_add_round_key(state, w, r);\n\n  for (i = 0; i < 4; i++) {\n    for (j = 0; j < Nb; j++) {\n      out->u8[i+4*j] = state[Nb*i+j];\n    }\n  }\n}\n#endif // if !(defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARCH_ARM_CRYPTO))\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i simde_mm_aesenc_si128(simde__m128i a, simde__m128i round_key) {\n  #if defined(SIMDE_X86_AES_NATIVE)\n    return _mm_aesenc_si128(a, round_key);\n  #else\n    simde__m128i_private result_;\n    simde__m128i_private a_ = simde__m128i_to_private(a);\n    simde__m128i_private round_key_ = simde__m128i_to_private(round_key);\n    #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARCH_ARM_CRYPTO)\n      result_.neon_u8 = veorq_u8(\n        vaesmcq_u8(vaeseq_u8(a_.neon_u8, vdupq_n_u8(0))),\n        round_key_.neon_u8);\n    #else\n      simde_x_aes_enc(a_, &result_, round_key_, 0);\n    #endif\n    return simde__m128i_from_private(result_);\n  #endif\n}\n#if defined(SIMDE_X86_AES_ENABLE_NATIVE_ALIASES)\n  #define _mm_aesenc_si128(a, b) simde_mm_aesenc_si128(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i simde_mm_aesdec_si128(simde__m128i a, simde__m128i round_key) {\n  #if defined(SIMDE_X86_AES_NATIVE)\n    return _mm_aesdec_si128(a, round_key);\n  #else\n    simde__m128i_private result_;\n    simde__m128i_private a_ = simde__m128i_to_private(a);\n    simde__m128i_private round_key_ = simde__m128i_to_private(round_key);\n    #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARCH_ARM_CRYPTO)\n      result_.neon_u8 = veorq_u8(\n        vaesimcq_u8(vaesdq_u8(a_.neon_u8, vdupq_n_u8(0))),\n        round_key_.neon_u8);\n    #else\n      simde_x_aes_dec(a_, &result_, round_key_, 0);\n    #endif\n    return simde__m128i_from_private(result_);\n  #endif\n}\n#if defined(SIMDE_X86_AES_ENABLE_NATIVE_ALIASES)\n  #define _mm_aesdec_si128(a, b) simde_mm_aesdec_si128(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i simde_mm_aesenclast_si128(simde__m128i a, simde__m128i round_key) {\n  #if defined(SIMDE_X86_AES_NATIVE)\n    return _mm_aesenclast_si128(a, round_key);\n  
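  /* aesenclast is the final AES round: SubBytes and ShiftRows followed by\n       AddRoundKey, with MixColumns omitted (FIPS 197); the portable fallback\n       below obtains the same behaviour by passing is_last = 1 to simde_x_aes_enc(). */\n  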
#else\n    simde__m128i_private result_;\n    simde__m128i_private a_ = simde__m128i_to_private(a);\n    simde__m128i_private round_key_ = simde__m128i_to_private(round_key);\n    #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARCH_ARM_CRYPTO)\n      result_.neon_u8 = vaeseq_u8(a_.neon_u8, vdupq_n_u8(0));\n      result_.neon_i32 = veorq_s32(result_.neon_i32, round_key_.neon_i32); // _mm_xor_si128\n    #else\n      simde_x_aes_enc(a_, &result_, round_key_, 1);\n    #endif\n    return simde__m128i_from_private(result_);\n  #endif\n}\n#if defined(SIMDE_X86_AES_ENABLE_NATIVE_ALIASES)\n  #define _mm_aesenclast_si128(a, b) simde_mm_aesenclast_si128(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i simde_mm_aesdeclast_si128(simde__m128i a, simde__m128i round_key) {\n  #if defined(SIMDE_X86_AES_NATIVE)\n    return _mm_aesdeclast_si128(a, round_key);\n  #else\n    simde__m128i_private result_;\n    simde__m128i_private a_ = simde__m128i_to_private(a);\n    simde__m128i_private round_key_ = simde__m128i_to_private(round_key);\n    #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARCH_ARM_CRYPTO)\n      result_.neon_u8 = veorq_u8(\n        vaesdq_u8(a_.neon_u8, vdupq_n_u8(0)),\n        round_key_.neon_u8);\n    #else\n      simde_x_aes_dec(a_, &result_, round_key_, 1);\n    #endif\n    return simde__m128i_from_private(result_);\n  #endif\n}\n#if defined(SIMDE_X86_AES_ENABLE_NATIVE_ALIASES)\n  #define _mm_aesdeclast_si128(a, b) simde_mm_aesdeclast_si128(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i simde_mm_aesimc_si128(simde__m128i a) {\n  #if defined(SIMDE_X86_AES_NATIVE)\n    return _mm_aesimc_si128(a);\n  #else\n    simde__m128i_private result_ = simde__m128i_to_private(simde_mm_setzero_si128());\n    simde__m128i_private a_ = simde__m128i_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARCH_ARM_CRYPTO)\n      result_.neon_u8 = vaesimcq_u8(a_.neon_u8);\n    #else\n      int Nb = simde_x_aes_Nb;\n      // uint8_t k[] = {0x0e, 0x09, 0x0d, 0x0b}; // a(x) = {0e} + {09}x + {0d}x2 + {0b}x3\n      uint8_t i, j, col[4], res[4];\n\n      for (j = 0; j < Nb; j++) {\n        for (i = 0; i < 4; i++) {\n          col[i] = a_.u8[Nb*j+i];\n        }\n\n        //coef_mult(k, col, res);\n        simde_x_aes_coef_mult_lookup(4, col, res);\n\n        for (i = 0; i < 4; i++) {\n          result_.u8[Nb*j+i] = res[i];\n        }\n      }\n    #endif\n    return simde__m128i_from_private(result_);\n  #endif\n}\n#if defined(SIMDE_X86_AES_ENABLE_NATIVE_ALIASES)\n  #define _mm_aesimc_si128(a) simde_mm_aesimc_si128(a)\n#endif\n\n#undef simde_x_aes_Nb\n\n#endif /* !defined(SIMDE_X86_AES_H) */\n"
  },
  {
    "path": "external_libs/pgenlib/simde/x86/avx.h",
    "content": "/* SPDX-License-Identifier: MIT\n *\n * Permission is hereby granted, free of charge, to any person\n * obtaining a copy of this software and associated documentation\n * files (the \"Software\"), to deal in the Software without\n * restriction, including without limitation the rights to use, copy,\n * modify, merge, publish, distribute, sublicense, and/or sell copies\n * of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be\n * included in all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND,\n * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\n * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\n * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\n * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\n * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\n * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n * SOFTWARE.\n *\n * Copyright:\n *   2018-2020 Evan Nemerson <evan@nemerson.com>\n *        2020 Michael R. Crusoe <crusoe@debian.org>\n */\n\n#include \"sse.h\"\n#if !defined(SIMDE_X86_AVX_H)\n#define SIMDE_X86_AVX_H\n\n#include \"sse4.2.h\"\n#include \"../simde-f16.h\"\n\nHEDLEY_DIAGNOSTIC_PUSH\nSIMDE_DISABLE_UNWANTED_DIAGNOSTICS\nSIMDE_BEGIN_DECLS_\n\ntypedef union {\n  #if defined(SIMDE_VECTOR_SUBSCRIPT)\n    SIMDE_ALIGN_TO_32 int8_t          i8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS;\n    SIMDE_ALIGN_TO_32 int16_t        i16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS;\n    SIMDE_ALIGN_TO_32 int32_t        i32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS;\n    SIMDE_ALIGN_TO_32 int64_t        i64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS;\n    SIMDE_ALIGN_TO_32 uint8_t         u8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS;\n    SIMDE_ALIGN_TO_32 uint16_t       u16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS;\n    SIMDE_ALIGN_TO_32 uint32_t       u32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS;\n    SIMDE_ALIGN_TO_32 uint64_t       u64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS;\n    #if defined(SIMDE_HAVE_INT128_)\n    SIMDE_ALIGN_TO_32 simde_int128  i128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS;\n    SIMDE_ALIGN_TO_32 simde_uint128 u128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS;\n    #endif\n    SIMDE_ALIGN_TO_32 simde_float32  f32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS;\n    SIMDE_ALIGN_TO_32 simde_float64  f64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS;\n    SIMDE_ALIGN_TO_32 int_fast32_t  i32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS;\n    SIMDE_ALIGN_TO_32 uint_fast32_t u32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS;\n  #else\n    SIMDE_ALIGN_TO_32 int8_t          i8[32];\n    SIMDE_ALIGN_TO_32 int16_t        i16[16];\n    SIMDE_ALIGN_TO_32 int32_t        i32[8];\n    SIMDE_ALIGN_TO_32 int64_t        i64[4];\n    SIMDE_ALIGN_TO_32 uint8_t         u8[32];\n    SIMDE_ALIGN_TO_32 uint16_t       u16[16];\n    SIMDE_ALIGN_TO_32 uint32_t       u32[8];\n    SIMDE_ALIGN_TO_32 uint64_t       u64[4];\n    SIMDE_ALIGN_TO_32 int_fast32_t  i32f[32 / sizeof(int_fast32_t)];\n    SIMDE_ALIGN_TO_32 uint_fast32_t u32f[32 / sizeof(uint_fast32_t)];\n    #if defined(SIMDE_HAVE_INT128_)\n    SIMDE_ALIGN_TO_32 simde_int128  i128[2];\n    SIMDE_ALIGN_TO_32 simde_uint128 u128[2];\n    #endif\n    SIMDE_ALIGN_TO_32 simde_float32  f32[8];\n    SIMDE_ALIGN_TO_32 simde_float64  f64[4];\n  #endif\n\n    SIMDE_ALIGN_TO_32 simde__m128_private m128_private[2];\n    SIMDE_ALIGN_TO_32 simde__m128         m128[2];\n\n  #if 
defined(SIMDE_X86_AVX_NATIVE)\n    SIMDE_ALIGN_TO_32 __m256         n;\n  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)\n    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char)      altivec_u8[2];\n    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short)     altivec_u16[2];\n    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int)       altivec_u32[2];\n    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char)        altivec_i8[2];\n    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short)       altivec_i16[2];\n    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(int)                altivec_i32[2];\n    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float)              altivec_f32[2];\n    #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)\n      SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[2];\n      SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(long long)          altivec_i64[2];\n      SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double)             altivec_f64[2];\n    #endif\n  #endif\n} simde__m256_private;\n\ntypedef union {\n  #if defined(SIMDE_VECTOR_SUBSCRIPT)\n    SIMDE_ALIGN_TO_32 int8_t          i8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS;\n    SIMDE_ALIGN_TO_32 int16_t        i16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS;\n    SIMDE_ALIGN_TO_32 int32_t        i32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS;\n    SIMDE_ALIGN_TO_32 int64_t        i64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS;\n    SIMDE_ALIGN_TO_32 uint8_t         u8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS;\n    SIMDE_ALIGN_TO_32 uint16_t       u16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS;\n    SIMDE_ALIGN_TO_32 uint32_t       u32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS;\n    SIMDE_ALIGN_TO_32 uint64_t       u64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS;\n    #if defined(SIMDE_HAVE_INT128_)\n    SIMDE_ALIGN_TO_32 simde_int128  i128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS;\n    SIMDE_ALIGN_TO_32 simde_uint128 u128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS;\n    #endif\n    SIMDE_ALIGN_TO_32 simde_float32  f32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS;\n    SIMDE_ALIGN_TO_32 simde_float64  f64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS;\n    SIMDE_ALIGN_TO_32 int_fast32_t  i32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS;\n    SIMDE_ALIGN_TO_32 uint_fast32_t u32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS;\n  #else\n    SIMDE_ALIGN_TO_32 int8_t          i8[32];\n    SIMDE_ALIGN_TO_32 int16_t        i16[16];\n    SIMDE_ALIGN_TO_32 int32_t        i32[8];\n    SIMDE_ALIGN_TO_32 int64_t        i64[4];\n    SIMDE_ALIGN_TO_32 uint8_t         u8[32];\n    SIMDE_ALIGN_TO_32 uint16_t       u16[16];\n    SIMDE_ALIGN_TO_32 uint32_t       u32[8];\n    SIMDE_ALIGN_TO_32 uint64_t       u64[4];\n    #if defined(SIMDE_HAVE_INT128_)\n    SIMDE_ALIGN_TO_32 simde_int128  i128[2];\n    SIMDE_ALIGN_TO_32 simde_uint128 u128[2];\n    #endif\n    SIMDE_ALIGN_TO_32 simde_float32  f32[8];\n    SIMDE_ALIGN_TO_32 simde_float64  f64[4];\n    SIMDE_ALIGN_TO_32 int_fast32_t  i32f[32 / sizeof(int_fast32_t)];\n    SIMDE_ALIGN_TO_32 uint_fast32_t u32f[32 / sizeof(uint_fast32_t)];\n  #endif\n\n    SIMDE_ALIGN_TO_32 simde__m128d_private m128d_private[2];\n    SIMDE_ALIGN_TO_32 simde__m128d         m128d[2];\n\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    SIMDE_ALIGN_TO_32 __m256d        n;\n  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)\n    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char)      altivec_u8[2];\n    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short)     altivec_u16[2];\n    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int)       altivec_u32[2];\n    SIMDE_ALIGN_TO_16 
SIMDE_POWER_ALTIVEC_VECTOR(signed char)        altivec_i8[2];\n    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short)       altivec_i16[2];\n    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int)         altivec_i32[2];\n    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float)              altivec_f32[2];\n    #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)\n      SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[2];\n      SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long)   altivec_i64[2];\n      SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double)             altivec_f64[2];\n    #endif\n  #endif\n} simde__m256d_private;\n\ntypedef union {\n  #if defined(SIMDE_VECTOR_SUBSCRIPT)\n    SIMDE_ALIGN_TO_32 int8_t          i8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS;\n    SIMDE_ALIGN_TO_32 int16_t        i16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS;\n    SIMDE_ALIGN_TO_32 int32_t        i32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS;\n    SIMDE_ALIGN_TO_32 int64_t        i64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS;\n    SIMDE_ALIGN_TO_32 uint8_t         u8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS;\n    SIMDE_ALIGN_TO_32 uint16_t       u16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS;\n    SIMDE_ALIGN_TO_32 uint32_t       u32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS;\n    SIMDE_ALIGN_TO_32 uint64_t       u64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS;\n    #if defined(SIMDE_HAVE_INT128_)\n    SIMDE_ALIGN_TO_32 simde_int128  i128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS;\n    SIMDE_ALIGN_TO_32 simde_uint128 u128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS;\n    #endif\n    #if defined(SIMDE_FLOAT16_VECTOR)\n    SIMDE_ALIGN_TO_32 simde_float16  f16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS;\n    #else\n    SIMDE_ALIGN_TO_32 simde_float16  f16[16];\n    #endif\n    SIMDE_ALIGN_TO_32 simde_float32  f32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS;\n    SIMDE_ALIGN_TO_32 simde_float64  f64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS;\n    SIMDE_ALIGN_TO_32 int_fast32_t  i32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS;\n    SIMDE_ALIGN_TO_32 uint_fast32_t u32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS;\n  #else\n    SIMDE_ALIGN_TO_32 int8_t          i8[32];\n    SIMDE_ALIGN_TO_32 int16_t        i16[16];\n    SIMDE_ALIGN_TO_32 int32_t        i32[8];\n    SIMDE_ALIGN_TO_32 int64_t        i64[4];\n    SIMDE_ALIGN_TO_32 uint8_t         u8[32];\n    SIMDE_ALIGN_TO_32 uint16_t       u16[16];\n    SIMDE_ALIGN_TO_32 uint32_t       u32[8];\n    SIMDE_ALIGN_TO_32 uint64_t       u64[4];\n    SIMDE_ALIGN_TO_32 int_fast32_t  i32f[32 / sizeof(int_fast32_t)];\n    SIMDE_ALIGN_TO_32 uint_fast32_t u32f[32 / sizeof(uint_fast32_t)];\n    #if defined(SIMDE_HAVE_INT128_)\n    SIMDE_ALIGN_TO_32 simde_int128  i128[2];\n    SIMDE_ALIGN_TO_32 simde_uint128 u128[2];\n    #endif\n    SIMDE_ALIGN_TO_32 simde_float16  f16[16];\n    SIMDE_ALIGN_TO_32 simde_float32  f32[8];\n    SIMDE_ALIGN_TO_32 simde_float64  f64[4];\n  #endif\n\n    SIMDE_ALIGN_TO_32 simde__m128i_private m128i_private[2];\n    SIMDE_ALIGN_TO_32 simde__m128i         m128i[2];\n\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    SIMDE_ALIGN_TO_32 __m256i        n;\n  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)\n    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char)      altivec_u8[2];\n    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short)     altivec_u16[2];\n    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int)       altivec_u32[2];\n    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char)        altivec_i8[2];\n    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short)       altivec_i16[2];\n    SIMDE_ALIGN_TO_16 
SIMDE_POWER_ALTIVEC_VECTOR(signed int)         altivec_i32[2];\n    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float)              altivec_f32[2];\n    #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)\n      SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[2];\n      SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long)   altivec_i64[2];\n      SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double)             altivec_f64[2];\n    #endif\n  #endif\n} simde__m256i_private;\n\n#if defined(SIMDE_X86_AVX_NATIVE)\n  typedef __m256 simde__m256;\n  typedef __m256i simde__m256i;\n  typedef __m256d simde__m256d;\n#elif defined(SIMDE_VECTOR_SUBSCRIPT)\n  typedef simde_float32 simde__m256  SIMDE_ALIGN_TO_32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS;\n  typedef int_fast32_t  simde__m256i SIMDE_ALIGN_TO_32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS;\n  typedef simde_float64 simde__m256d SIMDE_ALIGN_TO_32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS;\n#else\n  typedef simde__m256_private  simde__m256;\n  typedef simde__m256i_private simde__m256i;\n  typedef simde__m256d_private simde__m256d;\n#endif\n\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #if !defined(HEDLEY_INTEL_VERSION) && !defined(_AVXINTRIN_H_INCLUDED) && !defined(__AVXINTRIN_H) && !defined(_CMP_EQ_OQ)\n    typedef simde__m256 __m256;\n    typedef simde__m256i __m256i;\n    typedef simde__m256d __m256d;\n  #else\n    #undef __m256\n    #define __m256 simde__m256\n    #undef __m256i\n    #define __m256i simde__m256i\n    #undef __m256d\n    #define __m256d simde__m256d\n  #endif\n#endif\n\nHEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256), \"simde__m256 size incorrect\");\nHEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256_private), \"simde__m256_private size incorrect\");\nHEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256i), \"simde__m256i size incorrect\");\nHEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256i_private), \"simde__m256i_private size incorrect\");\nHEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256d), \"simde__m256d size incorrect\");\nHEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256d_private), \"simde__m256d_private size incorrect\");\n#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF)\nHEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256) == 32, \"simde__m256 is not 32-byte aligned\");\nHEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256_private) == 32, \"simde__m256_private is not 32-byte aligned\");\nHEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256i) == 32, \"simde__m256i is not 32-byte aligned\");\nHEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256i_private) == 32, \"simde__m256i_private is not 32-byte aligned\");\nHEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256d) == 32, \"simde__m256d is not 32-byte aligned\");\nHEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256d_private) == 32, \"simde__m256d_private is not 32-byte aligned\");\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde__m256_from_private(simde__m256_private v) {\n  simde__m256 r;\n  simde_memcpy(&r, &v, sizeof(r));\n  return r;\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256_private\nsimde__m256_to_private(simde__m256 v) {\n  simde__m256_private r;\n  simde_memcpy(&r, &v, sizeof(r));\n  return r;\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde__m256i_from_private(simde__m256i_private v) {\n  simde__m256i r;\n  simde_memcpy(&r, &v, sizeof(r));\n  return r;\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i_private\nsimde__m256i_to_private(simde__m256i v) {\n  simde__m256i_private r;\n  simde_memcpy(&r, &v, sizeof(r));\n  return 
r;\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde__m256d_from_private(simde__m256d_private v) {\n  simde__m256d r;\n  simde_memcpy(&r, &v, sizeof(r));\n  return r;\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d_private\nsimde__m256d_to_private(simde__m256d v) {\n  simde__m256d_private r;\n  simde_memcpy(&r, &v, sizeof(r));\n  return r;\n}\n\n#define SIMDE_CMP_EQ_OQ     0\n#define SIMDE_CMP_LT_OS     1\n#define SIMDE_CMP_LE_OS     2\n#define SIMDE_CMP_UNORD_Q   3\n#define SIMDE_CMP_NEQ_UQ    4\n#define SIMDE_CMP_NLT_US    5\n#define SIMDE_CMP_NLE_US    6\n#define SIMDE_CMP_ORD_Q     7\n#define SIMDE_CMP_EQ_UQ     8\n#define SIMDE_CMP_NGE_US    9\n#define SIMDE_CMP_NGT_US   10\n#define SIMDE_CMP_FALSE_OQ 11\n#define SIMDE_CMP_NEQ_OQ   12\n#define SIMDE_CMP_GE_OS    13\n#define SIMDE_CMP_GT_OS    14\n#define SIMDE_CMP_TRUE_UQ  15\n#define SIMDE_CMP_EQ_OS    16\n#define SIMDE_CMP_LT_OQ    17\n#define SIMDE_CMP_LE_OQ    18\n#define SIMDE_CMP_UNORD_S  19\n#define SIMDE_CMP_NEQ_US   20\n#define SIMDE_CMP_NLT_UQ   21\n#define SIMDE_CMP_NLE_UQ   22\n#define SIMDE_CMP_ORD_S    23\n#define SIMDE_CMP_EQ_US    24\n#define SIMDE_CMP_NGE_UQ   25\n#define SIMDE_CMP_NGT_UQ   26\n#define SIMDE_CMP_FALSE_OS 27\n#define SIMDE_CMP_NEQ_OS   28\n#define SIMDE_CMP_GE_OQ    29\n#define SIMDE_CMP_GT_OQ    30\n#define SIMDE_CMP_TRUE_US  31\n\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) && !defined(_CMP_EQ_OQ)\n#define _CMP_EQ_OQ SIMDE_CMP_EQ_OQ\n#define _CMP_LT_OS SIMDE_CMP_LT_OS\n#define _CMP_LE_OS SIMDE_CMP_LE_OS\n#define _CMP_UNORD_Q SIMDE_CMP_UNORD_Q\n#define _CMP_NEQ_UQ SIMDE_CMP_NEQ_UQ\n#define _CMP_NLT_US SIMDE_CMP_NLT_US\n#define _CMP_NLE_US SIMDE_CMP_NLE_US\n#define _CMP_ORD_Q SIMDE_CMP_ORD_Q\n#define _CMP_EQ_UQ SIMDE_CMP_EQ_UQ\n#define _CMP_NGE_US SIMDE_CMP_NGE_US\n#define _CMP_NGT_US SIMDE_CMP_NGT_US\n#define _CMP_FALSE_OQ SIMDE_CMP_FALSE_OQ\n#define _CMP_NEQ_OQ SIMDE_CMP_NEQ_OQ\n#define _CMP_GE_OS SIMDE_CMP_GE_OS\n#define _CMP_GT_OS SIMDE_CMP_GT_OS\n#define _CMP_TRUE_UQ SIMDE_CMP_TRUE_UQ\n#define _CMP_EQ_OS SIMDE_CMP_EQ_OS\n#define _CMP_LT_OQ SIMDE_CMP_LT_OQ\n#define _CMP_LE_OQ SIMDE_CMP_LE_OQ\n#define _CMP_UNORD_S SIMDE_CMP_UNORD_S\n#define _CMP_NEQ_US SIMDE_CMP_NEQ_US\n#define _CMP_NLT_UQ SIMDE_CMP_NLT_UQ\n#define _CMP_NLE_UQ SIMDE_CMP_NLE_UQ\n#define _CMP_ORD_S SIMDE_CMP_ORD_S\n#define _CMP_EQ_US SIMDE_CMP_EQ_US\n#define _CMP_NGE_UQ SIMDE_CMP_NGE_UQ\n#define _CMP_NGT_UQ SIMDE_CMP_NGT_UQ\n#define _CMP_FALSE_OS SIMDE_CMP_FALSE_OS\n#define _CMP_NEQ_OS SIMDE_CMP_NEQ_OS\n#define _CMP_GE_OQ SIMDE_CMP_GE_OQ\n#define _CMP_GT_OQ SIMDE_CMP_GT_OQ\n#define _CMP_TRUE_US SIMDE_CMP_TRUE_US\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_castps_pd (simde__m256 a) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_castps_pd(a);\n  #else\n    return *HEDLEY_REINTERPRET_CAST(simde__m256d*, &a);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_castps_pd\n  #define _mm256_castps_pd(a) simde_mm256_castps_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_castps_si256 (simde__m256 a) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_castps_si256(a);\n  #else\n    return *HEDLEY_REINTERPRET_CAST(simde__m256i*, &a);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_castps_si256\n  #define _mm256_castps_si256(a) simde_mm256_castps_si256(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_castsi256_pd (simde__m256i a) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_castsi256_pd(a);\n  
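/* The cast intrinsics only relabel the 256-bit pattern; no value\n  conversion occurs, so the fallback reinterprets the storage in place. */\n  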
#else\n    return *HEDLEY_REINTERPRET_CAST(simde__m256d*, &a);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_castsi256_pd\n  #define _mm256_castsi256_pd(a) simde_mm256_castsi256_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_castsi256_ps (simde__m256i a) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_castsi256_ps(a);\n  #else\n    return *HEDLEY_REINTERPRET_CAST(simde__m256*, &a);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_castsi256_ps\n  #define _mm256_castsi256_ps(a) simde_mm256_castsi256_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_castpd_ps (simde__m256d a) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_castpd_ps(a);\n  #else\n    return *HEDLEY_REINTERPRET_CAST(simde__m256*, &a);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_castpd_ps\n  #define _mm256_castpd_ps(a) simde_mm256_castpd_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_castpd_si256 (simde__m256d a) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_castpd_si256(a);\n  #else\n    return *HEDLEY_REINTERPRET_CAST(simde__m256i*, &a);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_castpd_si256\n  #define _mm256_castpd_si256(a) simde_mm256_castpd_si256(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_setzero_si256 (void) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_setzero_si256();\n  #else\n    simde__m256i_private r_;\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_setzero_si128();\n      r_.m128i[1] = simde_mm_setzero_si128();\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) {\n        r_.i32f[i] = 0;\n      }\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_setzero_si256\n  #define _mm256_setzero_si256() simde_mm256_setzero_si256()\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_setzero_ps (void) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_setzero_ps();\n  #else\n    return simde_mm256_castsi256_ps(simde_mm256_setzero_si256());\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_setzero_ps\n  #define _mm256_setzero_ps() simde_mm256_setzero_ps()\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_setzero_pd (void) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_setzero_pd();\n  #else\n    return simde_mm256_castsi256_pd(simde_mm256_setzero_si256());\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_setzero_pd\n  #define _mm256_setzero_pd() simde_mm256_setzero_pd()\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_x_mm256_not_ps(simde__m256 a) {\n  simde__m256_private\n    r_,\n    a_ = simde__m256_to_private(a);\n\n  #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n    r_.i32 = ~a_.i32;\n  #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)\n    r_.m128[0] = simde_x_mm_not_ps(a_.m128[0]);\n    r_.m128[1] = simde_x_mm_not_ps(a_.m128[1]);\n  #else\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n      r_.i32[i] = ~(a_.i32[i]);\n    }\n  #endif\n\n  return simde__m256_from_private(r_);\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_x_mm256_select_ps(simde__m256 a, simde__m256 b, simde__m256 mask) {\n  /* This function is for when you want to blend 
two elements together\n   * according to a mask.  It is similar to _mm256_blendv_ps, except that\n   * it is undefined whether the blend is based on the highest bit in\n   * each lane (like blendv) or just bitwise operations.  This allows\n   * us to implement the function efficiently everywhere.\n   *\n   * Basically, you promise that all the lanes in mask are either 0 or\n   * ~0. */\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_blendv_ps(a, b, mask);\n  #else\n    simde__m256_private\n      r_,\n      a_ = simde__m256_to_private(a),\n      b_ = simde__m256_to_private(b),\n      mask_ = simde__m256_to_private(mask);\n\n    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i32 = a_.i32 ^ ((a_.i32 ^ b_.i32) & mask_.i32);\n    #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)\n      r_.m128[0] = simde_x_mm_select_ps(a_.m128[0], b_.m128[0], mask_.m128[0]);\n      r_.m128[1] = simde_x_mm_select_ps(a_.m128[1], b_.m128[1], mask_.m128[1]);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n        r_.i32[i] = a_.i32[i] ^ ((a_.i32[i] ^ b_.i32[i]) & mask_.i32[i]);\n      }\n    #endif\n\n    return simde__m256_from_private(r_);\n  #endif\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_x_mm256_not_pd(simde__m256d a) {\n  simde__m256d_private\n    r_,\n    a_ = simde__m256d_to_private(a);\n\n  #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n    r_.i64 = ~a_.i64;\n  #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)\n    r_.m128d[0] = simde_x_mm_not_pd(a_.m128d[0]);\n    r_.m128d[1] = simde_x_mm_not_pd(a_.m128d[1]);\n  #else\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {\n      r_.i64[i] = ~(a_.i64[i]);\n    }\n  #endif\n\n  return simde__m256d_from_private(r_);\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_x_mm256_select_pd(simde__m256d a, simde__m256d b, simde__m256d mask) {\n  /* This function is for when you want to blend two elements together\n   * according to a mask.  It is similar to _mm256_blendv_pd, except that\n   * it is undefined whether the blend is based on the highest bit in\n   * each lane (like blendv) or just bitwise operations.  This allows\n   * us to implement the function efficiently everywhere.\n   *\n   * Basically, you promise that all the lanes in mask are either 0 or\n   * ~0. 
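Compare results (e.g. from simde_mm256_cmp_pd)\n   * already have this all-zeros/all-ones form. 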
*/\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_blendv_pd(a, b, mask);\n  #else\n    simde__m256d_private\n      r_,\n      a_ = simde__m256d_to_private(a),\n      b_ = simde__m256d_to_private(b),\n      mask_ = simde__m256d_to_private(mask);\n\n    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i64 = a_.i64 ^ ((a_.i64 ^ b_.i64) & mask_.i64);\n    #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)\n      r_.m128d[0] = simde_x_mm_select_pd(a_.m128d[0], b_.m128d[0], mask_.m128d[0]);\n      r_.m128d[1] = simde_x_mm_select_pd(a_.m128d[1], b_.m128d[1], mask_.m128d[1]);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {\n        r_.i64[i] = a_.i64[i] ^ ((a_.i64[i] ^ b_.i64[i]) & mask_.i64[i]);\n      }\n    #endif\n\n    return simde__m256d_from_private(r_);\n  #endif\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_x_mm256_setone_si256 (void) {\n  simde__m256i_private r_;\n\n#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n  __typeof__(r_.i32f) rv = { 0, };\n  r_.i32f = ~rv;\n#elif defined(SIMDE_X86_AVX2_NATIVE)\n  __m256i t = _mm256_setzero_si256();\n  r_.n = _mm256_cmpeq_epi32(t, t);\n#else\n  SIMDE_VECTORIZE\n  for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) {\n    r_.i32f[i] = ~HEDLEY_STATIC_CAST(int_fast32_t, 0);\n  }\n#endif\n\n  return simde__m256i_from_private(r_);\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_x_mm256_setone_ps (void) {\n  return simde_mm256_castsi256_ps(simde_x_mm256_setone_si256());\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_x_mm256_setone_pd (void) {\n  return simde_mm256_castsi256_pd(simde_x_mm256_setone_si256());\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_set_epi8 (int8_t e31, int8_t e30, int8_t e29, int8_t e28,\n                      int8_t e27, int8_t e26, int8_t e25, int8_t e24,\n                      int8_t e23, int8_t e22, int8_t e21, int8_t e20,\n                      int8_t e19, int8_t e18, int8_t e17, int8_t e16,\n                      int8_t e15, int8_t e14, int8_t e13, int8_t e12,\n                      int8_t e11, int8_t e10, int8_t  e9, int8_t  e8,\n                      int8_t  e7, int8_t  e6, int8_t  e5, int8_t  e4,\n                      int8_t  e3, int8_t  e2, int8_t  e1, int8_t  e0) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_set_epi8(e31, e30, e29, e28, e27, e26, e25, e24,\n                           e23, e22, e21, e20, e19, e18, e17, e16,\n                           e15, e14, e13, e12, e11, e10,  e9,  e8,\n                            e7,  e6,  e5,  e4,  e3,  e2,  e1,  e0);\n  #else\n    simde__m256i_private r_;\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_set_epi8(\n        e15, e14, e13, e12, e11, e10,  e9,  e8,\n        e7,  e6,  e5,  e4,  e3,  e2,  e1,  e0);\n      r_.m128i[1] = simde_mm_set_epi8(\n        e31, e30, e29, e28, e27, e26, e25, e24,\n        e23, e22, e21, e20, e19, e18, e17, e16);\n    #else\n      r_.i8[ 0] =  e0;\n      r_.i8[ 1] =  e1;\n      r_.i8[ 2] =  e2;\n      r_.i8[ 3] =  e3;\n      r_.i8[ 4] =  e4;\n      r_.i8[ 5] =  e5;\n      r_.i8[ 6] =  e6;\n      r_.i8[ 7] =  e7;\n      r_.i8[ 8] =  e8;\n      r_.i8[ 9] =  e9;\n      r_.i8[10] = e10;\n      r_.i8[11] = e11;\n      r_.i8[12] = e12;\n      r_.i8[13] = e13;\n      r_.i8[14] = e14;\n      r_.i8[15] = e15;\n      r_.i8[16] = e16;\n      r_.i8[17] = e17;\n      r_.i8[18] = e18;\n      r_.i8[19] = e19;\n      r_.i8[20] = e20;\n      r_.i8[21] = e21;\n      r_.i8[22] = e22;\n      r_.i8[23] = e23;\n      r_.i8[24] = 
e24;\n      r_.i8[25] = e25;\n      r_.i8[26] = e26;\n      r_.i8[27] = e27;\n      r_.i8[28] = e28;\n      r_.i8[29] = e29;\n      r_.i8[30] = e30;\n      r_.i8[31] = e31;\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_set_epi8\n  #define _mm256_set_epi8(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) \\\n  simde_mm256_set_epi8(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_set_epi16 (int16_t e15, int16_t e14, int16_t e13, int16_t e12,\n                       int16_t e11, int16_t e10, int16_t  e9, int16_t  e8,\n                       int16_t  e7, int16_t  e6, int16_t  e5, int16_t  e4,\n                       int16_t  e3, int16_t  e2, int16_t  e1, int16_t  e0) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_set_epi16(e15, e14, e13, e12, e11, e10,  e9,  e8,\n                            e7,  e6,  e5,  e4,  e3,  e2,  e1,  e0);\n  #else\n    simde__m256i_private r_;\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_set_epi16( e7,  e6,  e5,  e4,  e3,  e2,  e1,  e0);\n      r_.m128i[1] = simde_mm_set_epi16(e15, e14, e13, e12, e11, e10,  e9,  e8);\n    #else\n      r_.i16[ 0] =  e0;\n      r_.i16[ 1] =  e1;\n      r_.i16[ 2] =  e2;\n      r_.i16[ 3] =  e3;\n      r_.i16[ 4] =  e4;\n      r_.i16[ 5] =  e5;\n      r_.i16[ 6] =  e6;\n      r_.i16[ 7] =  e7;\n      r_.i16[ 8] =  e8;\n      r_.i16[ 9] =  e9;\n      r_.i16[10] = e10;\n      r_.i16[11] = e11;\n      r_.i16[12] = e12;\n      r_.i16[13] = e13;\n      r_.i16[14] = e14;\n      r_.i16[15] = e15;\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_set_epi16\n  #define _mm256_set_epi16(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) \\\n  simde_mm256_set_epi16(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_set_epi32 (int32_t e7, int32_t e6, int32_t e5, int32_t e4,\n                       int32_t e3, int32_t e2, int32_t e1, int32_t e0) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_set_epi32(e7, e6, e5, e4, e3, e2, e1, e0);\n  #else\n    simde__m256i_private r_;\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_set_epi32(e3, e2, e1, e0);\n      r_.m128i[1] = simde_mm_set_epi32(e7, e6, e5, e4);\n    #else\n      r_.i32[ 0] =  e0;\n      r_.i32[ 1] =  e1;\n      r_.i32[ 2] =  e2;\n      r_.i32[ 3] =  e3;\n      r_.i32[ 4] =  e4;\n      r_.i32[ 5] =  e5;\n      r_.i32[ 6] =  e6;\n      r_.i32[ 7] =  e7;\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_set_epi32\n  #define _mm256_set_epi32(e7, e6, e5, e4, e3, e2, e1, e0) \\\n  simde_mm256_set_epi32(e7, e6, e5, e4, e3, e2, e1, e0)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_set_epi64x (int64_t  e3, int64_t  e2, int64_t  e1, int64_t  e0) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_set_epi64x(e3, e2, e1, e0);\n  #else\n    simde__m256i_private r_;\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_set_epi64x(e1, e0);\n      r_.m128i[1] = 
simde_mm_set_epi64x(e3, e2);\n    #else\n      r_.i64[0] = e0;\n      r_.i64[1] = e1;\n      r_.i64[2] = e2;\n      r_.i64[3] = e3;\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_set_epi64x\n  #define _mm256_set_epi64x(e3, e2, e1, e0) simde_mm256_set_epi64x(e3, e2, e1, e0)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_x_mm256_set_epu8 (uint8_t e31, uint8_t e30, uint8_t e29, uint8_t e28,\n                        uint8_t e27, uint8_t e26, uint8_t e25, uint8_t e24,\n                        uint8_t e23, uint8_t e22, uint8_t e21, uint8_t e20,\n                        uint8_t e19, uint8_t e18, uint8_t e17, uint8_t e16,\n                        uint8_t e15, uint8_t e14, uint8_t e13, uint8_t e12,\n                        uint8_t e11, uint8_t e10, uint8_t  e9, uint8_t  e8,\n                        uint8_t  e7, uint8_t  e6, uint8_t  e5, uint8_t  e4,\n                        uint8_t  e3, uint8_t  e2, uint8_t  e1, uint8_t  e0) {\n  simde__m256i_private r_;\n\n  r_.u8[ 0] =  e0;\n  r_.u8[ 1] =  e1;\n  r_.u8[ 2] =  e2;\n  r_.u8[ 3] =  e3;\n  r_.u8[ 4] =  e4;\n  r_.u8[ 5] =  e5;\n  r_.u8[ 6] =  e6;\n  r_.u8[ 7] =  e7;\n  r_.u8[ 8] =  e8;\n  r_.u8[ 9] =  e9;\n  r_.u8[10] = e10;\n  r_.u8[11] = e11;\n  r_.u8[12] = e12;\n  r_.u8[13] = e13;\n  r_.u8[14] = e14;\n  r_.u8[15] = e15;\n  r_.u8[16] = e16;\n  r_.u8[17] = e17;\n  r_.u8[18] = e18;\n  r_.u8[19] = e19;\n  r_.u8[20] = e20;\n  r_.u8[21] = e21;\n  r_.u8[22] = e22;\n  r_.u8[23] = e23;\n  r_.u8[24] = e24;\n  r_.u8[25] = e25;\n  r_.u8[26] = e26;\n  r_.u8[27] = e27;\n  r_.u8[28] = e28;\n  r_.u8[29] = e29;\n  r_.u8[30] = e30;\n  r_.u8[31] = e31;\n\n  return simde__m256i_from_private(r_);\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_x_mm256_set_epu16 (uint16_t e15, uint16_t e14, uint16_t e13, uint16_t e12,\n                       uint16_t e11, uint16_t e10, uint16_t  e9, uint16_t  e8,\n                       uint16_t  e7, uint16_t  e6, uint16_t  e5, uint16_t  e4,\n                       uint16_t  e3, uint16_t  e2, uint16_t  e1, uint16_t  e0) {\n  simde__m256i_private r_;\n\n  r_.u16[ 0] =  e0;\n  r_.u16[ 1] =  e1;\n  r_.u16[ 2] =  e2;\n  r_.u16[ 3] =  e3;\n  r_.u16[ 4] =  e4;\n  r_.u16[ 5] =  e5;\n  r_.u16[ 6] =  e6;\n  r_.u16[ 7] =  e7;\n  r_.u16[ 8] =  e8;\n  r_.u16[ 9] =  e9;\n  r_.u16[10] = e10;\n  r_.u16[11] = e11;\n  r_.u16[12] = e12;\n  r_.u16[13] = e13;\n  r_.u16[14] = e14;\n  r_.u16[15] = e15;\n\n  return simde__m256i_from_private(r_);\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_x_mm256_set_epu32 (uint32_t e7, uint32_t e6, uint32_t e5, uint32_t e4,\n                         uint32_t e3, uint32_t e2, uint32_t e1, uint32_t e0) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_set_epi32(HEDLEY_STATIC_CAST(int32_t, e7), HEDLEY_STATIC_CAST(int32_t, e6), HEDLEY_STATIC_CAST(int32_t, e5), HEDLEY_STATIC_CAST(int32_t, e4),\n                            HEDLEY_STATIC_CAST(int32_t, e3), HEDLEY_STATIC_CAST(int32_t, e2), HEDLEY_STATIC_CAST(int32_t, e1), HEDLEY_STATIC_CAST(int32_t, e0));\n  #else\n    simde__m256i_private r_;\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_set_epi32(HEDLEY_STATIC_CAST(int32_t, e3), HEDLEY_STATIC_CAST(int32_t, e2), HEDLEY_STATIC_CAST(int32_t, e1), HEDLEY_STATIC_CAST(int32_t, e0));\n      r_.m128i[1] = simde_mm_set_epi32(HEDLEY_STATIC_CAST(int32_t, e7), HEDLEY_STATIC_CAST(int32_t, e6), HEDLEY_STATIC_CAST(int32_t, e5), HEDLEY_STATIC_CAST(int32_t, e4));\n    #else\n    
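  /* Scalar fallback: write each unsigned 32-bit lane directly; the\n      paths above reuse the signed setters via static casts. */\n    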
  r_.u32[ 0] =  e0;\n      r_.u32[ 1] =  e1;\n      r_.u32[ 2] =  e2;\n      r_.u32[ 3] =  e3;\n      r_.u32[ 4] =  e4;\n      r_.u32[ 5] =  e5;\n      r_.u32[ 6] =  e6;\n      r_.u32[ 7] =  e7;\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_x_mm256_set_epu64x (uint64_t  e3, uint64_t  e2, uint64_t  e1, uint64_t  e0) {\n  simde__m256i_private r_;\n\n  r_.u64[0] = e0;\n  r_.u64[1] = e1;\n  r_.u64[2] = e2;\n  r_.u64[3] = e3;\n\n  return simde__m256i_from_private(r_);\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_set_ps (simde_float32 e7, simde_float32 e6, simde_float32 e5, simde_float32 e4,\n                    simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_set_ps(e7, e6, e5, e4, e3, e2, e1, e0);\n  #else\n    simde__m256_private r_;\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      r_.m128[0] = simde_mm_set_ps(e3, e2, e1, e0);\n      r_.m128[1] = simde_mm_set_ps(e7, e6, e5, e4);\n    #else\n      r_.f32[0] = e0;\n      r_.f32[1] = e1;\n      r_.f32[2] = e2;\n      r_.f32[3] = e3;\n      r_.f32[4] = e4;\n      r_.f32[5] = e5;\n      r_.f32[6] = e6;\n      r_.f32[7] = e7;\n    #endif\n\n    return simde__m256_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_set_ps\n  #define _mm256_set_ps(e7, e6, e5, e4, e3, e2, e1, e0) \\\n  simde_mm256_set_ps(e7, e6, e5, e4, e3, e2, e1, e0)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_set_pd (simde_float64 e3, simde_float64 e2, simde_float64 e1, simde_float64 e0) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_set_pd(e3, e2, e1, e0);\n  #else\n    simde__m256d_private r_;\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      r_.m128d[0] = simde_mm_set_pd(e1, e0);\n      r_.m128d[1] = simde_mm_set_pd(e3, e2);\n    #else\n      r_.f64[0] = e0;\n      r_.f64[1] = e1;\n      r_.f64[2] = e2;\n      r_.f64[3] = e3;\n    #endif\n\n    return simde__m256d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_set_pd\n  #define _mm256_set_pd(e3, e2, e1, e0) \\\n  simde_mm256_set_pd(e3, e2, e1, e0)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_set_m128 (simde__m128 e1, simde__m128 e0) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_insertf128_ps(_mm256_castps128_ps256(e0), e1, 1);\n  #else\n    simde__m256_private r_;\n    simde__m128_private\n      e1_ = simde__m128_to_private(e1),\n      e0_ = simde__m128_to_private(e0);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      r_.m128_private[0] = e0_;\n      r_.m128_private[1] = e1_;\n    #elif defined(SIMDE_HAVE_INT128_)\n      r_.i128[0] = e0_.i128[0];\n      r_.i128[1] = e1_.i128[0];\n    #else\n      r_.i64[0] = e0_.i64[0];\n      r_.i64[1] = e0_.i64[1];\n      r_.i64[2] = e1_.i64[0];\n      r_.i64[3] = e1_.i64[1];\n    #endif\n\n    return simde__m256_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_set_m128\n  #define _mm256_set_m128(e1, e0) simde_mm256_set_m128(e1, e0)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_set_m128d (simde__m128d e1, simde__m128d e0) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_insertf128_pd(_mm256_castpd128_pd256(e0), e1, 1);\n  #else\n    simde__m256d_private r_;\n    simde__m128d_private\n      e1_ = simde__m128d_to_private(e1),\n      e0_ = simde__m128d_to_private(e0);\n\n    #if 
SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      r_.m128d_private[0] = e0_;\n      r_.m128d_private[1] = e1_;\n    #else\n      r_.i64[0] = e0_.i64[0];\n      r_.i64[1] = e0_.i64[1];\n      r_.i64[2] = e1_.i64[0];\n      r_.i64[3] = e1_.i64[1];\n    #endif\n\n    return simde__m256d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_set_m128d\n  #define _mm256_set_m128d(e1, e0) simde_mm256_set_m128d(e1, e0)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_set_m128i (simde__m128i e1, simde__m128i e0) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_insertf128_si256(_mm256_castsi128_si256(e0), e1, 1);\n  #else\n    simde__m256i_private r_;\n    simde__m128i_private\n      e1_ = simde__m128i_to_private(e1),\n      e0_ = simde__m128i_to_private(e0);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      r_.m128i_private[0] = e0_;\n      r_.m128i_private[1] = e1_;\n    #else\n      r_.i64[0] = e0_.i64[0];\n      r_.i64[1] = e0_.i64[1];\n      r_.i64[2] = e1_.i64[0];\n      r_.i64[3] = e1_.i64[1];\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_set_m128i\n  #define _mm256_set_m128i(e1, e0) simde_mm256_set_m128i(e1, e0)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_set1_epi8 (int8_t a) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_set1_epi8(a);\n  #else\n    simde__m256i_private r_;\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_set1_epi8(a);\n      r_.m128i[1] = simde_mm_set1_epi8(a);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {\n        r_.i8[i] = a;\n      }\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_set1_epi8\n  #define _mm256_set1_epi8(a) simde_mm256_set1_epi8(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_set1_epi16 (int16_t a) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_set1_epi16(a);\n  #else\n    simde__m256i_private r_;\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_set1_epi16(a);\n      r_.m128i[1] = simde_mm_set1_epi16(a);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n        r_.i16[i] = a;\n      }\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_set1_epi16\n  #define _mm256_set1_epi16(a) simde_mm256_set1_epi16(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_set1_epi32 (int32_t a) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_set1_epi32(a);\n  #else\n    simde__m256i_private r_;\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_set1_epi32(a);\n      r_.m128i[1] = simde_mm_set1_epi32(a);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n        r_.i32[i] = a;\n      }\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_set1_epi32\n  #define _mm256_set1_epi32(a) simde_mm256_set1_epi32(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_set1_epi64x (int64_t a) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_set1_epi64x(a);\n  #else\n    simde__m256i_private r_;\n\n    #if 
SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_set1_epi64x(a);\n      r_.m128i[1] = simde_mm_set1_epi64x(a);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {\n        r_.i64[i] = a;\n      }\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_set1_epi64x\n  #define _mm256_set1_epi64x(a) simde_mm256_set1_epi64x(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_set1_ps (simde_float32 a) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_set1_ps(a);\n  #else\n    simde__m256_private r_;\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      r_.m128[0] = simde_mm_set1_ps(a);\n      r_.m128[1] = simde_mm_set1_ps(a);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = a;\n      }\n    #endif\n\n    return simde__m256_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_set1_ps\n  #define _mm256_set1_ps(a) simde_mm256_set1_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_set1_pd (simde_float64 a) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_set1_pd(a);\n  #else\n    simde__m256d_private r_;\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      r_.m128d[0] = simde_mm_set1_pd(a);\n      r_.m128d[1] = simde_mm_set1_pd(a);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = a;\n      }\n    #endif\n\n    return simde__m256d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_set1_pd\n  #define _mm256_set1_pd(a) simde_mm256_set1_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_x_mm256_deinterleaveeven_epi16 (simde__m256i a, simde__m256i b) {\n  simde__m256i_private\n    r_,\n    a_ = simde__m256i_to_private(a),\n    b_ = simde__m256i_to_private(b);\n\n  #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n    r_.m128i[0] = simde_x_mm_deinterleaveeven_epi16(a_.m128i[0], b_.m128i[0]);\n    r_.m128i[1] = simde_x_mm_deinterleaveeven_epi16(a_.m128i[1], b_.m128i[1]);\n  #elif defined(SIMDE_SHUFFLE_VECTOR_)\n    r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 0, 2, 4, 6, 16, 18, 20, 22, 8, 10, 12, 14, 24, 26, 28, 30);\n  #else\n    const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2;\n    const size_t quarter_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 4;\n    for (size_t i = 0 ; i < quarter_point ; i++) {\n      r_.i16[i] = a_.i16[2 * i];\n      r_.i16[i + quarter_point] = b_.i16[2 * i];\n      r_.i16[halfway_point + i] = a_.i16[halfway_point + 2 * i];\n      r_.i16[halfway_point + i + quarter_point] = b_.i16[halfway_point + 2 * i];\n    }\n  #endif\n\n  return simde__m256i_from_private(r_);\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_x_mm256_deinterleaveodd_epi16 (simde__m256i a, simde__m256i b) {\n  simde__m256i_private\n    r_,\n    a_ = simde__m256i_to_private(a),\n    b_ = simde__m256i_to_private(b);\n\n  #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n    r_.m128i[0] = simde_x_mm_deinterleaveodd_epi16(a_.m128i[0], b_.m128i[0]);\n    r_.m128i[1] = simde_x_mm_deinterleaveodd_epi16(a_.m128i[1], b_.m128i[1]);\n  #elif defined(SIMDE_SHUFFLE_VECTOR_)\n    r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 1, 3, 5, 7, 17, 19, 21, 23, 9, 11, 13, 15, 25, 27, 29, 31);\n  #else\n    const size_t halfway_point = 
(sizeof(r_.i16) / sizeof(r_.i16[0])) / 2;\n    const size_t quarter_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 4;\n    for (size_t i = 0 ; i < quarter_point ; i++) {\n      r_.i16[i] = a_.i16[2 * i + 1];\n      r_.i16[i + quarter_point] = b_.i16[2 * i + 1];\n      r_.i16[halfway_point + i] = a_.i16[halfway_point + 2 * i + 1];\n      r_.i16[halfway_point + i + quarter_point] = b_.i16[halfway_point + 2 * i + 1];\n    }\n  #endif\n\n  return simde__m256i_from_private(r_);\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_x_mm256_deinterleaveeven_epi32 (simde__m256i a, simde__m256i b) {\n  simde__m256i_private\n    r_,\n    a_ = simde__m256i_to_private(a),\n    b_ = simde__m256i_to_private(b);\n\n  #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n    r_.m128i[0] = simde_x_mm_deinterleaveeven_epi32(a_.m128i[0], b_.m128i[0]);\n    r_.m128i[1] = simde_x_mm_deinterleaveeven_epi32(a_.m128i[1], b_.m128i[1]);\n  #elif defined(SIMDE_SHUFFLE_VECTOR_)\n    r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 0, 2, 8, 10, 4, 6, 12, 14);\n  #else\n    const size_t halfway_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 2;\n    const size_t quarter_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 4;\n    for (size_t i = 0 ; i < quarter_point ; i++) {\n      r_.i32[i] = a_.i32[2 * i];\n      r_.i32[i + quarter_point] = b_.i32[2 * i];\n      r_.i32[halfway_point + i] = a_.i32[halfway_point + 2 * i];\n      r_.i32[halfway_point + i + quarter_point] = b_.i32[halfway_point + 2 * i];\n    }\n  #endif\n\n  return simde__m256i_from_private(r_);\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_x_mm256_deinterleaveodd_epi32 (simde__m256i a, simde__m256i b) {\n  simde__m256i_private\n    r_,\n    a_ = simde__m256i_to_private(a),\n    b_ = simde__m256i_to_private(b);\n\n  #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n    r_.m128i[0] = simde_x_mm_deinterleaveodd_epi32(a_.m128i[0], b_.m128i[0]);\n    r_.m128i[1] = simde_x_mm_deinterleaveodd_epi32(a_.m128i[1], b_.m128i[1]);\n  #elif defined(SIMDE_SHUFFLE_VECTOR_)\n    r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 1, 3, 9, 11, 5, 7, 13, 15);\n  #else\n    const size_t halfway_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 2;\n    const size_t quarter_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 4;\n    for (size_t i = 0 ; i < quarter_point ; i++) {\n      r_.i32[i] = a_.i32[2 * i + 1];\n      r_.i32[i + quarter_point] = b_.i32[2 * i + 1];\n      r_.i32[halfway_point + i] = a_.i32[halfway_point + 2 * i + 1];\n      r_.i32[halfway_point + i + quarter_point] = b_.i32[halfway_point + 2 * i + 1];\n    }\n  #endif\n\n  return simde__m256i_from_private(r_);\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_x_mm256_deinterleaveeven_ps (simde__m256 a, simde__m256 b) {\n  simde__m256_private\n    r_,\n    a_ = simde__m256_to_private(a),\n    b_ = simde__m256_to_private(b);\n\n  #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n    r_.m128[0] = simde_x_mm_deinterleaveeven_ps(a_.m128[0], b_.m128[0]);\n    r_.m128[1] = simde_x_mm_deinterleaveeven_ps(a_.m128[1], b_.m128[1]);\n  #elif defined(SIMDE_SHUFFLE_VECTOR_)\n    r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, b_.f32, 0, 2, 8, 10, 4, 6, 12, 14);\n  #else\n    const size_t halfway_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 2;\n    const size_t quarter_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 4;\n    for (size_t i = 0 ; i < quarter_point ; i++) {\n      r_.f32[i] = a_.f32[2 * i];\n      r_.f32[i + quarter_point] = b_.f32[2 * i];\n      r_.f32[halfway_point + i] = a_.f32[halfway_point + 2 * i];\n      r_.f32[halfway_point + i + 
quarter_point] = b_.f32[halfway_point + 2 * i];\n    }\n  #endif\n\n  return simde__m256_from_private(r_);\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_x_mm256_deinterleaveodd_ps (simde__m256 a, simde__m256 b) {\n  simde__m256_private\n    r_,\n    a_ = simde__m256_to_private(a),\n    b_ = simde__m256_to_private(b);\n\n  #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n    r_.m128[0] = simde_x_mm_deinterleaveodd_ps(a_.m128[0], b_.m128[0]);\n    r_.m128[1] = simde_x_mm_deinterleaveodd_ps(a_.m128[1], b_.m128[1]);\n  #elif defined(SIMDE_SHUFFLE_VECTOR_)\n    r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, b_.f32, 1, 3, 9, 11, 5, 7, 13, 15);\n  #else\n    const size_t halfway_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 2;\n    const size_t quarter_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 4;\n    for (size_t i = 0 ; i < quarter_point ; i++) {\n      r_.f32[i] = a_.f32[2 * i + 1];\n      r_.f32[i + quarter_point] = b_.f32[2 * i + 1];\n      r_.f32[halfway_point + i] = a_.f32[halfway_point + 2 * i + 1];\n      r_.f32[halfway_point + i + quarter_point] = b_.f32[halfway_point + 2 * i + 1];\n    }\n  #endif\n\n  return simde__m256_from_private(r_);\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_x_mm256_deinterleaveeven_pd (simde__m256d a, simde__m256d b) {\n  simde__m256d_private\n    r_,\n    a_ = simde__m256d_to_private(a),\n    b_ = simde__m256d_to_private(b);\n\n  #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n    r_.m128d[0] = simde_x_mm_deinterleaveeven_pd(a_.m128d[0], b_.m128d[0]);\n    r_.m128d[1] = simde_x_mm_deinterleaveeven_pd(a_.m128d[1], b_.m128d[1]);\n  #elif defined(SIMDE_SHUFFLE_VECTOR_)\n    r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, b_.f64, 0, 4, 2, 6);\n  #else\n    const size_t halfway_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 2;\n    const size_t quarter_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 4;\n    for (size_t i = 0 ; i < quarter_point ; i++) {\n      r_.f64[i] = a_.f64[2 * i];\n      r_.f64[i + quarter_point] = b_.f64[2 * i];\n      r_.f64[halfway_point + i] = a_.f64[halfway_point + 2 * i];\n      r_.f64[halfway_point + i + quarter_point] = b_.f64[halfway_point + 2 * i];\n    }\n  #endif\n\n  return simde__m256d_from_private(r_);\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_x_mm256_deinterleaveodd_pd (simde__m256d a, simde__m256d b) {\n  simde__m256d_private\n    r_,\n    a_ = simde__m256d_to_private(a),\n    b_ = simde__m256d_to_private(b);\n\n  #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n    r_.m128d[0] = simde_x_mm_deinterleaveodd_pd(a_.m128d[0], b_.m128d[0]);\n    r_.m128d[1] = simde_x_mm_deinterleaveodd_pd(a_.m128d[1], b_.m128d[1]);\n  #elif defined(SIMDE_SHUFFLE_VECTOR_)\n    r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, b_.f64, 1, 5, 3, 7);\n  #else\n    const size_t halfway_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 2;\n    const size_t quarter_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 4;\n    for (size_t i = 0 ; i < quarter_point ; i++) {\n      r_.f64[i] = a_.f64[2 * i + 1];\n      r_.f64[i + quarter_point] = b_.f64[2 * i + 1];\n      r_.f64[halfway_point + i] = a_.f64[halfway_point + 2 * i + 1];\n      r_.f64[halfway_point + i + quarter_point] = b_.f64[halfway_point + 2 * i + 1];\n    }\n  #endif\n\n  return simde__m256d_from_private(r_);\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_x_mm256_abs_ps(simde__m256 a) {\n    simde__m256_private\n      r_,\n      a_ = simde__m256_to_private(a);\n\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = 
simde_math_fabsf(a_.f32[i]);\n      }\n    return simde__m256_from_private(r_);\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_x_mm256_abs_pd(simde__m256d a) {\n    simde__m256d_private\n      r_,\n      a_ = simde__m256d_to_private(a);\n\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = simde_math_fabs(a_.f64[i]);\n      }\n    return simde__m256d_from_private(r_);\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_add_ps (simde__m256 a, simde__m256 b) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_add_ps(a, b);\n  #else\n    simde__m256_private\n      r_,\n      a_ = simde__m256_to_private(a),\n      b_ = simde__m256_to_private(b);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      r_.m128[0] = simde_mm_add_ps(a_.m128[0], b_.m128[0]);\n      r_.m128[1] = simde_mm_add_ps(a_.m128[1], b_.m128[1]);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.f32 = a_.f32 + b_.f32;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = a_.f32[i] + b_.f32[i];\n      }\n    #endif\n\n    return simde__m256_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_add_ps\n  #define _mm256_add_ps(a, b) simde_mm256_add_ps(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_hadd_ps (simde__m256 a, simde__m256 b) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_hadd_ps(a, b);\n  #else\n    return simde_mm256_add_ps(simde_x_mm256_deinterleaveeven_ps(a, b), simde_x_mm256_deinterleaveodd_ps(a, b));\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_hadd_ps\n  #define _mm256_hadd_ps(a, b) simde_mm256_hadd_ps(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_add_pd (simde__m256d a, simde__m256d b) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_add_pd(a, b);\n  #else\n    simde__m256d_private\n      r_,\n      a_ = simde__m256d_to_private(a),\n      b_ = simde__m256d_to_private(b);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      r_.m128d[0] = simde_mm_add_pd(a_.m128d[0], b_.m128d[0]);\n      r_.m128d[1] = simde_mm_add_pd(a_.m128d[1], b_.m128d[1]);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.f64 = a_.f64 + b_.f64;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = a_.f64[i] + b_.f64[i];\n      }\n    #endif\n\n    return simde__m256d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_add_pd\n  #define _mm256_add_pd(a, b) simde_mm256_add_pd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_hadd_pd (simde__m256d a, simde__m256d b) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_hadd_pd(a, b);\n  #else\n      return simde_mm256_add_pd(simde_x_mm256_deinterleaveeven_pd(a, b), simde_x_mm256_deinterleaveodd_pd(a, b));\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_hadd_pd\n  #define _mm256_hadd_pd(a, b) simde_mm256_hadd_pd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_addsub_ps (simde__m256 a, simde__m256 b) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_addsub_ps(a, b);\n  #else\n    simde__m256_private\n      r_,\n      a_ = simde__m256_to_private(a),\n      b_ = simde__m256_to_private(b);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      r_.m128[0] = simde_mm_addsub_ps(a_.m128[0], 
b_.m128[0]);\n      r_.m128[1] = simde_mm_addsub_ps(a_.m128[1], b_.m128[1]);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) {\n        r_.f32[  i  ] = a_.f32[  i  ] - b_.f32[  i  ];\n        r_.f32[i + 1] = a_.f32[i + 1] + b_.f32[i + 1];\n      }\n    #endif\n\n    return simde__m256_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_addsub_ps\n  #define _mm256_addsub_ps(a, b) simde_mm256_addsub_ps(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_addsub_pd (simde__m256d a, simde__m256d b) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_addsub_pd(a, b);\n  #else\n    simde__m256d_private\n      r_,\n      a_ = simde__m256d_to_private(a),\n      b_ = simde__m256d_to_private(b);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      r_.m128d[0] = simde_mm_addsub_pd(a_.m128d[0], b_.m128d[0]);\n      r_.m128d[1] = simde_mm_addsub_pd(a_.m128d[1], b_.m128d[1]);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) {\n        r_.f64[  i  ] = a_.f64[  i  ] - b_.f64[  i  ];\n        r_.f64[i + 1] = a_.f64[i + 1] + b_.f64[i + 1];\n      }\n    #endif\n\n    return simde__m256d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_addsub_pd\n  #define _mm256_addsub_pd(a, b) simde_mm256_addsub_pd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_and_ps (simde__m256 a, simde__m256 b) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_and_ps(a, b);\n  #else\n    simde__m256_private\n      r_,\n      a_ = simde__m256_to_private(a),\n      b_ = simde__m256_to_private(b);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      r_.m128[0] = simde_mm_and_ps(a_.m128[0], b_.m128[0]);\n      r_.m128[1] = simde_mm_and_ps(a_.m128[1], b_.m128[1]);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i32f = a_.i32f & b_.i32f;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) {\n        r_.i32f[i] = a_.i32f[i] & b_.i32f[i];\n      }\n    #endif\n\n    return simde__m256_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_and_ps\n  #define _mm256_and_ps(a, b) simde_mm256_and_ps(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_and_pd (simde__m256d a, simde__m256d b) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_and_pd(a, b);\n  #else\n    simde__m256d_private\n      r_,\n      a_ = simde__m256d_to_private(a),\n      b_ = simde__m256d_to_private(b);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      r_.m128d[0] = simde_mm_and_pd(a_.m128d[0], b_.m128d[0]);\n      r_.m128d[1] = simde_mm_and_pd(a_.m128d[1], b_.m128d[1]);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i32f = a_.i32f & b_.i32f;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) {\n        r_.i32f[i] = a_.i32f[i] & b_.i32f[i];\n      }\n    #endif\n\n    return simde__m256d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_and_pd\n  #define _mm256_and_pd(a, b) simde_mm256_and_pd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_andnot_ps (simde__m256 a, simde__m256 b) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_andnot_ps(a, b);\n  #else\n    simde__m256_private\n      r_,\n      a_ = 
simde__m256_to_private(a),\n      b_ = simde__m256_to_private(b);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      r_.m128[0] = simde_mm_andnot_ps(a_.m128[0], b_.m128[0]);\n      r_.m128[1] = simde_mm_andnot_ps(a_.m128[1], b_.m128[1]);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i32f = ~a_.i32f & b_.i32f;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) {\n        r_.i32f[i] = ~a_.i32f[i] & b_.i32f[i];\n      }\n    #endif\n\n    return simde__m256_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_andnot_ps\n  #define _mm256_andnot_ps(a, b) simde_mm256_andnot_ps(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_andnot_pd (simde__m256d a, simde__m256d b) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_andnot_pd(a, b);\n  #else\n    simde__m256d_private\n      r_,\n      a_ = simde__m256d_to_private(a),\n      b_ = simde__m256d_to_private(b);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      r_.m128d[0] = simde_mm_andnot_pd(a_.m128d[0], b_.m128d[0]);\n      r_.m128d[1] = simde_mm_andnot_pd(a_.m128d[1], b_.m128d[1]);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i32f = ~a_.i32f & b_.i32f;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) {\n        r_.i32f[i] = ~a_.i32f[i] & b_.i32f[i];\n      }\n    #endif\n\n    return simde__m256d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_andnot_pd\n  #define _mm256_andnot_pd(a, b) simde_mm256_andnot_pd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_blend_ps (simde__m256 a, simde__m256 b, const int imm8)\n    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) {\n  simde__m256_private\n    r_,\n    a_ = simde__m256_to_private(a),\n    b_ = simde__m256_to_private(b);\n\n  SIMDE_VECTORIZE\n  for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n    r_.f32[i] = ((imm8 >> i) & 1) ? b_.f32[i] : a_.f32[i];\n  }\n\n  return simde__m256_from_private(r_);\n}\n#if defined(SIMDE_X86_AVX_NATIVE)\n#  define simde_mm256_blend_ps(a, b, imm8) _mm256_blend_ps(a, b, imm8)\n#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n#  define simde_mm256_blend_ps(a, b, imm8) \\\n      simde_mm256_set_m128( \\\n          simde_mm_blend_ps(simde_mm256_extractf128_ps(a, 1), simde_mm256_extractf128_ps(b, 1), (imm8) >> 4), \\\n          simde_mm_blend_ps(simde_mm256_extractf128_ps(a, 0), simde_mm256_extractf128_ps(b, 0), (imm8) & 0x0F))\n#endif\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_blend_ps\n  #define _mm256_blend_ps(a, b, imm8) simde_mm256_blend_ps(a, b, imm8)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_blend_pd (simde__m256d a, simde__m256d b, const int imm8)\n    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) {\n  simde__m256d_private\n    r_,\n    a_ = simde__m256d_to_private(a),\n    b_ = simde__m256d_to_private(b);\n\n  SIMDE_VECTORIZE\n  for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n    r_.f64[i] = ((imm8 >> i) & 1) ? 
b_.f64[i] : a_.f64[i];\n  }\n  return simde__m256d_from_private(r_);\n}\n#if defined(SIMDE_X86_AVX_NATIVE)\n#  define simde_mm256_blend_pd(a, b, imm8) _mm256_blend_pd(a, b, imm8)\n#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n#  define simde_mm256_blend_pd(a, b, imm8) \\\n      simde_mm256_set_m128d( \\\n          simde_mm_blend_pd(simde_mm256_extractf128_pd(a, 1), simde_mm256_extractf128_pd(b, 1), (imm8) >> 2), \\\n          simde_mm_blend_pd(simde_mm256_extractf128_pd(a, 0), simde_mm256_extractf128_pd(b, 0), (imm8) & 3))\n#endif\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_blend_pd\n  #define _mm256_blend_pd(a, b, imm8) simde_mm256_blend_pd(a, b, imm8)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_blendv_ps (simde__m256 a, simde__m256 b, simde__m256 mask) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_blendv_ps(a, b, mask);\n  #else\n    simde__m256_private\n      r_,\n      a_ = simde__m256_to_private(a),\n      b_ = simde__m256_to_private(b),\n      mask_ = simde__m256_to_private(mask);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      r_.m128[0] = simde_mm_blendv_ps(a_.m128[0], b_.m128[0], mask_.m128[0]);\n      r_.m128[1] = simde_mm_blendv_ps(a_.m128[1], b_.m128[1], mask_.m128[1]);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {\n        r_.f32[i] = (mask_.u32[i] & (UINT32_C(1) << 31)) ? b_.f32[i] : a_.f32[i];\n      }\n    #endif\n\n    return simde__m256_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_blendv_ps\n  #define _mm256_blendv_ps(a, b, imm8) simde_mm256_blendv_ps(a, b, imm8)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_blendv_pd (simde__m256d a, simde__m256d b, simde__m256d mask) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_blendv_pd(a, b, mask);\n  #else\n    simde__m256d_private\n      r_,\n      a_ = simde__m256d_to_private(a),\n      b_ = simde__m256d_to_private(b),\n      mask_ = simde__m256d_to_private(mask);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      r_.m128d[0] = simde_mm_blendv_pd(a_.m128d[0], b_.m128d[0], mask_.m128d[0]);\n      r_.m128d[1] = simde_mm_blendv_pd(a_.m128d[1], b_.m128d[1], mask_.m128d[1]);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {\n        r_.f64[i] = (mask_.u64[i] & (UINT64_C(1) << 63)) ? 
b_.f64[i] : a_.f64[i];\n      }\n    #endif\n\n    return simde__m256d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_blendv_pd\n  #define _mm256_blendv_pd(a, b, imm8) simde_mm256_blendv_pd(a, b, imm8)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_broadcast_pd (simde__m128d const * mem_addr) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_broadcast_pd(mem_addr);\n  #else\n    simde__m256d_private r_;\n\n    simde__m128d tmp = simde_mm_loadu_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, mem_addr));\n    r_.m128d[0] = tmp;\n    r_.m128d[1] = tmp;\n\n    return simde__m256d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_broadcast_pd\n  #define _mm256_broadcast_pd(mem_addr) simde_mm256_broadcast_pd(mem_addr)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_broadcast_ps (simde__m128 const * mem_addr) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_broadcast_ps(mem_addr);\n  #else\n    simde__m256_private r_;\n\n    simde__m128 tmp = simde_mm_loadu_ps(HEDLEY_REINTERPRET_CAST(simde_float32 const*, mem_addr));\n    r_.m128[0] = tmp;\n    r_.m128[1] = tmp;\n\n    return simde__m256_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_broadcast_ps\n  #define _mm256_broadcast_ps(mem_addr) simde_mm256_broadcast_ps(HEDLEY_REINTERPRET_CAST(simde__m128 const*, mem_addr))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_broadcast_sd (simde_float64 const * a) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_broadcast_sd(a);\n  #else\n    return simde_mm256_set1_pd(*a);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_broadcast_sd\n  #define _mm256_broadcast_sd(mem_addr) simde_mm256_broadcast_sd(HEDLEY_REINTERPRET_CAST(double const*, mem_addr))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_broadcast_ss (simde_float32 const * a) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm_broadcast_ss(a);\n  #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n    return simde__m128_from_wasm_v128(wasm_v128_load32_splat(a));\n  #else\n    return simde_mm_set1_ps(*a);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm_broadcast_ss\n  #define _mm_broadcast_ss(mem_addr) simde_mm_broadcast_ss(mem_addr)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_broadcast_ss (simde_float32 const * a) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_broadcast_ss(a);\n  #else\n    return simde_mm256_set1_ps(*a);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_broadcast_ss\n  #define _mm256_broadcast_ss(mem_addr) simde_mm256_broadcast_ss(mem_addr)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_castpd128_pd256 (simde__m128d a) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_castpd128_pd256(a);\n  #else\n    simde__m256d_private r_;\n    simde__m128d_private a_ = simde__m128d_to_private(a);\n\n    r_.m128d_private[0] = a_;\n\n    return simde__m256d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_castpd128_pd256\n  #define _mm256_castpd128_pd256(a) simde_mm256_castpd128_pd256(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm256_castpd256_pd128 (simde__m256d a) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_castpd256_pd128(a);\n  #else\n    simde__m256d_private a_ = simde__m256d_to_private(a);\n    
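/* The cast intrinsics are pure reinterprets and cost nothing on real hardware;\n     * this portable fallback simply hands back the low 128-bit half stored in\n     * the private union. Illustrative use (hypothetical caller, not SIMDe API):\n     *   simde__m128d lo = simde_mm256_castpd256_pd128(simde_mm256_set1_pd(1.0));\n     */\n    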
return a_.m128d[0];\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_castpd256_pd128\n  #define _mm256_castpd256_pd128(a) simde_mm256_castpd256_pd128(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_castps128_ps256 (simde__m128 a) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_castps128_ps256(a);\n  #else\n    simde__m256_private r_;\n    simde__m128_private a_ = simde__m128_to_private(a);\n\n    r_.m128_private[0] = a_;\n\n    return simde__m256_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_castps128_ps256\n  #define _mm256_castps128_ps256(a) simde_mm256_castps128_ps256(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm256_castps256_ps128 (simde__m256 a) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_castps256_ps128(a);\n  #else\n    simde__m256_private a_ = simde__m256_to_private(a);\n    return a_.m128[0];\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_castps256_ps128\n  #define _mm256_castps256_ps128(a) simde_mm256_castps256_ps128(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_castsi128_si256 (simde__m128i a) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_castsi128_si256(a);\n  #else\n    simde__m256i_private r_;\n    simde__m128i_private a_ = simde__m128i_to_private(a);\n\n    r_.m128i_private[0] = a_;\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_castsi128_si256\n  #define _mm256_castsi128_si256(a) simde_mm256_castsi128_si256(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm256_castsi256_si128 (simde__m256i a) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_castsi256_si128(a);\n  #else\n    simde__m256i_private a_ = simde__m256i_to_private(a);\n    return a_.m128i[0];\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_castsi256_si128\n  #define _mm256_castsi256_si128(a) simde_mm256_castsi256_si128(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_round_ps (simde__m256 a, const int rounding) {\n  simde__m256_private\n    r_,\n    a_ = simde__m256_to_private(a);\n\n  switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) {\n    #if defined(simde_math_nearbyintf)\n      case SIMDE_MM_FROUND_CUR_DIRECTION:\n        for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n          r_.f32[i] = simde_math_nearbyintf(a_.f32[i]);\n        }\n        break;\n    #endif\n\n    #if defined(simde_math_roundf)\n      case SIMDE_MM_FROUND_TO_NEAREST_INT:\n        for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n          r_.f32[i] = simde_math_roundf(a_.f32[i]);\n        }\n        break;\n    #endif\n\n    #if defined(simde_math_floorf)\n      case SIMDE_MM_FROUND_TO_NEG_INF:\n        for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n          r_.f32[i] = simde_math_floorf(a_.f32[i]);\n        }\n        break;\n    #endif\n\n    #if defined(simde_math_ceilf)\n      case SIMDE_MM_FROUND_TO_POS_INF:\n        for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n          r_.f32[i] = simde_math_ceilf(a_.f32[i]);\n        }\n        break;\n    #endif\n\n    #if defined(simde_math_truncf)\n      case SIMDE_MM_FROUND_TO_ZERO:\n        for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n          r_.f32[i] = simde_math_truncf(a_.f32[i]);\n        }\n        break;\n    #endif\n\n   
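 /* NOTE: each SIMDE_MM_FROUND_* mode maps to the matching C99 function\n      (nearbyintf, roundf, floorf, ceilf, truncf) when the math portability\n      layer provides it; a missing function compiles its case out, so control\n      falls through to the unreachable default below. */\n   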
 default:\n      HEDLEY_UNREACHABLE_RETURN(simde_mm256_undefined_ps());\n  }\n\n  return simde__m256_from_private(r_);\n}\n#if defined(SIMDE_X86_AVX_NATIVE)\n  #define simde_mm256_round_ps(a, rounding) _mm256_round_ps(a, rounding)\n#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) && defined(SIMDE_STATEMENT_EXPR_)\n  #define simde_mm256_round_ps(a, rounding) SIMDE_STATEMENT_EXPR_(({ \\\n    simde__m256_private \\\n      simde_mm256_round_ps_r_ = simde__m256_to_private(simde_mm256_setzero_ps()), \\\n      simde_mm256_round_ps_a_ = simde__m256_to_private(a); \\\n    \\\n    for (size_t simde_mm256_round_ps_i = 0 ; simde_mm256_round_ps_i < (sizeof(simde_mm256_round_ps_r_.m128) / sizeof(simde_mm256_round_ps_r_.m128[0])) ; simde_mm256_round_ps_i++) { \\\n      simde_mm256_round_ps_r_.m128[simde_mm256_round_ps_i] = simde_mm_round_ps(simde_mm256_round_ps_a_.m128[simde_mm256_round_ps_i], rounding); \\\n    } \\\n    \\\n    simde__m256_from_private(simde_mm256_round_ps_r_); \\\n  }))\n#endif\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_round_ps\n  #define _mm256_round_ps(a, rounding) simde_mm256_round_ps(a, rounding)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_round_pd (simde__m256d a, const int rounding) {\n  simde__m256d_private\n    r_,\n    a_ = simde__m256d_to_private(a);\n\n  switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) {\n    #if defined(simde_math_nearbyint)\n      case SIMDE_MM_FROUND_CUR_DIRECTION:\n        for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n          r_.f64[i] = simde_math_nearbyint(a_.f64[i]);\n        }\n        break;\n    #endif\n\n    #if defined(simde_math_round)\n      case SIMDE_MM_FROUND_TO_NEAREST_INT:\n        for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n          r_.f64[i] = simde_math_round(a_.f64[i]);\n        }\n        break;\n    #endif\n\n    #if defined(simde_math_floor)\n      case SIMDE_MM_FROUND_TO_NEG_INF:\n        for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n          r_.f64[i] = simde_math_floor(a_.f64[i]);\n        }\n        break;\n    #endif\n\n    #if defined(simde_math_ceil)\n      case SIMDE_MM_FROUND_TO_POS_INF:\n        for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n          r_.f64[i] = simde_math_ceil(a_.f64[i]);\n        }\n        break;\n    #endif\n\n    #if defined(simde_math_trunc)\n      case SIMDE_MM_FROUND_TO_ZERO:\n        for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n          r_.f64[i] = simde_math_trunc(a_.f64[i]);\n        }\n        break;\n    #endif\n\n    default:\n      HEDLEY_UNREACHABLE_RETURN(simde_mm256_undefined_pd());\n  }\n\n  return simde__m256d_from_private(r_);\n}\n#if defined(SIMDE_X86_AVX_NATIVE)\n  #define simde_mm256_round_pd(a, rounding) _mm256_round_pd(a, rounding)\n#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) && defined(SIMDE_STATEMENT_EXPR_)\n  #define simde_mm256_round_pd(a, rounding) SIMDE_STATEMENT_EXPR_(({ \\\n    simde__m256d_private \\\n      simde_mm256_round_pd_r_ = simde__m256d_to_private(simde_mm256_setzero_pd()), \\\n      simde_mm256_round_pd_a_ = simde__m256d_to_private(a); \\\n    \\\n    for (size_t simde_mm256_round_pd_i = 0 ; simde_mm256_round_pd_i < (sizeof(simde_mm256_round_pd_r_.m128d) / sizeof(simde_mm256_round_pd_r_.m128d[0])) ; simde_mm256_round_pd_i++) { \\\n      simde_mm256_round_pd_r_.m128d[simde_mm256_round_pd_i] = simde_mm_round_pd(simde_mm256_round_pd_a_.m128d[simde_mm256_round_pd_i], rounding); \\\n    } \\\n    \\\n    
simde__m256d_from_private(simde_mm256_round_pd_r_); \\\n  }))\n#endif\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_round_pd\n  #define _mm256_round_pd(a, rounding) simde_mm256_round_pd(a, rounding)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_ceil_pd (simde__m256d a) {\n  return simde_mm256_round_pd(a, SIMDE_MM_FROUND_TO_POS_INF);\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_ceil_pd\n  #define _mm256_ceil_pd(a) simde_mm256_ceil_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_ceil_ps (simde__m256 a) {\n  return simde_mm256_round_ps(a, SIMDE_MM_FROUND_TO_POS_INF);\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_ceil_ps\n  #define _mm256_ceil_ps(a) simde_mm256_ceil_ps(a)\n#endif\n\nHEDLEY_DIAGNOSTIC_PUSH\nSIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL\n\n/* This implementation does not support signaling NaNs (yet?) */\nSIMDE_HUGE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_cmp_pd (simde__m128d a, simde__m128d b, const int imm8)\n    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) {\n  switch (imm8) {\n    case SIMDE_CMP_EQ_UQ:\n    case SIMDE_CMP_EQ_US:\n      return simde_mm_or_pd(simde_mm_cmpunord_pd(a, b), simde_mm_cmpeq_pd(a, b));\n      break;\n    case SIMDE_CMP_EQ_OQ:\n    case SIMDE_CMP_EQ_OS:\n      return simde_mm_cmpeq_pd(a, b);\n      break;\n    case SIMDE_CMP_NGE_US:\n    case SIMDE_CMP_NGE_UQ:\n      return simde_x_mm_not_pd(simde_mm_cmpge_pd(a, b));\n      break;\n    case SIMDE_CMP_LT_OS:\n    case SIMDE_CMP_LT_OQ:\n      return simde_mm_cmplt_pd(a, b);\n      break;\n    case SIMDE_CMP_NGT_US:\n    case SIMDE_CMP_NGT_UQ:\n      return simde_x_mm_not_pd(simde_mm_cmpgt_pd(a, b));\n      break;\n    case SIMDE_CMP_LE_OS:\n    case SIMDE_CMP_LE_OQ:\n      return simde_mm_cmple_pd(a, b);\n      break;\n    case SIMDE_CMP_NEQ_UQ:\n    case SIMDE_CMP_NEQ_US:\n      return simde_mm_cmpneq_pd(a, b);\n      break;\n    case SIMDE_CMP_NEQ_OQ:\n    case SIMDE_CMP_NEQ_OS:\n      return simde_mm_and_pd(simde_mm_cmpord_pd(a, b), simde_mm_cmpneq_pd(a, b));\n      break;\n    case SIMDE_CMP_NLT_US:\n    case SIMDE_CMP_NLT_UQ:\n      return simde_x_mm_not_pd(simde_mm_cmplt_pd(a, b));\n      break;\n    case SIMDE_CMP_GE_OS:\n    case SIMDE_CMP_GE_OQ:\n      return simde_mm_cmpge_pd(a, b);\n      break;\n    case SIMDE_CMP_NLE_US:\n    case SIMDE_CMP_NLE_UQ:\n      return simde_x_mm_not_pd(simde_mm_cmple_pd(a, b));\n      break;\n    case SIMDE_CMP_GT_OS:\n    case SIMDE_CMP_GT_OQ:\n      return simde_mm_cmpgt_pd(a, b);\n      break;\n    case SIMDE_CMP_FALSE_OQ:\n    case SIMDE_CMP_FALSE_OS:\n      return simde_mm_setzero_pd();\n      break;\n    case SIMDE_CMP_TRUE_UQ:\n    case SIMDE_CMP_TRUE_US:\n      return simde_x_mm_setone_pd();\n      break;\n    case SIMDE_CMP_UNORD_Q:\n    case SIMDE_CMP_UNORD_S:\n      return simde_mm_cmpunord_pd(a, b);\n      break;\n    case SIMDE_CMP_ORD_Q:\n    case SIMDE_CMP_ORD_S:\n      return simde_mm_cmpord_pd(a, b);\n      break;\n  }\n\n  HEDLEY_UNREACHABLE_RETURN(simde_mm_setzero_pd());\n}\n#if defined(__clang__) && defined(__AVX512DQ__)\n  #define simde_mm_cmp_pd(a, b, imm8) (__extension__ ({ \\\n    simde__m128d simde_mm_cmp_pd_r; \\\n    switch (imm8) { \\\n      case SIMDE_CMP_FALSE_OQ: \\\n      case SIMDE_CMP_FALSE_OS: \\\n        simde_mm_cmp_pd_r = simde_mm_setzero_pd(); \\\n        break; \\\n      case SIMDE_CMP_TRUE_UQ: \\\n      case SIMDE_CMP_TRUE_US: \\\n        simde_mm_cmp_pd_r = simde_x_mm_setone_pd(); \\\n        break; \\\n      default: \\\n  
      simde_mm_cmp_pd_r = simde_mm_cmp_pd(a, b, imm8); \\\n        break; \\\n    } \\\n    simde_mm_cmp_pd_r; \\\n  }))\n#elif defined(SIMDE_X86_AVX_NATIVE)\n#  define simde_mm_cmp_pd(a, b, imm8) _mm_cmp_pd(a, b, imm8)\n#endif\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm_cmp_pd\n  #define _mm_cmp_pd(a, b, imm8) simde_mm_cmp_pd(a, b, imm8)\n#endif\n\nSIMDE_HUGE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_cmp_ps (simde__m128 a, simde__m128 b, const int imm8)\n    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) {\n  switch (imm8) {\n    case SIMDE_CMP_EQ_UQ:\n    case SIMDE_CMP_EQ_US:\n      return simde_mm_or_ps(simde_mm_cmpunord_ps(a, b), simde_mm_cmpeq_ps(a, b));\n      break;\n    case SIMDE_CMP_EQ_OQ:\n    case SIMDE_CMP_EQ_OS:\n      return simde_mm_cmpeq_ps(a, b);\n      break;\n    case SIMDE_CMP_NGE_US:\n    case SIMDE_CMP_NGE_UQ:\n      return simde_x_mm_not_ps(simde_mm_cmpge_ps(a, b));\n      break;\n    case SIMDE_CMP_LT_OS:\n    case SIMDE_CMP_LT_OQ:\n      return simde_mm_cmplt_ps(a, b);\n      break;\n    case SIMDE_CMP_NGT_US:\n    case SIMDE_CMP_NGT_UQ:\n      return simde_x_mm_not_ps(simde_mm_cmpgt_ps(a, b));\n      break;\n    case SIMDE_CMP_LE_OS:\n    case SIMDE_CMP_LE_OQ:\n      return simde_mm_cmple_ps(a, b);\n      break;\n    case SIMDE_CMP_NEQ_UQ:\n    case SIMDE_CMP_NEQ_US:\n      return simde_mm_cmpneq_ps(a, b);\n      break;\n    case SIMDE_CMP_NEQ_OQ:\n    case SIMDE_CMP_NEQ_OS:\n      return simde_mm_and_ps(simde_mm_cmpord_ps(a, b), simde_mm_cmpneq_ps(a, b));\n      break;\n    case SIMDE_CMP_NLT_US:\n    case SIMDE_CMP_NLT_UQ:\n      return simde_x_mm_not_ps(simde_mm_cmplt_ps(a, b));\n      break;\n    case SIMDE_CMP_GE_OS:\n    case SIMDE_CMP_GE_OQ:\n      return simde_mm_cmpge_ps(a, b);\n      break;\n    case SIMDE_CMP_NLE_US:\n    case SIMDE_CMP_NLE_UQ:\n      return simde_x_mm_not_ps(simde_mm_cmple_ps(a, b));\n      break;\n    case SIMDE_CMP_GT_OS:\n    case SIMDE_CMP_GT_OQ:\n      return simde_mm_cmpgt_ps(a, b);\n      break;\n    case SIMDE_CMP_FALSE_OQ:\n    case SIMDE_CMP_FALSE_OS:\n      return simde_mm_setzero_ps();\n      break;\n    case SIMDE_CMP_TRUE_UQ:\n    case SIMDE_CMP_TRUE_US:\n      return simde_x_mm_setone_ps();\n      break;\n    case SIMDE_CMP_UNORD_Q:\n    case SIMDE_CMP_UNORD_S:\n      return simde_mm_cmpunord_ps(a, b);\n      break;\n    case SIMDE_CMP_ORD_Q:\n    case SIMDE_CMP_ORD_S:\n      return simde_mm_cmpord_ps(a, b);\n      break;\n  }\n\n  HEDLEY_UNREACHABLE_RETURN(simde_mm_setzero_ps());\n}\n/* Prior to 9.0 clang has problems with _mm{,256}_cmp_{ps,pd} for all four of the true/false\n * comparisons, but only when AVX-512 is enabled. 
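The guarded macros below work around this\n * by resolving the always-false/always-true predicates directly to\n * setzero/setone and deferring every other predicate to the out-of-line\n * implementation. 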
*/\n#if defined(__clang__) && defined(__AVX512DQ__)\n  #define simde_mm_cmp_ps(a, b, imm8) (__extension__ ({ \\\n    simde__m128 simde_mm_cmp_ps_r; \\\n    switch (imm8) { \\\n      case SIMDE_CMP_FALSE_OQ: \\\n      case SIMDE_CMP_FALSE_OS: \\\n        simde_mm_cmp_ps_r = simde_mm_setzero_ps(); \\\n        break; \\\n      case SIMDE_CMP_TRUE_UQ: \\\n      case SIMDE_CMP_TRUE_US: \\\n        simde_mm_cmp_ps_r = simde_x_mm_setone_ps(); \\\n        break; \\\n      default: \\\n        simde_mm_cmp_ps_r = simde_mm_cmp_ps(a, b, imm8); \\\n        break; \\\n    } \\\n    simde_mm_cmp_ps_r; \\\n  }))\n#elif defined(SIMDE_X86_AVX_NATIVE)\n  #define simde_mm_cmp_ps(a, b, imm8) _mm_cmp_ps(a, b, imm8)\n#endif\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm_cmp_ps\n  #define _mm_cmp_ps(a, b, imm8) simde_mm_cmp_ps(a, b, imm8)\n#endif\n\nSIMDE_HUGE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_cmp_sd (simde__m128d a, simde__m128d b, const int imm8)\n    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) {\n  simde__m128d_private\n    a_ = simde__m128d_to_private(a),\n    b_ = simde__m128d_to_private(b);\n\n  switch (imm8) {\n    case SIMDE_CMP_EQ_OQ:\n    case SIMDE_CMP_EQ_OS:\n      a_.i64[0] = (a_.f64[0] == b_.f64[0]) ? ~INT64_C(0) : INT64_C(0);\n      break;\n\n    case SIMDE_CMP_LT_OQ:\n    case SIMDE_CMP_LT_OS:\n      a_.i64[0] = (a_.f64[0] < b_.f64[0]) ? ~INT64_C(0) : INT64_C(0);\n      break;\n\n    case SIMDE_CMP_LE_OQ:\n    case SIMDE_CMP_LE_OS:\n      a_.i64[0] = (a_.f64[0] <= b_.f64[0]) ? ~INT64_C(0) : INT64_C(0);\n      break;\n\n    case SIMDE_CMP_UNORD_Q:\n    case SIMDE_CMP_UNORD_S:\n      a_.i64[0] = ((a_.f64[0] != a_.f64[0]) || (b_.f64[0] != b_.f64[0])) ? ~INT64_C(0) : INT64_C(0);\n      break;\n\n    case SIMDE_CMP_NEQ_UQ:\n    case SIMDE_CMP_NEQ_US:\n      /* NEQ_UQ/NEQ_US: true when unordered or unequal; C's != is already true\n       * for NaN operands, so a plain comparison implements it (the 256-bit\n       * simde_mm256_cmp_pd below uses the same predicate). */\n      a_.i64[0] = (a_.f64[0] != b_.f64[0]) ? ~INT64_C(0) : INT64_C(0);\n      break;\n\n    case SIMDE_CMP_NEQ_OQ:\n    case SIMDE_CMP_NEQ_OS:\n      a_.i64[0] = ((a_.f64[0] == a_.f64[0]) & (b_.f64[0] == b_.f64[0]) & (a_.f64[0] != b_.f64[0])) ? ~INT64_C(0) : INT64_C(0);\n      break;\n\n    case SIMDE_CMP_NLT_UQ:\n    case SIMDE_CMP_NLT_US:\n      a_.i64[0] = !(a_.f64[0] < b_.f64[0]) ? ~INT64_C(0) : INT64_C(0);\n      break;\n\n    case SIMDE_CMP_NLE_UQ:\n    case SIMDE_CMP_NLE_US:\n      a_.i64[0] = !(a_.f64[0] <= b_.f64[0]) ? ~INT64_C(0) : INT64_C(0);\n      break;\n\n    case SIMDE_CMP_ORD_Q:\n    case SIMDE_CMP_ORD_S:\n      a_.i64[0] = ((a_.f64[0] == a_.f64[0]) & (b_.f64[0] == b_.f64[0])) ? ~INT64_C(0) : INT64_C(0);\n      break;\n\n    case SIMDE_CMP_EQ_UQ:\n    case SIMDE_CMP_EQ_US:\n      a_.i64[0] = ((a_.f64[0] != a_.f64[0]) | (b_.f64[0] != b_.f64[0]) | (a_.f64[0] == b_.f64[0])) ? ~INT64_C(0) : INT64_C(0);\n      break;\n\n    case SIMDE_CMP_NGE_UQ:\n    case SIMDE_CMP_NGE_US:\n      a_.i64[0] = !(a_.f64[0] >= b_.f64[0]) ? ~INT64_C(0) : INT64_C(0);\n      break;\n\n    case SIMDE_CMP_NGT_UQ:\n    case SIMDE_CMP_NGT_US:\n      a_.i64[0] = !(a_.f64[0] > b_.f64[0]) ? ~INT64_C(0) : INT64_C(0);\n      break;\n\n    case SIMDE_CMP_FALSE_OQ:\n    case SIMDE_CMP_FALSE_OS:\n      a_.i64[0] = INT64_C(0);\n      break;\n\n    case SIMDE_CMP_GE_OQ:\n    case SIMDE_CMP_GE_OS:\n      a_.i64[0] = (a_.f64[0] >= b_.f64[0]) ? ~INT64_C(0) : INT64_C(0);\n      break;\n\n    case SIMDE_CMP_GT_OQ:\n    case SIMDE_CMP_GT_OS:\n      a_.i64[0] = (a_.f64[0] > b_.f64[0]) ? 
~INT64_C(0) : INT64_C(0);\n      break;\n\n    case SIMDE_CMP_TRUE_UQ:\n    case SIMDE_CMP_TRUE_US:\n      a_.i64[0] = ~INT64_C(0);\n      break;\n\n    default:\n      HEDLEY_UNREACHABLE();\n  }\n\n  return simde__m128d_from_private(a_);\n}\n#if defined(SIMDE_X86_AVX_NATIVE)\n#  define simde_mm_cmp_sd(a, b, imm8) _mm_cmp_sd(a, b, imm8)\n#endif\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm_cmp_sd\n  #define _mm_cmp_sd(a, b, imm8) simde_mm_cmp_sd(a, b, imm8)\n#endif\n\nSIMDE_HUGE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_cmp_ss (simde__m128 a, simde__m128 b, const int imm8)\n    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) {\n  simde__m128_private\n    a_ = simde__m128_to_private(a),\n    b_ = simde__m128_to_private(b);\n\n  switch (imm8) {\n    case SIMDE_CMP_EQ_OQ:\n    case SIMDE_CMP_EQ_OS:\n      a_.i32[0] = (a_.f32[0] == b_.f32[0]) ? ~INT32_C(0) : INT32_C(0);\n      break;\n\n    case SIMDE_CMP_LT_OQ:\n    case SIMDE_CMP_LT_OS:\n      a_.i32[0] = (a_.f32[0] < b_.f32[0]) ? ~INT32_C(0) : INT32_C(0);\n      break;\n\n    case SIMDE_CMP_LE_OQ:\n    case SIMDE_CMP_LE_OS:\n      a_.i32[0] = (a_.f32[0] <= b_.f32[0]) ? ~INT32_C(0) : INT32_C(0);\n      break;\n\n    case SIMDE_CMP_UNORD_Q:\n    case SIMDE_CMP_UNORD_S:\n      a_.i32[0] = ((a_.f32[0] != a_.f32[0]) || (b_.f32[0] != b_.f32[0])) ? ~INT32_C(0) : INT32_C(0);\n      break;\n\n    case SIMDE_CMP_NEQ_UQ:\n    case SIMDE_CMP_NEQ_US:\n      /* NEQ_UQ/NEQ_US: true when unordered or unequal; C's != is already true\n       * for NaN operands, so a plain comparison implements it (matching the\n       * 256-bit simde_mm256_cmp_ps below). */\n      a_.i32[0] = (a_.f32[0] != b_.f32[0]) ? ~INT32_C(0) : INT32_C(0);\n      break;\n\n    case SIMDE_CMP_NEQ_OQ:\n    case SIMDE_CMP_NEQ_OS:\n      a_.i32[0] = ((a_.f32[0] == a_.f32[0]) & (b_.f32[0] == b_.f32[0]) & (a_.f32[0] != b_.f32[0])) ? ~INT32_C(0) : INT32_C(0);\n      break;\n\n    case SIMDE_CMP_NLT_UQ:\n    case SIMDE_CMP_NLT_US:\n      a_.i32[0] = !(a_.f32[0] < b_.f32[0]) ? ~INT32_C(0) : INT32_C(0);\n      break;\n\n    case SIMDE_CMP_NLE_UQ:\n    case SIMDE_CMP_NLE_US:\n      a_.i32[0] = !(a_.f32[0] <= b_.f32[0]) ? ~INT32_C(0) : INT32_C(0);\n      break;\n\n    case SIMDE_CMP_ORD_Q:\n    case SIMDE_CMP_ORD_S:\n      a_.i32[0] = ((a_.f32[0] == a_.f32[0]) & (b_.f32[0] == b_.f32[0])) ? ~INT32_C(0) : INT32_C(0);\n      break;\n\n    case SIMDE_CMP_EQ_UQ:\n    case SIMDE_CMP_EQ_US:\n      a_.i32[0] = ((a_.f32[0] != a_.f32[0]) | (b_.f32[0] != b_.f32[0]) | (a_.f32[0] == b_.f32[0])) ? ~INT32_C(0) : INT32_C(0);\n      break;\n\n    case SIMDE_CMP_NGE_UQ:\n    case SIMDE_CMP_NGE_US:\n      a_.i32[0] = !(a_.f32[0] >= b_.f32[0]) ? ~INT32_C(0) : INT32_C(0);\n      break;\n\n    case SIMDE_CMP_NGT_UQ:\n    case SIMDE_CMP_NGT_US:\n      a_.i32[0] = !(a_.f32[0] > b_.f32[0]) ? ~INT32_C(0) : INT32_C(0);\n      break;\n\n    case SIMDE_CMP_FALSE_OQ:\n    case SIMDE_CMP_FALSE_OS:\n      a_.i32[0] = INT32_C(0);\n      break;\n\n    case SIMDE_CMP_GE_OQ:\n    case SIMDE_CMP_GE_OS:\n      a_.i32[0] = (a_.f32[0] >= b_.f32[0]) ? ~INT32_C(0) : INT32_C(0);\n      break;\n\n    case SIMDE_CMP_GT_OQ:\n    case SIMDE_CMP_GT_OS:\n      a_.i32[0] = (a_.f32[0] > b_.f32[0]) ? 
~INT32_C(0) : INT32_C(0);\n      break;\n\n    case SIMDE_CMP_TRUE_UQ:\n    case SIMDE_CMP_TRUE_US:\n      a_.i32[0] = ~INT32_C(0);\n      break;\n\n    default:\n      HEDLEY_UNREACHABLE();\n  }\n\n  return simde__m128_from_private(a_);\n}\n#if defined(SIMDE_X86_AVX_NATIVE)\n  #define simde_mm_cmp_ss(a, b, imm8) _mm_cmp_ss(a, b, imm8)\n#endif\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm_cmp_ss\n  #define _mm_cmp_ss(a, b, imm8) simde_mm_cmp_ss(a, b, imm8)\n#endif\n\nSIMDE_HUGE_FUNCTION_ATTRIBUTES\nsimde__m256d\n#if defined(__clang__) && defined(__AVX512DQ__)\nsimde_mm256_cmp_pd_internal_\n#else\nsimde_mm256_cmp_pd\n#endif\n(simde__m256d a, simde__m256d b, const int imm8)\n    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) {\n  simde__m256d_private\n    r_,\n    a_ = simde__m256d_to_private(a),\n    b_ = simde__m256d_to_private(b);\n\n  switch (imm8) {\n    case SIMDE_CMP_EQ_OQ:\n    case SIMDE_CMP_EQ_OS:\n      #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n        r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 == b_.f64));\n      #else\n        SIMDE_VECTORIZE\n        for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n          r_.i64[i] = (a_.f64[i] == b_.f64[i]) ? ~INT32_C(0) : INT32_C(0);\n        }\n      #endif\n      break;\n\n    case SIMDE_CMP_LT_OQ:\n    case SIMDE_CMP_LT_OS:\n      #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n        r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 < b_.f64));\n      #else\n        SIMDE_VECTORIZE\n        for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n          r_.i64[i] = (a_.f64[i] < b_.f64[i]) ? ~INT32_C(0) : INT32_C(0);\n        }\n      #endif\n      break;\n\n    case SIMDE_CMP_LE_OQ:\n    case SIMDE_CMP_LE_OS:\n      #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n        r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 <= b_.f64));\n      #else\n        SIMDE_VECTORIZE\n        for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n          r_.i64[i] = (a_.f64[i] <= b_.f64[i]) ? ~INT32_C(0) : INT32_C(0);\n        }\n      #endif\n      break;\n\n    case SIMDE_CMP_UNORD_Q:\n    case SIMDE_CMP_UNORD_S:\n      #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n        r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != a_.f64) | (b_.f64 != b_.f64));\n      #else\n        SIMDE_VECTORIZE\n        for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n          r_.i64[i] = ((a_.f64[i] != a_.f64[i]) || (b_.f64[i] != b_.f64[i])) ? ~INT32_C(0) : INT32_C(0);\n        }\n      #endif\n      break;\n\n    case SIMDE_CMP_NEQ_UQ:\n    case SIMDE_CMP_NEQ_US:\n      #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n        r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != b_.f64));\n      #else\n        SIMDE_VECTORIZE\n        for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n          r_.i64[i] = (a_.f64[i] != b_.f64[i]) ? ~INT32_C(0) : INT32_C(0);\n        }\n      #endif\n      break;\n\n    case SIMDE_CMP_NEQ_OQ:\n    case SIMDE_CMP_NEQ_OS:\n      #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n        r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 == a_.f64) & (b_.f64 == b_.f64) & (a_.f64 != b_.f64));\n      #else\n        SIMDE_VECTORIZE\n        for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n          r_.i64[i] = ((a_.f64[i] == a_.f64[i]) & (b_.f64[i] == b_.f64[i]) & (a_.f64[i] != b_.f64[i])) ? 
~INT32_C(0) : INT32_C(0);\n        }\n      #endif\n      break;\n\n    case SIMDE_CMP_NLT_UQ:\n    case SIMDE_CMP_NLT_US:\n      #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n        r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 < b_.f64));\n      #else\n        SIMDE_VECTORIZE\n        for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n          r_.i64[i] = !(a_.f64[i] < b_.f64[i]) ? ~INT32_C(0) : INT32_C(0);\n        }\n      #endif\n      break;\n\n    case SIMDE_CMP_NLE_UQ:\n    case SIMDE_CMP_NLE_US:\n      #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n        r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 <= b_.f64));\n      #else\n        SIMDE_VECTORIZE\n        for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n          r_.i64[i] = !(a_.f64[i] <= b_.f64[i]) ? ~INT32_C(0) : INT32_C(0);\n        }\n      #endif\n      break;\n\n    case SIMDE_CMP_ORD_Q:\n    case SIMDE_CMP_ORD_S:\n      #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n        r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ((a_.f64 == a_.f64) & (b_.f64 == b_.f64)));\n      #else\n        SIMDE_VECTORIZE\n        for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n          r_.i64[i] = ((a_.f64[i] == a_.f64[i]) & (b_.f64[i] == b_.f64[i])) ? ~INT32_C(0) : INT32_C(0);\n        }\n      #endif\n      break;\n\n    case SIMDE_CMP_EQ_UQ:\n    case SIMDE_CMP_EQ_US:\n      #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n        r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != a_.f64) | (b_.f64 != b_.f64) | (a_.f64 == b_.f64));\n      #else\n        SIMDE_VECTORIZE\n        for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n          r_.i64[i] = ((a_.f64[i] != a_.f64[i]) | (b_.f64[i] != b_.f64[i]) | (a_.f64[i] == b_.f64[i])) ? ~INT32_C(0) : INT32_C(0);\n        }\n      #endif\n      break;\n\n    case SIMDE_CMP_NGE_UQ:\n    case SIMDE_CMP_NGE_US:\n      #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n        r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 >= b_.f64));\n      #else\n        SIMDE_VECTORIZE\n        for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n          r_.i64[i] = !(a_.f64[i] >= b_.f64[i]) ? ~INT32_C(0) : INT32_C(0);\n        }\n      #endif\n      break;\n\n    case SIMDE_CMP_NGT_UQ:\n    case SIMDE_CMP_NGT_US:\n      #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n        r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 > b_.f64));\n      #else\n        SIMDE_VECTORIZE\n        for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n          r_.i64[i] = !(a_.f64[i] > b_.f64[i]) ? ~INT32_C(0) : INT32_C(0);\n        }\n      #endif\n      break;\n\n    case SIMDE_CMP_FALSE_OQ:\n    case SIMDE_CMP_FALSE_OS:\n      r_ = simde__m256d_to_private(simde_mm256_setzero_pd());\n      break;\n\n    case SIMDE_CMP_GE_OQ:\n    case SIMDE_CMP_GE_OS:\n      #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n        r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 >= b_.f64));\n      #else\n        SIMDE_VECTORIZE\n        for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n          r_.i64[i] = (a_.f64[i] >= b_.f64[i]) ? 
~INT32_C(0) : INT32_C(0);\n        }\n      #endif\n      break;\n\n    case SIMDE_CMP_GT_OQ:\n    case SIMDE_CMP_GT_OS:\n      #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n        r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 > b_.f64));\n      #else\n        SIMDE_VECTORIZE\n        for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n          r_.i64[i] = (a_.f64[i] > b_.f64[i]) ? ~INT32_C(0) : INT32_C(0);\n        }\n      #endif\n      break;\n\n    case SIMDE_CMP_TRUE_UQ:\n    case SIMDE_CMP_TRUE_US:\n      r_ = simde__m256d_to_private(simde_x_mm256_setone_pd());\n      break;\n\n    default:\n      HEDLEY_UNREACHABLE();\n  }\n\n  return simde__m256d_from_private(r_);\n}\n#if defined(__clang__) && defined(__AVX512DQ__)\n  #define simde_mm256_cmp_pd(a, b, imm8) (__extension__ ({ \\\n    simde__m256d simde_mm256_cmp_pd_r; \\\n    switch (imm8) { \\\n      case SIMDE_CMP_FALSE_OQ: \\\n      case SIMDE_CMP_FALSE_OS: \\\n        simde_mm256_cmp_pd_r = simde_mm256_setzero_pd(); \\\n        break; \\\n      case SIMDE_CMP_TRUE_UQ: \\\n      case SIMDE_CMP_TRUE_US: \\\n        simde_mm256_cmp_pd_r = simde_x_mm256_setone_pd(); \\\n        break; \\\n      default: \\\n        simde_mm256_cmp_pd_r = simde_mm256_cmp_pd_internal_(a, b, imm8); \\\n        break; \\\n    } \\\n    simde_mm256_cmp_pd_r; \\\n  }))\n#elif defined(SIMDE_X86_AVX_NATIVE)\n  #define simde_mm256_cmp_pd(a, b, imm8) _mm256_cmp_pd(a, b, imm8)\n#endif\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_cmp_pd\n  #define _mm256_cmp_pd(a, b, imm8) simde_mm256_cmp_pd(a, b, imm8)\n#endif\n\nSIMDE_HUGE_FUNCTION_ATTRIBUTES\nsimde__m256\n#if defined(__clang__) && defined(__AVX512DQ__)\nsimde_mm256_cmp_ps_internal_\n#else\nsimde_mm256_cmp_ps\n#endif\n(simde__m256 a, simde__m256 b, const int imm8)\n    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) {\n  simde__m256_private\n    r_,\n    a_ = simde__m256_to_private(a),\n    b_ = simde__m256_to_private(b);\n\n  switch (imm8) {\n    case SIMDE_CMP_EQ_OQ:\n    case SIMDE_CMP_EQ_OS:\n      #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n        r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 == b_.f32));\n      #else\n        SIMDE_VECTORIZE\n        for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n          r_.i32[i] = (a_.f32[i] == b_.f32[i]) ? ~INT32_C(0) : INT32_C(0);\n        }\n      #endif\n      break;\n\n    case SIMDE_CMP_LT_OQ:\n    case SIMDE_CMP_LT_OS:\n      #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n        r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 < b_.f32));\n      #else\n        SIMDE_VECTORIZE\n        for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n          r_.i32[i] = (a_.f32[i] < b_.f32[i]) ? ~INT32_C(0) : INT32_C(0);\n        }\n      #endif\n      break;\n\n    case SIMDE_CMP_LE_OQ:\n    case SIMDE_CMP_LE_OS:\n      #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n        r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 <= b_.f32));\n      #else\n        SIMDE_VECTORIZE\n        for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n          r_.i32[i] = (a_.f32[i] <= b_.f32[i]) ? 
~INT32_C(0) : INT32_C(0);\n        }\n      #endif\n      break;\n\n    case SIMDE_CMP_UNORD_Q:\n    case SIMDE_CMP_UNORD_S:\n      #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n        r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != a_.f32) | (b_.f32 != b_.f32));\n      #else\n        SIMDE_VECTORIZE\n        for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n          r_.i32[i] = ((a_.f32[i] != a_.f32[i]) || (b_.f32[i] != b_.f32[i])) ? ~INT32_C(0) : INT32_C(0);\n        }\n      #endif\n      break;\n\n    case SIMDE_CMP_NEQ_UQ:\n    case SIMDE_CMP_NEQ_US:\n      #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n        r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != b_.f32));\n      #else\n        SIMDE_VECTORIZE\n        for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n          r_.i32[i] = (a_.f32[i] != b_.f32[i]) ? ~INT32_C(0) : INT32_C(0);\n        }\n      #endif\n      break;\n\n    case SIMDE_CMP_NEQ_OQ:\n    case SIMDE_CMP_NEQ_OS:\n      #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n        r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 == a_.f32) & (b_.f32 == b_.f32) & (a_.f32 != b_.f32));\n      #else\n        SIMDE_VECTORIZE\n        for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n          r_.i32[i] = ((a_.f32[i] == a_.f32[i]) & (b_.f32[i] == b_.f32[i]) & (a_.f32[i] != b_.f32[i])) ? ~INT32_C(0) : INT32_C(0);\n        }\n      #endif\n      break;\n\n    case SIMDE_CMP_NLT_UQ:\n    case SIMDE_CMP_NLT_US:\n      #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n        r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.f32 < b_.f32));\n      #else\n        SIMDE_VECTORIZE\n        for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n          r_.i32[i] = !(a_.f32[i] < b_.f32[i]) ? ~INT32_C(0) : INT32_C(0);\n        }\n      #endif\n      break;\n\n    case SIMDE_CMP_NLE_UQ:\n    case SIMDE_CMP_NLE_US:\n      #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n        r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.f32 <= b_.f32));\n      #else\n        SIMDE_VECTORIZE\n        for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n          r_.i32[i] = !(a_.f32[i] <= b_.f32[i]) ? ~INT32_C(0) : INT32_C(0);\n        }\n      #endif\n      break;\n\n    case SIMDE_CMP_ORD_Q:\n    case SIMDE_CMP_ORD_S:\n      #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n        r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ((a_.f32 == a_.f32) & (b_.f32 == b_.f32)));\n      #else\n        SIMDE_VECTORIZE\n        for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n          r_.i32[i] = ((a_.f32[i] == a_.f32[i]) & (b_.f32[i] == b_.f32[i])) ? ~INT32_C(0) : INT32_C(0);\n        }\n      #endif\n      break;\n\n    case SIMDE_CMP_EQ_UQ:\n    case SIMDE_CMP_EQ_US:\n      #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n        r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != a_.f32) | (b_.f32 != b_.f32) | (a_.f32 == b_.f32));\n      #else\n        SIMDE_VECTORIZE\n        for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n          r_.i32[i] = ((a_.f32[i] != a_.f32[i]) | (b_.f32[i] != b_.f32[i]) | (a_.f32[i] == b_.f32[i])) ? 
~INT32_C(0) : INT32_C(0);\n        }\n      #endif\n      break;\n\n    case SIMDE_CMP_NGE_UQ:\n    case SIMDE_CMP_NGE_US:\n      #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n        r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.f32 >= b_.f32));\n      #else\n        SIMDE_VECTORIZE\n        for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n          r_.i32[i] = !(a_.f32[i] >= b_.f32[i]) ? ~INT32_C(0) : INT32_C(0);\n        }\n      #endif\n      break;\n\n    case SIMDE_CMP_NGT_UQ:\n    case SIMDE_CMP_NGT_US:\n      #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n        r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.f32 > b_.f32));\n      #else\n        SIMDE_VECTORIZE\n        for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n          r_.i32[i] = !(a_.f32[i] > b_.f32[i]) ? ~INT32_C(0) : INT32_C(0);\n        }\n      #endif\n      break;\n\n    case SIMDE_CMP_FALSE_OQ:\n    case SIMDE_CMP_FALSE_OS:\n      r_ = simde__m256_to_private(simde_mm256_setzero_ps());\n      break;\n\n    case SIMDE_CMP_GE_OQ:\n    case SIMDE_CMP_GE_OS:\n      #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n        r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 >= b_.f32));\n      #else\n        SIMDE_VECTORIZE\n        for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n          r_.i32[i] = (a_.f32[i] >= b_.f32[i]) ? ~INT32_C(0) : INT32_C(0);\n        }\n      #endif\n      break;\n\n    case SIMDE_CMP_GT_OQ:\n    case SIMDE_CMP_GT_OS:\n      #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n        r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 > b_.f32));\n      #else\n        SIMDE_VECTORIZE\n        for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n          r_.i32[i] = (a_.f32[i] > b_.f32[i]) ? 
~INT32_C(0) : INT32_C(0);\n        }\n      #endif\n      break;\n\n    case SIMDE_CMP_TRUE_UQ:\n    case SIMDE_CMP_TRUE_US:\n      r_ = simde__m256_to_private(simde_x_mm256_setone_ps());\n      break;\n\n    default:\n      HEDLEY_UNREACHABLE();\n  }\n\n  return simde__m256_from_private(r_);\n}\n#if defined(__clang__) && defined(__AVX512DQ__)\n  #define simde_mm256_cmp_ps(a, b, imm8) (__extension__ ({ \\\n    simde__m256 simde_mm256_cmp_ps_r; \\\n    switch (imm8) { \\\n      case SIMDE_CMP_FALSE_OQ: \\\n      case SIMDE_CMP_FALSE_OS: \\\n        simde_mm256_cmp_ps_r = simde_mm256_setzero_ps(); \\\n        break; \\\n      case SIMDE_CMP_TRUE_UQ: \\\n      case SIMDE_CMP_TRUE_US: \\\n        simde_mm256_cmp_ps_r = simde_x_mm256_setone_ps(); \\\n        break; \\\n      default: \\\n        simde_mm256_cmp_ps_r = simde_mm256_cmp_ps_internal_(a, b, imm8); \\\n        break; \\\n    } \\\n    simde_mm256_cmp_ps_r; \\\n  }))\n#elif defined(SIMDE_X86_AVX_NATIVE)\n  #define simde_mm256_cmp_ps(a, b, imm8) _mm256_cmp_ps(a, b, imm8)\n#elif defined(SIMDE_STATEMENT_EXPR_) && SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n  #define simde_mm256_cmp_ps(a, b, imm8) SIMDE_STATEMENT_EXPR_(({ \\\n    simde__m256_private \\\n      simde_mm256_cmp_ps_r_ = simde__m256_to_private(simde_mm256_setzero_ps()), \\\n      simde_mm256_cmp_ps_a_ = simde__m256_to_private((a)), \\\n      simde_mm256_cmp_ps_b_ = simde__m256_to_private((b)); \\\n    \\\n    for (size_t i = 0 ; i < (sizeof(simde_mm256_cmp_ps_r_.m128) / sizeof(simde_mm256_cmp_ps_r_.m128[0])) ; i++) { \\\n      simde_mm256_cmp_ps_r_.m128[i] = simde_mm_cmp_ps(simde_mm256_cmp_ps_a_.m128[i], simde_mm256_cmp_ps_b_.m128[i], (imm8)); \\\n    } \\\n    \\\n    simde__m256_from_private(simde_mm256_cmp_ps_r_); \\\n  }))\n#endif\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_cmp_ps\n  #define _mm256_cmp_ps(a, b, imm8) simde_mm256_cmp_ps(a, b, imm8)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_x_mm256_copysign_ps(simde__m256 dest, simde__m256 src) {\n  simde__m256_private\n    r_,\n    dest_ = simde__m256_to_private(dest),\n    src_ = simde__m256_to_private(src);\n\n  #if defined(simde_math_copysignf)\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n      r_.f32[i] = simde_math_copysignf(dest_.f32[i], src_.f32[i]);\n    }\n  #else\n    simde__m256 sgnbit = simde_mm256_xor_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), simde_mm256_set1_ps(-SIMDE_FLOAT32_C(0.0)));\n    return simde_mm256_xor_ps(simde_mm256_and_ps(sgnbit, src), simde_mm256_andnot_ps(sgnbit, dest));\n  #endif\n\n  return simde__m256_from_private(r_);\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_x_mm256_copysign_pd(simde__m256d dest, simde__m256d src) {\n  simde__m256d_private\n    r_,\n    dest_ = simde__m256d_to_private(dest),\n    src_ = simde__m256d_to_private(src);\n\n  #if defined(simde_math_copysign)\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n      r_.f64[i] = simde_math_copysign(dest_.f64[i], src_.f64[i]);\n    }\n  #else\n    simde__m256d sgnbit = simde_mm256_xor_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.0)), simde_mm256_set1_pd(-SIMDE_FLOAT64_C(0.0)));\n    return simde_mm256_xor_pd(simde_mm256_and_pd(sgnbit, src), simde_mm256_andnot_pd(sgnbit, dest));\n  #endif\n\n  return simde__m256d_from_private(r_);\n}\n\nHEDLEY_DIAGNOSTIC_POP /* -Wfloat-equal */\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_cvtepi32_pd (simde__m128i a) {\n  #if 
defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_cvtepi32_pd(a);\n  #else\n    simde__m256d_private r_;\n    simde__m128i_private a_ = simde__m128i_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n      r_.f64[i] = HEDLEY_STATIC_CAST(simde_float64, a_.i32[i]);\n    }\n\n    return simde__m256d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_cvtepi32_pd\n  #define _mm256_cvtepi32_pd(a) simde_mm256_cvtepi32_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_cvtepi32_ps (simde__m256i a) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_cvtepi32_ps(a);\n  #else\n    simde__m256_private r_;\n    simde__m256i_private a_ = simde__m256i_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n      r_.f32[i] = HEDLEY_STATIC_CAST(simde_float32, a_.i32[i]);\n    }\n\n    return simde__m256_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_cvtepi32_ps\n  #define _mm256_cvtepi32_ps(a) simde_mm256_cvtepi32_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm256_cvtpd_epi32 (simde__m256d a) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_cvtpd_epi32(a);\n  #else\n    simde__m128i_private r_;\n    simde__m256d_private a_ = simde__m256d_to_private(a);\n\n    #if defined(simde_math_nearbyint)\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) {\n        r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, simde_math_nearbyint(a_.f64[i]));\n      }\n    #else\n      HEDLEY_UNREACHABLE();\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_cvtpd_epi32\n  #define _mm256_cvtpd_epi32(a) simde_mm256_cvtpd_epi32(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm256_cvtpd_ps (simde__m256d a) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_cvtpd_ps(a);\n  #else\n    simde__m128_private r_;\n    simde__m256d_private a_ = simde__m256d_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n      r_.f32[i] = HEDLEY_STATIC_CAST(simde_float32, a_.f64[i]);\n    }\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_cvtpd_ps\n  #define _mm256_cvtpd_ps(a) simde_mm256_cvtpd_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_cvtps_epi32 (simde__m256 a) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_cvtps_epi32(a);\n  #else\n    simde__m256i_private r_;\n    simde__m256_private a_ = simde__m256_to_private(a);\n\n    #if defined(simde_math_nearbyintf)\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) {\n        r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, simde_math_nearbyintf(a_.f32[i]));\n      }\n    #else\n      HEDLEY_UNREACHABLE();\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_cvtps_epi32\n  #define _mm256_cvtps_epi32(a) simde_mm256_cvtps_epi32(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_cvtps_pd (simde__m128 a) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_cvtps_pd(a);\n  #else\n    simde__m256d_private r_;\n    simde__m128_private a_ = simde__m128_to_private(a);\n\n    SIMDE_VECTORIZE\n    
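/* widen each of the four f32 lanes to f64; the loop bound follows the\n       narrower source vector (e.g. {1.f, 2.f, 3.f, 4.f} -> {1., 2., 3., 4.}) */\n    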
for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) {\n      r_.f64[i] = HEDLEY_STATIC_CAST(double, a_.f32[i]);\n    }\n\n    return simde__m256d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_cvtps_pd\n  #define _mm256_cvtps_pd(a) simde_mm256_cvtps_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde_float64\nsimde_mm256_cvtsd_f64 (simde__m256d a) {\n  #if defined(SIMDE_X86_AVX_NATIVE) && ( \\\n      SIMDE_DETECT_CLANG_VERSION_CHECK(3,9,0) || \\\n      HEDLEY_GCC_VERSION_CHECK(7,0,0) || \\\n      HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \\\n      HEDLEY_MSVC_VERSION_CHECK(19,14,0))\n    return _mm256_cvtsd_f64(a);\n  #else\n    simde__m256d_private a_ = simde__m256d_to_private(a);\n    return a_.f64[0];\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_cvtsd_f64\n  #define _mm256_cvtsd_f64(a) simde_mm256_cvtsd_f64(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nint32_t\nsimde_mm256_cvtsi256_si32 (simde__m256i a) {\n  #if defined(SIMDE_X86_AVX_NATIVE) && ( \\\n      SIMDE_DETECT_CLANG_VERSION_CHECK(3,9,0) || \\\n      HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \\\n      HEDLEY_MSVC_VERSION_CHECK(19,14,0))\n    return _mm256_cvtsi256_si32(a);\n  #else\n    simde__m256i_private a_ = simde__m256i_to_private(a);\n    return a_.i32[0];\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_cvtsi256_si32\n  #define _mm256_cvtsi256_si32(a) simde_mm256_cvtsi256_si32(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde_float32\nsimde_mm256_cvtss_f32 (simde__m256 a) {\n  #if defined(SIMDE_X86_AVX_NATIVE) && ( \\\n      SIMDE_DETECT_CLANG_VERSION_CHECK(3,9,0) || \\\n      HEDLEY_GCC_VERSION_CHECK(7,0,0) || \\\n      HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \\\n      HEDLEY_MSVC_VERSION_CHECK(19,14,0))\n    return _mm256_cvtss_f32(a);\n  #else\n    simde__m256_private a_ = simde__m256_to_private(a);\n    return a_.f32[0];\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_cvtss_f32\n  #define _mm256_cvtss_f32(a) simde_mm256_cvtss_f32(a)\n#endif\n\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm256_cvttpd_epi32 (simde__m256d a) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_cvttpd_epi32(a);\n  #else\n    simde__m128i_private r_;\n    simde__m256d_private a_ = simde__m256d_to_private(a);\n\n    #if defined(simde_math_trunc)\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) {\n        r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, simde_math_trunc(a_.f64[i]));\n      }\n    #else\n      HEDLEY_UNREACHABLE();\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_cvttpd_epi32\n  #define _mm256_cvttpd_epi32(a) simde_mm256_cvttpd_epi32(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_cvttps_epi32 (simde__m256 a) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_cvttps_epi32(a);\n  #else\n    simde__m256i_private r_;\n    simde__m256_private a_ = simde__m256_to_private(a);\n\n    #if defined(simde_math_truncf)\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) {\n        r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, simde_math_truncf(a_.f32[i]));\n      }\n    #else\n      HEDLEY_UNREACHABLE();\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_cvttps_epi32\n  #define _mm256_cvttps_epi32(a) 
simde_mm256_cvttps_epi32(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_div_ps (simde__m256 a, simde__m256 b) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_div_ps(a, b);\n  #else\n    simde__m256_private\n      r_,\n      a_ = simde__m256_to_private(a),\n      b_ = simde__m256_to_private(b);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      r_.m128[0] = simde_mm_div_ps(a_.m128[0], b_.m128[0]);\n      r_.m128[1] = simde_mm_div_ps(a_.m128[1], b_.m128[1]);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.f32 = a_.f32 / b_.f32;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = a_.f32[i] / b_.f32[i];\n      }\n    #endif\n\n    return simde__m256_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_div_ps\n  #define _mm256_div_ps(a, b) simde_mm256_div_ps(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_div_pd (simde__m256d a, simde__m256d b) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_div_pd(a, b);\n  #else\n    simde__m256d_private\n      r_,\n      a_ = simde__m256d_to_private(a),\n      b_ = simde__m256d_to_private(b);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      r_.m128d[0] = simde_mm_div_pd(a_.m128d[0], b_.m128d[0]);\n      r_.m128d[1] = simde_mm_div_pd(a_.m128d[1], b_.m128d[1]);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.f64 = a_.f64 / b_.f64;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = a_.f64[i] / b_.f64[i];\n      }\n    #endif\n\n    return simde__m256d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_div_pd\n  #define _mm256_div_pd(a, b) simde_mm256_div_pd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm256_extractf128_pd (simde__m256d a, const int imm8)\n    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) {\n  simde__m256d_private a_ = simde__m256d_to_private(a);\n  return a_.m128d[imm8];\n}\n#if defined(SIMDE_X86_AVX_NATIVE)\n#  define simde_mm256_extractf128_pd(a, imm8) _mm256_extractf128_pd(a, imm8)\n#endif\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_extractf128_pd\n  #define _mm256_extractf128_pd(a, imm8) simde_mm256_extractf128_pd(a, imm8)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm256_extractf128_ps (simde__m256 a, const int imm8)\n    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) {\n  simde__m256_private a_ = simde__m256_to_private(a);\n  return a_.m128[imm8];\n}\n#if defined(SIMDE_X86_AVX_NATIVE)\n#  define simde_mm256_extractf128_ps(a, imm8) _mm256_extractf128_ps(a, imm8)\n#endif\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_extractf128_ps\n  #define _mm256_extractf128_ps(a, imm8) simde_mm256_extractf128_ps(a, imm8)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm256_extractf128_si256 (simde__m256i a, const int imm8)\n    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) {\n  simde__m256i_private a_ = simde__m256i_to_private(a);\n  return a_.m128i[imm8];\n}\n#if defined(SIMDE_X86_AVX_NATIVE)\n#  define simde_mm256_extractf128_si256(a, imm8) _mm256_extractf128_si256(a, imm8)\n#endif\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_extractf128_si256\n  #define _mm256_extractf128_si256(a, imm8) simde_mm256_extractf128_si256(a, imm8)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_floor_pd (simde__m256d a) {\n  return 
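/* floor == round toward negative infinity */ 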
simde_mm256_round_pd(a, SIMDE_MM_FROUND_TO_NEG_INF);\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_floor_pd\n  #define _mm256_floor_pd(a) simde_mm256_floor_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_floor_ps (simde__m256 a) {\n  return simde_mm256_round_ps(a, SIMDE_MM_FROUND_TO_NEG_INF);\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_floor_ps\n  #define _mm256_floor_ps(a) simde_mm256_floor_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_insert_epi8 (simde__m256i a, int8_t i, const int index)\n    SIMDE_REQUIRE_RANGE(index, 0, 31) {\n  simde__m256i_private a_ = simde__m256i_to_private(a);\n\n  a_.i8[index] = i;\n\n  return simde__m256i_from_private(a_);\n}\n#if defined(SIMDE_X86_AVX_NATIVE) && \\\n    (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0))\n  #define simde_mm256_insert_epi8(a, i, index) _mm256_insert_epi8(a, i, index)\n#endif\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_insert_epi8\n  #define _mm256_insert_epi8(a, i, index) simde_mm256_insert_epi8(a, i, index)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_insert_epi16 (simde__m256i a, int16_t i, const int index)\n    SIMDE_REQUIRE_RANGE(index, 0, 15)  {\n  simde__m256i_private a_ = simde__m256i_to_private(a);\n\n  a_.i16[index] = i;\n\n  return simde__m256i_from_private(a_);\n}\n#if defined(SIMDE_X86_AVX_NATIVE) && \\\n    (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0))\n  #define simde_mm256_insert_epi16(a, i, index) _mm256_insert_epi16(a, i, index)\n#endif\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_insert_epi16\n  #define _mm256_insert_epi16(a, i, imm8) simde_mm256_insert_epi16(a, i, imm8)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_insert_epi32 (simde__m256i a, int32_t i, const int index)\n    SIMDE_REQUIRE_RANGE(index, 0, 7)  {\n  simde__m256i_private a_ = simde__m256i_to_private(a);\n\n  a_.i32[index] = i;\n\n  return simde__m256i_from_private(a_);\n}\n#if defined(SIMDE_X86_AVX_NATIVE) && \\\n    (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0))\n  #define simde_mm256_insert_epi32(a, i, index) _mm256_insert_epi32(a, i, index)\n#endif\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_insert_epi32\n  #define _mm256_insert_epi32(a, i, index) simde_mm256_insert_epi32(a, i, index)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_insert_epi64 (simde__m256i a, int64_t i, const int index)\n    SIMDE_REQUIRE_RANGE(index, 0, 3)  {\n  simde__m256i_private a_ = simde__m256i_to_private(a);\n\n  a_.i64[index] = i;\n\n  return simde__m256i_from_private(a_);\n}\n#if defined(SIMDE_X86_AVX_NATIVE) && defined(SIMDE_ARCH_AMD64) && \\\n    (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) && \\\n    SIMDE_DETECT_CLANG_VERSION_CHECK(3,7,0)\n  #define simde_mm256_insert_epi64(a, i, index) _mm256_insert_epi64(a, i, index)\n#endif\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64))\n  #undef _mm256_insert_epi64\n  #define _mm256_insert_epi64(a, i, index) simde_mm256_insert_epi64(a, i, index)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d simde_mm256_insertf128_pd(simde__m256d a, simde__m128d b, int imm8)\n    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) {\n  simde__m256d_private a_ = simde__m256d_to_private(a);\n  simde__m128d_private b_ = simde__m128d_to_private(b);\n\n  
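/* imm8 selects which 128-bit half to replace: 0 = low, 1 = high. */\n  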
a_.m128d_private[imm8] = b_;\n\n  return simde__m256d_from_private(a_);\n}\n#if defined(SIMDE_X86_AVX_NATIVE)\n  #define simde_mm256_insertf128_pd(a, b, imm8) _mm256_insertf128_pd(a, b, imm8)\n#endif\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_insertf128_pd\n  #define _mm256_insertf128_pd(a, b, imm8) simde_mm256_insertf128_pd(a, b, imm8)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256 simde_mm256_insertf128_ps(simde__m256 a, simde__m128 b, int imm8)\n    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) {\n  simde__m256_private a_ = simde__m256_to_private(a);\n  simde__m128_private b_ = simde__m128_to_private(b);\n\n  a_.m128_private[imm8] = b_;\n\n  return simde__m256_from_private(a_);\n}\n#if defined(SIMDE_X86_AVX_NATIVE)\n  #define simde_mm256_insertf128_ps(a, b, imm8) _mm256_insertf128_ps(a, b, imm8)\n#endif\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_insertf128_ps\n  #define _mm256_insertf128_ps(a, b, imm8) simde_mm256_insertf128_ps(a, b, imm8)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i simde_mm256_insertf128_si256(simde__m256i a, simde__m128i b, int imm8)\n    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) {\n  simde__m256i_private a_ = simde__m256i_to_private(a);\n  simde__m128i_private b_ = simde__m128i_to_private(b);\n\n  a_.m128i_private[imm8] = b_;\n\n  return simde__m256i_from_private(a_);\n}\n#if defined(SIMDE_X86_AVX_NATIVE)\n  #define simde_mm256_insertf128_si256(a, b, imm8) _mm256_insertf128_si256(a, b, imm8)\n#endif\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_insertf128_si256\n  #define _mm256_insertf128_si256(a, b, imm8) simde_mm256_insertf128_si256(a, b, imm8)\n#endif\n\n#if defined(SIMDE_X86_AVX_NATIVE)\n#  define simde_mm256_dp_ps(a, b, imm8) _mm256_dp_ps(a, b, imm8)\n#else\n#  define simde_mm256_dp_ps(a, b, imm8) \\\n    simde_mm256_set_m128( \\\n      simde_mm_dp_ps(simde_mm256_extractf128_ps(a, 1), simde_mm256_extractf128_ps(b, 1), imm8), \\\n      simde_mm_dp_ps(simde_mm256_extractf128_ps(a, 0), simde_mm256_extractf128_ps(b, 0), imm8))\n#endif\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_dp_ps\n  #define _mm256_dp_ps(a, b, imm8) simde_mm256_dp_ps(a, b, imm8)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nint32_t\nsimde_mm256_extract_epi32 (simde__m256i a, const int index)\n    SIMDE_REQUIRE_RANGE(index, 0, 7) {\n  simde__m256i_private a_ = simde__m256i_to_private(a);\n  return a_.i32[index];\n}\n#if defined(SIMDE_X86_AVX_NATIVE) && \\\n    (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0))\n  #define simde_mm256_extract_epi32(a, index) _mm256_extract_epi32(a, index)\n#endif\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_extract_epi32\n  #define _mm256_extract_epi32(a, index) simde_mm256_extract_epi32(a, index)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nint64_t\nsimde_mm256_extract_epi64 (simde__m256i a, const int index)\n    SIMDE_REQUIRE_RANGE(index, 0, 3) {\n  simde__m256i_private a_ = simde__m256i_to_private(a);\n  return a_.i64[index];\n}\n#if defined(SIMDE_X86_AVX_NATIVE) && defined(SIMDE_ARCH_AMD64)\n  #if !defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)\n    #define simde_mm256_extract_epi64(a, index) _mm256_extract_epi64(a, index)\n  #endif\n#endif\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64))\n  #undef _mm256_extract_epi64\n  #define _mm256_extract_epi64(a, index) simde_mm256_extract_epi64(a, 
index)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_lddqu_si256 (simde__m256i const * mem_addr) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_loadu_si256(mem_addr);\n  #else\n    simde__m256i r;\n    simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), sizeof(r));\n    return r;\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_lddqu_si256\n  #define _mm256_lddqu_si256(a) simde_mm256_lddqu_si256(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_load_pd (const double mem_addr[HEDLEY_ARRAY_PARAM(4)]) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_load_pd(mem_addr);\n  #else\n    simde__m256d r;\n    simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256d), sizeof(r));\n    return r;\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_load_pd\n  #define _mm256_load_pd(a) simde_mm256_load_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_load_ps (const float mem_addr[HEDLEY_ARRAY_PARAM(8)]) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_load_ps(mem_addr);\n  #else\n    simde__m256 r;\n    simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256), sizeof(r));\n    return r;\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_load_ps\n  #define _mm256_load_ps(a) simde_mm256_load_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_load_si256 (simde__m256i const * mem_addr) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_load_si256(mem_addr);\n  #else\n    simde__m256i r;\n    simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), sizeof(r));\n    return r;\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_load_si256\n  #define _mm256_load_si256(a) simde_mm256_load_si256(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_loadu_pd (const double a[HEDLEY_ARRAY_PARAM(4)]) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_loadu_pd(a);\n  #else\n    simde__m256d r;\n    simde_memcpy(&r, a, sizeof(r));\n    return r;\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_loadu_pd\n  #define _mm256_loadu_pd(a) simde_mm256_loadu_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_loadu_ps (const float a[HEDLEY_ARRAY_PARAM(8)]) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_loadu_ps(a);\n  #else\n    simde__m256 r;\n    simde_memcpy(&r, a, sizeof(r));\n    return r;\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_loadu_ps\n  #define _mm256_loadu_ps(a) simde_mm256_loadu_ps(a)\n#endif\n\n#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \\\n    && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \\\n    && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0))\n  #define simde_mm256_loadu_epi8(mem_addr) _mm256_loadu_epi8(mem_addr)\n#else\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_loadu_epi8(void const * mem_addr) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_loadu_si256(SIMDE_ALIGN_CAST(__m256i const *, mem_addr));\n  #else\n    simde__m256i r;\n    simde_memcpy(&r, mem_addr, sizeof(r));\n    return r;\n  #endif\n}\n#endif\n#define simde_x_mm256_loadu_epi8(mem_addr) simde_mm256_loadu_epi8(mem_addr)\n#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && 
(defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862)))\n  #undef _mm256_loadu_epi8\n  #define _mm256_loadu_epi8(a) simde_mm256_loadu_epi8(a)\n#endif\n\n#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \\\n    && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \\\n    && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0))\n  #define simde_mm256_loadu_epi16(mem_addr) _mm256_loadu_epi16(mem_addr)\n#else\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_loadu_epi16(void const * mem_addr) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_loadu_si256(SIMDE_ALIGN_CAST(__m256i const *, mem_addr));\n  #else\n    simde__m256i r;\n    simde_memcpy(&r, mem_addr, sizeof(r));\n    return r;\n  #endif\n}\n#endif\n#define simde_x_mm256_loadu_epi16(mem_addr) simde_mm256_loadu_epi16(mem_addr)\n#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862)))\n  #undef _mm256_loadu_epi16\n  #define _mm256_loadu_epi16(a) simde_mm256_loadu_epi16(a)\n#endif\n\n#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \\\n    && !defined(SIMDE_BUG_CLANG_REV_344862) \\\n    && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0))\n  #define simde_mm256_loadu_epi32(mem_addr) _mm256_loadu_epi32(mem_addr)\n#else\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_loadu_epi32(void const * mem_addr) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_loadu_si256(SIMDE_ALIGN_CAST(__m256i const *, mem_addr));\n  #else\n    simde__m256i r;\n    simde_memcpy(&r, mem_addr, sizeof(r));\n    return r;\n  #endif\n}\n#endif\n#define simde_x_mm256_loadu_epi32(mem_addr) simde_mm256_loadu_epi32(mem_addr)\n#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862)))\n  #undef _mm256_loadu_epi32\n  #define _mm256_loadu_epi32(a) simde_mm256_loadu_epi32(a)\n#endif\n\n#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \\\n    && !defined(SIMDE_BUG_CLANG_REV_344862) \\\n    && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0))\n  #define simde_mm256_loadu_epi64(mem_addr) _mm256_loadu_epi64(mem_addr)\n#else\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_loadu_epi64(void const * mem_addr) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_loadu_si256(SIMDE_ALIGN_CAST(__m256i const *, mem_addr));\n  #else\n    simde__m256i r;\n    simde_memcpy(&r, mem_addr, sizeof(r));\n    return r;\n  #endif\n}\n#endif\n#define simde_x_mm256_loadu_epi64(mem_addr) simde_mm256_loadu_epi64(mem_addr)\n#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862)))\n  #undef _mm256_loadu_epi64\n  #define _mm256_loadu_epi64(a) simde_mm256_loadu_epi64(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_loadu_si256 (void const * mem_addr) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_loadu_si256(SIMDE_ALIGN_CAST(const __m256i*, mem_addr));\n  #else\n    simde__m256i r;\n    simde_memcpy(&r, mem_addr, sizeof(r));\n    return r;\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_loadu_si256\n  #define _mm256_loadu_si256(mem_addr) 
simde_mm256_loadu_si256(mem_addr)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_loadu2_m128 (const float hiaddr[HEDLEY_ARRAY_PARAM(4)], const float loaddr[HEDLEY_ARRAY_PARAM(4)]) {\n  #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS)\n    return _mm256_loadu2_m128(hiaddr, loaddr);\n  #else\n    return\n      simde_mm256_insertf128_ps(simde_mm256_castps128_ps256(simde_mm_loadu_ps(loaddr)),\n              simde_mm_loadu_ps(hiaddr), 1);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_loadu2_m128\n  #define _mm256_loadu2_m128(hiaddr, loaddr) simde_mm256_loadu2_m128(hiaddr, loaddr)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_loadu2_m128d (const double hiaddr[HEDLEY_ARRAY_PARAM(2)], const double loaddr[HEDLEY_ARRAY_PARAM(2)]) {\n  #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS)\n    return _mm256_loadu2_m128d(hiaddr, loaddr);\n  #else\n    return\n      simde_mm256_insertf128_pd(simde_mm256_castpd128_pd256(simde_mm_loadu_pd(loaddr)),\n              simde_mm_loadu_pd(hiaddr), 1);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_loadu2_m128d\n  #define _mm256_loadu2_m128d(hiaddr, loaddr) simde_mm256_loadu2_m128d(hiaddr, loaddr)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_loadu2_m128i (const simde__m128i* hiaddr, const simde__m128i* loaddr) {\n  #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS)\n    return _mm256_loadu2_m128i(hiaddr, loaddr);\n  #else\n    return\n      simde_mm256_insertf128_si256(simde_mm256_castsi128_si256(simde_mm_loadu_si128(loaddr)),\n          simde_mm_loadu_si128(hiaddr), 1);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_loadu2_m128i\n  #define _mm256_loadu2_m128i(hiaddr, loaddr) simde_mm256_loadu2_m128i(hiaddr, loaddr)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_maskload_pd (const simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128i mask) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0)\n      return _mm_maskload_pd(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m128d, mask));\n    #else\n      return _mm_maskload_pd(mem_addr, mask);\n    #endif\n  #else\n    simde__m128d_private r_;\n    simde__m128i_private\n      mask_ = simde__m128i_to_private(mask),\n      mask_shr_;\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      mask_shr_.neon_i64 = vshrq_n_s64(mask_.neon_i64, 63);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      return simde_mm_and_pd(simde_mm_load_pd(mem_addr),\n          simde__m128d_from_wasm_v128(wasm_i64x2_shr(mask_.wasm_v128, 63)));\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(mask_.i64) / sizeof(mask_.i64[0])) ; i++) {\n        mask_shr_.i64[i] = mask_.i64[i] >> 63;\n      }\n    #endif\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = mask_shr_.i64[i] ? 
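/* all ones iff the mask sign bit was set */ 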
mem_addr[i] : SIMDE_FLOAT64_C(0.0);\n      }\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm_maskload_pd\n  #define _mm_maskload_pd(mem_addr, mask) simde_mm_maskload_pd(HEDLEY_REINTERPRET_CAST(double const*, mem_addr), mask)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_maskload_pd (const simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m256i mask) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0)\n      return _mm256_maskload_pd(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m256d, mask));\n    #else\n      return _mm256_maskload_pd(mem_addr, mask);\n    #endif\n  #else\n    simde__m256d_private r_;\n    simde__m256i_private mask_ = simde__m256i_to_private(mask);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n      r_.f64[i] = (mask_.i64[i] >> 63) ? mem_addr[i] : SIMDE_FLOAT64_C(0.0);\n    }\n\n    return simde__m256d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_maskload_pd\n  #define _mm256_maskload_pd(mem_addr, mask) simde_mm256_maskload_pd(HEDLEY_REINTERPRET_CAST(double const*, mem_addr), mask)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_maskload_ps (const simde_float32 mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m128i mask) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0)\n      return _mm_maskload_ps(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m128, mask));\n    #else\n      return _mm_maskload_ps(mem_addr, mask);\n    #endif\n  #else\n    simde__m128_private r_;\n    simde__m128i_private\n      mask_ = simde__m128i_to_private(mask),\n      mask_shr_;\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      mask_shr_.neon_i32 = vshrq_n_s32(mask_.neon_i32, 31);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      return simde_mm_and_ps(simde_mm_load_ps(mem_addr),\n          simde__m128_from_wasm_v128(wasm_i32x4_shr(mask_.wasm_v128, 31)));\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(mask_.i32) / sizeof(mask_.i32[0])) ; i++) {\n        mask_shr_.i32[i] = mask_.i32[i] >> 31;\n      }\n    #endif\n\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = mask_shr_.i32[i] ? mem_addr[i] : SIMDE_FLOAT32_C(0.0);\n      }\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm_maskload_ps\n  #define _mm_maskload_ps(mem_addr, mask) simde_mm_maskload_ps(HEDLEY_REINTERPRET_CAST(float const*, mem_addr), mask)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_maskload_ps (const simde_float32 mem_addr[HEDLEY_ARRAY_PARAM(8)], simde__m256i mask) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0)\n      return _mm256_maskload_ps(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m256, mask));\n    #else\n      return _mm256_maskload_ps(mem_addr, mask);\n    #endif\n  #else\n    simde__m256_private r_;\n    simde__m256i_private mask_ = simde__m256i_to_private(mask);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n      r_.f32[i] = (mask_.i32[i] >> 31) ? 
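/* load the element only when the mask sign bit is set */ 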
mem_addr[i] : SIMDE_FLOAT32_C(0.0);\n    }\n\n    return simde__m256_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_maskload_ps\n  #define _mm256_maskload_ps(mem_addr, mask) simde_mm256_maskload_ps(HEDLEY_REINTERPRET_CAST(float const*, mem_addr), mask)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nvoid\nsimde_mm_maskstore_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128i mask, simde__m128d a) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0)\n      _mm_maskstore_pd(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m128d, mask), a);\n    #else\n      _mm_maskstore_pd(mem_addr, mask, a);\n    #endif\n  #else\n    simde__m128i_private mask_ = simde__m128i_to_private(mask);\n    simde__m128d_private a_ = simde__m128d_to_private(a);\n\n    #if defined(SIMDE_WASM_SIMD128_NATIVE)\n      if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i64x2_extract_lane(mask_.wasm_v128, 0)) & 0x8000000000000000ull) != 0)\n        mem_addr[0] = wasm_f64x2_extract_lane(a_.wasm_v128, 0);\n      if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i64x2_extract_lane(mask_.wasm_v128, 1)) & 0x8000000000000000ull) != 0)\n        mem_addr[1] = wasm_f64x2_extract_lane(a_.wasm_v128, 1);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) {\n        if (mask_.u64[i] >> 63)\n          mem_addr[i] = a_.f64[i];\n      }\n    #endif\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm_maskstore_pd\n  #define _mm_maskstore_pd(mem_addr, mask, a) simde_mm_maskstore_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), mask, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nvoid\nsimde_mm256_maskstore_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m256i mask, simde__m256d a) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0)\n      _mm256_maskstore_pd(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m256d, mask), a);\n    #else\n      _mm256_maskstore_pd(mem_addr, mask, a);\n    #endif\n  #else\n    simde__m256i_private mask_ = simde__m256i_to_private(mask);\n    simde__m256d_private a_ = simde__m256d_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) {\n      if (mask_.u64[i] & (UINT64_C(1) << 63))\n        mem_addr[i] = a_.f64[i];\n    }\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_maskstore_pd\n  #define _mm256_maskstore_pd(mem_addr, mask, a) simde_mm256_maskstore_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), mask, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nvoid\nsimde_mm_maskstore_ps (simde_float32 mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m128i mask, simde__m128 a) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0)\n      _mm_maskstore_ps(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m128, mask), a);\n    #else\n      _mm_maskstore_ps(mem_addr, mask, a);\n    #endif\n  #else\n    simde__m128i_private mask_ = simde__m128i_to_private(mask);\n    simde__m128_private a_ = simde__m128_to_private(a);\n\n    #if defined(SIMDE_WASM_SIMD128_NATIVE)\n      if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i32x4_extract_lane(mask_.wasm_v128, 0)) & 0x80000000ull) != 0)\n        mem_addr[0] = wasm_f32x4_extract_lane(a_.wasm_v128, 0);\n      if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i32x4_extract_lane(mask_.wasm_v128, 1)) & 0x80000000ull) != 0)\n        
mem_addr[1] = wasm_f32x4_extract_lane(a_.wasm_v128, 1);\n      if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i32x4_extract_lane(mask_.wasm_v128, 2)) & 0x80000000ull) != 0)\n        mem_addr[2] = wasm_f32x4_extract_lane(a_.wasm_v128, 2);\n      if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i32x4_extract_lane(mask_.wasm_v128, 3)) & 0x80000000ull) != 0)\n        mem_addr[3] = wasm_f32x4_extract_lane(a_.wasm_v128, 3);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) {\n        if (mask_.u32[i] & (UINT32_C(1) << 31))\n          mem_addr[i] = a_.f32[i];\n      }\n    #endif\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm_maskstore_ps\n  #define _mm_maskstore_ps(mem_addr, mask, a) simde_mm_maskstore_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), mask, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nvoid\nsimde_mm256_maskstore_ps (simde_float32 mem_addr[HEDLEY_ARRAY_PARAM(8)], simde__m256i mask, simde__m256 a) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0)\n      _mm256_maskstore_ps(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m256, mask), a);\n    #else\n      _mm256_maskstore_ps(mem_addr, mask, a);\n    #endif\n  #else\n    simde__m256i_private mask_ = simde__m256i_to_private(mask);\n    simde__m256_private a_ = simde__m256_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) {\n      if (mask_.u32[i] & (UINT32_C(1) << 31))\n        mem_addr[i] = a_.f32[i];\n    }\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_maskstore_ps\n  #define _mm256_maskstore_ps(mem_addr, mask, a) simde_mm256_maskstore_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), mask, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_min_ps (simde__m256 a, simde__m256 b) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_min_ps(a, b);\n  #else\n    simde__m256_private\n      r_,\n      a_ = simde__m256_to_private(a),\n      b_ = simde__m256_to_private(b);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      r_.m128[0] = simde_mm_min_ps(a_.m128[0], b_.m128[0]);\n      r_.m128[1] = simde_mm_min_ps(a_.m128[1], b_.m128[1]);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = (a_.f32[i] < b_.f32[i]) ? a_.f32[i] : b_.f32[i];\n      }\n    #endif\n\n    return simde__m256_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_min_ps\n  #define _mm256_min_ps(a, b) simde_mm256_min_ps(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_min_pd (simde__m256d a, simde__m256d b) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_min_pd(a, b);\n  #else\n    simde__m256d_private\n      r_,\n      a_ = simde__m256d_to_private(a),\n      b_ = simde__m256d_to_private(b);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      r_.m128d[0] = simde_mm_min_pd(a_.m128d[0], b_.m128d[0]);\n      r_.m128d[1] = simde_mm_min_pd(a_.m128d[1], b_.m128d[1]);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = (a_.f64[i] < b_.f64[i]) ? 
a_.f64[i] : b_.f64[i];\n      }\n    #endif\n\n    return simde__m256d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_min_pd\n  #define _mm256_min_pd(a, b) simde_mm256_min_pd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_max_ps (simde__m256 a, simde__m256 b) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_max_ps(a, b);\n  #else\n    simde__m256_private\n      r_,\n      a_ = simde__m256_to_private(a),\n      b_ = simde__m256_to_private(b);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      r_.m128[0] = simde_mm_max_ps(a_.m128[0], b_.m128[0]);\n      r_.m128[1] = simde_mm_max_ps(a_.m128[1], b_.m128[1]);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = (a_.f32[i] > b_.f32[i]) ? a_.f32[i] : b_.f32[i];\n      }\n    #endif\n\n    return simde__m256_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_max_ps\n  #define _mm256_max_ps(a, b) simde_mm256_max_ps(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_max_pd (simde__m256d a, simde__m256d b) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_max_pd(a, b);\n  #else\n    simde__m256d_private\n      r_,\n      a_ = simde__m256d_to_private(a),\n      b_ = simde__m256d_to_private(b);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      r_.m128d[0] = simde_mm_max_pd(a_.m128d[0], b_.m128d[0]);\n      r_.m128d[1] = simde_mm_max_pd(a_.m128d[1], b_.m128d[1]);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = (a_.f64[i] > b_.f64[i]) ? a_.f64[i] : b_.f64[i];\n      }\n    #endif\n\n    return simde__m256d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_max_pd\n  #define _mm256_max_pd(a, b) simde_mm256_max_pd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_movedup_pd (simde__m256d a) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_movedup_pd(a);\n  #else\n    simde__m256d_private\n      r_,\n      a_ = simde__m256d_to_private(a);\n\n    #if defined(SIMDE_SHUFFLE_VECTOR_)\n      r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, a_.f64, 0, 0, 2, 2);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) {\n        r_.f64[i] = r_.f64[i + 1] = a_.f64[i];\n      }\n    #endif\n\n    return simde__m256d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_movedup_pd\n  #define _mm256_movedup_pd(a) simde_mm256_movedup_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_movehdup_ps (simde__m256 a) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_movehdup_ps(a);\n  #else\n    simde__m256_private\n      r_,\n      a_ = simde__m256_to_private(a);\n\n    #if defined(SIMDE_SHUFFLE_VECTOR_)\n      r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, a_.f32, 1, 1, 3, 3, 5, 5, 7, 7);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) {\n        r_.f32[i - 1] = r_.f32[i] = a_.f32[i];\n      }\n    #endif\n\n    return simde__m256_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_movehdup_ps\n  #define _mm256_movehdup_ps(a) simde_mm256_movehdup_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_moveldup_ps (simde__m256 a) {\n  #if 
defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_moveldup_ps(a);\n  #else\n    simde__m256_private\n      r_,\n      a_ = simde__m256_to_private(a);\n\n    #if defined(SIMDE_SHUFFLE_VECTOR_)\n      r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, a_.f32, 0, 0, 2, 2, 4, 4, 6, 6);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) {\n        r_.f32[i] = r_.f32[i + 1] = a_.f32[i];\n      }\n    #endif\n\n    return simde__m256_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_moveldup_ps\n  #define _mm256_moveldup_ps(a) simde_mm256_moveldup_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nint\nsimde_mm256_movemask_ps (simde__m256 a) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_movemask_ps(a);\n  #else\n    simde__m256_private a_ = simde__m256_to_private(a);\n    int r = 0;\n\n    SIMDE_VECTORIZE_REDUCTION(|:r)\n    for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) {\n      r |= (a_.u32[i] >> 31) << i;\n    }\n\n    return r;\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_movemask_ps\n  #define _mm256_movemask_ps(a) simde_mm256_movemask_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nint\nsimde_mm256_movemask_pd (simde__m256d a) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_movemask_pd(a);\n  #else\n    simde__m256d_private a_ = simde__m256d_to_private(a);\n    int r = 0;\n\n    SIMDE_VECTORIZE_REDUCTION(|:r)\n    for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) {\n      r |= (a_.u64[i] >> 63) << i;\n    }\n\n    return r;\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_movemask_pd\n  #define _mm256_movemask_pd(a) simde_mm256_movemask_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_mul_ps (simde__m256 a, simde__m256 b) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_mul_ps(a, b);\n  #else\n    simde__m256_private\n      r_,\n      a_ = simde__m256_to_private(a),\n      b_ = simde__m256_to_private(b);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      r_.m128[0] = simde_mm_mul_ps(a_.m128[0], b_.m128[0]);\n      r_.m128[1] = simde_mm_mul_ps(a_.m128[1], b_.m128[1]);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.f32 = a_.f32 * b_.f32;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = a_.f32[i] * b_.f32[i];\n      }\n    #endif\n\n    return simde__m256_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_mul_ps\n  #define _mm256_mul_ps(a, b) simde_mm256_mul_ps(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_mul_pd (simde__m256d a, simde__m256d b) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_mul_pd(a, b);\n  #else\n    simde__m256d_private\n      r_,\n      a_ = simde__m256d_to_private(a),\n      b_ = simde__m256d_to_private(b);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      r_.m128d[0] = simde_mm_mul_pd(a_.m128d[0], b_.m128d[0]);\n      r_.m128d[1] = simde_mm_mul_pd(a_.m128d[1], b_.m128d[1]);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.f64 = a_.f64 * b_.f64;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = a_.f64[i] * b_.f64[i];\n      }\n    #endif\n\n    return simde__m256d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef 
_mm256_mul_pd\n  #define _mm256_mul_pd(a, b) simde_mm256_mul_pd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_or_ps (simde__m256 a, simde__m256 b) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_or_ps(a, b);\n  #else\n    simde__m256_private\n      r_,\n      a_ = simde__m256_to_private(a),\n      b_ = simde__m256_to_private(b);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      r_.m128[0] = simde_mm_or_ps(a_.m128[0], b_.m128[0]);\n      r_.m128[1] = simde_mm_or_ps(a_.m128[1], b_.m128[1]);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i32f = a_.i32f | b_.i32f;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {\n        r_.u32[i] = a_.u32[i] | b_.u32[i];\n      }\n    #endif\n\n    return simde__m256_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_or_ps\n  #define _mm256_or_ps(a, b) simde_mm256_or_ps(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_or_pd (simde__m256d a, simde__m256d b) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_or_pd(a, b);\n  #else\n    simde__m256d_private\n      r_,\n      a_ = simde__m256d_to_private(a),\n      b_ = simde__m256d_to_private(b);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      r_.m128d[0] = simde_mm_or_pd(a_.m128d[0], b_.m128d[0]);\n      r_.m128d[1] = simde_mm_or_pd(a_.m128d[1], b_.m128d[1]);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i32f = a_.i32f | b_.i32f;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {\n        r_.u64[i] = a_.u64[i] | b_.u64[i];\n      }\n    #endif\n\n    return simde__m256d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_or_pd\n  #define _mm256_or_pd(a, b) simde_mm256_or_pd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_permute_ps (simde__m256 a, const int imm8)\n    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) {\n  simde__m256_private\n    r_,\n    a_ = simde__m256_to_private(a);\n\n  SIMDE_VECTORIZE\n  for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n    r_.f32[i] = a_.m128_private[i >> 2].f32[(imm8 >> ((i << 1) & 7)) & 3];\n  }\n\n  return simde__m256_from_private(r_);\n}\n#if defined(SIMDE_X86_AVX_NATIVE)\n#  define simde_mm256_permute_ps(a, imm8) _mm256_permute_ps(a, imm8)\n#endif\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_permute_ps\n  #define _mm256_permute_ps(a, imm8) simde_mm256_permute_ps(a, imm8)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_permute_pd (simde__m256d a, const int imm8)\n    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) {\n  simde__m256d_private\n    r_,\n    a_ = simde__m256d_to_private(a);\n\n  SIMDE_VECTORIZE\n  for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n    r_.f64[i] = a_.f64[((imm8 >> i) & 1) + (i & 2)];\n  }\n\n  return simde__m256d_from_private(r_);\n}\n#if defined(SIMDE_X86_AVX_NATIVE)\n#  define simde_mm256_permute_pd(a, imm8) _mm256_permute_pd(a, imm8)\n#endif\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_permute_pd\n  #define _mm256_permute_pd(a, imm8) simde_mm256_permute_pd(a, imm8)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_permute_ps (simde__m128 a, const int imm8)\n    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) {\n  simde__m128_private\n    r_,\n    a_ = simde__m128_to_private(a);\n\n  SIMDE_VECTORIZE\n  for (size_t i = 0 ; i < 
(sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n    r_.f32[i] = a_.f32[(imm8 >> ((i << 1) & 7)) & 3];\n  }\n\n  return simde__m128_from_private(r_);\n}\n#if defined(SIMDE_X86_AVX_NATIVE)\n#  define simde_mm_permute_ps(a, imm8) _mm_permute_ps(a, imm8)\n#elif defined(SIMDE_WASM_SIMD128_NATIVE)\n#  define simde_mm_permute_ps(a, imm8) simde__m128_from_wasm_v128(wasm_i32x4_shuffle(simde__m128_to_wasm_v128(a), simde__m128_to_wasm_v128(a), ((imm8) & 3), (((imm8) >> 2) & 3 ), (((imm8) >> 4) & 3), (((imm8) >> 6) & 3)))\n#endif\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm_permute_ps\n  #define _mm_permute_ps(a, imm8) simde_mm_permute_ps(a, imm8)\n#endif\n\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_permute_pd (simde__m128d a, const int imm8)\n    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) {\n  simde__m128d_private\n    r_,\n    a_ = simde__m128d_to_private(a);\n\n  SIMDE_VECTORIZE\n  for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n    r_.f64[i] = a_.f64[((imm8 >> i) & 1) + (i & 2)];\n  }\n\n  return simde__m128d_from_private(r_);\n}\n#if defined(SIMDE_X86_AVX_NATIVE)\n#  define simde_mm_permute_pd(a, imm8) _mm_permute_pd(a, imm8)\n#elif defined(SIMDE_WASM_SIMD128_NATIVE)\n#  define simde_mm_permute_pd(a, imm8) simde__m128d_from_wasm_v128(wasm_i64x2_shuffle(simde__m128d_to_wasm_v128(a), simde__m128d_to_wasm_v128(a), ((imm8) & 1), (((imm8) >> 1) & 1 )))\n#endif\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm_permute_pd\n  #define _mm_permute_pd(a, imm8) simde_mm_permute_pd(a, imm8)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_permutevar_ps (simde__m128 a, simde__m128i b) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm_permutevar_ps(a, b);\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a);\n    simde__m128i_private b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_f32x4_make(\n        (a_.f32[wasm_i32x4_extract_lane(b_.wasm_v128, 0) & 3]),\n        (a_.f32[wasm_i32x4_extract_lane(b_.wasm_v128, 1) & 3]),\n        (a_.f32[wasm_i32x4_extract_lane(b_.wasm_v128, 2) & 3]),\n        (a_.f32[wasm_i32x4_extract_lane(b_.wasm_v128, 3) & 3]));\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = a_.f32[b_.i32[i] & 3];\n      }\n    #endif\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm_permutevar_ps\n  #define _mm_permutevar_ps(a, b) simde_mm_permutevar_ps(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_permutevar_pd (simde__m128d a, simde__m128i b) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm_permutevar_pd(a, b);\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a);\n    simde__m128i_private b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_f64x2_make(\n        (a_.f64[(wasm_i64x2_extract_lane(b_.wasm_v128, 0) >> 1) & 1]),\n        (a_.f64[(wasm_i64x2_extract_lane(b_.wasm_v128, 1) >> 1) & 1]));\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = a_.f64[(b_.i64[i] & 2) >> 1];\n      }\n    #endif\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm_permutevar_pd\n  #define _mm_permutevar_pd(a, b) simde_mm_permutevar_pd(a, 
b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_permutevar_ps (simde__m256 a, simde__m256i b) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_permutevar_ps(a, b);\n  #else\n    simde__m256_private\n      r_,\n      a_ = simde__m256_to_private(a);\n    simde__m256i_private b_ = simde__m256i_to_private(b);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n      r_.f32[i] = a_.f32[(b_.i32[i] & 3) + (i & 4)];\n    }\n\n    return simde__m256_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_permutevar_ps\n  #define _mm256_permutevar_ps(a, b) simde_mm256_permutevar_ps(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_permutevar_pd (simde__m256d a, simde__m256i b) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_permutevar_pd(a, b);\n  #else\n    simde__m256d_private\n      r_,\n      a_ = simde__m256d_to_private(a);\n    simde__m256i_private b_ = simde__m256i_to_private(b);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n      r_.f64[i] = a_.f64[((b_.i64[i] & 2) >> 1) + (i & 2)];\n    }\n\n    return simde__m256d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_permutevar_pd\n  #define _mm256_permutevar_pd(a, b) simde_mm256_permutevar_pd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_permute2f128_ps (simde__m256 a, simde__m256 b, const int imm8)\n    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) {\n  simde__m256_private\n    r_,\n    a_ = simde__m256_to_private(a),\n    b_ = simde__m256_to_private(b);\n\n  r_.m128_private[0] = (imm8 & 0x08) ? simde__m128_to_private(simde_mm_setzero_ps()) : ((imm8 & 0x02) ? b_.m128_private[(imm8     ) & 1] : a_.m128_private[(imm8     ) & 1]);\n  r_.m128_private[1] = (imm8 & 0x80) ? simde__m128_to_private(simde_mm_setzero_ps()) : ((imm8 & 0x20) ? b_.m128_private[(imm8 >> 4) & 1] : a_.m128_private[(imm8 >> 4) & 1]);\n\n  return simde__m256_from_private(r_);\n}\n#if defined(SIMDE_X86_AVX_NATIVE)\n#  define simde_mm256_permute2f128_ps(a, b, imm8) _mm256_permute2f128_ps(a, b, imm8)\n#endif\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_permute2f128_ps\n  #define _mm256_permute2f128_ps(a, b, imm8) simde_mm256_permute2f128_ps(a, b, imm8)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_permute2f128_pd (simde__m256d a, simde__m256d b, const int imm8)\n    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) {\n  simde__m256d_private\n    r_,\n    a_ = simde__m256d_to_private(a),\n    b_ = simde__m256d_to_private(b);\n\n  r_.m128d_private[0] = (imm8 & 0x08) ? simde__m128d_to_private(simde_mm_setzero_pd()) : ((imm8 & 0x02) ? b_.m128d_private[(imm8     ) & 1] : a_.m128d_private[(imm8     ) & 1]);\n  r_.m128d_private[1] = (imm8 & 0x80) ? simde__m128d_to_private(simde_mm_setzero_pd()) : ((imm8 & 0x20) ? 
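/* imm8 bit 5 selects b (rather than a) as the source of the upper lane */ 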
b_.m128d_private[(imm8 >> 4) & 1] : a_.m128d_private[(imm8 >> 4) & 1]);\n\n  return simde__m256d_from_private(r_);\n}\n#if defined(SIMDE_X86_AVX_NATIVE)\n#  define simde_mm256_permute2f128_pd(a, b, imm8) _mm256_permute2f128_pd(a, b, imm8)\n#endif\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_permute2f128_pd\n  #define _mm256_permute2f128_pd(a, b, imm8) simde_mm256_permute2f128_pd(a, b, imm8)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_permute2f128_si256 (simde__m256i a, simde__m256i b, const int imm8)\n    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) {\n  simde__m256i_private\n    r_,\n    a_ = simde__m256i_to_private(a),\n    b_ = simde__m256i_to_private(b);\n\n  r_.m128i_private[0] = (imm8 & 0x08) ? simde__m128i_to_private(simde_mm_setzero_si128()) : ((imm8 & 0x02) ? b_.m128i_private[(imm8     ) & 1] : a_.m128i_private[(imm8     ) & 1]);\n  r_.m128i_private[1] = (imm8 & 0x80) ? simde__m128i_to_private(simde_mm_setzero_si128()) : ((imm8 & 0x20) ? b_.m128i_private[(imm8 >> 4) & 1] : a_.m128i_private[(imm8 >> 4) & 1]);\n\n  return simde__m256i_from_private(r_);\n}\n#if defined(SIMDE_X86_AVX_NATIVE)\n#  define simde_mm256_permute2f128_si256(a, b, imm8) _mm256_permute2f128_si256(a, b, imm8)\n#endif\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_permute2f128_si256\n  #define _mm256_permute2f128_si256(a, b, imm8) simde_mm256_permute2f128_si256(a, b, imm8)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_rcp_ps (simde__m256 a) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_rcp_ps(a);\n  #else\n    simde__m256_private\n      r_,\n      a_ = simde__m256_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      r_.m128[0] = simde_mm_rcp_ps(a_.m128[0]);\n      r_.m128[1] = simde_mm_rcp_ps(a_.m128[1]);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = SIMDE_FLOAT32_C(1.0) / a_.f32[i];\n      }\n    #endif\n\n    return simde__m256_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_rcp_ps\n  #define _mm256_rcp_ps(a) simde_mm256_rcp_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_rsqrt_ps (simde__m256 a) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_rsqrt_ps(a);\n  #else\n    simde__m256_private\n      r_,\n      a_ = simde__m256_to_private(a);\n\n    #if defined(simde_math_sqrtf)\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = 1.0f / simde_math_sqrtf(a_.f32[i]);\n      }\n    #else\n      HEDLEY_UNREACHABLE();\n    #endif\n\n    return simde__m256_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_rsqrt_ps\n  #define _mm256_rsqrt_ps(a) simde_mm256_rsqrt_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_setr_epi8 (\n    int8_t e31, int8_t e30, int8_t e29, int8_t e28, int8_t e27, int8_t e26, int8_t e25, int8_t e24,\n    int8_t e23, int8_t e22, int8_t e21, int8_t e20, int8_t e19, int8_t e18, int8_t e17, int8_t e16,\n    int8_t e15, int8_t e14, int8_t e13, int8_t e12, int8_t e11, int8_t e10, int8_t  e9, int8_t  e8,\n    int8_t  e7, int8_t  e6, int8_t  e5, int8_t  e4, int8_t  e3, int8_t  e2, int8_t  e1, int8_t  e0) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_setr_epi8(\n        e31, e30, e29, e28, e27, e26, e25, e24,\n        e23, e22, e21, e20, e19, e18, e17, e16,\n        e15, e14, e13, e12, e11, e10,  
e9,  e8,\n        e7,  e6,  e5,  e4,  e3,  e2,  e1,  e0);\n  #else\n    return simde_mm256_set_epi8(\n        e0,  e1,  e2,  e3,  e4,  e5,  e6,  e7,\n        e8,  e9, e10, e11, e12, e13, e14, e15,\n        e16, e17, e18, e19, e20, e21, e22, e23,\n        e24, e25, e26, e27, e28, e29, e30, e31);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_setr_epi8\n  #define _mm256_setr_epi8(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) \\\n    simde_mm256_setr_epi8(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_setr_epi16 (\n    int16_t e15, int16_t e14, int16_t e13, int16_t e12, int16_t e11, int16_t e10, int16_t  e9, int16_t  e8,\n    int16_t  e7, int16_t  e6, int16_t  e5, int16_t  e4, int16_t  e3, int16_t  e2, int16_t  e1, int16_t  e0) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_setr_epi16(\n        e15, e14, e13, e12, e11, e10,  e9,  e8,\n        e7,  e6,  e5,  e4,  e3,  e2,  e1,  e0);\n  #else\n    return simde_mm256_set_epi16(\n        e0,  e1,  e2,  e3,  e4,  e5,  e6,  e7,\n        e8,  e9, e10, e11, e12, e13, e14, e15);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_setr_epi16\n  #define _mm256_setr_epi16(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) \\\n    simde_mm256_setr_epi16(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_setr_epi32 (\n    int32_t  e7, int32_t  e6, int32_t  e5, int32_t  e4, int32_t  e3, int32_t  e2, int32_t  e1, int32_t  e0) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_setr_epi32(e7, e6, e5, e4, e3, e2, e1, e0);\n  #else\n    return simde_mm256_set_epi32(e0, e1, e2, e3, e4, e5, e6, e7);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_setr_epi32\n  #define _mm256_setr_epi32(e7, e6, e5, e4, e3, e2, e1, e0) \\\n    simde_mm256_setr_epi32(e7, e6, e5, e4, e3, e2, e1, e0)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_setr_epi64x (int64_t  e3, int64_t  e2, int64_t  e1, int64_t  e0) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_setr_epi64x(e3, e2, e1, e0);\n  #else\n    return simde_mm256_set_epi64x(e0, e1, e2, e3);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_setr_epi64x\n  #define _mm256_setr_epi64x(e3, e2, e1, e0) \\\n    simde_mm256_setr_epi64x(e3, e2, e1, e0)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_setr_ps (\n    simde_float32  e7, simde_float32  e6, simde_float32  e5, simde_float32  e4,\n    simde_float32  e3, simde_float32  e2, simde_float32  e1, simde_float32  e0) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_setr_ps(e7, e6, e5, e4, e3, e2, e1, e0);\n  #else\n    return simde_mm256_set_ps(e0, e1, e2, e3, e4, e5, e6, e7);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_setr_ps\n  #define _mm256_setr_ps(e7, e6, e5, e4, e3, e2, e1, e0) \\\n    simde_mm256_setr_ps(e7, e6, e5, e4, e3, e2, e1, e0)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_setr_pd (simde_float64  e3, simde_float64  e2, simde_float64  e1, simde_float64  e0) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_setr_pd(e3, e2, e1, e0);\n  #else\n    return 
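/* setr is set with the element order reversed */ 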
simde_mm256_set_pd(e0, e1, e2, e3);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_setr_pd\n  #define _mm256_setr_pd(e3, e2, e1, e0) \\\n    simde_mm256_setr_pd(e3, e2, e1, e0)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_setr_m128 (simde__m128 lo, simde__m128 hi) {\n  #if defined(SIMDE_X86_AVX_NATIVE) && \\\n      !defined(SIMDE_BUG_GCC_REV_247851) && \\\n      SIMDE_DETECT_CLANG_VERSION_CHECK(3,6,0)\n    return _mm256_setr_m128(lo, hi);\n  #else\n    return simde_mm256_set_m128(hi, lo);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_setr_m128\n  #define _mm256_setr_m128(lo, hi) \\\n    simde_mm256_setr_m128(lo, hi)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_setr_m128d (simde__m128d lo, simde__m128d hi) {\n  #if defined(SIMDE_X86_AVX_NATIVE) && \\\n      !defined(SIMDE_BUG_GCC_REV_247851) && \\\n      SIMDE_DETECT_CLANG_VERSION_CHECK(3,6,0)\n    return _mm256_setr_m128d(lo, hi);\n  #else\n    return simde_mm256_set_m128d(hi, lo);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_setr_m128d\n  #define _mm256_setr_m128d(lo, hi) \\\n    simde_mm256_setr_m128d(lo, hi)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_setr_m128i (simde__m128i lo, simde__m128i hi) {\n  #if defined(SIMDE_X86_AVX_NATIVE) && \\\n      !defined(SIMDE_BUG_GCC_REV_247851) && \\\n      SIMDE_DETECT_CLANG_VERSION_CHECK(3,6,0)\n    return _mm256_setr_m128i(lo, hi);\n  #else\n    return simde_mm256_set_m128i(hi, lo);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_setr_m128i\n  #define _mm256_setr_m128i(lo, hi) \\\n    simde_mm256_setr_m128i(lo, hi)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_shuffle_ps (simde__m256 a, simde__m256 b, const int imm8)\n    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) {\n  simde__m256_private\n    r_,\n    a_ = simde__m256_to_private(a),\n    b_ = simde__m256_to_private(b);\n\n  r_.f32[0] = a_.m128_private[0].f32[(imm8 >> 0) & 3];\n  r_.f32[1] = a_.m128_private[0].f32[(imm8 >> 2) & 3];\n  r_.f32[2] = b_.m128_private[0].f32[(imm8 >> 4) & 3];\n  r_.f32[3] = b_.m128_private[0].f32[(imm8 >> 6) & 3];\n  r_.f32[4] = a_.m128_private[1].f32[(imm8 >> 0) & 3];\n  r_.f32[5] = a_.m128_private[1].f32[(imm8 >> 2) & 3];\n  r_.f32[6] = b_.m128_private[1].f32[(imm8 >> 4) & 3];\n  r_.f32[7] = b_.m128_private[1].f32[(imm8 >> 6) & 3];\n\n  return simde__m256_from_private(r_);\n}\n#if defined(SIMDE_X86_AVX_NATIVE)\n  #define simde_mm256_shuffle_ps(a, b, imm8) _mm256_shuffle_ps(a, b, imm8)\n#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n  #define simde_mm256_shuffle_ps(a, b, imm8) \\\n      simde_mm256_set_m128( \\\n          simde_mm_shuffle_ps(simde_mm256_extractf128_ps(a, 1), simde_mm256_extractf128_ps(b, 1), (imm8)), \\\n          simde_mm_shuffle_ps(simde_mm256_extractf128_ps(a, 0), simde_mm256_extractf128_ps(b, 0), (imm8)))\n#elif defined(SIMDE_SHUFFLE_VECTOR_)\n  #define simde_mm256_shuffle_ps(a, b, imm8) \\\n    SIMDE_SHUFFLE_VECTOR_(32, 32, a, b, \\\n      (((imm8) >> 0) & 3) + 0, \\\n      (((imm8) >> 2) & 3) + 0, \\\n      (((imm8) >> 4) & 3) + 8, \\\n      (((imm8) >> 6) & 3) + 8, \\\n      (((imm8) >> 0) & 3) + 4, \\\n      (((imm8) >> 2) & 3) + 4, \\\n      (((imm8) >> 4) & 3) + 12, \\\n      (((imm8) >> 6) & 3) + 12)\n#endif\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_shuffle_ps\n  #define _mm256_shuffle_ps(a, b, imm8) simde_mm256_shuffle_ps(a, b, 
imm8)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_shuffle_pd (simde__m256d a, simde__m256d b, const int imm8)\n    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) {\n  simde__m256d_private\n    r_,\n    a_ = simde__m256d_to_private(a),\n    b_ = simde__m256d_to_private(b);\n\n  r_.f64[0] = a_.f64[((imm8     ) & 1)    ];\n  r_.f64[1] = b_.f64[((imm8 >> 1) & 1)    ];\n  r_.f64[2] = a_.f64[((imm8 >> 2) & 1) | 2];\n  r_.f64[3] = b_.f64[((imm8 >> 3) & 1) | 2];\n\n  return simde__m256d_from_private(r_);\n}\n#if defined(SIMDE_X86_AVX_NATIVE)\n  #define simde_mm256_shuffle_pd(a, b, imm8) _mm256_shuffle_pd(a, b, imm8)\n#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n  #define simde_mm256_shuffle_pd(a, b, imm8) \\\n      simde_mm256_set_m128d( \\\n          simde_mm_shuffle_pd(simde_mm256_extractf128_pd(a, 1), simde_mm256_extractf128_pd(b, 1), (imm8 >> 2) & 3), \\\n          simde_mm_shuffle_pd(simde_mm256_extractf128_pd(a, 0), simde_mm256_extractf128_pd(b, 0), (imm8 >> 0) & 3))\n#elif defined(SIMDE_SHUFFLE_VECTOR_)\n  #define simde_mm256_shuffle_pd(a, b, imm8) \\\n    SIMDE_SHUFFLE_VECTOR_(64, 32, a, b, \\\n      (((imm8) >> 0) & 1) + 0, \\\n      (((imm8) >> 1) & 1) + 4, \\\n      (((imm8) >> 2) & 1) + 2, \\\n      (((imm8) >> 3) & 1) + 6)\n#endif\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_shuffle_pd\n  #define _mm256_shuffle_pd(a, b, imm8) simde_mm256_shuffle_pd(a, b, imm8)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_sqrt_ps (simde__m256 a) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_sqrt_ps(a);\n  #else\n    simde__m256_private\n      r_,\n      a_ = simde__m256_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      r_.m128[0] = simde_mm_sqrt_ps(a_.m128[0]);\n      r_.m128[1] = simde_mm_sqrt_ps(a_.m128[1]);\n    #elif defined(simde_math_sqrtf)\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = simde_math_sqrtf(a_.f32[i]);\n      }\n    #else\n      HEDLEY_UNREACHABLE();\n    #endif\n\n    return simde__m256_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_sqrt_ps\n  #define _mm256_sqrt_ps(a) simde_mm256_sqrt_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_sqrt_pd (simde__m256d a) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_sqrt_pd(a);\n  #else\n    simde__m256d_private\n      r_,\n      a_ = simde__m256d_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      r_.m128d[0] = simde_mm_sqrt_pd(a_.m128d[0]);\n      r_.m128d[1] = simde_mm_sqrt_pd(a_.m128d[1]);\n    #elif defined(simde_math_sqrt)\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = simde_math_sqrt(a_.f64[i]);\n      }\n    #else\n      HEDLEY_UNREACHABLE();\n    #endif\n\n    return simde__m256d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_sqrt_pd\n  #define _mm256_sqrt_pd(a) simde_mm256_sqrt_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nvoid\nsimde_mm256_store_ps (simde_float32 mem_addr[8], simde__m256 a) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    _mm256_store_ps(mem_addr, a);\n  #else\n    simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256), &a, sizeof(a));\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_store_ps\n  #define _mm256_store_ps(mem_addr, a) simde_mm256_store_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), 
a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nvoid\nsimde_mm256_store_pd (simde_float64 mem_addr[4], simde__m256d a) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    _mm256_store_pd(mem_addr, a);\n  #else\n    simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256d), &a, sizeof(a));\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_store_pd\n  #define _mm256_store_pd(mem_addr, a) simde_mm256_store_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nvoid\nsimde_mm256_store_si256 (simde__m256i* mem_addr, simde__m256i a) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    _mm256_store_si256(mem_addr, a);\n  #else\n  simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), &a, sizeof(a));\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_store_si256\n  #define _mm256_store_si256(mem_addr, a) simde_mm256_store_si256(mem_addr, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nvoid\nsimde_mm256_storeu_ps (simde_float32 mem_addr[8], simde__m256 a) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    _mm256_storeu_ps(mem_addr, a);\n  #else\n    simde_memcpy(mem_addr, &a, sizeof(a));\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_storeu_ps\n  #define _mm256_storeu_ps(mem_addr, a) simde_mm256_storeu_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nvoid\nsimde_mm256_storeu_pd (simde_float64 mem_addr[4], simde__m256d a) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    _mm256_storeu_pd(mem_addr, a);\n  #else\n    simde_memcpy(mem_addr, &a, sizeof(a));\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_storeu_pd\n  #define _mm256_storeu_pd(mem_addr, a) simde_mm256_storeu_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nvoid\nsimde_mm256_storeu_si256 (void* mem_addr, simde__m256i a) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    _mm256_storeu_si256(SIMDE_ALIGN_CAST(__m256i*, mem_addr), a);\n  #else\n    simde_memcpy(mem_addr, &a, sizeof(a));\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_storeu_si256\n  #define _mm256_storeu_si256(mem_addr, a) simde_mm256_storeu_si256(mem_addr, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nvoid\nsimde_mm256_storeu2_m128 (simde_float32 hi_addr[4], simde_float32 lo_addr[4], simde__m256 a) {\n  #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS)\n    _mm256_storeu2_m128(hi_addr, lo_addr, a);\n  #else\n    simde_mm_storeu_ps(lo_addr, simde_mm256_castps256_ps128(a));\n    simde_mm_storeu_ps(hi_addr, simde_mm256_extractf128_ps(a, 1));\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_storeu2_m128\n  #define _mm256_storeu2_m128(hi_addr, lo_addr, a) simde_mm256_storeu2_m128(hi_addr, lo_addr, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nvoid\nsimde_mm256_storeu2_m128d (simde_float64 hi_addr[2], simde_float64 lo_addr[2], simde__m256d a) {\n  #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS)\n    _mm256_storeu2_m128d(hi_addr, lo_addr, a);\n  #else\n    simde_mm_storeu_pd(lo_addr, simde_mm256_castpd256_pd128(a));\n    simde_mm_storeu_pd(hi_addr, simde_mm256_extractf128_pd(a, 1));\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_storeu2_m128d\n  #define _mm256_storeu2_m128d(hi_addr, lo_addr, a) simde_mm256_storeu2_m128d(hi_addr, lo_addr, 
a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nvoid\nsimde_mm256_storeu2_m128i (simde__m128i* hi_addr, simde__m128i* lo_addr, simde__m256i a) {\n  #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS)\n    _mm256_storeu2_m128i(hi_addr, lo_addr, a);\n  #else\n    simde_mm_storeu_si128(lo_addr, simde_mm256_castsi256_si128(a));\n    simde_mm_storeu_si128(hi_addr, simde_mm256_extractf128_si256(a, 1));\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_storeu2_m128i\n  #define _mm256_storeu2_m128i(hi_addr, lo_addr, a) simde_mm256_storeu2_m128i(hi_addr, lo_addr, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nvoid\nsimde_mm256_stream_ps (simde_float32 mem_addr[8], simde__m256 a) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    _mm256_stream_ps(mem_addr, a);\n  #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && defined(SIMDE_VECTOR_SUBSCRIPT)\n    __builtin_nontemporal_store(a, SIMDE_ALIGN_CAST(__typeof__(a)*, mem_addr));\n  #else\n    simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256), &a, sizeof(a));\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_stream_ps\n  #define _mm256_stream_ps(mem_addr, a) simde_mm256_stream_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nvoid\nsimde_mm256_stream_pd (simde_float64 mem_addr[4], simde__m256d a) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    _mm256_stream_pd(mem_addr, a);\n  #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && defined(SIMDE_VECTOR_SUBSCRIPT)\n    __builtin_nontemporal_store(a, SIMDE_ALIGN_CAST(__typeof__(a)*, mem_addr));\n  #else\n    simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256d), &a, sizeof(a));\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_stream_pd\n  #define _mm256_stream_pd(mem_addr, a) simde_mm256_stream_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nvoid\nsimde_mm256_stream_si256 (simde__m256i* mem_addr, simde__m256i a) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    _mm256_stream_si256(mem_addr, a);\n  #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && defined(SIMDE_VECTOR_SUBSCRIPT)\n    __builtin_nontemporal_store(a, SIMDE_ALIGN_CAST(__typeof__(a)*, mem_addr));\n  #else\n    simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), &a, sizeof(a));\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_stream_si256\n  #define _mm256_stream_si256(mem_addr, a) simde_mm256_stream_si256(mem_addr, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_sub_ps (simde__m256 a, simde__m256 b) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_sub_ps(a, b);\n  #else\n    simde__m256_private\n      r_,\n      a_ = simde__m256_to_private(a),\n      b_ = simde__m256_to_private(b);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      r_.m128[0] = simde_mm_sub_ps(a_.m128[0], b_.m128[0]);\n      r_.m128[1] = simde_mm_sub_ps(a_.m128[1], b_.m128[1]);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.f32 = a_.f32 - b_.f32;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = a_.f32[i] - b_.f32[i];\n      }\n    #endif\n\n    return simde__m256_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_sub_ps\n  #define _mm256_sub_ps(a, b) simde_mm256_sub_ps(a, 
b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_hsub_ps (simde__m256 a, simde__m256 b) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_hsub_ps(a, b);\n  #else\n      return simde_mm256_sub_ps(simde_x_mm256_deinterleaveeven_ps(a, b), simde_x_mm256_deinterleaveodd_ps(a, b));\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_hsub_ps\n  #define _mm256_hsub_ps(a, b) simde_mm256_hsub_ps(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_sub_pd (simde__m256d a, simde__m256d b) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_sub_pd(a, b);\n  #else\n    simde__m256d_private\n      r_,\n      a_ = simde__m256d_to_private(a),\n      b_ = simde__m256d_to_private(b);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      r_.m128d[0] = simde_mm_sub_pd(a_.m128d[0], b_.m128d[0]);\n      r_.m128d[1] = simde_mm_sub_pd(a_.m128d[1], b_.m128d[1]);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.f64 = a_.f64 - b_.f64;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = a_.f64[i] - b_.f64[i];\n      }\n    #endif\n\n    return simde__m256d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_sub_pd\n  #define _mm256_sub_pd(a, b) simde_mm256_sub_pd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_hsub_pd (simde__m256d a, simde__m256d b) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_hsub_pd(a, b);\n  #else\n      return simde_mm256_sub_pd(simde_x_mm256_deinterleaveeven_pd(a, b), simde_x_mm256_deinterleaveodd_pd(a, b));\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_hsub_pd\n  #define _mm256_hsub_pd(a, b) simde_mm256_hsub_pd(a, b)\n#endif\n\n#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_)\n  HEDLEY_DIAGNOSTIC_PUSH\n  SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_undefined_ps (void) {\n  simde__m256_private r_;\n\n#if \\\n    defined(SIMDE_X86_AVX_NATIVE) && \\\n    (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(5,0,0)) && \\\n    (!defined(__has_builtin) || HEDLEY_HAS_BUILTIN(__builtin_ia32_undef256))\n  r_.n = _mm256_undefined_ps();\n#elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_)\n  r_ = simde__m256_to_private(simde_mm256_setzero_ps());\n#endif\n\n  return simde__m256_from_private(r_);\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_undefined_ps\n  #define _mm256_undefined_ps() simde_mm256_undefined_ps()\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_undefined_pd (void) {\n  simde__m256d_private r_;\n\n#if \\\n    defined(SIMDE_X86_AVX_NATIVE) && \\\n    (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(5,0,0)) && \\\n    (!defined(__has_builtin) || HEDLEY_HAS_BUILTIN(__builtin_ia32_undef256))\n  r_.n = _mm256_undefined_pd();\n#elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_)\n  r_ = simde__m256d_to_private(simde_mm256_setzero_pd());\n#endif\n\n  return simde__m256d_from_private(r_);\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_undefined_pd\n  #define _mm256_undefined_pd() simde_mm256_undefined_pd()\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_undefined_si256 (void) {\n  simde__m256i_private r_;\n#if \\\n    defined(SIMDE_X86_AVX_NATIVE) && \\\n    (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(5,0,0)) && \\\n    (!defined(__has_builtin) || 
HEDLEY_HAS_BUILTIN(__builtin_ia32_undef256))\n  r_.n = _mm256_undefined_si256();\n#elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_)\n  r_ = simde__m256i_to_private(simde_mm256_setzero_si256());\n#endif\n\n  return simde__m256i_from_private(r_);\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_undefined_si256\n  #define _mm256_undefined_si256() simde_mm256_undefined_si256()\n#endif\n\n#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_)\n  HEDLEY_DIAGNOSTIC_POP\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_xor_ps (simde__m256 a, simde__m256 b) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_xor_ps(a, b);\n  #else\n    simde__m256_private\n      r_,\n      a_ = simde__m256_to_private(a),\n      b_ = simde__m256_to_private(b);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      r_.m128[0] = simde_mm_xor_ps(a_.m128[0], b_.m128[0]);\n      r_.m128[1] = simde_mm_xor_ps(a_.m128[1], b_.m128[1]);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i32f = a_.i32f ^ b_.i32f;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {\n        r_.u32[i] = a_.u32[i] ^ b_.u32[i];\n      }\n    #endif\n\n    return simde__m256_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_xor_ps\n  #define _mm256_xor_ps(a, b) simde_mm256_xor_ps(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_xor_pd (simde__m256d a, simde__m256d b) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_xor_pd(a, b);\n  #else\n    simde__m256d_private\n      r_,\n      a_ = simde__m256d_to_private(a),\n      b_ = simde__m256d_to_private(b);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      r_.m128d[0] = simde_mm_xor_pd(a_.m128d[0], b_.m128d[0]);\n      r_.m128d[1] = simde_mm_xor_pd(a_.m128d[1], b_.m128d[1]);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i32f = a_.i32f ^ b_.i32f;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {\n        r_.u64[i] = a_.u64[i] ^ b_.u64[i];\n      }\n    #endif\n\n    return simde__m256d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_xor_pd\n  #define _mm256_xor_pd(a, b) simde_mm256_xor_pd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_x_mm256_xorsign_ps(simde__m256 dest, simde__m256 src) {\n  return simde_mm256_xor_ps(simde_mm256_and_ps(simde_mm256_set1_ps(-0.0f), src), dest);\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_x_mm256_xorsign_pd(simde__m256d dest, simde__m256d src) {\n  return simde_mm256_xor_pd(simde_mm256_and_pd(simde_mm256_set1_pd(-0.0), src), dest);\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_x_mm256_negate_ps(simde__m256 a) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return simde_mm256_xor_ps(a,_mm256_set1_ps(SIMDE_FLOAT32_C(-0.0)));\n  #else\n    simde__m256_private\n      r_,\n      a_ = simde__m256_to_private(a);\n\n    #if defined(SIMDE_VECTOR_NEGATE)\n      r_.f32 = -a_.f32;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = -a_.f32[i];\n      }\n    #endif\n\n    return simde__m256_from_private(r_);\n  #endif\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_x_mm256_negate_pd(simde__m256d a) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return simde_mm256_xor_pd(a, _mm256_set1_pd(SIMDE_FLOAT64_C(-0.0)));\n  #else\n    simde__m256d_private\n      r_,\n      a_ = 
simde__m256d_to_private(a);\n\n    #if defined(SIMDE_VECTOR_NEGATE)\n      r_.f64 = -a_.f64;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = -a_.f64[i];\n      }\n    #endif\n\n    return simde__m256d_from_private(r_);\n  #endif\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_unpackhi_ps (simde__m256 a, simde__m256 b) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_unpackhi_ps(a, b);\n  #else\n    simde__m256_private\n      r_,\n      a_ = simde__m256_to_private(a),\n      b_ = simde__m256_to_private(b);\n\n    #if defined(SIMDE_SHUFFLE_VECTOR_)\n      r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, b_.f32, 2, 10, 3, 11, 6, 14, 7, 15);\n    #else\n      r_.f32[0] = a_.f32[2];\n      r_.f32[1] = b_.f32[2];\n      r_.f32[2] = a_.f32[3];\n      r_.f32[3] = b_.f32[3];\n      r_.f32[4] = a_.f32[6];\n      r_.f32[5] = b_.f32[6];\n      r_.f32[6] = a_.f32[7];\n      r_.f32[7] = b_.f32[7];\n    #endif\n\n    return simde__m256_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_unpackhi_ps\n  #define _mm256_unpackhi_ps(a, b) simde_mm256_unpackhi_ps(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_unpackhi_pd (simde__m256d a, simde__m256d b) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_unpackhi_pd(a, b);\n  #else\n    simde__m256d_private\n      r_,\n      a_ = simde__m256d_to_private(a),\n      b_ = simde__m256d_to_private(b);\n\n    #if defined(SIMDE_SHUFFLE_VECTOR_)\n      r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, b_.f64, 1, 5, 3, 7);\n    #else\n      r_.f64[0] = a_.f64[1];\n      r_.f64[1] = b_.f64[1];\n      r_.f64[2] = a_.f64[3];\n      r_.f64[3] = b_.f64[3];\n    #endif\n\n    return simde__m256d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_unpackhi_pd\n  #define _mm256_unpackhi_pd(a, b) simde_mm256_unpackhi_pd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_unpacklo_ps (simde__m256 a, simde__m256 b) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_unpacklo_ps(a, b);\n  #else\n    simde__m256_private\n      r_,\n      a_ = simde__m256_to_private(a),\n      b_ = simde__m256_to_private(b);\n\n    #if defined(SIMDE_SHUFFLE_VECTOR_)\n      r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, b_.f32, 0, 8, 1, 9, 4, 12, 5, 13);\n    #else\n      r_.f32[0] = a_.f32[0];\n      r_.f32[1] = b_.f32[0];\n      r_.f32[2] = a_.f32[1];\n      r_.f32[3] = b_.f32[1];\n      r_.f32[4] = a_.f32[4];\n      r_.f32[5] = b_.f32[4];\n      r_.f32[6] = a_.f32[5];\n      r_.f32[7] = b_.f32[5];\n    #endif\n\n    return simde__m256_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_unpacklo_ps\n  #define _mm256_unpacklo_ps(a, b) simde_mm256_unpacklo_ps(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_unpacklo_pd (simde__m256d a, simde__m256d b) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_unpacklo_pd(a, b);\n  #else\n    simde__m256d_private\n      r_,\n      a_ = simde__m256d_to_private(a),\n      b_ = simde__m256d_to_private(b);\n\n    #if defined(SIMDE_SHUFFLE_VECTOR_)\n      r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, b_.f64, 0, 4, 2, 6);\n    #else\n      r_.f64[0] = a_.f64[0];\n      r_.f64[1] = b_.f64[0];\n      r_.f64[2] = a_.f64[2];\n      r_.f64[3] = b_.f64[2];\n    #endif\n\n    return simde__m256d_from_private(r_);\n  #endif\n}\n#if 
defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_unpacklo_pd\n  #define _mm256_unpacklo_pd(a, b) simde_mm256_unpacklo_pd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_zextps128_ps256 (simde__m128 a) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_insertf128_ps(_mm256_setzero_ps(), a, 0);\n  #else\n    simde__m256_private r_;\n\n    r_.m128_private[0] = simde__m128_to_private(a);\n    r_.m128_private[1] = simde__m128_to_private(simde_mm_setzero_ps());\n\n    return simde__m256_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_zextps128_ps256\n  #define _mm256_zextps128_ps256(a) simde_mm256_zextps128_ps256(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_zextpd128_pd256 (simde__m128d a) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_insertf128_pd(_mm256_setzero_pd(), a, 0);\n  #else\n    simde__m256d_private r_;\n\n    r_.m128d_private[0] = simde__m128d_to_private(a);\n    r_.m128d_private[1] = simde__m128d_to_private(simde_mm_setzero_pd());\n\n    return simde__m256d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_zextpd128_pd256\n  #define _mm256_zextpd128_pd256(a) simde_mm256_zextpd128_pd256(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_zextsi128_si256 (simde__m128i a) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_insertf128_si256(_mm256_setzero_si256(), a, 0);\n  #else\n    simde__m256i_private r_;\n\n    r_.m128i_private[0] = simde__m128i_to_private(a);\n    r_.m128i_private[1] = simde__m128i_to_private(simde_mm_setzero_si128());\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_zextsi128_si256\n  #define _mm256_zextsi128_si256(a) simde_mm256_zextsi128_si256(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nint\nsimde_mm_testc_ps (simde__m128 a, simde__m128 b) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm_testc_ps(a, b);\n  #else\n    simde__m128_private\n      a_ = simde__m128_to_private(a),\n      b_ = simde__m128_to_private(b);\n\n    #if defined(SIMDE_WASM_SIMD128_NATIVE)\n      v128_t m = wasm_u32x4_shr(wasm_v128_or(wasm_v128_not(b_.wasm_v128), a_.wasm_v128), 31);\n      m = wasm_v128_and(m, simde_mm_movehl_ps(m, m));\n      m = wasm_v128_and(m, simde_mm_shuffle_epi32(m, SIMDE_MM_SHUFFLE(3, 2, 0, 1)));\n      return wasm_i32x4_extract_lane(m, 0);\n    #else\n      uint_fast32_t r = 0;\n      SIMDE_VECTORIZE_REDUCTION(|:r)\n      for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) {\n        r |= ~a_.u32[i] & b_.u32[i];\n      }\n\n      return HEDLEY_STATIC_CAST(int, ((~r >> 31) & 1));\n    #endif\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm_testc_ps\n  #define _mm_testc_ps(a, b) simde_mm_testc_ps(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nint\nsimde_mm_testc_pd (simde__m128d a, simde__m128d b) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm_testc_pd(a, b);\n  #else\n    simde__m128d_private\n      a_ = simde__m128d_to_private(a),\n      b_ = simde__m128d_to_private(b);\n\n    #if defined(SIMDE_WASM_SIMD128_NATIVE)\n      v128_t m = wasm_u64x2_shr(wasm_v128_or(wasm_v128_not(b_.wasm_v128), a_.wasm_v128), 63);\n      return HEDLEY_STATIC_CAST(int, wasm_i64x2_extract_lane(m, 0) & wasm_i64x2_extract_lane(m, 1));\n    #else\n      uint_fast64_t r = 0;\n      SIMDE_VECTORIZE_REDUCTION(|:r)\n      for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) 
; i++) {\n        r |= ~a_.u64[i] & b_.u64[i];\n      }\n\n      return HEDLEY_STATIC_CAST(int, ((~r >> 63) & 1));\n    #endif\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm_testc_pd\n  #define _mm_testc_pd(a, b) simde_mm_testc_pd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nint\nsimde_mm256_testc_ps (simde__m256 a, simde__m256 b) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_testc_ps(a, b);\n  #else\n    uint_fast32_t r = 0;\n    simde__m256_private\n      a_ = simde__m256_to_private(a),\n      b_ = simde__m256_to_private(b);\n\n    SIMDE_VECTORIZE_REDUCTION(|:r)\n    for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) {\n      r |= ~a_.u32[i] & b_.u32[i];\n    }\n\n    return HEDLEY_STATIC_CAST(int, ((~r >> 31) & 1));\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_testc_ps\n  #define _mm256_testc_ps(a, b) simde_mm256_testc_ps(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nint\nsimde_mm256_testc_pd (simde__m256d a, simde__m256d b) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_testc_pd(a, b);\n  #else\n    uint_fast64_t r = 0;\n    simde__m256d_private\n      a_ = simde__m256d_to_private(a),\n      b_ = simde__m256d_to_private(b);\n\n    SIMDE_VECTORIZE_REDUCTION(|:r)\n    for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) {\n      r |= ~a_.u64[i] & b_.u64[i];\n    }\n\n    return HEDLEY_STATIC_CAST(int, ((~r >> 63) & 1));\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_testc_pd\n  #define _mm256_testc_pd(a, b) simde_mm256_testc_pd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nint\nsimde_mm256_testc_si256 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_testc_si256(a, b);\n  #else\n    int_fast32_t r = 0;\n    simde__m256i_private\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    SIMDE_VECTORIZE_REDUCTION(|:r)\n    for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) {\n      r |= ~a_.i32f[i] & b_.i32f[i];\n    }\n\n    return HEDLEY_STATIC_CAST(int, !r);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_testc_si256\n  #define _mm256_testc_si256(a, b) simde_mm256_testc_si256(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nint\nsimde_mm_testz_ps (simde__m128 a, simde__m128 b) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm_testz_ps(a, b);\n  #else\n    simde__m128_private\n      a_ = simde__m128_to_private(a),\n      b_ = simde__m128_to_private(b);\n\n    #if defined(SIMDE_WASM_SIMD128_NATIVE)\n      v128_t m = wasm_u32x4_shr(wasm_v128_not(wasm_v128_and(a_.wasm_v128, b_.wasm_v128)), 31);\n      m = wasm_v128_and(m, simde_mm_movehl_ps(m, m));\n      m = wasm_v128_and(m, simde_mm_shuffle_epi32(m, SIMDE_MM_SHUFFLE(3, 2, 0, 1)));\n      return wasm_i32x4_extract_lane(m, 0);\n    #else\n      uint_fast32_t r = 0;\n      SIMDE_VECTORIZE_REDUCTION(|:r)\n      for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) {\n        r |= a_.u32[i] & b_.u32[i];\n      }\n\n      return HEDLEY_STATIC_CAST(int, ((~r >> 31) & 1));\n    #endif\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm_testz_ps\n  #define _mm_testz_ps(a, b) simde_mm_testz_ps(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nint\nsimde_mm_testz_pd (simde__m128d a, simde__m128d b) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm_testz_pd(a, b);\n  #else\n    simde__m128d_private\n      a_ = simde__m128d_to_private(a),\n      b_ = 
simde__m128d_to_private(b);\n\n    #if defined(SIMDE_WASM_SIMD128_NATIVE)\n      v128_t m = wasm_u64x2_shr(wasm_v128_not(wasm_v128_and(a_.wasm_v128, b_.wasm_v128)), 63);\n      return HEDLEY_STATIC_CAST(int, wasm_i64x2_extract_lane(m, 0) & wasm_i64x2_extract_lane(m, 1));\n    #else\n      uint_fast64_t r = 0;\n      SIMDE_VECTORIZE_REDUCTION(|:r)\n      for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) {\n        r |= a_.u64[i] & b_.u64[i];\n      }\n\n      return HEDLEY_STATIC_CAST(int, ((~r >> 63) & 1));\n    #endif\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm_testz_pd\n  #define _mm_testz_pd(a, b) simde_mm_testz_pd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nint\nsimde_mm256_testz_ps (simde__m256 a, simde__m256 b) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_testz_ps(a, b);\n  #else\n    uint_fast32_t r = 0;\n    simde__m256_private\n      a_ = simde__m256_to_private(a),\n      b_ = simde__m256_to_private(b);\n\n    SIMDE_VECTORIZE_REDUCTION(|:r)\n    for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) {\n      r |= a_.u32[i] & b_.u32[i];\n    }\n\n    return HEDLEY_STATIC_CAST(int, ((~r >> 31) & 1));\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_testz_ps\n  #define _mm256_testz_ps(a, b) simde_mm256_testz_ps(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nint\nsimde_mm256_testz_pd (simde__m256d a, simde__m256d b) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_testz_pd(a, b);\n  #else\n    uint_fast64_t r = 0;\n    simde__m256d_private\n      a_ = simde__m256d_to_private(a),\n      b_ = simde__m256d_to_private(b);\n\n    SIMDE_VECTORIZE_REDUCTION(|:r)\n    for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) {\n      r |= a_.u64[i] & b_.u64[i];\n    }\n\n    return HEDLEY_STATIC_CAST(int, ((~r >> 63) & 1));\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_testz_pd\n  #define _mm256_testz_pd(a, b) simde_mm256_testz_pd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nint\nsimde_mm256_testz_si256 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_testz_si256(a, b);\n  #else\n    int_fast32_t r = 0;\n    simde__m256i_private\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      r = simde_mm_testz_si128(a_.m128i[0], b_.m128i[0]) && simde_mm_testz_si128(a_.m128i[1], b_.m128i[1]);\n    #else\n      SIMDE_VECTORIZE_REDUCTION(|:r)\n      for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) {\n        r |= a_.i32f[i] & b_.i32f[i];\n      }\n\n      r = !r;\n    #endif\n\n    return HEDLEY_STATIC_CAST(int, r);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_testz_si256\n  #define _mm256_testz_si256(a, b) simde_mm256_testz_si256(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nint\nsimde_mm_testnzc_ps (simde__m128 a, simde__m128 b) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm_testnzc_ps(a, b);\n  #else\n    simde__m128_private\n      a_ = simde__m128_to_private(a),\n      b_ = simde__m128_to_private(b);\n\n    #if defined(SIMDE_WASM_SIMD128_NATIVE)\n      v128_t m = wasm_u32x4_shr(wasm_v128_and(a_.wasm_v128, b_.wasm_v128), 31);\n      v128_t m2 = wasm_u32x4_shr(wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128), 31);\n      m  = wasm_v128_or(m,  simde_mm_movehl_ps(m, m));\n      m2 = wasm_v128_or(m2, simde_mm_movehl_ps(m2, m2));\n      m  = wasm_v128_or(m,  
simde_mm_shuffle_epi32(m, SIMDE_MM_SHUFFLE(3, 2, 0, 1)));\n      m2 = wasm_v128_or(m2, simde_mm_shuffle_epi32(m2, SIMDE_MM_SHUFFLE(3, 2, 0, 1)));\n      return wasm_i32x4_extract_lane(m, 0) & wasm_i32x4_extract_lane(m2, 0);\n    #else\n      uint32_t rz = 0, rc = 0;\n      for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) {\n        rc |= ~a_.u32[i] & b_.u32[i];\n        rz |=  a_.u32[i] & b_.u32[i];\n      }\n\n      return\n        (rc >> ((sizeof(rc) * CHAR_BIT) - 1)) &\n        (rz >> ((sizeof(rz) * CHAR_BIT) - 1));\n    #endif\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm_testnzc_ps\n  #define _mm_testnzc_ps(a, b) simde_mm_testnzc_ps(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nint\nsimde_mm_testnzc_pd (simde__m128d a, simde__m128d b) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm_testnzc_pd(a, b);\n  #else\n    simde__m128d_private\n      a_ = simde__m128d_to_private(a),\n      b_ = simde__m128d_to_private(b);\n    #if defined(SIMDE_WASM_SIMD128_NATIVE)\n      v128_t m = wasm_u64x2_shr(wasm_v128_and(a_.wasm_v128, b_.wasm_v128), 63);\n      v128_t m2 = wasm_u64x2_shr(wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128), 63);\n      return HEDLEY_STATIC_CAST(int, (wasm_i64x2_extract_lane(m, 0)  | wasm_i64x2_extract_lane(m, 1))\n                                   & (wasm_i64x2_extract_lane(m2, 0) | wasm_i64x2_extract_lane(m2, 1)));\n    #else\n      uint64_t rc = 0, rz = 0;\n      for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) {\n        rc |= ~a_.u64[i] & b_.u64[i];\n        rz |=  a_.u64[i] & b_.u64[i];\n      }\n\n      return\n        (rc >> ((sizeof(rc) * CHAR_BIT) - 1)) &\n        (rz >> ((sizeof(rz) * CHAR_BIT) - 1));\n    #endif\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm_testnzc_pd\n  #define _mm_testnzc_pd(a, b) simde_mm_testnzc_pd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nint\nsimde_mm256_testnzc_ps (simde__m256 a, simde__m256 b) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_testnzc_ps(a, b);\n  #else\n    uint32_t rc = 0, rz = 0;\n    simde__m256_private\n      a_ = simde__m256_to_private(a),\n      b_ = simde__m256_to_private(b);\n\n    for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) {\n      rc |= ~a_.u32[i] & b_.u32[i];\n      rz |=  a_.u32[i] & b_.u32[i];\n    }\n\n    return\n      (rc >> ((sizeof(rc) * CHAR_BIT) - 1)) &\n      (rz >> ((sizeof(rz) * CHAR_BIT) - 1));\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_testnzc_ps\n  #define _mm256_testnzc_ps(a, b) simde_mm256_testnzc_ps(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nint\nsimde_mm256_testnzc_pd (simde__m256d a, simde__m256d b) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_testnzc_pd(a, b);\n  #else\n    uint64_t rc = 0, rz = 0;\n    simde__m256d_private\n      a_ = simde__m256d_to_private(a),\n      b_ = simde__m256d_to_private(b);\n\n    for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) {\n      rc |= ~a_.u64[i] & b_.u64[i];\n      rz |=  a_.u64[i] & b_.u64[i];\n    }\n\n    return\n      (rc >> ((sizeof(rc) * CHAR_BIT) - 1)) &\n      (rz >> ((sizeof(rz) * CHAR_BIT) - 1));\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_testnzc_pd\n  #define _mm256_testnzc_pd(a, b) simde_mm256_testnzc_pd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nint\nsimde_mm256_testnzc_si256 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_testnzc_si256(a, b);\n  #else\n    
int32_t rc = 0, rz = 0;\n    simde__m256i_private\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) {\n      rc |= ~a_.i32f[i] & b_.i32f[i];\n      rz |=  a_.i32f[i] & b_.i32f[i];\n    }\n\n    /* ZF==0 requires (a & b) != 0 and CF==0 requires (~a & b) != 0; the two\n       accumulators must be tested separately, since (rc & rz) can be zero\n       even when both are nonzero. */\n    return (rc != 0) && (rz != 0);\n  #endif\n}\n#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_testnzc_si256\n  #define _mm256_testnzc_si256(a, b) simde_mm256_testnzc_si256(a, b)\n#endif\n\nSIMDE_END_DECLS_\n\nHEDLEY_DIAGNOSTIC_POP\n\n#endif /* !defined(SIMDE_X86_AVX_H) */\n"
  },
  {
    "path": "external_libs/pgenlib/simde/x86/avx2.h",
    "content": "/* SPDX-License-Identifier: MIT\n *\n * Permission is hereby granted, free of charge, to any person\n * obtaining a copy of this software and associated documentation\n * files (the \"Software\"), to deal in the Software without\n * restriction, including without limitation the rights to use, copy,\n * modify, merge, publish, distribute, sublicense, and/or sell copies\n * of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be\n * included in all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND,\n * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\n * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\n * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\n * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\n * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\n * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n * SOFTWARE.\n *\n * Copyright:\n *   2018-2020 Evan Nemerson <evan@nemerson.com>\n *   2019-2020 Michael R. Crusoe <crusoe@debian.org>\n *   2020      Himanshi Mathur <himanshi18037@iiitd.ac.in>\n *   2020      Hidayat Khan <huk2209@gmail.com>\n */\n\n#if !defined(SIMDE_X86_AVX2_H)\n#define SIMDE_X86_AVX2_H\n\n#include \"avx.h\"\n\nHEDLEY_DIAGNOSTIC_PUSH\nSIMDE_DISABLE_UNWANTED_DIAGNOSTICS\nSIMDE_BEGIN_DECLS_\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_abs_epi8 (simde__m256i a) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_abs_epi8(a);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a);\n\n    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_abs_epi8(a_.m128i[0]);\n      r_.m128i[1] = simde_mm_abs_epi8(a_.m128i[1]);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {\n        r_.i8[i] = (a_.i8[i] < INT32_C(0)) ? -a_.i8[i] : a_.i8[i];\n      }\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_abs_epi8\n  #define _mm256_abs_epi8(a) simde_mm256_abs_epi8(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_abs_epi16 (simde__m256i a) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_abs_epi16(a);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a);\n\n    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_abs_epi16(a_.m128i[0]);\n      r_.m128i[1] = simde_mm_abs_epi16(a_.m128i[1]);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n        r_.i16[i] = (a_.i16[i] < INT32_C(0)) ? 
-a_.i16[i] : a_.i16[i];\n      }\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_abs_epi16\n  #define _mm256_abs_epi16(a) simde_mm256_abs_epi16(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_abs_epi32(simde__m256i a) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_abs_epi32(a);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a);\n\n    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_abs_epi32(a_.m128i[0]);\n      r_.m128i[1] = simde_mm_abs_epi32(a_.m128i[1]);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) {\n        r_.i32[i] = (a_.i32[i] < INT32_C(0)) ? -a_.i32[i] : a_.i32[i];\n      }\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_abs_epi32\n  #define _mm256_abs_epi32(a) simde_mm256_abs_epi32(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_add_epi8 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_add_epi8(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_add_epi8(a_.m128i[0], b_.m128i[0]);\n      r_.m128i[1] = simde_mm_add_epi8(a_.m128i[1], b_.m128i[1]);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i8 = a_.i8 + b_.i8;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {\n        r_.i8[i] = a_.i8[i] + b_.i8[i];\n      }\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_add_epi8\n  #define _mm256_add_epi8(a, b) simde_mm256_add_epi8(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_add_epi16 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_add_epi16(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_add_epi16(a_.m128i[0], b_.m128i[0]);\n      r_.m128i[1] = simde_mm_add_epi16(a_.m128i[1], b_.m128i[1]);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i16 = a_.i16 + b_.i16;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n        r_.i16[i] = a_.i16[i] + b_.i16[i];\n      }\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_add_epi16\n  #define _mm256_add_epi16(a, b) simde_mm256_add_epi16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_hadd_epi16 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_hadd_epi16(a, b);\n  #else\n    return simde_mm256_add_epi16(simde_x_mm256_deinterleaveeven_epi16(a, b), simde_x_mm256_deinterleaveodd_epi16(a, b));\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_hadd_epi16\n  #define _mm256_hadd_epi16(a, b) simde_mm256_hadd_epi16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_add_epi32 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return 
_mm256_add_epi32(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_add_epi32(a_.m128i[0], b_.m128i[0]);\n      r_.m128i[1] = simde_mm_add_epi32(a_.m128i[1], b_.m128i[1]);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i32 = a_.i32 + b_.i32;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n        r_.i32[i] = a_.i32[i] + b_.i32[i];\n      }\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_add_epi32\n  #define _mm256_add_epi32(a, b) simde_mm256_add_epi32(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_hadd_epi32 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_hadd_epi32(a, b);\n  #else\n    return simde_mm256_add_epi32(simde_x_mm256_deinterleaveeven_epi32(a, b), simde_x_mm256_deinterleaveodd_epi32(a, b));\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_hadd_epi32\n  #define _mm256_hadd_epi32(a, b) simde_mm256_hadd_epi32(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_add_epi64 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_add_epi64(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_add_epi64(a_.m128i[0], b_.m128i[0]);\n      r_.m128i[1] = simde_mm_add_epi64(a_.m128i[1], b_.m128i[1]);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_CLANG_BAD_VI64_OPS)\n      r_.i64 = a_.i64 + b_.i64;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {\n        r_.i64[i] = a_.i64[i] + b_.i64[i];\n      }\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_add_epi64\n  #define _mm256_add_epi64(a, b) simde_mm256_add_epi64(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_alignr_epi8 (simde__m256i a, simde__m256i b, int count)\n    SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 255) {\n  simde__m256i_private\n    r_,\n    a_ = simde__m256i_to_private(a),\n    b_ = simde__m256i_to_private(b);\n\n  if (HEDLEY_UNLIKELY(count > 31))\n    return simde_mm256_setzero_si256();\n\n  for (size_t h = 0 ; h < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; h++) {\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.m128i_private[h].i8) / sizeof(r_.m128i_private[h].i8[0])) ; i++) {\n      const int srcpos = count + HEDLEY_STATIC_CAST(int, i);\n      if (srcpos > 31) {\n        r_.m128i_private[h].i8[i] = 0;\n      } else if (srcpos > 15) {\n        r_.m128i_private[h].i8[i] = a_.m128i_private[h].i8[(srcpos) & 15];\n      } else {\n        r_.m128i_private[h].i8[i] = b_.m128i_private[h].i8[srcpos];\n      }\n    }\n  }\n\n  return simde__m256i_from_private(r_);\n}\n#if defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_BUG_PGI_30106)\n#  define simde_mm256_alignr_epi8(a, b, count) _mm256_alignr_epi8(a, b, count)\n#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n#  define simde_mm256_alignr_epi8(a, b, count) \\\n      simde_mm256_set_m128i( \\\n          simde_mm_alignr_epi8(simde_mm256_extracti128_si256(a, 1), 
simde_mm256_extracti128_si256(b, 1), (count)), \\\n          simde_mm_alignr_epi8(simde_mm256_extracti128_si256(a, 0), simde_mm256_extracti128_si256(b, 0), (count)))\n#endif\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_alignr_epi8\n  #define _mm256_alignr_epi8(a, b, count) simde_mm256_alignr_epi8(a, b, (count))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_and_si256 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_and_si256(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_and_si128(a_.m128i[0], b_.m128i[0]);\n      r_.m128i[1] = simde_mm_and_si128(a_.m128i[1], b_.m128i[1]);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i32f = a_.i32f & b_.i32f;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {\n        r_.i64[i] = a_.i64[i] & b_.i64[i];\n      }\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_and_si256\n  #define _mm256_and_si256(a, b) simde_mm256_and_si256(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_andnot_si256 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_andnot_si256(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_andnot_si128(a_.m128i[0], b_.m128i[0]);\n      r_.m128i[1] = simde_mm_andnot_si128(a_.m128i[1], b_.m128i[1]);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) {\n        r_.i32f[i] = ~(a_.i32f[i]) & b_.i32f[i];\n      }\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_andnot_si256\n  #define _mm256_andnot_si256(a, b) simde_mm256_andnot_si256(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_adds_epi8 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_adds_epi8(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_adds_epi8(a_.m128i[0], b_.m128i[0]);\n      r_.m128i[1] = simde_mm_adds_epi8(a_.m128i[1], b_.m128i[1]);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {\n        r_.i8[i] = simde_math_adds_i8(a_.i8[i], b_.i8[i]);\n      }\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_adds_epi8\n  #define _mm256_adds_epi8(a, b) simde_mm256_adds_epi8(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_adds_epi16(simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_adds_epi16(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_adds_epi16(a_.m128i[0], b_.m128i[0]);\n      r_.m128i[1] = simde_mm_adds_epi16(a_.m128i[1], 
b_.m128i[1]);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n        r_.i16[i] = simde_math_adds_i16(a_.i16[i], b_.i16[i]);\n      }\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_adds_epi16\n  #define _mm256_adds_epi16(a, b) simde_mm256_adds_epi16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_hadds_epi16 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_hadds_epi16(a, b);\n  #else\n    return simde_mm256_adds_epi16(simde_x_mm256_deinterleaveeven_epi16(a, b), simde_x_mm256_deinterleaveodd_epi16(a, b));\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_hadds_epi16\n  #define _mm256_hadds_epi16(a, b) simde_mm256_hadds_epi16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_adds_epu8 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_adds_epu8(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_adds_epu8(a_.m128i[0], b_.m128i[0]);\n      r_.m128i[1] = simde_mm_adds_epu8(a_.m128i[1], b_.m128i[1]);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {\n        r_.u8[i] = simde_math_adds_u8(a_.u8[i], b_.u8[i]);\n      }\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_adds_epu8\n  #define _mm256_adds_epu8(a, b) simde_mm256_adds_epu8(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_adds_epu16(simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_adds_epu16(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_adds_epu16(a_.m128i[0], b_.m128i[0]);\n      r_.m128i[1] = simde_mm_adds_epu16(a_.m128i[1], b_.m128i[1]);\n    #else\n      SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {\n        r_.u16[i] = simde_math_adds_u16(a_.u16[i], b_.u16[i]);\n    }\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_adds_epu16\n  #define _mm256_adds_epu16(a, b) simde_mm256_adds_epu16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_avg_epu8 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_avg_epu8(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {\n      r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1;\n    }\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_avg_epu8\n  #define _mm256_avg_epu8(a, b) simde_mm256_avg_epu8(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_avg_epu16 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_avg_epu16(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = 
simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {\n      r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1;\n    }\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_avg_epu16\n  #define _mm256_avg_epu16(a, b) simde_mm256_avg_epu16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_blend_epi32(simde__m128i a, simde__m128i b, const int imm8)\n    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) {\n  simde__m128i_private\n    r_,\n    a_ = simde__m128i_to_private(a),\n    b_ = simde__m128i_to_private(b);\n\n  SIMDE_VECTORIZE\n  for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n    r_.i32[i] = ((imm8 >> i) & 1) ? b_.i32[i] : a_.i32[i];\n  }\n\n  return simde__m128i_from_private(r_);\n}\n#if defined(SIMDE_X86_AVX2_NATIVE)\n#  define simde_mm_blend_epi32(a, b, imm8) _mm_blend_epi32(a, b, imm8)\n#elif SIMDE_NATURAL_FLOAT_VECTOR_SIZE_LE(128)\n#  define simde_mm_blend_epi32(a, b, imm8) \\\n  simde_mm_castps_si128(simde_mm_blend_ps(simde_mm_castsi128_ps(a), simde_mm_castsi128_ps(b), (imm8)))\n#endif\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm_blend_epi32\n  #define _mm_blend_epi32(a, b, imm8) simde_mm_blend_epi32(a, b, imm8)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_blend_epi16(simde__m256i a, simde__m256i b, const int imm8)\n    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) {\n  simde__m256i_private\n    r_,\n    a_ = simde__m256i_to_private(a),\n    b_ = simde__m256i_to_private(b);\n\n  SIMDE_VECTORIZE\n  for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n    r_.i16[i] = ((imm8 >> i%8) & 1) ? b_.i16[i] : a_.i16[i];\n  }\n\n  return simde__m256i_from_private(r_);\n}\n#if defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_BUG_CLANG_REV_234560)\n#  define simde_mm256_blend_epi16(a, b, imm8) _mm256_castpd_si256(_mm256_blend_epi16(a, b, imm8))\n#elif defined(SIMDE_X86_AVX2_NATIVE)\n#  define simde_mm256_blend_epi16(a, b, imm8) _mm256_blend_epi16(a, b, imm8)\n#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n#  define simde_mm256_blend_epi16(a, b, imm8) \\\n      simde_mm256_set_m128i( \\\n          simde_mm_blend_epi16(simde_mm256_extracti128_si256(a, 1), simde_mm256_extracti128_si256(b, 1), (imm8)), \\\n          simde_mm_blend_epi16(simde_mm256_extracti128_si256(a, 0), simde_mm256_extracti128_si256(b, 0), (imm8)))\n#endif\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_blend_epi16\n  #define _mm256_blend_epi16(a, b, imm8) simde_mm256_blend_epi16(a, b, imm8)\n#endif\n\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_blend_epi32(simde__m256i a, simde__m256i b, const int imm8)\n    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) {\n  simde__m256i_private\n    r_,\n    a_ = simde__m256i_to_private(a),\n    b_ = simde__m256i_to_private(b);\n\n  SIMDE_VECTORIZE\n  for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n    r_.i32[i] = ((imm8 >> i) & 1) ? 
b_.i32[i] : a_.i32[i];\n  }\n\n  return simde__m256i_from_private(r_);\n}\n#if defined(SIMDE_X86_AVX2_NATIVE)\n#  define simde_mm256_blend_epi32(a, b, imm8) _mm256_blend_epi32(a, b, imm8)\n#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n#  define simde_mm256_blend_epi32(a, b, imm8) \\\n      simde_mm256_set_m128i( \\\n          simde_mm_blend_epi32(simde_mm256_extracti128_si256(a, 1), simde_mm256_extracti128_si256(b, 1), (imm8) >> 4), \\\n          simde_mm_blend_epi32(simde_mm256_extracti128_si256(a, 0), simde_mm256_extracti128_si256(b, 0), (imm8) & 0x0F))\n#endif\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_blend_epi32\n  #define _mm256_blend_epi32(a, b, imm8) simde_mm256_blend_epi32(a, b, imm8)\n#endif\n\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_blendv_epi8(simde__m256i a, simde__m256i b, simde__m256i mask) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_blendv_epi8(a, b, mask);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b),\n      mask_ = simde__m256i_to_private(mask);\n\n    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_blendv_epi8(a_.m128i[0], b_.m128i[0], mask_.m128i[0]);\n      r_.m128i[1] = simde_mm_blendv_epi8(a_.m128i[1], b_.m128i[1], mask_.m128i[1]);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)\n      __typeof__(mask_.i8) tmp = mask_.i8 >> 7;\n      r_.i8 = (tmp & b_.i8) | (~tmp & a_.i8);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {\n        int8_t tmp = mask_.i8[i] >> 7;\n        r_.i8[i] = (tmp & b_.i8[i]) | (~tmp & a_.i8[i]);\n      }\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_NATIVE)\n#  define simde_mm256_blendv_epi8(a, b, imm8)  _mm256_blendv_epi8(a, b, imm8)\n#endif\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_blendv_epi8\n  #define _mm256_blendv_epi8(a, b, mask) simde_mm256_blendv_epi8(a, b, mask)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_broadcastb_epi8 (simde__m128i a) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm_broadcastb_epi8(a);\n  #else\n    simde__m128i_private r_;\n    simde__m128i_private a_= simde__m128i_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {\n      r_.i8[i] = a_.i8[0];\n    }\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm_broadcastb_epi8\n  #define _mm_broadcastb_epi8(a) simde_mm_broadcastb_epi8(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_broadcastb_epi8 (simde__m128i a) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_broadcastb_epi8(a);\n  #else\n    simde__m256i_private r_;\n    simde__m128i_private a_= simde__m128i_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {\n      r_.i8[i] = a_.i8[0];\n    }\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_broadcastb_epi8\n  #define _mm256_broadcastb_epi8(a) simde_mm256_broadcastb_epi8(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_broadcastw_epi16 (simde__m128i a) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm_broadcastw_epi16(a);\n  #else\n    simde__m128i_private r_;\n    simde__m128i_private a_= simde__m128i_to_private(a);\n\n    SIMDE_VECTORIZE\n    
for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n      r_.i16[i] = a_.i16[0];\n    }\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm_broadcastw_epi16\n  #define _mm_broadcastw_epi16(a) simde_mm_broadcastw_epi16(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_broadcastw_epi16 (simde__m128i a) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_broadcastw_epi16(a);\n  #else\n    simde__m256i_private r_;\n    simde__m128i_private a_ = simde__m128i_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n      r_.i16[i] = a_.i16[0];\n    }\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_broadcastw_epi16\n  #define _mm256_broadcastw_epi16(a) simde_mm256_broadcastw_epi16(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_broadcastd_epi32 (simde__m128i a) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm_broadcastd_epi32(a);\n  #else\n    simde__m128i_private r_;\n    simde__m128i_private a_ = simde__m128i_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n      r_.i32[i] = a_.i32[0];\n    }\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm_broadcastd_epi32\n  #define _mm_broadcastd_epi32(a) simde_mm_broadcastd_epi32(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_broadcastd_epi32 (simde__m128i a) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_broadcastd_epi32(a);\n  #else\n    simde__m256i_private r_;\n    simde__m128i_private a_ = simde__m128i_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n      r_.i32[i] = a_.i32[0];\n    }\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_broadcastd_epi32\n  #define _mm256_broadcastd_epi32(a) simde_mm256_broadcastd_epi32(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_broadcastq_epi64 (simde__m128i a) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm_broadcastq_epi64(a);\n  #else\n    simde__m128i_private r_;\n    simde__m128i_private a_ = simde__m128i_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {\n      r_.i64[i] = a_.i64[0];\n    }\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm_broadcastq_epi64\n  #define _mm_broadcastq_epi64(a) simde_mm_broadcastq_epi64(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_broadcastq_epi64 (simde__m128i a) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_broadcastq_epi64(a);\n  #else\n    simde__m256i_private r_;\n    simde__m128i_private a_ = simde__m128i_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {\n      r_.i64[i] = a_.i64[0];\n    }\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_broadcastq_epi64\n  #define _mm256_broadcastq_epi64(a) simde_mm256_broadcastq_epi64(a)\n#endif\n\n
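/* The floating-point broadcasts can lean on shuffles where available: splatting lane 0 of a __m128 is just simde_mm_shuffle_ps(a, a, 0). */\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_broadcastss_ps (simde__m128 a) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return 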
_mm_broadcastss_ps(a);\n  #elif defined(SIMDE_X86_SSE_NATIVE)\n    return simde_mm_shuffle_ps(a, a, 0);\n  #else\n    simde__m128_private r_;\n    simde__m128_private a_ = simde__m128_to_private(a);\n\n    #if defined(SIMDE_SHUFFLE_VECTOR_)\n      r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 0, 0);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = a_.f32[0];\n      }\n    #endif\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm_broadcastss_ps\n  #define _mm_broadcastss_ps(a) simde_mm_broadcastss_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_broadcastss_ps (simde__m128 a) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_broadcastss_ps(a);\n  #else\n    simde__m256_private r_;\n    simde__m128_private a_ = simde__m128_to_private(a);\n\n    #if defined(SIMDE_X86_AVX_NATIVE)\n      __m128 tmp = _mm_permute_ps(a_.n, 0);\n      r_.n = _mm256_insertf128_ps(_mm256_castps128_ps256(tmp), tmp, 1);\n    #elif HEDLEY_HAS_BUILTIN(__builtin_shufflevector)\n      r_.f32 = __builtin_shufflevector(a_.f32, a_.f32, 0, 0, 0, 0, 0, 0, 0, 0);\n    #elif SIMDE_NATURAL_FLOAT_VECTOR_SIZE_LE(128)\n      r_.m128[0] = r_.m128[1] = simde_mm_broadcastss_ps(simde__m128_from_private(a_));\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = a_.f32[0];\n      }\n    #endif\n\n    return simde__m256_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_broadcastss_ps\n  #define _mm256_broadcastss_ps(a) simde_mm256_broadcastss_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_broadcastsd_pd (simde__m128d a) {\n  return simde_mm_movedup_pd(a);\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm_broadcastsd_pd\n  #define _mm_broadcastsd_pd(a) simde_mm_broadcastsd_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_broadcastsd_pd (simde__m128d a) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_broadcastsd_pd(a);\n  #else\n    simde__m256d_private r_;\n    simde__m128d_private a_ = simde__m128d_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n      r_.f64[i] = a_.f64[0];\n    }\n\n    return simde__m256d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_broadcastsd_pd\n  #define _mm256_broadcastsd_pd(a) simde_mm256_broadcastsd_pd(a)\n#endif\n\n
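/* Both _mm_broadcastsi128_si256 and _mm256_broadcastsi128_si256 name the same operation: the 128-bit source is copied into the low and the high half of the 256-bit result. */\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_broadcastsi128_si256 (simde__m128i a) {\n  #if defined(SIMDE_X86_AVX2_NATIVE) && \\\n      (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(4,8,0))\n    return _mm256_broadcastsi128_si256(a);\n  #else\n    simde__m256i_private r_;\n    simde__m128i_private a_ = simde__m128i_to_private(a);\n\n    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n      r_.m128i_private[0] = a_;\n      r_.m128i_private[1] = a_;\n    #else\n      r_.i64[0] = a_.i64[0];\n      r_.i64[1] = a_.i64[1];\n      r_.i64[2] = a_.i64[0];\n      r_.i64[3] = a_.i64[1];\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#define simde_mm_broadcastsi128_si256(a) simde_mm256_broadcastsi128_si256(a)\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_broadcastsi128_si256\n  #define _mm256_broadcastsi128_si256(a) 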
simde_mm256_broadcastsi128_si256(a)\n  #undef _mm_broadcastsi128_si256\n  #define _mm_broadcastsi128_si256(a) simde_mm256_broadcastsi128_si256(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_bslli_epi128 (simde__m256i a, const int imm8)\n    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255)  {\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a);\n    const int ssize = HEDLEY_STATIC_CAST(int, (sizeof(r_.i8) / sizeof(r_.i8[0])));\n\n    SIMDE_VECTORIZE\n    for (int i = 0 ; i < ssize ; i++) {\n      const int e = i - imm8;\n      if (i >= (ssize/2)) {\n        if (e >= (ssize/2) && e < ssize)\n          r_.i8[i] = a_.i8[e];\n        else\n          r_.i8[i] = 0;\n      }\n      else {\n        if (e >= 0 && e < (ssize/2))\n          r_.i8[i] = a_.i8[e];\n        else\n          r_.i8[i] = 0;\n      }\n    }\n\n  return simde__m256i_from_private(r_);\n}\n#if defined(SIMDE_X86_AVX2_NATIVE) && \\\n    (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(4,8,0)) && \\\n    SIMDE_DETECT_CLANG_VERSION_CHECK(3,7,0)\n  #define simde_mm256_bslli_epi128(a, imm8) _mm256_bslli_epi128(a, imm8)\n#endif\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_bslli_epi128\n  #define _mm256_bslli_epi128(a, imm8) simde_mm256_bslli_epi128(a, imm8)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_bsrli_epi128 (simde__m256i a, const int imm8)\n    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255)  {\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a);\n    const int ssize = HEDLEY_STATIC_CAST(int, (sizeof(r_.i8) / sizeof(r_.i8[0])));\n\n    SIMDE_VECTORIZE\n    for (int i = 0 ; i < ssize ; i++) {\n      const int e = i + imm8;\n      if (i < (ssize/2)) {\n        if (e >= 0 && e < (ssize/2))\n          r_.i8[i] = a_.i8[e];\n        else\n          r_.i8[i] = 0;\n      }\n      else {\n        if (e >= (ssize/2) && e < ssize)\n          r_.i8[i] = a_.i8[e];\n        else\n          r_.i8[i] = 0;\n      }\n    }\n\n  return simde__m256i_from_private(r_);\n}\n#if defined(SIMDE_X86_AVX2_NATIVE) && \\\n    (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(4,8,0)) && \\\n    SIMDE_DETECT_CLANG_VERSION_CHECK(3,7,0)\n  #define simde_mm256_bsrli_epi128(a, imm8) _mm256_bsrli_epi128(a, imm8)\n#endif\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_bsrli_epi128\n  #define _mm256_bsrli_epi128(a, imm8) simde_mm256_bsrli_epi128(a, imm8)\n#endif\n\n
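/* For the 256-bit integer comparisons each result element is all ones (~0) where the predicate holds and zero otherwise; the cmpgt_* family compares as signed integers. */\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_cmpeq_epi8 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_cmpeq_epi8(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_cmpeq_epi8(a_.m128i[0], b_.m128i[0]);\n      r_.m128i[1] = simde_mm_cmpeq_epi8(a_.m128i[1], b_.m128i[1]);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {\n        r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? 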
~INT8_C(0) : INT8_C(0);\n      }\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_cmpeq_epi8\n  #define _mm256_cmpeq_epi8(a, b) simde_mm256_cmpeq_epi8(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_cmpeq_epi16 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_cmpeq_epi16(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_cmpeq_epi16(a_.m128i[0], b_.m128i[0]);\n      r_.m128i[1] = simde_mm_cmpeq_epi16(a_.m128i[1], b_.m128i[1]);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n        r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? ~INT16_C(0) : INT16_C(0);\n      }\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_cmpeq_epi16\n  #define _mm256_cmpeq_epi16(a, b) simde_mm256_cmpeq_epi16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_cmpeq_epi32 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_cmpeq_epi32(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_cmpeq_epi32(a_.m128i[0], b_.m128i[0]);\n      r_.m128i[1] = simde_mm_cmpeq_epi32(a_.m128i[1], b_.m128i[1]);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n        r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? ~INT32_C(0) : INT32_C(0);\n      }\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_cmpeq_epi32\n  #define _mm256_cmpeq_epi32(a, b) simde_mm256_cmpeq_epi32(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_cmpeq_epi64 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_cmpeq_epi64(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_cmpeq_epi64(a_.m128i[0], b_.m128i[0]);\n      r_.m128i[1] = simde_mm_cmpeq_epi64(a_.m128i[1], b_.m128i[1]);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {\n        r_.i64[i] = (a_.i64[i] == b_.i64[i]) ? 
~INT64_C(0) : INT64_C(0);\n      }\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_cmpeq_epi64\n  #define _mm256_cmpeq_epi64(a, b) simde_mm256_cmpeq_epi64(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_cmpgt_epi8 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_cmpgt_epi8(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_cmpgt_epi8(a_.m128i[0], b_.m128i[0]);\n      r_.m128i[1] = simde_mm_cmpgt_epi8(a_.m128i[1], b_.m128i[1]);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), a_.i8 > b_.i8);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {\n        r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? ~INT8_C(0) : INT8_C(0);\n      }\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_cmpgt_epi8\n  #define _mm256_cmpgt_epi8(a, b) simde_mm256_cmpgt_epi8(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_cmpgt_epi16 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_cmpgt_epi16(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_cmpgt_epi16(a_.m128i[0], b_.m128i[0]);\n      r_.m128i[1] = simde_mm_cmpgt_epi16(a_.m128i[1], b_.m128i[1]);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i16 = a_.i16 > b_.i16;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n        r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? ~INT16_C(0) : INT16_C(0);\n      }\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_cmpgt_epi16\n  #define _mm256_cmpgt_epi16(a, b) simde_mm256_cmpgt_epi16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_cmpgt_epi32 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_cmpgt_epi32(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_cmpgt_epi32(a_.m128i[0], b_.m128i[0]);\n      r_.m128i[1] = simde_mm_cmpgt_epi32(a_.m128i[1], b_.m128i[1]);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 > b_.i32);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n        r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? 
~INT32_C(0) : INT32_C(0);\n      }\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_cmpgt_epi32\n  #define _mm256_cmpgt_epi32(a, b) simde_mm256_cmpgt_epi32(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_cmpgt_epi64 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_cmpgt_epi64(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_cmpgt_epi64(a_.m128i[0], b_.m128i[0]);\n      r_.m128i[1] = simde_mm_cmpgt_epi64(a_.m128i[1], b_.m128i[1]);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 > b_.i64);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {\n        r_.i64[i] = (a_.i64[i] > b_.i64[i]) ? ~INT64_C(0) : INT64_C(0);\n      }\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_cmpgt_epi64\n  #define _mm256_cmpgt_epi64(a, b) simde_mm256_cmpgt_epi64(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_cvtepi8_epi16 (simde__m128i a) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_cvtepi8_epi16(a);\n  #else\n    simde__m256i_private r_;\n    simde__m128i_private a_ = simde__m128i_to_private(a);\n\n    #if defined(SIMDE_CONVERT_VECTOR_)\n      SIMDE_CONVERT_VECTOR_(r_.i16, a_.i8);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n        r_.i16[i] = a_.i8[i];\n      }\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_cvtepi8_epi16\n  #define _mm256_cvtepi8_epi16(a) simde_mm256_cvtepi8_epi16(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_cvtepi8_epi32 (simde__m128i a) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_cvtepi8_epi32(a);\n  #else\n    simde__m256i_private r_;\n    simde__m128i_private a_ = simde__m128i_to_private(a);\n\n    #if defined(SIMDE_CONVERT_VECTOR_)\n      SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].i8);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n        r_.i32[i] = a_.i8[i];\n      }\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_cvtepi8_epi32\n  #define _mm256_cvtepi8_epi32(a) simde_mm256_cvtepi8_epi32(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_cvtepi8_epi64 (simde__m128i a) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_cvtepi8_epi64(a);\n  #else\n    simde__m256i_private r_;\n    simde__m128i_private a_ = simde__m128i_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {\n      r_.i64[i] = a_.i8[i];\n    }\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_cvtepi8_epi64\n  #define _mm256_cvtepi8_epi64(a) simde_mm256_cvtepi8_epi64(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_cvtepi16_epi32 (simde__m128i a) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_cvtepi16_epi32(a);\n  #else\n    
simde__m256i_private r_;\n    simde__m128i_private a_ = simde__m128i_to_private(a);\n\n    #if defined(SIMDE_CONVERT_VECTOR_)\n      SIMDE_CONVERT_VECTOR_(r_.i32, a_.i16);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n        r_.i32[i] = a_.i16[i];\n      }\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_cvtepi16_epi32\n  #define _mm256_cvtepi16_epi32(a) simde_mm256_cvtepi16_epi32(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_cvtepi16_epi64 (simde__m128i a) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_cvtepi16_epi64(a);\n  #else\n    simde__m256i_private r_;\n    simde__m128i_private a_ = simde__m128i_to_private(a);\n\n    #if defined(SIMDE_CONVERT_VECTOR_)\n      SIMDE_CONVERT_VECTOR_(r_.i64, a_.m64_private[0].i16);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {\n        r_.i64[i] = a_.i16[i];\n      }\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_cvtepi16_epi64\n  #define _mm256_cvtepi16_epi64(a) simde_mm256_cvtepi16_epi64(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_cvtepi32_epi64 (simde__m128i a) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_cvtepi32_epi64(a);\n  #else\n    simde__m256i_private r_;\n    simde__m128i_private a_ = simde__m128i_to_private(a);\n\n    #if defined(SIMDE_CONVERT_VECTOR_)\n      SIMDE_CONVERT_VECTOR_(r_.i64, a_.i32);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {\n        r_.i64[i] = a_.i32[i];\n      }\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_cvtepi32_epi64\n  #define _mm256_cvtepi32_epi64(a) simde_mm256_cvtepi32_epi64(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_cvtepu8_epi16 (simde__m128i a) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_cvtepu8_epi16(a);\n  #else\n    simde__m256i_private r_;\n    simde__m128i_private a_ = simde__m128i_to_private(a);\n\n    #if defined(SIMDE_CONVERT_VECTOR_)\n      SIMDE_CONVERT_VECTOR_(r_.i16, a_.u8);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n        r_.i16[i] = a_.u8[i];\n      }\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_cvtepu8_epi16\n  #define _mm256_cvtepu8_epi16(a) simde_mm256_cvtepu8_epi16(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_cvtepu8_epi32 (simde__m128i a) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_cvtepu8_epi32(a);\n  #else\n    simde__m256i_private r_;\n    simde__m128i_private a_ = simde__m128i_to_private(a);\n\n    #if defined(SIMDE_CONVERT_VECTOR_)\n      SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].u8);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n        r_.i32[i] = a_.u8[i];\n      }\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_cvtepu8_epi32\n  #define _mm256_cvtepu8_epi32(a) 
simde_mm256_cvtepu8_epi32(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_cvtepu8_epi64 (simde__m128i a) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_cvtepu8_epi64(a);\n  #else\n    simde__m256i_private r_;\n    simde__m128i_private a_ = simde__m128i_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {\n      r_.i64[i] = a_.u8[i];\n    }\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_cvtepu8_epi64\n  #define _mm256_cvtepu8_epi64(a) simde_mm256_cvtepu8_epi64(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_cvtepu16_epi32 (simde__m128i a) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_cvtepu16_epi32(a);\n  #else\n    simde__m256i_private r_;\n    simde__m128i_private a_ = simde__m128i_to_private(a);\n\n    #if defined(SIMDE_CONVERT_VECTOR_)\n      SIMDE_CONVERT_VECTOR_(r_.i32, a_.u16);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n        r_.i32[i] = a_.u16[i];\n      }\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_cvtepu16_epi32\n  #define _mm256_cvtepu16_epi32(a) simde_mm256_cvtepu16_epi32(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_cvtepu16_epi64 (simde__m128i a) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_cvtepu16_epi64(a);\n  #else\n    simde__m256i_private r_;\n    simde__m128i_private a_ = simde__m128i_to_private(a);\n\n    #if defined(SIMDE_CONVERT_VECTOR_)\n      SIMDE_CONVERT_VECTOR_(r_.i64, a_.m64_private[0].u16);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {\n        r_.i64[i] = a_.u16[i];\n      }\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_cvtepu16_epi64\n  #define _mm256_cvtepu16_epi64(a) simde_mm256_cvtepu16_epi64(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_cvtepu32_epi64 (simde__m128i a) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_cvtepu32_epi64(a);\n  #else\n    simde__m256i_private r_;\n    simde__m128i_private a_ = simde__m128i_to_private(a);\n\n    #if defined(SIMDE_CONVERT_VECTOR_)\n      SIMDE_CONVERT_VECTOR_(r_.i64, a_.u32);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {\n        r_.i64[i] = a_.u32[i];\n      }\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_cvtepu32_epi64\n  #define _mm256_cvtepu32_epi64(a) simde_mm256_cvtepu32_epi64(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nint\nsimde_mm256_extract_epi8 (simde__m256i a, const int index)\n    SIMDE_REQUIRE_RANGE(index, 0, 31){\n  simde__m256i_private a_ = simde__m256i_to_private(a);\n  return a_.i8[index];\n}\n#if defined(SIMDE_X86_AVX2_NATIVE) && \\\n    (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0))\n  #define simde_mm256_extract_epi8(a, index) _mm256_extract_epi8(a, index)\n#endif\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_extract_epi8\n  #define _mm256_extract_epi8(a, index) simde_mm256_extract_epi8(a, index)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nint\nsimde_mm256_extract_epi16 (simde__m256i a, const int index)\n    
SIMDE_REQUIRE_RANGE(index, 0, 15) {\n  simde__m256i_private a_ = simde__m256i_to_private(a);\n  return a_.i16[index];\n}\n#if defined(SIMDE_X86_AVX2_NATIVE) && \\\n    (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0))\n  #define simde_mm256_extract_epi16(a, index) _mm256_extract_epi16(a, index)\n#endif\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_extract_epi16\n  #define _mm256_extract_epi16(a, index) simde_mm256_extract_epi16(a, index)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm256_extracti128_si256 (simde__m256i a, const int imm8)\n    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) {\n  simde__m256i_private a_ = simde__m256i_to_private(a);\n  return a_.m128i[imm8];\n}\n#if defined(SIMDE_X86_AVX2_NATIVE)\n#  define simde_mm256_extracti128_si256(a, imm8) _mm256_extracti128_si256(a, imm8)\n#endif\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_extracti128_si256\n  #define _mm256_extracti128_si256(a, imm8) simde_mm256_extracti128_si256(a, imm8)\n#endif\n\n
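/* The gather emulations below load each element separately with simde_memcpy from base_addr + vindex[i] * scale, which stays valid for unaligned addresses; scale must be 1, 2, 4, or 8. E.g. simde_mm_i32gather_epi32(p, simde_mm_set_epi32(3, 2, 1, 0), 4) loads p[0] through p[3]. The mask_* variants load an element only when the top bit of the corresponding mask element is set, and otherwise copy that element from src. */\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_i32gather_epi32(const int32_t* base_addr, simde__m128i vindex, const int32_t scale)\n    SIMDE_REQUIRE_CONSTANT(scale)\n    HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), \"`scale' must be a power of two less than or equal to 8\") {\n  simde__m128i_private\n    vindex_ = simde__m128i_to_private(vindex),\n    r_;\n  const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr);\n\n  SIMDE_VECTORIZE\n  for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) {\n    const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale));\n    int32_t dst;\n    simde_memcpy(&dst, src, sizeof(dst));\n    r_.i32[i] = dst;\n  }\n\n  return simde__m128i_from_private(r_);\n}\n#if defined(SIMDE_X86_AVX2_NATIVE)\n  #define simde_mm_i32gather_epi32(base_addr, vindex, scale) _mm_i32gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, scale)\n#endif\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm_i32gather_epi32\n  #define _mm_i32gather_epi32(base_addr, vindex, scale) simde_mm_i32gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, scale)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_mask_i32gather_epi32(simde__m128i src, const int32_t* base_addr, simde__m128i vindex, simde__m128i mask, const int32_t scale)\n    SIMDE_REQUIRE_CONSTANT(scale)\n    HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), \"`scale' must be a power of two less than or equal to 8\") {\n  simde__m128i_private\n    vindex_ = simde__m128i_to_private(vindex),\n    src_ = simde__m128i_to_private(src),\n    mask_ = simde__m128i_to_private(mask),\n    r_;\n  const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr);\n\n  SIMDE_VECTORIZE\n  for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) {\n    if ((mask_.i32[i] >> 31) & 1) {\n      const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale));\n      int32_t dst;\n      simde_memcpy(&dst, src1, sizeof(dst));\n      r_.i32[i] = dst;\n    }\n    else {\n      r_.i32[i] = src_.i32[i];\n    }\n  }\n\n  return simde__m128i_from_private(r_);\n}\n#if defined(SIMDE_X86_AVX2_NATIVE)\n  #define simde_mm_mask_i32gather_epi32(src, base_addr, vindex, mask, scale) _mm_mask_i32gather_epi32(src, 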
SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, mask, scale)\n#endif\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm_mask_i32gather_epi32\n  #define _mm_mask_i32gather_epi32(src, base_addr, vindex, mask, scale) simde_mm_mask_i32gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, mask, scale)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_i32gather_epi32(const int32_t* base_addr, simde__m256i vindex, const int32_t scale)\n    SIMDE_REQUIRE_CONSTANT(scale)\n    HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), \"`scale' must be a power of two less than or equal to 8\") {\n  simde__m256i_private\n    vindex_ = simde__m256i_to_private(vindex),\n    r_;\n  const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr);\n\n  SIMDE_VECTORIZE\n  for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) {\n    const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale));\n    int32_t dst;\n    simde_memcpy(&dst, src, sizeof(dst));\n    r_.i32[i] = dst;\n  }\n\n  return simde__m256i_from_private(r_);\n}\n#if defined(SIMDE_X86_AVX2_NATIVE)\n  #define simde_mm256_i32gather_epi32(base_addr, vindex, scale) _mm256_i32gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, scale)\n#endif\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_i32gather_epi32\n  #define _mm256_i32gather_epi32(base_addr, vindex, scale) simde_mm256_i32gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, scale)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_mask_i32gather_epi32(simde__m256i src, const int32_t* base_addr, simde__m256i vindex, simde__m256i mask, const int32_t scale)\n    SIMDE_REQUIRE_CONSTANT(scale)\n    HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), \"`scale' must be a power of two less than or equal to 8\") {\n  simde__m256i_private\n    vindex_ = simde__m256i_to_private(vindex),\n    src_ = simde__m256i_to_private(src),\n    mask_ = simde__m256i_to_private(mask),\n    r_;\n  const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr);\n\n  SIMDE_VECTORIZE\n  for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) {\n    if ((mask_.i32[i] >> 31) & 1) {\n      const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale));\n      int32_t dst;\n      simde_memcpy(&dst, src1, sizeof(dst));\n      r_.i32[i] = dst;\n    }\n    else {\n      r_.i32[i] = src_.i32[i];\n    }\n  }\n\n  return simde__m256i_from_private(r_);\n}\n#if defined(SIMDE_X86_AVX2_NATIVE)\n  #define simde_mm256_mask_i32gather_epi32(src, base_addr, vindex, mask, scale) _mm256_mask_i32gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, mask, scale)\n#endif\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_mask_i32gather_epi32\n  #define _mm256_mask_i32gather_epi32(src, base_addr, vindex, mask, scale) simde_mm256_mask_i32gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, mask, scale)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_i64gather_epi32(const int32_t* base_addr, simde__m128i vindex, const int32_t scale)\n    SIMDE_REQUIRE_CONSTANT(scale)\n    HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), 
\"`scale' must be a power of two less than or equal to 8\") {\n  simde__m128i_private\n    vindex_ = simde__m128i_to_private(vindex),\n    r_ = simde__m128i_to_private(simde_mm_setzero_si128());\n  const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr);\n\n  SIMDE_VECTORIZE\n  for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) {\n    const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale));\n    int32_t dst;\n    simde_memcpy(&dst, src, sizeof(dst));\n    r_.i32[i] = dst;\n  }\n\n  return simde__m128i_from_private(r_);\n}\n#if defined(SIMDE_X86_AVX2_NATIVE)\n  #define simde_mm_i64gather_epi32(base_addr, vindex, scale) _mm_i64gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, scale)\n#endif\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm_i64gather_epi32\n  #define _mm_i64gather_epi32(base_addr, vindex, scale) simde_mm_i64gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, scale)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_mask_i64gather_epi32(simde__m128i src, const int32_t* base_addr, simde__m128i vindex, simde__m128i mask, const int32_t scale)\n    SIMDE_REQUIRE_CONSTANT(scale)\n    HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), \"`scale' must be a power of two less than or equal to 8\") {\n  simde__m128i_private\n    vindex_ = simde__m128i_to_private(vindex),\n    src_ = simde__m128i_to_private(src),\n    mask_ = simde__m128i_to_private(mask),\n    r_ = simde__m128i_to_private(simde_mm_setzero_si128());\n  const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr);\n\n  SIMDE_VECTORIZE\n  for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) {\n    if ((mask_.i32[i] >> 31) & 1) {\n      const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale));\n      int32_t dst;\n      simde_memcpy(&dst, src1, sizeof(dst));\n      r_.i32[i] = dst;\n    }\n    else {\n      r_.i32[i] = src_.i32[i];\n    }\n  }\n\n  return simde__m128i_from_private(r_);\n}\n#if defined(SIMDE_X86_AVX2_NATIVE)\n  #define simde_mm_mask_i64gather_epi32(src, base_addr, vindex, mask, scale) _mm_mask_i64gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, mask, scale)\n#endif\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm_mask_i64gather_epi32\n  #define _mm_mask_i64gather_epi32(src, base_addr, vindex, mask, scale) simde_mm_mask_i64gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, mask, scale)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm256_i64gather_epi32(const int32_t* base_addr, simde__m256i vindex, const int32_t scale)\n    SIMDE_REQUIRE_CONSTANT(scale)\n    HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), \"`scale' must be a power of two less than or equal to 8\") {\n  simde__m256i_private\n    vindex_ = simde__m256i_to_private(vindex);\n  simde__m128i_private\n    r_ = simde__m128i_to_private(simde_mm_setzero_si128());\n  const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr);\n\n  SIMDE_VECTORIZE\n  for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) {\n    const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale));\n    int32_t dst;\n    simde_memcpy(&dst, src, 
sizeof(dst));\n    r_.i32[i] = dst;\n  }\n\n  return simde__m128i_from_private(r_);\n}\n#if defined(SIMDE_X86_AVX2_NATIVE)\n  #define simde_mm256_i64gather_epi32(base_addr, vindex, scale) _mm256_i64gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, scale)\n#endif\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_i64gather_epi32\n  #define _mm256_i64gather_epi32(base_addr, vindex, scale) simde_mm256_i64gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, scale)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm256_mask_i64gather_epi32(simde__m128i src, const int32_t* base_addr, simde__m256i vindex, simde__m128i mask, const int32_t scale)\n    SIMDE_REQUIRE_CONSTANT(scale)\n    HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), \"`scale' must be a power of two less than or equal to 8\") {\n  simde__m256i_private\n    vindex_ = simde__m256i_to_private(vindex);\n  simde__m128i_private\n    src_ = simde__m128i_to_private(src),\n    mask_ = simde__m128i_to_private(mask),\n    r_ = simde__m128i_to_private(simde_mm_setzero_si128());\n  const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr);\n\n  SIMDE_VECTORIZE\n  for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) {\n    if ((mask_.i32[i] >> 31) & 1) {\n      const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale));\n      int32_t dst;\n      simde_memcpy(&dst, src1, sizeof(dst));\n      r_.i32[i] = dst;\n    }\n    else {\n      r_.i32[i] = src_.i32[i];\n    }\n  }\n\n  return simde__m128i_from_private(r_);\n}\n#if defined(SIMDE_X86_AVX2_NATIVE)\n  #define simde_mm256_mask_i64gather_epi32(src, base_addr, vindex, mask, scale) _mm256_mask_i64gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, mask, scale)\n#endif\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_mask_i64gather_epi32\n  #define _mm256_mask_i64gather_epi32(src, base_addr, vindex, mask, scale) simde_mm256_mask_i64gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, mask, scale)\n#endif\n\n
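/* For the 64-bit element gathers the paired native definitions below differ only in the pointer type handed to the intrinsic: clang 3.8+ prototypes them with int64_t const*, while other compilers use long long const*. */\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_i32gather_epi64(const int64_t* base_addr, simde__m128i vindex, const int32_t scale)\n    SIMDE_REQUIRE_CONSTANT(scale)\n    HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), \"`scale' must be a power of two less than or equal to 8\") {\n  simde__m128i_private\n    vindex_ = simde__m128i_to_private(vindex),\n    r_;\n  const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr);\n\n  SIMDE_VECTORIZE\n  for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {\n    const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale));\n    int64_t dst;\n    simde_memcpy(&dst, src, sizeof(dst));\n    r_.i64[i] = dst;\n  }\n\n  return simde__m128i_from_private(r_);\n}\n#if defined(SIMDE_X86_AVX2_NATIVE)\n  #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0)\n    #define simde_mm_i32gather_epi64(base_addr, vindex, scale) _mm_i32gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale)\n  #else\n    #define simde_mm_i32gather_epi64(base_addr, vindex, scale) _mm_i32gather_epi64(HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, scale)\n  #endif\n#endif\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef 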
_mm_i32gather_epi64\n  #define _mm_i32gather_epi64(base_addr, vindex, scale) simde_mm_i32gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_mask_i32gather_epi64(simde__m128i src, const int64_t* base_addr, simde__m128i vindex, simde__m128i mask, const int32_t scale)\n    SIMDE_REQUIRE_CONSTANT(scale)\n    HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), \"`scale' must be a power of two less than or equal to 8\") {\n  simde__m128i_private\n    vindex_ = simde__m128i_to_private(vindex),\n    src_ = simde__m128i_to_private(src),\n    mask_ = simde__m128i_to_private(mask),\n    r_;\n  const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr);\n\n  SIMDE_VECTORIZE\n  for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {\n    if ((mask_.i64[i] >> 63) & 1) {\n      const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale));\n      int64_t dst;\n      simde_memcpy(&dst, src1, sizeof(dst));\n      r_.i64[i] = dst;\n    }\n    else {\n      r_.i64[i] = src_.i64[i];\n    }\n  }\n\n  return simde__m128i_from_private(r_);\n}\n#if defined(SIMDE_X86_AVX2_NATIVE)\n  #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0)\n    #define simde_mm_mask_i32gather_epi64(src, base_addr, vindex, mask, scale) _mm_mask_i32gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale)\n  #else\n    #define simde_mm_mask_i32gather_epi64(src, base_addr, vindex, mask, scale) _mm_mask_i32gather_epi64(src, HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, mask, scale)\n  #endif\n#endif\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm_mask_i32gather_epi64\n  #define _mm_mask_i32gather_epi64(src, base_addr, vindex, mask, scale) simde_mm_mask_i32gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_i32gather_epi64(const int64_t* base_addr, simde__m128i vindex, const int32_t scale)\n    SIMDE_REQUIRE_CONSTANT(scale)\n    HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), \"`scale' must be a power of two less than or equal to 8\") {\n  simde__m128i_private\n    vindex_ = simde__m128i_to_private(vindex);\n  simde__m256i_private\n    r_;\n  const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr);\n\n  SIMDE_VECTORIZE\n  for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) {\n    const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale));\n    int64_t dst;\n    simde_memcpy(&dst, src, sizeof(dst));\n    r_.i64[i] = dst;\n  }\n\n  return simde__m256i_from_private(r_);\n}\n#if defined(SIMDE_X86_AVX2_NATIVE)\n  #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0)\n    #define simde_mm256_i32gather_epi64(base_addr, vindex, scale) _mm256_i32gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale)\n  #else\n    #define simde_mm256_i32gather_epi64(base_addr, vindex, scale) _mm256_i32gather_epi64(HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, scale)\n  #endif\n#endif\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_i32gather_epi64\n  #define _mm256_i32gather_epi64(base_addr, vindex, scale) simde_mm256_i32gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, 
scale)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_mask_i32gather_epi64(simde__m256i src, const int64_t* base_addr, simde__m128i vindex, simde__m256i mask, const int32_t scale)\n    SIMDE_REQUIRE_CONSTANT(scale)\n    HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), \"`scale' must be a power of two less than or equal to 8\") {\n  simde__m256i_private\n    src_ = simde__m256i_to_private(src),\n    mask_ = simde__m256i_to_private(mask),\n    r_;\n  simde__m128i_private\n    vindex_ = simde__m128i_to_private(vindex);\n  const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr);\n\n  SIMDE_VECTORIZE\n  for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) {\n    if ((mask_.i64[i] >> 63) & 1) {\n      const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale));\n      int64_t dst;\n      simde_memcpy(&dst, src1, sizeof(dst));\n      r_.i64[i] = dst;\n    }\n    else {\n      r_.i64[i] = src_.i64[i];\n    }\n  }\n\n  return simde__m256i_from_private(r_);\n}\n#if defined(SIMDE_X86_AVX2_NATIVE)\n  #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0)\n    #define simde_mm256_mask_i32gather_epi64(src, base_addr, vindex, mask, scale) _mm256_mask_i32gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale)\n  #else\n    #define simde_mm256_mask_i32gather_epi64(src, base_addr, vindex, mask, scale) _mm256_mask_i32gather_epi64(src, HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, mask, scale)\n  #endif\n#endif\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_mask_i32gather_epi64\n  #define _mm256_mask_i32gather_epi64(src, base_addr, vindex, mask, scale) simde_mm256_mask_i32gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_i64gather_epi64(const int64_t* base_addr, simde__m128i vindex, const int32_t scale)\n    SIMDE_REQUIRE_CONSTANT(scale)\n    HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), \"`scale' must be a power of two less than or equal to 8\") {\n  simde__m128i_private\n    vindex_ = simde__m128i_to_private(vindex),\n    r_ = simde__m128i_to_private(simde_mm_setzero_si128());\n  const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr);\n\n  SIMDE_VECTORIZE\n  for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) {\n    const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale));\n    int64_t dst;\n    simde_memcpy(&dst, src, sizeof(dst));\n    r_.i64[i] = dst;\n  }\n\n  return simde__m128i_from_private(r_);\n}\n#if defined(SIMDE_X86_AVX2_NATIVE)\n  #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0)\n    #define simde_mm_i64gather_epi64(base_addr, vindex, scale) _mm_i64gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale)\n  #else\n    #define simde_mm_i64gather_epi64(base_addr, vindex, scale) _mm_i64gather_epi64(HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, scale)\n  #endif\n#endif\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm_i64gather_epi64\n  #define _mm_i64gather_epi64(base_addr, vindex, scale) simde_mm_i64gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_mask_i64gather_epi64(simde__m128i src, const int64_t* base_addr, simde__m128i vindex, simde__m128i mask, 
const int32_t scale)\n    SIMDE_REQUIRE_CONSTANT(scale)\n    HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), \"`scale' must be a power of two less than or equal to 8\") {\n  simde__m128i_private\n    vindex_ = simde__m128i_to_private(vindex),\n    src_ = simde__m128i_to_private(src),\n    mask_ = simde__m128i_to_private(mask),\n    r_ = simde__m128i_to_private(simde_mm_setzero_si128());\n  const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr);\n\n  SIMDE_VECTORIZE\n  for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) {\n    if ((mask_.i64[i] >> 63) & 1) {\n      const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale));\n      int64_t dst;\n      simde_memcpy(&dst, src1, sizeof(dst));\n      r_.i64[i] = dst;\n    }\n    else {\n      r_.i64[i] = src_.i64[i];\n    }\n  }\n\n  return simde__m128i_from_private(r_);\n}\n#if defined(SIMDE_X86_AVX2_NATIVE)\n  #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0)\n    #define simde_mm_mask_i64gather_epi64(src, base_addr, vindex, mask, scale) _mm_mask_i64gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale)\n  #else\n    #define simde_mm_mask_i64gather_epi64(src, base_addr, vindex, mask, scale) _mm_mask_i64gather_epi64(src, HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, mask, scale)\n  #endif\n#endif\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm_mask_i64gather_epi64\n  #define _mm_mask_i64gather_epi64(src, base_addr, vindex, mask, scale) simde_mm_mask_i64gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_i64gather_epi64(const int64_t* base_addr, simde__m256i vindex, const int32_t scale)\n    SIMDE_REQUIRE_CONSTANT(scale)\n    HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), \"`scale' must be a power of two less than or equal to 8\") {\n  simde__m256i_private\n    vindex_ = simde__m256i_to_private(vindex),\n    r_ = simde__m256i_to_private(simde_mm256_setzero_si256());\n  const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr);\n\n  SIMDE_VECTORIZE\n  for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) {\n    const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale));\n    int64_t dst;\n    simde_memcpy(&dst, src, sizeof(dst));\n    r_.i64[i] = dst;\n  }\n\n  return simde__m256i_from_private(r_);\n}\n#if defined(SIMDE_X86_AVX2_NATIVE)\n  #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0)\n    #define simde_mm256_i64gather_epi64(base_addr, vindex, scale) _mm256_i64gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale)\n  #else\n    #define simde_mm256_i64gather_epi64(base_addr, vindex, scale) _mm256_i64gather_epi64(HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, scale)\n  #endif\n#endif\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_i64gather_epi64\n  #define _mm256_i64gather_epi64(base_addr, vindex, scale) simde_mm256_i64gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_mask_i64gather_epi64(simde__m256i src, const int64_t* base_addr, simde__m256i vindex, simde__m256i mask, const int32_t scale)\n    SIMDE_REQUIRE_CONSTANT(scale)\n    HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), \"`scale' 
must be a power of two less than or equal to 8\") {\n  simde__m256i_private\n    vindex_ = simde__m256i_to_private(vindex),\n    src_ = simde__m256i_to_private(src),\n    mask_ = simde__m256i_to_private(mask),\n    r_ = simde__m256i_to_private(simde_mm256_setzero_si256());\n  const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr);\n\n  SIMDE_VECTORIZE\n  for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) {\n    if ((mask_.i64[i] >> 63) & 1) {\n      const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale));\n      int64_t dst;\n      simde_memcpy(&dst, src1, sizeof(dst));\n      r_.i64[i] = dst;\n    }\n    else {\n      r_.i64[i] = src_.i64[i];\n    }\n  }\n\n  return simde__m256i_from_private(r_);\n}\n#if defined(SIMDE_X86_AVX2_NATIVE)\n  #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0)\n    #define simde_mm256_mask_i64gather_epi64(src, base_addr, vindex, mask, scale) _mm256_mask_i64gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale)\n  #else\n    #define simde_mm256_mask_i64gather_epi64(src, base_addr, vindex, mask, scale) _mm256_mask_i64gather_epi64(src, HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, mask, scale)\n  #endif\n#endif\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_mask_i64gather_epi64\n  #define _mm256_mask_i64gather_epi64(src, base_addr, vindex, mask, scale) simde_mm256_mask_i64gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_i32gather_ps(const simde_float32* base_addr, simde__m128i vindex, const int32_t scale)\n    SIMDE_REQUIRE_CONSTANT(scale)\n    HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), \"`scale' must be a power of two less than or equal to 8\") {\n  simde__m128i_private\n    vindex_ = simde__m128i_to_private(vindex);\n  simde__m128_private\n    r_;\n  const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr);\n\n  SIMDE_VECTORIZE\n  for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) {\n    const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale));\n    simde_float32 dst;\n    simde_memcpy(&dst, src, sizeof(dst));\n    r_.f32[i] = dst;\n  }\n\n  return simde__m128_from_private(r_);\n}\n#if defined(SIMDE_X86_AVX2_NATIVE)\n  #define simde_mm_i32gather_ps(base_addr, vindex, scale) _mm_i32gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(float const*, simde_float32 const*, base_addr), vindex, scale)\n#endif\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm_i32gather_ps\n  #define _mm_i32gather_ps(base_addr, vindex, scale) simde_mm_i32gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, scale)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_mask_i32gather_ps(simde__m128 src, const simde_float32* base_addr, simde__m128i vindex, simde__m128 mask, const int32_t scale)\n    SIMDE_REQUIRE_CONSTANT(scale)\n    HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), \"`scale' must be a power of two less than or equal to 8\") {\n  simde__m128i_private\n    vindex_ = simde__m128i_to_private(vindex);\n  simde__m128_private\n    src_ = simde__m128_to_private(src),\n    mask_ = simde__m128_to_private(mask),\n    r_;\n  const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr);\n\n  SIMDE_VECTORIZE\n  for (size_t 
i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) {\n    if ((mask_.i32[i] >> 31) & 1) {\n      const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale));\n      simde_float32 dst;\n      simde_memcpy(&dst, src1, sizeof(dst));\n      r_.f32[i] = dst;\n    }\n    else {\n      r_.f32[i] = src_.f32[i];\n    }\n  }\n\n  return simde__m128_from_private(r_);\n}\n#if defined(SIMDE_X86_AVX2_NATIVE)\n  #define simde_mm_mask_i32gather_ps(src, base_addr, vindex, mask, scale) _mm_mask_i32gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(float const*, simde_float32 const*, base_addr), vindex, mask, scale)\n#endif\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm_mask_i32gather_ps\n  #define _mm_mask_i32gather_ps(src, base_addr, vindex, mask, scale) simde_mm_mask_i32gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, mask, scale)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_i32gather_ps(const simde_float32* base_addr, simde__m256i vindex, const int32_t scale)\n    SIMDE_REQUIRE_CONSTANT(scale)\n    HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), \"`scale' must be a power of two less than or equal to 8\") {\n  simde__m256i_private\n    vindex_ = simde__m256i_to_private(vindex);\n  simde__m256_private\n    r_;\n  const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr);\n\n  SIMDE_VECTORIZE\n  for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) {\n    const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale));\n    simde_float32 dst;\n    simde_memcpy(&dst, src, sizeof(dst));\n    r_.f32[i] = dst;\n  }\n\n  return simde__m256_from_private(r_);\n}\n#if defined(SIMDE_X86_AVX2_NATIVE)\n  #define simde_mm256_i32gather_ps(base_addr, vindex, scale) _mm256_i32gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(float const*, simde_float32 const*, (base_addr)), (vindex), (scale))\n#endif\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_i32gather_ps\n  #define _mm256_i32gather_ps(base_addr, vindex, scale) simde_mm256_i32gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, (base_addr)), (vindex), (scale))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_mask_i32gather_ps(simde__m256 src, const simde_float32* base_addr, simde__m256i vindex, simde__m256 mask, const int32_t scale)\n    SIMDE_REQUIRE_CONSTANT(scale)\n    HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), \"`scale' must be a power of two less than or equal to 8\") {\n  simde__m256i_private\n    vindex_ = simde__m256i_to_private(vindex);\n  simde__m256_private\n    src_ = simde__m256_to_private(src),\n    mask_ = simde__m256_to_private(mask),\n    r_;\n  const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr);\n\n  SIMDE_VECTORIZE\n  for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) {\n    if ((mask_.i32[i] >> 31) & 1) {\n      const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale));\n      simde_float32 dst;\n      simde_memcpy(&dst, src1, sizeof(dst));\n      r_.f32[i] = dst;\n    }\n    else {\n      r_.f32[i] = src_.f32[i];\n    }\n  }\n\n  return simde__m256_from_private(r_);\n}\n#if defined(SIMDE_X86_AVX2_NATIVE)\n  #define simde_mm256_mask_i32gather_ps(src, base_addr, vindex, mask, scale) _mm256_mask_i32gather_ps(src, 
SIMDE_CHECKED_REINTERPRET_CAST(float const*, simde_float32 const*, base_addr), vindex, mask, scale)\n#endif\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_mask_i32gather_ps\n  #define _mm256_mask_i32gather_ps(src, base_addr, vindex, mask, scale) simde_mm256_mask_i32gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, mask, scale)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_i64gather_ps(const simde_float32* base_addr, simde__m128i vindex, const int32_t scale)\n    SIMDE_REQUIRE_CONSTANT(scale)\n    HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), \"`scale' must be a power of two less than or equal to 8\") {\n  simde__m128i_private\n    vindex_ = simde__m128i_to_private(vindex);\n  simde__m128_private\n    r_ = simde__m128_to_private(simde_mm_setzero_ps());\n  const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr);\n\n  SIMDE_VECTORIZE\n  for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) {\n    const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale));\n    simde_float32 dst;\n    simde_memcpy(&dst, src, sizeof(dst));\n    r_.f32[i] = dst;\n  }\n\n  return simde__m128_from_private(r_);\n}\n#if defined(SIMDE_X86_AVX2_NATIVE)\n  #define simde_mm_i64gather_ps(base_addr, vindex, scale) _mm_i64gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(float const*, simde_float32 const*, base_addr), vindex, scale)\n#endif\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm_i64gather_ps\n  #define _mm_i64gather_ps(base_addr, vindex, scale) simde_mm_i64gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, scale)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_mask_i64gather_ps(simde__m128 src, const simde_float32* base_addr, simde__m128i vindex, simde__m128 mask, const int32_t scale)\n    SIMDE_REQUIRE_CONSTANT(scale)\n    HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), \"`scale' must be a power of two less than or equal to 8\") {\n  simde__m128i_private\n    vindex_ = simde__m128i_to_private(vindex);\n  simde__m128_private\n    src_ = simde__m128_to_private(src),\n    mask_ = simde__m128_to_private(mask),\n    r_ = simde__m128_to_private(simde_mm_setzero_ps());\n  const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr);\n\n  SIMDE_VECTORIZE\n  for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) {\n    if ((mask_.i32[i] >> 31) & 1) {\n      const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale));\n      simde_float32 dst;\n      simde_memcpy(&dst, src1, sizeof(dst));\n      r_.f32[i] = dst;\n    }\n    else {\n      r_.f32[i] = src_.f32[i];\n    }\n  }\n\n  return simde__m128_from_private(r_);\n}\n#if defined(SIMDE_X86_AVX2_NATIVE)\n  #define simde_mm_mask_i64gather_ps(src, base_addr, vindex, mask, scale) _mm_mask_i64gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(float const*, simde_float32 const*, base_addr), vindex, mask, scale)\n#endif\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm_mask_i64gather_ps\n  #define _mm_mask_i64gather_ps(src, base_addr, vindex, mask, scale) simde_mm_mask_i64gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, mask, scale)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm256_i64gather_ps(const simde_float32* base_addr, simde__m256i 
vindex, const int32_t scale)\n    SIMDE_REQUIRE_CONSTANT(scale)\n    HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), \"`scale' must be a power of two less than or equal to 8\") {\n  simde__m256i_private\n    vindex_ = simde__m256i_to_private(vindex);\n  simde__m128_private\n    r_ = simde__m128_to_private(simde_mm_setzero_ps());\n  const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr);\n\n  SIMDE_VECTORIZE\n  for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) {\n    const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale));\n    simde_float32 dst;\n    simde_memcpy(&dst, src, sizeof(dst));\n    r_.f32[i] = dst;\n  }\n\n  return simde__m128_from_private(r_);\n}\n#if defined(SIMDE_X86_AVX2_NATIVE)\n  #define simde_mm256_i64gather_ps(base_addr, vindex, scale) _mm256_i64gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(float const*, simde_float32 const*, base_addr), vindex, scale)\n#endif\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_i64gather_ps\n  #define _mm256_i64gather_ps(base_addr, vindex, scale) simde_mm256_i64gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, scale)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm256_mask_i64gather_ps(simde__m128 src, const simde_float32* base_addr, simde__m256i vindex, simde__m128 mask, const int32_t scale)\n    SIMDE_REQUIRE_CONSTANT(scale)\n    HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), \"`scale' must be a power of two less than or equal to 8\") {\n  simde__m256i_private\n    vindex_ = simde__m256i_to_private(vindex);\n  simde__m128_private\n    src_ = simde__m128_to_private(src),\n    mask_ = simde__m128_to_private(mask),\n    r_ = simde__m128_to_private(simde_mm_setzero_ps());\n  const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr);\n\n  SIMDE_VECTORIZE\n  for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) {\n    if ((mask_.i32[i] >> 31) & 1) {\n      const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale));\n      simde_float32 dst;\n      simde_memcpy(&dst, src1, sizeof(dst));\n      r_.f32[i] = dst;\n    }\n    else {\n      r_.f32[i] = src_.f32[i];\n    }\n  }\n\n  return simde__m128_from_private(r_);\n}\n#if defined(SIMDE_X86_AVX2_NATIVE)\n  #define simde_mm256_mask_i64gather_ps(src, base_addr, vindex, mask, scale) _mm256_mask_i64gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(float const*, simde_float32 const*, base_addr), vindex, mask, scale)\n#endif\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_mask_i64gather_ps\n  #define _mm256_mask_i64gather_ps(src, base_addr, vindex, mask, scale) simde_mm256_mask_i64gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, mask, scale)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_i32gather_pd(const simde_float64* base_addr, simde__m128i vindex, const int32_t scale)\n    SIMDE_REQUIRE_CONSTANT(scale)\n    HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), \"`scale' must be a power of two less than or equal to 8\") {\n  simde__m128i_private\n    vindex_ = simde__m128i_to_private(vindex);\n  simde__m128d_private\n    r_;\n  const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr);\n\n  SIMDE_VECTORIZE\n  for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n    const 
uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale));\n    simde_float64 dst;\n    simde_memcpy(&dst, src, sizeof(dst));\n    r_.f64[i] = dst;\n  }\n\n  return simde__m128d_from_private(r_);\n}\n#if defined(SIMDE_X86_AVX2_NATIVE)\n  #define simde_mm_i32gather_pd(base_addr, vindex, scale) _mm_i32gather_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, scale)\n#endif\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm_i32gather_pd\n  #define _mm_i32gather_pd(base_addr, vindex, scale) simde_mm_i32gather_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, scale)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_mask_i32gather_pd(simde__m128d src, const simde_float64* base_addr, simde__m128i vindex, simde__m128d mask, const int32_t scale)\n    SIMDE_REQUIRE_CONSTANT(scale)\n    HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), \"`scale' must be a power of two less than or equal to 8\") {\n  simde__m128i_private\n    vindex_ = simde__m128i_to_private(vindex);\n  simde__m128d_private\n    src_ = simde__m128d_to_private(src),\n    mask_ = simde__m128d_to_private(mask),\n    r_;\n  const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr);\n\n  SIMDE_VECTORIZE\n  for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n    if ((mask_.i64[i] >> 63) & 1) {\n      const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale));\n      simde_float64 dst;\n      simde_memcpy(&dst, src1, sizeof(dst));\n      r_.f64[i] = dst;\n    }\n    else {\n      r_.f64[i] = src_.f64[i];\n    }\n  }\n\n  return simde__m128d_from_private(r_);\n}\n#if defined(SIMDE_X86_AVX2_NATIVE)\n  #define simde_mm_mask_i32gather_pd(src, base_addr, vindex, mask, scale) _mm_mask_i32gather_pd(src, HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, mask, scale)\n#endif\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm_mask_i32gather_pd\n  #define _mm_mask_i32gather_pd(src, base_addr, vindex, mask, scale) simde_mm_mask_i32gather_pd(src, HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, mask, scale)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_i32gather_pd(const simde_float64* base_addr, simde__m128i vindex, const int32_t scale)\n    SIMDE_REQUIRE_CONSTANT(scale)\n    HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), \"`scale' must be a power of two less than or equal to 8\") {\n  simde__m128i_private\n    vindex_ = simde__m128i_to_private(vindex);\n  simde__m256d_private\n    r_;\n  const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr);\n\n  SIMDE_VECTORIZE\n  for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) {\n    const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale));\n    simde_float64 dst;\n    simde_memcpy(&dst, src, sizeof(dst));\n    r_.f64[i] = dst;\n  }\n\n  return simde__m256d_from_private(r_);\n}\n#if defined(SIMDE_X86_AVX2_NATIVE)\n  #define simde_mm256_i32gather_pd(base_addr, vindex, scale) _mm256_i32gather_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, scale)\n#endif\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_i32gather_pd\n  #define _mm256_i32gather_pd(base_addr, vindex, scale) simde_mm256_i32gather_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, 
scale)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_mask_i32gather_pd(simde__m256d src, const simde_float64* base_addr, simde__m128i vindex, simde__m256d mask, const int32_t scale)\n    SIMDE_REQUIRE_CONSTANT(scale)\n    HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), \"`scale' must be a power of two less than or equal to 8\") {\n  simde__m256d_private\n    src_ = simde__m256d_to_private(src),\n    mask_ = simde__m256d_to_private(mask),\n    r_;\n  simde__m128i_private\n    vindex_ = simde__m128i_to_private(vindex);\n  const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr);\n\n  SIMDE_VECTORIZE\n  for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) {\n    if ((mask_.i64[i] >> 63) & 1) {\n      const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale));\n      simde_float64 dst;\n      simde_memcpy(&dst, src1, sizeof(dst));\n      r_.f64[i] = dst;\n    }\n    else {\n      r_.f64[i] = src_.f64[i];\n    }\n  }\n\n  return simde__m256d_from_private(r_);\n}\n#if defined(SIMDE_X86_AVX2_NATIVE)\n  #define simde_mm256_mask_i32gather_pd(src, base_addr, vindex, mask, scale) _mm256_mask_i32gather_pd(src, HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, mask, scale)\n#endif\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_mask_i32gather_pd\n  #define _mm256_mask_i32gather_pd(src, base_addr, vindex, mask, scale) simde_mm256_mask_i32gather_pd(src, HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, mask, scale)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_i64gather_pd(const simde_float64* base_addr, simde__m128i vindex, const int32_t scale)\n    SIMDE_REQUIRE_CONSTANT(scale)\n    HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), \"`scale' must be a power of two less than or equal to 8\") {\n  simde__m128i_private\n    vindex_ = simde__m128i_to_private(vindex);\n  simde__m128d_private\n    r_ = simde__m128d_to_private(simde_mm_setzero_pd());\n  const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr);\n\n  SIMDE_VECTORIZE\n  for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) {\n    const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale));\n    simde_float64 dst;\n    simde_memcpy(&dst, src, sizeof(dst));\n    r_.f64[i] = dst;\n  }\n\n  return simde__m128d_from_private(r_);\n}\n#if defined(SIMDE_X86_AVX2_NATIVE)\n  #define simde_mm_i64gather_pd(base_addr, vindex, scale) _mm_i64gather_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, scale)\n#endif\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm_i64gather_pd\n  #define _mm_i64gather_pd(base_addr, vindex, scale) simde_mm_i64gather_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, scale)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_mask_i64gather_pd(simde__m128d src, const simde_float64* base_addr, simde__m128i vindex, simde__m128d mask, const int32_t scale)\n    SIMDE_REQUIRE_CONSTANT(scale)\n    HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), \"`scale' must be a power of two less than or equal to 8\") {\n  simde__m128i_private\n    vindex_ = simde__m128i_to_private(vindex);\n  simde__m128d_private\n    src_ = simde__m128d_to_private(src),\n    mask_ = simde__m128d_to_private(mask),\n    r_ = simde__m128d_to_private(simde_mm_setzero_pd());\n  const 
uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr);\n\n  SIMDE_VECTORIZE\n  for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) {\n    if ((mask_.i64[i] >> 63) & 1) {\n      const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale));\n      simde_float64 dst;\n      simde_memcpy(&dst, src1, sizeof(dst));\n      r_.f64[i] = dst;\n    }\n    else {\n      r_.f64[i] = src_.f64[i];\n    }\n  }\n\n  return simde__m128d_from_private(r_);\n}\n#if defined(SIMDE_X86_AVX2_NATIVE)\n  #define simde_mm_mask_i64gather_pd(src, base_addr, vindex, mask, scale) _mm_mask_i64gather_pd(src, HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, mask, scale)\n#endif\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm_mask_i64gather_pd\n  #define _mm_mask_i64gather_pd(src, base_addr, vindex, mask, scale) simde_mm_mask_i64gather_pd(src, HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, mask, scale)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_i64gather_pd(const simde_float64* base_addr, simde__m256i vindex, const int32_t scale)\n    SIMDE_REQUIRE_CONSTANT(scale)\n    HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), \"`scale' must be a power of two less than or equal to 8\") {\n  simde__m256i_private\n    vindex_ = simde__m256i_to_private(vindex);\n  simde__m256d_private\n    r_ = simde__m256d_to_private(simde_mm256_setzero_pd());\n  const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr);\n\n  SIMDE_VECTORIZE\n  for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) {\n    const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale));\n    simde_float64 dst;\n    simde_memcpy(&dst, src, sizeof(dst));\n    r_.f64[i] = dst;\n  }\n\n  return simde__m256d_from_private(r_);\n}\n#if defined(SIMDE_X86_AVX2_NATIVE)\n  #define simde_mm256_i64gather_pd(base_addr, vindex, scale) _mm256_i64gather_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, scale)\n#endif\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_i64gather_pd\n  #define _mm256_i64gather_pd(base_addr, vindex, scale) simde_mm256_i64gather_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, scale)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_mask_i64gather_pd(simde__m256d src, const simde_float64* base_addr, simde__m256i vindex, simde__m256d mask, const int32_t scale)\n    SIMDE_REQUIRE_CONSTANT(scale)\n    HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), \"`scale' must be a power of two less than or equal to 8\") {\n  simde__m256i_private\n    vindex_ = simde__m256i_to_private(vindex);\n  simde__m256d_private\n    src_ = simde__m256d_to_private(src),\n    mask_ = simde__m256d_to_private(mask),\n    r_ = simde__m256d_to_private(simde_mm256_setzero_pd());\n  const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr);\n\n  SIMDE_VECTORIZE\n  for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) {\n    if ((mask_.i64[i] >> 63) & 1) {\n      const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale));\n      simde_float64 dst;\n      simde_memcpy(&dst, src1, sizeof(dst));\n      r_.f64[i] = dst;\n    }\n    else {\n      r_.f64[i] = src_.f64[i];\n    }\n  }\n\n  return simde__m256d_from_private(r_);\n}\n#if 
defined(SIMDE_X86_AVX2_NATIVE)\n  #define simde_mm256_mask_i64gather_pd(src, base_addr, vindex, mask, scale) _mm256_mask_i64gather_pd(src, HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, mask, scale)\n#endif\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_mask_i64gather_pd\n  #define _mm256_mask_i64gather_pd(src, base_addr, vindex, mask, scale) simde_mm256_mask_i64gather_pd(src, HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, mask, scale)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_inserti128_si256(simde__m256i a, simde__m128i b, const int imm8)\n    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) {\n  simde__m256i_private a_ = simde__m256i_to_private(a);\n  simde__m128i_private b_ = simde__m128i_to_private(b);\n\n  a_.m128i_private[ imm8 & 1 ] = b_;\n\n  return simde__m256i_from_private(a_);\n}\n#if defined(SIMDE_X86_AVX2_NATIVE)\n  #define simde_mm256_inserti128_si256(a, b, imm8) _mm256_inserti128_si256(a, b, imm8)\n#endif\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_inserti128_si256\n  #define _mm256_inserti128_si256(a, b, imm8) simde_mm256_inserti128_si256(a, b, imm8)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_madd_epi16 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_madd_epi16(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_madd_epi16(a_.m128i[0], b_.m128i[0]);\n      r_.m128i[1] = simde_mm_madd_epi16(a_.m128i[1], b_.m128i[1]);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector)\n      SIMDE_ALIGN_TO_32 int32_t product SIMDE_VECTOR(64);\n      SIMDE_ALIGN_TO_32 int32_t a32x16 SIMDE_VECTOR(64);\n      SIMDE_ALIGN_TO_32 int32_t b32x16 SIMDE_VECTOR(64);\n      SIMDE_ALIGN_TO_32 int32_t even SIMDE_VECTOR(32);\n      SIMDE_ALIGN_TO_32 int32_t odd SIMDE_VECTOR(32);\n\n      SIMDE_CONVERT_VECTOR_(a32x16, a_.i16);\n      SIMDE_CONVERT_VECTOR_(b32x16, b_.i16);\n      product = a32x16 * b32x16;\n\n      even = __builtin_shufflevector(product, product, 0, 2, 4, 6, 8, 10, 12, 14);\n      odd  = __builtin_shufflevector(product, product, 1, 3, 5, 7, 9, 11, 13, 15);\n\n      r_.i32 = even + odd;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i += 2) {\n        r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) + (a_.i16[i + 1] * b_.i16[i + 1]);\n      }\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_madd_epi16\n  #define _mm256_madd_epi16(a, b) simde_mm256_madd_epi16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_maddubs_epi16 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_maddubs_epi16(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_maddubs_epi16(a_.m128i[0], b_.m128i[0]);\n      r_.m128i[1] = simde_mm_maddubs_epi16(a_.m128i[1], b_.m128i[1]);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n        const int idx = HEDLEY_STATIC_CAST(int, i) << 1;\n        int32_t ts =\n          
(HEDLEY_STATIC_CAST(int16_t, a_.u8[  idx  ]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[  idx  ])) +\n          (HEDLEY_STATIC_CAST(int16_t, a_.u8[idx + 1]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[idx + 1]));\n        r_.i16[i] = (ts > INT16_MIN) ? ((ts < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, ts) : INT16_MAX) : INT16_MIN;\n      }\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_maddubs_epi16\n  #define _mm256_maddubs_epi16(a, b) simde_mm256_maddubs_epi16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_maskload_epi32 (const int32_t mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m128i mask) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm_maskload_epi32(mem_addr, mask);\n  #else\n    simde__m128i_private\n      r_,\n      mask_ = simde__m128i_to_private(mask),\n      mask_shr_;\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      mask_shr_.neon_i32 = vshrq_n_s32(mask_.neon_i32, 31);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n        mask_shr_.i32[i] = mask_.i32[i] >> 31;\n      }\n    #endif\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n      r_.i32[i] = mask_shr_.i32[i] ? mem_addr[i] : INT32_C(0);\n    }\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm_maskload_epi32\n  #define _mm_maskload_epi32(mem_addr, mask) simde_mm_maskload_epi32(HEDLEY_REINTERPRET_CAST(int32_t const*, mem_addr), mask)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_maskload_epi32 (const int32_t mem_addr[HEDLEY_ARRAY_PARAM(8)], simde__m256i mask) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_maskload_epi32(mem_addr, mask);\n  #else\n    simde__m256i_private\n      mask_ = simde__m256i_to_private(mask),\n      r_;\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n      r_.i32[i] = (mask_.i32[i] >> 31) ? mem_addr[i] : INT32_C(0);\n    }\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_maskload_epi32\n  #define _mm256_maskload_epi32(mem_addr, mask) simde_mm256_maskload_epi32(HEDLEY_REINTERPRET_CAST(int32_t const*, mem_addr), mask)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_maskload_epi64 (const int64_t mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128i mask) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm_maskload_epi64(HEDLEY_REINTERPRET_CAST(const long long *, mem_addr), mask);\n  #else\n    simde__m128i_private\n      r_,\n      mask_ = simde__m128i_to_private(mask),\n      mask_shr_;\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      mask_shr_.neon_i64 = vshrq_n_s64(mask_.neon_i64, 63);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(mask_.i64) / sizeof(mask_.i64[0])) ; i++) {\n        mask_shr_.i64[i] = mask_.i64[i] >> 63;\n      }\n    #endif\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {\n      r_.i64[i] = mask_shr_.i64[i] ? 
mem_addr[i] : INT64_C(0);\n    }\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm_maskload_epi64\n  #define _mm_maskload_epi64(mem_addr, mask) simde_mm_maskload_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, mem_addr), mask)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_maskload_epi64 (const int64_t mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m256i mask) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_maskload_epi64(HEDLEY_REINTERPRET_CAST(const long long *, mem_addr), mask);\n  #else\n    simde__m256i_private\n      mask_ = simde__m256i_to_private(mask),\n      r_;\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {\n      r_.i64[i] = (mask_.i64[i] >> 63) ? mem_addr[i] : INT64_C(0);\n    }\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_maskload_epi64\n  #define _mm256_maskload_epi64(mem_addr, mask) simde_mm256_maskload_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, mem_addr), mask)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nvoid\nsimde_mm_maskstore_epi32 (int32_t mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m128i mask, simde__m128i a) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    _mm_maskstore_epi32(mem_addr, mask, a);\n  #else\n    simde__m128i_private mask_ = simde__m128i_to_private(mask);\n    simde__m128i_private a_ = simde__m128i_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) {\n      if (mask_.u32[i] & (UINT32_C(1) << 31))\n        mem_addr[i] = a_.i32[i];\n    }\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm_maskstore_epi32\n  #define _mm_maskstore_epi32(mem_addr, mask, a) simde_mm_maskstore_epi32(HEDLEY_REINTERPRET_CAST(int32_t *, mem_addr), mask, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nvoid\nsimde_mm256_maskstore_epi32 (int32_t mem_addr[HEDLEY_ARRAY_PARAM(8)], simde__m256i mask, simde__m256i a) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    _mm256_maskstore_epi32(mem_addr, mask, a);\n  #else\n    simde__m256i_private mask_ = simde__m256i_to_private(mask);\n    simde__m256i_private a_ = simde__m256i_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) {\n      if (mask_.u32[i] & (UINT32_C(1) << 31))\n        mem_addr[i] = a_.i32[i];\n    }\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_maskstore_epi32\n  #define _mm256_maskstore_epi32(mem_addr, mask, a) simde_mm256_maskstore_epi32(HEDLEY_REINTERPRET_CAST(int32_t *, mem_addr), mask, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nvoid\nsimde_mm_maskstore_epi64 (int64_t mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128i mask, simde__m128i a) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    _mm_maskstore_epi64(HEDLEY_REINTERPRET_CAST(long long *, mem_addr), mask, a);\n  #else\n    simde__m128i_private mask_ = simde__m128i_to_private(mask);\n    simde__m128i_private a_ = simde__m128i_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) {\n      if (mask_.u64[i] >> 63)\n        mem_addr[i] = a_.i64[i];\n    }\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm_maskstore_epi64\n  #define _mm_maskstore_epi64(mem_addr, mask, a) simde_mm_maskstore_epi64(HEDLEY_REINTERPRET_CAST(int64_t *, mem_addr), mask, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nvoid\nsimde_mm256_maskstore_epi64 
(int64_t mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m256i mask, simde__m256i a) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    _mm256_maskstore_epi64(HEDLEY_REINTERPRET_CAST(long long *, mem_addr), mask, a);\n  #else\n    simde__m256i_private mask_ = simde__m256i_to_private(mask);\n    simde__m256i_private a_ = simde__m256i_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) {\n      if (mask_.u64[i] & (UINT64_C(1) << 63))\n        mem_addr[i] = a_.i64[i];\n    }\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_maskstore_epi64\n  #define _mm256_maskstore_epi64(mem_addr, mask, a) simde_mm256_maskstore_epi64(HEDLEY_REINTERPRET_CAST(int64_t *, mem_addr), mask, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_max_epi8 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_AVX2_NATIVE) && !defined(__PGI)\n    return _mm256_max_epi8(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_max_epi8(a_.m128i[0], b_.m128i[0]);\n      r_.m128i[1] = simde_mm_max_epi8(a_.m128i[1], b_.m128i[1]);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {\n        r_.i8[i] = a_.i8[i] > b_.i8[i] ? a_.i8[i] : b_.i8[i];\n      }\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_max_epi8\n  #define _mm256_max_epi8(a, b) simde_mm256_max_epi8(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_max_epu8 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_max_epu8(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_max_epu8(a_.m128i[0], b_.m128i[0]);\n      r_.m128i[1] = simde_mm_max_epu8(a_.m128i[1], b_.m128i[1]);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {\n        r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? a_.u8[i] : b_.u8[i];\n      }\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_max_epu8\n  #define _mm256_max_epu8(a, b) simde_mm256_max_epu8(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_max_epu16 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_max_epu16(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_max_epu16(a_.m128i[0], b_.m128i[0]);\n      r_.m128i[1] = simde_mm_max_epu16(a_.m128i[1], b_.m128i[1]);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {\n        r_.u16[i] = (a_.u16[i] > b_.u16[i]) ? 
a_.u16[i] : b_.u16[i];\n      }\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_max_epu16\n  #define _mm256_max_epu16(a, b) simde_mm256_max_epu16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_max_epu32 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_max_epu32(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_max_epu32(a_.m128i[0], b_.m128i[0]);\n      r_.m128i[1] = simde_mm_max_epu32(a_.m128i[1], b_.m128i[1]);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {\n        r_.u32[i] = (a_.u32[i] > b_.u32[i]) ? a_.u32[i] : b_.u32[i];\n      }\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_max_epu32\n  #define _mm256_max_epu32(a, b) simde_mm256_max_epu32(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_max_epi16 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_max_epi16(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_max_epi16(a_.m128i[0], b_.m128i[0]);\n      r_.m128i[1] = simde_mm_max_epi16(a_.m128i[1], b_.m128i[1]);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n        r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? a_.i16[i] : b_.i16[i];\n      }\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_max_epi16\n  #define _mm256_max_epi16(a, b) simde_mm256_max_epi16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_max_epi32 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_max_epi32(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_max_epi32(a_.m128i[0], b_.m128i[0]);\n      r_.m128i[1] = simde_mm_max_epi32(a_.m128i[1], b_.m128i[1]);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n        r_.i32[i] = a_.i32[i] > b_.i32[i] ? 
a_.i32[i] : b_.i32[i];\n      }\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_max_epi32\n  #define _mm256_max_epi32(a, b) simde_mm256_max_epi32(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_min_epi8 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_AVX2_NATIVE) && !defined(__PGI)\n    return _mm256_min_epi8(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_min_epi8(a_.m128i[0], b_.m128i[0]);\n      r_.m128i[1] = simde_mm_min_epi8(a_.m128i[1], b_.m128i[1]);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {\n        r_.i8[i] = a_.i8[i] < b_.i8[i] ? a_.i8[i] : b_.i8[i];\n      }\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_min_epi8\n  #define _mm256_min_epi8(a, b) simde_mm256_min_epi8(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_min_epi16 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_min_epi16(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_min_epi16(a_.m128i[0], b_.m128i[0]);\n      r_.m128i[1] = simde_mm_min_epi16(a_.m128i[1], b_.m128i[1]);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n        r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? a_.i16[i] : b_.i16[i];\n      }\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_min_epi16\n  #define _mm256_min_epi16(a, b) simde_mm256_min_epi16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_min_epi32 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_min_epi32(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_min_epi32(a_.m128i[0], b_.m128i[0]);\n      r_.m128i[1] = simde_mm_min_epi32(a_.m128i[1], b_.m128i[1]);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n        r_.i32[i] = a_.i32[i] < b_.i32[i] ? 
a_.i32[i] : b_.i32[i];\n      }\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_min_epi32\n  #define _mm256_min_epi32(a, b) simde_mm256_min_epi32(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_min_epu8 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_min_epu8(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_min_epu8(a_.m128i[0], b_.m128i[0]);\n      r_.m128i[1] = simde_mm_min_epu8(a_.m128i[1], b_.m128i[1]);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {\n        r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? a_.u8[i] : b_.u8[i];\n      }\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_min_epu8\n  #define _mm256_min_epu8(a, b) simde_mm256_min_epu8(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_min_epu16 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_min_epu16(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_min_epu16(a_.m128i[0], b_.m128i[0]);\n      r_.m128i[1] = simde_mm_min_epu16(a_.m128i[1], b_.m128i[1]);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {\n        r_.u16[i] = (a_.u16[i] < b_.u16[i]) ? a_.u16[i] : b_.u16[i];\n      }\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_min_epu16\n  #define _mm256_min_epu16(a, b) simde_mm256_min_epu16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_min_epu32 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_min_epu32(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_min_epu32(a_.m128i[0], b_.m128i[0]);\n      r_.m128i[1] = simde_mm_min_epu32(a_.m128i[1], b_.m128i[1]);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {\n        r_.u32[i] = (a_.u32[i] < b_.u32[i]) ? 
a_.u32[i] : b_.u32[i];\n      }\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_min_epu32\n  #define _mm256_min_epu32(a, b) simde_mm256_min_epu32(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nint32_t\nsimde_mm256_movemask_epi8 (simde__m256i a) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_movemask_epi8(a);\n  #else\n    simde__m256i_private a_ = simde__m256i_to_private(a);\n    uint32_t r = 0;\n\n    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n      for (size_t i = 0 ; i < (sizeof(a_.m128i) / sizeof(a_.m128i[0])) ; i++) {\n        r |= HEDLEY_STATIC_CAST(uint32_t,simde_mm_movemask_epi8(a_.m128i[i])) << (16 * i);\n      }\n    #else\n      r = 0;\n      SIMDE_VECTORIZE_REDUCTION(|:r)\n      for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) {\n        r |= HEDLEY_STATIC_CAST(uint32_t, (a_.u8[31 - i] >> 7)) << (31 - i);\n      }\n    #endif\n\n    return HEDLEY_STATIC_CAST(int32_t, r);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_movemask_epi8\n  #define _mm256_movemask_epi8(a) simde_mm256_movemask_epi8(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_mpsadbw_epu8 (simde__m256i a, simde__m256i b, const int imm8)\n    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255)  {\n  simde__m256i_private\n    r_,\n    a_ = simde__m256i_to_private(a),\n    b_ = simde__m256i_to_private(b);\n\n  const int a_offset1 = imm8 & 4;\n  const int b_offset1 = (imm8 & 3) << 2;\n  const int a_offset2 = (imm8 >> 3) & 4;\n  const int b_offset2 = ((imm8 >> 3) & 3) << 2;\n\n  #if defined(simde_math_abs)\n    const int halfway_point = HEDLEY_STATIC_CAST(int, (sizeof(r_.u16) / sizeof(r_.u16[0])) ) / 2;\n    for (int i = 0 ; i < halfway_point ; i++) {\n      r_.u16[i] =\n        HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset1 + i + 0] - b_.u8[b_offset1 + 0]))) +\n        HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset1 + i + 1] - b_.u8[b_offset1 + 1]))) +\n        HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset1 + i + 2] - b_.u8[b_offset1 + 2]))) +\n        HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset1 + i + 3] - b_.u8[b_offset1 + 3])));\n      r_.u16[halfway_point + i] =\n        HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[2 * halfway_point + a_offset2 + i + 0] - b_.u8[2 * halfway_point + b_offset2 + 0]))) +\n        HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[2 * halfway_point + a_offset2 + i + 1] - b_.u8[2 * halfway_point + b_offset2 + 1]))) +\n        HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[2 * halfway_point + a_offset2 + i + 2] - b_.u8[2 * halfway_point + b_offset2 + 2]))) +\n        HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[2 * halfway_point + a_offset2 + i + 3] - b_.u8[2 * halfway_point + b_offset2 + 3])));\n    }\n  #else\n    HEDLEY_UNREACHABLE();\n  #endif\n\n  return simde__m256i_from_private(r_);\n}\n#if defined(SIMDE_X86_AVX2_NATIVE) && SIMDE_DETECT_CLANG_VERSION_CHECK(3,9,0)\n  #define simde_mm256_mpsadbw_epu8(a, b, imm8) _mm256_mpsadbw_epu8(a, b, imm8)\n#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n  #define simde_mm256_mpsadbw_epu8(a, b, imm8) \\\n     simde_mm256_set_m128i( \\\n       simde_mm_mpsadbw_epu8(simde_mm256_extracti128_si256(a, 1), simde_mm256_extracti128_si256(b, 1), 
(imm8 >> 3)), \\\n       simde_mm_mpsadbw_epu8(simde_mm256_extracti128_si256(a, 0), simde_mm256_extracti128_si256(b, 0), (imm8)))\n#endif\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_mpsadbw_epu8\n  #define _mm256_mpsadbw_epu8(a, b, imm8) simde_mm256_mpsadbw_epu8(a, b, imm8)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_mul_epi32 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_mul_epi32(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_mul_epi32(a_.m128i[0], b_.m128i[0]);\n      r_.m128i[1] = simde_mm_mul_epi32(a_.m128i[1], b_.m128i[1]);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {\n        r_.i64[i] =\n          HEDLEY_STATIC_CAST(int64_t, a_.i32[i * 2]) *\n          HEDLEY_STATIC_CAST(int64_t, b_.i32[i * 2]);\n      }\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n#  define _mm256_mul_epi32(a, b) simde_mm256_mul_epi32(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_mul_epu32 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_mul_epu32(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_mul_epu32(a_.m128i[0], b_.m128i[0]);\n      r_.m128i[1] = simde_mm_mul_epu32(a_.m128i[1], b_.m128i[1]);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {\n        r_.u64[i] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[i * 2]) * HEDLEY_STATIC_CAST(uint64_t, b_.u32[i * 2]);\n      }\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n#  define _mm256_mul_epu32(a, b) simde_mm256_mul_epu32(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_mulhi_epi16 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_mulhi_epi16(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n      r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (HEDLEY_STATIC_CAST(uint32_t, HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) >> 16));\n    }\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n#  define _mm256_mulhi_epi16(a, b) simde_mm256_mulhi_epi16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_mulhi_epu16 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_mulhi_epu16(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {\n      r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i]) >> 16);\n    }\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if 
defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n#  define _mm256_mulhi_epu16(a, b) simde_mm256_mulhi_epu16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_mulhrs_epi16 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_mulhrs_epi16(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n      r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, (((HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) + 0x4000) >> 15));\n    }\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n#  define _mm256_mulhrs_epi16(a, b) simde_mm256_mulhrs_epi16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_mullo_epi16 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_mullo_epi16(a, b);\n  #else\n    simde__m256i_private\n    a_ = simde__m256i_to_private(a),\n    b_ = simde__m256i_to_private(b),\n    r_;\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n      r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] * b_.i16[i]);\n    }\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_mullo_epi16\n  #define _mm256_mullo_epi16(a, b) simde_mm256_mullo_epi16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_mullo_epi32 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_mullo_epi32(a, b);\n  #else\n    simde__m256i_private\n    a_ = simde__m256i_to_private(a),\n    b_ = simde__m256i_to_private(b),\n    r_;\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n      r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, a_.i32[i] * b_.i32[i]);\n    }\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_mullo_epi32\n  #define _mm256_mullo_epi32(a, b) simde_mm256_mullo_epi32(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_x_mm256_mullo_epu32 (simde__m256i a, simde__m256i b) {\n  simde__m256i_private\n    r_,\n    a_ = simde__m256i_to_private(a),\n    b_ = simde__m256i_to_private(b);\n\n    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.u32 = a_.u32 * b_.u32;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {\n        r_.u32[i] = a_.u32[i] * b_.u32[i];\n      }\n    #endif\n\n  return simde__m256i_from_private(r_);\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_or_si256 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_or_si256(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_or_si128(a_.m128i[0], b_.m128i[0]);\n      r_.m128i[1] = simde_mm_or_si128(a_.m128i[1], b_.m128i[1]);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i32f = a_.i32f | b_.i32f;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) {\n        r_.i32f[i] = a_.i32f[i] | b_.i32f[i];\n      }\n    #endif\n\n    return simde__m256i_from_private(r_);\n  
#endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_or_si256\n  #define _mm256_or_si256(a, b) simde_mm256_or_si256(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_packs_epi16 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_packs_epi16(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_packs_epi16(a_.m128i[0], b_.m128i[0]);\n      r_.m128i[1] = simde_mm_packs_epi16(a_.m128i[1], b_.m128i[1]);\n    #else\n      const size_t halfway_point = (sizeof(r_.i8) / sizeof(r_.i8[0]))/2;\n      const size_t quarter_point = (sizeof(r_.i8) / sizeof(r_.i8[0]))/4;\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < quarter_point ; i++) {\n        r_.i8[i]     = (a_.i16[i] > INT8_MAX) ? INT8_MAX : ((a_.i16[i] < INT8_MIN) ? INT8_MIN : HEDLEY_STATIC_CAST(int8_t, a_.i16[i]));\n        r_.i8[i + quarter_point] = (b_.i16[i] > INT8_MAX) ? INT8_MAX : ((b_.i16[i] < INT8_MIN) ? INT8_MIN : HEDLEY_STATIC_CAST(int8_t, b_.i16[i]));\n        r_.i8[halfway_point + i]     = (a_.i16[quarter_point + i] > INT8_MAX) ? INT8_MAX : ((a_.i16[quarter_point + i] < INT8_MIN) ? INT8_MIN : HEDLEY_STATIC_CAST(int8_t, a_.i16[quarter_point + i]));\n        r_.i8[halfway_point + i + quarter_point] = (b_.i16[quarter_point + i] > INT8_MAX) ? INT8_MAX : ((b_.i16[quarter_point + i] < INT8_MIN) ? INT8_MIN : HEDLEY_STATIC_CAST(int8_t, b_.i16[quarter_point + i]));\n      }\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_packs_epi16\n  #define _mm256_packs_epi16(a, b) simde_mm256_packs_epi16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_packs_epi32 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_packs_epi32(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      v_[] = {\n        simde__m256i_to_private(a),\n        simde__m256i_to_private(b)\n      };\n\n    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_packs_epi32(v_[0].m128i[0], v_[1].m128i[0]);\n      r_.m128i[1] = simde_mm_packs_epi32(v_[0].m128i[1], v_[1].m128i[1]);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n        const int32_t v = v_[(i >> 2) & 1].i32[(i & 11) - ((i & 8) >> 1)];\n        r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, (v > INT16_MAX) ? INT16_MAX : ((v < INT16_MIN) ? 
INT16_MIN : v));\n      }\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_packs_epi32\n  #define _mm256_packs_epi32(a, b) simde_mm256_packs_epi32(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_packus_epi16 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_packus_epi16(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_packus_epi16(a_.m128i[0], b_.m128i[0]);\n      r_.m128i[1] = simde_mm_packus_epi16(a_.m128i[1], b_.m128i[1]);\n    #else\n      const size_t halfway_point = (sizeof(r_.i8) / sizeof(r_.i8[0])) / 2;\n      const size_t quarter_point = (sizeof(r_.i8) / sizeof(r_.i8[0])) / 4;\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < quarter_point ; i++) {\n        r_.u8[i] = (a_.i16[i] > UINT8_MAX) ? UINT8_MAX : ((a_.i16[i] < 0) ? UINT8_C(0) : HEDLEY_STATIC_CAST(uint8_t, a_.i16[i]));\n        r_.u8[i + quarter_point] = (b_.i16[i] > UINT8_MAX) ? UINT8_MAX : ((b_.i16[i] < 0) ? UINT8_C(0) : HEDLEY_STATIC_CAST(uint8_t, b_.i16[i]));\n        r_.u8[halfway_point + i] = (a_.i16[quarter_point + i] > UINT8_MAX) ? UINT8_MAX : ((a_.i16[quarter_point + i] < 0) ? UINT8_C(0) : HEDLEY_STATIC_CAST(uint8_t, a_.i16[quarter_point + i]));\n        r_.u8[halfway_point + i + quarter_point] = (b_.i16[quarter_point + i] > UINT8_MAX) ? UINT8_MAX : ((b_.i16[quarter_point + i] < 0) ? UINT8_C(0) : HEDLEY_STATIC_CAST(uint8_t, b_.i16[quarter_point + i]));\n      }\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_packus_epi16\n  #define _mm256_packus_epi16(a, b) simde_mm256_packus_epi16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_packus_epi32 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_packus_epi32(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_packus_epi32(a_.m128i[0], b_.m128i[0]);\n      r_.m128i[1] = simde_mm_packus_epi32(a_.m128i[1], b_.m128i[1]);\n    #else\n      const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2;\n      const size_t quarter_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 4;\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < quarter_point ; i++) {\n        r_.u16[i] = (a_.i32[i] > UINT16_MAX) ? UINT16_MAX : ((a_.i32[i] < 0) ? UINT16_C(0) : HEDLEY_STATIC_CAST(uint16_t, a_.i32[i]));\n        r_.u16[i + quarter_point] = (b_.i32[i] > UINT16_MAX) ? UINT16_MAX : ((b_.i32[i] < 0) ? UINT16_C(0) : HEDLEY_STATIC_CAST(uint16_t, b_.i32[i]));\n        r_.u16[halfway_point + i]     = (a_.i32[quarter_point + i] > UINT16_MAX) ? UINT16_MAX : ((a_.i32[quarter_point + i] < 0) ? UINT16_C(0) : HEDLEY_STATIC_CAST(uint16_t, a_.i32[quarter_point + i]));\n        r_.u16[halfway_point + i + quarter_point] = (b_.i32[quarter_point + i] > UINT16_MAX) ? UINT16_MAX : ((b_.i32[quarter_point + i] < 0) ? 
UINT16_C(0) : HEDLEY_STATIC_CAST(uint16_t, b_.i32[quarter_point + i]));\n      }\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_packus_epi32\n  #define _mm256_packus_epi32(a, b) simde_mm256_packus_epi32(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_permute2x128_si256 (simde__m256i a, simde__m256i b, const int imm8)\n    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) {\n  simde__m256i_private\n    r_,\n    a_ = simde__m256i_to_private(a),\n    b_ = simde__m256i_to_private(b);\n\n  r_.m128i_private[0] = (imm8 & 0x08) ? simde__m128i_to_private(simde_mm_setzero_si128()) : ((imm8 & 0x02) ? b_.m128i_private[(imm8     ) & 1] : a_.m128i_private[(imm8     ) & 1]);\n  r_.m128i_private[1] = (imm8 & 0x80) ? simde__m128i_to_private(simde_mm_setzero_si128()) : ((imm8 & 0x20) ? b_.m128i_private[(imm8 >> 4) & 1] : a_.m128i_private[(imm8 >> 4) & 1]);\n\n  return simde__m256i_from_private(r_);\n}\n#if defined(SIMDE_X86_AVX2_NATIVE)\n#  define simde_mm256_permute2x128_si256(a, b, imm8) _mm256_permute2x128_si256(a, b, imm8)\n#endif\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_permute2x128_si256\n  #define _mm256_permute2x128_si256(a, b, imm8) simde_mm256_permute2x128_si256(a, b, imm8)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_permute4x64_epi64 (simde__m256i a, const int imm8)\n    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) {\n  simde__m256i_private\n    r_,\n    a_ = simde__m256i_to_private(a);\n\n  r_.i64[0] = (imm8 & 0x02) ? a_.i64[((imm8       ) & 1)+2] : a_.i64[(imm8       ) & 1];\n  r_.i64[1] = (imm8 & 0x08) ? a_.i64[((imm8 >> 2  ) & 1)+2] : a_.i64[(imm8 >> 2  ) & 1];\n  r_.i64[2] = (imm8 & 0x20) ? a_.i64[((imm8 >> 4  ) & 1)+2] : a_.i64[(imm8 >> 4  ) & 1];\n  r_.i64[3] = (imm8 & 0x80) ? a_.i64[((imm8 >> 6  ) & 1)+2] : a_.i64[(imm8 >> 6  ) & 1];\n\n  return simde__m256i_from_private(r_);\n}\n#if defined(SIMDE_X86_AVX2_NATIVE)\n#  define simde_mm256_permute4x64_epi64(a, imm8) _mm256_permute4x64_epi64(a, imm8)\n#endif\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_permute4x64_epi64\n  #define _mm256_permute4x64_epi64(a, imm8) simde_mm256_permute4x64_epi64(a, imm8)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_permute4x64_pd (simde__m256d a, const int imm8)\n    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) {\n  simde__m256d_private\n    r_,\n    a_ = simde__m256d_to_private(a);\n\n  r_.f64[0] = (imm8 & 0x02) ? a_.f64[((imm8       ) & 1)+2] : a_.f64[(imm8       ) & 1];\n  r_.f64[1] = (imm8 & 0x08) ? a_.f64[((imm8 >> 2  ) & 1)+2] : a_.f64[(imm8 >> 2  ) & 1];\n  r_.f64[2] = (imm8 & 0x20) ? a_.f64[((imm8 >> 4  ) & 1)+2] : a_.f64[(imm8 >> 4  ) & 1];\n  r_.f64[3] = (imm8 & 0x80) ? 
a_.f64[((imm8 >> 6  ) & 1)+2] : a_.f64[(imm8 >> 6  ) & 1];\n\n  return simde__m256d_from_private(r_);\n}\n#if defined(SIMDE_X86_AVX2_NATIVE)\n#  define simde_mm256_permute4x64_pd(a, imm8) _mm256_permute4x64_pd(a, imm8)\n#endif\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_permute4x64_pd\n  #define _mm256_permute4x64_pd(a, imm8) simde_mm256_permute4x64_pd(a, imm8)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_permutevar8x32_epi32 (simde__m256i a, simde__m256i idx) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_permutevar8x32_epi32(a, idx);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      idx_ = simde__m256i_to_private(idx);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n      r_.i32[i] = a_.i32[idx_.i32[i] & 7];\n    }\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_permutevar8x32_epi32\n  #define _mm256_permutevar8x32_epi32(a, idx) simde_mm256_permutevar8x32_epi32(a, idx)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_permutevar8x32_ps (simde__m256 a, simde__m256i idx) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0)\n      return _mm256_permutevar8x32_ps(a, HEDLEY_REINTERPRET_CAST(simde__m256, idx));\n    #else\n      return _mm256_permutevar8x32_ps(a, idx);\n    #endif\n  #else\n    simde__m256_private\n      r_,\n      a_ = simde__m256_to_private(a);\n    simde__m256i_private\n      idx_ = simde__m256i_to_private(idx);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n      r_.f32[i] = a_.f32[idx_.i32[i] & 7];\n    }\n\n    return simde__m256_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_permutevar8x32_ps\n  #define _mm256_permutevar8x32_ps(a, idx) simde_mm256_permutevar8x32_ps(a, idx)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_sad_epu8 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_sad_epu8(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_sad_epu8(a_.m128i[0], b_.m128i[0]);\n      r_.m128i[1] = simde_mm_sad_epu8(a_.m128i[1], b_.m128i[1]);\n    #else\n      for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {\n        uint16_t tmp = 0;\n        SIMDE_VECTORIZE_REDUCTION(+:tmp)\n        for (size_t j = 0 ; j < ((sizeof(r_.u8) / sizeof(r_.u8[0])) / 4) ; j++) {\n          const size_t e = j + (i * 8);\n          tmp += (a_.u8[e] > b_.u8[e]) ? 
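/* absolute difference, avoiding unsigned wrap-around */ 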
(a_.u8[e] - b_.u8[e]) : (b_.u8[e] - a_.u8[e]);\n        }\n        r_.i64[i] = tmp;\n      }\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_sad_epu8\n  #define _mm256_sad_epu8(a, b) simde_mm256_sad_epu8(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_shuffle_epi8 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_shuffle_epi8(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_shuffle_epi8(a_.m128i[0], b_.m128i[0]);\n      r_.m128i[1] = simde_mm_shuffle_epi8(a_.m128i[1], b_.m128i[1]);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < ((sizeof(r_.u8) / sizeof(r_.u8[0])) / 2) ; i++) {\n        r_.u8[  i   ] = (b_.u8[  i   ] & 0x80) ? 0 : a_.u8[(b_.u8[  i   ] & 0x0f)     ];\n        r_.u8[i + 16] = (b_.u8[i + 16] & 0x80) ? 0 : a_.u8[(b_.u8[i + 16] & 0x0f) + 16];\n      }\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_shuffle_epi8\n  #define _mm256_shuffle_epi8(a, b) simde_mm256_shuffle_epi8(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_shuffle_epi32 (simde__m256i a, const int imm8)\n    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) {\n  simde__m256i_private\n    r_,\n    a_ = simde__m256i_to_private(a);\n\n  for (size_t i = 0 ; i < ((sizeof(r_.i32) / sizeof(r_.i32[0])) / 2) ; i++) {\n    r_.i32[i] = a_.i32[(imm8 >> (i * 2)) & 3];\n  }\n  for (size_t i = 0 ; i < ((sizeof(r_.i32) / sizeof(r_.i32[0])) / 2) ; i++) {\n    r_.i32[i + 4] = a_.i32[((imm8 >> (i * 2)) & 3) + 4];\n  }\n\n  return simde__m256i_from_private(r_);\n}\n#if defined(SIMDE_X86_AVX2_NATIVE)\n#  define simde_mm256_shuffle_epi32(a, imm8) _mm256_shuffle_epi32(a, imm8)\n#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) && !defined(__PGI)\n#  define simde_mm256_shuffle_epi32(a, imm8) \\\n     simde_mm256_set_m128i( \\\n       simde_mm_shuffle_epi32(simde_mm256_extracti128_si256(a, 1), (imm8)), \\\n       simde_mm_shuffle_epi32(simde_mm256_extracti128_si256(a, 0), (imm8)))\n#elif defined(SIMDE_SHUFFLE_VECTOR_)\n#  define simde_mm256_shuffle_epi32(a, imm8) (__extension__ ({ \\\n      const simde__m256i_private simde_tmp_a_ = simde__m256i_to_private(a); \\\n      simde__m256i_from_private((simde__m256i_private) { .i32 = \\\n          SIMDE_SHUFFLE_VECTOR_(32, 32, \\\n                                (simde_tmp_a_).i32, \\\n                                (simde_tmp_a_).i32, \\\n                                ((imm8)     ) & 3, \\\n                                ((imm8) >> 2) & 3, \\\n                                ((imm8) >> 4) & 3, \\\n                                ((imm8) >> 6) & 3, \\\n                                (((imm8)     ) & 3) + 4, \\\n                                (((imm8) >> 2) & 3) + 4, \\\n                                (((imm8) >> 4) & 3) + 4, \\\n                                (((imm8) >> 6) & 3) + 4) }); }))\n#endif\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_shuffle_epi32\n  #define _mm256_shuffle_epi32(a, imm8) simde_mm256_shuffle_epi32(a, imm8)\n#endif\n\n#if defined(SIMDE_X86_AVX2_NATIVE)\n#  define simde_mm256_shufflehi_epi16(a, imm8) _mm256_shufflehi_epi16(a, imm8)\n#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n#  define simde_mm256_shufflehi_epi16(a, 
imm8) \\\n     simde_mm256_set_m128i( \\\n       simde_mm_shufflehi_epi16(simde_mm256_extracti128_si256(a, 1), (imm8)), \\\n       simde_mm_shufflehi_epi16(simde_mm256_extracti128_si256(a, 0), (imm8)))\n#elif defined(SIMDE_SHUFFLE_VECTOR_)\n#  define simde_mm256_shufflehi_epi16(a, imm8) (__extension__ ({ \\\n      const simde__m256i_private simde_tmp_a_ = simde__m256i_to_private(a); \\\n      simde__m256i_from_private((simde__m256i_private) { .i16 = \\\n        SIMDE_SHUFFLE_VECTOR_(16, 32, \\\n          (simde_tmp_a_).i16, \\\n          (simde_tmp_a_).i16, \\\n          0, 1, 2, 3, \\\n          (((imm8)     ) & 3) + 4, \\\n          (((imm8) >> 2) & 3) + 4, \\\n          (((imm8) >> 4) & 3) + 4, \\\n          (((imm8) >> 6) & 3) + 4, \\\n          8, 9, 10, 11, \\\n          ((((imm8)     ) & 3) + 8 + 4), \\\n          ((((imm8) >> 2) & 3) + 8 + 4), \\\n          ((((imm8) >> 4) & 3) + 8 + 4), \\\n          ((((imm8) >> 6) & 3) + 8 + 4) \\\n          ) }); }))\n#else\n#  define simde_mm256_shufflehi_epi16(a, imm8) \\\n     simde_mm256_set_m128i( \\\n       simde_mm_shufflehi_epi16(simde_mm256_extracti128_si256(a, 1), imm8), \\\n       simde_mm_shufflehi_epi16(simde_mm256_extracti128_si256(a, 0), imm8))\n#endif\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_shufflehi_epi16\n  #define _mm256_shufflehi_epi16(a, imm8) simde_mm256_shufflehi_epi16(a, imm8)\n#endif\n\n#if defined(SIMDE_X86_AVX2_NATIVE)\n#  define simde_mm256_shufflelo_epi16(a, imm8) _mm256_shufflelo_epi16(a, imm8)\n#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n#  define simde_mm256_shufflelo_epi16(a, imm8) \\\n     simde_mm256_set_m128i( \\\n       simde_mm_shufflelo_epi16(simde_mm256_extracti128_si256(a, 1), (imm8)), \\\n       simde_mm_shufflelo_epi16(simde_mm256_extracti128_si256(a, 0), (imm8)))\n#elif defined(SIMDE_SHUFFLE_VECTOR_)\n#  define simde_mm256_shufflelo_epi16(a, imm8) (__extension__ ({ \\\n      const simde__m256i_private simde_tmp_a_ = simde__m256i_to_private(a); \\\n      simde__m256i_from_private((simde__m256i_private) { .i16 = \\\n        SIMDE_SHUFFLE_VECTOR_(16, 32, \\\n          (simde_tmp_a_).i16, \\\n          (simde_tmp_a_).i16, \\\n          (((imm8)     ) & 3), \\\n          (((imm8) >> 2) & 3), \\\n          (((imm8) >> 4) & 3), \\\n          (((imm8) >> 6) & 3), \\\n          4, 5, 6, 7, \\\n          ((((imm8)     ) & 3) + 8), \\\n          ((((imm8) >> 2) & 3) + 8), \\\n          ((((imm8) >> 4) & 3) + 8), \\\n          ((((imm8) >> 6) & 3) + 8), \\\n          12, 13, 14, 15) }); }))\n#else\n#  define simde_mm256_shufflelo_epi16(a, imm8) \\\n     simde_mm256_set_m128i( \\\n       simde_mm_shufflelo_epi16(simde_mm256_extracti128_si256(a, 1), imm8), \\\n       simde_mm_shufflelo_epi16(simde_mm256_extracti128_si256(a, 0), imm8))\n#endif\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_shufflelo_epi16\n  #define _mm256_shufflelo_epi16(a, imm8) simde_mm256_shufflelo_epi16(a, imm8)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_sign_epi8 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_sign_epi8(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {\n      r_.i8[i] = (b_.i8[i] == INT8_C(0)) ? INT8_C(0) : (b_.i8[i] < INT8_C(0)) ? 
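/* negate a where b is negative, pass a through where b is positive */ 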
-a_.i8[i] : a_.i8[i];\n    }\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_sign_epi8\n  #define _mm256_sign_epi8(a, b) simde_mm256_sign_epi8(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_sign_epi16 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_sign_epi16(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n      r_.i16[i] = (b_.i16[i] == INT16_C(0)) ? INT16_C(0) : (b_.i16[i] < INT16_C(0)) ? -a_.i16[i] : a_.i16[i];\n    }\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_sign_epi16\n  #define _mm256_sign_epi16(a, b) simde_mm256_sign_epi16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_sign_epi32(simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_sign_epi32(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) {\n      r_.i32[i] = (b_.i32[i] == INT32_C(0)) ? INT32_C(0) : (b_.i32[i] < INT32_C(0)) ? -a_.i32[i] : a_.i32[i];\n    }\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_sign_epi32\n  #define _mm256_sign_epi32(a, b) simde_mm256_sign_epi32(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_sll_epi16 (simde__m256i a, simde__m128i count) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_sll_epi16(a, count);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a);\n\n    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_sll_epi16(a_.m128i[0], count);\n      r_.m128i[1] = simde_mm_sll_epi16(a_.m128i[1], count);\n    #else\n      simde__m128i_private\n        count_ = simde__m128i_to_private(count);\n\n      uint64_t shift = HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]);\n      if (shift > 15)\n        return simde_mm256_setzero_si256();\n\n      #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)\n        r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, shift);\n      #else\n        SIMDE_VECTORIZE\n        for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n          r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] << (shift));\n        }\n      #endif\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_sll_epi16\n  #define _mm256_sll_epi16(a, count) simde_mm256_sll_epi16(a, count)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_sll_epi32 (simde__m256i a, simde__m128i count) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_sll_epi32(a, count);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a);\n\n    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_sll_epi32(a_.m128i[0], count);\n      r_.m128i[1] = simde_mm_sll_epi32(a_.m128i[1], count);\n    #else\n      simde__m128i_private\n        count_ = simde__m128i_to_private(count);\n\n      uint64_t shift = HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]);\n      if (shift > 31)\n        return 
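/* the single 64-bit count applies to every element; counts over 31 clear the whole vector */ 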
simde_mm256_setzero_si256();\n\n      #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)\n        r_.i32 = a_.i32 << HEDLEY_STATIC_CAST(int32_t, shift);\n      #else\n        SIMDE_VECTORIZE\n        for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n          r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, a_.i32[i] << (shift));\n        }\n      #endif\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_sll_epi32\n  #define _mm256_sll_epi32(a, count) simde_mm256_sll_epi32(a, count)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_sll_epi64 (simde__m256i a, simde__m128i count) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_sll_epi64(a, count);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a);\n\n    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_sll_epi64(a_.m128i[0], count);\n      r_.m128i[1] = simde_mm_sll_epi64(a_.m128i[1], count);\n    #else\n      simde__m128i_private\n        count_ = simde__m128i_to_private(count);\n\n      uint64_t shift = HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]);\n      if (shift > 63)\n        return simde_mm256_setzero_si256();\n\n      #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)\n        r_.i64 = a_.i64 << HEDLEY_STATIC_CAST(int64_t, shift);\n      #else\n        SIMDE_VECTORIZE\n        for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {\n          r_.i64[i] = HEDLEY_STATIC_CAST(int64_t, a_.i64[i] << (shift));\n        }\n      #endif\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_sll_epi64\n  #define _mm256_sll_epi64(a, count) simde_mm256_sll_epi64(a, count)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_slli_epi16 (simde__m256i a, const int imm8)\n    SIMDE_REQUIRE_RANGE(imm8, 0, 255) {\n  /* Note: There is no consistency in how compilers handle values outside of\n     the expected range, hence the discrepancy between what we allow and what\n     Intel specifies.  Some compilers will return 0, others seem to just mask\n     off everything outside of the range. 
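For\n     example, Intel specifies that _mm256_slli_epi16 with a count of 16\n     yields zero, yet a compiler that masks the count to its low four bits\n     would return a unchanged; the scalar fallback below shifts by\n     (imm8 & 0xff). 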
*/\n  simde__m256i_private\n    r_,\n    a_ = simde__m256i_to_private(a);\n\n  #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)\n    SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) sv = vec_splats(HEDLEY_STATIC_CAST(unsigned short, imm8));\n    for (size_t i = 0 ; i < (sizeof(a_.altivec_i16) / sizeof(a_.altivec_i16[0])) ; i++) {\n      r_.altivec_i16[i] = vec_sl(a_.altivec_i16[i], sv);\n    }\n  #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)\n    r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, imm8);\n  #else\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n      r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] << (imm8 & 0xff));\n    }\n  #endif\n\n  return simde__m256i_from_private(r_);\n}\n#if defined(SIMDE_X86_AVX2_NATIVE)\n#  define simde_mm256_slli_epi16(a, imm8) _mm256_slli_epi16(a, imm8)\n#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n#  define simde_mm256_slli_epi16(a, imm8) \\\n     simde_mm256_set_m128i( \\\n         simde_mm_slli_epi16(simde_mm256_extracti128_si256(a, 1), (imm8)), \\\n         simde_mm_slli_epi16(simde_mm256_extracti128_si256(a, 0), (imm8)))\n#endif\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_slli_epi16\n  #define _mm256_slli_epi16(a, imm8) simde_mm256_slli_epi16(a, imm8)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_slli_epi32 (simde__m256i a, const int imm8)\n    SIMDE_REQUIRE_RANGE(imm8, 0, 255) {\n  simde__m256i_private\n    r_,\n    a_ = simde__m256i_to_private(a);\n\n  #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)\n    SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) sv = vec_splats(HEDLEY_STATIC_CAST(unsigned int, imm8));\n    for (size_t i = 0 ; i < (sizeof(a_.altivec_i32) / sizeof(a_.altivec_i32[0])) ; i++) {\n      r_.altivec_i32[i] = vec_sl(a_.altivec_i32[i], sv);\n    }\n  #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)\n    r_.i32 = a_.i32 << HEDLEY_STATIC_CAST(int32_t, imm8);\n  #else\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n      r_.i32[i] = a_.i32[i] << (imm8 & 0xff);\n    }\n  #endif\n\n  return simde__m256i_from_private(r_);\n}\n#if defined(SIMDE_X86_AVX2_NATIVE)\n#  define simde_mm256_slli_epi32(a, imm8) _mm256_slli_epi32(a, imm8)\n#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n#  define simde_mm256_slli_epi32(a, imm8) \\\n     simde_mm256_set_m128i( \\\n         simde_mm_slli_epi32(simde_mm256_extracti128_si256(a, 1), (imm8)), \\\n         simde_mm_slli_epi32(simde_mm256_extracti128_si256(a, 0), (imm8)))\n#endif\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_slli_epi32\n  #define _mm256_slli_epi32(a, imm8) simde_mm256_slli_epi32(a, imm8)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_slli_epi64 (simde__m256i a, const int imm8)\n    SIMDE_REQUIRE_RANGE(imm8, 0, 255) {\n  simde__m256i_private\n    r_,\n    a_ = simde__m256i_to_private(a);\n\n#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)\n  r_.i64 = a_.i64 << HEDLEY_STATIC_CAST(int64_t, imm8);\n#else\n  SIMDE_VECTORIZE\n  for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {\n    r_.i64[i] = a_.i64[i] << (imm8 & 0xff);\n  }\n#endif\n\n  return simde__m256i_from_private(r_);\n}\n#if defined(SIMDE_X86_AVX2_NATIVE)\n#  define simde_mm256_slli_epi64(a, imm8) _mm256_slli_epi64(a, imm8)\n#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n#  define simde_mm256_slli_epi64(a, imm8) \\\n     simde_mm256_set_m128i( \\\n         simde_mm_slli_epi64(simde_mm256_extracti128_si256(a, 1), (imm8)), \\\n         
simde_mm_slli_epi64(simde_mm256_extracti128_si256(a, 0), (imm8)))\n#endif\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_slli_epi64\n  #define _mm256_slli_epi64(a, imm8) simde_mm256_slli_epi64(a, imm8)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_slli_si256 (simde__m256i a, const int imm8)\n    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) {\n  simde__m256i_private\n    r_,\n    a_ = simde__m256i_to_private(a);\n\n  for (size_t h = 0 ; h < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; h++) {\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.m128i_private[h].i8) / sizeof(r_.m128i_private[h].i8[0])) ; i++) {\n      const int e = HEDLEY_STATIC_CAST(int, i) - imm8;\n      r_.m128i_private[h].i8[i] = (e >= 0) ? a_.m128i_private[h].i8[e] : 0;\n    }\n  }\n\n  return simde__m256i_from_private(r_);\n}\n#if defined(SIMDE_X86_AVX2_NATIVE)\n#  define simde_mm256_slli_si256(a, imm8) _mm256_slli_si256(a, imm8)\n#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) && !defined(__PGI)\n#  define simde_mm256_slli_si256(a, imm8) \\\n     simde_mm256_set_m128i( \\\n         simde_mm_slli_si128(simde_mm256_extracti128_si256(a, 1), (imm8)), \\\n         simde_mm_slli_si128(simde_mm256_extracti128_si256(a, 0), (imm8)))\n#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n#  define simde_mm256_slli_si256(a, imm8) \\\n     simde_mm256_set_m128i( \\\n       simde_mm_bslli_si128(simde_mm256_extracti128_si256(a, 1), (imm8)), \\\n       simde_mm_bslli_si128(simde_mm256_extracti128_si256(a, 0), (imm8)))\n#endif\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_slli_si256\n  #define _mm256_slli_si256(a, imm8) simde_mm256_slli_si256(a, imm8)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_sllv_epi32 (simde__m128i a, simde__m128i b) {\n  simde__m128i_private\n    a_ = simde__m128i_to_private(a),\n    b_ = simde__m128i_to_private(b),\n    r_;\n\n  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n    r_.neon_u32 = vshlq_u32(a_.neon_u32, vreinterpretq_s32_u32(b_.neon_u32));\n    r_.neon_u32 = vandq_u32(r_.neon_u32, vcltq_u32(b_.neon_u32, vdupq_n_u32(32)));\n  #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)\n    r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), (b_.u32 < UINT32_C(32))) & (a_.u32 << b_.u32);\n  #else\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {\n      r_.u32[i] = (b_.u32[i] < 32) ? (a_.u32[i] << b_.u32[i]) : 0;\n    }\n  #endif\n\n  return simde__m128i_from_private(r_);\n}\n#if defined(SIMDE_X86_AVX2_NATIVE)\n  #define simde_mm_sllv_epi32(a, b) _mm_sllv_epi32(a, b)\n#endif\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm_sllv_epi32\n  #define _mm_sllv_epi32(a, b) simde_mm_sllv_epi32(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_sllv_epi32 (simde__m256i a, simde__m256i b) {\n  simde__m256i_private\n    a_ = simde__m256i_to_private(a),\n    b_ = simde__m256i_to_private(b),\n    r_;\n\n  #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n    r_.m128i[0] = simde_mm_sllv_epi32(a_.m128i[0], b_.m128i[0]);\n    r_.m128i[1] = simde_mm_sllv_epi32(a_.m128i[1], b_.m128i[1]);\n  #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)\n    r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), (b_.u32 < 32)) & (a_.u32 << b_.u32);\n  #else\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {\n      r_.u32[i] = (b_.u32[i] < 32) ? 
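/* per-element count; counts of 32 or more zero the element */ 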
(a_.u32[i] << b_.u32[i]) : 0;\n    }\n  #endif\n\n  return simde__m256i_from_private(r_);\n}\n#if defined(SIMDE_X86_AVX2_NATIVE)\n  #define simde_mm256_sllv_epi32(a, b) _mm256_sllv_epi32(a, b)\n#endif\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_sllv_epi32\n  #define _mm256_sllv_epi32(a, b) simde_mm256_sllv_epi32(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_sllv_epi64 (simde__m128i a, simde__m128i b) {\n  simde__m128i_private\n    a_ = simde__m128i_to_private(a),\n    b_ = simde__m128i_to_private(b),\n    r_;\n\n  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n    r_.neon_u64 = vshlq_u64(a_.neon_u64, vreinterpretq_s64_u64(b_.neon_u64));\n    r_.neon_u64 = vandq_u64(r_.neon_u64, vcltq_u64(b_.neon_u64, vdupq_n_u64(64)));\n  #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)\n    r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), (b_.u64 < 64)) & (a_.u64 << b_.u64);\n  #else\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {\n      r_.u64[i] = (b_.u64[i] < 64) ? (a_.u64[i] << b_.u64[i]) : 0;\n    }\n  #endif\n\n  return simde__m128i_from_private(r_);\n}\n#if defined(SIMDE_X86_AVX2_NATIVE)\n  #define simde_mm_sllv_epi64(a, b) _mm_sllv_epi64(a, b)\n#endif\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm_sllv_epi64\n  #define _mm_sllv_epi64(a, b) simde_mm_sllv_epi64(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_sllv_epi64 (simde__m256i a, simde__m256i b) {\n  simde__m256i_private\n    a_ = simde__m256i_to_private(a),\n    b_ = simde__m256i_to_private(b),\n    r_;\n\n  #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n    r_.m128i[0] = simde_mm_sllv_epi64(a_.m128i[0], b_.m128i[0]);\n    r_.m128i[1] = simde_mm_sllv_epi64(a_.m128i[1], b_.m128i[1]);\n  #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)\n    r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), (b_.u64 < 64)) & (a_.u64 << b_.u64);\n  #else\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {\n      r_.u64[i] = (b_.u64[i] < 64) ? 
(a_.u64[i] << b_.u64[i]) : 0;\n    }\n  #endif\n\n  return simde__m256i_from_private(r_);\n}\n#if defined(SIMDE_X86_AVX2_NATIVE)\n  #define simde_mm256_sllv_epi64(a, b) _mm256_sllv_epi64(a, b)\n#endif\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_sllv_epi64\n  #define _mm256_sllv_epi64(a, b) simde_mm256_sllv_epi64(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_sra_epi16 (simde__m256i a, simde__m128i count) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_sra_epi16(a, count);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a);\n\n    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_sra_epi16(a_.m128i[0], count);\n      r_.m128i[1] = simde_mm_sra_epi16(a_.m128i[1], count);\n    #else\n      simde__m128i_private\n        count_ = simde__m128i_to_private(count);\n\n      uint64_t shift = HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]);\n\n      if (shift > 15) shift = 15;\n\n      #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)\n        r_.i16 = a_.i16 >> HEDLEY_STATIC_CAST(int16_t, shift);\n      #else\n        SIMDE_VECTORIZE\n        for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n          r_.i16[i] = a_.i16[i] >> shift;\n        }\n      #endif\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_sra_epi16\n  #define _mm256_sra_epi16(a, count) simde_mm256_sra_epi16(a, count)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_sra_epi32 (simde__m256i a, simde__m128i count) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_sra_epi32(a, count);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a);\n\n    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_sra_epi32(a_.m128i[0], count);\n      r_.m128i[1] = simde_mm_sra_epi32(a_.m128i[1], count);\n    #else\n      simde__m128i_private\n        count_ = simde__m128i_to_private(count);\n      uint64_t shift = HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]);\n\n      if (shift > 31) shift = 31;\n\n      #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)\n        r_.i32 = a_.i32 >> HEDLEY_STATIC_CAST(int16_t, shift);\n      #else\n        SIMDE_VECTORIZE\n        for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n          r_.i32[i] = a_.i32[i] >> shift;\n        }\n      #endif\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_sra_epi32\n  #define _mm256_sra_epi32(a, count) simde_mm256_sra_epi32(a, count)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_srai_epi16 (simde__m256i a, const int imm8)\n    SIMDE_REQUIRE_RANGE(imm8, 0, 255) {\n  simde__m256i_private\n    r_,\n    a_ = simde__m256i_to_private(a);\n  unsigned int shift = HEDLEY_STATIC_CAST(unsigned int, imm8);\n\n  if (shift > 15) shift = 15;\n\n  #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)\n    r_.i16 = a_.i16 >> HEDLEY_STATIC_CAST(int16_t, shift);\n  #else\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n      r_.i16[i] = a_.i16[i] >> shift;\n    }\n  #endif\n\n  return simde__m256i_from_private(r_);\n}\n#if defined(SIMDE_X86_AVX2_NATIVE)\n#  define simde_mm256_srai_epi16(a, imm8) _mm256_srai_epi16(a, imm8)\n#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n#  define simde_mm256_srai_epi16(a, imm8) \\\n     simde_mm256_set_m128i( \\\n         
simde_mm_srai_epi16(simde_mm256_extracti128_si256(a, 1), (imm8)), \\\n         simde_mm_srai_epi16(simde_mm256_extracti128_si256(a, 0), (imm8)))\n#endif\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_srai_epi16\n  #define _mm256_srai_epi16(a, imm8) simde_mm256_srai_epi16(a, imm8)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_srai_epi32 (simde__m256i a, const int imm8)\n    SIMDE_REQUIRE_RANGE(imm8, 0, 255) {\n  simde__m256i_private\n    r_,\n    a_ = simde__m256i_to_private(a);\n  unsigned int shift = HEDLEY_STATIC_CAST(unsigned int, imm8);\n\n  if (shift > 31) shift = 31;\n\n  #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)\n    r_.i32 = a_.i32 >> HEDLEY_STATIC_CAST(int16_t, shift);\n  #else\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n      r_.i32[i] = a_.i32[i] >> shift;\n    }\n  #endif\n\n  return simde__m256i_from_private(r_);\n}\n#if defined(SIMDE_X86_AVX2_NATIVE)\n#  define simde_mm256_srai_epi32(a, imm8) _mm256_srai_epi32(a, imm8)\n#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n#  define simde_mm256_srai_epi32(a, imm8) \\\n     simde_mm256_set_m128i( \\\n         simde_mm_srai_epi32(simde_mm256_extracti128_si256(a, 1), (imm8)), \\\n         simde_mm_srai_epi32(simde_mm256_extracti128_si256(a, 0), (imm8)))\n#endif\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_srai_epi32\n  #define _mm256_srai_epi32(a, imm8) simde_mm256_srai_epi32(a, imm8)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_srav_epi32 (simde__m128i a, simde__m128i count) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm_srav_epi32(a, count);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      count_ = simde__m128i_to_private(count);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      int32x4_t cnt = vreinterpretq_s32_u32(vminq_u32(count_.neon_u32, vdupq_n_u32(31)));\n      r_.neon_i32 = vshlq_s32(a_.neon_i32, vnegq_s32(cnt));\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n        uint32_t shift = HEDLEY_STATIC_CAST(uint32_t, count_.i32[i]);\n        r_.i32[i] = a_.i32[i] >> HEDLEY_STATIC_CAST(int, shift > 31 ? 
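/* arithmetic shift: the count saturates at 31, leaving only the sign */ 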
31 : shift);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm_srav_epi32\n  #define _mm_srav_epi32(a, count) simde_mm_srav_epi32(a, count)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_srav_epi32 (simde__m256i a, simde__m256i count) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_srav_epi32(a, count);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      count_ = simde__m256i_to_private(count);\n\n    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_srav_epi32(a_.m128i[0], count_.m128i[0]);\n      r_.m128i[1] = simde_mm_srav_epi32(a_.m128i[1], count_.m128i[1]);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n        uint32_t shift = HEDLEY_STATIC_CAST(uint32_t, count_.i32[i]);\n        if (shift > 31) shift = 31;\n        r_.i32[i] = a_.i32[i] >> shift;\n      }\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_srav_epi32\n  #define _mm256_srav_epi32(a, count) simde_mm256_srav_epi32(a, count)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_srl_epi16 (simde__m256i a, simde__m128i count) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_srl_epi16(a, count);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a);\n\n    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_srl_epi16(a_.m128i[0], count);\n      r_.m128i[1] = simde_mm_srl_epi16(a_.m128i[1], count);\n    #else\n      simde__m128i_private\n        count_ = simde__m128i_to_private(count);\n\n      uint64_t shift = HEDLEY_STATIC_CAST(uint64_t , (count_.i64[0] > 16 ? 16 : count_.i64[0]));\n\n      #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)\n        r_.u16 = a_.u16 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(16, shift);\n      #else\n        SIMDE_VECTORIZE\n        for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n          r_.u16[i] = a_.u16[i] >> (shift);\n        }\n      #endif\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_srl_epi16\n  #define _mm256_srl_epi16(a, count) simde_mm256_srl_epi16(a, count)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_srl_epi32 (simde__m256i a, simde__m128i count) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_srl_epi32(a, count);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a);\n\n    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_srl_epi32(a_.m128i[0], count);\n      r_.m128i[1] = simde_mm_srl_epi32(a_.m128i[1], count);\n    #else\n      simde__m128i_private\n        count_ = simde__m128i_to_private(count);\n\n      uint64_t shift = HEDLEY_STATIC_CAST(uint64_t , (count_.i64[0] > 32 ? 
32 : count_.i64[0]));\n\n      #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)\n        r_.u32 = a_.u32 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(32, shift);\n      #else\n        SIMDE_VECTORIZE\n        for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n          r_.u32[i] = a_.u32[i] >> (shift);\n        }\n      #endif\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_srl_epi32\n  #define _mm256_srl_epi32(a, count) simde_mm256_srl_epi32(a, count)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_srl_epi64 (simde__m256i a, simde__m128i count) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_srl_epi64(a, count);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a);\n\n    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_srl_epi64(a_.m128i[0], count);\n      r_.m128i[1] = simde_mm_srl_epi64(a_.m128i[1], count);\n    #else\n      simde__m128i_private\n        count_ = simde__m128i_to_private(count);\n\n      uint64_t shift = HEDLEY_STATIC_CAST(uint64_t , (count_.i64[0] > 64 ? 64 : count_.i64[0]));\n\n      #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)\n        r_.u64 = a_.u64 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(64, shift);\n      #else\n        SIMDE_VECTORIZE\n        for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {\n          r_.u64[i] = a_.u64[i] >> (shift);\n        }\n      #endif\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_srl_epi64\n  #define _mm256_srl_epi64(a, count) simde_mm256_srl_epi64(a, count)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_srli_epi16 (simde__m256i a, const int imm8)\n    SIMDE_REQUIRE_RANGE(imm8, 0, 255) {\n  simde__m256i_private\n    r_,\n    a_ = simde__m256i_to_private(a);\n\n  if (imm8 > 15)\n    return simde_mm256_setzero_si256();\n\n  #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)\n    SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) sv = vec_splats(HEDLEY_STATIC_CAST(unsigned short, imm8));\n    for (size_t i = 0 ; i < (sizeof(a_.altivec_u16) / sizeof(a_.altivec_u16[0])) ; i++) {\n      r_.altivec_u16[i] = vec_sr(a_.altivec_u16[i], sv);\n    }\n  #else\n    if (HEDLEY_STATIC_CAST(unsigned int, imm8) > 15) {\n      simde_memset(&r_, 0, sizeof(r_));\n    } else {\n      #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)\n        r_.u16 = a_.u16 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(16, imm8);\n      #else\n        SIMDE_VECTORIZE\n        for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {\n          r_.u16[i] = a_.u16[i] >> imm8;\n        }\n      #endif\n    }\n  #endif\n\n  return simde__m256i_from_private(r_);\n}\n#if defined(SIMDE_X86_AVX2_NATIVE)\n#  define simde_mm256_srli_epi16(a, imm8) _mm256_srli_epi16(a, imm8)\n#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n#  define simde_mm256_srli_epi16(a, imm8) \\\n     simde_mm256_set_m128i( \\\n         simde_mm_srli_epi16(simde_mm256_extracti128_si256(a, 1), (imm8)), \\\n         simde_mm_srli_epi16(simde_mm256_extracti128_si256(a, 0), (imm8)))\n#endif\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_srli_epi16\n  #define _mm256_srli_epi16(a, imm8) simde_mm256_srli_epi16(a, imm8)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_srli_epi32 (simde__m256i a, const int imm8)\n    SIMDE_REQUIRE_RANGE(imm8, 0, 255) {\n  simde__m256i_private\n    r_,\n    a_ = 
simde__m256i_to_private(a);\n\n  #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)\n    SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) sv = vec_splats(HEDLEY_STATIC_CAST(unsigned int, imm8));\n    for (size_t i = 0 ; i < (sizeof(a_.altivec_u32) / sizeof(a_.altivec_u32[0])) ; i++) {\n      r_.altivec_u32[i] = vec_sr(a_.altivec_u32[i], sv);\n    }\n  #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)\n    r_.u32 = a_.u32 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(16, imm8);\n  #else\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {\n      r_.u32[i] = a_.u32[i] >> imm8;\n    }\n  #endif\n\n  return simde__m256i_from_private(r_);\n}\n#if defined(SIMDE_X86_AVX2_NATIVE)\n#  define simde_mm256_srli_epi32(a, imm8) _mm256_srli_epi32(a, imm8)\n#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n#  define simde_mm256_srli_epi32(a, imm8) \\\n     simde_mm256_set_m128i( \\\n         simde_mm_srli_epi32(simde_mm256_extracti128_si256(a, 1), (imm8)), \\\n         simde_mm_srli_epi32(simde_mm256_extracti128_si256(a, 0), (imm8)))\n#endif\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_srli_epi32\n  #define _mm256_srli_epi32(a, imm8) simde_mm256_srli_epi32(a, imm8)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_srli_epi64 (simde__m256i a, const int imm8)\n    SIMDE_REQUIRE_RANGE(imm8, 0, 255) {\n  simde__m256i_private\n    r_,\n    a_ = simde__m256i_to_private(a);\n\n#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)\n  r_.u64 = a_.u64 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(32, imm8);\n#else\n  SIMDE_VECTORIZE\n  for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {\n    r_.u64[i] = a_.u64[i] >> imm8;\n  }\n#endif\n\n  return simde__m256i_from_private(r_);\n}\n#if defined(SIMDE_X86_AVX2_NATIVE)\n#  define simde_mm256_srli_epi64(a, imm8) _mm256_srli_epi64(a, imm8)\n#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n#  define simde_mm256_srli_epi64(a, imm8) \\\n     simde_mm256_set_m128i( \\\n         simde_mm_srli_epi64(simde_mm256_extracti128_si256(a, 1), (imm8)), \\\n         simde_mm_srli_epi64(simde_mm256_extracti128_si256(a, 0), (imm8)))\n#endif\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_srli_epi64\n  #define _mm256_srli_epi64(a, imm8) simde_mm256_srli_epi64(a, imm8)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_srli_si256 (simde__m256i a, const int imm8)\n    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) {\n  simde__m256i_private\n    r_,\n    a_ = simde__m256i_to_private(a);\n\n  for (size_t h = 0 ; h < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; h++) {\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.m128i_private[h].i8) / sizeof(r_.m128i_private[h].i8[0])) ; i++) {\n      const int e = imm8 + HEDLEY_STATIC_CAST(int, i);\n      r_.m128i_private[h].i8[i] = (e < 16) ? 
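/* bytes shifted in from beyond each 128-bit lane are zero */ 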
a_.m128i_private[h].i8[e] : 0;\n    }\n  }\n\n  return simde__m256i_from_private(r_);\n}\n#if defined(SIMDE_X86_AVX2_NATIVE)\n#  define simde_mm256_srli_si256(a, imm8) _mm256_srli_si256(a, imm8)\n#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) && !defined(__PGI)\n#  define simde_mm256_srli_si256(a, imm8) \\\n     simde_mm256_set_m128i( \\\n         simde_mm_srli_si128(simde_mm256_extracti128_si256(a, 1), (imm8)), \\\n         simde_mm_srli_si128(simde_mm256_extracti128_si256(a, 0), (imm8)))\n#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n#  define simde_mm256_srli_si256(a, imm8) \\\n     simde_mm256_set_m128i( \\\n       simde_mm_bsrli_si128(simde_mm256_extracti128_si256(a, 1), (imm8)), \\\n       simde_mm_bsrli_si128(simde_mm256_extracti128_si256(a, 0), (imm8)))\n#endif\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_srli_si256\n  #define _mm256_srli_si256(a, imm8) simde_mm256_srli_si256(a, imm8)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_srlv_epi32 (simde__m128i a, simde__m128i b) {\n  simde__m128i_private\n    a_ = simde__m128i_to_private(a),\n    b_ = simde__m128i_to_private(b),\n    r_;\n\n  #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)\n    r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), (b_.u32 < 32)) & (a_.u32 >> b_.u32);\n  #else\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {\n      r_.u32[i] = (b_.u32[i] < 32) ? (a_.u32[i] >> b_.u32[i]) : 0;\n    }\n  #endif\n\n  return simde__m128i_from_private(r_);\n}\n#if defined(SIMDE_X86_AVX2_NATIVE)\n  #define simde_mm_srlv_epi32(a, b) _mm_srlv_epi32(a, b)\n#endif\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm_srlv_epi32\n  #define _mm_srlv_epi32(a, b) simde_mm_srlv_epi32(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_srlv_epi32 (simde__m256i a, simde__m256i b) {\n  simde__m256i_private\n    a_ = simde__m256i_to_private(a),\n    b_ = simde__m256i_to_private(b),\n    r_;\n\n  #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)\n    r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), (b_.u32 < 32)) & (a_.u32 >> b_.u32);\n  #else\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {\n      r_.u32[i] = (b_.u32[i] < 32) ? (a_.u32[i] >> b_.u32[i]) : 0;\n    }\n  #endif\n\n  return simde__m256i_from_private(r_);\n}\n#if defined(SIMDE_X86_AVX2_NATIVE)\n  #define simde_mm256_srlv_epi32(a, b) _mm256_srlv_epi32(a, b)\n#endif\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_srlv_epi32\n  #define _mm256_srlv_epi32(a, b) simde_mm256_srlv_epi32(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_srlv_epi64 (simde__m128i a, simde__m128i b) {\n  simde__m128i_private\n    a_ = simde__m128i_to_private(a),\n    b_ = simde__m128i_to_private(b),\n    r_;\n\n  #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)\n    r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), (b_.u64 < 64)) & (a_.u64 >> b_.u64);\n  #else\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {\n      r_.u64[i] = (b_.u64[i] < 64) ? 
(a_.u64[i] >> b_.u64[i]) : 0;\n    }\n  #endif\n\n  return simde__m128i_from_private(r_);\n}\n#if defined(SIMDE_X86_AVX2_NATIVE)\n  #define simde_mm_srlv_epi64(a, b) _mm_srlv_epi64(a, b)\n#endif\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm_srlv_epi64\n  #define _mm_srlv_epi64(a, b) simde_mm_srlv_epi64(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_srlv_epi64 (simde__m256i a, simde__m256i b) {\n  simde__m256i_private\n    a_ = simde__m256i_to_private(a),\n    b_ = simde__m256i_to_private(b),\n    r_;\n\n  #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)\n    r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), (b_.u64 < 64)) & (a_.u64 >> b_.u64);\n  #else\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {\n      r_.u64[i] = (b_.u64[i] < 64) ? (a_.u64[i] >> b_.u64[i]) : 0;\n    }\n  #endif\n\n  return simde__m256i_from_private(r_);\n}\n#if defined(SIMDE_X86_AVX2_NATIVE)\n  #define simde_mm256_srlv_epi64(a, b) _mm256_srlv_epi64(a, b)\n#endif\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_srlv_epi64\n  #define _mm256_srlv_epi64(a, b) simde_mm256_srlv_epi64(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_stream_load_si256 (const simde__m256i* mem_addr) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_stream_load_si256(HEDLEY_CONST_CAST(simde__m256i*, mem_addr));\n  #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && defined(SIMDE_VECTOR_SUBSCRIPT)\n    return __builtin_nontemporal_load(mem_addr);\n  #else\n    simde__m256i r;\n    simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), sizeof(r));\n    return r;\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n#  define _mm256_stream_load_si256(mem_addr) simde_mm256_stream_load_si256(mem_addr)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_sub_epi8 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_sub_epi8(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_sub_epi8(a_.m128i[0], b_.m128i[0]);\n      r_.m128i[1] = simde_mm_sub_epi8(a_.m128i[1], b_.m128i[1]);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i8 = a_.i8 - b_.i8;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {\n        r_.i8[i] = a_.i8[i] - b_.i8[i];\n      }\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_sub_epi8\n  #define _mm256_sub_epi8(a, b) simde_mm256_sub_epi8(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_sub_epi16 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_sub_epi16(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_sub_epi16(a_.m128i[0], b_.m128i[0]);\n      r_.m128i[1] = simde_mm_sub_epi16(a_.m128i[1], b_.m128i[1]);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i16 = a_.i16 - b_.i16;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n        r_.i16[i] = a_.i16[i] - b_.i16[i];\n      }\n    #endif\n\n    return 
simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_sub_epi16\n  #define _mm256_sub_epi16(a, b) simde_mm256_sub_epi16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_hsub_epi16 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_hsub_epi16(a, b);\n  #else\n    return simde_mm256_sub_epi16(simde_x_mm256_deinterleaveeven_epi16(a, b), simde_x_mm256_deinterleaveodd_epi16(a, b));\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_hsub_epi16\n  #define _mm256_hsub_epi16(a, b) simde_mm256_hsub_epi16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_sub_epi32 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_sub_epi32(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_sub_epi32(a_.m128i[0], b_.m128i[0]);\n      r_.m128i[1] = simde_mm_sub_epi32(a_.m128i[1], b_.m128i[1]);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i32 = a_.i32 - b_.i32;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n        r_.i32[i] = a_.i32[i] - b_.i32[i];\n      }\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_sub_epi32\n  #define _mm256_sub_epi32(a, b) simde_mm256_sub_epi32(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_hsub_epi32 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_hsub_epi32(a, b);\n  #else\n    return simde_mm256_sub_epi32(simde_x_mm256_deinterleaveeven_epi32(a, b), simde_x_mm256_deinterleaveodd_epi32(a, b));\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_hsub_epi32\n  #define _mm256_hsub_epi32(a, b) simde_mm256_hsub_epi32(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_sub_epi64 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_sub_epi64(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_sub_epi64(a_.m128i[0], b_.m128i[0]);\n      r_.m128i[1] = simde_mm_sub_epi64(a_.m128i[1], b_.m128i[1]);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i64 = a_.i64 - b_.i64;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {\n        r_.i64[i] = a_.i64[i] - b_.i64[i];\n      }\n    #endif\n\n  return simde__m256i_from_private(r_);\n#endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_sub_epi64\n  #define _mm256_sub_epi64(a, b) simde_mm256_sub_epi64(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_x_mm256_sub_epu32 (simde__m256i a, simde__m256i b) {\n  simde__m256i_private\n    r_,\n    a_ = simde__m256i_to_private(a),\n    b_ = simde__m256i_to_private(b);\n\n  #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n    r_.u32 = a_.u32 - b_.u32;\n  #elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n    r_.m128i[0] = simde_x_mm_sub_epu32(a_.m128i[0], b_.m128i[0]);\n    r_.m128i[1] = simde_x_mm_sub_epu32(a_.m128i[1], b_.m128i[1]);\n  #else\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < 
(sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {\n      r_.u32[i] = a_.u32[i] - b_.u32[i];\n    }\n  #endif\n\n  return simde__m256i_from_private(r_);\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_subs_epi8 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_subs_epi8(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_subs_epi8(a_.m128i[0], b_.m128i[0]);\n      r_.m128i[1] = simde_mm_subs_epi8(a_.m128i[1], b_.m128i[1]);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {\n        r_.i8[i] = simde_math_subs_i8(a_.i8[i], b_.i8[i]);\n      }\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_subs_epi8\n  #define _mm256_subs_epi8(a, b) simde_mm256_subs_epi8(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_subs_epi16(simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_subs_epi16(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_subs_epi16(a_.m128i[0], b_.m128i[0]);\n      r_.m128i[1] = simde_mm_subs_epi16(a_.m128i[1], b_.m128i[1]);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n        r_.i16[i] = simde_math_subs_i16(a_.i16[i], b_.i16[i]);\n      }\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_subs_epi16\n  #define _mm256_subs_epi16(a, b) simde_mm256_subs_epi16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_hsubs_epi16 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_hsubs_epi16(a, b);\n  #else\n    return simde_mm256_subs_epi16(simde_x_mm256_deinterleaveeven_epi16(a, b), simde_x_mm256_deinterleaveodd_epi16(a, b));\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_hsubs_epi16\n  #define _mm256_hsubs_epi16(a, b) simde_mm256_hsubs_epi16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_subs_epu8 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_subs_epu8(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_subs_epu8(a_.m128i[0], b_.m128i[0]);\n      r_.m128i[1] = simde_mm_subs_epu8(a_.m128i[1], b_.m128i[1]);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {\n        r_.u8[i] = simde_math_subs_u8(a_.u8[i], b_.u8[i]);\n      }\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_subs_epu8\n  #define _mm256_subs_epu8(a, b) simde_mm256_subs_epu8(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_subs_epu16(simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_subs_epu16(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = 
simde__m256i_to_private(b);\n\n    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_subs_epu16(a_.m128i[0], b_.m128i[0]);\n      r_.m128i[1] = simde_mm_subs_epu16(a_.m128i[1], b_.m128i[1]);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {\n        r_.u16[i] = simde_math_subs_u16(a_.u16[i], b_.u16[i]);\n      }\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_subs_epu16\n  #define _mm256_subs_epu16(a, b) simde_mm256_subs_epu16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nint\nsimde_x_mm256_test_all_ones (simde__m256i a) {\n  simde__m256i_private a_ = simde__m256i_to_private(a);\n  int r;\n  int_fast32_t r_ = ~HEDLEY_STATIC_CAST(int_fast32_t, 0);\n\n  SIMDE_VECTORIZE_REDUCTION(&:r_)\n  for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) {\n    r_ &= a_.i32f[i];\n  }\n\n  r = (r_ == ~HEDLEY_STATIC_CAST(int_fast32_t, 0));\n\n  return r;\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_unpacklo_epi8 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_unpacklo_epi8(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_unpacklo_epi8(a_.m128i[0], b_.m128i[0]);\n      r_.m128i[1] = simde_mm_unpacklo_epi8(a_.m128i[1], b_.m128i[1]);\n    #elif defined(SIMDE_SHUFFLE_VECTOR_)\n      r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 32, a_.i8, b_.i8,\n           0, 32,  1, 33,  2, 34,  3, 35,\n           4, 36,  5, 37,  6, 38,  7, 39,\n          16, 48, 17, 49, 18, 50, 19, 51,\n          20, 52, 21, 53, 22, 54, 23, 55);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0]) / 2) ; i++) {\n        r_.i8[2 * i] = a_.i8[i + ~(~i | 7)];\n        r_.i8[2 * i + 1] = b_.i8[i + ~(~i | 7)];\n      }\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_unpacklo_epi8\n  #define _mm256_unpacklo_epi8(a, b) simde_mm256_unpacklo_epi8(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_unpacklo_epi16 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_unpacklo_epi16(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_unpacklo_epi16(a_.m128i[0], b_.m128i[0]);\n      r_.m128i[1] = simde_mm_unpacklo_epi16(a_.m128i[1], b_.m128i[1]);\n    #elif defined(SIMDE_SHUFFLE_VECTOR_)\n      r_.i16 =SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16,\n        0, 16, 1, 17, 2, 18, 3, 19, 8, 24, 9, 25, 10, 26, 11, 27);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0]) / 2) ; i++) {\n        r_.i16[2 * i] = a_.i16[i + ~(~i | 3)];\n        r_.i16[2 * i + 1] = b_.i16[i + ~(~i | 3)];\n      }\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_unpacklo_epi16\n  #define _mm256_unpacklo_epi16(a, b) simde_mm256_unpacklo_epi16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_unpacklo_epi32 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return 
_mm256_unpacklo_epi32(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_unpacklo_epi32(a_.m128i[0], b_.m128i[0]);\n      r_.m128i[1] = simde_mm_unpacklo_epi32(a_.m128i[1], b_.m128i[1]);\n    #elif defined(SIMDE_SHUFFLE_VECTOR_)\n      r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32,\n                                    0, 8, 1, 9, 4, 12, 5, 13);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0]) / 2) ; i++) {\n        r_.i32[2 * i] = a_.i32[i + ~(~i | 1)];\n        r_.i32[2 * i + 1] = b_.i32[i + ~(~i | 1)];\n      }\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_unpacklo_epi32\n  #define _mm256_unpacklo_epi32(a, b) simde_mm256_unpacklo_epi32(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_unpacklo_epi64 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_unpacklo_epi64(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_unpacklo_epi64(a_.m128i[0], b_.m128i[0]);\n      r_.m128i[1] = simde_mm_unpacklo_epi64(a_.m128i[1], b_.m128i[1]);\n    #elif defined(SIMDE_SHUFFLE_VECTOR_)\n      r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.i64, b_.i64, 0, 4, 2, 6);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0]) / 2) ; i++) {\n        r_.i64[2 * i] = a_.i64[2 * i];\n        r_.i64[2 * i + 1] = b_.i64[2 * i];\n      }\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_unpacklo_epi64\n  #define _mm256_unpacklo_epi64(a, b) simde_mm256_unpacklo_epi64(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_unpackhi_epi8 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_unpackhi_epi8(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_unpackhi_epi8(a_.m128i[0], b_.m128i[0]);\n      r_.m128i[1] = simde_mm_unpackhi_epi8(a_.m128i[1], b_.m128i[1]);\n    #elif defined(SIMDE_SHUFFLE_VECTOR_)\n      r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 32, a_.i8, b_.i8,\n           8, 40,  9, 41, 10, 42, 11, 43,\n          12, 44, 13, 45, 14, 46, 15, 47,\n          24, 56, 25, 57, 26, 58, 27, 59,\n          28, 60, 29, 61, 30, 62, 31, 63);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0]) / 2) ; i++) {\n        r_.i8[2 * i] = a_.i8[i + 8 + ~(~i | 7)];\n        r_.i8[2 * i + 1] = b_.i8[i + 8 + ~(~i | 7)];\n      }\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_unpackhi_epi8\n  #define _mm256_unpackhi_epi8(a, b) simde_mm256_unpackhi_epi8(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_unpackhi_epi16 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_unpackhi_epi16(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = 
simde__m256i_to_private(b);\n\n    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_unpackhi_epi16(a_.m128i[0], b_.m128i[0]);\n      r_.m128i[1] = simde_mm_unpackhi_epi16(a_.m128i[1], b_.m128i[1]);\n    #elif defined(SIMDE_SHUFFLE_VECTOR_)\n      r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16,\n         4, 20,  5, 21,  6, 22,  7, 23,\n        12, 28, 13, 29, 14, 30, 15, 31);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0]) / 2) ; i++) {\n        r_.i16[2 * i] = a_.i16[i + 4 + ~(~i | 3)];\n        r_.i16[2 * i + 1] = b_.i16[i + 4 + ~(~i | 3)];\n      }\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_unpackhi_epi16\n  #define _mm256_unpackhi_epi16(a, b) simde_mm256_unpackhi_epi16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_unpackhi_epi32 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_unpackhi_epi32(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_unpackhi_epi32(a_.m128i[0], b_.m128i[0]);\n      r_.m128i[1] = simde_mm_unpackhi_epi32(a_.m128i[1], b_.m128i[1]);\n    #elif defined(SIMDE_SHUFFLE_VECTOR_)\n      r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32,\n                                    2, 10, 3, 11, 6, 14, 7, 15);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0]) / 2) ; i++) {\n        r_.i32[2 * i] = a_.i32[i + 2 + ~(~i | 1)];\n        r_.i32[2 * i + 1] = b_.i32[i + 2 + ~(~i | 1)];\n      }\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_unpackhi_epi32\n  #define _mm256_unpackhi_epi32(a, b) simde_mm256_unpackhi_epi32(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_unpackhi_epi64 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_unpackhi_epi64(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_unpackhi_epi64(a_.m128i[0], b_.m128i[0]);\n      r_.m128i[1] = simde_mm_unpackhi_epi64(a_.m128i[1], b_.m128i[1]);\n    #elif defined(SIMDE_SHUFFLE_VECTOR_)\n      r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.i64, b_.i64, 1, 5, 3, 7);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0]) / 2) ; i++) {\n        r_.i64[2 * i] = a_.i64[2 * i + 1];\n        r_.i64[2 * i + 1] = b_.i64[2 * i + 1];\n      }\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_unpackhi_epi64\n  #define _mm256_unpackhi_epi64(a, b) simde_mm256_unpackhi_epi64(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_xor_si256 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_xor_si256(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)\n      r_.m128i[0] = simde_mm_xor_si128(a_.m128i[0], b_.m128i[0]);\n      r_.m128i[1] = simde_mm_xor_si128(a_.m128i[1], 
b_.m128i[1]);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i32f = a_.i32f ^ b_.i32f;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {\n        r_.i64[i] = a_.i64[i] ^ b_.i64[i];\n      }\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_xor_si256\n  #define _mm256_xor_si256(a, b) simde_mm256_xor_si256(a, b)\n#endif\n\nSIMDE_END_DECLS_\n\nHEDLEY_DIAGNOSTIC_POP\n\n#endif /* !defined(SIMDE_X86_AVX2_H) */\n"
  },
  {
    "path": "external_libs/pgenlib/simde/x86/clmul.h",
    "content": "/* SPDX-License-Identifier: MIT\n *\n * Permission is hereby granted, free of charge, to any person\n * obtaining a copy of this software and associated documentation\n * files (the \"Software\"), to deal in the Software without\n * restriction, including without limitation the rights to use, copy,\n * modify, merge, publish, distribute, sublicense, and/or sell copies\n * of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be\n * included in all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND,\n * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\n * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\n * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\n * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\n * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\n * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n * SOFTWARE.\n *\n * Copyright:\n *   2020      Evan Nemerson <evan@nemerson.com>\n *   2016      Thomas Pornin <pornin@bolet.org>\n */\n\n/* The portable version is based on the implementation in BearSSL,\n * which is MIT licensed, constant-time / branch-free, and documented\n * at https://www.bearssl.org/constanttime.html (specifically, we use\n * the implementation from ghash_ctmul64.c). */\n\n#if !defined(SIMDE_X86_CLMUL_H)\n#define SIMDE_X86_CLMUL_H\n\n#include \"avx512/set.h\"\n#include \"avx512/setzero.h\"\n\n#if !defined(SIMDE_X86_PCLMUL_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES)\n#  define SIMDE_X86_PCLMUL_ENABLE_NATIVE_ALIASES\n#endif\n\nHEDLEY_DIAGNOSTIC_PUSH\nSIMDE_DISABLE_UNWANTED_DIAGNOSTICS\nSIMDE_BEGIN_DECLS_\n\nSIMDE_FUNCTION_ATTRIBUTES\nuint64_t\nsimde_x_clmul_u64(uint64_t x, uint64_t y) {\n  uint64_t x0, x1, x2, x3;\n  uint64_t y0, y1, y2, y3;\n  uint64_t z0, z1, z2, z3;\n\n  x0 = x & UINT64_C(0x1111111111111111);\n  x1 = x & UINT64_C(0x2222222222222222);\n  x2 = x & UINT64_C(0x4444444444444444);\n  x3 = x & UINT64_C(0x8888888888888888);\n  y0 = y & UINT64_C(0x1111111111111111);\n  y1 = y & UINT64_C(0x2222222222222222);\n  y2 = y & UINT64_C(0x4444444444444444);\n  y3 = y & UINT64_C(0x8888888888888888);\n\n  z0 = (x0 * y0) ^ (x1 * y3) ^ (x2 * y2) ^ (x3 * y1);\n  z1 = (x0 * y1) ^ (x1 * y0) ^ (x2 * y3) ^ (x3 * y2);\n  z2 = (x0 * y2) ^ (x1 * y1) ^ (x2 * y0) ^ (x3 * y3);\n  z3 = (x0 * y3) ^ (x1 * y2) ^ (x2 * y1) ^ (x3 * y0);\n\n  z0 &= UINT64_C(0x1111111111111111);\n  z1 &= UINT64_C(0x2222222222222222);\n  z2 &= UINT64_C(0x4444444444444444);\n  z3 &= UINT64_C(0x8888888888888888);\n\n  return z0 | z1 | z2 | z3;\n}\n\nstatic uint64_t\nsimde_x_bitreverse_u64(uint64_t v) {\n  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n    uint8x8_t bytes = vreinterpret_u8_u64(vmov_n_u64(v));\n    bytes = vrbit_u8(bytes);\n    bytes = vrev64_u8(bytes);\n    return vget_lane_u64(vreinterpret_u64_u8(bytes), 0);\n  #elif defined(SIMDE_X86_GFNI_NATIVE)\n    /* I don't think there is (or likely will ever be) a CPU with GFNI\n     * but not pclmulq, but this may be useful for things other than\n     * _mm_clmulepi64_si128. 
*/\n    __m128i vec = _mm_cvtsi64_si128(HEDLEY_STATIC_CAST(int64_t, v));\n\n    /* Reverse bits within each byte */\n    vec = _mm_gf2p8affine_epi64_epi8(vec, _mm_cvtsi64_si128(HEDLEY_STATIC_CAST(int64_t, UINT64_C(0x8040201008040201))), 0);\n\n    /* Reverse bytes */\n    #if defined(SIMDE_X86_SSSE3_NATIVE)\n      vec = _mm_shuffle_epi8(vec, _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7));\n    #else\n      vec = _mm_or_si128(_mm_slli_epi16(vec, 8), _mm_srli_epi16(vec, 8));\n      vec = _mm_shufflelo_epi16(vec, _MM_SHUFFLE(0, 1, 2, 3));\n      vec = _mm_shufflehi_epi16(vec, _MM_SHUFFLE(0, 1, 2, 3));\n    #endif\n\n    return HEDLEY_STATIC_CAST(uint64_t, _mm_cvtsi128_si64(vec));\n  #elif HEDLEY_HAS_BUILTIN(__builtin_bitreverse64)\n    return __builtin_bitreverse64(v);\n  #else\n    v = ((v >>  1) & UINT64_C(0x5555555555555555)) | ((v & UINT64_C(0x5555555555555555)) <<  1);\n    v = ((v >>  2) & UINT64_C(0x3333333333333333)) | ((v & UINT64_C(0x3333333333333333)) <<  2);\n    v = ((v >>  4) & UINT64_C(0x0F0F0F0F0F0F0F0F)) | ((v & UINT64_C(0x0F0F0F0F0F0F0F0F)) <<  4);\n    v = ((v >>  8) & UINT64_C(0x00FF00FF00FF00FF)) | ((v & UINT64_C(0x00FF00FF00FF00FF)) <<  8);\n    v = ((v >> 16) & UINT64_C(0x0000FFFF0000FFFF)) | ((v & UINT64_C(0x0000FFFF0000FFFF)) << 16);\n    return (v >> 32) | (v << 32);\n  #endif\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_clmulepi64_si128 (simde__m128i a, simde__m128i b, const int imm8)\n    SIMDE_REQUIRE_CONSTANT(imm8) {\n  simde__m128i_private\n    a_ = simde__m128i_to_private(a),\n    b_ = simde__m128i_to_private(b),\n    r_;\n\n  #if SIMDE_NATURAL_VECTOR_SIZE_GE(128)\n    #if defined(SIMDE_SHUFFLE_VECTOR_)\n      switch (imm8 & 0x11) {\n        case 0x00:\n          b_.u64 = SIMDE_SHUFFLE_VECTOR_(64, 16, b_.u64, b_.u64, 0, 0);\n          a_.u64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.u64, a_.u64, 0, 0);\n          break;\n        case 0x01:\n          b_.u64 = SIMDE_SHUFFLE_VECTOR_(64, 16, b_.u64, b_.u64, 0, 0);\n          a_.u64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.u64, a_.u64, 1, 1);\n          break;\n        case 0x10:\n          b_.u64 = SIMDE_SHUFFLE_VECTOR_(64, 16, b_.u64, b_.u64, 1, 1);\n          a_.u64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.u64, a_.u64, 0, 0);\n          break;\n        case 0x11:\n          b_.u64 = SIMDE_SHUFFLE_VECTOR_(64, 16, b_.u64, b_.u64, 1, 1);\n          a_.u64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.u64, a_.u64, 1, 1);\n          break;\n      }\n    #else\n      {\n        const uint64_t A = a_.u64[(imm8     ) & 1];\n        const uint64_t B = b_.u64[(imm8 >> 4) & 1];\n\n        SIMDE_VECTORIZE\n        for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) {\n          a_.u64[i] = A;\n          b_.u64[i] = B;\n        }\n      }\n    #endif\n\n    simde__m128i_private reversed_;\n    {\n      #if defined(SIMDE_SHUFFLE_VECTOR_)\n        reversed_.u64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.u64, b_.u64, 1, 3);\n      #else\n        reversed_.u64[0] = a_.u64[1];\n        reversed_.u64[1] = b_.u64[1];\n      #endif\n\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(reversed_.u64) / sizeof(reversed_.u64[0])) ; i++) {\n        reversed_.u64[i] = simde_x_bitreverse_u64(reversed_.u64[i]);\n      }\n    }\n\n    #if defined(SIMDE_SHUFFLE_VECTOR_)\n      a_.u64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.u64, reversed_.u64, 0, 2);\n      b_.u64 = SIMDE_SHUFFLE_VECTOR_(64, 16, b_.u64, reversed_.u64, 1, 3);\n    #else\n      a_.u64[1] = reversed_.u64[0];\n      b_.u64[1] = reversed_.u64[1];\n    #endif\n\n    
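/* The loop below computes the low 64 bits of each 64x64 carry-less\n     * product directly; the high 64 bits come from the identity\n     * hi64(clmul(a, b)) == bitreverse(clmul(bitreverse(a), bitreverse(b))) >> 1,\n     * which is why the upper lane of each operand was bit-reversed above. */\n    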
SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(reversed_.u64) / sizeof(reversed_.u64[0])) ; i++) {\n      r_.u64[i] = simde_x_clmul_u64(a_.u64[i], b_.u64[i]);\n    }\n\n    r_.u64[1] = simde_x_bitreverse_u64(r_.u64[1]) >> 1;\n  #else\n    r_.u64[0] =                        simde_x_clmul_u64(                       a_.u64[imm8 & 1],                         b_.u64[(imm8 >> 4) & 1]);\n    r_.u64[1] = simde_x_bitreverse_u64(simde_x_clmul_u64(simde_x_bitreverse_u64(a_.u64[imm8 & 1]), simde_x_bitreverse_u64(b_.u64[(imm8 >> 4) & 1]))) >> 1;\n  #endif\n\n  return simde__m128i_from_private(r_);\n}\n#if defined(SIMDE_X86_PCLMUL_NATIVE)\n  #if defined(HEDLEY_MCST_LCC_VERSION)\n    #define simde_mm_clmulepi64_si128(a, b, imm8) (__extension__ ({ \\\n      SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS \\\n      _mm_clmulepi64_si128((a), (b), (imm8)); \\\n      SIMDE_LCC_REVERT_DEPRECATED_WARNINGS \\\n    }))\n  #else\n    #define simde_mm_clmulepi64_si128(a, b, imm8) _mm_clmulepi64_si128(a, b, imm8)\n  #endif\n#elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_AES) && !defined(__clang__)\n  #define simde_mm_clmulepi64_si128(a, b, imm8) \\\n    simde__m128i_from_neon_u64( \\\n      vreinterpretq_u64_p128( \\\n        vmull_p64( \\\n          vgetq_lane_p64(vreinterpretq_p64_u64(simde__m128i_to_neon_u64(a)), (imm8     ) & 1), \\\n          vgetq_lane_p64(vreinterpretq_p64_u64(simde__m128i_to_neon_u64(b)), (imm8 >> 4) & 1) \\\n        ) \\\n      ) \\\n    )\n#endif\n#if defined(SIMDE_X86_PCLMUL_ENABLE_NATIVE_ALIASES)\n  #undef _mm_clmulepi64_si128\n  #define _mm_clmulepi64_si128(a, b, imm8) simde_mm_clmulepi64_si128(a, b, imm8)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_clmulepi64_epi128 (simde__m256i a, simde__m256i b, const int imm8)\n    SIMDE_REQUIRE_CONSTANT(imm8) {\n  simde__m256i_private\n    a_ = simde__m256i_to_private(a),\n    b_ = simde__m256i_to_private(b),\n    r_;\n\n  simde__m128i_private a_lo_, b_lo_, r_lo_, a_hi_, b_hi_, r_hi_;\n\n  #if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && !defined(HEDLEY_IBM_VERSION)\n    switch (imm8 & 0x01) {\n      case 0x00:\n        a_lo_.u64 = __builtin_shufflevector(a_.u64, a_.u64, 0, 2);\n        break;\n      case 0x01:\n        a_lo_.u64 = __builtin_shufflevector(a_.u64, a_.u64, 1, 3);\n        break;\n    }\n    switch (imm8 & 0x10) {\n      case 0x00:\n        b_lo_.u64 = __builtin_shufflevector(b_.u64, b_.u64, 0, 2);\n        break;\n      case 0x10:\n        b_lo_.u64 = __builtin_shufflevector(b_.u64, b_.u64, 1, 3);\n        break;\n    }\n  #else\n    a_lo_.u64[0] = a_.u64[((imm8 >> 0) & 1) + 0];\n    a_lo_.u64[1] = a_.u64[((imm8 >> 0) & 1) + 2];\n    b_lo_.u64[0] = b_.u64[((imm8 >> 4) & 1) + 0];\n    b_lo_.u64[1] = b_.u64[((imm8 >> 4) & 1) + 2];\n  #endif\n\n  SIMDE_VECTORIZE\n  for (size_t i = 0 ; i < (sizeof(r_hi_.u64) / sizeof(r_hi_.u64[0])) ; i++) {\n    a_hi_.u64[i] = simde_x_bitreverse_u64(a_lo_.u64[i]);\n    b_hi_.u64[i] = simde_x_bitreverse_u64(b_lo_.u64[i]);\n\n    r_lo_.u64[i] = simde_x_clmul_u64(a_lo_.u64[i], b_lo_.u64[i]);\n    r_hi_.u64[i] = simde_x_clmul_u64(a_hi_.u64[i], b_hi_.u64[i]);\n\n    r_hi_.u64[i] = simde_x_bitreverse_u64(r_hi_.u64[i]) >> 1;\n  }\n\n  #if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && !defined(HEDLEY_IBM_VERSION)\n    r_.u64 = __builtin_shufflevector(r_lo_.u64, r_hi_.u64, 0, 2, 1, 3);\n  #elif defined(SIMDE_SHUFFLE_VECTOR_)\n    r_ = simde__m256i_to_private(simde_mm256_set_m128i(simde__m128i_from_private(r_hi_), simde__m128i_from_private(r_lo_)));\n    r_.u64 = 
SIMDE_SHUFFLE_VECTOR_(64, 32, r_.u64, r_.u64, 0, 2, 1, 3);\n  #else\n    r_.u64[0] = r_lo_.u64[0];\n    r_.u64[1] = r_hi_.u64[0];\n    r_.u64[2] = r_lo_.u64[1];\n    r_.u64[3] = r_hi_.u64[1];\n  #endif\n\n  return simde__m256i_from_private(r_);\n}\n#if defined(SIMDE_X86_VPCLMULQDQ_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)\n  #define simde_mm256_clmulepi64_epi128(a, b, imm8) _mm256_clmulepi64_epi128(a, b, imm8)\n#endif\n#if defined(SIMDE_X86_VPCLMULQDQ_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_clmulepi64_epi128\n  #define _mm256_clmulepi64_epi128(a, b, imm8) simde_mm256_clmulepi64_epi128(a, b, imm8)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512i\nsimde_mm512_clmulepi64_epi128 (simde__m512i a, simde__m512i b, const int imm8)\n    SIMDE_REQUIRE_CONSTANT(imm8) {\n  simde__m512i_private\n    a_ = simde__m512i_to_private(a),\n    b_ = simde__m512i_to_private(b),\n    r_;\n\n  #if defined(HEDLEY_MSVC_VERSION)\n    r_ = simde__m512i_to_private(simde_mm512_setzero_si512());\n  #endif\n  #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n    switch (imm8 & 0x11) {\n      case 0x00:\n        r_.m256i[0] = simde_mm256_clmulepi64_epi128(a_.m256i[0], b_.m256i[0], 0x00);\n        r_.m256i[1] = simde_mm256_clmulepi64_epi128(a_.m256i[1], b_.m256i[1], 0x00);\n        break;\n      case 0x01:\n        r_.m256i[0] = simde_mm256_clmulepi64_epi128(a_.m256i[0], b_.m256i[0], 0x01);\n        r_.m256i[1] = simde_mm256_clmulepi64_epi128(a_.m256i[1], b_.m256i[1], 0x01);\n        break;\n      case 0x10:\n        r_.m256i[0] = simde_mm256_clmulepi64_epi128(a_.m256i[0], b_.m256i[0], 0x10);\n        r_.m256i[1] = simde_mm256_clmulepi64_epi128(a_.m256i[1], b_.m256i[1], 0x10);\n        break;\n      case 0x11:\n        r_.m256i[0] = simde_mm256_clmulepi64_epi128(a_.m256i[0], b_.m256i[0], 0x11);\n        r_.m256i[1] = simde_mm256_clmulepi64_epi128(a_.m256i[1], b_.m256i[1], 0x11);\n        break;\n    }\n  #else\n    simde__m256i_private a_lo_, b_lo_, r_lo_, a_hi_, b_hi_, r_hi_;\n\n    #if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && !defined(HEDLEY_IBM_VERSION)\n      switch (imm8 & 0x01) {\n        case 0x00:\n          a_lo_.u64 = __builtin_shufflevector(a_.u64, a_.u64, 0, 2, 4, 6);\n          break;\n        case 0x01:\n          a_lo_.u64 = __builtin_shufflevector(a_.u64, a_.u64, 1, 3, 5, 7);\n          break;\n      }\n      switch (imm8 & 0x10) {\n        case 0x00:\n          b_lo_.u64 = __builtin_shufflevector(b_.u64, b_.u64, 0, 2, 4, 6);\n          break;\n        case 0x10:\n          b_lo_.u64 = __builtin_shufflevector(b_.u64, b_.u64, 1, 3, 5, 7);\n          break;\n      }\n    #else\n      a_lo_.u64[0] = a_.u64[((imm8 >> 0) & 1) + 0];\n      a_lo_.u64[1] = a_.u64[((imm8 >> 0) & 1) + 2];\n      a_lo_.u64[2] = a_.u64[((imm8 >> 0) & 1) + 4];\n      a_lo_.u64[3] = a_.u64[((imm8 >> 0) & 1) + 6];\n      b_lo_.u64[0] = b_.u64[((imm8 >> 4) & 1) + 0];\n      b_lo_.u64[1] = b_.u64[((imm8 >> 4) & 1) + 2];\n      b_lo_.u64[2] = b_.u64[((imm8 >> 4) & 1) + 4];\n      b_lo_.u64[3] = b_.u64[((imm8 >> 4) & 1) + 6];\n    #endif\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_hi_.u64) / sizeof(r_hi_.u64[0])) ; i++) {\n      a_hi_.u64[i] = simde_x_bitreverse_u64(a_lo_.u64[i]);\n      b_hi_.u64[i] = simde_x_bitreverse_u64(b_lo_.u64[i]);\n\n      r_lo_.u64[i] = simde_x_clmul_u64(a_lo_.u64[i], b_lo_.u64[i]);\n      r_hi_.u64[i] = simde_x_clmul_u64(a_hi_.u64[i], b_hi_.u64[i]);\n\n      r_hi_.u64[i] = simde_x_bitreverse_u64(r_hi_.u64[i]) >> 1;\n    }\n\n    #if 
HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && !defined(HEDLEY_IBM_VERSION)\n      r_.u64 = __builtin_shufflevector(r_lo_.u64, r_hi_.u64, 0, 4, 1, 5, 2, 6, 3, 7);\n    #else\n      r_.u64[0] = r_lo_.u64[0];\n      r_.u64[1] = r_hi_.u64[0];\n      r_.u64[2] = r_lo_.u64[1];\n      r_.u64[3] = r_hi_.u64[1];\n      r_.u64[4] = r_lo_.u64[2];\n      r_.u64[5] = r_hi_.u64[2];\n      r_.u64[6] = r_lo_.u64[3];\n      r_.u64[7] = r_hi_.u64[3];\n    #endif\n  #endif\n\n  return simde__m512i_from_private(r_);\n}\n#if defined(SIMDE_X86_VPCLMULQDQ_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n  #define simde_mm512_clmulepi64_epi128(a, b, imm8) _mm512_clmulepi64_epi128(a, b, imm8)\n#endif\n#if defined(SIMDE_X86_VPCLMULQDQ_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_clmulepi64_epi128\n  #define _mm512_clmulepi64_epi128(a, b, imm8) simde_mm512_clmulepi64_epi128(a, b, imm8)\n#endif\n\nSIMDE_END_DECLS_\n\nHEDLEY_DIAGNOSTIC_POP\n\n#endif /* !defined(SIMDE_X86_CLMUL_H) */\n"
  },
  {
    "path": "external_libs/pgenlib/simde/x86/f16c.h",
    "content": "/* SPDX-License-Identifier: MIT\n *\n * Permission is hereby granted, free of charge, to any person\n * obtaining a copy of this software and associated documentation\n * files (the \"Software\"), to deal in the Software without\n * restriction, including without limitation the rights to use, copy,\n * modify, merge, publish, distribute, sublicense, and/or sell copies\n * of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be\n * included in all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND,\n * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\n * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\n * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\n * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\n * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\n * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n * SOFTWARE.\n *\n * Copyright:\n *   2021      Evan Nemerson <evan@nemerson.com>\n */\n\n#include \"../simde-common.h\"\n#include \"../simde-math.h\"\n#include \"../simde-f16.h\"\n\n#if !defined(SIMDE_X86_F16C_H)\n#define SIMDE_X86_F16C_H\n\n#include \"avx.h\"\n\n#if !defined(SIMDE_X86_PF16C_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES)\n#  define SIMDE_X86_PF16C_ENABLE_NATIVE_ALIASES\n#endif\n\nHEDLEY_DIAGNOSTIC_PUSH\nSIMDE_DISABLE_UNWANTED_DIAGNOSTICS\nSIMDE_BEGIN_DECLS_\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_cvtps_ph(simde__m128 a, const int imm8) {\n  simde__m128_private a_ = simde__m128_to_private(a);\n  simde__m128i_private r_ = simde__m128i_to_private(simde_mm_setzero_si128());\n\n  HEDLEY_STATIC_CAST(void, imm8);\n\n  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)\n    r_.neon_f16 = vcombine_f16(vcvt_f16_f32(a_.neon_f32), vdup_n_f16(SIMDE_FLOAT16_C(0.0)));\n  #elif defined(SIMDE_FLOAT16_VECTOR)\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) {\n      r_.f16[i] = simde_float16_from_float32(a_.f32[i]);\n    }\n  #else\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) {\n      r_.u16[i] = simde_float16_as_uint16(simde_float16_from_float32(a_.f32[i]));\n    }\n  #endif\n\n  return simde__m128i_from_private(r_);\n}\n#if defined(SIMDE_X86_F16C_NATIVE)\n  #define simde_mm_cvtps_ph(a, imm8) _mm_cvtps_ph(a, imm8)\n#endif\n#if defined(SIMDE_X86_F16C_ENABLE_NATIVE_ALIASES)\n  #define _mm_cvtps_ph(a, sae) simde_mm_cvtps_ph(a, sae)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_cvtph_ps(simde__m128i a) {\n  #if defined(SIMDE_X86_F16C_NATIVE)\n    return _mm_cvtph_ps(a);\n  #else\n    simde__m128i_private a_ = simde__m128i_to_private(a);\n    simde__m128_private r_;\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)\n      r_.neon_f32 = vcvt_f32_f16(vget_low_f16(a_.neon_f16));\n    #elif defined(SIMDE_FLOAT16_VECTOR)\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) {\n        r_.f32[i] = simde_float16_to_float32(a_.f16[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) {\n        r_.f32[i] = simde_float16_to_float32(simde_uint16_as_float16(a_.u16[i]));\n 
     }\n    #endif\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_F16C_ENABLE_NATIVE_ALIASES)\n  #define _mm_cvtph_ps(a) simde_mm_cvtph_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm256_cvtps_ph(simde__m256 a, const int imm8) {\n  simde__m256_private a_ = simde__m256_to_private(a);\n  simde__m128i_private r_;\n\n  HEDLEY_STATIC_CAST(void, imm8);\n\n  #if defined(SIMDE_FLOAT16_VECTOR)\n    SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) {\n        r_.f16[i] = simde_float16_from_float32(a_.f32[i]);\n      }\n  #else\n    SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) {\n        r_.u16[i] = simde_float16_as_uint16(simde_float16_from_float32(a_.f32[i]));\n      }\n  #endif\n\n\n  return simde__m128i_from_private(r_);\n}\n#if defined(SIMDE_X86_F16C_NATIVE)\n  #define simde_mm256_cvtps_ph(a, imm8) _mm256_cvtps_ph(a, imm8)\n#endif\n#if defined(SIMDE_X86_F16C_ENABLE_NATIVE_ALIASES)\n  #define _mm256_cvtps_ph(a, imm8) simde_mm256_cvtps_ph(a, imm8)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_cvtph_ps(simde__m128i a) {\n  #if defined(SIMDE_X86_F16C_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_cvtph_ps(a);\n  #elif defined(SIMDE_X86_F16C_NATIVE)\n    return _mm256_setr_m128(\n      _mm_cvtph_ps(a),\n      _mm_cvtph_ps(_mm_castps_si128(_mm_permute_ps(_mm_castsi128_ps(a), 0xee)))\n    );\n  #else\n    simde__m128i_private a_ = simde__m128i_to_private(a);\n    simde__m256_private r_;\n\n    #if defined(SIMDE_FLOAT16_VECTOR)\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = simde_float16_to_float32(a_.f16[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = simde_float16_to_float32(simde_uint16_as_float16(a_.u16[i]));\n      }\n    #endif\n\n    return simde__m256_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_F16C_ENABLE_NATIVE_ALIASES)\n  #define _mm256_cvtph_ps(a) simde_mm256_cvtph_ps(a)\n#endif\n\nSIMDE_END_DECLS_\n\nHEDLEY_DIAGNOSTIC_POP\n\n#endif /* !defined(SIMDE_X86_F16C_H) */\n"
  },
  {
    "path": "external_libs/pgenlib/simde/x86/fma.h",
    "content": "/* SPDX-License-Identifier: MIT\n *\n * Permission is hereby granted, free of charge, to any person\n * obtaining a copy of this software and associated documentation\n * files (the \"Software\"), to deal in the Software without\n * restriction, including without limitation the rights to use, copy,\n * modify, merge, publish, distribute, sublicense, and/or sell copies\n * of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be\n * included in all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND,\n * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\n * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\n * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\n * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\n * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\n * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n * SOFTWARE.\n *\n * Copyright:\n *   2019      Evan Nemerson <evan@nemerson.com>\n */\n\n#if !defined(SIMDE_X86_FMA_H)\n#define SIMDE_X86_FMA_H\n\n#include \"avx.h\"\n\n#if !defined(SIMDE_X86_FMA_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES)\n#  define SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES\n#endif\n\nHEDLEY_DIAGNOSTIC_PUSH\nSIMDE_DISABLE_UNWANTED_DIAGNOSTICS\nSIMDE_BEGIN_DECLS_\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_fmadd_pd (simde__m128d a, simde__m128d b, simde__m128d c) {\n  #if defined(SIMDE_X86_FMA_NATIVE)\n    return _mm_fmadd_pd(a, b, c);\n  #else\n    simde__m128d_private\n      a_ = simde__m128d_to_private(a),\n      b_ = simde__m128d_to_private(b),\n      c_ = simde__m128d_to_private(c),\n      r_;\n\n    #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)\n      r_.altivec_f64 = vec_madd(a_.altivec_f64, b_.altivec_f64, c_.altivec_f64);\n    #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      r_.neon_f64 = vfmaq_f64(c_.neon_f64, b_.neon_f64, a_.neon_f64);\n    #elif defined(simde_math_fma) && (defined(__FP_FAST_FMA) || defined(FP_FAST_FMA))\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = simde_math_fma(a_.f64[i], b_.f64[i], c_.f64[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = (a_.f64[i] * b_.f64[i]) + c_.f64[i];\n      }\n    #endif\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)\n  #undef _mm_fmadd_pd\n  #define _mm_fmadd_pd(a, b, c) simde_mm_fmadd_pd(a, b, c)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_fmadd_pd (simde__m256d a, simde__m256d b, simde__m256d c) {\n  #if defined(SIMDE_X86_FMA_NATIVE)\n    return _mm256_fmadd_pd(a, b, c);\n  #else\n    return simde_mm256_add_pd(simde_mm256_mul_pd(a, b), c);\n  #endif\n}\n#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_fmadd_pd\n  #define _mm256_fmadd_pd(a, b, c) simde_mm256_fmadd_pd(a, b, c)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_fmadd_ps (simde__m128 a, simde__m128 b, simde__m128 c) {\n  #if defined(SIMDE_X86_FMA_NATIVE)\n    return _mm_fmadd_ps(a, b, c);\n  #else\n    simde__m128_private\n      a_ = simde__m128_to_private(a),\n      b_ = simde__m128_to_private(b),\n      c_ = 
simde__m128_to_private(c),\n      r_;\n\n    #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)\n      r_.altivec_f32 = vec_madd(a_.altivec_f32, b_.altivec_f32, c_.altivec_f32);\n    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARCH_ARM_FMA)\n      r_.neon_f32 = vfmaq_f32(c_.neon_f32, b_.neon_f32, a_.neon_f32);\n    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_f32 = vmlaq_f32(c_.neon_f32, b_.neon_f32, a_.neon_f32);\n    #elif defined(simde_math_fmaf) && (defined(__FP_FAST_FMAF) || defined(FP_FAST_FMAF))\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = simde_math_fmaf(a_.f32[i], b_.f32[i], c_.f32[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = (a_.f32[i] * b_.f32[i]) + c_.f32[i];\n      }\n    #endif\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)\n  #undef _mm_fmadd_ps\n  #define _mm_fmadd_ps(a, b, c) simde_mm_fmadd_ps(a, b, c)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_fmadd_ps (simde__m256 a, simde__m256 b, simde__m256 c) {\n  #if defined(SIMDE_X86_FMA_NATIVE)\n    return _mm256_fmadd_ps(a, b, c);\n  #elif SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n    simde__m256_private\n      a_ = simde__m256_to_private(a),\n      b_ = simde__m256_to_private(b),\n      c_ = simde__m256_to_private(c),\n      r_;\n\n    for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {\n      r_.m128[i] = simde_mm_fmadd_ps(a_.m128[i], b_.m128[i], c_.m128[i]);\n    }\n\n    return simde__m256_from_private(r_);\n  #else\n    return simde_mm256_add_ps(simde_mm256_mul_ps(a, b), c);\n  #endif\n}\n#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_fmadd_ps\n  #define _mm256_fmadd_ps(a, b, c) simde_mm256_fmadd_ps(a, b, c)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_fmadd_sd (simde__m128d a, simde__m128d b, simde__m128d c) {\n  #if defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_BUG_MCST_LCC_FMA_WRONG_RESULT)\n    return _mm_fmadd_sd(a, b, c);\n  #else\n    return simde_mm_add_sd(simde_mm_mul_sd(a, b), c);\n  #endif\n}\n#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)\n  #undef _mm_fmadd_sd\n  #define _mm_fmadd_sd(a, b, c) simde_mm_fmadd_sd(a, b, c)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_fmadd_ss (simde__m128 a, simde__m128 b, simde__m128 c) {\n  #if defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_BUG_MCST_LCC_FMA_WRONG_RESULT)\n    return _mm_fmadd_ss(a, b, c);\n  #else\n    return simde_mm_add_ss(simde_mm_mul_ss(a, b), c);\n  #endif\n}\n#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)\n  #undef _mm_fmadd_ss\n  #define _mm_fmadd_ss(a, b, c) simde_mm_fmadd_ss(a, b, c)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_fmaddsub_pd (simde__m128d a, simde__m128d b, simde__m128d c) {\n  #if defined(SIMDE_X86_FMA_NATIVE)\n    return _mm_fmaddsub_pd(a, b, c);\n  #else\n    return simde_mm_addsub_pd(simde_mm_mul_pd(a, b), c);\n  #endif\n}\n#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)\n  #undef _mm_fmaddsub_pd\n  #define _mm_fmaddsub_pd(a, b, c) simde_mm_fmaddsub_pd(a, b, c)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_fmaddsub_pd (simde__m256d a, simde__m256d b, simde__m256d c) {\n  #if defined(SIMDE_X86_FMA_NATIVE)\n    return _mm256_fmaddsub_pd(a, b, c);\n  #else\n    return simde_mm256_addsub_pd(simde_mm256_mul_pd(a, b), c);\n  #endif\n}\n#if 
defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_fmaddsub_pd\n  #define _mm256_fmaddsub_pd(a, b, c) simde_mm256_fmaddsub_pd(a, b, c)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_fmaddsub_ps (simde__m128 a, simde__m128 b, simde__m128 c) {\n  #if defined(SIMDE_X86_FMA_NATIVE)\n    return _mm_fmaddsub_ps(a, b, c);\n  #else\n    return simde_mm_addsub_ps(simde_mm_mul_ps(a, b), c);\n  #endif\n}\n#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)\n  #undef _mm_fmaddsub_ps\n  #define _mm_fmaddsub_ps(a, b, c) simde_mm_fmaddsub_ps(a, b, c)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_fmaddsub_ps (simde__m256 a, simde__m256 b, simde__m256 c) {\n  #if defined(SIMDE_X86_FMA_NATIVE)\n    return _mm256_fmaddsub_ps(a, b, c);\n  #else\n    return simde_mm256_addsub_ps(simde_mm256_mul_ps(a, b), c);\n  #endif\n}\n#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_fmaddsub_ps\n  #define _mm256_fmaddsub_ps(a, b, c) simde_mm256_fmaddsub_ps(a, b, c)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_fmsub_pd (simde__m128d a, simde__m128d b, simde__m128d c) {\n  #if defined(SIMDE_X86_FMA_NATIVE)\n    return _mm_fmsub_pd(a, b, c);\n  #else\n    return simde_mm_sub_pd(simde_mm_mul_pd(a, b), c);\n  #endif\n}\n#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)\n  #undef _mm_fmsub_pd\n  #define _mm_fmsub_pd(a, b, c) simde_mm_fmsub_pd(a, b, c)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_fmsub_pd (simde__m256d a, simde__m256d b, simde__m256d c) {\n  #if defined(SIMDE_X86_FMA_NATIVE)\n    return _mm256_fmsub_pd(a, b, c);\n  #else\n    return simde_mm256_sub_pd(simde_mm256_mul_pd(a, b), c);\n  #endif\n}\n#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_fmsub_pd\n  #define _mm256_fmsub_pd(a, b, c) simde_mm256_fmsub_pd(a, b, c)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_fmsub_ps (simde__m128 a, simde__m128 b, simde__m128 c) {\n  #if defined(SIMDE_X86_FMA_NATIVE)\n    return _mm_fmsub_ps(a, b, c);\n  #else\n    return simde_mm_sub_ps(simde_mm_mul_ps(a, b), c);\n  #endif\n}\n#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)\n  #undef _mm_fmsub_ps\n  #define _mm_fmsub_ps(a, b, c) simde_mm_fmsub_ps(a, b, c)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_fmsub_ps (simde__m256 a, simde__m256 b, simde__m256 c) {\n  #if defined(SIMDE_X86_FMA_NATIVE)\n    return _mm256_fmsub_ps(a, b, c);\n  #else\n    return simde_mm256_sub_ps(simde_mm256_mul_ps(a, b), c);\n  #endif\n}\n#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_fmsub_ps\n  #define _mm256_fmsub_ps(a, b, c) simde_mm256_fmsub_ps(a, b, c)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_fmsub_sd (simde__m128d a, simde__m128d b, simde__m128d c) {\n  #if defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_BUG_MCST_LCC_FMA_WRONG_RESULT)\n    return _mm_fmsub_sd(a, b, c);\n  #else\n    return simde_mm_sub_sd(simde_mm_mul_sd(a, b), c);\n  #endif\n}\n#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)\n  #undef _mm_fmsub_sd\n  #define _mm_fmsub_sd(a, b, c) simde_mm_fmsub_sd(a, b, c)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_fmsub_ss (simde__m128 a, simde__m128 b, simde__m128 c) {\n  #if defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_BUG_MCST_LCC_FMA_WRONG_RESULT)\n    return _mm_fmsub_ss(a, b, c);\n  #else\n    return simde_mm_sub_ss(simde_mm_mul_ss(a, b), c);\n  #endif\n}\n#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)\n  #undef _mm_fmsub_ss\n  #define _mm_fmsub_ss(a, b, c) simde_mm_fmsub_ss(a, b, 
c)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_fmsubadd_pd (simde__m128d a, simde__m128d b, simde__m128d c) {\n  #if defined(SIMDE_X86_FMA_NATIVE)\n    return _mm_fmsubadd_pd(a, b, c);\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a),\n      b_ = simde__m128d_to_private(b),\n      c_ = simde__m128d_to_private(c);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) {\n      r_.f64[  i  ] = (a_.f64[  i  ] * b_.f64[  i  ]) + c_.f64[  i  ];\n      r_.f64[i + 1] = (a_.f64[i + 1] * b_.f64[i + 1]) - c_.f64[i + 1];\n    }\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)\n  #undef _mm_fmsubadd_pd\n  #define _mm_fmsubadd_pd(a, b, c) simde_mm_fmsubadd_pd(a, b, c)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_fmsubadd_pd (simde__m256d a, simde__m256d b, simde__m256d c) {\n  #if defined(SIMDE_X86_FMA_NATIVE)\n    return _mm256_fmsubadd_pd(a, b, c);\n  #else\n    simde__m256d_private\n      r_,\n      a_ = simde__m256d_to_private(a),\n      b_ = simde__m256d_to_private(b),\n      c_ = simde__m256d_to_private(c);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) {\n      r_.f64[  i  ] = (a_.f64[  i  ] * b_.f64[  i  ]) + c_.f64[  i  ];\n      r_.f64[i + 1] = (a_.f64[i + 1] * b_.f64[i + 1]) - c_.f64[i + 1];\n    }\n\n    return simde__m256d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_fmsubadd_pd\n  #define _mm256_fmsubadd_pd(a, b, c) simde_mm256_fmsubadd_pd(a, b, c)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_fmsubadd_ps (simde__m128 a, simde__m128 b, simde__m128 c) {\n  #if defined(SIMDE_X86_FMA_NATIVE)\n    return _mm_fmsubadd_ps(a, b, c);\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a),\n      b_ = simde__m128_to_private(b),\n      c_ = simde__m128_to_private(c);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) {\n      r_.f32[  i  ] = (a_.f32[  i  ] * b_.f32[  i  ]) + c_.f32[  i  ];\n      r_.f32[i + 1] = (a_.f32[i + 1] * b_.f32[i + 1]) - c_.f32[i + 1];\n    }\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)\n  #undef _mm_fmsubadd_ps\n  #define _mm_fmsubadd_ps(a, b, c) simde_mm_fmsubadd_ps(a, b, c)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_fmsubadd_ps (simde__m256 a, simde__m256 b, simde__m256 c) {\n  #if defined(SIMDE_X86_FMA_NATIVE)\n    return _mm256_fmsubadd_ps(a, b, c);\n  #else\n    simde__m256_private\n      r_,\n      a_ = simde__m256_to_private(a),\n      b_ = simde__m256_to_private(b),\n      c_ = simde__m256_to_private(c);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) {\n      r_.f32[  i  ] = (a_.f32[  i  ] * b_.f32[  i  ]) + c_.f32[  i  ];\n      r_.f32[i + 1] = (a_.f32[i + 1] * b_.f32[i + 1]) - c_.f32[i + 1];\n    }\n\n    return simde__m256_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_fmsubadd_ps\n  #define _mm256_fmsubadd_ps(a, b, c) simde_mm256_fmsubadd_ps(a, b, c)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_fnmadd_pd (simde__m128d a, simde__m128d b, simde__m128d c) {\n  #if defined(SIMDE_X86_FMA_NATIVE)\n    return _mm_fnmadd_pd(a, b, c);\n  #else\n    simde__m128d_private\n      r_,\n      a_ = 
simde__m128d_to_private(a),\n      b_ = simde__m128d_to_private(b),\n      c_ = simde__m128d_to_private(c);\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      r_.neon_f64 = vfmsq_f64(c_.neon_f64, a_.neon_f64, b_.neon_f64);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = -(a_.f64[i] * b_.f64[i]) + c_.f64[i];\n      }\n    #endif\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)\n  #undef _mm_fnmadd_pd\n  #define _mm_fnmadd_pd(a, b, c) simde_mm_fnmadd_pd(a, b, c)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_fnmadd_pd (simde__m256d a, simde__m256d b, simde__m256d c) {\n  #if defined(SIMDE_X86_FMA_NATIVE)\n    return _mm256_fnmadd_pd(a, b, c);\n  #else\n    simde__m256d_private\n      r_,\n      a_ = simde__m256d_to_private(a),\n      b_ = simde__m256d_to_private(b),\n      c_ = simde__m256d_to_private(c);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n      r_.f64[i] = -(a_.f64[i] * b_.f64[i]) + c_.f64[i];\n    }\n\n    return simde__m256d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_fnmadd_pd\n  #define _mm256_fnmadd_pd(a, b, c) simde_mm256_fnmadd_pd(a, b, c)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_fnmadd_ps (simde__m128 a, simde__m128 b, simde__m128 c) {\n  #if defined(SIMDE_X86_FMA_NATIVE)\n    return _mm_fnmadd_ps(a, b, c);\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a),\n      b_ = simde__m128_to_private(b),\n      c_ = simde__m128_to_private(c);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARCH_ARM_FMA)\n      r_.neon_f32 = vfmsq_f32(c_.neon_f32, a_.neon_f32, b_.neon_f32);\n    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_f32 = vmlsq_f32(c_.neon_f32, a_.neon_f32, b_.neon_f32);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = -(a_.f32[i] * b_.f32[i]) + c_.f32[i];\n      }\n    #endif\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)\n  #undef _mm_fnmadd_ps\n  #define _mm_fnmadd_ps(a, b, c) simde_mm_fnmadd_ps(a, b, c)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_fnmadd_ps (simde__m256 a, simde__m256 b, simde__m256 c) {\n  #if defined(SIMDE_X86_FMA_NATIVE)\n    return _mm256_fnmadd_ps(a, b, c);\n  #else\n    simde__m256_private\n      r_,\n      a_ = simde__m256_to_private(a),\n      b_ = simde__m256_to_private(b),\n      c_ = simde__m256_to_private(c);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n      r_.f32[i] = -(a_.f32[i] * b_.f32[i]) + c_.f32[i];\n    }\n\n    return simde__m256_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_fnmadd_ps\n  #define _mm256_fnmadd_ps(a, b, c) simde_mm256_fnmadd_ps(a, b, c)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_fnmadd_sd (simde__m128d a, simde__m128d b, simde__m128d c) {\n  #if defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_BUG_MCST_LCC_FMA_WRONG_RESULT)\n    return _mm_fnmadd_sd(a, b, c);\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a),\n      b_ = simde__m128d_to_private(b),\n      c_ = simde__m128d_to_private(c);\n\n    r_ = a_;\n    r_.f64[0] = -(a_.f64[0] * b_.f64[0]) + 
c_.f64[0];\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)\n  #undef _mm_fnmadd_sd\n  #define _mm_fnmadd_sd(a, b, c) simde_mm_fnmadd_sd(a, b, c)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_fnmadd_ss (simde__m128 a, simde__m128 b, simde__m128 c) {\n  #if defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_BUG_MCST_LCC_FMA_WRONG_RESULT)\n    return _mm_fnmadd_ss(a, b, c);\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a),\n      b_ = simde__m128_to_private(b),\n      c_ = simde__m128_to_private(c);\n\n    r_ = a_;\n    r_.f32[0] = -(a_.f32[0] * b_.f32[0]) + c_.f32[0];\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)\n  #undef _mm_fnmadd_ss\n  #define _mm_fnmadd_ss(a, b, c) simde_mm_fnmadd_ss(a, b, c)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_fnmsub_pd (simde__m128d a, simde__m128d b, simde__m128d c) {\n  #if defined(SIMDE_X86_FMA_NATIVE)\n    return _mm_fnmsub_pd(a, b, c);\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a),\n      b_ = simde__m128d_to_private(b),\n      c_ = simde__m128d_to_private(c);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n      r_.f64[i] = -(a_.f64[i] * b_.f64[i]) - c_.f64[i];\n    }\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)\n  #undef _mm_fnmsub_pd\n  #define _mm_fnmsub_pd(a, b, c) simde_mm_fnmsub_pd(a, b, c)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_fnmsub_pd (simde__m256d a, simde__m256d b, simde__m256d c) {\n  #if defined(SIMDE_X86_FMA_NATIVE)\n    return _mm256_fnmsub_pd(a, b, c);\n  #else\n    simde__m256d_private\n      r_,\n      a_ = simde__m256d_to_private(a),\n      b_ = simde__m256d_to_private(b),\n      c_ = simde__m256d_to_private(c);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n      r_.f64[i] = -(a_.f64[i] * b_.f64[i]) - c_.f64[i];\n    }\n\n    return simde__m256d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_fnmsub_pd\n  #define _mm256_fnmsub_pd(a, b, c) simde_mm256_fnmsub_pd(a, b, c)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_fnmsub_ps (simde__m128 a, simde__m128 b, simde__m128 c) {\n  #if defined(SIMDE_X86_FMA_NATIVE)\n    return _mm_fnmsub_ps(a, b, c);\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a),\n      b_ = simde__m128_to_private(b),\n      c_ = simde__m128_to_private(c);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n      r_.f32[i] = -(a_.f32[i] * b_.f32[i]) - c_.f32[i];\n    }\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)\n  #undef _mm_fnmsub_ps\n  #define _mm_fnmsub_ps(a, b, c) simde_mm_fnmsub_ps(a, b, c)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_fnmsub_ps (simde__m256 a, simde__m256 b, simde__m256 c) {\n  #if defined(SIMDE_X86_FMA_NATIVE)\n    return _mm256_fnmsub_ps(a, b, c);\n  #else\n    simde__m256_private\n      r_,\n      a_ = simde__m256_to_private(a),\n      b_ = simde__m256_to_private(b),\n      c_ = simde__m256_to_private(c);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n      r_.f32[i] = -(a_.f32[i] * b_.f32[i]) - c_.f32[i];\n    }\n\n   
 return simde__m256_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_fnmsub_ps\n  #define _mm256_fnmsub_ps(a, b, c) simde_mm256_fnmsub_ps(a, b, c)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_fnmsub_sd (simde__m128d a, simde__m128d b, simde__m128d c) {\n  #if defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_BUG_MCST_LCC_FMA_WRONG_RESULT)\n    return _mm_fnmsub_sd(a, b, c);\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a),\n      b_ = simde__m128d_to_private(b),\n      c_ = simde__m128d_to_private(c);\n\n    r_ = a_;\n    r_.f64[0] = -(a_.f64[0] * b_.f64[0]) - c_.f64[0];\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)\n  #undef _mm_fnmsub_sd\n  #define _mm_fnmsub_sd(a, b, c) simde_mm_fnmsub_sd(a, b, c)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_fnmsub_ss (simde__m128 a, simde__m128 b, simde__m128 c) {\n  #if defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_BUG_MCST_LCC_FMA_WRONG_RESULT)\n    return _mm_fnmsub_ss(a, b, c);\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a),\n      b_ = simde__m128_to_private(b),\n      c_ = simde__m128_to_private(c);\n\n    r_ = simde__m128_to_private(a);\n    r_.f32[0] = -(a_.f32[0] * b_.f32[0]) - c_.f32[0];\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)\n  #undef _mm_fnmsub_ss\n  #define _mm_fnmsub_ss(a, b, c) simde_mm_fnmsub_ss(a, b, c)\n#endif\n\nSIMDE_END_DECLS_\n\nHEDLEY_DIAGNOSTIC_POP\n\n#endif /* !defined(SIMDE_X86_FMA_H) */\n"
  },
  {
    "path": "external_libs/pgenlib/simde/x86/gfni.h",
    "content": "/* Permission is hereby granted, free of charge, to any person\n * obtaining a copy of this software and associated documentation\n * files (the \"Software\"), to deal in the Software without\n * restriction, including without limitation the rights to use, copy,\n * modify, merge, publish, distribute, sublicense, and/or sell copies\n * of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be\n * included in all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND,\n * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\n * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\n * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\n * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\n * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\n * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n * SOFTWARE.\n *\n * Copyright:\n *   2020-2021 Christopher Moore <moore@free.fr>\n *   2020      Evan Nemerson <evan@nemerson.com>\n */\n\n#if !defined(SIMDE_X86_GFNI_H)\n#define SIMDE_X86_GFNI_H\n\n#include \"avx512/add.h\"\n#include \"avx512/and.h\"\n#include \"avx512/broadcast.h\"\n#include \"avx512/cmpeq.h\"\n#include \"avx512/cmpge.h\"\n#include \"avx512/cmpgt.h\"\n#include \"avx512/cmplt.h\"\n#include \"avx512/extract.h\"\n#include \"avx512/insert.h\"\n#include \"avx512/kshift.h\"\n#include \"avx512/mov.h\"\n#include \"avx512/mov_mask.h\"\n#include \"avx512/permutex2var.h\"\n#include \"avx512/set.h\"\n#include \"avx512/set1.h\"\n#include \"avx512/setzero.h\"\n#include \"avx512/shuffle.h\"\n#include \"avx512/srli.h\"\n#include \"avx512/test.h\"\n#include \"avx512/xor.h\"\n\nHEDLEY_DIAGNOSTIC_PUSH\nSIMDE_DISABLE_UNWANTED_DIAGNOSTICS\nSIMDE_BEGIN_DECLS_\n\n/* In all the *gf2p8affine* intrinsics the argument b must be a compile-time constant so we must use macros and simde_x_mm* helper functions */\n\n/* N.B. 
The _mm*gf2p8affineinv_epi64_epi8 and _mm*gf2p8mul_epi8 intrinsics are for a Field Generator Polynomial (FGP) (aka reduction polynomial) of 0x11B */\n/* Only the _mm*gf2p8affine_epi64_epi8 intrinsics do not assume this specific FGP */\n\n/* The field generator polynomial is 0x11B but we make the 0x100 bit implicit to fit inside 8 bits */\n#define SIMDE_X86_GFNI_FGP 0x1B\n\n/* Computing the inverse of a GF element is expensive so use this LUT for an FGP of 0x11B */\n\nstatic const union {\n  uint8_t      u8[256];\n  simde__m128i m128i[16];\n} simde_x_gf2p8inverse_lut = {\n  {\n   0x00, 0x01, 0x8d, 0xf6, 0xcb, 0x52, 0x7b, 0xd1, 0xe8, 0x4f, 0x29, 0xc0, 0xb0, 0xe1, 0xe5, 0xc7,\n   0x74, 0xb4, 0xaa, 0x4b, 0x99, 0x2b, 0x60, 0x5f, 0x58, 0x3f, 0xfd, 0xcc, 0xff, 0x40, 0xee, 0xb2,\n   0x3a, 0x6e, 0x5a, 0xf1, 0x55, 0x4d, 0xa8, 0xc9, 0xc1, 0x0a, 0x98, 0x15, 0x30, 0x44, 0xa2, 0xc2,\n   0x2c, 0x45, 0x92, 0x6c, 0xf3, 0x39, 0x66, 0x42, 0xf2, 0x35, 0x20, 0x6f, 0x77, 0xbb, 0x59, 0x19,\n   0x1d, 0xfe, 0x37, 0x67, 0x2d, 0x31, 0xf5, 0x69, 0xa7, 0x64, 0xab, 0x13, 0x54, 0x25, 0xe9, 0x09,\n   0xed, 0x5c, 0x05, 0xca, 0x4c, 0x24, 0x87, 0xbf, 0x18, 0x3e, 0x22, 0xf0, 0x51, 0xec, 0x61, 0x17,\n   0x16, 0x5e, 0xaf, 0xd3, 0x49, 0xa6, 0x36, 0x43, 0xf4, 0x47, 0x91, 0xdf, 0x33, 0x93, 0x21, 0x3b,\n   0x79, 0xb7, 0x97, 0x85, 0x10, 0xb5, 0xba, 0x3c, 0xb6, 0x70, 0xd0, 0x06, 0xa1, 0xfa, 0x81, 0x82,\n   0x83, 0x7e, 0x7f, 0x80, 0x96, 0x73, 0xbe, 0x56, 0x9b, 0x9e, 0x95, 0xd9, 0xf7, 0x02, 0xb9, 0xa4,\n   0xde, 0x6a, 0x32, 0x6d, 0xd8, 0x8a, 0x84, 0x72, 0x2a, 0x14, 0x9f, 0x88, 0xf9, 0xdc, 0x89, 0x9a,\n   0xfb, 0x7c, 0x2e, 0xc3, 0x8f, 0xb8, 0x65, 0x48, 0x26, 0xc8, 0x12, 0x4a, 0xce, 0xe7, 0xd2, 0x62,\n   0x0c, 0xe0, 0x1f, 0xef, 0x11, 0x75, 0x78, 0x71, 0xa5, 0x8e, 0x76, 0x3d, 0xbd, 0xbc, 0x86, 0x57,\n   0x0b, 0x28, 0x2f, 0xa3, 0xda, 0xd4, 0xe4, 0x0f, 0xa9, 0x27, 0x53, 0x04, 0x1b, 0xfc, 0xac, 0xe6,\n   0x7a, 0x07, 0xae, 0x63, 0xc5, 0xdb, 0xe2, 0xea, 0x94, 0x8b, 0xc4, 0xd5, 0x9d, 0xf8, 0x90, 0x6b,\n   0xb1, 0x0d, 0xd6, 0xeb, 0xc6, 0x0e, 0xcf, 0xad, 0x08, 0x4e, 0xd7, 0xe3, 0x5d, 0x50, 0x1e, 0xb3,\n   0x5b, 0x23, 0x38, 0x34, 0x68, 0x46, 0x03, 0x8c, 0xdd, 0x9c, 0x7d, 0xa0, 0xcd, 0x1a, 0x41, 0x1c\n  }\n};\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_x_mm_gf2p8matrix_multiply_epi64_epi8 (simde__m128i x, simde__m128i A) {\n  #if defined(SIMDE_X86_SSSE3_NATIVE)\n    const __m128i byte_select = _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);\n    const __m128i zero = _mm_setzero_si128();\n    __m128i r, a, p, X;\n\n    a = _mm_shuffle_epi8(A, _mm_setr_epi8(7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8));\n    X = x;\n    r = zero;\n\n    #if !defined(__INTEL_COMPILER)\n      SIMDE_VECTORIZE\n    #endif\n    for (int i = 0 ; i < 8 ; i++) {\n      p = _mm_insert_epi16(zero, _mm_movemask_epi8(a), 0);\n      p = _mm_shuffle_epi8(p, byte_select);\n      p = _mm_and_si128(p, _mm_cmpgt_epi8(zero, X));\n      r = _mm_xor_si128(r, p);\n      a = _mm_add_epi8(a, a);\n      X = _mm_add_epi8(X, X);\n    }\n\n    return r;\n  #elif defined(SIMDE_X86_SSE2_NATIVE)\n    const __m128i zero = _mm_setzero_si128();\n    __m128i r, a, p, X;\n\n    a = _mm_shufflehi_epi16(A, (0 << 6) + (1 << 4) + (2 << 2) + (3 << 0));\n    a = _mm_shufflelo_epi16(a, (0 << 6) + (1 << 4) + (2 << 2) + (3 << 0));\n    a = _mm_or_si128(_mm_slli_epi16(a, 8), _mm_srli_epi16(a, 8));\n    X = _mm_unpacklo_epi8(x, _mm_unpackhi_epi64(x, x));\n    r = zero;\n\n    #if !defined(__INTEL_COMPILER)\n      SIMDE_VECTORIZE\n    #endif\n    for (int i = 0 ; i < 8 ; i++) {\n     
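 /* Bit-serial matrix product: each pass gathers the current top bit of\n       * all 16 bytes of a with _mm_movemask_epi8, broadcasts that 16-bit\n       * pattern to every lane, keeps it only in byte positions whose byte\n       * of X has its top bit set, and XORs the survivors into r; doubling\n       * a and X then advances both to the next bit. */\n     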
 p = _mm_set1_epi16(HEDLEY_STATIC_CAST(short, _mm_movemask_epi8(a)));\n      p = _mm_and_si128(p, _mm_cmpgt_epi8(zero, X));\n      r = _mm_xor_si128(r, p);\n      a = _mm_add_epi8(a, a);\n      X = _mm_add_epi8(X, X);\n    }\n\n    return _mm_packus_epi16(_mm_srli_epi16(_mm_slli_epi16(r, 8), 8), _mm_srli_epi16(r, 8));\n  #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n    static const uint8_t byte_interleave[16] = {0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15};\n    static const uint8_t byte_deinterleave[16] = {0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15};\n    static const uint8_t mask_d[16] = {128, 128, 64, 64, 32, 32, 16, 16, 8, 8, 4, 4, 2, 2, 1, 1};\n    const int8x16_t mask = vreinterpretq_s8_u8(vld1q_u8(mask_d));\n    int8x16_t r, a, t, X;\n\n    t = simde__m128i_to_neon_i8(A);\n    a = vqtbl1q_s8(t, vld1q_u8(byte_interleave));\n    t = simde__m128i_to_neon_i8(x);\n    X = vqtbl1q_s8(t, vld1q_u8(byte_interleave));\n    r = vdupq_n_s8(0);\n\n    #if !defined(__INTEL_COMPILER)\n      SIMDE_VECTORIZE\n    #endif\n    for (int i = 0 ; i < 8 ; i++) {\n      t = vshrq_n_s8(a, 7);\n      t = vandq_s8(t, mask);\n      t = vreinterpretq_s8_u16(vdupq_n_u16(vaddvq_u16(vreinterpretq_u16_s8(t))));\n      t = vandq_s8(t, vshrq_n_s8(X, 7));\n      r = veorq_s8(r, t);\n      a = vshlq_n_s8(a, 1);\n      X = vshlq_n_s8(X, 1);\n    }\n\n    r = vqtbl1q_s8(r, vld1q_u8(byte_deinterleave));\n    return simde__m128i_from_neon_i8(r);\n  #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n    static const uint8_t mask_d[16] = {128, 64, 32, 16, 8, 4, 2, 1, 128, 64, 32, 16, 8, 4, 2, 1};\n    const int8x16_t mask = vreinterpretq_s8_u8(vld1q_u8(mask_d));\n    int8x16_t r, a, t, X;\n    int16x8_t t16;\n    int32x4_t t32;\n\n    a = simde__m128i_to_neon_i8(A);\n    X = simde__m128i_to_neon_i8(x);\n    r = vdupq_n_s8(0);\n\n    #if !defined(__INTEL_COMPILER)\n      SIMDE_VECTORIZE\n    #endif\n    for (int i = 0 ; i < 8 ; i++) {\n      t = vshrq_n_s8(a, 7);\n      t = vandq_s8(t, mask);\n      t16 = vreinterpretq_s16_s8 (vorrq_s8 (t  , vrev64q_s8 (t  )));\n      t32 = vreinterpretq_s32_s16(vorrq_s16(t16, vrev64q_s16(t16)));\n      t   = vreinterpretq_s8_s32 (vorrq_s32(t32, vrev64q_s32(t32)));\n      t = vandq_s8(t, vshrq_n_s8(X, 7));\n      r = veorq_s8(r, t);\n      a = vshlq_n_s8(a, 1);\n      X = vshlq_n_s8(X, 1);\n    }\n\n    return simde__m128i_from_neon_i8(r);\n  #elif defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE)\n    static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) byte_interleave = {0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15};\n    static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) byte_deinterleave= {0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15};\n    static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) bit_select = {0, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120};\n    static const SIMDE_POWER_ALTIVEC_VECTOR(signed char) zero = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};\n    SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) a, p, r;\n    SIMDE_POWER_ALTIVEC_VECTOR(signed char) X;\n\n    X = simde__m128i_to_altivec_i8(x);\n    a = simde__m128i_to_altivec_u8(A);\n    X = vec_perm(X, X, byte_interleave);\n    r = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), zero);\n\n    #if !defined(__INTEL_COMPILER)\n      SIMDE_VECTORIZE\n    #endif\n    for (int i = 0 ; i < 8 ; i++) {\n      #if defined(SIMDE_BUG_CLANG_50932)\n        p = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char),\n                              
vec_bperm(HEDLEY_STATIC_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned __int128), a), bit_select));\n      #else\n        p = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_bperm_u128(a, bit_select));\n      #endif\n      p = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char),\n                                  vec_splat(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), p), 3));\n      p &= X < zero;\n      r ^= p;\n      a += a;\n      X += X;\n    }\n\n    r = vec_perm(r, r, byte_deinterleave);\n    return simde__m128i_from_altivec_u8(r);\n  #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)\n    static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) mask = {128, 64, 32, 16, 8, 4, 2, 1, 128, 64, 32, 16, 8, 4, 2, 1};\n    static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) byte_select = {7, 7, 7, 7, 7, 7, 7, 7, 15, 15, 15, 15, 15, 15, 15, 15};\n    static const SIMDE_POWER_ALTIVEC_VECTOR(signed char) zero = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};\n    SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) p, r;\n    SIMDE_POWER_ALTIVEC_VECTOR(signed char) a, X;\n\n    X = simde__m128i_to_altivec_i8(x);\n    a = simde__m128i_to_altivec_i8(A);\n    r = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), zero);\n\n    #if !defined(__INTEL_COMPILER)\n      SIMDE_VECTORIZE\n    #endif\n    for (int i = 0 ; i < 8 ; i++) {\n      p = a < zero;\n      p &= mask;\n      p = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char),\n                                  vec_sum2(vec_sum4(p, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), zero)),\n                                           HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), zero)));\n      p = vec_perm(p, p, byte_select);\n      p &= X < zero;\n      r ^= p;\n      a += a;\n      X += X;\n    }\n\n    return simde__m128i_from_altivec_u8(r);\n  #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)\n    static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) byte_interleave = {0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15};\n    static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) byte_deinterleave= {0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15};\n    static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) bit_select = {64, 72, 80, 88, 96, 104, 112, 120, 0, 8, 16, 24, 32, 40, 48, 56};\n    const SIMDE_POWER_ALTIVEC_VECTOR(signed char) zero = vec_splats(HEDLEY_STATIC_CAST(signed char, 0));\n    SIMDE_POWER_ALTIVEC_VECTOR(signed char) X;\n    SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) a, p, r;\n\n    X = simde__m128i_to_altivec_i8(x);\n    a = simde__m128i_to_altivec_u8(A);\n    X = vec_perm(X, X, byte_interleave);\n    r = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), zero);\n\n    #if !defined(__INTEL_COMPILER)\n      SIMDE_VECTORIZE\n    #endif\n    for (int i = 0 ; i < 8 ; i++) {\n      #if defined(SIMDE_BUG_CLANG_50932)\n        p = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char),\n                                    vec_bperm(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned __int128), a), bit_select));\n      #else\n        p = vec_bperm(a, bit_select);\n      #endif\n      p = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char),\n                                  vec_splat(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), p), 4));\n      p = vec_and(p, vec_cmplt(X, zero));\n      r = vec_xor(r, p);\n      a = vec_add(a, a);\n      X = vec_add(X, 
X);\n    }\n\n    r = vec_perm(r, r, byte_deinterleave);\n    return simde__m128i_from_altivec_u8(r);\n  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)\n    static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) mask = {128, 64, 32, 16, 8, 4, 2, 1, 128, 64, 32, 16, 8, 4, 2, 1};\n    static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) byte_select = {4, 4, 4, 4, 4, 4, 4, 4, 12, 12, 12, 12, 12, 12, 12, 12};\n    const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) sevens = vec_splats(HEDLEY_STATIC_CAST(unsigned char, 7));\n    const SIMDE_POWER_ALTIVEC_VECTOR(signed char) zero = vec_splats(HEDLEY_STATIC_CAST(signed char, 0));\n    SIMDE_POWER_ALTIVEC_VECTOR(signed char) X;\n    SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) a, p, r;\n\n    X = simde__m128i_to_altivec_i8(x);\n    a = simde__m128i_to_altivec_u8(A);\n    r = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), zero);\n\n    #if !defined(__INTEL_COMPILER)\n      SIMDE_VECTORIZE\n    #endif\n    for (int i = 0 ; i < 8 ; i++) {\n      p = vec_sr(a, sevens);\n      p = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char),\n                                  vec_msum(p,\n                                           mask,\n                                           HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), zero)));\n      p = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char),\n                                  vec_sum2s(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), p),\n                                            HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), zero)));\n      p = vec_perm(p, p, byte_select);\n      p = vec_and(p, vec_cmplt(X, zero));\n      r = vec_xor(r, p);\n      a = vec_add(a, a);\n      X = vec_add(X, X);\n    }\n\n    return simde__m128i_from_altivec_u8(r);\n  #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n    const v128_t zero = wasm_i8x16_splat(0);\n    v128_t a, p, r, X;\n\n    X = simde__m128i_to_wasm_v128(x);\n    a = simde__m128i_to_wasm_v128(A);\n    a = wasm_i8x16_shuffle(a, a, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8);\n    X = wasm_i8x16_shuffle(X, X, 0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15);\n    r = zero;\n\n    #if !defined(__INTEL_COMPILER)\n      SIMDE_VECTORIZE\n    #endif\n    for (int i = 0 ; i < 8 ; i++) {\n      p = wasm_i16x8_splat(HEDLEY_STATIC_CAST(int16_t, wasm_i8x16_bitmask(a)));\n      p = wasm_v128_and(p, wasm_i8x16_lt(X, zero));\n      r = wasm_v128_xor(r, p);\n      a = wasm_i8x16_add(a, a);\n      X = wasm_i8x16_add(X, X);\n    }\n\n    r = wasm_i8x16_shuffle(r, r, 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15);\n    return simde__m128i_from_wasm_v128(r);\n  #else\n    simde__m128i_private\n      r_,\n      x_ = simde__m128i_to_private(x),\n      A_ = simde__m128i_to_private(A);\n\n    const uint64_t ones = UINT64_C(0x0101010101010101);\n    const uint64_t mask = UINT64_C(0x0102040810204080);\n    uint64_t q;\n\n    #if !defined(__INTEL_COMPILER)\n      SIMDE_VECTORIZE\n    #endif\n    for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {\n      q = simde_endian_bswap64_le(A_.u64[i / 8]);\n      q &= HEDLEY_STATIC_CAST(uint64_t, x_.u8[i]) * ones;\n      q ^= q >> 4;\n      q ^= q >> 2;\n      q ^= q >> 1;\n      q &= ones;\n      q *= 255;\n      q &= mask;\n      q |= q >> 32;\n      q |= q >> 16;\n      q |= q >> 8;\n      r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, q);\n    }\n\n    return simde__m128i_from_private(r_);\n  
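/* Note on the scalar fallback above: the xor-shift cascade (q ^= q >> 4/2/1) reduces each byte of q to its parity, i.e. the GF(2) dot product of one matrix row with x; multiplying the resulting 0x00/0x01 bytes by 255 widens them to 0x00/0xFF, and the 0x0102040810204080 mask plus the or-fold then gathers the eight row parities into a single result byte. */\n  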
#endif\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_x_mm256_gf2p8matrix_multiply_epi64_epi8 (simde__m256i x, simde__m256i A) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    simde__m256i r, a, p;\n    const simde__m256i byte_select = simde_x_mm256_set_epu64x(UINT64_C(0x0303030303030303), UINT64_C(0x0202020202020202),\n                                                              UINT64_C(0x0101010101010101), UINT64_C(0x0000000000000000));\n    a = simde_mm256_shuffle_epi8(A, simde_mm256_broadcastsi128_si256(simde_x_mm_set_epu64x(UINT64_C(0x08090A0B0C0D0E0F), UINT64_C(0x0001020304050607))));\n    r = simde_mm256_setzero_si256();\n\n    #if !defined(__INTEL_COMPILER)\n      SIMDE_VECTORIZE\n    #endif\n    for (int i = 0 ; i < 8 ; i++) {\n      p = simde_mm256_set1_epi32(simde_mm256_movemask_epi8(a));\n      p = simde_mm256_shuffle_epi8(p, byte_select);\n      p = simde_mm256_xor_si256(r, p);\n      r = simde_mm256_blendv_epi8(r, p, x);\n      a = simde_mm256_add_epi8(a, a);\n      x = simde_mm256_add_epi8(x, x);\n    }\n\n    return r;\n  #else\n    simde__m256i_private\n      r_,\n      x_ = simde__m256i_to_private(x),\n      A_ = simde__m256i_to_private(A);\n\n    #if !defined(__INTEL_COMPILER)\n      SIMDE_VECTORIZE\n    #endif\n    for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) {\n      r_.m128i[i] = simde_x_mm_gf2p8matrix_multiply_epi64_epi8(x_.m128i[i], A_.m128i[i]);\n    }\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512i\nsimde_x_mm512_gf2p8matrix_multiply_epi64_epi8 (simde__m512i x, simde__m512i A) {\n  #if defined(SIMDE_X86_AVX512BW_NATIVE)\n    simde__m512i r, a, p;\n    const simde__m512i byte_select = simde_x_mm512_set_epu64(UINT64_C(0x0707070707070707), UINT64_C(0x0606060606060606), UINT64_C(0x0505050505050505), UINT64_C(0x0404040404040404),\n                                                             UINT64_C(0x0303030303030303), UINT64_C(0x0202020202020202), UINT64_C(0x0101010101010101), UINT64_C(0X0000000000000000));\n    a = simde_mm512_shuffle_epi8(A, simde_mm512_broadcast_i32x4(simde_x_mm_set_epu64x(UINT64_C(0x08090A0B0C0D0E0F), UINT64_C(0x0001020304050607))));\n    r = simde_mm512_setzero_si512();\n\n    #if !defined(__INTEL_COMPILER)\n      SIMDE_VECTORIZE\n    #endif\n    for (int i = 0 ; i < 8 ; i++) {\n      p = simde_mm512_set1_epi64(HEDLEY_STATIC_CAST(int64_t, simde_mm512_movepi8_mask(a)));\n      p = simde_mm512_maskz_shuffle_epi8(simde_mm512_movepi8_mask(x), p, byte_select);\n      r = simde_mm512_xor_si512(r, p);\n      a = simde_mm512_add_epi8(a, a);\n      x = simde_mm512_add_epi8(x, x);\n    }\n\n    return r;\n  #else\n    simde__m512i_private\n      r_,\n      x_ = simde__m512i_to_private(x),\n      A_ = simde__m512i_to_private(A);\n\n    #if !defined(__INTEL_COMPILER)\n      SIMDE_VECTORIZE\n    #endif\n    for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {\n      r_.m256i[i] = simde_x_mm256_gf2p8matrix_multiply_epi64_epi8(x_.m256i[i], A_.m256i[i]);\n    }\n\n    return simde__m512i_from_private(r_);\n  #endif\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_x_mm_gf2p8inverse_epi8 (simde__m128i x) {\n  #if defined(SIMDE_X86_SSE4_1_NATIVE)\n    /* N.B. 
CM: this fallback may not be faster */\n    simde__m128i r, u, t, test;\n    const simde__m128i sixteens = simde_mm_set1_epi8(16);\n    const simde__m128i masked_x = simde_mm_and_si128(x, simde_mm_set1_epi8(0x0F));\n\n    test = simde_mm_set1_epi8(INT8_MIN /* 0x80 */);\n    x = simde_mm_xor_si128(x, test);\n    r = simde_mm_shuffle_epi8(simde_x_gf2p8inverse_lut.m128i[0], masked_x);\n\n    #if !defined(__INTEL_COMPILER)\n      SIMDE_VECTORIZE\n    #endif\n    for (int i = 1 ; i < 16 ; i++) {\n      t = simde_mm_shuffle_epi8(simde_x_gf2p8inverse_lut.m128i[i], masked_x);\n      test = simde_mm_add_epi8(test, sixteens);\n      u = simde_mm_cmplt_epi8(x, test);\n      r = simde_mm_blendv_epi8(t, r, u);\n    }\n\n    return r;\n  #else\n    simde__m128i_private\n      r_,\n      x_ = simde__m128i_to_private(x);\n\n    #if !defined(__INTEL_COMPILER)\n      SIMDE_VECTORIZE\n    #endif\n    for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {\n      r_.u8[i] = simde_x_gf2p8inverse_lut.u8[x_.u8[i]];\n    }\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_x_mm256_gf2p8inverse_epi8 (simde__m256i x) {\n  #if defined(SIMDE_X86_AVX2_NATIVE)\n    /* N.B. CM: this fallback may not be faster */\n    simde__m256i r, u, t, test;\n    const simde__m256i sixteens = simde_mm256_set1_epi8(16);\n    const simde__m256i masked_x = simde_mm256_and_si256(x, simde_mm256_set1_epi8(0x0F));\n\n    test = simde_mm256_set1_epi8(INT8_MIN /* 0x80 */);\n    x = simde_mm256_xor_si256(x, test);\n    r = simde_mm256_shuffle_epi8(simde_mm256_broadcastsi128_si256(simde_x_gf2p8inverse_lut.m128i[0]), masked_x);\n\n    #if !defined(__INTEL_COMPILER)\n      SIMDE_VECTORIZE\n    #endif\n    for (int i = 1 ; i < 16 ; i++) {\n      t = simde_mm256_shuffle_epi8(simde_mm256_broadcastsi128_si256(simde_x_gf2p8inverse_lut.m128i[i]), masked_x);\n      test = simde_mm256_add_epi8(test, sixteens);\n      u = simde_mm256_cmpgt_epi8(test, x);\n      r = simde_mm256_blendv_epi8(t, r, u);\n    }\n\n    return r;\n  #else\n    simde__m256i_private\n      r_,\n      x_ = simde__m256i_to_private(x);\n\n    #if !defined(__INTEL_COMPILER)\n      SIMDE_VECTORIZE\n    #endif\n    for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) {\n      r_.m128i[i] = simde_x_mm_gf2p8inverse_epi8(x_.m128i[i]);\n    }\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512i\nsimde_x_mm512_gf2p8inverse_epi8 (simde__m512i x) {\n  /* N.B. CM: TODO: later add VBMI version using just two _mm512_permutex2var_epi8 and friends */\n  /* But except for Cannon Lake all processors with VBMI also have GFNI */\n  #if defined(SIMDE_X86_AVX512BW_NATIVE)\n    /* N.B. 
CM: this fallback may not be faster */\n    simde__m512i r, test;\n    const simde__m512i sixteens = simde_mm512_set1_epi8(16);\n    const simde__m512i masked_x = simde_mm512_and_si512(x, simde_mm512_set1_epi8(0x0F));\n\n    r = simde_mm512_shuffle_epi8(simde_mm512_broadcast_i32x4(simde_x_gf2p8inverse_lut.m128i[0]), masked_x);\n    test = sixteens;\n\n    #if !defined(__INTEL_COMPILER)\n      SIMDE_VECTORIZE\n    #endif\n    for (int i = 1 ; i < 16 ; i++) {\n      r = simde_mm512_mask_shuffle_epi8(r, simde_mm512_cmpge_epu8_mask(x, test), simde_mm512_broadcast_i32x4(simde_x_gf2p8inverse_lut.m128i[i]), masked_x);\n      test = simde_mm512_add_epi8(test, sixteens);\n    }\n\n    return r;\n  #else\n    simde__m512i_private\n      r_,\n      x_ = simde__m512i_to_private(x);\n\n    #if !defined(__INTEL_COMPILER)\n      SIMDE_VECTORIZE\n    #endif\n    for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {\n      r_.m256i[i] = simde_x_mm256_gf2p8inverse_epi8(x_.m256i[i]);\n    }\n\n    return simde__m512i_from_private(r_);\n  #endif\n}\n\n#define simde_x_mm_gf2p8matrix_multiply_inverse_epi64_epi8(x, A) simde_x_mm_gf2p8matrix_multiply_epi64_epi8(simde_x_mm_gf2p8inverse_epi8(x), A)\n#define simde_x_mm256_gf2p8matrix_multiply_inverse_epi64_epi8(x, A) simde_x_mm256_gf2p8matrix_multiply_epi64_epi8(simde_x_mm256_gf2p8inverse_epi8(x), A)\n#define simde_x_mm512_gf2p8matrix_multiply_inverse_epi64_epi8(x, A) simde_x_mm512_gf2p8matrix_multiply_epi64_epi8(simde_x_mm512_gf2p8inverse_epi8(x), A)\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_gf2p8affine_epi64_epi8 (simde__m128i x, simde__m128i A, int b)\n    SIMDE_REQUIRE_CONSTANT_RANGE(b, 0, 255) {\n  return simde_mm_xor_si128(simde_x_mm_gf2p8matrix_multiply_epi64_epi8(x, A), simde_mm_set1_epi8(HEDLEY_STATIC_CAST(int8_t, b)));\n}\n#if defined(SIMDE_X86_GFNI_NATIVE)\n  #define simde_mm_gf2p8affine_epi64_epi8(x, A, b) _mm_gf2p8affine_epi64_epi8(x, A, b)\n#endif\n#if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES)\n  #undef _mm_gf2p8affine_epi64_epi8\n  #define _mm_gf2p8affine_epi64_epi8(x, A, b) simde_mm_gf2p8affine_epi64_epi8(x, A, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_gf2p8affine_epi64_epi8 (simde__m256i x, simde__m256i A, int b)\n    SIMDE_REQUIRE_CONSTANT_RANGE(b, 0, 255) {\n  return simde_mm256_xor_si256(simde_x_mm256_gf2p8matrix_multiply_epi64_epi8(x, A), simde_mm256_set1_epi8(HEDLEY_STATIC_CAST(int8_t, b)));\n}\n#if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n  #define simde_mm256_gf2p8affine_epi64_epi8(x, A, b) _mm256_gf2p8affine_epi64_epi8(x, A, b)\n#endif\n#if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_gf2p8affine_epi64_epi8\n  #define _mm256_gf2p8affine_epi64_epi8(x, A, b) simde_mm256_gf2p8affine_epi64_epi8(x, A, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512i\nsimde_mm512_gf2p8affine_epi64_epi8 (simde__m512i x, simde__m512i A, int b)\n    SIMDE_REQUIRE_CONSTANT_RANGE(b, 0, 255) {\n  return simde_mm512_xor_si512(simde_x_mm512_gf2p8matrix_multiply_epi64_epi8(x, A), simde_mm512_set1_epi8(HEDLEY_STATIC_CAST(int8_t, b)));\n}\n#if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n  #define simde_mm512_gf2p8affine_epi64_epi8(x, A, b) _mm512_gf2p8affine_epi64_epi8(x, A, b)\n#endif\n#if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_gf2p8affine_epi64_epi8\n  #define _mm512_gf2p8affine_epi64_epi8(x, A, b) simde_mm512_gf2p8affine_epi64_epi8(x, A, b)\n#endif\n\n#if defined(SIMDE_X86_GFNI_NATIVE) && 
defined(SIMDE_X86_AVX512VL_NATIVE)\n  #define simde_mm_mask_gf2p8affine_epi64_epi8(src, k, x, A, b) _mm_mask_gf2p8affine_epi64_epi8(src, k, x, A, b)\n#else\n  #define simde_mm_mask_gf2p8affine_epi64_epi8(src, k, x, A, b) simde_mm_mask_mov_epi8(src, k, simde_mm_gf2p8affine_epi64_epi8(x, A, b))\n#endif\n#if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES)\n  #undef _mm_mask_gf2p8affine_epi64_epi8\n  #define _mm_mask_gf2p8affine_epi64_epi8(src, k, x, A, b) simde_mm_mask_gf2p8affine_epi64_epi8(src, k, x, A, b)\n#endif\n\n#if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)\n  #define simde_mm256_mask_gf2p8affine_epi64_epi8(src, k, x, A, b) _mm256_mask_gf2p8affine_epi64_epi8(src, k, x, A, b)\n#else\n  #define simde_mm256_mask_gf2p8affine_epi64_epi8(src, k, x, A, b) simde_mm256_mask_mov_epi8(src, k, simde_mm256_gf2p8affine_epi64_epi8(x, A, b))\n#endif\n#if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_mask_gf2p8affine_epi64_epi8\n  #define _mm256_mask_gf2p8affine_epi64_epi8(src, k, x, A, b) simde_mm256_mask_gf2p8affine_epi64_epi8(src, k, x, A, b)\n#endif\n\n#if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n  #define simde_mm512_mask_gf2p8affine_epi64_epi8(src, k, x, A, b) _mm512_mask_gf2p8affine_epi64_epi8(src, k, x, A, b)\n#else\n  #define simde_mm512_mask_gf2p8affine_epi64_epi8(src, k, x, A, b) simde_mm512_mask_mov_epi8(src, k, simde_mm512_gf2p8affine_epi64_epi8(x, A, b))\n#endif\n#if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_gf2p8affine_epi64_epi8\n  #define _mm512_mask_gf2p8affine_epi64_epi8(src, k, x, A, b) simde_mm512_mask_gf2p8affine_epi64_epi8(src, k, x, A, b)\n#endif\n\n#if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)\n  #define simde_mm_maskz_gf2p8affine_epi64_epi8(k, x, A, b) _mm_maskz_gf2p8affine_epi64_epi8(k, x, A, b)\n#else\n  #define simde_mm_maskz_gf2p8affine_epi64_epi8(k, x, A, b) simde_mm_maskz_mov_epi8(k, simde_mm_gf2p8affine_epi64_epi8(x, A, b))\n#endif\n#if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES)\n  #undef _mm_maskz_gf2p8affine_epi64_epi8\n  #define _mm_maskz_gf2p8affine_epi64_epi8(k, x, A, b) simde_mm_maskz_gf2p8affine_epi64_epi8(k, x, A, b)\n#endif\n\n#if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)\n  #define simde_mm256_maskz_gf2p8affine_epi64_epi8(k, x, A, b) _mm256_maskz_gf2p8affine_epi64_epi8(k, x, A, b)\n#else\n  #define simde_mm256_maskz_gf2p8affine_epi64_epi8(k, x, A, b) simde_mm256_maskz_mov_epi8(k, simde_mm256_gf2p8affine_epi64_epi8(x, A, b))\n#endif\n#if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_maskz_gf2p8affine_epi64_epi8\n  #define _mm256_maskz_gf2p8affine_epi64_epi8(k, x, A, b) simde_mm256_maskz_gf2p8affine_epi64_epi8(k, x, A, b)\n#endif\n\n#if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n  #define simde_mm512_maskz_gf2p8affine_epi64_epi8(k, x, A, b) _mm512_maskz_gf2p8affine_epi64_epi8(k, x, A, b)\n#else\n  #define simde_mm512_maskz_gf2p8affine_epi64_epi8(k, x, A, b) simde_mm512_maskz_mov_epi8(k, simde_mm512_gf2p8affine_epi64_epi8(x, A, b))\n#endif\n#if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_maskz_gf2p8affine_epi64_epi8\n  #define _mm512_maskz_gf2p8affine_epi64_epi8(k, x, A, b) simde_mm512_maskz_gf2p8affine_epi64_epi8(k, x, A, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_gf2p8affineinv_epi64_epi8 (simde__m128i x, simde__m128i A, int b)\n    SIMDE_REQUIRE_CONSTANT_RANGE(b, 0, 255) {\n  return 
simde_mm_xor_si128(simde_x_mm_gf2p8matrix_multiply_inverse_epi64_epi8(x, A), simde_mm_set1_epi8(HEDLEY_STATIC_CAST(int8_t, b)));\n}\n#if defined(SIMDE_X86_GFNI_NATIVE)\n  #define simde_mm_gf2p8affineinv_epi64_epi8(x, A, b) _mm_gf2p8affineinv_epi64_epi8(x, A, b)\n#endif\n#if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES)\n  #undef _mm_gf2p8affineinv_epi64_epi8\n  #define _mm_gf2p8affineinv_epi64_epi8(x, A, b) simde_mm_gf2p8affineinv_epi64_epi8(x, A, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_gf2p8affineinv_epi64_epi8 (simde__m256i x, simde__m256i A, int b)\n    SIMDE_REQUIRE_CONSTANT_RANGE(b, 0, 255) {\n  return simde_mm256_xor_si256(simde_x_mm256_gf2p8matrix_multiply_inverse_epi64_epi8(x, A), simde_mm256_set1_epi8(HEDLEY_STATIC_CAST(int8_t, b)));\n}\n#if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n  #define simde_mm256_gf2p8affineinv_epi64_epi8(x, A, b) _mm256_gf2p8affineinv_epi64_epi8(x, A, b)\n#endif\n#if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_gf2p8affineinv_epi64_epi8\n  #define _mm256_gf2p8affineinv_epi64_epi8(x, A, b) simde_mm256_gf2p8affineinv_epi64_epi8(x, A, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512i\nsimde_mm512_gf2p8affineinv_epi64_epi8 (simde__m512i x, simde__m512i A, int b)\n    SIMDE_REQUIRE_CONSTANT_RANGE(b, 0, 255) {\n  return simde_mm512_xor_si512(simde_x_mm512_gf2p8matrix_multiply_inverse_epi64_epi8(x, A), simde_mm512_set1_epi8(HEDLEY_STATIC_CAST(int8_t, b)));\n}\n#if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n  #define simde_mm512_gf2p8affineinv_epi64_epi8(x, A, b) _mm512_gf2p8affineinv_epi64_epi8(x, A, b)\n#endif\n#if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_gf2p8affineinv_epi64_epi8\n  #define _mm512_gf2p8affineinv_epi64_epi8(x, A, b) simde_mm512_gf2p8affineinv_epi64_epi8(x, A, b)\n#endif\n\n#if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)\n  #define simde_mm_mask_gf2p8affineinv_epi64_epi8(src, k, x, A, b) _mm_mask_gf2p8affineinv_epi64_epi8(src, k, x, A, b)\n#else\n  #define simde_mm_mask_gf2p8affineinv_epi64_epi8(src, k, x, A, b) simde_mm_mask_mov_epi8(src, k, simde_mm_gf2p8affineinv_epi64_epi8(x, A, b))\n#endif\n#if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES)\n  #undef _mm_mask_gf2p8affineinv_epi64_epi8\n  #define _mm_mask_gf2p8affineinv_epi64_epi8(src, k, x, A, b) simde_mm_mask_gf2p8affineinv_epi64_epi8(src, k, x, A, b)\n#endif\n\n#if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)\n  #define simde_mm256_mask_gf2p8affineinv_epi64_epi8(src, k, x, A, b) _mm256_mask_gf2p8affineinv_epi64_epi8(src, k, x, A, b)\n#else\n  #define simde_mm256_mask_gf2p8affineinv_epi64_epi8(src, k, x, A, b) simde_mm256_mask_mov_epi8(src, k, simde_mm256_gf2p8affineinv_epi64_epi8(x, A, b))\n#endif\n#if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_mask_gf2p8affineinv_epi64_epi8\n  #define _mm256_mask_gf2p8affineinv_epi64_epi8(src, k, x, A, b) simde_mm256_mask_gf2p8affineinv_epi64_epi8(src, k, x, A, b)\n#endif\n\n#if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n  #define simde_mm512_mask_gf2p8affineinv_epi64_epi8(src, k, x, A, b) _mm512_mask_gf2p8affineinv_epi64_epi8(src, k, x, A, b)\n#else\n  #define simde_mm512_mask_gf2p8affineinv_epi64_epi8(src, k, x, A, b) simde_mm512_mask_mov_epi8(src, k, simde_mm512_gf2p8affineinv_epi64_epi8(x, A, b))\n#endif\n#if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_gf2p8affineinv_epi64_epi8\n  #define 
_mm512_mask_gf2p8affineinv_epi64_epi8(src, k, x, A, b) simde_mm512_mask_gf2p8affineinv_epi64_epi8(src, k, x, A, b)\n#endif\n\n#if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)\n  #define simde_mm_maskz_gf2p8affineinv_epi64_epi8(k, x, A, b) _mm_maskz_gf2p8affineinv_epi64_epi8(k, x, A, b)\n#else\n  #define simde_mm_maskz_gf2p8affineinv_epi64_epi8(k, x, A, b) simde_mm_maskz_mov_epi8(k, simde_mm_gf2p8affineinv_epi64_epi8(x, A, b))\n#endif\n#if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES)\n  #undef _mm_maskz_gf2p8affineinv_epi64_epi8\n  #define _mm_maskz_gf2p8affineinv_epi64_epi8(k, x, A, b) simde_mm_maskz_gf2p8affineinv_epi64_epi8(k, x, A, b)\n#endif\n\n#if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)\n  #define simde_mm256_maskz_gf2p8affineinv_epi64_epi8(k, x, A, b) _mm256_maskz_gf2p8affineinv_epi64_epi8(k, x, A, b)\n#else\n  #define simde_mm256_maskz_gf2p8affineinv_epi64_epi8(k, x, A, b) simde_mm256_maskz_mov_epi8(k, simde_mm256_gf2p8affineinv_epi64_epi8(x, A, b))\n#endif\n#if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_maskz_gf2p8affineinv_epi64_epi8\n  #define _mm256_maskz_gf2p8affineinv_epi64_epi8(k, x, A, b) simde_mm256_maskz_gf2p8affineinv_epi64_epi8(k, x, A, b)\n#endif\n\n#if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n  #define simde_mm512_maskz_gf2p8affineinv_epi64_epi8(k, x, A, b) _mm512_maskz_gf2p8affineinv_epi64_epi8(k, x, A, b)\n#else\n  #define simde_mm512_maskz_gf2p8affineinv_epi64_epi8(k, x, A, b) simde_mm512_maskz_mov_epi8(k, simde_mm512_gf2p8affineinv_epi64_epi8(x, A, b))\n#endif\n#if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_maskz_gf2p8affineinv_epi64_epi8\n  #define _mm512_maskz_gf2p8affineinv_epi64_epi8(k, x, A, b) simde_mm512_maskz_gf2p8affineinv_epi64_epi8(k, x, A, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i simde_mm_gf2p8mul_epi8 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_GFNI_NATIVE) && (defined(SIMDE_X86_AVX512VL_NATIVE) || !defined(SIMDE_X86_AVX512F_NATIVE))\n    return _mm_gf2p8mul_epi8(a, b);\n  #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n    const poly8x16_t pa = vreinterpretq_p8_u8(simde__m128i_to_neon_u8(a));\n    const poly8x16_t pb = vreinterpretq_p8_u8(simde__m128i_to_neon_u8(b));\n    const uint8x16_t lo = vreinterpretq_u8_p16(vmull_p8(vget_low_p8(pa), vget_low_p8(pb)));\n    #if defined (SIMDE_ARM_NEON_A64V8_NATIVE)\n      uint8x16_t hi = vreinterpretq_u8_p16(vmull_high_p8(pa, pb));\n    #else\n      uint8x16_t hi = vreinterpretq_u8_p16(vmull_p8(vget_high_p8(pa), vget_high_p8(pb)));\n    #endif\n    uint8x16x2_t hilo = vuzpq_u8(lo, hi);\n    uint8x16_t r = hilo.val[0];\n    hi = hilo.val[1];\n    const uint8x16_t idxHi = vshrq_n_u8(hi, 4);\n    const uint8x16_t idxLo = vandq_u8(hi, vdupq_n_u8(0xF));\n\n    #if defined (SIMDE_ARM_NEON_A64V8_NATIVE)\n      static const uint8_t reduceLutHiData[] = {\n        0x00, 0xab, 0x4d, 0xe6, 0x9a, 0x31, 0xd7, 0x7c,\n        0x2f, 0x84, 0x62, 0xc9, 0xb5, 0x1e, 0xf8, 0x53\n      };\n      static const uint8_t reduceLutLoData[] = {\n        0x00, 0x1b, 0x36, 0x2d, 0x6c, 0x77, 0x5a, 0x41,\n        0xd8, 0xc3, 0xee, 0xf5, 0xb4, 0xaf, 0x82, 0x99\n      };\n      const uint8x16_t reduceLutHi = vld1q_u8(reduceLutHiData);\n      const uint8x16_t reduceLutLo = vld1q_u8(reduceLutLoData);\n      r = veorq_u8(r, vqtbl1q_u8(reduceLutHi, idxHi));\n      r = veorq_u8(r, vqtbl1q_u8(reduceLutLo, idxLo));\n    #else\n      static const uint8_t reduceLutHiData[] = {\n        0x00, 0x2f,\n        0xab, 
0x84,\n        0x4d, 0x62,\n        0xe6, 0xc9,\n        0x9a, 0xb5,\n        0x31, 0x1e,\n        0xd7, 0xf8,\n        0x7c, 0x53\n      };\n      static const uint8_t reduceLutLoData[] = {\n        0x00, 0xd8,\n        0x1b, 0xc3,\n        0x36, 0xee,\n        0x2d, 0xf5,\n        0x6c, 0xb4,\n        0x77, 0xaf,\n        0x5a, 0x82,\n        0x41, 0x99\n      };\n      const uint8x8x2_t reduceLutHi = vld2_u8(reduceLutHiData);\n      const uint8x8x2_t reduceLutLo = vld2_u8(reduceLutLoData);\n      r = veorq_u8(r, vcombine_u8(vtbl2_u8(reduceLutHi, vget_low_u8(idxHi)), vtbl2_u8(reduceLutHi, vget_high_u8(idxHi))));\n      r = veorq_u8(r, vcombine_u8(vtbl2_u8(reduceLutLo, vget_low_u8(idxLo)), vtbl2_u8(reduceLutLo, vget_high_u8(idxLo))));\n    #endif\n    return simde__m128i_from_neon_u8(r);\n  #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)\n    SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) x, y, lo, hi;\n    SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) even, odd, mask0x00FF;\n    x = simde__m128i_to_altivec_u8(a);\n    y = simde__m128i_to_altivec_u8(b);\n    mask0x00FF = vec_splats(HEDLEY_STATIC_CAST(unsigned short, 0x00FF));\n    lo = y & HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), mask0x00FF);\n    hi = y ^ lo;\n    even = vec_gfmsum(x, lo);\n    odd  = vec_gfmsum(x, hi);\n    lo = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_sel(vec_rli(odd, 8), even, mask0x00FF));\n    hi = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_sel(odd, vec_rli(even, 8), mask0x00FF));\n    const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) reduceLutHi = {0x00, 0xab, 0x4d, 0xe6, 0x9a, 0x31, 0xd7, 0x7c, 0x2f, 0x84, 0x62, 0xc9, 0xb5, 0x1e, 0xf8, 0x53};\n    const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) reduceLutLo = {0x00, 0x1b, 0x36, 0x2d, 0x6c, 0x77, 0x5a, 0x41, 0xd8, 0xc3, 0xee, 0xf5, 0xb4, 0xaf, 0x82, 0x99};\n    lo = lo ^ vec_perm(reduceLutHi, reduceLutHi, vec_rli(hi, 4));\n    lo = lo ^ vec_perm(reduceLutLo, reduceLutLo, hi);\n    return simde__m128i_from_altivec_u8(lo);\n  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)\n    SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) x, y, r, t, m;\n    x = simde__m128i_to_altivec_u8(a);\n    y = simde__m128i_to_altivec_u8(b);\n\n    const SIMDE_POWER_ALTIVEC_VECTOR(signed char) zero = vec_splat_s8(0);\n\n    m = vec_splat_u8(0x01);\n\n    const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) fgp = vec_splats(HEDLEY_STATIC_CAST(unsigned char, SIMDE_X86_GFNI_FGP));\n    t = vec_and(y, m);\n    t = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_cmpeq(t, m));\n    r = vec_and(x, t);\n\n    #if !defined(__INTEL_COMPILER)\n      SIMDE_VECTORIZE\n    #endif\n    for (int i = 0 ; i < 7 ; i++) {\n      t = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_cmplt(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), x), zero));\n      x = vec_add(x, x);\n      t = vec_and(fgp, t);\n      x = vec_xor(x, t);\n      m = vec_add(m, m);\n      t = vec_and(y, m);\n      t = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_cmpeq(t, m));\n      t = vec_and(x, t);\n      r = vec_xor(r, t);\n    }\n\n    return simde__m128i_from_altivec_u8(r);\n  #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n    v128_t x, y, r, t, m;\n    x = simde__m128i_to_wasm_v128(a);\n    y = simde__m128i_to_wasm_v128(b);\n\n    m = wasm_i8x16_splat(0x01);\n\n    const v128_t fgp = wasm_i8x16_splat(SIMDE_X86_GFNI_FGP);\n\n    t = wasm_v128_and(y, m);\n    t = wasm_i8x16_eq(t, m);\n    r = 
wasm_v128_and(x, t);\n\n    #if !defined(__INTEL_COMPILER)\n      SIMDE_VECTORIZE\n    #endif\n    for (int i = 0 ; i < 7 ; i++) {\n      t = wasm_i8x16_shr(x, 7);\n      x = wasm_i8x16_add(x, x);\n      t = wasm_v128_and(fgp, t);\n      x = wasm_v128_xor(x, t);\n      m = wasm_i8x16_add(m, m);\n      t = wasm_v128_and(y, m);\n      t = wasm_i8x16_eq(t, m);\n      t = wasm_v128_and(x, t);\n      r = wasm_v128_xor(r, t);\n    }\n\n    return simde__m128i_from_wasm_v128(r);\n  #elif defined(SIMDE_X86_AVX512BW_NATIVE)\n    simde__m512i r4, t4, u4;\n    simde__mmask64 ma, mb;\n\n    simde__m512i a4 = simde_mm512_broadcast_i32x4(a);\n    const simde__m512i zero = simde_mm512_setzero_si512();\n    simde__mmask16 m8 = simde_mm512_cmpeq_epi32_mask(zero, zero);\n\n    const simde__m512i b4 = simde_mm512_broadcast_i32x4(b);\n\n    simde__m512i bits = simde_mm512_set_epi64(0x4040404040404040,\n                                              0x4040404040404040,\n                                              0x1010101010101010,\n                                              0x1010101010101010,\n                                              0x0404040404040404,\n                                              0x0404040404040404,\n                                              0x0101010101010101,\n                                              0x0101010101010101);\n\n    const simde__m512i fgp = simde_mm512_set1_epi8(SIMDE_X86_GFNI_FGP);\n\n    for (int i = 0 ; i < 3 ; i++) {\n      m8 = simde_kshiftli_mask16(m8, 4);\n\n      ma = simde_mm512_cmplt_epi8_mask(a4, zero);\n      u4 = simde_mm512_add_epi8(a4, a4);\n      t4 = simde_mm512_maskz_mov_epi8(ma, fgp);\n      u4 = simde_mm512_xor_epi32(u4, t4);\n\n      ma = simde_mm512_cmplt_epi8_mask(u4, zero);\n      u4 = simde_mm512_add_epi8(u4, u4);\n      t4 = simde_mm512_maskz_mov_epi8(ma, fgp);\n      a4 = simde_mm512_mask_xor_epi32(a4, m8, u4, t4);\n    }\n\n    mb = simde_mm512_test_epi8_mask(b4, bits);\n    bits = simde_mm512_add_epi8(bits, bits);\n    ma = simde_mm512_cmplt_epi8_mask(a4, zero);\n    r4 = simde_mm512_maskz_mov_epi8(mb, a4);\n    mb = simde_mm512_test_epi8_mask(b4, bits);\n    a4 = simde_mm512_add_epi8(a4, a4);\n    t4 = simde_mm512_maskz_mov_epi8(ma, fgp);\n    a4 = simde_mm512_xor_si512(a4, t4);\n    t4 = simde_mm512_maskz_mov_epi8(mb, a4);\n    r4 = simde_mm512_xor_si512(r4, t4);\n\n    r4 = simde_mm512_xor_si512(r4, simde_mm512_shuffle_i32x4(r4, r4, (1 << 6) + (0 << 4) + (3 << 2) + 2));\n    r4 = simde_mm512_xor_si512(r4, simde_mm512_shuffle_i32x4(r4, r4, (0 << 6) + (3 << 4) + (2 << 2) + 1));\n\n    return simde_mm512_extracti32x4_epi32(r4, 0);\n  #elif defined(SIMDE_X86_AVX2_NATIVE)\n    simde__m256i r2, t2;\n    simde__m256i a2 = simde_mm256_broadcastsi128_si256(a);\n    const simde__m256i zero = simde_mm256_setzero_si256();\n    const simde__m256i fgp = simde_mm256_set1_epi8(SIMDE_X86_GFNI_FGP);\n    const simde__m256i ones = simde_mm256_set1_epi8(0x01);\n    simde__m256i b2 = simde_mm256_set_m128i(simde_mm_srli_epi64(b, 4), b);\n\n    for (int i = 0 ; i < 4 ; i++) {\n      t2 = simde_mm256_cmpgt_epi8(zero, a2);\n      t2 = simde_mm256_and_si256(fgp, t2);\n      a2 = simde_mm256_add_epi8(a2, a2);\n      a2 = simde_mm256_xor_si256(a2, t2);\n    }\n\n    a2 = simde_mm256_inserti128_si256(a2, a, 0);\n\n    t2 = simde_mm256_and_si256(b2, ones);\n    t2 = simde_mm256_cmpeq_epi8(t2, ones);\n    r2 = simde_mm256_and_si256(a2, t2);\n\n    #if !defined(__INTEL_COMPILER)\n      SIMDE_VECTORIZE\n    #endif\n    for (int i = 0 ; i < 3 ; i++) {\n      
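/* Shift-and-add multiply on both 128-bit lanes at once: lane 0 walks bits 0-3 of b with a, lane 1 walks bits 4-7 with a*x^4; each pass multiplies a2 by x, folding in the FGP where the old MSB was set, then xor-accumulates the bytes selected by the next bit of b2. */\n      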
t2 = simde_mm256_cmpgt_epi8(zero, a2);\n      t2 = simde_mm256_and_si256(fgp, t2);\n      a2 = simde_mm256_add_epi8(a2, a2);\n      a2 = simde_mm256_xor_si256(a2, t2);\n      b2 = simde_mm256_srli_epi64(b2, 1);\n      t2 = simde_mm256_and_si256(b2, ones);\n      t2 = simde_mm256_cmpeq_epi8(t2, ones);\n      t2 = simde_mm256_and_si256(a2, t2);\n      r2 = simde_mm256_xor_si256(r2, t2);\n    }\n\n    return simde_mm_xor_si128(simde_mm256_extracti128_si256(r2, 1),\n                              simde_mm256_extracti128_si256(r2, 0));\n  #elif defined(SIMDE_X86_SSE2_NATIVE)\n    simde__m128i r, t;\n    const simde__m128i zero = simde_mm_setzero_si128();\n    const simde__m128i ones = simde_mm_set1_epi8(0x01);\n\n    const simde__m128i fgp = simde_mm_set1_epi8(SIMDE_X86_GFNI_FGP);\n\n    t = simde_mm_and_si128(b, ones);\n    t = simde_mm_cmpeq_epi8(t, ones);\n    r = simde_mm_and_si128(a, t);\n\n    #if !defined(__INTEL_COMPILER)\n      SIMDE_VECTORIZE\n    #endif\n    for (int i = 0 ; i < 7 ; i++) {\n      t = simde_mm_cmpgt_epi8(zero, a);\n      t = simde_mm_and_si128(fgp, t);\n      a = simde_mm_add_epi8(a, a);\n      a = simde_mm_xor_si128(a, t);\n      b = simde_mm_srli_epi64(b, 1);\n      t = simde_mm_and_si128(b, ones);\n      t = simde_mm_cmpeq_epi8(t, ones);\n      t = simde_mm_and_si128(a, t);\n      r = simde_mm_xor_si128(r, t);\n    }\n\n    return r;\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    const uint8_t fgp = SIMDE_X86_GFNI_FGP;\n\n    #if !defined(__INTEL_COMPILER)\n      SIMDE_VECTORIZE\n    #endif\n    for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {\n      r_.u8[i] = 0;\n      while ((a_.u8[i] != 0) && (b_.u8[i] != 0)) {\n        if (b_.u8[i] & 1)\n          r_.u8[i] ^= a_.u8[i];\n\n        if (a_.u8[i] & 0x80)\n          a_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, (a_.u8[i] << 1) ^ fgp);\n        else\n          a_.u8[i] <<= 1;\n\n        b_.u8[i] >>= 1;\n      }\n    }\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)\n  #undef _mm_gf2p8mul_epi8\n  #define _mm_gf2p8mul_epi8(a, b) simde_mm_gf2p8mul_epi8(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_gf2p8mul_epi8 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_GFNI_NATIVE) && (defined(SIMDE_X86_AVX512VL_NATIVE) || (defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE)))\n    return _mm256_gf2p8mul_epi8(a, b);\n  #elif !defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE)\n    simde__mmask64 ma, mb;\n    simde__m512i r, t, s;\n    simde__m512i a2 = simde_mm512_broadcast_i64x4(a);\n    const simde__m512i zero = simde_mm512_setzero_si512();\n\n    const simde__m512i fgp = simde_mm512_set1_epi8(SIMDE_X86_GFNI_FGP);\n\n    s = simde_mm512_set1_epi8(0x01);\n\n    for (int i = 0 ; i < 4 ; i++) {\n      ma = simde_mm512_cmplt_epi8_mask(a2, zero);\n      a2 = simde_mm512_add_epi8(a2, a2);\n      t = simde_mm512_xor_si512(a2, fgp);\n      a2 = simde_mm512_mask_mov_epi8(a2, ma, t);\n    }\n\n    simde__m512i b2 = simde_mm512_inserti64x4(zero, simde_mm256_srli_epi64(b, 4), 1);\n    b2 = simde_mm512_inserti64x4(b2, b, 0);\n    a2 = simde_mm512_inserti64x4(a2, a, 0);\n\n    mb = simde_mm512_test_epi8_mask(b2, s);\n    r = simde_mm512_maskz_mov_epi8(mb, a2);\n\n    #if !defined(__INTEL_COMPILER)\n      SIMDE_VECTORIZE\n    #endif\n    for (int i = 0 ; i < 3 ; i++) 
{\n      ma = simde_mm512_cmplt_epi8_mask(a2, zero);\n      s = simde_mm512_add_epi8(s, s);\n      mb = simde_mm512_test_epi8_mask(b2, s);\n      a2 = simde_mm512_add_epi8(a2, a2);\n      t = simde_mm512_maskz_mov_epi8(ma, fgp);\n      a2 = simde_mm512_xor_si512(a2, t);\n      t = simde_mm512_maskz_mov_epi8(mb, a2);\n      r = simde_mm512_xor_si512(r, t);\n    }\n\n    return simde_mm256_xor_si256(simde_mm512_extracti64x4_epi64(r, 1),\n                                 simde_mm512_extracti64x4_epi64(r, 0));\n  #elif !defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX2_NATIVE)\n    simde__m256i r, t;\n    const simde__m256i zero = simde_mm256_setzero_si256();\n    const simde__m256i ones = simde_mm256_set1_epi8(0x01);\n\n    const simde__m256i fgp = simde_mm256_set1_epi8(SIMDE_X86_GFNI_FGP);\n\n    t = simde_mm256_and_si256(b, ones);\n    t = simde_mm256_cmpeq_epi8(t, ones);\n    r = simde_mm256_and_si256(a, t);\n\n    #if !defined(__INTEL_COMPILER)\n      SIMDE_VECTORIZE\n    #endif\n    for (int i = 0 ; i < 7 ; i++) {\n      t = simde_mm256_cmpgt_epi8(zero, a);\n      t = simde_mm256_and_si256(fgp, t);\n      a = simde_mm256_add_epi8(a, a);\n      a = simde_mm256_xor_si256(a, t);\n      b = simde_mm256_srli_epi64(b, 1);\n      t = simde_mm256_and_si256(b, ones);\n      t = simde_mm256_cmpeq_epi8(t, ones);\n      t = simde_mm256_and_si256(a, t);\n      r = simde_mm256_xor_si256(r, t);\n    }\n\n    return r;\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    #if !defined(__INTEL_COMPILER)\n      SIMDE_VECTORIZE\n    #endif\n    for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) {\n      r_.m128i[i] = simde_mm_gf2p8mul_epi8(a_.m128i[i], b_.m128i[i]);\n    }\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_gf2p8mul_epi8\n  #define _mm256_gf2p8mul_epi8(a, b) simde_mm256_gf2p8mul_epi8(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512i\nsimde_mm512_gf2p8mul_epi8 (simde__m512i a, simde__m512i b) {\n  #if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_gf2p8mul_epi8(a, b);\n  #elif !defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE)\n    simde__m512i r, s, t;\n    simde__mmask64 ma, mb;\n    const simde__m512i zero = simde_mm512_setzero_si512();\n\n    const simde__m512i fgp = simde_mm512_set1_epi8(SIMDE_X86_GFNI_FGP);\n\n    s = simde_mm512_set1_epi8(0x01);\n\n    mb = simde_mm512_test_epi8_mask(b, s);\n    r = simde_mm512_maskz_mov_epi8(mb, a);\n\n    #if !defined(__INTEL_COMPILER)\n      SIMDE_VECTORIZE\n    #endif\n    for (int i = 0 ; i < 7 ; i++) {\n      ma = simde_mm512_cmplt_epi8_mask(a, zero);\n      s = simde_mm512_add_epi8(s, s);\n      mb = simde_mm512_test_epi8_mask(b, s);\n      a = simde_mm512_add_epi8(a, a);\n      t = simde_mm512_maskz_mov_epi8(ma, fgp);\n      a = simde_mm512_xor_si512(a, t);\n      t = simde_mm512_maskz_mov_epi8(mb, a);\n      r = simde_mm512_xor_si512(r, t);\n    }\n\n    return r;\n  #else\n    simde__m512i_private\n      r_,\n      a_ = simde__m512i_to_private(a),\n      b_ = simde__m512i_to_private(b);\n\n    #if !defined(__INTEL_COMPILER)\n      SIMDE_VECTORIZE\n    #endif\n    for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) {\n      r_.m128i[i] = simde_mm_gf2p8mul_epi8(a_.m128i[i], 
b_.m128i[i]);\n    }\n\n    return simde__m512i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_gf2p8mul_epi8\n  #define _mm512_gf2p8mul_epi8(a, b) simde_mm512_gf2p8mul_epi8(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_mask_gf2p8mul_epi8 (simde__m128i src, simde__mmask16 k, simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)\n    return _mm_mask_gf2p8mul_epi8(src, k, a, b);\n  #else\n    return simde_mm_mask_mov_epi8(src, k, simde_mm_gf2p8mul_epi8(a, b));\n  #endif\n}\n#if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)\n  #undef _mm_mask_gf2p8mul_epi8\n  #define _mm_mask_gf2p8mul_epi8(src, k, a, b) simde_mm_mask_gf2p8mul_epi8(src, k, a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_mask_gf2p8mul_epi8 (simde__m256i src, simde__mmask32 k, simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)\n    return _mm256_mask_gf2p8mul_epi8(src, k, a, b);\n  #else\n    return simde_mm256_mask_mov_epi8(src, k, simde_mm256_gf2p8mul_epi8(a, b));\n  #endif\n}\n#if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_mask_gf2p8mul_epi8\n  #define _mm256_mask_gf2p8mul_epi8(src, k, a, b) simde_mm256_mask_gf2p8mul_epi8(src, k, a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512i\nsimde_mm512_mask_gf2p8mul_epi8 (simde__m512i src, simde__mmask64 k, simde__m512i a, simde__m512i b) {\n  #if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_gf2p8mul_epi8(src, k, a, b);\n  #else\n    return simde_mm512_mask_mov_epi8(src, k, simde_mm512_gf2p8mul_epi8(a, b));\n  #endif\n}\n#if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_gf2p8mul_epi8\n  #define _mm512_mask_gf2p8mul_epi8(src, k, a, b) simde_mm512_mask_gf2p8mul_epi8(src, k, a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_maskz_gf2p8mul_epi8 (simde__mmask16 k, simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)\n    return _mm_maskz_gf2p8mul_epi8(k, a, b);\n  #else\n    return simde_mm_maskz_mov_epi8(k, simde_mm_gf2p8mul_epi8(a, b));\n  #endif\n}\n#if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)\n  #undef _mm_maskz_gf2p8mul_epi8\n  #define _mm_maskz_gf2p8mul_epi8(k, a, b) simde_mm_maskz_gf2p8mul_epi8(k, a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_maskz_gf2p8mul_epi8 (simde__mmask32 k, simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)\n    return _mm256_maskz_gf2p8mul_epi8(k, a, b);\n  #else\n    return  simde_mm256_maskz_mov_epi8(k, simde_mm256_gf2p8mul_epi8(a, b));\n  #endif\n}\n#if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_maskz_gf2p8mul_epi8\n  #define _mm256_maskz_gf2p8mul_epi8(k, a, b) simde_mm256_maskz_gf2p8mul_epi8(k, a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512i\nsimde_mm512_maskz_gf2p8mul_epi8 (simde__mmask64 k, simde__m512i a, simde__m512i b) {\n  #if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_maskz_gf2p8mul_epi8(k, a, b);\n  #else\n   
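 /* No native masked form available here: compute the full product, then zero the byte lanes not selected by k. */\n   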
 return simde_mm512_maskz_mov_epi8(k, simde_mm512_gf2p8mul_epi8(a, b));\n  #endif\n}\n#if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_maskz_gf2p8mul_epi8\n  #define _mm512_maskz_gf2p8mul_epi8(k, a, b) simde_mm512_maskz_gf2p8mul_epi8(k, a, b)\n#endif\n\nSIMDE_END_DECLS_\n\nHEDLEY_DIAGNOSTIC_POP\n\n#endif /* !defined(SIMDE_X86_GFNI_H) */\n"
  },
  {
    "path": "external_libs/pgenlib/simde/x86/mmx.h",
    "content": "/* SPDX-License-Identifier: MIT\n *\n * Permission is hereby granted, free of charge, to any person\n * obtaining a copy of this software and associated documentation\n * files (the \"Software\"), to deal in the Software without\n * restriction, including without limitation the rights to use, copy,\n * modify, merge, publish, distribute, sublicense, and/or sell copies\n * of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be\n * included in all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND,\n * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\n * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\n * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\n * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\n * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\n * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n * SOFTWARE.\n *\n * Copyright:\n *   2017-2020 Evan Nemerson <evan@nemerson.com>\n */\n\n#if !defined(SIMDE_X86_MMX_H)\n#define SIMDE_X86_MMX_H\n\n#include \"../simde-common.h\"\n\nHEDLEY_DIAGNOSTIC_PUSH\nSIMDE_DISABLE_UNWANTED_DIAGNOSTICS\n\n#if defined(SIMDE_X86_MMX_NATIVE)\n  #define SIMDE_X86_MMX_USE_NATIVE_TYPE\n#elif defined(SIMDE_X86_SSE_NATIVE)\n  #define SIMDE_X86_MMX_USE_NATIVE_TYPE\n#endif\n\n#if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE)\n  #include <mmintrin.h>\n#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n  #include <arm_neon.h>\n#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)\n  #include <loongson-mmiintrin.h>\n#endif\n\n#include <stdint.h>\n#include <limits.h>\n\nSIMDE_BEGIN_DECLS_\n\ntypedef union {\n  #if defined(SIMDE_VECTOR_SUBSCRIPT)\n    SIMDE_ALIGN_TO_8 int8_t          i8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;\n    SIMDE_ALIGN_TO_8 int16_t        i16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;\n    SIMDE_ALIGN_TO_8 int32_t        i32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;\n    SIMDE_ALIGN_TO_8 int64_t        i64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;\n    SIMDE_ALIGN_TO_8 uint8_t         u8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;\n    SIMDE_ALIGN_TO_8 uint16_t       u16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;\n    SIMDE_ALIGN_TO_8 uint32_t       u32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;\n    SIMDE_ALIGN_TO_8 uint64_t       u64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;\n    SIMDE_ALIGN_TO_8 simde_float32  f32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;\n    SIMDE_ALIGN_TO_8 int_fast32_t  i32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;\n    SIMDE_ALIGN_TO_8 uint_fast32_t u32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;\n  #else\n    SIMDE_ALIGN_TO_8 int8_t          i8[8];\n    SIMDE_ALIGN_TO_8 int16_t        i16[4];\n    SIMDE_ALIGN_TO_8 int32_t        i32[2];\n    SIMDE_ALIGN_TO_8 int64_t        i64[1];\n    SIMDE_ALIGN_TO_8 uint8_t         u8[8];\n    SIMDE_ALIGN_TO_8 uint16_t       u16[4];\n    SIMDE_ALIGN_TO_8 uint32_t       u32[2];\n    SIMDE_ALIGN_TO_8 uint64_t       u64[1];\n    SIMDE_ALIGN_TO_8 simde_float32  f32[2];\n    SIMDE_ALIGN_TO_8 int_fast32_t  i32f[8 / sizeof(int_fast32_t)];\n    SIMDE_ALIGN_TO_8 uint_fast32_t u32f[8 / sizeof(uint_fast32_t)];\n  #endif\n\n  #if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE)\n    __m64          n;\n  #endif\n  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n    int8x8_t       neon_i8;\n    int16x4_t      neon_i16;\n    int32x2_t      neon_i32;\n    int64x1_t      neon_i64;\n    uint8x8_t      neon_u8;\n    
uint16x4_t     neon_u16;\n    uint32x2_t     neon_u32;\n    uint64x1_t     neon_u64;\n    float32x2_t    neon_f32;\n  #endif\n  #if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)\n    int8x8_t       mmi_i8;\n    int16x4_t      mmi_i16;\n    int32x2_t      mmi_i32;\n    int64_t        mmi_i64;\n    uint8x8_t      mmi_u8;\n    uint16x4_t     mmi_u16;\n    uint32x2_t     mmi_u32;\n    uint64_t       mmi_u64;\n  #endif\n} simde__m64_private;\n\n#if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE)\n  typedef __m64 simde__m64;\n#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n  typedef int32x2_t simde__m64;\n#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)\n  typedef int32x2_t simde__m64;\n#elif defined(SIMDE_VECTOR_SUBSCRIPT)\n  typedef int32_t simde__m64 SIMDE_ALIGN_TO_8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;\n#else\n  typedef simde__m64_private simde__m64;\n#endif\n\n#if !defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) && defined(SIMDE_ENABLE_NATIVE_ALIASES)\n  #define SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES\n  typedef simde__m64 __m64;\n#endif\n\nHEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64), \"__m64 size incorrect\");\nHEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64_private), \"__m64 size incorrect\");\n#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF)\nHEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m64) == 8, \"simde__m64 is not 8-byte aligned\");\nHEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m64_private) == 8, \"simde__m64_private is not 8-byte aligned\");\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde__m64_from_private(simde__m64_private v) {\n  simde__m64 r;\n  simde_memcpy(&r, &v, sizeof(r));\n  return r;\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64_private\nsimde__m64_to_private(simde__m64 v) {\n  simde__m64_private r;\n  simde_memcpy(&r, &v, sizeof(r));\n  return r;\n}\n\n#define SIMDE_X86_GENERATE_CONVERSION_FUNCTION(simde_type, source_type, isax, fragment) \\\n  SIMDE_FUNCTION_ATTRIBUTES \\\n  simde__##simde_type \\\n  simde__##simde_type##_from_##isax##_##fragment(source_type value) { \\\n    simde__##simde_type##_private r_; \\\n    r_.isax##_##fragment = value; \\\n    return simde__##simde_type##_from_private(r_); \\\n  } \\\n  \\\n  SIMDE_FUNCTION_ATTRIBUTES \\\n  source_type \\\n  simde__##simde_type##_to_##isax##_##fragment(simde__##simde_type value) { \\\n    simde__##simde_type##_private r_ = simde__##simde_type##_to_private(value); \\\n    return r_.isax##_##fragment; \\\n  }\n\n#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int8x8_t, neon, i8)\n  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int16x4_t, neon, i16)\n  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int32x2_t, neon, i32)\n  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int64x1_t, neon, i64)\n  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint8x8_t, neon, u8)\n  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint16x4_t, neon, u16)\n  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint32x2_t, neon, u32)\n  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint64x1_t, neon, u64)\n  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, float32x2_t, neon, f32)\n#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */\n\n#if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)\n  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int8x8_t, mmi, i8)\n  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int16x4_t, mmi, i16)\n  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int32x2_t, mmi, i32)\n  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int64_t, mmi, i64)\n  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint8x8_t, mmi, u8)\n  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, 
uint16x4_t, mmi, u16)\n  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint32x2_t, mmi, u32)\n  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint64_t, mmi, u64)\n#endif /* defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) */\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_add_pi8 (simde__m64 a, simde__m64 b) {\n  #if defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_add_pi8(a, b);\n  #else\n    simde__m64_private r_;\n    simde__m64_private a_ = simde__m64_to_private(a);\n    simde__m64_private b_ = simde__m64_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i8 = vadd_s8(a_.neon_i8, b_.neon_i8);\n    #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)\n      r_.mmi_i8 = paddb_s(a_.mmi_i8, b_.mmi_i8);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i8 = a_.i8 + b_.i8;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {\n        r_.i8[i] = a_.i8[i] + b_.i8[i];\n      }\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#define simde_m_paddb(a, b) simde_mm_add_pi8(a, b)\n#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)\n#  define _mm_add_pi8(a, b) simde_mm_add_pi8(a, b)\n#  define _m_paddb(a, b) simde_m_paddb(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_add_pi16 (simde__m64 a, simde__m64 b) {\n  #if defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_add_pi16(a, b);\n  #else\n    simde__m64_private r_;\n    simde__m64_private a_ = simde__m64_to_private(a);\n    simde__m64_private b_ = simde__m64_to_private(b);\n\n  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n    r_.neon_i16 = vadd_s16(a_.neon_i16, b_.neon_i16);\n  #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)\n    r_.mmi_i16 = paddh_s(a_.mmi_i16, b_.mmi_i16);\n  #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n    r_.i16 = a_.i16 + b_.i16;\n  #else\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n      r_.i16[i] = a_.i16[i] + b_.i16[i];\n    }\n  #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#define simde_m_paddw(a, b) simde_mm_add_pi16(a, b)\n#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)\n#  define _mm_add_pi16(a, b) simde_mm_add_pi16(a, b)\n#  define _m_paddw(a, b) simde_mm_add_pi16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_add_pi32 (simde__m64 a, simde__m64 b) {\n  #if defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_add_pi32(a, b);\n  #else\n    simde__m64_private r_;\n    simde__m64_private a_ = simde__m64_to_private(a);\n    simde__m64_private b_ = simde__m64_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i32 = vadd_s32(a_.neon_i32, b_.neon_i32);\n    #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)\n      r_.mmi_i32 = paddw_s(a_.mmi_i32, b_.mmi_i32);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i32 = a_.i32 + b_.i32;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n        r_.i32[i] = a_.i32[i] + b_.i32[i];\n      }\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#define simde_m_paddd(a, b) simde_mm_add_pi32(a, b)\n#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)\n#  define _mm_add_pi32(a, b) simde_mm_add_pi32(a, b)\n#  define _m_paddd(a, b) simde_mm_add_pi32(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_adds_pi8 (simde__m64 a, simde__m64 b) {\n  #if defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_adds_pi8(a, b);\n  #else\n    simde__m64_private\n      r_,\n      a_ = simde__m64_to_private(a),\n      b_ = 
simde__m64_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i8 = vqadd_s8(a_.neon_i8, b_.neon_i8);\n    #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)\n      r_.mmi_i8 = paddsb(a_.mmi_i8, b_.mmi_i8);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {\n        if ((((b_.i8[i]) > 0) && ((a_.i8[i]) > (INT8_MAX - (b_.i8[i]))))) {\n          r_.i8[i] = INT8_MAX;\n        } else if ((((b_.i8[i]) < 0) && ((a_.i8[i]) < (INT8_MIN - (b_.i8[i]))))) {\n          r_.i8[i] = INT8_MIN;\n        } else {\n          r_.i8[i] = (a_.i8[i]) + (b_.i8[i]);\n        }\n      }\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#define simde_m_paddsb(a, b) simde_mm_adds_pi8(a, b)\n#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)\n#  define _mm_adds_pi8(a, b) simde_mm_adds_pi8(a, b)\n#  define _m_paddsb(a, b) simde_mm_adds_pi8(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_adds_pu8 (simde__m64 a, simde__m64 b) {\n  #if defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_adds_pu8(a, b);\n  #else\n    simde__m64_private r_;\n    simde__m64_private a_ = simde__m64_to_private(a);\n    simde__m64_private b_ = simde__m64_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u8 = vqadd_u8(a_.neon_u8, b_.neon_u8);\n    #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)\n      r_.mmi_u8 = paddusb(a_.mmi_u8, b_.mmi_u8);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {\n        const uint_fast16_t x = HEDLEY_STATIC_CAST(uint_fast16_t, a_.u8[i]) + HEDLEY_STATIC_CAST(uint_fast16_t, b_.u8[i]);\n        if (x > UINT8_MAX)\n          r_.u8[i] = UINT8_MAX;\n        else\n          r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, x);\n      }\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#define simde_m_paddusb(a, b) simde_mm_adds_pu8(a, b)\n#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)\n#  define _mm_adds_pu8(a, b) simde_mm_adds_pu8(a, b)\n#  define _m_paddusb(a, b) simde_mm_adds_pu8(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_adds_pi16 (simde__m64 a, simde__m64 b) {\n  #if defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_adds_pi16(a, b);\n  #else\n    simde__m64_private r_;\n    simde__m64_private a_ = simde__m64_to_private(a);\n    simde__m64_private b_ = simde__m64_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i16 = vqadd_s16(a_.neon_i16, b_.neon_i16);\n    #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)\n      r_.mmi_i16 = paddsh(a_.mmi_i16, b_.mmi_i16);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n        if ((((b_.i16[i]) > 0) && ((a_.i16[i]) > (INT16_MAX - (b_.i16[i]))))) {\n          r_.i16[i] = INT16_MAX;\n        } else if ((((b_.i16[i]) < 0) && ((a_.i16[i]) < (INT16_MIN - (b_.i16[i]))))) {\n          r_.i16[i] = INT16_MIN;\n        } else {\n          r_.i16[i] = (a_.i16[i]) + (b_.i16[i]);\n        }\n      }\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#define simde_m_paddsw(a, b) simde_mm_adds_pi16(a, b)\n#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)\n#  define _mm_adds_pi16(a, b) simde_mm_adds_pi16(a, b)\n#  define _m_paddsw(a, b) simde_mm_adds_pi16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_adds_pu16 (simde__m64 a, simde__m64 b) {\n  #if defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_adds_pu16(a, b);\n  #else\n    simde__m64_private r_;\n    
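/* Portable fallback below: each lane is widened to uint32_t and clamped at UINT16_MAX, emulating the saturating unsigned add of the native PADDUSW instruction. */\n    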
simde__m64_private a_ = simde__m64_to_private(a);\n    simde__m64_private b_ = simde__m64_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u16 = vqadd_u16(a_.neon_u16, b_.neon_u16);\n    #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)\n      r_.mmi_u16 = paddush(a_.mmi_u16, b_.mmi_u16);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n        const uint32_t x = a_.u16[i] + b_.u16[i];\n        if (x > UINT16_MAX)\n          r_.u16[i] = UINT16_MAX;\n        else\n          r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x);\n      }\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#define simde_m_paddusw(a, b) simde_mm_adds_pu16(a, b)\n#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)\n#  define _mm_adds_pu16(a, b) simde_mm_adds_pu16(a, b)\n#  define _m_paddusw(a, b) simde_mm_adds_pu16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_and_si64 (simde__m64 a, simde__m64 b) {\n  #if defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_and_si64(a, b);\n  #else\n    simde__m64_private r_;\n    simde__m64_private a_ = simde__m64_to_private(a);\n    simde__m64_private b_ = simde__m64_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i32 = vand_s32(a_.neon_i32, b_.neon_i32);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i64 = a_.i64 & b_.i64;\n    #else\n      r_.i64[0] = a_.i64[0] & b_.i64[0];\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#define simde_m_pand(a, b) simde_mm_and_si64(a, b)\n#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)\n#  define _mm_and_si64(a, b) simde_mm_and_si64(a, b)\n#  define _m_pand(a, b) simde_mm_and_si64(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_andnot_si64 (simde__m64 a, simde__m64 b) {\n  #if defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_andnot_si64(a, b);\n  #else\n    simde__m64_private r_;\n    simde__m64_private a_ = simde__m64_to_private(a);\n    simde__m64_private b_ = simde__m64_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i32 = vbic_s32(b_.neon_i32, a_.neon_i32);\n    #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)\n      r_.mmi_i32 = pandn_sw(a_.mmi_i32, b_.mmi_i32);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i32f = ~a_.i32f & b_.i32f;\n    #else\n      r_.u64[0] = (~(a_.u64[0])) & (b_.u64[0]);\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#define simde_m_pandn(a, b) simde_mm_andnot_si64(a, b)\n#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)\n#  define _mm_andnot_si64(a, b) simde_mm_andnot_si64(a, b)\n#  define _m_pandn(a, b) simde_mm_andnot_si64(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_cmpeq_pi8 (simde__m64 a, simde__m64 b) {\n  #if defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_cmpeq_pi8(a, b);\n  #else\n    simde__m64_private r_;\n    simde__m64_private a_ = simde__m64_to_private(a);\n    simde__m64_private b_ = simde__m64_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u8 = vceq_s8(a_.neon_i8, b_.neon_i8);\n    #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)\n      r_.mmi_i8 = pcmpeqb_s(a_.mmi_i8, b_.mmi_i8);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {\n        r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? 
~INT8_C(0) : INT8_C(0);\n      }\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#define simde_m_pcmpeqb(a, b) simde_mm_cmpeq_pi8(a, b)\n#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)\n#  define _mm_cmpeq_pi8(a, b) simde_mm_cmpeq_pi8(a, b)\n#  define _m_pcmpeqb(a, b) simde_mm_cmpeq_pi8(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_cmpeq_pi16 (simde__m64 a, simde__m64 b) {\n  #if defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_cmpeq_pi16(a, b);\n  #else\n    simde__m64_private r_;\n    simde__m64_private a_ = simde__m64_to_private(a);\n    simde__m64_private b_ = simde__m64_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u16 = vceq_s16(a_.neon_i16, b_.neon_i16);\n    #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)\n      r_.mmi_i16 = pcmpeqh_s(a_.mmi_i16, b_.mmi_i16);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n        r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? ~INT16_C(0) : INT16_C(0);\n      }\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#define simde_m_pcmpeqw(a, b) simde_mm_cmpeq_pi16(a, b)\n#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)\n#  define _mm_cmpeq_pi16(a, b) simde_mm_cmpeq_pi16(a, b)\n#  define _m_pcmpeqw(a, b) simde_mm_cmpeq_pi16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_cmpeq_pi32 (simde__m64 a, simde__m64 b) {\n  #if defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_cmpeq_pi32(a, b);\n  #else\n    simde__m64_private r_;\n    simde__m64_private a_ = simde__m64_to_private(a);\n    simde__m64_private b_ = simde__m64_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u32 = vceq_s32(a_.neon_i32, b_.neon_i32);\n    #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)\n      r_.mmi_i32 = pcmpeqw_s(a_.mmi_i32, b_.mmi_i32);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n        r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? ~INT32_C(0) : INT32_C(0);\n      }\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#define simde_m_pcmpeqd(a, b) simde_mm_cmpeq_pi32(a, b)\n#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)\n#  define _mm_cmpeq_pi32(a, b) simde_mm_cmpeq_pi32(a, b)\n#  define _m_pcmpeqd(a, b) simde_mm_cmpeq_pi32(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_cmpgt_pi8 (simde__m64 a, simde__m64 b) {\n  #if defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_cmpgt_pi8(a, b);\n  #else\n    simde__m64_private r_;\n    simde__m64_private a_ = simde__m64_to_private(a);\n    simde__m64_private b_ = simde__m64_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u8 = vcgt_s8(a_.neon_i8, b_.neon_i8);\n    #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)\n      r_.mmi_i8 = pcmpgtb_s(a_.mmi_i8, b_.mmi_i8);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {\n        r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? 
~INT8_C(0) : INT8_C(0);\n      }\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#define simde_m_pcmpgtb(a, b) simde_mm_cmpgt_pi8(a, b)\n#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)\n#  define _mm_cmpgt_pi8(a, b) simde_mm_cmpgt_pi8(a, b)\n#  define _m_pcmpgtb(a, b) simde_mm_cmpgt_pi8(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_cmpgt_pi16 (simde__m64 a, simde__m64 b) {\n  #if defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_cmpgt_pi16(a, b);\n  #else\n    simde__m64_private r_;\n    simde__m64_private a_ = simde__m64_to_private(a);\n    simde__m64_private b_ = simde__m64_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u16 = vcgt_s16(a_.neon_i16, b_.neon_i16);\n    #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)\n      r_.mmi_i16 = pcmpgth_s(a_.mmi_i16, b_.mmi_i16);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n        r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? ~INT16_C(0) : INT16_C(0);\n      }\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#define simde_m_pcmpgtw(a, b) simde_mm_cmpgt_pi16(a, b)\n#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)\n#  define _mm_cmpgt_pi16(a, b) simde_mm_cmpgt_pi16(a, b)\n#  define _m_pcmpgtw(a, b) simde_mm_cmpgt_pi16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_cmpgt_pi32 (simde__m64 a, simde__m64 b) {\n  #if defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_cmpgt_pi32(a, b);\n  #else\n    simde__m64_private r_;\n    simde__m64_private a_ = simde__m64_to_private(a);\n    simde__m64_private b_ = simde__m64_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u32 = vcgt_s32(a_.neon_i32, b_.neon_i32);\n    #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)\n      r_.mmi_i32 = pcmpgtw_s(a_.mmi_i32, b_.mmi_i32);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n        r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? 
~INT32_C(0) : INT32_C(0);\n      }\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#define simde_m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b)\n#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)\n#  define _mm_cmpgt_pi32(a, b) simde_mm_cmpgt_pi32(a, b)\n#  define _m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nint64_t\nsimde_mm_cvtm64_si64 (simde__m64 a) {\n  #if defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(__PGI)\n    return _mm_cvtm64_si64(a);\n  #else\n    simde__m64_private a_ = simde__m64_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      HEDLEY_DIAGNOSTIC_PUSH\n      #if HEDLEY_HAS_WARNING(\"-Wvector-conversion\") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0)\n        #pragma clang diagnostic ignored \"-Wvector-conversion\"\n      #endif\n      return vget_lane_s64(a_.neon_i64, 0);\n      HEDLEY_DIAGNOSTIC_POP\n    #else\n      return a_.i64[0];\n    #endif\n  #endif\n}\n#define simde_m_to_int64(a) simde_mm_cvtm64_si64(a)\n#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64))\n#  define _mm_cvtm64_si64(a) simde_mm_cvtm64_si64(a)\n#  define _m_to_int64(a) simde_mm_cvtm64_si64(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_cvtsi32_si64 (int32_t a) {\n  #if defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_cvtsi32_si64(a);\n  #else\n    simde__m64_private r_;\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      const int32_t av[2] = { a, 0 };\n      r_.neon_i32 = vld1_s32(av);\n    #else\n      r_.i32[0] = a;\n      r_.i32[1] = 0;\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#define simde_m_from_int(a) simde_mm_cvtsi32_si64(a)\n#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)\n#  define _mm_cvtsi32_si64(a) simde_mm_cvtsi32_si64(a)\n#  define _m_from_int(a) simde_mm_cvtsi32_si64(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_cvtsi64_m64 (int64_t a) {\n  #if defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(__PGI)\n    return _mm_cvtsi64_m64(a);\n  #else\n    simde__m64_private r_;\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i64 = vld1_s64(&a);\n    #else\n      r_.i64[0] = a;\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#define simde_m_from_int64(a) simde_mm_cvtsi64_m64(a)\n#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64))\n#  define _mm_cvtsi64_m64(a) simde_mm_cvtsi64_m64(a)\n#  define _m_from_int64(a) simde_mm_cvtsi64_m64(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nint32_t\nsimde_mm_cvtsi64_si32 (simde__m64 a) {\n  #if defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_cvtsi64_si32(a);\n  #else\n    simde__m64_private a_ = simde__m64_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      HEDLEY_DIAGNOSTIC_PUSH\n      #if HEDLEY_HAS_WARNING(\"-Wvector-conversion\") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0)\n        #pragma clang diagnostic ignored \"-Wvector-conversion\"\n      #endif\n      return vget_lane_s32(a_.neon_i32, 0);\n      HEDLEY_DIAGNOSTIC_POP\n    #else\n      return a_.i32[0];\n    #endif\n  #endif\n}\n#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)\n#  define _mm_cvtsi64_si32(a) simde_mm_cvtsi64_si32(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nvoid\nsimde_mm_empty (void) {\n  #if defined(SIMDE_X86_MMX_NATIVE)\n    _mm_empty();\n  #else\n    /* noop */\n  #endif\n}\n#define simde_m_empty() simde_mm_empty()\n
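/* Note: _mm_empty (EMMS) exists only to clear the x87/MMX register aliasing state on x86; targets with no such aliasing have nothing to clear, so ported code can keep its trailing _mm_empty() calls at no cost. */\n#if 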
defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)\n#  define _mm_empty() simde_mm_empty()\n#  define _m_empty() simde_mm_empty()\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_madd_pi16 (simde__m64 a, simde__m64 b) {\n  #if defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_madd_pi16(a, b);\n  #else\n    simde__m64_private r_;\n    simde__m64_private a_ = simde__m64_to_private(a);\n    simde__m64_private b_ = simde__m64_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      int32x4_t i1 = vmull_s16(a_.neon_i16, b_.neon_i16);\n      r_.neon_i32 = vpadd_s32(vget_low_s32(i1), vget_high_s32(i1));\n    #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)\n      r_.mmi_i32 = pmaddhw(a_.mmi_i16, b_.mmi_i16);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i += 2) {\n        r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) + (a_.i16[i + 1] * b_.i16[i + 1]);\n      }\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#define simde_m_pmaddwd(a, b) simde_mm_madd_pi16(a, b)\n#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)\n#  define _mm_madd_pi16(a, b) simde_mm_madd_pi16(a, b)\n#  define _m_pmaddwd(a, b) simde_mm_madd_pi16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_mulhi_pi16 (simde__m64 a, simde__m64 b) {\n  #if defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_mulhi_pi16(a, b);\n  #else\n    simde__m64_private r_;\n    simde__m64_private a_ = simde__m64_to_private(a);\n    simde__m64_private b_ = simde__m64_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16);\n      const uint32x4_t t2 = vshrq_n_u32(vreinterpretq_u32_s32(t1), 16);\n      const uint16x4_t t3 = vmovn_u32(t2);\n      r_.neon_u16 = t3;\n    #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)\n      r_.mmi_i16 = pmulhh(a_.mmi_i16, b_.mmi_i16);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n        r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, ((a_.i16[i] * b_.i16[i]) >> 16));\n      }\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#define simde_m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b)\n#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)\n#  define _mm_mulhi_pi16(a, b) simde_mm_mulhi_pi16(a, b)\n#  define _m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_mullo_pi16 (simde__m64 a, simde__m64 b) {\n  #if defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_mullo_pi16(a, b);\n  #else\n    simde__m64_private r_;\n    simde__m64_private a_ = simde__m64_to_private(a);\n    simde__m64_private b_ = simde__m64_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16);\n      const uint16x4_t t2 = vmovn_u32(vreinterpretq_u32_s32(t1));\n      r_.neon_u16 = t2;\n    #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)\n      r_.mmi_i16 = pmullh(a_.mmi_i16, b_.mmi_i16);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n        r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, ((a_.i16[i] * b_.i16[i]) & 0xffff));\n      }\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#define simde_m_pmullw(a, b) simde_mm_mullo_pi16(a, b)\n#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)\n#  define _mm_mullo_pi16(a, b) simde_mm_mullo_pi16(a, b)\n#  define _m_pmullw(a, b) simde_mm_mullo_pi16(a, 
b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_or_si64 (simde__m64 a, simde__m64 b) {\n  #if defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_or_si64(a, b);\n  #else\n    simde__m64_private r_;\n    simde__m64_private a_ = simde__m64_to_private(a);\n    simde__m64_private b_ = simde__m64_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i32 = vorr_s32(a_.neon_i32, b_.neon_i32);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i64 = a_.i64 | b_.i64;\n    #else\n      r_.i64[0] = a_.i64[0] | b_.i64[0];\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#define simde_m_por(a, b) simde_mm_or_si64(a, b)\n#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)\n#  define _mm_or_si64(a, b) simde_mm_or_si64(a, b)\n#  define _m_por(a, b) simde_mm_or_si64(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_packs_pi16 (simde__m64 a, simde__m64 b) {\n  #if defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_packs_pi16(a, b);\n  #else\n    simde__m64_private r_;\n    simde__m64_private a_ = simde__m64_to_private(a);\n    simde__m64_private b_ = simde__m64_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i8 = vqmovn_s16(vcombine_s16(a_.neon_i16, b_.neon_i16));\n    #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)\n      r_.mmi_i8 = packsshb(a_.mmi_i16, b_.mmi_i16);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n        if (a_.i16[i] < INT8_MIN) {\n          r_.i8[i] = INT8_MIN;\n        } else if (a_.i16[i] > INT8_MAX) {\n          r_.i8[i] = INT8_MAX;\n        } else {\n          r_.i8[i] = HEDLEY_STATIC_CAST(int8_t, a_.i16[i]);\n        }\n      }\n\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n        if (b_.i16[i] < INT8_MIN) {\n          r_.i8[i + 4] = INT8_MIN;\n        } else if (b_.i16[i] > INT8_MAX) {\n          r_.i8[i + 4] = INT8_MAX;\n        } else {\n          r_.i8[i + 4] = HEDLEY_STATIC_CAST(int8_t, b_.i16[i]);\n        }\n      }\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#define simde_m_packsswb(a, b) simde_mm_packs_pi16(a, b)\n#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)\n#  define _mm_packs_pi16(a, b) simde_mm_packs_pi16(a, b)\n#  define _m_packsswb(a, b) simde_mm_packs_pi16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_packs_pi32 (simde__m64 a, simde__m64 b) {\n  #if defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_packs_pi32(a, b);\n  #else\n    simde__m64_private r_;\n    simde__m64_private a_ = simde__m64_to_private(a);\n    simde__m64_private b_ = simde__m64_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i16 = vqmovn_s32(vcombine_s32(a_.neon_i32, b_.neon_i32));\n    #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)\n      r_.mmi_i16 = packsswh(a_.mmi_i32, b_.mmi_i32);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (8 / sizeof(a_.i32[0])) ; i++) {\n        if (a_.i32[i] < INT16_MIN) {\n          r_.i16[i] = INT16_MIN;\n        } else if (a_.i32[i] > INT16_MAX) {\n          r_.i16[i] = INT16_MAX;\n        } else {\n          r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i32[i]);\n        }\n      }\n\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (8 / sizeof(b_.i32[0])) ; i++) {\n        if (b_.i32[i] < INT16_MIN) {\n          r_.i16[i + 2] = INT16_MIN;\n        } else if (b_.i32[i] > INT16_MAX) {\n          r_.i16[i + 2] = INT16_MAX;\n        } else {\n          r_.i16[i + 2] = 
HEDLEY_STATIC_CAST(int16_t, b_.i32[i]);\n        }\n      }\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#define simde_m_packssdw(a, b) simde_mm_packs_pi32(a, b)\n#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)\n#  define _mm_packs_pi32(a, b) simde_mm_packs_pi32(a, b)\n#  define _m_packssdw(a, b) simde_mm_packs_pi32(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_packs_pu16 (simde__m64 a, simde__m64 b) {\n  #if defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_packs_pu16(a, b);\n  #else\n    simde__m64_private r_;\n    simde__m64_private a_ = simde__m64_to_private(a);\n    simde__m64_private b_ = simde__m64_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      const int16x8_t t1 = vcombine_s16(a_.neon_i16, b_.neon_i16);\n\n      /* Set elements which are < 0 to 0 */\n      const int16x8_t t2 = vandq_s16(t1, vreinterpretq_s16_u16(vcgezq_s16(t1)));\n\n      /* Vector with all s16 elements set to UINT8_MAX */\n      const int16x8_t vmax = vmovq_n_s16(HEDLEY_STATIC_CAST(int16_t, UINT8_MAX));\n\n      /* Elements which are within the acceptable range */\n      const int16x8_t le_max = vandq_s16(t2, vreinterpretq_s16_u16(vcleq_s16(t2, vmax)));\n      const int16x8_t gt_max = vandq_s16(vmax, vreinterpretq_s16_u16(vcgtq_s16(t2, vmax)));\n\n      /* Final values as 16-bit integers */\n      const int16x8_t values = vorrq_s16(le_max, gt_max);\n\n      r_.neon_u8 = vmovn_u16(vreinterpretq_u16_s16(values));\n    #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)\n      r_.mmi_u8 = packushb(a_.mmi_u16, b_.mmi_u16);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n        if (a_.i16[i] > UINT8_MAX) {\n          r_.u8[i] = UINT8_MAX;\n        } else if (a_.i16[i] < 0) {\n          r_.u8[i] = 0;\n        } else {\n          r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, a_.i16[i]);\n        }\n      }\n\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n        if (b_.i16[i] > UINT8_MAX) {\n          r_.u8[i + 4] = UINT8_MAX;\n        } else if (b_.i16[i] < 0) {\n          r_.u8[i + 4] = 0;\n        } else {\n          r_.u8[i + 4] = HEDLEY_STATIC_CAST(uint8_t, b_.i16[i]);\n        }\n      }\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#define simde_m_packuswb(a, b) simde_mm_packs_pu16(a, b)\n#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)\n#  define _mm_packs_pu16(a, b) simde_mm_packs_pu16(a, b)\n#  define _m_packuswb(a, b) simde_mm_packs_pu16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_set_pi8 (int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) {\n  #if defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0);\n  #else\n    simde__m64_private r_;\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      const int8_t v[sizeof(r_.i8) / sizeof(r_.i8[0])] = { e0, e1, e2, e3, e4, e5, e6, e7 };\n      r_.neon_i8 = vld1_s8(v);\n    #else\n      r_.i8[0] = e0;\n      r_.i8[1] = e1;\n      r_.i8[2] = e2;\n      r_.i8[3] = e3;\n      r_.i8[4] = e4;\n      r_.i8[5] = e5;\n      r_.i8[6] = e6;\n      r_.i8[7] = e7;\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)\n#  define _mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_x_mm_set_pu8 (uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, uint8_t 
e3, uint8_t e2, uint8_t e1, uint8_t e0) {\n  simde__m64_private r_;\n\n  #if defined(SIMDE_X86_MMX_NATIVE)\n    r_.n = _mm_set_pi8(\n        HEDLEY_STATIC_CAST(int8_t, e7),\n        HEDLEY_STATIC_CAST(int8_t, e6),\n        HEDLEY_STATIC_CAST(int8_t, e5),\n        HEDLEY_STATIC_CAST(int8_t, e4),\n        HEDLEY_STATIC_CAST(int8_t, e3),\n        HEDLEY_STATIC_CAST(int8_t, e2),\n        HEDLEY_STATIC_CAST(int8_t, e1),\n        HEDLEY_STATIC_CAST(int8_t, e0));\n  #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n    const uint8_t v[sizeof(r_.u8) / sizeof(r_.u8[0])] = { e0, e1, e2, e3, e4, e5, e6, e7 };\n    r_.neon_u8 = vld1_u8(v);\n  #else\n    r_.u8[0] = e0;\n    r_.u8[1] = e1;\n    r_.u8[2] = e2;\n    r_.u8[3] = e3;\n    r_.u8[4] = e4;\n    r_.u8[5] = e5;\n    r_.u8[6] = e6;\n    r_.u8[7] = e7;\n  #endif\n\n  return simde__m64_from_private(r_);\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_set_pi16 (int16_t e3, int16_t e2, int16_t e1, int16_t e0) {\n  #if defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_set_pi16(e3, e2, e1, e0);\n  #else\n    simde__m64_private r_;\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      const int16_t v[sizeof(r_.i16) / sizeof(r_.i16[0])] = { e0, e1, e2, e3 };\n      r_.neon_i16 = vld1_s16(v);\n    #else\n      r_.i16[0] = e0;\n      r_.i16[1] = e1;\n      r_.i16[2] = e2;\n      r_.i16[3] = e3;\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)\n#  define _mm_set_pi16(e3, e2, e1, e0) simde_mm_set_pi16(e3, e2, e1, e0)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_x_mm_set_pu16 (uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) {\n  simde__m64_private r_;\n\n#if defined(SIMDE_X86_MMX_NATIVE)\n  r_.n = _mm_set_pi16(\n      HEDLEY_STATIC_CAST(int16_t, e3),\n      HEDLEY_STATIC_CAST(int16_t, e2),\n      HEDLEY_STATIC_CAST(int16_t, e1),\n      HEDLEY_STATIC_CAST(int16_t, e0)\n    );\n#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n  const uint16_t v[sizeof(r_.u16) / sizeof(r_.u16[0])] = { e0, e1, e2, e3 };\n  r_.neon_u16 = vld1_u16(v);\n#else\n  r_.u16[0] = e0;\n  r_.u16[1] = e1;\n  r_.u16[2] = e2;\n  r_.u16[3] = e3;\n#endif\n\n  return simde__m64_from_private(r_);\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_x_mm_set_pu32 (uint32_t e1, uint32_t e0) {\n  simde__m64_private r_;\n\n#if defined(SIMDE_X86_MMX_NATIVE)\n  r_.n = _mm_set_pi32(\n      HEDLEY_STATIC_CAST(int32_t, e1),\n      HEDLEY_STATIC_CAST(int32_t, e0));\n#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n  const uint32_t v[sizeof(r_.u32) / sizeof(r_.u32[0])] = { e0, e1 };\n  r_.neon_u32 = vld1_u32(v);\n#else\n  r_.u32[0] = e0;\n  r_.u32[1] = e1;\n#endif\n\n  return simde__m64_from_private(r_);\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_set_pi32 (int32_t e1, int32_t e0) {\n  simde__m64_private r_;\n\n#if defined(SIMDE_X86_MMX_NATIVE)\n  r_.n = _mm_set_pi32(e1, e0);\n#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n  const int32_t v[sizeof(r_.i32) / sizeof(r_.i32[0])] = { e0, e1 };\n  r_.neon_i32 = vld1_s32(v);\n#else\n  r_.i32[0] = e0;\n  r_.i32[1] = e1;\n#endif\n\n  return simde__m64_from_private(r_);\n}\n#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)\n#  define _mm_set_pi32(e1, e0) simde_mm_set_pi32(e1, e0)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_x_mm_set_pi64 (int64_t e0) {\n  simde__m64_private r_;\n\n#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n  const int64_t v[sizeof(r_.i64) / sizeof(r_.i64[0])] = { e0 };\n  r_.neon_i64 = vld1_s64(v);\n#else\n  r_.i64[0] = e0;\n#endif\n\n  return 
simde__m64_from_private(r_);\n}\n\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_x_mm_set_f32x2 (simde_float32 e1, simde_float32 e0) {\n  simde__m64_private r_;\n\n#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n  const simde_float32 v[sizeof(r_.f32) / sizeof(r_.f32[0])] = { e0, e1 };\n  r_.neon_f32 = vld1_f32(v);\n#else\n  r_.f32[0] = e0;\n  r_.f32[1] = e1;\n#endif\n\n  return simde__m64_from_private(r_);\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_set1_pi8 (int8_t a) {\n  #if defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_set1_pi8(a);\n  #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n    simde__m64_private r_;\n    r_.neon_i8 = vmov_n_s8(a);\n    return simde__m64_from_private(r_);\n  #else\n    return simde_mm_set_pi8(a, a, a, a, a, a, a, a);\n  #endif\n}\n#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)\n#  define _mm_set1_pi8(a) simde_mm_set1_pi8(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_set1_pi16 (int16_t a) {\n  #if defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_set1_pi16(a);\n  #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n    simde__m64_private r_;\n    r_.neon_i16 = vmov_n_s16(a);\n    return simde__m64_from_private(r_);\n  #else\n    return simde_mm_set_pi16(a, a, a, a);\n  #endif\n}\n#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)\n#  define _mm_set1_pi16(a) simde_mm_set1_pi16(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_set1_pi32 (int32_t a) {\n  #if defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_set1_pi32(a);\n  #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n    simde__m64_private r_;\n    r_.neon_i32 = vmov_n_s32(a);\n    return simde__m64_from_private(r_);\n  #else\n    return simde_mm_set_pi32(a, a);\n  #endif\n}\n#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)\n#  define _mm_set1_pi32(a) simde_mm_set1_pi32(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_setr_pi8 (int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) {\n  #if defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0);\n  #else\n    return simde_mm_set_pi8(e0, e1, e2, e3, e4, e5, e6, e7);\n  #endif\n}\n#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)\n#  define _mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_setr_pi16 (int16_t e3, int16_t e2, int16_t e1, int16_t e0) {\n  #if defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_setr_pi16(e3, e2, e1, e0);\n  #else\n    return simde_mm_set_pi16(e0, e1, e2, e3);\n  #endif\n}\n#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)\n#  define _mm_setr_pi16(e3, e2, e1, e0) simde_mm_setr_pi16(e3, e2, e1, e0)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_setr_pi32 (int32_t e1, int32_t e0) {\n  #if defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_setr_pi32(e1, e0);\n  #else\n    return simde_mm_set_pi32(e0, e1);\n  #endif\n}\n#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)\n#  define _mm_setr_pi32(e1, e0) simde_mm_setr_pi32(e1, e0)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_setzero_si64 (void) {\n  #if defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_setzero_si64();\n  #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n    simde__m64_private r_;\n    r_.neon_u32 = vmov_n_u32(0);\n    return simde__m64_from_private(r_);\n  #else\n    return simde_mm_set_pi32(0, 0);\n  #endif\n}\n#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)\n#  define _mm_setzero_si64() simde_mm_setzero_si64()\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_x_mm_load_si64 
(const void* mem_addr) {\n  simde__m64 r;\n  simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m64), sizeof(r));\n  return r;\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_x_mm_loadu_si64 (const void* mem_addr) {\n  simde__m64 r;\n  simde_memcpy(&r, mem_addr, sizeof(r));\n  return r;\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nvoid\nsimde_x_mm_store_si64 (void* mem_addr, simde__m64 value) {\n  simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m64), &value, sizeof(value));\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nvoid\nsimde_x_mm_storeu_si64 (void* mem_addr, simde__m64 value) {\n  simde_memcpy(mem_addr, &value, sizeof(value));\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_x_mm_setone_si64 (void) {\n  return simde_mm_set1_pi32(~INT32_C(0));\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_sll_pi16 (simde__m64 a, simde__m64 count) {\n  #if defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_sll_pi16(a, count);\n  #else\n    simde__m64_private r_;\n    simde__m64_private a_ = simde__m64_to_private(a);\n    simde__m64_private count_ = simde__m64_to_private(count);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      HEDLEY_DIAGNOSTIC_PUSH\n      #if HEDLEY_HAS_WARNING(\"-Wvector-conversion\") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0)\n        #pragma clang diagnostic ignored \"-Wvector-conversion\"\n      #endif\n      r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(HEDLEY_STATIC_CAST(int16_t, vget_lane_u64(count_.neon_u64, 0))));\n      HEDLEY_DIAGNOSTIC_POP\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT)\n      if (HEDLEY_UNLIKELY(count_.u64[0] > 15))\n        return simde_mm_setzero_si64();\n\n      r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, count_.u64[0]);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)\n      r_.i16 = a_.i16 << count_.u64[0];\n    #else\n      if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) {\n        simde_memset(&r_, 0, sizeof(r_));\n        return simde__m64_from_private(r_);\n      }\n\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {\n        r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i] << count_.u64[0]);\n      }\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#define simde_m_psllw(a, count) simde_mm_sll_pi16(a, count)\n#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)\n#  define _mm_sll_pi16(a, count) simde_mm_sll_pi16(a, count)\n#  define _m_psllw(a, count) simde_mm_sll_pi16(a, count)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_sll_pi32 (simde__m64 a, simde__m64 count) {\n  #if defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_sll_pi32(a, count);\n  #else\n    simde__m64_private r_;\n    simde__m64_private a_ = simde__m64_to_private(a);\n    simde__m64_private count_ = simde__m64_to_private(count);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      HEDLEY_DIAGNOSTIC_PUSH\n      #if HEDLEY_HAS_WARNING(\"-Wvector-conversion\") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0)\n        #pragma clang diagnostic ignored \"-Wvector-conversion\"\n      #endif\n      r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(HEDLEY_STATIC_CAST(int32_t, vget_lane_u64(count_.neon_u64, 0))));\n      HEDLEY_DIAGNOSTIC_POP\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)\n      r_.i32 = a_.i32 << count_.u64[0];\n    #else\n      if (HEDLEY_UNLIKELY(count_.u64[0] > 31)) {\n        simde_memset(&r_, 0, sizeof(r_));\n        return simde__m64_from_private(r_);\n      }\n\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u32) / 
sizeof(r_.u32[0])) ; i++) {\n        r_.u32[i] = a_.u32[i] << count_.u64[0];\n      }\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#define simde_m_pslld(a, count) simde_mm_sll_pi32(a, count)\n#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)\n#  define _mm_sll_pi32(a, count) simde_mm_sll_pi32(a, count)\n#  define _m_pslld(a, count) simde_mm_sll_pi32(a, count)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_slli_pi16 (simde__m64 a, int count) {\n  #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI)\n    return _mm_slli_pi16(a, count);\n  #else\n    simde__m64_private r_;\n    simde__m64_private a_ = simde__m64_to_private(a);\n\n    #if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)\n      r_.mmi_i16 = psllh_s(a_.mmi_i16, count);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT)\n      if (HEDLEY_UNLIKELY(count > 15))\n        return simde_mm_setzero_si64();\n\n      r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, count);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)\n      r_.i16 = a_.i16 << count;\n    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16((int16_t) count));\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {\n        r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i] << count);\n      }\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#define simde_m_psllwi(a, count) simde_mm_slli_pi16(a, count)\n#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)\n#  define _mm_slli_pi16(a, count) simde_mm_slli_pi16(a, count)\n#  define _m_psllwi(a, count) simde_mm_slli_pi16(a, count)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_slli_pi32 (simde__m64 a, int count) {\n  #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI)\n    return _mm_slli_pi32(a, count);\n  #else\n    simde__m64_private r_;\n    simde__m64_private a_ = simde__m64_to_private(a);\n\n    #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)\n      r_.i32 = a_.i32 << count;\n    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32((int32_t) count));\n    #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)\n      r_.mmi_i32 = psllw_s(a_.mmi_i32, count);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {\n        r_.u32[i] = a_.u32[i] << count;\n      }\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#define simde_m_pslldi(a, b) simde_mm_slli_pi32(a, b)\n#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)\n#  define _mm_slli_pi32(a, count) simde_mm_slli_pi32(a, count)\n#  define _m_pslldi(a, count) simde_mm_slli_pi32(a, count)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_slli_si64 (simde__m64 a, int count) {\n  #if defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_slli_si64(a, count);\n  #else\n    simde__m64_private r_;\n    simde__m64_private a_ = simde__m64_to_private(a);\n\n    #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)\n      r_.i64 = a_.i64 << count;\n    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i64 = vshl_s64(a_.neon_i64, vmov_n_s64((int64_t) count));\n    #else\n      r_.u64[0] = a_.u64[0] << count;\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n
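/* Note: the fallbacks above shift by count directly, so a count outside 0..63 is undefined behaviour in C, whereas the native PSLLQ would zero the result; callers are assumed to pass a valid count. */\n#define simde_m_psllqi(a, count) simde_mm_slli_si64(a, count)\n#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)\n#  define _mm_slli_si64(a, count) simde_mm_slli_si64(a, count)\n#  define _m_psllqi(a, count) 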
simde_mm_slli_si64(a, count)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_sll_si64 (simde__m64 a, simde__m64 count) {\n  #if defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_sll_si64(a, count);\n  #else\n    simde__m64_private r_;\n    simde__m64_private a_ = simde__m64_to_private(a);\n    simde__m64_private count_ = simde__m64_to_private(count);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i64 = vshl_s64(a_.neon_i64, count_.neon_i64);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i64 = a_.i64 << count_.i64;\n    #else\n      if (HEDLEY_UNLIKELY(count_.u64[0] > 63)) {\n        simde_memset(&r_, 0, sizeof(r_));\n        return simde__m64_from_private(r_);\n      }\n\n      r_.u64[0] = a_.u64[0] << count_.u64[0];\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#define simde_m_psllq(a, count) simde_mm_sll_si64(a, count)\n#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)\n#  define _mm_sll_si64(a, count) simde_mm_sll_si64(a, count)\n#  define _m_psllq(a, count) simde_mm_sll_si64(a, count)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_srl_pi16 (simde__m64 a, simde__m64 count) {\n  #if defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_srl_pi16(a, count);\n  #else\n    simde__m64_private r_;\n    simde__m64_private a_ = simde__m64_to_private(a);\n    simde__m64_private count_ = simde__m64_to_private(count);\n\n    #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT)\n      if (HEDLEY_UNLIKELY(count_.u64[0] > 15))\n        return simde_mm_setzero_si64();\n\n      r_.u16 = a_.u16 >> HEDLEY_STATIC_CAST(uint16_t, count_.u64[0]);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)\n      r_.u16 = a_.u16 >> count_.u64[0];\n    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u16 = vshl_u16(a_.neon_u16, vmov_n_s16(-((int16_t) vget_lane_u64(count_.neon_u64, 0))));\n    #else\n      if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) {\n        simde_memset(&r_, 0, sizeof(r_));\n        return simde__m64_from_private(r_);\n      }\n\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < sizeof(r_.u16) / sizeof(r_.u16[0]) ; i++) {\n        r_.u16[i] = a_.u16[i] >> count_.u64[0];\n      }\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#define simde_m_psrlw(a, count) simde_mm_srl_pi16(a, count)\n#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)\n#  define _mm_srl_pi16(a, count) simde_mm_srl_pi16(a, count)\n#  define _m_psrlw(a, count) simde_mm_srl_pi16(a, count)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_srl_pi32 (simde__m64 a, simde__m64 count) {\n  #if defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_srl_pi32(a, count);\n  #else\n    simde__m64_private r_;\n    simde__m64_private a_ = simde__m64_to_private(a);\n    simde__m64_private count_ = simde__m64_to_private(count);\n\n    #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)\n      r_.u32 = a_.u32 >> count_.u64[0];\n    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u32 = vshl_u32(a_.neon_u32, vmov_n_s32(-((int32_t) vget_lane_u64(count_.neon_u64, 0))));\n    #else\n      if (HEDLEY_UNLIKELY(count_.u64[0] > 31)) {\n        simde_memset(&r_, 0, sizeof(r_));\n        return simde__m64_from_private(r_);\n      }\n\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < sizeof(r_.u32) / sizeof(r_.u32[0]) ; i++) {\n        r_.u32[i] = a_.u32[i] >> count_.u64[0];\n      }\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#define simde_m_psrld(a, count) simde_mm_srl_pi32(a, count)\n#if 
defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)\n#  define _mm_srl_pi32(a, count) simde_mm_srl_pi32(a, count)\n#  define _m_psrld(a, count) simde_mm_srl_pi32(a, count)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_srli_pi16 (simde__m64 a, int count) {\n  #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI)\n    return _mm_srli_pi16(a, count);\n  #else\n    simde__m64_private r_;\n    simde__m64_private a_ = simde__m64_to_private(a);\n\n    #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)\n      r_.u16 = a_.u16 >> count;\n    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u16 = vshl_u16(a_.neon_u16, vmov_n_s16(-((int16_t) count)));\n    #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)\n      r_.mmi_i16 = psrlh_s(a_.mmi_i16, count);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {\n        r_.u16[i] = a_.u16[i] >> count;\n      }\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#define simde_m_psrlwi(a, count) simde_mm_srli_pi16(a, count)\n#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)\n#  define _mm_srli_pi16(a, count) simde_mm_srli_pi16(a, count)\n#  define _m_psrlwi(a, count) simde_mm_srli_pi16(a, count)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_srli_pi32 (simde__m64 a, int count) {\n  #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI)\n    return _mm_srli_pi32(a, count);\n  #else\n    simde__m64_private r_;\n    simde__m64_private a_ = simde__m64_to_private(a);\n\n    #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)\n      r_.u32 = a_.u32 >> count;\n    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u32 = vshl_u32(a_.neon_u32, vmov_n_s32(-((int32_t) count)));\n    #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)\n      r_.mmi_i32 = psrlw_s(a_.mmi_i32, count);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {\n        r_.u32[i] = a_.u32[i] >> count;\n      }\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#define simde_m_psrldi(a, count) simde_mm_srli_pi32(a, count)\n#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)\n#  define _mm_srli_pi32(a, count) simde_mm_srli_pi32(a, count)\n#  define _m_psrldi(a, count) simde_mm_srli_pi32(a, count)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_srli_si64 (simde__m64 a, int count) {\n  #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI)\n    return _mm_srli_si64(a, count);\n  #else\n    simde__m64_private r_;\n    simde__m64_private a_ = simde__m64_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u64 = vshl_u64(a_.neon_u64, vmov_n_s64(-count));\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)\n      r_.u64 = a_.u64 >> count;\n    #else\n      r_.u64[0] = a_.u64[0] >> count;\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#define simde_m_psrlqi(a, count) simde_mm_srli_si64(a, count)\n#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)\n#  define _mm_srli_si64(a, count) simde_mm_srli_si64(a, count)\n#  define _m_psrlqi(a, count) simde_mm_srli_si64(a, count)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_srl_si64 (simde__m64 a, simde__m64 count) {\n  #if defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_srl_si64(a, count);\n  #else\n    simde__m64_private r_;\n    simde__m64_private a_ = simde__m64_to_private(a);\n    simde__m64_private count_ = simde__m64_to_private(count);\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n
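      /* NEON has no right-shift by a runtime vector; vshl shifts right when given a negated count. */\n      r_.neon_u64 = vshl_u64(a_.neon_u64, 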
vneg_s64(count_.neon_i64));\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.u64 = a_.u64 >> count_.u64;\n    #else\n      if (HEDLEY_UNLIKELY(count_.u64[0] > 63)) {\n        simde_memset(&r_, 0, sizeof(r_));\n        return simde__m64_from_private(r_);\n      }\n\n      r_.u64[0] = a_.u64[0] >> count_.u64[0];\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#define simde_m_psrlq(a, count) simde_mm_srl_si64(a, count)\n#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)\n#  define _mm_srl_si64(a, count) simde_mm_srl_si64(a, count)\n#  define _m_psrlq(a, count) simde_mm_srl_si64(a, count)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_srai_pi16 (simde__m64 a, int count) {\n  #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI)\n    return _mm_srai_pi16(a, count);\n  #else\n    simde__m64_private r_;\n    simde__m64_private a_ = simde__m64_to_private(a);\n\n    #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)\n      r_.i16 = a_.i16 >> (count & 0xff);\n    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(-HEDLEY_STATIC_CAST(int16_t, count)));\n    #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)\n      r_.mmi_i16 = psrah_s(a_.mmi_i16, count);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n        r_.i16[i] = a_.i16[i] >> (count & 0xff);\n      }\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#define simde_m_psrawi(a, count) simde_mm_srai_pi16(a, count)\n#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)\n#  define _mm_srai_pi16(a, count) simde_mm_srai_pi16(a, count)\n#  define _m_psrawi(a, count) simde_mm_srai_pi16(a, count)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_srai_pi32 (simde__m64 a, int count) {\n  #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI)\n    return _mm_srai_pi32(a, count);\n  #else\n    simde__m64_private r_;\n    simde__m64_private a_ = simde__m64_to_private(a);\n\n    #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)\n      r_.i32 = a_.i32 >> (count & 0xff);\n    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(-HEDLEY_STATIC_CAST(int32_t, count)));\n    #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)\n      r_.mmi_i32 = psraw_s(a_.mmi_i32, count);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n        r_.i32[i] = a_.i32[i] >> (count & 0xff);\n      }\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#define simde_m_psradi(a, count) simde_mm_srai_pi32(a, count)\n#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)\n#  define _mm_srai_pi32(a, count) simde_mm_srai_pi32(a, count)\n#  define _m_psradi(a, count) simde_mm_srai_pi32(a, count)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_sra_pi16 (simde__m64 a, simde__m64 count) {\n  #if defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_sra_pi16(a, count);\n  #else\n    simde__m64_private r_;\n    simde__m64_private a_ = simde__m64_to_private(a);\n    simde__m64_private count_ = simde__m64_to_private(count);\n    const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 15 ? 
15 : count_.i64[0]));\n\n    #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)\n      r_.i16 = a_.i16 >> cnt;\n    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(-HEDLEY_STATIC_CAST(int16_t, vget_lane_u64(count_.neon_u64, 0))));\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n        r_.i16[i] = a_.i16[i] >> cnt;\n      }\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#define simde_m_psraw(a, count) simde_mm_sra_pi16(a, count)\n#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)\n#  define _mm_sra_pi16(a, count) simde_mm_sra_pi16(a, count)\n#  define _m_psraw(a, count) simde_mm_sra_pi16(a, count)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_sra_pi32 (simde__m64 a, simde__m64 count) {\n  #if defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_sra_pi32(a, count);\n  #else\n    simde__m64_private r_;\n    simde__m64_private a_ = simde__m64_to_private(a);\n    simde__m64_private count_ = simde__m64_to_private(count);\n    const int32_t cnt = (count_.u64[0] > 31) ? 31 : HEDLEY_STATIC_CAST(int32_t, count_.u64[0]);\n\n    #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)\n      r_.i32 = a_.i32 >> cnt;\n    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(-HEDLEY_STATIC_CAST(int32_t, vget_lane_u64(count_.neon_u64, 0))));\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n        r_.i32[i] = a_.i32[i] >> cnt;\n      }\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#define simde_m_psrad(a, b) simde_mm_sra_pi32(a, b)\n#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)\n#  define _mm_sra_pi32(a, count) simde_mm_sra_pi32(a, count)\n#  define _m_psrad(a, count) simde_mm_sra_pi32(a, count)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_sub_pi8 (simde__m64 a, simde__m64 b) {\n  #if defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_sub_pi8(a, b);\n  #else\n    simde__m64_private r_;\n    simde__m64_private a_ = simde__m64_to_private(a);\n    simde__m64_private b_ = simde__m64_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i8 = vsub_s8(a_.neon_i8, b_.neon_i8);\n    #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)\n      r_.mmi_i8 = psubb_s(a_.mmi_i8, b_.mmi_i8);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i8 = a_.i8 - b_.i8;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {\n        r_.i8[i] = a_.i8[i] - b_.i8[i];\n      }\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#define simde_m_psubb(a, b) simde_mm_sub_pi8(a, b)\n#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)\n#  define _mm_sub_pi8(a, b) simde_mm_sub_pi8(a, b)\n#  define _m_psubb(a, b) simde_mm_sub_pi8(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_sub_pi16 (simde__m64 a, simde__m64 b) {\n  #if defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_sub_pi16(a, b);\n  #else\n    simde__m64_private r_;\n    simde__m64_private a_ = simde__m64_to_private(a);\n    simde__m64_private b_ = simde__m64_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i16 = vsub_s16(a_.neon_i16, b_.neon_i16);\n    #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)\n      r_.mmi_i16 = psubh_s(a_.mmi_i16, b_.mmi_i16);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i16 = a_.i16 - b_.i16;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < 
(sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n        r_.i16[i] = a_.i16[i] - b_.i16[i];\n      }\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#define simde_m_psubw(a, b) simde_mm_sub_pi16(a, b)\n#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)\n#  define _mm_sub_pi16(a, b) simde_mm_sub_pi16(a, b)\n#  define _m_psubw(a, b) simde_mm_sub_pi16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_sub_pi32 (simde__m64 a, simde__m64 b) {\n  #if defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_sub_pi32(a, b);\n  #else\n    simde__m64_private r_;\n    simde__m64_private a_ = simde__m64_to_private(a);\n    simde__m64_private b_ = simde__m64_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i32 = vsub_s32(a_.neon_i32, b_.neon_i32);\n    #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)\n      r_.mmi_i32 = psubw_s(a_.mmi_i32, b_.mmi_i32);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i32 = a_.i32 - b_.i32;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n        r_.i32[i] = a_.i32[i] - b_.i32[i];\n      }\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#define simde_m_psubd(a, b) simde_mm_sub_pi32(a, b)\n#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)\n#  define _mm_sub_pi32(a, b) simde_mm_sub_pi32(a, b)\n#  define _m_psubd(a, b) simde_mm_sub_pi32(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_subs_pi8 (simde__m64 a, simde__m64 b) {\n  #if defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_subs_pi8(a, b);\n  #else\n    simde__m64_private r_;\n    simde__m64_private a_ = simde__m64_to_private(a);\n    simde__m64_private b_ = simde__m64_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i8 = vqsub_s8(a_.neon_i8, b_.neon_i8);\n    #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)\n      r_.mmi_i8 = psubsb(a_.mmi_i8, b_.mmi_i8);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {\n        if (((b_.i8[i]) > 0 && (a_.i8[i]) < INT8_MIN + (b_.i8[i]))) {\n          r_.i8[i] = INT8_MIN;\n        } else if ((b_.i8[i]) < 0 && (a_.i8[i]) > INT8_MAX + (b_.i8[i])) {\n          r_.i8[i] = INT8_MAX;\n        } else {\n          r_.i8[i] = (a_.i8[i]) - (b_.i8[i]);\n        }\n      }\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#define simde_m_psubsb(a, b) simde_mm_subs_pi8(a, b)\n#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)\n#  define _mm_subs_pi8(a, b) simde_mm_subs_pi8(a, b)\n#  define _m_psubsb(a, b) simde_mm_subs_pi8(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_subs_pu8 (simde__m64 a, simde__m64 b) {\n  #if defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_subs_pu8(a, b);\n  #else\n    simde__m64_private r_;\n    simde__m64_private a_ = simde__m64_to_private(a);\n    simde__m64_private b_ = simde__m64_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u8 = vqsub_u8(a_.neon_u8, b_.neon_u8);\n    #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)\n      r_.mmi_u8 = psubusb(a_.mmi_u8, b_.mmi_u8);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {\n        const int32_t x = a_.u8[i] - b_.u8[i];\n        if (x < 0) {\n          r_.u8[i] = 0;\n        } else if (x > UINT8_MAX) {\n          r_.u8[i] = UINT8_MAX;\n        } else {\n          r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, x);\n        }\n      }\n    #endif\n\n    return 
simde__m64_from_private(r_);\n  #endif\n}\n#define simde_m_psubusb(a, b) simde_mm_subs_pu8(a, b)\n#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)\n#  define _mm_subs_pu8(a, b) simde_mm_subs_pu8(a, b)\n#  define _m_psubusb(a, b) simde_mm_subs_pu8(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_subs_pi16 (simde__m64 a, simde__m64 b) {\n  #if defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_subs_pi16(a, b);\n  #else\n    simde__m64_private r_;\n    simde__m64_private a_ = simde__m64_to_private(a);\n    simde__m64_private b_ = simde__m64_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i16 = vqsub_s16(a_.neon_i16, b_.neon_i16);\n    #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)\n      r_.mmi_i16 = psubsh(a_.mmi_i16, b_.mmi_i16);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n        if (((b_.i16[i]) > 0 && (a_.i16[i]) < INT16_MIN + (b_.i16[i]))) {\n          r_.i16[i] = INT16_MIN;\n        } else if ((b_.i16[i]) < 0 && (a_.i16[i]) > INT16_MAX + (b_.i16[i])) {\n          r_.i16[i] = INT16_MAX;\n        } else {\n          r_.i16[i] = (a_.i16[i]) - (b_.i16[i]);\n        }\n      }\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#define simde_m_psubsw(a, b) simde_mm_subs_pi16(a, b)\n#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)\n#  define _mm_subs_pi16(a, b) simde_mm_subs_pi16(a, b)\n#  define _m_psubsw(a, b) simde_mm_subs_pi16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_subs_pu16 (simde__m64 a, simde__m64 b) {\n  #if defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_subs_pu16(a, b);\n  #else\n    simde__m64_private r_;\n    simde__m64_private a_ = simde__m64_to_private(a);\n    simde__m64_private b_ = simde__m64_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u16 = vqsub_u16(a_.neon_u16, b_.neon_u16);\n    #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)\n      r_.mmi_u16 = psubush(a_.mmi_u16, b_.mmi_u16);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {\n        const int x = a_.u16[i] - b_.u16[i];\n        if (x < 0) {\n          r_.u16[i] = 0;\n        } else if (x > UINT16_MAX) {\n          r_.u16[i] = UINT16_MAX;\n        } else {\n          r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x);\n        }\n      }\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#define simde_m_psubusw(a, b) simde_mm_subs_pu16(a, b)\n#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)\n#  define _mm_subs_pu16(a, b) simde_mm_subs_pu16(a, b)\n#  define _m_psubusw(a, b) simde_mm_subs_pu16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_unpackhi_pi8 (simde__m64 a, simde__m64 b) {\n  #if defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_unpackhi_pi8(a, b);\n  #else\n    simde__m64_private r_;\n    simde__m64_private a_ = simde__m64_to_private(a);\n    simde__m64_private b_ = simde__m64_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      r_.neon_i8 = vzip2_s8(a_.neon_i8, b_.neon_i8);\n    #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)\n      r_.mmi_i8 = punpckhbh_s(a_.mmi_i8, b_.mmi_i8);\n    #elif defined(SIMDE_SHUFFLE_VECTOR_)\n      r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.i8, b_.i8, 4, 12, 5, 13, 6, 14, 7, 15);\n    #else\n      r_.i8[0] = a_.i8[4];\n      r_.i8[1] = b_.i8[4];\n      r_.i8[2] = a_.i8[5];\n      r_.i8[3] = b_.i8[5];\n      r_.i8[4] = a_.i8[6];\n      r_.i8[5] = b_.i8[6];\n      r_.i8[6] = a_.i8[7];\n      r_.i8[7] = 
b_.i8[7];\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#define simde_m_punpckhbw(a, b) simde_mm_unpackhi_pi8(a, b)\n#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)\n#  define _mm_unpackhi_pi8(a, b) simde_mm_unpackhi_pi8(a, b)\n#  define _m_punpckhbw(a, b) simde_mm_unpackhi_pi8(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_unpackhi_pi16 (simde__m64 a, simde__m64 b) {\n  #if defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_unpackhi_pi16(a, b);\n  #else\n    simde__m64_private r_;\n    simde__m64_private a_ = simde__m64_to_private(a);\n    simde__m64_private b_ = simde__m64_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      r_.neon_i16 = vzip2_s16(a_.neon_i16, b_.neon_i16);\n    #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)\n      r_.mmi_i16 = punpckhhw_s(a_.mmi_i16, b_.mmi_i16);\n    #elif defined(SIMDE_SHUFFLE_VECTOR_)\n      r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 2, 6, 3, 7);\n    #else\n      r_.i16[0] = a_.i16[2];\n      r_.i16[1] = b_.i16[2];\n      r_.i16[2] = a_.i16[3];\n      r_.i16[3] = b_.i16[3];\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#define simde_m_punpckhwd(a, b) simde_mm_unpackhi_pi16(a, b)\n#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)\n#  define _mm_unpackhi_pi16(a, b) simde_mm_unpackhi_pi16(a, b)\n#  define _m_punpckhwd(a, b) simde_mm_unpackhi_pi16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_unpackhi_pi32 (simde__m64 a, simde__m64 b) {\n  #if defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_unpackhi_pi32(a, b);\n  #else\n    simde__m64_private r_;\n    simde__m64_private a_ = simde__m64_to_private(a);\n    simde__m64_private b_ = simde__m64_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      r_.neon_i32 = vzip2_s32(a_.neon_i32, b_.neon_i32);\n    #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)\n      r_.mmi_i32 = punpckhwd_s(a_.mmi_i32, b_.mmi_i32);\n    #elif defined(SIMDE_SHUFFLE_VECTOR_)\n      r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 1, 3);\n    #else\n      r_.i32[0] = a_.i32[1];\n      r_.i32[1] = b_.i32[1];\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#define simde_m_punpckhdq(a, b) simde_mm_unpackhi_pi32(a, b)\n#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)\n#  define _mm_unpackhi_pi32(a, b) simde_mm_unpackhi_pi32(a, b)\n#  define _m_punpckhdq(a, b) simde_mm_unpackhi_pi32(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_unpacklo_pi8 (simde__m64 a, simde__m64 b) {\n  #if defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_unpacklo_pi8(a, b);\n  #else\n    simde__m64_private r_;\n    simde__m64_private a_ = simde__m64_to_private(a);\n    simde__m64_private b_ = simde__m64_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      r_.neon_i8 = vzip1_s8(a_.neon_i8, b_.neon_i8);\n    #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)\n      r_.mmi_i8 = punpcklbh_s(a_.mmi_i8, b_.mmi_i8);\n    #elif defined(SIMDE_SHUFFLE_VECTOR_)\n      r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.i8, b_.i8, 0, 8, 1, 9, 2, 10, 3, 11);\n    #else\n      r_.i8[0] = a_.i8[0];\n      r_.i8[1] = b_.i8[0];\n      r_.i8[2] = a_.i8[1];\n      r_.i8[3] = b_.i8[1];\n      r_.i8[4] = a_.i8[2];\n      r_.i8[5] = b_.i8[2];\n      r_.i8[6] = a_.i8[3];\n      r_.i8[7] = b_.i8[3];\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#define simde_m_punpcklbw(a, b) simde_mm_unpacklo_pi8(a, b)\n#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)\n#  define _mm_unpacklo_pi8(a, b) simde_mm_unpacklo_pi8(a, 
b)\n#  define _m_punpcklbw(a, b) simde_mm_unpacklo_pi8(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_unpacklo_pi16 (simde__m64 a, simde__m64 b) {\n  #if defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_unpacklo_pi16(a, b);\n  #else\n    simde__m64_private r_;\n    simde__m64_private a_ = simde__m64_to_private(a);\n    simde__m64_private b_ = simde__m64_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      r_.neon_i16 = vzip1_s16(a_.neon_i16, b_.neon_i16);\n    #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)\n      r_.mmi_i16 = punpcklhw_s(a_.mmi_i16, b_.mmi_i16);\n    #elif defined(SIMDE_SHUFFLE_VECTOR_)\n      r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 0, 4, 1, 5);\n    #else\n      r_.i16[0] = a_.i16[0];\n      r_.i16[1] = b_.i16[0];\n      r_.i16[2] = a_.i16[1];\n      r_.i16[3] = b_.i16[1];\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#define simde_m_punpcklwd(a, b) simde_mm_unpacklo_pi16(a, b)\n#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)\n#  define _mm_unpacklo_pi16(a, b) simde_mm_unpacklo_pi16(a, b)\n#  define _m_punpcklwd(a, b) simde_mm_unpacklo_pi16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_unpacklo_pi32 (simde__m64 a, simde__m64 b) {\n  #if defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_unpacklo_pi32(a, b);\n  #else\n    simde__m64_private r_;\n    simde__m64_private a_ = simde__m64_to_private(a);\n    simde__m64_private b_ = simde__m64_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      r_.neon_i32 = vzip1_s32(a_.neon_i32, b_.neon_i32);\n    #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)\n      r_.mmi_i32 = punpcklwd_s(a_.mmi_i32, b_.mmi_i32);\n    #elif defined(SIMDE_SHUFFLE_VECTOR_)\n      r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 0, 2);\n    #else\n      r_.i32[0] = a_.i32[0];\n      r_.i32[1] = b_.i32[0];\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#define simde_m_punpckldq(a, b) simde_mm_unpacklo_pi32(a, b)\n#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)\n#  define _mm_unpacklo_pi32(a, b) simde_mm_unpacklo_pi32(a, b)\n#  define _m_punpckldq(a, b) simde_mm_unpacklo_pi32(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_xor_si64 (simde__m64 a, simde__m64 b) {\n  #if defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_xor_si64(a, b);\n  #else\n    simde__m64_private r_;\n    simde__m64_private a_ = simde__m64_to_private(a);\n    simde__m64_private b_ = simde__m64_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i32 = veor_s32(a_.neon_i32, b_.neon_i32);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i32f = a_.i32f ^ b_.i32f;\n    #else\n      r_.u64[0] = a_.u64[0] ^ b_.u64[0];\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#define simde_m_pxor(a, b) simde_mm_xor_si64(a, b)\n#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)\n#  define _mm_xor_si64(a, b) simde_mm_xor_si64(a, b)\n#  define _m_pxor(a, b) simde_mm_xor_si64(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nint32_t\nsimde_m_to_int (simde__m64 a) {\n  #if defined(SIMDE_X86_MMX_NATIVE)\n    return _m_to_int(a);\n  #else\n    simde__m64_private a_ = simde__m64_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      HEDLEY_DIAGNOSTIC_PUSH\n      #if HEDLEY_HAS_WARNING(\"-Wvector-conversion\") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0)\n        // #pragma clang diagnostic ignored \"-Wvector-conversion\"\n      #endif\n      return vget_lane_s32(a_.neon_i32, 0);\n      HEDLEY_DIAGNOSTIC_POP\n    #else\n   
   return a_.i32[0];\n    #endif\n  #endif\n}\n#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)\n#  define _m_to_int(a) simde_m_to_int(a)\n#endif\n\nSIMDE_END_DECLS_\n\nHEDLEY_DIAGNOSTIC_POP\n\n#endif /* !defined(SIMDE_X86_MMX_H) */\n"
  },
  {
    "path": "external_libs/pgenlib/simde/x86/sse.h",
    "content": "/* SPDX-License-Identifier: MIT\n *\n * Permission is hereby granted, free of charge, to any person\n * obtaining a copy of this software and associated documentation\n * files (the \"Software\"), to deal in the Software without\n * restriction, including without limitation the rights to use, copy,\n * modify, merge, publish, distribute, sublicense, and/or sell copies\n * of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be\n * included in all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND,\n * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\n * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\n * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\n * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\n * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\n * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n * SOFTWARE.\n *\n * Copyright:\n *   2017-2020 Evan Nemerson <evan@nemerson.com>\n *   2015-2017 John W. Ratcliff <jratcliffscarab@gmail.com>\n *   2015      Brandon Rowlett <browlett@nvidia.com>\n *   2015      Ken Fast <kfast@gdeb.com>\n */\n\n#if !defined(SIMDE_X86_SSE_H)\n#define SIMDE_X86_SSE_H\n\n#include \"mmx.h\"\n#include \"../simde-f16.h\"\n\n#if defined(_WIN32) && !defined(SIMDE_X86_SSE_NATIVE) && defined(_MSC_VER)\n  #define NOMINMAX\n  #include <windows.h>\n#endif\n\n#if defined(__ARM_ACLE)\n  #include <arm_acle.h>\n#endif\n\nHEDLEY_DIAGNOSTIC_PUSH\nSIMDE_DISABLE_UNWANTED_DIAGNOSTICS\nSIMDE_BEGIN_DECLS_\n\ntypedef union {\n  #if defined(SIMDE_VECTOR_SUBSCRIPT)\n    SIMDE_ALIGN_TO_16 int8_t          i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;\n    SIMDE_ALIGN_TO_16 int16_t        i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;\n    SIMDE_ALIGN_TO_16 int32_t        i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;\n    SIMDE_ALIGN_TO_16 int64_t        i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;\n    SIMDE_ALIGN_TO_16 uint8_t         u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;\n    SIMDE_ALIGN_TO_16 uint16_t       u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;\n    SIMDE_ALIGN_TO_16 uint32_t       u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;\n    SIMDE_ALIGN_TO_16 uint64_t       u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;\n    #if defined(SIMDE_HAVE_INT128_)\n    SIMDE_ALIGN_TO_16 simde_int128  i128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;\n    SIMDE_ALIGN_TO_16 simde_uint128 u128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;\n    #endif\n    #if defined(SIMDE_FLOAT16_VECTOR)\n    SIMDE_ALIGN_TO_16 simde_float16  f16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;\n    #else\n    SIMDE_ALIGN_TO_16 simde_float16  f16[8];\n    #endif\n    SIMDE_ALIGN_TO_16 simde_float32  f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;\n    SIMDE_ALIGN_TO_16 int_fast32_t  i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;\n    SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;\n  #else\n    SIMDE_ALIGN_TO_16 int8_t         i8[16];\n    SIMDE_ALIGN_TO_16 int16_t        i16[8];\n    SIMDE_ALIGN_TO_16 int32_t        i32[4];\n    SIMDE_ALIGN_TO_16 int64_t        i64[2];\n    SIMDE_ALIGN_TO_16 uint8_t        u8[16];\n    SIMDE_ALIGN_TO_16 uint16_t       u16[8];\n    SIMDE_ALIGN_TO_16 uint32_t       u32[4];\n    SIMDE_ALIGN_TO_16 uint64_t       u64[2];\n    #if defined(SIMDE_HAVE_INT128_)\n    SIMDE_ALIGN_TO_16 simde_int128  i128[1];\n    SIMDE_ALIGN_TO_16 simde_uint128 u128[1];\n    
#endif\n    SIMDE_ALIGN_TO_16 simde_float16  f16[8];\n    SIMDE_ALIGN_TO_16 simde_float32  f32[4];\n    SIMDE_ALIGN_TO_16 int_fast32_t  i32f[16 / sizeof(int_fast32_t)];\n    SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)];\n  #endif\n\n    SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2];\n    SIMDE_ALIGN_TO_16 simde__m64         m64[2];\n\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    SIMDE_ALIGN_TO_16 __m128         n;\n  #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n    SIMDE_ALIGN_TO_16 int8x16_t      neon_i8;\n    SIMDE_ALIGN_TO_16 int16x8_t      neon_i16;\n    SIMDE_ALIGN_TO_16 int32x4_t      neon_i32;\n    SIMDE_ALIGN_TO_16 int64x2_t      neon_i64;\n    SIMDE_ALIGN_TO_16 uint8x16_t     neon_u8;\n    SIMDE_ALIGN_TO_16 uint16x8_t     neon_u16;\n    SIMDE_ALIGN_TO_16 uint32x4_t     neon_u32;\n    SIMDE_ALIGN_TO_16 uint64x2_t     neon_u64;\n    SIMDE_ALIGN_TO_16 float32x4_t    neon_f32;\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      SIMDE_ALIGN_TO_16 float64x2_t    neon_f64;\n    #endif\n  #elif defined(SIMDE_MIPS_MSA_NATIVE)\n    v16i8 msa_i8;\n    v8i16 msa_i16;\n    v4i32 msa_i32;\n    v2i64 msa_i64;\n    v16u8 msa_u8;\n    v8u16 msa_u16;\n    v4u32 msa_u32;\n    v2u64 msa_u64;\n  #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n    SIMDE_ALIGN_TO_16 v128_t         wasm_v128;\n  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)\n    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char)      altivec_u8;\n    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short)     altivec_u16;\n    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int)       altivec_u32;\n    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char)        altivec_i8;\n    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short)       altivec_i16;\n    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int)         altivec_i32;\n    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float)              altivec_f32;\n    #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)\n      SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64;\n      SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long)   altivec_i64;\n      SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double)             altivec_f64;\n    #endif\n  #elif defined(SIMDE_LOONGARCH_LSX_NATIVE)\n    v16i8 lsx_i8;\n    v8i16 lsx_i16;\n    v4i32 lsx_i32;\n    v2i64 lsx_i64;\n    v16u8 lsx_u8;\n    v8u16 lsx_u16;\n    v4u32 lsx_u32;\n    v2u64 lsx_u64;\n    v4f32 lsx_f32;\n    v2f64 lsx_f64;\n  #endif\n} simde__m128_private;\n\n#if defined(SIMDE_X86_SSE_NATIVE)\n  typedef __m128 simde__m128;\n#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n   typedef float32x4_t simde__m128;\n#elif defined(SIMDE_WASM_SIMD128_NATIVE)\n   typedef v128_t simde__m128;\n#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)\n   typedef SIMDE_POWER_ALTIVEC_VECTOR(float) simde__m128;\n#elif defined(SIMDE_LOONGARCH_LSX_NATIVE)\n  typedef v4f32 simde__m128;\n#elif defined(SIMDE_VECTOR_SUBSCRIPT)\n  typedef simde_float32 simde__m128 SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;\n#else\n  typedef simde__m128_private simde__m128;\n#endif\n\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n  typedef simde__m128 __m128;\n#endif\n\nHEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128), \"simde__m128 size incorrect\");\nHEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128_private), \"simde__m128_private size incorrect\");\n#if 
defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF)\nHEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128) == 16, \"simde__m128 is not 16-byte aligned\");\nHEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128_private) == 16, \"simde__m128_private is not 16-byte aligned\");\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde__m128_from_private(simde__m128_private v) {\n  simde__m128 r;\n  simde_memcpy(&r, &v, sizeof(r));\n  return r;\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128_private\nsimde__m128_to_private(simde__m128 v) {\n  simde__m128_private r;\n  simde_memcpy(&r, &v, sizeof(r));\n  return r;\n}\n\n#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int8x16_t, neon, i8)\n  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int16x8_t, neon, i16)\n  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int32x4_t, neon, i32)\n  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int64x2_t, neon, i64)\n  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint8x16_t, neon, u8)\n  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint16x8_t, neon, u16)\n  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint32x4_t, neon, u32)\n  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint64x2_t, neon, u64)\n  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, float32x4_t, neon, f32)\n  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n    SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, float64x2_t, neon, f64)\n  #endif\n#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */\n\n#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)\n  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8)\n  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16)\n  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32)\n  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8)\n  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16)\n  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32)\n\n  #if defined(SIMDE_BUG_GCC_95782)\n    SIMDE_FUNCTION_ATTRIBUTES\n    SIMDE_POWER_ALTIVEC_VECTOR(float)\n    simde__m128_to_altivec_f32(simde__m128 value) {\n      simde__m128_private r_ = simde__m128_to_private(value);\n      return r_.altivec_f32;\n    }\n\n    SIMDE_FUNCTION_ATTRIBUTES\n    simde__m128\n    simde__m128_from_altivec_f32(SIMDE_POWER_ALTIVEC_VECTOR(float) value) {\n      simde__m128_private r_;\n      r_.altivec_f32 = value;\n      return simde__m128_from_private(r_);\n    }\n  #else\n    SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(float), altivec, f32)\n  #endif\n\n  #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)\n    SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64)\n    SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64)\n  #endif\n#elif defined(SIMDE_WASM_SIMD128_NATIVE)\n  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v128_t, wasm, v128);\n#endif /* defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) */\n\n#if defined(SIMDE_LOONGARCH_LSX_NATIVE)\n  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v16i8, lsx, i8)\n  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v8i16, lsx, i16)\n  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4i32, lsx, i32)\n  
SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2i64, lsx, i64)\n  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v16u8, lsx, u8)\n  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v8u16, lsx, u16)\n  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4u32, lsx, u32)\n  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2u64, lsx, u64)\n  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4f32, lsx, f32)\n  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2f64, lsx, f64)\n#endif /* defined(SIMDE_LOONGARCH_LSX_NATIVE) */\n\nenum {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    SIMDE_MM_ROUND_NEAREST     = _MM_ROUND_NEAREST,\n    SIMDE_MM_ROUND_DOWN        = _MM_ROUND_DOWN,\n    SIMDE_MM_ROUND_UP          = _MM_ROUND_UP,\n    SIMDE_MM_ROUND_TOWARD_ZERO = _MM_ROUND_TOWARD_ZERO\n  #else\n    SIMDE_MM_ROUND_NEAREST     = 0x0000,\n    SIMDE_MM_ROUND_DOWN        = 0x2000,\n    SIMDE_MM_ROUND_UP          = 0x4000,\n    SIMDE_MM_ROUND_TOWARD_ZERO = 0x6000\n  #endif\n};\n#if defined(_MM_ROUND_MASK)\n#  define SIMDE_MM_ROUND_MASK _MM_ROUND_MASK\n#else\n#  define SIMDE_MM_ROUND_MASK (0x6000)\n#endif\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n  #define _MM_ROUND_MASK SIMDE_MM_ROUND_MASK\n#endif\n\n#if defined(_MM_FROUND_TO_NEAREST_INT)\n#  define SIMDE_MM_FROUND_TO_NEAREST_INT _MM_FROUND_TO_NEAREST_INT\n#  define SIMDE_MM_FROUND_TO_NEG_INF     _MM_FROUND_TO_NEG_INF\n#  define SIMDE_MM_FROUND_TO_POS_INF     _MM_FROUND_TO_POS_INF\n#  define SIMDE_MM_FROUND_TO_ZERO        _MM_FROUND_TO_ZERO\n#  define SIMDE_MM_FROUND_CUR_DIRECTION  _MM_FROUND_CUR_DIRECTION\n\n#  define SIMDE_MM_FROUND_RAISE_EXC      _MM_FROUND_RAISE_EXC\n#  define SIMDE_MM_FROUND_NO_EXC         _MM_FROUND_NO_EXC\n#else\n#  define SIMDE_MM_FROUND_TO_NEAREST_INT 0x00\n#  define SIMDE_MM_FROUND_TO_NEG_INF     0x01\n#  define SIMDE_MM_FROUND_TO_POS_INF     0x02\n#  define SIMDE_MM_FROUND_TO_ZERO        0x03\n#  define SIMDE_MM_FROUND_CUR_DIRECTION  0x04\n\n#  define SIMDE_MM_FROUND_RAISE_EXC      0x00\n#  define SIMDE_MM_FROUND_NO_EXC         0x08\n#endif\n\n#define SIMDE_MM_FROUND_NINT \\\n  (SIMDE_MM_FROUND_TO_NEAREST_INT | SIMDE_MM_FROUND_RAISE_EXC)\n#define SIMDE_MM_FROUND_FLOOR \\\n  (SIMDE_MM_FROUND_TO_NEG_INF | SIMDE_MM_FROUND_RAISE_EXC)\n#define SIMDE_MM_FROUND_CEIL \\\n  (SIMDE_MM_FROUND_TO_POS_INF | SIMDE_MM_FROUND_RAISE_EXC)\n#define SIMDE_MM_FROUND_TRUNC \\\n  (SIMDE_MM_FROUND_TO_ZERO | SIMDE_MM_FROUND_RAISE_EXC)\n#define SIMDE_MM_FROUND_RINT \\\n  (SIMDE_MM_FROUND_CUR_DIRECTION | SIMDE_MM_FROUND_RAISE_EXC)\n#define SIMDE_MM_FROUND_NEARBYINT \\\n  (SIMDE_MM_FROUND_CUR_DIRECTION | SIMDE_MM_FROUND_NO_EXC)\n\n#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) && !defined(_MM_FROUND_TO_NEAREST_INT)\n#  define _MM_FROUND_TO_NEAREST_INT SIMDE_MM_FROUND_TO_NEAREST_INT\n#  define _MM_FROUND_TO_NEG_INF SIMDE_MM_FROUND_TO_NEG_INF\n#  define _MM_FROUND_TO_POS_INF SIMDE_MM_FROUND_TO_POS_INF\n#  define _MM_FROUND_TO_ZERO SIMDE_MM_FROUND_TO_ZERO\n#  define _MM_FROUND_CUR_DIRECTION SIMDE_MM_FROUND_CUR_DIRECTION\n#  define _MM_FROUND_RAISE_EXC SIMDE_MM_FROUND_RAISE_EXC\n#  define _MM_FROUND_NINT SIMDE_MM_FROUND_NINT\n#  define _MM_FROUND_FLOOR SIMDE_MM_FROUND_FLOOR\n#  define _MM_FROUND_CEIL SIMDE_MM_FROUND_CEIL\n#  define _MM_FROUND_TRUNC SIMDE_MM_FROUND_TRUNC\n#  define _MM_FROUND_RINT SIMDE_MM_FROUND_RINT\n#  define _MM_FROUND_NEARBYINT SIMDE_MM_FROUND_NEARBYINT\n#endif\n\n#if defined(_MM_EXCEPT_INVALID)\n#  define SIMDE_MM_EXCEPT_INVALID _MM_EXCEPT_INVALID\n#else\n#  define SIMDE_MM_EXCEPT_INVALID (0x0001)\n#endif\n#if defined(_MM_EXCEPT_DENORM)\n#  define 
SIMDE_MM_EXCEPT_DENORM _MM_EXCEPT_DENORM\n#else\n#  define SIMDE_MM_EXCEPT_DENORM (0x0002)\n#endif\n#if defined(_MM_EXCEPT_DIV_ZERO)\n#  define SIMDE_MM_EXCEPT_DIV_ZERO _MM_EXCEPT_DIV_ZERO\n#else\n#  define SIMDE_MM_EXCEPT_DIV_ZERO (0x0004)\n#endif\n#if defined(_MM_EXCEPT_OVERFLOW)\n#  define SIMDE_MM_EXCEPT_OVERFLOW _MM_EXCEPT_OVERFLOW\n#else\n#  define SIMDE_MM_EXCEPT_OVERFLOW (0x0008)\n#endif\n#if defined(_MM_EXCEPT_UNDERFLOW)\n#  define SIMDE_MM_EXCEPT_UNDERFLOW _MM_EXCEPT_UNDERFLOW\n#else\n#  define SIMDE_MM_EXCEPT_UNDERFLOW (0x0010)\n#endif\n#if defined(_MM_EXCEPT_INEXACT)\n#  define SIMDE_MM_EXCEPT_INEXACT _MM_EXCEPT_INEXACT\n#else\n#  define SIMDE_MM_EXCEPT_INEXACT (0x0020)\n#endif\n#if defined(_MM_EXCEPT_MASK)\n#  define SIMDE_MM_EXCEPT_MASK _MM_EXCEPT_MASK\n#else\n#  define SIMDE_MM_EXCEPT_MASK \\\n     (SIMDE_MM_EXCEPT_INVALID | SIMDE_MM_EXCEPT_DENORM | \\\n      SIMDE_MM_EXCEPT_DIV_ZERO | SIMDE_MM_EXCEPT_OVERFLOW | \\\n      SIMDE_MM_EXCEPT_UNDERFLOW | SIMDE_MM_EXCEPT_INEXACT)\n#endif\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n  #define _MM_EXCEPT_INVALID SIMDE_MM_EXCEPT_INVALID\n  #define _MM_EXCEPT_DENORM SIMDE_MM_EXCEPT_DENORM\n  #define _MM_EXCEPT_DIV_ZERO SIMDE_MM_EXCEPT_DIV_ZERO\n  #define _MM_EXCEPT_OVERFLOW SIMDE_MM_EXCEPT_OVERFLOW\n  #define _MM_EXCEPT_UNDERFLOW SIMDE_MM_EXCEPT_UNDERFLOW\n  #define _MM_EXCEPT_INEXACT SIMDE_MM_EXCEPT_INEXACT\n  #define _MM_EXCEPT_MASK SIMDE_MM_EXCEPT_MASK\n#endif\n\n#if defined(_MM_MASK_INVALID)\n#  define SIMDE_MM_MASK_INVALID _MM_MASK_INVALID\n#else\n#  define SIMDE_MM_MASK_INVALID (0x0080)\n#endif\n#if defined(_MM_MASK_DENORM)\n#  define SIMDE_MM_MASK_DENORM _MM_MASK_DENORM\n#else\n#  define SIMDE_MM_MASK_DENORM (0x0100)\n#endif\n#if defined(_MM_MASK_DIV_ZERO)\n#  define SIMDE_MM_MASK_DIV_ZERO _MM_MASK_DIV_ZERO\n#else\n#  define SIMDE_MM_MASK_DIV_ZERO (0x0200)\n#endif\n#if defined(_MM_MASK_OVERFLOW)\n#  define SIMDE_MM_MASK_OVERFLOW _MM_MASK_OVERFLOW\n#else\n#  define SIMDE_MM_MASK_OVERFLOW (0x0400)\n#endif\n#if defined(_MM_MASK_UNDERFLOW)\n#  define SIMDE_MM_MASK_UNDERFLOW _MM_MASK_UNDERFLOW\n#else\n#  define SIMDE_MM_MASK_UNDERFLOW (0x0800)\n#endif\n#if defined(_MM_MASK_INEXACT)\n#  define SIMDE_MM_MASK_INEXACT _MM_MASK_INEXACT\n#else\n#  define SIMDE_MM_MASK_INEXACT (0x1000)\n#endif\n#if defined(_MM_MASK_MASK)\n#  define SIMDE_MM_MASK_MASK _MM_MASK_MASK\n#else\n#  define SIMDE_MM_MASK_MASK \\\n     (SIMDE_MM_MASK_INVALID | SIMDE_MM_MASK_DENORM | \\\n      SIMDE_MM_MASK_DIV_ZERO | SIMDE_MM_MASK_OVERFLOW | \\\n      SIMDE_MM_MASK_UNDERFLOW | SIMDE_MM_MASK_INEXACT)\n#endif\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n  #define _MM_MASK_INVALID SIMDE_MM_MASK_INVALID\n  #define _MM_MASK_DENORM SIMDE_MM_MASK_DENORM\n  #define _MM_MASK_DIV_ZERO SIMDE_MM_MASK_DIV_ZERO\n  #define _MM_MASK_OVERFLOW SIMDE_MM_MASK_OVERFLOW\n  #define _MM_MASK_UNDERFLOW SIMDE_MM_MASK_UNDERFLOW\n  #define _MM_MASK_INEXACT SIMDE_MM_MASK_INEXACT\n  #define _MM_MASK_MASK SIMDE_MM_MASK_MASK\n#endif\n\n#if defined(_MM_FLUSH_ZERO_MASK)\n#  define SIMDE_MM_FLUSH_ZERO_MASK _MM_FLUSH_ZERO_MASK\n#else\n#  define SIMDE_MM_FLUSH_ZERO_MASK (0x8000)\n#endif\n#if defined(_MM_FLUSH_ZERO_ON)\n#  define SIMDE_MM_FLUSH_ZERO_ON _MM_FLUSH_ZERO_ON\n#else\n#  define SIMDE_MM_FLUSH_ZERO_ON (0x8000)\n#endif\n#if defined(_MM_FLUSH_ZERO_OFF)\n#  define SIMDE_MM_FLUSH_ZERO_OFF _MM_FLUSH_ZERO_OFF\n#else\n#  define SIMDE_MM_FLUSH_ZERO_OFF (0x0000)\n#endif\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n  #define _MM_FLUSH_ZERO_MASK SIMDE_MM_FLUSH_ZERO_MASK\n  #define 
_MM_FLUSH_ZERO_ON SIMDE_MM_FLUSH_ZERO_ON\n  #define _MM_FLUSH_ZERO_OFF SIMDE_MM_FLUSH_ZERO_OFF\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nuint32_t\nSIMDE_MM_GET_ROUNDING_MODE(void) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    return _MM_GET_ROUNDING_MODE();\n  #elif defined(SIMDE_HAVE_FENV_H)\n    unsigned int vfe_mode;\n\n    switch (fegetround()) {\n      #if defined(FE_TONEAREST)\n        case FE_TONEAREST:\n          vfe_mode = SIMDE_MM_ROUND_NEAREST;\n          break;\n      #endif\n\n      #if defined(FE_TOWARDZERO)\n        case FE_TOWARDZERO:\n          vfe_mode = SIMDE_MM_ROUND_TOWARD_ZERO;\n          break;\n      #endif\n\n      #if defined(FE_UPWARD)\n        case FE_UPWARD:\n          vfe_mode = SIMDE_MM_ROUND_UP;\n          break;\n      #endif\n\n      #if defined(FE_DOWNWARD)\n        case FE_DOWNWARD:\n          vfe_mode = SIMDE_MM_ROUND_DOWN;\n          break;\n      #endif\n\n      default:\n        vfe_mode = SIMDE_MM_ROUND_NEAREST;\n        break;\n    }\n\n    return vfe_mode;\n  #else\n    return SIMDE_MM_ROUND_NEAREST;\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n  #define _MM_GET_ROUNDING_MODE() SIMDE_MM_GET_ROUNDING_MODE()\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nvoid\nSIMDE_MM_SET_ROUNDING_MODE(uint32_t a) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    _MM_SET_ROUNDING_MODE(a);\n  #elif defined(SIMDE_HAVE_FENV_H)\n    int fe_mode = FE_TONEAREST;\n\n    switch (a) {\n      #if defined(FE_TONEAREST)\n        case SIMDE_MM_ROUND_NEAREST:\n          fe_mode = FE_TONEAREST;\n          break;\n      #endif\n\n      #if defined(FE_TOWARDZERO)\n        case SIMDE_MM_ROUND_TOWARD_ZERO:\n          fe_mode = FE_TOWARDZERO;\n          break;\n      #endif\n\n      #if defined(FE_DOWNWARD)\n        case SIMDE_MM_ROUND_DOWN:\n          fe_mode = FE_DOWNWARD;\n          break;\n      #endif\n\n      #if defined(FE_UPWARD)\n        case SIMDE_MM_ROUND_UP:\n          fe_mode = FE_UPWARD;\n          break;\n      #endif\n\n      default:\n        return;\n    }\n\n    fesetround(fe_mode);\n  #else\n    (void) a;\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n  #define _MM_SET_ROUNDING_MODE(a) SIMDE_MM_SET_ROUNDING_MODE(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nuint32_t\nSIMDE_MM_GET_FLUSH_ZERO_MODE (void) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_getcsr() & _MM_FLUSH_ZERO_MASK;\n  #else\n    return SIMDE_MM_FLUSH_ZERO_OFF;\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n  #define _MM_GET_FLUSH_ZERO_MODE(a) SIMDE_MM_GET_FLUSH_ZERO_MODE(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nvoid\nSIMDE_MM_SET_FLUSH_ZERO_MODE (uint32_t a) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    _MM_SET_FLUSH_ZERO_MODE(a);\n  #else\n    (void) a;\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n  #define _MM_SET_FLUSH_ZERO_MODE(a) SIMDE_MM_SET_FLUSH_ZERO_MODE(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nuint32_t\nsimde_mm_getcsr (void) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_getcsr();\n  #else\n    return SIMDE_MM_GET_ROUNDING_MODE();\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n  #define _mm_getcsr() simde_mm_getcsr()\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nvoid\nsimde_mm_setcsr (uint32_t a) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    _mm_setcsr(a);\n  #else\n    SIMDE_MM_SET_ROUNDING_MODE(HEDLEY_STATIC_CAST(uint32_t, a & SIMDE_MM_ROUND_MASK));\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n  #define _mm_setcsr(a) 
simde_mm_setcsr(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_x_mm_round_ps (simde__m128 a, int rounding, int lax_rounding)\n    SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15)\n    SIMDE_REQUIRE_CONSTANT_RANGE(lax_rounding, 0, 1) {\n  simde__m128_private\n    r_,\n    a_ = simde__m128_to_private(a);\n\n  (void) lax_rounding;\n\n  /* For architectures which lack a current direction SIMD instruction.\n   *\n   * Note that NEON actually has a current rounding mode instruction,\n   * but in ARMv8+ the rounding mode is ignored and nearest is always\n   * used, so we treat ARMv7 as having a rounding mode but ARMv8 as\n   * not. */\n  #if \\\n      defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || \\\n      defined(SIMDE_ARM_NEON_A32V8)\n    if ((rounding & 7) == SIMDE_MM_FROUND_CUR_DIRECTION)\n      rounding = HEDLEY_STATIC_CAST(int, SIMDE_MM_GET_ROUNDING_MODE()) << 13;\n  #endif\n\n  switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) {\n    case SIMDE_MM_FROUND_CUR_DIRECTION:\n      #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE)\n        r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_round(a_.altivec_f32));\n      #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399)\n        r_.neon_f32 = vrndiq_f32(a_.neon_f32);\n      #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n        r_.wasm_v128 = wasm_f32x4_nearest(a_.wasm_v128);\n      #elif defined(simde_math_nearbyintf)\n        SIMDE_VECTORIZE\n        for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n          r_.f32[i] = simde_math_nearbyintf(a_.f32[i]);\n        }\n      #else\n        HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd());\n      #endif\n      break;\n\n    case SIMDE_MM_FROUND_TO_NEAREST_INT:\n      #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE)\n        r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_rint(a_.altivec_f32));\n      #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE)\n        r_.neon_f32 = vrndnq_f32(a_.neon_f32);\n      #elif defined(SIMDE_LOONGARCH_LSX_NATIVE)\n        r_.lsx_i64 = __lsx_vfrintrne_s(a_.lsx_f32);\n      #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n        r_.wasm_v128 = wasm_f32x4_nearest(a_.wasm_v128);\n      #elif defined(simde_math_roundevenf)\n        SIMDE_VECTORIZE\n        for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n          r_.f32[i] = simde_math_roundevenf(a_.f32[i]);\n        }\n      #else\n        HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd());\n      #endif\n      break;\n\n    case SIMDE_MM_FROUND_TO_NEG_INF:\n      #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE)\n        r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_floor(a_.altivec_f32));\n      #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE)\n        r_.neon_f32 = vrndmq_f32(a_.neon_f32);\n      #elif defined(SIMDE_LOONGARCH_LSX_NATIVE)\n        r_.lsx_i64 = __lsx_vfrintrm_s(a_.lsx_f32);\n      #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n        r_.wasm_v128 = wasm_f32x4_floor(a_.wasm_v128);\n      #elif defined(simde_math_floorf)\n        SIMDE_VECTORIZE\n        for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n          r_.f32[i] = simde_math_floorf(a_.f32[i]);\n        }\n      #else\n        HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd());\n      #endif\n      break;\n\n    case SIMDE_MM_FROUND_TO_POS_INF:\n      #if 
defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE)\n        r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_ceil(a_.altivec_f32));\n      #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE)\n        r_.neon_f32 = vrndpq_f32(a_.neon_f32);\n      #elif defined(SIMDE_LOONGARCH_LSX_NATIVE)\n        r_.lsx_i64 = __lsx_vfrintrp_s(a_.lsx_f32);\n      #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n        r_.wasm_v128 = wasm_f32x4_ceil(a_.wasm_v128);\n      #elif defined(simde_math_ceilf)\n        SIMDE_VECTORIZE\n        for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n          r_.f32[i] = simde_math_ceilf(a_.f32[i]);\n        }\n      #else\n        HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd());\n      #endif\n      break;\n\n    case SIMDE_MM_FROUND_TO_ZERO:\n      #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE)\n        r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_trunc(a_.altivec_f32));\n      #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE)\n        r_.neon_f32 = vrndq_f32(a_.neon_f32);\n      #elif defined(SIMDE_LOONGARCH_LSX_NATIVE)\n        r_.lsx_i64 = __lsx_vfrintrz_s(a_.lsx_f32);\n      #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n        r_.wasm_v128 = wasm_f32x4_trunc(a_.wasm_v128);\n      #elif defined(simde_math_truncf)\n        SIMDE_VECTORIZE\n        for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n          r_.f32[i] = simde_math_truncf(a_.f32[i]);\n        }\n      #else\n        HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd());\n      #endif\n      break;\n\n    default:\n      HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd());\n  }\n\n  return simde__m128_from_private(r_);\n}\n#if defined(SIMDE_X86_SSE4_1_NATIVE)\n  #define simde_mm_round_ps(a, rounding) _mm_round_ps((a), (rounding))\n#else\n  #define simde_mm_round_ps(a, rounding) simde_x_mm_round_ps((a), (rounding), 0)\n#endif\n#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)\n  #define _mm_round_ps(a, rounding) simde_mm_round_ps((a), (rounding))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_set_ps (simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_set_ps(e3, e2, e1, e0);\n  #else\n    simde__m128_private r_;\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      SIMDE_ALIGN_TO_16 simde_float32 data[4] = { e0, e1, e2, e3 };\n      r_.neon_f32 = vld1q_f32(data);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_f32x4_make(e0, e1, e2, e3);\n    #else\n      r_.f32[0] = e0;\n      r_.f32[1] = e1;\n      r_.f32[2] = e2;\n      r_.f32[3] = e3;\n    #endif\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_set_ps(e3, e2, e1, e0) simde_mm_set_ps(e3, e2, e1, e0)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_set_ps1 (simde_float32 a) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_set_ps1(a);\n  #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n    return vdupq_n_f32(a);\n  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE)\n    (void) a;\n    return vec_splats(a);\n  #elif defined(SIMDE_LOONGARCH_LSX_NATIVE)\n    return (simde__m128)__lsx_vldrepl_w(&a, 0);\n  #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n    return wasm_f32x4_splat(a);\n  #else\n    return simde_mm_set_ps(a, a, a, a);\n  #endif\n}\n#define simde_mm_set1_ps(a) 
simde_mm_set_ps1(a)\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_set_ps1(a) simde_mm_set_ps1(a)\n#  define _mm_set1_ps(a) simde_mm_set1_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_move_ss (simde__m128 a, simde__m128 b) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_move_ss(a, b);\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a),\n      b_ = simde__m128_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_f32 = vsetq_lane_f32(vgetq_lane_f32(b_.neon_f32, 0), a_.neon_f32, 0);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)\n      static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) m = { ~0U, 0U, 0U, 0U };\n      r_.altivec_f32 = vec_sel(a_.altivec_f32, b_.altivec_f32, m);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_i8x16_shuffle(b_.wasm_v128, a_.wasm_v128, 0, 1, 2, 3, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);\n    #elif defined(SIMDE_LOONGARCH_LSX_NATIVE)\n      r_.lsx_i64 = __lsx_vextrins_w(a_.lsx_i64, b_.lsx_i64, 0);\n    #elif defined(SIMDE_SHUFFLE_VECTOR_)\n      r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 4, 1, 2, 3);\n    #else\n      r_.f32[0] = b_.f32[0];\n      r_.f32[1] = a_.f32[1];\n      r_.f32[2] = a_.f32[2];\n      r_.f32[3] = a_.f32[3];\n    #endif\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_move_ss(a, b) simde_mm_move_ss((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_x_mm_broadcastlow_ps(simde__m128 a) {\n  /* This function broadcasts the first element in the input vector to\n   * all lanes.  It is used to avoid generating spurious exceptions in\n   * *_ss functions since there may be garbage in the upper lanes. 
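   *
   * For example, the fallback path of simde_mm_add_ss(a, b) below adds
   * the two broadcast vectors { a0, a0, a0, a0 } and { b0, b0, b0, b0 }
   * and then keeps only lane 0 of the sum via simde_mm_move_ss; every
   * lane computes a0 + b0, so no lane can raise an exception that the
   * scalar a0 + b0 operation would not have raised itself.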
*/\n\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_shuffle_ps(a, a, 0);\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      r_.neon_f32 = vdupq_laneq_f32(a_.neon_f32, 0);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)\n      r_.altivec_f32 = vec_splat(a_.altivec_f32, 0);\n    #elif defined(SIMDE_LOONGARCH_LSX_NATIVE)\n      r_.lsx_i64 = __lsx_vreplvei_w(a_.lsx_i64, 0);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_f32x4_splat(a_.f32[0]);\n    #elif defined(SIMDE_SHUFFLE_VECTOR_)\n      r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 0, 0);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = a_.f32[0];\n      }\n    #endif\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_add_ps (simde__m128 a, simde__m128 b) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_add_ps(a, b);\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a),\n      b_ = simde__m128_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_f32 = vaddq_f32(a_.neon_f32, b_.neon_f32);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_f32x4_add(a_.wasm_v128, b_.wasm_v128);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)\n      r_.altivec_f32 = vec_add(a_.altivec_f32, b_.altivec_f32);\n    #elif defined(SIMDE_LOONGARCH_LSX_NATIVE)\n      r_.lsx_f32 = __lsx_vfadd_s(a_.lsx_f32, b_.lsx_f32);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.f32 = a_.f32 + b_.f32;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = a_.f32[i] + b_.f32[i];\n      }\n    #endif\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_add_ps(a, b) simde_mm_add_ps((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_add_ss (simde__m128 a, simde__m128 b) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_add_ss(a, b);\n  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS)\n    return simde_mm_move_ss(a, simde_mm_add_ps(a, b));\n  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)\n    return simde_mm_move_ss(a, simde_mm_add_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b)));\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a),\n      b_ = simde__m128_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      float32_t b0 = vgetq_lane_f32(b_.neon_f32, 0);\n      float32x4_t value = vsetq_lane_f32(b0, vdupq_n_f32(0), 0);\n      // the upper values in the result must be the remnants of <a>.\n      r_.neon_f32 = vaddq_f32(a_.neon_f32, value);\n    #else\n      r_.f32[0] = a_.f32[0] + b_.f32[0];\n      r_.f32[1] = a_.f32[1];\n      r_.f32[2] = a_.f32[2];\n      r_.f32[3] = a_.f32[3];\n    #endif\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_add_ss(a, b) simde_mm_add_ss((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_and_ps (simde__m128 a, simde__m128 b) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_and_ps(a, b);\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a),\n      b_ = simde__m128_to_private(b);\n\n    #if 
defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i32 = vandq_s32(a_.neon_i32, b_.neon_i32);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128);\n    #elif defined(SIMDE_LOONGARCH_LSX_NATIVE)\n      r_.lsx_i64 = __lsx_vand_v(a_.lsx_i64, b_.lsx_i64);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i32 = a_.i32 & b_.i32;\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)\n      r_.altivec_f32 = vec_and(a_.altivec_f32, b_.altivec_f32);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n        r_.i32[i] = a_.i32[i] & b_.i32[i];\n      }\n    #endif\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_and_ps(a, b) simde_mm_and_ps((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_andnot_ps (simde__m128 a, simde__m128 b) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_andnot_ps(a, b);\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a),\n      b_ = simde__m128_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE)\n      r_.altivec_f32 = vec_andc(b_.altivec_f32, a_.altivec_f32);\n    #elif defined(SIMDE_LOONGARCH_LSX_NATIVE)\n      r_.lsx_i64 = __lsx_vandn_v(a_.lsx_i64, b_.lsx_i64);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i32 = ~a_.i32 & b_.i32;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n        r_.i32[i] = ~(a_.i32[i]) & b_.i32[i];\n      }\n    #endif\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_andnot_ps(a, b) simde_mm_andnot_ps((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_xor_ps (simde__m128 a, simde__m128 b) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_xor_ps(a, b);\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a),\n      b_ = simde__m128_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i32 = veorq_s32(a_.neon_i32, b_.neon_i32);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_v128_xor(a_.wasm_v128, b_.wasm_v128);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)\n      r_.altivec_i32 = vec_xor(a_.altivec_i32, b_.altivec_i32);\n    #elif defined(SIMDE_LOONGARCH_LSX_NATIVE)\n      r_.lsx_i64 = __lsx_vxor_v(a_.lsx_i64, b_.lsx_i64);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i32f = a_.i32f ^ b_.i32f;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {\n        r_.u32[i] = a_.u32[i] ^ b_.u32[i];\n      }\n    #endif\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_xor_ps(a, b) simde_mm_xor_ps((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_or_ps (simde__m128 a, simde__m128 b) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_or_ps(a, b);\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a),\n      b_ = simde__m128_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i32 = 
vorrq_s32(a_.neon_i32, b_.neon_i32);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)\n      r_.altivec_i32 = vec_or(a_.altivec_i32, b_.altivec_i32);\n    #elif defined(SIMDE_LOONGARCH_LSX_NATIVE)\n      r_.lsx_i64 = __lsx_vor_v(a_.lsx_i64, b_.lsx_i64);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i32f = a_.i32f | b_.i32f;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {\n        r_.u32[i] = a_.u32[i] | b_.u32[i];\n      }\n    #endif\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_or_ps(a, b) simde_mm_or_ps((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_x_mm_not_ps(simde__m128 a) {\n  #if defined(SIMDE_X86_AVX512VL_NATIVE)\n    __m128i ai = _mm_castps_si128(a);\n    return _mm_castsi128_ps(_mm_ternarylogic_epi32(ai, ai, ai, 0x55));\n  #elif defined(SIMDE_X86_SSE2_NATIVE)\n    /* Note: we use ints instead of floats because we don't want cmpeq\n     * to return false for (NaN, NaN) */\n    __m128i ai = _mm_castps_si128(a);\n    return _mm_castsi128_ps(_mm_andnot_si128(ai, _mm_cmpeq_epi32(ai, ai)));\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i32 = vmvnq_s32(a_.neon_i32);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)\n      r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_v128_not(a_.wasm_v128);\n    #elif defined(SIMDE_LOONGARCH_LSX_NATIVE)\n      r_.lsx_i64 = __lsx_vnor_v(a_.lsx_i64, a_.lsx_i64);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i32 = ~a_.i32;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n        r_.i32[i] = ~(a_.i32[i]);\n      }\n    #endif\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_x_mm_select_ps(simde__m128 a, simde__m128 b, simde__m128 mask) {\n  /* This function is for when you want to blend two elements together\n   * according to a mask.  It is similar to _mm_blendv_ps, except that\n   * it is undefined whether the blend is based on the highest bit in\n   * each lane (like blendv) or just bitwise operations.  This allows\n   * us to implement the function efficiently everywhere.\n   *\n   * Basically, you promise that all the lanes in mask are either 0 or\n   * ~0. 
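   *
   * For example, a caller would typically build the mask with one of
   * the comparison functions in this file, whose lanes are always 0 or
   * ~0:
   *
   *   simde__m128 mask = simde_mm_cmpgt_ps(x, y);
   *   simde__m128 r    = simde_x_mm_select_ps(a, b, mask);
   *
   * which yields b in every lane where x > y and a in every other lane.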
*/\n  #if defined(SIMDE_X86_SSE4_1_NATIVE)\n    return _mm_blendv_ps(a, b, mask);\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a),\n      b_ = simde__m128_to_private(b),\n      mask_ = simde__m128_to_private(mask);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i32 = vbslq_s32(mask_.neon_u32, b_.neon_i32, a_.neon_i32);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_v128_bitselect(b_.wasm_v128, a_.wasm_v128, mask_.wasm_v128);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)\n      r_.altivec_i32 = vec_sel(a_.altivec_i32, b_.altivec_i32, mask_.altivec_u32);\n    #elif defined(SIMDE_LOONGARCH_LSX_NATIVE)\n      r_.lsx_i64 = __lsx_vbitsel_v(a_.lsx_i64, b_.lsx_i64, mask_.lsx_i64);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i32 = a_.i32 ^ ((a_.i32 ^ b_.i32) & mask_.i32);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n        r_.i32[i] = a_.i32[i] ^ ((a_.i32[i] ^ b_.i32[i]) & mask_.i32[i]);\n      }\n    #endif\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_avg_pu16 (simde__m64 a, simde__m64 b) {\n  #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_avg_pu16(a, b);\n  #else\n    simde__m64_private\n      r_,\n      a_ = simde__m64_to_private(a),\n      b_ = simde__m64_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u16 = vrhadd_u16(b_.neon_u16, a_.neon_u16);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100761)\n      uint32_t wa SIMDE_VECTOR(16);\n      uint32_t wb SIMDE_VECTOR(16);\n      uint32_t wr SIMDE_VECTOR(16);\n      SIMDE_CONVERT_VECTOR_(wa, a_.u16);\n      SIMDE_CONVERT_VECTOR_(wb, b_.u16);\n      wr = (wa + wb + 1) >> 1;\n      SIMDE_CONVERT_VECTOR_(r_.u16, wr);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {\n        r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1;\n      }\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#define simde_m_pavgw(a, b) simde_mm_avg_pu16(a, b)\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_avg_pu16(a, b) simde_mm_avg_pu16(a, b)\n#  define _m_pavgw(a, b) simde_mm_avg_pu16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_avg_pu8 (simde__m64 a, simde__m64 b) {\n  #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_avg_pu8(a, b);\n  #else\n    simde__m64_private\n      r_,\n      a_ = simde__m64_to_private(a),\n      b_ = simde__m64_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u8 = vrhadd_u8(b_.neon_u8, a_.neon_u8);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100761)\n      uint16_t wa SIMDE_VECTOR(16);\n      uint16_t wb SIMDE_VECTOR(16);\n      uint16_t wr SIMDE_VECTOR(16);\n      SIMDE_CONVERT_VECTOR_(wa, a_.u8);\n      SIMDE_CONVERT_VECTOR_(wb, b_.u8);\n      wr = (wa + wb + 1) >> 1;\n      SIMDE_CONVERT_VECTOR_(r_.u8, wr);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {\n        r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1;\n      }\n    #endif\n\n    return simde__m64_from_private(r_);\n  
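  /* Note: both fallback paths above compute the rounded average
   * (a + b + 1) >> 1 with the sum evaluated in a wider type (an
   * explicit uint16_t widening in the vector path, ordinary integer
   * promotion to int in the scalar loop), so the +1 rounding carry
   * cannot overflow; this matches the round-half-up behaviour of the
   * native PAVGB instruction. */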
#endif\n}\n#define simde_m_pavgb(a, b) simde_mm_avg_pu8(a, b)\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_avg_pu8(a, b) simde_mm_avg_pu8(a, b)\n#  define _m_pavgb(a, b) simde_mm_avg_pu8(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_x_mm_abs_ps(simde__m128 a) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    simde_float32 mask_;\n    uint32_t u32_ = UINT32_C(0x7FFFFFFF);\n    simde_memcpy(&mask_, &u32_, sizeof(u32_));\n    return _mm_and_ps(_mm_set1_ps(mask_), a);\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_f32 = vabsq_f32(a_.neon_f32);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE)\n      r_.altivec_f32 = vec_abs(a_.altivec_f32);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_f32x4_abs(a_.wasm_v128);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = simde_math_fabsf(a_.f32[i]);\n      }\n    #endif\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_cmpeq_ps (simde__m128 a, simde__m128 b) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_cmpeq_ps(a, b);\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a),\n      b_ = simde__m128_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u32 = vceqq_f32(a_.neon_f32, b_.neon_f32);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_f32x4_eq(a_.wasm_v128, b_.wasm_v128);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE)\n      r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpeq(a_.altivec_f32, b_.altivec_f32));\n    #elif defined(SIMDE_LOONGARCH_LSX_NATIVE)\n      r_.lsx_i64 = __lsx_vfcmp_ceq_s(a_.lsx_f32, b_.lsx_f32);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.f32 == b_.f32);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.u32[i] = (a_.f32[i] == b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0);\n      }\n    #endif\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_cmpeq_ps(a, b) simde_mm_cmpeq_ps((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_cmpeq_ss (simde__m128 a, simde__m128 b) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_cmpeq_ss(a, b);\n  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS)\n    return simde_mm_move_ss(a, simde_mm_cmpeq_ps(a, b));\n  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)\n    return simde_mm_move_ss(a, simde_mm_cmpeq_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b)));\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a),\n      b_ = simde__m128_to_private(b);\n\n    r_.u32[0] = (a_.f32[0] == b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0);\n    SIMDE_VECTORIZE\n    for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n      r_.u32[i] = a_.u32[i];\n    }\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_cmpeq_ss(a, b) simde_mm_cmpeq_ss((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_cmpge_ps (simde__m128 a, simde__m128 b) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_cmpge_ps(a, b);\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a),\n      b_ = simde__m128_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u32 = vcgeq_f32(a_.neon_f32, b_.neon_f32);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_f32x4_ge(a_.wasm_v128, b_.wasm_v128);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)\n      r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpge(a_.altivec_f32, b_.altivec_f32));\n    #elif defined(SIMDE_LOONGARCH_LSX_NATIVE)\n      r_.lsx_i64 = __lsx_vfcmp_cle_s(b_.lsx_f32, a_.lsx_f32);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 >= b_.f32));\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.u32[i] = (a_.f32[i] >= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0);\n      }\n    #endif\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_cmpge_ps(a, b) simde_mm_cmpge_ps((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_cmpge_ss (simde__m128 a, simde__m128 b) {\n  #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI)\n    return _mm_cmpge_ss(a, b);\n  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS)\n    return simde_mm_move_ss(a, simde_mm_cmpge_ps(a, b));\n  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)\n    return simde_mm_move_ss(a, simde_mm_cmpge_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b)));\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a),\n      b_ = simde__m128_to_private(b);\n\n    r_.u32[0] = (a_.f32[0] >= b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0);\n    SIMDE_VECTORIZE\n    for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n      r_.u32[i] = a_.u32[i];\n    }\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_cmpge_ss(a, b) simde_mm_cmpge_ss((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_cmpgt_ps (simde__m128 a, simde__m128 b) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_cmpgt_ps(a, b);\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a),\n      b_ = simde__m128_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u32 = vcgtq_f32(a_.neon_f32, b_.neon_f32);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_f32x4_gt(a_.wasm_v128, b_.wasm_v128);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)\n      r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpgt(a_.altivec_f32, b_.altivec_f32));\n    #elif defined(SIMDE_LOONGARCH_LSX_NATIVE)\n      r_.lsx_i64 = __lsx_vfcmp_clt_s(b_.lsx_f32, a_.lsx_f32);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 > b_.f32));\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.u32[i] = (a_.f32[i] > b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0);\n      }\n    #endif\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_cmpgt_ps(a, b) simde_mm_cmpgt_ps((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_cmpgt_ss (simde__m128 a, simde__m128 b) {\n  #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI)\n    return _mm_cmpgt_ss(a, b);\n  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS)\n    return simde_mm_move_ss(a, simde_mm_cmpgt_ps(a, b));\n  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)\n    return simde_mm_move_ss(a, simde_mm_cmpgt_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b)));\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a),\n      b_ = simde__m128_to_private(b);\n\n    r_.u32[0] = (a_.f32[0] > b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0);\n    SIMDE_VECTORIZE\n    for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n      r_.u32[i] = a_.u32[i];\n    }\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_cmpgt_ss(a, b) simde_mm_cmpgt_ss((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_cmple_ps (simde__m128 a, simde__m128 b) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_cmple_ps(a, b);\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a),\n      b_ = simde__m128_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u32 = vcleq_f32(a_.neon_f32, b_.neon_f32);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_f32x4_le(a_.wasm_v128, b_.wasm_v128);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)\n      r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmple(a_.altivec_f32, b_.altivec_f32));\n    #elif defined(SIMDE_LOONGARCH_LSX_NATIVE)\n      r_.lsx_i64 = __lsx_vfcmp_cle_s(a_.lsx_f32, b_.lsx_f32);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 <= b_.f32));\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.u32[i] = (a_.f32[i] <= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0);\n      }\n    #endif\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_cmple_ps(a, b) simde_mm_cmple_ps((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_cmple_ss (simde__m128 a, simde__m128 b) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_cmple_ss(a, b);\n  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS)\n    return simde_mm_move_ss(a, simde_mm_cmple_ps(a, b));\n  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)\n    return simde_mm_move_ss(a, simde_mm_cmple_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b)));\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a),\n      b_ = simde__m128_to_private(b);\n\n    r_.u32[0] = (a_.f32[0] <= b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0);\n    SIMDE_VECTORIZE\n    for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n      r_.u32[i] = a_.u32[i];\n    }\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_cmple_ss(a, b) simde_mm_cmple_ss((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_cmplt_ps (simde__m128 a, simde__m128 b) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_cmplt_ps(a, b);\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a),\n      b_ = simde__m128_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u32 = vcltq_f32(a_.neon_f32, b_.neon_f32);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_f32x4_lt(a_.wasm_v128, b_.wasm_v128);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)\n      r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmplt(a_.altivec_f32, b_.altivec_f32));\n    #elif defined(SIMDE_LOONGARCH_LSX_NATIVE)\n      r_.lsx_i64 = __lsx_vfcmp_clt_s(a_.lsx_f32, b_.lsx_f32);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 < b_.f32));\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.u32[i] = (a_.f32[i] < b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0);\n      }\n    #endif\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_cmplt_ps(a, b) simde_mm_cmplt_ps((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_cmplt_ss (simde__m128 a, simde__m128 b) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_cmplt_ss(a, b);\n  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS)\n    return simde_mm_move_ss(a, simde_mm_cmplt_ps(a, b));\n  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)\n    return simde_mm_move_ss(a, simde_mm_cmplt_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b)));\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a),\n      b_ = simde__m128_to_private(b);\n\n    r_.u32[0] = (a_.f32[0] < b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0);\n    SIMDE_VECTORIZE\n    for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n      r_.u32[i] = a_.u32[i];\n    }\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_cmplt_ss(a, b) simde_mm_cmplt_ss((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_cmpneq_ps (simde__m128 a, simde__m128 b) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_cmpneq_ps(a, b);\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a),\n      b_ = simde__m128_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u32 = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32));\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_f32x4_ne(a_.wasm_v128, b_.wasm_v128);\n    #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE)\n      r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpeq(a_.altivec_f32, b_.altivec_f32));\n      r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_nor(r_.altivec_f32, r_.altivec_f32));\n    #elif defined(SIMDE_LOONGARCH_LSX_NATIVE)\n      r_.lsx_i64 = __lsx_vfcmp_cune_s(a_.lsx_f32, b_.lsx_f32);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != b_.f32));\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.u32[i] = (a_.f32[i] != b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0);\n      }\n    #endif\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_cmpneq_ps(a, b) simde_mm_cmpneq_ps((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_cmpneq_ss (simde__m128 a, simde__m128 b) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_cmpneq_ss(a, b);\n  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS)\n    return simde_mm_move_ss(a, simde_mm_cmpneq_ps(a, b));\n  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)\n    return simde_mm_move_ss(a, simde_mm_cmpneq_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b)));\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a),\n      b_ = simde__m128_to_private(b);\n\n    r_.u32[0] = (a_.f32[0] != b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0);\n    SIMDE_VECTORIZE\n    for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n      r_.u32[i] = a_.u32[i];\n    }\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_cmpneq_ss(a, b) simde_mm_cmpneq_ss((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_cmpnge_ps (simde__m128 a, simde__m128 b) {\n  return simde_mm_cmplt_ps(a, b);\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_cmpnge_ps(a, b) simde_mm_cmpnge_ps((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_cmpnge_ss (simde__m128 a, simde__m128 b) {\n  return simde_mm_cmplt_ss(a, b);\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_cmpnge_ss(a, b) simde_mm_cmpnge_ss((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_cmpngt_ps (simde__m128 a, simde__m128 b) {\n  return simde_mm_cmple_ps(a, b);\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_cmpngt_ps(a, b) simde_mm_cmpngt_ps((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_cmpngt_ss (simde__m128 a, simde__m128 b) {\n  return simde_mm_cmple_ss(a, b);\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_cmpngt_ss(a, b) simde_mm_cmpngt_ss((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_cmpnle_ps (simde__m128 a, simde__m128 b) {\n  return simde_mm_cmpgt_ps(a, b);\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_cmpnle_ps(a, b) simde_mm_cmpnle_ps((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_cmpnle_ss (simde__m128 a, simde__m128 b) {\n  return simde_mm_cmpgt_ss(a, b);\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_cmpnle_ss(a, b) simde_mm_cmpnle_ss((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_cmpnlt_ps (simde__m128 a, simde__m128 b) {\n  return simde_mm_cmpge_ps(a, b);\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_cmpnlt_ps(a, b) simde_mm_cmpnlt_ps((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_cmpnlt_ss (simde__m128 a, simde__m128 b) {\n  return simde_mm_cmpge_ss(a, b);\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_cmpnlt_ss(a, b) simde_mm_cmpnlt_ss((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_cmpord_ps (simde__m128 a, simde__m128 b) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_cmpord_ps(a, b);\n  #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n    return wasm_v128_and(wasm_f32x4_eq(a, a), wasm_f32x4_eq(b, b));\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a),\n      b_ = simde__m128_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      /* Note: NEON does not have ordered compare builtin\n        Need to compare a eq a and b eq b to check for NaN\n        Do AND of results to get final */\n      uint32x4_t ceqaa = vceqq_f32(a_.neon_f32, a_.neon_f32);\n      uint32x4_t ceqbb = vceqq_f32(b_.neon_f32, b_.neon_f32);\n      r_.neon_u32 = vandq_u32(ceqaa, ceqbb);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_v128_and(wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128), wasm_f32x4_eq(b_.wasm_v128, b_.wasm_v128));\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)\n      r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float),\n          vec_and(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32)));\n    #elif 
defined(SIMDE_LOONGARCH_LSX_NATIVE)\n      r_.lsx_i64 = __lsx_vfcmp_cun_s(a_.lsx_f32, b_.lsx_f32);\n      r_.lsx_i64 = __lsx_vnor_v(r_.lsx_i64, r_.lsx_i64);\n    #elif defined(simde_math_isnanf)\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? UINT32_C(0) : ~UINT32_C(0);\n      }\n    #else\n      HEDLEY_UNREACHABLE();\n    #endif\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_cmpord_ps(a, b) simde_mm_cmpord_ps((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_cmpunord_ps (simde__m128 a, simde__m128 b) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_cmpunord_ps(a, b);\n  #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n    return wasm_v128_or(wasm_f32x4_ne(a, a), wasm_f32x4_ne(b, b));\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a),\n      b_ = simde__m128_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      uint32x4_t ceqaa = vceqq_f32(a_.neon_f32, a_.neon_f32);\n      uint32x4_t ceqbb = vceqq_f32(b_.neon_f32, b_.neon_f32);\n      r_.neon_u32 = vmvnq_u32(vandq_u32(ceqaa, ceqbb));\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_v128_or(wasm_f32x4_ne(a_.wasm_v128, a_.wasm_v128), wasm_f32x4_ne(b_.wasm_v128, b_.wasm_v128));\n    #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)\n      r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float),\n          vec_nand(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32)));\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)\n      r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float),\n          vec_and(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32)));\n      r_.altivec_f32 = vec_nor(r_.altivec_f32, r_.altivec_f32);\n    #elif defined(SIMDE_LOONGARCH_LSX_NATIVE)\n      r_.lsx_i64 = __lsx_vfcmp_cun_s(a_.lsx_f32, b_.lsx_f32);\n    #elif defined(simde_math_isnanf)\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? ~UINT32_C(0) : UINT32_C(0);\n      }\n    #else\n      HEDLEY_UNREACHABLE();\n    #endif\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_cmpunord_ps(a, b) simde_mm_cmpunord_ps((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_cmpunord_ss (simde__m128 a, simde__m128 b) {\n  #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI)\n    return _mm_cmpunord_ss(a, b);\n  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS)\n    return simde_mm_move_ss(a, simde_mm_cmpunord_ps(a, b));\n  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)\n    return simde_mm_move_ss(a, simde_mm_cmpunord_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b)));\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a),\n      b_ = simde__m128_to_private(b);\n\n    #if defined(simde_math_isnanf)\n      r_.u32[0] = (simde_math_isnanf(a_.f32[0]) || simde_math_isnanf(b_.f32[0])) ? 
~UINT32_C(0) : UINT32_C(0);\n      SIMDE_VECTORIZE\n      for (size_t i = 1 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {\n        r_.u32[i] = a_.u32[i];\n      }\n    #else\n      HEDLEY_UNREACHABLE();\n    #endif\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_cmpunord_ss(a, b) simde_mm_cmpunord_ss((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nint\nsimde_mm_comieq_ss (simde__m128 a, simde__m128 b) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_comieq_ss(a, b);\n  #else\n    simde__m128_private\n      a_ = simde__m128_to_private(a),\n      b_ = simde__m128_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32);\n      uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32);\n      uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan));\n      uint32x4_t a_eq_b = vceqq_f32(a_.neon_f32, b_.neon_f32);\n      return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_eq_b), 0) != 0);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      return wasm_f32x4_extract_lane(a_.wasm_v128, 0) == wasm_f32x4_extract_lane(b_.wasm_v128, 0);\n    #else\n      return a_.f32[0] == b_.f32[0];\n    #endif\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_comieq_ss(a, b) simde_mm_comieq_ss((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nint\nsimde_mm_comige_ss (simde__m128 a, simde__m128 b) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_comige_ss(a, b);\n  #else\n    simde__m128_private\n      a_ = simde__m128_to_private(a),\n      b_ = simde__m128_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32);\n      uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32);\n      uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan);\n      uint32x4_t a_ge_b = vcgeq_f32(a_.neon_f32, b_.neon_f32);\n      return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_ge_b), 0) != 0);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      return wasm_f32x4_extract_lane(a_.wasm_v128, 0) >= wasm_f32x4_extract_lane(b_.wasm_v128, 0);\n    #else\n      return a_.f32[0] >= b_.f32[0];\n    #endif\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_comige_ss(a, b) simde_mm_comige_ss((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nint\nsimde_mm_comigt_ss (simde__m128 a, simde__m128 b) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_comigt_ss(a, b);\n  #else\n    simde__m128_private\n      a_ = simde__m128_to_private(a),\n      b_ = simde__m128_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32);\n      uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32);\n      uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan);\n      uint32x4_t a_gt_b = vcgtq_f32(a_.neon_f32, b_.neon_f32);\n      return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_gt_b), 0) != 0);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      return wasm_f32x4_extract_lane(a_.wasm_v128, 0) > wasm_f32x4_extract_lane(b_.wasm_v128, 0);\n    #else\n      return a_.f32[0] > b_.f32[0];\n    #endif\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_comigt_ss(a, b) simde_mm_comigt_ss((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nint\nsimde_mm_comile_ss (simde__m128 a, simde__m128 b) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_comile_ss(a, 
b);\n  #else\n    simde__m128_private\n      a_ = simde__m128_to_private(a),\n      b_ = simde__m128_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32);\n      uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32);\n      uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan));\n      uint32x4_t a_le_b = vcleq_f32(a_.neon_f32, b_.neon_f32);\n      return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_le_b), 0) != 0);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      return wasm_f32x4_extract_lane(a_.wasm_v128, 0) <= wasm_f32x4_extract_lane(b_.wasm_v128, 0);\n    #else\n      return a_.f32[0] <= b_.f32[0];\n    #endif\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_comile_ss(a, b) simde_mm_comile_ss((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nint\nsimde_mm_comilt_ss (simde__m128 a, simde__m128 b) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_comilt_ss(a, b);\n  #else\n    simde__m128_private\n      a_ = simde__m128_to_private(a),\n      b_ = simde__m128_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32);\n      uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32);\n      uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan));\n      uint32x4_t a_lt_b = vcltq_f32(a_.neon_f32, b_.neon_f32);\n      return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_lt_b), 0) != 0);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      return wasm_f32x4_extract_lane(a_.wasm_v128, 0) < wasm_f32x4_extract_lane(b_.wasm_v128, 0);\n    #else\n      return a_.f32[0] < b_.f32[0];\n    #endif\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_comilt_ss(a, b) simde_mm_comilt_ss((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nint\nsimde_mm_comineq_ss (simde__m128 a, simde__m128 b) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_comineq_ss(a, b);\n  #else\n    simde__m128_private\n      a_ = simde__m128_to_private(a),\n      b_ = simde__m128_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32);\n      uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32);\n      uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan);\n      uint32x4_t a_neq_b = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32));\n      return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_neq_b), 0) != 0);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      return wasm_f32x4_extract_lane(a_.wasm_v128, 0) != wasm_f32x4_extract_lane(b_.wasm_v128, 0);\n    #else\n      return a_.f32[0] != b_.f32[0];\n    #endif\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_comineq_ss(a, b) simde_mm_comineq_ss((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_x_mm_copysign_ps(simde__m128 dest, simde__m128 src) {\n  simde__m128_private\n    r_,\n    dest_ = simde__m128_to_private(dest),\n    src_ = simde__m128_to_private(src);\n\n  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n    const uint32x4_t sign_pos = vreinterpretq_u32_f32(vdupq_n_f32(-SIMDE_FLOAT32_C(0.0)));\n    r_.neon_u32 = vbslq_u32(sign_pos, src_.neon_u32, dest_.neon_u32);\n  #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n    const v128_t sign_pos = wasm_f32x4_splat(-0.0f);\n    r_.wasm_v128 = wasm_v128_bitselect(src_.wasm_v128, dest_.wasm_v128, sign_pos);\n  /* vec_cpsgn is only available from POWER9 (ISA 3.0); guarding it with P6 would fail to compile on older POWER and shadow the sign-bit select path below. */\n  #elif defined(SIMDE_POWER_ALTIVEC_P9_NATIVE)\n    #if 
defined(SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS)\n      r_.altivec_f32 = vec_cpsgn(dest_.altivec_f32, src_.altivec_f32);\n    #else\n      r_.altivec_f32 = vec_cpsgn(src_.altivec_f32, dest_.altivec_f32);\n    #endif\n  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE)\n    const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) sign_pos = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_splats(-0.0f));\n    r_.altivec_f32 = vec_sel(dest_.altivec_f32, src_.altivec_f32, sign_pos);\n  #elif defined(SIMDE_LOONGARCH_LSX_NATIVE)\n    const v4f32 sign_pos = {-0.0f, -0.0f, -0.0f, -0.0f};\n    r_.lsx_i64 = __lsx_vbitsel_v(dest_.lsx_i64, src_.lsx_i64, (v2i64)sign_pos);\n  #elif defined(SIMDE_IEEE754_STORAGE)\n    (void) src_;\n    (void) dest_;\n    simde__m128 sign_pos = simde_mm_set1_ps(-0.0f);\n    r_ = simde__m128_to_private(simde_mm_xor_ps(dest, simde_mm_and_ps(simde_mm_xor_ps(dest, src), sign_pos)));\n  #else\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n      r_.f32[i] = simde_math_copysignf(dest_.f32[i], src_.f32[i]);\n    }\n  #endif\n\n  return simde__m128_from_private(r_);\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_x_mm_xorsign_ps(simde__m128 dest, simde__m128 src) {\n  return simde_mm_xor_ps(simde_mm_and_ps(simde_mm_set1_ps(-0.0f), src), dest);\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_cvt_pi2ps (simde__m128 a, simde__m64 b) {\n  #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_cvt_pi2ps(a, b);\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a);\n    simde__m64_private b_ = simde__m64_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_f32 = vcombine_f32(vcvt_f32_s32(b_.neon_i32), vget_high_f32(a_.neon_f32));\n    #elif defined(SIMDE_CONVERT_VECTOR_)\n      SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, b_.i32);\n      r_.m64_private[1] = a_.m64_private[1];\n    #else\n      r_.f32[0] = (simde_float32) b_.i32[0];\n      r_.f32[1] = (simde_float32) b_.i32[1];\n      r_.i32[2] = a_.i32[2];\n      r_.i32[3] = a_.i32[3];\n    #endif\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_cvt_pi2ps(a, b) simde_mm_cvt_pi2ps((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_cvt_ps2pi (simde__m128 a) {\n  #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_cvt_ps2pi(a);\n  #else\n    simde__m64_private r_;\n    simde__m128_private a_;\n\n  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n    a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION));\n    r_.neon_i32 = vcvt_s32_f32(vget_low_f32(a_.neon_f32));\n  #elif defined(SIMDE_CONVERT_VECTOR_) && SIMDE_NATURAL_VECTOR_SIZE_GE(128) && !defined(SIMDE_BUG_GCC_100761)\n    a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION));\n    SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].f32);\n  #else\n    a_ = simde__m128_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n      r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, simde_math_nearbyintf(a_.f32[i]));\n    }\n  #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_cvt_ps2pi(a) simde_mm_cvt_ps2pi((a))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_cvt_si2ss (simde__m128 a, int32_t b) {\n  #if 
defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_cvt_si2ss(a, b);\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float, b), a_.neon_f32, 0);\n    #else\n      r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b);\n      r_.i32[1] = a_.i32[1];\n      r_.i32[2] = a_.i32[2];\n      r_.i32[3] = a_.i32[3];\n    #endif\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_cvt_si2ss(a, b) simde_mm_cvt_si2ss((a), b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nint32_t\nsimde_mm_cvt_ss2si (simde__m128 a) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_cvt_ss2si(a);\n  #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_BUG_GCC_95399)\n    return vgetq_lane_s32(vcvtnq_s32_f32(simde__m128_to_neon_f32(a)), 0);\n  #else\n    simde__m128_private a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION));\n    #if !defined(SIMDE_FAST_CONVERSION_RANGE)\n      return ((a_.f32[0] > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) &&\n          (a_.f32[0] < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ?\n        SIMDE_CONVERT_FTOI(int32_t, a_.f32[0]) : INT32_MIN;\n    #else\n      return SIMDE_CONVERT_FTOI(int32_t, a_.f32[0]);\n    #endif\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_cvt_ss2si(a) simde_mm_cvt_ss2si((a))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_cvtpi16_ps (simde__m64 a) {\n  #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_cvtpi16_ps(a);\n  #else\n    simde__m128_private r_;\n    simde__m64_private a_ = simde__m64_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_f32 = vcvtq_f32_s32(vmovl_s16(a_.neon_i16));\n    #elif defined(SIMDE_CONVERT_VECTOR_)\n      SIMDE_CONVERT_VECTOR_(r_.f32, a_.i16);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        simde_float32 v = a_.i16[i];\n        r_.f32[i] = v;\n      }\n    #endif\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_cvtpi16_ps(a) simde_mm_cvtpi16_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_cvtpi32_ps (simde__m128 a, simde__m64 b) {\n  #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_cvtpi32_ps(a, b);\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a);\n    simde__m64_private b_ = simde__m64_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_f32 = vcombine_f32(vcvt_f32_s32(b_.neon_i32), vget_high_f32(a_.neon_f32));\n    #elif defined(SIMDE_CONVERT_VECTOR_)\n      SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, b_.i32);\n      r_.m64_private[1] = a_.m64_private[1];\n    #else\n      r_.f32[0] = (simde_float32) b_.i32[0];\n      r_.f32[1] = (simde_float32) b_.i32[1];\n      r_.i32[2] = a_.i32[2];\n      r_.i32[3] = a_.i32[3];\n    #endif\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_cvtpi32_ps(a, b) simde_mm_cvtpi32_ps((a), b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_cvtpi32x2_ps (simde__m64 a, simde__m64 b) {\n  #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_cvtpi32x2_ps(a, b);\n  #else\n    
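/* Portable fallback: result lanes 0-1 are converted from a, lanes 2-3 from b. */\n    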
simde__m128_private r_;\n    simde__m64_private\n      a_ = simde__m64_to_private(a),\n      b_ = simde__m64_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_f32 = vcvtq_f32_s32(vcombine_s32(a_.neon_i32, b_.neon_i32));\n    #elif defined(SIMDE_CONVERT_VECTOR_)\n      SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, a_.i32);\n      SIMDE_CONVERT_VECTOR_(r_.m64_private[1].f32, b_.i32);\n    #else\n      r_.f32[0] = (simde_float32) a_.i32[0];\n      r_.f32[1] = (simde_float32) a_.i32[1];\n      r_.f32[2] = (simde_float32) b_.i32[0];\n      r_.f32[3] = (simde_float32) b_.i32[1];\n    #endif\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_cvtpi32x2_ps(a, b) simde_mm_cvtpi32x2_ps(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_cvtpi8_ps (simde__m64 a) {\n  #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_cvtpi8_ps(a);\n  #else\n    simde__m128_private r_;\n    simde__m64_private a_ = simde__m64_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_f32 = vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8(a_.neon_i8))));\n    #else\n      r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[0]);\n      r_.f32[1] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[1]);\n      r_.f32[2] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[2]);\n      r_.f32[3] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[3]);\n    #endif\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_cvtpi8_ps(a) simde_mm_cvtpi8_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_cvtps_pi16 (simde__m128 a) {\n  #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_cvtps_pi16(a);\n  #else\n    simde__m64_private r_;\n    simde__m128_private a_ = simde__m128_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399)\n      r_.neon_i16 = vmovn_s32(vcvtq_s32_f32(vrndiq_f32(a_.neon_f32)));\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n        r_.i16[i] = SIMDE_CONVERT_FTOI(int16_t, simde_math_roundf(a_.f32[i]));\n      }\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_cvtps_pi16(a) simde_mm_cvtps_pi16((a))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_cvtps_pi32 (simde__m128 a) {\n  #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_cvtps_pi32(a);\n  #else\n    simde__m64_private r_;\n    simde__m128_private a_ = simde__m128_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_BUG_GCC_95399)\n      r_.neon_i32 = vcvt_s32_f32(vget_low_f32(vrndiq_f32(a_.neon_f32)));\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n        simde_float32 v = simde_math_roundf(a_.f32[i]);\n        #if !defined(SIMDE_FAST_CONVERSION_RANGE)\n          r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ?\n            SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN;\n        #else\n          r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v);\n        #endif\n      }\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define 
_mm_cvtps_pi32(a) simde_mm_cvtps_pi32((a))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_cvtps_pi8 (simde__m128 a) {\n  #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_cvtps_pi8(a);\n  #else\n    simde__m64_private r_;\n    simde__m128_private a_ = simde__m128_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95471)\n      /* Clamp the input to [INT8_MIN, INT8_MAX], round, convert to i32, narrow to\n      * i16, combine with an all-zero vector of i16 (which will become the upper\n      * half), narrow to i8. */\n      float32x4_t max = vdupq_n_f32(HEDLEY_STATIC_CAST(simde_float32, INT8_MAX));\n      float32x4_t min = vdupq_n_f32(HEDLEY_STATIC_CAST(simde_float32, INT8_MIN));\n      float32x4_t values = vrndnq_f32(vmaxq_f32(vminq_f32(max, a_.neon_f32), min));\n      r_.neon_i8 = vmovn_s16(vcombine_s16(vmovn_s32(vcvtq_s32_f32(values)), vdup_n_s16(0)));\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) {\n        if (a_.f32[i] > HEDLEY_STATIC_CAST(simde_float32, INT8_MAX))\n          r_.i8[i] = INT8_MAX;\n        else if (a_.f32[i] <  HEDLEY_STATIC_CAST(simde_float32, INT8_MIN))\n          r_.i8[i] = INT8_MIN;\n        else\n          r_.i8[i] = SIMDE_CONVERT_FTOI(int8_t, simde_math_roundf(a_.f32[i]));\n      }\n      /* Note: the upper half is undefined */\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_cvtps_pi8(a) simde_mm_cvtps_pi8((a))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_cvtpu16_ps (simde__m64 a) {\n  #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_cvtpu16_ps(a);\n  #else\n    simde__m128_private r_;\n    simde__m64_private a_ = simde__m64_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_f32 = vcvtq_f32_u32(vmovl_u16(a_.neon_u16));\n    #elif defined(SIMDE_CONVERT_VECTOR_)\n      SIMDE_CONVERT_VECTOR_(r_.f32, a_.u16);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = (simde_float32) a_.u16[i];\n      }\n    #endif\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_cvtpu16_ps(a) simde_mm_cvtpu16_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_cvtpu8_ps (simde__m64 a) {\n  #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_cvtpu8_ps(a);\n  #else\n    simde__m128_private r_;\n    simde__m64_private a_ = simde__m64_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_f32 = vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8(a_.neon_u8))));\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = HEDLEY_STATIC_CAST(simde_float32, a_.u8[i]);\n      }\n    #endif\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_cvtpu8_ps(a) simde_mm_cvtpu8_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_cvtsi32_ss (simde__m128 a, int32_t b) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_cvtsi32_ss(a, b);\n  #else\n    simde__m128_private r_;\n    simde__m128_private a_ = simde__m128_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float32_t, 
b), a_.neon_f32, 0);\n    #else\n      r_ = a_;\n      r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b);\n    #endif\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_cvtsi32_ss(a, b) simde_mm_cvtsi32_ss((a), b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_cvtsi64_ss (simde__m128 a, int64_t b) {\n  #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64)\n    #if !defined(__PGI)\n      return _mm_cvtsi64_ss(a, b);\n    #else\n      return _mm_cvtsi64x_ss(a, b);\n    #endif\n  #else\n    simde__m128_private r_;\n    simde__m128_private a_ = simde__m128_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float32_t, b), a_.neon_f32, 0);\n    #else\n      r_ = a_;\n      r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b);\n    #endif\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64))\n#  define _mm_cvtsi64_ss(a, b) simde_mm_cvtsi64_ss((a), b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde_float32\nsimde_mm_cvtss_f32 (simde__m128 a) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_cvtss_f32(a);\n  #else\n    simde__m128_private a_ = simde__m128_to_private(a);\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      return vgetq_lane_f32(a_.neon_f32, 0);\n    #else\n      return a_.f32[0];\n    #endif\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_cvtss_f32(a) simde_mm_cvtss_f32((a))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nint32_t\nsimde_mm_cvtss_si32 (simde__m128 a) {\n  return simde_mm_cvt_ss2si(a);\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_cvtss_si32(a) simde_mm_cvtss_si32((a))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nint64_t\nsimde_mm_cvtss_si64 (simde__m128 a) {\n  #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64)\n    #if !defined(__PGI)\n      return _mm_cvtss_si64(a);\n    #else\n      return _mm_cvtss_si64x(a);\n    #endif\n  #else\n    simde__m128_private a_ = simde__m128_to_private(a);\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      return SIMDE_CONVERT_FTOI(int64_t, simde_math_roundf(vgetq_lane_f32(a_.neon_f32, 0)));\n    #else\n      return SIMDE_CONVERT_FTOI(int64_t, simde_math_roundf(a_.f32[0]));\n    #endif\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64))\n#  define _mm_cvtss_si64(a) simde_mm_cvtss_si64((a))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_cvtt_ps2pi (simde__m128 a) {\n  #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_cvtt_ps2pi(a);\n  #else\n    simde__m64_private r_;\n    simde__m128_private a_ = simde__m128_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE)\n      r_.neon_i32 = vcvt_s32_f32(vget_low_f32(a_.neon_f32));\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        simde_float32 v = a_.f32[i];\n        #if !defined(SIMDE_FAST_CONVERSION_RANGE)\n          r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ?\n            SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN;\n        #else\n          r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v);\n        #endif\n      }\n    #endif\n\n    return 
simde__m64_from_private(r_);\n  #endif\n}\n#define simde_mm_cvttps_pi32(a) simde_mm_cvtt_ps2pi(a)\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_cvtt_ps2pi(a) simde_mm_cvtt_ps2pi((a))\n#  define _mm_cvttps_pi32(a) simde_mm_cvttps_pi32((a))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nint32_t\nsimde_mm_cvtt_ss2si (simde__m128 a) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_cvtt_ss2si(a);\n  #else\n    simde__m128_private a_ = simde__m128_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE)\n      return SIMDE_CONVERT_FTOI(int32_t, vgetq_lane_f32(a_.neon_f32, 0));\n    #else\n      simde_float32 v = a_.f32[0];\n      #if !defined(SIMDE_FAST_CONVERSION_RANGE)\n        return ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ?\n          SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN;\n      #else\n        return SIMDE_CONVERT_FTOI(int32_t, v);\n      #endif\n    #endif\n  #endif\n}\n#define simde_mm_cvttss_si32(a) simde_mm_cvtt_ss2si((a))\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_cvtt_ss2si(a) simde_mm_cvtt_ss2si((a))\n#  define _mm_cvttss_si32(a) simde_mm_cvtt_ss2si((a))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nint64_t\nsimde_mm_cvttss_si64 (simde__m128 a) {\n  #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(_MSC_VER)\n    #if defined(__PGI)\n      return _mm_cvttss_si64x(a);\n    #else\n      return _mm_cvttss_si64(a);\n    #endif\n  #else\n    simde__m128_private a_ = simde__m128_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      return SIMDE_CONVERT_FTOI(int64_t, vgetq_lane_f32(a_.neon_f32, 0));\n    #else\n      return SIMDE_CONVERT_FTOI(int64_t, a_.f32[0]);\n    #endif\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64))\n#  define _mm_cvttss_si64(a) simde_mm_cvttss_si64((a))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_cmpord_ss (simde__m128 a, simde__m128 b) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_cmpord_ss(a, b);\n  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS)\n    return simde_mm_move_ss(a, simde_mm_cmpord_ps(a, b));\n  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)\n    return simde_mm_move_ss(a, simde_mm_cmpord_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b)));\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a);\n\n    #if defined(simde_math_isnanf)\n      r_.u32[0] = (simde_math_isnanf(simde_mm_cvtss_f32(a)) || simde_math_isnanf(simde_mm_cvtss_f32(b))) ? 
UINT32_C(0) : ~UINT32_C(0);\n      SIMDE_VECTORIZE\n      for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.u32[i] = a_.u32[i];\n      }\n    #else\n      HEDLEY_UNREACHABLE();\n    #endif\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_cmpord_ss(a, b) simde_mm_cmpord_ss((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_div_ps (simde__m128 a, simde__m128 b) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_div_ps(a, b);\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a),\n      b_ = simde__m128_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      r_.neon_f32 = vdivq_f32(a_.neon_f32, b_.neon_f32);\n    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      /* ARMv7 NEON has no vector float division; take a reciprocal estimate of b (vrecpe) and sharpen it with one Newton-Raphson step (vrecps), then multiply by a. */\n      float32x4_t recip0 = vrecpeq_f32(b_.neon_f32);\n      float32x4_t recip1 = vmulq_f32(recip0, vrecpsq_f32(recip0, b_.neon_f32));\n      r_.neon_f32 = vmulq_f32(a_.neon_f32, recip1);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 =  wasm_f32x4_div(a_.wasm_v128, b_.wasm_v128);\n    #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)\n      r_.altivec_f32 = vec_div(a_.altivec_f32, b_.altivec_f32);\n    #elif defined(SIMDE_LOONGARCH_LSX_NATIVE)\n      r_.lsx_f32 = __lsx_vfdiv_s(a_.lsx_f32, b_.lsx_f32);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.f32 = a_.f32 / b_.f32;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = a_.f32[i] / b_.f32[i];\n      }\n    #endif\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_div_ps(a, b) simde_mm_div_ps((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_div_ss (simde__m128 a, simde__m128 b) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_div_ss(a, b);\n  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS)\n    return simde_mm_move_ss(a, simde_mm_div_ps(a, b));\n  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)\n    return simde_mm_move_ss(a, simde_mm_div_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b)));\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a),\n      b_ = simde__m128_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      float32_t value =\n              vgetq_lane_f32(simde__m128_to_private(simde_mm_div_ps(a, b)).neon_f32, 0);\n      r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0);\n    #else\n      r_.f32[0] = a_.f32[0] / b_.f32[0];\n      SIMDE_VECTORIZE\n      for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = a_.f32[i];\n      }\n    #endif\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_div_ss(a, b) simde_mm_div_ss((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nint16_t\nsimde_mm_extract_pi16 (simde__m64 a, const int imm8)\n    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) {\n  simde__m64_private a_ = simde__m64_to_private(a);\n  return a_.i16[imm8];\n}\n#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(HEDLEY_PGI_VERSION) && !defined(SIMDE_BUG_CLANG_44589)\n  #define simde_mm_extract_pi16(a, imm8) HEDLEY_STATIC_CAST(int16_t, _mm_extract_pi16(a, imm8))\n#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n  #define simde_mm_extract_pi16(a, imm8) vget_lane_s16(simde__m64_to_private(a).neon_i16, imm8)\n#endif\n#define 
simde_m_pextrw(a, imm8) simde_mm_extract_pi16(a, imm8)\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_extract_pi16(a, imm8) simde_mm_extract_pi16((a), (imm8))\n#  define _m_pextrw(a, imm8) simde_mm_extract_pi16((a), (imm8))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_insert_pi16 (simde__m64 a, int16_t i, const int imm8)\n    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) {\n  simde__m64_private\n    a_ = simde__m64_to_private(a);\n\n  a_.i16[imm8] = i;\n\n  return simde__m64_from_private(a_);\n}\n#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) && !defined(SIMDE_BUG_CLANG_44589)\n  #define simde_mm_insert_pi16(a, i, imm8) _mm_insert_pi16(a, i, imm8)\n#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n  #define simde_mm_insert_pi16(a, i, imm8) simde__m64_from_neon_i16(vset_lane_s16((i), simde__m64_to_neon_i16(a), (imm8)))\n#endif\n#define simde_m_pinsrw(a, i, imm8) (simde_mm_insert_pi16(a, i, imm8))\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_insert_pi16(a, i, imm8) simde_mm_insert_pi16(a, i, imm8)\n#  define _m_pinsrw(a, i, imm8) simde_mm_insert_pi16(a, i, imm8)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_load_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) {\n#if defined(SIMDE_X86_SSE_NATIVE)\n  return _mm_load_ps(mem_addr);\n#else\n  simde__m128_private r_;\n\n  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n    r_.neon_f32 = vld1q_f32(mem_addr);\n  #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)\n    r_.altivec_f32 = vec_vsx_ld(0, mem_addr);\n  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)\n    r_.altivec_f32 = vec_ld(0, mem_addr);\n  #elif defined(SIMDE_LOONGARCH_LSX_NATIVE)\n    r_.lsx_i64 = __lsx_vld(mem_addr, 0);\n  #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n    r_.wasm_v128 = wasm_v128_load(mem_addr);\n  #else\n    simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128), sizeof(r_));\n  #endif\n\n  return simde__m128_from_private(r_);\n#endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_load_ps(mem_addr) simde_mm_load_ps(mem_addr)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_load1_ps (simde_float32 const* mem_addr) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_load_ps1(mem_addr);\n  #else\n    simde__m128_private r_;\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_f32 = vld1q_dup_f32(mem_addr);\n    #elif defined(SIMDE_LOONGARCH_LSX_NATIVE)\n      r_.lsx_i64 = __lsx_vldrepl_w(mem_addr, 0);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_v128_load32_splat(mem_addr);\n    #else\n      r_ = simde__m128_to_private(simde_mm_set1_ps(*mem_addr));\n    #endif\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#define simde_mm_load_ps1(mem_addr) simde_mm_load1_ps(mem_addr)\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_load_ps1(mem_addr) simde_mm_load1_ps(mem_addr)\n#  define _mm_load1_ps(mem_addr) simde_mm_load1_ps(mem_addr)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_load_ss (simde_float32 const* mem_addr) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_load_ss(mem_addr);\n  #else\n    simde__m128_private r_;\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_f32 = vsetq_lane_f32(*mem_addr, vdupq_n_f32(0), 0);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_v128_load32_zero(mem_addr);\n    #else\n      r_.f32[0] = *mem_addr;\n      r_.i32[1] = 0;\n      r_.i32[2] = 0;\n      r_.i32[3] = 0;\n    #endif\n\n    return 
simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_load_ss(mem_addr) simde_mm_load_ss(mem_addr)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_loadh_pi (simde__m128 a, simde__m64 const* mem_addr) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_loadh_pi(a, HEDLEY_REINTERPRET_CAST(__m64 const*, mem_addr));\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a);\n\n  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n    r_.neon_f32 = vcombine_f32(vget_low_f32(a_.neon_f32), vld1_f32(HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr)));\n  #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n    r_.wasm_v128 = wasm_v128_load64_lane(mem_addr, a_.wasm_v128, 1);\n  #else\n    simde__m64_private b_ = *HEDLEY_REINTERPRET_CAST(simde__m64_private const*, mem_addr);\n    r_.f32[0] = a_.f32[0];\n    r_.f32[1] = a_.f32[1];\n    r_.f32[2] = b_.f32[0];\n    r_.f32[3] = b_.f32[1];\n  #endif\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n  #if HEDLEY_HAS_WARNING(\"-Wold-style-cast\")\n    #define _mm_loadh_pi(a, mem_addr) simde_mm_loadh_pi((a), HEDLEY_REINTERPRET_CAST(simde__m64 const*, (mem_addr)))\n  #else\n    #define _mm_loadh_pi(a, mem_addr) simde_mm_loadh_pi((a), (simde__m64 const*) (mem_addr))\n  #endif\n#endif\n\n/* The SSE documentation says that there are no alignment requirements\n   for mem_addr.  Unfortunately they used the __m64 type for the argument\n   which is supposed to be 8-byte aligned, so some compilers (like clang\n   with -Wcast-align) will generate a warning if you try to cast, say,\n   a simde_float32* to a simde__m64* for this function.\n\n   I think the choice of argument type is unfortunate, but I do think we\n   need to stick to it here.  
If there is demand I can always add something\n   like simde_x_mm_loadl_f32(simde__m128, simde_float32 mem_addr[2]) */\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_loadl_pi (simde__m128 a, simde__m64 const* mem_addr) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_loadl_pi(a, HEDLEY_REINTERPRET_CAST(__m64 const*, mem_addr));\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_f32 = vcombine_f32(vld1_f32(\n        HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr)), vget_high_f32(a_.neon_f32));\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_v128_load64_lane(mem_addr, a_.wasm_v128, 0);\n    #else\n      simde__m64_private b_;\n      simde_memcpy(&b_, mem_addr, sizeof(b_));\n      r_.i32[0] = b_.i32[0];\n      r_.i32[1] = b_.i32[1];\n      r_.i32[2] = a_.i32[2];\n      r_.i32[3] = a_.i32[3];\n    #endif\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n  #if HEDLEY_HAS_WARNING(\"-Wold-style-cast\")\n    #define _mm_loadl_pi(a, mem_addr) simde_mm_loadl_pi((a), HEDLEY_REINTERPRET_CAST(simde__m64 const*, (mem_addr)))\n  #else\n    #define _mm_loadl_pi(a, mem_addr) simde_mm_loadl_pi((a), (simde__m64 const*) (mem_addr))\n  #endif\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_loadr_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_loadr_ps(mem_addr);\n  #else\n    simde__m128_private\n      r_,\n      v_ = simde__m128_to_private(simde_mm_load_ps(mem_addr));\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_f32 = vrev64q_f32(v_.neon_f32);\n      r_.neon_f32 = vextq_f32(r_.neon_f32, r_.neon_f32, 2);\n    #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && defined(__PPC64__)\n      r_.altivec_f32 = vec_reve(v_.altivec_f32);\n    #elif defined(SIMDE_LOONGARCH_LSX_NATIVE)\n      r_.lsx_i64 = __lsx_vshuf4i_w(v_.lsx_i64, 0x1b);\n    #elif defined(SIMDE_SHUFFLE_VECTOR_)\n      r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, v_.f32, v_.f32, 3, 2, 1, 0);\n    #else\n      r_.f32[0] = v_.f32[3];\n      r_.f32[1] = v_.f32[2];\n      r_.f32[2] = v_.f32[1];\n      r_.f32[3] = v_.f32[0];\n    #endif\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_loadr_ps(mem_addr) simde_mm_loadr_ps(mem_addr)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_loadu_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_loadu_ps(mem_addr);\n  #else\n    simde__m128_private r_;\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_f32 = vld1q_f32(HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr));\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_v128_load(mem_addr);\n    #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && defined(__PPC64__)\n      r_.altivec_f32 = vec_vsx_ld(0, mem_addr);\n    #elif defined(SIMDE_LOONGARCH_LSX_NATIVE)\n      r_.lsx_i64 = __lsx_vld(mem_addr, 0);\n    #else\n      simde_memcpy(&r_, mem_addr, sizeof(r_));\n    #endif\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_loadu_ps(mem_addr) simde_mm_loadu_ps(mem_addr)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nvoid\nsimde_mm_maskmove_si64 (simde__m64 a, simde__m64 mask, int8_t* mem_addr) {\n  #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)\n  
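  /* A byte of a is stored to mem_addr[i] only when the corresponding byte of mask has its most-significant bit set (MASKMOVQ semantics). */\n  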
  _mm_maskmove_si64(a, mask, HEDLEY_REINTERPRET_CAST(char*, mem_addr));\n  #else\n    simde__m64_private\n      a_ = simde__m64_to_private(a),\n      mask_ = simde__m64_to_private(mask);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++)\n      if (mask_.i8[i] < 0)\n        mem_addr[i] = a_.i8[i];\n  #endif\n}\n#define simde_m_maskmovq(a, mask, mem_addr) simde_mm_maskmove_si64(a, mask, mem_addr)\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_maskmove_si64(a, mask, mem_addr) simde_mm_maskmove_si64((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr)))\n#  define _m_maskmovq(a, mask, mem_addr) simde_mm_maskmove_si64((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr)))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_max_pi16 (simde__m64 a, simde__m64 b) {\n  #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_max_pi16(a, b);\n  #else\n    simde__m64_private\n      r_,\n      a_ = simde__m64_to_private(a),\n      b_ = simde__m64_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i16 = vmax_s16(a_.neon_i16, b_.neon_i16);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n        r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? a_.i16[i] : b_.i16[i];\n      }\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#define simde_m_pmaxsw(a, b) simde_mm_max_pi16(a, b)\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_max_pi16(a, b) simde_mm_max_pi16(a, b)\n#  define _m_pmaxsw(a, b) simde_mm_max_pi16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_max_ps (simde__m128 a, simde__m128 b) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_max_ps(a, b);\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a),\n      b_ = simde__m128_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_NANS)\n      r_.neon_f32 = vmaxq_f32(a_.neon_f32, b_.neon_f32);\n    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_f32 = vbslq_f32(vcgtq_f32(a_.neon_f32, b_.neon_f32), a_.neon_f32, b_.neon_f32);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_NANS)\n      r_.wasm_v128 = wasm_f32x4_max(a_.wasm_v128, b_.wasm_v128);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_v128_bitselect(a_.wasm_v128, b_.wasm_v128, wasm_f32x4_gt(a_.wasm_v128, b_.wasm_v128));\n    #elif (defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE)) && defined(SIMDE_FAST_NANS)\n      r_.altivec_f32 = vec_max(a_.altivec_f32, b_.altivec_f32);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE)\n      r_.altivec_f32 = vec_sel(b_.altivec_f32, a_.altivec_f32, vec_cmpgt(a_.altivec_f32, b_.altivec_f32));\n    #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) && defined(SIMDE_FAST_NANS)\n      r_.lsx_f32 = __lsx_vfmax_s(a_.lsx_f32, b_.lsx_f32);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = (a_.f32[i] > b_.f32[i]) ? 
a_.f32[i] : b_.f32[i];\n      }\n    #endif\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_max_ps(a, b) simde_mm_max_ps((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_max_pu8 (simde__m64 a, simde__m64 b) {\n  #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_max_pu8(a, b);\n  #else\n    simde__m64_private\n      r_,\n      a_ = simde__m64_to_private(a),\n      b_ = simde__m64_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u8 = vmax_u8(a_.neon_u8, b_.neon_u8);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {\n        r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? a_.u8[i] : b_.u8[i];\n      }\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#define simde_m_pmaxub(a, b) simde_mm_max_pu8(a, b)\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_max_pu8(a, b) simde_mm_max_pu8(a, b)\n#  define _m_pmaxub(a, b) simde_mm_max_pu8(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_max_ss (simde__m128 a, simde__m128 b) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_max_ss(a, b);\n  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS)\n    return simde_mm_move_ss(a, simde_mm_max_ps(a, b));\n  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)\n    return simde_mm_move_ss(a, simde_mm_max_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b)));\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a),\n      b_ = simde__m128_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      float32_t value = vgetq_lane_f32(vmaxq_f32(a_.neon_f32, b_.neon_f32), 0);\n      r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0);\n    #else\n      r_.f32[0] = (a_.f32[0] > b_.f32[0]) ? a_.f32[0] : b_.f32[0];\n      r_.f32[1] = a_.f32[1];\n      r_.f32[2] = a_.f32[2];\n      r_.f32[3] = a_.f32[3];\n    #endif\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_max_ss(a, b) simde_mm_max_ss((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_min_pi16 (simde__m64 a, simde__m64 b) {\n  #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_min_pi16(a, b);\n  #else\n    simde__m64_private\n      r_,\n      a_ = simde__m64_to_private(a),\n      b_ = simde__m64_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i16 = vmin_s16(a_.neon_i16, b_.neon_i16);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n        r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? 
a_.i16[i] : b_.i16[i];\n      }\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#define simde_m_pminsw(a, b) simde_mm_min_pi16(a, b)\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_min_pi16(a, b) simde_mm_min_pi16(a, b)\n#  define _m_pminsw(a, b) simde_mm_min_pi16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_min_ps (simde__m128 a, simde__m128 b) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_min_ps(a, b);\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a),\n      b_ = simde__m128_to_private(b);\n\n    #if defined(SIMDE_FAST_NANS) && defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_f32 = vminq_f32(a_.neon_f32, b_.neon_f32);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_f32x4_pmin(b_.wasm_v128, a_.wasm_v128);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE)\n      #if defined(SIMDE_FAST_NANS)\n        r_.altivec_f32 = vec_min(a_.altivec_f32, b_.altivec_f32);\n      #else\n        r_.altivec_f32 = vec_sel(b_.altivec_f32, a_.altivec_f32, vec_cmpgt(b_.altivec_f32, a_.altivec_f32));\n      #endif\n    #elif defined(SIMDE_FAST_NANS) && defined(SIMDE_LOONGARCH_LSX_NATIVE)\n      r_.lsx_f32 = __lsx_vfmin_s(a_.lsx_f32, b_.lsx_f32);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      uint32_t SIMDE_VECTOR(16) m = HEDLEY_REINTERPRET_CAST(__typeof__(m), a_.f32 < b_.f32);\n      r_.f32 =\n        HEDLEY_REINTERPRET_CAST(\n          __typeof__(r_.f32),\n          ( (HEDLEY_REINTERPRET_CAST(__typeof__(m), a_.f32) &  m) |\n            (HEDLEY_REINTERPRET_CAST(__typeof__(m), b_.f32) & ~m)\n          )\n        );\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = (a_.f32[i] < b_.f32[i]) ? a_.f32[i] : b_.f32[i];\n      }\n    #endif\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_min_ps(a, b) simde_mm_min_ps((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_min_pu8 (simde__m64 a, simde__m64 b) {\n  #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_min_pu8(a, b);\n  #else\n    simde__m64_private\n      r_,\n      a_ = simde__m64_to_private(a),\n      b_ = simde__m64_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u8 = vmin_u8(a_.neon_u8, b_.neon_u8);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {\n        r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? 
a_.u8[i] : b_.u8[i];\n      }\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#define simde_m_pminub(a, b) simde_mm_min_pu8(a, b)\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_min_pu8(a, b) simde_mm_min_pu8(a, b)\n#  define _m_pminub(a, b) simde_mm_min_pu8(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_min_ss (simde__m128 a, simde__m128 b) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_min_ss(a, b);\n  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS)\n    return simde_mm_move_ss(a, simde_mm_min_ps(a, b));\n  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)\n    return simde_mm_move_ss(a, simde_mm_min_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b)));\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a),\n      b_ = simde__m128_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      float32_t value = vgetq_lane_f32(vminq_f32(a_.neon_f32, b_.neon_f32), 0);\n      r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0);\n    #else\n      r_.f32[0] = (a_.f32[0] < b_.f32[0]) ? a_.f32[0] : b_.f32[0];\n      r_.f32[1] = a_.f32[1];\n      r_.f32[2] = a_.f32[2];\n      r_.f32[3] = a_.f32[3];\n    #endif\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_min_ss(a, b) simde_mm_min_ss((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_movehl_ps (simde__m128 a, simde__m128 b) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_movehl_ps(a, b);\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a),\n      b_ = simde__m128_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      r_.neon_u64 = vzip2q_u64(b_.neon_u64, a_.neon_u64);\n    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      float32x2_t a32 = vget_high_f32(a_.neon_f32);\n      float32x2_t b32 = vget_high_f32(b_.neon_f32);\n      r_.neon_f32 = vcombine_f32(b32, a32);\n    #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)\n      r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float),\n          vec_mergel(b_.altivec_i64, a_.altivec_i64));\n    #elif defined(SIMDE_LOONGARCH_LSX_NATIVE)\n      r_.lsx_i64 = __lsx_vilvh_d(a_.lsx_i64, b_.lsx_i64);\n    #elif defined(SIMDE_SHUFFLE_VECTOR_)\n      r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 6, 7, 2, 3);\n    #else\n      r_.f32[0] = b_.f32[2];\n      r_.f32[1] = b_.f32[3];\n      r_.f32[2] = a_.f32[2];\n      r_.f32[3] = a_.f32[3];\n    #endif\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_movehl_ps(a, b) simde_mm_movehl_ps((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_movelh_ps (simde__m128 a, simde__m128 b) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_movelh_ps(a, b);\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a),\n      b_ = simde__m128_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      float32x2_t a10 = vget_low_f32(a_.neon_f32);\n      float32x2_t b10 = vget_low_f32(b_.neon_f32);\n      r_.neon_f32 = vcombine_f32(a10, b10);\n    #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)\n      r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float),\n          vec_mergeh(a_.altivec_i64, b_.altivec_i64));\n    #elif defined(SIMDE_LOONGARCH_LSX_NATIVE)\n      
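/* vilvl_d interleaves the low 64-bit lanes, placing the low half of\n         a in lane 0 and the low half of b in lane 1, i.e. { a0, a1, b0, b1 }. */\n      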
r_.lsx_i64 = __lsx_vilvl_d(b_.lsx_i64, a_.lsx_i64);\n    #elif defined(SIMDE_SHUFFLE_VECTOR_)\n      r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 1, 4, 5);\n    #else\n      r_.f32[0] = a_.f32[0];\n      r_.f32[1] = a_.f32[1];\n      r_.f32[2] = b_.f32[0];\n      r_.f32[3] = b_.f32[1];\n    #endif\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_movelh_ps(a, b) simde_mm_movelh_ps((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nint\nsimde_mm_movemask_pi8 (simde__m64 a) {\n  #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_movemask_pi8(a);\n  #else\n    simde__m64_private a_ = simde__m64_to_private(a);\n    int r = 0;\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      uint8x8_t input = a_.neon_u8;\n      const int8_t xr[8] = {-7, -6, -5, -4, -3, -2, -1, 0};\n      const uint8x8_t mask_and = vdup_n_u8(0x80);\n      const int8x8_t mask_shift = vld1_s8(xr);\n      const uint8x8_t mask_result = vshl_u8(vand_u8(input, mask_and), mask_shift);\n      uint8x8_t lo = mask_result;\n      r = vaddv_u8(lo);\n    #else\n      const size_t nmemb = sizeof(a_.i8) / sizeof(a_.i8[0]);\n      SIMDE_VECTORIZE_REDUCTION(|:r)\n      for (size_t i = 0 ; i < nmemb ; i++) {\n        r |= (a_.u8[nmemb - 1 - i] >> 7) << (nmemb - 1 - i);\n      }\n    #endif\n\n    return r;\n  #endif\n}\n#define simde_m_pmovmskb(a) simde_mm_movemask_pi8(a)\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_movemask_pi8(a) simde_mm_movemask_pi8(a)\n#  define _m_pmovmskb(a) simde_mm_movemask_pi8(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nint\nsimde_mm_movemask_ps (simde__m128 a) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_movemask_ps(a);\n  #else\n    int r = 0;\n    simde__m128_private a_ = simde__m128_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      static const int32_t shift[4] = {0, 1, 2, 3};\n      uint32x4_t tmp = vshrq_n_u32(a_.neon_u32, 31);\n      return HEDLEY_STATIC_CAST(int32_t, vaddvq_u32(vshlq_u32(tmp, vld1q_s32(shift))));\n    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      // Shift out everything but the sign bits with a 32-bit unsigned shift right.\n      uint64x2_t high_bits = vreinterpretq_u64_u32(vshrq_n_u32(a_.neon_u32, 31));\n      // Merge the two pairs together with a 64-bit unsigned shift right + add.\n      uint8x16_t paired = vreinterpretq_u8_u64(vsraq_n_u64(high_bits, high_bits, 31));\n      // Extract the result.\n      return vgetq_lane_u8(paired, 0) | (vgetq_lane_u8(paired, 8) << 2);\n    #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && defined(SIMDE_BUG_CLANG_50932)\n      SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 96, 64, 32, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 };\n      SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_bperm(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned __int128), a_.altivec_u64), idx));\n      return HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2));\n    #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)\n      SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 96, 64, 32, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 };\n      SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = vec_bperm(a_.altivec_u8, idx);\n      return HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2));\n 
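     /* Whichever backend is used, the result packs the four sign bits\n        into bits 0-3 of the return value, giving an integer in [0, 15]. */\n 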
   #elif defined(SIMDE_LOONGARCH_LSX_NATIVE)\n      v2i64 t64 = __lsx_vmskltz_w(a_.lsx_i64);\n      r = __lsx_vpickve2gr_wu(t64, 0);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      return HEDLEY_STATIC_CAST(int32_t, wasm_i32x4_bitmask(a_.wasm_v128));\n    #else\n      SIMDE_VECTORIZE_REDUCTION(|:r)\n      for (size_t i = 0 ; i < sizeof(a_.u32) / sizeof(a_.u32[0]) ; i++) {\n        r |= (a_.u32[i] >> ((sizeof(a_.u32[i]) * CHAR_BIT) - 1)) << i;\n      }\n    #endif\n\n    return r;\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_movemask_ps(a) simde_mm_movemask_ps((a))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_mul_ps (simde__m128 a, simde__m128 b) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_mul_ps(a, b);\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a),\n      b_ = simde__m128_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_f32 = vmulq_f32(a_.neon_f32, b_.neon_f32);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_f32x4_mul(a_.wasm_v128, b_.wasm_v128);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.f32 = a_.f32 * b_.f32;\n    #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)\n      r_.altivec_f32 = vec_mul(a_.altivec_f32, b_.altivec_f32);\n    #elif defined(SIMDE_LOONGARCH_LSX_NATIVE)\n      r_.lsx_f32 = __lsx_vfmul_s(a_.lsx_f32, b_.lsx_f32);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = a_.f32[i] * b_.f32[i];\n      }\n    #endif\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_mul_ps(a, b) simde_mm_mul_ps((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_mul_ss (simde__m128 a, simde__m128 b) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_mul_ss(a, b);\n  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS)\n    return simde_mm_move_ss(a, simde_mm_mul_ps(a, b));\n  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)\n    return simde_mm_move_ss(a, simde_mm_mul_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b)));\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a),\n      b_ = simde__m128_to_private(b);\n\n    r_.f32[0] = a_.f32[0] * b_.f32[0];\n    r_.f32[1] = a_.f32[1];\n    r_.f32[2] = a_.f32[2];\n    r_.f32[3] = a_.f32[3];\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_mul_ss(a, b) simde_mm_mul_ss((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_mulhi_pu16 (simde__m64 a, simde__m64 b) {\n  #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_mulhi_pu16(a, b);\n  #else\n    simde__m64_private\n      r_,\n      a_ = simde__m64_to_private(a),\n      b_ = simde__m64_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      const uint32x4_t t1 = vmull_u16(a_.neon_u16, b_.neon_u16);\n      const uint32x4_t t2 = vshrq_n_u32(t1, 16);\n      const uint16x4_t t3 = vmovn_u32(t2);\n      r_.neon_u16 = t3;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {\n        r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, ((HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i])) >> UINT32_C(16)));\n      }\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#define simde_m_pmulhuw(a, b) 
simde_mm_mulhi_pu16(a, b)\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_mulhi_pu16(a, b) simde_mm_mulhi_pu16(a, b)\n#  define _m_pmulhuw(a, b) simde_mm_mulhi_pu16(a, b)\n#endif\n\n#if defined(SIMDE_X86_SSE_NATIVE) && defined(HEDLEY_GCC_VERSION)\n  #define SIMDE_MM_HINT_NTA  HEDLEY_STATIC_CAST(enum _mm_hint, 0)\n  #define SIMDE_MM_HINT_T0   HEDLEY_STATIC_CAST(enum _mm_hint, 1)\n  #define SIMDE_MM_HINT_T1   HEDLEY_STATIC_CAST(enum _mm_hint, 2)\n  #define SIMDE_MM_HINT_T2   HEDLEY_STATIC_CAST(enum _mm_hint, 3)\n  #define SIMDE_MM_HINT_ENTA HEDLEY_STATIC_CAST(enum _mm_hint, 4)\n  #define SIMDE_MM_HINT_ET0  HEDLEY_STATIC_CAST(enum _mm_hint, 5)\n  #define SIMDE_MM_HINT_ET1  HEDLEY_STATIC_CAST(enum _mm_hint, 6)\n  #define SIMDE_MM_HINT_ET2  HEDLEY_STATIC_CAST(enum _mm_hint, 7)\n#else\n  #define SIMDE_MM_HINT_NTA  0\n  #define SIMDE_MM_HINT_T0   1\n  #define SIMDE_MM_HINT_T1   2\n  #define SIMDE_MM_HINT_T2   3\n  #define SIMDE_MM_HINT_ENTA 4\n  #define SIMDE_MM_HINT_ET0  5\n  #define SIMDE_MM_HINT_ET1  6\n  #define SIMDE_MM_HINT_ET2  7\n#endif\n\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n  HEDLEY_DIAGNOSTIC_PUSH\n  #if HEDLEY_HAS_WARNING(\"-Wreserved-id-macro\")\n    _Pragma(\"clang diagnostic ignored \\\"-Wreserved-id-macro\\\"\")\n  #endif\n  #undef  _MM_HINT_NTA\n  #define _MM_HINT_NTA  SIMDE_MM_HINT_NTA\n  #undef  _MM_HINT_T0\n  #define _MM_HINT_T0   SIMDE_MM_HINT_T0\n  #undef  _MM_HINT_T1\n  #define _MM_HINT_T1   SIMDE_MM_HINT_T1\n  #undef  _MM_HINT_T2\n  #define _MM_HINT_T2   SIMDE_MM_HINT_T2\n  #undef  _MM_HINT_ENTA\n  #define _MM_HINT_ENTA SIMDE_MM_HINT_ENTA\n  #undef  _MM_HINT_ET0\n  #define _MM_HINT_ET0  SIMDE_MM_HINT_ET0\n  #undef  _MM_HINT_ET1\n  #define _MM_HINT_ET1  SIMDE_MM_HINT_ET1\n  #undef  _MM_HINT_ET2\n  #define _MM_HINT_ET2  SIMDE_MM_HINT_ET2\n  HEDLEY_DIAGNOSTIC_POP\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nvoid\nsimde_mm_prefetch (const void* p, int i) {\n  #if \\\n      HEDLEY_HAS_BUILTIN(__builtin_prefetch) || \\\n      HEDLEY_GCC_VERSION_CHECK(3,4,0) || \\\n      HEDLEY_INTEL_VERSION_CHECK(13,0,0)\n    switch(i) {\n      case SIMDE_MM_HINT_NTA:\n        __builtin_prefetch(p, 0, 0);\n        break;\n      case SIMDE_MM_HINT_T0:\n        __builtin_prefetch(p, 0, 3);\n        break;\n      case SIMDE_MM_HINT_T1:\n        __builtin_prefetch(p, 0, 2);\n        break;\n      case SIMDE_MM_HINT_T2:\n        __builtin_prefetch(p, 0, 1);\n        break;\n      case SIMDE_MM_HINT_ENTA:\n        __builtin_prefetch(p, 1, 0);\n        break;\n      case SIMDE_MM_HINT_ET0:\n        __builtin_prefetch(p, 1, 3);\n        break;\n      case SIMDE_MM_HINT_ET1:\n        __builtin_prefetch(p, 1, 2);\n        break;\n      case SIMDE_MM_HINT_ET2:\n        __builtin_prefetch(p, 1, 1);\n        break;\n    }\n  #elif defined(__ARM_ACLE)\n    #if (__ARM_ACLE >= 101)\n      switch(i) {\n        case SIMDE_MM_HINT_NTA:\n          __pldx(0, 0, 1, p);\n          break;\n        case SIMDE_MM_HINT_T0:\n          __pldx(0, 0, 0, p);\n          break;\n        case SIMDE_MM_HINT_T1:\n          __pldx(0, 1, 0, p);\n          break;\n        case SIMDE_MM_HINT_T2:\n          __pldx(0, 2, 0, p);\n          break;\n        case SIMDE_MM_HINT_ENTA:\n          __pldx(1, 0, 1, p);\n          break;\n        case SIMDE_MM_HINT_ET0:\n          __pldx(1, 0, 0, p);\n          break;\n        case SIMDE_MM_HINT_ET1:\n          __pldx(1, 1, 0, p);\n          break;\n        case SIMDE_MM_HINT_ET2:\n          __pldx(1, 2, 0, p);\n          break;\n      }\n    #else\n      (void) i;\n      
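/* Older ACLE revisions only provide the plain __pld preload, which\n         takes no cache-level or retention arguments, so the hint in i\n         is ignored. */\n      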
__pld(p);\n    #endif\n  #elif HEDLEY_PGI_VERSION_CHECK(10,0,0)\n    (void) i;\n    #pragma mem prefetch p\n  #elif HEDLEY_CRAY_VERSION_CHECK(8,1,0)\n    switch (i) {\n      case SIMDE_MM_HINT_NTA:\n        #pragma _CRI prefetch (nt) p\n        break;\n      case SIMDE_MM_HINT_T0:\n      case SIMDE_MM_HINT_T1:\n      case SIMDE_MM_HINT_T2:\n        #pragma _CRI prefetch p\n        break;\n      case SIMDE_MM_HINT_ENTA:\n        #pragma _CRI prefetch (write, nt) p\n        break;\n      case SIMDE_MM_HINT_ET0:\n      case SIMDE_MM_HINT_ET1:\n      case SIMDE_MM_HINT_ET2:\n        #pragma _CRI prefetch (write) p\n        break;\n    }\n  #elif HEDLEY_IBM_VERSION_CHECK(11,0,0)\n    switch(i) {\n      case SIMDE_MM_HINT_NTA:\n        __prefetch_by_load(p, 0, 0);\n        break;\n      case SIMDE_MM_HINT_T0:\n        __prefetch_by_load(p, 0, 3);\n        break;\n      case SIMDE_MM_HINT_T1:\n        __prefetch_by_load(p, 0, 2);\n        break;\n      case SIMDE_MM_HINT_T2:\n        __prefetch_by_load(p, 0, 1);\n        break;\n      case SIMDE_MM_HINT_ENTA:\n        __prefetch_by_load(p, 1, 0);\n        break;\n      case SIMDE_MM_HINT_ET0:\n        __prefetch_by_load(p, 1, 3);\n        break;\n      case SIMDE_MM_HINT_ET1:\n        __prefetch_by_load(p, 1, 2);\n        break;\n      case SIMDE_MM_HINT_ET2:\n        __prefetch_by_load(p, 1, 1);\n        break;\n    }\n  #elif HEDLEY_MSVC_VERSION\n    (void) i;\n    (void) p;\n  #endif\n}\n#if defined(SIMDE_X86_SSE_NATIVE)\n  #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) /* https://reviews.llvm.org/D71718 */\n    #define simde_mm_prefetch(p, i) \\\n      (__extension__({ \\\n        HEDLEY_DIAGNOSTIC_PUSH \\\n        HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \\\n        _mm_prefetch((p), (i)); \\\n        HEDLEY_DIAGNOSTIC_POP \\\n      }))\n  #else\n    #define simde_mm_prefetch(p, i) _mm_prefetch(p, i)\n  #endif\n#endif\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n  #define _mm_prefetch(p, i) simde_mm_prefetch(p, i)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_x_mm_negate_ps(simde__m128 a) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    return simde_mm_xor_ps(a, _mm_set1_ps(SIMDE_FLOAT32_C(-0.0)));\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_f32 = vnegq_f32(a_.neon_f32);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_f32x4_neg(a_.wasm_v128);\n    #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)\n      r_.altivec_f32 = vec_neg(a_.altivec_f32);\n    #elif defined(SIMDE_LOONGARCH_LSX_NATIVE)\n      const v4f32 f32 = {0.0f, 0.0f, 0.0f, 0.0f};\n      r_.lsx_f32 = __lsx_vfsub_s(f32, a_.lsx_f32);\n    #elif defined(SIMDE_VECTOR_NEGATE)\n      r_.f32 = -a_.f32;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = -a_.f32[i];\n      }\n    #endif\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_rcp_ps (simde__m128 a) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_rcp_ps(a);\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      float32x4_t recip = vrecpeq_f32(a_.neon_f32);\n\n      #if SIMDE_ACCURACY_PREFERENCE > 0\n        for (int i = 0; i < SIMDE_ACCURACY_PREFERENCE ; ++i) {\n          recip = vmulq_f32(recip, vrecpsq_f32(recip, a_.neon_f32));\n        }\n      #endif\n\n 
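     /* vrecpeq_f32 only yields an estimate accurate to roughly 8 bits;\n        each vrecpsq_f32 pass in the loop above is one Newton-Raphson\n        refinement, roughly doubling the number of correct bits. */\n 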
     r_.neon_f32 = recip;\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_f32x4_div(simde_mm_set1_ps(1.0f), a_.wasm_v128);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)\n      r_.altivec_f32 = vec_re(a_.altivec_f32);\n    #elif defined(SIMDE_LOONGARCH_LSX_NATIVE)\n      r_.lsx_f32 = __lsx_vfrecip_s(a_.lsx_f32);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)\n      r_.f32 = 1.0f / a_.f32;\n    #elif defined(SIMDE_IEEE754_STORAGE)\n      /* https://stackoverflow.com/questions/12227126/division-as-multiply-and-lut-fast-float-division-reciprocal/12228234#12228234 */\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        int32_t ix;\n        simde_float32 fx = a_.f32[i];\n        simde_memcpy(&ix, &fx, sizeof(ix));\n        int32_t x = INT32_C(0x7EF311C3) - ix;\n        simde_float32 temp;\n        simde_memcpy(&temp, &x, sizeof(temp));\n        r_.f32[i] = temp * (SIMDE_FLOAT32_C(2.0) - temp * fx);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = 1.0f / a_.f32[i];\n      }\n    #endif\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_rcp_ps(a) simde_mm_rcp_ps((a))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_rcp_ss (simde__m128 a) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_rcp_ss(a);\n  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS)\n    return simde_mm_move_ss(a, simde_mm_rcp_ps(a));\n  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)\n    return simde_mm_move_ss(a, simde_mm_rcp_ps(simde_x_mm_broadcastlow_ps(a)));\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a);\n\n    r_.f32[0] = 1.0f / a_.f32[0];\n    r_.f32[1] = a_.f32[1];\n    r_.f32[2] = a_.f32[2];\n    r_.f32[3] = a_.f32[3];\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_rcp_ss(a) simde_mm_rcp_ss((a))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_rsqrt_ps (simde__m128 a) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_rsqrt_ps(a);\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_f32 = vrsqrteq_f32(a_.neon_f32);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)\n      r_.altivec_f32 = vec_rsqrte(a_.altivec_f32);\n    #elif defined(SIMDE_LOONGARCH_LSX_NATIVE)\n      r_.lsx_f32 = __lsx_vfrsqrt_s(a_.lsx_f32);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_f32x4_div(simde_mm_set1_ps(1.0f), wasm_f32x4_sqrt(a_.wasm_v128));\n    #elif defined(SIMDE_IEEE754_STORAGE)\n      /* https://basesandframes.files.wordpress.com/2020/04/even_faster_math_functions_green_2020.pdf\n        Pages 100 - 103 */\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        #if SIMDE_ACCURACY_PREFERENCE <= 0\n          r_.i32[i] = INT32_C(0x5F37624F) - (a_.i32[i] >> 1);\n        #else\n          simde_float32 x = a_.f32[i];\n          simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x;\n          int32_t ix;\n\n          simde_memcpy(&ix, &x, sizeof(ix));\n\n          #if SIMDE_ACCURACY_PREFERENCE == 1\n            ix = INT32_C(0x5F375A82) - (ix >> 1);\n          #else\n            ix = INT32_C(0x5F37599E) - (ix >> 1);\n          #endif\n\n          simde_memcpy(&x, &ix, sizeof(x));\n\n 
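         /* Classic fast inverse square root: subtracting the shifted bits\n            from a magic constant yields a first estimate of 1/sqrt(x), and\n            the multiplies below are Newton-Raphson refinement steps. */\n 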
         #if SIMDE_ACCURACY_PREFERENCE >= 2\n            x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x);\n          #endif\n          x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x);\n\n          r_.f32[i] = x;\n        #endif\n      }\n    #elif defined(simde_math_sqrtf)\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = 1.0f / simde_math_sqrtf(a_.f32[i]);\n      }\n    #else\n      HEDLEY_UNREACHABLE();\n    #endif\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_rsqrt_ps(a) simde_mm_rsqrt_ps((a))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_rsqrt_ss (simde__m128 a) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_rsqrt_ss(a);\n  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS)\n    return simde_mm_move_ss(a, simde_mm_rsqrt_ps(a));\n  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)\n    return simde_mm_move_ss(a, simde_mm_rsqrt_ps(simde_x_mm_broadcastlow_ps(a)));\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a);\n\n  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_f32 = vsetq_lane_f32(vgetq_lane_f32(simde__m128_to_private(simde_mm_rsqrt_ps(a)).neon_f32, 0), a_.neon_f32, 0);\n  #elif defined(SIMDE_IEEE754_STORAGE)\n    {\n      #if SIMDE_ACCURACY_PREFERENCE <= 0\n        r_.i32[0] = INT32_C(0x5F37624F) - (a_.i32[0] >> 1);\n      #else\n        simde_float32 x = a_.f32[0];\n        simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x;\n        int32_t ix;\n\n        simde_memcpy(&ix, &x, sizeof(ix));\n\n        #if SIMDE_ACCURACY_PREFERENCE == 1\n          ix = INT32_C(0x5F375A82) - (ix >> 1);\n        #else\n          ix = INT32_C(0x5F37599E) - (ix >> 1);\n        #endif\n\n        simde_memcpy(&x, &ix, sizeof(x));\n\n        #if SIMDE_ACCURACY_PREFERENCE >= 2\n          x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x);\n        #endif\n        x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x);\n\n        r_.f32[0] = x;\n      #endif\n    }\n    r_.f32[1] = a_.f32[1];\n    r_.f32[2] = a_.f32[2];\n    r_.f32[3] = a_.f32[3];\n  #elif defined(simde_math_sqrtf)\n    r_.f32[0] = 1.0f / simde_math_sqrtf(a_.f32[0]);\n    r_.f32[1] = a_.f32[1];\n    r_.f32[2] = a_.f32[2];\n    r_.f32[3] = a_.f32[3];\n  #else\n    HEDLEY_UNREACHABLE();\n  #endif\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_rsqrt_ss(a) simde_mm_rsqrt_ss((a))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_sad_pu8 (simde__m64 a, simde__m64 b) {\n  #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_sad_pu8(a, b);\n  #else\n    simde__m64_private\n      r_,\n      a_ = simde__m64_to_private(a),\n      b_ = simde__m64_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      uint64x1_t t = vpaddl_u32(vpaddl_u16(vpaddl_u8(vabd_u8(a_.neon_u8, b_.neon_u8))));\n      r_.neon_u16 = vset_lane_u16(HEDLEY_STATIC_CAST(uint16_t, vget_lane_u64(t, 0)), vdup_n_u16(0), 0);\n    #else\n      uint16_t sum = 0;\n\n      SIMDE_VECTORIZE_REDUCTION(+:sum)\n      for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {\n        sum += HEDLEY_STATIC_CAST(uint8_t, simde_math_abs(a_.u8[i] - b_.u8[i]));\n      }\n\n      r_.i16[0] = HEDLEY_STATIC_CAST(int16_t, sum);\n      r_.i16[1] = 0;\n      r_.i16[2] = 0;\n      r_.i16[3] = 0;\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#define 
simde_m_psadbw(a, b) simde_mm_sad_pu8(a, b)\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_sad_pu8(a, b) simde_mm_sad_pu8(a, b)\n#  define _m_psadbw(a, b) simde_mm_sad_pu8(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_set_ss (simde_float32 a) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_set_ss(a);\n  #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n    return vsetq_lane_f32(a, vdupq_n_f32(SIMDE_FLOAT32_C(0.0)), 0);\n  #else\n    return simde_mm_set_ps(SIMDE_FLOAT32_C(0.0), SIMDE_FLOAT32_C(0.0), SIMDE_FLOAT32_C(0.0), a);\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_set_ss(a) simde_mm_set_ss(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_setr_ps (simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_setr_ps(e3, e2, e1, e0);\n  #else\n    return simde_mm_set_ps(e0, e1, e2, e3);\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_setr_ps(e3, e2, e1, e0) simde_mm_setr_ps(e3, e2, e1, e0)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_setzero_ps (void) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_setzero_ps();\n  #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n    return vdupq_n_f32(SIMDE_FLOAT32_C(0.0));\n  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)\n    return vec_splats(SIMDE_FLOAT32_C(0.0));\n  #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n    return wasm_f32x4_const(0.f, 0.f, 0.f, 0.f);\n  #else\n    simde__m128 r;\n    simde_memset(&r, 0, sizeof(r));\n    return r;\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_setzero_ps() simde_mm_setzero_ps()\n#endif\n\n#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_)\nHEDLEY_DIAGNOSTIC_PUSH\nSIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_undefined_ps (void) {\n  simde__m128_private r_;\n\n  #if defined(SIMDE_HAVE_UNDEFINED128)\n    r_.n = _mm_undefined_ps();\n  #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_)\n    r_ = simde__m128_to_private(simde_mm_setzero_ps());\n  #endif\n\n  return simde__m128_from_private(r_);\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_undefined_ps() simde_mm_undefined_ps()\n#endif\n\n#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_)\nHEDLEY_DIAGNOSTIC_POP\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_x_mm_setone_ps (void) {\n  simde__m128 t = simde_mm_setzero_ps();\n  return simde_mm_cmpeq_ps(t, t);\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nvoid\nsimde_mm_sfence (void) {\n    /* TODO: Use Hedley. 
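No portable store-only fence is assumed\n       to exist outside of SSE itself, so the fallbacks below conservatively\n       issue a full sequentially-consistent barrier, which is stronger\n       than (and therefore still correct for) SFENCE. 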
*/\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    _mm_sfence();\n  #elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7))\n    __atomic_thread_fence(__ATOMIC_SEQ_CST);\n  #elif !defined(__INTEL_COMPILER) && defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_ATOMICS__)\n    #if defined(__GNUC__) && (__GNUC__ == 4) && (__GNUC_MINOR__ < 9)\n      __atomic_thread_fence(__ATOMIC_SEQ_CST);\n    #else\n      atomic_thread_fence(memory_order_seq_cst);\n    #endif\n  #elif defined(_MSC_VER)\n    MemoryBarrier();\n  #elif HEDLEY_HAS_EXTENSION(c_atomic)\n    __c11_atomic_thread_fence(__ATOMIC_SEQ_CST);\n  #elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1))\n    __sync_synchronize();\n  #elif defined(_OPENMP)\n    #pragma omp critical(simde_mm_sfence_)\n    { }\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_sfence() simde_mm_sfence()\n#endif\n\n#define SIMDE_MM_SHUFFLE(z, y, x, w) (((z) << 6) | ((y) << 4) | ((x) << 2) | (w))\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _MM_SHUFFLE(z, y, x, w) SIMDE_MM_SHUFFLE(z, y, x, w)\n#endif\n\n#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI)\n#  define simde_mm_shuffle_pi16(a, imm8) _mm_shuffle_pi16(a, imm8)\n#elif defined(SIMDE_SHUFFLE_VECTOR_)\n#  define simde_mm_shuffle_pi16(a, imm8) (__extension__ ({ \\\n      const simde__m64_private simde_tmp_a_ = simde__m64_to_private(a); \\\n      simde__m64_from_private((simde__m64_private) { .i16 = \\\n        SIMDE_SHUFFLE_VECTOR_(16, 8, \\\n          (simde_tmp_a_).i16, \\\n          (simde_tmp_a_).i16, \\\n          (((imm8)     ) & 3), \\\n          (((imm8) >> 2) & 3), \\\n          (((imm8) >> 4) & 3), \\\n          (((imm8) >> 6) & 3)) }); }))\n#else\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_shuffle_pi16 (simde__m64 a, const int imm8)\n    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) {\n  simde__m64_private r_;\n  simde__m64_private a_ = simde__m64_to_private(a);\n\n  for (size_t i = 0 ; i < sizeof(r_.i16) / sizeof(r_.i16[0]) ; i++) {\n    r_.i16[i] = a_.i16[(imm8 >> (i * 2)) & 3];\n  }\n\nHEDLEY_DIAGNOSTIC_PUSH\n#if HEDLEY_HAS_WARNING(\"-Wconditional-uninitialized\")\n#  pragma clang diagnostic ignored \"-Wconditional-uninitialized\"\n#endif\n  return simde__m64_from_private(r_);\nHEDLEY_DIAGNOSTIC_POP\n}\n#endif\n#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI)\n#  define simde_m_pshufw(a, imm8) _m_pshufw(a, imm8)\n#else\n#  define simde_m_pshufw(a, imm8) simde_mm_shuffle_pi16(a, imm8)\n#endif\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_shuffle_pi16(a, imm8) simde_mm_shuffle_pi16(a, imm8)\n#  define _m_pshufw(a, imm8) simde_mm_shuffle_pi16(a, imm8)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_shuffle_ps (simde__m128 a, simde__m128 b, const int imm8)\n    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) {\n  simde__m128_private\n    r_,\n    a_ = simde__m128_to_private(a),\n    b_ = simde__m128_to_private(b);\n\n  r_.f32[0] = a_.f32[(imm8 >> 0) & 3];\n  r_.f32[1] = a_.f32[(imm8 >> 2) & 3];\n  r_.f32[2] = b_.f32[(imm8 >> 4) & 3];\n  r_.f32[3] = b_.f32[(imm8 >> 6) & 3];\n\n  return simde__m128_from_private(r_);\n}\n#if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI)\n#  define simde_mm_shuffle_ps(a, b, imm8) _mm_shuffle_ps(a, b, imm8)\n#elif defined(SIMDE_WASM_SIMD128_NATIVE)\n  #define simde_mm_shuffle_ps(a, b, imm8) (__extension__ ({ \\\n    
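/* Shuffle lane indices 0-3 select from a and 4-7 from b, hence the "+ 4" on the b-sourced lanes. */ \\\n    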
simde__m128_from_private((simde__m128_private) { .wasm_v128 = \\\n      wasm_i32x4_shuffle( \\\n        simde__m128_to_private(a).wasm_v128, \\\n        simde__m128_to_private(b).wasm_v128, \\\n        (((imm8)     ) & 3), \\\n        (((imm8) >> 2) & 3), \\\n        (((imm8) >> 4) & 3) + 4, \\\n        (((imm8) >> 6) & 3) + 4) }); }))\n#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_)\n  #define simde_mm_shuffle_ps(a, b, imm8) \\\n    (__extension__({ \\\n      float32x4_t simde_mm_shuffle_ps_a_ = simde__m128_to_neon_f32(a); \\\n      float32x4_t simde_mm_shuffle_ps_b_ = simde__m128_to_neon_f32(b); \\\n      float32x4_t simde_mm_shuffle_ps_r_; \\\n      \\\n      simde_mm_shuffle_ps_r_ = vmovq_n_f32(vgetq_lane_f32(simde_mm_shuffle_ps_a_, (imm8) & (0x3))); \\\n      simde_mm_shuffle_ps_r_ = vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_a_, ((imm8) >> 2) & 0x3), simde_mm_shuffle_ps_r_, 1); \\\n      simde_mm_shuffle_ps_r_ = vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_b_, ((imm8) >> 4) & 0x3), simde_mm_shuffle_ps_r_, 2); \\\n                               vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_b_, ((imm8) >> 6) & 0x3), simde_mm_shuffle_ps_r_, 3); \\\n    }))\n#elif defined(SIMDE_SHUFFLE_VECTOR_)\n  #define simde_mm_shuffle_ps(a, b, imm8) (__extension__ ({ \\\n      simde__m128_from_private((simde__m128_private) { .f32 = \\\n        SIMDE_SHUFFLE_VECTOR_(32, 16, \\\n          simde__m128_to_private(a).f32, \\\n          simde__m128_to_private(b).f32, \\\n          (((imm8)     ) & 3), \\\n          (((imm8) >> 2) & 3), \\\n          (((imm8) >> 4) & 3) + 4, \\\n          (((imm8) >> 6) & 3) + 4) }); }))\n#endif\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_shuffle_ps(a, b, imm8) simde_mm_shuffle_ps((a), (b), imm8)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_sqrt_ps (simde__m128 a) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_sqrt_ps(a);\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      r_.neon_f32 = vsqrtq_f32(a_.neon_f32);\n    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      float32x4_t est = vrsqrteq_f32(a_.neon_f32);\n      for (int i = 0 ; i <= SIMDE_ACCURACY_PREFERENCE ; i++) {\n        est = vmulq_f32(vrsqrtsq_f32(vmulq_f32(a_.neon_f32, est), est), est);\n      }\n      r_.neon_f32 = vmulq_f32(a_.neon_f32, est);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_f32x4_sqrt(a_.wasm_v128);\n    #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE)\n      r_.altivec_f32 = vec_sqrt(a_.altivec_f32);\n    #elif defined(SIMDE_LOONGARCH_LSX_NATIVE)\n      r_.lsx_f32 = __lsx_vfsqrt_s(a_.lsx_f32);\n    #elif defined(simde_math_sqrtf)\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < sizeof(r_.f32) / sizeof(r_.f32[0]) ; i++) {\n        r_.f32[i] = simde_math_sqrtf(a_.f32[i]);\n      }\n    #else\n      HEDLEY_UNREACHABLE();\n    #endif\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_sqrt_ps(a) simde_mm_sqrt_ps((a))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_sqrt_ss (simde__m128 a) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_sqrt_ss(a);\n  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS)\n    return simde_mm_move_ss(a, simde_mm_sqrt_ps(a));\n  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)\n    return simde_mm_move_ss(a, 
simde_mm_sqrt_ps(simde_x_mm_broadcastlow_ps(a)));\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      float32_t value =\n            vgetq_lane_f32(simde__m128_to_private(simde_mm_sqrt_ps(a)).neon_f32, 0);\n      r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0);\n    #elif defined(simde_math_sqrtf)\n      r_.f32[0] = simde_math_sqrtf(a_.f32[0]);\n      r_.f32[1] = a_.f32[1];\n      r_.f32[2] = a_.f32[2];\n      r_.f32[3] = a_.f32[3];\n    #else\n      HEDLEY_UNREACHABLE();\n    #endif\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_sqrt_ss(a) simde_mm_sqrt_ss((a))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nvoid\nsimde_mm_store_ps (simde_float32 mem_addr[4], simde__m128 a) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    _mm_store_ps(mem_addr, a);\n  #else\n    simde__m128_private a_ = simde__m128_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      vst1q_f32(mem_addr, a_.neon_f32);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)\n      vec_st(a_.altivec_f32, 0, mem_addr);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      wasm_v128_store(mem_addr, a_.wasm_v128);\n    #elif defined(SIMDE_LOONGARCH_LSX_NATIVE)\n      __lsx_vst(a_.lsx_f32, mem_addr, 0);\n    #else\n      simde_memcpy(mem_addr, &a_, sizeof(a));\n    #endif\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_store_ps(mem_addr, a) simde_mm_store_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nvoid\nsimde_mm_store1_ps (simde_float32 mem_addr[4], simde__m128 a) {\n  simde_float32* mem_addr_ = SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128);\n\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    _mm_store_ps1(mem_addr_, a);\n  #else\n    simde__m128_private a_ = simde__m128_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      vst1q_f32(mem_addr_, vdupq_lane_f32(vget_low_f32(a_.neon_f32), 0));\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      wasm_v128_store(mem_addr_, wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0, 0, 0));\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)\n      vec_st(vec_splat(a_.altivec_f32, 0), 0, mem_addr_);\n    #elif defined(SIMDE_LOONGARCH_LSX_NATIVE)\n      __lsx_vst(__lsx_vreplvei_w(a_.lsx_f32, 0), mem_addr_, 0);\n    #elif defined(SIMDE_SHUFFLE_VECTOR_)\n      simde__m128_private tmp_;\n      tmp_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 0, 0);\n      simde_mm_store_ps(mem_addr_, simde__m128_from_private(tmp_));\n    #else\n      SIMDE_VECTORIZE_ALIGNED(mem_addr_:16)\n      for (size_t i = 0 ; i < sizeof(a_.f32) / sizeof(a_.f32[0]) ; i++) {\n        mem_addr_[i] = a_.f32[0];\n      }\n    #endif\n  #endif\n}\n#define simde_mm_store_ps1(mem_addr, a) simde_mm_store1_ps(mem_addr, a)\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_store_ps1(mem_addr, a) simde_mm_store1_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a))\n#  define _mm_store1_ps(mem_addr, a) simde_mm_store1_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nvoid\nsimde_mm_store_ss (simde_float32* mem_addr, simde__m128 a) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    _mm_store_ss(mem_addr, a);\n  #else\n    simde__m128_private a_ = simde__m128_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      vst1q_lane_f32(mem_addr, a_.neon_f32, 0);\n    #elif 
defined(SIMDE_LOONGARCH_LSX_NATIVE)\n      __lsx_vstelm_w(a_.lsx_f32, mem_addr, 0, 0);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      wasm_v128_store32_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0);\n    #else\n      *mem_addr = a_.f32[0];\n    #endif\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_store_ss(mem_addr, a) simde_mm_store_ss(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nvoid\nsimde_mm_storeh_pi (simde__m64* mem_addr, simde__m128 a) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    _mm_storeh_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a);\n  #else\n    simde__m128_private a_ = simde__m128_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      vst1_f32(HEDLEY_REINTERPRET_CAST(float32_t*, mem_addr), vget_high_f32(a_.neon_f32));\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 1);\n    #else\n      simde_memcpy(mem_addr, &(a_.m64[1]), sizeof(a_.m64[1]));\n    #endif\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_storeh_pi(mem_addr, a) simde_mm_storeh_pi(mem_addr, (a))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nvoid\nsimde_mm_storel_pi (simde__m64* mem_addr, simde__m128 a) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    _mm_storel_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a);\n  #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n    simde__m128_private a_ = simde__m128_to_private(a);\n    wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0);\n  #else\n    simde__m64_private* dest_ = HEDLEY_REINTERPRET_CAST(simde__m64_private*, mem_addr);\n    simde__m128_private a_ = simde__m128_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      dest_->neon_f32 = vget_low_f32(a_.neon_f32);\n    #else\n      dest_->f32[0] = a_.f32[0];\n      dest_->f32[1] = a_.f32[1];\n    #endif\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_storel_pi(mem_addr, a) simde_mm_storel_pi(mem_addr, (a))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nvoid\nsimde_mm_storer_ps (simde_float32 mem_addr[4], simde__m128 a) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    _mm_storer_ps(mem_addr, a);\n  #else\n    simde__m128_private a_ = simde__m128_to_private(a);\n\n    #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)\n      vec_st(vec_reve(a_.altivec_f32), 0, mem_addr);\n    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      float32x4_t tmp = vrev64q_f32(a_.neon_f32);\n      vst1q_f32(mem_addr, vextq_f32(tmp, tmp, 2));\n    #elif defined(SIMDE_LOONGARCH_LSX_NATIVE)\n      __lsx_vst(__lsx_vshuf4i_w(a_.lsx_f32, 0x1b), mem_addr, 0);\n    #elif defined(SIMDE_SHUFFLE_VECTOR_)\n      a_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 3, 2, 1, 0);\n      simde_mm_store_ps(mem_addr, simde__m128_from_private(a_));\n    #else\n      SIMDE_VECTORIZE_ALIGNED(mem_addr:16)\n      for (size_t i = 0 ; i < sizeof(a_.f32) / sizeof(a_.f32[0]) ; i++) {\n        mem_addr[i] = a_.f32[((sizeof(a_.f32) / sizeof(a_.f32[0])) - 1) - i];\n      }\n    #endif\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_storer_ps(mem_addr, a) simde_mm_storer_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nvoid\nsimde_mm_storeu_ps (simde_float32 mem_addr[4], simde__m128 a) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    _mm_storeu_ps(mem_addr, a);\n  #else\n    simde__m128_private a_ = 
simde__m128_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      vst1q_f32(mem_addr, a_.neon_f32);\n    #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)\n      vec_vsx_st(a_.altivec_f32, 0, mem_addr);\n    #elif defined(SIMDE_LOONGARCH_LSX_NATIVE)\n      __lsx_vst(a_.lsx_f32, mem_addr, 0);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      wasm_v128_store(mem_addr, a_.wasm_v128);\n    #else\n      simde_memcpy(mem_addr, &a_, sizeof(a_));\n    #endif\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_storeu_ps(mem_addr, a) simde_mm_storeu_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_sub_ps (simde__m128 a, simde__m128 b) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_sub_ps(a, b);\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a),\n      b_ = simde__m128_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_f32 = vsubq_f32(a_.neon_f32, b_.neon_f32);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_f32x4_sub(a_.wasm_v128, b_.wasm_v128);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)\n      r_.altivec_f32 = vec_sub(a_.altivec_f32, b_.altivec_f32);\n    #elif defined(SIMDE_LOONGARCH_LSX_NATIVE)\n      r_.lsx_f32 = __lsx_vfsub_s(a_.lsx_f32, b_.lsx_f32);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.f32 = a_.f32 - b_.f32;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = a_.f32[i] - b_.f32[i];\n      }\n    #endif\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_sub_ps(a, b) simde_mm_sub_ps((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_sub_ss (simde__m128 a, simde__m128 b) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_sub_ss(a, b);\n  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS)\n    return simde_mm_move_ss(a, simde_mm_sub_ps(a, b));\n  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)\n    return simde_mm_move_ss(a, simde_mm_sub_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b)));\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a),\n      b_ = simde__m128_to_private(b);\n\n    r_.f32[0] = a_.f32[0] - b_.f32[0];\n    r_.f32[1] = a_.f32[1];\n    r_.f32[2] = a_.f32[2];\n    r_.f32[3] = a_.f32[3];\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_sub_ss(a, b) simde_mm_sub_ss((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nint\nsimde_mm_ucomieq_ss (simde__m128 a, simde__m128 b) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_ucomieq_ss(a, b);\n  #else\n    simde__m128_private\n      a_ = simde__m128_to_private(a),\n      b_ = simde__m128_to_private(b);\n    int r;\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32);\n      uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32);\n      uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan));\n      uint32x4_t a_eq_b = vceqq_f32(a_.neon_f32, b_.neon_f32);\n      r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_eq_b), 0) != 0);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) == wasm_f32x4_extract_lane(b_.wasm_v128, 0);\n    #elif defined(SIMDE_HAVE_FENV_H)\n      fenv_t envp;\n  
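    /* feholdexcept saves the floating-point environment and clears the\n       exception flags; ucomi* comparisons are quiet, so a NaN operand\n       must not leave FE_INVALID raised, and fesetenv restores the saved\n       state afterwards. */\n  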
    int x = feholdexcept(&envp);\n      r = a_.f32[0] == b_.f32[0];\n      if (HEDLEY_LIKELY(x == 0))\n        fesetenv(&envp);\n    #else\n      r = a_.f32[0] == b_.f32[0];\n    #endif\n\n    return r;\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_ucomieq_ss(a, b) simde_mm_ucomieq_ss((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nint\nsimde_mm_ucomige_ss (simde__m128 a, simde__m128 b) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_ucomige_ss(a, b);\n  #else\n    simde__m128_private\n      a_ = simde__m128_to_private(a),\n      b_ = simde__m128_to_private(b);\n    int r;\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32);\n      uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32);\n      uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan);\n      uint32x4_t a_ge_b = vcgeq_f32(a_.neon_f32, b_.neon_f32);\n      r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_ge_b), 0) != 0);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) >= wasm_f32x4_extract_lane(b_.wasm_v128, 0);\n    #elif defined(SIMDE_HAVE_FENV_H)\n      fenv_t envp;\n      int x = feholdexcept(&envp);\n      r = a_.f32[0] >= b_.f32[0];\n      if (HEDLEY_LIKELY(x == 0))\n        fesetenv(&envp);\n    #else\n      r = a_.f32[0] >= b_.f32[0];\n    #endif\n\n    return r;\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_ucomige_ss(a, b) simde_mm_ucomige_ss((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nint\nsimde_mm_ucomigt_ss (simde__m128 a, simde__m128 b) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_ucomigt_ss(a, b);\n  #else\n    simde__m128_private\n      a_ = simde__m128_to_private(a),\n      b_ = simde__m128_to_private(b);\n    int r;\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32);\n      uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32);\n      uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan);\n      uint32x4_t a_gt_b = vcgtq_f32(a_.neon_f32, b_.neon_f32);\n      r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_gt_b), 0) != 0);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) > wasm_f32x4_extract_lane(b_.wasm_v128, 0);\n    #elif defined(SIMDE_HAVE_FENV_H)\n      fenv_t envp;\n      int x = feholdexcept(&envp);\n      r = a_.f32[0] > b_.f32[0];\n      if (HEDLEY_LIKELY(x == 0))\n        fesetenv(&envp);\n    #else\n      r = a_.f32[0] > b_.f32[0];\n    #endif\n\n    return r;\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_ucomigt_ss(a, b) simde_mm_ucomigt_ss((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nint\nsimde_mm_ucomile_ss (simde__m128 a, simde__m128 b) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_ucomile_ss(a, b);\n  #else\n    simde__m128_private\n      a_ = simde__m128_to_private(a),\n      b_ = simde__m128_to_private(b);\n    int r;\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32);\n      uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32);\n      uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan));\n      uint32x4_t a_le_b = vcleq_f32(a_.neon_f32, b_.neon_f32);\n      r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_le_b), 0) != 0);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) <= 
wasm_f32x4_extract_lane(b_.wasm_v128, 0);\n    #elif defined(SIMDE_HAVE_FENV_H)\n      fenv_t envp;\n      int x = feholdexcept(&envp);\n      r = a_.f32[0] <= b_.f32[0];\n      if (HEDLEY_LIKELY(x == 0))\n        fesetenv(&envp);\n    #else\n      r = a_.f32[0] <= b_.f32[0];\n    #endif\n\n    return r;\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_ucomile_ss(a, b) simde_mm_ucomile_ss((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nint\nsimde_mm_ucomilt_ss (simde__m128 a, simde__m128 b) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_ucomilt_ss(a, b);\n  #else\n    simde__m128_private\n      a_ = simde__m128_to_private(a),\n      b_ = simde__m128_to_private(b);\n    int r;\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32);\n      uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32);\n      uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan));\n      uint32x4_t a_lt_b = vcltq_f32(a_.neon_f32, b_.neon_f32);\n      r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_lt_b), 0) != 0);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) < wasm_f32x4_extract_lane(b_.wasm_v128, 0);\n    #elif defined(SIMDE_HAVE_FENV_H)\n      fenv_t envp;\n      int x = feholdexcept(&envp);\n      r = a_.f32[0] < b_.f32[0];\n      if (HEDLEY_LIKELY(x == 0))\n        fesetenv(&envp);\n    #else\n      r = a_.f32[0] < b_.f32[0];\n    #endif\n\n    return r;\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_ucomilt_ss(a, b) simde_mm_ucomilt_ss((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nint\nsimde_mm_ucomineq_ss (simde__m128 a, simde__m128 b) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_ucomineq_ss(a, b);\n  #else\n    simde__m128_private\n      a_ = simde__m128_to_private(a),\n      b_ = simde__m128_to_private(b);\n    int r;\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32);\n      uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32);\n      uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan);\n      uint32x4_t a_neq_b = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32));\n      r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_neq_b), 0) != 0);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) != wasm_f32x4_extract_lane(b_.wasm_v128, 0);\n    #elif defined(SIMDE_HAVE_FENV_H)\n      fenv_t envp;\n      int x = feholdexcept(&envp);\n      r = a_.f32[0] != b_.f32[0];\n      if (HEDLEY_LIKELY(x == 0))\n        fesetenv(&envp);\n    #else\n      r = a_.f32[0] != b_.f32[0];\n    #endif\n\n    return r;\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_ucomineq_ss(a, b) simde_mm_ucomineq_ss((a), (b))\n#endif\n\n#if defined(SIMDE_X86_SSE_NATIVE)\n#  if defined(__has_builtin)\n#    if __has_builtin(__builtin_ia32_undef128)\n#      define SIMDE_HAVE_UNDEFINED128\n#    endif\n#  elif !defined(__PGI) && !defined(SIMDE_BUG_GCC_REV_208793) && !defined(_MSC_VER)\n#    define SIMDE_HAVE_UNDEFINED128\n#  endif\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_unpackhi_ps (simde__m128 a, simde__m128 b) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_unpackhi_ps(a, b);\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a),\n      b_ = simde__m128_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      r_.neon_f32 = 
vzip2q_f32(a_.neon_f32, b_.neon_f32);\n    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      float32x2_t a1 = vget_high_f32(a_.neon_f32);\n      float32x2_t b1 = vget_high_f32(b_.neon_f32);\n      float32x2x2_t result = vzip_f32(a1, b1);\n      r_.neon_f32 = vcombine_f32(result.val[0], result.val[1]);\n    #elif defined(SIMDE_LOONGARCH_LSX_NATIVE)\n      r_.lsx_i64 = __lsx_vilvh_w(b_.lsx_i64, a_.lsx_i64);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 6, 3, 7);\n    #elif defined(SIMDE_SHUFFLE_VECTOR_)\n      r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 2, 6, 3, 7);\n    #else\n      r_.f32[0] = a_.f32[2];\n      r_.f32[1] = b_.f32[2];\n      r_.f32[2] = a_.f32[3];\n      r_.f32[3] = b_.f32[3];\n    #endif\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_unpackhi_ps(a, b) simde_mm_unpackhi_ps((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_unpacklo_ps (simde__m128 a, simde__m128 b) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_unpacklo_ps(a, b);\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a),\n      b_ = simde__m128_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      r_.neon_f32 = vzip1q_f32(a_.neon_f32, b_.neon_f32);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)\n      r_.altivec_f32 = vec_mergeh(a_.altivec_f32, b_.altivec_f32);\n    #elif defined(SIMDE_LOONGARCH_LSX_NATIVE)\n      r_.lsx_i64 = __lsx_vilvl_w(b_.lsx_i64, a_.lsx_i64);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 4, 1, 5);\n    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      float32x2_t a1 = vget_low_f32(a_.neon_f32);\n      float32x2_t b1 = vget_low_f32(b_.neon_f32);\n      float32x2x2_t result = vzip_f32(a1, b1);\n      r_.neon_f32 = vcombine_f32(result.val[0], result.val[1]);\n    #elif defined(SIMDE_SHUFFLE_VECTOR_)\n      r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 4, 1, 5);\n    #else\n      r_.f32[0] = a_.f32[0];\n      r_.f32[1] = b_.f32[0];\n      r_.f32[2] = a_.f32[1];\n      r_.f32[3] = b_.f32[1];\n    #endif\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_unpacklo_ps(a, b) simde_mm_unpacklo_ps((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nvoid\nsimde_mm_stream_pi (simde__m64* mem_addr, simde__m64 a) {\n  #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)\n    _mm_stream_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a);\n  #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && ( \\\n      defined(SIMDE_ARM_NEON_A32V7_NATIVE) || defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) || \\\n      defined(SIMDE_VECTOR_SUBSCRIPT))\n    __builtin_nontemporal_store(a, mem_addr);\n  #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n    simde__m64_private a_ = simde__m64_to_private(a);\n    vst1_s64(HEDLEY_REINTERPRET_CAST(int64_t *, mem_addr), a_.neon_i64);\n  #else\n    simde__m64_private*\n      dest = HEDLEY_REINTERPRET_CAST(simde__m64_private*, mem_addr),\n      a_ = simde__m64_to_private(a);\n\n    dest->i64[0] = a_.i64[0];\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_stream_pi(mem_addr, a) simde_mm_stream_pi(mem_addr, (a))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nvoid\nsimde_mm_stream_ps (simde_float32 mem_addr[4], simde__m128 a) {\n  #if defined(SIMDE_X86_SSE_NATIVE)\n    
_mm_stream_ps(mem_addr, a);\n  #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && ( \\\n      defined(SIMDE_ARM_NEON_A32V7_NATIVE) || defined(SIMDE_VECTOR_SUBSCRIPT) || \\\n      defined(SIMDE_WASM_SIMD128_NATIVE) || defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || \\\n      defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) || defined(SIMDE_LOONGARCH_LSX_NATIVE))\n    __builtin_nontemporal_store(a, SIMDE_ALIGN_ASSUME_CAST(__typeof__(a)*, mem_addr));\n  #else\n    simde_mm_store_ps(mem_addr, a);\n  #endif\n}\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _mm_stream_ps(mem_addr, a) simde_mm_stream_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a))\n#endif\n\n#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n  #define SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) \\\n    do { \\\n          float32x4x2_t SIMDE_MM_TRANSPOSE4_PS_ROW01 = vtrnq_f32(row0, row1); \\\n          float32x4x2_t SIMDE_MM_TRANSPOSE4_PS_ROW23 = vtrnq_f32(row2, row3); \\\n          row0 = vcombine_f32(vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[0]), \\\n                              vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[0])); \\\n          row1 = vcombine_f32(vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[1]), \\\n                              vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[1])); \\\n          row2 = vcombine_f32(vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[0]), \\\n                              vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[0])); \\\n          row3 = vcombine_f32(vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[1]), \\\n                              vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[1])); \\\n      } while (0)\n#else\n  #define SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) \\\n    do { \\\n      simde__m128 SIMDE_MM_TRANSPOSE4_PS_tmp3, SIMDE_MM_TRANSPOSE4_PS_tmp2, SIMDE_MM_TRANSPOSE4_PS_tmp1, SIMDE_MM_TRANSPOSE4_PS_tmp0; \\\n      SIMDE_MM_TRANSPOSE4_PS_tmp0 = simde_mm_unpacklo_ps((row0), (row1)); \\\n      SIMDE_MM_TRANSPOSE4_PS_tmp2 = simde_mm_unpacklo_ps((row2), (row3)); \\\n      SIMDE_MM_TRANSPOSE4_PS_tmp1 = simde_mm_unpackhi_ps((row0), (row1)); \\\n      SIMDE_MM_TRANSPOSE4_PS_tmp3 = simde_mm_unpackhi_ps((row2), (row3)); \\\n      row0 = simde_mm_movelh_ps(SIMDE_MM_TRANSPOSE4_PS_tmp0, SIMDE_MM_TRANSPOSE4_PS_tmp2); \\\n      row1 = simde_mm_movehl_ps(SIMDE_MM_TRANSPOSE4_PS_tmp2, SIMDE_MM_TRANSPOSE4_PS_tmp0); \\\n      row2 = simde_mm_movelh_ps(SIMDE_MM_TRANSPOSE4_PS_tmp1, SIMDE_MM_TRANSPOSE4_PS_tmp3); \\\n      row3 = simde_mm_movehl_ps(SIMDE_MM_TRANSPOSE4_PS_tmp3, SIMDE_MM_TRANSPOSE4_PS_tmp1); \\\n    } while (0)\n#endif\n#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)\n#  define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3)\n#endif\n\nSIMDE_END_DECLS_\n\nHEDLEY_DIAGNOSTIC_POP\n\n#endif /* !defined(SIMDE_X86_SSE_H) */\n"
  },
  {
    "path": "external_libs/pgenlib/simde/x86/sse2.h",
    "content": "/* SPDX-License-Identifier: MIT\n *\n * Permission is hereby granted, free of charge, to any person\n * obtaining a copy of this software and associated documentation\n * files (the \"Software\"), to deal in the Software without\n * restriction, including without limitation the rights to use, copy,\n * modify, merge, publish, distribute, sublicense, and/or sell copies\n * of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be\n * included in all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND,\n * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\n * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\n * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\n * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\n * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\n * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n * SOFTWARE.\n *\n * Copyright:\n *   2017-2020 Evan Nemerson <evan@nemerson.com>\n *   2015-2017 John W. Ratcliff <jratcliffscarab@gmail.com>\n *   2015      Brandon Rowlett <browlett@nvidia.com>\n *   2015      Ken Fast <kfast@gdeb.com>\n *   2017      Hasindu Gamaarachchi <hasindu@unsw.edu.au>\n *   2018      Jeff Daily <jeff.daily@amd.com>\n */\n\n#if !defined(SIMDE_X86_SSE2_H)\n#define SIMDE_X86_SSE2_H\n\n#include \"sse.h\"\n#include \"../simde-f16.h\"\n\nHEDLEY_DIAGNOSTIC_PUSH\nSIMDE_DISABLE_UNWANTED_DIAGNOSTICS\nSIMDE_BEGIN_DECLS_\n\ntypedef union {\n  #if defined(SIMDE_VECTOR_SUBSCRIPT)\n    SIMDE_ALIGN_TO_16 int8_t          i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;\n    SIMDE_ALIGN_TO_16 int16_t        i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;\n    SIMDE_ALIGN_TO_16 int32_t        i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;\n    SIMDE_ALIGN_TO_16 int64_t        i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;\n    SIMDE_ALIGN_TO_16 uint8_t         u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;\n    SIMDE_ALIGN_TO_16 uint16_t       u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;\n    SIMDE_ALIGN_TO_16 uint32_t       u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;\n    SIMDE_ALIGN_TO_16 uint64_t       u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;\n    #if defined(SIMDE_HAVE_INT128_)\n    SIMDE_ALIGN_TO_16 simde_int128  i128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;\n    SIMDE_ALIGN_TO_16 simde_uint128 u128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;\n    #endif\n    #if defined(SIMDE_FLOAT16_VECTOR)\n    SIMDE_ALIGN_TO_16 simde_float16  f16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;\n    #else\n    SIMDE_ALIGN_TO_16 simde_float16  f16[8];\n    #endif\n    SIMDE_ALIGN_TO_16 simde_float32  f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;\n    SIMDE_ALIGN_TO_16 simde_float64  f64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;\n\n    SIMDE_ALIGN_TO_16 int_fast32_t  i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;\n    SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;\n  #else\n    SIMDE_ALIGN_TO_16 int8_t         i8[16];\n    SIMDE_ALIGN_TO_16 int16_t        i16[8];\n    SIMDE_ALIGN_TO_16 int32_t        i32[4];\n    SIMDE_ALIGN_TO_16 int64_t        i64[2];\n    SIMDE_ALIGN_TO_16 uint8_t        u8[16];\n    SIMDE_ALIGN_TO_16 uint16_t       u16[8];\n    SIMDE_ALIGN_TO_16 uint32_t       u32[4];\n    SIMDE_ALIGN_TO_16 uint64_t       u64[2];\n    #if defined(SIMDE_HAVE_INT128_)\n    SIMDE_ALIGN_TO_16 simde_int128  i128[1];\n    SIMDE_ALIGN_TO_16 simde_uint128 u128[1];\n    
#endif\n    SIMDE_ALIGN_TO_16 simde_float16  f16[8];\n    SIMDE_ALIGN_TO_16 simde_float32  f32[4];\n    SIMDE_ALIGN_TO_16 simde_float64  f64[2];\n\n    SIMDE_ALIGN_TO_16 int_fast32_t  i32f[16 / sizeof(int_fast32_t)];\n    SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)];\n  #endif\n\n    SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2];\n    SIMDE_ALIGN_TO_16 simde__m64         m64[2];\n\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    SIMDE_ALIGN_TO_16 __m128i        n;\n  #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n    SIMDE_ALIGN_TO_16 int8x16_t      neon_i8;\n    SIMDE_ALIGN_TO_16 int16x8_t      neon_i16;\n    SIMDE_ALIGN_TO_16 int32x4_t      neon_i32;\n    SIMDE_ALIGN_TO_16 int64x2_t      neon_i64;\n    SIMDE_ALIGN_TO_16 uint8x16_t     neon_u8;\n    SIMDE_ALIGN_TO_16 uint16x8_t     neon_u16;\n    SIMDE_ALIGN_TO_16 uint32x4_t     neon_u32;\n    SIMDE_ALIGN_TO_16 uint64x2_t     neon_u64;\n    #if defined(__ARM_FP16_FORMAT_IEEE)\n    SIMDE_ALIGN_TO_16 float16x8_t    neon_f16;\n    #endif\n    SIMDE_ALIGN_TO_16 float32x4_t    neon_f32;\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n    SIMDE_ALIGN_TO_16 float64x2_t    neon_f64;\n    #endif\n  #elif defined(SIMDE_MIPS_MSA_NATIVE)\n    v16i8 msa_i8;\n    v8i16 msa_i16;\n    v4i32 msa_i32;\n    v2i64 msa_i64;\n    v16u8 msa_u8;\n    v8u16 msa_u16;\n    v4u32 msa_u32;\n    v2u64 msa_u64;\n  #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n    SIMDE_ALIGN_TO_16 v128_t         wasm_v128;\n  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)\n    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char)          altivec_i8;\n    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short)         altivec_i16;\n    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int)           altivec_i32;\n    #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE))\n      SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__INT_FAST32_TYPE__)  altivec_i32f;\n    #else\n      SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int)           altivec_i32f;\n    #endif\n    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char)        altivec_u8;\n    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short)       altivec_u16;\n    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int)         altivec_u32;\n    #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE))\n      SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__UINT_FAST32_TYPE__) altivec_u32f;\n    #else\n      SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int)         altivec_u32f;\n    #endif\n      SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float)                altivec_f32;\n    #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)\n      SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long)   altivec_i64;\n      SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64;\n      SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double)             altivec_f64;\n    #endif\n  #endif\n} simde__m128i_private;\n\ntypedef union {\n  #if defined(SIMDE_VECTOR_SUBSCRIPT)\n    SIMDE_ALIGN_TO_16 int8_t          i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;\n    SIMDE_ALIGN_TO_16 int16_t        i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;\n    SIMDE_ALIGN_TO_16 int32_t        i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;\n    SIMDE_ALIGN_TO_16 int64_t        i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;\n    
SIMDE_ALIGN_TO_16 uint8_t         u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;\n    SIMDE_ALIGN_TO_16 uint16_t       u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;\n    SIMDE_ALIGN_TO_16 uint32_t       u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;\n    SIMDE_ALIGN_TO_16 uint64_t       u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;\n    SIMDE_ALIGN_TO_16 simde_float32  f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;\n    SIMDE_ALIGN_TO_16 simde_float64  f64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;\n    SIMDE_ALIGN_TO_16 int_fast32_t  i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;\n    SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;\n  #else\n    SIMDE_ALIGN_TO_16 int8_t         i8[16];\n    SIMDE_ALIGN_TO_16 int16_t        i16[8];\n    SIMDE_ALIGN_TO_16 int32_t        i32[4];\n    SIMDE_ALIGN_TO_16 int64_t        i64[2];\n    SIMDE_ALIGN_TO_16 uint8_t        u8[16];\n    SIMDE_ALIGN_TO_16 uint16_t       u16[8];\n    SIMDE_ALIGN_TO_16 uint32_t       u32[4];\n    SIMDE_ALIGN_TO_16 uint64_t       u64[2];\n    SIMDE_ALIGN_TO_16 simde_float32  f32[4];\n    SIMDE_ALIGN_TO_16 simde_float64  f64[2];\n    SIMDE_ALIGN_TO_16 int_fast32_t  i32f[16 / sizeof(int_fast32_t)];\n    SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)];\n  #endif\n\n    SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2];\n    SIMDE_ALIGN_TO_16 simde__m64         m64[2];\n\n  #if defined(SIMDE_X86_SSE2_NATIVE) || defined(SIMDE_X86_SVML_NATIVE)\n    SIMDE_ALIGN_TO_16 __m128d        n;\n  #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n    SIMDE_ALIGN_TO_16 int8x16_t      neon_i8;\n    SIMDE_ALIGN_TO_16 int16x8_t      neon_i16;\n    SIMDE_ALIGN_TO_16 int32x4_t      neon_i32;\n    SIMDE_ALIGN_TO_16 int64x2_t      neon_i64;\n    SIMDE_ALIGN_TO_16 uint8x16_t     neon_u8;\n    SIMDE_ALIGN_TO_16 uint16x8_t     neon_u16;\n    SIMDE_ALIGN_TO_16 uint32x4_t     neon_u32;\n    SIMDE_ALIGN_TO_16 uint64x2_t     neon_u64;\n    SIMDE_ALIGN_TO_16 float32x4_t    neon_f32;\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n    SIMDE_ALIGN_TO_16 float64x2_t    neon_f64;\n    #endif\n  #elif defined(SIMDE_MIPS_MSA_NATIVE)\n    v16i8 msa_i8;\n    v8i16 msa_i16;\n    v4i32 msa_i32;\n    v2i64 msa_i64;\n    v16u8 msa_u8;\n    v8u16 msa_u16;\n    v4u32 msa_u32;\n    v2u64 msa_u64;\n  #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n    SIMDE_ALIGN_TO_16 v128_t         wasm_v128;\n  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)\n    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char)          altivec_i8;\n    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short)         altivec_i16;\n    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int)           altivec_i32;\n    #if defined(__INT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE))\n      SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__INT_FAST32_TYPE__)  altivec_i32f;\n    #else\n      SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int)           altivec_i32f;\n    #endif\n    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char)        altivec_u8;\n    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short)       altivec_u16;\n    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int)         altivec_u32;\n    #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE))\n      SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__UINT_FAST32_TYPE__) altivec_u32f;\n    #else\n      SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int)         
altivec_u32f;\n    #endif\n    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float)                altivec_f32;\n    #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)\n      SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long)   altivec_i64;\n      SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64;\n      SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double)             altivec_f64;\n    #endif\n  #endif\n} simde__m128d_private;\n\n#if defined(SIMDE_X86_SSE2_NATIVE) || defined(SIMDE_X86_SVML_NATIVE)\n  typedef __m128i simde__m128i;\n  typedef __m128d simde__m128d;\n#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n   typedef int64x2_t simde__m128i;\n#  if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n     typedef float64x2_t simde__m128d;\n#  elif defined(SIMDE_VECTOR_SUBSCRIPT)\n     typedef simde_float64 simde__m128d SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;\n#  else\n     typedef simde__m128d_private simde__m128d;\n#  endif\n#elif defined(SIMDE_WASM_SIMD128_NATIVE)\n   typedef v128_t simde__m128i;\n   typedef v128_t simde__m128d;\n#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)\n  typedef SIMDE_POWER_ALTIVEC_VECTOR(float) simde__m128i;\n  #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)\n     typedef SIMDE_POWER_ALTIVEC_VECTOR(double) simde__m128d;\n  #else\n     typedef simde__m128d_private simde__m128d;\n  #endif\n#elif defined(SIMDE_VECTOR_SUBSCRIPT)\n  typedef int64_t simde__m128i SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;\n  typedef simde_float64 simde__m128d SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;\n#else\n  typedef simde__m128i_private simde__m128i;\n  typedef simde__m128d_private simde__m128d;\n#endif\n\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  typedef simde__m128i __m128i;\n  typedef simde__m128d __m128d;\n#endif\n\nHEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128i), \"simde__m128i size incorrect\");\nHEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128i_private), \"simde__m128i_private size incorrect\");\nHEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128d), \"simde__m128d size incorrect\");\nHEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128d_private), \"simde__m128d_private size incorrect\");\n#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF)\nHEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128i) == 16, \"simde__m128i is not 16-byte aligned\");\nHEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128i_private) == 16, \"simde__m128i_private is not 16-byte aligned\");\nHEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128d) == 16, \"simde__m128d is not 16-byte aligned\");\nHEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128d_private) == 16, \"simde__m128d_private is not 16-byte aligned\");\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde__m128i_from_private(simde__m128i_private v) {\n  simde__m128i r;\n  simde_memcpy(&r, &v, sizeof(r));\n  return r;\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i_private\nsimde__m128i_to_private(simde__m128i v) {\n  simde__m128i_private r;\n  simde_memcpy(&r, &v, sizeof(r));\n  return r;\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde__m128d_from_private(simde__m128d_private v) {\n  simde__m128d r;\n  simde_memcpy(&r, &v, sizeof(r));\n  return r;\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d_private\nsimde__m128d_to_private(simde__m128d v) {\n  simde__m128d_private r;\n  simde_memcpy(&r, &v, sizeof(r));\n  return r;\n}\n\n#if 
defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int8x16_t, neon, i8)\n  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int16x8_t, neon, i16)\n  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int32x4_t, neon, i32)\n  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int64x2_t, neon, i64)\n  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint8x16_t, neon, u8)\n  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint16x8_t, neon, u16)\n  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint32x4_t, neon, u32)\n  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint64x2_t, neon, u64)\n  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, float32x4_t, neon, f32)\n  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n    SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, float64x2_t, neon, f64)\n  #endif\n#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)\n  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8)\n  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16)\n  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32)\n  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8)\n  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16)\n  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32)\n  #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)\n    SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64)\n    SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64)\n  #endif\n#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */\n\n#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int8x16_t, neon, i8)\n  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int16x8_t, neon, i16)\n  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int32x4_t, neon, i32)\n  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int64x2_t, neon, i64)\n  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint8x16_t, neon, u8)\n  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint16x8_t, neon, u16)\n  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint32x4_t, neon, u32)\n  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint64x2_t, neon, u64)\n  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, float32x4_t, neon, f32)\n  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n    SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, float64x2_t, neon, f64)\n  #endif\n#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)\n  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8)\n  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16)\n  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32)\n  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8)\n  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16)\n  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32)\n  #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)\n    
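/* 64-bit element types (vector long long, vector double) only exist with\n       POWER7/VSX or z13 z/Vector, so their conversions sit behind this extra\n       guard. */\n    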
SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64)\n    SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64)\n    #if defined(SIMDE_BUG_GCC_95782)\n      SIMDE_FUNCTION_ATTRIBUTES\n      SIMDE_POWER_ALTIVEC_VECTOR(double)\n      simde__m128d_to_altivec_f64(simde__m128d value) {\n        simde__m128d_private r_ = simde__m128d_to_private(value);\n        return r_.altivec_f64;\n      }\n\n      SIMDE_FUNCTION_ATTRIBUTES\n      simde__m128d\n      simde__m128d_from_altivec_f64(SIMDE_POWER_ALTIVEC_VECTOR(double) value) {\n        simde__m128d_private r_;\n        r_.altivec_f64 = value;\n        return simde__m128d_from_private(r_);\n      }\n    #else\n      SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(double), altivec, f64)\n    #endif\n  #endif\n#elif defined(SIMDE_WASM_SIMD128_NATIVE)\n  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, v128_t, wasm, v128);\n  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, v128_t, wasm, v128);\n#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_set_pd (simde_float64 e1, simde_float64 e0) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_set_pd(e1, e0);\n  #else\n    simde__m128d_private r_;\n\n    #if defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_f64x2_make(e0, e1);\n    #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      SIMDE_ALIGN_TO_16 simde_float64 data[2] = { e0, e1 };\n      r_.neon_f64 = vld1q_f64(data);\n    #else\n      r_.f64[0] = e0;\n      r_.f64[1] = e1;\n    #endif\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_set_pd(e1, e0) simde_mm_set_pd(e1, e0)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_set1_pd (simde_float64 a) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_set1_pd(a);\n  #else\n    simde__m128d_private r_;\n\n    #if defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_f64x2_splat(a);\n    #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      r_.neon_f64 = vdupq_n_f64(a);\n    #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)\n      r_.altivec_f64 = vec_splats(HEDLEY_STATIC_CAST(double, a));\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {\n        r_.f64[i] = a;\n      }\n    #endif\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#define simde_mm_set_pd1(a) simde_mm_set1_pd(a)\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_set1_pd(a) simde_mm_set1_pd(a)\n  #define _mm_set_pd1(a) simde_mm_set1_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_x_mm_abs_pd(simde__m128d a) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    simde_float64 mask_;\n    uint64_t u64_ = UINT64_C(0x7FFFFFFFFFFFFFFF);\n    simde_memcpy(&mask_, &u64_, sizeof(u64_));\n    return _mm_and_pd(_mm_set1_pd(mask_), a);\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      r_.neon_f64 = vabsq_f64(a_.neon_f64);\n    #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)\n      r_.altivec_f64 = vec_abs(a_.altivec_f64);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_f64x2_abs(a_.wasm_v128);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / 
sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = simde_math_fabs(a_.f64[i]);\n      }\n    #endif\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_x_mm_not_pd(simde__m128d a) {\n  #if defined(SIMDE_X86_AVX512VL_NATIVE)\n    __m128i ai = _mm_castpd_si128(a);\n    return _mm_castsi128_pd(_mm_ternarylogic_epi64(ai, ai, ai, 0x55));\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i32 = vmvnq_s32(a_.neon_i32);\n    #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)\n      r_.altivec_f64 = vec_nor(a_.altivec_f64, a_.altivec_f64);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)\n      r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_v128_not(a_.wasm_v128);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i32f = ~a_.i32f;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) {\n        r_.i32f[i] = ~(a_.i32f[i]);\n      }\n    #endif\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_x_mm_select_pd(simde__m128d a, simde__m128d b, simde__m128d mask) {\n  /* This function is for when you want to blend two elements together\n   * according to a mask.  It is similar to _mm_blendv_pd, except that\n   * it is undefined whether the blend is based on the highest bit in\n   * each lane (like blendv) or just bitwise operations.  This allows\n   * us to implement the function efficiently everywhere.\n   *\n   * Basically, you promise that all the lanes in mask are either 0 or\n   * ~0. */\n  #if defined(SIMDE_X86_SSE4_1_NATIVE)\n    return _mm_blendv_pd(a, b, mask);\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a),\n      b_ = simde__m128d_to_private(b),\n      mask_ = simde__m128d_to_private(mask);\n\n    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i64 = a_.i64 ^ ((a_.i64 ^ b_.i64) & mask_.i64);\n    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i64 = vbslq_s64(mask_.neon_u64, b_.neon_i64, a_.neon_i64);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {\n        r_.i64[i] = a_.i64[i] ^ ((a_.i64[i] ^ b_.i64[i]) & mask_.i64[i]);\n      }\n    #endif\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_add_epi8 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_add_epi8(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i8 = vaddq_s8(a_.neon_i8, b_.neon_i8);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)\n      r_.altivec_i8 = vec_add(a_.altivec_i8, b_.altivec_i8);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_i8x16_add(a_.wasm_v128, b_.wasm_v128);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i8 = a_.i8 + b_.i8;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {\n        r_.i8[i] = a_.i8[i] + b_.i8[i];\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_add_epi8(a, b) simde_mm_add_epi8(a, 
b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_add_epi16 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_add_epi16(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i16 = vaddq_s16(a_.neon_i16, b_.neon_i16);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)\n      r_.altivec_i16 = vec_add(a_.altivec_i16, b_.altivec_i16);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_i16x8_add(a_.wasm_v128, b_.wasm_v128);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i16 = a_.i16 + b_.i16;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n        r_.i16[i] = a_.i16[i] + b_.i16[i];\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_add_epi16(a, b) simde_mm_add_epi16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_add_epi32 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_add_epi32(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i32 = vaddq_s32(a_.neon_i32, b_.neon_i32);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)\n      r_.altivec_i32 = vec_add(a_.altivec_i32, b_.altivec_i32);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_i32x4_add(a_.wasm_v128, b_.wasm_v128);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i32 = a_.i32 + b_.i32;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n        r_.i32[i] = a_.i32[i] + b_.i32[i];\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_add_epi32(a, b) simde_mm_add_epi32(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_add_epi64 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_add_epi64(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i64 = vaddq_s64(a_.neon_i64, b_.neon_i64);\n    #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)\n      r_.altivec_i64 = vec_add(a_.altivec_i64, b_.altivec_i64);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_i64x2_add(a_.wasm_v128, b_.wasm_v128);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i64 = a_.i64 + b_.i64;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {\n        r_.i64[i] = a_.i64[i] + b_.i64[i];\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_add_epi64(a, b) simde_mm_add_epi64(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_add_pd (simde__m128d a, simde__m128d b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_add_pd(a, b);\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a),\n      b_ = simde__m128d_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      r_.neon_f64 = 
vaddq_f64(a_.neon_f64, b_.neon_f64);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_f64x2_add(a_.wasm_v128, b_.wasm_v128);\n    #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)\n      r_.altivec_f64 = vec_add(a_.altivec_f64, b_.altivec_f64);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.f64 = a_.f64 + b_.f64;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = a_.f64[i] + b_.f64[i];\n      }\n    #endif\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_add_pd(a, b) simde_mm_add_pd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_move_sd (simde__m128d a, simde__m128d b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_move_sd(a, b);\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a),\n      b_ = simde__m128d_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      r_.neon_f64 = vsetq_lane_f64(vgetq_lane_f64(b_.neon_f64, 0), a_.neon_f64, 0);\n    #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)\n      #if defined(HEDLEY_IBM_VERSION)\n        r_.altivec_f64 = vec_xxpermdi(a_.altivec_f64, b_.altivec_f64, 1);\n      #else\n        r_.altivec_f64 = vec_xxpermdi(b_.altivec_f64, a_.altivec_f64, 1);\n      #endif\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 1);\n    #elif defined(SIMDE_SHUFFLE_VECTOR_)\n      r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 2, 1);\n    #else\n      r_.f64[0] = b_.f64[0];\n      r_.f64[1] = a_.f64[1];\n    #endif\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_move_sd(a, b) simde_mm_move_sd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_x_mm_broadcastlow_pd(simde__m128d a) {\n  /* This function broadcasts the first element in the input vector to\n   * all lanes.  It is used to avoid generating spurious exceptions in\n   * *_sd functions since there may be garbage in the upper lanes. 
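For\n   * example, simde_mm_add_sd (below) broadcasts lane 0 of both operands\n   * before its full-width add, so the upper lane is added on copies of the\n   * low values rather than on bits that might be signalling NaNs. 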
*/\n\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_castsi128_pd(_mm_shuffle_epi32(_mm_castpd_si128(a), 0x44));\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      r_.neon_f64 = vdupq_laneq_f64(a_.neon_f64, 0);\n    #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)\n      r_.altivec_f64 = vec_splat(a_.altivec_f64, 0);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_f64x2_splat(a_.f64[0]);\n    #elif defined(SIMDE_SHUFFLE_VECTOR_)\n      r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 0, 0);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = a_.f64[0];\n      }\n    #endif\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_add_sd (simde__m128d a, simde__m128d b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_add_sd(a, b);\n  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS)\n    return simde_mm_move_sd(a, simde_mm_add_pd(a, b));\n  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)\n    return simde_mm_move_sd(a, simde_mm_add_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b)));\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a),\n      b_ = simde__m128d_to_private(b);\n\n    r_.f64[0] = a_.f64[0] + b_.f64[0];\n    r_.f64[1] = a_.f64[1];\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_add_sd(a, b) simde_mm_add_sd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_add_si64 (simde__m64 a, simde__m64 b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_add_si64(a, b);\n  #else\n    simde__m64_private\n      r_,\n      a_ = simde__m64_to_private(a),\n      b_ = simde__m64_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i64 = vadd_s64(a_.neon_i64, b_.neon_i64);\n    #else\n      r_.i64[0] = a_.i64[0] + b_.i64[0];\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_add_si64(a, b) simde_mm_add_si64(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_adds_epi8 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_adds_epi8(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i8 = vqaddq_s8(a_.neon_i8, b_.neon_i8);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_i8x16_add_sat(a_.wasm_v128, b_.wasm_v128);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)\n      r_.altivec_i8 = vec_adds(a_.altivec_i8, b_.altivec_i8);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {\n        r_.i8[i] = simde_math_adds_i8(a_.i8[i], b_.i8[i]);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_adds_epi8(a, b) simde_mm_adds_epi8(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_adds_epi16 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_adds_epi16(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n   
   b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i16 = vqaddq_s16(a_.neon_i16, b_.neon_i16);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_i16x8_add_sat(a_.wasm_v128, b_.wasm_v128);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)\n      r_.altivec_i16 = vec_adds(a_.altivec_i16, b_.altivec_i16);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n        r_.i16[i] = simde_math_adds_i16(a_.i16[i], b_.i16[i]);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_adds_epi16(a, b) simde_mm_adds_epi16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_adds_epu8 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_adds_epu8(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u8 = vqaddq_u8(a_.neon_u8, b_.neon_u8);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_u8x16_add_sat(a_.wasm_v128, b_.wasm_v128);\n    #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)\n      r_.altivec_u8 = vec_adds(a_.altivec_u8, b_.altivec_u8);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {\n        r_.u8[i] = simde_math_adds_u8(a_.u8[i], b_.u8[i]);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_adds_epu8(a, b) simde_mm_adds_epu8(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_adds_epu16 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_adds_epu16(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u16 = vqaddq_u16(a_.neon_u16, b_.neon_u16);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_u16x8_add_sat(a_.wasm_v128, b_.wasm_v128);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)\n      r_.altivec_u16 = vec_adds(a_.altivec_u16, b_.altivec_u16);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {\n        r_.u16[i] = simde_math_adds_u16(a_.u16[i], b_.u16[i]);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_adds_epu16(a, b) simde_mm_adds_epu16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_and_pd (simde__m128d a, simde__m128d b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_and_pd(a, b);\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a),\n      b_ = simde__m128d_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i32 = vandq_s32(a_.neon_i32, b_.neon_i32);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128);\n    #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)\n      r_.altivec_f64 = vec_and(a_.altivec_f64, b_.altivec_f64);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i32f = a_.i32f & b_.i32f;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; 
i++) {\n        r_.i32f[i] = a_.i32f[i] & b_.i32f[i];\n      }\n    #endif\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_and_pd(a, b) simde_mm_and_pd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_and_si128 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_and_si128(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i32 = vandq_s32(b_.neon_i32, a_.neon_i32);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)\n      r_.altivec_u32f = vec_and(a_.altivec_u32f, b_.altivec_u32f);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i32f = a_.i32f & b_.i32f;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) {\n        r_.i32f[i] = a_.i32f[i] & b_.i32f[i];\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_and_si128(a, b) simde_mm_and_si128(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_andnot_pd (simde__m128d a, simde__m128d b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_andnot_pd(a, b);\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a),\n      b_ = simde__m128d_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128);\n    #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)\n      r_.altivec_f64 = vec_andc(b_.altivec_f64, a_.altivec_f64);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)\n      r_.altivec_i32f = vec_andc(b_.altivec_i32f, a_.altivec_i32f);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i32f = ~a_.i32f & b_.i32f;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {\n        r_.u64[i] = ~a_.u64[i] & b_.u64[i];\n      }\n    #endif\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_andnot_pd(a, b) simde_mm_andnot_pd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_andnot_si128 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_andnot_si128(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)\n      r_.altivec_i32 = vec_andc(b_.altivec_i32, a_.altivec_i32);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i32f = ~a_.i32f & b_.i32f;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) {\n        r_.i32f[i] = ~(a_.i32f[i]) & b_.i32f[i];\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if 
defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_andnot_si128(a, b) simde_mm_andnot_si128(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_xor_pd (simde__m128d a, simde__m128d b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_xor_pd(a, b);\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a),\n      b_ = simde__m128d_to_private(b);\n\n    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i32f = a_.i32f ^ b_.i32f;\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_v128_xor(a_.wasm_v128, b_.wasm_v128);\n    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i64 = veorq_s64(a_.neon_i64, b_.neon_i64);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) {\n        r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i];\n      }\n    #endif\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_xor_pd(a, b) simde_mm_xor_pd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_avg_epu8 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_avg_epu8(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u8 = vrhaddq_u8(b_.neon_u8, a_.neon_u8);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_u8x16_avgr(a_.wasm_v128, b_.wasm_v128);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)\n      r_.altivec_u8 = vec_avg(a_.altivec_u8, b_.altivec_u8);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_)\n      uint16_t wa SIMDE_VECTOR(32);\n      uint16_t wb SIMDE_VECTOR(32);\n      uint16_t wr SIMDE_VECTOR(32);\n      SIMDE_CONVERT_VECTOR_(wa, a_.u8);\n      SIMDE_CONVERT_VECTOR_(wb, b_.u8);\n      wr = (wa + wb + 1) >> 1;\n      SIMDE_CONVERT_VECTOR_(r_.u8, wr);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {\n        r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1;\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_avg_epu8(a, b) simde_mm_avg_epu8(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_avg_epu16 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_avg_epu16(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u16 = vrhaddq_u16(b_.neon_u16, a_.neon_u16);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_u16x8_avgr(a_.wasm_v128, b_.wasm_v128);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)\n      r_.altivec_u16 = vec_avg(a_.altivec_u16, b_.altivec_u16);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_)\n      uint32_t wa SIMDE_VECTOR(32);\n      uint32_t wb SIMDE_VECTOR(32);\n      uint32_t wr SIMDE_VECTOR(32);\n      SIMDE_CONVERT_VECTOR_(wa, a_.u16);\n      SIMDE_CONVERT_VECTOR_(wb, b_.u16);\n      wr = (wa + wb + 1) >> 1;\n      SIMDE_CONVERT_VECTOR_(r_.u16, wr);\n    #else\n      
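/* portable fallback: the rounded average (a + b + 1) >> 1, matching what\n         the x86 pavgw instruction computes in each lane. */\n      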
SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {\n        r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1;\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_avg_epu16(a, b) simde_mm_avg_epu16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_setzero_si128 (void) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_setzero_si128();\n  #else\n    simde__m128i_private r_;\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i32 = vdupq_n_s32(0);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)\n      r_.altivec_i32 = vec_splats(HEDLEY_STATIC_CAST(signed int, 0));\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_i32x4_splat(INT32_C(0));\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT)\n      r_.i32 = __extension__ (__typeof__(r_.i32)) { 0, 0, 0, 0 };\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) {\n        r_.i32f[i] = 0;\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_setzero_si128() (simde_mm_setzero_si128())\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_bslli_si128 (simde__m128i a, const int imm8)\n    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255)  {\n  simde__m128i_private\n    r_,\n    a_ = simde__m128i_to_private(a);\n\n  if (HEDLEY_UNLIKELY((imm8 & ~15))) {\n    return simde_mm_setzero_si128();\n  }\n\n  #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_ENDIAN_ORDER)\n    r_.altivec_i8 =\n      #if (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE)\n        vec_slo\n      #else /* SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG */\n        vec_sro\n      #endif\n        (a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, imm8 * 8)));\n  #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)\n    r_.altivec_i8 = vec_srb(a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, (imm8 & 15) << 3)));\n  #elif defined(SIMDE_HAVE_INT128_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE)\n    r_.u128[0] = a_.u128[0] << (imm8 * 8);\n  #else\n    r_ = simde__m128i_to_private(simde_mm_setzero_si128());\n    for (int i = imm8 ; i < HEDLEY_STATIC_CAST(int, sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {\n      r_.i8[i] = a_.i8[i - imm8];\n    }\n  #endif\n\n  return simde__m128i_from_private(r_);\n}\n#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI)\n  #define simde_mm_bslli_si128(a, imm8) _mm_slli_si128(a, imm8)\n#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__)\n  #define simde_mm_bslli_si128(a, imm8) \\\n  simde__m128i_from_neon_i8(((imm8) <= 0) ? simde__m128i_to_neon_i8(a) : (((imm8) > 15) ? (vdupq_n_s8(0)) : (vextq_s8(vdupq_n_s8(0), simde__m128i_to_neon_i8(a), 16 - (imm8)))))\n#elif defined(SIMDE_WASM_SIMD128_NATIVE)\n  #define simde_mm_bslli_si128(a, imm8) __extension__ ({        \\\n    simde__m128i_from_wasm_v128(                                \\\n      wasm_i8x16_shuffle(wasm_i32x4_splat(INT32_C(0)),          \\\n                         simde__m128i_to_wasm_v128((a)),        \\\n                         ((imm8)&0xF0) ? 0 : 16 - ((imm8)&0xF), \\\n                         ((imm8)&0xF0) ? 0 : 17 - ((imm8)&0xF), \\\n                         ((imm8)&0xF0) ? 0 : 18 - ((imm8)&0xF), \\\n                         ((imm8)&0xF0) ? 0 : 19 - ((imm8)&0xF), \\\n                         ((imm8)&0xF0) ? 
0 : 20 - ((imm8)&0xF), \\\n                         ((imm8)&0xF0) ? 0 : 21 - ((imm8)&0xF), \\\n                         ((imm8)&0xF0) ? 0 : 22 - ((imm8)&0xF), \\\n                         ((imm8)&0xF0) ? 0 : 23 - ((imm8)&0xF), \\\n                         ((imm8)&0xF0) ? 0 : 24 - ((imm8)&0xF), \\\n                         ((imm8)&0xF0) ? 0 : 25 - ((imm8)&0xF), \\\n                         ((imm8)&0xF0) ? 0 : 26 - ((imm8)&0xF), \\\n                         ((imm8)&0xF0) ? 0 : 27 - ((imm8)&0xF), \\\n                         ((imm8)&0xF0) ? 0 : 28 - ((imm8)&0xF), \\\n                         ((imm8)&0xF0) ? 0 : 29 - ((imm8)&0xF), \\\n                         ((imm8)&0xF0) ? 0 : 30 - ((imm8)&0xF), \\\n                         ((imm8)&0xF0) ? 0 : 31 - ((imm8)&0xF))); })\n#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)\n  #define simde_mm_bslli_si128(a, imm8) (__extension__ ({ \\\n    const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \\\n    const simde__m128i_private simde_tmp_z_ = simde__m128i_to_private(simde_mm_setzero_si128()); \\\n    simde__m128i_private simde_tmp_r_; \\\n    if (HEDLEY_UNLIKELY(imm8 > 15)) { \\\n      simde_tmp_r_ = simde__m128i_to_private(simde_mm_setzero_si128()); \\\n    } else { \\\n      simde_tmp_r_.i8 = \\\n        SIMDE_SHUFFLE_VECTOR_(8, 16, \\\n          simde_tmp_z_.i8, \\\n          (simde_tmp_a_).i8, \\\n          HEDLEY_STATIC_CAST(int8_t, (16 - imm8) & 31), \\\n          HEDLEY_STATIC_CAST(int8_t, (17 - imm8) & 31), \\\n          HEDLEY_STATIC_CAST(int8_t, (18 - imm8) & 31), \\\n          HEDLEY_STATIC_CAST(int8_t, (19 - imm8) & 31), \\\n          HEDLEY_STATIC_CAST(int8_t, (20 - imm8) & 31), \\\n          HEDLEY_STATIC_CAST(int8_t, (21 - imm8) & 31), \\\n          HEDLEY_STATIC_CAST(int8_t, (22 - imm8) & 31), \\\n          HEDLEY_STATIC_CAST(int8_t, (23 - imm8) & 31), \\\n          HEDLEY_STATIC_CAST(int8_t, (24 - imm8) & 31), \\\n          HEDLEY_STATIC_CAST(int8_t, (25 - imm8) & 31), \\\n          HEDLEY_STATIC_CAST(int8_t, (26 - imm8) & 31), \\\n          HEDLEY_STATIC_CAST(int8_t, (27 - imm8) & 31), \\\n          HEDLEY_STATIC_CAST(int8_t, (28 - imm8) & 31), \\\n          HEDLEY_STATIC_CAST(int8_t, (29 - imm8) & 31), \\\n          HEDLEY_STATIC_CAST(int8_t, (30 - imm8) & 31), \\\n          HEDLEY_STATIC_CAST(int8_t, (31 - imm8) & 31)); \\\n    } \\\n    simde__m128i_from_private(simde_tmp_r_); }))\n#endif\n#define simde_mm_slli_si128(a, imm8) simde_mm_bslli_si128(a, imm8)\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_bslli_si128(a, imm8) simde_mm_bslli_si128(a, imm8)\n  #define _mm_slli_si128(a, imm8) simde_mm_bslli_si128(a, imm8)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_bsrli_si128 (simde__m128i a, const int imm8)\n    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255)  {\n  simde__m128i_private\n    r_,\n    a_ = simde__m128i_to_private(a);\n\n  if (HEDLEY_UNLIKELY((imm8 & ~15))) {\n    return simde_mm_setzero_si128();\n  }\n\n  #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_ENDIAN_ORDER)\n    r_.altivec_i8 =\n    #if (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE)\n      vec_sro\n    #else /* SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG */\n      vec_slo\n    #endif\n        (a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, imm8 * 8)));\n  #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)\n    r_.altivec_i8 = vec_slb(a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, (imm8 & 15) << 3)));\n  #else\n    
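/* Portable fallback for the byte-wise right shift: result byte i comes from byte (i + imm8) of a, or zero once that index runs past the end of the vector. */\n    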
SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {\n      const int e = HEDLEY_STATIC_CAST(int, i) + imm8;\n      r_.i8[i] = (e < 16) ? a_.i8[e] : 0;\n    }\n  #endif\n\n  return simde__m128i_from_private(r_);\n}\n#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI)\n  #define simde_mm_bsrli_si128(a, imm8) _mm_srli_si128(a, imm8)\n#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__)\n  #define simde_mm_bsrli_si128(a, imm8) \\\n  simde__m128i_from_neon_i8(((imm8 < 0) || (imm8 > 15)) ? vdupq_n_s8(0) : (vextq_s8(simde__m128i_to_private(a).neon_i8, vdupq_n_s8(0), ((imm8 & 15) != 0) ? imm8 : (imm8 & 15))))\n#elif defined(SIMDE_WASM_SIMD128_NATIVE)\n  #define simde_mm_bsrli_si128(a, imm8) (__extension__ ({ \\\n    const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \\\n    const simde__m128i_private simde_tmp_z_ = simde__m128i_to_private(simde_mm_setzero_si128()); \\\n    simde__m128i_private simde_tmp_r_ = simde__m128i_to_private(a); \\\n    if (HEDLEY_UNLIKELY(imm8 > 15)) { \\\n      simde_tmp_r_ = simde__m128i_to_private(simde_mm_setzero_si128()); \\\n    } else { \\\n      simde_tmp_r_.wasm_v128 = \\\n      wasm_i8x16_shuffle( \\\n        simde_tmp_z_.wasm_v128, \\\n        simde_tmp_a_.wasm_v128, \\\n        HEDLEY_STATIC_CAST(int8_t, (imm8 + 16) & 31), \\\n        HEDLEY_STATIC_CAST(int8_t, (imm8 + 17) & 31), \\\n        HEDLEY_STATIC_CAST(int8_t, (imm8 + 18) & 31), \\\n        HEDLEY_STATIC_CAST(int8_t, (imm8 + 19) & 31), \\\n        HEDLEY_STATIC_CAST(int8_t, (imm8 + 20) & 31), \\\n        HEDLEY_STATIC_CAST(int8_t, (imm8 + 21) & 31), \\\n        HEDLEY_STATIC_CAST(int8_t, (imm8 + 22) & 31), \\\n        HEDLEY_STATIC_CAST(int8_t, (imm8 + 23) & 31), \\\n        HEDLEY_STATIC_CAST(int8_t, (imm8 + 24) & 31), \\\n        HEDLEY_STATIC_CAST(int8_t, (imm8 + 25) & 31), \\\n        HEDLEY_STATIC_CAST(int8_t, (imm8 + 26) & 31), \\\n        HEDLEY_STATIC_CAST(int8_t, (imm8 + 27) & 31), \\\n        HEDLEY_STATIC_CAST(int8_t, (imm8 + 28) & 31), \\\n        HEDLEY_STATIC_CAST(int8_t, (imm8 + 29) & 31), \\\n        HEDLEY_STATIC_CAST(int8_t, (imm8 + 30) & 31), \\\n        HEDLEY_STATIC_CAST(int8_t, (imm8 + 31) & 31)); \\\n    } \\\n    simde__m128i_from_private(simde_tmp_r_); }))\n#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)\n  #define simde_mm_bsrli_si128(a, imm8) (__extension__ ({ \\\n    const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \\\n    const simde__m128i_private simde_tmp_z_ = simde__m128i_to_private(simde_mm_setzero_si128()); \\\n    simde__m128i_private simde_tmp_r_ = simde__m128i_to_private(a); \\\n    if (HEDLEY_UNLIKELY(imm8 > 15)) { \\\n      simde_tmp_r_ = simde__m128i_to_private(simde_mm_setzero_si128()); \\\n    } else { \\\n      simde_tmp_r_.i8 = \\\n      SIMDE_SHUFFLE_VECTOR_(8, 16, \\\n        simde_tmp_z_.i8, \\\n        (simde_tmp_a_).i8, \\\n        HEDLEY_STATIC_CAST(int8_t, (imm8 + 16) & 31), \\\n        HEDLEY_STATIC_CAST(int8_t, (imm8 + 17) & 31), \\\n        HEDLEY_STATIC_CAST(int8_t, (imm8 + 18) & 31), \\\n        HEDLEY_STATIC_CAST(int8_t, (imm8 + 19) & 31), \\\n        HEDLEY_STATIC_CAST(int8_t, (imm8 + 20) & 31), \\\n        HEDLEY_STATIC_CAST(int8_t, (imm8 + 21) & 31), \\\n        HEDLEY_STATIC_CAST(int8_t, (imm8 + 22) & 31), \\\n        HEDLEY_STATIC_CAST(int8_t, (imm8 + 23) & 31), \\\n        HEDLEY_STATIC_CAST(int8_t, (imm8 + 24) & 31), \\\n        HEDLEY_STATIC_CAST(int8_t, (imm8 + 25) & 31), 
\\\n        HEDLEY_STATIC_CAST(int8_t, (imm8 + 26) & 31), \\\n        HEDLEY_STATIC_CAST(int8_t, (imm8 + 27) & 31), \\\n        HEDLEY_STATIC_CAST(int8_t, (imm8 + 28) & 31), \\\n        HEDLEY_STATIC_CAST(int8_t, (imm8 + 29) & 31), \\\n        HEDLEY_STATIC_CAST(int8_t, (imm8 + 30) & 31), \\\n        HEDLEY_STATIC_CAST(int8_t, (imm8 + 31) & 31)); \\\n    } \\\n    simde__m128i_from_private(simde_tmp_r_); }))\n#endif\n#define simde_mm_srli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8))\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_bsrli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8))\n  #define _mm_srli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nvoid\nsimde_mm_clflush (void const* p) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    _mm_clflush(p);\n  #else\n    (void) p;\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_clflush(p) simde_mm_clflush(p)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nint\nsimde_mm_comieq_sd (simde__m128d a, simde__m128d b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_comieq_sd(a, b);\n  #else\n    simde__m128d_private\n      a_ = simde__m128d_to_private(a),\n      b_ = simde__m128d_to_private(b);\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      return !!vgetq_lane_u64(vceqq_f64(a_.neon_f64, b_.neon_f64), 0);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      return wasm_f64x2_extract_lane(a_.wasm_v128, 0) == wasm_f64x2_extract_lane(b_.wasm_v128, 0);\n    #else\n      return a_.f64[0] == b_.f64[0];\n    #endif\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_comieq_sd(a, b) simde_mm_comieq_sd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nint\nsimde_mm_comige_sd (simde__m128d a, simde__m128d b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_comige_sd(a, b);\n  #else\n    simde__m128d_private\n      a_ = simde__m128d_to_private(a),\n      b_ = simde__m128d_to_private(b);\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      return !!vgetq_lane_u64(vcgeq_f64(a_.neon_f64, b_.neon_f64), 0);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      return wasm_f64x2_extract_lane(a_.wasm_v128, 0) >= wasm_f64x2_extract_lane(b_.wasm_v128, 0);\n    #else\n      return a_.f64[0] >= b_.f64[0];\n    #endif\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_comige_sd(a, b) simde_mm_comige_sd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nint\nsimde_mm_comigt_sd (simde__m128d a, simde__m128d b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_comigt_sd(a, b);\n  #else\n    simde__m128d_private\n      a_ = simde__m128d_to_private(a),\n      b_ = simde__m128d_to_private(b);\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      return !!vgetq_lane_u64(vcgtq_f64(a_.neon_f64, b_.neon_f64), 0);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      return wasm_f64x2_extract_lane(a_.wasm_v128, 0) > wasm_f64x2_extract_lane(b_.wasm_v128, 0);\n    #else\n      return a_.f64[0] > b_.f64[0];\n    #endif\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_comigt_sd(a, b) simde_mm_comigt_sd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nint\nsimde_mm_comile_sd (simde__m128d a, simde__m128d b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_comile_sd(a, b);\n  #else\n    simde__m128d_private\n      a_ = simde__m128d_to_private(a),\n      b_ = simde__m128d_to_private(b);\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      return !!vgetq_lane_u64(vcleq_f64(a_.neon_f64, b_.neon_f64), 0);\n    #elif 
defined(SIMDE_WASM_SIMD128_NATIVE)\n      return wasm_f64x2_extract_lane(a_.wasm_v128, 0) <= wasm_f64x2_extract_lane(b_.wasm_v128, 0);\n    #else\n      return a_.f64[0] <= b_.f64[0];\n    #endif\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_comile_sd(a, b) simde_mm_comile_sd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nint\nsimde_mm_comilt_sd (simde__m128d a, simde__m128d b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_comilt_sd(a, b);\n  #else\n    simde__m128d_private\n      a_ = simde__m128d_to_private(a),\n      b_ = simde__m128d_to_private(b);\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      return !!vgetq_lane_u64(vcltq_f64(a_.neon_f64, b_.neon_f64), 0);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      return wasm_f64x2_extract_lane(a_.wasm_v128, 0) < wasm_f64x2_extract_lane(b_.wasm_v128, 0);\n    #else\n      return a_.f64[0] < b_.f64[0];\n    #endif\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_comilt_sd(a, b) simde_mm_comilt_sd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nint\nsimde_mm_comineq_sd (simde__m128d a, simde__m128d b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_comineq_sd(a, b);\n  #else\n    simde__m128d_private\n      a_ = simde__m128d_to_private(a),\n      b_ = simde__m128d_to_private(b);\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      return !vgetq_lane_u64(vceqq_f64(a_.neon_f64, b_.neon_f64), 0);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      return wasm_f64x2_extract_lane(a_.wasm_v128, 0) != wasm_f64x2_extract_lane(b_.wasm_v128, 0);\n    #else\n      return a_.f64[0] != b_.f64[0];\n    #endif\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_comineq_sd(a, b) simde_mm_comineq_sd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_x_mm_copysign_pd(simde__m128d dest, simde__m128d src) {\n  simde__m128d_private\n    r_,\n    dest_ = simde__m128d_to_private(dest),\n    src_ = simde__m128d_to_private(src);\n\n  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      uint64x2_t sign_pos = vreinterpretq_u64_f64(vdupq_n_f64(-SIMDE_FLOAT64_C(0.0)));\n    #else\n      simde_float64 dbl_nz = -SIMDE_FLOAT64_C(0.0);\n      uint64_t u64_nz;\n      simde_memcpy(&u64_nz, &dbl_nz, sizeof(u64_nz));\n      uint64x2_t sign_pos = vdupq_n_u64(u64_nz);\n    #endif\n    r_.neon_u64 = vbslq_u64(sign_pos, src_.neon_u64, dest_.neon_u64);\n  #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)\n    #if defined(SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS)\n      r_.altivec_f64 = vec_cpsgn(dest_.altivec_f64, src_.altivec_f64);\n    #else\n      r_.altivec_f64 = vec_cpsgn(src_.altivec_f64, dest_.altivec_f64);\n    #endif\n  #elif defined(simde_math_copysign)\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n      r_.f64[i] = simde_math_copysign(dest_.f64[i], src_.f64[i]);\n    }\n  #else\n    simde__m128d sgnbit = simde_mm_set1_pd(-SIMDE_FLOAT64_C(0.0));\n    return simde_mm_xor_pd(simde_mm_and_pd(sgnbit, src), simde_mm_andnot_pd(sgnbit, dest));\n  #endif\n\n  return simde__m128d_from_private(r_);\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_x_mm_xorsign_pd(simde__m128d dest, simde__m128d src) {\n  return simde_mm_xor_pd(simde_mm_and_pd(simde_mm_set1_pd(-0.0), src), dest);\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_castpd_ps (simde__m128d a) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_castpd_ps(a);\n  #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n    
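/* On AArch64, simde__m128d is float64x2_t, so this cast is a lane-preserving register reinterpret with no data movement. */\n    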
return vreinterpretq_f32_f64(a);\n  #else\n    simde__m128 r;\n    simde_memcpy(&r, &a, sizeof(a));\n    return r;\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_castpd_ps(a) simde_mm_castpd_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_castpd_si128 (simde__m128d a) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_castpd_si128(a);\n  #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n    return vreinterpretq_s64_f64(a);\n  #else\n    simde__m128i r;\n    simde_memcpy(&r, &a, sizeof(a));\n    return r;\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_castpd_si128(a) simde_mm_castpd_si128(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_castps_pd (simde__m128 a) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_castps_pd(a);\n  #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n    return vreinterpretq_f64_f32(a);\n  #else\n    simde__m128d r;\n    simde_memcpy(&r, &a, sizeof(a));\n    return r;\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_castps_pd(a) simde_mm_castps_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_castps_si128 (simde__m128 a) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_castps_si128(a);\n  #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n    return simde__m128i_from_neon_i32(simde__m128_to_private(a).neon_i32);\n  #else\n    simde__m128i r;\n    simde_memcpy(&r, &a, sizeof(a));\n    return r;\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_castps_si128(a) simde_mm_castps_si128(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_castsi128_pd (simde__m128i a) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_castsi128_pd(a);\n  #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n    return vreinterpretq_f64_s64(a);\n  #else\n    simde__m128d r;\n    simde_memcpy(&r, &a, sizeof(a));\n    return r;\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_castsi128_pd(a) simde_mm_castsi128_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_castsi128_ps (simde__m128i a) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_castsi128_ps(a);\n  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)\n    return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), a);\n  #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n    return simde__m128_from_neon_i32(simde__m128i_to_private(a).neon_i32);\n  #else\n    simde__m128 r;\n    simde_memcpy(&r, &a, sizeof(a));\n    return r;\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_castsi128_ps(a) simde_mm_castsi128_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_cmpeq_epi8 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_cmpeq_epi8(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u8 = vceqq_s8(b_.neon_i8, a_.neon_i8);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_i8x16_eq(a_.wasm_v128, b_.wasm_v128);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)\n      r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_cmpeq(a_.altivec_i8, b_.altivec_i8));\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 == b_.i8));\n    #else\n      
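/* Scalar fallback: SSE2 compares produce all-ones (~0) in each matching byte lane and zero elsewhere. */\n      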
SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {\n        r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? ~INT8_C(0) : INT8_C(0);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_cmpeq_epi8(a, b) simde_mm_cmpeq_epi8(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_cmpeq_epi16 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_cmpeq_epi16(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u16 = vceqq_s16(b_.neon_i16, a_.neon_i16);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_i16x8_eq(a_.wasm_v128, b_.wasm_v128);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)\n      r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmpeq(a_.altivec_i16, b_.altivec_i16));\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i16 = (a_.i16 == b_.i16);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n        r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? ~INT16_C(0) : INT16_C(0);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_cmpeq_epi16(a, b) simde_mm_cmpeq_epi16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_cmpeq_epi32 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_cmpeq_epi32(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u32 = vceqq_s32(b_.neon_i32, a_.neon_i32);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_i32x4_eq(a_.wasm_v128, b_.wasm_v128);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)\n      r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmpeq(a_.altivec_i32, b_.altivec_i32));\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 == b_.i32);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n        r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? 
~INT32_C(0) : INT32_C(0);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_cmpeq_epi32(a, b) simde_mm_cmpeq_epi32(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_cmpeq_pd (simde__m128d a, simde__m128d b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_cmpeq_pd(a, b);\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a),\n      b_ = simde__m128d_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      r_.neon_u64 = vceqq_f64(b_.neon_f64, a_.neon_f64);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_f64x2_eq(a_.wasm_v128, b_.wasm_v128);\n    #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)\n      r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpeq(a_.altivec_f64, b_.altivec_f64));\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 == b_.f64));\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.u64[i] = (a_.f64[i] == b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0);\n      }\n    #endif\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_cmpeq_pd(a, b) simde_mm_cmpeq_pd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_cmpeq_sd (simde__m128d a, simde__m128d b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_cmpeq_sd(a, b);\n  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS)\n    return simde_mm_move_sd(a, simde_mm_cmpeq_pd(a, b));\n  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)\n    return simde_mm_move_sd(a, simde_mm_cmpeq_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b)));\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a),\n      b_ = simde__m128d_to_private(b);\n\n    /* Compare the doubles, not their bit patterns: under IEEE 754, NaN != NaN and +0.0 == -0.0. */\n    r_.u64[0] = (a_.f64[0] == b_.f64[0]) ? ~UINT64_C(0) : 0;\n    r_.u64[1] = a_.u64[1];\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_cmpeq_sd(a, b) simde_mm_cmpeq_sd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_cmpneq_pd (simde__m128d a, simde__m128d b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_cmpneq_pd(a, b);\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a),\n      b_ = simde__m128d_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      r_.neon_u32 = vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(b_.neon_f64, a_.neon_f64)));\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_f64x2_ne(a_.wasm_v128, b_.wasm_v128);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != b_.f64));\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.u64[i] = (a_.f64[i] != b_.f64[i]) ? 
~UINT64_C(0) : UINT64_C(0);\n      }\n    #endif\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_cmpneq_pd(a, b) simde_mm_cmpneq_pd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_cmpneq_sd (simde__m128d a, simde__m128d b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_cmpneq_sd(a, b);\n  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS)\n    return simde_mm_move_sd(a, simde_mm_cmpneq_pd(a, b));\n  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)\n    return simde_mm_move_sd(a, simde_mm_cmpneq_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b)));\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a),\n      b_ = simde__m128d_to_private(b);\n\n    r_.u64[0] = (a_.f64[0] != b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0);\n    r_.u64[1] = a_.u64[1];\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_cmpneq_sd(a, b) simde_mm_cmpneq_sd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_cmplt_epi8 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_cmplt_epi8(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u8 = vcltq_s8(a_.neon_i8, b_.neon_i8);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)\n      r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_cmplt(a_.altivec_i8, b_.altivec_i8));\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_i8x16_lt(a_.wasm_v128, b_.wasm_v128);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 < b_.i8));\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {\n        r_.i8[i] = (a_.i8[i] < b_.i8[i]) ? ~INT8_C(0) : INT8_C(0);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_cmplt_epi8(a, b) simde_mm_cmplt_epi8(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_cmplt_epi16 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_cmplt_epi16(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u16 = vcltq_s16(a_.neon_i16, b_.neon_i16);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)\n      r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmplt(a_.altivec_i16, b_.altivec_i16));\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_i16x8_lt(a_.wasm_v128, b_.wasm_v128);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (a_.i16 < b_.i16));\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n        r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_cmplt_epi16(a, b) simde_mm_cmplt_epi16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_cmplt_epi32 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_cmplt_epi32(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u32 = vcltq_s32(a_.neon_i32, b_.neon_i32);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)\n      r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmplt(a_.altivec_i32, b_.altivec_i32));\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_i32x4_lt(a_.wasm_v128, b_.wasm_v128);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.i32 < b_.i32));\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n        r_.i32[i] = (a_.i32[i] < b_.i32[i]) ? ~INT32_C(0) : INT32_C(0);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_cmplt_epi32(a, b) simde_mm_cmplt_epi32(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_cmplt_pd (simde__m128d a, simde__m128d b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_cmplt_pd(a, b);\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a),\n      b_ = simde__m128d_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      r_.neon_u64 = vcltq_f64(a_.neon_f64, b_.neon_f64);\n    #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)\n      r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmplt(a_.altivec_f64, b_.altivec_f64));\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_f64x2_lt(a_.wasm_v128, b_.wasm_v128);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 < b_.f64));\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.u64[i] = (a_.f64[i] < b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0);\n      }\n    #endif\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_cmplt_pd(a, b) simde_mm_cmplt_pd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_cmplt_sd (simde__m128d a, simde__m128d b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_cmplt_sd(a, b);\n  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS)\n    return simde_mm_move_sd(a, simde_mm_cmplt_pd(a, b));\n  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)\n    return simde_mm_move_sd(a, simde_mm_cmplt_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b)));\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a),\n      b_ = simde__m128d_to_private(b);\n\n    r_.u64[0] = (a_.f64[0] < b_.f64[0]) ? 
~UINT64_C(0) : UINT64_C(0);\n    r_.u64[1] = a_.u64[1];\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_cmplt_sd(a, b) simde_mm_cmplt_sd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_cmple_pd (simde__m128d a, simde__m128d b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_cmple_pd(a, b);\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a),\n      b_ = simde__m128d_to_private(b);\n\n    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 <= b_.f64));\n    #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      r_.neon_u64 = vcleq_f64(a_.neon_f64, b_.neon_f64);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_f64x2_le(a_.wasm_v128, b_.wasm_v128);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)\n      r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmple(a_.altivec_f64, b_.altivec_f64));\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.u64[i] = (a_.f64[i] <= b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0);\n      }\n    #endif\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_cmple_pd(a, b) simde_mm_cmple_pd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_cmple_sd (simde__m128d a, simde__m128d b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_cmple_sd(a, b);\n  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS)\n    return simde_mm_move_sd(a, simde_mm_cmple_pd(a, b));\n  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)\n    return simde_mm_move_sd(a, simde_mm_cmple_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b)));\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a),\n      b_ = simde__m128d_to_private(b);\n\n    r_.u64[0] = (a_.f64[0] <= b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0);\n    r_.u64[1] = a_.u64[1];\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_cmple_sd(a, b) simde_mm_cmple_sd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_cmpgt_epi8 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_cmpgt_epi8(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u8 = vcgtq_s8(a_.neon_i8, b_.neon_i8);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_i8x16_gt(a_.wasm_v128, b_.wasm_v128);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)\n      r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_cmpgt(a_.altivec_i8, b_.altivec_i8));\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 > b_.i8));\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {\n        r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? 
~INT8_C(0) : INT8_C(0);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_cmpgt_epi8(a, b) simde_mm_cmpgt_epi8(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_cmpgt_epi16 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_cmpgt_epi16(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u16 = vcgtq_s16(a_.neon_i16, b_.neon_i16);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_i16x8_gt(a_.wasm_v128, b_.wasm_v128);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)\n      r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmpgt(a_.altivec_i16, b_.altivec_i16));\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (a_.i16 > b_.i16));\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n        r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? ~INT16_C(0) : INT16_C(0);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_cmpgt_epi16(a, b) simde_mm_cmpgt_epi16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_cmpgt_epi32 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_cmpgt_epi32(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u32 = vcgtq_s32(a_.neon_i32, b_.neon_i32);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_i32x4_gt(a_.wasm_v128, b_.wasm_v128);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)\n      r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmpgt(a_.altivec_i32, b_.altivec_i32));\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.i32 > b_.i32));\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n        r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? 
~INT32_C(0) : INT32_C(0);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_cmpgt_epi32(a, b) simde_mm_cmpgt_epi32(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_cmpgt_pd (simde__m128d a, simde__m128d b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_cmpgt_pd(a, b);\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a),\n      b_ = simde__m128d_to_private(b);\n\n    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 > b_.f64));\n    #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      r_.neon_u64 = vcgtq_f64(a_.neon_f64, b_.neon_f64);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_f64x2_gt(a_.wasm_v128, b_.wasm_v128);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)\n      r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpgt(a_.altivec_f64, b_.altivec_f64));\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.u64[i] = (a_.f64[i] > b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0);\n      }\n    #endif\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_cmpgt_pd(a, b) simde_mm_cmpgt_pd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_cmpgt_sd (simde__m128d a, simde__m128d b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI)\n    return _mm_cmpgt_sd(a, b);\n  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS)\n    return simde_mm_move_sd(a, simde_mm_cmpgt_pd(a, b));\n  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)\n    return simde_mm_move_sd(a, simde_mm_cmpgt_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b)));\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a),\n      b_ = simde__m128d_to_private(b);\n\n    r_.u64[0] = (a_.f64[0] > b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0);\n    r_.u64[1] = a_.u64[1];\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_cmpgt_sd(a, b) simde_mm_cmpgt_sd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_cmpge_pd (simde__m128d a, simde__m128d b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_cmpge_pd(a, b);\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a),\n      b_ = simde__m128d_to_private(b);\n\n    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 >= b_.f64));\n    #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      r_.neon_u64 = vcgeq_f64(a_.neon_f64, b_.neon_f64);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_f64x2_ge(a_.wasm_v128, b_.wasm_v128);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)\n      r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpge(a_.altivec_f64, b_.altivec_f64));\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.u64[i] = (a_.f64[i] >= b_.f64[i]) ? 
~UINT64_C(0) : UINT64_C(0);\n      }\n    #endif\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_cmpge_pd(a, b) simde_mm_cmpge_pd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_cmpge_sd (simde__m128d a, simde__m128d b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI)\n    return _mm_cmpge_sd(a, b);\n  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS)\n    return simde_mm_move_sd(a, simde_mm_cmpge_pd(a, b));\n  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)\n    return simde_mm_move_sd(a, simde_mm_cmpge_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b)));\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a),\n      b_ = simde__m128d_to_private(b);\n\n    r_.u64[0] = (a_.f64[0] >= b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0);\n    r_.u64[1] = a_.u64[1];\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_cmpge_sd(a, b) simde_mm_cmpge_sd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_cmpngt_pd (simde__m128d a, simde__m128d b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_cmpngt_pd(a, b);\n  #else\n    return simde_mm_cmple_pd(a, b);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_cmpngt_pd(a, b) simde_mm_cmpngt_pd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_cmpngt_sd (simde__m128d a, simde__m128d b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI)\n    return _mm_cmpngt_sd(a, b);\n  #else\n    return simde_mm_cmple_sd(a, b);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_cmpngt_sd(a, b) simde_mm_cmpngt_sd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_cmpnge_pd (simde__m128d a, simde__m128d b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_cmpnge_pd(a, b);\n  #else\n    return simde_mm_cmplt_pd(a, b);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_cmpnge_pd(a, b) simde_mm_cmpnge_pd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_cmpnge_sd (simde__m128d a, simde__m128d b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI)\n    return _mm_cmpnge_sd(a, b);\n  #else\n    return simde_mm_cmplt_sd(a, b);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_cmpnge_sd(a, b) simde_mm_cmpnge_sd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_cmpnlt_pd (simde__m128d a, simde__m128d b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_cmpnlt_pd(a, b);\n  #else\n    return simde_mm_cmpge_pd(a, b);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_cmpnlt_pd(a, b) simde_mm_cmpnlt_pd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_cmpnlt_sd (simde__m128d a, simde__m128d b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_cmpnlt_sd(a, b);\n  #else\n    return simde_mm_cmpge_sd(a, b);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_cmpnlt_sd(a, b) simde_mm_cmpnlt_sd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_cmpnle_pd (simde__m128d a, simde__m128d b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_cmpnle_pd(a, b);\n  #else\n    return simde_mm_cmpgt_pd(a, b);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_cmpnle_pd(a, b) simde_mm_cmpnle_pd(a, 
b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_cmpnle_sd (simde__m128d a, simde__m128d b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_cmpnle_sd(a, b);\n  #else\n    return simde_mm_cmpgt_sd(a, b);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_cmpnle_sd(a, b) simde_mm_cmpnle_sd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_cmpord_pd (simde__m128d a, simde__m128d b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_cmpord_pd(a, b);\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a),\n      b_ = simde__m128d_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      /* Note: NEON does not have ordered compare builtin\n        Need to compare a eq a and b eq b to check for NaN\n        Do AND of results to get final */\n      uint64x2_t ceqaa = vceqq_f64(a_.neon_f64, a_.neon_f64);\n      uint64x2_t ceqbb = vceqq_f64(b_.neon_f64, b_.neon_f64);\n      r_.neon_u64 = vandq_u64(ceqaa, ceqbb);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_v128_and(wasm_f64x2_eq(a_.wasm_v128, a_.wasm_v128),\n                                   wasm_f64x2_eq(b_.wasm_v128, b_.wasm_v128));\n    #elif defined(simde_math_isnan)\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.u64[i] = (!simde_math_isnan(a_.f64[i]) && !simde_math_isnan(b_.f64[i])) ? ~UINT64_C(0) : UINT64_C(0);\n      }\n    #else\n      HEDLEY_UNREACHABLE();\n    #endif\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_cmpord_pd(a, b) simde_mm_cmpord_pd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde_float64\nsimde_mm_cvtsd_f64 (simde__m128d a) {\n  #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI)\n    return _mm_cvtsd_f64(a);\n  #else\n    simde__m128d_private a_ = simde__m128d_to_private(a);\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      return HEDLEY_STATIC_CAST(simde_float64, vgetq_lane_f64(a_.neon_f64, 0));\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      return HEDLEY_STATIC_CAST(simde_float64, wasm_f64x2_extract_lane(a_.wasm_v128, 0));\n    #else\n      return a_.f64[0];\n    #endif\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_cvtsd_f64(a) simde_mm_cvtsd_f64(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_cmpord_sd (simde__m128d a, simde__m128d b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_cmpord_sd(a, b);\n  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS)\n    return simde_mm_move_sd(a, simde_mm_cmpord_pd(a, b));\n  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)\n    return simde_mm_move_sd(a, simde_mm_cmpord_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b)));\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a),\n      b_ = simde__m128d_to_private(b);\n\n    #if defined(simde_math_isnan)\n      r_.u64[0] = (!simde_math_isnan(a_.f64[0]) && !simde_math_isnan(b_.f64[0])) ? 
~UINT64_C(0) : UINT64_C(0);\n      r_.u64[1] = a_.u64[1];\n    #else\n      HEDLEY_UNREACHABLE();\n    #endif\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_cmpord_sd(a, b) simde_mm_cmpord_sd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_cmpunord_pd (simde__m128d a, simde__m128d b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_cmpunord_pd(a, b);\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a),\n      b_ = simde__m128d_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      uint64x2_t ceqaa = vceqq_f64(a_.neon_f64, a_.neon_f64);\n      uint64x2_t ceqbb = vceqq_f64(b_.neon_f64, b_.neon_f64);\n      r_.neon_u64 = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(ceqaa, ceqbb))));\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_v128_or(wasm_f64x2_ne(a_.wasm_v128, a_.wasm_v128),\n                                  wasm_f64x2_ne(b_.wasm_v128, b_.wasm_v128));\n    #elif defined(simde_math_isnan)\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.u64[i] = (simde_math_isnan(a_.f64[i]) || simde_math_isnan(b_.f64[i])) ? ~UINT64_C(0) : UINT64_C(0);\n      }\n    #else\n      HEDLEY_UNREACHABLE();\n    #endif\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_cmpunord_pd(a, b) simde_mm_cmpunord_pd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_cmpunord_sd (simde__m128d a, simde__m128d b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_cmpunord_sd(a, b);\n  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS)\n    return simde_mm_move_sd(a, simde_mm_cmpunord_pd(a, b));\n  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)\n    return simde_mm_move_sd(a, simde_mm_cmpunord_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b)));\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a),\n      b_ = simde__m128d_to_private(b);\n\n    #if defined(simde_math_isnan)\n      r_.u64[0] = (simde_math_isnan(a_.f64[0]) || simde_math_isnan(b_.f64[0])) ? 
~UINT64_C(0) : UINT64_C(0);\n      r_.u64[1] = a_.u64[1];\n    #else\n      HEDLEY_UNREACHABLE();\n    #endif\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_cmpunord_sd(a, b) simde_mm_cmpunord_sd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_cvtepi32_pd (simde__m128i a) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_cvtepi32_pd(a);\n  #else\n    simde__m128d_private r_;\n    simde__m128i_private a_ = simde__m128i_to_private(a);\n\n    #if defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_f64x2_convert_low_i32x4(a_.wasm_v128);\n    #elif defined(SIMDE_CONVERT_VECTOR_)\n      SIMDE_CONVERT_VECTOR_(r_.f64, a_.m64_private[0].i32);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = (simde_float64) a_.i32[i];\n      }\n    #endif\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_cvtepi32_pd(a) simde_mm_cvtepi32_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_cvtepi32_ps (simde__m128i a) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_cvtepi32_ps(a);\n  #else\n    simde__m128_private r_;\n    simde__m128i_private a_ = simde__m128i_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_f32 = vcvtq_f32_s32(a_.neon_i32);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_f32x4_convert_i32x4(a_.wasm_v128);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)\n      HEDLEY_DIAGNOSTIC_PUSH\n      #if HEDLEY_HAS_WARNING(\"-Wc11-extensions\")\n        #pragma clang diagnostic ignored \"-Wc11-extensions\"\n      #endif\n      r_.altivec_f32 = vec_ctf(a_.altivec_i32, 0);\n      HEDLEY_DIAGNOSTIC_POP\n    #elif defined(SIMDE_CONVERT_VECTOR_)\n      SIMDE_CONVERT_VECTOR_(r_.f32, a_.i32);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = (simde_float32) a_.i32[i];\n      }\n    #endif\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_cvtepi32_ps(a) simde_mm_cvtepi32_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_cvtpd_pi32 (simde__m128d a) {\n  #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_cvtpd_pi32(a);\n  #else\n    simde__m64_private r_;\n    simde__m128d_private a_ = simde__m128d_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n      simde_float64 v = simde_math_round(a_.f64[i]);\n      #if defined(SIMDE_FAST_CONVERSION_RANGE)\n        r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v);\n      #else\n        r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ?\n          SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN;\n      #endif\n    }\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_cvtpd_pi32(a) simde_mm_cvtpd_pi32(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_cvtpd_epi32 (simde__m128d a) {\n  #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_BUG_PGI_30107)\n    return _mm_cvtpd_epi32(a);\n  #else\n    simde__m128i_private r_;\n\n    r_.m64[0] = simde_mm_cvtpd_pi32(a);\n    r_.m64[1] = simde_mm_setzero_si64();\n\n    return 
simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_cvtpd_epi32(a) simde_mm_cvtpd_epi32(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_cvtpd_ps (simde__m128d a) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_cvtpd_ps(a);\n  #else\n    simde__m128_private r_;\n    simde__m128d_private a_ = simde__m128d_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      r_.neon_f32 = vcombine_f32(vcvt_f32_f64(a_.neon_f64), vdup_n_f32(0.0f));\n    #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)\n      r_.altivec_f32 = vec_float2(a_.altivec_f64, vec_splats(0.0));\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_f32x4_demote_f64x2_zero(a_.wasm_v128);\n    #elif HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && HEDLEY_HAS_BUILTIN(__builtin_convertvector)\n      float __attribute__((__vector_size__(8))) z = { 0.0f, 0.0f };\n      r_.f32 =\n        __builtin_shufflevector(\n          __builtin_convertvector(__builtin_shufflevector(a_.f64, a_.f64, 0, 1), __typeof__(z)), z,\n          0, 1, 2, 3\n        );\n    #else\n      r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, a_.f64[0]);\n      r_.f32[1] = HEDLEY_STATIC_CAST(simde_float32, a_.f64[1]);\n      r_.f32[2] = SIMDE_FLOAT32_C(0.0);\n      r_.f32[3] = SIMDE_FLOAT32_C(0.0);\n    #endif\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_cvtpd_ps(a) simde_mm_cvtpd_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_cvtpi32_pd (simde__m64 a) {\n  #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_cvtpi32_pd(a);\n  #else\n    simde__m128d_private r_;\n    simde__m64_private a_ = simde__m64_to_private(a);\n\n    #if defined(SIMDE_CONVERT_VECTOR_)\n      SIMDE_CONVERT_VECTOR_(r_.f64, a_.i32);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = (simde_float64) a_.i32[i];\n      }\n    #endif\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_cvtpi32_pd(a) simde_mm_cvtpi32_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_cvtps_epi32 (simde__m128 a) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_cvtps_epi32(a);\n  #else\n    simde__m128i_private r_;\n    simde__m128_private a_;\n\n    #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) && !defined(SIMDE_BUG_GCC_95399)\n      a_ = simde__m128_to_private(a);\n      r_.neon_i32 = vcvtnq_s32_f32(a_.neon_f32);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES)\n      a_ = simde__m128_to_private(a);\n      HEDLEY_DIAGNOSTIC_PUSH\n      SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_\n      SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_\n      r_.altivec_i32 = vec_cts(a_.altivec_f32, 1);\n      HEDLEY_DIAGNOSTIC_POP\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES)\n      a_ = simde__m128_to_private(a);\n      r_.wasm_v128 = wasm_i32x4_trunc_sat_f32x4(a_.wasm_v128);\n    #else\n      a_ = simde__m128_to_private(simde_x_mm_round_ps(a, SIMDE_MM_FROUND_TO_NEAREST_INT, 1));\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n        simde_float32 v = simde_math_roundf(a_.f32[i]);\n 
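      /* Out-of-range inputs become INT32_MIN here, mirroring the x86 integer-indefinite result, unless SIMDE_FAST_CONVERSION_RANGE elides the range check. */\n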
       #if defined(SIMDE_FAST_CONVERSION_RANGE)\n          r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v);\n        #else\n          r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ?\n            SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN;\n        #endif\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_cvtps_epi32(a) simde_mm_cvtps_epi32(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_cvtps_pd (simde__m128 a) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_cvtps_pd(a);\n  #else\n    simde__m128d_private r_;\n    simde__m128_private a_ = simde__m128_to_private(a);\n\n    #if defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_f64x2_promote_low_f32x4(a_.wasm_v128);\n    #elif defined(SIMDE_CONVERT_VECTOR_)\n      SIMDE_CONVERT_VECTOR_(r_.f64, a_.m64_private[0].f32);\n    #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      r_.neon_f64 = vcvt_f64_f32(vget_low_f32(a_.neon_f32));\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = a_.f32[i];\n      }\n    #endif\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_cvtps_pd(a) simde_mm_cvtps_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nint32_t\nsimde_mm_cvtsd_si32 (simde__m128d a) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_cvtsd_si32(a);\n  #else\n    simde__m128d_private a_ = simde__m128d_to_private(a);\n\n    simde_float64 v = simde_math_round(a_.f64[0]);\n    #if defined(SIMDE_FAST_CONVERSION_RANGE)\n      return SIMDE_CONVERT_FTOI(int32_t, v);\n    #else\n      return ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ?\n        SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN;\n    #endif\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_cvtsd_si32(a) simde_mm_cvtsd_si32(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nint64_t\nsimde_mm_cvtsd_si64 (simde__m128d a) {\n  #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64)\n    #if defined(__PGI)\n      return _mm_cvtsd_si64x(a);\n    #else\n      return _mm_cvtsd_si64(a);\n    #endif\n  #else\n    simde__m128d_private a_ = simde__m128d_to_private(a);\n    return SIMDE_CONVERT_FTOI(int64_t, simde_math_round(a_.f64[0]));\n  #endif\n}\n#define simde_mm_cvtsd_si64x(a) simde_mm_cvtsd_si64(a)\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64))\n  #define _mm_cvtsd_si64(a) simde_mm_cvtsd_si64(a)\n  #define _mm_cvtsd_si64x(a) simde_mm_cvtsd_si64x(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_cvtsd_ss (simde__m128 a, simde__m128d b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_cvtsd_ss(a, b);\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a);\n    simde__m128d_private b_ = simde__m128d_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      r_.neon_f32 = vsetq_lane_f32(vcvtxd_f32_f64(vgetq_lane_f64(b_.neon_f64, 0)), a_.neon_f32, 0);\n    #else\n      r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b_.f64[0]);\n\n      SIMDE_VECTORIZE\n      for (size_t i = 1 ; i < (sizeof(r_) / sizeof(r_.i32[0])) ; i++) {\n        r_.i32[i] = a_.i32[i];\n      }\n    #endif\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if 
defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_cvtsd_ss(a, b) simde_mm_cvtsd_ss(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nint16_t\nsimde_x_mm_cvtsi128_si16 (simde__m128i a) {\n  simde__m128i_private\n    a_ = simde__m128i_to_private(a);\n\n  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n    return vgetq_lane_s16(a_.neon_i16, 0);\n  #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n    return HEDLEY_STATIC_CAST(int16_t, wasm_i16x8_extract_lane(a_.wasm_v128, 0));\n  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)\n    #if defined(SIMDE_BUG_GCC_95227)\n      (void) a_;\n    #endif\n    return vec_extract(a_.altivec_i16, 0);\n  #else\n    return a_.i16[0];\n  #endif\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nint32_t\nsimde_mm_cvtsi128_si32 (simde__m128i a) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_cvtsi128_si32(a);\n  #else\n    simde__m128i_private\n      a_ = simde__m128i_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      return vgetq_lane_s32(a_.neon_i32, 0);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      return HEDLEY_STATIC_CAST(int32_t, wasm_i32x4_extract_lane(a_.wasm_v128, 0));\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)\n      #if defined(SIMDE_BUG_GCC_95227)\n        (void) a_;\n      #endif\n      return vec_extract(a_.altivec_i32, 0);\n    #else\n      return a_.i32[0];\n    #endif\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_cvtsi128_si32(a) simde_mm_cvtsi128_si32(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nint64_t\nsimde_mm_cvtsi128_si64 (simde__m128i a) {\n  #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64)\n    #if defined(__PGI)\n      return _mm_cvtsi128_si64x(a);\n    #else\n      return _mm_cvtsi128_si64(a);\n    #endif\n  #else\n    simde__m128i_private a_ = simde__m128i_to_private(a);\n  #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(HEDLEY_IBM_VERSION)\n    return vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), a_.i64), 0);\n  #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n    return vgetq_lane_s64(a_.neon_i64, 0);\n  #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n    return HEDLEY_STATIC_CAST(int64_t, wasm_i64x2_extract_lane(a_.wasm_v128, 0));\n  #endif\n    return a_.i64[0];\n  #endif\n}\n#define simde_mm_cvtsi128_si64x(a) simde_mm_cvtsi128_si64(a)\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64))\n  #define _mm_cvtsi128_si64(a) simde_mm_cvtsi128_si64(a)\n  #define _mm_cvtsi128_si64x(a) simde_mm_cvtsi128_si64x(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_cvtsi32_sd (simde__m128d a, int32_t b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_cvtsi32_sd(a, b);\n  #else\n    simde__m128d_private r_;\n    simde__m128d_private a_ = simde__m128d_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      r_.neon_f64 = vsetq_lane_f64(HEDLEY_STATIC_CAST(float64_t, b), a_.neon_f64, 0);\n    #else\n      r_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b);\n      r_.i64[1] = a_.i64[1];\n    #endif\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_cvtsi32_sd(a, b) simde_mm_cvtsi32_sd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_x_mm_cvtsi16_si128 (int16_t a) {\n  simde__m128i_private r_;\n\n  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n    r_.neon_i16 = vsetq_lane_s16(a, vdupq_n_s16(0), 0);\n  #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n    r_.wasm_v128 = wasm_i16x8_make(a, 0, 
0, 0, 0, 0, 0, 0);\n  #else\n    r_.i16[0] = a;\n    r_.i16[1] = 0;\n    r_.i16[2] = 0;\n    r_.i16[3] = 0;\n    r_.i16[4] = 0;\n    r_.i16[5] = 0;\n    r_.i16[6] = 0;\n    r_.i16[7] = 0;\n  #endif\n\n  return simde__m128i_from_private(r_);\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_cvtsi32_si128 (int32_t a) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_cvtsi32_si128(a);\n  #else\n    simde__m128i_private r_;\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i32 = vsetq_lane_s32(a, vdupq_n_s32(0), 0);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_i32x4_make(a, 0, 0, 0);\n    #else\n      r_.i32[0] = a;\n      r_.i32[1] = 0;\n      r_.i32[2] = 0;\n      r_.i32[3] = 0;\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_cvtsi32_si128(a) simde_mm_cvtsi32_si128(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_cvtsi64_sd (simde__m128d a, int64_t b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64)\n    #if !defined(__PGI)\n      return _mm_cvtsi64_sd(a, b);\n    #else\n      return _mm_cvtsi64x_sd(a, b);\n    #endif\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      r_.neon_f64 = vsetq_lane_f64(HEDLEY_STATIC_CAST(float64_t, b), a_.neon_f64, 0);\n    #else\n      r_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b);\n      r_.f64[1] = a_.f64[1];\n    #endif\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#define simde_mm_cvtsi64x_sd(a, b) simde_mm_cvtsi64_sd(a, b)\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64))\n  #define _mm_cvtsi64_sd(a, b) simde_mm_cvtsi64_sd(a, b)\n  #define _mm_cvtsi64x_sd(a, b) simde_mm_cvtsi64x_sd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_cvtsi64_si128 (int64_t a) {\n  #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64)\n    #if !defined(__PGI)\n      return _mm_cvtsi64_si128(a);\n    #else\n      return _mm_cvtsi64x_si128(a);\n    #endif\n  #else\n    simde__m128i_private r_;\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i64 = vsetq_lane_s64(a, vdupq_n_s64(0), 0);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_i64x2_make(a, 0);\n    #else\n      r_.i64[0] = a;\n      r_.i64[1] = 0;\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#define simde_mm_cvtsi64x_si128(a) simde_mm_cvtsi64_si128(a)\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64))\n  #define _mm_cvtsi64_si128(a) simde_mm_cvtsi64_si128(a)\n  #define _mm_cvtsi64x_si128(a) simde_mm_cvtsi64x_si128(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_cvtss_sd (simde__m128d a, simde__m128 b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_cvtss_sd(a, b);\n  #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n    float64x2_t temp = vcvt_f64_f32(vset_lane_f32(vgetq_lane_f32(simde__m128_to_private(b).neon_f32, 0), vdup_n_f32(0), 0));\n    return vsetq_lane_f64(vgetq_lane_f64(simde__m128d_to_private(a).neon_f64, 1), temp, 1);\n  #else\n    simde__m128d_private\n      a_ = simde__m128d_to_private(a);\n    simde__m128_private b_ = simde__m128_to_private(b);\n\n    a_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b_.f32[0]);\n\n    return simde__m128d_from_private(a_);\n  #endif\n}\n#if 
defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_cvtss_sd(a, b) simde_mm_cvtss_sd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_cvttpd_pi32 (simde__m128d a) {\n  #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_cvttpd_pi32(a);\n  #else\n    simde__m64_private r_;\n    simde__m128d_private a_ = simde__m128d_to_private(a);\n\n    #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_FAST_CONVERSION_RANGE)\n      SIMDE_CONVERT_VECTOR_(r_.i32, a_.f64);\n    #else\n      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n        simde_float64 v = a_.f64[i];\n        #if defined(SIMDE_FAST_CONVERSION_RANGE)\n          r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v);\n        #else\n          r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ?\n            SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN;\n        #endif\n      }\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_cvttpd_pi32(a) simde_mm_cvttpd_pi32(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_cvttpd_epi32 (simde__m128d a) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_cvttpd_epi32(a);\n  #else\n    simde__m128i_private r_;\n\n    r_.m64[0] = simde_mm_cvttpd_pi32(a);\n    r_.m64[1] = simde_mm_setzero_si64();\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_cvttpd_epi32(a) simde_mm_cvttpd_epi32(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_cvttps_epi32 (simde__m128 a) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_cvttps_epi32(a);\n  #else\n    simde__m128i_private r_;\n    simde__m128_private a_ = simde__m128_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i32 = vcvtq_s32_f32(a_.neon_f32);\n\n      #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS)\n        /* Values below INT32_MIN saturate anyways, so we don't need to\n         * test for that. 
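NaN converts to 0 and values at or above 2^31 saturate\n         * to INT32_MAX under vcvtq_s32_f32, though, so the valid_input\n         * mask below forces those lanes to INT32_MIN to match x86. 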
*/\n        #if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_FAST_NANS)\n          uint32x4_t valid_input =\n            vandq_u32(\n              vcltq_f32(a_.neon_f32, vdupq_n_f32(SIMDE_FLOAT32_C(2147483648.0))),\n              vceqq_f32(a_.neon_f32, a_.neon_f32)\n            );\n        #elif !defined(SIMDE_FAST_CONVERSION_RANGE)\n          uint32x4_t valid_input = vcltq_f32(a_.neon_f32, vdupq_n_f32(SIMDE_FLOAT32_C(2147483648.0)));\n        #elif !defined(SIMDE_FAST_NANS)\n          uint32x4_t valid_input = vceqq_f32(a_.neon_f32, a_.neon_f32);\n        #endif\n\n        r_.neon_i32 = vbslq_s32(valid_input, r_.neon_i32, vdupq_n_s32(INT32_MIN));\n      #endif\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_i32x4_trunc_sat_f32x4(a_.wasm_v128);\n\n      #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS)\n        #if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_FAST_NANS)\n          v128_t valid_input =\n            wasm_v128_and(\n              wasm_f32x4_lt(a_.wasm_v128, wasm_f32x4_splat(SIMDE_FLOAT32_C(2147483648.0))),\n              wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128)\n            );\n        #elif !defined(SIMDE_FAST_CONVERSION_RANGE)\n          v128_t valid_input = wasm_f32x4_lt(a_.wasm_v128, wasm_f32x4_splat(SIMDE_FLOAT32_C(2147483648.0)));\n        #elif !defined(SIMDE_FAST_NANS)\n          v128_t valid_input = wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128);\n        #endif\n\n        r_.wasm_v128 = wasm_v128_bitselect(r_.wasm_v128, wasm_i32x4_splat(INT32_MIN), valid_input);\n      #endif\n    #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_ARCH_POWER)\n      SIMDE_CONVERT_VECTOR_(r_.i32, a_.f32);\n\n      #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS)\n        #if !defined(SIMDE_FAST_CONVERSION_RANGE)\n          static const simde_float32 SIMDE_VECTOR(16) first_too_high = { SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0) };\n\n          __typeof__(r_.i32) valid_input =\n            HEDLEY_REINTERPRET_CAST(\n              __typeof__(r_.i32),\n              (a_.f32 < first_too_high) & (a_.f32 >= -first_too_high)\n            );\n        #elif !defined(SIMDE_FAST_NANS)\n          __typeof__(r_.i32) valid_input = HEDLEY_REINTERPRET_CAST( __typeof__(valid_input), a_.f32 == a_.f32);\n        #endif\n\n        __typeof__(r_.i32) invalid_output = { INT32_MIN, INT32_MIN, INT32_MIN, INT32_MIN };\n        r_.i32 = (r_.i32 & valid_input) | (invalid_output & ~valid_input);\n      #endif\n    #else\n      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n        simde_float32 v = a_.f32[i];\n        #if defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_NANS)\n          r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v);\n        #else\n          r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ?\n            SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN;\n        #endif\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_cvttps_epi32(a) simde_mm_cvttps_epi32(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nint32_t\nsimde_mm_cvttsd_si32 (simde__m128d a) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_cvttsd_si32(a);\n  #else\n    simde__m128d_private a_ = simde__m128d_to_private(a);\n    simde_float64 v = a_.f64[0];\n    #if 
defined(SIMDE_FAST_CONVERSION_RANGE)\n      return SIMDE_CONVERT_FTOI(int32_t, v);\n    #else\n      return ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ?\n        SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN;\n    #endif\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_cvttsd_si32(a) simde_mm_cvttsd_si32(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nint64_t\nsimde_mm_cvttsd_si64 (simde__m128d a) {\n  #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64)\n    #if !defined(__PGI)\n      return _mm_cvttsd_si64(a);\n    #else\n      return _mm_cvttsd_si64x(a);\n    #endif\n  #else\n    simde__m128d_private a_ = simde__m128d_to_private(a);\n    return SIMDE_CONVERT_FTOI(int64_t, a_.f64[0]);\n  #endif\n}\n#define simde_mm_cvttsd_si64x(a) simde_mm_cvttsd_si64(a)\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64))\n  #define _mm_cvttsd_si64(a) simde_mm_cvttsd_si64(a)\n  #define _mm_cvttsd_si64x(a) simde_mm_cvttsd_si64x(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_div_pd (simde__m128d a, simde__m128d b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_div_pd(a, b);\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a),\n      b_ = simde__m128d_to_private(b);\n\n    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.f64 = a_.f64 / b_.f64;\n    #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      r_.neon_f64 = vdivq_f64(a_.neon_f64, b_.neon_f64);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_f64x2_div(a_.wasm_v128, b_.wasm_v128);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = a_.f64[i] / b_.f64[i];\n      }\n    #endif\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_div_pd(a, b) simde_mm_div_pd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_div_sd (simde__m128d a, simde__m128d b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_div_sd(a, b);\n  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS)\n    return simde_mm_move_sd(a, simde_mm_div_pd(a, b));\n  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)\n    return simde_mm_move_sd(a, simde_mm_div_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b)));\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a),\n      b_ = simde__m128d_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      float64x2_t temp = vdivq_f64(a_.neon_f64, b_.neon_f64);\n      r_.neon_f64 = vsetq_lane_f64(vgetq_lane_f64(a_.neon_f64, 1), temp, 1);\n    #else\n      r_.f64[0] = a_.f64[0] / b_.f64[0];\n      r_.f64[1] = a_.f64[1];\n    #endif\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_div_sd(a, b) simde_mm_div_sd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nint32_t\nsimde_mm_extract_epi16 (simde__m128i a, const int imm8)\n    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7)  {\n  uint16_t r;\n  simde__m128i_private a_ = simde__m128i_to_private(a);\n\n  #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)\n    #if defined(SIMDE_BUG_GCC_95227)\n      (void) a_;\n      (void) imm8;\n    #endif\n    r = HEDLEY_STATIC_CAST(uint16_t, vec_extract(a_.altivec_i16, imm8));\n  #else\n    r = a_.u16[imm8 & 7];\n  #endif\n\n  return 
HEDLEY_STATIC_CAST(int32_t, r);\n}\n#if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(4,6,0))\n  #define simde_mm_extract_epi16(a, imm8) _mm_extract_epi16(a, imm8)\n#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n  #define simde_mm_extract_epi16(a, imm8) (HEDLEY_STATIC_CAST(int32_t, vgetq_lane_s16(simde__m128i_to_private(a).neon_i16, (imm8))) & (INT32_C(0x0000ffff)))\n#elif defined(SIMDE_WASM_SIMD128_NATIVE)\n  #define simde_mm_extract_epi16(a, imm8) HEDLEY_STATIC_CAST(int32_t, wasm_u16x8_extract_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 7))\n#endif\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_extract_epi16(a, imm8) simde_mm_extract_epi16(a, imm8)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_insert_epi16 (simde__m128i a, int16_t i, const int imm8)\n    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7)  {\n  simde__m128i_private a_ = simde__m128i_to_private(a);\n  a_.i16[imm8 & 7] = i;\n  return simde__m128i_from_private(a_);\n}\n#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI)\n  #define simde_mm_insert_epi16(a, i, imm8) _mm_insert_epi16((a), (i), (imm8))\n#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n  #define simde_mm_insert_epi16(a, i, imm8) simde__m128i_from_neon_i16(vsetq_lane_s16((i), simde__m128i_to_neon_i16(a), (imm8)))\n#elif defined(SIMDE_WASM_SIMD128_NATIVE)\n  #define simde_mm_insert_epi16(a, i, imm8) wasm_i16x8_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 7, (i))\n#endif\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_insert_epi16(a, i, imm8) simde_mm_insert_epi16(a, i, imm8)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_load_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_load_pd(mem_addr);\n  #else\n    simde__m128d_private r_;\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      r_.neon_f64 = vld1q_f64(mem_addr);\n    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u32 = vld1q_u32(HEDLEY_REINTERPRET_CAST(uint32_t const*, mem_addr));\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_v128_load(mem_addr);\n    #else\n      simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128d), sizeof(r_));\n    #endif\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_load_pd(mem_addr) simde_mm_load_pd(mem_addr)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_load1_pd (simde_float64 const* mem_addr) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_load1_pd(mem_addr);\n  #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n    return simde__m128d_from_neon_f64(vld1q_dup_f64(mem_addr));\n  #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n    return simde__m128d_from_wasm_v128(wasm_v128_load64_splat(mem_addr));\n  #else\n    return simde_mm_set1_pd(*mem_addr);\n  #endif\n}\n#define simde_mm_load_pd1(mem_addr) simde_mm_load1_pd(mem_addr)\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_load_pd1(mem_addr) simde_mm_load1_pd(mem_addr)\n  #define _mm_load1_pd(mem_addr) simde_mm_load1_pd(mem_addr)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_load_sd (simde_float64 const* mem_addr) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_load_sd(mem_addr);\n  #else\n    simde__m128d_private r_;\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      r_.neon_f64 = vsetq_lane_f64(*mem_addr, vdupq_n_f64(0), 0);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      
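/* wasm_v128_load64_zero loads the 64-bit value into lane 0 and zeroes lane 1 in a single operation, matching _mm_load_sd. */\n      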
r_.wasm_v128 = wasm_v128_load64_zero(HEDLEY_REINTERPRET_CAST(const void*, mem_addr));\n    #else\n      r_.f64[0] = *mem_addr;\n      r_.u64[1] = UINT64_C(0);\n    #endif\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_load_sd(mem_addr) simde_mm_load_sd(mem_addr)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_load_si128 (simde__m128i const* mem_addr) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_load_si128(HEDLEY_REINTERPRET_CAST(__m128i const*, mem_addr));\n  #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n    return vld1q_s64(HEDLEY_REINTERPRET_CAST(int64_t const*, mem_addr));\n  #else\n    simde__m128i_private r_;\n\n    #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)\n      r_.altivec_i32 = vec_ld(0, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(int) const*, mem_addr));\n    #else\n      simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128i), sizeof(simde__m128i));\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_load_si128(mem_addr) simde_mm_load_si128(mem_addr)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_loadh_pd (simde__m128d a, simde_float64 const* mem_addr) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_loadh_pd(a, mem_addr);\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      r_.neon_f64 = vcombine_f64(vget_low_f64(a_.neon_f64), vld1_f64(HEDLEY_REINTERPRET_CAST(const float64_t*, mem_addr)));\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_v128_load64_lane(HEDLEY_REINTERPRET_CAST(const void*, mem_addr), a_.wasm_v128, 1);\n    #else\n      simde_float64 t;\n\n      simde_memcpy(&t, mem_addr, sizeof(t));\n      r_.f64[0] = a_.f64[0];\n      r_.f64[1] = t;\n    #endif\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_loadh_pd(a, mem_addr) simde_mm_loadh_pd(a, mem_addr)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_loadl_epi64 (simde__m128i const* mem_addr) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_loadl_epi64(mem_addr);\n  #else\n    simde__m128i_private r_;\n\n    int64_t value;\n    simde_memcpy(&value, mem_addr, sizeof(value));\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i64 = vcombine_s64(vld1_s64(HEDLEY_REINTERPRET_CAST(int64_t const *, mem_addr)), vdup_n_s64(0));\n    #else\n      r_.i64[0] = value;\n      r_.i64[1] = 0;\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_loadl_epi64(mem_addr) simde_mm_loadl_epi64(mem_addr)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_loadl_pd (simde__m128d a, simde_float64 const* mem_addr) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_loadl_pd(a, mem_addr);\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      r_.neon_f64 = vcombine_f64(vld1_f64(\n        HEDLEY_REINTERPRET_CAST(const float64_t*, mem_addr)), vget_high_f64(a_.neon_f64));\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_v128_load64_lane(HEDLEY_REINTERPRET_CAST(const void*, mem_addr), a_.wasm_v128, 0);\n    #else\n      r_.f64[0] = *mem_addr;\n      r_.u64[1] = a_.u64[1];\n    #endif\n\n    return 
simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_loadl_pd(a, mem_addr) simde_mm_loadl_pd(a, mem_addr)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_loadr_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_loadr_pd(mem_addr);\n  #else\n    simde__m128d_private\n      r_;\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      r_.neon_f64 = vld1q_f64(mem_addr);\n      r_.neon_f64 = vextq_f64(r_.neon_f64, r_.neon_f64, 1);\n    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i64 = vld1q_s64(HEDLEY_REINTERPRET_CAST(int64_t const *, mem_addr));\n      r_.neon_i64 = vextq_s64(r_.neon_i64, r_.neon_i64, 1);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      v128_t tmp = wasm_v128_load(mem_addr);\n      r_.wasm_v128 = wasm_i64x2_shuffle(tmp, tmp, 1, 0);\n    #else\n      r_.f64[0] = mem_addr[1];\n      r_.f64[1] = mem_addr[0];\n    #endif\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_loadr_pd(mem_addr) simde_mm_loadr_pd(mem_addr)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_loadu_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_loadu_pd(mem_addr);\n  #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n    return vld1q_f64(mem_addr);\n  #else\n    simde__m128d_private r_;\n\n    simde_memcpy(&r_, mem_addr, sizeof(r_));\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_loadu_pd(mem_addr) simde_mm_loadu_pd(mem_addr)\n#endif\n\n#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \\\n    && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \\\n    && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0))\n  #define simde_mm_loadu_epi8(mem_addr) _mm_loadu_epi8(mem_addr)\n#else\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_loadu_epi8(void const * mem_addr) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr));\n  #else\n    simde__m128i_private r_;\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i8 = vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr));\n    #else\n      simde_memcpy(&r_, mem_addr, sizeof(r_));\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#endif\n#define simde_x_mm_loadu_epi8(mem_addr) simde_mm_loadu_epi8(mem_addr)\n#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862)))\n  #undef _mm_loadu_epi8\n  #define _mm_loadu_epi8(a) simde_mm_loadu_epi8(a)\n#endif\n\n#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \\\n    && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \\\n    && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0))\n  #define simde_mm_loadu_epi16(mem_addr) _mm_loadu_epi16(mem_addr)\n#else\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_loadu_epi16(void const * mem_addr) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr));\n  #else\n    simde__m128i_private r_;\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i16 = 
vreinterpretq_s16_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr)));\n    #else\n      simde_memcpy(&r_, mem_addr, sizeof(r_));\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#endif\n#define simde_x_mm_loadu_epi16(mem_addr) simde_mm_loadu_epi16(mem_addr)\n#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862)))\n  #undef _mm_loadu_epi16\n  #define _mm_loadu_epi16(a) simde_mm_loadu_epi16(a)\n#endif\n\n#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \\\n    && !defined(SIMDE_BUG_CLANG_REV_344862) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0))\n  #define simde_mm_loadu_epi32(mem_addr) _mm_loadu_epi32(mem_addr)\n#else\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_loadu_epi32(void const * mem_addr) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr));\n  #else\n    simde__m128i_private r_;\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i32 = vreinterpretq_s32_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr)));\n    #else\n      simde_memcpy(&r_, mem_addr, sizeof(r_));\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#endif\n#define simde_x_mm_loadu_epi32(mem_addr) simde_mm_loadu_epi32(mem_addr)\n#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862)))\n  #undef _mm_loadu_epi32\n  #define _mm_loadu_epi32(a) simde_mm_loadu_epi32(a)\n#endif\n\n#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \\\n    && !defined(SIMDE_BUG_CLANG_REV_344862) \\\n    && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0))\n  #define simde_mm_loadu_epi64(mem_addr) _mm_loadu_epi64(mem_addr)\n#else\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_loadu_epi64(void const * mem_addr) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr));\n  #else\n    simde__m128i_private r_;\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i64 = vreinterpretq_s64_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr)));\n    #else\n      simde_memcpy(&r_, mem_addr, sizeof(r_));\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#endif\n#define simde_x_mm_loadu_epi64(mem_addr) simde_mm_loadu_epi64(mem_addr)\n#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862)))\n  #undef _mm_loadu_epi64\n  #define _mm_loadu_epi64(a) simde_mm_loadu_epi64(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_loadu_si128 (void const* mem_addr) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_loadu_si128(HEDLEY_STATIC_CAST(__m128i const*, mem_addr));\n  #else\n    simde__m128i_private r_;\n\n    #if HEDLEY_GNUC_HAS_ATTRIBUTE(may_alias,3,3,0)\n      HEDLEY_DIAGNOSTIC_PUSH\n      SIMDE_DIAGNOSTIC_DISABLE_PACKED_\n      struct simde_mm_loadu_si128_s {\n        __typeof__(r_) v;\n      } __attribute__((__packed__, __may_alias__));\n      r_ = HEDLEY_REINTERPRET_CAST(const struct simde_mm_loadu_si128_s *, mem_addr)->v;\n      HEDLEY_DIAGNOSTIC_POP\n    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i8 = 
vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr));\n    #else\n      simde_memcpy(&r_, mem_addr, sizeof(r_));\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_loadu_si128(mem_addr) simde_mm_loadu_si128(mem_addr)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_madd_epi16 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_madd_epi16(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      int32x4_t pl = vmull_s16(vget_low_s16(a_.neon_i16),  vget_low_s16(b_.neon_i16));\n      int32x4_t ph = vmull_high_s16(a_.neon_i16, b_.neon_i16);\n      r_.neon_i32 = vpaddq_s32(pl, ph);\n    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      int32x4_t pl = vmull_s16(vget_low_s16(a_.neon_i16),  vget_low_s16(b_.neon_i16));\n      int32x4_t ph = vmull_s16(vget_high_s16(a_.neon_i16), vget_high_s16(b_.neon_i16));\n      int32x2_t rl = vpadd_s32(vget_low_s32(pl), vget_high_s32(pl));\n      int32x2_t rh = vpadd_s32(vget_low_s32(ph), vget_high_s32(ph));\n      r_.neon_i32 = vcombine_s32(rl, rh);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)\n      r_.altivec_i32 = vec_msum(a_.altivec_i16, b_.altivec_i16, vec_splats(0));\n    #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)\n      r_.altivec_i32 = vec_mule(a_.altivec_i16, b_.altivec_i16) + vec_mulo(a_.altivec_i16, b_.altivec_i16);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_i32x4_dot_i16x8(a_.wasm_v128, b_.wasm_v128);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector)\n      int32_t SIMDE_VECTOR(32) a32, b32, p32;\n      SIMDE_CONVERT_VECTOR_(a32, a_.i16);\n      SIMDE_CONVERT_VECTOR_(b32, b_.i16);\n      p32 = a32 * b32;\n      r_.i32 =\n        __builtin_shufflevector(p32, p32, 0, 2, 4, 6) +\n        __builtin_shufflevector(p32, p32, 1, 3, 5, 7);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i += 2) {\n        r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) + (a_.i16[i + 1] * b_.i16[i + 1]);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_madd_epi16(a, b) simde_mm_madd_epi16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nvoid\nsimde_mm_maskmoveu_si128 (simde__m128i a, simde__m128i mask, int8_t mem_addr[HEDLEY_ARRAY_PARAM(16)]) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    _mm_maskmoveu_si128(a, mask, HEDLEY_REINTERPRET_CAST(char*, mem_addr));\n  #else\n    simde__m128i_private\n      a_ = simde__m128i_to_private(a),\n      mask_ = simde__m128i_to_private(mask);\n\n    for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) {\n      if (mask_.u8[i] & 0x80) {\n        mem_addr[i] = a_.i8[i];\n      }\n    }\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_maskmoveu_si128(a, mask, mem_addr) simde_mm_maskmoveu_si128((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr)))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nint32_t\nsimde_mm_movemask_epi8 (simde__m128i a) {\n  #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__INTEL_COMPILER)\n    /* ICC has trouble with _mm_movemask_epi8 at -O2 and above: */\n    return _mm_movemask_epi8(a);\n  #else\n    int32_t r = 0;\n    simde__m128i_private a_ = 
simde__m128i_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      /* https://github.com/WebAssembly/simd/pull/201#issue-380682845 */\n      static const uint8_t md[16] = {\n        1 << 0, 1 << 1, 1 << 2, 1 << 3,\n        1 << 4, 1 << 5, 1 << 6, 1 << 7,\n        1 << 0, 1 << 1, 1 << 2, 1 << 3,\n        1 << 4, 1 << 5, 1 << 6, 1 << 7,\n      };\n\n      /* Extend sign bit over entire lane */\n      uint8x16_t extended = vreinterpretq_u8_s8(vshrq_n_s8(a_.neon_i8, 7));\n      /* Clear all but the bit we're interested in. */\n      uint8x16_t masked = vandq_u8(vld1q_u8(md), extended);\n      /* Alternate bytes from low half and high half */\n      uint8x8x2_t tmp = vzip_u8(vget_low_u8(masked), vget_high_u8(masked));\n      uint16x8_t x = vreinterpretq_u16_u8(vcombine_u8(tmp.val[0], tmp.val[1]));\n      #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n        r = vaddvq_u16(x);\n      #else\n        uint64x2_t t64 = vpaddlq_u32(vpaddlq_u16(x));\n        r =\n          HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(t64, 0)) +\n          HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(t64, 1));\n      #endif\n    #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(HEDLEY_IBM_VERSION) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE)\n      static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = { 120, 112, 104, 96, 88, 80, 72, 64, 56, 48, 40, 32, 24, 16, 8, 0 };\n      r = HEDLEY_STATIC_CAST(int32_t, vec_extract(vec_vbpermq(a_.altivec_u8, perm), 1));\n    #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(HEDLEY_IBM_VERSION) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG)\n      static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = { 120, 112, 104, 96, 88, 80, 72, 64, 56, 48, 40, 32, 24, 16, 8, 0 };\n      r = HEDLEY_STATIC_CAST(int32_t, vec_extract(vec_vbpermq(a_.altivec_u8, perm), 14));\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r = HEDLEY_STATIC_CAST(int32_t, wasm_i8x16_bitmask(a_.wasm_v128));\n    #else\n      SIMDE_VECTORIZE_REDUCTION(|:r)\n      for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) {\n        r |= (a_.u8[15 - i] >> 7) << (15 - i);\n      }\n    #endif\n\n    return r;\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_movemask_epi8(a) simde_mm_movemask_epi8(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nint32_t\nsimde_mm_movemask_pd (simde__m128d a) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_movemask_pd(a);\n  #else\n    int32_t r = 0;\n    simde__m128d_private a_ = simde__m128d_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      HEDLEY_DIAGNOSTIC_PUSH\n      SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_\n      uint64x2_t shifted = vshrq_n_u64(a_.neon_u64, 63);\n      r =\n        HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(shifted, 0)) +\n        (HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(shifted, 1)) << 1);\n      HEDLEY_DIAGNOSTIC_POP\n    #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && defined(SIMDE_BUG_CLANG_50932)\n      SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 64, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 };\n      SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_bperm(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned __int128), a_.altivec_u64), idx));\n      r = HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2));\n    #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)\n      
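/* vec_bperm gathers arbitrary bit positions: indices 0 and 64 select the sign bits of the two doubles, while indices >= 128 contribute zero bits. */\n      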
SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 64, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 };\n      SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = vec_bperm(a_.altivec_u8, idx);\n      r = HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2));\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r = HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_bitmask(a_.wasm_v128));\n    #else\n      SIMDE_VECTORIZE_REDUCTION(|:r)\n      for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) {\n        r |= (a_.u64[i] >> 63) << i;\n      }\n    #endif\n\n    return r;\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_movemask_pd(a) simde_mm_movemask_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_movepi64_pi64 (simde__m128i a) {\n  #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_movepi64_pi64(a);\n  #else\n    simde__m64_private r_;\n    simde__m128i_private a_ = simde__m128i_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      r_.neon_i64 = vget_low_s64(a_.neon_i64);\n    #else\n      r_.i64[0] = a_.i64[0];\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_movepi64_pi64(a) simde_mm_movepi64_pi64(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_movpi64_epi64 (simde__m64 a) {\n  #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_movpi64_epi64(a);\n  #else\n    simde__m128i_private r_;\n    simde__m64_private a_ = simde__m64_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i64 = vcombine_s64(a_.neon_i64, vdup_n_s64(0));\n    #else\n      r_.i64[0] = a_.i64[0];\n      r_.i64[1] = 0;\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_movpi64_epi64(a) simde_mm_movpi64_epi64(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_min_epi16 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_min_epi16(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i16 = vminq_s16(a_.neon_i16, b_.neon_i16);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_i16x8_min(a_.wasm_v128, b_.wasm_v128);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)\n      r_.altivec_i16 = vec_min(a_.altivec_i16, b_.altivec_i16);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n        r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? 
a_.i16[i] : b_.i16[i];\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_min_epi16(a, b) simde_mm_min_epi16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_min_epu8 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_min_epu8(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u8 = vminq_u8(a_.neon_u8, b_.neon_u8);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_u8x16_min(a_.wasm_v128, b_.wasm_v128);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)\n      r_.altivec_u8 = vec_min(a_.altivec_u8, b_.altivec_u8);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {\n        r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? a_.u8[i] : b_.u8[i];\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_min_epu8(a, b) simde_mm_min_epu8(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_min_pd (simde__m128d a, simde__m128d b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_min_pd(a, b);\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a),\n      b_ = simde__m128d_to_private(b);\n\n    #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)\n      r_.altivec_f64 = vec_min(a_.altivec_f64, b_.altivec_f64);\n    #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      r_.neon_f64 = vminq_f64(a_.neon_f64, b_.neon_f64);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_f64x2_min(a_.wasm_v128, b_.wasm_v128);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = (a_.f64[i] < b_.f64[i]) ? a_.f64[i] : b_.f64[i];\n      }\n    #endif\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_min_pd(a, b) simde_mm_min_pd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_min_sd (simde__m128d a, simde__m128d b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_min_sd(a, b);\n  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS)\n    return simde_mm_move_sd(a, simde_mm_min_pd(a, b));\n  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)\n    return simde_mm_move_sd(a, simde_mm_min_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b)));\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a),\n      b_ = simde__m128d_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      float64x2_t temp = vminq_f64(a_.neon_f64, b_.neon_f64);\n      r_.neon_f64 = vsetq_lane_f64(vgetq_lane_f64(a_.neon_f64, 1), temp, 1);\n    #else\n      r_.f64[0] = (a_.f64[0] < b_.f64[0]) ? 
a_.f64[0] : b_.f64[0];\n      r_.f64[1] = a_.f64[1];\n    #endif\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_min_sd(a, b) simde_mm_min_sd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_max_epi16 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_max_epi16(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i16 = vmaxq_s16(a_.neon_i16, b_.neon_i16);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_i16x8_max(a_.wasm_v128, b_.wasm_v128);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)\n      r_.altivec_i16 = vec_max(a_.altivec_i16, b_.altivec_i16);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n        r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? a_.i16[i] : b_.i16[i];\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_max_epi16(a, b) simde_mm_max_epi16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_max_epu8 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_max_epu8(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u8 = vmaxq_u8(a_.neon_u8, b_.neon_u8);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_u8x16_max(a_.wasm_v128, b_.wasm_v128);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)\n      r_.altivec_u8 = vec_max(a_.altivec_u8, b_.altivec_u8);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {\n        r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? a_.u8[i] : b_.u8[i];\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_max_epu8(a, b) simde_mm_max_epu8(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_max_pd (simde__m128d a, simde__m128d b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_max_pd(a, b);\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a),\n      b_ = simde__m128d_to_private(b);\n\n    #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)\n      r_.altivec_f64 = vec_max(a_.altivec_f64, b_.altivec_f64);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_f64x2_max(a_.wasm_v128, b_.wasm_v128);\n    #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      r_.neon_f64 = vmaxq_f64(a_.neon_f64, b_.neon_f64);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = (a_.f64[i] > b_.f64[i]) ? 
a_.f64[i] : b_.f64[i];\n      }\n    #endif\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_max_pd(a, b) simde_mm_max_pd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_max_sd (simde__m128d a, simde__m128d b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_max_sd(a, b);\n  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS)\n    return simde_mm_move_sd(a, simde_mm_max_pd(a, b));\n  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)\n    return simde_mm_move_sd(a, simde_mm_max_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b)));\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a),\n      b_ = simde__m128d_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      float64x2_t temp = vmaxq_f64(a_.neon_f64, b_.neon_f64);\n      r_.neon_f64 = vsetq_lane_f64(vgetq_lane_f64(a_.neon_f64, 1), temp, 1);\n    #else\n      r_.f64[0] = (a_.f64[0] > b_.f64[0]) ? a_.f64[0] : b_.f64[0];\n      r_.f64[1] = a_.f64[1];\n    #endif\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_max_sd(a, b) simde_mm_max_sd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_move_epi64 (simde__m128i a) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_move_epi64(a);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i64 = vsetq_lane_s64(0, a_.neon_i64, 1);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, wasm_i64x2_const(0, 0), 0, 2);\n    #else\n      r_.i64[0] = a_.i64[0];\n      r_.i64[1] = 0;\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_move_epi64(a) simde_mm_move_epi64(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_mul_epu32 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_mul_epu32(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      uint32x2_t a_lo = vmovn_u64(a_.neon_u64);\n      uint32x2_t b_lo = vmovn_u64(b_.neon_u64);\n      r_.neon_u64 = vmull_u32(a_lo, b_lo);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_u64x2_extmul_low_u32x4(\n        wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 2, 0, 2),\n        wasm_i32x4_shuffle(b_.wasm_v128, b_.wasm_v128, 0, 2, 0, 2));\n    #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE)\n      __typeof__(a_.u32) z = { 0, };\n      a_.u32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.u32, z, 0, 4, 2, 6);\n      b_.u32 = SIMDE_SHUFFLE_VECTOR_(32, 16, b_.u32, z, 0, 4, 2, 6);\n      r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), a_.u32) *\n               HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), b_.u32);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {\n        r_.u64[i] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[i * 2]) * HEDLEY_STATIC_CAST(uint64_t, b_.u32[i * 2]);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_mul_epu32(a, b) simde_mm_mul_epu32(a, 
b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_x_mm_mul_epi64 (simde__m128i a, simde__m128i b) {\n  simde__m128i_private\n    r_,\n    a_ = simde__m128i_to_private(a),\n    b_ = simde__m128i_to_private(b);\n\n  #if defined(SIMDE_WASM_SIMD128_NATIVE)\n    r_.wasm_v128 = wasm_i64x2_mul(a_.wasm_v128, b_.wasm_v128);\n  #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n    r_.i64 = a_.i64 * b_.i64;\n  #else\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {\n      r_.i64[i] = a_.i64[i] * b_.i64[i];\n    }\n  #endif\n\n  return simde__m128i_from_private(r_);\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_x_mm_mod_epi64 (simde__m128i a, simde__m128i b) {\n  simde__m128i_private\n    r_,\n    a_ = simde__m128i_to_private(a),\n    b_ = simde__m128i_to_private(b);\n\n  #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104)\n    r_.i64 = a_.i64 % b_.i64;\n  #else\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {\n      r_.i64[i] = a_.i64[i] % b_.i64[i];\n    }\n  #endif\n\n  return simde__m128i_from_private(r_);\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_mul_pd (simde__m128d a, simde__m128d b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_mul_pd(a, b);\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a),\n      b_ = simde__m128d_to_private(b);\n\n    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.f64 = a_.f64 * b_.f64;\n    #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      r_.neon_f64 = vmulq_f64(a_.neon_f64, b_.neon_f64);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_f64x2_mul(a_.wasm_v128, b_.wasm_v128);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = a_.f64[i] * b_.f64[i];\n      }\n    #endif\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_mul_pd(a, b) simde_mm_mul_pd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_mul_sd (simde__m128d a, simde__m128d b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_mul_sd(a, b);\n  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS)\n    return simde_mm_move_sd(a, simde_mm_mul_pd(a, b));\n  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)\n    return simde_mm_move_sd(a, simde_mm_mul_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b)));\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a),\n      b_ = simde__m128d_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      float64x2_t temp = vmulq_f64(a_.neon_f64, b_.neon_f64);\n      r_.neon_f64 = vsetq_lane_f64(vgetq_lane_f64(a_.neon_f64, 1), temp, 1);\n    #else\n      r_.f64[0] = a_.f64[0] * b_.f64[0];\n      r_.f64[1] = a_.f64[1];\n    #endif\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_mul_sd(a, b) simde_mm_mul_sd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_mul_su32 (simde__m64 a, simde__m64 b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI)\n    return _mm_mul_su32(a, b);\n  #else\n    simde__m64_private\n      r_,\n      a_ = simde__m64_to_private(a),\n      b_ = simde__m64_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.u64[0] = 
vget_lane_u64(vget_low_u64(vmull_u32(vreinterpret_u32_s64(a_.neon_i64), vreinterpret_u32_s64(b_.neon_i64))), 0);\n    #else\n      r_.u64[0] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[0]) * HEDLEY_STATIC_CAST(uint64_t, b_.u32[0]);\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_mul_su32(a, b) simde_mm_mul_su32(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_mulhi_epi16 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_mulhi_epi16(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      int16x4_t a3210 = vget_low_s16(a_.neon_i16);\n      int16x4_t b3210 = vget_low_s16(b_.neon_i16);\n      int32x4_t ab3210 = vmull_s16(a3210, b3210); /* 3333222211110000 */\n      #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n        int32x4_t ab7654 = vmull_high_s16(a_.neon_i16, b_.neon_i16);\n        r_.neon_i16 = vuzp2q_s16(vreinterpretq_s16_s32(ab3210), vreinterpretq_s16_s32(ab7654));\n      #else\n        int16x4_t a7654 = vget_high_s16(a_.neon_i16);\n        int16x4_t b7654 = vget_high_s16(b_.neon_i16);\n        int32x4_t ab7654 = vmull_s16(a7654, b7654); /* 7777666655554444 */\n        uint16x8x2_t rv = vuzpq_u16(vreinterpretq_u16_s32(ab3210), vreinterpretq_u16_s32(ab7654));\n        r_.neon_u16 = rv.val[1];\n      #endif\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      const v128_t lo = wasm_i32x4_extmul_low_i16x8(a_.wasm_v128, b_.wasm_v128);\n      const v128_t hi = wasm_i32x4_extmul_high_i16x8(a_.wasm_v128, b_.wasm_v128);\n      r_.wasm_v128 = wasm_i16x8_shuffle(lo, hi, 1, 3, 5, 7, 9, 11, 13, 15);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n        r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (HEDLEY_STATIC_CAST(uint32_t, HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) >> 16));\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_mulhi_epi16(a, b) simde_mm_mulhi_epi16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_mulhi_epu16 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI)\n    return _mm_mulhi_epu16(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      uint16x4_t a3210 = vget_low_u16(a_.neon_u16);\n      uint16x4_t b3210 = vget_low_u16(b_.neon_u16);\n      uint32x4_t ab3210 = vmull_u16(a3210, b3210); /* 3333222211110000 */\n      #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n        uint32x4_t ab7654 = vmull_high_u16(a_.neon_u16, b_.neon_u16);\n        r_.neon_u16 = vuzp2q_u16(vreinterpretq_u16_u32(ab3210), vreinterpretq_u16_u32(ab7654));\n      #else\n        uint16x4_t a7654 = vget_high_u16(a_.neon_u16);\n        uint16x4_t b7654 = vget_high_u16(b_.neon_u16);\n        uint32x4_t ab7654 = vmull_u16(a7654, b7654); /* 7777666655554444 */\n        uint16x8x2_t neon_r = vuzpq_u16(vreinterpretq_u16_u32(ab3210), vreinterpretq_u16_u32(ab7654));\n        r_.neon_u16 = neon_r.val[1];\n      #endif\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      const v128_t lo = wasm_u32x4_extmul_low_u16x8(a_.wasm_v128, b_.wasm_v128);\n      const v128_t hi = 
wasm_u32x4_extmul_high_u16x8(a_.wasm_v128, b_.wasm_v128);\n      r_.wasm_v128 = wasm_i16x8_shuffle(lo, hi, 1, 3, 5, 7, 9, 11, 13, 15);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {\n        r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i]) >> 16);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_mulhi_epu16(a, b) simde_mm_mulhi_epu16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_mullo_epi16 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_mullo_epi16(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i16 = vmulq_s16(a_.neon_i16, b_.neon_i16);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)\n      (void) a_;\n      (void) b_;\n      r_.altivec_i16 = vec_mul(a_.altivec_i16, b_.altivec_i16);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_i16x8_mul(a_.wasm_v128, b_.wasm_v128);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n        r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i]));\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_mullo_epi16(a, b) simde_mm_mullo_epi16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_or_pd (simde__m128d a, simde__m128d b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_or_pd(a, b);\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a),\n      b_ = simde__m128d_to_private(b);\n\n    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i32f = a_.i32f | b_.i32f;\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128);\n    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i64 = vorrq_s64(a_.neon_i64, b_.neon_i64);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) {\n        r_.i32f[i] = a_.i32f[i] | b_.i32f[i];\n      }\n    #endif\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_or_pd(a, b) simde_mm_or_pd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_or_si128 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_or_si128(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i32 = vorrq_s32(a_.neon_i32, b_.neon_i32);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)\n      r_.altivec_i32 = vec_or(a_.altivec_i32, b_.altivec_i32);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i32f = a_.i32f | b_.i32f;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) {\n        r_.i32f[i] = a_.i32f[i] | b_.i32f[i];\n      }\n    #endif\n\n    return 
simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_or_si128(a, b) simde_mm_or_si128(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_packs_epi16 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_packs_epi16(a, b);\n  #else\n    simde__m128i_private\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b),\n      r_;\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      r_.neon_i8 = vqmovn_high_s16(vqmovn_s16(a_.neon_i16), b_.neon_i16);\n    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i8 = vcombine_s8(vqmovn_s16(a_.neon_i16), vqmovn_s16(b_.neon_i16));\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)\n      r_.altivec_i8 = vec_packs(a_.altivec_i16, b_.altivec_i16);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_i8x16_narrow_i16x8(a_.wasm_v128, b_.wasm_v128);\n    #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector)\n      int16_t SIMDE_VECTOR(32) v = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);\n      const int16_t SIMDE_VECTOR(32) min = { INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN };\n      const int16_t SIMDE_VECTOR(32) max = { INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX };\n\n      int16_t m SIMDE_VECTOR(32);\n      m = HEDLEY_REINTERPRET_CAST(__typeof__(m), v < min);\n      v = (v & ~m) | (min & m);\n\n      m = v > max;\n      v = (v & ~m) | (max & m);\n\n      SIMDE_CONVERT_VECTOR_(r_.i8, v);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {\n        int16_t v = (i < (sizeof(a_.i16) / sizeof(a_.i16[0]))) ? a_.i16[i] : b_.i16[i & 7];\n        r_.i8[i] = (v < INT8_MIN) ? INT8_MIN : ((v > INT8_MAX) ? 
INT8_MAX : HEDLEY_STATIC_CAST(int8_t, v));\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_packs_epi16(a, b) simde_mm_packs_epi16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_packs_epi32 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_packs_epi32(a, b);\n  #else\n    simde__m128i_private\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b),\n      r_;\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      r_.neon_i16 = vqmovn_high_s32(vqmovn_s32(a_.neon_i32), b_.neon_i32);\n    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i16 = vcombine_s16(vqmovn_s32(a_.neon_i32), vqmovn_s32(b_.neon_i32));\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)\n      r_.altivec_i16 = vec_packs(a_.altivec_i32, b_.altivec_i32);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_i16x8_narrow_i32x4(a_.wasm_v128, b_.wasm_v128);\n    #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector)\n      int32_t SIMDE_VECTOR(32) v = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 0, 1, 2, 3, 4, 5, 6, 7);\n      const int32_t SIMDE_VECTOR(32) min = { INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN };\n      const int32_t SIMDE_VECTOR(32) max = { INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX };\n\n      int32_t m SIMDE_VECTOR(32);\n      m = HEDLEY_REINTERPRET_CAST(__typeof__(m), v < min);\n      v = (v & ~m) | (min & m);\n\n      m = HEDLEY_REINTERPRET_CAST(__typeof__(m), v > max);\n      v = (v & ~m) | (max & m);\n\n      SIMDE_CONVERT_VECTOR_(r_.i16, v);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n        int32_t v = (i < (sizeof(a_.i32) / sizeof(a_.i32[0]))) ? a_.i32[i] : b_.i32[i & 3];\n        r_.i16[i] = (v < INT16_MIN) ? INT16_MIN : ((v > INT16_MAX) ? 
INT16_MAX : HEDLEY_STATIC_CAST(int16_t, v));\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_packs_epi32(a, b) simde_mm_packs_epi32(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_packus_epi16 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_packus_epi16(a, b);\n  #else\n    simde__m128i_private\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b),\n      r_;\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      #if defined(SIMDE_BUG_CLANG_46840)\n        r_.neon_u8 = vqmovun_high_s16(vreinterpret_s8_u8(vqmovun_s16(a_.neon_i16)), b_.neon_i16);\n      #else\n        r_.neon_u8 = vqmovun_high_s16(vqmovun_s16(a_.neon_i16), b_.neon_i16);\n      #endif\n    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u8 =\n        vcombine_u8(\n          vqmovun_s16(a_.neon_i16),\n          vqmovun_s16(b_.neon_i16)\n        );\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)\n      r_.altivec_u8 = vec_packsu(a_.altivec_i16, b_.altivec_i16);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_u8x16_narrow_i16x8(a_.wasm_v128, b_.wasm_v128);\n    #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)\n      int16_t v SIMDE_VECTOR(32) = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);\n\n      v &= ~(v >> 15);\n      v |= HEDLEY_REINTERPRET_CAST(__typeof__(v), v > UINT8_MAX);\n\n      SIMDE_CONVERT_VECTOR_(r_.i8, v);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {\n        int16_t v = (i < (sizeof(a_.i16) / sizeof(a_.i16[0]))) ? a_.i16[i] : b_.i16[i & 7];\n        r_.u8[i] = (v < 0) ? UINT8_C(0) : ((v > UINT8_MAX) ? 
UINT8_MAX : HEDLEY_STATIC_CAST(uint8_t, v));\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_packus_epi16(a, b) simde_mm_packus_epi16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nvoid\nsimde_mm_pause (void) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    _mm_pause();\n  #elif defined(SIMDE_ARCH_X86)\n    #if defined(_MSC_VER)\n      __asm pause;\n    #else\n      __asm__ __volatile__(\"pause\");\n    #endif\n  #elif defined(SIMDE_ARCH_ARM_NEON)\n    #if defined(_MSC_VER)\n      __isb(_ARM64_BARRIER_SY);\n    #else\n      __asm__ __volatile__(\"isb\\n\");\n    #endif\n  #elif defined(SIMDE_ARCH_POWER)\n    __asm__ __volatile__ (\"or 27,27,27\" ::: \"memory\");\n  #elif defined(SIMDE_ARCH_WASM)\n    __asm__ __volatile__ (\"nop\");\n  #elif defined(HEDLEY_GCC_VERSION)\n    #if defined(SIMDE_ARCH_RISCV)\n      __builtin_riscv_pause();\n    #else\n      __asm__ __volatile__ (\"nop\" ::: \"memory\");\n    #endif\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_pause() (simde_mm_pause())\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_sad_epu8 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_sad_epu8(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      const uint16x8_t t = vpaddlq_u8(vabdq_u8(a_.neon_u8, b_.neon_u8));\n      r_.neon_u64 = vcombine_u64(\n        vpaddl_u32(vpaddl_u16(vget_low_u16(t))),\n        vpaddl_u32(vpaddl_u16(vget_high_u16(t))));\n    #else\n      for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {\n        uint16_t tmp = 0;\n        SIMDE_VECTORIZE_REDUCTION(+:tmp)\n        for (size_t j = 0 ; j < ((sizeof(r_.u8) / sizeof(r_.u8[0])) / 2) ; j++) {\n          const size_t e = j + (i * 8);\n          tmp += (a_.u8[e] > b_.u8[e]) ? 
(a_.u8[e] - b_.u8[e]) : (b_.u8[e] - a_.u8[e]);\n        }\n        r_.i64[i] = tmp;\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_sad_epu8(a, b) simde_mm_sad_epu8(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_set_epi8 (int8_t e15, int8_t e14, int8_t e13, int8_t e12,\n       int8_t e11, int8_t e10, int8_t  e9, int8_t  e8,\n       int8_t  e7, int8_t  e6, int8_t  e5, int8_t  e4,\n       int8_t  e3, int8_t  e2, int8_t  e1, int8_t  e0) {\n\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_set_epi8(\n      e15, e14, e13, e12, e11, e10,  e9,  e8,\n       e7,  e6,  e5,  e4,  e3,  e2,  e1,  e0);\n  #else\n    simde__m128i_private r_;\n\n    #if defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_i8x16_make(\n         e0,  e1,  e2,  e3,  e4,  e5,  e6,  e7,\n         e8,  e9, e10, e11, e12, e13, e14, e15);\n    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      SIMDE_ALIGN_LIKE_16(int8x16_t) int8_t data[16] = {\n        e0,  e1,  e2,  e3,\n        e4,  e5,  e6,  e7,\n        e8,  e9,  e10, e11,\n        e12, e13, e14, e15};\n      r_.neon_i8 = vld1q_s8(data);\n    #else\n      r_.i8[ 0] =  e0;\n      r_.i8[ 1] =  e1;\n      r_.i8[ 2] =  e2;\n      r_.i8[ 3] =  e3;\n      r_.i8[ 4] =  e4;\n      r_.i8[ 5] =  e5;\n      r_.i8[ 6] =  e6;\n      r_.i8[ 7] =  e7;\n      r_.i8[ 8] =  e8;\n      r_.i8[ 9] =  e9;\n      r_.i8[10] = e10;\n      r_.i8[11] = e11;\n      r_.i8[12] = e12;\n      r_.i8[13] = e13;\n      r_.i8[14] = e14;\n      r_.i8[15] = e15;\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_set_epi8(e15, e14, e13, e12, e11, e10,  e9,  e8,  e7,  e6,  e5,  e4,  e3,  e2,  e1,  e0) simde_mm_set_epi8(e15, e14, e13, e12, e11, e10,  e9,  e8,  e7,  e6,  e5,  e4,  e3,  e2,  e1,  e0)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_set_epi16 (int16_t e7, int16_t e6, int16_t e5, int16_t e4,\n        int16_t e3, int16_t e2, int16_t e1, int16_t e0) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_set_epi16(e7, e6, e5, e4, e3, e2, e1, e0);\n  #else\n    simde__m128i_private r_;\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      SIMDE_ALIGN_LIKE_16(int16x8_t) int16_t data[8] = { e0, e1, e2, e3, e4, e5, e6, e7 };\n      r_.neon_i16 = vld1q_s16(data);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_i16x8_make(e0, e1, e2, e3, e4, e5, e6, e7);\n    #else\n      r_.i16[0] = e0;\n      r_.i16[1] = e1;\n      r_.i16[2] = e2;\n      r_.i16[3] = e3;\n      r_.i16[4] = e4;\n      r_.i16[5] = e5;\n      r_.i16[6] = e6;\n      r_.i16[7] = e7;\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_set_epi16(e7,  e6,  e5,  e4,  e3,  e2,  e1,  e0) simde_mm_set_epi16(e7,  e6,  e5,  e4,  e3,  e2,  e1,  e0)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_loadu_si16 (void const* mem_addr) {\n  #if defined(SIMDE_X86_SSE2_NATIVE) && ( \\\n      SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \\\n      HEDLEY_INTEL_VERSION_CHECK(20,21,1) || \\\n      HEDLEY_GCC_VERSION_CHECK(12,1,0))\n    return _mm_loadu_si16(mem_addr);\n  #else\n    int16_t val;\n    simde_memcpy(&val, mem_addr, sizeof(val));\n    return simde_x_mm_cvtsi16_si128(val);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_loadu_si16(mem_addr) 
simde_mm_loadu_si16(mem_addr)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_set_epi32 (int32_t e3, int32_t e2, int32_t e1, int32_t e0) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_set_epi32(e3, e2, e1, e0);\n  #else\n    simde__m128i_private r_;\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      SIMDE_ALIGN_LIKE_16(int32x4_t) int32_t data[4] = { e0, e1, e2, e3 };\n      r_.neon_i32 = vld1q_s32(data);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_i32x4_make(e0, e1, e2, e3);\n    #else\n      r_.i32[0] = e0;\n      r_.i32[1] = e1;\n      r_.i32[2] = e2;\n      r_.i32[3] = e3;\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_set_epi32(e3,  e2,  e1,  e0) simde_mm_set_epi32(e3,  e2,  e1,  e0)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_loadu_si32 (void const* mem_addr) {\n  #if defined(SIMDE_X86_SSE2_NATIVE) && ( \\\n      SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \\\n      HEDLEY_INTEL_VERSION_CHECK(20,21,1) || \\\n      HEDLEY_GCC_VERSION_CHECK(12,1,0))\n    return _mm_loadu_si32(mem_addr);\n  #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n    return simde__m128i_from_wasm_v128(wasm_v128_load32_zero(mem_addr));\n  #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n    simde__m128i_private r_;\n    r_.neon_i32 = vsetq_lane_s32(* HEDLEY_REINTERPRET_CAST(const int32_t *, mem_addr), vdupq_n_s32(0), 0);\n    return simde__m128i_from_private(r_);\n  #else\n    int32_t val;\n    simde_memcpy(&val, mem_addr, sizeof(val));\n    return simde_mm_cvtsi32_si128(val);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_loadu_si32(mem_addr) simde_mm_loadu_si32(mem_addr)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_set_epi64 (simde__m64 e1, simde__m64 e0) {\n  #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_set_epi64(e1, e0);\n  #else\n    simde__m128i_private r_;\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i64 = vcombine_s64(simde__m64_to_neon_i64(e0), simde__m64_to_neon_i64(e1));\n    #else\n      r_.m64[0] = e0;\n      r_.m64[1] = e1;\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_set_epi64(e1, e0) (simde_mm_set_epi64((e1), (e0)))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_set_epi64x (int64_t e1, int64_t e0) {\n  #if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,0,0))\n    return _mm_set_epi64x(e1, e0);\n  #else\n    simde__m128i_private r_;\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      SIMDE_ALIGN_LIKE_16(int64x2_t) int64_t data[2] = {e0, e1};\n      r_.neon_i64 = vld1q_s64(data);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_i64x2_make(e0, e1);\n    #else\n      r_.i64[0] = e0;\n      r_.i64[1] = e1;\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_set_epi64x(e1, e0) simde_mm_set_epi64x(e1, e0)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_loadu_si64 (void const* mem_addr) {\n  #if defined(SIMDE_X86_SSE2_NATIVE) && ( \\\n      SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \\\n      HEDLEY_GCC_VERSION_CHECK(11,0,0) || \\\n      HEDLEY_INTEL_VERSION_CHECK(20,21,1))\n    return _mm_loadu_si64(mem_addr);\n  #else\n    int64_t val;\n    simde_memcpy(&val, mem_addr, 
sizeof(val));\n    return simde_mm_cvtsi64_si128(val);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_loadu_si64(mem_addr) simde_mm_loadu_si64(mem_addr)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_x_mm_set_epu8 (uint8_t e15, uint8_t e14, uint8_t e13, uint8_t e12,\n         uint8_t e11, uint8_t e10, uint8_t  e9, uint8_t  e8,\n         uint8_t  e7, uint8_t  e6, uint8_t  e5, uint8_t  e4,\n         uint8_t  e3, uint8_t  e2, uint8_t  e1, uint8_t  e0) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_set_epi8(\n      HEDLEY_STATIC_CAST(char, e15), HEDLEY_STATIC_CAST(char, e14), HEDLEY_STATIC_CAST(char, e13), HEDLEY_STATIC_CAST(char, e12),\n      HEDLEY_STATIC_CAST(char, e11), HEDLEY_STATIC_CAST(char, e10), HEDLEY_STATIC_CAST(char,  e9), HEDLEY_STATIC_CAST(char,  e8),\n      HEDLEY_STATIC_CAST(char,  e7), HEDLEY_STATIC_CAST(char,  e6), HEDLEY_STATIC_CAST(char,  e5), HEDLEY_STATIC_CAST(char,  e4),\n      HEDLEY_STATIC_CAST(char,  e3), HEDLEY_STATIC_CAST(char,  e2), HEDLEY_STATIC_CAST(char,  e1), HEDLEY_STATIC_CAST(char,  e0));\n  #else\n    simde__m128i_private r_;\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      SIMDE_ALIGN_LIKE_16(uint8x16_t) uint8_t data[16] = {\n        e0,  e1,  e2,  e3,\n        e4,  e5,  e6,  e7,\n        e8,  e9,  e10, e11,\n        e12, e13, e14, e15};\n      r_.neon_u8 = vld1q_u8(data);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_u8x16_make(e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15);\n    #else\n      r_.u8[ 0] =  e0; r_.u8[ 1] =  e1; r_.u8[ 2] =  e2; r_.u8[ 3] =  e3;\n      r_.u8[ 4] =  e4; r_.u8[ 5] =  e5; r_.u8[ 6] =  e6; r_.u8[ 7] =  e7;\n      r_.u8[ 8] =  e8; r_.u8[ 9] =  e9; r_.u8[10] = e10; r_.u8[11] = e11;\n      r_.u8[12] = e12; r_.u8[13] = e13; r_.u8[14] = e14; r_.u8[15] = e15;\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_x_mm_set_epu16 (uint16_t e7, uint16_t e6, uint16_t e5, uint16_t e4,\n          uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_set_epi16(\n      HEDLEY_STATIC_CAST(short,  e7), HEDLEY_STATIC_CAST(short,  e6), HEDLEY_STATIC_CAST(short,  e5), HEDLEY_STATIC_CAST(short,  e4),\n      HEDLEY_STATIC_CAST(short,  e3), HEDLEY_STATIC_CAST(short,  e2), HEDLEY_STATIC_CAST(short,  e1), HEDLEY_STATIC_CAST(short,  e0));\n  #else\n    simde__m128i_private r_;\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      SIMDE_ALIGN_LIKE_16(uint16x8_t) uint16_t data[8] = { e0, e1, e2, e3, e4, e5, e6, e7 };\n      r_.neon_u16 = vld1q_u16(data);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_u16x8_make(e0, e1, e2, e3, e4, e5, e6, e7);\n    #else\n      r_.u16[0] = e0; r_.u16[1] = e1; r_.u16[2] = e2; r_.u16[3] = e3;\n      r_.u16[4] = e4; r_.u16[5] = e5; r_.u16[6] = e6; r_.u16[7] = e7;\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_x_mm_set_epu32 (uint32_t e3, uint32_t e2, uint32_t e1, uint32_t e0) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_set_epi32(\n      HEDLEY_STATIC_CAST(int,  e3), HEDLEY_STATIC_CAST(int,  e2), HEDLEY_STATIC_CAST(int,  e1), HEDLEY_STATIC_CAST(int,  e0));\n  #else\n    simde__m128i_private r_;\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      SIMDE_ALIGN_LIKE_16(uint32x4_t) uint32_t data[4] = { e0, e1, e2, e3 };\n      r_.neon_u32 = vld1q_u32(data);\n    #elif 
defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_u32x4_make(e0, e1, e2, e3);\n    #else\n      r_.u32[0] = e0;\n      r_.u32[1] = e1;\n      r_.u32[2] = e2;\n      r_.u32[3] = e3;\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_x_mm_set_epu64x (uint64_t e1, uint64_t e0) {\n  #if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,0,0))\n    return _mm_set_epi64x(HEDLEY_STATIC_CAST(int64_t,  e1), HEDLEY_STATIC_CAST(int64_t,  e0));\n  #else\n    simde__m128i_private r_;\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      SIMDE_ALIGN_LIKE_16(uint64x2_t) uint64_t data[2] = {e0, e1};\n      r_.neon_u64 = vld1q_u64(data);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_u64x2_make(e0, e1);\n    #else\n      r_.u64[0] = e0;\n      r_.u64[1] = e1;\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_set_sd (simde_float64 a) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_set_sd(a);\n  #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n    return vsetq_lane_f64(a, vdupq_n_f64(SIMDE_FLOAT64_C(0.0)), 0);\n  #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n    return simde__m128d_from_wasm_v128(wasm_f64x2_make(a, 0));\n  #else\n    return simde_mm_set_pd(SIMDE_FLOAT64_C(0.0), a);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_set_sd(a) simde_mm_set_sd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_set1_epi8 (int8_t a) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_set1_epi8(a);\n  #else\n    simde__m128i_private r_;\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i8 = vdupq_n_s8(a);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_i8x16_splat(a);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)\n      r_.altivec_i8 = vec_splats(HEDLEY_STATIC_CAST(signed char, a));\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {\n        r_.i8[i] = a;\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_set1_epi8(a) simde_mm_set1_epi8(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_set1_epi16 (int16_t a) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_set1_epi16(a);\n  #else\n    simde__m128i_private r_;\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i16 = vdupq_n_s16(a);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_i16x8_splat(a);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)\n      r_.altivec_i16 = vec_splats(HEDLEY_STATIC_CAST(signed short, a));\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n        r_.i16[i] = a;\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_set1_epi16(a) simde_mm_set1_epi16(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_set1_epi32 (int32_t a) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_set1_epi32(a);\n  #else\n    simde__m128i_private r_;\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i32 = vdupq_n_s32(a);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = 
wasm_i32x4_splat(a);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)\n      r_.altivec_i32 = vec_splats(HEDLEY_STATIC_CAST(signed int, a));\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n        r_.i32[i] = a;\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_set1_epi32(a) simde_mm_set1_epi32(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_set1_epi64x (int64_t a) {\n  #if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,0,0))\n    return _mm_set1_epi64x(a);\n  #else\n    simde__m128i_private r_;\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i64 = vdupq_n_s64(a);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_i64x2_splat(a);\n    #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)\n      r_.altivec_i64 = vec_splats(HEDLEY_STATIC_CAST(signed long long, a));\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {\n        r_.i64[i] = a;\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_set1_epi64x(a) simde_mm_set1_epi64x(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_set1_epi64 (simde__m64 a) {\n  #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_set1_epi64(a);\n  #else\n    simde__m64_private a_ = simde__m64_to_private(a);\n    return simde_mm_set1_epi64x(a_.i64[0]);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_set1_epi64(a) simde_mm_set1_epi64(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_x_mm_set1_epu8 (uint8_t value) {\n  #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)\n    return simde__m128i_from_altivec_u8(vec_splats(HEDLEY_STATIC_CAST(unsigned char, value)));\n  #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n    return simde__m128i_from_wasm_v128(wasm_u8x16_splat(value));\n  #else\n    return simde_mm_set1_epi8(HEDLEY_STATIC_CAST(int8_t, value));\n  #endif\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_x_mm_set1_epu16 (uint16_t value) {\n  #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)\n    return simde__m128i_from_altivec_u16(vec_splats(HEDLEY_STATIC_CAST(unsigned short, value)));\n  #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n    return simde__m128i_from_wasm_v128(wasm_u16x8_splat(value));\n  #else\n    return simde_mm_set1_epi16(HEDLEY_STATIC_CAST(int16_t, value));\n  #endif\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_x_mm_set1_epu32 (uint32_t value) {\n  #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)\n    return simde__m128i_from_altivec_u32(vec_splats(HEDLEY_STATIC_CAST(unsigned int, value)));\n  #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n    return simde__m128i_from_wasm_v128(wasm_u32x4_splat(value));\n  #else\n    return simde_mm_set1_epi32(HEDLEY_STATIC_CAST(int32_t, value));\n  #endif\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_x_mm_set1_epu64 (uint64_t value) {\n  #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)\n    return simde__m128i_from_altivec_u64(vec_splats(HEDLEY_STATIC_CAST(unsigned long long, value)));\n  #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n    return simde__m128i_from_wasm_v128(wasm_u64x2_splat(value));\n  #else\n    return simde_mm_set1_epi64x(HEDLEY_STATIC_CAST(int64_t, 
value));\n  #endif\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_setr_epi8 (int8_t e15, int8_t e14, int8_t e13, int8_t e12,\n        int8_t e11, int8_t e10, int8_t  e9, int8_t  e8,\n        int8_t  e7, int8_t  e6, int8_t  e5, int8_t  e4,\n        int8_t  e3, int8_t  e2, int8_t  e1, int8_t  e0) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_setr_epi8(\n      e15, e14, e13, e12, e11, e10,  e9,    e8,\n      e7,  e6,  e5,  e4,  e3,  e2,  e1,  e0);\n  #else\n    return simde_mm_set_epi8(\n      e0, e1, e2, e3, e4, e5, e6, e7,\n      e8, e9, e10, e11, e12, e13, e14, e15);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_setr_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_setr_epi16 (int16_t e7, int16_t e6, int16_t e5, int16_t e4,\n         int16_t e3, int16_t e2, int16_t e1, int16_t e0) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_setr_epi16(e7,  e6,  e5,  e4,  e3,  e2,  e1,  e0);\n  #else\n    return simde_mm_set_epi16(e0, e1, e2, e3, e4, e5, e6, e7);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_setr_epi32 (int32_t e3, int32_t e2, int32_t e1, int32_t e0) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_setr_epi32(e3, e2, e1, e0);\n  #else\n    return simde_mm_set_epi32(e0, e1, e2, e3);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_setr_epi32(e3, e2, e1, e0) simde_mm_setr_epi32(e3, e2, e1, e0)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_setr_epi64 (simde__m64 e1, simde__m64 e0) {\n  #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_setr_epi64(e1, e0);\n  #else\n    return simde_mm_set_epi64(e0, e1);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_setr_epi64(e1, e0) (simde_mm_setr_epi64((e1), (e0)))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_setr_pd (simde_float64 e1, simde_float64 e0) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_setr_pd(e1, e0);\n  #else\n    return simde_mm_set_pd(e0, e1);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_setr_pd(e1, e0) simde_mm_setr_pd(e1, e0)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_setzero_pd (void) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_setzero_pd();\n  #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n    return simde__m128d_from_wasm_v128(wasm_f64x2_const(0.0, 0.0));\n  #else\n    return simde_mm_castsi128_pd(simde_mm_setzero_si128());\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_setzero_pd() simde_mm_setzero_pd()\n#endif\n\n#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_)\nHEDLEY_DIAGNOSTIC_PUSH\nSIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_undefined_pd (void) {\n  simde__m128d_private r_;\n\n  #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE__HAVE_UNDEFINED128)\n    r_.n = _mm_undefined_pd();\n  #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_)\n    r_ = simde__m128d_to_private(simde_mm_setzero_pd());\n  #endif\n\n  return simde__m128d_from_private(r_);\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define 
_mm_undefined_pd() simde_mm_undefined_pd()\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_undefined_si128 (void) {\n  simde__m128i_private r_;\n\n  #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE__HAVE_UNDEFINED128)\n    r_.n = _mm_undefined_si128();\n  #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_)\n    r_ = simde__m128i_to_private(simde_mm_setzero_si128());\n  #endif\n\n  return simde__m128i_from_private(r_);\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_undefined_si128() (simde_mm_undefined_si128())\n#endif\n\n#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_)\nHEDLEY_DIAGNOSTIC_POP\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_x_mm_setone_pd (void) {\n  return simde_mm_castps_pd(simde_x_mm_setone_ps());\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_x_mm_setone_si128 (void) {\n  return simde_mm_castps_si128(simde_x_mm_setone_ps());\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_shuffle_epi32 (simde__m128i a, const int imm8)\n    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255)  {\n  simde__m128i_private\n    r_,\n    a_ = simde__m128i_to_private(a);\n\n  for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n    r_.i32[i] = a_.i32[(imm8 >> (i * 2)) & 3];\n  }\n\n  return simde__m128i_from_private(r_);\n}\n#if defined(SIMDE_X86_SSE2_NATIVE)\n  #define simde_mm_shuffle_epi32(a, imm8) _mm_shuffle_epi32((a), (imm8))\n#elif defined(SIMDE_WASM_SIMD128_NATIVE)\n  #define simde_mm_shuffle_epi32(a, imm8) (__extension__ ({ \\\n      const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \\\n      simde__m128i_from_wasm_v128( \\\n        wasm_i32x4_shuffle( \\\n          (simde_tmp_a_).wasm_v128, \\\n          (simde_tmp_a_).wasm_v128, \\\n          ((imm8)     ) & 3, \\\n          ((imm8) >> 2) & 3, \\\n          ((imm8) >> 4) & 3, \\\n          ((imm8) >> 6) & 3)); }))\n#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_)\n  #define simde_mm_shuffle_epi32(a, imm8) \\\n    (__extension__ ({ \\\n      const int32x4_t simde_mm_shuffle_epi32_a_ = simde__m128i_to_neon_i32(a); \\\n      int32x4_t simde_mm_shuffle_epi32_r_; \\\n      simde_mm_shuffle_epi32_r_ = vmovq_n_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, (imm8) & (0x3))); \\\n      simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 2) & 0x3), simde_mm_shuffle_epi32_r_, 1); \\\n      simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 4) & 0x3), simde_mm_shuffle_epi32_r_, 2); \\\n      simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 6) & 0x3), simde_mm_shuffle_epi32_r_, 3); \\\n      vreinterpretq_s64_s32(simde_mm_shuffle_epi32_r_); \\\n    }))\n#elif defined(SIMDE_SHUFFLE_VECTOR_)\n  #define simde_mm_shuffle_epi32(a, imm8) (__extension__ ({ \\\n      const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \\\n      simde__m128i_from_private((simde__m128i_private) { .i32 = \\\n        SIMDE_SHUFFLE_VECTOR_(32, 16, \\\n          (simde_tmp_a_).i32, \\\n          (simde_tmp_a_).i32, \\\n          ((imm8)     ) & 3, \\\n          ((imm8) >> 2) & 3, \\\n          ((imm8) >> 4) & 3, \\\n          ((imm8) >> 6) & 3) }); }))\n#endif\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_shuffle_epi32(a, imm8) simde_mm_shuffle_epi32(a, imm8)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_shuffle_pd (simde__m128d a, simde__m128d b, const int imm8)\n    
SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3)  {\n  simde__m128d_private\n    r_,\n    a_ = simde__m128d_to_private(a),\n    b_ = simde__m128d_to_private(b);\n\n  r_.f64[0] = ((imm8 & 1) == 0) ? a_.f64[0] : a_.f64[1];\n  r_.f64[1] = ((imm8 & 2) == 0) ? b_.f64[0] : b_.f64[1];\n\n  return simde__m128d_from_private(r_);\n}\n#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI)\n  #define simde_mm_shuffle_pd(a, b, imm8) _mm_shuffle_pd((a), (b), (imm8))\n#elif defined(SIMDE_SHUFFLE_VECTOR_)\n  #define simde_mm_shuffle_pd(a, b, imm8) (__extension__ ({ \\\n      simde__m128d_from_private((simde__m128d_private) { .f64 = \\\n        SIMDE_SHUFFLE_VECTOR_(64, 16, \\\n          simde__m128d_to_private(a).f64, \\\n          simde__m128d_to_private(b).f64, \\\n          (((imm8)     ) & 1), \\\n          (((imm8) >> 1) & 1) + 2) }); }))\n#endif\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_shuffle_pd(a, b, imm8) simde_mm_shuffle_pd(a, b, imm8)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_shufflehi_epi16 (simde__m128i a, const int imm8)\n    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255)  {\n  simde__m128i_private\n    r_,\n    a_ = simde__m128i_to_private(a);\n\n  SIMDE_VECTORIZE\n  for (size_t i = 0 ; i < ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i++) {\n    r_.i16[i] = a_.i16[i];\n  }\n  for (size_t i = ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n    r_.i16[i] = a_.i16[((imm8 >> ((i - 4) * 2)) & 3) + 4];\n  }\n\n  return simde__m128i_from_private(r_);\n}\n#if defined(SIMDE_X86_SSE2_NATIVE)\n  #define simde_mm_shufflehi_epi16(a, imm8) _mm_shufflehi_epi16((a), (imm8))\n#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_)\n  #define simde_mm_shufflehi_epi16(a, imm8) \\\n    (__extension__ ({ \\\n      int16x8_t simde_mm_shufflehi_epi16_a_ = simde__m128i_to_neon_i16(a); \\\n      int16x8_t simde_mm_shufflehi_epi16_r_ = simde_mm_shufflehi_epi16_a_; \\\n      simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8)     ) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 4); \\\n      simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 2) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 5); \\\n      simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 4) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 6); \\\n      simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 6) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 7); \\\n      simde__m128i_from_neon_i16(simde_mm_shufflehi_epi16_r_); \\\n    }))\n#elif defined(SIMDE_WASM_SIMD128_NATIVE)\n  #define simde_mm_shufflehi_epi16(a, imm8) (__extension__ ({ \\\n      const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \\\n      simde__m128i_from_private((simde__m128i_private) { .wasm_v128 = \\\n        wasm_i16x8_shuffle( \\\n          (simde_tmp_a_).wasm_v128, \\\n          (simde_tmp_a_).wasm_v128, \\\n          0, 1, 2, 3, \\\n          (((imm8)     ) & 3) + 4, \\\n          (((imm8) >> 2) & 3) + 4, \\\n          (((imm8) >> 4) & 3) + 4, \\\n          (((imm8) >> 6) & 3) + 4) }); }))\n#elif defined(SIMDE_SHUFFLE_VECTOR_)\n  #define simde_mm_shufflehi_epi16(a, imm8) (__extension__ ({ \\\n      const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \\\n      simde__m128i_from_private((simde__m128i_private) { .i16 = \\\n        SIMDE_SHUFFLE_VECTOR_(16, 16, \\\n         
 (simde_tmp_a_).i16, \\\n          (simde_tmp_a_).i16, \\\n          0, 1, 2, 3, \\\n          (((imm8)     ) & 3) + 4, \\\n          (((imm8) >> 2) & 3) + 4, \\\n          (((imm8) >> 4) & 3) + 4, \\\n          (((imm8) >> 6) & 3) + 4) }); }))\n#endif\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_shufflehi_epi16(a, imm8) simde_mm_shufflehi_epi16(a, imm8)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_shufflelo_epi16 (simde__m128i a, const int imm8)\n    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255)  {\n  simde__m128i_private\n    r_,\n    a_ = simde__m128i_to_private(a);\n\n  for (size_t i = 0 ; i < ((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2) ; i++) {\n    r_.i16[i] = a_.i16[((imm8 >> (i * 2)) & 3)];\n  }\n  SIMDE_VECTORIZE\n  for (size_t i = ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n    r_.i16[i] = a_.i16[i];\n  }\n\n  return simde__m128i_from_private(r_);\n}\n#if defined(SIMDE_X86_SSE2_NATIVE)\n  #define simde_mm_shufflelo_epi16(a, imm8) _mm_shufflelo_epi16((a), (imm8))\n#elif defined(SIMDE_WASM_SIMD128_NATIVE)\n  #define simde_mm_shufflelo_epi16(a, imm8) \\\n    simde__m128i_from_wasm_v128(            \\\n      wasm_i16x8_shuffle(                   \\\n        simde__m128i_to_wasm_v128((a)),     \\\n        wasm_i16x8_splat(0),                \\\n        (((imm8) & 0x03)     ),             \\\n        (((imm8) & 0x0c) >> 2),             \\\n        (((imm8) & 0x30) >> 4),             \\\n        (((imm8) & 0xc0) >> 6),             \\\n        4, 5, 6, 7))\n#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_)\n  #define simde_mm_shufflelo_epi16(a, imm8) \\\n    (__extension__({ \\\n      int16x8_t simde_mm_shufflelo_epi16_a_ = simde__m128i_to_neon_i16(a); \\\n      int16x8_t simde_mm_shufflelo_epi16_r_ = simde_mm_shufflelo_epi16_a_; \\\n      simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8)     ) & 0x3)), simde_mm_shufflelo_epi16_r_, 0); \\\n      simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 2) & 0x3)), simde_mm_shufflelo_epi16_r_, 1); \\\n      simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 4) & 0x3)), simde_mm_shufflelo_epi16_r_, 2); \\\n      simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 6) & 0x3)), simde_mm_shufflelo_epi16_r_, 3); \\\n      simde__m128i_from_neon_i16(simde_mm_shufflelo_epi16_r_); \\\n    }))\n#elif defined(SIMDE_SHUFFLE_VECTOR_)\n  #define simde_mm_shufflelo_epi16(a, imm8) (__extension__ ({ \\\n      const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \\\n      simde__m128i_from_private((simde__m128i_private) { .i16 = \\\n        SIMDE_SHUFFLE_VECTOR_(16, 16, \\\n          (simde_tmp_a_).i16, \\\n          (simde_tmp_a_).i16, \\\n          (((imm8)     ) & 3), \\\n          (((imm8) >> 2) & 3), \\\n          (((imm8) >> 4) & 3), \\\n          (((imm8) >> 6) & 3), \\\n          4, 5, 6, 7) }); }))\n#endif\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_shufflelo_epi16(a, imm8) simde_mm_shufflelo_epi16(a, imm8)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_sll_epi16 (simde__m128i a, simde__m128i count) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_sll_epi16(a, count);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      count_ = 
simde__m128i_to_private(count);\n\n    if (count_.u64[0] > 15)\n      return simde_mm_setzero_si128();\n\n    #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)\n      r_.u16 = (a_.u16 << count_.u64[0]);\n    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u16 = vshlq_u16(a_.neon_u16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, count_.u64[0])));\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = ((wasm_i64x2_extract_lane(count_.wasm_v128, 0) < 16) ? wasm_i16x8_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_extract_lane(count_.wasm_v128, 0))) : wasm_i16x8_const(0,0,0,0,0,0,0,0));\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {\n        r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.u16[i] << count_.u64[0]));\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_sll_epi16(a, count) simde_mm_sll_epi16((a), (count))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_sll_epi32 (simde__m128i a, simde__m128i count) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_sll_epi32(a, count);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      count_ = simde__m128i_to_private(count);\n\n    if (count_.u64[0] > 31)\n      return simde_mm_setzero_si128();\n\n    #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)\n      r_.u32 = (a_.u32 << count_.u64[0]);\n    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u32 = vshlq_u32(a_.neon_u32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, count_.u64[0])));\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = ((wasm_i64x2_extract_lane(count_.wasm_v128, 0) < 32) ? wasm_i32x4_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_extract_lane(count_.wasm_v128, 0))) : wasm_i32x4_const(0,0,0,0));\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {\n        r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (a_.u32[i] << count_.u64[0]));\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_sll_epi32(a, count) (simde_mm_sll_epi32(a, (count)))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_sll_epi64 (simde__m128i a, simde__m128i count) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_sll_epi64(a, count);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      count_ = simde__m128i_to_private(count);\n\n    if (count_.u64[0] > 63)\n      return simde_mm_setzero_si128();\n\n    const int_fast16_t s = HEDLEY_STATIC_CAST(int_fast16_t, count_.u64[0]);\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(HEDLEY_STATIC_CAST(int64_t, s)));\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = (s < 64) ? 
wasm_i64x2_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, s)) : wasm_i64x2_const(0,0);\n    #else\n      #if !defined(SIMDE_BUG_GCC_94488)\n        SIMDE_VECTORIZE\n      #endif\n      for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {\n        r_.u64[i] = a_.u64[i] << s;\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_sll_epi64(a, count) (simde_mm_sll_epi64(a, (count)))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_sqrt_pd (simde__m128d a) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_sqrt_pd(a);\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      r_.neon_f64 = vsqrtq_f64(a_.neon_f64);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_f64x2_sqrt(a_.wasm_v128);\n    #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)\n      r_.altivec_f64 = vec_sqrt(a_.altivec_f64);\n    #elif defined(simde_math_sqrt)\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = simde_math_sqrt(a_.f64[i]);\n      }\n    #else\n      HEDLEY_UNREACHABLE();\n    #endif\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_sqrt_pd(a) simde_mm_sqrt_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_sqrt_sd (simde__m128d a, simde__m128d b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_sqrt_sd(a, b);\n  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS)\n    return simde_mm_move_sd(a, simde_mm_sqrt_pd(b));\n  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)\n    return simde_mm_move_sd(a, simde_mm_sqrt_pd(simde_x_mm_broadcastlow_pd(b)));\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a),\n      b_ = simde__m128d_to_private(b);\n\n    #if defined(simde_math_sqrt)\n      r_.f64[0] = simde_math_sqrt(b_.f64[0]);\n      r_.f64[1] = a_.f64[1];\n    #else\n      HEDLEY_UNREACHABLE();\n    #endif\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_sqrt_sd(a, b) simde_mm_sqrt_sd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_srl_epi16 (simde__m128i a, simde__m128i count) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_srl_epi16(a, count);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      count_ = simde__m128i_to_private(count);\n\n    const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 16 ? 
16 : count_.i64[0]));\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u16 = vshlq_u16(a_.neon_u16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt)));\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {\n        r_.u16[i] = a_.u16[i] >> cnt;\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_srl_epi16(a, count) (simde_mm_srl_epi16(a, (count)))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_srl_epi32 (simde__m128i a, simde__m128i count) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_srl_epi32(a, count);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      count_ = simde__m128i_to_private(count);\n\n    const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 32 ? 32 : count_.i64[0]));\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u32 = vshlq_u32(a_.neon_u32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, -cnt)));\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_u32x4_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt));\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {\n        r_.u32[i] = a_.u32[i] >> cnt;\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_srl_epi32(a, count) (simde_mm_srl_epi32(a, (count)))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_srl_epi64 (simde__m128i a, simde__m128i count) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_srl_epi64(a, count);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      count_ = simde__m128i_to_private(count);\n\n    const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 64 ? 64 : count_.i64[0]));\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(HEDLEY_STATIC_CAST(int64_t, -cnt)));\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_u64x2_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt));\n    #else\n      #if !defined(SIMDE_BUG_GCC_94488)\n        SIMDE_VECTORIZE\n      #endif\n      for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {\n        r_.u64[i] = a_.u64[i] >> cnt;\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_srl_epi64(a, count) (simde_mm_srl_epi64(a, (count)))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_srai_epi16 (simde__m128i a, const int imm8)\n    SIMDE_REQUIRE_RANGE(imm8, 0, 255) {\n  /* MSVC requires a range of (0, 255). */\n  simde__m128i_private\n    r_,\n    a_ = simde__m128i_to_private(a);\n\n  const int cnt = (imm8 & ~15) ? 
15 : imm8;\n\n  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n    r_.neon_i16 = vshlq_s16(a_.neon_i16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt)));\n  #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n    r_.wasm_v128 = wasm_i16x8_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt));\n  #else\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i++) {\n      r_.i16[i] = a_.i16[i] >> cnt;\n    }\n  #endif\n\n  return simde__m128i_from_private(r_);\n}\n#if defined(SIMDE_X86_SSE2_NATIVE)\n  #define simde_mm_srai_epi16(a, imm8) _mm_srai_epi16((a), (imm8))\n#endif\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_srai_epi16(a, imm8) simde_mm_srai_epi16(a, imm8)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_srai_epi32 (simde__m128i a, const int imm8)\n    SIMDE_REQUIRE_RANGE(imm8, 0, 255) {\n  /* MSVC requires a range of (0, 255). */\n  simde__m128i_private\n    r_,\n    a_ = simde__m128i_to_private(a);\n\n  const int cnt = (imm8 & ~31) ? 31 : imm8;\n\n  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n    r_.neon_i32 = vshlq_s32(a_.neon_i32, vdupq_n_s32(-cnt));\n  #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n    r_.wasm_v128 = wasm_i32x4_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt));\n  #else\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i32[0])) ; i++) {\n      r_.i32[i] = a_.i32[i] >> cnt;\n    }\n  #endif\n\n  return simde__m128i_from_private(r_);\n}\n#if defined(SIMDE_X86_SSE2_NATIVE)\n  #define simde_mm_srai_epi32(a, imm8) _mm_srai_epi32((a), (imm8))\n#endif\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_srai_epi32(a, imm8) simde_mm_srai_epi32(a, imm8)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_sra_epi16 (simde__m128i a, simde__m128i count) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_sra_epi16(a, count);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      count_ = simde__m128i_to_private(count);\n\n    const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 15 ? 15 : count_.i64[0]));\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i16 = vshlq_s16(a_.neon_i16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt)));\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_i16x8_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt));\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n        r_.i16[i] = a_.i16[i] >> cnt;\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_sra_epi16(a, count) (simde_mm_sra_epi16(a, count))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_sra_epi32 (simde__m128i a, simde__m128i count) {\n  #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_MM_SRA_EPI32)\n    return _mm_sra_epi32(a, count);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      count_ = simde__m128i_to_private(count);\n\n    const int cnt = count_.u64[0] > 31 ? 
31 : HEDLEY_STATIC_CAST(int, count_.u64[0]);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i32 = vshlq_s32(a_.neon_i32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, -cnt)));\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_i32x4_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt));\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n        r_.i32[i] = a_.i32[i] >> cnt;\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_sra_epi32(a, count) (simde_mm_sra_epi32(a, (count)))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_slli_epi16 (simde__m128i a, const int imm8)\n    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255)  {\n  if (HEDLEY_UNLIKELY((imm8 > 15))) {\n    return simde_mm_setzero_si128();\n  }\n\n  simde__m128i_private\n    r_,\n    a_ = simde__m128i_to_private(a);\n\n  #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)\n    r_.i16 = a_.i16 << SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8 & 0xff);\n  #else\n    const int s = (imm8 > HEDLEY_STATIC_CAST(int, sizeof(r_.i16[0]) * CHAR_BIT) - 1) ? 0 : imm8;\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n      r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] << s);\n    }\n  #endif\n\n  return simde__m128i_from_private(r_);\n}\n#if defined(SIMDE_X86_SSE2_NATIVE)\n  #define simde_mm_slli_epi16(a, imm8) _mm_slli_epi16(a, imm8)\n#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n  #define simde_mm_slli_epi16(a, imm8) \\\n    (((imm8) <= 0) ? \\\n      (a) : \\\n      simde__m128i_from_neon_i16( \\\n        ((imm8) > 15) ? \\\n          vandq_s16(simde__m128i_to_neon_i16(a), vdupq_n_s16(0)) : \\\n          vshlq_n_s16(simde__m128i_to_neon_i16(a), ((imm8) & 15))))\n#elif defined(SIMDE_WASM_SIMD128_NATIVE)\n  #define simde_mm_slli_epi16(a, imm8) \\\n    ((imm8 < 16) ? wasm_i16x8_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i16x8_const(0,0,0,0,0,0,0,0))\n#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)\n  #define simde_mm_slli_epi16(a, imm8) \\\n    ((imm8 & ~15) ? simde_mm_setzero_si128() : simde__m128i_from_altivec_i16(vec_sl(simde__m128i_to_altivec_i16(a), vec_splat_u16(HEDLEY_STATIC_CAST(unsigned short, imm8)))))\n#endif\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_slli_epi16(a, imm8) simde_mm_slli_epi16(a, imm8)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_slli_epi32 (simde__m128i a, const int imm8)\n    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255)  {\n  if (HEDLEY_UNLIKELY((imm8 > 31))) {\n    return simde_mm_setzero_si128();\n  }\n  simde__m128i_private\n    r_,\n    a_ = simde__m128i_to_private(a);\n\n  #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)\n    r_.i32 = a_.i32 << imm8;\n  #else\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n      r_.i32[i] = a_.i32[i] << (imm8 & 0xff);\n    }\n  #endif\n\n  return simde__m128i_from_private(r_);\n}\n#if defined(SIMDE_X86_SSE2_NATIVE)\n  #define simde_mm_slli_epi32(a, imm8) _mm_slli_epi32(a, imm8)\n#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n  #define simde_mm_slli_epi32(a, imm8) \\\n    (((imm8) <= 0) ? \\\n      (a) : \\\n      simde__m128i_from_neon_i32( \\\n        ((imm8) > 31) ? 
\\\n          vandq_s32(simde__m128i_to_neon_i32(a), vdupq_n_s32(0)) : \\\n          vshlq_n_s32(simde__m128i_to_neon_i32(a), ((imm8) & 31))))\n#elif defined(SIMDE_WASM_SIMD128_NATIVE)\n  #define simde_mm_slli_epi32(a, imm8) \\\n    ((imm8 < 32) ? wasm_i32x4_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i32x4_const(0,0,0,0))\n#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)\n  #define simde_mm_slli_epi32(a, imm8) \\\n     (__extension__ ({ \\\n       simde__m128i ret; \\\n       if ((imm8) <= 0) { \\\n         ret = a; \\\n       } else if ((imm8) > 31) { \\\n         ret = simde_mm_setzero_si128(); \\\n       } else { \\\n         ret = simde__m128i_from_altivec_i32( \\\n           vec_sl(simde__m128i_to_altivec_i32(a), \\\n             vec_splats(HEDLEY_STATIC_CAST(unsigned int, (imm8) & 31)))); \\\n       } \\\n       ret; \\\n     }))\n#endif\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_slli_epi32(a, imm8) simde_mm_slli_epi32(a, imm8)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_slli_epi64 (simde__m128i a, const int imm8)\n    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255)  {\n  if (HEDLEY_UNLIKELY((imm8 > 63))) {\n    return simde_mm_setzero_si128();\n  }\n  simde__m128i_private\n    r_,\n    a_ = simde__m128i_to_private(a);\n\n  #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)\n    r_.i64 = a_.i64 << imm8;\n  #else\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {\n      r_.i64[i] = a_.i64[i] << (imm8 & 0xff);\n    }\n  #endif\n\n  return simde__m128i_from_private(r_);\n}\n#if defined(SIMDE_X86_SSE2_NATIVE)\n  #define simde_mm_slli_epi64(a, imm8) _mm_slli_epi64(a, imm8)\n#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n  #define simde_mm_slli_epi64(a, imm8) \\\n    (((imm8) <= 0) ? \\\n      (a) : \\\n      simde__m128i_from_neon_i64( \\\n        ((imm8) > 63) ? \\\n          vandq_s64(simde__m128i_to_neon_i64(a), vdupq_n_s64(0)) : \\\n          vshlq_n_s64(simde__m128i_to_neon_i64(a), ((imm8) & 63))))\n#elif defined(SIMDE_WASM_SIMD128_NATIVE)\n  #define simde_mm_slli_epi64(a, imm8) \\\n    ((imm8 < 64) ? wasm_i64x2_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i64x2_const(0,0))\n#endif\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_slli_epi64(a, imm8) simde_mm_slli_epi64(a, imm8)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_srli_epi16 (simde__m128i a, const int imm8)\n    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255)  {\n  if (HEDLEY_UNLIKELY((imm8 > 15))) {\n    return simde_mm_setzero_si128();\n  }\n  simde__m128i_private\n    r_,\n    a_ = simde__m128i_to_private(a);\n\n  #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)\n    r_.u16 = a_.u16 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8);\n  #else\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n      r_.u16[i] = a_.u16[i] >> (imm8 & 0xff);\n    }\n  #endif\n\n  return simde__m128i_from_private(r_);\n}\n#if defined(SIMDE_X86_SSE2_NATIVE)\n  #define simde_mm_srli_epi16(a, imm8) _mm_srli_epi16(a, imm8)\n#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n  #define simde_mm_srli_epi16(a, imm8) \\\n    (((imm8) <= 0) ? \\\n      (a) : \\\n      simde__m128i_from_neon_u16( \\\n        ((imm8) > 15) ? \\\n          vandq_u16(simde__m128i_to_neon_u16(a), vdupq_n_u16(0)) : \\\n          vshrq_n_u16(simde__m128i_to_neon_u16(a), ((imm8) & 15) | (((imm8) & 15) == 0))))\n#elif defined(SIMDE_WASM_SIMD128_NATIVE)\n  #define simde_mm_srli_epi16(a, imm8) \\\n    ((imm8 < 16) ? 
wasm_u16x8_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i16x8_const(0,0,0,0,0,0,0,0))\n#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)\n  #define simde_mm_srli_epi16(a, imm8) \\\n    ((imm8 & ~15) ? simde_mm_setzero_si128() : simde__m128i_from_altivec_i16(vec_sr(simde__m128i_to_altivec_i16(a), vec_splat_u16(HEDLEY_STATIC_CAST(unsigned short, imm8)))))\n#endif\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_srli_epi16(a, imm8) simde_mm_srli_epi16(a, imm8)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_srli_epi32 (simde__m128i a, const int imm8)\n    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255)  {\n  if (HEDLEY_UNLIKELY((imm8 > 31))) {\n    return simde_mm_setzero_si128();\n  }\n  simde__m128i_private\n    r_,\n    a_ = simde__m128i_to_private(a);\n\n  #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)\n    r_.u32 = a_.u32 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8 & 0xff);\n  #else\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n      r_.u32[i] = a_.u32[i] >> (imm8 & 0xff);\n    }\n  #endif\n\n  return simde__m128i_from_private(r_);\n}\n#if defined(SIMDE_X86_SSE2_NATIVE)\n  #define simde_mm_srli_epi32(a, imm8) _mm_srli_epi32(a, imm8)\n#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n  #define simde_mm_srli_epi32(a, imm8) \\\n    (((imm8) <= 0) ? \\\n      (a) : \\\n      simde__m128i_from_neon_u32( \\\n        ((imm8) > 31) ? \\\n          vandq_u32(simde__m128i_to_neon_u32(a), vdupq_n_u32(0)) : \\\n          vshrq_n_u32(simde__m128i_to_neon_u32(a), ((imm8) & 31) | (((imm8) & 31) == 0))))\n#elif defined(SIMDE_WASM_SIMD128_NATIVE)\n  #define simde_mm_srli_epi32(a, imm8) \\\n    ((imm8 < 32) ? wasm_u32x4_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i32x4_const(0,0,0,0))\n#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)\n  #define simde_mm_srli_epi32(a, imm8) \\\n    (__extension__ ({ \\\n        simde__m128i ret; \\\n        if ((imm8) <= 0) { \\\n            ret = a; \\\n        } else if ((imm8) > 31) { \\\n            ret = simde_mm_setzero_si128(); \\\n        } else { \\\n            ret = simde__m128i_from_altivec_i32( \\\n              vec_sr(simde__m128i_to_altivec_i32(a), \\\n                vec_splats(HEDLEY_STATIC_CAST(unsigned int, (imm8) & 31)))); \\\n        } \\\n        ret; \\\n    }))\n#endif\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_srli_epi32(a, imm8) simde_mm_srli_epi32(a, imm8)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_srli_epi64 (simde__m128i a, const int imm8)\n    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255)  {\n  simde__m128i_private\n    r_,\n    a_ = simde__m128i_to_private(a);\n\n  if (HEDLEY_UNLIKELY((imm8 & 63) != imm8))\n    return simde_mm_setzero_si128();\n\n  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n    r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(-imm8));\n  #else\n    #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_94488)\n      r_.u64 = a_.u64 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {\n        r_.u64[i] = a_.u64[i] >> imm8;\n      }\n    #endif\n  #endif\n\n  return simde__m128i_from_private(r_);\n}\n#if defined(SIMDE_X86_SSE2_NATIVE)\n  #define simde_mm_srli_epi64(a, imm8) _mm_srli_epi64(a, imm8)\n#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n  #define simde_mm_srli_epi64(a, imm8) \\\n    (((imm8) <= 0) ? \\\n      (a) : \\\n      simde__m128i_from_neon_u64( \\\n        ((imm8) > 63) ? 
\\\n          vandq_u64(simde__m128i_to_neon_u64(a), vdupq_n_u64(0)) : \\\n          vshrq_n_u64(simde__m128i_to_neon_u64(a), ((imm8) & 63) | (((imm8) & 63) == 0))))\n#elif defined(SIMDE_WASM_SIMD128_NATIVE)\n  #define simde_mm_srli_epi64(a, imm8) \\\n    ((imm8 < 64) ? wasm_u64x2_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i64x2_const(0,0))\n#endif\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_srli_epi64(a, imm8) simde_mm_srli_epi64(a, imm8)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nvoid\nsimde_mm_store_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    _mm_store_pd(mem_addr, a);\n  #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n    vst1q_f64(mem_addr, simde__m128d_to_private(a).neon_f64);\n  #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n    vst1q_s64(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), simde__m128d_to_private(a).neon_i64);\n  #else\n    simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128d), &a, sizeof(a));\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_store_pd(mem_addr, a) simde_mm_store_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nvoid\nsimde_mm_store1_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    _mm_store1_pd(mem_addr, a);\n  #else\n    simde__m128d_private a_ = simde__m128d_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      vst1q_f64(mem_addr, vdupq_laneq_f64(a_.neon_f64, 0));\n    #else\n      mem_addr[0] = a_.f64[0];\n      mem_addr[1] = a_.f64[0];\n    #endif\n  #endif\n}\n#define simde_mm_store_pd1(mem_addr, a) simde_mm_store1_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a)\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_store1_pd(mem_addr, a) simde_mm_store1_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a)\n  #define _mm_store_pd1(mem_addr, a) simde_mm_store_pd1(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nvoid\nsimde_mm_store_sd (simde_float64* mem_addr, simde__m128d a) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    _mm_store_sd(mem_addr, a);\n  #else\n    simde__m128d_private a_ = simde__m128d_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      const simde_float64 v = vgetq_lane_f64(a_.neon_f64, 0);\n      simde_memcpy(mem_addr, &v, sizeof(v));\n    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      const int64_t v = vgetq_lane_s64(a_.neon_i64, 0);\n      simde_memcpy(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), &v, sizeof(v));\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0);\n    #else\n      simde_float64 v = a_.f64[0];\n      simde_memcpy(mem_addr, &v, sizeof(simde_float64));\n    #endif\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_store_sd(mem_addr, a) simde_mm_store_sd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nvoid\nsimde_mm_store_si128 (simde__m128i* mem_addr, simde__m128i a) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    _mm_store_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a);\n  #else\n    simde__m128i_private a_ = simde__m128i_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      vst1q_s32(HEDLEY_REINTERPRET_CAST(int32_t*, mem_addr), a_.neon_i32);\n    #else\n      simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128i), &a_, sizeof(a_));\n    #endif\n  
#endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_store_si128(mem_addr, a) simde_mm_store_si128(mem_addr, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nvoid\nsimde_mm_storeh_pd (simde_float64* mem_addr, simde__m128d a) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    _mm_storeh_pd(mem_addr, a);\n  #else\n    simde__m128d_private a_ = simde__m128d_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      *mem_addr = vgetq_lane_f64(a_.neon_f64, 1);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 1);\n    #else\n      *mem_addr = a_.f64[1];\n    #endif\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_storeh_pd(mem_addr, a) simde_mm_storeh_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nvoid\nsimde_mm_storel_epi64 (simde__m128i* mem_addr, simde__m128i a) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    _mm_storel_epi64(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a);\n  #else\n    simde__m128i_private a_ = simde__m128i_to_private(a);\n    int64_t tmp;\n\n    /* memcpy to prevent aliasing, tmp because we can't take the\n     * address of a vector element. */\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      tmp = vgetq_lane_s64(a_.neon_i64, 0);\n    #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)\n      #if defined(SIMDE_BUG_GCC_95227)\n        (void) a_;\n      #endif\n      tmp = vec_extract(a_.altivec_i64, 0);\n    #else\n      tmp = a_.i64[0];\n    #endif\n\n    simde_memcpy(mem_addr, &tmp, sizeof(tmp));\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_storel_epi64(mem_addr, a) simde_mm_storel_epi64(mem_addr, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nvoid\nsimde_mm_storel_pd (simde_float64* mem_addr, simde__m128d a) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    _mm_storel_pd(mem_addr, a);\n  #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n    wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), simde__m128d_to_wasm_v128(a), 0);\n  #else\n    simde__m128d_private a_ = simde__m128d_to_private(a);\n\n    simde_float64 tmp;\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      tmp = vgetq_lane_f64(a_.neon_f64, 0);\n    #else\n      tmp = a_.f64[0];\n    #endif\n    simde_memcpy(mem_addr, &tmp, sizeof(tmp));\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_storel_pd(mem_addr, a) simde_mm_storel_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nvoid\nsimde_mm_storer_pd (simde_float64 mem_addr[2], simde__m128d a) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    _mm_storer_pd(mem_addr, a);\n  #else\n    simde__m128d_private a_ = simde__m128d_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      vst1q_s64(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), vextq_s64(a_.neon_i64, a_.neon_i64, 1));\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      a_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, a_.wasm_v128, 1, 0);\n      simde_mm_store_pd(mem_addr, simde__m128d_from_private(a_));\n    #elif defined(SIMDE_SHUFFLE_VECTOR_)\n      a_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 1, 0);\n      simde_mm_store_pd(mem_addr, simde__m128d_from_private(a_));\n    #else\n      mem_addr[0] = a_.f64[1];\n      mem_addr[1] = a_.f64[0];\n    #endif\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_storer_pd(mem_addr, a) simde_mm_storer_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), 
a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nvoid\nsimde_mm_storeu_pd (simde_float64* mem_addr, simde__m128d a) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    _mm_storeu_pd(mem_addr, a);\n  #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n    vst1q_f64(mem_addr, simde__m128d_to_private(a).neon_f64);\n  #else\n    simde_memcpy(mem_addr, &a, sizeof(a));\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_storeu_pd(mem_addr, a) simde_mm_storeu_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nvoid\nsimde_mm_storeu_si128 (void* mem_addr, simde__m128i a) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    _mm_storeu_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a);\n  #else\n    simde_memcpy(mem_addr, &a, sizeof(a));\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_storeu_si128(mem_addr, a) simde_mm_storeu_si128(mem_addr, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nvoid\nsimde_mm_storeu_si16 (void* mem_addr, simde__m128i a) {\n  #if defined(SIMDE_X86_SSE2_NATIVE) && ( \\\n      SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \\\n      HEDLEY_GCC_VERSION_CHECK(11,0,0) || \\\n      HEDLEY_INTEL_VERSION_CHECK(20,21,1))\n    _mm_storeu_si16(mem_addr, a);\n  #else\n    int16_t val = simde_x_mm_cvtsi128_si16(a);\n    simde_memcpy(mem_addr, &val, sizeof(val));\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_storeu_si16(mem_addr, a) simde_mm_storeu_si16(mem_addr, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nvoid\nsimde_mm_storeu_si32 (void* mem_addr, simde__m128i a) {\n  #if defined(SIMDE_X86_SSE2_NATIVE) && ( \\\n      SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \\\n      HEDLEY_GCC_VERSION_CHECK(11,0,0) || \\\n      HEDLEY_INTEL_VERSION_CHECK(20,21,1))\n    _mm_storeu_si32(mem_addr, a);\n  #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n    wasm_v128_store32_lane(mem_addr, simde__m128i_to_wasm_v128(a), 0);\n  #else\n    int32_t val = simde_mm_cvtsi128_si32(a);\n    simde_memcpy(mem_addr, &val, sizeof(val));\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_storeu_si32(mem_addr, a) simde_mm_storeu_si32(mem_addr, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nvoid\nsimde_mm_storeu_si64 (void* mem_addr, simde__m128i a) {\n  #if defined(SIMDE_X86_SSE2_NATIVE) && ( \\\n      SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \\\n      HEDLEY_GCC_VERSION_CHECK(11,0,0) || \\\n      HEDLEY_INTEL_VERSION_CHECK(20,21,1))\n    _mm_storeu_si64(mem_addr, a);\n  #else\n    int64_t val = simde_mm_cvtsi128_si64(a);\n    simde_memcpy(mem_addr, &val, sizeof(val));\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_storeu_si64(mem_addr, a) simde_mm_storeu_si64(mem_addr, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nvoid\nsimde_mm_stream_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    _mm_stream_pd(mem_addr, a);\n  #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && ( \\\n      defined(SIMDE_VECTOR_SUBSCRIPT) || defined(SIMDE_ARM_NEON_A64V8_NATIVE) || \\\n      defined(SIMDE_WASM_SIMD128_NATIVE) || defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || \\\n      defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE))\n      __builtin_nontemporal_store(a, SIMDE_ALIGN_CAST(__typeof__(a)*, mem_addr));\n  #else\n    simde_mm_store_pd(mem_addr, a);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_stream_pd(mem_addr, a) simde_mm_stream_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), 
a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nvoid\nsimde_mm_stream_si128 (simde__m128i* mem_addr, simde__m128i a) {\n  #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64)\n    _mm_stream_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a);\n  #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && ( \\\n      defined(SIMDE_VECTOR_SUBSCRIPT) || defined(SIMDE_ARM_NEON_A32V7_NATIVE) || \\\n      defined(SIMDE_WASM_SIMD128_NATIVE) || defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || \\\n      defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE))\n      __builtin_nontemporal_store(a, SIMDE_ALIGN_CAST(__typeof__(a)*, mem_addr));\n  #else\n    simde_mm_store_si128(mem_addr, a);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_stream_si128(mem_addr, a) simde_mm_stream_si128(mem_addr, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nvoid\nsimde_mm_stream_si32 (int32_t* mem_addr, int32_t a) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    _mm_stream_si32(mem_addr, a);\n  #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store)\n    __builtin_nontemporal_store(a, mem_addr);\n  #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n    vst1q_lane_s32(mem_addr, vdupq_n_s32(a), 0);\n  #else\n    *mem_addr = a;\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_stream_si32(mem_addr, a) simde_mm_stream_si32(mem_addr, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nvoid\nsimde_mm_stream_si64 (int64_t* mem_addr, int64_t a) {\n  #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(HEDLEY_MSVC_VERSION)\n    _mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(long long int*, int64_t*, mem_addr), a);\n  #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store)\n    __builtin_nontemporal_store(a, mem_addr);\n  #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n    vst1_s64(mem_addr, vdup_n_s64(a));\n  #else\n    *mem_addr = a;\n  #endif\n}\n#define simde_mm_stream_si64x(mem_addr, a) simde_mm_stream_si64(mem_addr, a)\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64))\n  #define _mm_stream_si64(mem_addr, a) simde_mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(int64_t*, __int64*, mem_addr), a)\n  #define _mm_stream_si64x(mem_addr, a) simde_mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(int64_t*, __int64*, mem_addr), a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_sub_epi8 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_sub_epi8(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i8 = vsubq_s8(a_.neon_i8, b_.neon_i8);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_i8x16_sub(a_.wasm_v128, b_.wasm_v128);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i8 = a_.i8 - b_.i8;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {\n        r_.i8[i] = a_.i8[i] - b_.i8[i];\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_sub_epi8(a, b) simde_mm_sub_epi8(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_sub_epi16 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_sub_epi16(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i16 = vsubq_s16(a_.neon_i16, b_.neon_i16);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_i16x8_sub(a_.wasm_v128, b_.wasm_v128);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i16 = a_.i16 - b_.i16;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n        r_.i16[i] = a_.i16[i] - b_.i16[i];\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_sub_epi16(a, b) simde_mm_sub_epi16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_sub_epi32 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_sub_epi32(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i32 = vsubq_s32(a_.neon_i32, b_.neon_i32);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_i32x4_sub(a_.wasm_v128, b_.wasm_v128);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i32 = a_.i32 - b_.i32;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n        r_.i32[i] = a_.i32[i] - b_.i32[i];\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_sub_epi32(a, b) simde_mm_sub_epi32(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_sub_epi64 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_sub_epi64(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i64 = vsubq_s64(a_.neon_i64, b_.neon_i64);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_i64x2_sub(a_.wasm_v128, b_.wasm_v128);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i64 = a_.i64 - b_.i64;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {\n        r_.i64[i] = a_.i64[i] - b_.i64[i];\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_sub_epi64(a, b) simde_mm_sub_epi64(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_x_mm_sub_epu32 (simde__m128i a, simde__m128i b) {\n  simde__m128i_private\n    r_,\n    a_ = simde__m128i_to_private(a),\n    b_ = simde__m128i_to_private(b);\n\n  #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n    r_.u32 = a_.u32 - b_.u32;\n  #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n    r_.neon_u32 = vsubq_u32(a_.neon_u32, b_.neon_u32);\n  #else\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {\n      r_.u32[i] = a_.u32[i] - b_.u32[i];\n    }\n  #endif\n\n  return simde__m128i_from_private(r_);\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_sub_pd (simde__m128d a, simde__m128d b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_sub_pd(a, b);\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a),\n      b_ = simde__m128d_to_private(b);\n\n    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.f64 = a_.f64 - b_.f64;\n    #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      
r_.neon_f64 = vsubq_f64(a_.neon_f64, b_.neon_f64);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_f64x2_sub(a_.wasm_v128, b_.wasm_v128);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = a_.f64[i] - b_.f64[i];\n      }\n    #endif\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_sub_pd(a, b) simde_mm_sub_pd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_sub_sd (simde__m128d a, simde__m128d b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_sub_sd(a, b);\n  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS)\n    return simde_mm_move_sd(a, simde_mm_sub_pd(a, b));\n  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)\n    return simde_mm_move_sd(a, simde_mm_sub_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b)));\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a),\n      b_ = simde__m128d_to_private(b);\n\n    r_.f64[0] = a_.f64[0] - b_.f64[0];\n    r_.f64[1] = a_.f64[1];\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_sub_sd(a, b) simde_mm_sub_sd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_sub_si64 (simde__m64 a, simde__m64 b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_sub_si64(a, b);\n  #else\n    simde__m64_private\n      r_,\n      a_ = simde__m64_to_private(a),\n      b_ = simde__m64_to_private(b);\n\n    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i64 = a_.i64 - b_.i64;\n    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i64 = vsub_s64(a_.neon_i64, b_.neon_i64);\n    #else\n      r_.i64[0] = a_.i64[0] - b_.i64[0];\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_sub_si64(a, b) simde_mm_sub_si64(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_subs_epi8 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_subs_epi8(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i8 = vqsubq_s8(a_.neon_i8, b_.neon_i8);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_i8x16_sub_sat(a_.wasm_v128, b_.wasm_v128);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i8[0])) ; i++) {\n        r_.i8[i] = simde_math_subs_i8(a_.i8[i], b_.i8[i]);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_subs_epi8(a, b) simde_mm_subs_epi8(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_subs_epi16 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_subs_epi16(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i16 = vqsubq_s16(a_.neon_i16, b_.neon_i16);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_i16x8_sub_sat(a_.wasm_v128, b_.wasm_v128);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i++) {\n    
    r_.i16[i] = simde_math_subs_i16(a_.i16[i], b_.i16[i]);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_subs_epi16(a, b) simde_mm_subs_epi16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_subs_epu8 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_subs_epu8(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u8 = vqsubq_u8(a_.neon_u8, b_.neon_u8);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_u8x16_sub_sat(a_.wasm_v128, b_.wasm_v128);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)\n      r_.altivec_u8 = vec_subs(a_.altivec_u8, b_.altivec_u8);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.u8[0])) ; i++) {\n        r_.u8[i] = simde_math_subs_u8(a_.u8[i], b_.u8[i]);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_subs_epu8(a, b) simde_mm_subs_epu8(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_subs_epu16 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_subs_epu16(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u16 = vqsubq_u16(a_.neon_u16, b_.neon_u16);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_u16x8_sub_sat(a_.wasm_v128, b_.wasm_v128);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)\n      r_.altivec_u16 = vec_subs(a_.altivec_u16, b_.altivec_u16);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.u16[0])) ; i++) {\n        r_.u16[i] = simde_math_subs_u16(a_.u16[i], b_.u16[i]);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_subs_epu16(a, b) simde_mm_subs_epu16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nint\nsimde_mm_ucomieq_sd (simde__m128d a, simde__m128d b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_ucomieq_sd(a, b);\n  #else\n    simde__m128d_private\n      a_ = simde__m128d_to_private(a),\n      b_ = simde__m128d_to_private(b);\n    int r;\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64);\n      uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64);\n      uint64x2_t a_or_b_nan = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan))));\n      uint64x2_t a_eq_b = vceqq_f64(a_.neon_f64, b_.neon_f64);\n      r = !!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_eq_b), 0) != 0);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      return wasm_f64x2_extract_lane(a_.wasm_v128, 0) == wasm_f64x2_extract_lane(b_.wasm_v128, 0);\n    #elif defined(SIMDE_HAVE_FENV_H)\n      fenv_t envp;\n      int x = feholdexcept(&envp);\n      r =  a_.f64[0] == b_.f64[0];\n      if (HEDLEY_LIKELY(x == 0))\n        fesetenv(&envp);\n    #else\n      r =  a_.f64[0] == b_.f64[0];\n    #endif\n\n    return r;\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_ucomieq_sd(a, b) simde_mm_ucomieq_sd(a, 
b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nint\nsimde_mm_ucomige_sd (simde__m128d a, simde__m128d b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_ucomige_sd(a, b);\n  #else\n    simde__m128d_private\n      a_ = simde__m128d_to_private(a),\n      b_ = simde__m128d_to_private(b);\n    int r;\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64);\n      uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64);\n      uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan);\n      uint64x2_t a_ge_b = vcgeq_f64(a_.neon_f64, b_.neon_f64);\n      r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_ge_b), 0) != 0);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      return wasm_f64x2_extract_lane(a_.wasm_v128, 0) >= wasm_f64x2_extract_lane(b_.wasm_v128, 0);\n    #elif defined(SIMDE_HAVE_FENV_H)\n      fenv_t envp;\n      int x = feholdexcept(&envp);\n      r = a_.f64[0] >= b_.f64[0];\n      if (HEDLEY_LIKELY(x == 0))\n        fesetenv(&envp);\n    #else\n      r = a_.f64[0] >= b_.f64[0];\n    #endif\n\n    return r;\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_ucomige_sd(a, b) simde_mm_ucomige_sd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nint\nsimde_mm_ucomigt_sd (simde__m128d a, simde__m128d b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_ucomigt_sd(a, b);\n  #else\n    simde__m128d_private\n      a_ = simde__m128d_to_private(a),\n      b_ = simde__m128d_to_private(b);\n    int r;\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64);\n      uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64);\n      uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan);\n      uint64x2_t a_gt_b = vcgtq_f64(a_.neon_f64, b_.neon_f64);\n      r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_gt_b), 0) != 0);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      return wasm_f64x2_extract_lane(a_.wasm_v128, 0) > wasm_f64x2_extract_lane(b_.wasm_v128, 0);\n    #elif defined(SIMDE_HAVE_FENV_H)\n      fenv_t envp;\n      int x = feholdexcept(&envp);\n      r = a_.f64[0] > b_.f64[0];\n      if (HEDLEY_LIKELY(x == 0))\n        fesetenv(&envp);\n    #else\n      r = a_.f64[0] > b_.f64[0];\n    #endif\n\n    return r;\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_ucomigt_sd(a, b) simde_mm_ucomigt_sd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nint\nsimde_mm_ucomile_sd (simde__m128d a, simde__m128d b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_ucomile_sd(a, b);\n  #else\n    simde__m128d_private\n      a_ = simde__m128d_to_private(a),\n      b_ = simde__m128d_to_private(b);\n    int r;\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64);\n      uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64);\n      uint64x2_t a_or_b_nan = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan))));\n      uint64x2_t a_le_b = vcleq_f64(a_.neon_f64, b_.neon_f64);\n      r = !!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_le_b), 0) != 0);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      return wasm_f64x2_extract_lane(a_.wasm_v128, 0) <= wasm_f64x2_extract_lane(b_.wasm_v128, 0);\n    #elif defined(SIMDE_HAVE_FENV_H)\n      fenv_t envp;\n      int x = feholdexcept(&envp);\n      r = a_.f64[0] <= b_.f64[0];\n      if (HEDLEY_LIKELY(x == 0))\n        fesetenv(&envp);\n    #else\n      r = a_.f64[0] <= b_.f64[0];\n 
   #endif\n\n    return r;\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_ucomile_sd(a, b) simde_mm_ucomile_sd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nint\nsimde_mm_ucomilt_sd (simde__m128d a, simde__m128d b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_ucomilt_sd(a, b);\n  #else\n    simde__m128d_private\n      a_ = simde__m128d_to_private(a),\n      b_ = simde__m128d_to_private(b);\n    int r;\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64);\n      uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64);\n      uint64x2_t a_or_b_nan = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan))));\n      uint64x2_t a_lt_b = vcltq_f64(a_.neon_f64, b_.neon_f64);\n      r = !!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_lt_b), 0) != 0);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      return wasm_f64x2_extract_lane(a_.wasm_v128, 0) < wasm_f64x2_extract_lane(b_.wasm_v128, 0);\n    #elif defined(SIMDE_HAVE_FENV_H)\n      fenv_t envp;\n      int x = feholdexcept(&envp);\n      r = a_.f64[0] < b_.f64[0];\n      if (HEDLEY_LIKELY(x == 0))\n        fesetenv(&envp);\n    #else\n      r = a_.f64[0] < b_.f64[0];\n    #endif\n\n    return r;\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_ucomilt_sd(a, b) simde_mm_ucomilt_sd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nint\nsimde_mm_ucomineq_sd (simde__m128d a, simde__m128d b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_ucomineq_sd(a, b);\n  #else\n    simde__m128d_private\n      a_ = simde__m128d_to_private(a),\n      b_ = simde__m128d_to_private(b);\n    int r;\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64);\n      uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64);\n      uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan);\n      uint64x2_t a_neq_b = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(a_.neon_f64, b_.neon_f64))));\n      r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_neq_b), 0) != 0);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      return wasm_f64x2_extract_lane(a_.wasm_v128, 0) != wasm_f64x2_extract_lane(b_.wasm_v128, 0);\n    #elif defined(SIMDE_HAVE_FENV_H)\n      fenv_t envp;\n      int x = feholdexcept(&envp);\n      r = a_.f64[0] != b_.f64[0];\n      if (HEDLEY_LIKELY(x == 0))\n        fesetenv(&envp);\n    #else\n      r = a_.f64[0] != b_.f64[0];\n    #endif\n\n    return r;\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_ucomineq_sd(a, b) simde_mm_ucomineq_sd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nvoid\nsimde_mm_lfence (void) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    _mm_lfence();\n  #else\n    simde_mm_sfence();\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_lfence() simde_mm_lfence()\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nvoid\nsimde_mm_mfence (void) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    _mm_mfence();\n  #else\n    simde_mm_sfence();\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_mfence() simde_mm_mfence()\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_unpackhi_epi8 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_unpackhi_epi8(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if 
defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      r_.neon_i8 = vzip2q_s8(a_.neon_i8, b_.neon_i8);\n    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      int8x8_t a1 = vreinterpret_s8_s16(vget_high_s16(a_.neon_i16));\n      int8x8_t b1 = vreinterpret_s8_s16(vget_high_s16(b_.neon_i16));\n      int8x8x2_t result = vzip_s8(a1, b1);\n      r_.neon_i8 = vcombine_s8(result.val[0], result.val[1]);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_i8x16_shuffle(a_.wasm_v128, b_.wasm_v128, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31);\n    #elif defined(SIMDE_SHUFFLE_VECTOR_)\n      r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, b_.i8, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i8[0])) / 2) ; i++) {\n        r_.i8[(i * 2)]     = a_.i8[i + ((sizeof(r_) / sizeof(r_.i8[0])) / 2)];\n        r_.i8[(i * 2) + 1] = b_.i8[i + ((sizeof(r_) / sizeof(r_.i8[0])) / 2)];\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_unpackhi_epi8(a, b) simde_mm_unpackhi_epi8(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_unpackhi_epi16 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_unpackhi_epi16(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      r_.neon_i16 = vzip2q_s16(a_.neon_i16, b_.neon_i16);\n    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      int16x4_t a1 = vget_high_s16(a_.neon_i16);\n      int16x4_t b1 = vget_high_s16(b_.neon_i16);\n      int16x4x2_t result = vzip_s16(a1, b1);\n      r_.neon_i16 = vcombine_s16(result.val[0], result.val[1]);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 4, 12, 5, 13, 6, 14, 7, 15);\n    #elif defined(SIMDE_SHUFFLE_VECTOR_)\n      r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 4, 12, 5, 13, 6, 14, 7, 15);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i16[0])) / 2) ; i++) {\n        r_.i16[(i * 2)]     = a_.i16[i + ((sizeof(r_) / sizeof(r_.i16[0])) / 2)];\n        r_.i16[(i * 2) + 1] = b_.i16[i + ((sizeof(r_) / sizeof(r_.i16[0])) / 2)];\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_unpackhi_epi16(a, b) simde_mm_unpackhi_epi16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_unpackhi_epi32 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_unpackhi_epi32(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      r_.neon_i32 = vzip2q_s32(a_.neon_i32, b_.neon_i32);\n    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      int32x2_t a1 = vget_high_s32(a_.neon_i32);\n      int32x2_t b1 = vget_high_s32(b_.neon_i32);\n      int32x2x2_t result = vzip_s32(a1, b1);\n      r_.neon_i32 = vcombine_s32(result.val[0], result.val[1]);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 6, 3, 7);\n    #elif defined(SIMDE_SHUFFLE_VECTOR_)\n      r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 2, 6, 3, 7);\n    
#else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i32[0])) / 2) ; i++) {\n        r_.i32[(i * 2)]     = a_.i32[i + ((sizeof(r_) / sizeof(r_.i32[0])) / 2)];\n        r_.i32[(i * 2) + 1] = b_.i32[i + ((sizeof(r_) / sizeof(r_.i32[0])) / 2)];\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_unpackhi_epi32(a, b) simde_mm_unpackhi_epi32(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_unpackhi_epi64 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_unpackhi_epi64(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      int64x1_t a_h = vget_high_s64(a_.neon_i64);\n      int64x1_t b_h = vget_high_s64(b_.neon_i64);\n      r_.neon_i64 = vcombine_s64(a_h, b_h);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3);\n    #elif defined(SIMDE_SHUFFLE_VECTOR_)\n      r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.i64, b_.i64, 1, 3);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i64[0])) / 2) ; i++) {\n        r_.i64[(i * 2)]     = a_.i64[i + ((sizeof(r_) / sizeof(r_.i64[0])) / 2)];\n        r_.i64[(i * 2) + 1] = b_.i64[i + ((sizeof(r_) / sizeof(r_.i64[0])) / 2)];\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_unpackhi_epi64(a, b) simde_mm_unpackhi_epi64(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_unpackhi_pd (simde__m128d a, simde__m128d b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_unpackhi_pd(a, b);\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a),\n      b_ = simde__m128d_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      r_.neon_f64 = vzip2q_f64(a_.neon_f64, b_.neon_f64);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3);\n    #elif defined(SIMDE_SHUFFLE_VECTOR_)\n      r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 1, 3);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.f64[0])) / 2) ; i++) {\n        r_.f64[(i * 2)]     = a_.f64[i + ((sizeof(r_) / sizeof(r_.f64[0])) / 2)];\n        r_.f64[(i * 2) + 1] = b_.f64[i + ((sizeof(r_) / sizeof(r_.f64[0])) / 2)];\n      }\n    #endif\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_unpackhi_pd(a, b) simde_mm_unpackhi_pd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_unpacklo_epi8 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_unpacklo_epi8(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      r_.neon_i8 = vzip1q_s8(a_.neon_i8, b_.neon_i8);\n    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      int8x8_t a1 = vreinterpret_s8_s16(vget_low_s16(a_.neon_i16));\n      int8x8_t b1 = vreinterpret_s8_s16(vget_low_s16(b_.neon_i16));\n      int8x8x2_t result = vzip_s8(a1, b1);\n      r_.neon_i8 = vcombine_s8(result.val[0], result.val[1]);\n    #elif 
defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_i8x16_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23);\n    #elif defined(SIMDE_SHUFFLE_VECTOR_)\n      r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, b_.i8, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i8[0])) / 2) ; i++) {\n        r_.i8[(i * 2)]     = a_.i8[i];\n        r_.i8[(i * 2) + 1] = b_.i8[i];\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_unpacklo_epi8(a, b) simde_mm_unpacklo_epi8(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_unpacklo_epi16 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_unpacklo_epi16(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      r_.neon_i16 = vzip1q_s16(a_.neon_i16, b_.neon_i16);\n    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      int16x4_t a1 = vget_low_s16(a_.neon_i16);\n      int16x4_t b1 = vget_low_s16(b_.neon_i16);\n      int16x4x2_t result = vzip_s16(a1, b1);\n      r_.neon_i16 = vcombine_s16(result.val[0], result.val[1]);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 8, 1, 9, 2, 10, 3, 11);\n    #elif defined(SIMDE_SHUFFLE_VECTOR_)\n      r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 0, 8, 1, 9, 2, 10, 3, 11);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i16[0])) / 2) ; i++) {\n        r_.i16[(i * 2)]     = a_.i16[i];\n        r_.i16[(i * 2) + 1] = b_.i16[i];\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_unpacklo_epi16(a, b) simde_mm_unpacklo_epi16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_unpacklo_epi32 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_unpacklo_epi32(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      r_.neon_i32 = vzip1q_s32(a_.neon_i32, b_.neon_i32);\n    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      int32x2_t a1 = vget_low_s32(a_.neon_i32);\n      int32x2_t b1 = vget_low_s32(b_.neon_i32);\n      int32x2x2_t result = vzip_s32(a1, b1);\n      r_.neon_i32 = vcombine_s32(result.val[0], result.val[1]);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 4, 1, 5);\n    #elif defined(SIMDE_SHUFFLE_VECTOR_)\n      r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 0, 4, 1, 5);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i32[0])) / 2) ; i++) {\n        r_.i32[(i * 2)]     = a_.i32[i];\n        r_.i32[(i * 2) + 1] = b_.i32[i];\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_unpacklo_epi32(a, b) simde_mm_unpacklo_epi32(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_unpacklo_epi64 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_unpacklo_epi64(a, b);\n  
#else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      int64x1_t a_l = vget_low_s64(a_.neon_i64);\n      int64x1_t b_l = vget_low_s64(b_.neon_i64);\n      r_.neon_i64 = vcombine_s64(a_l, b_l);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2);\n    #elif defined(SIMDE_SHUFFLE_VECTOR_)\n      r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.i64, b_.i64, 0, 2);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i64[0])) / 2) ; i++) {\n        r_.i64[(i * 2)]     = a_.i64[i];\n        r_.i64[(i * 2) + 1] = b_.i64[i];\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_unpacklo_epi64(a, b) simde_mm_unpacklo_epi64(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_unpacklo_pd (simde__m128d a, simde__m128d b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_unpacklo_pd(a, b);\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a),\n      b_ = simde__m128d_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      r_.neon_f64 = vzip1q_f64(a_.neon_f64, b_.neon_f64);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2);\n    #elif defined(SIMDE_SHUFFLE_VECTOR_)\n      r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 0, 2);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.f64[0])) / 2) ; i++) {\n        r_.f64[(i * 2)]     = a_.f64[i];\n        r_.f64[(i * 2) + 1] = b_.f64[i];\n      }\n    #endif\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_unpacklo_pd(a, b) simde_mm_unpacklo_pd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_x_mm_negate_pd(simde__m128d a) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return simde_mm_xor_pd(a, _mm_set1_pd(SIMDE_FLOAT64_C(-0.0)));\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a);\n\n    #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && \\\n        (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,1,0))\n      r_.altivec_f64 = vec_neg(a_.altivec_f64);\n    #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      r_.neon_f64 = vnegq_f64(a_.neon_f64);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_f64x2_neg(a_.wasm_v128);\n    #elif defined(SIMDE_VECTOR_NEGATE)\n      r_.f64 = -a_.f64;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = -a_.f64[i];\n      }\n    #endif\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_xor_si128 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_xor_si128(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i32 = veorq_s32(a_.neon_i32, b_.neon_i32);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)\n      r_.altivec_i32 = vec_xor(a_.altivec_i32, b_.altivec_i32);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_v128_xor(b_.wasm_v128, a_.wasm_v128);\n    
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i32f = a_.i32f ^ b_.i32f;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) {\n        r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i];\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _mm_xor_si128(a, b) simde_mm_xor_si128(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_x_mm_not_si128 (simde__m128i a) {\n  #if defined(SIMDE_X86_AVX512VL_NATIVE)\n    return _mm_ternarylogic_epi32(a, a, a, 0x55);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i32 = vmvnq_s32(a_.neon_i32);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)\n      r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_v128_not(a_.wasm_v128);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i32f = ~a_.i32f;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) {\n        r_.i32f[i] = ~(a_.i32f[i]);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n\n#define SIMDE_MM_SHUFFLE2(x, y) (((x) << 1) | (y))\n#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)\n  #define _MM_SHUFFLE2(x, y) SIMDE_MM_SHUFFLE2(x, y)\n#endif\n\nSIMDE_END_DECLS_\n\nHEDLEY_DIAGNOSTIC_POP\n\n#endif /* !defined(SIMDE_X86_SSE2_H) */\n"
  },
  {
    "path": "external_libs/pgenlib/simde/x86/sse3.h",
    "content": "/* SPDX-License-Identifier: MIT\n *\n * Permission is hereby granted, free of charge, to any person\n * obtaining a copy of this software and associated documentation\n * files (the \"Software\"), to deal in the Software without\n * restriction, including without limitation the rights to use, copy,\n * modify, merge, publish, distribute, sublicense, and/or sell copies\n * of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be\n * included in all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND,\n * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\n * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\n * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\n * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\n * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\n * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n * SOFTWARE.\n *\n * Copyright:\n *   2017-2020 Evan Nemerson <evan@nemerson.com>\n */\n\n#if !defined(SIMDE_X86_SSE3_H)\n#define SIMDE_X86_SSE3_H\n\n#include \"sse2.h\"\n\nHEDLEY_DIAGNOSTIC_PUSH\nSIMDE_DISABLE_UNWANTED_DIAGNOSTICS\nSIMDE_BEGIN_DECLS_\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_x_mm_deinterleaveeven_epi16 (simde__m128i a, simde__m128i b) {\n  simde__m128i_private\n    r_,\n    a_ = simde__m128i_to_private(a),\n    b_ = simde__m128i_to_private(b);\n\n  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n    r_.neon_i16 = vuzp1q_s16(a_.neon_i16, b_.neon_i16);\n  #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n    int16x8x2_t t = vuzpq_s16(a_.neon_i16, b_.neon_i16);\n    r_.neon_i16 = t.val[0];\n  #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n    r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2, 4, 6, 8, 10, 12, 14);\n  #elif defined(SIMDE_SHUFFLE_VECTOR_)\n    r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 0, 2, 4, 6, 8, 10, 12, 14);\n  #else\n    const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2;\n    for(size_t i = 0 ; i < halfway_point ; i++) {\n      r_.i16[i] = a_.i16[2 * i];\n      r_.i16[i + halfway_point] = b_.i16[2 * i];\n    }\n  #endif\n\n  return simde__m128i_from_private(r_);\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_x_mm_deinterleaveodd_epi16 (simde__m128i a, simde__m128i b) {\n  simde__m128i_private\n    r_,\n    a_ = simde__m128i_to_private(a),\n    b_ = simde__m128i_to_private(b);\n\n  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n    r_.neon_i16 = vuzp2q_s16(a_.neon_i16, b_.neon_i16);\n  #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n    int16x8x2_t t = vuzpq_s16(a_.neon_i16, b_.neon_i16);\n    r_.neon_i16 = t.val[1];\n  #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n    r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3, 5, 7, 9, 11, 13, 15);\n  #elif defined(SIMDE_SHUFFLE_VECTOR_)\n    r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 1, 3, 5, 7, 9, 11, 13, 15);\n  #else\n    const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2;\n    for(size_t i = 0 ; i < halfway_point ; i++) {\n      r_.i16[i] = a_.i16[2 * i + 1];\n      r_.i16[i + halfway_point] = b_.i16[2 * i + 1];\n    }\n  #endif\n\n  return simde__m128i_from_private(r_);\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_x_mm_deinterleaveeven_epi32 (simde__m128i a, simde__m128i b) {\n  simde__m128i_private\n  
  r_,\n    a_ = simde__m128i_to_private(a),\n    b_ = simde__m128i_to_private(b);\n\n  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n    r_.neon_i32 = vuzp1q_s32(a_.neon_i32, b_.neon_i32);\n  #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n    int32x4x2_t t = vuzpq_s32(a_.neon_i32, b_.neon_i32);\n    r_.neon_i32 = t.val[0];\n  #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n    r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2, 4, 6);\n  #elif defined(SIMDE_SHUFFLE_VECTOR_)\n    r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 0, 2, 4, 6);\n  #else\n    const size_t halfway_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 2;\n    for(size_t i = 0 ; i < halfway_point ; i++) {\n      r_.i32[i] = a_.i32[2 * i];\n      r_.i32[i + halfway_point] = b_.i32[2 * i];\n    }\n  #endif\n\n  return simde__m128i_from_private(r_);\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_x_mm_deinterleaveodd_epi32 (simde__m128i a, simde__m128i b) {\n  simde__m128i_private\n    r_,\n    a_ = simde__m128i_to_private(a),\n    b_ = simde__m128i_to_private(b);\n\n  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n    r_.neon_i32 = vuzp2q_s32(a_.neon_i32, b_.neon_i32);\n  #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n    int32x4x2_t t = vuzpq_s32(a_.neon_i32, b_.neon_i32);\n    r_.neon_i32 = t.val[1];\n  #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n    r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3, 5, 7);\n  #elif defined(SIMDE_SHUFFLE_VECTOR_)\n    r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 1, 3, 5, 7);\n  #else\n    const size_t halfway_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 2;\n    for(size_t i = 0 ; i < halfway_point ; i++) {\n      r_.i32[i] = a_.i32[2 * i + 1];\n      r_.i32[i + halfway_point] = b_.i32[2 * i + 1];\n    }\n  #endif\n\n  return simde__m128i_from_private(r_);\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_x_mm_deinterleaveeven_ps (simde__m128 a, simde__m128 b) {\n  simde__m128_private\n    r_,\n    a_ = simde__m128_to_private(a),\n    b_ = simde__m128_to_private(b);\n\n  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n    r_.neon_f32 = vuzp1q_f32(a_.neon_f32, b_.neon_f32);\n  #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n    float32x4x2_t t = vuzpq_f32(a_.neon_f32, b_.neon_f32);\n    r_.neon_f32 = t.val[0];\n  #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n    r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2, 4, 6);\n  #elif defined(SIMDE_SHUFFLE_VECTOR_)\n    r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 2, 4, 6);\n  #else\n    const size_t halfway_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 2;\n    for(size_t i = 0 ; i < halfway_point ; i++) {\n      r_.f32[i] = a_.f32[2 * i];\n      r_.f32[i + halfway_point] = b_.f32[2 * i];\n    }\n  #endif\n\n  return simde__m128_from_private(r_);\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_x_mm_deinterleaveodd_ps (simde__m128 a, simde__m128 b) {\n  simde__m128_private\n    r_,\n    a_ = simde__m128_to_private(a),\n    b_ = simde__m128_to_private(b);\n\n  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n    r_.neon_f32 = vuzp2q_f32(a_.neon_f32, b_.neon_f32);\n  #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n    float32x4x2_t t = vuzpq_f32(a_.neon_f32, b_.neon_f32);\n    r_.neon_f32 = t.val[1];\n  #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n    r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3, 5, 7);\n  #elif defined(SIMDE_SHUFFLE_VECTOR_)\n    r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 1, 3, 5, 7);\n  #else\n    const size_t halfway_point = (sizeof(r_.f32) / 
sizeof(r_.f32[0])) / 2;\n    for(size_t i = 0 ; i < halfway_point ; i++) {\n      r_.f32[i] = a_.f32[2 * i + 1];\n      r_.f32[i + halfway_point] = b_.f32[2 * i + 1];\n    }\n  #endif\n\n  return simde__m128_from_private(r_);\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_x_mm_deinterleaveeven_pd (simde__m128d a, simde__m128d b) {\n  simde__m128d_private\n    r_,\n    a_ = simde__m128d_to_private(a),\n    b_ = simde__m128d_to_private(b);\n\n  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n    r_.neon_f64 = vuzp1q_f64(a_.neon_f64, b_.neon_f64);\n  #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n    r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2);\n  #elif defined(SIMDE_SHUFFLE_VECTOR_)\n    r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 0, 2);\n  #else\n    const size_t halfway_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 2;\n    for(size_t i = 0 ; i < halfway_point ; i++) {\n      r_.f64[i] = a_.f64[2 * i];\n      r_.f64[i + halfway_point] = b_.f64[2 * i];\n    }\n  #endif\n\n  return simde__m128d_from_private(r_);\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_x_mm_deinterleaveodd_pd (simde__m128d a, simde__m128d b) {\n  simde__m128d_private\n    r_,\n    a_ = simde__m128d_to_private(a),\n    b_ = simde__m128d_to_private(b);\n\n  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n    r_.neon_f64 = vuzp2q_f64(a_.neon_f64, b_.neon_f64);\n  #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n    r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3);\n  #elif defined(SIMDE_SHUFFLE_VECTOR_)\n    r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 1, 3);\n  #else\n    const size_t halfway_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 2;\n    for(size_t i = 0 ; i < halfway_point ; i++) {\n      r_.f64[i] = a_.f64[2 * i + 1];\n      r_.f64[i + halfway_point] = b_.f64[2 * i + 1];\n    }\n  #endif\n\n  return simde__m128d_from_private(r_);\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_addsub_pd (simde__m128d a, simde__m128d b) {\n  #if defined(SIMDE_X86_SSE3_NATIVE)\n    return _mm_addsub_pd(a, b);\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a),\n      b_ = simde__m128d_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      float64x2_t rs = vsubq_f64(a_.neon_f64, b_.neon_f64);\n      float64x2_t ra = vaddq_f64(a_.neon_f64, b_.neon_f64);\n      return vcombine_f64(vget_low_f64(rs), vget_high_f64(ra));\n    #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_)\n      r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64 - b_.f64, a_.f64 + b_.f64, 0, 3);\n    #else\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) {\n        r_.f64[  i  ] = a_.f64[  i  ] - b_.f64[  i  ];\n        r_.f64[1 + i] = a_.f64[1 + i] + b_.f64[1 + i];\n      }\n    #endif\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES)\n#  define _mm_addsub_pd(a, b) simde_mm_addsub_pd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_addsub_ps (simde__m128 a, simde__m128 b) {\n  #if defined(SIMDE_X86_SSE3_NATIVE)\n    return _mm_addsub_ps(a, b);\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a),\n      b_ = simde__m128_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      float32x4_t rs = vsubq_f32(a_.neon_f32, b_.neon_f32);\n      float32x4_t ra = vaddq_f32(a_.neon_f32, b_.neon_f32);\n      return vtrn2q_f32(vreinterpretq_f32_s32(vrev64q_s32(vreinterpretq_s32_f32(rs))), ra);\n    #elif 
(SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_)\n      r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32 - b_.f32, a_.f32 + b_.f32, 0, 5, 2, 7);\n    #else\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) {\n        r_.f32[  i  ] = a_.f32[  i  ] - b_.f32[  i  ];\n        r_.f32[1 + i] = a_.f32[1 + i] + b_.f32[1 + i];\n      }\n    #endif\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES)\n#  define _mm_addsub_ps(a, b) simde_mm_addsub_ps((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_hadd_pd (simde__m128d a, simde__m128d b) {\n  #if defined(SIMDE_X86_SSE3_NATIVE)\n    return _mm_hadd_pd(a, b);\n  #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n    return simde__m128d_from_neon_f64(vpaddq_f64(simde__m128d_to_neon_f64(a), simde__m128d_to_neon_f64(b)));\n  #else\n    return simde_mm_add_pd(simde_x_mm_deinterleaveeven_pd(a, b), simde_x_mm_deinterleaveodd_pd(a, b));\n  #endif\n}\n#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES)\n#  define _mm_hadd_pd(a, b) simde_mm_hadd_pd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_hadd_ps (simde__m128 a, simde__m128 b) {\n  #if defined(SIMDE_X86_SSE3_NATIVE)\n    return _mm_hadd_ps(a, b);\n  #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n    return simde__m128_from_neon_f32(vpaddq_f32(simde__m128_to_neon_f32(a), simde__m128_to_neon_f32(b)));\n  #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n    float32x4x2_t t = vuzpq_f32(simde__m128_to_neon_f32(a), simde__m128_to_neon_f32(b));\n    return simde__m128_from_neon_f32(vaddq_f32(t.val[0], t.val[1]));\n  #else\n    return simde_mm_add_ps(simde_x_mm_deinterleaveeven_ps(a, b), simde_x_mm_deinterleaveodd_ps(a, b));\n  #endif\n}\n#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES)\n#  define _mm_hadd_ps(a, b) simde_mm_hadd_ps(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_hsub_pd (simde__m128d a, simde__m128d b) {\n  #if defined(SIMDE_X86_SSE3_NATIVE)\n    return _mm_hsub_pd(a, b);\n  #else\n    return simde_mm_sub_pd(simde_x_mm_deinterleaveeven_pd(a, b), simde_x_mm_deinterleaveodd_pd(a, b));\n  #endif\n}\n#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES)\n#  define _mm_hsub_pd(a, b) simde_mm_hsub_pd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_hsub_ps (simde__m128 a, simde__m128 b) {\n  #if defined(SIMDE_X86_SSE3_NATIVE)\n    return _mm_hsub_ps(a, b);\n  #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n    float32x4x2_t t = vuzpq_f32(simde__m128_to_neon_f32(a), simde__m128_to_neon_f32(b));\n    return simde__m128_from_neon_f32(vaddq_f32(t.val[0], vnegq_f32(t.val[1])));\n  #else\n    return simde_mm_sub_ps(simde_x_mm_deinterleaveeven_ps(a, b), simde_x_mm_deinterleaveodd_ps(a, b));\n  #endif\n}\n#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES)\n#  define _mm_hsub_ps(a, b) simde_mm_hsub_ps(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_lddqu_si128 (simde__m128i const* mem_addr) {\n  #if defined(SIMDE_X86_SSE3_NATIVE)\n    return _mm_lddqu_si128(mem_addr);\n  #else\n    simde__m128i_private r_;\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i32 = vld1q_s32(HEDLEY_REINTERPRET_CAST(int32_t const*, mem_addr));\n    #else\n      simde_memcpy(&r_, mem_addr, sizeof(r_));\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES)\n#  define _mm_lddqu_si128(mem_addr) 
simde_mm_lddqu_si128(mem_addr)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_loaddup_pd (simde_float64 const* mem_addr) {\n  #if defined(SIMDE_X86_SSE3_NATIVE)\n    return _mm_loaddup_pd(mem_addr);\n  #else\n    simde__m128d_private r_;\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      r_.neon_f64 = vdupq_n_f64(*mem_addr);\n    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i64 = vdupq_n_s64(*HEDLEY_REINTERPRET_CAST(int64_t const*, mem_addr));\n    #else\n      r_.f64[0] = *mem_addr;\n      r_.f64[1] = *mem_addr;\n    #endif\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES)\n#  define _mm_loaddup_pd(mem_addr) simde_mm_loaddup_pd(mem_addr)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_movedup_pd (simde__m128d a) {\n  #if defined(SIMDE_X86_SSE3_NATIVE)\n    return _mm_movedup_pd(a);\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      r_.neon_f64 = vdupq_laneq_f64(a_.neon_f64, 0);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_SHUFFLE_VECTOR_)\n      r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 0, 0);\n    #else\n      r_.f64[0] = a_.f64[0];\n      r_.f64[1] = a_.f64[0];\n    #endif\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES)\n#  define _mm_movedup_pd(a) simde_mm_movedup_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_movehdup_ps (simde__m128 a) {\n  #if defined(SIMDE_X86_SSE3_NATIVE)\n    return _mm_movehdup_ps(a);\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      r_.neon_f32 = vtrn2q_f32(a_.neon_f32, a_.neon_f32);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 1, 1, 3, 3);\n    #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_)\n      r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 1, 1, 3, 3);\n    #else\n      r_.f32[0] = a_.f32[1];\n      r_.f32[1] = a_.f32[1];\n      r_.f32[2] = a_.f32[3];\n      r_.f32[3] = a_.f32[3];\n    #endif\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES)\n#  define _mm_movehdup_ps(a) simde_mm_movehdup_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_moveldup_ps (simde__m128 a) {\n  #if defined(SIMDE_X86_SSE3_NATIVE)\n    return _mm_moveldup_ps(a);\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      r_.neon_f32 = vtrn1q_f32(a_.neon_f32, a_.neon_f32);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0, 2, 2);\n    #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_)\n      r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 2, 2);\n    #else\n      r_.f32[0] = a_.f32[0];\n      r_.f32[1] = a_.f32[0];\n      r_.f32[2] = a_.f32[2];\n      r_.f32[3] = a_.f32[2];\n    #endif\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES)\n#  define _mm_moveldup_ps(a) simde_mm_moveldup_ps(a)\n#endif\n\nSIMDE_END_DECLS_\n\nHEDLEY_DIAGNOSTIC_POP\n\n#endif /* 
!defined(SIMDE_X86_SSE3_H) */\n"
  },
  {
    "path": "external_libs/pgenlib/simde/x86/sse4.1.h",
    "content": "/* SPDX-License-Identifier: MIT\n *\n * Permission is hereby granted, free of charge, to any person\n * obtaining a copy of this software and associated documentation\n * files (the \"Software\"), to deal in the Software without\n * restriction, including without limitation the rights to use, copy,\n * modify, merge, publish, distribute, sublicense, and/or sell copies\n * of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be\n * included in all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND,\n * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\n * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\n * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\n * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\n * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\n * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n * SOFTWARE.\n *\n * Copyright:\n *   2017-2020 Evan Nemerson <evan@nemerson.com>\n */\n\n#include \"sse.h\"\n#if !defined(SIMDE_X86_SSE4_1_H)\n#define SIMDE_X86_SSE4_1_H\n\n#include \"ssse3.h\"\n\nHEDLEY_DIAGNOSTIC_PUSH\nSIMDE_DISABLE_UNWANTED_DIAGNOSTICS\nSIMDE_BEGIN_DECLS_\n\n#if !defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES)\n#  define SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_blend_epi16 (simde__m128i a, simde__m128i b, const int imm8)\n    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255)  {\n  simde__m128i_private\n    r_,\n    a_ = simde__m128i_to_private(a),\n    b_ = simde__m128i_to_private(b);\n\n  SIMDE_VECTORIZE\n  for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {\n    r_.u16[i] = ((imm8 >> i) & 1) ? b_.u16[i] : a_.u16[i];\n  }\n\n  return simde__m128i_from_private(r_);\n}\n#if defined(SIMDE_X86_SSE4_1_NATIVE)\n  #define simde_mm_blend_epi16(a, b, imm8) _mm_blend_epi16(a, b, imm8)\n#elif defined(SIMDE_SHUFFLE_VECTOR_)\n  #define simde_mm_blend_epi16(a, b, imm8) \\\n    (__extension__ ({ \\\n      simde__m128i_private \\\n        simde_mm_blend_epi16_a_ = simde__m128i_to_private(a), \\\n        simde_mm_blend_epi16_b_ = simde__m128i_to_private(b), \\\n        simde_mm_blend_epi16_r_; \\\n      \\\n      simde_mm_blend_epi16_r_.i16 = \\\n        SIMDE_SHUFFLE_VECTOR_( \\\n          16, 16, \\\n          simde_mm_blend_epi16_a_.i16, \\\n          simde_mm_blend_epi16_b_.i16, \\\n          ((imm8) & (1 << 0)) ?  8 : 0, \\\n          ((imm8) & (1 << 1)) ?  9 : 1, \\\n          ((imm8) & (1 << 2)) ? 10 : 2, \\\n          ((imm8) & (1 << 3)) ? 11 : 3, \\\n          ((imm8) & (1 << 4)) ? 12 : 4, \\\n          ((imm8) & (1 << 5)) ? 13 : 5, \\\n          ((imm8) & (1 << 6)) ? 14 : 6, \\\n          ((imm8) & (1 << 7)) ? 
15 : 7  \\\n        ); \\\n      \\\n      simde__m128i_from_private(simde_mm_blend_epi16_r_); \\\n    }))\n#endif\n#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)\n  #undef _mm_blend_epi16\n  #define _mm_blend_epi16(a, b, imm8) simde_mm_blend_epi16(a, b, imm8)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_blend_pd (simde__m128d a, simde__m128d b, const int imm8)\n    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3)  {\n  simde__m128d_private\n    r_,\n    a_ = simde__m128d_to_private(a),\n    b_ = simde__m128d_to_private(b);\n\n  SIMDE_VECTORIZE\n  for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n    r_.f64[i] = ((imm8 >> i) & 1) ? b_.f64[i] : a_.f64[i];\n  }\n  return simde__m128d_from_private(r_);\n}\n#if defined(SIMDE_X86_SSE4_1_NATIVE)\n  #define simde_mm_blend_pd(a, b, imm8) _mm_blend_pd(a, b, imm8)\n#elif defined(SIMDE_SHUFFLE_VECTOR_)\n  #define simde_mm_blend_pd(a, b, imm8) \\\n    (__extension__ ({ \\\n      simde__m128d_private \\\n        simde_mm_blend_pd_a_ = simde__m128d_to_private(a), \\\n        simde_mm_blend_pd_b_ = simde__m128d_to_private(b), \\\n        simde_mm_blend_pd_r_; \\\n      \\\n      simde_mm_blend_pd_r_.f64 = \\\n        SIMDE_SHUFFLE_VECTOR_( \\\n          64, 16, \\\n          simde_mm_blend_pd_a_.f64, \\\n          simde_mm_blend_pd_b_.f64, \\\n          ((imm8) & (1 << 0)) ?  2 : 0, \\\n          ((imm8) & (1 << 1)) ?  3 : 1  \\\n        ); \\\n      \\\n      simde__m128d_from_private(simde_mm_blend_pd_r_); \\\n    }))\n#endif\n#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)\n  #undef _mm_blend_pd\n  #define _mm_blend_pd(a, b, imm8) simde_mm_blend_pd(a, b, imm8)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_blend_ps (simde__m128 a, simde__m128 b, const int imm8)\n    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15)  {\n  simde__m128_private\n    r_,\n    a_ = simde__m128_to_private(a),\n    b_ = simde__m128_to_private(b);\n\n  SIMDE_VECTORIZE\n  for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n    r_.f32[i] = ((imm8 >> i) & 1) ? b_.f32[i] : a_.f32[i];\n  }\n  return simde__m128_from_private(r_);\n}\n#if defined(SIMDE_X86_SSE4_1_NATIVE)\n#  define simde_mm_blend_ps(a, b, imm8) _mm_blend_ps(a, b, imm8)\n#elif defined(SIMDE_SHUFFLE_VECTOR_)\n  #define simde_mm_blend_ps(a, b, imm8) \\\n    (__extension__ ({ \\\n      simde__m128_private \\\n        simde_mm_blend_ps_a_ = simde__m128_to_private(a), \\\n        simde_mm_blend_ps_b_ = simde__m128_to_private(b), \\\n        simde_mm_blend_ps_r_; \\\n      \\\n      simde_mm_blend_ps_r_.f32 = \\\n        SIMDE_SHUFFLE_VECTOR_( \\\n          32, 16, \\\n          simde_mm_blend_ps_a_.f32, \\\n          simde_mm_blend_ps_b_.f32, \\\n          ((imm8) & (1 << 0)) ? 4 : 0, \\\n          ((imm8) & (1 << 1)) ? 5 : 1, \\\n          ((imm8) & (1 << 2)) ? 6 : 2, \\\n          ((imm8) & (1 << 3)) ? 
7 : 3  \\\n        ); \\\n      \\\n      simde__m128_from_private(simde_mm_blend_ps_r_); \\\n    }))\n#endif\n#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)\n  #undef _mm_blend_ps\n  #define _mm_blend_ps(a, b, imm8) simde_mm_blend_ps(a, b, imm8)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_blendv_epi8 (simde__m128i a, simde__m128i b, simde__m128i mask) {\n  #if defined(SIMDE_X86_SSE4_1_NATIVE)\n    return _mm_blendv_epi8(a, b, mask);\n  #elif defined(SIMDE_X86_SSE2_NATIVE)\n    __m128i m = _mm_cmpgt_epi8(_mm_setzero_si128(), mask);\n    return _mm_xor_si128(_mm_subs_epu8(_mm_xor_si128(a, b), m), b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b),\n      mask_ = simde__m128i_to_private(mask);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      /* Use a signed shift right to create a mask with the sign bit */\n      mask_.neon_i8 = vshrq_n_s8(mask_.neon_i8, 7);\n      r_.neon_i8 = vbslq_s8(mask_.neon_u8, b_.neon_i8, a_.neon_i8);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      v128_t m = wasm_i8x16_shr(mask_.wasm_v128, 7);\n      r_.wasm_v128 = wasm_v128_bitselect(b_.wasm_v128, a_.wasm_v128, m);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)\n      r_.altivec_i8 = vec_sel(a_.altivec_i8, b_.altivec_i8, vec_cmplt(mask_.altivec_i8, vec_splat_s8(0)));\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      /* https://software.intel.com/en-us/forums/intel-c-compiler/topic/850087 */\n      #if defined(HEDLEY_INTEL_VERSION_CHECK)\n        __typeof__(mask_.i8) z = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };\n        mask_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(mask_.i8), mask_.i8 < z);\n      #else\n        mask_.i8 >>= (CHAR_BIT * sizeof(mask_.i8[0])) - 1;\n      #endif\n\n      r_.i8 = (mask_.i8 & b_.i8) | (~mask_.i8 & a_.i8);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {\n        int8_t m = mask_.i8[i] >> 7;\n        r_.i8[i] = (m & b_.i8[i]) | (~m & a_.i8[i]);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)\n  #undef _mm_blendv_epi8\n  #define _mm_blendv_epi8(a, b, mask) simde_mm_blendv_epi8(a, b, mask)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_x_mm_blendv_epi16 (simde__m128i a, simde__m128i b, simde__m128i mask) {\n  #if defined(SIMDE_X86_SSE2_NATIVE)\n    mask = simde_mm_srai_epi16(mask, 15);\n    return simde_mm_or_si128(simde_mm_and_si128(mask, b), simde_mm_andnot_si128(mask, a));\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b),\n      mask_ = simde__m128i_to_private(mask);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      mask_ = simde__m128i_to_private(simde_mm_cmplt_epi16(mask, simde_mm_setzero_si128()));\n      r_.neon_i16 = vbslq_s16(mask_.neon_u16, b_.neon_i16, a_.neon_i16);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)\n      r_.altivec_i16 = vec_sel(a_.altivec_i16, b_.altivec_i16, vec_cmplt(mask_.altivec_i16, vec_splat_s16(0)));\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      #if defined(HEDLEY_INTEL_VERSION_CHECK)\n        __typeof__(mask_.i16) z = { 0, 0, 0, 0, 0, 0, 0, 0 };\n        mask_.i16 = mask_.i16 < z;\n      #else\n        mask_.i16 >>= (CHAR_BIT * sizeof(mask_.i16[0])) - 1;\n      #endif\n\n      r_.i16 = (mask_.i16 & 
b_.i16) | (~mask_.i16 & a_.i16);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n        int16_t m = mask_.i16[i] >> 15;\n        r_.i16[i] = (m & b_.i16[i]) | (~m & a_.i16[i]);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_x_mm_blendv_epi32 (simde__m128i a, simde__m128i b, simde__m128i mask) {\n  #if defined(SIMDE_X86_SSE4_1_NATIVE)\n    return _mm_castps_si128(_mm_blendv_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), _mm_castsi128_ps(mask)));\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b),\n      mask_ = simde__m128i_to_private(mask);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      mask_ = simde__m128i_to_private(simde_mm_cmplt_epi32(mask, simde_mm_setzero_si128()));\n      r_.neon_i32 = vbslq_s32(mask_.neon_u32, b_.neon_i32, a_.neon_i32);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      v128_t m = wasm_i32x4_shr(mask_.wasm_v128, 31);\n      r_.wasm_v128 = wasm_v128_or(wasm_v128_and(b_.wasm_v128, m), wasm_v128_andnot(a_.wasm_v128, m));\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)\n      r_.altivec_i32 = vec_sel(a_.altivec_i32, b_.altivec_i32, vec_cmplt(mask_.altivec_i32, vec_splat_s32(0)));\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      #if defined(HEDLEY_INTEL_VERSION_CHECK)\n        __typeof__(mask_.i32) z = { 0, 0, 0, 0 };\n        mask_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(mask_.i32), mask_.i32 < z);\n      #else\n        mask_.i32 >>= (CHAR_BIT * sizeof(mask_.i32[0])) - 1;\n      #endif\n\n      r_.i32 = (mask_.i32 & b_.i32) | (~mask_.i32 & a_.i32);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n        int32_t m = mask_.i32[i] >> 31;\n        r_.i32[i] = (m & b_.i32[i]) | (~m & a_.i32[i]);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_x_mm_blendv_epi64 (simde__m128i a, simde__m128i b, simde__m128i mask) {\n  #if defined(SIMDE_X86_SSE4_1_NATIVE)\n    return _mm_castpd_si128(_mm_blendv_pd(_mm_castsi128_pd(a), _mm_castsi128_pd(b), _mm_castsi128_pd(mask)));\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b),\n      mask_ = simde__m128i_to_private(mask);\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      mask_.neon_u64 = vcltq_s64(mask_.neon_i64, vdupq_n_s64(UINT64_C(0)));\n      r_.neon_i64 = vbslq_s64(mask_.neon_u64, b_.neon_i64, a_.neon_i64);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      v128_t m = wasm_i64x2_shr(mask_.wasm_v128, 63);\n      r_.wasm_v128 = wasm_v128_or(wasm_v128_and(b_.wasm_v128, m), wasm_v128_andnot(a_.wasm_v128, m));\n    #elif (defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_BUG_CLANG_46770)) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)\n      r_.altivec_i64 = vec_sel(a_.altivec_i64, b_.altivec_i64, vec_cmplt(mask_.altivec_i64, vec_splats(HEDLEY_STATIC_CAST(signed long long, 0))));\n    #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)\n      SIMDE_POWER_ALTIVEC_VECTOR(signed long long) selector = vec_sra(mask_.altivec_i64, vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 63)));\n      r_.altivec_i32 = vec_sel(a_.altivec_i32, b_.altivec_i32, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), selector));\n    #elif 
defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      #if defined(HEDLEY_INTEL_VERSION_CHECK)\n        __typeof__(mask_.i64) z = { 0, 0 };\n        mask_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(mask_.i64), mask_.i64 < z);\n      #else\n        mask_.i64 >>= (CHAR_BIT * sizeof(mask_.i64[0])) - 1;\n      #endif\n\n      r_.i64 = (mask_.i64 & b_.i64) | (~mask_.i64 & a_.i64);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {\n        int64_t m = mask_.i64[i] >> 63;\n        r_.i64[i] = (m & b_.i64[i]) | (~m & a_.i64[i]);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_blendv_pd (simde__m128d a, simde__m128d b, simde__m128d mask) {\n  #if defined(SIMDE_X86_SSE4_1_NATIVE)\n    return _mm_blendv_pd(a, b, mask);\n  #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n    v128_t m_ = wasm_i64x2_shr(HEDLEY_REINTERPRET_CAST(v128_t, mask), 63);\n    return simde__m128d_from_wasm_v128(wasm_v128_bitselect(simde__m128d_to_wasm_v128(b), simde__m128d_to_wasm_v128(a), m_));\n  #else\n    return simde_mm_castsi128_pd(simde_x_mm_blendv_epi64(simde_mm_castpd_si128(a), simde_mm_castpd_si128(b), simde_mm_castpd_si128(mask)));\n  #endif\n}\n#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)\n  #undef _mm_blendv_pd\n  #define _mm_blendv_pd(a, b, mask) simde_mm_blendv_pd(a, b, mask)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_blendv_ps (simde__m128 a, simde__m128 b, simde__m128 mask) {\n  #if defined(SIMDE_X86_SSE4_1_NATIVE)\n    return _mm_blendv_ps(a, b, mask);\n  #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n    v128_t m_ = wasm_i32x4_shr(HEDLEY_REINTERPRET_CAST(v128_t, mask), 31);\n    return simde__m128_from_wasm_v128(wasm_v128_bitselect(simde__m128_to_wasm_v128(b), simde__m128_to_wasm_v128(a), m_));\n  #else\n    return simde_mm_castsi128_ps(simde_x_mm_blendv_epi32(simde_mm_castps_si128(a), simde_mm_castps_si128(b), simde_mm_castps_si128(mask)));\n  #endif\n}\n#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)\n  #undef _mm_blendv_ps\n  #define _mm_blendv_ps(a, b, mask) simde_mm_blendv_ps(a, b, mask)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_round_pd (simde__m128d a, int rounding)\n    SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) {\n  simde__m128d_private\n    r_,\n    a_ = simde__m128d_to_private(a);\n\n  /* For architectures which lack a current direction SIMD instruction. 
*/\n  #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)\n    if ((rounding & 7) == SIMDE_MM_FROUND_CUR_DIRECTION)\n      rounding = HEDLEY_STATIC_CAST(int, SIMDE_MM_GET_ROUNDING_MODE()) >> 13;\n  #endif\n\n  switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) {\n    case SIMDE_MM_FROUND_CUR_DIRECTION:\n      #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)\n        r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_round(a_.altivec_f64));\n      #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n        r_.neon_f64 = vrndiq_f64(a_.neon_f64);\n      #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n        r_.wasm_v128 = wasm_f64x2_nearest(a_.wasm_v128);\n      #elif defined(simde_math_nearbyint)\n        SIMDE_VECTORIZE\n        for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n          r_.f64[i] = simde_math_nearbyint(a_.f64[i]);\n        }\n      #else\n        HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd());\n      #endif\n      break;\n\n    case SIMDE_MM_FROUND_TO_NEAREST_INT:\n      #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)\n        r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_round(a_.altivec_f64));\n      #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n        r_.neon_f64 = vrndaq_f64(a_.neon_f64);\n      #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n        r_.wasm_v128 = wasm_f64x2_nearest(a_.wasm_v128);\n      #elif defined(simde_math_roundeven)\n        SIMDE_VECTORIZE\n        for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n          r_.f64[i] = simde_math_roundeven(a_.f64[i]);\n        }\n      #else\n        HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd());\n      #endif\n      break;\n\n    case SIMDE_MM_FROUND_TO_NEG_INF:\n      #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)\n        r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_floor(a_.altivec_f64));\n      #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n        r_.neon_f64 = vrndmq_f64(a_.neon_f64);\n      #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n        r_.wasm_v128 = wasm_f64x2_floor(a_.wasm_v128);\n      #else\n        SIMDE_VECTORIZE\n        for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n          r_.f64[i] = simde_math_floor(a_.f64[i]);\n        }\n      #endif\n      break;\n\n    case SIMDE_MM_FROUND_TO_POS_INF:\n      #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)\n        r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_ceil(a_.altivec_f64));\n      #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n        r_.neon_f64 = vrndpq_f64(a_.neon_f64);\n      #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n        r_.wasm_v128 = wasm_f64x2_ceil(a_.wasm_v128);\n      #elif defined(simde_math_ceil)\n        SIMDE_VECTORIZE\n        for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n          r_.f64[i] = simde_math_ceil(a_.f64[i]);\n        }\n      #else\n        HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd());\n      #endif\n      break;\n\n    case SIMDE_MM_FROUND_TO_ZERO:\n      #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)\n        r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_trunc(a_.altivec_f64));\n      #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n        r_.neon_f64 = vrndq_f64(a_.neon_f64);\n      #elif 
defined(SIMDE_WASM_SIMD128_NATIVE)\n        r_.wasm_v128 = wasm_f64x2_trunc(a_.wasm_v128);\n      #else\n        SIMDE_VECTORIZE\n        for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n          r_.f64[i] = simde_math_trunc(a_.f64[i]);\n        }\n      #endif\n      break;\n\n    default:\n      HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd());\n  }\n\n  return simde__m128d_from_private(r_);\n}\n#if defined(SIMDE_X86_SSE4_1_NATIVE)\n  #define simde_mm_round_pd(a, rounding) _mm_round_pd(a, rounding)\n#endif\n#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)\n  #undef _mm_round_pd\n  #define _mm_round_pd(a, rounding) simde_mm_round_pd(a, rounding)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_ceil_pd (simde__m128d a) {\n  #if defined(SIMDE_WASM_SIMD128_NATIVE)\n    return simde__m128d_from_wasm_v128(wasm_f64x2_ceil(simde__m128d_to_wasm_v128(a)));\n  #endif\n  return simde_mm_round_pd(a, SIMDE_MM_FROUND_TO_POS_INF);\n}\n#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)\n  #undef _mm_ceil_pd\n  #define _mm_ceil_pd(a) simde_mm_ceil_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_ceil_ps (simde__m128 a) {\n  #if defined(SIMDE_WASM_SIMD128_NATIVE)\n    return simde__m128_from_wasm_v128(wasm_f32x4_ceil(simde__m128_to_wasm_v128(a)));\n  #endif\n  return simde_mm_round_ps(a, SIMDE_MM_FROUND_TO_POS_INF);\n}\n#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)\n  #undef _mm_ceil_ps\n  #define _mm_ceil_ps(a) simde_mm_ceil_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_ceil_sd (simde__m128d a, simde__m128d b) {\n  #if defined(SIMDE_X86_SSE4_1_NATIVE)\n    return _mm_ceil_sd(a, b);\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a),\n      b_ = simde__m128d_to_private(b);\n\n    #if defined(simde_math_ceil)\n      r_ = simde__m128d_to_private(simde_mm_set_pd(a_.f64[1], simde_math_ceil(b_.f64[0])));\n    #else\n      HEDLEY_UNREACHABLE();\n    #endif\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)\n  #undef _mm_ceil_sd\n  #define _mm_ceil_sd(a, b) simde_mm_ceil_sd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_ceil_ss (simde__m128 a, simde__m128 b) {\n  #if defined(SIMDE_X86_SSE4_1_NATIVE)\n    return _mm_ceil_ss(a, b);\n  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS)\n    return simde_mm_move_ss(a, simde_mm_ceil_ps(b));\n  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)\n    return simde_mm_move_ss(a, simde_mm_ceil_ps(simde_x_mm_broadcastlow_ps(b)));\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a),\n      b_ = simde__m128_to_private(b);\n\n    #if defined(simde_math_ceilf)\n      r_ = simde__m128_to_private(simde_mm_set_ps(a_.f32[3], a_.f32[2], a_.f32[1], simde_math_ceilf(b_.f32[0])));\n    #else\n      HEDLEY_UNREACHABLE();\n    #endif\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)\n  #undef _mm_ceil_ss\n  #define _mm_ceil_ss(a, b) simde_mm_ceil_ss(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_cmpeq_epi64 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SSE4_1_NATIVE)\n    return _mm_cmpeq_epi64(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      r_.neon_u64 = vceqq_u64(a_.neon_u64, b_.neon_u64);\n    #elif 
defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      /* (a == b) -> (a_lo == b_lo) && (a_hi == b_hi) */\n      uint32x4_t cmp = vceqq_u32(a_.neon_u32, b_.neon_u32);\n      uint32x4_t swapped = vrev64q_u32(cmp);\n      r_.neon_u32 = vandq_u32(cmp, swapped);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 == b_.i64);\n    #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)\n      r_.altivec_i64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), vec_cmpeq(a_.altivec_i64, b_.altivec_i64));\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {\n        r_.u64[i] = (a_.u64[i] == b_.u64[i]) ? ~UINT64_C(0) : UINT64_C(0);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)\n  #undef _mm_cmpeq_epi64\n  #define _mm_cmpeq_epi64(a, b) simde_mm_cmpeq_epi64(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_cvtepi8_epi16 (simde__m128i a) {\n  #if defined(SIMDE_X86_SSE4_1_NATIVE)\n    return _mm_cvtepi8_epi16(a);\n  #elif defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_srai_epi16(_mm_unpacklo_epi8(a, a), 8);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      int8x16_t s8x16 = a_.neon_i8;                   /* xxxx xxxx xxxx DCBA */\n      int16x8_t s16x8 = vmovl_s8(vget_low_s8(s8x16)); /* 0x0x 0x0x 0D0C 0B0A */\n      r_.neon_i16 = s16x8;\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_i16x8_extend_low_i8x16(a_.wasm_v128);\n    #elif defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE)\n      r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, a_.i8,\n          -1,  0, -1,  1, -1,  2,  -1,  3,\n          -1,  4, -1,  5, -1,  6,  -1,  7));\n      r_.i16 >>= 8;\n    #elif defined(SIMDE_CONVERT_VECTOR_)\n      SIMDE_CONVERT_VECTOR_(r_.i16, a_.m64_private[0].i8);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n        r_.i16[i] = a_.i8[i];\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)\n  #undef _mm_cvtepi8_epi16\n  #define _mm_cvtepi8_epi16(a) simde_mm_cvtepi8_epi16(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_cvtepi8_epi32 (simde__m128i a) {\n  #if defined(SIMDE_X86_SSE4_1_NATIVE)\n    return _mm_cvtepi8_epi32(a);\n  #elif defined(SIMDE_X86_SSE2_NATIVE)\n    __m128i tmp = _mm_unpacklo_epi8(a, a);\n    tmp = _mm_unpacklo_epi16(tmp, tmp);\n    return _mm_srai_epi32(tmp, 24);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      int8x16_t s8x16 = a_.neon_i8;                     /* xxxx xxxx xxxx DCBA */\n      int16x8_t s16x8 = vmovl_s8(vget_low_s8(s8x16));   /* 0x0x 0x0x 0D0C 0B0A */\n      int32x4_t s32x4 = vmovl_s16(vget_low_s16(s16x8)); /* 000D 000C 000B 000A */\n      r_.neon_i32 = s32x4;\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_i32x4_extend_low_i16x8(wasm_i16x8_extend_low_i8x16(a_.wasm_v128));\n    #elif defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE)\n      r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(8, 16, 
a_.i8, a_.i8,\n          -1, -1, -1,  0, -1, -1,  -1,  1,\n          -1, -1, -1,  2, -1, -1,  -1,  3));\n      r_.i32 >>= 24;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n        r_.i32[i] = a_.i8[i];\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)\n  #undef _mm_cvtepi8_epi32\n  #define _mm_cvtepi8_epi32(a) simde_mm_cvtepi8_epi32(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_cvtepi8_epi64 (simde__m128i a) {\n  #if defined(SIMDE_X86_SSE4_1_NATIVE)\n    return _mm_cvtepi8_epi64(a);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      int8x16_t s8x16 = a_.neon_i8;                     /* xxxx xxxx xxxx xxBA */\n      int16x8_t s16x8 = vmovl_s8(vget_low_s8(s8x16));   /* 0x0x 0x0x 0x0x 0B0A */\n      int32x4_t s32x4 = vmovl_s16(vget_low_s16(s16x8)); /* 000x 000x 000B 000A */\n      int64x2_t s64x2 = vmovl_s32(vget_low_s32(s32x4)); /* 0000 000B 0000 000A */\n      r_.neon_i64 = s64x2;\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      v128_t extra = wasm_i32x4_extend_low_i16x8(wasm_i16x8_extend_low_i8x16(a_.wasm_v128));\n      v128_t sign = wasm_i32x4_gt(wasm_i64x2_const(0, 0), extra);\n      r_.wasm_v128 = wasm_i32x4_shuffle(extra, sign, 0, 4, 1, 5);\n    #elif (!defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE)\n      /* Disabled on x86 due to lack of 64-bit arithmetic shift until\n       * AVX-512 (at which point we would be using the native\n       * _mm_cvtepi8_epi64 anyways). 
*/\n      r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, a_.i8,\n          -1, -1, -1, -1, -1, -1,  -1,  0,\n          -1, -1, -1, -1, -1, -1,  -1,  1));\n      r_.i64 >>= 56;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {\n        r_.i64[i] = a_.i8[i];\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)\n  #undef _mm_cvtepi8_epi64\n  #define _mm_cvtepi8_epi64(a) simde_mm_cvtepi8_epi64(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_cvtepu8_epi16 (simde__m128i a) {\n  #if defined(SIMDE_X86_SSE4_1_NATIVE)\n    return _mm_cvtepu8_epi16(a);\n  #elif defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_unpacklo_epi8(a, _mm_setzero_si128());\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      uint8x16_t u8x16 = a_.neon_u8;                   /* xxxx xxxx xxxx DCBA */\n      uint16x8_t u16x8 = vmovl_u8(vget_low_u8(u8x16)); /* 0x0x 0x0x 0D0C 0B0A */\n      r_.neon_u16 = u16x8;\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_u16x8_extend_low_u8x16(a_.wasm_v128);\n    #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE)\n      __typeof__(r_.i8) z = { 0, };\n      r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, z,\n          0, 16, 1, 17, 2, 18, 3, 19,\n          4, 20, 5, 21, 6, 22, 7, 23));\n    #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_CLANG_45541) && (!defined(SIMDE_ARCH_POWER) || !defined(__clang__))\n      SIMDE_CONVERT_VECTOR_(r_.i16, a_.m64_private[0].u8);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n        r_.i16[i] = a_.u8[i];\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)\n  #undef _mm_cvtepu8_epi16\n  #define _mm_cvtepu8_epi16(a) simde_mm_cvtepu8_epi16(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_cvtepu8_epi32 (simde__m128i a) {\n  #if defined(SIMDE_X86_SSE4_1_NATIVE)\n    return _mm_cvtepu8_epi32(a);\n  #elif defined(SIMDE_X86_SSSE3_NATIVE)\n    __m128i s = _mm_set_epi8(\n        HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x03),\n        HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x02),\n        HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x01),\n        HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x00));\n    return _mm_shuffle_epi8(a, s);\n  #elif defined(SIMDE_X86_SSE2_NATIVE)\n    __m128i z = _mm_setzero_si128();\n    return _mm_unpacklo_epi16(_mm_unpacklo_epi8(a, z), z);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      uint8x16_t u8x16 = a_.neon_u8;                     /* xxxx xxxx xxxx DCBA */\n      uint16x8_t u16x8 = vmovl_u8(vget_low_u8(u8x16));   /* 0x0x 0x0x 0D0C 0B0A */\n      uint32x4_t u32x4 = vmovl_u16(vget_low_u16(u16x8)); /* 000D 000C 000B 000A */\n      r_.neon_u32 = u32x4;\n    #elif 
defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_u32x4_extend_low_u16x8(wasm_u16x8_extend_low_u8x16(a_.wasm_v128));\n    #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE)\n      __typeof__(r_.i8) z = { 0, };\n      r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, z,\n          0, 17, 18, 19, 1, 21, 22, 23,\n          2, 25, 26, 27, 3, 29, 30, 31));\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n        r_.i32[i] = a_.u8[i];\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)\n  #undef _mm_cvtepu8_epi32\n  #define _mm_cvtepu8_epi32(a) simde_mm_cvtepu8_epi32(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_cvtepu8_epi64 (simde__m128i a) {\n  #if defined(SIMDE_X86_SSE4_1_NATIVE)\n    return _mm_cvtepu8_epi64(a);\n  #elif defined(SIMDE_X86_SSSE3_NATIVE)\n    __m128i s = _mm_set_epi8(\n        HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80),\n        HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x01),\n        HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80),\n        HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x00));\n    return _mm_shuffle_epi8(a, s);\n  #elif defined(SIMDE_X86_SSE2_NATIVE)\n    __m128i z = _mm_setzero_si128();\n    return _mm_unpacklo_epi32(_mm_unpacklo_epi16(_mm_unpacklo_epi8(a, z), z), z);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      uint8x16_t u8x16 = a_.neon_u8;                     /* xxxx xxxx xxxx xxBA */\n      uint16x8_t u16x8 = vmovl_u8(vget_low_u8(u8x16));   /* 0x0x 0x0x 0x0x 0B0A */\n      uint32x4_t u32x4 = vmovl_u16(vget_low_u16(u16x8)); /* 000x 000x 000B 000A */\n      uint64x2_t u64x2 = vmovl_u32(vget_low_u32(u32x4)); /* 0000 000B 0000 000A */\n      r_.neon_u64 = u64x2;\n    #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE)\n      __typeof__(r_.i8) z = { 0, };\n      r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, z,\n          0, 17, 18, 19, 20, 21, 22, 23,\n          1, 25, 26, 27, 28, 29, 30, 31));\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {\n        r_.i64[i] = a_.u8[i];\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)\n  #undef _mm_cvtepu8_epi64\n  #define _mm_cvtepu8_epi64(a) simde_mm_cvtepu8_epi64(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_cvtepi16_epi32 (simde__m128i a) {\n  #if defined(SIMDE_X86_SSE4_1_NATIVE)\n    return _mm_cvtepi16_epi32(a);\n  #elif defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_srai_epi32(_mm_unpacklo_epi16(a, a), 16);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i32 = vmovl_s16(vget_low_s16(a_.neon_i16));\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_i32x4_extend_low_i16x8(a_.wasm_v128);\n    #elif 
!defined(SIMDE_ARCH_X86) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE)\n      r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, a_.i16, 8, 0, 10, 1, 12, 2, 14, 3));\n      r_.i32 >>= 16;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n        r_.i32[i] = a_.i16[i];\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)\n  #undef _mm_cvtepi16_epi32\n  #define _mm_cvtepi16_epi32(a) simde_mm_cvtepi16_epi32(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_cvtepu16_epi32 (simde__m128i a) {\n  #if defined(SIMDE_X86_SSE4_1_NATIVE)\n    return _mm_cvtepu16_epi32(a);\n  #elif defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_unpacklo_epi16(a, _mm_setzero_si128());\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u32 = vmovl_u16(vget_low_u16(a_.neon_u16));\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_u32x4_extend_low_u16x8(a_.wasm_v128);\n    #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE)\n      __typeof__(r_.u16) z = { 0, };\n      r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.u16, z,\n          0, 9, 1, 11, 2, 13, 3, 15));\n    #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_CLANG_45541) && (!defined(SIMDE_ARCH_POWER) || !defined(__clang__))\n      SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].u16);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n        r_.i32[i] = a_.u16[i];\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)\n  #undef _mm_cvtepu16_epi32\n  #define _mm_cvtepu16_epi32(a) simde_mm_cvtepu16_epi32(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_cvtepu16_epi64 (simde__m128i a) {\n  #if defined(SIMDE_X86_SSE4_1_NATIVE)\n    return _mm_cvtepu16_epi64(a);\n  #elif defined(SIMDE_X86_SSE2_NATIVE)\n    __m128i z = _mm_setzero_si128();\n    return _mm_unpacklo_epi32(_mm_unpacklo_epi16(a, z), z);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      uint16x8_t u16x8 = a_.neon_u16;                    /* xxxx xxxx xxxx 0B0A */\n      uint32x4_t u32x4 = vmovl_u16(vget_low_u16(u16x8)); /* 000x 000x 000B 000A */\n      uint64x2_t u64x2 = vmovl_u32(vget_low_u32(u32x4)); /* 0000 000B 0000 000A */\n      r_.neon_u64 = u64x2;\n    #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE)\n      __typeof__(r_.u16) z = { 0, };\n      r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.u16, z,\n          0,  9, 10, 11,\n          1, 13, 14, 15));\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {\n        r_.i64[i] = a_.u16[i];\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)\n  #undef _mm_cvtepu16_epi64\n  #define _mm_cvtepu16_epi64(a) simde_mm_cvtepu16_epi64(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_cvtepi16_epi64 (simde__m128i a) {\n  #if 
defined(SIMDE_X86_SSE4_1_NATIVE)\n    return _mm_cvtepi16_epi64(a);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      int16x8_t s16x8 = a_.neon_i16;                    /* xxxx xxxx xxxx 0B0A */\n      int32x4_t s32x4 = vmovl_s16(vget_low_s16(s16x8)); /* 000x 000x 000B 000A */\n      int64x2_t s64x2 = vmovl_s32(vget_low_s32(s32x4)); /* 0000 000B 0000 000A */\n      r_.neon_i64 = s64x2;\n    #elif (!defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE)\n      r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, a_.i16,\n           8,  9, 10, 0,\n          12, 13, 14, 1));\n      r_.i64 >>= 48;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {\n        r_.i64[i] = a_.i16[i];\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)\n  #undef _mm_cvtepi16_epi64\n  #define _mm_cvtepi16_epi64(a) simde_mm_cvtepi16_epi64(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_cvtepi32_epi64 (simde__m128i a) {\n  #if defined(SIMDE_X86_SSE4_1_NATIVE)\n    return _mm_cvtepi32_epi64(a);\n  #elif defined(SIMDE_X86_SSE2_NATIVE)\n    __m128i tmp = _mm_shuffle_epi32(a, 0x50);\n    tmp = _mm_srai_epi32(tmp, 31);\n    tmp = _mm_shuffle_epi32(tmp, 0xed);\n    return _mm_unpacklo_epi32(a, tmp);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i64 = vmovl_s32(vget_low_s32(a_.neon_i32));\n    #elif !defined(SIMDE_ARCH_X86) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE)\n      r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, a_.i32, -1, 0, -1, 1));\n      r_.i64 >>= 32;\n    #elif defined(SIMDE_CONVERT_VECTOR_)\n      SIMDE_CONVERT_VECTOR_(r_.i64, a_.m64_private[0].i32);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {\n        r_.i64[i] = a_.i32[i];\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)\n  #undef _mm_cvtepi32_epi64\n  #define _mm_cvtepi32_epi64(a) simde_mm_cvtepi32_epi64(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_cvtepu32_epi64 (simde__m128i a) {\n  #if defined(SIMDE_X86_SSE4_1_NATIVE)\n    return _mm_cvtepu32_epi64(a);\n  #elif defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_unpacklo_epi32(a, _mm_setzero_si128());\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u64 = vmovl_u32(vget_low_u32(a_.neon_u32));\n    #elif defined(SIMDE_VECTOR_SCALAR) && defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE)\n      __typeof__(r_.u32) z = { 0, };\n      r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(32, 16, a_.u32, z, 0, 4, 1, 6));\n    #elif defined(SIMDE_CONVERT_VECTOR_)\n      SIMDE_CONVERT_VECTOR_(r_.i64, a_.m64_private[0].u32);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {\n        r_.i64[i] = a_.u32[i];\n      }\n    #endif\n\n    return 
simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)\n  #undef _mm_cvtepu32_epi64\n  #define _mm_cvtepu32_epi64(a) simde_mm_cvtepu32_epi64(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_dp_pd (simde__m128d a, simde__m128d b, const int imm8)\n    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255)  {\n  simde__m128d_private\n    r_,\n    a_ = simde__m128d_to_private(a),\n    b_ = simde__m128d_to_private(b);\n\n  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n    r_.neon_f64 = vmulq_f64(a_.neon_f64, b_.neon_f64);\n\n    switch (imm8) {\n      case 0xff:\n        r_.neon_f64 = vaddq_f64(r_.neon_f64, vextq_f64(r_.neon_f64, r_.neon_f64, 1));\n        break;\n      case 0x13:\n        r_.neon_f64 = vdupq_lane_f64(vget_low_f64(r_.neon_f64), 0);\n        break;\n      default:\n        { /* imm8 is a compile-time constant, so this all becomes just a load */\n          uint64_t mask_data[] = {\n            (imm8 & (1 << 4)) ? ~UINT64_C(0) : UINT64_C(0),\n            (imm8 & (1 << 5)) ? ~UINT64_C(0) : UINT64_C(0),\n          };\n          r_.neon_f64 = vreinterpretq_f64_u64(vandq_u64(vld1q_u64(mask_data), vreinterpretq_u64_f64(r_.neon_f64)));\n        }\n\n        r_.neon_f64 = vdupq_n_f64(vaddvq_f64(r_.neon_f64));\n\n        {\n          uint64_t mask_data[] = {\n            (imm8 & 1) ? ~UINT64_C(0) : UINT64_C(0),\n            (imm8 & 2) ? ~UINT64_C(0) : UINT64_C(0)\n          };\n          r_.neon_f64 = vreinterpretq_f64_u64(vandq_u64(vld1q_u64(mask_data), vreinterpretq_u64_f64(r_.neon_f64)));\n        }\n        break;\n    }\n  #else\n    simde_float64 sum = SIMDE_FLOAT64_C(0.0);\n\n    SIMDE_VECTORIZE_REDUCTION(+:sum)\n    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n      sum += ((imm8 >> (i + 4)) & 1) ? (a_.f64[i] * b_.f64[i]) : 0.0;\n    }\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n      r_.f64[i] = ((imm8 >> i) & 1) ? sum : 0.0;\n    }\n  #endif\n\n  return simde__m128d_from_private(r_);\n}\n#if defined(SIMDE_X86_SSE4_1_NATIVE)\n#  define simde_mm_dp_pd(a, b, imm8) _mm_dp_pd(a, b, imm8)\n#endif\n#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)\n  #undef _mm_dp_pd\n  #define _mm_dp_pd(a, b, imm8) simde_mm_dp_pd(a, b, imm8)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_dp_ps (simde__m128 a, simde__m128 b, const int imm8)\n    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255)  {\n  simde__m128_private\n    r_,\n    a_ = simde__m128_to_private(a),\n    b_ = simde__m128_to_private(b);\n\n  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n    r_.neon_f32 = vmulq_f32(a_.neon_f32, b_.neon_f32);\n\n    switch (imm8) {\n      case 0xff:\n        r_.neon_f32 = vdupq_n_f32(vaddvq_f32(r_.neon_f32));\n        break;\n      case 0x7f:\n        r_.neon_f32 = vsetq_lane_f32(0, r_.neon_f32, 3);\n        r_.neon_f32 = vdupq_n_f32(vaddvq_f32(r_.neon_f32));\n        break;\n      default:\n        {\n          {\n            uint32_t mask_data[] = {\n              (imm8 & (1 << 4)) ? ~UINT32_C(0) : UINT32_C(0),\n              (imm8 & (1 << 5)) ? ~UINT32_C(0) : UINT32_C(0),\n              (imm8 & (1 << 6)) ? ~UINT32_C(0) : UINT32_C(0),\n              (imm8 & (1 << 7)) ? 
~UINT32_C(0) : UINT32_C(0)\n            };\n            r_.neon_f32 = vreinterpretq_f32_u32(vandq_u32(vld1q_u32(mask_data), vreinterpretq_u32_f32(r_.neon_f32)));\n          }\n\n          r_.neon_f32 = vdupq_n_f32(vaddvq_f32(r_.neon_f32));\n\n          {\n            uint32_t mask_data[] = {\n              (imm8 & 1) ? ~UINT32_C(0) : UINT32_C(0),\n              (imm8 & 2) ? ~UINT32_C(0) : UINT32_C(0),\n              (imm8 & 4) ? ~UINT32_C(0) : UINT32_C(0),\n              (imm8 & 8) ? ~UINT32_C(0) : UINT32_C(0)\n            };\n            r_.neon_f32 = vreinterpretq_f32_u32(vandq_u32(vld1q_u32(mask_data), vreinterpretq_u32_f32(r_.neon_f32)));\n          }\n        }\n        break;\n    }\n  #else\n    simde_float32 sum = SIMDE_FLOAT32_C(0.0);\n\n    SIMDE_VECTORIZE_REDUCTION(+:sum)\n    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n      sum += ((imm8 >> (i + 4)) & 1) ? (a_.f32[i] * b_.f32[i]) : SIMDE_FLOAT32_C(0.0);\n    }\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n      r_.f32[i] = ((imm8 >> i) & 1) ? sum : SIMDE_FLOAT32_C(0.0);\n    }\n  #endif\n\n  return simde__m128_from_private(r_);\n}\n#if defined(SIMDE_X86_SSE4_1_NATIVE)\n  #if defined(HEDLEY_MCST_LCC_VERSION)\n    #define simde_mm_dp_ps(a, b, imm8) (__extension__ ({ \\\n      SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS \\\n      _mm_dp_ps((a), (b), (imm8)); \\\n      SIMDE_LCC_REVERT_DEPRECATED_WARNINGS \\\n    }))\n  #else\n    #define simde_mm_dp_ps(a, b, imm8) _mm_dp_ps(a, b, imm8)\n  #endif\n#endif\n#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)\n  #undef _mm_dp_ps\n  #define _mm_dp_ps(a, b, imm8) simde_mm_dp_ps(a, b, imm8)\n#endif\n\n#if defined(simde_mm_extract_epi8)\n#  undef simde_mm_extract_epi8\n#endif\nSIMDE_FUNCTION_ATTRIBUTES\nint8_t\nsimde_mm_extract_epi8 (simde__m128i a, const int imm8)\n    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15)  {\n  simde__m128i_private\n    a_ = simde__m128i_to_private(a);\n\n  #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)\n    #if defined(SIMDE_BUG_GCC_95227)\n      (void) a_;\n      (void) imm8;\n    #endif\n    return vec_extract(a_.altivec_i8, imm8);\n  #else\n    return a_.i8[imm8 & 15];\n  #endif\n}\n#if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_MM_EXTRACT_EPI8)\n#  define simde_mm_extract_epi8(a, imm8) HEDLEY_STATIC_CAST(int8_t, _mm_extract_epi8(a, imm8))\n#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n#  define simde_mm_extract_epi8(a, imm8) vgetq_lane_s8(simde__m128i_to_neon_i8(a), imm8)\n#elif defined(SIMDE_WASM_SIMD128_NATIVE)\n#  define simde_mm_extract_epi8(a, imm8) wasm_u8x16_extract_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 15)\n#endif\n#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)\n  #undef _mm_extract_epi8\n  #define _mm_extract_epi8(a, imm8) HEDLEY_STATIC_CAST(int, simde_mm_extract_epi8(a, imm8))\n#endif\n\n#if defined(simde_mm_extract_epi32)\n#  undef simde_mm_extract_epi32\n#endif\nSIMDE_FUNCTION_ATTRIBUTES\nint32_t\nsimde_mm_extract_epi32 (simde__m128i a, const int imm8)\n    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3)  {\n  simde__m128i_private\n    a_ = simde__m128i_to_private(a);\n\n  #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)\n    #if defined(SIMDE_BUG_GCC_95227)\n      (void) a_;\n      (void) imm8;\n    #endif\n    return vec_extract(a_.altivec_i32, imm8);\n  #else\n    return a_.i32[imm8 & 3];\n  #endif\n}\n#if defined(SIMDE_X86_SSE4_1_NATIVE)\n#  define simde_mm_extract_epi32(a, imm8) _mm_extract_epi32(a, imm8)\n#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n#  
define simde_mm_extract_epi32(a, imm8) vgetq_lane_s32(simde__m128i_to_neon_i32(a), imm8)\n#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)\n#  define simde_mm_extract_epi32(a, imm8) HEDLEY_STATIC_CAST(int32_t, vec_extract(simde__m128i_to_altivec_i32(a), imm8))\n#elif defined(SIMDE_WASM_SIMD128_NATIVE)\n#  define simde_mm_extract_epi32(a, imm8) wasm_i32x4_extract_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 3)\n#endif\n#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)\n  #undef _mm_extract_epi32\n  #define _mm_extract_epi32(a, imm8) simde_mm_extract_epi32(a, imm8)\n#endif\n\n#if defined(simde_mm_extract_epi64)\n#  undef simde_mm_extract_epi64\n#endif\nSIMDE_FUNCTION_ATTRIBUTES\nint64_t\nsimde_mm_extract_epi64 (simde__m128i a, const int imm8)\n    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1)  {\n  simde__m128i_private\n    a_ = simde__m128i_to_private(a);\n\n  #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)\n    #if defined(SIMDE_BUG_GCC_95227)\n      (void) a_;\n      (void) imm8;\n    #endif\n    return vec_extract(a_.altivec_i64, imm8);\n  #else\n    return a_.i64[imm8 & 1];\n  #endif\n}\n#if defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_ARCH_AMD64)\n#  define simde_mm_extract_epi64(a, imm8) _mm_extract_epi64(a, imm8)\n#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n#  define simde_mm_extract_epi64(a, imm8) vgetq_lane_s64(simde__m128i_to_neon_i64(a), imm8)\n#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)\n#  define simde_mm_extract_epi64(a, imm8) HEDLEY_STATIC_CAST(int64_t, vec_extract(simde__m128i_to_altivec_i64(a), imm8))\n#endif\n#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64))\n  #undef _mm_extract_epi64\n  #define _mm_extract_epi64(a, imm8) simde_mm_extract_epi64(a, imm8)\n#endif\n\n#if defined(simde_mm_extract_ps)\n#  undef simde_mm_extract_ps\n#endif\nSIMDE_FUNCTION_ATTRIBUTES\nint32_t\nsimde_mm_extract_ps (simde__m128 a, const int imm8)\n    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3)  {\n  simde__m128_private\n    a_ = simde__m128_to_private(a);\n\n  return a_.i32[imm8 & 3];\n}\n#if defined(SIMDE_X86_SSE4_1_NATIVE)\n  #define simde_mm_extract_ps(a, imm8) _mm_extract_ps(a, imm8)\n#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n  #define simde_mm_extract_ps(a, imm8) vgetq_lane_s32(simde__m128_to_neon_i32(a), imm8)\n#elif defined(SIMDE_WASM_SIMD128_NATIVE)\n  #define simde_mm_extract_ps(a, imm8) wasm_i32x4_extract_lane(simde__m128_to_wasm_v128((a)), (imm8) & 3)\n#endif\n#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)\n  #undef _mm_extract_ps\n  #define _mm_extract_ps(a, imm8) simde_mm_extract_ps(a, imm8)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_floor_pd (simde__m128d a) {\n  #if defined(SIMDE_WASM_SIMD128_NATIVE)\n    return simde__m128d_from_wasm_v128(wasm_f64x2_floor(simde__m128d_to_wasm_v128(a)));\n  #endif\n  return simde_mm_round_pd(a, SIMDE_MM_FROUND_TO_NEG_INF);\n}\n#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)\n  #undef _mm_floor_pd\n  #define _mm_floor_pd(a) simde_mm_floor_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_floor_ps (simde__m128 a) {\n  #if defined(SIMDE_WASM_SIMD128_NATIVE)\n    return simde__m128_from_wasm_v128(wasm_f32x4_floor(simde__m128_to_wasm_v128(a)));\n  #endif\n  return simde_mm_round_ps(a, SIMDE_MM_FROUND_TO_NEG_INF);\n}\n#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)\n  #undef _mm_floor_ps\n  #define _mm_floor_ps(a) simde_mm_floor_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_floor_sd (simde__m128d a, simde__m128d b) {\n  
#if defined(SIMDE_X86_SSE4_1_NATIVE)\n    return _mm_floor_sd(a, b);\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a),\n      b_ = simde__m128d_to_private(b);\n\n    #if defined(simde_math_floor)\n      r_.f64[0] = simde_math_floor(b_.f64[0]);\n      r_.f64[1] = a_.f64[1];\n    #else\n      HEDLEY_UNREACHABLE();\n    #endif\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)\n  #undef _mm_floor_sd\n  #define _mm_floor_sd(a, b) simde_mm_floor_sd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_floor_ss (simde__m128 a, simde__m128 b) {\n  #if defined(SIMDE_X86_SSE4_1_NATIVE)\n    return _mm_floor_ss(a, b);\n  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS)\n      return simde_mm_move_ss(a, simde_mm_floor_ps(b));\n  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)\n    return simde_mm_move_ss(a, simde_mm_floor_ps(simde_x_mm_broadcastlow_ps(b)));\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a),\n      b_ = simde__m128_to_private(b);\n\n    #if defined(simde_math_floorf)\n      r_.f32[0] = simde_math_floorf(b_.f32[0]);\n      for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = a_.f32[i];\n      }\n    #else\n      HEDLEY_UNREACHABLE();\n    #endif\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)\n  #undef _mm_floor_ss\n  #define _mm_floor_ss(a, b) simde_mm_floor_ss(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_insert_epi8 (simde__m128i a, int i, const int imm8)\n    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15)  {\n  simde__m128i_private\n    r_ = simde__m128i_to_private(a);\n\n  r_.i8[imm8] = HEDLEY_STATIC_CAST(int8_t, i);\n\n  return simde__m128i_from_private(r_);\n}\n#if defined(SIMDE_X86_SSE4_1_NATIVE)\n  /* clang-3.8 returns an incompatible type, so we need the cast.  MSVC\n   * can't handle the cast (\"error C2440: 'type cast': cannot convert\n   * from '__m128i' to '__m128i'\").  
*/\n  #if defined(__clang__)\n    #define simde_mm_insert_epi8(a, i, imm8) HEDLEY_REINTERPRET_CAST(__m128i, _mm_insert_epi8(a, i, imm8))\n  #else\n    #define simde_mm_insert_epi8(a, i, imm8) _mm_insert_epi8(a, i, imm8)\n  #endif\n#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n#  define simde_mm_insert_epi8(a, i, imm8) simde__m128i_from_neon_i8(vsetq_lane_s8(i, simde__m128i_to_neon_i8(a), imm8))\n#elif defined(SIMDE_WASM_SIMD128_NATIVE)\n#  define simde_mm_insert_epi8(a, i, imm8) simde__m128i_from_wasm_v128(wasm_i8x16_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 15, HEDLEY_STATIC_CAST(int8_t, (i))))\n#endif\n#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)\n  #undef _mm_insert_epi8\n  #define _mm_insert_epi8(a, i, imm8) simde_mm_insert_epi8(a, i, imm8)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_insert_epi32 (simde__m128i a, int i, const int imm8)\n    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3)  {\n  simde__m128i_private\n    r_ = simde__m128i_to_private(a);\n\n  r_.i32[imm8] = HEDLEY_STATIC_CAST(int32_t, i);\n\n  return simde__m128i_from_private(r_);\n}\n#if defined(SIMDE_X86_SSE4_1_NATIVE)\n  #if defined(__clang__)\n    #define simde_mm_insert_epi32(a, i, imm8) HEDLEY_REINTERPRET_CAST(__m128i, _mm_insert_epi32(a, i, imm8))\n  #else\n    #define simde_mm_insert_epi32(a, i, imm8) _mm_insert_epi32(a, i, imm8)\n  #endif\n#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n#  define simde_mm_insert_epi32(a, i, imm8) simde__m128i_from_neon_i32(vsetq_lane_s32(i, simde__m128i_to_neon_i32(a), imm8))\n#elif defined(SIMDE_WASM_SIMD128_NATIVE)\n#  define simde_mm_insert_epi32(a, i, imm8) simde__m128i_from_wasm_v128(wasm_i32x4_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 3, (i)))\n#endif\n#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)\n  #undef _mm_insert_epi32\n  #define _mm_insert_epi32(a, i, imm8) simde_mm_insert_epi32(a, i, imm8)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_insert_epi64 (simde__m128i a, int64_t i, const int imm8)\n    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1)  {\n  #if defined(SIMDE_BUG_GCC_94482)\n    simde__m128i_private\n      a_ = simde__m128i_to_private(a);\n\n    switch(imm8) {\n      case 0:\n        return simde_mm_set_epi64x(a_.i64[1], i);\n        break;\n      case 1:\n        return simde_mm_set_epi64x(i, a_.i64[0]);\n        break;\n      default:\n        HEDLEY_UNREACHABLE();\n        break;\n    }\n  #else\n    simde__m128i_private\n      r_ = simde__m128i_to_private(a);\n\n    r_.i64[imm8] = i;\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_ARCH_AMD64)\n#  define simde_mm_insert_epi64(a, i, imm8) _mm_insert_epi64(a, i, imm8)\n#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n#  define simde_mm_insert_epi64(a, i, imm8) simde__m128i_from_neon_i64(vsetq_lane_s64(i, simde__m128i_to_neon_i64(a), imm8))\n#elif defined(SIMDE_WASM_SIMD128_NATIVE)\n#  define simde_mm_insert_epi64(a, i, imm8) simde__m128i_from_wasm_v128(wasm_i64x2_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 1, (i)))\n#endif\n#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64))\n  #undef _mm_insert_epi64\n  #define _mm_insert_epi64(a, i, imm8) simde_mm_insert_epi64(a, i, imm8)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_insert_ps (simde__m128 a, simde__m128 b, const int imm8)\n    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255)  {\n  simde__m128_private\n    r_,\n    a_ = simde__m128_to_private(a),\n    b_ = 
simde__m128_to_private(b);\n\n  float tmp1_ = b_.f32[(imm8 >> 6) & 3];\n  a_.f32[(imm8 >> 4) & 3] = tmp1_;\n\n  SIMDE_VECTORIZE\n  for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n    r_.f32[i] = ((imm8 >> i) & 1 ) ? SIMDE_FLOAT32_C(0.0) : a_.f32[i];\n  }\n\n  return simde__m128_from_private(r_);\n}\n#if defined(SIMDE_X86_SSE4_1_NATIVE)\n#  define simde_mm_insert_ps(a, b, imm8) _mm_insert_ps(a, b, imm8)\n#endif\n#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)\n  #undef _mm_insert_ps\n  #define _mm_insert_ps(a, b, imm8) simde_mm_insert_ps(a, b, imm8)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_max_epi8 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI)\n    return _mm_max_epi8(a, b);\n  #elif defined(SIMDE_X86_SSE2_NATIVE)\n    __m128i m = _mm_cmpgt_epi8(a, b);\n    return _mm_or_si128(_mm_and_si128(m, a), _mm_andnot_si128(m, b));\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i8 = vmaxq_s8(a_.neon_i8, b_.neon_i8);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_i8x16_max(a_.wasm_v128, b_.wasm_v128);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)\n      r_.altivec_i8 = vec_max(a_.altivec_i8, b_.altivec_i8);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {\n        r_.i8[i] = a_.i8[i] > b_.i8[i] ? a_.i8[i] : b_.i8[i];\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)\n  #undef _mm_max_epi8\n  #define _mm_max_epi8(a, b) simde_mm_max_epi8(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_max_epi32 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI)\n    return _mm_max_epi32(a, b);\n  #elif defined(SIMDE_X86_SSE2_NATIVE)\n    __m128i m = _mm_cmpgt_epi32(a, b);\n    return _mm_or_si128(_mm_and_si128(m, a), _mm_andnot_si128(m, b));\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i32 = vmaxq_s32(a_.neon_i32, b_.neon_i32);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_i32x4_max(a_.wasm_v128, b_.wasm_v128);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)\n      r_.altivec_i32 = vec_max(a_.altivec_i32, b_.altivec_i32);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n        r_.i32[i] = a_.i32[i] > b_.i32[i] ? 
a_.i32[i] : b_.i32[i];\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)\n  #undef _mm_max_epi32\n  #define _mm_max_epi32(a, b) simde_mm_max_epi32(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_max_epu16 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SSE4_1_NATIVE)\n    return _mm_max_epu16(a, b);\n  #elif defined(SIMDE_X86_SSE2_NATIVE)\n    /* https://github.com/simd-everywhere/simde/issues/855#issuecomment-881656284 */\n    return _mm_add_epi16(b, _mm_subs_epu16(a, b));\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u16 = vmaxq_u16(a_.neon_u16, b_.neon_u16);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_u16x8_max(a_.wasm_v128, b_.wasm_v128);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)\n      r_.altivec_u16 = vec_max(a_.altivec_u16, b_.altivec_u16);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {\n        r_.u16[i] = a_.u16[i] > b_.u16[i] ? a_.u16[i] : b_.u16[i];\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)\n  #undef _mm_max_epu16\n  #define _mm_max_epu16(a, b) simde_mm_max_epu16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_max_epu32 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SSE4_1_NATIVE)\n    return _mm_max_epu32(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u32 = vmaxq_u32(a_.neon_u32, b_.neon_u32);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_u32x4_max(a_.wasm_v128, b_.wasm_v128);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)\n      r_.altivec_u32 = vec_max(a_.altivec_u32, b_.altivec_u32);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {\n        r_.u32[i] = a_.u32[i] > b_.u32[i] ? a_.u32[i] : b_.u32[i];\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)\n  #undef _mm_max_epu32\n  #define _mm_max_epu32(a, b) simde_mm_max_epu32(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_min_epi8 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI)\n    return _mm_min_epi8(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i8 = vminq_s8(a_.neon_i8, b_.neon_i8);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_i8x16_min(a_.wasm_v128, b_.wasm_v128);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)\n      r_.altivec_i8 = vec_min(a_.altivec_i8, b_.altivec_i8);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {\n        r_.i8[i] = a_.i8[i] < b_.i8[i] ? 
a_.i8[i] : b_.i8[i];\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)\n  #undef _mm_min_epi8\n  #define _mm_min_epi8(a, b) simde_mm_min_epi8(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_min_epi32 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI)\n    return _mm_min_epi32(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i32 = vminq_s32(a_.neon_i32, b_.neon_i32);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_i32x4_min(a_.wasm_v128, b_.wasm_v128);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)\n      r_.altivec_i32 = vec_min(a_.altivec_i32, b_.altivec_i32);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n        r_.i32[i] = a_.i32[i] < b_.i32[i] ? a_.i32[i] : b_.i32[i];\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)\n  #undef _mm_min_epi32\n  #define _mm_min_epi32(a, b) simde_mm_min_epi32(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_min_epu16 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SSE4_1_NATIVE)\n    return _mm_min_epu16(a, b);\n  #elif defined(SIMDE_X86_SSE2_NATIVE)\n    /* https://github.com/simd-everywhere/simde/issues/855#issuecomment-881656284 */\n    return _mm_sub_epi16(a, _mm_subs_epu16(a, b));\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u16 = vminq_u16(a_.neon_u16, b_.neon_u16);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_u16x8_min(a_.wasm_v128, b_.wasm_v128);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)\n      r_.altivec_u16 = vec_min(a_.altivec_u16, b_.altivec_u16);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {\n        r_.u16[i] = a_.u16[i] < b_.u16[i] ? a_.u16[i] : b_.u16[i];\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)\n  #undef _mm_min_epu16\n  #define _mm_min_epu16(a, b) simde_mm_min_epu16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_min_epu32 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SSE4_1_NATIVE)\n    return _mm_min_epu32(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u32 = vminq_u32(a_.neon_u32, b_.neon_u32);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_u32x4_min(a_.wasm_v128, b_.wasm_v128);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)\n      r_.altivec_u32 = vec_min(a_.altivec_u32, b_.altivec_u32);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {\n        r_.u32[i] = a_.u32[i] < b_.u32[i] ? 
a_.u32[i] : b_.u32[i];\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)\n  #undef _mm_min_epu32\n  #define _mm_min_epu32(a, b) simde_mm_min_epu32(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_minpos_epu16 (simde__m128i a) {\n  #if defined(SIMDE_X86_SSE4_1_NATIVE)\n    return _mm_minpos_epu16(a);\n  #else\n    simde__m128i_private\n      r_ = simde__m128i_to_private(simde_mm_setzero_si128()),\n      a_ = simde__m128i_to_private(a);\n\n    r_.u16[0] = UINT16_MAX;\n    for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {\n      if (a_.u16[i] < r_.u16[0]) {\n        r_.u16[0] = a_.u16[i];\n        r_.u16[1] = HEDLEY_STATIC_CAST(uint16_t, i);\n      }\n    }\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)\n  #undef _mm_minpos_epu16\n  #define _mm_minpos_epu16(a) simde_mm_minpos_epu16(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_mpsadbw_epu8 (simde__m128i a, simde__m128i b, const int imm8)\n    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255)  {\n  simde__m128i_private\n    r_,\n    a_ = simde__m128i_to_private(a),\n    b_ = simde__m128i_to_private(b);\n\n  const int a_offset = imm8 & 4;\n  const int b_offset = (imm8 & 3) << 2;\n\n#if defined(simde_math_abs)\n  for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, (sizeof(r_.u16) / sizeof(r_.u16[0]))) ; i++) {\n    r_.u16[i] =\n      HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 0] - b_.u8[b_offset + 0]))) +\n      HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 1] - b_.u8[b_offset + 1]))) +\n      HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 2] - b_.u8[b_offset + 2]))) +\n      HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 3] - b_.u8[b_offset + 3])));\n  }\n#else\n  HEDLEY_UNREACHABLE();\n#endif\n\n  return simde__m128i_from_private(r_);\n}\n#if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_BUG_PGI_30107)\n#  define simde_mm_mpsadbw_epu8(a, b, imm8) _mm_mpsadbw_epu8(a, b, imm8)\n#endif\n#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)\n  #undef _mm_mpsadbw_epu8\n  #define _mm_mpsadbw_epu8(a, b, imm8) simde_mm_mpsadbw_epu8(a, b, imm8)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_mul_epi32 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SSE4_1_NATIVE)\n    return _mm_mul_epi32(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      // vmull_s32 upcasts instead of masking, so we downcast.\n      int32x2_t a_lo = vmovn_s64(a_.neon_i64);\n      int32x2_t b_lo = vmovn_s64(b_.neon_i64);\n      r_.neon_i64 = vmull_s32(a_lo, b_lo);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_i64x2_make(\n        wasm_i32x4_extract_lane(a_.wasm_v128, 0) * HEDLEY_STATIC_CAST(int64_t, wasm_i32x4_extract_lane(b_.wasm_v128, 0)),\n        wasm_i32x4_extract_lane(a_.wasm_v128, 2) * HEDLEY_STATIC_CAST(int64_t, wasm_i32x4_extract_lane(b_.wasm_v128, 2)));\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {\n        r_.i64[i] =\n          HEDLEY_STATIC_CAST(int64_t, a_.i32[i * 2]) *\n          HEDLEY_STATIC_CAST(int64_t, b_.i32[i * 2]);\n      }\n    #endif\n\n    return 
simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)\n  #undef _mm_mul_epi32\n  #define _mm_mul_epi32(a, b) simde_mm_mul_epi32(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_mullo_epi32 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SSE4_1_NATIVE)\n    return _mm_mullo_epi32(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i32 = vmulq_s32(a_.neon_i32, b_.neon_i32);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)\n      (void) a_;\n      (void) b_;\n      r_.altivec_i32 = vec_mul(a_.altivec_i32, b_.altivec_i32);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_i32x4_mul(a_.wasm_v128, b_.wasm_v128);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n        r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (HEDLEY_STATIC_CAST(uint64_t, (HEDLEY_STATIC_CAST(int64_t, a_.i32[i]) * HEDLEY_STATIC_CAST(int64_t, b_.i32[i]))) & 0xffffffff));\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)\n  #undef _mm_mullo_epi32\n  #define _mm_mullo_epi32(a, b) simde_mm_mullo_epi32(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_x_mm_mullo_epu32 (simde__m128i a, simde__m128i b) {\n  simde__m128i_private\n    r_,\n    a_ = simde__m128i_to_private(a),\n    b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u32 = vmulq_u32(a_.neon_u32, b_.neon_u32);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.u32 = a_.u32 * b_.u32;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {\n        r_.u32[i] = a_.u32[i] * b_.u32[i];\n      }\n    #endif\n\n  return simde__m128i_from_private(r_);\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_packus_epi32 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SSE4_1_NATIVE)\n    return _mm_packus_epi32(a, b);\n  #elif defined(SIMDE_X86_SSE2_NATIVE)\n    const __m128i max = _mm_set1_epi32(UINT16_MAX);\n    const __m128i tmpa = _mm_andnot_si128(_mm_srai_epi32(a, 31), a);\n    const __m128i tmpb = _mm_andnot_si128(_mm_srai_epi32(b, 31), b);\n    return\n      _mm_packs_epi32(\n        _mm_srai_epi32(_mm_slli_epi32(_mm_or_si128(tmpa, _mm_cmpgt_epi32(tmpa, max)), 16), 16),\n        _mm_srai_epi32(_mm_slli_epi32(_mm_or_si128(tmpb, _mm_cmpgt_epi32(tmpb, max)), 16), 16)\n      );\n  #else\n    simde__m128i_private\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b),\n      r_;\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      #if defined(SIMDE_BUG_CLANG_46840)\n        r_.neon_u16 = vqmovun_high_s32(vreinterpret_s16_u16(vqmovun_s32(a_.neon_i32)), b_.neon_i32);\n      #else\n        r_.neon_u16 = vqmovun_high_s32(vqmovun_s32(a_.neon_i32), b_.neon_i32);\n      #endif\n    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u16 =\n        vcombine_u16(\n          vqmovun_s32(a_.neon_i32),\n          vqmovun_s32(b_.neon_i32)\n        );\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)\n      r_.altivec_u16 = vec_packsu(a_.altivec_i32, b_.altivec_i32);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_u16x8_narrow_i32x4(a_.wasm_v128, b_.wasm_v128);\n    #elif defined(SIMDE_CONVERT_VECTOR_) && 
HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)\n      int32_t v SIMDE_VECTOR(32) = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 0, 1, 2, 3, 4, 5, 6, 7);\n\n      v &= ~(v >> 31);\n      v |= HEDLEY_REINTERPRET_CAST(__typeof__(v), v > UINT16_MAX);\n\n      SIMDE_CONVERT_VECTOR_(r_.i16, v);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n        int32_t v = (i < (sizeof(a_.i32) / sizeof(a_.i32[0]))) ? a_.i32[i] : b_.i32[i & 3];\n        r_.u16[i] = (v < 0) ? UINT16_C(0) : ((v > UINT16_MAX) ? UINT16_MAX : HEDLEY_STATIC_CAST(uint16_t, v));\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)\n  #undef _mm_packus_epi32\n  #define _mm_packus_epi32(a, b) simde_mm_packus_epi32(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_round_sd (simde__m128d a, simde__m128d b, int rounding)\n    SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) {\n  simde__m128d_private\n    r_ = simde__m128d_to_private(a),\n    b_ = simde__m128d_to_private(b);\n\n  switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) {\n    #if defined(simde_math_nearbyint)\n      case SIMDE_MM_FROUND_TO_NEAREST_INT:\n      case SIMDE_MM_FROUND_CUR_DIRECTION:\n        r_.f64[0] = simde_math_nearbyint(b_.f64[0]);\n        break;\n    #endif\n\n    #if defined(simde_math_floor)\n      case SIMDE_MM_FROUND_TO_NEG_INF:\n        r_.f64[0] = simde_math_floor(b_.f64[0]);\n        break;\n    #endif\n\n    #if defined(simde_math_ceil)\n      case SIMDE_MM_FROUND_TO_POS_INF:\n        r_.f64[0] = simde_math_ceil(b_.f64[0]);\n        break;\n    #endif\n\n    #if defined(simde_math_trunc)\n      case SIMDE_MM_FROUND_TO_ZERO:\n        r_.f64[0] = simde_math_trunc(b_.f64[0]);\n        break;\n    #endif\n\n    default:\n      HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd());\n  }\n\n  return simde__m128d_from_private(r_);\n}\n#if defined(SIMDE_X86_SSE4_1_NATIVE)\n#  define simde_mm_round_sd(a, b, rounding) _mm_round_sd(a, b, rounding)\n#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) && defined(SIMDE_FAST_EXCEPTIONS)\n#  define simde_mm_round_sd(a, b, rounding) simde_mm_move_sd(a, simde_mm_round_pd(b, rounding))\n#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)\n  #define simde_mm_round_sd(a, b, rounding) simde_mm_move_sd(a, simde_mm_round_pd(simde_x_mm_broadcastlow_pd(b), rounding))\n#endif\n#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)\n  #undef _mm_round_sd\n  #define _mm_round_sd(a, b, rounding) simde_mm_round_sd(a, b, rounding)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_round_ss (simde__m128 a, simde__m128 b, int rounding)\n    SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) {\n  simde__m128_private\n    r_ = simde__m128_to_private(a),\n    b_ = simde__m128_to_private(b);\n\n  switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) {\n    #if defined(simde_math_nearbyintf)\n      case SIMDE_MM_FROUND_TO_NEAREST_INT:\n      case SIMDE_MM_FROUND_CUR_DIRECTION:\n        r_.f32[0] = simde_math_nearbyintf(b_.f32[0]);\n        break;\n    #endif\n\n    #if defined(simde_math_floorf)\n      case SIMDE_MM_FROUND_TO_NEG_INF:\n        r_.f32[0] = simde_math_floorf(b_.f32[0]);\n        break;\n    #endif\n\n    #if defined(simde_math_ceilf)\n      case SIMDE_MM_FROUND_TO_POS_INF:\n        r_.f32[0] = simde_math_ceilf(b_.f32[0]);\n        break;\n    #endif\n\n    #if defined(simde_math_truncf)\n      case SIMDE_MM_FROUND_TO_ZERO:\n        r_.f32[0] = simde_math_truncf(b_.f32[0]);\n 
       break;\n    #endif\n\n    default:\n      HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_ps());\n  }\n\n  return simde__m128_from_private(r_);\n}\n#if defined(SIMDE_X86_SSE4_1_NATIVE)\n  #define simde_mm_round_ss(a, b, rounding) _mm_round_ss(a, b, rounding)\n#elif SIMDE_NATURAL_VECTOR_SIZE > 0 && defined(SIMDE_FAST_EXCEPTIONS)\n  #define simde_mm_round_ss(a, b, rounding) simde_mm_move_ss((a), simde_mm_round_ps((b), (rounding)))\n#elif SIMDE_NATURAL_VECTOR_SIZE > 0\n  #define simde_mm_round_ss(a, b, rounding) simde_mm_move_ss((a), simde_mm_round_ps(simde_x_mm_broadcastlow_ps(b), (rounding)))\n#endif\n#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)\n  #undef _mm_round_ss\n  #define _mm_round_ss(a, b, rounding) simde_mm_round_ss(a, b, rounding)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_stream_load_si128 (const simde__m128i* mem_addr) {\n  #if defined(SIMDE_X86_SSE4_1_NATIVE)\n    return _mm_stream_load_si128(HEDLEY_CONST_CAST(simde__m128i*, mem_addr));\n  #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_load) && ( \\\n      defined(SIMDE_ARM_NEON_A32V7_NATIVE) || defined(SIMDE_VECTOR_SUBSCRIPT) || \\\n      defined(SIMDE_WASM_SIMD128_NATIVE) || defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || \\\n      defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE))\n    return __builtin_nontemporal_load(mem_addr);\n  #else\n    return simde_mm_load_si128(mem_addr);\n  #endif\n}\n#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)\n  #undef _mm_stream_load_si128\n  #define _mm_stream_load_si128(mem_addr) simde_mm_stream_load_si128(mem_addr)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nint\nsimde_mm_test_all_ones (simde__m128i a) {\n  #if defined(SIMDE_X86_SSE4_1_NATIVE)\n    return _mm_test_all_ones(a);\n  #else\n    simde__m128i_private a_ = simde__m128i_to_private(a);\n    int r;\n\n    #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)\n      r = vec_all_eq(a_.altivec_i32, vec_splats(~0));\n    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r = ((vgetq_lane_s64(a_.neon_i64, 0) & vgetq_lane_s64(a_.neon_i64, 1)) == ~HEDLEY_STATIC_CAST(int64_t, 0));\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r = HEDLEY_STATIC_CAST(unsigned long long, wasm_i64x2_extract_lane(a_.wasm_v128, 0) & wasm_i64x2_extract_lane(a_.wasm_v128, 1)) == 0xFFFFFFFFFFFFFFFFull;\n    #else\n      int_fast32_t r_ = ~HEDLEY_STATIC_CAST(int_fast32_t, 0);\n\n      SIMDE_VECTORIZE_REDUCTION(&:r_)\n      for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) {\n        r_ &= a_.i32f[i];\n      }\n\n      r = (r_ == ~HEDLEY_STATIC_CAST(int_fast32_t, 0));\n    #endif\n\n    return r;\n  #endif\n}\n#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)\n  #undef _mm_test_all_ones\n  #define _mm_test_all_ones(a) simde_mm_test_all_ones(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nint\nsimde_mm_test_all_zeros (simde__m128i a, simde__m128i mask) {\n  #if defined(SIMDE_X86_SSE4_1_NATIVE)\n    return _mm_test_all_zeros(a, mask);\n  #else\n    simde__m128i_private tmp_ = simde__m128i_to_private(simde_mm_and_si128(a, mask));\n    int r;\n\n    #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)\n      r = vec_all_eq(tmp_.altivec_i32, vec_splats(0));\n    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r = !(vgetq_lane_s64(tmp_.neon_i64, 0) | vgetq_lane_s64(tmp_.neon_i64, 1));\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r = (wasm_i64x2_extract_lane(tmp_.wasm_v128, 0) | wasm_i64x2_extract_lane(tmp_.wasm_v128, 1)) == 0;\n    #else\n      int_fast32_t r_ = HEDLEY_STATIC_CAST(int_fast32_t, 0);\n\n      SIMDE_VECTORIZE_REDUCTION(|:r_)\n      for 
(size_t i = 0 ; i < (sizeof(tmp_.i32f) / sizeof(tmp_.i32f[0])) ; i++) {\n        r_ |= tmp_.i32f[i];\n      }\n\n      r = !r_;\n    #endif\n\n    return r;\n  #endif\n}\n#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)\n  #undef _mm_test_all_zeros\n  #define _mm_test_all_zeros(a, mask) simde_mm_test_all_zeros(a, mask)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nint\nsimde_mm_test_mix_ones_zeros (simde__m128i a, simde__m128i mask) {\n  #if defined(SIMDE_X86_SSE4_1_NATIVE)\n    return _mm_test_mix_ones_zeros(a, mask);\n  #else\n    simde__m128i_private\n      a_ = simde__m128i_to_private(a),\n      mask_ = simde__m128i_to_private(mask);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      int64x2_t s640 = vandq_s64(a_.neon_i64, mask_.neon_i64);\n      int64x2_t s641 = vandq_s64(vreinterpretq_s64_s32(vmvnq_s32(vreinterpretq_s32_s64(a_.neon_i64))), mask_.neon_i64);\n      return (((vgetq_lane_s64(s640, 0) | vgetq_lane_s64(s640, 1)) & (vgetq_lane_s64(s641, 0) | vgetq_lane_s64(s641, 1)))!=0);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      /* ones: any bit set in (a & mask); zeros: any bit set in (~a & mask),\n       * matching the scalar fallback below. */\n      v128_t m1 = wasm_v128_and(a_.wasm_v128, mask_.wasm_v128);\n      v128_t m2 = wasm_v128_andnot(mask_.wasm_v128, a_.wasm_v128);\n      long long ones = wasm_i64x2_extract_lane(m1, 0) | wasm_i64x2_extract_lane(m1, 1);\n      long long zeros = wasm_i64x2_extract_lane(m2, 0) | wasm_i64x2_extract_lane(m2, 1);\n      return ones && zeros;\n    #else\n      for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++)\n        if (((a_.u64[i] & mask_.u64[i]) != 0) && ((~a_.u64[i] & mask_.u64[i]) != 0))\n          return 1;\n\n      return 0;\n    #endif\n  #endif\n}\n#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)\n  #undef _mm_test_mix_ones_zeros\n  #define _mm_test_mix_ones_zeros(a, mask) simde_mm_test_mix_ones_zeros(a, mask)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nint\nsimde_mm_testc_si128 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SSE4_1_NATIVE)\n    return _mm_testc_si128(a, b);\n  #else\n    simde__m128i_private\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      int64x2_t s64 = vbicq_s64(b_.neon_i64, a_.neon_i64);\n      return !(vgetq_lane_s64(s64, 0) | vgetq_lane_s64(s64, 1));\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      v128_t m = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128);\n      return (wasm_i64x2_extract_lane(m, 0) | wasm_i64x2_extract_lane(m, 1)) == 0;\n    #else\n      int_fast32_t r = 0;\n\n      SIMDE_VECTORIZE_REDUCTION(|:r)\n      for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) {\n        r |= ~a_.i32f[i] & b_.i32f[i];\n      }\n\n      return HEDLEY_STATIC_CAST(int, !r);\n    #endif\n  #endif\n}\n#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)\n  #undef _mm_testc_si128\n  #define _mm_testc_si128(a, b) simde_mm_testc_si128(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nint\nsimde_mm_testnzc_si128 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SSE4_1_NATIVE)\n    return _mm_testnzc_si128(a, b);\n  #else\n    simde__m128i_private\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      int64x2_t s640 = vandq_s64(b_.neon_i64, a_.neon_i64);\n      int64x2_t s641 = vbicq_s64(b_.neon_i64, a_.neon_i64);\n      return !( !(vgetq_lane_s64(s641, 0) || vgetq_lane_s64(s641, 1)) \\\n             || !(vgetq_lane_s64(s640, 0) || vgetq_lane_s64(s640, 1)) );\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      v128_t m1 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128);\n      v128_t m2 = 
wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128);\n      return (wasm_i64x2_extract_lane(m1, 0) | wasm_i64x2_extract_lane(m1, 1)) \\\n        && (wasm_i64x2_extract_lane(m2, 0) | wasm_i64x2_extract_lane(m2, 1));\n    #else\n      for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) {\n        if (((a_.u64[i] & b_.u64[i]) != 0) && ((~a_.u64[i] & b_.u64[i]) != 0))\n          return 1;\n      }\n\n      return 0;\n    #endif\n  #endif\n}\n#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)\n  #undef _mm_testnzc_si128\n  #define _mm_testnzc_si128(a, b) simde_mm_testnzc_si128(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nint\nsimde_mm_testz_si128 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SSE4_1_NATIVE)\n    return _mm_testz_si128(a, b);\n  #else\n    simde__m128i_private\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      int64x2_t s64 = vandq_s64(a_.neon_i64, b_.neon_i64);\n      return !(vgetq_lane_s64(s64, 0) | vgetq_lane_s64(s64, 1));\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      v128_t m = wasm_v128_and(a_.wasm_v128, b_.wasm_v128);\n      return (wasm_i64x2_extract_lane(m, 0) | wasm_i64x2_extract_lane(m, 1)) == 0;\n    #elif defined(SIMDE_HAVE_INT128_)\n      if ((a_.u128[0] & b_.u128[0]) == 0) {\n        return 1;\n      }\n      return 0;\n    #else\n      for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) {\n        if ((a_.u64[i] & b_.u64[i]) > 0)\n          return 0;\n      }\n    #endif\n\n    return 1;\n  #endif\n}\n#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)\n  #undef _mm_testz_si128\n  #define _mm_testz_si128(a, b) simde_mm_testz_si128(a, b)\n#endif\n\nSIMDE_END_DECLS_\n\nHEDLEY_DIAGNOSTIC_POP\n\n#endif /* !defined(SIMDE_X86_SSE4_1_H) */\n"
  },
  {
    "path": "external_libs/pgenlib/simde/x86/sse4.2.h",
    "content": "/* SPDX-License-Identifier: MIT\n *\n * Permission is hereby granted, free of charge, to any person\n * obtaining a copy of this software and associated documentation\n * files (the \"Software\"), to deal in the Software without\n * restriction, including without limitation the rights to use, copy,\n * modify, merge, publish, distribute, sublicense, and/or sell copies\n * of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be\n * included in all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND,\n * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\n * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\n * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\n * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\n * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\n * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n * SOFTWARE.\n *\n * Copyright:\n *   2017      Evan Nemerson <evan@nemerson.com>\n *   2020      Hidayat Khan <huk2209@gmail.com>\n */\n\n#if !defined(SIMDE_X86_SSE4_2_H)\n#define SIMDE_X86_SSE4_2_H\n\n#include \"sse4.1.h\"\n\n#if defined(__ARM_ACLE) || (defined(__GNUC__) && defined(__ARM_FEATURE_CRC32))\n  #include <arm_acle.h>\n#endif\n\nHEDLEY_DIAGNOSTIC_PUSH\nSIMDE_DISABLE_UNWANTED_DIAGNOSTICS\nSIMDE_BEGIN_DECLS_\n\n#if defined(SIMDE_X86_SSE4_2_NATIVE)\n  #define SIMDE_SIDD_UBYTE_OPS _SIDD_UBYTE_OPS\n  #define SIMDE_SIDD_UWORD_OPS _SIDD_UWORD_OPS\n  #define SIMDE_SIDD_SBYTE_OPS _SIDD_SBYTE_OPS\n  #define SIMDE_SIDD_SWORD_OPS _SIDD_SWORD_OPS\n  #define SIMDE_SIDD_CMP_EQUAL_ANY _SIDD_CMP_EQUAL_ANY\n  #define SIMDE_SIDD_CMP_RANGES _SIDD_CMP_RANGES\n  #define SIMDE_SIDD_CMP_EQUAL_EACH _SIDD_CMP_EQUAL_EACH\n  #define SIMDE_SIDD_CMP_EQUAL_ORDERED _SIDD_CMP_EQUAL_ORDERED\n  #define SIMDE_SIDD_POSITIVE_POLARITY _SIDD_POSITIVE_POLARITY\n  #define SIMDE_SIDD_NEGATIVE_POLARITY _SIDD_NEGATIVE_POLARITY\n  #define SIMDE_SIDD_MASKED_POSITIVE_POLARITY _SIDD_MASKED_POSITIVE_POLARITY\n  #define SIMDE_SIDD_MASKED_NEGATIVE_POLARITY _SIDD_MASKED_NEGATIVE_POLARITY\n  #define SIMDE_SIDD_LEAST_SIGNIFICANT _SIDD_LEAST_SIGNIFICANT\n  #define SIMDE_SIDD_MOST_SIGNIFICANT _SIDD_MOST_SIGNIFICANT\n  #define SIMDE_SIDD_BIT_MASK _SIDD_BIT_MASK\n  #define SIMDE_SIDD_UNIT_MASK _SIDD_UNIT_MASK\n#else\n  #define SIMDE_SIDD_UBYTE_OPS 0x00\n  #define SIMDE_SIDD_UWORD_OPS 0x01\n  #define SIMDE_SIDD_SBYTE_OPS 0x02\n  #define SIMDE_SIDD_SWORD_OPS 0x03\n  #define SIMDE_SIDD_CMP_EQUAL_ANY 0x00\n  #define SIMDE_SIDD_CMP_RANGES 0x04\n  #define SIMDE_SIDD_CMP_EQUAL_EACH 0x08\n  #define SIMDE_SIDD_CMP_EQUAL_ORDERED 0x0c\n  #define SIMDE_SIDD_POSITIVE_POLARITY 0x00\n  #define SIMDE_SIDD_NEGATIVE_POLARITY 0x10\n  #define SIMDE_SIDD_MASKED_POSITIVE_POLARITY 0x20\n  #define SIMDE_SIDD_MASKED_NEGATIVE_POLARITY 0x30\n  #define SIMDE_SIDD_LEAST_SIGNIFICANT 0x00\n  #define SIMDE_SIDD_MOST_SIGNIFICANT 0x40\n  #define SIMDE_SIDD_BIT_MASK 0x00\n  #define SIMDE_SIDD_UNIT_MASK 0x40\n#endif\n\n#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) && !defined(_SIDD_UBYTE_OPS)\n  #define _SIDD_UBYTE_OPS SIMDE_SIDD_UBYTE_OPS\n  #define _SIDD_UWORD_OPS SIMDE_SIDD_UWORD_OPS\n  #define _SIDD_SBYTE_OPS SIMDE_SIDD_SBYTE_OPS\n  #define _SIDD_SWORD_OPS SIMDE_SIDD_SWORD_OPS\n  #define _SIDD_CMP_EQUAL_ANY SIMDE_SIDD_CMP_EQUAL_ANY\n  #define 
_SIDD_CMP_RANGES SIMDE_SIDD_CMP_RANGES\n  #define _SIDD_CMP_EQUAL_EACH SIMDE_SIDD_CMP_EQUAL_EACH\n  #define _SIDD_CMP_EQUAL_ORDERED SIMDE_SIDD_CMP_EQUAL_ORDERED\n  #define _SIDD_POSITIVE_POLARITY SIMDE_SIDD_POSITIVE_POLARITY\n  #define _SIDD_NEGATIVE_POLARITY SIMDE_SIDD_NEGATIVE_POLARITY\n  #define _SIDD_MASKED_POSITIVE_POLARITY SIMDE_SIDD_MASKED_POSITIVE_POLARITY\n  #define _SIDD_MASKED_NEGATIVE_POLARITY SIMDE_SIDD_MASKED_NEGATIVE_POLARITY\n  #define _SIDD_LEAST_SIGNIFICANT SIMDE_SIDD_LEAST_SIGNIFICANT\n  #define _SIDD_MOST_SIGNIFICANT SIMDE_SIDD_MOST_SIGNIFICANT\n  #define _SIDD_BIT_MASK SIMDE_SIDD_BIT_MASK\n  #define _SIDD_UNIT_MASK SIMDE_SIDD_UNIT_MASK\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nint simde_mm_cmpestrs (simde__m128i a, int la, simde__m128i b, int lb, const int imm8)\n    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) {\n  #if !defined(HEDLEY_PGI_VERSION)\n    /* https://www.pgroup.com/userforum/viewtopic.php?f=4&p=27590&sid=cf89f8bf30be801831fe4a2ff0a2fa6c */\n    (void) a;\n    (void) b;\n  #endif\n  (void) la;\n  (void) lb;\n  return la <= ((128 / ((imm8 & SIMDE_SIDD_UWORD_OPS) ? 16 : 8)) - 1);\n}\n#if defined(SIMDE_X86_SSE4_2_NATIVE)\n  #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0)\n    #define simde_mm_cmpestrs(a, la, b, lb, imm8) \\\n      _mm_cmpestrs( \\\n        HEDLEY_REINTERPRET_CAST(__v16qi, a), la, \\\n        HEDLEY_REINTERPRET_CAST(__v16qi, b), lb, \\\n        imm8)\n  #else\n    #define simde_mm_cmpestrs(a, la, b, lb, imm8) _mm_cmpestrs(a, la, b, lb, imm8)\n  #endif\n#endif\n#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES)\n  #undef _mm_cmpestrs\n  #define _mm_cmpestrs(a, la, b, lb, imm8) simde_mm_cmpestrs(a, la, b, lb, imm8)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nint simde_mm_cmpestrz (simde__m128i a, int la, simde__m128i b, int lb, const int imm8)\n    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) {\n  #if !defined(HEDLEY_PGI_VERSION)\n    /* https://www.pgroup.com/userforum/viewtopic.php?f=4&p=27590&sid=cf89f8bf30be801831fe4a2ff0a2fa6c */\n    (void) a;\n    (void) b;\n  #endif\n  (void) la;\n  (void) lb;\n  return lb <= ((128 / ((imm8 & SIMDE_SIDD_UWORD_OPS) ? 
16 : 8)) - 1);\n}\n#if defined(SIMDE_X86_SSE4_2_NATIVE)\n  #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0)\n    #define simde_mm_cmpestrz(a, la, b, lb, imm8) \\\n      _mm_cmpestrz( \\\n        HEDLEY_REINTERPRET_CAST(__v16qi, a), la, \\\n        HEDLEY_REINTERPRET_CAST(__v16qi, b), lb, \\\n        imm8)\n  #else\n    #define simde_mm_cmpestrz(a, la, b, lb, imm8) _mm_cmpestrz(a, la, b, lb, imm8)\n  #endif\n#endif\n#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES)\n  #undef _mm_cmpestrz\n  #define _mm_cmpestrz(a, la, b, lb, imm8) simde_mm_cmpestrz(a, la, b, lb, imm8)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_cmpgt_epi64 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SSE4_2_NATIVE)\n    return _mm_cmpgt_epi64(a, b);\n  #elif defined(SIMDE_X86_SSE2_NATIVE)\n    /* https://stackoverflow.com/a/65175746/501126 */\n    __m128i r = _mm_and_si128(_mm_cmpeq_epi32(a, b), _mm_sub_epi64(b, a));\n    r = _mm_or_si128(r, _mm_cmpgt_epi32(a, b));\n    return _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 3, 1, 1));\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      r_.neon_u64 = vcgtq_s64(a_.neon_i64, b_.neon_i64);\n    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      /* https://stackoverflow.com/a/65223269/501126 */\n      r_.neon_i64 = vshrq_n_s64(vqsubq_s64(b_.neon_i64, a_.neon_i64), 63);\n    #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)\n      r_.altivec_u64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmpgt(a_.altivec_i64, b_.altivec_i64));\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_i64x2_gt(a_.wasm_v128, b_.wasm_v128);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 > b_.i64);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {\n        r_.i64[i] = (a_.i64[i] > b_.i64[i]) ? ~INT64_C(0) : INT64_C(0);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES)\n  #undef _mm_cmpgt_epi64\n  #define _mm_cmpgt_epi64(a, b) simde_mm_cmpgt_epi64(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nint\nsimde_mm_cmpistrs_8_(simde__m128i a) {\n  simde__m128i_private a_= simde__m128i_to_private(a);\n  const int upper_bound = (128 / 8) - 1;\n  int a_invalid = 0;\n  SIMDE_VECTORIZE\n  for (int i = 0 ; i <= upper_bound ; i++) {\n    if(!a_.i8[i])\n      a_invalid = 1;\n  }\n  return a_invalid;\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nint\nsimde_mm_cmpistrs_16_(simde__m128i a) {\n  simde__m128i_private a_= simde__m128i_to_private(a);\n  const int upper_bound = (128 / 16) - 1;\n  int a_invalid = 0;\n  SIMDE_VECTORIZE\n  for (int i = 0 ; i <= upper_bound ; i++) {\n    if(!a_.i16[i])\n      a_invalid = 1;\n  }\n  return a_invalid;\n}\n\n#if defined(SIMDE_X86_SSE4_2_NATIVE)\n  #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0)\n    #define simde_mm_cmpistrs(a, b, imm8) \\\n      _mm_cmpistrs( \\\n        HEDLEY_REINTERPRET_CAST(__v16qi, a), \\\n        HEDLEY_REINTERPRET_CAST(__v16qi, b), \\\n        imm8)\n  #else\n    #define simde_mm_cmpistrs(a, b, imm8) _mm_cmpistrs(a, b, imm8)\n  #endif\n#else\n  #define simde_mm_cmpistrs(a, b, imm8) \\\n     (((imm8) & SIMDE_SIDD_UWORD_OPS) \\\n       ? 
simde_mm_cmpistrs_16_((a)) \\\n       : simde_mm_cmpistrs_8_((a)))\n#endif\n#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES)\n  #undef _mm_cmpistrs\n  #define _mm_cmpistrs(a, b, imm8) simde_mm_cmpistrs(a, b, imm8)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nint\nsimde_mm_cmpistrz_8_(simde__m128i b) {\n  simde__m128i_private b_= simde__m128i_to_private(b);\n  const int upper_bound = (128 / 8) - 1;\n  int b_invalid = 0;\n  SIMDE_VECTORIZE\n  for (int i = 0 ; i <= upper_bound ; i++) {\n    if(!b_.i8[i])\n      b_invalid = 1;\n  }\n  return b_invalid;\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nint\nsimde_mm_cmpistrz_16_(simde__m128i b) {\n  simde__m128i_private b_= simde__m128i_to_private(b);\n  const int upper_bound = (128 / 16) - 1;\n  int b_invalid = 0;\n  SIMDE_VECTORIZE\n  for (int i = 0 ; i <= upper_bound ; i++) {\n    if(!b_.i16[i])\n      b_invalid = 1;\n  }\n  return b_invalid;\n}\n\n#if defined(SIMDE_X86_SSE4_2_NATIVE)\n  #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0)\n    #define simde_mm_cmpistrz(a, b, imm8) \\\n      _mm_cmpistrz( \\\n        HEDLEY_REINTERPRET_CAST(__v16qi, a), \\\n        HEDLEY_REINTERPRET_CAST(__v16qi, b), \\\n        imm8)\n  #else\n    #define simde_mm_cmpistrz(a, b, imm8) _mm_cmpistrz(a, b, imm8)\n  #endif\n#else\n  #define simde_mm_cmpistrz(a, b, imm8) \\\n     (((imm8) & SIMDE_SIDD_UWORD_OPS) \\\n       ? simde_mm_cmpistrz_16_((b)) \\\n       : simde_mm_cmpistrz_8_((b)))\n#endif\n#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES)\n  #undef _mm_cmpistrz\n  #define _mm_cmpistrz(a, b, imm8) simde_mm_cmpistrz(a, b, imm8)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nuint32_t\nsimde_mm_crc32_u8(uint32_t prevcrc, uint8_t v) {\n  #if defined(SIMDE_X86_SSE4_2_NATIVE)\n    return _mm_crc32_u8(prevcrc, v);\n  #else\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32)\n      return __crc32cb(prevcrc, v);\n    #else\n      uint32_t crc = prevcrc;\n      crc ^= v;\n      for(int bit = 0 ; bit < 8 ; bit++) {\n        if (crc & 1)\n          crc = (crc >> 1) ^ UINT32_C(0x82f63b78);\n        else\n          crc = (crc >> 1);\n      }\n      return crc;\n    #endif\n  #endif\n}\n#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES)\n  #define _mm_crc32_u8(prevcrc, v) simde_mm_crc32_u8(prevcrc, v)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nuint32_t\nsimde_mm_crc32_u16(uint32_t prevcrc, uint16_t v) {\n  #if defined(SIMDE_X86_SSE4_2_NATIVE)\n    return _mm_crc32_u16(prevcrc, v);\n  #else\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32)\n      return __crc32ch(prevcrc, v);\n    #else\n      uint32_t crc = prevcrc;\n      crc = simde_mm_crc32_u8(crc, v & 0xff);\n      crc = simde_mm_crc32_u8(crc, (v >> 8) & 0xff);\n      return crc;\n    #endif\n  #endif\n}\n#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES)\n  #define _mm_crc32_u16(prevcrc, v) simde_mm_crc32_u16(prevcrc, v)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nuint32_t\nsimde_mm_crc32_u32(uint32_t prevcrc, uint32_t v) {\n  #if defined(SIMDE_X86_SSE4_2_NATIVE)\n    return _mm_crc32_u32(prevcrc, v);\n  #else\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32)\n      return __crc32cw(prevcrc, v);\n    #else\n      uint32_t crc = prevcrc;\n      crc = simde_mm_crc32_u16(crc, v & 0xffff);\n      crc = simde_mm_crc32_u16(crc, (v >> 16) & 0xffff);\n      return crc;\n    #endif\n  #endif\n}\n#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES)\n  #define _mm_crc32_u32(prevcrc, v) simde_mm_crc32_u32(prevcrc, 
v)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nuint64_t\nsimde_mm_crc32_u64(uint64_t prevcrc, uint64_t v) {\n  #if defined(SIMDE_X86_SSE4_2_NATIVE) && defined(SIMDE_ARCH_AMD64)\n    return _mm_crc32_u64(prevcrc, v);\n  #else\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32)\n      return __crc32cd(HEDLEY_STATIC_CAST(uint32_t, prevcrc), v);\n    #else\n      uint64_t crc = prevcrc;\n      crc = simde_mm_crc32_u32(HEDLEY_STATIC_CAST(uint32_t, crc), v & 0xffffffff);\n      crc = simde_mm_crc32_u32(HEDLEY_STATIC_CAST(uint32_t, crc), (v >> 32) & 0xffffffff);\n      return crc;\n    #endif\n  #endif\n}\n#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64))\n  #define _mm_crc32_u64(prevcrc, v) simde_mm_crc32_u64(prevcrc, v)\n#endif\n\nSIMDE_END_DECLS_\n\nHEDLEY_DIAGNOSTIC_POP\n\n#endif /* !defined(SIMDE_X86_SSE4_2_H) */\n"
  },
  {
    "path": "external_libs/pgenlib/simde/x86/ssse3.h",
    "content": "/* SPDX-License-Identifier: MIT\n *\n * Permission is hereby granted, free of charge, to any person\n * obtaining a copy of this software and associated documentation\n * files (the \"Software\"), to deal in the Software without\n * restriction, including without limitation the rights to use, copy,\n * modify, merge, publish, distribute, sublicense, and/or sell copies\n * of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be\n * included in all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND,\n * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\n * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\n * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\n * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\n * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\n * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n * SOFTWARE.\n *\n * Copyright:\n *   2017-2020 Evan Nemerson <evan@nemerson.com>\n */\n\n#if !defined(SIMDE_X86_SSSE3_H)\n#define SIMDE_X86_SSSE3_H\n\n#include \"sse3.h\"\n\nHEDLEY_DIAGNOSTIC_PUSH\nSIMDE_DISABLE_UNWANTED_DIAGNOSTICS\nSIMDE_BEGIN_DECLS_\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_abs_epi8 (simde__m128i a) {\n  #if defined(SIMDE_X86_SSSE3_NATIVE)\n    return _mm_abs_epi8(a);\n  #elif defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_min_epu8(a, _mm_sub_epi8(_mm_setzero_si128(), a));\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i8 = vabsq_s8(a_.neon_i8);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)\n      r_.altivec_i8 = vec_abs(a_.altivec_i8);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_i8x16_abs(a_.wasm_v128);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {\n        r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, (a_.i8[i] < 0) ? (- a_.i8[i]) : a_.i8[i]);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES)\n#  define _mm_abs_epi8(a) simde_mm_abs_epi8(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_abs_epi16 (simde__m128i a) {\n  #if defined(SIMDE_X86_SSSE3_NATIVE)\n    return _mm_abs_epi16(a);\n  #elif defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_max_epi16(a, _mm_sub_epi16(_mm_setzero_si128(), a));\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i16 = vabsq_s16(a_.neon_i16);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)\n      r_.altivec_i16 = vec_abs(a_.altivec_i16);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_i16x8_abs(a_.wasm_v128);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n        r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.i16[i] < 0) ? 
(- a_.i16[i]) : a_.i16[i]);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES)\n#  define _mm_abs_epi16(a) simde_mm_abs_epi16(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_abs_epi32 (simde__m128i a) {\n  #if defined(SIMDE_X86_SSSE3_NATIVE)\n    return _mm_abs_epi32(a);\n  #elif defined(SIMDE_X86_SSE2_NATIVE)\n    const __m128i m = _mm_cmpgt_epi32(_mm_setzero_si128(), a);\n    return _mm_sub_epi32(_mm_xor_si128(a, m), m);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i32 = vabsq_s32(a_.neon_i32);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)\n      r_.altivec_i32 = vec_abs(a_.altivec_i32);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_i32x4_abs(a_.wasm_v128);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n        #if defined(_MSC_VER)\n          HEDLEY_DIAGNOSTIC_PUSH\n          #pragma warning(disable:4146)\n        #endif\n        r_.u32[i] = (a_.i32[i] < 0) ? (- HEDLEY_STATIC_CAST(uint32_t, a_.i32[i])) : HEDLEY_STATIC_CAST(uint32_t, a_.i32[i]);\n        #if defined(_MSC_VER)\n          HEDLEY_DIAGNOSTIC_POP\n        #endif\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES)\n#  define _mm_abs_epi32(a) simde_mm_abs_epi32(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_abs_pi8 (simde__m64 a) {\n  #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_abs_pi8(a);\n  #else\n    simde__m64_private\n      r_,\n      a_ = simde__m64_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i8 = vabs_s8(a_.neon_i8);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {\n        r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, (a_.i8[i] < 0) ? (- a_.i8[i]) : a_.i8[i]);\n      }\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES)\n#  define _mm_abs_pi8(a) simde_mm_abs_pi8(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_abs_pi16 (simde__m64 a) {\n  #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_abs_pi16(a);\n  #else\n    simde__m64_private\n      r_,\n      a_ = simde__m64_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i16 = vabs_s16(a_.neon_i16);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n        r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.i16[i] < 0) ? 
(- a_.i16[i]) : a_.i16[i]);\n      }\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES)\n#  define _mm_abs_pi16(a) simde_mm_abs_pi16(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_abs_pi32 (simde__m64 a) {\n  #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_abs_pi32(a);\n  #else\n    simde__m64_private\n      r_,\n      a_ = simde__m64_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i32 = vabs_s32(a_.neon_i32);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n        r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (a_.i32[i] < 0) ? (- a_.i32[i]) : a_.i32[i]);\n      }\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES)\n#  define _mm_abs_pi32(a) simde_mm_abs_pi32(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_alignr_epi8 (simde__m128i a, simde__m128i b, int count)\n    SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 255) {\n  simde__m128i_private\n    r_,\n    a_ = simde__m128i_to_private(a),\n    b_ = simde__m128i_to_private(b);\n\n  if (HEDLEY_UNLIKELY(count > 31))\n    return simde_mm_setzero_si128();\n\n  for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {\n    const int srcpos = count + HEDLEY_STATIC_CAST(int, i);\n    if (srcpos > 31) {\n      r_.i8[i] = 0;\n    } else if (srcpos > 15) {\n      r_.i8[i] = a_.i8[(srcpos) & 15];\n    } else {\n      r_.i8[i] = b_.i8[srcpos];\n    }\n  }\n\n  return simde__m128i_from_private(r_);\n}\n#if defined(SIMDE_X86_SSSE3_NATIVE)\n  #define simde_mm_alignr_epi8(a, b, count) _mm_alignr_epi8(a, b, count)\n#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n  #define simde_mm_alignr_epi8(a, b, count) \\\n    ( \\\n      ((count) > 31) \\\n        ? simde__m128i_from_neon_i8(vdupq_n_s8(0)) \\\n        : ( \\\n          ((count) > 15) \\\n            ? (simde__m128i_from_neon_i8(vextq_s8(simde__m128i_to_neon_i8(a), vdupq_n_s8(0), (count) & 15))) \\\n            : (simde__m128i_from_neon_i8(vextq_s8(simde__m128i_to_neon_i8(b), simde__m128i_to_neon_i8(a), ((count) & 15))))))\n#endif\n#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES)\n  #define _mm_alignr_epi8(a, b, count) simde_mm_alignr_epi8(a, b, count)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_alignr_pi8 (simde__m64 a, simde__m64 b, const int count)\n    SIMDE_REQUIRE_CONSTANT(count) {\n  simde__m64_private\n    r_,\n    a_ = simde__m64_to_private(a),\n    b_ = simde__m64_to_private(b);\n\n  if (HEDLEY_UNLIKELY(count > 15))\n    return simde_mm_setzero_si64();\n\n  for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {\n    const int srcpos = count + HEDLEY_STATIC_CAST(int, i);\n    if (srcpos > 15) {\n      r_.i8[i] = 0;\n    } else if (srcpos > 7) {\n      r_.i8[i] = a_.i8[(srcpos) & 7];\n    } else {\n      r_.i8[i] = b_.i8[srcpos];\n    }\n  }\n\n  return simde__m64_from_private(r_);\n}\n#if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)\n#  define simde_mm_alignr_pi8(a, b, count) _mm_alignr_pi8(a, b, count)\n#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n  #define simde_mm_alignr_pi8(a, b, count) \\\n    ( \\\n      ((count) > 15) \\\n        ? simde__m64_from_neon_i8(vdup_n_s8(0)) \\\n        : ( \\\n          ((count) > 7) \\\n            ? 
(simde__m64_from_neon_i8(vext_s8(simde__m64_to_neon_i8(a), vdup_n_s8(0), (count) & 7))) \\\n            : (simde__m64_from_neon_i8(vext_s8(simde__m64_to_neon_i8(b), simde__m64_to_neon_i8(a), ((count) & 7))))))\n#endif\n#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES)\n#  define _mm_alignr_pi8(a, b, count) simde_mm_alignr_pi8(a, b, count)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_shuffle_epi8 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SSSE3_NATIVE)\n    return _mm_shuffle_epi8(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      r_.neon_i8 = vqtbl1q_s8(a_.neon_i8, vandq_u8(b_.neon_u8, vdupq_n_u8(0x8F)));\n    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      /* Mask out the bits we're not interested in.  vtbl will result in 0\n       * for any values outside of [0, 15], so if the high bit is set it\n       * will return 0, just like in SSSE3. */\n      b_.neon_i8 = vandq_s8(b_.neon_i8, vdupq_n_s8(HEDLEY_STATIC_CAST(int8_t, (1 << 7) | 15)));\n\n      /* Convert a from an int8x16_t to an int8x8x2_t */\n      int8x8x2_t i;\n      i.val[0] = vget_low_s8(a_.neon_i8);\n      i.val[1] = vget_high_s8(a_.neon_i8);\n\n      /* Table lookups */\n      int8x8_t l = vtbl2_s8(i, vget_low_s8(b_.neon_i8));\n      int8x8_t h = vtbl2_s8(i, vget_high_s8(b_.neon_i8));\n\n      r_.neon_i8 = vcombine_s8(l, h);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)\n      /* This is a bit ugly because of the casts and the awful type\n       * macros (SIMDE_POWER_ALTIVEC_VECTOR), but it's really just\n       * vec_sel(vec_perm(a, a, b), 0, vec_cmplt(b, 0)) */\n      SIMDE_POWER_ALTIVEC_VECTOR(signed char) z = { 0, };\n      SIMDE_POWER_ALTIVEC_VECTOR(signed char) msb_mask = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_cmplt(b_.altivec_i8, z));\n      SIMDE_POWER_ALTIVEC_VECTOR(signed char) c = vec_perm(a_.altivec_i8, a_.altivec_i8, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), b_.altivec_i8));\n      r_.altivec_i8 = vec_sel(c, z, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), msb_mask));\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_i8x16_swizzle(\n        a_.wasm_v128, wasm_v128_and(b_.wasm_v128, wasm_i8x16_splat(0x8F)));\n    #else\n      for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {\n        r_.i8[i] = a_.i8[b_.i8[i] & 15] & (~(b_.i8[i]) >> 7);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n#endif\n}\n#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES)\n#  define _mm_shuffle_epi8(a, b) simde_mm_shuffle_epi8(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_shuffle_pi8 (simde__m64 a, simde__m64 b) {\n  #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_shuffle_pi8(a, b);\n  #else\n    simde__m64_private\n      r_,\n      a_ = simde__m64_to_private(a),\n      b_ = simde__m64_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      b_.neon_i8 = vand_s8(b_.neon_i8, vdup_n_s8(HEDLEY_STATIC_CAST(int8_t, (1 << 7) | 7)));\n      r_.neon_i8 = vtbl1_s8(a_.neon_i8, b_.neon_i8);\n    #else\n      for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {\n        r_.i8[i] = a_.i8[b_.i8[i] & 7] & (~(b_.i8[i]) >> 7);\n      }\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES)\n#  define 
_mm_shuffle_pi8(a, b) simde_mm_shuffle_pi8(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_hadd_epi16 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SSSE3_NATIVE)\n    return _mm_hadd_epi16(a, b);\n  #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n    return simde__m128i_from_neon_i16(vpaddq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b)));\n  #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n    int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b));\n    return simde__m128i_from_neon_i16(vaddq_s16(t.val[0], t.val[1]));\n  #else\n    return simde_mm_add_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b));\n  #endif\n}\n#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES)\n#  define _mm_hadd_epi16(a, b) simde_mm_hadd_epi16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_hadd_epi32 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SSSE3_NATIVE)\n    return _mm_hadd_epi32(a, b);\n  #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n    return simde__m128i_from_neon_i32(vpaddq_s32(simde__m128i_to_neon_i32(a), simde__m128i_to_neon_i32(b)));\n  #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n    int32x4x2_t t = vuzpq_s32(simde__m128i_to_neon_i32(a), simde__m128i_to_neon_i32(b));\n    return simde__m128i_from_neon_i32(vaddq_s32(t.val[0], t.val[1]));\n  #else\n    return simde_mm_add_epi32(simde_x_mm_deinterleaveeven_epi32(a, b), simde_x_mm_deinterleaveodd_epi32(a, b));\n  #endif\n}\n#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES)\n#  define _mm_hadd_epi32(a, b) simde_mm_hadd_epi32(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_hadd_pi16 (simde__m64 a, simde__m64 b) {\n  #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_hadd_pi16(a, b);\n  #else\n    simde__m64_private\n      r_,\n      a_ = simde__m64_to_private(a),\n      b_ = simde__m64_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      r_.neon_i16 = vpadd_s16(a_.neon_i16, b_.neon_i16);\n    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16);\n      r_.neon_i16 = vadd_s16(t.val[0], t.val[1]);\n    #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_)\n      r_.i16 =\n        SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 0, 2, 4, 6) +\n        SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 1, 3, 5, 7);\n    #else\n      r_.i16[0] = a_.i16[0] + a_.i16[1];\n      r_.i16[1] = a_.i16[2] + a_.i16[3];\n      r_.i16[2] = b_.i16[0] + b_.i16[1];\n      r_.i16[3] = b_.i16[2] + b_.i16[3];\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES)\n#  define _mm_hadd_pi16(a, b) simde_mm_hadd_pi16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_hadd_pi32 (simde__m64 a, simde__m64 b) {\n  #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_hadd_pi32(a, b);\n  #else\n    simde__m64_private\n      r_,\n      a_ = simde__m64_to_private(a),\n      b_ = simde__m64_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      r_.neon_i32 = vpadd_s32(a_.neon_i32, b_.neon_i32);\n    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      int32x2x2_t t = vuzp_s32(a_.neon_i32, b_.neon_i32);\n      r_.neon_i32 = vadd_s32(t.val[0], t.val[1]);\n    #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_)\n      r_.i32 =\n        SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 0, 2) +\n        
SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 1, 3);\n    #else\n      r_.i32[0] = a_.i32[0] + a_.i32[1];\n      r_.i32[1] = b_.i32[0] + b_.i32[1];\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES)\n#  define _mm_hadd_pi32(a, b) simde_mm_hadd_pi32(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_hadds_epi16 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SSSE3_NATIVE)\n    return _mm_hadds_epi16(a, b);\n  #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n    int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b));\n    return simde__m128i_from_neon_i16(vqaddq_s16(t.val[0], t.val[1]));\n  #else\n    return simde_mm_adds_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b));\n  #endif\n}\n#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES)\n#  define _mm_hadds_epi16(a, b) simde_mm_hadds_epi16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_hadds_pi16 (simde__m64 a, simde__m64 b) {\n  #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_hadds_pi16(a, b);\n  #else\n    simde__m64_private\n      r_,\n      a_ = simde__m64_to_private(a),\n      b_ = simde__m64_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16);\n      r_.neon_i16 = vqadd_s16(t.val[0], t.val[1]);\n    #else\n      for (size_t i = 0 ; i < ((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2) ; i++) {\n        int32_t ta = HEDLEY_STATIC_CAST(int32_t, a_.i16[i * 2]) + HEDLEY_STATIC_CAST(int32_t, a_.i16[(i * 2) + 1]);\n        r_.i16[  i  ] = HEDLEY_LIKELY(ta > INT16_MIN) ? (HEDLEY_LIKELY(ta < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, ta) : INT16_MAX) : INT16_MIN;\n        int32_t tb = HEDLEY_STATIC_CAST(int32_t, b_.i16[i * 2]) + HEDLEY_STATIC_CAST(int32_t, b_.i16[(i * 2) + 1]);\n        r_.i16[i + 2] = HEDLEY_LIKELY(tb > INT16_MIN) ? (HEDLEY_LIKELY(tb < INT16_MAX) ? 
HEDLEY_STATIC_CAST(int16_t, tb) : INT16_MAX) : INT16_MIN;\n      }\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES)\n#  define _mm_hadds_pi16(a, b) simde_mm_hadds_pi16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_hsub_epi16 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SSSE3_NATIVE)\n    return _mm_hsub_epi16(a, b);\n  #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n    int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b));\n    return simde__m128i_from_neon_i16(vsubq_s16(t.val[0], t.val[1]));\n  #else\n    return simde_mm_sub_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b));\n  #endif\n}\n#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES)\n#  define _mm_hsub_epi16(a, b) simde_mm_hsub_epi16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_hsub_epi32 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SSSE3_NATIVE)\n    return _mm_hsub_epi32(a, b);\n  #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n    int32x4x2_t t = vuzpq_s32(simde__m128i_to_neon_i32(a), simde__m128i_to_neon_i32(b));\n    return simde__m128i_from_neon_i32(vsubq_s32(t.val[0], t.val[1]));\n  #else\n    return simde_mm_sub_epi32(simde_x_mm_deinterleaveeven_epi32(a, b), simde_x_mm_deinterleaveodd_epi32(a, b));\n  #endif\n}\n#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES)\n#  define _mm_hsub_epi32(a, b) simde_mm_hsub_epi32(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_hsub_pi16 (simde__m64 a, simde__m64 b) {\n  #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_hsub_pi16(a, b);\n  #else\n    simde__m64_private\n      r_,\n      a_ = simde__m64_to_private(a),\n      b_ = simde__m64_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16);\n      r_.neon_i16 = vsub_s16(t.val[0], t.val[1]);\n    #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_)\n      r_.i16 =\n        SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 0, 2, 4, 6) -\n        SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 1, 3, 5, 7);\n    #else\n      r_.i16[0] = a_.i16[0] - a_.i16[1];\n      r_.i16[1] = a_.i16[2] - a_.i16[3];\n      r_.i16[2] = b_.i16[0] - b_.i16[1];\n      r_.i16[3] = b_.i16[2] - b_.i16[3];\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES)\n#  define _mm_hsub_pi16(a, b) simde_mm_hsub_pi16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_hsub_pi32 (simde__m64 a, simde__m64 b) {\n  #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_hsub_pi32(a, b);\n  #else\n    simde__m64_private\n      r_,\n      a_ = simde__m64_to_private(a),\n      b_ = simde__m64_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      int32x2x2_t t = vuzp_s32(a_.neon_i32, b_.neon_i32);\n      r_.neon_i32 = vsub_s32(t.val[0], t.val[1]);\n    #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_)\n      r_.i32 =\n        SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 0, 2) -\n        SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 1, 3);\n    #else\n      r_.i32[0] = a_.i32[0] - a_.i32[1];\n      r_.i32[1] = b_.i32[0] - b_.i32[1];\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES)\n#  define _mm_hsub_pi32(a, b) simde_mm_hsub_pi32(a, 
b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_hsubs_epi16 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SSSE3_NATIVE)\n    return _mm_hsubs_epi16(a, b);\n  #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n    int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b));\n    return simde__m128i_from_neon_i16(vqsubq_s16(t.val[0], t.val[1]));\n  #else\n    return simde_mm_subs_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b));\n  #endif\n}\n#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES)\n#  define _mm_hsubs_epi16(a, b) simde_mm_hsubs_epi16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_hsubs_pi16 (simde__m64 a, simde__m64 b) {\n  #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_hsubs_pi16(a, b);\n  #else\n    simde__m64_private\n      r_,\n      a_ = simde__m64_to_private(a),\n      b_ = simde__m64_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16);\n      r_.neon_i16 = vqsub_s16(t.val[0], t.val[1]);\n    #else\n      for (size_t i = 0 ; i < ((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2) ; i++) {\n        r_.i16[  i  ] = simde_math_subs_i16(a_.i16[i * 2], a_.i16[(i * 2) + 1]);\n        r_.i16[i + 2] = simde_math_subs_i16(b_.i16[i * 2], b_.i16[(i * 2) + 1]);\n      }\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES)\n#  define _mm_hsubs_pi16(a, b) simde_mm_hsubs_pi16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_maddubs_epi16 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SSSE3_NATIVE)\n    return _mm_maddubs_epi16(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      /* Zero extend a */\n      int16x8_t a_odd = vreinterpretq_s16_u16(vshrq_n_u16(a_.neon_u16, 8));\n      int16x8_t a_even = vreinterpretq_s16_u16(vbicq_u16(a_.neon_u16, vdupq_n_u16(0xff00)));\n\n      /* Sign extend by shifting left then shifting right. */\n      int16x8_t b_even = vshrq_n_s16(vshlq_n_s16(b_.neon_i16, 8), 8);\n      int16x8_t b_odd = vshrq_n_s16(b_.neon_i16, 8);\n\n      /* multiply */\n      int16x8_t prod1 = vmulq_s16(a_even, b_even);\n      int16x8_t prod2 = vmulq_s16(a_odd, b_odd);\n\n      /* saturated add */\n      r_.neon_i16 = vqaddq_s16(prod1, prod2);\n    #else\n      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n        const int idx = HEDLEY_STATIC_CAST(int, i) << 1;\n        int32_t ts =\n          (HEDLEY_STATIC_CAST(int16_t, a_.u8[  idx  ]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[  idx  ])) +\n          (HEDLEY_STATIC_CAST(int16_t, a_.u8[idx + 1]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[idx + 1]));\n        r_.i16[i] = (ts > INT16_MIN) ? ((ts < INT16_MAX) ? 
HEDLEY_STATIC_CAST(int16_t, ts) : INT16_MAX) : INT16_MIN;\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES)\n#  define _mm_maddubs_epi16(a, b) simde_mm_maddubs_epi16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_maddubs_pi16 (simde__m64 a, simde__m64 b) {\n  #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_maddubs_pi16(a, b);\n  #else\n    simde__m64_private\n      r_,\n      a_ = simde__m64_to_private(a),\n      b_ = simde__m64_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      int16x8_t ai = vreinterpretq_s16_u16(vmovl_u8(a_.neon_u8));\n      int16x8_t bi = vmovl_s8(b_.neon_i8);\n      int16x8_t p = vmulq_s16(ai, bi);\n      int16x4_t l = vget_low_s16(p);\n      int16x4_t h = vget_high_s16(p);\n      r_.neon_i16 = vqadd_s16(vuzp1_s16(l, h), vuzp2_s16(l, h));\n    #else\n      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n        const int idx = HEDLEY_STATIC_CAST(int, i) << 1;\n        int32_t ts =\n          (HEDLEY_STATIC_CAST(int16_t, a_.u8[  idx  ]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[  idx  ])) +\n          (HEDLEY_STATIC_CAST(int16_t, a_.u8[idx + 1]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[idx + 1]));\n        r_.i16[i] = (ts > INT16_MIN) ? ((ts < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, ts) : INT16_MAX) : INT16_MIN;\n      }\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES)\n#  define _mm_maddubs_pi16(a, b) simde_mm_maddubs_pi16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_mulhrs_epi16 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SSSE3_NATIVE)\n    return _mm_mulhrs_epi16(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      /* Multiply */\n      int32x4_t mul_lo = vmull_s16(vget_low_s16(a_.neon_i16),\n                                  vget_low_s16(b_.neon_i16));\n      int32x4_t mul_hi = vmull_s16(vget_high_s16(a_.neon_i16),\n                                  vget_high_s16(b_.neon_i16));\n\n      /* Rounding narrowing shift right\n       * narrow = (int16_t)((mul + 16384) >> 15); */\n      int16x4_t narrow_lo = vrshrn_n_s32(mul_lo, 15);\n      int16x4_t narrow_hi = vrshrn_n_s32(mul_hi, 15);\n\n      /* Join together */\n      r_.neon_i16 = vcombine_s16(narrow_lo, narrow_hi);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n        v128_t __lo = wasm_i32x4_mul(wasm_i32x4_extend_low_i16x8(a_.wasm_v128), wasm_i32x4_extend_low_i16x8(b_.wasm_v128));\n        v128_t __hi = wasm_i32x4_mul(wasm_i32x4_extend_high_i16x8(a_.wasm_v128), wasm_i32x4_extend_high_i16x8(b_.wasm_v128));\n        const v128_t __inc = wasm_i32x4_splat(0x4000);\n        __lo = wasm_i32x4_add(__lo, __inc);\n        __hi = wasm_i32x4_add(__hi, __inc);\n        __lo = wasm_i32x4_add(__lo, __lo);\n        __hi = wasm_i32x4_add(__hi, __hi);\n        r_.wasm_v128 = wasm_i16x8_shuffle(__lo, __hi, 1, 3, 5, 7, 9, 11, 13, 15);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n        r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, (((HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) + 0x4000) >> 15));\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES)\n#  
define _mm_mulhrs_epi16(a, b) simde_mm_mulhrs_epi16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_mulhrs_pi16 (simde__m64 a, simde__m64 b) {\n  #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_mulhrs_pi16(a, b);\n  #else\n    simde__m64_private\n      r_,\n      a_ = simde__m64_to_private(a),\n      b_ = simde__m64_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      /* Multiply */\n      int32x4_t mul = vmull_s16(a_.neon_i16, b_.neon_i16);\n\n      /* Rounding narrowing shift right\n       * narrow = (int16_t)((mul + 16384) >> 15); */\n      int16x4_t narrow = vrshrn_n_s32(mul, 15);\n\n      /* Join together */\n      r_.neon_i16 = narrow;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n        r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, (((HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) + 0x4000) >> 15));\n      }\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES)\n#  define _mm_mulhrs_pi16(a, b) simde_mm_mulhrs_pi16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_sign_epi8 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SSSE3_NATIVE)\n    return _mm_sign_epi8(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      uint8x16_t aneg_mask = vreinterpretq_u8_s8(vshrq_n_s8(b_.neon_i8, 7));\n      uint8x16_t bnz_mask;\n      #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n        bnz_mask = vceqzq_s8(b_.neon_i8);\n      #else\n        bnz_mask = vceqq_s8(b_.neon_i8, vdupq_n_s8(0));\n      #endif\n      bnz_mask = vmvnq_u8(bnz_mask);\n\n      r_.neon_i8 = vbslq_s8(aneg_mask, vnegq_s8(a_.neon_i8), vandq_s8(a_.neon_i8, vreinterpretq_s8_u8(bnz_mask)));\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      simde__m128i mask = wasm_i8x16_shr(b_.wasm_v128, 7);\n      simde__m128i zeromask = simde_mm_cmpeq_epi8(b_.wasm_v128, simde_mm_setzero_si128());\n      r_.wasm_v128 = simde_mm_andnot_si128(zeromask, simde_mm_xor_si128(simde_mm_add_epi8(a_.wasm_v128, mask), mask));\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {\n        r_.i8[i] = (b_.i8[i] < 0) ? (- a_.i8[i]) : ((b_.i8[i] != 0) ? 
(a_.i8[i]) : INT8_C(0));\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES)\n#  define _mm_sign_epi8(a, b) simde_mm_sign_epi8(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_sign_epi16 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SSSE3_NATIVE)\n    return _mm_sign_epi16(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      uint16x8_t aneg_mask = vreinterpretq_u16_s16(vshrq_n_s16(b_.neon_i16, 15));\n      uint16x8_t bnz_mask;\n      #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n        bnz_mask = vceqzq_s16(b_.neon_i16);\n      #else\n        bnz_mask = vceqq_s16(b_.neon_i16, vdupq_n_s16(0));\n      #endif\n      bnz_mask = vmvnq_u16(bnz_mask);\n\n      r_.neon_i16 = vbslq_s16(aneg_mask, vnegq_s16(a_.neon_i16), vandq_s16(a_.neon_i16, vreinterpretq_s16_u16(bnz_mask)));\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      simde__m128i mask = simde_mm_srai_epi16(b_.wasm_v128, 15);\n      simde__m128i zeromask = simde_mm_cmpeq_epi16(b_.wasm_v128, simde_mm_setzero_si128());\n      r_.wasm_v128 = simde_mm_andnot_si128(zeromask, simde_mm_xor_si128(simde_mm_add_epi16(a_.wasm_v128, mask), mask));\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n        r_.i16[i] = (b_.i16[i] < 0) ? (- a_.i16[i]) : ((b_.i16[i] != 0) ? (a_.i16[i]) : INT16_C(0));\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES)\n#  define _mm_sign_epi16(a, b) simde_mm_sign_epi16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_sign_epi32 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SSSE3_NATIVE)\n    return _mm_sign_epi32(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      uint32x4_t aneg_mask = vreinterpretq_u32_s32(vshrq_n_s32(b_.neon_i32, 31));\n      uint32x4_t bnz_mask;\n      #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n        bnz_mask = vceqzq_s32(b_.neon_i32);\n      #else\n        bnz_mask = vceqq_s32(b_.neon_i32, vdupq_n_s32(0));\n      #endif\n      bnz_mask = vmvnq_u32(bnz_mask);\n\n      r_.neon_i32 = vbslq_s32(aneg_mask, vnegq_s32(a_.neon_i32), vandq_s32(a_.neon_i32, vreinterpretq_s32_u32(bnz_mask)));\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      simde__m128i mask = simde_mm_srai_epi32(b_.wasm_v128, 31);\n      simde__m128i zeromask = simde_mm_cmpeq_epi32(b_.wasm_v128, simde_mm_setzero_si128());\n      r_.wasm_v128 = simde_mm_andnot_si128(zeromask, simde_mm_xor_si128(simde_mm_add_epi32(a_.wasm_v128, mask), mask));\n    #else\n      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n        r_.i32[i] = (b_.i32[i] < 0) ? (- a_.i32[i]) : ((b_.i32[i] != 0) ? 
(a_.i32[i]) : INT32_C(0));\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES)\n#  define _mm_sign_epi32(a, b) simde_mm_sign_epi32(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_sign_pi8 (simde__m64 a, simde__m64 b) {\n  #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_sign_pi8(a, b);\n  #else\n    simde__m64_private\n      r_,\n      a_ = simde__m64_to_private(a),\n      b_ = simde__m64_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      uint8x8_t aneg_mask = vreinterpret_u8_s8(vshr_n_s8(b_.neon_i8, 7));\n      uint8x8_t bnz_mask;\n      #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n        bnz_mask = vceqz_s8(b_.neon_i8);\n      #else\n        bnz_mask = vceq_s8(b_.neon_i8, vdup_n_s8(0));\n      #endif\n      bnz_mask = vmvn_u8(bnz_mask);\n\n      r_.neon_i8 = vbsl_s8(aneg_mask, vneg_s8(a_.neon_i8), vand_s8(a_.neon_i8, vreinterpret_s8_u8(bnz_mask)));\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {\n        r_.i8[i] = (b_.i8[i] < 0) ? (- a_.i8[i]) : ((b_.i8[i] != 0) ? (a_.i8[i]) : INT8_C(0));\n      }\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES)\n#  define _mm_sign_pi8(a, b) simde_mm_sign_pi8(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_sign_pi16 (simde__m64 a, simde__m64 b) {\n  #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_sign_pi16(a, b);\n  #else\n    simde__m64_private\n      r_,\n      a_ = simde__m64_to_private(a),\n      b_ = simde__m64_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      uint16x4_t aneg_mask = vreinterpret_u16_s16(vshr_n_s16(b_.neon_i16, 15));\n      uint16x4_t bnz_mask;\n      #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n        bnz_mask = vceqz_s16(b_.neon_i16);\n      #else\n        bnz_mask = vceq_s16(b_.neon_i16, vdup_n_s16(0));\n      #endif\n      bnz_mask = vmvn_u16(bnz_mask);\n\n      r_.neon_i16 = vbsl_s16(aneg_mask, vneg_s16(a_.neon_i16), vand_s16(a_.neon_i16, vreinterpret_s16_u16(bnz_mask)));\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n        r_.i16[i] = (b_.i16[i] < 0) ? (- a_.i16[i]) : ((b_.i16[i] > 0) ? 
(a_.i16[i]) : INT16_C(0));\n      }\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES)\n#  define _mm_sign_pi16(a, b) simde_mm_sign_pi16(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m64\nsimde_mm_sign_pi32 (simde__m64 a, simde__m64 b) {\n  #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)\n    return _mm_sign_pi32(a, b);\n  #else\n    simde__m64_private\n      r_,\n      a_ = simde__m64_to_private(a),\n      b_ = simde__m64_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      uint32x2_t aneg_mask = vreinterpret_u32_s32(vshr_n_s32(b_.neon_i32, 31));\n      uint32x2_t bnz_mask;\n      #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n        bnz_mask = vceqz_s32(b_.neon_i32);\n      #else\n        bnz_mask = vceq_s32(b_.neon_i32, vdup_n_s32(0));\n      #endif\n      bnz_mask = vmvn_u32(bnz_mask);\n\n      r_.neon_i32 = vbsl_s32(aneg_mask, vneg_s32(a_.neon_i32), vand_s32(a_.neon_i32, vreinterpret_s32_u32(bnz_mask)));\n    #else\n      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n        r_.i32[i] = (b_.i32[i] < 0) ? (- a_.i32[i]) : ((b_.i32[i] > 0) ? (a_.i32[i]) : INT32_C(0));\n      }\n    #endif\n\n    return simde__m64_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES)\n#  define _mm_sign_pi32(a, b) simde_mm_sign_pi32(a, b)\n#endif\n\nSIMDE_END_DECLS_\n\nHEDLEY_DIAGNOSTIC_POP\n\n#endif /* !defined(SIMDE_X86_SSSE3_H) */\n"
  },
  {
    "path": "external_libs/pgenlib/simde/x86/svml.h",
    "content": "/* SPDX-License-Identifier: MIT\n *\n * Permission is hereby granted, free of charge, to any person\n * obtaining a copy of this software and associated documentation\n * files (the \"Software\"), to deal in the Software without\n * restriction, including without limitation the rights to use, copy,\n * modify, merge, publish, distribute, sublicense, and/or sell copies\n * of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be\n * included in all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND,\n * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\n * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\n * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\n * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\n * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\n * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n * SOFTWARE.\n *\n * Copyright:\n *   2020      Evan Nemerson <evan@nemerson.com>\n *   2020      Himanshi Mathur <himanshi18037@iiitd.ac.in>\n */\n\n#if !defined(SIMDE_X86_SVML_H)\n#define SIMDE_X86_SVML_H\n\n#include \"fma.h\"\n#include \"avx2.h\"\n#include \"avx512/abs.h\"\n#include \"avx512/add.h\"\n#include \"avx512/cmp.h\"\n#include \"avx512/copysign.h\"\n#include \"avx512/xorsign.h\"\n#include \"avx512/div.h\"\n#include \"avx512/fmadd.h\"\n#include \"avx512/mov.h\"\n#include \"avx512/mul.h\"\n#include \"avx512/negate.h\"\n#include \"avx512/or.h\"\n#include \"avx512/set1.h\"\n#include \"avx512/setone.h\"\n#include \"avx512/setzero.h\"\n#include \"avx512/sqrt.h\"\n#include \"avx512/sub.h\"\n\n#include \"../simde-complex.h\"\n\nHEDLEY_DIAGNOSTIC_PUSH\nSIMDE_DISABLE_UNWANTED_DIAGNOSTICS\nSIMDE_BEGIN_DECLS_\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_acos_ps (simde__m128 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_acos_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)\n    #if SIMDE_ACCURACY_PREFERENCE > 1\n      return Sleef_acosf4_u10(a);\n    #else\n      return Sleef_acosf4_u35(a);\n    #endif\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n      r_.f32[i] = simde_math_acosf(a_.f32[i]);\n    }\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_acos_ps\n  #define _mm_acos_ps(a) simde_mm_acos_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_acos_pd (simde__m128d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_acos_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)\n    #if SIMDE_ACCURACY_PREFERENCE > 1\n      return Sleef_acosd2_u10(a);\n    #else\n      return Sleef_acosd2_u35(a);\n    #endif\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n      r_.f64[i] = simde_math_acos(a_.f64[i]);\n    }\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_acos_pd\n  #define _mm_acos_pd(a) 
simde_mm_acos_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_acos_ps (simde__m256 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_acos_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)\n    #if SIMDE_ACCURACY_PREFERENCE > 1\n      return Sleef_acosf8_u10(a);\n    #else\n      return Sleef_acosf8_u35(a);\n    #endif\n  #else\n    simde__m256_private\n      r_,\n      a_ = simde__m256_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {\n        r_.m128[i] = simde_mm_acos_ps(a_.m128[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = simde_math_acosf(a_.f32[i]);\n      }\n    #endif\n\n    return simde__m256_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_acos_ps\n  #define _mm256_acos_ps(a) simde_mm256_acos_ps(a)\n#endif\n\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_acos_pd (simde__m256d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_acos_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)\n    #if SIMDE_ACCURACY_PREFERENCE > 1\n      return Sleef_acosd4_u10(a);\n    #else\n      return Sleef_acosd4_u35(a);\n    #endif\n  #else\n    simde__m256d_private\n      r_,\n      a_ = simde__m256d_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {\n        r_.m128d[i] = simde_mm_acos_pd(a_.m128d[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = simde_math_acos(a_.f64[i]);\n      }\n    #endif\n\n    return simde__m256d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_acos_pd\n  #define _mm256_acos_pd(a) simde_mm256_acos_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_acos_ps (simde__m512 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_acos_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    #if SIMDE_ACCURACY_PREFERENCE > 1\n      return Sleef_acosf16_u10(a);\n    #else\n      return Sleef_acosf16_u35(a);\n    #endif\n  #else\n    simde__m512_private\n      r_,\n      a_ = simde__m512_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n      for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {\n        r_.m256[i] = simde_mm256_acos_ps(a_.m256[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = simde_math_acosf(a_.f32[i]);\n      }\n    #endif\n\n    return simde__m512_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_acos_ps\n  #define _mm512_acos_ps(a) simde_mm512_acos_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_acos_pd (simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_acos_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    #if SIMDE_ACCURACY_PREFERENCE > 1\n      return Sleef_acosd8_u10(a);\n    #else\n      return Sleef_acosd8_u35(a);\n    #endif\n 
 #else\n    simde__m512d_private\n      r_,\n      a_ = simde__m512d_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n      for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {\n        r_.m256d[i] = simde_mm256_acos_pd(a_.m256d[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = simde_math_acos(a_.f64[i]);\n      }\n    #endif\n\n    return simde__m512d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_acos_pd\n  #define _mm512_acos_pd(a) simde_mm512_acos_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_mask_acos_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_acos_ps(src, k, a);\n  #else\n    return simde_mm512_mask_mov_ps(src, k, simde_mm512_acos_ps(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_acos_ps\n  #define _mm512_mask_acos_ps(src, k, a) simde_mm512_mask_acos_ps(src, k, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_mask_acos_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_acos_pd(src, k, a);\n  #else\n    return simde_mm512_mask_mov_pd(src, k, simde_mm512_acos_pd(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_acos_pd\n  #define _mm512_mask_acos_pd(src, k, a) simde_mm512_mask_acos_pd(src, k, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_acosh_ps (simde__m128 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_acosh_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)\n    return Sleef_acoshf4_u10(a);\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n      r_.f32[i] = simde_math_acoshf(a_.f32[i]);\n    }\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_acosh_ps\n  #define _mm_acosh_ps(a) simde_mm_acosh_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_acosh_pd (simde__m128d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_acosh_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)\n    return Sleef_acoshd2_u10(a);\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n      r_.f64[i] = simde_math_acosh(a_.f64[i]);\n    }\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_acosh_pd\n  #define _mm_acosh_pd(a) simde_mm_acosh_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_acosh_ps (simde__m256 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_acosh_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)\n    return Sleef_acoshf8_u10(a);\n  #else\n    simde__m256_private\n      r_,\n      a_ = simde__m256_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      for (size_t i = 0 ; i < (sizeof(r_.m128) / 
sizeof(r_.m128[0])) ; i++) {\n        r_.m128[i] = simde_mm_acosh_ps(a_.m128[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = simde_math_acoshf(a_.f32[i]);\n      }\n    #endif\n\n    return simde__m256_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_acosh_ps\n  #define _mm256_acosh_ps(a) simde_mm256_acosh_ps(a)\n#endif\n\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_acosh_pd (simde__m256d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_acosh_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)\n    return Sleef_acoshd4_u10(a);\n  #else\n    simde__m256d_private\n      r_,\n      a_ = simde__m256d_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {\n        r_.m128d[i] = simde_mm_acosh_pd(a_.m128d[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = simde_math_acosh(a_.f64[i]);\n      }\n    #endif\n\n    return simde__m256d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_acosh_pd\n  #define _mm256_acosh_pd(a) simde_mm256_acosh_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_acosh_ps (simde__m512 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_acosh_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return Sleef_acoshf16_u10(a);\n  #else\n    simde__m512_private\n      r_,\n      a_ = simde__m512_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n      for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {\n        r_.m256[i] = simde_mm256_acosh_ps(a_.m256[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = simde_math_acoshf(a_.f32[i]);\n      }\n    #endif\n\n    return simde__m512_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_acosh_ps\n  #define _mm512_acosh_ps(a) simde_mm512_acosh_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_acosh_pd (simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_acosh_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return Sleef_acoshd8_u10(a);\n  #else\n    simde__m512d_private\n      r_,\n      a_ = simde__m512d_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n      for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {\n        r_.m256d[i] = simde_mm256_acosh_pd(a_.m256d[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = simde_math_acosh(a_.f64[i]);\n      }\n    #endif\n\n    return simde__m512d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_acosh_pd\n  #define _mm512_acosh_pd(a) simde_mm512_acosh_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_mask_acosh_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return 
_mm512_mask_acosh_ps(src, k, a);\n  #else\n    return simde_mm512_mask_mov_ps(src, k, simde_mm512_acosh_ps(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_acosh_ps\n  #define _mm512_mask_acosh_ps(src, k, a) simde_mm512_mask_acosh_ps(src, k, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_mask_acosh_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_acosh_pd(src, k, a);\n  #else\n    return simde_mm512_mask_mov_pd(src, k, simde_mm512_acosh_pd(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_acosh_pd\n  #define _mm512_mask_acosh_pd(src, k, a) simde_mm512_mask_acosh_pd(src, k, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_asin_ps (simde__m128 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_asin_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)\n    #if SIMDE_ACCURACY_PREFERENCE > 1\n      return Sleef_asinf4_u10(a);\n    #else\n      return Sleef_asinf4_u35(a);\n    #endif\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n      r_.f32[i] = simde_math_asinf(a_.f32[i]);\n    }\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_asin_ps\n  #define _mm_asin_ps(a) simde_mm_asin_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_asin_pd (simde__m128d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_asin_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)\n    #if SIMDE_ACCURACY_PREFERENCE > 1\n      return Sleef_asind2_u10(a);\n    #else\n      return Sleef_asind2_u35(a);\n    #endif\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n      r_.f64[i] = simde_math_asin(a_.f64[i]);\n    }\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_asin_pd\n  #define _mm_asin_pd(a) simde_mm_asin_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_asin_ps (simde__m256 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_asin_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)\n    #if SIMDE_ACCURACY_PREFERENCE > 1\n      return Sleef_asinf8_u10(a);\n    #else\n      return Sleef_asinf8_u35(a);\n    #endif\n  #else\n    simde__m256_private\n      r_,\n      a_ = simde__m256_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {\n        r_.m128[i] = simde_mm_asin_ps(a_.m128[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = simde_math_asinf(a_.f32[i]);\n      }\n    #endif\n\n    return simde__m256_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_asin_ps\n  #define _mm256_asin_ps(a) simde_mm256_asin_ps(a)\n#endif\n\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_asin_pd (simde__m256d a) {\n  #if 
defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_asin_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)\n    #if SIMDE_ACCURACY_PREFERENCE > 1\n      return Sleef_asind4_u10(a);\n    #else\n      return Sleef_asind4_u35(a);\n    #endif\n  #else\n    simde__m256d_private\n      r_,\n      a_ = simde__m256d_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {\n        r_.m128d[i] = simde_mm_asin_pd(a_.m128d[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = simde_math_asin(a_.f64[i]);\n      }\n    #endif\n\n    return simde__m256d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_asin_pd\n  #define _mm256_asin_pd(a) simde_mm256_asin_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_asin_ps (simde__m512 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_asin_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    #if SIMDE_ACCURACY_PREFERENCE > 1\n      return Sleef_asinf16_u10(a);\n    #else\n      return Sleef_asinf16_u35(a);\n    #endif\n  #else\n    simde__m512_private\n      r_,\n      a_ = simde__m512_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n      for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {\n        r_.m256[i] = simde_mm256_asin_ps(a_.m256[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = simde_math_asinf(a_.f32[i]);\n      }\n    #endif\n\n    return simde__m512_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_asin_ps\n  #define _mm512_asin_ps(a) simde_mm512_asin_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_asin_pd (simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_asin_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    #if SIMDE_ACCURACY_PREFERENCE > 1\n      return Sleef_asind8_u10(a);\n    #else\n      return Sleef_asind8_u35(a);\n    #endif\n  #else\n    simde__m512d_private\n      r_,\n      a_ = simde__m512d_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n      for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {\n        r_.m256d[i] = simde_mm256_asin_pd(a_.m256d[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = simde_math_asin(a_.f64[i]);\n      }\n    #endif\n\n    return simde__m512d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_asin_pd\n  #define _mm512_asin_pd(a) simde_mm512_asin_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_mask_asin_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_asin_ps(src, k, a);\n  #else\n    return simde_mm512_mask_mov_ps(src, k, simde_mm512_asin_ps(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_asin_ps\n  #define _mm512_mask_asin_ps(src, k, a) simde_mm512_mask_asin_ps(src, k, 
a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_mask_asin_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_asin_pd(src, k, a);\n  #else\n    return simde_mm512_mask_mov_pd(src, k, simde_mm512_asin_pd(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_asin_pd\n  #define _mm512_mask_asin_pd(src, k, a) simde_mm512_mask_asin_pd(src, k, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_asinh_ps (simde__m128 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_asinh_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)\n    return Sleef_asinhf4_u10(a);\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n      r_.f32[i] = simde_math_asinhf(a_.f32[i]);\n    }\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_asinh_ps\n  #define _mm_asinh_ps(a) simde_mm_asinh_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_asinh_pd (simde__m128d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_asinh_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)\n    return Sleef_asinhd2_u10(a);\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n      r_.f64[i] = simde_math_asinh(a_.f64[i]);\n    }\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_asinh_pd\n  #define _mm_asinh_pd(a) simde_mm_asinh_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_asinh_ps (simde__m256 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_asinh_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)\n    return Sleef_asinhf8_u10(a);\n  #else\n    simde__m256_private\n      r_,\n      a_ = simde__m256_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {\n        r_.m128[i] = simde_mm_asinh_ps(a_.m128[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = simde_math_asinhf(a_.f32[i]);\n      }\n    #endif\n\n    return simde__m256_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_asinh_ps\n  #define _mm256_asinh_ps(a) simde_mm256_asinh_ps(a)\n#endif\n\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_asinh_pd (simde__m256d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_asinh_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)\n    return Sleef_asinhd4_u10(a);\n  #else\n    simde__m256d_private\n      r_,\n      a_ = simde__m256d_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {\n        r_.m128d[i] = simde_mm_asinh_pd(a_.m128d[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / 
sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = simde_math_asinh(a_.f64[i]);\n      }\n    #endif\n\n    return simde__m256d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_asinh_pd\n  #define _mm256_asinh_pd(a) simde_mm256_asinh_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_asinh_ps (simde__m512 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_asinh_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return Sleef_asinhf16_u10(a);\n  #else\n    simde__m512_private\n      r_,\n      a_ = simde__m512_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n      for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {\n        r_.m256[i] = simde_mm256_asinh_ps(a_.m256[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = simde_math_asinhf(a_.f32[i]);\n      }\n    #endif\n\n    return simde__m512_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_asinh_ps\n  #define _mm512_asinh_ps(a) simde_mm512_asinh_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_asinh_pd (simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_asinh_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return Sleef_asinhd8_u10(a);\n  #else\n    simde__m512d_private\n      r_,\n      a_ = simde__m512d_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n      for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {\n        r_.m256d[i] = simde_mm256_asinh_pd(a_.m256d[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = simde_math_asinh(a_.f64[i]);\n      }\n    #endif\n\n    return simde__m512d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_asinh_pd\n  #define _mm512_asinh_pd(a) simde_mm512_asinh_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_mask_asinh_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_asinh_ps(src, k, a);\n  #else\n    return simde_mm512_mask_mov_ps(src, k, simde_mm512_asinh_ps(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_asinh_ps\n  #define _mm512_mask_asinh_ps(src, k, a) simde_mm512_mask_asinh_ps(src, k, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_mask_asinh_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_asinh_pd(src, k, a);\n  #else\n    return simde_mm512_mask_mov_pd(src, k, simde_mm512_asinh_pd(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_asinh_pd\n  #define _mm512_mask_asinh_pd(src, k, a) simde_mm512_mask_asinh_pd(src, k, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_atan_ps (simde__m128 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_atan_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)\n    #if SIMDE_ACCURACY_PREFERENCE > 1\n      return Sleef_atanf4_u10(a);\n    
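/* Sleef's _u10/_u35 suffixes bound the maximum error at 1.0 and 3.5 ULP; SIMDE_ACCURACY_PREFERENCE selects between them */\n    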
#else\n      return Sleef_atanf4_u35(a);\n    #endif\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n      r_.f32[i] = simde_math_atanf(a_.f32[i]);\n    }\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_atan_ps\n  #define _mm_atan_ps(a) simde_mm_atan_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_atan_pd (simde__m128d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_atan_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)\n    #if SIMDE_ACCURACY_PREFERENCE > 1\n      return Sleef_atand2_u10(a);\n    #else\n      return Sleef_atand2_u35(a);\n    #endif\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n      r_.f64[i] = simde_math_atan(a_.f64[i]);\n    }\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_atan_pd\n  #define _mm_atan_pd(a) simde_mm_atan_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_atan_ps (simde__m256 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_atan_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)\n    #if SIMDE_ACCURACY_PREFERENCE > 1\n      return Sleef_atanf8_u10(a);\n    #else\n      return Sleef_atanf8_u35(a);\n    #endif\n  #else\n    simde__m256_private\n      r_,\n      a_ = simde__m256_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {\n        r_.m128[i] = simde_mm_atan_ps(a_.m128[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = simde_math_atanf(a_.f32[i]);\n      }\n    #endif\n\n    return simde__m256_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_atan_ps\n  #define _mm256_atan_ps(a) simde_mm256_atan_ps(a)\n#endif\n\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_atan_pd (simde__m256d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_atan_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)\n    #if SIMDE_ACCURACY_PREFERENCE > 1\n      return Sleef_atand4_u10(a);\n    #else\n      return Sleef_atand4_u35(a);\n    #endif\n  #else\n    simde__m256d_private\n      r_,\n      a_ = simde__m256d_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {\n        r_.m128d[i] = simde_mm_atan_pd(a_.m128d[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = simde_math_atan(a_.f64[i]);\n      }\n    #endif\n\n    return simde__m256d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_atan_pd\n  #define _mm256_atan_pd(a) simde_mm256_atan_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_atan_ps (simde__m512 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_atan_ps(a);\n  
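/* dispatch order used throughout this file: native SVML intrinsic, then a Sleef vector call, then a scalar libm loop */\n  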
#elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    #if SIMDE_ACCURACY_PREFERENCE > 1\n      return Sleef_atanf16_u10(a);\n    #else\n      return Sleef_atanf16_u35(a);\n    #endif\n  #else\n    simde__m512_private\n      r_,\n      a_ = simde__m512_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n      for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {\n        r_.m256[i] = simde_mm256_atan_ps(a_.m256[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = simde_math_atanf(a_.f32[i]);\n      }\n    #endif\n\n    return simde__m512_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_atan_ps\n  #define _mm512_atan_ps(a) simde_mm512_atan_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_atan_pd (simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_atan_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    #if SIMDE_ACCURACY_PREFERENCE > 1\n      return Sleef_atand8_u10(a);\n    #else\n      return Sleef_atand8_u35(a);\n    #endif\n  #else\n    simde__m512d_private\n      r_,\n      a_ = simde__m512d_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n      for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {\n        r_.m256d[i] = simde_mm256_atan_pd(a_.m256d[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = simde_math_atan(a_.f64[i]);\n      }\n    #endif\n\n    return simde__m512d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_atan_pd\n  #define _mm512_atan_pd(a) simde_mm512_atan_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_mask_atan_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_atan_ps(src, k, a);\n  #else\n    return simde_mm512_mask_mov_ps(src, k, simde_mm512_atan_ps(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_atan_ps\n  #define _mm512_mask_atan_ps(src, k, a) simde_mm512_mask_atan_ps(src, k, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_mask_atan_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_atan_pd(src, k, a);\n  #else\n    return simde_mm512_mask_mov_pd(src, k, simde_mm512_atan_pd(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_atan_pd\n  #define _mm512_mask_atan_pd(src, k, a) simde_mm512_mask_atan_pd(src, k, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_atan2_ps (simde__m128 a, simde__m128 b) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_atan2_ps(a, b);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)\n    #if SIMDE_ACCURACY_PREFERENCE > 1\n      return Sleef_atan2f4_u10(a, b);\n    #else\n      return Sleef_atan2f4_u35(a, b);\n    #endif\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a),\n      b_ = simde__m128_to_private(b);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n      
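/* operand order matches libm atan2(y, x): a supplies y, b supplies x */\n      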
r_.f32[i] = simde_math_atan2f(a_.f32[i], b_.f32[i]);\n    }\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_atan2_ps\n  #define _mm_atan2_ps(a, b) simde_mm_atan2_ps(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_atan2_pd (simde__m128d a, simde__m128d b) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_atan2_pd(a, b);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)\n    #if SIMDE_ACCURACY_PREFERENCE > 1\n      return Sleef_atan2d2_u10(a, b);\n    #else\n      return Sleef_atan2d2_u35(a, b);\n    #endif\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a),\n      b_ = simde__m128d_to_private(b);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n      r_.f64[i] = simde_math_atan2(a_.f64[i], b_.f64[i]);\n    }\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_atan2_pd\n  #define _mm_atan2_pd(a, b) simde_mm_atan2_pd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_atan2_ps (simde__m256 a, simde__m256 b) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_atan2_ps(a, b);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)\n    #if SIMDE_ACCURACY_PREFERENCE > 1\n      return Sleef_atan2f8_u10(a, b);\n    #else\n      return Sleef_atan2f8_u35(a, b);\n    #endif\n  #else\n    simde__m256_private\n      r_,\n      a_ = simde__m256_to_private(a),\n      b_ = simde__m256_to_private(b);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {\n        r_.m128[i] = simde_mm_atan2_ps(a_.m128[i], b_.m128[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = simde_math_atan2f(a_.f32[i], b_.f32[i]);\n      }\n    #endif\n\n    return simde__m256_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_atan2_ps\n  #define _mm256_atan2_ps(a, b) simde_mm256_atan2_ps(a, b)\n#endif\n\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_atan2_pd (simde__m256d a, simde__m256d b) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_atan2_pd(a, b);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)\n    #if SIMDE_ACCURACY_PREFERENCE > 1\n      return Sleef_atan2d4_u10(a, b);\n    #else\n      return Sleef_atan2d4_u35(a, b);\n    #endif\n  #else\n    simde__m256d_private\n      r_,\n      a_ = simde__m256d_to_private(a),\n      b_ = simde__m256d_to_private(b);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {\n        r_.m128d[i] = simde_mm_atan2_pd(a_.m128d[i], b_.m128d[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = simde_math_atan2(a_.f64[i], b_.f64[i]);\n      }\n    #endif\n\n    return simde__m256d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_atan2_pd\n  #define _mm256_atan2_pd(a, b) simde_mm256_atan2_pd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_atan2_ps (simde__m512 a, simde__m512 b) {\n  #if 
defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_atan2_ps(a, b);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    #if SIMDE_ACCURACY_PREFERENCE > 1\n      return Sleef_atan2f16_u10(a, b);\n    #else\n      return Sleef_atan2f16_u35(a, b);\n    #endif\n  #else\n    simde__m512_private\n      r_,\n      a_ = simde__m512_to_private(a),\n      b_ = simde__m512_to_private(b);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n      for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {\n        r_.m256[i] = simde_mm256_atan2_ps(a_.m256[i], b_.m256[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = simde_math_atan2f(a_.f32[i], b_.f32[i]);\n      }\n    #endif\n\n    return simde__m512_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_atan2_ps\n  #define _mm512_atan2_ps(a, b) simde_mm512_atan2_ps(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_atan2_pd (simde__m512d a, simde__m512d b) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_atan2_pd(a, b);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    #if SIMDE_ACCURACY_PREFERENCE > 1\n      return Sleef_atan2d8_u10(a, b);\n    #else\n      return Sleef_atan2d8_u35(a, b);\n    #endif\n  #else\n    simde__m512d_private\n      r_,\n      a_ = simde__m512d_to_private(a),\n      b_ = simde__m512d_to_private(b);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n      for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {\n        r_.m256d[i] = simde_mm256_atan2_pd(a_.m256d[i], b_.m256d[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = simde_math_atan2(a_.f64[i], b_.f64[i]);\n      }\n    #endif\n\n    return simde__m512d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_atan2_pd\n  #define _mm512_atan2_pd(a, b) simde_mm512_atan2_pd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_mask_atan2_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_atan2_ps(src, k, a, b);\n  #else\n    return simde_mm512_mask_mov_ps(src, k, simde_mm512_atan2_ps(a, b));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_atan2_ps\n  #define _mm512_mask_atan2_ps(src, k, a, b) simde_mm512_mask_atan2_ps(src, k, a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_mask_atan2_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_atan2_pd(src, k, a, b);\n  #else\n    return simde_mm512_mask_mov_pd(src, k, simde_mm512_atan2_pd(a, b));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_atan2_pd\n  #define _mm512_mask_atan2_pd(src, k, a, b) simde_mm512_mask_atan2_pd(src, k, a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_atanh_ps (simde__m128 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_atanh_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)\n    return Sleef_atanhf4_u10(a);\n  
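/* only a 1.0-ULP Sleef call is used for atanh, so there is no SIMDE_ACCURACY_PREFERENCE switch here */\n  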
#else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n      r_.f32[i] = simde_math_atanhf(a_.f32[i]);\n    }\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_atanh_ps\n  #define _mm_atanh_ps(a) simde_mm_atanh_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_atanh_pd (simde__m128d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_atanh_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)\n    return Sleef_atanhd2_u10(a);\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n      r_.f64[i] = simde_math_atanh(a_.f64[i]);\n    }\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_atanh_pd\n  #define _mm_atanh_pd(a) simde_mm_atanh_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_atanh_ps (simde__m256 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_atanh_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)\n    return Sleef_atanhf8_u10(a);\n  #else\n    simde__m256_private\n      r_,\n      a_ = simde__m256_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {\n        r_.m128[i] = simde_mm_atanh_ps(a_.m128[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = simde_math_atanhf(a_.f32[i]);\n      }\n    #endif\n\n    return simde__m256_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_atanh_ps\n  #define _mm256_atanh_ps(a) simde_mm256_atanh_ps(a)\n#endif\n\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_atanh_pd (simde__m256d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_atanh_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)\n    return Sleef_atanhd4_u10(a);\n  #else\n    simde__m256d_private\n      r_,\n      a_ = simde__m256d_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {\n        r_.m128d[i] = simde_mm_atanh_pd(a_.m128d[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = simde_math_atanh(a_.f64[i]);\n      }\n    #endif\n\n    return simde__m256d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_atanh_pd\n  #define _mm256_atanh_pd(a) simde_mm256_atanh_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_atanh_ps (simde__m512 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_atanh_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return Sleef_atanhf16_u10(a);\n  #else\n    simde__m512_private\n      r_,\n      a_ = simde__m512_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n      for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {\n        
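/* natural vector size is below 512 bits: process the input as two 256-bit halves */\n        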
r_.m256[i] = simde_mm256_atanh_ps(a_.m256[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = simde_math_atanhf(a_.f32[i]);\n      }\n    #endif\n\n    return simde__m512_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_atanh_ps\n  #define _mm512_atanh_ps(a) simde_mm512_atanh_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_atanh_pd (simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_atanh_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return Sleef_atanhd8_u10(a);\n  #else\n    simde__m512d_private\n      r_,\n      a_ = simde__m512d_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n      for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {\n        r_.m256d[i] = simde_mm256_atanh_pd(a_.m256d[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = simde_math_atanh(a_.f64[i]);\n      }\n    #endif\n\n    return simde__m512d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_atanh_pd\n  #define _mm512_atanh_pd(a) simde_mm512_atanh_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_mask_atanh_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_atanh_ps(src, k, a);\n  #else\n    return simde_mm512_mask_mov_ps(src, k, simde_mm512_atanh_ps(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_atanh_ps\n  #define _mm512_mask_atanh_ps(src, k, a) simde_mm512_mask_atanh_ps(src, k, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_mask_atanh_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_atanh_pd(src, k, a);\n  #else\n    return simde_mm512_mask_mov_pd(src, k, simde_mm512_atanh_pd(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_atanh_pd\n  #define _mm512_mask_atanh_pd(src, k, a) simde_mm512_mask_atanh_pd(src, k, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_cbrt_ps (simde__m128 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_cbrt_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)\n    return Sleef_cbrtf4_u10(a);\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n      r_.f32[i] = simde_math_cbrtf(a_.f32[i]);\n    }\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_cbrt_ps\n  #define _mm_cbrt_ps(a) simde_mm_cbrt_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_cbrt_pd (simde__m128d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_cbrt_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)\n    return Sleef_cbrtd2_u10(a);\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f64) / 
sizeof(r_.f64[0])) ; i++) {\n      r_.f64[i] = simde_math_cbrt(a_.f64[i]);\n    }\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_cbrt_pd\n  #define _mm_cbrt_pd(a) simde_mm_cbrt_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_cbrt_ps (simde__m256 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_cbrt_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)\n    return Sleef_cbrtf8_u10(a);\n  #else\n    simde__m256_private\n      r_,\n      a_ = simde__m256_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {\n        r_.m128[i] = simde_mm_cbrt_ps(a_.m128[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = simde_math_cbrtf(a_.f32[i]);\n      }\n    #endif\n\n    return simde__m256_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_cbrt_ps\n  #define _mm256_cbrt_ps(a) simde_mm256_cbrt_ps(a)\n#endif\n\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_cbrt_pd (simde__m256d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_cbrt_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)\n    return Sleef_cbrtd4_u10(a);\n  #else\n    simde__m256d_private\n      r_,\n      a_ = simde__m256d_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {\n        r_.m128d[i] = simde_mm_cbrt_pd(a_.m128d[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = simde_math_cbrt(a_.f64[i]);\n      }\n    #endif\n\n    return simde__m256d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_cbrt_pd\n  #define _mm256_cbrt_pd(a) simde_mm256_cbrt_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_cbrt_ps (simde__m512 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_cbrt_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return Sleef_cbrtf16_u10(a);\n  #else\n    simde__m512_private\n      r_,\n      a_ = simde__m512_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n      for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {\n        r_.m256[i] = simde_mm256_cbrt_ps(a_.m256[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = simde_math_cbrtf(a_.f32[i]);\n      }\n    #endif\n\n    return simde__m512_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_cbrt_ps\n  #define _mm512_cbrt_ps(a) simde_mm512_cbrt_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_cbrt_pd (simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_cbrt_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return Sleef_cbrtd8_u10(a);\n  #else\n    simde__m512d_private\n      r_,\n      a_ = simde__m512d_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n      for (size_t i = 0 ; i 
< (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {\n        r_.m256d[i] = simde_mm256_cbrt_pd(a_.m256d[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = simde_math_cbrt(a_.f64[i]);\n      }\n    #endif\n\n    return simde__m512d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_cbrt_pd\n  #define _mm512_cbrt_pd(a) simde_mm512_cbrt_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_mask_cbrt_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_cbrt_ps(src, k, a);\n  #else\n    return simde_mm512_mask_mov_ps(src, k, simde_mm512_cbrt_ps(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_cbrt_ps\n  #define _mm512_mask_cbrt_ps(src, k, a) simde_mm512_mask_cbrt_ps(src, k, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_mask_cbrt_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_cbrt_pd(src, k, a);\n  #else\n    return simde_mm512_mask_mov_pd(src, k, simde_mm512_cbrt_pd(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_cbrt_pd\n  #define _mm512_mask_cbrt_pd(src, k, a) simde_mm512_mask_cbrt_pd(src, k, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_cexp_ps (simde__m128 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_cexp_ps(a);\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i+=2) {\n      simde_cfloat32 val = simde_math_cexpf(SIMDE_MATH_CMPLXF(a_.f32[i], a_.f32[i+1]));\n      r_.f32[  i  ] = simde_math_crealf(val);\n      r_.f32[i + 1] = simde_math_cimagf(val);\n    }\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_cexp_ps\n  #define _mm_cexp_ps(a) simde_mm_cexp_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_cexp_ps (simde__m256 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_cexp_ps(a);\n  #else\n    simde__m256_private\n      r_,\n      a_ = simde__m256_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i+=2) {\n      simde_cfloat32 val = simde_math_cexpf(SIMDE_MATH_CMPLXF(a_.f32[i], a_.f32[i+1]));\n      r_.f32[  i  ] = simde_math_crealf(val);\n      r_.f32[i + 1] = simde_math_cimagf(val);\n    }\n\n    return simde__m256_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_cexp_ps\n  #define _mm256_cexp_ps(a) simde_mm256_cexp_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_cos_ps (simde__m128 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_cos_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)\n    #if SIMDE_ACCURACY_PREFERENCE > 1\n      return Sleef_cosf4_u10(a);\n    #else\n      return Sleef_cosf4_u35(a);\n    #endif\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n      r_.f32[i] = 
simde_math_cosf(a_.f32[i]);\n    }\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_cos_ps\n  #define _mm_cos_ps(a) simde_mm_cos_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_cos_pd (simde__m128d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_cos_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)\n    #if SIMDE_ACCURACY_PREFERENCE > 1\n      return Sleef_cosd2_u10(a);\n    #else\n      return Sleef_cosd2_u35(a);\n    #endif\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n      r_.f64[i] = simde_math_cos(a_.f64[i]);\n    }\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_cos_pd\n  #define _mm_cos_pd(a) simde_mm_cos_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_cos_ps (simde__m256 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_cos_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)\n    #if SIMDE_ACCURACY_PREFERENCE > 1\n      return Sleef_cosf8_u10(a);\n    #else\n      return Sleef_cosf8_u35(a);\n    #endif\n  #else\n    simde__m256_private\n      r_,\n      a_ = simde__m256_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {\n        r_.m128[i] = simde_mm_cos_ps(a_.m128[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = simde_math_cosf(a_.f32[i]);\n      }\n    #endif\n\n    return simde__m256_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_cos_ps\n  #define _mm256_cos_ps(a) simde_mm256_cos_ps(a)\n#endif\n\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_cos_pd (simde__m256d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_cos_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)\n    #if SIMDE_ACCURACY_PREFERENCE > 1\n      return Sleef_cosd4_u10(a);\n    #else\n      return Sleef_cosd4_u35(a);\n    #endif\n  #else\n    simde__m256d_private\n      r_,\n      a_ = simde__m256d_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {\n        r_.m128d[i] = simde_mm_cos_pd(a_.m128d[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = simde_math_cos(a_.f64[i]);\n      }\n    #endif\n\n    return simde__m256d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_cos_pd\n  #define _mm256_cos_pd(a) simde_mm256_cos_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_cos_ps (simde__m512 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_cos_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    #if SIMDE_ACCURACY_PREFERENCE > 1\n      return Sleef_cosf16_u10(a);\n    #else\n      return Sleef_cosf16_u35(a);\n    #endif\n  #else\n    simde__m512_private\n      r_,\n      a_ = 
simde__m512_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n      for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {\n        r_.m256[i] = simde_mm256_cos_ps(a_.m256[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = simde_math_cosf(a_.f32[i]);\n      }\n    #endif\n\n    return simde__m512_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_cos_ps\n  #define _mm512_cos_ps(a) simde_mm512_cos_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_cos_pd (simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_cos_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    #if SIMDE_ACCURACY_PREFERENCE > 1\n      return Sleef_cosd8_u10(a);\n    #else\n      return Sleef_cosd8_u35(a);\n    #endif\n  #else\n    simde__m512d_private\n      r_,\n      a_ = simde__m512d_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n      for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {\n        r_.m256d[i] = simde_mm256_cos_pd(a_.m256d[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = simde_math_cos(a_.f64[i]);\n      }\n    #endif\n\n    return simde__m512d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_cos_pd\n  #define _mm512_cos_pd(a) simde_mm512_cos_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_mask_cos_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_cos_ps(src, k, a);\n  #else\n    return simde_mm512_mask_mov_ps(src, k, simde_mm512_cos_ps(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_cos_ps\n  #define _mm512_mask_cos_ps(src, k, a) simde_mm512_mask_cos_ps(src, k, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_mask_cos_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_cos_pd(src, k, a);\n  #else\n    return simde_mm512_mask_mov_pd(src, k, simde_mm512_cos_pd(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_cos_pd\n  #define _mm512_mask_cos_pd(src, k, a) simde_mm512_mask_cos_pd(src, k, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_x_mm_deg2rad_ps(simde__m128 a) {\n  #if SIMDE_NATURAL_VECTOR_SIZE_GE(128)\n    return simde_mm_mul_ps(a, simde_mm_set1_ps(SIMDE_MATH_PI_OVER_180F));\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_f32 = vmulq_n_f32(a_.neon_f32, SIMDE_MATH_PI_OVER_180F);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_53784)\n      r_.f32 = a_.f32 * SIMDE_MATH_PI_OVER_180F;\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      const __typeof__(r_.f32) tmp = { SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F };\n      r_.f32 = a_.f32 * tmp;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = simde_math_deg2radf(a_.f32[i]);\n      }\n    #endif\n  
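  /* simde_x_* helpers are SIMDe-internal (no SVML counterpart); the degree-argument trig functions below reuse them */\n  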
  return simde__m128_from_private(r_);\n  #endif\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_x_mm_deg2rad_pd(simde__m128d a) {\n  #if SIMDE_NATURAL_VECTOR_SIZE_GE(128)\n    return simde_mm_mul_pd(a, simde_mm_set1_pd(SIMDE_MATH_PI_OVER_180));\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      r_.neon_f64 = vmulq_n_f64(a_.neon_f64, SIMDE_MATH_PI_OVER_180);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_53784)\n      r_.f64 = a_.f64 * SIMDE_MATH_PI_OVER_180;\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      const __typeof__(r_.f64) tmp = { SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180 };\n      r_.f64 = a_.f64 * tmp;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = simde_math_deg2rad(a_.f64[i]);\n      }\n    #endif\n    return simde__m128d_from_private(r_);\n  #endif\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_x_mm256_deg2rad_ps(simde__m256 a) {\n  #if SIMDE_NATURAL_VECTOR_SIZE_GE(256)\n    return simde_mm256_mul_ps(a, simde_mm256_set1_ps(SIMDE_MATH_PI_OVER_180F));\n  #else\n    simde__m256_private\n      r_,\n      a_ = simde__m256_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {\n        r_.m128[i] = simde_x_mm_deg2rad_ps(a_.m128[i]);\n      }\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_53784)\n      r_.f32 = a_.f32 * SIMDE_MATH_PI_OVER_180F;\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      const __typeof__(r_.f32) tmp = {\n        SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F,\n        SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F\n      };\n      r_.f32 = a_.f32 * tmp;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = simde_math_deg2radf(a_.f32[i]);\n      }\n    #endif\n    return simde__m256_from_private(r_);\n  #endif\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_x_mm256_deg2rad_pd(simde__m256d a) {\n  #if SIMDE_NATURAL_VECTOR_SIZE_GE(256)\n    return simde_mm256_mul_pd(a, simde_mm256_set1_pd(SIMDE_MATH_PI_OVER_180));\n  #else\n    simde__m256d_private\n      r_,\n      a_ = simde__m256d_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {\n        r_.m128d[i] = simde_x_mm_deg2rad_pd(a_.m128d[i]);\n      }\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_53784)\n      r_.f64 = a_.f64 * SIMDE_MATH_PI_OVER_180;\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      const __typeof__(r_.f64) tmp = { SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180 };\n      r_.f64 = a_.f64 * tmp;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = simde_math_deg2rad(a_.f64[i]);\n      }\n    #endif\n    return simde__m256d_from_private(r_);\n  #endif\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_x_mm512_deg2rad_ps(simde__m512 a) {\n  #if SIMDE_NATURAL_VECTOR_SIZE_GE(512)\n    return simde_mm512_mul_ps(a, simde_mm512_set1_ps(SIMDE_MATH_PI_OVER_180F));\n  #else\n    simde__m512_private\n      r_,\n      a_ = 
simde__m512_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n      for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {\n        r_.m256[i] = simde_x_mm256_deg2rad_ps(a_.m256[i]);\n      }\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_53784)\n      r_.f32 = a_.f32 * SIMDE_MATH_PI_OVER_180F;\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      const __typeof__(r_.f32) tmp = {\n        SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F,\n        SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F,\n        SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F,\n        SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F\n      };\n      r_.f32 = a_.f32 * tmp;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = simde_math_deg2radf(a_.f32[i]);\n      }\n    #endif\n    return simde__m512_from_private(r_);\n  #endif\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_x_mm512_deg2rad_pd(simde__m512d a) {\n  #if SIMDE_NATURAL_VECTOR_SIZE_GE(512)\n    return simde_mm512_mul_pd(a, simde_mm512_set1_pd(SIMDE_MATH_PI_OVER_180));\n  #else\n    simde__m512d_private\n      r_,\n      a_ = simde__m512d_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n      for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {\n        r_.m256d[i] = simde_x_mm256_deg2rad_pd(a_.m256d[i]);\n      }\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_53784)\n      r_.f64 = a_.f64 * SIMDE_MATH_PI_OVER_180;\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      const __typeof__(r_.f64) tmp = {\n        SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180,\n        SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180\n      };\n      r_.f64 = a_.f64 * tmp;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = simde_math_deg2rad(a_.f64[i]);\n      }\n    #endif\n    return simde__m512d_from_private(r_);\n  #endif\n}\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_cosd_ps (simde__m128 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_cosd_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)\n    #if SIMDE_ACCURACY_PREFERENCE > 1\n      return Sleef_cosf4_u10(simde_x_mm_deg2rad_ps(a));\n    #else\n      return Sleef_cosf4_u35(simde_x_mm_deg2rad_ps(a));\n    #endif\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n      r_.f32[i] = simde_math_cosf(simde_math_deg2radf(a_.f32[i]));\n    }\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_cosd_ps\n  #define _mm_cosd_ps(a) simde_mm_cosd_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_cosd_pd (simde__m128d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_cosd_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)\n    #if SIMDE_ACCURACY_PREFERENCE > 1\n      return 
Sleef_cosd2_u10(simde_x_mm_deg2rad_pd(a));\n    #else\n      return Sleef_cosd2_u35(simde_x_mm_deg2rad_pd(a));\n    #endif\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n      r_.f64[i] = simde_math_cos(simde_math_deg2rad(a_.f64[i]));\n    }\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_cosd_pd\n  #define _mm_cosd_pd(a) simde_mm_cosd_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_cosd_ps (simde__m256 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_cosd_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)\n    #if SIMDE_ACCURACY_PREFERENCE > 1\n      return Sleef_cosf8_u10(simde_x_mm256_deg2rad_ps(a));\n    #else\n      return Sleef_cosf8_u35(simde_x_mm256_deg2rad_ps(a));\n    #endif\n  #else\n    simde__m256_private\n      r_,\n      a_ = simde__m256_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {\n        r_.m128[i] = simde_mm_cosd_ps(a_.m128[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = simde_math_cosf(simde_math_deg2radf(a_.f32[i]));\n      }\n    #endif\n\n    return simde__m256_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_cosd_ps\n  #define _mm256_cosd_ps(a) simde_mm256_cosd_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_cosd_pd (simde__m256d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_cosd_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)\n    #if SIMDE_ACCURACY_PREFERENCE > 1\n      return Sleef_cosd4_u10(simde_x_mm256_deg2rad_pd(a));\n    #else\n      return Sleef_cosd4_u35(simde_x_mm256_deg2rad_pd(a));\n    #endif\n  #else\n    simde__m256d_private\n      r_,\n      a_ = simde__m256d_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {\n        r_.m128d[i] = simde_mm_cosd_pd(a_.m128d[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = simde_math_cos(simde_math_deg2rad(a_.f64[i]));\n      }\n    #endif\n\n    return simde__m256d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_cosd_pd\n  #define _mm256_cosd_pd(a) simde_mm256_cosd_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_cosd_ps (simde__m512 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_cosd_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    #if SIMDE_ACCURACY_PREFERENCE > 1\n      return Sleef_cosf16_u10(simde_x_mm512_deg2rad_ps(a));\n    #else\n      return Sleef_cosf16_u35(simde_x_mm512_deg2rad_ps(a));\n    #endif\n  #else\n    simde__m512_private\n      r_,\n      a_ = simde__m512_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n      for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {\n        r_.m256[i] = simde_mm256_cosd_ps(a_.m256[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t 
i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = simde_math_cosf(simde_math_deg2radf(a_.f32[i]));\n      }\n    #endif\n\n    return simde__m512_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_cosd_ps\n  #define _mm512_cosd_ps(a) simde_mm512_cosd_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_cosd_pd (simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_cosd_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    #if SIMDE_ACCURACY_PREFERENCE > 1\n      return Sleef_cosd8_u10(simde_x_mm512_deg2rad_pd(a));\n    #else\n      return Sleef_cosd8_u35(simde_x_mm512_deg2rad_pd(a));\n    #endif\n  #else\n    simde__m512d_private\n      r_,\n      a_ = simde__m512d_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n      for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {\n        r_.m256d[i] = simde_mm256_cosd_pd(a_.m256d[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = simde_math_cos(simde_math_deg2rad(a_.f64[i]));\n      }\n    #endif\n\n    return simde__m512d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_cosd_pd\n  #define _mm512_cosd_pd(a) simde_mm512_cosd_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_mask_cosd_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_cosd_ps(src, k, a);\n  #else\n    return simde_mm512_mask_mov_ps(src, k, simde_mm512_cosd_ps(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_cosd_ps\n  #define _mm512_mask_cosd_ps(src, k, a) simde_mm512_mask_cosd_ps(src, k, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_mask_cosd_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_cosd_pd(src, k, a);\n  #else\n    return simde_mm512_mask_mov_pd(src, k, simde_mm512_cosd_pd(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_cosd_pd\n  #define _mm512_mask_cosd_pd(src, k, a) simde_mm512_mask_cosd_pd(src, k, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_cosh_ps (simde__m128 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_cosh_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)\n    return Sleef_coshf4_u10(a);\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n      r_.f32[i] = simde_math_coshf(a_.f32[i]);\n    }\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_cosh_ps\n  #define _mm_cosh_ps(a) simde_mm_cosh_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_cosh_pd (simde__m128d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_cosh_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)\n    return Sleef_coshd2_u10(a);\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a);\n\n    
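/* portable path: scalar cosh per lane; SIMDE_VECTORIZE hints the compiler to auto-vectorize this loop */\n    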
SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n      r_.f64[i] = simde_math_cosh(a_.f64[i]);\n    }\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_cosh_pd\n  #define _mm_cosh_pd(a) simde_mm_cosh_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_cosh_ps (simde__m256 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_cosh_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)\n    return Sleef_coshf8_u10(a);\n  #else\n    simde__m256_private\n      r_,\n      a_ = simde__m256_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {\n        r_.m128[i] = simde_mm_cosh_ps(a_.m128[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = simde_math_coshf(a_.f32[i]);\n      }\n    #endif\n\n    return simde__m256_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_cosh_ps\n  #define _mm256_cosh_ps(a) simde_mm256_cosh_ps(a)\n#endif\n\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_cosh_pd (simde__m256d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_cosh_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)\n    return Sleef_coshd4_u10(a);\n  #else\n    simde__m256d_private\n      r_,\n      a_ = simde__m256d_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {\n        r_.m128d[i] = simde_mm_cosh_pd(a_.m128d[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = simde_math_cosh(a_.f64[i]);\n      }\n    #endif\n\n    return simde__m256d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_cosh_pd\n  #define _mm256_cosh_pd(a) simde_mm256_cosh_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_cosh_ps (simde__m512 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_cosh_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return Sleef_coshf16_u10(a);\n  #else\n    simde__m512_private\n      r_,\n      a_ = simde__m512_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n      for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {\n        r_.m256[i] = simde_mm256_cosh_ps(a_.m256[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = simde_math_coshf(a_.f32[i]);\n      }\n    #endif\n\n    return simde__m512_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_cosh_ps\n  #define _mm512_cosh_ps(a) simde_mm512_cosh_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_cosh_pd (simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_cosh_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return Sleef_coshd8_u10(a);\n  #else\n    simde__m512d_private\n      r_,\n      a_ = simde__m512d_to_private(a);\n\n    #if 
SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n      for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {\n        r_.m256d[i] = simde_mm256_cosh_pd(a_.m256d[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = simde_math_cosh(a_.f64[i]);\n      }\n    #endif\n\n    return simde__m512d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_cosh_pd\n  #define _mm512_cosh_pd(a) simde_mm512_cosh_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_mask_cosh_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_cosh_ps(src, k, a);\n  #else\n    return simde_mm512_mask_mov_ps(src, k, simde_mm512_cosh_ps(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_cosh_ps\n  #define _mm512_mask_cosh_ps(src, k, a) simde_mm512_mask_cosh_ps(src, k, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_mask_cosh_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_cosh_pd(src, k, a);\n  #else\n    return simde_mm512_mask_mov_pd(src, k, simde_mm512_cosh_pd(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_cosh_pd\n  #define _mm512_mask_cosh_pd(src, k, a) simde_mm512_mask_cosh_pd(src, k, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_div_epi8 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_div_epi8(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i8 = a_.i8 / b_.i8;\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_i8x16_div(a_.wasm_v128, b_.wasm_v128);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {\n        r_.i8[i] = a_.i8[i] / b_.i8[i];\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_div_epi8\n  #define _mm_div_epi8(a, b) simde_mm_div_epi8((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_div_epi16 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_div_epi16(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i16 = a_.i16 / b_.i16;\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_i16x8_div(a_.wasm_v128, b_.wasm_v128);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n        r_.i16[i] = a_.i16[i] / b_.i16[i];\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_div_epi16\n  #define _mm_div_epi16(a, b) simde_mm_div_epi16((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_div_epi32 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_div_epi32(a, b);\n  #else\n    
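/* x86 has no packed integer division, so SVML emulates it; this fallback does the same via GCC vector extensions or a per-element loop */\n    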
simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i32 = a_.i32 / b_.i32;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n        r_.i32[i] = a_.i32[i] / b_.i32[i];\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#define simde_mm_idiv_epi32(a, b) simde_mm_div_epi32(a, b)\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_div_epi32\n  #define _mm_div_epi32(a, b) simde_mm_div_epi32(a, b)\n  #undef _mm_idiv_epi32\n  #define _mm_idiv_epi32(a, b) simde_mm_div_epi32(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_div_epi64 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_div_epi64(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i64 = a_.i64 / b_.i64;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {\n        r_.i64[i] = a_.i64[i] / b_.i64[i];\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_div_epi64\n  #define _mm_div_epi64(a, b) simde_mm_div_epi64((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_div_epu8 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_div_epu8(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.u8 = a_.u8 / b_.u8;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {\n        r_.u8[i] = a_.u8[i] / b_.u8[i];\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_div_epu8\n  #define _mm_div_epu8(a, b) simde_mm_div_epu8((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_div_epu16 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_div_epu16(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.u16 = a_.u16 / b_.u16;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {\n        r_.u16[i] = a_.u16[i] / b_.u16[i];\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_div_epu16\n  #define _mm_div_epu16(a, b) simde_mm_div_epu16((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_div_epu32 
(simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_div_epu32(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.u32 = a_.u32 / b_.u32;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {\n        r_.u32[i] = a_.u32[i] / b_.u32[i];\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#define simde_mm_udiv_epi32(a, b) simde_mm_div_epu32(a, b)\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_div_epu32\n  #define _mm_div_epu32(a, b) simde_mm_div_epu32(a, b)\n  #undef _mm_udiv_epi32\n  #define _mm_udiv_epi32(a, b) simde_mm_div_epu32(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_div_epu64 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_div_epu64(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.u64 = a_.u64 / b_.u64;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {\n        r_.u64[i] = a_.u64[i] / b_.u64[i];\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_div_epu64\n  #define _mm_div_epu64(a, b) simde_mm_div_epu64((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_div_epi8 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_div_epi8(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i8 = a_.i8 / b_.i8;\n    #else\n      #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n        for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) {\n          r_.m128i[i] = simde_mm_div_epi8(a_.m128i[i], b_.m128i[i]);\n        }\n      #else\n        SIMDE_VECTORIZE\n        for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {\n          r_.i8[i] = a_.i8[i] / b_.i8[i];\n        }\n      #endif\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_div_epi8\n  #define _mm256_div_epi8(a, b) simde_mm256_div_epi8((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_div_epi16 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_div_epi16(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i16 = a_.i16 / b_.i16;\n    #else\n      #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n        for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) {\n          r_.m128i[i] = simde_mm_div_epi16(a_.m128i[i], b_.m128i[i]);\n        }\n      #else\n        
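/* SIMDE_VECTORIZE expands to a compiler-specific hint (e.g. an OpenMP simd pragma) asking the compiler to vectorize the per-lane loop that follows. */\n        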
SIMDE_VECTORIZE\n        for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n          r_.i16[i] = a_.i16[i] / b_.i16[i];\n        }\n      #endif\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_div_epi16\n  #define _mm256_div_epi16(a, b) simde_mm256_div_epi16((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_div_epi32 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_div_epi32(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i32 = a_.i32 / b_.i32;\n    #else\n      #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n        for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) {\n          r_.m128i[i] = simde_mm_div_epi32(a_.m128i[i], b_.m128i[i]);\n        }\n      #else\n        SIMDE_VECTORIZE\n        for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n          r_.i32[i] = a_.i32[i] / b_.i32[i];\n        }\n      #endif\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#define simde_mm256_idiv_epi32(a, b) simde_mm256_div_epi32(a, b)\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_div_epi32\n  #define _mm256_div_epi32(a, b) simde_mm256_div_epi32(a, b)\n  #undef _mm256_idiv_epi32\n  #define _mm256_idiv_epi32(a, b) simde_mm256_div_epi32(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_div_epi64 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_div_epi64(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i64 = a_.i64 / b_.i64;\n    #else\n      #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n        for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) {\n          r_.m128i[i] = simde_mm_div_epi64(a_.m128i[i], b_.m128i[i]);\n        }\n      #else\n        SIMDE_VECTORIZE\n        for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {\n          r_.i64[i] = a_.i64[i] / b_.i64[i];\n        }\n      #endif\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_div_epi64\n  #define _mm256_div_epi64(a, b) simde_mm256_div_epi64((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_div_epu8 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_div_epu8(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.u8 = a_.u8 / b_.u8;\n    #else\n      #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n        for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) {\n          r_.m128i[i] = simde_mm_div_epu8(a_.m128i[i], b_.m128i[i]);\n        }\n      #else\n        SIMDE_VECTORIZE\n        for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {\n          r_.u8[i] = a_.u8[i] / b_.u8[i];\n        }\n      #endif\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if 
defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_div_epu8\n  #define _mm256_div_epu8(a, b) simde_mm256_div_epu8((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_div_epu16 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_div_epu16(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.u16 = a_.u16 / b_.u16;\n    #else\n      #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n        for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) {\n          r_.m128i[i] = simde_mm_div_epu16(a_.m128i[i], b_.m128i[i]);\n        }\n      #else\n        SIMDE_VECTORIZE\n        for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {\n          r_.u16[i] = a_.u16[i] / b_.u16[i];\n        }\n      #endif\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_div_epu16\n  #define _mm256_div_epu16(a, b) simde_mm256_div_epu16((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_div_epu32 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_div_epu32(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.u32 = a_.u32 / b_.u32;\n    #else\n      #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n        for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) {\n          r_.m128i[i] = simde_mm_div_epu32(a_.m128i[i], b_.m128i[i]);\n        }\n      #else\n        SIMDE_VECTORIZE\n        for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {\n          r_.u32[i] = a_.u32[i] / b_.u32[i];\n        }\n      #endif\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#define simde_mm256_udiv_epi32(a, b) simde_mm256_div_epu32(a, b)\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_div_epu32\n  #define _mm256_div_epu32(a, b) simde_mm256_div_epu32(a, b)\n  #undef _mm256_udiv_epi32\n  #define _mm256_udiv_epi32(a, b) simde_mm256_div_epu32(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_div_epu64 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_div_epu64(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.u64 = a_.u64 / b_.u64;\n    #else\n      #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n        for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) {\n          r_.m128i[i] = simde_mm_div_epu64(a_.m128i[i], b_.m128i[i]);\n        }\n      #else\n        SIMDE_VECTORIZE\n        for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {\n          r_.u64[i] = a_.u64[i] / b_.u64[i];\n        }\n      #endif\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_div_epu64\n  #define _mm256_div_epu64(a, b) simde_mm256_div_epu64((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512i\nsimde_mm512_div_epi8 (simde__m512i a, simde__m512i b) {\n  #if 
defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_div_epi8(a, b);\n  #else\n    simde__m512i_private\n      r_,\n      a_ = simde__m512i_to_private(a),\n      b_ = simde__m512i_to_private(b);\n\n    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i8 = a_.i8 / b_.i8;\n    #else\n      #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n        for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {\n          r_.m256i[i] = simde_mm256_div_epi8(a_.m256i[i], b_.m256i[i]);\n        }\n      #else\n        SIMDE_VECTORIZE\n        for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {\n          r_.i8[i] = a_.i8[i] / b_.i8[i];\n        }\n      #endif\n    #endif\n\n    return simde__m512i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_div_epi8\n  #define _mm512_div_epi8(a, b) simde_mm512_div_epi8((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512i\nsimde_mm512_div_epi16 (simde__m512i a, simde__m512i b) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_div_epi16(a, b);\n  #else\n    simde__m512i_private\n      r_,\n      a_ = simde__m512i_to_private(a),\n      b_ = simde__m512i_to_private(b);\n\n    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i16 = a_.i16 / b_.i16;\n    #else\n      #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n        for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {\n          r_.m256i[i] = simde_mm256_div_epi16(a_.m256i[i], b_.m256i[i]);\n        }\n      #else\n        SIMDE_VECTORIZE\n        for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n          r_.i16[i] = a_.i16[i] / b_.i16[i];\n        }\n      #endif\n    #endif\n\n    return simde__m512i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_div_epi16\n  #define _mm512_div_epi16(a, b) simde_mm512_div_epi16((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512i\nsimde_mm512_div_epi32 (simde__m512i a, simde__m512i b) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_div_epi32(a, b);\n  #else\n    simde__m512i_private\n      r_,\n      a_ = simde__m512i_to_private(a),\n      b_ = simde__m512i_to_private(b);\n\n    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i32 = a_.i32 / b_.i32;\n    #else\n      #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n        for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {\n          r_.m256i[i] = simde_mm256_div_epi32(a_.m256i[i], b_.m256i[i]);\n        }\n      #else\n        SIMDE_VECTORIZE\n        for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n          r_.i32[i] = a_.i32[i] / b_.i32[i];\n        }\n      #endif\n    #endif\n\n    return simde__m512i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_div_epi32\n  #define _mm512_div_epi32(a, b) simde_mm512_div_epi32((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512i\nsimde_mm512_mask_div_epi32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_div_epi32(src, k, a, b);\n  #else\n    return simde_mm512_mask_mov_epi32(src, k, simde_mm512_div_epi32(a, b));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_div_epi32\n  #define _mm512_mask_div_epi32(src, k, a, b) 
simde_mm512_mask_div_epi32(src, k, a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512i\nsimde_mm512_div_epi64 (simde__m512i a, simde__m512i b) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_div_epi64(a, b);\n  #else\n    simde__m512i_private\n      r_,\n      a_ = simde__m512i_to_private(a),\n      b_ = simde__m512i_to_private(b);\n\n    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i64 = a_.i64 / b_.i64;\n    #else\n      #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n        for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {\n          r_.m256i[i] = simde_mm256_div_epi64(a_.m256i[i], b_.m256i[i]);\n        }\n      #else\n        SIMDE_VECTORIZE\n        for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {\n          r_.i64[i] = a_.i64[i] / b_.i64[i];\n        }\n      #endif\n    #endif\n\n    return simde__m512i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_div_epi64\n  #define _mm512_div_epi64(a, b) simde_mm512_div_epi64((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512i\nsimde_mm512_div_epu8 (simde__m512i a, simde__m512i b) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_div_epu8(a, b);\n  #else\n    simde__m512i_private\n      r_,\n      a_ = simde__m512i_to_private(a),\n      b_ = simde__m512i_to_private(b);\n\n    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.u8 = a_.u8 / b_.u8;\n    #else\n      #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n        for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {\n          r_.m256i[i] = simde_mm256_div_epu8(a_.m256i[i], b_.m256i[i]);\n        }\n      #else\n        SIMDE_VECTORIZE\n        for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {\n          r_.u8[i] = a_.u8[i] / b_.u8[i];\n        }\n      #endif\n    #endif\n\n    return simde__m512i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_div_epu8\n  #define _mm512_div_epu8(a, b) simde_mm512_div_epu8((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512i\nsimde_mm512_div_epu16 (simde__m512i a, simde__m512i b) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_div_epu16(a, b);\n  #else\n    simde__m512i_private\n      r_,\n      a_ = simde__m512i_to_private(a),\n      b_ = simde__m512i_to_private(b);\n\n    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.u16 = a_.u16 / b_.u16;\n    #else\n      #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n        for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {\n          r_.m256i[i] = simde_mm256_div_epu16(a_.m256i[i], b_.m256i[i]);\n        }\n      #else\n        SIMDE_VECTORIZE\n        for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {\n          r_.u16[i] = a_.u16[i] / b_.u16[i];\n        }\n      #endif\n    #endif\n\n    return simde__m512i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_div_epu16\n  #define _mm512_div_epu16(a, b) simde_mm512_div_epu16((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512i\nsimde_mm512_div_epu32 (simde__m512i a, simde__m512i b) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_div_epu32(a, b);\n  #else\n    simde__m512i_private\n      r_,\n      a_ = simde__m512i_to_private(a),\n      b_ = simde__m512i_to_private(b);\n\n    #if 
defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.u32 = a_.u32 / b_.u32;\n    #else\n      #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n        for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {\n          r_.m256i[i] = simde_mm256_div_epu32(a_.m256i[i], b_.m256i[i]);\n        }\n      #else\n        SIMDE_VECTORIZE\n        for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {\n          r_.u32[i] = a_.u32[i] / b_.u32[i];\n        }\n      #endif\n    #endif\n\n    return simde__m512i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_div_epu32\n  #define _mm512_div_epu32(a, b) simde_mm512_div_epu32((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512i\nsimde_mm512_mask_div_epu32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_div_epu32(src, k, a, b);\n  #else\n    return simde_mm512_mask_mov_epi32(src, k, simde_mm512_div_epu32(a, b));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_div_epu32\n  #define _mm512_mask_div_epu32(src, k, a, b) simde_mm512_mask_div_epu32(src, k, a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512i\nsimde_mm512_div_epu64 (simde__m512i a, simde__m512i b) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_div_epu64(a, b);\n  #else\n    simde__m512i_private\n      r_,\n      a_ = simde__m512i_to_private(a),\n      b_ = simde__m512i_to_private(b);\n\n    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.u64 = a_.u64 / b_.u64;\n    #else\n      #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n        for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {\n          r_.m256i[i] = simde_mm256_div_epu64(a_.m256i[i], b_.m256i[i]);\n        }\n      #else\n        SIMDE_VECTORIZE\n        for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {\n          r_.u64[i] = a_.u64[i] / b_.u64[i];\n        }\n      #endif\n    #endif\n\n    return simde__m512i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_div_epu64\n  #define _mm512_div_epu64(a, b) simde_mm512_div_epu64((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_erf_ps (simde__m128 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_erf_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)\n    return Sleef_erff4_u10(a);\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n      r_.f32[i] = simde_math_erff(a_.f32[i]);\n    }\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_erf_ps\n  #define _mm_erf_ps(a) simde_mm_erf_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_erf_pd (simde__m128d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_erf_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)\n    return Sleef_erfd2_u10(a);\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n      r_.f64[i] = simde_math_erf(a_.f64[i]);\n    }\n\n    return 
simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_erf_pd\n  #define _mm_erf_pd(a) simde_mm_erf_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_erf_ps (simde__m256 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_erf_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)\n    return Sleef_erff8_u10(a);\n  #else\n    simde__m256_private\n      r_,\n      a_ = simde__m256_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {\n        r_.m128[i] = simde_mm_erf_ps(a_.m128[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = simde_math_erff(a_.f32[i]);\n      }\n    #endif\n\n    return simde__m256_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_erf_ps\n  #define _mm256_erf_ps(a) simde_mm256_erf_ps(a)\n#endif\n\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_erf_pd (simde__m256d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_erf_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)\n    return Sleef_erfd4_u10(a);\n  #else\n    simde__m256d_private\n      r_,\n      a_ = simde__m256d_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {\n        r_.m128d[i] = simde_mm_erf_pd(a_.m128d[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = simde_math_erf(a_.f64[i]);\n      }\n    #endif\n\n    return simde__m256d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_erf_pd\n  #define _mm256_erf_pd(a) simde_mm256_erf_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_erf_ps (simde__m512 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_erf_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return Sleef_erff16_u10(a);\n  #else\n    simde__m512_private\n      r_,\n      a_ = simde__m512_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n      for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {\n        r_.m256[i] = simde_mm256_erf_ps(a_.m256[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = simde_math_erff(a_.f32[i]);\n      }\n    #endif\n\n    return simde__m512_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_erf_ps\n  #define _mm512_erf_ps(a) simde_mm512_erf_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_erf_pd (simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_erf_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return Sleef_erfd8_u10(a);\n  #else\n    simde__m512d_private\n      r_,\n      a_ = simde__m512d_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n      for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {\n        r_.m256d[i] = simde_mm256_erf_pd(a_.m256d[i]);\n      }\n    
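/* Natural vector width above 256 bits: skip the 256-bit split above and evaluate simde_math_erf() on each lane directly. */\n    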
#else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = simde_math_erf(a_.f64[i]);\n      }\n    #endif\n\n    return simde__m512d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_erf_pd\n  #define _mm512_erf_pd(a) simde_mm512_erf_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_mask_erf_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_erf_ps(src, k, a);\n  #else\n    return simde_mm512_mask_mov_ps(src, k, simde_mm512_erf_ps(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_erf_ps\n  #define _mm512_mask_erf_ps(src, k, a) simde_mm512_mask_erf_ps(src, k, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_mask_erf_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_erf_pd(src, k, a);\n  #else\n    return simde_mm512_mask_mov_pd(src, k, simde_mm512_erf_pd(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_erf_pd\n  #define _mm512_mask_erf_pd(src, k, a) simde_mm512_mask_erf_pd(src, k, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_erfc_ps (simde__m128 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_erfc_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)\n    return Sleef_erfcf4_u15(a);\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n      r_.f32[i] = simde_math_erfcf(a_.f32[i]);\n    }\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_erfc_ps\n  #define _mm_erfc_ps(a) simde_mm_erfc_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_erfc_pd (simde__m128d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_erfc_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)\n    return Sleef_erfcd2_u15(a);\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n      r_.f64[i] = simde_math_erfc(a_.f64[i]);\n    }\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_erfc_pd\n  #define _mm_erfc_pd(a) simde_mm_erfc_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_erfc_ps (simde__m256 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_erfc_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)\n    return Sleef_erfcf8_u15(a);\n  #else\n    simde__m256_private\n      r_,\n      a_ = simde__m256_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {\n        r_.m128[i] = simde_mm_erfc_ps(a_.m128[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = simde_math_erfcf(a_.f32[i]);\n      }\n    #endif\n\n    return simde__m256_from_private(r_);\n  
#endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_erfc_ps\n  #define _mm256_erfc_ps(a) simde_mm256_erfc_ps(a)\n#endif\n\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_erfc_pd (simde__m256d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_erfc_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)\n    return Sleef_erfcd4_u15(a);\n  #else\n    simde__m256d_private\n      r_,\n      a_ = simde__m256d_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {\n        r_.m128d[i] = simde_mm_erfc_pd(a_.m128d[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = simde_math_erfc(a_.f64[i]);\n      }\n    #endif\n\n    return simde__m256d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_erfc_pd\n  #define _mm256_erfc_pd(a) simde_mm256_erfc_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_erfc_ps (simde__m512 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_erfc_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return Sleef_erfcf16_u15(a);\n  #else\n    simde__m512_private\n      r_,\n      a_ = simde__m512_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n      for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {\n        r_.m256[i] = simde_mm256_erfc_ps(a_.m256[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = simde_math_erfcf(a_.f32[i]);\n      }\n    #endif\n\n    return simde__m512_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_erfc_ps\n  #define _mm512_erfc_ps(a) simde_mm512_erfc_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_erfc_pd (simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_erfc_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return Sleef_erfcd8_u15(a);\n  #else\n    simde__m512d_private\n      r_,\n      a_ = simde__m512d_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n      for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {\n        r_.m256d[i] = simde_mm256_erfc_pd(a_.m256d[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = simde_math_erfc(a_.f64[i]);\n      }\n    #endif\n\n    return simde__m512d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_erfc_pd\n  #define _mm512_erfc_pd(a) simde_mm512_erfc_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_mask_erfc_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_erfc_ps(src, k, a);\n  #else\n    return simde_mm512_mask_mov_ps(src, k, simde_mm512_erfc_ps(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_erfc_ps\n  #define _mm512_mask_erfc_ps(src, k, a) simde_mm512_mask_erfc_ps(src, k, 
a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_mask_erfc_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_erfc_pd(src, k, a);\n  #else\n    return simde_mm512_mask_mov_pd(src, k, simde_mm512_erfc_pd(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_erfc_pd\n  #define _mm512_mask_erfc_pd(src, k, a) simde_mm512_mask_erfc_pd(src, k, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_exp_ps (simde__m128 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_exp_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)\n    return Sleef_expf4_u10(a);\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n      r_.f32[i] = simde_math_expf(a_.f32[i]);\n    }\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_exp_ps\n  #define _mm_exp_ps(a) simde_mm_exp_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_exp_pd (simde__m128d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_exp_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)\n    return Sleef_expd2_u10(a);\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n      r_.f64[i] = simde_math_exp(a_.f64[i]);\n    }\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_exp_pd\n  #define _mm_exp_pd(a) simde_mm_exp_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_exp_ps (simde__m256 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_exp_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)\n    return Sleef_expf8_u10(a);\n  #else\n    simde__m256_private\n      r_,\n      a_ = simde__m256_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {\n        r_.m128[i] = simde_mm_exp_ps(a_.m128[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = simde_math_expf(a_.f32[i]);\n      }\n    #endif\n\n    return simde__m256_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_exp_ps\n  #define _mm256_exp_ps(a) simde_mm256_exp_ps(a)\n#endif\n\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_exp_pd (simde__m256d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_exp_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)\n    return Sleef_expd4_u10(a);\n  #else\n    simde__m256d_private\n      r_,\n      a_ = simde__m256d_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {\n        r_.m128d[i] = simde_mm_exp_pd(a_.m128d[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = 
simde_math_exp(a_.f64[i]);\n      }\n    #endif\n\n    return simde__m256d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_exp_pd\n  #define _mm256_exp_pd(a) simde_mm256_exp_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_exp_ps (simde__m512 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_exp_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return Sleef_expf16_u10(a);\n  #else\n    simde__m512_private\n      r_,\n      a_ = simde__m512_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n      for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {\n        r_.m256[i] = simde_mm256_exp_ps(a_.m256[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = simde_math_expf(a_.f32[i]);\n      }\n    #endif\n\n    return simde__m512_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_exp_ps\n  #define _mm512_exp_ps(a) simde_mm512_exp_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_exp_pd (simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_exp_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return Sleef_expd8_u10(a);\n  #else\n    simde__m512d_private\n      r_,\n      a_ = simde__m512d_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n      for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {\n        r_.m256d[i] = simde_mm256_exp_pd(a_.m256d[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = simde_math_exp(a_.f64[i]);\n      }\n    #endif\n\n    return simde__m512d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_exp_pd\n  #define _mm512_exp_pd(a) simde_mm512_exp_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_mask_exp_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_exp_ps(src, k, a);\n  #else\n    return simde_mm512_mask_mov_ps(src, k, simde_mm512_exp_ps(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_exp_ps\n  #define _mm512_mask_exp_ps(src, k, a) simde_mm512_mask_exp_ps(src, k, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_mask_exp_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_exp_pd(src, k, a);\n  #else\n    return simde_mm512_mask_mov_pd(src, k, simde_mm512_exp_pd(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_exp_pd\n  #define _mm512_mask_exp_pd(src, k, a) simde_mm512_mask_exp_pd(src, k, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_expm1_ps (simde__m128 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_expm1_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)\n    return Sleef_expm1f4_u10(a);\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f32) / 
sizeof(r_.f32[0])) ; i++) {\n      r_.f32[i] = simde_math_expm1f(a_.f32[i]);\n    }\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_expm1_ps\n  #define _mm_expm1_ps(a) simde_mm_expm1_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_expm1_pd (simde__m128d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_expm1_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)\n    return Sleef_expm1d2_u10(a);\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n      r_.f64[i] = simde_math_expm1(a_.f64[i]);\n    }\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_expm1_pd\n  #define _mm_expm1_pd(a) simde_mm_expm1_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_expm1_ps (simde__m256 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_expm1_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)\n    return Sleef_expm1f8_u10(a);\n  #else\n    simde__m256_private\n      r_,\n      a_ = simde__m256_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {\n        r_.m128[i] = simde_mm_expm1_ps(a_.m128[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = simde_math_expm1f(a_.f32[i]);\n      }\n    #endif\n\n    return simde__m256_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_expm1_ps\n  #define _mm256_expm1_ps(a) simde_mm256_expm1_ps(a)\n#endif\n\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_expm1_pd (simde__m256d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_expm1_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)\n    return Sleef_expm1d4_u10(a);\n  #else\n    simde__m256d_private\n      r_,\n      a_ = simde__m256d_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {\n        r_.m128d[i] = simde_mm_expm1_pd(a_.m128d[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = simde_math_expm1(a_.f64[i]);\n      }\n    #endif\n\n    return simde__m256d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_expm1_pd\n  #define _mm256_expm1_pd(a) simde_mm256_expm1_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_expm1_ps (simde__m512 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_expm1_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return Sleef_expm1f16_u10(a);\n  #else\n    simde__m512_private\n      r_,\n      a_ = simde__m512_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n      for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {\n        r_.m256[i] = simde_mm256_expm1_ps(a_.m256[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / 
sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = simde_math_expm1f(a_.f32[i]);\n      }\n    #endif\n\n    return simde__m512_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_expm1_ps\n  #define _mm512_expm1_ps(a) simde_mm512_expm1_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_expm1_pd (simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_expm1_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return Sleef_expm1d8_u10(a);\n  #else\n    simde__m512d_private\n      r_,\n      a_ = simde__m512d_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n      for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {\n        r_.m256d[i] = simde_mm256_expm1_pd(a_.m256d[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = simde_math_expm1(a_.f64[i]);\n      }\n    #endif\n\n    return simde__m512d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_expm1_pd\n  #define _mm512_expm1_pd(a) simde_mm512_expm1_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_mask_expm1_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_expm1_ps(src, k, a);\n  #else\n    return simde_mm512_mask_mov_ps(src, k, simde_mm512_expm1_ps(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_expm1_ps\n  #define _mm512_mask_expm1_ps(src, k, a) simde_mm512_mask_expm1_ps(src, k, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_mask_expm1_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_expm1_pd(src, k, a);\n  #else\n    return simde_mm512_mask_mov_pd(src, k, simde_mm512_expm1_pd(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_expm1_pd\n  #define _mm512_mask_expm1_pd(src, k, a) simde_mm512_mask_expm1_pd(src, k, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_exp2_ps (simde__m128 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_exp2_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)\n    return Sleef_exp2f4_u10(a);\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n      r_.f32[i] = simde_math_exp2f(a_.f32[i]);\n    }\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_exp2_ps\n  #define _mm_exp2_ps(a) simde_mm_exp2_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_exp2_pd (simde__m128d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_exp2_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)\n    return Sleef_exp2d2_u10(a);\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n      r_.f64[i] = simde_math_exp2(a_.f64[i]);\n    }\n\n    return simde__m128d_from_private(r_);\n  
#endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_exp2_pd\n  #define _mm_exp2_pd(a) simde_mm_exp2_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_exp2_ps (simde__m256 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_exp2_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)\n    return Sleef_exp2f8_u10(a);\n  #else\n    simde__m256_private\n      r_,\n      a_ = simde__m256_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {\n        r_.m128[i] = simde_mm_exp2_ps(a_.m128[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = simde_math_exp2f(a_.f32[i]);\n      }\n    #endif\n\n    return simde__m256_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_exp2_ps\n  #define _mm256_exp2_ps(a) simde_mm256_exp2_ps(a)\n#endif\n\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_exp2_pd (simde__m256d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_exp2_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)\n    return Sleef_exp2d4_u10(a);\n  #else\n    simde__m256d_private\n      r_,\n      a_ = simde__m256d_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {\n        r_.m128d[i] = simde_mm_exp2_pd(a_.m128d[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = simde_math_exp2(a_.f64[i]);\n      }\n    #endif\n\n    return simde__m256d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_exp2_pd\n  #define _mm256_exp2_pd(a) simde_mm256_exp2_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_exp2_ps (simde__m512 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_exp2_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return Sleef_exp2f16_u10(a);\n  #else\n    simde__m512_private\n      r_,\n      a_ = simde__m512_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n      for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {\n        r_.m256[i] = simde_mm256_exp2_ps(a_.m256[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = simde_math_exp2f(a_.f32[i]);\n      }\n    #endif\n\n    return simde__m512_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_exp2_ps\n  #define _mm512_exp2_ps(a) simde_mm512_exp2_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_exp2_pd (simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_exp2_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return Sleef_exp2d8_u10(a);\n  #else\n    simde__m512d_private\n      r_,\n      a_ = simde__m512d_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n      for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {\n        r_.m256d[i] = simde_mm256_exp2_pd(a_.m256d[i]);\n      }\n    
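/* Same dispatch as the other 512-bit wrappers: vectors wider than 256 bits take the flat per-lane path using simde_math_exp2(). */\n    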
#else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = simde_math_exp2(a_.f64[i]);\n      }\n    #endif\n\n    return simde__m512d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_exp2_pd\n  #define _mm512_exp2_pd(a) simde_mm512_exp2_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_mask_exp2_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_exp2_ps(src, k, a);\n  #else\n    return simde_mm512_mask_mov_ps(src, k, simde_mm512_exp2_ps(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_exp2_ps\n  #define _mm512_mask_exp2_ps(src, k, a) simde_mm512_mask_exp2_ps(src, k, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_mask_exp2_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_exp2_pd(src, k, a);\n  #else\n    return simde_mm512_mask_mov_pd(src, k, simde_mm512_exp2_pd(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_exp2_pd\n  #define _mm512_mask_exp2_pd(src, k, a) simde_mm512_mask_exp2_pd(src, k, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_exp10_ps (simde__m128 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_exp10_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)\n    return Sleef_exp10f4_u10(a);\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n      r_.f32[i] = simde_math_exp10f(a_.f32[i]);\n    }\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_exp10_ps\n  #define _mm_exp10_ps(a) simde_mm_exp10_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_exp10_pd (simde__m128d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_exp10_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)\n    return Sleef_exp10d2_u10(a);\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n      r_.f64[i] = simde_math_exp10(a_.f64[i]);\n    }\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_exp10_pd\n  #define _mm_exp10_pd(a) simde_mm_exp10_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_exp10_ps (simde__m256 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_exp10_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)\n    return Sleef_exp10f8_u10(a);\n  #else\n    simde__m256_private\n      r_,\n      a_ = simde__m256_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {\n        r_.m128[i] = simde_mm_exp10_ps(a_.m128[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = simde_math_exp10f(a_.f32[i]);\n      }\n    #endif\n\n    
return simde__m256_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_exp10_ps\n  #define _mm256_exp10_ps(a) simde_mm256_exp10_ps(a)\n#endif\n\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_exp10_pd (simde__m256d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_exp10_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)\n    return Sleef_exp10d4_u10(a);\n  #else\n    simde__m256d_private\n      r_,\n      a_ = simde__m256d_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {\n        r_.m128d[i] = simde_mm_exp10_pd(a_.m128d[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = simde_math_exp10(a_.f64[i]);\n      }\n    #endif\n\n    return simde__m256d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_exp10_pd\n  #define _mm256_exp10_pd(a) simde_mm256_exp10_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_exp10_ps (simde__m512 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_exp10_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return Sleef_exp10f16_u10(a);\n  #else\n    simde__m512_private\n      r_,\n      a_ = simde__m512_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n      for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {\n        r_.m256[i] = simde_mm256_exp10_ps(a_.m256[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = simde_math_exp10f(a_.f32[i]);\n      }\n    #endif\n\n    return simde__m512_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_exp10_ps\n  #define _mm512_exp10_ps(a) simde_mm512_exp10_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_exp10_pd (simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_exp10_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return Sleef_exp10d8_u10(a);\n  #else\n    simde__m512d_private\n      r_,\n      a_ = simde__m512d_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n      for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {\n        r_.m256d[i] = simde_mm256_exp10_pd(a_.m256d[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = simde_math_exp10(a_.f64[i]);\n      }\n    #endif\n\n    return simde__m512d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_exp10_pd\n  #define _mm512_exp10_pd(a) simde_mm512_exp10_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_mask_exp10_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_exp10_ps(src, k, a);\n  #else\n    return simde_mm512_mask_mov_ps(src, k, simde_mm512_exp10_ps(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_exp10_ps\n  #define _mm512_mask_exp10_ps(src, k, a) simde_mm512_mask_exp10_ps(src, k, 
a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_mask_exp10_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_exp10_pd(src, k, a);\n  #else\n    return simde_mm512_mask_mov_pd(src, k, simde_mm512_exp10_pd(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_exp10_pd\n  #define _mm512_mask_exp10_pd(src, k, a) simde_mm512_mask_exp10_pd(src, k, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_cdfnorm_ps (simde__m128 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_cdfnorm_ps(a);\n  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)\n    /* https://www.johndcook.com/blog/cpp_phi/ */\n    const simde__m128 a1 = simde_mm_set1_ps(SIMDE_FLOAT32_C( 0.254829592));\n    const simde__m128 a2 = simde_mm_set1_ps(SIMDE_FLOAT32_C(-0.284496736));\n    const simde__m128 a3 = simde_mm_set1_ps(SIMDE_FLOAT32_C(1.421413741));\n    const simde__m128 a4 = simde_mm_set1_ps(SIMDE_FLOAT32_C(-1.453152027));\n    const simde__m128 a5 = simde_mm_set1_ps(SIMDE_FLOAT32_C(1.061405429));\n    const simde__m128 p = simde_mm_set1_ps(SIMDE_FLOAT32_C(0.3275911));\n    const simde__m128 one = simde_mm_set1_ps(SIMDE_FLOAT32_C(1.0));\n\n    /* simde_math_fabsf(x) / sqrtf(2.0) */\n    const simde__m128 x = simde_mm_div_ps(simde_x_mm_abs_ps(a), simde_mm_sqrt_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(2.0))));\n\n    /* 1.0 / (1.0 + p * x) */\n    const simde__m128 t = simde_mm_div_ps(one, simde_mm_add_ps(one, simde_mm_mul_ps(p, x)));\n\n    /* 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * exp(-x * x) */\n    simde__m128 y = simde_mm_mul_ps(a5, t);\n    y = simde_mm_add_ps(y, a4);\n    y = simde_mm_mul_ps(y, t);\n    y = simde_mm_add_ps(y, a3);\n    y = simde_mm_mul_ps(y, t);\n    y = simde_mm_add_ps(y, a2);\n    y = simde_mm_mul_ps(y, t);\n    y = simde_mm_add_ps(y, a1);\n    y = simde_mm_mul_ps(y, t);\n    y = simde_mm_mul_ps(y, simde_mm_exp_ps(simde_mm_mul_ps(x, simde_x_mm_negate_ps(x))));\n    y = simde_mm_sub_ps(one, y);\n\n    /* 0.5 * (1.0 + ((a < 0.0) ? 
-y : y)) */\n    return simde_mm_mul_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(0.5)), simde_mm_add_ps(one, simde_x_mm_xorsign_ps(y, a)));\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n      r_.f32[i] = simde_math_cdfnormf(a_.f32[i]);\n    }\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_cdfnorm_ps\n  #define _mm_cdfnorm_ps(a) simde_mm_cdfnorm_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_cdfnorm_pd (simde__m128d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_cdfnorm_pd(a);\n  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)\n    /* https://www.johndcook.com/blog/cpp_phi/ */\n    const simde__m128d a1 = simde_mm_set1_pd(SIMDE_FLOAT64_C( 0.254829592));\n    const simde__m128d a2 = simde_mm_set1_pd(SIMDE_FLOAT64_C(-0.284496736));\n    const simde__m128d a3 = simde_mm_set1_pd(SIMDE_FLOAT64_C(1.421413741));\n    const simde__m128d a4 = simde_mm_set1_pd(SIMDE_FLOAT64_C(-1.453152027));\n    const simde__m128d a5 = simde_mm_set1_pd(SIMDE_FLOAT64_C(1.061405429));\n    const simde__m128d p = simde_mm_set1_pd(SIMDE_FLOAT64_C(0.3275911));\n    const simde__m128d one = simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0));\n\n    /* simde_math_fabs(x) / sqrt(2.0) */\n    const simde__m128d x = simde_mm_div_pd(simde_x_mm_abs_pd(a), simde_mm_sqrt_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(2.0))));\n\n    /* 1.0 / (1.0 + p * x) */\n    const simde__m128d t = simde_mm_div_pd(one, simde_mm_add_pd(one, simde_mm_mul_pd(p, x)));\n\n    /* 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * exp(-x * x) */\n    simde__m128d y = simde_mm_mul_pd(a5, t);\n    y = simde_mm_add_pd(y, a4);\n    y = simde_mm_mul_pd(y, t);\n    y = simde_mm_add_pd(y, a3);\n    y = simde_mm_mul_pd(y, t);\n    y = simde_mm_add_pd(y, a2);\n    y = simde_mm_mul_pd(y, t);\n    y = simde_mm_add_pd(y, a1);\n    y = simde_mm_mul_pd(y, t);\n    y = simde_mm_mul_pd(y, simde_mm_exp_pd(simde_mm_mul_pd(x, simde_x_mm_negate_pd(x))));\n    y = simde_mm_sub_pd(one, y);\n\n    /* 0.5 * (1.0 + ((a < 0.0) ? 
-y : y)) */\n    return simde_mm_mul_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(0.5)), simde_mm_add_pd(one, simde_x_mm_xorsign_pd(y, a)));\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n      r_.f64[i] = simde_math_cdfnorm(a_.f64[i]);\n    }\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_cdfnorm_pd\n  #define _mm_cdfnorm_pd(a) simde_mm_cdfnorm_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_cdfnorm_ps (simde__m256 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_cdfnorm_ps(a);\n  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)\n    /* https://www.johndcook.com/blog/cpp_phi/ */\n    const simde__m256 a1 = simde_mm256_set1_ps(SIMDE_FLOAT32_C( 0.254829592));\n    const simde__m256 a2 = simde_mm256_set1_ps(SIMDE_FLOAT32_C(-0.284496736));\n    const simde__m256 a3 = simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.421413741));\n    const simde__m256 a4 = simde_mm256_set1_ps(SIMDE_FLOAT32_C(-1.453152027));\n    const simde__m256 a5 = simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.061405429));\n    const simde__m256 p = simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.3275911));\n    const simde__m256 one = simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.0));\n\n    /* simde_math_fabsf(x) / sqrtf(2.0) */\n    const simde__m256 x = simde_mm256_div_ps(simde_x_mm256_abs_ps(a), simde_mm256_sqrt_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(2.0))));\n\n    /* 1.0 / (1.0 + p * x) */\n    const simde__m256 t = simde_mm256_div_ps(one, simde_mm256_add_ps(one, simde_mm256_mul_ps(p, x)));\n\n    /* 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * exp(-x * x) */\n    simde__m256 y = simde_mm256_mul_ps(a5, t);\n    y = simde_mm256_add_ps(y, a4);\n    y = simde_mm256_mul_ps(y, t);\n    y = simde_mm256_add_ps(y, a3);\n    y = simde_mm256_mul_ps(y, t);\n    y = simde_mm256_add_ps(y, a2);\n    y = simde_mm256_mul_ps(y, t);\n    y = simde_mm256_add_ps(y, a1);\n    y = simde_mm256_mul_ps(y, t);\n    y = simde_mm256_mul_ps(y, simde_mm256_exp_ps(simde_mm256_mul_ps(x, simde_x_mm256_negate_ps(x))));\n    y = simde_mm256_sub_ps(one, y);\n\n    /* 0.5 * (1.0 + ((a < 0.0) ? 
-y : y)) */\n    return simde_mm256_mul_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.5)), simde_mm256_add_ps(one, simde_x_mm256_xorsign_ps(y, a)));\n  #else\n    simde__m256_private\n      r_,\n      a_ = simde__m256_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {\n        r_.m128[i] = simde_mm_cdfnorm_ps(a_.m128[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = simde_math_cdfnormf(a_.f32[i]);\n      }\n    #endif\n\n    return simde__m256_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_cdfnorm_ps\n  #define _mm256_cdfnorm_ps(a) simde_mm256_cdfnorm_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_cdfnorm_pd (simde__m256d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_cdfnorm_pd(a);\n  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)\n    /* https://www.johndcook.com/blog/cpp_phi/ */\n    const simde__m256d a1 = simde_mm256_set1_pd(SIMDE_FLOAT64_C( 0.254829592));\n    const simde__m256d a2 = simde_mm256_set1_pd(SIMDE_FLOAT64_C(-0.284496736));\n    const simde__m256d a3 = simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.421413741));\n    const simde__m256d a4 = simde_mm256_set1_pd(SIMDE_FLOAT64_C(-1.453152027));\n    const simde__m256d a5 = simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.061405429));\n    const simde__m256d p = simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.3275911));\n    const simde__m256d one = simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0));\n\n    /* simde_math_fabs(x) / sqrt(2.0) */\n    const simde__m256d x = simde_mm256_div_pd(simde_x_mm256_abs_pd(a), simde_mm256_sqrt_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(2.0))));\n\n    /* 1.0 / (1.0 + p * x) */\n    const simde__m256d t = simde_mm256_div_pd(one, simde_mm256_add_pd(one, simde_mm256_mul_pd(p, x)));\n\n    /* 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * exp(-x * x) */\n    simde__m256d y = simde_mm256_mul_pd(a5, t);\n    y = simde_mm256_add_pd(y, a4);\n    y = simde_mm256_mul_pd(y, t);\n    y = simde_mm256_add_pd(y, a3);\n    y = simde_mm256_mul_pd(y, t);\n    y = simde_mm256_add_pd(y, a2);\n    y = simde_mm256_mul_pd(y, t);\n    y = simde_mm256_add_pd(y, a1);\n    y = simde_mm256_mul_pd(y, t);\n    y = simde_mm256_mul_pd(y, simde_mm256_exp_pd(simde_mm256_mul_pd(x, simde_x_mm256_negate_pd(x))));\n    y = simde_mm256_sub_pd(one, y);\n\n    /* 0.5 * (1.0 + ((a < 0.0) ? 
-y : y)) */\n    return simde_mm256_mul_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.5)), simde_mm256_add_pd(one, simde_x_mm256_xorsign_pd(y, a)));\n  #else\n    simde__m256d_private\n      r_,\n      a_ = simde__m256d_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {\n        r_.m128d[i] = simde_mm_cdfnorm_pd(a_.m128d[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = simde_math_cdfnorm(a_.f64[i]);\n      }\n    #endif\n\n    return simde__m256d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_cdfnorm_pd\n  #define _mm256_cdfnorm_pd(a) simde_mm256_cdfnorm_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_cdfnorm_ps (simde__m512 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_cdfnorm_ps(a);\n  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)\n    /* https://www.johndcook.com/blog/cpp_phi/ */\n    const simde__m512 a1 = simde_mm512_set1_ps(SIMDE_FLOAT32_C( 0.254829592));\n    const simde__m512 a2 = simde_mm512_set1_ps(SIMDE_FLOAT32_C(-0.284496736));\n    const simde__m512 a3 = simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.421413741));\n    const simde__m512 a4 = simde_mm512_set1_ps(SIMDE_FLOAT32_C(-1.453152027));\n    const simde__m512 a5 = simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.061405429));\n    const simde__m512 p = simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.3275911));\n    const simde__m512 one = simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0));\n\n    /* simde_math_fabsf(x) / sqrtf(2.0) */\n    const simde__m512 x = simde_mm512_div_ps(simde_mm512_abs_ps(a), simde_mm512_sqrt_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(2.0))));\n\n    /* 1.0 / (1.0 + p * x) */\n    const simde__m512 t = simde_mm512_div_ps(one, simde_mm512_add_ps(one, simde_mm512_mul_ps(p, x)));\n\n    /* 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * exp(-x * x) */\n    simde__m512 y = simde_mm512_mul_ps(a5, t);\n    y = simde_mm512_add_ps(y, a4);\n    y = simde_mm512_mul_ps(y, t);\n    y = simde_mm512_add_ps(y, a3);\n    y = simde_mm512_mul_ps(y, t);\n    y = simde_mm512_add_ps(y, a2);\n    y = simde_mm512_mul_ps(y, t);\n    y = simde_mm512_add_ps(y, a1);\n    y = simde_mm512_mul_ps(y, t);\n    y = simde_mm512_mul_ps(y, simde_mm512_exp_ps(simde_mm512_mul_ps(x, simde_x_mm512_negate_ps(x))));\n    y = simde_mm512_sub_ps(one, y);\n\n    /* 0.5 * (1.0 + ((a < 0.0) ? 
-y : y)) */\n    return simde_mm512_mul_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.5)), simde_mm512_add_ps(one, simde_x_mm512_xorsign_ps(y, a)));\n  #else\n    simde__m512_private\n      r_,\n      a_ = simde__m512_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n      for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {\n        r_.m256[i] = simde_mm256_cdfnorm_ps(a_.m256[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = simde_math_cdfnormf(a_.f32[i]);\n      }\n    #endif\n\n    return simde__m512_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_cdfnorm_ps\n  #define _mm512_cdfnorm_ps(a) simde_mm512_cdfnorm_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_cdfnorm_pd (simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_cdfnorm_pd(a);\n  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)\n    /* https://www.johndcook.com/blog/cpp_phi/ */\n    const simde__m512d a1 = simde_mm512_set1_pd(SIMDE_FLOAT64_C( 0.254829592));\n    const simde__m512d a2 = simde_mm512_set1_pd(SIMDE_FLOAT64_C(-0.284496736));\n    const simde__m512d a3 = simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.421413741));\n    const simde__m512d a4 = simde_mm512_set1_pd(SIMDE_FLOAT64_C(-1.453152027));\n    const simde__m512d a5 = simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.061405429));\n    const simde__m512d p = simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.3275911));\n    const simde__m512d one = simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.0));\n\n    /* simde_math_fabs(x) / sqrt(2.0) */\n    const simde__m512d x = simde_mm512_div_pd(simde_mm512_abs_pd(a), simde_mm512_sqrt_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(2.0))));\n\n    /* 1.0 / (1.0 + p * x) */\n    const simde__m512d t = simde_mm512_div_pd(one, simde_mm512_add_pd(one, simde_mm512_mul_pd(p, x)));\n\n    /* 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * exp(-x * x) */\n    simde__m512d y = simde_mm512_mul_pd(a5, t);\n    y = simde_mm512_add_pd(y, a4);\n    y = simde_mm512_mul_pd(y, t);\n    y = simde_mm512_add_pd(y, a3);\n    y = simde_mm512_mul_pd(y, t);\n    y = simde_mm512_add_pd(y, a2);\n    y = simde_mm512_mul_pd(y, t);\n    y = simde_mm512_add_pd(y, a1);\n    y = simde_mm512_mul_pd(y, t);\n    y = simde_mm512_mul_pd(y, simde_mm512_exp_pd(simde_mm512_mul_pd(x, simde_x_mm512_negate_pd(x))));\n    y = simde_mm512_sub_pd(one, y);\n\n    /* 0.5 * (1.0 + ((a < 0.0) ? 
-y : y)) */\n    return simde_mm512_mul_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.5)), simde_mm512_add_pd(one, simde_x_mm512_xorsign_pd(y, a)));\n  #else\n    simde__m512d_private\n      r_,\n      a_ = simde__m512d_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n      for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {\n        r_.m256d[i] = simde_mm256_cdfnorm_pd(a_.m256d[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = simde_math_cdfnorm(a_.f64[i]);\n      }\n    #endif\n\n    return simde__m512d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_cdfnorm_pd\n  #define _mm512_cdfnorm_pd(a) simde_mm512_cdfnorm_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_mask_cdfnorm_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_cdfnorm_ps(src, k, a);\n  #else\n    return simde_mm512_mask_mov_ps(src, k, simde_mm512_cdfnorm_ps(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_cdfnorm_ps\n  #define _mm512_mask_cdfnorm_ps(src, k, a) simde_mm512_mask_cdfnorm_ps(src, k, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_mask_cdfnorm_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_cdfnorm_pd(src, k, a);\n  #else\n    return simde_mm512_mask_mov_pd(src, k, simde_mm512_cdfnorm_pd(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_cdfnorm_pd\n  #define _mm512_mask_cdfnorm_pd(src, k, a) simde_mm512_mask_cdfnorm_pd(src, k, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_idivrem_epi32 (simde__m128i* mem_addr, simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_idivrem_epi32(HEDLEY_REINTERPRET_CAST(__m128i*, mem_addr), a, b);\n  #else\n    simde__m128i r;\n\n    r = simde_mm_div_epi32(a, b);\n    *mem_addr = simde_mm_sub_epi32(a, simde_mm_mullo_epi32(r, b));\n\n    return r;\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_idivrem_epi32\n  #define _mm_idivrem_epi32(mem_addr, a, b) simde_mm_idivrem_epi32((mem_addr),(a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_idivrem_epi32 (simde__m256i* mem_addr, simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_idivrem_epi32(HEDLEY_REINTERPRET_CAST(__m256i*, mem_addr), a, b);\n  #else\n    simde__m256i r;\n\n    r = simde_mm256_div_epi32(a, b);\n    *mem_addr = simde_mm256_sub_epi32(a, simde_mm256_mullo_epi32(r, b));\n\n    return r;\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_idivrem_epi32\n  #define _mm256_idivrem_epi32(mem_addr, a, b) simde_mm256_idivrem_epi32((mem_addr),(a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_hypot_ps (simde__m128 a, simde__m128 b) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_hypot_ps(a, b);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)\n    #if SIMDE_ACCURACY_PREFERENCE > 1\n      return Sleef_hypotf4_u05(a, b);\n    #else\n      return Sleef_hypotf4_u35(a, b);\n    #endif\n  #else\n    
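/* Portable fallback: hypot is computed lane by lane with the\n     * libm-backed simde_math_hypotf.  The Sleef branches above pick the\n     * 0.5-ULP _u05 variants when SIMDE_ACCURACY_PREFERENCE > 1 and the\n     * faster 3.5-ULP _u35 variants otherwise. */\n    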
simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a),\n      b_ = simde__m128_to_private(b);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n      r_.f32[i] = simde_math_hypotf(a_.f32[i], b_.f32[i]);\n    }\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_hypot_ps\n  #define _mm_hypot_ps(a, b) simde_mm_hypot_ps(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_hypot_pd (simde__m128d a, simde__m128d b) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_hypot_pd(a, b);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)\n    #if SIMDE_ACCURACY_PREFERENCE > 1\n      return Sleef_hypotd2_u05(a, b);\n    #else\n      return Sleef_hypotd2_u35(a, b);\n    #endif\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a),\n      b_ = simde__m128d_to_private(b);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n      r_.f64[i] = simde_math_hypot(a_.f64[i], b_.f64[i]);\n    }\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_hypot_pd\n  #define _mm_hypot_pd(a, b) simde_mm_hypot_pd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_hypot_ps (simde__m256 a, simde__m256 b) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_hypot_ps(a, b);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)\n    #if SIMDE_ACCURACY_PREFERENCE > 1\n      return Sleef_hypotf8_u05(a, b);\n    #else\n      return Sleef_hypotf8_u35(a, b);\n    #endif\n  #else\n    simde__m256_private\n      r_,\n      a_ = simde__m256_to_private(a),\n      b_ = simde__m256_to_private(b);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {\n        r_.m128[i] = simde_mm_hypot_ps(a_.m128[i], b_.m128[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = simde_math_hypotf(a_.f32[i], b_.f32[i]);\n      }\n    #endif\n\n    return simde__m256_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_hypot_ps\n  #define _mm256_hypot_ps(a, b) simde_mm256_hypot_ps(a, b)\n#endif\n\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_hypot_pd (simde__m256d a, simde__m256d b) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_hypot_pd(a, b);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)\n    #if SIMDE_ACCURACY_PREFERENCE > 1\n      return Sleef_hypotd4_u05(a, b);\n    #else\n      return Sleef_hypotd4_u35(a, b);\n    #endif\n  #else\n    simde__m256d_private\n      r_,\n      a_ = simde__m256d_to_private(a),\n      b_ = simde__m256d_to_private(b);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {\n        r_.m128d[i] = simde_mm_hypot_pd(a_.m128d[i], b_.m128d[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = simde_math_hypot(a_.f64[i], b_.f64[i]);\n      }\n    #endif\n\n    return simde__m256d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  
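/* Under SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES the Intel intrinsic name\n   * is redefined to point at the portable implementation, so code\n   * written against SVML compiles unchanged on platforms without it. */\n  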
#undef _mm256_hypot_pd\n  #define _mm256_hypot_pd(a, b) simde_mm256_hypot_pd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_hypot_ps (simde__m512 a, simde__m512 b) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_hypot_ps(a, b);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    #if SIMDE_ACCURACY_PREFERENCE > 1\n      return Sleef_hypotf16_u05(a, b);\n    #else\n      return Sleef_hypotf16_u35(a, b);\n    #endif\n  #else\n    simde__m512_private\n      r_,\n      a_ = simde__m512_to_private(a),\n      b_ = simde__m512_to_private(b);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n      for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {\n        r_.m256[i] = simde_mm256_hypot_ps(a_.m256[i], b_.m256[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = simde_math_hypotf(a_.f32[i], b_.f32[i]);\n      }\n    #endif\n\n    return simde__m512_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_hypot_ps\n  #define _mm512_hypot_ps(a, b) simde_mm512_hypot_ps(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_hypot_pd (simde__m512d a, simde__m512d b) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_hypot_pd(a, b);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    #if SIMDE_ACCURACY_PREFERENCE > 1\n      return Sleef_hypotd8_u05(a, b);\n    #else\n      return Sleef_hypotd8_u35(a, b);\n    #endif\n  #else\n    simde__m512d_private\n      r_,\n      a_ = simde__m512d_to_private(a),\n      b_ = simde__m512d_to_private(b);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n      for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {\n        r_.m256d[i] = simde_mm256_hypot_pd(a_.m256d[i], b_.m256d[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = simde_math_hypot(a_.f64[i], b_.f64[i]);\n      }\n    #endif\n\n    return simde__m512d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_hypot_pd\n  #define _mm512_hypot_pd(a, b) simde_mm512_hypot_pd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_mask_hypot_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_hypot_ps(src, k, a, b);\n  #else\n    return simde_mm512_mask_mov_ps(src, k, simde_mm512_hypot_ps(a, b));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_hypot_ps\n  #define _mm512_mask_hypot_ps(src, k, a, b) simde_mm512_mask_hypot_ps(src, k, a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_mask_hypot_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_hypot_pd(src, k, a, b);\n  #else\n    return simde_mm512_mask_mov_pd(src, k, simde_mm512_hypot_pd(a, b));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_hypot_pd\n  #define _mm512_mask_hypot_pd(src, k, a, b) simde_mm512_mask_hypot_pd(src, k, a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_invcbrt_ps (simde__m128 a) {\n  #if 
defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_invcbrt_ps(a);\n  #else\n    return simde_mm_div_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(1.0)), simde_mm_cbrt_ps(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_invcbrt_ps\n  #define _mm_invcbrt_ps(a) simde_mm_invcbrt_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_invcbrt_pd (simde__m128d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_invcbrt_pd(a);\n  #else\n    return simde_mm_div_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0)), simde_mm_cbrt_pd(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_invcbrt_pd\n  #define _mm_invcbrt_pd(a) simde_mm_invcbrt_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_invcbrt_ps (simde__m256 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_invcbrt_ps(a);\n  #else\n    return simde_mm256_div_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.0)), simde_mm256_cbrt_ps(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_invcbrt_ps\n  #define _mm256_invcbrt_ps(a) simde_mm256_invcbrt_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_invcbrt_pd (simde__m256d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_invcbrt_pd(a);\n  #else\n    return simde_mm256_div_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0)), simde_mm256_cbrt_pd(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_invcbrt_pd\n  #define _mm256_invcbrt_pd(a) simde_mm256_invcbrt_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_invsqrt_ps (simde__m128 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_invsqrt_ps(a);\n  #else\n    return simde_mm_div_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(1.0)), simde_mm_sqrt_ps(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_invsqrt_ps\n  #define _mm_invsqrt_ps(a) simde_mm_invsqrt_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_invsqrt_pd (simde__m128d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_invsqrt_pd(a);\n  #else\n    return simde_mm_div_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0)), simde_mm_sqrt_pd(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_invsqrt_pd\n  #define _mm_invsqrt_pd(a) simde_mm_invsqrt_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_invsqrt_ps (simde__m256 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_invsqrt_ps(a);\n  #else\n    return simde_mm256_div_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.0)), simde_mm256_sqrt_ps(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_invsqrt_ps\n  #define _mm256_invsqrt_ps(a) simde_mm256_invsqrt_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_invsqrt_pd (simde__m256d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_invsqrt_pd(a);\n  #else\n    return simde_mm256_div_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0)), simde_mm256_sqrt_pd(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_invsqrt_pd\n  #define _mm256_invsqrt_pd(a) simde_mm256_invsqrt_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_invsqrt_ps (simde__m512 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_invsqrt_ps(a);\n  #else\n    return 
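/* full-precision 1/sqrt(a): an actual division, not an rcp/rsqrt-style\n       approximation (those are only good to roughly 12 bits) */\n      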
simde_mm512_div_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0)), simde_mm512_sqrt_ps(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_invsqrt_ps\n  #define _mm512_invsqrt_ps(a) simde_mm512_invsqrt_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_invsqrt_pd (simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_invsqrt_pd(a);\n  #else\n    return simde_mm512_div_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.0)), simde_mm512_sqrt_pd(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_invsqrt_pd\n  #define _mm512_invsqrt_pd(a) simde_mm512_invsqrt_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_mask_invsqrt_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_invsqrt_ps(src, k, a);\n  #else\n    return simde_mm512_mask_mov_ps(src, k, simde_mm512_invsqrt_ps(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_invsqrt_ps\n  #define _mm512_mask_invsqrt_ps(src, k, a) simde_mm512_mask_invsqrt_ps(src, k, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_mask_invsqrt_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_invsqrt_pd(src, k, a);\n  #else\n    return simde_mm512_mask_mov_pd(src, k, simde_mm512_invsqrt_pd(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_invsqrt_pd\n  #define _mm512_mask_invsqrt_pd(src, k, a) simde_mm512_mask_invsqrt_pd(src, k, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_log_ps (simde__m128 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_log_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)\n    #if SIMDE_ACCURACY_PREFERENCE > 1\n      return Sleef_logf4_u10(a);\n    #else\n      return Sleef_logf4_u35(a);\n    #endif\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n      r_.f32[i] = simde_math_logf(a_.f32[i]);\n    }\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_log_ps\n  #define _mm_log_ps(a) simde_mm_log_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_log_pd (simde__m128d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_log_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)\n    #if SIMDE_ACCURACY_PREFERENCE > 1\n      return Sleef_logd2_u10(a);\n    #else\n      return Sleef_logd2_u35(a);\n    #endif\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n      r_.f64[i] = simde_math_log(a_.f64[i]);\n    }\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_log_pd\n  #define _mm_log_pd(a) simde_mm_log_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_log_ps (simde__m256 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_log_ps(a);\n  #elif 
defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)\n    #if SIMDE_ACCURACY_PREFERENCE > 1\n      return Sleef_logf8_u10(a);\n    #else\n      return Sleef_logf8_u35(a);\n    #endif\n  #else\n    simde__m256_private\n      r_,\n      a_ = simde__m256_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {\n        r_.m128[i] = simde_mm_log_ps(a_.m128[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = simde_math_logf(a_.f32[i]);\n      }\n    #endif\n\n    return simde__m256_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_log_ps\n  #define _mm256_log_ps(a) simde_mm256_log_ps(a)\n#endif\n\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_log_pd (simde__m256d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_log_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)\n    #if SIMDE_ACCURACY_PREFERENCE > 1\n      return Sleef_logd4_u10(a);\n    #else\n      return Sleef_logd4_u35(a);\n    #endif\n  #else\n    simde__m256d_private\n      r_,\n      a_ = simde__m256d_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {\n        r_.m128d[i] = simde_mm_log_pd(a_.m128d[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = simde_math_log(a_.f64[i]);\n      }\n    #endif\n\n    return simde__m256d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_log_pd\n  #define _mm256_log_pd(a) simde_mm256_log_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_log_ps (simde__m512 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_log_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    #if SIMDE_ACCURACY_PREFERENCE > 1\n      return Sleef_logf16_u10(a);\n    #else\n      return Sleef_logf16_u35(a);\n    #endif\n  #else\n    simde__m512_private\n      r_,\n      a_ = simde__m512_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n      for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {\n        r_.m256[i] = simde_mm256_log_ps(a_.m256[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = simde_math_logf(a_.f32[i]);\n      }\n    #endif\n\n    return simde__m512_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_log_ps\n  #define _mm512_log_ps(a) simde_mm512_log_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_log_pd (simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_log_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    #if SIMDE_ACCURACY_PREFERENCE > 1\n      return Sleef_logd8_u10(a);\n    #else\n      return Sleef_logd8_u35(a);\n    #endif\n  #else\n    simde__m512d_private\n      r_,\n      a_ = simde__m512d_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n      for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {\n        r_.m256d[i] = 
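/* delegate each 256-bit half */ 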
simde_mm256_log_pd(a_.m256d[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = simde_math_log(a_.f64[i]);\n      }\n    #endif\n\n    return simde__m512d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_log_pd\n  #define _mm512_log_pd(a) simde_mm512_log_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_mask_log_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_log_ps(src, k, a);\n  #else\n    return simde_mm512_mask_mov_ps(src, k, simde_mm512_log_ps(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_log_ps\n  #define _mm512_mask_log_ps(src, k, a) simde_mm512_mask_log_ps(src, k, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_mask_log_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_log_pd(src, k, a);\n  #else\n    return simde_mm512_mask_mov_pd(src, k, simde_mm512_log_pd(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_log_pd\n  #define _mm512_mask_log_pd(src, k, a) simde_mm512_mask_log_pd(src, k, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_cdfnorminv_ps (simde__m128 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_cdfnorminv_ps(a);\n  #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)\n    simde__m128 matched, retval = simde_mm_setzero_ps();\n\n    { /* if (a < 0 || a > 1) */\n      matched = simde_mm_or_ps(simde_mm_cmplt_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))), simde_mm_cmpgt_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(1.0))));\n\n      /* We don't actually need to do anything here since we initialize\n       * retval to 0.0. */\n    }\n\n    { /* else if (a == 0) */\n      simde__m128 mask = simde_mm_cmpeq_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0)));\n      mask = simde_mm_andnot_ps(matched, mask);\n      matched = simde_mm_or_ps(matched, mask);\n\n      simde__m128 res = simde_mm_set1_ps(-SIMDE_MATH_INFINITYF);\n\n      retval = simde_mm_or_ps(retval, simde_mm_and_ps(mask, res));\n    }\n\n    { /* else if (a == 1) */\n      simde__m128 mask = simde_mm_cmpeq_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(1.0)));\n      mask = simde_mm_andnot_ps(matched, mask);\n      matched = simde_mm_or_ps(matched, mask);\n\n      simde__m128 res = simde_mm_set1_ps(SIMDE_MATH_INFINITYF);\n\n      retval = simde_mm_or_ps(retval, simde_mm_and_ps(mask, res));\n    }\n\n    { /* Remaining conditions.\n       *\n       * Including the else case in this complicates things a lot, but\n       * we're using cheap operations to get rid of expensive multiply\n       * and add functions.  This should be a small improvement on SSE\n       * prior to 4.1.  On SSE 4.1 we can use _mm_blendv_ps which is\n       * very fast and this becomes a huge win.  NEON, AltiVec, and\n       * WASM also have blend operations, so this should be a big win\n       * there, too. 
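The coefficient\n       * sets used in the Horner chains below match Peter Acklam's\n       * well-known rational approximation to the inverse normal CDF:\n       * one pair of polynomials for the central region and another\n       * shared by both tails, split at 0.02425 and 1 - 0.02425. 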
*/\n\n      /* else if (a < 0.02425) */\n      simde__m128 mask_lo = simde_mm_cmplt_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(0.02425)));\n      /* else if (a > 0.97575) */\n      simde__m128 mask_hi = simde_mm_cmpgt_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(0.97575)));\n\n      simde__m128 mask = simde_mm_or_ps(mask_lo, mask_hi);\n      matched = simde_mm_or_ps(matched, mask);\n\n      /* else */\n      simde__m128 mask_el = simde_x_mm_not_ps(matched);\n      mask = simde_mm_or_ps(mask, mask_el);\n\n      /* r = a - 0.5f */\n      simde__m128 r = simde_mm_sub_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(0.5)));\n\n      /* lo: q = a\n       * hi: q = (1.0 - a) */\n      simde__m128 q = simde_mm_and_ps(mask_lo, a);\n      q = simde_mm_or_ps(q, simde_mm_and_ps(mask_hi, simde_mm_sub_ps(simde_mm_set1_ps(1.0f), a)));\n\n      /* q = simde_math_sqrtf(-2.0f * simde_math_logf(q)) */\n      q = simde_mm_log_ps(q);\n      q = simde_mm_mul_ps(q, simde_mm_set1_ps(SIMDE_FLOAT32_C(-2.0)));\n      q = simde_mm_sqrt_ps(q);\n\n      /* el: q = r * r */\n      q = simde_x_mm_select_ps(q, simde_mm_mul_ps(r, r), mask_el);\n\n      /* lo: float numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) *  1.0f); */\n      /* hi: float numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) * -1.0f); */\n      /* el: float numerator = ((((((c_a[0] * q + c_a[1]) * q + c_a[2]) * q + c_a[3]) * q + c_a[4]) * q + c_a[5]) *  r); */\n      simde__m128 numerator = simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(-7.784894002430293e-03)), simde_mm_set1_ps(SIMDE_FLOAT32_C(-3.969683028665376e+01)), mask_el);\n      numerator = simde_mm_fmadd_ps(numerator, q, simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(-3.223964580411365e-01)), simde_mm_set1_ps(SIMDE_FLOAT32_C( 2.209460984245205e+02)), mask_el));\n      numerator = simde_mm_fmadd_ps(numerator, q, simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(-2.400758277161838e+00)), simde_mm_set1_ps(SIMDE_FLOAT32_C(-2.759285104469687e+02)), mask_el));\n      numerator = simde_mm_fmadd_ps(numerator, q, simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(-2.549732539343734e+00)), simde_mm_set1_ps(SIMDE_FLOAT32_C( 1.383577518672690e+02)), mask_el));\n      numerator = simde_mm_fmadd_ps(numerator, q, simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C( 4.374664141464968e+00)), simde_mm_set1_ps(SIMDE_FLOAT32_C(-3.066479806614716e+01)), mask_el));\n      numerator = simde_mm_fmadd_ps(numerator, q, simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C( 2.938163982698783e+00)), simde_mm_set1_ps(SIMDE_FLOAT32_C( 2.506628277459239e+00)), mask_el));\n      {\n        simde__m128 multiplier;\n        multiplier =                            simde_mm_and_ps(mask_lo, simde_mm_set1_ps(SIMDE_FLOAT32_C( 1.0)));\n        multiplier = simde_mm_or_ps(multiplier, simde_mm_and_ps(mask_hi, simde_mm_set1_ps(SIMDE_FLOAT32_C(-1.0))));\n        multiplier = simde_mm_or_ps(multiplier, simde_mm_and_ps(mask_el, r));\n        numerator = simde_mm_mul_ps(numerator, multiplier);\n      }\n\n      /* lo/hi: float denominator = (((((c_d[0] * q + c_d[1]) * q + c_d[2]) * q + c_d[3]) * 1 +   0.0f) * q + 1); */\n      /*    el: float denominator = (((((c_b[0] * q + c_b[1]) * q + c_b[2]) * q + c_b[3]) * q + c_b[4]) * q + 1); */\n      simde__m128 denominator = simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C( 7.784695709041462e-03)), simde_mm_set1_ps(SIMDE_FLOAT32_C(-5.447609879822406e+01)), mask_el);\n      denominator = 
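/* Horner steps: one fused multiply-add per coefficient (a single\n         rounding where the target has FMA, mul+add elsewhere), with the\n         region-appropriate constant selected by mask_el */\n      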
simde_mm_fmadd_ps(denominator, q, simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C( 3.224671290700398e-01)), simde_mm_set1_ps(SIMDE_FLOAT32_C( 1.615858368580409e+02)), mask_el));\n      denominator = simde_mm_fmadd_ps(denominator, q, simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C( 2.445134137142996e+00)), simde_mm_set1_ps(SIMDE_FLOAT32_C(-1.556989798598866e+02)), mask_el));\n      denominator = simde_mm_fmadd_ps(denominator, q, simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C( 3.754408661907416e+00)), simde_mm_set1_ps(SIMDE_FLOAT32_C( 6.680131188771972e+01)), mask_el));\n      denominator = simde_mm_fmadd_ps(denominator, simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C( 1.0)), q, mask_el),\n                                                   simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C( 0.0)), simde_mm_set1_ps(SIMDE_FLOAT32_C(-1.328068155288572e+01)), mask_el));\n      denominator = simde_mm_fmadd_ps(denominator, q, simde_mm_set1_ps(SIMDE_FLOAT32_C(1.0)));\n\n      /* res = numerator / denominator; */\n      simde__m128 res = simde_mm_div_ps(numerator, denominator);\n\n      retval = simde_mm_or_ps(retval, simde_mm_and_ps(mask, res));\n    }\n\n    return retval;\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n      r_.f32[i] = simde_math_cdfnorminvf(a_.f32[i]);\n    }\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_cdfnorminv_ps\n  #define _mm_cdfnorminv_ps(a) simde_mm_cdfnorminv_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_cdfnorminv_pd (simde__m128d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_cdfnorminv_pd(a);\n   #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)\n    simde__m128d matched, retval = simde_mm_setzero_pd();\n\n    { /* if (a < 0 || a > 1) */\n      matched = simde_mm_or_pd(simde_mm_cmplt_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))), simde_mm_cmpgt_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0))));\n\n      /* We don't actually need to do anything here since we initialize\n       * retval to 0.0. */\n    }\n\n    { /* else if (a == 0) */\n      simde__m128d mask = simde_mm_cmpeq_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0)));\n      mask = simde_mm_andnot_pd(matched, mask);\n      matched = simde_mm_or_pd(matched, mask);\n\n      simde__m128d res = simde_mm_set1_pd(-SIMDE_MATH_INFINITY);\n\n      retval = simde_mm_or_pd(retval, simde_mm_and_pd(mask, res));\n    }\n\n    { /* else if (a == 1) */\n      simde__m128d mask = simde_mm_cmpeq_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0)));\n      mask = simde_mm_andnot_pd(matched, mask);\n      matched = simde_mm_or_pd(matched, mask);\n\n      simde__m128d res = simde_mm_set1_pd(SIMDE_MATH_INFINITY);\n\n      retval = simde_mm_or_pd(retval, simde_mm_and_pd(mask, res));\n    }\n\n    { /* Remaining conditions.\n       *\n       * Including the else case in this complicates things a lot, but\n       * we're using cheap operations to get rid of expensive multiply\n       * and add functions.  This should be a small improvement on SSE\n       * prior to 4.1.  On SSE 4.1 we can use _mm_blendv_pd which is\n       * very fast and this becomes a huge win.  NEON, AltiVec, and\n       * WASM also have blend operations, so this should be a big win\n       * there, too. 
*/\n\n      /* else if (a < 0.02425) */\n      simde__m128d mask_lo = simde_mm_cmplt_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(0.02425)));\n      /* else if (a > 0.97575) */\n      simde__m128d mask_hi = simde_mm_cmpgt_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(0.97575)));\n\n      simde__m128d mask = simde_mm_or_pd(mask_lo, mask_hi);\n      matched = simde_mm_or_pd(matched, mask);\n\n      /* else */\n      simde__m128d mask_el = simde_x_mm_not_pd(matched);\n      mask = simde_mm_or_pd(mask, mask_el);\n\n      /* r = a - 0.5 */\n      simde__m128d r = simde_mm_sub_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(0.5)));\n\n      /* lo: q = a\n       * hi: q = (1.0 - a) */\n      simde__m128d q = simde_mm_and_pd(mask_lo, a);\n      q = simde_mm_or_pd(q, simde_mm_and_pd(mask_hi, simde_mm_sub_pd(simde_mm_set1_pd(1.0), a)));\n\n      /* q = simde_math_sqrt(-2.0 * simde_math_log(q)) */\n      q = simde_mm_log_pd(q);\n      q = simde_mm_mul_pd(q, simde_mm_set1_pd(SIMDE_FLOAT64_C(-2.0)));\n      q = simde_mm_sqrt_pd(q);\n\n      /* el: q = r * r */\n      q = simde_x_mm_select_pd(q, simde_mm_mul_pd(r, r), mask_el);\n\n      /* lo: double numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) *  1.0); */\n      /* hi: double numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) * -1.0); */\n      /* el: double numerator = ((((((c_a[0] * q + c_a[1]) * q + c_a[2]) * q + c_a[3]) * q + c_a[4]) * q + c_a[5]) *  r); */\n      simde__m128d numerator = simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(-7.784894002430293e-03)), simde_mm_set1_pd(SIMDE_FLOAT64_C(-3.969683028665376e+01)), mask_el);\n      numerator = simde_mm_fmadd_pd(numerator, q, simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(-3.223964580411365e-01)), simde_mm_set1_pd(SIMDE_FLOAT64_C( 2.209460984245205e+02)), mask_el));\n      numerator = simde_mm_fmadd_pd(numerator, q, simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(-2.400758277161838e+00)), simde_mm_set1_pd(SIMDE_FLOAT64_C(-2.759285104469687e+02)), mask_el));\n      numerator = simde_mm_fmadd_pd(numerator, q, simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(-2.549732539343734e+00)), simde_mm_set1_pd(SIMDE_FLOAT64_C( 1.383577518672690e+02)), mask_el));\n      numerator = simde_mm_fmadd_pd(numerator, q, simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C( 4.374664141464968e+00)), simde_mm_set1_pd(SIMDE_FLOAT64_C(-3.066479806614716e+01)), mask_el));\n      numerator = simde_mm_fmadd_pd(numerator, q, simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C( 2.938163982698783e+00)), simde_mm_set1_pd(SIMDE_FLOAT64_C( 2.506628277459239e+00)), mask_el));\n      {\n        simde__m128d multiplier;\n        multiplier =                            simde_mm_and_pd(mask_lo, simde_mm_set1_pd(SIMDE_FLOAT64_C( 1.0)));\n        multiplier = simde_mm_or_pd(multiplier, simde_mm_and_pd(mask_hi, simde_mm_set1_pd(SIMDE_FLOAT64_C(-1.0))));\n        multiplier = simde_mm_or_pd(multiplier, simde_mm_and_pd(mask_el, r));\n        numerator = simde_mm_mul_pd(numerator, multiplier);\n      }\n\n      /* lo/hi: double denominator = (((((c_d[0] * q + c_d[1]) * q + c_d[2]) * q + c_d[3]) * 1 +   0.0f) * q + 1); */\n      /*    el: double denominator = (((((c_b[0] * q + c_b[1]) * q + c_b[2]) * q + c_b[3]) * q + c_b[4]) * q + 1); */\n      simde__m128d denominator = simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C( 7.784695709041462e-03)), simde_mm_set1_pd(SIMDE_FLOAT64_C(-5.447609879822406e+01)), mask_el);\n      denominator = 
simde_mm_fmadd_pd(denominator, q, simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C( 3.224671290700398e-01)), simde_mm_set1_pd(SIMDE_FLOAT64_C( 1.615858368580409e+02)), mask_el));\n      denominator = simde_mm_fmadd_pd(denominator, q, simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C( 2.445134137142996e+00)), simde_mm_set1_pd(SIMDE_FLOAT64_C(-1.556989798598866e+02)), mask_el));\n      denominator = simde_mm_fmadd_pd(denominator, q, simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C( 3.754408661907416e+00)), simde_mm_set1_pd(SIMDE_FLOAT64_C( 6.680131188771972e+01)), mask_el));\n      denominator = simde_mm_fmadd_pd(denominator, simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C( 1.0)), q, mask_el),\n                                                   simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C( 0.0)), simde_mm_set1_pd(SIMDE_FLOAT64_C(-1.328068155288572e+01)), mask_el));\n      denominator = simde_mm_fmadd_pd(denominator, q, simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0)));\n\n      /* res = numerator / denominator; */\n      simde__m128d res = simde_mm_div_pd(numerator, denominator);\n\n      retval = simde_mm_or_pd(retval, simde_mm_and_pd(mask, res));\n    }\n\n    return retval;\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n      r_.f64[i] = simde_math_cdfnorminv(a_.f64[i]);\n    }\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_cdfnorminv_pd\n  #define _mm_cdfnorminv_pd(a) simde_mm_cdfnorminv_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_cdfnorminv_ps (simde__m256 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_cdfnorminv_ps(a);\n  #elif SIMDE_NATURAL_VECTOR_SIZE_GE(256)\n    simde__m256 matched, retval = simde_mm256_setzero_ps();\n\n    { /* if (a < 0 || a > 1) */\n      matched = simde_mm256_or_ps(simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_LT_OQ), simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.0)), SIMDE_CMP_GT_OQ));\n\n      /* We don't actually need to do anything here since we initialize\n       * retval to 0.0. */\n    }\n\n    { /* else if (a == 0) */\n      simde__m256 mask = simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_EQ_OQ);\n      mask = simde_mm256_andnot_ps(matched, mask);\n      matched = simde_mm256_or_ps(matched, mask);\n\n      simde__m256 res = simde_mm256_set1_ps(-SIMDE_MATH_INFINITYF);\n\n      retval = simde_mm256_or_ps(retval, simde_mm256_and_ps(mask, res));\n    }\n\n    { /* else if (a == 1) */\n      simde__m256 mask = simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.0)), SIMDE_CMP_EQ_OQ);\n      mask = simde_mm256_andnot_ps(matched, mask);\n      matched = simde_mm256_or_ps(matched, mask);\n\n      simde__m256 res = simde_mm256_set1_ps(SIMDE_MATH_INFINITYF);\n\n      retval = simde_mm256_or_ps(retval, simde_mm256_and_ps(mask, res));\n    }\n\n    { /* Remaining conditions.\n       *\n       * Including the else case in this complicates things a lot, but\n       * we're using cheap operations to get rid of expensive multiply\n       * and add functions.  This should be a small improvement on SSE\n       * prior to 4.1.  On SSE 4.1 we can use _mm256_blendv_ps which is\n       * very fast and this becomes a huge win.  
NEON, AltiVec, and\n       * WASM also have blend operations, so this should be a big win\n       * there, too. */\n\n      /* else if (a < 0.02425) */\n      simde__m256 mask_lo = simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.02425)), SIMDE_CMP_LT_OQ);\n      /* else if (a > 0.97575) */\n      simde__m256 mask_hi = simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.97575)), SIMDE_CMP_GT_OQ);\n\n      simde__m256 mask = simde_mm256_or_ps(mask_lo, mask_hi);\n      matched = simde_mm256_or_ps(matched, mask);\n\n      /* else */\n      simde__m256 mask_el = simde_x_mm256_not_ps(matched);\n      mask = simde_mm256_or_ps(mask, mask_el);\n\n      /* r = a - 0.5f */\n      simde__m256 r = simde_mm256_sub_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.5)));\n\n      /* lo: q = a\n       * hi: q = (1.0 - a) */\n      simde__m256 q = simde_mm256_and_ps(mask_lo, a);\n      q = simde_mm256_or_ps(q, simde_mm256_and_ps(mask_hi, simde_mm256_sub_ps(simde_mm256_set1_ps(1.0f), a)));\n\n      /* q = simde_math_sqrtf(-2.0f * simde_math_logf(q)) */\n      q = simde_mm256_log_ps(q);\n      q = simde_mm256_mul_ps(q, simde_mm256_set1_ps(SIMDE_FLOAT32_C(-2.0)));\n      q = simde_mm256_sqrt_ps(q);\n\n      /* el: q = r * r */\n      q = simde_x_mm256_select_ps(q, simde_mm256_mul_ps(r, r), mask_el);\n\n      /* lo: float numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) *  1.0f); */\n      /* hi: float numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) * -1.0f); */\n      /* el: float numerator = ((((((c_a[0] * q + c_a[1]) * q + c_a[2]) * q + c_a[3]) * q + c_a[4]) * q + c_a[5]) *  r); */\n      simde__m256 numerator = simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(-7.784894002430293e-03)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(-3.969683028665376e+01)), mask_el);\n      numerator = simde_mm256_fmadd_ps(numerator, q, simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(-3.223964580411365e-01)), simde_mm256_set1_ps(SIMDE_FLOAT32_C( 2.209460984245205e+02)), mask_el));\n      numerator = simde_mm256_fmadd_ps(numerator, q, simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(-2.400758277161838e+00)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(-2.759285104469687e+02)), mask_el));\n      numerator = simde_mm256_fmadd_ps(numerator, q, simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(-2.549732539343734e+00)), simde_mm256_set1_ps(SIMDE_FLOAT32_C( 1.383577518672690e+02)), mask_el));\n      numerator = simde_mm256_fmadd_ps(numerator, q, simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C( 4.374664141464968e+00)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(-3.066479806614716e+01)), mask_el));\n      numerator = simde_mm256_fmadd_ps(numerator, q, simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C( 2.938163982698783e+00)), simde_mm256_set1_ps(SIMDE_FLOAT32_C( 2.506628277459239e+00)), mask_el));\n      {\n        simde__m256 multiplier;\n        multiplier =                            simde_mm256_and_ps(mask_lo, simde_mm256_set1_ps(SIMDE_FLOAT32_C( 1.0)));\n        multiplier = simde_mm256_or_ps(multiplier, simde_mm256_and_ps(mask_hi, simde_mm256_set1_ps(SIMDE_FLOAT32_C(-1.0))));\n        multiplier = simde_mm256_or_ps(multiplier, simde_mm256_and_ps(mask_el, r));\n        numerator = simde_mm256_mul_ps(numerator, multiplier);\n      }\n\n      /* lo/hi: float denominator = (((((c_d[0] * q + c_d[1]) * q + c_d[2]) * q + c_d[3]) * 1 +   0.0f) * q + 1); */\n      /*    el: float denominator 
= (((((c_b[0] * q + c_b[1]) * q + c_b[2]) * q + c_b[3]) * q + c_b[4]) * q + 1); */\n      simde__m256 denominator = simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C( 7.784695709041462e-03)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(-5.447609879822406e+01)), mask_el);\n      denominator = simde_mm256_fmadd_ps(denominator, q, simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C( 3.224671290700398e-01)), simde_mm256_set1_ps(SIMDE_FLOAT32_C( 1.615858368580409e+02)), mask_el));\n      denominator = simde_mm256_fmadd_ps(denominator, q, simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C( 2.445134137142996e+00)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(-1.556989798598866e+02)), mask_el));\n      denominator = simde_mm256_fmadd_ps(denominator, q, simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C( 3.754408661907416e+00)), simde_mm256_set1_ps(SIMDE_FLOAT32_C( 6.680131188771972e+01)), mask_el));\n      denominator = simde_mm256_fmadd_ps(denominator, simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C( 1.0)), q, mask_el),\n                                                   simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C( 0.0)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(-1.328068155288572e+01)), mask_el));\n      denominator = simde_mm256_fmadd_ps(denominator, q, simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.0)));\n\n      /* res = numerator / denominator; */\n      simde__m256 res = simde_mm256_div_ps(numerator, denominator);\n\n      retval = simde_mm256_or_ps(retval, simde_mm256_and_ps(mask, res));\n    }\n\n    return retval;\n  #else\n    simde__m256_private\n      r_,\n      a_ = simde__m256_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {\n        r_.m128[i] = simde_mm_cdfnorminv_ps(a_.m128[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = simde_math_cdfnorminvf(a_.f32[i]);\n      }\n    #endif\n\n    return simde__m256_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_cdfnorminv_ps\n  #define _mm256_cdfnorminv_ps(a) simde_mm256_cdfnorminv_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_cdfnorminv_pd (simde__m256d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_cdfnorminv_pd(a);\n   #elif SIMDE_NATURAL_VECTOR_SIZE_GE(256)\n    simde__m256d matched, retval = simde_mm256_setzero_pd();\n\n    { /* if (a < 0 || a > 1) */\n      matched = simde_mm256_or_pd(simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.0)), SIMDE_CMP_LT_OQ), simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0)), SIMDE_CMP_GT_OQ));\n\n      /* We don't actually need to do anything here since we initialize\n       * retval to 0.0. 
*/\n    }\n\n    { /* else if (a == 0) */\n      simde__m256d mask = simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.0)), SIMDE_CMP_EQ_OQ);\n      mask = simde_mm256_andnot_pd(matched, mask);\n      matched = simde_mm256_or_pd(matched, mask);\n\n      simde__m256d res = simde_mm256_set1_pd(-SIMDE_MATH_INFINITY);\n\n      retval = simde_mm256_or_pd(retval, simde_mm256_and_pd(mask, res));\n    }\n\n    { /* else if (a == 1) */\n      simde__m256d mask = simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0)), SIMDE_CMP_EQ_OQ);\n      mask = simde_mm256_andnot_pd(matched, mask);\n      matched = simde_mm256_or_pd(matched, mask);\n\n      simde__m256d res = simde_mm256_set1_pd(SIMDE_MATH_INFINITY);\n\n      retval = simde_mm256_or_pd(retval, simde_mm256_and_pd(mask, res));\n    }\n\n    { /* Remaining conditions.\n       *\n       * Including the else case in this complicates things a lot, but\n       * we're using cheap operations to get rid of expensive multiply\n       * and add functions.  This should be a small improvement on SSE\n       * prior to 4.1.  On SSE 4.1 we can use _mm256_blendv_pd which is\n       * very fast and this becomes a huge win.  NEON, AltiVec, and\n       * WASM also have blend operations, so this should be a big win\n       * there, too. */\n\n      /* else if (a < 0.02425) */\n      simde__m256d mask_lo = simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.02425)), SIMDE_CMP_LT_OQ);\n      /* else if (a > 0.97575) */\n      simde__m256d mask_hi = simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.97575)), SIMDE_CMP_GT_OQ);\n\n      simde__m256d mask = simde_mm256_or_pd(mask_lo, mask_hi);\n      matched = simde_mm256_or_pd(matched, mask);\n\n      /* else */\n      simde__m256d mask_el = simde_x_mm256_not_pd(matched);\n      mask = simde_mm256_or_pd(mask, mask_el);\n\n      /* r = a - 0.5 */\n      simde__m256d r = simde_mm256_sub_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.5)));\n\n      /* lo: q = a\n       * hi: q = (1.0 - a) */\n      simde__m256d q = simde_mm256_and_pd(mask_lo, a);\n      q = simde_mm256_or_pd(q, simde_mm256_and_pd(mask_hi, simde_mm256_sub_pd(simde_mm256_set1_pd(1.0), a)));\n\n      /* q = simde_math_sqrt(-2.0 * simde_math_log(q)) */\n      q = simde_mm256_log_pd(q);\n      q = simde_mm256_mul_pd(q, simde_mm256_set1_pd(SIMDE_FLOAT64_C(-2.0)));\n      q = simde_mm256_sqrt_pd(q);\n\n      /* el: q = r * r */\n      q = simde_x_mm256_select_pd(q, simde_mm256_mul_pd(r, r), mask_el);\n\n      /* lo: double numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) *  1.0); */\n      /* hi: double numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) * -1.0); */\n      /* el: double numerator = ((((((c_a[0] * q + c_a[1]) * q + c_a[2]) * q + c_a[3]) * q + c_a[4]) * q + c_a[5]) *  r); */\n      simde__m256d numerator = simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(-7.784894002430293e-03)), simde_mm256_set1_pd(SIMDE_FLOAT64_C(-3.969683028665376e+01)), mask_el);\n      numerator = simde_mm256_fmadd_pd(numerator, q, simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(-3.223964580411365e-01)), simde_mm256_set1_pd(SIMDE_FLOAT64_C( 2.209460984245205e+02)), mask_el));\n      numerator = simde_mm256_fmadd_pd(numerator, q, simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(-2.400758277161838e+00)), simde_mm256_set1_pd(SIMDE_FLOAT64_C(-2.759285104469687e+02)), mask_el));\n      numerator = 
simde_mm256_fmadd_pd(numerator, q, simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(-2.549732539343734e+00)), simde_mm256_set1_pd(SIMDE_FLOAT64_C( 1.383577518672690e+02)), mask_el));\n      numerator = simde_mm256_fmadd_pd(numerator, q, simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C( 4.374664141464968e+00)), simde_mm256_set1_pd(SIMDE_FLOAT64_C(-3.066479806614716e+01)), mask_el));\n      numerator = simde_mm256_fmadd_pd(numerator, q, simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C( 2.938163982698783e+00)), simde_mm256_set1_pd(SIMDE_FLOAT64_C( 2.506628277459239e+00)), mask_el));\n      {\n        simde__m256d multiplier;\n        multiplier =                            simde_mm256_and_pd(mask_lo, simde_mm256_set1_pd(SIMDE_FLOAT64_C( 1.0)));\n        multiplier = simde_mm256_or_pd(multiplier, simde_mm256_and_pd(mask_hi, simde_mm256_set1_pd(SIMDE_FLOAT64_C(-1.0))));\n        multiplier = simde_mm256_or_pd(multiplier, simde_mm256_and_pd(mask_el, r));\n        numerator = simde_mm256_mul_pd(numerator, multiplier);\n      }\n\n      /* lo/hi: double denominator = (((((c_d[0] * q + c_d[1]) * q + c_d[2]) * q + c_d[3]) * 1 +   0.0f) * q + 1); */\n      /*    el: double denominator = (((((c_b[0] * q + c_b[1]) * q + c_b[2]) * q + c_b[3]) * q + c_b[4]) * q + 1); */\n      simde__m256d denominator = simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C( 7.784695709041462e-03)), simde_mm256_set1_pd(SIMDE_FLOAT64_C(-5.447609879822406e+01)), mask_el);\n      denominator = simde_mm256_fmadd_pd(denominator, q, simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C( 3.224671290700398e-01)), simde_mm256_set1_pd(SIMDE_FLOAT64_C( 1.615858368580409e+02)), mask_el));\n      denominator = simde_mm256_fmadd_pd(denominator, q, simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C( 2.445134137142996e+00)), simde_mm256_set1_pd(SIMDE_FLOAT64_C(-1.556989798598866e+02)), mask_el));\n      denominator = simde_mm256_fmadd_pd(denominator, q, simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C( 3.754408661907416e+00)), simde_mm256_set1_pd(SIMDE_FLOAT64_C( 6.680131188771972e+01)), mask_el));\n      denominator = simde_mm256_fmadd_pd(denominator, simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C( 1.0)), q, mask_el),\n                                                   simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C( 0.0)), simde_mm256_set1_pd(SIMDE_FLOAT64_C(-1.328068155288572e+01)), mask_el));\n      denominator = simde_mm256_fmadd_pd(denominator, q, simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0)));\n\n      /* res = numerator / denominator; */\n      simde__m256d res = simde_mm256_div_pd(numerator, denominator);\n\n      retval = simde_mm256_or_pd(retval, simde_mm256_and_pd(mask, res));\n    }\n\n    return retval;\n  #else\n    simde__m256d_private\n      r_,\n      a_ = simde__m256d_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n      for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {\n        r_.m128d[i] = simde_mm_cdfnorminv_pd(a_.m128d[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = simde_math_cdfnorminv(a_.f64[i]);\n      }\n    #endif\n\n    return simde__m256d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_cdfnorminv_pd\n  #define _mm256_cdfnorminv_pd(a) simde_mm256_cdfnorminv_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_cdfnorminv_ps 
(simde__m512 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_cdfnorminv_ps(a);\n  #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n    simde__m512_private\n      r_,\n      a_ = simde__m512_to_private(a);\n\n    for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {\n      r_.m256[i] = simde_mm256_cdfnorminv_ps(a_.m256[i]);\n    }\n\n    return simde__m512_from_private(r_);\n  #else\n\n    simde__m512 retval = simde_mm512_setzero_ps();\n    simde__mmask16 matched;\n\n    { /* if (a < 0 || a > 1) */\n      matched  = simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_LT_OQ);\n      matched |= simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0)), SIMDE_CMP_GT_OQ);\n\n      /* We don't actually need to do anything here since we initialize\n       * retval to 0.0. */\n    }\n\n    { /* else if (a == 0) */\n      simde__mmask16 mask = simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_EQ_OQ);\n      matched |= mask;\n\n      retval = simde_mm512_mask_mov_ps(retval, mask, simde_mm512_set1_ps(-SIMDE_MATH_INFINITYF));\n    }\n\n    { /* else if (a == 1) */\n      simde__mmask16 mask = simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0)), SIMDE_CMP_EQ_OQ);\n      matched |= mask;\n\n      retval = simde_mm512_mask_mov_ps(retval, mask, simde_mm512_set1_ps(SIMDE_MATH_INFINITYF));\n    }\n\n    { /* else if (a < 0.02425) */\n      simde__mmask16 mask_lo = simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.02425)), SIMDE_CMP_LT_OQ);\n      /* else if (a > 0.97575) */\n      simde__mmask16 mask_hi = simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.97575)), SIMDE_CMP_GT_OQ);\n\n      simde__mmask16 mask = mask_lo | mask_hi;\n      matched = matched | mask;\n\n      /* else */\n      simde__mmask16 mask_el = ~matched;\n\n      /* r = a - 0.5f */\n      simde__m512 r = simde_mm512_sub_ps(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.5)));\n\n      /* lo: q = a\n       * hi: q = (1.0 - a) */\n      simde__m512 q = simde_mm512_maskz_mov_ps(mask_lo, a);\n      q = simde_mm512_mask_sub_ps(q, mask_hi, simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0)), a);\n\n      /* q = simde_math_sqrtf(-2.0f * simde_math_logf(q)) */\n      q = simde_mm512_log_ps(q);\n      q = simde_mm512_mul_ps(q, simde_mm512_set1_ps(SIMDE_FLOAT32_C(-2.0)));\n      q = simde_mm512_sqrt_ps(q);\n\n      /* el: q = r * r */\n      q = simde_mm512_mask_mul_ps(q, mask_el, r, r);\n\n      /* lo: float numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) *  1.0f); */\n      /* hi: float numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) * -1.0f); */\n      /* el: float numerator = ((((((c_a[0] * q + c_a[1]) * q + c_a[2]) * q + c_a[3]) * q + c_a[4]) * q + c_a[5]) *  r); */\n      simde__m512 numerator = simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(-7.784894002430293e-03)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C(-3.969683028665376e+01)));\n      numerator = simde_mm512_fmadd_ps(numerator, q, simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(-3.223964580411365e-01)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C( 2.209460984245205e+02))));\n      numerator = simde_mm512_fmadd_ps(numerator, q, simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(-2.400758277161838e+00)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C(-2.759285104469687e+02))));\n      numerator = 
simde_mm512_fmadd_ps(numerator, q, simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(-2.549732539343734e+00)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C( 1.383577518672690e+02))));\n      numerator = simde_mm512_fmadd_ps(numerator, q, simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C( 4.374664141464968e+00)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C(-3.066479806614716e+01))));\n      numerator = simde_mm512_fmadd_ps(numerator, q, simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C( 2.938163982698783e+00)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C( 2.506628277459239e+00))));\n      {\n        simde__m512 multiplier;\n        multiplier =                                              simde_mm512_set1_ps(SIMDE_FLOAT32_C( 1.0));\n        multiplier = simde_mm512_mask_mov_ps(multiplier, mask_hi, simde_mm512_set1_ps(SIMDE_FLOAT32_C(-1.0)));\n        multiplier = simde_mm512_mask_mov_ps(multiplier, mask_el, r);\n        numerator = simde_mm512_mul_ps(numerator, multiplier);\n      }\n\n      /* lo/hi: float denominator = (((((c_d[0] * q + c_d[1]) * q + c_d[2]) * q + c_d[3]) * 1 +   0.0f) * q + 1); */\n      /*    el: float denominator = (((((c_b[0] * q + c_b[1]) * q + c_b[2]) * q + c_b[3]) * q + c_b[4]) * q + 1); */\n      simde__m512 denominator = simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C( 7.784695709041462e-03)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C(-5.447609879822406e+01)));\n      denominator = simde_mm512_fmadd_ps(denominator, q, simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C( 3.224671290700398e-01)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C( 1.615858368580409e+02))));\n      denominator = simde_mm512_fmadd_ps(denominator, q, simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C( 2.445134137142996e+00)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C(-1.556989798598866e+02))));\n      denominator = simde_mm512_fmadd_ps(denominator, q, simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C( 3.754408661907416e+00)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C( 6.680131188771972e+01))));\n      denominator = simde_mm512_fmadd_ps(denominator, simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C( 1.0)), mask_el, q),\n                                                      simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C( 0.0)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C(-1.328068155288572e+01))));\n      denominator = simde_mm512_fmadd_ps(denominator, q, simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0)));\n\n      /* res = numerator / denominator; */\n      retval = simde_mm512_mask_div_ps(retval, mask_lo | mask_hi | mask_el, numerator, denominator);\n    }\n\n    return retval;\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_cdfnorminv_ps\n  #define _mm512_cdfnorminv_ps(a) simde_mm512_cdfnorminv_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_cdfnorminv_pd (simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_cdfnorminv_pd(a);\n  #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n    simde__m512d_private\n      r_,\n      a_ = simde__m512d_to_private(a);\n\n    for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {\n      r_.m256d[i] = simde_mm256_cdfnorminv_pd(a_.m256d[i]);\n    }\n\n    return simde__m512d_from_private(r_);\n  #else\n\n    simde__m512d retval = simde_mm512_setzero_pd();\n    simde__mmask8 matched;\n\n    { /* if (a < 0 || a > 1) */\n      matched  = 
simde_mm512_cmp_pd_mask(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.0)), SIMDE_CMP_LT_OQ);\n      matched |= simde_mm512_cmp_pd_mask(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.0)), SIMDE_CMP_GT_OQ);\n\n      /* We don't actually need to do anything here since we initialize\n       * retval to 0.0. */\n    }\n\n    { /* else if (a == 0) */\n      simde__mmask8 mask = simde_mm512_cmp_pd_mask(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.0)), SIMDE_CMP_EQ_OQ);\n      matched |= mask;\n\n      retval = simde_mm512_mask_mov_pd(retval, mask, simde_mm512_set1_pd(-SIMDE_MATH_INFINITY));\n    }\n\n    { /* else if (a == 1) */\n      simde__mmask8 mask = simde_mm512_cmp_pd_mask(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.0)), SIMDE_CMP_EQ_OQ);\n      matched |= mask;\n\n      retval = simde_mm512_mask_mov_pd(retval, mask, simde_mm512_set1_pd(SIMDE_MATH_INFINITY));\n    }\n\n    { /* else if (a < 0.02425) */\n      simde__mmask8 mask_lo = simde_mm512_cmp_pd_mask(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.02425)), SIMDE_CMP_LT_OQ);\n      /* else if (a > 0.97575) */\n      simde__mmask8 mask_hi = simde_mm512_cmp_pd_mask(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.97575)), SIMDE_CMP_GT_OQ);\n\n      simde__mmask8 mask = mask_lo | mask_hi;\n      matched = matched | mask;\n\n      /* else */\n      simde__mmask8 mask_el = ~matched;\n\n      /* r = a - 0.5 */\n      simde__m512d r = simde_mm512_sub_pd(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.5)));\n\n      /* lo: q = a\n       * hi: q = (1.0 - a) */\n      simde__m512d q = simde_mm512_maskz_mov_pd(mask_lo, a);\n      q = simde_mm512_mask_sub_pd(q, mask_hi, simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.0)), a);\n\n      /* q = simde_math_sqrt(-2.0 * simde_math_log(q)) */\n      q = simde_mm512_log_pd(q);\n      q = simde_mm512_mul_pd(q, simde_mm512_set1_pd(SIMDE_FLOAT64_C(-2.0)));\n      q = simde_mm512_sqrt_pd(q);\n\n      /* el: q = r * r */\n      q = simde_mm512_mask_mul_pd(q, mask_el, r, r);\n\n      /* lo: double numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) *  1.0); */\n      /* hi: double numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) * -1.0); */\n      /* el: double numerator = ((((((c_a[0] * q + c_a[1]) * q + c_a[2]) * q + c_a[3]) * q + c_a[4]) * q + c_a[5]) *  r); */\n      simde__m512d numerator = simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(-7.784894002430293e-03)), mask_el, simde_mm512_set1_pd(SIMDE_FLOAT64_C(-3.969683028665376e+01)));\n      numerator = simde_mm512_fmadd_pd(numerator, q, simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(-3.223964580411365e-01)), mask_el, simde_mm512_set1_pd(SIMDE_FLOAT64_C( 2.209460984245205e+02))));\n      numerator = simde_mm512_fmadd_pd(numerator, q, simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(-2.400758277161838e+00)), mask_el, simde_mm512_set1_pd(SIMDE_FLOAT64_C(-2.759285104469687e+02))));\n      numerator = simde_mm512_fmadd_pd(numerator, q, simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(-2.549732539343734e+00)), mask_el, simde_mm512_set1_pd(SIMDE_FLOAT64_C( 1.383577518672690e+02))));\n      numerator = simde_mm512_fmadd_pd(numerator, q, simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C( 4.374664141464968e+00)), mask_el, simde_mm512_set1_pd(SIMDE_FLOAT64_C(-3.066479806614716e+01))));\n      numerator = simde_mm512_fmadd_pd(numerator, q, simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C( 2.938163982698783e+00)), mask_el, simde_mm512_set1_pd(SIMDE_FLOAT64_C( 2.506628277459239e+00))));\n      
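/* Apply the per-lane factor from the numerator comments above:\n       * +1 for the lower tail, -1 for the upper tail, and r for the\n       * central polynomial. */\n      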
{\n        simde__m512d multiplier;\n        multiplier =                                              simde_mm512_set1_pd(SIMDE_FLOAT64_C( 1.0));\n        multiplier = simde_mm512_mask_mov_pd(multiplier, mask_hi, simde_mm512_set1_pd(SIMDE_FLOAT64_C(-1.0)));\n        multiplier = simde_mm512_mask_mov_pd(multiplier, mask_el, r);\n        numerator = simde_mm512_mul_pd(numerator, multiplier);\n      }\n\n      /* lo/hi: float denominator = (((((c_d[0] * q + c_d[1]) * q + c_d[2]) * q + c_d[3]) * 1 +   0.0f) * q + 1); */\n      /*    el: float denominator = (((((c_b[0] * q + c_b[1]) * q + c_b[2]) * q + c_b[3]) * q + c_b[4]) * q + 1); */\n      simde__m512d denominator = simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C( 7.784695709041462e-03)), mask_el, simde_mm512_set1_pd(SIMDE_FLOAT64_C(-5.447609879822406e+01)));\n      denominator = simde_mm512_fmadd_pd(denominator, q, simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C( 3.224671290700398e-01)), mask_el, simde_mm512_set1_pd(SIMDE_FLOAT64_C( 1.615858368580409e+02))));\n      denominator = simde_mm512_fmadd_pd(denominator, q, simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C( 2.445134137142996e+00)), mask_el, simde_mm512_set1_pd(SIMDE_FLOAT64_C(-1.556989798598866e+02))));\n      denominator = simde_mm512_fmadd_pd(denominator, q, simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C( 3.754408661907416e+00)), mask_el, simde_mm512_set1_pd(SIMDE_FLOAT64_C( 6.680131188771972e+01))));\n      denominator = simde_mm512_fmadd_pd(denominator, simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C( 1.0)), mask_el, q),\n                                                      simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C( 0.0)), mask_el, simde_mm512_set1_pd(SIMDE_FLOAT64_C(-1.328068155288572e+01))));\n      denominator = simde_mm512_fmadd_pd(denominator, q, simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.0)));\n\n      /* res = numerator / denominator; */\n      retval = simde_mm512_mask_div_pd(retval, mask_lo | mask_hi | mask_el, numerator, denominator);\n    }\n\n    return retval;\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_cdfnorminv_pd\n  #define _mm512_cdfnorminv_pd(a) simde_mm512_cdfnorminv_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_mask_cdfnorminv_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_cdfnorminv_ps(src, k, a);\n  #else\n    return simde_mm512_mask_mov_ps(src, k, simde_mm512_cdfnorminv_ps(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_cdfnorminv_ps\n  #define _mm512_mask_cdfnorminv_ps(src, k, a) simde_mm512_mask_cdfnorminv_ps(src, k, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_mask_cdfnorminv_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_cdfnorminv_pd(src, k, a);\n  #else\n    return simde_mm512_mask_mov_pd(src, k, simde_mm512_cdfnorminv_pd(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_cdfnorminv_pd\n  #define _mm512_mask_cdfnorminv_pd(src, k, a) simde_mm512_mask_cdfnorminv_pd(src, k, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_erfinv_ps (simde__m128 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_erfinv_ps(a);\n  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)\n  
  /* https://stackoverflow.com/questions/27229371/inverse-error-function-in-c */\n    simde__m128 one = simde_mm_set1_ps(SIMDE_FLOAT32_C(1.0));\n\n    simde__m128 lnx = simde_mm_log_ps(simde_mm_mul_ps(simde_mm_sub_ps(one, a), simde_mm_add_ps(one, a)));\n\n    simde__m128 tt1 = simde_mm_mul_ps(simde_mm_set1_ps(HEDLEY_STATIC_CAST(simde_float32, SIMDE_MATH_PI)), simde_mm_set1_ps(SIMDE_FLOAT32_C(0.147)));\n    tt1 = simde_mm_div_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(2.0)), tt1);\n    tt1 = simde_mm_add_ps(tt1, simde_mm_mul_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(0.5)), lnx));\n\n    simde__m128 tt2 = simde_mm_set1_ps(SIMDE_FLOAT32_C(1.0) / SIMDE_FLOAT32_C(0.147));\n    tt2 = simde_mm_mul_ps(tt2, lnx);\n\n    simde__m128 r = simde_mm_mul_ps(tt1, tt1);\n    r = simde_mm_sub_ps(r, tt2);\n    r = simde_mm_sqrt_ps(r);\n    r = simde_mm_add_ps(simde_x_mm_negate_ps(tt1), r);\n    r = simde_mm_sqrt_ps(r);\n\n    return simde_x_mm_xorsign_ps(r, a);\n  #else\n    simde__m128_private\n      a_ = simde__m128_to_private(a),\n      r_;\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n      r_.f32[i] = simde_math_erfinvf(a_.f32[i]);\n    }\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_erfinv_ps\n  #define _mm_erfinv_ps(a) simde_mm_erfinv_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_erfinv_pd (simde__m128d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_erfinv_pd(a);\n  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)\n    simde__m128d one = simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0));\n\n    simde__m128d lnx = simde_mm_log_pd(simde_mm_mul_pd(simde_mm_sub_pd(one, a), simde_mm_add_pd(one, a)));\n\n    simde__m128d tt1 = simde_mm_mul_pd(simde_mm_set1_pd(SIMDE_MATH_PI), simde_mm_set1_pd(SIMDE_FLOAT64_C(0.147)));\n    tt1 = simde_mm_div_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(2.0)), tt1);\n    tt1 = simde_mm_add_pd(tt1, simde_mm_mul_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(0.5)), lnx));\n\n    simde__m128d tt2 = simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0) / SIMDE_FLOAT64_C(0.147));\n    tt2 = simde_mm_mul_pd(tt2, lnx);\n\n    simde__m128d r = simde_mm_mul_pd(tt1, tt1);\n    r = simde_mm_sub_pd(r, tt2);\n    r = simde_mm_sqrt_pd(r);\n    r = simde_mm_add_pd(simde_x_mm_negate_pd(tt1), r);\n    r = simde_mm_sqrt_pd(r);\n\n    return simde_x_mm_xorsign_pd(r, a);\n  #else\n    simde__m128d_private\n      a_ = simde__m128d_to_private(a),\n      r_;\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n      r_.f64[i] = simde_math_erfinv(a_.f64[i]);\n    }\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_erfinv_pd\n  #define _mm_erfinv_pd(a) simde_mm_erfinv_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_erfinv_ps (simde__m256 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_erfinv_ps(a);\n  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)\n    simde__m256 one = simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.0));\n    simde__m256 sgn = simde_x_mm256_copysign_ps(one, a);\n\n    a = simde_mm256_mul_ps(simde_mm256_sub_ps(one, a), simde_mm256_add_ps(one, a));\n    simde__m256 lnx = simde_mm256_log_ps(a);\n\n    simde__m256 tt1 = simde_mm256_mul_ps(simde_mm256_set1_ps(HEDLEY_STATIC_CAST(simde_float32, SIMDE_MATH_PI)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.147)));\n    tt1 = 
simde_mm256_div_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(2.0)), tt1);\n    tt1 = simde_mm256_add_ps(tt1, simde_mm256_mul_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.5)), lnx));\n\n    simde__m256 tt2 = simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.0) / SIMDE_FLOAT32_C(0.147));\n    tt2 = simde_mm256_mul_ps(tt2, lnx);\n\n    simde__m256 r = simde_mm256_mul_ps(tt1, tt1);\n    r = simde_mm256_sub_ps(r, tt2);\n    r = simde_mm256_sqrt_ps(r);\n    r = simde_mm256_add_ps(simde_x_mm256_negate_ps(tt1), r);\n    r = simde_mm256_sqrt_ps(r);\n\n    return simde_mm256_mul_ps(sgn, r);\n  #else\n    simde__m256_private\n      a_ = simde__m256_to_private(a),\n      r_;\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n      r_.f32[i] = simde_math_erfinvf(a_.f32[i]);\n    }\n\n    return simde__m256_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_erfinv_ps\n  #define _mm256_erfinv_ps(a) simde_mm256_erfinv_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_erfinv_pd (simde__m256d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_erfinv_pd(a);\n  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)\n    simde__m256d one = simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0));\n    simde__m256d sgn = simde_x_mm256_copysign_pd(one, a);\n\n    a = simde_mm256_mul_pd(simde_mm256_sub_pd(one, a), simde_mm256_add_pd(one, a));\n    simde__m256d lnx = simde_mm256_log_pd(a);\n\n    simde__m256d tt1 = simde_mm256_mul_pd(simde_mm256_set1_pd(SIMDE_MATH_PI), simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.147)));\n    tt1 = simde_mm256_div_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(2.0)), tt1);\n    tt1 = simde_mm256_add_pd(tt1, simde_mm256_mul_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.5)), lnx));\n\n    simde__m256d tt2 = simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0) / SIMDE_FLOAT64_C(0.147));\n    tt2 = simde_mm256_mul_pd(tt2, lnx);\n\n    simde__m256d r = simde_mm256_mul_pd(tt1, tt1);\n    r = simde_mm256_sub_pd(r, tt2);\n    r = simde_mm256_sqrt_pd(r);\n    r = simde_mm256_add_pd(simde_x_mm256_negate_pd(tt1), r);\n    r = simde_mm256_sqrt_pd(r);\n\n    return simde_mm256_mul_pd(sgn, r);\n  #else\n    simde__m256d_private\n      a_ = simde__m256d_to_private(a),\n      r_;\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n      r_.f64[i] = simde_math_erfinv(a_.f64[i]);\n    }\n\n    return simde__m256d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_erfinv_pd\n  #define _mm256_erfinv_pd(a) simde_mm256_erfinv_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_erfinv_ps (simde__m512 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_erfinv_ps(a);\n  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)\n    simde__m512 one = simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0));\n    simde__m512 sgn = simde_x_mm512_copysign_ps(one, a);\n\n    a = simde_mm512_mul_ps(simde_mm512_sub_ps(one, a), simde_mm512_add_ps(one, a));\n    simde__m512 lnx = simde_mm512_log_ps(a);\n\n    simde__m512 tt1 = simde_mm512_mul_ps(simde_mm512_set1_ps(HEDLEY_STATIC_CAST(simde_float32, SIMDE_MATH_PI)), simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.147)));\n    tt1 = simde_mm512_div_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(2.0)), tt1);\n    tt1 = simde_mm512_add_ps(tt1, simde_mm512_mul_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.5)), lnx));\n\n    simde__m512 tt2 = simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0) / 
SIMDE_FLOAT32_C(0.147));\n    tt2 = simde_mm512_mul_ps(tt2, lnx);\n\n    simde__m512 r = simde_mm512_mul_ps(tt1, tt1);\n    r = simde_mm512_sub_ps(r, tt2);\n    r = simde_mm512_sqrt_ps(r);\n    r = simde_mm512_add_ps(simde_x_mm512_negate_ps(tt1), r);\n    r = simde_mm512_sqrt_ps(r);\n\n    return simde_mm512_mul_ps(sgn, r);\n  #else\n    simde__m512_private\n      a_ = simde__m512_to_private(a),\n      r_;\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n      r_.f32[i] = simde_math_erfinvf(a_.f32[i]);\n    }\n\n    return simde__m512_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_erfinv_ps\n  #define _mm512_erfinv_ps(a) simde_mm512_erfinv_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_erfinv_pd (simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_erfinv_pd(a);\n  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)\n    simde__m512d one = simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.0));\n    simde__m512d sgn = simde_x_mm512_copysign_pd(one, a);\n\n    a = simde_mm512_mul_pd(simde_mm512_sub_pd(one, a), simde_mm512_add_pd(one, a));\n    simde__m512d lnx = simde_mm512_log_pd(a);\n\n    simde__m512d tt1 = simde_mm512_mul_pd(simde_mm512_set1_pd(SIMDE_MATH_PI), simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.147)));\n    tt1 = simde_mm512_div_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(2.0)), tt1);\n    tt1 = simde_mm512_add_pd(tt1, simde_mm512_mul_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.5)), lnx));\n\n    simde__m512d tt2 = simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.0) / SIMDE_FLOAT64_C(0.147));\n    tt2 = simde_mm512_mul_pd(tt2, lnx);\n\n    simde__m512d r = simde_mm512_mul_pd(tt1, tt1);\n    r = simde_mm512_sub_pd(r, tt2);\n    r = simde_mm512_sqrt_pd(r);\n    r = simde_mm512_add_pd(simde_x_mm512_negate_pd(tt1), r);\n    r = simde_mm512_sqrt_pd(r);\n\n    return simde_mm512_mul_pd(sgn, r);\n  #else\n    simde__m512d_private\n      a_ = simde__m512d_to_private(a),\n      r_;\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n      r_.f64[i] = simde_math_erfinv(a_.f64[i]);\n    }\n\n    return simde__m512d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_erfinv_pd\n  #define _mm512_erfinv_pd(a) simde_mm512_erfinv_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_mask_erfinv_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_erfinv_ps(src, k, a);\n  #else\n    return simde_mm512_mask_mov_ps(src, k, simde_mm512_erfinv_ps(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_erfinv_ps\n  #define _mm512_mask_erfinv_ps(src, k, a) simde_mm512_mask_erfinv_ps(src, k, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_mask_erfinv_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_erfinv_pd(src, k, a);\n  #else\n    return simde_mm512_mask_mov_pd(src, k, simde_mm512_erfinv_pd(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_erfinv_pd\n  #define _mm512_mask_erfinv_pd(src, k, a) simde_mm512_mask_erfinv_pd(src, k, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_erfcinv_ps (simde__m128 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && 
defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_erfcinv_ps(a);\n  #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)\n    simde__m128 matched, retval = simde_mm_setzero_ps();\n\n    { /* if (a < 2.0f && a > 0.0625f) */\n      matched = simde_mm_cmplt_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(2.0)));\n      matched = simde_mm_and_ps(matched, simde_mm_cmpgt_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0625))));\n\n      if (!simde_mm_test_all_zeros(simde_mm_castps_si128(matched), simde_x_mm_setone_si128())) {\n        retval = simde_mm_erfinv_ps(simde_mm_sub_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(1.0)), a));\n      }\n\n      if (simde_mm_test_all_ones(simde_mm_castps_si128(matched))) {\n        return retval;\n      }\n    }\n\n    { /* else if (a < 0.0625f && a > 0.0f) */\n      simde__m128 mask = simde_mm_cmplt_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0625)));\n      mask = simde_mm_and_ps(mask, simde_mm_cmpgt_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))));\n      mask = simde_mm_andnot_ps(matched, mask);\n\n      if (!simde_mm_test_all_zeros(simde_mm_castps_si128(mask), simde_x_mm_setone_si128())) {\n        matched = simde_mm_or_ps(matched, mask);\n\n        /* t =  1/(sqrt(-log(a))) */\n        simde__m128 t = simde_x_mm_negate_ps(simde_mm_log_ps(a));\n        t = simde_mm_sqrt_ps(t);\n        t = simde_mm_div_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(1.0)), t);\n\n        const simde__m128 p[] = {\n          simde_mm_set1_ps(SIMDE_FLOAT32_C( 0.1550470003116)),\n          simde_mm_set1_ps(SIMDE_FLOAT32_C( 1.382719649631)),\n          simde_mm_set1_ps(SIMDE_FLOAT32_C( 0.690969348887)),\n          simde_mm_set1_ps(SIMDE_FLOAT32_C(-1.128081391617)),\n          simde_mm_set1_ps(SIMDE_FLOAT32_C( 0.680544246825)),\n          simde_mm_set1_ps(SIMDE_FLOAT32_C(-0.164441567910))\n        };\n\n        const simde__m128 q[] = {\n          simde_mm_set1_ps(SIMDE_FLOAT32_C( 0.155024849822)),\n          simde_mm_set1_ps(SIMDE_FLOAT32_C( 1.385228141995)),\n          simde_mm_set1_ps(SIMDE_FLOAT32_C( 1.000000000000))\n        };\n\n        /* float numerator = p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) */\n        simde__m128 numerator = simde_mm_fmadd_ps(p[5], t, p[4]);\n        numerator = simde_mm_fmadd_ps(numerator, t, p[3]);\n        numerator = simde_mm_fmadd_ps(numerator, t, p[2]);\n        numerator = simde_mm_fmadd_ps(numerator, t, p[1]);\n        numerator = simde_mm_add_ps(numerator, simde_mm_div_ps(p[0], t));\n\n        /* float denominator = (q[0] + t * (q[1] + t * (q[2]))) */\n        simde__m128 denominator = simde_mm_fmadd_ps(q[2], t, q[1]);\n        denominator = simde_mm_fmadd_ps(denominator, t, q[0]);\n\n        simde__m128 res = simde_mm_div_ps(numerator, denominator);\n\n        retval = simde_mm_or_ps(retval, simde_mm_and_ps(mask, res));\n      }\n    }\n\n    { /* else if (a < 0.0f) */\n      simde__m128 mask = simde_mm_cmplt_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0)));\n      mask = simde_mm_andnot_ps(matched, mask);\n\n      if (!simde_mm_test_all_zeros(simde_mm_castps_si128(mask), simde_x_mm_setone_si128())) {\n        matched = simde_mm_or_ps(matched, mask);\n\n        /* t =  1/(sqrt(-log(a))) */\n        simde__m128 t = simde_x_mm_negate_ps(simde_mm_log_ps(a));\n        t = simde_mm_sqrt_ps(t);\n        t = simde_mm_div_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(1.0)), t);\n\n        const simde__m128 p[] = {\n          simde_mm_set1_ps(SIMDE_FLOAT32_C( 0.00980456202915)),\n          simde_mm_set1_ps(SIMDE_FLOAT32_C( 0.36366788917100)),\n          simde_mm_set1_ps(SIMDE_FLOAT32_C( 
0.97302949837000)),\n          simde_mm_set1_ps(SIMDE_FLOAT32_C( -0.5374947401000))\n        };\n\n        const simde__m128 q[] = {\n          simde_mm_set1_ps(SIMDE_FLOAT32_C( 0.00980451277802)),\n          simde_mm_set1_ps(SIMDE_FLOAT32_C( 0.36369997154400)),\n          simde_mm_set1_ps(SIMDE_FLOAT32_C( 1.00000000000000))\n        };\n\n        /* float numerator = (p[0] / t + p[1] + t * (p[2] + t * p[3])) */\n        simde__m128 numerator = simde_mm_fmadd_ps(p[3], t, p[2]);\n        numerator = simde_mm_fmadd_ps(numerator, t, p[1]);\n        numerator = simde_mm_add_ps(numerator, simde_mm_div_ps(p[0], t));\n\n        /* float denominator = (q[0] + t * (q[1] + t * (q[2]))) */\n        simde__m128 denominator = simde_mm_fmadd_ps(q[2], t, q[1]);\n        denominator = simde_mm_fmadd_ps(denominator, t, q[0]);\n\n        simde__m128 res = simde_mm_div_ps(numerator, denominator);\n\n        retval = simde_mm_or_ps(retval, simde_mm_and_ps(mask, res));\n\n        if (simde_mm_test_all_ones(simde_mm_castps_si128(matched))) {\n          return retval;\n        }\n      }\n    }\n\n    { /* else if (a == 0.0f) */\n      simde__m128 mask = simde_mm_cmpeq_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0)));\n      mask = simde_mm_andnot_ps(matched, mask);\n      matched = simde_mm_or_ps(matched, mask);\n\n      simde__m128 res = simde_mm_set1_ps(SIMDE_MATH_INFINITYF);\n\n      retval = simde_mm_or_ps(retval, simde_mm_and_ps(mask, res));\n    }\n\n    { /* else */\n      /* (a >= 2.0f) */\n      retval = simde_mm_or_ps(retval, simde_mm_andnot_ps(matched, simde_mm_set1_ps(-SIMDE_MATH_INFINITYF)));\n    }\n\n    return retval;\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n      r_.f32[i] = simde_math_erfcinvf(a_.f32[i]);\n    }\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_erfcinv_ps\n  #define _mm_erfcinv_ps(a) simde_mm_erfcinv_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_erfcinv_pd (simde__m128d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_erfcinv_pd(a);\n  #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)\n    simde__m128d matched, retval = simde_mm_setzero_pd();\n\n    { /* if (a < 2.0 && a > 0.0625) */\n      matched = simde_mm_cmplt_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(2.0)));\n      matched = simde_mm_and_pd(matched, simde_mm_cmpgt_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0625))));\n\n      if (!simde_mm_test_all_zeros(simde_mm_castpd_si128(matched), simde_x_mm_setone_si128())) {\n        retval = simde_mm_erfinv_pd(simde_mm_sub_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0)), a));\n      }\n\n      if (simde_mm_test_all_ones(simde_mm_castpd_si128(matched))) {\n        return retval;\n      }\n    }\n\n    { /* else if (a < 0.0625 && a > 0.0) */\n      simde__m128d mask = simde_mm_cmplt_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0625)));\n      mask = simde_mm_and_pd(mask, simde_mm_cmpgt_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))));\n      mask = simde_mm_andnot_pd(matched, mask);\n\n      if (!simde_mm_test_all_zeros(simde_mm_castpd_si128(mask), simde_x_mm_setone_si128())) {\n        matched = simde_mm_or_pd(matched, mask);\n\n        /* t =  1/(sqrt(-log(a))) */\n        simde__m128d t = simde_x_mm_negate_pd(simde_mm_log_pd(a));\n        t = simde_mm_sqrt_pd(t);\n        t = simde_mm_div_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0)), 
t);\n\n        const simde__m128d p[] = {\n          simde_mm_set1_pd(SIMDE_FLOAT64_C( 0.1550470003116)),\n          simde_mm_set1_pd(SIMDE_FLOAT64_C( 1.382719649631)),\n          simde_mm_set1_pd(SIMDE_FLOAT64_C( 0.690969348887)),\n          simde_mm_set1_pd(SIMDE_FLOAT64_C(-1.128081391617)),\n          simde_mm_set1_pd(SIMDE_FLOAT64_C( 0.680544246825)),\n          simde_mm_set1_pd(SIMDE_FLOAT64_C(-0.164441567910))\n        };\n\n        const simde__m128d q[] = {\n          simde_mm_set1_pd(SIMDE_FLOAT64_C( 0.155024849822)),\n          simde_mm_set1_pd(SIMDE_FLOAT64_C( 1.385228141995)),\n          simde_mm_set1_pd(SIMDE_FLOAT64_C( 1.000000000000))\n        };\n\n        /* float numerator = p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) */\n        simde__m128d numerator = simde_mm_fmadd_pd(p[5], t, p[4]);\n        numerator = simde_mm_fmadd_pd(numerator, t, p[3]);\n        numerator = simde_mm_fmadd_pd(numerator, t, p[2]);\n        numerator = simde_mm_fmadd_pd(numerator, t, p[1]);\n        numerator = simde_mm_add_pd(numerator, simde_mm_div_pd(p[0], t));\n\n        /* float denominator = (q[0] + t * (q[1] + t * (q[2]))) */\n        simde__m128d denominator = simde_mm_fmadd_pd(q[2], t, q[1]);\n        denominator = simde_mm_fmadd_pd(denominator, t, q[0]);\n\n        simde__m128d res = simde_mm_div_pd(numerator, denominator);\n\n        retval = simde_mm_or_pd(retval, simde_mm_and_pd(mask, res));\n      }\n    }\n\n    { /* else if (a < 0.0) */\n      simde__m128d mask = simde_mm_cmplt_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0)));\n      mask = simde_mm_andnot_pd(matched, mask);\n\n      if (!simde_mm_test_all_zeros(simde_mm_castpd_si128(mask), simde_x_mm_setone_si128())) {\n        matched = simde_mm_or_pd(matched, mask);\n\n        /* t =  1/(sqrt(-log(a))) */\n        simde__m128d t = simde_x_mm_negate_pd(simde_mm_log_pd(a));\n        t = simde_mm_sqrt_pd(t);\n        t = simde_mm_div_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0)), t);\n\n        const simde__m128d p[] = {\n          simde_mm_set1_pd(SIMDE_FLOAT64_C( 0.00980456202915)),\n          simde_mm_set1_pd(SIMDE_FLOAT64_C( 0.36366788917100)),\n          simde_mm_set1_pd(SIMDE_FLOAT64_C( 0.97302949837000)),\n          simde_mm_set1_pd(SIMDE_FLOAT64_C( -0.5374947401000))\n        };\n\n        const simde__m128d q[] = {\n          simde_mm_set1_pd(SIMDE_FLOAT64_C( 0.00980451277802)),\n          simde_mm_set1_pd(SIMDE_FLOAT64_C( 0.36369997154400)),\n          simde_mm_set1_pd(SIMDE_FLOAT64_C( 1.00000000000000))\n        };\n\n        /* float numerator = (p[0] / t + p[1] + t * (p[2] + t * p[3])) */\n        simde__m128d numerator = simde_mm_fmadd_pd(p[3], t, p[2]);\n        numerator = simde_mm_fmadd_pd(numerator, t, p[1]);\n        numerator = simde_mm_add_pd(numerator, simde_mm_div_pd(p[0], t));\n\n        /* float denominator = (q[0] + t * (q[1] + t * (q[2]))) */\n        simde__m128d denominator = simde_mm_fmadd_pd(q[2], t, q[1]);\n        denominator = simde_mm_fmadd_pd(denominator, t, q[0]);\n\n        simde__m128d res = simde_mm_div_pd(numerator, denominator);\n\n        retval = simde_mm_or_pd(retval, simde_mm_and_pd(mask, res));\n\n        if (simde_mm_test_all_ones(simde_mm_castpd_si128(matched))) {\n          return retval;\n        }\n      }\n    }\n\n    { /* else if (a == 0.0) */\n      simde__m128d mask = simde_mm_cmpeq_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0)));\n      mask = simde_mm_andnot_pd(matched, mask);\n      matched = simde_mm_or_pd(matched, mask);\n\n      simde__m128d res = 
simde_mm_set1_pd(SIMDE_MATH_INFINITY);\n\n      retval = simde_mm_or_pd(retval, simde_mm_and_pd(mask, res));\n    }\n\n    { /* else */\n      /* (a >= 2.0) */\n      retval = simde_mm_or_pd(retval, simde_mm_andnot_pd(matched, simde_mm_set1_pd(-SIMDE_MATH_INFINITY)));\n    }\n\n    return retval;\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n      r_.f64[i] = simde_math_erfcinv(a_.f64[i]);\n    }\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_erfcinv_pd\n  #define _mm_erfcinv_pd(a) simde_mm_erfcinv_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_erfcinv_ps (simde__m256 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_erfcinv_ps(a);\n  #elif SIMDE_NATURAL_VECTOR_SIZE_GE(256)\n    simde__m256 matched, retval = simde_mm256_setzero_ps();\n\n    { /* if (a < 2.0f && a > 0.0625f) */\n      matched = simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(2.0)), SIMDE_CMP_LT_OQ);\n      matched = simde_mm256_and_ps(matched, simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0625)), SIMDE_CMP_GT_OQ));\n\n      if (!simde_mm256_testz_ps(matched, matched)) {\n        retval = simde_mm256_erfinv_ps(simde_mm256_sub_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.0)), a));\n      }\n\n      if (simde_x_mm256_test_all_ones(simde_mm256_castps_si256(matched))) {\n        return retval;\n      }\n    }\n\n    { /* else if (a < 0.0625f && a > 0.0f) */\n      simde__m256 mask = simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0625)), SIMDE_CMP_LT_OQ);\n      mask = simde_mm256_and_ps(mask, simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_GT_OQ));\n      mask = simde_mm256_andnot_ps(matched, mask);\n\n      if (!simde_mm256_testz_ps(mask, mask)) {\n        matched = simde_mm256_or_ps(matched, mask);\n\n        /* t =  1/(sqrt(-log(a))) */\n        simde__m256 t = simde_x_mm256_negate_ps(simde_mm256_log_ps(a));\n        t = simde_mm256_sqrt_ps(t);\n        t = simde_mm256_div_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.0)), t);\n\n        const simde__m256 p[] = {\n          simde_mm256_set1_ps(SIMDE_FLOAT32_C( 0.1550470003116)),\n          simde_mm256_set1_ps(SIMDE_FLOAT32_C( 1.382719649631)),\n          simde_mm256_set1_ps(SIMDE_FLOAT32_C( 0.690969348887)),\n          simde_mm256_set1_ps(SIMDE_FLOAT32_C(-1.128081391617)),\n          simde_mm256_set1_ps(SIMDE_FLOAT32_C( 0.680544246825)),\n          simde_mm256_set1_ps(SIMDE_FLOAT32_C(-0.16444156791))\n        };\n\n        const simde__m256 q[] = {\n          simde_mm256_set1_ps(SIMDE_FLOAT32_C( 0.155024849822)),\n          simde_mm256_set1_ps(SIMDE_FLOAT32_C( 1.385228141995)),\n          simde_mm256_set1_ps(SIMDE_FLOAT32_C( 1.000000000000))\n        };\n\n        /* float numerator = p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) */\n        simde__m256 numerator = simde_mm256_fmadd_ps(p[5], t, p[4]);\n        numerator = simde_mm256_fmadd_ps(numerator, t, p[3]);\n        numerator = simde_mm256_fmadd_ps(numerator, t, p[2]);\n        numerator = simde_mm256_fmadd_ps(numerator, t, p[1]);\n        numerator = simde_mm256_add_ps(numerator, simde_mm256_div_ps(p[0], t));\n\n        /* float denominator = (q[0] + t * (q[1] + t * (q[2]))) */\n        simde__m256 denominator = simde_mm256_fmadd_ps(q[2], t, q[1]);\n        
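/* second fused step: denominator = (q[2] * t + q[1]) * t + q[0] */\n        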
denominator = simde_mm256_fmadd_ps(denominator, t, q[0]);\n\n        simde__m256 res = simde_mm256_div_ps(numerator, denominator);\n\n        retval = simde_mm256_or_ps(retval, simde_mm256_and_ps(mask, res));\n      }\n    }\n\n    { /* else if (a < 0.0f) */\n      simde__m256 mask = simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_LT_OQ);\n      mask = simde_mm256_andnot_ps(matched, mask);\n\n      if (!simde_mm256_testz_ps(mask, mask)) {\n        matched = simde_mm256_or_ps(matched, mask);\n\n        /* t =  1/(sqrt(-log(a))) */\n        simde__m256 t = simde_x_mm256_negate_ps(simde_mm256_log_ps(a));\n        t = simde_mm256_sqrt_ps(t);\n        t = simde_mm256_div_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.0)), t);\n\n        const simde__m256 p[] = {\n          simde_mm256_set1_ps(SIMDE_FLOAT32_C( 0.00980456202915)),\n          simde_mm256_set1_ps(SIMDE_FLOAT32_C( 0.36366788917100)),\n          simde_mm256_set1_ps(SIMDE_FLOAT32_C( 0.97302949837000)),\n          simde_mm256_set1_ps(SIMDE_FLOAT32_C(-0.5374947401000))\n        };\n\n        const simde__m256 q[] = {\n          simde_mm256_set1_ps(SIMDE_FLOAT32_C( 0.00980451277802)),\n          simde_mm256_set1_ps(SIMDE_FLOAT32_C( 0.36369997154400)),\n          simde_mm256_set1_ps(SIMDE_FLOAT32_C( 1.00000000000000))\n        };\n\n        /* float numerator = (p[0] / t + p[1] + t * (p[2] + t * p[3])) */\n        simde__m256 numerator = simde_mm256_fmadd_ps(p[3], t, p[2]);\n        numerator = simde_mm256_fmadd_ps(numerator, t, p[1]);\n        numerator = simde_mm256_add_ps(numerator, simde_mm256_div_ps(p[0], t));\n\n        /* float denominator = (q[0] + t * (q[1] + t * (q[2]))) */\n        simde__m256 denominator = simde_mm256_fmadd_ps(q[2], t, q[1]);\n        denominator = simde_mm256_fmadd_ps(denominator, t, q[0]);\n\n        simde__m256 res = simde_mm256_div_ps(numerator, denominator);\n\n        retval = simde_mm256_or_ps(retval, simde_mm256_and_ps(mask, res));\n\n        if (simde_x_mm256_test_all_ones(simde_mm256_castps_si256(matched))) {\n          return retval;\n        }\n      }\n    }\n\n    { /* else if (a == 0.0f) */\n      simde__m256 mask = simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_EQ_OQ);\n      mask = simde_mm256_andnot_ps(matched, mask);\n      matched = simde_mm256_or_ps(matched, mask);\n\n      simde__m256 res = simde_mm256_set1_ps(SIMDE_MATH_INFINITYF);\n\n      retval = simde_mm256_or_ps(retval, simde_mm256_and_ps(mask, res));\n    }\n\n    { /* else */\n      /* (a >= 2.0f) */\n      retval = simde_mm256_or_ps(retval, simde_mm256_andnot_ps(matched, simde_mm256_set1_ps(-SIMDE_MATH_INFINITYF)));\n    }\n\n    return retval;\n  #else\n    simde__m256_private\n      r_,\n      a_ = simde__m256_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {\n        r_.m128[i] = simde_mm_erfcinv_ps(a_.m128[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = simde_math_erfcinvf(a_.f32[i]);\n      }\n    #endif\n\n    return simde__m256_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_erfcinv_ps\n  #define _mm256_erfcinv_ps(a) simde_mm256_erfcinv_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_erfcinv_pd (simde__m256d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_erfcinv_pd(a);\n  
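/* A scalar sketch of the branch structure implemented below\n     (on the central range, erfcinv(a) reduces to erfinv(1 - a)):\n       if (a < 2.0 && a > 0.0625)       return erfinv(1.0 - a);\n       else if (a < 0.0625 && a > 0.0)  return P(t) / Q(t), t = 1/sqrt(-log(a));\n       else if (a < 0.0)                return second P(t) / Q(t) pair;\n       else if (a == 0.0)               return  INFINITY;\n       else                             return -INFINITY;   (a >= 2.0) */\n  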
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(256)\n    simde__m256d matched, retval = simde_mm256_setzero_pd();\n\n    { /* if (a < 2.0 && a > 0.0625) */\n      matched = simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(2.0)), SIMDE_CMP_LT_OQ);\n      matched = simde_mm256_and_pd(matched, simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.0625)), SIMDE_CMP_GT_OQ));\n\n      if (!simde_mm256_testz_pd(matched, matched)) {\n        retval = simde_mm256_erfinv_pd(simde_mm256_sub_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0)), a));\n      }\n\n      if (simde_x_mm256_test_all_ones(simde_mm256_castpd_si256(matched))) {\n        return retval;\n      }\n    }\n\n    { /* else if (a < 0.0625 && a > 0.0) */\n      simde__m256d mask = simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.0625)), SIMDE_CMP_LT_OQ);\n      mask = simde_mm256_and_pd(mask, simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.0)), SIMDE_CMP_GT_OQ));\n      mask = simde_mm256_andnot_pd(matched, mask);\n\n      if (!simde_mm256_testz_pd(mask, mask)) {\n        matched = simde_mm256_or_pd(matched, mask);\n\n        /* t =  1/(sqrt(-log(a))) */\n        simde__m256d t = simde_x_mm256_negate_pd(simde_mm256_log_pd(a));\n        t = simde_mm256_sqrt_pd(t);\n        t = simde_mm256_div_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0)), t);\n\n        const simde__m256d p[] = {\n          simde_mm256_set1_pd(SIMDE_FLOAT64_C( 0.1550470003116)),\n          simde_mm256_set1_pd(SIMDE_FLOAT64_C( 1.382719649631)),\n          simde_mm256_set1_pd(SIMDE_FLOAT64_C( 0.690969348887)),\n          simde_mm256_set1_pd(SIMDE_FLOAT64_C(-1.128081391617)),\n          simde_mm256_set1_pd(SIMDE_FLOAT64_C( 0.680544246825)),\n          simde_mm256_set1_pd(SIMDE_FLOAT64_C(-0.16444156791))\n        };\n\n        const simde__m256d q[] = {\n          simde_mm256_set1_pd(SIMDE_FLOAT64_C( 0.155024849822)),\n          simde_mm256_set1_pd(SIMDE_FLOAT64_C( 1.385228141995)),\n          simde_mm256_set1_pd(SIMDE_FLOAT64_C( 1.000000000000))\n        };\n\n        /* float numerator = p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) */\n        simde__m256d numerator = simde_mm256_fmadd_pd(p[5], t, p[4]);\n        numerator = simde_mm256_fmadd_pd(numerator, t, p[3]);\n        numerator = simde_mm256_fmadd_pd(numerator, t, p[2]);\n        numerator = simde_mm256_fmadd_pd(numerator, t, p[1]);\n        numerator = simde_mm256_add_pd(numerator, simde_mm256_div_pd(p[0], t));\n\n        /* float denominator = (q[0] + t * (q[1] + t * (q[2]))) */\n        simde__m256d denominator = simde_mm256_fmadd_pd(q[2], t, q[1]);\n        denominator = simde_mm256_fmadd_pd(denominator, t, q[0]);\n\n        simde__m256d res = simde_mm256_div_pd(numerator, denominator);\n\n        retval = simde_mm256_or_pd(retval, simde_mm256_and_pd(mask, res));\n      }\n    }\n\n    { /* else if (a < 0.0) */\n      simde__m256d mask = simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.0)), SIMDE_CMP_LT_OQ);\n      mask = simde_mm256_andnot_pd(matched, mask);\n\n      if (!simde_mm256_testz_pd(mask, mask)) {\n        matched = simde_mm256_or_pd(matched, mask);\n\n        /* t =  1/(sqrt(-log(a))) */\n        simde__m256d t = simde_x_mm256_negate_pd(simde_mm256_log_pd(a));\n        t = simde_mm256_sqrt_pd(t);\n        t = simde_mm256_div_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0)), t);\n\n        const simde__m256d p[] = {\n          simde_mm256_set1_pd(SIMDE_FLOAT64_C( 0.00980456202915)),\n          simde_mm256_set1_pd(SIMDE_FLOAT64_C( 0.36366788917100)),\n    
      simde_mm256_set1_pd(SIMDE_FLOAT64_C( 0.97302949837000)),\n          simde_mm256_set1_pd(SIMDE_FLOAT64_C(-0.5374947401000))\n        };\n\n        const simde__m256d q[] = {\n          simde_mm256_set1_pd(SIMDE_FLOAT64_C( 0.00980451277802)),\n          simde_mm256_set1_pd(SIMDE_FLOAT64_C( 0.36369997154400)),\n          simde_mm256_set1_pd(SIMDE_FLOAT64_C( 1.00000000000000))\n        };\n\n        /* float numerator = (p[0] / t + p[1] + t * (p[2] + t * p[3])) */\n        simde__m256d numerator = simde_mm256_fmadd_pd(p[3], t, p[2]);\n        numerator = simde_mm256_fmadd_pd(numerator, t, p[1]);\n        numerator = simde_mm256_add_pd(numerator, simde_mm256_div_pd(p[0], t));\n\n        /* float denominator = (q[0] + t * (q[1] + t * (q[2]))) */\n        simde__m256d denominator = simde_mm256_fmadd_pd(q[2], t, q[1]);\n        denominator = simde_mm256_fmadd_pd(denominator, t, q[0]);\n\n        simde__m256d res = simde_mm256_div_pd(numerator, denominator);\n\n        retval = simde_mm256_or_pd(retval, simde_mm256_and_pd(mask, res));\n\n        if (simde_x_mm256_test_all_ones(simde_mm256_castpd_si256(matched))) {\n          return retval;\n        }\n      }\n    }\n\n    { /* else if (a == 0.0) */\n      simde__m256d mask = simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.0)), SIMDE_CMP_EQ_OQ);\n      mask = simde_mm256_andnot_pd(matched, mask);\n      matched = simde_mm256_or_pd(matched, mask);\n\n      simde__m256d res = simde_mm256_set1_pd(SIMDE_MATH_INFINITY);\n\n      retval = simde_mm256_or_pd(retval, simde_mm256_and_pd(mask, res));\n    }\n\n    { /* else */\n      /* (a >= 2.0) */\n      retval = simde_mm256_or_pd(retval, simde_mm256_andnot_pd(matched, simde_mm256_set1_pd(-SIMDE_MATH_INFINITY)));\n    }\n\n    return retval;\n  #else\n    simde__m256d_private\n      r_,\n      a_ = simde__m256d_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n      for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {\n        r_.m128d[i] = simde_mm_erfcinv_pd(a_.m128d[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = simde_math_erfcinv(a_.f64[i]);\n      }\n    #endif\n\n    return simde__m256d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_erfcinv_pd\n  #define _mm256_erfcinv_pd(a) simde_mm256_erfcinv_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_erfcinv_ps (simde__m512 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_erfcinv_ps(a);\n  #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) && (!defined(SIMDE_ARCH_ARM) || defined(SIMDE_ARCH_AARCH64))\n    /* The results on Arm are *slightly* off, which causes problems for\n     * the edge cases; for example, if you pass 2.0 sqrt will be called\n     * with a value of -0.0 instead of 0.0, resulting in a NaN. 
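That is why the\n     * #elif above excludes 32-bit Arm targets, which instead fall\n     * through to the mask-based implementation below. 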
*/\n    simde__m512_private\n      r_,\n      a_ = simde__m512_to_private(a);\n\n    for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {\n      r_.m256[i] = simde_mm256_erfcinv_ps(a_.m256[i]);\n    }\n    return simde__m512_from_private(r_);\n  #else\n    simde__m512 retval = simde_mm512_setzero_ps();\n    simde__mmask16 matched;\n\n    { /* if (a < 2.0f && a > 0.0625f) */\n      matched =  simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(2.0)), SIMDE_CMP_LT_OQ);\n      matched &= simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.0625)), SIMDE_CMP_GT_OQ);\n\n      if (matched != 0) {\n        retval = simde_mm512_erfinv_ps(simde_mm512_sub_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0)), a));\n      }\n\n      if (matched == UINT16_MAX) {\n        return retval;\n      }\n    }\n\n    { /* else if (a < 0.0625f && a > 0.0f) */\n      simde__mmask16 mask = simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.0625)), SIMDE_CMP_LT_OQ);\n      mask &= simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_GT_OQ);\n      mask = ~matched & mask;\n\n      if (mask != 0) {\n        matched = matched | mask;\n\n        /* t =  1/(sqrt(-log(a))) */\n        simde__m512 t = simde_x_mm512_negate_ps(simde_mm512_log_ps(a));\n        t = simde_mm512_sqrt_ps(t);\n        t = simde_mm512_div_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0)), t);\n\n        const simde__m512 p[] = {\n          simde_mm512_set1_ps(SIMDE_FLOAT32_C( 0.1550470003116)),\n          simde_mm512_set1_ps(SIMDE_FLOAT32_C( 1.382719649631)),\n          simde_mm512_set1_ps(SIMDE_FLOAT32_C( 0.690969348887)),\n          simde_mm512_set1_ps(SIMDE_FLOAT32_C(-1.128081391617)),\n          simde_mm512_set1_ps(SIMDE_FLOAT32_C( 0.680544246825)),\n          simde_mm512_set1_ps(SIMDE_FLOAT32_C(-0.16444156791))\n        };\n\n        const simde__m512 q[] = {\n          simde_mm512_set1_ps(SIMDE_FLOAT32_C( 0.155024849822)),\n          simde_mm512_set1_ps(SIMDE_FLOAT32_C( 1.385228141995)),\n          simde_mm512_set1_ps(SIMDE_FLOAT32_C( 1.000000000000))\n        };\n\n        /* float numerator = p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5]))) */\n        simde__m512 numerator = simde_mm512_fmadd_ps(p[5], t, p[4]);\n        numerator = simde_mm512_fmadd_ps(numerator, t, p[3]);\n        numerator = simde_mm512_fmadd_ps(numerator, t, p[2]);\n        numerator = simde_mm512_fmadd_ps(numerator, t, p[1]);\n        numerator = simde_mm512_add_ps(numerator, simde_mm512_div_ps(p[0], t));\n\n        /* float denominator = (q[0] + t * (q[1] + t * (q[2]))) */\n        simde__m512 denominator = simde_mm512_fmadd_ps(q[2], t, q[1]);\n        denominator = simde_mm512_fmadd_ps(denominator, t, q[0]);\n\n        simde__m512 res = simde_mm512_div_ps(numerator, denominator);\n\n        retval = simde_mm512_or_ps(retval, simde_mm512_maskz_mov_ps(mask, res));\n      }\n    }\n\n    { /* else if (a < 0.0f) */\n      simde__mmask16 mask = simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_LT_OQ);\n      mask = ~matched & mask;\n\n      if (mask != 0) {\n        matched = matched | mask;\n\n        /* t =  1/(sqrt(-log(a))) */\n        simde__m512 t = simde_x_mm512_negate_ps(simde_mm512_log_ps(a));\n        t = simde_mm512_sqrt_ps(t);\n        t = simde_mm512_div_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0)), t);\n\n        const simde__m512 p[] = {\n          simde_mm512_set1_ps(SIMDE_FLOAT32_C( 0.00980456202915)),\n          
simde_mm512_set1_ps(SIMDE_FLOAT32_C( 0.36366788917100)),\n          simde_mm512_set1_ps(SIMDE_FLOAT32_C( 0.97302949837000)),\n          simde_mm512_set1_ps(SIMDE_FLOAT32_C( -0.5374947401000))\n        };\n\n        const simde__m512 q[] = {\n          simde_mm512_set1_ps(SIMDE_FLOAT32_C( 0.00980451277802)),\n          simde_mm512_set1_ps(SIMDE_FLOAT32_C( 0.36369997154400)),\n          simde_mm512_set1_ps(SIMDE_FLOAT32_C( 1.00000000000000))\n        };\n\n        /* float numerator = (p[0] / t + p[1] + t * (p[2] + t * p[3])) */\n        simde__m512 numerator = simde_mm512_fmadd_ps(p[3], t, p[2]);\n        numerator = simde_mm512_fmadd_ps(numerator, t, p[1]);\n        numerator = simde_mm512_add_ps(numerator, simde_mm512_div_ps(p[0], t));\n\n        /* float denominator = (q[0] + t * (q[1] + t * (q[2]))) */\n        simde__m512 denominator = simde_mm512_fmadd_ps(q[2], t, q[1]);\n        denominator = simde_mm512_fmadd_ps(denominator, t, q[0]);\n\n        simde__m512 res = simde_mm512_div_ps(numerator, denominator);\n\n        retval = simde_mm512_or_ps(retval, simde_mm512_maskz_mov_ps(mask, res));\n\n        if (matched == UINT16_MAX) {\n          return retval;\n        }\n      }\n    }\n\n    { /* else if (a == 0.0f) */\n      simde__mmask16 mask = simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_EQ_OQ);\n      mask = ~matched & mask;\n      matched = matched | mask;\n\n      simde__m512 res = simde_mm512_set1_ps(SIMDE_MATH_INFINITYF);\n\n      retval = simde_mm512_or_ps(retval, simde_mm512_maskz_mov_ps(mask, res));\n    }\n\n    { /* else */\n      /* (a >= 2.0f) */\n      retval = simde_mm512_or_ps(retval, simde_mm512_maskz_mov_ps(~matched, simde_mm512_set1_ps(-SIMDE_MATH_INFINITYF)));\n    }\n\n    return retval;\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_erfcinv_ps\n  #define _mm512_erfcinv_ps(a) simde_mm512_erfcinv_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_erfcinv_pd (simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_erfcinv_pd(a);\n  #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n    simde__m512d_private\n      r_,\n      a_ = simde__m512d_to_private(a);\n\n    for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {\n      r_.m256d[i] = simde_mm256_erfcinv_pd(a_.m256d[i]);\n    }\n    return simde__m512d_from_private(r_);\n  #else\n    simde__m512d retval = simde_mm512_setzero_pd();\n    simde__mmask8 matched;\n\n    { /* if (a < 2.0f && a > 0.0625f) */\n      matched =  simde_mm512_cmp_pd_mask(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(2.0)), SIMDE_CMP_LT_OQ);\n      matched &= simde_mm512_cmp_pd_mask(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.0625)), SIMDE_CMP_GT_OQ);\n\n      if (matched != 0) {\n        retval = simde_mm512_maskz_mov_pd(matched, simde_mm512_erfinv_pd(simde_mm512_sub_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.0)), a)));\n      }\n\n      if (matched == UINT8_MAX) {\n        return retval;\n      }\n    }\n\n    { /* else if (a < 0.0625f && a > 0.0f) */\n      simde__mmask8 mask = simde_mm512_cmp_pd_mask(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.0625)), SIMDE_CMP_LT_OQ);\n      mask &= simde_mm512_cmp_pd_mask(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.0)), SIMDE_CMP_GT_OQ);\n      mask = ~matched & mask;\n\n      if (mask != 0) {\n        matched = matched | mask;\n\n        /* t =  1/(sqrt(-log(a))) */\n        simde__m512d t = simde_x_mm512_negate_pd(simde_mm512_log_pd(a));\n        t = simde_mm512_sqrt_pd(t);\n        t = 
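/* t = 1/sqrt(-log(a)) is the variable of the rational tail approximation below */ 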
simde_mm512_div_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.0)), t);\n\n        const simde__m512d p[] = {\n          simde_mm512_set1_pd(SIMDE_FLOAT64_C( 0.1550470003116)),\n          simde_mm512_set1_pd(SIMDE_FLOAT64_C( 1.382719649631)),\n          simde_mm512_set1_pd(SIMDE_FLOAT64_C( 0.690969348887)),\n          simde_mm512_set1_pd(SIMDE_FLOAT64_C(-1.128081391617)),\n          simde_mm512_set1_pd(SIMDE_FLOAT64_C( 0.680544246825)),\n          simde_mm512_set1_pd(SIMDE_FLOAT64_C(-0.16444156791))\n        };\n\n        const simde__m512d q[] = {\n          simde_mm512_set1_pd(SIMDE_FLOAT64_C( 0.155024849822)),\n          simde_mm512_set1_pd(SIMDE_FLOAT64_C( 1.385228141995)),\n          simde_mm512_set1_pd(SIMDE_FLOAT64_C( 1.000000000000))\n        };\n\n        /* float numerator = p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) */\n        simde__m512d numerator = simde_mm512_fmadd_pd(p[5], t, p[4]);\n        numerator = simde_mm512_fmadd_pd(numerator, t, p[3]);\n        numerator = simde_mm512_fmadd_pd(numerator, t, p[2]);\n        numerator = simde_mm512_fmadd_pd(numerator, t, p[1]);\n        numerator = simde_mm512_add_pd(numerator, simde_mm512_div_pd(p[0], t));\n\n        /* float denominator = (q[0] + t * (q[1] + t * (q[2]))) */\n        simde__m512d denominator = simde_mm512_fmadd_pd(q[2], t, q[1]);\n        denominator = simde_mm512_fmadd_pd(denominator, t, q[0]);\n\n        simde__m512d res = simde_mm512_div_pd(numerator, denominator);\n\n        retval = simde_mm512_or_pd(retval, simde_mm512_maskz_mov_pd(mask, res));\n      }\n    }\n\n    { /* else if (a < 0.0f) */\n      simde__mmask8 mask = simde_mm512_cmp_pd_mask(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.0)), SIMDE_CMP_LT_OQ);\n      mask = ~matched & mask;\n\n      if (mask != 0) {\n        matched = matched | mask;\n\n        /* t =  1/(sqrt(-log(a))) */\n        simde__m512d t = simde_x_mm512_negate_pd(simde_mm512_log_pd(a));\n        t = simde_mm512_sqrt_pd(t);\n        t = simde_mm512_div_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.0)), t);\n\n        const simde__m512d p[] = {\n          simde_mm512_set1_pd(SIMDE_FLOAT64_C( 0.00980456202915)),\n          simde_mm512_set1_pd(SIMDE_FLOAT64_C( 0.36366788917100)),\n          simde_mm512_set1_pd(SIMDE_FLOAT64_C( 0.97302949837000)),\n          simde_mm512_set1_pd(SIMDE_FLOAT64_C( -0.5374947401000))\n        };\n\n        const simde__m512d q[] = {\n          simde_mm512_set1_pd(SIMDE_FLOAT64_C( 0.00980451277802)),\n          simde_mm512_set1_pd(SIMDE_FLOAT64_C( 0.36369997154400)),\n          simde_mm512_set1_pd(SIMDE_FLOAT64_C( 1.00000000000000))\n        };\n\n        /* float numerator = (p[0] / t + p[1] + t * (p[2] + t * p[3])) */\n        simde__m512d numerator = simde_mm512_fmadd_pd(p[3], t, p[2]);\n        numerator = simde_mm512_fmadd_pd(numerator, t, p[1]);\n        numerator = simde_mm512_add_pd(numerator, simde_mm512_div_pd(p[0], t));\n\n        /* float denominator = (q[0] + t * (q[1] + t * (q[2]))) */\n        simde__m512d denominator = simde_mm512_fmadd_pd(q[2], t, q[1]);\n        denominator = simde_mm512_fmadd_pd(denominator, t, q[0]);\n\n        simde__m512d res = simde_mm512_div_pd(numerator, denominator);\n\n        retval = simde_mm512_or_pd(retval, simde_mm512_maskz_mov_pd(mask, res));\n\n        if (matched == UINT8_MAX) {\n          return retval;\n        }\n      }\n    }\n\n    { /* else if (a == 0.0f) */\n      simde__mmask8 mask = simde_mm512_cmp_pd_mask(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.0)), SIMDE_CMP_EQ_OQ);\n      mask = ~matched & 
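/* restrict to lanes no earlier branch handled */ 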
mask;\n      matched = matched | mask;\n\n      simde__m512d res = simde_mm512_set1_pd(SIMDE_MATH_INFINITY);\n\n      retval = simde_mm512_or_pd(retval, simde_mm512_maskz_mov_pd(mask, res));\n    }\n\n    { /* else */\n      /* (a >= 2.0f) */\n      retval = simde_mm512_or_pd(retval, simde_mm512_maskz_mov_pd(~matched, simde_mm512_set1_pd(-SIMDE_MATH_INFINITY)));\n    }\n\n    return retval;\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_erfcinv_pd\n  #define _mm512_erfcinv_pd(a) simde_mm512_erfcinv_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_mask_erfcinv_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_erfcinv_ps(src, k, a);\n  #else\n    return simde_mm512_mask_mov_ps(src, k, simde_mm512_erfcinv_ps(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_erfcinv_ps\n  #define _mm512_mask_erfcinv_ps(src, k, a) simde_mm512_mask_erfcinv_ps(src, k, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_mask_erfcinv_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_erfcinv_pd(src, k, a);\n  #else\n    return simde_mm512_mask_mov_pd(src, k, simde_mm512_erfcinv_pd(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_erfcinv_pd\n  #define _mm512_mask_erfcinv_pd(src, k, a) simde_mm512_mask_erfcinv_pd(src, k, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_logb_ps (simde__m128 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_logb_ps(a);\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n      r_.f32[i] = simde_math_logbf(a_.f32[i]);\n    }\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_logb_ps\n  #define _mm_logb_ps(a) simde_mm_logb_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_logb_pd (simde__m128d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_logb_pd(a);\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n      r_.f64[i] = simde_math_logb(a_.f64[i]);\n    }\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_logb_pd\n  #define _mm_logb_pd(a) simde_mm_logb_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_logb_ps (simde__m256 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_logb_ps(a);\n  #else\n    simde__m256_private\n      r_,\n      a_ = simde__m256_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {\n        r_.m128[i] = simde_mm_logb_ps(a_.m128[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = simde_math_logbf(a_.f32[i]);\n      }\n    #endif\n\n    return simde__m256_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_logb_ps\n  
#define _mm256_logb_ps(a) simde_mm256_logb_ps(a)\n#endif\n\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_logb_pd (simde__m256d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_logb_pd(a);\n  #else\n    simde__m256d_private\n      r_,\n      a_ = simde__m256d_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {\n        r_.m128d[i] = simde_mm_logb_pd(a_.m128d[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = simde_math_logb(a_.f64[i]);\n      }\n    #endif\n\n    return simde__m256d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_logb_pd\n  #define _mm256_logb_pd(a) simde_mm256_logb_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_logb_ps (simde__m512 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_logb_ps(a);\n  #else\n    simde__m512_private\n      r_,\n      a_ = simde__m512_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n      for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {\n        r_.m256[i] = simde_mm256_logb_ps(a_.m256[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = simde_math_logbf(a_.f32[i]);\n      }\n    #endif\n\n    return simde__m512_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_logb_ps\n  #define _mm512_logb_ps(a) simde_mm512_logb_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_logb_pd (simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_logb_pd(a);\n  #else\n    simde__m512d_private\n      r_,\n      a_ = simde__m512d_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n      for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {\n        r_.m256d[i] = simde_mm256_logb_pd(a_.m256d[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = simde_math_logb(a_.f64[i]);\n      }\n    #endif\n\n    return simde__m512d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_logb_pd\n  #define _mm512_logb_pd(a) simde_mm512_logb_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_mask_logb_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_logb_ps(src, k, a);\n  #else\n    return simde_mm512_mask_mov_ps(src, k, simde_mm512_logb_ps(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_logb_ps\n  #define _mm512_mask_logb_ps(src, k, a) simde_mm512_mask_logb_ps(src, k, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_mask_logb_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_logb_pd(src, k, a);\n  #else\n    return simde_mm512_mask_mov_pd(src, k, simde_mm512_logb_pd(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_logb_pd\n  #define _mm512_mask_logb_pd(src, k, a) simde_mm512_mask_logb_pd(src, k, 
a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_log2_ps (simde__m128 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_log2_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)\n    #if SIMDE_MATH_SLEEF_VERSION_CHECK(3,4,0) && (SIMDE_ACCURACY_PREFERENCE <= 1)\n      return Sleef_log2f4_u35(a);\n    #else\n      return Sleef_log2f4_u10(a);\n    #endif\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n      r_.f32[i] = simde_math_log2f(a_.f32[i]);\n    }\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_log2_ps\n  #define _mm_log2_ps(a) simde_mm_log2_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_log2_pd (simde__m128d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_log2_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)\n    #if SIMDE_MATH_SLEEF_VERSION_CHECK(3,4,0) && (SIMDE_ACCURACY_PREFERENCE <= 1)\n      return Sleef_log2d2_u35(a);\n    #else\n      return Sleef_log2d2_u10(a);\n    #endif\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n      r_.f64[i] = simde_math_log2(a_.f64[i]);\n    }\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_log2_pd\n  #define _mm_log2_pd(a) simde_mm_log2_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_log2_ps (simde__m256 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_log2_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)\n    #if SIMDE_MATH_SLEEF_VERSION_CHECK(3,4,0) && (SIMDE_ACCURACY_PREFERENCE <= 1)\n      return Sleef_log2f8_u35(a);\n    #else\n      return Sleef_log2f8_u10(a);\n    #endif\n  #else\n    simde__m256_private\n      r_,\n      a_ = simde__m256_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {\n        r_.m128[i] = simde_mm_log2_ps(a_.m128[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = simde_math_log2f(a_.f32[i]);\n      }\n    #endif\n\n    return simde__m256_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_log2_ps\n  #define _mm256_log2_ps(a) simde_mm256_log2_ps(a)\n#endif\n\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_log2_pd (simde__m256d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_log2_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)\n    #if SIMDE_MATH_SLEEF_VERSION_CHECK(3,4,0) && (SIMDE_ACCURACY_PREFERENCE <= 1)\n      return Sleef_log2d4_u35(a);\n    #else\n      return Sleef_log2d4_u10(a);\n    #endif\n  #else\n    simde__m256d_private\n      r_,\n      a_ = simde__m256d_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {\n        r_.m128d[i] = simde_mm_log2_pd(a_.m128d[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for 
(size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = simde_math_log2(a_.f64[i]);\n      }\n    #endif\n\n    return simde__m256d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_log2_pd\n  #define _mm256_log2_pd(a) simde_mm256_log2_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_log2_ps (simde__m512 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_log2_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    #if SIMDE_MATH_SLEEF_VERSION_CHECK(3,4,0) && (SIMDE_ACCURACY_PREFERENCE <= 1)\n      return Sleef_log2f16_u35(a);\n    #else\n      return Sleef_log2f16_u10(a);\n    #endif\n  #else\n    simde__m512_private\n      r_,\n      a_ = simde__m512_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n      for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {\n        r_.m256[i] = simde_mm256_log2_ps(a_.m256[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = simde_math_log2f(a_.f32[i]);\n      }\n    #endif\n\n    return simde__m512_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_log2_ps\n  #define _mm512_log2_ps(a) simde_mm512_log2_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_log2_pd (simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_log2_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    #if SIMDE_MATH_SLEEF_VERSION_CHECK(3,4,0) && (SIMDE_ACCURACY_PREFERENCE <= 1)\n      return Sleef_log2d8_u35(a);\n    #else\n      return Sleef_log2d8_u10(a);\n    #endif\n  #else\n    simde__m512d_private\n      r_,\n      a_ = simde__m512d_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n      for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {\n        r_.m256d[i] = simde_mm256_log2_pd(a_.m256d[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = simde_math_log2(a_.f64[i]);\n      }\n    #endif\n\n    return simde__m512d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_log2_pd\n  #define _mm512_log2_pd(a) simde_mm512_log2_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_mask_log2_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_log2_ps(src, k, a);\n  #else\n    return simde_mm512_mask_mov_ps(src, k, simde_mm512_log2_ps(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_log2_ps\n  #define _mm512_mask_log2_ps(src, k, a) simde_mm512_mask_log2_ps(src, k, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_mask_log2_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_log2_pd(src, k, a);\n  #else\n    return simde_mm512_mask_mov_pd(src, k, simde_mm512_log2_pd(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_log2_pd\n  #define _mm512_mask_log2_pd(src, k, a) simde_mm512_mask_log2_pd(src, k, 
a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_log1p_ps (simde__m128 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_log1p_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)\n    return Sleef_log1pf4_u10(a);\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n      r_.f32[i] = simde_math_log1pf(a_.f32[i]);\n    }\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_log1p_ps\n  #define _mm_log1p_ps(a) simde_mm_log1p_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_log1p_pd (simde__m128d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_log1p_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)\n    return Sleef_log1pd2_u10(a);\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n      r_.f64[i] = simde_math_log1p(a_.f64[i]);\n    }\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_log1p_pd\n  #define _mm_log1p_pd(a) simde_mm_log1p_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_log1p_ps (simde__m256 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_log1p_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)\n    return Sleef_log1pf8_u10(a);\n  #else\n    simde__m256_private\n      r_,\n      a_ = simde__m256_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {\n        r_.m128[i] = simde_mm_log1p_ps(a_.m128[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = simde_math_log1pf(a_.f32[i]);\n      }\n    #endif\n\n    return simde__m256_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_log1p_ps\n  #define _mm256_log1p_ps(a) simde_mm256_log1p_ps(a)\n#endif\n\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_log1p_pd (simde__m256d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_log1p_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)\n    return Sleef_log1pd4_u10(a);\n  #else\n    simde__m256d_private\n      r_,\n      a_ = simde__m256d_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {\n        r_.m128d[i] = simde_mm_log1p_pd(a_.m128d[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = simde_math_log1p(a_.f64[i]);\n      }\n    #endif\n\n    return simde__m256d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_log1p_pd\n  #define _mm256_log1p_pd(a) simde_mm256_log1p_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_log1p_ps (simde__m512 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_log1p_ps(a);\n  #elif 
defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return Sleef_log1pf16_u10(a);\n  #else\n    simde__m512_private\n      r_,\n      a_ = simde__m512_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n      for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {\n        r_.m256[i] = simde_mm256_log1p_ps(a_.m256[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = simde_math_log1pf(a_.f32[i]);\n      }\n    #endif\n\n    return simde__m512_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_log1p_ps\n  #define _mm512_log1p_ps(a) simde_mm512_log1p_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_log1p_pd (simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_log1p_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return Sleef_log1pd8_u10(a);\n  #else\n    simde__m512d_private\n      r_,\n      a_ = simde__m512d_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n      for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {\n        r_.m256d[i] = simde_mm256_log1p_pd(a_.m256d[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = simde_math_log1p(a_.f64[i]);\n      }\n    #endif\n\n    return simde__m512d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_log1p_pd\n  #define _mm512_log1p_pd(a) simde_mm512_log1p_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_mask_log1p_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_log1p_ps(src, k, a);\n  #else\n    return simde_mm512_mask_mov_ps(src, k, simde_mm512_log1p_ps(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_log1p_ps\n  #define _mm512_mask_log1p_ps(src, k, a) simde_mm512_mask_log1p_ps(src, k, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_mask_log1p_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_log1p_pd(src, k, a);\n  #else\n    return simde_mm512_mask_mov_pd(src, k, simde_mm512_log1p_pd(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_log1p_pd\n  #define _mm512_mask_log1p_pd(src, k, a) simde_mm512_mask_log1p_pd(src, k, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_log10_ps (simde__m128 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_log10_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)\n    return Sleef_log10f4_u10(a);\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n      r_.f32[i] = simde_math_log10f(a_.f32[i]);\n    }\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_log10_ps\n  #define _mm_log10_ps(a) simde_mm_log10_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_log10_pd (simde__m128d a) {\n  #if 
defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_log10_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)\n    return Sleef_log10d2_u10(a);\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n      r_.f64[i] = simde_math_log10(a_.f64[i]);\n    }\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_log10_pd\n  #define _mm_log10_pd(a) simde_mm_log10_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_log10_ps (simde__m256 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_log10_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)\n    return Sleef_log10f8_u10(a);\n  #else\n    simde__m256_private\n      r_,\n      a_ = simde__m256_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {\n        r_.m128[i] = simde_mm_log10_ps(a_.m128[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = simde_math_log10f(a_.f32[i]);\n      }\n    #endif\n\n    return simde__m256_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_log10_ps\n  #define _mm256_log10_ps(a) simde_mm256_log10_ps(a)\n#endif\n\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_log10_pd (simde__m256d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_log10_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)\n    return Sleef_log10d4_u10(a);\n  #else\n    simde__m256d_private\n      r_,\n      a_ = simde__m256d_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {\n        r_.m128d[i] = simde_mm_log10_pd(a_.m128d[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = simde_math_log10(a_.f64[i]);\n      }\n    #endif\n\n    return simde__m256d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_log10_pd\n  #define _mm256_log10_pd(a) simde_mm256_log10_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_log10_ps (simde__m512 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_log10_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return Sleef_log10f16_u10(a);\n  #else\n    simde__m512_private\n      r_,\n      a_ = simde__m512_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n      for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {\n        r_.m256[i] = simde_mm256_log10_ps(a_.m256[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = simde_math_log10f(a_.f32[i]);\n      }\n    #endif\n\n    return simde__m512_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_log10_ps\n  #define _mm512_log10_ps(a) simde_mm512_log10_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_log10_pd 
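/* same dispatch as the rest of the family: native SVML, then Sleef (u10 = at most 1.0 ULP error), then a portable fallback */ 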
(simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_log10_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return Sleef_log10d8_u10(a);\n  #else\n    simde__m512d_private\n      r_,\n      a_ = simde__m512d_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n      for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {\n        r_.m256d[i] = simde_mm256_log10_pd(a_.m256d[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = simde_math_log10(a_.f64[i]);\n      }\n    #endif\n\n    return simde__m512d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_log10_pd\n  #define _mm512_log10_pd(a) simde_mm512_log10_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_mask_log10_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_log10_ps(src, k, a);\n  #else\n    return simde_mm512_mask_mov_ps(src, k, simde_mm512_log10_ps(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_log10_ps\n  #define _mm512_mask_log10_ps(src, k, a) simde_mm512_mask_log10_ps(src, k, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_mask_log10_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_log10_pd(src, k, a);\n  #else\n    return simde_mm512_mask_mov_pd(src, k, simde_mm512_log10_pd(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_log10_pd\n  #define _mm512_mask_log10_pd(src, k, a) simde_mm512_mask_log10_pd(src, k, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_nearbyint_ps (simde__m512 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_nearbyint_ps(a);\n  #else\n    simde__m512_private\n      r_,\n      a_ = simde__m512_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n      r_.f32[i] = simde_math_nearbyintf(a_.f32[i]);\n    }\n\n    return simde__m512_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_nearbyint_ps\n  #define _mm512_nearbyint_ps(a) simde_mm512_nearbyint_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_nearbyint_pd (simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_nearbyint_pd(a);\n  #else\n    simde__m512d_private\n      r_,\n      a_ = simde__m512d_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n      r_.f64[i] = simde_math_nearbyint(a_.f64[i]);\n    }\n\n    return simde__m512d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_nearbyint_pd\n  #define _mm512_nearbyint_pd(a) simde_mm512_nearbyint_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_mask_nearbyint_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_nearbyint_ps(src, k, a);\n  #else\n    return simde_mm512_mask_mov_ps(src, k, simde_mm512_nearbyint_ps(a));\n  #endif\n}\n#if 
defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_nearbyint_ps\n  #define _mm512_mask_nearbyint_ps(src, k, a) simde_mm512_mask_nearbyint_ps(src, k, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_mask_nearbyint_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_nearbyint_pd(src, k, a);\n  #else\n    return simde_mm512_mask_mov_pd(src, k, simde_mm512_nearbyint_pd(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_nearbyint_pd\n  #define _mm512_mask_nearbyint_pd(src, k, a) simde_mm512_mask_nearbyint_pd(src, k, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_pow_ps (simde__m128 a, simde__m128 b) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_pow_ps(a, b);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)\n    return Sleef_powf4_u10(a, b);\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a),\n      b_ = simde__m128_to_private(b);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n      r_.f32[i] = simde_math_powf(a_.f32[i], b_.f32[i]);\n    }\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_pow_ps\n  #define _mm_pow_ps(a, b) simde_mm_pow_ps(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_pow_pd (simde__m128d a, simde__m128d b) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_pow_pd(a, b);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)\n    return Sleef_powd2_u10(a, b);\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a),\n      b_ = simde__m128d_to_private(b);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n      r_.f64[i] = simde_math_pow(a_.f64[i], b_.f64[i]);\n    }\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_pow_pd\n  #define _mm_pow_pd(a, b) simde_mm_pow_pd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_pow_ps (simde__m256 a, simde__m256 b) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_pow_ps(a, b);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)\n    return Sleef_powf8_u10(a, b);\n  #else\n    simde__m256_private\n      r_,\n      a_ = simde__m256_to_private(a),\n      b_ = simde__m256_to_private(b);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n      r_.f32[i] = simde_math_powf(a_.f32[i], b_.f32[i]);\n    }\n\n    return simde__m256_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_pow_ps\n  #define _mm256_pow_ps(a, b) simde_mm256_pow_ps(a, b)\n#endif\n\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_pow_pd (simde__m256d a, simde__m256d b) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_pow_pd(a, b);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)\n    return Sleef_powd4_u10(a, b);\n  #else\n    simde__m256d_private\n      r_,\n      a_ = simde__m256d_to_private(a),\n      b_ = simde__m256d_to_private(b);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < 
(sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n      r_.f64[i] = simde_math_pow(a_.f64[i], b_.f64[i]);\n    }\n\n    return simde__m256d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_pow_pd\n  #define _mm256_pow_pd(a, b) simde_mm256_pow_pd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_pow_ps (simde__m512 a, simde__m512 b) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_pow_ps(a, b);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return Sleef_powf16_u10(a, b);\n  #else\n    simde__m512_private\n      r_,\n      a_ = simde__m512_to_private(a),\n      b_ = simde__m512_to_private(b);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n      r_.f32[i] = simde_math_powf(a_.f32[i], b_.f32[i]);\n    }\n\n    return simde__m512_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_pow_ps\n  #define _mm512_pow_ps(a, b) simde_mm512_pow_ps(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_pow_pd (simde__m512d a, simde__m512d b) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_pow_pd(a, b);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return Sleef_powd8_u10(a, b);\n  #else\n    simde__m512d_private\n      r_,\n      a_ = simde__m512d_to_private(a),\n      b_ = simde__m512d_to_private(b);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n      r_.f64[i] = simde_math_pow(a_.f64[i], b_.f64[i]);\n    }\n\n    return simde__m512d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_pow_pd\n  #define _mm512_pow_pd(a, b) simde_mm512_pow_pd(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_mask_pow_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_pow_ps(src, k, a, b);\n  #else\n    return simde_mm512_mask_mov_ps(src, k, simde_mm512_pow_ps(a, b));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_pow_ps\n  #define _mm512_mask_pow_ps(src, k, a, b) simde_mm512_mask_pow_ps(src, k, a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_mask_pow_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_pow_pd(src, k, a, b);\n  #else\n    return simde_mm512_mask_mov_pd(src, k, simde_mm512_pow_pd(a, b));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_pow_pd\n  #define _mm512_mask_pow_pd(src, k, a, b) simde_mm512_mask_pow_pd(src, k, a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_clog_ps (simde__m128 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_clog_ps(a);\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a);\n\n    simde__m128_private pow_res_ = simde__m128_to_private(simde_mm_pow_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(2.0))));\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) {\n      r_.f32[  i  ] = simde_math_logf(simde_math_sqrtf(pow_res_.f32[i] + pow_res_.f32[i+1]));\n      r_.f32[i + 1] = 
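/* imaginary part: arg(z) = atan2(im, re); the line above stores log(|z|) */ 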
simde_math_atan2f(a_.f32[i + 1], a_.f32[i]);\n    }\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_clog_ps\n  #define _mm_clog_ps(a) simde_mm_clog_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_clog_ps (simde__m256 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_clog_ps(a);\n  #else\n    simde__m256_private\n      r_,\n      a_ = simde__m256_to_private(a);\n\n    simde__m256_private pow_res_ = simde__m256_to_private(simde_mm256_pow_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(2.0))));\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) {\n      r_.f32[  i  ] = simde_math_logf(simde_math_sqrtf(pow_res_.f32[i] + pow_res_.f32[i + 1]));\n      r_.f32[i + 1] = simde_math_atan2f(a_.f32[i + 1], a_.f32[i]);\n    }\n\n    return simde__m256_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_clog_ps\n  #define _mm256_clog_ps(a) simde_mm256_clog_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_csqrt_ps (simde__m128 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_csqrt_ps(a);\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a);\n\n    simde__m128 pow_res= simde_mm_pow_ps(a,simde_mm_set1_ps(SIMDE_FLOAT32_C(2.0)));\n    simde__m128_private pow_res_=simde__m128_to_private(pow_res);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i+=2) {\n      simde_float32 sign = simde_math_copysignf(SIMDE_FLOAT32_C(1.0), a_.f32[i + 1]);\n      simde_float32 temp = simde_math_sqrtf(pow_res_.f32[i] + pow_res_.f32[i+1]);\n\n      r_.f32[  i  ] =       simde_math_sqrtf(( a_.f32[i] + temp) / SIMDE_FLOAT32_C(2.0));\n      r_.f32[i + 1] = sign * simde_math_sqrtf((-a_.f32[i] + temp) / SIMDE_FLOAT32_C(2.0));\n    }\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_csqrt_ps\n  #define _mm_csqrt_ps(a) simde_mm_csqrt_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_csqrt_ps (simde__m256 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_csqrt_ps(a);\n  #else\n    simde__m256_private\n      r_,\n      a_ = simde__m256_to_private(a);\n\n    simde__m256 pow_res= simde_mm256_pow_ps(a,simde_mm256_set1_ps(SIMDE_FLOAT32_C(2.0)));\n    simde__m256_private pow_res_=simde__m256_to_private(pow_res);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i+=2) {\n      simde_float32 sign = simde_math_copysignf(SIMDE_FLOAT32_C(1.0), a_.f32[i + 1]);\n      simde_float32 temp = simde_math_sqrtf(pow_res_.f32[i] + pow_res_.f32[i+1]);\n\n      r_.f32[  i  ] =       simde_math_sqrtf(( a_.f32[i] + temp) / SIMDE_FLOAT32_C(2.0));\n      r_.f32[i + 1] = sign * simde_math_sqrtf((-a_.f32[i] + temp) / SIMDE_FLOAT32_C(2.0));\n    }\n\n    return simde__m256_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_csqrt_ps\n  #define _mm256_csqrt_ps(a) simde_mm256_csqrt_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_rem_epi8 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_rem_epi8(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = 
simde__m128i_to_private(b);\n\n    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104)\n      r_.i8 = a_.i8 % b_.i8;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {\n        r_.i8[i] = a_.i8[i] % b_.i8[i];\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_rem_epi8\n  #define _mm_rem_epi8(a, b) simde_mm_rem_epi8((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_rem_epi16 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_rem_epi16(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104)\n      r_.i16 = a_.i16 % b_.i16;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n        r_.i16[i] = a_.i16[i] % b_.i16[i];\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_rem_epi16\n  #define _mm_rem_epi16(a, b) simde_mm_rem_epi16((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_rem_epi32 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_rem_epi32(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104)\n      r_.i32 = a_.i32 % b_.i32;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n        r_.i32[i] = a_.i32[i] % b_.i32[i];\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#define simde_mm_irem_epi32(a, b) simde_mm_rem_epi32(a, b)\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_rem_epi32\n  #define _mm_rem_epi32(a, b) simde_mm_rem_epi32(a, b)\n  #undef _mm_irem_epi32\n  #define _mm_irem_epi32(a, b) simde_mm_rem_epi32(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_rem_epi64 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_rem_epi64(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104)\n      r_.i64 = a_.i64 % b_.i64;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {\n        r_.i64[i] = a_.i64[i] % b_.i64[i];\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_rem_epi64\n  #define _mm_rem_epi64(a, b) simde_mm_rem_epi64((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_rem_epu8 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_rem_epu8(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104)\n      r_.u8 = a_.u8 % b_.u8;\n    #else\n      
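/* portable path: unsigned lane-wise remainder via C's % operator */\n      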
SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {\n        r_.u8[i] = a_.u8[i] % b_.u8[i];\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_rem_epu8\n  #define _mm_rem_epu8(a, b) simde_mm_rem_epu8((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_rem_epu16 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_rem_epu16(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104)\n      r_.u16 = a_.u16 % b_.u16;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {\n        r_.u16[i] = a_.u16[i] % b_.u16[i];\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_rem_epu16\n  #define _mm_rem_epu16(a, b) simde_mm_rem_epu16((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_rem_epu32 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_rem_epu32(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104)\n      r_.u32 = a_.u32 % b_.u32;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {\n        r_.u32[i] = a_.u32[i] % b_.u32[i];\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#define simde_mm_urem_epi32(a, b) simde_mm_rem_epu32(a, b)\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_rem_epu32\n  #define _mm_rem_epu32(a, b) simde_mm_rem_epu32(a, b)\n  #undef _mm_urem_epi32\n  #define _mm_urem_epi32(a, b) simde_mm_rem_epu32(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_rem_epu64 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_rem_epu64(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104)\n      r_.u64 = a_.u64 % b_.u64;\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {\n        r_.u64[i] = a_.u64[i] % b_.u64[i];\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_rem_epu64\n  #define _mm_rem_epu64(a, b) simde_mm_rem_epu64((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_rem_epi8 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_rem_epi8(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104)\n      r_.i8 = a_.i8 % b_.i8;\n    #else\n      #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n        for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) {\n          r_.m128i[i] = 
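/* delegate each 128-bit half to the SSE helper */ 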
simde_mm_rem_epi8(a_.m128i[i], b_.m128i[i]);\n        }\n      #else\n        SIMDE_VECTORIZE\n        for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {\n          r_.i8[i] = a_.i8[i] % b_.i8[i];\n        }\n      #endif\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_rem_epi8\n  #define _mm256_rem_epi8(a, b) simde_mm256_rem_epi8((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_rem_epi16 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_rem_epi16(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104)\n      r_.i16 = a_.i16 % b_.i16;\n    #else\n      #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n        for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) {\n          r_.m128i[i] = simde_mm_rem_epi16(a_.m128i[i], b_.m128i[i]);\n        }\n      #else\n        SIMDE_VECTORIZE\n        for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n          r_.i16[i] = a_.i16[i] % b_.i16[i];\n        }\n       #endif\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_rem_epi16\n  #define _mm256_rem_epi16(a, b) simde_mm256_rem_epi16((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_rem_epi32 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_rem_epi32(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104)\n      r_.i32 = a_.i32 % b_.i32;\n    #else\n      #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n        for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) {\n          r_.m128i[i] = simde_mm_rem_epi32(a_.m128i[i], b_.m128i[i]);\n        }\n      #else\n        SIMDE_VECTORIZE\n        for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n          r_.i32[i] = a_.i32[i] % b_.i32[i];\n        }\n       #endif\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#define simde_mm256_irem_epi32(a, b) simde_mm256_rem_epi32(a, b)\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_rem_epi32\n  #define _mm256_rem_epi32(a, b) simde_mm256_rem_epi32(a, b)\n  #undef _mm256_irem_epi32\n  #define _mm256_irem_epi32(a, b) simde_mm256_rem_epi32(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_rem_epi64 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_rem_epi64(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104)\n      r_.i64 = a_.i64 % b_.i64;\n    #else\n      #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n        for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) {\n          r_.m128i[i] = simde_mm_rem_epi64(a_.m128i[i], b_.m128i[i]);\n        }\n      #else\n        SIMDE_VECTORIZE\n        for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) 
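/* C's % truncates toward zero, matching the SVML remainder semantics */ 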
{\n          r_.i64[i] = a_.i64[i] % b_.i64[i];\n        }\n        #endif\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_rem_epi64\n  #define _mm256_rem_epi64(a, b) simde_mm256_rem_epi64((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_rem_epu8 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_rem_epu8(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104)\n      r_.u8 = a_.u8 % b_.u8;\n    #else\n      #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n        for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) {\n          r_.m128i[i] = simde_mm_rem_epu8(a_.m128i[i], b_.m128i[i]);\n        }\n      #else\n        SIMDE_VECTORIZE\n        for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {\n          r_.u8[i] = a_.u8[i] % b_.u8[i];\n        }\n      #endif\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_rem_epu8\n  #define _mm256_rem_epu8(a, b) simde_mm256_rem_epu8((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_rem_epu16 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_rem_epu16(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104)\n      r_.u16 = a_.u16 % b_.u16;\n    #else\n      #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n        for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) {\n          r_.m128i[i] = simde_mm_rem_epu16(a_.m128i[i], b_.m128i[i]);\n        }\n      #else\n        SIMDE_VECTORIZE\n        for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {\n          r_.u16[i] = a_.u16[i] % b_.u16[i];\n        }\n      #endif\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_rem_epu16\n  #define _mm256_rem_epu16(a, b) simde_mm256_rem_epu16((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_rem_epu32 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_rem_epu32(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104)\n      r_.u32 = a_.u32 % b_.u32;\n    #else\n      #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n        for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) {\n          r_.m128i[i] = simde_mm_rem_epu32(a_.m128i[i], b_.m128i[i]);\n        }\n      #else\n        SIMDE_VECTORIZE\n        for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {\n          r_.u32[i] = a_.u32[i] % b_.u32[i];\n        }\n      #endif\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#define simde_mm256_urem_epi32(a, b) simde_mm256_rem_epu32(a, b)\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_rem_epu32\n  #define _mm256_rem_epu32(a, b) 
simde_mm256_rem_epu32(a, b)\n  #undef _mm256_urem_epi32\n  #define _mm256_urem_epi32(a, b) simde_mm256_rem_epu32(a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_rem_epu64 (simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_rem_epu64(a, b);\n  #else\n    simde__m256i_private\n      r_,\n      a_ = simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b);\n\n    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104)\n      r_.u64 = a_.u64 % b_.u64;\n    #else\n      #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n        for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) {\n          r_.m128i[i] = simde_mm_rem_epu64(a_.m128i[i], b_.m128i[i]);\n        }\n      #else\n        SIMDE_VECTORIZE\n        for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {\n          r_.u64[i] = a_.u64[i] % b_.u64[i];\n        }\n      #endif\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_rem_epu64\n  #define _mm256_rem_epu64(a, b) simde_mm256_rem_epu64((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512i\nsimde_mm512_rem_epi8 (simde__m512i a, simde__m512i b) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_rem_epi8(a, b);\n  #else\n    simde__m512i_private\n      r_,\n      a_ = simde__m512i_to_private(a),\n      b_ = simde__m512i_to_private(b);\n\n    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104)\n      r_.i8 = a_.i8 % b_.i8;\n    #else\n      #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n        for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {\n          r_.m256i[i] = simde_mm256_rem_epi8(a_.m256i[i], b_.m256i[i]);\n        }\n      #else\n        SIMDE_VECTORIZE\n        for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {\n          r_.i8[i] = a_.i8[i] % b_.i8[i];\n        }\n      #endif\n    #endif\n\n    return simde__m512i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_rem_epi8\n  #define _mm512_rem_epi8(a, b) simde_mm512_rem_epi8((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512i\nsimde_mm512_rem_epi16 (simde__m512i a, simde__m512i b) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_rem_epi16(a, b);\n  #else\n    simde__m512i_private\n      r_,\n      a_ = simde__m512i_to_private(a),\n      b_ = simde__m512i_to_private(b);\n\n    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104)\n      r_.i16 = a_.i16 % b_.i16;\n    #else\n      #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n        for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {\n          r_.m256i[i] = simde_mm256_rem_epi16(a_.m256i[i], b_.m256i[i]);\n        }\n      #else\n        SIMDE_VECTORIZE\n        for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n          r_.i16[i] = a_.i16[i] % b_.i16[i];\n        }\n      #endif\n    #endif\n\n    return simde__m512i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_rem_epi16\n  #define _mm512_rem_epi16(a, b) simde_mm512_rem_epi16((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512i\nsimde_mm512_rem_epi32 (simde__m512i a, simde__m512i b) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return 
_mm512_rem_epi32(a, b);\n  #else\n    simde__m512i_private\n      r_,\n      a_ = simde__m512i_to_private(a),\n      b_ = simde__m512i_to_private(b);\n\n    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104)\n      r_.i32 = a_.i32 % b_.i32;\n    #else\n      #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n        for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {\n          r_.m256i[i] = simde_mm256_rem_epi32(a_.m256i[i], b_.m256i[i]);\n        }\n      #else\n        SIMDE_VECTORIZE\n        for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n          r_.i32[i] = a_.i32[i] % b_.i32[i];\n        }\n      #endif\n    #endif\n\n    return simde__m512i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_rem_epi32\n  #define _mm512_rem_epi32(a, b) simde_mm512_rem_epi32((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512i\nsimde_mm512_mask_rem_epi32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_rem_epi32(src, k, a, b);\n  #else\n    return simde_mm512_mask_mov_epi32(src, k, simde_mm512_rem_epi32(a, b));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_rem_epi32\n  #define _mm512_mask_rem_epi32(src, k, a, b) simde_mm512_mask_rem_epi32(src, k, a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512i\nsimde_mm512_rem_epi64 (simde__m512i a, simde__m512i b) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_rem_epi64(a, b);\n  #else\n    simde__m512i_private\n      r_,\n      a_ = simde__m512i_to_private(a),\n      b_ = simde__m512i_to_private(b);\n\n    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104)\n      r_.i64 = a_.i64 % b_.i64;\n    #else\n      #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n        for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {\n          r_.m256i[i] = simde_mm256_rem_epi64(a_.m256i[i], b_.m256i[i]);\n        }\n      #else\n        SIMDE_VECTORIZE\n        for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {\n          r_.i64[i] = a_.i64[i] % b_.i64[i];\n        }\n      #endif\n    #endif\n\n    return simde__m512i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_rem_epi64\n  #define _mm512_rem_epi64(a, b) simde_mm512_rem_epi64((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512i\nsimde_mm512_rem_epu8 (simde__m512i a, simde__m512i b) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_rem_epu8(a, b);\n  #else\n    simde__m512i_private\n      r_,\n      a_ = simde__m512i_to_private(a),\n      b_ = simde__m512i_to_private(b);\n\n    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104)\n      r_.u8 = a_.u8 % b_.u8;\n    #else\n      #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n        for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {\n          r_.m256i[i] = simde_mm256_rem_epu8(a_.m256i[i], b_.m256i[i]);\n        }\n      #else\n        SIMDE_VECTORIZE\n        for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {\n          r_.u8[i] = a_.u8[i] % b_.u8[i];\n        }\n      #endif\n    #endif\n\n    return simde__m512i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_rem_epu8\n  #define 
_mm512_rem_epu8(a, b) simde_mm512_rem_epu8((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512i\nsimde_mm512_rem_epu16 (simde__m512i a, simde__m512i b) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_rem_epu16(a, b);\n  #else\n    simde__m512i_private\n      r_,\n      a_ = simde__m512i_to_private(a),\n      b_ = simde__m512i_to_private(b);\n\n    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104)\n      r_.u16 = a_.u16 % b_.u16;\n    #else\n      #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n        for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {\n          r_.m256i[i] = simde_mm256_rem_epu16(a_.m256i[i], b_.m256i[i]);\n        }\n      #else\n        SIMDE_VECTORIZE\n        for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {\n          r_.u16[i] = a_.u16[i] % b_.u16[i];\n        }\n      #endif\n    #endif\n\n    return simde__m512i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_rem_epu16\n  #define _mm512_rem_epu16(a, b) simde_mm512_rem_epu16((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512i\nsimde_mm512_rem_epu32 (simde__m512i a, simde__m512i b) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_rem_epu32(a, b);\n  #else\n    simde__m512i_private\n      r_,\n      a_ = simde__m512i_to_private(a),\n      b_ = simde__m512i_to_private(b);\n\n    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104)\n      r_.u32 = a_.u32 % b_.u32;\n    #else\n      #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n        for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {\n          r_.m256i[i] = simde_mm256_rem_epu32(a_.m256i[i], b_.m256i[i]);\n        }\n      #else\n        SIMDE_VECTORIZE\n        for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {\n          r_.u32[i] = a_.u32[i] % b_.u32[i];\n        }\n      #endif\n    #endif\n\n    return simde__m512i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_rem_epu32\n  #define _mm512_rem_epu32(a, b) simde_mm512_rem_epu32((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512i\nsimde_mm512_mask_rem_epu32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_rem_epu32(src, k, a, b);\n  #else\n    return simde_mm512_mask_mov_epi32(src, k, simde_mm512_rem_epu32(a, b));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_rem_epu32\n  #define _mm512_mask_rem_epu32(src, k, a, b) simde_mm512_mask_rem_epu32(src, k, a, b)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512i\nsimde_mm512_rem_epu64 (simde__m512i a, simde__m512i b) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_rem_epu64(a, b);\n  #else\n    simde__m512i_private\n      r_,\n      a_ = simde__m512i_to_private(a),\n      b_ = simde__m512i_to_private(b);\n\n    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104)\n      r_.u64 = a_.u64 % b_.u64;\n    #else\n      #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n        for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {\n          r_.m256i[i] = simde_mm256_rem_epu64(a_.m256i[i], b_.m256i[i]);\n        }\n      #else\n        SIMDE_VECTORIZE\n        for (size_t i = 0 ; i < (sizeof(r_.u64) / 
sizeof(r_.u64[0])) ; i++) {\n          r_.u64[i] = a_.u64[i] % b_.u64[i];\n        }\n      #endif\n    #endif\n\n    return simde__m512i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_rem_epu64\n  #define _mm512_rem_epu64(a, b) simde_mm512_rem_epu64((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_recip_ps (simde__m512 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_recip_ps(a);\n  #else\n    return simde_mm512_div_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0)), a);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_recip_ps\n  #define _mm512_recip_ps(a) simde_mm512_recip_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_recip_pd (simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_recip_pd(a);\n  #else\n    return simde_mm512_div_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.0)), a);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_recip_pd\n  #define _mm512_recip_pd(a) simde_mm512_recip_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_mask_recip_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_recip_ps(src, k, a);\n  #else\n    return simde_mm512_mask_mov_ps(src, k, simde_mm512_recip_ps(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_recip_ps\n  #define _mm512_mask_recip_ps(src, k, a) simde_mm512_mask_recip_ps(src, k, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_mask_recip_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_recip_pd(src, k, a);\n  #else\n    return simde_mm512_mask_mov_pd(src, k, simde_mm512_recip_pd(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_recip_pd\n  #define _mm512_mask_recip_pd(src, k, a) simde_mm512_mask_recip_pd(src, k, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_rint_ps (simde__m512 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_rint_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return Sleef_rintf16(a);\n  #else\n    simde__m512_private\n      r_,\n      a_ = simde__m512_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n      r_.f32[i] = simde_math_rintf(a_.f32[i]);\n    }\n\n    return simde__m512_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_rint_ps\n  #define _mm512_rint_ps(a) simde_mm512_rint_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_rint_pd (simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_rint_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return Sleef_rintd8(a);\n  #else\n    simde__m512d_private\n      r_,\n      a_ = simde__m512d_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n      r_.f64[i] = simde_math_rint(a_.f64[i]);\n    }\n\n    return simde__m512d_from_private(r_);\n  #endif\n}\n#if 
defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_rint_pd\n  #define _mm512_rint_pd(a) simde_mm512_rint_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_mask_rint_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_rint_ps(src, k, a);\n  #else\n    return simde_mm512_mask_mov_ps(src, k, simde_mm512_rint_ps(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_rint_ps\n  #define _mm512_mask_rint_ps(src, k, a) simde_mm512_mask_rint_ps(src, k, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_mask_rint_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_rint_pd(src, k, a);\n  #else\n    return simde_mm512_mask_mov_pd(src, k, simde_mm512_rint_pd(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_rint_pd\n  #define _mm512_mask_rint_pd(src, k, a) simde_mm512_mask_rint_pd(src, k, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_sin_ps (simde__m128 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_sin_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)\n    #if SIMDE_ACCURACY_PREFERENCE > 1\n      return Sleef_sinf4_u10(a);\n    #else\n      return Sleef_sinf4_u35(a);\n    #endif\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n      r_.f32[i] = simde_math_sinf(a_.f32[i]);\n    }\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_sin_ps\n  #define _mm_sin_ps(a) simde_mm_sin_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_sin_pd (simde__m128d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_sin_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)\n    #if SIMDE_ACCURACY_PREFERENCE > 1\n      return Sleef_sind2_u10(a);\n    #else\n      return Sleef_sind2_u35(a);\n    #endif\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n      r_.f64[i] = simde_math_sin(a_.f64[i]);\n    }\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_sin_pd\n  #define _mm_sin_pd(a) simde_mm_sin_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_sin_ps (simde__m256 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_sin_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)\n    #if SIMDE_ACCURACY_PREFERENCE > 1\n      return Sleef_sinf8_u10(a);\n    #else\n      return Sleef_sinf8_u35(a);\n    #endif\n  #else\n    simde__m256_private\n      r_,\n      a_ = simde__m256_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {\n        r_.m128[i] = simde_mm_sin_ps(a_.m128[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = simde_math_sinf(a_.f32[i]);\n      }\n    
#endif\n\n    return simde__m256_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_sin_ps\n  #define _mm256_sin_ps(a) simde_mm256_sin_ps(a)\n#endif\n\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_sin_pd (simde__m256d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_sin_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)\n    #if SIMDE_ACCURACY_PREFERENCE > 1\n      return Sleef_sind4_u10(a);\n    #else\n      return Sleef_sind4_u35(a);\n    #endif\n  #else\n    simde__m256d_private\n      r_,\n      a_ = simde__m256d_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {\n        r_.m128d[i] = simde_mm_sin_pd(a_.m128d[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = simde_math_sin(a_.f64[i]);\n      }\n    #endif\n\n    return simde__m256d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_sin_pd\n  #define _mm256_sin_pd(a) simde_mm256_sin_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_sin_ps (simde__m512 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_sin_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    #if SIMDE_ACCURACY_PREFERENCE > 1\n      return Sleef_sinf16_u10(a);\n    #else\n      return Sleef_sinf16_u35(a);\n    #endif\n  #else\n    simde__m512_private\n      r_,\n      a_ = simde__m512_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n      for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {\n        r_.m256[i] = simde_mm256_sin_ps(a_.m256[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = simde_math_sinf(a_.f32[i]);\n      }\n    #endif\n\n    return simde__m512_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_sin_ps\n  #define _mm512_sin_ps(a) simde_mm512_sin_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_sin_pd (simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_sin_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    #if SIMDE_ACCURACY_PREFERENCE > 1\n      return Sleef_sind8_u10(a);\n    #else\n      return Sleef_sind8_u35(a);\n    #endif\n  #else\n    simde__m512d_private\n      r_,\n      a_ = simde__m512d_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n      for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {\n        r_.m256d[i] = simde_mm256_sin_pd(a_.m256d[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = simde_math_sin(a_.f64[i]);\n      }\n    #endif\n\n    return simde__m512d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_sin_pd\n  #define _mm512_sin_pd(a) simde_mm512_sin_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_mask_sin_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_sin_ps(src, k, a);\n  
#else\n    return simde_mm512_mask_mov_ps(src, k, simde_mm512_sin_ps(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_sin_ps\n  #define _mm512_mask_sin_ps(src, k, a) simde_mm512_mask_sin_ps(src, k, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_mask_sin_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_sin_pd(src, k, a);\n  #else\n    return simde_mm512_mask_mov_pd(src, k, simde_mm512_sin_pd(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_sin_pd\n  #define _mm512_mask_sin_pd(src, k, a) simde_mm512_mask_sin_pd(src, k, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_sincos_ps (simde__m128* mem_addr, simde__m128 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_sincos_ps(HEDLEY_REINTERPRET_CAST(__m128*, mem_addr), a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)\n    Sleef___m128_2 temp;\n\n    #if SIMDE_ACCURACY_PREFERENCE > 1\n      temp = Sleef_sincosf4_u10(a);\n    #else\n      temp = Sleef_sincosf4_u35(a);\n    #endif\n\n    *mem_addr = temp.y;\n    return temp.x;\n  #else\n    simde__m128 r;\n\n    r = simde_mm_sin_ps(a);\n    *mem_addr = simde_mm_cos_ps(a);\n\n    return r;\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_sincos_ps\n  #define _mm_sincos_ps(mem_addr, a) simde_mm_sincos_ps((mem_addr),(a))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_sincos_pd (simde__m128d* mem_addr, simde__m128d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_sincos_pd(HEDLEY_REINTERPRET_CAST(__m128d*, mem_addr), a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)\n    Sleef___m128d_2 temp;\n\n    #if SIMDE_ACCURACY_PREFERENCE > 1\n      temp = Sleef_sincosd2_u10(a);\n    #else\n      temp = Sleef_sincosd2_u35(a);\n    #endif\n\n    *mem_addr = temp.y;\n    return temp.x;\n  #else\n    simde__m128d r;\n\n    r = simde_mm_sin_pd(a);\n    *mem_addr = simde_mm_cos_pd(a);\n\n    return r;\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_sincos_pd\n  #define _mm_sincos_pd(mem_addr, a) simde_mm_sincos_pd((mem_addr),(a))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_sincos_ps (simde__m256* mem_addr, simde__m256 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_sincos_ps(HEDLEY_REINTERPRET_CAST(__m256*, mem_addr), a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)\n    Sleef___m256_2 temp;\n\n    #if SIMDE_ACCURACY_PREFERENCE > 1\n      temp = Sleef_sincosf8_u10(a);\n    #else\n      temp = Sleef_sincosf8_u35(a);\n    #endif\n\n    *mem_addr = temp.y;\n    return temp.x;\n  #else\n    simde__m256 r;\n\n    r = simde_mm256_sin_ps(a);\n    *mem_addr = simde_mm256_cos_ps(a);\n\n    return r;\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_sincos_ps\n  #define _mm256_sincos_ps(mem_addr, a) simde_mm256_sincos_ps((mem_addr),(a))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_sincos_pd (simde__m256d* mem_addr, simde__m256d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_sincos_pd(HEDLEY_REINTERPRET_CAST(__m256d*, mem_addr), a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && 
defined(SIMDE_X86_AVX_NATIVE)\n    Sleef___m256d_2 temp;\n\n    #if SIMDE_ACCURACY_PREFERENCE > 1\n      temp = Sleef_sincosd4_u10(a);\n    #else\n      temp = Sleef_sincosd4_u35(a);\n    #endif\n\n    *mem_addr = temp.y;\n    return temp.x;\n  #else\n    simde__m256d r;\n\n    r = simde_mm256_sin_pd(a);\n    *mem_addr = simde_mm256_cos_pd(a);\n\n    return r;\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_sincos_pd\n  #define _mm256_sincos_pd(mem_addr, a) simde_mm256_sincos_pd((mem_addr),(a))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_sincos_ps (simde__m512* mem_addr, simde__m512 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_sincos_ps(HEDLEY_REINTERPRET_CAST(__m512*, mem_addr), a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    Sleef___m512_2 temp;\n\n    #if SIMDE_ACCURACY_PREFERENCE > 1\n      temp = Sleef_sincosf16_u10(a);\n    #else\n      temp = Sleef_sincosf16_u35(a);\n    #endif\n\n    *mem_addr = temp.y;\n    return temp.x;\n  #else\n    simde__m512 r;\n\n    r = simde_mm512_sin_ps(a);\n    *mem_addr = simde_mm512_cos_ps(a);\n\n    return r;\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_sincos_ps\n  #define _mm512_sincos_ps(mem_addr, a) simde_mm512_sincos_ps((mem_addr),(a))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_sincos_pd (simde__m512d* mem_addr, simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_sincos_pd(HEDLEY_REINTERPRET_CAST(__m512d*, mem_addr), a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    Sleef___m512d_2 temp;\n\n    #if SIMDE_ACCURACY_PREFERENCE > 1\n      temp = Sleef_sincosd8_u10(a);\n    #else\n      temp = Sleef_sincosd8_u35(a);\n    #endif\n\n    *mem_addr = temp.y;\n    return temp.x;\n  #else\n    simde__m512d r;\n\n    r = simde_mm512_sin_pd(a);\n    *mem_addr = simde_mm512_cos_pd(a);\n\n    return r;\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_sincos_pd\n  #define _mm512_sincos_pd(mem_addr, a) simde_mm512_sincos_pd((mem_addr),(a))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_mask_sincos_ps(simde__m512* mem_addr, simde__m512 sin_src, simde__m512 cos_src, simde__mmask16 k, simde__m512 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_sincos_ps(mem_addr, sin_src, cos_src, k, a);\n  #else\n    simde__m512 cos_res, sin_res;\n    sin_res = simde_mm512_sincos_ps(&cos_res, a);\n    *mem_addr = simde_mm512_mask_mov_ps(cos_src, k, cos_res);\n    return simde_mm512_mask_mov_ps(sin_src, k, sin_res);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_sincos_ps\n  #define _mm512_mask_sincos_ps(mem_addr, sin_src, cos_src, k, a) simde_mm512_mask_sincos_ps(mem_addr, sin_src, cos_src, k, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_mask_sincos_pd(simde__m512d* mem_addr, simde__m512d sin_src, simde__m512d cos_src, simde__mmask8 k, simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_sincos_pd(mem_addr, sin_src, cos_src, k, a);\n  #else\n    simde__m512d cos_res, sin_res;\n    sin_res = simde_mm512_sincos_pd(&cos_res, a);\n    *mem_addr = simde_mm512_mask_mov_pd(cos_src, k, cos_res);\n    return simde_mm512_mask_mov_pd(sin_src, k, sin_res);\n  
#endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_sincos_pd\n  #define _mm512_mask_sincos_pd(mem_addr, sin_src, cos_src, k, a) simde_mm512_mask_sincos_pd(mem_addr, sin_src, cos_src, k, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_sind_ps (simde__m128 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_sind_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)\n    #if SIMDE_ACCURACY_PREFERENCE > 1\n      return Sleef_sinf4_u10(simde_x_mm_deg2rad_ps(a));\n    #else\n      return Sleef_sinf4_u35(simde_x_mm_deg2rad_ps(a));\n    #endif\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n      r_.f32[i] = simde_math_sinf(simde_math_deg2radf(a_.f32[i]));\n    }\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_sind_ps\n  #define _mm_sind_ps(a) simde_mm_sind_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_sind_pd (simde__m128d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_sind_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)\n    #if SIMDE_ACCURACY_PREFERENCE > 1\n      return Sleef_sind2_u10(simde_x_mm_deg2rad_pd(a));\n    #else\n      return Sleef_sind2_u35(simde_x_mm_deg2rad_pd(a));\n    #endif\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n      r_.f64[i] = simde_math_sin(simde_math_deg2rad(a_.f64[i]));\n    }\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_sind_pd\n  #define _mm_sind_pd(a) simde_mm_sind_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_sind_ps (simde__m256 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_sind_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)\n    #if SIMDE_ACCURACY_PREFERENCE > 1\n      return Sleef_sinf8_u10(simde_x_mm256_deg2rad_ps(a));\n    #else\n      return Sleef_sinf8_u35(simde_x_mm256_deg2rad_ps(a));\n    #endif\n  #else\n    simde__m256_private\n      r_,\n      a_ = simde__m256_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {\n        r_.m128[i] = simde_mm_sind_ps(a_.m128[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = simde_math_sinf(simde_math_deg2radf(a_.f32[i]));\n      }\n    #endif\n\n    return simde__m256_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_sind_ps\n  #define _mm256_sind_ps(a) simde_mm256_sind_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_sind_pd (simde__m256d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_sind_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)\n    #if SIMDE_ACCURACY_PREFERENCE > 1\n      return Sleef_sind4_u10(simde_x_mm256_deg2rad_pd(a));\n    #else\n      return Sleef_sind4_u35(simde_x_mm256_deg2rad_pd(a));\n    #endif\n  #else\n    simde__m256d_private\n   
   r_,\n      a_ = simde__m256d_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {\n        r_.m128d[i] = simde_mm_sind_pd(a_.m128d[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = simde_math_sin(simde_math_deg2rad(a_.f64[i]));\n      }\n    #endif\n\n    return simde__m256d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_sind_pd\n  #define _mm256_sind_pd(a) simde_mm256_sind_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_sind_ps (simde__m512 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_sind_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    #if SIMDE_ACCURACY_PREFERENCE > 1\n      return Sleef_sinf16_u10(simde_x_mm512_deg2rad_ps(a));\n    #else\n      return Sleef_sinf16_u35(simde_x_mm512_deg2rad_ps(a));\n    #endif\n  #else\n    simde__m512_private\n      r_,\n      a_ = simde__m512_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n      for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {\n        r_.m256[i] = simde_mm256_sind_ps(a_.m256[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = simde_math_sinf(simde_math_deg2radf(a_.f32[i]));\n      }\n    #endif\n\n    return simde__m512_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_sind_ps\n  #define _mm512_sind_ps(a) simde_mm512_sind_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_sind_pd (simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_sind_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    #if SIMDE_ACCURACY_PREFERENCE > 1\n      return Sleef_sind8_u10(simde_x_mm512_deg2rad_pd(a));\n    #else\n      return Sleef_sind8_u35(simde_x_mm512_deg2rad_pd(a));\n    #endif\n  #else\n    simde__m512d_private\n      r_,\n      a_ = simde__m512d_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n      for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {\n        r_.m256d[i] = simde_mm256_sind_pd(a_.m256d[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = simde_math_sin(simde_math_deg2rad(a_.f64[i]));\n      }\n    #endif\n\n    return simde__m512d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_sind_pd\n  #define _mm512_sind_pd(a) simde_mm512_sind_pd(a)\n#endif\n\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_mask_sind_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_sind_ps(src, k, a);\n  #else\n    return simde_mm512_mask_mov_ps(src, k, simde_mm512_sind_ps(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_sind_ps\n  #define _mm512_mask_sind_ps(src, k, a) simde_mm512_mask_sind_ps(src, k, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_mask_sind_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && 
defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_sind_pd(src, k, a);\n  #else\n    return simde_mm512_mask_mov_pd(src, k, simde_mm512_sind_pd(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_sind_pd\n  #define _mm512_mask_sind_pd(src, k, a) simde_mm512_mask_sind_pd(src, k, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_sinh_ps (simde__m128 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_sinh_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)\n    return Sleef_sinhf4_u10(a);\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n      r_.f32[i] = simde_math_sinhf(a_.f32[i]);\n    }\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_sinh_ps\n  #define _mm_sinh_ps(a) simde_mm_sinh_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_sinh_pd (simde__m128d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_sinh_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)\n    return Sleef_sinhd2_u10(a);\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n      r_.f64[i] = simde_math_sinh(a_.f64[i]);\n    }\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_sinh_pd\n  #define _mm_sinh_pd(a) simde_mm_sinh_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_sinh_ps (simde__m256 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_sinh_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)\n    return Sleef_sinhf8_u10(a);\n  #else\n    simde__m256_private\n      r_,\n      a_ = simde__m256_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {\n        r_.m128[i] = simde_mm_sinh_ps(a_.m128[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = simde_math_sinhf(a_.f32[i]);\n      }\n    #endif\n\n    return simde__m256_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_sinh_ps\n  #define _mm256_sinh_ps(a) simde_mm256_sinh_ps(a)\n#endif\n\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_sinh_pd (simde__m256d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_sinh_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)\n    return Sleef_sinhd4_u10(a);\n  #else\n    simde__m256d_private\n      r_,\n      a_ = simde__m256d_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {\n        r_.m128d[i] = simde_mm_sinh_pd(a_.m128d[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = simde_math_sinh(a_.f64[i]);\n      }\n    #endif\n\n    return simde__m256d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  
#undef _mm256_sinh_pd\n  #define _mm256_sinh_pd(a) simde_mm256_sinh_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_sinh_ps (simde__m512 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_sinh_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return Sleef_sinhf16_u10(a);\n  #else\n    simde__m512_private\n      r_,\n      a_ = simde__m512_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n      for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {\n        r_.m256[i] = simde_mm256_sinh_ps(a_.m256[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = simde_math_sinhf(a_.f32[i]);\n      }\n    #endif\n\n    return simde__m512_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_sinh_ps\n  #define _mm512_sinh_ps(a) simde_mm512_sinh_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_sinh_pd (simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_sinh_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return Sleef_sinhd8_u10(a);\n  #else\n    simde__m512d_private\n      r_,\n      a_ = simde__m512d_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n      for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {\n        r_.m256d[i] = simde_mm256_sinh_pd(a_.m256d[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = simde_math_sinh(a_.f64[i]);\n      }\n    #endif\n\n    return simde__m512d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_sinh_pd\n  #define _mm512_sinh_pd(a) simde_mm512_sinh_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_mask_sinh_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_sinh_ps(src, k, a);\n  #else\n    return simde_mm512_mask_mov_ps(src, k, simde_mm512_sinh_ps(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_sinh_ps\n  #define _mm512_mask_sinh_ps(src, k, a) simde_mm512_mask_sinh_ps(src, k, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_mask_sinh_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_sinh_pd(src, k, a);\n  #else\n    return simde_mm512_mask_mov_pd(src, k, simde_mm512_sinh_pd(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_sinh_pd\n  #define _mm512_mask_sinh_pd(src, k, a) simde_mm512_mask_sinh_pd(src, k, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_svml_ceil_ps (simde__m128 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_svml_ceil_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)\n    return Sleef_ceilf4(a);\n  #else\n    return simde_mm_round_ps(a, SIMDE_MM_FROUND_TO_POS_INF);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_svml_ceil_ps\n  #define _mm_svml_ceil_ps(a) 
simde_mm_svml_ceil_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_svml_ceil_pd (simde__m128d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_svml_ceil_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)\n    return Sleef_ceild2(a);\n  #else\n    return simde_mm_round_pd(a, SIMDE_MM_FROUND_TO_POS_INF);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_svml_ceil_pd\n  #define _mm_svml_ceil_pd(a) simde_mm_svml_ceil_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_svml_ceil_ps (simde__m256 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_svml_ceil_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)\n    return Sleef_ceilf8(a);\n  #else\n    return simde_mm256_round_ps(a, SIMDE_MM_FROUND_TO_POS_INF);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_svml_ceil_ps\n  #define _mm256_svml_ceil_ps(a) simde_mm256_svml_ceil_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_svml_ceil_pd (simde__m256d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_svml_ceil_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)\n    return Sleef_ceild4(a);\n  #else\n    return simde_mm256_round_pd(a, SIMDE_MM_FROUND_TO_POS_INF);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_svml_ceil_pd\n  #define _mm256_svml_ceil_pd(a) simde_mm256_svml_ceil_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_ceil_ps (simde__m512 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_ceil_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return Sleef_ceilf16(a);\n  #else\n    simde__m512_private\n      r_,\n      a_ = simde__m512_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n      for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {\n        r_.m256[i] = simde_mm256_ceil_ps(a_.m256[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = simde_math_ceilf(a_.f32[i]);\n      }\n    #endif\n\n    return simde__m512_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_ceil_ps\n  #define _mm512_ceil_ps(a) simde_mm512_ceil_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_ceil_pd (simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_ceil_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return Sleef_ceild8(a);\n  #else\n    simde__m512d_private\n      r_,\n      a_ = simde__m512d_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n      for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {\n        r_.m256d[i] = simde_mm256_ceil_pd(a_.m256d[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = simde_math_ceil(a_.f64[i]);\n      }\n    #endif\n\n    return simde__m512d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_ceil_pd\n  #define _mm512_ceil_pd(a) 
simde_mm512_ceil_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_mask_ceil_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_ceil_ps(src, k, a);\n  #else\n    return simde_mm512_mask_mov_ps(src, k, simde_mm512_ceil_ps(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_ceil_ps\n  #define _mm512_mask_ceil_ps(src, k, a) simde_mm512_mask_ceil_ps(src, k, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_mask_ceil_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_ceil_pd(src, k, a);\n  #else\n    return simde_mm512_mask_mov_pd(src, k, simde_mm512_ceil_pd(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_ceil_pd\n  #define _mm512_mask_ceil_pd(src, k, a) simde_mm512_mask_ceil_pd(src, k, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_svml_floor_ps (simde__m128 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_svml_floor_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)\n    return Sleef_floorf4(a);\n  #else\n    return simde_mm_round_ps(a, SIMDE_MM_FROUND_TO_NEG_INF);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_svml_floor_ps\n  #define _mm_svml_floor_ps(a) simde_mm_svml_floor_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_svml_floor_pd (simde__m128d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_svml_floor_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)\n    return Sleef_floord2(a);\n  #else\n    return simde_mm_round_pd(a, SIMDE_MM_FROUND_TO_NEG_INF);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_svml_floor_pd\n  #define _mm_svml_floor_pd(a) simde_mm_svml_floor_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_svml_floor_ps (simde__m256 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_svml_floor_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)\n    return Sleef_floorf8(a);\n  #else\n    return simde_mm256_round_ps(a, SIMDE_MM_FROUND_TO_NEG_INF);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_svml_floor_ps\n  #define _mm256_svml_floor_ps(a) simde_mm256_svml_floor_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_svml_floor_pd (simde__m256d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_svml_floor_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)\n    return Sleef_floord4(a);\n  #else\n    return simde_mm256_round_pd(a, SIMDE_MM_FROUND_TO_NEG_INF);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_svml_floor_pd\n  #define _mm256_svml_floor_pd(a) simde_mm256_svml_floor_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_floor_ps (simde__m512 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_floor_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return Sleef_floorf16(a);\n  #else\n    simde__m512_private\n      r_,\n      a_ = simde__m512_to_private(a);\n\n    #if 
SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n      for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {\n        r_.m256[i] = simde_mm256_floor_ps(a_.m256[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = simde_math_floorf(a_.f32[i]);\n      }\n    #endif\n\n    return simde__m512_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_floor_ps\n  #define _mm512_floor_ps(a) simde_mm512_floor_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_floor_pd (simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_floor_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return Sleef_floord8(a);\n  #else\n    simde__m512d_private\n      r_,\n      a_ = simde__m512d_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n      for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {\n        r_.m256d[i] = simde_mm256_floor_pd(a_.m256d[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = simde_math_floor(a_.f64[i]);\n      }\n    #endif\n\n    return simde__m512d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_floor_pd\n  #define _mm512_floor_pd(a) simde_mm512_floor_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_mask_floor_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_floor_ps(src, k, a);\n  #else\n    return simde_mm512_mask_mov_ps(src, k, simde_mm512_floor_ps(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_floor_ps\n  #define _mm512_mask_floor_ps(src, k, a) simde_mm512_mask_floor_ps(src, k, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_mask_floor_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_floor_pd(src, k, a);\n  #else\n    return simde_mm512_mask_mov_pd(src, k, simde_mm512_floor_pd(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_floor_pd\n  #define _mm512_mask_floor_pd(src, k, a) simde_mm512_mask_floor_pd(src, k, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_svml_round_ps (simde__m128 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_svml_round_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)\n    return Sleef_roundf4(a);\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n      r_.f32[i] = simde_math_roundf(a_.f32[i]);\n    }\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_svml_round_ps\n  #define _mm_svml_round_ps(a) simde_mm_svml_round_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_svml_round_pd (simde__m128d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_svml_round_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)\n    return Sleef_roundd2(a);\n 
 #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n      r_.f64[i] = simde_math_round(a_.f64[i]);\n    }\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_svml_round_pd\n  #define _mm_svml_round_pd(a) simde_mm_svml_round_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_svml_round_ps (simde__m256 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_svml_round_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)\n    return Sleef_roundf8(a);\n  #else\n    simde__m256_private\n      r_,\n      a_ = simde__m256_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {\n        r_.m128[i] = simde_mm_svml_round_ps(a_.m128[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = simde_math_roundf(a_.f32[i]);\n      }\n    #endif\n\n    return simde__m256_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_svml_round_ps\n  #define _mm256_svml_round_ps(a) simde_mm256_svml_round_ps(a)\n#endif\n\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_svml_round_pd (simde__m256d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_svml_round_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)\n    return Sleef_roundd4(a);\n  #else\n    simde__m256d_private\n      r_,\n      a_ = simde__m256d_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {\n        r_.m128d[i] = simde_mm_svml_round_pd(a_.m128d[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = simde_math_round(a_.f64[i]);\n      }\n    #endif\n\n    return simde__m256d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_svml_round_pd\n  #define _mm256_svml_round_pd(a) simde_mm256_svml_round_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_svml_round_pd (simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_svml_round_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return Sleef_roundd8(a);\n  #else\n    simde__m512d_private\n      r_,\n      a_ = simde__m512d_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n      for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {\n        r_.m256d[i] = simde_mm256_svml_round_pd(a_.m256d[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = simde_math_round(a_.f64[i]);\n      }\n    #endif\n\n    return simde__m512d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_svml_round_pd\n  #define _mm512_svml_round_pd(a) simde_mm512_svml_round_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_mask_svml_round_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && 
defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_svml_round_pd(src, k, a);\n  #else\n    return simde_mm512_mask_mov_pd(src, k, simde_mm512_svml_round_pd(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_svml_round_pd\n  #define _mm512_mask_svml_round_pd(src, k, a) simde_mm512_mask_svml_round_pd(src, k, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_svml_sqrt_ps (simde__m128 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_svml_sqrt_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)\n    return Sleef_sqrtf4(a);\n  #else\n    return simde_mm_sqrt_ps(a);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_svml_sqrt_ps\n  #define _mm_svml_sqrt_ps(a) simde_mm_svml_sqrt_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_svml_sqrt_pd (simde__m128d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_svml_sqrt_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)\n    return Sleef_sqrtd2(a);\n  #else\n    return simde_mm_sqrt_pd(a);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_svml_sqrt_pd\n  #define _mm_svml_sqrt_pd(a) simde_mm_svml_sqrt_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_svml_sqrt_ps (simde__m256 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_svml_sqrt_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)\n    return Sleef_sqrtf8(a);\n  #else\n    return simde_mm256_sqrt_ps(a);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_svml_sqrt_ps\n  #define _mm256_svml_sqrt_ps(a) simde_mm256_svml_sqrt_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_svml_sqrt_pd (simde__m256d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_svml_sqrt_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)\n    return Sleef_sqrtd4(a);\n  #else\n    return simde_mm256_sqrt_pd(a);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_svml_sqrt_pd\n  #define _mm256_svml_sqrt_pd(a) simde_mm256_svml_sqrt_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_svml_sqrt_ps (simde__m512 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_svml_sqrt_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return Sleef_sqrtf16(a);\n  #else\n    return simde_mm512_sqrt_ps(a);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_svml_sqrt_ps\n  #define _mm512_svml_sqrt_ps(a) simde_mm512_svml_sqrt_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_svml_sqrt_pd (simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_svml_sqrt_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return Sleef_sqrtd8(a);\n  #else\n    return simde_mm512_sqrt_pd(a);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_svml_sqrt_pd\n  #define _mm512_svml_sqrt_pd(a) simde_mm512_svml_sqrt_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_tan_ps (simde__m128 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return 
_mm_tan_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)\n    #if SIMDE_ACCURACY_PREFERENCE > 1\n      return Sleef_tanf4_u10(a);\n    #else\n      return Sleef_tanf4_u35(a);\n    #endif\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n      r_.f32[i] = simde_math_tanf(a_.f32[i]);\n    }\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_tan_ps\n  #define _mm_tan_ps(a) simde_mm_tan_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_tan_pd (simde__m128d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_tan_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)\n    #if SIMDE_ACCURACY_PREFERENCE > 1\n      return Sleef_tand2_u10(a);\n    #else\n      return Sleef_tand2_u35(a);\n    #endif\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n      r_.f64[i] = simde_math_tan(a_.f64[i]);\n    }\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_tan_pd\n  #define _mm_tan_pd(a) simde_mm_tan_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_tan_ps (simde__m256 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_tan_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)\n    #if SIMDE_ACCURACY_PREFERENCE > 1\n      return Sleef_tanf8_u10(a);\n    #else\n      return Sleef_tanf8_u35(a);\n    #endif\n  #else\n    simde__m256_private\n      r_,\n      a_ = simde__m256_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {\n        r_.m128[i] = simde_mm_tan_ps(a_.m128[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = simde_math_tanf(a_.f32[i]);\n      }\n    #endif\n\n    return simde__m256_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_tan_ps\n  #define _mm256_tan_ps(a) simde_mm256_tan_ps(a)\n#endif\n\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_tan_pd (simde__m256d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_tan_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)\n    #if SIMDE_ACCURACY_PREFERENCE > 1\n      return Sleef_tand4_u10(a);\n    #else\n      return Sleef_tand4_u35(a);\n    #endif\n  #else\n    simde__m256d_private\n      r_,\n      a_ = simde__m256d_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n      for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {\n        r_.m128d[i] = simde_mm_tan_pd(a_.m128d[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = simde_math_tan(a_.f64[i]);\n      }\n    #endif\n\n    return simde__m256d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_tan_pd\n  #define _mm256_tan_pd(a) 
simde_mm256_tan_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_tan_ps (simde__m512 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_tan_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    #if SIMDE_ACCURACY_PREFERENCE > 1\n      return Sleef_tanf16_u10(a);\n    #else\n      return Sleef_tanf16_u35(a);\n    #endif\n  #else\n    simde__m512_private\n      r_,\n      a_ = simde__m512_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n      for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {\n        r_.m256[i] = simde_mm256_tan_ps(a_.m256[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = simde_math_tanf(a_.f32[i]);\n      }\n    #endif\n\n    return simde__m512_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_tan_ps\n  #define _mm512_tan_ps(a) simde_mm512_tan_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_tan_pd (simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_tan_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    #if SIMDE_ACCURACY_PREFERENCE > 1\n      return Sleef_tand8_u10(a);\n    #else\n      return Sleef_tand8_u35(a);\n    #endif\n  #else\n    simde__m512d_private\n      r_,\n      a_ = simde__m512d_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n      for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {\n        r_.m256d[i] = simde_mm256_tan_pd(a_.m256d[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = simde_math_tan(a_.f64[i]);\n      }\n    #endif\n\n    return simde__m512d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_tan_pd\n  #define _mm512_tan_pd(a) simde_mm512_tan_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_mask_tan_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_tan_ps(src, k, a);\n  #else\n    return simde_mm512_mask_mov_ps(src, k, simde_mm512_tan_ps(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_tan_ps\n  #define _mm512_mask_tan_ps(src, k, a) simde_mm512_mask_tan_ps(src, k, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_mask_tan_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_tan_pd(src, k, a);\n  #else\n    return simde_mm512_mask_mov_pd(src, k, simde_mm512_tan_pd(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_tan_pd\n  #define _mm512_mask_tan_pd(src, k, a) simde_mm512_mask_tan_pd(src, k, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_tand_ps (simde__m128 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_tand_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)\n    #if SIMDE_ACCURACY_PREFERENCE > 1\n      return Sleef_tanf4_u10(simde_x_mm_deg2rad_ps(a));\n    #else\n      return Sleef_tanf4_u35(simde_x_mm_deg2rad_ps(a));\n    #endif\n  #else\n    
simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n      r_.f32[i] = simde_math_tanf(simde_math_deg2radf(a_.f32[i]));\n    }\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_tand_ps\n  #define _mm_tand_ps(a) simde_mm_tand_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_tand_pd (simde__m128d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_tand_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)\n    #if SIMDE_ACCURACY_PREFERENCE > 1\n      return Sleef_tand2_u10(simde_x_mm_deg2rad_pd(a));\n    #else\n      return Sleef_tand2_u35(simde_x_mm_deg2rad_pd(a));\n    #endif\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n      r_.f64[i] = simde_math_tan(simde_math_deg2rad(a_.f64[i]));\n    }\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_tand_pd\n  #define _mm_tand_pd(a) simde_mm_tand_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_tand_ps (simde__m256 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_tand_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)\n    #if SIMDE_ACCURACY_PREFERENCE > 1\n      return Sleef_tanf8_u10(simde_x_mm256_deg2rad_ps(a));\n    #else\n      return Sleef_tanf8_u35(simde_x_mm256_deg2rad_ps(a));\n    #endif\n  #else\n    simde__m256_private\n      r_,\n      a_ = simde__m256_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {\n        r_.m128[i] = simde_mm_tand_ps(a_.m128[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = simde_math_tanf(simde_math_deg2radf(a_.f32[i]));\n      }\n    #endif\n\n    return simde__m256_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_tand_ps\n  #define _mm256_tand_ps(a) simde_mm256_tand_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_tand_pd (simde__m256d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_tand_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)\n    #if SIMDE_ACCURACY_PREFERENCE > 1\n      return Sleef_tand4_u10(simde_x_mm256_deg2rad_pd(a));\n    #else\n      return Sleef_tand4_u35(simde_x_mm256_deg2rad_pd(a));\n    #endif\n  #else\n    simde__m256d_private\n      r_,\n      a_ = simde__m256d_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {\n        r_.m128d[i] = simde_mm_tand_pd(a_.m128d[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = simde_math_tan(simde_math_deg2rad(a_.f64[i]));\n      }\n    #endif\n\n    return simde__m256d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_tand_pd\n  #define _mm256_tand_pd(a) 
simde_mm256_tand_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_tand_ps (simde__m512 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_tand_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    #if SIMDE_ACCURACY_PREFERENCE > 1\n      return Sleef_tanf16_u10(simde_x_mm512_deg2rad_ps(a));\n    #else\n      return Sleef_tanf16_u35(simde_x_mm512_deg2rad_ps(a));\n    #endif\n  #else\n    simde__m512_private\n      r_,\n      a_ = simde__m512_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n      for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {\n        r_.m256[i] = simde_mm256_tand_ps(a_.m256[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = simde_math_tanf(simde_math_deg2radf(a_.f32[i]));\n      }\n    #endif\n\n    return simde__m512_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_tand_ps\n  #define _mm512_tand_ps(a) simde_mm512_tand_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_tand_pd (simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_tand_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    #if SIMDE_ACCURACY_PREFERENCE > 1\n      return Sleef_tand8_u10(simde_x_mm512_deg2rad_pd(a));\n    #else\n      return Sleef_tand8_u35(simde_x_mm512_deg2rad_pd(a));\n    #endif\n  #else\n    simde__m512d_private\n      r_,\n      a_ = simde__m512d_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n      for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {\n        r_.m256d[i] = simde_mm256_tand_pd(a_.m256d[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = simde_math_tan(simde_math_deg2rad(a_.f64[i]));\n      }\n    #endif\n\n    return simde__m512d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_tand_pd\n  #define _mm512_tand_pd(a) simde_mm512_tand_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_mask_tand_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_tand_ps(src, k, a);\n  #else\n    return simde_mm512_mask_mov_ps(src, k, simde_mm512_tand_ps(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_tand_ps\n  #define _mm512_mask_tand_ps(src, k, a) simde_mm512_mask_tand_ps(src, k, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_mask_tand_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_tand_pd(src, k, a);\n  #else\n    return simde_mm512_mask_mov_pd(src, k, simde_mm512_tand_pd(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_tand_pd\n  #define _mm512_mask_tand_pd(src, k, a) simde_mm512_mask_tand_pd(src, k, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_tanh_ps (simde__m128 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_tanh_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)\n    return Sleef_tanhf4_u10(a);\n  
#else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n      r_.f32[i] = simde_math_tanhf(a_.f32[i]);\n    }\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_tanh_ps\n  #define _mm_tanh_ps(a) simde_mm_tanh_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_tanh_pd (simde__m128d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_tanh_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)\n    return Sleef_tanhd2_u10(a);\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n      r_.f64[i] = simde_math_tanh(a_.f64[i]);\n    }\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_tanh_pd\n  #define _mm_tanh_pd(a) simde_mm_tanh_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_tanh_ps (simde__m256 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_tanh_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)\n    return Sleef_tanhf8_u10(a);\n  #else\n    simde__m256_private\n      r_,\n      a_ = simde__m256_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {\n        r_.m128[i] = simde_mm_tanh_ps(a_.m128[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = simde_math_tanhf(a_.f32[i]);\n      }\n    #endif\n\n    return simde__m256_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_tanh_ps\n  #define _mm256_tanh_ps(a) simde_mm256_tanh_ps(a)\n#endif\n\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_tanh_pd (simde__m256d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_tanh_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)\n    return Sleef_tanhd4_u10(a);\n  #else\n    simde__m256d_private\n      r_,\n      a_ = simde__m256d_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {\n        r_.m128d[i] = simde_mm_tanh_pd(a_.m128d[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = simde_math_tanh(a_.f64[i]);\n      }\n    #endif\n\n    return simde__m256d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_tanh_pd\n  #define _mm256_tanh_pd(a) simde_mm256_tanh_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_tanh_ps (simde__m512 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_tanh_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return Sleef_tanhf16_u10(a);\n  #else\n    simde__m512_private\n      r_,\n      a_ = simde__m512_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n      for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {\n        r_.m256[i] = 
simde_mm256_tanh_ps(a_.m256[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = simde_math_tanhf(a_.f32[i]);\n      }\n    #endif\n\n    return simde__m512_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_tanh_ps\n  #define _mm512_tanh_ps(a) simde_mm512_tanh_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_tanh_pd (simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_tanh_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return Sleef_tanhd8_u10(a);\n  #else\n    simde__m512d_private\n      r_,\n      a_ = simde__m512d_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n      for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {\n        r_.m256d[i] = simde_mm256_tanh_pd(a_.m256d[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = simde_math_tanh(a_.f64[i]);\n      }\n    #endif\n\n    return simde__m512d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_tanh_pd\n  #define _mm512_tanh_pd(a) simde_mm512_tanh_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_mask_tanh_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_tanh_ps(src, k, a);\n  #else\n    return simde_mm512_mask_mov_ps(src, k, simde_mm512_tanh_ps(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_tanh_ps\n  #define _mm512_mask_tanh_ps(src, k, a) simde_mm512_mask_tanh_ps(src, k, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_mask_tanh_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_tanh_pd(src, k, a);\n  #else\n    return simde_mm512_mask_mov_pd(src, k, simde_mm512_tanh_pd(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_tanh_pd\n  #define _mm512_mask_tanh_pd(src, k, a) simde_mm512_mask_tanh_pd(src, k, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_trunc_ps (simde__m128 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_trunc_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)\n    return Sleef_truncf4(a);\n  #else\n    return simde_mm_round_ps(a, SIMDE_MM_FROUND_TO_ZERO);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_trunc_ps\n  #define _mm_trunc_ps(a) simde_mm_trunc_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_trunc_pd (simde__m128d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_trunc_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)\n    return Sleef_truncd2(a);\n  #else\n    return simde_mm_round_pd(a, SIMDE_MM_FROUND_TO_ZERO);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_trunc_pd\n  #define _mm_trunc_pd(a) simde_mm_trunc_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_trunc_ps (simde__m256 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_trunc_ps(a);\n  #elif 
defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)\n    return Sleef_truncf8(a);\n  #else\n    return simde_mm256_round_ps(a, SIMDE_MM_FROUND_TO_ZERO);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_trunc_ps\n  #define _mm256_trunc_ps(a) simde_mm256_trunc_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_trunc_pd (simde__m256d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_trunc_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)\n    return Sleef_truncd4(a);\n  #else\n    return simde_mm256_round_pd(a, SIMDE_MM_FROUND_TO_ZERO);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_trunc_pd\n  #define _mm256_trunc_pd(a) simde_mm256_trunc_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_trunc_ps (simde__m512 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_trunc_ps(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return Sleef_truncf16(a);\n  #else\n    simde__m512_private\n      r_,\n      a_ = simde__m512_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n      for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {\n        r_.m256[i] = simde_mm256_trunc_ps(a_.m256[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        r_.f32[i] = simde_math_truncf(a_.f32[i]);\n      }\n    #endif\n\n    return simde__m512_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_trunc_ps\n  #define _mm512_trunc_ps(a) simde_mm512_trunc_ps(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_trunc_pd (simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_trunc_pd(a);\n  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return Sleef_truncd8(a);\n  #else\n    simde__m512d_private\n      r_,\n      a_ = simde__m512d_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)\n      for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {\n        r_.m256d[i] = simde_mm256_trunc_pd(a_.m256d[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        r_.f64[i] = simde_math_trunc(a_.f64[i]);\n      }\n    #endif\n\n    return simde__m512d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_trunc_pd\n  #define _mm512_trunc_pd(a) simde_mm512_trunc_pd(a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512\nsimde_mm512_mask_trunc_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_trunc_ps(src, k, a);\n  #else\n    return simde_mm512_mask_mov_ps(src, k, simde_mm512_trunc_ps(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_trunc_ps\n  #define _mm512_mask_trunc_ps(src, k, a) simde_mm512_mask_trunc_ps(src, k, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m512d\nsimde_mm512_mask_trunc_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)\n    return _mm512_mask_trunc_pd(src, k, a);\n  #else\n    return simde_mm512_mask_mov_pd(src, k, 
simde_mm512_trunc_pd(a));\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm512_mask_trunc_pd\n  #define _mm512_mask_trunc_pd(src, k, a) simde_mm512_mask_trunc_pd(src, k, a)\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_udivrem_epi32 (simde__m128i * mem_addr, simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)\n    return _mm_udivrem_epi32(mem_addr, a, b);\n  #else\n    simde__m128i r;\n\n    r = simde_mm_div_epu32(a, b);\n    *mem_addr = simde_x_mm_sub_epu32(a, simde_x_mm_mullo_epu32(r, b));\n\n    return r;\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm_udivrem_epi32\n  #define _mm_udivrem_epi32(mem_addr, a, b) simde_mm_udivrem_epi32((mem_addr),(a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_udivrem_epi32 (simde__m256i* mem_addr, simde__m256i a, simde__m256i b) {\n  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)\n    return _mm256_udivrem_epi32(HEDLEY_REINTERPRET_CAST(__m256i*, mem_addr), a, b);\n  #else\n    simde__m256i r;\n\n    r = simde_mm256_div_epu32(a, b);\n    *mem_addr = simde_x_mm256_sub_epu32(a, simde_x_mm256_mullo_epu32(r, b));\n\n    return r;\n  #endif\n}\n#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)\n  #undef _mm256_udivrem_epi32\n  #define _mm256_udivrem_epi32(mem_addr, a, b) simde_mm256_udivrem_epi32((mem_addr),(a), (b))\n#endif\n\nSIMDE_END_DECLS_\n\nHEDLEY_DIAGNOSTIC_POP\n\n#endif /* !defined(SIMDE_X86_SVML_H) */\n"
  },
  {
    "path": "external_libs/pgenlib/simde/x86/xop.h",
    "content": "/* SPDX-License-Identifier: MIT\n *\n * Permission is hereby granted, free of charge, to any person\n * obtaining a copy of this software and associated documentation\n * files (the \"Software\"), to deal in the Software without\n * restriction, including without limitation the rights to use, copy,\n * modify, merge, publish, distribute, sublicense, and/or sell copies\n * of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be\n * included in all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND,\n * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\n * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\n * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\n * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\n * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\n * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n * SOFTWARE.\n *\n * Copyright:\n *   2020      Evan Nemerson <evan@nemerson.com>\n */\n\n#if !defined(SIMDE_X86_XOP_H)\n#define SIMDE_X86_XOP_H\n\n#include \"avx2.h\"\n\n#if !defined(SIMDE_X86_XOP_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES)\n#  define SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES\n#endif\n\nHEDLEY_DIAGNOSTIC_PUSH\nSIMDE_DISABLE_UNWANTED_DIAGNOSTICS\nSIMDE_BEGIN_DECLS_\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_cmov_si128 (simde__m128i a, simde__m128i b, simde__m128i c) {\n  #if defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_cmov_si128(a, b, c);\n  #elif defined(SIMDE_X86_AVX512VL_NATIVE)\n    return _mm_ternarylogic_epi32(a, b, c, 0xe4);\n  #elif defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_or_si128(_mm_and_si128(c, a), _mm_andnot_si128(c, b));\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b),\n      c_ = simde__m128i_to_private(c);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i8 = vbslq_s8(c_.neon_u8, a_.neon_i8, b_.neon_i8);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_v128_bitselect(a_.wasm_v128, b_.wasm_v128, c_.wasm_v128);\n    #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)\n      r_.altivec_i32 = vec_sel(b_.altivec_i32, a_.altivec_i32, c_.altivec_u32);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i32f = (c_.i32f & a_.i32f) | (~c_.i32f & b_.i32f);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) {\n        r_.i32f[i] = (c_.i32f[i] & a_.i32f[i]) | (~c_.i32f[i] & b_.i32f[i]);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_cmov_si128(a, b, c) simde_mm_cmov_si128((a), (b), (c))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256i\nsimde_mm256_cmov_si256 (simde__m256i a, simde__m256i b, simde__m256i c) {\n  #if defined(SIMDE_X86_XOP_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_98521) && !defined(SIMDE_BUG_MCST_LCC_MISSING_CMOV_M256)\n    return _mm256_cmov_si256(a, b, c);\n  #elif defined(SIMDE_X86_AVX512VL_NATIVE)\n    return _mm256_ternarylogic_epi32(a, b, c, 0xe4);\n  #elif defined(SIMDE_X86_AVX2_NATIVE)\n    return _mm256_or_si256(_mm256_and_si256(c, a), _mm256_andnot_si256(c, b));\n  #else\n    simde__m256i_private\n      r_,\n      a_ = 
simde__m256i_to_private(a),\n      b_ = simde__m256i_to_private(b),\n      c_ = simde__m256i_to_private(c);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) {\n        r_.m128i[i] = simde_mm_cmov_si128(a_.m128i[i], b_.m128i[i], c_.m128i[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) {\n        r_.i32f[i] = (c_.i32f[i] & a_.i32f[i]) | (~c_.i32f[i] & b_.i32f[i]);\n      }\n    #endif\n\n    return simde__m256i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm256_cmov_si256(a, b, c) simde_mm256_cmov_si256((a), (b), (c))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_comeq_epi8 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_EQ)\n    return _mm_com_epi8(a, b, _MM_PCOMCTRL_EQ);\n  #elif defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_comeq_epi8(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u8 = vceqq_s8(a_.neon_i8, b_.neon_i8);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), a_.i8 == b_.i8);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {\n        r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? ~INT8_C(0) : INT8_C(0);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_comeq_epi8(a, b) simde_mm_comeq_epi8((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_comeq_epi16 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_EQ)\n    return _mm_com_epi16(a, b, _MM_PCOMCTRL_EQ);\n  #elif defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_comeq_epi16(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u16 = vceqq_s16(a_.neon_i16, b_.neon_i16);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), a_.i16 == b_.i16);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n        r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_comeq_epi16(a, b) simde_mm_comeq_epi16((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_comeq_epi32 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_EQ)\n    return _mm_com_epi32(a, b, _MM_PCOMCTRL_EQ);\n  #elif defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_comeq_epi32(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u32 = vceqq_s32(a_.neon_i32, b_.neon_i32);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 == b_.i32);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n        r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? ~INT32_C(0) : INT32_C(0);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_comeq_epi32(a, b) simde_mm_comeq_epi32((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_comeq_epi64 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_EQ)\n    return _mm_com_epi64(a, b, _MM_PCOMCTRL_EQ);\n  #elif defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_comeq_epi64(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      r_.neon_u64 = vceqq_s64(a_.neon_i64, b_.neon_i64);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 == b_.i64);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {\n        r_.i64[i] = (a_.i64[i] == b_.i64[i]) ? ~INT64_C(0) : INT64_C(0);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_comeq_epi64(a, b) simde_mm_comeq_epi64((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_comeq_epu8 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_EQ)\n    return _mm_com_epu8(a, b, _MM_PCOMCTRL_EQ);\n  #elif defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_comeq_epu8(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u8 = vceqq_u8(a_.neon_u8, b_.neon_u8);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.u8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u8), a_.u8 == b_.u8);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {\n        r_.u8[i] = (a_.u8[i] == b_.u8[i]) ? 
~INT8_C(0) : INT8_C(0);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_comeq_epu8(a, b) simde_mm_comeq_epu8((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_comeq_epu16 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_EQ)\n    return _mm_com_epu16(a, b, _MM_PCOMCTRL_EQ);\n  #elif defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_comeq_epu16(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u16 = vceqq_u16(a_.neon_u16, b_.neon_u16);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.u16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), a_.u16 == b_.u16);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {\n        r_.u16[i] = (a_.u16[i] == b_.u16[i]) ? ~INT16_C(0) : INT16_C(0);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_comeq_epu16(a, b) simde_mm_comeq_epu16((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_comeq_epu32 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_EQ)\n    return _mm_com_epu32(a, b, _MM_PCOMCTRL_EQ);\n  #elif defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_comeq_epu32(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u32 = vceqq_u32(a_.neon_u32, b_.neon_u32);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), a_.u32 == b_.u32);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {\n        r_.u32[i] = (a_.u32[i] == b_.u32[i]) ? ~INT32_C(0) : INT32_C(0);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_comeq_epu32(a, b) simde_mm_comeq_epu32((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_comeq_epu64 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_EQ)\n    return _mm_com_epu64(a, b, _MM_PCOMCTRL_EQ);\n  #elif defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_comeq_epu64(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      r_.neon_u64 = vceqq_u64(a_.neon_u64, b_.neon_u64);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), a_.u64 == b_.u64);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {\n        r_.u64[i] = (a_.u64[i] == b_.u64[i]) ? 
~INT64_C(0) : INT64_C(0);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_comeq_epu64(a, b) simde_mm_comeq_epu64((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_comge_epi8 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_GE)\n    return _mm_com_epi8(a, b, _MM_PCOMCTRL_GE);\n  #elif defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_comge_epi8(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u8 = vcgeq_s8(a_.neon_i8, b_.neon_i8);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), a_.i8 >= b_.i8);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {\n        r_.i8[i] = (a_.i8[i] >= b_.i8[i]) ? ~INT8_C(0) : INT8_C(0);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_comge_epi8(a, b) simde_mm_comge_epi8((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_comge_epi16 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_GE)\n    return _mm_com_epi16(a, b, _MM_PCOMCTRL_GE);\n  #elif defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_comge_epi16(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u16 = vcgeq_s16(a_.neon_i16, b_.neon_i16);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), a_.i16 >= b_.i16);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n        r_.i16[i] = (a_.i16[i] >= b_.i16[i]) ? ~INT16_C(0) : INT16_C(0);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_comge_epi16(a, b) simde_mm_comge_epi16((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_comge_epi32 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_GE)\n    return _mm_com_epi32(a, b, _MM_PCOMCTRL_GE);\n  #elif defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_comge_epi32(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u32 = vcgeq_s32(a_.neon_i32, b_.neon_i32);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 >= b_.i32);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n        r_.i32[i] = (a_.i32[i] >= b_.i32[i]) ? 
~INT32_C(0) : INT32_C(0);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_comge_epi32(a, b) simde_mm_comge_epi32((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_comge_epi64 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_GE)\n    return _mm_com_epi64(a, b, _MM_PCOMCTRL_GE);\n  #elif defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_comge_epi64(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      r_.neon_u64 = vcgeq_s64(a_.neon_i64, b_.neon_i64);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 >= b_.i64);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {\n        r_.i64[i] = (a_.i64[i] >= b_.i64[i]) ? ~INT64_C(0) : INT64_C(0);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_comge_epi64(a, b) simde_mm_comge_epi64((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_comge_epu8 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_GE)\n    return _mm_com_epu8(a, b, _MM_PCOMCTRL_GE);\n  #elif defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_comge_epu8(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u8 = vcgeq_u8(a_.neon_u8, b_.neon_u8);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.u8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u8), a_.u8 >= b_.u8);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {\n        r_.u8[i] = (a_.u8[i] >= b_.u8[i]) ? ~INT8_C(0) : INT8_C(0);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_comge_epu8(a, b) simde_mm_comge_epu8((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_comge_epu16 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_GE)\n    return _mm_com_epu16(a, b, _MM_PCOMCTRL_GE);\n  #elif defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_comge_epu16(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u16 = vcgeq_u16(a_.neon_u16, b_.neon_u16);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.u16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), a_.u16 >= b_.u16);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {\n        r_.u16[i] = (a_.u16[i] >= b_.u16[i]) ? 
~INT16_C(0) : INT16_C(0);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_comge_epu16(a, b) simde_mm_comge_epu16((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_comge_epu32 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_GE)\n    return _mm_com_epu32(a, b, _MM_PCOMCTRL_GE);\n  #elif defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_comge_epu32(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u32 = vcgeq_u32(a_.neon_u32, b_.neon_u32);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), a_.u32 >= b_.u32);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {\n        r_.u32[i] = (a_.u32[i] >= b_.u32[i]) ? ~INT32_C(0) : INT32_C(0);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_comge_epu32(a, b) simde_mm_comge_epu32((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_comge_epu64 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_GE)\n    return _mm_com_epu64(a, b, _MM_PCOMCTRL_GE);\n  #elif defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_comge_epu64(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      r_.neon_u64 = vcgeq_u64(a_.neon_u64, b_.neon_u64);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), a_.u64 >= b_.u64);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {\n        r_.u64[i] = (a_.u64[i] >= b_.u64[i]) ? ~INT64_C(0) : INT64_C(0);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_comge_epu64(a, b) simde_mm_comge_epu64((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_comgt_epi8 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_GT)\n    return _mm_com_epi8(a, b, _MM_PCOMCTRL_GT);\n  #elif defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_comgt_epi8(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u8 = vcgtq_s8(a_.neon_i8, b_.neon_i8);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), a_.i8 > b_.i8);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {\n        r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? 
~INT8_C(0) : INT8_C(0);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_comgt_epi8(a, b) simde_mm_comgt_epi8((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_comgt_epi16 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_GT)\n    return _mm_com_epi16(a, b, _MM_PCOMCTRL_GT);\n  #elif defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_comgt_epi16(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u16 = vcgtq_s16(a_.neon_i16, b_.neon_i16);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), a_.i16 > b_.i16);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n        r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? ~INT16_C(0) : INT16_C(0);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_comgt_epi16(a, b) simde_mm_comgt_epi16((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_comgt_epi32 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_GT)\n    return _mm_com_epi32(a, b, _MM_PCOMCTRL_GT);\n  #elif defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_comgt_epi32(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u32 = vcgtq_s32(a_.neon_i32, b_.neon_i32);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 > b_.i32);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n        r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? ~INT32_C(0) : INT32_C(0);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_comgt_epi32(a, b) simde_mm_comgt_epi32((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_comgt_epi64 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_GT)\n    return _mm_com_epi64(a, b, _MM_PCOMCTRL_GT);\n  #elif defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_comgt_epi64(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      r_.neon_u64 = vcgtq_s64(a_.neon_i64, b_.neon_i64);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 > b_.i64);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {\n        r_.i64[i] = (a_.i64[i] > b_.i64[i]) ? 
~INT64_C(0) : INT64_C(0);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_comgt_epi64(a, b) simde_mm_comgt_epi64((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_comgt_epu8 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_GT)\n    return _mm_com_epu8(a, b, _MM_PCOMCTRL_GT);\n  #elif defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_comgt_epu8(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u8 = vcgtq_u8(a_.neon_u8, b_.neon_u8);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.u8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u8), a_.u8 > b_.u8);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {\n        r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? ~INT8_C(0) : INT8_C(0);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_comgt_epu8(a, b) simde_mm_comgt_epu8((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_comgt_epu16 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_GT)\n    return _mm_com_epu16(a, b, _MM_PCOMCTRL_GT);\n  #elif defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_comgt_epu16(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u16 = vcgtq_u16(a_.neon_u16, b_.neon_u16);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.u16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), a_.u16 > b_.u16);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {\n        r_.u16[i] = (a_.u16[i] > b_.u16[i]) ? ~INT16_C(0) : INT16_C(0);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_comgt_epu16(a, b) simde_mm_comgt_epu16((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_comgt_epu32 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_GT)\n    return _mm_com_epu32(a, b, _MM_PCOMCTRL_GT);\n  #elif defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_comgt_epu32(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u32 = vcgtq_u32(a_.neon_u32, b_.neon_u32);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), a_.u32 > b_.u32);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {\n        r_.u32[i] = (a_.u32[i] > b_.u32[i]) ? 
~INT32_C(0) : INT32_C(0);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_comgt_epu32(a, b) simde_mm_comgt_epu32((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_comgt_epu64 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_GT)\n    return _mm_com_epu64(a, b, _MM_PCOMCTRL_GT);\n  #elif defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_comgt_epu64(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      r_.neon_u64 = vcgtq_u64(a_.neon_u64, b_.neon_u64);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), a_.u64 > b_.u64);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {\n        r_.u64[i] = (a_.u64[i] > b_.u64[i]) ? ~INT64_C(0) : INT64_C(0);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_comgt_epu64(a, b) simde_mm_comgt_epu64((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_comle_epi8 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_LE)\n    return _mm_com_epi8(a, b, _MM_PCOMCTRL_LE);\n  #elif defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_comle_epi8(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u8 = vcleq_s8(a_.neon_i8, b_.neon_i8);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), a_.i8 <= b_.i8);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {\n        r_.i8[i] = (a_.i8[i] <= b_.i8[i]) ? ~INT8_C(0) : INT8_C(0);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_comle_epi8(a, b) simde_mm_comle_epi8((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_comle_epi16 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_LE)\n    return _mm_com_epi16(a, b, _MM_PCOMCTRL_LE);\n  #elif defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_comle_epi16(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u16 = vcleq_s16(a_.neon_i16, b_.neon_i16);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), a_.i16 <= b_.i16);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n        r_.i16[i] = (a_.i16[i] <= b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_comle_epi16(a, b) simde_mm_comle_epi16((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_comle_epi32 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_LE)\n    return _mm_com_epi32(a, b, _MM_PCOMCTRL_LE);\n  #elif defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_comle_epi32(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u32 = vcleq_s32(a_.neon_i32, b_.neon_i32);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 <= b_.i32);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n        r_.i32[i] = (a_.i32[i] <= b_.i32[i]) ? ~INT32_C(0) : INT32_C(0);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_comle_epi32(a, b) simde_mm_comle_epi32((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_comle_epi64 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_LE)\n    return _mm_com_epi64(a, b, _MM_PCOMCTRL_LE);\n  #elif defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_comle_epi64(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      r_.neon_u64 = vcleq_s64(a_.neon_i64, b_.neon_i64);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 <= b_.i64);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {\n        r_.i64[i] = (a_.i64[i] <= b_.i64[i]) ? ~INT64_C(0) : INT64_C(0);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_comle_epi64(a, b) simde_mm_comle_epi64((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_comle_epu8 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_LE)\n    return _mm_com_epu8(a, b, _MM_PCOMCTRL_LE);\n  #elif defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_comle_epu8(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u8 = vcleq_u8(a_.neon_u8, b_.neon_u8);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.u8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u8), a_.u8 <= b_.u8);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {\n        r_.u8[i] = (a_.u8[i] <= b_.u8[i]) ? 
~INT8_C(0) : INT8_C(0);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_comle_epu8(a, b) simde_mm_comle_epu8((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_comle_epu16 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_LE)\n    return _mm_com_epu16(a, b, _MM_PCOMCTRL_LE);\n  #elif defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_comle_epu16(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u16 = vcleq_u16(a_.neon_u16, b_.neon_u16);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.u16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), a_.u16 <= b_.u16);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {\n        r_.u16[i] = (a_.u16[i] <= b_.u16[i]) ? ~INT16_C(0) : INT16_C(0);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_comle_epu16(a, b) simde_mm_comle_epu16((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_comle_epu32 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_LE)\n    return _mm_com_epu32(a, b, _MM_PCOMCTRL_LE);\n  #elif defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_comle_epu32(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u32 = vcleq_u32(a_.neon_u32, b_.neon_u32);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), a_.u32 <= b_.u32);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {\n        r_.u32[i] = (a_.u32[i] <= b_.u32[i]) ? ~INT32_C(0) : INT32_C(0);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_comle_epu32(a, b) simde_mm_comle_epu32((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_comle_epu64 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_LE)\n    return _mm_com_epu64(a, b, _MM_PCOMCTRL_LE);\n  #elif defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_comle_epu64(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      r_.neon_u64 = vcleq_u64(a_.neon_u64, b_.neon_u64);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), a_.u64 <= b_.u64);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {\n        r_.u64[i] = (a_.u64[i] <= b_.u64[i]) ? 
~INT64_C(0) : INT64_C(0);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_comle_epu64(a, b) simde_mm_comle_epu64((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_comlt_epi8 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_LT)\n    return _mm_com_epi8(a, b, _MM_PCOMCTRL_LT);\n  #elif defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_comlt_epi8(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u8 = vcltq_s8(a_.neon_i8, b_.neon_i8);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), a_.i8 < b_.i8);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {\n        r_.i8[i] = (a_.i8[i] < b_.i8[i]) ? ~INT8_C(0) : INT8_C(0);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_comlt_epi8(a, b) simde_mm_comlt_epi8((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_comlt_epi16 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_LT)\n    return _mm_com_epi16(a, b, _MM_PCOMCTRL_LT);\n  #elif defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_comlt_epi16(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u16 = vcltq_s16(a_.neon_i16, b_.neon_i16);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), a_.i16 < b_.i16);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n        r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? ~INT16_C(0) : INT16_C(0);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_comlt_epi16(a, b) simde_mm_comlt_epi16((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_comlt_epi32 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_LT)\n    return _mm_com_epi32(a, b, _MM_PCOMCTRL_LT);\n  #elif defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_comlt_epi32(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u32 = vcltq_s32(a_.neon_i32, b_.neon_i32);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 < b_.i32);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n        r_.i32[i] = (a_.i32[i] < b_.i32[i]) ? 
~INT32_C(0) : INT32_C(0);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_comlt_epi32(a, b) simde_mm_comlt_epi32((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_comlt_epi64 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_LT)\n    return _mm_com_epi64(a, b, _MM_PCOMCTRL_LT);\n  #elif defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_comlt_epi64(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      r_.neon_u64 = vcltq_s64(a_.neon_i64, b_.neon_i64);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 < b_.i64);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {\n        r_.i64[i] = (a_.i64[i] < b_.i64[i]) ? ~INT64_C(0) : INT64_C(0);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_comlt_epi64(a, b) simde_mm_comlt_epi64((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_comlt_epu8 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_LT)\n    return _mm_com_epu8(a, b, _MM_PCOMCTRL_LT);\n  #elif defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_comlt_epu8(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u8 = vcltq_u8(a_.neon_u8, b_.neon_u8);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.u8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u8), a_.u8 < b_.u8);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {\n        r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? ~INT8_C(0) : INT8_C(0);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_comlt_epu8(a, b) simde_mm_comlt_epu8((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_comlt_epu16 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_LT)\n    return _mm_com_epu16(a, b, _MM_PCOMCTRL_LT);\n  #elif defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_comlt_epu16(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u16 = vcltq_u16(a_.neon_u16, b_.neon_u16);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.u16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), a_.u16 < b_.u16);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {\n        r_.u16[i] = (a_.u16[i] < b_.u16[i]) ? 
~INT16_C(0) : INT16_C(0);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_comlt_epu16(a, b) simde_mm_comlt_epu16((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_comlt_epu32 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_LT)\n    return _mm_com_epu32(a, b, _MM_PCOMCTRL_LT);\n  #elif defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_comlt_epu32(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u32 = vcltq_u32(a_.neon_u32, b_.neon_u32);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), a_.u32 < b_.u32);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {\n        r_.u32[i] = (a_.u32[i] < b_.u32[i]) ? ~INT32_C(0) : INT32_C(0);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_comlt_epu32(a, b) simde_mm_comlt_epu32((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_comlt_epu64 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_LT)\n    return _mm_com_epu64(a, b, _MM_PCOMCTRL_LT);\n  #elif defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_comlt_epu64(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      r_.neon_u64 = vcltq_u64(a_.neon_u64, b_.neon_u64);\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), a_.u64 < b_.u64);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {\n        r_.u64[i] = (a_.u64[i] < b_.u64[i]) ? ~INT64_C(0) : INT64_C(0);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_comlt_epu64(a, b) simde_mm_comlt_epu64((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_comneq_epi8 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_NEQ)\n    return _mm_com_epi8(a, b, _MM_PCOMCTRL_NEQ);\n  #elif defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_comneq_epi8(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u8 = vmvnq_u8(vceqq_s8(a_.neon_i8, b_.neon_i8));\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), a_.i8 != b_.i8);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {\n        r_.i8[i] = (a_.i8[i] != b_.i8[i]) ? 
~INT8_C(0) : INT8_C(0);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_comneq_epi8(a, b) simde_mm_comneq_epi8((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_comneq_epi16 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_NEQ)\n    return _mm_com_epi16(a, b, _MM_PCOMCTRL_NEQ);\n  #elif defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_comneq_epi16(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u16 = vmvnq_u16(vceqq_s16(a_.neon_i16, b_.neon_i16));\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), a_.i16 != b_.i16);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n        r_.i16[i] = (a_.i16[i] != b_.i16[i]) ? ~INT16_C(0) : INT16_C(0);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_comneq_epi16(a, b) simde_mm_comneq_epi16((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_comneq_epi32 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_NEQ)\n    return _mm_com_epi32(a, b, _MM_PCOMCTRL_NEQ);\n  #elif defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_comneq_epi32(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u32 = vmvnq_u32(vceqq_s32(a_.neon_i32, b_.neon_i32));\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 != b_.i32);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n        r_.i32[i] = (a_.i32[i] != b_.i32[i]) ? ~INT32_C(0) : INT32_C(0);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_comneq_epi32(a, b) simde_mm_comneq_epi32((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_comneq_epi64 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_NEQ)\n    return _mm_com_epi64(a, b, _MM_PCOMCTRL_NEQ);\n  #elif defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_comneq_epi64(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      r_.neon_u32 = vmvnq_u32(vreinterpretq_u32_u64(vceqq_s64(a_.neon_i64, b_.neon_i64)));\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 != b_.i64);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {\n        r_.i64[i] = (a_.i64[i] != b_.i64[i]) ? 
~INT64_C(0) : INT64_C(0);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_comneq_epi64(a, b) simde_mm_comneq_epi64((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_comneq_epu8 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_NEQ)\n    return _mm_com_epu8(a, b, _MM_PCOMCTRL_NEQ);\n  #elif defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_comneq_epu8(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u8 = vmvnq_u8(vceqq_u8(a_.neon_u8, b_.neon_u8));\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.u8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u8), a_.u8 != b_.u8);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {\n        r_.u8[i] = (a_.u8[i] != b_.u8[i]) ? ~INT8_C(0) : INT8_C(0);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_comneq_epu8(a, b) simde_mm_comneq_epu8((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_comneq_epu16 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_NEQ)\n    return _mm_com_epu16(a, b, _MM_PCOMCTRL_NEQ);\n  #elif defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_comneq_epu16(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u16 = vmvnq_u16(vceqq_u16(a_.neon_u16, b_.neon_u16));\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.u16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), a_.u16 != b_.u16);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {\n        r_.u16[i] = (a_.u16[i] != b_.u16[i]) ? ~INT16_C(0) : INT16_C(0);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_comneq_epu16(a, b) simde_mm_comneq_epu16((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_comneq_epu32 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_NEQ)\n    return _mm_com_epu32(a, b, _MM_PCOMCTRL_NEQ);\n  #elif defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_comneq_epu32(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u32 = vmvnq_u32(vceqq_u32(a_.neon_u32, b_.neon_u32));\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), a_.u32 != b_.u32);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {\n        r_.u32[i] = (a_.u32[i] != b_.u32[i]) ? 
~INT32_C(0) : INT32_C(0);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_comneq_epu32(a, b) simde_mm_comneq_epu32((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_comneq_epu64 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_NEQ)\n    return _mm_com_epu64(a, b, _MM_PCOMCTRL_NEQ);\n  #elif defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_comneq_epu64(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      r_.neon_u32 = vmvnq_u32(vreinterpretq_u32_u64(vceqq_u64(a_.neon_u64, b_.neon_u64)));\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)\n      r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), a_.u64 != b_.u64);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {\n        r_.u64[i] = (a_.u64[i] != b_.u64[i]) ? ~INT64_C(0) : INT64_C(0);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_comneq_epu64(a, b) simde_mm_comneq_epu64((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_comfalse_epi8 (simde__m128i a, simde__m128i b) {\n  (void) a;\n  (void) b;\n  return simde_mm_setzero_si128();\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_comfalse_epi8(a, b) simde_mm_comfalse_epi8((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_comfalse_epi16 (simde__m128i a, simde__m128i b) {\n  (void) a;\n  (void) b;\n  return simde_mm_setzero_si128();\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_comfalse_epi16(a, b) simde_mm_comfalse_epi16((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_comfalse_epi32 (simde__m128i a, simde__m128i b) {\n  (void) a;\n  (void) b;\n  return simde_mm_setzero_si128();\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_comfalse_epi32(a, b) simde_mm_comfalse_epi32((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_comfalse_epi64 (simde__m128i a, simde__m128i b) {\n  (void) a;\n  (void) b;\n  return simde_mm_setzero_si128();\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_comfalse_epi64(a, b) simde_mm_comfalse_epi64((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_comfalse_epu8 (simde__m128i a, simde__m128i b) {\n  (void) a;\n  (void) b;\n  return simde_mm_setzero_si128();\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_comfalse_epu8(a, b) simde_mm_comfalse_epu8((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_comfalse_epu16 (simde__m128i a, simde__m128i b) {\n  (void) a;\n  (void) b;\n  return simde_mm_setzero_si128();\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_comfalse_epu16(a, b) simde_mm_comfalse_epu16((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_comfalse_epu32 (simde__m128i a, simde__m128i b) {\n  (void) a;\n  (void) b;\n  return simde_mm_setzero_si128();\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_comfalse_epu32(a, b) simde_mm_comfalse_epu32((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_comfalse_epu64 (simde__m128i a, simde__m128i b) {\n  (void) a;\n  (void) b;\n  return simde_mm_setzero_si128();\n}\n#if 
defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_comfalse_epu64(a, b) simde_mm_comfalse_epu64((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_comtrue_epi8 (simde__m128i a, simde__m128i b) {\n  (void) a;\n  (void) b;\n  return simde_x_mm_setone_si128();\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_comtrue_epi8(a, b) simde_mm_comtrue_epi8((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_comtrue_epi16 (simde__m128i a, simde__m128i b) {\n  (void) a;\n  (void) b;\n  return simde_x_mm_setone_si128();\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_comtrue_epi16(a, b) simde_mm_comtrue_epi16((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_comtrue_epi32 (simde__m128i a, simde__m128i b) {\n  (void) a;\n  (void) b;\n  return simde_x_mm_setone_si128();\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_comtrue_epi32(a, b) simde_mm_comtrue_epi32((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_comtrue_epi64 (simde__m128i a, simde__m128i b) {\n  (void) a;\n  (void) b;\n  return simde_x_mm_setone_si128();\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_comtrue_epi64(a, b) simde_mm_comtrue_epi64((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_comtrue_epu8 (simde__m128i a, simde__m128i b) {\n  (void) a;\n  (void) b;\n  return simde_x_mm_setone_si128();\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_comtrue_epu8(a, b) simde_mm_comtrue_epu8((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_comtrue_epu16 (simde__m128i a, simde__m128i b) {\n  (void) a;\n  (void) b;\n  return simde_x_mm_setone_si128();\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_comtrue_epu16(a, b) simde_mm_comtrue_epu16((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_comtrue_epu32 (simde__m128i a, simde__m128i b) {\n  (void) a;\n  (void) b;\n  return simde_x_mm_setone_si128();\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_comtrue_epu32(a, b) simde_mm_comtrue_epu32((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_comtrue_epu64 (simde__m128i a, simde__m128i b) {\n  (void) a;\n  (void) b;\n  return simde_x_mm_setone_si128();\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_comtrue_epu64(a, b) simde_mm_comtrue_epu64((a), (b))\n#endif\n\n#if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_LT)\n  #define SIMDE_X86_XOP_HAVE_COM_ 1\n  #define SIMDE_MM_PCOMCTRL_LT    _MM_PCOMCTRL_LT\n  #define SIMDE_MM_PCOMCTRL_LE    _MM_PCOMCTRL_LE\n  #define SIMDE_MM_PCOMCTRL_GT    _MM_PCOMCTRL_GT\n  #define SIMDE_MM_PCOMCTRL_GE    _MM_PCOMCTRL_GE\n  #define SIMDE_MM_PCOMCTRL_EQ    _MM_PCOMCTRL_EQ\n  #define SIMDE_MM_PCOMCTRL_NEQ   _MM_PCOMCTRL_NEQ\n  #define SIMDE_MM_PCOMCTRL_FALSE _MM_PCOMCTRL_FALSE\n  #define SIMDE_MM_PCOMCTRL_TRUE  _MM_PCOMCTRL_TRUE\n#else\n  #define SIMDE_MM_PCOMCTRL_LT    0\n  #define SIMDE_MM_PCOMCTRL_LE    1\n  #define SIMDE_MM_PCOMCTRL_GT    2\n  #define SIMDE_MM_PCOMCTRL_GE    3\n  #define SIMDE_MM_PCOMCTRL_EQ    4\n  #define SIMDE_MM_PCOMCTRL_NEQ   5\n  #define SIMDE_MM_PCOMCTRL_FALSE 6\n  #define SIMDE_MM_PCOMCTRL_TRUE  7\n\n  #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n    #define _MM_PCOMCTRL_LT    SIMDE_MM_PCOMCTRL_LT\n    #define _MM_PCOMCTRL_LE    SIMDE_MM_PCOMCTRL_LE\n    #define _MM_PCOMCTRL_GT    SIMDE_MM_PCOMCTRL_GT\n    #define _MM_PCOMCTRL_GE    
SIMDE_MM_PCOMCTRL_GE\n    #define _MM_PCOMCTRL_EQ    SIMDE_MM_PCOMCTRL_EQ\n    #define _MM_PCOMCTRL_NEQ   SIMDE_MM_PCOMCTRL_NEQ\n    #define _MM_PCOMCTRL_FALSE SIMDE_MM_PCOMCTRL_FALSE\n    #define _MM_PCOMCTRL_TRUE  SIMDE_MM_PCOMCTRL_TRUE\n  #endif\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_com_epi8 (simde__m128i a, simde__m128i b, const int imm8)\n    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) {\n  switch (imm8) {\n    case SIMDE_MM_PCOMCTRL_LT:\n      return simde_mm_comlt_epi8(a, b);\n    case SIMDE_MM_PCOMCTRL_LE:\n      return simde_mm_comle_epi8(a, b);\n    case SIMDE_MM_PCOMCTRL_GT:\n      return simde_mm_comgt_epi8(a, b);\n    case SIMDE_MM_PCOMCTRL_GE:\n      return simde_mm_comge_epi8(a, b);\n    case SIMDE_MM_PCOMCTRL_EQ:\n      return simde_mm_comeq_epi8(a, b);\n    case SIMDE_MM_PCOMCTRL_NEQ:\n      return simde_mm_comneq_epi8(a, b);\n    case SIMDE_MM_PCOMCTRL_FALSE:\n      return simde_mm_comfalse_epi8(a, b);\n    case SIMDE_MM_PCOMCTRL_TRUE:\n      return simde_mm_comtrue_epi8(a, b);\n    default:\n      HEDLEY_UNREACHABLE_RETURN(simde_mm_setzero_si128());\n  }\n}\n#if defined(SIMDE_X86_XOP_NATIVE) && defined(SIMDE_X86_XOP_HAVE_COM_)\n  #define simde_mm_com_epi8(a, b, imm8) _mm_com_epi8((a), (b), (imm8))\n#endif\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_com_epi8(a, b, imm8) simde_mm_com_epi8((a), (b), (imm8))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_com_epi16 (simde__m128i a, simde__m128i b, const int imm8)\n    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) {\n  switch (imm8) {\n    case SIMDE_MM_PCOMCTRL_LT:\n      return simde_mm_comlt_epi16(a, b);\n    case SIMDE_MM_PCOMCTRL_LE:\n      return simde_mm_comle_epi16(a, b);\n    case SIMDE_MM_PCOMCTRL_GT:\n      return simde_mm_comgt_epi16(a, b);\n    case SIMDE_MM_PCOMCTRL_GE:\n      return simde_mm_comge_epi16(a, b);\n    case SIMDE_MM_PCOMCTRL_EQ:\n      return simde_mm_comeq_epi16(a, b);\n    case SIMDE_MM_PCOMCTRL_NEQ:\n      return simde_mm_comneq_epi16(a, b);\n    case SIMDE_MM_PCOMCTRL_FALSE:\n      return simde_mm_comfalse_epi16(a, b);\n    case SIMDE_MM_PCOMCTRL_TRUE:\n      return simde_mm_comtrue_epi16(a, b);\n    default:\n      HEDLEY_UNREACHABLE_RETURN(simde_mm_setzero_si128());\n  }\n}\n#if defined(SIMDE_X86_XOP_NATIVE) && defined(SIMDE_X86_XOP_HAVE_COM_)\n  #define simde_mm_com_epi16(a, b, imm8) _mm_com_epi16((a), (b), (imm8))\n#endif\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_com_epi16(a, b, imm8) simde_mm_com_epi16((a), (b), (imm8))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_com_epi32 (simde__m128i a, simde__m128i b, const int imm8)\n    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) {\n  switch (imm8) {\n    case SIMDE_MM_PCOMCTRL_LT:\n      return simde_mm_comlt_epi32(a, b);\n    case SIMDE_MM_PCOMCTRL_LE:\n      return simde_mm_comle_epi32(a, b);\n    case SIMDE_MM_PCOMCTRL_GT:\n      return simde_mm_comgt_epi32(a, b);\n    case SIMDE_MM_PCOMCTRL_GE:\n      return simde_mm_comge_epi32(a, b);\n    case SIMDE_MM_PCOMCTRL_EQ:\n      return simde_mm_comeq_epi32(a, b);\n    case SIMDE_MM_PCOMCTRL_NEQ:\n      return simde_mm_comneq_epi32(a, b);\n    case SIMDE_MM_PCOMCTRL_FALSE:\n      return simde_mm_comfalse_epi32(a, b);\n    case SIMDE_MM_PCOMCTRL_TRUE:\n      return simde_mm_comtrue_epi32(a, b);\n    default:\n      HEDLEY_UNREACHABLE_RETURN(simde_mm_setzero_si128());\n  }\n}\n#if defined(SIMDE_X86_XOP_NATIVE) && defined(SIMDE_X86_XOP_HAVE_COM_)\n  #define simde_mm_com_epi32(a, b, imm8) _mm_com_epi32((a), (b), 
(imm8))\n#endif\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_com_epi32(a, b, imm8) simde_mm_com_epi32((a), (b), (imm8))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_com_epi64 (simde__m128i a, simde__m128i b, const int imm8)\n    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) {\n  switch (imm8) {\n    case SIMDE_MM_PCOMCTRL_LT:\n      return simde_mm_comlt_epi64(a, b);\n    case SIMDE_MM_PCOMCTRL_LE:\n      return simde_mm_comle_epi64(a, b);\n    case SIMDE_MM_PCOMCTRL_GT:\n      return simde_mm_comgt_epi64(a, b);\n    case SIMDE_MM_PCOMCTRL_GE:\n      return simde_mm_comge_epi64(a, b);\n    case SIMDE_MM_PCOMCTRL_EQ:\n      return simde_mm_comeq_epi64(a, b);\n    case SIMDE_MM_PCOMCTRL_NEQ:\n      return simde_mm_comneq_epi64(a, b);\n    case SIMDE_MM_PCOMCTRL_FALSE:\n      return simde_mm_comfalse_epi64(a, b);\n    case SIMDE_MM_PCOMCTRL_TRUE:\n      return simde_mm_comtrue_epi64(a, b);\n    default:\n      HEDLEY_UNREACHABLE_RETURN(simde_mm_setzero_si128());\n  }\n}\n#if defined(SIMDE_X86_XOP_NATIVE) && defined(SIMDE_X86_XOP_HAVE_COM_)\n  #define simde_mm_com_epi64(a, b, imm8) _mm_com_epi64((a), (b), (imm8))\n#endif\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_com_epi64(a, b, imm8) simde_mm_com_epi64((a), (b), (imm8))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_com_epu8 (simde__m128i a, simde__m128i b, const int imm8)\n    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) {\n  switch (imm8) {\n    case SIMDE_MM_PCOMCTRL_LT:\n      return simde_mm_comlt_epu8(a, b);\n    case SIMDE_MM_PCOMCTRL_LE:\n      return simde_mm_comle_epu8(a, b);\n    case SIMDE_MM_PCOMCTRL_GT:\n      return simde_mm_comgt_epu8(a, b);\n    case SIMDE_MM_PCOMCTRL_GE:\n      return simde_mm_comge_epu8(a, b);\n    case SIMDE_MM_PCOMCTRL_EQ:\n      return simde_mm_comeq_epu8(a, b);\n    case SIMDE_MM_PCOMCTRL_NEQ:\n      return simde_mm_comneq_epu8(a, b);\n    case SIMDE_MM_PCOMCTRL_FALSE:\n      return simde_mm_comfalse_epu8(a, b);\n    case SIMDE_MM_PCOMCTRL_TRUE:\n      return simde_mm_comtrue_epu8(a, b);\n    default:\n      HEDLEY_UNREACHABLE_RETURN(simde_mm_setzero_si128());\n  }\n}\n#if defined(SIMDE_X86_XOP_NATIVE) && defined(SIMDE_X86_XOP_HAVE_COM_)\n  #define simde_mm_com_epu8(a, b, imm8) _mm_com_epu8((a), (b), (imm8))\n#endif\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_com_epu8(a, b, imm8) simde_mm_com_epu8((a), (b), (imm8))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_com_epu16 (simde__m128i a, simde__m128i b, const int imm8)\n    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) {\n  switch (imm8) {\n    case SIMDE_MM_PCOMCTRL_LT:\n      return simde_mm_comlt_epu16(a, b);\n    case SIMDE_MM_PCOMCTRL_LE:\n      return simde_mm_comle_epu16(a, b);\n    case SIMDE_MM_PCOMCTRL_GT:\n      return simde_mm_comgt_epu16(a, b);\n    case SIMDE_MM_PCOMCTRL_GE:\n      return simde_mm_comge_epu16(a, b);\n    case SIMDE_MM_PCOMCTRL_EQ:\n      return simde_mm_comeq_epu16(a, b);\n    case SIMDE_MM_PCOMCTRL_NEQ:\n      return simde_mm_comneq_epu16(a, b);\n    case SIMDE_MM_PCOMCTRL_FALSE:\n      return simde_mm_comfalse_epu16(a, b);\n    case SIMDE_MM_PCOMCTRL_TRUE:\n      return simde_mm_comtrue_epu16(a, b);\n    default:\n      HEDLEY_UNREACHABLE_RETURN(simde_mm_setzero_si128());\n  }\n}\n#if defined(SIMDE_X86_XOP_NATIVE) && defined(SIMDE_X86_XOP_HAVE_COM_)\n  #define simde_mm_com_epu16(a, b, imm8) _mm_com_epu16((a), (b), (imm8))\n#endif\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_com_epu16(a, b, imm8) 
simde_mm_com_epu16((a), (b), (imm8))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_com_epu32 (simde__m128i a, simde__m128i b, const int imm8)\n    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) {\n  switch (imm8) {\n    case SIMDE_MM_PCOMCTRL_LT:\n      return simde_mm_comlt_epu32(a, b);\n    case SIMDE_MM_PCOMCTRL_LE:\n      return simde_mm_comle_epu32(a, b);\n    case SIMDE_MM_PCOMCTRL_GT:\n      return simde_mm_comgt_epu32(a, b);\n    case SIMDE_MM_PCOMCTRL_GE:\n      return simde_mm_comge_epu32(a, b);\n    case SIMDE_MM_PCOMCTRL_EQ:\n      return simde_mm_comeq_epu32(a, b);\n    case SIMDE_MM_PCOMCTRL_NEQ:\n      return simde_mm_comneq_epu32(a, b);\n    case SIMDE_MM_PCOMCTRL_FALSE:\n      return simde_mm_comfalse_epu32(a, b);\n    case SIMDE_MM_PCOMCTRL_TRUE:\n      return simde_mm_comtrue_epu32(a, b);\n    default:\n      HEDLEY_UNREACHABLE_RETURN(simde_mm_setzero_si128());\n  }\n}\n#if defined(SIMDE_X86_XOP_NATIVE) && defined(SIMDE_X86_XOP_HAVE_COM_)\n  #define simde_mm_com_epu32(a, b, imm8) _mm_com_epu32((a), (b), (imm8))\n#endif\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_com_epu32(a, b, imm8) simde_mm_com_epu32((a), (b), (imm8))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_com_epu64 (simde__m128i a, simde__m128i b, const int imm8)\n    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) {\n  switch (imm8) {\n    case SIMDE_MM_PCOMCTRL_LT:\n      return simde_mm_comlt_epu64(a, b);\n    case SIMDE_MM_PCOMCTRL_LE:\n      return simde_mm_comle_epu64(a, b);\n    case SIMDE_MM_PCOMCTRL_GT:\n      return simde_mm_comgt_epu64(a, b);\n    case SIMDE_MM_PCOMCTRL_GE:\n      return simde_mm_comge_epu64(a, b);\n    case SIMDE_MM_PCOMCTRL_EQ:\n      return simde_mm_comeq_epu64(a, b);\n    case SIMDE_MM_PCOMCTRL_NEQ:\n      return simde_mm_comneq_epu64(a, b);\n    case SIMDE_MM_PCOMCTRL_FALSE:\n      return simde_mm_comfalse_epu64(a, b);\n    case SIMDE_MM_PCOMCTRL_TRUE:\n      return simde_mm_comtrue_epu64(a, b);\n    default:\n      HEDLEY_UNREACHABLE_RETURN(simde_mm_setzero_si128());\n  }\n}\n#if defined(SIMDE_X86_XOP_NATIVE) && defined(SIMDE_X86_XOP_HAVE_COM_)\n  #define simde_mm_com_epu64(a, b, imm8) _mm_com_epu64((a), (b), (imm8))\n#endif\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_com_epu64(a, b, imm8) simde_mm_com_epu64((a), (b), (imm8))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_frcz_ps (simde__m128 a) {\n  #if defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_frcz_ps(a);\n  #else\n    simde__m128_private\n      r_,\n      a_ = simde__m128_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n      #if defined(simde_math_modff)\n        simde_float32 integral;\n        r_.f32[i] = simde_math_modff(a_.f32[i], &integral);\n      #else\n        r_.f32[i] = (a_.f32[i] / 1.0f);\n      #endif\n    }\n\n    return simde__m128_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_frcz_ps(a) simde_mm_frcz_ps((a))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_frcz_pd (simde__m128d a) {\n  #if defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_frcz_pd(a);\n  #else\n    simde__m128d_private\n      r_,\n      a_ = simde__m128d_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n      #if defined(simde_math_modf)\n        simde_float64 integral;\n        r_.f64[i] = simde_math_modf(a_.f64[i], &integral);\n      #else\n        r_.f64[i] = (a_.f64[i] / 1.0f);\n   
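   /* note: when simde_math_modf is unavailable this fallback divides by one, leaving the value unchanged rather than extracting its fractional part */\n   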
   #endif\n    }\n\n    return simde__m128d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_frcz_pd(a) simde_mm_frcz_pd((a))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_frcz_ss (simde__m128 a, simde__m128 b) {\n  #if defined(SIMDE_X86_XOP_NATIVE) && !defined(SIMDE_BUG_CLANG_48673)\n    return _mm_frcz_ss(a, b);\n  #else\n    simde__m128_private\n      a_ = simde__m128_to_private(a),\n      b_ = simde__m128_to_private(b);\n\n    #if defined(simde_math_modff)\n      simde_float32 integral;\n      a_.f32[0] = simde_math_modff(b_.f32[0], &integral);\n    #else\n      a_.f32[0] = (b_.f32[0] / 1.0f);\n    #endif\n\n    return simde__m128_from_private(a_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_frcz_ss(a, b) simde_mm_frcz_ss((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_frcz_sd (simde__m128d a, simde__m128d b) {\n  #if defined(SIMDE_X86_XOP_NATIVE) && !defined(SIMDE_BUG_CLANG_48673)\n    return _mm_frcz_sd(a, b);\n  #else\n    simde__m128d_private\n      a_ = simde__m128d_to_private(a),\n      b_ = simde__m128d_to_private(b);\n\n    #if defined(simde_math_modf)\n      simde_float64 integral;\n      a_.f64[0] = simde_math_modf(b_.f64[0], &integral);\n    #else\n      a_.f64[0] = (b_.f64[0] / 1.0);\n    #endif\n\n    return simde__m128d_from_private(a_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_frcz_sd(a, b) simde_mm_frcz_sd((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_frcz_ps (simde__m256 a) {\n  #if defined(SIMDE_X86_XOP_NATIVE)\n    return _mm256_frcz_ps(a);\n  #else\n    simde__m256_private\n      r_,\n      a_ = simde__m256_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {\n        r_.m128[i] = simde_mm_frcz_ps(a_.m128[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n        #if defined(simde_math_modff)\n          simde_float32 integral;\n          r_.f32[i] = simde_math_modff(a_.f32[i], &integral);\n        #else\n          r_.f32[i] = (a_.f32[i] / 1.0f);\n        #endif\n      }\n    #endif\n\n    return simde__m256_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm256_frcz_ps(a) simde_mm256_frcz_ps((a))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_frcz_pd (simde__m256d a) {\n  #if defined(SIMDE_X86_XOP_NATIVE)\n    return _mm256_frcz_pd(a);\n  #else\n    simde__m256d_private\n      r_,\n      a_ = simde__m256d_to_private(a);\n\n    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n      for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {\n        r_.m128d[i] = simde_mm_frcz_pd(a_.m128d[i]);\n      }\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n        #if defined(simde_math_modf)\n          simde_float64 integral;\n          r_.f64[i] = simde_math_modf(a_.f64[i], &integral);\n        #else\n          r_.f64[i] = (a_.f64[i] / 1.0);\n        #endif\n      }\n    #endif\n\n    return simde__m256d_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm256_frcz_pd(a) simde_mm256_frcz_pd((a))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_haddw_epi8 (simde__m128i a) {\n  #if defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_haddw_epi8(a);\n  #elif 
defined(SIMDE_X86_SSSE3_NATIVE)\n      return _mm_maddubs_epi16(_mm_set1_epi8(INT8_C(1)), a);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i16 = vpaddlq_s8(a_.neon_i8);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_i16x8_extadd_pairwise_i8x16(a_.wasm_v128);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)\n      SIMDE_POWER_ALTIVEC_VECTOR(signed char) one = vec_splat_s8(1);\n      r_.altivec_i16 =\n        vec_add(\n          vec_mule(a_.altivec_i8, one),\n          vec_mulo(a_.altivec_i8, one)\n        );\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)\n      r_.i16 =\n        ((a_.i16 << 8) >> 8) +\n        ((a_.i16 >> 8)     );\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n        r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i8[(i * 2)]) + HEDLEY_STATIC_CAST(int16_t, a_.i8[(i * 2) + 1]);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_haddw_epi8(a) simde_mm_haddw_epi8((a))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_haddw_epu8 (simde__m128i a) {\n  #if defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_haddw_epu8(a);\n  #elif defined(SIMDE_X86_SSSE3_NATIVE)\n    return _mm_maddubs_epi16(a, _mm_set1_epi8(INT8_C(1)));\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u16 = vpaddlq_u8(a_.neon_u8);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_u16x8_extadd_pairwise_u8x16(a_.wasm_v128);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)\n      SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) one = vec_splat_u8(1);\n      r_.altivec_u16 =\n        vec_add(\n          vec_mule(a_.altivec_u8, one),\n          vec_mulo(a_.altivec_u8, one)\n        );\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)\n      r_.u16 =\n        ((a_.u16 << 8) >> 8) +\n        ((a_.u16 >> 8)     );\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {\n        r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u8[(i * 2)]) + HEDLEY_STATIC_CAST(uint16_t, a_.u8[(i * 2) + 1]);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_haddw_epu8(a) simde_mm_haddw_epu8((a))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_haddd_epi8 (simde__m128i a) {\n  #if defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_haddd_epi8(a);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i32 = vpaddlq_s16(vpaddlq_s8(a_.neon_i8));\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n        r_.i32[i] =\n          HEDLEY_STATIC_CAST(int32_t, a_.i8[(i * 4)    ]) + HEDLEY_STATIC_CAST(int32_t, a_.i8[(i * 4) + 1]) +\n          HEDLEY_STATIC_CAST(int32_t, a_.i8[(i * 4) + 2]) + HEDLEY_STATIC_CAST(int32_t, a_.i8[(i * 4) + 3]);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_haddd_epi8(a) simde_mm_haddd_epi8((a))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_haddd_epi16 (simde__m128i a) {\n  #if defined(SIMDE_X86_XOP_NATIVE)\n  
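  /* horizontal add: each pair of adjacent signed 16-bit lanes is widened and summed into one 32-bit lane */\n  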
  return _mm_haddd_epi16(a);\n  #elif defined(SIMDE_X86_SSE2_NATIVE)\n    return _mm_madd_epi16(a, _mm_set1_epi16(INT16_C(1)));\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i32 = vpaddlq_s16(a_.neon_i16);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_i32x4_extadd_pairwise_i16x8(a_.wasm_v128);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)\n      SIMDE_POWER_ALTIVEC_VECTOR(signed short) one = vec_splat_s16(1);\n      r_.altivec_i32 =\n        vec_add(\n          vec_mule(a_.altivec_i16, one),\n          vec_mulo(a_.altivec_i16, one)\n        );\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)\n      r_.i32 =\n        ((a_.i32 << 16) >> 16) +\n        ((a_.i32 >> 16)      );\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n        r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, a_.i16[(i * 2)]) + HEDLEY_STATIC_CAST(int32_t, a_.i16[(i * 2) + 1]);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_haddd_epi16(a) simde_mm_haddd_epi16((a))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_haddd_epu8 (simde__m128i a) {\n  #if defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_haddd_epu8(a);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u32 = vpaddlq_u16(vpaddlq_u8(a_.neon_u8));\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {\n        r_.u32[i] =\n          HEDLEY_STATIC_CAST(uint32_t, a_.u8[(i * 4)    ]) + HEDLEY_STATIC_CAST(uint32_t, a_.u8[(i * 4) + 1]) +\n          HEDLEY_STATIC_CAST(uint32_t, a_.u8[(i * 4) + 2]) + HEDLEY_STATIC_CAST(uint32_t, a_.u8[(i * 4) + 3]);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_haddd_epu8(a) simde_mm_haddd_epu8((a))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_haddd_epu16 (simde__m128i a) {\n  #if defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_haddd_epu16(a);\n  #elif defined(SIMDE_X86_SSE2_NATIVE)\n    return\n      _mm_add_epi32(\n        _mm_srli_epi32(a, 16),\n        _mm_and_si128(a, _mm_set1_epi32(INT32_C(0x0000ffff)))\n      );\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u32 = vpaddlq_u16(a_.neon_u16);\n    #elif defined(SIMDE_WASM_SIMD128_NATIVE)\n      r_.wasm_v128 = wasm_u32x4_extadd_pairwise_u16x8(a_.wasm_v128);\n    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)\n      SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) one = vec_splat_u16(1);\n      r_.altivec_u32 =\n        vec_add(\n          vec_mule(a_.altivec_u16, one),\n          vec_mulo(a_.altivec_u16, one)\n        );\n    #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)\n      r_.u32 =\n        ((a_.u32 << 16) >> 16) +\n        ((a_.u32 >> 16)      );\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {\n        r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, a_.u16[(i * 2)]) + HEDLEY_STATIC_CAST(uint32_t, a_.u16[(i * 2) + 1]);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_haddd_epu16(a) 
simde_mm_haddd_epu16((a))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_haddq_epi8 (simde__m128i a) {\n  #if defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_haddq_epi8(a);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i64 = vpaddlq_s32(vpaddlq_s16(vpaddlq_s8(a_.neon_i8)));\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {\n        r_.i64[i] =\n          HEDLEY_STATIC_CAST(int64_t, a_.i8[(i * 8)    ]) + HEDLEY_STATIC_CAST(int64_t, a_.i8[(i * 8) + 1]) +\n          HEDLEY_STATIC_CAST(int64_t, a_.i8[(i * 8) + 2]) + HEDLEY_STATIC_CAST(int64_t, a_.i8[(i * 8) + 3]) +\n          HEDLEY_STATIC_CAST(int64_t, a_.i8[(i * 8) + 4]) + HEDLEY_STATIC_CAST(int64_t, a_.i8[(i * 8) + 5]) +\n          HEDLEY_STATIC_CAST(int64_t, a_.i8[(i * 8) + 6]) + HEDLEY_STATIC_CAST(int64_t, a_.i8[(i * 8) + 7]);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_haddq_epi8(a) simde_mm_haddq_epi8((a))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_haddq_epi16 (simde__m128i a) {\n  #if defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_haddq_epi16(a);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i64 = vpaddlq_s32(vpaddlq_s16(a_.neon_i16));\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {\n        r_.i64[i] =\n          HEDLEY_STATIC_CAST(int64_t, a_.i16[(i * 4)    ]) + HEDLEY_STATIC_CAST(int64_t, a_.i16[(i * 4) + 1]) +\n          HEDLEY_STATIC_CAST(int64_t, a_.i16[(i * 4) + 2]) + HEDLEY_STATIC_CAST(int64_t, a_.i16[(i * 4) + 3]);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_haddq_epi16(a) simde_mm_haddq_epi16((a))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_haddq_epi32 (simde__m128i a) {\n  #if defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_haddq_epi32(a);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i64 = vpaddlq_s32(a_.neon_i32);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {\n        r_.i64[i] = HEDLEY_STATIC_CAST(int64_t, a_.i32[(i * 2)    ]) + HEDLEY_STATIC_CAST(int64_t, a_.i32[(i * 2) + 1]);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_haddq_epi32(a) simde_mm_haddq_epi32((a))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_haddq_epu8 (simde__m128i a) {\n  #if defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_haddq_epu8(a);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u64 = vpaddlq_u32(vpaddlq_u16(vpaddlq_u8(a_.neon_u8)));\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {\n        r_.u64[i] =\n          HEDLEY_STATIC_CAST(uint64_t, a_.u8[(i * 8)    ]) + HEDLEY_STATIC_CAST(uint64_t, a_.u8[(i * 8) + 1]) +\n          HEDLEY_STATIC_CAST(uint64_t, a_.u8[(i * 8) + 2]) + HEDLEY_STATIC_CAST(uint64_t, a_.u8[(i * 8) + 3]) +\n          
HEDLEY_STATIC_CAST(uint64_t, a_.u8[(i * 8) + 4]) + HEDLEY_STATIC_CAST(uint64_t, a_.u8[(i * 8) + 5]) +\n          HEDLEY_STATIC_CAST(uint64_t, a_.u8[(i * 8) + 6]) + HEDLEY_STATIC_CAST(uint64_t, a_.u8[(i * 8) + 7]);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_haddq_epu8(a) simde_mm_haddq_epu8((a))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_haddq_epu16 (simde__m128i a) {\n  #if defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_haddq_epu16(a);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u64 = vpaddlq_u32(vpaddlq_u16(a_.neon_u16));\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {\n        r_.u64[i] =\n          HEDLEY_STATIC_CAST(uint64_t, a_.u16[(i * 4)    ]) + HEDLEY_STATIC_CAST(uint64_t, a_.u16[(i * 4) + 1]) +\n          HEDLEY_STATIC_CAST(uint64_t, a_.u16[(i * 4) + 2]) + HEDLEY_STATIC_CAST(uint64_t, a_.u16[(i * 4) + 3]);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_haddq_epu16(a) simde_mm_haddq_epu16((a))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_haddq_epu32 (simde__m128i a) {\n  #if defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_haddq_epu32(a);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u64 = vpaddlq_u32(a_.neon_u32);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {\n        r_.u64[i] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[(i * 2)    ]) + HEDLEY_STATIC_CAST(uint64_t, a_.u32[(i * 2) + 1]);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_haddq_epu32(a) simde_mm_haddq_epu32((a))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_hsubw_epi8 (simde__m128i a) {\n  #if defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_hsubw_epi8(a);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n      r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i8[i * 2]) - HEDLEY_STATIC_CAST(int16_t, a_.i8[(i * 2) + 1]);\n    }\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_hsubw_epi8(a) simde_mm_hsubw_epi8((a))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_hsubd_epi16 (simde__m128i a) {\n  #if defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_hsubd_epi16(a);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n      r_.i32[i] =\n        HEDLEY_STATIC_CAST(int32_t, a_.i16[(i * 2)    ]) - HEDLEY_STATIC_CAST(int32_t, a_.i16[(i * 2) + 1]);\n    }\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_hsubd_epi16(a) simde_mm_hsubd_epi16((a))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_hsubq_epi32 (simde__m128i a) {\n  #if defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_hsubq_epi32(a);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = 
simde__m128i_to_private(a);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {\n      r_.i64[i] = HEDLEY_STATIC_CAST(int64_t, a_.i32[(i * 2)    ]) - HEDLEY_STATIC_CAST(int64_t, a_.i32[(i * 2) + 1]);\n    }\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_hsubq_epi32(a) simde_mm_hsubq_epi32((a))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_macc_epi16 (simde__m128i a, simde__m128i b, simde__m128i c) {\n  #if defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_macc_epi16(a, b, c);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b),\n      c_ = simde__m128i_to_private(c);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i16 = vmlaq_s16(c_.neon_i16, a_.neon_i16, b_.neon_i16);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n        r_.i16[i] = (a_.i16[i] * b_.i16[i]) + c_.i16[i];\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_macc_epi16(a, b, c) simde_mm_macc_epi16((a), (b), (c))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_macc_epi32 (simde__m128i a, simde__m128i b, simde__m128i c) {\n  #if defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_macc_epi32(a, b, c);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b),\n      c_ = simde__m128i_to_private(c);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i32 = vmlaq_s32(c_.neon_i32, a_.neon_i32, b_.neon_i32);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n        r_.i32[i] = (a_.i32[i] * b_.i32[i]) + c_.i32[i];\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_macc_epi32(a, b, c) simde_mm_macc_epi32((a), (b), (c))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_maccd_epi16 (simde__m128i a, simde__m128i b, simde__m128i c) {\n  #if defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_maccd_epi16(a, b, c);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b),\n      c_ = simde__m128i_to_private(c);\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      int16x8_t even = vuzp1q_s16(a_.neon_i16, b_.neon_i16);\n      int32x4_t a_even = vmovl_s16(vget_low_s16(even));\n      int32x4_t b_even = vmovl_high_s16(even);\n      r_.neon_i32 = vmlaq_s32(c_.neon_i32, a_even, b_even);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n        r_.i32[i] = (HEDLEY_STATIC_CAST(int32_t, a_.i16[i * 2]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i * 2])) + c_.i32[i];\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_maccd_epi16(a, b, c) simde_mm_maccd_epi16((a), (b), (c))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_macclo_epi32 (simde__m128i a, simde__m128i b, simde__m128i c) {\n  #if defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_macclo_epi32(a, b, c);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b),\n      c_ = 
simde__m128i_to_private(c);\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      int32x4_t even = vuzp1q_s32(a_.neon_i32, b_.neon_i32);\n      r_.neon_i64 = vaddq_s64(vmull_s32(vget_low_s32(even), vget_high_s32(even)), c_.neon_i64);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {\n        r_.i64[i] = (HEDLEY_STATIC_CAST(int64_t, a_.i32[(i * 2) + 0]) * HEDLEY_STATIC_CAST(int64_t, b_.i32[(i * 2) + 0])) + c_.i64[i];\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_macclo_epi32(a, b, c) simde_mm_macclo_epi32((a), (b), (c))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_macchi_epi32 (simde__m128i a, simde__m128i b, simde__m128i c) {\n  #if defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_macchi_epi32(a, b, c);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b),\n      c_ = simde__m128i_to_private(c);\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      int32x4_t even = vuzp2q_s32(a_.neon_i32, b_.neon_i32);\n      r_.neon_i64 = vaddq_s64(vmull_s32(vget_low_s32(even), vget_high_s32(even)), c_.neon_i64);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {\n        r_.i64[i] = (HEDLEY_STATIC_CAST(int64_t, a_.i32[(i * 2) + 1]) * HEDLEY_STATIC_CAST(int64_t, b_.i32[(i * 2) + 1])) + c_.i64[i];\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_macchi_epi32(a, b, c) simde_mm_macchi_epi32((a), (b), (c))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_maccs_epi16 (simde__m128i a, simde__m128i b, simde__m128i c) {\n  #if defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_maccs_epi16(a, b, c);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b),\n      c_ = simde__m128i_to_private(c);\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      int32x4_t c_lo = vmovl_s16(vget_low_s16(c_.neon_i16));\n      int32x4_t c_hi = vmovl_high_s16(c_.neon_i16);\n      int32x4_t lo = vmlal_s16(c_lo, vget_low_s16(a_.neon_i16), vget_low_s16(b_.neon_i16));\n      int32x4_t hi = vmlal_high_s16(c_hi, a_.neon_i16, b_.neon_i16);\n      r_.neon_i16 = vcombine_s16(vqmovn_s32(lo), vqmovn_s32(hi));\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n        int32_t tmp = HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i]);\n        tmp += c_.i16[i];\n        if (tmp > INT16_MAX)\n          r_.i16[i] = INT16_MAX;\n        else if (tmp < INT16_MIN)\n          r_.i16[i] = INT16_MIN;\n        else\n          r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, tmp);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_maccs_epi16(a, b, c) simde_mm_maccs_epi16((a), (b), (c))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_maccs_epi32 (simde__m128i a, simde__m128i b, simde__m128i c) {\n  #if defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_maccs_epi32(a, b, c);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b),\n      c_ = simde__m128i_to_private(c);\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      int64x2_t c_lo = 
vmovl_s32(vget_low_s32(c_.neon_i32));\n      int64x2_t c_hi = vmovl_high_s32(c_.neon_i32);\n      int64x2_t lo = vmlal_s32(c_lo, vget_low_s32(a_.neon_i32), vget_low_s32(b_.neon_i32));\n      int64x2_t hi = vmlal_high_s32(c_hi, a_.neon_i32, b_.neon_i32);\n      r_.neon_i32 = vcombine_s32(vqmovn_s64(lo), vqmovn_s64(hi));\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n        int64_t tmp = HEDLEY_STATIC_CAST(int64_t, a_.i32[i]) * HEDLEY_STATIC_CAST(int64_t, b_.i32[i]);\n        tmp += HEDLEY_STATIC_CAST(int64_t, c_.i32[i]);\n        if (tmp > INT32_MAX)\n          r_.i32[i] = INT32_MAX;\n        else if (tmp < INT32_MIN)\n          r_.i32[i] = INT32_MIN;\n        else\n          r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, tmp);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_maccs_epi32(a, b, c) simde_mm_maccs_epi32((a), (b), (c))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_maccsd_epi16 (simde__m128i a, simde__m128i b, simde__m128i c) {\n  #if defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_maccsd_epi16(a, b, c);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b),\n      c_ = simde__m128i_to_private(c);\n\n    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)\n      int16x8_t even = vuzp1q_s16(a_.neon_i16, b_.neon_i16);\n      r_.neon_i32 = vqaddq_s32(vmull_s16(vget_low_s16(even), vget_high_s16(even)), c_.neon_i32);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n        int32_t prod = HEDLEY_STATIC_CAST(int32_t, a_.i16[i * 2]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i * 2]);\n        r_.i32[i] = simde_math_adds_i32(prod, c_.i32[i]);\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_maccsd_epi16(a, b, c) simde_mm_maccsd_epi16((a), (b), (c))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_maccslo_epi32 (simde__m128i a, simde__m128i b, simde__m128i c) {\n  #if defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_maccslo_epi32(a, b, c);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b),\n      c_ = simde__m128i_to_private(c);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {\n      int64_t tmp = HEDLEY_STATIC_CAST(int64_t, a_.i32[(i * 2) + 0]) * HEDLEY_STATIC_CAST(int64_t, b_.i32[(i * 2) + 0]);\n      r_.i64[i] = simde_math_adds_i64(tmp, c_.i64[i]);\n    }\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_maccslo_epi32(a, b, c) simde_mm_maccslo_epi32((a), (b), (c))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_maccshi_epi32 (simde__m128i a, simde__m128i b, simde__m128i c) {\n  #if defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_maccshi_epi32(a, b, c);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b),\n      c_ = simde__m128i_to_private(c);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {\n      int64_t tmp = HEDLEY_STATIC_CAST(int64_t, a_.i32[(i * 2) + 1]) * HEDLEY_STATIC_CAST(int64_t, b_.i32[(i * 2) + 1]);\n      r_.i64[i] = simde_math_adds_i64(tmp, c_.i64[i]);\n    }\n\n    
return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_maccshi_epi32(a, b, c) simde_mm_maccshi_epi32((a), (b), (c))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_maddd_epi16 (simde__m128i a, simde__m128i b, simde__m128i c) {\n  #if defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_maddd_epi16(a, b, c);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b),\n      c_ = simde__m128i_to_private(c);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n      r_.i32[i] =\n        (a_.i16[(i * 2) + 0] * b_.i16[(i * 2) + 0]) +\n        (a_.i16[(i * 2) + 1] * b_.i16[(i * 2) + 1]);\n      r_.i32[i] += c_.i32[i];\n    }\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_maddd_epi16(a, b, c) simde_mm_maddd_epi16((a), (b), (c))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_maddsd_epi16 (simde__m128i a, simde__m128i b, simde__m128i c) {\n  #if defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_maddsd_epi16(a, b, c);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b),\n      c_ = simde__m128i_to_private(c);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n      /* The AMD64 Architecture Programmer's Manual says that \"the\"\n       * addition is saturated; I'm not sure whether that means\n       * the pairwise addition or the accumulate, or both. */\n      r_.i32[i] =\n        (a_.i16[(i * 2) + 0] * b_.i16[(i * 2) + 0]) +\n        (a_.i16[(i * 2) + 1] * b_.i16[(i * 2) + 1]);\n      r_.i32[i] = simde_math_adds_i32(r_.i32[i], c_.i32[i]);\n    }\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_maddsd_epi16(a, b, c) simde_mm_maddsd_epi16((a), (b), (c))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_sha_epi8 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_sha_epi8(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i8 = vshlq_s8(a_.neon_i8, b_.neon_i8);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {\n        if (b_.i8[i] < 0) {\n          r_.i8[i] = HEDLEY_STATIC_CAST(int8_t, a_.i8[i] >> -b_.i8[i]);\n        } else {\n          r_.i8[i] = HEDLEY_STATIC_CAST(int8_t, a_.i8[i] <<  b_.i8[i]);\n        }\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_sha_epi8(a, b) simde_mm_sha_epi8((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_sha_epi16 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_sha_epi16(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i16 = vshlq_s16(a_.neon_i16, b_.neon_i16);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {\n        if (b_.i16[i] < 0) {\n          r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] >> 
-b_.i16[i]);\n        } else {\n          r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] <<  b_.i16[i]);\n        }\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_sha_epi16(a, b) simde_mm_sha_epi16((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_sha_epi32 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_sha_epi32(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i32 = vshlq_s32(a_.neon_i32, b_.neon_i32);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {\n        if (b_.i32[i] < 0) {\n          r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, a_.i32[i] >> -b_.i32[i]);\n        } else {\n          r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, a_.i32[i] <<  b_.i32[i]);\n        }\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_sha_epi32(a, b) simde_mm_sha_epi32((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_sha_epi64 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_sha_epi64(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_i64 = vshlq_s64(a_.neon_i64, b_.neon_i64);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {\n        if (b_.i64[i] < 0) {\n          r_.i64[i] = HEDLEY_STATIC_CAST(int64_t, a_.i64[i] >> -b_.i64[i]);\n        } else {\n          r_.i64[i] = HEDLEY_STATIC_CAST(int64_t, a_.i64[i] <<  b_.i64[i]);\n        }\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_sha_epi64(a, b) simde_mm_sha_epi64((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_shl_epi8 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_shl_epi8(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u8 = vshlq_u8(a_.neon_u8, b_.neon_i8);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {\n        if (HEDLEY_UNLIKELY(b_.i8[i] < -7 || b_.i8[i] > 7)) {\n          r_.u8[i] = 0;\n        } else {\n          if (b_.i8[i] < 0) {\n            r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, a_.u8[i] >> -b_.i8[i]);\n          } else {\n            r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, a_.u8[i] <<  b_.i8[i]);\n          }\n        }\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_shl_epi8(a, b) simde_mm_shl_epi8((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_shl_epi16 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_shl_epi16(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u16 = 
vshlq_u16(a_.neon_u16, b_.neon_i16);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {\n        if (HEDLEY_UNLIKELY(b_.i16[i] < -15 || b_.i16[i] > 15)) {\n          r_.u16[i] = 0;\n        } else {\n          if (b_.i16[i] < 0) {\n            r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i] >> -b_.i16[i]);\n          } else {\n            r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i] <<  b_.i16[i]);\n          }\n        }\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_shl_epi16(a, b) simde_mm_shl_epi16((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_shl_epi32 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_shl_epi32(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u32 = vshlq_u32(a_.neon_u32, b_.neon_i32);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {\n        if (HEDLEY_UNLIKELY(b_.i32[i] < -31 || b_.i32[i] > 31)) {\n          r_.u32[i] = 0;\n        } else {\n          if (b_.i32[i] < 0) {\n            r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, a_.u32[i] >> -b_.i32[i]);\n          } else {\n            r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, a_.u32[i] <<  b_.i32[i]);\n          }\n        }\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_shl_epi32(a, b) simde_mm_shl_epi32((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_shl_epi64 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_shl_epi64(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)\n      r_.neon_u64 = vshlq_u64(a_.neon_u64, b_.neon_i64);\n    #else\n      SIMDE_VECTORIZE\n      for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {\n        if (HEDLEY_UNLIKELY(b_.i64[i] < -63 || b_.i64[i] > 63)) {\n          r_.u64[i] = 0;\n        } else {\n          if (b_.i64[i] < 0) {\n            r_.u64[i] = HEDLEY_STATIC_CAST(uint64_t, a_.u64[i] >> -b_.i64[i]);\n          } else {\n            r_.u64[i] = HEDLEY_STATIC_CAST(uint64_t, a_.u64[i] <<  b_.i64[i]);\n          }\n        }\n      }\n    #endif\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_shl_epi64(a, b) simde_mm_shl_epi64((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_rot_epi8 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_rot_epi8(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {\n      r_.u8[i] = (b_.i8[i] < 0) ?\n        HEDLEY_STATIC_CAST(uint8_t, ((a_.u8[i] >> -b_.i8[i]) | (a_.u8[i] << ( b_.i8[i] & 7)))) :\n        HEDLEY_STATIC_CAST(uint8_t, ((a_.u8[i] <<  b_.i8[i]) | (a_.u8[i] >> (-b_.i8[i] & 7))));\n    }\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define 
_mm_rot_epi8(a, b) simde_mm_rot_epi8((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_rot_epi16 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_rot_epi16(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {\n      r_.u16[i] = (b_.i16[i] < 0) ?\n        HEDLEY_STATIC_CAST(uint16_t, ((a_.u16[i] >> -b_.i16[i]) | (a_.u16[i] << ( b_.i16[i] & 15)))) :\n        HEDLEY_STATIC_CAST(uint16_t, ((a_.u16[i] <<  b_.i16[i]) | (a_.u16[i] >> (-b_.i16[i] & 15))));\n    }\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_rot_epi16(a, b) simde_mm_rot_epi16((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_rot_epi32 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_rot_epi32(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {\n      r_.u32[i] = (b_.i32[i] < 0) ?\n        HEDLEY_STATIC_CAST(uint32_t, ((a_.u32[i] >> -b_.i32[i]) | (a_.u32[i] << ( b_.i32[i] & 31)))) :\n        HEDLEY_STATIC_CAST(uint32_t, ((a_.u32[i] <<  b_.i32[i]) | (a_.u32[i] >> (-b_.i32[i] & 31))));\n    }\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_rot_epi32(a, b) simde_mm_rot_epi32((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_rot_epi64 (simde__m128i a, simde__m128i b) {\n  #if defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_rot_epi64(a, b);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {\n      r_.u64[i] = (b_.i64[i] < 0) ?\n        HEDLEY_STATIC_CAST(uint64_t, ((a_.u64[i] >> -b_.i64[i]) | (a_.u64[i] << ( b_.i64[i] & 63)))) :\n        HEDLEY_STATIC_CAST(uint64_t, ((a_.u64[i] <<  b_.i64[i]) | (a_.u64[i] >> (-b_.i64[i] & 63))));\n    }\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_rot_epi64(a, b) simde_mm_rot_epi64((a), (b))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_roti_epi8 (simde__m128i a, const int count) {\n  simde__m128i_private\n    r_,\n    a_ = simde__m128i_to_private(a);\n\n  SIMDE_VECTORIZE\n  for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {\n    r_.u8[i] = (count < 0) ?\n      HEDLEY_STATIC_CAST(uint8_t, ((a_.u8[i] >> -count) | (a_.u8[i] << ( count & 7)))) :\n      HEDLEY_STATIC_CAST(uint8_t, ((a_.u8[i] <<  count) | (a_.u8[i] >> (-count & 7))));\n  }\n\n  return simde__m128i_from_private(r_);\n}\n#if defined(SIMDE_X86_XOP_NATIVE)\n  #define simde_mm_roti_epi8(a, count) _mm_roti_epi8((a), (count))\n#endif\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_roti_epi8(a, count) simde_mm_roti_epi8((a), (count))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_roti_epi16 (simde__m128i a, const int count) {\n  simde__m128i_private\n    r_,\n    a_ = simde__m128i_to_private(a);\n\n  SIMDE_VECTORIZE\n  for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {\n    r_.u16[i] = 
(count < 0) ?\n      HEDLEY_STATIC_CAST(uint16_t, ((a_.u16[i] >> -count) | (a_.u16[i] << ( count & 15)))) :\n      HEDLEY_STATIC_CAST(uint16_t, ((a_.u16[i] <<  count) | (a_.u16[i] >> (-count & 15))));\n  }\n\n  return simde__m128i_from_private(r_);\n}\n#if defined(SIMDE_X86_XOP_NATIVE)\n  #define simde_mm_roti_epi16(a, count) _mm_roti_epi16((a), (count))\n#endif\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_roti_epi16(a, count) simde_mm_roti_epi16((a), (count))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_roti_epi32 (simde__m128i a, const int count) {\n  simde__m128i_private\n    r_,\n    a_ = simde__m128i_to_private(a);\n\n  SIMDE_VECTORIZE\n  for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {\n    r_.u32[i] = (count < 0) ?\n      HEDLEY_STATIC_CAST(uint32_t, ((a_.u32[i] >> -count) | (a_.u32[i] << ( count & 31)))) :\n      HEDLEY_STATIC_CAST(uint32_t, ((a_.u32[i] <<  count) | (a_.u32[i] >> (-count & 31))));\n  }\n\n  return simde__m128i_from_private(r_);\n}\n#if defined(SIMDE_X86_XOP_NATIVE)\n  #define simde_mm_roti_epi32(a, count) _mm_roti_epi32((a), (count))\n#endif\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_roti_epi32(a, count) simde_mm_roti_epi32((a), (count))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_roti_epi64 (simde__m128i a, const int count) {\n  simde__m128i_private\n    r_,\n    a_ = simde__m128i_to_private(a);\n\n  SIMDE_VECTORIZE\n  for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {\n    r_.u64[i] = (count < 0) ?\n      HEDLEY_STATIC_CAST(uint64_t, ((a_.u64[i] >> -count) | (a_.u64[i] << ( count & 63)))) :\n      HEDLEY_STATIC_CAST(uint64_t, ((a_.u64[i] <<  count) | (a_.u64[i] >> (-count & 63))));\n  }\n\n  return simde__m128i_from_private(r_);\n}\n#if defined(SIMDE_X86_XOP_NATIVE)\n  #define simde_mm_roti_epi64(a, count) _mm_roti_epi64((a), (count))\n#endif\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_roti_epi64(a, count) simde_mm_roti_epi64((a), (count))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128i\nsimde_mm_perm_epi8 (simde__m128i a, simde__m128i b, simde__m128i c) {\n  #if defined(SIMDE_X86_XOP_NATIVE)\n    return _mm_perm_epi8(a, b, c);\n  #else\n    simde__m128i_private\n      r_,\n      a_ = simde__m128i_to_private(a),\n      b_ = simde__m128i_to_private(b),\n      c_ = simde__m128i_to_private(c);\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {\n      int8_t src = (c_.u8[i] & 0x10) ? b_.i8[c_.u8[i] & 0xf] : a_.i8[c_.u8[i] & 0xf];\n\n      switch (c_.u8[i] & 0xc0) {\n        case 0x40:\n          #if HEDLEY_HAS_BUILTIN(__builtin_bitreverse8) && !defined(HEDLEY_IBM_VERSION)\n            src = HEDLEY_STATIC_CAST(int8_t, __builtin_bitreverse8(HEDLEY_STATIC_CAST(uint8_t, src)));\n          #else\n            src = HEDLEY_STATIC_CAST(int8_t, ((HEDLEY_STATIC_CAST(uint8_t, src) * UINT64_C(0x80200802)) & UINT64_C(0x0884422110)) * UINT64_C(0x0101010101) >> 32);\n          #endif\n          break;\n        case 0x80:\n          src = 0;\n          break;\n        case 0xc0:\n          src >>= 7;\n          break;\n      }\n\n      r_.i8[i] = (c_.u8[i] & 0x20) ? 
~src : src;\n    }\n\n    return simde__m128i_from_private(r_);\n  #endif\n}\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_perm_epi8(a, b, c) simde_mm_perm_epi8((a), (b), (c))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128\nsimde_mm_permute2_ps (simde__m128 a, simde__m128 b, simde__m128i c, const int imm8) {\n  simde__m128_private\n    r_,\n    a_ = simde__m128_to_private(a),\n    b_ = simde__m128_to_private(b);\n  simde__m128i_private c_ = simde__m128i_to_private(c);\n\n  const int m2z = imm8 & 0x03;\n\n  SIMDE_VECTORIZE\n  for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n    const int sel = c_.i32[i] & 0x07;\n    const int   m = c_.i32[i] & 0x08;\n\n    switch (m | m2z) {\n      case 0xa:\n      case 0x3:\n        r_.i32[i] = 0;\n        break;\n      default:\n        r_.i32[i] = (sel > 3) ? b_.i32[sel - 4] : a_.i32[sel];\n        break;\n    }\n  }\n\n  return simde__m128_from_private(r_);\n}\n#if defined(SIMDE_X86_XOP_NATIVE)\n  #if defined(HEDLEY_MCST_LCC_VERSION)\n    #define simde_mm_permute2_ps(a, b, c, imm8) (__extension__ ({ \\\n      SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS \\\n      _mm_permute2_ps((a), (b), (c), (imm8)); \\\n      SIMDE_LCC_REVERT_DEPRECATED_WARNINGS \\\n    }))\n  #else\n    #define simde_mm_permute2_ps(a, b, c, imm8) _mm_permute2_ps((a), (b), (c), (imm8))\n  #endif\n#endif\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_permute2_ps(a, b, c, imm8) simde_mm_permute2_ps((a), (b), (c), (imm8))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m128d\nsimde_mm_permute2_pd (simde__m128d a, simde__m128d b, simde__m128i c, const int imm8) {\n  simde__m128d_private\n    r_,\n    a_ = simde__m128d_to_private(a),\n    b_ = simde__m128d_to_private(b);\n  simde__m128i_private c_ = simde__m128i_to_private(c);\n\n  const int m2z = imm8 & 0x03;\n\n  SIMDE_VECTORIZE\n  for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n    const int sel = (c_.i64[i] & 0x06) >> 1;\n    const int   m = c_.i64[i] & 0x08;\n\n    switch (m | m2z) {\n      case 0x0a:\n      case 0x03:\n        r_.i64[i] = 0;\n        break;\n      default:\n        r_.i64[i] = (sel > 1) ? 
b_.i64[sel - 2] : a_.i64[sel];\n        break;\n    }\n  }\n\n  return simde__m128d_from_private(r_);\n}\n\n#if defined(SIMDE_X86_XOP_NATIVE)\n  #if defined(HEDLEY_MCST_LCC_VERSION)\n    #define simde_mm_permute2_pd(a, b, c, imm8) (__extension__ ({ \\\n      SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS \\\n      _mm_permute2_pd((a), (b), (c), (imm8)); \\\n      SIMDE_LCC_REVERT_DEPRECATED_WARNINGS \\\n    }))\n  #else\n    #define simde_mm_permute2_pd(a, b, c, imm8) _mm_permute2_pd((a), (b), (c), (imm8))\n  #endif\n#endif\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm_permute2_pd(a, b, c, imm8) simde_mm_permute2_pd((a), (b), (c), (imm8))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256\nsimde_mm256_permute2_ps (simde__m256 a, simde__m256 b, simde__m256i c, const int imm8) {\n  simde__m256_private\n    r_,\n    a_ = simde__m256_to_private(a),\n    b_ = simde__m256_to_private(b);\n  simde__m256i_private c_ = simde__m256i_to_private(c);\n\n  #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n    for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {\n      r_.m128[i] = simde_mm_permute2_ps(a_.m128[i], b_.m128[i], c_.m128i[i], imm8);\n    }\n  #else\n    const int m2z = imm8 & 0x03;\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {\n      const int sel = c_.i32[i] & 0x07;\n      const int   m = c_.i32[i] & 0x08;\n\n      switch (m | m2z) {\n        case 0xa:\n        case 0x3:\n          r_.i32[i] = 0;\n          break;\n        default:\n          r_.i32[i] = (sel > 3) ? b_.i32[sel + (HEDLEY_STATIC_CAST(int, i) & 4) - 4] : a_.i32[sel + (HEDLEY_STATIC_CAST(int, i) & 4)];\n          break;\n      }\n    }\n  #endif\n\n  return simde__m256_from_private(r_);\n}\n\n#if defined(SIMDE_X86_XOP_NATIVE)\n  #if defined(HEDLEY_MCST_LCC_VERSION)\n    #define simde_mm256_permute2_ps(a, b, c, imm8) (__extension__ ({ \\\n      SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS \\\n      _mm256_permute2_ps((a), (b), (c), (imm8)); \\\n      SIMDE_LCC_REVERT_DEPRECATED_WARNINGS \\\n    }))\n  #else\n    #define simde_mm256_permute2_ps(a, b, c, imm8) _mm256_permute2_ps((a), (b), (c), (imm8))\n  #endif\n#endif\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm256_permute2_ps(a, b, c, imm8) simde_mm256_permute2_ps((a), (b), (c), (imm8))\n#endif\n\nSIMDE_FUNCTION_ATTRIBUTES\nsimde__m256d\nsimde_mm256_permute2_pd (simde__m256d a, simde__m256d b, simde__m256i c, const int imm8) {\n  simde__m256d_private\n    r_,\n    a_ = simde__m256d_to_private(a),\n    b_ = simde__m256d_to_private(b);\n  simde__m256i_private c_ = simde__m256i_to_private(c);\n\n  #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)\n    for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {\n      r_.m128d[i] = simde_mm_permute2_pd(a_.m128d[i], b_.m128d[i], c_.m128i[i], imm8);\n    }\n  #else\n    const int m2z = imm8 & 0x03;\n\n    SIMDE_VECTORIZE\n    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {\n      const int sel = (c_.i64[i] & 0x06) >> 1;\n      const int   m = c_.i64[i] & 0x08;\n\n      switch (m | m2z) {\n        case 0x0a:\n        case 0x03:\n          r_.i64[i] = 0;\n          break;\n        default:\n          r_.i64[i] = (sel > 1) ? 
b_.i64[sel + (HEDLEY_STATIC_CAST(int, i) & 2) - 2] : a_.i64[sel + (HEDLEY_STATIC_CAST(int, i) & 2)];\n          break;\n      }\n    }\n  #endif\n\n  return simde__m256d_from_private(r_);\n}\n#if defined(SIMDE_X86_XOP_NATIVE)\n  #if defined(HEDLEY_MCST_LCC_VERSION)\n    #define simde_mm256_permute2_pd(a, b, c, imm8) (__extension__ ({ \\\n      SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS \\\n      _mm256_permute2_pd((a), (b), (c), (imm8)); \\\n      SIMDE_LCC_REVERT_DEPRECATED_WARNINGS \\\n    }))\n  #else\n    #define simde_mm256_permute2_pd(a, b, c, imm8) _mm256_permute2_pd((a), (b), (c), (imm8))\n  #endif\n#endif\n#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)\n  #define _mm256_permute2_pd(a, b, c, imm8) simde_mm256_permute2_pd((a), (b), (c), (imm8))\n#endif\n\nHEDLEY_DIAGNOSTIC_POP\nSIMDE_END_DECLS_\n\n#endif /* !defined(SIMDE_X86_XOP_H) */\n"
  },
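The scalar fallbacks in xop.h above all follow the same pattern; the variable rotates (`simde_mm_rot_epi32` and friends) are the least obvious, since a negative per-lane count rotates right while a positive one rotates left. A minimal standalone sketch of the 32-bit formula used above (the `rot32` name and the test values are illustrative only):

```cpp
// One lane of the simde_mm_rot_epi32 fallback above, assuming the
// count lies in [-31, 31] (the range the XOP rotate semantics cover).
#include <cassert>
#include <cstdint>

static uint32_t rot32(uint32_t a, int b) {
  return (b < 0)
    ? ((a >> -b) | (a << (b & 31)))   // negative count: rotate right
    : ((a << b) | (a >> (-b & 31)));  // positive count: rotate left
}

int main() {
  assert(rot32(0x80000001u,  1) == 0x00000003u); // left by one
  assert(rot32(0x80000001u, -1) == 0xC0000000u); // right by one
  assert(rot32(0xDEADBEEFu,  0) == 0xDEADBEEFu); // zero count is a no-op
  return 0;
}
```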
  {
    "path": "external_libs/qf/Makefile",
    "content": "AR       ?= ar\nCXX      ?= g++\nCXXFLAGS  = -O2 -DNDEBUG -fPIC\n\nUNAME_S := $(shell uname -s)\nifeq ($(UNAME_S),Darwin)\n CXXFLAGS += -arch x86_64\nendif\n\nqf.a: qfc.o\n\t${AR} rcs $@ $^ \n\n%.o: %.cpp\n\t${CXX} ${CXXFLAGS} -o $@ -c $< ${INC}\n\nclean:\n\trm -f *.o *.a\n"
  },
  {
    "path": "external_libs/qf/README.md",
    "content": "C code to compute p-value from linear combination of chisq(1) by inverting the characteristic function.\n\nCode was obtained from Robert Davies' webpage (http://www.robertnz.net/download.html) and was modified to remove the use of functions from `setjmp` library which does not work well with C++.\n"
  },
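For orientation, a minimal, hypothetical usage sketch (not a file in this repo): it calls `qf()` with the signature declared in `qfc.h` below, and the parameter meanings documented in the comment block of `qfc.cpp`. The coefficients and tolerances are made-up values.

```cpp
// Sketch: P(Q < c) for Q = 0.7*chisq(1) + 0.3*chisq(1), both central.
// qf() returns the distribution function, so the upper-tail p-value
// of an observed statistic c is 1 - qf(...).
#include <cstdio>
#include "qfc.h"

int main() {
  double lb[] = {0.7, 0.3}; // coefficients of the chi-squared variables
  double nc[] = {0.0, 0.0}; // non-centrality parameters
  int    n[]  = {1, 1};     // degrees of freedom
  double trace[7];          // diagnostics (see the comment block in qfc.cpp)
  int    ifault = 0;        // 0 = OK, 1..5 = error codes (see qfc.cpp)

  // sigma = 0 (no Gaussian term), evaluate the df at c = 1.5,
  // at most 10000 integration terms, accuracy target 1e-6.
  double p = qf(lb, nc, n, 2, 0.0, 1.5, 10000, 1e-6, trace, &ifault);
  std::printf("P(Q < 1.5) = %g (ifault = %d)\n", p, ifault);
  return ifault;
}
```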
  {
    "path": "external_libs/qf/qfc.cpp",
    "content": "#include <stdio.h>\n#include <stdlib.h>\n#include <math.h>\n// #include <setjmp.h> // Removed by J.Mbatchou (10/28/2021)\n\n#include \"qfc.h\"\n\n#ifdef __cplusplus\nextern \"C\"\n{\n#endif\n\n  static double sigsq, lmax, lmin, mean, c;\n  static double intl, ersm;\n  static int count, r, lim, env;  static BOOL ndtsrt, fail;\n  static int *n,*th; static double *lb,*nc;\n  // static jmp_buf env; // does not work well in C++ (change it to an int)\n\n\n  static double exp1(double x)               /* to avoid underflows  */\n  { return x < -50.0 ? 0.0 : exp(x); }\n\n  static void counter(void)\n    /*  count number of calls to errbd, truncation, cfe */\n  {\n    extern int count, lim, env;\n    count = count + 1;\n    if ( count > lim ) env=1; // JM edit\n  }\n\n  static double square(double x)  { return x*x; }\n\n  static double cube(double x)  { return x*x*x; }\n\n  static double  log1(double x, BOOL first)\n    /* if (first) log(1 + x) ; else  log(1 + x) - x */\n  {\n    if (fabs(x) > 0.1)\n    {\n      return (first ? log(1.0 + x) : (log(1.0 + x) - x));\n    }\n    else\n    {\n      double s, s1, term, y, k;\n      y = x / (2.0 + x);  term = 2.0 * cube(y);  k = 3.0;\n      s = (first ? 2.0 : - x) * y;\n      y = square(y);\n      for (s1 = s + term / k; s1 != s; s1 = s + term / k)\n      { k = k + 2.0; term = term * y; s = s1; }\n      return s;\n    }\n  }\n\n  static void order(void)\n    /* find order of absolute values of lb */\n  {\n    int j, k; double lj;\n    extern double *lb; extern int *th; extern int r; extern BOOL ndtsrt;\n    for ( j=0; j<r; j++ )\n    {\n      lj = fabs(lb[j]);\n      for (k = j-1; k>=0; k--)\n      {\n        if ( lj > fabs(lb[th[k]]) )  th[k + 1] = th[k];\n        else goto l1;\n      }\n      k = -1;\nl1 :\n      th[k + 1] = j;\n    }\n    ndtsrt = FALSE;\n  }\n\n\n  static double   errbd(double u, double* cx)\n    /*  find bound on tail probability using mgf, cutoff\n        point returned to *cx */\n  {\n    double sum1, lj, ncj, x, y, xconst; int j, nj;\n    extern double sigsq,*lb,*nc; extern int *n; extern int r;\n    counter();\n    xconst = u * sigsq;  sum1 = u * xconst;  u = 2.0 * u;\n    for (j=r-1; j>=0; j--)\n    {\n      nj = n[j]; lj = lb[j]; ncj = nc[j];\n      x = u * lj; y = 1.0 - x;\n      xconst = xconst + lj * (ncj / y + nj) / y;\n      sum1 = sum1 + ncj * square(x / y)\n        + nj * (square(x) / y + log1(-x, FALSE ));\n    }\n    *cx = xconst; return exp1(-0.5 * sum1);\n  }\n\n  static double  ctff(double accx, double* upn)\n    /*  find ctff so that p(qf > ctff) < accx  if (upn > 0,\n        p(qf < ctff) < accx otherwise */\n  {\n    double u1, u2, u, rb, xconst, c1, c2;\n    extern double lmin,lmax,mean;\n    u2 = *upn;   u1 = 0.0;  c1 = mean;\n    rb = 2.0 * ((u2 > 0.0) ? 
lmax : lmin);\n    for (u = u2 / (1.0 + u2 * rb); errbd(u, &c2) > accx; \n        u = u2 / (1.0 + u2 * rb))\n    {\n      u1 = u2;  c1 = c2;  u2 = 2.0 * u2;\n    }\n    for (u = (c1 - mean) / (c2 - mean); u < 0.9;\n        u = (c1 - mean) / (c2 - mean))\n    {\n      u = (u1 + u2) / 2.0;\n      if (errbd(u / (1.0 + u * rb), &xconst) > accx)\n      {  u1 = u; c1 = xconst;  }\n      else\n      {  u2 = u;  c2 = xconst; }\n    }\n    *upn = u2; return c2;\n  }\n\n  static double truncation(double u, double tausq)\n    /* bound integration error due to truncation at u */\n  {\n    double sum1, sum2, prod1, prod2, prod3, lj, ncj,\n         x, y, err1, err2;\n    int j, nj, s;\n    extern double sigsq,*lb,*nc; extern int *n; extern int r;\n\n    counter();\n    sum1  = 0.0; prod2 = 0.0;  prod3 = 0.0;  s = 0;\n    sum2 = (sigsq + tausq) * square(u); prod1 = 2.0 * sum2;\n    u = 2.0 * u;\n    for (j=0; j<r; j++ )\n    {\n      lj = lb[j];  ncj = nc[j]; nj = n[j];\n      x = square(u * lj);\n      sum1 = sum1 + ncj * x / (1.0 + x);\n      if (x > 1.0)\n      {\n        prod2 = prod2 + nj * log(x);\n        prod3 = prod3 + nj * log1(x, TRUE );\n        s = s + nj;\n      }\n      else  prod1 = prod1 + nj * log1(x, TRUE );\n    }\n    sum1 = 0.5 * sum1;\n    prod2 = prod1 + prod2;  prod3 = prod1 + prod3;\n    x = exp1(-sum1 - 0.25 * prod2) / pi;\n    y = exp1(-sum1 - 0.25 * prod3) / pi;\n    err1 =  ( s  ==  0 )  ? 1.0 : x * 2.0 / s;\n    err2 =  ( prod3 > 1.0 )  ? 2.5 * y : 1.0;\n    if (err2 < err1) err1 = err2;\n    x = 0.5 * sum2;\n    err2 =  ( x  <=  y )  ? 1.0  : y / x;\n    return  ( err1 < err2 )  ? err1  :  err2;\n  }\n\n  static void findu(double* utx, double accx)\n    /*  find u such that truncation(u) < accx and truncation(u / 1.2) > accx */\n  {\n    double u, ut; int i;\n    static double divis[]={2.0,1.4,1.2,1.1};\n    ut = *utx; u = ut / 4.0;\n    if ( truncation(u, 0.0) > accx )\n    {\n      for ( u = ut; truncation(u, 0.0) > accx; u = ut) ut = ut * 4.0;\n    }\n    else\n    {\n      ut = u;\n      for ( u = u / 4.0; truncation(u, 0.0) <=  accx; u = u / 4.0 )\n        ut = u;\n    }\n    for ( i=0;i<4;i++)\n    { u = ut/divis[i]; if ( truncation(u, 0.0)  <=  accx )  ut = u; }\n    *utx = ut;\n  }\n\n\n  static void integrate(int nterm, double interv, double tausq, BOOL mainx)\n    /*  carry out integration with nterm terms, at stepsize\n        interv.  if (! mainx) multiply integrand by\n        1.0-exp(-0.5*tausq*u^2) */\n  {\n    double inpi, u, sum1, sum2, sum3, x, y, z;\n    int k, j, nj;\n    extern double intl,ersm; extern double sigsq,c;\n    extern int *n; extern double *lb,*nc; extern int r;\n    inpi = interv / pi;\n    for ( k = nterm; k>=0; k--)\n    {\n      u = (k + 0.5) * interv;\n      sum1 = - 2.0 * u * c;  sum2 = fabs(sum1);\n      sum3 = - 0.5 * sigsq * square(u);\n      for ( j = r-1; j>=0; j--)\n      {\n        nj = n[j];  x = 2.0 * lb[j] * u;  y = square(x);\n        sum3 = sum3 - 0.25 * nj * log1(y, TRUE );\n        y = nc[j] * x / (1.0 + y);\n        z = nj * atan(x) + y;\n        sum1 = sum1 + z;   sum2 = sum2 + fabs(z);\n        sum3 = sum3 - 0.5 * x * y;\n      }\n      x = inpi * exp1(sum3) / u;\n      if ( !  
mainx )\n        x = x * (1.0 - exp1(-0.5 * tausq * square(u)));\n      sum1 = sin(0.5 * sum1) * x;  sum2 = 0.5 * sum2 * x;\n      intl = intl + sum1; ersm = ersm + sum2;\n    }\n  }\n\n  static double cfe(double x)\n    /*  coef of tausq in error when convergence factor of\n        exp1(-0.5*tausq*u^2) is used when df is evaluated at x */\n  {\n    double axl, axl1, axl2, sxl, sum1, lj; int j, k, t;\n    extern BOOL ndtsrt,fail; extern int *th,*n; extern double *lb,*nc;\n    extern int r;\n    counter();\n    if (ndtsrt) order();\n    axl = fabs(x);  sxl = (x>0.0) ? 1.0 : -1.0;  sum1 = 0.0;\n    for ( j = r-1; j>=0; j-- )\n    { t = th[j];\n      if ( lb[t] * sxl > 0.0 )\n      {\n        lj = fabs(lb[t]);\n        axl1 = axl - lj * (n[t] + nc[t]);  axl2 = lj / log28;\n        if ( axl1 > axl2 )  axl = axl1  ; else\n        {\n          if ( axl > axl2 )  axl = axl2;\n          sum1 = (axl - axl1) / lj;\n          for ( k = j-1; k>=0; k--)\n            sum1 = sum1 + (n[th[k]] + nc[th[k]]);\n          goto  l;\n        }\n      }\n    }\nl:\n    if (sum1 > 100.0)\n    { fail = TRUE; return 1.0; } else\n    return pow(2.0,(sum1 / 4.0)) / (pi * square(axl));\n  }\n\ndouble   qf(double* lb1, double* nc1, int* n1, int r1, double sigma, double c1,\n    int lim1, double acc, double* trace, int* ifault)\n\n  /*  distribution function of a linear combination of non-central\n      chi-squared random variables :\n\n      input:\n      lb[j]            coefficient of j-th chi-squared variable\n      nc[j]            non-centrality parameter\n      n[j]             degrees of freedom\n      j = 0, 2 ... r-1\n      sigma            coefficient of standard normal variable\n      c                point at which df is to be evaluated\n      lim              maximum number of terms in integration\n      acc              maximum error\n\n      output:\n      ifault = 1       required accuracy NOT achieved\n      2       round-off error possibly significant\n      3       invalid parameters\n      4       unable to locate integration parameters\n      5       out of memory\n\n      trace[0]         absolute sum\n      trace[1]         total number of integration terms\n      trace[2]         number of integrations\n      trace[3]         integration interval in final integration\n      trace[4]         truncation point in initial integration\n      trace[5]         s.d. of initial convergence factor\n      trace[6]         cycles to locate integration parameters     */\n\n{\n  int j, nj, nt, ntm;  double acc1, almx, xlim, xnt, xntm;\n  double utx, tausq, sd, intv, intv1, x, up, un, d1, d2, lj, ncj;\n  extern double sigsq, lmax, lmin, mean;\n  extern double intl,ersm;\n  extern int r,lim, env; extern double c; // JM edit\n  extern int *n,*th; extern double *lb,*nc;\n  double qfval;\n  static int rats[]={1,2,4,8};\n\n  //if (setjmp(env) != 0) { *ifault=4; goto endofproc; } // JM edit\n  r=r1; lim=lim1; c=c1;\n  n=n1; lb=lb1; nc=nc1;\n  env=0; // set to 0  // JM edit\n  for ( j = 0; j<7; j++ )  trace[j] = 0.0;\n  *ifault = 0; count = 0;\n  intl = 0.0; ersm = 0.0;\n  qfval = -1.0; acc1 = acc; ndtsrt = TRUE;  fail = FALSE;\n  xlim = (double)lim;\n  th=(int*)malloc(r*(sizeof(int)));\n  if (! 
th) { *ifault=5;  goto  endofproc; } \n\n  /* find mean, sd, max and min of lb,\n     check that parameter values are valid */\n  sigsq = square(sigma); sd = sigsq;\n  lmax = 0.0; lmin = 0.0; mean = 0.0;\n  for (j=0; j<r; j++ )\n  {\n    nj = n[j];  lj = lb[j];  ncj = nc[j];\n    if ( nj < 0  ||  ncj < 0.0 ) { *ifault = 3;  goto  endofproc;  }\n    sd  = sd  + square(lj) * (2 * nj + 4.0 * ncj);\n    mean = mean + lj * (nj + ncj);\n    if (lmax < lj) lmax = lj ; else if (lmin > lj) lmin = lj;\n  }\n  if ( sd == 0.0  )\n  {  qfval = (c > 0.0) ? 1.0 : 0.0; goto  endofproc;  }\n  if ( lmin == 0.0 && lmax == 0.0 && sigma == 0.0 )\n  { *ifault = 3;  goto  endofproc;  }\n  sd = sqrt(sd);\n  almx = (lmax < - lmin) ? - lmin : lmax;\n\n  /* starting values for findu, ctff */\n  utx = 16.0 / sd;  up = 4.5 / sd;  un = - up;\n  /* truncation point with no convergence factor */\n  findu(&utx, .5 * acc1);\n  if (env != 0) { *ifault=4; goto endofproc; } // JM edit\n  /* does convergence factor help */\n  if (c != 0.0  && (almx > 0.07 * sd))\n  {\n    tausq = .25 * acc1 / cfe(c);\n    if (fail) fail = FALSE ;\n    else if (truncation(utx, tausq) < .2 * acc1)\n    {\n      sigsq = sigsq + tausq;\n      findu(&utx, .25 * acc1);\n      trace[5] = sqrt(tausq);\n    }\n  }\n  if (env != 0) { *ifault=4; goto endofproc; } // JM edit\n  trace[4] = utx;  acc1 = 0.5 * acc1;\n\n  /* find RANGE of distribution, quit if outside this */\nl1:\n  d1 = ctff(acc1, &up) - c;\n  if (d1 < 0.0) { qfval = 1.0; goto endofproc; }\n  if (env != 0) { *ifault=4; goto endofproc; } // JM edit\n  d2 = c - ctff(acc1, &un);\n  if (d2 < 0.0) { qfval = 0.0; goto endofproc; }\n  if (env != 0) { *ifault=4; goto endofproc; } // JM edit\n  /* find integration interval */\n  intv = 2.0 * pi / ((d1 > d2) ? d1 : d2);\n  /* calculate number of terms required for main and\n     auxillary integrations */\n  xnt = utx / intv;  xntm = 3.0 / sqrt(acc1);\n  if (xnt > xntm * 1.5)\n  {\n    /* parameters for auxillary integration */\n    if (xntm > xlim) { *ifault = 1; goto endofproc; }\n    ntm = (int)floor(xntm+0.5);\n    intv1 = utx / ntm;  x = 2.0 * pi / intv1;\n    if (x <= fabs(c)) goto l2;\n    /* calculate convergence factor */\n    tausq = .33 * acc1 / (1.1 * (cfe(c - x) + cfe(c + x)));\n    if (env != 0) { *ifault=4; goto endofproc; } // JM edit\n    if (fail) goto l2;\n    acc1 = .67 * acc1;\n    /* auxillary integration */\n    integrate(ntm, intv1, tausq, FALSE );\n    if (env != 0) { *ifault=4; goto endofproc; } // JM edit\n    xlim = xlim - xntm;  sigsq = sigsq + tausq;\n    trace[2] = trace[2] + 1; trace[1] = trace[1] + ntm + 1;\n    /* find truncation point with new convergence factor */\n    findu(&utx, .25 * acc1);  acc1 = 0.75 * acc1;\n    if (env != 0) { *ifault=4; goto endofproc; } // JM edit\n    goto l1;\n  }\n\n  /* main integration */\nl2:\n  trace[3] = intv;\n  if (xnt > xlim) { *ifault = 1; goto endofproc; }\n  nt = (int)floor(xnt+0.5);\n  integrate(nt, intv, 0.0, TRUE );\n  if (env != 0) { *ifault=4; goto endofproc; } // JM edit\n  trace[2] = trace[2] + 1; trace[1] = trace[1] + nt + 1;\n  qfval = 0.5 - intl;\n  trace[0] = ersm;\n\n  /* test whether round-off error could be significant\n     allow for radix 8 or 16 machines */\n  up=ersm; x = up + acc / 10.0;\n  for (j=0;j<4;j++) { if (rats[j] * x == rats[j] * up) *ifault = 2; }\n\nendofproc :\n  free((char*)th);\n  trace[6] = (double)count;\n  return qfval;\n}\n\n#ifdef __cplusplus\n}\n#endif\n\n"
  },
  {
    "path": "external_libs/qf/qfc.h",
    "content": "#ifndef QFC_H\n#define QFC_H\n\n//#define UseDouble 0             /* all floating point double */\n\n#define TRUE  1\n#define FALSE 0\ntypedef int BOOL;\n\n#define pi 3.14159265358979\n#define log28 .0866  /*  log(2.0) / 8.0  */\n\n\n#ifdef __cplusplus\nextern \"C\"\n{\n#endif\n\n  double qf(double*,double*,int*,int,double,double,int,double,double*,int*);\n\n#ifdef __cplusplus\n}\n#endif\n\n#endif\n"
  },
  {
    "path": "external_libs/quadpack/Makefile",
    "content": "FLAG=-O2 -DNDEBUG -fPIC -std=legacy\nAR   ?= ar\nFC   ?= gfortran\n\nUNAME_S := $(shell uname -s)\nifeq ($(UNAME_S),Darwin)\n FLAG += -arch x86_64\nendif\n\nOBJECTS = $(patsubst %.f,%.o,$(wildcard ./*.f))\n\nlibquad.a: ${OBJECTS}\n\t${AR} rcs $@ $^ \n\n%.o: %.f\n\t${FC} $(FLAG) -c $<\n\nclean:\n\trm -f *.o *.a\n"
  },
  {
    "path": "external_libs/quadpack/README.md",
    "content": "Fortran code to perform numerical integration.\n\nCode was obtained from netlib (http://www.netlib.org/quadpack/; http://netlib.org/blas/d1mach.f).\n"
  },
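The library is plain Fortran 77, so a C++ caller has to go through the Fortran ABI. Below is a minimal sketch under the usual gfortran conventions (a trailing underscore on the symbol, every argument passed by reference); those conventions are an assumption on the build toolchain, not something this repo pins down. Parameter meanings follow the `dqags.f` prologue below.

```cpp
// Hypothetical binding to dqags, assuming gfortran name mangling
// (dqags_) and pass-by-reference. Integrates f(x) = x^2 over [0,1],
// so result should come back as ~1/3 with ier == 0.
#include <cstdio>

extern "C" void dqags_(double (*f)(double*), double* a, double* b,
                       double* epsabs, double* epsrel, double* result,
                       double* abserr, int* neval, int* ier,
                       int* limit, int* lenw, int* last,
                       int* iwork, double* work);

static double integrand(double* x) { return (*x) * (*x); }

int main() {
  double a = 0.0, b = 1.0, epsabs = 0.0, epsrel = 1e-10;
  double result = 0.0, abserr = 0.0;
  int neval = 0, ier = 0, last = 0;
  int limit = 100, lenw = 4 * limit; // lenw must be at least limit*4
  int iwork[100];
  double work[400];

  dqags_(integrand, &a, &b, &epsabs, &epsrel, &result, &abserr,
         &neval, &ier, &limit, &lenw, &last, iwork, work);
  std::printf("integral = %.12f, abserr = %g, ier = %d\n", result, abserr, ier);
  return ier;
}
```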
  {
    "path": "external_libs/quadpack/d1mach.f",
    "content": "      DOUBLE PRECISION FUNCTION D1MACH(I)\n      INTEGER I\nC\nC  DOUBLE-PRECISION MACHINE CONSTANTS\nC  D1MACH( 1) = B**(EMIN-1), THE SMALLEST POSITIVE MAGNITUDE.\nC  D1MACH( 2) = B**EMAX*(1 - B**(-T)), THE LARGEST MAGNITUDE.\nC  D1MACH( 3) = B**(-T), THE SMALLEST RELATIVE SPACING.\nC  D1MACH( 4) = B**(1-T), THE LARGEST RELATIVE SPACING.\nC  D1MACH( 5) = LOG10(B)\nC\n      INTEGER SMALL(2)\n      INTEGER LARGE(2)\n      INTEGER RIGHT(2)\n      INTEGER DIVER(2)\n      INTEGER LOG10(2)\n      INTEGER SC, CRAY1(38), J\n      COMMON /D9MACH/ CRAY1\n      SAVE SMALL, LARGE, RIGHT, DIVER, LOG10, SC\n      DOUBLE PRECISION DMACH(5)\n      EQUIVALENCE (DMACH(1),SMALL(1))\n      EQUIVALENCE (DMACH(2),LARGE(1))\n      EQUIVALENCE (DMACH(3),RIGHT(1))\n      EQUIVALENCE (DMACH(4),DIVER(1))\n      EQUIVALENCE (DMACH(5),LOG10(1))\nC  THIS VERSION ADAPTS AUTOMATICALLY TO MOST CURRENT MACHINES.\nC  R1MACH CAN HANDLE AUTO-DOUBLE COMPILING, BUT THIS VERSION OF\nC  D1MACH DOES NOT, BECAUSE WE DO NOT HAVE QUAD CONSTANTS FOR\nC  MANY MACHINES YET.\nC  TO COMPILE ON OLDER MACHINES, ADD A C IN COLUMN 1\nC  ON THE NEXT LINE\n      DATA SC/0/\nC  AND REMOVE THE C FROM COLUMN 1 IN ONE OF THE SECTIONS BELOW.\nC  CONSTANTS FOR EVEN OLDER MACHINES CAN BE OBTAINED BY\nC          mail netlib@research.bell-labs.com\nC          send old1mach from blas\nC  PLEASE SEND CORRECTIONS TO dmg OR ehg@bell-labs.com.\nC\nC     MACHINE CONSTANTS FOR THE HONEYWELL DPS 8/70 SERIES.\nC      DATA SMALL(1),SMALL(2) / O402400000000, O000000000000 /\nC      DATA LARGE(1),LARGE(2) / O376777777777, O777777777777 /\nC      DATA RIGHT(1),RIGHT(2) / O604400000000, O000000000000 /\nC      DATA DIVER(1),DIVER(2) / O606400000000, O000000000000 /\nC      DATA LOG10(1),LOG10(2) / O776464202324, O117571775714 /, SC/987/\nC\nC     MACHINE CONSTANTS FOR PDP-11 FORTRANS SUPPORTING\nC     32-BIT INTEGERS.\nC      DATA SMALL(1),SMALL(2) /    8388608,           0 /\nC      DATA LARGE(1),LARGE(2) / 2147483647,          -1 /\nC      DATA RIGHT(1),RIGHT(2) /  612368384,           0 /\nC      DATA DIVER(1),DIVER(2) /  620756992,           0 /\nC      DATA LOG10(1),LOG10(2) / 1067065498, -2063872008 /, SC/987/\nC\nC     MACHINE CONSTANTS FOR THE UNIVAC 1100 SERIES.\nC      DATA SMALL(1),SMALL(2) / O000040000000, O000000000000 /\nC      DATA LARGE(1),LARGE(2) / O377777777777, O777777777777 /\nC      DATA RIGHT(1),RIGHT(2) / O170540000000, O000000000000 /\nC      DATA DIVER(1),DIVER(2) / O170640000000, O000000000000 /\nC      DATA LOG10(1),LOG10(2) / O177746420232, O411757177572 /, SC/987/\nC\nC     ON FIRST CALL, IF NO DATA UNCOMMENTED, TEST MACHINE TYPES.\n      IF (SC .NE. 987) THEN\n         DMACH(1) = 1.D13\n         IF (      SMALL(1) .EQ. 1117925532\n     *       .AND. SMALL(2) .EQ. -448790528) THEN\n*           *** IEEE BIG ENDIAN ***\n            SMALL(1) = 1048576\n            SMALL(2) = 0\n            LARGE(1) = 2146435071\n            LARGE(2) = -1\n            RIGHT(1) = 1017118720\n            RIGHT(2) = 0\n            DIVER(1) = 1018167296\n            DIVER(2) = 0\n            LOG10(1) = 1070810131\n            LOG10(2) = 1352628735\n         ELSE IF ( SMALL(2) .EQ. 1117925532\n     *       .AND. SMALL(1) .EQ. 
-448790528) THEN\n*           *** IEEE LITTLE ENDIAN ***\n            SMALL(2) = 1048576\n            SMALL(1) = 0\n            LARGE(2) = 2146435071\n            LARGE(1) = -1\n            RIGHT(2) = 1017118720\n            RIGHT(1) = 0\n            DIVER(2) = 1018167296\n            DIVER(1) = 0\n            LOG10(2) = 1070810131\n            LOG10(1) = 1352628735\n         ELSE IF ( SMALL(1) .EQ. -2065213935\n     *       .AND. SMALL(2) .EQ. 10752) THEN\n*               *** VAX WITH D_FLOATING ***\n            SMALL(1) = 128\n            SMALL(2) = 0\n            LARGE(1) = -32769\n            LARGE(2) = -1\n            RIGHT(1) = 9344\n            RIGHT(2) = 0\n            DIVER(1) = 9472\n            DIVER(2) = 0\n            LOG10(1) = 546979738\n            LOG10(2) = -805796613\n         ELSE IF ( SMALL(1) .EQ. 1267827943\n     *       .AND. SMALL(2) .EQ. 704643072) THEN\n*               *** IBM MAINFRAME ***\n            SMALL(1) = 1048576\n            SMALL(2) = 0\n            LARGE(1) = 2147483647\n            LARGE(2) = -1\n            RIGHT(1) = 856686592\n            RIGHT(2) = 0\n            DIVER(1) = 873463808\n            DIVER(2) = 0\n            LOG10(1) = 1091781651\n            LOG10(2) = 1352628735\n         ELSE IF ( SMALL(1) .EQ. 1120022684\n     *       .AND. SMALL(2) .EQ. -448790528) THEN\n*           *** CONVEX C-1 ***\n            SMALL(1) = 1048576\n            SMALL(2) = 0\n            LARGE(1) = 2147483647\n            LARGE(2) = -1\n            RIGHT(1) = 1019215872\n            RIGHT(2) = 0\n            DIVER(1) = 1020264448\n            DIVER(2) = 0\n            LOG10(1) = 1072907283\n            LOG10(2) = 1352628735\n         ELSE IF ( SMALL(1) .EQ. 815547074\n     *       .AND. SMALL(2) .EQ. 58688) THEN\n*           *** VAX G-FLOATING ***\n            SMALL(1) = 16\n            SMALL(2) = 0\n            LARGE(1) = -32769\n            LARGE(2) = -1\n            RIGHT(1) = 15552\n            RIGHT(2) = 0\n            DIVER(1) = 15568\n            DIVER(2) = 0\n            LOG10(1) = 1142112243\n            LOG10(2) = 2046775455\n         ELSE\n            DMACH(2) = 1.D27 + 1\n            DMACH(3) = 1.D27\n            LARGE(2) = LARGE(2) - RIGHT(2)\n            IF (LARGE(2) .EQ. 64 .AND. SMALL(2) .EQ. 0) THEN\n               CRAY1(1) = 67291416\n               DO 10 J = 1, 20\n                  CRAY1(J+1) = CRAY1(J) + CRAY1(J)\n 10               CONTINUE\n               CRAY1(22) = CRAY1(21) + 321322\n               DO 20 J = 22, 37\n                  CRAY1(J+1) = CRAY1(J) + CRAY1(J)\n 20               CONTINUE\n               IF (CRAY1(38) .EQ. SMALL(1)) THEN\n*                  *** CRAY ***\n                  CALL I1MCRY(SMALL(1), J, 8285, 8388608, 0)\n                  SMALL(2) = 0\n                  CALL I1MCRY(LARGE(1), J, 24574, 16777215, 16777215)\n                  CALL I1MCRY(LARGE(2), J, 0, 16777215, 16777214)\n                  CALL I1MCRY(RIGHT(1), J, 16291, 8388608, 0)\n                  RIGHT(2) = 0\n                  CALL I1MCRY(DIVER(1), J, 16292, 8388608, 0)\n                  DIVER(2) = 0\n                  CALL I1MCRY(LOG10(1), J, 16383, 10100890, 8715215)\n                  CALL I1MCRY(LOG10(2), J, 0, 16226447, 9001388)\n               ELSE\n                  WRITE(*,9000)\n                  STOP 779\n                  END IF\n            ELSE\n               WRITE(*,9000)\n               STOP 779\n               END IF\n            END IF\n         SC = 987\n         END IF\n*    SANITY CHECK\n      IF (DMACH(4) .GE. 
1.0D0) STOP 778\n      IF (I .LT. 1 .OR. I .GT. 5) THEN\n         WRITE(*,*) 'D1MACH(I): I =',I,' is out of bounds.'\n         STOP\n         END IF\n      D1MACH = DMACH(I)\n      RETURN\n 9000 FORMAT(/' Adjust D1MACH by uncommenting data statements'/\n     *' appropriate for your machine.')\n* /* Standard C source for D1MACH -- remove the * in column 1 */\n*#include <stdio.h>\n*#include <float.h>\n*#include <math.h>\n*double d1mach_(long *i)\n*{\n*\tswitch(*i){\n*\t  case 1: return DBL_MIN;\n*\t  case 2: return DBL_MAX;\n*\t  case 3: return DBL_EPSILON/FLT_RADIX;\n*\t  case 4: return DBL_EPSILON;\n*\t  case 5: return log10((double)FLT_RADIX);\n*\t  }\n*\tfprintf(stderr, \"invalid argument: d1mach(%ld)\\n\", *i);\n*\texit(1); return 0; /* some compilers demand return values */\n*}\n      END\n      SUBROUTINE I1MCRY(A, A1, B, C, D)\n**** SPECIAL COMPUTATION FOR OLD CRAY MACHINES ****\n      INTEGER A, A1, B, C, D\n      A1 = 16777216*B + C\n      A = 16777216*A1 + D\n      END\n"
  },
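On IEEE hardware the DATA tables in `d1mach.f` above reduce to a handful of `<float.h>` constants; the file itself carries the equivalent C source in a comment block at the bottom. A C++ rendering of that table, for reference:

```cpp
// C++ rendering of the commented-out C source at the end of d1mach.f.
#include <cfloat>
#include <cmath>
#include <cstdio>

double d1mach(int i) {
  switch (i) {
    case 1: return DBL_MIN;                 // smallest positive magnitude
    case 2: return DBL_MAX;                 // largest magnitude
    case 3: return DBL_EPSILON / FLT_RADIX; // smallest relative spacing
    case 4: return DBL_EPSILON;             // largest relative spacing
    case 5: return std::log10(static_cast<double>(FLT_RADIX));
  }
  std::fprintf(stderr, "invalid argument: d1mach(%d)\n", i);
  return 0.0;
}
```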
  {
    "path": "external_libs/quadpack/dqags.f",
    "content": "      subroutine dqags(f,a,b,epsabs,epsrel,result,abserr,neval,ier,\n     *   limit,lenw,last,iwork,work)\nc***begin prologue  dqags\nc***date written   800101   (yymmdd)\nc***revision date  830518   (yymmdd)\nc***category no.  h2a1a1\nc***keywords  automatic integrator, general-purpose,\nc             (end-point) singularities, extrapolation,\nc             globally adaptive\nc***author  piessens,robert,appl. math. & progr. div. - k.u.leuven\nc           de doncker,elise,appl. math. & prog. div. - k.u.leuven\nc***purpose  the routine calculates an approximation result to a given\nc            definite integral  i = integral of f over (a,b),\nc            hopefully satisfying following claim for accuracy\nc            abs(i-result).le.max(epsabs,epsrel*abs(i)).\nc***description\nc\nc        computation of a definite integral\nc        standard fortran subroutine\nc        double precision version\nc\nc\nc        parameters\nc         on entry\nc            f      - double precision\nc                     function subprogram defining the integrand\nc                     function f(x). the actual name for f needs to be\nc                     declared e x t e r n a l in the driver program.\nc\nc            a      - double precision\nc                     lower limit of integration\nc\nc            b      - double precision\nc                     upper limit of integration\nc\nc            epsabs - double precision\nc                     absolute accuracy requested\nc            epsrel - double precision\nc                     relative accuracy requested\nc                     if  epsabs.le.0\nc                     and epsrel.lt.max(50*rel.mach.acc.,0.5d-28),\nc                     the routine will end with ier = 6.\nc\nc         on return\nc            result - double precision\nc                     approximation to the integral\nc\nc            abserr - double precision\nc                     estimate of the modulus of the absolute error,\nc                     which should equal or exceed abs(i-result)\nc\nc            neval  - integer\nc                     number of integrand evaluations\nc\nc            ier    - integer\nc                     ier = 0 normal and reliable termination of the\nc                             routine. it is assumed that the requested\nc                             accuracy has been achieved.\nc                     ier.gt.0 abnormal termination of the routine\nc                             the estimates for integral and error are\nc                             less reliable. it is assumed that the\nc                             requested accuracy has not been achieved.\nc            error messages\nc                     ier = 1 maximum number of subdivisions allowed\nc                             has been achieved. one can allow more sub-\nc                             divisions by increasing the value of limit\nc                             (and taking the according dimension\nc                             adjustments into account. however, if\nc                             this yields no improvement it is advised\nc                             to analyze the integrand in order to\nc                             determine the integration difficulties. if\nc                             the position of a local difficulty can be\nc                             determined (e.g. 
singularity,\nc                             discontinuity within the interval) one\nc                             will probably gain from splitting up the\nc                             interval at this point and calling the\nc                             integrator on the subranges. if possible,\nc                             an appropriate special-purpose integrator\nc                             should be used, which is designed for\nc                             handling the type of difficulty involved.\nc                         = 2 the occurrence of roundoff error is detec-\nc                             ted, which prevents the requested\nc                             tolerance from being achieved.\nc                             the error may be under-estimated.\nc                         = 3 extremely bad integrand behaviour\nc                             occurs at some points of the integration\nc                             interval.\nc                         = 4 the algorithm does not converge.\nc                             roundoff error is detected in the\nc                             extrapolation table. it is presumed that\nc                             the requested tolerance cannot be\nc                             achieved, and that the returned result is\nc                             the best which can be obtained.\nc                         = 5 the integral is probably divergent, or\nc                             slowly convergent. it must be noted that\nc                             divergence can occur with any other value\nc                             of ier.\nc                         = 6 the input is invalid, because\nc                             (epsabs.le.0 and\nc                              epsrel.lt.max(50*rel.mach.acc.,0.5d-28)\nc                             or limit.lt.1 or lenw.lt.limit*4.\nc                             result, abserr, neval, last are set to\nc                             zero.except when limit or lenw is invalid,\nc                             iwork(1), work(limit*2+1) and\nc                             work(limit*3+1) are set to zero, work(1)\nc                             is set to a and work(limit+1) to b.\nc\nc         dimensioning parameters\nc            limit - integer\nc                    dimensioning parameter for iwork\nc                    limit determines the maximum number of subintervals\nc                    in the partition of the given integration interval\nc                    (a,b), limit.ge.1.\nc                    if limit.lt.1, the routine will end with ier = 6.\nc\nc            lenw  - integer\nc                    dimensioning parameter for work\nc                    lenw must be at least limit*4.\nc                    if lenw.lt.limit*4, the routine will end\nc                    with ier = 6.\nc\nc            last  - integer\nc                    on return, last equals the number of subintervals\nc                    produced in the subdivision process, detemines the\nc                    number of significant elements actually in the work\nc                    arrays.\nc\nc         work arrays\nc            iwork - integer\nc                    vector of dimension at least limit, the first k\nc                    elements of which contain pointers\nc                    to the error estimates over the subintervals\nc                    such that work(limit*3+iwork(1)),... 
,\nc                    work(limit*3+iwork(k)) form a decreasing\nc                    sequence, with k = last if last.le.(limit/2+2),\nc                    and k = limit+1-last otherwise\nc\nc            work  - double precision\nc                    vector of dimension at least lenw\nc                    on return\nc                    work(1), ..., work(last) contain the left\nc                     end-points of the subintervals in the\nc                     partition of (a,b),\nc                    work(limit+1), ..., work(limit+last) contain\nc                     the right end-points,\nc                    work(limit*2+1), ..., work(limit*2+last) contain\nc                     the integral approximations over the subintervals,\nc                    work(limit*3+1), ..., work(limit*3+last)\nc                     contain the error estimates.\nc\nc***references  (none)\nc***routines called  dqagse,xerror\nc***end prologue  dqags\nc\nc\n      double precision a,abserr,b,epsabs,epsrel,f,result,work\n      integer ier,iwork,last,lenw,limit,lvl,l1,l2,l3,neval\nc\n      dimension iwork(limit),work(lenw)\nc\n      external f\nc\nc         check validity of limit and lenw.\nc\nc***first executable statement  dqags\n      ier = 6\n      neval = 0\n      last = 0\n      result = 0.0d+00\n      abserr = 0.0d+00\n      if(limit.lt.1.or.lenw.lt.limit*4) go to 10\nc\nc         prepare call for dqagse.\nc\n      l1 = limit+1\n      l2 = limit+l1\n      l3 = limit+l2\nc\n      call dqagse(f,a,b,epsabs,epsrel,limit,result,abserr,neval,\n     *  ier,work(1),work(l1),work(l2),work(l3),iwork,last)\nc\nc         call error handler if necessary.\nc\n      lvl = 0\n10    if(ier.eq.6) lvl = 1\nc      Commented out by JMbatchou (11/19/2021)      \nc      if(ier.ne.0) call xerror(26habnormal return from dqags,26,ier,lvl)\n      return\n      end\n"
  },
  {
    "path": "external_libs/quadpack/dqagse.f",
    "content": "      subroutine dqagse(f,a,b,epsabs,epsrel,limit,result,abserr,neval,\n     *   ier,alist,blist,rlist,elist,iord,last)\nc***begin prologue  dqagse\nc***date written   800101   (yymmdd)\nc***revision date  830518   (yymmdd)\nc***category no.  h2a1a1\nc***keywords  automatic integrator, general-purpose,\nc             (end point) singularities, extrapolation,\nc             globally adaptive\nc***author  piessens,robert,appl. math. & progr. div. - k.u.leuven\nc           de doncker,elise,appl. math. & progr. div. - k.u.leuven\nc***purpose  the routine calculates an approximation result to a given\nc            definite integral i = integral of f over (a,b),\nc            hopefully satisfying following claim for accuracy\nc            abs(i-result).le.max(epsabs,epsrel*abs(i)).\nc***description\nc\nc        computation of a definite integral\nc        standard fortran subroutine\nc        double precision version\nc\nc        parameters\nc         on entry\nc            f      - double precision\nc                     function subprogram defining the integrand\nc                     function f(x). the actual name for f needs to be\nc                     declared e x t e r n a l in the driver program.\nc\nc            a      - double precision\nc                     lower limit of integration\nc\nc            b      - double precision\nc                     upper limit of integration\nc\nc            epsabs - double precision\nc                     absolute accuracy requested\nc            epsrel - double precision\nc                     relative accuracy requested\nc                     if  epsabs.le.0\nc                     and epsrel.lt.max(50*rel.mach.acc.,0.5d-28),\nc                     the routine will end with ier = 6.\nc\nc            limit  - integer\nc                     gives an upperbound on the number of subintervals\nc                     in the partition of (a,b)\nc\nc         on return\nc            result - double precision\nc                     approximation to the integral\nc\nc            abserr - double precision\nc                     estimate of the modulus of the absolute error,\nc                     which should equal or exceed abs(i-result)\nc\nc            neval  - integer\nc                     number of integrand evaluations\nc\nc            ier    - integer\nc                     ier = 0 normal and reliable termination of the\nc                             routine. it is assumed that the requested\nc                             accuracy has been achieved.\nc                     ier.gt.0 abnormal termination of the routine\nc                             the estimates for integral and error are\nc                             less reliable. it is assumed that the\nc                             requested accuracy has not been achieved.\nc            error messages\nc                         = 1 maximum number of subdivisions allowed\nc                             has been achieved. one can allow more sub-\nc                             divisions by increasing the value of limit\nc                             (and taking the according dimension\nc                             adjustments into account). however, if\nc                             this yields no improvement it is advised\nc                             to analyze the integrand in order to\nc                             determine the integration difficulties. if\nc                             the position of a local difficulty can be\nc                             determined (e.g. 
singularity,\nc                             discontinuity within the interval) one\nc                             will probably gain from splitting up the\nc                             interval at this point and calling the\nc                             integrator on the subranges. if possible,\nc                             an appropriate special-purpose integrator\nc                             should be used, which is designed for\nc                             handling the type of difficulty involved.\nc                         = 2 the occurrence of roundoff error is detec-\nc                             ted, which prevents the requested\nc                             tolerance from being achieved.\nc                             the error may be under-estimated.\nc                         = 3 extremely bad integrand behaviour\nc                             occurs at some points of the integration\nc                             interval.\nc                         = 4 the algorithm does not converge.\nc                             roundoff error is detected in the\nc                             extrapolation table.\nc                             it is presumed that the requested\nc                             tolerance cannot be achieved, and that the\nc                             returned result is the best which can be\nc                             obtained.\nc                         = 5 the integral is probably divergent, or\nc                             slowly convergent. it must be noted that\nc                             divergence can occur with any other value\nc                             of ier.\nc                         = 6 the input is invalid, because\nc                             epsabs.le.0 and\nc                             epsrel.lt.max(50*rel.mach.acc.,0.5d-28).\nc                             result, abserr, neval, last, rlist(1),\nc                             iord(1) and elist(1) are set to zero.\nc                             alist(1) and blist(1) are set to a and b\nc                             respectively.\nc\nc            alist  - double precision\nc                     vector of dimension at least limit, the first\nc                      last  elements of which are the left end points\nc                     of the subintervals in the partition of the\nc                     given integration range (a,b)\nc\nc            blist  - double precision\nc                     vector of dimension at least limit, the first\nc                      last  elements of which are the right end points\nc                     of the subintervals in the partition of the given\nc                     integration range (a,b)\nc\nc            rlist  - double precision\nc                     vector of dimension at least limit, the first\nc                      last  elements of which are the integral\nc                     approximations on the subintervals\nc\nc            elist  - double precision\nc                     vector of dimension at least limit, the first\nc                      last  elements of which are the moduli of the\nc                     absolute error estimates on the subintervals\nc\nc            iord   - integer\nc                     vector of dimension at least limit, the first k\nc                     elements of which are pointers to the\nc                     error estimates over the subintervals,\nc                     such that elist(iord(1)), ..., elist(iord(k))\nc                     form a decreasing sequence, with k = last\nc                 
    if last.le.(limit/2+2), and k = limit+1-last\nc                     otherwise\nc\nc            last   - integer\nc                     number of subintervals actually produced in the\nc                     subdivision process\nc\nc***references  (none)\nc***routines called  d1mach,dqelg,dqk21,dqpsrt\nc***end prologue  dqagse\nc\n      double precision a,abseps,abserr,alist,area,area1,area12,area2,a1,\n     *  a2,b,blist,b1,b2,correc,dabs,defabs,defab1,defab2,d1mach,dmax1,\n     *  dres,elist,epmach,epsabs,epsrel,erlarg,erlast,errbnd,errmax,\n     *  error1,error2,erro12,errsum,ertest,f,oflow,resabs,reseps,result,\n     *  res3la,rlist,rlist2,small,uflow\n      integer id,ier,ierro,iord,iroff1,iroff2,iroff3,jupbnd,k,ksgn,\n     *  ktmin,last,limit,maxerr,neval,nres,nrmax,numrl2\n      logical extrap,noext\nc\n      dimension alist(limit),blist(limit),elist(limit),iord(limit),\n     * res3la(3),rlist(limit),rlist2(52)\nc\n      external f\nc\nc            the dimension of rlist2 is determined by the value of\nc            limexp in subroutine dqelg (rlist2 should be of dimension\nc            (limexp+2) at least).\nc\nc            list of major variables\nc            -----------------------\nc\nc           alist     - list of left end points of all subintervals\nc                       considered up to now\nc           blist     - list of right end points of all subintervals\nc                       considered up to now\nc           rlist(i)  - approximation to the integral over\nc                       (alist(i),blist(i))\nc           rlist2    - array of dimension at least limexp+2 containing\nc                       the part of the epsilon table which is still\nc                       needed for further computations\nc           elist(i)  - error estimate applying to rlist(i)\nc           maxerr    - pointer to the interval with largest error\nc                       estimate\nc           errmax    - elist(maxerr)\nc           erlast    - error on the interval currently subdivided\nc                       (before that subdivision has taken place)\nc           area      - sum of the integrals over the subintervals\nc           errsum    - sum of the errors over the subintervals\nc           errbnd    - requested accuracy max(epsabs,epsrel*\nc                       abs(result))\nc           *****1    - variable for the left interval\nc           *****2    - variable for the right interval\nc           last      - index for subdivision\nc           nres      - number of calls to the extrapolation routine\nc           numrl2    - number of elements currently in rlist2. if an\nc                       appropriate approximation to the compounded\nc                       integral has been obtained it is put in\nc                       rlist2(numrl2) after numrl2 has been increased\nc                       by one.\nc           small     - length of the smallest interval considered up\nc                       to now, multiplied by 1.5\nc           erlarg    - sum of the errors over the intervals larger\nc                       than the smallest interval considered up to now\nc           extrap    - logical variable denoting that the routine is\nc                       attempting to perform extrapolation i.e. 
before\nc                       subdividing the smallest interval we try to\nc                       decrease the value of erlarg.\nc           noext     - logical variable denoting that extrapolation\nc                       is no longer allowed (true value)\nc\nc            machine dependent constants\nc            ---------------------------\nc\nc           epmach is the largest relative spacing.\nc           uflow is the smallest positive magnitude.\nc           oflow is the largest positive magnitude.\nc\nc***first executable statement  dqagse\n      epmach = d1mach(4)\nc\nc            test on validity of parameters\nc            ------------------------------\n      ier = 0\n      neval = 0\n      last = 0\n      result = 0.0d+00\n      abserr = 0.0d+00\n      alist(1) = a\n      blist(1) = b\n      rlist(1) = 0.0d+00\n      elist(1) = 0.0d+00\n      if(epsabs.le.0.0d+00.and.epsrel.lt.dmax1(0.5d+02*epmach,0.5d-28))\n     *   ier = 6\n      if(ier.eq.6) go to 999\nc\nc           first approximation to the integral\nc           -----------------------------------\nc\n      uflow = d1mach(1)\n      oflow = d1mach(2)\n      ierro = 0\n      call dqk21(f,a,b,result,abserr,defabs,resabs)\nc\nc           test on accuracy.\nc\n      dres = dabs(result)\n      errbnd = dmax1(epsabs,epsrel*dres)\n      last = 1\n      rlist(1) = result\n      elist(1) = abserr\n      iord(1) = 1\n      if(abserr.le.1.0d+02*epmach*defabs.and.abserr.gt.errbnd) ier = 2\n      if(limit.eq.1) ier = 1\n      if(ier.ne.0.or.(abserr.le.errbnd.and.abserr.ne.resabs).or.\n     *  abserr.eq.0.0d+00) go to 140\nc\nc           initialization\nc           --------------\nc\n      rlist2(1) = result\n      errmax = abserr\n      maxerr = 1\n      area = result\n      errsum = abserr\n      abserr = oflow\n      nrmax = 1\n      nres = 0\n      numrl2 = 2\n      ktmin = 0\n      extrap = .false.\n      noext = .false.\n      iroff1 = 0\n      iroff2 = 0\n      iroff3 = 0\n      ksgn = -1\n      if(dres.ge.(0.1d+01-0.5d+02*epmach)*defabs) ksgn = 1\nc\nc           main do-loop\nc           ------------\nc\n      do 90 last = 2,limit\nc\nc           bisect the subinterval with the nrmax-th largest error\nc           estimate.\nc\n        a1 = alist(maxerr)\n        b1 = 0.5d+00*(alist(maxerr)+blist(maxerr))\n        a2 = b1\n        b2 = blist(maxerr)\n        erlast = errmax\n        call dqk21(f,a1,b1,area1,error1,resabs,defab1)\n        call dqk21(f,a2,b2,area2,error2,resabs,defab2)\nc\nc           improve previous approximations to integral\nc           and error and test for accuracy.\nc\n        area12 = area1+area2\n        erro12 = error1+error2\n        errsum = errsum+erro12-errmax\n        area = area+area12-rlist(maxerr)\n        if(defab1.eq.error1.or.defab2.eq.error2) go to 15\n        if(dabs(rlist(maxerr)-area12).gt.0.1d-04*dabs(area12)\n     *  .or.erro12.lt.0.99d+00*errmax) go to 10\n        if(extrap) iroff2 = iroff2+1\n        if(.not.extrap) iroff1 = iroff1+1\n   10   if(last.gt.10.and.erro12.gt.errmax) iroff3 = iroff3+1\n   15   rlist(maxerr) = area1\n        rlist(last) = area2\n        errbnd = dmax1(epsabs,epsrel*dabs(area))\nc\nc           test for roundoff error and eventually set error flag.\nc\n        if(iroff1+iroff2.ge.10.or.iroff3.ge.20) ier = 2\n        if(iroff2.ge.5) ierro = 3\nc\nc           set error flag in the case that the number of subintervals\nc           equals limit.\nc\n        if(last.eq.limit) ier = 1\nc\nc           set error flag in the case of bad integrand behaviour\nc          
 at a point of the integration range.\nc\n        if(dmax1(dabs(a1),dabs(b2)).le.(0.1d+01+0.1d+03*epmach)*\n     *  (dabs(a2)+0.1d+04*uflow)) ier = 4\nc\nc           append the newly-created intervals to the list.\nc\n        if(error2.gt.error1) go to 20\n        alist(last) = a2\n        blist(maxerr) = b1\n        blist(last) = b2\n        elist(maxerr) = error1\n        elist(last) = error2\n        go to 30\n   20   alist(maxerr) = a2\n        alist(last) = a1\n        blist(last) = b1\n        rlist(maxerr) = area2\n        rlist(last) = area1\n        elist(maxerr) = error2\n        elist(last) = error1\nc\nc           call subroutine dqpsrt to maintain the descending ordering\nc           in the list of error estimates and select the subinterval\nc           with nrmax-th largest error estimate (to be bisected next).\nc\n   30   call dqpsrt(limit,last,maxerr,errmax,elist,iord,nrmax)\nc ***jump out of do-loop\n        if(errsum.le.errbnd) go to 115\nc ***jump out of do-loop\n        if(ier.ne.0) go to 100\n        if(last.eq.2) go to 80\n        if(noext) go to 90\n        erlarg = erlarg-erlast\n        if(dabs(b1-a1).gt.small) erlarg = erlarg+erro12\n        if(extrap) go to 40\nc\nc           test whether the interval to be bisected next is the\nc           smallest interval.\nc\n        if(dabs(blist(maxerr)-alist(maxerr)).gt.small) go to 90\n        extrap = .true.\n        nrmax = 2\n   40   if(ierro.eq.3.or.erlarg.le.ertest) go to 60\nc\nc           the smallest interval has the largest error.\nc           before bisecting decrease the sum of the errors over the\nc           larger intervals (erlarg) and perform extrapolation.\nc\n        id = nrmax\n        jupbnd = last\n        if(last.gt.(2+limit/2)) jupbnd = limit+3-last\n        do 50 k = id,jupbnd\n          maxerr = iord(nrmax)\n          errmax = elist(maxerr)\nc ***jump out of do-loop\n          if(dabs(blist(maxerr)-alist(maxerr)).gt.small) go to 90\n          nrmax = nrmax+1\n   50   continue\nc\nc           perform extrapolation.\nc\n   60   numrl2 = numrl2+1\n        rlist2(numrl2) = area\n        call dqelg(numrl2,rlist2,reseps,abseps,res3la,nres)\n        ktmin = ktmin+1\n        if(ktmin.gt.5.and.abserr.lt.0.1d-02*errsum) ier = 5\n        if(abseps.ge.abserr) go to 70\n        ktmin = 0\n        abserr = abseps\n        result = reseps\n        correc = erlarg\n        ertest = dmax1(epsabs,epsrel*dabs(reseps))\nc ***jump out of do-loop\n        if(abserr.le.ertest) go to 100\nc\nc           prepare bisection of the smallest interval.\nc\n   70   if(numrl2.eq.1) noext = .true.\n        if(ier.eq.5) go to 100\n        maxerr = iord(1)\n        errmax = elist(maxerr)\n        nrmax = 1\n        extrap = .false.\n        small = small*0.5d+00\n        erlarg = errsum\n        go to 90\n   80   small = dabs(b-a)*0.375d+00\n        erlarg = errsum\n        ertest = errbnd\n        rlist2(2) = area\n   90 continue\nc\nc           set final result and error estimate.\nc           ------------------------------------\nc\n  100 if(abserr.eq.oflow) go to 115\n      if(ier+ierro.eq.0) go to 110\n      if(ierro.eq.3) abserr = abserr+correc\n      if(ier.eq.0) ier = 3\n      if(result.ne.0.0d+00.and.area.ne.0.0d+00) go to 105\n      if(abserr.gt.errsum) go to 115\n      if(area.eq.0.0d+00) go to 130\n      go to 110\n  105 if(abserr/dabs(result).gt.errsum/dabs(area)) go to 115\nc\nc           test on divergence.\nc\n  110 if(ksgn.eq.(-1).and.dmax1(dabs(result),dabs(area)).le.\n     * defabs*0.1d-01) go to 130\n      
if(0.1d-01.gt.(result/area).or.(result/area).gt.0.1d+03\n     * .or.errsum.gt.dabs(area)) ier = 6\n      go to 130\nc\nc           compute global integral sum.\nc\n  115 result = 0.0d+00\n      do 120 k = 1,last\n         result = result+rlist(k)\n  120 continue\n      abserr = errsum\n  130 if(ier.gt.2) ier = ier-1\n  140 neval = 42*last-21\n  999 return\n      end\n"
  },
  {
    "path": "external_libs/quadpack/dqelg.f",
    "content": "      subroutine dqelg(n,epstab,result,abserr,res3la,nres)\nc***begin prologue  dqelg\nc***refer to  dqagie,dqagoe,dqagpe,dqagse\nc***routines called  d1mach\nc***revision date  830518   (yymmdd)\nc***keywords  epsilon algorithm, convergence acceleration,\nc             extrapolation\nc***author  piessens,robert,appl. math. & progr. div. - k.u.leuven\nc           de doncker,elise,appl. math & progr. div. - k.u.leuven\nc***purpose  the routine determines the limit of a given sequence of\nc            approximations, by means of the epsilon algorithm of\nc            p.wynn. an estimate of the absolute error is also given.\nc            the condensed epsilon table is computed. only those\nc            elements needed for the computation of the next diagonal\nc            are preserved.\nc***description\nc\nc           epsilon algorithm\nc           standard fortran subroutine\nc           double precision version\nc\nc           parameters\nc              n      - integer\nc                       epstab(n) contains the new element in the\nc                       first column of the epsilon table.\nc\nc              epstab - double precision\nc                       vector of dimension 52 containing the elements\nc                       of the two lower diagonals of the triangular\nc                       epsilon table. the elements are numbered\nc                       starting at the right-hand corner of the\nc                       triangle.\nc\nc              result - double precision\nc                       resulting approximation to the integral\nc\nc              abserr - double precision\nc                       estimate of the absolute error computed from\nc                       result and the 3 previous results\nc\nc              res3la - double precision\nc                       vector of dimension 3 containing the last 3\nc                       results\nc\nc              nres   - integer\nc                       number of calls to the routine\nc                       (should be zero at first call)\nc\nc***end prologue  dqelg\nc\n      double precision abserr,dabs,delta1,delta2,delta3,dmax1,d1mach,\n     *  epmach,epsinf,epstab,error,err1,err2,err3,e0,e1,e1abs,e2,e3,\n     *  oflow,res,result,res3la,ss,tol1,tol2,tol3\n      integer i,ib,ib2,ie,indx,k1,k2,k3,limexp,n,newelm,nres,num\n      dimension epstab(52),res3la(3)\nc\nc           list of major variables\nc           -----------------------\nc\nc           e0     - the 4 elements on which the computation of a new\nc           e1       element in the epsilon table is based\nc           e2\nc           e3                 e0\nc                        e3    e1    new\nc                              e2\nc           newelm - number of elements to be computed in the new\nc                    diagonal\nc           error  - error = abs(e1-e0)+abs(e2-e1)+abs(new-e2)\nc           result - the element in the new diagonal with least value\nc                    of error\nc\nc           machine dependent constants\nc           ---------------------------\nc\nc           epmach is the largest relative spacing.\nc           oflow is the largest positive magnitude.\nc           limexp is the maximum number of elements the epsilon\nc           table can contain. 
if this number is reached, the upper\nc           diagonal of the epsilon table is deleted.\nc\nc***first executable statement  dqelg\n      epmach = d1mach(4)\n      oflow = d1mach(2)\n      nres = nres+1\n      abserr = oflow\n      result = epstab(n)\n      if(n.lt.3) go to 100\n      limexp = 50\n      epstab(n+2) = epstab(n)\n      newelm = (n-1)/2\n      epstab(n) = oflow\n      num = n\n      k1 = n\n      do 40 i = 1,newelm\n        k2 = k1-1\n        k3 = k1-2\n        res = epstab(k1+2)\n        e0 = epstab(k3)\n        e1 = epstab(k2)\n        e2 = res\n        e1abs = dabs(e1)\n        delta2 = e2-e1\n        err2 = dabs(delta2)\n        tol2 = dmax1(dabs(e2),e1abs)*epmach\n        delta3 = e1-e0\n        err3 = dabs(delta3)\n        tol3 = dmax1(e1abs,dabs(e0))*epmach\n        if(err2.gt.tol2.or.err3.gt.tol3) go to 10\nc\nc           if e0, e1 and e2 are equal to within machine\nc           accuracy, convergence is assumed.\nc           result = e2\nc           abserr = abs(e1-e0)+abs(e2-e1)\nc\n        result = res\n        abserr = err2+err3\nc ***jump out of do-loop\n        go to 100\n   10   e3 = epstab(k1)\n        epstab(k1) = e1\n        delta1 = e1-e3\n        err1 = dabs(delta1)\n        tol1 = dmax1(e1abs,dabs(e3))*epmach\nc\nc           if two elements are very close to each other, omit\nc           a part of the table by adjusting the value of n\nc\n        if(err1.le.tol1.or.err2.le.tol2.or.err3.le.tol3) go to 20\n        ss = 0.1d+01/delta1+0.1d+01/delta2-0.1d+01/delta3\n        epsinf = dabs(ss*e1)\nc\nc           test to detect irregular behaviour in the table, and\nc           eventually omit a part of the table adjusting the value\nc           of n.\nc\n        if(epsinf.gt.0.1d-03) go to 30\n   20   n = i+i-1\nc ***jump out of do-loop\n        go to 50\nc\nc           compute a new element and eventually adjust\nc           the value of result.\nc\n   30   res = e1+0.1d+01/ss\n        epstab(k1) = res\n        k1 = k1-2\n        error = err2+dabs(res-e2)+err3\n        if(error.gt.abserr) go to 40\n        abserr = error\n        result = res\n   40 continue\nc\nc           shift the table.\nc\n   50 if(n.eq.limexp) n = 2*(limexp/2)-1\n      ib = 1\n      if((num/2)*2.eq.num) ib = 2\n      ie = newelm+1\n      do 60 i=1,ie\n        ib2 = ib+2\n        epstab(ib) = epstab(ib2)\n        ib = ib2\n   60 continue\n      if(num.eq.n) go to 80\n      indx = num-n+1\n      do 70 i = 1,n\n        epstab(i)= epstab(indx)\n        indx = indx+1\n   70 continue\n   80 if(nres.ge.4) go to 90\n      res3la(nres) = result\n      abserr = oflow\n      go to 100\nc\nc           compute error estimate\nc\n   90 abserr = dabs(result-res3la(3))+dabs(result-res3la(2))\n     *  +dabs(result-res3la(1))\n      res3la(1) = res3la(2)\n      res3la(2) = res3la(3)\n      res3la(3) = result\n  100 abserr = dmax1(abserr,0.5d+01*epmach*dabs(result))\n      return\n      end\n"
  },
  {
    "path": "external_libs/quadpack/dqk21.f",
    "content": "      subroutine dqk21(f,a,b,result,abserr,resabs,resasc)\nc***begin prologue  dqk21\nc***date written   800101   (yymmdd)\nc***revision date  830518   (yymmdd)\nc***category no.  h2a1a2\nc***keywords  21-point gauss-kronrod rules\nc***author  piessens,robert,appl. math. & progr. div. - k.u.leuven\nc           de doncker,elise,appl. math. & progr. div. - k.u.leuven\nc***purpose  to compute i = integral of f over (a,b), with error\nc                           estimate\nc                       j = integral of abs(f) over (a,b)\nc***description\nc\nc           integration rules\nc           standard fortran subroutine\nc           double precision version\nc\nc           parameters\nc            on entry\nc              f      - double precision\nc                       function subprogram defining the integrand\nc                       function f(x). the actual name for f needs to be\nc                       declared e x t e r n a l in the driver program.\nc\nc              a      - double precision\nc                       lower limit of integration\nc\nc              b      - double precision\nc                       upper limit of integration\nc\nc            on return\nc              result - double precision\nc                       approximation to the integral i\nc                       result is computed by applying the 21-point\nc                       kronrod rule (resk) obtained by optimal addition\nc                       of abscissae to the 10-point gauss rule (resg).\nc\nc              abserr - double precision\nc                       estimate of the modulus of the absolute error,\nc                       which should not exceed abs(i-result)\nc\nc              resabs - double precision\nc                       approximation to the integral j\nc\nc              resasc - double precision\nc                       approximation to the integral of abs(f-i/(b-a))\nc                       over (a,b)\nc\nc***references  (none)\nc***routines called  d1mach\nc***end prologue  dqk21\nc\n      double precision a,absc,abserr,b,centr,dabs,dhlgth,dmax1,dmin1,\n     *  d1mach,epmach,f,fc,fsum,fval1,fval2,fv1,fv2,hlgth,resabs,resasc,\n     *  resg,resk,reskh,result,uflow,wg,wgk,xgk\n      integer j,jtw,jtwm1\n      external f\nc\n      dimension fv1(10),fv2(10),wg(5),wgk(11),xgk(11)\nc\nc           the abscissae and weights are given for the interval (-1,1).\nc           because of symmetry only the positive abscissae and their\nc           corresponding weights are given.\nc\nc           xgk    - abscissae of the 21-point kronrod rule\nc                    xgk(2), xgk(4), ...  abscissae of the 10-point\nc                    gauss rule\nc                    xgk(1), xgk(3), ...  abscissae which are optimally\nc                    added to the 10-point gauss rule\nc\nc           wgk    - weights of the 21-point kronrod rule\nc\nc           wg     - weights of the 10-point gauss rule\nc\nc\nc gauss quadrature weights and kronron quadrature abscissae and weights\nc as evaluated with 80 decimal digit arithmetic by l. w. fullerton,\nc bell labs, nov. 
1981.\nc\n      data wg  (  1) / 0.0666713443 0868813759 3568809893 332 d0 /\n      data wg  (  2) / 0.1494513491 5058059314 5776339657 697 d0 /\n      data wg  (  3) / 0.2190863625 1598204399 5534934228 163 d0 /\n      data wg  (  4) / 0.2692667193 0999635509 1226921569 469 d0 /\n      data wg  (  5) / 0.2955242247 1475287017 3892994651 338 d0 /\nc\n      data xgk (  1) / 0.9956571630 2580808073 5527280689 003 d0 /\n      data xgk (  2) / 0.9739065285 1717172007 7964012084 452 d0 /\n      data xgk (  3) / 0.9301574913 5570822600 1207180059 508 d0 /\n      data xgk (  4) / 0.8650633666 8898451073 2096688423 493 d0 /\n      data xgk (  5) / 0.7808177265 8641689706 3717578345 042 d0 /\n      data xgk (  6) / 0.6794095682 9902440623 4327365114 874 d0 /\n      data xgk (  7) / 0.5627571346 6860468333 9000099272 694 d0 /\n      data xgk (  8) / 0.4333953941 2924719079 9265943165 784 d0 /\n      data xgk (  9) / 0.2943928627 0146019813 1126603103 866 d0 /\n      data xgk ( 10) / 0.1488743389 8163121088 4826001129 720 d0 /\n      data xgk ( 11) / 0.0000000000 0000000000 0000000000 000 d0 /\nc\n      data wgk (  1) / 0.0116946388 6737187427 8064396062 192 d0 /\n      data wgk (  2) / 0.0325581623 0796472747 8818972459 390 d0 /\n      data wgk (  3) / 0.0547558965 7435199603 1381300244 580 d0 /\n      data wgk (  4) / 0.0750396748 1091995276 7043140916 190 d0 /\n      data wgk (  5) / 0.0931254545 8369760553 5065465083 366 d0 /\n      data wgk (  6) / 0.1093871588 0229764189 9210590325 805 d0 /\n      data wgk (  7) / 0.1234919762 6206585107 7958109831 074 d0 /\n      data wgk (  8) / 0.1347092173 1147332592 8054001771 707 d0 /\n      data wgk (  9) / 0.1427759385 7706008079 7094273138 717 d0 /\n      data wgk ( 10) / 0.1477391049 0133849137 4841515972 068 d0 /\n      data wgk ( 11) / 0.1494455540 0291690566 4936468389 821 d0 /\nc\nc\nc           list of major variables\nc           -----------------------\nc\nc           centr  - mid point of the interval\nc           hlgth  - half-length of the interval\nc           absc   - abscissa\nc           fval*  - function value\nc           resg   - result of the 10-point gauss formula\nc           resk   - result of the 21-point kronrod formula\nc           reskh  - approximation to the mean value of f over (a,b),\nc                    i.e. 
to i/(b-a)\nc\nc\nc           machine dependent constants\nc           ---------------------------\nc\nc           epmach is the largest relative spacing.\nc           uflow is the smallest positive magnitude.\nc\nc***first executable statement  dqk21\n      epmach = d1mach(4)\n      uflow = d1mach(1)\nc\n      centr = 0.5d+00*(a+b)\n      hlgth = 0.5d+00*(b-a)\n      dhlgth = dabs(hlgth)\nc\nc           compute the 21-point kronrod approximation to\nc           the integral, and estimate the absolute error.\nc\n      resg = 0.0d+00\n      fc = f(centr)\n      resk = wgk(11)*fc\n      resabs = dabs(resk)\n      do 10 j=1,5\n        jtw = 2*j\n        absc = hlgth*xgk(jtw)\n        fval1 = f(centr-absc)\n        fval2 = f(centr+absc)\n        fv1(jtw) = fval1\n        fv2(jtw) = fval2\n        fsum = fval1+fval2\n        resg = resg+wg(j)*fsum\n        resk = resk+wgk(jtw)*fsum\n        resabs = resabs+wgk(jtw)*(dabs(fval1)+dabs(fval2))\n   10 continue\n      do 15 j = 1,5\n        jtwm1 = 2*j-1\n        absc = hlgth*xgk(jtwm1)\n        fval1 = f(centr-absc)\n        fval2 = f(centr+absc)\n        fv1(jtwm1) = fval1\n        fv2(jtwm1) = fval2\n        fsum = fval1+fval2\n        resk = resk+wgk(jtwm1)*fsum\n        resabs = resabs+wgk(jtwm1)*(dabs(fval1)+dabs(fval2))\n   15 continue\n      reskh = resk*0.5d+00\n      resasc = wgk(11)*dabs(fc-reskh)\n      do 20 j=1,10\n        resasc = resasc+wgk(j)*(dabs(fv1(j)-reskh)+dabs(fv2(j)-reskh))\n   20 continue\n      result = resk*hlgth\n      resabs = resabs*dhlgth\n      resasc = resasc*dhlgth\n      abserr = dabs((resk-resg)*hlgth)\n      if(resasc.ne.0.0d+00.and.abserr.ne.0.0d+00)\n     *  abserr = resasc*dmin1(0.1d+01,(0.2d+03*abserr/resasc)**1.5d+00)\n      if(resabs.gt.uflow/(0.5d+02*epmach)) abserr = dmax1\n     *  ((epmach*0.5d+02)*resabs,abserr)\n      return\n      end\n"
  },
  {
    "path": "external_libs/quadpack/dqpsrt.f",
    "content": "      subroutine dqpsrt(limit,last,maxerr,ermax,elist,iord,nrmax)\nc***begin prologue  dqpsrt\nc***refer to  dqage,dqagie,dqagpe,dqawse\nc***routines called  (none)\nc***revision date  810101   (yymmdd)\nc***keywords  sequential sorting\nc***author  piessens,robert,appl. math. & progr. div. - k.u.leuven\nc           de doncker,elise,appl. math. & progr. div. - k.u.leuven\nc***purpose  this routine maintains the descending ordering in the\nc            list of the local error estimated resulting from the\nc            interval subdivision process. at each call two error\nc            estimates are inserted using the sequential search\nc            method, top-down for the largest error estimate and\nc            bottom-up for the smallest error estimate.\nc***description\nc\nc           ordering routine\nc           standard fortran subroutine\nc           double precision version\nc\nc           parameters (meaning at output)\nc              limit  - integer\nc                       maximum number of error estimates the list\nc                       can contain\nc\nc              last   - integer\nc                       number of error estimates currently in the list\nc\nc              maxerr - integer\nc                       maxerr points to the nrmax-th largest error\nc                       estimate currently in the list\nc\nc              ermax  - double precision\nc                       nrmax-th largest error estimate\nc                       ermax = elist(maxerr)\nc\nc              elist  - double precision\nc                       vector of dimension last containing\nc                       the error estimates\nc\nc              iord   - integer\nc                       vector of dimension last, the first k elements\nc                       of which contain pointers to the error\nc                       estimates, such that\nc                       elist(iord(1)),...,  elist(iord(k))\nc                       form a decreasing sequence, with\nc                       k = last if last.le.(limit/2+2), and\nc                       k = limit+1-last otherwise\nc\nc              nrmax  - integer\nc                       maxerr = iord(nrmax)\nc\nc***end prologue  dqpsrt\nc\n      double precision elist,ermax,errmax,errmin\n      integer i,ibeg,ido,iord,isucc,j,jbnd,jupbn,k,last,limit,maxerr,\n     *  nrmax\n      dimension elist(last),iord(last)\nc\nc           check whether the list contains more than\nc           two error estimates.\nc\nc***first executable statement  dqpsrt\n      if(last.gt.2) go to 10\n      iord(1) = 1\n      iord(2) = 2\n      go to 90\nc\nc           this part of the routine is only executed if, due to a\nc           difficult integrand, subdivision increased the error\nc           estimate. in the normal case the insert procedure should\nc           start after the nrmax-th largest error estimate.\nc\n   10 errmax = elist(maxerr)\n      if(nrmax.eq.1) go to 30\n      ido = nrmax-1\n      do 20 i = 1,ido\n        isucc = iord(nrmax-1)\nc ***jump out of do-loop\n        if(errmax.le.elist(isucc)) go to 30\n        iord(nrmax) = isucc\n        nrmax = nrmax-1\n   20    continue\nc\nc           compute the number of elements in the list to be maintained\nc           in descending order. 
this number depends on the number of\nc           subdivisions still allowed.\nc\n   30 jupbn = last\n      if(last.gt.(limit/2+2)) jupbn = limit+3-last\n      errmin = elist(last)\nc\nc           insert errmax by traversing the list top-down,\nc           starting comparison from the element elist(iord(nrmax+1)).\nc\n      jbnd = jupbn-1\n      ibeg = nrmax+1\n      if(ibeg.gt.jbnd) go to 50\n      do 40 i=ibeg,jbnd\n        isucc = iord(i)\nc ***jump out of do-loop\n        if(errmax.ge.elist(isucc)) go to 60\n        iord(i-1) = isucc\n   40 continue\n   50 iord(jbnd) = maxerr\n      iord(jupbn) = last\n      go to 90\nc\nc           insert errmin by traversing the list bottom-up.\nc\n   60 iord(i-1) = maxerr\n      k = jbnd\n      do 70 j=i,jbnd\n        isucc = iord(k)\nc ***jump out of do-loop\n        if(errmin.lt.elist(isucc)) go to 80\n        iord(k+1) = isucc\n        k = k-1\n   70 continue\n      iord(i) = last\n      go to 90\n   80 iord(k+1) = last\nc\nc           set maxerr and ermax.\nc\n   90 maxerr = iord(nrmax)\n      ermax = elist(maxerr)\n      return\n      end\n"
  },
  {
    "path": "external_libs/remeta/Makefile",
    "content": "#\n# Makefile to compile REMETA library\n#\nAR         ?= ar\nCXX        ?= g++\nCXXFLAGS    = -O3 -Wall -std=c++17\nCFLAGS      = \nINC         = ../eigen-3.4.0/\n\nifneq ($(strip $(HTSLIB_PATH)),) # defined externally\n\tCXXFLAGS   += -DWITH_HTSLIB\nendif\n\nUNAME_S := $(shell uname -s)\nifeq ($(UNAME_S),Darwin)\n\tCXXFLAGS += -arch x86_64\nendif\n\nOBJECTS       = bgz_writer.o regenie_ld_matrix_writer.o\n\nremeta.a: ${OBJECTS}\n\t${AR} rcs $@ $^\n\n%.o: %.cpp\n\t${CXX} ${CXXFLAGS} -o $@ -c $< -I${INC}\n\nclean:\n\trm -f *.o *.a"
  },
  {
    "path": "external_libs/remeta/README.md",
    "content": "# Helper Classes From Remeta\nThe classes in this folder are used to store sparse compressed covariance/LD\nmatrices from SKAT for meta-analysis. Because they rely on HTSlib, which might\nnot be available on a user's system, they only get compiled if the WITH_HTSLIB\nmacro is set."
  },
  {
    "path": "external_libs/remeta/bgz_writer.cpp",
    "content": "#ifdef WITH_HTSLIB\n#include \"bgz_writer.hpp\"\n#include <utility>\n\nusing namespace std;\n\nBgzWriter::BgzWriter()\n : filepath(\"\")\n , mode(\"\")\n , bgzf(nullptr)\n , closed(true) {\n  this->buffer = KS_INITIALIZE;\n}\n\nBgzWriter::BgzWriter(string filepath, string mode)\n : filepath(filepath)\n , mode(mode)\n , closed(false) {\n  if (mode != \"w\" && mode != \"a\" && mode != \"wu\" && mode != \"au\") {\n    throw runtime_error(\"invalid write mode \" + mode + \" in BgzWriter\");\n  }\n  this->bgzf = bgzf_open(filepath.c_str(), mode.c_str());\n  this->buffer = KS_INITIALIZE;\n  if (this->bgzf == NULL) {\n    throw runtime_error(\"failed to open \"+ filepath);\n  }\n}\n\nBgzWriter::~BgzWriter() {\n  if (this->bgzf != nullptr && !this->is_closed()) {\n    bgzf_close(this->bgzf);\n  }\n  ks_free(&this->buffer);\n}\n\nBgzWriter::BgzWriter(BgzWriter&& other)\n : filepath(std::move(other.filepath))\n , mode(std::move(other.mode))\n , bgzf(std::exchange(other.bgzf, nullptr))\n , closed(std::move(other.closed)) {\n  this->buffer = KS_INITIALIZE;\n}\n\nvoid BgzWriter::write(string s) {\n  if (this->closed) {\n    throw runtime_error(\"attempted to write to closed file \" + this->filepath);\n  }\n\n  ssize_t bytes_written = bgzf_write(this->bgzf, s.c_str(), s.size());\n  if (bytes_written != (int)s.size()) {\n    throw runtime_error(\"failed to write \" + this->filepath);\n  }\n}\n\nvoid BgzWriter::open(string filepath, string mode) {\n  this->close();\n  this->filepath = filepath;\n  this->mode = mode;\n  this->closed = false;\n  this->bgzf = bgzf_open(filepath.c_str(), mode.c_str());\n  if (this->bgzf == NULL) {\n    throw runtime_error(\"failed to open \"+ filepath);\n  }\n}\n\nvoid BgzWriter::close() {\n  bgzf_close(this->bgzf);\n  this->bgzf = NULL;\n  this->closed = true;\n}\n#endif\n"
  },
  {
    "path": "external_libs/remeta/bgz_writer.hpp",
    "content": "/* bgz_writer.hpp\n* Author: Tyler Joseph\n* \n* HTSlib wrapper to write bgzip files.\n* \n* Example:\n*   // Create a Bgz file for writing:\n*   BgzWriter writer(\"myfile.gz\", \"w\");\n*\n*   // Write to an uncompressed file:\n*   BgzWriter writer(\"myfile\", \"wu\");\n* \n*   // Or append to a file:\n*   BgzWriter writer(\"myfile.gz\", \"a\");\n* \n*   // Write to the file:\n*   writer.write(string);\n* \n*   // Close:\n*   writer.close()\n*/\n#ifdef WITH_HTSLIB\n/* bgz_writer.hpp\n* Author: Tyler Joseph\n* \n* HTSlib wrapper to write bgzip files.\n* \n* Example:\n*   // Create a Bgz file for writing:\n*   BgzWriter writer = BgzWriter(\"myfile.gz\", \"w\");\n*\n*   // Write to an uncompressed file:\n*   BgzWriter writer = BgzWriter(\"myfile\", \"wu\");\n* \n*   // Or append to a file:\n*   BgzWriter writer = BgzWriter(\"myfile.gz\", \"a\");\n* \n*   // Write to the file:\n*   writer.write(string);\n* \n*   // Close:\n*   writer.close()\n*/\n\n#ifndef BGZ_WRITER_H\n#define BGZ_WRITER_H\n\n#include <htslib/bgzf.h>\n#include <htslib/kstring.h>\n#include <htslib/tbx.h>\n#include <iostream>\n#include <map>\n#include <queue>\n#include <sstream>\n#include <string>\n#include <vector>\n#include <utility>\n\nclass BgzWriter {\n public:\n  BgzWriter();\n  BgzWriter(std::string filepath, std::string mode);\n  ~BgzWriter();\n\n  /*\n    We don't want to have duplicates of the same file open for writing, so\n    we need to delete the default copy constructor and assignment operator.\n  */\n  BgzWriter(const BgzWriter& other) = delete;\n  BgzWriter& operator=(BgzWriter other) = delete;\n\n  // Allow BgzWriter to be placed in containers using its move constructor\n  BgzWriter(BgzWriter&& other);\n\n  void write(std::string s);\n\n  // this gets overridden in ld_matrix_writer.hpp, but\n  // it's annoying that we have to make these a virtual function\n  virtual void open(std::string filepath, std::string mode);\n  virtual void close();\n\n  bool is_closed() { return closed; }\n\n  int64_t tell() { return bgzf_tell(this->bgzf); }\n\n  template<typename T>\n  void write(T data) {\n    ssize_t size = sizeof(data);\n    if (this->closed) {\n      throw std::runtime_error(\"attempted to write closed file \" + this->filepath);\n    }\n\n    ssize_t bytes_written = bgzf_write(\n      this->bgzf,\n      static_cast<char *>(static_cast<void *>(&data)),\n      size\n    );\n    if (bytes_written != size) {\n      throw std::runtime_error(\"failed to write \" + this->filepath);\n    }\n  }\n\n private:\n  std::string filepath;\n  std::string mode;\n  BGZF* bgzf;\n  kstring_t buffer;\n  bool closed;\n};\n\n#endif\n#endif"
  },
  {
    "path": "external_libs/remeta/regenie_ld_matrix_writer.cpp",
    "content": "#ifdef WITH_HTSLIB\n#include \"regenie_ld_matrix_writer.hpp\"\n\n#include <Eigen/Dense>\nusing Eigen::VectorXd;\nusing Eigen::DiagonalMatrix;\n\nvoid cov_to_corr(VectorXd& variances, MatrixXd& corr, const MatrixXd& cov) {\n  variances = cov.diagonal();\n  MatrixXd tmp = (variances.array() > 0).select(\n                    (variances.array() > 0).select(variances, 1)\n                                           .array()\n                                           .sqrt()\n                                           .inverse()\n                                           .matrix(),\n                    0).asDiagonal();\n  corr = tmp * cov * tmp;\n}\n\nRegenieLDMatrixWriter::RegenieLDMatrixWriter()\n : BgzWriter()\n , idx() {}\n\nRegenieLDMatrixWriter::RegenieLDMatrixWriter(string file_prefix, int sample_size)\n : BgzWriter(file_prefix + \".rg.ld\", \"w\")\n , idx(file_prefix + \".rg.ld.idx.gz\", \"w\") {\n  this->write((int32_t)sample_size);\n  if (sizeof(float) != 4) {\n    throw runtime_error(\"bad float size: sizeof(float) != 4\");\n  }\n}\n\nvoid RegenieLDMatrixWriter::write_matrix_dense(const MatrixXd& ld_mat,\n                                        const string& gene_name,\n                                        const vector<string>& variant_ids) {\n  if (this->is_closed()) {\n    throw runtime_error(\"operating on a closed file\");\n  }\n\n  int64_t addr = this->tell();\n  size_t nrows = ld_mat.rows();\n  size_t ncols = ld_mat.cols();\n  this->write('d'); // dense\n  this->write((int32_t)nrows);\n  this->write((int32_t)0); // symmetry with sparse format\n\n  if (nrows != ncols || nrows != variant_ids.size()) {\n    throw runtime_error(\"dimension mismatch when writing LD matrix\");\n  } else if ( ((ld_mat - ld_mat.transpose()).array().abs() > 1e-3).any() ) {\n    throw runtime_error(\"LD matrix must be symmetric\");\n  }\n\n  for (size_t i = 0; i < nrows; ++i) {\n    for (size_t j = 0; j <= i; ++j) {\n      this->write((float)ld_mat(i, j));\n    }\n  }\n\n  this->write_idx_entry(gene_name, variant_ids, addr);\n}\n\nvoid RegenieLDMatrixWriter::write_matrix_sparse(const MatrixXd& ld_mat,\n                                         const string& gene_name,\n                                         const vector<string>& variant_ids,\n                                         const double& sparsity_threshold) {\n  size_t nrows = ld_mat.rows();\n  size_t ncols = ld_mat.cols();\n  if (nrows != ncols || nrows != variant_ids.size()) {\n    throw runtime_error(\"dimension mismatch when writing LD matrix\");\n  } else if ( ((ld_mat - ld_mat.transpose()).array().abs() > 1e-3).any() ) {\n    throw runtime_error(\"LD matrix should be symmetric.\");\n  } else if ( (ld_mat.diagonal().array() < 0).any()) {\n    throw runtime_error(\"Diagonal elements of LD matrix should be non-negative.\");\n  }\n\n  VectorXd variances(nrows);\n  MatrixXd corr(nrows, ncols);\n  cov_to_corr(variances, corr, ld_mat);\n  this->write_sparse_header(gene_name, variances, variant_ids, sparsity_threshold);\n\n  for (size_t i = 0; i < nrows; ++i) {\n    for (size_t j = 0; j < i; ++j) {\n      if (abs(corr(i, j)) > sparsity_threshold) {\n        this->write_sparse_entry(\n          sparse_matrix_entry {\n            (int32_t)i,\n            (int32_t)j,\n            (float)corr(i, j)\n          }\n        );\n      }\n    }\n  }\n  this->write_sparse_footer();\n}\n\nvoid RegenieLDMatrixWriter::write_sparse_header(const string& gene_name,\n                                         const VectorXd& variances,\n           
                              const vector<string>& variant_ids,\n                                         const double& sparsity_threshold) {\n  if (this->is_closed()) {\n    throw runtime_error(\"operating on a closed file\");\n  }\n  if (variant_ids.size() == 0) {\n    throw runtime_error(\"writing an empty matrix\");\n  }\n  \n  int64_t addr = this->tell();\n  size_t nrows = variances.size();\n  this->write('s'); // sparse\n  this->write((int32_t)nrows);\n  this->write((float)sparsity_threshold);\n  for (size_t i = 0; i < nrows; ++i) {\n    this->write((float)variances[i]);\n  }\n  this->write_idx_entry(gene_name, variant_ids, addr);\n}\n\nvoid RegenieLDMatrixWriter::write_sparse_entry(const sparse_matrix_entry& entry) {\n  this->write(entry);\n}\n\nvoid RegenieLDMatrixWriter::write_sparse_footer() {\n  this->write(sparse_matrix_entry {\n    (int32_t)-1,\n    (int32_t)-1,\n    (float)0\n  });\n}\n\nvoid RegenieLDMatrixWriter::write_idx_entry(const string& gene_name,\n                                     const vector<string>& variant_ids,\n                                     const int64_t& addr) {\n  string ids = \"\";\n  for (size_t i = 0; i < variant_ids.size() - 1; ++i) {\n    ids += variant_ids[i] + \",\";\n  }\n  ids += variant_ids[variant_ids.size() - 1];\n  idx.write(gene_name + \"\\t\" + to_string(addr) + \"\\t\" + ids + \"\\n\");                            \n}\n\nvoid RegenieLDMatrixWriter::open(string file_prefix, int sample_size) {\n  BgzWriter::open(file_prefix + \".metamat\", \"w\");\n  this->idx.open(file_prefix + \".metamat.idx.gz\", \"w\");\n  this->write((int32_t)sample_size);\n  if (sizeof(float) != 4) {\n    throw runtime_error(\"bad float size: sizeof(float) != 4\");\n  }\n}\n\nvoid RegenieLDMatrixWriter::close() {\n  this->idx.close();\n  BgzWriter::close();\n}\n#endif"
  },
  {
    "path": "external_libs/remeta/regenie_ld_matrix_writer.hpp",
    "content": "#ifdef WITH_HTSLIB\n#ifndef REGENIE_LD_MATRIX_WRITER_H\n#define REGENIE_LD_MATRIX_WRITER_H\n\n#include <string>\n#include <vector>\nusing namespace std;\n\n#include <htslib/bgzf.h>\n#include <htslib/kstring.h>\n\n#include <Eigen/Dense>\nusing Eigen::MatrixXd;\nusing Eigen::VectorXd;\n\n#include \"bgz_writer.hpp\"\n\nstruct sparse_matrix_entry {\n  int32_t i;  // row index\n  int32_t j;  // col index\n  float data; // value at entry i,j\n};\n\nclass RegenieLDMatrixWriter : public BgzWriter {\n public:\n  RegenieLDMatrixWriter();\n\n  RegenieLDMatrixWriter(string file_prefix, int sample_size);\n\n  void write_matrix_dense(const MatrixXd& ld_mat,\n                          const string& gene_name,\n                          const vector<string>& variant_ids);\n\n  void write_matrix_sparse(const MatrixXd& ld_mat,\n                           const string& gene_name,\n                           const vector<string>& variant_ids,\n                           const double& sparsity_threshold);\n\n  /* \n    These functions write sparse matrices in pieces. Writing each matrix\n    requires 3 steps:\n      1. write_sparse_header  : Writes the matrix header to the file and\n                                writes its address to the index.\n      2. write_sparse_entry   : Writes matrix entry to a file. This can be\n                                called multiple times.\n      3. write_sparse_footer  : Writes a footer signifying the end of the matrix.\n  */\n  void write_sparse_header(const string& gene_name,\n                           const VectorXd& variances,\n                           const vector<string>& variant_ids,\n                           const double& sparsity_threshold);\n\n  void write_sparse_entry(const sparse_matrix_entry& entry);\n\n  void write_sparse_footer();\n\n  void open(string file_prefix, int sample_size);\n\n  void close();\n \n private:\n  BgzWriter idx;\n\n  void write_idx_entry(const string& gene_name,\n                       const vector<string>& variant_ids,\n                       const int64_t& addr);\n};\n\n#endif\n#endif"
  },
  {
    "path": "scripts/parseLD.r",
    "content": "#!/usr/bin/Rscript\n\n### This function returns the LD matrix computed from the compressed binary file \n### output from REGENIE\n###   ld.file: binary file output from Regenie\n\nget.corr.sq.matrix <- function( ld.file = NULL ){\n\n  if(is.null(ld.file) || !file.exists(ld.file))\n    stop(\"Need to pass valid LD file!\")\n  list.file <- paste0(ld.file, \".snplist\")\n  if(!file.exists(list.file))\n    stop(\"Cannot open accompagnying snplist file: \", list.file)\n\n  # Variant IDs\n  snplist <- read.table(list.file)$V1\n  n.snps <- length(snplist)\n\n  rfile <- file(ld.file, \"rb\")\n  # number of samples and snps\n  f.info <- readBin(\n    con = rfile, \n    what = \"integer\", \n    n = 2,\n  ) \n  if(f.info[2] != n.snps)\n    stop(\"Number of variants from snplist does not match that in correlation file\")\n\n  # R^2 as stored using uint16 representation\n  rvals <- readBin(\n    con = rfile, \n    what = \"integer\", \n    n = choose(n.snps, 2),\n    signed = FALSE,\n    size = 2\n  ) \n  close(rfile)\n\n  # matrix of R^2 values\n  rmat <- matrix(0, n.snps, n.snps)\n  rmat[ lower.tri(rmat) ] <- rvals / (2^16 - 1)\n  rmat[ upper.tri(rmat) ] <- t(rmat)[ upper.tri(rmat) ]\n  diag(rmat) <- 1\n  colnames(rmat) <- rownames(rmat) <- snplist\n\n  return(rmat)\n}\n\n"
  },
  {
    "path": "scripts/regenie_docker.sh",
    "content": "#!/usr/bin/env bash\n#####\n## For use with Docker\nhelp_menu=\"\nUsage regenie_docker.sh OPTIONS\n\nOptions:\n\t--build     create docker image\n\t--test      test a generated docker image\n\t--with-bio  compile with Boost Iostreams library\n\t--with-mkl  compile with MKL library\n\t--file      custom docker file to use\n\t--rg-dir    path to the REGENIE source directory\n\"\n\n# default variables\naction=\nHAS_BOOST_IOSTREAM=0\nMKLROOT=\nDFILE=Dockerfile\nTEST_SCRIPT=test/test_docker.sh\nRGSRC=$(pwd)\n\nwhile [[ \"$#\" -gt 0 ]]; do\n  case $1 in\n    --build) action=build ;;\n    --test) action=test ;;\n    --with-bio) HAS_BOOST_IOSTREAM=1 ;;\n    --with-mkl) MKLROOT=/mkl/ ;;\n    --file) CUSTOM_DFILE=\"$2\"; shift ;;\n    --rg-dir) RGSRC=\"$2\"; shift ;;\n    -h|--help) action=\"\" ; break ;;\n    *) echo \"Unknown parameter passed: $1\"; echo \"$help_menu\"; exit 1 ;;\n  esac\n  shift\ndone\n\nif [ \"$action\" = \"\" ]; then\n  echo \"$help_menu\"; exit 1\nfi\n\nif [ ! -f \"${RGSRC}/VERSION\" ]; then\n  echo \"must specify REGENIE source directory using '--rg-dir'\"; exit 1\nfi\ncd $RGSRC\nRG_VERSION=$(cat VERSION)\n\nif (( HAS_BOOST_IOSTREAM == 1 )); then\n  RG_VERSION+=.gz\nfi\n\n# create Docker image\nif [ \"$action\" = \"build\" ]; then\n  echo \"Building docker image for REGENIE v${RG_VERSION}\"\n\n  if (( HAS_BOOST_IOSTREAM == 1 )); then\n    echo Compiling with Boost Iostream library\n    LIB_BIO=libboost-iostreams-dev\n  fi\n  if [ \"$MKLROOT\" != \"\" ]; then\n    echo Compiling with Intel MKL library\n    DFILE=Dockerfile_mkl\n  fi\n  if [ \"$CUSTOM_DFILE\" != \"\" ]; then\n    DFILE=$CUSTOM_DFILE\n  fi\n\n\tdocker build --rm -f ${DFILE} \\\n\t\t--no-cache --pull \\\n\t\t--build-arg BOOST_IO=${HAS_BOOST_IOSTREAM} \\\n\t\t--build-arg LIB_INSTALL=${LIB_BIO} \\\n\t\t--build-arg LIB_INSTALL2=${LIB_BIO} \\\n\t\t--build-arg STATIC=1 \\\n\t\t-t regenie:v${RG_VERSION} .\n\nelif [ \"$action\" = \"test\" ]; then\n\t${TEST_SCRIPT} . \"regenie:v${RG_VERSION}\" ${HAS_BOOST_IOSTREAM}\nfi\n\ncd -\n"
  },
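  {
    "path": "scripts/examples/regenie_docker_usage.sh",
    "content": "#!/usr/bin/env bash\n## NOTE: hypothetical invocation examples added for illustration; this file\n## is not part of the repository. Only flags documented in the help menu of\n## scripts/regenie_docker.sh are used, and /path/to/regenie is a placeholder.\n\n# build the image with Boost Iostreams support (the tag gains a '.gz' suffix)\n./scripts/regenie_docker.sh --build --with-bio --rg-dir /path/to/regenie\n\n# build an MKL-enabled image (selects Dockerfile_mkl)\n./scripts/regenie_docker.sh --build --with-mkl --rg-dir /path/to/regenie\n\n# run the bundled test script against the image built above\n./scripts/regenie_docker.sh --test --with-bio --rg-dir /path/to/regenie\n"
  },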
  {
    "path": "scripts/regenie_paper/README.md",
    "content": "# Code for Regenie paper\n\nThis folder contains code used to perform the analyses in the manuscript:\n\nMbatchou, J. et al. Computationally efficient whole genome regression for quantitative and binary traits. bioRxiv, 2020.06.19.162354 (2020).\n\nThe pre-print is available on [biorXiv](https://www.biorxiv.org/content/10.1101/2020.06.19.162354v1).\n\nAll key analysis steps performed are in the file `scripts/master.sh` and we also provide code to generate the main figures.\n\n"
  },
  {
    "path": "scripts/regenie_paper/scripts/manP.colors",
    "content": "#F8766D\n#EC8239\n#DB8E00\n#C79800\n#AEA200\n#8FAA00\n#64B200\n#00B81B\n#00BD5C\n#00C085\n#00C1A7\n#00BFC4\n#00BADE\n#00B2F3\n#00A6FF\n#7C96FF\n#B385FF\n#D874FD\n#EF67EB\n#FD61D3\n#FF63B6\n#FF6B94\n"
  },
  {
    "path": "scripts/regenie_paper/scripts/master.sh",
    "content": "#########################################\n## script used to run regenie/BOLT/fastGWA/SAIGE\n## for the analyses in the REGENIE 2020 paper\n## For more details, visit: https://rgcgithub.github.io/regenie/\n##\n\nmain_dir=/regenie_paper/\ncode_dir=${main_dir}scripts/\nin_raw_dir=${main_dir}input_raw/\nfiles_dir=${main_dir}input/\nout_dir=${main_dir}output/\nfig_dir=${main_dir}figures/\nmkdir -p $files_dir $out_dir $fig_dir\n\n\n## Input files needed\n## * UKB 500K array data BED file\nukb_array_prefix=${in_raw_dir}/ukb_array\n## * UKB 500K HRC imputed data BGEN files split by chromosome (22)\n###   - only variants which have minor allele frequency above 0.5% \n###      or have minor allele count above 5 and are annotated as functional\nukb_imputed_prefix=${in_raw_dir}/ukb_imputed_chr\n## * List of regions to exclude for step 1 (ICLD and low-complexity regions) \nregions_ignore=${in_raw_dir}/regions.exclude\n## * List of white British ancestry samples (FID/IID) \nwb_samples=${in_raw_dir}/wb.samples\n## * UKB 500K phenotype files for the QTs and BTs analyzed in the Regenie paper\n###   -format is be FID IID followed by the phenotypes analyzed\n###   - 4 sets of files for 3 exemplar QTs, 50 multi-trait QTs, 4 exemplar BTs and 50 multi-trait BTs\nukb_pheno_exQT=${files_dir}/ukb_phenos_exQTs.txt\nukb_pheno_mtQT=${files_dir}/ukb_phenos_mtQTs.txt\nukb_pheno_exBT=${files_dir}/ukb_phenos_exBTs.txt\nukb_pheno_mtBT=${files_dir}/ukb_phenos_mtBTs.txt\n## * UKB 500K covariate file\n###   -format is be FID IID followed by the covariates included in paper\nukb_covars=${files_dir}/ukb_cov.txt\n\n\nsudo chmod u+x ${code_dir}*\n# setup imagequick for tiff to pdf conversion\nsudo apt install -y imagemagick-6.q16\ncp /etc/ImageMagick-6/policy.xml /etc/ImageMagick-6/policy.xml.cpy\nline_change=`grep -n \"coder.*PDF\"  /etc/ImageMagick-6/policy.xml | head -n 1 | cut -f1 -d':'`\nhead -n $(( line_change - 1 )) /etc/ImageMagick-6/policy.xml > tmp.policy\nsed \"${line_change}q;d\" /etc/ImageMagick-6/policy.xml | sed 's/none/write/' >> tmp.policy\ntail -n +$(( line_change + 1 )) /etc/ImageMagick-6/policy.xml >> tmp.policy\nmv tmp.policy /etc/ImageMagick-6/policy.xml\n# install parallel for SAIGE step 2\nsudo apt install -y parallel\n\n\n##########\n## Prepare Step 1 files\n#########\n\n### Apply filters for Step 1 file of array SNPs\narray_step1=${files_dir}/ukb_wb_array_step1\n\n${code_dir}prep_files.sh \\\n  $ukb_array_prefix \\\n  ${ukb_imputed_prefix}1 \\\n  $wb_samples \\\n  $regions_ignore \\\n  $array_step1\n\n### Create sparse GRM for fastGWA\nnpartitions=250\nfor job in $(seq 1 $npartitions); do\n\n  ${code_dir}mk_sparseGRM.r \\\n    --step1File=$array_step1 \\\n    --partition=$job \\\n    --npartitions=$npartitions \\\n    --prefix=${array_step1}_fastgwa\n\ndone\n\n${code_dir}mk_sparseGRM.r \\\n  --npartitions=$npartitions \\\n  --prefix=${array_step1}_fastgwa\n\n# Make file with phenotype names\nfor f in $ukb_pheno_exQT $ukb_pheno_mtQT $ukb_pheno_exBT $ukb_pheno_mtBT ; do \n  head -n 1 $f | tr ' ' '\\n' | grep -v \"FID\\|IID\" > ${f}.names\ndone\n\n\n##########\n## Run GWAS for each data set\n#########\nnchr=22\n\n##################\n### 1. 
exemplarQTs (analyze one trait at a time)\noprefix=${out_dir}exQTs\nnpheno=`cat ${ukb_pheno_exQT}.names | wc -l`\n\nfor ipheno in $(seq 1 $npheno); do\n\n  ## regenie step 1\n  ${code_dir}run_methods.r \\\n    --method=regenie \\\n    --phenoFile=$ukb_pheno_exQT \\\n    --pheno=$ipheno \\\n    --covarFile=$ukb_covars \\\n    --step1File=$array_step1 \\\n    --lowmem \\\n    --prefix=${oprefix}_regenie\n\n  ## regenie step 2\n  for chr in $(seq 1 $nchr); do\n\n    ${code_dir}run_methods.r \\\n      --method=regenie \\\n      --skipNull \\\n      --lowmem \\\n      --phenoFile=$ukb_pheno_exQT \\\n      --pheno=$ipheno \\\n      --covarFile=$ukb_covars \\\n      --step2File=$ukb_imputed_prefix \\\n      --chr $chr \\\n      --prefix=${oprefix}_regenie\n\n    done\n\n  # bolt-lmm\n  ${code_dir}run_methods.r \\\n    --method=bolt \\\n    --phenoFile=$ukb_pheno_exQT \\\n    --pheno=$ipheno \\\n    --covarFile=$ukb_covars \\\n    --step1File=$array_step1 \\\n    --step2File=$ukb_imputed_prefix \\\n    --prefix=${oprefix}_bolt\n\n  # fastgwa\n  ${code_dir}run_methods.r \\\n    --method=fastgwa \\\n    --phenoFile=$ukb_pheno_exQT \\\n    --pheno=$ipheno \\\n    --covarFile=$ukb_covars \\\n    --grm=${array_step1}_fastgwa_sp \\\n    --step2File=$ukb_imputed_prefix \\\n    --prefix=${oprefix}_fastgwa\n\n  done\n\n\n## make ManP plot\n${code_dir}mk_plots_qt.r \\\n  --loadFuns=${code_dir}std_ffuns.r \\\n  --manColors=${code_dir}manP.colors \\\n  --phenoNames=${ukb_pheno_exQT}.names \\\n  --figfolder=$fig_dir \\\n  --prefix=$oprefix\n\n\n\n##################\n### 2. exemplarBTs (analyze one trait at a time)\noprefix=${out_dir}exBTs\nnpheno=`cat ${ukb_pheno_exBT}.names | wc -l`\n\nfor ipheno in $(seq 1 $npheno); do\n\n  ## regenie step 1\n  ${code_dir}run_methods.r \\\n    --method=regenie \\\n    --bt \\\n    --phenoFile=$ukb_pheno_exBT \\\n    --pheno=$ipheno \\\n    --covarFile=$ukb_covars \\\n    --step1File=$array_step1 \\\n    --lowmem \\\n    --prefix=${oprefix}_regenie\n\n\n  ## regenie step 2\n  for chr in $(seq 1 $nchr); do\n\n    # firth\n    ${code_dir}run_methods.r \\\n      --method=regenie \\\n      --bt \\\n      --skipNull \\\n      --lowmem \\\n      --phenoFile=$ukb_pheno_exBT \\\n      --pheno=$ipheno \\\n      --covarFile=$ukb_covars \\\n      --step2File=$ukb_imputed_prefix \\\n      --chr $chr \\\n      --prefix=${oprefix}_regenie\n\n\n    # spa\n    ${code_dir}run_methods.r \\\n      --method=regenie \\\n      --bt --spa \\\n      --skipNull \\\n      --lowmem \\\n      --phenoFile=$ukb_pheno_exBT \\\n      --pheno=$ipheno \\\n      --covarFile=$ukb_covars \\\n      --step2File=$ukb_imputed_prefix \\\n      --chr $chr \\\n      --prefix=${oprefix}_regenie\n\n  done\n\n  # bolt-lmm\n  ${code_dir}run_methods.r \\\n    --method=bolt \\\n    --phenoFile=$ukb_pheno_exBT \\\n    --pheno=$ipheno \\\n    --covarFile=$ukb_covars \\\n    --step1File=$array_step1 \\\n    --step2File=$ukb_imputed_prefix \\\n    --prefix=${oprefix}_bolt\n\n\n  # saige step 1\n  ${code_dir}run_methods.r \\\n    --method=saige \\\n    --phenoFile=$ukb_pheno_exBT \\\n    --pheno=$ipheno \\\n    --covarFile=$ukb_covars \\\n    --step1File=$array_step1 \\\n    --prefix=${oprefix}_saige\n\n  # saige step 2 (run in parallel since saige only uses ~1 thread here)\n  parallel --jobs $(nproc) --link \\\n    ${code_dir}run_methods.r \\\n    --method=saige \\\n    --skipNull \\\n    --phenoFile=$ukb_pheno_exBT \\\n    --pheno=$ipheno \\\n    --covarFile=$ukb_covars \\\n    --step2File=$ukb_imputed_prefix \\\n    
--prefix=${oprefix}_saige \\\n    --chr \\\n    ::: $(seq 1 $nchr)\n\ndone\n\n## make ManP plot\n${code_dir}mk_plots_bt.r \\\n  --loadFuns=${code_dir}std_ffuns.r \\\n  --manColors=${code_dir}manP.colors \\\n  --phenoNames=${ukb_pheno_exBT}.names \\\n  --figfolder=$fig_dir \\\n  --prefix=$oprefix\n\n\n\n##################\n### 3. multi-trait QTs\noprefix=${out_dir}mtQTs\nnpheno=`cat ${ukb_pheno_mtQT}.names | wc -l`\n\n## regenie step 1 (all traits analyzed) K-fold CV\n${code_dir}run_methods.r \\\n  --method=regenie \\\n  --phenoFile=$ukb_pheno_mtQT \\\n  --covarFile=$ukb_covars \\\n  --step1File=$array_step1 \\\n  --lowmem \\\n  --prefix=${oprefix}_regenie\n\n## regenie step 1 LOOCV\n${code_dir}run_methods.r \\\n  --method=regenie \\\n  --phenoFile=$ukb_pheno_mtQT \\\n  --covarFile=$ukb_covars \\\n  --step1File=$array_step1 \\\n  --loocv \\\n  --lowmem \\\n  --prefix=${oprefix}_regenie\n\n## regenie step 2\nfor chr in $(seq 1 $nchr); do\n\n  ${code_dir}run_methods.r \\\n    --method=regenie \\\n    --skipNull \\\n    --lowmem \\\n    --phenoFile=$ukb_pheno_mtQT \\\n    --covarFile=$ukb_covars \\\n    --step2File=$ukb_imputed_prefix \\\n    --chr $chr \\\n    --prefix=${oprefix}_regenie\n\ndone\n\nfor ipheno in $(seq 1 $npheno); do\n\n  # bolt-lmm\n  ${code_dir}run_methods.r \\\n    --method=bolt \\\n    --phenoFile=$ukb_pheno_mtQT \\\n    --pheno=$ipheno \\\n    --covarFile=$ukb_covars \\\n    --step1File=$array_step1 \\\n    --step2File=$ukb_imputed_prefix \\\n    --prefix=${oprefix}_bolt\n\n  # fastgwa\n  ${code_dir}run_methods.r \\\n    --method=fastgwa \\\n    --phenoFile=$ukb_pheno_mtQT \\\n    --pheno=$ipheno \\\n    --covarFile=$ukb_covars \\\n    --grm=${array_step1}_fastgwa_sp \\\n    --step2File=$ukb_imputed_prefix \\\n    --prefix=${oprefix}_fastgwa\n\ndone\n\n\n##################\n### 4. 
multi-trait BTs\noprefix=${out_dir}mtBTs\nnpheno=`cat ${ukb_pheno_mtBT}.names | wc -l`\n\n## regenie step 1 (all traits analyzed at once) K-fold CV\n${code_dir}run_methods.r \\\n  --method=regenie \\\n  --bt \\\n  --phenoFile=$ukb_pheno_mtBT \\\n  --covarFile=$ukb_covars \\\n  --step1File=$array_step1 \\\n  --lowmem \\\n  --prefix=${oprefix}_regenie\n\n## regenie step 1 LOOCV\n${code_dir}run_methods.r \\\n  --method=regenie \\\n  --bt \\\n  --loocv \\\n  --phenoFile=$ukb_pheno_mtBT \\\n  --covarFile=$ukb_covars \\\n  --step1File=$array_step1 \\\n  --lowmem \\\n  --prefix=${oprefix}_regenie\n\n## regenie step 2\nfor chr in $(seq 1 $nchr); do\n\n  # firth\n  ${code_dir}run_methods.r \\\n    --method=regenie \\\n    --bt \\\n    --skipNull \\\n    --lowmem \\\n    --phenoFile=$ukb_pheno_mtBT \\\n    --covarFile=$ukb_covars \\\n    --step2File=$ukb_imputed_prefix \\\n    --chr $chr \\\n    --prefix=${oprefix}_regenie\n\n\n  # spa\n  ${code_dir}run_methods.r \\\n    --method=regenie \\\n    --bt --spa \\\n    --skipNull \\\n    --lowmem \\\n    --phenoFile=$ukb_pheno_mtBT \\\n    --covarFile=$ukb_covars \\\n    --step2File=$ukb_imputed_prefix \\\n    --chr $chr \\\n    --prefix=${oprefix}_regenie\n\ndone\n\nfor ipheno in $(seq 1 $npheno); do\n\n  # bolt-lmm\n  ${code_dir}run_methods.r \\\n    --method=bolt \\\n    --phenoFile=$ukb_pheno_mtBT \\\n    --pheno=$ipheno \\\n    --covarFile=$ukb_covars \\\n    --step1File=$array_step1 \\\n    --step2File=$ukb_imputed_prefix \\\n    --prefix=${oprefix}_bolt\n\n\n  # saige step 1\n  ${code_dir}run_methods.r \\\n    --method=saige \\\n    --phenoFile=$ukb_pheno_mtBT \\\n    --pheno=$ipheno \\\n    --covarFile=$ukb_covars \\\n    --step1File=$array_step1 \\\n    --prefix=${oprefix}_saige\n\n  # saige step 2 (run in parallel since saige only uses ~1 thread here)\n  parallel --jobs $(nproc) --link \\\n    ${code_dir}run_methods.r \\\n    --method=saige \\\n    --skipNull \\\n    --phenoFile=$ukb_pheno_mtBT \\\n    --pheno=$ipheno \\\n    --covarFile=$ukb_covars \\\n    --step2File=$ukb_imputed_prefix \\\n    --prefix=${oprefix}_saige \\\n    --chr \\\n    ::: $(seq 1 $nchr)\n\ndone\n\n\n# reset policies for imagemagick\nmv /etc/ImageMagick-6/policy.xml.cpy /etc/ImageMagick-6/policy.xml\n"
  },
  {
    "path": "scripts/regenie_paper/scripts/mk_plots_bt.r",
    "content": "#!/usr/bin/env Rscript\n\nsuppressPackageStartupMessages({\n  if(!require(optparse)){ install.packages(\"optparse\"); library(optparse) }\n  if(!require(data.table)){ install.packages(\"data.table\"); library(data.table) }\n  if(!require(dplyr)){ install.packages(\"dplyr\"); library(dplyr) }\n  if(!require(tidyr)){ install.packages(\"tidyr\"); library(tidyr) }\n  if(!require(ggplot2)){ install.packages(\"ggplot2\"); library(ggplot2) }\n  if(!require(scales)){ install.packages(\"scales\"); library(scales) }\n  if(!require(viridis)){ install.packages(\"viridis\"); library(viridis) }\n  if(!require(gridExtra)){ install.packages(\"gridExtra\"); library(gridExtra) }\n  if(!require(cowplot)){ install.packages(\"cowplot\"); library(cowplot) }\n})\n#########################################\n##\n## Script used to run REGENIE/BOLT/fastGWA/SAIGE for GWAS\n## for the analyses in the REGENIE 2020 paper\n## For more details, visit: https://rgcgithub.github.io/regenie/\n##  \n#########################################\noption_list = list(\n  make_option(\"--loadFuns\", type=\"character\", default=\"\",\n    help=\"script with functions for plotting\"),\n  make_option(\"--manColors\", type=\"character\", default=\"\",\n    help=\"file with colors for Manhattan plot\"),\n  make_option(\"--phenoNames\", type=\"character\", default=\"\",\n    help=\"phenotype file\"),\n  make_option(\"--figfolder\", type=\"character\", default=\"\",\n    help=\"prefix of output files\"),\n  make_option(\"--prefix\", type=\"character\", default=\"\",\n    help=\"prefix of output files\")\n  );\nopt_parser = OptionParser(option_list=option_list);\nopt = parse_args(opt_parser);\n\nsource(opt$loadFuns)\nalpha <- .05\np.thr <- 324\nplot.folder <- opt$figfolder\nmanP.colors <- fread( opt$manColors, header=FALSE) %>% pull\npheno.names <- fread(opt$phenoNames, header = FALSE) %>% pull\nlog10P.X <- expression(paste(\"SAIGE -log\"[10],\"P\"))\nlog10P.Y <- expression(paste(\"-log\"[10],\"P\"))\n################################################\n\n## Manhatthan plots\npdf(NULL, width = 36, height=24)\ndev.control(displaylist=\"enable\")\n\nlayout(matrix(1:16,4, byrow=T))\npar(mar = c(6,5,8,0.1), oma = c(1.1, 1.1, 1.1, 0.1))\npar(tcl = -0.25)\npar(mgp = c(3, 0.6, 0))\n\ndummy <- lapply(1:length(pheno.names), function(ipheno){\n  panel.lab <- letters[ipheno]\n  pn <- pheno.names[ipheno]\n\n  ## SAIGE results\n  fns <- system(paste0(\"ls \", opt$prefix, \"_saige_phenoCol\", ipheno,\"_chr*\\\\.test\" ), intern = TRUE)\n  saige.df <- rbindlist(lapply(fns, function(fn){\n      fread(fn, fill=TRUE, showProgress = FALSE) %>%\n        mutate( pval.SAIGE = -log10(Tstat) ,\n          SE.SAIGE = BETA, BETA.SAIGE = N,\n          A1FREQ = as.numeric(AC_Allele2),\n          MAF = pmin(A1FREQ, 1 - A1FREQ) ) %>%\n      select( rsid, MAF, BETA.SAIGE, SE.SAIGE, pval.SAIGE )\n}))\n\n  ## BOLT results\n  fns <- system(paste0(\"ls \", opt$prefix, \"_bolt_phenoCol\", ipheno,\"\\\\.test\" ), intern = TRUE)\n  bolt.df <- rbindlist(lapply(fns, function(fn){\n      fread(fn, colClasses=\"character\", showProgress = FALSE) %>%\n        mutate( pval.BOLT = -log10(as.numeric(P_BOLT_LMM)) ) %>%\n        select( SNP, pval.BOLT )\n}))\n\n  # regenie-Firth\n  fns <- system(paste0(\"ls \", opt$prefix, \"_regenie_phenoCol\", ipheno,\"_Firth_chr*\\\\.regenie\" ), intern = TRUE)\n  regenie.firth.df <- rbindlist(lapply(fns, function(fn){\n      m1 <-  fread(fn, showProgress = FALSE) %>%\n        select( CHROM, GENPOS, ID, BETA, SE, CHISQ,LOG10P ) %>%\n        
rename_(.dots = setNames(c(\"BETA\",\"SE\",\"CHISQ\",\"LOG10P\"), c(\"beta.rg.firth\",\"se.rg.firth\",\"test.firth\",\"pval.regenie.firth\")) )\n      m1 %>%\n        select( CHROM, GENPOS, ID, beta.rg.firth, se.rg.firth, pval.regenie.firth)\n}))\n\n  # regenie-SPA\n  fns <- system(paste0(\"ls \", opt$prefix, \"_regenie_phenoCol\", ipheno,\"_SPA_chr*\\\\.regenie\" ), intern = TRUE)\n  regenie.spa.df <- rbindlist(lapply(fns, function(fn){\n      m1 <-  fread(fn, showProgress = FALSE) %>%\n        select( ID, BETA, SE, LOG10P ) %>%\n        rename_(.dots = setNames(c(\"BETA\",\"SE\",\"LOG10P\"), c(\"beta.rg.spa\",\"se.rg.spa\",\"pval.regenie.spa\")) )\n}))\n\n  # combine info\n  comb.df <- left_join(saige.df, regenie.firth.df, by = c( \"rsid\" = \"ID\") )  %>%\n    left_join(., bolt.df, by = c( \"rsid\" = \"SNP\") )  %>%\n    left_join(., regenie.spa.df, by = c( \"rsid\" = \"ID\") ) %>%\n    as.data.table\n  cat(\"# variants tested for\", pn, \"=\", scales::number(comb.df %>% drop_na %>% nrow, big.mark=\",\") ,\"\\n\")\n\n  # convert infinite values to p.thr\n  comb.df[ is.infinite(pval.BOLT), `:=`(pval.BOLT=p.thr)]\n  comb.df[ is.infinite(pval.SAIGE), `:=`(pval.SAIGE=p.thr)]\n  comb.df[ pval.regenie.firth > p.thr, `:=`(pval.regenie.firth = p.thr)]\n  comb.df[ pval.regenie.spa > p.thr, `:=`(pval.regenie.spa = p.thr)]\n  comb.df <- comb.df %>% drop_na\n\n  man.data <- comb.df  %>%\n    rename(POS = GENPOS, CHR = CHROM) %>%\n    prep.data\n  axisdf <- man.data %>% group_by(CHR) %>% summarize(center=( max(POScum) + min(POScum) ) / 2 )\n\n  ## using base R\n  all.methods <- c(\"BOLT-LMM\",\"REGENIE-FIRTH\",\"REGENIE-SPA\",\"SAIGE\")\n  df0 <- man.data %>%\n    gather(Method, pval, pval.BOLT, pval.SAIGE, pval.regenie.firth, pval.regenie.spa) %>%\n    mutate( Method = factor(Method,\n        levels = paste0(\"pval.\",c(\"BOLT\",\"regenie.firth\", \"regenie.spa\",\"SAIGE\")),\n        labels= all.methods) ,\n      pval = as.numeric(pval),\n      pval = pval * (pval < p.thr) + p.thr * (pval >= p.thr ),\n      color = manP.colors[CHR]\n      ) %>%\n    select(POScum,Method,pval,color)\n  chrlims.min <- man.data %>% group_by(CHR) %>% summarize(bot= min(POScum))%>%pull\n  chrlims.max <- man.data %>% group_by(CHR) %>% summarize(top= max(POScum))%>%pull\n  chrlims <- rowMeans(cbind(head(chrlims.max,-1), chrlims.min[-1]))\n  myaxis <- c(0, ifelse(max(df0$pval) < 70 , 70, 120))\n\n\n  # compress y-axis above 20: p' = p/12.5 + 20 - 1.6, so ticks at 24 and 28 show true values 70 and 120\n  df1 <- df0  %>%\n    mutate(\n      pval = pval * (pval <= 20) + (pval/12.5+20-1.6) * (pval > 20 )\n    )\n  tot.vals <- ifelse(max(df0$pval) < 70 , 7, 8)\n  myaxis <- c(seq(0, 20, by = 4), seq(24, 60, by = 4))[1:tot.vals]\n  myaxis.lab <- c(seq(0,20,by=4),70,120)[1:tot.vals]\n\n  ddd <- lapply(all.methods, function(my.method){\n    cat(my.method,\"\\n\")\n\n    df1.meth <- df1 %>% filter( Method == my.method ) %>%\n      mutate(x=POScum,y=pval) %>%\n      select(x,y,color)\n\n    plot(df1.meth$x, df1.meth$y, axes = FALSE,\n      col=df1.meth$color, cex=1, pch=16, cex.lab=1.7,\n      xlab=\"\", ylab=\"\",\n      main=\"\", ylim=c(0,max(myaxis)))\n    abline(h=-log10(5e-8),lty=2); abline(h=20,col=\"gray45\",lty=2)\n\n    # method name\n    title(my.method, cex.main=3.5, font.main=1, line=0)\n\n    if(my.method==all.methods[1]) {\n      # add trait title, x/y-axis label\n      fig_label(panel.lab, cex=4, font=2)\n      title(main=pn, adj=0, line = 4.5, cex.main=3, font.main=1)\n      # y-axis\n      mtext(log10P.Y, side = 2, outer = F, cex = 1.6, line = 
3.5)\n      axis(2, at = myaxis, labels=myaxis.lab, las=1, cex.axis=2)\n      plotrix::axis.break(axis=2,breakpos=20,bgcol=\"white\",breakcol=\"black\", style=\"zigzag\",brw=0.02)\n    }\n\n    # x-axis labels (jittered)\n    abline(v=chrlims, lty=2, col=\"gray65\") #chr breaks\n    axis(1, at = c(0,chrlims, tail(chrlims.max,1)), labels=FALSE) #chr ticks\n    # x-axes jittered\n    x.c <- seq(1,22,2)\n    text(x=axisdf$center[x.c],  par(\"usr\")[3],\n      labels = axisdf$CHR[x.c], pos = 1, xpd = TRUE, cex=2.1, offset=.7)\n    text(x=axisdf$center[-x.c],  par(\"usr\")[3],\n      labels = axisdf$CHR[-x.c], pos = 1, xpd = TRUE, cex=2.1, offset=2.7)\n    return(NULL)\n    })\n  return(NULL)\n})\n\np1.base <- recordPlot()\ninvisible(dev.off())\n\n## TIFF format\ntiff(paste0(plot.folder, \"Figure2.tiff\"), width= 300*6*1.5*4 , height = 300*6*4, res = 300, compression =\"lzw\")\np1.base\ndev.off()\n## convert to pdf using imagemagick\nsystem(paste0(\"convert -density 300 -units PixelsPerInch \", plot.folder, \"Figure2.tiff \", plot.folder, \"Figure2.pdf\"))\n\n"
  },
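  {
    "path": "scripts/regenie_paper/scripts/examples/axis_squash_demo.r",
    "content": "#!/usr/bin/env Rscript\n\n## NOTE: hypothetical sketch added for illustration; this file is not part of\n## the repository. It spells out the broken-y-axis transform used in\n## mk_plots_bt.r: -log10 p-values above 20 are compressed with\n## p' = p/12.5 + 20 - 1.6, so the axis tick drawn at 24 corresponds to a true\n## value of 70 and the tick at 28 to 120 (the myaxis/myaxis.lab pairs).\n\nsquash <- function(p) ifelse(p <= 20, p, p / 12.5 + 20 - 1.6)\n\n# values at or below 20 are unchanged; 70 and 120 land on the compressed ticks\nstopifnot(isTRUE(all.equal(squash(c(10, 20, 70, 120)), c(10, 20, 24, 28))))\n"
  },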
  {
    "path": "scripts/regenie_paper/scripts/mk_plots_qt.r",
    "content": "#!/usr/bin/env Rscript\n\nsuppressPackageStartupMessages({\n  if(!require(optparse)){ install.packages(\"optparse\"); library(optparse) }\n  if(!require(data.table)){ install.packages(\"data.table\"); library(data.table) }\n  if(!require(tidyverse)){ install.packages(\"tidyverse\"); library(tidyverse) }\n  if(!require(ggplot2)){ install.packages(\"ggplot2\"); library(ggplot2) }\n  if(!require(gridExtra)){ install.packages(\"gridExtra\"); library(gridExtra) }\n  if(!require(cowplot)){ install.packages(\"cowplot\"); library(cowplot) }\n  if(!require(extrafont)){ install.packages(\"extrafont\"); library(extrafont) }\n})\n#########################################\n##\n## Script used to run REGENIE/BOLT/fastGWA/SAIGE for GWAS\n## for the analyses in the REGENIE 2020 paper\n## For more details, visit: https://rgcgithub.github.io/regenie/\n##  \n#########################################\noption_list = list(\n  make_option(\"--loadFuns\", type=\"character\", default=\"\",\n    help=\"script with functions for plotting\"),\n  make_option(\"--manColors\", type=\"character\", default=\"\",\n    help=\"file with colors for Manhattan plot\"),\n  make_option(\"--phenoNames\", type=\"character\", default=\"\",\n    help=\"phenotype file\"),\n  make_option(\"--figfolder\", type=\"character\", default=\"\",\n    help=\"prefix of output files\"),\n  make_option(\"--prefix\", type=\"character\", default=\"\",\n    help=\"prefix of output files\")\n  );\nopt_parser = OptionParser(option_list=option_list);\nopt = parse_args(opt_parser);\n\nsource(opt$loadFuns)\nalpha <- .05\np.thr <- 324\nlog10P.Y <- expression(paste(\"-log\"[10],\"P\"))\nplot.folder <- opt$figfolder\nmanP.colors <- fread( opt$manColors, header=FALSE) %>% pull\npheno.names <- fread(opt$phenoNames, header = FALSE) %>% pull\n################################################\n\n\n ## Manhatthan plots\n## 3x3 plot\npdf(NULL)\ndev.control(displaylist=\"enable\")\nlayout(matrix(1:9,3, byrow=T))\npar(mar = c(6,5,6,0.1), oma = c(1.1, 1.1, 1.1, 0.1))\npar(tcl = -0.25)\npar(mgp = c(3, 0.6, 0))\n\ndummy <- lapply(1:length(pheno.names), function(ipheno){\n  panel.lab <- letters[ipheno]\n  pn <- pheno.names[ipheno]\n  pn.title <- c(\"LDL\", \"BMI\", \"Bilirubin\")\n  pn.title <- pn.title[ sapply(pn.title, function(x) grepl(x, pn, ignore.case=TRUE)) ]\n\n  ## BOLT results\n  fns <- system(paste0(\"ls \", opt$prefix, \"_bolt_phenoCol\", ipheno,\"\\\\.test\" ), intern = TRUE)\n  bolt.df <- rbindlist(lapply(fns, function(fn){\n      fread(fn, colClasses=\"character\", showProgress = FALSE) %>%\n        mutate(pval.BOLT = -log10(as.numeric(P_BOLT_LMM)) ,\n          A1FREQ = as.numeric(A1FREQ),\n          MAF = pmin(A1FREQ, 1 - A1FREQ) ) %>%\n      dplyr::filter( MAF >= 0.01 ) %>%\n      select( SNP, pval.BOLT )\n}))\n\n  ## fastgwa results\n  fns <- system(paste0(\"ls \", opt$prefix, \"_fastgwa_phenoCol\", ipheno,\"\\\\.test.fastGWA\" ), intern = TRUE)\n  fastGWA.df <- rbindlist(lapply(fns, function(fn){\n      m1 <- fread(fn, showProgress = FALSE) %>%\n        mutate( pval.fastGWA = -log10(P))\n      # set the p-values of 0 to minimum p-value for plitting\n      m1$pval.fastGWA[ m1$ P == 0 ]  <-  p.thr\n      m1 %>% select( SNP, pval.fastGWA )\n}))\n\n  ## regenie results\n  fns <- system(paste0(\"ls \", opt$prefix, \"_regenie_phenoCol\", ipheno,\"_chr*\\\\.regenie\" ), intern = TRUE)\n  regenie.df <- rbindlist(lapply(fns, function(fn){\n      fread(fn) %>%\n        select( CHROM, GENPOS, ID, LOG10P ) %>%\n        
setNames(c(\"CHROM\",\"GENPOS\",\"ID\",\"pval.regenie\") )\n}))\n\n  ## combine info\n  comb.df <- left_join(bolt.df, fastGWA.df, by = c( \"SNP\" = \"SNP\") ) %>%\n    left_join(., regenie.df, by = c( \"SNP\" = \"ID\") ) %>%\n    as.data.table\n\n  # convert infinite values to p.thr\n  comb.df[ is.infinite(pval.BOLT), `:=`(pval.BOLT=p.thr)]\n  comb.df[ pval.regenie > p.thr, `:=`(pval.regenie = p.thr)]\n  comb.df <- comb.df %>% drop_na\n\n  man.data <- comb.df  %>%\n    rename(POS = GENPOS, CHR = CHROM) %>%\n    prep.data\n  axisdf <- man.data %>% group_by(CHR) %>% summarize(center=( max(POScum) + min(POScum) ) / 2 )\n  chrlims <- man.data %>% group_by(CHR) %>% summarize(top= max(POScum))%>% head(-1)\n\n  # using base R\n  all.methods <- c(\"REGENIE\" ,\"fastGWA\",\"BOLT-LMM\")\n  df0 <- man.data %>%\n    gather(Method, pval, pval.BOLT, pval.fastGWA, pval.regenie) %>%\n    mutate( Method = factor(Method,\n        levels = paste0(\"pval.\",c(\"regenie\",\"fastGWA\",\"BOLT\")),\n        labels = all.methods ),\n      color = manP.colors[CHR]\n      ) %>%\n    select(-SNP,-POS,-tot,-CHR)\n  chrlims.min <- man.data %>% group_by(CHR) %>% summarize(bot= min(POScum))%>%pull\n  chrlims.max <- man.data %>% group_by(CHR) %>% summarize(top= max(POScum))%>%pull\n  chrlims <- rowMeans(cbind(head(chrlims.max,-1), chrlims.min[-1]))\n  myaxis <- c(0, ifelse(max(df0$pval < 300) , 300, 350))\n\n  # compress plot for pvalues >= 20 by 1/20 => x/20 + 19\n  df1 <- df0  %>%\n    mutate(\n      pval = pval * (pval <= 20) + (pval/20+19) * (pval > 20 )\n    )\n  tot.vals <- ifelse(max(df0$pval) < 300 , 9, 10)\n  myaxis <- c(seq(0, 20, by = 4), seq(24, 60, by = 5))[1:tot.vals]\n  myaxis.lab <- c(seq(0,20, by=4), seq(100,400,by=100))[1:tot.vals]\n\n  plot.d <- lapply(all.methods, function(my.method){\n\n    df1.meth <- df1 %>% dplyr::filter( Method == my.method ) %>%\n      mutate(x=POScum,y=pval) %>%\n      select(x,y,color)\n\n    plot(df1.meth$x, df1.meth$y, axes = FALSE,\n      col=alpha(df1.meth$color, .6), cex=.8, pch=16, cex.lab=1.7,\n      xlab=\"\", ylab=\"\",\n      main=\"\", ylim=c(0,max(myaxis))\n    )\n    abline(h=-log10(5e-8),lty=2);abline(h=20,col=\"gray45\",lty=2)\n\n    # method name\n    title(my.method, cex.main=3.2, font.main=1, line=1)\n\n    # add trait title, x/y-axis label\n    if(my.method == all.methods[1]) {\n      fig_label(panel.lab, cex=4, font=2)\n      title(main=pn.title, adj=0, line = 2.5, cex.main=3.5, font.main=1)\n      # y-axis\n      mtext(log10P.Y, side = 2, outer = F, cex = 1.5, line = 3.5)\n      axis(2, at = myaxis, labels=myaxis.lab, las=1, cex.axis=2)\n      plotrix::axis.break(axis=2,breakpos=20,bgcol=\"white\",breakcol=\"black\", style=\"zigzag\",brw=0.02)\n    } \n\n    # x-axis labels (jittered)\n    abline(v=chrlims, lty=2, col=\"gray65\") #chr breaks\n    axis(1, at = c(0,chrlims, tail(chrlims.max,1)), labels=FALSE) #chr ticks\n    x.c <- seq(1,22,2)\n    text(x=axisdf$center[x.c],  par(\"usr\")[3],\n      labels = axisdf$CHR[x.c], pos = 1, xpd = TRUE, cex=2.1, offset=.7)\n    text(x=axisdf$center[-x.c],  par(\"usr\")[3],\n      labels = axisdf$CHR[-x.c], pos = 1, xpd = TRUE, cex=2.1, offset=2.5)\n\n    return(NULL)\n    })\n\n  return(NULL)\n  })\n\np1.base <- recordPlot()\ninvisible(dev.off())\n\n## TIFF format\ntiff(paste0(plot.folder, \"Figure1.tiff\"), width= 300*6*1.5*3 , height = 300*6*3, res = 300, compression =\"lzw\")\np1.base\ndev.off()\n\n## convert to pdf using imagemagick\nsystem(paste0(\"convert -density 300 -units PixelsPerInch \", plot.folder, 
\"Figure1.tiff \", plot.folder, \"Figure1.pdf\"))\n"
  },
  {
    "path": "scripts/regenie_paper/scripts/mk_sparseGRM.r",
    "content": "#!/usr/bin/env Rscript\n\nsuppressPackageStartupMessages({\n  if(!require(data.table)){ install.packages(\"data.table\"); library(data.table) }\n  if(!require(tidyverse)){ install.packages(\"tidyverse\"); library(tidyverse) }\n  if(!require(optparse)){ install.packages(\"optparse\"); library(optparse) }\n})\n#########################################\n##\n## Script used to make sparse GRM file for fastGWA\n## for the analyses in the REGENIE 2020 paper\n## For more details, visit: https://rgcgithub.github.io/regenie/\n##  \n#########################################\n\noption_list = list(\n  make_option(\"--step1File\", type=\"character\", default=\"\",\n    help=\"bed file prefix for step 1\", metavar=\"string\"),\n  make_option(\"--prefix\", type=\"character\", default=\"\",\n    help=\"output files prefix\", metavar=\"string\"),\n  make_option(\"--partition\", type=\"integer\", default=0,\n    help=\"partition number for fastGWA GRM computation\"),\n  make_option(\"--npartitions\", type=\"integer\", default=250,\n    help=\"number of partitions for fastGWA GRM computation\")\n  );\nopt_parser = OptionParser(option_list=option_list);\nopt = parse_args(opt_parser);\n\ntotal.partitions <- opt$npartitions\nif( opt$partition > total.partitions) stop(\"Invalid argument\")\nprint(opt)\n\n########### Functions ############\nfastGWA.computeGRM <- function(){\n\n  fastGWA.call <- paste0(\"gcta64 \",\n    \"--bfile \", bed.file, \" \",\n    \"--thread-num \", parallel::detectCores(), \" \",\n    \"--out \", outprefix, \" \",\n    \"--make-grm-part \", total.partitions, \" \", opt$partition)\n  print(fastGWA.call)\n\n  tot.time <- system.time({\n    system(fastGWA.call)\n  })\n\n  write( tot.time, paste0(outprefix, \"_time_pt_\", opt$partition), ncol = length(tot.time))\n}\n\nfastGWA.compute.sparseGRM <- function(){\n\n  ## Combine the partitions\n  system( paste0(\"cat \", outprefix,\n      \".part_\",total.partitions,\"_*.grm.id > \",outprefix, \".grm.id\") )\n  system( paste0(\"cat \", outprefix,\n      \".part_\",total.partitions,\"_*.grm.bin > \",outprefix, \".grm.bin\") )\n  system( paste0(\"cat \", outprefix,\n      \".part_\",total.partitions,\"_*.grm.N.bin > \",outprefix, \".grm.N.bin\") )\n\n  # compute sparse GRM\n  fastGWA.call <- paste0(\"gcta64 \",\n    \"--grm \", outprefix, \" \",\n    \"--make-bK-sparse 0.05 \",\n    \"--out \", outprefix, \"_sp \")\n  system(fastGWA.call)\n\n  if(file.exists( paste0(outprefix, \"_sp.grm.id\") )){ # in case of error so no need to redo\n    system( paste0(\"rm \", outprefix,\".part_\",total.partitions,\"_*.grm.*\") ) \n  }\n}\n\n#################################\noutprefix <- opt$prefix\nbed.file <- opt$step1File\n\nif( opt$partition > 0 ){ # run fastGWA GRM computation partitions\n\n  fastGWA.computeGRM()\n\n} else if(opt$partition == 0){ # finish fastGWA sparse GRM computation\n\n  tot.time <- system.time({\n    fastGWA.compute.sparseGRM()\n  })\n  write( tot.time, paste0(outprefix, \"_time_compSparseGRM\"), ncol = length(tot.time))\n\n} \n\n"
  },
  {
    "path": "scripts/regenie_paper/scripts/prep_files.sh",
    "content": "#########################################\n## To prepare step 1 file that will be used in regenie/BOLT/fastGWA/SAIGE in UKB WB samples\n##\n\n## array bed\nstep1file_pre=$1\n## imputed bgen\nstep2file=$2\n## list of FID/IID for UKB WB samples with covariate info\nsample_keep=$3\n# bed region file listing ICLD and low-complexity regions\nregions_ignore=$4\noutprefix=$5\n\n## subset to samples with array & imputed data\ngrep -wFf $sample_keep ${step1file_pre}.fam | cut -f1,2 > samples_keep.tmp\ngrep -wFf samples_keep.tmp <(cat ${step2file}.sample | tr ' ' '\\t') | cut -f1,2 > samples_keep.tmp1\n\n## filters for step 1\n## samples: %missing < 10%\n## variants: MAF>1%, %miss<1%, HWE p>1e-15, no ICLD, no low-complexity regions, light LD pruning\nplink2 \\\n  --write-samples \\\n  --write-snplist \\\n  --autosome \\\n  --bfile $step1file_pre \\\n  --keep samples_keep.tmp1 \\\n  --mind 0.1 \\\n  --geno 0.01 --maf 0.01 --hwe 1e-15 \\\n  --exclude range $regions_ignore \\\n  --indep-pairwise 1000 100 0.9 \\\n  --out $outprefix\n\nrm samples_keep.tmp samples_keep.tmp1\n\n## make bed file applying filters\nplink2 \\\n  --make-bed \\\n  --bfile $step1file_pre \\\n  --keep ${outprefix}.id \\\n  --extract ${outprefix}.prune.in \\\n  --out $outprefix\n\n\n"
  },
  {
    "path": "scripts/regenie_paper/scripts/run_methods.r",
    "content": "#!/usr/bin/env Rscript\n\nsuppressPackageStartupMessages({\n  if(!require(data.table)){ install.packages(\"data.table\"); library(data.table) }\n  if(!require(tidyverse)){ install.packages(\"tidyverse\"); library(tidyverse) }\n  if(!require(optparse)){ install.packages(\"optparse\"); library(optparse) }\n})\n#########################################\n##\n## Script used to run REGENIE/BOLT/fastGWA/SAIGE for GWAS\n## for the analyses in the REGENIE 2020 paper\n## For more details, visit: https://rgcgithub.github.io/regenie/\n##  \n#########################################\noption_list = list(\n  make_option(\"--method\", type=\"character\", default=\"\",\n    help=\"method to run in analysis\"),\n  make_option(\"--step1File\", type=\"character\", default=\"\",\n    help=\"bed file prefix for step 1\", metavar=\"string\"),\n  make_option(\"--step2File\", type=\"character\", default=\"\",\n    help=\"bgen file (or prefix) for step 2\", metavar=\"string\"),\n  make_option(\"--phenoFile\", type=\"character\", default=\"\",\n    help=\"phenotype file\", metavar=\"string\"),\n  make_option(\"--covarFile\", type=\"character\", default=\"\",\n    help=\"covariate file\", metavar=\"string\"),\n  make_option(\"--prefix\", type=\"character\", default=\"\",\n    help=\"output files prefix\", metavar=\"string\"),\n  make_option(\"--pheno\", type=\"integer\", default=0,\n    help=\"which phenotype column to run [default is all for regenie]\", metavar=\"number\"),\n  make_option(\"--bt\", action=\"store_true\", default=FALSE,\n    help=\"run regenie in BT mode\"),\n  make_option(\"--lowmem\", action=\"store_true\", default=FALSE,\n    help=\"run regenie with lowmem option\"),\n  make_option(\"--loocv\", action=\"store_true\", default=FALSE,\n    help=\"run regenie with LOOCV\"),\n  make_option(\"--grm\", type=\"character\", default=\"\",\n    help=\"path to sparse GRM for fastGWA\", metavar=\"string\"),\n  make_option(\"--chr\", type=\"integer\", default=0,\n    help=\"chromosome to test\", metavar=\"number\"),\n  make_option(\"--noapprox\", action=\"store_true\", default=FALSE,\n    help=\"use exact Firth\"),\n  make_option(\"--spa\", action=\"store_true\", default=FALSE,\n    help=\"use SPA\"),\n  make_option(\"--skipNull\", action=\"store_true\", default=FALSE,\n    help=\"Run step 2\")\n  );\nopt_parser = OptionParser(option_list=option_list);\nopt = parse_args(opt_parser);\n\nif(!file.exists(opt$phenoFile)) stop(\"Phenotype file does not exist\")\nif(!file.exists(opt$covarFile)) stop(\"Covariate file does not exist\")\n\nprint(opt)\n\n########### Functions ############\n# fit regenie\nfit.regenie <- function() {\n\n  mode.rg <- test.type <- phenoCols <- rg.suffix <- rg.suffix2 <- \"\"\n\n  # get phenotype name\n  phenoNames <- fread(pheno.file) %>% select(-FID,-IID) %>% colnames\n  if( opt$pheno > 0) {\n    if(opt$pheno > length(phenoNames)) stop(\"Invalid phenotype column provided\")\n    phenoCols <- paste0(\"--phenoCol \", phenoNames[opt$pheno])\n    rg.suffix <- paste0(\"_phenoCol\", opt$pheno) \n  }\n\n  # BT mode\n  if( opt$bt ) mode.rg <- \"--bt\"\n  # cv mode\n  if( opt$loocv ){\n    rg.suffix <- paste0(rg.suffix, \"_loocv\" )\n    mode.rg <- paste0(mode.rg, \" --loocv\" )\n  }\n  # lowmem mode\n  if( opt$lowmem ) \n    mode.rg <- paste0(mode.rg, \" --lowmem\" )\n  else\n    rg.suffix <- paste0(rg.suffix, \"_nowrite\" )\n  # spa/firth\n  if( opt$skipNull & opt$bt ) {\n    if(opt$spa){\n      mode.rg <- paste0(mode.rg, \" --spa\" )\n      rg.suffix2 <- \"_SPA\"\n    } else 
if(opt$noapprox){\n      mode.rg <- paste0(mode.rg, \" --firth\" )\n      rg.suffix2 <- \"_FirthExact\"\n    } else{\n      mode.rg <- paste0(mode.rg, \" --firth --approx\" )\n      rg.suffix2 <- \"_Firth\"\n    }\n  }\n\n  if( !opt$skipNull) { # step 1\n\n    # regenie call\n    rg.call <- paste0(\"regenie \",\n      \"--bed \", bed.file, \" \",\n      \"--phenoFile \", pheno.file, \" \", phenoCols, \" \",\n      \"--covarFile \", covar.file, \" \",\n      \"--bsize 1000 \",\n      \"--step 1 \",\n      mode.rg, \" \", \n      \"--threads \", parallel::detectCores(),\" \",\n      \"--out \", outprefix, rg.suffix\n    )\n    rg.time.suffix <- \"_time.step1\"\n\n  } else { # step 2 for each chromosome\n\n    # regenie call\n    rg.call <- paste0(\"regenie \",\n      \"--bgen \", bgen.file, opt$chr, \".bgen \",\n      \"--phenoFile \", pheno.file, \" \", phenoCols, \" \",\n      \"--covarFile \", covar.file, \" \",\n      \"--bsize 400 \",\n      \"--step 2 \",\n      mode.rg, \" \", \n      \"--threads \", parallel::detectCores(),\" \",\n      \"--pred \", outprefix, rg.suffix, \"_pred.list \",\n      \"--out \", outprefix, rg.suffix, rg.suffix2, \"_chr\", opt$chr\n    )\n    rg.time.suffix <- paste0(\"_time.chr\", opt$chr)\n\n  }\n\n  cat( cmd <- paste0(\"/usr/bin/time -v \", rg.call, \" > \", track.file, \" 2>&1\" ) )\n  t0 <- system.time(system(cmd))\n  write( t0, paste0(outprefix, rg.suffix, rg.suffix2, rg.time.suffix), ncol = length(t0))\n\n  return(NULL)\n}\n\n# fit bolt (step 2 for all chromosomes)\nfit.bolt <- function() {\n\n  phenoNames <- fread(pheno.file) %>% select(-FID,-IID) %>% colnames\n  phenoCol <- phenoNames[opt$pheno]\n  covNames <- fread(covar.file) %>% select(-FID,-IID) %>% colnames\n  split.bgen.files <- paste0(bgen.file, 1:22, \".bgen\")\n\n  bolt.call <- paste0(\"bolt \",\n    \"--bfile=\", bed.file, \"  \",\n    \"--phenoFile=\", pheno.file, \" \",\n    \"--phenoCol=\", phenoCol, \" \",\n    \"--covarFile=\", covar.file, \" \",\n    paste0(\"--qCovarCol=\",covNames, collapse=\" \"), \" \",\n    \"--lmmForceNonInf \",\n    \"--LDscoresUseChip \",\n    \"--numThreads=\", parallel::detectCores(),\" \",\n    \"--statsFile=\", outprefix,\"_phenoCol\", opt$pheno ,\".grm \",\n    \"--predBetasFile=\", outprefix,\"_phenoCol\", opt$pheno ,\".loco \",\n    paste( paste0(\"--bgenFile=\", split.bgen.files), collapse=\" \"), \" \",\n    \"--sampleFile=\", gsub(\"bgen\", \"sample\", split.bgen.files[1]), \" \",\n    \"--statsFileBgenSnps=\", outprefix, \"_phenoCol\", opt$pheno ,\".test\"\n  )\n  bolt.time.suffix <- paste0(\"_time\")\n\n  cat( cmd <- paste0(\"/usr/bin/time -v \", bolt.call, \" > \", track.file, \" 2>&1\" ) )\n  t0 <- system.time(system(cmd))\n  write( t0, paste0(outprefix, \"_phenoCol\", opt$pheno , bolt.time.suffix), ncol = length(t0))\n\n  return(NULL)\n}\n\n# fit saige\nfit.saige <- function() {\n\n  if( !opt$skipNull) { ## step 1\n\n    cov.file <- fread(covar.file) %>% select(-FID,-IID)\n    ncov <- ncol(cov.file)\n    colnames(cov.file) <- paste0(\"V\",1:ncov)\n    dp <- fread(pheno.file)\n    colnames(dp) <- c(\"FID\",\"IID\", paste0(\"P\",1:(ncol(dp)-2)))\n\n    fwrite(cbind(dp, cov.file), paste0(outprefix, \"_phenoCol\", opt$pheno, \".yout.SAIGE\"), \n      sep=\" \", quote=FALSE, na=\"NA\")\n    phenoNames <- dp %>% select(-FID,-IID) %>% colnames\n    phenoCol <- phenoNames[opt$pheno]\n\n    saige.call <- paste0(\"step1_fitNULLGLMM.R \",\n      \"--plinkFile=\", bed.file, \" \",\n      \"--phenoFile=\", outprefix, \"_phenoCol\", opt$pheno, \".yout.SAIGE 
\",\n      \"--phenoCol=P\", opt$pheno, \" \",\n      \"--covarColList=\", paste0(paste0(\"V\",1:ncov), collapse=\",\"),\" \",\n      \"--sampleIDColinphenoFile=IID \",\n      \"--traitType=binary \",\n      \"--LOCO=TRUE \",\n      \"--minMAFforGRM=0.0001 \",\n      \"--nThreads=\", parallel::detectCores(),\" \",\n      \"--outputPrefix=\", outprefix, \"_phenoCol\", opt$pheno\n    )\n    saige.time.suffix <- \"_time.step1\"\n\n  } else { # step 2 for each chromosome\n\n    fread( paste0(bgen.file, opt$chr, \".sample\") )  %>%\n      select(ID_1) %>%\n      slice(-1) %>%\n      fwrite(file=paste0(outprefix, \"_phenoCol\", opt$pheno, \".sample.SAIGE\"), quote=FALSE, na=\"NA\", col.names=FALSE)\n\n    saige.call <- paste0(\"step2_SPAtests.R \",\n      \"--sampleFile=\", outprefix, \"_phenoCol\", opt$pheno, \".sample.SAIGE \",\n      \"--LOCO=TRUE \",\n      \"--chrom=\", opt$chr, \" \",\n      \"--bgenFile=\", bgen.file, opt$chr,\".bgen \",\n      \"--bgenFileIndex=\", bgen.file, opt$chr,\".bgen.bgi \",\n      \"--GMMATmodelFile=\", outprefix, \"_phenoCol\", opt$pheno, \".rda \",\n      \"--varianceRatioFile=\", outprefix, \"_phenoCol\", opt$pheno, \".varianceRatio.txt \",\n      \"--SAIGEOutputFile=\", outprefix, \"_phenoCol\", opt$pheno, \"_chr\", opt$chr, \".test\"\n    )\n    saige.time.suffix <- paste0(\"_time.chr\", opt$chr)\n  }\n\n  cat( cmd <- paste0(\"/usr/bin/time -v \", saige.call, \" > \", track.file, \" 2>&1\" ) )\n  t0 <- system.time( system(cmd) )\n  write( t0, paste0(outprefix, \"_phenoCol\", opt$pheno, saige.time.suffix), ncol = length(t0))\n\n  return(NULL)\n}\n\n#fastGWA\nfit.fastGWA <- function(){\n\n  fread(pheno.file) %>%\n    fwrite( paste0(outprefix, \".pheno.fastGWA\"), col.names=FALSE, na=NA, sep=\" \",quote=FALSE)\n  fread(covar.file) %>%\n    fwrite( paste0(outprefix, \".covar.fastGWA\"), col.names=FALSE, na=NA, sep=\" \",quote=FALSE)\n  fread(pheno.file) %>% select(FID, IID) %>%\n    fwrite(paste0(outprefix, \".sample.fastGWA\"), col.names=FALSE, na=NA, sep=\" \",quote=FALSE)\n\n  phenoNames <- fread(pheno.file) %>% select(-FID,-IID) %>% colnames\n  phenoCol <- phenoNames[opt$pheno]\n  data.frame( bgen.names = paste0(bgen.file,1:22, \".bgen\") ) %>%\n    fwrite(paste0(outprefix, \"_phenoCol\", opt$pheno, \".bgen.list\"),\n      quote=F, col.names=F)\n\n  fastGWA.call <- paste0(\"gcta64 \",\n    \"--pheno \", outprefix, \".pheno.fastGWA \",\n    \"--mpheno \", opt$pheno, \" \", \n    \"--qcovar \", outprefix, \".covar.fastGWA \",\n    \"--keep \", outprefix, \".sample.fastGWA \",\n    \"--fastGWA-mlm --h2-limit 2.5 --maf 0 --geno 1 \",\n    \"--grm-sparse \", opt$grm, \" \",\n    \"--mbgen \", outprefix, \"_phenoCol\", opt$pheno, \".bgen.list \",\n    \"--sample \", bgen.file, \"1.sample \",\n    \"--threads \", parallel::detectCores(),\" \",\n    \"--out \", outprefix, \"_phenoCol\", opt$pheno, \".test\"\n  )\n\n  cat( cmd <- paste0(\"/usr/bin/time -v \", fastGWA.call, \" > \", track.file, \" 2>&1\" ) )\n  t0 <- system.time( system(cmd) )\n  write( t0, paste0(outprefix, \"_phenoCol\", opt$pheno, \"_time\"), ncol = length(t0))\n\n  return(NULL)\n}\n\n#################################\n# main paths + files\noutprefix <- opt$prefix\n\npheno.file <- opt$phenoFile\ncovar.file <- opt$covarFile\nbed.file <- opt$step1File\nbgen.file <- opt$step2File\n\n## tracking memory usage\ntrack.file <- paste0(outprefix,\"_timing\")\n# all phenos\nif( opt$pheno == 0 ){\n  if(!opt$skipNull) \n    track.file <- paste0(track.file, \"_step1\")\n  else {\n    track.file <- 
paste0(track.file, \"_step2\")\n    if( opt$chr != 0 ) track.file <- paste0(track.file, \"_chr\", opt$chr)\n  }\n} else { # by phenotype\n  if(!opt$skipNull) \n    track.file <- paste0(track.file, \"_step1_phenoCol\", opt$pheno)\n  else { \n    track.file <- paste0(track.file, \"_step2_phenoCol\", opt$pheno)\n    if( opt$chr != 0 ) track.file <- paste0(track.file, \"_chr\", opt$chr)\n  }\n}\n\nif( opt$method == \"regenie\") {\n  if( !opt$lowmem) track.file <- paste0(track.file, \"_nowrite\")\n  if( opt$loocv) track.file <- paste0(track.file, \"_loocv\")\n  if( opt$skipNull & opt$bt) \n    track.file <- paste0(track.file, ifelse(opt$spa, \"_spa\", ifelse(opt$noapprox, \"_firthexact\",  \"_firth\")))\n}\n\ntrack.file <- paste0(track.file, \".log\")\ncat( paste0(\"Tracking memory in file: \", track.file),\"\\n\")\n\nif( opt$method == \"regenie\") {\n  fit.regenie()\n} else if( opt$method == \"bolt\") {\n  fit.bolt()\n} else if( opt$method == \"saige\") {\n  fit.saige()\n} else if( opt$method == \"fastgwa\") {\n  fit.fastGWA()\n} else {\n  stop(\"Invalid method\")\n}\n\n"
  },
  {
    "path": "scripts/regenie_paper/scripts/std_ffuns.r",
    "content": "#!/usr/bin/Rscript\n\nfig_label <- function(text, region=\"figure\", pos=\"topleft\", cex=NULL, ...) {\n\n  region <- match.arg(region, c(\"figure\", \"plot\", \"device\"))\n  pos <- match.arg(pos, c(\"topleft\", \"top\", \"topright\",\n                          \"left\", \"center\", \"right\",\n                          \"bottomleft\", \"bottom\", \"bottomright\"))\n\n  if(region %in% c(\"figure\", \"device\")) {\n    ds <- dev.size(\"in\")\n    # xy coordinates of device corners in user coordinates\n    x <- grconvertX(c(0, ds[1]), from=\"in\", to=\"user\")\n    y <- grconvertY(c(0, ds[2]), from=\"in\", to=\"user\")\n\n    # fragment of the device we use to plot\n    if(region == \"figure\") {\n      # account for the fragment of the device that\n      # the figure is using\n      fig <- par(\"fig\")\n      dx <- (x[2] - x[1])\n      dy <- (y[2] - y[1])\n      x <- x[1] + dx * fig[1:2]\n      y <- y[1] + dy * fig[3:4]\n    }\n  }\n\n  # much simpler if in plotting region\n  if(region == \"plot\") {\n    u <- par(\"usr\")\n    x <- u[1:2]\n    y <- u[3:4]\n  }\n\n  sw <- strwidth(text, cex=cex) * 60/100\n  sh <- strheight(text, cex=cex) * 60/100\n\n  x1 <- switch(pos,\n    topleft     =x[1] + sw,\n    left        =x[1] + sw,\n    bottomleft  =x[1] + sw,\n    top         =(x[1] + x[2])/2,\n    center      =(x[1] + x[2])/2,\n    bottom      =(x[1] + x[2])/2,\n    topright    =x[2] - sw,\n    right       =x[2] - sw,\n    bottomright =x[2] - sw)\n\n  y1 <- switch(pos,\n    topleft     =y[2] - sh,\n    top         =y[2] - sh,\n    topright    =y[2] - sh,\n    left        =(y[1] + y[2])/2,\n    center      =(y[1] + y[2])/2,\n    right       =(y[1] + y[2])/2,\n    bottomleft  =y[1] + sh,\n    bottom      =y[1] + sh,\n    bottomright =y[1] + sh)\n\n  old.par <- par(xpd=NA)\n  on.exit(par(old.par))\n\n  text(x1, y1, text, cex=cex, ...)\n  return(invisible(c(x,y)))\n}\n\nprep.data <- function(gwasResults){\n\n  gwasResults %>%\n\n    # Compute chromosome size\n    group_by(CHR) %>%\n    summarise(chr_len=max(POS)) %>%\n\n    # Calculate cumulative position of each chromosome\n    mutate(tot=cumsum(as.numeric(chr_len))-chr_len) %>%\n    select(-chr_len) %>%\n\n    # Add this info to the initial dataset\n    left_join(gwasResults, ., by=c(\"CHR\"=\"CHR\")) %>%\n\n    # Add a cumulative position of each SNP\n    arrange(CHR, POS) %>%\n    mutate( POScum=POS+tot)\n}\n\n"
  },
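  {
    "path": "scripts/regenie_paper/scripts/examples/prep_data_demo.r",
    "content": "#!/usr/bin/env Rscript\n\n## NOTE: hypothetical sketch added for illustration; this file is not part of\n## the repository. It shows what prep.data() from std_ffuns.r computes: a\n## cumulative genomic position (POScum) that lays the chromosomes end to end\n## for Manhattan plots. The toy data are made up, and dplyr must be attached\n## (std_ffuns.r assumes the caller loads it).\n\nsuppressMessages(library(dplyr))\nsource(\"scripts/regenie_paper/scripts/std_ffuns.r\")\n\ngwas <- data.frame(CHR = c(1, 1, 2, 2), POS = c(100, 500, 50, 300))\n\n## chromosome 2 is offset by max(POS) on chromosome 1 (= 500):\n## POScum = 100 500 550 800\nprint(prep.data(gwas)$POScum)\n"
  },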
  {
    "path": "scripts/rplot",
    "content": "#!/usr/bin/env Rscript\n\n#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n# rplot     [Andrey Ziyatdinov]\n#\n# Plot Regenie results\n#\n# rplot uses the Rscript executable to run R code for plotting.\n# Required R libraries: docopt, data.table, dplyr, unglue, \n# ggplot2, scales, ggrepel.\n#\n# Inspired by https://github.com/coolbutuseless/dplyr-cli.\n#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n\nsuppressMessages({\n  library(docopt)\n  library(data.table)\n  library(dplyr)\n  library(unglue)\n  library(ggplot2)\n  library(scales)\n  library(ggrepel)\n})\n\n#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n# configuration for docopt\n#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n\ndoc <- \"rplot\n\nUsage:\n  rplot <command> (--file=FILE) (--out=PREFIX) [--htp]\n  Rscript rplot <command> (--file=FILE) (--out=PREFIX) [--htp]\n  rplot -h | --help\n  rplot lovo -f lovo.regenie.gz -o out\n\nOptions:\n  -h --help               show the help\n  -f FILE --file=FILE     Regenie output file\n  -o PREFIX --out=PREFIX  output prefix\n  --htp                   flag for HTPv4 format\n  \"\n\narg <- docopt(doc)\n\n#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n# LOVO\n#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n\nif(arg$command == \"lovo\") {\n  # check inputs\n  stopifnot(!is.null(arg$file))\n  stopifnot(file.exists(arg$file))\n  stopifnot(!is.null(arg$out))\n\n  # Set format of sum stats file\n  if(arg$htp){\n    cols.extract <- c(\"Chr\",\"Pos\",\"Name\", \"Effect\", \"Pval\")\n    fn.pval <- function(x) -log10(x)\n  } else {\n    cols.extract <- c(\"CHROM\",\"GENPOS\", \"ID\", \"BETA\", \"LOG10P\")\n    fn.pval <- function(x) x\n  }\n\n  # read lovo results\n  lovo <- fread(arg$file) %>% as_tibble\n  stopifnot(all(cols.extract %in% names(lovo)))\n  lovo <- lovo %>%\n    select(all_of(cols.extract)) %>%\n    setNames(c(\"CHROM\",\"GENPOS\", \"ID\", \"BETA\", \"LOG10P\")) %>%\n    mutate(\n      LOG10P = fn.pval(LOG10P),\n      ID = unglue_vec(ID, \"{}_{x}\")\n    )\n\n  # full mask will correspond to \"NA\" for ID\n  full.mask.p <- lovo[ is.na(lovo$ID), \"LOG10P\"] %>% pull\n  full.mask.b <- lovo[ is.na(lovo$ID), \"BETA\"] %>% pull\n  stopifnot(length(full.mask.p) == 1)\n  lovo <- lovo[!is.na(lovo$ID),]\n  # get chr\n  chr <- lovo$CHROM[1]\n  stopifnot(all(lovo$CHROM == chr))\n\n  # extract variant with extreme LOG10P\n  lovo$score <- full.mask.p - lovo$LOG10P\n  med <- median(lovo$score)\n  iqb <- quantile(lovo$score, c(0.25,0.75))\n  iqr <- diff(iqb)\n  min.b <- iqb[1] - 1.5 * iqr\n  max.b <- iqb[2] + 1.5 * iqr\n  lovo_top <- lovo[ (lovo$score < min.b) | (lovo$score > max.b),]\n  # if no extreme variants, choose the one with largest p-value\n  if(nrow(lovo_top) == 0) lovo_top = lovo %>% slice_max(order_by = score)\n  lovo_top <- lovo_top %>% distinct\n  # limit to top 3 for smaller & top 1 for larger (should be less common) \n  top3.max <- lovo_top %>%\n    slice_max(n=3, order_by = score)\n  top1.min <- lovo_top %>%\n    slice_min(n=1, order_by = score)\n  lovo_top <- rbind(top1.min, top3.max) %>% distinct\n\n\n  # plot p-values\n  p <- ggplot(lovo, aes(GENPOS, LOG10P)) +\n    geom_point(size = 2) +\n    geom_label_repel(data = lovo_top, aes(GENPOS, LOG10P, label = ID))\n\n  # Add line for full mask\n  p <- p +\n    geom_hline(yintercept = full.mask.p, col=\"red\")\n\n  p <- p +  scale_x_continuous(labels = comma) +\n    labs(\n      x = paste0(\"Genomic position on chomosome \", chr),\n      y = bquote(\"Observed p-value (on 
-\"*log[10]~\"scale)\")\n      ) +\n    theme_minimal()\n\n  # save\n  f_out <- paste0(arg$out, \".lovo.png\")\n  ggsave(f_out, plot = p, dpi = 100)\n\n\n  # plot effect sizes\n  p <- ggplot(lovo, aes(GENPOS, BETA)) +\n    geom_point(size = 2) +\n    geom_label_repel(data = lovo_top, aes(GENPOS, BETA, label = ID))\n\n  # Add line for full mask\n  p <- p +\n    geom_hline(yintercept = full.mask.b, col=\"red\")\n\n  p <- p +  scale_x_continuous(labels = comma) +\n    labs(\n      x = paste0(\"Genomic position on chomosome \", chr),\n      y = \"Estimated effect\"\n    ) +\n    theme_minimal()\n\n  # save\n  f_out <- paste0(arg$out, \".lovo_beta.png\")\n  ggsave(f_out, plot = p, dpi = 100)\n} \n"
  },
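  {
    "path": "scripts/examples/rplot_fences_demo.r",
    "content": "#!/usr/bin/env Rscript\n\n## NOTE: hypothetical sketch added for illustration; this file is not part of\n## the repository. It isolates the label-selection rule used by scripts/rplot\n## for LOVO plots: each variant's score is the drop in -log10 p when it is\n## left out of the mask, and variants are flagged when the score falls outside\n## the Tukey fences [Q1 - 1.5*IQR, Q3 + 1.5*IQR]. The scores are made up.\n\nscore <- c(0.10, 0.15, 0.18, 0.20, 5.00)  # variant 5 drives the mask signal\niqb <- quantile(score, c(0.25, 0.75))\niqr <- diff(iqb)\nflagged <- which(score < iqb[1] - 1.5 * iqr | score > iqb[2] + 1.5 * iqr)\nprint(flagged)  # -> 5\n"
  },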
  {
    "path": "src/Data.cpp",
    "content": "/*\n\n   This file is part of the regenie software package.\n\n   Copyright (c) 2020-2024 Joelle Mbatchou, Andrey Ziyatdinov & Jonathan Marchini\n\n   Permission is hereby granted, free of charge, to any person obtaining a copy\n   of this software and associated documentation files (the \"Software\"), to deal\n   in the Software without restriction, including without limitation the rights\n   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n   copies of the Software, and to permit persons to whom the Software is\n   furnished to do so, subject to the following conditions:\n\n   The above copyright notice and this permission notice shall be included in all\n   copies or substantial portions of the Software.\n\n   THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n   SOFTWARE.\n\n*/\n\n#include <limits.h> /* for PATH_MAX */\n#include <chrono>\n\n#if defined(__GNUC__)\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored \"-Wmisleading-indentation\"\n#pragma GCC diagnostic ignored \"-Wint-in-bool-context\"\n#pragma GCC diagnostic ignored \"-Wparentheses\"\n#endif\n#include <boost/filesystem.hpp>\n#include <boost/exception/all.hpp>\n#if defined(__GNUC__)\n#pragma GCC diagnostic pop\n#endif\n\n#include \"Regenie.hpp\"\n#include \"Files.hpp\"\n#include \"Geno.hpp\"\n#include \"Joint_Tests.hpp\"\n#include \"survival_data.hpp\"\n#include \"cox_score.hpp\"\n#include \"Step1_Models.hpp\"\n#include \"Step2_Models.hpp\"\n#include \"Pheno.hpp\"\n#include \"MultiTrait_Tests.hpp\"\n#include \"Ordinal.hpp\"\n#include \"HLM.hpp\"\n#include \"SKAT.hpp\"\n#include \"Interaction.hpp\"\n#include \"Masks.hpp\"\n#include \"Data.hpp\"\n\n#ifdef WITH_HTSLIB\n#include \"remeta/regenie_ld_matrix_writer.hpp\"\n#endif\n\nusing namespace std;\nusing namespace Eigen;\nusing namespace boost;\nnamespace fs = boost::filesystem;\n\n\nusing boost::math::normal;\nusing boost::math::chi_squared;\n\nData::Data() { // @suppress(\"Class members should be properly initialized\")\n}\n\nData::~Data() {\n  // TODO Auto-generated destructor stub\n}\n\n\nvoid Data::run() {\n\n  // set number of threads\n  set_threads(&params);\n\n  if(params.streamBGEN) check_bgen(files.bgen_file, params.file_type, params.zlib_compress, params.streamBGEN, params.BGENbits, params.nChrom);\n\n  if(params.test_mode){  // step 2\n    run_step2();\n  } else {  // step 1\n    run_step1();\n  }\n\n}\n\nvoid Data::run_step1(){\n\n  sout << \"Fitting null model\\n\";\n\n  // set up file for reading\n  file_read_initialization();\n  // if splitting l0 into many jobs\n  if(params.split_l0) set_parallel_l0();\n  // read phenotype and covariate files\n  read_pheno_and_cov(&files, &params, &in_filters, &pheno_data, &m_ests, &Gblock, sout);\n  // adjust for covariates\n  prep_run(&files, &in_filters, &params, &pheno_data, &m_ests, sout);\n  // set number of blocks and block size and ridge parameters\n  set_blocks();\n  // some initializations\n  setmem();\n  // level 0\n  level_0_calculations();\n  // print y/x/logreg offset used for level 1 \n  if(params.debug) write_inputs();\n  // prep for level 1 
models\n  prep_l1_models();\n  // level 1 ridge\n  if(params.trait_mode == 0){ // QT\n    if(params.use_loocv) ridge_level_1_loocv(&files, &params, &pheno_data, &l1_ests, sout);\n    else ridge_level_1(&files, &params, &pheno_data, &l1_ests, sout);\n  } else if(params.trait_mode == 1){ // BT\n    if(params.use_loocv) ridge_logistic_level_1_loocv(&files, &params, &pheno_data, &m_ests, &l1_ests, sout);\n    else ridge_logistic_level_1(&files, &params, &pheno_data, &l1_ests, masked_in_folds, sout);\n  } else if(params.trait_mode == 2){ // CT\n    if(params.use_loocv) ridge_poisson_level_1_loocv(&files, &params, &pheno_data, &m_ests, &l1_ests, sout);\n    else ridge_poisson_level_1(&files, &params, &pheno_data, &l1_ests, masked_in_folds, sout);\n  } else if(params.trait_mode == 3){ // T2E\n    ridge_cox_level_1(&files, &params, &pheno_data, &l1_ests, &m_ests, sout);\n  }\n  // output results\n  output();\n\n}\n\nvoid Data::run_step2(){\n\n  // allocate per thread if using OpenMP\n  Gblock.thread_data.resize(params.neff_threads);\n\n  if(params.getCorMat) ld_comp();\n  else if( params.snp_set ) test_joint();\n  else if (params.trait_set) test_multitrait();\n  else if (params.multiphen) test_multiphen();\n  else test_snps_fast();\n\n}\n\n\n/////////////////////////////////////////////////\n/////////////////////////////////////////////////\n////          read in files\n/////////////////////////////////////////////////\n/////////////////////////////////////////////////\n\nvoid Data::file_read_initialization() {\n\n  // prepare genotype data\n  files.chr_counts.assign(params.nChrom, 0.0);\n\n  // for l0 in parallel\n  if(params.run_l0_only) prep_parallel_l0();\n\n  if( params.condition_snps )\n    get_conditional_vars(in_filters.condition_snp_names, &files, &params, sout);\n\n  if(params.file_type == \"bed\") read_bed_bim_fam(&files, &params, &in_filters, snpinfo, chr_map, sout);\n  else if(params.file_type == \"pgen\") read_pgen_pvar_psam(&files, &params, &in_filters, &Gblock, snpinfo, chr_map, sout);\n  else prep_bgen(&files, &params, &in_filters, snpinfo, chr_map, Gblock.bgen, sout);\n\n  params.nvs_stored = snpinfo.size();\n  //if(params.getCorMat) params.block_size = params.n_variants;\n\n  if(!params.test_mode && !params.force_run && ((int)params.nvs_stored > params.max_step1_variants))\n    throw \"it is not recommended to use more than \" + to_string( params.max_step1_variants ) + \n      \" variants in step 1 (use '--force-step1' to override). \" + params.webinfo ;\n\n  if( params.setMinINFO && !params.dosage_mode )\n    sout << \"WARNING: Dosages are not present in the genotype file. 
Option --minINFO is skipped.\\n\";\n\n}\n\n\n/////////////////////////////////////////////////\n/////////////////////////////////////////////////\n////          adjust for covariates in G\n/////////////////////////////////////////////////\n/////////////////////////////////////////////////\n\n// only for step 1\nvoid Data::residualize_genotypes() {\n\n  sout << \"   -residualizing and scaling genotypes...\" << flush;\n  auto t1 = std::chrono::high_resolution_clock::now();\n\n  // mask missing individuals\n  Gblock.Gmat.array().rowwise() *= in_filters.ind_in_analysis.matrix().transpose().array().cast<double>();\n\n  // residuals (centered)\n  MatrixXd beta = Gblock.Gmat * pheno_data.new_cov;\n  Gblock.Gmat -= beta * pheno_data.new_cov.transpose();\n\n  // scaling (use [N-C] where C=#covariates)\n  scale_G = Gblock.Gmat.rowwise().norm() / sqrt(params.n_analyzed - params.ncov);\n\n  // check sd\n  MatrixXd::Index minIndex;\n  if(scale_G.array().minCoeff(&minIndex) < params.numtol) \n    throw \"!! Uh-oh, SNP \" + snpinfo[in_filters.step1_snp_count+minIndex].ID + \n      \" has low variance (=\" + to_string( scale_G(minIndex,0) ) + \").\";\n\n  Gblock.Gmat.array().colwise() /= scale_G.array();\n\n  // to use MAF dependent prior on effect size [only for step 1]\n  // multiply by [p*(1-p)]^(1+alpha)/2\n  if(params.alpha_prior != -1) \n    Gblock.Gmat.array().colwise() *= pow(Gblock.snp_afs.col(0).array() * (1-Gblock.snp_afs.col(0).array()), 0.5 * (params.alpha_prior + 1) );\n\n\n  sout << \"done\";\n  auto t2 = std::chrono::high_resolution_clock::now();\n  auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1);\n  sout << \" (\" << duration.count() << \"ms) \"<< endl;\n\n}\n\n/////////////////////////////////////////////////\n/////////////////////////////////////////////////\n////          step 1: prepare for level 0\n/////////////////////////////////////////////////\n/////////////////////////////////////////////////\n\nvoid Data::set_parallel_l0(){\n\n  // compute the number of blocks\n  set_blocks();\n\n  // Make master file for L0 jobs\n  write_l0_master();\n\n  // exit software\n  exit_early();\n}\n\nvoid Data::write_l0_master(){\n\n  string fout = files.split_file + \".master\";\n\n  sout << \" * running level 0 in parallel across \" << params.total_n_block << \" genotype blocks\\n\";\n\n  if(params.njobs <= 1)\n    throw \"number of jobs must be >1.\";\n  else if(params.njobs > params.total_n_block){\n\n    sout << \"   -WARNING: Number of jobs cannot be greater than number of blocks.\\n\";\n    params.njobs = params.total_n_block;\n\n  }\n\n  sout << \"   -using \" << params.njobs << \" jobs\\n\";\n  sout << \"   -master file written to [\" << fout << \"]\\n\";\n  sout << \"   -variant list files written to [\" << files.split_file << \"_job*.snplist]\\n\";\n\n  // open master\n  ofstream ofile;\n  openStream(&ofile, fout, ios::out, sout);\n\n  // header\n  ofile << params.nvs_stored << \" \" << params.block_size << endl;\n\n  // split blocks in chunks of ~B/njobs\n  int nall = params.total_n_block / params.njobs;\n  int remainder = params.total_n_block - nall * params.njobs;\n  int nb = 0, bs, ns = 0, bcount = 0, scount = 0, jcount = 0;\n  int btarget = nall + (jcount < remainder ? 
1 : 0);\n  map<int, vector<int> >::iterator itr;\n\n  for (itr = chr_map.begin(); itr != chr_map.end(); ++itr) {\n    int chrom_nsnps = itr->second[0];\n    int chrom_nb = ceil(chrom_nsnps * 1.0 / params.block_size);\n    if(chrom_nb == 0) continue;\n\n    for(int bb = 0; bb < chrom_nb ; bb++) {\n\n      get_block_size(params.block_size, chrom_nsnps, bb, bs);\n\n      ns+=bs;\n      nb++, bcount++;\n\n      if( nb == btarget ){\n        string fname = files.split_file + \"_job\" + to_string( jcount+1 );\n        // write in master\n        ofile << fname << \" \" << btarget << \" \" << ns << endl;\n        // write snplist\n        writeSnplist(fname, scount, ns, snpinfo, sout);\n\n        jcount++;\n        scount += ns;\n        ns = nb = 0;\n        btarget = nall + (jcount < remainder ? 1 : 0);\n      }\n    }\n  }\n\n  if((bcount != params.total_n_block) || (jcount != params.njobs))\n    throw \"could not create master file.\";\n\n  ofile.close();\n\n}\n\nvoid Data::set_blocks() {\n\n  params.total_n_block = 0, total_chrs_loco = 0;\n  int blocks_left = params.n_block;\n  map<int, vector<int> >::iterator itr;\n  map<int, vector<int> > m1;\n\n  // compute number of blocks for each chromosome\n  for (itr = chr_map.begin(); itr != chr_map.end(); ++itr) {\n    int chrom_nsnps = itr->second[0];\n    int nb = ceil(chrom_nsnps * 1.0 / params.block_size);\n    if(params.n_block > 0) {\n      if(blocks_left > 0) {\n        int minb = min(nb, blocks_left);\n        itr->second[1] = minb;\n        params.total_n_block += minb;\n        blocks_left -= minb;\n      }\n    } else {\n      itr->second[1] = nb;\n      params.total_n_block += nb;\n    }\n\n    // track how many chromosomes will have blups\n    if(itr->second[1] > 0) total_chrs_loco++;\n    m1[ itr->first ] = itr->second;\n  }\n  chr_map = m1;\n  //sout << \"#chrs = \"<< chr_map.size() << \";#loco chrs = \"<< total_chrs_loco << endl;\n\n  if(params.total_n_block == 0)\n    throw \"total number of blocks must be > 0.\";\n\n  if(params.split_l0) return;\n  else if(params.run_l0_only) {\n    if((params.parallel_nBlocks != params.total_n_block) || (params.parallel_nSnps != (int)params.n_variants))\n      throw \"number of variants/blocks in run (=\" + to_string(params.n_variants) + \"/\" + to_string(params.total_n_block) +\n        \") don't match those in master file (=\" + to_string(params.parallel_nSnps) + \"/\" + to_string(params.parallel_nBlocks) + \").\";\n  } else if(params.run_l1_only) prep_parallel_l1();\n\n  // for BTs: check if the sample size is lower than 5K (if so, force loocv)\n  if( (params.trait_mode == 1) && !params.use_loocv && ( params.n_analyzed < 5000) ) {\n    sout << \"   -WARNING: Sample size is less than 5,000 so using LOOCV instead of \" << params.cv_folds << \"-fold CV.\\n\";\n    params.use_loocv = true;\n  }\n\n  /*\n  // check block size vs sample size\n  if(params.use_loocv && params.block_size > params.n_analyzed)\n    throw \"block size must be smaller than the number of samples to perform LOOCV!\";\n  */\n  if(params.use_loocv) params.cv_folds = params.n_samples;\n\n  uint32_t neff_folds = params.use_loocv ? 
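/* under LOOCV each analyzed sample is its own fold, so the effective fold count equals the number of analyzed samples rather than the --cv fold count */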
params.n_analyzed : params.cv_folds;\n\n  // summarize block sizes and ridge params\n  sout << left << std::setw(20) << \" * # threads\" << \": [\" << params.threads << \"]\\n\";\n  sout << left << std::setw(20) << \" * block size\" << \": [\" << params.block_size << \"]\\n\";\n  sout << left << std::setw(20) << \" * # blocks\" << \": [\" << params.total_n_block << \"] for \" << params.nvs_stored << \" variants\\n\";\n  sout << left << std::setw(20) << \" * # CV folds\" << \": [\" << neff_folds << \"]\\n\";\n\n  if(!params.run_l1_only){\n    IOFormat Fmt(FullPrecision, DontAlignCols, \" \", \" \", \"\", \"\",\"\",\"\");\n    sout << left << std::setw(20) << \" * ridge data_l0\" << \": [ \" << params.n_ridge_l0 << \" : \" << params.lambda.format(Fmt) << \" ]\\n\";\n  }\n\n  if(!params.run_l0_only){\n    IOFormat Fmt(FullPrecision, DontAlignCols, \" \", \" \", \"\", \"\",\"\",\"\");\n    sout << left << std::setw(20) << \" * ridge data_l1\" << \": [ \" << params.n_ridge_l1 << \" : \" << params.tau[0].format(Fmt) << \" ]\\n\";\n  }\n\n  // if using maf dependent prior\n  if(!params.test_mode && (params.alpha_prior != -1) ) \n    sout << \" * applying a MAF dependent prior to the SNP effect sizes in level 0 models (alpha=\" << params.alpha_prior << \")\\n\";\n\n  // print approx. amount of memory needed\n  print_usage_info(&params, &files, sout);\n\n  // storing null estimates from firth\n  if(params.write_null_firth ) \n    sout << \" * writing null Firth estimates to file\\n\";\n\n  // if within sample predictions are used in level 1\n  if (params.within_sample_l0) \n    sout << \" * using within-sample predictions from level 0 as features at level 1\\n\";\n\n}\n\n\nvoid Data::set_folds() {\n\n  // set up folds\n  params.cv_sizes.resize(params.cv_folds, 1);\n\n  // assign folds for individuals in analysis\n  if( !params.use_loocv ){\n\n    uint32_t target_size_folds = floor( params.n_analyzed / params.cv_folds );\n    if( target_size_folds < 1 )\n      throw \"not enough samples are present for \" + to_string( params.cv_folds ) + \"-fold CV.\";\n\n    uint32_t n_non_miss = 0, cum_size_folds = 0;\n    int cur_fold = 0;\n    for(size_t i = 0; i < params.n_samples; i++){\n\n      if( in_filters.ind_in_analysis(i) ) n_non_miss++;\n\n      if( n_non_miss == target_size_folds){\n        params.cv_sizes(cur_fold) = i - cum_size_folds + 1;\n        cum_size_folds += params.cv_sizes(cur_fold);\n        n_non_miss = 0, cur_fold++;\n      } else if( cur_fold == (params.cv_folds - 1) ){\n        params.cv_sizes(cur_fold) = params.n_samples - i;\n        break;\n      }\n\n      //sout << i << \" \" << cur_fold << \" \" << n_non_miss << \" \" << in_filters.ind_in_analysis(i) << \" \"<< target_size_folds << endl;\n    }\n\n  } else // loocv\n    params.cv_sizes = ArrayXi::Constant(params.cv_folds, 1);\n\n\n  // check sd(Y) in folds\n  if(!params.use_loocv && params.trait_mode){\n\n    int minIndex;\n    uint32_t cum_size_folds = 0;\n    ArrayXd sum, n_cv, sd_phenos;\n\n    for(int i = 0; i < params.cv_folds; i++) {\n\n      MatrixXb M = pheno_data.masked_indivs.middleRows(cum_size_folds, params.cv_sizes(i)); // nxp\n      MatrixXd Y = (pheno_data.phenotypes_raw.middleRows(cum_size_folds, params.cv_sizes(i)).array() * M.array().cast<double>()).matrix().transpose(); // pxn\n\n      sum = params.pheno_pass.select( Y.array().rowwise().sum() , 10);\n\n      // BTs\n      if(params.trait_mode == 1){\n        n_cv = params.pheno_pass.select( M.transpose().array().rowwise().count().cast<double>() , 100);\n        
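/* for a binary trait the in-fold phenotype variance is phat*(1-phat) with phat = sum/n_cv the case fraction; a fold holding only cases or only controls gives variance ~0, which the check below rejects */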
sd_phenos = (sum/n_cv) * (1 - sum/n_cv);\n\n        if( sd_phenos.minCoeff(&minIndex) < params.numtol )\n          throw \"one of the folds has only cases/controls for phenotype '\" + files.pheno_names[minIndex] \n            + \"'. Either use smaller #folds (option --cv) or use LOOCV (option --loocv).\";\n      } else if(params.trait_mode == 2){\n\n        if( sum.minCoeff(&minIndex) == 0 )\n          throw \"one of the folds has only zero counts for phenotype '\" + files.pheno_names[minIndex] \n            + \"'. Either use smaller #folds (option --cv) or use LOOCV (option --loocv).\";\n      }\n\n      cum_size_folds += params.cv_sizes(i);\n    }\n\n  }\n\n  // only used for K-fold CV\n  if(!params.use_loocv && !params.within_sample_l0){\n    l1_ests.X_folds.resize(params.cv_folds);\n    l1_ests.XtY.resize(params.cv_folds);\t\n  }\n\n}\n\n\nvoid Data::setmem() {\n\n  bool is_set = false;\n  sout << \" * setting memory...\" << flush;\n\n  set_folds();\n  l1_ests.cumsum_values.resize(6);\n  if(params.test_l0) l1_ests.cumsum_values_full.resize(6);\n  predictions.resize(1);\n  predictions[0] = MatrixXd::Zero(params.n_samples, total_chrs_loco);\n\n  if (params.within_sample_l0) {\n    l1_ests.pred_mat.resize(params.n_pheno);\n    l1_ests.pred_pheno.resize(params.n_pheno);\n  } else if(!params.use_loocv) l1_ests.beta_hat_level_1.resize(params.n_pheno);\n\n  if (params.use_loocv || params.trait_mode == 3) {\n    l1_ests.test_mat_conc.resize(params.n_pheno);\n    l1_ests.fold_id.resize(params.n_pheno);\n  } else {\n    l1_ests.test_pheno.resize(params.n_pheno);\n    l1_ests.test_mat.resize(params.n_pheno);\n  }\n\n  if(params.trait_mode){ // non-QT\n    if (params.within_sample_l0) {\n      l1_ests.pred_pheno_raw.resize(params.n_pheno);\n      l1_ests.pred_offset.resize(params.n_pheno);\n    }\n    l1_ests.test_pheno_raw.resize(params.n_pheno);\n    if(!params.use_loocv) l1_ests.test_offset.resize(params.n_pheno);\n  }\n  masked_in_folds.resize(params.cv_folds);\n  if(params.print_block_betas) {\n    if(params.n_pheno>1) throw \"cannot have run --print in multi-trait mode!\";\n    params.beta_print_out.resize(params.n_pheno);\n    l1_ests.beta_snp_step1.resize(params.n_variants, params.n_ridge_l0);\n  }\n\n  for(int i = 0; i < params.n_pheno; ++i ) {\n\n    if( !params.pheno_pass(i) && (!params.write_l0_pred || (i!=0)) ) continue;\n\n    if (params.within_sample_l0) {\n      l1_ests.pred_mat[i].resize(params.cv_folds);\n      l1_ests.pred_pheno[i].resize(params.cv_folds);\n    } else if(!params.use_loocv) l1_ests.beta_hat_level_1[i].resize(params.cv_folds);\n\n    if (params.use_loocv || params.trait_mode == 3) {\n      l1_ests.test_mat_conc[i] = MatrixXd::Zero(params.n_samples, params.n_ridge_l0 * ( params.write_l0_pred ? 
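/* with level 0 predictions streamed to disk (write_l0_pred), only one block of columns is sized here; the full set of columns is presumably restored when read_l0() loads the predictions back for one phenotype at a time */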
1 : params.total_n_block) );\n      l1_ests.fold_id[i].resize(params.n_samples);\n    } else {\n      l1_ests.test_pheno[i].resize(params.cv_folds);\n      l1_ests.test_mat[i].resize(params.cv_folds);\n    }\n    \n    if(params.trait_mode) {\n      if (params.within_sample_l0) {\n        l1_ests.pred_pheno_raw[i].resize(params.cv_folds);\n        l1_ests.pred_offset[i].resize(params.cv_folds);\n      }\n      l1_ests.test_pheno_raw[i].resize(params.cv_folds);\n      if(!params.use_loocv) l1_ests.test_offset[i].resize(params.cv_folds);\n    }\n\n    for(int j = 0; j < params.cv_folds; ++j ) {\n\n      if (params.within_sample_l0) {\n        l1_ests.pred_mat[i][j] = MatrixXd::Zero(params.n_samples - params.cv_sizes(j), params.total_n_block * params.n_ridge_l0);\n        l1_ests.pred_pheno[i][j] = MatrixXd::Zero(params.n_samples - params.cv_sizes(j), 1);\n      } else if(!params.use_loocv) l1_ests.beta_hat_level_1[i][j] = MatrixXd::Zero(params.total_n_block * params.n_ridge_l0, params.n_ridge_l1);\n\n      if(!params.use_loocv && params.trait_mode != 3) {\n        l1_ests.test_pheno[i][j] = MatrixXd::Zero(params.cv_sizes(j), 1);\n        l1_ests.test_mat[i][j] = MatrixXd::Zero(params.cv_sizes(j), params.n_ridge_l0 * ( params.write_l0_pred ? 1 : params.total_n_block));\n      }\n\n      if(params.trait_mode) {\n        if (params.within_sample_l0) {\n          l1_ests.pred_pheno_raw[i][j] = MatrixXd::Zero(params.n_samples - params.cv_sizes(j), 1);\n          l1_ests.pred_offset[i][j] = MatrixXd::Zero(params.n_samples - params.cv_sizes(j), 1);\n        }\n        l1_ests.test_pheno_raw[i][j] = MatrixXd::Zero(params.cv_sizes(j), 1);\n        if(!params.use_loocv && params.trait_mode != 3) l1_ests.test_offset[i][j] = MatrixXd::Zero(params.cv_sizes(j), 1);\n      }\n\n    }\n\n    if(!is_set) {// only done once\n      for(int j = 0; j < params.cv_folds; ++j ) \n        masked_in_folds[j] = MatrixXb::Constant(params.cv_sizes(j), params.n_pheno, false);\n      is_set = true;\n    }\n\n\n  }\n\n  sout << \"done\\n\\n\";\n\n}\n\nvoid Data::get_block_size(int const& target, int const& total, int const& block, int& bs){\n\n  if( ((block + 1) * target) > total)\n    bs = total - (block * target);\n  else\n    bs = target;\n\n}\n\n/////////////////////////////////////////////////\n/////////////////////////////////////////////////\n////          step 1: level 0\n/////////////////////////////////////////////////\n/////////////////////////////////////////////////\n\nvoid Data::level_0_calculations() {\n\n  if(params.run_l1_only) {\n    set_mem_l1(&files, &params, &in_filters, &m_ests, &Gblock, &pheno_data, &l1_ests, masked_in_folds, sout);\n    sout << \" (skipping to level 1 models)\";\n    return;\n  }\n\n  int block = 0, bs; \n  if(params.print_block_betas) params.print_snpcount = 0;\n  ridgel0 l0;\n\n  // set ridge params\n  params.lambda = (params.run_l0_only ? 
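/* map each user-level shrinkage value lambda in (0,1] to a ridge penalty tau = M*(1-lambda)/lambda, where M is the step 1 variant count (taken from the master file header when only level 0 is run); e.g. lambda=0.5 gives tau=M, and larger lambda means weaker shrinkage */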
params.parallel_nGeno : params.n_variants) * (1 - params.lambda) / params.lambda;\n\n  if(!params.use_loocv){\n    l0.G_folds.resize(params.cv_folds);\n    l0.GtY.resize(params.cv_folds);\n  }\n\n  // open streams to write level 0 predictions\n  if(params.write_l0_pred){\n    string fout_p;\n    files.write_preds_files.resize(params.n_pheno);\n    for(int ph = 0; ph < params.n_pheno; ph++){\n      if( !params.pheno_pass(ph) ) continue;\n      files.write_preds_files[ph] = std::make_shared<ofstream>();\n      fout_p = files.loco_tmp_prefix + \"_l0_Y\" + to_string(ph+1);\n      openStream(files.write_preds_files[ph].get(), fout_p, ios::out | ios::binary, sout);\n    }\n  }\n\n  if(params.test_l0){\n    l0.ymat_res = pheno_data.phenotypes;\n    l1_ests.top_snp_pgs.assign(params.nChrom + 1, MatrixXd::Zero(params.n_samples, params.n_pheno));\n    l0.nspns_picked = l0.nspns_picked_block = ArrayXi::Zero(params.n_pheno);\n    if(params.l0_snp_pval_thr < 0)\n      params.l0_snp_pval_thr = 0.05 / min((uint)1e6, params.n_variants);\n    sout << \" * p-value threshold for selecting top SNPs in level 0 blocks = \" <<  params.l0_snp_pval_thr << \"\\n\\n\";\n  }\n\n  // start level 0\n  for (size_t itr = 0; itr < files.chr_read.size(); ++itr) {\n\n    int chrom = files.chr_read[itr];\n    if( !in_map(chrom, chr_map) ) continue;\n\n    int chrom_nsnps = chr_map[chrom][0];\n    int chrom_nb = chr_map[chrom][1];\n    if(chrom_nb == 0) continue;\n\n    sout << \"Chromosome \" << chrom << endl;\n    //sout << \"Ns=\"<< chrom_nsnps << endl;\n\n    for(int bb = 0; bb < chrom_nb ; bb++) {\n\n      get_block_size(params.block_size, chrom_nsnps, bb, bs);\n\n      Gblock.Gmat = MatrixXd::Zero(bs, params.n_samples);\n      if(params.alpha_prior != -1) Gblock.snp_afs = MatrixXd::Zero(bs, 1);\n\n      get_G(block, bs, chrom, in_filters.step1_snp_count, snpinfo, &params, &files, &Gblock, &in_filters, pheno_data.masked_indivs, pheno_data.phenotypes_raw, sout);\n\n      // residualize and scale genotypes\n      residualize_genotypes();\n\n      // calc working matrices for ridge regressions across folds\n      calc_cv_matrices(&l0);\n\n      // test association for block\n      if(params.test_l0)\n        test_assoc_block(chrom, block, l0, l1_ests, &Gblock, &pheno_data, &snpinfo[in_filters.step1_snp_count], params, sout);\n\n      // calc level 0 ridge regressions\n      if(params.use_loocv)\n        ridge_level_0_loocv(block, &files, &params, &in_filters, &m_ests, &Gblock, &pheno_data, snpinfo, &l0, &l1_ests, sout);\n      else\n        ridge_level_0(block, &files, &params, &in_filters, &m_ests, &Gblock, &pheno_data, snpinfo, &l0, &l1_ests, masked_in_folds, sout);\n\n      if(params.print_block_betas && params.use_loocv) // keep on raw scale\n        l1_ests.beta_snp_step1.middleRows(in_filters.step1_snp_count, bs).array().colwise() /= scale_G.array() / pheno_data.scale_Y(0);\n\n      block++; in_filters.step1_snp_count += bs;\n    }\n  }\n\n  // close streams\n  if(params.write_l0_pred) {\n    for(int ph = 0; ph < params.n_pheno; ph++){\n      if( !params.pheno_pass(ph) ) continue;\n      if(files.write_preds_files[ph]->is_open()) files.write_preds_files[ph]->close();\n    }\n  }\n  \n  if(params.test_l0) {\n    if(params.use_loocv) {\n    } else {\n      uint32_t cum_size_folds = 0;\n      for(int ph = 0; ph < params.n_pheno; ph++)\n        for(int i = 0; i < params.cv_folds; ++i ) {\n          l1_ests.test_pheno[ph][i] = pheno_data.phenotypes.block(cum_size_folds, ph, params.cv_sizes(i), 1) - 
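/* when test_l0 is on, the top-SNP PGS is subtracted so level 1 is trained on the residual phenotype; the PGS is added back when the final predictors are formed */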
l1_ests.top_snp_pgs[0].block(cum_size_folds, ph, params.cv_sizes(i), 1);\n          cum_size_folds += params.cv_sizes(i);\n        }\n    }\n  }\n\n  if(params.early_exit) {\n    sout << \"\\nDone printing out level 0 predictions. There are \" <<\n      params.n_samples << \" rows and \" <<\n      params.total_n_block * params.n_ridge_l0 << \" columns \" <<\n      \"stored in column-major order. Exiting...\\n\";\n    exit_early();\n  } else if(params.run_l0_only) {\n    sout << \"\\nDone writing level 0 predictions to file.\\n\";\n    exit_early();\n  }\n  if(params.test_l0) sout << \"\\n* # picked top SNPs at level 0 for each trait = [ \" << l0.nspns_picked.matrix().transpose() << \" ]\\n\";\n\n  // free up memory not used anymore\n  Gblock.Gmat.resize(0,0);\n  if(params.write_l0_pred && (params.n_pheno > 1) ){\n    // free level 0 predictions for (P-1) indices in test_mat\n    for(int ph = 1; ph < params.n_pheno; ++ph ) {\n      if( !params.pheno_pass(ph) ) continue;\n      if((!params.use_loocv) && (params.trait_mode != 3)){ // k-fold\n        for(int i = 0; i < params.cv_folds; ++i ) \n          l1_ests.test_mat[ph][i].resize(0,0);\n        l1_ests.test_mat[ph].resize(0);\n      } else {\n        l1_ests.test_mat_conc[ph].resize(0,0); // loocv\n      }\n    }\n  }\n}\n\nvoid Data::calc_cv_matrices(struct ridgel0* l0) {\n\n  sout << \"   -calc working matrices...\" << flush;\n  auto t2 = std::chrono::high_resolution_clock::now();\n  int bs = Gblock.Gmat.rows();\n\n  if(!params.use_loocv){ // k-fold\n\n    l0->GGt.setZero(bs,bs);\n    l0->GTY.setZero(bs,params.n_pheno);\n    uint32_t cum_size_folds = 0;\n\n    for( int i = 0; i < params.cv_folds; ++i ) {\n      MapMatXd Gmat (&(Gblock.Gmat(0,cum_size_folds)), bs, params.cv_sizes(i));\n      if(params.test_l0)\n        l0->GtY[i] = Gmat * l0->ymat_res.middleRows(cum_size_folds, params.cv_sizes(i));\n      else\n        l0->GtY[i] = Gmat * pheno_data.phenotypes.middleRows(cum_size_folds, params.cv_sizes(i));\n      l0->GTY += l0->GtY[i];\n      l0->G_folds[i] = Gmat * Gmat.transpose();\n      l0->GGt += l0->G_folds[i];\n      cum_size_folds += params.cv_sizes(i);\n    }\n    \n  } else { // loocv\n\n    l0->GGt.setZero(bs,bs);\n    l0->GGt.selfadjointView<Lower>().rankUpdate(Gblock.Gmat);\n    l0->GGt.triangularView<Eigen::Upper>() = l0->GGt.transpose(); // fill upper-triangular part\n    if(params.test_l0)\n      l0->GTY = Gblock.Gmat * l0->ymat_res;\n    else\n      l0->GTY = Gblock.Gmat * pheno_data.phenotypes;\n    if(!params.test_l0){\n      SelfAdjointEigenSolver<MatrixXd> esG(l0->GGt);\n      l0->GGt_eig_vec = esG.eigenvectors();\n      l0->GGt_eig_val = esG.eigenvalues();\n      l0->Wmat = l0->GGt_eig_vec.transpose() * l0->GTY;\n    }\n\n  }\n\n  sout << \"done\";\n  auto t3 = std::chrono::high_resolution_clock::now();\n  auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(t3 - t2);\n  sout << \" (\" << duration.count() << \"ms) \"<< endl;\n}\n\n// select which level 0 predictors to use at level 1\nvoid Data::prep_l1_models(){\n\n  int bs_l1 = params.total_n_block * params.n_ridge_l0;\n  // arrayxb of which level 0 predictors to keep (default is all)\n  l1_ests.l0_colkeep = MatrixXb::Constant(bs_l1, params.n_pheno, true);\n\n  if(params.select_l0){\n    // read in pvals for level 0 blocks\n    int lineread = 0;\n    string line;\n    std::vector< string > tmp_str_vec ;\n    Files fClass;\n    fClass.openForRead(params.l0_pvals_file, sout);\n\n    l1_ests.l0_pv_block.resize(params.total_n_block, params.n_pheno);\n  
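/* format of the p-value file parsed below, one row per level 0 block: field 1 = chromosome, field 2 is not used here, then one p-value per phenotype in fields 3..P+2; e.g. for two phenotypes a row could read "7 B1 0.02 0.81" (placeholder values) */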
  l1_ests.chrom_block.resize(params.total_n_block);\n\n    while( fClass.readLine(line) ){\n      tmp_str_vec = string_split(line,\" \");\n      if(lineread >= params.total_n_block)\n        throw \"number of blocks in file is greater than that analyzed in run.\";\n      l1_ests.chrom_block(lineread) = atoi(tmp_str_vec[0].c_str());\n      if( (int)tmp_str_vec.size() > (params.n_pheno + 2))\n        throw \"number of phenotypes in file is greater than that analyzed in run.\";\n      for(int i = 0; i < params.n_pheno; i++)\n        l1_ests.l0_pv_block(lineread, i) = convertDouble(tmp_str_vec[i + 2], &params, sout);\n      lineread++;\n    }\n  }\n\n  // set ridge params\n  ArrayXd base_tau = params.tau[0];\n  params.tau.assign(params.n_pheno, base_tau);\n\n  // for chr map\n  l1_ests.chrom_map_ndiff = ArrayXi::Zero(params.nChrom);\n\n}\n\n// identify which block to analyze\nvoid Data::prep_parallel_l0(){\n\n  int tmpi;\n  string line, fin = files.split_file; // master file\n  std::vector< string > tmp_str_vec ;\n  ifstream infile;\n\n  // print info\n  sout << \" * running jobs in parallel (job #\" << params.job_num << \")\\n\";\n\n  openStream(&infile, fin, ios::in, sout);\n\n  // check header\n  if(!getline(infile, line))\n    throw \"cannot read header line in master file.\"; \n\n  if( (sscanf( line.c_str(), \"%d %d\", &params.parallel_nGeno, &tmpi ) != 2) || (tmpi != params.block_size) )\n    throw \"invalid header line in master file.\"; \n\n  // skip to line job_num\n  int nskip=1;\n  while( (nskip++ < params.job_num) && !infile.eof() )\n    infile.ignore(std::numeric_limits<std::streamsize>::max(), '\\n');\n\n  if( (--nskip != params.job_num) || infile.eof() )\n    throw \"could not read line \" + to_string( params.job_num+1 ) + \" (check number of lines in file).\";\n  \n  // read in line\n  getline(infile, line);\n  char tmp_chr[MAXFILELEN];\n\n  if( sscanf( line.c_str(), \"%s %d %d\", tmp_chr, &params.parallel_nBlocks, &params.parallel_nSnps ) != 3 )\n    throw \"could not read line \" + to_string( params.job_num + 1 ) + \" (check number of lines and format in file).\"; \n\n  files.loco_tmp_prefix = tmp_chr;\n  files.file_snps_include.resize(1);\n  files.file_snps_include[0] = files.loco_tmp_prefix + \".snplist\";\n\n  infile.close();\n  //cerr << files.loco_tmp_prefix << \" \" << params.parallel_nBlocks << \" \" << params.parallel_nSnps << endl;\n\n}\n\n\nvoid Data::prep_parallel_l1(){\n\n  int nblocks, lineread, nb ,ns; // make sure all blocks are read\n  uint32_t nsnps;\n  string line;\n  string fin = files.split_file; // master file\n  std::vector< string > tmp_str_vec ;\n  ifstream infile;\n\n  openStream(&infile, fin, ios::in, sout);\n\n  // check header\n  if(!getline(infile, line))\n    throw \"cannot read header line in master file.\"; \n  if( (sscanf( line.c_str(), \"%d %d\", &params.parallel_nGeno, &nb ) != 2) || (nb != params.block_size) )\n    throw \"invalid header line in master file.\"; \n\n  nblocks = 0, nsnps = 0, lineread=0;\n  while( getline(infile, line) ){\n\n    char tmp_chr[MAXFILELEN];\n    if( sscanf( line.c_str(), \"%s %d %d\", tmp_chr, &nb, &ns ) != 3 )\n      throw \"could not read line \" + to_string( params.job_num + 1 ) + \" (check number of lines and format in file).\"; \n\n    files.bstart.push_back( nblocks );\n    files.btot.push_back( nb );\n    files.mprefix.push_back( string(tmp_chr) );\n\n    // check params\n    if( (files.bstart[lineread] < 0) || (files.bstart[lineread]>params.total_n_block) || (files.btot[lineread] < 0) )\n      throw 
\"invalid block information in master file at line \" + to_string( lineread + 2 ) + \".\";\n\n    nblocks += nb; // update # blocks\n    nsnps += ns;\n    lineread++;\n  }\n\n  if((nblocks != params.total_n_block) || (nsnps != params.n_variants))\n    throw \"number of blocks/variants in master file '\" + fin + \"' doesn't match that in the analysis.\";\n\n  // print info\n  params.job_num = lineread;\n  sout << \" * using results from running \" << params.job_num << \" parallel jobs at level 0\\n\";\n\n  infile.close();\n\n}\n\n\nvoid Data::write_inputs(){\n\n  // write Y\n  IOFormat Fmt(FullPrecision, DontAlignCols, \" \", \"\\n\", \"\", \"\",\"\",\"\");\n  ofstream ofile;\n  openStream(&ofile, files.out_file + \"_y.txt\", ios::out, sout);\n  if(params.trait_mode == 0)\n    ofile << pheno_data.phenotypes.format(Fmt) << \"\\n\";\n  else\n    ofile << pheno_data.phenotypes_raw.format(Fmt) << \"\\n\";\n  ofile.close();\n\n  // write X\n  openStream(&ofile, files.out_file + \"_x.txt\", ios::out, sout);\n  ofile << pheno_data.new_cov.format(Fmt) << \"\\n\";\n  ofile.close();\n\n  // write offset\n  if(params.trait_mode != 0 && params.trait_mode != 3){\n    openStream(&ofile, files.out_file + \"_offset.txt\", ios::out, sout);\n    ofile << m_ests.offset_nullreg.format(Fmt) << \"\\n\";\n    ofile.close();\n  } else if (params.trait_mode == 3) {\n    openStream(&ofile, files.out_file + \"_offset.txt\", ios::out, sout);\n    ofile << firth_est.cov_blup_offset.format(Fmt) << \"\\n\";\n    ofile.close();\n  }\n}\n\nvoid Data::exit_early(){\n\n    runtime.stop();\n    sout << \"\\nElapsed time : \" << std::chrono::duration<double>(runtime.end - runtime.begin).count() << \"s\\n\";\n    sout << \"End time: \" << ctime(&runtime.end_time_info) << endl;\n    exit( EXIT_SUCCESS );\n\n}\n\n\n/////////////////////////////////////////////////\n/////////////////////////////////////////////////\n////          Evaluate level 1 output\n/////////////////////////////////////////////////\n/////////////////////////////////////////////////\n\nvoid Data::output() {\n\n  int min_index;\n  double performance_measure, rsq, sse, ll_avg, min_val;\n  double rate=0, zv;\n  string pfile, out_blup_list, out_prs_list, out_firth_list, loco_filename, prs_filename, firth_filename;\n  string fullpath_str, path_prs, path_firth;\n  Files outb, outp, outf;\n\n  sout << \"Output\\n------\\n\";\n\n  if(params.make_loco || params.trait_mode){\n    out_blup_list = files.out_file + \"_pred.list\";\n    outb.openForWrite(out_blup_list, sout);\n  }\n\n  if(params.print_prs){\n    out_prs_list = files.out_file + \"_prs.list\";\n    outp.openForWrite(out_prs_list, sout);\n  }\n\n  if(params.write_null_firth){\n    out_firth_list = files.out_file + \"_firth.list\";\n    outf.openForWrite(out_firth_list, sout);\n    m_ests.blups.resize(params.n_samples, params.n_pheno);\n  }\n\n  for(int ph = 0; ph < params.n_pheno; ++ph ) {\n    if( !params.pheno_pass(ph) ) continue;\n\n    sout << \"phenotype \" << ph+1 << \" (\" << files.pheno_names[ph] << \") : \" ;\n    loco_filename = files.out_file + \"_\" + to_string(ph + 1) + \".loco\" + (params.gzOut ? \".gz\" : \"\");\n    prs_filename = files.out_file + \"_\" + to_string(ph + 1) + \".prs\" + (params.gzOut ? \".gz\" : \"\");\n    firth_filename = files.out_file + \"_\" + to_string(ph + 1) + \".firth\" + (params.gzOut ? \".gz\" : \"\");\n\n    if( params.make_loco || params.trait_mode || params.print_prs ) {\n\n      fullpath_str = (params.use_rel_path ? 
loco_filename : get_fullpath(loco_filename));\n      if(params.print_prs) path_prs = (params.use_rel_path ? prs_filename : get_fullpath(prs_filename));\n\n      if(params.trait_mode == 0) { // for quantitative traits\n\n        outb << files.pheno_names[ph]  << \" \" <<  fullpath_str << endl;\n        if(params.print_prs) \n          outp << files.pheno_names[ph]  << \" \" <<  path_prs << endl;\n\n      } else { // check level 1 ridge converged\n\n        if( !l1_ests.pheno_l1_not_converged(ph) ) {\n          outb << files.pheno_names[ph]  << \" \" << fullpath_str << endl;\n          if(params.print_prs) \n            outp << files.pheno_names[ph]  << \" \" <<  path_prs << endl;\n\n        } else { // failed level 1\n\n          if(params.write_l0_pred && params.rm_l0_pred) \n            rm_l0_files(ph); // cleanup level 0 predictions\n          sout << \"Level 1 model did not converge. LOCO predictions calculations are skipped.\\n\\n\";\n          continue;\n\n        }\n      }\n    }\n    sout << endl;\n\n    min_index = 0;\n    min_val = 1e10;\n\n    // determine optimal parameter by cv using: QT: MSE, nonQT: -loglik\n    for(int j = 0; j < params.n_ridge_l1; ++j ) {\n      if(params.trait_mode == 0)\n        performance_measure = l1_ests.cumsum_values[2](ph, j) + l1_ests.cumsum_values[3](ph,j) - 2 * l1_ests.cumsum_values[4](ph,j);\n      else\n        performance_measure = l1_ests.cumsum_values[5](ph, j);\n\n      if(params.trait_mode != 3) performance_measure /= pheno_data.Neff(ph);\n\n      if( performance_measure < min_val) {\n        min_index = j;\n        min_val = performance_measure;\n      }\n    }\n\n    if(params.trait_mode == 2)\n      rate = pheno_data.phenotypes_raw.col(ph).sum() / pheno_data.Neff(ph); // separate for each trait\n\n    for(int j = 0; j < params.n_ridge_l1; ++j ) {\n      if (params.trait_mode == 3) {\n        sout << \" \" << setw(5) << params.tau[ph](j) << \" : \" << \"Deviance = \" << l1_ests.cumsum_values[5](ph, j);\n        if(j == min_index) \n          sout << \"<- min value\";\n        sout << endl;\n        continue;\n      }\n\n      if(params.trait_mode == 2){\n        zv = exp(l1_ests.l0_colkeep.col(ph).count() / params.tau[ph](j)) - 1; \n        sout << \"  \" << setw(5) << rate * zv / (1 + rate * zv);\n      } else sout << \"  \" << setw(5) << l1_ests.l0_colkeep.col(ph).count() / (l1_ests.l0_colkeep.col(ph).count() + (params.trait_mode == 1? 
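/* displayed shrinkage is k/(k + c*tau) with k the number of level 0 predictors kept; for binary traits c = pi^2/3, the residual variance of a logistic variate on the latent scale */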
(M_PI * M_PI / 3) : 1) * params.tau[ph](j) );\n\n      // output Rsq and MSE\n      rsq = l1_ests.cumsum_values[4](ph,j) - l1_ests.cumsum_values[0](ph,j) * l1_ests.cumsum_values[1](ph,j) / pheno_data.Neff(ph); // num = Sxy - SxSy/n\n      rsq = (rsq * rsq) / ((l1_ests.cumsum_values[2](ph,j) - l1_ests.cumsum_values[0](ph,j) * l1_ests.cumsum_values[0](ph,j) / pheno_data.Neff(ph)) * (l1_ests.cumsum_values[3](ph,j) - l1_ests.cumsum_values[1](ph,j) * l1_ests.cumsum_values[1](ph,j) / pheno_data.Neff(ph))); // num^2 / ( (Sx2 - Sx^2/n)* (Sy2 - Sy^2/n) )\n      sse = l1_ests.cumsum_values[2](ph, j) + l1_ests.cumsum_values[3](ph,j) - 2 * l1_ests.cumsum_values[4](ph,j); // Sx2 + Sy2 - SxSy\n      if(params.trait_mode) ll_avg = l1_ests.cumsum_values[5](ph, j) / pheno_data.Neff(ph);\n\n      sout << \" : \" \n        << \"Rsq = \" << rsq;\n      if(params.test_l0){ // pred = p1 + top_snp_pgs; Y is res pheno\n          double rsq_pgs = l1_ests.cumsum_values_full[4](ph,j) - l1_ests.cumsum_values_full[0](ph,j) * l1_ests.cumsum_values_full[1](ph,j) / pheno_data.Neff(ph); // num = Sxy - SxSy/n\n      rsq_pgs = (rsq_pgs * rsq_pgs) / ((l1_ests.cumsum_values_full[2](ph,j) - l1_ests.cumsum_values_full[0](ph,j) * l1_ests.cumsum_values_full[0](ph,j) / pheno_data.Neff(ph)) * (l1_ests.cumsum_values_full[3](ph,j) - l1_ests.cumsum_values_full[1](ph,j) * l1_ests.cumsum_values_full[1](ph,j) / pheno_data.Neff(ph))); // num^2 / ( (Sx2 - Sx^2/n)* (Sy2 - Sy^2/n) )\n        sout << \" (with top_snps_pgs = \" << rsq_pgs << \")\"; \n      }\n      if(params.trait_mode!=2) \n        sout  << \", MSE = \" << sse/pheno_data.Neff(ph);\n      if(params.trait_mode) \n        sout << \", -logLik/N = \" << ll_avg;\n      if(j == min_index) \n        sout << \"<- min value\";\n      sout << endl;\n\n    }\n\n    if(params.trait_mode == 0){\n      if(params.use_loocv) \n        make_predictions_loocv(ph, min_index);\n      else \n        make_predictions(ph, min_index);\n    } else if(params.trait_mode == 1){\n      if(params.l1_full_samples) \n        make_predictions_binary_loocv_full(ph, min_index);\n      else if(params.use_loocv) \n        make_predictions_binary_loocv(ph, min_index);\n      else \n        make_predictions_binary(ph, min_index);\n    } else if(params.trait_mode == 2){\n      if(params.use_loocv) \n        make_predictions_count_loocv(ph, min_index);\n      else \n        make_predictions_count(ph, min_index);\n    } else if(params.trait_mode == 3){\n      if (!params.use_loocv)\n        make_predictions_cox(ph, min_index);\n    }\n\n    // check if firth estimates converged (should have been written to file)\n    if(params.write_null_firth && file_exists(firth_filename)){\n      path_firth = (params.use_rel_path ? 
firth_filename : get_fullpath(firth_filename));\n      outf << files.pheno_names[ph]  << \" \" <<  path_firth << endl;\n    }\n\n    // delete file used to store l0 predictions\n    if(params.write_l0_pred && params.rm_l0_pred)\n      rm_l0_files(ph);\n\n  }\n\n  if(params.make_loco || (params.trait_mode!=0)){\n    outb.closeFile();\n    sout << \"List of blup files written to: [\" \n      << out_blup_list << \"]\\n\";\n  }\n  if(params.write_null_firth) {\n    outf.closeFile();\n    sout << \"List of files with null Firth estimates written to: [\" \n      << out_firth_list << \"]\\n\";\n  }\n  if(params.print_prs) {\n    outp.closeFile();\n    sout << \"List of files with whole genome PRS written to: [\" \n      << out_prs_list << \"]\\n\";\n  }\n\n}\n\nvoid Data::rm_l0_files(int const& ph){\n\n  string pfile;\n\n  if(!params.run_l1_only){\n    pfile = files.loco_tmp_prefix + \"_l0_Y\" + to_string(ph+1);\n    remove(pfile.c_str());\n  } else {\n    for(auto const& pfx : files.mprefix){\n      pfile = pfx + \"_l0_Y\" + to_string(ph+1);\n      if(file_exists(pfile)) remove(pfile.c_str()); // l0 predictions\n      pfile = pfx + \".snplist\";\n      if(file_exists(pfile)) remove(pfile.c_str()); // snplist\n    }\n  }\n\n}\n\n// convert filename to full path\nstd::string get_fullpath(std::string fname){\n\n  string fout;\n  fs::path fullpath;\n\n  try {\n\n    // convert to full path using boost filesystem library\n    // this can generate errors due to LC_ALL locale being invalid\n    fullpath = fs::absolute(fname);\n    fout = fullpath.make_preferred().string();\n\n  } catch ( std::runtime_error& ex ) {\n\n    // to avoid boost::filesystem error\n    setenv(\"LC_ALL\", \"C\", 1);\n\n    try {\n\n      // try again\n      fullpath = fs::absolute(fname);\n      fout = fullpath.make_preferred().string();\n\n    } catch ( std::runtime_error& ex ) {\n\n      try {\n\n        // use realpath\n        char buf[PATH_MAX];\n        char *res = realpath(fname.c_str(), buf);\n        if(res) fout = string(buf);\n        else fout = fname; // if failed to get full path\n\n      } catch ( const std::bad_alloc& ) {\n        fout = fname; // if failed to get full path\n      } catch ( std::runtime_error& ex ) {\n        fout = fname; // if failed to get full path\n      }\n\n    }\n  }\n\n  return fout;\n\n}\n\nvoid Data::make_predictions(int const& ph, int const& val) {\n\n  sout << \"  * making predictions...\" << flush;\n  auto t1 = std::chrono::high_resolution_clock::now();\n  int ph_eff = params.write_l0_pred ? 
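/* when level 0 predictions were written to disk they are re-read one phenotype at a time into slot 0, hence ph_eff = 0 in that mode (same convention in the other make_predictions_* functions) */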
0 : ph;\n\n  // read in level 0 predictions from file\n  if(params.write_l0_pred)\n    read_l0(ph, ph_eff, &files, &params, &l1_ests, sout);\n  check_l0(ph, ph_eff, &params, &l1_ests, &pheno_data, sout, true);\n\n  int bs_l1 = l1_ests.test_mat[ph_eff][0].cols();\n  MatrixXd ident_l1 = MatrixXd::Identity(bs_l1,bs_l1);\n  MatrixXd X1, X2, beta_l1, beta_avg;\n  string outname;\n  ofstream ofile;\n\n  if(params.within_sample_l0){ // DEPRECATED\n    X1 = l1_ests.test_mat[ph_eff][0].transpose() * l1_ests.test_mat[ph_eff][0];\n    X2 = l1_ests.test_mat[ph_eff][0].transpose() * l1_ests.test_pheno[ph][0];\n    for(int i = 1; i < params.cv_folds; ++i ) {\n      X1 += l1_ests.test_mat[ph_eff][i].transpose() * l1_ests.test_mat[ph_eff][i];\n      X2 += l1_ests.test_mat[ph_eff][i].transpose() * l1_ests.test_pheno[ph][i];\n    }\n    beta_l1 = (X1 + params.tau[ph](val) * ident_l1).llt().solve(X2);\n  } else if(params.print_block_betas) {\n    beta_avg = MatrixXd::Zero(bs_l1, 1);\n    for(int i = 0; i < params.cv_folds; ++i ) {\n      beta_avg += l1_ests.beta_hat_level_1[ph][i].col(val);\n    }\n    beta_avg /= params.cv_folds;\n  }\n\n  // if specified, write betas to file (open in append mode)\n  if(params.print_block_betas && !params.within_sample_l0 ) {\n    outname = files.out_file + \"_level1.betas\";\n    openStream(&ofile, outname, ios::out | ios::app, sout);\n    ofile << ph + 1 << \" \";\n    ofile << beta_avg.transpose() << endl;\n    ofile.close();\n  }\n\n  // sout << \"\\nFor tau[\" << val <<\"] = \" << params.tau[ph](val) << endl <<  beta_l1 << endl ;\n  int ctr = 0, chr_ctr = 0;\n  int nn, cum_size_folds;\n\n  for (size_t itr = 0; itr < files.chr_read.size(); ++itr) {\n    int chrom = files.chr_read[itr];\n    if( !in_map(chrom, chr_map) ) continue;\n\n    nn = chr_map[chrom][1] * params.n_ridge_l0 - l1_ests.chrom_map_ndiff(chrom-1);\n    if(nn > 0) {\n      cum_size_folds = 0;\n      for(int i = 0; i < params.cv_folds; ++i ) {\n        if(!params.within_sample_l0) beta_l1 = l1_ests.beta_hat_level_1[ph][i].col(val);\n        predictions[0].block(cum_size_folds, chr_ctr, params.cv_sizes(i), 1) = l1_ests.test_mat[ph_eff][i].block(0, ctr, params.cv_sizes(i), nn) * beta_l1.block(ctr, 0, nn, 1);\n        cum_size_folds += params.cv_sizes(i);\n      }\n      if(params.test_l0) predictions[0].col(chr_ctr) += l1_ests.top_snp_pgs[chrom].col(ph);\n      chr_ctr++;\n      ctr += nn;\n    }\n  }\n\n  write_predictions(ph);\n\n  sout << \"done\";\n  auto t2 = std::chrono::high_resolution_clock::now();\n  auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1);\n  sout << \" (\" << duration.count() << \"ms) \"<< endl << endl;\n}\n\n\nvoid Data::make_predictions_loocv(int const& ph, int const& val) {\n\n  sout << \"  * making predictions...\" << flush;\n  auto t1 = std::chrono::high_resolution_clock::now();\n  int ph_eff = params.write_l0_pred ? 
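/* a minimal sketch of the exact leave-one-out ridge algebra used below:\n     with H = X'X + tau*D, beta = H^-1 X'y, residual r_i = y_i - x_i'beta and\n     leverage h_i = x_i' H^-1 x_i, a Sherman-Morrison downdate gives\n       beta_(-i) = beta - H^-1 x_i * r_i / (1 - h_i)\n     so all leave-one-out fits reuse a single eigendecomposition of H\n     (below: bvec = beta, yres = r, calFactor = h) */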
0 : ph;\n\n  // read in level 0 predictions from file\n  if(params.write_l0_pred)\n    read_l0(ph, ph_eff, &files, &params, &l1_ests, sout);\n  check_l0(ph, ph_eff, &params, &l1_ests, &pheno_data, sout, true);\n\n  int bs_l1 = l1_ests.test_mat_conc[ph_eff].cols();\n  MatrixXd b0, xtx, tmpMat, HX_chunk;\n  VectorXd zvec, bvec, Yvec;\n  ArrayXd calFactor, yres;\n\n  uint64 max_bytes = params.chunk_mb * 1e6;\n  // amount of RAM used < max_mb [ creating (target_size * bs_l1) matrix ]\n  int nchunk = ceil( params.cv_folds * bs_l1 * sizeof(double) * 1.0 / max_bytes );\n  if (params.verbose) sout << nchunk << \" chunks...\" << flush;\n  int chunk, size_chunk, target_size = params.cv_folds / nchunk;\n  int j_start;\n  if(params.test_l0)\n    Yvec = pheno_data.phenotypes.col(ph) - l1_ests.top_snp_pgs[0].col(ph);\n  else\n    Yvec = pheno_data.phenotypes.col(ph);\n\n  xtx = l1_ests.test_mat_conc[ph_eff].transpose() * l1_ests.test_mat_conc[ph_eff];\n  xtx.diagonal().array() += params.tau[ph](val) * l1_ests.ridge_param_mult;\n  zvec = l1_ests.test_mat_conc[ph_eff].transpose() * Yvec;\n\n  // fit model on whole data again for optimal ridge param\n  SelfAdjointEigenSolver<MatrixXd> eigMat(xtx);\n  tmpMat = eigMat.eigenvectors() * (1/eigMat.eigenvalues().array()).matrix().asDiagonal() * eigMat.eigenvectors().transpose();\n  bvec = tmpMat * zvec;\n  yres = (Yvec - l1_ests.test_mat_conc[ph_eff] * bvec).array();\n\n  for(chunk = 0; chunk < nchunk; ++chunk ) {\n    size_chunk = chunk == nchunk - 1? params.cv_folds - target_size * chunk : target_size;\n    j_start = chunk * target_size;\n\n    HX_chunk = tmpMat * l1_ests.test_mat_conc[ph_eff].middleRows(j_start, size_chunk).transpose(); // k x Nc\n    calFactor = (l1_ests.test_mat_conc[ph_eff].middleRows(j_start, size_chunk).array() * HX_chunk.transpose().array()).matrix().rowwise().sum().array();\n    b0 = bvec.rowwise().replicate(size_chunk) - HX_chunk * (yres.segment(j_start, size_chunk)/(1-calFactor)).matrix().asDiagonal() ;\n\n    int ctr = 0, chr_ctr = 0;\n    int nn;\n\n    for (size_t itr = 0; itr < files.chr_read.size(); ++itr) {\n      int chrom = files.chr_read[itr];\n      if( !in_map(chrom, chr_map) ) continue;\n\n      nn = chr_map[chrom][1] * params.n_ridge_l0 - l1_ests.chrom_map_ndiff(chrom-1);\n      if(nn > 0) {\n        predictions[0].block(j_start, chr_ctr, size_chunk, 1) = (l1_ests.test_mat_conc[ph_eff].block(j_start, ctr, size_chunk, nn).array() * b0.block(ctr, 0, nn, size_chunk).transpose().array()).rowwise().sum();\n        if(params.test_l0) predictions[0].block(j_start, chr_ctr, size_chunk, 1) += l1_ests.top_snp_pgs[chrom].block(j_start, ph, size_chunk, 1);\n        chr_ctr++;\n        ctr += nn;\n      }\n    }\n  }\n\n  if(params.print_block_betas) {\n    //cerr << \"Wb:\\n\"<<l1_ests.test_mat_conc[ph_eff].topRows(5) * bvec << \"\\n\\nPRS:\\n\"<< predictions[0].rowwise().sum().head(5) << \"\\n\\n\";\n    print_snp_betas(bvec);\n  }\n\n  write_predictions(ph);\n\n  sout << \"done\";\n  auto t2 = std::chrono::high_resolution_clock::now();\n  auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1);\n  sout << \" (\" << duration.count() << \"ms) \"<< endl << endl;\n}\n\n\n// predictions for binary traits\nvoid Data::make_predictions_binary(int const& ph, int const& val) {\n\n  sout << \"  * making predictions...\" << flush;\n  auto t1 = std::chrono::high_resolution_clock::now();\n  int ph_eff = params.write_l0_pred ? 
0 : ph;\n\n  // read in level 0 predictions from file\n  if(params.write_l0_pred)\n    read_l0(ph, ph_eff, &files, &params, &l1_ests, sout);\n  check_l0(ph, ph_eff, &params, &l1_ests, &pheno_data, sout, true);\n\n  int bs_l1 = l1_ests.test_mat[ph_eff][0].cols();\n  ArrayXd etavec, pivec, wvec, zvec, score;\n  MatrixXd betaold, betanew, XtW, XtWX, XtWZ;\n  MatrixXd ident_l1 = MatrixXd::Identity(bs_l1,bs_l1);\n\n  // fit model using out-of-sample level 0 predictions from whole data\n  if(params.within_sample_l0){\n    betaold = MatrixXd::Zero(bs_l1, 1);\n\n    int niter_cur = 0;\n    while(niter_cur++ < params.niter_max_ridge){\n\n      XtWX = MatrixXd::Zero(bs_l1, bs_l1);\n      XtWZ = MatrixXd::Zero(bs_l1, 1);\n\n      for(int i = 0; i < params.cv_folds; ++i ) {\n        etavec = (l1_ests.test_offset[ph][i] + l1_ests.test_mat[ph_eff][i] * betaold).array();\n        pivec = 1 - 1/(etavec.exp() + 1);\n        wvec =  pivec * (1 - pivec);\n        zvec = (etavec - l1_ests.test_offset[ph][i].array()) + (l1_ests.test_pheno_raw[ph][i].array() - pivec) / wvec;\n\n        XtW = l1_ests.test_mat[ph_eff][i].transpose() * wvec.matrix().asDiagonal();\n        XtWX += XtW * l1_ests.test_mat[ph_eff][i];\n        XtWZ += XtW * zvec.matrix();\n      }\n      XtWX.diagonal() += (params.tau[ph](val) * l1_ests.ridge_param_mult).matrix();\n      betanew = XtWX.llt().solve(XtWZ);\n      // compute score\n      score = ArrayXd::Zero(betanew.rows());\n      for(int i = 0; i < params.cv_folds; ++i ) {\n        etavec = (l1_ests.test_offset[ph][i] + l1_ests.test_mat[ph_eff][i] * betanew).array();\n        pivec = 1 - 1/(etavec.exp() + 1);\n        score += (l1_ests.test_mat[ph_eff][i].transpose() * (l1_ests.test_pheno_raw[ph][i].array() - pivec).matrix()).array();\n      }\n      score -= params.tau[ph](val) * l1_ests.ridge_param_mult * betanew.array();\n\n      // stopping criterion\n      if( score.abs().maxCoeff() < params.l1_ridge_eps) break;\n\n      betaold = betanew;\n    }\n  }\n\n  // compute predictor for each chr\n  int ctr = 0, chr_ctr = 0;\n  int nn, cum_size_folds;\n\n  for (size_t itr = 0; itr < files.chr_read.size(); ++itr) {\n    int chrom = files.chr_read[itr];\n    if( !in_map(chrom, chr_map) ) continue;\n\n    nn = chr_map[chrom][1] * params.n_ridge_l0 - l1_ests.chrom_map_ndiff(chrom-1);\n    if(nn > 0) {\n      cum_size_folds = 0;\n      for(int i = 0; i < params.cv_folds; ++i ) {\n        if(!params.within_sample_l0) betanew = l1_ests.beta_hat_level_1[ph][i].col(val);\n        predictions[0].block(cum_size_folds, chr_ctr, params.cv_sizes(i), 1) = l1_ests.test_mat[ph_eff][i].block(0, ctr, params.cv_sizes(i), nn) * betanew.block(ctr, 0, nn, 1);\n        cum_size_folds += params.cv_sizes(i);\n      }\n      chr_ctr++;\n      ctr += nn;\n    }\n  }\n\n  write_predictions(ph);\n\n  sout << \"done\";\n  auto t2 = std::chrono::high_resolution_clock::now();\n  auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1);\n  sout << \" (\" << duration.count() << \"ms) \"<< endl << endl;\n}\n\nvoid Data::make_predictions_binary_loocv_full(int const& ph, int const& val) {\n\n  sout << \"  * making predictions (using all samples)...\" << flush;\n  auto t1 = std::chrono::high_resolution_clock::now();\n  int ph_eff = params.write_l0_pred ? 
0 : ph;\n\n  // read in level 0 predictions from file\n  if(params.write_l0_pred)\n    read_l0(ph, ph_eff, &files, &params, &l1_ests, sout);\n  check_l0(ph, ph_eff, &params, &l1_ests, &pheno_data, sout, true);\n\n  int bs_l1 = l1_ests.test_mat_conc[ph_eff].cols();\n  ArrayXd beta, pivec, wvec;\n  MatrixXd XtWX, V1;\n\n  uint64 max_bytes = params.chunk_mb * 1e6;\n  // amount of RAM used < max_mb [ creating (bs_l1 * target_size) matrix ]\n  int nchunk = ceil( params.cv_folds * bs_l1 * sizeof(double) * 1.0 / max_bytes );\n  int target_size = params.cv_folds / nchunk;\n\n  MapArXd Y (pheno_data.phenotypes_raw.col(ph).data(), pheno_data.phenotypes_raw.rows());\n  MapMatXd X (l1_ests.test_mat_conc[ph_eff].data(), pheno_data.phenotypes_raw.rows(), bs_l1);\n  MapArXd offset (m_ests.offset_nullreg.col(ph).data(), pheno_data.phenotypes_raw.rows());\n  MapArXb mask (pheno_data.masked_indivs.col(ph).data(), pheno_data.phenotypes_raw.rows());\n\n  // fit logistic on whole data again for optimal ridge param\n  beta = ArrayXd::Zero(bs_l1);\n  run_log_ridge_loocv(params.tau[ph](val), l1_ests.ridge_param_mult, target_size, nchunk, beta, pivec, wvec, Y, X, offset, mask, &params, sout);\n\n  // use estimates from this model directly\n  // compute predictor for each chr\n  int ctr = 0, chr_ctr = 0;\n  int nn;\n\n  for (size_t itr = 0; itr < files.chr_read.size(); ++itr) {\n    int chrom = files.chr_read[itr];\n    if( !in_map(chrom, chr_map) ) continue;\n\n    nn = chr_map[chrom][1] * params.n_ridge_l0 - l1_ests.chrom_map_ndiff(chrom-1);\n\n    if(nn > 0) {\n      predictions[0].col(chr_ctr) = l1_ests.test_mat_conc[ph_eff].middleCols(ctr, nn) * beta.segment(ctr, nn).matrix();\n      chr_ctr++;\n      ctr += nn;\n    }\n  }\n\n  write_predictions(ph);\n\n  sout << \"done\";\n  auto t2 = std::chrono::high_resolution_clock::now();\n  auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1);\n  sout << \" (\" << duration.count() << \"ms) \"<< endl << endl;\n}\n\nvoid Data::make_predictions_binary_loocv(int const& ph, int const& val) {\n\n  sout << \"  * making predictions...\" << flush;\n  auto t1 = std::chrono::high_resolution_clock::now();\n  int ph_eff = params.write_l0_pred ? 
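/* logistic analogue of the leave-one-out ridge update, exact for the quadratic\n     (IRLS) approximation at the full-data fit: with H = X'WX + tau*D,\n     w_i = p_i(1-p_i) and v_i = w_i * x_i' H^-1 x_i, the one-step estimate is\n       beta_(-i) ~= beta - H^-1 x_i * (y_i - p_i) / (1 - v_i)\n     computed in chunks below to bound memory use */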
0 : ph;\n\n  // read in level 0 predictions from file\n  if(params.write_l0_pred)\n    read_l0(ph, ph_eff, &files, &params, &l1_ests, sout);\n  check_l0(ph, ph_eff, &params, &l1_ests, &pheno_data, sout, true);\n\n  int bs_l1 = l1_ests.test_mat_conc[ph_eff].cols();\n  ArrayXd beta, pivec, wvec, v2;\n  MatrixXd XtWX, V1, beta_final;\n  LLT<MatrixXd> Hinv;\n\n  uint64 max_bytes = params.chunk_mb * 1e6;\n  // amount of RAM used < max_mb [ creating (bs_l1 * target_size) matrix ]\n  int nchunk = ceil( params.cv_folds * bs_l1 * sizeof(double) * 1.0 / max_bytes );\n  int chunk, size_chunk, target_size = params.cv_folds / nchunk;\n  int j_start;\n\n  MapArXd Y (pheno_data.phenotypes_raw.col(ph).data(), pheno_data.phenotypes_raw.rows());\n  MapMatXd X (l1_ests.test_mat_conc[ph_eff].data(), pheno_data.phenotypes_raw.rows(), bs_l1);\n  MapArXd offset (m_ests.offset_nullreg.col(ph).data(), pheno_data.phenotypes_raw.rows());\n  MapArXb mask (pheno_data.masked_indivs.col(ph).data(), pheno_data.phenotypes_raw.rows());\n\n  // fit logistic on whole data again for optimal ridge param\n  beta = ArrayXd::Zero(bs_l1);\n  run_log_ridge_loocv(params.tau[ph](val), l1_ests.ridge_param_mult, target_size, nchunk, beta, pivec, wvec, Y, X, offset, mask, &params, sout);\n\n  // compute Hinv\n  //zvec = (etavec - m_ests.offset_nullreg.col(ph).array()) + (pheno_data.phenotypes_raw.col(ph).array() - pivec) / wvec;\n  XtWX = (params.tau[ph](val) * l1_ests.ridge_param_mult).matrix().asDiagonal(); // compute XtWX in chunks\n  for(chunk = 0; chunk < nchunk; ++chunk){\n    size_chunk = ( chunk == nchunk - 1 ? params.cv_folds - target_size * chunk : target_size );\n    j_start = chunk * target_size;\n\n    Ref<MatrixXd> Xmat_chunk = X.middleRows(j_start, size_chunk); // n x k\n    Ref<ArrayXd> w_chunk = wvec.segment(j_start, size_chunk);\n    Ref<ArrayXb> mask_chunk = mask.segment(j_start, size_chunk);\n\n    XtWX.noalias() += Xmat_chunk.transpose() * mask_chunk.select(w_chunk,0).matrix().asDiagonal() * Xmat_chunk;\n  }\n  Hinv.compute( XtWX );\n\n  // loo estimates\n  beta_final = MatrixXd::Zero(bs_l1, target_size);\n  for(chunk = 0; chunk < nchunk; ++chunk ) {\n    size_chunk = chunk == nchunk - 1? 
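/* chunked traversal: every chunk has target_size rows except the last, which absorbs the rounding remainder (e.g. 10 folds in 3 chunks of target size 3 gives sizes 3,3,4) */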
params.cv_folds - target_size * chunk : target_size;\n    j_start = chunk * target_size;\n    if( chunk == (nchunk - 1) ) beta_final = MatrixXd::Zero(bs_l1, size_chunk);\n\n    Ref<MatrixXd> Xmat_chunk = X.middleRows(j_start, size_chunk); // n x k\n    Ref<ArrayXd> Yvec_chunk = Y.segment(j_start, size_chunk);\n    Ref<ArrayXd> p_chunk = pivec.segment(j_start, size_chunk);\n    Ref<ArrayXd> w_chunk = wvec.segment(j_start, size_chunk);\n\n    V1 = Hinv.solve( Xmat_chunk.transpose() ); // k x n\n    v2 = (Xmat_chunk.array() * V1.transpose().array()).rowwise().sum() * w_chunk;\n    beta_final.array().colwise() = beta;\n    beta_final -= V1 * ((Yvec_chunk - p_chunk)/(1-v2)).matrix().asDiagonal();\n\n    // compute predictor for each chr\n    int ctr = 0, chr_ctr = 0;\n    int nn;\n\n    for (size_t itr = 0; itr < files.chr_read.size(); ++itr) {\n      int chrom = files.chr_read[itr];\n      if( !in_map(chrom, chr_map) ) continue;\n\n      nn = chr_map[chrom][1] * params.n_ridge_l0 - l1_ests.chrom_map_ndiff(chrom-1);\n\n      if(nn > 0) {\n        predictions[0].block(j_start, chr_ctr, size_chunk, 1) = ( X.block(j_start, ctr, size_chunk, nn).array() * beta_final.middleRows(ctr, nn).transpose().array() ).matrix().rowwise().sum();\n        chr_ctr++;\n        ctr += nn;\n      }\n    }\n  }\n\n  write_predictions(ph);\n\n  sout << \"done\";\n  auto t2 = std::chrono::high_resolution_clock::now();\n  auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1);\n  sout << \" (\" << duration.count() << \"ms) \"<< endl << endl;\n}\n\n\n// predictions for count phenotypes\nvoid Data::make_predictions_count(int const& ph, int const& val) {\n\n  sout << \"  * making predictions...\" << flush;\n  auto t1 = std::chrono::high_resolution_clock::now();\n  int ph_eff = params.write_l0_pred ? 
0 : ph;\n\n  // read in level 0 predictions from file\n  if(params.write_l0_pred)\n    read_l0(ph, ph_eff, &files, &params, &l1_ests, sout);\n  check_l0(ph, ph_eff, &params, &l1_ests, &pheno_data, sout, true);\n\n  int bs_l1 = l1_ests.test_mat[ph_eff][0].cols();\n  ArrayXd etavec, pivec, zvec, score;\n  MatrixXd betaold, betanew, XtW, XtWX, XtWZ;\n  MatrixXd ident_l1 = MatrixXd::Identity(bs_l1,bs_l1);\n\n  // fit model using out-of-sample level 0 predictions from whole data\n  if(params.within_sample_l0)\n    throw \"--within is not supported for count phenotypes\";\n\n  // compute predictor for each chr\n  int ctr = 0, chr_ctr = 0;\n  int nn, cum_size_folds;\n\n  for (size_t itr = 0; itr < files.chr_read.size(); ++itr) {\n    int chrom = files.chr_read[itr];\n    if( !in_map(chrom, chr_map) ) continue;\n\n    nn = chr_map[chrom][1] * params.n_ridge_l0 - l1_ests.chrom_map_ndiff(chrom-1);\n    if(nn > 0) {\n      cum_size_folds = 0;\n      for(int i = 0; i < params.cv_folds; ++i ) {\n        betanew = l1_ests.beta_hat_level_1[ph][i].col(val);\n        predictions[0].block(cum_size_folds, chr_ctr, params.cv_sizes(i), 1) = l1_ests.test_mat[ph_eff][i].block(0, ctr, params.cv_sizes(i), nn) * betanew.block(ctr, 0, nn, 1);\n        cum_size_folds += params.cv_sizes(i);\n      }\n      chr_ctr++;\n      ctr += nn;\n    }\n  }\n\n  write_predictions(ph);\n\n  sout << \"done\";\n  auto t2 = std::chrono::high_resolution_clock::now();\n  auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1);\n  sout << \" (\" << duration.count() << \"ms) \"<< endl << endl;\n}\n\n\nvoid Data::make_predictions_count_loocv(int const& ph, int const& val) {\n\n  sout << \"  * making predictions...\" << flush;\n  auto t1 = std::chrono::high_resolution_clock::now();\n  int ph_eff = params.write_l0_pred ? 0 : ph;\n\n  if(params.write_l0_pred)\n    read_l0(ph, ph_eff, &files, &params, &l1_ests, sout);\n  check_l0(ph, ph_eff, &params, &l1_ests, &pheno_data, sout, true);\n\n  int bs_l1 = l1_ests.test_mat_conc[ph_eff].cols();\n  double v2;\n  ArrayXd beta, pivec;\n  MatrixXd XtWX, V1, beta_final;\n  LLT<MatrixXd> Hinv;\n  MatrixXd ident_l1 = MatrixXd::Identity(bs_l1,bs_l1);\n\n  uint64 max_bytes = params.chunk_mb * 1e6;\n  // amount of RAM used < max_mb [ creating (bs_l1 * target_size) matrix ]\n  int nchunk = ceil( params.cv_folds * bs_l1 * sizeof(double) * 1.0 / max_bytes );\n  int chunk, size_chunk, target_size = params.cv_folds / nchunk;\n  int j_start;\n\n  // map the phenotype, level 0 predictions, offset and mask for this trait\n  MapArXd Y (pheno_data.phenotypes_raw.col(ph).data(), pheno_data.phenotypes_raw.rows());\n  MapMatXd X (l1_ests.test_mat_conc[ph_eff].data(), pheno_data.phenotypes_raw.rows(), bs_l1);\n  MapArXd offset (m_ests.offset_nullreg.col(ph).data(), pheno_data.phenotypes_raw.rows());\n  MapArXb mask (pheno_data.masked_indivs.col(ph).data(), pheno_data.phenotypes_raw.rows());\n\n  // fit the count (Poisson) ridge model on whole data again for optimal ridge param\n  beta = ArrayXd::Zero(bs_l1);\n  run_ct_ridge_loocv(params.tau[ph](val), l1_ests.ridge_param_mult, target_size, nchunk, beta, pivec, Y, X, offset, mask, &params, sout);\n\n  // compute Hinv\n  //zvec = (etavec - m_ests.offset_nullreg.col(ph).array()) + (pheno_data.phenotypes_raw.col(ph).array() - pivec) / wvec;\n  XtWX = (params.tau[ph](val) * l1_ests.ridge_param_mult).matrix().asDiagonal();\n  for(chunk = 0; chunk < nchunk; ++chunk){\n    size_chunk = ( chunk == nchunk - 1 ? 
params.cv_folds - target_size * chunk : target_size );\n    j_start = chunk * target_size;\n\n    Ref<MatrixXd> Xmat_chunk = X.block(j_start, 0, size_chunk, bs_l1); // n x k\n    Ref<MatrixXd> w_chunk = pivec.matrix().block(j_start, 0, size_chunk,1);\n\n    XtWX += Xmat_chunk.transpose() * w_chunk.asDiagonal() * Xmat_chunk;\n  }\n  Hinv.compute( XtWX );\n\n  // loo estimates\n  for(chunk = 0; chunk < nchunk; ++chunk ) {\n    size_chunk = chunk == nchunk - 1? params.cv_folds - target_size * chunk : target_size;\n    j_start = chunk * target_size;\n    if( (chunk == 0) || (chunk == nchunk - 1) ) beta_final = MatrixXd::Zero(bs_l1, size_chunk);\n\n    Ref<MatrixXd> Xmat_chunk = l1_ests.test_mat_conc[ph_eff].block(j_start, 0, size_chunk, bs_l1); // n x k\n    Ref<MatrixXd> Yvec_chunk = pheno_data.phenotypes_raw.block(j_start, ph, size_chunk, 1);\n\n    V1 = Hinv.solve( Xmat_chunk.transpose() ); // k x n\n    for(int i = 0; i < size_chunk; ++i ) {\n      v2 = Xmat_chunk.row(i) * V1.col(i);\n      v2 *= pivec(j_start + i);\n      beta_final.col(i) = (beta - V1.col(i).array() * (Yvec_chunk(i,0) - pivec(j_start + i)) / (1 - v2)).matrix();\n    }\n\n    // compute predictor for each chr\n    int ctr = 0, chr_ctr = 0;\n    int nn;\n\n    for (size_t itr = 0; itr < files.chr_read.size(); ++itr) {\n      int chrom = files.chr_read[itr];\n      if( !in_map(chrom, chr_map) ) continue;\n\n      nn = chr_map[chrom][1] * params.n_ridge_l0 - l1_ests.chrom_map_ndiff(chrom-1);\n\n      if(nn > 0) {\n        predictions[0].block(j_start, chr_ctr, size_chunk, 1) = ( l1_ests.test_mat_conc[ph_eff].block(j_start, ctr, size_chunk, nn).array() * beta_final.block(ctr, 0, nn, size_chunk).transpose().array() ).matrix().rowwise().sum();\n        chr_ctr++;\n        ctr += nn;\n      }\n    }\n  }\n\n  write_predictions(ph);\n\n  sout << \"done\";\n  auto t2 = std::chrono::high_resolution_clock::now();\n  auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1);\n  sout << \" (\" << duration.count() << \"ms) \"<< endl << endl;\n}\n\n// predictions for t2e traits\nvoid Data::make_predictions_cox(int const& ph, int const& val) {\n\n  sout << \"  * making predictions...\" << flush;\n  auto t1 = std::chrono::high_resolution_clock::now();\n  int ph_eff = params.write_l0_pred ? 
0 : ph;\n\n  // read in level 0 predictions from file\n  if(params.write_l0_pred)\n    read_l0(ph, ph_eff, &files, &params, &l1_ests, sout);\n  check_l0(ph, ph_eff, &params, &l1_ests, &pheno_data, sout, true);\n\n  // compute predictor for each chr\n  int ctr = 0, chr_ctr = 0;\n  int nn, cum_size_folds;\n  MatrixXd beta;\n\n  for (size_t itr = 0; itr < files.chr_read.size(); ++itr) {\n    int chrom = files.chr_read[itr];\n    if( !in_map(chrom, chr_map) ) continue;\n\n    nn = chr_map[chrom][1] * params.n_ridge_l0 - l1_ests.chrom_map_ndiff(chrom-1);\n    if(nn > 0) {\n      cum_size_folds = 0;\n      for(int i = 0; i < params.cv_folds; ++i ) {\n        beta = l1_ests.beta_hat_level_1[ph][i].col(val);\n        predictions[0].block(cum_size_folds, chr_ctr, params.cv_sizes(i), 1) = l1_ests.test_mat_conc[ph_eff].block(cum_size_folds, ctr, params.cv_sizes(i), nn) * beta.block(ctr, 0, nn, 1);\n        cum_size_folds += params.cv_sizes(i);\n      }\n      chr_ctr++;\n      ctr += nn;\n    }\n  }\n\n  write_predictions(ph);\n\n  sout << \"done\";\n  auto t2 = std::chrono::high_resolution_clock::now();\n  auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1);\n  sout << \" (\" << duration.count() << \"ms) \"<< endl << endl;\n}\nvoid Data::print_snp_betas(const Ref<const VectorXd>& l1_betas){\n\n  ofstream ofile;\n  std::ostringstream buffer;\n  string outname = files.out_file + \"_step1_betas.txt\";\n  openStream(&ofile, outname, ios::out, sout);\n  buffer << \"SNP\\tCHROM\\tGENPOS\\tALLELE0\\tALLELE1\\tBETA_level_0\\tBETA\\n\";\n\n  // go through each block and update the betas for all SNPs in block\n  int block = 0, bs, snp_tally = 0;\n  MatrixXd beta_l1;\n  for (size_t itr = 0; itr < files.chr_read.size(); ++itr) {\n\n    int chrom = files.chr_read[itr];\n    if( !in_map(chrom, chr_map) ) continue;\n\n    int chrom_nsnps = chr_map[chrom][0];\n    int chrom_nb = chr_map[chrom][1];\n    if(chrom_nb == 0) continue;\n\n    for(int bb = 0; bb < chrom_nb ; bb++) {\n      get_block_size(params.block_size, chrom_nsnps, bb, bs);\n      beta_l1 = l1_ests.beta_snp_step1.middleRows(snp_tally, bs) * l1_betas.segment(block * params.n_ridge_l0, params.n_ridge_l0).asDiagonal();\n\n      // print out to file (step1_betas.txt)\n      for(int i = 0; i < bs ; i++) { // snp/chr/pos/ref/alt/beta_l0/beta_step1\n        snp* vinfo = &(snpinfo[snp_tally + i]);\n        buffer << vinfo->ID << \"\\t\" << vinfo->chrom << \"\\t\" << vinfo->physpos << \"\\t\" << vinfo->allele1 << \"\\t\" << vinfo->allele2 << \"\\t\" << l1_ests.beta_snp_step1.row(snp_tally+i).sum() << \"\\t\" << beta_l1.row(i).sum() << \"\\n\";\n      }\n\n      block++; snp_tally += bs;\n    }\n\n  }\n\n  ofile << buffer.str();\n  ofile.close();\n\n}\n\nvoid Data::write_predictions(int const& ph){\n  // output predictions to file\n  string out, header;\n  Files ofile;\n  MatrixXd pred, prs;\n\n  // get header line once\n  if(params.write_blups || params.make_loco || params.trait_mode || params.print_prs)\n    header = write_ID_header();\n\n  // for the per chromosome predictions (not used)\n  if(params.write_blups) {\n\n    out = files.out_file + \"_\" + to_string(ph+1) + (params.gzOut ? 
\".gz\" : \"\");\n    sout << \"writing file \" << out << \"...\" << flush;\n    ofile.openForWrite(out, sout);\n\n    // enforce all chromosomes are printed\n    pred = MatrixXd::Zero(predictions[0].rows(), params.nChrom);\n\n    int nn, chr_ctr = 0;\n    for(auto const& chr : files.chr_read){\n\n      if( !in_map(chr, chr_map) ) continue;\n\n      nn = chr_map[chr][1];\n      if(nn > 0){\n        pred.col(chr - 1) = predictions[0].col(chr_ctr);\n        ++chr_ctr;\n      }\n\n    }\n\n    // header line : FID_IID for all individuals\n    ofile << header;\n\n    // for each row: print chromosome then blups\n    for(int chr = 0; chr < params.nChrom; chr++) \n      ofile << write_chr_row(chr+1, ph, pred.col(chr));\n\n    ofile.closeFile();\n\n  }\n\n  // output LOCO predictions G_loco * beta_loco for each autosomal chr\n  if(params.make_loco || params.trait_mode){\n\n    out = files.out_file + \"_\" + to_string(ph+1) + \".loco\" + (params.gzOut ? \".gz\" : \"\");\n    sout << \"writing LOCO predictions...\" << flush;\n    ofile.openForWrite(out, sout);\n\n    pred.resize(predictions[0].rows(), params.nChrom);\n    pred.colwise() = predictions[0].rowwise().sum();\n\n    int nn, chr_ctr = 0;\n    for(auto const& chr : files.chr_read){\n\n      if( !in_map(chr, chr_map) ) continue;\n\n      nn = chr_map[chr][1];\n      if(nn > 0) {\n        pred.col(chr - 1) -= predictions[0].col(chr_ctr);\n        ++chr_ctr;\n      }\n\n    }\n\n    // header line : FID_IID for all individuals\n    ofile << header;\n\n    // print loco predictions for each chromosome\n    for(int chr = 0; chr < params.nChrom; chr++) \n      ofile << write_chr_row(chr+1, ph, pred.col(chr));\n\n    ofile.closeFile();\n\n  }\n\n  if(params.write_null_firth){ // store null estimates for Firth\n\n    bool has_converged = true;\n    IOFormat Fmt(StreamPrecision, DontAlignCols, \" \", \"\\n\", \"\", \"\",\"\",\"\");\n\n    out = files.out_file + \"_\" + to_string(ph+1) + \".firth\" + (params.gzOut ? \".gz\" : \"\");\n    sout << \"writing null approximate Firth estimates...\" << flush;\n    ofile.openForWrite(out, sout);\n\n    MapArXd Y (pheno_data.phenotypes_raw.col(ph).data(), pheno_data.phenotypes_raw.rows());\n    MapArXb mask (pheno_data.masked_indivs.col(ph).data(), pheno_data.masked_indivs.rows());\n    // not quite matching with step 2 due to offset not being used in logreg\n    ArrayXd bhat = m_ests.bhat_start.col(ph).array();\n\n    for(int chr = 0; chr < params.nChrom; chr++) {\n      // fit null approximate Firth\n      // use warm starts from previous chromosomes\n      m_ests.blups.col(ph) = pred.col(chr);\n      has_converged = fit_approx_firth_null(chr, ph, &pheno_data, &m_ests, bhat, &params);\n      if(!has_converged) break;\n      ofile << chr + 1 << \" \" << bhat.matrix().transpose().format(Fmt) << endl;\n    }\n\n    if(!has_converged){ // remove file\n      sout << \"WARNING: Firth failed to converge\";\n      remove(out.c_str());\n    } else\n      ofile.closeFile();\n\n  }\n\n  if(params.print_prs){\n\n    out = files.out_file + \"_\" + to_string(ph+1) + \".prs\" + (params.gzOut ? 
\".gz\" : \"\");\n    sout << \"writing whole genome PRS...\" << flush;\n    ofile.openForWrite(out, sout);\n\n    // output predictions sum(G * beta)\n    prs.resize(predictions[0].rows(), 1);\n    prs = predictions[0].rowwise().sum();\n\n    // header line : FID_IID for all individuals\n    ofile << header;\n\n    // print prs (set chr=0)\n    ofile << write_chr_row(0, ph, prs.col(0));\n\n    ofile.closeFile();\n\n  }\n\n}\n\nstd::string Data::write_ID_header(){\n\n  uint32_t index;\n  string out, id_index;\n  std::ostringstream buffer;\n  map<string, uint32_t >::iterator itr_ind;\n\n  buffer << \"FID_IID \";\n  for (itr_ind = params.FID_IID_to_ind.begin(); itr_ind != params.FID_IID_to_ind.end(); ++itr_ind) {\n\n    // check individual was included in analysis, if not then skip\n    index = itr_ind->second;\n    if( !in_filters.ind_in_analysis( index ) ) continue;\n\n    id_index = itr_ind->first;\n    buffer << id_index << \" \";\n\n  }\n\n  buffer << endl;\n  return buffer.str();\n\n}\n\n\nstd::string Data::write_chr_row(int const& chr, int const& ph, const Ref<const VectorXd>& pred){\n\n  uint32_t index;\n  string out;\n  std::ostringstream buffer;\n  map<string, uint32_t >::iterator itr_ind;\n\n  buffer << chr << \" \";\n  for (itr_ind = params.FID_IID_to_ind.begin(); itr_ind != params.FID_IID_to_ind.end(); ++itr_ind) {\n\n    // check individual was included in analysis, if not then skip\n    index = itr_ind->second;\n    if( !in_filters.ind_in_analysis( index ) ) continue;\n\n    // print prs\n    if( pheno_data.masked_indivs(index, ph) )\n      buffer << pred(index) << \" \";\n    else\n      buffer << \"NA \";\n  }\n\n  buffer << endl;\n  return buffer.str();\n\n}\n\n\n/////////////////////////////////////////////////\n/////////////////////////////////////////////////\n////    Functions needed in testing mode\n/////////////////////////////////////////////////\n/////////////////////////////////////////////////\n\nvoid Data::setup_output(Files* ofile, string& out, std::vector<std::shared_ptr<Files>>& ofile_split, std::vector< string >& out_split){\n\n  if(params.getCorMat){ // header N,M\n    out = files.out_file + \".corr\";\n    string runmode = (params.dosage_mode ? \"in dosage mode\" : \"in hard-call mode\");\n    if(params.cor_out_txt){\n      sout << \" * computing correlation matrix \" + runmode + \"\\n  + output to text file [\"<<out<<\"]\\n\";\n      ofile->openForWrite(out, sout);\n      if(params.skip_scaleG) (*ofile) << params.extract_vars_order.size() << \" \" << params.n_samples << \"\\n\";\n    } else {\n      sout << \" * computing correlation matrix \" + runmode + \" (storing R^2 values)\\n  + output to binary file [\"<<out<<\"]\\n\";\n      ofile->openMode(out, std::ios_base::out | std::ios_base::binary, sout);\n      ArrayXi vals(2);\n      vals << params.n_samples, params.extract_vars_order.size();\n      //cerr << vals << endl;\n      ofile->writeBinMode(vals, sout);\n    }\n    sout << \"  + list of snps written to [\" << out << \".snplist]\\n\";\n    sout << \"  + n_snps = \" << params.extract_vars_order.size() <<\"\\n\\n\";\n    return;\n  }\n\n  // info for output file\n  string tmpstr = (params.htp_out ? print_header_output_htp() : print_header_output(&params));\n  string mask_header = (params.build_mask ? bm.build_header() : \"\");\n  string gz_ext = (params.gzOut ? 
\".gz\" : \"\");\n\n  if( !params.split_by_pheno ){\n    out = files.out_file + \".regenie\" + gz_ext;\n    ofile->openForWrite( out, sout );\n    (*ofile) << mask_header << tmpstr;\n    // write dictionary file\n    Files dict_file;\n    dict_file.openForWrite( files.out_file + \".regenie.Ydict\", sout);\n    for(int i = 0; i < params.n_pheno; i++) \n      dict_file << \"Y\" << i+1 << \" \" << files.pheno_names[i] << endl;\n    dict_file.closeFile();\n    return;\n  }\n\n  out_split.resize( params.n_pheno );\n  ofile_split.resize( params.n_pheno );\n\n  for(int i = 0; i < params.n_pheno; i++) {\n    if( !params.pheno_pass(i) ) continue;\n    out_split[i] = files.out_file + \"_\" + files.pheno_names[i] + \".regenie\" + gz_ext;\n    ofile_split[i] = std::make_shared<Files>();\n    ofile_split[i]->openForWrite( out_split[i], sout );\n    (*ofile_split[i]) << mask_header << tmpstr;\n  }\n\n}\n\nvoid Data::print_test_info(){\n\n  if(params.getCorMat) { params.with_flip = false; return; }\n\n  if(params.write_masks) {\n\n    bm.write_info(&params, &in_filters, sout);\n    sout << \" * user specified to write masks (in PLINK bed format)\\n\";\n    if(params.dosage_mode) \n      sout << \"   +dosages will be converted to hardcalls\\n\";\n    if(params.write_setlist) \n      bm.prep_setlists(files.new_sets, files.out_file, sout);\n\n  }\n  if(params.write_mask_snplist) \n    bm.prep_snplist(files.out_file, sout);\n\n  sout << \" * using minimum MAC of \" << (params.build_mask ? params.min_MAC_mask : params.min_MAC) << \n    \" (\" << (params.build_mask ? \"masks\" : \"variants\") << \" with lower MAC are ignored)\\n\";\n  if(params.forced_MAC > 0) sout << \"   -using threshold of \" << params.forced_MAC << \" for subset of specified variants\\n\";\n  if(params.setMinINFO) \n    sout << \" * using minimum imputation info score of \" << params.min_INFO << \" (variants with lower info score are ignored)\\n\";\n  if((params.test_type == 2) && (params.minHOMs > 0))\n    sout << \" * ignoring variants (masks for gene-based tests) with fewer than \" << params.minHOMs << \" homALT carriers\\n\";\n\n  if(params.firth || params.use_SPA) {\n\n    sout << \" * using \" << (params.firth_approx ? \"fast \": \"\") << (params.firth ? \"Firth \": \"SPA \");\n    sout << \"correction for logistic/cox regression p-values less than \" << params.alpha_pvalue << endl;\n    if(params.back_correct_se) sout << \"    - using back-correction to compute Firth SE\\n\";\n    if(params.firth && params.use_adam) sout << \"    - using \" << (params.adam_mini? \"mini-\":\"\") << \"batch ADAM to get starting values\\n\";\n    n_corrected = 0;\n\n  }\n\n  // if testing select chromosomes\n  if( params.select_chrs ) \n    sout << \" * user specified to test only on select chromosomes\\n\";\n\n\n  switch(params.test_type){\n    case 0:\n      test_string = \"ADD\";\n      break;\n    case 1:\n      test_string = \"DOM\";\n      break;\n    case 2:\n      test_string = \"REC\";\n      break;\n    default:\n      throw \"unrecognized test value\";\n  }\n  wgr_string = ( params.skip_blups && !params.interaction_prs && !params.blup_cov ?  
\"\" : \"-WGR\" );\n\n\n  if(params.htp_out){\n    if((params.trait_mode==1) & params.firth) correction_type = \"-FIRTH\";\n    else if((params.trait_mode==1) & params.use_SPA) correction_type = \"-SPA\";\n    else if(params.trait_mode==1) correction_type = \"-LOG\";\n    else if(params.trait_mode==2) correction_type = \"-POISSON\";\n    else if((params.trait_mode==3) & params.firth) correction_type = \"-COX-FIRTH\";\n    else if(params.trait_mode==3) correction_type = \"-COX\";\n    else correction_type = \"-LR\";\n\n    model_type = test_string + wgr_string + correction_type;\n  }\n\n  if(params.gwas_condtl) // specify main sum stats is conditional gwas\n    params.condtl_suff = \"-CONDTL\";\n\n  params.with_flip = params.with_flip && !params.build_mask && params.trait_mode && (params.test_type == 0);\n\n  if( params.joint_test ) {\n    jt.out_file_prefix = files.out_file;\n    params.with_flip = jt.get_test_info(&params, test_string, sout) && params.with_flip;\n    sout << \" * list of joint tests run on burden masks: \" << get_test_list(jt.test_list, jt.joint_tests_map) << \"\\n\";\n  }\n\n  normal nd(0,1);\n  chi_squared chisq(1);\n  params.zcrit = quantile(complement(nd, .025));\n  params.chisq_thr = quantile(chisq, 1 - params.alpha_pvalue);\n  params.z_thr = sqrt(params.chisq_thr);\n\n}\n\n\n/////////////////////////////////////////////////\n/////////////////////////////////////////////////\n////    prep for association test\n/////////////////////////////////////////////////\n/////////////////////////////////////////////////\n\nvoid Data::set_blocks_for_testing() {\n\n  params.total_n_block = 0;\n  int blocks_left = params.n_block;\n  int nchr = 0;\n\n  map<int, vector<int> >::iterator itr;\n  map<int, vector<int> > m1;\n  for (itr = chr_map.begin(); itr != chr_map.end(); ++itr) {\n    int chrom_nsnps = itr->second[0];\n    int nb = ceil((double) chrom_nsnps / params.block_size);\n\n    if(params.n_block > 0) {\n      if(blocks_left > 0) {\n        int minb = min(nb, blocks_left);\n        //sout << << endl;\n        itr->second[1] = minb;\n        params.total_n_block += minb;\n        blocks_left -= minb;\n      }\n    } else {\n      itr->second[1] = nb;\n      params.total_n_block += nb;\n    }\n    if(params.getCorMat && (itr->second[1] > 0)) nchr++;\n    m1[ itr->first ] = itr->second;\n  }\n  chr_map = m1;\n\n  if(params.getCorMat && (nchr > 1))\n    throw \"can only compute LD matrix for a single chromosome (use --chr/--chrList/--range).\";\n\n  // summarize block sizes\n  sout << left << std::setw(20) << \" * # threads\" << \": [\" << params.threads << \"]\\n\";\n  sout << left << std::setw(20) << \" * block size\" << \": [\" << params.block_size << \"]\\n\";\n  if(!params.getCorMat) {\n    sout << left << std::setw(20) << \" * # blocks\" << \": [\" << params.total_n_block << \"]\\n\";\n    if(params.start_block > 1) sout << \"    + skipping to block #\" << params.start_block << endl;\n  }\n\n  // storing null estimates from firth\n  if(params.use_null_firth) \n    sout << \" * reading null Firth estimates using file : [\" << files.null_firth_file << \"]\\n\";\n  if(params.write_null_firth ) \n    sout << \" * writing null Firth estimates to file\\n\";\n\n}\n\nvoid Data::set_nullreg_mat(){\n  if(params.trait_mode == 3){\n    m_ests.cox_MLE_NULL.resize(params.n_pheno);\n    m_ests.survival_data_pheno.resize(params.n_pheno);\n    if (params.firth) {\n      firth_est.beta_null_firth = MatrixXd::Zero(pheno_data.new_cov.cols(), params.n_pheno);\n      firth_est.cov_blup_offset = 
MatrixXd::Zero(params.n_samples, params.n_pheno);\n    }\n  } else {\n    m_ests.Y_hat_p = MatrixXd::Zero(params.n_samples, params.n_pheno);\n    m_ests.Gamma_sqrt = m_ests.Gamma_sqrt_mask = MatrixXd::Zero(params.n_samples, params.n_pheno);\n    m_ests.X_Gamma.resize(params.n_pheno);\n  }\n\n  // for Firth approximation\n  if(params.firth_approx){\n    if (params.trait_mode == 1) {\n      firth_est.beta_null_firth = MatrixXd::Zero(pheno_data.new_cov.cols() + 1, params.n_pheno);\n      if(params.test_mode) firth_est.cov_blup_offset = MatrixXd::Zero(params.n_samples, params.n_pheno);\n    }\n\n    // open streams to write firth null estimates\n    if(params.write_null_firth){\n      string fout_p;\n      firth_est.firth_est_files.resize(params.n_pheno);\n      for(int ph = 0; ph < params.n_pheno; ph++){\n        firth_est.firth_est_files[ph] = std::make_shared<Files>();\n        fout_p = files.out_file + \"_\" + to_string(ph + 1) + \".firth\" + (params.gzOut ? \".gz\" : \"\");\n        firth_est.firth_est_files[ph]->openForWrite(fout_p, sout);\n      }\n      if(params.compute_all_chr){\n        params.use_null_firth = false; // make sure nothing is read\n        files.null_firth_file = get_firth_est_allChr(files, in_filters, m_ests, firth_est, pheno_data, params, sout);\n        params.write_null_firth = false;\n        params.use_null_firth = true;\n      }\n    }\n\n\n    if(params.use_null_firth) // get files with firth estimates\n      check_beta_start_firth(files, params, sout);\n  }\n}\n\n\n/////////////////////////////////////////////////\n/////////////////////////////////////////////////\n////    Testing mode (multi-threaded with OpenMP)\n/////////////////////////////////////////////////\n/////////////////////////////////////////////////\n\nvoid Data::test_snps_fast() {\n\n  sout << \"Association testing mode\";\n\n  string out;\n  vector < string > out_split;\n  // output files\n  Files ofile;\n  // use pointer to class since it contains non-copyable elements\n  vector < std::shared_ptr<Files> > ofile_split;\n\n#if defined(_OPENMP)\n  sout << \" with \" << (params.streamBGEN? 
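/* streamBGEN selects the faster BGEN parsing path */ 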
\"fast \" : \"\") << \"multithreading using OpenMP\";\n#endif\n  sout << endl;\n\n  file_read_initialization(); // set up files for reading\n  read_pheno_and_cov(&files, &params, &in_filters, &pheno_data, &m_ests, &Gblock, sout);   // read phenotype and covariate files\n  prep_run(&files, &in_filters, &params, &pheno_data, &m_ests, sout); // check blup files and adjust for covariates\n  set_blocks_for_testing();   // set number of blocks\n  print_usage_info(&params, &files, sout);\n  print_test_info();\n  setup_output(&ofile, out, ofile_split, out_split); // result file\n  if(params.w_interaction && (params.trait_mode==0) && !params.no_robust && !params.force_robust) \n    nullHLM.prep_run(&pheno_data, &params);\n  if(params.trait_mode) set_nullreg_mat();\n  sout << endl;\n\n  // start analyzing each chromosome\n  bool block_init_pass = false;\n  int block = 0, chrom_nsnps, chrom_nb, bs;\n  tally snp_tally;\n  vector< variant_block > block_info;\n  initialize_thread_data(Gblock.thread_data, params);\n\n\n  for(auto const& chrom : files.chr_read) {\n\n    if( !in_map(chrom, chr_map) ) continue;\n\n    chrom_nsnps = chr_map[chrom][0];\n    chrom_nb = chr_map[chrom][1];\n    if(chrom_nb == 0) continue;\n\n    // If specified starting block\n    if(!block_init_pass && (params.start_block > (block + chrom_nb)) ) {\n      snp_tally.snp_count += chrom_nsnps;\n      block += chrom_nb;\n      continue;\n    }\n\n    sout << \"Chromosome \" << chrom << \" [\" << chrom_nb << \" blocks in total]\\n\";\n\n    if(!params.getCorMat){\n      // read polygenic effect predictions from step 1\n      blup_read_chr(false, chrom, m_ests, files, in_filters, pheno_data, params, sout);\n\n      // compute phenotype residual (adjusting for BLUP [and covariates for non-QTs])\n      if(params.trait_mode == 1) compute_res_bin(chrom);\n      else if(params.trait_mode == 2) compute_res_count(chrom);\n      else if(params.trait_mode == 3) compute_res_cox(chrom);\n      else compute_res();\n\n      // print y/x/logreg offset used for level 1 \n      if(params.debug) write_inputs();\n    }\n\n    // analyze by blocks of SNPs\n    for(int bb = 0; bb < chrom_nb ; bb++) {\n\n      get_block_size(params.block_size, chrom_nsnps, bb, bs);\n\n      // If specified starting block\n      if(!block_init_pass && (params.start_block > (block+1)) ) {\n        snp_tally.snp_count += bs;\n        block++;\n        continue;\n      } else if(!block_init_pass) block_init_pass = true;\n\n      sout << \" block [\" << block + 1 << \"/\" << params.total_n_block << \"] : \" << flush;\n      \n      allocate_mat(Gblock.Gmat, params.n_samples, bs);\n      block_info.resize(bs);\n\n      // read SNP, impute missing & compute association test statistic\n      analyze_block(chrom, bs, &snp_tally, block_info);\n\n      // print the results\n      for (auto const& snp_data : block_info){\n\n        if( snp_data.ignored ) {\n          snp_tally.n_ignored_snps++;\n          continue;\n        }\n\n        snp_tally.n_ignored_tests += snp_data.ignored_trait.count();\n        if(params.firth || params.use_SPA) {\n          n_corrected += (!snp_data.ignored_trait && snp_data.is_corrected).count();\n          snp_tally.n_failed_tests += (!snp_data.ignored_trait && snp_data.test_fail).count();\n          if(params.w_interaction) {\n            n_corrected += (2 + params.ncov_interaction) * snp_data.is_corrected_inter.count(); // main, inter & joint\n            snp_tally.n_failed_tests += (2 + params.ncov_interaction) * (snp_data.is_corrected_inter && 
snp_data.test_fail_inter).count(); // main, inter & joint\n          }\n        }\n\n        for(int j = 0; j < params.n_pheno; ++j) {\n\n          if( !params.pheno_pass(j) || snp_data.ignored_trait(j) ) {\n            if(!params.split_by_pheno) // if using single file, print NAs for snp/trait sum stats\n              ofile << snp_data.sum_stats[j];\n\n            continue;\n          }\n\n          if(params.split_by_pheno)\n            (*ofile_split[j]) << snp_data.sum_stats[j]; // add test info\n          else\n            ofile << snp_data.sum_stats[j]; // add test info\n        }\n\n      }\n\n      snp_tally.snp_count += bs;\n      block++;\n    }\n\n  }\n\n  sout << print_summary(&ofile, out, ofile_split, out_split, n_corrected, snp_tally, files, firth_est, params);\n\n}\n\n// test SNPs in block\nvoid Data::analyze_block(int const& chrom, int const& n_snps, tally* snp_tally, vector<variant_block> &all_snps_info){\n\n  auto t1 = std::chrono::high_resolution_clock::now();\n  const int start = snp_tally->snp_count;\n  vector< vector < uchar > > snp_data_blocks;\n  vector< uint32_t > insize, outsize;\n\n  vector<uint64> indices(n_snps);\n  std::iota(indices.begin(), indices.end(), start);\n\n  readChunk(indices, chrom, snp_data_blocks, insize, outsize, all_snps_info);\n\n  // analyze using openmp\n  compute_tests_mt(chrom, indices, snp_data_blocks, insize, outsize, all_snps_info);\n  //compute_tests_st(chrom, indices, snp_data_blocks, insize, outsize, all_snps_info); // this is slower\n\n\n  auto t2 = std::chrono::high_resolution_clock::now();\n  auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1);\n  sout << \"done (\" << duration.count() << \"ms) \"<< endl;\n}\n\n\nvoid Data::compute_res(){\n\n  if(params.blup_cov) // blup as covariate\n    get_lm_resid(res, m_ests.blups, pheno_data.phenotypes); \n  else res = pheno_data.phenotypes - m_ests.blups;\n  res.array() *= pheno_data.masked_indivs.array().cast<double>();\n\n  if(params.rerint || params.rerintcov) {\n    residualize_res();\n  }\n\n  p_sd_yres = res.colwise().norm();\n  p_sd_yres.array() /= sqrt(pheno_data.Neff - params.ncov_analyzed); // if blup is cov\n  res.array().rowwise() /= p_sd_yres.array();\n  pheno_data.scf_sv = ( pheno_data.scale_Y.array() * p_sd_yres.array()).matrix().transpose().array();\n\n  if(!params.trait_set && !params.multiphen) pheno_data.YtX = res.transpose() * pheno_data.new_cov;\n  if(params.w_interaction && (params.trait_mode==0) && !params.no_robust && !params.force_robust) \n    HLM_fitNull(nullHLM, m_ests, pheno_data, files, params, sout);\n}\n\n// two-stage RINT (rank-based inverse normal transformation), as described in Sofer et al., 2020, https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6416071/\n// --apply-rerint = RN-Resid-Unadj (Table 1, Sofer et al., 2020)\n// --apply-rerint-cov = RN-Resid-Adj (Table 1, Sofer et al., 2020)\nvoid Data::residualize_res() {\n  // for each residual, apply rank-inverse normal transformation\n  for(int ph = 0; ph < res.cols(); ph++) {\n    rint_pheno(res.col(ph), pheno_data.masked_indivs.col(ph).array());\n  }\n\n  // further project covariates out from residuals\n  if(params.rerintcov) {\n    MatrixXd beta = res.transpose() * pheno_data.new_cov;\n    res -= ( (pheno_data.new_cov * beta.transpose()).array() * pheno_data.masked_indivs.array().cast<double>() ).matrix();\n  }\n\n  // respect masked individuals (needed here?) 
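-- masking is re-applied here as a safeguard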
\n  res.array() *= pheno_data.masked_indivs.array().cast<double>();\n\n  // perform scaling of residualized phenotypes, similar to residualize_phenotypes in Pheno.cpp\n  // compute the scale of residuals \n  pheno_data.scale_Y = res.colwise().norm().array() / sqrt(pheno_data.Neff.matrix().transpose().array() - params.ncov_analyzed);\n  // set sd for phenotypes which are ignored to 1\n  pheno_data.scale_Y = params.pheno_pass.select(pheno_data.scale_Y.transpose().array(), 1).matrix().transpose();\n  // check sd is not 0 \n  MatrixXd::Index minIndex;\n  if(pheno_data.scale_Y.minCoeff(&minIndex) < params.numtol)\n    throw \"some phenotype residuals have sd=0.\";\n  // scale residuals\n  res.array().rowwise() /= pheno_data.scale_Y.array();\n}\n\n\nvoid Data::compute_res_bin(int const& chrom){\n\n  fit_null_logistic(false, chrom, &params, &pheno_data, &m_ests, &files, sout); // for all phenotypes\n\n  res = pheno_data.phenotypes_raw - m_ests.Y_hat_p;\n  res.array() /= m_ests.Gamma_sqrt.array();\n  res.array() *= pheno_data.masked_indivs.array().cast<double>();\n\n  // if using firth approximation, fit null penalized model with only covariates and store the estimates (to be used as offset when computing LRT in full model)\n  if(params.firth_approx) fit_null_firth(false, chrom, &firth_est, &pheno_data, &m_ests, &files, &params, sout);\n  else if(params.firth){ // get estimates of covs without tested snp\n    params.cov_betas = MatrixXd::Zero(pheno_data.new_cov.cols(), params.n_pheno);\n    for( int ph = 0; ph < params.n_pheno; ++ph ) \n      if(params.pheno_pass(ph))\n        params.pheno_pass(ph) = fit_approx_firth_null(chrom, ph, &pheno_data, &m_ests, params.cov_betas.col(ph), &params);\n  }\n}\n\nvoid Data::compute_res_count(int const& chrom){\n\n  fit_null_poisson(chrom, &params, &pheno_data, &m_ests, &files, sout); // for all phenotypes\n\n  res = pheno_data.phenotypes_raw - m_ests.Y_hat_p;\n  res.array() /= m_ests.Gamma_sqrt.array();\n  res.array() *= pheno_data.masked_indivs.array().cast<double>();\n\n}\n\nvoid Data::compute_res_cox(int const& chrom){\n\n  fit_null_cox(false, chrom, &params, &pheno_data, &m_ests, &files, sout); // for all phenotypes\n\n  if(params.firth_approx) fit_null_firth_cox(false, chrom, &firth_est, &pheno_data, &m_ests, &files, &params, sout);\n\n}\n\n\nvoid Data::compute_tests_mt(int const& chrom, vector<uint64> indices, vector< vector < uchar > >& snp_data_blocks, vector< uint32_t > insize, vector< uint32_t >& outsize, vector<variant_block> &all_snps_info){\n  \n  size_t const bs = indices.size();\n  ArrayXb err_caught = ArrayXb::Constant(bs, false);\n\n    // start openmp for loop\n#if defined(_OPENMP)\n    setNbThreads(1);\n#pragma omp parallel for schedule(dynamic)\n#endif\n    for(size_t isnp = 0; isnp < bs; isnp++) {\n      uint32_t const snp_index = indices[isnp];\n      variant_block* block_info = &(all_snps_info[isnp]);\n      int thread_num = 0;\n      #if defined(_OPENMP)\n      thread_num = omp_get_thread_num();\n      #endif\n\n      // to store variant information\n      if( !params.build_mask && (((params.file_type == \"bgen\") && params.streamBGEN) || params.file_type == \"bed\") )\n        parseSNP(isnp, chrom, &(snp_data_blocks[isnp]), insize[isnp], outsize[isnp], &params, &in_filters, pheno_data.masked_indivs, pheno_data.phenotypes_raw, &snpinfo[snp_index], &Gblock, block_info, sout);\n\n      // reset thread-local data for this variant\n      reset_thread(&(Gblock.thread_data[thread_num]), params);\n\n      // check if g is sparse\n      if 
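/* sparse shortcut is skipped when interaction terms are used */ 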
(!params.w_interaction)\n        check_sparse_G(isnp, thread_num, &Gblock, params.n_samples, in_filters.ind_in_analysis, block_info->n_zero, params.prop_zero_thr);\n\n      if (params.w_interaction)\n      {\n        if (params.interaction_snp && (snpinfo[snp_index].ID == in_filters.interaction_cov))\n          block_info->skip_int = true;\n        get_interaction_terms(isnp, thread_num, &pheno_data, &Gblock, block_info, nullHLM, &params, sout);\n      }\n\n      // for QTs with non-sparse G: residualize and re-scale\n      if (!params.skip_cov_res && (params.trait_mode == 0) && !Gblock.thread_data[thread_num].is_sparse)\n        residualize_geno(pheno_data.new_cov, Gblock.Gmat.col(isnp), block_info, params);\n      else block_info->scale_fac = 1;\n\n      // skip SNP if it fails filters\n      if (block_info->ignored || params.getCorMat)\n        continue;\n\n      reset_stats(block_info, params);\n\n      try\n      {\n        // if vc tests were run, print out results before the mask test\n        if ((block_info->sum_stats_vc.size() > 0) && !params.p_joint_only)\n          print_vc_sumstats(snp_index, \"ADD\", wgr_string, block_info, snpinfo, files, &params);\n\n        compute_score(isnp, snp_index, chrom, thread_num, test_string + params.condtl_suff, model_type + params.condtl_suff, res, p_sd_yres, params, pheno_data, Gblock, block_info, snpinfo, m_ests, firth_est, files, sout);\n\n        // for joint test, store logp\n        if (params.joint_test)\n          block_info->pval_log = Gblock.thread_data[thread_num].pval_log;\n\n        if (params.w_interaction)\n          apply_interaction_tests(snp_index, isnp, thread_num, res, p_sd_yres, model_type, test_string, &pheno_data, nullHLM, &in_filters, &files, &Gblock, block_info, snpinfo, &m_ests, &firth_est, &params, sout);\n      } catch (...) 
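/* record the error; it is rethrown on the main thread after the parallel loop */ 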
{\n        err_caught(isnp) = true;\n        block_info->sum_stats[0] = boost::current_exception_diagnostic_information();\n        continue;\n      }\n    }\n\n#if defined(_OPENMP)\n  setNbThreads(params.threads);\n#endif\n\n  // check no errors\n  if(err_caught.any())\n    for(int i = 0; i < err_caught.size(); i++)\n      if(err_caught(i)) throw all_snps_info[i].sum_stats[0];\n\n}\n\n/*\nvoid Data::compute_tests_st(int const& chrom, vector<uint64> indices,vector< vector < uchar > >& snp_data_blocks, vector< uint32_t > insize, vector< uint32_t >& outsize, vector<variant_block> &all_snps_info){\n\n  size_t const bs = indices.size();\n\n  // start openmp for loop\n#if defined(_OPENMP)\n  setNbThreads(1);\n#pragma omp parallel for schedule(dynamic)\n#endif\n  for(size_t isnp = 0; isnp < bs; isnp++) {\n    uint32_t const snp_index = indices[isnp];\n\n    // to store variant information\n    variant_block* block_info = &(all_snps_info[isnp]);\n\n    if( !params.build_mask )\n      parseSNP(isnp, chrom, &(snp_data_blocks[isnp]), insize[isnp], outsize[isnp], &params, &in_filters, pheno_data.masked_indivs, pheno_data.phenotypes_raw, &snpinfo[snp_index], &Gblock, block_info, sout);\n\n    if(params.w_interaction) {\n      if(params.interaction_snp && (snpinfo[snp_index].ID == in_filters.interaction_cov))\n        block_info->skip_int = true;\n      get_interaction_terms(isnp, &pheno_data, &Gblock, block_info, nullHLM, &params, sout);\n    }\n\n    // for QTs (or BTs with firth approx): project out covariates & scale\n    residualize_geno(isnp, block_info, false, pheno_data.new_cov, &Gblock, &params);\n\n    // skip SNP if fails filters\n    if( block_info->ignored || params.getCorMat ) continue;\n\n    reset_stats(block_info, params);\n  }\n\n#if defined(_OPENMP)\n  setNbThreads(params.threads);\n#endif\n\n  int npass = 0;\n  for(size_t isnp = 0; (isnp < bs) && (npass == 0); isnp++)\n    if(!all_snps_info[isnp].ignored) npass++;\n\n  // skip block if all fails filters\n  if( (npass == 0) || params.getCorMat ) return;\n\n  compute_score(indices, chrom, test_string, model_type, res, p_sd_yres, params, pheno_data, Gblock, all_snps_info, snpinfo, m_ests, firth_est, files, sout);\n\n  //if( (isnp==0) || (isnp == (n_snps-1)) ) cout << \"G\"<<isnp+1<<\" MAF = \" <<  block_info.MAF << endl;\n\n  if(params.w_interaction) {\n    // start openmp for loop\n#if defined(_OPENMP)\n    setNbThreads(1);\n#pragma omp parallel for schedule(dynamic)\n#endif\n    for(size_t isnp = 0; isnp < bs; isnp++) {\n      uint32_t const snp_index = indices[isnp];\n      variant_block* block_info = &(all_snps_info[isnp]);\n      apply_interaction_tests(snp_index, isnp, res, p_sd_yres, model_type, test_string, &pheno_data, nullHLM, &in_filters, &files, &Gblock, block_info, snpinfo, &m_ests, &firth_est, &params, sout);\n    }\n#if defined(_OPENMP)\n    setNbThreads(params.threads);\n#endif\n  }\n\n}\n*/\n\n/////////////////////////////////////////////////\n/////////////////////////////////////////////////\n////    Testing mode (joint tests)\n/////////////////////////////////////////////////\n/////////////////////////////////////////////////\n\nvoid Data::test_joint() {\n\n  \n  sout << \"Association testing mode (joint tests)\";\n\n  std::chrono::high_resolution_clock::time_point t1, t2;\n  string out;\n  vector < string > out_split, tmp_str;\n  // output files\n  Files ofile;\n  // use pointer to class since it contains non-copyable elements\n  vector < std::shared_ptr<Files> > ofile_split;\n\n  // set some parameters\n  if( 
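/* set up mask-building parameters before genotypes are read */ 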
params.build_mask ) bm.prep_run(params, files);\n\n#if defined(_OPENMP)\n  sout << \" with \" << (params.streamBGEN? \"fast \" : \"\") << \"multithreading using OpenMP\";\n#endif\n  sout << endl;\n\n  file_read_initialization(); // set up files for reading\n  read_pheno_and_cov(&files, &params, &in_filters, &pheno_data, &m_ests, &Gblock, sout);   // read phenotype and covariate files\n  prep_run(&files, &in_filters, &params, &pheno_data, &m_ests, sout); // check blup files and adjust for covariates\n  set_groups_for_testing();   // set groups of snps to test jointly\n  print_usage_info(&params, &files, sout);\n  print_test_info();\n  if(params.w_interaction && (params.trait_mode==0) && !params.no_robust && !params.force_robust) \n    nullHLM.prep_run(&pheno_data, &params);\n  if(!params.skip_test) setup_output(&ofile, out, ofile_split, out_split); // result file\n  if(params.trait_mode) set_nullreg_mat();\n  sout << endl;\n\n#ifdef WITH_HTSLIB\n  if (params.remeta_save_ld) {\n    for (size_t i = 0; i < files.pheno_names.size(); ++i) {\n      if(params.pheno_pass(i)) {\n        remeta_sumstats.skat_matrix_writers.emplace_back(\n          RegenieLDMatrixWriter(\n            files.out_file + \"_\" + files.pheno_names[i],\n            params.pheno_counts(i, 0)\n          )\n        );\n      } else {\n        remeta_sumstats.skat_matrix_writers.emplace_back(\n          RegenieLDMatrixWriter()\n        );\n      }\n    }\n    remeta_sumstats.sparsity_threshold = params.remeta_ld_spr;\n  }\n#endif\n\n  // start analyzing each chromosome\n  bool block_init_pass = false;\n  int block = 0, chrom_nb, bs;\n  tally snp_tally;\n  vector< variant_block > block_info;\n  if(params.joint_test) jt.scale_denum = params.n_analyzed - jt.ncovars; // for gates\n  initialize_thread_data(Gblock.thread_data, params);\n\n  for (auto const& chrom : files.chr_read){\n\n    if( !in_map(chrom, chr_map) ) continue;\n\n    chrom_nb = chr_map[chrom][1];\n\n    // if no sets in chromosome, skip\n    if(chrom_nb == 0)  continue;\n\n    // If specified starting block\n    if(!block_init_pass && (params.start_block > (block + chrom_nb)) ) {\n      for(int bb = 0; bb < chrom_nb ; bb++)\n        snp_tally.snp_count += jt.setinfo[chrom - 1][bb].snp_indices.size();\n      block += chrom_nb;\n      continue;\n    }\n\n    sout << \"Chromosome \" << chrom << \" [\" << chrom_nb << \" sets in total]\\n\";\n\n    // read polygenic effect predictions from step 1\n    blup_read_chr(false, chrom, m_ests, files, in_filters, pheno_data, params, sout);\n\n    // compute phenotype residual (adjusting for BLUP [and covariates for BTs])\n    if(params.trait_mode == 1) compute_res_bin(chrom);\n    else if(params.trait_mode == 2) compute_res_count(chrom);\n    else if(params.trait_mode == 3) compute_res_cox(chrom);\n    else compute_res();\n\n\n    // analyze by blocks of SNPs\n    for(int bb = 0; bb < chrom_nb ; bb++) {\n\n      bs = jt.setinfo[chrom - 1][bb].snp_indices.size();\n\n      // If specified starting block\n      if(!block_init_pass && (params.start_block > (block+1)) ) {\n        snp_tally.snp_count += bs;\n        block++;\n        continue;\n      } else if(!block_init_pass) block_init_pass = true;\n\n      sout << \" set [\" << block + 1 << \"/\" << params.total_n_block << \"] : \" << jt.setinfo[chrom - 1][bb].ID << \" - \" << bs << \" variants...\" << flush;\n      if(params.joint_test && !params.build_mask) allocate_mat(Gblock.Gmat, params.n_samples, bs);\n      block_info.resize(bs);\n\n      // compute single snp association 
test statistic\n      get_sum_stats(chrom, bb, block_info);\n\n      // update number of variants (if masks were built)\n      bs = block_info.size();\n      jt.nvars = bs;\n\n      if(params.skip_test) { // skip assoc tests\n        snp_tally.snp_count += bs;\n        block++;\n        continue;\n      }\n\n      // tally the results\n      for (auto const& snp_data : block_info){\n\n        if( snp_data.ignored ) {\n          snp_tally.n_ignored_snps++;\n          jt.nvars--;\n          continue;\n        }\n\n        snp_tally.n_ignored_tests += snp_data.ignored_trait.count();\n        if(params.firth || params.use_SPA) {\n          n_corrected += (!snp_data.ignored_trait && snp_data.is_corrected).count();\n          snp_tally.n_failed_tests += (!snp_data.ignored_trait && snp_data.test_fail).count();\n          if(params.w_interaction) {\n            n_corrected += (2 + params.ncov_interaction) * snp_data.is_corrected_inter.count(); // main, inter & joint\n            snp_tally.n_failed_tests += (2 + params.ncov_interaction) * (snp_data.is_corrected_inter && snp_data.test_fail_inter).count(); // main, inter & joint\n          }\n        }\n\n        for(int j = 0; j < params.n_pheno; ++j) {\n\n          if( !params.pheno_pass(j) || snp_data.ignored_trait(j) ) {\n            if(!params.split_by_pheno) // if using single file, print NAs for snp/trait sum stats\n              ofile << snp_data.sum_stats[j];\n\n            continue;\n          }\n\n          if(params.split_by_pheno)\n            (*ofile_split[j]) << snp_data.sum_stats[j]; // add test info\n          else\n            ofile << snp_data.sum_stats[j]; // add test info\n        }\n\n      }\n\n      if( params.joint_test && block_info.size()){\n\n        // compute and print set-based test result\n        t1 = std::chrono::high_resolution_clock::now();\n        sout << \"     -computing joint association tests...\" << flush;\n\n        jt.get_variant_names(chrom, bb, snpinfo);\n        tmp_str = jt.apply_joint_test(chrom, bb, &pheno_data, res, &Gblock, block_info, files, &params);\n\n        for(int j = 0; j < params.n_pheno; ++j) {\n          if(params.split_by_pheno)\n            (*ofile_split[j]) << tmp_str[j];\n          else\n            ofile << tmp_str[j]; // add test info\n        }\n\n        auto t2 = std::chrono::high_resolution_clock::now();\n        auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1);\n        sout << \"done (\" << duration.count() << \"ms) \"<< endl;\n      }\n\n      snp_tally.snp_count += bs;\n      block++;\n    }\n\n  }\n\n  sout << print_summary(&ofile, out, ofile_split, out_split, n_corrected, snp_tally, files, firth_est, params);\n\n  if(params.write_masks)  bm.closeFiles();\n\n}\n\n\n\nvoid Data::set_groups_for_testing() {\n\n  int blocks_left = params.n_block;\n  params.total_n_block = 0;\n\n  // annotate variants by categories\n  if(params.build_mask) {\n    get_masks_info(&files, &params, &in_filters, bm.annotations, bm.regions, bm.masks, bm.mask_out, bm.all_masks, snpinfo, sout);\n    bm.setBins(&params, sout);\n  }\n\n  // read list of variant sets to use for joint test\n  read_setlist(&files, &params, &in_filters, jt.setinfo, snpinfo, bm.all_masks, bm.max_aaf, sout);\n\n  // delete snpID map\n  in_filters.snpID_to_ind.clear();\n\n  // for each chromosome, count number of variant sets\n  map<int, vector<int> >::iterator itr;\n  map<int, vector<int> > m1;\n  for (itr = chr_map.begin(); itr != chr_map.end(); ++itr) {\n    int chrom = itr->first;\n    int nb = 
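/* number of variant sets on this chromosome */ 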
jt.setinfo[ chrom - 1].size();\n\n    if(params.n_block > 0) {\n      if(blocks_left > 0) {\n        int minb = min(nb, blocks_left);\n        //sout << << endl;\n        itr->second[1] = minb;\n        params.total_n_block += minb;\n        blocks_left -= minb;\n      }\n    } else {\n      itr->second[1] = nb;\n      params.total_n_block += nb;\n    }\n    m1[ itr->first ] = itr->second;\n  }\n  chr_map = m1;\n\n\n  // summarize block sizes\n  sout << left << std::setw(20) << \" * # threads\" << \": [\" << params.threads << \"]\\n\";\n  sout << left << std::setw(20) << \" * # tested sets\" << \": [\" << params.total_n_block << \"]\\n\";\n  if(params.start_block > params.total_n_block)\n    throw \"Starting set > number of sets analyzed\";\n  else if(params.start_block > 1) sout << \"    + skipping to set #\" << params.start_block << endl;\n  sout << left << std::setw(20) << \" * max block size\" << \": [\" << params.block_size << \"]\\n\";\n\n  if(params.build_mask) sout << \" * rule used to build masks : \" << params.mask_rule << endl;\n  if(params.vc_test) {\n\n    if(params.skato_rho.size() == 0){\n      params.skato_rho = ArrayXd::Zero(1); // assume rho=0\n      sout << \" * computing gene-based tests for each set of variants included in a mask\\n\";\n    } else {\n      if(CHECK_BIT(params.vc_test,1) && (params.skato_rho(0) != 0) ){\n        ArrayXd tmp_rho (params.skato_rho.size()+1); tmp_rho(0) = 0; tmp_rho.tail(params.skato_rho.size()) = params.skato_rho; // insert rho=0 for skat\n        params.skato_rho = tmp_rho;\n      }\n      IOFormat Fmt(StreamPrecision, DontAlignCols, \",\", \"\", \"\", \"\",\"\",\"\");\n      sout << \" * computing gene-based tests for each set of variants included in a mask (rho=[\" << params.skato_rho.matrix().transpose().format(Fmt) << \"])\\n\";\n    }\n\n    sout << \"  -variants with MAC <= \" << params.skat_collapse_MAC << \" are collapsed into a mask\\n\";\n    if(params.vc_multiply_weights)\n      sout << \"  -user-provided weights will be multiplied by default weights [from Beta(MAF,\"<< params.skat_a1 <<\",\"<< params.skat_a2 <<\")] for SKAT/ACAT tests\\n\";\n    else if(params.vc_with_weights)\n      sout << \"  -user-provided weights will be used for gene-based tests\\n\";\n    else\n      sout << \"  -weights are obtained from Beta(MAF,\"<< params.skat_a1 <<\",\"<< params.skat_a2 <<\")\\n\";\n    sout << \"  -list of gene-based tests run: \" << get_test_list(params.vc_test, params.vc_tests_map) << \"\\n\";\n\n    // set max rho to 0.999 as it will o.w. 
cause issue with skat-o p-value calculation\n    if(params.skato_rho.size() > 1) params.skato_rho = params.skato_rho.min(0.999); \n\n    // single p per gene\n    if(params.apply_gene_pval_strategy)\n      sout << \" * applying ACAT to output overall gene p-value\\n\";\n  }\n\n  if(params.remeta_save_ld)\n    sout << \" * saving SKAT LD matrices for REMETA\\n\";\n\n}\n\n// test SNPs in block\nvoid Data::get_sum_stats(int const& chrom, int const& varset, vector<variant_block>& all_snps_info){\n\n  vector< vector < uchar > > snp_data_blocks;\n  vector< uint32_t > insize, outsize;\n\n  if (params.mask_loo) {\n    getMask_loo(chrom, varset, snp_data_blocks, insize, outsize, all_snps_info);\n  } else {\n\n    auto t1 = std::chrono::high_resolution_clock::now();\n    vset* set_info = &(jt.setinfo[chrom - 1][varset]);\n\n    // read in markers and if applicable build masks\n    if(!params.build_mask) readChunk(set_info->snp_indices, chrom, snp_data_blocks, insize, outsize, all_snps_info);\n    else {\n      getMask(chrom, varset, snp_data_blocks, insize, outsize, all_snps_info);\n      // update size with new masks\n      int n_snps = set_info->snp_indices.size();\n      //cerr << \"M=\" << n_snps << endl;\n      if(params.skip_test || (n_snps == 0)) return;\n\n      // starting association testing with built masks\n      t1 = std::chrono::high_resolution_clock::now();\n      sout << \"     -computing association tests...\" << flush;\n    }\n\n    // analyze using openmp\n    compute_tests_mt(chrom, set_info->snp_indices, snp_data_blocks, insize, outsize, all_snps_info);\n\n    auto t2 = std::chrono::high_resolution_clock::now();\n    auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1);\n    sout << \"done (\" << duration.count() << \"ms) \"<< endl;\n\n  }\n\n}\n\n\nvoid Data::readChunk(vector<uint64>& indices, int const& chrom, vector< vector < uchar > >& snp_data_blocks, vector<uint32_t>& insize, vector<uint32_t>& outsize, vector<variant_block>& all_snps_info){\n\n\n  int const n_snps = indices.size();\n\n  if((params.file_type == \"bgen\") && params.streamBGEN){\n    snp_data_blocks.resize( n_snps );\n    insize.resize(n_snps); outsize.resize(n_snps);\n    vector<uint64> offsets(n_snps);\n    for (int i = 0; i < n_snps; i++) offsets[i] = snpinfo[indices[i]].offset;\n\n    readChunkFromBGEN(&files.geno_ifstream, insize, outsize, snp_data_blocks, offsets);\n\n  } else if((params.file_type == \"bgen\") && !params.streamBGEN) \n    readChunkFromBGENFileToG(indices, chrom, snpinfo, &params, Gblock.Gmat, Gblock.bgen, &in_filters, pheno_data.masked_indivs, pheno_data.phenotypes_raw, all_snps_info, sout);\n  else if(params.file_type == \"pgen\") {\n    readChunkFromPGENFileToG(indices, chrom, &params, &in_filters, Gblock.Gmat, Gblock.pgr, pheno_data.masked_indivs, pheno_data.phenotypes_raw, snpinfo, all_snps_info);\n  } else {\n\n    snp_data_blocks.resize( n_snps );\n    for(int isnp = 0; isnp < n_snps; isnp++) {\n\n      jumpto_bed( snpinfo[ indices[isnp] ].offset, files.bed_block_size, files.geno_ifstream);\n      snp_data_blocks[isnp].resize(files.bed_block_size);\n      files.geno_ifstream.read( reinterpret_cast<char *> (&snp_data_blocks[isnp][0]), files.bed_block_size);\n\n    }\n  }\n\n}\n\nvoid Data::getMask(int const& chrom, int const& varset, vector< vector < uchar > >& snp_data_blocks, vector<uint32_t>& insize, vector<uint32_t>& outsize, vector<variant_block>& all_snps_info){\n\n  auto t1 = std::chrono::high_resolution_clock::now();\n  vset* set_info = 
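/* variant set processed in this call */ 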
&(jt.setinfo[chrom - 1][varset]);\n\n  // do it in chunks to reduce memory usage\n  bool last_chunk = false;\n  int n_snps = set_info->snp_indices.size(), nvar_read = 0;\n  int nchunks, bsize; \n  SpMat vc_sparse_gmat;\n  if(params.use_max_bsize) { // process all variants at once\n    nchunks = 1;\n    bsize = n_snps; \n  } else { // process variants in blocks\n    nchunks = ceil( n_snps * 1.0 / params.block_size );\n    bsize = params.block_size; // default number of SNPs to read at a time\n  }\n  //if(params.mask_loo) bm.nmasks_total = n_snps;\n\n  // custom user weights\n  ArrayXd snp_weights = ArrayXd::Constant(n_snps, 1), vc_weights, vc_weights_acat;\n  if( params.vc_with_weights )\n    if(!get_custom_weights(set_info->ID, snp_weights, snpinfo, set_info->snp_indices)){\n      sout << \"\\n     -WARNING: all variants have 0 weights (set will be skipped)\\n\";\n      set_info->snp_indices.resize(0);\n      all_snps_info.resize(0);\n      return;\n    }\n\n  if(params.verbose) sout << nchunks << \" chunks\";\n  sout << \"\\n     -reading in genotypes\" << ( params.vc_test ? \", computing gene-based tests\" : \"\" ) << \" and building masks...\" << flush;\n\n  if(params.debug) sout << \"(1)\" << print_mem() << \"...\" << flush;\n  bm.prepMasks(params.n_samples, set_info->ID);\n  allocate_mat(Gblock.Gmat, params.n_samples, bsize);\n  if(params.vc_test) {\n    set_info->Jmat = MatrixXb::Constant(n_snps + bm.nmasks_total, bm.nmasks_total, false); // MxKm (last S rows are for ultra-rare masks)\n    set_info->ultra_rare_ind = ArrayXb::Constant(n_snps, false); // identify which vars are rare\n    set_info->vc_rare_mask.resize(params.n_samples, bm.nmasks_total); // holds the collapsed ultra-rare variant mask\n    set_info->vc_rare_mask.setZero();\n    set_info->vc_rare_mask_non_missing = MatrixXb::Constant(params.n_samples, bm.nmasks_total, false); // distinguish 0 from missing\n    vc_sparse_gmat.resize(params.n_samples, n_snps + bm.nmasks_total); // store wG\n    vc_sparse_gmat.setZero();\n    vc_weights = ArrayXd::Zero(n_snps + bm.nmasks_total, 1);\n    vc_weights.head( n_snps ) = snp_weights;\n    vc_weights_acat = vc_weights;\n  }\n  if(params.debug) sout << \"(2)\" << print_mem() << \"...\" << flush;\n\n\n  for(int i = 0; i < nchunks; i++){\n\n    last_chunk = ( i == (nchunks-1) );\n    if( last_chunk ) {\n      bsize = n_snps - i * bsize; // use remainder number of variants\n      allocate_mat(Gblock.Gmat, params.n_samples, bsize);\n    }\n\n    vector<uint64> indices (set_info->snp_indices.begin() + nvar_read, set_info->snp_indices.begin() + nvar_read + bsize);\n    readChunk(indices, chrom, snp_data_blocks, insize, outsize, all_snps_info);\n\n    // build genotype matrix\n    if( ((params.file_type == \"bgen\") && params.streamBGEN) || params.file_type == \"bed\") {\n#if defined(_OPENMP)\n      setNbThreads(1);\n#pragma omp parallel for schedule(dynamic)\n#endif\n      for(int isnp = 0; isnp < bsize; isnp++) {\n\n        uint32_t snp_index = indices[isnp];\n\n        variant_block* block_info = &(all_snps_info[isnp]);\n        parseSNP(isnp, chrom, &(snp_data_blocks[isnp]), insize[isnp], outsize[isnp], &params, &in_filters, pheno_data.masked_indivs, pheno_data.phenotypes_raw, &snpinfo[snp_index], &Gblock, block_info, sout);\n\n      }\n#if defined(_OPENMP)\n      setNbThreads(params.threads);\n#endif\n    }\n\n    // update mask (taking max/sum)\n    /*if(params.mask_loo)\n      bm.updateMasks_loo(nvar_read, bsize, &params, &in_filters, pheno_data.masked_indivs, &Gblock, all_snps_info, *set_info, 
snpinfo, sout);\n    else*/\n      bm.updateMasks(nvar_read, bsize, &params, &in_filters, pheno_data.masked_indivs, &Gblock, snp_weights, all_snps_info, *set_info, snpinfo, sout);\n\n    if(params.vc_test) // get G and w\n      update_vc_gmat(vc_sparse_gmat, vc_weights, vc_weights_acat, set_info->ultra_rare_ind, nvar_read, bsize, params, in_filters.ind_in_analysis, Gblock.Gmat, all_snps_info, set_info->Jmat);\n\n    /*\n    if(params.debug){ \n      cerr << \"GG.diag()=\\n\" << Gblock.Gmat.array().square().colwise().sum() << \"\\n\";\n      MatrixXd Gtmp_res;\n      residualize_gmat(false, pheno_data.new_cov, Gblock.Gmat, Gtmp_res, params);\n      cerr << \"GrGr.diag()=\\n\" << Gtmp_res.array().square().colwise().sum() << \"\\n\";\n      cerr << \"WGGW.diag()=\\n\" << vc_weights.head(bsize).square() * Gtmp_res.array().square().colwise().sum().matrix().transpose().array() << \"\\n\";\n    }\n    */\n\n    nvar_read += bsize;\n  }\n\n  // check mask and store in setinfo & snpinfo\n  /*if(params.mask_loo)\n    bm.computeMasks_loo(&params, &in_filters, pheno_data.masked_indivs, pheno_data.phenotypes_raw, &Gblock, all_snps_info, *set_info, snpinfo, sout);\n  else*/\n    bm.computeMasks(&params, &in_filters, pheno_data.masked_indivs, pheno_data.phenotypes_raw, &Gblock, all_snps_info, *set_info, snpinfo, sout);\n  if(params.debug) sout << \"(3)\" << print_mem() << \"...\" << flush;\n\n  if(params.vc_test) {\n    #ifdef WITH_HTSLIB\n      remeta_sumstats.skat_snplist = &bm.remeta_snplist;\n      remeta_sumstats.gene_name = &set_info->ID;\n    #endif\n    try {\n    compute_vc_masks(vc_sparse_gmat, vc_weights, vc_weights_acat, set_info->vc_rare_mask, set_info->vc_rare_mask_non_missing, pheno_data.new_cov, m_ests, firth_est, res, pheno_data.phenotypes_raw, pheno_data.masked_indivs, set_info->Jmat, all_snps_info, in_filters.ind_in_analysis, params, remeta_sumstats); \n    } catch (std::exception const& e) {\n      sout << \"WARNING: \" << e.what();\n    } catch (...) {\n      sout << \"WARNING: error in gene-based tests. 
Skipping set...\";\n    }\n\n    set_info->Jmat.resize(0,0);\n    set_info->ultra_rare_ind.resize(0);\n    set_info->vc_rare_mask.setZero(); set_info->vc_rare_mask.resize(0,0); set_info->vc_rare_mask.data().squeeze();\n    set_info->vc_rare_mask_non_missing.resize(0,0);\n  }\n  if(params.debug) sout << \"(4)\" << print_mem() << \"...\" << flush;\n\n  sout << \"done\";\n  auto t2 = std::chrono::high_resolution_clock::now();\n  auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1);\n  sout << \" (\" << duration.count() << \"ms) \"<< endl;\n\n}\n\nvoid Data::getMask_loo(int const& chrom, int const& varset, vector< vector < uchar > >& snp_data_blocks, vector<uint32_t>& insize, vector<uint32_t>& outsize, vector<variant_block>& all_snps_info){\n\n  MeasureTime mt;\n  vset* set_info = &(jt.setinfo[chrom - 1][varset]);\n  vector<uint64> orig_indices = set_info->snp_indices;\n  vector<variant_block> out_snps_info;\n  int n_snps = orig_indices.size();\n\n  // read in variants in chunks storing as sparse matrix\n  sout << \"\\n     -reading in genotypes...\" << flush;\n  mt.start_ms();\n  if(params.debug) sout << \"(0)\" << print_mem() << \"...\";\n  uint64 snp_index; \n  SpMat rv_mat(params.n_samples, n_snps);\n  ArrayXb in_lovo_mask(n_snps), ur_variant(n_snps), var_flip(n_snps);\n  in_lovo_mask = false; ur_variant = false; var_flip = false;\n  ArrayXd Gvec(params.n_samples), var_mafs(n_snps);\n\n  for(int snp = 0, j = 0; snp < n_snps; snp++){\n    snp_index = orig_indices[ snp ];\n    read_snp(false, snpinfo[ snp_index ].offset, Gvec, in_filters.ind_in_analysis, in_filters.ind_ignore, &files, Gblock.pgr, &params, false);\n    in_lovo_mask(snp) = bm.check_in_lovo_mask(Gvec, in_filters, set_info->ID, snpinfo[ snp_index ], ur_variant(j), var_flip(j), var_mafs(j), chrom, &params);\n    if( in_lovo_mask(snp) )\n      rv_mat.col(j++) = Gvec.matrix().sparseView();\n  }\n\n  int n_snps_lovo = in_lovo_mask.count();\n  ArrayXi col_indices_lovo_mask = get_true_indices( in_lovo_mask );\n  // remove cols/entries not in any lovo masks\n  rv_mat.conservativeResize(params.n_samples, n_snps_lovo);\n  rv_mat.makeCompressed();\n  ur_variant.conservativeResize(n_snps_lovo, 1);\n  var_flip.conservativeResize(n_snps_lovo, 1);\n  var_mafs.conservativeResize(n_snps_lovo, 1);\n  ArrayXi col_indices_lovo_mask_non_ur = get_true_indices( !ur_variant );\n\n  // custom user weights\n  ArrayXd snp_weights = ArrayXd::Constant(n_snps_lovo, 1);\n  if( params.vc_with_weights )\n    if(!get_custom_weights(set_info->ID, snp_weights, snpinfo, col_indices_lovo_mask, orig_indices)){\n      sout << \"\\n     -WARNING: all variants have 0 weights (set will be skipped)\\n\";\n      return;\n    }\n\n  if(params.debug) sout << n_snps_lovo << \" variants...(1)\" << print_mem() << \"...\";\n  sout << mt.stop_ms() << \"\\n\";\n\n  // generate LOVO masks in chunks\n  bool last_chunk = false;\n  ArrayXi lovo_masks_indices = check_lovo_snplist(col_indices_lovo_mask, orig_indices, snpinfo, params.masks_loo_snpfile); // if computing a subset of the lovo masks\n  int neff_lovo = lovo_masks_indices.size(), nchunks, bsize, nvar_read = 0; // default number of SNPs to read at a time\n  bsize = min(neff_lovo, 128);\n  nchunks = ceil( neff_lovo * 1.0 / bsize );\n\n  sout << \"     -splitting into \" <<  nchunks << \" chunk\" << (nchunks > 1 ? 
\"s\" : \"\") << \" of size \" << bsize << \" (\" << neff_lovo << \" LOVO masks in total)\\n\";\n  mt.start_ms();\n\n  // For SKAT tests\n  ArrayXd vc_weights, vc_weights_acat;\n  SpMat vc_sparse_gmat; // contains single variants + ultra-rare masks (incl.for full mask)\n  if(params.vc_test) {\n    MeasureTime mt_skat;\n    if(params.debug) mt_skat.start_ms();\n    vc_sparse_gmat.resize(params.n_samples, n_snps_lovo); // store wG\n    vc_sparse_gmat.setZero();\n    vc_weights = snp_weights;\n    vc_weights_acat = vc_weights;\n    // store G & SKAT weights\n    update_vc_gmat(vc_sparse_gmat, vc_weights, vc_weights_acat, rv_mat, ur_variant, var_flip, var_mafs, in_filters.ind_in_analysis, params);\n    if(params.trait_mode == 1) // if need to apply cc correction\n      check_cc_correction(vc_sparse_gmat, vc_weights, pheno_data.new_cov, m_ests, firth_est, res, pheno_data.phenotypes_raw, pheno_data.masked_indivs, params); \n    if(params.debug) sout << \"(0)skat prep...\"<< mt_skat.stop_ms() << \"\\n\";\n  }\n\n  for(int i = 0; i < nchunks; i++) {\n\n    last_chunk = ( i == (nchunks-1) );\n    if( last_chunk )\n      bsize = neff_lovo - i * bsize; // use remainder number of variants\n    set_info->snp_indices = orig_indices;\n    MeasureTime mt_chunk;\n\n    sout << \"      +chunk #\" << i+1 << \" (\" << bsize << \" LOVO masks)\\n       -\" << ( params.vc_test ? \"computing gene-based tests and \" : \"\" ) << \"building masks...\" << flush;\n    mt_chunk.start_ms();\n\n    bm.nmasks_total = bsize + last_chunk;\n    bm.prepMasks(params.n_samples, set_info->ID);  \n    if(!bm.take_max && !bm.take_comphet) {\n      bm.nsites = ArrayXi::Constant(bm.nmasks_total, n_snps_lovo - 1); // each loo mask has (n-1) sites included for AAF calculation\n      if( last_chunk ) bm.nsites.tail(1) += 1; // last entry is for full mask\n    }\n\n    ArrayXi chunk_indices = lovo_masks_indices.segment(nvar_read, bsize);\n    ArrayXb in_chunk = ArrayXb::Constant(n_snps_lovo, false);\n    in_chunk(chunk_indices) = true;\n    if(params.debug) sout << \"(1)\" << print_mem() << \"...\" << flush;\n\n    // collapse mask for variants not in chunk\n    ArrayXd mask_excl_chunk = ArrayXd::Constant(params.n_samples, -3);\n    ArrayXd ur_mask_excl_chunk = ArrayXd::Constant(params.n_samples, -3);\n    if((!in_chunk).any())\n      bm.collapse_mask_chunk(get_true_indices(!in_chunk), rv_mat, ur_variant, var_flip, snp_weights, mask_excl_chunk, ur_mask_excl_chunk, in_filters.ind_in_analysis);\n    if(params.debug) {\n      sout << mt_chunk.stop_ms() << \"\\n\";\n      mt_chunk.start_ms();\n    }\n\n    // update mask (taking max/sum)\n    bm.updateMasks_loo(chunk_indices, last_chunk, rv_mat, ur_variant, var_flip, snp_weights, mask_excl_chunk, ur_mask_excl_chunk, in_filters.ind_in_analysis, *set_info, params.threads);\n    if(params.debug) {\n      sout << \"(2)\" << print_mem() << \"...\" << flush;\n      sout << mt_chunk.stop_ms() << \"\\n\";\n      mt_chunk.start_ms();\n    }\n\n    // check mask and store in setinfo & snpinfo\n    bm.computeMasks_loo(col_indices_lovo_mask(chunk_indices), last_chunk, &params, &in_filters, pheno_data.masked_indivs, pheno_data.phenotypes_raw, &Gblock, all_snps_info, *set_info, snpinfo, sout);\n    if(params.debug) {\n      sout << \"(3)\" << print_mem() << \"...\" << flush;\n      sout << mt_chunk.stop_ms() << \"\\n\";\n      mt_chunk.start_ms();\n    }\n\n    if(params.vc_test) {// run skat/acat\n\n      MatrixXb Jmat = MatrixXb::Constant(n_snps_lovo + bm.nmasks_total, bm.nmasks_total, false); // rows: 
snps + ur masks; cols: lovo sets\n      Jmat(col_indices_lovo_mask_non_ur, all).array() = true; // non-ur snps\n      for(int j = 0; j < chunk_indices.size(); j++) // apply lovo\n        Jmat( chunk_indices(j), j) = false;\n\n      SpMat vc_sparse_gmat_chunk(params.n_samples, Jmat.rows()); // cols: snps + ur masks\n      vc_sparse_gmat_chunk.reserve(vc_sparse_gmat.nonZeros());\n      vc_sparse_gmat_chunk.leftCols(vc_sparse_gmat.cols()) = vc_sparse_gmat;\n\n      ArrayXd vc_weights_chunk(Jmat.rows()), vc_weights_acat_chunk(Jmat.rows());\n      vc_weights_chunk.head(vc_weights.size()) = vc_weights;\n      vc_weights_acat_chunk.head(vc_weights_acat.size()) = vc_weights_acat;\n\n      try {\n        compute_vc_masks(vc_sparse_gmat_chunk, vc_weights_chunk, vc_weights_acat_chunk, set_info->vc_rare_mask, set_info->vc_rare_mask_non_missing, pheno_data.new_cov, m_ests, firth_est, res, pheno_data.phenotypes_raw, pheno_data.masked_indivs, Jmat, all_snps_info, in_filters.ind_in_analysis, params, remeta_sumstats); \n      } catch (std::exception const& e) {\n        sout << \"WARNING: \" << e.what();\n      } catch (...) {\n        sout << \"WARNING: error in gene-based tests. Skipping set...\";\n      }\n\n    }\n\n    if(params.debug) sout << \"(4)\" << print_mem() << \"...\" << flush;\n    nvar_read += bsize;\n    sout << mt_chunk.stop_ms() << \"\\n\";\n\n    if(params.skip_test || (set_info->snp_indices.size() == 0)) continue;\n\n    // burden association tests with built masks\n    sout << \"       -computing association tests...\" << flush;\n    mt_chunk.start_ms();\n    compute_tests_mt(chrom, set_info->snp_indices, snp_data_blocks, insize, outsize, all_snps_info);\n    sout << mt_chunk.stop_ms() << \"\\n\";\n    out_snps_info.insert(out_snps_info.end(), all_snps_info.begin(), all_snps_info.end());\n  }\n\n  if(params.vc_test) {\n    set_info->vc_rare_mask.setZero(); set_info->vc_rare_mask.resize(0,0); set_info->vc_rare_mask.data().squeeze();\n    set_info->vc_rare_mask_non_missing.resize(0,0);\n  }\n\n  sout << \"     -> \" << mt.stop_ms() << \"\\n\";\n  all_snps_info = out_snps_info;\n\n}\n\n/////////////////////////////////////////////////\n/////////////////////////////////////////////////\n////    Testing mode (multi-trait tests)\n/////////////////////////////////////////////////\n/////////////////////////////////////////////////\n\nvoid Data::test_multitrait() \n{\n  sout << \"Association testing mode (multi-trait tests)\";\n\n  std::chrono::high_resolution_clock::time_point t1, t2;\n  string out;\n  vector<string> out_split, tmp_str;\n  // output files\n  Files ofile;\n  // use pointer to class since it contains non-copyable elements\n  vector <std::shared_ptr<Files>> ofile_split;\n\n#if defined(_OPENMP)\n  sout << \" with \" << (params.streamBGEN? 
\"fast \" : \"\") << \"multithreading using OpenMP\";\n#endif\n  sout << endl;\n\n  // Set up \n  file_read_initialization(); // set up files for reading\n  read_pheno_and_cov(&files, &params, &in_filters, &pheno_data, &m_ests, &Gblock, sout);   // read phenotype and covariate files\n  prep_run(&files, &in_filters, &params, &pheno_data, &m_ests, sout); // check blup files and adjust for covariates\n  set_blocks_for_testing();   // set number of blocks\n  print_usage_info(&params, &files, sout);\n  print_test_info();\n  setup_output(&ofile, out, ofile_split, out_split); // result files\n  sout << endl;\n\n  // Set up mt for all chr: verbose level, masks \n  /* if(params.mt_out_all) mt.verbose = 3; */\n  /* mt.verbose = 3; */\n  /* if(params.mt_precomp) mt.precomp = true; */\n  mt.precomp = true;\n\n  mt.setup_masks(pheno_data.masked_indivs);\n\n  // Loop 1: start analyzing each chromosome\n  bool block_init_pass = false;\n  int block = 0, chrom_nsnps, chrom_nb, bs;\n  tally snp_tally;\n  vector< variant_block > block_info;\n  /* initialize_thread_data(Gblock.thread_data, params); */\n\n  for(auto const& chrom : files.chr_read) {\n    if( !in_map(chrom, chr_map) ) continue;\n\n    chrom_nsnps = chr_map[chrom][0];\n    chrom_nb = chr_map[chrom][1];\n    if(chrom_nb == 0) continue;\n\n    // If specified starting block\n    if(!block_init_pass && (params.start_block > (block + chrom_nb)) ) {\n      snp_tally.snp_count += chrom_nsnps;\n      block += chrom_nb;\n      continue;\n    }\n\n    sout << \"Chromosome \" << chrom << \" [\" << chrom_nb << \" blocks in total]\\n\";\n\n    // read polygenic effect predictions from step 1\n    blup_read_chr(false, chrom, m_ests, files, in_filters, pheno_data, params, sout);\n\n    // compute phenotype residual (adjusting for BLUP)\n    if(params.trait_mode == 0) {\n      compute_res();\n    } else {\n      throw std::runtime_error(\"multi-trait tests only for QTs\");\n    }\n\n    // Set up mt for each chr: matrix of traits Y\n    mt.setup_yres(res);\n\n    /* const static IOFormat CSVFormat(StreamPrecision, DontAlignCols, \", \", \"\\n\"); */\n    /* string f_cory = files.out_file + \".regenie.Ycor.chr\" + to_string(chrom) + \".txt\"; */\n    /* mt.compute_cory(mt.Yres, mt.Mask); */\n    /* ofstream out_cory(f_cory.c_str()); */\n    /* out_cory << mt.Ryy.format(CSVFormat); */\n    /* out_cory.close(); */\n\n    // analyze by blocks of SNPs\n    for(int bb = 0; bb < chrom_nb ; bb++) {\n      get_block_size(params.block_size, chrom_nsnps, bb, bs);\n\n      // If specified starting block\n      if(!block_init_pass && (params.start_block > (block+1)) ) {\n        snp_tally.snp_count += bs;\n        block++;\n        continue;\n      } else {\n        if(!block_init_pass) block_init_pass = true;\n      }\n\n      sout << \" block [\" << block + 1 << \"/\" << params.total_n_block << \"] : \" << flush;\n\n      allocate_mat(Gblock.Gmat, params.n_samples, bs);\n      block_info.resize(bs);\n\n      // read SNP, impute missing & compute association test statistic\n      analyze_block_multitrait(chrom, bs, &snp_tally, block_info);\n\n      // print the results\n      if(params.split_by_pheno) {\n        throw std::runtime_error(\"test_multitrait: split_by_pheno\");\n      }\n\n      for (auto const& snp_data : block_info){\n        if( snp_data.ignored ) {\n          snp_tally.n_ignored_snps++;\n          continue;\n        }\n        snp_tally.n_ignored_tests += snp_data.ignored_trait.count();\n\n        /* size_t n_trait_sets = 1; */\n        size_t j = 0;\n     
   ofile << snp_data.sum_stats_mt[j]; // add test info\n      }\n\n      snp_tally.snp_count += bs;\n      block++;\n    }\n\n  }\n\n  sout << print_summary(&ofile, out, ofile_split, out_split, n_corrected, snp_tally, files, firth_est, params);\n}\n\n// test SNPs in block for multi-trait tests\nvoid Data::analyze_block_multitrait(int const& chrom, int const& n_snps, tally* snp_tally, vector<variant_block> &all_snps_info){\n\n  auto t1 = std::chrono::high_resolution_clock::now();\n  const int start = snp_tally->snp_count;\n  vector< vector < uchar > > snp_data_blocks;\n  vector< uint32_t > insize, outsize;\n\n  vector<uint64> indices(n_snps);\n  std::iota(indices.begin(), indices.end(), start);\n\n  readChunk(indices, chrom, snp_data_blocks, insize, outsize, all_snps_info);\n\n  // analyze using openmp\n  /* compute_tests_mt(chrom, indices, snp_data_blocks, insize, outsize, all_snps_info); */\n  compute_tests_mt_multitrait(chrom, indices, snp_data_blocks, insize, outsize, all_snps_info);\n\n  auto t2 = std::chrono::high_resolution_clock::now();\n  auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1);\n  sout << \"done (\" << duration.count() << \"ms) \"<< endl;\n}\n\nvoid Data::compute_tests_mt_multitrait(int const& chrom, vector<uint64> indices,vector< vector < uchar > >& snp_data_blocks, vector< uint32_t > insize, vector< uint32_t >& outsize, vector<variant_block> &all_snps_info){\n  size_t const bs = indices.size();\n  ArrayXb err_caught = ArrayXb::Constant(bs, false);\n\n  // start openmp for loop\n#if defined(_OPENMP)\n  setNbThreads(1);\n#pragma omp parallel for schedule(dynamic)\n#endif\n  for(size_t isnp = 0; isnp < bs; isnp++) {\n    uint32_t const snp_index = indices[isnp];\n\n    int thread_num = 0;\n#if defined(_OPENMP)\n    thread_num = omp_get_thread_num();\n#endif\n\n    // to store variant information\n    variant_block* block_info = &(all_snps_info[isnp]);\n    reset_thread(&(Gblock.thread_data[thread_num]), params);\n\n    parseSNP(isnp, chrom, &(snp_data_blocks[isnp]), insize[isnp], outsize[isnp], &params, &in_filters, pheno_data.masked_indivs, pheno_data.phenotypes_raw, &snpinfo[snp_index], &Gblock, block_info, sout);\n\n    // for QTs: project out covariates & scale\n    residualize_geno(isnp, thread_num, block_info, false, pheno_data.new_cov, &Gblock, &params);\n\n    // skip SNP if fails filters\n    if( block_info->ignored ) continue;\n    \n    reset_stats(block_info, params);\n\n    try {\n      // run multi-trait tests & save summary stats\n      // v1: store results in mt \n      // - doesn't work for multi-threaded calculations\n      /* mt.apply_tests_snp(isnp, Gblock, res, p_sd_yres, params); */\n      /* string tmp_str = mt.print_sumstats(isnp, snp_index, test_string + params.condtl_suff, model_type + params.condtl_suff, block_info, snpinfo, &params); */\n      // v2: store results outside mt, i.e. 
separately for each thread\n      /* MTestsResults mt_results_i = mt.run_tests_snp(isnp, Gblock, res, p_sd_yres, params); */\n      // v3: pre-load Yres/Y0res\n      MTestsResults mt_results_i = mt.run_tests_snp_precomp(isnp, Gblock, params);\n      /* string tmp_str = mt.print_sumstats(isnp, snp_index, test_string + params.condtl_suff, model_type + params.condtl_suff, block_info, snpinfo, &params); */\n      string tmp_str = mt.print_sumstats(mt_results_i, isnp, snp_index, test_string + params.condtl_suff, model_type + params.condtl_suff, block_info, snpinfo, &params);\n\n      block_info->sum_stats_mt[0].append(tmp_str);\n    } catch (...) {\n      err_caught(isnp) = true;\n      block_info->sum_stats[0] = boost::current_exception_diagnostic_information();\n      continue;\n    }\n  }\n\n#if defined(_OPENMP)\n  setNbThreads(params.threads);\n#endif\n\n  // check no errors\n  if(err_caught.any())\n    for(int i = 0; i < err_caught.size(); i++)\n      if(err_caught(i)) throw all_snps_info[i].sum_stats[0];\n\n}\n\n/////////////////////////////////////////////////\n/////////////////////////////////////////////////\n////    Testing mode (MultiPhen test)\n/////////////////////////////////////////////////\n/////////////////////////////////////////////////\n\nvoid Data::test_multiphen() \n{\n  sout << \"Association testing mode (MultiPhen test)\";\n\n  std::chrono::high_resolution_clock::time_point t1, t2;\n  string out;\n  vector<string> out_split, tmp_str;\n  // output files\n  Files ofile;\n  // use pointer to class since it contains non-copyable elements\n  vector <std::shared_ptr<Files>> ofile_split;\n\n#if defined(_OPENMP)\n  sout << \" with \" << (params.streamBGEN? \"fast \" : \"\") << \"multithreading using OpenMP\";\n#endif\n  sout << endl;\n\n  // Set up \n  file_read_initialization(); // set up files for reading\n  read_pheno_and_cov(&files, &params, &in_filters, &pheno_data, &m_ests, &Gblock, sout);   // read phenotype and covariate files\n  prep_run(&files, &in_filters, &params, &pheno_data, &m_ests, sout); // check blup files and adjust for covariates\n  set_blocks_for_testing();   // set number of blocks\n  print_usage_info(&params, &files, sout);\n  print_test_info();\n  setup_output(&ofile, out, ofile_split, out_split); // result files\n  sout << endl;\n\n  // Set up mt\n  prep_multiphen();\n\n  // Loop 1: start analyzing each chromosome\n  bool block_init_pass = false;\n  int block = 0, chrom_nsnps, chrom_nb, bs;\n  tally snp_tally;\n  vector< variant_block > block_info;\n  /* initialize_thread_data(Gblock.thread_data, params); */\n\n  for(auto const& chrom : files.chr_read) {\n    if( !in_map(chrom, chr_map) ) continue;\n\n    chrom_nsnps = chr_map[chrom][0];\n    chrom_nb = chr_map[chrom][1];\n    if(chrom_nb == 0) continue;\n\n    // If specified starting block\n    if(!block_init_pass && (params.start_block > (block + chrom_nb)) ) {\n      snp_tally.snp_count += chrom_nsnps;\n      block += chrom_nb;\n      continue;\n    }\n\n    sout << \"Chromosome \" << chrom << \" [\" << chrom_nb << \" blocks in total]\\n\";\n\n    // read polygenic effect predictions from step 1\n    blup_read_chr(false, chrom, m_ests, files, in_filters, pheno_data, params, sout);\n\n    // compute phenotype residual (adjusting for BLUP)\n    if(params.trait_mode == 0) {\n      compute_res();\n      set_multiphen();\n    } else {\n      throw std::runtime_error(\"MultiPhen test for QTs only\");\n    }\n\n    // analyze by blocks of SNPs\n    for(int bb = 0; bb < chrom_nb ; bb++) {\n      
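// get_block_size is expected to set bs to the block size, using the remainder for the last block of the chromosome;\n      // a sketch of the assumed behavior: bs = (bb == chrom_nb - 1) ? chrom_nsnps - bb * params.block_size : params.block_size;\n      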
get_block_size(params.block_size, chrom_nsnps, bb, bs);\n\n      // If specified starting block\n      if(!block_init_pass && (params.start_block > (block+1)) ) {\n        snp_tally.snp_count += bs;\n        block++;\n        continue;\n      } else {\n        if(!block_init_pass) block_init_pass = true;\n      }\n\n      sout << \" block [\" << block + 1 << \"/\" << params.total_n_block << \"] : \" << flush;\n\n      allocate_mat(Gblock.Gmat, params.n_samples, bs);\n      block_info.resize(bs);\n\n      // read SNP, impute missing & compute association test statistic\n      analyze_block_multiphen(chrom, bs, &snp_tally, block_info);\n\n      // print the results\n      if(params.split_by_pheno) {\n        // split_by_pheno is ignored for the MultiPhen test\n      }\n\n      for (auto const& snp_data : block_info){\n        if( snp_data.ignored ) {\n          snp_tally.n_ignored_snps++;\n          continue;\n        }\n        snp_tally.n_ignored_tests += snp_data.ignored_trait.count();\n\n        /* size_t n_trait_sets = 1; */\n        size_t j = 0;\n        ofile << snp_data.sum_stats_multiphen[j]; // add test info\n      }\n\n      snp_tally.snp_count += bs;\n      block++;\n    }\n\n  }\n\n  sout << print_summary(&ofile, out, ofile_split, out_split, n_corrected, snp_tally, files, firth_est, params);\n}\n\n// test SNPs in block for the MultiPhen test\nvoid Data::analyze_block_multiphen(int const& chrom, int const& n_snps, tally* snp_tally, vector<variant_block> &all_snps_info){\n\n  auto t1 = std::chrono::high_resolution_clock::now();\n  const int start = snp_tally->snp_count;\n  vector< vector < uchar > > snp_data_blocks;\n  vector< uint32_t > insize, outsize;\n\n  vector<uint64> indices(n_snps);\n  std::iota(indices.begin(), indices.end(), start);\n\n  readChunk(indices, chrom, snp_data_blocks, insize, outsize, all_snps_info);\n\n  // analyze using OpenMP\n  compute_tests_mt_multiphen(chrom, indices, snp_data_blocks, insize, outsize, all_snps_info);\n\n  auto t2 = std::chrono::high_resolution_clock::now();\n  auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1);\n  sout << \"done (\" << duration.count() << \"ms) \" << endl;\n}\n\nvoid Data::compute_tests_mt_multiphen(int const& chrom, vector<uint64> indices, vector< vector < uchar > >& snp_data_blocks, vector< uint32_t > insize, vector< uint32_t >& outsize, vector<variant_block> &all_snps_info){\n  size_t const bs = indices.size();\n  ArrayXb err_caught = ArrayXb::Constant(bs, false);\n\n  // start openmp for loop\n#if defined(_OPENMP)\n  setNbThreads(1);\n#pragma omp parallel for schedule(dynamic)\n#endif\n  for(size_t isnp = 0; isnp < bs; isnp++) {\n    uint32_t const snp_index = indices[isnp];\n\n    int thread_num = 0;\n#if defined(_OPENMP)\n    thread_num = omp_get_thread_num();\n#endif\n\n    // to store variant information\n    variant_block* block_info = &(all_snps_info[isnp]);\n    reset_thread(&(Gblock.thread_data[thread_num]), params);\n\n    parseSNP(isnp, chrom, &(snp_data_blocks[isnp]), insize[isnp], outsize[isnp], &params, &in_filters, pheno_data.masked_indivs, pheno_data.phenotypes_raw, &snpinfo[snp_index], &Gblock, block_info, sout);\n\n    // for QTs: project out covariates & scale\n    /* residualize_geno(isnp, thread_num, block_info, false, pheno_data.new_cov, &Gblock, &params); */\n\n    // skip SNP if it fails filters\n    if( block_info->ignored ) continue;\n\n    reset_stats(block_info, params);\n\n    try {\n      // load one 
SNP into Gmat\n      MapMatXd Gmat(Gblock.Gmat.col(isnp).data(), params.n_samples, 1);\n      // create a copy of mphen for every SNP (for parallel processing)\n      MultiPhen mphen_i = mphen;\n\n      // run MultiPhen test & save summary stats\n      /* cout << snpinfo[snp_index].ID << endl; */\n      mphen_i.run(Gmat, pheno_data.cov_phenotypes, pheno_data.new_cov.cols() - 1, params.n_pheno); // last 2 args: #covariates excluding the intercept; #phenotypes\n\n      std::ostringstream buffer;\n      if(params.htp_out) buffer << print_sum_stats_head_htp(snp_index, \"MultiPhen\", model_type + params.condtl_suff, snpinfo, &params) << mphen_i.print_sum_stats_htp(block_info, &params);\n      else buffer << mphen_i.print_sumstats(isnp, snp_index, test_string + params.condtl_suff, model_type + params.condtl_suff, block_info, snpinfo, &params);\n      std::string tmp_str = buffer.str();\n\n      // a single set of traits\n      block_info->sum_stats_multiphen[0].append(tmp_str);\n    } catch (...) {\n      err_caught(isnp) = true;\n      block_info->sum_stats[0] = boost::current_exception_diagnostic_information();\n      continue;\n    }\n  }\n\n#if defined(_OPENMP)\n  setNbThreads(params.threads);\n#endif\n\n  // check no errors\n  if(err_caught.any())\n    for(int i = 0; i < err_caught.size(); i++)\n      if(err_caught(i)) throw all_snps_info[i].sum_stats[0];\n\n}\n\nvoid Data::prep_multiphen()\n{\n  // user parameters\n  mphen.test = params.multiphen_test;\n  mphen.optim = params.multiphen_optim;\n  mphen.pval_thr = params.multiphen_thr;\n  mphen.firth_mult = params.multiphen_firth_mult;\n  mphen.firth_binom = (params.multiphen_firth_mult > 0);\n  mphen.firth_multinom = (params.multiphen_firth_mult > 0);\n  mphen.tol = params.multiphen_tol;\n  mphen.trace = params.multiphen_trace;\n  mphen.verbose = params.multiphen_verbose;\n  // parameters for model fitting\n  mphen.maxit = params.multiphen_maxit; mphen.maxit2 = params.multiphen_maxit2;\n  mphen.strict = params.multiphen_strict;\n  mphen.check_step = (params.multiphen_maxstep > 0);\n  mphen.max_step = params.multiphen_maxstep;\n  mphen.reuse_start = true;\n  mphen.mac_approx_offset = params.multiphen_approx_offset;\n  mphen.pseudo_stophalf = params.multiphen_pseudo_stophalf;\n  mphen.reset_start = params.multiphen_reset_start;\n  mphen.offset_mode = params.multiphen_offset;\n\n  // prepare new matrix of covariates X + matrix of phenotypes Y\n  unsigned int n_samples = pheno_data.new_cov.rows(), n_cov1 = pheno_data.new_cov.cols();\n  unsigned int n_cov = n_cov1 - 1;\n  unsigned int n_phen = params.n_pheno;\n\n  pheno_data.cov_phenotypes.resize(n_samples, n_cov + 2*n_phen + 2); // +2 intercepts\n  // column # 1 = Intercept\n  pheno_data.cov_phenotypes.col(0) = ArrayXd::Constant(n_samples, 1.0);\n  // next n_cov columns = covariates **without** intercept\n  // new_cov has intercept in the last column\n  if(n_cov) {\n    pheno_data.cov_phenotypes.leftCols(n_cov1).rightCols(n_cov) = pheno_data.new_cov.leftCols(n_cov);\n  }\n  // next n_phen columns = phenotypes (skipped here & to be filled in for each chr.)\n  // next & the last column = Intercept\n  pheno_data.cov_phenotypes.rightCols(1) = ArrayXd::Constant(n_samples, 1.0);\n\n  if(!params.strict_mode) throw std::runtime_error(\"--strict mode is required for MultiPhen test\");\n\n  VectorXb Mask = pheno_data.masked_indivs.col(0);\n  for(unsigned int i = 1; i < pheno_data.masked_indivs.cols(); i++) {\n    Mask.col(0).array() = Mask.col(0).array() || pheno_data.masked_indivs.col(i).array();\n  }\n  pheno_data.cov_phenotypes.array().colwise() *= Mask.array().cast<double>().array();\n  mphen.setup_x(Mask, pheno_data.cov_phenotypes, n_cov, n_phen, true, false); // (ignored by MultiPhen) pos_intercept_first = true, pos_phen_first = false\n}\n\nvoid Data::set_multiphen()\n{\n  unsigned int n_samples = pheno_data.new_cov.rows(), n_cov1 = pheno_data.new_cov.cols();\n  unsigned int n_cov = n_cov1 - 1;\n  unsigned int n_phen = params.n_pheno;\n\n  if(pheno_data.cov_phenotypes.rows() != n_samples) throw std::runtime_error(\"#rows in cov_phenotypes\");\n  if(pheno_data.cov_phenotypes.cols() != n_cov + 2*n_phen + 
2) throw std::runtime_error(\"#cols in cov_phenotypes\");\n\n  for(unsigned i = n_cov1, k = 0; k < n_phen; i++, k++) {\n    pheno_data.cov_phenotypes.col(i) = mphen.Mask.select(res.col(k), 0.0);\n  }\n  for(unsigned i = n_cov1 + n_phen + 1, k = 0; k < n_phen; i++, k++) {\n    pheno_data.cov_phenotypes.col(i) = mphen.Mask.select(res.col(k), 0.0);\n  }\n}\n\n/////////////////////////////////////////////////\n/////////////////////////////////////////////////\n////    for LD computation\n/////////////////////////////////////////////////\n/////////////////////////////////////////////////\n\nvoid Data::ld_comp() {\n\n  sout << \"LD computation\";\n\n  string out;\n  vector < string > out_split, tmp_str;\n  // output files\n  Files ofile;\n  // use pointer to class since it contains non-copyable elements\n  vector < std::shared_ptr<Files> > ofile_split;\n\n  // set some parameters\n  if( params.build_mask ) bm.prep_run(params, files);\n\n#if defined(_OPENMP)\n  sout << \" with \" << (params.streamBGEN? \"fast \" : \"\") << \"multithreading using OpenMP\";\n#endif\n  sout << endl;\n\n  file_read_initialization(); // set up files for reading\n  read_pheno_and_cov(&files, &params, &in_filters, &pheno_data, &m_ests, &Gblock, sout);   // read phenotype and covariate files\n  prep_run(&files, &in_filters, &params, &pheno_data, &m_ests, sout); // check blup files and adjust for covariates\n  if(params.build_mask)\n    set_groups_for_testing();   // set groups of snps to test jointly\n  else\n    set_blocks_for_testing();   // set number of blocks\n  print_usage_info(&params, &files, sout);\n  print_test_info();\n  setup_output(&ofile, out, ofile_split, out_split); // result files\n  sout << endl;\n\n  // start analyzing each chromosome\n  initialize_thread_data(Gblock.thread_data, params);\n  params.ld_n = params.extract_vars_order.size();\n\n  if(params.dosage_mode) // with dosages, avoid using sparse matrix\n    compute_ld_dosages(&ofile);\n  else // hard-calls only so use sparse matrix\n    compute_ld_hardcalls(&ofile);\n\n  return;\n}\n\nvoid Data::get_G_indices(ArrayXi& indices_ld, map<string, int>& colnames_Gmat){\n\n  map<string, uint32_t >::iterator itr;\n  int i_absent = colnames_Gmat.size();\n  for (itr = params.extract_vars_order.begin(); itr != params.extract_vars_order.end(); ++itr)\n    if(in_map(itr->first, colnames_Gmat))\n      indices_ld(itr->second) = colnames_Gmat[ itr->first ];\n    else\n      indices_ld(itr->second) = i_absent++; // cols for absent sv/masks are all the same (i.e. the zero vector)\n\n}\n\nvoid Data::write_snplist(ArrayXb& is_absent){\n\n  string const out = files.out_file + \".corr.snplist\";\n  map<string, uint32_t >::iterator itr;\n  Files ofile;\n  IOFormat Fmt(StreamPrecision, DontAlignCols, \" \", \"\\n\", \"\", \"\",\"\",\"\\n\");\n\n  Eigen::Array<std::string,Eigen::Dynamic,1> ID_sorted (params.extract_vars_order.size());\n  for (itr = params.extract_vars_order.begin(); itr != params.extract_vars_order.end(); ++itr)\n      ID_sorted( itr->second ) = itr->first;\n  // write SNP 
list\n  ofile.openForWrite(out, sout);\n  ofile << ID_sorted.format(Fmt);\n  ofile.closeFile();\n\n  if(is_absent.any()){\n    sout << \" WARNING: there were variants\" << (params.build_mask ? \"/masks\" : \"\") << \" not found in the data; these were kept in the LD matrix.\\n\" <<\n      \"  + list is written to [\" << files.out_file << \".corr.forcedIn.snplist]\\n\";\n    ofile.openForWrite(files.out_file + \".corr.forcedIn.snplist\", sout);\n    ofile << ID_sorted(get_true_indices(is_absent)).format(Fmt);\n    ofile.closeFile();\n  }\n\n}\n\nvoid Data::compute_ld_dosages(Files* ofile){\n\n  ArrayXb ld_var_absent = ArrayXb::Constant(params.ld_n, true);\n  map<string, int> colnames_ld_mat;// to track id of cols in full_mat\n  ArrayXi indices_ld(params.ld_n);\n\n  MatrixXd LD = MatrixXd::Zero(params.ld_n, params.ld_n);\n\n  // LD matrix will have first SVs then the burden masks\n  for(size_t isnp = 0; isnp < params.ld_sv_offsets.size(); isnp++) {\n    uint32_t snp_index = params.ld_sv_offsets[isnp];\n    colnames_ld_mat[ snpinfo[ snp_index ].ID ] = colnames_ld_mat.size();\n    ld_var_absent(params.extract_vars_order[snpinfo[ snp_index ].ID]) = false;\n  }\n\n  // build and read in masks (use sparse matrix)\n  SpMat Gmask;\n  MatrixXd Gmask_X;\n  if(params.build_mask) {\n    get_G_masks(Gmask, ld_var_absent, colnames_ld_mat);\n    if(Gmask.cols() > 0){\n      // project covariates\n      Gmask_X = Gmask.transpose() * pheno_data.new_cov; // MxK\n    }\n  }\n\n  // to set columns of LD mat in right order \n  get_G_indices(indices_ld, colnames_ld_mat);\n\n  // compute LD blocks for SVs with burden masks\n  int nblocks_sv = ceil( params.ld_sv_offsets.size() * 1.0 / params.block_size );\n  if(params.debug) cout << print_mem() << \"\\n\";\n  sout << \"** Computing LD matrix \" << (params.skip_scaleG ? \"(=GtG) \" : \"\") << \"**\\n\";\n  if(nblocks_sv > 0) sout << \"  -> splitting across \" << nblocks_sv << \" SV blocks\\n\";\n  MeasureTime mt_chunk;\n\n  for(int snp_row = 0; snp_row < nblocks_sv; snp_row++){\n    int row_start = params.block_size * snp_row;\n    int row_nsnps = (snp_row == (nblocks_sv - 1)) ? (params.ld_sv_offsets.size() - snp_row *  params.block_size) : params.block_size;\n    MatrixXd Grow(params.n_samples, row_nsnps);\n\n    sout << \"     - row \" << snp_row + 1 << \"\\n\";\n    mt_chunk.start_ms();\n\n    // read in G\n    get_G_svs(snp_row, row_nsnps);\n    Grow = Gblock.Gmat;\n    // project covariates\n    MatrixXd GtX_row = Grow.transpose() * pheno_data.new_cov; // MxK\n    // compute diagonal\n    LD.block(row_start, row_start, row_nsnps, row_nsnps).noalias() = -GtX_row * GtX_row.transpose();\n    LD.block(row_start, row_start, row_nsnps, row_nsnps) += Grow.transpose() * Grow;\n    sout << \"       -> LD diagonal block computation...\" << (params.debug ? print_mem() : \"\") << \"...\" << mt_chunk.stop_ms() << \"\\n\";\n    if((nblocks_sv > 1) && (snp_row < (nblocks_sv - 1))) {\n      sout << \"       -> computing LD with other variants (\" << nblocks_sv - snp_row - 1 << \" blocks)... \" << flush;\n      mt_chunk.start_ms();\n    }\n\n    for(int snp_col = (snp_row + 1); snp_col < nblocks_sv; snp_col++){\n      int col_start = params.block_size * snp_col;\n      int col_nsnps = (snp_col == (nblocks_sv - 1)) ? 
(params.ld_sv_offsets.size() - snp_col *  params.block_size) : params.block_size;\n      if(params.debug) sout << snp_col - snp_row << \"...\" << flush;\n\n      get_G_svs(snp_col, col_nsnps);\n      // project covariates\n      MatrixXd GtX_col = Gblock.Gmat.transpose() * pheno_data.new_cov; // MxK\n\n      // compute ld block\n      LD.block(row_start, col_start, row_nsnps, col_nsnps).noalias() = -GtX_row * GtX_col.transpose();\n      LD.block(row_start, col_start, row_nsnps, col_nsnps) += Grow.transpose() * Gblock.Gmat;\n    }\n    if((nblocks_sv > 1) && (snp_row < (nblocks_sv - 1))) sout << mt_chunk.stop_ms() << \"\\n\";\n\n    // compute LD block for burden mask\n    if(Gmask.cols() > 0){\n      mt_chunk.start_ms();\n      LD.block(row_start, params.ld_sv_offsets.size(), row_nsnps, Gmask.cols()).noalias() = -GtX_row * Gmask_X.transpose();\n      LD.block(row_start, params.ld_sv_offsets.size(), row_nsnps, Gmask.cols()) += Grow.transpose() * Gmask;\n      sout << \"       -> computing LD with burden masks...\" << mt_chunk.stop_ms() << \"\\n\";\n    }\n  }\n\n  // compute LD diagonal block for burden mask\n  if(Gmask.cols() > 0){\n    mt_chunk.start_ms();\n    LD.block(params.ld_sv_offsets.size(), params.ld_sv_offsets.size(), Gmask.cols(), Gmask.cols()).noalias() = -Gmask_X * Gmask_X.transpose();\n    LD.block(params.ld_sv_offsets.size(), params.ld_sv_offsets.size(), Gmask.cols(), Gmask.cols()) += Gmask.transpose() * Gmask;\n    sout << \"     - computing LD between burden masks...\" << mt_chunk.stop_ms() << \"\\n\";\n  }\n\n  // write out LD matrix\n  print_ld(LD, indices_ld, ld_var_absent, ofile);\n\n}\n\n\nvoid Data::get_G_masks(SpMat& Gmat, ArrayXb& is_absent, map<string, int>& colnames_Gmat){\n\n  MeasureTime mt;\n  int block = 0, chrom_nb, bs;\n  vector< variant_block > block_info;\n  MatrixXd burden_mat;\n\n  sout << \"** Building burden masks **\\n\";\n  mt.start_ms();\n\n  // start analyzing each chromosome\n  for (auto const& chrom : files.chr_read){\n\n    if( !in_map(chrom, chr_map) ) continue;\n    chrom_nb = chr_map[chrom][1];\n    // if no sets in chromosome, skip\n    if(chrom_nb == 0)  continue;\n\n    // go through each set\n    for(int bb = 0; bb < chrom_nb ; bb++) {\n\n      vector< vector < uchar > > snp_data_blocks;\n      vector< uint32_t > insize, outsize;\n      vector<int> indices_mask_keep;\n\n      vset* set_info = &(jt.setinfo[chrom - 1][bb]);\n      bs = set_info->snp_indices.size();\n\n      sout << \" set [\" << block + 1 << \"/\" << params.total_n_block << \"] : \" << set_info->ID << \" - \" << bs << \" variants...\" << flush;\n      if(bs == 0){\n        sout << \"skipped\\n\";\n        block++;\n        continue;\n      }\n\n      // build the masks\n      block_info.resize(bs);\n      getMask(chrom, bb, snp_data_blocks, insize, outsize, block_info);\n\n      // store only the ones used in ld matrix\n      for(size_t mask = 0; mask < set_info->snp_indices.size(); mask++)\n        if(in_map(snpinfo[ set_info->snp_indices[mask] ].ID, params.extract_vars_order)){\n          is_absent(params.extract_vars_order[ snpinfo[ set_info->snp_indices[mask] ].ID ]) = false;\n          colnames_Gmat[ snpinfo[ set_info->snp_indices[mask] ].ID ] = colnames_Gmat.size(); // burden are after SV\n          indices_mask_keep.push_back(mask);\n        }\n\n      // store in Gmat\n      if(indices_mask_keep.size() > 0){\n        burden_mat.conservativeResize(Gblock.Gmat.rows(), burden_mat.cols() + indices_mask_keep.size());\n        
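// conservativeResize keeps the existing coefficients and appends uninitialized columns, which are filled on the next line\n        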
burden_mat.rightCols(indices_mask_keep.size()) = Gblock.Gmat(Eigen::placeholders::all, indices_mask_keep);\n      }\n\n      block++;\n    }\n\n  }\n\n  if(burden_mat.cols() > 0) Gmat = burden_mat.sparseView();\n\n  if(params.debug) cout << print_mem() << \"...\";\n  sout << \" -> \" << mt.stop_ms() << \"\\n\\n\";\n\n}\n\n\nvoid Data::get_G_svs(int const& sv_block, int const& bsize){\n\n  int chrom; \n  if( bsize == 0) return;\n  int nvar_read = params.block_size * sv_block;\n\n  vector<variant_block> all_snps_info;\n  vector< vector < uchar > > snp_data_blocks;\n  vector< uint32_t > insize, outsize;\n\n  allocate_mat(Gblock.Gmat, params.n_samples, bsize);\n  all_snps_info.resize(bsize);\n  chrom = snpinfo[ params.ld_sv_offsets[0] ].chrom;\n\n  vector<uint64> indices (params.ld_sv_offsets.begin() + nvar_read, params.ld_sv_offsets.begin() + nvar_read + bsize);\n  readChunk(indices, chrom, snp_data_blocks, insize, outsize, all_snps_info);\n\n#if defined(_OPENMP)\n  setNbThreads(1);\n#pragma omp parallel for schedule(dynamic)\n#endif\n  for(int isnp = 0; isnp < bsize; isnp++) {\n\n    uint32_t snp_index = indices[isnp];\n    variant_block* block_info = &(all_snps_info[isnp]);\n\n    // build genotype matrix\n    if( ((params.file_type == \"bgen\") && params.streamBGEN) || params.file_type == \"bed\") \n      parseSNP(isnp, chrom, &(snp_data_blocks[isnp]), insize[isnp], outsize[isnp], &params, &in_filters, pheno_data.masked_indivs, pheno_data.phenotypes_raw, &snpinfo[snp_index], &Gblock, block_info, sout);\n\n    // impute missing if present\n    if(block_info->ns1 < params.n_analyzed){\n      MapArXd Geno (Gblock.Gmat.col(isnp).data(), params.n_samples, 1);\n      mean_impute_g(block_info->af1*2, Geno, in_filters.ind_in_analysis);\n    }\n\n  }\n#if defined(_OPENMP)\n  setNbThreads(params.threads);\n#endif\n\n}\n\nvoid Data::print_ld(MatrixXd& LDmat, ArrayXi& indices_ld, ArrayXb& is_absent, Files* ofile){\n\n  MeasureTime mt;\n  int bits = 16; // break [0,1] into 2^bits intervals\n  double mult = (1ULL << bits) - 1; // map to 0,...,2^bits-1\n\n  // write list of snps to file (corresponding to columns in LD matrix)\n  write_snplist(is_absent);\n\n  // only upper tri is loaded\n  if(params.debug) cout << \"     - raw covariance matrix[1:5,1:5]:\\n\" << LDmat.block(0,0,min(params.ld_n,5),min(params.ld_n,5)) << \"\\n\" << print_mem() << \"\\n\";\n\n  // check if any of the diagonal entries are negative (but numerically zero -- due to rounding error)\n  ArrayXb sd_G_zero = (LDmat.diagonal().array() < 0) && (LDmat.diagonal().array().abs() < params.tol) ;\n  if(sd_G_zero.any()) {// set entries in LD matrix to 0\n    ArrayXi ind_0 = get_true_indices(sd_G_zero);\n    LDmat(ind_0,all).array() = 0; LDmat(all,ind_0).array() = 0;\n  }\n\n  if(!params.skip_scaleG) { // get cormat\n    ArrayXd sds = (LDmat.diagonal().array() <= 0).select(sqrt(params.numtol), LDmat.diagonal().array().sqrt()); // bug fix for negative but numerically zero diagonal entries\n    LDmat.diagonal().array() = sds.square();\n    if(params.debug) cout << \"     - thresholded covariance matrix[1:5,1:5]:\\n\" << LDmat.block(0,0,min(params.ld_n,5),min(params.ld_n,5)) << \"\\n\" << print_mem() << \"\\n\";\n    LDmat = (1/sds).matrix().asDiagonal() * LDmat * (1/sds).matrix().asDiagonal();\n    if(params.debug) cout << \"     - correlation matrix[1:5,1:5]:\\n\" << LDmat.block(0,0,min(params.ld_n,5),min(params.ld_n,5)) << \"\\n\" << print_mem() << \"\\n\";\n  } else \n    LDmat.diagonal().array() = 
LDmat.diagonal().array().max(params.numtol);\n\n  // print corr\n  sout << \"     - writing to file...\" << flush;\n  mt.start_ms();\n\n  if(params.ld_sparse_thr > 0){ // apply sparse threshold to LD matrix for off diagonal entries\n\n    double out_val;\n    // first diagonal entries (single line)\n    ArrayXd sds = LDmat.diagonal().array().sqrt();\n    IOFormat Fmt(StreamPrecision, DontAlignCols, \" \", \"\\n\", \"\", \"\",\"\",\"\\n\");\n    (*ofile) << sds(indices_ld).matrix().transpose().format(Fmt);\n    // off diagonal entries above thr based on corr (fmt = row/col/value [1-based])\n    for(int i = 0; i < LDmat.rows(); i++)\n      for(int j = i+1; j < LDmat.cols(); j++){\n        if( indices_ld(i) < indices_ld(j) )\n          out_val = LDmat(indices_ld(i),indices_ld(j)) / sds(indices_ld(i)) / sds(indices_ld(j));\n        else\n          out_val = LDmat(indices_ld(j),indices_ld(i)) / sds(indices_ld(i)) / sds(indices_ld(j));\n        if(fabs(out_val) >= params.ld_sparse_thr)\n          (*ofile) << i+1 << \" \" << j+1 << \" \" << out_val << \"\\n\";\n      }\n    ofile->closeFile();\n\n  } else if(params.cor_out_txt){ // write out to text file (in batches of rows)\n\n    int batch_size = 1000;\n    int nbatches_row = ceil(LDmat.rows() * 1.0 / batch_size), nrow_start = 0, nrows_batch = batch_size;\n\n    for(int batch = 0; batch < nbatches_row; batch++){\n      if(batch == (nbatches_row - 1)) nrows_batch = LDmat.rows() - nrow_start;\n      vector< ostringstream > buffers (nrows_batch);\n\n#if defined(_OPENMP)\n      setNbThreads(1);\n#pragma omp parallel for schedule(dynamic)\n#endif\n      for(int i = 0; i < nrows_batch; i++){ // store row in parallel\n        for(int j = 0; j < LDmat.cols(); j++){\n          if( indices_ld(nrow_start + i) < indices_ld(j) )\n            buffers[i] << LDmat(indices_ld(nrow_start + i),indices_ld(j));\n          else\n            buffers[i] << LDmat(indices_ld(j),indices_ld(nrow_start + i));\n          if(j < (LDmat.cols() - 1)) buffers[i] << \" \";\n        }\n        if( (batch < (nbatches_row - 1)) || ( i < (nrows_batch - 1)) ) buffers[i] << '\\n';\n      }\n#if defined(_OPENMP)\n      setNbThreads(params.threads);\n#endif\n\n      // concatenate all rows\n      string combined_buffer;\n      for(int i = 0; i < nrows_batch; i++)\n        combined_buffer += buffers[i].str();\n      (*ofile) << combined_buffer; // write them to file\n      nrow_start += nrows_batch;\n    }\n\n    ofile->closeFile();\n\n  } else {\n\n    ArrayXt vals;\n    vals.resize( (LDmat.rows() * (LDmat.rows() - 1)) / 2 ); // m choose 2\n\n    for(int i = 0, k = 0; i < LDmat.rows(); i++)\n      for(int j = i+1; j < LDmat.cols(); j++)\n        if( indices_ld(i) < indices_ld(j) )\n          vals(k++) = LDmat(indices_ld(i),indices_ld(j)) * LDmat(indices_ld(i),indices_ld(j)) * mult + 0.5; // round to nearest integer\n        else\n          vals(k++) = LDmat(indices_ld(j),indices_ld(i)) * LDmat(indices_ld(j),indices_ld(i)) * mult + 0.5; // round to nearest integer\n\n    //cerr << \"\\norig:\\n\" << LDmat.block(0,0,5,5).array().square().matrix() << \"\\nbin:\\n\" << \n     // vals.head(5) << \"\\n-->\" << vals.size() << endl;\n\n    ofile->writeBinMode(vals, sout);\n    ofile->closeFile();\n  }\n\n  sout << \" -> \" << mt.stop_ms() << \"\\n\";\n\n  exit_early();\n\n}\n\n\nvoid Data::compute_ld_hardcalls(Files* ofile){\n\n  ArrayXb ld_var_absent = ArrayXb::Constant(params.ld_n, true);\n  map<string, int> colnames_ld_mat; // to track id of cols in full_mat\n  ArrayXi indices_ld(params.ld_n);\n\tSpMat 
Gmat(params.n_samples, params.ld_n);\n\n\t// read in SVs\n\tget_G_svs(Gmat, ld_var_absent, colnames_ld_mat);\n\n\t// read in masks\n\tif(params.build_mask) get_G_masks_hc(Gmat, ld_var_absent, colnames_ld_mat);\n\n\t// to set columns of LD mat in right order \n\tget_G_indices(indices_ld, colnames_ld_mat);\n\n\t// compute LD matrix\n\tprint_ld(Gmat, indices_ld, ld_var_absent, ofile);\n\n}\n\nvoid Data::get_G_svs(SpMat& Gmat, ArrayXb& is_absent, map<string, int>& colnames_Gmat){\n\n  int n_snps = params.ld_sv_offsets.size();\n  if( n_snps == 0) return;\n\n  bool last_chunk = false;\n  int nchunks, bsize, chrom, nvar_read = 0; \n  vector<variant_block> all_snps_info;\n  vector< vector < uchar > > snp_data_blocks;\n  vector< uint32_t > insize, outsize;\n\n  // read in variants in chunks storing as sparse matrix\n  nchunks = ceil( n_snps * 1.0 / params.block_size );\n  sout << \"** reading in single variant genotypes **\\n  + \" << n_snps << \" variants in total split across \" << nchunks << \" blocks\\n\";\n  MeasureTime mt, mt_chunk;\n  mt.start_ms();\n  if(params.debug) cerr << print_mem() << \"...\";\n\n  // do it in chunks to reduce memory usage when reading as dense\n  bsize = params.block_size; // default number of SNPs to read at a time\n  allocate_mat(Gblock.Gmat, params.n_samples, bsize);\n  all_snps_info.resize(bsize);\n  chrom = snpinfo[ params.ld_sv_offsets[0] ].chrom;\n  for(int i = 0; i < nchunks; i++){\n\n    sout << \"  block [\" << i + 1 << \"/\" << nchunks << \"] : reading in genotypes...\" << flush;\n    mt_chunk.start_ms();\n\n    last_chunk = ( i == (nchunks-1) );\n    if( last_chunk ) {\n      bsize = n_snps - i * bsize;// use remainder number of variants\n      allocate_mat(Gblock.Gmat, params.n_samples, bsize);\n    }\n\n    vector<uint64> indices (params.ld_sv_offsets.begin() + nvar_read, params.ld_sv_offsets.begin() + nvar_read + bsize);\n    readChunk(indices, chrom, snp_data_blocks, insize, outsize, all_snps_info);\n\n#if defined(_OPENMP)\n    setNbThreads(1);\n#pragma omp parallel for schedule(dynamic)\n#endif\n    for(int isnp = 0; isnp < bsize; isnp++) {\n\n      uint32_t snp_index = indices[isnp];\n      variant_block* block_info = &(all_snps_info[isnp]);\n\n      // build genotype matrix\n      if( ((params.file_type == \"bgen\") && params.streamBGEN) || params.file_type == \"bed\") \n        parseSNP(isnp, chrom, &(snp_data_blocks[isnp]), insize[isnp], outsize[isnp], &params, &in_filters, pheno_data.masked_indivs, pheno_data.phenotypes_raw, &snpinfo[snp_index], &Gblock, block_info, sout);\n\n      // impute missing if present\n      MapArXd Geno (Gblock.Gmat.col(isnp).data(), params.n_samples, 1);\n      mean_impute_g(block_info->af1*2, Geno, in_filters.ind_in_analysis);\n\n      // check if in LD matrix \n      is_absent(params.extract_vars_order[snpinfo[ snp_index ].ID]) = false;\n    }\n#if defined(_OPENMP)\n    setNbThreads(params.threads);\n#endif\n    // convert to sparse\n    sout << mt_chunk.stop_ms() << \"...converting to sparse...\"; mt_chunk.start_ms();\n    Gmat.middleCols(nvar_read, bsize) = Gblock.Gmat.sparseView();\n    // store ID in Gmat (can't do it multithreaded)\n    for(int isnp = 0; isnp < bsize; isnp++) {\n      uint32_t snp_index = indices[isnp];\n      colnames_Gmat[ snpinfo[ snp_index ].ID ] = colnames_Gmat.size();\n    }\n\n    if(params.debug) cout << print_mem() << \"...\";\n    sout << mt_chunk.stop_ms() << \"\\n\";\n    nvar_read += bsize;\n  }\n\n  if(params.debug) cout << print_mem() << \"...\";\n  sout << \" -> \" << 
mt.stop_ms() << \"\\n\";\n\n}\n\nvoid Data::get_G_masks_hc(SpMat& Gmat, ArrayXb& is_absent, map<string, int>& colnames_Gmat){\n\n  MeasureTime mt;\n  int block = 0, chrom_nb, bs;\n  int nvar_read = colnames_Gmat.size();\n  vector< variant_block > block_info;\n\n  sout << \"\\n** Building burden masks **\\n\";\n  mt.start_ms();\n\n  // start analyzing each chromosome\n  for (auto const& chrom : files.chr_read){\n\n    if( !in_map(chrom, chr_map) ) continue;\n    chrom_nb = chr_map[chrom][1];\n    // if no sets in chromosome, skip\n    if(chrom_nb == 0)  continue;\n\n    // go through each set\n    for(int bb = 0; bb < chrom_nb ; bb++) {\n\n      vector< vector < uchar > > snp_data_blocks;\n      vector< uint32_t > insize, outsize;\n      vector<int> indices_mask_keep;\n\n      vset* set_info = &(jt.setinfo[chrom - 1][bb]);\n      bs = set_info->snp_indices.size();\n\n      sout << \" set [\" << block + 1 << \"/\" << params.total_n_block << \"] : \" << set_info->ID << \" - \" << bs << \" variants...\" << flush;\n      if(bs == 0){\n        sout << \"skipped\\n\";\n        block++;\n        continue;\n      }\n\n      // build the masks\n      block_info.resize(bs);\n      getMask(chrom, bb, snp_data_blocks, insize, outsize, block_info);\n\n      // store only the ones used in ld matrix\n      for(size_t mask = 0; mask < set_info->snp_indices.size(); mask++)\n        if(in_map(snpinfo[ set_info->snp_indices[mask] ].ID, params.extract_vars_order)){\n          is_absent(params.extract_vars_order[ snpinfo[ set_info->snp_indices[mask] ].ID ]) = false;\n          colnames_Gmat[ snpinfo[ set_info->snp_indices[mask] ].ID ] = colnames_Gmat.size();\n          indices_mask_keep.push_back(mask);\n        }\n\n      // store in Gmat\n      if(indices_mask_keep.size() > 0){\n        Gmat.middleCols(nvar_read, indices_mask_keep.size()) = Gblock.Gmat(Eigen::placeholders::all, indices_mask_keep).sparseView();\n        nvar_read += indices_mask_keep.size();\n      }\n      block++;\n    }\n\n  }\n\n  if(params.debug) cout << print_mem() << \"...\";\n  sout << \" -> \" << mt.stop_ms() << \"\\n\";\n\n}\n\nvoid Data::print_ld(SpMat& Gmat, ArrayXi& indices_ld, ArrayXb& is_absent, Files* ofile){\n\n  MeasureTime mt;\n  int bits = 16; // break [0,1] into 2^bits intervals\n  double mult = (1ULL << bits) - 1; // map to 0,...,2^bits-1\n\n  sout << \"\\n** computing LD matrix \" << (params.skip_scaleG ? 
\"(=GtG) \" : \"\") << \"**\\n\";\n  mt.start_ms();\n\n  // write list of snps to file (corresponding to columns in LD matrix)\n  write_snplist(is_absent);\n\n  // get LD matrix - first project covariates\n  MatrixXd GtX = Gmat.transpose() * pheno_data.new_cov; // MxK\n  MatrixXd LDmat = -GtX * GtX.transpose();\n  LDmat += Gmat.transpose() * Gmat;\n  if(params.debug) cout << \"     - raw covariance matrix[1:5,1:5]:\\n\" << LDmat.block(0,0,min(params.ld_n,5),min(params.ld_n,5)) << \"\\n\" << print_mem() << \"\\n\";\n\n  // check if any of the diagonal entries are negative (but numerically zero -- due to rounding error)\n  ArrayXb sd_G_zero = (LDmat.diagonal().array() < 0) && (LDmat.diagonal().array().abs() < params.tol) ;\n  if(sd_G_zero.any()) {// set entries in LD matrix to 0\n    ArrayXi ind_0 = get_true_indices(sd_G_zero);\n    LDmat(ind_0,all).array() = 0; LDmat(all,ind_0).array() = 0;\n  }\n\n  if(!params.skip_scaleG) { // get cormat\n    ArrayXd sds = (LDmat.diagonal().array() <= 0).select(sqrt(params.numtol), LDmat.diagonal().array().sqrt()); // bug fix for negative but numerically zero diagonal entries\n    LDmat.diagonal().array() = sds.square();\n  if(params.debug) cout << \"     - thresholded covariance matrix[1:5,1:5]:\\n\" << LDmat.block(0,0,min(params.ld_n,5),min(params.ld_n,5)) << \"\\n\" << print_mem() << \"\\n\";\n    LDmat = (1/sds).matrix().asDiagonal() * LDmat * (1/sds).matrix().asDiagonal();\n  if(params.debug) cout << \"     - correlation matrix[1:5,1:5]:\\n\" << LDmat.block(0,0,min(params.ld_n,5),min(params.ld_n,5)) << \"\\n\" << print_mem() << \"\\n\";\n  } else \n    LDmat.diagonal().array() = LDmat.diagonal().array().max(params.numtol);\n  sout << \" -> \" << mt.stop_ms() << \"\\n\";\n\n  // print corr\n  sout << \"\\n** writing to file **\\n\";\n  mt.start_ms();\n\n  if(params.ld_sparse_thr > 0){ // apply sparse threshold to LD matrix for off diagonal entries\n\n    double out_val;\n    // first diagonal entries (single line)\n    ArrayXd sds = LDmat.diagonal().array().sqrt();\n    IOFormat Fmt(StreamPrecision, DontAlignCols, \" \", \"\\n\", \"\", \"\",\"\",\"\\n\");\n    (*ofile) << sds(indices_ld).matrix().transpose().format(Fmt);\n    // off diagonal entries above thr based on corr (fmt = row/col/value [1-based])\n    for(int i = 0; i < LDmat.rows(); i++)\n      for(int j = i+1; j < LDmat.cols(); j++){\n        out_val = LDmat(indices_ld(i),indices_ld(j)) / sds(indices_ld(i)) / sds(indices_ld(j));\n        if(fabs(out_val) >= params.ld_sparse_thr)\n          (*ofile) << i+1 << \" \" << j+1 << \" \" << out_val << \"\\n\";\n      }\n    ofile->closeFile();\n\n  } else if(params.cor_out_txt){\n\n    IOFormat Fmt(StreamPrecision, DontAlignCols, \" \", \"\\n\", \"\", \"\",\"\",\"\");\n    (*ofile) << LDmat(indices_ld, indices_ld).format(Fmt);\n    ofile->closeFile();\n\n  } else {\n\n    ArrayXt vals;\n    vals.resize( (LDmat.rows() * (LDmat.rows() - 1)) / 2 ); // m choose 2\n\n    for(int i = 0, k = 0; i < LDmat.rows(); i++)\n      for(int j = i+1; j < LDmat.cols(); j++)\n        vals(k++) = LDmat(indices_ld(i),indices_ld(j)) * LDmat(indices_ld(i),indices_ld(j)) * mult + 0.5; // round to nearest integer\n\n    //cerr << \"\\norig:\\n\" << LDmat.block(0,0,5,5).array().square().matrix() << \"\\nbin:\\n\" << \n     // vals.head(5) << \"\\n-->\" << vals.size() << endl;\n\n    ofile->writeBinMode(vals, sout);\n    ofile->closeFile();\n  }\n\n  sout << \" -> \" << mt.stop_ms() << \"\\n\";\n\n  exit_early();\n\n}\n\n"
  },
  {
    "path": "src/Data.hpp",
    "content": "/* \n\n   This file is part of the regenie software package.\n\n   Copyright (c) 2020-2024 Joelle Mbatchou, Andrey Ziyatdinov & Jonathan Marchini\n\n   Permission is hereby granted, free of charge, to any person obtaining a copy\n   of this software and associated documentation files (the \"Software\"), to deal\n   in the Software without restriction, including without limitation the rights\n   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n   copies of the Software, and to permit persons to whom the Software is\n   furnished to do so, subject to the following conditions:\n\n   The above copyright notice and this permission notice shall be included in all\n   copies or substantial portions of the Software.\n\n   THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n   SOFTWARE.\n\n*/\n\n#ifndef DATA_H\n#define DATA_H\n\nclass Data {\n\n  public:\n    // class elements\n    mstream sout;\n    MeasureTime runtime;\n    param params;\n    in_files files;\n    filter in_filters;\n    std::vector<snp> snpinfo;\n    phenodt pheno_data;\n    geno_block Gblock;\n    std::map<int, std::vector<int>> chr_map; // first=chr; second=[# SNPs analyzed, #blocks, # SNPs in file]\n    ests m_ests;\n    ridgel1 l1_ests;\n    f_ests firth_est;\n    // HLM\n    HLM nullHLM; // for null model fitting of HLM\n    remeta_sumstat_writer remeta_sumstats;\n\n    std::string model_type, correction_type, test_string, wgr_string;\n\n    uint32_t n_corrected = 0; // to keep track of how many SNPs require correction\n    bool pval_converged = false; // keep track of whether SPA/Firth converged\n    bool fastSPA; // use fast approx. 
for rare SNPs\n\n    std::vector < MatrixXb > masked_in_folds;\n    std::vector<Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic> > predictions;\n\n    uint32_t total_chrs_loco;\n    Eigen::MatrixXd blup;\n    Eigen::VectorXd denum_tstat;\n    Eigen::MatrixXd res, stats, W_hat;\n    Eigen::RowVectorXd p_sd_yres;\n    Eigen::VectorXd scale_G; // keep track of sd(Y) (1xP) and sd(G) (M*1)\n    MultiPhen mphen;\n\n    // function definitions\n    void run();\n    void run_step1();\n    void run_step2();\n\n    void file_read_initialization();\n    void residualize_genotypes();\n    void scale_genotypes(bool);\n    void get_block_size(int const&,int const&,int const&,int&);\n\n    // step 1 \n    void set_parallel_l0();\n    void write_l0_master();\n    void prep_parallel_l0();\n    void prep_parallel_l1();\n    void set_blocks();\n    void set_folds();\n    void setmem();\n    void calc_cv_matrices(struct ridgel0*);\n    void level_0_calculations();\n    void prep_l1_models();\n    void write_inputs(); \n    void exit_early();\n    // output of step 1\n    void output();\n    void make_predictions(int const&,int const&);\n    void make_predictions_loocv(int const&,int const&);\n    void make_predictions_binary(int const&,int const&);\n    void make_predictions_binary_loocv_full(int const&,int const&);\n    void make_predictions_binary_loocv(int const&,int const&);\n    void make_predictions_count(int const&,int const&);\n    void make_predictions_count_loocv(int const&,int const&);\n    void make_predictions_cox(int const&, int const&);\n    void print_snp_betas(const Eigen::Ref<const Eigen::VectorXd>&);\n    void write_predictions(int const&);\n    std::string write_ID_header();\n    std::string write_chr_row(int const&,int const&,const Eigen::Ref<const Eigen::VectorXd>&);\n    void rm_l0_files(int const& ph);\n\n    // step 2 main functions\n    void test_snps();\n    void set_blocks_for_testing();\n    void print_test_info();\n    void set_nullreg_mat();\n    void compute_res();\n    void residualize_res();\n    void compute_res_bin(int const&);\n    void compute_res_count(int const&);\n    void compute_res_cox(int const&);\n    void setup_output(Files*,std::string&,std::vector<std::shared_ptr<Files>>&,std::vector<std::string>&);\n\n    // step 2 using multithreading in eigen\n    double check_pval(double const&,int const&,int const&,int const&);\n    double run_firth_correction(int const&,int const&,int const&);\n    void run_SPA_test(int const&);\n\n    // step2 using multithreading in openmp\n    void test_snps_fast();\n    void analyze_block(int const&,int const&,tally*,std::vector<variant_block>&);\n    void compute_tests_mt(int const&,std::vector<uint64>,std::vector<std::vector <uchar>>&,std::vector<uint32_t>,std::vector<uint32_t>&,std::vector<variant_block>&);\n    void compute_tests_st(int const&,std::vector<uint64>,std::vector<std::vector <uchar>>&,std::vector<uint32_t>,std::vector<uint32_t>&,std::vector<variant_block>&);\n\n    // step 2 with joint tests\n    JTests jt;\n    GenoMask bm;\n    void test_joint();\n    void set_groups_for_testing();\n    void get_sum_stats(int const&,int const&,std::vector<variant_block>&);\n    void readChunk(std::vector<uint64>&,int const&,std::vector<std::vector<uchar>>&,std::vector<uint32_t>&,std::vector<uint32_t>&,std::vector<variant_block>&);\n    void getMask(int const&,int const&,std::vector<std::vector<uchar>>&,std::vector<uint32_t>&,std::vector<uint32_t>&,std::vector<variant_block>&);\n    void getMask_loo(int const&,int 
const&,std::vector<std::vector<uchar>>&,std::vector<uint32_t>&,std::vector<uint32_t>&,std::vector<variant_block>&);\n\n    // step 2 with multi-trait tests\n    MTests mt;\n    void test_multitrait();\n    void analyze_block_multitrait(int const&,int const&,tally*,std::vector<variant_block>&);\n    void compute_tests_mt_multitrait(int const&,std::vector<uint64>,std::vector<std::vector <uchar>>&,std::vector<uint32_t>,std::vector<uint32_t>&,std::vector<variant_block>&);\n    void prep_multitrait(); \n\n    // step 2 with MultiPhen test\n    /* MTests mt; */\n    void test_multiphen();\n    void analyze_block_multiphen(int const&,int const&,tally*,std::vector<variant_block>&);\n    void compute_tests_mt_multiphen(int const&,std::vector<uint64>,std::vector<std::vector <uchar>>&,std::vector<uint32_t>,std::vector<uint32_t>&,std::vector<variant_block>&);\n    void prep_multiphen(); \n    void set_multiphen();\n\n    // for LD computation\n    void ld_comp();\n    void get_G_indices(Eigen::ArrayXi&,std::map<std::string,int>&);\n    void write_snplist(ArrayXb&);\n    // dosage-mode\n    void compute_ld_dosages(Files*);\n    void get_G_masks(SpMat&,ArrayXb&,std::map<std::string,int>&);\n    void get_G_svs(int const&,int const&);\n    void print_ld(MatrixXd&,Eigen::ArrayXi&,ArrayXb&,Files*);\n    // hard-call mode\n    void compute_ld_hardcalls(Files*);\n    void get_G_svs(SpMat&,ArrayXb&,std::map<std::string,int>&);\n    void get_G_masks_hc(SpMat&,ArrayXb&,std::map<std::string,int>&);\n    void print_ld(SpMat&,Eigen::ArrayXi&,ArrayXb&,Files*);\n    \n    Data();\n    ~Data();\n};\n\n// extra function\nstd::string get_fullpath(std::string);\n\n#endif\n"
  },
  {
    "path": "src/Files.cpp",
    "content": "/* \n\n   This file is part of the regenie software package.\n\n   Copyright (c) 2020-2024 Joelle Mbatchou, Andrey Ziyatdinov & Jonathan Marchini\n\n   Permission is hereby granted, free of charge, to any person obtaining a copy\n   of this software and associated documentation files (the \"Software\"), to deal\n   in the Software without restriction, including without limitation the rights\n   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n   copies of the Software, and to permit persons to whom the Software is\n   furnished to do so, subject to the following conditions:\n\n   The above copyright notice and this permission notice shall be included in all\n   copies or substantial portions of the Software.\n\n   THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n   SOFTWARE.\n\n*/\n\n\n#include \"Regenie.hpp\"\n#include \"Files.hpp\"\n\nnamespace fs = boost::filesystem;\n\nFiles::Files(){\n}\nFiles::~Files(){\n}\n\n// Open file (either regular or gzipped)\nvoid Files::openForRead(std::string const& filename, mstream& sout){\n\n  read_mode = true;\n  is_gz = isGzipped(filename, true);\n  //std::cerr << filename << \" - gzip = \" << std::boolalpha << is_gz << std::endl;\n\n  // only used if compiled with boost iostream\n# if not defined(HAS_BOOST_IOSTREAM)\n  if(is_gz) \n    throw \"cannot read gzip file if compilation is not done with the Boost Iostream library (i.e. 'make HAS_BOOST_IOSTREAM=1').\";\n#endif\n\n  std::ios_base::openmode mode = (is_gz ? std::ios_base::in | std::ios_base::binary : std::ios_base::in ); \n\n  openStream(&infile, filename, mode, sout);\n\n# if defined(HAS_BOOST_IOSTREAM)\n  if(is_gz){\n    ingzfile.push(boost::iostreams::gzip_decompressor());\n    ingzfile.push(infile);\n  }\n#endif\n\n}\n\nbool Files::readLine(std::string& line){\n\n# if defined(HAS_BOOST_IOSTREAM)\n  if(is_gz) \n    return static_cast<bool>( getline(ingzfile, line) );\n#endif\n\n  return  static_cast<bool>( getline(infile, line) );\n}\n\n\nvoid Files::ignoreLines(int const& nlines){\n\n  int linenumber=0;\n\n  if(nlines < 1) return;\n\n  while(linenumber++ < nlines){\n\n    if(is_gz) {\n# if defined(HAS_BOOST_IOSTREAM)\n      ingzfile.ignore(std::numeric_limits<std::streamsize>::max(), '\\n');\n#endif\n    } else \n      infile.ignore(std::numeric_limits<std::streamsize>::max(), '\\n');\n\n  }\n}\n\n\n// Open file for writing\nvoid Files::openForWrite(std::string const& filename, mstream& sout){\n\n  read_mode = false;\n  is_gz = isGzipped(filename, false);\n\n  // only used if compiled with boost iostream\n# if not defined(HAS_BOOST_IOSTREAM)\n  if(is_gz) \n    throw \"cannot write gzip file if compilation is not done with the Boost Iostream library (i.e. 'make HAS_BOOST_IOSTREAM=1').\";\n#endif\n\n  std::ios_base::openmode mode = (is_gz ? 
std::ios_base::out | std::ios_base::binary : std::ios_base::out ); \n\n  openStream(&outfile, filename, mode, sout);\n\n# if defined(HAS_BOOST_IOSTREAM)\n  if(is_gz){\n    outgzfile.push(boost::iostreams::gzip_compressor());\n    outgzfile.push(outfile);\n  }\n#endif\n\n}\n\nvoid Files::closeFile(){\n\n  if( read_mode ){\n\n# if defined(HAS_BOOST_IOSTREAM)\n    if(is_gz) \n      ingzfile.reset();\n#endif\n    infile.close();\n\n  } else {\n\n# if defined(HAS_BOOST_IOSTREAM)\n    if(is_gz) \n      outgzfile.reset();\n#endif\n    outfile.close();\n\n  }\n}\n\n// Check if file is gzipped (extension, and optionally magic bytes)\nbool Files::isGzipped(std::string const& filename, bool const& check_file) {\n\n  // require all gzipped files to end in .gz\n  if( fs::extension(filename) != \".gz\" )\n    return false;\n\n  // open file and check first 2 bytes (should equal 0x1f8b)\n  if(check_file){\n    infile.open(filename, std::ios_base::in | std::ios_base::binary);\n    if (infile.fail()) \n      throw \"cannot read file : \" + filename ;\n\n    uchar header[2];\n    infile.read( reinterpret_cast<char *> (&header[0]), 2);\n    infile.close();\n\n    if ( (header[0] != 0x1f) || (header[1] != 0x8b) ) \n      return false;\n  }\n\n  return true;\n}\n\nvoid Files::openMode(std::string const& filename, std::ios_base::openmode mode, mstream& sout){\n\n  if(mode & std::ios_base::out){\n    read_mode = false;\n    openStream(&outfile, filename, mode, sout);\n  } else {\n    read_mode = true;\n    openStream(&infile, filename, mode, sout);\n  }\n\n}\n\n// Split string into tokens at the given delimiters\nstd::vector<std::string> string_split(std::string const& s, const char* delims) {\n\n  std::vector<std::string> out;\n\n  if(s.size() == 0) return out;\n\n  const char* p = s.c_str(); // beginning of string\n  const char* q = strpbrk(p+1, delims); // to first delimiter\n\n  for( ; q != NULL; q = strpbrk(p, delims)){\n    out.push_back( std::string(p,q) ); // add to vector using range constructor\n    p = q + 1;\n  }\n\n  // check string after last delimiter\n  if(p && (p[0] != '\\0')) out.push_back( std::string(p) );\n\n  return(out);\n\n}\n\nbool startswith(const char* s, const char* prefix) {\n   if(strncmp(s, prefix, strlen(prefix)) == 0) return true;\n   return false;\n}\n\nint find_col(std::vector<std::string> const& str_vec, std::string const& name){\n\n  if(str_vec.size() < 1) return -1;\n\n  auto scol = std::find(str_vec.begin(), str_vec.end(), name); \n  if(scol == str_vec.end())\n    return -1;\n  else return std::distance(str_vec.begin(), scol);\n\n}\n\n"
  },
  {
    "path": "src/Files.hpp",
    "content": "/* \n\n   This file is part of the regenie software package.\n\n   Copyright (c) 2020-2024 Joelle Mbatchou, Andrey Ziyatdinov & Jonathan Marchini\n\n   Permission is hereby granted, free of charge, to any person obtaining a copy\n   of this software and associated documentation files (the \"Software\"), to deal\n   in the Software without restriction, including without limitation the rights\n   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n   copies of the Software, and to permit persons to whom the Software is\n   furnished to do so, subject to the following conditions:\n\n   The above copyright notice and this permission notice shall be included in all\n   copies or substantial portions of the Software.\n\n   THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n   SOFTWARE.\n\n*/\n#ifndef RFILES_H\n#define RFILES_H\n\n#include <boost/filesystem.hpp>\n\n# if defined(HAS_BOOST_IOSTREAM)\n#include <boost/iostreams/filtering_stream.hpp>\n#include <boost/iostreams/filter/gzip.hpp>\n#endif\n\nclass Files {\n\n  public:\n    // variables\n    bool is_gz = false;\n    bool read_mode = true;\n\n    // for reading\n    std::ifstream infile;\n    // for writing\n    std::ofstream outfile;\n\n# if defined(HAS_BOOST_IOSTREAM)\n    boost::iostreams::filtering_istream ingzfile;\n    boost::iostreams::filtering_ostream outgzfile;\n#endif\n\n\n    // functions\n    bool isGzipped(std::string  const&,bool const&);\n    void openForRead(std::string const&,mstream&);\n    bool readLine(std::string&);\n    void ignoreLines(int const&);\n    void openForWrite(std::string const&,mstream&);\n    void closeFile();\n    void openMode(std::string const&,std::ios_base::openmode,mstream&);\n    template<typename Derived>\n      void writeBinMode(Eigen::ArrayBase< Derived >& vals, mstream& sout){\n        outfile.write( reinterpret_cast<char *> (&vals(0)), vals.size() * sizeof(vals(0)) );\n        if (outfile.fail()) {    \n          sout << \"ERROR: Cannot write values to file.\\n\";\n          exit(EXIT_FAILURE);\n        }\n      }\n\n    // to write to file\n    template <class S>\n      Files& operator<< (const S& val)\n      {\n# if defined(HAS_BOOST_IOSTREAM)\n        if(is_gz) {\n          outgzfile << val;\n          return *this;\n        }\n#endif\n        outfile << val;\n        return *this;\n      }\n\n    // for std::endl\n    Files& operator<< (std::ostream& (*pfun)(std::ostream&))\n    {\n# if defined(HAS_BOOST_IOSTREAM)\n        if(is_gz) {\n          pfun(outgzfile);\n          return *this;\n        }\n#endif\n      pfun(outfile);\n      return *this;\n    };\n\n    Files();\n    ~Files();\n};\n\ntemplate <typename T>\nvoid openStream(T* ofs, std::string const& fname, std::ios_base::openmode mode, mstream& sout){\n\n  ofs->open(fname, mode);\n  if (ofs->fail()) {\n    std::string str_mode = mode & std::ios_base::out ? 
\"write\" : \"read\";\n    throw \"cannot \" + str_mode + \" file : \" + fname ;\n  }\n\n  return;\n}\nstd::vector<std::string> string_split(std::string const&,const char*);\nbool startswith(const char*,const char*);\nint find_col(std::vector<std::string> const&,std::string const&);\n\n#endif\n"
  },
  {
    "path": "src/Geno.cpp",
    "content": "/*\n\n   This file is part of the regenie software package.\n\n   Copyright (c) 2020-2024 Joelle Mbatchou, Andrey Ziyatdinov & Jonathan Marchini\n\n   Permission is hereby granted, free of charge, to any person obtaining a copy\n   of this software and associated documentation files (the \"Software\"), to deal\n   in the Software without restriction, including without limitation the rights\n   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n   copies of the Software, and to permit persons to whom the Software is\n   furnished to do so, subject to the following conditions:\n\n   The above copyright notice and this permission notice shall be included in all\n   copies or substantial portions of the Software.\n\n   THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n   SOFTWARE.\n\n*/\n\n#include \"Regenie.hpp\"\n#include \"Files.hpp\"\n#include \"Geno.hpp\"\n#include \"db/sqlite3.hpp\"\n\nusing namespace std;\nusing namespace Eigen;\nusing namespace boost;\n\n\n\nvoid prep_bgen(struct in_files* files, struct param* params, struct filter* filters, vector<snp>& snpinfo, map<int, vector<int>>& chr_map, BgenParser& bgen, mstream& sout){\n\n  bool interaction_snp_found = false;\n  uint32_t nOutofOrder = 0, lineread = 0;\n  std::string chromosome, rsid, msg;\n  uint32_t position ;\n  std::vector< std::string > alleles ;\n  std::vector< std::vector< double > > probs ;\n  std::vector< string > tmp_ids ;\n  snp tmp_snp;\n  BgenParser bgen_tmp;\n\n  sout << left << std::setw(20) << \" * bgen\" << \": [\" << files->bgen_file << \"]\" << endl;\n  // open file and print file info\n  bgen_tmp.open( files->bgen_file ) ;\n  sout << bgen_tmp.summarise( ) << \n    // also add in the number of bits\n    (params->BGENbits > 0 ? \" with \" + to_string(params->BGENbits) + \"-bit encoding\" : \"\") << \".\\n\";\n\n  // get info for variants\n  if( params->with_bgi ) read_bgi_file(bgen_tmp, files, params, filters, snpinfo, sout);\n  else {\n    tmp_snp.offset = bgen_tmp.get_position();\n    while(bgen_tmp.read_variant( &chromosome, &position, &rsid, &alleles )) {\n\n      assert(alleles.size() == 2) ; // only bi-allelic allowed\n      // check phasing for first variant\n      if(lineread == 0){\n        bgen_tmp.read_probs( &probs ) ;\n\n        if( probs[0].size() != 3 ) // unphased only \n          throw \"only unphased bgen are supported.\";\n\n      } else bgen_tmp.ignore_probs();\n\n      tmp_snp.chrom = chrStrToInt(chromosome, params->nChrom);\n      if (tmp_snp.chrom == -1) \n        throw \"unknown chromosome code in bgen file.\";\n\n      if( files->chr_read.empty() || (tmp_snp.chrom != files->chr_read.back()) ) files->chr_read.push_back(tmp_snp.chrom);\n\n      tmp_snp.physpos = position;\n      tmp_snp.ID = rsid;\n      if( params->ref_first ) { // reference is first (i.e. 
allele0)\n        tmp_snp.allele1 = alleles[0];\n        tmp_snp.allele2 = alleles[1];\n      } else {\n        tmp_snp.allele1 = alleles[1];\n        tmp_snp.allele2 = alleles[0]; // switch so allele0 is ALT\n      }\n\n      // check if snps are in order (same chromosome & non-decreasing positions)\n      if (!snpinfo.empty() && (tmp_snp.chrom == snpinfo.back().chrom) && ( (tmp_snp.physpos < snpinfo.back().physpos) )) nOutofOrder++;\n\n      lineread++;\n\n      // if using GxG interaction test\n      if(params->interaction_snp && (tmp_snp.ID == filters->interaction_cov)){\n        if(!params->interaction_file) {\n          params->interaction_snp_offset = tmp_snp.offset;\n          params->ltco_chr = tmp_snp.chrom;\n          interaction_snp_found = true;\n        }\n        // go to next variant (get its offset first)\n        tmp_snp.offset = bgen_tmp.get_position();\n        continue;\n      }\n\n      // if specified chrlist/range\n      if(\n          (params->select_chrs && !in_chrList(tmp_snp.chrom, filters))\n          ||\n          (params->set_range && !in_range(tmp_snp.chrom, position, params))\n        ) {\n        // go to next variant (get its offset first)\n        tmp_snp.offset = bgen_tmp.get_position();\n        continue;\n      }\n\n      // make list of variant IDs if inclusion/exclusion file is given\n      if(params->mk_snp_map){\n        if (in_map(tmp_snp.ID, filters->snpID_to_ind)) { // ignore duplicate\n          tmp_snp.offset = bgen_tmp.get_position();\n          continue;\n        }\n        filters->snpID_to_ind[ tmp_snp.ID ] = snpinfo.size();\n      }\n\n      // keep track of how many included snps per chromosome there are\n      files->chr_counts[tmp_snp.chrom-1]++;\n\n      snpinfo.push_back(tmp_snp);\n\n      tmp_snp.offset = bgen_tmp.get_position();\n    }\n\n    if(params->interaction_snp && !params->interaction_file && !interaction_snp_found)\n      throw \"SNP specified for GxG interaction test was not found.\";\n\n    if (!params->test_mode && (nOutofOrder > 0)) \n      sout << \"WARNING: Total number of snps out-of-order in bgen file : \" << nOutofOrder << endl;\n  }\n\n  // check if should mask snps\n  check_snps_include_exclude(files, params, filters, snpinfo, chr_map, sout);\n\n  // get info on samples\n  params->n_samples  = bgen_tmp.number_of_samples();\n\n  // get sample IDs (from sample file or directly from bgen file)\n  if( params->bgenSample ) {\n    read_bgen_sample(files->sample_file, params, tmp_ids, sout);\n  } else {\n    bgen_tmp.get_sample_ids(\n        [&tmp_ids]( std::string const& id ) { tmp_ids.push_back( id ) ; } );\n    // set to unknown\n    params->sex = ArrayXi::Constant(params->n_samples, 0);\n  }\n\n  // check duplicates -- if not, store in map\n  for(size_t i = 0; i < params->n_samples; i++) {\n\n    if (in_map(tmp_ids[i], params->FID_IID_to_ind)) \n      throw \"duplicate individual in bgen file : FID_IID=\" + tmp_ids[i];\n\n    params->FID_IID_to_ind[ tmp_ids[i] ] = i;\n  }\n\n  // check if should mask samples\n  check_samples_include_exclude(files, params, filters, sout);\n\n  // setup file for reading the genotype probabilities later\n  if( !params->streamBGEN ) \n    bgen.open( files->bgen_file ) ;\n  else\n    openStream(&files->geno_ifstream, files->bgen_file, ios::in | ios::binary, sout);\n\n  if (params->test_mode) params->dosage_mode = true;\n}\n\n\n// read .bgi file to get SNP info\nvoid read_bgi_file(BgenParser& bgen, struct in_files* files, struct param* params, struct filter* filters, std::vector<snp>& 
snpinfo, mstream& sout){\n\n  bool interaction_snp_found = false;\n  int nalleles;\n  uint32_t lineread = 0;\n  uint64 variant_bgi_size, variant_bgen_size;\n  string bgi_file = files->bgi_file;\n  string sql_query = \"SELECT * FROM Variant\", cnd1 = \"\";\n  snp tmp_snp;\n  sqlite3* db;\n  sqlite3_stmt* stmt;\n\n  uint32_t n_variants = bgen.number_of_variants();\n  uint32_t position ;\n  std::string chromosome, rsid, tmpchrom;\n  std::vector< std::string > alleles ;\n  std::vector< std::vector< double > > probs ;\n\n  // edit sql statement if a chromosome/position range is given\n  if( params->set_range ){\n    cnd1 = \" WHERE ( chromosome IN (\" + bgi_chrList(params->range_chr, params->nChrom) + \") AND position>=\" + to_string(params->range_min) + \" AND position<=\" + to_string(params->range_max) + \")\";\n  } else if( params->select_chrs ){\n    cnd1 = \" WHERE ( chromosome IN (\" + bgi_chrList(filters, params->nChrom) + \" ) )\";\n  }\n  // with GxG tests\n  if(params->interaction_snp && (cnd1.size() > 0)){ // bug fix - only use this if querying on chrs/range\n    cnd1.append(\" OR ( rsid = '\" + filters->interaction_cov + \"' )\" );\n  }\n  sql_query.append( cnd1 );\n\n  sout << \"   -index bgi file [\" << bgi_file<< \"]\" << endl;\n  if( sqlite3_open( bgi_file.c_str(), &db ) != SQLITE_OK ) \n    throw  sqlite3_errmsg(db);\n\n\n  // header: chromosome|position|rsid|number_of_alleles|allele1|allele2|file_start_position|size_in_bytes\n  if( sqlite3_prepare_v2( db, sql_query.c_str(), -1, &stmt, NULL ) != SQLITE_OK )\n    throw sqlite3_errmsg(db);\n\n  bool done = false;\n  uint32_t nOutofOrder = 0;\n  while (!done) {\n    switch (sqlite3_step(stmt)) {\n      case SQLITE_ROW:\n\n        chromosome = std::string( (char *) sqlite3_column_text(stmt, 0) );\n        tmp_snp.chrom = chrStrToInt(chromosome, params->nChrom);\n        if (tmp_snp.chrom == -1) \n          throw \"unknown chromosome code in bgi file (=\" + chromosome + \").\";\n        if( files->chr_read.empty() || (tmp_snp.chrom != files->chr_read.back()) ) files->chr_read.push_back(tmp_snp.chrom);\n\n        tmp_snp.physpos = strtoul( (char *) sqlite3_column_text(stmt, 1), NULL, 10);\n        tmp_snp.ID = std::string( (char *) sqlite3_column_text(stmt, 2) );\n        nalleles = atoi( (char *) sqlite3_column_text(stmt, 3) );\n        assert(nalleles == 2) ; // only bi-allelic allowed\n        if( params->ref_first ){ // reference is first\n          tmp_snp.allele1 = std::string( (char *) sqlite3_column_text(stmt, 4) );\n          tmp_snp.allele2 = std::string( (char *) sqlite3_column_text(stmt, 5) );\n        } else {\n          tmp_snp.allele1 = std::string( (char *) sqlite3_column_text(stmt, 5) );\n          tmp_snp.allele2 = std::string( (char *) sqlite3_column_text(stmt, 4) ); // switch so allele0 is ALT\n        }\n        tmp_snp.offset = strtoull( (char *) sqlite3_column_text(stmt, 6), NULL, 10);\n\n        // check if matches with info from bgenparser for first read variant\n        if( snpinfo.empty() ){\n          bgen.jumpto(tmp_snp.offset);\n          bgen.read_variant( &tmpchrom, &position, &rsid, &alleles );\n          bgen.read_probs( &probs ) ;\n          if( probs[0].size() != 3 ) // unphased only \n            throw \"only unphased BGEN files are supported.\";\n          variant_bgen_size = bgen.get_position() - tmp_snp.offset;\n          variant_bgi_size = strtoull( (char *) sqlite3_column_text(stmt, 7), NULL, 10);\n          // check CPRA\n          assert( chromosome == tmpchrom );\n          assert( tmp_snp.physpos 
== position );\n          assert( ( (tmp_snp.allele1 == alleles[0]) && (tmp_snp.allele2 == alleles[1]) ) || ( (tmp_snp.allele1 == alleles[1]) && (tmp_snp.allele2 == alleles[0])) );\n          assert( variant_bgi_size == variant_bgen_size );\n        }\n\n        // check if snps are in order (same chromosome & non-decreasing positions)\n        if (!snpinfo.empty()\n            && (tmp_snp.chrom == snpinfo.back().chrom)\n            && ( (tmp_snp.physpos < snpinfo.back().physpos) ))\n          nOutofOrder++;\n\n        lineread++;\n\n        // if using GxG interaction test\n        if(params->interaction_snp && (tmp_snp.ID == filters->interaction_cov)){\n          if(!params->interaction_file) {\n            params->interaction_snp_offset = tmp_snp.offset;\n            params->ltco_chr = tmp_snp.chrom;\n            interaction_snp_found = true;\n          }\n          continue; // don't save it\n        }\n\n        // make list of variant IDs if inclusion/exclusion file is given\n        if(params->mk_snp_map){\n          if (in_map(tmp_snp.ID, filters->snpID_to_ind))\n            continue; // don't save it\n          filters->snpID_to_ind[ tmp_snp.ID ] = snpinfo.size();\n        }\n\n        // keep track of how many included snps per chromosome there are\n        files->chr_counts[tmp_snp.chrom-1]++;\n\n        snpinfo.push_back(tmp_snp);\n        break;\n\n      case SQLITE_DONE:\n        done = true;\n        break;\n\n      default:\n        throw \"failed reading file (\" + std::string( sqlite3_errmsg(db) ) + \").\";\n    }\n  }\n\n  sqlite3_finalize(stmt);\n  sqlite3_close(db);\n\n  if(params->interaction_snp && !params->interaction_file && !interaction_snp_found)\n    throw \"SNP specified for GxG interaction test was not found.\";\n\n  if( !params->set_range && !params->select_chrs) assert( lineread == n_variants );\n  if (!params->test_mode && (nOutofOrder > 0)) sout << \"WARNING: Total number of snps out-of-order in bgen file : \" << nOutofOrder << endl;\n\n}\n\nvoid read_bgi_file(string const& setting, BgenParser& bgen, geno_file_info* ext_file_info, map <string, uint64>* variant_names, struct param* params, mstream& sout){\n\n  uint32_t lineread = 0;\n  uint64 offset, variant_bgi_size, variant_bgen_size;\n  string bgi_file = ext_file_info->file + \".bgi\";\n  string sql_query, rsid;\n  map <string, uint64> tmp_map;\n  sqlite3* db;\n  sqlite3_stmt* stmt;\n\n  int nalleles, chrom;\n  uint32_t position ;\n  std::string tmp_str, chr;\n  std::vector< std::string > alleles ;\n  std::vector< std::vector< double > > probs ;\n\n  // sql statement to pass variants to keep\n  sql_query = \"SELECT * FROM Variant WHERE rsid IN (\" + bgi_rsidList((*variant_names)) + \" )\";\n\n  sout << \"      -index bgi file [\" << bgi_file<< \"]\" << endl;\n  if( sqlite3_open( bgi_file.c_str(), &db ) != SQLITE_OK ) \n    throw  sqlite3_errmsg(db);\n\n  // header: chromosome|position|rsid|number_of_alleles|allele1|allele2|file_start_position|size_in_bytes\n  if( sqlite3_prepare_v2( db, sql_query.c_str(), -1, &stmt, NULL ) != SQLITE_OK )\n    throw sqlite3_errmsg(db);\n\n  bool done = false;\n  while (!done) {\n    switch (sqlite3_step(stmt)) {\n      case SQLITE_ROW:\n\n        nalleles = atoi( (char *) sqlite3_column_text(stmt, 3) );\n        assert(nalleles == 2) ; // only bi-allelic allowed\n\n        rsid = std::string( (char *) sqlite3_column_text(stmt, 2) );\n        offset = strtoull( (char *) sqlite3_column_text(stmt, 6), NULL, 10);\n        // make list of variant IDs\n        tmp_map[ rsid ] = 
offset;\n\n        // check if matches with info from bgenparser for first read variant\n        if( lineread++ == 0 ){\n          bgen.jumpto(offset);\n          bgen.read_variant( &chr, &position, &tmp_str, &alleles );\n          bgen.read_probs( &probs ) ;\n          if( probs[0].size() != 3 ) // unphased only \n            throw \"only unphased BGEN files are supported.\";\n          variant_bgen_size = bgen.get_position() - offset;\n          variant_bgi_size = strtoull( (char *) sqlite3_column_text(stmt, 7), NULL, 10);\n          assert( tmp_str == rsid );\n          assert( variant_bgi_size == variant_bgen_size );\n          if(setting == \"interaction\") {\n            chrom = chrStrToInt(chr, params->nChrom);\n            if (chrom <= 0) \n              throw \"unknown chromosome code in bgen file.\";\n            params->ltco_chr = chrom;\n          }\n        }\n\n        break;\n\n      case SQLITE_DONE:\n        done = true;\n        break;\n\n      default:\n        throw \"failed reading file (\" + std::string( sqlite3_errmsg(db) ) + \").\";\n    }\n  }\n\n  sqlite3_finalize(stmt);\n  sqlite3_close(db);\n\n  if(tmp_map.size() > params->max_condition_vars) // not relevant for gxg(=1)\n    throw \"number of variants used for conditional analysis is greater than maximum of \" + to_string(params->max_condition_vars) + \" (use --max-condition-vars to increase the limit)\";\n  else if(tmp_map.size() == 0)\n    throw \"no variants were found in the BGEN file\";\n\n  // replace with new map\n  (*variant_names) = tmp_map;\n\n}\n\n\nvoid read_bgen_sample(const string& sample_file, struct param* params, std::vector<string> &ids, mstream& sout){\n\n  int nline = 0;\n  string FID, IID, line, tmp_str, fname;\n  std::vector<int> sex;\n  std::vector<string> IDvec;\n  Files myfile;\n  if( params->write_samples || params->write_masks) IDvec.resize(2);\n\n  fname = sample_file;\n  if(!file_exists (fname)) fname.append(\".gz\");\n  sout << \"   -sample file: \" << fname << endl;\n  myfile.openForRead(fname, sout);\n\n  // read fid/iid information\n  while (myfile.readLine(line)) {\n    removeCarriageReturn( line );\n    std::istringstream iss(line);\n\n    if( !(iss >> FID >> IID) )\n      throw \"incorrectly formatted sample file at line \" + to_string( ids.size() + 1 );\n\n    // check first two lines for correct format\n    if(nline == 0){\n\n      if( (FID != \"ID_1\") || (IID != \"ID_2\") ) \n        throw \"header of the sample file must start with: ID_1 ID_2\";\n\n    } else if(nline == 1){\n\n      if( (FID != \"0\") || (IID != \"0\") ) \n        throw \"second line of sample file must start with: 0 0.\";\n\n    } else {\n\n      tmp_str = FID + \"_\" + IID;\n      ids.push_back(tmp_str);\n      if(params->write_samples || params->write_masks) {\n        IDvec[0] = FID;\n        IDvec[1] = IID;\n        params->FIDvec.push_back(IDvec);\n      }\n\n      // read sex code (parsed into IID; if no sex column, set to 0)\n      if( !(iss >> FID >> IID) ) sex.push_back(0);\n      else if( (IID == \"0\") || (IID == \"NA\") ) sex.push_back(0);\n      else if( IID == \"1\" ) sex.push_back(1);\n      else if( IID == \"2\" ) sex.push_back(2);\n      else throw \"unrecognized sex code in file : '\" + IID + \"'\";\n\n    }\n\n    nline++;\n  }\n\n  if( params->n_samples != ids.size() )\n    throw \"number of samples in BGEN file does not match that in the sample file.\";\n\n  params->sex = Map<ArrayXi>(sex.data(), params->n_samples, 1);\n\n  myfile.closeFile();\n}\n\nvoid read_bgen_sample(const string& sample_file, 
std::vector<string> &ids, mstream& sout){\n\n  int nline = 0;\n  string FID, IID, line, fname;\n  Files myfile;\n\n  fname = sample_file;\n  if(!file_exists (fname)) fname.append(\".gz\");\n  sout << \"      -sample file: \" << fname << endl;\n  myfile.openForRead(fname, sout);\n\n  // read fid/iid information\n  while (myfile.readLine(line)) {\n    removeCarriageReturn( line );\n    std::istringstream iss(line);\n\n    if( !(iss >> FID >> IID) )\n      throw \"incorrectly formatted sample file at line \" + to_string( nline + 1 );\n\n    // check first two lines for correct format\n    if(nline == 0){\n\n      if( (FID != \"ID_1\") || (IID != \"ID_2\") ) \n        throw \"header of the sample file must start with: ID_1 ID_2\";\n\n    } else if(nline == 1){\n\n      if( (FID != \"0\") || (IID != \"0\") ) \n        throw \"second line of sample file must start with: 0 0.\";\n\n    } else ids.push_back(FID + \"_\" + IID);\n\n    nline++;\n  }\n\n  myfile.closeFile();\n\n}\n\n\nvoid read_bed_bim_fam(struct in_files* files, struct param* params, struct filter* filters, vector<snp>& snpinfo, map<int,vector<int>>& chr_map, mstream& sout) {\n\n  uint32_t nsamples_bed;\n  read_bim(files, params, filters, snpinfo, sout);\n\n  // check if should mask snps\n  check_snps_include_exclude(files, params, filters, snpinfo, chr_map, sout);\n\n  read_fam(files, params, sout);\n  nsamples_bed = params->n_samples;\n  // check if should mask samples\n  check_samples_include_exclude(files, params, filters, sout);\n\n  prep_bed(nsamples_bed, files, sout);\n\n  // build lookup table\n  buildLookupTable(params->bed_lookup_table);\n}\n\n\nvoid read_bim(struct in_files* files, struct param* params, struct filter* filters, vector<snp>& snpinfo, mstream& sout) {\n\n  bool interaction_snp_found = false;\n  uint32_t nOutofOrder = 0;\n  int minChr_read = 0; // enforce that chromosomes in file are sorted\n  uint64 lineread = 0;\n  std::vector< string > tmp_str_vec ;\n  snp tmp_snp;\n  string line, fname;\n  Files myfile;\n\n  fname = files->bed_prefix + \".bim\";\n  if(!file_exists (fname)) fname.append(\".gz\");\n  sout << left << std::setw(20) << \" * bim\" << \": [\" << fname << \"] \" << flush;\n  myfile.openForRead(fname, sout);\n\n  //if(params->set_range) cerr << params->range_chr << \"\\t\" << params->range_min << \"\\t\" << params->range_max<< endl;\n\n  while (myfile.readLine(line)) {\n    removeCarriageReturn( line );\n    tmp_str_vec = string_split(line,\"\\t \");\n\n    if( tmp_str_vec.size() < 6 )\n      throw \"incorrectly formatted bim file at line \" + to_string( snpinfo.size()+1 );\n\n    tmp_snp.chrom = chrStrToInt(tmp_str_vec[0], params->nChrom);\n    tmp_snp.ID = tmp_str_vec[1];\n    //tmp_snp.genpos = std::stod( tmp_str_vec[2]);\n    tmp_snp.physpos = std::stoul( tmp_str_vec[3],nullptr,0);\n    if( params->ref_first ){ // reference is first\n      tmp_snp.allele1 = tmp_str_vec[4];\n      tmp_snp.allele2 = tmp_str_vec[5];\n    } else { // reference is last\n      tmp_snp.allele1 = tmp_str_vec[5];\n      tmp_snp.allele2 = tmp_str_vec[4];\n    }\n    tmp_snp.offset = lineread;\n\n    if (tmp_snp.chrom == -1) \n      throw \"unknown chromosome code in bim file at line \" + to_string( snpinfo.size()+1 );\n\n    if( files->chr_read.empty() || (tmp_snp.chrom != files->chr_read.back() ) ) {\n      files->chr_read.push_back(tmp_snp.chrom);\n      if( tmp_snp.chrom <= minChr_read )\n        throw \"chromosomes in bim file are not in ascending order.\";\n      else \n        minChr_read = tmp_snp.chrom;\n    
}\n\n    // check if snps are in order (same chromosome & non-decreasing positions)\n    if (!snpinfo.empty() && (tmp_snp.chrom == snpinfo.back().chrom) && ( (tmp_snp.physpos < snpinfo.back().physpos) )) nOutofOrder++;\n\n    lineread++;\n\n    // if using GxG interaction test\n    if(params->interaction_snp && (tmp_snp.ID == filters->interaction_cov)){\n      if(!params->interaction_file) {\n        params->interaction_snp_offset = tmp_snp.offset;\n        params->ltco_chr = tmp_snp.chrom;\n        interaction_snp_found = true;\n      }\n      continue;\n    }\n\n    // if specified chrlist/range\n    if(\n        (params->select_chrs && !in_chrList(tmp_snp.chrom, filters))\n        ||\n        (params->set_range && !in_range(tmp_snp.chrom, tmp_snp.physpos, params))\n      ) continue;\n\n    // make list of variant IDs if inclusion/exclusion file is given\n    if(params->mk_snp_map){\n      if (in_map(tmp_snp.ID, filters->snpID_to_ind))\n        continue; // skip duplicate\n      filters->snpID_to_ind[ tmp_snp.ID ] = snpinfo.size();\n    }\n\n    // keep track of how many included snps per chromosome there are\n    files->chr_counts[tmp_snp.chrom-1]++;\n\n    snpinfo.push_back(tmp_snp);\n  }\n\n  sout << \"n_snps = \" << lineread << endl;\n\n  if(params->interaction_snp && !params->interaction_file && !interaction_snp_found)\n    throw \"SNP specified for GxG interaction test was not found.\";\n\n  if (!params->test_mode && (nOutofOrder > 0)) sout << \"WARNING: Total number of snps out-of-order in bim file : \" << nOutofOrder << endl;\n\n  myfile.closeFile();\n\n}\n\nuint32_t read_bim(map<string, vector<uint64>>& index_map, geno_file_info* ext_file_info, struct param* params, mstream& sout) {\n\n  int chrom;\n  uint64 lineread = 0;\n  std::vector< string > tmp_str_vec ;\n  std::vector< uint64 > tmp_v = std::vector< uint64 >(2);\n  string line, fname;\n  Files myfile;\n\n  fname = ext_file_info->file + \".bim\";\n  if(!file_exists (fname)) fname.append(\".gz\");\n  myfile.openForRead(fname, sout);\n\n  while (myfile.readLine(line)) {\n    removeCarriageReturn( line );\n    tmp_str_vec = string_split(line,\"\\t \");\n    if( tmp_str_vec.size() < 6 )\n      throw \"incorrectly formatted bim file at line \" + to_string( lineread+1 );\n    chrom = chrStrToInt(tmp_str_vec[0], params->nChrom);\n    if (chrom <= 0) \n      throw \"unknown chromosome code in bim file.\";\n    tmp_v[0] = lineread++, tmp_v[1] = chrom;\n    index_map[ tmp_str_vec[1] ] = tmp_v;\n  }\n\n  myfile.closeFile();\n  return index_map.size();\n}\n\n\nvoid read_fam(struct in_files* files, struct param* params, mstream& sout) {\n\n  int lineread = 0;\n  string line, tmp_id, fname;\n  std::vector<int> sex;\n  std::vector< string > tmp_str_vec, IDvec;\n  Files myfile;\n  if( params->write_samples || params->write_masks) IDvec.resize(2);\n\n  fname = files->bed_prefix + \".fam\";\n  if(!file_exists (fname)) fname.append(\".gz\");\n  sout << left << std::setw(20) << \" * fam\" << \": [\" << fname << \"] \";\n  myfile.openForRead(fname, sout);\n\n  while (myfile.readLine(line)) {\n    removeCarriageReturn( line );\n    tmp_str_vec = string_split(line,\"\\t \");\n\n    if( tmp_str_vec.size() < 6 )\n      throw \"incorrectly formatted fam file at line \" + to_string( lineread + 1 );\n\n    tmp_id = tmp_str_vec[0] + \"_\" + tmp_str_vec[1];\n\n    // check duplicates -- if not, store in map\n    if (in_map(tmp_id, params->FID_IID_to_ind)) \n      throw \"duplicate individual in fam file : FID_IID=\" + tmp_id ;\n\n    
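// map FID_IID to its 0-based row index in the fam file\n    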
params->FID_IID_to_ind[ tmp_id ] = lineread;\n    if(params->write_samples || params->write_masks) {\n      IDvec[0] = tmp_str_vec[0];\n      IDvec[1] = tmp_str_vec[1];\n      params->FIDvec.push_back(IDvec);\n    }\n\n    // store sex\n    if( tmp_str_vec[4] == \"0\" ) sex.push_back(0);\n    else if( tmp_str_vec[4] == \"1\" ) sex.push_back(1);\n    else if( tmp_str_vec[4] == \"2\" ) sex.push_back(2);\n    else throw \"unrecognized sex code in file : '\" + tmp_str_vec[4] + \"'\";\n\n    lineread++;\n  }\n\n  myfile.closeFile();\n  params->n_samples = lineread;\n  params->sex = Map<ArrayXi>(sex.data(), params->n_samples, 1);\n\n  sout << \"n_samples = \" << params->n_samples << endl;\n}\n\nuint32_t read_fam(struct ext_geno_info& ginfo, geno_file_info* ext_file_info, Ref<ArrayXb> mask, struct param* params, mstream& sout) {\n\n  uint32_t position;\n  string line, fname;\n  std::vector< string > tmp_str_vec, tmp_ids;\n  Files myfile;\n\n  fname = ext_file_info->file + \".fam\";\n  if(!file_exists (fname)) fname.append(\".gz\");\n  myfile.openForRead(fname, sout);\n\n  while (myfile.readLine(line)) {\n    removeCarriageReturn( line );\n    tmp_str_vec = string_split(line,\"\\t \");\n    if( tmp_str_vec.size() < 6 )\n      throw \"incorrectly formatted fam file at line \" + to_string( tmp_ids.size() + 1 );\n    tmp_ids.push_back(tmp_str_vec[0] + \"_\" + tmp_str_vec[1]);\n  }\n\n  myfile.closeFile();\n\n  // check if included in the analysis (if yes, store IDs)\n  ginfo.sample_keep.resize(tmp_ids.size());\n  ginfo.sample_index.resize(tmp_ids.size());\n  for(size_t i = 0; i < tmp_ids.size(); i++) {\n    ginfo.sample_keep(i) = in_map(tmp_ids[i], params->FID_IID_to_ind); \n    if(ginfo.sample_keep(i)) {\n      position = params->FID_IID_to_ind[ tmp_ids[i] ];\n      if(mask(position)) // analyzed sample\n        ginfo.sample_index(i) = position;\n      else\n        ginfo.sample_keep(i) = false; \n    }\n  }\n\n  if(!ginfo.sample_keep.any())\n    throw \"none of the analyzed samples are present in the file\";\n\n  return tmp_ids.size();\n}\n\n\nvoid prep_bed(const uint32_t& nsamples, struct in_files* files, mstream& sout) {\n\n  string fname;\n\n  fname = files->bed_prefix + \".bed\";\n  sout << left << std::setw(20) << \" * bed\" << \": [\" << fname << \"]\" << endl;\n  openStream(&files->geno_ifstream, fname, std::ios::in | std::ios::binary, sout);\n\n  uchar header[3];\n  files->geno_ifstream.read( reinterpret_cast<char *> (&header[0]), 3);\n  if ( (header[0] != 0x6c) || (header[1] != 0x1b) || (header[2] != 0x01) ) \n    throw \"incorrect magic number in bed file.\";\n\n  // size of genotype block [(n+3)/4 = ceil(n/4.0)]\n  files->bed_block_size = (nsamples+3)>>2;\n  files->inbed.resize( files->bed_block_size );\n}\n\n\nvoid read_pgen_pvar_psam(struct in_files* files, struct param* params, struct filter* filters, struct geno_block* gblock, vector<snp>& snpinfo, map<int,vector<int>>& chr_map, mstream& sout) {\n\n  gblock->nv = read_pvar(files, params, filters, snpinfo, sout);\n\n  // check if should mask snps\n  check_snps_include_exclude(files, params, filters, snpinfo, chr_map, sout);\n\n  read_psam(files, params, sout);\n  sout << \"n_samples = \" << params->n_samples << endl;\n  gblock->ns = params->n_samples;\n  // check if should mask samples\n  check_samples_include_exclude(files, params, filters, sout);\n\n  prep_pgen(files, filters, gblock, params, sout);\n\n}\n\nuint64 read_pvar(struct in_files* files, struct param* params, struct filter* filters, vector<snp>& snpinfo, mstream& sout) 
{\n\n  bool interaction_snp_found = false;\n  uint32_t nOutofOrder = 0;\n  int minChr_read = 0; // enforce that chromosomes in file are sorted\n  uint64 lineread = 0;\n  std::vector< string > tmp_str_vec ;\n  snp tmp_snp;\n  string line, fname;\n  Files myfile;\n\n  fname = files->pgen_prefix + \".pvar\";\n  if(!file_exists (fname)) fname.append(\".gz\");\n  sout << left << std::setw(20) << \" * pvar\" << \": [\" << fname << \"] \" << flush;\n  myfile.openForRead(fname, sout);\n\n  while (myfile.readLine(line)) { // skip to main header line\n    removeCarriageReturn( line );\n    tmp_str_vec = string_split(line,\"\\t \");\n\n    if( tmp_str_vec.size() < 1 )\n      throw \"no blank lines should be before the header line in pvar file.\";\n\n    if( tmp_str_vec[0] == \"#CHROM\" ) break;\n  }\n\n  // check header\n  if(tmp_str_vec.size() < 5)\n    throw \"header of pvar file does not have correct format.\";\n\n  auto posIter = std::find(tmp_str_vec.begin(), tmp_str_vec.end(), \"POS\");\n  auto idIter = std::find(tmp_str_vec.begin(), tmp_str_vec.end(), \"ID\");\n  auto refIter = std::find(tmp_str_vec.begin(), tmp_str_vec.end(), \"REF\");\n  auto altIter = std::find(tmp_str_vec.begin(), tmp_str_vec.end(), \"ALT\");\n\n  if(posIter == tmp_str_vec.end() ||\n      idIter == tmp_str_vec.end() ||\n      refIter == tmp_str_vec.end() ||\n      altIter == tmp_str_vec.end())\n    throw \"header of pvar file does not have correct format.\";\n\n  int posIndex, idIndex, refIndex, altIndex;\n  posIndex = std::distance(tmp_str_vec.begin(), posIter);\n  idIndex = std::distance(tmp_str_vec.begin(), idIter);\n  refIndex = std::distance(tmp_str_vec.begin(), refIter);\n  altIndex = std::distance(tmp_str_vec.begin(), altIter);\n\n  while (myfile.readLine(line)) {\n    tmp_str_vec = string_split(line,\"\\t \");\n\n    if( tmp_str_vec.size() < 5 )\n      throw \"incorrectly formatted pvar file at line \" + to_string( snpinfo.size()+1 );\n\n    tmp_snp.chrom = chrStrToInt(tmp_str_vec[0], params->nChrom);\n    tmp_snp.physpos = std::stoul( tmp_str_vec.at(posIndex),nullptr,0);\n    tmp_snp.ID = tmp_str_vec.at(idIndex);\n    tmp_snp.allele1 = tmp_str_vec.at(refIndex);\n    tmp_snp.allele2 = tmp_str_vec.at(altIndex);\n    tmp_snp.offset = lineread; // store index in file\n\n    if (tmp_snp.chrom == -1) \n      throw \"unknown chromosome code in pvar file at line \" + to_string( snpinfo.size()+1 ) ;\n\n    if( files->chr_read.empty() || (tmp_snp.chrom != files->chr_read.back() ) ) {\n      files->chr_read.push_back(tmp_snp.chrom);\n      if( tmp_snp.chrom <= minChr_read )\n        throw \"chromosomes in pvar file are not in ascending order.\";\n      else \n        minChr_read = tmp_snp.chrom;\n    }\n\n    // check if snps are in order (same chromosome & non-decreasing positions)\n    if (!snpinfo.empty() && (tmp_snp.chrom == snpinfo.back().chrom) && ( (tmp_snp.physpos < snpinfo.back().physpos) )) nOutofOrder++;\n\n    lineread++;\n\n    // if using GxG interaction test\n    if(params->interaction_snp && (tmp_snp.ID == filters->interaction_cov)){\n        if(!params->interaction_file) {\n          params->interaction_snp_offset = tmp_snp.offset;\n          params->ltco_chr = tmp_snp.chrom;\n          interaction_snp_found = true;\n        }\n      continue;\n    }\n\n    // if specified chrlist/range\n    if((params->select_chrs && !in_chrList(tmp_snp.chrom, filters)) ||\n        (params->set_range && !in_range(tmp_snp.chrom, tmp_snp.physpos, params))) \n      continue;\n\n    // make list of variant IDs if 
inclusion/exclusion file is given\n    if(params->mk_snp_map){\n      if (in_map(tmp_snp.ID, filters->snpID_to_ind)) \n        continue; // skip duplicate\n      filters->snpID_to_ind[ tmp_snp.ID ] = snpinfo.size();\n    }\n\n    // keep track of how many included snps per chromosome there are\n    files->chr_counts[tmp_snp.chrom-1]++;\n\n    snpinfo.push_back(tmp_snp);\n  }\n\n  sout << \"n_snps = \" <<  lineread << endl;\n\n  if(params->interaction_snp && !params->interaction_file && !interaction_snp_found)\n    throw \"SNP specified for GxG interaction test was not found.\";\n\n  if (!params->test_mode && (nOutofOrder > 0)) sout << \"WARNING: Total number of snps out-of-order in pvar file : \" << nOutofOrder << endl;\n\n  myfile.closeFile();\n\n  return lineread;\n}\n\nuint32_t read_pvar(map<string, vector<uint64>>& index_map, geno_file_info* ext_file_info, struct param* params, mstream& sout) {\n\n  int chrom;\n  uint64 lineread = 0;\n  std::vector< string > tmp_str_vec ;\n  std::vector< uint64 > tmp_v = std::vector< uint64 >(2);\n  string line, fname;\n  Files myfile;\n\n  fname = ext_file_info->file + \".pvar\";\n  if(!file_exists (fname)) fname.append(\".gz\");\n  myfile.openForRead(fname, sout);\n\n  while (myfile.readLine(line)) { // skip to main header line\n    removeCarriageReturn( line );\n    tmp_str_vec = string_split(line,\"\\t \");\n    if( tmp_str_vec.size() < 1 )\n      throw \"no blank lines should be before the header line in pvar file.\";\n    if( tmp_str_vec[0] == \"#CHROM\" ) break;\n  }\n\n  // check header\n  if(tmp_str_vec.size() < 5)\n    throw \"header of pvar file does not have correct format.\";\n\n  auto posIter = std::find(tmp_str_vec.begin(), tmp_str_vec.end(), \"POS\");\n  auto idIter = std::find(tmp_str_vec.begin(), tmp_str_vec.end(), \"ID\");\n  auto refIter = std::find(tmp_str_vec.begin(), tmp_str_vec.end(), \"REF\");\n  auto altIter = std::find(tmp_str_vec.begin(), tmp_str_vec.end(), \"ALT\");\n\n  if(posIter == tmp_str_vec.end() ||\n      idIter == tmp_str_vec.end() ||\n      refIter == tmp_str_vec.end() ||\n      altIter == tmp_str_vec.end())\n    throw \"header of pvar file does not have correct format.\";\n\n  int idIndex;\n  idIndex = std::distance(tmp_str_vec.begin(), idIter);\n\n  while (myfile.readLine(line)) {\n    tmp_str_vec = string_split(line,\"\\t \");\n    if( tmp_str_vec.size() < 5 )\n      throw \"incorrectly formatted pvar file at line \" + to_string( lineread+1 );\n    chrom = chrStrToInt(tmp_str_vec[0], params->nChrom);\n    if (chrom <= 0) \n      throw \"unknown chromosome code in pvar file.\";\n    tmp_v[0] = lineread++, tmp_v[1] = chrom;\n    index_map[ tmp_str_vec.at(idIndex) ] = tmp_v;\n  }\n\n  myfile.closeFile();\n  return index_map.size();\n}\n\nvoid read_psam(struct in_files* files, struct param* params, mstream& sout) {\n\n  int lineread = 0, sex_col = 0;\n  bool col_found = false;\n  string line, tmp_id, fname;\n  std::vector<int> sex;\n  std::vector< string > tmp_str_vec, IDvec;\n  Files myfile;\n  if( params->write_samples || params->write_masks) IDvec.resize(2);\n\n  fname = files->pgen_prefix + \".psam\";\n  if(!file_exists (fname)) fname.append(\".gz\");\n  sout << left << std::setw(20) << \" * psam\" << \": [\" << fname << \"] \" << flush;\n  myfile.openForRead(fname, sout);\n\n  while (myfile.readLine(line)) { // skip to main header line\n    removeCarriageReturn( line );\n    tmp_str_vec = string_split(line,\"\\t \");\n\n    if( tmp_str_vec.size() < 1 )\n      throw \"no blank lines should be before the header 
line in psam file.\";\n\n    if( tmp_str_vec[0] == \"#IID\" ) \n      throw \"invalid header (must start with #FID [not #IID]).\";\n\n    if( tmp_str_vec[0] == \"#FID\" ) \n      break;\n  }\n\n  // check header\n  if( (tmp_str_vec.size() < 2) || (tmp_str_vec[1] != \"IID\"))\n    throw \"header does not have the correct format.\";\n\n  // find if sex column is present\n  auto scol = find(tmp_str_vec.begin(), tmp_str_vec.end(), \"SEX\");\n  col_found = scol != tmp_str_vec.end();\n  if(col_found) sex_col = std::distance(tmp_str_vec.begin(), scol);\n\n  while (myfile.readLine(line)) {\n    tmp_str_vec = string_split(line,\"\\t \");\n\n    if( tmp_str_vec.size() < 3 )\n      throw \"incorrectly formatted psam file at line \" + to_string( lineread + 1 ) ;\n\n    tmp_id = tmp_str_vec[0] + \"_\" + tmp_str_vec[1];\n\n    // check duplicates -- if not, store in map\n    if (in_map(tmp_id, params->FID_IID_to_ind)) \n      throw \"duplicate individual in fam file : FID_IID=\" + tmp_id ;\n\n    params->FID_IID_to_ind[ tmp_id ] = lineread;\n    if(params->write_samples || params->write_masks) {\n      IDvec[0] = tmp_str_vec[0];\n      IDvec[1] = tmp_str_vec[1];\n      params->FIDvec.push_back(IDvec);\n    }\n\n    // store sex\n    if( col_found ){\n      if(( tmp_str_vec[sex_col] == \"0\") || (tmp_str_vec[sex_col] == \"NA\") ) sex.push_back(0);\n      else if( tmp_str_vec[sex_col] == \"1\" ) sex.push_back(1);\n      else if( tmp_str_vec[sex_col] == \"2\" ) sex.push_back(2);\n      else throw \"unrecognized sex code in file : '\" + tmp_str_vec[sex_col] + \"'\";\n    } else sex.push_back(0);\n\n    lineread++;\n  }\n\n  myfile.closeFile();\n  params->n_samples = lineread;\n  params->sex = Map<ArrayXi>(sex.data(), params->n_samples, 1);\n\n}\n\nuint32_t read_psam(struct ext_geno_info& ginfo, geno_file_info* ext_file_info, Ref<ArrayXb> mask, struct param* params, mstream& sout) {\n\n  uint32_t position;\n  string line, fname;\n  std::vector< string > tmp_str_vec, tmp_ids;\n  Files myfile;\n\n  fname = ext_file_info->file + \".psam\";\n  if(!file_exists (fname)) fname.append(\".gz\");\n  myfile.openForRead(fname, sout);\n\n  while (myfile.readLine(line)) { // skip to main header line\n    removeCarriageReturn( line );\n    tmp_str_vec = string_split(line,\"\\t \");\n    if( tmp_str_vec.size() < 1 )\n      throw \"no blank lines should be before the header line in psam file.\";\n    if( tmp_str_vec[0] == \"#IID\" ) \n      throw \"invalid header (must start with #FID [not #IID]).\";\n    if( tmp_str_vec[0] == \"#FID\" ) \n      break;\n  }\n\n  // check header\n  if( (tmp_str_vec.size() < 2) || (tmp_str_vec[1] != \"IID\"))\n    throw \"header does not have the correct format.\";\n\n  while (myfile.readLine(line)) {\n    tmp_str_vec = string_split(line,\"\\t \");\n    if( tmp_str_vec.size() < 2 )\n      throw \"incorrectly formatted psam file at line \" + to_string( tmp_ids.size() + 1 ) ;\n    tmp_ids.push_back(tmp_str_vec[0] + \"_\" + tmp_str_vec[1]);\n  }\n\n  myfile.closeFile();\n\n  // check if included in the analysis (if yes, store IDs)\n  ginfo.sample_keep.resize(tmp_ids.size());\n  ginfo.sample_index.resize(tmp_ids.size());\n  for(size_t i = 0; i < tmp_ids.size(); i++) {\n    ginfo.sample_keep(i) = in_map(tmp_ids[i], params->FID_IID_to_ind); \n    if(ginfo.sample_keep(i)) {\n      position = params->FID_IID_to_ind[ tmp_ids[i] ];\n      if(mask(position)) // analyzed sample\n        ginfo.sample_index(i) = position;\n      else\n        ginfo.sample_keep(i) = false; \n    }\n  }\n\n  
if(ginfo.sample_keep.count() == 0)\n    throw \"none of the analyzed samples are present in the file\";\n\n  return tmp_ids.size();\n}\n\n\nvoid prep_pgen(struct in_files const* files, struct filter const* filters, struct geno_block* gblock, struct param* params, mstream& sout){\n\n  int pgen_samples, pgen_variants, pgen_ac;\n  vector<int> subset_indices_1based;\n  string fname;\n\n  // need to know maximum block size before loading pgen\n  fname = files->pgen_prefix + \".pgen\";\n  sout << left << std::setw(20) << \" * pgen\" << \": [\" << fname << \"] \" << endl;\n\n  // set subset when samples have been excluded from analysis\n  if( filters->ind_in_analysis.size() < gblock->ns ){\n    // need to create vector of indices to keep (1-based)\n    for( size_t i = 0; i < gblock->ns; i++)\n      if(!filters->ind_ignore(i))\n        subset_indices_1based.push_back(i+1);\n  }\n\n  gblock->pgr.Load(fname, gblock->ns, subset_indices_1based, params->threads);\n  pgen_samples = gblock->pgr.GetRawSampleCt();\n  pgen_variants = gblock->pgr.GetVariantCt();\n  pgen_ac = gblock->pgr.GetMaxAlleleCt();\n\n  if(pgen_samples != (int) gblock->ns)\n    throw \"number of samples in pgen file and psam file don't match.\";\n  if(pgen_variants != (int) gblock->nv)\n    throw \"number of variants in pgen file and pvar file don't match.\";\n  if(pgen_ac != 2)\n    throw \"only bi-allelic variants are accepted.\";\n\n  params->dosage_mode = gblock->pgr.DosagePresent();\n\n}\n\nvoid prep_pgen(uint32_t& nsamples, uint32_t& nvars, struct ext_geno_info& ginfo, geno_file_info* ext_file_info){\n\n  vector<int> subset_indices_1based;\n  string fname = ext_file_info->file + \".pgen\";\n\n  // set subset when samples have been excluded from analysis\n  if( ginfo.sample_keep.count() < nsamples )\n    for(size_t i = 0; i < nsamples; i++)\n      if(ginfo.sample_keep(i))\n        subset_indices_1based.push_back(i+1);\n\n  ginfo.pgr.Load(fname, nsamples, subset_indices_1based, 1);\n  if(ginfo.pgr.GetRawSampleCt() != nsamples)\n    throw \"number of samples in pgen file and psam file don't match.\";\n  if(ginfo.pgr.GetVariantCt() != nvars)\n    throw \"number of variants in pgen file and pvar file don't match.\";\n  if(ginfo.pgr.GetMaxAlleleCt() != 2)\n    throw \"only bi-allelic variants are accepted.\";\n\n  ginfo.dosage_mode = ginfo.pgr.DosagePresent();\n}\n\n// determine if snps should be included/excluded for step 1\nvoid check_snps_include_exclude(struct in_files* files, struct param* params, struct filter* filters, vector<snp>& snpinfo, map<int,vector<int>>& chr_map, mstream& sout){\n\n  uint32_t tmppos = 0;\n  vector<snp> tmp_snpinfo;\n  std::map <std::string, uint32_t> tmp_map;\n\n  params->n_variants = snpinfo.size(); // current variants count\n\n  if( params->condition_snps && !params->condition_file)\n    get_snps_offset(filters->condition_snp_names, filters->snpID_to_ind, snpinfo, sout);\n\n  if(params->set_range)\n    sout << \"   -number of variants after filtering on range = \" << params->n_variants << endl;\n\n  // if inclusion/exclusion file is given\n  if(params->rm_snps || params->keep_snps) {\n\n    assert( snpinfo.size() == filters->snpID_to_ind.size() ); // should be the same\n    ArrayXb geno_mask, geno_mask_rm, geno_mask_keep;// true = keep, false = rm\n    geno_mask_keep = geno_mask_rm = ArrayXb::Constant(params->n_variants, true);\n\n    // apply masking to snps\n   if( params->keep_snps ) {\n      sout << \"   -keeping variants specified by --extract\\n\";\n      if(params->cormat_force_vars && 
(params->ld_list_file == \"\")) geno_mask_keep = check_in_map_from_files(filters->snpID_to_ind, files->file_snps_include, params, sout);\n      else geno_mask_keep = check_in_map_from_files(filters->snpID_to_ind, files->file_snps_include, sout);\n    }\n    if( params->rm_snps ) {\n      sout << \"   -removing variants specified by --exclude\\n\";\n      geno_mask_rm = !check_in_map_from_files(filters->snpID_to_ind, files->file_snps_exclude, sout);\n    } \n    geno_mask = geno_mask_rm && geno_mask_keep;\n\n    if(geno_mask.all()) // no snps to remove\n      params->rm_snps = params->keep_snps = false;\n    else {\n\n      // delete snpID map\n      filters->snpID_to_ind.clear();\n\n      // set chr counts to 0\n      std::fill(files->chr_counts.begin(), files->chr_counts.end(), 0);\n\n      // make snpinfo only with kept elements\n      params->n_variants = geno_mask.count();\n      tmp_snpinfo.reserve( params->n_variants );\n      for(int i = 0; i < geno_mask.size(); i++){\n\n        if(!geno_mask(i)) continue;\n\n        //cerr << snpinfo[i].ID << endl;\n        tmp_snpinfo.push_back( snpinfo[i] );\n        files->chr_counts[ snpinfo[ i ].chrom - 1 ]++;\n        // remake map if needed\n        if( params->keep_snp_map ) tmp_map[ snpinfo[i].ID ] = tmppos;\n\n        tmppos++;\n      }\n\n      snpinfo.clear();\n      std::vector<snp>().swap(snpinfo); // free memory\n      snpinfo = tmp_snpinfo;\n      if( params->keep_snp_map ) filters->snpID_to_ind = tmp_map;\n\n    }\n  }\n\n  // check nonzero\n  if(params->n_variants == 0)\n    throw \"no variant left to include in analysis.\";\n  if(params->rm_snps || params->keep_snps)\n    sout << \"   -number of variants remaining in the analysis = \" << params->n_variants << endl;\n\n  // go through each chromosome in order & save number of snps\n  // and save how many are actually read\n  vector<int> tmp_v;\n  tmp_v.resize(2, 0);\n  for(size_t j = 0; j < files->chr_read.size(); j++){\n    int i = files->chr_read[j];\n    tmp_v[0] = files->chr_counts[i-1];\n    chr_map[ i ] = tmp_v;\n  }\n\n  if(params->getCorMat)\n    check_ld_list(filters->snpID_to_ind, files, params, sout);\n\n  // with OR\n  check_snps_include_exclude_or(files, params, filters, snpinfo, sout);\n\n  if(params->forced_MAC > 0)\n    check_forced_MAC_file(filters->snpID_to_ind, snpinfo, params, sout);\n\n}\n\n// determine if snps should be included/excluded for step 2 using OR filter with MAC\nvoid check_snps_include_exclude_or(struct in_files* files, struct param* params, struct filter* filters, vector<snp>& snpinfo, mstream& sout){\n\n  if(!(params->rm_or || params->keep_or)) \n    return;\n\n  assert( snpinfo.size() == filters->snpID_to_ind.size() ); // should be the same\n  ArrayXb geno_mask;// if true, check MAC\n\n  if( params->rm_or ) {\n    sout << \"   -removing variants specified by --exclude-or and with MAC below threshold\\n\";\n    geno_mask = check_in_map_from_files(filters->snpID_to_ind, files->file_snps_exclude_or, sout);\n  } else if( params->keep_or ) {\n    sout << \"   -keeping only variants specified by --extract-or or with MAC above threshold\\n\";\n    geno_mask = !check_in_map_from_files(filters->snpID_to_ind, files->file_snps_include_or, sout);\n  }\n\n  for(int i = 0; i < geno_mask.size(); i++)\n    snpinfo[ i ].MAC_fail_if_checked = geno_mask(i);\n\n  // not needed if not using sets\n  if(!params->snp_set)\n    filters->snpID_to_ind.clear();\n\n}\n\n// use different MAC filter for subset of SNPs\nvoid check_forced_MAC_file(map <string, uint32_t>& 
map_ID, vector<snp>& snpinfo, struct param* params, mstream& sout){\n\n  assert( snpinfo.size() == map_ID.size() ); // should be the same\n\n  ArrayXb geno_mask = check_in_map_from_files(map_ID, {params->forced_MAC_snpfile}, sout);\n  for(int i = 0; i < geno_mask.size(); i++)\n    snpinfo[ i ].apply_diff_MAC_filter = geno_mask(i);\n\n  // not needed if not using sets\n  if(!params->snp_set)\n    map_ID.clear();\n\n}\n\nvoid check_samples_include_exclude(struct in_files const* files, struct param* params, struct filter* filters, mstream& sout){\n\n  bool keep_ids = params->write_samples || params->write_masks;\n  uint32_t ind_pos = 0, cum_pos;\n  string ind_ID;\n  std::map <std::string, uint32_t> new_map;\n  std::map <std::string, uint32_t>::iterator itr;\n  vector< string > allIDs;\n  vector< vector<string> > newFIDs;\n\n  // check number of samples\n  if( params->n_samples == 0 )\n    throw \"no samples remaining in the analysis.\";\n\n  if( params->rm_indivs ){\n    sout << \"   -removing individuals specified by --remove\\n\";\n    filters->ind_in_analysis = !check_in_map_from_files_IDs(files->file_ind_exclude, params, sout);\n  } else if( params->keep_indivs ){\n    sout << \"   -keeping only individuals specified by --keep\\n\";\n    filters->ind_in_analysis = check_in_map_from_files_IDs(files->file_ind_include, params, sout);\n  } else\n    filters->ind_in_analysis = ArrayXb::Constant(params->n_samples, true);\n\n  // for sex-specific analyses\n  if(params->sex_specific > 0){\n    if(params->sex_specific == 1) // male-only\n      filters->ind_in_analysis = filters->ind_in_analysis && (params->sex == 1);\n    else if(params->sex_specific == 2) // female-only\n      filters->ind_in_analysis = filters->ind_in_analysis && (params->sex == 2);\n    sout << \"   -keeping only \" << ( params->sex_specific == 1 ? \"male\" : \"female\" ) << \" individuals in the analysis\\n\";\n  }\n\n  // keep track of individual to exclude (i.e. 
not stored in memory)\n  filters->ind_ignore = !filters->ind_in_analysis;\n\n  if( !(filters->ind_in_analysis.all()) ) {\n\n    if( !(filters->ind_in_analysis.any()) )\n      throw \"no samples remaining in the analysis.\";\n\n    // need to re-assign indices\n    // retrieve all sample IDs (need to keep same order as in genotype file)\n    allIDs.resize( params->n_samples );\n    for (itr = params->FID_IID_to_ind.begin(); itr != params->FID_IID_to_ind.end(); ++itr) {\n      ind_ID = itr->first;\n      ind_pos = itr->second;\n      allIDs[ ind_pos ] = ind_ID;\n    }\n\n    // create new map\n    if( keep_ids ) \n      newFIDs.reserve( filters->ind_in_analysis.count() );\n    cum_pos = 0;\n    for( size_t j = 0; j < params->n_samples; j++){\n\n      if( filters->ind_ignore(j) ) continue;\n\n        new_map[ allIDs[j] ] = cum_pos;\n        if(keep_ids) newFIDs.push_back( params->FIDvec[j] );\n        cum_pos++;\n\n    }\n\n    // save map\n    params->FID_IID_to_ind = new_map;\n    if(keep_ids) params->FIDvec = newFIDs;\n\n    // subset sex to samples kept in the analysis\n    ArrayXi tmpVi = params->sex( get_true_indices(filters->ind_in_analysis) ); \n    params->sex = tmpVi;\n\n    // resize ind_in_analysis\n    filters->ind_in_analysis = ArrayXb::Constant(cum_pos, true);\n    sout << \"   -number of genotyped individuals remaining in the analysis = \" << cum_pos << endl;\n\n  }\n\n  params->n_samples = filters->ind_in_analysis.count();\n}\n\nArrayXb check_in_map_from_files(map <string, uint32_t>& map_ID, vector<string> const& file_list, struct param* params, mstream& sout) {\n\n  uint32_t lineread = 0;\n  string line;\n  std::vector< string > tmp_str_vec ;\n  Files myfile;\n  ArrayXb mask = ArrayXb::Constant( map_ID.size() , false); \n\n  // only allow a single extract file\n  if(file_list.size() > 1) throw \"cannot have multiple extract files\";\n\n  for(auto fin : file_list) {\n\n    myfile.openForRead (fin, sout);\n\n    while( myfile.readLine(line) ){\n      tmp_str_vec = string_split(line,\"\\t \");\n\n      if( tmp_str_vec.size() < 1 )\n        throw \"incorrectly formatted file.\";\n\n      removeCarriageReturn( tmp_str_vec[0] );\n\n      if( in_map(tmp_str_vec[0], params->extract_vars_order) ) \n        continue; // ignore duplicates\n\n      if( in_map(tmp_str_vec[0], map_ID) )\n        mask( map_ID[ tmp_str_vec[0] ] ) = true;\n\n      params->extract_vars_order[ tmp_str_vec[0] ] = lineread++;\n    }\n\n    myfile.closeFile();\n  }\n\n  return mask;\n\n}\n\nArrayXb check_in_map_from_files(map <string, uint32_t>& map_ID, vector<string> const& file_list, mstream& sout) {\n\n  string line;\n  std::vector< string > tmp_str_vec ;\n  Files myfile;\n  ArrayXb mask = ArrayXb::Constant( map_ID.size() , false); \n\n  for(auto fin : file_list) {\n\n    myfile.openForRead (fin, sout);\n\n    while( myfile.readLine(line) ){\n      tmp_str_vec = string_split(line,\"\\t \");\n\n      if( tmp_str_vec.size() < 1 )\n        throw \"incorrectly formatted file.\";\n\n      removeCarriageReturn( tmp_str_vec[0] );\n      if( in_map(tmp_str_vec[0], map_ID) ) \n        mask( map_ID[ tmp_str_vec[0] ] ) = true;\n    }\n\n    myfile.closeFile();\n  }\n\n  return mask;\n\n}\n\nArrayXb check_in_map_from_files_IDs(vector<string> const& file_list, struct param* params, mstream& sout) {\n\n  uint32_t nids = params->n_samples;\n  findID person;\n  string line;\n  std::vector< string > tmp_str_vec ;\n  Files myfile;\n  ArrayXb mask = ArrayXb::Constant(nids, false); \n\n  for(auto fin : file_list) {\n\n    
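// each listed file must have FID and IID as its first two columns\n    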
myfile.openForRead (fin, sout);\n\n    while( myfile.readLine(line) ){\n      tmp_str_vec = string_split(line,\"\\t \");\n\n      if( tmp_str_vec.size() < 2 )\n        throw \"incorrectly formatted file.\";\n\n      removeCarriageReturn( tmp_str_vec[1] );\n      person = getIndivIndex(tmp_str_vec[0], tmp_str_vec[1], params, sout);\n      if(!person.is_found) continue;\n      mask(person.index) = true;\n    }\n\n    myfile.closeFile();\n  }\n\n  return mask;\n\n}\n\nvoid check_ld_list(map <string, uint32_t>& map_ID, struct in_files* files, struct param* params, mstream& sout) {\n\n  if(params->ld_list_file == \"\") {\n    if(params->extract_vars_order.size() == 0) // use all genotyped variants\n      params->extract_vars_order = map_ID; \n    map<string, uint32_t >::iterator itr;\n    for (itr = params->extract_vars_order.begin(); itr != params->extract_vars_order.end(); ++itr) \n      if(in_map(itr->first, map_ID))\n        params->ld_sv_offsets.push_back( map_ID[ itr->first ] );\n    return;\n  }\n\n  string line;\n  std::vector< string > tmp_str_vec, set_keep_names;\n  Files myfile;\n\n  myfile.openForRead (params->ld_list_file, sout);\n\n  while( myfile.readLine(line) ){\n    tmp_str_vec = string_split(line,\"\\t \");\n\n    if( tmp_str_vec.size() < 2 )\n      throw \"incorrectly formatted file (fewer than 2 entries)\";\n\n    if( in_map(tmp_str_vec[1], params->extract_vars_order) ) \n      continue; // ignore duplicates\n\n    if(tmp_str_vec[0] == \"sv\"){ // single variant\n\n      // check if in geno file & if so, store index\n      if( in_map(tmp_str_vec[1], map_ID) ) \n        params->ld_sv_offsets.push_back( map_ID[ tmp_str_vec[1] ] );\n\n    } else if(tmp_str_vec[0] == \"mask\"){ // mask\n\n      if( tmp_str_vec.size() < 3 )\n        throw \"incorrectly formatted file (fewer than 3 entries)\";\n      // store gene name for extraction\n      removeCarriageReturn( tmp_str_vec[2] );\n      set_keep_names.push_back( tmp_str_vec[2] );\n\n    } else throw \"unrecognized entry in first column (=\" + tmp_str_vec[0] + \"). 
Should be sv/mask";\n\n    params->extract_vars_order[ tmp_str_vec[1] ] = params->extract_vars_order.size();\n  }\n\n  params->keep_sets = params->set_select_list = set_keep_names.size() > 0;\n  if(params->keep_sets)\n    files->file_sets_include = {print_csv(set_keep_names)};\n  if(set_keep_names.size() == 0) params->build_mask = false;\n\n  myfile.closeFile();\n}\n\n// only used in step 1\nvoid get_G(const int& block, const int& bs, const int& chrom, const uint32_t& snpcount, vector<snp> const& snpinfo, struct param const* params, struct in_files* files, struct geno_block* gblock, struct filter const* filters, const Ref<const MatrixXb>& masked_indivs, const Ref<const MatrixXd>& phenotypes_raw, mstream& sout){\n\n  auto t1 = std::chrono::high_resolution_clock::now();\n  sout << " block [" << block + 1 << "] : " << flush;\n\n  if(params->file_type == "bed")\n    readChunkFromBedFileToG(bs, chrom, snpcount, snpinfo, params, files, gblock, filters, masked_indivs, phenotypes_raw, sout);\n  else if(params->file_type == "pgen")\n    readChunkFromPGENFileToG(bs, snpcount, snpinfo, params, gblock, filters, masked_indivs, sout);\n  else if(params->streamBGEN)\n    readChunkFromBGENFileToG_fast(bs, chrom, snpcount, snpinfo, params, files, gblock, filters, masked_indivs, phenotypes_raw, sout);\n  else\n    readChunkFromBGENFileToG(bs, chrom, snpcount, snpinfo, params, files, gblock, filters, masked_indivs, phenotypes_raw, sout);\n\n  sout << bs << " snps ";\n\n  auto t2 = std::chrono::high_resolution_clock::now();\n  auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1);\n  sout << " (" << duration.count() << "ms) " << endl;\n}\n\n// step 1 using BGEN library API\nvoid readChunkFromBGENFileToG(const int& bs, const int& chrom, const uint32_t& snpcount, vector<snp> const& snpinfo, struct param const* params, struct geno_block* gblock, struct filter const* filters, const Ref<const MatrixXb>& masked_indivs, const Ref<const MatrixXd>& phenotypes_raw, mstream& sout) {\n\n  int ns;\n  uint32_t index ;\n  double ds, total;\n  std::string chromosome, rsid;\n  uint32_t position ;\n  std::vector< std::string > alleles ;\n  std::vector< std::vector< double > > probs ;\n\n  for(int snp = 0; snp < bs; snp++) {\n\n    // set to correct position\n    gblock->bgen.jumpto( snpinfo[ snpcount + snp ].offset );\n    gblock->bgen.read_variant( &chromosome, &position, &rsid, &alleles );\n\n    //sout << "["<< chrom << "]SNPid stored ("<< snpinfo[snpcount+snp].chrom <<") = " << snpinfo[snpcount+snp].ID<< "/ SNPIDread ("<<chromosome<<")= " << rsid << endl; exit(EXIT_FAILURE);\n\n    assert(chrStrToInt(chromosome, params->nChrom) == chrom);\n    gblock->bgen.read_probs( &probs ) ;\n\n    ns = 0, index = 0, total = 0;\n    for( std::size_t i = 0; i < probs.size(); ++i ) {\n\n      // skip samples that were ignored from the analysis\n      if( filters->ind_ignore(i) ) continue;\n\n      ds = 0;\n      for( std::size_t j = 1; j < probs[i].size(); ++j ) ds += probs[i][j] * j;\n\n      if(ds != -3) {\n
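        // ds = P(g=1) + 2*P(g=2) is the expected count of the last-listed allele;\n        // with ref-first input this is already the ALT dosage, otherwise it is flipped to 2 - ds\n        ds = params->ref_first ? 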
ds : (2 - ds); // if ref-first, no need to switch\n\n        if( filters->ind_in_analysis(index) ){\n            total += ds;\n            ns++;\n        }\n      }\n      gblock->Gmat(snp, index) = ds;\n      index++;\n    }\n\n    total /= ns;\n    if( params->alpha_prior != -1 ) gblock->snp_afs(snp, 0) = total / 2;\n\n      // impute missing\n    for (size_t i = 0; i < params->n_samples; ++i ) \n      mean_impute_g(gblock->Gmat(snp, i), total, filters->ind_in_analysis(i));\n\n  }\n\n}\n\n// step 1 BGEN faster file reading using OpenMP\nvoid readChunkFromBGENFileToG_fast(const int& bs, const int& chrom, const uint32_t& start, vector<snp> const& snpinfo, struct param const* params, struct in_files* files, struct geno_block* gblock, struct filter const* filters, const Ref<const MatrixXb>& masked_indivs, const Ref<const MatrixXd>& phenotypes_raw, mstream& sout) {\n\n  vector< vector < uchar > > snp_data_blocks;\n  vector< uint32_t > insize(bs), outsize(bs);\n  vector<uint64> indices(bs);\n\n  snp_data_blocks.resize( bs );\n  for (int i = 0; i < bs; i++) indices[i] = snpinfo[start + i].offset;\n\n  readChunkFromBGEN(&files->geno_ifstream, insize, outsize, snp_data_blocks, indices);\n\n  // unpack data for each variant\n#if defined(_OPENMP)\n  setNbThreads(1);\n#pragma omp parallel for schedule(dynamic)\n#endif\n  for(int isnp = 0; isnp < bs; isnp++) {\n\n    uint32_t const snpindex = start + isnp;\n\n    uint minploidy = 0, maxploidy = 0, phasing = 0, bits_prob = 0;\n    uint16_t numberOfAlleles = 0 ;\n    uint32_t nindivs = 0, index;\n    string tmp_buffer;\n    vector<uchar>* geno_block = &snp_data_blocks[isnp];\n\n    // set genotype data block\n    vector < uchar > geno_block_uncompressed;\n    geno_block_uncompressed.resize(outsize[isnp]);\n\n    // uncompress the block\n    bool compress_fail;\n    if(params->zlib_compress){ // using zlib\n      uLongf dest_size = outsize[isnp];\n      compress_fail = (uncompress( &(geno_block_uncompressed[0]), &dest_size, &((*geno_block)[0]), insize[isnp] - 4) != Z_OK) || (dest_size != outsize[isnp]);\n    } else { // using zstd\n      size_t const dest_size = ZSTD_decompress(&(geno_block_uncompressed[0]), outsize[isnp], &((*geno_block)[0]), insize[isnp] - 4) ;\n      compress_fail = (dest_size != outsize[isnp]);\n    }\n    // check it was successful\n    if( compress_fail )\n      throw \"failed to decompress genotype data block for variant: \" + snpinfo[ snpindex ].ID;\n\n    // stream to uncompressed block\n    uchar *buffer = &geno_block_uncompressed[0];\n    // sample size in file\n    std::memcpy(&nindivs, &(buffer[0]), 4);\n    assert( nindivs == filters->ind_ignore.size() );\n    buffer += 4;\n    // num alleles\n    std::memcpy(&numberOfAlleles, &(buffer[0]), 2);\n    assert( numberOfAlleles == 2 );\n    buffer += 2;\n    // ploidy\n    std::memcpy(&minploidy, &(buffer[0]), 1);\n    assert( minploidy == 2 );\n    buffer ++;\n    std::memcpy(&maxploidy, &(buffer[0]), 1);\n    assert( maxploidy == 2 );\n    buffer ++;\n\n    //to identify missing when getting dosages\n    vector < uchar > ploidy_n;\n    ploidy_n.resize( nindivs );\n    std::memcpy(&(ploidy_n[0]), &(buffer[0]), nindivs);\n    buffer += nindivs;\n\n    // phasing\n    std::memcpy(&phasing, &(buffer[0]), 1);\n    assert( phasing == 0 );\n    buffer++;\n\n    // bits per probability\n    std::memcpy(&bits_prob, &(buffer[0]), 1);\n    assert( bits_prob == 8 );\n    buffer++;\n\n    // get dosages \n    int ns = 0;\n    double prob0, prob1, prob2, total = 0;\n\n    // parse genotype 
probabilities block\n    index = 0;\n    for(size_t i = 0; i < nindivs; i++) {\n\n      // skip samples that were ignored from the analysis\n      if( filters->ind_ignore(i) ) {\n        buffer+=2;\n        continue;\n      }\n\n      if(ploidy_n[i] & 0x80) {\n        gblock->Gmat(isnp, index++) = -3;\n        buffer+=2;\n        continue;\n      }\n\n      prob0 = double((*reinterpret_cast< uint8_t const* >( buffer++ ))) / 255.0;\n      prob1 = double((*reinterpret_cast< uint8_t const* >( buffer++ ))) / 255.0;\n      prob2 = std::max( 1 - prob0 - prob1, 0.0);\n\n      if(params->ref_first) \n        gblock->Gmat(isnp, index) = prob1 + 2 * prob2;\n      else // switch allele0 to ALT\n        gblock->Gmat(isnp, index) = prob1 + 2 * prob0;\n\n      if( filters->ind_in_analysis(index) ){\n          total += gblock->Gmat(isnp, index);\n          ns++;\n      }\n      index++;\n    }\n    total /= ns;\n\n    if (params->alpha_prior != -1) gblock->snp_afs(isnp, 0) = total / 2;\n\n    // impute missing\n    for (size_t i = 0; i < params->n_samples; ++i ) \n      mean_impute_g(gblock->Gmat(isnp, i), total, filters->ind_in_analysis(i));\n\n  }\n#if defined(_OPENMP)\n  setNbThreads(params->threads);\n#endif\n\n}\n\n// only for step 1\nvoid readChunkFromBedFileToG(const int& bs, const int& chrom, const uint32_t& snpcount, vector<snp> const& snpinfo, struct param const* params, struct in_files* files, struct geno_block* gblock, struct filter const* filters, const Ref<const MatrixXb>& masked_indivs, const Ref<const MatrixXd>& phenotypes_raw, mstream& sout) {\n\n  int const nbl = files->bed_data_blocks.size();\n  uint32_t const nmax = filters->ind_ignore.size();\n\n  // allocate memory if needed\n  if( nbl < bs ){\n    files->bed_data_blocks.resize(bs);\n    for (int i = nbl; i < bs; i++)\n      files->bed_data_blocks[i].resize(files->bed_block_size);\n  }\n  // read in ceil(N/4) bytes from bed file for each snp\n  for(int j = 0; j < bs; j++) {\n    // set to correct position\n    jumpto_bed( snpinfo[snpcount + j].offset, files->bed_block_size, files->geno_ifstream);\n    files->geno_ifstream.read( reinterpret_cast<char *> (&files->bed_data_blocks[j][0]), files->bed_block_size);\n  }\n\n#if defined(_OPENMP)\n  setNbThreads(1);\n#pragma omp parallel for schedule(dynamic)\n#endif\n  for(int j = 0; j < bs; j++) {\n\n    int hc, ns;\n    uint32_t i, index ;\n    double total;\n    ArrayXd geno4; // genotype values for 4 samples at a time\n\n    ns = 0, total = 0, i = 0, index = 0;\n\n    for (size_t byte_start = 0; byte_start < files->bed_block_size; byte_start++) {\n\n      geno4 = params->bed_lookup_table[ files->bed_data_blocks[j][byte_start] ];\n\n      for(int bit_start = 0; bit_start < 4; bit_start++, i++){\n\n        // skip remainder past N samples\n        if(i >= nmax) break;\n\n        // skip samples that were ignored from the analysis\n        if( filters->ind_ignore(i) ) continue;\n\n        hc = geno4(bit_start);\n        if(params->ref_first && (hc != -3)) hc = 2 - hc;\n        gblock->Gmat(j, index) = hc;\n\n        if( filters->ind_in_analysis(index) && (hc != -3) ){\n          total += hc;\n          ns++;\n        }\n        index++;\n      }\n    }\n    total /= ns;\n    if(params->alpha_prior != -1) gblock->snp_afs(j, 0) = total / 2;\n\n    // impute missing\n    for (size_t i = 0; i < params->n_samples; i++) \n      mean_impute_g(gblock->Gmat(j, i), total, filters->ind_in_analysis(i));\n\n  }\n\n#if defined(_OPENMP)\n  setNbThreads(params->threads);\n#endif\n\n}\n\n\n
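// Note on the .bed decoding used above: each byte packs 4 samples at 2 bits each, sample k in bits\n// [2k,2k+1]; the 2-bit value v maps to {0: hom ALT (2), 1: missing (-3), 2: het (1), 3: hom REF (0)}\n// under PLINK's ref-last coding (see buildLookupTable below). E.g. byte 0xD8 = 0b11011000 gives\n// v = {0,2,1,3} for samples 1-4, i.e. genotypes {2, 1, -3, 0} before any ref-first flip.\n\n// only for step 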
1\nvoid readChunkFromPGENFileToG(const int& bs, const uint32_t& snpcount, vector<snp> const& snpinfo, struct param const* params, struct geno_block* gblock, struct filter const* filters, const Ref<const MatrixXb>& masked_indivs, mstream& sout) {\n\n  ArrayXb oob_err = ArrayXb::Constant(bs, false);\n\n#if defined(_OPENMP)\n  setNbThreads(1);\n#pragma omp parallel for schedule(dynamic)\n#endif\n  for(int j = 0; j < bs; j++) {\n\n    int thread_num = 0;\n#if defined(_OPENMP)\n    thread_num = omp_get_thread_num();\n#endif\n    //cerr << \"#\" << thread_num << endl;\n\n    double total;\n    ArrayXb keep_indices;\n    // G is MxN, but need to pass g as column vector\n    ArrayXd g (params->n_samples, 1);\n\n    // read genotype data\n    if( params->dosage_mode ){\n      gblock->pgr.Read(g.data(), params->n_samples, thread_num, snpinfo[snpcount+j].offset, 1);\n    } else\n      gblock->pgr.ReadHardcalls(g.data(), params->n_samples, thread_num, snpinfo[snpcount+j].offset, 1);\n\n    oob_err(j) = ((g < -3) || (g > 2)).any();\n    if(oob_err(j)) continue;\n\n    gblock->Gmat.row(j) = g.matrix().transpose();\n\n    keep_indices = filters->ind_in_analysis && (g != -3.0);\n    total = keep_indices.select(g,0).sum() / keep_indices.count();\n\n    if( params->alpha_prior != -1) gblock->snp_afs(j, 0) = total / 2;\n\n    // impute missing\n    for (size_t i = 0; i < params->n_samples; i++) \n      mean_impute_g(gblock->Gmat(j, i), total, filters->ind_in_analysis(i));\n\n  }\n#if defined(_OPENMP)\n  setNbThreads(params->threads);\n#endif\n\n  if(oob_err.any()) \n    throw \"there is a variant in the block that has a value not in [0,2] or missing\";\n\n}\n\n\n// check if uses Layout 2 (v1.2/1.3) & check for first SNP if precision for probabilities is 8 bits\nvoid check_bgen(const string& bgen_file, string const& file_type, bool& zlib_compress, bool& streamBGEN, uint& BGENbits, int const& nChrom){\n\n  // for non-bgen file input, skip check\n  if(file_type != \"bgen\") return;\n\n  BgenParser bgen_ck;\n  bgen_ck.open( bgen_file ) ;\n  bool layoutV2 = bgen_ck.get_layout();\n  zlib_compress = bgen_ck.get_compression();\n  if( !layoutV2 ){\n    streamBGEN = false;\n    return;\n  }\n  uint64 first_snp = bgen_ck.get_position();\n\n  uint minploidy = 0, maxploidy = 0, phasing = 0, bits_prob = 0;\n  uint16_t SNPID_size = 0, RSID_size = 0, chromosome_size = 0 , numberOfAlleles = 0 ;\n  uint32_t position = 0, allele_size = 0, nindivs = 0;\n  string allele, tmp_buffer;\n\n  // check bits only for first snp\n  //cout << endl << \"Snp1 pos:\" << first_snp << endl;\n  ifstream bfile;\n  bfile.open( bgen_file, ios::in | ios::binary );\n  bfile.seekg( first_snp );\n  // snpid\n  bfile.read( reinterpret_cast<char *> (&SNPID_size), 2 );\n  tmp_buffer.resize(SNPID_size);\n  bfile.read( reinterpret_cast<char *> (&tmp_buffer[0]), SNPID_size );\n  // rsid\n  bfile.read( reinterpret_cast<char *> (&RSID_size), 2) ;\n  tmp_buffer.resize(RSID_size);\n  bfile.read( reinterpret_cast<char *> (&tmp_buffer[0]), RSID_size );\n  //cout << \"RSID:\" << tmp_buffer ;\n  // chromosome\n  bfile.read( reinterpret_cast<char *> (&chromosome_size), 2 );\n  tmp_buffer.resize(chromosome_size);\n  bfile.read( reinterpret_cast<char *> (&tmp_buffer[0]), chromosome_size );\n  assert( chrStrToInt(tmp_buffer , nChrom) > 0 );\n  //cout << \",CHR:\" << tmp_buffer ;\n  // position\n  bfile.read( reinterpret_cast<char *> (&position), 4 );\n  //cout << \",POS:\" << position << endl;\n  // number of alleles\n  bfile.read( reinterpret_cast<char *> 
(&numberOfAlleles), 2 );\n  assert( numberOfAlleles == 2 ); // only biallelic\n  //cout << ",Nalleles:" << numberOfAlleles ;\n  // alleles\n  bfile.read( reinterpret_cast<char *> (&allele_size), 4 );\n  tmp_buffer.resize(allele_size);\n  bfile.read( reinterpret_cast<char *> (&tmp_buffer[0]), allele_size );\n  //cout << ",A0:"<<tmp_buffer ;\n  bfile.read( reinterpret_cast<char *> (&allele_size), 4 );\n  tmp_buffer.resize(allele_size);\n  bfile.read( reinterpret_cast<char *> (&tmp_buffer[0]), allele_size );\n  //cout << ",A1:"<<tmp_buffer ;\n\n  // set genotype data block\n  vector < uchar > geno_block, geno_block_uncompressed;\n  uint32_t size_block = 0, size_block_post_compression = 0;\n  bfile.read( reinterpret_cast<char *> (&size_block), 4 );\n  bfile.read( reinterpret_cast<char *> (&size_block_post_compression), 4);\n  //cout << ",block size:"<<size_block  << ",block size post compress:" << size_block_post_compression << endl;\n  geno_block.resize(size_block - 4);\n  geno_block_uncompressed.resize(size_block_post_compression);\n  bfile.read( reinterpret_cast<char *> (&geno_block[0]), size_block - 4);\n\n  // uncompress the block\n  //cout << "zlib:"<< std::boolalpha << zlib_compress ;\n  if(zlib_compress){ // using zlib\n    uLongf dest_size = size_block_post_compression;\n    if( (uncompress( &(geno_block_uncompressed[0]), &dest_size, &geno_block[0], size_block - 4) != Z_OK) || (dest_size != size_block_post_compression) ){\n      streamBGEN = false;\n      return;\n    }\n  } else { // using zstd\n    size_t const dest_size = ZSTD_decompress(&(geno_block_uncompressed[0]), size_block_post_compression, &geno_block[0], size_block - 4) ;\n    //cerr << size_block_post_compression << " " << dest_size << " " << size_block - 4 <<endl;\n    if( dest_size != size_block_post_compression ){\n      streamBGEN = false;\n      return;\n    }\n  }\n\n  // stream to uncompressed block\n  uchar *buffer = &geno_block_uncompressed[0];\n  // sample size\n  std::memcpy(&nindivs, &(buffer[0]), 4);\n  //cout << "N:"<< nindivs ;\n  assert( ((int) nindivs) == bgen_ck.number_of_samples() );\n  buffer += 4;\n  // num alleles\n  std::memcpy(&numberOfAlleles, &(buffer[0]), 2);\n  //cout << ",allele:"<< numberOfAlleles ;\n  assert( numberOfAlleles == 2 );\n  buffer += 2;\n  // ploidy\n  std::memcpy(&minploidy, &(buffer[0]), 1);\n  //cout << ",minP:"<< minploidy ;\n  assert( minploidy == 2 );\n  buffer ++;\n  std::memcpy(&maxploidy, &(buffer[0]), 1);\n  //cout << ",maxP:"<< maxploidy ;\n  assert( maxploidy == 2 );\n  buffer ++;\n\n  /* //to identify missing when getting dosages\n     vector < uchar > ploidy_n;\n     ploidy_n.resize( nindivs );\n     std::memcpy(&(ploidy_n[0]), &(buffer[0]), nindivs);\n     */\n  buffer += nindivs;\n\n  // phasing\n  std::memcpy(&phasing, &(buffer[0]), 1);\n  //cout << ",phasing:"<< phasing ;\n  assert( phasing == 0 ); // must be unphased\n  buffer ++;\n\n  // bits per probability\n  std::memcpy(&bits_prob, &(buffer[0]), 1);\n  //cout << ",bits:"<< bits_prob ;\n  BGENbits = bits_prob;\n  if( bits_prob != 8 ){\n    streamBGEN = false;\n    return;\n  }\n\n  streamBGEN = true;\n  bfile.close();\n}\n\n\n
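// Layout of an uncompressed BGEN v1.2 genotype data block, as parsed above and in parseSnpfromBGEN below:\n//   N (4B) | #alleles (2B) | min ploidy (1B) | max ploidy (1B) | N ploidy/missingness bytes (bit 7 => missing)\n//   | phased flag (1B) | bits per probability B (1B) | probabilities (B bits each, last one implied).\n// The fast readers only handle the unphased diploid biallelic case with B = 8, i.e. two bytes per sample\n// storing P(g=0)*255 and P(g=1)*255, with P(g=2) = 1 - P(g=0) - P(g=1).\n\n// for step 2 (using BGEN library API)\nvoid readChunkFromBGENFileToG(vector<uint64> const& indices, const int& chrom, vector<snp> const& snpinfo, struct param const* params, Ref<MatrixXd> Gmat, BgenParser& bgen, struct filter const* filters, const Ref<const MatrixXb>& masked_indivs, const Ref<const MatrixXd>& phenotypes_raw, vector<variant_block> 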
&all_snps_info, mstream& sout) {\n\n  int const bs = indices.size();\n  int lval, ncarriers, nmales;\n  uint32_t index ;\n  double ds, total, mac, mval, ival, info_num, sum_pos;\n  std::string chromosome, rsid;\n  uint32_t position ;\n  std::vector< std::string > alleles ;\n  std::vector< std::vector< double > > probs ;\n\n  for(int snp = 0; snp < bs; snp++) {\n\n    variant_block* snp_data = &(all_snps_info[snp]);\n    struct snp const* snp_info = &(snpinfo[indices[snp]]);\n    MapArXd Geno (Gmat.col(snp).data(), params->n_samples, 1);\n\n    // reset variant info\n    Geno = 0;\n    prep_snp_stats(snp_data, params);\n\n    index = 0, ncarriers = 0, nmales = 0;\n    total = 0, mac = 0, info_num = 0;\n    bool non_par = in_non_par(chrom, snp_info->physpos, params);\n\n    // set to correct position\n    bgen.jumpto( snp_info->offset );\n    bgen.read_variant( &chromosome, &position, &rsid, &alleles );\n    bgen.read_probs( &probs ) ;\n    //sout << \"[\"<< chrom << \"]SNPid stored (\"<< snp_info->chrom <<\") = \" << snp_info->ID<< \"/ SNPIDread (\"<<chromosome<<\")= \" << rsid << endl; exit(-1);\n    assert( snp_info->ID == rsid );\n    //assert(chrStrToInt(chromosome, params->nChrom) == chrom);\n\n    for( std::size_t i = 0; i < probs.size(); ++i ) {\n\n      // skip samples that were ignored from the analysis\n      if( filters->ind_ignore(i) ) continue;\n\n      ds = 0;\n      //cerr << \"index \" << index << \" : \" << probs[i][0] << \" / \" << probs[i][1] << \" / \" << probs[i][2] << \"\\n\";\n      for( std::size_t j = 1; j < probs[i].size(); ++j ) ds += probs[i][j] * j;\n\n      if(ds != -3) {\n        ds = params->ref_first ? ds : (2 - ds); // if ref-first, no need to switch\n\n        if( filters->ind_in_analysis(index) ){\n          // compute MAC using 0.5*g for males for variants on sex chr (males coded as diploid)\n          // sex is 1 for males and 0 o.w.\n          lval = 0, mval = ds;\n          if(params->test_mode && non_par) {\n            lval = (params->sex(index) == 1);\n            mval = ds * 0.5 * (2 - lval);\n          }\n          \n          if( params->ref_first )\n            ival = 4 * probs[i][2] + probs[i][1] - ds * ds;\n          else\n            ival = 4 * probs[i][0] + probs[i][1] - ds * ds;\n\n          // check if carrier\n          if(params->build_mask && params->singleton_carriers && (ds >= 0.5)) ncarriers ++;\n\n          if (params->test_mode && non_par && params->skip_dosage_comp && lval){\n            ds /= 2.0; // divide by 2 for males in non-par X\n            if (params->af_cc && masked_indivs.row(index).any()) // n_males_cases\n              snp_data->ns_case_adj += masked_indivs.row(index).array().cast<int>() * phenotypes_raw.row(index).array().cast<int>();\n          }\n\n          total += ds;\n          mac += mval;\n          nmales += lval;\n          info_num += ival;\n          snp_data->ns1++;\n\n          // counts by trait\n          if(filters->has_missing(index)) update_trait_counts(index, ds, mval, lval, ival, snp_data, masked_indivs);\n\n          /* // get genotype counts (convert to hardcall)\n          if( params->htp_out ) {\n            // counts for males are 0/2\n            if(params->test_mode && non_par && (lval>0)) \n              hc_val = (ds < 1 ? 
0 : 2);\n            else\n              hc_val = (int) (ds + 0.5); // round to nearest integer (0/1/2)\n            update_genocounts(params->trait_mode==1, index, hc_val, snp_data->genocounts, masked_indivs, phenotypes_raw);\n          } else*/\n          if( params->af_cc )\n            update_af_cc(index, ds, snp_data, masked_indivs, phenotypes_raw);\n          if (!params->split_by_pheno){\n            if(ds >= 1.5) snp_data->n_aa++;\n            else if(ds < 0.5) snp_data->n_rr++;\n            else if(non_par && lval && !(params->test_mode && params->skip_dosage_comp)){\n              if (ds < 1) snp_data->n_rr++;\n              else snp_data->n_aa++;\n            }\n          }\n        }\n      }\n\n      Geno(index) = ds;\n      index++;\n    }\n\n    // check MAC\n    if( params->test_mode){\n      compute_mac(!non_par, mac, total, nmales, ncarriers, snp_info->MAC_fail_if_checked, snp_info->apply_diff_MAC_filter, snp_data, params);\n      if(snp_data->ignored) continue;\n    }\n\n    //sout << \"SNP#\" << snp + 1 << \"AC=\" << mac << endl;\n    if (non_par && params->skip_dosage_comp) \n      snp_data->ns1_adj = nmales;\n    compute_aaf_info(total, info_num, non_par, snp_data, params);\n\n    if(params->test_mode && params->setMinINFO && ( snp_data->info1 < params->min_INFO) ) {\n      snp_data->ignored = true; continue;\n    }\n\n    if( params->htp_out ) \n      compute_genocounts(params->trait_mode==1 || params->trait_mode==3, non_par, mac, Geno, snp_data->genocounts, params->sex, filters->case_control_indices);\n\n    // for SPA switch effect allele to minor allele\n    flip_geno(total, Geno, snp_data, params);\n\n    // apply dominant/recessive encoding & recompute mean\n    if(!params->build_mask && (params->test_type > 0)){\n      index = 0;\n      for( std::size_t i = 0; i < probs.size(); ++i ) {\n        // skip samples that were ignored from the analysis\n        if( filters->ind_ignore(i) ) continue;\n\n        if( filters->ind_in_analysis(index) && (Geno(index) != -3) ){\n          if(params->test_type == 1){ //dominant\n            Geno(index) = params->ref_first ? (probs[i][1] + probs[i][2]) : (probs[i][0] + probs[i][1]);\n          } else if(params->test_type == 2){ //recessive\n            Geno(index) = params->ref_first ? 
probs[i][2] : probs[i][0];\n          }\n        }\n        index++;\n      }\n\n      sum_pos = ((Geno != -3) && filters->ind_in_analysis).select(Geno, 0).sum();\n      if((params->test_type == 2) && (sum_pos < params->minHOMs)) { // filter on homALT carriers\n        snp_data->ignored = true;\n        continue;\n      }\n\n      total = sum_pos / snp_data->ns1;\n      if(total < params->numtol) {\n        snp_data->ignored = true;\n        continue;\n      }\n    }\n\n    // impute missing\n    if(!params->build_mask)\n      mean_impute_g(total, Geno, filters->ind_in_analysis);\n  }\n\n}\n\n// for step 2 (read in raw data)\nvoid readChunkFromBGEN(std::istream* bfile, vector<uint32_t>& insize, vector<uint32_t>& outsize, vector<vector<uchar>>& snp_data_blocks, vector<uint64>& indices){\n\n  uint16_t SNPID_size = 0, RSID_size = 0, chromosome_size = 0 , numberOfAlleles = 0 ;\n  uint32_t position = 0, allele_size = 0;\n  int n_snps = indices.size();\n  string tmp_buffer;\n\n  // extract genotype data blocks single-threaded\n  for(int isnp = 0; isnp < n_snps; isnp++) {\n    //if(isnp % 100 == 0) cerr << \"At #\" << isnp+1 << endl;\n\n    vector<uchar>* geno_block = &(snp_data_blocks[isnp]);\n    uint32_t* size1 = &insize[isnp];\n    uint32_t* size2 = &outsize[isnp];\n\n    bfile->seekg( indices[isnp] );\n\n    // snpid\n    bfile->read( reinterpret_cast<char *> (&SNPID_size), 2 );\n    tmp_buffer.resize(SNPID_size);\n    bfile->read( reinterpret_cast<char *> (&tmp_buffer[0]), SNPID_size );\n    // rsid\n    bfile->read( reinterpret_cast<char *> (&RSID_size), 2) ;\n    tmp_buffer.resize(RSID_size);\n    bfile->read( reinterpret_cast<char *> (&tmp_buffer[0]), RSID_size );\n    // chromosome\n    bfile->read( reinterpret_cast<char *> (&chromosome_size), 2 );\n    tmp_buffer.resize(chromosome_size);\n    bfile->read( reinterpret_cast<char *> (&tmp_buffer[0]), chromosome_size );\n    // position\n    bfile->read( reinterpret_cast<char *> (&position), 4 );\n    // number of alleles\n    bfile->read( reinterpret_cast<char *> (&numberOfAlleles), 2 );\n    // alleles\n    bfile->read( reinterpret_cast<char *> (&allele_size), 4 );\n    tmp_buffer.resize(allele_size);\n    bfile->read( reinterpret_cast<char *> (&tmp_buffer[0]), allele_size );\n    bfile->read( reinterpret_cast<char *> (&allele_size), 4 );\n    tmp_buffer.resize(allele_size);\n    bfile->read( reinterpret_cast<char *> (&tmp_buffer[0]), allele_size );\n\n    // set genotype data block\n    bfile->read( reinterpret_cast<char *> (size1), 4 );\n    bfile->read( reinterpret_cast<char *> (size2), 4);\n    geno_block->resize(*size1 - 4);\n    bfile->read( reinterpret_cast<char *> (&((*geno_block)[0])), *size1 - 4);\n\n  }\n\n}\n\nvoid parseSNP(const int& isnp, const int &chrom, vector<uchar>* geno_block, const uint32_t& insize, const uint32_t& outsize, struct param const* params, struct filter const* filters, const Ref<const MatrixXb>& masked_indivs, const Ref<const MatrixXd>& phenotypes_raw, const snp* infosnp, struct geno_block* gblock, variant_block* snp_data, mstream& sout){\n\n  if( ((params->file_type == \"bgen\") && !params->streamBGEN) || params->file_type == \"pgen\")\n    return;\n\n  if(params->file_type == \"bgen\") // uncompress and extract the dosages\n    parseSnpfromBGEN(isnp, chrom, geno_block, insize, outsize, params,filters, masked_indivs, phenotypes_raw, infosnp, gblock, snp_data, sout);\n  else if(params->file_type == \"bed\") // extract hardcalls\n    parseSnpfromBed(isnp, chrom, *geno_block, params, filters, masked_indivs, 
phenotypes_raw, infosnp, gblock, snp_data);\n\n}\n\n\nvoid parseSnpfromBGEN(const int& isnp, const int &chrom, vector<uchar>* geno_block, const uint32_t& insize, const uint32_t& outsize, struct param const* params, struct filter const* filters, const Ref<const MatrixXb>& masked_indivs, const Ref<const MatrixXd>& phenotypes_raw, const snp* infosnp, struct geno_block* gblock, variant_block* snp_data, mstream& sout){\n\n  uint minploidy = 0, maxploidy = 0, phasing = 0, bits_prob = 0;\n  uint16_t numberOfAlleles = 0 ;\n  uint32_t nindivs = 0;\n  uint32_t index;\n  string tmp_buffer;\n\n  MapArXd Geno (gblock->Gmat.col(isnp).data(), params->n_samples, 1);\n  Geno = 0;\n  // reset variant info\n  prep_snp_stats(snp_data, params);\n\n  // set genotype data block\n  vector < uchar > geno_block_uncompressed;\n  geno_block_uncompressed.resize(outsize);\n\n  // uncompress the block\n  bool compress_fail;\n  if(params->zlib_compress){ // using zlib\n    uLongf dest_size = outsize;\n    compress_fail = (uncompress( &(geno_block_uncompressed[0]), &dest_size, &((*geno_block)[0]), insize - 4) != Z_OK) || (dest_size != outsize);\n  } else { // using zstd\n    size_t const dest_size = ZSTD_decompress(&(geno_block_uncompressed[0]), outsize, &((*geno_block)[0]), insize - 4) ;\n    //cerr << outsize << " " << dest_size << " " << insize - 4 << endl;\n    compress_fail = (dest_size != outsize);\n  }\n  // check it was successful\n  if( compress_fail )\n    throw "failed to decompress genotype data block for variant: " + infosnp->ID;\n\n  // stream to uncompressed block\n  uchar *buffer = &geno_block_uncompressed[0];\n  // sample size in file\n  std::memcpy(&nindivs, &(buffer[0]), 4);\n  assert( nindivs == filters->ind_ignore.size() );\n  buffer += 4;\n  // num alleles\n  std::memcpy(&numberOfAlleles, &(buffer[0]), 2);\n  assert( numberOfAlleles == 2 );\n  buffer += 2;\n  // ploidy\n  std::memcpy(&minploidy, &(buffer[0]), 1);\n  assert( minploidy == 2 );\n  buffer ++;\n  std::memcpy(&maxploidy, &(buffer[0]), 1);\n  assert( maxploidy == 2 );\n  buffer ++;\n\n  //to identify missing when getting dosages\n  vector < uchar > ploidy_n;\n  ploidy_n.resize( nindivs );\n  std::memcpy(&(ploidy_n[0]), &(buffer[0]), nindivs);\n  buffer += nindivs;\n\n  // phasing\n  std::memcpy(&phasing, &(buffer[0]), 1);\n  buffer++;\n\n  // bits per probability\n  std::memcpy(&bits_prob, &(buffer[0]), 1);\n  buffer++;\n\n  // get dosages (computing the mean along the way, and identifying non-zero entries if SPA is used)\n  bool missing;\n  bool non_par = in_non_par(chrom, infosnp->physpos, params);\n  int lval, ncarriers = 0, nmales = 0;\n  double prob0, prob1, prob2, total = 0, mac = 0, mval, ival, info_num = 0, sum_pos;\n\n  // parse genotype probabilities block\n  index = 0;\n  for(size_t i = 0; i < nindivs; i++) {\n\n    // skip samples that were ignored from the analysis\n    if( filters->ind_ignore(i) ) {\n      buffer+=2;\n      continue;\n    }\n\n    missing = ((ploidy_n[i]) & 0x80);\n    if(missing) {\n      // bug fix (with imputed data this case should not occur)\n      Geno(index++) = -3;\n      buffer+=2;\n      continue;\n    }\n\n    prob0 = double((*reinterpret_cast< uint8_t const* >( buffer++ ))) / 255.0;\n    prob1 = double((*reinterpret_cast< uint8_t const* >( buffer++ ))) / 255.0;\n    prob2 = std::max( 1 - prob0 - prob1, 0.0);\n    //cerr << "index " << index << " : " << prob0 << " / " << prob1 << " / " << prob2 << "\\n";\n\n    if(params->ref_first) \n      Geno(index) = prob1 + 2 * prob2;\n    else \n   
   Geno(index) = prob1 + 2 * prob0; // switch allele0 to ALT\n\n    if( filters->ind_in_analysis(index) ){\n      // compute MAC using 0.5*g for males for variants on sex chr (males coded as diploid)\n      // sex is 1 for males and 0 o.w.\n      lval = 0, mval = Geno(index);\n      if(params->test_mode && non_par) {\n        lval = (params->sex(index) == 1);\n        mval =  Geno(index) * 0.5 * (2 - lval);\n      }\n\n      if( params->ref_first )\n        ival = 4 * prob2 + prob1 - Geno(index) * Geno(index);\n      else\n        ival = 4 * prob0 + prob1 - Geno(index) * Geno(index);\n\n      // check if carrier\n      if(params->build_mask && params->singleton_carriers) ncarriers += (int) (Geno(index) >= 0.5); // round dosages\n\n      if (params->test_mode && non_par && params->skip_dosage_comp && lval){\n        Geno(index) /= 2.0; // divide by 2 for males in non-par X\n        if (params->af_cc && masked_indivs.row(index).any()) // n_males_cases\n          snp_data->ns_case_adj += masked_indivs.row(index).array().cast<int>() * phenotypes_raw.row(index).array().cast<int>();\n      }\n\n      total += Geno(index);\n      mac += mval;\n      nmales += lval;\n      info_num += ival;\n      snp_data->ns1++;\n\n      // counts by trait\n      if(filters->has_missing(index)) update_trait_counts(index, Geno(index), mval, lval, ival, snp_data, masked_indivs);\n\n      /* // get genotype counts (convert to hardcall)\n      if( params->htp_out ) {\n        // counts for males are 0/2\n        if(params->test_mode && non_par && (lval>0)) \n          hc_val = (Geno(index) < 1 ? 0 : 2);\n        else\n          hc_val = (int) (Geno(index) + 0.5); // round to nearest integer 0/1/2\n        update_genocounts(params->trait_mode==1, index, hc_val, snp_data->genocounts, masked_indivs, phenotypes_raw);\n      } else*/ \n      if( params->af_cc )\n        update_af_cc(index, Geno(index), snp_data, masked_indivs, phenotypes_raw);\n      if (!params->split_by_pheno){\n        if(Geno(index) >= 1.5) snp_data->n_aa++;\n        else if(Geno(index) < 0.5) snp_data->n_rr++;\n        else if(non_par && lval && !(params->test_mode && params->skip_dosage_comp)){\n          if (Geno(index) < 1) snp_data->n_rr++;\n          else snp_data->n_aa++;\n        }\n      }\n\n    }\n    index++;\n  }\n\n  // check MAC\n  if( params->test_mode){\n    compute_mac(!non_par, mac, total, nmales, ncarriers, infosnp->MAC_fail_if_checked, infosnp->apply_diff_MAC_filter, snp_data, params);\n    if(snp_data->ignored) return;\n  }\n\n  if (non_par && params->skip_dosage_comp) \n    snp_data->ns1_adj = nmales;\n  compute_aaf_info(total, info_num, non_par, snp_data, params);\n\n  // check INFO score\n  if( params->setMinINFO && ( snp_data->info1 < params->min_INFO) ) {\n    snp_data->ignored = true;\n    return;\n  }\n\n  if( params->htp_out ) \n    compute_genocounts(params->trait_mode==1 || params->trait_mode==3, non_par, mac, Geno, snp_data->genocounts, params->sex, filters->case_control_indices);\n\n  // for SPA switch effect allele to minor allele\n  flip_geno(total, Geno, snp_data, params);\n\n  // apply dominant/recessive encoding & recompute mean\n  if(!params->build_mask && (params->test_type > 0)){\n    // go over data block again\n    buffer -= 2 * nindivs;\n    index = 0;\n    for(size_t i = 0; i < nindivs; i++) {\n\n      // skip samples that were ignored from the analysis\n      if( filters->ind_ignore(i) ) {\n        buffer+=2;\n        continue;\n      }\n\n      missing = ((ploidy_n[i]) & 0x80);\n      if(missing) {\n    
    index++; // bug fix: keep the sample index in sync for missing entries\n        buffer+=2;\n        continue;\n      }\n      prob0 = double((*reinterpret_cast< uint8_t const* >( buffer++ ))) / 255.0;\n      prob1 = double((*reinterpret_cast< uint8_t const* >( buffer++ ))) / 255.0;\n      prob2 = std::max( 1 - prob0 - prob1, 0.0);\n\n      if(filters->ind_in_analysis(index)){\n        if(params->test_type == 1){ //dominant\n          Geno(index) = params->ref_first ? (prob1 + prob2) : (prob0 + prob1);\n        } else if(params->test_type == 2){ //recessive\n          Geno(index) = params->ref_first ? prob2 : prob0;\n        }\n      }\n      index++;\n    }\n    sum_pos = ((Geno != -3) && filters->ind_in_analysis).select(Geno, 0).sum();\n    if((params->test_type == 2) && (sum_pos < params->minHOMs)) { // filter on homALT carriers\n      snp_data->ignored = true;\n      return;\n    }\n    \n    total = sum_pos / snp_data->ns1;\n    if(total < params->numtol) {\n      snp_data->ignored = true;\n      return;\n    }\n  }\n\n  // impute missing\n  if(!params->build_mask)\n    mean_impute_g(total, Geno, filters->ind_in_analysis);\n\n  return;\n}\n\n\nvoid parseSnpfromBed(const int& isnp, const int &chrom, const vector<uchar>& bed_data, struct param const* params, struct filter const* filters, const Ref<const MatrixXb>& masked_indivs, const Ref<const MatrixXd>& phenotypes_raw, const snp* infosnp, struct geno_block* gblock, variant_block* snp_data){\n\n  int hc, lval, ncarriers = 0, nmales;\n  bool non_par = in_non_par(chrom, infosnp->physpos, params);\n  uint32_t const nmax = filters->ind_ignore.size();\n  uint32_t i, index ;\n  double total, mac, mval, sum_pos;\n  ArrayXd geno4; // genotype values for 4 samples at a time\n\n  MapArXd Geno (gblock->Gmat.col(isnp).data(), params->n_samples, 1);\n  Geno = ArrayXd::Zero(params->n_samples);\n  // reset variant info\n  prep_snp_stats(snp_data, params);\n\n  total = 0, mac = 0, i = 0, index = 0, nmales = 0;\n  for (size_t byte_start = 0; byte_start < bed_data.size(); byte_start++) {\n\n    geno4 = params->bed_lookup_table[ bed_data[byte_start] ];\n\n    for(int bit_start = 0; bit_start < 4; bit_start++, i++){\n\n      // skip remainder past N samples\n      if(i >= nmax) break;\n\n      // skip samples that were ignored from the analysis\n      if( filters->ind_ignore(i) ) continue;\n\n      hc = geno4(bit_start);\n      if(params->ref_first && (hc != -3)) hc = 2 - hc;\n      Geno(index) = hc;\n\n      if( filters->ind_in_analysis(index) && (Geno(index) != -3) ){\n        // compute MAC using 0.5*g for males for variants on sex chr (males coded as diploid)\n        // sex is 1 for males and 0 o.w.\n        lval = 0, mval = Geno(index);\n        if(params->test_mode && non_par) {\n          lval = (params->sex(index) == 1);\n          mval = Geno(index) * 0.5 * (2 - lval);\n          // check if not 0/2\n          if( (lval == 1) && (Geno(index) == 1) ) cerr << "WARNING: genotype is 1 for a male on chrX at " << infosnp->ID << " (males should be coded as diploid).";\n        }\n\n        // check if carrier\n        if(params->build_mask && params->singleton_carriers) ncarriers += (int) (Geno(index) >= 1);\n\n        if (params->test_mode && non_par && params->skip_dosage_comp && lval){\n          Geno(index) /= 2.0; // divide by 2 for males in non-par X\n          if (params->af_cc && masked_indivs.row(index).any()) // n_males_cases\n            snp_data->ns_case_adj += masked_indivs.row(index).array().cast<int>() * phenotypes_raw.row(index).array().cast<int>();\n        }\n\n        total += 
Geno(index);\n        mac += mval;\n        nmales += lval;\n        snp_data->ns1++;\n\n        // counts by trait\n        if(filters->has_missing(index)) update_trait_counts(index, Geno(index), mval, lval, 0, snp_data, masked_indivs);\n\n        /* // get genotype counts\n        if( params->htp_out ) \n          update_genocounts(params->trait_mode==1, index, hc, snp_data->genocounts, masked_indivs, phenotypes_raw);\n        else */\n        if( params->af_cc )\n          update_af_cc(index, Geno(index), snp_data, masked_indivs, phenotypes_raw);\n        if (!params->split_by_pheno){\n          if(Geno(index) >= 1.5) snp_data->n_aa++;\n          else if(Geno(index) < 0.5) snp_data->n_rr++;\n          else if(non_par && lval && !(params->test_mode && params->skip_dosage_comp)){\n            if (Geno(index) < 1) snp_data->n_rr++;\n            else snp_data->n_aa++;\n          }\n        }\n\n      }\n      index++;\n    }\n  }\n\n  // check MAC\n  if( params->test_mode){\n    compute_mac(!non_par, mac, total, nmales, ncarriers, infosnp->MAC_fail_if_checked, infosnp->apply_diff_MAC_filter, snp_data, params);\n    if(snp_data->ignored) return;\n  }\n\n  if (non_par && params->skip_dosage_comp) \n    snp_data->ns1_adj = nmales;\n  compute_aaf_info(total, 0, non_par, snp_data, params);\n\n  if( params->htp_out ) \n    compute_genocounts(params->trait_mode==1 || params->trait_mode==3, non_par, mac, Geno, snp_data->genocounts, params->sex, filters->case_control_indices);\n\n  // for SPA switch effect allele to minor allele\n  flip_geno(total, Geno, snp_data, params);\n\n  // apply dominant/recessive encoding & recompute mean\n  if(!params->build_mask && (params->test_type > 0)){\n    if(params->test_type == 1){ //dominant\n      Geno = (Geno == 2).select(1, Geno);\n    } else if(params->test_type == 2){ //recessive\n      Geno = (Geno >= 1).select(Geno - 1, Geno);\n    }\n\n    sum_pos = ((Geno != -3) && filters->ind_in_analysis).select(Geno, 0).sum();\n    if((params->test_type == 2) && (sum_pos < params->minHOMs)) { // filter on homALT carriers\n      snp_data->ignored = true;\n      return;\n    }\n\n    total = sum_pos / snp_data->ns1;\n    if(total < params->numtol) {\n      snp_data->ignored = true;\n      return;\n    }\n  }\n\n  // impute missing\n  if(!params->build_mask)\n    mean_impute_g(total, Geno, filters->ind_in_analysis);\n\n}\n\n\n// step 2\nvoid readChunkFromPGENFileToG(vector<uint64> const& indices, const int &chrom, struct param const* params, struct filter const* filters, Ref<MatrixXd> Gmat, PgenReader& pgr, const Ref<const MatrixXb>& masked_indivs, const Ref<const MatrixXd>& phenotypes_raw, vector<snp> const& snpinfo, vector<variant_block> &all_snps_info){\n\n  int const bs = indices.size();\n  ArrayXb oob_err = ArrayXb::Constant(bs, false), het_male_X = ArrayXb::Constant(bs, false);\n\n#if defined(_OPENMP)\n  setNbThreads(1);\n#pragma omp parallel for schedule(dynamic)\n#endif\n  for(int j = 0; j < bs; j++) {\n\n    int thread_num = 0;\n#if defined(_OPENMP)\n    thread_num = omp_get_thread_num();\n#endif\n\n    int hc, cur_index, lval, nmales, ncarriers = 0;\n    double total, mac, mval, ival, eij2 = 0, sum_pos;\n    ArrayXb keep_index;\n\n    variant_block* snp_data = &(all_snps_info[j]);\n    struct snp const* snp_info = &(snpinfo[ indices[j] ]);\n    MapArXd Geno (Gmat.col(j).data(), params->n_samples, 1);\n\n    // reset variant info\n    prep_snp_stats(snp_data, params);\n\n    mac = 0, nmales = 0;\n    bool non_par = in_non_par(chrom, snp_info->physpos, params);\n 
   if( params->dosage_mode ) eij2 = 0;\n\n    // read genotype data\n    cur_index = snp_info->offset;\n    if( params->dosage_mode )\n      pgr.Read(Geno.data(), Geno.size(), thread_num, cur_index, 1);\n    else\n      pgr.ReadHardcalls(Geno.data(), Geno.size(), thread_num, cur_index, 1);\n\n    oob_err(j) = ((Geno < -3) || (Geno > 2)).any();\n    if(oob_err(j)) continue;\n\n    keep_index = filters->ind_in_analysis && (Geno != -3.0);\n    total = keep_index.select(Geno,0).sum();\n    snp_data->ns1 = keep_index.count();\n    if(params->test_mode && !params->build_mask) snp_data->n_zero = filters->ind_in_analysis.size() - params->n_samples;\n    //cerr << \"ID: \" << snp_info->ID << \"\\nG bounds: \" << \n    //  (Geno * keep_index.cast<double>()).minCoeff() << \" - \" << (Geno * keep_index.cast<double>()).maxCoeff() << \"\\n\\n\";\n\n    for (int index = 0; index < filters->ind_in_analysis.size(); index++) {\n\n      if( keep_index(index) ){\n        // compute MAC using 0.5*g for males for variants on sex chr non-PAR (males coded as diploid)\n        // sex is 1 for males and 0 o.w.\n        ival = 0, lval = 0, mval = Geno(index);\n        //cerr << \"index (\" << params->sex(index)<<\") \" << index << \" : \" << mval << \"\\n\";\n        if(params->test_mode){\n          if(!params->build_mask && (mval == 0)) snp_data->n_zero++;\n          if (mval >= 0.5) ncarriers++;\n\n          if(non_par) {\n            lval = (params->sex(index) == 1);\n            mval *= 0.5 * (2 - lval);\n            // check if not 0/2\n            if( !params->dosage_mode && (lval == 1) && (Geno(index) == 1) )\n              het_male_X(j) = true;\n          }\n        }\n\n        if( params->dosage_mode ) ival = Geno(index) * Geno(index);\n\n        if (params->test_mode && non_par && params->skip_dosage_comp && lval){\n          Geno(index) /= 2.0; // divide by 2 for males in non-par X\n          if (params->af_cc && masked_indivs.row(index).any()) // n_males_cases\n            snp_data->ns_case_adj += masked_indivs.row(index).array().cast<int>() * phenotypes_raw.row(index).array().cast<int>();\n        }\n\n        mac += mval;\n        nmales += lval;\n        eij2 += ival;\n\n        // counts by trait\n        if(filters->has_missing(index)) update_trait_counts(index, Geno(index), mval, lval, ival, snp_data, masked_indivs);\n\n       /* // get genotype counts\n        if( params->htp_out ) {\n          // counts for males are 0/2\n          if(params->test_mode && non_par && (lval>0)) \n            hc = (Geno(index) < 1 ? 
0 : 2);\n          else\n            hc = (int) (Geno(index) + 0.5); // round to nearest integer 0/1/2\n          update_genocounts(params->trait_mode==1, index, hc, snp_data->genocounts, masked_indivs, phenotypes_raw);\n        } else */\n        if( params->af_cc )\n          update_af_cc(index, Geno(index), snp_data, masked_indivs, phenotypes_raw);\n        if (!params->split_by_pheno){\n          if(Geno(index) >= 1.5) snp_data->n_aa++;\n          else if(Geno(index) < 0.5) snp_data->n_rr++;\n          else if(non_par && lval && !(params->test_mode && params->skip_dosage_comp)){\n            if (Geno(index) < 1) snp_data->n_rr++;\n            else snp_data->n_aa++;\n          }\n        }\n\n      }\n    }\n\n    // check MAC\n    if( params->test_mode){\n      compute_mac(!non_par, mac, total, nmales, ncarriers, snp_info->MAC_fail_if_checked, snp_info->apply_diff_MAC_filter, snp_data, params);\n      if(snp_data->ignored) continue;\n    }\n\n    if (non_par && params->skip_dosage_comp) {\n      total = keep_index.select(Geno, 0).sum();\n      snp_data->ns1_adj = nmales;\n    }\n    compute_aaf_info(total, eij2, non_par, snp_data, params);\n\n    // check INFO score\n    if( params->dosage_mode && params->setMinINFO && ( snp_data->info1 < params->min_INFO) ) {\n      snp_data->ignored = true; continue;\n    }\n\n    if( params->htp_out )\n      compute_genocounts(params->trait_mode==1 || params->trait_mode==3, non_par, mac, Geno, snp_data->genocounts, params->sex, filters->case_control_indices);\n    \n    // for SPA switch effect allele to minor allele\n    flip_geno(total, Geno, snp_data, params);\n\n    // apply dominant/recessive encoding & recompute mean\n    // pgen does not store genotype probabilities, so dosages are converted to hardcalls\n    if(!params->build_mask && (params->test_type > 0)){\n      for( size_t i = 0; i < params->n_samples; ++i ) {\n        if( (Geno(i) == -3.0) || !filters->ind_in_analysis(i) ) continue;\n        hc = (int) (Geno(i) + 0.5);\n\n        if(params->test_type == 1){ //dominant\n          Geno(i) = (hc == 2 ? 1 : hc);\n        } else if(params->test_type == 2){ //recessive\n          Geno(i) = (hc >= 1 ? 
hc - 1 : hc);\n        }\n      }\n\n      sum_pos = ((Geno != -3) && filters->ind_in_analysis).select(Geno, 0).sum();\n      if((params->test_type == 2) && (sum_pos < params->minHOMs)) { // filter on homALT carriers\n        snp_data->ignored = true;\n        continue;\n      }\n\n      total = sum_pos / snp_data->ns1;\n      if( params->test_mode && (total < params->numtol) ) {\n        snp_data->ignored = true;\n        continue;\n      }\n    }\n\n    // impute missing\n    if(!params->build_mask)\n      mean_impute_g(total, Geno, filters->ind_in_analysis);\n\n  }\n#if defined(_OPENMP)\n  setNbThreads(params->threads);\n#endif\n\n  if(oob_err.any()) \n    throw "there is a variant in the block that has a value not in [0,2] or missing";\n  if(het_male_X.any()){\n    vector<string> msg;\n    for(int j = 0; j < bs; j++)\n      if(het_male_X(j))\n        msg.push_back( snpinfo[ indices[j] ].ID );\n    cerr << "WARNING: genotype is 1 for a male on chrX (males should be coded as diploid) at variants [" << print_sv(msg, ";") << "].";\n  }\n\n}\n\nbool in_chrList(const int& snp_chr, struct filter const* filters){\n  return in_map(snp_chr, filters->chrKeep_test);\n}\n\nstring bgi_chrList(struct filter* filters, const int& nChrom){// for --chr/--chrList\n\n  string fmt;\n  vector<string> clist;\n  map<int, bool >::iterator itr;\n\n  for (itr = filters->chrKeep_test.begin(); itr != filters->chrKeep_test.end(); ++itr) {\n    // add N and 'chrN' formats\n    fmt = to_string(itr->first);\n    clist.push_back( fmt );\n    fmt = "'chr" + to_string(itr->first) + "'";\n    clist.push_back( fmt );\n\n    if(itr->first < 10){ // add '0N' and 'chr0N' formats\n      fmt = "'0" + to_string(itr->first) + "'";\n      clist.push_back( fmt );\n      fmt = "'chr0" + to_string(itr->first) + "'";\n      clist.push_back( fmt );\n    } else if(itr->first == nChrom){ // add XY, X, PARs\n      clist.push_back( "'X'" );\n      clist.push_back( "'chrX'" );\n      clist.push_back( "'XY'" );\n      clist.push_back( "'chrXY'" );\n      clist.push_back( "'PAR1'" );\n      clist.push_back( "'chrPAR1'" );\n      clist.push_back( "'PAR2'" );\n      clist.push_back( "'chrPAR2'" );\n    }\n  }\n\n  return print_csv(clist);\n}\n\nstring bgi_chrList(const int& range_chr, const int& nChrom){// for range\n\n  string fmt = to_string(range_chr);\n  vector<string> clist;\n\n  // add N and 'chrN' formats\n  clist.push_back( fmt );\n  fmt = "'chr" + to_string(range_chr) + "'";\n  clist.push_back( fmt );\n\n  if(range_chr < 10){ // add '0N' and 'chr0N' formats\n    fmt = "'0" + to_string(range_chr) + "'";\n    clist.push_back( fmt );\n    fmt = "'chr0" + to_string(range_chr) + "'";\n    clist.push_back( fmt );\n  } else if(range_chr == nChrom){ // add XY, X, PARs\n    clist.push_back( "'X'" );\n    clist.push_back( "'chrX'" );\n    clist.push_back( "'XY'" );\n    clist.push_back( "'chrXY'" );\n    clist.push_back( "'PAR1'" );\n    clist.push_back( "'chrPAR1'" );\n    clist.push_back( "'PAR2'" );\n    clist.push_back( "'chrPAR2'" );\n  }\n\n  return print_csv(clist);\n}\n\nstring bgi_rsidList(std::map <std::string, uint64>& rsids){// list of snp names\n\n  std::map <std::string, uint64>::iterator itr;\n  vector<string> clist;\n\n  for (itr = rsids.begin(); itr != rsids.end(); ++itr)\n    clist.push_back( "'" + itr->first + "'" );\n\n  return print_csv(clist);\n}\n\n
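// e.g. for chromosome 3, bgi_chrList gives: 3,'chr3','03','chr03'; the quoted spellings are presumably\n// substituted into the chromosome filter of a query against the bgenix .bgi index (an SQLite file),\n// hence each alternative name is wrapped in single quotes.\n\nbool in_range(int const& snp_chr, uint32_t const& snp_pos, struct param const* params){\n\n  if( (snp_chr != params->range_chr) 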
||\n      (snp_pos < params->range_min) || \n      (snp_pos > params->range_max) )\n    return false;\n\n  return true; \n}\n\nbool in_non_par(int const& snp_chr, uint32_t const& snp_pos, struct param const* params){\n\n  // if not on chrX, return false\n  if(snp_chr != params->nChrom) return false;\n\n  // in par1 or par2\n  if( (snp_pos <= params->par1_max_bound) || \n      (snp_pos >= params->par2_min_bound) )\n    return false;\n\n  // in non-par chrX\n  return true; \n}\n\n\nvoid skip_snps(uint64 const& offset, struct param const* params, struct in_files* files, struct geno_block* gblock){\n\n  // set to new position based on offset\n  if(params->file_type == "bed") \n    jumpto_bed(offset, files->bed_block_size, files->geno_ifstream);\n  else if(params->file_type == "bgen") \n    gblock->bgen.jumpto(offset);\n\n}\n\n// jump to given snp index in bed file (+magic number)\nvoid jumpto_bed(uint64 const& offset, uint64 const& bed_block_size, std::ifstream& bed_ifstream){\n  bed_ifstream.seekg( 3 + offset * bed_block_size, ios_base::beg);\n}\n\n// create table for all possible values in 1 PLINK byte\nvoid buildLookupTable(vector<ArrayXd>& lookup_table){\n\n  uchar plink_byte;\n  int bit_start;\n  const int nvals = 256;\n  // using 'ref-last', with each 2-bit value read as an integer:\n  //  0 (0b00) -> hom. alt\n  //  1 (0b01) -> missing\n  //  2 (0b10) -> het\n  //  3 (0b11) -> hom. ref\n  const int maptogeno[4] = {2, -3, 1, 0};\n\n  lookup_table.assign(nvals, ArrayXd::Zero(4));\n\n  for(int i = 0; i < nvals; i++){\n    plink_byte = i;\n\n    for(int j=0; j<4; j++){\n      bit_start = j<<1; // 2 bits per sample\n      lookup_table[i](j) = maptogeno[ (plink_byte >> bit_start)&3 ];\n    }\n\n  }\n\n}\n\nvoid prep_snp_stats(variant_block* snp_data, struct param const* params){\n\n  // reset variant info\n  snp_data->af = ArrayXd::Zero(params->n_pheno);\n  snp_data->af_case = ArrayXd::Zero(params->n_pheno);\n  snp_data->af_control = ArrayXd::Zero(params->n_pheno);\n  snp_data->mac = ArrayXd::Zero(params->n_pheno);\n  snp_data->info = ArrayXd::Zero(params->n_pheno);\n  snp_data->cf_burden = ArrayXd::Constant(params->n_pheno, -1);\n  snp_data->nmales = ArrayXi::Zero(params->n_pheno);\n  snp_data->ns = ArrayXi::Zero(params->n_pheno);\n  snp_data->ns_case = ArrayXi::Zero(params->n_pheno);\n  snp_data->ns_control = ArrayXi::Zero(params->n_pheno);\n  snp_data->genocounts = MatrixXi::Zero(6, params->n_pheno);\n  snp_data->ignored = false;\n  snp_data->skip_int = false;\n  snp_data->fitHLM = false;\n  snp_data->flipped = false;\n  snp_data->ns1 = 0, snp_data->n_rr = 0, snp_data->n_aa = 0;\n  if (params->skip_dosage_comp) {\n    snp_data->ns1_adj = 0;\n    if(params->af_cc) snp_data->ns_case_adj = ArrayXi::Zero(params->n_pheno);\n  }\n  snp_data->ignored_trait = ArrayXb::Constant(params->n_pheno, false);\n\n}\n\nvoid initialize_thread_data(vector<data_thread>& all_snp_data, struct param const& params){\n\n  for(size_t i = 0; i < all_snp_data.size(); i++){\n    data_thread* snp_data = &(all_snp_data[i]);\n\n    snp_data->chisq_val = ArrayXd::Zero(params.n_pheno);\n    snp_data->pval_log = ArrayXd::Zero(params.n_pheno);\n    snp_data->bhat = ArrayXd::Zero(params.n_pheno);\n    snp_data->se_b = ArrayXd::Zero(params.n_pheno);\n    snp_data->scores = ArrayXd::Zero(params.n_pheno);\n    snp_data->cal_factor = ArrayXd::Zero(params.n_pheno);\n    if(params.trait_mode){\n      snp_data->stats = ArrayXd::Zero(params.n_pheno);\n      snp_data->denum = ArrayXd::Zero(params.n_pheno);\n    }\n    snp_data->skat_var = ArrayXd::Zero(params.n_pheno);\n  }\n}\n\n
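// clears the per-thread association buffers allocated once in initialize_thread_data() above\n// (presumably invoked before each new variant is scored)\nvoid 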
reset_thread(data_thread* snp_data, struct param const& params){\n\n    snp_data->chisq_val = 0;\n    snp_data->pval_log = 0;\n    snp_data->bhat = 0;\n    snp_data->se_b = 0;\n    snp_data->scores = params.missing_value_double;\n    snp_data->cal_factor = -1;\n    if(params.trait_mode){\n      snp_data->stats = 0;\n      snp_data->denum = 0;\n    }\n    snp_data->skat_var = params.missing_value_double;\n    snp_data->is_sparse = false;\n    snp_data->fastSPA = params.use_SPA && (!params.build_mask || (params.mask_rule_max || params.mask_rule_comphet));\n}\n\nvoid reset_stats(variant_block* snp_data, struct param const& params){\n\n    snp_data->test_fail = ArrayXb::Constant(params.n_pheno, false);\n    snp_data->is_corrected = ArrayXb::Constant(params.n_pheno, params.firth || params.use_SPA);\n    if(params.w_interaction && params.firth) {\n      snp_data->is_corrected_inter = ArrayXb::Constant(params.n_pheno, false);\n      snp_data->test_fail_inter = ArrayXb::Constant(params.n_pheno, true);\n    }\n    if( params.joint_test ) snp_data->pval_log = ArrayXd::Zero(params.n_pheno);\n\n    snp_data->sum_stats.resize( params.n_pheno );\n    std::fill(snp_data->sum_stats.begin(), snp_data->sum_stats.end(), "");\n\n    // multi-trait test results\n    if(params.trait_set) {\n      snp_data->sum_stats_mt.resize(1); // currently only 1 trait set\n      std::fill(snp_data->sum_stats_mt.begin(), snp_data->sum_stats_mt.end(), "");\n    }\n    // MultiPhen test results\n    if(params.multiphen) {\n      snp_data->sum_stats_multiphen.resize(1); // currently only 1 trait set\n      std::fill(snp_data->sum_stats_multiphen.begin(), snp_data->sum_stats_multiphen.end(), "");\n    }\n\n}\n\nvoid update_trait_counts(int const& index, double const& genoValue, double const& macValue, int const& sexValue, double const& infoValue, variant_block* snp_data, const Ref<const MatrixXb>& mask){\n\n  ArrayXi imask = 1 - mask.row(index).cast<int>().array(); // get masked samples\n\n  // will subtract from total computed on all analyzed samples (masked & unmasked)\n  snp_data->af -= genoValue * imask.cast<double>();\n  snp_data->mac -= macValue * imask.cast<double>();\n  snp_data->info -= infoValue * imask.cast<double>();\n  snp_data->nmales -= imask * sexValue;\n  snp_data->ns -= imask;\n\n}\n\nvoid update_genocounts(bool const& binary_mode, int const& ind, int const& hc, MatrixXi& genocounts, const Ref<const MatrixXb>& mask, const Ref<const MatrixXd>& ymat){\n\n  if( !binary_mode ) {\n    genocounts.row(hc) += mask.row(ind).cast<int>();\n  } else {\n    genocounts.row(hc).array() += mask.row(ind).array().cast<int>() * ymat.row(ind).array().cast<int>();\n    genocounts.row(3 + hc).array() += mask.row(ind).array().cast<int>() * (1 - ymat.row(ind).array()).cast<int>();\n  }\n\n}\n\nvoid compute_genocounts(bool const& binary_mode, bool const& non_par, double const& mac, const Ref<const ArrayXd>& Geno, Ref<MatrixXi> genocounts, const Ref<const ArrayXi>& sex, std::vector<std::vector<Eigen::ArrayXi>> const& cc_indices_phenos){\n\n  int mac_thr = 100, Nmin = 1000;\n  bool snp_ur = (Geno.size() > Nmin) && (mac <= mac_thr); // for sparse G with QTs\n  for(size_t ph = 0; ph < cc_indices_phenos.size(); ph++) {\n    if(!binary_mode && snp_ur) {// ultra-rare variants\n      update_genocounts_sp(binary_mode, non_par, genocounts.col(ph), sex, Geno, cc_indices_phenos[ph]);\n    } else {\n      update_genocounts(binary_mode, non_par, genocounts.col(ph), sex, Geno, cc_indices_phenos[ph]);\n    }\n  }\n}\n\n
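// genocounts layout per trait (one column each): rows 0-2 = number of samples with genotype 0/1/2 among\n// cases (all analyzed samples for QTs), rows 3-5 = the same among controls (binary traits only);\n// on non-PAR chrX, male genotypes are tallied as 0/2.\n\n// for each trait\nvoid 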
void update_genocounts(bool const& bt_mode, bool const& non_par, Ref<VectorXi> genocounts, const Eigen::Ref<const Eigen::ArrayXi>& sex, const Eigen::Ref<const Eigen::ArrayXd>& Geno, std::vector<Eigen::ArrayXi> const& cc_indices){\n  int miss_cases = 0, miss_controls = 0;\n  bool not_male;\n  double val;\n  // get counts in non-missing cases (or samples for QTs)\n  if (cc_indices.size() == 0) return;\n  for(int k = 0; k < cc_indices[0].size(); k++){\n    val = Geno( cc_indices[0](k) ); not_male = sex( cc_indices[0](k) ) != 1;\n    if(val < 0) miss_cases++;\n    else if (non_par && val >= 1.5 && not_male) genocounts(2)++;\n    else if (non_par && val >= 0.5 && not_male) genocounts(1)++;\n    else if (non_par && val >= 1 && !not_male) genocounts(2)++;\n    else if (non_par && !not_male) continue;\n    else if (val >= 1.5) genocounts(2)++;\n    else if (val >= 0.5) genocounts(1)++;\n  }\n  genocounts(0) = cc_indices[0].size() - genocounts(1) - genocounts(2) - miss_cases;\n\n  if(bt_mode) {\n    for(int k = 0; k < cc_indices[1].size(); k++){\n      val = Geno( cc_indices[1](k) ); not_male = sex( cc_indices[1](k) ) != 1;\n      if(val < 0) miss_controls++;\n      else if (non_par && val >= 1.5 && not_male) genocounts(5)++;\n      else if (non_par && val >= 0.5 && not_male) genocounts(4)++;\n      else if (non_par && val >= 1 && !not_male) genocounts(5)++;\n      else if (non_par && !not_male) continue;\n      else if (val >= 1.5) genocounts(5)++;\n      else if (val >= 0.5) genocounts(4)++;\n    }\n    genocounts(3) = cc_indices[1].size() - genocounts(4) - genocounts(5) - miss_controls;\n\n  }\n}\n\n// with sparse Geno (per-trait)\nvoid update_genocounts_sp(bool const& bt_mode, bool const& non_par, Ref<VectorXi> genocounts, const Eigen::Ref<const Eigen::ArrayXi>& sex, const Eigen::Ref<const Eigen::ArrayXd>& Geno, std::vector<Eigen::ArrayXi> const& cc_indices){\n  bool not_male = true;\n  int miss_cases = 0, miss_controls = 0, index;\n  double val;\n  \n  if (cc_indices.size() == 0) return;\n  SpVec Gsp;\n  if(!bt_mode && (cc_indices[0].size() == Geno.size())) // no need to subset \n    Gsp = Geno.matrix().sparseView();\n  else Gsp = Geno(cc_indices[0]).matrix().sparseView();\n  for (SpVec::InnerIterator it(Gsp); it; ++it) {\n    val = it.value();\n    index = it.index(); // n.b. relative to the (possibly subsetted) vector\n    if(non_par) not_male = (sex(index) != 1);\n\n    if(val < 0) miss_cases++;\n    else if (non_par && val >= 1.5 && not_male) genocounts(2)++;\n    else if (non_par && val >= 0.5 && not_male) genocounts(1)++;\n    else if (non_par && val >= 1 && !not_male) genocounts(2)++;\n    else if (non_par && !not_male) continue;\n    else if (val >= 1.5) genocounts(2)++;\n    else if (val >= 0.5) genocounts(1)++;\n\n  }\n  genocounts(0) = cc_indices[0].size() - genocounts(1) - genocounts(2) - miss_cases;\n\n  if(bt_mode) { \n    SpVec Gspc = Geno(cc_indices[1]).matrix().sparseView(); \n    for (SpVec::InnerIterator it(Gspc); it; ++it) {\n      val = it.value();\n      index = it.index();\n      if(non_par) not_male = (sex(index) != 1);\n\n      if(val < 0) miss_controls++;\n      else if (non_par && val >= 1.5 && not_male) genocounts(5)++;\n      else if (non_par && val >= 0.5 && not_male) genocounts(4)++;\n      else if (non_par && val >= 1 && !not_male) genocounts(5)++;\n      else if (non_par && !not_male) continue;\n      else if (val >= 1.5) genocounts(5)++;\n      else if (val >= 0.5) genocounts(4)++;\n    }\n    genocounts(3) = cc_indices[1].size() - genocounts(4) - genocounts(5) - miss_controls;\n\n  }\n\n}\n\n
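// (added example) with 10 cases, none missing, one het carrier\n// (0.5 <= g < 1.5) and one hom carrier (g >= 1.5): genocounts(1)=1,\n// genocounts(2)=1 and genocounts(0) = 10 - 1 - 1 = 8. The sparse path\n// above only visits stored (nonzero) entries, so the count for genotype 0\n// must be recovered by this subtraction rather than counted directly.\n\n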
void update_af_cc(int const& ind, double const& genoValue, variant_block* snp_data, const Ref<const MatrixXb>& mask, const Ref<const MatrixXd>& ymat){\n\n  // only computed in cases; control counts are derived later as (all non-missing - cases)\n  snp_data->af_case += genoValue * mask.row(ind).array().cast<double>() * ymat.row(ind).array();\n  snp_data->ns_case += mask.row(ind).array().cast<int>() * ymat.row(ind).array().cast<int>();\n\n}\n\nvoid compute_mac(bool const& auto_chrom, double& mac, double const& total, int const& nmales, int const& ncarriers, bool const& MAC_fail_if_checked, bool const& sep_MAC_filter, variant_block* snp_data, struct param const* params){\n\n  if(auto_chrom) mac = total; // use MAC assuming diploid coding\n  //cerr << snp_data->mac << endl << endl; \n  snp_data->mac1 = mac; // across all traits\n\n  // for masks, identify singletons\n  if(params->build_mask && !params->singleton_carriers) snp_data->singleton = ( ((int)(mac+0.5)) == 1 ); // use AAC (round for dosages)\n  else if(params->build_mask && params->singleton_carriers) snp_data->singleton = (ncarriers == 1);\n\n  // get counts by trait \n  snp_data->mac += mac; // aac\n  snp_data->ns += snp_data->ns1; // ns\n  snp_data->nmales += nmales; // nmales\n\n  if(auto_chrom) {\n    mac = min( mac, 2 * snp_data->ns1 - mac );\n    snp_data->mac = snp_data->mac.min( 2 * snp_data->ns.cast<double>() - snp_data->mac ); // mac for each trait\n  } else {\n    mac = min(mac, 2 * snp_data->ns1 - nmales - mac); // males are 0/1\n    snp_data->mac = snp_data->mac.min( 2 * snp_data->ns.cast<double>() - snp_data->nmales.cast<double>() - snp_data->mac );\n  }\n\n  double MAC_thr = sep_MAC_filter ? params->forced_MAC : params->min_MAC;\n\n  snp_data->ignored_trait = MAC_fail_if_checked;\n  snp_data->ignored_trait = snp_data->ignored_trait && (snp_data->mac < MAC_thr);\n  //cerr << snp_data->ignored_trait.cast<double>() << endl << endl; exit(EXIT_FAILURE);\n  if((mac < MAC_thr) && MAC_fail_if_checked) \n    snp_data->ignored = true;\n\n}\n\nvoid compute_aaf_info(double& total, double const& info_num, bool const& non_par, variant_block* snp_data, struct param const* params){\n\n  // get counts by trait \n  snp_data->af += total;\n  snp_data->info += info_num;\n  double n_alleles_all = 2 * snp_data->ns1; // all traits\n  ArrayXd n_alleles = 2 * snp_data->ns.cast<double>(); // per trait\n  if(non_par && params->skip_dosage_comp){ n_alleles_all -= snp_data->ns1_adj; n_alleles -= snp_data->nmales.cast<double>(); }\n\n  if(params->af_cc){\n    ArrayXd n_case_alleles;\n    n_case_alleles = 2 * snp_data->ns_case.cast<double>();\n    if(non_par && params->skip_dosage_comp){ n_case_alleles -= snp_data->ns_case_adj.cast<double>();}\n    snp_data->ns_control = snp_data->ns - snp_data->ns_case;\n    snp_data->af_control = (snp_data->af - snp_data->af_case) / (n_alleles - n_case_alleles);\n    snp_data->af_case /= n_case_alleles;\n  }\n\n  snp_data->af1 = total / n_alleles_all; // all traits\n  snp_data->af /= n_alleles; // single trait\n  total /= snp_data->ns1;\n\n  if(params->test_mode && params->dosage_mode){\n\n    // all traits\n    if( (snp_data->af1 == 0) || (snp_data->af1 == 1) ) snp_data->info1 = 1;\n    else if(params->file_type == \"bgen\") snp_data->info1 = 1 - info_num / (2 * snp_data->ns1 * snp_data->af1 * (1 - snp_data->af1)); // IMPUTE info score\n    else snp_data->info1 = (info_num / snp_data->ns1 - total * total) / (2 * snp_data->af1 * (1 - snp_data->af1)); // mach r2 info score\n\n    // single trait\n    if(params->file_type == \"bgen\") snp_data->info = ((snp_data->af == 0) || (snp_data->af == 
1)).select(1, 1 - snp_data->info / (2 * snp_data->ns.cast<double>() * snp_data->af * (1 - snp_data->af)) );\n    else snp_data->info = ((snp_data->af == 0) || (snp_data->af == 1)).select(1, (snp_data->info / snp_data->ns.cast<double>() - 4 * snp_data->af.square()) / (2 * snp_data->af * (1 - snp_data->af)) );\n\n    if(params->setMinINFO) \n      snp_data->ignored_trait = snp_data->ignored_trait || (snp_data->info < params->min_INFO);\n\n  }\n\n}\n\nvoid flip_geno(double& total, Ref<ArrayXd> Geno, variant_block* snp_data, struct param const* params){\n\n  if(!params->with_flip) return;\n\n  // switch to minor allele\n  snp_data->flipped = (total > 1);\n\n  if(snp_data->flipped){\n    Geno = ( Geno != -3.0 ).select( 2 - Geno, Geno);\n    total = 2 - total;\n  }\n\n}\n\n// for rarer variants, use sparse format\nvoid check_sparse_G(int const& isnp, int const& thread_num, struct geno_block* gblock, uint32_t const& nsamples, const Ref<const ArrayXb>& mask, int const& n_zero, const double& prop_zero_thr){\n\n  data_thread* snp_data = &(gblock->thread_data[thread_num]);\n  MapArXd Geno ( gblock->Gmat.col(isnp).data(), nsamples, 1);\n\n  if (n_zero != -1)\n    snp_data->is_sparse = (n_zero >= (nsamples * prop_zero_thr));\n  else\n    snp_data->is_sparse = (mask && (Geno != 0)).count() <= (nsamples * (1 - prop_zero_thr));\n\n  if(snp_data->is_sparse) // get nonzero entries\n    snp_data->Gsparse = mask.select(Geno,0).matrix().sparseView();\n\n  // for SPA\n  if( snp_data->fastSPA ) snp_data->fastSPA = snp_data->is_sparse;\n}\n\n// mean impute (only individuals who are not masked)\nvoid mean_impute_g(double &geno, const double& mu, const bool& in_analysis){\n  if (!in_analysis) // zero individuals masked\n    geno = 0;\n  else if(geno == -3) \n    geno = mu;\n}\n// impute all at once\nvoid mean_impute_g(const double& mu, Ref<ArrayXd> Geno, const Ref<const ArrayXb>& in_analysis){\n  Geno = (!in_analysis).select(0, Geno);\n  Geno = (in_analysis && (Geno == -3)).select(mu, Geno);\n}\n\nfindID getIndivIndex(const string &FID, const string &IID, struct param* params, mstream& sout){\n\n  string tmp_str;\n  findID indiv;\n\n  // get ID of individual\n  tmp_str = FID + \"_\" + IID;\n\n  // check individual is in genotype data\n  indiv.is_found = in_map(tmp_str, params->FID_IID_to_ind);\n\n  if(indiv.is_found)\n    indiv.index = params->FID_IID_to_ind[tmp_str];\n\n  return indiv;\n}\n\nvoid residualize_geno(int const& isnp, int const& thread_num, variant_block* snp_data, bool const& force, const Ref<const MatrixXd>& X, struct geno_block* gblock, struct param const* params){\n\n  if(snp_data->ignored) return;\n\n  if((params->trait_mode==0) || force){\n    MatrixXd beta;\n    data_thread* dt_thr = &(gblock->thread_data[thread_num]);\n\n    // project out covariates\n    if(dt_thr->is_sparse) \n      beta = X.transpose() * dt_thr->Gsparse;\n    else\n      beta = X.transpose() * gblock->Gmat.col(isnp);\n\n    gblock->Gmat.col(isnp) -= X * beta;\n\n    // scale\n    snp_data->scale_fac = gblock->Gmat.col(isnp).norm();\n    snp_data->scale_fac /= sqrt( params->n_analyzed - X.cols() );\n\n    if( snp_data->scale_fac < params->numtol ) {\n      snp_data->ignored = true;\n      return;\n    }\n    gblock->Gmat.col(isnp).array() /= snp_data->scale_fac;\n\n  } else snp_data->scale_fac = 1;\n\n}\n\nvoid residualize_geno(const Ref<const MatrixXd>& X, Ref<VectorXd> Graw, variant_block* snp_data, struct param const& params){\n\n  if(snp_data->ignored) return;\n\n  VectorXd beta = X.transpose() * Graw;\n  Graw -= X * 
beta;\n\n  // already computed\n  if(params.skip_scaleG) return; // don't scale\n\n  // scale\n  snp_data->scale_fac = Graw.norm();\n  snp_data->scale_fac /= sqrt( params.n_analyzed - X.cols() );\n\n  if( snp_data->scale_fac < params.numtol ) {\n    snp_data->ignored = true;\n    return;\n  }\n  Graw /= snp_data->scale_fac;\n\n}\n\nvoid writeSnplist(string const& fname, int const& start, int const& ns, vector<snp> const& snpinfo, mstream& sout){\n\n  ofstream ofile;\n  openStream(&ofile, fname + \".snplist\", ios::out, sout);\n\n  for(int i = 0; i < ns; i++) \n    ofile << snpinfo[start+i].ID << endl;\n\n  ofile.close();\n}\n\n\n\n// joint testing\nvoid read_setlist(const struct in_files* files, struct param* params, struct filter* filters, vector< vector<vset> >& setinfo, vector<snp>& snpinfo, const uint64 all_masks, const double mask_max_aaf, mstream& sout) {\n\n  bool bsize_set, all_in_geno, loo_found = false, all_w_anno, same_chr = true, no_AAF = false;\n  int n_sets_incomplete = 0, n_sets_ignored = 0, n_sets_analyzed = 0;\n  uint32_t lineread = 0, snp_index;\n  std::vector< string > tmp_str_vec, tmp_snp_id, set_problem ;\n  std::vector<int> tmpvec(3);\n  string line, fname;\n  Files myfile;\n  ofstream report_file;\n\n  // for snps with no anno for the set\n  annoinfo ainfo_null;\n  ainfo_null.regionid = get_max(ainfo_null.regionid); // any region (set all bits to 1)\n  BIT_SET(ainfo_null.id, 0);\n\n  sout << left << std::setw(20) << \" * set file\" << \": [\" << files->set_file << \"] \" << flush;\n  myfile.openForRead (files->set_file, sout);\n  if(params->check_mask_files) {\n    line = files->out_file + \"_masks_report.txt\";\n    openStream(&report_file, line, ios::out | ios::app, sout);\n    report_file << \"\\n## set file: [\" << files->set_file << \"]\\n## list of variants not in annotation or genetic data input files\\n\";\n  }\n\n  setinfo.resize( params->nChrom );\n\n  // check block size\n  if(params->use_max_bsize) bsize_set = false;\n  else bsize_set = params->block_size >= 2;\n\n  if(!bsize_set) params->block_size = 0;\n\n  // if extract/exclude for sets\n  if(params->keep_sets) tmpvec[2]=0;\n  else if(params->rm_sets) tmpvec[2]=1;\n\n  while (myfile.readLine(line)) {\n\n    all_in_geno = all_w_anno = true;\n    vset tmp_set;\n    if(params->check_mask_files) set_problem.resize(0);\n\n    tmp_str_vec = string_split(line,\"\\t ,\");\n\n    // at least 4 columns: set name | set chr | set position | variant list \n    if( tmp_str_vec.size() < 4 )\n      throw \"incorrectly formatted file at line \" + to_string( lineread+1 ) + \" (has \" + to_string(tmp_str_vec.size()) + \" columns)\";\n\n    // name of set\n    tmp_set.ID = tmp_str_vec[0];\n\n    // check set if using LOO \n    if(params->mask_loo || params->mask_lodo) {\n      if (params->mask_loo_set != tmp_set.ID) {\n        lineread++;\n        continue;\n      } else loo_found = true;\n    }\n\n    // chr of set\n    tmp_set.chrom = chrStrToInt(tmp_str_vec[1], params->nChrom);\n    if (tmp_set.chrom == -1) \n      throw \"unknown chromosome code in set list file.\";\n    //// check if it is in chrlist\n    if(params->select_chrs && !in_chrList(tmp_set.chrom, filters)) {\n      lineread++;\n      continue;\n    }\n\n    // position of set\n    tmp_set.physpos = std::stoul( tmp_str_vec[2],nullptr,0);\n\n    // for each variant in set, get index in genotype file\n    for (size_t i = 3; i < tmp_str_vec.size(); i++){\n\n      // check variant is in genotype file\n      if (!in_map(tmp_str_vec[i], filters->snpID_to_ind)) 
{\n        if(params->check_mask_files) set_problem.push_back(tmp_str_vec[i]);\n        all_in_geno = false; continue;// mark as incomplete\n      }\n\n      // get index in geno file\n      snp_index = filters->snpID_to_ind[ tmp_str_vec[i] ];\n      struct snp* snp_info = &(snpinfo[ snp_index ]);\n\n      // check chromosome\n      if( tmp_set.chrom != snp_info->chrom )\n        same_chr = false;\n\n      if( params->build_mask ){\n        // check annotation for set has been given for variant\n        // else, assign to default annotation category 0\n        if (!in_map(tmp_set.ID, snp_info->anno)) {\n          all_w_anno = false;\n          if(params->check_mask_files) set_problem.push_back(tmp_str_vec[i]);\n          snp_info->anno[ tmp_set.ID ] = ainfo_null;\n        }\n\n        // check that variant has category in at least one of the masks\n        if( (snp_info->anno[tmp_set.ID].id & all_masks) == 0 )  \n          continue;\n      }\n\n      // if AAF is user defined, check it has been given for the variants\n      if(params->set_aaf) {\n        if(snp_info->aaf < 0) // don't add variant to set\n        { no_AAF=true; continue;}\n        // check that variant has AAF < max mask AAF (unless singleton)\n        else if( (mask_max_aaf > 0) && (snp_info->aaf > mask_max_aaf) ) \n          continue;\n      }\n\n      // add index\n      tmp_set.snp_indices.push_back(snp_index);\n    }\n\n    if(!all_in_geno || !all_w_anno ) {\n      if(!all_w_anno && params->strict_check_burden) params->fail_check = true;\n      if(params->check_mask_files)\n        report_file << tmp_set.ID << \" \" << print_csv(set_problem) << endl; \n      if( tmp_set.snp_indices.size() > 0 ) n_sets_incomplete++;\n      else { n_sets_ignored++; continue; } //ignore set\n    }\n\n    // sort and retain unique values\n    std::sort(tmp_set.snp_indices.begin(), tmp_set.snp_indices.end());\n    tmp_set.snp_indices.erase( unique( tmp_set.snp_indices.begin(), tmp_set.snp_indices.end() ), tmp_set.snp_indices.end() );\n\n    // check how many variants are present\n    if( !params->build_mask && (tmp_set.snp_indices.size() > params->max_set_size) ) \n      throw \"set '\" + tmp_set.ID + \"' is larger than maximum allowed (=\" + to_string( params->max_set_size ) + \").\";\n\n    // if not set, fix block size to maximum number of variants in set\n    if(tmp_set.snp_indices.size() > params->max_bsize) params->max_bsize = tmp_set.snp_indices.size();\n\n    // add to map if needed\n    if( !(params->mask_loo || params->mask_lodo) && (params->keep_sets || params->rm_sets) ){\n      tmpvec[0] = tmp_set.chrom;\n      tmpvec[1] = setinfo[tmp_set.chrom - 1].size();\n      filters->setID_to_ind[ tmp_set.ID ] = tmpvec;\n    }\n\n    // add to list of sets (check unique set names?)\n    setinfo[tmp_set.chrom - 1].push_back(tmp_set);\n    n_sets_analyzed++; lineread++;\n\n    if(loo_found) break; // stop reading after LOO set \n  }\n\n  myfile.closeFile();\n\n  if(n_sets_analyzed == 0)\n    throw \"no sets are left to be analyzed.\";\n\n  sout << \"n_sets = \" << n_sets_analyzed << endl;\n\n  if(!same_chr) sout << \"WARNING: Detected at least one set where variants are not all in the same chromosome.\\n\";\n  // report\n  if(n_sets_incomplete > 0) sout << \"WARNING: Detected \" << n_sets_incomplete << \" sets with variants not in genetic data or annotation files.\\n\";\n  if(n_sets_ignored > 0) sout << \"WARNING: Detected \" << n_sets_ignored << \" sets with only unknown variants (these are ignored).\\n\";\n\n  if(params->check_mask_files) 
{\n    report_file << \"->Detected \" << n_sets_incomplete << \" sets with variants not in genetic data or annotation files.\\n\";\n    report_file << \"->Detected \" << n_sets_ignored << \" sets with only unknown variants.\\n\";\n    report_file.close();\n    sout << \"     +report on burden input files written to [\" << files->out_file + \"_masks_report.txt]\\n\";\n  }\n  if(params->strict_check_burden && params->fail_check){\n    string msg;\n    if(params->check_mask_files) msg = \" Check report for details.\";\n    else msg = \" For more details, re-run with '--check-burden-files'.\";\n    throw \"Annotation/Set list/Mask definition files don't agree.\" + msg;\n  }\n\n  if(no_AAF) sout << \"WARNING: Variants in the set list file not in the AAF file will be ignored.\\n\";\n  if( !bsize_set ) params->block_size = params->max_bsize;\n\n  if( !(params->mask_loo || params->mask_lodo) && (params->keep_sets || params->rm_sets) ) \n    check_sets_include_exclude(bsize_set, files, params, filters, setinfo, sout);\n\n}\n\n\n// determine if sets should be included/excluded\nvoid check_sets_include_exclude(bool const& bsize_set, const struct in_files* files, struct param* params, struct filter* filters, vector< vector<vset> >& setinfo, mstream& sout){\n\n  uint32_t nsets = 0;\n  unsigned long bsize = 0;\n  vector< vector<vset> > tmp_setinfo;\n  map<string, vector<int> >::iterator itr;\n\n  //cerr << nsets << endl;\n\n  // apply masking to sets\n  if( params->rm_sets ) {\n    sout << \"   -removing specified sets\\n\";\n    check_in_map_from_files_sets(false, filters->setID_to_ind, files->file_sets_exclude, params->set_select_list, sout);\n  } else if( params->keep_sets ) {\n    sout << \"   -keeping only specified sets\\n\";\n    check_in_map_from_files_sets(true, filters->setID_to_ind, files->file_sets_include, params->set_select_list, sout);\n  }\n\n  // re-make setinfo only with kept elements\n  tmp_setinfo.resize( setinfo.size() );\n  for (itr = filters->setID_to_ind.begin(); itr != filters->setID_to_ind.end(); ++itr) {\n\n    if(itr->second[2] == 0) continue;\n\n    tmp_setinfo[itr->second[0] - 1].push_back( setinfo[itr->second[0] - 1][itr->second[1]] );\n\n    // track max set size\n    if(!bsize_set) \n      bsize = max(bsize, setinfo[itr->second[0] - 1][itr->second[1]].snp_indices.size());\n\n    nsets++;\n  }\n\n  // check nonzero\n  if(nsets == 0)\n    throw \"no sets left to include in analysis.\";\n\n  // free memory\n  for(size_t i = 0; i < setinfo.size(); i++){\n    setinfo[i].clear();\n    std::vector<vset>().swap(setinfo[i]);\n  }\n  setinfo = tmp_setinfo;\n\n  if(!bsize_set) params->block_size = bsize;\n\n  // delete setID map\n  filters->setID_to_ind.clear();\n\n  sout << \"     +number of sets remaining in the analysis = \" << nsets << endl;\n\n}\n\nvoid check_in_map_from_files_sets(bool const& keep, map <string, vector<int>>& map_ID, vector<string> const& file_list, bool const& csv_list, mstream& sout) {\n\n  int keep_int = (int) keep; // 0 for rm and 1 for keep\n  string name;\n  Files myfile;\n\n  // user gave a comma-separated list\n  if(csv_list){\n    for(auto const& setname : string_split(file_list[0],\",\"))\n      if (in_map(setname, map_ID)) \n        map_ID[ setname ][2] = keep_int;\n    return;\n  }\n\n  // user gave a list of files\n  for(auto fin : file_list) {\n\n    myfile.openForRead (fin, sout);\n\n    while( myfile.readLine(name) ){ // assume single column with setname\n      if (in_map(name, map_ID)) \n        map_ID[ name ][2] = keep_int;\n    }\n\n    
myfile.closeFile();\n  }\n\n}\n\nvoid get_masks_info(const struct in_files* files, struct param* params, struct filter* filters, map<std::string, anno_name>& anno_map, std::map <std::string, std::map <std::string, uint64>>& regions, vector<maskinfo>& mask_map, std::vector <std::vector<string>>& mask_out, uint64& all_masks, vector<snp>& snpinfo, mstream& sout) {\n\n  // read annotation categories if specified\n  if(params->w_anno_lab) read_anno_cat(files, params, anno_map, sout);\n\n  // read annotations\n  read_anno(params, files, filters, anno_map, regions, snpinfo, sout);\n\n  if(params->set_aaf) read_aafs(params->tol, files, filters, snpinfo, params->aaf_file_wSingletons, sout);\n\n  // read masks\n  read_masks(files, params, anno_map, mask_map, mask_out, all_masks, sout);\n\n}\n\nvoid read_anno_cat(const struct in_files* files, struct param* params, map<string, anno_name>& anno_map, mstream& sout) {\n\n  int lineread = 0, cval;\n  uint64 null_cat = 0ULL;\n  std::vector< string > tmp_str_vec ;\n  string line;\n  anno_name new_anno;\n  Files myfile;\n\n  sout << left << std::setw(20) << \" * annotation labels\" << \": [\" << files->anno_labs_file << \"] \" << flush;\n  myfile.openForRead (files->anno_labs_file, sout);\n\n\n  while (myfile.readLine(line)) {\n\n    new_anno.id = null_cat;\n    removeCarriageReturn( line );\n    tmp_str_vec = string_split(line,\"\\t ,\");\n\n    if( tmp_str_vec.size() != 2 )\n      throw \"incorrectly formatted file at line \" + to_string( lineread+1 );\n\n    // name of category\n    new_anno.name = tmp_str_vec[1];\n    cval = atoi( tmp_str_vec[0].c_str() );\n\n    // check value is in 0-max \n    if( (cval < 0) || (cval >= (int)params->max_cat) )\n      throw \"category must be <= \" + to_string( params->max_cat - 1 ) + \n        \" on line \" + to_string( lineread+1 ) + \" (=\" + tmp_str_vec[0] +  \").\";\n\n    // check category has not been specified\n    if (in_map(tmp_str_vec[0], anno_map)) \n      throw \"duplicate category on line \" + to_string(lineread+1) + \" (=\" + tmp_str_vec[0] + \").\";\n\n    // set bit for category\n    BIT_SET(new_anno.id, cval);\n\n    // insert in map\n    anno_map[ tmp_str_vec[0] ] = new_anno;\n\n    lineread++;\n  }\n\n  // insert category 0 if not already given\n  line = \"0\";\n  if (!in_map(line, anno_map)) {\n    new_anno.name = \"NULL\";\n    new_anno.id = null_cat;\n    BIT_SET(new_anno.id, 0);\n    anno_map[ \"0\" ] = new_anno;\n    lineread++; // count in the category\n  }\n  myfile.closeFile();\n\n  sout << \"n_categories = \" << lineread << endl;\n}\n\nvoid read_anno(struct param* params, const struct in_files* files, struct filter* filters, map<string, anno_name>& anno_map, std::map <std::string, std::map <std::string, uint64>>& regions, vector<snp>& snpinfo, mstream& sout) {\n\n  int lineread = 0, col_cat = 2, nregions = 0;\n  uint32_t snp_pos, ncat = 0, n_anno_read = 0;\n  uint64 null_id = 0ULL;\n  uint64 null_region = 0ULL;\n  double set_weight = 0;\n  anno_name new_anno;\n  annoinfo ainfo;\n  std::vector< string > tmp_str_vec ;\n  string line, sname, gname;\n  Files myfile;\n\n  if(!params->w_anno_lab) { // add NULL category\n    new_anno.name = \"NULL\";\n    new_anno.id = null_id;\n    BIT_SET(new_anno.id, ncat++);\n    anno_map[ new_anno.name ] = new_anno;\n  }\n\n  sout << left << std::setw(20) << \" * annotations \" << \": [\" << files->anno_file << \"] \" << endl;\n  myfile.openForRead (files->anno_file, sout);\n  if(params->vc_with_weights && (params->vc_weight_col < 4))\n   throw \"invalid 
column index specified for user-defined weights (=\" + to_string( params->vc_weight_col ) + \")\";\n\n  while (myfile.readLine(line)) {\n\n    ainfo.id = null_id;\n    ainfo.regionid = null_region;\n\n    removeCarriageReturn( line );\n    tmp_str_vec = string_split(line,\"\\t ,\");\n    if(lineread == 0) {\n      // for LOVO with region\n      if((params->mask_loo || params->mask_lodo) && params->w_regions && (tmp_str_vec.size() != 4))\n        throw \"annotation file must have exactly 4 columns for LOVO/LODO with regions.\";\n      params->w_regions = !params->vc_with_weights && (tmp_str_vec.size() == 4);\n      //cerr << std::boolalpha << params->w_regions << endl;\n      if(params->w_regions) col_cat = 3; // set label column\n    }\n\n    // variants | set_name | region (optional) | annotation (unique)\n    if( (!params->w_regions && !params->vc_with_weights && (tmp_str_vec.size() < 3)) || \n        (params->w_regions && (tmp_str_vec.size() != 4)) || \n        (params->vc_with_weights && ((int)tmp_str_vec.size() < params->vc_weight_col)) \n        ) \n      throw \"incorrectly formatted file at line \" + to_string(lineread+1);\n\n    // name of variant\n    sname = tmp_str_vec[0];\n    // check it is in genotype file\n    if (!in_map(sname, filters->snpID_to_ind)) {\n      lineread++; continue;\n    }\n    snp_pos = filters->snpID_to_ind[ sname ];\n    struct snp* snp_info = &(snpinfo[ snp_pos ]);\n\n    // set name\n    gname = tmp_str_vec[1];\n    if (!params->w_regions && in_map(gname, snp_info->anno)) \n      throw \"duplicate variant annotations at line \" + to_string( lineread+1 ) + \".\";\n\n    // check if matches with LOVO gene\n    if((params->mask_loo || params->mask_lodo) && (gname != params->mask_loo_set)){\n      lineread++; continue;\n    }\n\n    // get regions\n    if(params->w_regions){\n\n      // check if matches with LOVO region\n      if(params->mask_loo && (tmp_str_vec[col_cat-1] != params->mask_loo_region)){\n        lineread++; continue;\n      }\n\n      // check if new set\n      if (!in_map(gname, regions)){ // create new map with region for set\n        BIT_SET(ainfo.regionid, 0); // set first bit\n        std::map <std::string, decltype(null_region)> gene_region_map;\n        gene_region_map[tmp_str_vec[col_cat-1]] = ainfo.regionid;\n        regions[gname] = gene_region_map;\n        nregions++;\n      } else if (!in_map(tmp_str_vec[col_cat-1], regions[gname])) { // add region for set\n\n        if(regions[gname].size() >= params->nmax_regions) \n          throw \"cannot have more than \" + to_string(params->nmax_regions) + \" domains per set.\";\n\n        BIT_SET(ainfo.regionid, regions[gname].size()); // set bit for new region\n        regions[gname][tmp_str_vec[col_cat-1]] = ainfo.regionid;\n        nregions++;\n\n      } else ainfo.regionid = regions[gname][tmp_str_vec[col_cat-1]]; \n\n    }\n\n    // check category is in map\n    if (!in_map(tmp_str_vec[col_cat], anno_map)) {\n\n      if(params->w_anno_lab) \n        throw \"unknown category at line \" + to_string( lineread+1 ) + \" (=\" + tmp_str_vec[col_cat] + \").\";\n      else { \n        // check # categories \n        if( ncat >= params->max_cat) \n          throw \"cannot have more than \" + to_string( params->max_cat ) + \" categories (including NULL category).\";\n\n        // add to map\n        new_anno.name = tmp_str_vec[col_cat];\n        new_anno.id = null_id;\n        BIT_SET(new_anno.id, ncat++);\n        anno_map[ new_anno.name ] = new_anno;\n\n      }\n    }\n\n
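    // (added note) each annotation category is one bit of a 64-bit id\n    // (set via BIT_SET), so a mask is the bitwise OR of its category bits\n    // and membership reduces to a single AND test, e.g.\n    // (snp_info->anno[gname].id & mask_id) != 0.\n\n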
    // with multiple regions for the same variant & gene,\n    // the annotation must be the same\n    if (params->w_regions && in_map(gname, snp_info->anno) && \n        (snp_info->anno[gname].id != anno_map[ tmp_str_vec[col_cat] ].id) ) \n      throw \"inconsistent variant annotation at line \" + to_string( lineread+1 ) + \".\";\n\n    // set bit for category\n    ainfo.id |= anno_map[ tmp_str_vec[col_cat] ].id;\n\n    // insert in snpinfo\n    if (in_map(gname, snp_info->anno)) \n      snp_info->anno[gname].regionid |= ainfo.regionid;\n    else\n      snp_info->anno[ gname ] = ainfo;\n    //if(lineread <5) cerr << snp_info->ID << \"--\" << ainfo.id << \" \" << (int) ainfo.regionid <<  endl; \n\n    // if using custom weights in VC tests\n    if( params->vc_with_weights ){\n        set_weight = convertDouble(tmp_str_vec[params->vc_weight_col - 1], params, sout);\n        if( set_weight < 0 ) throw \"invalid weight = \" + tmp_str_vec[params->vc_weight_col - 1] + \" for variant \" + sname + \" in set \" + gname;\n        snp_info->set_weight[ gname ] = set_weight;\n    }\n    \n    n_anno_read++, lineread++;\n  }\n\n  myfile.closeFile();\n\n  if(n_anno_read == 0){\n    string msg_err = \"annotation information could not be read. Perhaps check that variant IDs match those in the genotype file?\";\n    if ( params->mask_loo || params->mask_lodo) msg_err.append( \" (Or that the set name for LOVO/LODO matches the one in the annotation file?)\" );\n    throw msg_err;\n  }\n  if(!params->w_anno_lab) {\n    if(ncat == 0)\n      throw \"there are no annotation categories read from file.\";\n    sout << \"   +number of annotation categories = \" << ncat << endl;\n  }\n  if(params->w_regions) {\n    if(nregions == 0)\n      throw \"there are no domains read from file.\";\n    sout << \"   +number of domains across all sets = \" << nregions << endl;\n  }\n\n}\n\nvoid read_aafs(const double tol, const struct in_files* files, struct filter* filters, vector<snp>& snpinfo, bool const& wSingletons, mstream& sout) {\n\n  int lineread = 0, id_col = 0, aaf_col = 1, singleton_col = -1, npass = 0;\n  float aaf;\n  uint32_t snp_pos, ncols_min = 2;\n  std::vector< string > tmp_str_vec ;\n  string line, sname;\n  Files myfile;\n\n  sout << left << std::setw(20) << \" * user-given AAFs \" << \": [\" << files->aaf_file << \"] \" << endl;\n  myfile.openForRead (files->aaf_file, sout);\n\n  // check if there is a header line\n  myfile.readLine(line);\n  removeCarriageReturn( line );\n  tmp_str_vec = string_split(line,\"\\t ,\");\n  if( tmp_str_vec.size() < ncols_min ) \n    throw \"incorrectly formatted file at line \" + to_string( lineread+1 );\n  if( startswith(tmp_str_vec[0].c_str(), \"#\") ){\n    // find ID column\n    id_col = find_col(tmp_str_vec, \"ID\");\n    // find AAF column\n    aaf_col = find_col(tmp_str_vec, \"ALT_FREQS\");\n    // check if columns were found\n    if( (id_col < 0) || (aaf_col < 0) ) throw \"could not find 'ID' or 'ALT_FREQS' in header\";\n    ncols_min = max(id_col, aaf_col) + 1;\n    if(wSingletons) throw \"cannot use --set-singleton with PLINK AAF file\";\n  } else {\n    if(wSingletons) {\n      if(tmp_str_vec.size() < 3) throw \"not enough columns in AAF file in line 1\";\n      singleton_col = 2;\n      sout << left << std::setw(20) << \"  -using third column to identify singleton variants\\n\";\n      ncols_min = 3;\n    }\n    if (in_map(tmp_str_vec[id_col], filters->snpID_to_ind)){ // read in AAF for variant\n      snp_pos = filters->snpID_to_ind[ tmp_str_vec[id_col] ];\n      try {\n        aaf = stof( tmp_str_vec[aaf_col] );\n      } 
catch(...) {\n        throw \"only numerical values are allowed in column #\" + to_string(aaf_col) + \" (='\" + tmp_str_vec[aaf_col] + \"')\";\n      }\n      snpinfo[ snp_pos ].aaf = aaf;\n      if(wSingletons) snpinfo[ snp_pos ].force_singleton = check_singleton_column( tmp_str_vec[singleton_col] );\n      npass++;\n    }\n  }\n  lineread++;\n\n\n  while (myfile.readLine(line)) {\n\n    removeCarriageReturn( line );\n    tmp_str_vec = string_split(line,\"\\t ,\");\n\n    if( tmp_str_vec.size() < ncols_min ) \n      throw \"incorrectly formatted file at line \" + to_string( lineread+1 );\n\n    // name of variant\n    sname = tmp_str_vec[id_col];\n\n    // check it is in genotype file\n    if (!in_map(sname, filters->snpID_to_ind)) {\n      lineread++;\n      continue;\n    }\n    snp_pos = filters->snpID_to_ind[ sname ];\n\n    try {\n      aaf = stof( tmp_str_vec[aaf_col] );\n    } catch(...) {\n      throw \"only numerical values are allowed in column #\" + to_string(aaf_col) + \" (='\" + tmp_str_vec[aaf_col] + \"')\";\n    }\n\n    /* // not necessary (other checks to remove monomorphic masks)\n    if( (aaf < tol) || (aaf > (1-tol)) )\n      throw \"invalid AAF given at line \" + to_string( lineread+1 );\n    */\n    snpinfo[ snp_pos ].aaf = aaf;\n    if(wSingletons) snpinfo[ snp_pos ].force_singleton = check_singleton_column( tmp_str_vec[singleton_col] );\n\n    npass++;\n    lineread++;\n  }\n\n  myfile.closeFile();\n  if( !npass ) throw \"could not process any variant in the AAF file\";\n\n}\n\nbool check_singleton_column(string const& col_str){\n\n  if(col_str == \"0\") return false;\n  else if(col_str == \"1\") return true;\n  else throw \"unidentified value in third column (='\" + col_str + \"')\";\n\n}\n\nvoid read_masks(const struct in_files* files, struct param* params, map<string, anno_name>& anno_map, vector<maskinfo>& minfo, std::vector <std::vector<string>>& mask_out, uint64& all_masks, mstream& sout) {\n\n  bool valid_mask;\n  int lineread = 0, ncat = 0, n_with_missing = 0, n_non_valid = 0;\n  uint64 id;\n  maskinfo tmp_mask;\n  std::vector< string > tmp_str_vec, mask_str, anno_problem;\n  mask_str.resize(2);\n  string line;\n  Files myfile;\n  ofstream report_file;\n\n  sout << left << std::setw(20) << \" * masks \" << \": [\" << files->mask_file << \"] \" << flush;\n  myfile.openForRead (files->mask_file, sout);\n\n  if(params->check_mask_files) {\n    line = files->out_file + \"_masks_report.txt\";\n    openStream(&report_file, line, ios::out, sout);\n    report_file << \"## mask file: [\" << files->mask_file << \"]\\n## list of unknown annotations in mask file\\n\";\n  }\n\n  while (myfile.readLine(line)) {\n\n    valid_mask = true;\n    id = 0ULL;\n    if(params->check_mask_files) anno_problem.resize(0);\n\n    removeCarriageReturn( line );\n    tmp_str_vec = string_split(line,\"\\t ,\");\n    ncat = tmp_str_vec.size() - 1;\n\n    if( ncat < 1 ) \n      throw \"incorrectly formatted file at line \" + to_string( lineread+1 );\n\n    // mask name\n    tmp_mask.name = tmp_str_vec[0];\n    mask_str[0] = tmp_mask.name;\n\n    // check if using LOO (then single mask)\n    if((params->mask_loo || params->mask_lodo) && (params->mask_loo_name != tmp_mask.name)) {\n      lineread++;\n      continue;\n    }\n\n    // go through each category to define mask\n    std::vector< string > s_vec;\n    for(int i = 0; i < ncat; i++){\n\n      // check it is in map\n      if (!in_map(tmp_str_vec[i+1], anno_map)) {\n        if( tmp_str_vec[i+1].size() > 0 ){\n          valid_mask = 
false;\n          if(params->strict_check_burden) params->fail_check = true;\n          if(params->check_mask_files) anno_problem.push_back(tmp_str_vec[i+1]);\n        }\n        continue;\n      }\n      s_vec.push_back( anno_map[ tmp_str_vec[i+1] ].name );\n\n      // set bit for category\n      id |= anno_map[ tmp_str_vec[i+1] ].id;\n    }\n\n    if(!valid_mask) { // one of the categories is unrecognized\n      if(params->check_mask_files)\n        report_file << tmp_mask.name << \" \" << print_csv(anno_problem) << endl;\n      if(id == 0) { n_non_valid++; continue; }\n      else n_with_missing++;\n    }\n\n    tmp_mask.id = id;\n    mask_str[1] = print_csv( s_vec );\n    //if(lineread<5)cerr << tmp_mask.name << \"--\" << tmp_mask.id << endl; \n\n    // save mask\n    mask_out.push_back(mask_str);\n    minfo.push_back(tmp_mask);\n    params->mask_map[ tmp_mask.name ] = true;\n\n    // take union across all categories read\n    all_masks |= id;\n\n    lineread++;\n  }\n\n  myfile.closeFile();\n\n  sout << \"n_masks = \" << minfo.size() << endl;\n\n  // report\n  if(n_with_missing > 0) sout << \"WARNING: Detected \" << n_with_missing << \" masks with unknown annotations.\\n\";\n  if(n_non_valid > 0) sout << \"WARNING: Detected \" << n_non_valid << \" masks with only unknown annotations (these are ignored).\\n\";\n  if(params->check_mask_files) {\n    report_file << \"->Detected \" << n_with_missing << \" masks with unknown annotations.\\n\";\n    report_file << \"->Detected \" << n_non_valid << \" masks with only unknown annotations.\\n\";\n    report_file.close();\n  }\n\n  if(minfo.size() == 0)\n    throw \"no masks are left to be included in the analysis.\";\n}\n\n\n// read a single variant\nvoid read_snp(bool const& mean_impute, uint64 const& offset, Ref<ArrayXd> Geno, Ref<ArrayXb> mask, const Eigen::Ref<const ArrayXb>& ind_ignore, struct in_files* files, PgenReader& pgr, struct param* params, bool const& check_miss){\n\n  Geno = 0;\n  if(params->file_type == \"bed\")\n    read_snp_bed(offset, Geno, mask, ind_ignore, files, params);\n  else if(params->file_type == \"pgen\")\n    read_snp_pgen(offset, Geno, mask, pgr, params->dosage_mode);\n  else\n    read_snp_bgen(offset, Geno, mask, ind_ignore, files->bgen_file, params->ref_first, 0);\n\n  if(check_miss){\n    // mask missing or impute with mean\n    if(mean_impute) {\n      double meanG = (mask && (Geno != -3)).select(Geno,0).sum() / (mask && (Geno != -3)).count();\n      Geno = (mask && (Geno == -3)).select(meanG, Geno); \n    } else  mask = (Geno != -3).select(mask, false);\n  }\n\n}\n\nvoid read_snp_bed(uint64 const& offset, Ref<ArrayXd> Geno, Ref<ArrayXb> mask, const Eigen::Ref<const ArrayXb>& ind_ignore, struct in_files* files, struct param* params){\n\n  int hc;\n  uint32_t const nmax = ind_ignore.size();\n  uint32_t i = 0, index = 0;\n  ArrayXd geno4; // genotype values for 4 samples at a time\n\n  // set to correct position\n  jumpto_bed(offset, files->bed_block_size, files->geno_ifstream);\n  files->geno_ifstream.read( reinterpret_cast<char *> (&files->inbed[0]), files->bed_block_size);\n\n  for (size_t byte_start = 0; byte_start < files->bed_block_size; byte_start++) {\n\n    geno4 = params->bed_lookup_table[ files->inbed[byte_start] ];\n\n    for(int bit_start = 0; bit_start < 4; bit_start++, i++){\n\n      // skip remainder past N samples\n      if(i >= nmax) break;\n\n      // skip samples that were ignored from the analysis\n      if( ind_ignore(i) ) continue;\n\n      if(mask(index)){\n        hc = 
geno4(bit_start);\n        if(params->ref_first && (hc != -3)) hc = 2 - hc;\n        Geno(index) = hc;\n      }\n\n      index++;\n    }\n  }\n\n}\n\nvoid read_snp_pgen(uint64 const& offset, Ref<ArrayXd> Geno, Ref<ArrayXb> mask, PgenReader& pgr, bool const& dosage_mode){\n\n  // read genotype data\n  if( dosage_mode )\n    pgr.Read(Geno.data(), Geno.size(), 0, offset, 1);\n  else\n    pgr.ReadHardcalls(Geno.data(), Geno.size(), 0, offset, 1);\n\n  Geno *= mask.cast<double>();\n\n}\n\n// using bgen library API\n// ttype is 0: add, 1:dom, 2:rec\nvoid read_snp_bgen(uint64 const& offset, Ref<ArrayXd> Geno, Ref<ArrayXb> mask, const Eigen::Ref<const ArrayXb>& ind_ignore, string const& bgen_file, bool const& ref_first, int const& ttype){\n\n  uint32_t index = 0;\n  double ds;\n  std::string chromosome, rsid;\n  uint32_t position ;\n  std::vector< std::string > alleles ;\n  std::vector< std::vector< double > > probs ;\n\n  // open file\n  BgenParser bgen_tmp;\n  bgen_tmp.open( bgen_file ) ;\n  bgen_tmp.jumpto( offset );\n  bgen_tmp.read_variant( &chromosome, &position, &rsid, &alleles );\n  bgen_tmp.read_probs( &probs ) ;\n\n  for( std::size_t i = 0; i < probs.size(); ++i ) {\n    // skip samples that were ignored from the analysis\n    if( ind_ignore(i) ) continue;\n\n    if(mask(index)){\n\n      // get dosage from file\n      ds = 0;\n      for( std::size_t j = 1; j < probs[i].size(); ++j ) ds += probs[i][j] * j;\n\n      // coding\n      if(ds!= -3){\n        if(ttype == 1) //dominant\n          ds = ref_first ? (probs[i][1] + probs[i][2]) : (probs[i][0] + probs[i][1]);\n        else if(ttype == 2) //recessive\n          ds = ref_first ? probs[i][2] : probs[i][0];\n        else // additive\n          ds = ref_first ? ds : (2 - ds); // if ref-first, no need to switch\n      }\n\n      Geno(index) = ds;\n    }\n\n    index++;\n  }\n\n}\n\n// check how to code G_E variant: add/dom/rec/cat\nvoid code_snp(MatrixXd& Gcov, Ref<ArrayXb> mask, uint64 const& offset, struct filter* filters, struct in_files* files, struct param* params, mstream& sout){\n\n  string const ttype = filters->interaction_cov_null_level;\n\n  if( (ttype == \"add\" ) || (ttype == \"add-homdev\" ) || (ttype.size() == 0) ){ // additive\n    params->add_homdev = (ttype == \"add-homdev\");\n    if(params->gwas_condtl && params->add_homdev)\n      throw \"'add-homdev' coding cannot be used with --force-condtl\";\n    return;\n  }\n\n  MapArXd Geno ( Gcov.col(0).data(), params->n_samples, 1);\n\n  if(ttype == \"dom\"){ // dominant\n\n    if(params->file_type == \"bgen\")\n      read_snp_bgen(offset, Geno, mask, filters->ind_ignore, files->bgen_file, params->ref_first, 1);\n    else {\n\n      if((params->file_type == \"pgen\") && params->dosage_mode)\n        sout <<  \"     +converting dosages to hardcalls for the variant\\n\";\n      Geno = Geno.round(); // convert to hardcalls\n      Geno = (mask && (Geno >= 1)).cast<double>();\n\n    }\n\n  } else if(ttype == \"rec\"){ //recessive\n\n    if(params->file_type == \"bgen\")\n      read_snp_bgen(offset, Geno, mask, filters->ind_ignore, files->bgen_file, params->ref_first, 2);\n    else {\n\n      if((params->file_type == \"pgen\") && params->dosage_mode)\n        sout <<  \"     +converting dosages to hardcalls for the variant\\n\";\n      Geno = Geno.round(); // convert to hardcalls\n      Geno = (mask && (Geno == 2)).cast<double>();\n\n    }\n\n  } else if(ttype == \"cat\"){ // categorical\n\n    // convert to hardcalls\n    Geno = Geno.round();\n\n    // create 2 dummy predictors 
for 1 and 2\n    MatrixXd newGeno = MatrixXd::Zero(params->n_samples, 2);\n    newGeno.col(0).array() = (mask && (Geno == 1)).cast<double>();\n    newGeno.col(1).array() = (mask && (Geno == 2)).cast<double>();\n\n    Gcov = newGeno;\n    params->interaction_cat = true;\n    vector<string> vecstr{ \"1\",\"2\" };\n    params->interaction_lvl_names = vecstr;\n\n  } else throw \"unrecognized coding for GxG variant (can be either add/dom/rec/cat/add-homdev).\";\n\n}\n\n\nvoid get_conditional_vars(map<string, uint64>& snps, struct in_files* files, struct param const* params, mstream& sout) {\n\n  string line;\n  std::vector< string > tmp_str_vec ;\n  Files myfile;\n\n  myfile.openForRead (files->condition_snps_list, sout);\n\n  // get list of variants\n  while( myfile.readLine(line) ){\n    tmp_str_vec = string_split(line,\"\\t \");\n\n    if( tmp_str_vec.size() < 1 )\n      throw \"incorrectly formatted file (\" + files->condition_snps_list + \")\";\n\n    snps[tmp_str_vec[0]] = 0;\n  }\n\n  myfile.closeFile();\n\n  if(snps.size() > params->max_condition_vars)\n    throw \"number of variants used for conditional analysis is greater than maximum of \" + to_string(params->max_condition_vars) + \" (otherwise use --max-condition-vars)\";\n  else if(snps.size() == 0)\n    throw \"no variants for conditional analysis given in file \" + files->condition_snps_list;\n\n  // make sure variants will be ignored by adding to exclude list\n  files->file_snps_exclude.push_back(files->condition_snps_list);\n\n}\n\nvoid get_snps_offset(map<string, uint64>& snps, map<string, uint32_t>& index_map, vector<snp> const& snpinfo, mstream& sout){\n\n  uint32_t nstart = snps.size();\n  std::map <std::string, uint64> snps_found;\n  std::map <std::string, uint64>::iterator itr;\n\n  for (itr = snps.begin(); itr != snps.end(); ++itr) {\n    if (in_map(itr->first, index_map))\n      snps_found[ itr->first ] = snpinfo[ index_map[ itr->first ] ].offset;\n  }\n\n  snps = snps_found;\n\n  if(snps.size() == 0)\n    throw \"none of the variants were found in the genotype file\";\n  else if(snps.size() != nstart) // enforce this\n    throw to_string( nstart - snps.size() ) + \" of the variants could not be found in the genotype file\";\n\n}\n\nvoid get_snps_offset(map<string, uint64>& snps, map<string, vector<uint64>>& index_map, mstream& sout){\n\n  std::map <std::string, uint64> snps_found;\n  std::map <std::string, uint64>::iterator itr;\n\n  for (itr = snps.begin(); itr != snps.end(); ++itr) \n    if (in_map(itr->first, index_map))\n      snps_found[ itr->first ] = index_map[ itr->first ][0];\n\n  snps = snps_found;\n\n  if(snps.size() == 0)\n    throw \"none of the conditional variants were found in the genotype file\";\n\n}\n\nvoid extract_from_genofile(string const& setting, Ref<MatrixXd> Gmat, bool const& mean_impute, Ref<ArrayXb> mask, struct filter* filters, struct in_files* files, struct param* params, mstream& sout){\n\n  ext_geno_info geno_info;\n  map <string, vector<uint64>> tmp_map;\n  map <string, uint64> tmp_map_inter;\n  uint32_t nstart;\n  // pointers to various information\n  geno_file_info* ext_file_info;\n  map <string, uint64>* variant_names;\n  \n  if(setting == \"interaction\") {\n    nstart = 1;\n    ext_file_info = &(files->interaction_snp_info);\n    tmp_map_inter[filters->interaction_cov] = 0;\n    variant_names = &(tmp_map_inter);\n  } else if(setting == \"conditional\") {\n    nstart = filters->condition_snp_names.size();\n    ext_file_info = &(files->condition_snps_info);\n    variant_names = 
&(filters->condition_snp_names);\n  } else throw \"unrecognized input to extract from external genotype file\";\n\n\n  if(ext_file_info->format == \"bgen\")\n    setup_bgen(setting, geno_info, ext_file_info, variant_names, tmp_map, mask, files, params, filters, sout);\n  else if(ext_file_info->format == \"pgen\")\n    setup_pgen(geno_info, ext_file_info, tmp_map, mask, params, sout);\n  else if(ext_file_info->format == \"bed\")\n    setup_bed(geno_info, ext_file_info, tmp_map, mask, params, sout);\n\n  if(params->debug) cerr << geno_info.sample_keep.count() << \" \" << tmp_map.size() << \"\\n\\n\" << geno_info.sample_index.head(10) << \"\\n\\n\";\n\n  \n  if(!ext_file_info->with_bgi) { // filter using map\n    get_snps_offset((*variant_names), tmp_map, sout);\n    if(setting == \"interaction\") params->ltco_chr = tmp_map[filters->interaction_cov][1];\n  }\n\n  // check number of variants\n  if(variant_names->size() > params->max_condition_vars)\n    throw \"number of variants used for conditional analysis is greater than maximum of \" + to_string(params->max_condition_vars) + \" (otherwise use --max-condition-vars)\";\n  else if(variant_names->size() != nstart)\n    throw to_string( nstart - variant_names->size() ) + \" of the variants could not be found in the genotype file\";\n\n  if(setting == \"conditional\") \n    sout <<  \"      -n_used = \" << variant_names->size() << endl;\n  Gmat.array() = -3; // set all to missing\n\n  // read in variants & impute if missing\n  // note variant with all missing will be captured in intercept\n  if((ext_file_info->format == \"bgen\") && geno_info.streamBGEN)\n    read_snps_bgen(mean_impute, (*variant_names), Gmat, geno_info, mask, ext_file_info->file, params);\n  else if(ext_file_info->format == \"bgen\")\n    read_snps_bgen(mean_impute, (*variant_names), Gmat, geno_info, mask, ext_file_info->file);\n  else if(ext_file_info->format == \"pgen\")\n    read_snps_pgen(mean_impute, (*variant_names), Gmat, geno_info, mask);\n  else if(ext_file_info->format == \"bed\")\n    read_snps_bed(mean_impute, (*variant_names), Gmat, geno_info, mask, ext_file_info->file, params, sout);\n\n  // if did not impute missing with mean, mask samples\n  if(!mean_impute){\n    mask = ((Gmat.array() == -3).rowwise().any()).select(false, mask);\n    Gmat.array().colwise() *= mask.cast<double>();\n  }\n\n  // if ref-first for GxG (inactive for pgen)\n  // bug fix: bgen api already assumes ref-first\n  if((setting == \"interaction\") && files->interaction_snp_info.ref_first && !((ext_file_info->format == \"bgen\") && !geno_info.streamBGEN))\n    Gmat.array() = (2 - Gmat.array()).colwise() * mask.cast<double>();\n\n}\n\n// for conditional analyses\nvoid setup_bgen(string const& setting, struct ext_geno_info& ginfo, geno_file_info* ext_file_info, map <string, uint64>* variant_names, map<string, vector<uint64>>& index_map, Ref<ArrayXb> mask, struct in_files* files, struct param* params, struct filter* filters, mstream& sout){\n\n  sout << \"      -extracting variants from file [\" << ext_file_info->file << \"]\\n\";\n\n  int chrom;\n  uint32_t lineread = 0;\n  uint BGENbits;\n  uint64 offset;\n  std::vector< string > tmp_ids ;\n  std::vector< uint64 > tmp_v = std::vector< uint64 >(2);\n  BgenParser bgen_tmp;\n\n  uint32_t position ;\n  std::string chromosome, rsid, msg;\n  std::vector< std::string > alleles ;\n  std::vector< std::vector< double > > probs ;\n\n  // check if can use faster file stream\n  check_bgen(ext_file_info->file, ext_file_info->format, ginfo.zlib_compress, 
ginfo.streamBGEN, BGENbits, params->nChrom);\n\n  // open file and print file info\n  bgen_tmp.open( ext_file_info->file ) ;\n\n  // get info for variants\n  if( ext_file_info->with_bgi ) read_bgi_file(setting, bgen_tmp, ext_file_info, variant_names, params, sout);\n  else {\n    offset = bgen_tmp.get_position();\n    while(bgen_tmp.read_variant( &chromosome, &position, &rsid, &alleles )) {\n\n      assert(alleles.size() == 2) ; // only bi-allelic allowed\n      // check phasing for first variant\n      if(lineread++ == 0){\n        bgen_tmp.read_probs( &probs ) ;\n        if( probs[0].size() != 3 ) // unphased only \n          throw \"only unphased bgen are supported.\";\n      } else bgen_tmp.ignore_probs();\n\n      chrom = chrStrToInt(chromosome, params->nChrom);\n      if (chrom <= 0) \n        throw \"unknown chromosome code in bgen file.\";\n\n      // make list of variant IDs\n      tmp_v[0] = offset, tmp_v[1] = chrom;\n      index_map[ rsid ] = tmp_v;\n\n      offset = bgen_tmp.get_position();\n    }\n\n  }\n\n  // get sample IDs (from sample file or directly from bgen file)\n  if( ext_file_info->with_sample ) {\n    read_bgen_sample(ext_file_info->sample, tmp_ids, sout);\n  } else {\n    bgen_tmp.get_sample_ids(\n        [&tmp_ids]( std::string const& id ) { tmp_ids.push_back( id ) ; } );\n  }\n\n  // check if included in the analysis (if yes, store IDs)\n  ginfo.sample_keep.resize(tmp_ids.size());\n  ginfo.sample_index.resize(tmp_ids.size());\n  for(size_t i = 0; i < tmp_ids.size(); i++) {\n    ginfo.sample_keep(i) = in_map(tmp_ids[i], params->FID_IID_to_ind); \n    if(ginfo.sample_keep(i)) {\n      position = params->FID_IID_to_ind[ tmp_ids[i] ];\n      if(mask(position)) // analyzed sample\n        ginfo.sample_index(i) = position;\n      else\n        ginfo.sample_keep(i) = false; \n    }\n  }\n\n  if(ginfo.sample_keep.count() == 0)\n    throw \"none of the analyzed samples are present in the file\";\n\n}\n\n// fast streaming\nvoid read_snps_bgen(bool const& mean_impute, map<string, uint64>& snp_map, Ref<MatrixXd> Gmat, struct ext_geno_info& ginfo, Ref<ArrayXb> mask, string const& bgen_file, struct param* params){\n\n  int bs = snp_map.size();\n  vector< vector < uchar > > snp_data_blocks;\n  vector< uint32_t > insize, outsize;\n  vector<uint64> indices;\n  ArrayXb read_error = ArrayXb::Constant(bs, false);\n  std::map <std::string, uint64>::iterator itr;\n  std::ifstream bgen_ifstream;\n\n  snp_data_blocks.resize( bs );\n  insize.resize( bs );\n  outsize.resize( bs );\n  indices.reserve( bs );\n  for (itr = snp_map.begin(); itr != snp_map.end(); ++itr)\n    indices.push_back(itr->second);\n  std::sort(indices.begin(), indices.end());// sort indices to read in order\n\n  bgen_ifstream.open( bgen_file, ios::in | ios::binary);\n  readChunkFromBGEN(&bgen_ifstream, insize, outsize, snp_data_blocks, indices);\n\n\n  // unpack data for each variant\n#if defined(_OPENMP)\n  setNbThreads(1);\n#pragma omp parallel for schedule(dynamic)\n#endif\n  for(int isnp = 0; isnp < bs; isnp++) {\n\n    uint minploidy = 0, maxploidy = 0, phasing = 0, bits_prob = 0;\n    uint16_t numberOfAlleles = 0 ;\n    uint32_t nindivs = 0, index;\n    string tmp_buffer;\n    vector<uchar>* geno_block = &snp_data_blocks[isnp];\n\n    // set genotype data block\n    vector < uchar > geno_block_uncompressed;\n    geno_block_uncompressed.resize(outsize[isnp]);\n\n    // uncompress the block\n    bool compress_fail;\n    if(ginfo.zlib_compress){ // using zlib\n      uLongf dest_size = outsize[isnp];\n      
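// (added note) insize[isnp] appears to include the 4-byte field giving the\n      // decompressed block size, so insize[isnp] - 4 bytes of compressed data\n      // are passed to the decompressor (same arithmetic in the zstd branch).\n      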
compress_fail = (uncompress( &(geno_block_uncompressed[0]), &dest_size, &((*geno_block)[0]), insize[isnp] - 4) != Z_OK) || (dest_size != outsize[isnp]);\n    } else { // using zstd\n      size_t const dest_size = ZSTD_decompress(&(geno_block_uncompressed[0]), outsize[isnp], &((*geno_block)[0]), insize[isnp] - 4) ;\n      compress_fail = (dest_size != outsize[isnp]);\n    }\n    // check it was successful\n    if( compress_fail ){\n      read_error(isnp) = true;\n      continue; // don't use throw as not thread-safe\n    }\n\n    // stream to uncompressed block\n    uchar *buffer = &geno_block_uncompressed[0];\n    // sample size in file\n    std::memcpy(&nindivs, &(buffer[0]), 4);\n    buffer += 4;\n    // num alleles\n    std::memcpy(&numberOfAlleles, &(buffer[0]), 2);\n    assert( numberOfAlleles == 2 );\n    buffer += 2;\n    // ploidy\n    std::memcpy(&minploidy, &(buffer[0]), 1);\n    assert( minploidy == 2 );\n    buffer ++;\n    std::memcpy(&maxploidy, &(buffer[0]), 1);\n    assert( maxploidy == 2 );\n    buffer ++;\n    //to identify missing when getting dosages\n    vector < uchar > ploidy_n;\n    ploidy_n.resize( nindivs );\n    std::memcpy(&(ploidy_n[0]), &(buffer[0]), nindivs);\n    buffer += nindivs;\n    // phasing\n    std::memcpy(&phasing, &(buffer[0]), 1);\n    assert( phasing == 0 );\n    buffer++;\n    // bits per probability\n    std::memcpy(&bits_prob, &(buffer[0]), 1);\n    assert( bits_prob == 8 );\n    buffer++;\n\n    // get dosages \n    int ns = 0;\n    double prob0, prob1, total = 0;\n    MapArXd Geno (Gmat.col(isnp).data(), Gmat.rows(), 1);\n\n    // parse genotype probabilities block\n    for(size_t i = 0; i < nindivs; i++) {\n\n      // skip samples that were ignored from the analysis\n      if( !ginfo.sample_keep(i) ) {\n        buffer+=2;\n        continue;\n      }\n      index = ginfo.sample_index(i);\n\n      if(ploidy_n[i] & 0x80) {\n        Geno(index) = -3;\n        buffer+=2;\n        continue;\n      }\n\n      prob0 = double((*reinterpret_cast< uint8_t const* >( buffer++ ))) / 255.0;\n      prob1 = double((*reinterpret_cast< uint8_t const* >( buffer++ ))) / 255.0;\n      Geno(index) = prob1 + 2 * prob0;\n\n      if( Geno(index) != -3 ){\n          total += Geno(index);\n          ns++;\n      }\n    }\n\n    if(ns==0) {Geno=-3; continue;} // mask all samples\n    if(mean_impute) mean_impute_g(total/ns, Geno, mask);\n\n  }\n#if defined(_OPENMP)\n  setNbThreads(params->threads);\n#endif\n\n  bgen_ifstream.close();\n  if(read_error.any())\n    throw \"failed to decompress genotype data block.\";\n\n}\n\nvoid read_snps_bgen(bool const& mean_impute, map<string, uint64>& snp_map, Ref<MatrixXd> Gmat, struct ext_geno_info& ginfo, Ref<ArrayXb> mask, string const& bgen_file){\n\n  int index = 0, count = 0;\n  double ds, total, ns;\n  std::map <std::string, uint64>::iterator itr;\n\n  std::string chromosome, rsid;\n  uint32_t position ;\n  std::vector< std::string > alleles ;\n  std::vector< std::vector< double > > probs ;\n\n  // open file\n  BgenParser bgen_tmp;\n  bgen_tmp.open( bgen_file ) ;\n\n  for (itr = snp_map.begin(); itr != snp_map.end(); ++itr, count++) {\n\n    MapArXd Geno (Gmat.col(count).data(), Gmat.rows(), 1);\n\n    bgen_tmp.jumpto( itr->second );\n    bgen_tmp.read_variant( &chromosome, &position, &rsid, &alleles );\n    bgen_tmp.read_probs( &probs ) ;\n\n    total = 0, ns = 0;\n    for( std::size_t i = 0; i < probs.size(); ++i ) {\n      // skip samples that were ignored from the analysis\n      if( !ginfo.sample_keep(i) ) continue;\n     
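 // (added note) sample_index(i) maps row i of the bgen file to the\n      // sample's position among analyzed samples (built in setup_bgen)\n     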
 index = ginfo.sample_index(i);\n\n      // get dosage from file\n      ds = 0;\n      for( std::size_t j = 1; j < probs[i].size(); ++j ) ds += probs[i][j] * j;\n      // does not matter if ref-first/ref-last since used as covar\n\n      if(ds != -3) {total += ds; ns++;}\n      Geno(index) = ds;\n    }\n\n    if(ns==0) {Geno=-3; continue;} // mask all samples\n\n    // impute missing\n    if(mean_impute) mean_impute_g(total/ns, Geno, mask);\n\n  }\n\n}\n\nvoid setup_pgen(struct ext_geno_info& ginfo, geno_file_info* ext_file_info, map<string, vector<uint64>>& index_map, Ref<ArrayXb> mask, struct param* params, mstream& sout) {\n\n sout << \"      -extracting variants using PGEN file prefix [\" << ext_file_info->file << \"]\\n\";\n\n  uint32_t nv = read_pvar(index_map, ext_file_info, params, sout);\n  uint32_t ns = read_psam(ginfo, ext_file_info, mask, params, sout);\n  //cerr << \"Nsamples=\" << ns << \"\\tNvariants=\" << nv << endl;\n  prep_pgen(ns, nv, ginfo, ext_file_info);\n\n}\n\nvoid read_snps_pgen(bool const& mean_impute, map<string, uint64>& snp_map, Ref<MatrixXd> Gmat, struct ext_geno_info& ginfo, Ref<ArrayXb> mask){\n\n  int index = 0, count = 0;\n  double total, ns;\n  std::map <std::string, uint64>::iterator itr;\n  ArrayXd Gread (ginfo.sample_keep.count()); // some analyzed samples may not be in file\n\n  for (itr = snp_map.begin(); itr != snp_map.end(); ++itr, count++) {\n\n    MapArXd Geno (Gmat.col(count).data(), Gmat.rows(), 1);\n\n    // read genotype data\n    if( ginfo.dosage_mode )\n      ginfo.pgr.Read(Gread.data(), Gread.size(), 0, itr->second, 1);\n    else\n      ginfo.pgr.ReadHardcalls(Gread.data(), Gread.size(), 0, itr->second, 1);\n\n    total = 0, ns = 0;\n    for(int i_raw = 0, i = 0; i_raw < ginfo.sample_keep.size(); ++i_raw ) {\n      // skip samples that were ignored from the analysis\n      if( !ginfo.sample_keep(i_raw) ) continue;\n      index = ginfo.sample_index(i_raw);\n\n      if(Gread(i) != -3) {total += Gread(i); ns++;}\n      Geno(index) = Gread(i++);\n    }\n\n    if(ns==0) {Geno=-3; continue;} // mask all samples\n\n    // impute missing\n    if(mean_impute) mean_impute_g(total/ns, Geno, mask);\n\n  }\n\n}\n\n\nvoid setup_bed(struct ext_geno_info& ginfo, geno_file_info* ext_file_info, map<string, vector<uint64>>& index_map, Ref<ArrayXb> mask, struct param* params, mstream& sout) {\n\n sout << \"      -extracting variants using BED file prefix [\" << ext_file_info->file << \"]\\n\";\n\n  uint32_t nv = read_bim(index_map, ext_file_info, params, sout);\n  uint32_t ns = read_fam(ginfo, ext_file_info, mask, params, sout);\n  if(params->debug) cerr << \"Nsamples=\" << ns << \"\\tNvariants=\" << nv << endl;\n\n  // check if need to make lookup table\n  if(params->bed_lookup_table.size() == 0)\n    buildLookupTable(params->bed_lookup_table);\n}\n\n\nvoid read_snps_bed(bool const& mean_impute, map<string, uint64>& snp_map, Ref<MatrixXd> Gmat, struct ext_geno_info& ginfo, Ref<ArrayXb> mask, string const& bed_prefix, struct param* params, mstream& sout){\n\n  int index = 0, count = 0, hc;\n  uint32_t const nmax = ginfo.sample_keep.size();\n  uint64 bed_block_size;\n  double total, ns;\n  std::map <std::string, uint64>::iterator itr;\n  ArrayXd geno4; // genotype values for 4 samples at a time\n  ifstream bed_ifstream;\n  vector<uchar> inbed;\n\n  // open file and check header\n  uchar header[3];\n  string fname = bed_prefix + \".bed\";\n  openStream(&bed_ifstream, fname, std::ios::in | std::ios::binary, sout);\n  bed_ifstream.read( reinterpret_cast<char *> 
(&header[0]), 3);\n  if ( (header[0] != 0x6c) || (header[1] != 0x1b) || (header[2] != 0x01) ) \n    throw \"incorrect magic number in bed file.\";\n  // size of genotype block [(n+3)/4 = ceil(n/4.0)]\n  bed_block_size = (nmax+3)>>2;\n  inbed.resize( bed_block_size );\n\n\n  for (itr = snp_map.begin(); itr != snp_map.end(); ++itr, count++) {\n\n    MapArXd Geno (Gmat.col(count).data(), Gmat.rows(), 1);\n\n    // set to correct position\n    jumpto_bed(itr->second, bed_block_size, bed_ifstream);\n    bed_ifstream.read( reinterpret_cast<char *> (&inbed[0]), bed_block_size);\n    total = 0, ns = 0;\n\n    for (size_t byte_start = 0, i = 0; byte_start < bed_block_size; byte_start++) {\n\n      geno4 = params->bed_lookup_table[ inbed[byte_start] ];\n\n      for(int bit_start = 0; bit_start < 4; bit_start++, i++){\n\n        // skip remainder past N samples\n        if(i >= nmax) break;\n        // skip samples that were ignored from the analysis\n        if( !ginfo.sample_keep(i) ) continue;\n        index = ginfo.sample_index(i);\n\n        hc = geno4(bit_start);\n        if(hc != -3) {total += hc; ns++;}\n        Geno(index) = hc;\n      }\n    }\n\n    if(ns==0) {Geno=-3; continue;} // mask all samples\n\n    // impute missing\n    if(mean_impute) mean_impute_g(total/ns, Geno, mask);\n\n  }\n\n}\n"
  },
  {
    "path": "src/Geno.hpp",
    "content": "/* \n\n   This file is part of the regenie software package.\n\n   Copyright (c) 2020-2024 Joelle Mbatchou, Andrey Ziyatdinov & Jonathan Marchini\n\n   Permission is hereby granted, free of charge, to any person obtaining a copy\n   of this software and associated documentation files (the \"Software\"), to deal\n   in the Software without restriction, including without limitation the rights\n   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n   copies of the Software, and to permit persons to whom the Software is\n   furnished to do so, subject to the following conditions:\n\n   The above copyright notice and this permission notice shall be included in all\n   copies or substantial portions of the Software.\n\n   THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n   SOFTWARE.\n\n*/\n\n#ifndef GENO_H\n#define GENO_H\n\n#if defined(__GNUC__)\n// turn off the specific warning\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored \"-Wsign-compare\"\n#endif\n#include \"bgen_to_vcf.hpp\"\n#if defined(__GNUC__)\n#pragma GCC diagnostic pop\n#endif\n\n#include \"pgenlibr.h\"\n\nstruct annoinfo {\n  uint64 regionid = 0ULL;\n  uint64 id = 0ULL;\n};\n\nstruct snp {\n  int chrom;\n  std::string ID;\n  //double genpos; \n  uint32_t physpos;\n  std::string allele1, allele2;\n  double MAF;\n  uint64 offset;\n  // for masks\n  std::map <std::string, annoinfo> anno; // annotation\n  std::map <std::string, double> set_weight; // weight\n  float aaf = -1;\n  bool force_singleton = false; // for singleton masks\n  bool MAC_fail_if_checked = true; // for extract/exclude OR\n  bool apply_diff_MAC_filter = false; // for forced MAC filter\n};\n\nstruct tally {\n  uint32_t snp_count = 0;\n  uint32_t n_failed_tests = 0;\n  uint32_t n_ignored_snps = 0;\n  uint32_t n_ignored_tests = 0;\n};\n\n// for step 2 per thread\nstruct data_thread {\n  SpVec Gsparse;\n  Eigen::ArrayXd scale_fac_pheno;\n  Eigen::MatrixXd Gres;\n  Eigen::ArrayXd Gmod;\n  Eigen::ArrayXd denum;\n  Eigen::ArrayXd scores;\n  Eigen::ArrayXd cal_factor;\n  Eigen::ArrayXd stats;\n  Eigen::ArrayXd chisq_val;\n  Eigen::ArrayXd pval_log;\n  Eigen::ArrayXd bhat;\n  Eigen::ArrayXd se_b;\n  Eigen::ArrayXd skat_var;\n  // for spa\n  bool pos_score;\n  double val_a, val_b, val_c, val_d; \n  // firth\n  double dif_deviance;\n  Eigen::MatrixXd beta_null_firth;\n  // reset each time\n  bool fastSPA = true;\n  bool is_sparse = false;\n};\n\nstruct geno_block {\n  uint32_t ns, nv;\n  BgenParser bgen;\n  PgenReader pgr;\n  Eigen::MatrixXd Gmat;\n  Eigen::MatrixXd snp_afs;\n  std::vector<data_thread> thread_data;\n};\n\nstruct variant_block {\n  bool ignored, flipped;\n  int n_rr, n_aa, n_zero = -1;\n  double scale_fac, mac1, af1, info1, ns1, ns1_adj;\n  Eigen::ArrayXi ns, ns_case, ns_control, nmales, ns_case_adj;\n  Eigen::ArrayXd af, af_case, af_control, mac, info, cf_burden;\n  Eigen::MatrixXi genocounts;\n  ArrayXb ignored_trait;\n  ArrayXb test_fail;\n  ArrayXb is_corrected; // for firth/spa\n  // for masks\n  bool singleton = false;\n  int col_jmat_skat = -1;\n  bool skip_for_vc = true;\n  std::map 
<std::string, Eigen::MatrixXd> sum_stats_vc; // log10p & chisq for each vc test\n  std::string mask_name = \"\";\n  // for joint test\n  Eigen::ArrayXd pval_log;\n  // interaction test\n  bool skip_int, fitHLM;\n  ArrayXb is_corrected_inter; // for firth\n  ArrayXb test_fail_inter; // for firth\n  // multi-trait tests\n  std::vector<std::string> sum_stats_mt;\n  // MultiPhen test\n  std::vector<std::string> sum_stats_multiphen;\n  // association test info\n  std::vector<std::string> sum_stats;\n};\n\n// for conditional analyses\nstruct ext_geno_info {\n  bool dosage_mode, zlib_compress, streamBGEN;\n  PgenReader pgr;\n  ArrayXb sample_keep; // keep track of samples in analysis\n  Eigen::ArrayXi sample_index; // index of samples in analysis\n};\n\nstruct findID {\n  uint32_t index;\n  bool is_found;\n};\n\n\n\nvoid check_bgen(const std::string&,std::string const&,bool&,bool&,uint&,int const&);\nvoid prep_bgen(struct in_files*,struct param*,struct filter*,std::vector<snp>&,std::map<int,std::vector<int>>&,BgenParser&,mstream&);\nvoid read_bgen_sample(const std::string&,struct param*,std::vector<std::string> &,mstream&);\nvoid read_bgen_sample(const std::string&,std::vector<std::string> &,mstream&);\nvoid read_bgi_file(BgenParser&,struct in_files*,struct param*,struct filter*,std::vector<snp>&,mstream&);\nvoid read_bgi_file(std::string const&,BgenParser&,geno_file_info*,std::map<std::string,uint64>*,struct param*,mstream&);\n\nvoid read_bed_bim_fam(struct in_files*,struct param*,struct filter*,std::vector<snp>&,std::map<int,std::vector<int>>&,mstream&);\nvoid read_bim(struct in_files*,struct param*,struct filter*,std::vector<snp>&,mstream&);\nvoid read_fam(struct in_files*,struct param*,mstream&);\nvoid prep_bed(const uint32_t&, struct in_files*,mstream&);\n\nvoid read_pgen_pvar_psam(struct in_files*,struct param*,struct filter*,struct geno_block*,std::vector<snp>&,std::map<int,std::vector<int>>&,mstream&);\nuint64 read_pvar(struct in_files*,struct param*,struct filter*,std::vector<snp>&,mstream&);\nvoid read_psam(struct in_files*,struct param*,mstream&);\nvoid prep_pgen(struct in_files const*,struct filter const*,struct geno_block*,struct param*,mstream&);\n\nArrayXb check_in_map_from_files(std::map<std::string,uint>&,std::vector<std::string> const&,mstream&);\nArrayXb check_in_map_from_files(std::map<std::string,uint>&,std::vector<std::string> const&,struct param*,mstream&);\nArrayXb check_in_map_from_files_IDs(std::vector<std::string> const&,struct param*,mstream&);\nvoid check_snps_include_exclude(struct in_files*,struct param*,struct filter*,std::vector<snp>&,std::map<int,std::vector<int>>&,mstream&);\nvoid check_snps_include_exclude_or(struct in_files*,struct param*,struct filter*,std::vector<snp>&,mstream&);\nvoid check_forced_MAC_file(std::map<std::string,uint32_t>&,std::vector<snp>&,struct param*,mstream&);\nvoid check_samples_include_exclude(struct in_files const*,struct param*,struct filter*,mstream&);\nvoid check_ld_list(std::map<std::string,uint32_t>&,struct in_files*,struct param*,mstream&);\n\nvoid get_G(const int&,const int&,const int&,const uint32_t&,std::vector<snp> const&,struct param const*,struct in_files*,struct geno_block*,struct filter const*,const Eigen::Ref<const MatrixXb>&,const Eigen::Ref<const Eigen::MatrixXd>&,mstream&);\n\nvoid readChunkFromBGENFileToG(const int&,const int&,const uint32_t&,std::vector<snp> const&,struct param const*,struct geno_block*,struct filter const*,const Eigen::Ref<const MatrixXb>&,const Eigen::Ref<const Eigen::MatrixXd>&,mstream&);\nvoid 
readChunkFromBGENFileToG_fast(const int&,const int&,const uint32_t&,std::vector<snp> const&,struct param const*,struct in_files*,struct geno_block*,struct filter const*,const Eigen::Ref<const MatrixXb>&,const Eigen::Ref<const Eigen::MatrixXd>&,mstream&);\nvoid readChunkFromBedFileToG(const int&,const int&,const uint32_t&,std::vector<snp> const&,struct param const*,struct in_files*,struct geno_block*,struct filter const*,const Eigen::Ref<const MatrixXb>&,const Eigen::Ref<const Eigen::MatrixXd>&,mstream&);\nvoid readChunkFromPGENFileToG(const int&,const uint32_t&,std::vector<snp> const&,struct param const*,struct geno_block*,struct filter const*,const Eigen::Ref<const MatrixXb>&,mstream&);\n\nvoid readChunkFromBGENFileToG(std::vector<uint64> const&,const int&,std::vector<snp> const&,struct param const*,Eigen::Ref<Eigen::MatrixXd>,BgenParser&,struct filter const*,const Eigen::Ref<const MatrixXb>&,const Eigen::Ref<const Eigen::MatrixXd>&,std::vector<variant_block>&,mstream&);\nvoid readChunkFromBGEN(std::istream*,std::vector<uint32_t>&,std::vector<uint32_t>&,std::vector<std::vector<uchar>>&,std::vector<uint64>&);\nvoid parseSNP(const int&,const int&,std::vector<uchar>*,const uint32_t&,const uint32_t&,struct param const*,struct filter const*,const Eigen::Ref<const MatrixXb>&,const Eigen::Ref<const Eigen::MatrixXd>&,const snp*,struct geno_block*,variant_block*,mstream&);\nvoid parseSnpfromBGEN(const int&,const int&,std::vector<uchar>*,const uint32_t&,const uint32_t&,struct param const*,struct filter const*,const Eigen::Ref<const MatrixXb>&,const Eigen::Ref<const Eigen::MatrixXd>&,const snp*,struct geno_block*,variant_block*,mstream&);\nvoid parseSnpfromBed(const int&,const int&,const std::vector<uchar>&,struct param const*,struct filter const*,const Eigen::Ref<const MatrixXb>&,const Eigen::Ref<const Eigen::MatrixXd>&,const snp*,struct geno_block*,variant_block*);\nvoid readChunkFromPGENFileToG(std::vector<uint64> const&,const int&,struct param const*,struct filter const*,Eigen::Ref<Eigen::MatrixXd>,PgenReader&,const Eigen::Ref<const MatrixXb>&,const Eigen::Ref<const Eigen::MatrixXd>&,std::vector<snp> const&,std::vector<variant_block>&);\n\nvoid skip_snps(uint64 const&,struct param const*,struct in_files*,struct geno_block*);\nvoid jumpto_bed(uint64 const&,uint64 const&,std::ifstream&);\nvoid buildLookupTable(std::vector<Eigen::ArrayXd>&);\nvoid prep_snp_stats(variant_block*,struct param const*);\nvoid initialize_thread_data(std::vector<data_thread>&,struct param const&);\nvoid reset_thread(data_thread*,struct param const&);\nvoid reset_stats(variant_block*,struct param const&);\nvoid update_trait_counts(int const&,double const&,double const&,int const&,double const&,variant_block*,const Eigen::Ref<const MatrixXb>&);\nvoid update_genocounts(bool const&,int const&,int const&,Eigen::MatrixXd&,const Eigen::Ref<const MatrixXb>&,const Eigen::Ref<const Eigen::MatrixXd>&);\nvoid compute_genocounts(bool const&,bool const&,double const&,const Eigen::Ref<const Eigen::ArrayXd>&,Eigen::Ref<Eigen::MatrixXi>,const Eigen::Ref<const Eigen::ArrayXi>&,std::vector<std::vector<Eigen::ArrayXi>> const&);\nvoid update_genocounts(bool const&,bool const&,Eigen::Ref<Eigen::VectorXi>,const Eigen::Ref<const Eigen::ArrayXi>&,const Eigen::Ref<const Eigen::ArrayXd>&,std::vector<Eigen::ArrayXi> const&);\nvoid update_genocounts_sp(bool const&,bool const&,Eigen::Ref<Eigen::VectorXi>,const Eigen::Ref<const Eigen::ArrayXi>&,const Eigen::Ref<const Eigen::ArrayXd>&,std::vector<Eigen::ArrayXi> const&);\nvoid update_af_cc(int 
const&,double const&,variant_block*,const Eigen::Ref<const MatrixXb>&,const Eigen::Ref<const Eigen::MatrixXd>&);\nvoid compute_mac(bool const&,double&,double const&,int const&,int const&,bool const&,bool const&,variant_block*,struct param const*);\nvoid compute_aaf_info(double&,double const&,bool const&,variant_block*,struct param const*);\nvoid flip_geno(double&,Eigen::Ref<Eigen::ArrayXd>,variant_block*,struct param const*);\nvoid check_sparse_G(int const&,int const&,struct geno_block*,uint32_t const&,const Eigen::Ref<const ArrayXb>&,int const&,const double&);\nvoid mean_impute_g(double &,const double&,const bool&);\nvoid mean_impute_g(const double&,Eigen::Ref<Eigen::ArrayXd>,const Eigen::Ref<const ArrayXb>&);\nvoid residualize_geno(int const&,int const&,variant_block*,bool const&,const Eigen::Ref<const Eigen::MatrixXd>&,struct geno_block*,struct param const*);\nvoid residualize_geno(const Eigen::Ref<const Eigen::MatrixXd>&,Eigen::Ref<Eigen::VectorXd>,variant_block*,struct param const&);\nvoid writeSnplist(std::string const&,int const&,int const&,std::vector<snp> const&,mstream&);\n\nbool in_chrList(const int&,struct filter const*);\nstd::string bgi_chrList(struct filter*,const int&);\nstd::string bgi_chrList(const int&,const int&);\nstd::string bgi_rsidList(std::map <std::string, uint64>&);\nbool in_range(int const&,uint32_t const&,struct param const*);\nbool in_non_par(int const&,uint32_t const&,struct param const*);\nfindID getIndivIndex(const std::string&,const std::string&,struct param*,mstream&);\n\n\n// for snp-set methods\nstruct vset {\n  int chrom = 0;\n  uint32_t physpos;\n  std::string ID;\n  std::vector<uint64> snp_indices;\n  MatrixXb Jmat; // MxKm for SKAT\n  ArrayXb  ultra_rare_ind; // Mx1 for SKAT\n  SpMat vc_rare_mask;// NxKm for SKAT\n  MatrixXb vc_rare_mask_non_missing;// NxKm for SKAT\n} ;\nstruct anno_name {\n  std::string name;\n  uint64 id;\n};\nstruct maskinfo {\n  std::string name, region_name = \"\";\n  uint64 id = 0ULL;\n  uint64 region = 0ULL;\n};\n\nvoid read_setlist(const struct in_files*,struct param*,struct filter*,std::vector<std::vector<vset>>&,std::vector<snp>&,const uint64,const double,mstream&);\nvoid check_sets_include_exclude(bool const&,const struct in_files*,struct param*,struct filter*,std::vector<std::vector<vset>>&,mstream&);\nvoid check_in_map_from_files_sets(bool const&,std::map<std::string,std::vector<int>>&,std::vector<std::string> const&,bool const&,mstream&);\n\nvoid get_masks_info(const struct in_files*,struct param*,struct filter*,std::map<std::string,anno_name>&,std::map <std::string, std::map <std::string,uint64>>&,std::vector<maskinfo>&,std::vector<std::vector<std::string>>&,uint64&,std::vector<snp>&,mstream& sout);\nvoid read_anno_cat(const struct in_files*,struct param*,std::map<std::string,anno_name>&,mstream& sout);\nvoid read_anno(struct param*,const struct in_files*,struct filter*,std::map<std::string,anno_name>&,std::map <std::string, std::map <std::string,uint64>>&,std::vector<snp>&,mstream& sout);\nvoid read_aafs(const double,const struct in_files*,struct filter*,std::vector<snp>&,bool const&,mstream& sout);\nbool check_singleton_column(std::string const&);\nvoid read_masks(const struct in_files*,struct param*,std::map<std::string,anno_name>&,std::vector<maskinfo>&,std::vector<std::vector<std::string>>&,uint64&,mstream& sout);\n\nvoid read_snp(bool const&,uint64 const&,Eigen::Ref<Eigen::ArrayXd>,Eigen::Ref<ArrayXb>,const Eigen::Ref<const ArrayXb>&,struct in_files*,PgenReader&,struct param*,bool const&);\nvoid 
read_snp_bed(uint64 const&,Eigen::Ref<Eigen::ArrayXd>,Eigen::Ref<ArrayXb>,const Eigen::Ref<const ArrayXb>&,struct in_files*,struct param*);\nvoid read_snp_pgen(uint64 const&,Eigen::Ref<Eigen::ArrayXd>,Eigen::Ref<ArrayXb>,PgenReader&,bool const&);\nvoid read_snp_bgen(uint64 const&,Eigen::Ref<Eigen::ArrayXd>,Eigen::Ref<ArrayXb>,const Eigen::Ref<const ArrayXb>&,std::string const&,bool const&,int const&);\nvoid code_snp(Eigen::MatrixXd&,Eigen::Ref<ArrayXb>,uint64 const&,struct filter*,struct in_files*,struct param*,mstream&);\n\n// for conditional analyses\nvoid get_conditional_vars(std::map<std::string,uint64>&,struct in_files*,struct param const*,mstream&);\nvoid get_snps_offset(std::map<std::string,uint64>&,std::map<std::string,uint32_t>&,std::vector<snp> const&,mstream&);\nvoid get_snps_offset(std::map<std::string,uint64>&,std::map<std::string,std::vector<uint64>>&,mstream&);\nvoid extract_from_genofile(std::string const&,Eigen::Ref<Eigen::MatrixXd>,bool const&,Eigen::Ref<ArrayXb>,struct filter*,struct in_files*,struct param*,mstream&);\nvoid setup_bgen(std::string const&,struct ext_geno_info&,geno_file_info*,std::map<std::string,uint64>*,std::map<std::string,std::vector<uint64>>&,Eigen::Ref<ArrayXb>,struct in_files*,struct param*,struct filter*,mstream&);\nvoid read_snps_bgen(bool const&,std::map<std::string,uint64>&,Eigen::Ref<Eigen::MatrixXd>,struct ext_geno_info&,Eigen::Ref<ArrayXb>,std::string const&,struct param*);\nvoid read_snps_bgen(bool const&,std::map<std::string,uint64>&,Eigen::Ref<Eigen::MatrixXd>,struct ext_geno_info&,Eigen::Ref<ArrayXb>,std::string const&);\nvoid setup_pgen(struct ext_geno_info&,geno_file_info*,std::map<std::string,std::vector<uint64>>&,Eigen::Ref<ArrayXb>,struct param*,mstream&);\nuint32_t read_pvar(std::map<std::string,std::vector<uint64>>&,geno_file_info*,struct param*,mstream&);\nuint32_t read_psam(struct ext_geno_info&,geno_file_info*,Eigen::Ref<ArrayXb>,struct param*,mstream&);\nvoid prep_pgen(uint32_t&,uint32_t&,struct ext_geno_info&,geno_file_info*);\nvoid read_snps_pgen(bool const&,std::map<std::string,uint64>&,Eigen::Ref<Eigen::MatrixXd>,struct ext_geno_info&,Eigen::Ref<ArrayXb>);\nvoid setup_bed(struct ext_geno_info&,geno_file_info*,std::map<std::string,std::vector<uint64>>&,Eigen::Ref<ArrayXb>,struct param*,mstream&);\nuint32_t read_bim(std::map<std::string,std::vector<uint64>>&,geno_file_info*,struct param*,mstream&);\nuint32_t read_fam(struct ext_geno_info&,geno_file_info*,Eigen::Ref<ArrayXb>,struct param*,mstream&);\nvoid prep_bed(uint32_t&,struct ext_geno_info&,struct in_files const*);\nvoid read_snps_bed(bool const&,std::map<std::string,uint64>&,Eigen::Ref<Eigen::MatrixXd>,struct ext_geno_info&,Eigen::Ref<ArrayXb>,std::string const&,struct param*,mstream&);\n\n#endif\n"
  },
  {
    "path": "src/HLM.cpp",
    "content": "/*\n\n   This file is part of the regenie software package.\n\n   Copyright (c) 2020-2024 Joelle Mbatchou, Andrey Ziyatdinov & Jonathan Marchini\n\n   Permission is hereby granted, free of charge, to any person obtaining a copy\n   of this software and associated documentation files (the \"Software\"), to deal\n   in the Software without restriction, including without limitation the rights\n   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n   copies of the Software, and to permit persons to whom the Software is\n   furnished to do so, subject to the following conditions:\n\n   The above copyright notice and this permission notice shall be included in all\n   copies or substantial portions of the Software.\n\n   THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n   SOFTWARE.\n\n*/\n\n#include \"Regenie.hpp\"\n#include \"Files.hpp\"\n#include \"Geno.hpp\"\n#include \"survival_data.hpp\"\n#include \"cox_score.hpp\"\n#include \"Step1_Models.hpp\"\n#include \"Pheno.hpp\"\n#include \"HLM.hpp\"\n\nusing namespace std;\nusing namespace Eigen;\nusing namespace boost;\nusing namespace LBFGSpp;\nusing boost::math::normal;\nusing boost::math::chi_squared;\n\nHLM::HLM(){\n}\n\nHLM::~HLM(){\n}\n\nvoid HLM::prep_run(struct phenodt const* pheno_data, struct param const* params){\n\n  // store Vlin = (1, E) \n  allocate_mat(Vlin, pheno_data->interaction_cov.rows(), params->ncov_interaction + 1);\n  Vlin << MatrixXd::Ones(pheno_data->interaction_cov.rows(),1), pheno_data->interaction_cov; \n\n  if(params->hlm_vquad && params->int_add_extra_term){\n\n    // set V = (1, E, E^2) [ apply QR to U = (E,E^2), center & scale, and set V = (1, U)]\n    MatrixXd Vtmp (Vlin.rows(), params->ncov_interaction * 2);\n    Vtmp << pheno_data->interaction_cov, pheno_data->interaction_cov.array().square().matrix();\n    //cerr << \"pre:\\n\" << Vtmp.topRows(5) << \"\\n\\n\";\n    apply_QR(Vtmp, params, true);\n    allocate_mat(V, Vtmp.rows(), Vtmp.cols() + 1);\n    V << MatrixXd::Ones(Vtmp.rows(),1), Vtmp; \n    //cerr << \"post:\\n\" << V.topRows(5) << \"\\n\\n\";\n\n  } else {\n\n    // set V = (1, E) and center & scale E\n    allocate_mat(V, Vlin.rows(), Vlin.cols());\n    V = Vlin;\n    rescale_mat(V.rightCols(params->ncov_interaction), params); \n\n  }\n\n  // set X = (covs, E^2?, blup) - covs may include E\n  MatrixXd Xtmp (pheno_data->new_cov.rows(), params->ncov + (params->int_add_extra_term && !params->add_homdev ? params->ncov_interaction : 0 ));\n  if(params->int_add_extra_term && !params->add_homdev) {\n    //cerr << \"pre:\\n\" << Xtmp.topRows(5) << \"\\n\\n\";\n    Xtmp << pheno_data->new_cov, pheno_data->interaction_cov.array().square().matrix();\n    apply_QR(Xtmp, params, false);\n  } else\n    Xtmp = pheno_data->new_cov;\n  //cerr << \"post:\\n\" << Xtmp.topRows(5) << \"\\n\\n\";\n\n  allocate_mat(X, Xtmp.rows(), Xtmp.cols() + (params->skip_blups ? 
0 : 1) );\n  X.leftCols(Xtmp.cols()) = Xtmp; \n\n  // for projection under null\n  Px.resize(params->n_pheno);\n  allocate_mat(yres, pheno_data->interaction_cov.rows(), params->n_pheno);\n\n}\n\n// For each phenotype, fit the null HLM \n//  Y = Xa + e, where e ~ N(0, exp(Vb) )\n//  b is estimated by minimizing f(b) = sum_i m_i( v_i'b + e_i^2 exp(-v_i'b) )/n, with a profiled out via weighted least squares (see the functor in HLM.hpp)\n//  Defined outside the class so the HLM object can be passed as the functor to the LBFGS solver\nvoid HLM_fitNull(HLM& nullHLM, struct ests const& m_ests, struct phenodt const& pheno_data, struct in_files const& files, struct param const& params, mstream& sout){\n\n  // if no blup predictions are given, this only needs to be run once\n  if(params.skip_blups && !nullHLM.first_fit) \n    return;\n\n  sout << \"   -fitting null HLMs for each trait...\" << flush;\n  auto t1 = std::chrono::high_resolution_clock::now();\n\n  double fx;\n  VectorXd beta(nullHLM.V.cols());\n  allocate_mat(nullHLM.Dinv_sqrt, pheno_data.phenotypes_raw.rows(), pheno_data.phenotypes_raw.cols());\n\n  LBFGSParam<double> bfgs_param;\n  bfgs_param.max_iterations = nullHLM.max_iter;\n  bfgs_param.max_linesearch = nullHLM.linesearch_try; // use more lenient number\n  LBFGSSolver<double> solver(bfgs_param);\n\n  \n  for(int i = 0; i < params.n_pheno; i++){\n    if( !params.pheno_pass(i) ) continue;\n\n    nullHLM.n = pheno_data.Neff(i);\n    nullHLM.mask = pheno_data.masked_indivs.col(i);\n    nullHLM.y = pheno_data.phenotypes_raw.col(i);\n    if(!params.skip_blups) // add blup as a covariate\n      nullHLM.X.rightCols(1) = m_ests.blups.col(i);\n    beta.array() = 0;\n\n    try {\n      \n      // get starting value for b\n      nullHLM.get_alpha(beta);\n      nullHLM.get_beta_approx(beta);\n      // LBFGS\n      solver.minimize(nullHLM, beta, fx);\n      nullHLM.store_null_est(i);\n      if(params.debug) nullHLM.check_gradient();\n\n    } catch(...){\n\n      // redo with higher number of line search trials\n      try {\n\n        if(params.debug) nullHLM.check_gradient();\n        if(params.verbose) \n          sout << \"Retrying HLM null model fitting for \" << files.pheno_names[i] << \"...\";\n\n        LBFGSParam<double> bfgs_param_retry;\n        bfgs_param_retry.max_iterations = nullHLM.max_iter_retry;\n        bfgs_param_retry.max_linesearch = nullHLM.linesearch_retry; \n        bfgs_param_retry.max_step = nullHLM.max_step_retry; \n        LBFGSSolver<double> solver_retry(bfgs_param_retry);\n\n        // set starting value for b\n        beta.array() = 0.01;\n        nullHLM.get_alpha(beta);\n        nullHLM.get_beta_approx(beta);\n        // LBFGS\n        solver_retry.minimize(nullHLM, beta, fx);\n        nullHLM.store_null_est(i);\n        if(params.debug) nullHLM.check_gradient();\n\n      } catch(...){\n        \n        // Final fallback with very conservative parameters\n        try {\n\n          if(params.debug) nullHLM.check_gradient();\n          if(params.verbose)\n            sout << \"Final fallback attempt for HLM null model fitting for \" << files.pheno_names[i] << \"...\";\n\n          LBFGSParam<double> bfgs_param_fallback;\n          bfgs_param_fallback.epsilon = 1e-4;      // Relax convergence\n          LBFGSSolver<double> solver_fallback(bfgs_param_fallback);\n\n          // set starting value for b\n          beta.array() = 0.02;\n          solver_fallback.minimize(nullHLM, beta, fx);\n          nullHLM.store_null_est(i);\n          if(params.debug) nullHLM.check_gradient();\n          \n        } catch(...){\n          if( nullHLM.check_gradient() ) continue; // if gradient is ok, then just continue\n          throw \"LBFGS could not fit HLM null model 
for trait \" + files.pheno_names[i] + \" even with fallback methods\";\n        }\n      }\n\n    }\n    //cerr << \"\\nFinal--\\nalpha=\\n\"<<nullHLM.alpha << \"\\n\\nbeta=\\n\" << beta <<\"\\n\\nfx=\" << fx << \"\\t\" << std::boolalpha << isnan(fx);\n\n  }\n\n  //cerr << \"\\n\\n\" << nullHLM.yres.topRows(5)<<\"\\n\\n\";\n\n  sout << \"done\";\n  auto t2 = std::chrono::high_resolution_clock::now();\n  auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1);\n  sout << \" (\" << duration.count() << \"ms) \"<< endl;\n\n  nullHLM.first_fit = false;\n}\n\n\nvoid HLM::get_alpha(VectorXd const& beta){\n\n  Vb = (V * beta).array();\n  Dinv = (-Vb).exp() * mask.cast<double>();\n  if( (Dinv == 0).all() ) // will cause Xd = 0\n    throw std::underflow_error(\"D=0 occurred\");\n  MatrixXd Xd = (X.array().colwise() * Dinv).matrix().transpose();\n  alpha = (Xd * X).colPivHouseholderQr().solve( Xd * y );\n\n}\n\nvoid HLM::get_beta_approx(VectorXd& beta){\n\n  ArrayXd esq = ((y - X * alpha).array() * mask.cast<double>()).square();\n  //cerr << \"\\nE=\\n\" << esq.head(10) << \"\\n\\n\";\n  beta = (V.transpose() * esq.matrix().asDiagonal() * V).colPivHouseholderQr().solve( V.transpose() * ((esq - 1) * mask.cast<double>()).matrix() );\n  //cerr << \"alpha:\\n\" << alpha << \"\\n\\nbeta:\\n\" << beta <<\"\\n\\n\";\n\n}\n\n// get projection matrix\nvoid HLM::store_null_est(int const& ph){\n\n  Dinv_sqrt.col(ph) = Dinv.sqrt().matrix();\n  MatrixXd Xd = (X.array().colwise() * Dinv_sqrt.col(ph).array()).matrix();\n  SelfAdjointEigenSolver<MatrixXd> es(Xd.transpose() * Xd);\n  VectorXd eigD = es.eigenvalues();\n\n  Px[ph] = ((Xd * es.eigenvectors()).array().rowwise() / eigD.transpose().array().sqrt()).matrix();\n  //cerr << \"\\nP=\\n\" << Px[ph].topRows(5) << \"\\n\\n\";\n\n  residualize(ph, y, yres.col(ph));\n  \n}\n\nvoid HLM::residualize(int const& ph, Ref<MatrixXd> mat_orig, Ref<MatrixXd> mat_res){\n\n  MatrixXd m = (mat_orig.array().colwise() * Dinv_sqrt.col(ph).array()).matrix();\n  //cerr << \"Y\" << ph+1 << \"\\norig:\\n\" << print_mat_dims(mat_orig) << \n   // \"\\nm:\\n\" << print_mat_dims(m) << \"\\nPx:\\n\" << print_mat_dims(Px[ph]) << endl;\n  mat_res = m -  Px[ph] * (Px[ph].transpose() * m);\n\n}\n\nbool HLM::check_gradient() {\n  // This function is for debugging purposes to check the gradient after fitting the null model.\n  if( (Dinv == 0).all() ) return false; // will cause Xd = 0\n  Eigen::ArrayXd esq = ((y - X * alpha).array()).square();\n  VectorXd gradient = V.transpose() * ( ((1 - esq * Dinv) * mask.cast<double>()) / n ).matrix();\n  double max_grad = gradient.array().abs().maxCoeff();\n  cerr << \"max_grad : \" << max_grad << \"\\n\";\n  return max_grad < 2.5e-4;\n}\n"
  },
  {
    "path": "src/HLM.hpp",
    "content": "/*\n\n   This file is part of the regenie software package.\n\n   Copyright (c) 2020-2024 Joelle Mbatchou, Andrey Ziyatdinov & Jonathan Marchini\n\n   Permission is hereby granted, free of charge, to any person obtaining a copy\n   of this software and associated documentation files (the \"Software\"), to deal\n   in the Software without restriction, including without limitation the rights\n   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n   copies of the Software, and to permit persons to whom the Software is\n   furnished to do so, subject to the following conditions:\n\n   The above copyright notice and this permission notice shall be included in all\n   copies or substantial portions of the Software.\n\n   THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n   SOFTWARE.\n\n*/\n\n\n#ifndef HLM_H\n#define HLM_H\n\n#include <LBFGS.h>\n\n// to use heteroskedastic linear model\nclass HLM\n{\n\n  public:\n\n    int first_fit = true; // only run null model once if no blups\n    int max_iter = 100; // maximum number of iterations for LBFGS\n    int max_iter_retry = 500; // maximum number of iterations for LBFGS\n    int linesearch_try = 50; // number of linesearch trials\n    int linesearch_retry = 200; // max number of linesearch trials\n    int max_step_retry = 1000; // max step size\n    int n; // sample size for each trait\n\n    // to fit null\n    Eigen::MatrixXd X; // covariates for mean\n    Eigen::MatrixXd Vlin, V; // covariates for variance\n    Eigen::VectorXd y; // phenotype analyzed\n    Eigen::VectorXd alpha;\n    Eigen::ArrayXd Vb, Dinv;\n    ArrayXb mask;\n\n    // stored est from null\n    Eigen::MatrixXd Dinv_sqrt, yres;\n    std::vector<Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic> > Px; // for each trait\n\n    void prep_run(struct phenodt const*,struct param const*);\n    void get_alpha(Eigen::VectorXd const&);\n    void get_beta_approx(Eigen::VectorXd&);\n    void store_null_est(int const&);\n    void residualize(int const&,Eigen::Ref<Eigen::MatrixXd>,Eigen::Ref<Eigen::MatrixXd>);\n    bool check_gradient();\n\n    // functor to minimize\n    double operator()(Eigen::VectorXd const& beta, Eigen::VectorXd& gradient){\n\n      // get Vb, Dinv and alpha\n      get_alpha(beta);\n      Eigen::ArrayXd esq = ((y - X * alpha).array()).square();\n\n      // f = -2 ( ll/n + 0.5 log(2pi) )\n      double fval = ((Vb + Dinv * esq) * mask.cast<double>()).sum() / n;\n      // update gradient\n      gradient = V.transpose() * ( ((1 - esq * Dinv) * mask.cast<double>()) / n ).matrix();\n\n      return fval;\n    }\n\n    HLM();\n    ~HLM();\n\n};\n\n\nvoid HLM_fitNull(HLM& nullHLM, struct ests const&,struct phenodt const&,struct in_files const&,struct param const&,mstream&);\n\n#endif\n"
  },
  {
    "path": "src/Interaction.cpp",
    "content": "/*\n\n   This file is part of the regenie software package.\n\n   Copyright (c) 2020-2024 Joelle Mbatchou, Andrey Ziyatdinov & Jonathan Marchini\n\n   Permission is hereby granted, free of charge, to any person obtaining a copy\n   of this software and associated documentation files (the \"Software\"), to deal\n   in the Software without restriction, including without limitation the rights\n   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n   copies of the Software, and to permit persons to whom the Software is\n   furnished to do so, subject to the following conditions:\n\n   The above copyright notice and this permission notice shall be included in all\n   copies or substantial portions of the Software.\n\n   THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n   SOFTWARE.\n\n*/\n\n#include \"Regenie.hpp\"\n#include \"Files.hpp\"\n#include \"Geno.hpp\"\n#include \"survival_data.hpp\"\n#include \"cox_score.hpp\"\n#include \"Step1_Models.hpp\"\n#include \"Step2_Models.hpp\"\n#include \"Pheno.hpp\"\n#include \"HLM.hpp\"\n#include \"Interaction.hpp\"\n\nusing namespace std;\nusing namespace Eigen;\nusing namespace boost;\nusing boost::math::normal;\nusing boost::math::chi_squared;\n\nvoid get_interaction_terms(const int& isnp, const int& thread, struct phenodt* pheno_data, struct geno_block* gblock, variant_block* snp_data, HLM& nullHLM, struct param const* params, mstream& sout){\n\n  if(snp_data->skip_int) return; \n\n  data_thread* dt_thr = &(gblock->thread_data[thread]);\n  MatrixXd iMat;\n  SpMat Gdiag;\n\n  if(dt_thr->is_sparse)\n    Gdiag = gblock->Gmat.col(isnp).asDiagonal();\n\n  // if rare, use HLM\n  if((params->trait_mode==0) && !params->no_robust && !params->force_robust && (snp_data->mac < params->rareMAC_inter).any()){\n\n    // get (G, G*E)\n    allocate_mat(pheno_data->Hmat[thread], nullHLM.Vlin.rows(), params->interaction_istart + nullHLM.Vlin.cols());\n    if(dt_thr->is_sparse)\n      pheno_data->Hmat[thread].rightCols(nullHLM.Vlin.cols()) = Gdiag * nullHLM.Vlin;\n    else\n      pheno_data->Hmat[thread].rightCols(nullHLM.Vlin.cols()) = (nullHLM.Vlin.array().colwise() * gblock->Gmat.col(isnp).array()).matrix();\n\n    // add main effects for G_E if specified (not re-scaled)\n    if(!params->gwas_condtl) {\n      pheno_data->Hmat[thread].leftCols(pheno_data->interaction_cov.cols()) = pheno_data->interaction_cov;\n      if(params->add_homdev)\n        pheno_data->Hmat[thread].col(pheno_data->interaction_cov.cols()) = pheno_data->interaction_homdev;\n    }\n\n    snp_data->fitHLM = true;\n    return;\n\n  }\n\n  if(dt_thr->is_sparse) \n    iMat = Gdiag * pheno_data->interaction_cov;\n  else\n    iMat = (pheno_data->interaction_cov.array().colwise() * gblock->Gmat.col(isnp).array()).matrix();\n  //if(isnp==0) cerr << iMat.topRows(10) << endl; \n\n  // remove covariate effects\n  snp_data->skip_int = !residualize_matrix(iMat, pheno_data->scf_i[thread], pheno_data->new_cov.leftCols( params->ncov + (params->blup_cov && (params->trait_mode == 1) ? 
-1 : 0)), params->n_analyzed, params->numtol);\n  if(snp_data->skip_int) return;\n\n  // start filling matrix with C*G terms (if not condtl, also add residual of G_E)\n  pheno_data->Hmat[thread].resize(pheno_data->interaction_cov.rows(), params->ncov_interaction + params->interaction_istart + 1);\n  pheno_data->Hmat[thread].rightCols(iMat.cols()) = iMat;\n  if(!params->gwas_condtl) pheno_data->Hmat[thread].leftCols(pheno_data->interaction_cov_res.cols()) = pheno_data->interaction_cov_res;\n\n}\n\n\n/// Interaction tests\nvoid apply_interaction_tests(const int& index, const int& isnp, const int& thread, const Ref<const MatrixXd>& res, const Ref<const RowVectorXd>& sd_yres, string const& model_type, string const& test_string, struct phenodt* pheno_data, HLM& nullHLM, struct filter const* filters, struct in_files* files, struct geno_block* gblock, variant_block* snp_data, vector<snp> const& snpinfo, struct ests* m_ests, struct f_ests* fest, struct param const* params, mstream& sout){\n\n  if(snp_data->skip_int) return;\n\n  if(params->trait_mode==1)\n    apply_interaction_tests_bt(index, isnp, thread, model_type, test_string, pheno_data, filters, files, gblock, snp_data, snpinfo, m_ests, fest, params, sout);\n  else if((params->trait_mode==0) && snp_data->fitHLM)\n    apply_interaction_tests_HLM(index, isnp, thread, res, sd_yres, model_type, test_string, pheno_data, nullHLM, filters, files, gblock, snp_data, snpinfo, params, sout);\n  else if(params->trait_mode==0)\n    apply_interaction_tests_qt(index, isnp, thread, res, sd_yres, model_type, test_string, pheno_data, filters, files, gblock, snp_data, snpinfo, params, sout);\n\n}\n\nvoid apply_interaction_tests_qt(const int& index, const int& isnp, const int& thread, const Ref<const MatrixXd>& res, const Ref<const RowVectorXd>& sd_yres, string const& model_type, string const& test_string, struct phenodt* pheno_data, struct filter const* filters, struct in_files* files, struct geno_block* gblock, variant_block* snp_data, vector<snp> const& snpinfo, struct param const* params, mstream& sout){\n\n  int beg = params->interaction_istart;\n  string df_str = to_string(1+params->ncov_interaction);\n\n  // fill rest of matrix [ G, C*G ]\n  pheno_data->Hmat[thread].col(beg) = gblock->Gmat.col(isnp);\n  //cerr << pheno_data->Hmat[thread].topRows(3) << endl; exit(-1);\n\n  // pre-compute Z = (M^tM)^(-1) for all phenos \n  SelfAdjointEigenSolver<MatrixXd> esM(pheno_data->Hmat[thread].transpose() * pheno_data->Hmat[thread]);\n  if( esM.eigenvalues().minCoeff() < params->numtol ) return;\n  MatrixXd Z = esM.eigenvectors() * esM.eigenvalues().cwiseInverse().asDiagonal() * esM.eigenvectors().transpose();\n\n  // get leverage h = diag( M * Z * M^t )\n  VectorXd hvec = ((pheno_data->Hmat[thread] * Z).array() * pheno_data->Hmat[thread].array()).matrix().rowwise().sum();\n\n  // estimates for all traits\n  ArrayXd hc3, hc4;\n  MatrixXd tau = Z * pheno_data->Hmat[thread].transpose() * res;\n  MatrixXd e_sq = ((res - pheno_data->Hmat[thread] * tau).array().square() * pheno_data->masked_indivs.array().cast<double>()).matrix();\n  if(!params->no_robust){\n    hc3 = (1 - hvec.array()).square(); \n    if(params->force_hc4)\n      hc4 = (1 - hvec.array()).pow( (pheno_data->Hmat[thread].rows() * hvec.array() / pheno_data->Hmat[thread].cols()).min(4) );\n    //if(isnp==0) cerr << \"tau=\\n\" << tau << \"\\n\\n ei=\\n\" << e_sq.topRows(3) << endl;\n  }\n\n  chi_squared chisqI(params->ncov_interaction);\n  chi_squared chisqK(params->ncov_interaction+1);\n  double logp, 
gscale, tstat, sehat;\n  string head = \"\", stmp;\n  MatrixXd Vmat, Vinv;\n  ArrayXd iscale, cscale;\n\n  // for output\n  if(!params->htp_out) head = print_sum_stats_head(index, snpinfo);\n\n  for(int i = 0; i < params->n_pheno; ++i ) {\n    if( !params->pheno_pass(i) ) continue;\n\n    if( snp_data->ignored_trait(i) ) continue;\n\n    std::ostringstream buffer;\n\n    MapArXd bhat (tau.col(i).data(), tau.rows(), 1);\n    gscale = pheno_data->scale_Y(i) * sd_yres(i) / snp_data->scale_fac;\n    iscale = pheno_data->scale_Y(i) * sd_yres(i) / pheno_data->scf_i[thread];\n    if(!params->gwas_condtl) cscale = pheno_data->scale_Y(i) * sd_yres(i) / pheno_data->scl_inter_X;\n\n    // using sandwich estimator\n    if(params->no_robust) // model-based\n      Vmat = e_sq.col(i).sum() / (pheno_data->Neff(i) - params->ncov_analyzed - Z.cols()) * Z; // s^2*(XtX)^-1\n    else if(params->force_hc4 && (snp_data->mac(i) <= params->rareMAC_inter)) // HC4\n      Vmat = Z * pheno_data->Hmat[thread].transpose() * (e_sq.col(i).array() / hc4).matrix().asDiagonal() * pheno_data->Hmat[thread] * Z;\n    else // HC3\n      Vmat = Z * pheno_data->Hmat[thread].transpose() * (e_sq.col(i).array() / hc3).matrix().asDiagonal() * pheno_data->Hmat[thread] * Z;\n    //if(index==500) {cerr << \"\\nZ:\\n\" << Z << \"\\nV=\\n\" << Vmat ; exit(-1);}\n\n    // print cov(beta) (rescale)\n    if(params->print_vcov && !params->gwas_condtl){\n      Files fout;\n      fout.openForWrite(files->out_file + \"_\" + files->pheno_names[i] + \"_\" + filters->interaction_cov + \"_\" + snpinfo[index].ID + \".vcov\", sout);\n      MatrixXd scvec (pheno_data->Hmat[thread].cols(), 1);\n      scvec.col(0).array().head( cscale.size() ) = cscale;\n      scvec(beg, 0) = gscale;\n      scvec.col(0).array().tail( iscale.size() ) = iscale;\n      IOFormat Fmt(StreamPrecision, DontAlignCols, \" \", \"\\n\", \"\", \"\",\"\",\"\");\n      fout << (scvec.col(0).asDiagonal() * Vmat * scvec.col(0).asDiagonal()).format(Fmt); \n      fout.closeFile();\n    }\n\n    ///////////////////////\n    // print main effect of G_E\n    if(beg > 0){\n      for(int j = 0; j < beg; j++){ \n        tstat = bhat(j) * bhat(j) / Vmat(j,j);\n        sehat = sqrt(Vmat(j,j)) * cscale(j);\n        get_logp(logp, tstat);\n        if(params->interaction_cat)\n          stmp=\"-INT_\" + filters->interaction_cov + \"=\" + params->interaction_lvl_names[j];\n        else if(params->add_homdev && (j != 0))\n          stmp=\"-INT_\" + filters->interaction_cov + \"-HOM\"; // G_E>=1.5\n        else\n          stmp=\"-INT_\" + filters->interaction_cov; // single cov\n\n        // print sum_stats\n        if(params->htp_out) \n          buffer << print_sum_stats_head_htp(index, files->pheno_names[i], model_type + stmp, snpinfo, params) << print_sum_stats_htp(bhat(j) * cscale(j), sehat, tstat, logp, snp_data->af(i), snp_data->info(i), snp_data->mac(i), snp_data->genocounts, i, true, 1, params);\n        else \n          buffer << (!params->split_by_pheno && (i>0) ? \"\" : head) << print_sum_stats((params->split_by_pheno ? snp_data->af(i) : snp_data->af1),snp_data->af_case(i),snp_data->af_control(i),snp_data->n_rr,snp_data->n_aa, (params->split_by_pheno ? snp_data->info(i) : snp_data->info1), (params->split_by_pheno ? 
snp_data->ns(i) : snp_data->ns1),snp_data->ns_case(i),snp_data->ns_control(i), test_string + stmp, bhat(j) * cscale(j), sehat, tstat, logp, true, 1, params, (i+1));\n      }\n    }\n\n    ///////////////////////\n    //////  marginal test\n    // T, beta, se & pv\n    tstat = bhat(beg) * bhat(beg) / Vmat(beg,beg);\n    sehat = sqrt(Vmat(beg,beg)) * gscale;\n    get_logp(logp, tstat);\n    stmp=\"-INT_SNP\";\n\n    // print sum_stats\n    if(params->htp_out) \n      buffer << print_sum_stats_head_htp(index, files->pheno_names[i], model_type + stmp, snpinfo, params) << print_sum_stats_htp(bhat(beg) * gscale, sehat, tstat, logp, snp_data->af(i), snp_data->info(i), snp_data->mac(i), snp_data->genocounts, i, true, 1, params);\n    else \n      buffer << (!params->split_by_pheno && (i>0) ? \"\" : head) << print_sum_stats((params->split_by_pheno ? snp_data->af(i) : snp_data->af1),snp_data->af_case(i),snp_data->af_control(i),snp_data->n_rr,snp_data->n_aa, (params->split_by_pheno ? snp_data->info(i) : snp_data->info1), (params->split_by_pheno ? snp_data->ns(i) : snp_data->ns1),snp_data->ns_case(i),snp_data->ns_control(i), test_string + stmp, bhat(beg) * gscale, sehat, tstat, logp, true, 1, params, (i+1));\n\n\n    ///////////////////////\n    //////  interaction tests\n    if(params->ncov_interaction > 1){ \n\n      // print effects for each interaction term\n      for(int j = 0; j < params->ncov_interaction; j++){ \n        tstat = bhat(beg+1+j) * bhat(beg+1+j) / Vmat(beg+1+j,beg+1+j);\n        sehat = sqrt(Vmat(beg+1+j,beg+1+j)) * iscale(j);\n        get_logp(logp, tstat);\n        stmp=\"-INT_SNPx\" + filters->interaction_cov + \"=\" + params->interaction_lvl_names[j];\n        // print sum_stats\n        if(params->htp_out) \n          buffer << print_sum_stats_head_htp(index, files->pheno_names[i], model_type + stmp, snpinfo, params) << print_sum_stats_htp(bhat(beg+1+j) * iscale(j), sehat, tstat, logp, snp_data->af(i), snp_data->info(i), snp_data->mac(i), snp_data->genocounts, i, true, 1, params);\n        else \n          buffer << (!params->split_by_pheno && (i>0) ? \"\" : head) << print_sum_stats((params->split_by_pheno ? snp_data->af(i) : snp_data->af1),snp_data->af_case(i),snp_data->af_control(i),snp_data->n_rr,snp_data->n_aa, (params->split_by_pheno ? snp_data->info(i) : snp_data->info1), (params->split_by_pheno ? snp_data->ns(i) : snp_data->ns1),snp_data->ns_case(i),snp_data->ns_control(i), test_string + stmp, bhat(beg+1+j) * iscale(j), sehat, tstat, logp, true, 1, params, (i+1));\n      }\n\n      // joint test for interaction terms\n      // T, beta, se & pv\n      Vinv = Vmat.block(beg+1,beg+1,params->ncov_interaction,params->ncov_interaction).inverse();\n      tstat = fabs( (bhat.tail(params->ncov_interaction).matrix().transpose() * Vinv * bhat.tail(params->ncov_interaction).matrix()).sum() );\n      logp = max(params->nl_dbl_dmin, cdf(complement(chisqI, tstat)));\n      logp = -log10( logp );\n      stmp=\"-INT_SNPx\" + filters->interaction_cov;\n      // print sum_stats\n      if(params->htp_out) \n        buffer << print_sum_stats_head_htp(index, files->pheno_names[i], model_type + stmp, snpinfo, params) << print_sum_stats_htp(-1, -1, tstat, logp, snp_data->af(i), snp_data->info(i), snp_data->mac(i), snp_data->genocounts, i, true, params->ncov_interaction, params);\n      else \n        buffer << (!params->split_by_pheno && (i>0) ? \"\" : head) << print_sum_stats((params->split_by_pheno ? 
snp_data->af(i) : snp_data->af1),snp_data->af_case(i),snp_data->af_control(i),snp_data->n_rr,snp_data->n_aa, (params->split_by_pheno ? snp_data->info(i) : snp_data->info1), (params->split_by_pheno ? snp_data->ns(i) : snp_data->ns1),snp_data->ns_case(i),snp_data->ns_control(i), test_string + stmp, -1, -1, tstat, logp, true, params->ncov_interaction, params, (i+1));\n\n    } else {\n      // T, beta, se & pv\n      tstat = bhat(beg+1) * bhat(beg+1) / Vmat(beg+1,beg+1);\n      sehat = sqrt(Vmat(beg+1,beg+1)) * iscale(0);\n      get_logp(logp, tstat);\n      if(params->interaction_cat)\n        stmp=\"-INT_SNPx\" + filters->interaction_cov + \"=\" + params->interaction_lvl_names[0];\n      else\n        stmp=\"-INT_SNPx\" + filters->interaction_cov; // single cov\n\n      // print sum_stats\n      if(params->htp_out) \n        buffer << print_sum_stats_head_htp(index, files->pheno_names[i], model_type + stmp, snpinfo, params) << print_sum_stats_htp(bhat(beg+1) * iscale(0), sehat, tstat, logp, snp_data->af(i), snp_data->info(i), snp_data->mac(i), snp_data->genocounts, i, true, 1, params);\n      else \n        buffer << (!params->split_by_pheno && (i>0) ? \"\" : head) << print_sum_stats((params->split_by_pheno ? snp_data->af(i) : snp_data->af1),snp_data->af_case(i),snp_data->af_control(i),snp_data->n_rr,snp_data->n_aa, (params->split_by_pheno ? snp_data->info(i) : snp_data->info1), (params->split_by_pheno ? snp_data->ns(i) : snp_data->ns1),snp_data->ns_case(i),snp_data->ns_control(i), test_string + stmp, bhat(beg+1) * iscale(0), sehat, tstat, logp, true, 1, params, (i+1));\n    }\n\n    ///////////////////////\n    //////  joint test for G and C*G\n    Vinv = Vmat.block(beg,beg,params->ncov_interaction+1,params->ncov_interaction+1).inverse();\n\n    // T & pv\n    tstat = fabs( (bhat.tail(params->ncov_interaction+1).matrix().transpose() * Vinv * bhat.tail(params->ncov_interaction+1).matrix()).sum() );\n    logp = max(params->nl_dbl_dmin, cdf(complement(chisqK, tstat)));\n    logp = -log10( logp );\n    stmp=\"-INT_\" + df_str + \"DF\";\n\n    // print sum_stats\n    if(params->htp_out) \n      buffer << print_sum_stats_head_htp(index, files->pheno_names[i], model_type + stmp, snpinfo, params) << print_sum_stats_htp(-1, -1, tstat, logp, snp_data->af(i), snp_data->info(i), snp_data->mac(i), snp_data->genocounts, i, true, 1+params->ncov_interaction, params);\n    else \n      buffer << (!params->split_by_pheno && (i>0) ? \"\" : head) << print_sum_stats((params->split_by_pheno ? snp_data->af(i) : snp_data->af1),snp_data->af_case(i),snp_data->af_control(i),snp_data->n_rr,snp_data->n_aa, (params->split_by_pheno ? snp_data->info(i) : snp_data->info1), (params->split_by_pheno ? 
snp_data->ns(i) : snp_data->ns1),snp_data->ns_case(i),snp_data->ns_control(i), test_string + stmp, -1, -1, tstat, logp, true, 1+params->ncov_interaction, params, (i+1));\n\n    //if(isnp==0 & i==0) cerr << endl << buffer.str() << endl;\n    snp_data->sum_stats[i].append( buffer.str() );\n\n  }\n\n}\n\n\nvoid apply_interaction_tests_HLM(const int& index, const int& isnp, const int& thread, const Ref<const MatrixXd>& res, const Ref<const RowVectorXd>& sd_yres, string const& model_type, string const& test_string, struct phenodt* pheno_data, HLM& nullHLM, struct filter const* filters, struct in_files* files, struct geno_block* gblock, variant_block* snp_data, vector<snp> const& snpinfo, struct param const* params, mstream& sout){\n\n  int beg = params->interaction_istart;\n  string df_str = to_string(1+params->ncov_interaction);\n\n  //cerr << pheno_data->Hmat[thread].topRows(20) << endl; exit(-1);\n\n  chi_squared chisqI(params->ncov_interaction);\n  chi_squared chisqK(params->ncov_interaction+1);\n  double logp, tstat, sehat;\n  string head = \"\", stmp;\n  ArrayXd bhat;\n  MatrixXd Xres, Vmat, Vinv;\n  allocate_mat(Xres, pheno_data->Hmat[thread].rows(), pheno_data->Hmat[thread].cols());\n\n  // for output\n  if(!params->htp_out) head = print_sum_stats_head(index, snpinfo);\n\n  for(int i = 0; i < params->n_pheno; ++i ) {\n    if( !params->pheno_pass(i) ) continue;\n\n    if( snp_data->ignored_trait(i) ) continue;\n\n    std::ostringstream buffer;\n\n    // get the residuals using null HLM model\n    nullHLM.residualize(i, pheno_data->Hmat[thread], Xres);\n\n    // OLS (V is different for each trait) - sigma^2=1\n    SelfAdjointEigenSolver<MatrixXd> esM(Xres.transpose() * Xres);\n    if( esM.eigenvalues().minCoeff() < params->numtol ) return;\n    Vmat = esM.eigenvectors() * esM.eigenvalues().cwiseInverse().asDiagonal() * esM.eigenvectors().transpose();\n    bhat = Vmat * (Xres.transpose() * nullHLM.yres.col(i));\n    //cerr << \"\\n\" << bhat << \"\\n\\n\" << Vmat.array().sqrt().matrix() << \"\\n\\n\"; exit(-1);\n\n    // print cov(beta) (rescale)\n    if(params->print_vcov && !params->gwas_condtl){\n      Files fout;\n      fout.openForWrite( files->out_file + \"_\" + files->pheno_names[i] + \"_\" + filters->interaction_cov + \"_\" + snpinfo[index].ID + \".vcov\", sout);\n      IOFormat Fmt(StreamPrecision, DontAlignCols, \" \", \"\\n\", \"\", \"\",\"\",\"\");\n      fout << Vmat.format(Fmt); \n      fout.closeFile();\n    }\n\n    ///////////////////////\n    // print main effect of G_E\n    if(beg > 0){\n      for(int j = 0; j < beg; j++){ \n        tstat = bhat(j) * bhat(j) / Vmat(j,j);\n        sehat = sqrt(Vmat(j,j));\n        get_logp(logp, tstat);\n        if(params->interaction_cat)\n          stmp=\"-INT_\" + filters->interaction_cov + \"=\" + params->interaction_lvl_names[j];\n        else if(params->add_homdev && (j != 0))\n          stmp=\"-INT_\" + filters->interaction_cov + \"-HOM\"; // G_E>=1.5\n        else\n          stmp=\"-INT_\" + filters->interaction_cov; // single cov\n\n        // print sum_stats\n        if(params->htp_out) \n          buffer << print_sum_stats_head_htp(index, files->pheno_names[i], model_type + stmp, snpinfo, params) << print_sum_stats_htp(bhat(j), sehat, tstat, logp, snp_data->af(i), snp_data->info(i), snp_data->mac(i), snp_data->genocounts, i, true, 1, params);\n        else \n          buffer << (!params->split_by_pheno && (i>0) ? \"\" : head) << print_sum_stats((params->split_by_pheno ? 
snp_data->af(i) : snp_data->af1),snp_data->af_case(i),snp_data->af_control(i),snp_data->n_rr,snp_data->n_aa, (params->split_by_pheno ? snp_data->info(i) : snp_data->info1), (params->split_by_pheno ? snp_data->ns(i) : snp_data->ns1),snp_data->ns_case(i),snp_data->ns_control(i), test_string + stmp, bhat(j), sehat, tstat, logp, true, 1, params, (i+1));\n      }\n    }\n\n    ///////////////////////\n    //////  marginal test\n    // T, beta, se & pv\n    tstat = bhat(beg) * bhat(beg) / Vmat(beg,beg);\n    sehat = sqrt(Vmat(beg,beg));\n    get_logp(logp, tstat);\n    stmp=\"-INT_SNP\";\n\n    // print sum_stats\n    if(params->htp_out) \n      buffer << print_sum_stats_head_htp(index, files->pheno_names[i], model_type + stmp, snpinfo, params) << print_sum_stats_htp(bhat(beg), sehat, tstat, logp, snp_data->af(i), snp_data->info(i), snp_data->mac(i), snp_data->genocounts, i, true, 1, params);\n    else \n      buffer << (!params->split_by_pheno && (i>0) ? \"\" : head) << print_sum_stats((params->split_by_pheno ? snp_data->af(i) : snp_data->af1),snp_data->af_case(i),snp_data->af_control(i),snp_data->n_rr,snp_data->n_aa, (params->split_by_pheno ? snp_data->info(i) : snp_data->info1), (params->split_by_pheno ? snp_data->ns(i) : snp_data->ns1),snp_data->ns_case(i),snp_data->ns_control(i), test_string + stmp, bhat(beg), sehat, tstat, logp, true, 1, params, (i+1));\n\n\n    ///////////////////////\n    //////  interaction tests\n    if(params->ncov_interaction > 1){ \n\n      // print effects for each interaction term\n      for(int j = 0; j < params->ncov_interaction; j++){ \n        tstat = bhat(beg+1+j) * bhat(beg+1+j) / Vmat(beg+1+j,beg+1+j);\n        sehat = sqrt(Vmat(beg+1+j,beg+1+j));\n        get_logp(logp, tstat);\n        stmp=\"-INT_SNPx\" + filters->interaction_cov + \"=\" + params->interaction_lvl_names[j];\n        // print sum_stats\n        if(params->htp_out) \n          buffer << print_sum_stats_head_htp(index, files->pheno_names[i], model_type + stmp, snpinfo, params) << print_sum_stats_htp(bhat(beg+1+j), sehat, tstat, logp, snp_data->af(i), snp_data->info(i), snp_data->mac(i), snp_data->genocounts, i, true, 1, params);\n        else \n          buffer << (!params->split_by_pheno && (i>0) ? \"\" : head) << print_sum_stats((params->split_by_pheno ? snp_data->af(i) : snp_data->af1),snp_data->af_case(i),snp_data->af_control(i),snp_data->n_rr,snp_data->n_aa, (params->split_by_pheno ? snp_data->info(i) : snp_data->info1), (params->split_by_pheno ? snp_data->ns(i) : snp_data->ns1),snp_data->ns_case(i),snp_data->ns_control(i), test_string + stmp, bhat(beg+1+j), sehat, tstat, logp, true, 1, params, (i+1));\n      }\n\n      // joint test for interaction terms\n      // T, beta, se & pv\n      Vinv = Vmat.block(beg+1,beg+1,params->ncov_interaction,params->ncov_interaction).inverse();\n      tstat = fabs( (bhat.tail(params->ncov_interaction).matrix().transpose() * Vinv * bhat.tail(params->ncov_interaction).matrix()).sum() );\n      logp = max(params->nl_dbl_dmin, cdf(complement(chisqI, tstat)));\n      logp = -log10( logp );\n      stmp=\"-INT_SNPx\" + filters->interaction_cov;\n      // print sum_stats\n      if(params->htp_out) \n        buffer << print_sum_stats_head_htp(index, files->pheno_names[i], model_type + stmp, snpinfo, params) << print_sum_stats_htp(-1, -1, tstat, logp, snp_data->af(i), snp_data->info(i), snp_data->mac(i), snp_data->genocounts, i, true, params->ncov_interaction, params);\n      else \n        buffer << (!params->split_by_pheno && (i>0) ? 
\"\" : head) << print_sum_stats((params->split_by_pheno ? snp_data->af(i) : snp_data->af1),snp_data->af_case(i),snp_data->af_control(i),snp_data->n_rr,snp_data->n_aa, (params->split_by_pheno ? snp_data->info(i) : snp_data->info1), (params->split_by_pheno ? snp_data->ns(i) : snp_data->ns1),snp_data->ns_case(i),snp_data->ns_control(i), test_string + stmp, -1, -1, tstat, logp, true, params->ncov_interaction, params, (i+1));\n\n    } else {\n      // T, beta, se & pv\n      tstat = fabs( bhat(beg+1) * bhat(beg+1) / Vmat(beg+1,beg+1) );\n      sehat = sqrt(Vmat(beg+1,beg+1));\n      get_logp(logp, tstat);\n      if(params->interaction_cat)\n        stmp=\"-INT_SNPx\" + filters->interaction_cov + \"=\" + params->interaction_lvl_names[0];\n      else\n        stmp=\"-INT_SNPx\" + filters->interaction_cov; // single cov\n\n      // print sum_stats\n      if(params->htp_out) \n        buffer << print_sum_stats_head_htp(index, files->pheno_names[i], model_type + stmp, snpinfo, params) << print_sum_stats_htp(bhat(beg+1), sehat, tstat, logp, snp_data->af(i), snp_data->info(i), snp_data->mac(i), snp_data->genocounts, i, true, 1, params);\n      else \n        buffer << (!params->split_by_pheno && (i>0) ? \"\" : head) << print_sum_stats((params->split_by_pheno ? snp_data->af(i) : snp_data->af1),snp_data->af_case(i),snp_data->af_control(i),snp_data->n_rr,snp_data->n_aa, (params->split_by_pheno ? snp_data->info(i) : snp_data->info1), (params->split_by_pheno ? snp_data->ns(i) : snp_data->ns1),snp_data->ns_case(i),snp_data->ns_control(i), test_string + stmp, bhat(beg+1), sehat, tstat, logp, true, 1, params, (i+1));\n    }\n\n    ///////////////////////\n    //////  joint test for G and C*G\n    Vinv = Vmat.block(beg,beg,params->ncov_interaction+1,params->ncov_interaction+1).inverse();\n\n    // T & pv\n    tstat = fabs( (bhat.tail(params->ncov_interaction+1).matrix().transpose() * Vinv * bhat.tail(params->ncov_interaction+1).matrix()).sum() );\n    logp = max(params->nl_dbl_dmin, cdf(complement(chisqK, tstat)));\n    logp = -log10( logp );\n    stmp=\"-INT_\" + df_str + \"DF\";\n\n    // print sum_stats\n    if(params->htp_out) \n      buffer << print_sum_stats_head_htp(index, files->pheno_names[i], model_type + stmp, snpinfo, params) << print_sum_stats_htp(-1, -1, tstat, logp, snp_data->af(i), snp_data->info(i), snp_data->mac(i), snp_data->genocounts, i, true, 1+params->ncov_interaction, params);\n    else \n      buffer << (!params->split_by_pheno && (i>0) ? \"\" : head) << print_sum_stats((params->split_by_pheno ? snp_data->af(i) : snp_data->af1),snp_data->af_case(i),snp_data->af_control(i),snp_data->n_rr,snp_data->n_aa, (params->split_by_pheno ? snp_data->info(i) : snp_data->info1), (params->split_by_pheno ? 
snp_data->ns(i) : snp_data->ns1),snp_data->ns_case(i),snp_data->ns_control(i), test_string + stmp, -1, -1, tstat, logp, true, 1+params->ncov_interaction, params, (i+1));\n\n    //cerr << endl << buffer.str() << endl; exit(-1);\n    snp_data->sum_stats[i].append( buffer.str() );\n  }\n\n}\n\n\n\nvoid apply_interaction_tests_bt(const int& index, const int& isnp, const int& thread, string const& model_type, string const& test_string, struct phenodt* pheno_data, struct filter const* filters, struct in_files* files,struct geno_block* gblock, variant_block* snp_data, vector<snp> const& snpinfo, struct ests* m_ests, struct f_ests* firth_est, struct param const* params, mstream& sout){\n\n  int beg = params->interaction_istart;\n  int np = pheno_data->Hmat[thread].cols() - beg;\n  string df_str = to_string(np);\n\n  // fill rest of matrix [ G, C*G ]\n  // remove covariate effects (not done for marginal test)\n  // use projection from linear regression so only needs to be done once for all traits\n  // with step 1 preds as cov, ignore last column of X\n  residualize_geno(isnp, thread, snp_data, true, pheno_data->new_cov.leftCols( params->ncov + (params->blup_cov ? -1 : 0)), gblock, params);\n  pheno_data->Hmat[thread].col(beg) = gblock->Gmat.col(isnp);\n  //cerr << pheno_data->Hmat[thread].topRows(3) << endl;\n\n  chi_squared chisqI(np-1);\n  chi_squared chisqK(np);\n  bool use_robust;\n  double logp, tstat, sehat;\n  double lpfirth = -log10( params->alpha_pvalue ), lpbase = -log10(0.05);\n  string head = \"\", stmp;\n\n  // write Hmat\n  if(params->debug){\n    IOFormat Fmt(FullPrecision, DontAlignCols, \" \", \"\\n\", \"\", \"\",\"\",\"\");\n    ofstream ofile;\n    openStream(&ofile, files->out_file + \"_H.txt\", ios::out, sout);\n    ofile << pheno_data->Hmat[thread].format(Fmt) << \"\\n\";\n    ofile.close();\n  }\n\n  // for output\n  if(!params->htp_out) head = print_sum_stats_head(index, snpinfo);\n\n  // for logistic regression\n  ArrayXd bhat, etavec, pivec, hvec;\n  MatrixXd WX, Vmat, tmpMat, V_robust;\n\n  for(int i = 0; i < params->n_pheno; ++i ){\n    if( !params->pheno_pass(i) ) continue;\n\n    if( snp_data->ignored_trait(i) ) continue;\n      \n    std::ostringstream buffer, buffer_int;\n\n    MapArXd Y (pheno_data->phenotypes_raw.col(i).data(), pheno_data->phenotypes_raw.rows());\n    MapArXb mask (pheno_data->masked_indivs.col(i).data(), pheno_data->masked_indivs.rows());\n    MapcArXd offset (m_ests->offset_nullreg.col(i).data(), m_ests->offset_nullreg.rows());\n\n    // starting values\n    bhat = ArrayXd::Zero(np+beg);\n    etavec = mask.select(offset, 0);\n    get_pvec(pivec, etavec, params->numtol_eps);\n\n    if(!(fit_logistic(Y, pheno_data->Hmat[thread], offset, mask, pivec, etavec, bhat, params, sout, true) || fit_logistic(Y, pheno_data->Hmat[thread], offset, mask, pivec, etavec, bhat, params, sout, false)))\n      continue; // no results for trait\n    /*else if( (mask && (pivec < params->numtol_eps || pivec > 1 - params->numtol_eps)).count() > 0 )\n      sout << \"\\n     WARNING: Fitted probabilities numerically 0/1 occurred (phenotype #\" << files->pheno_names[i] <<\").\";*/\n    //cerr << bhat << endl;\n\n    // get cov(beta)\n    WX = ( pheno_data->Hmat[thread].array().colwise() * mask.select(pivec * (1 - pivec), 0).sqrt() ).matrix();\n    SelfAdjointEigenSolver<MatrixXd> esM(WX.transpose() * WX);\n    if( esM.eigenvalues().minCoeff() < params->numtol ) continue;\n    Vmat = esM.eigenvectors() * esM.eigenvalues().cwiseInverse().asDiagonal() * 
esM.eigenvectors().transpose(); // model-based SE\n\n    // check pvalues of main & interaction effect (if sig, use robust SE unless very rare variant)\n    use_robust = params->force_robust;\n    if(!params->no_robust && (snp_data->mac(i) > params->rareMAC_inter))\n      for(int j = beg; j < (np+beg); j++){\n        tstat = bhat(j) * bhat(j) / Vmat(j,j);\n        get_logp(logp, tstat);\n        if(logp > lpbase) use_robust = true;\n      }\n\n    if(use_robust) { // robust se\n      hvec = ((WX * Vmat).array() * WX.array()).rowwise().sum();\n      V_robust = pheno_data->Hmat[thread].transpose() * mask.select((Y - pivec)/(1-hvec), 0).square().matrix().asDiagonal() * pheno_data->Hmat[thread];\n      tmpMat = Vmat * V_robust * Vmat;\n      if(params->debug) cerr << \"h:\\n\" << hvec.minCoeff() << \" - \" << hvec.maxCoeff() << \"\\n\\nb:\\n\" << bhat << \"\\n\\nV:\\n\" << tmpMat << \"\\n\\nXWXinv:\\n\" << Vmat << \"\\n\\nVh:\\n\"<< V_robust << \"\\n\\n\";\n      Vmat = tmpMat;\n    }\n    if( Vmat.diagonal().minCoeff() < 0 ) continue; // if robust SE computation fails\n    if(snp_data->flipped) bhat *= -1;\n\n    ///////////////////////\n    //////  interaction tests\n    bool use_firth = false;\n    if(params->ncov_interaction > 1){ \n\n      // print effects for each interaction term\n      for(int j = 0; j < params->ncov_interaction; j++){\n        // T, beta, se & pv\n        tstat = fabs( bhat(beg+1+j) * bhat(beg+1+j) / Vmat(beg+1+j,beg+1+j) );\n        sehat = sqrt(Vmat(beg+1+j,beg+1+j));\n        get_logp(logp, tstat);\n\n        // if firth, check pvalue <= thresh\n        use_firth = params->firth && (logp >= lpfirth);\n        if(use_firth) break;\n\n        stmp=\"-INT_SNPx\" + filters->interaction_cov + \"=\" + params->interaction_lvl_names[j];\n\n        // print sum_stats\n        if(params->htp_out) \n          buffer_int << print_sum_stats_head_htp(index, files->pheno_names[i], model_type + stmp, snpinfo, params) << print_sum_stats_htp(bhat(beg+1+j)/pheno_data->scf_i[thread](j), sehat/pheno_data->scf_i[thread](j), tstat, logp, snp_data->af(i), snp_data->info(i), snp_data->mac(i), snp_data->genocounts, i, true, 1, params);\n        else \n          buffer_int << (!params->split_by_pheno && (i>0) ? \"\" : head) << print_sum_stats((params->split_by_pheno ? snp_data->af(i) : snp_data->af1),snp_data->af_case(i),snp_data->af_control(i),snp_data->n_rr,snp_data->n_aa, (params->split_by_pheno ? snp_data->info(i) : snp_data->info1), (params->split_by_pheno ? 
snp_data->ns(i) : snp_data->ns1),snp_data->ns_case(i),snp_data->ns_control(i), test_string + stmp, bhat(beg+1+j)/pheno_data->scf_i[thread](j), sehat/pheno_data->scf_i[thread](j), tstat, logp, true, 1, params, (i+1));\n      }\n\n      // switch to firth\n      if(use_firth){\n        //cerr << i+1 << \"   \" << snpinfo[index].ID << endl;\n        snp_data->sum_stats[i].append( \n            apply_interaction_tests_firth(index, isnp, thread, i, model_type, test_string, pheno_data, filters, files, gblock, snp_data, snpinfo, m_ests, firth_est, params, sout)\n            );\n        continue; // go to next trait\n      }\n\n      /// joint test for interaction\n      // T & pv\n      tstat = fabs( (bhat.tail(params->ncov_interaction).matrix().transpose() * Vmat.block(beg+1,beg+1,params->ncov_interaction,params->ncov_interaction).inverse() * bhat.tail(params->ncov_interaction).matrix()).sum() );\n      logp = max(params->nl_dbl_dmin, cdf(complement(chisqI, tstat)));\n      logp = -log10( logp );\n      stmp=\"-INT_SNPx\" + filters->interaction_cov;\n\n      // print sum_stats\n      if(params->htp_out) \n        buffer_int << print_sum_stats_head_htp(index, files->pheno_names[i], model_type + stmp, snpinfo, params) << print_sum_stats_htp(-1, -1, tstat, logp, snp_data->af(i), snp_data->info(i), snp_data->mac(i), snp_data->genocounts, i, true, np-1, params);\n      else \n        buffer_int << (!params->split_by_pheno && (i>0) ? \"\" : head) << print_sum_stats((params->split_by_pheno ? snp_data->af(i) : snp_data->af1),snp_data->af_case(i),snp_data->af_control(i),snp_data->n_rr,snp_data->n_aa, (params->split_by_pheno ? snp_data->info(i) : snp_data->info1), (params->split_by_pheno ? snp_data->ns(i) : snp_data->ns1),snp_data->ns_case(i),snp_data->ns_control(i), test_string + stmp, -1, -1, tstat, logp, true, np-1, params, (i+1));\n\n    } else {\n\n      // T, beta, se & pv\n      tstat = fabs( bhat(beg+1) * bhat(beg+1) / Vmat(beg+1,beg+1) );\n      sehat = sqrt(Vmat(beg+1,beg+1));\n      get_logp(logp, tstat);\n\n      // if firth, check pvalue <= thresh\n      use_firth = params->firth && (logp >= lpfirth);\n      // switch to firth\n      if(use_firth){\n        snp_data->sum_stats[i].append( \n            apply_interaction_tests_firth(index, isnp, thread, i, model_type, test_string, pheno_data, filters, files, gblock, snp_data, snpinfo, m_ests, firth_est, params, sout)\n            );\n        continue; // go to next trait\n      }\n\n      if(params->interaction_cat)\n        stmp=\"-INT_SNPx\" + filters->interaction_cov + \"=\" + params->interaction_lvl_names[0];\n      else\n        stmp=\"-INT_SNPx\" + filters->interaction_cov; // single cov\n\n      // print sum_stats\n      if(params->htp_out) \n        buffer_int << print_sum_stats_head_htp(index, files->pheno_names[i], model_type + stmp, snpinfo, params) << print_sum_stats_htp(bhat(beg+1)/pheno_data->scf_i[thread](0), sehat/pheno_data->scf_i[thread](0), tstat, logp, snp_data->af(i), snp_data->info(i), snp_data->mac(i), snp_data->genocounts, i, true, 1, params);\n      else \n        buffer_int << (!params->split_by_pheno && (i>0) ? \"\" : head) << print_sum_stats((params->split_by_pheno ? snp_data->af(i) : snp_data->af1),snp_data->af_case(i),snp_data->af_control(i),snp_data->n_rr,snp_data->n_aa, (params->split_by_pheno ? snp_data->info(i) : snp_data->info1), (params->split_by_pheno ? 
snp_data->ns(i) : snp_data->ns1),snp_data->ns_case(i),snp_data->ns_control(i), test_string + stmp, bhat(beg+1)/pheno_data->scf_i[thread](0), sehat/pheno_data->scf_i[thread](0), tstat, logp, true, 1, params, (i+1));\n\n    }\n\n    // print cov(beta) (rescale)\n    if(params->print_vcov && !params->gwas_condtl){\n      Files fout;\n      fout.openForWrite( files->out_file + \"_\" + files->pheno_names[i] + \"_\" + filters->interaction_cov + \"_\" + snpinfo[index].ID + \".vcov\", sout);\n      MatrixXd scvec (pheno_data->Hmat[thread].cols(), 1);\n      scvec.col(0).array().head( pheno_data->scl_inter_X.size() ) = 1/pheno_data->scl_inter_X;\n      scvec(beg, 0) = 1/snp_data->scale_fac;\n      scvec.col(0).array().tail( pheno_data->scf_i[thread].size() ) = 1/pheno_data->scf_i[thread];\n      IOFormat Fmt(StreamPrecision, DontAlignCols, \" \", \"\\n\", \"\", \"\",\"\",\"\");\n      fout << (scvec.col(0).asDiagonal() * Vmat * scvec.col(0).asDiagonal()).format(Fmt); \n      fout.closeFile();\n    }\n\n    ///////////////////////\n    // print main effect of G_E\n    if(beg > 0){\n      for(int j = 0; j < beg; j++){ \n        tstat = bhat(j) * bhat(j) / Vmat(j,j);\n        sehat = sqrt(Vmat(j,j));\n        get_logp(logp, tstat);\n        if(params->interaction_cat)\n          stmp=\"-INT_\" + filters->interaction_cov + \"=\" + params->interaction_lvl_names[j];\n        else if(params->int_add_esq && (j != 0))\n          stmp=\"-INT_\" + filters->interaction_cov + \"^2\"; // G_E^2\n        else if(params->add_homdev && (j != 0))\n          stmp=\"-INT_\" + filters->interaction_cov + \"-HOM\"; // G_E>=1.5\n        else\n          stmp=\"-INT_\" + filters->interaction_cov; // single cov\n\n        // print sum_stats\n        if(params->htp_out) \n          buffer << print_sum_stats_head_htp(index, files->pheno_names[i], model_type + stmp, snpinfo, params) << print_sum_stats_htp(bhat(j)/pheno_data->scl_inter_X(j), sehat/pheno_data->scl_inter_X(j), tstat, logp, snp_data->af(i), snp_data->info(i), snp_data->mac(i), snp_data->genocounts, i, true, 1, params);\n        else \n          buffer << (!params->split_by_pheno && (i>0) ? \"\" : head) << print_sum_stats((params->split_by_pheno ? snp_data->af(i) : snp_data->af1),snp_data->af_case(i),snp_data->af_control(i),snp_data->n_rr,snp_data->n_aa, (params->split_by_pheno ? snp_data->info(i) : snp_data->info1), (params->split_by_pheno ? snp_data->ns(i) : snp_data->ns1),snp_data->ns_case(i),snp_data->ns_control(i), test_string + stmp, bhat(j)/pheno_data->scl_inter_X(j), sehat/pheno_data->scl_inter_X(j), tstat, logp, true, 1, params, (i+1));\n      }\n    }\n\n    ///////////////////////\n    //////  marginal test\n    // T, beta, se & pv\n    tstat = bhat(beg) * bhat(beg) / Vmat(beg,beg);\n    sehat = sqrt(Vmat(beg,beg));\n    get_logp(logp, tstat);\n    stmp=\"-INT_SNP\";\n\n    // print sum_stats\n    if(params->htp_out) \n      buffer << print_sum_stats_head_htp(index, files->pheno_names[i], model_type + stmp, snpinfo, params) << print_sum_stats_htp(bhat(beg)/ snp_data->scale_fac, sehat/ snp_data->scale_fac, tstat, logp, snp_data->af(i), snp_data->info(i), snp_data->mac(i), snp_data->genocounts, i, true, 1, params);\n     else \n      buffer << (!params->split_by_pheno && (i>0) ? \"\" : head) << print_sum_stats((params->split_by_pheno ? snp_data->af(i) : snp_data->af1),snp_data->af_case(i),snp_data->af_control(i),snp_data->n_rr,snp_data->n_aa, (params->split_by_pheno ? snp_data->info(i) : snp_data->info1), (params->split_by_pheno ? 
snp_data->ns(i) : snp_data->ns1),snp_data->ns_case(i),snp_data->ns_control(i), test_string + stmp, bhat(beg)/ snp_data->scale_fac, sehat/ snp_data->scale_fac, tstat, logp, true, 1, params, (i+1));\n\n    // add interaction test results\n    buffer << buffer_int.str();\n\n    ///////////////////////\n    //////  joint test for G and C*G\n    // T & pv\n    if(beg!=0)\n      tstat = fabs( (bhat.tail(params->ncov_interaction+1).matrix().transpose() * Vmat.block(beg,beg,params->ncov_interaction+1,params->ncov_interaction+1).inverse() * bhat.tail(params->ncov_interaction+1).matrix()).sum() );\n    else\n      tstat = fabs( (bhat.matrix().transpose() * Vmat.inverse() * bhat.matrix()).sum() );\n    logp = max(params->nl_dbl_dmin, cdf(complement(chisqK, tstat)));\n    logp = -log10( logp );\n    stmp=\"-INT_\" + df_str + \"DF\";\n\n    // print sum_stats\n    if(params->htp_out) \n      buffer << print_sum_stats_head_htp(index, files->pheno_names[i], model_type + stmp, snpinfo, params) << print_sum_stats_htp(-1, -1, tstat, logp, snp_data->af(i), snp_data->info(i), snp_data->mac(i), snp_data->genocounts, i, true, np, params);\n    else \n      buffer << (!params->split_by_pheno && (i>0) ? \"\" : head) << print_sum_stats((params->split_by_pheno ? snp_data->af(i) : snp_data->af1),snp_data->af_case(i),snp_data->af_control(i),snp_data->n_rr,snp_data->n_aa, (params->split_by_pheno ? snp_data->info(i) : snp_data->info1), (params->split_by_pheno ? snp_data->ns(i) : snp_data->ns1),snp_data->ns_case(i),snp_data->ns_control(i), test_string + stmp, -1, -1, tstat, logp, true, np, params, (i+1));\n\n    //if(isnp==0 & i==0) cerr << endl << buffer.str() << endl;\n    snp_data->sum_stats[i].append( buffer.str() );\n\n  }\n\n}\n\nstd::string apply_interaction_tests_firth(const int& index, const int& isnp, const int& thread, const int& ipheno, string const& model_type, string const& test_string, struct phenodt* pheno_data, struct filter const* filters, struct in_files const* files, struct geno_block* gblock, variant_block* snp_data, vector<snp> const& snpinfo, struct ests const* m_ests, struct f_ests const* firth_est, struct param const* params, mstream& sout){\n\n  int beg = params->interaction_istart;\n  int np = pheno_data->Hmat[thread].cols() - beg;\n  string df_str = to_string(np);\n  snp_data->is_corrected_inter(ipheno) = true;\n\n  chi_squared chisqI(np-1);\n  chi_squared chisqK(np);\n  double logp, tstat;\n  double bsign = snp_data->flipped ? 
-1 : 1;\n  string head = \"\", stmp;\n\n  // for output\n  if(!params->htp_out) head = print_sum_stats_head(index, snpinfo);\n\n  // for firth regression\n  double dev, dev_s, se_val;\n  ArrayXd bhat, bhat_s, se, se_s, etavec, pivec;\n  std::ostringstream buffer, buffer_joint;\n\n  MapArXd Y (pheno_data->phenotypes_raw.col(ipheno).data(), pheno_data->phenotypes_raw.rows());\n  MapArXb mask (pheno_data->masked_indivs.col(ipheno).data(), pheno_data->masked_indivs.rows());\n  MapcArXd offset (firth_est->cov_blup_offset.col(ipheno).data(), firth_est->cov_blup_offset.rows());\n\n  // fit full model with G & C*G\n  // starting values\n  bhat = ArrayXd::Zero(np+beg);\n  if(beg!=0) {// fit null model with only G_E\n    if(!fit_firth(ipheno, Y, pheno_data->Hmat[thread], offset, mask, pivec, etavec, bhat, se, beg, dev_s, false, tstat, params->maxstep_null, params->niter_max_firth_null, params->numtol_firth, params))\n      return \"\";\n  }\n  if(!fit_firth(ipheno, Y, pheno_data->Hmat[thread], offset, mask, pivec, etavec, bhat, se, np+beg, dev, true, tstat, params->maxstep, params->niter_max_firth, params->numtol_firth, params))\n    return \"\";\n  //if(isnp==0 & ipheno==0)cerr << bhat << \"\\n\\n\" << se << endl;exit(-1);\n\n  // print cov(beta) (rescale)\n  if(params->print_vcov && !params->gwas_condtl){\n    Files fout;\n    fout.openForWrite( files->out_file + \"_\" + files->pheno_names[ipheno] + \"_\" + filters->interaction_cov + \"_\" + snpinfo[index].ID + \".vcov\", sout);\n    ArrayXd wvec = mask.select( ( pivec * (1 - pivec) ).sqrt(), 0);\n    MatrixXd XtW = pheno_data->Hmat[thread].transpose() * wvec.matrix().asDiagonal();\n    ColPivHouseholderQR<MatrixXd> qr(XtW * XtW.transpose());\n    MatrixXd scvec (pheno_data->Hmat[thread].cols(), 1);\n    scvec.col(0).array().head( pheno_data->scl_inter_X.size() ) = 1/pheno_data->scl_inter_X;\n    scvec(beg, 0) = 1/snp_data->scale_fac;\n    scvec.col(0).array().tail( pheno_data->scf_i[thread].size() ) = 1/pheno_data->scf_i[thread];\n    IOFormat Fmt(StreamPrecision, DontAlignCols, \" \", \"\\n\", \"\", \"\",\"\",\"\");\n    fout << (scvec.col(0).asDiagonal() * qr.inverse() * scvec.col(0).asDiagonal()).format(Fmt); \n    fout.closeFile();\n  }\n\n  ///////////////////////\n  //////  GxG: G_E main effect\n  if(!params->gwas_condtl){\n    for(int j = 0; j < beg; j++){\n      if(params->interaction_cat)\n        stmp=\"-INT_\" + filters->interaction_cov + \"=\" + params->interaction_lvl_names[j];\n      else if(params->int_add_esq && (j != 0))\n        stmp=\"-INT_\" + filters->interaction_cov + \"^2\"; // G_E^2\n      else if(params->add_homdev && (j != 0))\n        stmp=\"-INT_\" + filters->interaction_cov + \"-HOM\"; // G_E>=1.5\n      else\n        stmp=\"-INT_\" + filters->interaction_cov; // single cov\n\n      // print sum_stats\n      if(params->htp_out) \n        buffer << print_sum_stats_head_htp(index, files->pheno_names[ipheno], model_type + stmp, snpinfo, params) << print_sum_stats_htp(bhat(j)/pheno_data->scl_inter_X(j), se(j)/pheno_data->scl_inter_X(j), -1, -1, snp_data->af(ipheno), snp_data->info(ipheno), snp_data->mac(ipheno), snp_data->genocounts, ipheno, true, 1, params);\n      else \n        buffer << (!params->split_by_pheno && (ipheno>0) ? \"\" : head) << print_sum_stats((params->split_by_pheno ? snp_data->af(ipheno) : snp_data->af1),snp_data->af_case(ipheno),snp_data->af_control(ipheno),snp_data->n_rr,snp_data->n_aa, (params->split_by_pheno ? snp_data->info(ipheno) : snp_data->info1), (params->split_by_pheno ? 
snp_data->ns(ipheno) : snp_data->ns1),snp_data->ns_case(ipheno),snp_data->ns_control(ipheno), test_string + stmp, bhat(j)/pheno_data->scl_inter_X(j), se(j)/pheno_data->scl_inter_X(j), -1, -1, true, 1, params, (ipheno+1));\n    }\n  }\n\n  /////////////// joint test\n  // pv\n  if(beg!=0) tstat = dev_s - dev;\n  if(tstat < 0) return \"\";\n  logp = max(params->nl_dbl_dmin, cdf(complement(chisqK, tstat)));\n  logp = -log10( logp );\n  stmp=\"-INT_\" + df_str + \"DF\";\n\n  // print sum_stats\n  if(params->htp_out) \n    buffer_joint << print_sum_stats_head_htp(index, files->pheno_names[ipheno], model_type + stmp, snpinfo, params) << print_sum_stats_htp(-1, -1, tstat, logp, snp_data->af(ipheno), snp_data->info(ipheno), snp_data->mac(ipheno), snp_data->genocounts, ipheno, true, np, params);\n  else \n    buffer_joint << (!params->split_by_pheno && (ipheno>0) ? \"\" : head) << print_sum_stats((params->split_by_pheno ? snp_data->af(ipheno) : snp_data->af1),snp_data->af_case(ipheno),snp_data->af_control(ipheno),snp_data->n_rr,snp_data->n_aa, (params->split_by_pheno ? snp_data->info(ipheno) : snp_data->info1), (params->split_by_pheno ? snp_data->ns(ipheno) : snp_data->ns1),snp_data->ns_case(ipheno),snp_data->ns_control(ipheno), test_string + stmp, -1, -1, tstat, logp, true, np, params, (ipheno+1));\n\n  // get lrt values for each test\n  ///////////////////////\n  //////  marginal test\n  pheno_data->Hmat[thread].col(beg).swap( pheno_data->Hmat[thread].rightCols(1) ); // put G in last column\n  bhat_s = bhat;\n  bhat_s(beg) = bhat.tail(1)(0);\n  bhat_s.tail(1)(0) = 0;\n\n  if(!fit_firth(ipheno, Y, pheno_data->Hmat[thread], offset, mask, pivec, etavec, bhat_s, se_s, beg+np-1, dev_s, false, tstat, params->maxstep, params->niter_max_firth, params->numtol_firth, params))\n    return \"\";\n  // T, beta, se & pv\n  tstat = dev_s - dev;\n  if(tstat < 0) return \"\";\n  get_logp(logp, tstat);\n  stmp=\"-INT_SNP\";\n  if( params->back_correct_se && (tstat > 0) )\n    se_val = fabs(bhat(beg)) / sqrt(tstat);\n  else\n    se_val = se(beg);\n\n  // print sum_stats\n  if(params->htp_out) \n    buffer << print_sum_stats_head_htp(index, files->pheno_names[ipheno], model_type + stmp, snpinfo, params) << print_sum_stats_htp(bsign * bhat(beg)/snp_data->scale_fac, se_val/snp_data->scale_fac, tstat, logp, snp_data->af(ipheno), snp_data->info(ipheno), snp_data->mac(ipheno), snp_data->genocounts, ipheno, true, 1, params);\n  else \n    buffer << (!params->split_by_pheno && (ipheno>0) ? \"\" : head) << print_sum_stats((params->split_by_pheno ? snp_data->af(ipheno) : snp_data->af1),snp_data->af_case(ipheno),snp_data->af_control(ipheno),snp_data->n_rr,snp_data->n_aa, (params->split_by_pheno ? snp_data->info(ipheno) : snp_data->info1), (params->split_by_pheno ? 
snp_data->ns(ipheno) : snp_data->ns1),snp_data->ns_case(ipheno),snp_data->ns_control(ipheno), test_string + stmp, bsign * bhat(beg)/ snp_data->scale_fac, se_val/ snp_data->scale_fac, tstat, logp, true, 1, params, (ipheno+1));\n\n  ///////////////////////\n  //////  interaction tests\n  pheno_data->Hmat[thread].col(beg).swap( pheno_data->Hmat[thread].rightCols(1) ); // put back G in correct column\n  if(params->ncov_interaction > 1){\n\n    // print b/se for each interaction term (from full model)\n    for(int j = 0; j < params->ncov_interaction; j++){\n      stmp=\"-INT_SNPx\" + filters->interaction_cov + \"=\" + params->interaction_lvl_names[j];\n      // print sum_stats\n      if(params->htp_out) \n        buffer << print_sum_stats_head_htp(index, files->pheno_names[ipheno], model_type + stmp, snpinfo, params) << print_sum_stats_htp(bsign * bhat(beg+1+j)/pheno_data->scf_i[thread](j), se(beg+1+j)/pheno_data->scf_i[thread](j), -1, -1, snp_data->af(ipheno), snp_data->info(ipheno), snp_data->mac(ipheno), snp_data->genocounts, ipheno, true, 1, params);\n      else \n        buffer << (!params->split_by_pheno && (ipheno>0) ? \"\" : head) << print_sum_stats((params->split_by_pheno ? snp_data->af(ipheno) : snp_data->af1),snp_data->af_case(ipheno),snp_data->af_control(ipheno),snp_data->n_rr,snp_data->n_aa, (params->split_by_pheno ? snp_data->info(ipheno) : snp_data->info1), (params->split_by_pheno ? snp_data->ns(ipheno) : snp_data->ns1),snp_data->ns_case(ipheno),snp_data->ns_control(ipheno), test_string + stmp, bsign * bhat(beg+1+j)/pheno_data->scf_i[thread](j), se(beg+1+j)/pheno_data->scf_i[thread](j), -1, -1, true, 1, params, (ipheno+1));\n    }\n\n    /// joint test for interaction\n    bhat_s = bhat;\n    bhat_s.tail(params->ncov_interaction) = 0;\n\n    // run firth\n    if(!fit_firth(ipheno, Y, pheno_data->Hmat[thread], offset, mask, pivec, etavec, bhat_s, se_s, beg+1, dev_s, false, tstat, params->maxstep, params->niter_max_firth, params->numtol_firth, params))\n      return \"\";\n    // pv\n    tstat = dev_s - dev;\n    if(tstat < 0) return \"\";\n    logp = max(params->nl_dbl_dmin, cdf(complement(chisqI, tstat)));\n    logp = -log10( logp );\n    stmp=\"-INT_SNPx\" + filters->interaction_cov; // joint test across interaction terms\n\n    // print sum_stats\n    if(params->htp_out) \n      buffer << print_sum_stats_head_htp(index, files->pheno_names[ipheno], model_type + stmp, snpinfo, params) << print_sum_stats_htp(-1, -1, tstat, logp, snp_data->af(ipheno), snp_data->info(ipheno), snp_data->mac(ipheno), snp_data->genocounts, ipheno, true, np-1, params);\n    else \n      buffer << (!params->split_by_pheno && (ipheno>0) ? \"\" : head) << print_sum_stats((params->split_by_pheno ? snp_data->af(ipheno) : snp_data->af1),snp_data->af_case(ipheno),snp_data->af_control(ipheno),snp_data->n_rr,snp_data->n_aa, (params->split_by_pheno ? snp_data->info(ipheno) : snp_data->info1), (params->split_by_pheno ? snp_data->ns(ipheno) : snp_data->ns1),snp_data->ns_case(ipheno),snp_data->ns_control(ipheno), test_string + stmp, -1, -1, tstat, logp, true, np-1, params, (ipheno+1));\n\n  } else { // single interaction term\n\n    bhat_s = bhat;\n    bhat_s.tail(1) = 0;\n\n    // run firth\n    if(!fit_firth(ipheno, Y, pheno_data->Hmat[thread], offset, mask, pivec, etavec, bhat_s, se_s, beg+np-1, dev_s, false, tstat, params->maxstep, params->niter_max_firth, params->numtol_firth, params))\n      return \"\";\n    // T, beta, se & pv\n    tstat = dev_s - dev;\n    if(tstat < 0) return \"\";\n    get_logp(logp, tstat);\n\n    if(params->interaction_cat)\n      stmp=\"-INT_SNPx\" + filters->interaction_cov + \"=\" + params->interaction_lvl_names[0];\n    else\n      stmp=\"-INT_SNPx\" + filters->interaction_cov; // single cov\n\n    if( params->back_correct_se && (tstat > 0) )\n      se_val = fabs(bhat(beg+1)) / sqrt(tstat);\n    else\n      se_val = se(beg+1);\n\n    // print sum_stats (use back-corrected SE when enabled)\n    if(params->htp_out) \n      buffer << print_sum_stats_head_htp(index, files->pheno_names[ipheno], model_type + stmp, snpinfo, params) << print_sum_stats_htp(bsign * bhat(beg+1)/pheno_data->scf_i[thread](0), se_val/pheno_data->scf_i[thread](0), tstat, logp, snp_data->af(ipheno), snp_data->info(ipheno), snp_data->mac(ipheno), snp_data->genocounts, ipheno, true, 1, params);\n    else \n      buffer << (!params->split_by_pheno && (ipheno>0) ? \"\" : head) << print_sum_stats((params->split_by_pheno ? snp_data->af(ipheno) : snp_data->af1),snp_data->af_case(ipheno),snp_data->af_control(ipheno),snp_data->n_rr,snp_data->n_aa, (params->split_by_pheno ? snp_data->info(ipheno) : snp_data->info1), (params->split_by_pheno ? snp_data->ns(ipheno) : snp_data->ns1),snp_data->ns_case(ipheno),snp_data->ns_control(ipheno), test_string + stmp, bsign * bhat(beg+1)/pheno_data->scf_i[thread](0), se_val/pheno_data->scf_i[thread](0), tstat, logp, true, 1, params, (ipheno+1));\n\n  }\n\n  //if(isnp==0 & ipheno==0) cerr << endl << buffer.str() << endl; exit(-1);\n\n  snp_data->test_fail_inter[ipheno] = false;\n  return buffer.str() + buffer_joint.str();\n}\n\n"
  },
  {
    "path": "src/Interaction.hpp",
    "content": "/*\n\n   This file is part of the regenie software package.\n\n   Copyright (c) 2020-2024 Joelle Mbatchou, Andrey Ziyatdinov & Jonathan Marchini\n\n   Permission is hereby granted, free of charge, to any person obtaining a copy\n   of this software and associated documentation files (the \"Software\"), to deal\n   in the Software without restriction, including without limitation the rights\n   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n   copies of the Software, and to permit persons to whom the Software is\n   furnished to do so, subject to the following conditions:\n\n   The above copyright notice and this permission notice shall be included in all\n   copies or substantial portions of the Software.\n\n   THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n   SOFTWARE.\n\n*/\n\n\n#ifndef INTERACTION_H\n#define INTERACTION_H\n\n// for interaction testing\nvoid get_interaction_terms(const int&,const int&,struct phenodt*,struct geno_block*,variant_block*,HLM&,struct param const*,mstream&);\nvoid apply_interaction_tests(const int&,const int&,const int&,const Eigen::Ref<const Eigen::MatrixXd>&,const Eigen::Ref<const Eigen::RowVectorXd>&,std::string const&,std::string const&,struct phenodt*,HLM&,struct filter const*,struct in_files*,struct geno_block*,variant_block*,std::vector<snp> const&,struct ests*,struct f_ests*,struct param const*,mstream&);\nvoid apply_interaction_tests_qt(const int&,const int&,const int&,const Eigen::Ref<const Eigen::MatrixXd>&,const Eigen::Ref<const Eigen::RowVectorXd>&,std::string const&,std::string const&,struct phenodt*,struct filter const*,struct in_files*,struct geno_block*,variant_block*,std::vector<snp> const&,struct param const*,mstream&);\nvoid apply_interaction_tests_HLM(const int&,const int&,const int&,const Eigen::Ref<const Eigen::MatrixXd>&,const Eigen::Ref<const Eigen::RowVectorXd>&,std::string const&,std::string const&,struct phenodt*,HLM&,struct filter const*,struct in_files*,struct geno_block*,variant_block*,std::vector<snp> const&,struct param const*,mstream&);\nvoid apply_interaction_tests_bt(const int&,const int&,const int&,std::string const&,std::string const&,struct phenodt*,struct filter const*,struct in_files*,struct geno_block*,variant_block*,std::vector<snp> const&,struct ests*,struct f_ests*,struct param const*,mstream&);\nstd::string apply_interaction_tests_firth(const int&,const int&,const int&,const int&,std::string const&,std::string const&,struct phenodt*,struct filter const*,struct in_files const*,struct geno_block*,variant_block*,std::vector<snp> const&,struct ests const*,struct f_ests const*,struct param const*,mstream&);\n\n\n#endif\n"
  },
  {
    "path": "src/Joint_Tests.cpp",
    "content": "/* \n\n   This file is part of the regenie software package.\n\n   Copyright (c) 2020-2024 Joelle Mbatchou, Andrey Ziyatdinov & Jonathan Marchini\n\n   Permission is hereby granted, free of charge, to any person obtaining a copy\n   of this software and associated documentation files (the \"Software\"), to deal\n   in the Software without restriction, including without limitation the rights\n   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n   copies of the Software, and to permit persons to whom the Software is\n   furnished to do so, subject to the following conditions:\n\n   The above copyright notice and this permission notice shall be included in all\n   copies or substantial portions of the Software.\n\n   THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n   SOFTWARE.\n\n*/\n\n#include \"Regenie.hpp\"\n#include \"Files.hpp\"\n#include \"Geno.hpp\"\n#include \"Pheno.hpp\"\n#include \"NNLS.hpp\"\n#include \"Joint_Tests.hpp\"\n\nusing namespace std;\nusing namespace Eigen;\nusing boost::math::fisher_f;\nusing boost::math::chi_squared;\nusing boost::math::cauchy;\n\nJTests::JTests() { // @suppress(\"Class members should be properly initialized\")\n}\n\nJTests::~JTests() {\n  // TODO Auto-generated destructor stub\n}\n\ntemplate <typename T>\nstd::vector<size_t> sort_indexes(const std::vector<T> &v){\n  // initialize original index locations\n  vector<size_t> idx(v.size());\n  iota(idx.begin(), idx.end(), 0);\n\n  // sort indexes based on comparing values in v\n  // using std::stable_sort instead of std::sort\n  // to avoid unnecessary index re-orderings\n  // when v contains elements of equal values\n  stable_sort(idx.begin(), idx.end(),\n       [&v](size_t i1, size_t i2) {return v[i1] < v[i2];});\n\n  return idx;\n}\n\n\nbool JTests::get_test_info(const struct param* params, string const& test_string, mstream& sout){\n\n  bool with_flip = true; // allow to flip to minor alleles\n  std::vector< string > tmp_str_vec ;\n  test_list = 0ULL;\n  // for tests requiring QR decomp\n  BIT_SET(qr_tests, joint_tests_map[\"ftest\"]);\n  BIT_SET(qr_tests, joint_tests_map[\"gates\"]);\n  BIT_SET(qr_tests, joint_tests_map[\"sbat\"]);\n\n  test_pfx = test_string + \"-\";\n  burden_str = test_pfx + \"BURDEN-\";\n  if(params->htp_out){\n    if(params->skip_blups) burden_model = burden_str;\n    else { test_pfx = test_string + \"-WGR-\"; burden_model = test_pfx + \"BURDEN-\"; }\n  }\n\n  // activate tests chosen by user\n  tmp_str_vec = string_split(params->burden,\",\");\n  for( auto const& input_test: tmp_str_vec ){\n\n    if( input_test == \"minp\" || input_test == \"gates\" )\n      BIT_SET(test_list, joint_tests_map[input_test]);\n    else if( input_test == \"ftest\" ){\n      if(params->trait_mode) sout << \"WARNING: Joint F-test only for QTs.\\n\";\n      else BIT_SET(test_list, joint_tests_map[input_test]);\n    } else if( input_test == \"sbat\" ){\n      if(params->trait_mode) sout << \"WARNING: Joint SBAT test only for QTs.\\n\";\n      else { \n        BIT_SET(test_list, joint_tests_map[input_test]); \n        nnls_napprox = 
params->nnls_napprox; \n        nnls_verbose_out = params->nnls_out_all;\n      }\n      with_flip = false;\n    } else if( input_test == \"acat\" ){\n      BIT_SET(test_list, joint_tests_map[input_test]);\n      valid_snp_mode = !params->build_mask || (params->build_mask && params->mask_rule_max) ;\n      acat_a1 = params->acat_a1;\n      acat_a2 = params->acat_a2;\n    } else throw \"unrecognized joint test (='\" + input_test + \"').\";\n\n  }\n\n  ncovars = params->ncov_analyzed;\n  rng_rd = params->rng_rd;\n  debug_mode = params->debug;\n  nnls_adaptive = params->nnls_adaptive && CHECK_BIT(test_list,joint_tests_map[\"sbat\"]);\n  nnls_mt_weights = params->nnls_mt_weights && CHECK_BIT(test_list,joint_tests_map[\"sbat\"]);\n  apply_single_p = params->apply_gene_pval_strategy;\n\n  if(apply_single_p) {\n    check_class_genep(params->genep_mask_sets_file, params->mask_map);\n    int nclass = genep_all_masks + gene_p_tests.size();\n    if(nclass == 0) throw \"No valid mask groups were specified for GENE_P strategy\";\n    else sout << \" * number of mask groups run through gene-p strategy = \" << nclass << \"\\n\";\n  }\n\n  if(nnls_verbose_out) { // header\n    string fname = out_file_prefix + \"_sbat.info\";\n    ofstream file_info(fname, std::ios_base::out);\n    file_info << \"MASK_GROUP ID SEL BETA_SBAT BETA_OLS\\n\";\n    file_info.close();\n  }\n\n  return with_flip;\n}\n\nvector<string> JTests::apply_joint_test(const int& chrom, const int& block, struct phenodt const* pheno_data, const Eigen::Ref<const Eigen::MatrixXd>& res, struct geno_block const* gblock, std::vector<variant_block>& block_info, struct in_files& files, struct param const* params){\n\n  int print_index, bs = setinfo[chrom - 1][block].snp_indices.size();\n  vector<string> out_str(params->n_pheno);\n  vector<vector<string>> sum_stats_str;\n  sum_stats_str.resize(joint_tests_map.size());\n  for (size_t i = 0; i < joint_tests_map.size(); i++)\n    sum_stats_str[i].resize(params->n_pheno); // store sum stats for each test/phenotype\n  std::map <std::string, std::map <std::string, bool>>::iterator itr;\n  if( nnls_mt_weights ) prep_nnls_weights(gblock->Gmat.cols());\n\n  for(int ph = 0; ph < params->n_pheno; ++ph) {\n\n    std::map<std::string, double> overall_p;\n    reset_vals();\n\n    MapcMatXd yres (res.col(ph).data(), res.rows(), 1);\n    string pheno_name = ( params->htp_use_eventname ? 
files.t2e_map[files.pheno_names[ph]] : files.pheno_names[ph] );\n\n    // keep track of this when not splitting sum stats file\n    bool run_tests = params->pheno_pass(ph) && (nvars > 0) && !set_vars(bs, ph, block_info);\n    bool print_stats = run_tests || !params->split_by_pheno;\n\n    if( CHECK_BIT(test_list,joint_tests_map[\"minp\"]) ) { // minP\n      if(run_tests) compute_minp();\n      if(print_stats) sum_stats_str[joint_tests_map[\"minp\"]][ph] = print_output(joint_tests_map[\"minp\"], ph+1, chrom, block, pheno_name, params);\n    } \n    if( CHECK_BIT(test_list,joint_tests_map[\"acat\"]) ) { // ACAT\n      if(run_tests && ((apply_single_p && genep_all_masks) || !apply_single_p)) {\n        compute_acat(bs, ph, block_info);\n        if(apply_single_p && genep_all_masks && (plog >= 0)) overall_p[\"BURDEN-ACAT\"] = plog;\n      }\n      if(print_stats && ((apply_single_p && genep_all_masks) || !apply_single_p))\n        sum_stats_str[joint_tests_map[\"acat\"]][ph] = print_output(joint_tests_map[\"acat\"], genep_all_sfx, ph+1, chrom, block, pheno_name, params);\n    } \n\n    // check other test\n    if( test_list & qr_tests ) {\n\n      if(run_tests) compute_qr_G(pheno_data->masked_indivs.col(ph), gblock);\n\n      if( CHECK_BIT(test_list,joint_tests_map[\"ftest\"]) ) { // F-test\n        if(run_tests) compute_ftest(pheno_data->masked_indivs.col(ph), yres); \n        if(print_stats) sum_stats_str[joint_tests_map[\"ftest\"]][ph] = print_output(joint_tests_map[\"ftest\"], ph+1, chrom, block, pheno_name, params);\n      } \n      if( CHECK_BIT(test_list,joint_tests_map[\"gates\"]) ) { // GATES\n        if(run_tests) compute_gates(ph, block_info);\n        if(print_stats) sum_stats_str[joint_tests_map[\"gates\"]][ph] = print_output(joint_tests_map[\"gates\"], ph+1, chrom, block, pheno_name, params);\n      } \n      if( CHECK_BIT(test_list,joint_tests_map[\"sbat\"]) ) { // SBAT (NNLS)\n        if(run_tests && ((apply_single_p && genep_all_masks) || !apply_single_p))\n          compute_nnls(pheno_data->masked_indivs.col(ph), yres, (genep_all_sfx == \"\" ? \"ALL\" : genep_all_sfx));\n        else reset_vals();\n\n        if( apply_single_p && genep_all_masks && valid_pval(pval_nnls_pos) && valid_pval(pval_nnls_neg)) overall_p[\"SBAT\"] = plog;\n        if( ((apply_single_p && genep_all_masks) || !apply_single_p) && print_stats) // default output\n          sum_stats_str[joint_tests_map[\"sbat\"]][ph] = print_output(joint_tests_map[\"sbat\"], genep_all_sfx, ph+1, chrom, block, pheno_name, params);\n        if((apply_single_p && genep_all_masks) || (!apply_single_p && nnls_verbose_out)) {\n          // verbose output with NNLS pos & neg split into two\n          // 1. NNLS pos\n          if(run_tests && valid_pval(pval_nnls_pos)) get_pv(pval_nnls_pos);\n          else reset_vals();\n          if(print_stats) sum_stats_str[joint_tests_map[\"sbat_pos\"]][ph] = print_output(joint_tests_map[\"sbat_pos\"], genep_all_sfx, ph+1, chrom, block, pheno_name, params);\n          // 2. 
NNLS neg\n          if(run_tests && valid_pval(pval_nnls_neg)) get_pv(pval_nnls_neg);\n          else reset_vals();\n          if(print_stats) sum_stats_str[joint_tests_map[\"sbat_neg\"]][ph] = print_output(joint_tests_map[\"sbat_neg\"], genep_all_sfx, ph+1, chrom, block, pheno_name, params);\n        }\n      }\n    }\n\n    // should at least have burden-acat p-value\n    if(apply_single_p) {\n      if(run_tests) run_single_p_acat(bs, chrom, block, ph, pheno_name, block_info, overall_p, gblock, yres, pheno_data->masked_indivs.col(ph), sum_stats_str, params);\n      else if(!params->split_by_pheno) { // when printing to single file and test failed\n        if( genep_all_masks )\n          sum_stats_str[joint_tests_map[\"gene_p\"]][ph] = print_gene_output(\"GENE_P\" + (genep_all_sfx == \"\" ? \"\" : \"_\" + genep_all_sfx), \"\", ph+1, chrom, block, pheno_name, params);\n        for (itr = gene_p_tests.begin(); itr !=  gene_p_tests.end(); ++itr) \n          sum_stats_str[joint_tests_map[\"gene_p\" + itr->first]][ph] = print_gene_output(\"GENE_P_\" + itr->first, \"\", ph+1, chrom, block, pheno_name, params);\n      }\n    }\n\n  }\n\n  // store sum stats (if single file, store at index 0)\n  for(size_t i = 0; i < joint_tests_map.size(); ++i)\n    for(int ph = 0; ph < params->n_pheno; ++ph){\n      print_index = params->split_by_pheno ? ph : 0;\n      out_str[print_index].append( sum_stats_str[i][ph] );\n    }\n\n  return out_str;\n}\n\n\n// determine if marginal test failed \nbool JTests::set_vars(const int& bs, const int& ph, std::vector<variant_block> const& block_info){\n\n  good_vars = ArrayXb::Constant(bs, false);\n  log10pv = ArrayXd::Zero(bs);\n\n  //if(debug_mode) cerr << \"checking burden masks in set...\";\n  for(int isnp = 0; isnp < bs; isnp++){\n    good_vars(isnp) = !block_info[isnp].ignored && !block_info[isnp].ignored_trait(ph) && !block_info[isnp].test_fail(ph);\n    if(!good_vars(isnp)) continue;\n    log10pv(isnp) = block_info[isnp].pval_log(ph);\n  }\n  nvars = good_vars.count();\n\n  return (nvars == 0);\n}\n\n\nvoid JTests::compute_minp(){\n\n  df_test = good_vars.count();\n  if( df_test == 0 ) {reset_vals(); return;}\n\n  // get minimum p-value (on log scale)\n  get_pv( pow(10, -(log10pv.maxCoeff())) );\n\n}\n\n\nvoid JTests::compute_acat(const int& bs, const int& ph, const vector<variant_block>& block_info){\n\n  double v_maf, tmpd;\n  boost::math::beta_distribution<>  dist(acat_a1, acat_a2);\n\n  df_test = good_vars.count();\n  //if(debug_mode) cerr << \"# burden masks for joint acat test = \" << df_test << \"\\n\";\n  if( df_test == 0 ) {reset_vals(); return;}\n\n  // make array of weights\n  ArrayXd wts = ArrayXd::Zero(bs);\n  for(int isnp = 0; isnp < bs; isnp++) {\n    if( !good_vars(isnp) ) continue;\n    // compute weights\n    if( valid_snp_mode && !apply_single_p) {// sqrt(w)=dbeta(maf,a1,a2)*sqrt(maf*(1-maf))\n      v_maf = min( block_info[isnp].af(ph), 1 - block_info[isnp].af(ph) );\n      //cerr << v_maf << endl;\n      tmpd = pdf( dist, v_maf );\n      wts(isnp) = v_maf * (1-v_maf) * tmpd * tmpd;\n    } else wts(isnp) = 1; // assume weight=1\n  }\n  //if(debug_mode) cerr << \"done building acat weights\\n\";\n  //if(debug_mode) cerr << log10pv.matrix().transpose() << \"\\n\\n\" << wts.matrix().transpose() << \"\\n\";\n\n  // get ACAT test stat\n  get_chisq(get_acat(log10pv, wts));\n\n}\n\ndouble get_acat_robust(const Eigen::Ref<const ArrayXd>& logpvals, const Eigen::Ref<const ArrayXd>& weights){ // robust to low pvalues\n\n  // if single pval, return 
log10p\n  int n_pv = ((weights!=0) && (logpvals >= 0)).count();\n  if(n_pv == 0) return -1;\n  else if(n_pv == 1) return (weights!=0).select(logpvals, 0).maxCoeff();\n\n  cauchy dc(0,1);\n  double lpv_thr = 15, lpval_out;\n\n  // split pvals by thr\n  int n_A = ((weights!=0) && (logpvals >= lpv_thr)).count(); // very small pvals\n  int n_B = ((weights!=0) && (logpvals >= 0) && (logpvals < lpv_thr)).count();\n  double wsum = (logpvals >= 0).select(weights, 0).sum();\n  double l_TA = 0, TB = 0;\n\n  // T_A\n  if(n_A > 0){ // compute on log scale to handle the very small pvalues\n    ArrayXi vind = get_true_indices((weights!=0) && (logpvals >= lpv_thr));\n    ArrayXd lp = logpvals( vind ), ws = weights( vind ) / wsum;\n    ArrayXd zvec = lp * log(10) + ws.log() - log(M_PI);\n    double zmax = zvec.maxCoeff();\n    l_TA = zmax + log( (zvec - zmax).exp().sum() );\n  }\n  // T_B (can be negative)\n  if(n_B > 0){\n    ArrayXi vind = get_true_indices((weights!=0) && (logpvals >= 0) && (logpvals < lpv_thr));\n    ArrayXd pv = pow(10, -logpvals(vind)).min(0.999); // avoid pvalues of 1\n    ArrayXd ws = weights( vind ) / wsum; \n    TB = ( ws * tan( M_PI * (0.5 - pv)) ).sum(); \n  }\n\n  // T_ACAT = TA + TB\n  if(n_A == 0){ // avoid computing log(TB) as TB can be negative\n    lpval_out = ( TB >= 8886111 ? -log(TB) - log(M_PI) : log(cdf(complement(dc, TB))) );\n  } else if ((n_B == 0) || (TB == 0)){\n    lpval_out = ( l_TA >= 16 ? -l_TA - log(M_PI) : log(cdf(complement(dc, exp(l_TA)))) );\n  } else {\n    double lsum; // get sum on log scale\n    if(TB < 0){\n      double l_abs_TB = log(fabs(TB));\n      if(l_abs_TB < l_TA)\n        lsum = l_TA + log1p(-exp(l_abs_TB - l_TA));\n      else { // compute log(-Tacat)\n        lsum = l_abs_TB + log1p(-exp(l_TA - l_abs_TB)); \n        lpval_out = ( lsum >= 16 ? log1p(-exp(-lsum-log(M_PI))) : log(cdf(complement(dc, -exp(lsum)))) );\n        return -lpval_out/log(10);\n      }\n    } else {\n      double l_TB = log(TB);\n      lsum = fmax(l_TA, l_TB) + log1p(exp(-fabs(l_TB - l_TA)));\n    } \n    lpval_out = ( lsum >= 16 ? 
-lsum - log(M_PI) : log(cdf(complement(dc, exp(lsum) ))) );\n  }\n\n  // return log10P\n  return -lpval_out/log(10);\n}\n\ndouble get_acat_robust(const Eigen::Ref<const ArrayXd>& logpvals){\n  ArrayXd wts = ArrayXd::Constant(logpvals.size(), 1); // uniform weights\n  return get_acat_robust(logpvals, wts);\n}\n\ndouble get_acat(const Eigen::Ref<const ArrayXd>& logpvals, const Eigen::Ref<const ArrayXd>& weights){\n  double logp = get_acat_robust(logpvals, weights);\n  return logp;\n}\n\ndouble get_acat(const Eigen::Ref<const ArrayXd>& logpvals){ // uniform weights\n  double logp = get_acat_robust(logpvals);\n  return logp;\n}\n\n/*\ndouble get_acat(const Eigen::Ref<const ArrayXd>& logpvals, const Eigen::Ref<const ArrayXd>& weights){\n\n  cauchy dc(0,1);\n  double tol = 10.0 * std::numeric_limits<double>::min(), pv_thr = 1e-15;\n\n  // if single pval, return pval\n  if(logpvals.size() == 1) {\n    if((logpvals(0) >= 0) && (weights(0) != 0)) return pow(10, -logpvals(0));\n    else return -1;\n  }\n\n  // use approx for small p-values (from ACAT R package)\n  ArrayXd pvals = ((weights!=0) && (logpvals >= 0)).select( pow(10, -logpvals) , 0.5).max(tol).min(0.999); // to prevent underflow/overflow\n  //cerr << \"log10pv=\" << logpvals.matrix().transpose() << \"\\npv=\" << pvals.matrix().transpose() << \"\\nw=\" << weights.matrix().transpose() << \"\\n\";\n  double acat = (pvals > pv_thr).select( weights * tan( M_PI * (0.5 - pvals)), (weights / pvals) / M_PI).sum();\n  double wsum = (logpvals >= 0).select(weights, 0).sum();\n  //cerr << std::setprecision(10) << \"acat num=\" << acat << \" denum=\" << wsum << endl;\n\n  return cdf(complement(dc, acat/wsum ));\n}\n\ndouble get_acat(const Eigen::Ref<const ArrayXd>& logpvals){ // uniform weights\n\n  cauchy dc(0,1);\n  double tol = 10.0 * std::numeric_limits<double>::min(), pv_thr = 1e-15;\n\n  // if single pval, return pval\n  if(logpvals.size() == 1) return pow(10, -logpvals(0));\n\n  // use approx for small p-values (from ACAT R package)\n  ArrayXd pvals = pow(10, -logpvals).max(tol).min(0.999); // to prevent underflow/overflow\n  double acat = (pvals > pv_thr).select( tan( M_PI * (0.5 - pvals)), (1.0 / pvals) / M_PI).sum();\n  double wsum = logpvals.size();\n  //cerr << std::setprecision(10) << \"acat num=\" << acat << \" denum=\" << wsum << endl;\n\n  return cdf(complement(dc, acat/wsum ));\n}\n*/\n\nvoid JTests::compute_qr_G(const Eigen::Ref<const MatrixXb>& mask, struct geno_block const* gblock){\n\n  ArrayXi colkeep;\n  MatrixXd Gnew;\n  indices_vars.resize(0);\n\n  // filter out bad variants\n  Gnew = MatrixXd::Zero( gblock->Gmat.rows(), good_vars.count() );\n  for(int i = 0, j = 0; i < gblock->Gmat.cols(); i++){\n    if(!good_vars(i)) continue;\n    Gnew.col(j++) = gblock->Gmat.col(i);\n    indices_vars.push_back(i);\n  }\n  Gnew.array().colwise() *= mask.col(0).array().cast<double>();\n\n  // find set of linearly independent cols\n  ColPivHouseholderQR<MatrixXd> qrA(Gnew);\n  qrA.setThreshold(qr_tol); \n  df_test = qrA.rank();\n\n  if(df_test == 0) return;\n  else if ( df_test < good_vars.count() ){\n    colKeep = qrA.colsPermutation().indices();\n    //ArrayXi tmp1(df_test);tmp1 << 0,1,3,4,5,6,7,9,10;colKeep = tmp1;\n    //cerr << qr_tol << \" -> \" << colKeep.matrix().transpose().array() << endl;\n    std::vector<int> new_indices;\n\n    // keep only linearly independent columns\n    Gtmp.resize(gblock->Gmat.rows(), df_test);\n\n    for(int i = 0; i < df_test; i++){\n      Gtmp.col(i) = Gnew.col( colKeep(i,0) );\n      
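// record which original variant each retained (linearly independent) column corresponds to\n      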
new_indices.push_back( indices_vars[ colKeep(i,0) ] );\n    }\n\n    indices_vars = new_indices;\n\n  } else Gtmp = Gnew;\n\n  /*\n  // check min eigenvalue\n  MatrixXd gtg = Gtmp.transpose() * Gtmp;\n  SelfAdjointEigenSolver<MatrixXd> es(gtg, false);\n  cerr << es.eigenvalues().head(2) << endl << endl;\n  */\n\n  //cerr << Gtmp.block(0,0,5,5) << endl;\n}\n\n\nvoid JTests::compute_ftest(const Eigen::Ref<const MatrixXb>& mask, const Eigen::Ref<const Eigen::MatrixXd>& ymat){\n\n  if( df_test == 0 ) {\n    reset_vals();\n    return;\n  }\n\n  int ns = mask.col(0).array().count() - ncovars;\n  int df_ur = ns - df_test;\n\n  if( df_ur <= 0 ) {\n    reset_vals();\n    return;\n  }\n\n  double ss_r, ss_m, tmpd;\n  ArrayXd y_tmp;\n  MatrixXd bhat, GtG;\n  fisher_f dist(df_test, df_ur);\n\n  y_tmp = ymat.col(0).array() *  mask.col(0).array().cast<double>();\n  GtG = Gtmp.transpose() * Gtmp;\n  LLT<MatrixXd> lltOfA(GtG);\n  bhat = lltOfA.solve(Gtmp.transpose() * y_tmp.matrix()) ; // vector\n  //cerr << bhat << endl;\n\n  ArrayXd yhat = (Gtmp * bhat).array();\n\n  // SSM\n  ss_m = yhat.square().sum();\n  // SSR\n  ss_r = ns - ss_m; // y is standardized\n\n  // Ftest\n  zval = (ss_m / df_test) / (ss_r / df_ur);\n  //cerr << \"DF1=\" << df_test  << \";DF2=\" << df_ur << \";SS1=\" << ss_r << \";SS2=\" << ss_m << \";F=\" << zval << endl;\n\n  // pvalue\n  if(zval >= 0) {\n    tmpd = cdf(complement(dist, zval)); \n    get_pv( tmpd );\n  } else reset_vals();\n\n}\n\n\nvoid JTests::compute_nnls(const Eigen::Ref<const MatrixXb>& mask, const Eigen::Ref<const Eigen::MatrixXd>& ymat, string const& input_class){\n\n  pval_nnls_pos = -1; pval_nnls_neg = -1;\n  if( df_test == 0 ) {\n    reset_vals();\n    return;\n  }\n\n  int ns = mask.col(0).array().cast<int>().sum() - ncovars;\n  int df_ur = ns - df_test, adapt_napprox = 2;\n\n  if( df_ur <= 0 ) {\n    reset_vals();\n    return;\n  }\n  \n  double pval_min2, adapt_thr = 1e-3; \n  VectorXd y_tmp = ymat.col(0).array() * mask.col(0).array().cast<double>();\n  \n  // (deprecated) compute NNLS p-value by function\n  /* double pval_min2_old = jburden_test(y_tmp, Gtmp, df_ur, nnls_tol, nnls_napprox, true, 3); */\n\n  // initialize an object of NNLS class & pass parameters\n  NNLS nnls(nnls_napprox, nnls_normalize, nnls_tol, nnls_strict, nnls_verbose);\n  nnls.gen = rng_rd;\n\n  if(nnls_adaptive){ // run the NNLS test using adaptive strategy\n\n    nnls.napprox = adapt_napprox;\n\n    int p = Gtmp.cols();\n    MatrixXd XtX(MatrixXd(p, p).setZero().selfadjointView<Lower>().rankUpdate(Gtmp.adjoint()));\n    MatrixXd XtX_inv = XtX.llt().solve(MatrixXd::Identity(p, p));\n\n    nnls.pw_weights(XtX_inv);\n    if(nnls.nw > 0) nnls.pw_run(y_tmp, Gtmp, df_ur);\n\n    if((nnls.pval_min2 >= 0) && (nnls.pval_min2 < adapt_thr)) {\n      nnls.pw_weights(nnls_napprox);\n      // note: no need to refit the NNLS model; done above by nnls.pw_run(Y, X)\n      nnls.pw_calc_pvals();\n    }\n\n  } else if (nnls_mt_weights && nnls_weights[input_class][Gtmp.cols()-1].size() ) { // use pre-computed weights\n    nnls.wts = nnls_weights[input_class][Gtmp.cols()-1];\n    nnls.nw = nnls_weights[input_class][Gtmp.cols()-1].size();\n    nnls.pw_run(y_tmp, Gtmp, df_ur);\n  } else  // run the NNLS test: model fitting and inference\n    nnls.run(y_tmp, Gtmp, df_ur);\n\n  // get the final p-value = min(NNLS with b>=0, NNLS with b<=0)\n  // -1 value means that NNLS failed; check the error message\n  pval_min2 = nnls.pval_min2; \n  // get additional p-values & assign\n  // to be used downstream if NNLS pos/neg are reported separately\n  pval_nnls_pos = nnls.fit_pos.pval;\n  pval_nnls_neg = nnls.fit_neg.pval;\n\n  if(valid_pval(pval_min2)) {\n\n    // store wts\n    if (nnls_mt_weights && !nnls_weights[input_class][Gtmp.cols()-1].size() ) // store weights used\n      nnls_weights[input_class][Gtmp.cols()-1] = nnls.wts;\n\n    // pvalue\n    //if(apply_single_p) { // compute pval_min2 using ACAT\n    ArrayXd nnls_lpvs(2); \n    nnls_lpvs << -log10(max(nl_dbl_dmin, pval_nnls_pos)), -log10(max(nl_dbl_dmin, pval_nnls_neg)); // avoid 0 p-values (TBD: switch to log10p)\n    get_chisq(get_acat(nnls_lpvs)); \n    pval_min2 = pval;\n    //} else pval_min2 = min(1.0, 2 * pval_min2); // apply bonferroni correction\n\n    // print extra NNLS information if requested\n    if(nnls_verbose_out) {\n      string fname = out_file_prefix + \"_sbat.info\";\n      ofstream file_info(fname, std::ios_base::out | std::ios_base::app);\n\n      // v1: tabular format\n      // variant results per line\n      for(int i = 0; i < df_test; i++) {\n        unsigned int k = indices_vars[i]; \n        file_info << input_class << \" \"; // to accommodate gene_p classes\n        file_info << variant_names[k] << \" \"; // variant ID\n        file_info << nnls.str_sel_i(i) << \" \"; // NNLS selection status\n        file_info << nnls.str_bhat_i(i) << \" \"; // NNLS beta\n        file_info << nnls.bhat_ols[i] << \"\\n\"; // OLS beta\n      }\n\n\n      // v2: non-tabular format\n      /* // write line with variant names */\n      /* for(unsigned int i = 0; i < df_test; i++) { */\n      /*   file_info << variant_names[ indices_vars[i] ] << \" \"; */\n      /* } */\n      /* file_info << endl; */\n\n      /* // write nnls info */ \n      /* file_info << nnls.str_info(); */\n\n      file_info.close();\n    } \n\n  } else reset_vals();\n\n}\n\nvoid JTests::compute_gates(const int& ph, const std::vector<variant_block>& block_info){\n\n  int gcol;\n  double p_gates, m_e, p_i, m_ei;\n\n  if( df_test == 0 ) {\n    reset_vals();\n    return;\n  } else if( df_test == 1 ){\n    p_gates = pow(10, -log10pv(indices_vars[0]));\n    if( p_gates >= 0) get_pv( p_gates );\n    else reset_vals();\n    return;\n  }\n\n  // Sort p-values\n  vector<double> pvals, sorted_pv;\n  MatrixXd tmpG = Gtmp;\n\n  pvals.resize(df_test);\n  for(int i = 0; i < df_test; i++) {\n    //cerr << log10pv(indices_vars[i]) ;\n    pvals[i] = pow(10, -log10pv(indices_vars[i]));\n    //cerr << \"\\t\" << pvals[i] << endl;\n  }\n\n  gcol = 0;\n  for (auto i: sort_indexes(pvals)) {\n    sorted_pv.push_back(pvals[i]);\n    tmpG.col(gcol++) = Gtmp.col(i);\n    //cout << i << \" \" << pvals[i] << endl;\n  }\n\n  // Compute corr(G)\n  MatrixXd GtG, corP;\n  GtG = tmpG.transpose() * tmpG / scale_denum;\n  //cerr << endl << scale_denum << endl << GtG.block(0,0,4,4) << endl << endl ;\n\n  corP.resize( GtG.rows(), GtG.cols() ); \n  //approximation used from Postgwas R package in gene2p.R (Milan Hiersche et al, 2013)\n  corP.array() = 0.7723 * GtG.array().pow(6) -\n    1.5659 * GtG.array().pow(5) +\n    1.201 * GtG.array().pow(4) -\n    0.2355 * GtG.array().pow(3) +\n    0.2184 * GtG.array().pow(2) +\n    0.6086 * GtG.array();\n  //cerr << corP.block(0,0,4,4) << endl;\n  //cerr<<endl<<sorted_pv[0] << \" \" << sorted_pv[1] << endl << endl;\n\n  // Me\n  m_e = get_me(corP);\n  //cerr << m_e << \"\\n\\n\";\n\n  // get Gates p-value\n  p_gates = 1;\n  for(int i = 0; i < df_test; i++) {\n    m_ei = get_me( corP.block(0,0,i+1,i+1) );\n    //if(i<2) cerr << m_ei << endl;\n    p_i = m_e * 
sorted_pv[i] / m_ei;\n    if(p_i < p_gates) p_gates = p_i;\n  }\n\n  // pvalue\n  if( p_gates >= 0) get_pv( p_gates );\n  else reset_vals();\n  \n}\n\ndouble JTests::get_me(const Ref<const MatrixXd>& ldmat){\n\n  int ncols = ldmat.cols();\n  double m_e; \n\n  if(ncols == 1) return 1;\n\n  // Get eigen values\n  SelfAdjointEigenSolver<MatrixXd> es(ldmat, Eigen::EigenvaluesOnly);\n  ArrayXd D = es.eigenvalues().array();\n  //cerr << D.head(5) << endl;\n  m_e = ncols - ( D > 1 ).select(D - 1, 0).sum();\n\n  return m_e;\n}\n\nvoid JTests::run_single_p_acat(int const& bs, const int& chrom, const int& block, int const& ph, const string& pheno_name, std::vector<variant_block>& block_info, std::map<std::string, double>& overall_p, struct geno_block const* gblock, const Eigen::Ref<const Eigen::MatrixXd>& yres, const Eigen::Ref<const MatrixXb>& mask, vector<vector<string>>& sum_stats_str, struct param const* params){\n\n  double max_logp = -1, pv; \n  string mname, max_logp_mask = \"\";\n  vector<string> keep_tests = { \"ACATV\", \"SKATO-ACAT\" };\n  ArrayXd pvals_gene;\n\n  // gene_p combining all masks\n  if( genep_all_masks ){\n    vector<double> acatv_acat, skato_acat;\n    // get SKATO/ACATV p-values as well as top mask\n    for(int imask = 0; imask < bs; imask++){\n      mname = block_info[imask].mask_name;\n      if( (log10pv(imask) > max_logp) && (log10pv(imask) > 0) ){ // check strongest signal also in burden-only test\n        max_logp_mask = mname;\n        max_logp = log10pv(imask);\n      }\n      if(block_info[imask].skip_for_vc) continue; \n      for(auto const& extract_test : keep_tests)\n        if(in_map(extract_test, block_info[imask].sum_stats_vc)){\n          pv = block_info[imask].sum_stats_vc[extract_test](ph,1); \n          if(pv>=0){\n            if(pv>max_logp){\n              max_logp_mask = mname;\n              max_logp = pv;\n            }\n            if(extract_test == \"ACATV\") acatv_acat.push_back( pv );\n            else if(extract_test == \"SKATO-ACAT\") skato_acat.push_back( pv );\n          }\n        }\n    }\n    // compute acat for acatv & skato\n    if(acatv_acat.size() > 0){\n      df_test = acatv_acat.size();\n      ArrayXd pvals_arr = MapArXd( acatv_acat.data(), df_test); \n      get_chisq(get_acat(pvals_arr));\n      if(plog>=0) overall_p[\"ACATV-ACAT\"] = plog;\n      sum_stats_str[joint_tests_map[\"acatv_acat\"]][ph] = print_gene_output(test_pfx + \"ACATV-ACAT\" + (genep_all_sfx == \"\" ? \"\" : \"_\" + genep_all_sfx), \"\", ph+1, chrom, block, pheno_name, params);\n    } else if(!params->split_by_pheno){\n      reset_vals();\n      sum_stats_str[joint_tests_map[\"acatv_acat\"]][ph] = print_gene_output(test_pfx + \"ACATV-ACAT\" + (genep_all_sfx == \"\" ? \"\" : \"_\" + genep_all_sfx), \"\", ph+1, chrom, block, pheno_name, params);\n    }\n    if(skato_acat.size() > 0){\n      df_test = skato_acat.size();\n      ArrayXd pvals_arr = MapArXd( skato_acat.data(), df_test); \n      get_chisq(get_acat(pvals_arr));\n      if(plog>=0) overall_p[\"SKATO-ACAT\"] = plog;\n      sum_stats_str[joint_tests_map[\"skato_acat\"]][ph] = print_gene_output(test_pfx + \"SKATO-ACAT\" + (genep_all_sfx == \"\" ? \"\" : \"_\" + genep_all_sfx), \"\", ph+1, chrom, block, pheno_name, params);\n    } else if(!params->split_by_pheno){\n      reset_vals();\n      sum_stats_str[joint_tests_map[\"skato_acat\"]][ph] = print_gene_output(test_pfx + \"SKATO-ACAT\" + (genep_all_sfx == \"\" ? 
\"\" : \"_\" + genep_all_sfx), \"\", ph+1, chrom, block, pheno_name, params);\n    }\n    // combine all p-values and pass through acat\n    if(overall_p.size()>0){\n      map_to_vec(df_test, overall_p, pvals_gene);\n      get_chisq(get_acat(pvals_gene));\n      sum_stats_str[joint_tests_map[\"gene_p\"]][ph] = print_gene_output(\"GENE_P\" + (genep_all_sfx == \"\" ? \"\" : \"_\" + genep_all_sfx), max_logp_mask, ph+1, chrom, block, pheno_name, params);\n    } else if(!params->split_by_pheno){\n      reset_vals();\n      sum_stats_str[joint_tests_map[\"gene_p\"]][ph] = print_gene_output(\"GENE_P\" + (genep_all_sfx == \"\" ? \"\" : \"_\" + genep_all_sfx), \"\", ph+1, chrom, block, pheno_name, params);\n    }\n  }\n\n  // go through each set of masks\n  std::map <std::string, std::map <std::string, bool>>::iterator itr;\n  for (itr = gene_p_tests.begin(); itr !=  gene_p_tests.end(); ++itr) {\n\n    std::map <std::string, double> overall_p_set;\n    good_vars = false;\n    max_logp = -1;\n    max_logp_mask = \"\";\n    bool get_top_mask = itr->second.size() > 1;\n    vector<double> acatv_acat, skato_acat;\n    if(params->debug) cerr << itr->first << \":\\n\";\n\n    // identify all the masks in the set\n    for(int imask = 0; imask < bs; imask++){\n      mname = block_info[imask].mask_name;\n      good_vars(imask) = in_map(mname, itr->second) && !block_info[imask].ignored && !block_info[imask].ignored_trait(ph) && !block_info[imask].test_fail(ph);\n      //if(params->debug) cerr << mname << \" \" << std::boolalpha << in_map(mname, itr->second) << \" && \" << (!block_info[imask].ignored && !block_info[imask].ignored_trait(ph) && !block_info[imask].test_fail(ph)) << \" -> \" << good_vars(imask) << \"\\n\";\n      if(!good_vars(imask)) continue;\n      //if(params->debug) cerr << mname << \":\" << log10pv(imask) << \"\\n\";\n      if( get_top_mask && (log10pv(imask) > max_logp) && (log10pv(imask) > 0) ){ // check strongest signal also in burden-only test\n        max_logp_mask = mname;\n        max_logp = log10pv(imask);\n      }\n      if(block_info[imask].skip_for_vc) continue; \n      for(auto const& extract_test : keep_tests)\n        if( in_map(extract_test, block_info[imask].sum_stats_vc) ){\n          pv = block_info[imask].sum_stats_vc[extract_test](ph,1); \n          if(pv>=0){\n            if(get_top_mask && (pv > max_logp)){\n              max_logp_mask = mname;\n              max_logp = pv;\n            }\n            if(extract_test == \"ACATV\") acatv_acat.push_back( pv );\n            else if(extract_test == \"SKATO-ACAT\") skato_acat.push_back( pv );\n          }\n        }\n    }\n\n    if(good_vars.any()){\n      if(params->debug) cerr << \"M=\" << good_vars.count() << \"\\n\";\n\n      // run acat\n      compute_acat(bs, ph, block_info);\n      if(plog >= 0) overall_p_set[\"BURDEN-ACAT\"] = plog;\n      sum_stats_str[joint_tests_map[\"acat\" + itr->first]][ph].append(print_output(joint_tests_map[\"acat\"], itr->first, ph+1, chrom, block, pheno_name, params));\n\n      // run nnls\n      if( CHECK_BIT(test_list, joint_tests_map[\"sbat\"]) ) {\n        compute_qr_G(mask, gblock);\n        compute_nnls(mask, yres, itr->first);\n        if(valid_pval(pval_nnls_pos) && valid_pval(pval_nnls_neg)) {\n          overall_p_set[\"SBAT\"] = plog;\n          sum_stats_str[joint_tests_map[\"sbat\" + itr->first]][ph] = print_output(joint_tests_map[\"sbat\"], itr->first, ph+1, chrom, block, pheno_name, params);\n          get_pv(pval_nnls_pos);\n          
sum_stats_str[joint_tests_map[\"sbat_pos\" + itr->first]][ph] = print_output(joint_tests_map[\"sbat_pos\"], itr->first, ph+1, chrom, block, pheno_name, params);\n          get_pv(pval_nnls_neg);\n          sum_stats_str[joint_tests_map[\"sbat_neg\" + itr->first]][ph] = print_output(joint_tests_map[\"sbat_neg\"], itr->first, ph+1, chrom, block, pheno_name, params);\n        } else if(!params->split_by_pheno) {\n          reset_vals();\n          sum_stats_str[joint_tests_map[\"sbat\" + itr->first]][ph] = print_output(joint_tests_map[\"sbat\"], itr->first, ph+1, chrom, block, pheno_name, params);\n          sum_stats_str[joint_tests_map[\"sbat_pos\" + itr->first]][ph] = print_output(joint_tests_map[\"sbat_pos\"], itr->first, ph+1, chrom, block, pheno_name, params);\n          sum_stats_str[joint_tests_map[\"sbat_neg\" + itr->first]][ph] = print_output(joint_tests_map[\"sbat_neg\"], itr->first, ph+1, chrom, block, pheno_name, params);\n        }\n      }\n\n      // compute acat for acatv & skato\n      if(acatv_acat.size() > 0){\n        df_test = acatv_acat.size();\n        ArrayXd pvals_arr = MapArXd( acatv_acat.data(), df_test); \n        get_chisq(get_acat(pvals_arr));\n        if(plog>=0) overall_p_set[\"ACATV-ACAT\"] = plog;\n        sum_stats_str[joint_tests_map[\"acatv_acat\" + itr->first]][ph] = print_gene_output(test_pfx + \"ACATV-ACAT_\" + itr->first, \"\", ph+1, chrom, block, pheno_name, params);\n      } else if(!params->split_by_pheno){\n        reset_vals();\n        sum_stats_str[joint_tests_map[\"acatv_acat\" + itr->first]][ph] = print_gene_output(test_pfx + \"ACATV-ACAT_\" + itr->first, \"\", ph+1, chrom, block, pheno_name, params);\n      }\n      if(skato_acat.size() > 0){\n        df_test = skato_acat.size();\n        ArrayXd pvals_arr = MapArXd( skato_acat.data(), df_test); \n        get_chisq(get_acat(pvals_arr));\n        if(plog>=0) overall_p_set[\"SKATO-ACAT\"] = plog;\n        sum_stats_str[joint_tests_map[\"skato_acat\" + itr->first]][ph] = print_gene_output(test_pfx + \"SKATO-ACAT_\" + itr->first, \"\", ph+1, chrom, block, pheno_name, params);\n      } else if(!params->split_by_pheno){\n        reset_vals();\n        sum_stats_str[joint_tests_map[\"skato_acat\" + itr->first]][ph] = print_gene_output(test_pfx + \"SKATO-ACAT_\" + itr->first, \"\", ph+1, chrom, block, pheno_name, params);\n      }\n\n      // apply acat to all p\n      if(overall_p_set.size()>0){\n        map_to_vec(df_test, overall_p_set, pvals_gene);\n        get_chisq(get_acat(pvals_gene));\n        sum_stats_str[joint_tests_map[\"gene_p\" + itr->first]][ph] = print_gene_output(\"GENE_P_\" + itr->first, max_logp_mask, ph+1, chrom, block, pheno_name, params);\n      } else if(!params->split_by_pheno){\n        reset_vals();\n        sum_stats_str[joint_tests_map[\"gene_p\" + itr->first]][ph] = print_gene_output(\"GENE_P_\" + itr->first, \"\", ph+1, chrom, block, pheno_name, params);\n      }\n\n    } else if(!params->split_by_pheno){\n      reset_vals();\n      // print NA for all tests for that phenotype\n      sum_stats_str[joint_tests_map[\"acat\" + itr->first]][ph].append(print_output(joint_tests_map[\"acat\"], itr->first, ph+1, chrom, block, pheno_name, params));\n      if( CHECK_BIT(test_list, joint_tests_map[\"sbat\"]) ) {\n          sum_stats_str[joint_tests_map[\"sbat\" + itr->first]][ph] = print_output(joint_tests_map[\"sbat\"], itr->first, ph+1, chrom, block, pheno_name, params);\n          sum_stats_str[joint_tests_map[\"sbat_pos\" + itr->first]][ph] = 
print_output(joint_tests_map[\"sbat_pos\"], itr->first, ph+1, chrom, block, pheno_name, params);\n          sum_stats_str[joint_tests_map[\"sbat_neg\" + itr->first]][ph] = print_output(joint_tests_map[\"sbat_neg\"], itr->first, ph+1, chrom, block, pheno_name, params);\n      }\n      sum_stats_str[joint_tests_map[\"acatv_acat\" + itr->first]][ph] = print_gene_output(test_pfx + \"ACATV-ACAT_\" + itr->first, \"\", ph+1, chrom, block, pheno_name, params);\n      sum_stats_str[joint_tests_map[\"skato_acat\" + itr->first]][ph] = print_gene_output(test_pfx + \"SKATO-ACAT_\" + itr->first, \"\", ph+1, chrom, block, pheno_name, params);\n      sum_stats_str[joint_tests_map[\"gene_p\" + itr->first]][ph] = print_gene_output(\"GENE_P_\" + itr->first, \"\", ph+1, chrom, block, pheno_name, params);\n    }\n  }\n\n}\n\nstring JTests::print_output(const int& ttype, const int& ipheno, const int& chrom, const int& block, const string& pheno_name, struct param const* params){\n\n  if(!params->htp_out) return print_sum_stats(test_names[ttype], ipheno, chrom, block, params);\n  else return print_sum_stats_htp(test_names[ttype], chrom, block, pheno_name, ipheno, params);\n\n}\n\nstring JTests::print_output(const int& ttype, string const& tsuf, const int& ipheno, const int& chrom, const int& block, const string& pheno_name, struct param const* params){\n\n  if(!params->htp_out) return print_sum_stats(test_names[ttype] + (tsuf == \"\" ? \"\" : \"_\" + tsuf), ipheno, chrom, block, params);\n  else return print_sum_stats_htp(test_names[ttype] + (tsuf == \"\" ? \"\" : \"_\" + tsuf), chrom, block, pheno_name, ipheno, params);\n\n}\n\n// normal regenie format\nstd::string JTests::print_sum_stats(const string& tname, const int& ipheno, const int& chrom, const int& block, struct param const* params){\n\n  std::ostringstream buffer;\n\n  // chr pos id a0 a1 af\n  if(params->split_by_pheno || ipheno == 1) {\n    buffer << setinfo[chrom - 1][block].chrom << \" \" << setinfo[chrom - 1][block].physpos << \" \" << setinfo[chrom - 1][block].ID << \" NA NA NA \" ;\n    if( params->af_cc ) buffer << \"NA NA \";\n    // info\n    if(!params->build_mask && params->dosage_mode) buffer << \"NA \";\n    // n\n    if(params->split_by_pheno) buffer << params->pheno_counts.row(ipheno-1).sum() << \" \";\n    else buffer << \"NA \";\n    if( params->af_cc ) {\n      if(params->split_by_pheno) buffer << params->pheno_counts(ipheno-1, 0) << \" \" << params->pheno_counts(ipheno-1, 1) << \" \";\n      else buffer << \"NA NA \";\n    }\n    if(!params->split_by_pheno) buffer << \"NA NA NA \"; // genotype counts\n    // test\n    buffer << burden_str << tname << \" \";\n  }\n\n  //beta se\n  buffer << \"NA NA \";\n\n  // chisq\n  if( zval != -9 ) buffer << zval << \" \";\n  else buffer << \"NA \";\n\n  // pval\n  if( (plog != -9) && !is_nan(plog) )  buffer << plog << \" \";\n  else buffer << \"NA \";\n\n  //df (print it out only if split by pheno)\n  if(params->split_by_pheno || (ipheno == params->n_pheno)) {\n    if(params->split_by_pheno && (plog != -9))  buffer << \"DF=\" << df_test << endl;\n    else buffer << \"DF=NA\\n\";\n  }\n\n  reset_vals();\n  return buffer.str();\n}\n\n\n// htpv4 format\nstd::string JTests::print_sum_stats_htp(const string& tname, const int& chrom, const int& block, const string& yname, const int& ipheno, struct param const* params){\n\n  std::ostringstream buffer;\n  bool test_pass = (pval != -9) && !is_nan(plog);\n  const string cohort = params->cohort_name;\n\n  string outp_val = \"-1\";\n  if(test_pass){\n    
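// cap the reported raw p-value at the smallest representable double unless uncapped p-values were requested\n    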
if(!params->uncapped_pvals && (plog > log10_nl_dbl_dmin)) outp_val = convert_logp_raw( log10_nl_dbl_dmin );\n    else if(plog > 0) outp_val = convert_logp_raw( plog );\n    else outp_val = \"0.9999999\";\n  }\n\n  // SNP info\n  buffer << setinfo[chrom - 1][block].ID << \"\\t\"<< setinfo[chrom - 1][block].chrom << \"\\t\" << setinfo[chrom - 1][block].physpos << \"\\tref\\tset\\t\";\n  // trait, cohort, test name\n  buffer << yname << \"\\t\" << params->cohort_name << \"\\t\" << burden_model << tname;\n\n  // bhat & 95 CI\n  buffer << \"\\tNA\\tNA\\tNA\\t\" ;\n  // Pvalue\n  if(test_pass) buffer << outp_val << \"\\t\";\n  else buffer << \"NA\\t\";\n\n  // print out AF, counts in cases, counts in controls\n  buffer << \"NA\\t\" << params->pheno_counts(ipheno-1, 0) << \"\\tNA\\tNA\\tNA\\t\";\n  if(params->trait_mode == 1) buffer << params->pheno_counts(ipheno-1, 1);\n  else buffer << \"NA\";\n  buffer << \"\\tNA\\tNA\\tNA\\t\";\n\n  // info column\n  if(test_pass) buffer << \"DF=\" << df_test;\n  else buffer << \"DF=0\";\n  // log10P\n  if(test_pass) buffer << \";LOG10P=\" << plog;\n  else buffer << \";LOG10P=NA\";\n\n  buffer << \";NO_BETA\\n\";\n\n  reset_vals();\n  return buffer.str();\n\n}\n\n// single gene p\nstring JTests::print_gene_output(const string& mname, const string& max_name, const int& ipheno, const int& chrom, const int& block, const string& pheno_name, struct param const* params){\n\n  if(!params->htp_out) return print_sum_stats_gene(mname, max_name, ipheno, chrom, block, params);\n  else return print_sum_stats_htp_gene(mname, max_name, chrom, block, pheno_name, ipheno, params);\n\n}\n\n// normal regenie format\nstd::string JTests::print_sum_stats_gene(const string& mname, const string& max_name, const int& ipheno, const int& chrom, const int& block, struct param const* params){\n\n  std::ostringstream buffer;\n\n  if(params->split_by_pheno || ipheno == 1) {\n    // chr pos id a0 a1 af\n    buffer << setinfo[chrom - 1][block].chrom << \" \" << setinfo[chrom - 1][block].physpos << \" \" << setinfo[chrom - 1][block].ID << \" NA NA NA \" ;\n    if( params->af_cc ) buffer << \"NA NA \";\n    // info\n    if(!params->build_mask && params->dosage_mode) buffer << \"NA \";\n    // n\n    if(params->split_by_pheno) buffer << params->pheno_counts.row(ipheno-1).sum() << \" \";\n    else buffer << \"NA \";\n    if( params->af_cc ) {\n      if(params->split_by_pheno) buffer << params->pheno_counts(ipheno-1, 0) << \" \" << params->pheno_counts(ipheno-1, 1) << \" \";\n      else buffer << \"NA NA \";\n    }\n    if(!params->split_by_pheno) buffer << \"NA NA NA \"; // genotype counts\n    // test\n    buffer << mname << \" \";\n  }\n\n  //beta se\n  buffer << \"NA NA \";\n\n  // chisq\n  if( zval != -9 ) buffer << zval << \" \";\n  else buffer << \"NA \";\n\n  // pval\n  if( (plog != -9) && !is_nan(plog) )  buffer << plog << \" \";\n  else buffer << \"NA \";\n\n  //df (print it out only if split by pheno)\n  if(params->split_by_pheno || (ipheno == params->n_pheno)) {\n    if(params->split_by_pheno && (plog != -9))  buffer << \"DF=\" << df_test;\n    else buffer << \"DF=NA\";\n\n    // top signal (only if split files)\n    if( params->split_by_pheno && (max_name != \"\") )\n      buffer << \";STRONGEST_MASK=\" << max_name;\n\n    buffer << endl;\n  }\n\n  reset_vals();\n  return buffer.str();\n}\n\n\n// htpv4 format\nstd::string JTests::print_sum_stats_htp_gene(const string& mname, const string& max_name, const int& chrom, const int& block, const string& yname, const int& ipheno, struct 
param const* params){\n\n  std::ostringstream buffer;\n  bool test_pass = (pval != -9) && !is_nan(plog);\n  const string cohort = params->cohort_name;\n\n  string outp_val = \"-1\";\n  if(test_pass){\n    if(!params->uncapped_pvals && (plog > log10_nl_dbl_dmin)) outp_val = convert_logp_raw( log10_nl_dbl_dmin );\n    else if(plog > 0) outp_val = convert_logp_raw( plog );\n    else outp_val = \"0.9999999\";\n  }\n\n  // SNP info\n  buffer << setinfo[chrom - 1][block].ID << \"\\t\"<< setinfo[chrom - 1][block].chrom << \"\\t\" << setinfo[chrom - 1][block].physpos << \"\\tref\\tset\\t\";\n  // trait, cohort, test name\n  buffer << yname << \"\\t\" << params->cohort_name << \"\\t\" << mname;\n\n  // bhat & 95 CI\n  buffer << \"\\tNA\\tNA\\tNA\\t\" ;\n  // Pvalue\n  if(test_pass) buffer << outp_val << \"\\t\";\n  else buffer << \"NA\\t\";\n\n  // print out AF, counts in cases, counts in controls\n  buffer << \"NA\\t\" << params->pheno_counts(ipheno-1, 0) << \"\\tNA\\tNA\\tNA\\t\";\n  if(params->trait_mode == 1) buffer << params->pheno_counts(ipheno-1, 1);\n  else buffer << \"NA\";\n  buffer << \"\\tNA\\tNA\\tNA\\t\";\n\n  // info column\n  if(test_pass) buffer << \"DF=\" << df_test;\n  else buffer << \"DF=0\";\n  // top signal\n  if(max_name != \"\") buffer << \";STRONGEST_MASK=\" << max_name;\n  // log10P\n  if(test_pass) buffer << \";LOG10P=\" << plog;\n  else buffer << \";LOG10P=NA\";\n  buffer << \";NO_BETA\\n\";\n\n  reset_vals();\n  return buffer.str();\n\n}\n\nvoid JTests::get_variant_names(int const& chrom, int const& block, vector<snp> const& snpinfo){\n\n  // only for NNLS (for now)\n  if( !(CHECK_BIT(test_list, joint_tests_map[\"sbat\"]) && nnls_verbose_out) ) return;\n\n  vector<uint64> *indices =  &(setinfo[chrom - 1][block].snp_indices);\n  int bs = indices->size();\n  variant_names.resize(bs);\n\n  for(int i = 0; i < bs; i++)\n    variant_names[i] = snpinfo[ indices->at(i) ].ID;\n\n}\n\nvoid JTests::check_class_genep(string const& mask_set_file, std::map<std::string, bool> const& mask_map){\n\n  if(mask_set_file == \"\"){\n    vector<string> tmp_vec = {\"M1\"};//plof only\n    add_class(\"M1\", tmp_vec, mask_map);\n    tmp_vec[0] = \"pLoF\";//plof only\n    add_class(\"pLoF\", tmp_vec, mask_map);\n    tmp_vec[0] = \"LoF\";//plof only\n    add_class(\"LoF\", tmp_vec, mask_map);\n\n  } else {\n\n    genep_all_masks = false;\n    string line;\n    std::vector< string > tmp_str_vec;\n    ifstream mfile;\n    mfile.open( mask_set_file, ios::in );\n\n    while( getline(mfile, line) ){\n      tmp_str_vec = string_split(line,\" \\t\");\n      if(tmp_str_vec.size() < 2)\n        throw \"invalid line = '\" + line + \"'\";\n      add_class(tmp_str_vec[0], string_split(tmp_str_vec[1],\",\"), mask_map);\n    }\n    mfile.close();\n    \n  }\n\n  // allocate entry in map for each test on set of masks\n  // make sure results are printed in the order of each test then list of mask groups\n  std::map <std::string, std::map <std::string, bool>>::iterator itr;\n  vector<string> tests = {\"acat\", \"sbat\", \"acatv_acat\", \"skato_acat\", \"gene_p\"};\n  for (auto const& test_name : tests)\n    for (itr = gene_p_tests.begin(); itr !=  gene_p_tests.end(); ++itr)\n      if( (test_name == \"sbat\") && CHECK_BIT(test_list, joint_tests_map[\"sbat\"]) ) {\n          joint_tests_map[test_name + itr->first] = joint_tests_map.size();\n          joint_tests_map[test_name + \"_pos\" + itr->first] = joint_tests_map.size();\n          joint_tests_map[test_name + \"_neg\" + itr->first] = joint_tests_map.size();\n   
   } else if (test_name != \"sbat\") joint_tests_map[test_name + itr->first] = joint_tests_map.size();\n\n}\n\nvoid JTests::add_class(string const& sfx_test, vector<string> const& mask_vec, std::map<std::string, bool> const& mask_map){\n  \n  std::map <std::string, bool> tmp_map;// keep track of masks in genep class\n\n  // check it is a valid mask\n  for(auto const& mask : mask_vec) \n    if(in_map(mask, mask_map))\n      tmp_map[mask] = true;\n\n  if(in_map(sfx_test, gene_p_tests))\n    throw \"GENE_P_'\" + sfx_test + \"' has already been defined (check for duplicates in the `--rgc-gene-def` file).\";\n\n  // check it has at least one mask\n  if( tmp_map.size() > 0 ){\n    if(tmp_map.size() == mask_map.size()) {\n      genep_all_masks = true;\n      genep_all_sfx = sfx_test;\n    } else gene_p_tests[sfx_test] = tmp_map;\n  }\n\n}\n\nvoid JTests::prep_nnls_weights(int const& max_cols){\n  \n  std::vector<VectorXd> tmp_v(max_cols);\n  std::map <std::string, std::map <std::string, bool>>::iterator itr;\n\n  // for each gene-p class, fill with empty weight vectors\n  for (itr = gene_p_tests.begin(); itr !=  gene_p_tests.end(); ++itr)  \n    nnls_weights[itr->first] = tmp_v;\n  nnls_weights[ (genep_all_sfx == \"\" ? \"ALL\" : genep_all_sfx) ] = tmp_v;\n\n}\n\nvoid map_to_vec(int& nvals, std::map <std::string, double>& map_pvals, ArrayXd& vec_pvals){\n  std::map <std::string, double>::iterator itr;\n  nvals = map_pvals.size();\n  vec_pvals.resize( nvals );\n  int i = 0;\n  for (itr = map_pvals.begin(); itr !=  map_pvals.end(); ++itr) \n    vec_pvals(i++) = itr->second;\n}\n\nvoid JTests::reset_vals(){\n  pval = -9, plog = -9, zval = -9;\n}\n\nvoid JTests::get_pv(const double& pv){\n\n  if((pv < 0) || is_nan(pv)) {reset_vals(); return;}\n  chi_squared chisq(1);\n\n  pval = max(nl_dbl_dmin, pv); // to prevent underflow\n  zval = quantile(complement(chisq, pval)); // chisq stat\n  plog = -log10(pval); // -log10p\n\n}\n\nvoid JTests::get_chisq(const double& lpv){\n\n  if((lpv < 0) || is_nan(lpv))  {reset_vals(); return;}\n\n  get_chisq_stat_pv(pval, zval, lpv, nl_dbl_dmin, log10_nl_dbl_dmin);\n  plog = lpv;\n\n}\n\n\nbool valid_pval(double const& pv){\n  return !is_nan(pv) && (pv >= 0) && (pv <= 1);\n}\n"
  },
  {
    "path": "src/Joint_Tests.hpp",
    "content": "/* \n\n   This file is part of the regenie software package.\n\n   Copyright (c) 2020-2024 Joelle Mbatchou, Andrey Ziyatdinov & Jonathan Marchini\n\n   Permission is hereby granted, free of charge, to any person obtaining a copy\n   of this software and associated documentation files (the \"Software\"), to deal\n   in the Software without restriction, including without limitation the rights\n   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n   copies of the Software, and to permit persons to whom the Software is\n   furnished to do so, subject to the following conditions:\n\n   The above copyright notice and this permission notice shall be included in all\n   copies or substantial portions of the Software.\n\n   THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n   SOFTWARE.\n\n*/\n\n#ifndef JOINT_H\n#define JOINT_H\n\n\nclass JTests {\n\n  public:\n    std::map<std::string, uint16_t> joint_tests_map = { {\"minp\", 0}, {\"ftest\", 1}, {\"gates\", 2}, {\"acat\", 3}, {\"sbat\", 4}, {\"sbat_pos\", 5}, {\"sbat_neg\", 6}, {\"acatv_acat\", 7}, {\"skato_acat\", 8}, {\"gene_p\", 9} };\n    std::vector<std::string> test_names = {\"MINP\",\"F\",\"GATES\",\"ACAT\",\"SBAT\",\"SBAT_POS\",\"SBAT_NEG\", \"ACATV-ACAT\", \"SKATO-ACAT\", \"GENE_P\"};\n    std::map <std::string, std::map <std::string, bool>> gene_p_tests;\n    bool genep_all_masks = true;\n    uint16_t test_list, qr_tests = 0ULL;\n\n    // store variant set info (for each chr)\n    std::vector<std::vector<vset>> setinfo;\n\n    // for testing\n    int df_test, nvars, ncovars; // df, number of variants which passed filters,#covariates\n    bool nnls_verbose_out = false, nnls_adaptive = false, nnls_mt_weights = false;\n    bool nnls_normalize = true, nnls_strict = false;\n    int nnls_napprox, nnls_verbose = 0;\n    double acat_a1,acat_a2;\n    bool valid_snp_mode, debug_mode;\n    double pval, plog, zval, scale_denum = 0;\n    double pval_nnls_pos, pval_nnls_neg;\n    double tol = 1e-6, qr_tol = 1e-7, nnls_tol = 1e-10; // qr threshold used in R\n    double nl_dbl_dmin = 10.0 * std::numeric_limits<double>::min();\n    double log10_nl_dbl_dmin = -log10(nl_dbl_dmin);\n    std::mt19937_64* rng_rd;\n    std::string burden_type, test_pfx, burden_str, burden_model, genep_all_sfx = \"\";\n    std::string out_file_prefix; // prefix of output files\n    Eigen::ArrayXi colKeep; // keep track of linearly independent columns in Gmat\n    Eigen::MatrixXd Gtmp;\n    std::map<std::string, std::vector<Eigen::VectorXd>> nnls_weights;\n    ArrayXb good_vars;\n    Eigen::ArrayXd log10pv; \n    std::vector<int> indices_vars;\n    std::vector<std::string> variant_names;\n    bool apply_single_p = false;\n\n    // for prep.\n    bool get_test_info(const struct param*,const std::string&,mstream&);\n    bool set_vars(const int&,const int&,std::vector<variant_block> const&);\n    void compute_qr_G(const Eigen::Ref<const MatrixXb>&,struct geno_block const*);\n    void prep_nnls_weights(int const&);\n\n    // assoc. 
tests\n    std::vector<std::string> apply_joint_test(const int&,const int&,struct phenodt const*,const Eigen::Ref<const Eigen::MatrixXd>&,struct geno_block const*,std::vector<variant_block>&,struct in_files&,struct param const*);\n    void compute_minp();\n    void compute_acat(const int&,const int&,const std::vector<variant_block>&);\n    void compute_ftest(const Eigen::Ref<const MatrixXb>&,const Eigen::Ref<const Eigen::MatrixXd>&);\n    void compute_nnls(const Eigen::Ref<const MatrixXb>&,const Eigen::Ref<const Eigen::MatrixXd>&,std::string const&);\n    void compute_gates(const int&,const std::vector<variant_block>&);\n    double get_me(const Eigen::Ref<const Eigen::MatrixXd>&);\n\n    // final acat round\n    void check_class_genep(std::string const&,std::map<std::string,bool> const&);\n    void add_class(std::string const&,std::vector<std::string> const&,std::map<std::string,bool> const&);\n    void run_single_p_acat(int const&,const int&,const int&,int const&,const std::string&,std::vector<variant_block>&,std::map<std::string, double>&,struct geno_block const*,const Eigen::Ref<const Eigen::MatrixXd>&,const Eigen::Ref<const MatrixXb>&,std::vector<std::vector<std::string>>&,struct param const*);\n    std::string print_gene_output(const std::string&,const std::string&,const int&,const int&,const int&,const std::string&,struct param const*);\n    std::string print_sum_stats_gene(const std::string&,const std::string&,const int&,const int&,const int&,struct param const*);\n    std::string print_sum_stats_htp_gene(const std::string&,const std::string&,const int&,const int&,const std::string&,const int&,struct param const*);\n\n    // print results\n    std::string print_output(const int&,const int&,const int&,const int&,const std::string&,struct param const*);\n    std::string print_output(const int&,const std::string&,const int&,const int&,const int&,const std::string&,struct param const*);\n    std::string print_sum_stats(const std::string&,const int&,const int&,const int&,struct param const*);\n    std::string print_sum_stats_htp(const std::string&,const int&,const int&,const std::string&,const int&,struct param const*);\n\n    void get_variant_names(int const&,int const&,std::vector<snp> const&);\n    void reset_vals();\n    void get_pv(const double&);\n    void get_chisq(const double&);\n\n    JTests();\n    ~JTests();\n};\n\ndouble get_acat_robust(const Eigen::Ref<const Eigen::ArrayXd>&,const Eigen::Ref<const Eigen::ArrayXd>&);\ndouble get_acat_robust(const Eigen::Ref<const Eigen::ArrayXd>&);\ndouble get_acat(const Eigen::Ref<const Eigen::ArrayXd>&,const Eigen::Ref<const Eigen::ArrayXd>&);\ndouble get_acat(const Eigen::Ref<const Eigen::ArrayXd>&); // uniform weights\nbool valid_pval(double const&);\nvoid map_to_vec(int&,std::map<std::string,double>&,Eigen::ArrayXd&);\n\n#endif\n"
  },
  {
    "path": "src/MCC.cpp",
    "content": "/*\n\n   This file is part of the regenie software package.\n\n   Copyright (c) 2020-2024 Joelle Mbatchou, Andrey Ziyatdinov & Jonathan Marchini\n\n   Permission is hereby granted, free of charge, to any person obtaining a copy\n   of this software and associated documentation files (the \"Software\"), to deal\n   in the Software without restriction, including without limitation the rights\n   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n   copies of the Software, and to permit persons to whom the Software is\n   furnished to do so, subject to the following conditions:\n\n   The above copyright notice and this permission notice shall be included in all\n   copies or substantial portions of the Software.\n\n   THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n   SOFTWARE.\n\n*/\n\n#include \"Regenie.hpp\"\n/* #include \"Files.hpp\" */\n/* #include \"Geno.hpp\" */\n/* #include \"Pheno.hpp\" */\n\n#include \"MCC.hpp\"\n/* #include \"pgamma/rbase_pgamma.hpp\" */\n#include <boost/math/distributions/gamma.hpp>\n\nusing namespace std;\nusing namespace Eigen;\n\n// R: pgamma(q, shape = shape, scale = scale, lower = FALSE)\ndouble boost_pgamma(double q, double shape, double scale, bool lower)\n{\n  double p;\n  boost::math::gamma_distribution<> dist(shape, scale);\n\n  if(q < 0) {\n    p = 0.99999; // 1.0 - 1e-5;\n  } else {\n    if(lower) {\n      p = boost::math::cdf(dist, q);\n    } else {\n      p = boost::math::cdf(boost::math::complement(dist, q));\n    }\n  }\n\n  return(p);\n}\n\n//---------------------------------\n// MCCResults Constructors\n//---------------------------------\n\nMCCResults::MCCResults(unsigned int N_, unsigned int K_, unsigned int q_, unsigned int M_, const VectorXd &n_)\n{\n  N = N_;\n  K = K_;\n  q = q_;\n  M = M_;\n\n  n = n_.array() - (double)(K);\n  n2 = n.array().square();\n\n  // allocate\n  sum_x.resize(q, M); sum_x_sq.resize(q, M);\n  sum_x2.resize(q, M); sum_x2_sq.resize(q, M);\n  sum_x3.resize(q, M); sum_x4.resize(q, M);\n\n  sum_nx.resize(q, M); sum_nx_sq.resize(q, M);\n  sum_nx2.resize(q, M); sum_nx2_sq.resize(q, M); sum_nx2_cub.resize(q, M);\n  sum_nx3.resize(q, M); sum_nx4.resize(q, M); sum_nx6.resize(q, M);\n\n  A.resize(q, M);\n  EA.resize(q, M); EA2.resize(q, M);\n  EA3.resize(q, M); EA4.resize(q, M);\n\n  variance.resize(q, M); skewness.resize(q, M); kurtosis.resize(q, M);\n\n  S1.resize(q, M); S2.resize(q, M); \n  S1g.resize(q, M); S2g.resize(q, M); L.resize(q, M); \n  SkipBeta.resize(q, M); SkipGamma.resize(q, M); Skip.resize(q, M);\n\n  R.resize(q, M); \n  PvalBeta.resize(q, M); PvalGamma.resize(q, M); Pval.resize(q, M);\n  \n  // DKAT\n  D.resize(q, M);\n  Dm1.resize(q, M); Dm2.resize(q, M); Dm3.resize(q, M);\n\n  PvalD.resize(q, M);\n  ShapeD.resize(q, M); ScaleD.resize(q, M); LocationD.resize(q, M);\n}\n\nMCCResults::~MCCResults() { }\n\n//---------------------\n// MCC Constructors\n//---------------------\n\nMCC::MCC() { }\n\n//---------------------\n// Set up\n//---------------------\n\nvoid MCC::setup_y(const MatrixXb &masked_indivs, const MatrixXd &res, unsigned int 
_n_covariates)\n{\n  // check dimensions\n  if(masked_indivs.rows() == 0) { throw std::runtime_error(\"setup_y: masked_indivs.rows() == 0\"); }\n  if(masked_indivs.cols() == 0) { throw std::runtime_error(\"setup_y: masked_indivs.cols() == 0\"); }\n  if(res.rows() == 0) { throw std::runtime_error(\"setup_y: res.rows() == 0\"); }\n  if(res.cols() == 0) { throw std::runtime_error(\"setup_y: res.cols() == 0\"); }\n  if(masked_indivs.rows() != res.rows()) { throw std::runtime_error(\"setup_y: masked_indivs.rows() != res.rows()\"); }\n  if(masked_indivs.cols() != res.cols()) { throw std::runtime_error(\"setup_y: masked_indivs.cols() != res.cols()\"); }\n\n  n_samples = res.rows();\n  n_traits = res.cols();\n  n_covariates = _n_covariates;\n\n  Mask = masked_indivs;\n  Neff = Mask.cast<double>().colwise().sum(); \n  \n  // center & set Y = 0 for missing values\n  Yres = res;\n  ArrayXd sums = Mask.select(Yres, 0.0).colwise().sum();\n  ArrayXd means = sums / Neff.array();\n  Yres.array().rowwise() -= means.transpose();\n  Yres.array() *= Mask.array().cast<double>().array(); \n\n  // normalized Y: centered & sum(y^2) = 1\n  // don't confuse with scaled Y, for which sum(y^2) = (N - 1)\n  Ynorm = Yres; // Ynorm is centered because of centered Yres\n  ArrayXd norms = Ynorm.colwise().norm();\n  Ynorm.array().rowwise() /= norms.transpose();\n  Ynorm.array() *= Mask.array().cast<double>().array(); \n\n  precomp_sumy();\n}\n\nvoid MCC::check_setup_data() \n{\n  if(n_traits == 0) { throw std::runtime_error(\"check_setup_data: n_traits == 0\"); }\n\n  if(Mask.cols() == 0) { throw std::runtime_error(\"check_setup_data: Mask.cols() == 0\"); }\n  if(Mask.cols() != n_traits) { throw std::runtime_error(\"check_setup_data: Mask.cols() != n_traits\"); }\n  if(Neff.size() == 0) { throw std::runtime_error(\"check_setup_data: Neff.size() == 0\"); }\n  if(Neff.size() != n_traits) { throw std::runtime_error(\"check_setup_data: Neff.size() != n_traits\"); }\n\n  if(Yres.cols() == 0) { throw std::runtime_error(\"check_setup_data: Yres.cols() == 0\"); }\n  if(Yres.cols() != n_traits) { throw std::runtime_error(\"check_setup_data: Yres.cols() != n_traits\"); }\n\n  if(sum_y.size() == 0) { throw std::runtime_error(\"check_setup_data: sum_y.size() == 0\"); }\n}\n\n//---------------------\n// Pre-compute\n//---------------------\n\nvoid MCC::precomp_sumy()\n{\n  sum_y = Yres.colwise().sum();\n  sum_y_sq = sum_y.array().square();\n\n  sum_y2 = Yres.array().square().colwise().sum();\n  sum_y2_sq = sum_y2.array().square();\n\n  sum_y3 = Yres.array().pow(3).colwise().sum();\n  sum_y4 = Yres.array().pow(4).colwise().sum();\n\n  // sums for Ynorm\n  sum_ny = Ynorm.colwise().sum();\n  sum_ny_sq = sum_ny.array().square();\n\n  sum_ny2 = Ynorm.array().square().colwise().sum();\n  sum_ny2_sq = sum_ny2.array().square();\n  sum_ny2_cub = sum_ny2.array().pow(3);\n\n  sum_ny3 = Ynorm.array().pow(3).colwise().sum();\n  sum_ny4 = Ynorm.array().pow(4).colwise().sum();\n  sum_ny6 = Ynorm.array().pow(6).colwise().sum();\n}\n\n//---------------------\n// Association (X, Y)\n//---------------------\n\nMCCResults MCC::run(const Eigen::MatrixXd& G)\n{\n  check_setup_data();\n\n  MCCResults mcc_results(n_samples, n_covariates, n_traits, G.cols(), Neff);\n  mcc_results.statistics(Mask, G, Yres);\n  /* mcc_results.expectations(Mask, G, */\n  /*   sum_y, sum_y_sq, sum_y2, sum_y2_sq, sum_y3, sum_y4); */\n  /* mcc_results.moments(); */\n  /* mcc_results.distr(); */\n\n  mcc_results.dkat(Mask, G, Ynorm, \n    sum_ny, sum_ny_sq, sum_ny2, sum_ny2_sq, 
sum_ny2_cub, sum_ny3, sum_ny4, sum_ny6);\n\n  return(mcc_results);\n}\n\nvoid MCCResults::statistics(const MatrixXb &Mask, const Eigen::MatrixXd &G, const Eigen::MatrixXd &Y)\n{\n  unsigned int i, j;\n  for(i = 0; i < q; i++) {\n    for(j = 0; j < M; j++) {\n      A(i, j) = (Mask.col(i).select(G.col(j), 0.0).array() * Y.col(i).array()).sum();\n    }\n  }\n}\n\nvoid MCCResults::expectations(const MatrixXb &Mask, const Eigen::MatrixXd & G,\n    const VectorXd &sum_y, const VectorXd &sum_y_sq, \n    const VectorXd &sum_y2, const VectorXd &sum_y2_sq,\n    const VectorXd &sum_y3, const VectorXd &sum_y4)\n{\n  unsigned int i, j;\n  \n  VectorXd size1(q), size2(q), size3(q), size4(q), size5(q);\n  MatrixXd term1x(q, M), term2x(q, M), term3x(q, M), term4x(q, M), term5x(q, M);\n  VectorXd term1y(q), term2y(q), term3y(q), term4y(q), term5y(q);\n  \n  //------- Part 0: sums of y, y^2, ...\n  //-----------------------------------\n  // That is precomputed beforehand \n  // and passed by arguments\n\n  //------- Part 1: sums of x, x^2, ...\n  //-----------------------------------\n  \n  // sum_x[i, j] = (G[, j] * Mask[, i]).colSums()\n  for(i = 0; i < q; i++) {\n    for(j = 0; j < M; j++) {\n      sum_x(i, j) = Mask.col(i).select(G.col(j), 0.0).array().sum();\n    }\n  }\n  // sum_x_sq = (sum_x)^2\n  sum_x_sq = sum_x.array().square();\n\n  // sum_x2[i, j] = (G[, j] * Mask[, i]).square().colSums()\n  for(i = 0; i < q; i++) {\n    for(j = 0; j < M; j++) {\n      sum_x2(i, j) = Mask.col(i).select(G.col(j), 0.0).array().square().sum();\n    }\n  }\n  // sum_x2_sq = (sum_x2)^2\n  sum_x2_sq = sum_x2.array().square();\n \n  // sum_x3[i, j] = (G[, j] * Mask[, i]).pow(3).colSums()\n  for(i = 0; i < q; i++) {\n    for(j = 0; j < M; j++) {\n      sum_x3(i, j) = Mask.col(i).select(G.col(j), 0.0).array().pow(3).sum();\n    }\n  }\n\n  // sum_x4[i, j] = (G[, j] * Mask[, i]).pow(4).colSums()\n  for(i = 0; i < q; i++) {\n    for(j = 0; j < M; j++) {\n      sum_x4(i, j) = Mask.col(i).select(G.col(j), 0.0).array().pow(4).sum();\n    }\n  }\n\n  //------- Part 2: expectations of sum(x*y), sum(x*y)^2, ...\n  //---------------------------------------------------------\n  // Expectation 1: E_perm(A) \n  // q x M matrix EA: EA[, j] = (sum_x[, j] * sum_y) / n\n  for(j = 0; j < M; j++) {\n    EA.col(j) = (sum_x.col(j).array() * sum_y.array()) / n.array();\n  }\n\n  // Expectation 2: E_perm(A2) \n  // q x M matrix EA2: EA[, j] = (sum_x2[, i] * sum_y2) / n +\n  //    (sum_x_sq[, i] - sum_x2[, i]) * (sum_y_sq - sum_y2) / (n2 - n)\n  for(j = 0; j < M; j++) {\n    EA2.col(j) = (sum_x2.col(j).array() * sum_y2.array()) / n.array() +\n      (sum_x_sq.col(j).array() - sum_x2.col(j).array())  * (sum_y_sq.array() - sum_y2.array()) / (n2.array() - n.array());\n  }\n\n  // Expectation 3: E_perm(A3) \n  size1 = n;\n  size2 = 3 * n.array() * (n.array() - 1);\n  size3 = n2.array()*n.array() - size2.array() - size1.array();\n  // fill qxM matrices for terms for X\n  term1x = sum_x3;\n  term2x = 3*(sum_x2.array() * sum_x.array() - sum_x3.array());\n  term3x = sum_x_sq.array() * sum_x.array() - term2x.array() - term1x.array();\n  // fill qx1 vectors for terms for Y\n  term1y = sum_y3;\n  term2y = 3*(sum_y2.array() * sum_y.array() - sum_y3.array());\n  term3y = sum_y_sq.array()*sum_y.array() - term2y.array() - term1y.array();\n  // NB: expect (all(n > 2))\n  for(j = 0; j < M; j++) {\n    EA3.col(j) = (term1x.col(j).array() * term1y.array()) / size1.array() + (term2x.col(j).array() * term2y.array()) / size2.array() +\n      // if n > 2\n      
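// (size3 = n(n-1)(n-2), the number of ordered index triples with all entries distinct)\n      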
(term3x.col(j).array() * term3y.array()) / size3.array();\n  }\n\n  // Expectation 4: E_perm(A^4)\n  size1 = n;\n  size2 = 4*n.array()*(n.array() - 1);\n  size3 = 3*n.array()*(n.array() - 1);\n  size4 = 6*n.array()*(n.array() - 1)*(n.array() - 2);\n  size5 = n2.array()*n2.array() - 6*n2.array()*n.array() + 11*n2.array() - 6*n.array();\n  // fill qxM matrices for terms for X\n  term1x = sum_x4;\n  term2x = 4*(sum_x3.array() * sum_x.array() - term1x.array());\n  term3x = 3*(sum_x2_sq.array() - term1x.array());\n  term4x = 6*sum_x2.array()*sum_x_sq.array() - 12*sum_x3.array()*sum_x.array() - 6*sum_x2_sq.array() + 12*sum_x4.array();\n  term5x = sum_x_sq.array()*sum_x_sq.array() - term4x.array() - term3x.array() - term2x.array() - term1x.array();\n  // fill qx1 vectors for terms for Y\n  term1y = sum_y4.array();\n  term2y = 4*(sum_y3.array() * sum_y.array() - term1y.array());\n  term3y = 3*(sum_y2_sq.array() - term1y.array());\n  term4y = 6*sum_y2.array()*sum_y_sq.array() - 12*sum_y3.array()*sum_y.array() - 6*sum_y2_sq.array() + 12*sum_y4.array();\n  term5y = sum_y_sq.array()*sum_y_sq.array() - term4y.array() - term3y.array() - term2y.array() - term1y.array();\n  // NB: expect all(n > 3)\n  for(j = 0; j < M; j++) {\n    EA4.col(j) = \n      // if n > 2\n      (term1x.col(j).array() * term1y.array()) / size1.array() +\n      (term2x.col(j).array() * term2y.array()) / size2.array() +\n      (term3x.col(j).array() * term3y.array()) / size3.array() +\n      // if n >= 3\n      (term4x.col(j).array() * term4y.array()) / size4.array() +\n      // if n > 3\n      (term5x.col(j).array() * term5y.array()) / size5.array();\n  }\n  \n  //------- Part 3: observed test statistic\n  //---------------------------------------------------------\n  for(j = 1; j < M; j++) {\n  }\n}\n\nvoid MCCResults::moments()\n{\n  variance = EA2.array() - EA.array().square(); // variance\n  // non-centered A: s = (EA3 - 3*EA*V - 3*EA^3) / V^(3/2) \n  skewness = EA3.array() / variance.array().pow(1.5); // skewness\n  kurtosis = EA4.array() / variance.array().square() - 3; // kurtosis\n}\n\nvoid MCCResults::distr()\n{\n  unsigned int i, j;\n  \n  for(i = 0; i < q; i++) {\n    for(j = 0; j < M; j++) {\n      double V = variance(i, j);\n      double k = kurtosis(i, j), s = skewness(i, j);\n      double k2 = k*k, s2 = s*s;\n      double s3 = s2*s, s4 = s2*s2;\n\n      double r = A(i, j) / sqrt(variance(i, j) * (n(i) - 1.0));\n      R(i, j) = r;\n      \n      // --- 1. Fit Beta(alpha, beta) distribution\n      double alpha_d1 = -k2*s2 + 32*k2 - 84*k*s2 + 96*k + 36*s4 - 180*s2;\n      double alpha_d2 = 2*k - 3*s2;\n      bool skip = (alpha_d2 >= 0) | (alpha_d2 == 0);\n      \n      double alpha_t1 = skip ? 0.0 : sqrt(-1.0/alpha_d1);\n      double alpha_t2 = (36*s - 18*s3 + 3*k2*s - 3*k*s3 + 24*k*s) * alpha_t1;\n      double alpha_t3 = 3*k - 3*s2 + 6;\n      double alpha_t4 = -6*s2 + 6*k + 12;\n\n      double alpha1 = (alpha_t3 + alpha_t2 - alpha_t4)/alpha_d2;\n      double alpha2 = (alpha_t3 - alpha_t2 - alpha_t4)/alpha_d2;\n      double beta1 = -(alpha_t3 + alpha_t2)/alpha_d2;\n      double beta2 = -(alpha_t3 - alpha_t2)/alpha_d2;\n\n      bool switch_alpha = (alpha1 <= 0) | (beta1 <= 0);\n      double alpha = switch_alpha ? alpha2 : alpha1;\n      double beta = switch_alpha ? beta2 : beta1;\n\n      skip = skip | ((alpha < 0) & (beta < 0));\n      alpha = skip ? 0.0 : alpha;\n      beta = skip ? 0.0 : beta;\n\n      S1(i, j) = alpha;\n      S2(i, j) = beta;\n\n      // --- 2. 
Fit Gamma(alpha, beta, location) distribution\n      bool skip_gamma = (abs(s) < nl_dbl_dmin);\n\n      double m2 = V;\n      double m3 = abs(s);\n      double alpha_gamma = 4.0 / (m3*m3);\n      double beta_gamma = sqrt(alpha_gamma / m2);\n      double location_gamma = - alpha_gamma * beta_gamma;\n\n      /* double alpha_gamma = skip_gamma ? 0.0 : (4 / s2); */\n      /* double beta_gamma = skip_gamma ? 0.0 : sqrt(alpha_gamma / V); */\n      /* double location_gamma = skip_gamma ? 0.0 : (-alpha_gamma * beta_gamma); */\n  \n      S1g(i, j) = alpha_gamma;\n      S2g(i, j) = beta_gamma;\n      L(i, j) = location_gamma;\n\n      // ---- 3. P-value calculation for Beta\n      double rprime;\n      double pval_right, pval_left, pval_double;\n\n      bool skip_test = false;\n      if(!skip) {\n        double alpha_beta = alpha + beta;\n        double mean_beta = alpha / alpha_beta;\n        double var_beta = (alpha * beta) / (alpha_beta*alpha_beta * (alpha_beta + 1.0));\n\n        double c0 = mean_beta;\n        double c1 = sqrt(var_beta * (n(i) - 1.0));\n        rprime = c0 + c1 * r;\n\n        skip_test = (rprime < 0) | (rprime > 1);\n        if(!skip_test) {\n          boost::math::beta_distribution<> dist(alpha, beta);\n          pval_right = boost::math::cdf(boost::math::complement(dist, rprime));\n          pval_left = boost::math::cdf(dist, rprime);\n          pval_double = 2*min(pval_right, pval_left);\n          if(pval_double > 1.0) {\n            pval_double = 1.0;\n          }\n          PvalBeta(i, j) = pval_double;\n        }\n      }\n      SkipBeta(i, j) = skip | skip_test;\n\n      // ---- 4. P-value calculation for Gamma\n      bool skip_test_gamma = false;\n      if(!skip_gamma) {\n        // flip the sign of test statistic r?\n        bool flip_sign = (s < 0);\n        double mult_flip = flip_sign ? 
-1.0 : 1.0;\n        rprime = alpha_gamma * beta_gamma + beta_gamma * beta_gamma * sqrt(V * (n(i) - 1.0)) * (mult_flip * r);\n\n        skip_test_gamma = (rprime < 0);\n        if(!skip_test_gamma) {\n          /* cout << \" s2 = \" << s2 << \" 1/s2 = \" << 1/s2 << \" alpha_gamma = \" << alpha_gamma << \" \" << beta_gamma << \" \" << 1.0 / beta_gamma << endl; */\n          /* cout << \" rprime = \" << rprime << endl; */\n          // CDF from Boost library\n          /* boost::math::gamma_distribution<> dist_gama(alpha_gamma, 1.0 / beta_gamma); */\n          /* pval_right = boost::math::cdf(boost::math::complement(dist_gama, rprime)); */\n          /* pval_left = boost::math::cdf(dist_gama, rprime); */\n          // CDF from R \n          /* pval_right = rbase_pgamma(rprime, alpha_gamma, beta_gamma , 0, 0); // 0 = lower tail, 0 = log_p */\n          /* pval_left = rbase_pgamma(rprime, alpha_gamma, beta_gamma , 1, 0); // 1 = lower tail, 0 = log_p */\n          // CDF from Boost\n          pval_right = boost_pgamma(rprime, alpha_gamma, 1.0 / beta_gamma , false); // upper tail\n          pval_left = boost_pgamma(rprime, alpha_gamma, 1.0 / beta_gamma , true); // lower tail\n          /* cout << \"rprime|alpha|beta|pval_right \" << rprime << \"|\" << alpha_gamma << \"|\" << beta_gamma << \"|\" << pval_right << endl; */\n          pval_double = 2*min(pval_right, pval_left);\n          if(pval_double > 1.0) {\n            pval_double = 1.0;\n          }\n          PvalGamma(i, j) = pval_double;\n        }\n      }\n      SkipGamma(i, j) = skip_gamma | skip_test_gamma;\n\n      // --- Combine Beta and Gamma results\n      // Combine v1: Gamma preferred over Beta\n      if(SkipBeta(i, j) & SkipGamma(i, j)) {\n        // case 1: both Gamma and Beta fail\n        Skip(i, j) = true;\n      } else if(!SkipGamma(i, j)) {\n        // case 2: Gamma ok\n        Skip(i, j) = false;\n        Pval(i, j) = PvalGamma(i, j);\n      } else if(!SkipBeta(i, j)) {\n        // case 3: Gamma fails, Beta ok\n        Skip(i, j) = false;\n        Pval(i, j) = PvalBeta(i, j);\n      }\n      \n      // Combine v2: Gamma\n      /* Skip(i, j) = SkipGamma(i, j); */\n      /* Pval(i, j) = PvalGamma(i, j); */\n      \n      // Combine v3: Beta\n      /* Skip(i, j) = SkipBeta(i, j); */\n      /* Pval(i, j) = PvalBeta(i, j); */\n    } // end of loop for (i, j) = (trait, variant) pair\n  }\n}\n\n//-----------------\n// DKAT\n//-----------------\n\nvoid MCCResults::dkat(const MatrixXb &Mask, const Eigen::MatrixXd & G,\n    const Eigen::MatrixXd & Ynorm, \n    const VectorXd &sum_ny, const VectorXd &sum_ny_sq, \n    const VectorXd &sum_ny2, const VectorXd &sum_ny2_sq, const VectorXd &sum_ny2_cub,\n    const VectorXd &sum_ny3, const VectorXd &sum_ny4, const VectorXd &sum_ny6)\n{\n  unsigned int i, j;\n  double ni;\n  MatrixXd X(N, M);\n  ArrayXd means_X(M), norms_X(M);\n  double pval_right;\n  \n  for(i = 0; i < q; i++) {\n    ni = n(i); // Neff for trait i\n    // create a copy of genotype matrix & use mask for trait i\n    X = G;\n\n    // normalize genotype j: center + normalize\n    for(j = 0; j < M; j++) {\n      means_X(j) = Mask.col(i).select(X.col(j), 0.0).sum() / ni;\n    }\n    X.array().rowwise() -= means_X.transpose();\n    for(j = 0; j < M; j++) {\n      norms_X(j) = Mask.col(i).select(X.col(j), 0.0).norm();\n    }\n    X.array().rowwise() /= norms_X.transpose();\n    for(j = 0; j < M; j++) {\n      X.col(j) = Mask.col(i).select(X.col(j), 0.0);\n    }\n\n    // compute sums\n    sum_nx.row(i) = X.colwise().sum();\n    
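// higher-order sums of the masked, normalized genotype columns, mirroring the precomputed sum_ny* terms for the trait\n    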
sum_nx_sq.row(i) = sum_nx.row(i).array().square();\n    sum_nx2.row(i) = X.array().square().colwise().sum();\n    sum_nx2_sq.row(i) = sum_nx2.row(i).array().square();\n    sum_nx2_cub.row(i) = sum_nx2.row(i).array().pow(3);\n\n    sum_nx3.row(i) = X.array().pow(3).colwise().sum();\n    sum_nx4.row(i) = X.array().pow(4).colwise().sum();\n    sum_nx6.row(i) = X.array().pow(6).colwise().sum();\n\n    // test statistic D = R^2 (squared Pearson corr.)\n    D.row(i) = (X.transpose() * Ynorm.col(i)).array().square();\n\n    // Moment 1 of D: mean\n    Dm1.row(i) = sum_nx2.row(i) * sum_ny2(i) / ni;\n\n    // Moment 2 of D: variance\n    double T = sum_ny2(i), T2 = sum_ny2_sq(i), S2 = sum_ny4(i);\n    ArrayXd Ts = sum_nx2.row(i), T2s = sum_nx2_sq.row(i), S2s = sum_nx4.row(i);\n\n    double T_sq = T*T;\n    double T_cub = T_sq*T;\n    ArrayXd Ts_sq = Ts.square();\n    ArrayXd Ts_cub = Ts_sq * Ts;\n\n    double ni_1 = ni - 1.0, ni_2 = ni - 2.0, ni_3 = ni - 3.0; \n    /* double ni1 = ni + 1.0, ni2 = ni + 2.0, ni3 = ni + 3.0, ni4 = ni + 4.0; */\n    double ni1 = ni + 1.0, ni4 = ni + 4.0;\n    double ni_sq = ni*ni;\n    double ni_cub = ni_sq * ni, ni_quad = ni_sq * ni_sq;\n    \n    ArrayXd temp1 = 2.0 * (ni_1*T2 - T_sq)*(ni_1*T2s - Ts_sq) / (ni_1*ni_1*ni1*ni_2);\n    double temp21 = ni*ni1*S2 - ni_1*(T_sq + 2*T2);\n    ArrayXd temp22 = ni*ni1*S2s - ni_1*(Ts_sq + 2*T2s);\n    double temp23 = ni1*ni*ni_1*ni_2*ni_3;\n    ArrayXd temp2 = temp21 * temp22 / temp23;\n    Dm2.row(i) = temp1 + temp2;\n    \n    // Moment 3 of D: skewness\n    double T3 = sum_ny2_cub(i), S3 = sum_ny6(i);\n    ArrayXd T3s = sum_nx2_cub.row(i), S3s = sum_nx6.row(i);\n    double U = sum_ny3(i) * sum_ny3(i);\n    ArrayXd Us = sum_nx3.row(i).array().square();\n    double R = sum_ny2(i) * sum_ny4(i);\n    ArrayXd Rs = sum_nx2.row(i).array() * sum_nx4.row(i).array();\n    double B = U;\n    ArrayXd Bs = Us;\n\n    ArrayXd t1 = ni_sq * ni1 * (ni_sq + 15*ni - 4) * S3 * S3s;\n    ArrayXd t2 = 4 * (ni_quad - 8*ni_cub + 19*ni_sq - 4*ni - 16) * U * Us;\n    ArrayXd t3 = 24 * (ni_sq - ni - 4) * (U * Bs + B * Us);\n    ArrayXd t4 = 6 * (ni_quad - 8*ni_cub + 21*ni_sq - 6*ni - 24) * B * Bs;\n\n    ArrayXd t5 = 12 * (ni_quad - ni_cub - 8*ni_sq + 36*ni - 48) * R * Rs;\n    ArrayXd t6 = 12 * (ni_cub - 2*ni_sq + 9*ni - 12) * (T*S2*Rs + R*Ts*S2s); \n    ArrayXd t7 = 3 * (ni_quad - 4*ni_cub - 2*ni_sq + 9*ni - 12) * T*Ts*S2*S2s;\n\n    ArrayXd t81 = (ni_cub - 3*ni_sq - 2*ni + 8) * (R*Us + U*Rs);\n    ArrayXd t82 = (ni_cub - 2*ni_sq - 3*ni + 12) * (R*Bs + B*Rs);\n    ArrayXd t8 = 24 * (t81 + t82);\n    ArrayXd t9 = 12 * (ni_sq - ni + 4) * (T*S2*Us + U*Ts*S2s);\n    ArrayXd t10 = 6 * (2*ni_cub - 7*ni_sq - 3*ni + 12) * (T*S2*Bs + B*Ts*S2s);\n\n    ArrayXd t11 = -2*ni*ni_1*(ni_sq - ni + 4) * ((2*U + 3*B)*S3s + (2*Us + 3*Bs)*S3);\n    ArrayXd t12 = -3*ni*ni_1*ni_1*ni4 * ((T*S2 + 4*R)*S3s + (Ts*S2s + 4*Rs)*S3);\n    ArrayXd t13 = 2*ni*ni_1*ni_2 *((T_cub + 6*T*T2 + 8*T3)*S3s + (Ts_cub + 6*Ts*T2s + 8*T3s)*S3);\n    ArrayXd t14 = T_cub * ((ni_cub - 9*ni_sq + 23*ni - 14)*Ts_cub + 6*(ni - 4)*Ts*T2s + 8*T3s);\n    ArrayXd t15 = 6*T*T2*((ni - 4)*Ts_cub + (ni_cub - 9*ni_sq + 24*ni - 14)*Ts*T2s + 4*ni_3*T3s);\n\n    ArrayXd t16 = 8*T3*(Ts_cub + 3*ni_3*Ts*T2s + (ni_cub - 9*ni_sq + 26*ni - 22)*T3s);\n    ArrayXd t17 = -16*(T_cub*Us + U*Ts_cub) - 6*(T*T2*Us + U*Ts*T2s) * (2*ni_sq - 10*ni + 16);\n    ArrayXd t18 = -8*(T3*Us + U*T3s) * (3*ni_sq - 15*ni + 16) - (T_cub*Bs + B*Ts_cub) * (6*ni_sq - 30*ni + 24);\n    ArrayXd t19 = -6*(T*T2*Bs + B*Ts*T2s) * (4*ni_sq - 20*ni + 
24) - 8*(T3*Bs + B*T3s) * (3*ni_sq - 15*ni + 24);\n\n    ArrayXd t201 = 24*(T_cub*Rs + R*Ts_cub) + 6*(T*T2*Rs + R*Ts*T2s) * (2*ni_sq- 10*ni + 24);\n    ArrayXd t202 = 8*(T3*Rs + R*T3s) * (3*ni_sq - 15*ni + 24) + (3*ni_sq - 15*ni + 6) * (T_cub*Ts*S2s + T*S2*Ts_cub);\n    ArrayXd t203 = 6*(T*T2*Ts*S2s + Ts*T2s*T*S2) * (ni_sq - 5*ni + 6) + 48*(T3*Ts*S2s + T3s*T*S2);\n    ArrayXd t20 = -ni_2 * (t201 + t202 + t203);\n\n    ArrayXd temp31 = t1 + t2 + t3 + t4 + t5 + t6 + t7 + t8 + t9 + t10 + t11 + t12 + t13 + t14 + t15 + t16 + t17 + t18 + t19 + t20;\n    double temp32 = ni * ni_1 * ni_2 * ni_3 * (ni_3 - 1) * (ni_3 - 2);\n    ArrayXd mom3 = temp31 / temp32;\n\n    Dm3.row(i) = (mom3.transpose() - 3*Dm1.row(i).array() * Dm2.row(i).array() - Dm1.row(i).array().pow(3)) / Dm2.row(i).array().pow(1.5);\n    \n    // Parameters of Gamma distribution of D: shape, scale, location\n    /* shape=4/m3^2 */\n    /* scale=sqrt(m2)*m3/2 */\n    /* location=m1-2*sqrt(m2)/m3 */\n    ShapeD.row(i).array() = 4.0 / Dm3.row(i).array().square();\n    ScaleD.row(i).array() = Dm2.row(i).array().sqrt() * Dm3.row(i).array() / 2.0;\n    LocationD.row(i).array() = Dm1.row(i).array() - 2 * Dm2.row(i).array().sqrt() / Dm3.row(i).array();\n\n    for(j = 0; j < M; j++) {\n      /* gscale = abs(scale) */ \n      /* ssgn = sign(scale) */\n      /* lower.tail = FALSE */\n      /* pval = pgamma(ssgn*(Fstar - location), shape = shape, scale = gscale, */\n      /*   lower.tail = xor(ssgn < 0, lower.tail), log.p = FALSE) */\n      /* cout << \"skewness = \" << Dm3(i, j) << endl; */\n      /* if(abs(Dm3(i, j)) < 1e-6) { */\n      /*   /1* cout << \"approx\" << endl; *1/ */\n      /*   boost::math::normal norm(Dm1(i, j), sqrt(Dm2(i, j))); */\n      /*   pval_right = boost::math::cdf(boost::math::complement(norm, D(i, j))); */\n      /* } else { */\n        /* pval_right = rbase_pgamma(D(i, j) - LocationD(i, j), ShapeD(i, j), ScaleD(i, j) , 0, 0); // 0 = lower tail, 0 = log_p */\n        pval_right = boost_pgamma(D(i, j) - LocationD(i, j), ShapeD(i, j), ScaleD(i, j) , false);\n      /* } */\n      PvalD(i, j) = pval_right;\n\n      Skip(i, j) = false;\n      Pval(i, j) = PvalD(i, j);\n    }\n  }\n}\n"
  },
  {
    "path": "src/MCC.hpp",
    "content": "/* \n\n   This file is part of the regenie software package.\n\n   Copyright (c) 2020-2024 Joelle Mbatchou, Andrey Ziyatdinov & Jonathan Marchini\n\n   Permission is hereby granted, free of charge, to any person obtaining a copy\n   of this software and associated documentation files (the \"Software\"), to deal\n   in the Software without restriction, including without limitation the rights\n   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n   copies of the Software, and to permit persons to whom the Software is\n   furnished to do so, subject to the following conditions:\n\n   The above copyright notice and this permission notice shall be included in all\n   copies or substantial portions of the Software.\n\n   THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n   SOFTWARE.\n\n*/\n\n#ifndef TESTMCC_H\n#define TESTMCC_H\n\n//-------------------------------------------------------------\n// Class MCCResults for MCC test results\n//-------------------------------------------------------------\nclass MCCResults \n{\n  public:\n    double nl_dbl_dmin = 10.0 * std::numeric_limits<double>::min();\n\n    unsigned int N; // number of samples                \n    unsigned int K; // numbr of covariates\n    unsigned int q; // number of traits                \n    unsigned int M; // number of variables (to test for association)\n    Eigen::VectorXd n, n2;\n\n    // sum_x: q x M matrices\n    Eigen::MatrixXd sum_x, sum_x_sq;\n    Eigen::MatrixXd sum_x2, sum_x2_sq;\n    Eigen::MatrixXd sum_x3;\n    Eigen::MatrixXd sum_x4;\n\n    Eigen::MatrixXd sum_nx, sum_nx_sq;\n    Eigen::MatrixXd sum_nx2, sum_nx2_sq, sum_nx2_cub;\n    Eigen::MatrixXd sum_nx3, sum_nx4, sum_nx6;\n\n    // expectations: q x M matrices\n    Eigen::MatrixXd A; // observed test statistic\n    Eigen::MatrixXd EA, EA2, EA3, EA4;\n    // moments\n    Eigen::MatrixXd variance, kurtosis, skewness;\n    // Beta/Gamma distr.\n    Eigen::MatrixXd S1, S2;\n    Eigen::MatrixXd S1g, S2g, L;\n    MatrixXb SkipBeta, SkipGamma, Skip;\n    \n    Eigen::MatrixXd R, PvalBeta, PvalGamma, Pval;\n\n    // DKAT\n    Eigen::MatrixXd D;\n    Eigen::MatrixXd Dm1, Dm2, Dm3;\n\n    Eigen::MatrixXd ShapeD, ScaleD, LocationD;\n    Eigen::MatrixXd PvalD;\n\n    //----------\n    // Methods\n    //----------\n    MCCResults();\n    MCCResults(unsigned int, unsigned int, unsigned int, unsigned int, const Eigen::VectorXd &n_);\n    ~MCCResults();\n\n    void statistics(const MatrixXb &, const Eigen::MatrixXd &, const Eigen::MatrixXd &);\n    void expectations(const MatrixXb &, const Eigen::MatrixXd &,\n        const Eigen::VectorXd &, const Eigen::VectorXd &,\n        const Eigen::VectorXd &, const Eigen::VectorXd &,\n        const Eigen::VectorXd &, const Eigen::VectorXd &);\n    void moments();\n    void distr();\n\n    void dkat(const MatrixXb &, const Eigen::MatrixXd &,\n        const Eigen::MatrixXd &, \n        const Eigen::VectorXd &, const Eigen::VectorXd &,\n        const Eigen::VectorXd &, const Eigen::VectorXd &,\n        const Eigen::VectorXd &, const Eigen::VectorXd &,\n        const 
Eigen::VectorXd &, const Eigen::VectorXd &);\n};\n\n//-------------------------------------------------------------\n// Class MCC for Moment-Matching Correlation (MCC) test\n//-------------------------------------------------------------\nclass MCC\n{\n  public:\n    /********************\n     * Public attributes\n     ********************/\n    int verbose;\n\n    // dimensions\n    unsigned int n_samples; // number of samples\n    unsigned int n_traits; // number of traits\n    unsigned int n_covariates; // number of covariates\n\n    MatrixXb Mask; // masked samples = samples with missing values on phenotypes; 0 = missing \n    Eigen::VectorXd Neff; // number of non-missing samples (per trait)\n\n    Eigen::MatrixXd Yres; // matrix of residualized traits (missing are set to 0)\n    Eigen::MatrixXd Ynorm; // matrix of normalized residualized traits (missing are set to 0)\n\n    // precomputed products\n    Eigen::VectorXd sum_y, sum_y_sq;\n    Eigen::VectorXd sum_y2, sum_y2_sq;\n    Eigen::VectorXd sum_y3;\n    Eigen::VectorXd sum_y4;\n\n    // precomputed products for DKAT\n    Eigen::VectorXd sum_ny, sum_ny_sq;\n    Eigen::VectorXd sum_ny2, sum_ny2_sq, sum_ny2_cub;\n    Eigen::VectorXd sum_ny3, sum_ny4, sum_ny6;\n\n    /********************\n     * Public methods\n     ********************/\n    void setup_y(const MatrixXb &, const Eigen::MatrixXd &res, unsigned int _n_covariates = 1);\n\n    MCCResults run(const Eigen::MatrixXd& G);\n\n    MCC();\n    ~MCC() { };\n\n  private:\n    /********************\n     * Private methods\n     ********************/\n    void check_setup_data();\n    void precomp_sumy();\n};\n\n#endif\n"
  },
  {
    "path": "src/Masks.cpp",
    "content": "/* \n\n   This file is part of the regenie software package.\n\n   Copyright (c) 2020-2024 Joelle Mbatchou, Andrey Ziyatdinov & Jonathan Marchini\n\n   Permission is hereby granted, free of charge, to any person obtaining a copy\n   of this software and associated documentation files (the \"Software\"), to deal\n   in the Software without restriction, including without limitation the rights\n   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n   copies of the Software, and to permit persons to whom the Software is\n   furnished to do so, subject to the following conditions:\n\n   The above copyright notice and this permission notice shall be included in all\n   copies or substantial portions of the Software.\n\n   THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n   SOFTWARE.\n\n*/\n\n#include \"Regenie.hpp\"\n#include \"Files.hpp\"\n#include \"Geno.hpp\"\n#include \"Joint_Tests.hpp\"\n#include \"survival_data.hpp\"\n#include \"cox_score.hpp\"\n#include \"Step1_Models.hpp\"\n#include \"Step2_Models.hpp\"\n#include \"Masks.hpp\"\n\n\nusing namespace std;\nusing namespace Eigen;\n\nGenoMask::GenoMask() { // @suppress(\"Class members should be properly initialized\")\n}\n\nGenoMask::~GenoMask() {\n  // TODO Auto-generated destructor stub\n}\n\nvoid GenoMask::prep_run(struct param& params, struct in_files const& files){\n\n  params.min_MAC_mask = params.vc_with_weights ? 
1e-200 : params.min_MAC; // for association tests (use lower threshold if using custom weights)\n  params.min_MAC = 0.5; // set this so singletons can be retained (0.5 for dosages)\n  take_max = params.mask_rule_max;\n  take_comphet = params.mask_rule_comphet;\n  w_loo = params.mask_loo;\n  w_lodo = params.mask_lodo;\n  w_vc_tests = params.vc_test;\n  vc_aaf = params.vc_maxAAF;\n  vc_collapse_MAC = params.skat_collapse_MAC;\n  w_vc_cust_weights = params.vc_with_weights;\n  write_masks = params.write_masks;\n  write_snplist = params.write_mask_snplist;\n  force_singleton = params.aaf_file_wSingletons;\n  verbose = params.verbose || params.debug;\n  remeta_save_ld = params.remeta_save_ld;\n\n  if(!take_max && !take_comphet) params.htp_out = false; // due to genocounts with sum rule\n  if(write_masks) gfile_prefix = files.out_file + \"_masks\";\n\n}\n\nvoid GenoMask::setBins(struct param* params, mstream& sout){\n\n  vector<double> tmpv;\n\n  // check if not singleton when using LOO\n  if(!params->mask_loo || ( params->mask_loo && (params->mbins[0] != \"singleton\")) ){\n\n    if(params->mbins.size() >= 1){\n      // convert them to double\n      for( size_t i = 0; i < params->mbins.size(); i++){\n        if(params->mbins[i] == \"all\")\n          tmpv.push_back( 1 );\n        else\n          tmpv.push_back( convertDouble( params->mbins[i], params, sout) );\n      }\n    } else tmpv.push_back( default_aaf );\n\n    if(w_vc_tests) tmpv.push_back( vc_aaf );\n\n    // sort and retain unique values\n    std::sort(tmpv.begin(), tmpv.end());\n    tmpv.erase( unique( tmpv.begin(), tmpv.end() ), tmpv.end() );\n    // store in eigen object\n    aafs = Eigen::Map<Eigen::VectorXd, Eigen::Unaligned>(tmpv.data(), tmpv.size());\n\n    // check validity\n    if( ( (aafs.array() < minAAF) || (aafs.array() > 1) ).any() && !w_vc_tests )\n      throw \"must specify values for --aaf-bins in [\" + to_string( minAAF ) + \", 1]\";\n    else if( (aafs.array()>= 0.5).any() && !w_vc_tests )\n      sout << \"WARNING: For computational efficiency, it is recommended that AAF cutoffs be < 0.5\\n\";\n\n    max_aaf = aafs.tail(1)(0);\n  }\n\n  n_aaf_bins = aafs.size();\n\n  if(n_aaf_bins > max_aaf_bins)\n    throw \"Number of AAF bins (=\" + to_string( n_aaf_bins ) + \") above maximum (=\" + to_string( max_aaf_bins ) + \")\\n\";\n\n  IOFormat Fmt(StreamPrecision, DontAlignCols, \" \", \"\", \"\", \"\",\"\",\"\");\n  if(!params->mask_loo)\n    sout << left << std::setw(20) << \" * aaf cutoffs\" << \": [ \" << n_aaf_bins << \" : \"\n      << aafs.transpose().format(Fmt) << \" ] + singletons\\n\";\n  else {\n    sout << left << std::setw(20) << \" * aaf cutoff\" << \": \" ;\n    if(n_aaf_bins == 0) sout << \"singleton\";\n    else sout << aafs(0);\n    sout << endl;\n  }\n\n  n_aaf_bins++; // add singleton bin\n\n  nmasks_total = n_aaf_bins * masks.size(); // total number of masks\n\n  w_regions = params->w_regions;\n  if(w_regions) base_masks = masks;\n}\n\nvoid GenoMask::prepMasks(int const& ntotal, const string& setID) {\n\n  maskinfo tmp_region_mask;\n  std::map <std::string, decltype(tmp_region_mask.region)>::iterator itr;\n\n  // make new set of masks if using set regions\n  if(w_regions){ \n    masks.resize(0);\n    // go through each original mask and create region-specific mask\n    for(size_t i = 0; i < base_masks.size(); i++ ){\n      tmp_region_mask = base_masks[i];\n      for (itr = regions[setID].begin(); itr != regions[setID].end(); ++itr) { // make region mask\n        if(w_lodo){ // LODO scheme\n          
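// LODO: keep every region except the current one by unsetting its bits\n          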
tmp_region_mask.region_name = \"LODO_\" + itr->first + \".\";\n          tmp_region_mask.region = (get_max(itr->second) & ~itr->second); // unset bits for region\n          masks.push_back(tmp_region_mask);\n        } else {\n          tmp_region_mask.region_name = itr->first + \".\";\n          tmp_region_mask.region = itr->second;\n          masks.push_back(tmp_region_mask);\n        }\n      }\n      if(!w_loo){// add mask across all regions\n        tmp_region_mask = base_masks[i];\n        tmp_region_mask.region |= get_max(itr->second); //set all bits to 1\n        masks.push_back(tmp_region_mask);\n      }\n    }\n    nmasks_total = n_aaf_bins * masks.size();\n    if(write_masks) reset_gvec();\n  } \n\n  Gtmp = MatrixXd::Constant(ntotal, nmasks_total, -3);\n  colset = ArrayXb::Constant( nmasks_total, false );\n  if(!take_max) {\n    non_missing = MatrixXb::Constant(ntotal, nmasks_total, false);\n    if(!take_comphet) nsites = ArrayXi::Constant(nmasks_total, 0);\n  }\n\n  if(write_setlist) {\n    list_masks.resize(nmasks_total);\n    for(size_t i = 0;i < list_masks.size(); i++)\n      list_masks[i].resize(0);\n  }\n  if(write_snplist) {\n    list_snps.resize(nmasks_total);\n    for(size_t i = 0;i < list_snps.size(); i++)\n      list_snps[i].resize(0);\n  }\n\n  if(remeta_save_ld) {\n    remeta_snplist.clear();\n  }\n}\n\nvoid GenoMask::updateMasks(int const& start, int const& bs, struct param* params, struct filter* filters, const Ref<const MatrixXb>& masked_indivs, struct geno_block* gblock, const Ref<const ArrayXd>& vc_weights, vector<variant_block> &all_snps_info, vset& setinfo, vector<snp>& snpinfo, mstream& sout){\n\n  // identify which snps are in each mask\n  set_snp_masks(start, bs, all_snps_info, setinfo, snpinfo, sout);\n  // identify which snps are in each aaf bin\n  set_snp_aafs(start, bs, params->set_aaf, all_snps_info, setinfo, snpinfo, sout);\n\n  MatrixXb Jmat, ur_miss;\n  MatrixXd rare_mask_tmp;\n  SpMat ur_sp_mat;\n  ArrayXi ur_indices;\n  if(w_vc_tests) {\n    Jmat = MatrixXb::Constant(bs, nmasks_total, false);\n    if(setinfo.ultra_rare_ind.segment(start, bs).any()) {\n      int n_ur = setinfo.ultra_rare_ind.segment(start, bs).count();\n      rare_mask_tmp = setinfo.vc_rare_mask; // not safe to update SpMat in parallel (not many columns)\n      ur_indices = ArrayXi::Constant(bs, -1);\n      ur_sp_mat.resize(params->n_samples, n_ur);\n      ur_miss.resize(params->n_samples, n_ur);\n\n      // store the ur variants in spmat & keep track of index/missingness\n      for(int i = 0, j = 0; i < bs; i++){\n        if(!setinfo.ultra_rare_ind(start+i)) continue;\n        MapArXd garr (gblock->Gmat.col(i).data(), params->n_samples, 1);\n        // flip if necessary\n        if(all_snps_info[start+i].af1 > 0.5) ur_sp_mat.col(j) = (garr == -3).select(0, 2 - garr).matrix().sparseView();\n        else ur_sp_mat.col(j) = (garr < 0).select(0, garr).matrix().sparseView();\n        // if using custom user weights, rescale before collapsing ur variants \n        ur_sp_mat.col(j) *= vc_weights(start+i);\n        ur_miss.col(j) = (garr >= 0);\n        // store the index\n        ur_indices(i) = j++;\n      }\n    }\n\n  }\n\n  if(w_vc_tests && remeta_save_ld) {\n    // Find the column for VC tests.\n    int vc_col_idx = -1;\n    for (int j = 1; j < n_aaf_bins; j++) {\n      if (aafs(j-1) == vc_aaf) {\n        vc_col_idx = j;\n        break;\n      }\n    }\n    ArrayXb snp_in_any_mask = (keepmask.rowwise().maxCoeff().array() > 0) && (keepaaf.col(vc_col_idx).array() > 0);\n    for (int k 
= 0; k < snp_in_any_mask.size(); ++k) {\n      if(snp_in_any_mask(k)) {\n        remeta_snplist.push_back(snpinfo[ setinfo.snp_indices[start + k] ].ID);\n      }\n    }\n  }\n\n  // update each mask \n#if defined(_OPENMP)\n  setNbThreads(1);\n  // use MT in both loops\n#pragma omp parallel for schedule(dynamic) collapse(2)\n#endif\n  for(size_t i = 0; i < masks.size(); i++){\n    for(int j = 0; j < n_aaf_bins; j++){\n\n      int index_start = i * n_aaf_bins + j;\n      ArrayXb colkeep = keepmask.col(i).array() && keepaaf.col(j).array();\n      if(!take_max && !take_comphet) nsites(index_start) += colkeep.count();\n      if(write_snplist) append_snplist(index_start, colkeep, start, setinfo, snpinfo);\n\n      if(w_vc_tests && (j > 0) && ( aafs(j-1) == vc_aaf )) // track variants in mask\n        Jmat.col(index_start) = colkeep.matrix();\n\n      // ignore variants in previous AAF categories (accumulation is in next loop)\n      if(j>0) colkeep = colkeep && !keepaaf.col(j-1).array(); \n\n      // if there are no variants included, continue\n      if( colkeep.count() == 0 ) continue;\n      //if(i==2 && j==1) cerr << i << \" \" << j << \" \" << colkeep.count() << \" \";\n\n      // update mask\n      MapArXd maskvec (Gtmp.col(index_start).data(), params->n_samples, 1);\n\n      if(take_max) {\n\n        SpVec gv, mv;\n        mv = maskvec.matrix().sparseView();\n        for(int k = 0; k < colkeep.size(); k++){\n          if(!colkeep(k)) continue;\n          gv = (vc_weights(start+k) * gblock->Gmat.col(k)).sparseView();\n          mv = gv.cwiseMax(mv);\n        }\n        maskvec = MatrixXd(mv).array();\n\n      } else {\n\n        int l;\n        double ds;\n        SpVec gv;\n\n        for(int k = 0; k < colkeep.size(); k++){\n          if(!colkeep(k)) continue;\n          gv = (vc_weights(start+k) * gblock->Gmat.col(k)).sparseView();\n\n          // sum rule (ignore -3)\n          for (SparseVector<double>::InnerIterator it(gv); it; ++it) {\n            l = it.index();\n            ds = it.value();\n\n            if( !filters->ind_in_analysis(l) || (ds < 0)) continue;\n\n            if( maskvec(l) < 0 ) maskvec(l) = ds;\n            else maskvec(l) += ds;\n          }\n\n          // for genotype counts, identify when (-3) is 0\n          non_missing.col(index_start).array() = non_missing.col(index_start).array() || ( gblock->Gmat.col(k).array() >= 0 );\n        }\n       //if(i==2 && j==1) cout << (maskvec == -3).select(0,maskvec).sum() << endl;\n\n      }\n\n      // get ultra-rare mask if using VC test (take max)\n      if(w_vc_tests && setinfo.ultra_rare_ind.segment(start, bs).any() && ((j == 0) || ( aafs(j-1) <= vc_aaf )) ) {\n        SpVec mv = setinfo.vc_rare_mask.col(index_start);\n        for(int k = 0; k < colkeep.size(); k++){\n          if(!colkeep(k) || !setinfo.ultra_rare_ind(start+k)) continue;\n          mv = mv.cwiseMax( ur_sp_mat.col(ur_indices(k)) );\n          setinfo.vc_rare_mask_non_missing.col(index_start).array() = setinfo.vc_rare_mask_non_missing.col(index_start).array() || ur_miss.col( ur_indices(k) ).array();\n        }\n        rare_mask_tmp.col(index_start) = mv;\n      } \n\n    }\n  }\n\n#if defined(_OPENMP)\n  setNbThreads(params->threads);\n#endif\n\n  if(w_vc_tests) {\n    setinfo.Jmat.middleRows(start, bs) = Jmat;\n    if(setinfo.ultra_rare_ind.segment(start, bs).any()) setinfo.vc_rare_mask = rare_mask_tmp.sparseView();\n  }\n\n}\n\nvoid GenoMask::apply_rule(SpVec& out_mask, SpVec const& Gvec, const Ref<const ArrayXb>& in_analysis, bool const& force_max) { 
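// merge one weighted variant (Gvec) into the running burden mask (out_mask) via the max or sum rule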
\n\n  int l;\n  double ds;\n\n  if(take_max || force_max) { // max rule to combine variants across sites\n    out_mask = Gvec.cwiseMax(out_mask);\n  } else { // sum rule (ignore missing) \n    MatrixXd out_mask_vec = out_mask;\n    for (SpVec::InnerIterator it(Gvec); it; ++it) {\n      l = it.index();\n      ds = it.value();\n\n      if( !in_analysis(l) || (ds == -3)) continue;\n\n      if( out_mask_vec(l,0) == -3 ) out_mask_vec(l,0) = ds;\n      else if(ds > 0) out_mask_vec(l,0) += ds;\n    }\n    out_mask = out_mask_vec.col(0).sparseView();\n  }\n\n}\n\nvoid GenoMask::apply_rule(Ref<ArrayXd> out_mask, SpVec const& Gvec, const Ref<const ArrayXb>& in_analysis, bool const& force_max) { \n\n  int l;\n  double ds;\n\n  if(take_max || force_max) { // max rule to combine variants across sites\n    SpVec tmpv = out_mask.matrix().sparseView();\n    out_mask = Gvec.cwiseMax(tmpv);\n    out_mask = in_analysis.select(out_mask, -3);\n  } else { // sum rule (ignore missing) \n    for (SpVec::InnerIterator it(Gvec); it; ++it) {\n      l = it.index();\n      ds = it.value();\n\n      if( !in_analysis(l) || (ds<0)) continue;\n\n      if( out_mask(l) < 0 ) out_mask(l) = ds;\n      else if(ds > 0) out_mask(l) += ds;\n    }\n  }\n\n}\n\nvoid GenoMask::apply_rule(Ref<ArrayXd> maskvec, const Ref<const MatrixXd>& Gmat, const Ref<const ArrayXb>& in_analysis, bool const& force_max) { \n\n  if(take_max || force_max) { // max rule to combine variants across sites\n    maskvec = in_analysis.select(maskvec.max(Gmat.rowwise().maxCoeff().array()), maskvec);\n  } else { // sum rule (ignore missing) \n    ArrayXb non_miss_G = in_analysis && (Gmat.array() >= 0).rowwise().any();\n    maskvec = non_miss_G.select( maskvec.max(0) + (Gmat.array() >= 0).select(Gmat.array(), 0).rowwise().sum(), maskvec);\n  }\n\n}\n\n// should only be called once\nvoid GenoMask::collapse_mask_chunk(const Ref<const ArrayXi>& indices, SpMat const& Gmat_sp, const Ref<const ArrayXb>& is_ultra_rare, const Ref<const ArrayXb>& to_flip, const Ref<const ArrayXd>& vc_weights, Ref<ArrayXd> out_mask, Ref<ArrayXd> out_ur_mask, const Ref<const ArrayXb>& in_analysis){ \n\n  int nkept = indices.size(), icol;\n  double weight;\n  if(nkept == 0) return;\n\n  // collapse variants\n  for(int i = 0; i < nkept; i++){\n    icol = indices(i);\n    weight = vc_weights(icol); // apply custom user weight to ur variant\n\n    if( w_vc_tests && is_ultra_rare(icol) ){ // collapse into a rare mask\n      if( to_flip(icol) ){ // need to flip \n        ArrayXd Gvec = Gmat_sp.col(icol);\n        Gvec = (in_analysis && (Gvec >=0 )).select(2 - Gvec, Gvec);\n        SpVec G_flip = Gvec.matrix().sparseView();\n        apply_rule(out_ur_mask, weight * G_flip, in_analysis, true);\n      } else\n        apply_rule(out_ur_mask, weight * Gmat_sp.col(icol), in_analysis, true);\n    }  \n    // for lovo mask\n    apply_rule(out_mask, weight * Gmat_sp.col(icol), in_analysis, false);\n\n  }\n\n}\n\nvoid GenoMask::updateMasks_loo(const Ref<const ArrayXi>& indices_chunk, bool const& comp_full_mask, SpMat const& Gmat_sp, const Ref<const ArrayXb>& is_ultra_rare, const Ref<const ArrayXb>& to_flip, const Ref<const ArrayXd>& vc_weights, const Ref<const ArrayXd>& excl_vars_mask, const Ref<const ArrayXd>& excl_vars_ur_mask, const Ref<const ArrayXb>& in_analysis, vset& setinfo, int const& nthreads){\n\n  bool with_ur = is_ultra_rare(indices_chunk).any() || (excl_vars_ur_mask > 0).any();\n  int bs = indices_chunk.size();\n  Gtmp.resize(Gmat_sp.rows(), nmasks_total); // incl. 
full mask\n\n  vector<SpVec> rare_mask_tmp;\n  ArrayXb g_ur_nmiss;\n  SpVec g_ur_start;\n\n  // store matrix as dense (should be fairly small)\n  SpMat Jstar (Gmat_sp.cols(), bs); // Mall x M\n  Jstar.reserve(bs);\n  for(int i = 0; i < bs; i++)\n    Jstar.insert(indices_chunk(i), i) = 1;\n  MatrixXd Gmat_d = Gmat_sp * Jstar;\n\n  if(w_vc_tests && with_ur) { // for skat tests and ur vars present\n    setinfo.vc_rare_mask.resize(Gtmp.rows(), Gtmp.cols());\n    setinfo.vc_rare_mask_non_missing.resize(Gtmp.rows(), Gtmp.cols());\n    g_ur_nmiss = excl_vars_ur_mask >= 0;\n    g_ur_start = excl_vars_ur_mask.max(0).matrix().sparseView();\n    setinfo.vc_rare_mask_non_missing.array().colwise() = g_ur_nmiss;\n    rare_mask_tmp.assign(Gtmp.cols(), g_ur_start);\n  }\n  Gtmp.array().colwise() = excl_vars_mask;\n\n  // start openmp for loop\n#if defined(_OPENMP)\n  setNbThreads(1);\n#pragma omp parallel for schedule(dynamic)\n#endif\n  for(int i = 0; i < bs; i++){ // generate lovo mask\n\n    MapArXd maskvec (Gtmp.col(i).data(), Gtmp.rows(), 1);\n    ArrayXb in_loo_mask = ArrayXb::Constant(bs, true); in_loo_mask(i) = false;\n    ArrayXi in_loo_indices = get_true_indices(in_loo_mask);\n\n    if(in_loo_mask.any()){\n      // LOVO mask\n      if(take_max){ // max rule to combine variants across sites\n        maskvec = in_analysis.select(maskvec.max((Gmat_d(all, in_loo_indices) * vc_weights(indices_chunk(in_loo_indices)).matrix().asDiagonal()).rowwise().maxCoeff().array()), maskvec);\n      } else {\n        ArrayXb non_miss_G = in_analysis && (Gmat_d(all, in_loo_indices).array() >= 0).rowwise().any();\n        maskvec = non_miss_G.select( maskvec.max(0) + (Gmat_d(all, in_loo_indices).array() >= 0).select((Gmat_d(all, in_loo_indices) * vc_weights(indices_chunk(in_loo_indices)).matrix().asDiagonal()).array(), 0).rowwise().sum(), maskvec);\n      }\n    }\n\n    if( comp_full_mask && (i == (bs-1)) ) { // compute full mask\n      Gtmp.rightCols(1) = Gtmp.col(i); \n      // add to burden mask for full set\n      apply_rule(Gtmp.rightCols(1).array(), (vc_weights(indices_chunk(i)) * Gmat_d.col(i)).sparseView(), in_analysis, false);\n    }\n\n  }\n#if defined(_OPENMP)\n  setNbThreads(nthreads);\n#endif\n\n  if(w_vc_tests && with_ur) { // for skat tests and ur vars present\n\n    // get spmat for ur variants only\n    int n_ur = is_ultra_rare(indices_chunk).count();\n    ArrayXi ur_indices = ArrayXi::Constant(bs, -1);\n    MatrixXb ur_miss; if(n_ur>0) ur_miss.resize(Gmat_d.rows(), n_ur);\n    SpMat ur_sp_mat; if(n_ur>0) ur_sp_mat.resize(Gmat_d.rows(), n_ur);\n    SpVec ur_mask_all = g_ur_start;\n\n    // store the ur variants in spmat & keep track of index/missingness\n    int m = 0;\n    for (int i = 0; i < bs; i++ ) {\n      if(!is_ultra_rare(indices_chunk(i))) continue;\n      MapArXd garr (Gmat_d.col(i).data(), Gmat_d.rows(), 1);\n      // flip if necessary (missing set to 0)\n      if(to_flip(indices_chunk(i))) ur_sp_mat.col(m) = (garr<0).select(0, 2 - garr).matrix().sparseView();\n      else ur_sp_mat.col(m) = garr.max(0).matrix().sparseView();\n      ur_sp_mat.col(m) *= vc_weights(indices_chunk(i)); // apply custom user weight to ur variant\n      // track missingness\n      ur_miss.col(m) = (garr >= 0);\n      // track max across sites\n      ur_mask_all = ur_mask_all.cwiseMax(ur_sp_mat.col(m));\n      // store the index\n      ur_indices(i) = m++;\n    }\n\n    // start openmp for loop\n#if defined(_OPENMP)\n    setNbThreads(1);\n#pragma omp parallel for schedule(dynamic)\n#endif\n    for(int i = 0; i < 
bs; i++){\n\n      ArrayXb in_loo_mask = ArrayXb::Constant(bs, true); in_loo_mask(i) = false;\n\n      if(n_ur>0){\n        // if not ur site, set to max across ur sites\n        if(!is_ultra_rare(indices_chunk(i))) {\n          rare_mask_tmp[i] = ur_mask_all;\n          setinfo.vc_rare_mask_non_missing.col(i).array() = setinfo.vc_rare_mask_non_missing.col(i).array() || ur_miss.array().rowwise().any();\n        } else // otherwise, take max using lovo\n          for (auto const& j : get_true_indices( in_loo_mask && is_ultra_rare(indices_chunk) )) {\n            rare_mask_tmp[i] = rare_mask_tmp[i].cwiseMax(ur_sp_mat.col(ur_indices(j)));\n            setinfo.vc_rare_mask_non_missing.col(i).array() = setinfo.vc_rare_mask_non_missing.col(i).array() || ur_miss.col(ur_indices(j)).array() ;\n          }\n      }\n\n      if( comp_full_mask && (i == (bs-1)) ) { // compute full mask\n        rare_mask_tmp.back() = rare_mask_tmp[i];\n        setinfo.vc_rare_mask_non_missing.rightCols(1) = setinfo.vc_rare_mask_non_missing.col(i);\n        if(is_ultra_rare(indices_chunk(i))) { // if variant is UR\n          rare_mask_tmp.back() = rare_mask_tmp.back().cwiseMax(ur_sp_mat.col(ur_indices(i)));\n          setinfo.vc_rare_mask_non_missing.rightCols(1).array() = setinfo.vc_rare_mask_non_missing.rightCols(1).array() || ur_miss.col(ur_indices(i)).array();\n        }\n      }\n\n    }\n#if defined(_OPENMP)\n    setNbThreads(nthreads);\n#endif\n\n    for(size_t j = 0; j < rare_mask_tmp.size(); j++)\n      setinfo.vc_rare_mask.col(j) = rare_mask_tmp[j];\n\n  }\n\n}\n\n\nvoid GenoMask::tally_masks(struct param const* params, struct filter const* filters, const Ref<const MatrixXb>& masked_indivs, SpMat& vc_rare_mask, MatrixXb& vc_rare_mask_non_missing){\n\n  MatrixXd rare_mask_tmp;\n  if(w_vc_tests) rare_mask_tmp = vc_rare_mask; // not safe to update SpMat in parallel (not many columns)\n\n#if defined(_OPENMP)\n  setNbThreads(1);\n#pragma omp parallel for schedule(dynamic)\n#endif\n  // get mask by aggregating across increasing AAF categories\n  for(size_t i = 0; i < masks.size(); i++){\n    // first mask AAF\n    int index_mask = i * n_aaf_bins;\n    double ds;\n    bool column_set = (Gtmp.col(index_mask).array() >= 0).any();\n    colset(index_mask) = column_set;\n\n    // don't parallelize inner loop (cumulative updates)\n    for(int j = 1; j < n_aaf_bins; j++){\n\n      int index_start = index_mask + j;\n      // check if there are variants in mask\n      if(colset(index_start-1) || (Gtmp.col(index_start).array() >= 0).any()) \n        colset(index_start) = true;\n\n      if( !colset(index_start) ) continue;\n\n      // add mask from previous AAF category\n      MapArXd maskvec (Gtmp.col(index_start).data(), params->n_samples, 1);\n\n      if(take_max) {\n\n        SpMat gv, mv;\n        // sparse of current\n        mv = maskvec.matrix().sparseView();\n        // sparse of previous\n        gv = Gtmp.col(index_start-1).sparseView();\n        // aggregate to current\n        maskvec = MatrixXd(gv.cwiseMax(mv)).array();\n\n      } else {\n\n        SpVec gv;\n\n        // add previous\n        gv = Gtmp.col(index_start-1).sparseView();\n\n        // sum rule (ignore -3)\n        for (SparseVector<double>::InnerIterator it(gv); it; ++it) {\n          int l = it.index();\n          ds = it.value();\n\n          if( !filters->ind_in_analysis(l) || (ds < 0)) continue;\n\n          if( maskvec(l) < 0 ) maskvec(l) = ds;\n          else maskvec(l) += ds;\n\n        }\n\n        // for genotype counts, identify when (-3) 
is 0\n        non_missing.col(index_start).array() = non_missing.col(index_start).array() || non_missing.col(index_start-1).array();\n\n      }\n\n      // update ultra-rare mask\n      if( w_vc_tests && ( aafs(j-1) <= vc_aaf ) && rare_mask_tmp.col(index_start-1).nonZeros() ){\n        rare_mask_tmp.col(index_start) = rare_mask_tmp.col(index_start).cwiseMax(rare_mask_tmp.col(index_start-1));\n        vc_rare_mask_non_missing.col(index_start).array() = vc_rare_mask_non_missing.col(index_start).array() || vc_rare_mask_non_missing.col(index_start-1).array();\n        if( aafs(j-1) == vc_aaf ){\n         for( int k = 1; k <= j; k++){ // remove data not needed anymore\n          rare_mask_tmp.col(index_start-k).array() = 0;\n          vc_rare_mask_non_missing.col(index_start-k).array() = false;\n         }\n        }\n      }\n      \n    }\n  }\n\n#if defined(_OPENMP)\n  setNbThreads(params->threads);\n#endif\n\n  if(w_vc_tests) vc_rare_mask = rare_mask_tmp.sparseView();\n\n}\n\nvoid GenoMask::computeMasks(struct param* params, struct filter* filters, const Ref<const MatrixXb>& masked_indivs, const Ref<const MatrixXd>& ymat, struct geno_block* gblock, vector<variant_block> &all_snps_info, vset& setinfo, vector<snp>& snpinfo, mstream& sout){\n\n  // check size of vblock vector\n  if(((int) all_snps_info.size()) < nmasks_total) all_snps_info.resize( nmasks_total );\n\n  // tally masks\n  tally_masks(params, filters, masked_indivs, setinfo.vc_rare_mask, setinfo.vc_rare_mask_non_missing);\n\n  ArrayXb in_bed = colset;\n\n  // finish building each mask \n#if defined(_OPENMP)\n  setNbThreads(1);\n  // use MT in both loops\n#pragma omp parallel for schedule(dynamic) collapse(2)\n#endif\n  for(size_t i = 0; i < masks.size(); i++){\n    for(int j = 0; j < n_aaf_bins; j++){\n\n      int index_start = i * n_aaf_bins + j;\n\n      // check variants were included in mask\n      if(!colset(index_start)) continue;\n\n      // compute mask\n      buildMask(index_start, setinfo.chrom, setinfo.physpos, params, filters, masked_indivs, ymat, &all_snps_info[index_start]);\n\n      colset(index_start) = !all_snps_info[index_start].ignored;\n\n    }\n  }\n#if defined(_OPENMP)\n  setNbThreads(params->threads);\n#endif\n\n  n_mask_pass = colset.count();\n  if(verbose && (!colset).any()) sout << \"WARNING: \" << (nmasks_total - n_mask_pass) << \"/\" << nmasks_total << \" masks fail MAC filter and will be skipped...\";\n  //cerr << endl << Gtmp.block(0,0,10,5) << endl;\n\n  // reset indices\n  setinfo.snp_indices.resize(n_mask_pass); \n  snpinfo.resize(params->n_variants + n_mask_pass); \n  // update Gmat\n  gblock->Gmat.resize(gblock->Gmat.rows(), n_mask_pass);\n  vector<variant_block> tmp_snp_info; \n  snp tmpsnp;\n\n  if(n_mask_pass == 0){\n    all_snps_info = tmp_snp_info;\n    return;\n  }\n\n  // store masks for testing (ignore those that failed filters)\n  int k = 0;\n  for(size_t i = 0; i < masks.size(); i++){\n    for(int j = 0; j < n_aaf_bins; j++){\n\n      int index_start = i * n_aaf_bins + j;\n\n      std::ostringstream buffer;\n\n      // mask + aaf\n      buffer <<  masks[i].name << \".\";\n      if(j==0) buffer << \"singleton\";\n      else if(aafs(j-1)==1) buffer << \"all\";\n      else buffer << aafs(j-1);\n\n      // save in snpinfo\n      tmpsnp.chrom = setinfo.chrom;\n      tmpsnp.ID = setinfo.ID + \".\" + masks[i].region_name + buffer.str();\n      tmpsnp.physpos = setinfo.physpos;\n      tmpsnp.allele1 = \"ref\";\n      tmpsnp.allele2 = buffer.str();\n\n      if(write_masks && in_bed(index_start)) 
{\n        write_genovec(index_start);\n        write_genobim(tmpsnp);\n        if(write_setlist) append_setlist(index_start, tmpsnp.ID);\n      }\n\n      if(!colset(index_start)) continue;\n      if(write_snplist) make_snplist(index_start, tmpsnp.ID, tmpsnp.chrom, tmpsnp.physpos);\n\n      // update snpinfo\n      tmpsnp.offset = params->n_variants + k; // new index in snpinfo vec.\n      snpinfo[ params->n_variants + k ] = tmpsnp;\n\n      // save index\n      setinfo.snp_indices[k] = tmpsnp.offset;\n\n      // store mask in G\n      gblock->Gmat.col(k) = Gtmp.col(index_start);\n      all_snps_info[index_start].mask_name = masks[i].name;\n      if(w_vc_tests) {\n        all_snps_info[index_start].col_jmat_skat = index_start;\n        all_snps_info[index_start].skip_for_vc = (j == 0) || ( aafs(j-1) != vc_aaf );\n      }\n      tmp_snp_info.push_back(all_snps_info[index_start]);\n      k++;\n\n    }\n  }\n\n  // update written set list files with new set\n  if(write_setlist) make_setlist(setinfo.ID, setinfo.chrom, setinfo.physpos);\n\n  all_snps_info = tmp_snp_info;\n\n}\n\nvoid GenoMask::computeMasks_loo(const Ref<const ArrayXi>& indices_chunk, bool const& comp_full_mask, struct param* params, struct filter* filters, const Ref<const MatrixXb>& masked_indivs, const Ref<const MatrixXd>& ymat, struct geno_block* gblock, vector<variant_block> &all_snps_info, vset& setinfo, vector<snp>& snpinfo, mstream& sout){\n\n  // check size of vblock vector\n  all_snps_info.resize( nmasks_total );\n\n  // finish building each mask \n#if defined(_OPENMP)\n  setNbThreads(1);\n#pragma omp parallel for schedule(dynamic)\n#endif\n  for(int i = 0; i < nmasks_total; i++) // compute mask\n    buildMask(i, setinfo.chrom, setinfo.physpos, params, filters, masked_indivs, ymat, &all_snps_info[i]);\n#if defined(_OPENMP)\n  setNbThreads(params->threads);\n#endif\n\n  n_mask_pass = 0;\n  for(int i = 0; i < nmasks_total; i++) // check if failed\n    n_mask_pass += !all_snps_info[i].ignored;\n  if(verbose && (n_mask_pass < nmasks_total)) sout << \"WARNING: \" << n_mask_pass << \"/\" << nmasks_total << \" masks fail MAC filter...\";\n\n  // reset indices\n  setinfo.snp_indices.resize(n_mask_pass); \n  snpinfo.resize(params->n_variants + n_mask_pass); \n  // update Gmat\n  gblock->Gmat.resize(Gtmp.rows(), n_mask_pass);\n  vector<variant_block> tmp_snp_info; \n  snp tmpsnp;\n\n  // store masks for testing (ignore those that failed filters)\n  int k = 0;\n  std::ostringstream buffer;\n  buffer <<  masks[0].name << \".\" ;\n  if(n_aaf_bins == 1) buffer << \"singleton\";\n  else if(aafs(0)==1) buffer << \"all\";\n  else buffer << aafs(0);\n\n  for(int i = 0; i < nmasks_total; i++){\n\n    if(all_snps_info[i].ignored) continue;\n\n    // update snpinfo\n    tmpsnp.chrom = setinfo.chrom;\n    if(comp_full_mask && (i == (nmasks_total-1) )) { // full mask\n      tmpsnp.ID = setinfo.ID + \".\" + masks[0].region_name + buffer.str();\n      tmpsnp.physpos = setinfo.physpos;\n    } else { // loo mask\n      tmpsnp.ID = setinfo.ID + \".\" + masks[0].region_name + buffer.str() + \"_\" + snpinfo[ setinfo.snp_indices[ indices_chunk(i) ] ].ID;\n      tmpsnp.physpos = snpinfo[ setinfo.snp_indices[ indices_chunk(i) ] ].physpos;\n    }\n    tmpsnp.allele1 = \"ref\";\n    tmpsnp.allele2 = buffer.str();\n    tmpsnp.offset = params->n_variants + k; // new index in snpinfo vec.\n    snpinfo[ params->n_variants + k ] = tmpsnp;\n\n    // save index\n    setinfo.snp_indices[k] = tmpsnp.offset;\n\n    // store mask in G\n    gblock->Gmat.col(k) = 
Gtmp.col(i);\n    if(w_vc_tests) {\n      all_snps_info[i].col_jmat_skat = i;\n      all_snps_info[i].skip_for_vc = false;\n    }\n    tmp_snp_info.push_back(all_snps_info[i]);\n    k++;\n\n  }\n\n  all_snps_info = tmp_snp_info;\n\n}\n\nvoid GenoMask::set_snp_masks(int const& start, int const& bs, vector<variant_block> const &all_snps_info, vset const& setinfo, vector<snp>& snpinfo, mstream& sout){\n\n  uint64 res;\n  int res2 = 1;\n  keepmask = MatrixXb::Constant(bs, masks.size(), true);\n\n  // go through each mask\n  for(size_t i = 0; i < masks.size(); i++){\n\n    // get snps that match the mask\n    for(int j = 0; j < bs; j++){\n      if(all_snps_info[j].ignored){\n        keepmask(j, i) = false;\n        continue;\n      }\n\n      // check if bit is set for at least one of the categories in mask\n      // bitwise AND should return positive value\n      res = (snpinfo[ setinfo.snp_indices[start + j] ].anno[setinfo.ID].id & masks[i].id);\n      if(w_regions) res2 = (int)(snpinfo[ setinfo.snp_indices[start + j] ].anno[setinfo.ID].regionid & masks[i].region);\n      keepmask(j, i) = (res > 0) && (res2 > 0);\n\n      //cerr << snpinfo[ setinfo.snp_indices[j] ].ID << \" \" <<  snpinfo[ setinfo.snp_indices[j] ].anno[setinfo.ID].id << \"\\t\" << masks[i].id << endl;\n    }\n\n  // cerr << i+1<<\"npass=\"<< keepmask.col(i).count()<<\"\\t\";\n  }\n\n}\n\nvoid GenoMask::set_snp_aafs(int const& start, int const& bs, const bool& aaf_given, vector<variant_block> const &all_snps_info, vset& setinfo, vector<snp> const& snpinfo, mstream& sout){\n\n  double upper;\n  ArrayXb colkeep = ArrayXb::Constant( bs, true ); // these will be nested\n  keepaaf = MatrixXb::Constant(bs, n_aaf_bins, true);\n\n  // go through each aaf cutoff (also includes singletons)\n  for(int i = (n_aaf_bins-1); i >= 0; i--){\n    if(i>0) upper = aafs(i-1);\n\n    // get snps that fall in this aaf bin\n    for(int j = 0; j < bs; j++){\n      if(all_snps_info[j].ignored || ( !colkeep(j) && !(snpinfo[ setinfo.snp_indices[start + j] ].force_singleton || all_snps_info[j].singleton) ) ){\n        colkeep(j) = false;\n        continue;\n      }\n       \n      if( i == 0 ) colkeep(j) = force_singleton ? 
snpinfo[ setinfo.snp_indices[start + j] ].force_singleton : all_snps_info[j].singleton;\n      else if (force_singleton && snpinfo[ setinfo.snp_indices[start + j] ].force_singleton) colkeep(j) = true;\n      else if(aaf_given) colkeep(j) = (snpinfo[ setinfo.snp_indices[start + j] ].aaf <= upper);\n      else colkeep(j) = (all_snps_info[j].af1 <= upper);\n\n      if(w_vc_tests && (i == (n_aaf_bins-1))) setinfo.ultra_rare_ind(start + j) = all_snps_info[j].mac1 <= vc_collapse_MAC;\n      //cerr << snpinfo[ setinfo.snp_indices[start + j] ].aaf  << \" \" << all_snps_info[j].af1 << endl;\n      //if(i==0 && all_snps_info[j].singleton) cerr << snpinfo[ setinfo.snp_indices[start + j] ].ID << endl;\n    }\n\n    keepaaf.col(i) = colkeep.matrix();\n    //cerr << i+1 << \"/\" << n_aaf_bins << \": \" <<  upper << \"--\"<< keepaaf.col(i).count()<< endl;\n\n  }\n\n}\n\nbool GenoMask::check_in_lovo_mask(const Ref<const ArrayXd>& Geno, struct filter const& filters, string const& setinfo_ID, snp& snp_info, bool& is_ur, bool& to_flip, double& maf, int const& chrom, struct param const* params){\n\n  bool is_singleton = false, in_lovo_set = false;\n  bool non_par = in_non_par(chrom, snp_info.physpos, params);\n  int nmales = 0, lval;\n  double total, aaf, mac = 0, n_nonmiss = 0, mval;\n\n  if(((Geno < -3) || (Geno > 2)).any())\n    throw \"out of bounds genotype value for variant '\" + snp_info.ID + \"'\";\n\n  ArrayXb keep_index = filters.ind_in_analysis && (Geno != -3.0);\n  total = keep_index.select(Geno,0).sum();\n  n_nonmiss = keep_index.count();\n\n  // for MAC, check sex for non-PAR chrX\n  if(non_par) {\n    for (int i = 0, index = 0; i < filters.ind_ignore.size(); i++) {\n      // skip samples that were ignored from the analysis\n      if( filters.ind_ignore(i) ) continue;\n      if( keep_index(index) ){\n        // compute MAC using 0.5*g for males for variants on sex chr non-PAR (males coded as diploid) - sex is 1 for males and 0 o.w.\n        lval = (params->sex(index) == 1);\n        mval = Geno(index) * 0.5 * (2 - lval);\n        // check if not 0/2\n        if( !params->dosage_mode && (lval == 1) && (Geno(index) == 1) )\n          cerr << \"WARNING: genotype is 1 for a male on chrX at \" << snp_info.ID << \" (males should be coded as diploid).\";\n        mac += mval;\n        nmales += lval;\n      }\n      index++;\n    }\n  }\n\n  // compute AAF and AAC\n  aaf = total / n_nonmiss / 2.0;\n  maf = min(aaf, 1 - aaf);\n  if(!non_par) mac = total;\n\n  // check if singleton\n  if(!params->singleton_carriers) is_singleton = ( ((int)(mac+0.5)) == 1 );\n  else is_singleton = (filters.ind_in_analysis && (Geno > 0)).count() == 1;\n\n  // compute MAC (nmales=0 in auto/par)\n  mac = min(mac, 2 * n_nonmiss - nmales - mac);\n\n  if(mac < params->min_MAC)\n    return false;\n\n  // check if bit is set for at least one of the categories in mask\n  // bitwise AND should return positive value\n  uint64 res = snp_info.anno[setinfo_ID].id & masks[0].id;\n  if( res == 0 ) return false;\n\n  // check aaf/singleton\n  if( n_aaf_bins == 1 ) in_lovo_set = force_singleton ? 
snp_info.force_singleton : is_singleton;\n  else if(params->set_aaf) in_lovo_set = (snp_info.aaf <= aafs(0));\n  else in_lovo_set = aaf <= aafs(0);\n\n  if(!in_lovo_set) return false;\n\n  if(w_vc_tests) {\n    is_ur = mac <= vc_collapse_MAC;\n    if(aaf > 0.5) to_flip = true;\n  }\n  //cerr << snp_info.aaf  << \" \" << aaf << endl;\n  //if(i==0 && is_singleton) cerr << \"singleton:\" << snp_info.ID << \"\\n\";\n\n  return true;\n\n}\n\n\nvoid GenoMask::buildMask(int const& isnp, int const& chrom, uint32_t const& physpos, struct param const* params, struct filter const* filters, const Ref<const MatrixXb>& masked_indivs, const Ref<const MatrixXd>& ymat, variant_block* snp_data){\n\n  bool non_par = in_non_par(chrom, physpos, params);\n  int lval, nmales = 0;\n  double ds, total = 0, mac = 0, mval, sum_pos;\n\n  MapArXd maskvec (Gtmp.col(isnp).data(), params->n_samples, 1);\n  // reset variant info\n  prep_snp_stats(snp_data, params);\n\n  // if comphet rule, threshold to 2\n  if(take_comphet) maskvec = maskvec.min(2);\n\n  // if dosages were given and writing to PLINK bed, convert dosages to hardcalls\n  if(params->dosage_mode && write_masks) maskvec = maskvec.round();\n\n  // get counts\n  for (int i = 0, index = 0; i < filters->ind_ignore.size(); i++) {\n\n    // skip samples that were ignored from the analysis\n    if( filters->ind_ignore(i) ) continue;\n    ds = 0;\n\n    if( filters->ind_in_analysis(index) ){\n\n      ds = maskvec(index);\n      // distinguish missing from 0 for sum rule\n      if(!w_loo && !take_max && (ds < 0) && non_missing(index,isnp)) \n        ds = 0;\n\n      if( ds >= 0 ){\n        lval = 0, mval = ds;\n        if(params->test_mode && non_par) {\n          lval = (params->sex(index) == 1);\n          mval = ds * 0.5 * (2 - lval);\n        }\n        total += ds;\n        mac += mval;\n        nmales += lval;\n        snp_data->ns1++;\n\n        // counts by trait\n        if(filters->has_missing(index)) update_trait_counts(index, ds, mval, lval, 0, snp_data, masked_indivs);\n\n        /* // get genotype counts (convert to hardcall)\n        if( params->htp_out && (take_max || take_comphet) ) {\n          if(params->test_mode && (chrom == params->nChrom) && (lval>0)) \n            hc_val = (ds < 1 ? 
0 : 2);\n          else\n            hc_val = (int) (ds + 0.5); // round to nearest integer (0/1/2)\n          update_genocounts(params->trait_mode==1, index, hc_val, snp_data->genocounts, masked_indivs, ymat);\n        } else*/\n        if( params->af_cc )\n            update_af_cc(index, ds, snp_data, masked_indivs, ymat);\n        if (!params->split_by_pheno){\n          if(ds >= 1.5) snp_data->n_aa++;\n          else if(ds < 0.5) snp_data->n_rr++;\n          else if(non_par && lval){\n            if (ds < 1) snp_data->n_rr++;\n            else snp_data->n_aa++;\n          }\n        }\n\n      }\n    }\n\n    // force masked entries to be 0\n    maskvec(index++) = ds;\n  }\n  //cerr << maskvec.matrix().transpose().array().head(5) << endl << endl << maskvec.mean()<<endl;\n  if(write_masks) make_genovec(isnp, maskvec, filters);\n\n  // check MAC\n  if(!non_par) mac = total; // use MAC assuming diploid coding\n  // get counts by trait \n  snp_data->mac += mac; // aac\n  snp_data->ns += snp_data->ns1; // ns\n\n  // only do this when the mask is in [0,2]\n  if(take_max || take_comphet){\n    // get counts by trait \n    snp_data->nmales += nmales; // nmales\n\n    if(!non_par) {\n      mac = min( mac, 2 * snp_data->ns1 - mac );\n      snp_data->mac = snp_data->mac.min( 2 * snp_data->ns.cast<double>() - snp_data->mac );\n    } else {\n      mac = min(mac, 2 * snp_data->ns1 - nmales - mac); // males are 0/1\n      snp_data->mac = snp_data->mac.min( 2 * snp_data->ns.cast<double>() - snp_data->nmales.cast<double>() - snp_data->mac );\n    }\n\n    if(mac < params->min_MAC_mask) { // don't do this with sum mask\n      snp_data->ignored = true; return;\n    }\n  }\n  snp_data->ignored_trait = snp_data->mac < params->min_MAC_mask;\n\n  // get counts by trait \n  snp_data->af += total;\n\n  if(params->af_cc){\n    snp_data->af_control = snp_data->af - snp_data->af_case;\n    snp_data->af_case /= 2 * snp_data->ns_case.cast<double>();\n    snp_data->ns_control = snp_data->ns - snp_data->ns_case;\n    snp_data->af_control /= 2 * snp_data->ns_control.cast<double>();\n  }\n\n  total /= snp_data->ns1;\n  snp_data->af1 = total / 2; // all traits\n  snp_data->af /= 2 * snp_data->ns.cast<double>(); // single trait\n\n  if(!take_max && !take_comphet) {\n    snp_data->af1 /= nsites(isnp); // take average AAF across sites for sum rule\n    snp_data->af /= nsites(isnp); \n    if(params->af_cc){\n      snp_data->af_case /= nsites(isnp);\n      snp_data->af_control /= nsites(isnp);\n    }\n  }\n\n  if( params->htp_out && (take_max || take_comphet) ) \n    compute_genocounts(params->trait_mode==1 || params->trait_mode==3, non_par, mac, maskvec, snp_data->genocounts, params->sex, filters->case_control_indices);\n\n  if(params->use_SPA) {\n    // switch to minor allele\n    snp_data->flipped = ((!take_max && !take_comphet) || (params->test_type > 0)) ? 
false : (total > 1); // skip for DOM/REC test\n\n    if(snp_data->flipped){\n      maskvec = ( maskvec >= 0 ).select( 2 - maskvec, maskvec);\n      total = 2 - total;\n    }\n  }\n\n  // apply dominant/recessive encoding & recompute mean\n  if(params->test_type > 0){\n    // convert to hard call if it is in dosage form\n    if(params->dosage_mode) maskvec = maskvec.round();\n\n    if(params->test_type == 1){ //dominant\n      maskvec = (maskvec == 2).select(1, maskvec);\n    } else if(params->test_type == 2){ //recessive\n      maskvec = (maskvec >= 1).select(maskvec - 1, maskvec);\n    }\n\n    sum_pos = ((maskvec >= 0) && filters->ind_in_analysis).select(maskvec, 0).sum();\n    if((params->test_type == 2) && (sum_pos < params->minHOMs)) { // filter on homALT carriers\n      snp_data->ignored = true;\n      return;\n    }\n\n    total = sum_pos / snp_data->ns1;\n    if(total < params->numtol) {\n      snp_data->ignored = true;\n      return;\n    }\n  }\n\n  // impute missing\n  mean_impute_g(total, maskvec, filters->ind_in_analysis);\n\n}\n\n\n\n// compute MAF from AAF\nvoid GenoMask::get_mafs(int const& bs, ArrayXd& mafvec, vector<variant_block> const &all_snps_info){\n\n  for(int j = 0; j < bs; j++){\n    mafvec(j) = min( all_snps_info[j].af1, 1 - all_snps_info[j].af1 );\n  }\n\n}\n\n\nvoid GenoMask::write_info(struct param* params, struct filter const* filters, mstream& sout){\n\n  // write fam file\n  write_famfile(params, filters, sout);\n\n  // prepare ofstream for bim file\n  string fname = gfile_prefix + \".bim\";\n  openStream(&outfile_bim, fname, std::ios::out, sout);\n\n  // write magic number to bed file\n  uchar header[3] = {0x6c, 0x1b, 0x01};\n  fname = gfile_prefix + \".bed\";\n  openStream(&outfile_bed, fname, std::ios::out | std::ios::binary, sout);\n  outfile_bed.write( reinterpret_cast<char*> (&header[0]), sizeof(uchar) * 3);\n\n  // number of bytes [=ceil(N/4.0)]\n  gblock_size = (filters->ind_in_analysis.count() + 3) >> 2;\n  reset_gvec();\n\n  // number of empty (padding) bits in the last byte: 0/2/4/6\n  int nbits_left = 2 * (( gblock_size * 4 ) - filters->ind_in_analysis.count());\n  // set last bits to 0 (use this uchar and apply '&' to last byte)\n  last_byte_correction_factor = (1 << (8 - nbits_left)) - 1; \n\n}\n\n// write to fam\nvoid GenoMask::write_famfile(struct param* params, struct filter const* filters, mstream& sout){\n\n  const string fname = gfile_prefix + \".fam\";\n  Files out;\n  out.openForWrite(fname, sout);\n\n  // columns: FID IID FA MO SEX PHENO\n  for (int i = 0; i < filters->ind_in_analysis.size(); i++) {\n    if( filters->ind_in_analysis(i) ){\n      out << \n        params->FIDvec[i][0] << \"\\t\" <<\n        params->FIDvec[i][1] << \"\\t\" <<\n        \"0\\t0\\t\" << params->sex(i) << \"\\t-9\\n\";\n    }\n  }\n\n  out.closeFile();\n\n  params->FIDvec.clear();\n}\n\nvoid GenoMask::reset_gvec(){\n  gvec.resize(nmasks_total);\n  for(int i = 0; i < nmasks_total; i++) \n    gvec[i].resize(gblock_size);\n}\n\n// convert to bits\nvoid GenoMask::make_genovec(int const& isnp, Ref<const ArrayXd> mask, struct filter const* filters){\n\n  int byte, bit_start, hc;\n  setAllBitsOne(isnp);\n\n  for(int i = 0, index = 0; i < mask.size(); i++){\n\n    if( !filters->ind_in_analysis(i) ) continue;\n\n    // round to nearest int\n    hc = (int) (mask(i) + 0.5); \n\n    // using 'ref-last':\n    //  00 -> hom. alt\n    //  10 -> missing\n    //  01 -> het\n    //  11 -> hom. 
ref\n    //  \n    //  so ignore mask=0 since gvec is initialized to 11 for everyone\n    if(hc == 0) {\n      index++;\n      continue;\n    }\n    byte = index >> 2;\n    bit_start = (index & 3) <<1; \n    set_gvalue(isnp, byte, bit_start, hc);\n    index++;\n  }\n\n  // set trailing bits to 0\n  gvec[isnp][gblock_size-1] &= last_byte_correction_factor;\n\n}\n\nvoid GenoMask::setAllBitsZero(int const& isnp){\n  std::fill(gvec[isnp].begin(), gvec[isnp].end(), 0ULL);\n}\nvoid GenoMask::setAllBitsOne(int const& isnp){\n  std::fill(gvec[isnp].begin(), gvec[isnp].end(), ~0ULL);\n}\nvoid GenoMask::set_gvalue(int const& isnp, int const& byte, int const& bit_start, int const& val){\n  // initial value is : 11\n  if(val < 0) BIT_UNSET(gvec[isnp][byte], bit_start + 1);  // set to 10\n  else if(val == 1) BIT_UNSET(gvec[isnp][byte], bit_start); // set to 01 \n  else if(val == 2) gvec[isnp][byte] &= ~(3<<bit_start); // set to 00\n}\nvoid GenoMask::write_genovec(int const& isnp){\n\n  outfile_bed.write( reinterpret_cast<char*> (&gvec[isnp][0]), gblock_size);\n\n}\n\n// get list of indices for each mask (across all AAF bins)\nvoid GenoMask::build_map(map<string,vector<int>>& mask_map){\n\n  for(size_t i = 0; i < masks.size(); i++){\n    vector<int> myints;\n    for(int j = 0; j < n_aaf_bins; j++){\n      // collect indices\n      int index_start = i * n_aaf_bins + j;\n      myints.push_back(index_start);\n    }\n    // insert in map\n    mask_map[ masks[i].name ] = myints;\n  }\n\n}\n\nstd::string GenoMask::build_header(){\n\n  std::ostringstream buffer;\n  size_t const nmask = mask_out.size();\n\n  // header = ##MASKS=<Mask1=\"X,X\";Mask2=\"X,X\";...;MaskK=\"X,X\">\n  buffer << \"##MASKS=<\";\n  for(size_t i = 0; i < nmask; i++)\n    buffer << mask_out[i][0] << \"=\\\"\" << mask_out[i][1] << \"\\\"\" << ((i+1) < nmask ? 
\";\" : \"\");\n\n  buffer << \">\\n\";\n\n  return buffer.str();\n}\n\n// prep to write list for variants in each set\nvoid GenoMask::prep_snplist(const std::string& prefix, mstream& sout){\n\n  string outfile = prefix + \"_masks.snplist\";\n  sout << \" * writing list of variants for each mask in file [\" << outfile << \"]\\n\";\n  snplist_out.openForWrite(outfile, sout);\n  list_snps.resize(nmasks_total);\n}\n\n// prep to write set list files\nvoid GenoMask::prep_setlists(const std::string& fin, const std::string& prefix, mstream& sout){\n\n  int nfiles = 0, lineread = 0;\n  string line;\n  std::vector< string > tmp_str_vec, suffix;\n  Files myfile;\n\n  myfile.openForRead(fin, sout);\n  sout << \"   +writing new set list files using [\" << fin << \"] \";\n\n  setfiles_index.resize(nmasks_total);\n  map<string,vector<int>> mask_map;\n  build_map(mask_map);\n\n  while( myfile.readLine(line) ){\n\n    lineread++;\n    tmp_str_vec = string_split(line,\"\\t ,\");\n    // file suffix + list of masks to include\n    if( tmp_str_vec.size() < 2 )\n      throw \"line \" + to_string( lineread ) + \" has too few entries.\" ;\n\n    // get index of masks\n    vector<int> mindices;\n    for(size_t i = 1; i < tmp_str_vec.size(); i++){\n      if (!in_map(tmp_str_vec[i], mask_map)) continue;\n      mindices.insert(mindices.end(), mask_map[ tmp_str_vec[i] ].begin(), mask_map[ tmp_str_vec[i] ].end());\n    }\n    // sort and remove duplicates\n    std::sort(mindices.begin(), mindices.end());\n    mindices.erase( unique( mindices.begin(), mindices.end() ), mindices.end() );\n\n    // check at least one mask\n    if(mindices.size() == 0) continue;\n\n    suffix.push_back(tmp_str_vec[0]);\n    for(size_t i = 0; i < mindices.size(); i++)\n      setfiles_index[ mindices[i] ].push_back(nfiles);\n    nfiles++;\n\n    //cerr << suffix.back() << \" -> \" << mindices.size() << endl;\n  }\n\n  if(nfiles < 1) \n    throw \"all set list files have unknown masks.\";\n\n  sout << \" n_files = \" << nfiles << endl;\n  write_setlist = true;\n\n  // open file for writing\n  setfiles.resize(nfiles);\n  for(int i = 0; i < nfiles; i++) {\n    line = prefix + \"_\" + suffix[i] + \".setlist\";\n    setfiles[i] = std::make_shared<Files>();\n    setfiles[i]->openForWrite( line, sout );\n  }\n  list_masks.resize(nfiles);\n\n}\n\n\nvoid GenoMask::write_genobim(struct snp const& tsnp){\n\n  // write mask info to bim file using ref-last\n  // CHR ID 0 BP ALT REF \n  outfile_bim << tsnp.chrom << \"\\t\" << tsnp.ID << \"\\t0\\t\" << tsnp.physpos << \"\\t\" << tsnp.allele2 << \"\\t\" << tsnp.allele1 << endl;\n\n}\n\nvoid GenoMask::append_snplist(int const& imask, ArrayXb const& colkeep, int const& start, vset const& setinfo, vector<snp> const& snpinfo){\n\n  // add snps\n  if( colkeep.count() == 0 ) return;\n\n  for(int k = 0; k < colkeep.size(); k++){\n    if(!colkeep(k)) continue;\n    list_snps[imask].push_back( snpinfo[ setinfo.snp_indices[start + k] ].ID );\n  }\n}\n\nvoid GenoMask::make_snplist(int const& imask, string const& mask_name, int const& chrom, uint32_t const& pos){\n  // add snplist\n  if( list_snps[imask].size() > 0 )\n    snplist_out << mask_name << \"\\t\" << chrom << \"\\t\" << pos << \"\\t\" << print_csv( list_snps[imask] ) << endl;\n}\n\n\nvoid GenoMask::append_setlist(int const& imask, string const& mname){\n  // add mask name\n  for(size_t i = 0; i < setfiles_index[imask].size(); i++)\n    list_masks[ setfiles_index[imask][i] ].push_back(mname);\n}\n\nvoid GenoMask::make_setlist(string const& sname, int const& 
chr, uint32_t const& pos){\n  for(size_t i = 0; i < setfiles.size(); i++)\n    if( list_masks[i].size() > 0 ) // add set name + masks\n      (*setfiles[i]) << sname << \" \" << chr << \" \" << pos << \" \" << print_csv( list_masks[i] ) << endl;\n}\n\nvoid GenoMask::closeFiles(){\n  outfile_bim.close();\n  outfile_bed.close();\n  if(write_setlist){\n    for(size_t i = 0; i < setfiles.size(); i++) \n      setfiles[i]->closeFile();\n  }\n  if(write_snplist) snplist_out.closeFile();\n}\n\n// for LOVO\nArrayXi check_lovo_snplist(const Ref<const ArrayXi>& indices, vector<uint64> const& offsets, vector<snp> const& snpinfo, string const& masks_loo_snpfile){\n\n  int bs = indices.size();\n  ArrayXi lovo_masks;\n\n  if(masks_loo_snpfile == \"\"){\n    lovo_masks = ArrayXi::LinSpaced(bs, 0, bs-1);\n    return lovo_masks;\n  }\n\n  string line;\n  map<string, bool> comp_lovo_mask;\n  vector<int> lovo_masks_vec;\n  ifstream infile;\n\n  // get list of all variants for which to compute lovo mask\n  infile.open(masks_loo_snpfile);\n  while(getline(infile, line))\n    comp_lovo_mask[ line ] = true;\n  infile.close();\n\n  if(comp_lovo_mask.size() == 0)\n    throw \"no variants were specified in the '--lovo-snplist' file.\";\n\n  // check variants which are in map\n  for(int i = 0; i < indices.size(); i++)\n    if(in_map(snpinfo[ offsets[indices(i)] ].ID, comp_lovo_mask))\n      lovo_masks_vec.push_back( i );\n\n  if(lovo_masks_vec.size() == 0)\n    throw \"none of the genotyped variants are present in the '--lovo-snplist' file.\";\n\n  lovo_masks = ArrayXi::Map(lovo_masks_vec.data(), lovo_masks_vec.size());\n  return lovo_masks;\n}\n\n\n// make seq(0,n-1) removing i-th entry\nArrayXi get_index_vec_loo(int const& i, int const& n){\n\n  ArrayXi iseq (n - 1);\n  for(int j = 0, k = 0; j < n; j++){\n    if(j != i) iseq(k++) = j;\n  }\n\n  return iseq;\n}\n"
  },
  {
    "path": "src/Masks.hpp",
    "content": "/* \n\n   This file is part of the regenie software package.\n\n   Copyright (c) 2020-2024 Joelle Mbatchou, Andrey Ziyatdinov & Jonathan Marchini\n\n   Permission is hereby granted, free of charge, to any person obtaining a copy\n   of this software and associated documentation files (the \"Software\"), to deal\n   in the Software without restriction, including without limitation the rights\n   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n   copies of the Software, and to permit persons to whom the Software is\n   furnished to do so, subject to the following conditions:\n\n   The above copyright notice and this permission notice shall be included in all\n   copies or substantial portions of the Software.\n\n   THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n   SOFTWARE.\n\n*/\n\n#ifndef MASK_H\n#define MASK_H\n\n\nclass GenoMask {\n\n  public:\n    std::map <std::string, anno_name> annotations; // store identifier as 1 byte vector\n    std::map <std::string, std::map <std::string, uint64>> regions; // store identifier as 1 byte vector\n    std::vector <maskinfo> masks, base_masks;\n    std::vector <std::vector <std::string>> mask_out, list_masks;//contains mask info\n    Eigen::VectorXd aafs;\n    Eigen::ArrayXi nsites;\n    ArrayXb colset;\n    MatrixXb keepaaf, keepmask, non_missing;\n    Eigen::MatrixXd Gtmp; // holds mask\n    std::ofstream outfile_bed, outfile_bim;\n    std::vector<std::shared_ptr<Files>> setfiles;// for written setlist\n    std::vector<std::vector<int>> setfiles_index;// for written setlist\n    Files snplist_out;\n    std::vector <std::vector <std::string>> list_snps;//contains snplist\n\n    double tol = 1e-6;\n    double minAAF = 1e-7, default_aaf = .01;\n    int n_aaf_bins, max_aaf_bins = 12, nmasks_total;\n    int n_mask_pass = 0; // number of masks generated\n    bool write_setlist = false, write_masks = false, write_snplist = false, verbose = false;\n    bool w_regions = false, w_loo = false, w_lodo = false, w_vc_tests = false, w_vc_cust_weights = false;\n    bool take_max = true, take_comphet = false; // either max comphet or sum\n    bool force_singleton = false; // allow user to specify singleton variants\n    std::string gfile_prefix;\n    uint64 gblock_size; // number of bytes to use for bed file format\n    double max_aaf = -1, vc_aaf, vc_collapse_MAC; // maximum AAF to consider\n    uint64 all_masks = 0ULL; // keep track of all annotations considered in analysis\n    std::vector<std::vector<uchar>> gvec;\n    uchar last_byte_correction_factor = 0u;\n\n    bool remeta_save_ld = false;\n    std::vector<std::string> remeta_snplist; // list of snps contained in any mask\n\n    // functions\n    void prep_run(struct param&,struct in_files const&);\n    void setBins(struct param*,mstream&);\n    void prepMasks(const int&,const std::string&);\n    void set_snp_masks(const int&,const int&,std::vector<variant_block> const &,vset const&,std::vector<snp>&,mstream&);\n    void set_snp_aafs(const int&,const int&,const bool&,std::vector<variant_block> const&,vset&,std::vector<snp> const&,mstream&);\n    
bool check_in_lovo_mask(const Eigen::Ref<const Eigen::ArrayXd>&,struct filter const&,std::string const&,snp&,bool&,bool&,double&,int const&,struct param const*);\n    void updateMasks(const int&,const int&,struct param*,struct filter*,const Eigen::Ref<const MatrixXb>&,struct geno_block*,const Eigen::Ref<const Eigen::ArrayXd>&,std::vector<variant_block>&,vset&,std::vector<snp>&,mstream&);\n    void apply_rule(SpVec&,SpVec const&,const Eigen::Ref<const ArrayXb>&,bool const&);\n    void apply_rule(Eigen::Ref<Eigen::ArrayXd>,SpVec const&,const Eigen::Ref<const ArrayXb>&,bool const&);\n    void apply_rule(Eigen::Ref<Eigen::ArrayXd>,const Eigen::Ref<const Eigen::MatrixXd>&,const Eigen::Ref<const ArrayXb>&,bool const&);\n    void collapse_mask_chunk(const Eigen::Ref<const Eigen::ArrayXi>&,SpMat const&,const Eigen::Ref<const ArrayXb>&,const Eigen::Ref<const ArrayXb>&,const Eigen::Ref<const Eigen::ArrayXd>&,Eigen::Ref<Eigen::ArrayXd>,Eigen::Ref<Eigen::ArrayXd>,const Eigen::Ref<const ArrayXb>&);\n    void updateMasks_loo(const Eigen::Ref<const Eigen::ArrayXi>&,bool const&,SpMat const&,const Eigen::Ref<const ArrayXb>&,const Eigen::Ref<const ArrayXb>&,const Eigen::Ref<const Eigen::ArrayXd>&,const Eigen::Ref<const Eigen::ArrayXd>&,const Eigen::Ref<const Eigen::ArrayXd>&,const Eigen::Ref<const ArrayXb>&,vset&,int const&);\n    void tally_masks(struct param const*,struct filter const*,const Eigen::Ref<const MatrixXb>&,SpMat&,MatrixXb&);\n    void computeMasks(struct param*,struct filter*,const Eigen::Ref<const MatrixXb>&,const Eigen::Ref<const Eigen::MatrixXd>&,struct geno_block*,std::vector<variant_block>&,vset&,std::vector<snp>&,mstream&);\n    void computeMasks_loo(const Eigen::Ref<const Eigen::ArrayXi>&,bool const&,struct param*,struct filter*,const Eigen::Ref<const MatrixXb>&,const Eigen::Ref<const Eigen::MatrixXd>&,struct geno_block*,std::vector<variant_block>&,vset&,std::vector<snp>&,mstream&);\n    void buildMask(const int&,const int&,uint32_t const&,struct param const*,struct filter const*,const Eigen::Ref<const MatrixXb>&,const Eigen::Ref<const Eigen::MatrixXd>&,variant_block*);\n\n    void get_mafs(const int&,Eigen::ArrayXd&,std::vector<variant_block> const&);\n\n    void write_info(struct param*,struct filter const*,mstream&);\n    void write_famfile(struct param*,struct filter const*,mstream&);\n    void reset_gvec();\n    void make_genovec(const int&,Eigen::Ref<const Eigen::ArrayXd>,struct filter const*);\n    void write_genovec(const int&);\n    void set_gvalue(const int&,const int&,const int&,const int&);\n    void write_genobim(struct snp const&);\n    void setAllBitsZero(const int&);\n    void setAllBitsOne(const int&);\n    std::string build_header();\n    void build_map(std::map<std::string,std::vector<int>>&);\n    void prep_snplist(const std::string&,mstream& sout);\n    void append_snplist(int const&,ArrayXb const&, int const&,vset const&,std::vector<snp> const&);\n    void make_snplist(int const&,std::string const&,int const&,uint32_t const&);\n    void prep_setlists(const std::string&,const std::string&,mstream& sout);\n    void append_setlist(int const&,std::string const&);\n    void make_setlist(std::string const&,int const&,uint32_t const&);\n    void closeFiles();\n\n\n    GenoMask();\n    ~GenoMask();\n\n};\n\nEigen::ArrayXi check_lovo_snplist(const Eigen::Ref<const Eigen::ArrayXi>&,std::vector<uint64> const&,std::vector<snp> const&,std::string const&);\nEigen::ArrayXi get_index_vec_loo(int const&,int const&);\n\n#endif\n"
  },
  {
    "path": "src/MultiTrait_Tests.cpp",
    "content": "/* \n\n   This file is part of the regenie software package.\n\n   Copyright (c) 2020-2024 Joelle Mbatchou, Andrey Ziyatdinov & Jonathan Marchini\n\n   Permission is hereby granted, free of charge, to any person obtaining a copy\n   of this software and associated documentation files (the \"Software\"), to deal\n   in the Software without restriction, including without limitation the rights\n   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n   copies of the Software, and to permit persons to whom the Software is\n   furnished to do so, subject to the following conditions:\n\n   The above copyright notice and this permission notice shall be included in all\n   copies or substantial portions of the Software.\n\n   THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n   SOFTWARE.\n\n*/\n\n#include \"Regenie.hpp\"\n#include \"Files.hpp\"\n#include \"Geno.hpp\"\n#include \"Pheno.hpp\"\n#include \"SKAT.hpp\" // get_lambdas\n#include \"Joint_Tests.hpp\" // get_acat\n#include \"MCC.hpp\"\n\n#include \"MultiTrait_Tests.hpp\"\n\nusing namespace std;\nusing namespace Eigen;\n\n\ndouble get_fisher_robust(const Eigen::Ref<const ArrayXd>& logp)\n{\n  double stat = 2.0 * log(10) * logp.sum();\n  // get logp from chisq(k)\n  double logp_fisher;\n  get_logp(logp_fisher, stat, 2 * logp.size());\n\n  return(logp_fisher);\n}\n\n//------------------------\n// Class MTestsResults\n//------------------------\n\nMTestsResults::MTestsResults(unsigned int n_tests_, unsigned int q_, unsigned int M_) \n{\n  setup(n_tests_, q_, M_);\n}\n\nvoid MTestsResults::setup(unsigned int n_tests_, unsigned int q_, unsigned int M_) \n{\n  // assign\n  n_tests = n_tests_;\n  q = q_;\n  M = M_;\n  // allocate\n  stats_mt.resize(n_tests);\n  logp_mt.resize(n_tests);\n  logp_univar.resize(n_tests);\n}\n\nMTestsResults::~MTestsResults() { }\n\n//---------------\n// Class MTests\n//---------------\n\nMTests::MTests() \n{\n  setup();\n}\n\nMTests::~MTests() { }\n\nvoid MTests::setup() \n{\n  verbose = 0;\n  precomp = false;\n\n  mcc_skew_abs = 1.0;\n  mcc_z2 = 4; // ~ p-value = 0.05\n\n  n_tests = MULTITRAIT_N_TESTS;\n  n_traits = 0;\n  Neff0 = 0.0;\n\n  // bayes test parameters\n  prior_a0 = 6.0;\n  prior_Q0 = 4.0;\n  prior_Mbeta0 = 0.0;\n  prior_Vbeta0 = 0.02;\n\n  // NNLS\n  nnls = NNLS();\n}\n\nvoid MTests::check_setup_no_data() \n{\n  if(n_tests != MULTITRAIT_N_TESTS) { throw std::runtime_error(\"check_setup_no_data: n_tests != MULTITRAIT_N_TESTS\"); }\n}\n\nvoid MTests::check_setup_data() \n{\n  if(n_traits == 0) { throw std::runtime_error(\"check_setup_data: n_traits == 0\"); }\n  if(Mask.cols() == 0) { throw std::runtime_error(\"check_setup_data: Mask.cols() == 0\"); }\n  if(Mask.cols() != n_traits) { throw std::runtime_error(\"check_setup_data: Mask.cols() != n_traits\"); }\n  if(Neff.size() == 0) { throw std::runtime_error(\"check_setup_data: Neff.size() == 0\"); }\n  if(Neff.size() != n_traits) { throw std::runtime_error(\"check_setup_data: Neff.size() != n_traits\"); }\n}\n\nvoid MTests::setup_masks(const MatrixXb &masked_indivs)\n{\n  // check dimensions\n  
if(masked_indivs.rows() == 0) { throw std::runtime_error(\"setup_masks: masked_indivs.rows() == 0\"); }\n  if(masked_indivs.cols() == 0) { throw std::runtime_error(\"setup_masks: masked_indivs.cols() == 0\"); }\n\n  Mask = masked_indivs;\n  Neff = Mask.cast<double>().colwise().sum(); \n\n  Mask0 = Mask.col(0);\n  for(unsigned int i = 1; i < Mask.cols(); i++) {\n    Mask0.col(0).array() = Mask0.col(0).array() || Mask.col(i).array();\n  }\n  Neff0 = Mask0.cast<double>().sum();\n}\n\nvoid MTests::setup_yres(const MatrixXd &res)\n{\n  // check dimensions\n  if(res.rows() == 0) { throw std::runtime_error(\"setup_yres: res.rows() == 0\"); }\n  if(res.cols() == 0) { throw std::runtime_error(\"setup_yres: res.cols() == 0\"); }\n  // check masks were set up upstream\n  if(Neff0 == 0.0) { throw std::runtime_error(\"setup_yres: Neff0 == 0\"); }\n\n  // set up #traits\n  n_traits = res.cols();\n\n  // Y: n x q matrix of traits\n  Yres = res;\n  Y0res = res;\n  Y0res.array().colwise() *= Mask0.col(0).array().cast<double>().array();\n\n  // cross-product YtY\n  precomp0_YtY = MatrixXd(n_traits, n_traits).setZero().selfadjointView<Lower>().rankUpdate(Y0res.adjoint());\n  // Syy = covariance of Y\n  precomp0_Syy = precomp0_YtY / (Neff0 - 1.0);\n  precomp0_Syy_inv = precomp0_Syy.llt().solve(MatrixXd::Identity(n_traits, n_traits));\n  // MANOVA-specific \n  precomp0_ld0 = precomp0_YtY.ldlt().vectorD().array().log().sum();\n  // Bayes-specific \n  VectorXd Mbeta_0 = VectorXd::Constant(n_traits, prior_Mbeta0);\n  MatrixXd Q0 = MatrixXd::Constant(n_traits, n_traits, 0.0);\n  Q0.diagonal().array() = prior_Q0;\n  double ld = (Q0 + precomp0_YtY).ldlt().vectorD().array().log().sum();\n  precomp0_LL_M0 =  0.5* (double)(n_traits) * log(prior_Vbeta0) - 0.5*(Neff0 + prior_a0 + (double)(n_traits) - 1.0) * ld;\n  // NNLS\n  nnls.ss_weights(precomp0_Syy);\n  // Hierarchical Omnibus\n  VectorXd lambdas = VectorXd::Zero(n_traits);\n  get_lambdas(lambdas, precomp0_Syy, 1e-5);\n  precomp0_lambdas_Syy = lambdas;\n  // normalize eigen values: l_i = l_i / sum(l_i)\n  lambdas /= lambdas.sum();\n  precomp0_lambdas_norm_Syy = lambdas;\n  // Robust Omnibus\n  SelfAdjointEigenSolver<MatrixXd> es(precomp0_Syy);\n  PC_Y0res = Y0res * es.eigenvectors();\n  PC_Y0res.array().colwise() *= Mask0.col(0).array().cast<double>().array();\n  // skewness of PCs\n  compute_skew_pc();\n  // RINT PCs\n  RPC_Y0res = PC_Y0res;\n  for(unsigned int i = 0; i < RPC_Y0res.cols(); i++) {\n    MatrixXd rpc_col = RPC_Y0res.col(i);\n    rint_pheno(rpc_col, Mask0);\n    RPC_Y0res.col(i) = rpc_col;\n  }\n}\n\n//----------------------------------\n// Main function to apply tests\n//----------------------------------\n\nMTestsResults MTests::run_tests_snp(\n    int const& isnp, struct geno_block& gblock,\n    const Ref<const MatrixXd>& yres, const Ref<const RowVectorXd>& p_sd_yres, \n    struct param const& params)\n{\n  check_setup_data();\n\n  MapMatXd Gmat(gblock.Gmat.col(isnp).data(), params.n_samples, 1);\n\n  MTestsResults mt_results(n_tests, n_traits, Gmat.cols());\n\n  assoc_manova(yres, Gmat, mt_results);\n  assoc_omnibus0(yres, Gmat, mt_results);\n  assoc_bayes(yres, Gmat, mt_results);\n\n  // debug\n  if(verbose > 2) {\n    /* dump(yres, Gmat); */\n    const static IOFormat CSVFormat(StreamPrecision, DontAlignCols, \", \", \"\\n\");\n    // dump y\n    ofstream file_y(\"mt.Y.txt\");\n    file_y << yres.format(CSVFormat);\n    file_y.close();\n    // dump X\n    ofstream file_X(\"mt.X.txt\");\n    file_X << Gmat.format(CSVFormat);\n    file_X.close();\n   
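 // (note) this inline dump mirrors dump_data() below; run_tests_snp fills only the\n    // MANOVA/Omnibus0/Bayes entries of mt_results, while run_tests_snp_precomp also\n    // fills the CPC/RCPC/ACPC and NNLS0 entries (see MULTITRAIT_N_TESTS in the header).\n   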
 // dump masks M\n    ofstream file_M(\"mt.M.txt\");\n    file_M << Mask.array().cast<double>().matrix().format(CSVFormat);\n    file_M.close();\n    // dump masks M0\n    ofstream file_M0(\"mt.M0.txt\");\n    file_M0 << Mask0.array().cast<double>().matrix().format(CSVFormat);\n    file_M0.close();\n  }\n\n  return(mt_results);\n}\n\nMTestsResults MTests::run_tests_snp_precomp(\n    int const& isnp, struct geno_block& gblock,\n    struct param const& params)\n{\n  check_setup_data();\n\n  MapMatXd Gmat(gblock.Gmat.col(isnp).data(), params.n_samples, 1);\n\n  MTestsResults mt_results(n_tests, n_traits, Gmat.cols());\n\n  assoc_manova(Y0res, Gmat, mt_results);\n  assoc_omnibus0(Y0res, Gmat, mt_results);\n  assoc_cpc0(PC_Y0res, Gmat, mt_results);\n  assoc_rcpc0(RPC_Y0res, Gmat, mt_results);\n  assoc_bayes(Y0res, Gmat, mt_results);\n  assoc_nnls0(Y0res, Gmat, mt_results);\n  \n  // debug\n  if(verbose > 2) {\n    dump_data(Y0res, Gmat, Mask, Mask0);\n  }\n\n  return(mt_results);\n}\n\n//----------------------------------\n// MANOVA test\n//----------------------------------\n\n// Association scan based on MANOVA\nvoid MTests::assoc_manova(const Eigen::MatrixXd &Y, const Eigen::MatrixXd& G, MTestsResults& mt_results)\n{\n  unsigned pos_test = 0;\n  unsigned pos_test_npmanova = 5;\n\n  // check dimensions\n  if(Y.rows() == 0) { throw std::runtime_error(\"MANOVA: Y.rows() == 0\"); }\n  if(Y.cols() == 0) { throw std::runtime_error(\"MANOVA: Y.cols() == 0\"); }\n  if(G.rows() == 0) { throw std::runtime_error(\"MANOVA: G.rows() == 0\"); }\n  if(G.cols() == 0) { throw std::runtime_error(\"MANOVA: G.cols() == 0\"); }\n\n  // dimensions\n  unsigned int N = Y.rows();\n  unsigned int q = Y.cols();\n  unsigned int M = G.cols();\n\n  double N_data = Neff0;\n\n  // check dimensions\n  if(Y.cols() != n_traits) { throw std::runtime_error(\"MANOVA: Y.cols() != n_traits\"); }\n  if(G.rows() != N) { throw std::runtime_error(\"MANOVA: #rows in G != N\"); }\n  if(M < 1) { throw std::runtime_error(\"MANOVA: M < 1\"); }\n  if(q < 1) { throw std::runtime_error(\"MANOVA: q < 1\"); }\n\n  // pre-compute matrix products\n  MatrixXd YtY(q, q);\n  if(precomp) YtY = precomp0_YtY;\n  else YtY = MatrixXd(q, q).setZero().selfadjointView<Lower>().rankUpdate(Y.adjoint());\n\n  VectorXd G2 = G.colwise().squaredNorm();\n\n  // Estimates of beta\n  // Bhat, q x M matrix\n  MatrixXd Bhat = (Y.transpose() * G).\n    array().rowwise() / G2.array().transpose();\n\n  // loop over M variants\n  // test statistic for variant i: (q/2 - N + 1) * log(Wi)\n  // where Wi = det(E1) / det(E0) is the Wilks' lambda statistic for MANOVA\n  mt_results.stats_mt[pos_test].resize(M);\n  mt_results.logp_mt[pos_test].resize(M);\n  mt_results.stats_mt[pos_test_npmanova].resize(M);\n  mt_results.logp_mt[pos_test_npmanova].resize(M);\n\n  double ld0;\n  if(precomp) ld0 = precomp0_ld0;\n  else ld0 = YtY.ldlt().vectorD().array().log().sum();\n  \n  MatrixXd E, zzt;\n  VectorXd b;\n  double ld1, stat_i, pval_i;\n  boost::math::chi_squared dist(q);\n  for(unsigned int i = 0; i < M; i++) {\n    // 1. 
MANOVA\n    b = Bhat.col(i);\n    zzt = b * b.transpose() * G2[i];\n    E = YtY - zzt;\n    ld1 = E.ldlt().vectorD().array().log().sum();\n    // error: ld1 > ld0\n    if(ld1 > ld0) { throw std::runtime_error(\"MANOVA: log(det(E1)) > log(det(E0))\"); }\n    // MANOVA test statistics & p-value\n    stat_i = ((double)(q) / 2 - N_data + 1.0) * (ld1 - ld0);\n    pval_i = boost::math::cdf(boost::math::complement(dist, stat_i));\n    // store test results\n    mt_results.stats_mt[pos_test][i] = stat_i;\n    mt_results.logp_mt[pos_test][i] = -log10(pval_i);\n\n    // 2. NPMANOVA\n    // NPMANOVA test statistic (pseudo F statistic)\n    unsigned int p0 = 0; // no covariates projected out\n    double mean_SS_E = E.diagonal().sum() / (N_data - (double)(p0) - 1.0);\n    double mean_SS_T = zzt.diagonal().sum(); // df_T = 1, i.e., one SNP is tested (M1)\n    stat_i = mean_SS_T / mean_SS_E; // F tilde in notation of PERMANOVA\n    // EVD on YtY\n    VectorXd lambdas = VectorXd::Zero(q);\n    if(precomp) {\n      lambdas = precomp0_lambdas_norm_Syy;\n    } else {\n      MatrixXd Sy(q, q);\n      Sy = YtY / (Neff0 - 1.0);\n      get_lambdas(lambdas, Sy, 1e-5);\n      // normalize eigen values: l_i = l_i / sum(l_i)\n      lambdas /= lambdas.sum();\n    }\n    // NPMANOVA p-value\n    // v1\n    /* pval_i =  get_chisq_mix_pv(stat_i, lambdas); */\n    // v2\n    // re-scale so that max lambda is 1 (lambda is sorted)\n    double newQ = stat_i / lambdas.tail(1)(0);\n    VectorXd newL = lambdas / lambdas.tail(1)(0);\n    pval_i = get_kuonen_pv(newQ, newL); // SPA\n    if(pval_i <= 0) {// if SPA failed\n       pval_i = get_liu_pv(newQ, newL); // only use mod Liu if Davies/SPA failed\n    }\n    // store test results\n    mt_results.stats_mt[pos_test_npmanova][i] = stat_i;\n    mt_results.logp_mt[pos_test_npmanova][i] = -log10(pval_i);\n  }\n  \n} \n\n//----------------------------------------\n// NNLS0 test (complete sample overlap)\n//----------------------------------------\n\n// Association NNLS0 scan using summary stats from NNLS0\nvoid MTests::assoc_nnls0(const Eigen::MatrixXd &Y, const Eigen::MatrixXd& G, MTestsResults& mt_results)\n{\n  unsigned pos_test = 3;\n\n  // check bhat\n  if(mt_results.zscore_univar.size() == 0) { throw std::runtime_error(\"assoc_nnls0: bhat\"); }\n\n  // dimensions\n  unsigned int q = Y.cols();\n  unsigned int M = G.cols();\n\n  // pre-compute matrix products\n  MatrixXd Sy(q,q);\n  if(precomp) {\n    Sy = precomp0_Syy; \n  } else {\n    Sy = MatrixXd(q, q).setZero().selfadjointView<Lower>().rankUpdate(Y.adjoint());\n    Sy /= (Neff0 - 1.0);\n  }\n\n  MatrixXd Sy_inv;\n  if(precomp) Sy_inv = precomp0_Syy_inv;\n  else Sy_inv = Sy.llt().solve(MatrixXd::Identity(q, q));\n\n  // loop over M variants\n  // test statistic for variant i: z' S^{-1} z\n  mt_results.stats_mt[pos_test].resize(M);\n  mt_results.logp_mt[pos_test].resize(M);\n\n  VectorXd z(q);\n  NNLS nnls0 = nnls;\n  for(unsigned int i = 0; i < M; i++) {\n    for(unsigned int j = 0; j < q; j++) {\n      z[j] = mt_results.zscore_univar[i][j]; // dim 1 = variants; dim 2 = traits\n    }\n    \n    nnls0.ss_run(z);\n\n    mt_results.stats_mt[pos_test][i] = 0;\n    mt_results.logp_mt[pos_test][i] = -log10(nnls0.pval_min2);\n  }\n} \n\n//----------------------------------------\n// Omnibus0 test (complete sample overlap)\n//----------------------------------------\n\n// Association scan based on Omnibus + complete sample overlap\nvoid MTests::assoc_omnibus0(const Eigen::MatrixXd &Y, 
const Eigen::MatrixXd& G, MTestsResults& mt_results)\n{\n  unsigned pos_test = 1;\n  unsigned pos_test_sumz = 4;\n  unsigned pos_test_homnibus = 6;\n\n  // check dimensions\n  if(Y.rows() == 0) { throw std::runtime_error(\"Omnibus0: Y.rows() == 0\"); }\n  if(Y.cols() == 0) { throw std::runtime_error(\"Omnibus0: Y.cols() == 0\"); }\n  if(G.rows() == 0) { throw std::runtime_error(\"Omnibus0: G.rows() == 0\"); }\n  if(G.cols() == 0) { throw std::runtime_error(\"Omnibus0: G.cols() == 0\"); }\n\n  // dimensions\n  unsigned int N = Y.rows();\n  unsigned int q = Y.cols();\n  unsigned int M = G.cols();\n\n  double N_data = Neff0;\n\n  // check dimensions\n  if(Y.cols() != n_traits) { throw std::runtime_error(\"Omnibus0: Y.cols() != n_traits\"); }\n  if(G.rows() != N) { throw std::runtime_error(\"Omnibus0: #rows in G != N\"); }\n  if(M < 1) { throw std::runtime_error(\"Omnibus0: M < 1\"); }\n  if(q < 1) { throw std::runtime_error(\"Omnibus0: q < 1\"); }\n\n  // pre-compute matrix products\n  MatrixXd Sy(q,q);\n  if(precomp) {\n    Sy = precomp0_Syy; \n  } else {\n    Sy = MatrixXd(q, q).setZero().selfadjointView<Lower>().rankUpdate(Y.adjoint());\n    Sy /= (Neff0 - 1.0);\n  }\n\n  MatrixXd Sy_inv;\n  if(precomp) Sy_inv = precomp0_Syy_inv;\n  else Sy_inv = Sy.llt().solve(MatrixXd::Identity(q, q));\n  \n  VectorXd lambdas = VectorXd::Zero(q);\n  if(precomp) {\n      lambdas = precomp0_lambdas_Syy;\n  } else {\n    get_lambdas(lambdas, Sy, 1e-5);\n  }\n\n  VectorXd G2 = G.colwise().squaredNorm();\n\n  // Marginal Z-scores, q x M matrix\n  MatrixXd Z(q, M); //, B(q, M);\n  VectorXd bhat(M), s2(M);\n  for(unsigned i = 0; i < q; i++) {\n    bhat = (Y.col(i).transpose() * G).array().rowwise() / G2.array().transpose();\n    /* B.row(i) = bhat; */\n    // residuals, s2\n    s2 = (((G.array().rowwise() * bhat.array().transpose()). // predicted yp = X bhat\n      colwise() - Y.col(i).array()). // residuals = y - yp\n      matrix().colwise().squaredNorm()). // residuals^2\n      array() / (N_data - 1.0); // s2 = residuals^2 / (N - 1)\n    Z.row(i) = bhat.array() * (G2.array() / s2.array()).sqrt();\n  }\n\n  // loop over M variants\n  // test statistic for variant i: z' S^{-1} z\n  mt_results.logp_univar[pos_test].resize(M);\n  mt_results.zscore_univar.resize(M);\n  mt_results.stats_mt[pos_test].resize(M);\n  mt_results.logp_mt[pos_test].resize(M);\n  mt_results.stats_mt[pos_test_sumz].resize(M);\n  mt_results.logp_mt[pos_test_sumz].resize(M);\n  mt_results.stats_mt[pos_test_homnibus].resize(M);\n  mt_results.logp_mt[pos_test_homnibus].resize(M);\n\n  VectorXd z(q); //, b(q);\n  double stat_univar, pval_univar, stat_i, pval_i;\n  boost::math::chi_squared dist(q);\n  boost::math::chi_squared dist_univar(1);\n  for(unsigned int i = 0; i < M; i++) {\n    z = Z.col(i);\n    /* b = B.col(i); */\n\n    // univar. 
tests (single traits)\n    mt_results.logp_univar[pos_test][i].resize(q); // dim 1 = mt; dim 2 = variants; dim 3 = traits\n    mt_results.zscore_univar[i].resize(q); // dim 1 = variants; dim 2 = traits\n    for(unsigned int j = 0; j < q; j++) {\n      stat_univar = z[j] * z[j];\n      pval_univar = boost::math::cdf(boost::math::complement(dist_univar, stat_univar));\n      mt_results.logp_univar[pos_test][i][j] = -log10(pval_univar);\n      mt_results.zscore_univar[i][j] = z[j];\n\n    }\n    // multi-trait test: Omnibus \n    stat_i = z.transpose() * Sy_inv * z;\n    /* double stat_i = b.transpose() * Sy_inv * b; */\n    pval_i = boost::math::cdf(boost::math::complement(dist, stat_i));\n\n    mt_results.stats_mt[pos_test][i] = stat_i;\n    mt_results.logp_mt[pos_test][i] = -log10(pval_i);\n    \n    // multi-trait test: SumZ with T = sum(Z)^2 / sum(V) \n    stat_i = z.sum();\n    stat_i = stat_i*stat_i / Sy.sum();\n    pval_i = boost::math::cdf(boost::math::complement(dist_univar, stat_i));\n\n    mt_results.stats_mt[pos_test_sumz][i] = stat_i;\n    mt_results.logp_mt[pos_test_sumz][i] = -log10(pval_i);\n\n    // hOmnibus\n    stat_i = z.transpose() * z;\n    double newQ = stat_i / lambdas.tail(1)(0);\n    VectorXd newL = lambdas / lambdas.tail(1)(0);\n    pval_i = get_kuonen_pv(newQ, newL); // SPA\n    if(pval_i <= 0) {// if SPA failed\n       pval_i = get_liu_pv(newQ, newL); // only use mod Liu if Davies/SPA failed\n    }\n    mt_results.stats_mt[pos_test_homnibus][i] = stat_i;\n    mt_results.logp_mt[pos_test_homnibus][i] = -log10(pval_i);\n  }\n} \n\n//----------------------------------------\n// CPC0 test (0 = complete sample overlap)\n//----------------------------------------\n\n// Association scan based on PCs + complete sample overlap\nvoid MTests::assoc_cpc0(const Eigen::MatrixXd &Y, const Eigen::MatrixXd& G, MTestsResults& mt_results)\n{\n  unsigned pos_test = 7;\n  unsigned pos_test_acpc_sumchi2 = 11, pos_test_acpc_fisher = 12, pos_test_acpc_acat = 13;\n\n  // check dimensions\n  if(Y.rows() == 0) { throw std::runtime_error(\"CPC0: Y.rows() == 0\"); }\n  if(Y.cols() == 0) { throw std::runtime_error(\"CPC0: Y.cols() == 0\"); }\n  if(G.rows() == 0) { throw std::runtime_error(\"CPC0: G.rows() == 0\"); }\n  if(G.cols() == 0) { throw std::runtime_error(\"CPC0: G.cols() == 0\"); }\n\n  // dimensions\n  unsigned int N = Y.rows();\n  unsigned int q = Y.cols();\n  unsigned int M = G.cols();\n\n  double N_data = Neff0;\n\n  // check dimensions\n  if(Y.cols() != n_traits) { throw std::runtime_error(\"CPC0: Y.cols() != n_traits\"); }\n  if(G.rows() != N) { throw std::runtime_error(\"CPC0: #rows in G != N\"); }\n  if(M < 1) { throw std::runtime_error(\"CPC0: M < 1\"); }\n  if(q < 1) { throw std::runtime_error(\"CPC0: q < 1\"); }\n\n  // pre-compute matrix products\n  VectorXd G2 = G.colwise().squaredNorm();\n\n  // Marginal Z-scores, q x M matrix\n  MatrixXd Z(q, M); //, B(q, M);\n  VectorXd bhat(M), s2(M);\n  for(unsigned i = 0; i < q; i++) {\n    bhat = (Y.col(i).transpose() * G).array().rowwise() / G2.array().transpose();\n    /* B.row(i) = bhat; */\n    // residuals, s2\n    s2 = (((G.array().rowwise() * bhat.array().transpose()). // predicted yp = X bhat\n      colwise() - Y.col(i).array()). // residuals = y - yp\n      matrix().colwise().squaredNorm()). 
// residuals^2\n      array() / (N_data - 1.0); // s2 = residuals^2 / (N - 1)\n    Z.row(i) = bhat.array() * (G2.array() / s2.array()).sqrt();\n  }\n\n  // loop over M variants\n  // test statistic for variant i: stat(CPC) = Sum z^2\n  mt_results.stats_mt[pos_test].resize(M);\n  mt_results.logp_mt[pos_test].resize(M);\n  mt_results.logp_mt[pos_test_acpc_sumchi2].resize(M);\n  mt_results.logp_mt[pos_test_acpc_fisher].resize(M);\n  mt_results.logp_mt[pos_test_acpc_acat].resize(M);\n  mt_results.zscore_cpc.resize(M);\n  mt_results.zscore_acpc.resize(M);\n\n  VectorXd z(q); //, b(q);\n  double stat_i, pval_i, logp_i;\n  boost::math::chi_squared dist(q), dist_univar(1);\n  VectorXd logp_univar(q);\n  MCC mcc;\n  boost::math::chi_squared chisq(1);\n  for(unsigned int i = 0; i < M; i++) {\n    z = Z.col(i);\n\n    // univar. tests (single traits)\n    for(unsigned int j = 0; j < q; j++) {\n      logp_univar[j] = -log10(boost::math::cdf(boost::math::complement(dist_univar, z[j]*z[j])));\n    }\n\n    // store z-scores\n    mt_results.zscore_cpc[i].resize(q); // dim 1 = variants; dim 2 = traits\n    Map<VectorXd>(mt_results.zscore_cpc[i].data(), q) = z;\n\n    stat_i = z.transpose() * z;\n    pval_i = boost::math::cdf(boost::math::complement(dist, stat_i));\n\n    // store CPC results\n    mt_results.stats_mt[pos_test][i] = stat_i;\n    logp_i = -log10(pval_i);\n    mt_results.logp_mt[pos_test][i] = logp_i;\n\n    // ACPC\n    mt_results.zscore_acpc[i].resize(q); // dim 1 = variants; dim 2 = traits\n    Map<VectorXd>(mt_results.zscore_acpc[i].data(), q) = z;\n    // adjust CPC?\n    bool mcc_failed = false;\n    double z2j, z2j_adj;\n    if(n_skewed_pc) {\n      if((z.array().square() > mcc_z2).any()) {\n        for(unsigned int j = 0; j < q; j++) {\n          z2j = z[j]*z[j];\n          if((skew_PC[j] > mcc_skew_abs) && z2j > mcc_z2) { \n            // adjust\n            mcc.setup_y(Mask0, PC_Y0res.col(j), 1); // ncov analyzed = 1\n            MCCResults mcc_results_i = mcc.run(G.col(i));\n            if(mcc_results_i.Skip(0, 0)) {\n              mcc_failed = true;\n              break;\n            }\n            logp_univar[j] = -log10(mcc_results_i.Pval(0, 0));\n            // adjust Z-score\n            z2j_adj = boost::math::quantile(boost::math::complement(chisq, mcc_results_i.Pval(0, 0)));\n            mt_results.zscore_acpc[i][j] *= sqrt(z2j_adj / z2j);\n          } else { \n            // don't adjust & PC results\n            get_logp(logp_univar[j], z2j);\n          }\n        }\n      }\n    }\n    // check for MCC failure & combine p-values & store\n    if(mcc_failed) {\n      // !NB! 
how to encode NA p-values?\n      mt_results.logp_mt[pos_test_acpc_sumchi2][i] = -9;\n      mt_results.logp_mt[pos_test_acpc_fisher][i] = -9;\n      mt_results.logp_mt[pos_test_acpc_acat][i] = -9;\n    } else {\n      // sum chi2\n      stat_i = Map<ArrayXd>(mt_results.zscore_acpc[i].data(), q).square().sum();\n      get_logp(logp_i, stat_i, q);\n      mt_results.logp_mt[pos_test_acpc_sumchi2][i] = logp_i;\n      // Fisher\n      logp_i = get_fisher_robust(logp_univar);\n      mt_results.logp_mt[pos_test_acpc_fisher][i] = logp_i;\n      // ACAT\n      logp_i = get_acat(logp_univar);\n      mt_results.logp_mt[pos_test_acpc_acat][i] = logp_i;\n    }\n  }\n} \n\n//------------------------------------------------\n// Robust CPC0 test (0 = complete sample overlap)\n//------------------------------------------------\n\n// Association scan based on Robust PCs + complete sample overlap\nvoid MTests::assoc_rcpc0(const Eigen::MatrixXd &Y, const Eigen::MatrixXd& G, MTestsResults& mt_results)\n{\n  unsigned pos_test_sumchi2 = 8, pos_test_fisher = 9, pos_test_acat = 10;\n\n  // check dimensions\n  if(Y.rows() == 0) { throw std::runtime_error(\"RCPC0: Y.rows() == 0\"); }\n  if(Y.cols() == 0) { throw std::runtime_error(\"RCPC0: Y.cols() == 0\"); }\n  if(G.rows() == 0) { throw std::runtime_error(\"RCPC0: G.rows() == 0\"); }\n  if(G.cols() == 0) { throw std::runtime_error(\"RCPC0: G.cols() == 0\"); }\n\n  // dimensions\n  unsigned int N = Y.rows();\n  unsigned int q = Y.cols();\n  unsigned int M = G.cols();\n\n  double N_data = Neff0;\n\n  // check dimensions\n  if(Y.cols() != n_traits) { throw std::runtime_error(\"RCPC0: Y.cols() != n_traits\"); }\n  if(G.rows() != N) { throw std::runtime_error(\"RCPC0: #rows in G != N\"); }\n  if(M < 1) { throw std::runtime_error(\"RCPC0: M < 1\"); }\n  if(q < 1) { throw std::runtime_error(\"RCPC0: q < 1\"); }\n\n  // pre-compute matrix products\n  VectorXd G2 = G.colwise().squaredNorm();\n\n  // Marginal Z-scores, q x M matrix\n  MatrixXd Z(q, M); //, B(q, M);\n  VectorXd bhat(M), s2(M);\n  for(unsigned i = 0; i < q; i++) {\n    bhat = (Y.col(i).transpose() * G).array().rowwise() / G2.array().transpose();\n    // residuals, s2\n    s2 = (((G.array().rowwise() * bhat.array().transpose()). // predicted yp = X bhat\n      colwise() - Y.col(i).array()). // residuals = y - yp\n      matrix().colwise().squaredNorm()). // residuals^2\n      array() / (N_data - 1.0); // s2 = residuals^2 / (N - 1)\n    Z.row(i) = bhat.array() * (G2.array() / s2.array()).sqrt();\n  }\n\n  // loop over M variants\n  // test statistic for variant i: P-value(RCPC) = Fisher(P-values of PC1, PC2, ...)\n  /* mt_results.stats_mt[pos_test].resize(M); */\n  mt_results.logp_mt[pos_test_sumchi2].resize(M);\n  mt_results.logp_mt[pos_test_fisher].resize(M);\n  mt_results.logp_mt[pos_test_acat].resize(M);\n  mt_results.zscore_rcpc.resize(M);\n\n  VectorXd z(q); \n  double stat_i, logp_i;\n  boost::math::chi_squared dist(q), dist_univar(1);\n  VectorXd logp_univar(q);\n  for(unsigned int i = 0; i < M; i++) {\n    z = Z.col(i);\n    \n    mt_results.zscore_rcpc[i].resize(q); // dim 1 = variants; dim 2 = traits\n    Map<VectorXd>(mt_results.zscore_rcpc[i].data(), q) = z;\n\n    // univar. 
 tests (single traits)\n    for(unsigned int j = 0; j < q; j++) {\n      logp_univar[j] = -log10(boost::math::cdf(boost::math::complement(dist_univar, z[j]*z[j])));\n    }\n\n    // Sum Chi2\n    stat_i = Map<ArrayXd>(mt_results.zscore_rcpc[i].data(), q).square().sum();\n    get_logp(logp_i, stat_i, q);\n    mt_results.logp_mt[pos_test_sumchi2][i] = logp_i;\n    // Fisher\n    logp_i = get_fisher_robust(logp_univar);\n    mt_results.logp_mt[pos_test_fisher][i] = logp_i;\n    // ACAT\n    logp_i = get_acat(logp_univar);\n    mt_results.logp_mt[pos_test_acat][i] = logp_i;\n  }\n} \n\n//----------------------------------------\n// Bayesian test (BF instead of P-value)\n//----------------------------------------\n\n// Association scan using BF\nvoid MTests::assoc_bayes(const Eigen::MatrixXd &Y, const Eigen::MatrixXd& G, MTestsResults& mt_results)\n{\n  unsigned pos_test = 2;\n\n  // check dimensions\n  if(Y.rows() == 0) { throw std::runtime_error(\"Bayes: Y.rows() == 0\"); }\n  if(Y.cols() == 0) { throw std::runtime_error(\"Bayes: Y.cols() == 0\"); }\n  if(G.rows() == 0) { throw std::runtime_error(\"Bayes: G.rows() == 0\"); }\n  if(G.cols() == 0) { throw std::runtime_error(\"Bayes: G.cols() == 0\"); }\n\n  // dimensions\n  unsigned int N = Y.rows();\n  unsigned int q = Y.cols();\n  unsigned int M = G.cols();\n\n  double N_data = Neff0;\n\n  // check dimensions\n  if(Y.cols() != n_traits) { throw std::runtime_error(\"Bayes: Y.cols() != n_traits\"); }\n  if(G.rows() != N) { throw std::runtime_error(\"Bayes: #rows in G != N\"); }\n  if(M < 1) { throw std::runtime_error(\"Bayes: M < 1\"); }\n  if(q < 1) { throw std::runtime_error(\"Bayes: q < 1\"); }\n\n  // variables\n  double ld; // log-determinant of matrices\n  double LL_M0; // log-lik. for M0 \n\n  // pre-compute matrix products\n  MatrixXd YtY(q, q);\n  if(precomp) YtY = precomp0_YtY;\n  else YtY = MatrixXd(q, q).setZero().selfadjointView<Lower>().rankUpdate(Y.adjoint());\n\n  VectorXd G2 = G.colwise().squaredNorm();\n\n  // prior p(Sigma) = IW(a0, Q0)\n  VectorXd Mbeta_0 = VectorXd::Constant(q, prior_Mbeta0);\n  MatrixXd Q0 = MatrixXd::Constant(q, q, 0.0);\n  Q0.diagonal().array() = prior_Q0;\n\n  //--------------------\n  // null model M0\n  //--------------------\n  if(precomp) {\n    LL_M0 = precomp0_LL_M0;\n  } else {\n    ld = (Q0 + YtY).ldlt().vectorD().array().log().sum();\n    LL_M0 =  0.5* (double)(q) * log(prior_Vbeta0) - 0.5*(N_data + prior_a0 + (double)(q) - 1.0) * ld;\n  }\n  \n  //--------------------\n  // full model M1\n  //--------------------\n\n  // posterior parameters\n  // p(beta | Sigma) = N(Mbeta_1, Vbeta_1 * Sigma)\n  // p(Sigma) = IW(a1, Q1)\n  \n  // Vector Vbeta_1 of size 1 x M\n  VectorXd Vbeta_1 = (G2.array() + 1.0/prior_Vbeta0).inverse();\n\n  // Matrix Mbeta_1 of size q x M\n  // 1. Initialize Mbeta_1\n  MatrixXd Mbeta_1 = (((Y.transpose() * G).\n    // 2. Column-wise update: Mbeta_1 = apply(Mbeta_1, 2, function(x) (x + (Mbeta_0 / Vbeta_0)))\n    array().colwise() + (Mbeta_0.array() / prior_Vbeta0)).\n    // 3. 
Row-wise update: Mbeta_1 = apply(Mbeta_1, 1, function(x) (x / (1/Vbeta_0 + scprod_G))) %>% t\n    rowwise() * Vbeta_1.transpose().array()).matrix();\n\n  /* double a1 = prior_a0 + (double)(N_data); */\n\n  MatrixXd Q1_common = (Q0.array() + (Mbeta_0.squaredNorm() / prior_Vbeta0) + YtY.array()).matrix();\n  MatrixXd Q1 = MatrixXd(q, q);\n\n  // Loop over M variants\n  mt_results.stats_mt[pos_test].resize(M);\n  mt_results.logp_mt[pos_test].resize(M);\n\n  /* //  0.5*q * log(Vbeta_1) - 0.5*(N + a0 + q - 1) * as.numeric(determinant(Q1, log = TRUE)$modulus) */\n  VectorXd LL_M1_base = Vbeta_1.array().log() * (0.5*(double)(q));\n  for(unsigned int i = 0; i < M; i++) {\n    Q1 = (Q1_common.array() - (Mbeta_1.col(i).squaredNorm() / Vbeta_1[i])).matrix();\n\n    ld = Q1.ldlt().vectorD().array().log().sum();\n    double LL_M1 = LL_M1_base[i] - 0.5 * ((double)(N_data) + prior_a0 + (double)(q) - 1.0) * ld;\n    double BF = exp(LL_M1 - LL_M0);\n\n    mt_results.logp_mt[pos_test][i] = log10(BF);\n  }\n\n} \n\n//----------------------------------\n// Estimate correlations\n//----------------------------------\n\ndouble cor2_mask(const Eigen::ArrayXd &x, const Eigen::ArrayXd &y, \n    const MatrixXb &mask)\n{\n  size_t n = x.rows();\n  size_t n_val = mask.cast<int>().sum();\n  if(n_val <= 1) {\n    throw std::runtime_error(\"0 or 1 samples overlapped for a pair of traits\");\n  }\n  if((size_t)(n_val) > n) {\n    throw std::runtime_error(\"unexpected value of n_val (> n)\");\n  }\n\n  // means for x and y\n  double mx = mask.select(x, 0.0).sum() / (double)(n_val);\n  double my = mask.select(y, 0.0).sum() / (double)(n_val);\n\n  // Formula for the Pearson correlation, rho(x, y)\n  // rho(x, y) = [sum(x_i y_i) - n m_x m_y] / \n  //               [ sqrt{ sum(x_i^2) - n m_x^2 } sqrt{ sum(y_i^2) - n m_y^2 } ] \n  double r = \n      // r_xy = [sum(x_i y_i) - n m_x m_y] / \n      (mask.select(x * y, 0.0).sum() - (double)(n_val)*mx*my) /\n        // [ sqrt{ sum(x_i^2) - n m_x^2 } sqrt{ sum(y_i^2) - n m_y^2 } ] \n        sqrt(\n          (mask.select(x, 0.0).square().sum() - (double)(n_val)*mx*mx) * \n          (mask.select(y, 0.0).square().sum() - (double)(n_val)*my*my));\n\n  return(r);\n}\n\nvoid MTests::compute_cory(const Ref<const MatrixXd>& Y, const Ref<const MatrixXb>& M)\n{\n  size_t p = Y.cols();\n  if(p == 0) {\n    throw std::runtime_error(\"#columns of Y (p) is zero\");\n  }\n\n  Ryy = MatrixXd::Constant(p, p, 0.0);\n  Ryy.diagonal().array() = 1.0;\n\n  for(size_t i = 0; i < p; i++) {\n    for(size_t j = i + 1; j < p; j++) {\n      MatrixXb mask = M.col(i) || M.col(j);\n\n      double r = cor2_mask(Y.col(i), Y.col(j), mask);\n      Ryy(i, j) = r;\n      Ryy(j, i) = r;\n    }\n  }\n}\n  \nvoid MTests::compute_ncory(const Ref<const MatrixXb>& M)\n{\n  size_t p = M.cols();\n  if(p == 0) {\n    throw std::runtime_error(\"#columns of M (p) is zero\");\n  }\n\n  N_Ryy = MatrixXi::Constant(p, p, 0);\n  N_Ryy.diagonal().array() = M.cast<int>().colwise().sum(); \n\n  for(size_t i = 0; i < p; i++) {\n    for(size_t j = i + 1; j < p; j++) {\n      MatrixXb mask = M.col(i) || M.col(j);\n\n      size_t n_val = mask.cast<int>().sum();\n      N_Ryy(i, j) = n_val;\n      N_Ryy(j, i) = n_val;\n    }\n  }\n}\n\nvoid MTests::compute_skew_pc()\n{\n  if(PC_Y0res.cols() != n_traits) { throw std::runtime_error(\"MTests::compute_skew_pc: PC_Y0res\"); }\n\n  // for each PC, compute skewness\n  skew_PC = ArrayXd::Constant(n_traits, 0.0);\n  for(unsigned int i = 0; i < n_traits; i++) {\n    double skew = 
skew_pheno(PC_Y0res.col(i), Mask0);\n    // debug\n    /* cout << \"skew = \" << skew << endl; */\n    skew_PC[i] = abs(skew);\n  }\n  n_skewed_pc = (skew_PC.array() > mcc_skew_abs).sum();\n}\n\n//----------------------------------\n// Print results\n//----------------------------------\n\nstring MTests::print_sumstats(\n    const MTestsResults& mt_results,\n    int const& isnp, uint32_t const& snp_index,\n    string const& test_string, string const& wgr_string, \n    variant_block* block_info, vector<snp> const& snpinfo, \n    struct param const* params)\n{\n  check_setup_data();\n\n  // check attributes\n  if(mt_results.M == 0) { throw std::runtime_error(\"MTests::print_sumstats: M == 0\"); }\n\n  /* string header; */\n  std::ostringstream buffer_header;\n  buffer_header << snpinfo[snp_index].chrom << \" \" << snpinfo[snp_index].physpos \n    << \" \"<< snpinfo[snp_index].ID << \" \"\n    << snpinfo[snp_index].allele1 << \" \"<< snpinfo[snp_index].allele2 << \" \" \n    << block_info->mac(0) << \" \"\n    << block_info->af(0) << \" \" \n    << Neff0;                                \n  string header = buffer_header.str();\n\n  std::ostringstream buffer;\n  buffer << header; // write header to buffer\n\n  // output p-values\n  double max_logp;\n  size_t pos_omnibus0 = 1;\n  if(mt_results.logp_univar[pos_omnibus0].size() != 1) { throw std::runtime_error(\"MTests::print_sumstats: logp_univar\"); }\n  // single-trait p-values\n  /* for(size_t s = 0; s < n_traits; s++) { */\n  /*   buffer << \" \" << mt_results.logp_univar[pos_omnibus0][0][s]; // write univar p-value */ \n  /* } */\n  // minP / minQ\n  max_logp = mt_results.logp_univar[pos_omnibus0][0][0];\n  for(size_t s = 1; s < n_traits; s++) {\n    double logp_s = mt_results.logp_univar[pos_omnibus0][0][s];\n    if(logp_s > max_logp) max_logp = logp_s;\n  }\n  buffer << \" \" << max_logp;\n  double max_logq = max(0.0, max_logp - log10(n_traits));\n  buffer << \" \" << max_logq;\n  // multi-trait test p-values\n  for(size_t t = 0; t < n_tests; t++) {\n    if(mt_results.logp_mt[t].size() != 1) { throw std::runtime_error(\"MTests::print_sumstats: logp_mt\"); }\n\n    buffer << \" \" << mt_results.logp_mt[t][0]; // write p-value of test #t to buffer\n  }\n  // NNLS0 q-value\n  size_t pos_nnls0 = 3;\n  max_logq = max(0.0, mt_results.logp_mt[pos_nnls0][0] - log10(2.0));\n  buffer << \" \" << max_logq;\n  // Single-trait z-scores\n  for(size_t j = 0; j < n_traits; j++) {\n    buffer << \" \" << mt_results.zscore_univar[0][j]; // dim 1 = variants; dim 2 = traits\n  }\n  // PC z-scores\n  for(size_t j = 0; j < n_traits; j++) {\n    buffer << \" \" << mt_results.zscore_cpc[0][j]; // dim 1 = variants; dim 2 = traits\n  }\n  // Robust PC z-scores\n  for(size_t j = 0; j < n_traits; j++) {\n    buffer << \" \" << mt_results.zscore_rcpc[0][j]; // dim 1 = variants; dim 2 = traits\n  }\n  // Adjusted PC z-scores\n  for(size_t j = 0; j < n_traits; j++) {\n    buffer << \" \" << mt_results.zscore_acpc[0][j]; // dim 1 = variants; dim 2 = traits\n  }\n\n  buffer << endl; // finish writing a line for SNP to buffer\n\n  return(buffer.str());\n}\n\n\n//----------------------------------\n// Debug\n//----------------------------------\n\nvoid MTests::dump_data(const MatrixXd &Y, const MatrixXd &X, const MatrixXb &Mask, const MatrixXb &Mask0)\n{\n  const static IOFormat CSVFormat(StreamPrecision, DontAlignCols, \", \", \"\\n\");\n  // dump Y\n  ofstream file_y(\"mt.Y.txt\");\n  file_y << Y.format(CSVFormat);\n  file_y.close();\n  // dump X\n  ofstream 
file_X(\"mt.X.txt\");\n  file_X << X.format(CSVFormat);\n  file_X.close();\n  // dump masks M\n  ofstream file_M(\"mt.M.txt\");\n  file_M << Mask.array().cast<double>().matrix().format(CSVFormat);\n  file_M.close();\n  // dump masks M0\n  ofstream file_M0(\"mt.M0.txt\");\n  file_M0 << Mask0.array().cast<double>().matrix().format(CSVFormat);\n  file_M0.close();\n}\n\n"
  },
  {
    "path": "src/MultiTrait_Tests.hpp",
    "content": "/* \n\n   This file is part of the regenie software package.\n\n   Copyright (c) 2020-2024 Joelle Mbatchou, Andrey Ziyatdinov & Jonathan Marchini\n\n   Permission is hereby granted, free of charge, to any person obtaining a copy\n   of this software and associated documentation files (the \"Software\"), to deal\n   in the Software without restriction, including without limitation the rights\n   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n   copies of the Software, and to permit persons to whom the Software is\n   furnished to do so, subject to the following conditions:\n\n   The above copyright notice and this permission notice shall be included in all\n   copies or substantial portions of the Software.\n\n   THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n   SOFTWARE.\n\n*/\n\n#ifndef MULTITRAIT_H\n#define MULTITRAIT_H\n\n#include \"NNLS.hpp\"\n\n#define MULTITRAIT_N_TESTS 14\n// 0. MANOVA -- assoc_manova\n// 1. Omnibus 0 --  assoc_omnibus0\n// 2. Bayesian test (Baes Factor or BF) -- assoc_bayes\n// 3. NNLS 0 = Non-negative least squares 0  --  assoc_nnls0\n// 4. SumZ  --  assoc_omnibus0\n// 5. NP-MANOVA 0 = Non-Parametric MANOVA 0 -- assoc_manova\n// 6. Het. Omnibus 0 --  assoc_omnibus0\n// 7. CPC 0  -- assoc_cpc0\n// 8. Robust CPC 0 = RCPC0SUMCHI2 -- assoc_rcpc0\n// 9. Robust CPC 0 = RCPC0FISHER -- assoc_rcpc0\n// 10. Robust CPC 0 = RCPC0ACAT -- assoc_rcpc0\n// 11. Adjusted CPC 0 = ACPC0SUMCHI2  --  assoc_cpc0\n// 12. Adjusted CPC 0 = ACPC0FISHER   --  assoc_cpc0\n// 13. Adjusted CPC 0 = ACPC0ACAT     --  assoc_cpc0\n// * 0 = strict on missing\n\nclass MTestsResults \n{\n  public:\n    unsigned int n_tests; // numbre of multi-trait tests\n    unsigned int q; // number of traits                \n    unsigned int M; // number of variables (to test for association)\n                      \n    // test results\n    std::vector<std::vector<double>> stats_mt; // dim 1 = mt; dim 2 = variants\n    std::vector<std::vector<double>> logp_mt; // dim 1 = mt; dim 2 = variants\n    std::vector<std::vector<vector<double>>> logp_univar; // dim 1 = mt; dim 2 = variants; dim 3 = traits\n    std::vector<std::vector<double>> zscore_univar; // dim 1 = variants; dim 2 = traits\n    std::vector<std::vector<double>> zscore_cpc; // dim 1 = variants; dim 2 = traits\n    std::vector<std::vector<double>> zscore_acpc; // dim 1 = variants; dim 2 = traits\n    std::vector<std::vector<double>> zscore_rcpc; // dim 1 = variants; dim 2 = traits\n                                                \n    //----------\n    // Methods\n    //----------\n    void setup(unsigned int, unsigned int, unsigned int);\n\n    MTestsResults(unsigned int, unsigned int, unsigned int);\n    ~MTestsResults();\n};\n\nclass MTests {\n\n  public:\n    int verbose; // verbose level \n    bool precomp; // precompute products like YtY to speed up computation\n\n    // NNLS object\n    NNLS nnls;\n\n    // MCC for ACPC\n    double mcc_skew_abs; // skewness thr. to apply MCC\n    double mcc_z2; // Z-score squared thr. 
to apply MCC\n    Eigen::ArrayXd skew_PC; // (absolute) skewness of Robust PCs\n    unsigned int n_skewed_pc; // number of PCs with abs. skewness > thr.\n\n    // information copied from Data class\n    MatrixXb Mask0; // masked samples = samples with missing values on phenotypes; 0 = missing in all phenotypes\n    double Neff0; // number of samples non-missing in at least one trait (union across traits)\n    MatrixXb Mask; // masked samples = samples with missing values on phenotypes; 0 = missing in a given phenotype; column = trait\n    Eigen::VectorXd Neff; // number of non-missing samples (per trait)\n\n    Eigen::MatrixXd Yres; // matrix of residualized traits (per-trait missing patterns)\n    Eigen::MatrixXd Y0res; // matrix of residualized traits (common missing pattern across traits)\n    \n    // list of multi-trait tests\n    unsigned int n_tests;\n    std::map<std::string, uint16_t> mt_tests_map = { {\"manova\", 0}, {\"omnibus0\", 1}, {\"bayes\", 2} };\n    std::vector<std::string> test_names = {\"MANOVA\", \"Omnibus0\", \"Bayes\"};\n    uint16_t test_list;\n\n    // dimensions\n    unsigned int n_traits; // number of traits                \n    /* unsigned int N; // number of samples */\n    /* unsigned int q; // number of traits */                \n    /* unsigned int M; // number of variables (to test for association) */\n                    \n    // parameters for Bayesian test\n    double prior_a0;\n    double prior_Q0;\n    double prior_Mbeta0;\n    double prior_Vbeta0;\n\n    // precompute quantities\n    Eigen::MatrixXd precomp0_YtY; // cross-product: Y^T Y\n    Eigen::MatrixXd precomp0_Syy; // covariance of Y: cov(Y)\n    Eigen::MatrixXd precomp0_Syy_inv; // inverse of covariance of Y\n    // MANOVA-specific\n    double precomp0_ld0;\n    // Bayes-specific\n    double precomp0_LL_M0;\n    // Hierarchical Omnibus\n    Eigen::VectorXd precomp0_lambdas_Syy, precomp0_lambdas_norm_Syy;\n    // Robust Omnibus\n    Eigen::MatrixXd PC_Y0res;\n    Eigen::MatrixXd RPC_Y0res;\n\n    // data for traits\n    Eigen::MatrixXd Ryy;\n    Eigen::MatrixXi N_Ryy;\n\n    // test results\n    /* VectorXd stats; */\n    /* VectorXd pvals; */\n    /* /1* vector<string> sum_stats; *1/ */\n    /* vector<vector<double>> logp_mt; // 1 = mt; 2 = variants */\n    /* vector<vector<vector<double>>> logp_univar; // 1 = mt; 2 = variants; 3 = traits */\n\n    // parameters of association tests\n    double nl_dbl_dmin = 10.0 * std::numeric_limits<double>::min();\n\n    //----------\n    // Methods\n    //----------\n    void setup();\n    void setup_masks(const MatrixXb &);\n    void setup_yres(const Eigen::MatrixXd &);\n    void check_setup_no_data();\n    void check_setup_data();\n\n    // data prep.\n    void compute_ncory(const Eigen::Ref<const MatrixXb>& M);\n    void compute_cory(const Eigen::Ref<const Eigen::MatrixXd>& Y, const Eigen::Ref<const MatrixXb>& M);\n    void compute_skew_pc();\n\n    // assoc. 
tests\n    void apply_tests(const int&,const int&,struct phenodt const*,const Eigen::Ref<const Eigen::MatrixXd>&,struct geno_block const*,std::vector<variant_block>&,std::vector<std::string> const&,struct param const*);\n    void apply_tests_snp(int const&, struct geno_block& , const Eigen::Ref<const Eigen::MatrixXd>&, const Eigen::Ref<const Eigen::RowVectorXd>&, struct param const&);\n    MTestsResults run_tests_snp(int const&, struct geno_block& , const Eigen::Ref<const Eigen::MatrixXd>&, const Eigen::Ref<const Eigen::RowVectorXd>&, struct param const&);\n    MTestsResults run_tests_snp_precomp(int const&, struct geno_block& , struct param const&);\n    \n    void assoc_manova(const Eigen::MatrixXd &Y, const Eigen::MatrixXd& G, MTestsResults&);\n    void assoc_omnibus0(const Eigen::MatrixXd &Y, const Eigen::MatrixXd& G, MTestsResults&);\n    void assoc_cpc0(const Eigen::MatrixXd &Y, const Eigen::MatrixXd& G, MTestsResults&);\n    void assoc_rcpc0(const Eigen::MatrixXd &Y, const Eigen::MatrixXd& G, MTestsResults&);\n    void assoc_bayes(const Eigen::MatrixXd &Y, const Eigen::MatrixXd& G, MTestsResults&);\n    void assoc_nnls0(const Eigen::MatrixXd &Y, const Eigen::MatrixXd& G, MTestsResults&);\n\n    // print results \n    /* string print_multitrait_output(int const& snp_index, std::string const& test_string, std::string const& wgr_string, */ \n    /*     variant_block* block_info, std::vector<snp> const& snpinfo, */\n    /*     struct in_files const& files,struct param const* params); */\n    string print_sumstats(const MTestsResults&, int const& snp_index, uint32_t const&, string const& test_string, string const& wgr_string, variant_block* block_info, vector<snp> const& snpinfo, struct param const* params);\n\n    // debug\n    void dump_data(const Eigen::MatrixXd &, const Eigen::MatrixXd &, const MatrixXb &, const MatrixXb &);\n\n    MTests();\n    ~MTests();\n};\n#endif\n"
  },
  {
    "path": "src/NNLS.cpp",
    "content": "/*  \n\n   This file is part of the regenie software package.\n\n   Copyright (c) 2020-2024 Joelle Mbatchou, Andrey Ziyatdinov & Jonathan Marchini\n\n   Permission is hereby granted, free of charge, to any person obtaining a copy\n   of this software and associated documentation files (the \"Software\"), to deal\n   in the Software without restriction, including without limitation the rights\n   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n   copies of the Software, and to permit persons to whom the Software is\n   furnished to do so, subject to the following conditions:\n\n   The above copyright notice and this permission notice shall be included in all\n   copies or substantial portions of the Software.\n\n   THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n   SOFTWARE.\n\n*/\n\n#include \"NNLS.hpp\"\n#include \"mvtnorm/mvtnorm.h\"\n\nusing namespace Eigen;\nusing namespace std;\n\n/****************************************************\n * Compute weights for the NNLS test\n * ic.weights function in the R package ic.infer\n****************************************************/\n\nEigen::MatrixXd _inverse(const Eigen::MatrixXd& V)\n{\n  int n = V.rows();\n  MatrixXd Vinv = V.llt().solve(MatrixXd::Identity(n, n));\n  /* MatrixXd Vinv = V.inverse(); */\n  return(Vinv);\n}\n\ninline void _assign_wts(vector<double> &wts, int index, double value, int verbose) {\n  wts[index] = value;\n  if(verbose > 1) cout << \" w[\" << index << \"] = \" << value << \n    \" (equal to zero = \" << (value == 0.0) << \")\" << endl;\n}\n\nvoid _complement(int n, const vector<int> &s1, vector<int> &s2)\n{\n  int n_s1 = s1.size();\n  s2.reserve(n - n_s1);\n  \n  std::vector<char> s1_true(n, false);\n  for(int i : s1) {\n    s1_true[i] = true;\n  }\n\n  for(int i = 0; i < n; ++i) {\n    if(!s1_true[i]) {\n      s2.push_back(i);\n    }\n  }\n}\n\n\nEigen::MatrixXd _subset_matrix(const Eigen::MatrixXd& V, \n    vector<int> &rows, vector<int> &cols)\n{\n  int ns = rows.size(), ps = cols.size(); // nxp dimensions of subset matrix\n  Eigen::MatrixXd S(ns, ps);\n\n  for(int i = 0; i < ns; ++i) {\n    for(int j = 0; j < ps; ++j) {\n      S(i, j) = V(rows[i], cols[j]);\n    }\n  }\n\n  return(S);\n}\n\ninline Eigen::MatrixXd _subset_matrix1(const Eigen::MatrixXd& V, \n    vector<int> &rows, vector<int> &cols)\n{\n  return(V(rows, cols));\n}\n\nEigen::MatrixXd jburden_subset_matrix(const Eigen::MatrixXd& V, \n    vector<int> &rows, vector<int> &cols, int method)\n{\n  if(method == 0) {\n    return(_subset_matrix(V, rows, cols));\n  } else if(method == 1) {\n    return(_subset_matrix1(V, rows, cols));\n  } else {\n    throw std::runtime_error(\"jburden_subset_matrix: unknown method\"); \n  }\n}\n\nvoid _wts_subset(const Eigen::MatrixXd &V, vector<int> &s1, double &w1, double &w2, int index)\n{\n  int n = V.rows(); // n rows x n colums\n\n  vector<int> s2;\n  _complement(n, s1, s2);\n\n  // subset V: V11, V22, V12 & V21\n  Eigen::MatrixXd V11 = _subset_matrix(V, s1, s1);\n  Eigen::MatrixXd V22 = _subset_matrix(V, s2, s2);\n  Eigen::MatrixXd V12 = _subset_matrix(V, 
s1, s2);\n  Eigen::MatrixXd V21 = _subset_matrix(V, s2, s1);\n\n  // matrix V220 = V22 - V21 solve(V11, V12)\n  MatrixXd V11inv = _inverse(V11);\n  MatrixXd V220 = V22 - V21 * V11inv * V12;\n\n  w1 = jburden_pnorm(V11inv) * jburden_pnorm(V220);\n\n  // needed only if n is odd\n  // example 1: skip if n = 4 & index = 1 is the last\n  // example 2: pass if n = 5\n  if(2*(index + 1) != (n - 2)) {\n    // matrix V110 = V11 - V12 solve(V22, V21)\n    MatrixXd V22inv = _inverse(V22);\n    MatrixXd V110 = V11 - V12 * V22inv * V21;\n\n    w2 = jburden_pnorm(V22inv) * jburden_pnorm(V110);\n  } else {\n    w2 = 0.0;\n  }\n}\n\n// main function: compute weights for the NNLS test\nEigen::VectorXd jburden_wts(const Eigen::MatrixXd& V, int verbose)\n{\n  int n = V.rows(); // n rows x n columns\n\n  // output vector of weights: w(n), w(n-1), ..., w(1), w(0)\n  int nw = n + 1;  \n  vector<double> wts(nw, 0.0);\n\n  // step 1: wts[0] = w(n)\n  /* wts[0] = jburden_pnorm(V); */\n  // cerr << jburden_pnorm(V) << endl;\n  _assign_wts(wts, 0, jburden_pnorm(V), verbose);\n  \n  // step 2: wts[nw-1] = w(0)\n  MatrixXd Vinv = _inverse(V);\n  /* wts[nw - 1] = jburden_pnorm(Vinv); */\n  _assign_wts(wts, nw - 1, jburden_pnorm(Vinv), verbose);\n\n  // step 3: interior weights wts[k + 1] = w(n-k-1), e.g. wts[1] = w(3) for n = 4\n  int nk = floor((n - 2) / 2);\n  for(int k = 0; k < nk; ++k) {\n    // step 3.1: get all subsets of size (k+1) \n    if(verbose) cout << \" k \" << k << \" / \" << nk << endl;\n\n    list<vector<int>> subsets;\n    jburden_nchoosek(n, k + 1, subsets);\n    int n_subsets = subsets.size();\n    if(verbose) cout << \"   #subsets = \" << n_subsets << endl;\n\n    vector<double> w1(n_subsets);\n    vector<double> w2(n_subsets);\n    int i = 0;\n    list<vector<int>>::iterator it = subsets.begin(); \n    for(; it != subsets.end(); ++it, ++i) {\n      double w1_i, w2_i;\n      _wts_subset(V, *it, w1_i, w2_i, k);\n\n      w1[i] = w1_i;\n      w2[i] = w2_i;\n    }\n\n    // assign weights\n    /* wts[k + 1] = accumulate(w1.begin(), w1.end(), 0.0); */\n    _assign_wts(wts, k + 1, accumulate(w1.begin(), w1.end(), 0.0), verbose);\n\n    /* wts[n - k - 1] = accumulate(w2.begin(), w2.end(), 0.0); */\n    _assign_wts(wts, nw - k - 2, accumulate(w2.begin(), w2.end(), 0.0), verbose);\n  }\n  \n  // two sums of odd and even weights\n  double sum_odd = 0.0, sum_even = 0.0;\n  if(n % 2 == 0) {\n    for(int i = 0; i < nw; ++i) {\n      if(i % 2 == 0) sum_even += wts[i];\n      else sum_odd += wts[i];\n    }\n  } else {\n    for(int i = 0; i < nw; ++i) {\n      if(i % 2 == 0) sum_odd += wts[i];\n      else sum_even += wts[i];\n    }\n  }\n  double sub_odd = 0.5 - sum_odd, sub_even = 0.5 - sum_even;\n\n  if( n % 2 == 0) {\n    // n is even\n    if(n % 4 == 0) {\n      // example: n = 4\n      _assign_wts(wts, n/2, sub_even, verbose);\n      _assign_wts(wts, n/2 + 1, sub_odd, verbose);\n    } else {\n      // example: n = 6\n      _assign_wts(wts, n/2, sub_odd, verbose);\n      _assign_wts(wts, n/2 + 1, sub_even, verbose);\n    }\n  } else {\n    // n is odd\n    if((n + 1) % 4 == 0) {\n      // example: n = {3, 7}\n      _assign_wts(wts, (n + 1)/2 - 1, sub_even, verbose);\n      _assign_wts(wts, (n + 3)/2 - 1, sub_odd, verbose);\n    } else {\n      // example: n = {5, 9}\n      _assign_wts(wts, (n + 1)/2 - 1, sub_odd, verbose);\n      _assign_wts(wts, (n + 3)/2 - 1, sub_even, verbose);\n    }\n  }\n\n  VectorXd ret(nw);\n  for(int i = 0; i < nw; ++i) {\n    ret[i] = wts[i];\n  }\n\n  return(ret);\n}\n\ndouble jburden_pnorm(const Eigen::MatrixXd& A, \n  int maxpts, double 
abseps, int verbose)\n{\n  int i, j, k; // iterators\n  int n = A.rows(); // n rows x n columns\n  int nc = (n*n - n)/2; // number of elements in low-tri part of A\n  double* bound = new double[n];\n  double* cmat = new double[nc];\n  double error, ret; \n\n  // fill n bounds with 0\n  for(i = 0; i < n; ++i) {\n    bound[i] = 0.0;\n  }\n  \n  // convert A to correlation matrix \n  // (requirement of pmvnorm)\n  /* Eigen::MatrixXd C = wts_cov2cor(A); */\n  vector<double> sd(n);\n  for(int i = 0; i < n; ++i) {\n    sd[i] = sqrt(A(i, i));\n  }\n  \n\n  // fill low-tri version cmat of correlation matrix A\n  /* string name = \"C.matirx.tmp\"; */\n  /* ofstream file(name.c_str()); */\n  k = 0; // counter for filled entries in cmat\n  for(i = 0; i < n; ++i) {\n    for(j = 0; j < i; ++j) {\n      cmat[k] = A(i, j) / (sd[i]*sd[j]);\n      k += 1;\n      /* file << cmat[k] << \"\\t\"; */\n    }\n    /* file << \"\\n\"; */\n  }\n  /* file.close(); */\n \n  // call C++ function that calls the Fortran code\n  ret = pmvnorm_complement(n, maxpts, abseps, bound, cmat, &error);\n  //ret = 1.0;\n\n  // free the buffers passed to the Fortran routine\n  delete[] bound;\n  delete[] cmat;\n\n  return(ret);\n}\n\n/****************************************************\n * Enumerate all sets of k out of n numbers\n * nchoosek function in the R package ic.infer\n****************************************************/\n\n// the number of all sets of k out of n\n// Implementation in Boost Library: https://www.boost.org/doc/libs/1_56_0/libs/math/doc/html/math_toolkit/factorials/sf_binomial.html\n// - Binomial coefficients are calculated using table lookup of factorials where possible.\n// - Otherwise, it is implemented in terms of the beta function.\ndouble jburden_choose_boost(int n, int k)\n{\n  double ret = boost::math::binomial_coefficient<double>(n, k);\n  return ret;\n}\n\n// the number of all sets of k out of n\nint jburden_choose(int n, int k)\n{\n  if(k == 0) return 1;\n\n  if(k > n / 2) return(jburden_choose(n, n - k));\n  \n  long res = 1;\n\n  for(int i = 1; i <= k; ++i) {\n    res *= n - i + 1;\n    res /= i;\n  }\n\n  return res;\n}\n\n// recursive algorithm for nchoosek\nvoid _nchoosek_rec(int size, int left, int index, \n  vector<int> &l, list<vector<int>> &ll)\n{\n  if(left == 0) { \n    ll.push_back(l);\n    return;\n  }\n  \n  for(int i = index; i < size; i++) {\n    l.push_back(i);\n    _nchoosek_rec(size, left - 1, i + 1, l, ll);\n    l.pop_back();\n  }\n}\n\nvoid jburden_nchoosek(int n, int k, list<vector<int>> &ll)\n{\n  vector<int> l;   \n  l.reserve(k);\n  _nchoosek_rec(n, k, 0, l, ll);\n}\n\n/***************************************\n * NNLS model fitting: jburden_fit_nnls\n***************************************/\n\n// R: x <- c(TRUE, FALSE); which(x)\nvector<int> bool_to_int(vector<bool> &x) \n{ \n  vector<int> y;\n\n  int i = 0;\n  for(vector<bool>::iterator it = x.begin(); it != x.end(); ++it, ++i) {\n    if(*it) {\n      y.push_back(i);\n    }\n  }\n  return(y);\n}\n\n\n// R: x <- c(TRUE, FALSE); sum(x)\nint sum_bool(vector<bool> &x) \n{ \n  int sum = accumulate(begin(x), end(x), 0);\n  return(sum);\n}\n\n// R: x <- c(1, 3); subset <- c(TRUE, FALSE); min(x[subset])\ndouble min_vec(VectorXd &x, vector<bool> &subset)\n{\n  int i = 0;\n  vector<bool>::iterator it = subset.begin(); \n  double min;\n\n  // search for first true in subset\n  for(; it != subset.end(); ) {\n    if(*it) break; \n    ++it;\n    ++i;\n  }\n  // initialize the minimum \n  min = x[i];\n  ++it;\n  ++i;\n\n  // go over other elements of x\n  for(; it != subset.end(); ++it, ++i) {\n    if(*it) {\n 
     double val = x[i];\n      if(val < min) {\n        min = val;\n      }\n    }\n  }\n\n  return(min);\n}\n\n// R: x <- c(1, 3); subset <- c(TRUE, FALSE); max(x[subset])\ndouble max_vec(VectorXd &x, vector<bool> &subset)\n{\n  int i = 0;\n  vector<bool>::iterator it = subset.begin(); \n  double max;\n\n  // early stop: all false in subset\n  int m = sum_bool(subset);\n  if(m == 0) {\n    return(0.0);\n  }\n\n  // search for first true in subset\n  for(; it != subset.end(); ) {\n    if(*it) break; \n    ++it;\n    ++i;\n  }\n  // initialize the maximum \n  max = x[i];\n  ++it;\n  ++i;\n\n  // go over other elements of x\n  for(; it != subset.end(); ++it, ++i) {\n    if(*it) {\n      double val = x[i];\n      if(val > max) {\n        max = val;\n      }\n    }\n  }\n\n  return(max);\n}\n\n// R: x <- c(1, 3); subset <- c(TRUE, FALSE); which.max(x[subset])\nint which_max(VectorXd &x, vector<bool> &subset)\n{\n  int i = 0;\n  vector<bool>::iterator it = subset.begin(); \n  double max; // max value\n  int m; // index with max value (return)\n\n  // search for first true in subset\n  for(; it != subset.end(); ) {\n    if(*it) break; \n    ++it;\n    ++i;\n  }\n  // initialize the maximum \n  max = x[i];\n  m = i;\n  ++it;\n  ++i;\n\n  // go over other elements of x\n  for(; it != subset.end(); ++it, ++i) {\n    if(*it) {\n      double val = x[i];\n      if(val > max) {\n        max = val;\n        m = i;\n      }\n    }\n  }\n\n  return(m);\n}\n\n// R: x <- c(1, 3); subset <- c(TRUE, FALSE); x[subset]\nVectorXd subset_vec(VectorXd &x, vector<bool> subset) \n{\n  int m = sum_bool(subset);\n  VectorXd y(m);\n\n  int i = 0, j = 0;\n  vector<bool>::iterator it = subset.begin(); \n  for(; it != subset.end(); ++it, ++i) {\n    if(*it) {\n      y[j++] = x[i];\n    }\n  }\n\n  return(y);\n}\n\n// R: X <- matrix(1:9, 3, 3); subset <- 1:2; X[subset, subset]\nMatrixXd subset_mat(MatrixXd &X, vector<bool> subset) \n{\n  int m = sum_bool(subset);\n  MatrixXd Y(m, m);\n\n  vector<int> ind = bool_to_int(subset);\n\n  for(int i = 0; i < m; ++i) {\n    int ix = ind[i];\n    for(int j = 0; j < m; ++j) {\n      int jx = ind[j];\n      Y(i, j) = X(ix, jx);\n    }\n  }\n\n  return(Y);\n}\n\n// solve OLS on a subset of variables: y ~ X[, subset] b_subset\nVectorXd solve_s(VectorXd &Xty, MatrixXd &XtX, vector<bool> subset)\n{\n  int p = XtX.rows();\n  int m = sum_bool(subset);\n  VectorXd b(p); // output vector of betas\n\n  if(m > 0) {\n    MatrixXd XtX_subset = subset_mat(XtX, subset);\n    VectorXd Xty_subset = subset_vec(Xty, subset);\n\n    LLT<MatrixXd> llt(XtX_subset);\n    VectorXd b_subset = llt.solve(Xty_subset);\n \n    int i = 0, i_subset = 0;\n    vector<bool>::iterator it = subset.begin(); \n    for(; it != subset.end(); ++it, ++i) {\n      if(*it) {\n        b[i] = b_subset[i_subset];\n        ++i_subset;\n      } else {\n        b[i] = 0.0;\n      }\n    }\n  } else {\n    for(int j = 0; j < p; ++j) {\n      b[j] = 0.0;\n    }\n  }\n\n  return(b);\n}\n\n\n/* S <- (s <= tol) & P */\n/* alpha <- min(b[S] / (b[S] - s[S])) */\n/* b <- b + alpha * (s - b) */\ninline void update_b(VectorXd &b, VectorXd &s, vector<bool> &subset, double tol)\n{\n  // overlap two conditions: s <= tol & subset\n  int n = subset.size();\n  vector<bool> negs_subset(n);\n  for(int i = 0; i < n; i++) {\n    negs_subset[i] = subset[i] & (s[i] <= tol);\n  }\n  \n  // convert boolean indices to integer indices\n  vector<int> ind = bool_to_int(negs_subset);\n  int m = ind.size();\n\n  // initial value of min\n  int k = ind[0];\n  
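// (doc note) step length: alpha = min over the violating set of b/(b - s); the update\n  // b <- b + alpha*(s - b) moves b toward s just far enough to zero the first\n  // coefficient that would cross zero (cf. the Lawson-Hanson active-set NNLS algorithm).\n  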
double min_val = b[k] / (b[k] - s[k]);\n  // go over other elements starting from the 2nd index (1)\n  for(int i = 1; i < m; ++i) {\n    k = ind[i];\n    double val = b[k] / (b[k] - s[k]);\n    if(val < min_val) {\n      min_val = val;\n    }\n  }\n\n  double alpha = min_val;\n  /* VectorXd delta = s - b; */\n  b = b + alpha * (s - b);\n}\n\n/* l <- which(b <= 0) */\n/* P[l] <- FALSE */\n/* R[l] <- TRUE */\ninline void update_subsets(VectorXd &b, vector<bool> &P, vector<bool> &R, double tol)\n{\n  int p = b.size();\n\n  for(int i = 0; i < p; ++i) {\n    if(b[i] <= tol) {\n      P[i] = false;\n      R[i] = true;\n    }\n  }\n}\n\n// NNLS model fitting: active set algorithm \n// - summary stat. level data b and V\nint jburden_fit_nnls_cprod(const Eigen::VectorXd &Xty_, const Eigen::MatrixXd& XtX_,\n  Eigen::VectorXd& bhat_out, vector<bool>& selected_out,\n  double tol, bool neg, int maxit, int maxit_inner, int verbose)\n{\n  VectorXd Xty = Xty_;\n  MatrixXd XtX = XtX_;\n  const int p = Xty.size();\n\n  if(neg) {\n    Xty = -1 * Xty;\n  }\n\n  // A. Initialization\n  if(verbose > 2) cout << \"A. Initialization\" << endl;\n\n  vector<bool> P(p, false); // passive set P (empty)\n  vector<bool> R(p, true); // active set R (full)\n\n  VectorXd b(p); // vector b of effect sizes\n  b.setZero(); // initialize b with zeros\n\n  VectorXd w = Xty; // Lagrange multiplier \n     \n  // B. Main loop\n  if(verbose > 2) cout << \"B. Main loop\" << endl;\n  \n  int cnt_main = 0, cnt_inner = 0;\n  int m; // index in R with the maximum w (R subset)\n  while((sum_bool(R) > 0) & (max_vec(w, R) > tol) & (cnt_main <= maxit)) {\n    if(verbose > 2) cout << \" - it (main) \" << cnt_main << endl;\n\n    m = which_max(w, R);\n    if(verbose > 2) cout << \" - max(w) = \" << max_vec(w, R) << endl;\n    P[m] = true;\n    R[m] = false;\n    if(verbose > 2) cout << \" - sum(R) = \" << sum_bool(R) << \"; sum(P) = \" << sum_bool(P) << endl;\n\n    VectorXd s = solve_s(Xty, XtX, P);\n    \n    // C. Inner loop\n    if(verbose > 2) cout << \"C. Inner loop\" << endl;\n\n    cnt_inner = 0;\n    while(min_vec(s, P) <= tol) {\n      if(verbose > 2) cout << \" - it (inner) \" << cnt_inner << endl;\n      \n      update_b(b, s, P, tol);\n      update_subsets(b, P, R, tol);\n      if(verbose > 2) cout << \" - sum(R) = \" << sum_bool(R) << \"; sum(P) = \" << sum_bool(P) << endl;\n\n      s = solve_s(Xty, XtX, P);\n\n      cnt_inner++;\n\n      // early break from the inner loop:\n      // the set of variables included in the model is empty\n      // example: \n      //   - 1st selected variable has b = 1e7, while tol = 1e-6\n      //   - so this single variable is removed from set P and P becomes empty.\n      if(sum_bool(P) == 0) break;\n      // added by JMb (02/16/2022): avoid cases where the loop runs indefinitely\n      else if(cnt_inner >= maxit_inner) return(-1);\n    }\n    // early break from the main loop: see above\n    if(sum_bool(P) == 0) break;\n\n    if(verbose > 2) cout << \"D. 
Store current results\" << endl;\n    b = s;\n    w = Xty - XtX * b; // X' (y - X b) Lagrange multiplier \n\n    cnt_main++;\n  }\n  \n  // check convergence\n  if(cnt_main >= maxit) {\n    return(-1);\n  }\n\n  // return\n  if(neg) {\n    b = -1 * b;\n  }\n  bhat_out = b;\n  selected_out = P; // passive set P (sign constraints not active)\n\n  return(0); // return success\n}\n\n// NNLS model fitting: active set algorithm \n// - individual-level data y and X\nint jburden_fit_nnls(const Eigen::VectorXd &y, const Eigen::MatrixXd& X,\n  Eigen::VectorXd& bhat_out, vector<bool>& selected_out,\n  double tol, bool neg, int maxit, int maxit_inner, int verbose)\n{\n  const int p = X.cols();\n\n  MatrixXd XtX(MatrixXd(p, p).setZero().  \n    selfadjointView<Lower>().rankUpdate(X.adjoint()));\n  VectorXd Xty = X.adjoint() * y;\n  if(neg) {\n    Xty = -1 * Xty;\n  }\n\n  // A. Initialization\n  if(verbose > 2) cout << \"A. Initialization\" << endl;\n\n  vector<bool> P(p, false); // passive set P (empty)\n  vector<bool> R(p, true); // active set R (full)\n\n  VectorXd b(p); // vector b of effect sizes\n  b.setZero(); // initialize b with zeros\n\n  VectorXd w = Xty; // Lagrange multiplier (Xty is already sign-flipped when neg) \n     \n  // B. Main loop\n  if(verbose > 2) cout << \"B. Main loop\" << endl;\n  \n  int cnt_main = 0, cnt_inner = 0;\n  int m; // index in R with the maximum w (R subset)\n  while((sum_bool(R) > 0) & (max_vec(w, R) > tol) & (cnt_main <= maxit)) {\n    if(verbose > 2) cout << \" - it (main) \" << cnt_main << endl;\n\n    m = which_max(w, R);\n    if(verbose > 2) cout << \" - max(w) = \" << max_vec(w, R) << endl;\n    P[m] = true;\n    R[m] = false;\n    if(verbose > 2) cout << \" - sum(R) = \" << sum_bool(R) << \"; sum(P) = \" << sum_bool(P) << endl;\n\n    VectorXd s = solve_s(Xty, XtX, P);\n    \n    // C. Inner loop\n    if(verbose > 2) cout << \"C. Inner loop\" << endl;\n\n    cnt_inner = 0;\n    while(min_vec(s, P) <= tol) {\n      if(verbose > 2) cout << \" - it (inner) \" << cnt_inner << endl;\n      \n      update_b(b, s, P, tol);\n      update_subsets(b, P, R, tol);\n      if(verbose > 2) cout << \" - sum(R) = \" << sum_bool(R) << \"; sum(P) = \" << sum_bool(P) << endl;\n\n      s = solve_s(Xty, XtX, P);\n\n      cnt_inner++;\n\n      // early break from the inner loop:\n      // the set of variables included in the model is empty\n      // example: \n      //   - 1st selected variable has b = 1e7, while tol = 1e-6\n      //   - so this single variable is removed from set P and P becomes empty.\n      if(sum_bool(P) == 0) break;\n      // added by JMb (02/16/2022): avoid cases where the loop runs indefinitely\n      else if(cnt_inner >= maxit_inner) return(-1);\n    }\n    // early break from the main loop: see above\n    if(sum_bool(P) == 0) break;\n\n    if(verbose > 2) cout << \"D. 
Store current results\" << endl;\n    b = s;\n    w = Xty - XtX * b; // X' (y - X b) Lagrange multiplier \n\n    cnt_main++;\n  }\n  \n  // check convergence\n  if(cnt_main >= maxit) {\n    return(-1);\n  }\n\n  // return\n  if(neg) {\n    b = -1 * b;\n  }\n  bhat_out = b;\n  selected_out = P; // passive set P (sign constraints not active)\n\n  return(0); // return success\n}\n\n/***************************************\n * NNLS test: jburden_test\n***************************************/\n\n// R: pchisq(10, 1, lower = FALSE)\ndouble jburden_pchisq(double q, int df, bool lower)\n{\n  double p;\n  boost::math::chi_squared dist(df);\n\n  if(lower) {\n    p = boost::math::cdf(dist, q);\n  } else {\n    p = boost::math::cdf(boost::math::complement(dist, q));\n  }\n\n  return(p);\n}\n\ndouble jburden_pchisq_bar(double x, Eigen::VectorXd& wt)\n{\n  // order of weights: w(n), w(n-1), ..., w(1), w(0)\n  // - the first weight corresponds to df = n\n  // - the last weight corresponds to df = 0\n\n  int n = wt.size(); // number of weights \n  int df = n - 1; // number of variables tested jointly\n\n  // sum of weights\n  double sum_wt = 0;\n  for(int i = 0; i < n; ++i) {\n    sum_wt += wt[i];\n  }\n\n  // sum of (1 - p-value) for non-zero weights\n  // pp denotes (1 - p-value)\n  double sum_pp = 0.0;\n  int i = 0, dfi = df;\n  for(; i < (n - 1); ++i, --dfi) {\n    double pp_i = jburden_pchisq(x, dfi, false);\n    sum_pp += wt[i] * pp_i;\n\n    //debug\n    /* cout << \" -- jburden_pchisq_bar: i = \" << i */ \n    /*   << \" pp_i = \" << pp_i << \" wt[i] = \" << wt[i] */ \n    /*   << \" sum_pp = \" << sum_pp << endl; */\n  }\n\n  // output p-value\n  // (deprecated) option 1: \n  //  - can produce negative p-values because sum_wt is not exactly 1\n  //  - that might happen only when p-values are very small\n  /* double p = 1.0 - sum_wt + sum_pp; */ \n  // option 2: \n  // - guaranteed to produce non-negative p-values, as wt >= 0\n  double p = sum_pp;\n  \n  // debug\n  /* cout << \"jburden_pchisq_bar: sum_wt = \" << sum_wt << endl; */\n  /* cout << \"jburden_pchisq_bar: p = \" << p << endl; */\n\n  return(p);\n}\n\n/***************************************\n * NNLS adaptive weights\n***************************************/\n\n// sample k distinct integers from [0, n-1] (Floyd's sampling algorithm)\nvoid _sample(int n, int k, vector<int> &l, std::mt19937_64& gen)\n{\n  unordered_set<int> sample;\n  \n  for(int i = n - k; i < n; ++i) {\n    std::uniform_int_distribution<int> distr(0, i);\n    int s = distr(gen);\n\n    if(sample.find(s) == sample.end()) {\n      sample.insert(s);\n    } else {\n      sample.insert(i);\n    }\n  }\n\n  // copy unordered set (sample) to output vector (l)\n  for(unordered_set<int>::iterator it = sample.begin(); it != sample.end(); ++it) {\n    l.push_back(*it);\n  }\n  // sort output vector (l)\n  sort(l.begin(), l.end());\n}\n\nvoid jburden_nchoosek_sample(int n, int k, int s, list<vector<int>> &ll, std::mt19937_64& gen)\n{\n  for(int i = 0; i < s; ++i) {\n    vector<int> sample;\n    sample.reserve(k);\n    _sample(n, k, sample, gen);\n    ll.push_back(sample);\n  }\n}\n\nint jburden_wts_adapt(const Eigen::MatrixXd& V, Eigen::VectorXd& wts_out, std::mt19937_64& gen,\n  int n_approx, bool normalize, int verbose)\n{\n  int n = V.cols(); \n  int nw = n + 1;  \n\n  // output vector of weights: w(0), w(1), ..., w(n-1), w(n)\n  vector<double> wts(nw, 0.0);\n  vector<int> ind_exact, ind_approx;\n  \n  // A. 
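w(n), the MVN orthant probability of V: 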
w(n) = wts[nw - 1]\n  double wts_n = jburden_pnorm(V);\n  if(wts_n < 0) {\n    std::cout << \"ERROR: computing NNLS weight(n = \" << n << \") failed; \" \n      << \"pnorm(V) returned negative value (\" << wts_n << \")\\n\";\n    return(-1);\n  }\n  _assign_wts(wts, nw - 1, wts_n, verbose);\n  ind_exact.push_back(nw - 1);\n      \n  // B. w(0)\n  MatrixXd Vinv = _inverse(V);\n  double wts_0 = jburden_pnorm(Vinv);\n  if(wts_0 < 0) {\n    std::cout << \"ERROR: computing NNLS weight(0) failed; \" \n      << \"pnorm(Vinv) returned negative value (\" << wts_0 << \")\\n\";\n    return(-1);\n  }\n  _assign_wts(wts, 0, wts_0, verbose);\n  ind_exact.push_back(0);\n\n  // C. Outer loop over weights: w(1), w(2), ..., w(n-1)\n  for(int i = 1; i < (nw - 1); ++i) {\n    // NB: choose(40, 20) = 137,846,528,820\n    // (deprecated due to overflow) \n    // int n_sets = jburden_choose(n, i);\n    double n_sets_numeric = jburden_choose_boost(n, i);\n    /* // skip this\n    // check if n_set is not overflowed\n    int max_int = std::numeric_limits<int>::max();\n    bool overflow = (n_sets_numeric > (double)(max_int));\n    if(overflow) { throw std::runtime_error(\"jburden_wts_adapt: integer overflow for #sets\"); }\n    */\n\n    // approximate?\n    bool approx = (n_approx > 0) & ((double)n_approx < n_sets_numeric);\n    \n    // sets of (n_approx or n_sets) indices from the range [0; n]\n    list<vector<int>> sets;\n    if(approx) {\n      // n_approx randomly sampled sets\n      jburden_nchoosek_sample(n, i, n_approx, sets, gen);\n      ind_approx.push_back(i);\n    } else {\n      // all possible sets\n      jburden_nchoosek(n, i, sets);\n      ind_exact.push_back(i);\n    }\n    int n_sets_comp = sets.size();\n\n    // print info.\n    if(verbose) {\n      cout << \" w[\" << i << \"]\" << \"; #sets = \" << n_sets_numeric << \n        \"; approx. = \" << approx << \n        \"; #set to compute = \" << n_sets_comp << endl;\n    }\n    \n    // Inner loop over the components in the sum:\n    //  pnorm(V_alpha'^{-1}) * pnorm(V_alpha|alpha') over |alpha| = i\n    vector<double> comp;\n    comp.reserve(n_sets_comp);\n\n    int j = 0;\n    list<vector<int>>::iterator it = sets.begin(); \n    for(; it != sets.end(); ++it, ++j) {\n      // indices: alpha\n      vector<int> s2 = *it; \n      // indices: complement to alpha\n      vector<int> s1; \n      _complement(n, s2, s1);\n      \n      // subset V: V11, V22, V12 & V21\n      Eigen::MatrixXd V11 = _subset_matrix(V, s1, s1);\n      Eigen::MatrixXd V22 = _subset_matrix(V, s2, s2);\n      Eigen::MatrixXd V12 = _subset_matrix(V, s1, s2);\n      Eigen::MatrixXd V21 = _subset_matrix(V, s2, s1);\n\n      // matrix V220 = V22 - V21 solve(V11, V12)\n      MatrixXd V11inv = _inverse(V11);\n      MatrixXd V220 = V22 - V21 * V11inv * V12;\n\n      double w_comp = jburden_pnorm(V11inv) * jburden_pnorm(V220);\n      if(w_comp < 0) {\n        std::cout << \"ERROR: computing NNLS weight(i = \" << i << \")\" \n          << \" & component(j = \" << j << \" out of \" << n_sets_comp << \") failed; \" \n          << \"pnorm(V11inv)*pnorm(V220) returned negative value (\" << w_comp << \")\\n\";\n        return(-1);\n      }\n      comp.push_back(w_comp);\n    }\n\n    // sum of elements in comp\n    double sum_comp = accumulate(comp.begin(), comp.end(), 0.0);\n\n    // assign wts[i] using elements in comp\n    if(approx) {\n      // 1. Approximated weight\n      // No. elements in comp = n_sets_comp\n      // No. 
elements in total = n_sets\n      // Approximation by sum of normals: \n      //   wts[i] = n_sets * mu, where mu = sum(comp) / n_sets_comp\n      _assign_wts(wts, i, (sum_comp / n_sets_comp) * n_sets_numeric, verbose);\n    } else {\n      // 2. Exact weight, i.e., n_sets_comp = n_sets\n      _assign_wts(wts, i, sum_comp, verbose);\n    }\n  }\n\n  // debug\n  if(verbose > 2) {\n    cout << \"jburden_wts_adapt: approx. wts (before norm.) = \";\n    for(vector<int>::iterator it = ind_approx.begin(); it < ind_approx.end(); ++it) {\n      cout << \" \" << wts[*it] << \"; \";\n    }\n    cout << endl;\n  }\n\n  // D. normalize in two steps\n  // 1. sum(approximated weights) = 1 - sum(exact weights)\n  //    - exact weights are not touched\n  // 2. max weight = 1.0 - sum(all weights except max weight)\n  //    - force the sum of all weights to be 1.0\n  //      (it will not be exactly equal to 1.0 \n  //       due to the limited numerical precision)\n  if(normalize) {\n    // D1. normalize approximate weights\n    if(ind_approx.size()) {\n      // compute scaling factor = sum_expected / sum_empirical =\n      // = (1 - sum_exact) / sum_approx\n      double sum_exact = 0.0;\n      for(vector<int>::iterator it = ind_exact.begin(); it < ind_exact.end(); ++it) {\n        sum_exact += wts[*it];\n      }\n      double sum_approx = 0.0;\n      for(vector<int>::iterator it = ind_approx.begin(); it < ind_approx.end(); ++it) {\n        sum_approx += wts[*it];\n      }\n      double sc_factor = (1 - sum_exact) / sum_approx;\n\n      // update weights\n      for(vector<int>::iterator it = ind_approx.begin(); it < ind_approx.end(); ++it) {\n        wts[*it] *= sc_factor;\n      }\n    }\n\n    // D2. normalize all weights\n    // find the maximum weight\n    vector<double>::iterator it_max = max_element(wts.begin(), wts.end());\n    // assign [1 - (sum(wts) - max)] to the max element \n    //  - thus, sum(wts) = 1\n    double sum_wts = accumulate(wts.begin(), wts.end(), 0.0);\n    *it_max = 1.0 - (sum_wts - *it_max);\n  }\n\n  if(verbose > 2) {\n    // debug\n    cout << \"jburden_wts_adapt: approx. wts (after norm.) 
= \";\n    for(vector<int>::iterator it = ind_approx.begin(); it < ind_approx.end(); ++it) {\n      cout << \" \" << wts[*it] << \"; \";\n    }\n    cout << endl;\n\n    // debug\n    double sum_wts = accumulate(wts.begin(), wts.end(), 0.0);\n    cout << \"jburden_wts_adapt: sum_wts = \" << sum_wts << endl;\n  }\n\n  // return\n  //  - returned weights are in the reverse order:\n  //    w(n), w(n-1), ..., w(1), w(0)\n  wts_out.resize(nw);\n  for(int i = 0; i < nw; ++i) {\n    wts_out[i] = wts[nw - 1 - i];\n  }\n\n  // return success\n  return(0);\n}\n\n/***************************************\n * NLLS: the main function: \n * - model fitting\n * - update V matrix & compute weigts\n * - compute p-values using chi2bar \n * - return min(pval_pos_nnls, pval_neg_nnls)\n***************************************/\n\nEigen::MatrixXd _npd(const Eigen::MatrixXd& X, double eps = 1e-6)\n{\n  // method 1: diag(X) = diag(X) + eps\n  /* V = 0.5 * (V + V.transpose()); // force it symmetric */\n  /* double eps = 1e-4; // to be a function argument */\n  /* V.diagonal().array() += eps; // force positive-definite */\n  \n  // method 2: EVD (negative eigenvalues -> 0) + diag(X) = diag(X) + eps\n  MatrixXd Y = 0.5 * (X + X.transpose());\n  SelfAdjointEigenSolver<MatrixXd> solver(Y);\n  VectorXd D = solver.eigenvalues();\n  MatrixXd V = solver.eigenvectors();\n  VectorXd Dplus = D.cwiseMax(0.0);\n  MatrixXd Z = V * Dplus.asDiagonal() * V.transpose();\n  Z.diagonal().array() += eps;\n\n  return(Z);\n}\n\n// main function for the NNLS test\ndouble jburden_test(const Eigen::VectorXd &y, const Eigen::MatrixXd& X, std::mt19937_64& gen,\n  int df, double tol, int n_approx, bool strict, int verbose)\n{\n  // dimensions\n  int n = y.size(), p = X.cols();\n  if(df == 0) {\n    df = n - p;\n  }\n  if(verbose) cout << \" n = \" << n << \"; p = \" << p << \"; df = \" << df << endl;\n\n  // OLS solution\n  if(verbose) cout << \" - OLS\" << endl;\n\n  MatrixXd XtX(MatrixXd(p, p).setZero().  \n    selfadjointView<Lower>().rankUpdate(X.adjoint()));\n  VectorXd Xty = X.adjoint() * y;\n\n  LLT<MatrixXd> llt(XtX);\n  VectorXd bhat = llt.solve(Xty);\n\n  VectorXd fitted = X * bhat;\n  VectorXd resid = y - fitted;\n  double sigma = resid.norm() / sqrt((double)(df));\n  double sigma2 = sigma * sigma;\n  /* MatrixXd V = sigma2 * llt.matrixL().solve(MatrixXd::Identity(p, p)); */\n  MatrixXd V = sigma2 * llt.solve(MatrixXd::Identity(p, p));\n  /* MatrixXd V = sigma2 * XtX.inverse(); */\n\n  /* VectorXd Vib = (1.0 / sigma2) * XtX * bhat; */\n  /* double stat = bhat.adjoint() * Vib; */\n  /* double pval = jburden_pchisq(stat, p, false); */\n\n  // weights for the NNLS test\n  // - common for positive NNLS and negative NNLS\n  if(verbose) cout << \" - NNLS weights\" << endl;\n\n  // debug\n  if(verbose > 2) {\n    VectorXcd evals_complex = V.eigenvalues();\n    VectorXd evals = evals_complex.real(); \n    cout << \" - jburden_test: \" << evals.size() << \" eigenvalues = \" << evals << endl;\n\n    const static IOFormat CSVFormat(StreamPrecision, DontAlignCols, \", \", \"\\n\");\n    string name = \"V.matirx.tmp\";\n    ofstream file(name.c_str());\n    file << V.format(CSVFormat);\n    file.close();\n  }\n\n  // make matrix V positive-definite for computing weights\n  //  - not to be used for computing the test statistic\n  //  - the significance drops a lot, e.g. 
LDL-PCSK9, \n  //    (i) -log10(P) = 94 for the corrected V;\n  //    (ii) -log10(P) = 288 for the original V\n  MatrixXd Vpd = V; //_npd(V);\n\n  // exact weights\n  /* VectorXd w = jburden_wts(V); */\n  // adaptive weights: the key argument is n_approx\n  VectorXd w;\n  int ret = jburden_wts_adapt(Vpd, w, gen, n_approx, true, verbose);\n  if(ret < 0) {\n    if(strict) {\n      std::cout << \"ERROR: computing NNLS weights failed\"\n        << \" by jburden_wts_adapt\\n\";\n      exit(-1);\n    } else {\n      return(-1.0); // return p-value = -1\n    }\n  }\n  \n  // TEMP\n  /* MatrixXd D = MatrixXd::Constant(p, p, 0.0); */\n  /* D.diagonal().array() = 1.0; */\n  /* VectorXd w = jburden_wts_adapt(D, n_approx, true, verbose); */\n\n  // positive NNLS: b >= 0\n  if(verbose) cout << \" - NNLS b >= 0\" << endl;\n  \n  VectorXd bhat_pos;\n  vector<bool> selected_pos;\n  int ret_pos = jburden_fit_nnls(y, X, bhat_pos, selected_pos, tol, false); // default maxit, maxit_inner, verbose\n  if(ret_pos < 0) {\n    if(strict) {\n      std::cout << \"ERROR: computing NNLS model fit failed\"\n        << \" by jburden_fit_nnls\\n\";\n      exit(-1);\n    } else {\n      return(-1.0); // return p-value = -1\n    }\n  }\n  \n  VectorXd Vib_pos = (1.0 / sigma2) * XtX * bhat_pos;\n  double stat_pos = bhat_pos.adjoint() * Vib_pos;\n  double pval_pos = jburden_pchisq_bar(stat_pos, w);\n  \n  // negative NNLS: b <= 0\n  if(verbose) cout << \" - NNLS b <= 0\" << endl;\n\n  VectorXd bhat_neg;\n  vector<bool> selected_neg;\n  int ret_neg = jburden_fit_nnls(y, X, bhat_neg, selected_neg, tol, true); // default maxit, maxit_inner, verbose\n  if(ret_neg < 0) {\n    if(strict) {\n      std::cout << \"ERROR: computing NNLS model fit failed\"\n        << \" by jburden_fit_nnls\\n\";\n      exit(-1);\n    } else {\n      return(-1.0); // return p-value = -1\n    }\n  }\n\n  VectorXd Vib_neg = (1.0 / sigma2) * XtX * bhat_neg;\n  double stat_neg = bhat_neg.adjoint() * Vib_neg;\n  double pval_neg = jburden_pchisq_bar(stat_neg, w);\n\n  // return\n  double pval_min2 = min(pval_pos, pval_neg);\n\n  return(pval_min2);\n}\n\n\n/***************************************\n * Methods of class NNLS\n***************************************/\n\n//-------------------\n// NNLS constructors\n//-------------------\nNNLS::NNLS()\n{\n  // assign\n  napprox = 10;\n  normalize = true;\n  tol = 1e-6;\n  maxit = 1000;\n  maxit_inner = 500;\n  strict = false;\n  verbose = 0;\n  // defaults\n  set_defaults();\n}\n\n\nNNLS::NNLS(int napprox_, bool normalize_, double tol_, bool strict_, int verbose_)\n{\n  // assign\n  napprox = napprox_;\n  normalize = normalize_;\n  tol = tol_;\n  maxit = 1000;\n  maxit_inner = 500;\n  strict = strict_;\n  verbose = verbose_;\n  // defaults\n  set_defaults();\n}\n\nvoid NNLS::set_defaults()\n{\n  p = 0;\n  nw = 0;\n  msg_error = \"\";\n  fit_pos.executed = false;\n  fit_neg.executed = false;\n  fit_pos.converged = false;\n  fit_neg.converged = false;\n  pval_min2 = -1.0;\n}\n\n//-------------------\n// NNLS weights\n//-------------------\n\nvoid NNLS::compute_weights()\n{\n  if(verbose) cout << \" NNLS: Weights step\\n\";\n\n  // check previous steps\n  if(p == 0) return;\n  if(V.cols() != p) return;\n\n  // assign Vpd\n  Vpd = V; // _npd(V);\n\n  VectorXd w;\n\n  int ret = jburden_wts_adapt(Vpd, w, *gen, napprox, normalize, verbose);\n  if(ret < 0) {\n    msg_error = \"error in computing NNLS weights\";\n\n    if(strict) {\n      std::cout << \"ERROR: computing NNLS weights failed\"\n        << \" by jburden_wts_adapt\\n\";\n      exit(-1);\n    } else 
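/* non-strict mode: leave wts unset (nw stays 0) so later fit steps bail out */ 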
{\n      return;\n    }\n  }\n\n  // assign\n  wts = w;   \n  nw = wts.size();\n}\n\n//-------------------\n// NNLS fit (X, y)\n//-------------------\n\n// OLS step\nvoid NNLS::fit_ols(const Eigen::VectorXd &y, const Eigen::MatrixXd& X, \n  int df)\n{\n  if(verbose) cout << \" NNLS: OLS step\\n\";\n\n  int n = X.rows();\n  p = X.cols();\n  if(df == 0) {\n    df = n - p;\n  }\n\n  MatrixXd XtX(MatrixXd(p, p).setZero().  \n    selfadjointView<Lower>().rankUpdate(X.adjoint()));\n  VectorXd Xty = X.adjoint() * y;\n\n  LLT<MatrixXd> llt(XtX);\n  VectorXd bhat = llt.solve(Xty);\n\n  VectorXd fitted = X * bhat;\n  VectorXd resid = y - fitted;\n  double ss = resid.array().square().sum();\n\n  // assign sigma2\n  sigma2 = ss / df;\n  // assign V\n  /* V = sigma2 * llt.matrixL().solve(MatrixXd::Identity(p, p)); */\n  V = sigma2 * llt.solve(MatrixXd::Identity(p, p));\n  // assign XX\n  XX = XtX;\n  // assign bhat\n  bhat_ols = bhat;\n\n  // (optional) OLS test statistic\n  VectorXd Vib = (1.0 / sigma2) * XX * bhat_ols;\n  stat_ols = bhat_ols.adjoint() * Vib;\n  /* double pval = jburden_pchisq(stat, p, false); */\n}\n\n// NNLS fit & inference step\nvoid NNLS::pw_calc_pvals()\n{\n  // assign min p-value if both fits are ok\n  if(fit_pos.converged & fit_neg.converged) {\n    // re-calculate pval_pos\n    VectorXd Vib = (1.0 / sigma2) * XX * fit_pos.bhat;\n    fit_pos.stat = fit_pos.bhat.adjoint() * Vib;\n    fit_pos.pval = jburden_pchisq_bar(fit_pos.stat, wts);\n    // re-calculate pval_neg\n    Vib = (1.0 / sigma2) * XX * fit_neg.bhat;\n    fit_neg.stat = fit_neg.bhat.adjoint() * Vib;\n    fit_neg.pval = jburden_pchisq_bar(fit_neg.stat, wts);\n    // pval min2\n    pval_min2 = min(fit_pos.pval, fit_neg.pval);\n    best_fit = (fit_pos.pval < fit_neg.pval); // 1 = pos, 0 = neg\n  }\n}\n\nvoid NNLS::fit_nnls(const Eigen::VectorXd &y, const Eigen::MatrixXd& X)\n{\n  if(verbose) cout << \" NNLS: Fit step\\n\";\n\n  // NNLS pos: b >= 0\n  fit_nnls_sign(y, X, false, fit_pos);\n  // NNLS neg: b <= 0\n  fit_nnls_sign(y, X, true, fit_neg);\n\n  // assign min p-value if both fits are ok\n  if(fit_pos.converged & fit_neg.converged) {\n    pval_min2 = min(fit_pos.pval, fit_neg.pval);\n    best_fit = (fit_pos.pval < fit_neg.pval); // 1 = pos, 0 = neg\n  }\n}\n\nvoid NNLS::fit_nnls_sign(const Eigen::VectorXd &y, const Eigen::MatrixXd& X, bool neg, struct FitNNLS& fit)\n{\n  // check previous steps\n  if(p == 0) return;\n  if(XX.cols() != p) return;\n  if(nw == 0) return;\n  if(wts.size() != p + 1) return;\n\n  fit.executed = true;\n  VectorXd bhat;\n  vector<bool> selected;\n  int ret = jburden_fit_nnls(y, X, bhat, selected, tol, neg, maxit, maxit_inner, verbose);\n  if(ret < 0) {\n    msg_error = \"error in computing NNLS model fit\";\n\n    if(strict) {\n      std::cout << \"ERROR: computing NNLS model fit failed\"\n        << \" by jburden_fit_nnls\\n\";\n      exit(-1);\n    } else {\n      fit.converged = false;\n      return;\n    }\n  }\n \n  // assign results of model fitting\n  fit.converged = true;\n  fit.bhat = bhat;\n  fit.selected = selected;\n\n  // compute test statistic & assign\n  VectorXd Vib = (1.0 / sigma2) * XX * bhat;\n  fit.stat = bhat.adjoint() * Vib;\n  fit.pval = jburden_pchisq_bar(fit.stat, wts);\n}\n\n//-------------------\n// NNLS fit (b, V)\n//-------------------\n\n// NNLS fit & inference step\nvoid NNLS::ss_fit_nnls()\n{\n  if(verbose) cout << \" NNLS: Fit step\\n\";\n\n  VectorXd Xty = Vinv * bhat_ols;\n  MatrixXd XtX = Vinv;\n\n  // NNLS pos: b >= 0\n  ss_fit_nnls_sign(Xty, XtX, 
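/* neg = */ 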
false, fit_pos);\n  // NNLS neg: b <= 0\n  ss_fit_nnls_sign(Xty, XtX, true, fit_neg);\n\n  // assign min p-value if both fits are ok\n  if(fit_pos.converged & fit_neg.converged) {\n    pval_min2 = min(fit_pos.pval, fit_neg.pval);\n    best_fit = (fit_pos.pval < fit_neg.pval); // 1 = pos, 0 = neg\n  }\n}\n\nvoid NNLS::ss_fit_nnls_sign(const Eigen::VectorXd &Xty, const Eigen::MatrixXd& XtX, bool neg, struct FitNNLS& fit)\n{\n  // check previous steps\n  if(p == 0) return;\n  if(nw == 0) return;\n  if(wts.size() != p + 1) return;\n\n  fit.executed = true;\n  VectorXd bhat;\n  vector<bool> selected;\n  int ret = jburden_fit_nnls_cprod(Xty, XtX, bhat, selected, tol, neg, maxit, maxit_inner, verbose);\n  if(ret < 0) {\n    msg_error = \"error in computing NNLS model fit\";\n\n    if(strict) {\n      std::cout << \"ERROR: computing NNLS model fit failed\"\n        << \" by jburden_fit_nnls_cprod\\n\";\n      exit(-1);\n    } else {\n      fit.converged = false;\n      return;\n    }\n  }\n \n  // assign results of model fitting\n  fit.converged = true;\n  fit.bhat = bhat;\n  fit.selected = selected;\n\n  // compute test statistic & assign\n  fit.stat = bhat.adjoint() * Vinv * bhat;\n  fit.pval = jburden_pchisq_bar(fit.stat, wts);\n}\n\n\n//-------------------\n// NNLS main\n//-------------------\n\nvoid NNLS::pw_weights(int napprox_)\n{\n  // check dimensions\n  if(V.rows() == 0) { throw std::runtime_error(\"pw_weights: dimensions (nrows = 0)\"); }\n  if(V.cols() == 0) { throw std::runtime_error(\"pw_weights: dimensions (ncols = 0)\"); }\n  if(V.rows() != V.cols()) { throw std::runtime_error(\"pw_weights: dimensions\"); }\n\n  // code copied from compute_weights() with input napprox_ instead of class member napprox\n  VectorXd w;\n\n  int ret = jburden_wts_adapt(V, w, *gen, napprox_, normalize, verbose);\n  if(ret < 0) { \n    if(strict) { throw std::runtime_error(\"pw_weights: error in jburden_wts_adapt\"); }\n    else { return; }\n  }\n\n  // assign\n  napprox = napprox_;\n  wts = w;   \n  nw = wts.size();\n}\n\nvoid NNLS::pw_weights(const Eigen::MatrixXd& V_)\n{\n  // check dimensions\n  if(V_.rows() == 0) { throw std::runtime_error(\"pw_weights: dimensions (nrows = 0)\"); }\n  if(V_.cols() == 0) { throw std::runtime_error(\"pw_weights: dimensions (ncols = 0)\"); }\n  if(V_.rows() != V_.cols()) { throw std::runtime_error(\"pw_weights: dimensions\"); }\n\n  // code copied from compute_weights() with input V_ instead of class member V\n  VectorXd w;\n\n  int ret = jburden_wts_adapt(V_, w, *gen, napprox, normalize, verbose);\n  if(ret < 0) { \n    if(strict) { throw std::runtime_error(\"pw_weights: error in jburden_wts_adapt\"); }\n    else { return; }\n  }\n\n  // assign\n  wts = w;   \n  nw = wts.size();\n}\n\nvoid NNLS::pw_weights(const Eigen::VectorXd& wts_)\n{\n  // check dimensions\n  if(wts_.size() == 0) { throw std::runtime_error(\"pw_weights: dimensions (input weights size = 0)\"); }\n\n  // assign\n  wts = wts_;   \n  nw = wts.size();\n}\n\nvoid NNLS::ss_weights(const Eigen::MatrixXd& V_)\n{\n  if(verbose) print_param();\n\n  // assign\n  V = V_;\n  p = V.rows();\n\n  // check dimensions before inverting V\n  if(p == 0) { throw std::runtime_error(\"ss_weights: dimensions (p = 0)\"); }\n  if(V.rows() != V.cols()) { throw std::runtime_error(\"ss_weights: dimensions\"); }\n\n  Vinv = _inverse(V);\n\n  compute_weights();\n}\n\n\nvoid NNLS::ss_run(const Eigen::VectorXd &bhat_)\n{\n  if(verbose) print_param();\n\n  // assign\n  bhat_ols = bhat_;\n  p = bhat_ols.size();\n\n  // check dimensions\n  if(p == 0) { throw 
std::runtime_error(\"ss_run: dimensions (p = 0)\"); }\n  if(bhat_ols.size() != p) { throw std::runtime_error(\"ss_run: dimensions (bhat.size() != p)\"); }\n  if(V.rows() == 0) { throw std::runtime_error(\"ss_run: dimensions (V.rows() = 0)\"); }\n  if(Vinv.rows() == 0) { throw std::runtime_error(\"ss_run: dimensions (Vinv.rows() = 0)\"); }\n  if(V.rows() != p) { throw std::runtime_error(\"ss_run: dimensions\"); }\n  if(Vinv.rows() != p) { throw std::runtime_error(\"ss_run: dimensions\"); }\n\n  // check weights are pre-computed\n  if(nw == 0) { throw std::runtime_error(\"ss_run: weights (nw == 0)\"); }\n  if(nw != (p + 1)) { throw std::runtime_error(\"ss_run: weights (nw != p + 1\"); }\n  if(wts.size() != nw) { throw std::runtime_error(\"ss_run: weights (wts.size() != nw\"); }\n\n  // fit & get p-values\n  ss_fit_nnls(); \n\n  if(verbose) print_results();\n}\n\nvoid NNLS::ss_run(const Eigen::VectorXd &bhat_, const Eigen::MatrixXd& V_)\n{\n  if(verbose) print_param();\n\n  // assign\n  bhat_ols = bhat_;\n  V = V_;\n  Vinv = _inverse(V);\n  p = bhat_ols.size();\n\n  // check dimensions\n  if(V.rows() != V.cols()) { throw std::runtime_error(\"ss_run: dimensions\"); }\n  if(bhat_ols.size() != V.cols()) { throw std::runtime_error(\"ss_run: dimensions\"); }\n\n  compute_weights();\n  ss_fit_nnls(); \n\n  if(verbose) print_results();\n}\n\nvoid NNLS::pw_run(const Eigen::VectorXd &y, const Eigen::MatrixXd& X, int df)\n{\n  if(nw == 0) { throw std::runtime_error(\"pw_run: nw == 0\"); }\n\n  if(verbose) print_param();\n\n  fit_ols(y, X, df);\n  /* compute_weights(); */\n  fit_nnls(y, X);\n\n  if(verbose) print_results();\n}\n\nvoid NNLS::run(const Eigen::VectorXd &y, const Eigen::MatrixXd& X, int df)\n{\n  if(verbose) print_param();\n\n  fit_ols(y, X, df);\n  compute_weights();\n  fit_nnls(y, X);\n\n  if(verbose) print_results();\n}\n"
  },
  {
    "path": "src/NNLS.hpp",
    "content": "/* \n\n   This file is part of the regenie software package.\n\n   Copyright (c) 2020-2024 Joelle Mbatchou, Andrey Ziyatdinov & Jonathan Marchini\n\n   Permission is hereby granted, free of charge, to any person obtaining a copy\n   of this software and associated documentation files (the \"Software\"), to deal\n   in the Software without restriction, including without limitation the rights\n   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n   copies of the Software, and to permit persons to whom the Software is\n   furnished to do so, subject to the following conditions:\n\n   The above copyright notice and this permission notice shall be included in all\n   copies or substantial portions of the Software.\n\n   THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n   SOFTWARE.\n\n*/\n\n#ifndef NNLS_H\n#define NNLS_H\n\n#include <vector>\n#include <unordered_set>\n#include <numeric>\n#include <iostream>\n#include <fstream>\n#include <random>\n#include <list>\n\n#include <boost/math/distributions.hpp>\n#include <boost/math/special_functions/binomial.hpp> // binomial_coefficient\n\n#include \"Eigen/Dense\"\n\nusing namespace Eigen;\nusing namespace std;\n\n/****************************************************\n * Declaration of main functions for Joint Burden test \n * with the prefix \"jburden_\" \n****************************************************/\n\n// main function for the NNLS test\n// - input: residualized y and X (covariates and mean are projected out)\n// - output: bhat & p-values for three models\n//   -- OLS: y = Xb + e\n//   -- NNLS positive: y = Xb + e with b >= 0\n//   -- NNLS negative: y = Xb + e with b <= 0\ndouble jburden_test(const Eigen::VectorXd &y, const Eigen::MatrixXd& X, std::mt19937_64& gen,\n  int df = 0, double tol = 1e-6, int n_approx = 100, bool \n  strict = false, int verbose = 0);\n\n// compute exact weights for the NNLS test\nEigen::VectorXd jburden_wts(const Eigen::MatrixXd& V, int verbose = 0);\n// compute adaptive weights for the NNLS test\nint jburden_wts_adapt(const Eigen::MatrixXd& V, Eigen::VectorXd& wts_out, std::mt19937_64& gen,\n    int n_approx = 100, bool normalize = true, int verbose = 0);\n\n// compute CDF for MVN\ndouble jburden_pnorm(const Eigen::MatrixXd& A, \n  int maxpts = 25000, double abseps = 1e-3, int verbose = 0);\n// the active set algorithm for fitting NNLS\nint jburden_fit_nnls(const Eigen::VectorXd &y, const Eigen::MatrixXd& X, \n  Eigen::VectorXd& bhat_out, vector<bool>& selected_out,\n  double tol = 1e-6, bool neg = false, int maxit = 1000, int maxit_inner = 500, int verbose = 0);\n// the active set algorithm for fitting NNLS\nint jburden_fit_nnls_cprod(const Eigen::VectorXd &Xty_, const Eigen::MatrixXd& XtX_,\n  Eigen::VectorXd& bhat_out, vector<bool>& selected_out,\n  double tol = 1e-6, bool neg = false, int maxit = 1000, int maxit_inner = 500, int verbose = 0);\n// NNLS p-value \ndouble jburden_pchisq_bar(double x, Eigen::VectorXd& wt);\n\n// the number of all set of k out of n\nint jburden_choose(int n, int k);\n// enumerate all sets of k out of n numbers\nint 
jburden_choose(int n, int k);\ndouble jburden_choose_boost(int n, int k);\nvoid jburden_nchoosek(int n, int k, std::list<std::vector<int>> &ll);\nvoid jburden_nchoosek_sample(int n, int k, int s, list<vector<int>> &ll, std::mt19937_64& gen);\n// submatrix\nEigen::MatrixXd jburden_subset_matrix(const Eigen::MatrixXd& V, vector<int> &rows, vector<int> &cols, int method = 1);\n\nstruct FitNNLS \n{\n  bool executed;\n  bool converged;\n  int it;\n  VectorXd bhat;\n  vector<bool> selected;\n  double stat;\n  double pval = -1;\n};\n\n/*\n * TODO: \n *  - method to check params (e.g. maxit > 0) and write an error message;\n *  - add epsilon parameter for _npd function; use case epsilon = 0 for testing;\n *  - optimize computation of weights by parallelization;\n */\nclass NNLS \n{\n  public:\n    int napprox;\n    bool normalize;\n    double tol;\n    int maxit;\n    int maxit_inner;\n    bool strict;\n    int verbose;\n    string msg_error;\n\n    // for random number generation\n    std::mt19937_64* gen;\n\n    // 1. OLS\n    int p; // number of independent variables in y ~ X model, i.e. p = ncol(X)\n    int df;\n    MatrixXd XX;\n    double sigma2;\n    MatrixXd V;\n    VectorXd bhat_ols;\n    double stat_ols;\n    // 2a. fit(y, X): Positive-definite V \n    MatrixXd Vpd;\n    // 2b. fit(b, V): Inverse V\n    MatrixXd Vinv;\n    // 3. Weights for NNLS test\n    int nw;\n    VectorXd wts;\n    // 4. NNLS model fits\n    FitNNLS fit_pos;\n    FitNNLS fit_neg;\n    // 5. P-value\n    bool best_fit; // 1 = pos, 0 = neg\n    double pval_min2;\n\n    void set_defaults();\n    void run(const Eigen::VectorXd &y, const Eigen::MatrixXd& X, int df = 0);\n    // ss = summary statistics\n    void ss_run(const Eigen::VectorXd &bhat_, const Eigen::MatrixXd& V_);\n    void ss_run(const Eigen::VectorXd &bhat_);\n    void ss_weights(const Eigen::MatrixXd& V_);\n    // pw = pre-computed weights\n    void pw_run(const Eigen::VectorXd &y, const Eigen::MatrixXd& X, int df = 0);\n    void pw_weights(const Eigen::MatrixXd& V_);\n    void pw_weights(const Eigen::VectorXd& wts_);\n    void pw_weights(int napprox_);\n    void pw_calc_pvals();\n\n    void compute_weights();\n\n    void fit_ols(const Eigen::VectorXd &y, const Eigen::MatrixXd& X, int df = 0);\n    void fit_nnls(const Eigen::VectorXd &y, const Eigen::MatrixXd& X);\n    void fit_nnls_sign(const Eigen::VectorXd &y, const Eigen::MatrixXd& X, bool neg, struct FitNNLS&);\n\n    void ss_fit_nnls();\n    void ss_fit_nnls_sign(const Eigen::VectorXd &Xty, const Eigen::MatrixXd& XtX, bool neg, struct FitNNLS& fit);\n     \n    void print_param() \n    { \n      cout << \"NNLS parameters: (weights) napprox = \" << napprox << \", normalize = \" << normalize \n        << \"; (model fitting) tol = \" << tol << \", maxit = \" << maxit \n        << \"; (general) verbose = \" << verbose\n        << endl;\n    }\n    void print_results() \n    {\n      cout << \"NNLS results: (model fitting pos/neg) executed = \" << fit_pos.executed << \"/\" << fit_neg.executed \n        << \", converged = \" << fit_pos.converged << \"/\" << fit_neg.converged \n        << \"; (inference) pval_min2 = \" << pval_min2 \n        << \"; (general) error message = \\\"\" << msg_error << \"\\\"\" \n        << endl;\n    }\n\n    // get NNLS results\n    FitNNLS* get_best_fit(bool pos) \n    { \n      FitNNLS* ret = pos ? 
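/* pos = true selects the b >= 0 fit */ 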
&fit_pos : &fit_neg; \n      return(ret);\n    };\n\n    // print info\n    string str_bhat_i(unsigned i) \n    {\n      ostringstream buffer;\n      if(pval_min2 == -1) {\n        buffer << \"NA\";\n      } else {\n        if(best_fit) buffer << fit_pos.bhat[i];\n        else buffer << fit_neg.bhat[i];\n      }\n      return(buffer.str());\n    }\n\n    string str_sel_i(unsigned i) \n    {\n      ostringstream buffer;\n      if(pval_min2 == -1) {\n        buffer << \"NA\";\n      } else {\n        if(best_fit) buffer << fit_pos.selected[i];\n        else buffer << fit_neg.selected[i];\n      }\n      return(buffer.str());\n    }\n\n    string str_bhat(bool pos) \n    {\n      ostringstream buffer;\n      for(int i = 0; i < p; i++) {\n        if(pos) buffer << fit_pos.bhat[i] << \" \";\n        else buffer << fit_neg.bhat[i] << \" \";\n      }\n      buffer << endl;\n      return(buffer.str());\n    }\n\n    string str_selected(bool pos) \n    {\n      ostringstream buffer;\n      for(int i = 0; i < p; i++) {\n        if(pos) buffer << fit_pos.selected[i] << \" \";\n        else buffer << fit_neg.selected[i] << \" \";\n      }\n      buffer << endl;\n      return(buffer.str());\n    }\n\n    string str_wts()\n    {\n      ostringstream buffer;\n      for(int i = 0; i < nw; i++) { // all nw = p + 1 weights\n        buffer << wts[i] << \" \";\n      }\n      buffer << endl;\n      return(buffer.str());\n    }\n\n    string str_bhat_ols()\n    {\n      ostringstream buffer;\n      for(int i = 0; i < p; i++) {\n        buffer << bhat_ols[i] << \" \";\n      }\n      buffer << endl;\n      return(buffer.str());\n    }\n\n    string str_info()\n    {\n      ostringstream buffer;\n      buffer << \"best_fit \" << best_fit << endl \n        << \"pval_min2 \" << pval_min2 << endl \n        << \"wts \" << str_wts()\n        // NNLS pos\n        << \"nnls_pos\" << endl \n        << \"stat \" << fit_pos.stat << endl << \"pval \" << fit_pos.pval << endl\n        << \"selected_pos \" << str_selected(true) << \"bhat_nnls_pos \" << str_bhat(true)\n        // NNLS neg\n        << \"nnls_neg\" << endl \n        << \"stat \" << fit_neg.stat << endl << \"pval \" << fit_neg.pval << endl\n        << \"selected_neg \" << str_selected(false) << \"bhat_nnls_neg \" << str_bhat(false)\n        // OLS\n        << \"ols\" << endl \n        << \"sigma2 \" << sigma2 << endl << \"stat_ols \" << stat_ols << endl\n        << \"bhat_ols \" << str_bhat_ols();\n\n      return(buffer.str());\n    };\n\n    NNLS(); \n    NNLS(int napprox_, bool normalize_, double tol_, bool strict_, int verbose_);\n    ~NNLS() { };\n};\n\n\n#endif\n"
  },
  {
    "path": "src/Ordinal.cpp",
    "content": "/* \n\n   This file is part of the regenie software package.\n\n   Copyright (c) 2020-2024 Joelle Mbatchou, Andrey Ziyatdinov & Jonathan Marchini\n\n   Permission is hereby granted, free of charge, to any person obtaining a copy\n   of this software and associated documentation files (the \"Software\"), to deal\n   in the Software without restriction, including without limitation the rights\n   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n   copies of the Software, and to permit persons to whom the Software is\n   furnished to do so, subject to the following conditions:\n\n   The above copyright notice and this permission notice shall be included in all\n   copies or substantial portions of the Software.\n\n   THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n   SOFTWARE.\n\n*/\n\n#include \"Regenie.hpp\"\n#include \"Files.hpp\"\n#include \"Geno.hpp\"\n#include \"Pheno.hpp\"\n\n#include \"Ordinal.hpp\"\n\nusing namespace Eigen;\nusing namespace std;\n\n//-----------------\n// Local functions\n//-----------------\n\nEigen::MatrixXd orth_matrix(const Eigen::MatrixXd & , const MatrixXb &);\nvoid exp_matrix(Eigen::MatrixXd &);\nvoid exp_matrix_ord(Eigen::MatrixXd &);\nvoid exp_vector(Eigen::VectorXd &);\nEigen::VectorXd dlog_vector(const Eigen::VectorXd & );\nEigen::MatrixXd dlog_matrix(const Eigen::MatrixXd & );\nbool check_nan(double );\n\n//-------------------\n// Class MultiPhen\n//-------------------\n\nvoid MultiPhen::setup_defaults()\n{\n  // settings\n  cnt_fit = 0;\n  verbose = 0;\n  response = \"unknown\";\n  optim = \"WeightHalving\";\n  firth_binom = false; firth_multinom = false;\n  firth_mult = 1.0;\n  reuse_start = false; reset_start = false;\n  approx_offset = false;\n  mac_approx_offset = 0;\n  offset_mode = \"offset\";\n  maxit = 150; maxit2 = 10; maxit3 = 10; strict = false;\n  tol = 1e-4; pseudo_stophalf = 0.0;\n  check_step = true; max_step = 10.0;\n  // statuses\n  set_x = false; set_y = false;\n  // data dimenstions\n  N = 0; Neff = 0; // sample size\n  /* Ncov = 0, Nb = 0, Ncov0 = 0; Ncov1 = 0; // number of covariates */ \n  ncat = 0, ncat1 = 0, ncat1sq = 0; // number of categories \n  // tests\n  pval_thr = 0.1;\n  // model fitting results\n  executed = false; converged = false;\n  trace = false;\n  it = 0; cnt_updates = 0;\n}\n\nMultiPhen::MultiPhen() \n{\n  setup_defaults();\n  test = \"none\";\n}\n\nMultiPhen::MultiPhen(std::string _test) \n{\n  setup_defaults();\n  test = _test;\n}\n\nMultiPhen::MultiPhen(unsigned int test_code) \n{\n  setup_defaults();\n\n  std::map<unsigned int, std::string> test_map = { {0, \"none\"}, {1, \"cov_score\"} };\n  test = test_map[test_code];\n}\n\n// constructor for FitOrdinal\n// - copy model parameters to FitOrdinal object\nFitOrdinal MultiPhen::setup_fit(bool inc_cov, bool inc_phen, bool use_offset)\n{\n  FitOrdinal fit;\n\n  // copy parameters from Ordinal\n  fit.verbose = verbose; \n  fit.response = response; // response type = [binom, multinom]\n  fit.model = model; // model = [POM: Proportional Odds Model, ACL: Adjacent Category Logit]\n  fit.optim = optim; // 
optimization algorithm = [FisherScoring, WeightHalving]\n  fit.firth_binom = firth_binom; fit.firth_multinom = firth_multinom; // Firth correction\n  fit.firth_mult = firth_mult; \n      \n  fit.maxit = maxit; fit.maxit2 = maxit2; fit.maxit3 = maxit3; fit.strict = strict;\n  fit.tol = tol; fit.pseudo_stophalf = pseudo_stophalf;\n\n  fit.check_step = check_step;\n  fit.max_step = max_step;\n\n  fit.N = N; fit.Neff = Neff; // sample size\n  if(use_offset) {\n    // use offset\n    if(inc_cov) {\n      if(inc_phen) {\n        fit.Ncov = Ny; fit.Nb = Ny; // number of covariates\n      } else {\n        throw std::runtime_error(\"use offset with covariates only (Ncov = Nb = 0)\");\n      }\n    } else {\n      if(inc_phen) {\n        fit.Ncov = Ny; fit.Nb = Ny; // number of covariates\n      } else {\n        throw std::runtime_error(\"use offset with covariates only (Ncov = Nb = 0)\");\n      }\n    }\n  } else {\n    // no offset\n    if(inc_cov) {\n      if(inc_phen) {\n        fit.Ncov = Nx + Ny; fit.Nb = ncat1 + Nx + Ny; // number of covariates\n      } else {\n        fit.Ncov = Nx; fit.Nb = ncat1 + Nx; // number of covariates\n      }\n    } else {\n      if(inc_phen) {\n        fit.Ncov = Ny; fit.Nb = ncat1 + Ny; // number of covariates\n      } else {\n        fit.Ncov = 0; fit.Nb = ncat1; // number of covariates\n      }\n    }\n  }\n\n  fit.ncat = ncat; // number of categories\n  fit.ncat1 = ncat1; fit.ncat1sq = ncat1sq;\n  fit.Ncat = Ncat;\n\n  fit.cur_dev = 0; fit.prev_dev = 0;\n  fit.trace = trace;\n  fit.it = 0; fit.it2 = 0; fit.cnt_updates = 0;\n\n  fit.cnt_fit = cnt_fit++;\n\n  return(fit);\n}\n\nvoid MultiPhen::run(const Eigen::VectorXd & g, \n  const Eigen::MatrixXd& XYR, unsigned int n_cov, unsigned int n_phen)\n{\n  reset_model();\n\n  // check if XYR is set up\n  if(!set_x) throw std::runtime_error(\"run: set_x is false\");\n  // set y\n  setup_y(g); // -> Ym, yb\n  if(!set_y) return; // early stop (example #cat = 1 for imputed variant due to rounding)\n  setup_approx_offset(); // approx_offset\n  // print info\n  if(verbose) cout << \"MultiPhen: Nx = \" << Nx << \" Ny = \" << Ny << endl;\n\n  // test\n  if(test == \"none\") {\n    reset_model();\n    // do nothing\n  } else if(test == \"cov_score_it1\") {\n    maxit = 1; optim = \"FisherScoring\";\n    run_test_score(XYR, true); // inc_cov = true\n  } else if(test == \"nocov_score\") {\n    run_test_score(XYR, false); // inc_cov = false\n  } else if(test == \"cov_score\") {\n    run_test_score(XYR, true); // inc_cov = true\n  } else if(test == \"nocov_lrt\") {\n    run_test_lrt(XYR, false); // inc_cov = false\n  } else if(test == \"cov_lrt\") {\n    run_test_lrt(XYR, true); // inc_cov = true\n  } else if(test == \"offset\") {\n    run_test_offset(XYR);\n  } else if(test == \"nocov_score_addcov\") {\n    run_test_addcov(XYR);\n  } else if(test == \"nocov_score_offset\") {\n    run_test_add_offset(XYR);\n  } else {\n    throw std::runtime_error(\"run: unknown test\");\n  }\n}\n\nvoid MultiPhen::run0(const Eigen::VectorXi & g, const Eigen::MatrixXd& X, const Eigen::MatrixXd& Y, bool score_lrt)\n{\n  // set up Ordinal model (no Firth)\n  Ordinal ord;\n  ord.optim = optim; ord.tol = tol; ord.pseudo_stophalf = pseudo_stophalf; ord.maxit = maxit; ord.maxit2 = maxit2; ord.maxit3 = maxit3; ord.strict = strict;\n  ord.check_step = check_step; ord.max_step = max_step;\n  ord.firth_binom = false; \n\n  if(score_lrt) { // Score test\n    executed = true; converged = false; pval_test = -1.0;\n    FitOrdinal fit;\n  \n    // fit null 
model\n    fit = ord.fit(g, X);\n    if(!fit.converged) { return; }\n\n    /* // run Score test */\n    converged = fit.converged;\n    pval_test = ord.test_score(fit, Y);\n  } else { // LRT\n    executed = true; converged = false; pval_test = -1.0;\n    FitOrdinal fit0, fit1;\n  \n    // prepare new matrix of covariates X + Y\n    MatrixXd X1(Y.rows(), X.cols() + Y.cols());\n    if(X.cols()) X1.leftCols(X.cols()) = X;\n    X1.rightCols(Y.cols()) = Y;\n  \n    // fit null model\n    fit0 = ord.fit(g, X);\n    if(!fit0.converged) { return; }\n\n    // fit alternative model (Firth)\n    fit1 = ord.fit(g, X1);\n    if(!fit1.converged) { return; }\n    converged = fit1.converged;\n\n    boost::math::chi_squared dist(Y.cols());\n    double stat_lrt = 2 * (fit1.loglik - fit0.loglik);\n    pval_test = boost::math::cdf(boost::math::complement(dist, stat_lrt));\n  }\n}\n\n// XYR = [Intercept, X, Y, Intercept, R]\nFitOrdinal MultiPhen::fit(const Eigen::Ref<const Eigen::MatrixXd> & XYR, bool inc_cov, bool inc_phen, bool use_res)\n{\n  if(use_res) throw std::runtime_error(\"use_res is not implemented yet\");\n\n  // initialize default settings \n  bool inc_phen_null = false, inc_phen_firth = inc_phen;\n  bool use_offset = (inc_phen && approx_offset);\n  bool copy_start = (reuse_start && inc_cov && inc_phen && !approx_offset);\n  // update settings for Binom: no firth / firth\n  if(response == \"binom\") {\n    inc_phen_null = firth_binom && !inc_phen && !approx_offset;\n    inc_phen_firth = inc_phen_null ? true : inc_phen;\n  } \n  // update settings for Multinom: no firth / firth\n  if(response == \"multinom\") {\n    inc_phen_null = firth_multinom && !inc_phen && !approx_offset;\n    inc_phen_firth = inc_phen_null ? true : inc_phen;\n  } \n  // create a fit object\n  FitOrdinal fit = setup_fit(inc_cov, inc_phen_firth, use_offset);\n  /* cout << \"done MultiPhen setup_fit: response = \" << response */ \n  /*   << \" Nx = \" << Nx << \" Ny = \" << Ny */ \n  /*   << \" inc_cov = \" << inc_cov << \" inc_phen_firth = \" << inc_phen_firth */ \n  /*   << \" use_offset = \" << use_offset << \" fit.Nb = \" << fit.Nb << \" fit.Ncov = \" << fit.Ncov << endl; */\n\n  // reuse starting par. values\n  if(copy_start) fit.setup_restart(b0);\n\n  // refine fit for Binom only\n  if(response == \"binom\") {\n    // constrain some par. to zero?\n    bool reverse_last = firth_binom && !inc_cov && inc_phen_firth;\n    bool last0 = !reverse_last;\n    if(inc_phen_null) fit.setup_ncov0(Ny, last0, false); // preproc_cov = false\n  }\n  // refine fit for Multinom only\n  if(response == \"multinom\") {\n    // constrain some par. 
to zero?\n    bool reverse_last = firth_multinom && !inc_cov && inc_phen_firth;\n    bool last0 = !reverse_last;\n    if(inc_phen_null) fit.setup_ncov0(Ny, last0, false); // preproc_cov = false\n  }\n\n  // store offset?\n  if(!inc_phen && approx_offset) fit.store_offset = true;\n  // apply offset?\n  if(use_offset) {\n    if(response == \"binom\") fit.setup_offset_binom(yo, false); // decrement_Nb = false\n    else if (response == \"multinom\") fit.setup_offset_multinom_pom(yo, yo_int);\n    else throw std::runtime_error(\"unknown response\");\n  }\n\n  // do model fitting & control the columns in XYR passed\n  if(response == \"binom\") {\n    if(use_offset) { \n      if(inc_phen_firth) {\n        /* fit.fit_binom(Mask, Ym, XYR.rightCols(Ny21).leftCols(Ny)); // matrix of phenotypes Y */ \n        fit.fit_binom(Mask, Ym, Yres0); // matrix of phenotypes Y \n      } else throw std::runtime_error(\"use offset for the null model\");\n    } else { \n      if(inc_cov) {\n        if(inc_phen_firth) fit.fit_binom(Mask, Ym, XYR.leftCols(Nx1 + Ny)); // X + Y + Intercept\n        else fit.fit_binom(Mask, Ym, XYR.leftCols(Nx1)); // X + Intercept\n      } else {\n        if(inc_phen_firth) fit.fit_binom(Mask, Ym, XYR.rightCols(Ny21).leftCols(Ny1)); // matrix of phenotypes Y + Intercept\n        else fit.fit_binom(Mask, Ym, XYR.leftCols(1)); // Intercept\n      }\n    }\n  } else if(response == \"multinom\") {\n    if(use_offset) {\n      if(inc_phen_firth) fit.fit_multinom_pom(Mask, Ym, XYR.rightCols(Ny21).leftCols(Ny)); // matrix of phenotypes Y \n      else throw std::runtime_error(\"use offset for the null model\");\n    } else {\n      if(inc_cov) {\n        if(inc_phen_firth) fit.fit_multinom_pom(Mask, Ym, XYR.leftCols(Nx1 + Ny).rightCols(Nx + Ny)); // X + Y\n        else fit.fit_multinom_pom(Mask, Ym, XYR.leftCols(Nx1).rightCols(Nx)); // X\n      } else {\n        if(inc_phen_firth) fit.fit_multinom_pom(Mask, Ym, XYR.rightCols(Ny1).leftCols(Ny)); // matrix of phenotypes Y \n        else fit.fit_multinom_pom(Mask, Ym, XYR.leftCols(0)); // 0 columns\n      }\n    }\n  } else {\n    throw std::runtime_error(\"unknown response\");\n  }\n\n  if(trace) {\n    cnt_updates += fit.cnt_updates;\n    it += fit.it;\n  }\n\n  return(fit);\n}\n\nvoid MultiPhen::run_test_addcov(const Eigen::Ref<const Eigen::MatrixXd> & XYR)\n{\n  run_test_score(XYR, false); // inc_cov = false\n  if(pval_test < pval_thr) {\n    run_test_lrt(XYR, true); // inc_cov = true\n  }\n}\n\nvoid MultiPhen::run_test_add_offset(const Eigen::Ref<const Eigen::MatrixXd> & XYR)\n{\n  run_test_score(XYR, false); // inc_cov = false\n  if(pval_test < pval_thr) {\n    run_test_offset(XYR);\n  }\n}\n\nvoid MultiPhen::run_test_offset(const Eigen::Ref<const Eigen::MatrixXd> & XYR)\n{\n  FitOrdinal null0, null, full;\n  VectorXd b0_fit;\n  double ll_null, ll_full;\n  boost::math::chi_squared dist(Ny);\n  double stat_lrt;\n  unsigned int i;\n\n  reset_model(); // reset model fit results\n                 \n  if(response == \"binom\") {\n    executed = true; \n\n    // fit null model\n    null0 = setup_fit(true, false, false); // inc_cov = true, inc_phen = false, use_offset = false\n    null0.store_offset = true;\n    null0.fit_binom(Mask, Ym, XYR.leftCols(Nx1)); // covariates X + Intercept\n\n    if(trace) { cnt_updates += null0.cnt_updates; it += null0.it; }\n\n    if(!null0.converged) return;\n\n    // store offset/weights from the null model\n    yo = null0.yo;\n    yo_int = null0.yo;\n    yo_int.array() -= null0.bhat(0); // subtract intercept 
bhat\n    w0 = null0.wb;\n\n    // residualize phenotypes\n    Yres0 = XYR.rightCols(Ny21).leftCols(Ny); // matrix of phenotypes Y \n    ColPivHouseholderQR<MatrixXd> qrXw;\n    qrXw.compute(MatrixXd(Nx1, Nx1).setZero().selfadjointView<Lower>().rankUpdate((XYR.leftCols(Nx1).array().colwise() * w0.array().sqrt()).matrix().adjoint()));\n    Yres0 -= XYR.leftCols(Nx1).matrix() * qrXw.solve((XYR.leftCols(Nx1).array().colwise() * w0.array()).matrix().transpose() * Yres0);\n    for(i = 0; i < Yres0.cols(); i++) {\n      Yres0.col(i) = Mask.select(Yres0.col(i), 0.0);\n    }\n\n    // extract quantities from null model\n    VectorXd mub0 = yo;\n    exp_vector(mub0); \n    mub0.array() /= (1.0 + mub0.array()); \n\n    // fit full model\n    if(offset_mode == \"offset\") {\n      // full model: logit(g) = offset + Y beta\n      full = setup_fit(false, true, true); // inc_cov = false, inc_phen = true, use_offset = true\n      full.Ncov = Ny; full.Nb = Ny; // overwrite Ncov, Nb\n      full.setup_offset_binom(yo, false); // decrement_Nb = false\n      full.fit_binom(Mask, Ym, Yres0); // Logistic phenotype residuals\n\n      if(!full.converged) return;\n      converged = true;\n\n      /* ll_null = 0.0; */ \n      /* ll_null += Ym.col(0).select((1.0 - mub0.array()).log(), 0.0).array().sum(); // controls */\n      /* ll_null += Ym.col(1).select(mub0.array().log(), 0.0).array().sum(); // cases */\n      ll_null = null.loglik_multinom(Mask, Ym); // depends on Y, P, Pk, Mask\n      if(firth_binom) {\n        MatrixXd null_Info = Yres0.transpose() * (Yres0.array().colwise() * w0.array()).matrix();\n        LLT<MatrixXd> llt_null(null_Info);\n        ll_null += llt_null.matrixL().toDenseMatrix().diagonal().array().log().sum();\n      }\n\n      ll_full = full.loglik;\n\n      stat_lrt = 2 * (ll_full - ll_null);\n      pval_test = (stat_lrt < 0) ? 1 : boost::math::cdf(boost::math::complement(dist, stat_lrt));\n    } else if(offset_mode == \"offsetcov\") {\n      if(!firth_binom) throw std::runtime_error(\"offsetcov for firth_binom only\");\n\n      // null model: logit(g) = [offsetcov; Y] [beta0, betaY] wrt betaY = 0\n      MatrixXd Yres0_Int(N, Ny1);\n      Yres0_Int.leftCols(1) = Mask.select(yo_int, 0.0);\n      Yres0_Int.rightCols(Ny) = Yres0;\n\n      null = setup_fit(false, true, true); // inc_cov = false, inc_phen = true, use_offset = true\n      null.Ncov = Ny1; null.Nb = Ny1; // overwrite Ncov, Nb\n      null.setup_ncov0(Ny, true, false); // last0 = true, preproc_cov = false\n      null.fit_binom(Mask, Ym, Yres0_Int); // Logistic phenotype residuals\n\n      if(trace) { cnt_updates += null.cnt_updates; it += null.it; }\n\n      if(!null.converged) return;\n\n      // full model: logit(g) = [offset; Y] beta\n      full = setup_fit(false, true, true); // inc_cov = false, inc_phen = true, use_offset = true\n      full.Ncov = Ny1; full.Nb = Ny1; // overwrite Ncov, Nb\n      full.fit_binom(Mask, Ym, Yres0_Int); // Logistic phenotype residuals\n\n      if(trace) { cnt_updates += full.cnt_updates; it += full.it; }\n\n      if(!full.converged) return;\n      converged = true;\n\n      stat_lrt = 2 * (full.loglik - null.loglik);\n      pval_test = (stat_lrt < 0) ? 
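/* numerical guard: a slightly negative LRT statistic maps to p = 1 */ 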
1 : boost::math::cdf(boost::math::complement(dist, stat_lrt));\n    } else if(offset_mode == \"offsetcov_int\") {\n      if(!firth_binom) throw std::runtime_error(\"offsetcov_int for firth_binom only\");\n\n      b0_fit.resize(2);\n      b0_fit << null0.bhat(0), 1.0;\n\n      // null model: logit(g) = [1, offsetcov; Y] [beta0, betaY] wrt betaY = 0\n      MatrixXd Yres0_Int(N, Ny1 + 1);\n      Yres0_Int.leftCols(1) = XYR.leftCols(1);\n      Yres0_Int.leftCols(2).rightCols(1) = Mask.select(yo_int, 0.0);\n      Yres0_Int.rightCols(Ny) = Yres0;\n\n      null = setup_fit(false, true, true); // inc_cov = false, inc_phen = true, use_offset = true\n      null.Ncov = Ny1 + 1; null.Nb = Ny1 + 1; // overwrite Ncov, Nb\n      null.setup_ncov0(Ny, true, false); // last0 = true, preproc_cov = false\n      null.setup_restart(b0_fit);\n      null.fit_binom(Mask, Ym, Yres0_Int); // Logistic phenotype residuals\n\n      if(trace) { cnt_updates += null.cnt_updates; it += null.it; }\n\n      if(!null.converged) return;\n\n      // full model: logit(g) = [1, offset; Y] beta\n      full = setup_fit(false, true, true); // inc_cov = false, inc_phen = true, use_offset = true\n      full.Ncov = Ny1 + 1; full.Nb = Ny1; // overwrite Ncov, Nb\n      full.setup_restart(b0_fit);\n      full.fit_binom(Mask, Ym, Yres0_Int); // Logistic phenotype residuals\n\n      if(trace) { cnt_updates += full.cnt_updates; it += full.it; }\n\n      if(!full.converged) return;\n      converged = true;\n\n      stat_lrt = 2 * (full.loglik - null.loglik);\n      pval_test = (stat_lrt < 0) ? 1 : boost::math::cdf(boost::math::complement(dist, stat_lrt));\n    } else if(offset_mode == \"offset_int\") {\n      if(!firth_binom) throw std::runtime_error(\"offset_int for firth_binom only\");\n\n      // null model: logit(g) = offset + [1; Y] [beta0, betaY] wrt betaY = 0\n      MatrixXd Yres0_Int(N, Ny1);\n      Yres0_Int.leftCols(1) = XYR.leftCols(1);\n      Yres0_Int.rightCols(Ny) = Yres0;\n\n      null = setup_fit(false, true, true); // inc_cov = false, inc_phen = true, use_offset = true\n      null.Ncov = Ny1; null.Nb = Ny1; // overwrite Ncov, Nb\n      null.setup_offset_binom(yo_int, false); // decrement_Nb = false\n      null.setup_ncov0(Ny, true, false); // last0 = true, preproc_cov = false\n      null.fit_binom(Mask, Ym, Yres0_Int); // Logistic phenotype residuals\n\n      if(trace) { cnt_updates += null.cnt_updates; it += null.it; }\n\n      if(!null.converged) return;\n\n      // full model: logit(g) = offset + [1; Y] beta\n      full = setup_fit(false, true, true); // inc_cov = false, inc_phen = true, use_offset = true\n      full.Ncov = Ny1; full.Nb = Ny1; // overwrite Ncov, Nb\n      full.setup_offset_binom(yo_int, false); // decrement_Nb = false\n      full.fit_binom(Mask, Ym, Yres0_Int); // Logistic phenotype residuals\n\n      if(trace) { cnt_updates += full.cnt_updates; it += full.it; }\n\n      if(!full.converged) return;\n      converged = true;\n\n      stat_lrt = 2 * (full.loglik - null.loglik);\n      pval_test = (stat_lrt < 0) ? 
1 : boost::math::cdf(boost::math::complement(dist, stat_lrt));\n    } else {\n      throw std::runtime_error(\"unknown offset mode\");\n    }\n  } else if(response == \"multinom\") {\n    executed = true; \n\n    // fit null model\n    if(verbose) cout << \"fitting initial null model\" << endl;\n    null = setup_fit(true, false, false); // inc_cov = true, inc_phen = false, use_offset = false\n    null.store_offset = true;\n    null.fit_multinom_pom(Mask, Ym, XYR.leftCols(Nx1).rightCols(Nx)); // covariates X without Intercept\n\n    if(trace) { cnt_updates += null.cnt_updates; it += null.it; }\n\n    if(!null.converged) return;\n    if(verbose) cout << \"initial null converged\" << endl;\n\n    // store offset/weights from the null model\n    yo = null.yo;\n    yo_int = null.yo_int;\n\n    // !NB! not residuals\n    MatrixXd Yres0 = XYR.rightCols(Ny21).leftCols(Ny); // Phenotypes\n\n    if(offset_mode == \"offset\") {\n      // full model: logit(gamma) = offset + Y betaY\n      full = setup_fit(false, true, true); // inc_cov = false, inc_phen = true, use_offset = true\n      full.Ncov = Ny; full.Nb = Ny; // overwrite Ncov, Nb\n      full.setup_offset_multinom_pom(yo, yo_int); // manually set up offset\n      full.exclude_intercepts = true; full.exclude_intercepts_offset = false;\n      full.fit_multinom_pom(Mask, Ym, Yres0);\n\n      if(trace) { cnt_updates += full.cnt_updates; it += full.it; }\n\n      if(!full.converged) return;\n      converged = true;\n\n      ll_null = null.loglik_multinom(Mask, Ym); // depends on Y, P, Pk, Mask\n      if(firth_multinom) {\n        MatrixXd null_Info = MatrixXd(Ny, Ny).setZero().selfadjointView<Lower>().\n          rankUpdate((Yres0.array().colwise() * null.WSS1.array()).matrix().adjoint());\n        LLT<MatrixXd> llt_null(null_Info);\n        ll_null += llt_null.matrixL().toDenseMatrix().diagonal().array().log().sum();\n      }\n                                                                         \n      stat_lrt = 2 * (full.loglik - ll_null);\n      pval_test = (stat_lrt < 0) ? 
1 : boost::math::cdf(boost::math::complement(dist, stat_lrt));\n    } else if(offset_mode == \"offset_int\") {\n      if(!firth_multinom) throw std::runtime_error(\"offset_int for firth_multinom only\");\n\n      b0_fit.resize(2);\n      b0_fit << yo_int;\n\n      // null model: logit(gamma) = offset + Y betaY wrt betaY = 0\n      null = setup_fit(false, true, true); // inc_cov = false, inc_phen = true, use_offset = true\n      null.Ncov = Ny; null.Nb = Ny + ncat1; // overwrite Ncov, Nb\n      null.setup_offset_multinom_pom(yo, yo_int); // manually set up offset\n      null.exclude_intercepts = false; null.exclude_intercepts_offset = true;\n      null.setup_ncov0(Ny, true, false); // last0 = true, preproc_cov = false\n      null.setup_restart(b0_fit);\n      null.fit_multinom_pom(Mask, Ym, Yres0);\n\n      if(trace) { cnt_updates += null.cnt_updates; it += null.it; }\n\n      if(!null.converged) return;\n      if(verbose) cout << \"null converged\" << endl;\n\n      // full model: logit(gamma) = offset + Y betaY\n      full = setup_fit(false, true, true); // inc_cov = false, inc_phen = true, use_offset = true\n      full.Ncov = Ny; full.Nb = Ny + ncat1; // overwrite Ncov, Nb\n      full.setup_offset_multinom_pom(yo, yo_int); // manually set up offset\n      full.exclude_intercepts = false; full.exclude_intercepts_offset = true;\n      full.setup_restart(b0_fit);\n      full.fit_multinom_pom(Mask, Ym, Yres0);\n\n      if(trace) { cnt_updates += full.cnt_updates; it += full.it; }\n\n      if(!full.converged) return;\n      converged = true;\n      if(verbose) cout << \"full converged\" << endl;\n\n      stat_lrt = 2 * (full.loglik - null.loglik);\n      pval_test = (stat_lrt < 0) ? 1 : boost::math::cdf(boost::math::complement(dist, stat_lrt));\n      if(verbose) cout << \"pval_test =  \" << pval_test << endl;\n    } else {\n      throw std::runtime_error(\"unknown offset mode\");\n\n      /* // residualize phenotypes */\n      /* // !NB! 
not implemented yet */\n\n      /* // full model */\n      /* full = setup_fit(false, true, true); // inc_cov = false, inc_phen = true, use_offset = true */\n      /* full.setup_offset_multinom_pom(yo, yo_int); // manually set up offset */\n      /* full.exclude_intercepts = true; */\n      /* full.Ncov = Ny; full.Nb = Ny; // overwrite Ncov, Nb */\n      /* full.fit_multinom_pom(Mask, Ym, XYR.rightCols(Ny21).leftCols(Ny)); // Phenotypes */\n      /* /1* if(offset_mode == \"offset\") { *1/ */\n      /* /1*   full = setup_fit(false, true, true); // inc_cov = false, inc_phen = true, use_offset = true *1/ */\n      /* /1*   full.setup_offset_multinom_pom(yo, yo_int); // manually set up offset *1/ */\n      /* /1*   full.exclude_intercepts = true; *1/ */\n      /* /1*   full.Ncov = Ny; full.Nb = Ny; // overwrite Ncov, Nb *1/ */\n      /* /1*   full.fit_multinom_pom(Mask, Ym, XYR.rightCols(Ny21).leftCols(Ny)); // Phenotypes *1/ */\n      /* /1* } else if(offset_mode == \"offset_int\") { *1/ */\n      /* /1*   full = setup_fit(false, true, true); // inc_cov = false, inc_phen = true, use_offset = true *1/ */\n      /* /1*   full.setup_offset_multinom_pom(yo, yo_int); // manually set up offset *1/ */\n      /* /1*   full.exclude_intercepts = false; *1/ */\n      /* /1*   full.Ncov = Ny; full.Nb = ncat1 + Ny; // overwrite Ncov, Nb *1/ */\n      /* /1*   full.fit_multinom_pom(Mask, Ym, XYR.rightCols(Ny21).leftCols(Ny)); // Phenotypes *1/ */\n      /* /1* } else { *1/ */\n      /* /1*   throw std::runtime_error(\"unknown offset mode\"); *1/ */\n      /* /1* } *1/ */\n\n      /* if(trace) { cnt_updates += full.cnt_updates; it += full.it; } */\n\n      /* if(!full.converged) return; */\n      /* converged = true; */\n\n      /* stat_lrt = 2 * (full.loglik - null.loglik); */\n      /* pval_test = (stat_lrt < 0) ? 1 : boost::math::cdf(boost::math::complement(dist, stat_lrt)); */\n    }\n  } else {\n    throw std::runtime_error(\"unknown response\");\n  }\n\n  // store results\n  if(converged) {\n    bhat_y = full.bhat.tail(Ny);\n  }\n}\n\nvoid MultiPhen::run_test_qt(const Eigen::Ref<const Eigen::MatrixXd> & XYR)\n{\n  reset_model(); // reset model fit results\n\n  if(response == \"binom\") {\n    executed = true; \n    converged = true; \n    VectorXd beta_qt = XYR.leftCols(Nx1).transpose() * yb;\n    // residualize\n    VectorXd y_qt = yb - XYR.leftCols(Nx1) * beta_qt;\n    VectorXd x_qt = XYR.leftCols(Nx1 + 1).rightCols(1);\n    // regression\n    /* VectorXd bhat_qt = (y_qt.transpose() * x_qt) / x2; */\n    /* bhat = (Y.col(i).transpose() * G).array().rowwise() / G2.array().transpose(); */\n    /* /1* B.row(i) = bhat; *1/ */\n    /* // residuals, s2 */\n    /* s2 = (((G.array().rowwise() * bhat.array().transpose()). // predicted yp = X bhat */\n    /*   colwise() - Y.col(i).array()). // residuals = y - yp */\n    /*   matrix().colwise().squaredNorm()). // residuals^2 */\n    /*   array() / (N_data - 1.0); // s2 = residuals^2 / (N - 1) */\n    /* Z.row(i) = bhat.array() * (G2.array() / s2.array()).sqrt(); */\n    \n    /* // regression */\n    /* bhat = (Y.col(i).transpose() * G).array().rowwise() / G2.array().transpose(); */\n    /* /1* B.row(i) = bhat; *1/ */\n    /* // residuals, s2 */\n    /* s2 = (((G.array().rowwise() * bhat.array().transpose()). // predicted yp = X bhat */\n    /*   colwise() - Y.col(i).array()). // residuals = y - yp */\n    /*   matrix().colwise().squaredNorm()). 
// residuals^2 */\n    /*   array() / (N_data - 1.0); // s2 = residuals^2 / (N - 1) */\n    /* Z.row(i) = bhat.array() * (G2.array() / s2.array()).sqrt(); */\n\n\n    /* yb */ \n  /* pval_test = test_score(null, Mask, Ym, yb, XYR, inc_cov); */ \n  } else {\n    return;\n  }\n\n}\n\nvoid MultiPhen::run_test_lrt(const Eigen::Ref<const Eigen::MatrixXd> & XYR, bool inc_cov)\n{\n  reset_model(); // reset MultiPhen model fit results\n  executed = true; \n  \n  FitOrdinal null, full;\n\n  if(reuse_start & !approx_offset) {\n    if(!inc_cov) throw std::runtime_error(\"reuse_start is not available for inc_cov = false\");\n    /* if(approx_offset) throw std::runtime_error(\"reuse_start is not compatible with approx_offset\"); */\n\n    // null model: logit(g) = X alpha \n    null = fit(XYR, inc_cov, false); // inc_cov, inc_phen = false\n    if(!null.converged) return;\n\n    b0 = null.bhat;\n\n    // full model: logit(g) = X alpha + Y beta\n    full = fit(XYR, inc_cov, true); // inc_cov, inc_phen = true\n    // give another chance if reuse_start & reset_start\n    if(reset_start) {\n      reuse_start = false;\n      full = fit(XYR, inc_cov, true); // inc_cov, inc_phen = true\n    }\n    if(!full.converged) return;\n\n    converged = true;\n    boost::math::chi_squared dist(Ny);\n    double stat_lrt = 2 * (full.loglik - null.loglik);\n    pval_test = (stat_lrt < 0) ? 1 : boost::math::cdf(boost::math::complement(dist, stat_lrt));\n  } else if(approx_offset && response == \"binom\") {\n    // null model: logit(g) = X alpha \n    null = fit(XYR, inc_cov, false); // inc_cov, inc_phen = false\n    if(!null.converged) return;\n\n    // store offset/weights from the null model\n    yo = null.yo;\n    w0 = null.wb;\n\n    Yres0 = XYR.rightCols(Ny21).leftCols(Ny); // matrix of phenotypes Y \n    ColPivHouseholderQR<MatrixXd> qrXw;\n    qrXw.compute(MatrixXd(Nx1, Nx1).setZero().selfadjointView<Lower>().rankUpdate((XYR.leftCols(Nx1).array().colwise() * w0.array().sqrt()).matrix().adjoint()));\n    Yres0 -= XYR.leftCols(Nx1).matrix() * qrXw.solve((XYR.leftCols(Nx1).array().colwise() * w0.array()).matrix().transpose() * Yres0);\n\n    // full model: logit(g) = X alpha + Y beta\n    full = fit(XYR, inc_cov, true); // inc_cov, inc_phen = true\n    if(!full.converged) return;\n    converged = true;\n\n    // problem: null.mub is not at scale [0, 1]\n    /* cout << \"null.mub = \" << null.mub.head(5).transpose() << endl; */\n    VectorXd mub = null.yo;\n    exp_vector(mub); // mub <- exp(mub)\n    mub.array() /= (1.0 + mub.array()); // mub <- exp(mub) / (1 + exp(mub))\n    double ll_null = null.loglik_binom(Mask, Ym);\n    /* double ll_null = 0.0; */ \n    /* ll_null += Ym.col(0).select((1.0 - mub.array()).log(), 0.0).array().sum(); // controls */\n    /* ll_null += Ym.col(1).select(mub.array().log(), 0.0).array().sum(); // cases */\n    if(firth_binom) {\n      MatrixXd null_Info = Yres0.transpose() * (Yres0.array().colwise() * w0.array()).matrix();\n      LLT<MatrixXd> llt_null(null_Info);\n      ll_null += llt_null.matrixL().toDenseMatrix().diagonal().array().log().sum();\n    }\n\n    double ll_full;\n    if(full.firth_binom) {\n      LLT<MatrixXd> llt_full(full.Info);\n      ll_full = full.loglik_binom_firth(Mask, Ym, llt_full);\n    } else {\n      ll_full = full.loglik_binom(Mask, Ym);\n    }\n\n    boost::math::chi_squared dist(Ny);\n    /* double stat_lrt = 2 * (full.loglik - null.loglik); */\n    double stat_lrt = 2 * (ll_full - ll_null);\n    pval_test = 
(stat_lrt < 0) ? 1 : boost::math::cdf(boost::math::complement(dist, stat_lrt));\n  } else if(approx_offset && response == \"multinom\") {\n    // null model\n    null = fit(XYR, inc_cov, false); // inc_cov, inc_phen = false\n    if(!null.converged) return;\n\n    // store offset vectors\n    yo = null.yo;\n    yo_int = null.yo_int;\n\n    // full model: logit(g) = X alpha + Y beta\n    full = fit(XYR, inc_cov, true); // inc_cov, inc_phen = true\n    if(!full.converged) return;\n    converged = true;\n\n    boost::math::chi_squared dist(Ny);\n    double stat_lrt = 2 * (full.loglik - null.loglik);\n    /* cout << \"stat_lrt = \" << stat_lrt << \" full.loglik = \" << full.loglik << \" null.loglik = \" << null.loglik << endl; */\n    pval_test = (stat_lrt < 0) ? 1 : boost::math::cdf(boost::math::complement(dist, stat_lrt));\n  } else {\n    // null model: logit(g) = X alpha \n    null = fit(XYR, inc_cov, false); // inc_cov, inc_phen = false\n    if(!null.converged) return;\n\n    // full model: logit(g) = X alpha + Y beta\n    full = fit(XYR, inc_cov, true); // inc_cov, inc_phen = true\n    if(!full.converged) return;\n\n    converged = true;\n    boost::math::chi_squared dist(Ny);\n    double stat_lrt = 2 * (full.loglik - null.loglik);\n    /* cout << \" lrt = \" << stat_lrt << \" = \" << full.loglik << \" - \" << null.loglik << endl; */\n    pval_test = (stat_lrt < 0) ? 1 : boost::math::cdf(boost::math::complement(dist, stat_lrt));\n  }\n  // store results\n  if(converged) {\n    bhat_y = full.bhat.tail(Ny);\n  }\n}\n\nvoid MultiPhen::run_test_score(const Eigen::Ref<const Eigen::MatrixXd> & XYR, bool inc_cov)\n{\n  bool _firth_binom = firth_binom, _firth_multinom = firth_multinom, _approx_offset = approx_offset;\n  firth_binom = false; firth_multinom = false; approx_offset = false;\n \n  reset_model(); // reset model fit results\n  executed = true; \n\n  FitOrdinal null = fit(XYR, inc_cov, false); // inc_cov, inc_phen = false\n  if(!null.converged) { return; }\n\n  converged = true; \n  if(trace) { cnt_updates += null.cnt_updates; it += null.it; }\n  pval_test = test_score(null, Mask, Ym, yb, XYR, inc_cov); \n\n  firth_binom = _firth_binom; firth_multinom = _firth_multinom; approx_offset = _approx_offset;\n}\n\nvoid MultiPhen::setup_x(const VectorXb & _Mask,  const Eigen::MatrixXd& XYR, unsigned int n_cov, unsigned int n_phen, \n    bool _pos_intercept_first, bool _pos_phen_first)\n{\n  // check\n  if(XYR.cols() != 2 + n_cov + 2*n_phen) throw std::runtime_error(\"setup_x: dimensions XYR\");\n  if(XYR.rows() != _Mask.size()) throw std::runtime_error(\"setup_x: dimensions XYR and Mask\");\n  // extract dimensions from XYR\n  N = XYR.rows();\n  /* Ncov = n_cov; // Nb = ncat1 + Ncov, where ncat1 depend on g */\n  Nx = n_cov; Nx1 = n_cov + 1; Ny = n_phen; Ny1 = n_phen + 1; Ny21 = Ny1 + n_phen;\n  pos_intercept_first = _pos_intercept_first;\n  pos_phen_first = _pos_phen_first;\n  // Mask\n  Mask = _Mask; // VectorXb::Constant(N, true);\n  Neff = Mask.array().cast<double>().sum();\n  // update status\n  set_x = true;\n}\n\nvoid MultiPhen::reset_model()\n{\n  executed = false; converged = false;\n  pval_test = -1.0;\n  it = 0; cnt_updates = 0;\n}\n\nvoid MultiPhen::setup_approx_offset()\n{\n  if(!set_y) throw std::runtime_error(\"setup_approx_offset: set_y is false\");\n\n  if(mac_approx_offset == 0) {\n    approx_offset = false;\n  } else if(mac_approx_offset == 1) {\n    approx_offset = true;\n  } else if(mac_approx_offset > 1) {\n    if(Ncat_minor <= mac_approx_offset) approx_offset = false;\n    
else approx_offset = true;\n  }\n}\n\nvoid MultiPhen::setup_y(const Eigen::VectorXd & _g)\n{\n  // Eigen::VectorXi g = _g.cast<int>(); // 1.6 -> 1\n  Eigen::VectorXi g = _g.array().round().cast<int>(); // 1.6 -> 2\n\n  unsigned int i;\n  std::set<int> genotypes; // ordered (!) set of category levels\n  set<int>::iterator it_set;\n\n  // checks\n  if(N == 0) throw std::runtime_error(\"setup_y: N == 0\");\n  if(g.size() != N) throw std::runtime_error(\"setup_y: g.size() != N\");\n\n  // assign category levels \n  for(i = 0; i < g.size(); i++) if(Mask(i)) genotypes.insert(g[i]);\n\n  // check genotype levels: 0/1 or 0/1/2\n  /* for(i = 0, it_set = genotypes.begin(); i < genotypes.size(); i++, it_set++) cout << \"genotypes \" << i << \" = \" << *it_set << endl; */\n  /* cout << \"genotypes.size() = \" << genotypes.size() << endl; */\n  if(genotypes.size() == 1) {\n    /* cerr << \"WARNING: number of genotype categories is 1\" << endl; */\n    return;\n  }\n  if(!(genotypes.size() == 2 || genotypes.size() == 3)) throw std::runtime_error(\"setup_y: number of genotype categories must be 2 or 3\");\n\n  // assign ncat, ncat1\n  ncat = genotypes.size();\n  ncat1 = ncat - 1; ncat1sq = ncat1 * ncat1;\n  \n  // assign response\n  if(ncat == 2) response = \"binom\";\n  else if(ncat == 3) response = \"multinom\";\n  else throw std::runtime_error(\"setup_y: unexpected number of genotype categories\");\n\n  // assign Ncov, Nb\n  /* Nb = ncat1 + Ncov; */\n\n  // assign Ym\n  Ym.resize(N, ncat);\n  Ncat = VectorXi::Constant(ncat, 0);\n  Ncat_minor = 0;\n  int Ncat_max = 0;\n  // loop over the genotype categories\n  for(i = 0, it_set = genotypes.begin(); i < ncat; i++, it_set++) {\n    Ym.col(i) = Mask.select(g.array() == *it_set, false);\n    /* Ym.col(i) = (g.array() == *it_set); */\n    /* Ym.col(i) = Mask.select(Ym.col(i), false); */\n    Ncat(i) = Ym.col(i).cast<int>().sum();\n    // get the maximum value in Ncat & minor counts in Ncat (all except the maximum)\n    if(Ncat(i) > Ncat_max) Ncat_max = Ncat(i);\n    Ncat_minor += Ncat(i);\n  }\n  Ncat_minor -= Ncat_max;\n  // assign yb if binomial\n  if(response == \"binom\") {\n    yb = Ym.col(1).cast<double>(); // booleans -> 0/1\n  }\n  // update status\n  set_y = true;\n}\n\nvoid MultiPhen::test0(const Eigen::VectorXi & g, const Eigen::MatrixXd& X, const Eigen::MatrixXd& Y,\n      bool firth_binom,\n      std::string optim, double tol, unsigned int maxit, bool check_step, double max_step)\n{\n  executed = true;\n  converged = false;\n  pval_test = -1.0;\n\n  FitOrdinal fit, fit1;\n  \n  // set up Ordinal model (no Firth)\n  Ordinal ord;\n  ord.optim = optim; ord.tol = tol; ord.pseudo_stophalf = pseudo_stophalf; ord.maxit = maxit;\n  ord.check_step = check_step; ord.max_step = max_step;\n  ord.firth_binom = false;\n  \n  // fit null model\n  fit = ord.fit(g, X);\n  if(!fit.converged) { return; }\n\n  // run Score test\n  converged = fit.converged;\n  pval_test = ord.test_score(fit, Y);\n\n  // run LRT test (if needed)\n  if(pval_test < pval_thr) {\n    pval_test = -1.0;\n    converged = false;\n\n    // prepare new matrix of covariates X + Y\n    MatrixXd X1(Y.rows(), X.cols() + Y.cols());\n    if(X.cols()) {\n      X1.leftCols(X.cols()) = X;\n    }\n    X1.rightCols(Y.cols()) = Y;\n\n    if(firth_binom & (ord.response == \"binom\")) {\n      ord.firth_binom = firth_binom;\n\n      // fit null model (Firth) for LRT\n      fit = ord.fit(g, X1, Y.cols());\n      if(!fit.converged) { return; }\n\n      // fit alternative model (Firth)\n      fit1 = 
ord.fit(g, X1);\n      if(!fit1.converged) { return; }\n      converged = fit1.converged;\n\n      boost::math::chi_squared dist(Y.cols());\n      double stat_lrt = 2 * (fit1.loglik - fit.loglik);\n      pval_test = boost::math::cdf(boost::math::complement(dist, stat_lrt));\n    } else {\n      // fit alternative model (no Firth)\n      fit1 = ord.fit(g, X1);\n      if(!fit1.converged) { return; }\n      converged = fit1.converged;\n\n      boost::math::chi_squared dist(Y.cols());\n      double stat_lrt = 2 * (fit1.loglik - fit.loglik);\n      pval_test = boost::math::cdf(boost::math::complement(dist, stat_lrt));\n    }\n  }\n}\n\nvoid MultiPhen::test_addcov(const Eigen::VectorXi & g, const Eigen::MatrixXd& X, const Eigen::MatrixXd& Y,\n      bool firth_binom,\n      std::string optim, double tol, unsigned int maxit, bool check_step, double max_step)\n{\n  executed = true;\n  converged = false;\n  pval_test = -1.0;\n\n  FitOrdinal fit, fit1;\n  MatrixXd X0;\n  \n  // set up Ordinal model (no Firth)\n  Ordinal ord;\n  ord.optim = optim; ord.tol = tol; ord.pseudo_stophalf = pseudo_stophalf; ord.maxit = maxit;\n  ord.check_step = check_step; ord.max_step = max_step;\n  ord.firth_binom = false;\n  \n  // fit null model\n  fit = ord.fit(g, X0);\n  if(!fit.converged) { return; }\n\n  // run Score test\n  converged = fit.converged;\n  pval_test = ord.test_score(fit, Y);\n\n  // run LRT test (if needed)\n  if(pval_test < pval_thr) {\n    pval_test = -1.0;\n    converged = false;\n\n    // null model (with covariates)\n    fit = ord.fit(g, X);\n    if(!fit.converged) { return; }\n\n    // prepare new matrix of covariates X + Y\n    MatrixXd X1(Y.rows(), X.cols() + Y.cols());\n    if(X.cols()) {\n      X1.leftCols(X.cols()) = X;\n    }\n    X1.rightCols(Y.cols()) = Y;\n\n    if(firth_binom & (ord.response == \"binom\")) {\n      ord.firth_binom = firth_binom;\n\n      // re-fit null model (Firth)\n      fit = ord.fit(g, X);\n      if(!fit.converged) { return; }\n\n      // fit alternative model (Firth)\n      fit1 = ord.fit(g, X1);\n      if(!fit1.converged) { return; }\n      converged = fit1.converged;\n\n      boost::math::chi_squared dist(Y.cols());\n      double stat_lrt = 2 * (fit1.loglik - fit.loglik);\n      pval_test = boost::math::cdf(boost::math::complement(dist, stat_lrt));\n    } else {\n      // fit alternative model (no Firth)\n      fit1 = ord.fit(g, X1);\n      if(!fit1.converged) { return; }\n      converged = fit1.converged;\n\n      boost::math::chi_squared dist(Y.cols());\n      double stat_lrt = 2 * (fit1.loglik - fit.loglik);\n      pval_test = boost::math::cdf(boost::math::complement(dist, stat_lrt));\n    }\n  }\n}\n\n//------------------------\n// Class FitOrdinal\n//------------------------\n\nvoid FitOrdinal::setup_defaults()\n{ \n  cnt_fit = 0;\n  verbose = 0;\n  // model parameters\n  N = 0; Neff = 0; // sample size\n  Ncov = 0, Nb = 0, Ncov0 = 0; Ncov1 = 0; // number of covariates \n  ncat = 0, ncat1 = 0, ncat1sq = 0; // number of categories \n  \n  firth_binom = false;\n  firth_mult = 1.0;\n\n  apply_start = false;\n  store_offset = false;\n  apply_offset = false;\n  exclude_intercepts = false;\n\n  // model fitting results\n  executed = false; converged = false;\n  trace = false;\n  it = 0; maxit = 0; cnt_updates = 0;\n\n}\n\nFitOrdinal::FitOrdinal()\n{ \n  setup_defaults();\n}\n\n//-----------------------------\n//  Class FitOrdinal: Checkers\n//-----------------------------\n\nvoid FitOrdinal::check_setup_model()\n{\n  if(verbose >= 2) {\n    cout << 
\"check_setup_model\" << endl;\n    cout << \" --  N = \" << N << \" Neff = \" << Neff << \" Nb = \" << Nb << \" Ncov = \" << Ncov \n      << \" Ncov0 = \" << Ncov0 << \" Ncov1 = \" << Ncov1\n      << \" apply_start = \" << apply_start << \" apply_offset = \" << apply_offset << \" store_offset = \" << store_offset << \" exclude_intercepts = \" << exclude_intercepts << \"exclude_intercepts_offset = \" << exclude_intercepts_offset \n      << \" firth_multinom = \" << firth_multinom << \" firth_binom = \" << firth_binom << \" firth_mult = \" << firth_mult << \" check_step = \" << check_step << \" max_step = \" << max_step \n      << \" maxit = \" << maxit << \" maxit2 = \" << maxit2 << \" maxit3 = \" << maxit3\n      << endl;\n    cout << \" Ncat = \" << Ncat << endl;\n  }\n\n  check_setup_model_common();\n}\n\nvoid FitOrdinal::check_setup_model_common()\n{\n  if(N == 0) { throw std::runtime_error(\"check_setup_model: N == 0\"); }\n  if(Neff == 0) { throw std::runtime_error(\"check_setup_model: Neff == 0\"); }\n  if(Nb == 0) { throw std::runtime_error(\"check_setup_model: Nb == 0\"); }\n  if(ncat == 0) { throw std::runtime_error(\"check_setup_model: ncat == 0\"); }\n}\n\nvoid FitOrdinal::check_setup_data()\n{\n  if(verbose >= 2) cout << \"check_setup_data\\n\";\n\n  check_setup_data_common();\n\n  if(response == \"multinom\") {\n    check_setup_data_multinom();\n  } else if(response == \"binom\") {\n    check_setup_data_binom();\n  } else {\n    throw std::runtime_error(\"unknown response\");\n  }\n}\n\nvoid FitOrdinal::check_setup_data_common()\n{\n  /* cur_Score.resize(Nb); */\n  /* cur_Info.resize(Nb, Nb); */\n  /* cur_v.resize(Nb); cur_b.resize(Nb); */\n}\n\nvoid FitOrdinal::check_setup_data_multinom()\n{\n}\n\nvoid FitOrdinal::check_setup_data_binom()\n{\n  if(mub.size() != N) { throw std::runtime_error(\"check_setup_model: mub.size() != N\"); }\n  if(wb.size() != N) { throw std::runtime_error(\"check_setup_model: wb.size() != N\"); }\n  /* XtW.resize(Nb, N); */\n}\n\n//-----------------------------\n//  Class Ordinal: Constructors\n//-----------------------------\n\nOrdinal::Ordinal() \n{ \n  setup_defaults();\n}\n\nvoid Ordinal::setup_defaults() \n{\n  response = \"multinom\";\n  optim = \"WeightHalving\";\n  firth_binom = false; firth_multinom = false;\n\n  maxit = 100; maxit2 = 7; maxit3 = 25;\n  it2 = 0; strict = false;\n  tol = 1e-4; pseudo_stophalf = 0.0;\n\n  check_step = false;\n  max_step = 10.0;\n\n  preproc_cov = false;\n\n  cur_dev = 0; prev_dev = 0;\n}\n\n// constructor for FitOrdinal\n// - copy model parameters to FitOrdinal object\nFitOrdinal Ordinal::setup_fit()\n{\n  FitOrdinal fit;\n\n  // copy parameters from Ordinal\n  fit.response = response; // response type = [binom, multinom]\n  fit.model = model; // model = [POM: Proportional Odds Model, ACL: Adjacent Category Logit]\n  fit.optim = optim; // optimization algorithm = [FisherScoring, WeightHalving]\n  fit.firth_binom = firth_binom; fit.firth_multinom = firth_multinom; // Firth correction\n      \n  fit.maxit = maxit; fit.maxit2 = maxit2; fit.maxit3 = maxit3; fit.strict = strict;\n  fit.tol = tol; fit.pseudo_stophalf = pseudo_stophalf;\n\n  fit.check_step = check_step;\n  fit.max_step = max_step;\n\n  fit.N = N; fit.Neff = Neff; // samples size\n  fit.Ncov = Ncov; fit.Nb = Nb; // number of covariates\n                    \n  fit.ncat = ncat; // number of categories\n  fit.ncat1 = ncat1; fit.ncat1sq = ncat1sq;\n  fit.Ncat = Ncat;\n\n  fit.cur_dev = 0; fit.prev_dev = 0;\n  fit.it = 0; fit.it2 = 0;\n\n  
return(fit);\n}\n\n//--------------------------\n//  MultiPhen: Score Test\n//--------------------------\n\ndouble MultiPhen::test_score(const FitOrdinal & null, \n    const VectorXb & Mask, const MatrixXb & Ym, const Eigen::VectorXd & yb, \n    const Eigen::Ref<const Eigen::MatrixXd> & XYR, bool inc_cov)\n{\n  double pval;\n  if(response == \"multinom\") {\n    if(inc_cov) pval = test_score_multinom_pom(null, Mask, Ym, XYR.leftCols(Nx1).rightCols(Nx), XYR.rightCols(Ny21).leftCols(Ny)); // covariates X (no intercept); phenotypes Y\n    else pval = test_score_multinom_pom(null, Mask, Ym, XYR.leftCols(0), XYR.rightCols(Ny21).leftCols(Ny)); // 0 covariates (no intercept); phenotypes Y\n  } else if(response == \"binom\") {\n    if(inc_cov) pval = test_score_binom(null, Mask, yb, XYR.leftCols(Nx1).rightCols(Nx), XYR.rightCols(Ny21).leftCols(Ny)); // covariates X (no intercept); phenotypes Y\n    else pval = test_score_binom(null, Mask, yb, XYR.leftCols(0), XYR.rightCols(Ny21).leftCols(Ny)); // 0 covariates (no intercept); phenotypes Y\n  } else {\n    throw std::runtime_error(\"unknown response\");\n  }\n  return(pval);\n}\n\ndouble MultiPhen::test_score_multinom_pom(const FitOrdinal & null, \n    const VectorXb & Mask, const MatrixXb & Ym, \n    const Eigen::Ref<const Eigen::MatrixXd> & X, const Eigen::Ref<const Eigen::MatrixXd> & G)\n{\n  unsigned int k;\n  unsigned int Ng = G.cols(); // l = ncol(G), p = Nb, m = ncat1;\n\n  // check dimensions\n  if(G.cols() == 0) throw std::runtime_error(\"#cols in G is 0\");\n  if(G.rows() == 0) throw std::runtime_error(\"#rows in G is 0\");\n  if(G.rows() != N) throw std::runtime_error(\"#rows in G is different from N\");\n  if(Ym.rows() != N) throw std::runtime_error(\"#rows in Ym is different from N\");\n  if(X.cols() != null.Ncov) throw std::runtime_error(\"#cols in X != null.Ncov (test_score_multinom_pom)\");\n\n  // Score vector with l elements\n  VectorXd Score1 = ((null.V).transpose() * G).colwise().sum();\n  \n  // pre-compute\n  MatrixXd GW = G.transpose() * null.W;\n\n  // Info matrix\n  // 1x1 block of size pxp\n  // P = null.Score\n  // 1x2 block = W = p x m matrix\n  MatrixXd Info1_W(null.Nb, Ng);\n  // fill in part 1 of Info1_W: first ncat1 rows\n  for(k = 0; k < ncat1; k++) {\n    MatrixXd GWs = GW(all, seqN(k, ncat1, ncat1));\n    VectorXd GW1 = GWs.rowwise().sum();\n    Info1_W.row(k) = GW1.array();\n  }\n  // fill in part 2 of Info1_W: last Ncov rows\n  if(null.Ncov) {\n    MatrixXd GW12 = (X.array().colwise() * (null.WSS1).array()).matrix().transpose() * \n      (G.array().colwise() * (null.WSS1).array()).matrix();\n    Info1_W(seqN(ncat1, null.Ncov), all) = GW12;\n  }\n\n  // Info1_Q\n  MatrixXd Info1_Q = MatrixXd(Ng, Ng).setZero().selfadjointView<Lower>().\n      rankUpdate((G.array().colwise() * (null.WSS1).array()).matrix().adjoint());\n\n  // Variance matrix V of the score Score1\n  // V = Q - W' Info0^{-1} W\n  LLT<MatrixXd> llt_Info0(null.Info);\n  MatrixXd Var_Score1 = (Info1_Q - (Info1_W.transpose() * llt_Info0.solve(Info1_W)));\n\n  // Test statistic = Score1' Var_Score1^{-1} Score1\n  LLT<MatrixXd> llt_Var(Var_Score1);\n  double stat_score = Score1.transpose() * llt_Var.solve(Score1);\n  \n  // R: pchisq(stat_score, Ng, lower = FALSE)\n  boost::math::chi_squared dist(Ng);\n  double pval = boost::math::cdf(boost::math::complement(dist, stat_score));\n\n  return(pval);\n}\n\ndouble MultiPhen::test_score_binom(const FitOrdinal & null, \n    const VectorXb & Mask, const Eigen::VectorXd & yb, \n    const Eigen::Ref<const 
Eigen::MatrixXd> & X, const Eigen::Ref<const Eigen::MatrixXd> & G)\n{\n  unsigned int Ng = G.cols(); // l = ncol(G), p = Nb, m = ncat1;\n\n  // check dimensions\n  if(G.cols() == 0) throw std::runtime_error(\"#cols in G is 0\");\n  if(G.rows() == 0) throw std::runtime_error(\"#rows in G is 0\");\n  if(G.rows() != N) throw std::runtime_error(\"#rows in G is different from N\");\n  if(yb.size() != N) throw std::runtime_error(\"size of yb is different from N\");\n  if(X.cols() != null.Ncov) throw std::runtime_error(\"#cols in X != null.Ncov (test_score_binom)\");\n\n  // Score vector with Ng elements\n  VectorXd Score1 = G.transpose() * (yb - null.mub);\n  \n  // Info matrix \n  // Info1_W\n  MatrixXd Info1_W(null.Nb, Ng);\n  Info1_W.row(0) = (G.array().colwise() * (null.wb).array()).colwise().sum();\n  if(null.Ncov) {\n    Info1_W(seqN(1, null.Ncov), all) = X.transpose() * (G.array().colwise() * (null.wb).array()).matrix();\n  }\n\n  // Info1_Q\n  MatrixXd Info1_Q = MatrixXd(Ng, Ng).setZero().selfadjointView<Lower>().\n      rankUpdate((G.array().colwise() * (null.wb).array().sqrt()).matrix().adjoint());\n\n  // Variance matrix V of the score Score1\n  // V = Q - W' Info0^{-1} W\n  LLT<MatrixXd> llt_Info0(null.Info);\n  MatrixXd Var_Score1 = (Info1_Q - (Info1_W.transpose() * llt_Info0.solve(Info1_W)));\n\n  // Test statistic = Score1' Var_Score1^{-1} Score1\n  LLT<MatrixXd> llt_Var(Var_Score1);\n  double stat_score = Score1.transpose() * llt_Var.solve(Score1);\n  \n  // R: pchisq(stat_score, Ng, lower = FALSE)\n  boost::math::chi_squared dist(Ng);\n  double pval = boost::math::cdf(boost::math::complement(dist, stat_score));\n\n  return(pval);\n}\n\n//------------------\n//  Score Test\n//------------------\n\ndouble Ordinal::test_score(const FitOrdinal & null, const Eigen::MatrixXd & G)\n{\n  double pval;\n  if(response == \"multinom\") {\n    pval = test_score_multinom_pom(null, Xcov, G);\n  } else if(response == \"binom\") {\n    pval = test_score_binom(null, Xcov, G);\n  } else {\n    throw std::runtime_error(\"unknown response\");\n  }\n  return(pval);\n}\n\ndouble Ordinal::test_score(const FitOrdinal & null, const Eigen::MatrixXd & X, const Eigen::MatrixXd & G)\n{\n  double pval;\n  if(response == \"multinom\") {\n    pval = test_score_multinom_pom(null, X, G);\n  } else if(response == \"binom\") {\n    pval = test_score_binom(null, X, G);\n  } else {\n    throw std::runtime_error(\"unknown response\");\n  }\n\n  return(pval);\n}\n\ndouble Ordinal::test_score_binom(const FitOrdinal & null, const Eigen::MatrixXd & X, const Eigen::MatrixXd & G)\n{\n  unsigned int Ng = G.cols(); // l = ncol(G), p = Nb, m = ncat1;\n\n  // check dimensions\n  if(G.cols() == 0) throw std::runtime_error(\"#cols in G is 0\");\n  if(G.rows() == 0) throw std::runtime_error(\"#rows in G is 0\");\n  if(G.rows() != N) throw std::runtime_error(\"#rows in G is different from N\");\n\n  // Score vector with l elements\n  VectorXd Score1 = G.transpose() * (yb - null.mub);\n  \n  // Info matrix \n  // Info1_W\n  MatrixXd Info1_W(Nb, Ng);\n  Info1_W.row(0) = (G.array().colwise() * (null.wb).array()).colwise().sum();\n  if(Ncov) {\n    Info1_W(seqN(1, Ncov), all) = X.transpose() * (G.array().colwise() * (null.wb).array()).matrix();\n  }\n  // Info1_Q\n  MatrixXd Info1_Q = MatrixXd(Ng, Ng).setZero().selfadjointView<Lower>().\n      rankUpdate((G.array().colwise() * (null.wb).array().sqrt()).matrix().adjoint());\n\n  // Variance matrix V of the score Score1\n  // V = Q - W' Info0^{-1} W\n  LLT<MatrixXd> 
llt_Info0(null.Info);\n  MatrixXd Var_Score1 = (Info1_Q - (Info1_W.transpose() * llt_Info0.solve(Info1_W)));\n\n  // Test statistic = Score1' Var_Score1^{-1} Score1\n  LLT<MatrixXd> llt_Var(Var_Score1);\n  double stat_score = Score1.transpose() * llt_Var.solve(Score1);\n  \n  // R: pchisq(stat_score, Ng, lower = FALSE)\n  boost::math::chi_squared dist(Ng);\n  double pval = boost::math::cdf(boost::math::complement(dist, stat_score));\n\n  return(pval);\n}\n\ndouble Ordinal::test_score_multinom_pom(const FitOrdinal & null, const Eigen::MatrixXd & X, const Eigen::MatrixXd & G)\n{\n  unsigned int k;\n  unsigned int Ng = G.cols(); // l = ncol(G), p = Nb, m = ncat1;\n\n  // check dimensions\n  if(G.cols() == 0) throw std::runtime_error(\"#cols in G is 0\");\n  if(G.rows() == 0) throw std::runtime_error(\"#rows in G is 0\");\n  if(G.rows() != N) throw std::runtime_error(\"#rows in G is different from N\");\n\n  // Score vector with l elements\n  VectorXd Score1 = ((null.V).transpose() * G).colwise().sum();\n  \n  // pre-compute\n  MatrixXd GW = G.transpose() * null.W;\n\n  // Info matrix\n  // 1x1 block of size pxp\n  // P = null.Score\n  // 1x2 block = W = p x m matrix\n  MatrixXd Info1_W(Nb, Ng);\n  // fill in part 1 of Info1_W: first ncat1 rows\n  for(k = 0; k < ncat1; k++) {\n    MatrixXd GWs = GW(all, seqN(k, ncat1, ncat1));\n    VectorXd GW1 = GWs.rowwise().sum();\n    Info1_W.row(k) = GW1.array();\n  }\n  // fill in part 2 of Info1_W: last Ncov rows\n  if(Ncov) {\n    MatrixXd GW12 = (X.array().colwise() * (null.WSS1).array()).matrix().transpose() * \n      (G.array().colwise() * (null.WSS1).array()).matrix();\n    Info1_W(seqN(ncat1, Ncov), all) = GW12;\n  }\n\n  // Info1_Q\n  MatrixXd Info1_Q = MatrixXd(Ng, Ng).setZero().selfadjointView<Lower>().\n      rankUpdate((G.array().colwise() * (null.WSS1).array()).matrix().adjoint());\n\n  // Variance matrix V of the score Score1\n  // V = Q - W' Info0^{-1} W\n  LLT<MatrixXd> llt_Info0(null.Info);\n  MatrixXd Var_Score1 = (Info1_Q - (Info1_W.transpose() * llt_Info0.solve(Info1_W)));\n\n  // Test statistic = Score1' Var_Score1^{-1} Score1\n  LLT<MatrixXd> llt_Var(Var_Score1);\n  double stat_score = Score1.transpose() * llt_Var.solve(Score1);\n  \n  // R: pchisq(stat_score, Ng, lower = FALSE)\n  boost::math::chi_squared dist(Ng);\n  double pval = boost::math::cdf(boost::math::complement(dist, stat_score));\n\n  return(pval);\n}\n\n//------------------\n//  Set up\n//------------------\n\nvoid Ordinal::setup_xy(const Eigen::VectorXi &y, const Eigen::MatrixXd& X)\n{\n  unsigned int i;\n  set<int>::iterator it_set;\n\n  // assign N\n  N = y.size();\n  // assign category levels \n  for(i = 0; i < y.size(); i++) {\n    cat.insert(y[i]);\n  }\n  // assign ncat, ncat1\n  ncat = cat.size();\n  ncat1 = ncat - 1;\n  ncat1sq = ncat1 * ncat1;\n  \n  // set the response type\n  if(ncat == 2) {\n    response = \"binom\";\n  }\n  \n  // assign Ncov, Nb\n  Ncov = X.cols();\n  Nb = ncat1 + Ncov;\n  // assign Mask\n  Mask = VectorXb::Constant(N, true);\n  // assign Neff\n  Neff = Mask.array().cast<double>().sum();\n  // process X\n  if(Ncov && preproc_cov) {\n    Xcov = orth_matrix(X, Mask);\n  } else {\n    Xcov = X;\n  }\n  // update Ncov: some collinear columns in X might be removed\n  Ncov = Xcov.cols();\n\n  if(Ncov) {\n    Xcov1.resize(Xcov.rows(), Xcov.cols() + 1);\n    Xcov1.col(0).array() = VectorXd::Ones(Xcov1.rows());\n    Xcov1.rightCols(Ncov) = Xcov;\n  } else {\n    Xcov1 = MatrixXd::Ones(N, 1);\n  }\n\n  // assign Y\n  Y.resize(N, ncat);\n  
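// build Y: one boolean indicator column per observed category level; Ncat holds the per-level counts\n  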
Ncat = VectorXi::Constant(ncat, 0);\n  for(i = 0, it_set = cat.begin(); i < ncat; i++, it_set++) {\n    Y.col(i) = Mask.select(y.array() == *it_set, false);\n    Ncat(i) = Y.col(i).cast<int>().sum();\n  }\n\n  // binom\n  if(response == \"binom\") {\n    yb = Y.col(1).cast<double>(); // booleans -> 0/1\n  }\n}\n\n\n//------------------\n//  Fit\n//------------------\n\nvoid FitOrdinal::setup_restart(const Eigen::VectorXd & _b0)\n{\n  unsigned Nb0 = _b0.size();\n  if(Nb0 == 0) throw std::runtime_error(\"input b0 has size 0\");\n\n  apply_start = true;\n\n  if(Nb0 == Nb) {\n    b0 = _b0;\n  } else if(Nb0 < Nb) {\n    b0.resize(Nb);\n    b0.setZero();\n    b0.head(Nb0) = _b0;\n  } else {\n    throw std::runtime_error(\"Nb0 > Nb\");\n  }\n\n  /* cout << \" _b0 = \" << _b0.transpose() << endl; */\n  /* cout << \" b0 = \" << b0.transpose() << endl; */\n}\n\nvoid FitOrdinal::setup_offset_binom(const Eigen::VectorXd & _yo, bool decrement_Nb) \n{\n  apply_offset = true;\n  exclude_intercepts = true;\n  yo = _yo;\n  // Intercept is not modeled\n  if(decrement_Nb) --Nb; \n}\n\nvoid FitOrdinal::setup_offset_multinom_pom(const Eigen::VectorXd & _yo, const Eigen::VectorXd & _yo_int)\n{\n  apply_offset = true;\n  exclude_intercepts = true;\n  yo = _yo;\n  yo_int = _yo_int;\n}\n\nvoid FitOrdinal::setup_ncov0(unsigned int _Ncov0, bool _last0, bool preproc_cov)\n{\n  Ncov0 = _Ncov0;\n  last0 = _last0;\n\n  if(Ncov0) {\n    if(preproc_cov) throw std::runtime_error(\"preproc_cov is on when Ncov0 != 0\");\n\n    if(response == \"multinom\") {\n      if(Ncov0 > Ncov) throw std::runtime_error(\"Ncov0 > Ncov (multinom)\");\n      Ncov1 = Ncov - Ncov0;\n    } else if(response == \"binom\") {\n      if(Ncov0 > Nb) throw std::runtime_error(\"Ncov0 > Nb (binom)\");\n      Ncov1 = Nb - Ncov0;\n    } else {\n      throw std::runtime_error(\"unknown response\");\n    }\n  }\n}\n\n// main function\nFitOrdinal Ordinal::fit(const Eigen::VectorXi &y, const Eigen::MatrixXd& X,\n    unsigned int Ncov0, bool last0)\n{\n  // set up X & y\n  setup_xy(y, X);\n\n  // fit\n  FitOrdinal fit = setup_fit();\n  fit.setup_ncov0(Ncov0, last0, preproc_cov);\n\n  if(response == \"multinom\") {\n    fit.fit_multinom_pom(Mask, Y, Xcov);\n  } else if(response == \"binom\") {\n    fit.fit_binom(Mask, Y, Xcov1);\n  } else {\n    throw std::runtime_error(\"unknown response\");\n  }\n\n  return(fit);\n}\n\n//----------------------------\n//  Fit Common (FitOrdinal)\n//----------------------------\n\nvoid FitOrdinal::update_fit()\n{\n  bhat = cur_b;\n  loglik = cur_loglik;\n  Score = cur_Score; Info = cur_Info;\n}\n\n//----------------------------\n//  Fit Multinom (FitOrdinal)\n//----------------------------\n\n// main fit multinom function\nvoid FitOrdinal::fit_multinom_pom(const VectorXb & Mask, const MatrixXb & Y, const Eigen::Ref<const Eigen::MatrixXd> & X)\n{\n  if(verbose >= 2) cout << \"fit_multinom_pom\\n\"; \n  // start values of parameters\n  check_setup_model();\n  setup_start_multinom();\n  // allocate memory for matrices used in the loop\n  setup_par_multinom();\n  check_setup_data();\n  // optim\n  converged = optimize(Mask, Y, X);\n  // store results into fit\n  update_fit();\n  if(store_offset) {\n    // linear predictor without intercepts\n    if(Ncov) Xb0 = X * bhat.tail(Ncov);\n    else Xb0.setZero();\n    if(apply_offset) Xb0.array() += yo.array();\n    yo = Mask.select(Xb0, 0.0); // overwrite offset vector yo if present\n    // intercepts\n    yo_int = bhat.head(ncat1);\n  }\n}\n\n// set up starting values\nvoid 
FitOrdinal::setup_start_multinom()\n{\n  if(verbose >= 2) cout << \"setup_start_multinom\\n\"; \n\n  unsigned int i, i_cov, n_nom, n_denom;\n  \n  if(apply_start) {\n    if(b0.size() != Nb) throw std::runtime_error(\"b0 is not of size Nb\");\n  } else {\n    b0.resize(Nb);\n    // initialize intercepts\n    if(!exclude_intercepts) {\n      // v1\n      /* for(i = 0; i < ncat1; i++) { */\n      /*   b0[i] = (double)(1 + i); */\n      /* } */\n      // v2\n      for(i = 0, n_nom = 0, n_denom = Neff; i < ncat1; i++) {\n        n_nom += Ncat(i);\n        n_denom -= Ncat(i);\n        b0[i] = log((double)(n_nom)/(double)(n_denom));\n      }\n      // v3\n      /* Eigen::VectorXd Ncat_half(ncat); */\n      /* for(i = 0; i < ncat; i++) Ncat_half(i) = (double)(Ncat(i)) + 0.5; */\n\n      /* double n_nom_half, n_denom_half; */\n      /* for(i = 0, n_nom_half = 0.0, n_denom_half= (double)(Neff) + ncat*0.5; i < ncat1; i++) { */\n      /*   n_nom_half+= Ncat_half(i); */\n      /*   n_denom_half -= Ncat_half(i); */\n      /*   b0[i] = log(n_nom_half/n_denom_half); */\n      /* } */\n    }\n\n    // initialize covariate effects\n    i_cov = exclude_intercepts ? 0 : ncat1;\n    for(i = i_cov; i < Nb; i++) {\n      b0[i] = 0.0;\n    }\n  }\n\n  if(Ncov0) {\n    if(last0) b0.tail(Ncov0).setZero();\n    else b0.head(Ncov0).setZero();\n  }\n\n  if(verbose >= 3) cout << \" b0 = \" << b0.transpose() << endl;\n}\n\nvoid FitOrdinal::setup_par_multinom()\n{\n  if(verbose >= 2) cout << \"setup_par_multinom\\n\"; \n\n  Xb0.resize(N);\n  Xb.resize(N, ncat1); exp_eta.resize(N, ncat1); gamma.resize(N, ncat1); PQ.resize(N, ncat1);\n  P.resize(N, ncat1); P.setZero();\n  Psum.resize(N); Pk.resize(N);\n\n  D.resize(N, ncat1); D.setZero();\n  V.resize(N, ncat1); V.setZero();\n\n  Q.resize(N, ncat1sq); Q.setZero();\n  S.resize(N, ncat1sq); S.setZero();\n  QS.resize(N, ncat1sq); QS.setZero();\n  W.resize(N, ncat1sq); W.setZero();\n\n  WS2.resize(ncat1sq); WSS1.resize(N);\n  if(Ncov) {\n    XW.resize(Ncov, ncat1sq);\n    XWs.resize(Ncov, ncat1);\n    XW1.resize(Ncov);\n    XW22.resize(Ncov, Ncov);\n  }\n  \n  cur_Score.resize(Nb);\n  cur_Info.resize(Nb, Nb);\n  cur_v.resize(Nb); cur_b.resize(Nb);\n\n  if(Ncov0) {\n    if(last0) {\n      cur_Score.tail(Ncov0).setZero();\n      cur_v.tail(Ncov0).setZero();\n      cur_b.tail(Ncov0).setZero();\n    } else {\n      cur_Score.head(Ncov0).setZero();\n      cur_v.head(Ncov0).setZero();\n      cur_b.head(Ncov0).setZero();\n    }\n  }\n\n  if(firth_multinom) {\n    Ystar.resize(N, ncat);\n  }\n}\n\nbool FitOrdinal::update_par_multinom(const VectorXb & Mask, const MatrixXb & Y, const Eigen::Ref<const Eigen::MatrixXd> & X, const Eigen::VectorXd & b, \n    bool pseudo)\n{\n  if(verbose >= 2) cout << \" - update_par_multinom \" << (pseudo ? 
\"(pseudo)\" : \"\") << \"\\n\"; \n\n  unsigned int i, k,  l, m, start;\n\n  VectorXd b_cov;\n  if(exclude_intercepts) b_cov = b;\n  else b_cov = b.segment(ncat1, Ncov);\n\n  if(Ncov) {\n    if(Ncov0) {\n      if(last0) Xb0 = X.leftCols(Ncov1) * b_cov.head(Ncov1);\n      else throw std::runtime_error(\"!last0 not implemented yet\");\n    } else Xb0 = X * b_cov;\n  } else Xb0.setZero(); \n\n  if(apply_offset) Xb0.array() += yo.array(); // offset \n\n  // update linear predictor Xb with intercepts\n  if(exclude_intercepts) for(i = 0; i < ncat1; i++) Xb.col(i).array() = Xb0.array();\n  else {\n    VectorXd b_int = b.head(ncat1);\n    for(i = 0; i < ncat1; i++) Xb.col(i).array() = Xb0.array() + b_int(i);\n  }\n  if(apply_offset & !exclude_intercepts_offset) for(i = 0; i < ncat1; i++) Xb.col(i).array() += yo_int(i);\n\n  exp_eta = Xb; exp_matrix_ord(exp_eta);\n  gamma.array() = exp_eta.array() / (1.0 + exp_eta.array());\n\n  P = gamma;\n  for(i = 1; i < ncat1; i++) P.col(i).array() -= gamma.col(i - 1).array();\n  Psum = P.rowwise().sum();\n  if((Psum.array() >= 1.0).any()) {\n    cerr << \"WARNING: some elements in Psum >= 1.0\" << endl;\n    return(false);\n  }\n\n  Pk.array() = 1.0 - Psum.array();\n\n  // interim computation of log-lik\n  if(!pseudo) {\n    cur_loglik = loglik_multinom(Mask, Y); // depends on Y, P, Pk, Mask\n    if(verbose > 2) cout << \"  -- (iterim) loglik: \" << cur_loglik << endl;\n    if(check_nan(cur_loglik)) {\n      cerr << \"WARNING: log-lik is NaN or Inf\" << endl;\n      return(false);\n    }\n  }\n \n  // D = (Y[, -ncat] / P) - (Y[, ncat] / Pk)\n  if(!pseudo) for(i = 0; i < ncat1; i++) D.col(i).array() = Y.col(i).cast<double>().array() / P.col(i).array() - Y.col(ncat1).cast<double>().array() / Pk.array();\n  else for(i = 0; i < ncat1; i++) D.col(i).array() = Ystar.col(i).array() / P.col(i).array() - Ystar.col(ncat1).array() / Pk.array();\n\n  // Q = dh / deta\n  PQ.array() = gamma.array() * (1.0 - gamma.array());\n\n  for(m = 0; m < ncat1; m++) {\n    l = m; start = l * ncat1;\n    Q.col(start + m).array() = PQ.col(m).array();\n  }\n  for(m = 1; m < ncat1; m++) {\n    l = m - 1; start = l * ncat1;\n    Q.col(start + m).array() = -1.0 * PQ.col(l).array();\n  }\n\n  // V\n  for(k = 0; k < ncat1; k++) {\n    // cols_Q = (k - 1)*ncat1 + seq(ncat1)\n    // V[, k] = rowSums(D * Q[, cols_Q])\n    V.col(k).array() = (D.array() * Q(all, seqN(k*ncat1, ncat1)).array()).rowwise().sum(); \n  }\n\n  // Sinv or S = diag(1/p) + 1/(1 - sum_p)\n  for(k = 0; k < ncat1sq; k++) { // go through all ncat1sq columns\n    S.col(k) = Pk.array().inverse();\n  }\n  for(k = 0; k < ncat1; k++) { // go through ncat1 columns\n    l = (ncat1 + 1)*k;\n    S.col(l).array() += P.col(k).array().inverse();\n  }\n\n  // W = crossprod(Q, Sinv) %*% Q\n  // (Q'S)'\n  for(l = 0; l < ncat1; l++) {\n    for(m = 0; m < ncat1; m++) {\n      // col_QS = ncat1 * (l - 1) + m\n      // cols_S = ncat1 * (m - 1) + seq(ncat1)\n      // cols_Q = ncat1 * (l - 1) + seq(ncat1)\n      // QS[, col_QS] = rowSums(Q[, cols_Q] * S[, cols_S])\n      k = ncat1 * l + m; // col_QS\n      QS.col(k).array() = (Q(all, seqN(l*ncat1, ncat1)).array() * S(all, seqN(ncat1*m, ncat1)).array()).rowwise().sum();\n    }\n  }\n  for(l = 0; l < ncat1; l++) {\n    for(m = 0; m < ncat1; m++) {\n      // col_W = ncat1 * (l - 1) + m\n      // cols_QS = ncat1 * (m - 1) + seq(ncat1)\n      // cols_Q = ncat1 * (l - 1) + seq(ncat1)\n      // W[, col_W] = rowSums(QS[, cols_QS] * Q[, cols_Q])\n      k = ncat1 * l + m; // col_W\n      W.col(k).array() = 
(QS(all, seqN(m*ncat1, ncat1)).array() * Q(all, seqN(ncat1*l, ncat1)).array()).rowwise().sum();\n    }\n  }\n\n  // Account for miss. via Mask\n  for(unsigned int i = 0; i < V.cols(); i++) V.col(i) = Mask.select(V.col(i), 0.0);\n  for(unsigned int i = 0; i < W.cols(); i++) W.col(i) = Mask.select(W.col(i), 0.0);\n\n  // Score\n  // Score = c(colSums(V), colSums((crossprod(V, X))))\n  if(!exclude_intercepts) cur_Score.head(ncat1) = V.colwise().sum();\n  if(Ncov) {\n    if(Ncov0) {\n      if(last0) cur_Score.tail(Ncov).head(Ncov1) = (V.transpose() * X.leftCols(Ncov1)).colwise().sum();\n      else throw std::runtime_error(\"!last0 not implemented yet\");\n    } else cur_Score.tail(Ncov) = (V.transpose() * X).colwise().sum();\n  }\n\n  // Info\n  WS2 = W.colwise().sum();\n  WSS1 = W.rowwise().sum().array().sqrt();\n  // Info 1x1 block: Info[seq(ncat1), seq(ncat1)] = matrix(Wsum, ncat1, ncat1)\n  if(!exclude_intercepts) cur_Info(seqN(0, ncat1), seqN(0, ncat1)) = Map<MatrixXd>(WS2.data(), ncat1, ncat1);\n\n  if(Ncov) {\n    if(!exclude_intercepts) {\n      XW = X.transpose() * W;\n      // Info 1x2 & 2x1 blocks\n      for(k = 0; k < ncat1; k++) {\n        // cols_XW = (k - 1) + seq(1, by = ncat1, length = ncat1)\n        // Info[k, seq(ncat, nb)] = rowSums(XW[, cols_XW, drop = FALSE])\n        XWs = XW(all, seqN(k, ncat1, ncat1));\n        XW1 = XWs.rowwise().sum();\n        cur_Info(k, seqN(ncat1, Ncov)).array() = XW1.array();\n        cur_Info(seqN(ncat1, Ncov), k).array() = XW1.array();\n      }\n    }\n    // Info 2x2 block\n    // Info[seq(ncat, nb), seq(ncat, nb)] = crossprod(sqrt(Wsum1) * X)\n    XW22 = MatrixXd(Ncov, Ncov).setZero().selfadjointView<Lower>().\n      rankUpdate((X.array().colwise() * WSS1.array()).matrix().adjoint());\n    if(exclude_intercepts) {\n      cur_Info.array() = XW22.array();\n    } else {\n      cur_Info(seqN(ncat1, Ncov), seqN(ncat1, Ncov)).array() = XW22.array();\n    }\n  }\n\n  // solve: v = solve(Info, Score)\n  // Firth correction?\n  LLT<MatrixXd> llt(cur_Info);\n\n  if(verbose > 2) {\n    ColPivHouseholderQR<MatrixXd> qr;\n    qr.compute(cur_Info);\n    cout << \" qr.isInvertible(cur_info) = \" << qr.isInvertible() << endl;\n  }\n\n  if(!firth_multinom | pseudo) {\n    if(Ncov0) {\n      if(last0) {\n        LLT<MatrixXd> llt1(cur_Info.block(0, 0, ncat1 + Ncov1, ncat1 + Ncov1));\n        cur_v.head(ncat1 + Ncov1) = llt1.solve(cur_Score.head(ncat1 + Ncov1));\n      } else throw std::runtime_error(\"!last0 not implemented yet\");\n    } else cur_v = llt.solve(cur_Score);\n  } else {\n    /* MatrixXd cur_Info_inv = llt.solve(MatrixXd::Identity(Nb, Nb)); */\n    MatrixXd cur_Info_inv;\n    if(Ncov0) {\n      if(last0) cur_Info_inv = cur_Info.block(0, 0, ncat1 + Ncov1, ncat1 + Ncov1).inverse();\n      else throw std::runtime_error(\"!last0 not implemented yet\");\n    } else {\n      // v1\n      /* cur_Info_inv = cur_Info.inverse(); */ \n      // v2\n      ColPivHouseholderQR<MatrixXd> qr;\n      qr.compute(cur_Info);\n      /* if(!qr.isInvertible()) { */\n      /*   cerr << \"WARNING: Info is not invertible\" << endl; */\n      /*   return(false); */\n      /* } */\n      cur_Info_inv = qr.inverse();\n    }\n    if(verbose > 2) cout << \"  -- cur_Info_inv: \" << cur_Info_inv.rows() << \"x\" << cur_Info_inv.cols() << endl;\n\n    MatrixXd diagA(N, ncat1); diagA.setZero();\n    if(Ncov0) {\n      if(last0) {\n        if(Ncov1) diagA = 2 * (X.leftCols(Ncov1) * cur_Info_inv(seqN(ncat1, Ncov1), seqN(0, ncat1))); \n        for(i = 0; i < ncat1; i++) 
diagA.col(i).array() += cur_Info_inv(i, i);\n        if(Ncov1) diagA.array().colwise() += ((X.leftCols(Ncov1) * cur_Info_inv(seqN(ncat1, Ncov1), seqN(ncat1, Ncov1))).array() * X.leftCols(Ncov1).array()).rowwise().sum();\n      } else throw std::runtime_error(\"!last0 not implemented yet\");\n    } else {\n      if(!exclude_intercepts) {\n        if(Ncov) diagA = 2 * (X * cur_Info_inv(seqN(ncat1, Ncov), seqN(0, ncat1))); \n        for(i = 0; i < ncat1; i++) diagA.col(i).array() += cur_Info_inv(i, i);\n        if(Ncov) diagA.array().colwise() += ((X * cur_Info_inv(seqN(ncat1, Ncov), seqN(ncat1, Ncov))).array() * X.array()).rowwise().sum();\n      } else {\n        if(Ncov) diagA.array().colwise() += ((X * cur_Info_inv).array() * X.array()).rowwise().sum();\n      }\n    }\n    if(verbose > 2) cout << \"  -- diagA: \" << diagA.rows() << \"x\" << diagA.cols() << endl;\n\n    MatrixXd adj_c = 0.5 * diagA.array() * dlog_matrix(Xb).array();\n\n    // adjustment to counts\n    MatrixXd adj_a(N, ncat);\n    adj_a.leftCols(ncat1) = adj_c; \n    adj_a.col(ncat1) *= 0;\n    adj_a.rightCols(ncat1).array() -= adj_c.array();\n\n    // Yadj = Y + adj_a\n    Ystar = Y.array().cast<double>(); \n    Ystar.array() += adj_a.array();\n\n    // re-compute D, V (Q doesn't change as it is function of probs.)\n    for(i = 0; i < ncat1; i++) D.col(i).array() = Ystar.col(i).array() / P.col(i).array() - Ystar.col(ncat1).array() / Pk.array();\n    for(k = 0; k < ncat1; k++) V.col(k).array() = (D.array() * Q(all, seqN(k*ncat1, ncat1)).array()).rowwise().sum(); \n    // account for miss. via Mask\n    for(unsigned int i = 0; i < V.cols(); i++) V.col(i) = Mask.select(V.col(i), 0.0);\n\n    // re-compute Scores\n    if(!exclude_intercepts) cur_Score.head(ncat1) = V.colwise().sum();\n    if(Ncov) {\n      if(Ncov0) {\n        if(last0) cur_Score.tail(Ncov).head(Ncov1) = (V.transpose() * X.leftCols(Ncov1)).colwise().sum();\n        else throw std::runtime_error(\"!last0 not implemented yet\");\n      } else cur_Score.tail(Ncov) = (V.transpose() * X).colwise().sum();\n    }\n    if(verbose > 2) cout << \"  -- cur_Score: \" << cur_Score.transpose() << endl;\n\n    if(Ncov0) {\n      if(last0) cur_v.head(ncat1 + Ncov1) = cur_Info_inv * cur_Score.head(ncat1 + Ncov1);\n      else throw std::runtime_error(\"!last0 not implemented yet\");\n    } else cur_v = cur_Info_inv * cur_Score;\n    if(verbose > 2) cout << \"  -- cur_v: \" << cur_v.transpose() << endl;\n  }\n\n  if(!pseudo) {\n    if(firth_multinom) cur_loglik = loglik_multinom_firth(Mask, Y, llt, true, cur_loglik); // add = true\n    cur_dev = -2.0 * cur_loglik;\n  }\n\n  // dump\n  if(verbose >= 3) {\n    bool append = true; // (cnt_updates > 1);\n    string name = \"ordinal.txt\";\n    ofstream file;\n    if(append) file.open(name.c_str(), ios::out | ios::app);\n    else file.open(name.c_str(), ios::out);\n\n    double diff = cur_Score.array().abs().maxCoeff(); \n    file << cnt_fit  << \" \" << b.size() << \" \" << cnt_updates << \" \" << cur_loglik << \" \" << cur_dev\n      << \" \" << diff; \n    for(unsigned int i = 0; i < b.size(); i++) {\n      file << \" \" << b(i);\n    }\n    file << endl;\n    file.close();\n\n    if(verbose >= 4) {\n      const static IOFormat CSVFormat(StreamPrecision, DontAlignCols, \", \", \"\\n\");\n      ofstream yfile(\"y.txt\");\n      yfile << Y.col(1).format(CSVFormat);\n      yfile.close();\n      ofstream xfile(\"x.txt\");\n      xfile << X.format(CSVFormat);\n      xfile.close();\n      ofstream mfile(\"m.txt\");\n      mfile << 
Mask.format(CSVFormat);\n      mfile.close();\n\n      throw std::runtime_error(\"verbose level 4: exit after dumping data into y.txt, x.txt and m.txt\");\n    }\n  }\n\n  return(true);\n}\n\ndouble FitOrdinal::loglik_multinom(const VectorXb & Mask, const MatrixXb & Y)\n{\n  // loglik = colSums(Y*log(cbind(P, Pk))) %>% sum\n  double res = 0.0;\n  // 1, 2, ..., ncat1 categories\n  for(unsigned int i = 0; i < ncat1; i++) {\n    res += Mask.select(Y.col(i).select(P.col(i).array().log(), 0.0), 0.0).array().sum();\n  }\n  // the last ncat category\n  res += Mask.select(Y.col(ncat1).select(Pk.array().log(), 0.0), 0.0).array().sum();\n\n  return(res);\n}\n\ndouble FitOrdinal::loglik_multinom_firth(const VectorXb & Mask, const MatrixXb & Y, const LLT<MatrixXd> & llt,\n    bool add, double base_loglik)\n{\n  double res = add ? base_loglik : loglik_multinom(Mask, Y);\n\n  // https://gist.github.com/redpony/fc8a0db6b20f7b1a3f23\n  double half_logdet = llt.matrixL().toDenseMatrix().diagonal().array().log().sum();\n  /* double half_logdet = log(llt.matrixL().determinant()); */\n\n  res += firth_mult * half_logdet;\n\n  return(res);\n}\n\ndouble FitOrdinal::loglik_multinom_firth(const VectorXb & Mask, const MatrixXb & Y, const MatrixXd & Info)\n{\n  double res = loglik_multinom(Mask, Y);\n\n  double half_logdet = 0.5*log(Info.determinant());\n\n  res += firth_mult * half_logdet;\n\n  return(res);\n}\n\n//----------------------------\n//  Fit Binom (FitOrdinal)\n//----------------------------\n\n// main fit binom function\nvoid FitOrdinal::fit_binom(const VectorXb & Mask, const MatrixXb & Y, const Eigen::Ref<const Eigen::MatrixXd> & X)\n{\n  if(verbose >= 2) cout << \"fit_binom\\n\"; \n  /* // start values of parameters */\n  check_setup_model();\n  setup_start_binom();\n  // allocate memory for matrices used in the loop\n  setup_par_binom();\n  check_setup_data();\n  // optim\n  converged = optimize(Mask, Y, X);\n  // store results into fit\n  update_fit();\n  // store offset if specified\n  if(store_offset) {\n    Xb = X * bhat;\n    if(apply_offset) Xb.array() += yo.array();\n    yo = Mask.select(Xb, 0.0); // overwrite offset vector yo if present\n  }\n}\n\n// set up starting values\nvoid FitOrdinal::setup_start_binom()\n{\n  if(verbose >= 2) cout << \"setup_start_binom\\n\";\n\n  unsigned int i;\n  \n  if(apply_start) {\n    if(b0.size() != Nb) throw std::runtime_error(\"b0 is not of size Nb\");\n  } else {\n    b0.resize(Nb);\n    // intercepts\n    b0(0) = log((double)(Ncat(1)/(double)(Ncat(0)))); // log(n1/n0)\n    // covariates\n    for(i = ncat1; i < Nb; i++) {\n      b0[i] = 0.0;\n    }\n  }\n\n  if(Ncov0) {\n    if(last0) b0.tail(Ncov0).setZero();\n    else b0.head(Ncov0).setZero();\n  }\n\n  if(verbose >= 3) cout << \" b0 = \" << b0.transpose() << endl;\n\n}\n\nvoid FitOrdinal::setup_par_binom()\n{\n  if(verbose >= 2) cout << \"setup_par_binom\\n\";\n\n  mub.resize(N); wb.resize(N);\n  XtW.resize(Nb, N);\n\n  cur_Score.resize(Nb);\n  cur_Info.resize(Nb, Nb);\n  cur_v.resize(Nb); cur_b.resize(Nb);\n\n  if(Ncov0) {\n    if(last0) {\n      cur_Score.tail(Ncov0).setZero();\n      cur_v.tail(Ncov0).setZero();\n      cur_b.tail(Ncov0).setZero();\n    } else {\n      cur_Score.head(Ncov0).setZero();\n      cur_v.head(Ncov0).setZero();\n      cur_b.head(Ncov0).setZero();\n    }\n  }\n\n  if(firth_binom) {\n    ystar.resize(N);\n  }\n}\n\nbool FitOrdinal::update_par_binom(const VectorXb & Mask, const MatrixXb & Y, const Eigen::Ref<const Eigen::MatrixXd> & X, const Eigen::VectorXd & b)\n{\n  if(verbose 
>= 2) cout << \" - update_par_binom\\n\"; \n\n  mub = X * b;\n  if(apply_offset) mub.array() += yo.array();\n  exp_vector(mub); // mub <- exp(mub)\n  mub.array() /= (1.0 + mub.array()); // mub <- exp(mub) / (1 + exp(mub))\n\n  wb.array() = Mask.select(mub.array() * (1.0 - mub.array()), 1.0);\n\n  // Score: Score = c(sum(y - mu), X' (y - mu)) \n  cur_Score = X.transpose() * Mask.select((Y.col(1).array().cast<double>() - mub.array()), 0.0).matrix();\n\n  // Info\n  cur_Info = X.transpose() * (X.array().colwise() * wb.array()).matrix();\n\n  // solve: v = solve(Info, Score)\n  LLT<MatrixXd> llt(cur_Info);\n  cur_v = llt.solve(cur_Score);\n\n  cur_loglik = loglik_binom(Mask, Y);\n  cur_dev = -2.0 * cur_loglik;\n\n  // dump\n  if(verbose >= 3) {\n    bool append = true; // (cnt_updates > 1);\n    string name = \"ordinal.txt\";\n    ofstream file;\n    if(append) file.open(name.c_str(), ios::out | ios::app);\n    else file.open(name.c_str(), ios::out);\n\n    double diff = cur_Score.array().abs().maxCoeff(); \n    file << cnt_fit << \" \" << b.size() << \" \" << cnt_updates << \" \" << cur_loglik << \" \" << cur_dev\n      << \" \" << diff; \n    for(unsigned int i = 0; i < b.size(); i++) {\n      file << \" \" << b(i);\n    }\n    file << endl;\n    file.close();\n  }\n\n  return(true);\n}\n\nbool FitOrdinal::update_par_binom_firth(const VectorXb & Mask, const MatrixXb & Y, const Eigen::Ref<const Eigen::MatrixXd> & X, const Eigen::VectorXd & b)\n{\n  if(verbose >= 2) cout << \" - update_par_binom_firth\\n\"; \n\n  /* cout << Ncov0 << \" \" << last0 << \" \" << Ncov1 << endl; */\n  /* cout << b.size() << \" \" << X.cols() << \" \" << X.rows() << endl; */\n  if(Ncov0) {\n    if(last0) mub = X.leftCols(Ncov1) * b.head(Ncov1);\n    else mub = X.rightCols(Ncov1) * b.tail(Ncov1);\n  } else {\n    mub = X * b;\n  }\n  if(apply_offset) mub.array() += yo.array();\n  exp_vector(mub); // mub <- exp(mub)\n  mub.array() /= (1.0 + mub.array()); // mub <- exp(mub) / (1 + exp(mub))\n\n  wb.array() = Mask.select(mub.array() * (1.0 - mub.array()), 1.0);\n\n  // Info = X' W X\n  XtW = X.transpose() * wb.array().sqrt().matrix().asDiagonal();\n  cur_Info = XtW * XtW.transpose();\n\n  // derive h\n  LLT<MatrixXd> llt(cur_Info);\n  VectorXd h = (llt.solve(XtW).array() * XtW.array()).colwise().sum();\n\n  // derive pseudo response\n  ystar = Y.col(1).cast<double>().array() + firth_mult * h.array() * (0.5 - mub.array());\n\n  // update Score = Ab + Sb\n  /* Ab = crossprod(X, h * (0.5 - mu)) */\n  /* Sb = crossprod(X, y - mu) */\n  // solve: v = solve(Info, Score)\n\n  if(Ncov0) {\n    if(last0) {\n      LLT<MatrixXd> llt1(cur_Info.block(0, 0, Ncov1, Ncov1));\n      cur_Score.head(Ncov1) = (X.leftCols(Ncov1).transpose() * Mask.select(ystar.array() - mub.array(), 0.0).matrix()).array();\n      cur_v.head(Ncov1) = llt1.solve(cur_Score.head(Ncov1));\n    } else {\n      LLT<MatrixXd> llt1(cur_Info.block(Ncov0, Ncov0, Ncov1, Ncov1));\n      cur_Score.tail(Ncov1) = (X.rightCols(Ncov1).transpose() * Mask.select(ystar.array() - mub.array(), 0.0).matrix()).array();\n      cur_v.tail(Ncov1) = llt1.solve(cur_Score.tail(Ncov1));\n    }\n  } else {\n    cur_Score = (X.transpose() * Mask.select(ystar.array() - mub.array(), 0.0).matrix()).array();\n    cur_v = llt.solve(cur_Score);\n  }\n\n  cur_loglik = loglik_binom_firth(Mask, Y, llt);\n  cur_dev = -2.0 * cur_loglik;\n\n  // dump\n  if(verbose >= 3) {\n    bool append = true; // (cnt_updates > 1);\n    string name = \"ordinal.txt\";\n    ofstream file;\n    if(append) 
file.open(name.c_str(), ios::out | ios::app);\n    else file.open(name.c_str(), ios::out);\n\n    double diff = cur_Score.array().abs().maxCoeff(); \n    file << cnt_fit << \" \" << b.size() << \" \" << cnt_updates << \" \" << cur_loglik << \" \" << cur_dev\n      << \" \" << diff; \n    for(unsigned int i = 0; i < b.size(); i++) {\n      file << \" \" << b(i);\n    }\n    file << endl;\n    file.close();\n\n    if(verbose >= 4) {\n      const static IOFormat CSVFormat(StreamPrecision, DontAlignCols, \", \", \"\\n\");\n      ofstream yfile(\"y.txt\");\n      yfile << Y.col(1).format(CSVFormat);\n      yfile.close();\n      ofstream xfile(\"x.txt\");\n      xfile << X.format(CSVFormat);\n      xfile.close();\n      ofstream mfile(\"m.txt\");\n      mfile << Mask.format(CSVFormat);\n      mfile.close();\n      \n      throw std::runtime_error(\"verbose level 4: exit after dumping data into y.txt, x.txt and m.txt\");\n    }\n  }\n\n  return(true);\n}\n\nbool FitOrdinal::update_par_binom_pseudo(const VectorXb & Mask, const MatrixXb & Y, const Eigen::Ref<const Eigen::MatrixXd> & X, const Eigen::VectorXd & b)\n{\n  if(verbose >= 2) cout << \" - update_par_binom_pseudo\\n\"; \n\n  if(Ncov0) {\n    if(last0) mub = X.leftCols(Ncov1) * b.head(Ncov1);\n    else mub = X.rightCols(Ncov1) * b.tail(Ncov1);\n  } else {\n    mub = X * b;\n  }\n  if(apply_offset) mub.array() += yo.array();\n  exp_vector(mub); // mub <- exp(mub)\n  mub.array() /= (1.0 + mub.array()); // mub <- exp(mub) / (1 + exp(mub))\n\n  wb.array() = Mask.select(mub.array() * (1.0 - mub.array()), 1.0);\n\n  // Info = X' W X\n  XtW = X.transpose() * wb.array().sqrt().matrix().asDiagonal();\n  cur_Info = XtW * XtW.transpose();\n\n  if(Ncov0) {\n    if(last0) {\n      LLT<MatrixXd> llt1(cur_Info.block(0, 0, Ncov1, Ncov1));\n      cur_Score.head(Ncov1) = X.leftCols(Ncov1).transpose() * Mask.select(ystar.array() - mub.array(), 0.0).matrix();\n      cur_v.head(Ncov1) = llt1.solve(cur_Score.head(Ncov1));\n    } else {\n      LLT<MatrixXd> llt1(cur_Info.block(Ncov0, Ncov0, Ncov1, Ncov1));\n      cur_Score.tail(Ncov1) = X.rightCols(Ncov1).transpose() * Mask.select(ystar.array() - mub.array(), 0.0).matrix();\n      cur_v.tail(Ncov1) = llt1.solve(cur_Score.tail(Ncov1));\n    }\n  } else {\n    LLT<MatrixXd> llt(cur_Info);\n    cur_Score = X.transpose() * Mask.select(ystar.array() - mub.array(), 0.0).matrix();\n    cur_v = llt.solve(cur_Score);\n  }\n\n  // What is dev/loglik for pseudo response model?\n  /* cur_loglik = loglik_binom_firth(Mask, Y, llt); */\n  /* cur_dev = -2.0 * cur_loglik; */\n\n  return(true);\n}\n\ndouble FitOrdinal::loglik_binom(const VectorXb & Mask, const MatrixXb & Y)\n{\n  // loglik = sum(Y*log(p) + (1-Y)*log(1-p)), where p = mu\n  double res = 0.0;\n  // controls\n  res += Mask.select(Y.col(0).select((1.0 - mub.array()).log(), 0.0), 0.0).array().sum();\n  // cases\n  res += Mask.select(Y.col(1).select(mub.array().log(), 0.0), 0.0).array().sum();\n\n  return(res);\n}\n\ndouble FitOrdinal::loglik_binom_firth(const VectorXb & Mask, const MatrixXb & Y, const LLT<MatrixXd> & llt)\n{\n  double res = loglik_binom(Mask, Y);\n\n  // https://gist.github.com/redpony/fc8a0db6b20f7b1a3f23\n  double half_logdet = llt.matrixL().toDenseMatrix().diagonal().array().log().sum();\n  /* double half_logdet = log(llt.matrixL().determinant()); */\n\n  /* cout << \"firth_mult = \" << firth_mult << endl; */\n  res += firth_mult * half_logdet;\n\n  return(res);\n}\n\nbool FitOrdinal::stop_criterion()\n{\n  bool stop;\n  // v1: abs. diff. 
between bhat_cur and bhat_prev\n  // stop = (cur_v.norm() < tol);\n  // v2: abs. max. value of Scores. \n  // - Example: Regenie Firth model fitting \n  stop_value = cur_Score.array().abs().maxCoeff();\n  stop = (stop_value < tol); \n  // v3: relative diff. in deviance. \n  // - Example: glm2::glm.fit2.R\n  /* stop = ((abs(cur_dev - prev_dev) / (1.0 + abs(prev_dev))) < tol); // 1.0 to prevent from division by zero */\n\n  if(verbose >= 2) cout << \"stop_criterion: \" << stop_value << \" < \" << tol << endl;\n\n  return(stop);\n}\n\nbool FitOrdinal::update_par(const VectorXb & Mask, const MatrixXb & Y, const Eigen::Ref<const Eigen::MatrixXd> & X, const Eigen::VectorXd & b, bool pseudo)\n{\n  bool ret;\n\n  // count\n  if(trace) { cnt_updates++; }\n\n  // store prev. values \n  prev_dev = cur_dev;\n\n  // update directions\n  if(response == \"multinom\") {\n    if(pseudo) ret = update_par_multinom(Mask, Y, X, b, true); // pseudo = true\n    else ret = update_par_multinom(Mask, Y, X, b);\n  } else if(response == \"binom\") {\n    if(firth_binom) {\n      if(pseudo) ret = update_par_binom_pseudo(Mask, Y, X, b);\n      else ret = update_par_binom_firth(Mask, Y, X, b);\n    } else {\n      ret = update_par_binom(Mask, Y, X, b);\n    }\n  } else {\n    throw std::runtime_error(\"unknown response\");\n  }\n\n  return(ret);\n}\n\n// main optimization dispatch (used for both binom. and multinom. responses)\nbool FitOrdinal::optimize(const VectorXb & Mask, const MatrixXb & Y, const Eigen::Ref<const Eigen::MatrixXd> & X)\n{\n  if(verbose >= 2) cout << \"optimize\\n\"; \n\n  bool res;\n\n  // special case when firth_multinom is on\n  /* if(firth_multinom) { */\n  /*   res = optimize_FisherScoring(Mask, Y, X); */\n  /* } else */ \n  if(optim == \"FisherScoring\") {\n    res = optimize_FisherScoring(Mask, Y, X);\n  } else if(optim == \"FisherScoringPseudo\") {\n    if(firth_binom | firth_multinom) res = optimize_FisherScoringPseudo(Mask, Y, X);\n    else res = optimize_FisherScoring(Mask, Y, X);\n  } else if(optim == \"WeightHalving\") {\n    res = optimize_WeightHalving(Mask, Y, X);\n  } else if(optim == \"WeightHalvingPseudo\") {\n    if(firth_binom | firth_multinom) res = optimize_WeightHalvingPseudo(Mask, Y, X);\n    else res = optimize_WeightHalving(Mask, Y, X);\n  } else {\n    throw std::runtime_error(\"unknown optimize\");\n  }\n\n  if(verbose >= 2) cout << \"optimize it = \" << it << \" | cnt_updates = \" << cnt_updates << endl;\n  if(verbose >= 3) cout << \"bhat = \" << cur_b.transpose() << endl;\n  if(verbose >= 3) cout << \"converged = \" << converged << endl;\n\n  return(res);\n}\n\nbool FitOrdinal::optimize_FisherScoring(const VectorXb & Mask, const MatrixXb & Y, const Eigen::Ref<const Eigen::MatrixXd> & X)\n{\n  unsigned int i;\n  bool res, up = true;\n  double ratio_step;\n\n  cur_b = b0;\n  for(i = 0; i < maxit; i++) {\n    // update directions\n    up = update_par(Mask, Y, X, cur_b);\n    if(!up) break;\n\n    // check the stopping criteria\n    if(stop_criterion()) break;\n\n    // check that the absolute step size is less than max_step for each entry of the step (cur_v)\n    if(check_step) {\n      ratio_step = cur_v.array().abs().maxCoeff() / max_step;\n      if(verbose >= 3) cout << \" cur_v.array().abs().maxCoeff() = \" << cur_v.array().abs().maxCoeff() << endl;\n      if(verbose >= 3) cout << \" ratio_step = \" << ratio_step << endl;\n      if(ratio_step > 1.0) {\n        cur_v.array() /= ratio_step;\n      }\n      if(verbose >= 3) cout << \" cur_v.array().abs().maxCoeff() = \" << cur_v.array().abs().maxCoeff() << endl;\n    }
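 // note: when ratio_step > 1, the whole step vector is rescaled so its largest absolute entry equals max_step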
\n\n    // update parameters\n    cur_b += cur_v;\n\n    // check if bhat is nan\n    if(cur_b.array().isNaN().any()) { it = i; return false; }\n\n  }\n  // assign # iterations performed\n  it = i;\n  res = (it < maxit) & up;\n\n  // check if any NaN\n  if(cur_Score.array().isNaN().any() | cur_b.array().isNaN().any() | isnan(cur_dev)) {\n    return false; \n  }\n\n  return(res);\n}\n\nbool FitOrdinal::optimize_FisherScoringPseudo(const VectorXb & Mask, const MatrixXb & Y, const Eigen::Ref<const Eigen::MatrixXd> & X)\n{\n  unsigned int i, i3;\n  bool res, up = true;\n  double ratio_step;\n\n  if(!(firth_binom | firth_multinom)) throw std::runtime_error(\"optimize_FisherScoringPseudo is for binomial/multinomial response with Firth correction\");\n\n  cur_b = b0;\n  for(i = 0; i < maxit; i++) {\n    // update directions\n    up = update_par(Mask, Y, X, cur_b);\n    if(!up) break;\n\n    // check the stopping criteria\n    if(stop_criterion()) break;\n\n    // check that the absolute step size is less than max_step for each entry of the step (cur_v)\n    if(check_step) {\n      ratio_step = cur_v.array().abs().maxCoeff() / max_step;\n      if(verbose >= 3) cout << \" cur_v.array().abs().maxCoeff() = \" << cur_v.array().abs().maxCoeff() << endl;\n      if(verbose >= 3) cout << \" ratio_step = \" << ratio_step << endl;\n      if(ratio_step > 1.0) cur_v.array() /= ratio_step;\n      if(verbose >= 3) cout << \" cur_v.array().abs().maxCoeff() = \" << cur_v.array().abs().maxCoeff() << endl;\n    }\n\n    // ystar is derived & stored in update_par_binom_firth\n\n    // Pseudo loop\n    for(i3 = 0; i3 < maxit3; i3++) {\n      if(verbose >= 3) cout << \" - pseudo loop it \" << i3 << endl;\n\n      // update directions\n      up = update_par(Mask, Y, X, cur_b, true); // pseudo = true\n      if(!up) { it = i; return false; }\n\n      // check the stopping criteria\n      if(stop_criterion()) break;\n\n      // check step size\n      if(check_step) {\n        ratio_step = cur_v.array().abs().maxCoeff() / max_step;\n        if(ratio_step > 1.0) cur_v.array() /= ratio_step;\n      }\n\n      // update parameters\n      cur_b += cur_v;\n\n      // check if bhat is nan\n      if(cur_b.array().isNaN().any()) { it = i; return false; }\n    } // end of pseudo loop \n  } // end of main loop\n\n  // assign # iterations performed\n  it = i;\n  res = (i < maxit) & up;\n\n  // check if any NaN\n  if(cur_Score.array().isNaN().any() | cur_b.array().isNaN().any() | isnan(cur_dev)) {\n    return false; \n  }\n\n  return(res);\n}\n\nbool FitOrdinal::optimize_WeightHalving(const VectorXb & Mask, const MatrixXb & Y, const Eigen::Ref<const Eigen::MatrixXd> & X)\n{\n  if(verbose >= 2) cout << \"optimize_WeightHalving\\n\";\n\n  unsigned int i, i2;\n  bool res, up = true;\n\n  // declare variables\n  VectorXd cur_b2, cur_v2;\n  double cur_dev2;\n  double denom, ratio_step;\n\n  // initial values for Outer loop\n  cur_b = b0;\n  up = update_par(Mask, Y, X, cur_b); // get (i) the step size (cur_v); (ii) current value of dev\n  if(!up) { it = 0; return false; }\n\n  // Outer loop\n  for(i = 1; i < maxit; i++) {\n    if(verbose >= 3) cout << \" - outer loop it \" << i << endl;\n\n    if(verbose >= 3) cout << \" - cur_b = \" << cur_b.transpose() << endl;\n    if(verbose >= 3) cout << \" - cur_Score = \" << cur_Score.transpose() << \" [max = \" << cur_Score.array().abs().maxCoeff() << \"]\" << endl;\n\n    // the stopping criterion is checked at the beginning rather than at the end of the loop\n    // - reason: update_par was 
called above\n    // - example: i = 0 & no covariates & initial values are proportions --> no optimization inside the Outer/Inner loop is required\n    if(stop_criterion()) break;\n\n    // initial values for Inner loop\n    cur_b2 = cur_b; cur_v2 = cur_v;\n    cur_dev2 = cur_dev;\n\n    denom = 2.0;\n    \n    // Inner loop (step halving)\n    for(i2 = 0; i2 < maxit2; i2++) {\n      if(verbose >= 3 && i2 > 0) cout << \" - inner loop it \" << i2 << endl;\n\n      // update step according to the rule: step(i) = step(initial) / 2^i\n      // one exception from the rule: skip halving at the very first iteration, i2 = 0 --> Fisher Scoring at i2 = 0\n      if(i2) cur_v2.array() /= denom;\n\n      // check that the absolute step size is less than max_step for each entry of the step (cur_v2)\n      if(check_step) {\n        ratio_step = cur_v2.array().abs().maxCoeff() / max_step;\n        if(ratio_step > 1.0) cur_v2.array() /= ratio_step;\n      }\n\n      // update param.\n      // - the baseline value (cur_b2) is fixed\n      // - the increment step (cur_v2) is reduced at each iteration (see the code line above)\n      cur_b = cur_b2 + cur_v2;\n\n      // check if Score is nan\n      if(cur_Score.array().isNaN().any()) { it = i; return false; }\n      // check if bhat is nan\n      if(cur_b.array().isNaN().any()) { it = i; return false; }\n\n      // update Score, Info, loglik, dev\n      up = update_par(Mask, Y, X, cur_b);\n      if(!up) {it = i; return false; }\n\n      // check if cur_dev is nan\n      if(isnan(cur_dev)) { it = i; return false; }\n\n      // stop the inner loop (step halving) if dev. is improved\n      if(cur_dev < cur_dev2) break;\n    }\n\n    // assign Inner loop iterations\n    it2 += i2;\n\n    /* bool strict_WeightHalving = false; */\n    if(strict) {\n      // check if all Inner loop iterations are used & exit\n      if(i2 == maxit2) {\n        // let the first iteration (i = 0) go even when convergence failure\n        if(i) { it = i; return false; }\n      }\n    }\n  }\n\n  // assign # iterations performed\n  it = i;\n  res = (i < maxit) & up;\n\n  // check if any NaN\n  if(cur_Score.array().isNaN().any() | cur_b.array().isNaN().any() | isnan(cur_dev)) {\n    return false; \n  }\n\n  return res;\n}\n\nbool FitOrdinal::optimize_WeightHalvingPseudo(const VectorXb & Mask, const MatrixXb & Y, const Eigen::Ref<const Eigen::MatrixXd> & X)\n{\n  if(verbose >= 2) cout << \"optimize_WeightHalvingPseudo\\n\";\n\n  if(!(firth_binom | firth_multinom)) throw std::runtime_error(\"optimize_WeightHalvingPseudo is for binomial/multinomial response with Firth correction\");\n\n  unsigned int i, i2, i3;\n  bool res, stop, up = true;\n\n  // declare variables\n  VectorXd cur_b2, cur_v2;\n  double cur_dev2;\n  double denom, ratio_step;\n\n  // initial values for Outer loop\n  cur_b = b0;\n  /* update_par(Mask, Y, X, cur_b); // get (i) the step size (cur_v); (ii) current value of dev */\n\n  // Outer loop\n  for(i = 1; i < maxit; i++) {\n    up = update_par(Mask, Y, X, cur_b); \n    if(!up) break;\n\n    if(verbose >= 3) cout << \" - outer loop it \" << i << endl;\n    if(verbose >= 3) cout << \" - cur_b = \" << cur_b.transpose() << endl;\n    if(verbose >= 3) cout << \" - cur_Score = \" << cur_Score.transpose() << \" [max = \" << cur_Score.array().abs().maxCoeff() << \"]\" << endl;\n\n    // stopping criterion\n    if(stop_criterion()) break;\n\n    if(stop_value > pseudo_stophalf) {\n      // initial values for Inner loop\n      cur_b2 = cur_b; cur_v2 = cur_v;\n      cur_dev2 = cur_dev;
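\n      // (cur_b2/cur_v2/cur_dev2 snapshot the pre-step state; each half-step below is retried from this fixed baseline)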
\n\n      denom = 2.0;\n      \n      // Inner loop (step halving)\n      for(i2 = 0; i2 < maxit2; i2++) {\n        if(verbose >= 3 && i2 > 0) cout << \" - inner loop it \" << i2 << endl;\n\n        // update step according to the rule: step(i) = step(initial) / 2^i\n        // one exception from the rule: skip halving at the very first iteration, i2 = 0 --> Fisher Scoring at i2 = 0\n        if(i2) cur_v2.array() /= denom;\n\n        // check that the absolute step size is less than max_step for each entry of the step (cur_v2)\n        if(check_step) {\n          ratio_step = cur_v2.array().abs().maxCoeff() / max_step;\n          if(ratio_step > 1.0) cur_v2.array() /= ratio_step;\n        }\n\n        // update param.\n        // - the baseline value (cur_b2) is fixed\n        // - the increment step (cur_v2) is reduced at each iteration (see the code line above)\n        cur_b = cur_b2 + cur_v2;\n\n        // check if Score is nan\n        /* if(cur_Score.array().isNaN().any()) { it = i; return false; } */\n        // check if bhat is nan\n        /* if(cur_b.array().isNaN().any()) { it = i; return false; } */\n\n        // update Score, Info, loglik, dev\n        up = update_par(Mask, Y, X, cur_b);\n        /* if(!up) { it = i; return false; } */\n        if(!up) continue;\n\n        // check if cur_dev is nan\n        /* if(isnan(cur_dev)) { it = i; return false; } */\n\n        // stop the inner loop (step halving) if dev. is improved\n        if(cur_dev < cur_dev2) {\n          break;\n        }\n      } // end of inner loop\n\n      // assign Inner loop iterations\n      it2 += i2;\n    } else { // score already small: take a single (clamped) Fisher step without halving\n      // check step size\n      if(check_step) {\n        ratio_step = cur_v.array().abs().maxCoeff() / max_step;\n        if(ratio_step > 1.0) cur_v.array() /= ratio_step;\n      }\n\n      // update parameters\n      cur_b += cur_v;\n    }\n\n    // ystar is derived & stored in update_par_binom_firth\n\n    // Pseudo loop\n    // store initial values before entering Pseudo loop\n    cur_b2 = cur_b; cur_v2 = cur_v; cur_dev2 = cur_dev;\n    bool loop_pseudo = false;\n    for(i3 = 0; i3 < maxit3; i3++) {\n      if(verbose >= 3) cout << \" - pseudo loop it \" << i3 << endl;\n\n      // update directions\n      up = update_par(Mask, Y, X, cur_b, true); // pseudo = true\n      if(!up) break;\n\n      // check the stopping criteria\n      stop = stop_criterion();\n      if(check_nan(stop_value)) break;\n      if(stop) { loop_pseudo = true; break; }\n\n      // check step size\n      if(check_step) {\n        ratio_step = cur_v.array().abs().maxCoeff() / max_step;\n        if(ratio_step > 1.0) cur_v.array() /= ratio_step;\n      }\n\n      // update parameters\n      cur_b += cur_v;\n\n      // check if any NaN\n      /* if(cur_Score.array().isNaN().any() | cur_b.array().isNaN().any()) return false; */ \n    } // end of pseudo loop (i3)\n    // cancel results of Pseudo loop if it failed\n    if(!loop_pseudo) {\n      cur_b = cur_b2; cur_v = cur_v2; cur_dev = cur_dev2;\n    }\n  }\n\n  // assign # iterations performed\n  it = i;\n  res = (i < maxit) & up;\n\n  // check if any NaN\n  if(cur_Score.array().isNaN().any() | cur_b.array().isNaN().any() | isnan(cur_dev)) {\n    return false; \n  }\n\n  return res;\n}\n\n//------------------------------\n// Utils\n//------------------------------\n\nbool check_nan(double x) \n{\n  /* bool res = (boost::math::isnan)(x); */\n  bool res = (boost::math::isnan)(x) | !(boost::math::isnormal)(x);\n  
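// note: (boost::math::isnormal)(x) is also false for zero, subnormals and Inf, so these values are flagged as well\n  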
return(res);\n}\n\nvoid exp_matrix(Eigen::MatrixXd & X)\n{\n  // See: mu = binomial()$linkinv(eta)\n  // - https://github.com/wch/r-source/blob/trunk/src/library/stats/src/family.c\n  // - https://stackoverflow.com/a/1566222\n  /* double EPSILON = 2.221e-16; */\n  double EPSILON = 10 * std::numeric_limits<double>::epsilon();\n  double THRESH = 30.0, MTHRESH = -30.0; \n  double INVEPS = 1.0/EPSILON;\n  for(unsigned int i = 0; i < X.cols(); i++) {\n    X.col(i).array() = (X.col(i).array() < MTHRESH).\n      select(EPSILON, (X.col(i).array() > THRESH).\n          select(INVEPS, X.col(i).array().exp()));\n  }\n}\n\nvoid exp_matrix_ord(Eigen::MatrixXd & X)\n{\n  double EPSILON = 10 * std::numeric_limits<double>::epsilon();\n  double THRESH = 30.0, MTHRESH = -30.0; \n  double INVEPS = 1.0/EPSILON;\n\n  unsigned int ncols = X.cols();\n  ArrayXb mask_top = (X.array() > THRESH).rowwise().all(), mask_bottom = (X.array() < MTHRESH).rowwise().all();\n  /* cout << \"any(mask_top) = \" << mask_top.any() << endl; */\n\n  for(unsigned int i = 0; i < ncols; i++) {\n    X.col(i).array() = (X.col(i).array() < MTHRESH).\n      select(EPSILON, (X.col(i).array() > THRESH).\n          select(INVEPS, X.col(i).array().exp()));\n  }\n  // correction for cases: eta{1,2} > THRESH & eta1 < eta2 \n  if(ncols > 1) {\n    if(mask_top.any()) { // scaling factor (0.5, 1) for columns 1,2\n      for(unsigned int i = 0; i < ncols; i++) {\n        double sc = std::pow(0.5, ncols - 1 - i);\n        X.col(i).array() = mask_top.select(sc * X.col(i).array(), X.col(i).array());\n      }\n    }\n    /* if(mask_bottom.any()) { */\n    /*   for(unsigned int i = 0; i < ncols; i++) { */\n    /*     double sc = std::pow(0.5, i); */\n    /*     X.col(i).array() = mask_bottom.select(sc * X.col(i).array(), X.col(i).array()); */\n    /*   } */\n    /* } */\n  }\n}\n\nvoid invlogit_matrix(Eigen::MatrixXd & X)\n{\n  double EPSILON = 10 * std::numeric_limits<double>::epsilon();\n  /* double EPSILON = 2.221e-16; */\n  double THRESH = 30.0, MTHRESH = -30.0; \n  // clamp eta < MTHRESH to ~0 and eta > THRESH to ~1, following the exp_matrix pattern above\n  for(unsigned int i = 0; i < X.cols(); i++) {\n    X.col(i).array() = (X.col(i).array() < MTHRESH).\n      select(EPSILON / (1.0 + EPSILON), (X.col(i).array() > THRESH).\n          select(1.0 / (1.0 + EPSILON), 1.0 - 1.0 / (1.0 + X.col(i).array().exp())));\n  }\n}\n\nvoid exp_vector(Eigen::VectorXd & x)\n{\n  double EPSILON = 2.221e-16;\n  double THRESH = 30.0, MTHRESH = -30.0; \n  double INVEPS = 1.0/EPSILON;\n  x.array() = (x.array() < MTHRESH).\n    select(EPSILON, (x.array() > THRESH).\n        select(INVEPS, x.array().exp()));\n}\n\nEigen::VectorXd dlog_vector(const Eigen::VectorXd & x)\n{\n  // dfun = function(x) exp(x)/(1+exp(x))^2 \n  // pfun = function(x) 1/(1+exp(-x)) \n  // ddfun = function(eta) dfun(eta)*(1 - 2*pfun(eta))\n  double EPSILON = 2.221e-16;\n  double THRESH = 30.0; // , MTHRESH = -30.0; \n  // y = extreme value or exp(x)\n  VectorXb mask_extreme = (x.array().abs() > THRESH);\n  VectorXd y = mask_extreme.select(EPSILON, x.array().exp());\n  for(unsigned int i = 0; i < y.size(); i++) {\n    if(!mask_extreme(i)) {\n      y(i) = y(i) * (1.0 - y(i)) / pow(y(i) + 1.0, 3);\n    } else if(x[i] > THRESH) {\n      y(i) *= -1;\n    }\n  }\n  /* y.array() = (x.array() < MTHRESH) */\n  /*   select(EPSILON, (x.array() > THRESH). */\n  /*       select(-1*EPSILON, */ \n  /*         x.array().exp())); */\n  /* y.array() = (x.array() < MTHRESH). */\n  /*   select(x, (x.array() > THRESH). */\n  /*       select(x, */ \n  /*         (y.array() * (1 - y.array())) / (y.array() + 1).pow(3))); */\n  return(y);\n}\n\n
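// math note (dlog_vector above): with p(x) = 1/(1+exp(-x)) and d(x) = p(x)*(1-p(x)), the second\n// derivative d(x)*(1 - 2*p(x)) simplifies to exp(x)*(1-exp(x))/(1+exp(x))^3, evaluated with y = exp(x)\n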
Eigen::MatrixXd dlog_matrix(const Eigen::MatrixXd & x)\n{\n  MatrixXd y(x.rows(), x.cols());\n  VectorXd ycol(x.rows());\n  for(unsigned int i = 0; i < x.cols(); i++) {\n    ycol = dlog_vector(x.col(i));\n    y.col(i).array() = ycol.array();\n  }\n  return(y);\n}\n\nMatrixXd orth_matrix(const Eigen::MatrixXd & X0, const MatrixXb & Mask)\n{\n\n  // check parameters\n  if(Mask.cols() != 1) {\n    throw std::runtime_error(\"Mask must have 1 column\");\n  }\n  if(X0.rows() != Mask.rows()) {\n    throw std::runtime_error(\"number of rows different for X0 and Mask\");\n  }\n\n  // parameters\n  double tol_eval = 1e-15; // default in Regenie\n\n  double Neff = Mask.array().cast<double>().sum();\n\n  // step 0. Copy \n  /* MatrixXd X = Mask.select(X0, 0.0); */\n  MatrixXd X = X0;\n  /* MatrixXd X(nr, nc) */\n  /* for(i = 0; i < nc; i++) { */\n  /*   X.col(i) = Mask.select(X0.col(i), 0.0); */\n  /* } */\n\n  /* // step 1. center (no intercept column in X) */\n  ArrayXd means = X.colwise().sum() / Neff;\n  X.array().rowwise() -= means.transpose();\n  /* ArrayXd means = Mask.select(0.0, X).colwise().sum() / Neff; */\n  /* X.array().rowwise() -= means.transpose(); */\n  // respect the missingness pattern (Mask)\n  /* for(i = 0; i < nc; i++) { */\n  /*   X.col(i) = Mask.select(X.col(i), 0.0); */\n  /* } */\n\n  /* // step 2. Orthogonalize using an EVD of X'X */\n  MatrixXd XtX = X.transpose() * X;\n  SelfAdjointEigenSolver<MatrixXd> es(XtX);\n  VectorXd D = es.eigenvalues(); // eigenvalues sorted in increasing order\n  MatrixXd V = es.eigenvectors();\n\n  double max_eval = D.tail(1)(0);\n  unsigned int nonzero_eval = (D.array() > max_eval * tol_eval).count();\n  if(nonzero_eval == 0) {\n    throw std::runtime_error(\"no columns left after thr. EVD\");\n  }\n  ArrayXd sds = D.tail(nonzero_eval).array().sqrt();\n\n  X *= V.rightCols(nonzero_eval);\n  X.array().rowwise() /= sds.transpose();\n\n  return X;\n}\n\n// print sum_stats\nstd::string MultiPhen::print_sumstats( int const& isnp, uint32_t const& snp_index,\n    string const& test_string, string const& wgr_string, \n    variant_block* block_info, vector<snp> const& snpinfo, \n    struct param const* params)\n{\n  std::ostringstream buffer_header;\n  buffer_header << snpinfo[snp_index].chrom << \" \" << snpinfo[snp_index].physpos << \" \"\n    << snpinfo[snp_index].ID << \" \"\n    << snpinfo[snp_index].allele1 << \" \" << snpinfo[snp_index].allele2 << \" \"\n    << block_info->mac1 << \" \" << block_info->af1 << \" \" << block_info->ns1;\n  string header = buffer_header.str();\n\n  std::ostringstream buffer;\n  buffer << header; // write header to buffer\n\n  // output p-values\n  if(pval_test == -1.0) {\n    buffer << \" NA\";\n  } else {\n    buffer << \" \" << -log10(pval_test);\n  }\n \n  // model fit results\n  buffer << \" \" << (response == \"binom\" ? 0 : 1);\n  buffer << \" \" << it << \" \" << cnt_updates;\n\n  bool firth = (response == \"binom\" ? firth_binom : false);\n  buffer << \" \" << (firth ? 1 : 0);
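\n  // (columns after the header: -log10(P) or NA, model flag 0=binom/1=multinom, iteration count, update count, Firth flag)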
\n\n  buffer << endl;\n\n  return buffer.str();\n}\n\nstd::string MultiPhen::print_sum_stats_htp(const variant_block* block_info, struct param const* params)\n{\n  std::ostringstream buffer;\n\n  bool test_pass = (pval_test != -1.0);\n\n  // bhat & 95% CI\n  buffer << \"NA\\tNA\\tNA\\t\";\n  // Pvalue\n  if(test_pass) buffer << pval_test << \"\\t\";\n  else buffer << \"NA\\t\";\n\n  // print out AF, counts in cases, counts in controls\n  unsigned int ph = 0;\n  buffer << block_info->af(ph) << \"\\t\";\n  // N, N_Ref, N_Het, N_Alt\n  buffer << (int) block_info->genocounts.block(0,ph,3,1).sum() << \"\\t\" \n    << (int) block_info->genocounts(0,ph) << \"\\t\" \n    << (int) block_info->genocounts(1,ph) << \"\\t\" \n    << (int) block_info->genocounts(2,ph) << \"\\t\";\n  buffer << \"NA\\tNA\\tNA\\tNA\\t\";\n  /* double N_total = block_info->genocounts.block(0,ph,3,1).sum(), */\n  /*        N_ref = block_info->genocounts(0,ph), */\n  /*        N_het = block_info->genocounts(1,ph), */\n  /*        N_alt = block_info->genocounts(2,ph); */\n  /* double AAF = (N_het + 2*N_alt) / (2*N_total); */\n\n  // info column\n  if(test_pass) buffer << \"DF=\" << Ny;\n  else buffer << \"DF=0\";\n  buffer << \";NO_BETA\";\n  buffer << \";IT=\" << it << \";UP=\" << cnt_updates;\n  buffer << \";BIN=\" << (int)(response == \"binom\");\n  if(bhat_y.size() > 0) {\n    buffer << \";BHAT=\";\n    for(unsigned int i = 0; i < bhat_y.size(); i++) {\n      if(i) buffer << \",\" << bhat_y(i);\n      else buffer << bhat_y(i);\n    }\n  }\n\n  buffer << endl;\n\n  return buffer.str();\n}\n\n"
  },
  {
    "path": "src/Ordinal.hpp",
    "content": "/* \n\n   This file is part of the regenie software package.\n\n   Copyright (c) 2020-2024 Joelle Mbatchou, Andrey Ziyatdinov & Jonathan Marchini\n\n   Permission is hereby granted, free of charge, to any person obtaining a copy\n   of this software and associated documentation files (the \"Software\"), to deal\n   in the Software without restriction, including without limitation the rights\n   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n   copies of the Software, and to permit persons to whom the Software is\n   furnished to do so, subject to the following conditions:\n\n   The above copyright notice and this permission notice shall be included in all\n   copies or substantial portions of the Software.\n\n   THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n   SOFTWARE.\n\n*/\n\n#ifndef ORDINAL_H\n#define ORDINAL_H\n\n//-------------------\n// Class FitOrdinal\n//-------------------\n\nclass FitOrdinal \n{\n  public:\n    unsigned int cnt_fit;\n    unsigned int verbose;\n    bool executed, converged;\n    bool trace;\n    unsigned int it, it2, maxit, maxit2, maxit3, cnt_updates;\n    bool strict;\n    double stop_value, tol;\n    double pseudo_stophalf;\n\n    bool check_step;\n    double max_step;\n\n    bool apply_start; // apply starting values?\n    bool store_offset; // store offset?\n    bool apply_offset; // apply offset?\n    Eigen::VectorXd yo; // offset vector (linear predictor Xb)\n    Eigen::VectorXd yo_int; // offset intercepts for Multinomial model\n    bool exclude_intercepts; // exclude intercepts, e.g., offset is given\n    bool exclude_intercepts_offset; // exclude intercepts when applying offset\n    Eigen::VectorXd bhat;\n    double loglik;\n\n    Eigen::VectorXd Score;\n    Eigen::MatrixXd Info;\n                   \n    std::string response; // response type = [binom, multinom]\n    std::string model; // model = [POM: Proportional Odds Model, ACL: Adjacent Category Logit]\n    std::string optim; // optimization algorithm = [FisherScoring, WeightHalving, Pseudo]\n    bool firth_binom, firth_multinom; // Firth correction\n    double firth_mult;\n\n    unsigned int N, Neff; // sample size\n    unsigned int Ncov, Nb; // number of covariates in X & number of covariates + all intercepts\n    unsigned int  Ncov0, Ncov1; // number of last covariates in X: the effects are fixed to zero\n    bool last0;\n                    \n    unsigned int ncat, ncat1, ncat1sq; // number of categories \n    Eigen::VectorXi Ncat;\n                              \n    Eigen::VectorXd b0;\n\n    // current results in model fitting\n    Eigen::MatrixXd cur_Info;\n    Eigen::VectorXd cur_Score, cur_v, cur_b;\n    double cur_loglik, cur_dev, prev_dev;\n\n    // intermediate matrices/vectors for model fitting\n    // common + multinomial\n    Eigen::VectorXd Xb0;\n    Eigen::MatrixXd Xb, exp_eta, gamma, PQ;\n    Eigen::MatrixXd P;\n    Eigen::VectorXd Psum, Pk;\n    Eigen::MatrixXd D, Q, V, S, QS, W;\n    Eigen::VectorXd WS2, WSS1; // column sums (dim 2) and sqrt of row sums (dim 1)\n    Eigen::MatrixXd XW, XWs, XW22;\n    
Eigen::VectorXd XW1;\n    // binomial\n    Eigen::VectorXd mub, wb;\n    Eigen::MatrixXd XtW;\n    // firth\n    Eigen::VectorXd ystar;\n    Eigen::MatrixXd Ystar;\n\n    void setup_defaults();\n    void check_setup_model();\n    void check_setup_model_common();\n    void check_setup_data();\n    void check_setup_data_common();\n    void check_setup_data_binom();\n    void check_setup_data_multinom();\n    void setup_ncov0(unsigned int , bool, bool );\n\n    void setup_restart(const Eigen::VectorXd &);\n    void setup_offset_binom(const Eigen::VectorXd &, bool);\n    void setup_offset_multinom_pom(const Eigen::VectorXd & , const Eigen::VectorXd & );\n\n    // optimization functions\n    bool stop_criterion();\n    bool update_par(const VectorXb & , const MatrixXb & , const Eigen::Ref<const Eigen::MatrixXd> & , const Eigen::VectorXd & , bool pseudo = false);\n    bool optimize(const VectorXb & , const MatrixXb & , const Eigen::Ref<const Eigen::MatrixXd> & X);\n    bool optimize_FisherScoring(const VectorXb & , const MatrixXb & , const Eigen::Ref<const Eigen::MatrixXd> & X);\n    bool optimize_FisherScoringPseudo(const VectorXb & , const MatrixXb & , const Eigen::Ref<const Eigen::MatrixXd> & X);\n    bool optimize_WeightHalving(const VectorXb & , const MatrixXb & , const Eigen::Ref<const Eigen::MatrixXd> & X);\n    bool optimize_WeightHalvingPseudo(const VectorXb & , const MatrixXb & , const Eigen::Ref<const Eigen::MatrixXd> & X);\n    void update_fit();\n\n    // multinom (POM only)\n    void fit_multinom_pom(const VectorXb & , const MatrixXb & , const Eigen::Ref<const Eigen::MatrixXd> & );\n    void setup_start_multinom();\n    void setup_par_multinom();\n    bool update_par_multinom(const VectorXb & , const MatrixXb & , const Eigen::Ref<const Eigen::MatrixXd> & , const Eigen::VectorXd & , bool pseudo = false);\n    double loglik_multinom(const VectorXb &, const MatrixXb &);\n    double loglik_multinom_firth(const VectorXb &, const MatrixXb &, const Eigen::LLT<Eigen::MatrixXd> &, bool add = false, double base_loglik = 0.0);
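\n    // (the LLT overload derives 0.5*log|Info| from the Cholesky factor; the MatrixXd overload computes det(Info) directly)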
\n    double loglik_multinom_firth(const VectorXb &, const MatrixXb &, const Eigen::MatrixXd &);\n    /* void update_fit_multinom(FitOrdinal & ); */\n\n    // binom (logistic regression)\n    void fit_binom(const VectorXb &, const MatrixXb &, const Eigen::Ref<const Eigen::MatrixXd> &);\n    void setup_start_binom();\n    void setup_par_binom();\n    bool update_par_binom(const VectorXb & , const MatrixXb & , const Eigen::Ref<const Eigen::MatrixXd> & , const Eigen::VectorXd & );\n    bool update_par_binom_firth(const VectorXb & , const MatrixXb & , const Eigen::Ref<const Eigen::MatrixXd> & , const Eigen::VectorXd & );\n    bool update_par_binom_pseudo(const VectorXb & , const MatrixXb & , const Eigen::Ref<const Eigen::MatrixXd> & , const Eigen::VectorXd & );\n    double loglik_binom(const VectorXb &, const MatrixXb &);\n    double loglik_binom_firth(const VectorXb &, const MatrixXb &, const Eigen::LLT<Eigen::MatrixXd> &);\n\n    void setup_xy(const Eigen::VectorXi &, const Eigen::MatrixXd& );\n\n    FitOrdinal();\n    ~FitOrdinal() { };\n};\n\n//-------------------------------------------------------------\n// Class Ordinal\n//-------------------------------------------------------------\nclass Ordinal\n{\n  public:\n\n    std::string response; // response type = [binom, multinom]\n    std::string model; // model = [POM: Proportional Odds Model, ACL: Adjacent Category Logit]\n    std::string optim; // optimization algorithm = [FisherScoring, WeightHalving]\n    bool firth_binom, firth_multinom; // Firth correction\n\n    unsigned int it, it2, maxit, maxit2, maxit3;\n    bool strict;\n    double tol;\n    double pseudo_stophalf;\n    bool converged;\n\n    bool check_step;\n    double max_step;\n\n    bool inc_intercept; // is the intercept term in X included or not\n    bool preproc_cov; // center/orth. covariates X?\n\n    unsigned int N, Neff; // sample size\n    VectorXb Mask; // masked samples = samples with missing values \n    unsigned int Ncov, Nb; // number of covariates in X & number of covariates + all intercepts\n\n    std::set<int> cat; // ordered (!) 
set of category levels\n    unsigned int ncat, ncat1, ncat1sq; // number of categories \n    Eigen::VectorXi Ncat;\n\n    MatrixXb Y; // N x ncat matrix: Y[i,j] = sample i is in j category\n    Eigen::MatrixXd Xcov; // X = U S V'\n    Eigen::MatrixXd Xcov1; // [Intercept, X] = [1s, X]\n    Eigen::VectorXd b0;\n\n    // intermediate matrices/vectors for model fitting\n    // common + multinomial\n    Eigen::VectorXd Xb0;\n    Eigen::MatrixXd Xb, exp_eta, gamma, PQ;\n    Eigen::MatrixXd P;\n    Eigen::VectorXd Psum, Pk;\n    Eigen::MatrixXd D, Q, V, S, QS, W;\n    Eigen::VectorXd WS2, WSS1; // column sums (dim 2) and sqrt of row sums (dim 1)\n    Eigen::MatrixXd XW, XWs, XW22;\n    Eigen::VectorXd XW1;\n\n    // intermediate matrices/vectors for model fitting\n    // binomial\n    Eigen::VectorXd yb;\n    Eigen::VectorXd mub, wb;\n    Eigen::MatrixXd XtW;\n\n    Eigen::MatrixXd cur_Info;\n    Eigen::VectorXd cur_Score, cur_v, cur_b;\n    double cur_loglik, cur_dev, prev_dev;\n\n    void setup_defaults();\n    FitOrdinal setup_fit();\n    void setup_xy(const Eigen::VectorXi &, const Eigen::MatrixXd& );\n\n    FitOrdinal fit(const Eigen::VectorXi &y, const Eigen::MatrixXd& X, \n      unsigned int Ncov0 = 0, bool last0 = true);\n    void update_par(const Eigen::VectorXd &, const Eigen::MatrixXd &);\n    void update_fit_common(FitOrdinal & );\n    bool stop_criterion();\n    bool optimize(const Eigen::MatrixXd &);\n    bool optimize_FisherScoring(const Eigen::MatrixXd &);\n    bool optimize_WeightHalving(const Eigen::MatrixXd &);\n\n    // multinom (POM only)\n    void fit_multinom_pom(const Eigen::MatrixXd& , FitOrdinal & );\n    void setup_start_multinom();\n    void setup_par_multinom();\n    void update_par_multinom(const Eigen::VectorXd &, const Eigen::MatrixXd &);\n    double loglik_multinom();\n    void update_fit_multinom(FitOrdinal & );\n\n    // binom (logistic regression)\n    void fit_binom(const Eigen::MatrixXd& , FitOrdinal & );\n    void setup_start_binom();\n    void setup_par_binom();\n    void update_par_binom(const Eigen::VectorXd &, const Eigen::MatrixXd &);\n    void update_par_binom_firth(const Eigen::VectorXd &, const Eigen::MatrixXd &);\n    double loglik_binom();\n    void update_fit_binom(FitOrdinal & );\n\n    double test_score(const FitOrdinal &, const Eigen::MatrixXd &, const Eigen::MatrixXd &);\n    double test_score(const FitOrdinal &, const Eigen::MatrixXd &);\n    double test_score_multinom_pom(const FitOrdinal &, const Eigen::MatrixXd &, const Eigen::MatrixXd &);\n    double test_score_binom(const FitOrdinal &, const Eigen::MatrixXd &, const Eigen::MatrixXd &);\n\n    Ordinal();\n    ~Ordinal() { };\n};\n\n//-------------------\n// Class MultiPhen\n//-------------------\n\nclass MultiPhen \n{\n  public:\n    unsigned int cnt_fit; // fit index/counter\n    unsigned int verbose; // verbose level\n    std::string response; // response type = [binom, multinom]\n    std::string model; // model = [POM: Proportional Odds Model, ACL: Adjacent Category Logit]\n    std::string optim; // optimization algorithm = [FisherScoring, WeightHalving]\n    bool firth_binom, firth_multinom; // Firth correction\n    double firth_mult; \n    bool reuse_start, reset_start;\n    bool approx_offset;\n    int mac_approx_offset;\n    Eigen::VectorXd b0;\n    Eigen::VectorXd yo;\n    Eigen::VectorXd yo_int; \n    Eigen::VectorXd w0;\n    Eigen::MatrixXd Yres0;\n\n    unsigned int maxit, maxit2, maxit3; // maximum number of iterations in Outer/Inner loops of 
the IRLS\n    bool strict;\n    double tol;\n    double pseudo_stophalf;\n    bool check_step;\n    double max_step;\n    std::string offset_mode;\n\n    bool trace; // trace updates\n    unsigned int it, cnt_updates;\n                          \n    unsigned int N, Neff; // sample size\n    VectorXb Mask; // masked samples = samples with missing values \n    unsigned int Nx, Nx1, Ny, Ny1, Ny21; // columns in XY matrix\n    /* unsigned int Ncov, Nb; // number of covariates in X & number of covariates + all intercepts */\n    /* unsigned int  Ncov0, Ncov1; // number of last covariates in X: the effects are fixed to zero */\n                    \n    bool pos_intercept_first, pos_phen_first;\n\n    unsigned int ncat, ncat1, ncat1sq; // number of categories \n    Eigen::VectorXi Ncat;\n    int Ncat_minor;\n\n    MatrixXb Ym; // N x ncat matrix: Y[i,j] = sample i is in j category. Response for multinomial model.\n    Eigen::VectorXd yb;\n\n    bool set_x, set_y;\n\n    bool executed, converged;\n    double pval_test;\n    double pval_thr;\n    std::string test; // test type = [none, nocov_score, nocov_addcov]\n    Eigen::VectorXd bhat_y;\n\n    void setup_x(const VectorXb& , const Eigen::MatrixXd& , unsigned int , unsigned int , bool , bool );\n    void setup_y(const Eigen::VectorXd & );\n    void setup_approx_offset();\n    void reset_model();\n\n    FitOrdinal setup_fit(bool , bool , bool use_offset = false);\n    FitOrdinal fit(const Eigen::Ref<const Eigen::MatrixXd> & , bool , bool , bool use_res = false);\n\n    void run(const Eigen::VectorXd & , const Eigen::MatrixXd & , unsigned int , unsigned int );\n    void run_test_offset(const Eigen::Ref<const Eigen::MatrixXd> &);\n    void run_test_qt(const Eigen::Ref<const Eigen::MatrixXd> &);\n    void run_test_score(const Eigen::Ref<const Eigen::MatrixXd> & , bool );\n    void run_test_lrt(const Eigen::Ref<const Eigen::MatrixXd> & , bool );\n    void run_test_addcov(const Eigen::Ref<const Eigen::MatrixXd> & );\n    void run_test_add_offset(const Eigen::Ref<const Eigen::MatrixXd> & );\n\n    void run0(const Eigen::VectorXi & , const Eigen::MatrixXd& , const Eigen::MatrixXd& , bool );\n\n    void test0(const Eigen::VectorXi & g, const Eigen::MatrixXd& X, const Eigen::MatrixXd& Y,\n      bool firth_binom = false,\n      std::string optim = \"WeightHalving\", double tol = 1e-4,\n      unsigned int maxit = 100, bool check_step = true, double max_step = 10.0);\n    void test_addcov(const Eigen::VectorXi & g, const Eigen::MatrixXd& X, const Eigen::MatrixXd& Y,\n      bool firth_binom = false,\n      std::string optim = \"WeightHalving\", double tol = 1e-4,\n      unsigned int maxit = 100, bool check_step = true, double max_step = 10.0);\n\n    // score test\n    double test_score(const FitOrdinal & , const VectorXb & , const MatrixXb & , const Eigen::VectorXd & , const Eigen::Ref<const Eigen::MatrixXd> & , bool );\n    double test_score_binom(const FitOrdinal & , const VectorXb & , const Eigen::VectorXd & , const Eigen::Ref<const Eigen::MatrixXd> & , const Eigen::Ref<const Eigen::MatrixXd> & );\n    double test_score_multinom_pom(const FitOrdinal & , const VectorXb & , const MatrixXb & , const Eigen::Ref<const Eigen::MatrixXd> & , const Eigen::Ref<const Eigen::MatrixXd> & );\n\n    std::string print_sumstats(int const& snp_index, uint32_t const&, \n        std::string const& test_string, std::string const& wgr_string, variant_block* block_info, \n        std::vector<snp> const& snpinfo, struct param const* params);\n    std::string 
print_sum_stats_htp(const variant_block* , struct param const* );\n\n    void setup_defaults();\n    MultiPhen();\n    MultiPhen(std::string );\n    MultiPhen(unsigned int );\n    ~MultiPhen() { };\n};\n \n#endif\n"
  },
  {
    "path": "src/Pheno.cpp",
    "content": "/* \n\n   This file is part of the regenie software package.\n\n   Copyright (c) 2020-2024 Joelle Mbatchou, Andrey Ziyatdinov & Jonathan Marchini\n\n   Permission is hereby granted, free of charge, to any person obtaining a copy\n   of this software and associated documentation files (the \"Software\"), to deal\n   in the Software without restriction, including without limitation the rights\n   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n   copies of the Software, and to permit persons to whom the Software is\n   furnished to do so, subject to the following conditions:\n\n   The above copyright notice and this permission notice shall be included in all\n   copies or substantial portions of the Software.\n\n   THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n   SOFTWARE.\n\n*/\n\n#include <unordered_set>\n#if defined(__GNUC__)\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored \"-Wint-in-bool-context\"\n#endif\n#include \"Regenie.hpp\"\n#if defined(__GNUC__)\n#pragma GCC diagnostic pop\n#endif\n#include \"Files.hpp\"\n#include \"Geno.hpp\"\n#include \"survival_data.hpp\"\n#include \"cox_score.hpp\"\n#include \"Step1_Models.hpp\"\n#include \"Step2_Models.hpp\"\n#include \"Pheno.hpp\"\n\nusing namespace std;\nusing namespace Eigen;\nusing namespace boost;\nusing boost::math::normal;\n\n\nvoid read_pheno_and_cov(struct in_files* files, struct param* params, struct filter* filters, struct phenodt* pheno_data, struct ests* m_ests, struct geno_block* gblock, mstream& sout) {\n\n  ArrayXb ind_in_pheno_and_geno = ArrayXb::Constant( params->n_samples, false );\n  ArrayXb ind_in_cov_and_geno = ArrayXb::Constant( params->n_samples, files->cov_file.empty());\n\n  if(params->getCorMat){ // intitiate values for phenotype info\n\n    params->n_pheno = 1;\n    ind_in_pheno_and_geno = ArrayXb::Constant( params->n_samples, true );\n    pheno_data->phenotypes = MatrixXd::Constant( params->n_samples, 1, 1);\n    pheno_data->masked_indivs = MatrixXb::Constant( params->n_samples, 1, true);\n    pheno_data->Neff = pheno_data->masked_indivs.cast<double>().colwise().sum();\n\n  } else { // read in phenotype (mean-impute for QT)\n\n    if(params->transposedPheno)\n      tpheno_read(params, files, filters, pheno_data, ind_in_pheno_and_geno, sout);\n    else\n      pheno_read(params, files, filters, pheno_data, ind_in_pheno_and_geno, sout);\n\n    if(params->trait_mode && !params->test_mode)\n      m_ests->offset_nullreg = MatrixXd::Zero(params->n_samples, params->n_pheno); \n\n  }\n\n  // used for step 2 if using firth and it failed\n  set_pheno_pass(files, params);\n\n  // Intercept\n  pheno_data->new_cov = MatrixXd::Ones(params->n_samples, 1);\n  if(params->print_cov_betas) params->covar_names.push_back(\"Intercept\");\n\n  // read in covariates\n  if(!files->cov_file.empty()) covariate_read(params, files, filters, pheno_data, ind_in_cov_and_geno, sout);\n  if(params->condition_snps)\n      extract_condition_snps(params, files, filters, pheno_data, gblock, ind_in_cov_and_geno, sout);\n  if(params->w_interaction){\n    
if(params->interaction_snp) // if doing GxG interaction\n      extract_interaction_snp(params, files, filters, pheno_data, gblock, ind_in_cov_and_geno, sout);\n    else if(params->interaction_prs) // if doing GxPRS interaction\n      extract_interaction_prs(params, files, filters, pheno_data, ind_in_cov_and_geno, sout);\n    if(params->gwas_condtl){ // append to new_cov\n      pheno_data->new_cov.conservativeResize(pheno_data->new_cov.rows(), pheno_data->new_cov.cols() + pheno_data->interaction_cov.cols());\n      pheno_data->new_cov.rightCols(pheno_data->interaction_cov.cols()) = pheno_data->interaction_cov;\n      params->n_cov = pheno_data->new_cov.cols() - 1;// ignore intercept\n    }\n  }\n  //cerr << endl<<pheno_data->new_cov.topRows(5) << endl;\n  //if(params->w_interaction) cerr << endl<<pheno_data->interaction_cov.topRows(5)<< endl;\n\n  // mask individuals \n  filters->ind_in_analysis = ind_in_pheno_and_geno && ind_in_cov_and_geno;\n  setMasks(params, filters, pheno_data, sout);\n  sout << \" * number of individuals used in analysis = \" << params->n_analyzed << endl;\n\n  // check P vs N\n  if(pheno_data->new_cov.cols() >= params->n_samples)\n    throw \"Number of covariates is greater than sample size!\";\n\n  if(!params->getCorMat) {\n\n    // apply rint\n    if(params->rint) {\n      sout << \"   -applying RINT to all phenotypes\\n\";\n      apply_rint(pheno_data, params);\n    }\n    \n    // compute skewness of traits only if MCC test is activated\n    if(params->mcc_test) {\n      sout << \"   -computing phenotypic skewness: \";\n      compute_skew(pheno_data, params);\n      if(params->mcc_skew == 0.0) {\n        pheno_data->mcc_Y = ArrayXb::Constant(params->n_pheno, true);\n      } else {\n        pheno_data->mcc_Y = (pheno_data->skew_Y.abs() > params->mcc_skew); \n        // disable mcc_test if 0 phenotypes need MCC (only relevant when mcc_skew > 0)\n        if(!pheno_data->mcc_Y.any()) {\n          params->mcc_test = false;\n        }\n      }\n      sout << pheno_data->mcc_Y.cast<int>().sum() << \" phenotypes will use the MCC test\\n\";\n    }\n\n    // impute missing\n    pheno_impute_miss(pheno_data, filters->ind_in_analysis, files, params);\n\n    // print case-control counts per trait\n    if(params->trait_mode==1)\n      print_cc_info(params, files, pheno_data, filters->case_control_indices, sout);\n    else if (params->trait_mode==3)\n      print_cox_info(params, files, pheno_data, filters->case_control_indices, sout);\n    else\n      print_info(params, files, pheno_data, filters->case_control_indices, sout);\n\n  }\n\n}\n\nvoid pheno_read(struct param* params, struct in_files* files, struct filter* filters, struct phenodt* pheno_data, Ref<ArrayXb> ind_in_pheno_and_geno, mstream& sout) {\n\n  uint32_t indiv_index;\n  bool all_miss;\n  string line;\n  std::vector< string > tmp_str_vec;\n  ArrayXb keep_cols;\n  findID person;\n  Files fClass;\n\n  sout << left << std::setw(20) << \" * phenotypes\" << \": [\" << files->pheno_file << \"] \";\n  fClass.openForRead(files->pheno_file, sout);\n  fClass.readLine(line);\n  removeCarriageReturn(line); // remove carriage returns at the end of line if any\n\n  // check that FID and IID are first two entries in header\n  tmp_str_vec = string_split(line,\"\\t \");\n  if( tmp_str_vec.size() < 2 ) \n    throw \"header of phenotype file has too few columns.\";\n  else if( (tmp_str_vec[0] != \"FID\") || (tmp_str_vec[1] != \"IID\") ) \n    throw \"header of phenotype file must start with: FID IID.\";\n\n  // check pheno with preds 
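(check_blup verifies the step 1 prediction files; per-phenotype availability is checked via has_blup below)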
\n  if(params->test_mode && !params->getCorMat) check_blup(files, params, sout);\n\n  // get phenotype names \n  keep_cols = ArrayXb::Constant(tmp_str_vec.size() - 2, true);\n  for(int i = 0; i < keep_cols.size(); i++ ) {\n    if(params->select_phenos_rm) // check if should skip phenotypes\n      keep_cols(i) = !in_map(tmp_str_vec[i+2], filters->pheno_colRm_names);\n    if(!keep_cols(i)) continue;\n    if(params->select_phenos) // check if keeping pheno\n      keep_cols(i) = in_map(tmp_str_vec[i+2], filters->pheno_colKeep_names);\n    if(params->test_mode && !params->skip_blups && keep_cols(i) && params->trait_mode != 3){ // check phenotype had prs from step 1\n      keep_cols(i) = has_blup(tmp_str_vec[i+2], files->blup_files, params, sout);\n    }\n\n    if(keep_cols(i)) files->pheno_names.push_back( tmp_str_vec[i+2] );\n  }\n  params->n_pheno = keep_cols.count();\n\n  // check #pheno\n  if(params->trait_mode == 3){\n    if(params->n_pheno/2 < 1) throw \"need at least one phenotype.\";\n    sout << \"n_pheno = \" << params->n_pheno/2 << endl;\n  }else{\n    if(params->n_pheno < 1) throw \"need at least one phenotype.\";\n    sout << \"n_pheno = \" << params->n_pheno << endl;\n  }\n\n  params->strict_mode |= (params->n_pheno == 1); // drop all missing observations\n\n  // how missingness is handled\n  if( params->strict_mode ) sout << \"   -dropping observations with missing values at any of the phenotypes\" << endl;\n  else if( !params->rm_missing_qt  && (params->trait_mode==0)) sout << \"   -keeping and mean-imputing missing observations (done for each trait)\" << endl;\n\n  // allocate memory\n  pheno_data->phenotypes = MatrixXd::Zero(params->n_samples, params->n_pheno);\n  pheno_data->masked_indivs = MatrixXb::Constant(params->n_samples, params->n_pheno, true);\n  if(params->trait_mode)  \n    pheno_data->phenotypes_raw = MatrixXd::Zero(params->n_samples, params->n_pheno);\n  if(params->trait_mode == 3)\n    pheno_data->cox_max_tau.resize(params->n_pheno);\n\n  // read in data\n  while( fClass.readLine(line) ){\n    tmp_str_vec = string_split(line,\"\\t \");\n\n    if( (int)tmp_str_vec.size() != (2+keep_cols.size()) )\n      throw \"incorrectly formatted phenotype file.\";\n\n    person = getIndivIndex(tmp_str_vec[0], tmp_str_vec[1], params, sout);\n    if(!person.is_found) continue;\n\n    indiv_index = person.index;\n\n    // check duplicate\n    if( !ind_in_pheno_and_geno(indiv_index) ){\n      ind_in_pheno_and_geno( indiv_index ) = true;\n    } else \n      throw \"individual appears more than once in phenotype file: FID=\" + tmp_str_vec[0] + \" IID=\" + tmp_str_vec[1] ;\n\n    // read phenotypes \n    all_miss = true;\n    if (params->trait_mode == 3) {\n      for (const auto& entry: files->t2e_map) {\n        const std::string& time_name = entry.first;\n        const std::string& event_name = entry.second;\n        // find time column index\n        std::vector<std::string>::iterator it_time = std::find(files->pheno_names.begin(), files->pheno_names.end(), time_name);\n        int time_index = std::distance(files->pheno_names.begin(), it_time);\n        // find event column index\n        std::vector<std::string>::iterator it_event = std::find(files->pheno_names.begin(), files->pheno_names.end(), event_name);\n        int event_index = std::distance(files->pheno_names.begin(), it_event);\n\n        int countTrue = 0;\n        int time_ph_index = -1;\n        int event_ph_index = -1;\n        for (int i = 0; i < keep_cols.size(); ++i) {\n          if (keep_cols(i)) {\n            
if (countTrue == time_index) {\n              time_ph_index = i;\n            } else if (countTrue == event_index) {\n              event_ph_index = i;\n            }\n            countTrue++;\n          }\n          if (time_ph_index != -1 && event_ph_index != -1) {\n            break;\n          }\n        }\n\n        pheno_data->phenotypes(indiv_index, time_index) = convertDouble(tmp_str_vec[2+time_ph_index], params, sout);\n        pheno_data->phenotypes_raw(indiv_index, time_index) = pheno_data->phenotypes(indiv_index, time_index);\n\n        pheno_data->phenotypes(indiv_index, event_index) = convertDouble(tmp_str_vec[2+event_ph_index], params, sout);\n        if(!params->CC_ZeroOne && (pheno_data->phenotypes(indiv_index, event_index) != params->missing_value_double)) \n            pheno_data->phenotypes(indiv_index, event_index) -= 1; // if using 1/2/NA encoding for BTs\n\n        pheno_data->phenotypes_raw(indiv_index, event_index) = pheno_data->phenotypes(indiv_index, event_index);\n\n        if ((pheno_data->phenotypes_raw(indiv_index, time_index) < 0) && (pheno_data->phenotypes_raw(indiv_index, time_index) != params->missing_value_double)) {\n          throw \"a phenotype time value is <0 for individual: FID=\" + tmp_str_vec[0] + \" IID=\" + tmp_str_vec[1] + \" Y=\" + tmp_str_vec[2+time_ph_index];\n        } else if ((pheno_data->phenotypes_raw(indiv_index, event_index) != 0) && (pheno_data->phenotypes_raw(indiv_index, event_index) != 1) && (pheno_data->phenotypes_raw(indiv_index, event_index) != params->missing_value_double)) {\n          throw \"a phenotype censor value is invalid for individual: FID=\" + tmp_str_vec[0] + \" IID=\" + tmp_str_vec[1] + \" Y=\" + tmp_str_vec[2+event_ph_index];\n        } else if ((pheno_data->phenotypes_raw(indiv_index, time_index) != params->missing_value_double) && (pheno_data->phenotypes_raw(indiv_index, event_index) == params->missing_value_double)) {\n          throw \"a phenotype has missing censor with non-missing time for individual: FID=\" + tmp_str_vec[0] + \" IID=\" + tmp_str_vec[1];\n        } else if ((pheno_data->phenotypes_raw(indiv_index, time_index) == params->missing_value_double)) {\n          pheno_data->masked_indivs(indiv_index, time_index) = false;\n          pheno_data->masked_indivs(indiv_index, event_index) = false;\n          pheno_data->phenotypes_raw(indiv_index, event_index) = params->missing_value_double;\n        }\n\n        if (pheno_data->phenotypes_raw(indiv_index, time_index) != params->missing_value_double) {\n          all_miss = false;\n        }\n      }\n    } else {\n      for(int j = 0, i_pheno = 0; j < keep_cols.size(); j++) {\n\n        if( !keep_cols(j) ) continue;\n\n        pheno_data->phenotypes(indiv_index, i_pheno) = convertDouble(tmp_str_vec[2+j], params, sout);\n\n        // for non-QT, save raw data\n        if (params->trait_mode) {\n\n          if((params->trait_mode==1) && !params->CC_ZeroOne && (pheno_data->phenotypes(indiv_index, i_pheno) != params->missing_value_double)) \n            pheno_data->phenotypes(indiv_index, i_pheno) -= 1; // if using 1/2/NA encoding for BTs\n\n          pheno_data->phenotypes_raw(indiv_index, i_pheno) = pheno_data->phenotypes(indiv_index, i_pheno);\n\n          // for BTs check 0/1/NA values\n          if( (params->trait_mode==1) && (pheno_data->phenotypes_raw(indiv_index, i_pheno)!= 0) && \n            (pheno_data->phenotypes_raw(indiv_index, i_pheno)!= 1) ) {\n\n            if(params->within_sample_l0)\n              throw \"no missing value allowed in 
phenotype file with option -within\";\n            else if( pheno_data->phenotypes_raw(indiv_index, i_pheno) != params->missing_value_double ) {\n              std::string msg = (params->CC_ZeroOne ? \"0/1/NA\" : \"1/2/NA\");\n              throw \"a phenotype value is not \" + msg + \" for individual: FID=\" + tmp_str_vec[0] + \" IID=\" + tmp_str_vec[1] + \" Y=\" + tmp_str_vec[2+j];\n            }\n\n            pheno_data->masked_indivs(indiv_index, i_pheno) = false;\n\n          } else if( (params->trait_mode==2) && (pheno_data->phenotypes_raw(indiv_index, i_pheno)<0) ) { // CT check non-neg\n\n            if(params->within_sample_l0)\n              throw \"no missing value allowed in phenotype file with option -within\";\n            else if( pheno_data->phenotypes_raw(indiv_index, i_pheno) != params->missing_value_double ) {\n              throw \"a phenotype value is <0 for individual: FID=\" + tmp_str_vec[0] + \" IID=\" + tmp_str_vec[1] + \" Y=\" + tmp_str_vec[2+j];\n            }\n            pheno_data->masked_indivs(indiv_index, i_pheno) = false;\n          }\n        }\n\n        if( pheno_data->phenotypes(indiv_index, i_pheno) != params->missing_value_double )\n          all_miss = false;\n        else {\n          if( params->test_mode && params->rm_missing_qt ) pheno_data->masked_indivs(indiv_index, i_pheno) = false;\n          if( params->strict_mode ) {\n            pheno_data->masked_indivs.row(indiv_index) = MatrixXb::Constant(1, params->n_pheno, false);\n            all_miss = true;\n            break; // skip rest of the row\n          }\n        }\n\n        i_pheno++;\n      }\n    }\n    if( all_miss ) ind_in_pheno_and_geno( indiv_index ) = false; // if individual has no phenotype data at all\n  }\n\n  // mask individuals in genotype data but not in phenotype data\n  pheno_data->masked_indivs.array().colwise() *= ind_in_pheno_and_geno;\n\n  // check if all individuals have missing/invalid phenotype\n  int mInd;\n  ArrayXi nobs_per_trait = pheno_data->masked_indivs.array().colwise().count().cast<int>();\n  if((nobs_per_trait == 0).all())\n    throw \"all individuals have missing/invalid values for all traits.\" ;\n  if(nobs_per_trait.minCoeff(&mInd) == 0)\n    throw \"all individuals have missing/invalid values for phenotype '\" + files->pheno_names[mInd] + \"'.\" ;\n\n  // ignore traits with fewer than the specified minimum case count\n  if(params->trait_mode==1)\n    rm_phenoCols(ind_in_pheno_and_geno, files, params, pheno_data, sout); \n\n  // number of phenotyped individuals \n  sout <<  \"   -number of phenotyped individuals \" <<\n   (params->strict_mode ? 
\"with no missing data\" : \"\" ) << \n   \" = \" << ind_in_pheno_and_geno.count() << endl;\n\n  fClass.closeFile();\n\n}\n\n// in transposed format\nvoid tpheno_read(struct param* params, struct in_files* files, struct filter* filters, struct phenodt* pheno_data, Ref<ArrayXb> ind_in_pheno_and_geno, mstream& sout) {\n\n  if (params->trait_mode==3) \n    throw \"Option --tpheno-file is not supported with Time-to-Event traits\";\n  \n  uint32_t nid;\n  string line, yname;\n  std::vector< string > header, tmp_str_vec;\n  map<int,uint32_t> indiv_index;\n  map<int,uint32_t>::iterator itr;\n  Files fClass;\n\n  sout << left << std::setw(20) << \" * phenotypes\" << \": [\" << files->pheno_file << \"] \";\n  fClass.openForRead(files->pheno_file, sout);\n  fClass.readLine(line);\n  removeCarriageReturn(line); // remove carriage returns at the end of line if any\n  header = string_split(line,\"\\t \");\n\n  // parse first line\n  size_t ncols_file = header.size();\n  for(size_t i=0; i < ncols_file; i++ ){\n    if(in_map((int)(i+1), filters->tpheno_colrm)) continue;\n    else if((i+1) == filters->tpheno_indexCol) continue;\n    else { // get index of individuals in genotype file\n\n      if(params->tpheno_iid_only) // assume FID=IID\n        line = header[i] + \"_\" + header[i];\n      else\n        line = header[i];\n\n      if (!in_map(line, params->FID_IID_to_ind)) continue;\n      nid = params->FID_IID_to_ind[line];\n      // check duplicate\n      if( !ind_in_pheno_and_geno(nid) ){\n        ind_in_pheno_and_geno( nid ) = true;\n      } else \n        throw \"individual appears more than once in phenotype file: ID=\" + header[i];\n\n      indiv_index[i] = nid;\n    }\n\n  }\n\n  // check sample size\n  if(indiv_index.size() == 0)\n    throw \"no individuals in phenotype file have genetic data.\";\n\n  // check pheno with preds \n  if(params->test_mode && !params->getCorMat) check_blup(files, params, sout);\n\n  params->n_pheno = 0;\n  int icol = 0;\n  // for each trait\n  while( fClass.readLine(line) ){\n\n    tmp_str_vec = string_split(line,\"\\t \");\n    if( tmp_str_vec.size() != ncols_file )\n      throw \"incorrectly formatted phenotype file.\";\n\n    // check trait name\n    yname = tmp_str_vec[ filters->tpheno_indexCol - 1 ]; \n    if(params->select_phenos_rm && in_map(yname, filters->pheno_colRm_names)) continue;\n    if(params->select_phenos && !in_map(yname, filters->pheno_colKeep_names)) continue;\n    if(params->test_mode && !params->getCorMat && !has_blup(yname, files->blup_files, params, sout)) continue;\n    files->pheno_names.push_back( yname );\n    //cerr << params->n_pheno << \" \" << yname << endl;\n\n    // resize matrices\n    params->n_pheno++;\n    pheno_data->phenotypes.conservativeResize(params->n_samples, params->n_pheno);\n    pheno_data->phenotypes.rightCols(1).array() = 0;\n    pheno_data->masked_indivs.conservativeResize(params->n_samples, params->n_pheno);\n    pheno_data->masked_indivs.rightCols(1).array() = true;\n    if(params->trait_mode) {\n      pheno_data->phenotypes_raw.conservativeResize(params->n_samples, params->n_pheno);\n      pheno_data->phenotypes_raw.rightCols(1).array() = 0;\n    }\n\n    // read in phenotype data\n    for (itr = indiv_index.begin(); itr != indiv_index.end(); ++itr) {\n\n      nid = itr->second;\n      pheno_data->phenotypes(nid, icol) = convertDouble(tmp_str_vec[itr->first], params, sout);\n      \n      if (params->trait_mode) { // for nonQT, save raw data\n\n        if((params->trait_mode==1) && !params->CC_ZeroOne && 
(pheno_data->phenotypes(nid, icol) != params->missing_value_double)) \n          pheno_data->phenotypes(nid, icol) -= 1; // if using 1/2/NA encoding for BTs\n\n        pheno_data->phenotypes_raw(nid, icol) = pheno_data->phenotypes(nid, icol);\n\n        if((params->trait_mode==1) &&  (pheno_data->phenotypes_raw(nid, icol)!= 0) && \n            (pheno_data->phenotypes_raw(nid, icol)!= 1) ) { // force 0/1/NA for BTs\n\n          if(params->within_sample_l0)\n            throw \"no missing value allowed in phenotype file with option --within\";\n          else if( pheno_data->phenotypes_raw(nid, icol) != params->missing_value_double ){\n            std::string msg = (params->CC_ZeroOne ? \"0/1/NA\" : \"1/2/NA\");\n            throw \"a phenotype value is not \" + msg + \" for individual: ID=\" + header[itr->first] + \" Y=\" + tmp_str_vec[itr->first];\n          }\n\n          pheno_data->masked_indivs(nid, icol) = false;\n        } else if((params->trait_mode==2) && (pheno_data->phenotypes_raw(nid, icol)<0) ) { // force non-neg for CTs\n\n          if(params->within_sample_l0)\n            throw \"no missing value allowed in phenotype file with option --within\";\n          else if( pheno_data->phenotypes_raw(nid, icol) != params->missing_value_double ){\n            throw \"a phenotype value is <0 for individual: ID=\" + header[itr->first] + \" Y=\" + tmp_str_vec[itr->first];\n          }\n\n          pheno_data->masked_indivs(nid, icol) = false;\n        }\n      }\n\n      if(params->test_mode && params->rm_missing_qt && (pheno_data->phenotypes(nid, icol) == params->missing_value_double) ) \n        pheno_data->masked_indivs(nid, icol) = false;\n\n    }\n    icol++;\n  }\n\n  // check #pheno\n  if(params->n_pheno < 1)\n    throw \"need at least one phenotype.\";\n\n  sout << \"n_pheno = \" << params->n_pheno << endl;\n\n  params->strict_mode |= (params->n_pheno == 1); // drop all missing observations\n  // how missingness is handled\n  if( params->strict_mode ) {\n    sout << \"   -dropping observations with missing values at any of the phenotypes\" << endl;\n    pheno_data->masked_indivs.array().colwise() *= pheno_data->masked_indivs.array().rowwise().all();\n  } else if( !params->rm_missing_qt  && (params->trait_mode==0)) \n    sout << \"   -keeping and mean-imputing missing observations (done for each trait)\" << endl;\n  \n  ind_in_pheno_and_geno = pheno_data->masked_indivs.array().rowwise().any(); // if individual has no phenotype data at all\n  // mask individuals in genotype data but not in phenotype data\n  pheno_data->masked_indivs.array().colwise() *= ind_in_pheno_and_geno;\n\n  // check if all individuals have missing/invalid phenotype\n  int mInd;\n  ArrayXi nobs_per_trait = pheno_data->masked_indivs.array().colwise().count().cast<int>();\n  if((nobs_per_trait == 0).all())\n    throw \"all individuals have missing/invalid values for all traits.\";\n  if(nobs_per_trait.minCoeff(&mInd) == 0)\n    throw \"all individuals have missing/invalid values for phenotype '\" + files->pheno_names[mInd] + \"'.\";\n\n  // ignore traits with fewer than the specified minimum case count\n  if(params->trait_mode==1)\n    rm_phenoCols(ind_in_pheno_and_geno, files, params, pheno_data, sout); \n\n  // number of phenotyped individuals \n  sout <<  \"   -number of phenotyped individuals \" <<\n   (params->strict_mode ? 
\"with no missing data\" : \"\" ) << \n   \" = \" << ind_in_pheno_and_geno.count() << endl;\n\n  fClass.closeFile();\n\n}\n\n// remove phenotypes with low case counts\nvoid rm_phenoCols(Ref<ArrayXb> sample_keep, struct in_files* files, struct param* params, struct phenodt* pheno_data, mstream& sout) { \n\n  ArrayXb colrm = (pheno_data->phenotypes_raw.array() == 1).colwise().count() < params->mcc;\n  int npass = (!colrm).count(); \n  //sout << npass << endl;\n\n  if(npass == 0) \n    throw \"all phenotypes have less than \" + to_string( params->mcc ) +  \" cases.\";\n  else if( colrm.count() == 0 ) \n    return;\n\n  std::vector< string > tmp_str_vec;\n  MatrixXd ynew (params->n_samples, npass);\n  MatrixXb mnew (params->n_samples, npass);\n\n  sout << \"   -removing phenotypes with fewer than \" << params->mcc << \" cases\\n\";\n\n  for(int i = 0, j = 0; i < params->n_pheno; i++ ) {\n\n    if(colrm(i)) {\n      sout << \"    +WARNING: Phenotype '\" << files->pheno_names[i] << \"' has too few cases so it will be ignored.\\n\";\n      continue;\n    } \n\n    //cerr << j+1 << \":\" << files->pheno_names[i] << endl;\n    tmp_str_vec.push_back( files->pheno_names[i] );\n    ynew.col(j) = pheno_data->phenotypes_raw.col(i);\n    mnew.col(j) = pheno_data->masked_indivs.col(i);\n    j++;\n\n  }\n  //sout << pheno_data->masked_indivs.colwise().count().array() << \"\\n\\n\" << mnew.colwise().count().array() << endl;\n\n  files->pheno_names = tmp_str_vec;\n  pheno_data->phenotypes = ynew;\n  if(params->trait_mode) pheno_data->phenotypes_raw = ynew;\n  pheno_data->masked_indivs = mnew;\n  params->n_pheno = pheno_data->masked_indivs.cols();\n\n  // remove samples with no phenotype values\n  sample_keep = sample_keep && (pheno_data->masked_indivs.rowwise().count().array() > 0);\n  sout << \"    + n_pheno = \" << npass << endl;\n\n}\n\nvoid covariate_read(struct param* params, struct in_files* files, struct filter* filters, struct phenodt* pheno_data, Ref<ArrayXb> ind_in_cov_and_geno, mstream& sout) {\n\n  int nc_cat = 0, np_inter = 0;\n  uint32_t indiv_index;\n  ArrayXb keep_cols;\n  ArrayXd inter_cov_column;\n  MatrixXd inter_cov_matrix;\n  string line;\n  std::vector< string > tmp_str_vec, covar_names;\n  std::vector< std::map<std::string,int> > categories;\n  findID person;\n  Files fClass;\n\n  sout << left << std::setw(20) << \" * covariates\" << \": [\" << files->cov_file << \"] \" << flush;\n  fClass.openForRead(files->cov_file, sout);\n  fClass.readLine(line);\n  removeCarriageReturn(line); // remove carriage returns at the end of line if any\n\n  // check header\n  tmp_str_vec = string_split(line,\"\\t \");\n  if( (tmp_str_vec[0] != \"FID\") || (tmp_str_vec[1] != \"IID\") ) \n    throw \"header of covariate file must start with: FID IID.\";\n\n  // get covariate names \n  keep_cols = ArrayXb::Constant(tmp_str_vec.size() - 2, true);\n  for(int i = 0; i < keep_cols.size(); i++) {\n\n    if(params->select_covs_rm) // check if should skip covariates\n      keep_cols(i) = !in_map(tmp_str_vec[i+2], filters->cov_colRm_names);\n    if(!keep_cols(i)) continue;\n\n    if(!params->select_covs && !in_map(tmp_str_vec[i+2], filters->cov_colKeep_names)) // in case specified as categorical\n      filters->cov_colKeep_names[tmp_str_vec[i+2]] = true;\n    else keep_cols(i) = in_map(tmp_str_vec[i+2], filters->cov_colKeep_names);\n\n    // ignore covariates who correspond to analyzed phenotypes\n    std::vector<std::string>::iterator it_pheno = std::find(files->pheno_names.begin(), files->pheno_names.end(), 
tmp_str_vec[i+2]);\n    if(it_pheno != files->pheno_names.end()) {\n      keep_cols(i) = false;\n      filters->cov_colKeep_names.erase(tmp_str_vec[i+2]);\n    }\n\n    if(keep_cols(i)){\n      covar_names.push_back( tmp_str_vec[i+2] );\n      nc_cat += !filters->cov_colKeep_names[ tmp_str_vec[i+2] ];\n      // with interaction test\n      if(params->w_interaction && !params->interaction_snp && !params->interaction_prs && (filters->interaction_cov == tmp_str_vec[i+2]) ) {\n        np_inter = 1;\n        params->interaction_cat = !filters->cov_colKeep_names[ tmp_str_vec[i+2] ];\n      }\n      if( params->print_cov_betas )\n        params->covar_names.push_back(tmp_str_vec[i+2]);\n    }\n  }\n  categories.resize(nc_cat);\n\n  // check all covariates specified are in the file\n  params->n_cov = keep_cols.count(); \n  if(params->w_interaction && !params->interaction_snp && !params->interaction_prs && (np_inter != 1))\n    throw \"cannot find the interaction covariate specified in the covariate file.\";\n  if( (int)filters->cov_colKeep_names.size() != params->n_cov ) \n    throw \"not all covariates specified are found in the covariate file.\";\n\n  // check #covariates is > 0\n  if(params->n_cov < 1){ // only intercept will be included\n    sout << \"n_cov = \" << params->n_cov << \" (+ intercept)\" << endl;\n    ind_in_cov_and_geno = true;\n    return ;\n  }\n  sout << \"n_cov = \" << params->n_cov << endl;\n\n  // allocate memory \n  pheno_data->new_cov = MatrixXd::Zero(params->n_samples, 1 + params->n_cov - np_inter);\n  pheno_data->new_cov.col(0) = MatrixXd::Ones(params->n_samples, 1);\n  if(params->w_interaction && !params->interaction_snp && !params->interaction_prs) inter_cov_column.resize(params->n_samples);\n\n  // read in data\n  while( fClass.readLine(line) ){\n    tmp_str_vec = string_split(line,\"\\t \");\n\n    if( (int)tmp_str_vec.size() != (keep_cols.size()+2) )\n      throw \"incorrectly formatted covariate file.\";\n\n    person = getIndivIndex(tmp_str_vec[0], tmp_str_vec[1], params, sout);\n    if(!person.is_found) continue;\n\n    indiv_index = person.index;\n\n    // check duplicate\n    if( !ind_in_cov_and_geno(indiv_index) )\n      ind_in_cov_and_geno(indiv_index) = true;\n    else \n      throw \"individual appears more than once in covariate file: FID=\" + tmp_str_vec[0] + \" IID=\" + tmp_str_vec[1];\n\n    // read covariate data and check for missing values\n    for(int i_cov = 0, i_col = 0, i_cat = 0, j = 0; j < keep_cols.size(); j++) {\n\n      if( !keep_cols(j) ) continue;\n\n      // interaction covariate\n      if(params->w_interaction && !params->interaction_snp && !params->interaction_prs && (covar_names[i_cov] == filters->interaction_cov)){\n\n        if( filters->cov_colKeep_names[ covar_names[i_cov] ] ) // if quantitative\n          inter_cov_column(indiv_index) = convertDouble(tmp_str_vec[2+j], params, sout);\n        else{ // set null category if interaction test and base level is specified\n          if( (categories[i_cat].size() == 0) && filters->interaction_cov_null_level.size() > 0 )\n            categories[i_cat][filters->interaction_cov_null_level] = 0;\n          inter_cov_column(indiv_index) = convertNumLevel(tmp_str_vec[2+j], categories[i_cat++], params, sout);\n        }\n\n        if( inter_cov_column(indiv_index) == params->missing_value_double ) { // ignore individual\n          ind_in_cov_and_geno(indiv_index) = false;\n          break;\n        }\n\n      } else { // regular covariate\n\n        if( filters->cov_colKeep_names[ 
covar_names[i_cov] ] ) // quantitative\n          pheno_data->new_cov(indiv_index, 1 + i_col) = convertDouble(tmp_str_vec[2+j], params, sout);\n        else // categorical so convert to numerical\n          pheno_data->new_cov(indiv_index, 1 + i_col) = convertNumLevel(tmp_str_vec[2+j], categories[i_cat++], params, sout);\n\n        if( pheno_data->new_cov(indiv_index, 1 + i_col) == params->missing_value_double ) { // ignore individual\n          ind_in_cov_and_geno(indiv_index) = false;\n          break;\n        }\n\n        i_col++;\n      }\n\n      i_cov++;\n    }\n\n  }\n  if( ind_in_cov_and_geno.count() == 0 )\n    throw \"none of the individuals have covariate data (check sample IDs across files)\";\n  //cerr << endl<<pheno_data->new_cov.block(0,0,3,pheno_data->new_cov.cols())<< endl;\n  //if(params->w_interaction && !params->interaction_snp && !params->interaction_prs) cerr << inter_cov_column.head(3);\n\n  // mask individuals in genotype data but not in covariate data\n  pheno_data->new_cov.array().colwise() *= ind_in_cov_and_geno.cast<double>();\n  if(inter_cov_column.size() > 0) inter_cov_column *= ind_in_cov_and_geno.cast<double>();\n\n  // add dummy variables if needed\n  if(nc_cat > 0){\n    int n_dummies;\n    int n_add = check_categories(covar_names, categories, params, filters, sout) - nc_cat + params->interaction_cat; // new columns to add (or remove if single category) & ignore interaction cov if categorical\n\n    MatrixXd full_covarMat (pheno_data->new_cov.rows(), pheno_data->new_cov.cols() + n_add);\n    vector<string> full_cov_names;\n    if(params->print_cov_betas) full_cov_names.resize(params->covar_names.size() + n_add);\n\n    // copy intercept column\n    full_covarMat.col(0) = pheno_data->new_cov.col(0);\n    if(params->print_cov_betas) full_cov_names[0] = params->covar_names[0];\n\n    for(int i = 0, raw_col = 1, full_col = 1, icat = -1; i < params->n_cov; i++){\n      n_dummies = 1;\n\n      if( filters->cov_colKeep_names[ covar_names[i] ] ) { // qCovar so copy column\n\n        if(params->w_interaction && !params->interaction_snp && !params->interaction_prs && (covar_names[i] == filters->interaction_cov)) { \n          inter_cov_matrix = inter_cov_column.matrix(); continue; \n        } else\n          full_covarMat.col(full_col) = pheno_data->new_cov.col(raw_col);\n        if(params->print_cov_betas) full_cov_names[full_col] = params->covar_names[raw_col];\n\n      } else { // cCovar\n\n        icat++;\n\n        if(params->w_interaction && !params->interaction_snp && !params->interaction_prs && (covar_names[i] == filters->interaction_cov)) { \n\n          np_inter = inter_cov_column.maxCoeff(); // get number of dummies to use\n\n          if( np_inter == 0 ) // too few categories\n            throw \"interacting covariate '\" + covar_names[i] + \"' only has a single category.\";\n          else if((inter_cov_column>0).all())\n            throw \"no individual has baseline level specified for the interacting covariate (=\" + filters->interaction_cov_null_level + \")\";\n\n          inter_cov_matrix = get_dummies(inter_cov_column);\n          extract_names(params->interaction_lvl_names, categories[icat]); // save levels\n\n          continue;\n\n        } else {\n\n          n_dummies = pheno_data->new_cov.col(raw_col).maxCoeff();\n          if( n_dummies > 0 ){\n            full_covarMat.block(0, full_col, full_covarMat.rows(), n_dummies) = get_dummies(pheno_data->new_cov.col(raw_col).array());\n            if(params->print_cov_betas) {\n              
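// label each dummy column as covariate=level in the saved covariate names\n              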
vector<string> lvl_names;\n              extract_names(lvl_names, categories[icat]); // save levels\n              for(size_t ix = 0; ix < lvl_names.size(); ix++) \n                full_cov_names[full_col + ix] = params->covar_names[raw_col] + \"=\" + lvl_names[ix] ;\n            }\n          }\n\n        }\n\n      }\n      //cerr << i << \" \" << raw_col << \" \" << icat << \" \" << covar_names[i]  << endl;\n\n      raw_col++;\n      full_col += n_dummies;\n    }\n\n    pheno_data->new_cov = full_covarMat;\n    params->n_cov = pheno_data->new_cov.cols() - 1; // ignore intercept\n    if(params->print_cov_betas) params->covar_names = full_cov_names;\n  }\n\n  if(params->w_interaction && !params->interaction_snp && !params->interaction_prs) // save inter cov\n    pheno_data->interaction_cov = filters->cov_colKeep_names[filters->interaction_cov] ? inter_cov_column.matrix() : inter_cov_matrix;\n\n  sout <<  \"   -number of individuals with covariate data = \" << ind_in_cov_and_geno.count() << endl;\n  if(params->w_interaction) {\n    sout <<  \"   -testing for interaction with \"\n      << (params->interaction_snp? \"variant \" : \"\")\n      << \"'\" << filters->interaction_cov << \"'\\n\";\n\n    if((params->trait_mode==0) && !params->no_robust) {\n      sout <<  \"    +using \" << \n        (params->force_robust && params->force_hc4? \"HC4 robust SE\" : \"\" ) << \n        (params->force_robust && !params->force_hc4? \"HC3 robust SE\" : \"\" ) << \n        (!params->force_robust ? \"HLM model\" : \"\" ) << \n        \" when testing variants with MAC below \" << params->rareMAC_inter << endl;\n      if(!params->force_robust && !params->rint)\n        sout <<  \"    +WARNING: HLM should be used with RINTed traits (otherwise use option --apply-rint)\\n\"; \n    }\n\n  }\n\n  fClass.closeFile();\n\n}\n\nvoid setMasks(struct param* params, struct filter* filters, struct phenodt* pheno_data, mstream& sout){\n\n  // mask samples\n  if( params->strict_mode ) // keep if non-missing for all traits\n    filters->ind_in_analysis = filters->ind_in_analysis && pheno_data->masked_indivs.array().rowwise().all();\n  else // keep if non-missing for any trait\n    filters->ind_in_analysis = filters->ind_in_analysis && pheno_data->masked_indivs.array().rowwise().any();\n\n  // individuals kept in the analysis\n  pheno_data->masked_indivs.array().colwise() *= filters->ind_in_analysis;\n\n  // mask Y and X matrices\n  pheno_data->phenotypes.array().colwise() *= filters->ind_in_analysis.cast<double>();\n  if(params->trait_mode) \n    pheno_data->phenotypes_raw.array().colwise() *= filters->ind_in_analysis.cast<double>();\n  pheno_data->new_cov.array().colwise() *= filters->ind_in_analysis.cast<double>();\n  if( params->w_interaction ) \n    pheno_data->interaction_cov.array().colwise() *= filters->ind_in_analysis.cast<double>();\n\n  // identify individuals masked for at least 1 trait\n  filters->has_missing = !(pheno_data->masked_indivs.array().rowwise().all());\n  //for(int i = 0; i <5; i++) cerr << std::boolalpha << filters->has_missing(i) << endl;\n\n  // check sample size\n  params->n_analyzed = filters->ind_in_analysis.count();\n  if( params->n_analyzed < 1 ) \n    throw \"sample size cannot be < 1.\";\n  pheno_data->Neff = pheno_data->masked_indivs.colwise().count().cast<double>();\n  //sout << pheno_data->Neff << endl;\n\n}\n\n\nvoid print_cc_info(struct param* params, struct in_files* files, struct phenodt* pheno_data, std::vector<std::vector<Eigen::ArrayXi>>& case_control_indices, mstream& sout){\n\n  
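// pheno_counts stores per-trait totals: col 0 = number of cases, col 1 = number of controls\n  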
params->pheno_counts = MatrixXi::Constant(files->pheno_names.size(), 2, 0);\n  case_control_indices.resize(files->pheno_names.size());\n\n  // go through each trait and print number of cases and controls\n  sout << \" * case-control counts for each trait:\\n\";\n\n  for (size_t i = 0; i < files->pheno_names.size(); i++){\n    if( !params->pheno_pass(i) ) continue;\n    // save indices of cases & controls\n    get_both_indices(case_control_indices[i], pheno_data->phenotypes_raw.col(i).array() == 1, pheno_data->masked_indivs.col(i).array());\n\n    params->pheno_counts(i, 0) = case_control_indices[i][0].size();\n    params->pheno_counts(i, 1) = case_control_indices[i][1].size();\n    sout << \"   - '\" << files->pheno_names[i] << \"': \" <<\n      params->pheno_counts(i, 0) << \" cases and \" << params->pheno_counts(i, 1) << \" controls\\n\";\n  }\n}\n\nvoid print_info(struct param* params, struct in_files* files, struct phenodt* pheno_data, std::vector<std::vector<Eigen::ArrayXi>>& case_control_indices, mstream& sout){\n\n  params->pheno_counts = MatrixXi::Constant(files->pheno_names.size(), 2, 0);\n  case_control_indices.resize(files->pheno_names.size());\n\n  // go through each trait and print number of samples used\n  sout << \" * number of observations for each trait:\\n\";\n  for (size_t i = 0; i < files->pheno_names.size(); i++)\n    if( params->pheno_pass(i) ) {\n      // save indices of non-missing samples\n      case_control_indices[i].resize(1);\n      case_control_indices[i][0] = get_true_indices(pheno_data->masked_indivs.col(i).array());\n      params->pheno_counts(i, 0) = case_control_indices[i][0].size();\n      sout << \"   - '\" << files->pheno_names[i] << \"': \" << params->pheno_counts(i, 0) << \" observations\\n\";\n    }\n}\n\nvoid print_cox_info(struct param* params, struct in_files* files, struct phenodt* pheno_data, std::vector<std::vector<Eigen::ArrayXi>>& case_control_indices, mstream& sout){\n\n  params->pheno_counts = MatrixXi::Constant(files->pheno_names.size(), 2, 0);\n  case_control_indices.resize(files->pheno_names.size());\n\n  // go through each trait and print number of events and censors\n  sout << \" * number of observations for each trait:\\n\";\n  for (const auto& entry: files->t2e_map) {\n    const std::string& time_name = entry.first;\n    const std::string& event_name = entry.second;\n    // find time column index\n    std::vector<std::string>::iterator it_time = std::find(files->pheno_names.begin(), files->pheno_names.end(), time_name);\n    int time_index = std::distance(files->pheno_names.begin(), it_time);\n    // find event column index\n    std::vector<std::string>::iterator it_event = std::find(files->pheno_names.begin(), files->pheno_names.end(), event_name);\n    int event_index = std::distance(files->pheno_names.begin(), it_event);\n    \n    // save indices of cases & controls\n    get_both_indices(case_control_indices[time_index], pheno_data->phenotypes_raw.col(event_index).array() == 1, pheno_data->masked_indivs.col(time_index).array());\n\n    params->pheno_counts(time_index, 0) = case_control_indices[time_index][0].size();\n    params->pheno_counts(time_index, 1) = case_control_indices[time_index][1].size();\n    sout << \"   - '\" << files->pheno_names[time_index] << \"': \" <<\n      params->pheno_counts(time_index, 0) << \" events and \" << params->pheno_counts(time_index, 1) << \" censors\\n\";\n  }\n}\n\nvoid check_nvals(int const& i_pheno, string const& pheno, struct param const* params, struct phenodt const* pheno_data){\n\n  
map<double, bool> uniq_vals;\n  size_t n_min_vals = 2; // o.w. analyze as bt or ct\n\n  for(size_t i = 0; i < params->n_samples; i++){\n    if(!pheno_data->masked_indivs(i, i_pheno)) continue;\n    if(pheno_data->phenotypes(i, i_pheno) == params->missing_value_double) continue;\n    if(!in_map(pheno_data->phenotypes(i, i_pheno), uniq_vals)) {\n      uniq_vals[pheno_data->phenotypes(i, i_pheno)] = true;\n      if(uniq_vals.size() > n_min_vals) return; // more than 2 values\n    }\n  }\n\n  // if here then there are not more than 2 unique values\n  throw \"phenotype '\" + pheno + \"' has very few unique values (=\" + to_string(uniq_vals.size()) + \"). If you really want to analyze it as a QT, use `--force-qt`.\";\n\n}\n\n\nvoid extract_interaction_snp(struct param* params, struct in_files* files, struct filter* filters, struct phenodt* pheno_data, struct geno_block* gblock, Ref<ArrayXb> ind_in_cov_and_geno, mstream& sout) {\n\n  bool mean_impute = false;\n  pheno_data->interaction_cov.resize(params->n_samples, 1);\n  MapArXd Gcov (pheno_data->interaction_cov.col(0).data(), params->n_samples, 1);\n\n  // read snp\n  if(params->interaction_file) {// from external file\n    extract_from_genofile(\"interaction\", Gcov.matrix(), mean_impute, ind_in_cov_and_geno, filters, files, params, sout);\n  } else { // from input file\n    read_snp(mean_impute, params->interaction_snp_offset, Gcov, ind_in_cov_and_geno, filters->ind_ignore, files, gblock->pgr, params, true);\n    /*\n       cerr << params->interaction_snp_offset << \" \" << ind_in_cov_and_geno.count() <<  \"\\n\\n\"\n       << endl << pheno_data->interaction_cov.topRows(5)\n       << endl; exit(-1);\n       */\n  }\n\n  // apply coding\n  code_snp(pheno_data->interaction_cov, ind_in_cov_and_geno, params->interaction_snp_offset, filters, files, params, sout);\n  if(params->debug) cerr << pheno_data->interaction_cov.topRows(5) << endl;\n\n}\n\n\nvoid extract_condition_snps(struct param* params, struct in_files* files, struct filter* filters, struct phenodt* pheno_data, struct geno_block* gblock, Ref<ArrayXb> ind_in_cov_and_geno, mstream& sout) {\n\n  bool mean_impute = true;\n  int count = 0;\n  std::map <std::string, uint64>::iterator itr;\n\n  // Add to covariates\n  int ncols = pheno_data->new_cov.cols(), ncov_snps = filters->condition_snp_names.size();\n  pheno_data->new_cov.conservativeResize( pheno_data->new_cov.rows(), ncols + ncov_snps);\n  MapMatXd Gcov (&(pheno_data->new_cov(0,ncols)), pheno_data->new_cov.rows(), ncov_snps);\n\n  if(params->condition_file) {\n\n    sout << \"    +conditioning on variants in [\" << files->condition_snps_list << \"]\\n\";\n    extract_from_genofile(\"conditional\", Gcov, mean_impute, ind_in_cov_and_geno, filters, files, params, sout);\n\n  } else { // just read the snps\n\n    sout << \"    +conditioning on variants in [\" << files->condition_snps_list << \"] n_used = \" << ncov_snps << endl;\n\n    for (itr = filters->condition_snp_names.begin(); itr != filters->condition_snp_names.end(); ++itr, count++) \n      read_snp(mean_impute, itr->second, Gcov.col(count).array(), ind_in_cov_and_geno, filters->ind_ignore, files, gblock->pgr, params, true);\n\n  }\n\n  if(params->print_cov_betas) { // save SNP names\n    for (itr = filters->condition_snp_names.begin(); itr != filters->condition_snp_names.end(); ++itr) \n      params->covar_names.push_back(itr->first);\n  }\n  \n\n}\n\n\nint check_categories(vector<std::string>& covar, vector<std::map<std::string,int>>& categories, struct param* params, struct 
filter* filters, mstream& sout){\n\n  int ntotal = 0, n_levels = 0;\n\n  for(size_t i = 0, j = 0; i < covar.size(); i++){\n\n    // skip qCovar\n    if( filters->cov_colKeep_names[ covar[i] ] ) continue;\n\n    n_levels = categories[j++].size();\n\n    if( n_levels > params->max_cat_levels) // too many categories\n      throw \"too many categories for covariate: \" + covar[i] + \" (=\" + to_string( n_levels ) + \"). Either use '--maxCatLevels' or combine categories.\";\n    else if( n_levels == 1) // too few categories\n      sout << \"WARNING: covariate '\" << covar[i] << \"' only has a single category so it will be ignored\\n\";\n\n    // for interaction test\n    if(params->w_interaction && !params->interaction_snp && (covar[i] == filters->interaction_cov)) // skip it\n      continue;\n\n    ntotal += n_levels - 1; // add K-1 dummy vars\n  }\n\n  return ntotal;\n}\n\nMatrixXd get_dummies(const Eigen::Ref<const Eigen::ArrayXd>& numCov) {\n\n  int index, nvars = numCov.maxCoeff();\n  MatrixXd dout = MatrixXd::Zero(numCov.size(), nvars);\n  //cerr << dout.rows() << \"***\" << dout.cols() << \"--min=\" << numCov.minCoeff() << endl;\n\n  for(int i = 0; i < numCov.size(); i++){\n    if(numCov(i) == 0) continue; // will go to intercept\n\n    index = numCov(i) - 1;\n    dout(i, index) = 1;\n  }\n\n  return dout;\n\n}\n\n// check if need to add E^2 (i.e. if single column and not 0/1)\nbool add_square_term(const Eigen::Ref<const Eigen::MatrixXd>& X) {\n\n  if(X.cols() > 1) // categorical\n    return false;\n\n  std::unordered_set<double> vals(X.col(0).data(), X.col(0).data() + X.rows());\n  if(vals.size() > 2) // not dichotomous\n    return true;\n\n  if(vals.find(0) != vals.end()) // one of the values is 0\n    return false;\n\n  return true;\n}\n\nvoid extract_names(vector<string>& names, map<string,int>& map_names){\n\n  map<string, int >::iterator itr;\n  names.resize( map_names.size() - 1); // ignore 0 category\n\n  for (itr = map_names.begin(); itr != map_names.end(); ++itr) {\n    if(itr->second == 0) continue; // ignore 0 category\n    names[ itr->second - 1 ] = itr->first; // map_names has values in 0-{K-1}\n  }\n  //for(auto i:names) cerr << i << \"\\n\";\n\n}\n\n// Adjust for covariates (incl. 
intercept)\n// in step 2, also read blups and check\nvoid prep_run (struct in_files* files, struct filter* filters, struct param* params, struct phenodt* pheno_data, struct ests* m_ests, mstream& sout){\n\n  // for step 2, check blup files\n  if (params->test_mode && !params->getCorMat){\n    // individuals not in blup file will have their phenotypes masked\n    blup_read(files, params, pheno_data, m_ests, filters, sout);\n    if(params->write_samples) write_ids(files, params, pheno_data, sout);\n  }\n\n  // compute N for each trait\n  setMasks(params, filters, pheno_data, sout);\n\n  // for interaction test with BTs, add E^2 to covs\n  if( (params->trait_mode==1) && params->w_interaction && params->gwas_condtl ) {\n    pheno_data->new_cov.conservativeResize( pheno_data->new_cov.rows(), pheno_data->new_cov.cols() + pheno_data->interaction_cov.cols());\n    pheno_data->new_cov.rightCols(pheno_data->interaction_cov.cols()) = pheno_data->interaction_cov.array().square().matrix();\n  }\n\n  if (params->trait_mode == 3) {\n    // check constant covariates\n    // std::cout << \"new_cov: \" << pheno_data->new_cov.block(0,0,5,pheno_data->new_cov.cols());\n    RowVectorXd mu = pheno_data->new_cov.colwise().mean();\n    params->cov_sds = (pheno_data->new_cov.rowwise() - mu).colwise().norm().array() / sqrt(params->n_analyzed);\n    std::vector<int> nonConstantColumns;\n    for (int i = 0; i < pheno_data->new_cov.cols(); ++i) {\n      if (params->cov_sds(i) > params->const_cov_cox_tol) {\n        nonConstantColumns.push_back(i);\n      }\n    }\n\n    if(nonConstantColumns.size() != (size_t)pheno_data->new_cov.cols()) {\n      params->ncov = nonConstantColumns.size();\n      Eigen::ArrayXd filtered_cov_sds(params->ncov);\n      Eigen::MatrixXd new_cov_mtx(pheno_data->new_cov.rows(), params->ncov);\n      vector<string> filtered_cov_names(params->ncov);\n      for (int i = 0; i < params->ncov; ++i) {\n          new_cov_mtx.col(i) = pheno_data->new_cov.col(nonConstantColumns[i]);\n          filtered_cov_sds(i) = params->cov_sds(nonConstantColumns[i]);\n          if(params->print_cov_betas) filtered_cov_names[i] = params->covar_names[nonConstantColumns[i]];\n      }\n      pheno_data->new_cov = std::move(new_cov_mtx);\n      params->cov_sds = filtered_cov_sds;\n      if(params->print_cov_betas) params->covar_names = filtered_cov_names;\n      // std::cout << \"new_cov after filter: \" << pheno_data->new_cov.block(0,0,5,pheno_data->new_cov.cols());\n    }\n  }\n\n  // orthonormal basis (save number of lin. indep. covars.)\n  if(params->print_cov_betas && params->trait_mode) pheno_data->new_cov_raw = pheno_data->new_cov;\n\n  if (pheno_data->new_cov.cols() > 0) {\n    params->ncov = (params->print_cov_betas ? 
scale_mat(pheno_data->new_cov, filters->ind_in_analysis, params) : getBasis(pheno_data->new_cov, params));\n    // params->ncov = pheno_data->new_cov.cols();\n  } else {\n    params->ncov = 0;\n  }\n\n  if(params->ncov > (int)params->n_samples)\n    throw \"number of covariates is larger than sample size!\";\n  if(params->skip_cov_res && (params->n_pheno != 1)){\n     params->skip_cov_res = false;\n     sout << \"WARNING: ignoring '--nocov-approx' for multi-trait analysis\\n\";\n  }\n\n  ArrayXb pheno_pass_orig = params->pheno_pass;\n  fit_null_models_nonQT(params, pheno_data, m_ests, files, sout);\n\n  // with interaction test, remove colinear columns\n  if( params->w_interaction ) {\n    // apply QR decomp\n    QRcheck(pheno_data->interaction_cov, params->interaction_cat, params->interaction_lvl_names, params->n_analyzed - params->ncov, Eigen::Default, params->numtol);\n    //cerr << pheno_data->interaction_cov.topRows(3) << \"\\n\\n\";\n    params->ncov_interaction = pheno_data->interaction_cov.cols();\n    params->n_tests_per_variant += 3; // marginal + inter + joint\n    params->int_add_extra_term = add_square_term(pheno_data->interaction_cov);\n    params->add_homdev = params->add_homdev && params->int_add_extra_term && (pheno_data->interaction_cov.array()>=1.5).any();\n\n    if(!params->gwas_condtl) { // include main effects of Xinter\n\n      params->int_add_esq = (params->trait_mode==1) && !params->add_homdev && params->int_add_extra_term;\n      if(params->int_add_esq){ // use G_E and G_E^2\n        params->interaction_istart = 2 * params->ncov_interaction;\n        pheno_data->interaction_cov_res.resize(pheno_data->interaction_cov.rows(), pheno_data->interaction_cov.cols() * 2);\n        pheno_data->interaction_cov_res << pheno_data->interaction_cov, pheno_data->interaction_cov.array().square().matrix();\n      } else if(params->add_homdev){ // only with additive coding (add hom. correction term)\n        pheno_data->interaction_homdev = (pheno_data->interaction_cov.array()>=1.5).cast<double>().matrix();\n        params->interaction_istart = 2 * params->ncov_interaction;\n        pheno_data->interaction_cov_res.resize(pheno_data->interaction_cov.rows(), pheno_data->interaction_cov.cols() * 2);\n        pheno_data->interaction_cov_res << pheno_data->interaction_cov, pheno_data->interaction_homdev;\n      } else { // use only G_E\n        params->interaction_istart = params->ncov_interaction;\n        // keep original and residualized version\n        pheno_data->interaction_cov_res = pheno_data->interaction_cov;\n      }\n      //cerr << pheno_data->interaction_cov_res.topRows(5) << \"\\n\\n\";\n\n      // remove covariate effects\n      if(!residualize_matrix(pheno_data->interaction_cov_res, pheno_data->scl_inter_X, pheno_data->new_cov.leftCols( params->ncov + (params->blup_cov && (params->trait_mode == 1) ? 
-1 : 0)), params->n_analyzed, params->numtol))\n        throw \"Var=0 for the interaction risk factor.\";\n\n    }\n    //cerr << pheno_data->interaction_cov.topRows(3) << \"\\n\\n\";\n\n    // for interaction tests with QTs using HLM - keep raw Y\n    if(params->trait_mode==0)\n      pheno_data->phenotypes_raw = pheno_data->phenotypes;\n\n    // allocate per thread if using OpenMP\n    pheno_data->Hmat.resize(params->neff_threads);\n    pheno_data->scf_i.resize(params->neff_threads);\n    \n  }\n\n  // residualize phenotypes (skipped for nonQTs when testing)\n  if( !params->getCorMat && (!params->test_mode || (params->trait_mode==0)) ) \n    residualize_phenotypes(params, pheno_data, files->pheno_names, sout);\n\n  if(params->print_cov_betas) {\n    // get covariate orthonormal basis and refit null models (more stable)\n    if(params->trait_mode){\n      params->ncov = getBasis(pheno_data->new_cov_raw, params);\n      pheno_data->new_cov = pheno_data->new_cov_raw;\n      pheno_data->new_cov_raw.resize(0,0);\n    }\n    print_cov_betas(params, files, sout);\n    params->pheno_pass = pheno_pass_orig;\n  }\n\n  // if using step 1 preds as covariate\n  check_cov_blup(pheno_data, params);\n\n  // store indices for ADAM\n  if(params->use_adam && params->adam_mini){\n    params->adam_indices.resize(params->n_pheno);\n    for(int ph = 0; ph < params->n_pheno; ph++){\n      if( !params->pheno_pass(ph) ) continue;\n      params->adam_indices[ph].resize(pheno_data->masked_indivs.col(ph).count(),1);\n      for(size_t i = 0, j = 0; i < params->n_samples; i++)\n        if(pheno_data->masked_indivs(i,ph)) params->adam_indices[ph](j++) = i;\n    }\n  }\n}\n\n// get list of phenotypes in pred file\nvoid check_blup(struct in_files* files, struct param* params, mstream& sout) {\n\n  string line, tmp_pheno;\n  std::vector< string > tmp_str_vec;\n  Files fClass;\n\n  // skip reading if specified by user\n  if( params->skip_blups && !params->interaction_prs ) return;\n\n  fClass.openForRead(files->blup_list_file, sout);\n\n  while (fClass.readLine(line)){\n    tmp_str_vec = string_split(line,\"\\t \");\n\n    // each line contains a phenotype name and the corresponding blup file name\n    if( tmp_str_vec.size() != 2 )\n      throw \"step 1 list file is not in the right format : \" + files->blup_list_file;\n\n    if(in_map(tmp_str_vec[0], files->blup_files))\n      throw \"phenotype \\'\" + tmp_str_vec[0] + \"\\' appears more than once in step 1 list file.\";\n\n    files->blup_files[ tmp_str_vec[0] ] = tmp_str_vec[1];\n  }\n\n  fClass.closeFile();\n}\n\nbool has_blup(string const& yname, map<string,string> const& y_read, struct param const* params, mstream& sout) {\n\n  if( ( params->skip_blups && !params->interaction_prs ) || in_map(yname, y_read)) return true;\n\n  sout << \"WARNING: No step 1 file provided for phenotype '\" << yname << \"' so it will be ignored.\\n\";\n  return false;\n\n}\n\n// get list of blup files\nvoid blup_read(struct in_files* files, struct param* params, struct phenodt* pheno_data, struct ests* m_ests, struct filter* filters, mstream& sout) {\n\n  int n_masked_prior, n_masked_post;\n  uint32_t indiv_index;\n  double blup_val;\n  string yfile, line;\n  std::vector< string > tmp_str_vec, tmp_prs_vec;\n  ArrayXd full_prs;\n  ArrayXb blupf_mask, all_miss_pheno;\n  Files fClass;\n\n  // allocate memory\n  m_ests->blups = MatrixXd::Zero(params->n_samples, params->n_pheno);\n\n  // skip reading if specified by user\n  if( params->skip_blups && !params->interaction_prs) {\n    string 
mode;\n    if(params->trait_mode==0) mode = \"linear\";\n    else if(params->trait_mode==1) mode = \"logistic\";\n    else if(params->trait_mode==2) mode = \"poisson\";\n    else if(params->trait_mode==3) mode = \"cox\";\n    sout << \" * no step 1 predictions given. Simple \" << mode << \" regression will be performed\" << endl;\n    return;\n  } else if(params->interaction_prs) return;\n\n  sout << \" * \" << (params->use_prs ? \"PRS\" : \"LOCO\") << \" predictions : [\" << files->blup_list_file << \"]\\n\";\n  all_miss_pheno = ArrayXb::Constant(params->n_pheno, false);\n\n  // allocate memory for LTCO \n  if(params->w_ltco) m_ests->ltco_prs = MatrixXd::Zero(params->n_samples, params->n_pheno);\n\n  // read blup file for each phenotype\n  for(int ph = 0; ph < params->n_pheno; ph++) {\n\n    if( !params->pheno_pass(ph) ) continue;\n\n    yfile = files->blup_files[ files->pheno_names[ph] ];\n    sout << \"   -file [\" << yfile  << \"] for phenotype '\" << files->pheno_names[ph] << \"'\\n\";\n\n    fClass.openForRead(yfile, sout);\n\n    // to mask all individuals not present in .loco file\n    blupf_mask = ArrayXb::Constant(params->n_samples, false);\n    n_masked_prior = pheno_data->masked_indivs.col(ph).count();\n\n    // read first line which has FID_IID\n    fClass.readLine(line);\n    tmp_str_vec = string_split(line,\"\\t \");\n\n    if( tmp_str_vec[0] != \"FID_IID\") \n      throw \"header of blup file must start with FID_IID (=\" + tmp_str_vec[0] + \")\";\n\n    // read second line to check for missing predictions\n    fClass.readLine(line);\n    tmp_prs_vec = string_split(line,\"\\t \");\n\n    if( params->use_prs && (tmp_prs_vec[0] != \"0\") )\n      throw \"second line must start with 0 (=\" + tmp_prs_vec[0] + \").\";\n\n    for (size_t i = 1; i < tmp_str_vec.size(); i++){\n      // ignore sample if it is not in genotype data\n      if (!in_map(tmp_str_vec[i], params->FID_IID_to_ind)) continue;\n      indiv_index = params->FID_IID_to_ind[tmp_str_vec[i]];\n      blup_val = convertDouble(tmp_prs_vec[i], params, sout);\n\n      // ignore samples where prediction is NA\n      blupf_mask( indiv_index ) = (blup_val != params->missing_value_double);\n      //cerr << tmp_str_vec[i] << \"\\t\" << std::boolalpha << blupf_mask( indiv_index ) << endl; \n      if (!blupf_mask( indiv_index )) continue;\n      \n      if( params->use_prs ) \n        m_ests->blups(indiv_index, ph) = blup_val;\n    }\n\n    // mask samples not in file\n    pheno_data->masked_indivs.col(ph).array() = pheno_data->masked_indivs.col(ph).array() && blupf_mask;\n    n_masked_post = pheno_data->masked_indivs.col(ph).count();\n\n    // check not everyone is masked\n    all_miss_pheno(ph) = n_masked_post < 1;\n    if( all_miss_pheno(ph) ) {\n      fClass.closeFile();\n      continue;\n    }\n\n    if( n_masked_post < n_masked_prior ){\n      if((params->trait_mode==1) || (params->trait_mode==3)){ // re-compute case-control indices\n        int event_index = ph;\n        if (params->trait_mode == 3) { // find event column index\n          std::vector<std::string>::iterator it_event = std::find(files->pheno_names.begin(), files->pheno_names.end(), files->t2e_map[files->pheno_names[ph]]);\n          event_index = std::distance(files->pheno_names.begin(), it_event);\n        }\n        get_both_indices(filters->case_control_indices[ph], pheno_data->phenotypes_raw.col(event_index).array() == 1, pheno_data->masked_indivs.col(ph).array());\n        params->pheno_counts(ph, 0) = filters->case_control_indices[ph][0].size();\n     
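   // col 1 tracks controls (censored samples for time-to-event traits)\n     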
   params->pheno_counts(ph, 1) = filters->case_control_indices[ph][1].size();\n      }\n      sout << \"    + \" << n_masked_prior - n_masked_post <<\n        \" individuals with missing LOCO predictions will be ignored for the trait\\n\";\n    }\n\n    if(params->w_ltco){ // go through each line and sum up the loco prs\n\n      bool chr_ltco;\n      int nchr_file = 0;\n      double ds;\n      full_prs = ArrayXd::Zero( params->n_samples );\n\n      // Re-open file (since skipped 2nd row)\n      fClass.closeFile();\n      fClass.openForRead(yfile, sout);\n      fClass.ignoreLines(1); // skip first row\n\n      while( fClass.readLine(line) ){\n\n        tmp_prs_vec = string_split(line,\"\\t \");\n        if( tmp_prs_vec.size() != tmp_str_vec.size() )\n          throw \"number of entries for chromosome \" + tmp_prs_vec[0] + \n            \" does not match with that in header (\" + \n            to_string(tmp_prs_vec.size()) + \" vs \" + to_string(tmp_str_vec.size()) + \")\";\n\n        // check if it is the LTCO chromosome\n        chr_ltco = (chrStrToInt(tmp_prs_vec[0], params->nChrom) == params->ltco_chr);\n\n        for (size_t i = 1; i < tmp_str_vec.size(); i++){\n          // ignore sample if it is not in genotype data\n          if (!in_map(tmp_str_vec[i], params->FID_IID_to_ind)) continue;\n          indiv_index = params->FID_IID_to_ind[tmp_str_vec[i]];\n          if(!pheno_data->masked_indivs(indiv_index,ph)) continue;\n\n          ds = convertDouble(tmp_prs_vec[i], params, sout);\n          if(chr_ltco) m_ests->ltco_prs(indiv_index, ph) = - ds;\n          full_prs(indiv_index) += ds;\n        }\n\n        nchr_file++;\n      }\n\n      if( nchr_file != params->nChrom )\n        throw \"incorrectly formatted file (wrong number of chromosomes).\";\n\n      m_ests->ltco_prs.col(ph).array() += full_prs / (nchr_file - 1);\n      //cerr << m_ests->ltco_prs.col(ph).head(5)<<endl;\n\n    }\n\n    fClass.closeFile();\n  }\n\n  check_phenos(all_miss_pheno, files->pheno_names, files->out_file + \"_\" + \"pheno_all_miss.txt\", sout);\n\n}\n\nvoid extract_interaction_prs(struct param* params, struct in_files* files, struct filter* filters, struct phenodt* pheno_data, Ref<ArrayXb> ind_in_cov_and_geno, mstream& sout) {\n\n  pheno_data->interaction_cov.resize(params->n_samples, 1);\n  MapArXd PRS (pheno_data->interaction_cov.col(0).data(), params->n_samples, 1);\n\n  // read prs\n  read_prs(PRS, files, params, ind_in_cov_and_geno, sout);\n  if(params->debug) cerr << \"full PRS head:\" << PRS.matrix().transpose().array().head(5) << endl;\n\n  // disable adjusting for PRS\n  params->use_prs = false;\n  params->skip_blups = true;\n\n}\n\nvoid read_prs(Ref<ArrayXd> full_prs, struct in_files* files, struct param* params, Ref<ArrayXb> ind_in_cov_and_geno, mstream& sout) {\n\n  int n_masked_prior, n_masked_post;\n  uint32_t indiv_index;\n  double blup_val;\n  string yfile, line;\n  std::vector< string > tmp_str_vec, tmp_prs_vec;\n  ArrayXb blupf_mask;\n  Files fClass;\n\n  sout << \"    + extracting full PRS using \" << (params->use_prs ? 
\"PRS\" : \"LOCO\") << \" predictions : [\" << files->blup_list_file << \"]\\n\";\n  if(params->n_pheno > 1) throw \"option '--interaction-prs' only works with a single phenotype\"; \n\n  // read blup file for each phenotype\n  for(int ph = 0; ph < params->n_pheno; ph++) {\n    if( !params->pheno_pass(ph) ) continue;\n\n    yfile = files->blup_files[ files->pheno_names[ph] ];\n    sout << \"    -file [\" << yfile  << \"] for phenotype '\" << files->pheno_names[ph] << \"'\\n\";\n\n    fClass.openForRead(yfile, sout);\n\n    // to mask all individuals not present in .loco file\n    blupf_mask = ArrayXb::Constant(params->n_samples, false);\n    n_masked_prior = ind_in_cov_and_geno.count();\n    full_prs = 0;\n\n    // read first line which has FID_IID\n    fClass.readLine(line);\n    tmp_str_vec = string_split(line,\"\\t \");\n\n    if( tmp_str_vec[0] != \"FID_IID\") \n      throw \"header of blup file must start with FID_IID (=\" + tmp_str_vec[0] + \")\";\n\n    // read second line to check for missing predictions\n    fClass.readLine(line);\n    tmp_prs_vec = string_split(line,\"\\t \");\n\n    if( params->use_prs && (tmp_prs_vec[0] != \"0\") )\n      throw \"second line must start with 0 (=\" + tmp_prs_vec[0] + \").\";\n\n    for (size_t i = 1; i < tmp_str_vec.size(); i++){\n      // ignore sample if it is not in genotype data\n      if (!in_map(tmp_str_vec[i], params->FID_IID_to_ind)) continue;\n      indiv_index = params->FID_IID_to_ind[tmp_str_vec[i]];\n      blup_val = convertDouble(tmp_prs_vec[i], params, sout);\n\n      // ignore samples where prediction is NA\n      blupf_mask( indiv_index ) = (blup_val != params->missing_value_double);\n      //cerr << tmp_str_vec[i] << \"\\t\" << std::boolalpha << blupf_mask( indiv_index ) << endl; \n      if (!blupf_mask( indiv_index )) continue;\n\n      if( params->use_prs ) full_prs(indiv_index) = blup_val;\n    }\n\n    // mask samples not in file\n    ind_in_cov_and_geno = ind_in_cov_and_geno && blupf_mask;\n    n_masked_post = ind_in_cov_and_geno.count();\n\n    // check not everyone is masked\n    if( n_masked_post < 1 )\n      throw \"none of the samples have step 1 predictions in file.\"; \n\n    if( n_masked_post < n_masked_prior ){\n      sout << \"     * \" << n_masked_prior - n_masked_post <<\n        \" individuals with missing LOCO predictions will be ignored for the trait\\n\";\n    }\n\n    if(!params->use_prs){ // go through each line and sum up the loco prs to get the full PRS\n\n      int nchr_file = 0;\n      double ds;\n\n      // Re-open file (since skipped 2nd row)\n      fClass.closeFile();\n      fClass.openForRead(yfile, sout);\n      fClass.ignoreLines(1); // skip first row\n\n      while( fClass.readLine(line) ){\n\n        tmp_prs_vec = string_split(line,\"\\t \");\n        if( tmp_prs_vec.size() != tmp_str_vec.size() )\n          throw \"number of entries for chromosome \" + tmp_prs_vec[0] + \n            \" does not match with that in header (\" + \n            to_string(tmp_prs_vec.size()) + \" vs \" + to_string(tmp_str_vec.size()) + \")\";\n\n        for (size_t i = 1; i < tmp_str_vec.size(); i++){\n          // ignore sample if it is not in genotype data\n          if (!in_map(tmp_str_vec[i], params->FID_IID_to_ind)) continue;\n          indiv_index = params->FID_IID_to_ind[tmp_str_vec[i]];\n          if(!ind_in_cov_and_geno(indiv_index)) continue;\n\n          ds = convertDouble(tmp_prs_vec[i], params, sout);\n          full_prs(indiv_index) += ds;\n        }\n\n        nchr_file++;\n      }\n\n      if( 
nchr_file != params->nChrom )\n        throw \"incorrectly formatted file (wrong number of chromosomes).\";\n\n      full_prs /= (nchr_file - 1);\n\n    }\n\n    fClass.closeFile();\n  }\n\n}\n\nvoid check_phenos(Ref<ArrayXb> pheno_ind, vector<string> const& pheno_names, string const& fname, mstream& sout){\n\n  // print phenotype names where ind is true and exit\n  if(!pheno_ind.any()) return;\n\n  Files fout;\n  fout.openForWrite(fname, sout);\n\n  for(int i = 0; i < pheno_ind.size(); i++)\n    if(pheno_ind(i))\n      fout << pheno_names[i] << \"\\n\";\n  fout.closeFile();\n\n  throw \"Problematic phenotypes found (all individuals have missing step 1 predictions); names written to [\" + fname + \"].\\n\" +\n    \"You can use `--phenoExcludeList` to ignore these traits.\";\n\n}\n\n\n// write ids of samples included in step 2 (done for each trait)\nvoid write_ids(struct in_files const* files, struct param* params, struct phenodt const* pheno_data, mstream& sout){\n\n  uint32_t index;\n  map<string, uint32_t >::iterator itr_ind;\n  string idfile;\n  Files fout;\n  vector<string> ids_out_ordered ( params->n_samples );\n\n  sout << \" * user specified to write sample IDs for each trait\" << endl;\n\n  for( int ph = 0; ph < params->n_pheno; ph++){\n\n    if( !params->pheno_pass(ph) ) continue;\n\n    idfile = files->out_file + \"_\" + files->pheno_names[ph] + \".regenie.ids\";\n    fout.openForWrite(idfile, sout);\n\n    // print phenotype name on 1st line (ensure 2 column format)\n    if( params->print_pheno_name ) fout << files->pheno_names[ph] << \"\\tNA\\n\"; \n\n    // go through map and check if individual is not masked\n    for (itr_ind = params->FID_IID_to_ind.begin(); itr_ind != params->FID_IID_to_ind.end(); ++itr_ind) {\n\n      index = itr_ind->second;\n\n      if( !pheno_data->masked_indivs(index, ph) ) continue;\n      ids_out_ordered[ index ] = params->FIDvec[index][0] + \"\\t\" + params->FIDvec[index][1];\n\n    }\n\n    for(size_t i = 0, j = 0; i < params->n_samples; i++)\n      if(pheno_data->masked_indivs(i, ph))\n        fout << ids_out_ordered[i] << ( ++j == pheno_data->Neff(ph) ? 
\"\" : \"\\n\" );\n\n    fout.closeFile();\n  } \n\n  if(!params->write_masks) params->FIDvec.clear();\n}\n\nvoid fit_null_models_nonQT(struct param* params, struct phenodt* pheno_data, struct ests* m_ests, struct in_files* files, mstream& sout) {\n\n  if(params->print_cov_betas) { \n\n    params->cov_betas.resize(params->ncov, params->n_pheno);\n    params->xtx_inv_diag.resize(params->ncov, params->n_pheno);\n\n    // need to get betas for non-QTs in step 2\n    if(params->trait_mode==1) { // BT\n      fit_null_logistic(true, 0, params, pheno_data, m_ests, files, sout, true);// null logistic\n      // if it fails, try a less stringent convergence criterion\n      if(params->n_pheno == 1 && !params->pheno_pass(0)){\n        params->numtol = 2 * params->numtol_firth; params->pheno_pass(0) = true;\n        fit_null_logistic(true, 0, params, pheno_data, m_ests, files, sout, true);// null logistic\n        params->numtol = 1e-6;\n      }\n      if(params->firth) // null firth\n        for( int ph = 0; ph < params->n_pheno; ++ph ) \n          if(params->pheno_pass(ph))\n            params->pheno_pass(ph) = fit_approx_firth_null(0, ph, pheno_data, m_ests, params->cov_betas.col(ph), params, true);\n\n    } else if(params->trait_mode==2) { // CT\n      fit_null_poisson(0, params, pheno_data, m_ests, files, sout, true);\n    } else if(params->trait_mode==3) {\n      fit_null_cox(false, 0, params, pheno_data, m_ests, files, sout, true);\n    }\n  }\n\n  // compute offset for nonQT (only in step 1)\n  if((params->trait_mode==1) && !params->test_mode) fit_null_logistic(false, 0, params, pheno_data, m_ests, files, sout);\n  else if((params->trait_mode==2) && !params->test_mode) fit_null_poisson(0, params, pheno_data, m_ests, files, sout);\n  else if((params->trait_mode==3) && !params->test_mode) fit_null_cox(false, 0, params, pheno_data, m_ests, files, sout); // use covariates, and make linear prediction\n}\n\nvoid print_cov_betas(struct param* params, struct in_files const* files, mstream& sout){\n\n  sout << \" * covariate effects written to file : [ \" << files->out_file << \"_cov_betas.txt ]\";\n\n  double se, stat, logp;\n  std::ostringstream buffer;\n  Files fout;\n  MeasureTime mt;\n  mt.start_ms();\n\n  //header\n  buffer << \"COVAR\\tPHENO\\tBETA\\tSE\\tPVALUE\\n\";\n\n  // re-scale the betas\n  params->cov_betas.array().colwise() /= params->cov_sds;\n  params->xtx_inv_diag.array().colwise() /= params->cov_sds;\n\n  // for each covariate/phenotype, print beta|SE|p-value\n  for(size_t ic = 0; ic < params->covar_names.size(); ic++)\n    for(int ph = 0; ph < params->n_pheno; ph++){\n      if( !params->pheno_pass(ph) ) {\n        buffer << params->covar_names[ic] << \"\\t\" << files->pheno_names[ph] << \"\\tNA\\tNA\\tNA\\n\";\n        continue;\n      }\n      se = params->xtx_inv_diag(ic, ph);\n      if(se == 0) {\n        buffer << params->covar_names[ic] << \"\\t\" << files->pheno_names[ph] << \"\\tNA\\tNA\\tNA\\n\";\n        continue;\n      }\n      stat = pow(params->cov_betas(ic,ph)/se, 2);\n      get_logp(logp, stat);\n\n      buffer << params->covar_names[ic] << \"\\t\" << files->pheno_names[ph] << \"\\t\" <<\n        params->cov_betas(ic,ph) << \"\\t\" << se << \"\\t\" << convert_logp_raw(logp) << \"\\n\";\n    }\n\n  fout.openForWrite(files->out_file + \"_cov_betas.txt\", sout);\n  fout << buffer.str();\n  fout.closeFile();\n\n  sout << \" ...\" << mt.stop_ms() << \"\\n\";\n\n  params->cov_betas.resize(0,0);\n  params->xtx_inv_diag.resize(0,0);\n  
params->covar_names.resize(0);\n}\n\nint getBasis(MatrixXd& X, struct param const* params){\n\n  // eigen-decompose NxK matrix\n  if (params->trait_mode == 3) {\n    RowVectorXd mu = X.colwise().mean();\n    X.rowwise() -= mu;\n    X.array().rowwise() /= params->cov_sds.matrix().transpose().array();\n  }\n  \n  MatrixXd xtx = X.transpose() * X;\n  SelfAdjointEigenSolver<MatrixXd> es(xtx);\n  VectorXd D = es.eigenvalues();\n  MatrixXd V = es.eigenvectors();\n  // create basis set\n  // eigenvalues sorted in increasing order\n  int non_zero_eigen = (D.array() > D.tail(1)(0) * params->eigen_val_rel_tol).count();\n  RowVectorXd vv1 = D.tail(non_zero_eigen).array().sqrt();\n  X *= V.rightCols(non_zero_eigen);\n  X.array().rowwise() /= vv1.array();\n\n  return non_zero_eigen;\n}\n\n// keep only linearly independent columns in X, then re-scale them\nint scale_mat(MatrixXd& X, const Eigen::Ref<const ArrayXb>& ind_in_analysis, struct param* params){\n\n  int ncol_start = X.cols();\n  ArrayXi index_in_analysis = get_true_indices(ind_in_analysis);\n\n  // check rank of X\n  ColPivHouseholderQR<MatrixXd> qrX;\n  qrX.compute(X(index_in_analysis, all));\n  int indCols = qrX.rank();\n\n  if(indCols == 0)\n    throw \"rank of matrix is 0.\";\n  else if ( indCols < X.cols() ){\n    vector<string> new_names;\n    // get indices of columns retained\n    ArrayXi colKeep = qrX.colsPermutation().indices().head(indCols);\n    // sort them to keep order\n    std::sort(colKeep.begin(), colKeep.end());\n    //cerr << \"invert:\" << qrX.isInvertible() << \"\\ndim: \" << ncol_start << \"\\nrank: \" << indCols << \"\\nqr_perm_vec:\"<<colKeep.transpose() << \"\\n\";\n    //for(int i = 0; i < ncol_start; i++) cerr << i << \" - \" << params->covar_names[i] << \" | \" ;\n    // keep only linearly independent columns (avoid full matrix copy)\n    for(int i = 0; i < indCols; i++) {\n      X.col(i) = X.col(colKeep(i)); // overwrite columns starting from leftmost ones\n      new_names.push_back( params->covar_names[ colKeep(i) ]);\n    }\n    X.conservativeResize(X.rows(), indCols);\n    params->covar_names = new_names;\n    cout << \"WARNING: \" << (ncol_start - indCols) << \" variables removed due to multicollinearity\\n\";\n  } \n\n  // save SD\n  RowVectorXd mu = X(index_in_analysis, all).colwise().mean();\n  params->cov_sds = (X(index_in_analysis, all).rowwise() - mu).colwise().norm().array() / sqrt(params->n_analyzed - 1);\n\n  // SD=0 should be only for intercept column (set it to 1)\n  // get indices where SD < tol\n  ArrayXb zero_sd = (params->cov_sds < params->eigen_val_rel_tol);\n  ArrayXi zero_sd_indices = get_true_indices(zero_sd);\n  for (auto const& index: zero_sd_indices) {\n    if( params->covar_names[index] == \"Intercept\"){\n      params->cov_sds(index) = 1;\n      continue;\n    } else {\n      if(params->debug) {\n        cerr << \"cov_names: \" << print_sv(params->covar_names,\"\\t\") << \"\\n\";\n        cerr << \"X top 2 rows:\\n\" << X.topRows(2) << \"\\n\";\n        cerr << \"SDs:\\n\" << params->cov_sds.matrix().transpose() << \"\\n\";\n        cerr << \"eig. 
tol: \" << params->eigen_val_rel_tol << \"\\n\";\n      }\n      throw \"SD=0 found for covariate '\" + params->covar_names[index] + \"'; please remove this covariate and re-run.\";\n    }\n  }\n\n  // re-scale X (better for logistic reg convergence)\n  X.array().rowwise() /= params->cov_sds.matrix().transpose().array();\n  \n  return X.cols();\n}\n\nvoid QRcheck(MatrixXd& mat, bool const& replace_names, vector<string>& old_names, int const& n, double const& qr_tol, double const& num_tol, bool const& apply_check_sd){\n\n  vector<string> new_names;\n\n  // find set of linearly independent cols\n  ColPivHouseholderQR<MatrixXd> qrA(mat);\n  qrA.setThreshold(qr_tol); \n  int indCols = qrA.rank();\n\n  if(indCols == 0)\n    throw \"rank of matrix is 0.\";\n  else if ( indCols < mat.cols() ){\n    ArrayXi colKeep = qrA.colsPermutation().indices();\n    // sort them to keep order\n    vector<int> mindices(colKeep.data(), colKeep.data() + indCols);\n    std::sort(mindices.begin(), mindices.end());\n\n    // keep only linearly independent columns\n    MatrixXd tmpM = mat(all, mindices);\n    if(replace_names)\n      for(int i = 0; i < indCols; i++)\n        new_names.push_back( old_names[ mindices[i] ]);\n\n    mat = tmpM;\n    if(replace_names) old_names = new_names;\n  } \n\n  // check no columns has sd = 0\n  if(apply_check_sd) check_sd(mat, n, num_tol);\n\n}\n\nvoid check_sd(const Eigen::Ref<const Eigen::MatrixXd>& mat, int const& n, double const& numtol){\n\n  RowVectorXd mu = mat.colwise().mean();\n  VectorXd sd = (mat.rowwise() - mu).colwise().norm().array() / sqrt(n);\n\n  if(sd.minCoeff() < numtol)\n    throw \"one of the columns has sd=0\";\n\n}\n\n// only in step 2\nvoid check_cov_blup(struct phenodt* pheno_data, struct param* params) {\n\n  params->ncov_analyzed = params->ncov + (int) params->blup_cov;\n\n  // for BTs, add extra column for LOCO PRS\n  if((params->trait_mode==1) && params->blup_cov){\n    pheno_data->new_cov.conservativeResize( pheno_data->new_cov.rows(), pheno_data->new_cov.cols() + 1);\n    params->ncov = pheno_data->new_cov.cols();\n    params->ncov_analyzed = params->ncov;\n  }\n\n}\n\nvoid residualize_phenotypes(struct param* params, struct phenodt* pheno_data, const std::vector<std::string>& pheno_names, mstream& sout) {\n  sout << \"   -residualizing and scaling phenotypes...\";\n  auto t1 = std::chrono::high_resolution_clock::now();\n\n  // compute covariate effects\n  MatrixXd beta;\n  if(params->print_cov_betas) { // X is not orth basis\n    HouseholderQR<MatrixXd> qrX(pheno_data->new_cov);\n    params->cov_betas = qrX.solve(pheno_data->phenotypes);\n    MatrixXd R = qrX.matrixQR().topLeftCorner(params->ncov, params->ncov).template triangularView<Upper>();\n    params->xtx_inv_diag.array().colwise() = R.inverse().array().square().rowwise().sum().sqrt();\n    // get orthonormal basis so xtx_inv = I\n    if(params->trait_mode == 0) params->ncov = getBasis(pheno_data->new_cov, params);\n  }\n  beta = pheno_data->phenotypes.transpose() * pheno_data->new_cov;\n\n  // residuals (centered) then scale\n  pheno_data->phenotypes -= ( (pheno_data->new_cov * beta.transpose()).array() * pheno_data->masked_indivs.array().cast<double>() ).matrix();\n  pheno_data->scale_Y = pheno_data->phenotypes.colwise().norm().array() / sqrt(pheno_data->Neff.matrix().transpose().array() - params->ncov);\n\n  // set sd for phenotypes which are ignored to 1\n  pheno_data->scale_Y = params->pheno_pass.select(pheno_data->scale_Y.transpose().array(), 1).matrix().transpose();\n  
if(params->print_cov_betas) params->xtx_inv_diag *= pheno_data->scale_Y.asDiagonal();\n\n  // check sd is not 0 \n  MatrixXd::Index minIndex;\n  if(pheno_data->scale_Y.minCoeff(&minIndex) < params->numtol)\n    throw \"phenotype \\'\" + pheno_names[minIndex] + \"\\' has sd=0.\";\n\n  pheno_data->phenotypes.array().rowwise() /= pheno_data->scale_Y.array();\n\n  sout << \"done\";\n  auto t2 = std::chrono::high_resolution_clock::now();\n  auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1);\n  sout << \" (\" << duration.count() << \"ms) \"<< endl;\n}\n\nbool residualize_matrix(MatrixXd& mat, ArrayXd& scf, const Eigen::Ref<const Eigen::MatrixXd>& X, size_t const& N, double const& numtol) {\n\n  // residuals (centered) \n  MatrixXd beta = mat.transpose() * X;\n  mat -= X * beta.transpose();\n\n  scf = mat.colwise().norm().array() / sqrt(N - X.cols());\n\n  // check sd is not 0 \n  if(scf.minCoeff() < numtol)\n    return false;\n\n  // scale\n  mat.array().rowwise() /= scf.matrix().transpose().array();\n\n  return true;\n}\n\nvoid get_lm_resid(MatrixXd& res, const Eigen::Ref<const Eigen::MatrixXd>& step1_preds, const Eigen::Ref<const Eigen::MatrixXd>& Y) {\n\n  ArrayXd beta = (1 / step1_preds.colwise().squaredNorm().array()) * (step1_preds.array() * Y.array()).colwise().sum();\n  res = Y - step1_preds * beta.matrix().asDiagonal();\n\n}\n\nvoid apply_QR(MatrixXd& mat, struct param const* params, bool const& scale){\n\n  // find set of linearly independent cols\n  ColPivHouseholderQR<MatrixXd> qrA(mat);\n  qrA.setThreshold(params->qr_tol); \n  int indCols = qrA.rank();\n\n  if(indCols == 0)\n    throw \"rank of matrix is 0.\";\n  else if ( indCols < mat.cols() ){\n    ArrayXi colKeep = qrA.colsPermutation().indices();\n    std::vector<int> new_indices;\n\n    // keep only linearly independent columns\n    MatrixXd tmpM (mat.rows(), indCols);\n\n    for(int i = 0; i < indCols; i++)\n      tmpM.col(i) = mat.col( colKeep(i) );\n\n    mat = tmpM;\n  } \n\n  if(scale)\n    rescale_mat(mat, params);\n\n}\n\nvoid rescale_mat(Ref<MatrixXd> mat, struct param const* params){\n\n    RowVectorXd mu = mat.colwise().sum() / params->n_samples;\n    mat.rowwise() -= mu;\n\n    // check sd is not 0 \n    ArrayXd scf = mat.colwise().norm().array() / sqrt(params->n_samples - 1);\n    if(scf.minCoeff() < params->numtol)\n      throw \"sd = 0 occurred\";\n\n    // scale\n    mat.array().rowwise() /= scf.matrix().transpose().array();\n\n}\n\nvoid pheno_impute_miss(struct phenodt* pheno_data, const Eigen::Ref<const ArrayXb>& ind_in_analysis, struct in_files* files, struct param const* params){\n\n  if( (params->trait_mode == 0) && !params->force_qt_run )\n    for(int i = 0; i < params->n_pheno; i++)\n      if( params->pheno_pass(i) ) \n        check_nvals(i, files->pheno_names[i], params, pheno_data); // check there is not a binary trait\n\n  if((params->trait_mode==0) || !params->test_mode){\n    double total, ns;\n\n    // for each trait, impute missing with mean\n    for(int j = 0; j < params->n_pheno; j++)\n      if( params->pheno_pass(j) ){\n\n        MapArXd Y (pheno_data->phenotypes.col(j).data(), params->n_samples, 1);\n        MapArXb mask (pheno_data->masked_indivs.col(j).data(), params->n_samples, 1);\n\n        if(params->trait_mode==0){ // impute missing with mean\n          total = ( Y != params->missing_value_double ).select(Y, 0).sum();\n          ns = ( ind_in_analysis && (Y != params->missing_value_double) ).count();\n          Y = ( Y != params->missing_value_double 
).select(Y, total / ns);\n        } else { // mask tracks missingness\n          total = mask.select(Y, 0).sum() / mask.count();\n          Y = mask.select(Y, total);\n        }\n      }\n\n    // apply masking\n    pheno_data->phenotypes.array() *= pheno_data->masked_indivs.array().cast<double>();\n\n  }\n\n}\n\nvoid apply_rint(struct phenodt* pheno_data, struct param const* params){\n\n  // for each trait, apply rank-inverse normal transformation\n  for(int ph = 0; ph < params->n_pheno; ph++)\n    if( params->pheno_pass(ph) )\n      rint_pheno(pheno_data->phenotypes.col(ph), (pheno_data->phenotypes.col(ph).array() != params->missing_value_double) && pheno_data->masked_indivs.col(ph).array());\n\n}\n\nvoid set_pheno_pass(struct in_files const* files, struct param* params){\n\n  bool select_phenos = params->select_pheno_l1.size() > 0;\n  params->pheno_pass = ArrayXb::Constant(params->n_pheno, false);\n  params->pheno_fail_nullreg = ArrayXb::Constant(params->n_pheno, false);\n  for(int ph = 0; ph < params->n_pheno; ph++)\n    if( select_phenos )\n      params->pheno_pass(ph) = in_map( files->pheno_names[ph], params->select_pheno_l1 );\n    else\n      params->pheno_pass(ph) = true;\n  \n  if (params->trait_mode == 3 && !params->t2e_event_l0) {\n    for (const auto& entry: files->t2e_map) {\n      const std::string& event_name = entry.second;\n      // find event column index\n      std::vector<std::string>::const_iterator it_event = std::find(files->pheno_names.begin(), files->pheno_names.end(), event_name);\n      int event_index = std::distance(files->pheno_names.begin(), it_event);\n      params->pheno_pass(event_index) = false;\n    }\n  }\n\n  // sanity check\n  if((!params->pheno_pass).all())\n    throw \"none of the specified phenotypes for level 1 were found.\\n\";\n  if(params->test_l0 && !params->use_loocv && (params->n_pheno > 1))\n    throw \"--test-l0 with k-fold CV is not allowed for multi-trait runs.\\n\";\n\n}\n\nvoid rint_pheno(Ref<MatrixXd> Y, const Eigen::Ref<const ArrayXb>& mask){\n\n  int nvals = mask.count();\n  vector<rank_pair> yvals;\n  yvals.resize(nvals);\n\n  // get the index for each value\n  for(int i = 0, j = 0; i < Y.rows(); i++){\n    if(!mask(i)) continue;\n    yvals[j].val = Y(i,0);\n    yvals[j].index = i;\n    j++;\n  }\n\n  // sort by values keeping track of index\n  std::sort(yvals.begin(), yvals.end(), cmp_rank_pair);\n\n  // take care of ties\n  int n_eq;\n  for(int i = 0; i < nvals; i+=n_eq){\n    n_eq = 1;\n    while(((i+n_eq) < nvals) && (yvals[i+n_eq].val == yvals[i].val)) n_eq++;\n    for(int j = 0; j < n_eq; j++)\n      yvals[i+j].val = (i+1) + (n_eq-1)/2.0;\n  }\n\n  // apply INT with the ranks\n  double kc = 3/8.0, rint_val;\n  normal nd(0,1);\n  //cerr << Y.block(0,0,6,1)<<endl;\n  for(auto const& ypair : yvals){\n    rint_val = (ypair.val - kc) / (nvals - 2 * kc + 1);\n    Y( ypair.index, 0 ) = quantile(nd, rint_val);\n  }\n  //cerr << endl << endl << Y.block(0,0,6,1)<<endl;\n\n}\n\nbool cmp_rank_pair(struct rank_pair& a, struct rank_pair& b) {\n  return a.val < b.val;\n}\n\nvoid compute_skew(struct phenodt* pheno_data, struct param const* params){\n\n  // for each trait, compute skewness\n  pheno_data->skew_Y = ArrayXd::Constant(params->n_pheno, 0.0);\n  for(int ph = 0; ph < params->n_pheno; ph++) {\n    if( params->pheno_pass(ph) ) {\n      double skew = skew_pheno(pheno_data->phenotypes.col(ph), (pheno_data->phenotypes.col(ph).array() != params->missing_value_double) && pheno_data->masked_indivs.col(ph).array());\n      // debug\n      
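// (skew_pheno computes the moment-based sample skewness g1 = m3 / m2^(3/2))\n      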
/* cout << \"skew = \" << skew << endl; */\n      pheno_data->skew_Y[ph] = skew;\n    }\n  }\n}\n\ndouble skew_pheno(const Eigen::Ref<const ArrayXd> & Y, const Eigen::Ref<const ArrayXb> & mask)\n{\n  // check arguments\n  if(Y.cols() != 1)\n    throw \"skew_pheno: Y must a matrix with one column\";\n\n  unsigned int n_val = mask.cast<int>().sum();\n  if(n_val == 0)\n    throw \"skew_pheno: all values are missing\";\n\n  double n_val_d = (double)(n_val);\n  double mean_y = mask.select(Y, 0.0).sum() / n_val_d;\n  double skew_y = (mask.select((Y - mean_y).cube(), 0.0).sum() / n_val_d) / pow((mask.select((Y - mean_y).square(), 0.0).sum() / n_val_d), 1.5);\n\n  return skew_y;\n}\n"
  },
  {
    "path": "src/Pheno.hpp",
    "content": "/* \n\n   This file is part of the regenie software package.\n\n   Copyright (c) 2020-2024 Joelle Mbatchou, Andrey Ziyatdinov & Jonathan Marchini\n\n   Permission is hereby granted, free of charge, to any person obtaining a copy\n   of this software and associated documentation files (the \"Software\"), to deal\n   in the Software without restriction, including without limitation the rights\n   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n   copies of the Software, and to permit persons to whom the Software is\n   furnished to do so, subject to the following conditions:\n\n   The above copyright notice and this permission notice shall be included in all\n   copies or substantial portions of the Software.\n\n   THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n   SOFTWARE.\n\n*/\n\n#ifndef PHENO_H\n#define PHENO_H\n\nstruct rank_pair {\n  double val;\n  size_t index;\n};\n\nstruct phenodt {\n\n  Eigen::MatrixXd new_cov, new_cov_raw, YtX;\n  Eigen::MatrixXd interaction_cov, interaction_cov_res, interaction_homdev;\n  Eigen::ArrayXd scl_inter_X, scf_sv;\n  std::vector<Eigen::MatrixXd> Hmat;\n  std::vector<Eigen::ArrayXd> scf_i;\n  Eigen::MatrixXd phenotypes;\n  Eigen::MatrixXd phenotypes_raw;\n  MatrixXb masked_indivs;\n  Eigen::ArrayXd Neff; // number of non-missing samples (per trait)\n  Eigen::RowVectorXd scale_Y;\n  Eigen::ArrayXd skew_Y; // skewness of phenotypes\n  ArrayXb mcc_Y; // flags to apply MCC test on phenotypes\n  Eigen::MatrixXd cov_phenotypes; // matrix of covariates/phenotypes for MultiPhen test\n  Eigen::VectorXd cox_max_tau;\n};\n\n\nvoid read_pheno_and_cov(struct in_files*,struct param*,struct filter*,struct phenodt*, struct ests*,struct geno_block*,mstream&);\nvoid pheno_read(struct param*,struct in_files*,struct filter*,struct phenodt*,Eigen::Ref<ArrayXb>,mstream&);\nvoid tpheno_read(struct param*,struct in_files*,struct filter*,struct phenodt*,Eigen::Ref<ArrayXb>,mstream&);\nvoid rm_phenoCols(Eigen::Ref<ArrayXb>,struct in_files*,struct param*,struct phenodt*,mstream&);\nvoid covariate_read(struct param*,struct in_files*,struct filter*,struct phenodt*,Eigen::Ref<ArrayXb>,mstream&);\nvoid setMasks(struct param*,struct filter*,struct phenodt*,mstream&);\nvoid print_cc_info(struct param*,struct in_files*,struct phenodt*,std::vector<std::vector<Eigen::ArrayXi>>&,mstream&);\nvoid print_info(struct param*,struct in_files*,struct phenodt*,std::vector<std::vector<Eigen::ArrayXi>>&,mstream&);\nvoid print_cox_info(struct param*,struct in_files*,struct phenodt*,std::vector<std::vector<Eigen::ArrayXi>>&,mstream&);\nvoid check_nvals(int const&,std::string const&,struct param const*,struct phenodt const*);\nvoid extract_interaction_snp(struct param*,struct in_files*,struct filter*,struct phenodt*,struct geno_block*,Eigen::Ref<ArrayXb>,mstream&);\nvoid extract_condition_snps(struct param*,struct in_files*,struct filter*,struct phenodt*,struct geno_block*,Eigen::Ref<ArrayXb>,mstream&);\nint check_categories(std::vector<std::string>&,std::vector<std::map<std::string,int>>&,struct param*,struct 
filter*,mstream&);\nEigen::MatrixXd get_dummies(const Eigen::Ref<const Eigen::ArrayXd>&);\nbool add_square_term(const Eigen::Ref<const Eigen::MatrixXd>&);\nvoid extract_names(std::vector<std::string>&,std::map<std::string,int>&);\nint getBasis(Eigen::MatrixXd&,struct param const*);\nint scale_mat(Eigen::MatrixXd&,const Eigen::Ref<const ArrayXb>&,struct param*);\nvoid QRcheck(Eigen::MatrixXd&,bool const&,std::vector<std::string>&,int const&,double const&,double const&, bool const& apply_check_sd = true);\nvoid check_sd(const Eigen::Ref<const Eigen::MatrixXd>&,int const&,double const&);\nvoid check_cov_blup(struct phenodt*,struct param*);\nvoid residualize_phenotypes(struct param*,struct phenodt*,const std::vector<std::string>&,mstream&);\nbool residualize_matrix(Eigen::MatrixXd&,Eigen::ArrayXd&,const Eigen::Ref<const Eigen::MatrixXd>&,size_t const&,double const&);\nvoid get_lm_resid(Eigen::MatrixXd&,const Eigen::Ref<const Eigen::MatrixXd>&,const Eigen::Ref<const Eigen::MatrixXd>&);\nvoid apply_QR(Eigen::MatrixXd&,struct param const*,bool const&);\nvoid rescale_mat(Eigen::Ref<Eigen::MatrixXd>,struct param const*);\nvoid prep_run(struct in_files*,struct filter*,struct param*,struct phenodt*,struct ests*,mstream&);\nvoid check_blup(struct in_files*,struct param*,mstream&);\nbool has_blup(std::string const&,std::map<std::string,std::string> const&,struct param const*,mstream&);\nvoid blup_read(struct in_files*,struct param*,struct phenodt*,struct ests*,struct filter*,mstream&);\nvoid extract_interaction_prs(struct param*,struct in_files*,struct filter*,struct phenodt*,Eigen::Ref<ArrayXb>,mstream&);\nvoid read_prs(Eigen::Ref<Eigen::ArrayXd>,struct in_files*,struct param*,Eigen::Ref<ArrayXb>,mstream&);\nvoid check_phenos(Eigen::Ref<ArrayXb>,std::vector<std::string> const&,std::string const&,mstream&);\nvoid write_ids(struct in_files const*,struct param*,struct phenodt const*,mstream&);\nvoid fit_null_models_nonQT(struct param*,struct phenodt*,struct ests*,struct in_files*,mstream&);\nvoid print_cov_betas(struct param*,struct in_files const*,mstream&);\nvoid pheno_impute_miss(struct phenodt*,const Eigen::Ref<const ArrayXb>&,struct in_files*,struct param const*);\nvoid apply_rint(struct phenodt*,struct param const*);\nvoid set_pheno_pass(struct in_files const*,struct param*);\nvoid rint_pheno(Eigen::Ref<Eigen::MatrixXd>,const Eigen::Ref<const ArrayXb>&);\nbool cmp_rank_pair(struct rank_pair&,struct rank_pair&);\nvoid compute_skew(struct phenodt*,struct param const*);\ndouble skew_pheno(const Eigen::Ref<const Eigen::ArrayXd> & , const Eigen::Ref<const ArrayXb> & );\n\n#endif\n\n"
  },
  {
    "path": "src/Regenie.cpp",
    "content": "/* \n\n   This file is part of the regenie software package.\n\n   Copyright (c) 2020-2024 Joelle Mbatchou, Andrey Ziyatdinov & Jonathan Marchini\n\n   Permission is hereby granted, free of charge, to any person obtaining a copy\n   of this software and associated documentation files (the \"Software\"), to deal\n   in the Software without restriction, including without limitation the rights\n   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n   copies of the Software, and to permit persons to whom the Software is\n   furnished to do so, subject to the following conditions:\n\n   The above copyright notice and this permission notice shall be included in all\n   copies or substantial portions of the Software.\n\n   THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n   SOFTWARE.\n\n*/\n\n#include \"cxxopts.hpp\"\n#include <regex>\n#include <chrono>\n#include <time.h>\n#include \"Regenie.hpp\"\n#include \"Files.hpp\"\n#include \"Geno.hpp\"\n#include \"Joint_Tests.hpp\"\n#include \"MultiTrait_Tests.hpp\"\n#include \"Ordinal.hpp\"\n#include \"survival_data.hpp\"\n#include \"cox_score.hpp\"\n#include \"Step1_Models.hpp\"\n#include \"Step2_Models.hpp\"\n#include \"Pheno.hpp\"\n#include \"Masks.hpp\"\n#include \"HLM.hpp\"\n#include \"Data.hpp\"\n\n#include <boost/exception/all.hpp>\n#include <boost/math/special_functions/gamma.hpp>\n\nusing namespace std;\nusing namespace Eigen;\nusing namespace boost;\n\n\nmstream::mstream(){ }\nmstream::~mstream(){ }\nMeasureTime::MeasureTime(){ }\nMeasureTime::~MeasureTime(){ }\n\n\nint main( int argc, char** argv ) {\n\n  Data data;\n  read_params_and_check(argc, argv, &data.params, &data.files, &data.in_filters, &data.runtime, data.sout);\n\n  try {// after opening sout\n\n    // for rng\n    std::mt19937_64 rng_rd(data.params.rng_seed);\n    data.params.rng_rd = &rng_rd;\n\n    data.run();\n\n  } catch (bad_alloc& badAlloc) {\n    data.sout << \"ERROR: bad_alloc caught, not enough memory (\" << badAlloc.what() << \")\\n\";\n    exit(EXIT_FAILURE);\n  } catch (const std::string& msg){ \n    data.sout << \"ERROR: \" << msg << endl;\n    exit(EXIT_FAILURE);\n  } catch (const char* msg) {\n    std::string str_msg = msg;\n    data.sout <<  \"ERROR: \" <<  str_msg << endl;\n    exit(EXIT_FAILURE);\n  } catch (boost::exception const& e) {\n    data.sout << \"ERROR: \" << boost::diagnostic_information(e) << endl;\n    exit(EXIT_FAILURE);\n  } catch (std::exception const&  e) {\n    data.sout << \"ERROR: \" << e.what() << endl;\n    exit(EXIT_FAILURE);\n  } catch (...) 
{\n    data.sout << boost::current_exception_diagnostic_information() << endl;\n    exit(EXIT_FAILURE);\n}\n\n  data.runtime.stop();\n\n  data.sout << \"\\nElapsed time : \" << std::chrono::duration<double>(data.runtime.end - data.runtime.begin).count() << \"s\" << endl;\n  data.sout << \"End time: \" << ctime(&data.runtime.end_time_info) << endl; \n\n}\n\n\nvoid print_header(std::ostream& o){\n\n  std::ostringstream oss;\n  string vnumber;\n  // adjust spacing for version with Boost Iostream library (`.gz` suffix)\n#ifndef HAS_BOOST_IOSTREAM\n  oss << \"  \";\n#endif\n  oss << \"REGENIE v\" << VERSION_NUMBER; \n  vnumber = oss.str();\n\n  int out_width = 6;\n  int total_width = vnumber.size() + out_width * 2;\n\n  o << left << std::setw(14) << \" \" << \"|\" << std::string(total_width, '=')<< \"|\" << endl;\n  o << left << std::setw(14) << \" \" << \"|\" << left << std::setw(out_width) << \" \" <<\n    left << std::setw(total_width - out_width) << vnumber << \"|\" << endl;\n  o << left << std::setw(14) << \" \" << \"|\" << std::string(total_width, '=')<< \"|\\n\\n\";\n\n  o << \"Copyright (c) 2020-2024 Joelle Mbatchou, Andrey Ziyatdinov and Jonathan Marchini.\" << endl;\n  o << \"Distributed under the MIT License.\\n\";\n#ifdef HAS_BOOST_IOSTREAM\n  o << \"Compiled with Boost Iostream library.\\n\";\n#endif\n\n#if defined(WITH_HTSLIB)\n  o << \"Compiled with HTSlib.\\n\";\n#endif\n\n  // adding BLAS/LAPACK external routines\n#if defined(WITH_MKL)\n  o << \"Using Intel MKL with Eigen.\\n\";\n#elif defined(WITH_OPENBLAS)\n  o << \"Using BLAS/LAPACK routines from OpenBLAS with Eigen.\\n\";\n#endif\n\n  o << \"\\n\";\n}\n\n\nvoid read_params_and_check(int& argc, char *argv[], struct param* params, struct in_files* files, struct filter* filters, MeasureTime* mt, mstream& sout) {\n\n  cxxopts::Options AllOptions(argv[0], \"\");\n\n  AllOptions.add_options()\n    (\"h,help\", \"print list of available options\")\n    (\"helpFull\", \"print list of all available options\")\n    ;\n\n  // add main options\n  AllOptions.add_options(\"Main\")\n    (\"step\", \"specify if fitting null model (=1) or association testing (=2)\", cxxopts::value<int>(params->run_mode),\"INT\")\n    (\"bed\", \"prefix to PLINK .bed/.bim/.fam files\", cxxopts::value<std::string>(files->bed_prefix),\"PREFIX\")\n    (\"pgen\", \"prefix to PLINK2 .pgen/.pvar/.psam files\", cxxopts::value<std::string>(files->pgen_prefix),\"PREFIX\")\n    (\"bgen\", \"BGEN file\", cxxopts::value<std::string>(files->bgen_file),\"FILE\")\n    (\"sample\", \"sample file corresponding to BGEN file\", cxxopts::value<std::string>(files->sample_file),\"FILE\")\n    (\"bgi\", \"index bgi file corresponding to BGEN file\", cxxopts::value<std::string>(files->bgi_file),\"FILE\")\n    (\"ref-first\", \"use the first allele as the reference for BGEN or PLINK bed/bim/fam input format [default assumes reference is last]\")\n    (\"keep\", \"comma-separated list of files listing samples to retain in the analysis (no header; starts with FID IID)\", cxxopts::value<std::string>(),\"FILE\")\n    (\"remove\", \"comma-separated list of files listing samples to remove from the analysis (no header; starts with FID IID)\", cxxopts::value<std::string>(),\"FILE\")\n    (\"extract\", \"comma-separated list of files with IDs of variants to retain in the analysis\", cxxopts::value<std::string>(),\"FILE\")\n    (\"exclude\", \"comma-separated list of files with IDs of variants to remove from the analysis\", cxxopts::value<std::string>(),\"FILE\")\n    (\"p,phenoFile\", 
\"phenotype file (header required starting with FID IID)\", cxxopts::value<std::string>(files->pheno_file),\"FILE\")\n    (\"phenoCol\", \"phenotype name in header (use for each phenotype to keep; can use parameter expansion {i:j})\", cxxopts::value< std::vector<std::string> >(),\"STRING\")\n    (\"phenoColList\", \"comma separated list of phenotype names to keep (can use parameter expansion {i:j})\", cxxopts::value<std::string>(),\"STRING,..,STRING\")\n    (\"eventColList\", \"comma separated list of event status names to keep (can use parameter expansion {i:j})\", cxxopts::value<std::string>(),\"STRING,..,STRING\")\n    (\"c,covarFile\", \"covariate file (header required starting with FID IID)\", cxxopts::value<std::string>(files->cov_file),\"FILE\")\n    (\"covarCol\", \"covariate name in header (use for each covariate to keep; can use parameter expansion {i:j})\", cxxopts::value< std::vector<std::string> >(),\"STRING\")\n    (\"covarColList\", \"comma separated list of covariate names to keep (can use parameter expansion {i:j})\", cxxopts::value<std::string>(),\"STRING,..,STRING\")\n    (\"catCovarList\", \"comma separated list of categorical covariates\", cxxopts::value<std::string>(),\"STRING,..,STRING\")\n    (\"o,out\", \"prefix for output files\", cxxopts::value<std::string>(files->out_file),\"PREFIX\")\n    (\"qt\", \"analyze phenotypes as quantitative\")\n    (\"bt\", \"analyze phenotypes as binary\")\n    (\"t2e\", \"analyze phenotypes as time to event\")\n    (\"1,cc12\", \"use control=1,case=2,missing=NA encoding for binary traits\")\n    (\"b,bsize\", \"size of genotype blocks\", cxxopts::value<int>(params->block_size),\"INT\")\n    (\"cv\", \"number of cross validation (CV) folds\", cxxopts::value<int>(params->cv_folds),\"INT(=5)\")\n    (\"loocv\", \"use leave-one out cross validation (LOOCV)\")\n    (\"l0\", \"number of ridge parameters to use when fitting models within blocks [evenly spaced in (0,1)]\", cxxopts::value<int>(params->n_ridge_l0),\"INT(=5)\")\n    (\"l1\", \"number of ridge parameters to use when fitting model across blocks [evenly spaced in (0,1)]\", cxxopts::value<int>(params->n_ridge_l1),\"INT(=5)\")\n    (\"lowmem\", \"reduce memory usage by writing level 0 predictions to temporary files\")\n    (\"lowmem-prefix\", \"prefix where to write the temporary files in step 1 (default is to use prefix from --out)\", cxxopts::value<std::string>(files->loco_tmp_prefix),\"PREFIX\")\n    (\"split-l0\", \"split level 0 across N jobs and set prefix of output files\", cxxopts::value<std::string>(),\"PREFIX,N\")\n    (\"run-l0\", \"run level 0 for job K in {1..N} using master file created from '--split-l0'\", cxxopts::value<std::string>(),\"FILE,K\")\n    (\"run-l1\", \"run level 1 using master file from '--split-l0'\", cxxopts::value<std::string>(files->split_file),\"FILE\")\n    (\"l1-phenoList\", \"run level 1 for a subset of the phenotypes (specified as comma-separated list)\", cxxopts::value<std::string>(),\"STRING,...,STRING\")\n    (\"keep-l0\", \"avoid deleting the level 0 predictions written on disk after fitting the level 1 models\")\n    (\"strict\", \"remove all samples with missingness at any of the traits\")\n    (\"print-prs\", \"also output polygenic predictions without using LOCO (=whole genome PRS)\")\n    (\"gz\", \"compress output files (gzip format)\")\n    (\"apply-rint\", \"apply Rank-Inverse Normal Transformation to quantitative traits\")\n    (\"apply-rerint\", \"apply Rank-Inverse Normal Transformation to residualized quantitative traits in 
step 2\")\n    (\"apply-rerint-cov\", \"apply Rank-Inverse Normal Transformation to residualized quantitative traits and project covariates out in step 2\")\n    (\"threads\", \"number of threads\", cxxopts::value<int>(params->threads),\"INT\")\n    (\"pred\", \"file containing the list of predictions files from step 1\", cxxopts::value<std::string>(files->blup_list_file),\"FILE\")\n    (\"ignore-pred\", \"skip reading predictions from step 1 (equivalent to linear/logistic regression with only covariates)\")\n    (\"use-prs\", \"when using whole genome PRS step 1 output in '--pred'\")\n    (\"write-samples\", \"write IDs of samples included for each trait (only in step 2)\")\n    (\"minMAC\", \"minimum minor allele count (MAC) for tested variants\", cxxopts::value<double>(params->min_MAC),\"FLOAT(=5)\")\n    (\"minINFO\", \"minimum imputation info score (Impute/Mach R^2) for tested variants\", cxxopts::value<double>(params->min_INFO),\"DOUBLE(=0)\")\n    (\"no-split\", \"combine asssociation results into a single for all traits\")\n    (\"firth\", \"use Firth correction for p-values less than threshold\")\n    (\"approx\", \"use approximation to Firth correction for computational speedup\")\n    (\"spa\", \"use Saddlepoint approximation (SPA) for p-values less than threshold\")\n    (\"pThresh\", \"P-value threshold below which to apply Firth/SPA correction\", cxxopts::value<double>(params->alpha_pvalue),\"FLOAT(=0.05)\")\n    (\"write-null-firth\", \"store coefficients from null models with approximate Firth for step 2\")\n    (\"compute-all\", \"store Firth estimates for all chromosomes\")\n    (\"use-null-firth\", \"use stored coefficients for null model in approximate Firth\", cxxopts::value<std::string>(files->null_firth_file),\"FILE\")\n    (\"chr\", \"specify chromosome to test in step 2 (use for each chromosome)\", cxxopts::value< std::vector<std::string> >(),\"STRING\")\n    (\"chrList\", \"Comma separated list of chromosomes to test in step 2\", cxxopts::value<std::string>(),\"STRING,..,STRING\")\n    (\"range\", \"to specify a physical position window for variants to test in step 2\", cxxopts::value<std::string>(),\"CHR:MINPOS-MAXPOS\")\n    (\"sex-specific\", \"for sex-specific analyses (male/female)\", cxxopts::value<std::string>(),\"STRING\")\n    (\"af-cc\", \"print effect allele frequencies among cases/controls for step 2\")\n    (\"test\", \"'additive', 'dominant' or 'recessive' (default is additive test)\", cxxopts::value<std::string>(),\"STRING\")\n    (\"htp\", \"output association files in step 2 in HTP format specifying the cohort name)\", cxxopts::value<std::string>(params->cohort_name),\"STRING\")\n    (\"condition-list\", \"file with list of variants to include as covariates\", cxxopts::value<std::string>(files->condition_snps_list),\"FILE\")\n    (\"condition-file\", \"optional genotype file which contains the variants to include as covariates\", cxxopts::value<std::string>(),\"FORMAT,FILE\")\n    (\"condition-file-sample\", \"sample file accompanying BGEN file with the conditional variants\", cxxopts::value<std::string>(files->condition_snps_info.sample),\"FILE\")\n    (\"interaction\", \"perform interaction testing with a quantitative/categorical covariate\", cxxopts::value<std::string>(filters->interaction_cov),\"STRING\")\n    (\"interaction-snp\", \"perform interaction testing with a variant\", cxxopts::value<std::string>(filters->interaction_cov),\"STRING\")\n    (\"interaction-file\", \"optional genotype file which contains the variant for GxG interaction 
test\", cxxopts::value<std::string>(),\"FORMAT,FILE\")\n    (\"interaction-file-sample\", \"sample file accompanying BGEN file with the interacting variant\", cxxopts::value<std::string>(files->interaction_snp_info.sample),\"FILE\")\n    (\"interaction-file-reffirst\", \"use the first allele as the reference for the BGEN or PLINK file with the interacting variant [default assumes reference is last]\")\n    (\"interaction-prs\", \"perform interaction testing with the full PRS from step 1\")\n    (\"force-condtl\", \"to also condition on interacting SNP in the marginal GWAS test\")\n    (\"no-condtl\", \"to print out all main effects in GxE interaction test\")\n    (\"rare-mac\", \"minor allele count (MAC) threshold below which to use HLM for interaction testing with QTs\", cxxopts::value<double>(params->rareMAC_inter),\"FLOAT(=1000)\")\n    (\"set-list\", \"file with sets definition\", cxxopts::value<std::string>(files->set_file),\"FILE\")\n    (\"extract-sets\", \"comma-separated list of files with IDs of sets to retain in the analysis\", cxxopts::value<std::string>(),\"FILE\")\n    (\"exclude-sets\", \"comma-separated list of files with IDs of sets to remove from the analysis\", cxxopts::value<std::string>(),\"FILE\")\n    (\"extract-setlist\", \"comma separated list of sets to retain in the analysis\", cxxopts::value<std::string>(),\"STRING\")\n    (\"exclude-setlist\", \"comma separated list of sets to remove from the analysis\", cxxopts::value<std::string>(),\"STRING\")\n    (\"anno-file\", \"file with variant annotations\", cxxopts::value<std::string>(files->anno_file),\"FILE\")\n    (\"anno-labels\", \"file with labels to annotations\", cxxopts::value<std::string>(files->anno_labs_file),\"FILE\")\n    (\"mask-def\", \"file with mask definitions\", cxxopts::value<std::string>(files->mask_file),\"FILE\")\n    (\"aaf-file\", \"file with AAF to use when building masks\", cxxopts::value<std::string>(files->aaf_file),\"FILE\")\n    (\"set-singletons\", \"use 0/1 indicator in third column of AAF file to specify singleton variants\")\n    (\"aaf-bins\", \"comma separated list of AAF bins cutoffs for building masks\", cxxopts::value<std::string>(),\"FLOAT,..,FLOAT\")\n    (\"build-mask\", \"rule to construct masks, can be 'max', 'sum' or 'comphet' (default is max)\", cxxopts::value<std::string>(params->mask_rule),\"STRING\")\n    (\"vc-tests\", \"comma separated list of tests to compute for each set of variants included in a mask [skat/skato/skato-acat/acatv/acato]\", cxxopts::value<std::string>(),\"STRING,..,STRING\")\n    (\"vc-maxAAF\", \"maximum AAF for variants included in gene-based tests\", cxxopts::value<double>(params->vc_maxAAF),\"FLOAT(=1)\")\n    (\"weights-col\", \"column index (1-based) for user-defined weights in annotation file\", cxxopts::value<int>(params->vc_weight_col))\n    (\"joint\", \"comma spearated list of joint tests to perform\", cxxopts::value<std::string>(params->burden),\"STRING\")\n    (\"singleton-carrier\", \"define singletons as variants with a single carrier in the sample\")\n    (\"write-mask\", \"write masks in PLINK bed/bim/fam format\")\n    (\"mask-lovo\", \"apply Leave-One-Variant-Out (LOVO) scheme when building masks (<set_name>,<mask_name>,<aaf_cutoff>)\", cxxopts::value<std::string>(),\"STRING\")\n    (\"mask-lodo\", \"apply Leave-One-Domain-Out (LODO) scheme when building masks (<set_name>,<mask_name>,<aaf_cutoff>)\", cxxopts::value<std::string>(),\"STRING\")\n    (\"skip-test\", \"skip computing association tests after building masks\")\n    
(\"check-burden-files\", \"check annotation file, set list file and mask file for consistency\")\n    (\"strict-check-burden\", \"to exit early if the annotation, set list and mask definition files don't agree\")\n    (\"force-qt\", \"force QT run for traits with few unique values\")\n    (\"par-region\", \"build code to identify PAR region boundaries on chrX\", cxxopts::value<std::string>(params->build_code),\"STRING(=hg38)\")\n    ;\n\n\n  // extended options\n  AllOptions.add_options(\"Additional\")\n    (\"v,verbose\", \"verbose screen output\")\n    (\"version\", \"print version number and exit\")\n    (\"minCaseCount\", \"minimum number of cases per trait\", cxxopts::value<int>(params->mcc),\"INT=10\")\n    (\"tpheno-file\", \"transposed phenotype file (each row is a phenotype)\", cxxopts::value<std::string>(files->pheno_file),\"FILE\")\n    (\"tpheno-indexCol\", \"index of column which contain phenotype name\", cxxopts::value<uint32_t>(filters->tpheno_indexCol),\"INT\")\n    (\"tpheno-ignoreCols\", \"comma separated list of indexes for columns to ignore (can use parameter expansion {i:j})\", cxxopts::value<std::string>(),\"INT,...,INT\")\n    (\"iid-only\", \"to specify if header in transposed phenotype file only contains sample IID\")\n    (\"extract-or\", \"file with IDs of variants to retain in the analysis regardless of MAC\", cxxopts::value<std::string>(),\"FILE\")\n    (\"exclude-or\", \"file with IDs of variants to remove from the analysis if MAC falls below threshold\", cxxopts::value<std::string>(),\"FILE\")\n    (\"setl0\", \"comma separated list of ridge parameters to use when fitting models within blocks\", cxxopts::value<std::string>(), \"FLOAT,..,FLOAT\")\n    (\"setl1\", \"comma separated list of ridge parameters to use when fitting model across blocks\", cxxopts::value<std::string>(), \"FLOAT,..,FLOAT\")\n    (\"use-relative-path\", \"use relative paths for Step 1 pred.list file\")\n    (\"phenoExcludeList\", \"comma separated list of phenotype names to ignore (can use parameter expansion {i:j})\", cxxopts::value<std::string>(),\"STRING,..,STRING\")\n    (\"covarExcludeList\", \"comma separated list of covariates to ignore (can use parameter expansion {i:j})\", cxxopts::value<std::string>(),\"STRING,..,STRING\")\n    (\"nauto\", \"number of autosomal chromosomes\", cxxopts::value<int>(),\"INT\")\n    (\"exact-p\", \"output uncapped p-values in the summary statistic file with HTP format\")\n    (\"skip-dosage-comp\", \"skip dosage compensation for males in chrX non-PAR regions\")\n    (\"maxCatLevels\", \"maximum number of levels for categorical covariates\", cxxopts::value<int>(params->max_cat_levels),\"INT(=10)\")\n    (\"max-condition-vars\", \"maximum number of variants to include as covariates\", cxxopts::value<uint32_t>(params->max_condition_vars),\"INT(=10000)\")\n    (\"nb\", \"number of blocks to use\", cxxopts::value<int>(params->n_block),\"INT\")\n    (\"starting-block\", \"start run at a specific block/set number for step 2\", cxxopts::value<int>(params->start_block),\"INT\")\n    (\"force-step1\", \"run step 1 for more than 1M variants (not recommended)\")\n    (\"write-mask-snplist\", \"file with list of variants that went into each mask\")\n    (\"minHOMs\", \"minimum number of homozygote ALT carriers in recessive test\", cxxopts::value<double>(params->minHOMs),\"FLOAT(=0)\")\n    (\"skat-params\", \"a1,a2 values for variant weights computed from Beta(MAF,a1,a2) used in gene-based tests\", cxxopts::value<std::string>(),\"FLOAT,FLOAT(=1,25)\")\n    
(\"skato-rho\", \"comma-separated list of rho values used for SKATO\", cxxopts::value<std::string>(),\"FLOAT,..,FLOAT\")\n    (\"vc-MACthr\", \"MAC threshold below which to collapse variants for gene-based tests\", cxxopts::value<int>(params->skat_collapse_MAC),\"INT(=10)\")\n    (\"lovo-snplist\", \"list of variants to generate LOVO masks for\", cxxopts::value<std::string>(params->masks_loo_snpfile),\"FILE\")\n    (\"joint-only\", \"only output p-values from joint tests\")\n    (\"force-ltco\", \"use a Leave-Two-Chromosome-Out (LTCO) scheme by specifying additional chromosome to exclude from step 1 LOCO predictions\", cxxopts::value<int>(params->ltco_chr),\"INT\")\n    (\"niter\", \"maximum number of iterations for logistic regression\", cxxopts::value<int>(params->niter_max),\"INT(=50)\")\n    (\"maxstep-null\", \"maximum step size in null Firth logistic regression\", cxxopts::value<int>(params->maxstep_null),\"INT(=25)\")\n    (\"maxiter-null\", \"maximum number of iterations in null Firth logistic regression\", cxxopts::value<int>(params->niter_max_firth_null),\"INT(=1000)\")\n    (\"skip-fast-firth\", \"skip fast implementation of approximate Firth for variants below MAC 50\")\n    (\"force-impute\", \"keep and impute missing observations when in step 2 (default is to drop missing for each trait)\")\n    (\"firth-se\", \"Compute SE for Firth based on effect size estimate and LRT p-value\")\n    (\"print-pheno\", \"Print phenotype name when writing sample IDs to file (only for step 2)\")\n    (\"compute-corr\", \"compute LD matrix (output R^2 values to binary file)\")\n    (\"output-corr-text\", \"output matrix of Pearson correlations to text file\")\n    (\"forcein-vars\", \"retain variants from extract file not present in genetic data file for the LD matrix\")\n    (\"ld-extract\", \"file with list of variants & masks to compute LD matrix\", cxxopts::value<string>(params->ld_list_file),\"FILE\")\n    (\"skip-scaleG\", \"compute LD matrix based on unscaled genotypes\")\n    (\"sparse-thr\", \"threshold used to sparsify the LD matrix\", cxxopts::value<double>(params->ld_sparse_thr),\"FLOAT(=0)\")\n    (\"print-vcov\", \"print variance-covariance matrix for interaction test to file\")\n    ;\n\n  // extra options\n  AllOptions.add_options(\"Extra\")\n    (\"print\", \"print estimated effect sizes from level 0 and level 1 models\")\n    (\"within\", \"use within-sample predictions as input when fitting model across blocks in step 1\")\n    (\"early-exit\", \"Exit program after fitting level 0 models (avoid deleting temporary prediction files from level 0)\")\n    (\"print-cov-betas\", \"Print covariate effects to file (assumes no multi-colinearity)\")\n    (\"prior-alpha\", \"alpha value used when speifying the MAF-dependent prior on SNP effect sizes\", cxxopts::value<double>(params->alpha_prior),\"FLOAT(=-1)\")\n    (\"prs-cov\", \"include step 1 predictions as covariate rather than offset\")\n    (\"test-l0\", \"test association for each level 0 block\")\n    (\"l0-pval-thr\", \"p-value threshold for identifying top SNPs at level 0\", cxxopts::value<double>(params->l0_snp_pval_thr),\"FLOAT\")\n    (\"select-l0\", \"file with p-values for each level 0 block (use as flag if with --test-l0)\", cxxopts::value<std::string>(params->l0_pvals_file)->implicit_value(\"\"),\"FILE\")\n    (\"rm-l0-pct\", \"remove least x% significant blocks from level 1 models\", cxxopts::value<double>(params->rm_l0_pct),\"FLOAT(=0)\")\n    (\"l0-event\", \"use event status as response in level 0 in time-to-event 
analysis\")\n    (\"l1-full\", \"use all samples for final L1 model in Step 1 logistic ridge with LOOCV\")\n    (\"prop-zero-thr\", \"min. proportion of zeros needed to sparsify the genotype vector\", cxxopts::value<double>(params->prop_zero_thr),\"FLOAT(=0.5)\")\n    (\"force-robust\", \"use robust SE instead of HLM for rare variant GxE test with quantitative traits\")\n    (\"force-hc4\", \"use HC4 instead of HC3 robust SE for rare variant GxE test with quantitative traits\")\n    (\"no-robust\", \"don't use robust SEs or HLM for GxE test\")\n    (\"write-setlist\", \"file with list of masks to combine as sets\", cxxopts::value<std::string>(files->new_sets),\"FILE\")\n    (\"sbat-napprox\", \"number of random draws to use for approximate SBAT test\", cxxopts::value<int>(params->nnls_napprox),\"INT(=10)\")\n    (\"sbat-adapt\", \"use adaptive strategy to compute p-value using fewer weights (k=2)\")\n    (\"sbat-mtw\", \"re-use SBAT weights across all traits\")\n    (\"sbat-verbose\", \"To output detailed SBAT test results\")\n    (\"acat-beta\", \"parameters for Beta(a,b) used for weights in ACAT joint test\", cxxopts::value<std::string>(), \"a,b(=1,25)\")\n    (\"hlm-novquad\", \"remove quadratic term for E in variance function of HLM model (only for GxE interaction test)\")\n    (\"rgc-gene-p\", \"apply optimal strategy to extract single p-value per gene\")\n    (\"rgc-gene-def\", \"file with list of mask groups to run single p-value strategy\", cxxopts::value<std::string>(params->genep_mask_sets_file))\n    (\"skip-sbat\", \"skip running SBAT test for --rgc-gene-p\")\n    (\"multiply-weights\", \"multiply the user defined weights by the default SKAT weights in SKAT/ACAT tests\")\n    (\"htp-with-event\", \"use event name in the Trait column of the HTP sumstats file (instead of TTE)\")\n    (\"skip-cf-burden\", \"skip computing per-mask calibration factor for SKAT tests\")\n    (\"force-mac-filter\", \"apply a seperate MAC filter on a subset of the SNPs\", cxxopts::value<std::string>(), \"snpfile,MAC\")\n    (\"use-adam\", \"use ADAM to fit penalized logistic models\")\n    (\"adam-mini\", \"use mini-batch for ADAM\")\n    (\"ct\", \"analyze phenotypes as counts\")\n    (\"seed\", \"specify seed for random number generation\", cxxopts::value<uint>(params->rng_seed))\n    (\"debug\", \"more verbose screen output for debugging purposes\")\n    (\"mt\", \"run multi-trait tests\")\n    (\"mcc\", \"apply MCC test for quantitative traits\")\n    (\"mcc-skew\", \"absolute phenotypic skewness to activate MCC [default value is 0]\", cxxopts::value<double>(params->mcc_skew),\"FLOAT(=0)\")\n    (\"mcc-thr\", \"threshold to apply MCC if activated [default value is 0.01]\", cxxopts::value<double>(params->mcc_thr),\"FLOAT(=0.01)\")\n    (\"remeta-save-ld\", \"store SKAT matrices for use with remeta\")\n    (\"remeta-ld-spr\", \"sparsity threshold for SKAT matrices\", cxxopts::value<double>(params->remeta_ld_spr),\"FLOAT(=0.01)\")\n    (\"multiphen\", \"run MultiPhen test\")\n    (\"multiphen-thr\", \"threshold to apply LRT for MultiPhen [default value is 0.01]\", cxxopts::value<double>(params->multiphen_thr),\"FLOAT(=0.001)\")\n    (\"multiphen-test\", \"type of MultiPhen test\", cxxopts::value<std::string>(params->multiphen_test),\"STRING\")\n    (\"multiphen-optim\", \"type of MultiPhen optimization algorithm\", cxxopts::value<std::string>(params->multiphen_optim),\"STRING\")\n    (\"multiphen-tol\", \"toleance level for Firth [default value is 1e-4]\", 
cxxopts::value<double>(params->multiphen_tol),\"FLOAT(=0.0001)\")\n    (\"multiphen-trace\", \"trace model fitting performance for MultiPhen\")\n    (\"multiphen-firth-mult\", \"Firth penalty multiplier [default value is 1]\", cxxopts::value<double>(params->multiphen_firth_mult),\"FLOAT(=1.0)\")\n    (\"multiphen-verbose\", \"MultiPhen verbose level\", cxxopts::value<int>(params->multiphen_verbose),\"INT(=0)\")\n    (\"multiphen-maxstep\", \"Maximum step in IRLS for MultiPhen\", cxxopts::value<double>(params->multiphen_maxstep),\"FLOAT(=25.0)\")\n    (\"multiphen-approx-offset\", \"MAC to disable MultiPhen offset approximation\", cxxopts::value<int>(params->multiphen_approx_offset),\"INT(=-1)\")\n    (\"multiphen-maxit\", \"MultiPhen maximum number of IRLS iterations\", cxxopts::value<int>(params->multiphen_maxit),\"INT(=150)\")\n    (\"multiphen-maxit2\", \"MultiPhen maximum number of step-halving IRLS iterations\", cxxopts::value<int>(params->multiphen_maxit2),\"INT(=5)\")\n    (\"multiphen-strict\", \"strict mode for MultiPhen IRLS\")\n    (\"multiphen-pseudo-stophalf\", \"Threshold to stop step-halving in pseudo algorithm [default value is 0.0]\", cxxopts::value<double>(params->multiphen_pseudo_stophalf),\"FLOAT(=0.0)\")\n    (\"multiphen-reset-start\", \"reset start values when failed convergence in MultiPhen\")\n    (\"multiphen-offset\", \"offset mode for MultiPhen\", cxxopts::value<std::string>(params->multiphen_offset),\"STRING\")\n    (\"mse-full\", \"calculate MSE for quantitative phenotypes using the full model\")\n    (\"t-test\", \"use t-test for quantitative phenotypes\")\n    (\"t2e-event-l0\", \"use event as response in level 0 for time-to-event phenotype\")\n    (\"t2e-l1-pi6\", \"use heritability to get penalty\")\n    (\"coxnofirth\", \"do not use Firth in the Cox model; the test uses a likelihood ratio test\")\n    (\"coxscore-exact\", \"use exact score variance\")\n    (\"nocov-approx\", \"skip adjusting for covariates in score test\")\n   ;\n\n  try\n  {\n    bool acato_use_all_rhos = false;\n\n    //AllOptions.parse_positional({\"htp\"});\n    auto vm = AllOptions.parse(argc, argv);\n    auto arguments = vm.arguments();\n    map<string, bool> valid_args;\n    for(const auto &kv: arguments)\n      valid_args[ kv.key() ] = true;\n\n    // help menu\n    if (vm.count(\"help\")){\n      print_header(std::cout);\n      std::cout << AllOptions.help({\"\", \"Main\"}) << '\\n' << params->webinfo << \"\\n\\n\";\n      exit(EXIT_SUCCESS);\n    } else if (vm.count(\"helpFull\")) {\n      print_header(std::cout);\n      std::cout << AllOptions.help({\"\", \"Main\", \"Additional\"}) << '\\n' << params->webinfo << \"\\n\\n\";\n      exit(EXIT_SUCCESS);\n    } else if(vm.count(\"version\")) {\n      std::cout << \"v\" << VERSION_NUMBER << \"\\n\";\n      exit(EXIT_SUCCESS);\n    }\n\n\n    if( vm.unmatched().size() > 0 ) {\n      std::cout << \"\\nERROR: There are unmatched arguments:\\n\";\n      for(auto cn :  vm.unmatched())\n        cout << \"'\" << cn << \"' \";\n      std::cout << \"(Make sure there are no spaces in the options arguments)\\n\";\n      exit(EXIT_FAILURE);\n    }\n    \n    if (!vm.count(\"out\")){\n      print_header(std::cout);\n      std::cout << \"ERROR: You must provide an output prefix using '--out'\" << '\\n' << params->webinfo << \"\\n\\n\";\n      exit(EXIT_FAILURE);\n    }\n\n\n    // Print output to file and to stdout\n    // print command line arguments\n    start_log(files->out_file, mt, sout);\n    vector< string > tmp_str_vec;\n\n  
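  // example minimal step 1 invocation (hypothetical file names):\n    //   --step 1 --bgen geno.bgen --phenoFile pheno.txt --bsize 1000 --out fit1\n  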
  if( (vm.count(\"bgen\") + vm.count(\"bed\")  + vm.count(\"pgen\"))  != 1 )\n      throw \"must use either --bed,--bgen or --pgen.\";\n\n    if( vm.count(\"bgen\") ) params->file_type = \"bgen\";\n    if( vm.count(\"bed\") ) params->file_type = \"bed\";\n    if( vm.count(\"pgen\") ) params->file_type = \"pgen\";\n    if( vm.count(\"sample\") ) params->bgenSample = true;\n    if( vm.count(\"ref-first\") ) params->ref_first = true;\n    if( vm.count(\"bt\") ) params->trait_mode = 1;\n    if( vm.count(\"ct\") ) params->trait_mode = 2;\n    if( vm.count(\"t2e\") ) params->trait_mode = 3;\n    if( vm.count(\"1\") ) params->CC_ZeroOne = false;\n    if( vm.count(\"loocv\") ) params->use_loocv = true;\n    if( vm.count(\"apply-rint\") && !vm.count(\"bt\")) params->rint = true;\n    if( vm.count(\"apply-rerint\") && !vm.count(\"bt\")) params->rerint = true;\n    if( vm.count(\"apply-rerint-cov\") && !vm.count(\"bt\")) params->rerintcov = true;\n    if( vm.count(\"strict\") ) params->strict_mode = true;\n    if( vm.count(\"print-prs\") ) params->print_prs = true;\n    if( vm.count(\"use-relative-path\") ) params->use_rel_path = true;\n    if( vm.count(\"ignore-pred\") ) params->skip_blups = true;\n    if( vm.count(\"use-prs\") ) params->use_prs = true;\n    if( vm.count(\"prs-cov\") ) params->blup_cov = true;\n    if( vm.count(\"force-impute\") ) params->rm_missing_qt = false;\n    if( vm.count(\"no-split\") ) params->split_by_pheno = false;\n    if( vm.count(\"approx\") ) params->firth_approx = true;\n    if( vm.count(\"approx\") && vm.count(\"skip-fast-firth\") ) params->skip_fast_firth = true;\n    if( vm.count(\"nauto\") ) params->nChrom = vm[\"nauto\"].as<int>() + 1;\n    if( vm.count(\"maxstep-null\") | vm.count(\"maxiter-null\") ) params->fix_maxstep_null = true;\n    if( vm.count(\"firth\") ) params->firth = true;\n    if( vm.count(\"write-null-firth\") ) params->write_null_firth = true;\n    if( vm.count(\"use-null-firth\") ) params->use_null_firth = true;\n    if( vm.count(\"compute-all\") ) params->compute_all_chr = true;\n    if( vm.count(\"spa\") ) params->use_SPA = true;\n    if( vm.count(\"minMAC\") ) params->setMinMAC = true;\n    if( vm.count(\"minINFO\") ) params->setMinINFO = true;\n    if( vm.count(\"htp\") ) params->htp_out = params->split_by_pheno = true;\n    if( vm.count(\"htp-with-event\") ) params->htp_use_eventname= true;\n    if( vm.count(\"exact-p\") ) params->uncapped_pvals = true;\n    if( vm.count(\"multiphen\") ) params->split_by_pheno = false;\n    if( vm.count(\"af-cc\") ) params->af_cc = true;\n    if( vm.count(\"tpheno-file\") ) params->transposedPheno = true;\n    if( vm.count(\"v\") ) params->verbose = true;\n    if( vm.count(\"debug\") ) params->verbose = params->debug = true;\n    if( vm.count(\"range\") ) params->set_range = true;\n    if( vm.count(\"print\") ) params->print_block_betas = true;\n    if( vm.count(\"print-cov-betas\") ) params->print_cov_betas = true;\n    if( vm.count(\"test-l0\") ) params->test_l0 = true;\n    if( vm.count(\"l0-event\") ) params->l0_event = true;\n    if( vm.count(\"select-l0\") ) params->select_l0 = true;\n    //if( vm.count(\"nostream\") ) params->streamBGEN = params->fastMode = false;\n    //if( vm.count(\"within\") ) params->within_sample_l0 = true;\n    if( vm.count(\"write-samples\") ) params->write_samples = true;\n    if( vm.count(\"print-pheno\") ) params->print_pheno_name = true;\n    if( vm.count(\"early-exit\") ) params->early_exit = true;\n    if( vm.count(\"force-step1\") ) params->force_run = true;\n    if( 
(params->run_mode == 1) && vm.count(\"bt\") && vm.count(\"loocv\") && vm.count(\"l1-full\") ) params->l1_full_samples = true;\n    if( vm.count(\"lowmem\") ) params->write_l0_pred = true;\n    if( vm.count(\"keep-l0\") ) params->rm_l0_pred = false;\n    if( vm.count(\"split-l0\") ) params->split_l0 = true;\n    if( vm.count(\"run-l0\") ) { params->run_l0_only = params->write_l0_pred = params->keep_snps = true;}\n    if( vm.count(\"run-l1\") ) params->run_l1_only = params->write_l0_pred = true;\n    if( vm.count(\"firth\") && vm.count(\"firth-se\") ) params->back_correct_se = true;\n    if( vm.count(\"use-adam\") ) params->use_adam = true;\n    if( vm.count(\"adam-mini\") ) params->adam_mini = true;\n    if( vm.count(\"niter\") ) params->niter_max_ridge = params->niter_max;\n    if( vm.count(\"force-ltco\") ) params->w_ltco = true;\n    if( vm.count(\"joint\") ) params->joint_test = true;\n    if( vm.count(\"joint-only\") ) params->p_joint_only = true;\n    if( vm.count(\"nocov-approx\") ) params->skip_cov_res = true;\n    if( vm.count(\"mt\") ) params->trait_set = true;\n    if( vm.count(\"mcc\") ) params->mcc_test = true;\n    if( vm.count(\"multiphen\") ) params->multiphen = true;\n    if( vm.count(\"multiphen-trace\") ) params->multiphen_trace = true;\n    if( vm.count(\"multiphen-strict\") ) params->multiphen_strict = true;\n    if( vm.count(\"multiphen-reset-start\") ) params->multiphen_reset_start = true;\n    if( vm.count(\"mse-full\") ) params->mse_full = true;\n    if( vm.count(\"t-test\") ) params->t_test = true;\n    if( vm.count(\"aaf-file\") ) params->set_aaf = true;\n    if( vm.count(\"aaf-file\") && vm.count(\"set-singletons\") ) params->aaf_file_wSingletons = true;\n    if( vm.count(\"singleton-carrier\") ) params->singleton_carriers = true;\n    if( vm.count(\"mask-lovo\") ) params->mask_loo = true;\n    if( vm.count(\"mask-lodo\") ) params->mask_lodo = true;\n    if( vm.count(\"write-mask\") ) params->write_masks = true;\n    if( vm.count(\"write-setlist\") ) params->write_setlist = true;\n    if( vm.count(\"write-mask-snplist\") ) params->write_mask_snplist = true;\n    if( vm.count(\"skip-test\") ) params->skip_test = true;\n    if( vm.count(\"check-burden-files\") ) params->check_mask_files = true;\n    if( vm.count(\"strict-check-burden\") ) params->strict_check_burden = true;\n    if( vm.count(\"force-qt\") ) params->force_qt_run = true;\n    if( vm.count(\"weights-col\") ) params->vc_with_weights = true;\n    if( vm.count(\"multiply-weights\") ) params->vc_multiply_weights = true;\n    if( vm.count(\"skip-dosage-comp\") ) params->skip_dosage_comp = true;\n    if( vm.count(\"sbat-verbose\") ) params->nnls_out_all = true;\n    if( vm.count(\"sbat-adapt\") ) params->nnls_adaptive = true;\n    if( vm.count(\"sbat-mtw\") ) params->nnls_mt_weights = true;\n    if( vm.count(\"skip-cf-burden\") ) params->skip_cf_burden = true;\n    if( vm.count(\"condition-list\") ) { params->condition_snps = true;params->rm_snps = true;}\n    if( vm.count(\"force-robust\") ) params->force_robust = true;\n    if( vm.count(\"force-hc4\") ) params->force_robust = params->force_hc4 = true;\n    if( vm.count(\"no-robust\") ) params->no_robust = true;\n    if( vm.count(\"hlm-novquad\") ) params->hlm_vquad = false;\n    if( vm.count(\"print-vcov\") ) params->print_vcov = true;\n    if( vm.count(\"compute-corr\") || vm.count(\"output-corr-text\") ) {\n      params->getCorMat = true;\n      params->cormat_force_vars = 
(vm.count(\"forcein-vars\") && vm.count(\"extract\")) || vm.count(\"ld-extract\");\n      params->skip_scaleG = vm.count(\"skip-scaleG\");\n      params->run_mode = 2;\n      params->skip_blups = params->strict_mode = true;\n      params->trait_mode = 0;\n      params->min_MAC = 0.5;\n      if(vm.count(\"output-corr-text\") || vm.count(\"skip-scaleG\")) params->cor_out_txt = true;\n      if(vm.count(\"exclude\")) throw \"cannot use --exclude with --compute-corr (use --extract instead)\";\n      if(vm.count(\"write-mask\")){\n        sout << \"WARNING: option --write-mask cannot be used when computing LD.\\n\" ;\n        params->write_masks = false; valid_args[ \"write-mask\" ] = false;\n      }\n    }\n    if( vm.count(\"gz\") ) {\n# if defined(HAS_BOOST_IOSTREAM)\n      // only works when compiled with boost IO library\n      params->gzOut = true;\n# else\n      sout << \"WARNING: REGENIE was not compiled with Boost Iostream library so ignoring option '--gz'.\\n\";\n      valid_args[ \"gz\" ] = false;\n#endif\n    }\n\n\n    if( vm.count(\"phenoColList\") ) {\n      params->select_phenos = true;\n      tmp_str_vec = string_split(vm[\"phenoColList\"].as<string>(),\",\");\n      for( size_t i = 0; i < tmp_str_vec.size(); i++) {\n        for(auto cn : check_name(tmp_str_vec[i], sout))\n          filters->pheno_colKeep_names[cn] = true;\n      }\n    }\n    if( vm.count(\"phenoCol\") ) {\n      params->select_phenos = true;\n      tmp_str_vec = vm[\"phenoCol\"].as<std::vector<string>>();\n      for( size_t i = 0; i < tmp_str_vec.size(); i++)\n        for(auto cn : check_name(tmp_str_vec[i], sout))\n          filters->pheno_colKeep_names[cn] = true;\n    }\n    if( vm.count(\"phenoExcludeList\") ) {\n      params->select_phenos_rm = true;\n      tmp_str_vec = string_split(vm[\"phenoExcludeList\"].as<string>(),\",\");\n      for( size_t i = 0; i < tmp_str_vec.size(); i++) {\n        for(auto cn : check_name(tmp_str_vec[i], sout))\n          filters->pheno_colRm_names[cn] = true;\n      }\n    }\n    if( (params->trait_mode == 3) && vm.count(\"eventColList\") && vm.count(\"phenoCol\") )\n      throw \"You must specify TTE phenotypes using '--phenoColList' (matching in order with events in '--eventColList').\";\n    if( (params->trait_mode == 3) && (!vm.count(\"eventColList\") || !vm.count(\"phenoColList\")) ) \n      throw \"You must specify both '--phenoColList' and '--eventColList' (same order) for time-to-event analysis.\";\n    if( vm.count(\"eventColList\") ) { // time-to-event names map\n      if( params->trait_mode != 3) \n        throw \"Option --eventColList must be used with '--t2e' for time-to-event analysis\";\n      params->select_phenos = true;\n      tmp_str_vec = string_split(vm[\"eventColList\"].as<string>(),\",\");\n      vector< string > tmp_str_vec_time = string_split(vm[\"phenoColList\"].as<string>(),\",\");\n      for( size_t i = 0; i < tmp_str_vec.size(); i++) {\n        files->t2e_map[tmp_str_vec_time[i]] = tmp_str_vec[i];\n        for(auto cn : check_name(tmp_str_vec[i], sout))\n          filters->pheno_colKeep_names[cn] = true;\n      }\n      params->t2e_event_l0 = vm.count(\"t2e-event-l0\");\n      params->t2e_l1_pi6 = vm.count(\"t2e-l1-pi6\");\n      params->cox_nofirth = vm.count(\"coxnofirth\");\n      params->coxscore_exact = vm.count(\"coxscore-exact\");\n    }\n    if( vm.count(\"covarColList\") ) {\n      params->select_covs = true;\n      tmp_str_vec = string_split(vm[\"covarColList\"].as<string>(),\",\");\n      for( size_t i = 0; i < tmp_str_vec.size(); 
i++){\n        for(auto cn : check_name(tmp_str_vec[i], sout))\n          filters->cov_colKeep_names[cn] = true;\n      }\n    }\n    if( vm.count(\"covarCol\") ) {\n      params->select_covs = true;\n      tmp_str_vec = vm[\"covarCol\"].as<std::vector<string>>();\n      for( size_t i = 0; i < tmp_str_vec.size(); i++)\n        for(auto cn : check_name(tmp_str_vec[i], sout))\n          filters->cov_colKeep_names[cn] = true;\n    }\n    if( vm.count(\"covarExcludeList\") ) {\n      params->select_covs_rm = true;\n      tmp_str_vec = string_split(vm[\"covarExcludeList\"].as<string>(),\",\");\n      for( size_t i = 0; i < tmp_str_vec.size(); i++) {\n        for(auto cn : check_name(tmp_str_vec[i], sout))\n          filters->cov_colRm_names[cn] = true;\n      }\n    }\n    if( vm.count(\"catCovarList\") ) {\n      tmp_str_vec = string_split(vm[\"catCovarList\"].as<string>(),\",\");\n      for( size_t i = 0; i < tmp_str_vec.size(); i++)\n        for(auto cn : check_name(tmp_str_vec[i], sout))\n          filters->cov_colKeep_names[cn] = false;\n    }\n    if( (params->run_mode ==2) && (vm.count(\"interaction\") || vm.count(\"interaction-snp\")) ) {\n      params->w_interaction = true;\n      if(vm.count(\"interaction-snp\")) params->interaction_snp = params->w_ltco =  true;\n      check_inter_var(filters->interaction_cov, filters->interaction_cov_null_level, sout);\n      if(!vm.count(\"interaction-snp\") && !in_map(filters->interaction_cov,filters->cov_colKeep_names))\n        filters->cov_colKeep_names[filters->interaction_cov] = true; // assume qt\n      if(vm.count(\"no-condtl\") || (vm.count(\"interaction-snp\") && !vm.count(\"force-condtl\")) )\n        params->gwas_condtl = false;\n    }\n    if( (params->run_mode ==2) && vm.count(\"interaction-prs\") ) {\n      params->w_interaction = true;\n      params->interaction_prs = true;\n      filters->interaction_cov = \"PRS\";\n      if(vm.count(\"no-condtl\") || (!vm.count(\"force-condtl\")) )\n        params->gwas_condtl = false;\n    }\n    if( vm.count(\"tpheno-ignoreCols\") ) {\n      tmp_str_vec = string_split(vm[\"tpheno-ignoreCols\"].as<string>(),\",\");\n      for( size_t i = 0; i < tmp_str_vec.size(); i++) {\n        for(auto cn : check_name(tmp_str_vec[i], sout))\n          filters->tpheno_colrm[ stoi(cn) ] = true;\n      }\n    }\n    if( vm.count(\"chrList\") ) {\n      params->select_chrs = true;\n      tmp_str_vec = string_split(vm[\"chrList\"].as<string>(),\",\");\n      for( size_t ichr = 0; ichr < tmp_str_vec.size(); ichr++)\n        for(auto cn : check_name(tmp_str_vec[ichr], sout))\n          filters->chrKeep_test[ chrStrToInt(cn, params->nChrom) ] = true;\n    }\n    if( vm.count(\"chr\") ) {\n      params->select_chrs = true;\n      tmp_str_vec = vm[\"chr\"].as<std::vector<string>>();\n      for( size_t ichr = 0; ichr < tmp_str_vec.size(); ichr++)\n        filters->chrKeep_test[ chrStrToInt(tmp_str_vec[ichr], params->nChrom) ] = true;\n    }\n    if( vm.count(\"keep\") ){\n      files->file_ind_include = string_split(vm[\"keep\"].as<string>(),\",\");\n      params->keep_indivs = true;\n    }\n    if( vm.count(\"remove\") ){\n      files->file_ind_exclude = string_split(vm[\"remove\"].as<string>(),\",\");\n      params->rm_indivs = true;\n    }\n    if( vm.count(\"extract\") ){\n      files->file_snps_include = string_split(vm[\"extract\"].as<string>(),\",\");\n      params->keep_snps = true;\n    }\n    if( !vm.count(\"run-l0\") && vm.count(\"exclude\") ){\n      files->file_snps_exclude = 
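string_split(vm[\"exclude\"].as<string>(),\",\");\n      params->rm_snps = true;\n    }\n    // --extract-or/--exclude-or and --extract-sets/--exclude-sets accept comma-separated lists of files\n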
string_split(vm[\"exclude\"].as<string>(),\",\");\n      params->rm_snps = true;\n    }\n    if( vm.count(\"extract-or\") ){\n      files->file_snps_include_or = string_split(vm[\"extract-or\"].as<string>(),\",\");\n      params->keep_or = true;\n    }\n    if( vm.count(\"exclude-or\") ){\n      files->file_snps_exclude_or = string_split(vm[\"exclude-or\"].as<string>(),\",\");\n      params->rm_or = true;\n    }\n    if( vm.count(\"extract-sets\") ){\n      files->file_sets_include = string_split(vm[\"extract-sets\"].as<string>(),\",\");\n      params->keep_sets = true;\n    }\n    if( vm.count(\"exclude-sets\") ){\n      files->file_sets_exclude = string_split(vm[\"exclude-sets\"].as<string>(),\",\");\n      params->rm_sets = true;\n    }\n    if( vm.count(\"extract-setlist\") ) {\n      params->set_select_list = params->keep_sets = true;\n      files->file_sets_include.resize(1);\n      files->file_sets_include[0] = vm[\"extract-setlist\"].as<string>();\n    }\n    if( vm.count(\"exclude-setlist\") ) {\n      params->set_select_list = params->rm_sets = true;\n      files->file_sets_exclude.resize(1);\n      files->file_sets_exclude[0] = vm[\"exclude-setlist\"].as<string>();\n    }\n    if( vm.count(\"split-l0\") ) { // Format: FILE,INT\n      tmp_str_vec = string_split(vm[\"split-l0\"].as<string>(),\",\");\n      if(tmp_str_vec.size() != 2 )\n        throw \"wrong format for --split-l0 (must be FILE,INT).\";\n      files->split_file = tmp_str_vec[0];\n      params->njobs = atoi( tmp_str_vec[1].c_str() );\n    }\n    if( vm.count(\"run-l0\") ) { // Format: FILE,INT\n      tmp_str_vec = string_split(vm[\"run-l0\"].as<string>(),\",\");\n      if(tmp_str_vec.size() != 2 )\n        throw \"wrong format for --run-l0 (must be FILE,INT).\";\n      files->split_file = tmp_str_vec[0];\n      params->job_num = atoi( tmp_str_vec[1].c_str() );\n      if(params->job_num < 1 )\n        throw \"invalid job number for --run-l0 (must be >=1).\";\n    }\n    if( vm.count(\"condition-file\") ) {\n      tmp_str_vec = string_split(vm[\"condition-file\"].as<string>(),\",\");\n      if(tmp_str_vec.size()<2)\n        throw \"invalid option input for --condition-file\";\n      if((tmp_str_vec[0] != \"bgen\") && (tmp_str_vec[0] != \"bed\") && (tmp_str_vec[0] != \"pgen\"))\n        throw \"invalid file format for --condition-file (either bed/bge/pgen)\";\n      files->condition_snps_info.format = tmp_str_vec[0];\n      files->condition_snps_info.file = tmp_str_vec[1];\n      params->condition_file = true;\n    }\n    if( vm.count(\"interaction-file\") ) {\n      tmp_str_vec = string_split(vm[\"interaction-file\"].as<string>(),\",\");\n      if(tmp_str_vec.size()<2)\n        throw \"invalid option input for --interaction-file\";\n      if((tmp_str_vec[0] != \"bgen\") && (tmp_str_vec[0] != \"bed\") && (tmp_str_vec[0] != \"pgen\"))\n        throw \"invalid file format for --interaction-file (either bed/bge/pgen)\";\n      files->interaction_snp_info.format = tmp_str_vec[0];\n      files->interaction_snp_info.file = tmp_str_vec[1];\n      files->interaction_snp_info.ref_first = vm.count(\"interaction-file-reffirst\") && (tmp_str_vec[0] != \"pgen\");\n      params->interaction_file = true;\n    }\n    if( vm.count(\"test\") ) {\n      if( vm[\"test\"].as<string>() == \"additive\") params->test_type = 0; \n      else if( vm[\"test\"].as<string>() == \"dominant\") params->test_type = 1; \n      else if( vm[\"test\"].as<string>() == \"recessive\") params->test_type = 2; \n      else throw \"unrecognized argument for option 
--test, must be either 'additive', 'dominant' or 'recessive'.\";\n    }\n    if( vm.count(\"range\") ) { // Format: Chr:min-max\n      char tmp_chr[20];\n      double p0 = -1, p1 = -1;\n      string tmpd = vm[\"range\"].as<string>();\n\n      if(sscanf( tmpd.c_str(), \"%[^:]:%lf-%lf\", tmp_chr, &p0, &p1 ) != 3\n          || (p0 < 0) || (p1 < 0) ) \n        //cerr << tmp_chr << \"\\t\" << p0 << \"\\t\" << p1 << endl;\n        throw \"wrong format for --range (must be CHR:MINPOS-MAXPOS).\";\n\n      tmpd = tmp_chr;\n      params->range_chr = chrStrToInt(tmpd, params->nChrom);\n      params->range_min = min(p0,p1);\n      params->range_max = max(p0,p1);\n    }\n    if(vm.count(\"sex-specific\")){\n      if(vm[\"sex-specific\"].as<string>() == \"male\")\n        params->sex_specific = 1;\n      else if(vm[\"sex-specific\"].as<string>() == \"female\")\n        params->sex_specific = 2;\n      else throw \"unrecognized argument for --sex-specific (should be either male/female)\";\n    }\n\n    if( vm.count(\"build-mask\") ) {\n      if( params->mask_rule == \"max\") params->mask_rule_max = true; \n      else if( params->mask_rule == \"sum\") params->mask_rule_max = false; \n      else if( params->mask_rule == \"comphet\") { \n        params->mask_rule_max = false, params->mask_rule_comphet = true; \n      } else throw \"unrecognized argument for option --build-mask (=\" + params->mask_rule + \").\";\n      if((params->mask_rule == \"sum\") && params->htp_out){\n        sout << \"WARNING: option --htp cannot be used with '--build-mask sum' and will be ignored.\\n\";\n        params->htp_out = false; valid_args[ \"htp\" ] = false;\n      }\n    }\n    if( vm.count(\"acat-beta\") ) {\n      tmp_str_vec = string_split(vm[\"acat-beta\"].as<string>(),\",\");\n      params->acat_a1 = convertDouble( tmp_str_vec[0], params, sout);\n      params->acat_a2 = convertDouble( tmp_str_vec[1], params, sout);\n    }\n    if( vm.count(\"vc-tests\") ) {\n      tmp_str_vec = string_split(vm[\"vc-tests\"].as<string>(),\",\");\n      for( size_t i = 0; i < tmp_str_vec.size(); i++)\n        if(in_map(tmp_str_vec[i], params->vc_tests_map)) BIT_SET(params->vc_test, params->vc_tests_map[tmp_str_vec[i]]);\n        else if(tmp_str_vec[i] == \"acato-full\") {acato_use_all_rhos = true; BIT_SET(params->vc_test, params->vc_tests_map[\"acato\"]);}\n        else throw \"unrecognized VC test: '\" + tmp_str_vec[i] + \"' (accepted=skat/skato/skato-acat/acatv/acato)\";\n    }\n    if( vm.count(\"rgc-gene-p\") && vm.count(\"anno-file\") && vm.count(\"mask-def\") ) {\n      params->apply_gene_pval_strategy = params->joint_test = true;\n      if(!vm.count(\"vc-maxAAF\")) params->vc_maxAAF = 0.01;\n      if(params->burden != \"\") params->burden.append(\",\");\n      params->burden.append(\"acat\");\n      if(!params->trait_mode && !vm.count(\"skip-sbat\")) params->burden.append(\",sbat\");\n      if(params->test_type == 0){\n        BIT_SET(params->vc_test, params->vc_tests_map[\"acatv\"]);\n        BIT_SET(params->vc_test, params->vc_tests_map[\"skato-acat\"]);\n      } else {\n        sout << \"WARNING: SKATO/ACATV will be skipped for non-additive tests.\\n\";\n        params->vc_test = 0;\n      }\n      if(vm.count(\"rgc-gene-def\")) check_file (params->genep_mask_sets_file, \"rgc-gene-def\");\n    } else if(vm.count(\"rgc-gene-p\") || vm.count(\"rgc-gene-def\")) {\n      valid_args[ \"rgc-gene-p\" ] = false; // option is ignored\n      valid_args[ \"rgc-gene-def\" ] = false; // option is ignored\n    }\n\n    if( 
CHECK_BIT(params->vc_test, params->vc_tests_map[\"acato\"]) ) {// acato\n      BIT_SET(params->vc_test, params->vc_tests_map[\"acatv\"]); // acatv\n      params->skato_rho.resize(2,1); params->skato_rho << 0, 1; // skat & burden\n    }\n    if( acato_use_all_rhos || ( (params->vc_test>>2)&3 ) ) {// skato/skato-acat or acato with all rhos\n      params->skato_rho.resize(8,1); \n      params->skato_rho << 0, 0.1*0.1, 0.2*0.2, 0.3*0.3, 0.4*0.4, 0.5*0.5, 0.5, 1;\n    }\n    if( params->vc_test && vm.count(\"skat-params\") ) {\n      tmp_str_vec = string_split(vm[\"skat-params\"].as<string>(),\",\");\n      params->skat_a1 = convertDouble( tmp_str_vec[0], params, sout);\n      params->skat_a2 = convertDouble( tmp_str_vec[1], params, sout);\n    }\n    if( ((params->vc_test>>1)&15) && vm.count(\"skato-rho\") ) {\n      if(acato_use_all_rhos)\n        sout << \"WARNING: ACATO will use the user-specified rho values for SKATO models.\\n\" ;\n      tmp_str_vec = string_split(vm[\"skato-rho\"].as<string>(),\",\");\n      params->skato_rho = get_unit_params(true, \"--skato-rho\", tmp_str_vec, params, sout);\n      if(params->skato_rho.size() > 1) BIT_SET(params->vc_test, 2);\n    }\n\n    if ( params->run_mode == 1 ) params->test_mode = false;\n    else if (params->run_mode == 2 ) params->test_mode = true;\n    else throw \"specify which mode regenie should be running using option --step.\";\n\n    if(!params->test_mode) {\n\n      // loocv only used with out-of-sample predictions\n      if(params->use_loocv && params->within_sample_l0) {\n        sout << \"WARNING: option --loocv cannot be used with option --within.\\n\" ;\n        params->use_loocv = false; valid_args[ \"loocv\" ] = false;\n      }\n\n      // writing of level 0 predictions only available when using out-of-sample predictions\n      if(params->write_l0_pred && params->within_sample_l0){\n        sout << \"WARNING: option --lowmem cannot be used with option --within.\\n\" ;\n        params->write_l0_pred = false; valid_args[ \"lowmem\" ] = valid_args[ \"lowmem-prefix\" ] = false;\n      }\n\n      // user specified ridge parameters to use at l0\n      if( vm.count(\"setl0\") ) {\n        params->user_ridge_params_l0 = true;\n        tmp_str_vec = string_split(vm[\"setl0\"].as<string>(),\",\");\n        params->lambda = get_unit_params(false, \"--l0\", tmp_str_vec, params, sout);\n        params->n_ridge_l0 = params->lambda.size();\n      } else set_ridge_params(params->n_ridge_l0, params->lambda, sout);\n\n      // user specified ridge parameters to use at l1\n      params->tau.resize(1); // may be assigned for each trait\n      if( vm.count(\"setl1\") ) {\n        params->user_ridge_params_l1 = true;\n        tmp_str_vec = string_split(vm[\"setl1\"].as<string>(),\",\");\n        params->tau[0] = get_unit_params(false, \"--l1\", tmp_str_vec, params, sout);\n        params->n_ridge_l1 = params->tau[0].size();\n      } else set_ridge_params(params->n_ridge_l1, params->tau[0], sout);\n\n      if( params->run_l1_only && vm.count(\"l1-phenoList\") ) {\n        tmp_str_vec = string_split(vm[\"l1-phenoList\"].as<string>(),\",\");\n        for( size_t i = 0; i < tmp_str_vec.size(); i++) {\n          for(auto cn : check_name(tmp_str_vec[i], sout))\n            params->select_pheno_l1[cn] = true;\n        }\n      }\n\n      // firth only done in test mode\n      if(params->firth) params->firth = false;\n      if(params->use_SPA) params->use_SPA = false;\n      valid_args[ \"firth\" ] = valid_args[ \"spa\" ] = valid_args[ \"approx\" ] = 
false;\n\n      params->test_type = 0;\n      if( vm.count(\"range\") ) {\n        params->set_range = false; valid_args[ \"range\" ] =false;\n        sout << \"WARNING: option --range only works for step 2.\\n\";\n      }\n      if(params->rm_or || params->keep_or){\n        sout << \"WARNING: Options --extract-or/--exclude-or only work in step 2.\\n\";\n        params->rm_or = params->keep_or = false; valid_args[ \"extract-or\" ] = valid_args[ \"exclude-or\" ] = false;\n      }\n\n    } \n    if(params->firth && (params->trait_mode!=1 && params->trait_mode!=3)) {\n      // firth correction is only applied to binary traits and time-to-event traits\n      sout << \"WARNING: option --firth will not be applied (it is only run with binary traits and time-to-event traits).\\n\";\n      params->firth = false; valid_args[ \"firth\" ] = valid_args[ \"approx\" ] = false;\n    } \n    if(params->use_SPA && (params->trait_mode!=1)) {\n      // SPA is only applied to binary traits\n      sout << \"WARNING: option --spa will not be applied (it is only run with binary traits).\\n\";\n      params->use_SPA = false; valid_args[ \"spa\" ] = false;\n    }\n\n    if(vm.count(\"covarExcludeList\") && !vm.count(\"covarFile\")) {\n      params->select_covs_rm = false; valid_args[ \"covarExcludeList\" ] = false;\n    }\n\n    if(params->test_mode && params->use_loocv) {params->use_loocv = false;valid_args[ \"loocv\" ] = false;}\n\n    if( (vm.count(\"write-samples\") || vm.count(\"write-mask\")) && vm.count(\"bgen\") && !vm.count(\"sample\") )\n      throw \"must specify sample file (using --sample) if writing sample IDs to file.\";\n\n    if( vm.count(\"test\") && (params->run_mode !=2)) \n      throw \"can only use --test in step 2 (association testing).\";\n    if( (params->test_type > 0) && params->vc_test) \n      throw \"cannot use --test with --vc-tests.\";\n    if(params->skip_dosage_comp && params->test_type)\n      throw \"cannot use --skip-dosage-comp with --test.\";\n    if( !params->getCorMat && params->joint_test ){\n      if( (params->test_type > 0) && !vm.count(\"rgc-gene-p\")) \n        throw \"cannot use --test with --joint.\";\n      else if ( vm.count(\"sbat-napprox\") && params->nnls_napprox < 1 )\n        throw \"must pass positive integer for --sbat-napprox.\";\n      params->snp_set = true;\n    }\n    if(vm.count(\"sparse-thr\")){\n     if(!vm.count(\"skip-scaleG\") )\n      throw \"cannot use --sparse-thr without --skip-scaleG\";\n     else if((params->ld_sparse_thr < 0) || (params->ld_sparse_thr >=1))\n      throw \"invalid value passed in --sparse-thr (must be in [0,1)\";\n    }\n    if(vm.count(\"ld-extract\") && !vm.count(\"compute-corr\"))\n      throw \"must use --ld-extract with --compute-corr\";\n    if(vm.count(\"ld-extract\") && (vm.count(\"extract-sets\")+vm.count(\"exclude-sets\")+vm.count(\"extract-setlist\")+vm.count(\"exclude-setlist\")))\n      throw \"cannot use --ld-extract with --extract-sets/--exclude-sets\";\n    if( vm.count(\"write-null-firth\") && vm.count(\"use-prs\") )\n      throw \"cannot use --write-null-firth with --use-prs\";\n\n    if( vm.count(\"anno-file\") || vm.count(\"mask-def\") ){\n\n      if( params->getCorMat && !vm.count(\"ld-extract\") )\n        throw \"must use --ld-extract if building masks in LD matrix.\";\n\n      if(vm.count(\"anno-labels\")) params->w_anno_lab = true;\n\n      if( !(vm.count(\"anno-file\") && vm.count(\"mask-def\")) )\n        throw \"must use --anno-file with --mask-def.\";\n\n      if( (params->test_type > 0) && 
!(params->mask_rule_max || params->mask_rule_comphet) )\n        throw \"only additive test allowed when using 'sum' in --build-mask.\";\n\n      if(params->write_masks && !params->mask_rule_max && !params->mask_rule_comphet )\n        throw \"cannot write masks when using 'sum' in --build-mask.\";\n\n      // store aaf bins if given\n      if( vm.count(\"aaf-bins\") ) \n        tmp_str_vec = string_split(vm[\"aaf-bins\"].as<string>(),\",\");\n      else if(vm.count(\"rgc-gene-p\")) tmp_str_vec = std::vector<std::string>({ \"0.00001\",\"0.0001\",\"0.001\",\"0.01\" }); \n      else tmp_str_vec.resize(0);\n      params->mbins = tmp_str_vec;\n\n      if( vm.count(\"mask-lovo\") ) {\n        int cstart = 1;\n        tmp_str_vec = string_split(vm[\"mask-lovo\"].as<string>(),\",\");\n        if(tmp_str_vec.size() < 3)\n          throw \"wrong format for option --mask-lovo.\";\n        else if ( tmp_str_vec.size() == 4 ) {\n          params->w_regions = true; cstart++;\n        }\n        params->mask_loo_set = tmp_str_vec[0];\n        if(params->w_regions) params->mask_loo_region = tmp_str_vec[cstart-1];\n        params->mask_loo_name = tmp_str_vec[cstart];\n        params->mbins.resize(1);\n        params->mbins[0] = tmp_str_vec[cstart+1]; // either singleton or AAF cutoff\n        if(params->vc_test){\n          if(params->mbins[0] == \"all\") params->vc_maxAAF = 1;\n          else if(params->mbins[0] != \"singleton\") params->vc_maxAAF = convertDouble( params->mbins[0], params, sout);\n        }\n        if(params->write_masks){\n          sout << \"WARNING: cannot use --write-mask with --mask-lovo.\\n\";\n          params->write_masks = false; valid_args[ \"write-mask\" ] = false;\n        }\n        if(params->joint_test)\n          throw \"cannot use --joint with --mask-lovo\";\n        valid_args[ \"vc-maxAAF\" ] = valid_args[ \"aaf-bins\" ] = false;\n      } else if (vm.count(\"lovo-snplist\"))\n        throw \"cannot use --lovo-snplist without --mask-lovo\";\n\n      if( vm.count(\"mask-lodo\") ) {\n        tmp_str_vec = string_split(vm[\"mask-lodo\"].as<string>(),\",\");\n        if(tmp_str_vec.size() != 3)\n          throw \"wrong format for option --mask-lodo.\";\n        else if(vm.count(\"mask-lovo\"))\n          throw \"cannot use --mask-lovo with --mask-lodo.\";\n        params->w_regions = true;\n        params->mask_loo_set = tmp_str_vec[0];\n        params->mask_loo_name = tmp_str_vec[1];\n        params->mbins.resize(1);\n        params->mbins[0] = tmp_str_vec[2]; // either singleton or AAF cutoff\n        if(params->vc_test){\n          if(params->mbins[0] == \"all\") params->vc_maxAAF = 1;\n          else if(params->mbins[0] != \"singleton\") params->vc_maxAAF = convertDouble( params->mbins[0], params, sout);\n        }\n        if(params->write_masks){\n          sout << \"WARNING: cannot use --write-mask with --mask-lodo.\\n\";\n          params->write_masks = false; valid_args[ \"write-mask\" ] = false;\n        }\n        valid_args[ \"vc-maxAAF\" ] = valid_args[ \"aaf-bins\" ] = false;\n      }\n\n      params->snp_set = true;\n      params->build_mask = true;\n\n    }\n    if( params->test_mode && vm.count(\"force-mac-filter\") ) {\n      tmp_str_vec = string_split(vm[\"force-mac-filter\"].as<string>(),\",\");\n      params->forced_MAC_snpfile = tmp_str_vec[0];\n      params->forced_MAC = convertDouble( tmp_str_vec[1], params, sout);\n      if(params->forced_MAC < 0.5) throw \"MAC must be greater than 0.5 for --force-mac-filter\";\n      if(params->rm_or || 
params->keep_or) throw \"option --force-mac-filter cannot be used with --extract-or/--exclude-or\";\n      if(params->build_mask) throw \"option --force-mac-filter cannot be used when building masks\";\n    } else valid_args[ \"force-mac-filter\" ] = false;\n\n    if(!params->build_mask && params->write_masks) {params->write_masks = false; valid_args[ \"write-mask\" ] = false;}\n    if(!params->build_mask && params->check_mask_files) {params->check_mask_files = false; valid_args[ \"check-burden-files\" ] = false;}\n    if(!params->build_mask && params->strict_check_burden) {params->strict_check_burden = false; valid_args[ \"strict-check-burden\" ] = false;}\n    if(!params->build_mask && params->write_mask_snplist) {params->write_mask_snplist = false; valid_args[ \"write-mask-snplist\" ] = false;}\n    if(!(params->write_masks || params->write_mask_snplist) && params->skip_test) {params->skip_test = false; valid_args[ \"skip-test\" ] = false;}\n    if(!params->w_interaction) params->gwas_condtl = false;\n    if(!params->write_masks && params->write_setlist) {\n      sout << \"WARNING: must use --write-setlist with --write-mask.\\n\";\n      params->write_setlist = false; valid_args[ \"write-setlist\" ] = false;\n    }\n    if((vm.count(\"1\") || vm.count(\"cc12\")) && !(params->trait_mode == 1 || params->trait_mode == 3)) valid_args[ \"1\" ] = valid_args[ \"cc12\" ] = false;\n    if( vm.count(\"write-mask-snplist\") && (vm.count(\"mask-lovo\") || vm.count(\"mask-lodo\")) ) {\n      sout << \"WARNING: cannot use --write-mask-snplist with LOVO/LODO.\\n\";\n      params->write_mask_snplist = false; valid_args[ \"write-mask-snplist\" ] = false;\n    }\n    if( vm.count(\"write-setlist\") && (vm.count(\"mask-lovo\") || vm.count(\"mask-lodo\")) ) {\n      sout << \"WARNING: cannot use --write-setlist with LOVO/LODO.\\n\";\n      params->write_setlist = false; valid_args[ \"write-setlist\" ] = false;\n    }\n\n    if( params->snp_set && !vm.count(\"set-list\") )\n      throw \"must specify set list (using --set-list).\";\n\n    if( params->snp_set && \n        (vm.count(\"extract-sets\")+vm.count(\"exclude-sets\")+vm.count(\"extract-setlist\")+vm.count(\"exclude-setlist\"))>1 \n      )\n      throw \"must use only one of --extract-sets/--exclude-sets/--extract-setlist/--exclude-setlist.\";\n    if( params->w_interaction && params->vc_test ){\n      sout << \"WARNING: skipping non-burden gene-based tests for GxG/GxE mode.\\n\";\n      params->vc_test = 0; params->apply_gene_pval_strategy = params->joint_test = false;\n      valid_args[ \"vc-tests\" ] = valid_args[ \"joint\" ] = valid_args[ \"sbat-adapt\" ] = valid_args[ \"rgc-gene-p\" ] = valid_args[ \"rgc-gene-def\" ] = valid_args[ \"vc-maxAAF\" ] = valid_args[ \"vc-MACthr\" ] = false;\n    }\n\n    if(!params->test_mode && params->setMinMAC){\n      sout << \"WARNING: option --minMAC only works in step 2 of REGENIE.\\n\";\n      params->setMinMAC = false; valid_args[ \"minMAC\" ] = false;\n    }\n    if(params->test_mode && params->min_MAC < 0.5)\n      throw \"minimum MAC must be at least 0.5.\";\n    if(!params->test_mode && params->setMinINFO){\n      sout << \"WARNING: option --minINFO only works in step 2 of REGENIE.\\n\";\n      params->setMinINFO = false; valid_args[ \"minINFO\" ] = false;\n    }\n    if( !params->split_by_pheno && params->w_interaction){\n      sout << \"WARNING: option --no-split does not work for interaction tests.\\n\";\n      params->split_by_pheno = true; valid_args[ \"no-split\" ] = false;\n    }\n    if( 
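!params->split_by_pheno && params->nnls_out_all){\n      sout << \"WARNING: option --no-split does not work with --sbat-verbose.\\n\";\n      params->split_by_pheno = true; valid_args[ \"no-split\" ] = false;\n    }\n    // options dropped from valid_args are omitted from the 'Options in effect' list printed by print_args()\n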
    if( vm.count(\"no-split\") && vm.count(\"htp\")){\n      sout << \"WARNING: option --no-split cannot be used with --htp and will be ignored.\\n\";\n      valid_args[ \"no-split\" ] = false;\n    }\n    if(params->uncapped_pvals && !params->htp_out){\n      sout << \"WARNING: option --exact-p must be used with --htp.\\n\";\n      params->uncapped_pvals = false; valid_args[ \"exact-p\" ] = false;\n    }\n    if( (!params->test_mode || (params->trait_mode!=1) || params->htp_out || !params->split_by_pheno) && params->af_cc ) {\n      sout << \"WARNING: disabling option --af-cc (only for BTs in step 2 in native output format split by trait).\\n\";\n      params->af_cc = false; valid_args[ \"af-cc\" ] = false;\n    }\n    if(params->test_mode) check_build_code(params);\n    if(params->rm_snps && params->keep_snps )\n      sout << \"WARNING: only variants which satisfy both extract/exclude options will be kept.\\n\";\n\n    if(params->test_mode && (params->min_INFO < 0 || params->min_INFO > 1) )\n      throw \"minimum info score must be in [0,1].\";\n    if( params->rm_missing_qt && (params->strict_mode || params->trait_mode || !params->test_mode) ) params->rm_missing_qt = false;\n\n    if( !vm.count(\"bsize\") && !params->snp_set ) \n      throw \"must specify the block size using '--bsize'.\";\n    else if(vm.count(\"bsize\") && ( params->block_size < 1 ))\n      throw \"block size must be at least 1.\";\n    if(params->set_aaf && !params->build_mask) params->set_aaf = false;\n    if(params->run_l0_only && params->test_l0)\n      throw \"cannot use --test-l0 with --run-l0\";\n    if(params->test_l0 && params->print_block_betas) \n      throw \"cannot use --test-l0 with --print\";\n    if(params->test_l0 && (params->l0_pvals_file != \"\"))\n      throw \"--select-l0 must be specified without an argument\";\n    if(params->print_cov_betas && (params->w_interaction || params->blup_cov))\n      throw \"cannot use --print-cov-betas with interaction tests or --prs-cov\";\n    if(params->print_cov_betas && !params->test_mode)\n      throw \"can only use --print-cov-betas in step 2\";\n\n    // determine number of threads if not specified\n    if(params->threads < 1){\n      unsigned int nthreads = std::thread::hardware_concurrency(); // returns 0 when it cannot detect\n      params->threads = (nthreads > 1) ? (nthreads - 1) : 1; // guard against unsigned wrap-around when detection fails\n    }\n\n    // check parallel l0\n    if(params->test_mode && \n        (vm.count(\"split-l0\")||vm.count(\"run-l0\")||vm.count(\"run-l1\")) ) {\n      sout << \"WARNING: options --split-l0/--run-l0/--run-l1 only work in step 1.\\n\";\n      params->split_l0 = params->run_l0_only = params->run_l1_only = false;\n      valid_args[ \"split-l0\" ] = valid_args[ \"run-l0\" ] = valid_args[ \"run-l1\" ] = false;\n    } else if( vm.count(\"nb\") && \n        (vm.count(\"split-l0\")||vm.count(\"run-l0\")||vm.count(\"run-l1\")) ) {\n      sout << \"WARNING: options --split-l0/--run-l0/--run-l1 cannot be used with --nb.\\n\";\n      params->split_l0 = params->run_l0_only = params->run_l1_only = false;\n      valid_args[ \"split-l0\" ] = valid_args[ \"run-l0\" ] = valid_args[ \"run-l1\" ] = false;\n    }\n    if(params->test_l0 && \n        (vm.count(\"split-l0\")||vm.count(\"run-l0\")||vm.count(\"run-l1\")) ) \n      throw \"cannot use --test-l0 with --split-l0/--run-l0/--run-l1\";\n    if( vm.count(\"run-l0\") || 
vm.count(\"run-l1\") ) \n      check_file(files->split_file, \"run-l0/l1\");\n\n    // set Firth as default if both Firth and SPA are specified\n    if(params->use_SPA && params->firth) {\n      sout << \"WARNING: only one of --firth/--spa can be used. Only Firth will be used.\\n\";\n      params->use_SPA = false; valid_args[ \"spa\" ] = false;\n    }\n    params->mk_snp_map = params->rm_snps || params->keep_snps || params->rm_or || params->keep_or || params->snp_set || params->getCorMat || (params->forced_MAC > 0);\n    params->keep_snp_map = params->rm_or || params->keep_or || params->snp_set || params->getCorMat || (params->forced_MAC > 0);\n\n    // check fallback pvalue threshold\n    if((params->alpha_pvalue < params->nl_dbl_dmin) || (params->alpha_pvalue > 1 - params->numtol) )\n      throw \"Fallback p-value threshold must be in (0,1).\";\n    if(params->firth_approx && !params->firth) {\n      params->firth_approx = false; valid_args[ \"approx\" ] = false;\n    }\n    if( params->skip_cf_burden && !(params->use_SPA || params->firth) ) {\n      params->skip_cf_burden = false; valid_args[ \"skip-cf-burden\" ] = false;\n    }\n\n    // check arguments for logistic regression \n    if(params->trait_mode && (params->niter_max < 1))\n      throw \"invalid argument for --niter (must be positive integer).\";\n    if(params->firth && (params->maxstep_null < 1))\n      throw \"invalid argument for --maxstep-null (must be a positive integer).\";\n    if(params->firth && (params->niter_max_firth_null < 1))\n      throw \"invalid argument for --maxiter-null (must be a positive integer).\";\n    if(params->nChrom < 2)\n      throw \"invalid argument for --nauto (must be > 1).\";\n    if(params->set_range && (params->range_chr == -1))\n      throw \"unrecognized chromosome in --range.\";\n    if(params->rm_indivs && params->keep_indivs )\n      throw \"cannot use both --keep and --remove.\";\n    if(params->rm_or && params->keep_or )\n      throw \"cannot use both --extract-or and --exclude-or.\";\n    if(params->condition_file && !params->condition_snps )\n      throw \"must use --condition-list if using --condition-file.\";\n    if(params->interaction_file && !params->interaction_snp )\n      throw \"must use --interaction-snp if using --interaction-file.\";\n    if( !vm.count(\"covarFile\") && vm.count(\"interaction\") && !vm.count(\"interaction-snp\") )\n      throw \"must use --covarFile if using --interaction.\";\n\n    if( params->test_mode && params->select_chrs && in_map(-1, filters->chrKeep_test) )\n      throw \"invalid chromosome specified by --chr/--chrList.\";\n\n    if(params->test_mode && !params->skip_blups && !vm.count(\"pred\")) \n      throw \"must specify --pred if using --step 2 (otherwise use --ignore-pred).\";\n    if(vm.count(\"interaction\") || vm.count(\"interaction-snp\") || vm.count(\"interaction-prs\")){\n      if(!vm.count(\"interaction-snp\") && !vm.count(\"interaction-prs\") && (!vm.count(\"covarFile\") || !params->test_mode) )\n        throw \"can only use --interaction with --covarFile in step 2.\";\n      if( (vm.count(\"interaction\") + vm.count(\"interaction-prs\") + vm.count(\"interaction-snp\")) > 1 ) \n        throw \"must only specify single interacting variable\";\n      if(params->use_SPA)\n        throw \"cannot use --interaction with SPA test.\";\n      if(vm.count(\"interaction-snp\") && vm.count(\"use-prs\"))\n        throw \"cannot use --interaction-snp with full PRS.\";\n      if(vm.count(\"firth\") && !vm.count(\"approx\")){\n        sout << 
\"WARNING: using approximate Firth for association testing.\\n\";\n        params->firth_approx = true;\n      }\n    }\n    if(params->skip_test && params->vc_test)\n      throw \"cannot use '--skip-test' with SKAT/SKATO/ACATO\\n\";\n    if(params->vc_test && params->firth && !params->firth_approx){\n      sout << \"WARNING: Using approximate Firth for association testing.\\n\";\n      params->firth_approx = true;\n    }\n    if((params->skato_rho.size() > 0) && (params->skato_rho <0 || params->skato_rho >1).any())\n      throw \"rho values for SKAT-O must be in [0,1]\";\n    if(params->singleton_carriers && params->aaf_file_wSingletons){\n      sout << \"WARNING: Ignoring option --singleton-carrier when using --set-singletons.\\n\";\n      params->singleton_carriers = false; valid_args[ \"singleton-carrier\" ] = false;\n    }\n    if( (vm.count(\"t2e\") + vm.count(\"bt\") + vm.count(\"qt\")) > 1)\n      throw \"must use only one of --qt/--bt/--t2e\";\n    if(params->use_loocv && (params->trait_mode == 3)) {\n      sout << \"WARNING: option --loocv cannot be used with option --t2e.\\n\" ;\n      params->use_loocv = false; valid_args[ \"loocv\" ] = false;\n    }\n    if(params->htp_use_eventname && !(params->htp_out && (params->trait_mode == 3) && params->test_mode)) {\n      sout << \"WARNING: option --htp-with-event only works with --t2e in step 2.\\n\" ;\n      params->htp_use_eventname = false; valid_args[ \"htp-with-event\" ] = false;\n    }\n\n    //params->use_max_bsize = params->mask_loo;\n    if( (params->trait_mode==2) && params->w_interaction)\n      throw \"cannot use interaction tests with count phenotypes.\";\n    if( params->interaction_prs && !(vm.count(\"use-prs\") || vm.count(\"pred\")) )\n      throw \"must supply step 1 predictions.\";\n\n    if(vm.count(\"force-ltco\") && vm.count(\"use-prs\"))\n      throw \"cannot use LTCO with full PRS.\";\n    if(vm.count(\"use-null-firth\") && !params->firth_approx) \n      throw \"option --use-null-firth only wors with approximate Firth test.\";\n    if(vm.count(\"write-null-firth\") && \n        ( (params->test_mode && !params->firth_approx) || (!params->test_mode && (params->trait_mode!=1)) ) ) {\n      sout << \"WARNING: option --write-null-firth only works for BTs with approximate Firth test.\\n\";\n      params->write_null_firth = false; valid_args[ \"write-null-firth\" ] = false;\n    }\n    if( (filters->cov_colKeep_names.size() > 0) && !vm.count(\"covarFile\") )\n      throw \"you specified covariates without specifying a covariate file (using --covarFile).\";\n\n    if(params->transposedPheno){\n      if(vm.count(\"phenoFile\") ) \n        throw \"cannot use both --phenoFile and --tpheno-file.\";\n      if(!vm.count(\"tpheno-indexCol\") ) \n        throw \"must specify --tpheno-indexCol with --tpheno-file.\";\n      if(vm.count(\"iid-only\"))\n        params->tpheno_iid_only = true;\n    }\n    if(vm.count(\"starting-block\")){\n      if(!params->test_mode)\n        throw \"option --starting-block only works in step 2\";\n      else if(params->start_block<1)\n        throw \"starting block must be >=1\";\n      if(vm.count(\"nb\")) params->n_block += params->start_block - 1;\n    }\n    if(vm.count(\"sex-specific\") && (params->file_type == \"bgen\") && !params->bgenSample)\n      throw \"must specifying sample file using --sample for sex-specific analyses\";\n    if(params->blup_cov && (!(params->use_prs || !params->skip_blups) || !params->test_mode || (params->firth && !params->firth_approx) )){\n      params->blup_cov 
= false; valid_args[ \"prs-cov\" ] = false;\n    }\n\n    if(params->test_mode && (params->file_type == \"pgen\") && !params->fastMode)\n      throw \"cannot use --nostream with PGEN format.\";\n\n    // check apply-rint options\n    if(params->rerint & params->rerintcov)\n      throw \"must select one of the two options, --apply-rerint or --apply-rerint-cov\";\n\n    // check multi-trait settings\n    if(params->trait_set) {\n      if(!params->strict_mode) \n        throw \"--strict mode is required for multi-trait tests\";\n      if(params->split_by_pheno) \n        throw \"--no-split mode is required for multi-trait tests\";\n    }\n\n    if(params->mcc_skew < 0) {\n        throw \"absolute phenotypic skewness must be positive\";\n    }\n    if(params->mcc_skew > 0) {\n      if(!params->mcc_test) {\n        throw \"--mcc must be on when specifying absolute phenotypic skewness\";\n      }\n    }\n    if(params->mcc_test) {\n      // convert mcc thr. from raw to -log10 scale\n      if((params->mcc_thr > 1) && (params->mcc_thr <= 0))\n        throw \"--mcc-thr must be in (0; 1]\";\n      if(params->mcc_thr < 1) \n        params->mcc_apply_thr = true;\n      params->mcc_thr_nlog10 = -log10(params->mcc_thr); // -log10 transformation\n      // debug\n      /* cout << \"mcc_test = \" << params->mcc_test << \" | mcc_apply_thr = \" << params->mcc_apply_thr << \" | mcc_thr  = \" << params->mcc_thr << \" | mcc_thr_nlog10 = \" << params->mcc_thr_nlog10 << \" | mcc_skew = \" << params->mcc_skew << endl; */\n    }\n\n    // check MultiPhen-trait settings\n    if(params->multiphen) {\n      if(!params->strict_mode) throw \"--strict mode is required for MultiPhen test\";\n      /* if(params->split_by_pheno) throw \"--no-split mode is required for MultiPhen test\"; */\n      if((params->multiphen_thr > 1) && (params->multiphen_thr <= 0)) throw \"--multiphen-thr must be in (0; 1]\";\n      params->n_tests_multitrait = 1; // a single test = MultiPhen\n      params->split_by_multitrait = false; // no split of output files\n    }\n\n    // check input files\n    if(params->file_type == \"bgen\") {\n      check_file (files->bgen_file, \"bgen\"); \n      if(params->bgenSample) check_file (files->sample_file, \"sample\"); \n      if(files->bgi_file != \"\") {\n        check_file (files->bgi_file, \"bgi\");\n        params->with_bgi = true;\n      } else {\n        files->bgi_file = files->bgen_file + \".bgi\";\n        params->with_bgi = file_exists (files->bgi_file) ;\n      }\n    }\n    if(vm.count(\"covarFile\")) check_file(files->cov_file,\"covarFile\");\n    if(!params->getCorMat) check_file(files->pheno_file,\"phenoFile\"); \n    if(params->file_type == \"bed\"){\n      vector<string> suffs = {\".bed\",\".bim\",\".fam\"};\n      check_file(files->bed_prefix, suffs, \"bed\");\n    }\n    if(params->file_type == \"pgen\"){\n      vector<string> suffs = {\".pgen\",\".pvar\",\".psam\"};\n      check_file(files->pgen_prefix, suffs, \"pgen\");\n    }\n    if(params->keep_indivs)\n      for(auto cn : files->file_ind_include)\n        check_file(cn, \"keep\");\n    if(params->rm_indivs)\n      for(auto cn : files->file_ind_exclude)\n        check_file(cn, \"remove\");\n    if(!vm.count(\"run-l0\") && params->keep_snps)\n      for(auto cn : files->file_snps_include)\n        check_file(cn, \"extract\");\n    if(params->rm_snps)\n      for(auto cn : files->file_snps_exclude)\n        check_file(cn, \"exclude\");\n    if(params->keep_or)\n      for(auto cn : files->file_snps_include_or)\n        check_file(cn, 
\"extract-or\");\n    if(params->rm_or)\n      for(auto cn : files->file_snps_exclude_or)\n      check_file(cn, \"exclude-or\");\n    if(params->snp_set) {\n      check_file(files->set_file, \"set-list\");\n      if(!vm.count(\"extract-setlist\") && params->keep_sets)\n        for(auto cn : files->file_sets_include)\n          check_file(cn, \"extract-sets\");\n      if(!vm.count(\"exclude-setlist\") && params->rm_sets)\n        for(auto cn : files->file_sets_exclude)\n          check_file(cn, \"exclude-sets\");\n    }\n    if(params->forced_MAC > 0) check_file(params->forced_MAC_snpfile, \"force-mac-filter\");\n    if(params->select_l0 && !params->test_l0)\n      check_file(params->l0_pvals_file, \"select-l0\");\n    if(vm.count(\"ld-extract\"))\n      check_file(params->ld_list_file, \"ld-extract\");\n    if(params->build_mask){\n      check_file(files->anno_file, \"anno-file\");\n      check_file(files->mask_file, \"mask-def\");\n      if(vm.count(\"anno-labels\")) check_file(files->anno_labs_file, \"anno-labels\");\n    }\n    if(params->set_aaf) check_file(files->aaf_file, \"aaf-file\");\n    if(params->condition_snps) {\n      check_file(files->condition_snps_list, \"condition-list\");\n      if(params->condition_file && (files->condition_snps_info.format == \"bgen\")) {\n        check_file(files->condition_snps_info.file, \"condition-file\");\n        // optional sample file?\n        files->condition_snps_info.with_sample = files->condition_snps_info.sample != \"\";\n        if(files->condition_snps_info.with_sample) check_file(files->condition_snps_info.sample, \"condition-file-sample\");\n        files->condition_snps_info.with_bgi = file_exists (files->condition_snps_info.file + \".bgi\") ;\n      } else if(params->condition_file && (files->condition_snps_info.format == \"bed\")) {\n        vector<string> suffs = {\".bed\",\".bim\",\".fam\"};\n        check_file(files->condition_snps_info.file, suffs, \"condition-file\");\n      } else if(params->condition_file && (files->condition_snps_info.format == \"pgen\")) {\n        vector<string> suffs = {\".pgen\",\".pvar\",\".psam\"};\n        check_file(files->condition_snps_info.file, suffs, \"condition-file\");\n      }\n    }\n    if(params->interaction_snp && params->interaction_file) {\n      if(files->interaction_snp_info.format == \"bgen\") {\n        check_file(files->interaction_snp_info.file, \"interaction-file\");\n        // optional sample file?\n        files->interaction_snp_info.with_sample = files->interaction_snp_info.sample != \"\";\n        if(files->interaction_snp_info.with_sample) check_file(files->interaction_snp_info.sample, \"interaction-file-sample\");\n        files->interaction_snp_info.with_bgi = file_exists (files->interaction_snp_info.file + \".bgi\") ;\n      } else if(files->interaction_snp_info.format == \"bed\") {\n        vector<string> suffs = {\".bed\",\".bim\",\".fam\"};\n        check_file(files->interaction_snp_info.file, suffs, \"interaction-file\");\n      } else if(files->interaction_snp_info.format == \"pgen\") {\n        vector<string> suffs = {\".pgen\",\".pvar\",\".psam\"};\n        check_file(files->interaction_snp_info.file, suffs, \"interaction-file\");\n      }\n    }\n    if(vm.count(\"lovo-snplist\")) check_file(params->masks_loo_snpfile, \"lovo-snplist\");\n\n    if(vm.count(\"remeta-save-ld\") > 0) {\n  #ifndef WITH_HTSLIB\n    throw \"--remeta-save-ld option requires compilation with HTSlib\";\n  #else\n      params->remeta_save_ld = true;\n      if(vm.count(\"remeta-ld-spr\") 
> 0) {\n        params->remeta_ld_spr = vm[\"remeta-ld-spr\"].as<double>();\n      }\n      if(params->skat_collapse_MAC > 0) {\n        throw \"--remeta-save-ld option requires --vc-MACthr 0\";\n      }\n  #endif\n    }\n\n    check_seed(params->rng_seed, vm.count(\"seed\"));\n    print_args(arguments, valid_args, sout);\n\n  } catch (const cxxopts::OptionException& e) {\n    if (sout.coss.is_open())\n      print_header(sout.coss);\n    print_header(cout);\n    sout << \"ERROR: \" << e.what() << endl << params->err_help << \"\\n\";\n    exit(EXIT_FAILURE);\n  } catch (const std::string& msg) {// after opening sout\n    sout <<  \"ERROR: \" <<  msg << \"\\n\" <<  params->err_help << \"\\n\";\n    exit(EXIT_FAILURE);\n  } catch (const char* msg) {// after opening sout\n    std::string str_msg = msg;\n    sout <<  \"ERROR: \" <<  str_msg << \"\\n\" <<  params->err_help << \"\\n\";\n    exit(EXIT_FAILURE);\n  }\n\n  return;\n}\n\nvoid check_file(string const& infile, string const& option_name){\n\n  if(infile == \"\") \n    throw \"Invalid argument (=' ') specified for option --\" + option_name;\n  else if(!file_exists (infile))\n    throw infile + \" doesn't exist for option --\" + option_name;\n\n}\n\nvoid check_file(string const& infile, vector<string> const& suffixes, string const& option_name){\n\n  if(infile == \"\") \n    throw \"Invalid file argument (=' ') specified for option --\" + option_name;\n  for(auto suffix : suffixes)\n    // allow for gzipped bim/fam/psam/pvar files\n    if(!file_exists (infile + suffix) && ((suffix == \".bed\") || (suffix == \".pgen\") || !file_exists(infile + suffix + \".gz\")))\n      throw infile + suffix + \" doesn't exist for option --\" + option_name;\n\n}\n\n\nvoid start_log(const string& out_file, MeasureTime* mt, mstream& sout){\n\n  string log_name = out_file + \".log\";\n  sout.coss.open(log_name.c_str(), ios::out | ios::trunc); \n  if (!sout.coss.is_open()) {\n    print_header(cout);\n    cout << \"ERROR: Cannot write log file '\" << log_name << \"'\\n\" ;\n    exit(EXIT_FAILURE);\n  } \n\n  mt->init();\n  sout << \"Start time: \" << ctime( &(mt->start_time_info) ) << endl; \n  print_header(sout.coss);\n  print_header(cout);\n  sout << \"Log of output saved in file : \" << log_name << endl<< endl;\n\n}\n\ntemplate <typename T> \nvoid print_args(T arguments, map<string,bool>& amap, mstream& sout){\n\n  // print options\n  sout << \"Options in effect:\\n\";\n\n  for(size_t counter = 0; counter < arguments.size(); counter++){\t  \n    if(!amap[arguments[counter].key()]) continue;\n\n    sout << \"  --\" << arguments[counter].key();\n    if(arguments[counter].value() != \"true\") sout << \" \" << arguments[counter].value(); \n\n    if(counter < (arguments.size() - 1)) sout << \" \\\\\";\n    sout << \"\\n\";\n  }\n\n  sout << \"\\n\";\n\n}\n\nArrayXd get_unit_params(bool const& incl_bound, string const& opt, vector<string> const& str_vec, struct param const* params, mstream& sout){\n\n  std::vector<double> vals;\n\n  for( size_t val = 0; val < str_vec.size(); val++)\n    vals.push_back(convertDouble( str_vec[val], params, sout));\n  std::sort(vals.begin(), vals.end());\n  vals.erase( unique( vals.begin(), vals.end() ), vals.end() );\n\n  ArrayXd vvals = MapArXd( vals.data(), vals.size() ); \n  // check parameters\n  if( incl_bound && ((vvals<0) || (vvals>1)).any() )\n    throw \"must specify values for \" + opt + \" in [0,1].\";\n  else if( !incl_bound && ((vvals<=0) || (vvals>=1)).any() )\n    throw \"must specify values for \" + opt + \" in 
(0,1).\";\n\n  return vvals;\n\n}\n\nvoid set_ridge_params(int const& nparams, ArrayXd& vec, mstream& sout){\n\n  if(nparams < 2)\n    throw \"number of ridge parameters must be at least 2 (=\" + to_string( nparams ) + \")\";\n\n  // endpoints are 0.01 and 0.99 \n  double step = 1.0 / ( nparams - 1 );\n  vec = ArrayXd::LinSpaced(nparams, 0, nparams-1) * step;\n  vec.head(1) = 0.01;\n  vec.tail(1) = 0.99;\n\n}\n\nvoid print_usage_info(struct param const* params, struct in_files* files, mstream& sout){\n\n  double total_ram;\n  string ram_unit;\n\n  ///// Memory usage\n  if(!params->test_mode){\n    // Step 1\n    // 4P + max( B + PRT, PRT) + #chrs [P:#traits;R=#ridge l0;T=#predictions from l0]\n    int t_eff = ( params->write_l0_pred ? 1 : params->total_n_block );\n    int p_eff = ( params->write_l0_pred ? 1 : params->n_pheno );\n    int b_eff = params->total_n_block;\n\n    total_ram = 4 * params->n_pheno + params->nChrom + params->ncov;\n    total_ram += std::max( params->block_size + params->n_pheno * params->n_ridge_l0 * t_eff, p_eff * params->n_ridge_l0 * b_eff );\n  } else {\n    // Step 2\n    // 3P + B\n    total_ram = params->n_pheno * 3 + params->block_size + params->ncov * 2; // y, mask, y_resid, g, X, X getbasis projection\n    if(params->trait_mode) {\n      total_ram += 3 * params->n_pheno + params->block_size + params->n_pheno * params->ncov; // y_raw, gamma_hat, gamma_hat_mask, g_resid\n      if(params->use_SPA) total_ram += 0.5 * params->block_size; // non_zero_indices of g (4 bytes)\n      if(params->firth_approx) total_ram += params->n_pheno; // cov offset\n      if(params->start_block > params->total_n_block)\n        throw \"Starting block > number of blocks analyzed\";\n    } else total_ram += params->block_size; // for Gresid\n    if((params->file_type == \"bed\") && params->fastMode) total_ram += params->block_size/4.0/sizeof(double); //for extracting snp_data_block\n    if(params->use_max_bsize) total_ram += params->block_size; // loo masks\n    // for Hmat (G_E, G, G*E )\n    if(params->w_interaction) \n      total_ram += params->threads * ((params->gwas_condtl ? 1 : 2) * params->ncov_interaction + 1); \n  }\n\n  total_ram *= params->n_samples * sizeof(double);\n  total_ram += params->nvs_stored * sizeof(struct snp);\n  if( params->getCorMat ){ // M^2 (x2 with txt output) + 3NB\n      total_ram += (params->cor_out_txt && (params->ld_sparse_thr == 0) ? 2 : 1) * params->extract_vars_order.size() * params->extract_vars_order.size() * sizeof(double);\n      total_ram += params->n_samples * params->block_size * sizeof(double);\n  }\n  if( params->use_loocv ) total_ram += params->chunk_mb * 1e6; // max amount of memory used for LOO computations involved\n  if( params->mask_loo ) total_ram += 1e9; // at most 1GB\n  if( params->vc_test ) total_ram += 2 * params->max_bsize * params->max_bsize * sizeof(double); // MxM matrices\n  total_ram /= 1000.0 * 1000.0; \n  if( total_ram > 1000 ) {\n    total_ram /= 1000.0; \n    ram_unit = \"GB\";\n  } else ram_unit = \"MB\";\n\n  int ram_int = (int) ceil( total_ram );\n  sout << \" * approximate memory usage : \" << ram_int << ram_unit << endl;\n\n  ///// Disk space usage\n  if(!params->test_mode && !params->run_l1_only && params->write_l0_pred){\n    if(files->loco_tmp_prefix.empty()) files->loco_tmp_prefix = files->out_file;\n    sout << \" * writing level 0 predictions to disk\" << endl;\n    sout << \"   -\" << (params->rm_l0_pred ? 
\"temporary \" : \"\") << \"files will have prefix [\" << files->loco_tmp_prefix << \"_l0_Y]\" << endl;\n    // N*P*T*R\n    int b_eff = params->total_n_block;\n    total_ram = params->n_pheno * b_eff * params->n_ridge_l0;\n    total_ram *= params->n_samples * sizeof(double);\n    total_ram /= 1024.0 * 1024.0; \n    if( total_ram > 1000 ) {\n      total_ram /= 1024.0; \n      ram_unit = \"GB\";\n    } else ram_unit = \"MB\";\n    int ram_int = (int) ceil( total_ram );\n    sout << \"   -approximate disk space needed : \" << ram_int << ram_unit << endl;\n  }\n\n  if(params->debug)\n    sout << \" * rng seed : \" << params->rng_seed << \"\\n\";\n}\n\nint chrStrToInt(const string& chrom, const int& nChrom) {\n\n  // if label is chr1, chr2,...\n  string s_chr = std::regex_replace(chrom, std::regex(R\"(^chr)\"), \"\");\n\n  if (isdigit(s_chr[0])) {\n    int chr = atoi(s_chr.c_str());\n    if((chr >= 1) && (chr <= nChrom)) return chr;\n  } else if ( (s_chr == \"X\") || (s_chr == \"XY\") || (s_chr == \"Y\") || (s_chr == \"PAR1\") || (s_chr == \"PAR2\") ) return nChrom;\n\n  return -1;\n}\n\nvector<string> check_name(string const& str, mstream& sout){\n\n  int imin, imax;\n  size_t pos_start = 0, pos_end; \n  string name, pref, suf, strerror;\n  strerror = \"invalid string expansion (=\" + str + \").\\n\";\n  vector<string> strout;\n\n  if(str.size() == 0) return strout;\n\n  pos_end = str.find(\"{\"); \n  if(pos_end == std::string::npos) {\n    strout.push_back(str); return strout;\n  }\n\n  try {\n    // prefix if present\n    name = str.substr (pos_start, pos_end - pos_start);\n    pref = name;\n\n    // find :\n    pos_start = pos_end + 1, pos_end = str.find(\":\"); \n    if(pos_end == std::string::npos) throw strerror;\n    name = str.substr (pos_start, pos_end - pos_start);\n    imin = stoi( name );\n\n    // find }\n    pos_start = pos_end+1, pos_end = str.find(\"}\"); \n    if(pos_end == std::string::npos) throw strerror;\n    name = str.substr (pos_start, pos_end - pos_start);\n    imax = stoi( name );\n\n  } catch (const std::invalid_argument& ia){ \n    throw strerror ;\n  } \n\n  // suffix is present\n  suf = str.substr (pos_end+1, std::string::npos);\n\n  for(int j = imin; j <= imax; j++){\n    name = pref + to_string(j) + suf;\n    strout.push_back(name);\n  }\n\n  return strout;\n}\n\nvoid check_build_code(struct param* params){\n  vector<string> valid_codes = { \"b36\", \"b37\", \"b38\", \"hg18\", \"hg19\", \"hg38\"};\n\n  if (std::find(valid_codes.begin(), valid_codes.end(), params->build_code) == valid_codes.end()){ // format: <end_par1>,<start_par2>\n    int min_npar, max_npar;\n    if((sscanf( params->build_code.c_str(), \"%d,%d\", &min_npar, &max_npar ) != 2) || (min_npar < 1) || (max_npar < 1) || (max_npar < min_npar)) \n      throw \"invalid build code given (valid ones are '\" + print_sv(valid_codes, \"|\") + \"' or [start,end] position of the non-par region)\"; \n    params->par1_max_bound = min_npar - 1;\n    params->par2_min_bound = max_npar + 1;\n\n  } else if((params->build_code == \"b36\") || (params->build_code == \"hg18\")){\n    params->par1_max_bound = 2709520, params->par2_min_bound = 154584238;\n  } else if ((params->build_code == \"b37\") || (params->build_code == \"hg19\")){\n    params->par1_max_bound = 2699520, params->par2_min_bound = 154931044;\n  } else{\n    params->par1_max_bound = 2781479, params->par2_min_bound = 155701383;\n  }\n\n}\n\ndouble convertDouble(const string& val, struct param const* params, mstream& sout){\n\n  if(val == 
params->missing_pheno_str)\n    return params->missing_value_double;\n  else if( (val == \"nan\") || (val == \"inf\") )\n    return params->missing_value_double;\n\n  double dval;\n  if(sscanf(val.c_str(), \"%lf\", &dval) != 1)\n    throw \"could not convert value to double: '\" + val + \"'\";\n\n  return dval;\n}\n\nfloat convertFloat(const string& val, struct param const* params, mstream& sout){\n\n  if(val == params->missing_pheno_str)\n    return params->missing_value_float;\n  else if( (val == \"nan\") || (val == \"inf\") )\n    return params->missing_value_float;\n\n  float dval;\n  if(sscanf(val.c_str(), \"%f\", &dval) != 1)\n    throw \"could not convert value to float: '\" + val + \"'\";\n\n  return dval;\n}\n\nstring convert_double_to_str(double const& val){\n  char val_str[256];\n  if ((val < 5000) && (val > 1e-5))\n    sprintf(val_str, \"%.6f\", val);\n  else\n    sprintf(val_str, \"%g\", val);\n  return( string(val_str) );\n}\n\nstring convert_logp_raw(double const& logp, double const& log_dbl_min){\n\n  char pval_str[256];\n\n  if(logp <= 3) {\n    sprintf(pval_str, \"%f\", pow(10, -logp));\n  } else if(logp <= log_dbl_min) {\n    sprintf(pval_str, \"%g\", pow(10, -logp));\n  } else {\n    double thr = log(9.95)/log(10);\n    int base = ceil(logp);\n    double res = base - logp;\n    if(res>=thr) {res = 0; base++;}\n    sprintf(pval_str, \"%.1fe-%d\", pow(10, res), base);\n  }\n\n  return( string(pval_str) );\n}\n\n// convert to numerical category using map\ndouble convertNumLevel(const string& val, std::map<std::string,int>& cmap, struct param const* params, mstream& sout){\n\n  if(val == params->missing_pheno_str)\n    return params->missing_value_double;\n  else if( (val == \"nan\") || (val == \"inf\") )\n    return params->missing_value_double;\n\n  if(in_map(val, cmap)) \n    return cmap[val];\n\n  // add to map\n  int newcat = cmap.size();\n  cmap[val] = newcat;\n\n  return newcat;\n}\n\n// for strings with format: str[lvl]\nvoid check_inter_var(std::string& str, std::string& lvl, mstream& sout){\n\n  string name;\n  size_t pos_start = 0, pos_end; \n\n  // check if contains \"[\"\n  pos_end = str.find(\"[\"); \n  if(pos_end == std::string::npos) \n    return ;\n  name = str.substr (pos_start, pos_end - pos_start);\n\n  // find \"]\"\n  pos_start = pos_end + 1, pos_end = str.find(\"]\"); \n  if(pos_end == std::string::npos) \n    throw \"ERROR: Invalid string :\" + str ;\n\n  lvl = str.substr (pos_start, pos_end - pos_start);\n\n  str = name;\n}\n\n// comma separated strings\nstd::string print_csv(const vector<string>& vlist){\n  return print_sv(vlist, \",\");\n}\n\n// semi-colon separated strings\nstd::string print_scsv(const vector<string>& vlist){\n  return print_sv(vlist, \";\");\n}\n\ntemplate <typename T>\nstd::string print_sv(const std::vector<T>& vlist, const string& delim)\n{\n  std::ostringstream buffer;\n  if(!vlist.empty()) {\n    std::copy(std::begin(vlist), std::end(vlist) - 1, std::ostream_iterator<T>(buffer, delim.c_str()));\n    buffer << vlist.back(); // last element\n  }\n  return buffer.str();\n}\n\nvoid removeCarriageReturn(std::string& str) {\n  if (!str.empty() && str.back() == '\\r') {\n    str.pop_back();\n  }\n}\n\nEigen::ArrayXi get_true_indices(const Ref<const ArrayXb>&  bool_arr){\n\n  ArrayXi v_indices ( bool_arr.count() );\n  for(int i = 0, j = 0; i < bool_arr.size(); i++)\n    if(bool_arr(i)) v_indices(j++) = i;\n\n  return v_indices;\n}\n\nvoid get_both_indices(std::vector<Eigen::ArrayXi>& res, const Eigen::Ref<const ArrayXb>& 
bool_arr){\n\n  res.resize(2);\n  int Ntot = bool_arr.size();\n  res[0].resize(bool_arr.count()); // true entries\n  res[1].resize(Ntot - res[0].size()); // false entries\n  for(int i = 0, j_t = 0, j_f = 0; i < Ntot; i++) {\n    if(bool_arr(i)) res[0](j_t++) = i;\n    else res[1](j_f++) = i;\n  }\n\n}\nvoid get_both_indices(std::vector<Eigen::ArrayXi>& res, const Eigen::Ref<const ArrayXb>& bool_arr, const Eigen::Ref<const ArrayXb>& mask){\n\n  res.resize(2);\n  int Ntot = mask.count();\n  res[0].resize((mask && bool_arr).count()); // true entries\n  res[1].resize(Ntot - res[0].size()); // false entries\n  for(int i = 0, j_t = 0, j_f = 0; i < bool_arr.size(); i++) {\n    if(mask(i)){\n      if(bool_arr(i)) res[0](j_t++) = i;\n      else res[1](j_f++) = i;\n    }\n  }\n\n}\n\nbool is_nan(double const& val){\n  return ((boost::math::isnan)(val) || !(boost::math::isnormal)(val));\n}\n\n// get logp from t-test\nvoid get_logp_ttest(double& logp, const double& tstat, const unsigned int& df)\n{\n  boost::math::students_t dist(df);\n  double pv = 2*boost::math::cdf(boost::math::complement(dist, std::abs(tstat)));\n  if(pv == 0) {\n    double logbeta = boost::math::lgamma(0.5*df) + boost::math::lgamma(0.5) - boost::math::lgamma(0.5*df + 0.5);\n    double df_tstat = df / tstat / tstat;\n    if (df_tstat<1) // approximation is not suitable when df is too large relative to tstat\n      logp = 0.5 * df * (log10(df) - 2 * log10(tstat) - log1p(df_tstat) / log(10)) - log10(0.5 * df) - logbeta / log(10);\n    else\n      logp = -330;\n    logp *= -1;\n  } else logp = -log10(pv);\n}\n\n// get logp from chisq(1)\nvoid get_logp(double& logp, const double& Tstat){\n\n  boost::math::chi_squared chisq1(1);\n\n  if( (Tstat < 0) && (fabs(Tstat) < 1e-6)){logp = 0; return;} // num err\n  else if(Tstat<0) {logp = -1; return;} // fail\n  double pv = cdf(complement(chisq1, Tstat));\n\n  if(pv == 0) logp = log10(2) - 0.5 * log10( 2 * M_PI * Tstat ) - 0.5 * Tstat * M_LOG10E ;\n  else logp = log10(pv);\n\n  logp *= -1;\n\n}\n\n// get logp & chisq1 from pv\nvoid get_logp(const double& pv, double& logp, double& Tstat, double const& dbl_dmin){\n\n  if((pv < 0) || (pv > 1)) { // fail\n    logp = -1; \n    Tstat = 0;\n    return;\n  }\n\n  boost::math::chi_squared chisq1(1);\n\n  double pval = max(dbl_dmin, pv); // to prevent underflow\n  Tstat = quantile(complement(chisq1, pval)); // chisq stat\n  logp = -log10(pval); // -log10p\n\n}\n\n// get logp from chisq(k)\nvoid get_logp(double& logp, const double& Tstat, double const& df){\n\n  boost::math::chi_squared chisqK(df);\n\n  if( (Tstat < 0) && (fabs(Tstat) < 1e-6)){logp = 0; return;} // num err\n  else if(Tstat<0) {logp = -1; return;} // fail\n  double pv = cdf(complement(chisqK, Tstat));\n\n  if(pv == 0) logp = log10(2) - 0.5 * df * log10(2) - boost::math::lgamma(df * 0.5) / log(10) + 0.5 * (df-2) * log10(Tstat) - 0.5 * Tstat * M_LOG10E ;\n  else logp = log10(pv);\n\n  logp *= -1;\n\n}\n\n// get chisq1 & pval from logp\nvoid get_chisq_stat_pv(double& pv, double& Tstat, double const& logp, double const& dbl_dmin, double const& log10_dbl_dmin){\n\n  if(logp<0) { // fail\n    pv = -1; \n    Tstat = 0;\n    return;\n  }\n\n  boost::math::chi_squared chisq1(1);\n\n  if(logp > log10_dbl_dmin){\n    double val = logp * log(100) + log(2/M_PI);\n    Tstat = val - log(val); // approximation for small p-values using Lambert W function\n    pv = dbl_dmin; // prevent underflow\n  } else {\n    pv = pow(10, -logp);\n    Tstat = quantile(complement(chisq1, pv)); // chisq stat\n  
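}\n\n}\n\n// Illustrative round trip between the p-value helpers above (values approximate; DBL_MIN from <cfloat>\n// stands in for the caller-supplied underflow guards):\n//   double logp, chisq, pv;\n//   get_logp(5e-8, logp, chisq, DBL_MIN);                         // logp ~ 7.30, chisq ~ 29.7\n//   get_chisq_stat_pv(pv, chisq, logp, DBL_MIN, -log10(DBL_MIN)); // recovers pv ~ 5e-8\n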
}\n\n}\n\nvoid allocate_mat(MatrixXd& M, int const& nrows, int const& ncols){\n  M.resize(nrows, ncols);\n}\n\nstd::string print_mat_dims(MatrixXd const& mat){\n  std::ostringstream buffer;\n  buffer << \"#rows=\" << mat.rows() << \" | #cols=\" <<  mat.cols();\n  return buffer.str();\n}\n\nint parseLine(char* line){\n    // This assumes that a digit will be found and the line ends in \" kB\".\n    int i = strlen(line);\n    const char* p = line;\n    while (*p <'0' || *p > '9') p++;\n    line[i-3] = '\\0';\n    i = atoi(p);\n    return i;\n}\n\nvoid print_obj(const Ref<const MatrixXd>& mat, string const& fname){\n  // write obj to file\n  IOFormat Fmt(FullPrecision, DontAlignCols, \" \", \"\\n\", \"\", \"\",\"\",\"\");\n  ofstream ofile;\n  ofile.open(fname);\n  ofile << mat.format(Fmt) << \"\\n\";\n  ofile.close();\n}\n\nint get_mem(){ // in MB\n    FILE* file = fopen(\"/proc/self/status\", \"r\");\n    double result = -1;\n    char line[128];\n\n    if (file == NULL) return result; // /proc/self/status may be absent (e.g. non-Linux)\n\n    while (fgets(line, 128, file) != NULL){\n        if (strncmp(line, \"VmRSS:\", 6) == 0){\n            result = parseLine(line) / 1024.0;\n            break;\n        }\n    }\n    fclose(file);\n    return result;\n}\n\nstd::string print_mem(){\n  return \"memory usage=\" + to_string( get_mem() ) + \"MB\";\n}\n\nvoid set_threads(struct param* params) {\n\n#if defined(_OPENMP)\n  omp_set_num_threads(params->threads); // set threads in OpenMP\n  params->neff_threads = params->threads;\n#endif\n#if defined(WITH_MKL)\n  mkl_set_num_threads(params->threads);\n#endif\n  setNbThreads(params->threads);\n\n}\n\nvoid check_seed(uint& seed, bool const& skip_gen_seed) {\n  if(!skip_gen_seed) {\n    std::random_device rd;\n    seed = rd();\n  }\n  srand(seed);\n}\n"
  },
  {
    "path": "src/Regenie.hpp",
    "content": "/* \n\n   This file is part of the regenie software package.\n\n   Copyright (c) 2020-2024 Joelle Mbatchou, Andrey Ziyatdinov & Jonathan Marchini\n\n   Permission is hereby granted, free of charge, to any person obtaining a copy\n   of this software and associated documentation files (the \"Software\"), to deal\n   in the Software without restriction, including without limitation the rights\n   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n   copies of the Software, and to permit persons to whom the Software is\n   furnished to do so, subject to the following conditions:\n\n   The above copyright notice and this permission notice shall be included in all\n   copies or substantial portions of the Software.\n\n   THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n   SOFTWARE.\n\n*/\n\n#ifndef REGENIE_H\n#define REGENIE_H\n\n\n#include <vector>\n#include <set>\n#include <string>\n#include <iostream>\n#include <algorithm>\n#include <iomanip>\n#include <random>\n#include <memory>\n#include <map>\n#include <fstream>\n#include <math.h>       /* exp */\n#include <stdio.h>\n#include <stdlib.h>\n#include <thread>\n#include <sys/types.h>\n#include <sys/stat.h>\n\n// if using external LAPACK routines\n#ifdef WITH_OPENBLAS\n// fix conflict between complex and older boost versions\n#include <complex>\n#define lapack_complex_float std::complex<float>\n#define lapack_complex_double std::complex<double>\n#include \"lapacke.h\"\n#elif defined(WITH_MKL)\n#include \"mkl.h\"\n#include \"mkl_lapacke.h\"\n#endif\n\n#ifdef WITH_HTSLIB\n#include \"remeta/regenie_ld_matrix_writer.hpp\"\n#endif\n\n#if defined(__GNUC__)\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored \"-Wmisleading-indentation\"\n#pragma GCC diagnostic ignored \"-Wint-in-bool-context\"\n#pragma GCC diagnostic ignored \"-Wparentheses\"\n#endif\n#include <boost/math/distributions.hpp>\n\n#include \"Eigen/Dense\"\n#include \"Eigen/StdVector\"\n#include <Eigen/SparseCore>\n\n#if defined(__GNUC__)\n#pragma GCC diagnostic pop\n#endif\n\n#ifdef __linux__\n#include <omp.h>\n#endif\n\n#define MAXFILELEN 2001\n\n#define BIT_SET(a,b) ((a) |= (1ULL<<(b)))\n#define BIT_UNSET(a,b) ((a) &= ~(1ULL << (b)))\n#define CHECK_BIT(a,b) ((a) & (1ULL<<(b)))\n\ntypedef unsigned char uchar;\ntypedef unsigned int uint;\ntypedef unsigned long long uint64;\ntypedef Eigen::Array<bool,Eigen::Dynamic,1> ArrayXb;\ntypedef Eigen::Matrix<bool,Eigen::Dynamic,1> VectorXb;\ntypedef Eigen::Matrix<bool,Eigen::Dynamic,Eigen::Dynamic> MatrixXb;\ntypedef Eigen::Map<Eigen::ArrayXd > MapArXd;\ntypedef Eigen::Map<const Eigen::ArrayXd > MapcArXd;\ntypedef Eigen::Map<Eigen::ArrayXf > MapArXf;\ntypedef Eigen::Map<const Eigen::ArrayXf > MapcArXf;\ntypedef Eigen::Map<Eigen::MatrixXd > MapMatXd;\ntypedef Eigen::Map<const Eigen::MatrixXd > MapcMatXd;\ntypedef Eigen::Map<Eigen::MatrixXf > MapMatXf;\ntypedef Eigen::Map<const Eigen::MatrixXf > MapcMatXf;\ntypedef Eigen::Map<ArrayXb> MapArXb;\ntypedef Eigen::Map<const ArrayXb> MapcArXb;\ntypedef Eigen::Array<uint16_t,Eigen::Dynamic,1> ArrayXt;\ntypedef 
Eigen::Array<uint64,Eigen::Dynamic,1> ArrayXui;\ntypedef Eigen::SparseVector<double> SpVec;\ntypedef Eigen::SparseMatrix<double> SpMat;\ntypedef Eigen::SparseVector<float> SpVecf;\ntypedef Eigen::SparseMatrix<float> SpMatf;\ntypedef Eigen::SparseMatrix<bool> SpMatb;\n\ninline bool file_exists (const std::string& name) {\n  struct stat buffer;   \n  return (stat (name.c_str(), &buffer) == 0); \n}\n\n// for the log file\nclass mstream\n{\n  public:\n    std::ofstream coss;\n\n    template <class S>\n      mstream& operator<< (const S& val)\n      {\n        coss << val;\n        std::cout << val;\n        return *this;\n      }\n\n    mstream& operator<< (std::ostream& (*pfun)(std::ostream&))\n    {\n      pfun(coss);\n      pfun(std::cout);\n      return *this;\n    };\n\n    mstream(void);\n    ~mstream(void);\n};\n\n\nclass MeasureTime {\n\n  public:\n    std::chrono::steady_clock::time_point begin, end;\n    std::chrono::high_resolution_clock::time_point ms_begin;\n    time_t start_time_info, end_time_info;\n\n    void init() {\n      auto start = std::chrono::system_clock::now(); // wall clock\n      start_time_info = std::chrono::system_clock::to_time_t( start ); \n      begin = std::chrono::steady_clock::now(); // to measure elapsed time\n    }\n\n    void stop(){\n      auto endtime = std::chrono::system_clock::now(); \n      end_time_info = std::chrono::system_clock::to_time_t( endtime ); \n      end = std::chrono::steady_clock::now();\n    }\n\n    void start_ms() {\n      ms_begin = std::chrono::high_resolution_clock::now(); // wall clock\n    }\n\n    std::string stop_ms(){\n      auto ms_end = std::chrono::high_resolution_clock::now(); // wall clock\n      auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(ms_end - ms_begin);\n      std::ostringstream buffer;\n      buffer << \"done (\" << duration.count() << \"ms)\";\n      return buffer.str();\n    }\n\n    MeasureTime(void);\n    ~MeasureTime(void);\n};\n\n\nstruct param {\n\n  std::string err_help = \"For list of arguments, run with option --help\\n\"; // for checks\n  std::string webinfo = \"For more information, use option '--help' or visit the website: https://rgcgithub.github.io/regenie/\"; \n\n  //////\n  // global options\n  int run_mode; // running in null model fitting (=1) or association testing (=2)\n  bool test_mode = false; // step 1: false; step 2 = true\n  int trait_mode = 0; // 0=QT,1=BT,2=CT,3=T2E\n  bool strict_mode = false; // remove individuals with any NA\n  bool bgenSample = false; // .sample file for bgen file\n  bool gzOut = false; // to compress output files (.loco and .regenie files)\n  bool transposedPheno = false, tpheno_iid_only = false;\n  bool condition_snps = false, condition_file = false;\n  uint32_t max_condition_vars = 10000;\n  int sex_specific = 0; // 0 = all; 1 = male-only; 2=female-only\n\n  // filters \n  bool rm_indivs = false; // user specify to remove genotyped samples from analysis\n  bool keep_indivs = false; // user specify to keep only select genotyped samples in the analysis\n  bool keep_snps = false, keep_or = false; // user specify to keep select snps in analysis\n  bool rm_snps = false, rm_or = false; // user specify to remove snps from analysis\n  bool mk_snp_map = false, keep_snp_map = false;\n  bool select_phenos = false, select_phenos_rm = false, force_qt_run = false; // user specify which phenotype columns to use\n  bool select_covs = false, select_covs_rm = false, cat_cov = false, print_cov_betas = false; // user specify which covariate columns to use 
and if categorical covars present\n  int max_cat_levels = 10; // maximum number of categories of categorical covars\n  bool select_chrs = false; // user specify which chromosomes to test\n  std::vector<std::string> covar_names;\n  Eigen::MatrixXd cov_betas, xtx_inv_diag;\n  Eigen::ArrayXd cov_sds;\n\n  // other global options\n  const std::string missing_pheno_str = \"NA\";\n  const double missing_value_double = -999;\n  const float missing_value_float = -999;\n  int nChrom = 23; // total number of chromosome numbers (sex chromosomes collapsed in chr23)\n  bool CC_ZeroOne = true; // BT: 0/1 encoding?\n  int mcc = 10; // minimum case count\n  double numtol = 1e-6, qr_tol = 1e-7;\n  double numtol_cox = 2.5e-4; // tolerance level for cox\n  double numtol_cox_stephalf = 2.5e-4; // tolerance level for cox\n  double numtol_beta_cox = 1e-8; // tolerance level for cox\n  double numtol_firth = 2.5e-4; // tolerance level for firth\n  double numtol_eps = 10 * std::numeric_limits<double>::epsilon();\n  double tol = 1e-8; // for logistic regression\n  double eigen_val_rel_tol = 1e-15;\n  double const_cov_cox_tol = 1e-6;\n  double nl_dbl_dmin = 10.0 * std::numeric_limits<double>::min();\n  double log10_nl_dbl_dmin = -log10(nl_dbl_dmin);\n  int threads = 0, neff_threads = 1;\n  bool t2e_event_l0 = false;\n  bool t2e_l1_pi6 = false;\n  bool cox_nofirth = false;\n  bool coxscore_exact = false;\n  bool verbose = false, debug = false;\n  bool early_exit = false, l1_full_samples = false, rint = false, rerint = false, rerintcov = false;\n  bool split_l0 = false, run_l0_only = false, run_l1_only = false; // run level 0 in parallel across different jobs\n  std::map<std::string, bool> select_pheno_l1;\n  int njobs, job_num, parallel_nGeno, parallel_nBlocks, parallel_nSnps;\n  int start_block = 1;\n  bool use_adam = false, adam_mini = true; // use ADAM for log. 
reg.\n  double adam_alpha = 0.001, adam_beta1 = 0.9, adam_beta2 = 0.999, adam_eps = 1e-7, adam_batch_size = 128;\n  std::vector<Eigen::ArrayXi> adam_indices;\n  uint rng_seed;\n  std::mt19937_64* rng_rd;\n\n  // for input data\n  uint32_t n_samples = 0, n_analyzed = 0; // number of samples\n  int n_pheno = 0; // number of phenotypes\n  int n_cov = 0; // number of covariates\n  int ncov, ncov_analyzed, ncov_interaction; // number of linearly independent covariates\n  uint32_t n_variants = 0, nvs_stored = 0; // number of variants in genotype file\n  std::map <std::string, uint32_t> FID_IID_to_ind;\n  std::vector< std::vector<std::string> > FIDvec; // store FID/IID separately (for write-samples option)\n  bool with_bgi = false, zlib_compress; // input bgi index file for BGEN format and compression format\n  uint BGENbits = 0; // bit-encoding used in BGEN file\n  bool ref_first = false; // ordering of REF/ALT alleles in input genotype file\n  Eigen::ArrayXi sex; // 0=unknown, 1=male, 2=female\n  std::vector<Eigen::ArrayXd> bed_lookup_table; // plink bed lookup table\n  ArrayXb pheno_pass, pheno_fail_nullreg;\n  Eigen::MatrixXi pheno_counts; // track N for each pheno\n\n  // step 1 \n  int block_size = -1; // number of SNPs per block\n  int cv_folds = 5; // number of CV folds\n  int n_block = -1; // number of blocks to run\n  int total_n_block = 0; // number of blocks to run across all chrs\n  int n_ridge_l0 = 5; // number of ridge parameters at level 0\n  int n_ridge_l1 = 5; // number of ridge parameters at level 1\n  double alpha_prior = -1; // to set MAF dependent prior on the effect sizes\n  int chunk_mb = 1000; // max amount of memory to use with LOOCV\n  bool user_ridge_params_l0 = false, user_ridge_params_l1 = false; // if user specifies ridge parameters\n  bool use_loocv = false; // specify whether to use LOOCV [note: this is not used if method=1]\n  bool make_loco = true; // specify whether to compute & ouput LOCO predictions\n  bool print_prs = false; // specify to print PRS (i.e. 
no LOCO used)\n  bool write_blups = false; // write BLUP predictions for each chromosome\n  bool use_rel_path = false; // write relative paths in pred.list file\n  bool write_l0_pred = false; // specify whether to write level 0 predictions to file to save on RAM\n  bool rm_l0_pred = true; // specify whether to delete written level 0 predictions after level 1\n  bool print_block_betas = false, test_l0 = false, select_l0 = false; // print betas from level 0 within each block (for debugging)\n  double rm_l0_pct = 0;\n  std::string l0_pvals_file;\n  bool l0_event = false;\n  bool force_run = false; // if using more than max nvariants in step 1\n  int max_step1_variants = 1e6; // prevent users using too many step 1 variants\n  int niter_max_ridge = 100, niter_max_ridge_adam = 25; // max number of iterations for ridge logistic reg.\n  int niter_max_line_search_ridge = 100; // max number of iterations for line search in ridge logistic reg.\n  double l1_ridge_tol = 1e-4; // tolerance level for convergence criteria\n  double l1_ridge_eps = 1e-5; // epsilon used to set weights for 0/1 probabilities\n  double l0_snp_pval_thr = -1;\n  uint32_t print_snpcount = 0; \n  std::vector<Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic> >  beta_print_out;\n  Eigen::ArrayXd lambda; // ridge parameters at level 0\n  std::vector<Eigen::ArrayXd> tau; // ridge parameters at level 1\n  // TO REMOVE\n  bool within_sample_l0 = false; // specify to use within-sample predictions as features at level 1 (default is to use out-of-sample predictions)\n  Eigen::ArrayXi cv_sizes;\n\n\n  // step 2\n  bool rm_missing_qt = true; // remove missing individuals when performing test with QTs\n  std::string file_type; // type of the genotype file format;\n  bool streamBGEN = true; //  for BGEN v1.2 with 8-bit encoding\n  bool fastMode = true; // use fast version of step 2 \n  bool dosage_mode = false; // track if dosages are present for step 2\n  bool split_by_pheno = true; // specify whether to write testing result in separate phenotype files\n  bool skip_blups = false, blup_cov = false;\n  bool with_flip = true; // can flip to minor allele for all variants\n  bool use_prs = false; // adjust for whole genome PRS (no LOCO)\n  double min_MAC = 5, min_MAC_mask, minHOMs = 0, prop_zero_thr = 0.5; // minimum MAC of SNPs in testing mode\n  bool setMinMAC = false;\n  double min_INFO = 0; // minimum INFO score of SNPs (dosages) in testing mode\n  bool setMinINFO = false;\n  bool write_samples = false; // write sample IDs for each trait\n  double alpha_pvalue = 0.05, zcrit, z_thr, chisq_thr; // significance threshold above which to use firth correction\n  int test_type = 0; // add=0/dom=1/rec=2 test\n  bool w_interaction = false, interaction_cat = false, interaction_snp = false, interaction_prs = false, interaction_file = false, w_ltco = false, print_vcov = false, hlm_vquad = true, int_add_extra_term = false, int_add_esq = false, add_homdev = false; // interaction test\n  int interaction_istart = 0, ltco_chr;\n  uint64 interaction_snp_offset; // index in genotype file\n  bool force_robust = false, force_hc4 = false, no_robust = false; // when using robust SE for rare variants with QTs\n  double rareMAC_inter = 1000; // MAC below which to use HLM\n  int n_tests_per_variant = 1;\n  std::vector<std::string> interaction_lvl_names; // name of levels if using categorical variable for test\n  bool gwas_condtl = true;\n  std::string condtl_suff;\n  // spa\n  bool use_SPA = false; // saddlepoint approximation to estimate pvalue\n  int 
niter_max_spa = 1000; \n  double tol_spa = pow( std::numeric_limits<double>::epsilon(), 0.25);\n  // firth\n  bool firth = false;// firth correction using LRT\n  bool firth_approx = false, skip_fast_firth = false; // approx. to Firth LRT\n  bool write_null_firth = false, use_null_firth = false, compute_all_chr = false; // write/use null coefficients from approx. Firth\n  int niter_max = 50; // max number of iterations for logistic reg.\n  int niter_max_firth = 250, niter_max_firth_adam = 25; // max number of iterations in Firth logistic reg.\n  int niter_max_firth_null = 1000; // max number of iterations in Firth logistic reg. null model\n  int niter_max_line_search = 25; // max number of iterations for line search in logistic reg.\n  int maxstep = 5; // max step size in penalized logistic regression\n  int maxstep_null = 25; // max step size in null penalized logistic regression\n  bool fix_maxstep_null = false; // if user specifies max step size\n  bool back_correct_se = false; // for SE with Firth\n  // extra\n  bool print_pheno_name = false; // add phenotype name when writing to file with sample IDs\n  bool htp_out = false, htp_use_eventname = false, uncapped_pvals = false, af_cc = false, skip_cov_res = false; \n  std::string cohort_name; // Name of cohort to add in HTP output\n  bool set_range = false, skip_dosage_comp = false;\n  int range_chr; \n  double range_min, range_max; // use genomic region to filter variants\n  std::string build_code = \"hg38\"; // to identify chrX PAR region bounds\n  uint32_t par1_max_bound, par2_min_bound;\n  std::string forced_MAC_snpfile;\n  double forced_MAC = -1;\n\n  // for use with remeta\n  bool remeta_save_ld = false;\n  double remeta_ld_spr = 0.01;\n\n  // snp sets (masks/joint tests)\n  bool snp_set = false; \n  bool build_mask = false; \n  std::map<std::string, uint> vc_tests_map = { {\"acatv\", 0}, {\"skat\", 1}, {\"skato\", 2}, {\"skato-acat\", 3}, {\"acato\", 4} };\n  uint vc_test = 0;\n  bool apply_gene_pval_strategy = false;\n  std::string genep_mask_sets_file = \"\";\n  std::map<std::string, bool> mask_map;\n  double vc_maxAAF = 1; // max AAF for variants in SKAT/ACAT gene-based tests\n  bool w_anno_lab = false, vc_with_weights = false, vc_multiply_weights = false;\n  int vc_weight_col;\n  bool check_mask_files = false, strict_check_burden = false, fail_check = false;\n  bool skip_test = false; // skip computing tests\n  bool joint_test = false; // for step 2 joint testing\n  std::string burden = \"\"; // type of burden test;\n  uint max_set_size = 1000; // maximum number of predictors in joint test\n  bool set_select_list = false; // comma separated list of sets given\n  bool keep_sets = false, rm_sets = false; // user specify to filter sets in analysis\n  bool w_regions = false; // categorize by set regions \n  uint max_cat = 64, nmax_regions = 64; // maximum number of annotations (to fit in uint64)\n  std::vector<std::string> mbins; // temporary object to store aaf bins\n  bool mask_rule_max = true, mask_rule_comphet = false; // default use max to combine mask\n  std::string mask_rule = \"max\";\n  bool set_aaf = false;// for user-given AAFs for building masks\n  bool aaf_file_wSingletons = false;//for choosing snps in singleton masks\n  bool singleton_carriers = false; // carrier count used to define singletons\n  uint64 max_bsize = 0; // number of SNPs per variant set\n  bool write_masks = false, write_setlist = false, write_mask_snplist = false; //write masks to bed file\n  bool mask_loo = false, mask_lodo = false;\n  bool 
use_max_bsize = false; // set bsize to max set size\n  bool p_joint_only = false;\n  std::string mask_loo_name, mask_loo_set, mask_loo_region, masks_loo_snpfile; // for LOO with masks\n  double mask_loo_aaf;\n  bool nnls_out_all = false, nnls_adaptive = false, nnls_mt_weights = false;\n  int nnls_napprox = 10;\n  double acat_a1 = 1, acat_a2 = 25, skat_a1 = 1, skat_a2 = 25, skat_tol = 1e-5; // for ACAT & SKAT test\n  int skat_collapse_MAC = 10;\n  bool skip_cf_burden = false;\n  Eigen::ArrayXd skato_rho; // rho parameter from skat-o\n\n  // multi-trait tests \n  bool trait_set = false; \n  int n_tests_multitrait = 1; // number of tests per trait set\n  bool split_by_multitrait = false; // specify whether to write multi-trait testing result in separate files\n  // MCC test\n  bool mcc_test = false;\n  bool mcc_apply_thr = false;\n  double mcc_thr = 0.01; // significance threshold above which to use MCC\n  double mcc_thr_nlog10 = 2; \n  double mcc_skew = 0.0; // threshold value of absolute phenotypic skewness to activate MCC\n  // MultiPhen test\n  bool multiphen = false;\n  double multiphen_thr = 0.001; // significance p-value threshold below which to use LRT \n  std::string multiphen_test = \"nocov_score_offset\"; // type of MultiPhen test\n  std::string multiphen_optim = \"WeightHalvingPseudo\"; // type of MultiPhen optimization algo.\n  double multiphen_tol = 2.5e-4; // tolerance level for MultiPhen\n  bool multiphen_trace = false; // trace model fitting performance for MultiPhen\n  double multiphen_firth_mult = 1.0; // multiplier for the Firth penalty\n  int multiphen_verbose = 0; // multiphen verbose level\n  double multiphen_maxstep = 200.0; // max step\n  int multiphen_approx_offset = -1; // MAC when to not use offset for the full MultiPhen model \n  int multiphen_maxit = 150; // maximum number of IRLS iterations \n  int multiphen_maxit2 = 5; // maximum number of step-halving IRLS iterations \n  bool multiphen_strict = false; // strict mode for MultiPhen IRLS\n  double multiphen_pseudo_stophalf = 0.0; // stop step-halving in pseudo model fitting algo.\n  bool multiphen_reset_start = false; // reset start when convergence failure & reusing start\n  std::string multiphen_offset = \"offset_int\";\n  // small n correction for QTs\n  bool mse_full = false;\n  bool t_test = false;\n\n  // ld computation\n  bool getCorMat = false, cor_out_txt = false, cormat_force_vars = false, skip_scaleG = false;\n  int ld_n = 0;\n  double ld_sparse_thr = 0;\n  std::string ld_list_file = \"\";\n  std::vector<uint32_t> ld_sv_offsets;\n  std::map<std::string, uint32_t> extract_vars_order;//order of variants\n};\n\nstruct geno_file_info {\n  std::string file, format;\n  bool with_sample = false, with_bgi = false, ref_first = false;\n  std::string sample;\n};\n\n// for input files\nstruct in_files {\n\n  std::string bed_prefix;\n  std::string pgen_prefix;\n  std::string bgen_file, sample_file, bgi_file = \"\";\n  std::vector<std::string> file_ind_include, file_ind_exclude;\n  std::vector<std::string> file_snps_include, file_snps_exclude;\n  std::vector<std::string> file_snps_include_or, file_snps_exclude_or;\n  std::vector<std::string> file_sets_include, file_sets_exclude;\n  std::string sets_include, sets_exclude;\n  std::string cov_file, pheno_file;\n  std::string loco_tmp_prefix = \"\";\n  std::string split_file;\n  std::string out_file;\n  std::string blup_list_file;\n  std::string null_firth_file;\n  std::vector<std::shared_ptr<std::ofstream>> write_preds_files;\n  std::map<std::string, std::string> 
blup_files;\n  std::vector<std::string> null_firth_files;\n  std::vector<std::string> pheno_names;\n  std::map<std::string, std::string> t2e_map;\n  std::vector<int> chr_counts, chr_read;\n  uint64 bed_block_size; // prevent overflow\n  std::ifstream geno_ifstream;\n  std::vector<uchar> inbed;\n  std::vector<std::vector<uchar>> bed_data_blocks;\n  std::string set_file, new_sets;\n  std::string anno_file, anno_labs_file, mask_file, aaf_file;\n  std::vector<int> bstart, btot; // for parallel l0\n  std::vector<std::string> mprefix; // for parallel l0\n  std::string condition_snps_list; // for conditional analyses\n  geno_file_info condition_snps_info; \n  geno_file_info interaction_snp_info; \n\n};\n\nstruct filter {\n\n  // to filter phenotype/covariates/genotype\n  std::map<std::string, bool> pheno_colKeep_names, pheno_colRm_names, cov_colKeep_names, cov_colRm_names; //cov keep map: true for qVar, false for catVar\n  std::map<int, bool> tpheno_colrm;\n  uint32_t tpheno_indexCol;\n  std::string interaction_cov;\n  std::string interaction_cov_null_level;//if categorical for GxE / or coding for GxG\n  std::map <int, bool> chrKeep_test;\n  std::map <std::string, uint32_t> snpID_to_ind;\n  ArrayXb ind_ignore, has_missing, ind_in_analysis;\n  uint32_t step1_snp_count = 0;\n  std::map <std::string, std::vector<int>> setID_to_ind;//chr,index,is_kept\n  std::map <std::string, uint64> condition_snp_names;\n  std::vector<std::vector<Eigen::ArrayXi>> case_control_indices; //case-control indices across traits\n\n};\n\nstruct remeta_sumstat_writer {\n\n#ifdef WITH_HTSLIB\n  // one matrix per trait\n  std::vector<RegenieLDMatrixWriter> skat_matrix_writers;\n  // Placeholders for the list of variants in the SKAT matrix and\n  // the gene name for the SKAT matrix. These get updated as we scan\n  // through each gene.\n  std::vector<std::string> *skat_snplist;\n  std::string *gene_name;\n  double sparsity_threshold;\n#endif\n};\n\n\nvoid start_log(const std::string&,MeasureTime*,mstream&);\ntemplate <typename T> \nvoid print_args(T,std::map<std::string,bool>&,mstream&);\n\nvoid print_help(bool const&);\nvoid read_params_and_check(int& argc,char *argv[],struct param*,struct in_files*,struct filter*,MeasureTime*,mstream&);\nvoid check_file(std::string const&,std::string const&);\nvoid check_file(std::string const&,std::vector<std::string> const&,std::string const&);\nvoid print_header(std::ostream&);\nEigen::ArrayXd get_unit_params(bool const&,std::string const&,std::vector<std::string> const&,struct param const*,mstream&);\nvoid set_ridge_params(int const&,Eigen::ArrayXd&,mstream&);\nvoid print_usage_info(struct param const*,struct in_files*,mstream&);\nint chrStrToInt(const std::string&, const int&);\nstd::vector<std::string> check_name(std::string const&,mstream&);\nvoid check_build_code(struct param*);\ndouble convertDouble(const std::string&,struct param const*,mstream&);\nfloat convertFloat(const std::string&,struct param const*,mstream&);\nstd::string convert_double_to_str(double const&);\nstd::string convert_logp_raw(double const& logp, double const& log_dbl_min = -log10(std::numeric_limits<double>::min()) - 1);\ndouble convertNumLevel(const std::string&,std::map<std::string,int>&,struct param const*,mstream&);\nvoid check_inter_var(std::string&,std::string&,mstream&);\nstd::string print_csv(const std::vector<std::string>&);\nstd::string print_scsv(const std::vector<std::string>&);\ntemplate <typename T>\nstd::string print_sv(const std::vector<T>&,const std::string&);\nvoid 
removeCarriageReturn(std::string&);\nEigen::ArrayXi get_true_indices(const Eigen::Ref<const ArrayXb>&);\nvoid get_both_indices(std::vector<Eigen::ArrayXi>&,const Eigen::Ref<const ArrayXb>&);\nvoid get_both_indices(std::vector<Eigen::ArrayXi>&,const Eigen::Ref<const ArrayXb>&,const Eigen::Ref<const ArrayXb>&);\nbool is_nan(double const&);\nvoid get_logp_ttest(double&,const double&,const unsigned int&);\nvoid get_logp(double&,const double&);\nvoid get_logp(const double&,double&,double&,const double&);\nvoid get_logp(double&,const double&,const double&);\nvoid get_chisq_stat_pv(double&,double&,const double&,const double&,const double&);\nvoid allocate_mat(Eigen::MatrixXd&,int const&,int const&);\nstd::string print_mat_dims(Eigen::MatrixXd const&);\nvoid print_obj(const Eigen::Ref<const Eigen::MatrixXd>&,std::string const&);\nint parseLine(char*);\nint get_mem();\nvoid check_seed(uint&,bool const&);\nstd::string print_mem();\n\nvoid set_threads(struct param*);\n\ntemplate <typename KEY, typename VALUE> \nbool in_map(KEY element, std::map<KEY,VALUE> const& emap){\n  return emap.find(element) != emap.end();\n}\n\ntemplate <typename T> int sgn(T val) {\n  return (T(0) < val) - (val < T(0));\n}\n\ntemplate<class T> T get_max(T const& val) {\n  return std::numeric_limits<T>::max();\n}\n\ntemplate <typename U> \nstd::string get_test_list(U const& bit_map, std::map<std::string, U>& srt_map){\n\n  std::vector<std::string> test_list;\n  typename std::map <std::string, U>::iterator itr;\n  for (itr = srt_map.begin(); itr !=  srt_map.end(); ++itr) {\n    // skip nnls_pos and neg\n    if(itr->first == \"sbat_pos\" || itr->first == \"sbat_neg\" || itr->first == \"acatv_acat\" || itr->first == \"gene_p\") continue;\n    if( CHECK_BIT(bit_map, itr->second) ) { // add to test list\n      std::string newstr = itr->first;\n      std::transform(newstr.begin(), newstr.end(), newstr.begin(), ::toupper);\n      test_list.push_back( newstr );\n    }\n  }\n  return print_csv(test_list);\n}\n\n\n\n#endif\n"
  },
  {
    "path": "src/SKAT.cpp",
    "content": "/*  \n\n    This file is part of the regenie software package.\n\n    Copyright (c) 2020-2024 Joelle Mbatchou, Andrey Ziyatdinov & Jonathan Marchini\n\n    Permission is hereby granted, free of charge, to any person obtaining a copy\n    of this software and associated documentation files (the \"Software\"), to deal\n    in the Software without restriction, including without limitation the rights\n    to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n    copies of the Software, and to permit persons to whom the Software is\n    furnished to do so, subject to the following conditions:\n\n    The above copyright notice and this permission notice shall be included in all\n    copies or substantial portions of the Software.\n\n    THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n    SOFTWARE.\n\n*/\n\n#include \"Regenie.hpp\"\n#include \"Files.hpp\"\n#include \"Geno.hpp\"\n#include \"Pheno.hpp\"\n#include \"NNLS.hpp\"\n#include \"Joint_Tests.hpp\"\n#include \"survival_data.hpp\"\n#include \"cox_score.hpp\"\n#include \"Step1_Models.hpp\"\n#include \"Step2_Models.hpp\"\n#include \"SKAT.hpp\"\n\n#include \"qf/qfc.h\"\n\nusing namespace Eigen;\nusing namespace std;\nusing namespace boost;\nusing boost::math::normal;\nusing boost::math::chi_squared;\nusing boost::math::non_central_chi_squared;\nusing boost::math::beta_distribution;\n\n// numerical integration using quadpack\n// global variable for SKAT-O if used\nArrayXd flipped_skato_rho = ArrayXd::Zero(1);\nArrayXd skato_Qmin_rho = ArrayXd::Zero(1);\nArrayXd skato_tau = ArrayXd::Zero(1);\nVectorXd skato_lambdas = VectorXd::Zero(1);\ndouble skato_muQ = 0;\ndouble skato_fdavies = 0;\ndouble skato_sdQ = 0;\ndouble skato_dfQ = 0;\ndouble skato_upper = 0;\nint skato_state = 0;\n// for LOVO with BTs\nMatrixXd vc_Rvec_start;\n\n/////////////////////////////////////////////////\n/////////////////////////////////////////////////\n////    Functions for SKAT/SKAT-O\n/////////////////////////////////////////////////\n/////////////////////////////////////////////////\n\nvoid update_vc_gmat(SpMat& mat, ArrayXd& weights, ArrayXd& weights_acat, ArrayXb& ur_ind, int const& start, int const& bs, struct param const& params, const Ref<const ArrayXb>& in_analysis, Ref<MatrixXd> Gmat, vector<variant_block> &all_snps_info, Ref<MatrixXb> Jmat){\n\n  beta_distribution<>  dist(params.skat_a1, params.skat_a2);\n\n  /*if(params.mask_loo){ // update dimensions\n    mat.resize(mat.rows(), bs + Jmat.cols());\n    mat.setZero();\n    weights = ArrayXd::Zero(bs + Jmat.cols(), 1);\n    weights_acat = weights;\n    if(params.debug) cerr << \"Updating VC gmat...\";\n  }*/\n\n#if defined(_OPENMP)\n      setNbThreads(1);\n#pragma omp parallel for schedule(dynamic)\n#endif\n  for (int i = 0; i < bs; ++i) {\n\n    MapArXd Gvec (Gmat.col(i).data(), Gmat.rows(), 1);\n    double maf;\n\n    if(Jmat.row(start + i).any()){ // if variant is in at least one mask\n      // check if ultra-rare (if so set to 0)\n      if(ur_ind(start + i)){\n        Jmat.row(start+i).array() = false; // ignore variant for all sets\n        Gvec = 0; 
// don't store the variant\n        continue;\n      }\n\n      // flip if af is above 0.5\n      if( all_snps_info[i].af1 > 0.5 ) Gvec = (Gvec == -3).select(-3, 2 - Gvec);\n      maf = min(all_snps_info[i].af1, 1 - all_snps_info[i].af1);\n\n      // impute missing with mean\n      Gvec = (Gvec == -3).select(2 * maf, Gvec);\n      // mask individuals\n      Gvec *= in_analysis.cast<double>();\n      // store SKAT weight\n      if(!params.vc_with_weights){\n        weights(start + i) = pdf(dist, maf);\n        weights_acat(start + i) = weights(start + i) * weights(start + i) * maf * (1-maf); // for acatv\n      } else if(params.vc_multiply_weights){\n        weights(start + i) *= pdf(dist, maf);\n        weights_acat(start + i) = weights(start + i) * weights(start + i) * maf * (1-maf); // for acatv\n      }\n    } else Gvec = 0; // otherwise set the column to 0\n\n  }\n#if defined(_OPENMP)\n      setNbThreads(params.threads);\n#endif\n\n  mat.middleCols(start, bs) = Gmat.sparseView();\n}\n\n// with lovo\nvoid update_vc_gmat(SpMat& mat, ArrayXd& weights, ArrayXd& weights_acat, SpMat const& Gmat_sp, const Ref<const ArrayXb>& ur_ind, const Ref<const ArrayXb>& to_flip, const Ref<const ArrayXd>& mafs, const Ref<const ArrayXb>& in_analysis, struct param const& params){\n\n  beta_distribution<>  dist(params.skat_a1, params.skat_a2);\n  int bs = Gmat_sp.cols(), bsize = floor(1e9/8.0/params.n_samples), start = 0;\n  int nchunks = ceil( bs * 1.0 / bsize );\n\n  for (int j = 0; j < nchunks; ++j) {\n    if( j == (nchunks-1) ) bsize = bs - j * bsize;\n    MatrixXd Gmat = MatrixXd::Zero(Gmat_sp.rows(), bsize); // no mt with spmat\n\n#if defined(_OPENMP)\n    setNbThreads(1);\n#pragma omp parallel for schedule(dynamic)\n#endif\n    for (int i = 0; i < bsize; ++i) {\n      if(ur_ind(start + i)) continue; // check if ultra-rare\n\n      MapArXd Gvec (Gmat.col(i).data(), Gmat.rows(), 1);\n      Gvec = Gmat_sp.col(start + i);\n      if(!(Gvec>0).any()) continue; // not used\n\n      // flip if af is above 0.5\n      if( to_flip(start + i) ) Gvec = (Gvec == -3).select(Gvec, 2 - Gvec);\n\n      // impute missing with mean\n      Gvec = (Gvec == -3).select(2 * mafs(start + i), Gvec);\n      // mask individuals\n      Gvec *= in_analysis.cast<double>();\n      // store SKAT weight\n      if(!params.vc_with_weights){\n        weights(start + i) = pdf(dist, mafs(start + i));\n        weights_acat(start + i) = weights(start + i) * weights(start + i) * mafs(start + i) * (1-mafs(start + i)); // for acatv\n      } else if(params.vc_multiply_weights){\n        double v_pdf = pdf(dist, mafs(start + i));\n        weights(start + i) *= v_pdf;\n        weights_acat(start + i) = weights(start + i) * weights(start + i) * mafs(start + i) * (1-mafs(start + i)); // for acatv\n      }\n\n    }\n#if defined(_OPENMP)\n    setNbThreads(params.threads);\n#endif\n    mat.middleCols(start, bsize) = Gmat.sparseView();\n    start += bsize;\n  }\n\n}\n\nbool get_custom_weights(string const& setname, Ref<ArrayXd> weights, Ref<ArrayXd> weights_acat, vector<snp>& snpinfo, vector<uint64> const& indices){\n\n  // load custom user weights\n  for(size_t i = 0; i < indices.size(); i++){\n    if(!in_map(setname, snpinfo[ indices[i] ].set_weight)) // this shouldn't happen\n      throw \"no custom weight found for variant \" + snpinfo[ indices[i] ].ID;\n    weights(i) = snpinfo[ indices[i] ].set_weight[setname];\n  }\n  double sum_w = weights.sum(); // make weights sum to 1\n  if(sum_w == 0) return false;\n\n  weights /= sum_w;\n  weights_acat = 
weights;\n\n  return true;\n}\n\nbool get_custom_weights(string const& setname, Ref<ArrayXd> weights, vector<snp>& snpinfo, vector<uint64> const& offsets){\n\n  // load custom user weights\n  for(size_t i = 0; i < offsets.size(); i++){\n    if(!in_map(setname, snpinfo[ offsets[i] ].set_weight)) // this shouldn't happen\n      throw \"no custom weight found for variant \" + snpinfo[ offsets[i] ].ID;\n    weights(i) = snpinfo[ offsets[i] ].set_weight[setname];\n  }\n  double sum_w = weights.sum(); // make weights sum to 1\n  if(sum_w == 0) return false;\n\n  weights /= sum_w;\n  return true;\n}\n\n// with lovo\nbool get_custom_weights(string const& setname, Ref<ArrayXd> weights, vector<snp>& snpinfo, const Ref<const ArrayXi>& indices, vector<uint64> const& offsets){\n\n  // load custom user weights\n  for(int i = 0; i < indices.size(); i++){\n    if(!in_map(setname, snpinfo[ offsets[indices(i)] ].set_weight)) // this shouldn't happen\n      throw \"no custom weight found for variant \" + snpinfo[ offsets[indices(i)] ].ID;\n    weights(i) = snpinfo[ offsets[indices(i)] ].set_weight[setname];\n  }\n  double sum_w = weights.sum(); // make weights sum to 1\n  if(sum_w == 0) return false;\n\n  weights /= sum_w;\n  return true;\n}\n\nvoid compute_vc_masks(SpMat& mat, Ref<ArrayXd> weights, Ref<ArrayXd> weights_acat, SpMat& vc_rare_mask, Ref<MatrixXb> vc_rare_non_miss, const Ref<const MatrixXd>& X, struct ests const& m_ests, struct f_ests const& fest, const Ref<const MatrixXd>& yres,  const Ref<const MatrixXd>& yraw, const Ref<const MatrixXb>& masked_indivs, MatrixXb& Jmat, vector<variant_block> &all_snps_info, const Ref<const ArrayXb>& in_analysis, struct param const& params, struct remeta_sumstat_writer& remeta_sumstats){\n\n  prep_ultra_rare_mask(mat, weights, weights_acat, vc_rare_mask, vc_rare_non_miss, Jmat, in_analysis, params);\n\n  //if(params.debug) check_sizes(mat, vc_rare_mask, Jmat);\n\n  if(params.trait_mode==0)\n    compute_vc_masks_qt(mat, weights, weights_acat, X, yres, Jmat, all_snps_info, params, remeta_sumstats);\n  else if(params.trait_mode==1)\n    compute_vc_masks_bt(mat, weights, weights_acat, X, m_ests, fest, yres, yraw, masked_indivs, Jmat, all_snps_info, params, remeta_sumstats);\n  else throw \"not yet implemented\";\n\n}\n\nvoid prep_ultra_rare_mask(SpMat& mat, Ref<ArrayXd> weights, Ref<ArrayXd> weights_acat, SpMat& rare_mask_mat, Ref<MatrixXb> rare_mask_non_miss, MatrixXb& Jmat, const Ref<const ArrayXb>& in_analysis, struct param const& params){\n\n  int nsets = Jmat.cols();\n  int bs = mat.cols() - nsets; // contains single variants + ultra-rare masks\n  // check if ultra-rare mask is used in any of the sets\n  if(rare_mask_mat.nonZeros() == 0) {\n    if(params.debug) cerr << \"No ultra-rare variants (MAC <= \" << params.skat_collapse_MAC << \") present in any of the sets.\\n\";\n    return;\n  }\n\n  ArrayXd gv;\n  boost::math::beta_distribution<>  dist(params.skat_a1, params.skat_a2);\n  double mean, maf;\n\n  for(int iset = 0; iset < nsets; iset++){\n\n    // set entries of individuals not included in the analysis to 0\n    gv = rare_mask_mat.col(iset).cwiseProduct(in_analysis.matrix().cast<double>());\n\n    // check if any UR variants were included\n    Jmat(bs+iset, iset) = (gv>0).any();\n    //cerr << \"#\" << iset << \" \" << Jmat.col(iset).any() << \"/\"<< (gv>0).count() << \"\\n\";\n    if(!Jmat(bs+iset, iset)) continue;\n    rare_mask_non_miss.col(iset).array() = rare_mask_non_miss.col(iset).array() && in_analysis; \n\n    // compute mean\n    mean = 
gv.sum() / rare_mask_non_miss.col(iset).count();\n    maf = min(mean/2, 1 - mean/2);\n\n    if(params.vc_with_weights && !params.vc_multiply_weights){ // weights already incorporated when taking max of weighted geno\n      weights(bs+iset) = weights_acat(bs+iset) = 1;\n    } else { // use default SKAT/ACAT weight\n      weights(bs+iset) = pdf(dist, maf);\n      weights_acat(bs+iset) = weights(bs+iset) * weights(bs+iset) * maf * (1-maf);\n    }\n\n    if(params.debug) cerr << \"set #\" << iset+1 << \"; rare_mask [mu,nZ,w,w_a] = [\" << mean << \",\" << (gv>0).count() << \",\" << weights(bs+iset) << \",\" << weights_acat(bs+iset) << \"]\";\n\n    // impute missing entries which were set to 0\n    gv = (!in_analysis || rare_mask_non_miss.col(iset).array()).select(gv, mean);\n    mat.col(bs + iset) = gv.matrix().sparseView();\n\n    if(params.debug) cerr << \"\\n\";\n\n  }\n\n}\n\n/////////////////////\n/////////////////////\n///// QTs\n/////////////////////\n/////////////////////\nvoid compute_vc_masks_qt(SpMat& mat, const Ref<const ArrayXd>& weights, const Ref<const ArrayXd>& weights_acat, const Ref<const MatrixXd>& X, const Ref<const MatrixXd>& yres, const Ref<const MatrixXb>& Jmat, vector<variant_block> &all_snps_info, struct param const& params, struct remeta_sumstat_writer& remeta_sumstats){\n\n  if(params.skato_rho.size() == 1)\n    compute_vc_masks_qt_fixed_rho(mat, weights, weights_acat, X, yres, Jmat, all_snps_info, params.skato_rho(0), params.skat_tol, params.nl_dbl_dmin, params.vc_test, params.debug, params, remeta_sumstats);\n  else\n    compute_vc_masks_qt(mat, weights, weights_acat, X, yres, Jmat, all_snps_info, params.skato_rho, params.skat_tol, params.nl_dbl_dmin, params.vc_test, params.debug, params, remeta_sumstats);\n\n}\n\n// for a given rho value\nvoid compute_vc_masks_qt_fixed_rho(SpMat& mat, const Ref<const ArrayXd>& weights, const Ref<const ArrayXd>& weights_acat, const Ref<const MatrixXd>& X, const Ref<const MatrixXd>& yres, const Ref<const MatrixXb>& Jmat, vector<variant_block> &all_snps_info, double const& rho, double const& skat_lambda_tol, double const& nl_dbl_dmin, uint const& vc_test, bool const& debug, struct param const& params, struct remeta_sumstat_writer& remeta_sumstats){\n\n  bool with_acatv = CHECK_BIT(vc_test,0);\n  bool with_skat = (vc_test>>1)&15;\n  int jcol, n_pheno = yres.cols(), nnz;\n  double c1 = sqrt(1 - rho);\n  ArrayXd D;\n  VectorXd lambdas;\n  MatrixXd Qs, Qb, Svals, Kmat, sum_stats, pvals;\n\n  ArrayXi snp_indices = get_true_indices(Jmat.rowwise().any());\n  int bs = snp_indices.size(); // subset to snps included in at least 1 skat mask\n  if( !(weights(snp_indices) > 0).any() ) return;\n\n  // slice sparse matrix (cannot use indexing)\n  SpMat Jstar (Jmat.rows(), bs); // Mall x M\n  Jstar.reserve(bs);\n  MatrixXd weights_ordered;\n  if(params.remeta_save_ld) {\n    weights_ordered.resize(bs, 1);\n  }\n  for(int i = 0; i < bs; i++) {\n    Jstar.insert(snp_indices(i), i) = weights(snp_indices(i));\n    if(params.remeta_save_ld) {\n        weights_ordered(i) = weights(snp_indices(i));\n    }\n  }\n  SpMat mat2 = mat * Jstar; // mat should be pretty sparse since major-ref\n  mat.setZero(); mat.resize(0,0); mat.data().squeeze(); // not needed anymore\n  Jstar.setZero(); Jstar.resize(0,0); Jstar.data().squeeze(); // not needed anymore\n\n  // get score stats & kernel matrices\n  Svals.resize(n_pheno, bs); // PxM\n  Kmat.resize(bs, bs); // MxM\n  compute_vc_mats_qt(Svals, Kmat, X, yres, mat2);\n  mat2.setZero(); mat2.resize(0,0); 
mat2.data().squeeze(); // not needed anymore\n\n#ifdef WITH_HTSLIB\n  if(params.remeta_save_ld && remeta_sumstats.skat_snplist->size() > 0) {\n    MatrixXd weight_inv = weights_ordered.array()\n                                  .inverse()\n                                  .matrix()\n                                  .asDiagonal();\n    MatrixXd unweighted_Kmat = weight_inv * Kmat * weight_inv;\n    for(int i = 0; i < n_pheno; ++i) {\n      if(remeta_sumstats.sparsity_threshold > 0) {\n        remeta_sumstats.skat_matrix_writers[i].write_matrix_sparse(\n          unweighted_Kmat,\n          *remeta_sumstats.gene_name,\n          *remeta_sumstats.skat_snplist,\n          remeta_sumstats.sparsity_threshold\n        );\n      } else {\n        remeta_sumstats.skat_matrix_writers[i].write_matrix_dense(\n          unweighted_Kmat,\n          *remeta_sumstats.gene_name,\n          *remeta_sumstats.skat_snplist\n        );      \n      }\n    }\n  }\n#endif\n\n  // SKAT for all masks & traits\n  compute_skat_q(Qs, Qb, Svals, Kmat, pvals, weights(snp_indices) != 0, Jmat(snp_indices, all), with_acatv, debug);\n\n  // for now don't parallelize this as it causes issues with qfc lib\n  // but should be ok since dimensions don't depend on N\n  for(size_t imask = 0; imask < all_snps_info.size(); imask++){\n\n    variant_block* block_info = &(all_snps_info[imask]);\n    if(debug) cerr << \"Mask : \" << block_info->mask_name << \"\\n\";\n    if(block_info->sum_stats_vc.size()>0) block_info->sum_stats_vc.clear();\n    if(block_info->skip_for_vc) continue;\n    sum_stats = MatrixXd::Constant(n_pheno, 2, -1); // chisq & logp\n\n    // get index of mask in Jmat\n    jcol = block_info->col_jmat_skat;\n    if(jcol < 0) continue; // this should not happen though\n    MapcArXb Jvec (Jmat.col(jcol).data(), Jmat.rows(), 1);\n    nnz = Jvec.count();\n    if(debug) cerr << \"#sites in mask=\" << nnz << \"\\n\";\n    if(nnz == 0) continue;\n\n    // subset to variants kept in mask\n    ArrayXi m_indices = get_true_indices(Jvec(snp_indices)); // across markers kept in skat tests\n    ArrayXi mall_indices = snp_indices(m_indices); // across all markers in set\n    if(debug) cerr <<\"W(skat):\\n\" << weights(mall_indices).head(min(20,nnz)).matrix().transpose() << \"\\n\";\n\n    // ACAT-V \n    if(with_acatv && (weights_acat(mall_indices) > 0).any()){\n      for(int ph = 0; ph < n_pheno; ph++)\n        get_acatv_pv( ph, pvals(m_indices, ph), weights_acat(mall_indices), sum_stats(ph, 1), sum_stats(ph, 0), nl_dbl_dmin, debug); \n      block_info->sum_stats_vc[\"ACATV\"] = sum_stats;\n      sum_stats.array() = -1; // reset\n    }\n    if(!with_skat) continue;\n\n    // get eigen values of Rsqrt*V*Rsqrt\n    //if(debug) cerr << \"Kmat:\\n\" << Kmat(m_indices, m_indices) << \"\\nrho_Kmat:\\n\" << get_RsKRs(Kmat(m_indices, m_indices), rho, c1) << \"\\n\";\n    get_lambdas(lambdas, get_RsKRs(Kmat(m_indices, m_indices), rho, c1), skat_lambda_tol);\n    if(lambdas.size() == 0) continue;\n    if(debug) cerr << \"L:\" << lambdas.head(min(150, (int) lambdas.size())).transpose() << \"\\n\";\n\n    // compute test statistic & p-value\n    for(int ph = 0; ph < n_pheno; ph++)\n      compute_fixed_skato_p(sum_stats(ph, 1), sum_stats(ph, 0), Qs(ph, jcol), Qb(ph, jcol), rho, lambdas, nl_dbl_dmin, debug);\n\n    if( (sum_stats.col(1).array() >= 0).any() ){\n      string test_name = (rho > 0 ? 
\"SKAT-RHO\" : \"SKAT\");\n      block_info->sum_stats_vc[test_name] = sum_stats;\n    }\n\n  }\n\n}\n\nvoid compute_vc_masks_qt(SpMat& mat, const Ref<const ArrayXd>& weights, const Ref<const ArrayXd>& weights_acat, const Ref<const MatrixXd>& X, const Ref<const MatrixXd>& yres, const Ref<const MatrixXb>& Jmat, vector<variant_block> &all_snps_info, const Ref<const ArrayXd>& rho_vec, double const& skat_lambda_tol, double const& nl_dbl_dmin, uint const& vc_test, bool const& debug, struct param const& params, struct remeta_sumstat_writer& remeta_sumstats){\n\n  bool with_acatv = CHECK_BIT(vc_test,0);\n  bool with_omnibus = (vc_test>>2)&7; // any of the omnibus tests\n  bool with_skato_int = CHECK_BIT(vc_test,2);\n  bool with_skato_acat = CHECK_BIT(vc_test,3);\n  bool with_acato = CHECK_BIT(vc_test,4);\n  int jcol, n_pheno = yres.cols(), nnz, nrho = rho_vec.size();\n  double minp, gamma1, gamma2, gamma3, tmpv, log10_nl_dbl_dmin = -log10(nl_dbl_dmin);\n  ArrayXd D, p_acato, flip_rho_sqrt;\n  VectorXd lambdas;\n  MatrixXd Qs, Qb, Qopt, Svals, Kmat, cvals, sum_stats, pvals, r_outer_sum;\n  MatrixXd pvs_skato, chisq_skato, pvs_skato_acat, chisq_skato_acat, pvs_acato, chisq_acato;\n\n  cvals.resize(nrho, 5);\n  skato_Qmin_rho.resize(nrho, 1);\n  if(with_acato) p_acato.resize(nrho+1);\n  flipped_skato_rho = 1 - rho_vec;\n  flip_rho_sqrt = flipped_skato_rho.sqrt();\n\n  ArrayXi snp_indices = get_true_indices(Jmat.rowwise().any());\n  int bs = snp_indices.size(); // subset to snps included in at least 1 skat mask\n  if( !(weights(snp_indices) > 0).any() ) return;\n\n  // slice sparse matrix (cannot use indexing)\n  SpMat Jstar (Jmat.rows(), bs); // Mall x M\n  Jstar.reserve(bs);\n  MatrixXd weights_ordered;\n  if(params.remeta_save_ld) {\n    weights_ordered.resize(bs, 1);\n  }\n  for(int i = 0; i < bs; i++) {\n    Jstar.insert(snp_indices(i), i) = weights(snp_indices(i));\n    if(params.remeta_save_ld) {\n        weights_ordered(i) = weights(snp_indices(i));\n    }\n  }\n    \n  SpMat mat2 = mat * Jstar; // mat should be pretty sparse since major-ref\n  mat.setZero(); mat.resize(0,0); mat.data().squeeze(); // not needed anymore\n  Jstar.setZero(); Jstar.resize(0,0); Jstar.data().squeeze(); // not needed anymore\n\n  // get score stats & kernel matrices\n  Svals.resize(n_pheno, bs); // PxM\n  Kmat.resize(bs, bs); // MxM\n  compute_vc_mats_qt(Svals, Kmat, X, yres, mat2);\n  mat2.setZero(); mat2.resize(0,0); mat2.data().squeeze(); // not needed anymore\n\n#ifdef WITH_HTSLIB\n  if(params.remeta_save_ld && remeta_sumstats.skat_snplist->size() > 0) {\n    MatrixXd weight_inv = weights_ordered.array()\n                                  .inverse()\n                                  .matrix()\n                                  .asDiagonal();\n    MatrixXd unweighted_Kmat = weight_inv * Kmat * weight_inv;\n    for(int i = 0; i < n_pheno; ++i) {\n      if(remeta_sumstats.sparsity_threshold > 0) {\n        remeta_sumstats.skat_matrix_writers[i].write_matrix_sparse(\n          unweighted_Kmat,\n          *remeta_sumstats.gene_name,\n          *remeta_sumstats.skat_snplist,\n          remeta_sumstats.sparsity_threshold\n        );\n      } else {\n        remeta_sumstats.skat_matrix_writers[i].write_matrix_dense(\n          unweighted_Kmat,\n          *remeta_sumstats.gene_name,\n          *remeta_sumstats.skat_snplist\n        );      \n      }\n    }\n  }\n#endif\n\n  // SKAT for all masks & traits\n  compute_skat_q(Qs, Qb, Svals, Kmat, pvals, weights(snp_indices) != 0, Jmat(snp_indices, all), with_acatv, 
debug);\n\n  // for now don't parallelize this as it causes issues with qfc lib\n  // but should be ok since dimensions don't depend on N\n  for(size_t imask = 0; imask < all_snps_info.size(); imask++){\n\n    variant_block* block_info = &(all_snps_info[imask]);\n    if(debug) cerr << \"Mask : \" << block_info->mask_name << \"\\n\";\n    if(block_info->sum_stats_vc.size()>0) block_info->sum_stats_vc.clear();\n    if(block_info->skip_for_vc) continue;\n    pvs_skato = MatrixXd::Constant(n_pheno, nrho, -1);\n    chisq_skato = MatrixXd::Constant(n_pheno, nrho, -1);\n    if(with_skato_acat){\n      pvs_skato_acat = MatrixXd::Constant(n_pheno, 1, -1);\n      chisq_skato_acat = MatrixXd::Constant(n_pheno, 1, -1);\n    }\n    if(with_acato){\n      pvs_acato = MatrixXd::Constant(n_pheno, 1, -1);\n      chisq_acato = MatrixXd::Constant(n_pheno, 1, -1);\n    }\n    sum_stats = MatrixXd::Constant(n_pheno, 2, -1); // chisq & logp\n\n    // get index of mask in Jmat\n    jcol = block_info->col_jmat_skat;\n    if(jcol < 0) continue; // this should not happen though\n    MapcArXb Jvec (Jmat.col(jcol).data(), Jmat.rows(), 1);\n    nnz = Jvec.count();\n    if(debug) cerr << \"#sites in mask=\" << nnz << \"\\n\";\n    if(nnz == 0) continue;\n\n    // subset to variants kept in mask\n    ArrayXi m_indices = get_true_indices(Jvec(snp_indices)); // across markers kept in skat tests\n    ArrayXi mall_indices = snp_indices(m_indices); // across all markers in set\n\n    // ACAT-V \n    if(with_acatv && (weights_acat(mall_indices) > 0).any()){\n      for(int ph = 0; ph < n_pheno; ph++)\n        get_acatv_pv( ph, pvals(m_indices, ph), weights_acat(mall_indices), sum_stats(ph, 1), sum_stats(ph, 0), nl_dbl_dmin, debug); \n      block_info->sum_stats_vc[\"ACATV\"] = sum_stats;\n      sum_stats.array() = -1; // reset\n    }\n    if(!with_omnibus) continue;\n\n    // get eigen values of Zt(I-U)Z\n    if(!get_ztz_evals(Kmat(m_indices, m_indices), r_outer_sum, gamma1, gamma2, gamma3, skat_lambda_tol, debug)) continue;\n\n    if(nnz > 1){\n      get_skato_mom(skato_muQ, skato_fdavies, skato_sdQ, skato_dfQ, skato_tau, skato_lambdas, gamma1, gamma2, gamma3, rho_vec, debug);\n      if(skato_sdQ < 0) continue; // failed\n    }\n    Qopt = Qs.col(jcol) * flipped_skato_rho.matrix().transpose() + Qb.col(jcol) * rho_vec.matrix().transpose(); // P x Nrho\n    if(debug) cerr << \"Q:\" << Qopt.row(0) << \"\\n\";\n\n    for(int j = 0; j < nrho; j++){\n\n      // get eigen values of Rsqrt*ZtZ*Rsqrt\n      get_lambdas(lambdas, get_RsKRs(Kmat(m_indices, m_indices), r_outer_sum, gamma1, rho_vec(j), flip_rho_sqrt(j)), skat_lambda_tol);\n      //if(debug) cerr << \"rho:\" << rho_vec(j) << \"-> L:\" << lambdas.transpose() << \"\\n\";\n      if(lambdas.size() == 0) {\n        if(debug) cerr << \"all eigen values are 0 for rho = \" << rho_vec(j) << \"\\n\";\n        break; // SKAT & SKAT-O failed\n      }\n      // needed for skato (M>1)\n      if(nnz > 1)  get_cvals(j, cvals, lambdas);\n\n      for(int ph = 0; ph < n_pheno; ph++) \n        compute_fixed_skato_p(pvs_skato(ph, j), chisq_skato(ph, j), Qopt(ph, j), rho_vec(j), lambdas, nl_dbl_dmin);\n\n      // store SKAT results\n      if(rho_vec(j)==0) {\n        sum_stats.col(0) = chisq_skato.col(j);\n        sum_stats.col(1) = pvs_skato.col(j);\n      }\n\n      if(nnz == 1) break; // sum stats same for all rhos\n    }\n\n    if((sum_stats.col(1).array() >= 0).any())\n      block_info->sum_stats_vc[\"SKAT\"] = sum_stats;\n\n    if(nnz == 1) { // same p for all tests\n      
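// with a single site Qs == Qb, so the statistic is identical for every rho and the\n      // omnibus tests (SKATO/ACATO/SKATO-ACAT) inherit the p-value from the first rho\n      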
if((pvs_skato.col(0).array() < 0).all()) continue; // go to next mask set\n      sum_stats.col(0) = chisq_skato.col(0);\n      sum_stats.col(1) = pvs_skato.col(0);\n      if(with_acato)\n        block_info->sum_stats_vc[\"ACATO\"] = sum_stats;\n      if(with_skato_acat)\n        block_info->sum_stats_vc[\"SKATO-ACAT\"] = sum_stats;\n      if(with_skato_int)\n        block_info->sum_stats_vc[\"SKATO\"] = sum_stats;\n      continue;\n    }\n\n    // for each phenotype, check all SKATO pvs are defined\n    if((pvs_skato.array() < 0).rowwise().any().all()) {\n      if(debug) cerr << \"Some SKATO pvalues failed to compute for all the phenotypes (p = \" << pvs_skato << \" )\\n\";\n      continue; // go to next mask set if no phenotype with all pvals defined\n    }\n\n    // Get minimum of p-values and corresponding chisq quantile for each rho\n    for(int ph = 0; ph < n_pheno; ph++) {\n      if( (pvs_skato.row(ph).array() < 0).any() ) {\n        if(debug) cerr << \"One of the SKATO pvalues failed for phenotype (p = \" << pvs_skato.row(ph) << \" )\\n\";\n        continue;\n      }\n\n      if(with_skato_acat){\n        if(debug) cerr << \"skato-acat logp=\" << pvs_skato.row(ph) <<\"\\n\";\n        pvs_skato_acat(ph,0) = get_acat(pvs_skato.row(ph).array());\n        get_chisq_stat_pv(tmpv, chisq_skato_acat(ph, 0), pvs_skato_acat(ph,0), nl_dbl_dmin, log10_nl_dbl_dmin);\n      } \n      if(with_acato){ // include acatv pvalue\n        p_acato(0) = block_info->sum_stats_vc[\"ACATV\"](ph, 1);\n        p_acato.tail(nrho) = pvs_skato.row(ph).transpose().array();\n        if(debug) cerr << \"acato logp=\" << p_acato.matrix().transpose() <<\"\\n\";\n        pvs_acato(ph,0) = get_acat(p_acato);\n        get_chisq_stat_pv(tmpv, chisq_acato(ph, 0), pvs_acato(ph,0), nl_dbl_dmin, log10_nl_dbl_dmin);\n      }\n      if(with_skato_int){\n        minp = max(nl_dbl_dmin, pow(10, -pvs_skato.row(ph).maxCoeff())); // prevent underflow\n        get_Qmin(nrho, minp, skato_Qmin_rho, cvals);\n        if(debug) cerr << \"Qmin=\" << skato_Qmin_rho.matrix().transpose() << \"\\nminP=\" << minp <<\"; logp=\" << pvs_skato.row(ph) <<\"\\n\";\n        // numerical integration\n        get_skato_pv(pvs_skato(ph,0), chisq_skato(ph, 0), minp, nrho, nl_dbl_dmin, debug);\n      }\n\n    }\n\n    // SKATO-ACAT\n    if(with_skato_acat && !(pvs_skato_acat.col(0).array() < 0).all()){\n      sum_stats.col(0) = chisq_skato_acat.col(0);\n      sum_stats.col(1) = pvs_skato_acat.col(0);\n      block_info->sum_stats_vc[\"SKATO-ACAT\"] = sum_stats;\n    }\n    // ACATO\n    if(with_acato && !(pvs_acato.col(0).array() < 0).all()){\n      sum_stats.col(0) = chisq_acato.col(0);\n      sum_stats.col(1) = pvs_acato.col(0);\n      block_info->sum_stats_vc[\"ACATO\"] = sum_stats;\n    }\n    // SKATO\n    if(with_skato_int && !(pvs_skato.col(0).array() < 0).all()){\n      sum_stats.col(0) = chisq_skato.col(0);\n      sum_stats.col(1) = pvs_skato.col(0);\n      block_info->sum_stats_vc[\"SKATO\"] = sum_stats;\n    }\n\n  }\n\n}\n\nvoid compute_vc_mats_qt(Ref<MatrixXd> Svals, Ref<MatrixXd> Kmat, const Ref<const MatrixXd>& X, const Ref<const MatrixXd>& yres, Ref<SpMat> GW){\n\n  // project covariates\n  MatrixXd WGtX = GW.transpose() * X; // MxK\n\n  // get test statistics (PxM matrix)\n  // need to use Gresid (must have yres centered)\n  Svals = yres.transpose() * GW - (yres.transpose() * X) * WGtX.transpose(); // 2nd term: PxK * KxM\n\n  // get kernel matrix (MxM matrix)\n  Kmat = -WGtX * WGtX.transpose();\n  Kmat += GW.transpose() * GW;\n\n}\n\nvoid 
compute_skat_q(MatrixXd& Qs, MatrixXd& Qb, Ref<MatrixXd> Svals, const Ref<const MatrixXd>& Kmat, MatrixXd& pvals, const Ref<const ArrayXb>& mask_w, const Ref<const MatrixXb>& Jmat, bool const& w_acatv, bool const& debug){\n\n  Qs = Svals.array().square().matrix(); // PxM\n  // if using acat-v, get single variant p-values\n  if(w_acatv) {\n    pvals.resize(Svals.cols(), Svals.rows()); // MxP\n    get_single_pvs(pvals, (Qs * mask_w.select(1/Kmat.diagonal().array(),1).matrix().asDiagonal()).transpose()); \n  }\n  Qs *= Jmat.cast<double>(); // P x Km\n\n  // burden\n  Qb = (Svals * Jmat.cast<double>()).array().square().matrix();\n  if(debug) cerr << \"Q_SKAT for all masks:\\n\" << Qs << \"\\nQ_BURDEN for all masks:\\n\" << Qb << \"\\n\";\n\n}\n\nvoid get_acatv_pv(int const& ph, const Ref<const MatrixXd>& pvals, const Ref<const ArrayXd>& weights, double& logp, double& chisq, double const& nl_dbl_dmin, bool const& debug){\n\n  if(debug && (ph==0)) {\n    int bs = pvals.rows(), nmax = min(150, bs); // avoid over-printing \n    cerr << \"SV log10p:\\n\" << pvals.col(0).transpose().array().head(nmax) << \n    \"\\nWsq:\\n\" << weights.head(nmax).matrix().transpose() << \"\\n\\n\";\n  }\n\n  double tmpv, log10_nl_dbl_dmin = -log10(nl_dbl_dmin);\n  logp = get_acat(pvals.array(), weights);\n  get_chisq_stat_pv(tmpv, chisq, logp, nl_dbl_dmin, log10_nl_dbl_dmin);\n\n}\n\nvoid get_single_pvs(Ref<MatrixXd> pvals, const Ref<const MatrixXd>& chisq_vals){\n  for(int ph = 0; ph < pvals.cols(); ph++)\n    for(int isnp = 0; isnp < pvals.rows(); isnp++)\n      get_logp(pvals(isnp, ph), chisq_vals(isnp, ph));\n  //cerr << pvals.leftCols(1) << \"\\n\\n\" << chisq_vals.leftCols(1) << endl;\n}\n\n/////////////////////\n/////////////////////\n///// BTs\n/////////////////////\n/////////////////////\n\nvoid compute_vc_masks_bt(SpMat& mat, const Ref<const ArrayXd>& weights, const Ref<const ArrayXd>& weights_acat, const Ref<const MatrixXd>& X, struct ests const& m_ests, struct f_ests const& fest, const Ref<const MatrixXd>& yres, const Ref<const MatrixXd>& yraw, const Ref<const MatrixXb>& masked_indivs, const Ref<const MatrixXb>& Jmat, vector<variant_block> &all_snps_info, struct param const& params, struct remeta_sumstat_writer& remeta_sumstats){\n\n  if(params.skato_rho.size() == 1)\n    compute_vc_masks_bt_fixed_rho(mat, weights, weights_acat, X, m_ests, fest, yres, yraw, masked_indivs, Jmat, all_snps_info, params.skato_rho(0), params.skat_tol, params.nl_dbl_dmin, params.firth || params.use_SPA, params.vc_test, params.debug, params, remeta_sumstats);\n  else \n    compute_vc_masks_bt(mat, weights, weights_acat, X, m_ests, fest, yres, yraw, masked_indivs, Jmat, all_snps_info, params.skato_rho, params.skat_tol, params.nl_dbl_dmin, params.firth || params.use_SPA, params.vc_test, params.debug, params, remeta_sumstats);\n\n}\n\nvoid compute_vc_masks_bt_fixed_rho(SpMat& mat, const Ref<const ArrayXd>& weights, const Ref<const ArrayXd>& weights_acat, const Ref<const MatrixXd>& X, struct ests const& m_ests, struct f_ests const& fest, const Ref<const MatrixXd>& yres, const Ref<const MatrixXd>& yraw, const Ref<const MatrixXb>& masked_indivs, const Ref<const MatrixXb>& Jmat, vector<variant_block> &all_snps_info, double const& rho, double const& skat_lambda_tol, double const& nl_dbl_dmin, bool const& apply_correction, uint const& vc_test, bool const& debug, struct param const& params, struct remeta_sumstat_writer& remeta_sumstats){\n\n  bool with_acatv = CHECK_BIT(vc_test,0);\n  bool with_skat = (vc_test>>1)&15;\n  int jcol, 
n_pheno = yres.cols();\n  double c1 = sqrt(1 - rho);\n  VectorXd lambdas, Qs, Qb;\n  ArrayXb masked_sites;\n  ArrayXd Svals, Rvec_sqrt, pvals;\n  MatrixXd Kmat, GtWX, sum_stats;\n  SpMat GWs;\n\n  MatrixXd pvs_m = MatrixXd::Constant( n_pheno, all_snps_info.size(), -1);\n  MatrixXd chisq_m = MatrixXd::Constant( n_pheno, all_snps_info.size(), -1);\n  MatrixXd pvs_m_a = MatrixXd::Constant( n_pheno, all_snps_info.size(), -1);\n  MatrixXd chisq_m_a = MatrixXd::Constant( n_pheno, all_snps_info.size(), -1);\n  sum_stats = MatrixXd::Constant(n_pheno, 2, -1); // chisq & logp\n\n  ArrayXi snp_indices = get_true_indices(Jmat.rowwise().any());\n  int bs = snp_indices.size(); // subset to snps included in at least 1 skat mask\n\n  // slice sparse matrix (cannot use indexing)\n  SpMat Jstar (Jmat.rows(), bs); // M x Mall\n  Jstar.reserve(bs);\n  MatrixXd weights_ordered;\n  if(params.remeta_save_ld) {\n    weights_ordered.resize(bs, 1);\n  }\n  for(int i = 0; i < bs; i++) {\n    Jstar.insert(snp_indices(i), i) = weights(snp_indices(i));\n    if(params.remeta_save_ld) {\n        weights_ordered(i) = weights(snp_indices(i));\n    }\n  }\n  SpMat mat2 = mat * Jstar; // mat should be pretty sparse since major-ref\n  if (!mat2.isCompressed()) mat2.makeCompressed();\n  mat.setZero(); mat.resize(0,0); mat.data().squeeze(); // not needed anymore\n  Jstar.setZero(); Jstar.resize(0,0); Jstar.data().squeeze(); // not needed anymore\n  Svals.resize(bs, 1); // Mx1\n  Kmat.resize(bs, bs); // MxM\n\n  for(int ph = 0; ph < n_pheno; ph++) { \n    if( !params.pheno_pass(ph) ) continue;\n\n    MapcArXd Y (yraw.col(ph).data(), yraw.rows());\n    MapcArXb mask (masked_indivs.col(ph).data(), yraw.rows());\n    MapcArXd Wsqrt (m_ests.Gamma_sqrt.col(ph).data(), yraw.rows());\n    MapcMatXd XWsqrt (m_ests.X_Gamma[ph].data(), yraw.rows(), m_ests.X_Gamma[ph].cols());\n    MapcArXd phat (m_ests.Y_hat_p.col(ph).data(), yraw.rows());\n\n    // get score stats & kernel matrices\n    compute_vc_mats_bt(Svals, Kmat, XWsqrt, Wsqrt * mask.cast<double>(), yres.col(ph), mat2, GWs, GtWX);\n\n    // apply firth/spa corrections (set R=0 if failed)\n    masked_sites = (weights(snp_indices) > 0);\n    Rvec_sqrt = masked_sites.cast<double>();\n    if(apply_correction)\n      correct_vcov(ph, snp_indices, weights(snp_indices), masked_sites, Rvec_sqrt, Svals, Kmat, mat2, GtWX, XWsqrt, GWs, Wsqrt, phat, Y, mask, fest, params);\n\n  #ifdef WITH_HTSLIB\n    if(params.remeta_save_ld && remeta_sumstats.skat_snplist->size() > 0) {\n      MatrixXd weight_inv = weights_ordered.array()\n                                    .inverse()\n                                    .matrix()\n                                    .asDiagonal();\n      MatrixXd unweighted_Kmat = weight_inv * Kmat * weight_inv;\n      if(remeta_sumstats.sparsity_threshold > 0) {\n        remeta_sumstats.skat_matrix_writers[ph].write_matrix_sparse(\n          unweighted_Kmat,\n          *remeta_sumstats.gene_name,\n          *remeta_sumstats.skat_snplist,\n          remeta_sumstats.sparsity_threshold\n        );\n      } else {\n        remeta_sumstats.skat_matrix_writers[ph].write_matrix_dense(\n          unweighted_Kmat,\n          *remeta_sumstats.gene_name,\n          *remeta_sumstats.skat_snplist\n        );\n      }\n    }\n  #endif\n\n    if(with_acatv) {\n      pvals.resize(Svals.size()); // Mx1\n      get_single_pvs_bt(pvals, masked_sites.select(Svals.square() / Kmat.diagonal().array(),1)); \n    }\n\n    // SKAT for all masks (Kmx1)\n    compute_skat_q(Qs, Qb, 
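/* zero out scores at sites where the Firth/SPA correction failed (R=0) */ 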
masked_sites.select(Svals, 0), Kmat, Jmat(snp_indices, all), debug);\n\n    for(size_t imask = 0; imask < all_snps_info.size(); imask++){\n\n      variant_block* block_info = &(all_snps_info[imask]);\n      if(debug) cerr << \"Mask : \" << block_info->mask_name << \"\\n\";\n      if(block_info->skip_for_vc) continue;\n\n      // get index of mask in Jmat\n      jcol = block_info->col_jmat_skat;\n      if(jcol < 0) continue; // this should not happen though\n      MapcArXb Jvec (Jmat.col(jcol).data(), Jmat.rows(), 1);\n      int npass = (Jvec(snp_indices) && masked_sites).count();\n      if(debug) cerr << \"#sites in mask=\" << npass << \"\\n\";\n      if(npass == 0) continue;\n\n      // subset to variants kept in mask\n      ArrayXi m_indices = get_true_indices(Jvec(snp_indices) && masked_sites); // across markers kept in skat tests\n      ArrayXi mall_indices = snp_indices(m_indices); // across all markers in set\n\n      // ACAT-V \n      if(with_acatv)\n        get_acatv_pv( ph, pvals(m_indices).matrix(), weights_acat(mall_indices), pvs_m_a(ph, imask), chisq_m_a(ph, imask), nl_dbl_dmin, debug); \n\n      if(!with_skat) continue;\n\n      // correct using burden test\n      double rfrac = 1;\n      if(apply_correction && !params.skip_cf_burden && (npass > 1)) {// no need if M=1\n\n        // to slice sparse matrix (cannot use indexing)\n        SpMat Jtmp (bs, npass); // Mall x Mpass\n        Jtmp.reserve(npass);\n        for(int i = 0; i < npass; i++)\n          Jtmp.insert(m_indices(i), i) = 1;\n\n        if(!correct_vcov_burden(ph, rfrac, Qb(jcol), Kmat(m_indices, m_indices).sum(), GtWX(all, m_indices), XWsqrt, GWs * Jtmp, Wsqrt, phat, Y, mask, fest.cov_blup_offset, params))\n          continue; // failed to correct with burden mask\n      }\n\n    // get eigen values of Rsqrt*V*Rsqrt\n      get_lambdas(lambdas, get_RsKRs(rfrac * Kmat(m_indices, m_indices), rho, c1), skat_lambda_tol);\n      if(lambdas.size() == 0) continue;\n      //cerr << lambdas << \"\\n\\n\";\n\n      // compute SKAT\n      compute_fixed_skato_p(pvs_m(ph, imask), chisq_m(ph, imask), Qs(jcol), Qb(jcol), rho, lambdas, nl_dbl_dmin, debug);\n\n    }\n  }\n\n  // store results\n  string test_name = (rho > 0 ? 
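/* nonzero fixed rho gets its own label */ 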
\"SKAT-RHO\" : \"SKAT\");\n  for(size_t imask = 0; imask < all_snps_info.size(); imask++){\n    variant_block* block_info = &(all_snps_info[imask]);\n    if(block_info->sum_stats_vc.size()>0) block_info->sum_stats_vc.clear();\n    if((pvs_m_a.col(imask).array() >= 0).any()){// acatv\n      sum_stats.col(0) = chisq_m_a.col(imask);\n      sum_stats.col(1) = pvs_m_a.col(imask);\n      block_info->sum_stats_vc[\"ACATV\"] = sum_stats;\n    }\n    if((pvs_m.col(imask).array() >= 0).any()){\n      sum_stats.col(0) = chisq_m.col(imask);\n      sum_stats.col(1) = pvs_m.col(imask);\n      block_info->sum_stats_vc[test_name] = sum_stats;\n    }\n  }\n\n}\n\nvoid compute_vc_masks_bt(SpMat& mat, const Ref<const ArrayXd>& weights, const Ref<const ArrayXd>& weights_acat, const Ref<const MatrixXd>& X, struct ests const& m_ests, struct f_ests const& fest, const Ref<const MatrixXd>& yres, const Ref<const MatrixXd>& yraw, const Ref<const MatrixXb>& masked_indivs, const Ref<const MatrixXb>& Jmat, vector<variant_block> &all_snps_info, const Ref<const ArrayXd>& rho_vec, double const& skat_lambda_tol, double const& nl_dbl_dmin, bool const& apply_correction, uint const& vc_test, bool const& debug, struct param const& params, struct remeta_sumstat_writer& remeta_sumstats){\n\n  bool with_acatv = CHECK_BIT(vc_test,0);\n  bool with_omnibus = (vc_test>>2)&7;\n  bool with_skato_int = CHECK_BIT(vc_test,2);\n  bool with_skato_acat = CHECK_BIT(vc_test,3);\n  bool with_acato = CHECK_BIT(vc_test,4);\n  int jcol, n_pheno = yres.cols(), nrho = rho_vec.size();\n  double minp, gamma1, gamma2, gamma3, tmpv, log10_nl_dbl_dmin = -log10(nl_dbl_dmin);\n  VectorXd lambdas, Qs, Qb, Qopt;\n  ArrayXb masked_sites;\n  ArrayXd Svals, Rvec_sqrt, pvs_skato, chisq_skato, pvals, p_acato, flip_rho_sqrt;\n  MatrixXd Kmat, GtWX, cvals, sum_stats, r_outer_sum;\n  SpMat GWs;\n\n  MatrixXd pvs_m = MatrixXd::Constant( n_pheno, all_snps_info.size(), -1);//skat\n  MatrixXd chisq_m = MatrixXd::Constant( n_pheno, all_snps_info.size(), -1);\n  MatrixXd pvs_m_o = MatrixXd::Constant( n_pheno, all_snps_info.size(), -1);//skato\n  MatrixXd chisq_m_o = MatrixXd::Constant( n_pheno, all_snps_info.size(), -1);\n  MatrixXd pvs_m_o_acat = MatrixXd::Constant( n_pheno, all_snps_info.size(), -1);//skato-acat\n  MatrixXd chisq_m_o_acat = MatrixXd::Constant( n_pheno, all_snps_info.size(), -1);\n  MatrixXd pvs_m_acato = MatrixXd::Constant( n_pheno, all_snps_info.size(), -1);//acato\n  MatrixXd chisq_m_acato = MatrixXd::Constant( n_pheno, all_snps_info.size(), -1);\n  MatrixXd pvs_m_a = MatrixXd::Constant( n_pheno, all_snps_info.size(), -1);//acatv\n  MatrixXd chisq_m_a = MatrixXd::Constant( n_pheno, all_snps_info.size(), -1);\n  sum_stats = MatrixXd::Constant(n_pheno, 2, -1); // chisq & logp\n  pvs_skato.resize(nrho);\n  chisq_skato.resize(nrho);\n  if(with_acato) p_acato.resize(nrho+1);\n  cvals.resize(nrho, 5);\n  skato_Qmin_rho.resize(nrho, 1);\n  flipped_skato_rho = 1 - rho_vec;\n  flip_rho_sqrt = flipped_skato_rho.sqrt();\n\n  ArrayXi snp_indices = get_true_indices(Jmat.rowwise().any());\n  int bs = snp_indices.size();\n\n  // slice sparse matrix (cannot use indexing)\n  SpMat Jstar (Jmat.rows(), bs); // M x Mall\n  Jstar.reserve(bs);\n  MatrixXd weights_ordered;\n  if(params.remeta_save_ld) {\n    weights_ordered.resize(bs, 1);\n  }\n  for(int i = 0; i < bs; i++) {\n    Jstar.insert(snp_indices(i), i) = weights(snp_indices(i));\n    if(params.remeta_save_ld) {\n        weights_ordered(i) = weights(snp_indices(i));\n    }\n  }\n  SpMat mat2 = mat * Jstar; // mat 
should be pretty sparse since major-ref\n  if (!mat2.isCompressed()) mat2.makeCompressed();\n  mat.setZero(); mat.resize(0,0); mat.data().squeeze(); // not needed anymore\n  Jstar.setZero(); Jstar.resize(0,0); Jstar.data().squeeze(); // not needed anymore\n  Svals.resize(bs, 1); // Mx1\n  Kmat.resize(bs, bs); // MxM\n\n  for(int ph = 0; ph < n_pheno; ph++) { \n    if( !params.pheno_pass(ph) ) continue;\n\n    MapcArXd Y (yraw.col(ph).data(), yraw.rows());\n    MapcArXb mask (masked_indivs.col(ph).data(), yraw.rows());\n    MapcArXd Wsqrt (m_ests.Gamma_sqrt.col(ph).data(), yraw.rows());\n    MapcMatXd XWsqrt (m_ests.X_Gamma[ph].data(), yraw.rows(), m_ests.X_Gamma[ph].cols());\n    MapcArXd phat (m_ests.Y_hat_p.col(ph).data(), yraw.rows());\n\n    // get score stats & kernel matrices\n    compute_vc_mats_bt(Svals, Kmat, XWsqrt, Wsqrt * mask.cast<double>(), yres.col(ph), mat2, GWs, GtWX);\n\n    // apply firth/spa corrections (set R=0 if failed)\n    masked_sites = (weights(snp_indices) > 0);\n    Rvec_sqrt = masked_sites.cast<double>();\n    if(apply_correction) {\n      correct_vcov(ph, snp_indices, weights(snp_indices), masked_sites, Rvec_sqrt, Svals, Kmat, mat2, GtWX, XWsqrt, GWs, Wsqrt, phat, Y, mask, fest, params);\n    }\n\n    #ifdef WITH_HTSLIB\n      if(params.remeta_save_ld && remeta_sumstats.skat_snplist->size() > 0) {\n        MatrixXd weight_inv = weights_ordered.array()\n                                      .inverse()\n                                      .matrix()\n                                      .asDiagonal();\n        MatrixXd unweighted_Kmat = weight_inv * Kmat * weight_inv;\n\n        if(remeta_sumstats.sparsity_threshold > 0) {\n          remeta_sumstats.skat_matrix_writers[ph].write_matrix_sparse(\n            unweighted_Kmat,\n            *remeta_sumstats.gene_name,\n            *remeta_sumstats.skat_snplist,\n            remeta_sumstats.sparsity_threshold\n          );\n        } else {\n          remeta_sumstats.skat_matrix_writers[ph].write_matrix_dense(\n            unweighted_Kmat,\n            *remeta_sumstats.gene_name,\n            *remeta_sumstats.skat_snplist\n          );\n        }\n      }\n    #endif\n\n    if(with_acatv) {\n      pvals.resize(Svals.size()); // Mx1\n      get_single_pvs_bt(pvals, masked_sites.select(Svals.square() / Kmat.diagonal().array(),1)); \n    }\n\n    // SKAT for all masks (Kmx1)\n    compute_skat_q(Qs, Qb, masked_sites.select(Svals, 0), Kmat, Jmat(snp_indices, all), debug);\n\n    for(size_t imask = 0; imask < all_snps_info.size(); imask++){\n\n      variant_block* block_info = &(all_snps_info[imask]);\n      if(debug) cerr << \"Mask : \" << block_info->mask_name << \"\\n\";\n      if(block_info->skip_for_vc) continue;\n\n      // get index of mask in Jmat\n      jcol = block_info->col_jmat_skat;\n      if(jcol < 0) continue; // this should not happen though\n      MapcArXb Jvec (Jmat.col(jcol).data(), Jmat.rows(), 1);\n      int npass = (Jvec(snp_indices) && masked_sites).count();\n      if(debug) cerr << \"#sites in mask=\" << npass << \"\\n\";\n      if(npass == 0) continue;\n\n      // subset to variants kept in mask\n      ArrayXi m_indices = get_true_indices(Jvec(snp_indices) && masked_sites); // across markers kept in skat tests\n      ArrayXi mall_indices = snp_indices(m_indices); // across all markers in set\n\n      // ACAT-V \n      if(with_acatv)\n        get_acatv_pv( ph, pvals(m_indices).matrix(), weights_acat(mall_indices), pvs_m_a(ph, imask), chisq_m_a(ph, imask), nl_dbl_dmin, debug); \n      
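// SKATO forms Q_rho = (1-rho)*Q_skat + rho*Q_burden over the rho grid below and combines\n      // the per-rho p-values; skip the grid if no omnibus test (SKATO/SKATO-ACAT/ACATO) was requested\n      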
if(!with_omnibus) continue;\n\n      // correct using burden test\n      double rfrac = 1;\n      if(apply_correction && !params.skip_cf_burden && (npass > 1)) {// no need if M=1\n\n        // to slice sparse matrix (cannot use indexing)\n        SpMat Jtmp (bs, npass); // Mall x Mpass\n        Jtmp.reserve(npass);\n        for(int i = 0; i < npass; i++)\n          Jtmp.insert(m_indices(i), i) = 1;\n\n        if(!correct_vcov_burden(ph, rfrac, Qb(jcol), Kmat(m_indices, m_indices).sum(), GtWX(all, m_indices), XWsqrt, GWs * Jtmp, Wsqrt, phat, Y, mask, fest.cov_blup_offset, params)) {\n          continue; // failed to correct with burden mask\n        }\n      }\n      if(apply_correction) block_info->cf_burden(ph) = rfrac;\n\n      // get eigen values of Zt(I-U)Z\n      if(!get_ztz_evals(rfrac * Kmat(m_indices, m_indices), r_outer_sum, gamma1, gamma2, gamma3, skat_lambda_tol, debug)) continue;\n\n      if(npass > 1){\n        get_skato_mom(skato_muQ, skato_fdavies, skato_sdQ, skato_dfQ, skato_tau, skato_lambdas, gamma1, gamma2, gamma3, rho_vec, debug);\n        if(skato_sdQ < 0) continue; // failed\n      }\n\n      Qopt = (Qs(jcol) * flipped_skato_rho + Qb(jcol) * rho_vec).matrix(); // Nrho x 1\n      if(debug) cerr << \"Q:\\n\" << std::setprecision(10) << Qopt.transpose() << \"\\n\";\n\n      for(int j = 0; j < nrho; j++){\n\n        // get eigen values of Rsqrt*ZtZ*Rsqrt\n        get_lambdas(lambdas, get_RsKRs(rfrac * Kmat(m_indices, m_indices), r_outer_sum, gamma1, rho_vec(j), flip_rho_sqrt(j)), skat_lambda_tol);\n        if(lambdas.size() == 0) continue;\n        //if(rho_vec(j) >0.9) cerr << \"rho=\" << rho_vec(j) << \"\\nL:\"<<lambdas.matrix().transpose() << \"\\n\";\n\n        // needed for skato (M>1)\n        if(npass > 1)  get_cvals(j, cvals, lambdas);\n\n        compute_fixed_skato_p(pvs_skato(j), chisq_skato(j), Qopt(j), rho_vec(j), lambdas, nl_dbl_dmin);\n\n        // store SKAT results\n        if(rho_vec(j)==0) {\n          pvs_m(ph, imask) = pvs_skato(j);\n          chisq_m(ph, imask) = chisq_skato(j);\n        }\n\n        if(npass == 1) break; // sum stats same for all rhos\n      }\n\n      if(npass == 1) { // same p for all tests\n        if(pvs_skato(0) < 0) continue; // go to next mask set\n        if(with_skato_int){\n          pvs_m_o(ph, imask) = pvs_skato(0);\n          chisq_m_o(ph, imask) = chisq_skato(0);\n        }\n        if(with_skato_acat){\n          pvs_m_o_acat(ph, imask) = pvs_skato(0);\n          chisq_m_o_acat(ph, imask) = chisq_skato(0);\n        }\n        if(with_acato){\n          pvs_m_acato(ph, imask) = pvs_skato(0);\n          chisq_m_acato(ph, imask) = chisq_skato(0);\n        }\n        continue;\n      }\n\n      // check pvs\n      if((pvs_skato < 0).any()) continue;\n\n      if(with_skato_acat){\n        if(debug) cerr << \"skato-acat logp=\" << pvs_skato.matrix().transpose() <<\"\\n\";\n        pvs_m_o_acat(ph, imask) = get_acat(pvs_skato);\n        get_chisq_stat_pv(tmpv, chisq_m_o_acat(ph, imask), pvs_m_o_acat(ph, imask), nl_dbl_dmin, log10_nl_dbl_dmin);\n      } \n      if(with_acato){ // include acatv pvalue\n        p_acato(0) = pvs_m_a(ph, imask);\n        p_acato.tail(nrho) = pvs_skato;\n        if(debug) cerr << \"acato logp=\" << p_acato.matrix().transpose() <<\"\\n\";\n        pvs_m_acato(ph, imask) = get_acat(p_acato);\n        get_chisq_stat_pv(tmpv, chisq_m_acato(ph, imask), pvs_m_acato(ph, imask), nl_dbl_dmin, log10_nl_dbl_dmin);\n      }\n      if(with_skato_int){\n        minp = max(nl_dbl_dmin, pow(10, 
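/* pvs_skato stores -log10 p-values */ 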
-pvs_skato.maxCoeff())); // prevent underflow\n        get_Qmin(nrho, minp, skato_Qmin_rho, cvals);\n        if(debug) cerr << \"Qmin=\" << skato_Qmin_rho.matrix().transpose() << \"\\nminP=\" << minp <<\"; logp=\" << pvs_skato.matrix().transpose() <<\"\\n\";\n        // numerical integration\n        get_skato_pv(pvs_m_o(ph, imask), chisq_m_o(ph, imask), minp, nrho, nl_dbl_dmin, debug);\n      }\n\n    }\n  }\n\n  // store skat results\n  for(size_t imask = 0; imask < all_snps_info.size(); imask++){\n    variant_block* block_info = &(all_snps_info[imask]);\n    if(block_info->sum_stats_vc.size()>0) block_info->sum_stats_vc.clear();\n    if((pvs_m_a.col(imask).array() >= 0).any()){// acatv\n      sum_stats.col(0) = chisq_m_a.col(imask);\n      sum_stats.col(1) = pvs_m_a.col(imask);\n      block_info->sum_stats_vc[\"ACATV\"] = sum_stats;\n    }\n    if((pvs_m.col(imask).array() >= 0).any()){//skat\n      sum_stats.col(0) = chisq_m.col(imask);\n      sum_stats.col(1) = pvs_m.col(imask);\n      block_info->sum_stats_vc[\"SKAT\"] = sum_stats;\n    }\n    if((pvs_m_o_acat.col(imask).array() >= 0).any()){// skato-acat\n      sum_stats.col(0) = chisq_m_o_acat.col(imask);\n      sum_stats.col(1) = pvs_m_o_acat.col(imask);\n      block_info->sum_stats_vc[\"SKATO-ACAT\"] = sum_stats;\n    }\n    if((pvs_m_acato.col(imask).array() >= 0).any()){//acato\n      sum_stats.col(0) = chisq_m_acato.col(imask);\n      sum_stats.col(1) = pvs_m_acato.col(imask);\n      block_info->sum_stats_vc[\"ACATO\"] = sum_stats;\n    }\n    if((pvs_m_o.col(imask).array() >= 0).any()){//skato\n      sum_stats.col(0) = chisq_m_o.col(imask);\n      sum_stats.col(1) = pvs_m_o.col(imask);\n      block_info->sum_stats_vc[\"SKATO\"] = sum_stats;\n    }\n  }\n\n}\n\nvoid compute_vc_mats_bt(Ref<ArrayXd> Svals, Ref<MatrixXd> Kmat, const Ref<const MatrixXd>& XWsqrt, const Ref<const ArrayXd>& Wsqrt, const Ref<const MatrixXd>& yres, Ref<SpMat> Gmat, SpMat& GWs, MatrixXd& GtWX){\n\n  // multiply by sqrt(p(1-p)) and mask entries (NxM)\n  GWs = Wsqrt.matrix().asDiagonal() * Gmat; // NxM\n  GtWX = XWsqrt.transpose() * GWs ; // CxM\n\n  // get score stats for all variants (Mx1)\n  // yres is Wsqrt^{-1}(Y-pi)\n  Svals = (GWs.transpose() * yres).array();\n\n  // kernel matrix for all variants (MxM)\n  Kmat = - GtWX.transpose() * GtWX;\n  Kmat += GWs.transpose() * GWs; // ZtZ\n\n}\n\nvoid compute_skat_q(VectorXd& Qs, VectorXd& Qb, const Ref<const ArrayXd>& Svals, Ref<MatrixXd> Kmat, const Ref<const MatrixXb>& Jmat, bool const& debug){\n    \n    Qs = Jmat.transpose().cast<double>() * Svals.square().matrix();\n    // burden\n    Qb = (Jmat.transpose().cast<double>() * Svals.matrix()).array().square().matrix();\n\n    if(debug) cerr << \"Q_SKAT for all masks:\\n\" << Qs.transpose() << \"\\nQ_BURDEN for all masks:\\n\" << Qb.transpose() << \"\\n\";\n\n}\n\nvoid correct_vcov(int const& ph, const Ref<const ArrayXi>& indices, const Ref<const ArrayXd>& weights, Ref<ArrayXb> masked_sites, Ref<ArrayXd> Rvec_sqrt, const Ref<const ArrayXd>& score_stats, Ref<MatrixXd> Kmat, SpMat const& Gsparse, const Ref<const MatrixXd>& GtWX, const Ref<const MatrixXd>& XWsqrt, SpMat const& GWs, const Ref<const ArrayXd>& Wsqrt, const Ref<const ArrayXd>& phat, const Ref<const ArrayXd>& Y, const Ref<const ArrayXb>& mask, struct f_ests const& fest, struct param const& params){\n\n  apply_correction_cc(ph, indices, weights, Rvec_sqrt, score_stats, Kmat.diagonal().array(), Gsparse, GtWX, XWsqrt, GWs, Wsqrt, phat, Y, mask, fest, params, true);\n  if(params.debug) {\n    int bs 
= Rvec_sqrt.size();\n    cerr << \"Rsqrt:\" << Rvec_sqrt.head(min(150, bs)).matrix().transpose() << \"\\n\";\n  }\n\n  // apply correction factor\n  Kmat = Rvec_sqrt.matrix().asDiagonal() * Kmat * Rvec_sqrt.matrix().asDiagonal();\n  masked_sites = (Rvec_sqrt > 0);\n}\n\n// when using lovo with bts\nvoid check_cc_correction(SpMat& Gsparse, const Ref<const ArrayXd>& weights, const Ref<const MatrixXd>& X, struct ests const& m_ests, struct f_ests const& fest, const Ref<const MatrixXd>& yres, const Ref<const MatrixXd>& yraw, const Ref<const MatrixXb>& masked_indivs, struct param const& params){\n\n  // if no correction, return \n  if(!(params.firth || params.use_SPA)) return;\n\n  vc_Rvec_start.resize(weights.size(), params.n_pheno);\n  vc_Rvec_start.array().colwise() = (weights > 0).cast<double>();\n\n  ArrayXi indices;\n  ArrayXd Svals, varS;\n  MatrixXd GtWX, Kmat;\n  SpMat GWs;\n  Svals.resize(weights.size(), 1); // Mx1\n  Kmat.resize(weights.size(), weights.size()); // MxM\n\n  SpMat mat2 = Gsparse * weights.matrix().asDiagonal(); // include weights to G\n  if (!mat2.isCompressed()) mat2.makeCompressed();\n\n  // loop over each trait\n  for(int ph = 0; ph < params.n_pheno; ph++) { \n    if( !params.pheno_pass(ph) ) continue;\n\n    MapcArXd Y (yraw.col(ph).data(), yraw.rows());\n    MapcArXb mask (masked_indivs.col(ph).data(), yraw.rows());\n    MapcArXd Wsqrt (m_ests.Gamma_sqrt.col(ph).data(), yraw.rows());\n    MapcMatXd XWsqrt (m_ests.X_Gamma[ph].data(), yraw.rows(), m_ests.X_Gamma[ph].cols());\n    MapcArXd phat (m_ests.Y_hat_p.col(ph).data(), yraw.rows());\n\n    // get test stats with no correction\n    compute_vc_mats_bt(Svals, Kmat, XWsqrt, Wsqrt * mask.cast<double>(), yres.col(ph), mat2, GWs, GtWX);\n    varS = Kmat.diagonal().array();\n\n    // apply correction and store Rvecs\n    apply_correction_cc(ph, indices, weights, vc_Rvec_start.col(ph).array(), Svals, varS, mat2, GtWX, XWsqrt, GWs, Wsqrt, phat, Y, mask, fest, params, false);\n  }\n\n  if(params.debug) {\n    int bs = vc_Rvec_start.rows();\n    cerr << \"Rsqrt_start:\" << vc_Rvec_start.block(0, 0, min(150, bs), 1).transpose() << \"\\n\";\n  }\n\n}\n\n// correcting for high cc imbalance\nvoid apply_correction_cc(int const& ph, const Ref<const ArrayXi>& indices, const Ref<const ArrayXd>& weights, Ref<ArrayXd> Rvec, const Ref<const ArrayXd>& score_stats, const Ref<const ArrayXd>& var_score, SpMat const& Gsparse, const Ref<const MatrixXd>& GtWX, const Ref<const MatrixXd>& XWsqrt, SpMat const& GWs, const Ref<const ArrayXd>& Wsqrt, const Ref<const ArrayXd>& phat, const Ref<const ArrayXd>& Y, const Ref<const ArrayXb>& mask, struct f_ests const& fest, struct param const& params, bool const& check_rvec_start){\n\n  bool use_rvec_start = check_rvec_start && (vc_Rvec_start.size() > 0);\n  int npass = Rvec.sum();\n  int ncase = mask.select(Y,0).sum(), n = mask.count(); // approx as can't track samples with missing geno\n\n  // loop over the markers\n#if defined(_OPENMP)\n  setNbThreads(1);\n#pragma omp parallel for schedule(dynamic)\n#endif\n  for (int i = 0; i < Rvec.size(); i++) {\n    if(Rvec(i) == 0) continue;\n    \n    // if already pre-computed\n    if( use_rvec_start ){\n      int ix = indices(i); // match indices (in mask vs in set)\n      if( ix < vc_Rvec_start.rows() ) { // ignore ur masks\n        Rvec(i) = vc_Rvec_start(ix, ph);\n        continue;\n      }\n    }\n\n    bool test_fail = true;\n    double chisq, pv, corrected_var;\n\n    // if Tstat < threshold, no correction done (R=1)\n    double tstat_cur = 
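/* standardized score statistic T = S / sqrt(var(S)) */ 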
score_stats(i) / sqrt(var_score(i));\n    if(fabs(tstat_cur) <= params.z_thr) continue;\n\n    MatrixXd Gres = - XWsqrt * GtWX.col(i); // get genotypic residuals\n    Gres += GWs.col(i);\n\n    if(params.use_SPA){ // SPA\n      run_SPA_test_snp(chisq, pv, tstat_cur, var_score(i), true, Gsparse.col(i), Gres.array(), phat, Wsqrt, mask, test_fail, params.tol_spa, params.niter_max_spa, params.missing_value_double, params.nl_dbl_dmin);\n    } else if(params.firth) { // Firth\n      // For rare variants, set entries in Gvec for non-carriers to 0\n      ArrayXi index_carriers;\n      int mac_thr_sparse = (params.skip_fast_firth ? 0 : 50), j = 0, index_j;\n      if((Gsparse.col(i).sum()/ weights(i)) < mac_thr_sparse) {\n        SpVec Gtmp = Gsparse.col(i)/ weights(i);\n        index_carriers.resize(Gtmp.nonZeros());\n        for (SpVec::InnerIterator it(Gtmp); it; ++it) {\n          index_j = it.index();\n          // check for small entries in G (eg with imputed data)\n          if(mask(index_j) && (it.value() > 1e-4)) index_carriers(j++) = index_j;\n        }\n        index_carriers.conservativeResize(j);\n      }\n      double bstart = 0;\n      // if homALT are not present, use estimated beta from firth with no covs as starting value\n      if((Gsparse.col(i).coeffs().maxCoeff()/ weights(i)) < 1.5){\n        int n11 = 0, n01 = 0;\n        SpVec Gtmp = Gsparse.col(i)/ weights(i);\n        for (SpVec::InnerIterator it(Gtmp); it; ++it) {\n          index_j = it.index();\n          if(!mask(index_j) || (it.value() < 0.5)) continue;\n          if(Y(index_j)) n11++;\n          else n01++;\n        }\n        int n10 = ncase - n11;\n        bstart = log((n11 + 0.5) * (n - n11 - n01 - n10 + 0.5) / (n01 + 0.5) / (n10 + 0.5));\n      }\n      // remove skat weights as it can lead to different model fit for ur masks\n      apply_firth_snp(test_fail, chisq, bstart, Gres.cwiseQuotient(Wsqrt.matrix()) / weights(i), index_carriers, Y, fest.cov_blup_offset.col(ph).array(), mask, params);\n    }\n\n    if( test_fail || (chisq == 0) ) { // set R to 0 for variant\n      Rvec(i) = 0;\n      continue;\n    }\n\n    if(params.debug) cerr << \"uncorrected: \" << tstat_cur * tstat_cur << \" [=(\" << score_stats(i) << \")^2/\" << var_score(i) << \"] -> \" << chisq << endl;\n\n    corrected_var = score_stats(i) * score_stats(i) / chisq;\n    Rvec(i) = sqrt(corrected_var / var_score(i));\n\n  }\n#if defined(_OPENMP)\n  setNbThreads(params.threads);\n#endif\n\n  int npass_post = (Rvec > 0).count();\n  if(npass_post < npass) cerr << \"WARNING: Firth/SPA correction failed for \" << npass - npass_post << \"/\" << npass << \" variants.\";\n\n}\n\n// firth wrapper \n/*\nvoid apply_firth_snp(bool& fail, double& lrt, const Ref<const MatrixXd>& Gvec, const Ref<const ArrayXd>& Y, const Ref<const ArrayXd>& offset, const Ref<const ArrayXb>& mask, struct param const& params) {\n\n  double dev0 = 0, dev;\n  ArrayXd betaold, se, etavec, pivec;\n  betaold = ArrayXd::Zero(1); // start at 0\n\n  fail = !fit_firth_pseudo(dev0, Y, Gvec, offset, mask, pivec, etavec, betaold, se, 1, dev, true, lrt, params.maxstep, params.niter_max_firth/2, params.numtol_firth, &params);\n\n  if(!fail) return;\n\n  betaold = 0; // start at 0\n  fail = !fit_firth_nr(dev0, Y, Gvec, offset, mask, pivec, etavec, betaold, se, 1, dev, true, lrt, params.maxstep, params.niter_max_firth/2, params.numtol_firth, &params);\n\n}\n*/\nvoid apply_firth_snp(bool& fail, double& lrt, double const& bstart, const Ref<const MatrixXd>& Gvec, const Ref<const ArrayXi>& 
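/* carrier row indices; when nonempty, the Firth penalty terms are computed over carriers only */ 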
index_carriers, const Ref<const ArrayXd>& Y, const Ref<const ArrayXd>& offset, const Ref<const ArrayXb>& mask, struct param const& params) {\n\n  double dev0 = 0, betaold = bstart, se;\n\n  // get dev0\n  ArrayXd pivec, wvec, Gvec_mask;\n  get_pvec(pivec, offset, params.numtol_eps);\n  dev0 = get_logist_dev(Y, pivec, mask);\n  if((index_carriers.size() > 0)) { // bug fix to use the right deviance fn if using approximate penalty based on carrier status\n    get_pvec(pivec, offset(index_carriers), params.numtol_eps);\n    get_wvec(pivec, wvec, mask(index_carriers));\n    Gvec_mask = Gvec.col(0)(index_carriers);\n  } else {\n    get_wvec(pivec, wvec, mask);\n    Gvec_mask = mask.select(Gvec.array(),0);\n  }\n  dev0 -= log( (Gvec_mask.square() * wvec).sum() );\n\n  fail = fit_firth_pseudo(dev0, Y, Gvec.col(0), offset, mask, index_carriers, betaold, se, lrt, params.maxstep, params.niter_max_firth/2, params.numtol_firth, &params); // try pseudo\n\n  if(!fail) return;\n\n  betaold = 0; // start at 0\n  fail = !fit_firth(dev0, Y, Gvec, offset, mask, index_carriers, betaold, se, lrt, params.maxstep, params.niter_max_firth/2, params.numtol_firth, &params); // try NR (slower)\n\n  if(!fail || (bstart == 0)) return;\n  betaold = bstart;\n  fail = !fit_firth(dev0, Y, Gvec, offset, mask, index_carriers, betaold, se, lrt, params.maxstep, params.niter_max_firth/2, params.numtol_firth, &params); // try NR (slower)\n}\n\nbool correct_vcov_burden(int const& ph, double& rfrac, double const& qb, double const& var_qb, const Ref<const MatrixXd>& GtWX, const Ref<const MatrixXd>& XWsqrt, SpMat const& GWs, const Ref<const ArrayXd>& Wsqrt, const Ref<const ArrayXd>& phat, const Ref<const ArrayXd>& Y, const Ref<const ArrayXb>& mask, const Ref<const MatrixXd>& offset, struct param const& params){\n\n  if(qb == 0) return true; // no need to apply it\n\n  // check T_burden\n  double tstat_cur = sqrt(qb / var_qb);\n  if(fabs(tstat_cur) <= params.z_thr) return true; // no need to apply it\n\n  SpVec g_burden; // not needed since not using fastSPA\n  bool test_fail = true;\n  double chisq, pv;\n  ArrayXi index_carriers;\n\n  // get residuals for burden mask\n  VectorXd g_res = GWs * VectorXd::Ones(GWs.cols()) - XWsqrt * GtWX.rowwise().sum(); // get mask residuals\n\n  if( params.use_SPA ){ // use SPA\n    run_SPA_test_snp(chisq, pv, tstat_cur, var_qb, false, g_burden, g_res.array(), phat, Wsqrt, mask, test_fail, params.tol_spa, params.niter_max_spa, params.missing_value_double, params.nl_dbl_dmin);\n    /*if(params.debug && !test_fail)\n      cerr << \"SPA // uncorrected: \" << tstat_cur * tstat_cur << \" -> \" << chisq <<\n      \";logp=\"<< pv << \";rfrac=\" << tstat_cur * tstat_cur / chisq << \"\\n\";*/\n\n  } else if( params.firth ){ // use firth\n    apply_firth_snp(test_fail, chisq, 0, g_res.cwiseQuotient(Wsqrt.matrix()), index_carriers, Y, offset.col(ph).array(), mask, params);\n    /*if(params.debug && !test_fail)\n      cerr << \"Firth // uncorrected: \" << tstat_cur * tstat_cur << \" -> \" << chisq <<\n      \";logp=\"<< pv << \";rfrac=\" << tstat_cur * tstat_cur / chisq << \"\\n\";*/\n  }\n\n  if( test_fail || (chisq == 0) ) {\n    if(params.debug) cerr << \"WARNING: failed to correct T_burden.\";\n    return false;\n  }\n\n  // need to make variance bigger (so bigger p-values) so take max\n  rfrac = max(1.0, tstat_cur * tstat_cur / chisq );\n  if(params.debug) cerr << \"T_burden=\" << tstat_cur << \";R_factor_burden:\" << rfrac << \"\\n\";\n\n  return true;\n}\n\nvoid get_single_pvs_bt(Ref<ArrayXd> pvals, const 
Ref<const ArrayXd>& chisq_vals){\n  //cerr << pvals.matrix().transpose() << \"\\n\\n\" << chisq_vals.matrix().transpose() << endl;\n  for(int isnp = 0; isnp < pvals.rows(); isnp++)\n    get_logp(pvals(isnp), chisq_vals(isnp));\n}\n\n\n/////////////////////\n/////////////////////\n///// General\n/////////////////////\n/////////////////////\n\nEigen::MatrixXd get_RsKRs(const Ref<const MatrixXd>& K, const double& rho, const double& c1){\n\n  int m = K.rows(); // M\n  double c2 = sqrt( 1 - rho + m * rho), gamma1;\n\n  VectorXd b = K.rowwise().sum(); // Mx1\n  gamma1 = b.sum();\n\n  return (\n      (1-rho) * K.array() + \n      c1 * (c2-c1)/m * (b.rowwise().replicate(m) + b.transpose().colwise().replicate(m)).array() + // last term is outer sum of b\n      (c2-c1)/m * (c2-c1)/m * gamma1\n      ).matrix();\n}\n\nEigen::MatrixXd get_RsKRs(const Ref<const MatrixXd>& K, const Ref<const MatrixXd>& b_outer_sum, const double& gamma1, const double& rho, const double& c1){\n\n  int m = K.rows(); // M\n  double c2 = sqrt( 1 - rho + m * rho);\n\n  return (\n      (1-rho) * K.array() + \n      c1 * (c2-c1)/m * b_outer_sum.array() +\n      (c2-c1)/m * (c2-c1)/m * gamma1\n      ).matrix();\n}\n\nvoid get_lambdas(VectorXd& lambdas, const Ref<const MatrixXd>& K, const double& tol){\n\n  if(K.rows() == 1) { // K is scalar (e.g. single variant in set)\n    lambdas = VectorXd::Constant(1, K(0,0));\n    return;\n  }\n\n  // eigenvalues sorted in increasing order\n  SelfAdjointEigenSolver<MatrixXd> esK(K, EigenvaluesOnly);\n  // ignore zero eigen-values (matrix is psd)\n  //int nonzero = (esK.eigenvalues().array() > esK.eigenvalues().tail(1)(0) * tol).count();\n  // use filter strategy in R SKAT\n  int nng = (esK.eigenvalues().array() >= 0).count();\n  int nonzero = (esK.eigenvalues().array() > ( (esK.eigenvalues().array() >= 0).select(esK.eigenvalues().array(),0).sum() / nng * tol) ).count();\n  lambdas = esK.eigenvalues().tail(nonzero);\n}\n\nvoid compute_fixed_skato_p(double& pval, double& chival, double const& Qs, double const& Qb, double const& rho, VectorXd& lambdas, const double& tol, bool const& debug){\n\n  double q = (1 - rho) * Qs + rho * Qb;\n  if(debug) cerr << \"Q:\" << q << \"\\n\";\n\n  if( (rho == 1) || (lambdas.size() == 1) ){ // burden or single variant\n    chival = q / lambdas.tail(1)(0);\n    get_logp(pval, chival); \n  } else compute_skat_pv(pval, chival, q, lambdas, tol);\n\n}\n\nvoid compute_fixed_skato_p(double& pval, double& chival, double& q, double const& rho, VectorXd& lambdas, const double& tol){\n\n  if( (rho == 1) || (lambdas.size() == 1) ){ // burden or single variant\n    chival = q / lambdas.tail(1)(0);\n    get_logp(pval, chival); \n  } else compute_skat_pv(pval, chival, q, lambdas, tol);\n\n}\n\nvoid compute_skat_pv(double& logp, double& chival, double const& Q, VectorXd& lambdas, const double& tol){\n  // use log10P directly to handle small pvalues\n  logp = get_chisq_mix_logp(Q, lambdas, chival);\n}\n\n// returns p-value or -1\ndouble get_chisq_mix_pv(double const& q, const Ref<const VectorXd>& lambdas){\n\n  double pv, pv_davies_thr = 1e-5; // davies can be unreliable if pv is too small\n\n  // re-scale so that max lambda is 1 (lambda is sorted)\n  double newQ = q / lambdas.tail(1)(0);\n  VectorXd newL = lambdas / lambdas.tail(1)(0);\n  //cerr << \"Qval= \" << newQ << \"\\n\";\n  // exact\n  pv = get_davies_pv(newQ, newL, false);\n  //cerr << \"davies: \" << pv << \"\\n\";\n\n  // if failed or is very low, use SPA\n  if(pv <= pv_davies_thr){ \n    pv = get_kuonen_pv(newQ, 
newL); // SPA\n    //cerr << \"kuonen: \" << pv << \"\\n\";\n    if(pv <= 0) {// if SPA failed\n      pv = get_davies_pv(newQ, newL, true); // use Davies with stringent parameters\n      //cerr << \"davies strict: \" << pv << \"\\n\";\n      if(pv <= 0) {\n        pv = get_liu_pv(newQ, newL); // only use mod Liu if Davies/SPA failed\n        //cerr << \"liu: \" << pv << \"\\n\";\n      }\n    }\n  }\n\n  if((boost::math::isnan)(pv) || !(boost::math::isnormal)(pv)) return -1;\n  return pv;\n\n}\n\n// get log10p or -1\ndouble get_chisq_mix_logp(double const& q, const Ref<const VectorXd>& lambdas, double& chival){\n\n  double logp, pv, pv_davies_thr = 1e-5; // davies can be unreliable if pv is too small\n  double nl_dbl_dmin = 10.0 * std::numeric_limits<double>::min();\n  double log10_nl_dbl_dmin = -log10(nl_dbl_dmin);\n\n  // re-scale so that max lambda is 1 (lambda is sorted)\n  double newQ = q / lambdas.tail(1)(0);\n  VectorXd newL = lambdas / lambdas.tail(1)(0);\n  //cerr << \"Qval= \" << newQ << \"\\n\";\n  // exact\n  pv = get_davies_pv(newQ, newL, false);\n  //cerr << \"davies: \" << pv << \"\\n\";\n\n  // if failed or is very low, use SPA\n  if(pv <= pv_davies_thr){ \n    pv = get_kuonen_pv(newQ, newL); // SPA\n    //cerr << \"kuonen: \" << pv << \"\\n\";\n\n    if(pv <= 0) {// if SPA failed\n      pv = get_davies_pv(newQ, newL, true); // use Davies with stringent parameters\n      //cerr << \"davies strict: \" << pv << \"\\n\";\n\n      if(pv <= 0) {\n        logp = get_liu_pv(newQ, newL, chival); // only use mod Liu if Davies/SPA failed\n        // get corresponding test stat for chisq(1)\n        get_chisq_stat_pv(pv, chival, logp, nl_dbl_dmin, log10_nl_dbl_dmin);\n        //cerr << \"liu: \" << logp << \"\\n\";\n      } else get_logp(pv, logp, chival, nl_dbl_dmin);\n\n    } else get_logp(pv, logp, chival, nl_dbl_dmin);\n\n  } else get_logp(pv, logp, chival, nl_dbl_dmin); \n\n  if(logp < 0) chival = -1;\n\n  return logp;\n\n}\n\n// return 1-F(x) for chisq mixture\ndouble get_davies_pv(double const& q, Ref<VectorXd> lambdas, bool const& force_stringent){\n\n  // use default lim/acc values from CompQuadForm R package and SKAT resp.\n  int k = lambdas.size(), ifault = 0, lim = 1e4; // p & error\n  double cdf, pv, acc1 = 1e-6;\n  if(force_stringent){ lim=1e6; acc1 = 1e-9;}\n  ArrayXd nc = ArrayXd::Constant(k, 0); // ncp\n  ArrayXi df = ArrayXi::Constant(k, 1); // df\n  ArrayXd tr = ArrayXd::Constant(7, 0); // params for qf\n\n  try {\n    cdf = qf(lambdas.data(), nc.data(), df.data(), k, 0, q, lim, acc1, tr.data(), &ifault); \n    pv = 1 - cdf;\n  } catch (...){\n    return -1;\n  }\n  //cerr << \"Davies p=\" << pv << \"\\n\";\n\n  if((ifault != 0) || (pv <= 0) || (pv > 1))\n    return -1;\n\n  return pv;\n}\n\n// return 1-F(x) for chisq mixture\ndouble get_kuonen_pv(const double& q, const Ref<const VectorXd>& L){\n\n  bool success = false;\n  double pv, t_root = -1;\n  MapcArXd lambdas (L.data(), L.size(), 1);\n  //cerr << \"q=\" << q << \"\\ntop lambdas=\" << L.tail(6) << \"\\n\\n\";\n\n  // lambdas are sorted in increasing order (from eigen)\n  double tmin = get_tmin_lambda(q, lambdas);\n  double tmax = get_tmax_lambda(lambdas);\n  //cerr << \"(\" << tmin << \",\" << tmax << \")\\n\\n\";\n  if(tmax < tmin) return -1;\n\n  solve_kp(success, t_root, q, tmin, tmax, lambdas);\n  if(!success) return -1;\n\n  pv = get_spa_pv(t_root, q, lambdas);\n  //cerr << \"SPA p=\" << pv << \"\\n\";\n\n  if((pv <= 0) || (pv > 1)) return -1;\n\n  return pv;\n}\n\ndouble get_tmin_lambda(const double& q, 
const Ref<const ArrayXd>& lambdas){\n  if(lambdas(0) < 0) // not applicable here since matrix is psd\n    return 1 / (2 * lambdas(0));\n  else if(q > lambdas.sum())\n    return 0;\n  else\n    return -0.5 * lambdas.size() / q;\n}\n\ndouble get_tmax_lambda(const Ref<const ArrayXd>& lambdas){\n  //return 1 / (2 * lambdas.tail(1)(0));\n  return 0.5 - 1e-8; // lambdas are re-scaled so max=1\n}\n\nvoid solve_kp(bool& success, double& t_new,const double& q,const double& tmin,const double& tmax, const Ref<const ArrayXd>& lambdas){\n\n  int niter_cur = 0, niter_max = 1e3;\n  double min_x, max_x, t_old, f_old, f_new, hess, tol = 1e-8;\n\n  min_x = tmin, max_x = tmax;\n  t_old = min_x;\n  // check sign switches\n  if(!valid_bounds(f_old, min_x, tmax, q, lambdas)) {\n    success = false;\n    return;\n  }\n\n  while( niter_cur++ < niter_max ){\n\n    hess = Kpp_lambda(t_old,lambdas);\n    t_new = t_old - f_old / hess;\n    f_new = Kp_lambda(t_new,lambdas) - q;\n\n    //cerr << \"#\" << niter_cur << \": t=\" << t_old << \"->\" << t_new << \" f(t)=\" << f_new << \"; bounds = (\" << min_x << \",\" << max_x << \")\\n\";\n    if( fabs( f_new ) < tol ) break;\n\n    // update bounds on root\n    if( (t_new > min_x) && (t_new < max_x) ){\n      if( f_new > 0) max_x = t_new;\n      else min_x = t_new;\n    } else { // bisection method if t_new went out of bounds and re-compute f_new\n      t_new = ( min_x + max_x ) * 0.5;\n      f_new = Kp_lambda(t_new,lambdas) - q;\n      if(f_new <= 0) min_x = t_new; // reduce interval\n      else max_x = t_new;\n    }\n\n    t_old = t_new;\n    f_old = f_new;\n  }\n\n  // If didn't converge\n  success = niter_cur <= niter_max;\n  //cerr << \"#iterations = \" << niter_cur << \"; f= \" << f_new << endl;\n\n}\n\nbool valid_bounds (double& fmin, double const& tmin, double const& tmax, const double& q, const Ref<const ArrayXd>& lambdas){ \n\n  fmin = Kp_lambda(tmin,lambdas) - q;\n  double fmax = Kp_lambda(tmax,lambdas) - q;\n\n  return ((fmin<=0) && (fmax>=0));\n}\n\ndouble K_lambda (const double& t, const Ref<const ArrayXd>& lambdas){ \n  return -0.5 * (1 - 2 * t * lambdas).log().sum();\n}\n\ndouble Kp_lambda (const double& t, const Ref<const ArrayXd>& lambdas){ \n  return (lambdas / (1 - 2 * t * lambdas)).sum();\n}\n\ndouble Kpp_lambda (const double& t, const Ref<const ArrayXd>& lambdas){ \n  return (( 2 * lambdas.square()) / (1 - 2 * t * lambdas).square()).sum();\n}\n\ndouble get_spa_pv(const double& root,const double& q, const Ref<const ArrayXd>& lambdas){\n\n  double u,w,r,tmp;\n  normal nd(0,1);\n\n  tmp = 2 * (q * root - K_lambda(root, lambdas));\n  if(tmp <= 0) return -1;\n  w = sgn(root) * sqrt( tmp );\n  tmp = Kpp_lambda(root, lambdas);\n  if(tmp <= 0) return -1;\n  u = root * sqrt( tmp );\n  if( fabs(u) < 1e-4 ) return -1;\n\n  r = w + log(u/w) / w;\n  if((boost::math::isnan)(r) || !(boost::math::isnormal)(r)) return -1;\n  return cdf(complement(nd, r));\n\n}\n\ndouble get_liu_pv(double const& q, const Ref<const VectorXd>& lambdas, const bool& lax){\n\n  ArrayXd cvals(6);\n  get_cvals(cvals, lambdas);\n  //cerr << \"cvals liu=\" << cvals.matrix().transpose() << endl;\n  \n  double pv;\n  double tstar = (q - cvals(0)) * cvals(1);\n  double val = tstar * cvals(3) + cvals(2);\n\n  if(val < 0) return -1;\n  if((boost::math::isnan)(cvals(4)) || !(boost::math::isnormal)(cvals(4))) return -1;\n\n  // 0 ncp gives strange behavior with non_central_chi_squared (returns -cdf instead of 1-cdf)\n  if(cvals(5) == 0) pv = cdf(complement(chi_squared(cvals(4)), val));\n  else  pv = 
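/* nonzero ncp: use the noncentral chi-square survival function */ 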
cdf(complement(non_central_chi_squared(cvals(4), cvals(5)), val));\n  //cerr << \"pv liu=\" << val << \" -> \" << pv << endl;\n\n  if(!lax && ((pv <= 0) || (pv > 1))) return -1;\n  else if(lax && ((pv < 0) || (pv > 1))) return -1;\n  return pv;\n}\n\n\ndouble get_liu_pv(double const& q, const Ref<const VectorXd>& lambdas, double& chival){\n\n  ArrayXd cvals(6);\n  get_cvals(cvals, lambdas);\n  //cerr << \"cvals liu=\" << cvals.matrix().transpose() << endl;\n  \n  double pv, logpv;\n  double tstar = (q - cvals(0)) * cvals(1);\n  double val = tstar * cvals(3) + cvals(2);\n\n  //cerr << \"liu val = \" << val << \" \";\n  if(val < 0) {\n    chival = -1;\n    return -1;\n  }\n  if((boost::math::isnan)(cvals(4)) || !(boost::math::isnormal)(cvals(4))) return -1;\n\n  // 0 ncp gives strange behavior with non_central_chi_squared (returns -cdf instead of 1-cdf)\n  if(cvals(5) == 0) get_logp(logpv, val, cvals(4));\n  else  {\n    pv = cdf(complement(non_central_chi_squared(cvals(4), cvals(5)), val));\n    logpv = ( ((pv <= 0) || (pv > 1)) ? -1.0 : -log10(pv) );\n  }\n\n  //cerr << \"; params = ( \"  << std::setprecision(10) << cvals(4) << \", \" << cvals(5) << \") -> logpv liu = \" << logpv << \"\\n\";\n\n  if(logpv < 0) chival = -1;\n  chival = val;\n  return logpv;\n}\n\nbool get_ztz_evals(const Ref<const MatrixXd>& Kmat, MatrixXd& outer_sum, double& gamma1, double& gamma2, double& gamma3, double const& skat_lambda_tol, bool const& debug){\n\n  VectorXd ZtZ_rsum = Kmat.rowwise().sum();//Kmx1\n  outer_sum = ZtZ_rsum.rowwise().replicate(Kmat.cols()) + ZtZ_rsum.transpose().colwise().replicate(Kmat.rows());\n  gamma1 = ZtZ_rsum.sum();\n  gamma2 = ZtZ_rsum.squaredNorm();\n  gamma3 = ZtZ_rsum.dot( Kmat * ZtZ_rsum);\n  get_lambdas(skato_lambdas, Kmat - ZtZ_rsum * (ZtZ_rsum/gamma1).transpose(), skat_lambda_tol);\n  if(skato_lambdas.size() == 0) return false;\n  if(debug) {\n    int bs = skato_lambdas.size();\n    cerr << \"L:\\n\" << skato_lambdas.head(min(150,bs)).transpose() << \"\\n\";\n  }\n\n  return true;\n}\n\nvoid get_skato_mom(double& mu, double& sc_fac, double& sd, double& df, ArrayXd& tau, const Ref<const VectorXd>& lambdas, double const& gamma1, double const& gamma2, double const& gamma3, const Ref<const ArrayXd>& rho, bool const& debug){\n\n  double v0, ve, vq;\n\n  mu = lambdas.sum();\n  v0 = 2 * lambdas.squaredNorm();\n  ve = 4 * (gamma3/gamma1 - gamma2*gamma2/gamma1/gamma1);\n  vq = v0 + ve;\n  if(vq < 0){sd = -1; return;}\n  sd = sqrt(vq);\n  sc_fac = sqrt( v0 / vq );\n  df = 0.5 * 0.5 * v0 * v0 / lambdas.array().pow(4).sum();\n  if(debug) cerr << \"[muQ, scFac, sd, df, v0, vq]= [\" << mu << \" \" << sc_fac << \" \" << sd << \" \" << df << \" \" << v0 << \" \" << vq << \" ]\\n\";\n  tau = gamma1 * rho + gamma2/gamma1 * (1-rho);\n  if(debug) cerr << \"tau=\" << tau.matrix().transpose() << \"\\n\";\n\n}\n\nvoid get_cvals(int const& irho, Ref<MatrixXd> cvals, const Ref<const VectorXd>& lambdas){\n\n  double s1, s1_sq, s2, a, dlt;\n\n  cvals(irho, 0) = lambdas.sum();\n  cvals(irho, 1) = lambdas.squaredNorm();\n  cvals(irho, 2) = lambdas.array().pow(3).sum();\n  cvals(irho, 3) = lambdas.array().pow(4).sum();\n  s1 = cvals(irho, 2) / cvals(irho, 1) / sqrt(cvals(irho, 1));\n  s1_sq = s1 * s1;\n  s2 = cvals(irho, 3) / (cvals(irho, 1) * cvals(irho, 1));\n  if(s1_sq <= s2)\n    cvals(irho, 4) = 1 / s2;\n  else {\n    a = 1 / (s1 - sqrt(s1_sq - s2));\n    dlt = (s1 * a - 1) * a * a;\n    cvals(irho, 4) = a * a - 2 * dlt;\n  }\n\n}\n\nvoid get_cvals(Ref<ArrayXd> cvals, const Ref<const VectorXd>& 
lambdas){\n\n  // cvals = [muQ, invsQ, muX, sX, df, ncp]\n  double c1, c2, c3, c4, s1, s1_sq, s2, df, ncp, a;\n\n  c1 = lambdas.sum();\n  c2 = lambdas.squaredNorm();\n  c3 = lambdas.array().pow(3).sum();\n  c4 = lambdas.array().pow(4).sum();\n  s1 = c3 / c2 / sqrt(c2);\n  s1_sq = s1 * s1;\n  s2 = c4 / (c2 * c2);\n  if(s1_sq <= s2) {\n    df = 1 / s2;\n    a = sqrt(df);\n    ncp = 0;\n  } else {\n    a = 1 / (s1 - sqrt(s1_sq - s2));\n    ncp = (s1 * a - 1) * a * a;\n    df = a * a - 2 * ncp;\n  }\n\n  cvals(0) = c1; //muQ\n  cvals(1) = 1 / sqrt(2 * c2); //invsQ\n  cvals(2) = df + ncp; // muX\n  cvals(3) = sqrt(2) * a; // sX\n  cvals(4) = df;\n  cvals(5) = ncp;\n\n}\n\nvoid get_Qmin(int const& nrho, double& pmin, Ref<ArrayXd> Qmin, const Ref<const MatrixXd>& cvals){\n  for(int j = 0; j < nrho; j++){\n    chi_squared chisq( cvals(j, 4) );\n    Qmin(j) = cvals(j, 0) + (quantile(complement(chisq, pmin)) - cvals(j, 4)) * sqrt(cvals(j, 1)/cvals(j, 4)) ;\n  }\n  skato_upper = ((Qmin + flipped_skato_rho * skato_muQ * (1 - skato_fdavies) / skato_fdavies)/skato_tau).minCoeff();\n}\n\ndouble SKATO_integral_fn(double* x){ // variables used beside x are global\n\n  double val = ((skato_Qmin_rho - skato_tau * (*x)) / flipped_skato_rho).minCoeff();\n  double S, dlt;\n  chi_squared chisq1( 1 );\n\n  if(skato_state == 1) return 0; // skip if failed for other x values\n  if(*x == 0) {skato_state = 1; return 0;} // failed\n\n  // get first term in integral (1-cdf)\n  if( val > (skato_muQ * 1e4) ) S = 0; // value check from SKAT R package\n  else {\n    dlt = (val - skato_muQ) * skato_fdavies + skato_muQ;\n    if(dlt <= 0) S = 1;\n    else{\n      S = get_chisq_mix_pv(dlt, skato_lambdas);\n      //cerr << *x << \" \" << S << \" \" << val << \" \" << skato_muQ << \" \" << skato_sdQ << \" \" << skato_dfQ << endl;\n\n      if(S <= 0) { // failed\n        skato_state = 1; \n        return 0;\n      } else if(S >= 1) S = 1;\n    }\n  }\n\n  return S * pdf(chisq1, *x);\n\n}\n\ndouble SKATO_integral_fn_liu(double* x){ // variables used beside x are global\n\n  double val = ((skato_Qmin_rho - skato_tau * (*x)) / flipped_skato_rho).minCoeff();\n  double S, dlt;\n  chi_squared chisq1( 1 );\n\n  if(skato_state == 1) return 0; // skip if failed for other x values\n  if(*x == 0) {skato_state = 1; return 0;} // failed\n\n  if((boost::math::isnan)(skato_dfQ) || !(boost::math::isnormal)(skato_dfQ)) return -1;\n  chi_squared chisqL( skato_dfQ );\n\n  // get first term in integral\n  dlt = (val - skato_muQ) / skato_sdQ * sqrt(2*skato_dfQ) + skato_dfQ;\n  if(dlt<0) return 0; // cdf=0\n  S = cdf(complement(chisqL, dlt));\n  //cerr << *x << \" \" << S << \" \" << val << \" \" << skato_muQ << \" \" << skato_sdQ << \" \" << skato_dfQ << endl;\n\n  return S * pdf(chisq1, *x);\n\n}\n\n\n// for skato num int\nvoid integrate(double f(double*), double& pv, int const& subd, bool const& debug){\n\n  int neval, ierror, ilimit = subd, last; \n  int lenw = 4 * ilimit;\n  double lower = 0, upper = skato_upper, epsabs = 1e-25, epsrel = pow(std::numeric_limits<double>::epsilon(), .25), result, abserr;\n  VectorXi iwork = VectorXi::Zero(ilimit);\n  VectorXd work = VectorXd::Zero(lenw);\n  skato_state = 0;\n\n  dqags_(f, &lower, &upper, &epsabs, &epsrel, &result, &abserr, &neval, &ierror, &ilimit, &lenw, &last, iwork.data(), work.data());\n  if(ierror != 0) skato_state = 1;\n  if((boost::math::isnan)(result) || !(boost::math::isnormal)(result)) skato_state = 1;\n  if(debug) {\n    cerr << \"Niter=\" << neval << \";integral=\" << result << 
\"Abs.error=\" << abserr << \";rel.error=\" << epsrel <<  \";fail=\"<< skato_state << \"/\" << ierror << \"\\n\";\n    if(skato_state == 0) for(int i = 1; i < 6; i++) {lower=skato_upper*0.2*i;cerr << \"g(\" << lower << \")=\" << f(&lower) << \" \";}\n  }\n\n  if (skato_state != 0)  pv = -1; \n  else pv = result;\n\n} \n\nvoid get_skato_pv(double &logp, double& chisq, double const& minp, int const& nrhos, double const& nl_dbl_dmin, bool const& debug){\n\n  double a, p_bc = minp * nrhos;\n  chi_squared chisq1( 1 );\n  double tstar = cdf(complement(chisq1, skato_upper)); \n\n  if(minp >= (1 - std::numeric_limits<float>::epsilon())) {logp = 0; chisq=0; return;}\n\n  integrate(SKATO_integral_fn, a, 1000, debug);\n  if(debug) cerr << \"SKATO p=\" << (skato_state == 0 ? (a+tstar) : -1) << \"=\" << a << \"+\" << tstar  << \" (minP=\"<< minp <<\"; Bonf=\" << p_bc << \")\\n\";\n  if(skato_state == 0) a += tstar; // add s(q*) to integral\n\n  if( p_bc < a ) a = p_bc; // bonferroni corrected p\n  else if( (a <= 0) && (p_bc <= 1) ) a = p_bc; // if integrate function failed\n\n  if(a <= 0) {logp = -1; return;} // if pmin=0\n  \n  get_logp(a, logp, chisq, nl_dbl_dmin); \n\n}\n\n// print sum_stats\nvoid print_vc_sumstats(int const& snp_index, string const& test_string, string const& wgr_string, variant_block* block_info, vector<snp> const& snpinfo, struct in_files const& files, struct param const* params){\n\n  int print_index;\n  string header;\n  std::map <std::string, MatrixXd>::iterator itr;\n  if(!params->htp_out) header = print_sum_stats_head(snp_index, snpinfo);\n\n  for (itr = block_info->sum_stats_vc.begin(); itr != block_info->sum_stats_vc.end(); ++itr) {\n    // for each pheno\n    for(int i = 0; i < params->n_pheno; i++) { // col 0 = chisq, col 1 = logp\n\n      // make sure results for test are all on same line\n      print_index = params->split_by_pheno ? i : 0;\n\n      if(itr->second(i, 1) >= 0) {\n        std::ostringstream buffer;\n\n        if(params->htp_out) \n          buffer << print_sum_stats_head_htp(snp_index, files.pheno_names[i], test_string + wgr_string + \"-\" + itr->first, snpinfo, params) << print_sum_stats_htp(-1, -1, itr->second(i, 0), itr->second(i, 1), -1, -1, -1, block_info->genocounts, i, true, 1, params, params->missing_value_double, -1, ( (params->firth || params->use_SPA) && ((itr->first == \"SKATO-ACAT\") || (itr->first == \"SKATO\")) ) ? block_info->cf_burden(i): -1.0, params->missing_value_double);\n        else \n          buffer << (!params->split_by_pheno && (i>0) ? 
\"\" : header) << print_sum_stats(-1,-1,-1,-1,-1, -1, params->pheno_counts.row(i).sum(), params->pheno_counts(i, 0), params->pheno_counts(i, 1), test_string + \"-\" + itr->first, -1, -1, itr->second(i, 0), itr->second(i, 1), true, 1, params, (i+1));\n\n        block_info->sum_stats[print_index].append( buffer.str() );\n      } else if(!params->split_by_pheno) // print NA sum stats\n        block_info->sum_stats[print_index].append( print_na_sumstats(i, 1, header, test_string + \"-\" + itr->first, block_info, *params) );\n\n    }\n  }\n\n}\n\nvoid check_sizes(SpMat const& Gmat_sp, SpMat const& Gmat_sp_urm, \n    //const Ref<const MatrixXd>& Gmat, \n    const Ref<const MatrixXb>& Jmat){\n\n  cerr << \"Printing sizes of SKAT objects\\n\" <<\n    \"-Gsparse = \" << sizeof(double) * Gmat_sp.nonZeros() / 1024.0 / 1024.0 << \"MB\\n\" <<\n    \"-Jmat = \" << sizeof(Jmat(0,0)) * Jmat.size() / 1024.0 / 1024.0 << \"MB\\n\" <<\n    \"-Gmat_sp_ur = \" << sizeof(double) * Gmat_sp_urm.nonZeros() / 1024.0 / 1024.0 << \"MB\\n\";\n\n}\n"
  },
  {
    "path": "src/SKAT.hpp",
    "content": "/* \n\n   This file is part of the regenie software package.\n\n   Copyright (c) 2020-2024 Joelle Mbatchou, Andrey Ziyatdinov & Jonathan Marchini\n\n   Permission is hereby granted, free of charge, to any person obtaining a copy\n   of this software and associated documentation files (the \"Software\"), to deal\n   in the Software without restriction, including without limitation the rights\n   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n   copies of the Software, and to permit persons to whom the Software is\n   furnished to do so, subject to the following conditions:\n\n   The above copyright notice and this permission notice shall be included in all\n   copies or substantial portions of the Software.\n\n   THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n   SOFTWARE.\n\n*/\n\n#ifndef SKAT_H\n#define SKAT_H\n\n// SKAT\nvoid update_vc_gmat(SpMat&,Eigen::ArrayXd&,Eigen::ArrayXd&,ArrayXb&,const int&,const int&,struct param const&,const Eigen::Ref<const ArrayXb>&,Eigen::Ref<Eigen::MatrixXd>,std::vector<variant_block>&,const Eigen::Ref<MatrixXb>);\nvoid update_vc_gmat(SpMat&,Eigen::ArrayXd&,Eigen::ArrayXd&,SpMat const&,const Eigen::Ref<const ArrayXb>&,const Eigen::Ref<const ArrayXb>&,const Eigen::Ref<const Eigen::ArrayXd>&,const Eigen::Ref<const ArrayXb>&,struct param const&);\nbool get_custom_weights(std::string const&,Eigen::Ref<Eigen::ArrayXd>,std::vector<snp>&,std::vector<uint64> const&);\nbool get_custom_weights(std::string const&,Eigen::Ref<Eigen::ArrayXd>,std::vector<snp>&,const Eigen::Ref<const Eigen::ArrayXi>&,std::vector<uint64> const&);\nvoid compute_vc_masks(SpMat&,Eigen::Ref<Eigen::ArrayXd>,Eigen::Ref<Eigen::ArrayXd>,SpMat&,Eigen::Ref<MatrixXb>,const Eigen::Ref<const Eigen::MatrixXd>&, struct ests const&,struct f_ests const&,const Eigen::Ref<const Eigen::MatrixXd>&,const Eigen::Ref<const Eigen::MatrixXd>&,const Eigen::Ref<const MatrixXb>&,MatrixXb&,std::vector<variant_block>&,const Eigen::Ref<const ArrayXb>&,struct param const&,struct remeta_sumstat_writer&);\nvoid prep_ultra_rare_mask(SpMat&,Eigen::Ref<Eigen::ArrayXd>,Eigen::Ref<Eigen::ArrayXd>,SpMat&,Eigen::Ref<MatrixXb>,MatrixXb&,const Eigen::Ref<const ArrayXb>&,struct param const&);\nvoid compute_vc_masks_qt(SpMat&,const Eigen::Ref<const Eigen::ArrayXd>&,const Eigen::Ref<const Eigen::ArrayXd>&,const Eigen::Ref<const Eigen::MatrixXd>&,const Eigen::Ref<const Eigen::MatrixXd>&,const Eigen::Ref<const MatrixXb>&,std::vector<variant_block>&,struct param const&,struct remeta_sumstat_writer&);\nvoid compute_vc_masks_qt_fixed_rho(SpMat&,const Eigen::Ref<const Eigen::ArrayXd>&,const Eigen::Ref<const Eigen::ArrayXd>&,const Eigen::Ref<const Eigen::MatrixXd>&,const Eigen::Ref<const Eigen::MatrixXd>&,const Eigen::Ref<const MatrixXb>&,std::vector<variant_block>&,double const&,double const&,double const&,uint const&,bool const&,struct param const&,struct remeta_sumstat_writer&);\nvoid compute_vc_masks_qt(SpMat&,const Eigen::Ref<const Eigen::ArrayXd>&,const Eigen::Ref<const Eigen::ArrayXd>&,const Eigen::Ref<const Eigen::MatrixXd>&,const Eigen::Ref<const Eigen::MatrixXd>&,const 
Eigen::Ref<const MatrixXb>&,std::vector<variant_block>&,const Eigen::Ref<const Eigen::ArrayXd>&,double const&,double const&,uint const&,bool const&,struct param const&,struct remeta_sumstat_writer&);\nvoid compute_vc_mats_qt(Eigen::Ref<Eigen::MatrixXd>,Eigen::Ref<Eigen::MatrixXd>,const Eigen::Ref<const Eigen::MatrixXd>&,const Eigen::Ref<const Eigen::MatrixXd>&,const Eigen::Ref<SpMat>);\nvoid compute_skat_q(Eigen::MatrixXd&,Eigen::MatrixXd&,Eigen::Ref<Eigen::MatrixXd>,const Eigen::Ref<const Eigen::MatrixXd>&,Eigen::MatrixXd&,const Eigen::Ref<const ArrayXb>&,const Eigen::Ref<const MatrixXb>&,bool const&,bool const&);\nvoid get_acatv_pv(int const&,const Eigen::Ref<const Eigen::MatrixXd>&,const Eigen::Ref<const Eigen::ArrayXd>&,double&,double&,double const&,bool const&);\nvoid get_single_pvs(Eigen::Ref<Eigen::MatrixXd>,const Eigen::Ref<const Eigen::MatrixXd>&);\n\nvoid compute_vc_masks_bt(SpMat&,const Eigen::Ref<const Eigen::ArrayXd>&,const Eigen::Ref<const Eigen::ArrayXd>&,const Eigen::Ref<const Eigen::MatrixXd>&,struct ests const&,struct f_ests const&,const Eigen::Ref<const Eigen::MatrixXd>&,const Eigen::Ref<const Eigen::MatrixXd>&,const Eigen::Ref<const MatrixXb>&,const Eigen::Ref<const MatrixXb>&,std::vector<variant_block>&,struct param const&,struct remeta_sumstat_writer&);\nvoid compute_vc_masks_bt_fixed_rho(SpMat&,const Eigen::Ref<const Eigen::ArrayXd>&,const Eigen::Ref<const Eigen::ArrayXd>&,const Eigen::Ref<const Eigen::MatrixXd>&,struct ests const&,struct f_ests const&,const Eigen::Ref<const Eigen::MatrixXd>&,const Eigen::Ref<const Eigen::MatrixXd>&,const Eigen::Ref<const MatrixXb>&,const Eigen::Ref<const MatrixXb>&,std::vector<variant_block>&,double const&,double const&,double const&,bool const&,uint const&,bool const&,struct param const&,struct remeta_sumstat_writer&);\nvoid compute_vc_masks_bt(SpMat&,const Eigen::Ref<const Eigen::ArrayXd>&,const Eigen::Ref<const Eigen::ArrayXd>&,const Eigen::Ref<const Eigen::MatrixXd>&,struct ests const&,struct f_ests const&,const Eigen::Ref<const Eigen::MatrixXd>&,const Eigen::Ref<const Eigen::MatrixXd>&,const Eigen::Ref<const MatrixXb>&,const Eigen::Ref<const MatrixXb>&,std::vector<variant_block>&,const Eigen::Ref<const Eigen::ArrayXd>&,double const&,double const&,bool const&,uint const&,bool const&,struct param const&,struct remeta_sumstat_writer&);\nvoid get_single_pvs_bt(Eigen::Ref<Eigen::ArrayXd>,const Eigen::Ref<const Eigen::ArrayXd>&);\nEigen::MatrixXd get_RsKRs(const Eigen::Ref<const Eigen::MatrixXd>&,const double&,const double&);\nEigen::MatrixXd get_RsKRs(const Eigen::Ref<const Eigen::MatrixXd>&,const Eigen::Ref<const Eigen::MatrixXd>&,const double&,const double&,const double&);\nvoid get_lambdas(Eigen::VectorXd&,const Eigen::Ref<const Eigen::MatrixXd>&,const double&);\nvoid compute_fixed_skato_p(double&,double&,double const&,double const&,double const&,Eigen::VectorXd&,const double&,bool const&);\nvoid compute_fixed_skato_p(double&,double&,double&,double const&,Eigen::VectorXd&,const double&);\nvoid compute_skat_pv(double&,double&,double const&,Eigen::VectorXd&,const double&);\ndouble get_chisq_mix_pv(double const&,const Eigen::Ref<const Eigen::VectorXd>&);\ndouble get_chisq_mix_logp(double const&,const Eigen::Ref<const Eigen::VectorXd>&,double&);\ndouble get_davies_pv(double const&,Eigen::Ref<Eigen::VectorXd>,bool const&);\ndouble get_kuonen_pv(const double&,const Eigen::Ref<const Eigen::VectorXd>&);\ndouble get_liu_pv(const double&,const Eigen::Ref<const Eigen::VectorXd>&,const bool& lax = false);\ndouble get_liu_pv(const 
double&,const Eigen::Ref<const Eigen::VectorXd>&,double&);\ndouble get_tmin_lambda(const double&,const Eigen::Ref<const Eigen::ArrayXd>&);\ndouble get_tmax_lambda(const Eigen::Ref<const Eigen::ArrayXd>&);\nvoid solve_kp(bool&,double&,const double&,const double&,const double&,const Eigen::Ref<const Eigen::ArrayXd>&);\nbool valid_bounds(double&,double const&,double const&,const double&,const Eigen::Ref<const Eigen::ArrayXd>&);\ndouble K_lambda(const double&,const Eigen::Ref<const Eigen::ArrayXd>&);\ndouble Kp_lambda(const double&,const Eigen::Ref<const Eigen::ArrayXd>&);\ndouble Kpp_lambda(const double&,const Eigen::Ref<const Eigen::ArrayXd>&);\ndouble get_spa_pv(const double&,const double&,const Eigen::Ref<const Eigen::ArrayXd>&);\n\nvoid compute_vc_mats_bt(Eigen::Ref<Eigen::ArrayXd>,Eigen::Ref<Eigen::MatrixXd>,const Eigen::Ref<const Eigen::MatrixXd>&,const Eigen::Ref<const Eigen::ArrayXd>&,const Eigen::Ref<const Eigen::MatrixXd>&,Eigen::Ref<SpMat>,SpMat&,Eigen::MatrixXd&);\nvoid compute_skat_q(Eigen::VectorXd&,Eigen::VectorXd&,const Eigen::Ref<const Eigen::ArrayXd>&,Eigen::Ref<Eigen::MatrixXd>,const Eigen::Ref<const MatrixXb>&,bool const&);\n\nvoid correct_vcov(const int&,const Eigen::Ref<const Eigen::ArrayXi>&,const Eigen::Ref<const Eigen::ArrayXd>&,Eigen::Ref<ArrayXb>,Eigen::Ref<Eigen::ArrayXd>,const Eigen::Ref<const Eigen::ArrayXd>&,Eigen::Ref<Eigen::MatrixXd>,SpMat const&,const Eigen::Ref<const Eigen::MatrixXd>&,const Eigen::Ref<const Eigen::MatrixXd>&,SpMat const&,const Eigen::Ref<const Eigen::ArrayXd>&,const Eigen::Ref<const Eigen::ArrayXd>&,const Eigen::Ref<const Eigen::ArrayXd>&,const Eigen::Ref<const ArrayXb>&,struct f_ests const&,struct param const&);\nvoid apply_correction_cc(const int&,const Eigen::Ref<const Eigen::ArrayXi>&,const Eigen::Ref<const Eigen::ArrayXd>&,Eigen::Ref<Eigen::ArrayXd>,const Eigen::Ref<const Eigen::ArrayXd>&,const Eigen::Ref<const Eigen::ArrayXd>&,SpMat const&,const Eigen::Ref<const Eigen::MatrixXd>&,const Eigen::Ref<const Eigen::MatrixXd>&,SpMat const&,const Eigen::Ref<const Eigen::ArrayXd>&,const Eigen::Ref<const Eigen::ArrayXd>&,const Eigen::Ref<const Eigen::ArrayXd>&,const Eigen::Ref<const ArrayXb>&,struct f_ests const&,struct param const&,bool const&);\nvoid apply_firth_snp(bool&,double&,double const&,const Eigen::Ref<const Eigen::MatrixXd>&,const Eigen::Ref<const Eigen::ArrayXi>&,const Eigen::Ref<const Eigen::ArrayXd>&,const Eigen::Ref<const Eigen::ArrayXd>&,const Eigen::Ref<const ArrayXb>&,struct param const&);\nbool correct_vcov_burden(const int&,double&,double const&,double const&,const Eigen::Ref<const Eigen::MatrixXd>&,const Eigen::Ref<const Eigen::MatrixXd>&,SpMat const&,const Eigen::Ref<const Eigen::ArrayXd>&,const Eigen::Ref<const Eigen::ArrayXd>&,const Eigen::Ref<const Eigen::ArrayXd>&,const Eigen::Ref<const ArrayXb>&,const Eigen::Ref<const Eigen::MatrixXd>&,struct param const&);\nbool get_ztz_evals(const Eigen::Ref<const Eigen::MatrixXd>&,Eigen::MatrixXd&,double&,double&,double&,double const&,bool const&);\nvoid get_skato_mom(double&,double&,double&,double&,Eigen::ArrayXd&,const Eigen::Ref<const Eigen::VectorXd>&,double const&,double const&,double const&,const Eigen::Ref<const Eigen::ArrayXd>&,bool const&);\nvoid get_cvals(int const&,Eigen::Ref<Eigen::MatrixXd>,const Eigen::Ref<const Eigen::VectorXd>&);\nvoid get_cvals(Eigen::Ref<Eigen::ArrayXd>,const Eigen::Ref<const Eigen::VectorXd>&);\nvoid get_Qmin(int const&,double&,Eigen::Ref<Eigen::ArrayXd>,const Eigen::Ref<const Eigen::MatrixXd>&);\nvoid get_skato_pv(double &,double&,double 
const&,int const&,double const&,bool const&);\nvoid print_vc_sumstats(int const&,std::string const&,std::string const&,variant_block*,std::vector<snp> const&,struct in_files const&,struct param const*);\n\n// for numerical integration with SKAT-O\n#ifdef __cplusplus\nextern \"C\"\n{\n#endif\n\n  extern void dqags_(double f(double*),double*,double*,double*,double*,double*,double*,int*,int*,int*,int*,int*,int*,double*);\n  double SKATO_integral_fn(double*);\n  double SKATO_integral_fn_liu(double*);\n\n#ifdef __cplusplus\n}\n#endif\n\n// declare global variables\nextern Eigen::ArrayXd flipped_skato_rho;\nextern Eigen::ArrayXd skato_Qmin_rho;\nextern Eigen::ArrayXd skato_tau;\nextern Eigen::VectorXd skato_lambdas;\nextern double skato_muQ;\nextern double skato_fdavies;\nextern double skato_sdQ;\nextern double skato_dfQ;\nextern double skato_upper;\nextern int skato_state; // positive if integration failed\nvoid integrate(double f(double*),double&,int const&,bool const&);\n\n// for LOVO (leave-one-variant-out) masks with binary traits\nextern Eigen::MatrixXd vc_Rvec_start;\nvoid check_cc_correction(SpMat&,const Eigen::Ref<const Eigen::ArrayXd>&,const Eigen::Ref<const Eigen::MatrixXd>&,struct ests const&,struct f_ests const&,const Eigen::Ref<const Eigen::MatrixXd>&,const Eigen::Ref<const Eigen::MatrixXd>&,const Eigen::Ref<const MatrixXb>&,struct param const&);\n\nvoid check_sizes(SpMat const&, SpMat const&, const Eigen::Ref<const MatrixXb>&);\n\n#endif\n"
  },
  {
    "path": "src/Step1_Models.cpp",
    "content": "/*\n\n   This file is part of the regenie software package.\n\n   Copyright (c) 2020-2024 Joelle Mbatchou, Andrey Ziyatdinov & Jonathan Marchini\n\n   Permission is hereby granted, free of charge, to any person obtaining a copy\n   of this software and associated documentation files (the \"Software\"), to deal\n   in the Software without restriction, including without limitation the rights\n   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n   copies of the Software, and to permit persons to whom the Software is\n   furnished to do so, subject to the following conditions:\n\n   The above copyright notice and this permission notice shall be included in all\n   copies or substantial portions of the Software.\n\n   THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n   SOFTWARE.\n\n*/\n\n\n#include \"Regenie.hpp\"\n#include \"Files.hpp\"\n#include \"Geno.hpp\"\n#include \"Joint_Tests.hpp\"\n#include \"survival_data.hpp\"\n#include \"cox_ridge.hpp\"\n#include \"cox_score.hpp\"\n#include \"cox_firth.hpp\"\n#include \"Step1_Models.hpp\"\n#include \"Step2_Models.hpp\"\n#include \"HLM.hpp\"\n#include \"Pheno.hpp\"\n#include \"MultiTrait_Tests.hpp\"\n#include \"Ordinal.hpp\"\n#include \"SKAT.hpp\"\n#include \"Masks.hpp\"\n#include \"Data.hpp\"\n\nusing namespace std;\nusing namespace Eigen;\nusing namespace boost;\nusing boost::math::beta_distribution;\nusing boost::math::chi_squared;\n\n\n// null models\nvoid fit_null_logistic(bool const& silent, const int& chrom, struct param* params, struct phenodt* pheno_data, struct ests* m_ests, struct in_files* files, mstream& sout, bool const& save_betas) {\n\n  if(!silent) sout << \"   -fitting null logistic regression on binary phenotypes...\" << flush;\n  if(params->test_mode) params->pheno_pass = true;\n\n  auto t1 = std::chrono::high_resolution_clock::now();\n  ArrayXd betaold, etavec, pivec, loco_offset, wvec;\n  MatrixXd XtW;\n  if(params->w_interaction || params->firth || (params->use_SPA && params->vc_test) || params->write_null_firth) m_ests->bhat_start.resize(pheno_data->new_cov.cols(), params->n_pheno);\n  if(params->w_interaction) m_ests->offset_nullreg.resize(pheno_data->new_cov.rows(), params->n_pheno);\n  betaold = ArrayXd::Zero(pheno_data->new_cov.cols());\n\n  for(int i = 0; i < params->n_pheno; ++i ){\n\n    if( !params->pheno_pass(i) ) continue;\n\n    MapArXd Y (pheno_data->phenotypes_raw.col(i).data(), pheno_data->phenotypes_raw.rows());\n    MapArXb mask (pheno_data->masked_indivs.col(i).data(), pheno_data->masked_indivs.rows());\n\n    if(params->blup_cov) {\n      pheno_data->new_cov.rightCols(1) = (m_ests->blups.col(i).array() * mask.cast<double>()).matrix();\n      loco_offset = ArrayXd::Zero(Y.size(), 1);\n    } else if(params->test_mode) \n      loco_offset = m_ests->blups.col(i).array() * mask.cast<double>();\n    else loco_offset = ArrayXd::Zero(Y.size(), 1);\n\n    // starting values\n    betaold = 0;\n    if(params->print_cov_betas) {betaold(0) = (0.5 + mask.select(Y,0).sum()) / (pheno_data->Neff(i) + 1); betaold(0) = log( betaold(0) / (1 - betaold(0))) - 
loco_offset.mean();}\n    get_pvec(etavec, pivec, betaold, loco_offset, pheno_data->new_cov, params->numtol_eps);\n\n    // check if model converged\n    if(!(fit_logistic(Y, pheno_data->new_cov, loco_offset, mask, pivec, etavec, betaold, params, sout, true, params->numtol) || fit_logistic(Y, pheno_data->new_cov, loco_offset, mask, pivec, etavec, betaold, params, sout, false, params->numtol))) {\n      bool skip_pheno = true;\n\n      // if not, get starting values by omitting loco offset (instead of at 0)\n      if(params->test_mode && !params->skip_blups && !params->blup_cov){\n        if(!silent) sout << \"\\n     WARNING: logistic regression did not converge for phenotype '\" << files->pheno_names[i] <<\"'. Retrying using starting beta from model without LOCO offset.\";\n        ArrayXd loco_dummy = ArrayXd::Zero(Y.size(), 1);\n        pivec = ( 0.5 + Y ) / 2;\n        etavec = mask.select( log(pivec/ (1-pivec)), 0);\n        betaold = 0;\n        if(params->print_cov_betas) {\n          betaold(0) = (0.5 + mask.select(Y,0).sum()) / (pheno_data->Neff(i) + 1); betaold(0) = log( betaold(0) / (1 - betaold(0)));\n          get_pvec(etavec, pivec, betaold, loco_dummy, pheno_data->new_cov, params->numtol_eps);\n        }\n        if( fit_logistic(Y, pheno_data->new_cov, loco_dummy, mask, pivec, etavec, betaold, params, sout, true, params->numtol) || fit_logistic(Y, pheno_data->new_cov, loco_dummy, mask, pivec, etavec, betaold, params, sout, false, params->numtol) ){ \n          get_pvec(etavec, pivec, betaold, loco_dummy, pheno_data->new_cov, params->numtol_eps);\n          skip_pheno = !(fit_logistic(Y, pheno_data->new_cov, loco_offset, mask, pivec, etavec, betaold, params, sout, true, params->numtol) || fit_logistic(Y, pheno_data->new_cov, loco_offset, mask, pivec, etavec, betaold, params, sout, false, params->numtol));\n        }\n      }\n\n      if(skip_pheno){\n        params->pheno_pass(i) = false; // phenotype will be ignored\n        params->pheno_fail_nullreg(i) = true;\n        if(!silent) sout << \"\\n     WARNING: logistic regression did not converge for phenotype '\" << files->pheno_names[i] <<\"'.\";\n        continue;\n        // throw \"logistic regression did not converge for phenotype \" + files->pheno_names[i] + \". 
Perhaps increase --niter or check the covariates.\";\n      }\n    } \n    \n    if( !silent && (mask && (pivec < params->numtol_eps || pivec > 1 - params->numtol_eps)).any() )\n      sout << \"\\n     WARNING: Fitted probabilities numerically 0/1 occurred (phenotype '\" << files->pheno_names[i] <<\"').\";\n\n    if(params->test_mode){\n      if(save_betas && params->print_cov_betas) {\n        params->cov_betas.col(i) = betaold;\n        // get se\n        get_wvec(pivec, wvec, mask, params->l1_ridge_eps);\n        MatrixXd XWsqrt = ( pheno_data->new_cov.array().colwise() * (wvec.sqrt() * mask.cast<double>()) ).matrix();\n        MatrixXd xtx_inv = ( XWsqrt.transpose() * XWsqrt ).colPivHouseholderQr().inverse();\n        params->xtx_inv_diag.col(i).array() = xtx_inv.diagonal().array().sqrt();\n        continue;\n      }\n      m_ests->Y_hat_p.col(i) = pivec.matrix() ;\n      get_wvec(pivec, wvec, mask, params->l1_ridge_eps);\n      m_ests->Gamma_sqrt.col(i) = wvec.sqrt().matrix();\n      m_ests->Gamma_sqrt_mask.col(i) = (m_ests->Gamma_sqrt.col(i).array() * mask.cast<double>()).matrix();\n      m_ests->X_Gamma[i] = m_ests->Gamma_sqrt_mask.col(i).asDiagonal() * pheno_data->new_cov;\n      getBasis(m_ests->X_Gamma[i], params);\n      if(params->w_interaction || params->firth || (params->use_SPA && params->vc_test)) m_ests->bhat_start.col(i) = betaold.matrix();\n      if(params->w_interaction) m_ests->offset_nullreg.col(i) = etavec;\n    } else {\n      m_ests->offset_nullreg.col(i) = etavec;\n      if(params->write_null_firth) m_ests->bhat_start.col(i) = betaold.matrix();\n    }\n\n    /*\n     Files fstar;\n     fstar.openForWrite(\"offsets.txt\", sout);\n     fstar << etavec;\n     fstar.closeFile();\n     */\n\n  }\n\n  auto t2 = std::chrono::high_resolution_clock::now();\n  auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1);\n  if(!silent) sout << \"done (\" << duration.count() << \"ms) \"<< endl;\n\n}\n\nbool fit_logistic(const Ref<const ArrayXd>& Y1, const Ref<const MatrixXd>& X1, const Ref<const ArrayXd>& offset, const Ref<const ArrayXb>& mask, ArrayXd& pivec, ArrayXd& etavec, ArrayXd& betavec, struct param const* params, mstream& sout, bool const& check_hs_dev, double const& numtol) {\n\n  bool small_score_reached = false;\n  int niter_cur = 0, niter_search;\n  double dev_old, dev_new=0, diff_dev;\n  ArrayXd score, betanew, wvec, zvec;\n  MatrixXd XtW, XtWX;\n\n  dev_old = get_logist_dev(Y1, pivec, mask);\n  //cerr << dev_old << endl << pivec.head(5)<<\"\\n--\\n\";\n\n  while(niter_cur++ < params->niter_max){\n\n    // p*(1-p) and check for zeroes\n    if( get_wvec(pivec, wvec, mask, params->numtol_eps) ){\n      if(params->verbose) sout << \"ERROR: Zeros occurred in Var(Y) during logistic regression.\\n\";\n      return false;\n    }\n\n    XtW = X1.transpose() * mask.select(wvec,0).matrix().asDiagonal();\n    XtWX = XtW * X1;\n\n    // working vector z = X*beta + (Y-p)/(p*(1-p))\n    zvec = mask.select(etavec - offset + (Y1 - pivec) / wvec, 0);\n\n    // parameter estimate\n    betanew = ( XtWX ).colPivHouseholderQr().solve( XtW * zvec.matrix() ).array();\n\n    // start step-halving\n    for( niter_search = 1; niter_search <= params->niter_max_line_search; niter_search++ ){\n\n      get_pvec(etavec, pivec, betanew, offset, X1, params->numtol_eps);\n      dev_new = get_logist_dev(Y1, pivec, mask);\n\n      if(params->debug) cerr << \"HS#\" << niter_search << setprecision(16) << \": p in (\" << pivec.minCoeff() << \",\" << pivec.maxCoeff() << \"); dev \" 
<< dev_old << \"->\" << dev_new << \" \\n\";\n      if( mask.select((pivec > 0) && (pivec < 1), true).all() && (!check_hs_dev || (dev_new < dev_old)) ) break;\n\n      // adjust step size\n      betanew = (betavec + betanew) / 2;\n\n    }\n    if( niter_search > params->niter_max_line_search ) return false; // step-halving failed\n\n    // stopping criterion\n    score = X1.transpose() * mask.select(Y1 - pivec, 0).matrix();\n    if( score.abs().maxCoeff() < numtol ) break; // prefer for score to be below tol\n    // check for failed convergence early on\n    if(!small_score_reached && (niter_cur < 20) && (score.abs().maxCoeff() < 1)) small_score_reached = true;\n    if(small_score_reached && (niter_cur > 20) && (score.abs().maxCoeff() > 5)) return false; // score should get closer to 0 after this many iters\n\n    diff_dev = abs(dev_new - dev_old)/(0.1 + abs(dev_new));\n    if(params->debug) cerr << \"#\" << niter_cur << \": score_max=\" << score.abs().maxCoeff() << \";dev_diff=\" << setprecision(16) << diff_dev << \"; beta.head=\" << betanew.head(5).matrix().transpose() << \"\\n\";\n\n    betavec = betanew;\n    dev_old = dev_new;\n  }\n  if(params->debug) cerr << \"Log. reg iter#\" << niter_cur << \": beta=\" << betanew.matrix().transpose() << \"; score_max=\" << score.abs().maxCoeff() << \";dev_diff=\" << \n   setprecision(16) << diff_dev << \"\\n\";\n\n  // If didn't converge (check frac. change in deviances)\n  if( ((diff_dev == 0) || (diff_dev >= numtol)) && (niter_cur > params->niter_max) )\n    return false;\n\n  betavec = betanew;\n\n  return true;\n}\n\n// poisson models\nvoid fit_null_poisson(const int& chrom, struct param* params, struct phenodt* pheno_data, struct ests* m_ests, struct in_files* files, mstream& sout, bool const& save_betas) {\n\n  sout << \"   -fitting null poisson regression...\" << flush;\n\n  auto t1 = std::chrono::high_resolution_clock::now();\n  ArrayXd betaold, etavec, pivec, loco_offset, wvec;\n  if(params->w_interaction) m_ests->bhat_start.resize(pheno_data->new_cov.cols(), params->n_pheno);\n\n  for(int i = 0; i < params->n_pheno; ++i ){\n\n    if( !params->pheno_pass(i) ) continue;\n\n    MapArXd Y (pheno_data->phenotypes_raw.col(i).data(), pheno_data->phenotypes_raw.rows());\n    MapArXb mask (pheno_data->masked_indivs.col(i).data(), pheno_data->masked_indivs.rows());\n\n    if(params->test_mode) loco_offset = m_ests->blups.col(i).array() * mask.cast<double>();\n    else loco_offset = ArrayXd::Zero(Y.size(), 1);\n\n    // starting values\n    pivec = Y + 1e-1; // avoid 0\n    etavec = mask.select( log(pivec), 0);\n    betaold = ArrayXd::Zero(pheno_data->new_cov.cols());\n    betaold(0) = etavec.mean() - loco_offset.mean();\n\n    if(!fit_poisson(Y, pheno_data->new_cov, loco_offset, mask, pivec, etavec, betaold, params, sout)){\n      params->pheno_pass(i) = false; // phenotype will be ignored\n      sout << \"\\n     WARNING: poisson regression did not converge for phenotype '\" << files->pheno_names[i] <<\"'.\";\n      continue;\n      // throw \"poisson regression did not converge for phenotype \" + files->pheno_names[i] + \". 
Perhaps increase --niter?\";\n    }\n    else if( (mask && pivec < params->numtol_eps).any() )\n      sout << \"\\n     WARNING: Fitted rates numerically 0 occurred (phenotype '\" << files->pheno_names[i] <<\"').\";\n\n    if(params->test_mode){\n      if(save_betas && params->print_cov_betas) {\n        params->cov_betas.col(i) = betaold;\n        // get se\n        MatrixXd XWsqrt = ( pheno_data->new_cov.array().colwise() * mask.select(pivec,0).sqrt() ).matrix();\n        MatrixXd xtx_inv = ( XWsqrt.transpose() * XWsqrt ).colPivHouseholderQr().inverse();\n        params->xtx_inv_diag.col(i).array() = xtx_inv.diagonal().array().sqrt();\n        continue;\n      }\n      m_ests->Y_hat_p.col(i) = pivec.matrix() ;\n      m_ests->Gamma_sqrt.col(i) = pivec.sqrt().matrix();\n      m_ests->X_Gamma[i] = ( pheno_data->new_cov.array().colwise() * (m_ests->Gamma_sqrt.col(i).array() * mask.cast<double>()) ).matrix();\n      getBasis(m_ests->X_Gamma[i], params);\n      if(params->w_interaction) m_ests->bhat_start.col(i) = betaold.matrix();\n    } else m_ests->offset_nullreg.col(i) = etavec;\n\n    /*\n     Files fstar;\n     fstar.openForWrite(\"offsets.txt\", sout);\n     fstar << etavec;\n     fstar.closeFile();\n     */\n\n  }\n\n  sout << \"done\";\n  auto t2 = std::chrono::high_resolution_clock::now();\n  auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1);\n  sout << \" (\" << duration.count() << \"ms) \"<< endl;\n\n}\n\nbool fit_poisson(const Ref<const ArrayXd>& Y1, const Ref<const MatrixXd>& X1, const Ref<const ArrayXd>& offset, const Ref<const ArrayXb>& mask, ArrayXd& pivec, ArrayXd& etavec, ArrayXd& betavec, struct param const* params, mstream& sout) {\n\n  bool dev_conv = false;\n  int niter_cur = 0;\n  double dev_old, dev_new=0;\n  ArrayXd score, betanew, zvec;\n  MatrixXd XtW, XtWX;\n\n  dev_old = get_poisson_dev(Y1, pivec, mask);\n  if(params->debug) cerr << \"\\nstarting deviance: \" << dev_old << endl;\n\n  while(niter_cur++ < params->niter_max){\n\n    // check for zeroes\n    if( (mask && (pivec == 0)).any() ){\n      if(params->verbose) sout << \"ERROR: Zeros occurred in Var(Y) during poisson regression.\\n\";\n      return false;\n    }\n\n    XtW = X1.transpose() * mask.select(pivec,0).matrix().asDiagonal();\n    XtWX = XtW * X1;\n\n    // working vector z = X*beta + (Y-p)/w\n    zvec = mask.select(etavec - offset + (Y1 - pivec) / pivec, 0);\n\n    // parameter estimate\n    betanew = ( XtWX ).colPivHouseholderQr().solve( XtW * zvec.matrix() ).array();\n\n    // start step-halving\n    for( int niter_search = 1; niter_search <= params->niter_max_line_search; niter_search++ ){\n\n      get_pvec_poisson(etavec, pivec, betanew, offset, X1, params->numtol_eps);\n      dev_new = get_poisson_dev(Y1, pivec, mask);\n      if( !(mask && (pivec == 0)).any() ) break;\n\n      // adjust step size\n      betanew = (betavec + betanew) / 2;\n\n    }\n    if(params->debug && (niter_cur%5==0)) cerr << \"#\" << niter_cur << \" -> \" << \n      betanew.matrix().transpose() << \" : \" << dev_new << \"\\n\";\n\n    score = X1.transpose() * mask.select(Y1 - pivec, 0).matrix();\n\n    // stopping criterion\n    dev_conv = (abs(dev_new - dev_old)/(0.1 + abs(dev_new))) < params->tol;\n    if( score.abs().maxCoeff() < params->tol ) break;\n\n    betavec = betanew;\n    dev_old = dev_new;\n  }\n\n  // If didn't converge\n  if( !dev_conv && (niter_cur > params->niter_max) )\n    return false;\n\n  betavec = betanew;\n\n  if(params->debug) cerr << \"Final (\" << niter_cur << \") : \" 
<< betavec.matrix().transpose() << \" : \" << score.abs().maxCoeff() << \"\\n\";\n  return true;\n}\n\n// Cox regression null model\nvoid fit_null_cox(bool const& silent, const int& chrom, struct param* params, struct phenodt* pheno_data, struct ests* m_ests, struct in_files* files, mstream& sout, bool const& save_betas){\n  if(!silent) sout << \"   -fitting null cox regression on time-to-event phenotypes...\" << flush;\n  \n  Eigen::VectorXd loco_offset;\n  auto t1 = std::chrono::high_resolution_clock::now();\n\n  for (const auto& entry: files->t2e_map) {\n    const std::string& time_name = entry.first;\n    const std::string& event_name = entry.second;\n    // find time column index\n    std::vector<std::string>::iterator it_time = std::find(files->pheno_names.begin(), files->pheno_names.end(), time_name);\n    int time_index = std::distance(files->pheno_names.begin(), it_time);\n    Eigen::VectorXd ph_time = pheno_data->phenotypes_raw.col(time_index);\n    MapArXb mask (pheno_data->masked_indivs.col(time_index).data(), pheno_data->masked_indivs.rows());\n\n    // find event column index\n    std::vector<std::string>::iterator it_event = std::find(files->pheno_names.begin(), files->pheno_names.end(), event_name);\n    int event_index = std::distance(files->pheno_names.begin(), it_event);\n    Eigen::VectorXd ph_event = pheno_data->phenotypes_raw.col(event_index);\n    \n    if(params->blup_cov) {\n      pheno_data->new_cov.rightCols(1) = (m_ests->blups.col(time_index).array() * mask.cast<double>()).matrix();\n      loco_offset = Eigen::VectorXd::Zero(ph_time.size());\n    } else if(params->test_mode) \n      loco_offset = (m_ests->blups.col(time_index).array() * mask.cast<double>()).matrix();\n    else loco_offset = Eigen::VectorXd::Zero(ph_time.size());\n\n    survival_data survivalNullData;\n    survivalNullData.setup(ph_time, ph_event, mask, !params->test_mode);\n\n    cox_ridge coxRidge_null_lamb0(survivalNullData, pheno_data->new_cov, loco_offset, mask, 0, params->niter_max, params->niter_max_line_search, params->numtol_cox);\n    coxRidge_null_lamb0.fit(survivalNullData, pheno_data->new_cov, loco_offset, mask);\n\n    if (params->test_mode) {\n      cox_mle coxMLE;\n      coxMLE.setup(survivalNullData, pheno_data->new_cov, loco_offset, mask, params->niter_max, params->niter_max_line_search, params->numtol_cox, false, coxRidge_null_lamb0.beta, coxRidge_null_lamb0.eta);\n      coxMLE.fit(survivalNullData, pheno_data->new_cov, loco_offset, mask);\n\n      if (coxMLE.converge == false) {\n        cox_firth cox_null_model;\n        cox_null_model.setup(survivalNullData, pheno_data->new_cov, loco_offset, pheno_data->new_cov.cols(), params->niter_max, params->niter_max_line_search, params->numtol_cox, 0, params->numtol_beta_cox, params->maxstep_null, false, false);\n        cox_null_model.fit(survivalNullData, pheno_data->new_cov, loco_offset);\n\n        coxMLE.setup(survivalNullData, pheno_data->new_cov, loco_offset, mask, params->niter_max, params->niter_max_line_search, params->numtol_cox, false, cox_null_model.beta, cox_null_model.eta);\n        coxMLE.fit(survivalNullData, pheno_data->new_cov, loco_offset, mask);\n      }\n\n      if (coxMLE.converge == false) {\n        params->pheno_pass(time_index) = false; // phenotype will be ignored\n        params->pheno_fail_nullreg(time_index) = true;\n        if(!silent) sout << \"\\n     WARNING: step2 cox null regression did not converge for phenotype '\" << time_name <<\"'.\";\n        continue;\n      } else {\n        
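// MLE converged: prep the score-test quantities from the null Cox fit and store them for the step-2 tests\n        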
coxMLE.cox_test_prep(survivalNullData, pheno_data->new_cov, loco_offset, mask);\n        m_ests->survival_data_pheno[time_index] = survivalNullData;\n        m_ests->cox_MLE_NULL[time_index] = coxMLE;\n      }\n\n      if(save_betas && params->print_cov_betas) {\n        params->cov_betas.col(time_index) = coxMLE.beta;\n        params->xtx_inv_diag.col(time_index).array() = coxMLE.XtWX.diagonal().array().sqrt();\n      }\n    } else {\n      if (coxRidge_null_lamb0.converge == false) {\n        // try cox firth, without firth\n        cox_firth cox_null_model;\n        cox_null_model.setup(survivalNullData, pheno_data->new_cov, loco_offset, pheno_data->new_cov.cols(), params->niter_max, params->niter_max_line_search, params->numtol_cox, params->numtol_cox_stephalf, params->numtol_beta_cox, params->maxstep_null, false, false);\n        cox_null_model.fit(survivalNullData, pheno_data->new_cov, loco_offset);\n\n        if (cox_null_model.converge == false) {\n          cox_null_model.setup(survivalNullData, pheno_data->new_cov, loco_offset, pheno_data->new_cov.cols(), params->niter_max, params->niter_max_line_search, params->numtol_cox, 0, params->numtol_beta_cox, params->maxstep_null, false, false);\n          cox_null_model.fit(survivalNullData, pheno_data->new_cov, loco_offset);\n        }\n\n        if (cox_null_model.converge == false) {\n          params->pheno_pass(time_index) = false; // phenotype will be ignored\n          params->pheno_fail_nullreg(time_index) = true;\n          if(!silent) sout << \"\\n     WARNING: step1 cox null regression did not converge for phenotype '\" << time_name <<\"'.\";\n          continue;\n        } else {\n          m_ests->offset_nullreg.col(time_index) = cox_null_model.eta;\n        }\n      } else {\n        m_ests->offset_nullreg.col(time_index) = coxRidge_null_lamb0.eta;\n      }\n      \n    }\n  }\n  auto t2 = std::chrono::high_resolution_clock::now();\n  auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1);\n  if(!silent) sout << \"done (\" << duration.count() << \"ms) \"<< endl;\n}\n\ndouble getCoxLambdaMax(const Eigen::MatrixXd& Xmat, const Eigen::VectorXd& gradient) {\n    Eigen::VectorXd g = (Xmat.transpose() * gradient).array().abs();\n    double lambda_max = g.maxCoeff() / 1e-3;\n    return lambda_max;\n}\n\n/////////////////////////////////////////////////\n/////////////////////////////////////////////////\n////          level 0 models\n/////////////////////////////////////////////////\n/////////////////////////////////////////////////\n\nvoid ridge_level_0(const int& block, struct in_files* files, struct param* params, struct filter* filters, struct ests* m_ests, struct geno_block* Gblock, struct phenodt* pheno_data, vector<snp>& snpinfo, struct ridgel0* l0, struct ridgel1* l1, vector<MatrixXb>& masked_in_folds, mstream& sout) {\n\n  sout << \"   -calc level 0 ridge...\" << flush;\n  auto t2 = std::chrono::high_resolution_clock::now();\n\n  int bs = l0->GGt.rows();\n  int block_eff = params->write_l0_pred ? 
0 : block;\n  string op_name, out_pheno;\n  ofstream ofile;\n\n  MatrixXd ww1, ww2, beta, pred, vmat, dvec, Xout;\n  MatrixXd ident_l0 = MatrixXd::Identity(bs, bs);\n  MatrixXd p_sum = MatrixXd::Zero(params->n_ridge_l0, params->n_pheno);\n  MatrixXd p_sum2 = MatrixXd::Zero(params->n_ridge_l0, params->n_pheno);\n\n  if(!params->within_sample_l0 && params->print_block_betas) {\n    for(int ph = 0; ph < params->n_pheno; ++ph )\n      if( params->pheno_pass(ph) )\n        params->beta_print_out[ph] = MatrixXd::Zero(params->n_ridge_l0, bs);\n  }\n\n  uint32_t cum_size_folds = 0;\n  for(int i = 0; i < params->cv_folds; ++i ) {\n    // assign masking within folds\n    masked_in_folds[i] = pheno_data->masked_indivs.block(cum_size_folds, 0, params->cv_sizes(i), pheno_data->masked_indivs.cols());\n\n    ww1 = l0->GGt - l0->G_folds[i];\n    SelfAdjointEigenSolver<MatrixXd> eig(ww1);\n    vmat = eig.eigenvectors();\n    dvec = eig.eigenvalues();\n    //if(i == 0)sout << ww1 << endl;\n    ww2 = vmat.transpose() * (l0->GTY - l0->GtY[i]);\n\n    for(int j = 0; j < params->n_ridge_l0; ++j ) {\n\n      // b = U (D+sI)^(-1) U^t GtY\n      beta = vmat * (dvec.array() + params->lambda(j)).inverse().matrix().asDiagonal() * ww2;\n\n      // save beta for each phenotype (only when using out-of-sample pred)\n      if(!params->within_sample_l0 && params->print_block_betas)\n        for(int ph = 0; ph < params->n_pheno; ++ph ) \n          if( params->pheno_pass(ph) )\n            params->beta_print_out[ph].row(j) += beta.col(ph).transpose();\n\n      // out-of-sample predictions (mask missing)\n      pred = ( (beta.transpose() * Gblock->Gmat.block(0, cum_size_folds, bs, params->cv_sizes(i))).array()  * masked_in_folds[i].transpose().array().cast<double>() ).matrix();\n      p_sum.row(j) += pred.rowwise().sum();\n      p_sum2.row(j) += pred.rowwise().squaredNorm();\n\n      // store predictions\n      for(int ph = 0; ph < params->n_pheno; ++ph ) {\n        if( !params->pheno_pass(ph) ) continue;\n        if (params->trait_mode != 3) {\n          l1->test_mat[ph][i].col(block_eff * params->n_ridge_l0 + j) = pred.row(ph).transpose();\n        } else {\n          l1->test_mat_conc[ph].block(cum_size_folds, block_eff * params->n_ridge_l0 + j, params->cv_sizes(i), 1) = pred.row(ph).transpose();\n        }\n        if((block == 0) && (j == 0)) { // same for all blocks & ridge params\n          if (params->trait_mode != 3) {\n            l1->test_pheno[ph][i].col(0) = pheno_data->phenotypes.block(cum_size_folds, ph, params->cv_sizes(i), 1);\n            if (params->trait_mode != 0) {\n              l1->test_pheno_raw[ph][i].col(0) = pheno_data->phenotypes_raw.block(cum_size_folds, ph, params->cv_sizes(i), 1);\n              l1->test_offset[ph][i].col(0) = m_ests->offset_nullreg.block(cum_size_folds, ph, params->cv_sizes(i), 1);\n            }\n          } else {\n            l1->fold_id[ph](seqN(cum_size_folds, params->cv_sizes(i))) = Eigen::VectorXi::Constant(params->cv_sizes(i), i);\n          }\n        }\n      }\n    }\n    cum_size_folds += params->cv_sizes(i);\n  }\n  if(params->debug && (block < 5) && params->trait_mode != 3) {\n    if(params->test_l0)\n      cerr << \"Ymat (Y1):\\n\" << (l1->test_pheno[0][0].topRows(5) - l1->top_snp_pgs[0].topRows(5)) << endl;\n    else\n      cerr << \"Ymat (Y1):\\n\" << l1->test_pheno[0][0].topRows(5) << endl;\n    cerr << \"Wmat (Y1):\\n\" << l1->test_mat[0][0].topRows(5).middleCols(block_eff * params->n_ridge_l0, params->n_ridge_l0) << endl;\n  }\n\n  // center and scale using 
the whole sample\n  for(int ph = 0; ph < params->n_pheno; ++ph ) {\n\n    if( !params->pheno_pass(ph) ) continue;\n\n    RowVectorXd p_mean, p_invsd;\n    p_mean = p_sum.col(ph).transpose() / pheno_data->Neff(ph);\n    p_invsd = sqrt( (pheno_data->Neff(ph) - 1) / (p_sum2.col(ph).transpose().array() - pheno_data->Neff(ph) * p_mean.array().square()) );\n\n    // scale printed estimates by the sd\n    if(params->print_block_betas)\n      params->beta_print_out[ph].array().colwise() *= p_invsd.transpose().array();\n\n    if(params->write_l0_pred) Xout = MatrixXd::Zero(params->n_samples, params->n_ridge_l0);\n\n    cum_size_folds = 0;\n    for(int i = 0; i < params->cv_folds; ++i ) {\n      if( params->trait_mode != 3){\n        l1->test_mat[ph][i].block(0, block_eff * params->n_ridge_l0, params->cv_sizes(i), params->n_ridge_l0).rowwise() -= p_mean;\n        l1->test_mat[ph][i].block(0, block_eff * params->n_ridge_l0, params->cv_sizes(i), params->n_ridge_l0).array().rowwise() *= p_invsd.array();\n      } else {\n        l1->test_mat_conc[ph].block(cum_size_folds, block_eff * params->n_ridge_l0, params->cv_sizes(i), params->n_ridge_l0).rowwise() -= p_mean;\n        l1->test_mat_conc[ph].block(cum_size_folds, block_eff * params->n_ridge_l0, params->cv_sizes(i), params->n_ridge_l0).array().rowwise() *= p_invsd.array();\n      }\n\n      if(params->write_l0_pred) {\n        if (params->trait_mode != 3) {\n          Xout.block(cum_size_folds, 0, params->cv_sizes(i), params->n_ridge_l0) = l1->test_mat[ph][i].block(0, block_eff * params->n_ridge_l0, params->cv_sizes(i), params->n_ridge_l0);\n        } else {\n          Xout.block(cum_size_folds, 0, params->cv_sizes(i), params->n_ridge_l0) = l1->test_mat_conc[ph].block(cum_size_folds, block_eff * params->n_ridge_l0, params->cv_sizes(i), params->n_ridge_l0);\n        }\n      }\n      cum_size_folds += params->cv_sizes(i);\n    }\n\n    // write predictions to file if specified\n    if(params->write_l0_pred) {\n      write_l0_file(files->write_preds_files[ph].get(), Xout, sout);\n      //if(block ==0 && ph == 0 ) sout << endl << \"Out \" << endl <<  Xout.block(0, 0, 3, 3) << endl;\n    }\n\n  }\n\n  // if printing betas to file (average over folds) [assume snp IDs are unique]\n  //   -> separate file for each block (params->n_ridge_l0 rows & (2+bs) columns)\n  if(!params->within_sample_l0 && params->print_block_betas) {\n    op_name = files->out_file + \"_block\" + to_string(block+1) + \".betas\";\n    openStream(&ofile, op_name, std::ios::out, sout);\n\n    // Header: [TRAIT PARAM snpID1 ... snpIDk]\n    ofile << \"TRAIT PARAM \" ;\n    for(int i = 0; i < bs; ++i )\n      ofile << snpinfo[params->print_snpcount++].ID << \" \";\n    ofile << endl;\n\n    // Each line: [pheno# ridge# beta1 ... 
betak]\n    for(int ph = 0; ph < params->n_pheno; ++ph ){\n\n      if( !params->pheno_pass(ph) ) continue;\n\n      params->beta_print_out[ph] /= params->cv_folds;\n      for(int j = 0; j < params->n_ridge_l0; ++j ) {\n        ofile << ph + 1 << \" \" <<  j + 1 << \" \";\n        for(int i = 0; i < bs; ++i )\n          ofile << params->beta_print_out[ph](j,i) << \" \";\n        ofile << endl;\n      }\n    }\n    ofile.close();\n  }\n\n  sout << \"done\";\n  auto t3 = std::chrono::high_resolution_clock::now();\n  auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(t3 - t2);\n  sout << \" (\" << duration.count() << \"ms) \"<< endl;\n}\n\nvoid ridge_level_0_loocv(const int block, struct in_files* files, struct param* params, struct filter* filters, struct ests* m_ests, struct geno_block* Gblock, struct phenodt* pheno_data, vector<snp>& snpinfo, struct ridgel0* l0, struct ridgel1* l1, mstream& sout) {\n\n  sout << \"   -calc level 0 ridge...\" << flush;\n  auto t2 = std::chrono::high_resolution_clock::now();\n  int bs = l0->GGt_eig_val.size();\n  int block_eff = params->write_l0_pred ? 0 : block; // if writing to file\n  string out_pheno;\n  ofstream ofile;\n  VectorXd z1, gvec;\n  MatrixXd VtG, z2, pred, Xout;\n  RowVectorXd p_mean, p_sd;\n\n  /*\n     if(bs > params->n_samples)\n     throw \"block size must be smaller than the number of samples to perform LOOCV!\";\n     */\n\n  uint64 max_bytes = params->chunk_mb * 1e6;\n  // amount of RAM used < max_mb [ creating (bs * target_size) matrix ]\n  int nchunk = ceil( params->cv_folds * bs * sizeof(double) * 1.0 / max_bytes );\n  if (params->verbose) sout << nchunk << \" chunks...\" << flush;\n  int chunk, size_chunk, target_size = params->cv_folds / nchunk;\n  int j_start;\n\n  // make matrix of (eigen-value + lambda)^(-1)\n  MatrixXd DL_inv = ( l0->GGt_eig_val.rowwise().replicate(params->n_ridge_l0).array().rowwise() + params->lambda.matrix().transpose().array() ).inverse().matrix(); // kxR\n\n  if(!params->test_l0 || (l0->nspns_picked_block.maxCoeff() == 0) || (params->n_pheno == 1)){\n\n    if(params->print_block_betas) // assumes P=1\n      l1->beta_snp_step1.middleRows(filters->step1_snp_count, bs) = l0->GGt_eig_vec * (DL_inv.array().colwise() * l0->Wmat.col(0).array()).matrix();// K x R\n\n    for(chunk = 0; chunk < nchunk; ++chunk ) {\n      size_chunk = chunk == nchunk - 1? 
params->cv_folds - target_size * chunk : target_size;\n      j_start = chunk * target_size;\n\n      if((params->n_pheno == 1) && l0->subset_l0_snps_gmat)\n        VtG = l0->GGt_eig_vec.transpose() * Gblock->Gmat(l0->indices_gmat_keep, seqN(j_start, size_chunk));\n      else\n        VtG = l0->GGt_eig_vec.transpose() * Gblock->Gmat(all, seqN(j_start, size_chunk));\n      for(int i = 0; i < size_chunk; ++i ) {\n        z1 = VtG.col(i); // Kx1\n        z2 = DL_inv.array().colwise() * z1.array(); // K x R\n        gvec = z2.transpose() * z1; // R x 1\n        if(params->test_l0)\n          pred = z2.transpose() * l0->Wmat - gvec * l0->ymat_res.row(j_start + i);\n        else\n          pred = z2.transpose() * l0->Wmat - gvec * pheno_data->phenotypes.row(j_start + i);\n        pred.array().colwise() /= 1 - gvec.array(); // R x P\n\n        for(int ph = 0; ph < params->n_pheno; ++ph )\n          if( params->pheno_pass(ph) )\n            l1->test_mat_conc[ph].block(j_start + i, block_eff * params->n_ridge_l0, 1, params->n_ridge_l0) = pred.col(ph).transpose();\n      }\n    } \n\n  } else for(chunk = 0; chunk < nchunk; ++chunk ) {\n    size_chunk = chunk == nchunk - 1? params->cv_folds - target_size * chunk : target_size;\n    j_start = chunk * target_size;\n    VtG = l0->GGt_eig_vec.transpose() * Gblock->Gmat(l0->indices_gmat_keep, seqN(j_start, size_chunk)); // k x N\n    MatrixXd gamma_rho = (DL_inv.transpose() * VtG.array().square().matrix()).transpose(); // N x R\n    for(int ph = 0; ph < params->n_pheno; ++ph ) {\n      if(!params->pheno_pass(ph) ) continue;\n      Ref<MatrixXd> X_l1 = l1->test_mat_conc[ph].block(j_start, block_eff * params->n_ridge_l0, size_chunk, params->n_ridge_l0); // NxR\n      X_l1 = VtG.transpose() * (DL_inv.array().colwise() * l0->Wmat.col(ph).array()).matrix() - (gamma_rho.array().colwise() * l0->ymat_res.col(ph).segment(j_start, size_chunk).array()).matrix(); // N x R\n      X_l1.array() /= (1 - gamma_rho.array());\n    }\n  }\n\n  if(params->debug && (block < 5)) {\n    if(params->test_l0)\n      cerr << \"Ymat (Y1-Y5):\\n\" << l0->ymat_res.topLeftCorner(5,min(5, params->n_pheno)) << endl;\n    else\n      cerr << \"Ymat (Y1-Y5):\\n\" << pheno_data->phenotypes.topLeftCorner(5,min(5, params->n_pheno)) << endl;\n    cerr << \"Wmat (Y1):\\n\" << l1->test_mat_conc[0].topRows(5).middleCols(block_eff * params->n_ridge_l0, params->n_ridge_l0) << endl;\n  }\n\n  // center and scale within the block\n  for(int ph = 0; ph < params->n_pheno; ++ph ) {\n    if( !params->pheno_pass(ph) ) continue;\n    // mask missing first\n    l1->test_mat_conc[ph].middleCols(block_eff * params->n_ridge_l0, params->n_ridge_l0).array().colwise() *= pheno_data->masked_indivs.col(ph).array().cast<double>();\n    p_mean = l1->test_mat_conc[ph].middleCols(block_eff * params->n_ridge_l0, params->n_ridge_l0).colwise().sum() / pheno_data->Neff(ph);\n    //if(i == 0)sout << i << \" \" << p_mean << endl;\n    l1->test_mat_conc[ph].middleCols(block_eff * params->n_ridge_l0, params->n_ridge_l0).rowwise() -= p_mean;\n    // mask missing again\n    l1->test_mat_conc[ph].middleCols(block_eff * params->n_ridge_l0, params->n_ridge_l0).array().colwise() *= pheno_data->masked_indivs.col(ph).array().cast<double>();\n    p_sd = l1->test_mat_conc[ph].middleCols(block_eff * params->n_ridge_l0, params->n_ridge_l0).colwise().norm() / sqrt(pheno_data->Neff(ph) -1);\n    //if(i == 0)sout << i << \" \" << p_sd << endl;\n    l1->test_mat_conc[ph].middleCols(block_eff * params->n_ridge_l0, 
params->n_ridge_l0).array().rowwise() /= p_sd.array();\n\n\n    if(params->write_l0_pred) {\n      Xout = l1->test_mat_conc[ph].leftCols(params->n_ridge_l0);\n      write_l0_file(files->write_preds_files[ph].get(), Xout, sout);\n      //if(block < 2 && ph == 0 ) sout << endl << \"Out \" << endl <<  Xout.topLeftCorner(5, Xout.cols()) << endl;\n    }\n\n    if(params->print_block_betas) {\n      l1->beta_snp_step1.middleRows(filters->step1_snp_count, bs) *= (1/p_sd.array()).matrix().asDiagonal();\n      //cerr << \"Gb:\\n\"<<(Gblock->Gmat.transpose() * l1->beta_snp_step1.middleRows(filters->step1_snp_count, bs)).topRows(5) << \"\\n\\n\" << \"W(centered):\\n\"<<\n       // l1->test_mat_conc[0].block(0, block_eff * params->n_ridge_l0, 5, params->n_ridge_l0) << \"\\n\\n\";\n    }\n\n  }\n\n  sout << \"done\";\n  auto t3 = std::chrono::high_resolution_clock::now();\n  auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(t3 - t2);\n  sout << \" (\" << duration.count() << \"ms) \"<< endl;\n\n}\n\nvoid write_l0_file(ofstream* ofs, MatrixXd& Xout, mstream& sout){\n\n  ofs->write( reinterpret_cast<char *> (&Xout(0,0)), Xout.size() * sizeof(double) );\n  if( ofs->fail() )\n    throw \"cannot successfully write temporary level 0 predictions to disk\";\n\n}\n\n\n/////////////////////////////////////////////////\n/////////////////////////////////////////////////\n////          level 1 models\n/////////////////////////////////////////////////\n/////////////////////////////////////////////////\nvoid set_mem_l1(struct in_files* files, struct param* params, struct filter* filters, struct ests* m_ests, struct geno_block* Gblock, struct phenodt* pheno_data, struct ridgel1* l1, vector<MatrixXb>& masked_in_folds, mstream& sout){ // when l0 was run in parallel\n\n  if(!(params->use_loocv || params->trait_mode == 3)) {\n    uint32_t cum_size_folds = 0;\n    for(int i = 0; i < params->cv_folds; ++i ) {\n      // assign masking within folds\n      masked_in_folds[i] = pheno_data->masked_indivs.middleRows(cum_size_folds, params->cv_sizes(i));\n      // store predictions\n      for(int ph = 0; ph < params->n_pheno; ++ph ) {\n        if( !params->pheno_pass(ph) ) continue;\n        l1->test_pheno[ph][i] = pheno_data->phenotypes.block(cum_size_folds, ph, params->cv_sizes(i), 1);\n        if (params->trait_mode) {\n          l1->test_pheno_raw[ph][i] = pheno_data->phenotypes_raw.block(cum_size_folds, ph, params->cv_sizes(i), 1);\n          l1->test_offset[ph][i] = m_ests->offset_nullreg.block(cum_size_folds, ph, params->cv_sizes(i), 1);\n        }\n      }\n      cum_size_folds += params->cv_sizes(i);\n    }\n  } else if (params->trait_mode == 3) {\n    uint32_t cum_size_folds = 0;\n    for(int i = 0; i < params->cv_folds; ++i ) {\n      for(int ph = 0; ph < params->n_pheno; ++ph ) {\n        if( !params->pheno_pass(ph) ) continue;\n        l1->fold_id[ph](seqN(cum_size_folds, params->cv_sizes(i))) = Eigen::VectorXi::Constant(params->cv_sizes(i), i);\n      }\n      cum_size_folds += params->cv_sizes(i);\n    }\n  }\n}\n\nvoid ridge_level_1(struct in_files* files, struct param* params, struct phenodt* pheno_data, struct ridgel1* l1, mstream& sout) {\n\n  sout << endl << \" Level 1 ridge...\" << endl << flush;\n\n  string in_pheno;\n  ifstream infile;\n  MatrixXd X1, X2, beta_l1, p1, vmat, dvec, dl_inv, XtX_tau;\n  VectorXd VtX2;\n  MatrixXd XtX_sum, XtY_sum;\n\n  // to compute Rsq and MSE of predictions\n  for (int i = 0; i < 5; i++){\n    l1->cumsum_values[i].setZero(params->n_pheno, 
params->n_ridge_l1);\n    if(params->test_l0) l1->cumsum_values_full[i].setZero(params->n_pheno, params->n_ridge_l1);\n  }\n\n  for(int ph = 0; ph < params->n_pheno; ++ph ) {\n    if( !params->pheno_pass(ph) ) continue; // should not happen for qts\n\n    sout << \"   -on phenotype \" << ph+1 <<\" (\" << files->pheno_names[ph] << \")...\" << flush;\n    auto ts1 = std::chrono::high_resolution_clock::now();\n    int ph_eff = params->write_l0_pred ? 0 : ph;\n    int bs_l1 = params->total_n_block * params->n_ridge_l0;\n\n    // read in level 0 predictions from file\n    if(params->write_l0_pred)\n      read_l0(ph, ph_eff, files, params, l1, sout);\n    check_l0(ph, ph_eff, params, l1, pheno_data, sout);\n    bs_l1 = l1->test_mat[ph_eff][0].cols();\n    bool use_simple_ridge = (l1->ridge_param_mult == 1).all();\n\n    // compute XtX and XtY for each fold and their cumulative sums using the test_mat predictions\n    if (!params->within_sample_l0){\n      XtX_sum.setZero(bs_l1, bs_l1);\n      XtY_sum.setZero(bs_l1, 1);\n      for( int i = 0; i < params->cv_folds; ++i ) {\n        l1->X_folds[i] = l1->test_mat[ph_eff][i].transpose() * l1->test_mat[ph_eff][i];\n        l1->XtY[i]     = l1->test_mat[ph_eff][i].transpose() * l1->test_pheno[ph][i];\n        XtX_sum += l1->X_folds[i];\n        XtY_sum += l1->XtY[i];\n      }\n    }\n\n    uint32_t cum_size_folds = 0;\n    for(int i = 0; i < params->cv_folds; ++i ) {\n\n      // use either in-sample or out-of-sample predictions\n      if (params->within_sample_l0) { // DEPRECATED\n        X1 = l1->pred_mat[ph][i].transpose() * l1->pred_mat[ph][i];\n        X2 = l1->pred_mat[ph][i].transpose() * l1->pred_pheno[ph][i];\n      } else{\n        X1 = XtX_sum - l1->X_folds[i];\n        X2 = XtY_sum - l1->XtY[i];\n      }\n\n      if(use_simple_ridge){\n        SelfAdjointEigenSolver<MatrixXd> eigX1(X1);\n        vmat = eigX1.eigenvectors();\n        dvec = eigX1.eigenvalues();\n        VtX2 = vmat.transpose() * X2;\n        // compute solutions for all ridge parameters at once\n        dl_inv = ( dvec.rowwise().replicate(params->n_ridge_l1) + l1->ridge_param_mult.matrix() * params->tau[ph].matrix().transpose() ).array().inverse().matrix();\n        dl_inv.array().colwise() *= VtX2.array();\n        beta_l1 = vmat * dl_inv;\n      } else { // need to compute separately for each parameter\n        beta_l1.resize(bs_l1, params->n_ridge_l1);\n        for(int j = 0; j < params->n_ridge_l1; ++j) {\n          XtX_tau = X1;\n          XtX_tau.diagonal().array() += params->tau[ph](j) * l1->ridge_param_mult;\n          SelfAdjointEigenSolver<MatrixXd> eigMat(XtX_tau);\n          beta_l1.col(j) = eigMat.eigenvectors() * (1/eigMat.eigenvalues().array()).matrix().asDiagonal() * eigMat.eigenvectors().transpose() * X2;\n        }\n      }\n      if(!params->within_sample_l0) l1->beta_hat_level_1[ph][i] = beta_l1;\n      // p1 is Nfold x nridge_l1 matrix\n      p1 = l1->test_mat[ph_eff][i] * beta_l1;\n      l1->cumsum_values[0].row(ph) += p1.colwise().sum();\n      l1->cumsum_values[1].row(ph).array() += l1->test_pheno[ph][i].array().sum();\n      l1->cumsum_values[2].row(ph) += p1.array().square().matrix().colwise().sum();\n      l1->cumsum_values[3].row(ph).array() += l1->test_pheno[ph][i].array().square().sum();\n      l1->cumsum_values[4].row(ph) += (p1.array().colwise() * l1->test_pheno[ph][i].col(0).array()).matrix().colwise().sum() ;\n      if(params->test_l0){ // pred = p1 + top_snp_pgs; Y is res pheno\n        p1.colwise() += l1->top_snp_pgs[0].col(ph).segment(cum_size_folds, 
params->cv_sizes(i));\n        l1->cumsum_values_full[0].row(ph) += p1.colwise().sum();\n        l1->cumsum_values_full[1].row(ph).array() += pheno_data->phenotypes.block(cum_size_folds, ph, params->cv_sizes(i), 1).array().sum();\n        l1->cumsum_values_full[2].row(ph) += p1.array().square().matrix().colwise().sum();\n        l1->cumsum_values_full[3].row(ph).array() += pheno_data->phenotypes.block(cum_size_folds, ph, params->cv_sizes(i), 1).array().square().sum();\n        l1->cumsum_values_full[4].row(ph) += (p1.array().colwise() * pheno_data->phenotypes.block(cum_size_folds, ph, params->cv_sizes(i), 1).col(0).array()).matrix().colwise().sum() ;\n      }\n\n      cum_size_folds += params->cv_sizes(i);\n    }\n\n    sout << \"done\";\n    auto ts2 = std::chrono::high_resolution_clock::now();\n    auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(ts2 - ts1);\n    sout << \" (\" << duration.count() << \"ms) \"<< endl;\n  }\n\n  sout << endl;\n}\n\n\nvoid ridge_level_1_loocv(struct in_files* files, struct param* params, struct phenodt* pheno_data, struct ridgel1* l1, mstream& sout) {\n\n  sout << endl << \" Level 1 ridge...\" << flush;\n\n  int bs_l1 = params->total_n_block * params->n_ridge_l0;\n  int ph_eff;\n  string in_pheno;\n  ifstream infile;\n  MatrixXd XH_chunk, Z1, Z2, dl, dl_inv, xtx, tmpMat;\n  VectorXd wvec, zvec, Yvec, tmpVec, calFactor, pred;\n\n  for (int i = 0; i < 5; i++){\n    l1->cumsum_values[i].setZero(params->n_pheno, params->n_ridge_l1);\n    if(params->test_l0) l1->cumsum_values_full[i].setZero(params->n_pheno, params->n_ridge_l1);\n  }\n  if(params->test_l0) l1->cumsum_values_full[3].array().colwise() += pheno_data->Neff - params->ncov; \n  else l1->cumsum_values[3].array().colwise() += pheno_data->Neff - params->ncov; // Sy2\n\n  uint64 max_bytes = params->chunk_mb * 1e6;\n  // amount of RAM used < max_mb [ creating (target_size * bs_l1) matrix ]\n  int nchunk = ceil( params->cv_folds * bs_l1 * sizeof(double) * 1.0 / max_bytes );\n  if (params->verbose) sout << nchunk << \" chunks...\";\n  sout << endl;\n  int chunk, size_chunk, target_size = params->cv_folds / nchunk;\n  int j_start;\n\n  for(int ph = 0; ph < params->n_pheno; ++ph ) {\n    if( !params->pheno_pass(ph) ) continue; // should not happen for qts\n    sout << \"   -on phenotype \" << ph+1 <<\" (\" << files->pheno_names[ph] <<\")...\" << flush;\n    auto ts1 = std::chrono::high_resolution_clock::now();\n    ph_eff = params->write_l0_pred ? 0 : ph;\n\n    // read in level 0 predictions from file\n    if(params->write_l0_pred)\n      read_l0(ph, ph_eff, files, params, l1, sout);\n    check_l0(ph, ph_eff, params, l1, pheno_data, sout);\n    bs_l1 = l1->test_mat_conc[ph_eff].cols();\n    bool use_simple_ridge = (l1->ridge_param_mult == 1).all();\n    if(params->test_l0)\n      Yvec = pheno_data->phenotypes.col(ph) - l1->top_snp_pgs[0].col(ph);\n    else\n      Yvec = pheno_data->phenotypes.col(ph);\n\n    xtx = l1->test_mat_conc[ph_eff].transpose() * l1->test_mat_conc[ph_eff]; // kxk\n    SelfAdjointEigenSolver<MatrixXd> eigX(xtx);\n    zvec = eigX.eigenvectors().transpose() * (l1->test_mat_conc[ph_eff].transpose() * Yvec); // kx1\n\n    for(chunk = 0; chunk < nchunk; ++chunk) {\n      size_chunk = chunk == nchunk - 1? 
params->cv_folds - target_size * chunk : target_size;\n      j_start = chunk * target_size;\n      Ref<VectorXd> Y_chunk = Yvec.segment(j_start, size_chunk);\n      Ref<MatrixXd> X_chunk = l1->test_mat_conc[ph_eff].middleRows(j_start, size_chunk);\n\n      tmpMat = X_chunk * eigX.eigenvectors(); // N_c x k\n\n      for(int j = 0; j < params->n_ridge_l1; ++j) { // compute separately for each parameter\n        if(use_simple_ridge)\n          tmpVec = (1/(eigX.eigenvalues().array() + params->tau[ph](j))).matrix(); // kx1\n        else\n          tmpVec = (1/(eigX.eigenvalues().array() + params->tau[ph](j) * l1->ridge_param_mult)).matrix();\n        calFactor = tmpMat.array().square().matrix() * tmpVec; //N_cx1\n        pred = tmpMat * (tmpVec.array() * zvec.array()).matrix() - (calFactor.array() * Y_chunk.array()).matrix();\n        pred.array() /= (1 - calFactor.array()); // LOOCV shortcut: (x_i'b - h_ii*y_i)/(1 - h_ii), with h_ii = calFactor\n\n        // compute mse and rsq\n        l1->cumsum_values[0](ph, j) += pred.sum(); // Sx (Y is centered so Sy = 0)\n        l1->cumsum_values[2](ph, j) += pred.squaredNorm(); // Sx2 (Y is scaled so Sy2 = params->n_samples - ncov)\n        l1->cumsum_values[4](ph, j) += pred.dot(Y_chunk); // Sxy\n        if(params->test_l0){ // pred = p1 + top_snp_pgs; Y is res pheno\n          if(j == 0) l1->cumsum_values[3].row(ph).array() += Y_chunk.squaredNorm(); // (Y-PGS) is not standardized\n          pred += l1->top_snp_pgs[0].col(ph).segment(j_start, size_chunk);\n          l1->cumsum_values_full[0](ph, j) += pred.sum(); // Sx\n          l1->cumsum_values_full[2](ph, j) += pred.squaredNorm(); // Sx2\n          l1->cumsum_values_full[4](ph, j) += pred.dot(pheno_data->phenotypes.col(ph).segment(j_start, size_chunk)); // Sxy\n        }\n      }\n    }\n\n    sout << \"done\";\n    auto ts2 = std::chrono::high_resolution_clock::now();\n    auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(ts2 - ts1);\n    sout << \" (\" << duration.count() << \"ms) \"<< endl;\n  }\n\n  sout << endl;\n}\n\n\n// Logistic models\nvoid ridge_logistic_level_1(struct in_files* files, struct param* params, struct phenodt* pheno_data, struct ridgel1* l1, vector<MatrixXb>& masked_in_folds, mstream& sout) {\n\n  sout << endl << \" Level 1 ridge with logistic regression...\" << endl << flush;\n\n  int niter_cur;\n  int ph_eff;\n  string in_pheno;\n  ifstream infile;\n\n  ArrayXd Y1, W1, p1, score;\n  ArrayXd betaold, etavec, pivec, wvec, zvec, betanew, etatest;\n  MatrixXd X1, XtW, XtWX, XtWZ;\n  l1->pheno_l1_not_converged = ArrayXb::Constant(params->n_pheno, false);\n\n  for (int i = 0; i < 6; i++)\n    l1->cumsum_values[i].setZero(params->n_pheno, params->n_ridge_l1);\n\n  for(int ph = 0; ph < params->n_pheno; ++ph ) {\n    if( !params->pheno_pass(ph) ) continue;\n    sout << \"   -on phenotype \" << ph+1 <<\" (\" << files->pheno_names[ph] <<\")...\" << flush;\n    auto ts1 = std::chrono::high_resolution_clock::now();\n    ph_eff = params->write_l0_pred ? 
0 : ph;\n    int bs_l1 = params->total_n_block * params->n_ridge_l0;\n\n    // read in level 0 predictions from file\n    if(params->write_l0_pred)\n      read_l0(ph, ph_eff, files, params, l1, sout);\n    check_l0(ph, ph_eff, params, l1, pheno_data, sout);\n    bs_l1 = l1->test_mat[ph_eff][0].cols();\n    MatrixXd ident_l1 = MatrixXd::Identity(bs_l1,bs_l1);\n\n    for(int i = 0; i < params->cv_folds; ++i ) {\n      if( l1->pheno_l1_not_converged(ph) ) break;\n\n      if( params->within_sample_l0 ){\n        X1 = l1->pred_mat[ph][i];\n        Y1 = l1->pred_pheno_raw[ph][i];\n        W1 = l1->pred_offset[ph][i];\n      }\n\n      // starting values for each trait\n      betaold = betanew = ArrayXd::Zero(bs_l1);\n\n      for(int j = 0; j < params->n_ridge_l1; ++j ) {\n        if( l1->pheno_l1_not_converged(ph) ) break;\n\n        niter_cur = 0;\n        // use warm starts (i.e. set final beta of previous ridge param \n        // as initial beta for current ridge param)\n        betaold = betanew;\n\n        while(niter_cur++ < params->niter_max_ridge){\n\n          if(params->within_sample_l0) {\n            etavec = W1 + (X1 * betaold.matrix()).array();\n            pivec = 1 - 1/(etavec.exp() + 1);\n            wvec = pivec * (1 - pivec);\n            // check none of the values are 0\n            if( ( wvec == 0 ).count() > 0 ){\n              sout << \"ERROR: Zeros occurred in Var(Y) during ridge logistic regression! (Try with --loocv)\" << endl;\n              l1->pheno_l1_not_converged(ph) = true;\n              break;\n            }\n            zvec = (etavec - W1) + (Y1 - pivec) / wvec;\n            XtW = X1.transpose() * wvec.matrix().asDiagonal();\n            XtWX = params->tau[ph](j) * l1->ridge_param_mult.matrix().asDiagonal();\n            XtWX += XtW * X1;\n            betanew = XtWX.colPivHouseholderQr().solve(XtW * zvec.matrix()).array();\n            // get the score\n            etavec = W1 + (X1 * betanew.matrix()).array();\n            pivec = 1 - 1/(etavec.exp() + 1);\n            score = (X1.transpose() * (Y1 - pivec).matrix()).array() - params->tau[ph](j) * l1->ridge_param_mult * betanew;\n\n          } else {\n\n            XtWX = params->tau[ph](j) * l1->ridge_param_mult.matrix().asDiagonal();\n            XtWZ = MatrixXd::Zero(bs_l1, 1);\n\n            for(int k = 0; k < params->cv_folds; ++k ) {\n              if( k != i) {\n\n                // get w=p*(1-p) and check none of the values are 0\n                get_pvec(etavec, pivec, betaold, l1->test_offset[ph][k].array(), l1->test_mat[ph_eff][k], params->numtol_eps);\n                if( get_wvec(pivec, wvec, masked_in_folds[k].col(ph).array(), params->l1_ridge_eps) ){\n                  sout << \"ERROR: Zeros occurred in Var(Y) during ridge logistic regression! 
(Try with --loocv)\" << endl;\n                  l1->pheno_l1_not_converged(ph) = true;\n                  break;\n                }\n\n                zvec = masked_in_folds[k].col(ph).array().select((etavec - l1->test_offset[ph][k].array()) + (l1->test_pheno_raw[ph][k].array() - pivec) / wvec, 0);\n\n                XtW = l1->test_mat[ph_eff][k].transpose() * masked_in_folds[k].col(ph).array().select(wvec,0).matrix().asDiagonal();\n                XtWX += XtW * l1->test_mat[ph_eff][k];\n                XtWZ += XtW * zvec.matrix();\n              }\n            }\n            if( l1->pheno_l1_not_converged(ph) ) break;\n\n            betanew = XtWX.llt().solve(XtWZ).array();\n\n            // start step-halving\n            for( int niter_search = 1; niter_search <= params->niter_max_line_search_ridge; niter_search++ ){\n\n              bool invalid_wvec = false;\n\n              for(int k = 0; k < params->cv_folds; ++k ) {\n                if( k != i) {\n                  // get w=p*(1-p) and check none of the values are 0\n                  get_pvec(etavec, pivec, betanew, l1->test_offset[ph][k].array(), l1->test_mat[ph_eff][k], params->numtol_eps);\n                  invalid_wvec = get_wvec(pivec, wvec, masked_in_folds[k].col(ph).array(), params->l1_ridge_eps);\n                  if( invalid_wvec ) break; // do another halving\n                }\n              }\n\n              if( !invalid_wvec ) break;\n\n              // halve step size\n              betanew = (betaold + betanew) / 2;\n\n            }\n\n            // compute score\n            score = ArrayXd::Zero(bs_l1);\n            for(int k = 0; k < params->cv_folds; ++k ) {\n              if( k != i) {\n                // get w=p*(1-p) and check none of the values are 0\n                get_pvec(etavec, pivec, betanew, l1->test_offset[ph][k].array(), l1->test_mat[ph_eff][k], params->numtol_eps);\n                if( get_wvec(pivec, wvec, masked_in_folds[k].col(ph).array(), params->l1_ridge_eps) ){\n                  sout << \"ERROR: Zeros occurred in Var(Y) during ridge logistic regression! (Try with --loocv)\" << endl;\n                  l1->pheno_l1_not_converged(ph) = true;\n                  break;\n                }\n                score += (l1->test_mat[ph_eff][k].transpose() * masked_in_folds[k].col(ph).array().select(l1->test_pheno_raw[ph][k].array() - pivec, 0).matrix()).array();\n              }\n            }\n            score -= params->tau[ph](j) * l1->ridge_param_mult * betanew;\n\n\n          }\n\n          // stopping criterion\n          if( (score.abs().maxCoeff() < params->l1_ridge_tol) || l1->pheno_l1_not_converged(ph)) break;\n\n          betaold = betanew;\n        }\n\n        //cerr << \"\\nFold=\" << i << \" tau = \" << params->tau[ph](j) << \" beta=\" << betanew.matrix().transpose().array() << endl;\n        //if(i==1) exit(EXIT_FAILURE);\n\n        if(niter_cur > params->niter_max_ridge){\n          sout << \"WARNING: Penalized logistic regression did not converge! (Increase --niter)\\n\";\n          l1->pheno_l1_not_converged(ph) = true;\n          break;\n        } else if(l1->pheno_l1_not_converged(ph)) break;\n        //sout << \"Converged in \"<< niter_cur << \" iterations. 
Score max = \" << score.abs().maxCoeff() << endl;\n\n\n        etatest = l1->test_offset[ph][i].array() + (l1->test_mat[ph_eff][i] * betanew.matrix()).array();\n        p1 = (1 - 1/(etatest.exp() + 1));\n\n        if(!params->within_sample_l0) l1->beta_hat_level_1[ph][i].col(j) = betanew;\n\n\n        // compute mse\n        for(int l = 0; l < params->cv_sizes(i); l++){\n          if(!masked_in_folds[i](l,ph)) continue;\n\n          // if p is within eps of 0/1, set to eps/1-eps\n          if( p1(l) < params->l1_ridge_eps ) p1(l) = params->l1_ridge_eps;\n          else if( p1(l) > (1-params->l1_ridge_eps) ) p1(l) = 1 - params->l1_ridge_eps;\n\n          l1->cumsum_values[0](ph,j) += p1(l); // Sx\n          l1->cumsum_values[1](ph,j) += l1->test_pheno_raw[ph][i](l,0); // Sy\n          l1->cumsum_values[2](ph,j) += p1(l) * p1(l); // Sx2\n          l1->cumsum_values[3](ph,j) += l1->test_pheno_raw[ph][i](l,0) * l1->test_pheno_raw[ph][i](l,0); // Sy2\n          l1->cumsum_values[4](ph,j) += p1(l) * l1->test_pheno_raw[ph][i](l,0); // Sxy\n          l1->cumsum_values[5](ph,j) += compute_log_lik_bern(l1->test_pheno_raw[ph][i](l,0), p1(l)); // -LL\n        }\n\n      }\n    }\n\n    sout << \"done\";\n    auto ts2 = std::chrono::high_resolution_clock::now();\n    auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(ts2 - ts1);\n    sout << \" (\" << duration.count() << \"ms) \"<< endl;\n  }\n\n  sout << endl;\n\n}\n\n\nvoid ridge_logistic_level_1_loocv(struct in_files* files, struct param* params, struct phenodt* pheno_data, struct ests* m_ests, struct ridgel1* l1, mstream& sout) {\n\n  sout << endl << \" Level 1 ridge with logistic regression...\" << flush;\n\n  int ph_eff, bs_l1 = params->total_n_block * params->n_ridge_l0;\n  double p1;\n  string in_pheno;\n  ifstream infile;\n  ofstream ofile;\n\n  ArrayXd beta, pivec, wvec, v2, pred;\n  MatrixXd XtWX, V1, b_loo;\n  LLT<MatrixXd> Hinv;\n  l1->pheno_l1_not_converged = ArrayXb::Constant(params->n_pheno, false);\n  for (int i = 0; i < 6; i++)\n    l1->cumsum_values[i].setZero(params->n_pheno, params->n_ridge_l1);\n\n  uint64 max_bytes = params->chunk_mb * 1e6;\n  // amount of RAM used < max_mb [ creating (bs_l1 * target_size) matrix ]\n  int nchunk = ceil( params->cv_folds * bs_l1 * sizeof(double) * 1.0 / max_bytes );\n  int j_start, chunk, size_chunk, target_size = params->cv_folds / nchunk;\n  sout << (params->verbose ? to_string(nchunk) + \" chunks...\" : \"\" ) << endl;\n  if(params->debug && (params->n_pheno == 1) )\n    openStream(&ofile, files->out_file + \"_beta.l1.txt\", ios::out, sout);\n\n  for(int ph = 0; ph < params->n_pheno; ++ph ) {\n    if( !params->pheno_pass(ph) ) continue;\n\n    sout << \"   -on phenotype \" << ph+1 << \" (\" << files->pheno_names[ph] <<\")...\" << flush;\n    auto ts1 = std::chrono::high_resolution_clock::now();\n    ph_eff = params->write_l0_pred ? 
0 : ph;\n\n    // read in level 0 predictions from file\n    if(params->write_l0_pred)\n      read_l0(ph, ph_eff, files, params, l1, sout);\n    check_l0(ph, ph_eff, params, l1, pheno_data, sout);\n    bs_l1 = l1->test_mat_conc[ph_eff].cols();\n\n    MapArXd Y (pheno_data->phenotypes_raw.col(ph).data(), pheno_data->phenotypes_raw.rows());\n    MapMatXd X (l1->test_mat_conc[ph_eff].data(), pheno_data->phenotypes_raw.rows(), bs_l1);\n    MapArXd offset (m_ests->offset_nullreg.col(ph).data(), pheno_data->phenotypes_raw.rows());\n    MapArXb mask (pheno_data->masked_indivs.col(ph).data(), pheno_data->masked_indivs.rows());\n\n    // starting values for each trait\n    beta = ArrayXd::Zero(bs_l1);\n    for(int j = 0; j < params->n_ridge_l1; ++j ) {\n      if(params->debug) cerr << \"Ridge param #\" << j+1 << \" (=\" << params->tau[ph](j) << \")\\n\";\n\n      // using warm starts (i.e. set final beta of previous ridge param \n      // as initial beta for current ridge param)\n      if( params->use_adam ) // run ADAM to get close to max\n        run_log_ridge_loocv_adam(ph, params->tau[ph](j), l1->ridge_param_mult, beta, pivec, wvec, Y, X, offset, mask, params, sout);\n\n      if(!run_log_ridge_loocv(params->tau[ph](j), l1->ridge_param_mult, target_size, nchunk, beta, pivec, wvec, Y, X, offset, mask, params, sout)){\n        sout << \"WARNING: Ridge logistic regression did not converge! (Increase --niter)\\n\";\n        l1->pheno_l1_not_converged(ph) = true;\n        break;\n      }\n      if(params->debug && (params->n_pheno == 1) ){\n        IOFormat Fmt(FullPrecision, DontAlignCols, \" \", \"\\n\", \"\", \"\",\"\",\"\");\n        ofile << beta.matrix().transpose().format(Fmt) << \"\\n\";\n      }\n\n      // compute Hinv\n      // zvec = (pheno_data->masked_indivs.col(ph).array()).select( (etavec - m_ests->offset_nullreg.col(ph).array()) + (pheno_data->phenotypes_raw.col(ph).array() - pivec) / wvec, 0);\n      XtWX = params->tau[ph](j) * l1->ridge_param_mult.matrix().asDiagonal(); // compute XtWX in chunks\n      for(chunk = 0; chunk < nchunk; ++chunk){\n        size_chunk = ( chunk == nchunk - 1 ? params->cv_folds - target_size * chunk : target_size );\n        j_start = chunk * target_size;\n\n        Ref<MatrixXd> Xmat_chunk = X.block(j_start, 0, size_chunk, bs_l1); // n x k\n        Ref<ArrayXd> w_chunk = wvec.segment(j_start, size_chunk);\n        Ref<ArrayXb> mask_chunk = mask.segment(j_start, size_chunk);\n\n        XtWX.noalias() += Xmat_chunk.transpose() * mask_chunk.select(w_chunk,0).matrix().asDiagonal() * Xmat_chunk;\n      }\n      Hinv.compute( XtWX );\n\n      // LOOCV estimates\n      for(chunk = 0; chunk < nchunk; ++chunk ) {\n        size_chunk = ( chunk == nchunk - 1 ? 
params->cv_folds - target_size * chunk : target_size );\n        j_start = chunk * target_size;\n\n        Ref<MatrixXd> Xmat_chunk = X.block(j_start, 0, size_chunk, bs_l1); // n x k\n        Ref<ArrayXd> Yvec_chunk = Y.segment(j_start, size_chunk);\n        Ref<ArrayXb> mask_chunk = mask.segment(j_start, size_chunk);\n        Ref<ArrayXd> p_chunk = pivec.segment(j_start, size_chunk);\n        Ref<ArrayXd> w_chunk = wvec.segment(j_start, size_chunk);\n        Ref<ArrayXd> off_chunk = offset.segment(j_start, size_chunk);\n\n        V1 = Hinv.solve( Xmat_chunk.transpose() ); // k x n\n        v2 = (Xmat_chunk.array() * V1.transpose().array()).rowwise().sum() * w_chunk;\n        b_loo.resize(beta.size(), size_chunk);\n        b_loo.array().colwise() = beta;\n        b_loo -= V1 * ((Yvec_chunk - p_chunk)/(1-v2)).matrix().asDiagonal();\n        pred = (Xmat_chunk.array() * b_loo.transpose().array()).rowwise().sum() + off_chunk;\n\n        for(int i = 0; i < size_chunk; ++i ) {\n          if(!mask_chunk(i)) continue;\n          p1 = 1 - 1/ ( exp(pred(i)) + 1 );\n\n          // if p is within eps of 0/1, set to eps/1-eps\n          if( p1 < params->l1_ridge_eps ) p1 = params->l1_ridge_eps;\n          else if( p1 > (1-params->l1_ridge_eps) ) p1 = 1 - params->l1_ridge_eps;\n\n          // compute mse and rsq\n          l1->cumsum_values[0](ph,j) += p1; // Sx\n          l1->cumsum_values[1](ph,j) += Yvec_chunk(i,0); // Sy\n          l1->cumsum_values[2](ph,j) += p1 * p1; // Sx2\n          l1->cumsum_values[3](ph,j) += Yvec_chunk(i,0) * Yvec_chunk(i,0); // Sy2\n          l1->cumsum_values[4](ph,j) += p1 * Yvec_chunk(i,0); // Sxy\n          l1->cumsum_values[5](ph,j) += compute_log_lik_bern(Yvec_chunk(i,0), p1); // -LL\n        }\n      }\n\n    }\n\n    sout << \"done\";\n    auto ts2 = std::chrono::high_resolution_clock::now();\n    auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(ts2 - ts1);\n    sout << \" (\" << duration.count() << \"ms) \"<< endl;\n  }\n\n  if(params->debug && (params->n_pheno == 1) )\n    ofile.close();\n\n  sout << endl;\n}\n\nbool run_log_ridge_loocv(const double& lambda, const Ref<const ArrayXd>& ridge_param_mult, const int& target_size, const int& nchunk, ArrayXd& betaold, ArrayXd& pivec, ArrayXd& wvec, const Ref<const ArrayXd>& Y, Ref<MatrixXd> X, const Ref<const ArrayXd>& offset, const Ref<const ArrayXb>& mask, struct param* params, mstream& sout) {\n\n  bool dev_conv = false;\n  int bs_l1 = X.cols();\n  int niter_cur = 0, j_start, chunk, size_chunk;\n  double fn_start = 0, fn_end = 0;\n  ArrayXd etavec, betanew, score, step_size, vweights;\n  MatrixXd XtWX;\n  LLT<MatrixXd> Hinv;\n\n  //// get starting values\n  // get w=p*(1-p) and check none of the values are 0\n  get_pvec(etavec, pivec, betaold, offset, X, params->numtol_eps);\n  // get -2f(b)\n  fn_start = get_logist_dev(Y, pivec, mask) + lambda * (ridge_param_mult * betaold).square().sum();\n  if( get_wvec(pivec, wvec, mask, params->l1_ridge_eps) ){\n    sout << \"ERROR: Zeros occurred in Var(Y) during ridge logistic regression.\\n\";\n    return false;\n  }\n  // get the score\n  score = ( X.transpose() * mask.select(Y - pivec, 0).matrix()).array() ;\n  score -= lambda * ridge_param_mult * betaold;\n  // for convergence check\n  //vweights = (X.array().square().matrix().transpose() * mask.select(wvec,0).matrix()).array();\n  //vweights /= mask.select(wvec,0).sum();\n\n  while(niter_cur++ < params->niter_max_ridge) {\n\n    // get step size\n    XtWX = lambda * 
ridge_param_mult.matrix().asDiagonal(); // compute XtWX in chunks\n    for(chunk = 0; chunk < nchunk; ++chunk ) {\n      size_chunk = ( chunk == nchunk - 1 ? params->cv_folds - target_size * chunk : target_size );\n      j_start = chunk * target_size;\n\n      Ref<MatrixXd> Xmat_chunk = X.block(j_start, 0, size_chunk, bs_l1); // n x k\n      Ref<ArrayXd> w_chunk = wvec.segment(j_start, size_chunk); // nx1\n      Ref<const ArrayXb> mask_chunk = mask.segment(j_start, size_chunk); //nx1\n\n      XtWX.noalias() += Xmat_chunk.transpose() * mask_chunk.select(w_chunk,0).matrix().asDiagonal() * Xmat_chunk;\n    }\n    Hinv.compute( XtWX );\n    step_size = Hinv.solve(score.matrix()).array();\n    //cerr << (step_size * score).sum()/(.01 + abs(fn_end)) << \"\\n\\n\";\n\n    // check f(b)\n    for( int niter_search = 1; niter_search <= params->niter_max_line_search; niter_search++ ){\n\n      betanew = betaold + step_size;\n\n      // get w=p*(1-p) and check none of the values are 0\n      get_pvec(etavec, pivec, betanew, offset, X, params->numtol_eps);\n      // -2f(b)\n      fn_end = get_logist_dev(Y, pivec, mask) + lambda * (ridge_param_mult * betanew).square().sum();\n      if( get_wvec(pivec, wvec, mask, params->l1_ridge_eps) ){\n        sout << \"ERROR: Zeros occurred in Var(Y) during ridge logistic regression.\\n\";\n        return false;\n      }\n\n      if(params->debug) cerr << \"#\" << niter_cur << \"(#\" << niter_search << \"): \" << setprecision(16) << fn_start << \"->\" << fn_end << \"\\n\";\n\n      if( fn_end < (fn_start + params->numtol) ) break;\n      // adjusted step size\n      step_size /= 2;\n    }\n\n    // get the score\n    score = ( X.transpose() * mask.select(Y - pivec, 0).matrix()).array() ;\n    score -= lambda * ridge_param_mult * betanew;\n    if(params->debug) cerr << \"#\"<< niter_cur << \": score max = \" << score.abs().maxCoeff() << \";dev_diff=\" << setprecision(16) << abs(fn_end - fn_start)/(.01 + abs(fn_end)) << \"\\n\";\n\n    dev_conv = (abs(fn_end - fn_start)/(.01 + abs(fn_end)) < params->tol); // fractional change - same as glm\n    if( score.abs().maxCoeff() < params->l1_ridge_tol ) \n      //( (vweights * (betanew - betaold).square()).maxCoeff() < params->tol)  // convergence criteria from glmnet\n      break; // prefer for score to be below tol\n\n    betaold = betanew;\n    fn_start = fn_end;\n    //vweights = (X.array().square().matrix().transpose() * mask.select(wvec,0).matrix()).array();\n    //vweights /= mask.select(wvec,0).sum();\n  }\n\n  if( !dev_conv && (niter_cur > params->niter_max_ridge) )\n    return false;\n\n  betaold = betanew;\n  return true;\n}\n\n// Ridge logistic with ADAM using mini batch\nvoid run_log_ridge_loocv_adam(const int& ph, const double& lambda, const Ref<const ArrayXd>& ridge_param_mult, ArrayXd& betavec, ArrayXd& pivec, ArrayXd& wvec, const Ref<const ArrayXd>& Y, Ref<MatrixXd> X, const Ref<const ArrayXd>& offset, const Ref<const ArrayXb>& mask, struct param* params, mstream& sout) {\n\n  int niter_cur = 0, index;\n  double p_alpha = params->adam_alpha, p_beta1 = params->adam_beta1, p_beta2 = params->adam_beta2, p_eps = params->adam_eps, p_alpha_t;\n  double eta, phat;\n  //cerr << p_alpha << \" \" << p_beta1 << \" \" << p_beta2 << \" \" << p_eps << endl;\n  std::uniform_int_distribution<> d(0, mask.count() - 1);\n  std::mt19937 gen;\n  ArrayXd etavec, gradient_f, mt, vt, step_size;\n\n  // starting values for ADAM params\n  mt = vt = betavec * 0;\n  gradient_f.resize( betavec.size() );\n\n  while(niter_cur++ < 
params->niter_max_ridge_adam) {\n\n    gradient_f = lambda * ridge_param_mult * betavec;\n\n    if(params->adam_mini){ // ADAM using mini-batch (only non-masked samples)\n\n      for (int i = 0; i < params->adam_batch_size; i++){\n        index = params->adam_indices[ph](d(gen));\n        eta = offset(index) + X.row(index) * betavec.matrix();\n        phat = 1 - 1/(exp(eta) + 1);\n        gradient_f -= X.row(index).transpose().array() * (Y(index)-phat); \n      }\n      gradient_f /= params->adam_batch_size;\n\n    } else {\n\n      get_pvec(etavec, pivec, betavec, offset, X, params->numtol_eps);\n      gradient_f -= ( X.transpose() * mask.select(Y - pivec, 0).matrix()).array() ;\n\n    }\n    //if(niter_cur%100 == 1) sout << \"At iteration #\"<< niter_cur << \"; score max = \" << gradient_f.abs().maxCoeff() << endl;\n\n    mt = p_beta1 * mt + (1 - p_beta1) * gradient_f;\n    vt = p_beta2 * vt + (1 - p_beta2) * gradient_f.square();\n    p_alpha_t = p_alpha * sqrt(1 - pow(p_beta2, niter_cur)) / (1 - pow(p_beta1, niter_cur));\n    step_size = p_alpha_t * mt / (vt.sqrt() + p_eps);\n\n    if( step_size.abs().maxCoeff() < params->numtol ) break;\n\n    betavec -= step_size;\n\n  }\n\n  if(params->verbose) sout << \"ADAM took \"<< niter_cur << \" iterations (score max = \" << gradient_f.abs().maxCoeff() << \")...\";\n\n}\n\n// Poisson regression\nvoid ridge_poisson_level_1(struct in_files* files, struct param* params, struct phenodt* pheno_data, struct ridgel1* l1, vector<MatrixXb>& masked_in_folds, mstream& sout) {\n\n  sout << endl << \" Level 1 ridge with poisson regression...\" << endl << flush;\n\n  int niter_cur;\n  int ph_eff;\n  string in_pheno;\n  ifstream infile;\n\n  ArrayXd Y1, W1, p1, score;\n  ArrayXd betaold, etavec, pivec, wvec, zvec, betanew, etatest;\n  MatrixXd X1, XtW, XtWX, XtWZ;\n  l1->pheno_l1_not_converged = ArrayXb::Constant(params->n_pheno, false);\n\n  for (int i = 0; i < 6; i++)\n    l1->cumsum_values[i].setZero(params->n_pheno, params->n_ridge_l1);\n\n  for(int ph = 0; ph < params->n_pheno; ++ph ) {\n    if( !params->pheno_pass(ph) ) continue;\n    sout << \"   -on phenotype \" << ph+1 <<\" (\" << files->pheno_names[ph] <<\")...\" << flush;\n    auto ts1 = std::chrono::high_resolution_clock::now();\n    ph_eff = params->write_l0_pred ? 0 : ph;\n    int bs_l1 = params->total_n_block * params->n_ridge_l0;\n\n    // read in level 0 predictions from file\n    if(params->write_l0_pred)\n      read_l0(ph, ph_eff, files, params, l1, sout);\n    check_l0(ph, ph_eff, params, l1, pheno_data, sout);\n    bs_l1 = l1->test_mat[ph_eff][0].cols();\n    MatrixXd ident_l1 = MatrixXd::Identity(bs_l1,bs_l1);\n\n    for(int i = 0; i < params->cv_folds; ++i ) {\n      if( l1->pheno_l1_not_converged(ph) ) break;\n\n      // starting values for each trait\n      betaold = betanew = ArrayXd::Zero(bs_l1);\n\n      for(int j = 0; j < params->n_ridge_l1; ++j ) {\n        if( l1->pheno_l1_not_converged(ph) ) break;\n\n        niter_cur = 0;\n        // use warm starts (i.e. 
set final beta of previous ridge param \n        // as initial beta for current ridge param)\n        betaold = betanew;\n\n        while(niter_cur++ < params->niter_max_ridge){\n\n          XtWX = params->tau[ph](j) * l1->ridge_param_mult.matrix().asDiagonal();\n          XtWZ = MatrixXd::Zero(bs_l1, 1);\n\n          for(int k = 0; k < params->cv_folds; ++k ) {\n            if( k != i) {\n\n              // get lambda=E(Y) (=Var(Y) for poisson) and check none of the values are 0\n              get_pvec_poisson(etavec, pivec, betaold, l1->test_offset[ph][k].array(), l1->test_mat[ph_eff][k], params->numtol_eps);\n              if( (masked_in_folds[k].col(ph).array() && (pivec == 0) ).any() ){\n                sout << \"ERROR: Zeros occurred in Var(Y) during ridge poisson regression! (Try with --loocv)\" << endl;\n                l1->pheno_l1_not_converged(ph) = true;\n                break;\n              }\n\n              zvec = masked_in_folds[k].col(ph).array().select((etavec - l1->test_offset[ph][k].array()) + (l1->test_pheno_raw[ph][k].array() - pivec) / pivec, 0);\n\n              XtW = l1->test_mat[ph_eff][k].transpose() * masked_in_folds[k].col(ph).array().select(pivec,0).matrix().asDiagonal();\n              XtWX += XtW * l1->test_mat[ph_eff][k];\n              XtWZ += XtW * zvec.matrix();\n            }\n          }\n          if( l1->pheno_l1_not_converged(ph) ) break;\n\n          betanew = XtWX.llt().solve(XtWZ).array();\n\n          // start step-halving\n          for( int niter_search = 1; niter_search <= params->niter_max_line_search_ridge; niter_search++ ){\n\n            bool invalid_pvec = false;\n\n            for(int k = 0; k < params->cv_folds; ++k ) {\n              if( k != i) {\n                // get lambda=E(Y) (=Var(Y) for poisson) and check none of the values are 0\n                get_pvec_poisson(etavec, pivec, betanew, l1->test_offset[ph][k].array(), l1->test_mat[ph_eff][k], params->numtol_eps);\n                invalid_pvec = (masked_in_folds[k].col(ph).array() && (pivec == 0) ).any();\n                if( invalid_pvec ) break; // do another halving\n              }\n            }\n\n            if( !invalid_pvec ) break;\n\n            // halve step size\n            betanew = (betaold + betanew) / 2;\n\n          }\n\n          // compute score\n          score = ArrayXd::Zero(bs_l1);\n          for(int k = 0; k < params->cv_folds; ++k ) {\n            if( k != i) {\n              // get lambda=E(Y) (=Var(Y) for poisson) and check none of the values are 0\n              get_pvec_poisson(etavec, pivec, betanew, l1->test_offset[ph][k].array(), l1->test_mat[ph_eff][k], params->numtol_eps);\n              if( (masked_in_folds[k].col(ph).array() && (pivec == 0) ).any() ){\n                sout << \"ERROR: Zeros occurred in Var(Y) during ridge poisson regression! 
(Try with --loocv)\" << endl;\n                l1->pheno_l1_not_converged(ph) = true;\n                break;\n              }\n              score += (l1->test_mat[ph_eff][k].transpose() * masked_in_folds[k].col(ph).array().select(l1->test_pheno_raw[ph][k].array() - pivec, 0).matrix()).array();\n            }\n          }\n          score -= params->tau[ph](j) * l1->ridge_param_mult * betanew;\n\n          // stopping criterion\n          if( (score.abs().maxCoeff() < params->l1_ridge_tol) || l1->pheno_l1_not_converged(ph)) break;\n\n          betaold = betanew;\n        }\n\n        if(params->debug && (ph==0) && (i==0)) cerr << \"\\nFold=\" << i << \" tau = \" << params->tau[ph](j) << \n          \" beta(\" << niter_cur << \")=\" << betanew.matrix().transpose().array() << \" : \" << score.abs().maxCoeff() << endl;\n\n        if(niter_cur > params->niter_max_ridge){\n          sout << \"WARNING: Penalized poisson regression did not converge! (Increase --niter)\\n\";\n          l1->pheno_l1_not_converged(ph) = true;\n          break;\n        } else if(l1->pheno_l1_not_converged(ph)) break;\n        //sout << \"Converged in \"<< niter_cur << \" iterations. ;\n\n        get_pvec_poisson(etatest, p1, betanew, l1->test_offset[ph][i].array(), l1->test_mat[ph_eff][i], params->numtol_eps);\n        l1->beta_hat_level_1[ph][i].col(j) = betanew;\n\n        // compute mse\n        for(int l = 0; l < params->cv_sizes(i); l++){\n          if(!masked_in_folds[i](l,ph)) continue;\n\n          // if p is within eps of 0, set to eps\n          if( p1(l) < params->l1_ridge_eps ) p1(l) = params->l1_ridge_eps;\n\n          l1->cumsum_values[0](ph,j) += p1(l); // Sx\n          l1->cumsum_values[1](ph,j) += l1->test_pheno_raw[ph][i](l,0); // Sy\n          l1->cumsum_values[2](ph,j) += p1(l) * p1(l); // Sx2\n          l1->cumsum_values[3](ph,j) += l1->test_pheno_raw[ph][i](l,0) * l1->test_pheno_raw[ph][i](l,0); // Sy2\n          l1->cumsum_values[4](ph,j) += p1(l) * l1->test_pheno_raw[ph][i](l,0); // Sxy\n          l1->cumsum_values[5](ph,j) += compute_log_lik_poisson(l1->test_pheno_raw[ph][i](l,0), p1(l)); // -LL\n        }\n\n      }\n    }\n\n    sout << \"done\";\n    auto ts2 = std::chrono::high_resolution_clock::now();\n    auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(ts2 - ts1);\n    sout << \" (\" << duration.count() << \"ms) \"<< endl;\n  }\n\n  sout << endl;\n\n}\n\nvoid ridge_poisson_level_1_loocv(struct in_files* files, struct param* params, struct phenodt* pheno_data, struct ests* m_ests, struct ridgel1* l1, mstream& sout) {\n\n  sout << endl << \" Level 1 ridge with poisson regression...\" << flush;\n\n  int ph_eff, bs_l1 = params->total_n_block * params->n_ridge_l0;\n  double v2, pred, p1;\n  string in_pheno;\n  ifstream infile;\n\n  ArrayXd beta, pivec;\n  MatrixXd XtWX, V1, b_loo;\n  LLT<MatrixXd> Hinv;\n  l1->pheno_l1_not_converged = ArrayXb::Constant(params->n_pheno, false);\n  for (int i = 0; i < 6; i++)\n    l1->cumsum_values[i].setZero(params->n_pheno, params->n_ridge_l1);\n\n  uint64 max_bytes = params->chunk_mb * 1e6;\n  // amount of RAM used < max_mb [ creating (bs_l1 * target_size) matrix ]\n  int nchunk = ceil( params->cv_folds * bs_l1 * sizeof(double) * 1.0 / max_bytes );\n  int j_start, chunk, size_chunk, target_size = params->cv_folds / nchunk;\n  sout << (params->verbose ? 
to_string(nchunk) + \" chunks...\" : \"\" ) << endl;\n\n  for(int ph = 0; ph < params->n_pheno; ++ph ) {\n    if( !params->pheno_pass(ph) ) continue;\n\n    sout << \"   -on phenotype \" << ph+1 << \" (\" << files->pheno_names[ph] <<\")...\" << flush;\n    auto ts1 = std::chrono::high_resolution_clock::now();\n    ph_eff = params->write_l0_pred ? 0 : ph;\n\n    // read in level 0 predictions from file\n    if(params->write_l0_pred)\n      read_l0(ph, ph_eff, files, params, l1, sout);\n    check_l0(ph, ph_eff, params, l1, pheno_data, sout);\n    bs_l1 = l1->test_mat[ph_eff][0].cols();\n    MatrixXd ident_l1 = MatrixXd::Identity(bs_l1,bs_l1);\n\n    MapArXd Y (pheno_data->phenotypes_raw.col(ph).data(), pheno_data->phenotypes_raw.rows());\n    MapMatXd X (l1->test_mat_conc[ph_eff].data(), pheno_data->phenotypes_raw.rows(), bs_l1);\n    MapArXd offset (m_ests->offset_nullreg.col(ph).data(), pheno_data->phenotypes_raw.rows());\n    MapArXb mask (pheno_data->masked_indivs.col(ph).data(), pheno_data->masked_indivs.rows());\n\n    // starting values for each trait\n    beta = ArrayXd::Zero(bs_l1);\n    for(int j = 0; j < params->n_ridge_l1; ++j ) {\n\n      // using warm starts (i.e. set final beta of previous ridge param \n      // as initial beta for current ridge param)\n      if( params->use_adam ) // run ADAM to get close to max\n        throw \"not yet implemented\"; //run_ct_ridge_loocv_adam(ph, params->tau[ph](j), beta, pivec, Y, X, offset, mask, params, sout);\n\n      if(!run_ct_ridge_loocv(params->tau[ph](j), l1->ridge_param_mult, target_size, nchunk, beta, pivec, Y, X, offset, mask, params, sout)){\n        sout << \"WARNING: Ridge poisson regression did not converge! (Increase --niter)\\n\";\n        l1->pheno_l1_not_converged(ph) = true;\n        break;\n      }\n\n      // compute Hinv\n      XtWX = MatrixXd::Zero(bs_l1, bs_l1);\n      for(chunk = 0; chunk < nchunk; ++chunk){\n        size_chunk = ( chunk == nchunk - 1 ? params->cv_folds - target_size * chunk : target_size );\n        j_start = chunk * target_size;\n\n        Ref<MatrixXd> Xmat_chunk = X.block(j_start, 0, size_chunk, bs_l1); // n x k\n        Ref<MatrixXd> w_chunk = pivec.matrix().block(j_start, 0, size_chunk,1);\n        Ref<MatrixXb> mask_chunk = mask.matrix().block(j_start, 0, size_chunk,1);\n\n        XtWX += Xmat_chunk.transpose() * mask_chunk.array().select(w_chunk.array(),0).matrix().asDiagonal() * Xmat_chunk;\n      }\n      Hinv.compute( XtWX + params->tau[ph](j) * ident_l1 );\n\n      // LOOCV estimates\n      for(chunk = 0; chunk < nchunk; ++chunk ) {\n        size_chunk = ( chunk == nchunk - 1 ? 
params->cv_folds - target_size * chunk : target_size );\n        j_start = chunk * target_size;\n\n        Ref<MatrixXd> Xmat_chunk = X.block(j_start, 0, size_chunk, bs_l1); // n x k\n        Ref<MatrixXd> Yvec_chunk = Y.matrix().block(j_start, 0, size_chunk, 1);\n        Ref<MatrixXb> mask_chunk = mask.matrix().block(j_start, 0, size_chunk,1);\n\n        V1 = Hinv.solve( Xmat_chunk.transpose() ); // k x n\n        for(int i = 0; i < size_chunk; ++i ) {\n          if(!mask_chunk(i,0)) continue;\n          v2 = Xmat_chunk.row(i) * V1.col(i);\n          v2 *= pivec(j_start + i);\n          b_loo = (beta - V1.col(i).array() * (Yvec_chunk(i,0) - pivec(j_start + i)) / (1 - v2)).matrix();\n          pred = Xmat_chunk.row(i) * b_loo.col(0);\n          pred += offset(j_start + i);\n          p1 = exp(pred);\n\n          // if p is within eps of 0, set to eps\n          if( p1 < params->l1_ridge_eps ) p1 = params->l1_ridge_eps;\n\n          // compute mse and rsq\n          l1->cumsum_values[0](ph,j) += p1; // Sx\n          l1->cumsum_values[1](ph,j) += Yvec_chunk(i,0); // Sy\n          l1->cumsum_values[2](ph,j) += p1 * p1; // Sx2\n          l1->cumsum_values[3](ph,j) += Yvec_chunk(i,0) * Yvec_chunk(i,0); // Sy2\n          l1->cumsum_values[4](ph,j) += p1 * Yvec_chunk(i,0); // Sxy\n          l1->cumsum_values[5](ph,j) += compute_log_lik_poisson(Yvec_chunk(i,0), p1); // -LL\n        }\n      }\n\n    }\n\n    sout << \"done\";\n    auto ts2 = std::chrono::high_resolution_clock::now();\n    auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(ts2 - ts1);\n    sout << \" (\" << duration.count() << \"ms) \"<< endl;\n  }\n\n  sout << endl;\n}\n\nbool run_ct_ridge_loocv(const double& lambda, const Ref<const ArrayXd>& ridge_param_mult, const int& target_size, const int& nchunk, ArrayXd& betaold, ArrayXd& pivec, const Ref<const ArrayXd>& Y, Ref<MatrixXd> X, const Ref<const ArrayXd>& offset, const Ref<const ArrayXb>& mask, struct param* params, mstream& sout) {\n\n  int bs_l1 = X.cols();\n  int niter_cur = 0, j_start, chunk, size_chunk;\n  ArrayXd etavec, zvec, betanew, score;\n  MatrixXd XtWX, XtWZ, V1;\n  LLT<MatrixXd> Hinv;\n  MatrixXd ident_l1 = MatrixXd::Identity(bs_l1,bs_l1);\n\n\n  while(niter_cur++ < params->niter_max_ridge) {\n\n    // get p and check none of the values are 0\n    get_pvec_poisson(etavec, pivec, betaold, offset, X, params->numtol_eps);\n    if( (mask && (pivec == 0)).any() ){\n      sout << \"ERROR: Zeros occurred in Var(Y) during ridge poisson regression.\\n\";\n      return false;\n    }\n    zvec = mask.select( (etavec - offset) + (Y - pivec) / pivec, 0);\n\n    // compute XtWX and XtWZ in chunks\n    XtWX = lambda * ridge_param_mult.matrix().asDiagonal(); \n    XtWZ = MatrixXd::Zero(bs_l1, 1);\n    for(chunk = 0; chunk < nchunk; ++chunk ) {\n      size_chunk = ( chunk == nchunk - 1 ? 
params->cv_folds - target_size * chunk : target_size );\n      j_start = chunk * target_size;\n\n      Ref<MatrixXd> Xmat_chunk = X.block(j_start, 0, size_chunk, bs_l1); // n x k\n      Ref<MatrixXd> w_chunk = pivec.matrix().block(j_start, 0, size_chunk, 1);\n      Ref<MatrixXd> z_chunk = zvec.matrix().block(j_start, 0, size_chunk,1);\n      Ref<const MatrixXb> mask_chunk = mask.matrix().block(j_start, 0, size_chunk,1);\n\n      V1 = Xmat_chunk.transpose() * mask_chunk.array().select(w_chunk.array(),0).matrix().asDiagonal();\n      XtWX += V1 * Xmat_chunk;\n      XtWZ += V1 * z_chunk;\n    }\n    Hinv.compute( XtWX );\n    betanew = Hinv.solve(XtWZ).array();\n\n    get_pvec_poisson(etavec, pivec, betanew, offset, X, params->numtol_eps);\n    if( (mask && (pivec == 0)).any() ){\n      sout << \"ERROR: Zeros occurred in Var(Y) during ridge poisson regression.\\n\";\n      return false;\n    }\n\n    // get the score\n    score = ( X.transpose() * mask.select(Y - pivec, 0).matrix()).array() ;\n    score -= lambda * ridge_param_mult * betanew;\n\n    if( score.abs().maxCoeff() < params->l1_ridge_tol ) break;\n\n    betaold = betanew;\n  }\n\n  if(niter_cur > params->niter_max_ridge) \n    return false;\n  //sout << \"Converged in \"<< niter_cur << \" iterations. Score max = \" << score.abs().maxCoeff() << endl;\n\n  betaold = betanew;\n  return true;\n}\n\nbool get_wvec(ArrayXd& pivec, ArrayXd& wvec, const Ref<const ArrayXb>& mask, const double& tol){\n\n  wvec = mask.select(pivec*(1-pivec), 1); // get_pvec below causes wvec to be at least 9.999800003056407e-06\n  /*\n  wvec = ArrayXd::Ones( mask.size() );// set all entries to 1\n  // avoid 0 weights by setting w to eps when p is within eps of 0/1\n  // (strategy used in glmnet)\n  for (int i = 0; i < mask.size(); i++){\n    if( !mask(i) ) continue;\n\n    if( pivec(i) < tol) {\n      pivec(i) = 0;\n      wvec(i) = tol;\n    } else if ( pivec(i) > (1-tol) ){\n      pivec(i) = 1;\n      wvec(i) = tol;\n    } else wvec(i) = pivec(i) * (1-pivec(i));\n\n  }\n  //wvec = masks.col(ph).array().select(pivec * (1 - pivec), 1);\n  */\n\n  return (wvec == 0).any();\n}\n\nvoid get_pvec(ArrayXd& etavec, ArrayXd& pivec, const Ref<const ArrayXd>& beta, const Ref<const ArrayXd>& offset, const Ref<const MatrixXd>& Xmat, double const& eps){\n\n  etavec = offset + (Xmat * beta.matrix()).array();\n  get_pvec(pivec, etavec, eps);\n\n}\n\nvoid get_pvec(ArrayXd& etavec, ArrayXd& pivec, const double& beta, const Ref<const ArrayXd>& offset, const Ref<const VectorXd>& Xmat, double const& eps){\n\n  etavec = offset + Xmat.array() * beta;\n  get_pvec(pivec, etavec, eps);\n\n}\n\nvoid get_pvec(ArrayXd& pivec, const Ref<const ArrayXd>& etavec, double const& eps){\n\n  // strategy used in glm\n  pivec = (etavec > ETAMAXTHR).select( 1 /(1+eps),\n      (etavec < ETAMINTHR).select( eps/(1+eps), 1 - 1/(etavec.exp() + 1) ) );\n  //cerr << setprecision(16) << etavec.head(5) << \"\\n\" << pivec.head(5) << \"\\n\";\n\n}\n\n// for firth (ok if wvec=0)\nvoid get_wvec(ArrayXd& pivec, ArrayXd& wvec, const Ref<const ArrayXb>& mask){\n  wvec = mask.select(pivec*(1-pivec), 1);\n}\n\nvoid get_pvec_poisson(ArrayXd& etavec, ArrayXd& pivec, const Ref<const ArrayXd>& beta, const Ref<const ArrayXd>& offset, const Ref<const MatrixXd>& Xmat, double const& eps){\n\n  etavec = offset + (Xmat * beta.matrix()).array();\n  pivec = etavec.exp(); // lambda = E(Y)\n\n}\n\ndouble get_logist_dev(const Ref<const ArrayXd>& Y, const Ref<const ArrayXd>& pi, const Ref<const ArrayXb>& mask){\n\n  double dev = 
0;\n\n  for( int i = 0; i < Y.size(); i++)\n    if(mask(i)) dev += compute_log_lik_bern(Y(i), pi(i));\n\n  return 2 * dev; // -2 log.lik\n}\n\ndouble get_poisson_dev(const Ref<const ArrayXd>& Y, const Ref<const ArrayXd>& pi, const Ref<const ArrayXb>& mask){\n\n  double dev = 0;\n\n  for(int i = 0; i < Y.size(); i++)\n    if(mask(i)) dev += compute_log_lik_poisson(Y(i), pi(i));\n\n  return 2 * dev; // -2 log.lik\n}\n\n\ndouble compute_log_lik_bern(const double& y, const double& p){\n  // negative log likelihood for bernoulli\n  return -((y==0) ? log(1-p) : log(p));\n}\n\ndouble compute_log_lik_poisson(const double& y, const double& p){\n  // negative log likelihood for poisson\n  return -(y * log(p) - p); // ignore log(y!) constant\n}\n\ndouble y_log_ypi(const double& y, const double& p){\n  return (y != 0) ? y * log(y/p) : 0;\n}\n\ndouble get_deviance_logistic(const Ref<const ArrayXd>& Y, const Ref<const ArrayXd>& pi, const Ref<const ArrayXd>& wt, const Ref<const ArrayXb>& mask){\n\n  double dev = 0;\n  for(int i = 0; i < Y.size(); i++)\n    if(mask(i)) {\n      dev += 2 * wt(i) * ( y_log_ypi(Y(i), pi(i)) + y_log_ypi(1 - Y(i), 1 - pi(i)) );\n      //cerr << \"(\" << wt(i) << \", \" << Y(i) << \", \" << pi(i) << \") - \" << i << \" -> \" << dev << \"\\n\";\n      //if(i>10) exit(-1);\n    }\n\n  return dev;\n}\n\nvoid test_assoc_block(int const& chrom, int const& block, struct ridgel0& l0,struct ridgel1& l1, struct geno_block* Gblock, struct phenodt* pheno_data, snp const* snpinfo, struct param const& params, mstream& sout){\n\n  sout << \"   -extracting highly associated SNPs...\" << flush;\n  auto t2 = std::chrono::high_resolution_clock::now();\n\n  bool run_algo = params.l0_snp_pval_thr > 0;\n  ArrayXi ind_g_l1;\n  MatrixXd tmpM;\n  l0.picked_top_snp = MatrixXb::Constant(l0.GGt.cols(), params.n_pheno, false);\n\n  if(run_algo){\n    for(int ph = 0; ph < params.n_pheno; ph++)\n      apply_iter_cond(chrom, block, ph, l0, l1, Gblock, snpinfo, params);\n    sout << \"number selected across phenotypes = [ \" << l0.nspns_picked_block.matrix().transpose() << \" ]...\";\n  }\n\n  // discard variants that are picked across all traits\n  ArrayXb rm_var = l0.picked_top_snp.rowwise().all();\n  l0.subset_l0_snps_gmat = rm_var.any();\n  l0.indices_gmat_keep = get_true_indices(!rm_var); // resize matrices to remove the picked SNPs\n  if(l0.subset_l0_snps_gmat) {\n    if(params.use_loocv){\n      SelfAdjointEigenSolver<MatrixXd> esG(l0.GGt(l0.indices_gmat_keep,l0.indices_gmat_keep));\n      l0.GGt_eig_vec = esG.eigenvectors();\n      l0.GGt_eig_val = esG.eigenvalues();\n      l0.Wmat = l0.GGt_eig_vec.transpose() * l0.GTY(l0.indices_gmat_keep, all);\n    } else {\n      l0.GGt = MatrixXd::Zero(l0.indices_gmat_keep.size(),l0.indices_gmat_keep.size());\n      l0.GTY = MatrixXd::Zero(l0.indices_gmat_keep.size(),params.n_pheno);\n      uint32_t cum_size_folds = 0;\n      for(int i = 0; i < params.cv_folds; ++i ) {\n        tmpM = l0.G_folds[i](l0.indices_gmat_keep,l0.indices_gmat_keep); l0.G_folds[i] = tmpM;\n        l0.GGt += l0.G_folds[i];\n        tmpM = l0.GtY[i](l0.indices_gmat_keep,all); l0.GtY[i] = tmpM;\n        l0.GTY += l0.GtY[i];\n        cum_size_folds += params.cv_sizes(i);\n      }\n      tmpM = Gblock->Gmat(l0.indices_gmat_keep, all); Gblock->Gmat = tmpM;\n    }\n  } else if(params.use_loocv){ // loocv\n    SelfAdjointEigenSolver<MatrixXd> esG(l0.GGt);\n    l0.GGt_eig_vec = esG.eigenvectors();\n    l0.GGt_eig_val = esG.eigenvalues();\n    l0.Wmat = l0.GGt_eig_vec.transpose() * l0.GTY;\n  }\n\n 
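 // note: SNPs flagged for every phenotype were removed from the level 0 design above; under
  // LOOCV the updated GGt is eigendecomposed once so the level 0 ridge solutions can be
  // formed in its eigenbasis (Wmat = V' * GTY)
 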
 sout << \"done\";\n  auto t3 = std::chrono::high_resolution_clock::now();\n  auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(t3 - t2);\n  sout << \" (\" << duration.count() << \"ms) \"<< endl;\n\n}\n\nvoid read_l0(int const& ph, int const& ph_eff, struct in_files* files, struct param* params, struct ridgel1* l1, mstream& sout){\n\n  int start, np, bs_l1_tot = params->total_n_block * params->n_ridge_l0;\n  string fin;\n\n  // resize matrix\n  if(params->use_loocv || params->trait_mode == 3)\n    l1->test_mat_conc[ph_eff].resize(params->n_samples, bs_l1_tot);\n  else for( int i = 0; i < params->cv_folds; ++i )\n    l1->test_mat[ph_eff][i].resize(params->cv_sizes(i), bs_l1_tot);\n\n  // all blocks in same file\n  if(!params->run_l1_only){\n\n    start = 0;\n    np = bs_l1_tot;\n    fin = files->loco_tmp_prefix;\n\n    read_l0_chunk(ph, ph_eff, start, np, fin, params, l1, sout);\n\n  } else { // blocks in separate file\n\n    for(size_t i = 0; i < files->bstart.size(); i++){\n\n      start = files->bstart[i] * params->n_ridge_l0;\n      np = files->btot[i] * params->n_ridge_l0;\n      fin = files->mprefix[i];\n\n      read_l0_chunk(ph, ph_eff, start, np, fin, params, l1, sout);\n    }\n\n  }\n}\n\n// read in l0 predictors in columns [start,start+np)\nvoid read_l0_chunk(int const& ph, int const& ph_eff, int const& start, int const& np, const string& prefix, struct param* params, struct ridgel1* l1, mstream& sout){\n\n  string in_pheno = prefix + \"_l0_Y\" + to_string(ph+1);\n  ifstream infile;\n  openStream(&infile, in_pheno, ios::in | ios::binary, sout);\n\n  if( getSize(in_pheno) != (sizeof(double) * params->n_samples * np ))\n    throw \"file \" + in_pheno + \" is not the right size.\" ;\n  //cerr << in_pheno << \"  \" << getSize(in_pheno) << endl;\n\n  // store back values in test_mat\n  if(params->use_loocv || params->trait_mode == 3) {\n\n    infile.read( reinterpret_cast<char *> (&l1->test_mat_conc[ph_eff](0, start)), params->n_samples * np * sizeof(double) );\n\n    //if(ph == 0) sout << endl << \"In:\\n\" << l1->test_mat_conc[ph_eff].block(0,0,5,6) << endl;\n  } else {\n\n    int nt = 0;\n\n    for( int m = start; nt < np; nt++, m++ )\n      for( int i = 0; i < params->cv_folds; ++i )\n        for( int k = 0; k < params->cv_sizes(i); ++k )\n          infile.read( reinterpret_cast<char *> (&l1->test_mat[ph_eff][i](k,m)), sizeof(double) );\n\n  //if(start==0) cerr << endl <<l1->test_mat[ph_eff][0].block(0,0,3,3) << endl;\n\n  }\n\n  infile.close();\n  \n}\n\nvoid check_l0(int const& ph, int const& ph_eff, struct param* params, struct ridgel1* l1, struct phenodt const* pheno_data, mstream& sout, bool const& silent_mode){\n\n  l1->l0_colkeep.col(ph).array() = true;\n  // adjustment to chr map for level 1 (number of level 0 predictors per chromosome)\n  l1->chrom_map_ndiff.setZero();\n  // to apply ridge parameter\n  l1->ridge_param_mult = ArrayXd::Ones(l1->l0_colkeep.rows());\n\n  // select blocks to retain min shrinkage\n  if(params->select_l0){\n\n    int N = l1->l0_pv_block.rows();\n    MapArXd pv_arr (l1->l0_pv_block.col(ph).data(), N);\n    vector<double> quantile_vec; quantile_vec.resize(N);\n    VectorXd::Map(&quantile_vec[0], N) = pv_arr;\n    if(!silent_mode) sout << \"checking level 0 predictors for strength of associations with phenotype...\" << flush;\n\n    // sort p-values\n    auto const Q1 = N / 4;\n    auto const Q2 = N / 2;\n    auto const Q3 = Q1 + Q2;\n    double thr, beta_q, conf_alpha = 0.05/N; // to be conservative\n    
std::sort(quantile_vec.begin(), quantile_vec.end());\n    if(!silent_mode && params->debug) cout << \"[ Q1=\"<< quantile_vec[Q1] << \", Q2=\" << quantile_vec[Q2] << \", Q3=\" << quantile_vec[Q3] << \" ]...\" << flush;\n\n    if (params->rm_l0_pct != 0){ // get the threshold to use & select blocks\n\n      if(!silent_mode) sout << \"removing the least \" << params->rm_l0_pct << \"% significant block level 0 predictors...\" << flush;\n      int N_rm = N * (params->rm_l0_pct/100);\n      if(N_rm >= N) throw \"invalid proportion specified for --rm-l0-pct\";\n      thr = quantile_vec[N_rm-1];\n      for( int i = 0; i < N; ++i ) \n        if(pv_arr(i) <= thr) { // throw out all level 0 predictors from block\n          l1->l0_colkeep.block(i * params->n_ridge_l0, ph, params->n_ridge_l0, 1).array() = false;\n          // subtract from chr_map\n          l1->chrom_map_ndiff( l1->chrom_block(i) - 1 ) += params->n_ridge_l0;\n        }\n\n    } else {\n\n      // with U(0,1) independent p-values under H0, the kth order statistic is Beta(k, N-k+1) \n      thr = quantile_vec[N-1] + 1e-6;\n      for( int i = 0; i < (N-1); ++i ) {\n        beta_distribution<>  bd(i+1, N - i - 1);\n        beta_q = -log10( quantile(bd, conf_alpha/2.0) );\n        if((quantile_vec[N-i-1] < beta_q) || (i==(N-2))){\n          if(i>0) thr = quantile_vec[N-i];\n          break;\n        }\n      }\n\n      int Ntop = ceil(0.05*N);\n      // threshold = median + 3*IQR\n      // or 3 SD above mean?\n      if((pv_arr >= thr).count() >= Ntop){\n        double iqr = quantile_vec[Q3] - quantile_vec[Q1];\n        thr = max(thr, quantile_vec[Q3] + 3 * iqr); \n      }\n\n      // if too many are still selected, keep only the top 5% (Ntop)\n      if((pv_arr >= thr).count() >= Ntop){\n        thr = quantile_vec[N-Ntop]; \n      }\n\n      if(!silent_mode) sout << (pv_arr >= thr).count() << \"/\" << N << \" blocks selected (Upper bound = \" << thr << \")...\" << flush;\n\n      if( (pv_arr >= thr).any() )\n        // go through each block\n        for( int i = 0; i < N; ++i ) \n          if(pv_arr(i) >= thr) { // only use last ridge parameter (min shrinkage)\n            l1->l0_colkeep.block(i * params->n_ridge_l0, ph, params->n_ridge_l0 - 1, 1).array() = false;\n            l1->ridge_param_mult( (i+1) * params->n_ridge_l0 - 1 ) = 0;\n            // subtract from chr_map\n            l1->chrom_map_ndiff( l1->chrom_block(i) - 1 ) += params->n_ridge_l0 - 1;\n          }\n    }\n  }\n\n  // subset columns\n  if(!l1->l0_colkeep.col(ph).all()){\n    ArrayXi ind_keep = get_true_indices(l1->l0_colkeep.col(ph));\n\n    if(params->use_loocv || params->trait_mode == 3){\n\n      //cout << \"\\n\\n\" <<  ind_keep.matrix().transpose() << \"\\n\" << l1->test_mat_conc[ph_eff].rows() << \",\" << l1->test_mat_conc[ph_eff].cols() << \"\\n\";\n      // update over row chunks to keep memory usage low\n      uint64 max_bytes = params->chunk_mb * 1e6;\n      int nchunk = ceil( l1->test_mat_conc[ph_eff].rows() * l1->test_mat_conc[ph_eff].cols() * sizeof(double) * 1.0 / max_bytes );\n      int chunk, size_chunk, j_start, target_size = l1->test_mat_conc[ph_eff].rows() / nchunk, ncols_keep = l1->l0_colkeep.col(ph).count();\n\n      for(chunk = 0; chunk < nchunk; ++chunk ) {\n        size_chunk = chunk == nchunk - 1? 
l1->test_mat_conc[ph_eff].rows() - target_size * chunk : target_size;\n        j_start = chunk * target_size;\n        MatrixXd tmpMat = l1->test_mat_conc[ph_eff](seqN(j_start, size_chunk), ind_keep);\n        l1->test_mat_conc[ph_eff].block(j_start, 0, size_chunk, ncols_keep) = tmpMat;\n      }\n\n      l1->test_mat_conc[ph_eff].conservativeResize(l1->test_mat_conc[ph_eff].rows(), ncols_keep);\n      //cout << \"\\n\\n\" <<  ph_eff << \":\" << l1->test_mat_conc[ph_eff].rows() << \",\" << l1->test_mat_conc[ph_eff].cols() << \"\\n\";\n\n    } else\n      for( int i = 0; i < params->cv_folds; ++i ) {\n        MatrixXd tmpMat = l1->test_mat[ph_eff][i](all, ind_keep);\n        l1->test_mat[ph_eff][i] = tmpMat;\n      }\n\n    ArrayXd tmpvec = l1->ridge_param_mult( ind_keep );\n    l1->ridge_param_mult = tmpvec;\n  }\n\n  // set ridge params\n  if(!silent_mode) {\n    if(params->trait_mode == 2){\n      double rate = pheno_data->phenotypes_raw.col(ph).sum() / pheno_data->Neff(ph); // masked entries are 0\n      params->tau[ph] = l1->l0_colkeep.col(ph).count() / (1 + params->tau[ph] / (rate * (1 - params->tau[ph]))).log();\n      //cerr << endl << params->tau[i].matrix().transpose() << endl;\n    } else if(params->trait_mode == 3){\n      if (params->t2e_l1_pi6) {\n        params->tau[ph] = l1->l0_colkeep.col(ph).count() * (1 - params->tau[ph]) / params->tau[ph];\n        // Assuming input tau is total SNP heritability on the liability scale= m * 6/pi^2 * (1-h2) / h2\n        params->tau[ph] *= 6 / (M_PI * M_PI);\n      } else {\n        Eigen::VectorXd index = Eigen::VectorXd::LinSpaced(params->n_ridge_l1, 0, params->n_ridge_l1 - 1);\n        params->tau[ph] = ((index.array() / (params->n_ridge_l1 - 1)) * log(1e-6) + log(pheno_data->cox_max_tau[ph])).exp();\n      }\n    }else {\n      params->tau[ph] = l1->l0_colkeep.col(ph).count() * (1 - params->tau[ph]) / params->tau[ph];\n      // Assuming input tau is total SNP heritability on the liability scale= m * 3/pi^2 * (1-h2) / h2\n      if(params->trait_mode == 1) params->tau[ph] *= 3 / (M_PI * M_PI);\n    }\n  }\n\n}\n\nuint64 getSize(string const& fname){\n  \n  struct stat stat_buf;\n  int rc = stat(fname.c_str(), &stat_buf);\n\n  return ( rc == 0 ? 
stat_buf.st_size : 0);\n\n}\n\n//////////// dev functions\nvoid apply_iter_cond(int const& chrom, int const& block, int const& ph, struct ridgel0& l0, struct ridgel1& l1, struct geno_block* Gblock, snp const* snpinfo, struct param const& params){\n\n  chi_squared chisq(1);\n  int bs = l0.GGt.rows(), maxIndex = 0;\n  if(bs == 1) return;\n  double chisq_thr = quantile(complement(chisq, params.l0_snp_pval_thr)), r2_thr = 0.9, ss_y, ss_x1;\n  double beta_top_snp, ggt_diag = l0.GGt(0,0); // G is residualized & scaled so GtG=N-K\n  vector<int> top_indices;\n  MapArXd GtY (l0.GTY.col(ph).data(), bs);\n  ArrayXd block_top_pgs = ArrayXd::Zero(Gblock->Gmat.cols());\n  ArrayXd snp_pgs, chisq_v, bvec, v_beta, bstart, v_y;\n  MatrixXd tmpM, X2tX1_X1tX1_inv, X1tX1_inv_X1ty, LDmat = (l0.GGt.array() / (params.n_samples - params.ncov_analyzed) ).square().matrix();\n  // initial values\n  l0.nspns_picked_block(ph) = 0;\n  bstart = GtY/ggt_diag; bvec = bstart;\n  ss_y = l0.ymat_res.col(ph).squaredNorm();\n  v_y = (ss_y - bvec * GtY) / (ggt_diag - l0.nspns_picked(ph) - 1);\n  v_beta = v_y/ggt_diag;\n  ArrayXi tmpVi, indices_start(bs); std::iota(indices_start.begin(), indices_start.end(), 0);\n  \n  for(int itr = 1; itr < bs; itr++){\n\n    if( (!l0.picked_top_snp.array().col(ph)).count() == 1 ) break; // must have at least one non-picked SNP\n\n    // run marginal tests\n    chisq_v = bvec.square() / v_beta / v_y;\n    if(chisq_v.maxCoeff(&maxIndex) < chisq_thr) break;\n\n    // keep track of top SNP\n    top_indices.push_back(indices_start(maxIndex));\n    l0.picked_top_snp.array().col(ph)(indices_start(maxIndex)) = true;\n    // get beta and update pgs\n    beta_top_snp = bvec(maxIndex);\n    if(params.debug) cout << \"round \" << itr << \" - top SNP '\" << snpinfo[indices_start(maxIndex)].ID << \"'\" << \": chisq=\" << chisq_v(maxIndex) << \"/beta=\" << beta_top_snp << \"/vy=\" << v_y(maxIndex) << \"\\n\";\n    l0.nspns_picked_block(ph)++;\n    snp_pgs = Gblock->Gmat.row(indices_start(maxIndex)).transpose().array() * beta_top_snp;\n    block_top_pgs += snp_pgs; // update top_snp_pgs\n    // ignore snps in high LD with top SNP\n    l0.picked_top_snp.col(ph).array() = ( LDmat.col(indices_start(maxIndex)).array() > r2_thr ).select(true, l0.picked_top_snp.col(ph).array());\n    // track indices of snps not picked\n    indices_start = get_true_indices(!l0.picked_top_snp.array().col(ph));\n\n    // quantities needed for beta &v(beta)\n    /* // with eigen decomp\n    SelfAdjointEigenSolver<MatrixXd> eig_x1tx1(l0.GGt(top_indices, top_indices));\n    tmpM = eig_x1tx1.eigenvectors() * (1/eig_x1tx1.eigenvalues().array().sqrt()).matrix().asDiagonal();\n    X2tX1_X1tX1_inv = l0.GGt(indices_start, top_indices) * tmpM;\n    X1tX1_inv_X1ty = tmpM.transpose() * GtY(top_indices).matrix();\n    // get bvec conditional on picked snps\n    bvec = bstart(indices_start) - (X2tX1_X1tX1_inv * X1tX1_inv_X1ty).array() / ggt_diag;\n    //if(params.debug) cerr << bvec.head(5).matrix().transpose() << \"\\n\";\n    v_y = ((l0.ymat_res.col(ph) - block_top_pgs.matrix()).squaredNorm() - bvec * GtY(indices_start)) / (ggt_diag - itr - 1);\n    v_beta = (ggt_diag - X2tX1_X1tX1_inv.array().square().rowwise().sum()) / ggt_diag / ggt_diag;  \n*/\n    // switch to cholesky (X1tX1 should always be pd)\n    if(itr == 1){\n      X2tX1_X1tX1_inv = l0.GGt(indices_start,top_indices) / ggt_diag;\n      ss_x1 = GtY(top_indices).square()(0) / ggt_diag;\n    } else if(itr <5) {\n      MatrixXd inv_X1tX1 = l0.GGt(top_indices, top_indices).inverse();\n      
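// marginal effects are re-estimated conditional on the already-picked SNPs; an explicit
      // inverse of X1tX1 is cheap while few SNPs are picked, and the else branch below
      // switches to a Cholesky (LLT) solve as the picked set grows
      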
X2tX1_X1tX1_inv = l0.GGt(indices_start,top_indices) * inv_X1tX1;\n      //if(params.debug) cerr << (inv_X1tX1 * GtY(top_indices).matrix()).transpose() << \"\\n\";\n      ss_x1 = GtY(top_indices).matrix().transpose() * inv_X1tX1 * GtY(top_indices).matrix();\n    } else {\n      const LLT<MatrixXd> llt_X1tX1 = l0.GGt(top_indices, top_indices).llt();\n      X2tX1_X1tX1_inv = llt_X1tX1.solve(l0.GGt(top_indices, indices_start)).transpose();\n      //if(params.debug) cerr << llt_X1tX1.solve(GtY(top_indices).matrix()).transpose() << \"\\n\";\n      ss_x1 = (GtY(top_indices).matrix().transpose() * llt_X1tX1.solve(GtY(top_indices).matrix()))(0,0);\n    }\n    // get bvec conditional on picked snps\n    bvec = bstart(indices_start) - (X2tX1_X1tX1_inv * GtY(top_indices).matrix()).array() / ggt_diag;\n    //if(params.debug) cerr << bvec.head(5).matrix().transpose() << \"\\n\";\n    v_y = (ss_y - ss_x1 - bvec * GtY(indices_start)) / (ggt_diag - l0.nspns_picked(ph) - l0.nspns_picked_block(ph) - 1);\n    v_beta = (ggt_diag - (X2tX1_X1tX1_inv.array() * l0.GGt(indices_start,top_indices).array()).rowwise().sum()) / ggt_diag / ggt_diag;  \n  }\n\n  if(params.debug) cout << \"max Tchisq in l0 block (removing top SNPs) = \" << chisq_v(maxIndex) << \"\\n\";\n  if(bs>1){ // if at least one snp was picked\n    l1.top_snp_pgs[chrom].col(ph).array() += block_top_pgs; // update top_snp_pgs (sum per chromosome)\n    l1.top_snp_pgs[0].col(ph).array() += block_top_pgs; // update top_snp_pgs (sum across chr)\n    l0.ymat_res.col(ph).array() -= block_top_pgs;\n    l0.nspns_picked(ph) += l0.nspns_picked_block(ph);\n    if(params.use_loocv){ // loocv\n      GtY = (Gblock->Gmat * l0.ymat_res.col(ph)).array();  \n    } else {\n      uint32_t cum_size_folds = 0; GtY = 0;\n      for(int i = 0; i < params.cv_folds; ++i ) {\n        l0.GtY[i].col(ph) = Gblock->Gmat.middleCols(cum_size_folds, params.cv_sizes(i)) * l0.ymat_res.col(ph).segment(cum_size_folds, params.cv_sizes(i));\n        GtY += l0.GtY[i].col(ph).array();\n        cum_size_folds += params.cv_sizes(i);\n      }\n    }\n  }\n}\n\nvoid ridge_cox_level_1(struct in_files* files, struct param* params, struct phenodt* pheno_data, struct ridgel1* l1, struct ests* m_ests, mstream& sout) {\n  sout << endl << \" Level 1 ridge with cox regression...\" << endl << flush;\n  \n  int ph_eff, l0_idx;\n  int time_index, event_index;\n  Eigen::VectorXd ph_time, ph_event;\n  string in_pheno;\n  ifstream infile;\n  l1->pheno_l1_not_converged = ArrayXb::Constant(params->n_pheno, false);\n\n  for (int i = 0; i < 6; i++)\n    l1->cumsum_values[i].setZero(params->n_pheno, params->n_ridge_l1);\n\n  for (const auto& entry: files->t2e_map) {\n    const std::string& time_name = entry.first;\n    const std::string& event_name = entry.second;\n    // find time column index\n    std::vector<std::string>::iterator it_time = std::find(files->pheno_names.begin(), files->pheno_names.end(), time_name);\n    time_index = std::distance(files->pheno_names.begin(), it_time);\n    ph_time = pheno_data->phenotypes_raw.col(time_index);\n    // find event column index\n    std::vector<std::string>::iterator it_event = std::find(files->pheno_names.begin(), files->pheno_names.end(), event_name);\n    event_index = std::distance(files->pheno_names.begin(), it_event);\n    ph_event = pheno_data->phenotypes_raw.col(event_index);\n\n    params->pheno_pass(event_index) = false;\n    \n    sout << \"   -on phenotype \" << time_name << \"...\" << flush;\n\n    auto ts1 = std::chrono::high_resolution_clock::now();\n    
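// the ridge path for time-to-event traits is anchored at the lambda_max implied by the
    // gradient of the null model fit (via getCoxLambdaMax below), and each fold is scored
    // by the deviance of the held-out samples
    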
ph_eff = params->write_l0_pred ? 0 : time_index;\n    l0_idx = params->t2e_event_l0 ? event_index : time_index;\n    if(params->write_l0_pred)\n      read_l0(l0_idx, ph_eff, files, params, l1, sout);\n\n    MapArXb mask (pheno_data->masked_indivs.col(time_index).data(), pheno_data->masked_indivs.rows());\n\n    // find max lambda for each t2e trait\n    survival_data survivalNullData;\n    survivalNullData.setup(ph_time, ph_event, mask, true);\n    // initialize at lambda 0, to find lambda_max\n    cox_ridge coxRidge_null_lamb0(survivalNullData, l1->test_mat_conc[ph_eff], m_ests->offset_nullreg(all, time_index), mask, 0, params->niter_max, params->niter_max_line_search, params->numtol_cox);\n    coxRidge_null_lamb0.coxGrad(survivalNullData);\n    Eigen::VectorXd gradient = coxRidge_null_lamb0.get_gradient();\n    double lambda_max = getCoxLambdaMax(l1->test_mat_conc[ph_eff], gradient);\n    pheno_data->cox_max_tau[time_index] = lambda_max;\n\n    check_l0(time_index, ph_eff, params, l1, pheno_data, sout);\n\n    for(int i = 0; i < params->cv_folds; ++i ) {\n      ArrayXb fold_train_mask = (l1->fold_id[time_index].array() != i) && mask;\n      ArrayXb fold_test_mask = (l1->fold_id[time_index].array() == i) && mask;\n      \n      survival_data survivalData_fold;\n      survivalData_fold.setup(ph_time, ph_event, fold_train_mask, true);\n      cox_ridge_path coxRidgePath_fold(survivalData_fold, l1->test_mat_conc[ph_eff], m_ests->offset_nullreg.col(time_index), fold_train_mask, params->n_ridge_l1, 1e-4, params->tau[time_index], params->niter_max_ridge, params->niter_max_line_search_ridge, params->l1_ridge_tol, true);\n      coxRidgePath_fold.fit(survivalData_fold, l1->test_mat_conc[ph_eff], m_ests->offset_nullreg.col(time_index), fold_train_mask);\n\n      if (!coxRidgePath_fold.converge.all()) {\n        l1->pheno_l1_not_converged(time_index) = true;\n      }\n      l1->beta_hat_level_1[time_index][i] = coxRidgePath_fold.beta_mx;\n\n      // prediction (eta), and compute deviance on test set\n      survival_data survivalData_test;\n      survivalData_test.setup(ph_time, ph_event, fold_test_mask, true);\n      for (int l = 0; l < params->tau[time_index].size(); ++l) {\n        cox_ridge coxRidge_test(survivalData_test, l1->test_mat_conc[ph_eff], m_ests->offset_nullreg.col(time_index), fold_test_mask, params->tau[time_index](l), params->niter_max_ridge, params->niter_max_line_search_ridge, params->l1_ridge_tol, false, coxRidgePath_fold.beta_mx.col(l));\n        l1->cumsum_values[5](time_index, l) += coxRidge_test.get_null_deviance();\n      }\n    }\n    sout << \"done\";\n    auto ts2 = std::chrono::high_resolution_clock::now();\n    auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(ts2 - ts1);\n    sout << \" (\" << duration.count() << \"ms) \"<< endl;\n  }\n  sout << endl;\n}\n"
  },
  {
    "path": "src/Step1_Models.hpp",
    "content": "/* \n\n   This file is part of the regenie software package.\n\n   Copyright (c) 2020-2024 Joelle Mbatchou, Andrey Ziyatdinov & Jonathan Marchini\n\n   Permission is hereby granted, free of charge, to any person obtaining a copy\n   of this software and associated documentation files (the \"Software\"), to deal\n   in the Software without restriction, including without limitation the rights\n   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n   copies of the Software, and to permit persons to whom the Software is\n   furnished to do so, subject to the following conditions:\n\n   The above copyright notice and this permission notice shall be included in all\n   copies or substantial portions of the Software.\n\n   THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n   SOFTWARE.\n\n*/\n\n#ifndef MODELS_H\n#define MODELS_H\n\n#define ETAMINTHR -30.0\n#define ETAMAXTHR 30.0\n\nstruct ests {\n\n  Eigen::MatrixXd offset_nullreg;\n  Eigen::MatrixXd blups, ltco_prs;\n  Eigen::MatrixXd Gamma_sqrt, Gamma_sqrt_mask, Y_hat_p;\n  std::vector<Eigen::MatrixXd> X_Gamma;\n  Eigen::MatrixXd bhat_start; // for interaction tests\n  std::vector<cox_mle> cox_MLE_NULL;\n  std::vector<survival_data> survival_data_pheno;\n\n};\n\nstruct ridgel0 {\n\n  Eigen::MatrixXd GGt;\n  Eigen::MatrixXd GTY;\n  std::vector<Eigen::MatrixXd> G_folds, GtY; // storage for folds at levle 0\n  Eigen::MatrixXd GGt_eig_vec, GGt_eig_val;\n  Eigen::MatrixXd Wmat, ymat_res;\n  MatrixXb picked_top_snp;\n  bool subset_l0_snps_gmat = false;\n  Eigen::ArrayXi nspns_picked_block, nspns_picked, indices_gmat_keep;\n};\n\nstruct ridgel1 {\n\n  std::vector<Eigen::MatrixXd> X_folds, XtY; // storage for folds at level 1\n  std::vector<std::vector<Eigen::MatrixXd>> pred_mat, test_mat;\n  std::vector<Eigen::MatrixXd> test_mat_conc;\n  std::vector<std::vector<Eigen::MatrixXd>> pred_pheno, test_pheno;\n  std::vector<std::vector<Eigen::MatrixXd>> pred_pheno_raw, test_pheno_raw;\n  std::vector<std::vector<Eigen::MatrixXd>> pred_offset, test_offset;\n  std::vector<Eigen::VectorXi> fold_id; //cox cv fold id\n  std::vector<Eigen::MatrixXd> cumsum_values, cumsum_values_full; // storage of values to compute rsq and values [Sx, Sy, Sx2, Sy2, Sxy]\n  std::vector<std::vector<Eigen::MatrixXd>> beta_hat_level_1;\n  ArrayXb pheno_l1_not_converged;\n  MatrixXb l0_colkeep;\n  Eigen::MatrixXd l0_pv_block;\n  Eigen::ArrayXi chrom_block, chrom_map_ndiff;\n  Eigen::ArrayXd ridge_param_mult;\n  Eigen::MatrixXd beta_snp_step1; // MxR\n  std::vector<Eigen::MatrixXd> top_snp_pgs;\n};\n\n\nvoid fit_null_logistic(bool const&,const int&,struct param*,struct phenodt*,struct ests*,struct in_files*,mstream&,bool const& save_betas = false);\nbool fit_logistic(const Eigen::Ref<const Eigen::ArrayXd>&,const Eigen::Ref<const Eigen::MatrixXd>&,const Eigen::Ref<const Eigen::ArrayXd>&,const Eigen::Ref<const ArrayXb>&,Eigen::ArrayXd&,Eigen::ArrayXd&,Eigen::ArrayXd&,struct param const*,mstream&,bool const&,double const& numtol = 1e-8);\ndouble get_logist_dev(const Eigen::Ref<const Eigen::ArrayXd>& Y, const Eigen::Ref<const 
Eigen::ArrayXd>& pi, const Eigen::Ref<const ArrayXb>& mask);\n\nvoid fit_null_poisson(const int&,struct param*,struct phenodt*,struct ests*,struct in_files*,mstream&,bool const& save_betas = false);\nbool fit_poisson(const Eigen::Ref<const Eigen::ArrayXd>&,const Eigen::Ref<const Eigen::MatrixXd>&,const Eigen::Ref<const Eigen::ArrayXd>&,const Eigen::Ref<const ArrayXb>&,Eigen::ArrayXd&,Eigen::ArrayXd&,Eigen::ArrayXd&,struct param const*,mstream&);\ndouble get_poisson_dev(const Eigen::Ref<const Eigen::ArrayXd>& Y, const Eigen::Ref<const Eigen::ArrayXd>& pi, const Eigen::Ref<const ArrayXb>& mask);\n\nvoid fit_null_cox(bool const&, const int&, struct param*, struct phenodt*, struct ests*, struct in_files*, mstream&, bool const& save_betas = false);\ndouble getCoxLambdaMax(const Eigen::MatrixXd&, const Eigen::VectorXd&);\n\nvoid ridge_level_0(const int&,struct in_files*,struct param*,struct filter*,struct ests*,struct geno_block*,struct phenodt*,std::vector<snp>&,struct ridgel0*,struct ridgel1*,std::vector<MatrixXb>&,mstream&);\nvoid ridge_level_0_loocv(const int,struct in_files*,struct param*,struct filter*,struct ests*,struct geno_block*,struct phenodt*,std::vector<snp>&,struct ridgel0*,struct ridgel1*,mstream&);\nvoid write_l0_file(std::ofstream*,Eigen::MatrixXd&,mstream&);\n\nvoid set_mem_l1(struct in_files*,struct param*,struct filter*,struct ests*,struct geno_block*,struct phenodt*,struct ridgel1*,std::vector<MatrixXb>&,mstream&);\nvoid ridge_level_1(struct in_files*,struct param*,struct phenodt*,struct ridgel1*,mstream&);\nvoid ridge_level_1_loocv(struct in_files*,struct param*,struct phenodt*,struct ridgel1*,mstream&);\n\nvoid ridge_logistic_level_1(struct in_files*,struct param*,struct phenodt*,struct ridgel1*,std::vector<MatrixXb>&,mstream&);\nvoid ridge_logistic_level_1_loocv(struct in_files*,struct param*,struct phenodt*,struct ests*,struct ridgel1*,mstream&);\nbool run_log_ridge_loocv(const double&,const Eigen::Ref<const Eigen::ArrayXd>&,const int&,const int&,Eigen::ArrayXd&,Eigen::ArrayXd&,Eigen::ArrayXd&,const Eigen::Ref<const Eigen::ArrayXd>&,Eigen::Ref<Eigen::MatrixXd>,const Eigen::Ref<const Eigen::ArrayXd>&,const Eigen::Ref<const ArrayXb>&,struct param*,mstream&);\nvoid run_log_ridge_loocv_adam(const int&,const double&,const Eigen::Ref<const Eigen::ArrayXd>&,Eigen::ArrayXd&,Eigen::ArrayXd&,Eigen::ArrayXd&,const Eigen::Ref<const Eigen::ArrayXd>&,Eigen::Ref<Eigen::MatrixXd>,const Eigen::Ref<const Eigen::ArrayXd>&,const Eigen::Ref<const ArrayXb>&,struct param*,mstream&);\n\nvoid ridge_poisson_level_1(struct in_files*,struct param*,struct phenodt*,struct ridgel1*,std::vector<MatrixXb>&,mstream&);\nvoid ridge_poisson_level_1_loocv(struct in_files*,struct param*,struct phenodt*,struct ests*,struct ridgel1*,mstream&);\nbool run_ct_ridge_loocv(const double&,const Eigen::Ref<const Eigen::ArrayXd>&,const int&,const int&,Eigen::ArrayXd&,Eigen::ArrayXd&,const Eigen::Ref<const Eigen::ArrayXd>&,Eigen::Ref<Eigen::MatrixXd>,const Eigen::Ref<const Eigen::ArrayXd>&,const Eigen::Ref<const ArrayXb>&,struct param*,mstream&);\n\nvoid ridge_cox_level_1(struct in_files*, struct param*, struct phenodt*, struct ridgel1*, struct ests*, mstream&);\n\nvoid get_wvec(Eigen::ArrayXd&,Eigen::ArrayXd&,const Eigen::Ref<const ArrayXb>&);\nbool get_wvec(Eigen::ArrayXd&,Eigen::ArrayXd&,const Eigen::Ref<const ArrayXb>&,const double&);\nvoid get_pvec(Eigen::ArrayXd&,Eigen::ArrayXd&,const Eigen::Ref<const Eigen::ArrayXd>&,const Eigen::Ref<const Eigen::ArrayXd>&,const Eigen::Ref<const Eigen::MatrixXd>&,double 
const&);\nvoid get_pvec(Eigen::ArrayXd&,Eigen::ArrayXd&,const double&,const Eigen::Ref<const Eigen::ArrayXd>&,const Eigen::Ref<const Eigen::VectorXd>&,double const&);\nvoid get_pvec(Eigen::ArrayXd&,const Eigen::Ref<const Eigen::ArrayXd>&,double const&);\nvoid get_pvec_poisson(Eigen::ArrayXd&,Eigen::ArrayXd&,const Eigen::Ref<const Eigen::ArrayXd>&,const Eigen::Ref<const Eigen::ArrayXd>&,const Eigen::Ref<const Eigen::MatrixXd>&,double const&);\ndouble compute_log_lik_bern(const double&,const double&);\ndouble compute_log_lik_poisson(const double&,const double&);\ndouble y_log_ypi(const double&,const double&);\ndouble get_deviance_logistic(const Eigen::Ref<const Eigen::ArrayXd>&,const Eigen::Ref<const Eigen::ArrayXd>&,const Eigen::Ref<const Eigen::ArrayXd>&,const Eigen::Ref<const ArrayXb>&);\n\nvoid test_assoc_block(int const&,int const&,struct ridgel0&,struct ridgel1&,struct geno_block*,struct phenodt*,snp const*,struct param const&,mstream&);\nvoid apply_iter_cond(int const&,int const&,int const&,struct ridgel0&,struct ridgel1&,struct geno_block*,snp const*,struct param const&);\nvoid read_l0(int const&,int const&,struct in_files*,struct param*,struct ridgel1*,mstream&);\nvoid read_l0_chunk(int const&,int const&,int const&,int const&,const std::string&,struct param*,struct ridgel1*,mstream&);\nvoid check_l0(int const&,int const&,struct param*,struct ridgel1*,struct phenodt const*,mstream&,bool const& silent_mode = false);\n\n\nuint64 getSize(std::string const& fname);\n#endif\n\n"
  },
  {
    "path": "src/Step2_Models.cpp",
    "content": "/* \n\n   This file is part of the regenie software package.\n\n   Copyright (c) 2020-2024 Joelle Mbatchou, Andrey Ziyatdinov & Jonathan Marchini\n\n   Permission is hereby granted, free of charge, to any person obtaining a copy\n   of this software and associated documentation files (the \"Software\"), to deal\n   in the Software without restriction, including without limitation the rights\n   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n   copies of the Software, and to permit persons to whom the Software is\n   furnished to do so, subject to the following conditions:\n\n   The above copyright notice and this permission notice shall be included in all\n   copies or substantial portions of the Software.\n\n   THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n   SOFTWARE.\n\n*/\n\n#include \"Regenie.hpp\"\n#include \"Files.hpp\"\n#include \"Geno.hpp\"\n#include \"Joint_Tests.hpp\"\n#include \"survival_data.hpp\"\n#include \"cox_score.hpp\"\n#include \"cox_firth.hpp\"\n#include \"Step1_Models.hpp\"\n#include \"Step2_Models.hpp\"\n#include \"HLM.hpp\"\n#include \"Pheno.hpp\"\n#include \"MultiTrait_Tests.hpp\"\n#include \"Ordinal.hpp\"\n#include \"Masks.hpp\"\n#include \"Data.hpp\"\n#include \"MCC.hpp\"\n\nusing namespace std;\nusing namespace Eigen;\nusing namespace boost;\nusing boost::math::normal;\nusing boost::math::chi_squared;\n\n\nvoid blup_read_chr(bool const& silent, int const& chrom, struct ests& m_ests, struct in_files& files, struct filter const& filters, struct phenodt const& pheno_data, struct param& params, mstream& sout) {\n\n  string line, filename, tmp_pheno;\n  std::vector< string > id_strings, tmp_str_vec ;\n  double in_blup;\n  uint32_t indiv_index;\n  Files blupf;\n\n  // skip reading if specified by user or if PRS is given (same for all chromosomes)\n  if( params.use_prs || params.skip_blups ) return;\n\n  m_ests.blups = MatrixXd::Zero(params.n_samples, params.n_pheno);\n\n  if(!silent) sout << \"   -reading loco predictions for the chromosome...\" << flush;\n  auto t1 = std::chrono::high_resolution_clock::now();\n\n  // read blup file for each phenotype\n  for(int ph = 0; ph < params.n_pheno; ph++) {\n    if( !params.pheno_pass(ph) ) continue;\n\n    filename = files.blup_files[ files.pheno_names[ph] ];\n    ArrayXb read_indiv = ArrayXb::Constant(params.n_samples, false);\n    blupf.openForRead(filename, sout);\n\n    // check header\n    blupf.readLine(line);\n    id_strings = string_split(line,\"\\t \");\n    if( id_strings[0] != \"FID_IID\") \n      throw \"header of blup file must start with FID_IID.\";\n\n    // skip to chr\n    blupf.ignoreLines(chrom-1);\n\n    blupf.readLine(line);\n    tmp_str_vec = string_split(line,\"\\t \");\n\n    // check number of entries is same as in header\n    if(tmp_str_vec.size() != id_strings.size()) \n      throw \"blup file for phenotype '\" + files.pheno_names[ph] + \"' has different number of entries on line \" + to_string( chrom + 1 ) + \" compared to the header (=\" + to_string( tmp_str_vec.size() ) + \" vs \" + to_string( id_strings.size() ) + \").\";\n\n    // 
check starts with chromosome number\n    if(chrStrToInt(tmp_str_vec[0], params.nChrom) != chrom) \n      throw \"blup file for phenotype '\" + files.pheno_names[ph] + \"' starts with `\" +  tmp_str_vec[0]  + \"`\"\n        + \" instead of chromosome number=\" + to_string( chrom ) + \".\";\n\n    // read blup data\n    for( size_t filecol = 1; filecol < id_strings.size(); filecol++ ) {\n\n      // ignore sample if it is not in genotype data\n      if (!in_map(id_strings[filecol], params.FID_IID_to_ind)) continue;\n      indiv_index = params.FID_IID_to_ind[id_strings[filecol]];\n\n      // ignore sample if it is not included in analysis\n      if(!filters.ind_in_analysis(indiv_index)) continue;\n\n      // ignore sample if it is masked for the trait (prs will be 0)\n      if(!pheno_data.masked_indivs(indiv_index,ph)) continue;\n\n      // check if duplicate\n      if( !read_indiv(indiv_index) )\n        read_indiv(indiv_index) = true;\n      else \n        throw \"individual appears more than once in blup file [\" + filename + \"]: FID_IID=\" + id_strings[filecol];\n\n      in_blup = convertDouble( tmp_str_vec[filecol], &params, sout);\n\n      // if blup is NA then individual must be ignored in analysis for the phenotype (ie mask = 0)\n      if (in_blup == params.missing_value_double)\n        throw \"individual has missing predictions (FID_IID=\" + id_strings[filecol] + \";chr=\" + to_string( chrom ) + \";phenotype=\" + files.pheno_names[ph] + \n          \"). Either ignore these individuals using option '--remove', or skip reading predictions with option '--ignore-pred'.\\n\" + params.err_help ;\n      else if(params.w_ltco && (chrom != params.ltco_chr)) // use ltco\n        m_ests.blups(indiv_index, ph) = in_blup - m_ests.ltco_prs(indiv_index, ph);\n      else // loco\n        m_ests.blups(indiv_index, ph) = in_blup;\n    }\n\n    // force all non-masked samples to have loco predictions\n    //   -> this should not occur as masking of absent samples is done in blup_read() function\n    if( (pheno_data.masked_indivs.col(ph).array() && read_indiv).count() < pheno_data.masked_indivs.col(ph).count() )\n      throw \"all samples included in the analysis (for phenotype \" +\n        files.pheno_names[ph] + \") must have LOCO predictions in file : \" + filename;\n\n    //cerr << m_ests.blups.col(ph).head(5)<<endl;\n\n    blupf.closeFile();\n  }\n\n  if(silent) return;\n\n  sout << \"done\";\n  auto t2 = std::chrono::high_resolution_clock::now();\n  auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1);\n  sout << \" (\" << duration.count() << \"ms) \"<< endl;\n\n}\n\n\n/*\n * // tried this way for step 2 but it is slower than per SNP analysis\n// marginal score test done for all variants/traits\nvoid compute_score(vector<uint64> const& indices, int const& chrom, string const& test_string, string const& model_type, const Ref<const MatrixXd>& yres, const Ref<const RowVectorXd>& p_sd_yres, struct param const& params, struct phenodt& pheno_data, struct geno_block& gblock, vector<variant_block>& all_snps_info, vector<snp> const& snpinfo, struct ests const& m_ests, struct f_ests& fest, struct in_files const& files, mstream& sout){\n\n  if(params.trait_mode)\n    throw \"not for nonQTs\";//compute_score_bt();\n  else\n    compute_score_qt(indices, chrom, test_string, model_type, yres, p_sd_yres, params, pheno_data, gblock, all_snps_info, snpinfo, files);\n\n}\n\n\nvoid compute_score_qt(vector<uint64> const& indices, int const& chrom, string const& test_string, string const& 
model_type, const Ref<const MatrixXd>& yres, const Ref<const RowVectorXd>& p_sd_yres, struct param const& params, struct phenodt& pheno_data, struct geno_block& gblock, vector<variant_block>& all_snps_info, vector<snp> const& snpinfo, struct in_files const& files){\n\n  normal nd(0,1);\n  double zcrit = quantile(complement(nd, .025));\n  MatrixXd stats, bhat, scale_fac_pheno;\n\n  if( params.strict_mode ) {\n\n    gblock.Gmat.array().colwise() *= pheno_data.masked_indivs.col(0).array().cast<double>();\n    stats = yres.transpose() * gblock.Gmat; // PxM\n    stats.array() /= sqrt( params.n_analyzed - params.ncov );\n    // estimate\n    bhat = (stats.array().colwise() * (pheno_data.scale_Y.array() * p_sd_yres.array()).matrix().transpose().array()).matrix() / sqrt(params.n_analyzed - params.ncov); // need to divide by scale_fac for G block_info->scale_fac\n\n  } else {\n\n    // compute GtG for each phenotype (different missing patterns)\n    scale_fac_pheno = pheno_data.masked_indivs.cast<double>().transpose() * gblock.Gmat.array().square().matrix(); // PxM, each element is GtG for each markerxtrait respecting missingness pattern\n    stats = ((yres.transpose() * gblock.Gmat).array() / scale_fac_pheno.array().sqrt()).matrix(); // PxM\n    // estimate\n    bhat = ((stats.array().colwise() * (pheno_data.scale_Y.array() * p_sd_yres.array()).matrix().transpose().array()) / scale_fac_pheno.array().sqrt()).matrix(); // need to divide by scale_fac for G block_info->scale_fac\n\n  }\n  //cerr << stats.block(0,0,5,1).array().square() << endl;\n\n  for(size_t isnp = 0; isnp < indices.size(); isnp++){\n\n    variant_block* block_info = &(all_snps_info[isnp]);\n\n    string tmpstr; // for sum stats\n    if(!params.htp_out) tmpstr = print_sum_stats_head(indices[isnp], snpinfo);\n\n    // beta\n    block_info->bhat = bhat.col(isnp).array() / block_info->scale_fac;\n    // SE\n    block_info->se_b = block_info->bhat / stats.col(isnp).array();\n\n    // get test statistic\n    block_info->chisq_val = stats.col(isnp).array().square();\n\n    for( int i = 0; i < params.n_pheno; ++i ) {\n\n      if( block_info->ignored_trait(i) ) \n        continue;\n\n      // get  pvalue\n      get_logp(block_info->pval_log(i), block_info->chisq_val(i));\n\n      if(!params.p_joint_only)\n        block_info->sum_stats[i] = print_sum_stats_line(indices[isnp], i, tmpstr, test_string, model_type, block_info, snpinfo, files, params);\n\n    }\n  }\n\n}\n*/\n\n// marginal score test for each snp\nvoid compute_score(int const& isnp, int const& snp_index, int const& chrom, int const& thread_num, string const& test_string, string const& model_type, const Ref<const MatrixXd>& yres, const Ref<const RowVectorXd>& p_sd_yres, struct param const& params, struct phenodt& pheno_data, struct geno_block& gblock, variant_block* block_info, vector<snp> const& snpinfo, struct ests const& m_ests, struct f_ests& fest, struct in_files& files, mstream& sout){\n\n  if(params.trait_mode==1)\n    compute_score_bt(isnp, snp_index, chrom, thread_num, test_string, model_type, yres, params, pheno_data, gblock, block_info, snpinfo, m_ests, fest, files, sout);\n  else if(params.trait_mode==2)\n    compute_score_ct(isnp, snp_index, chrom, thread_num, test_string, model_type, yres, params, pheno_data, gblock, block_info, snpinfo, m_ests, fest, files, sout);\n  else if(params.trait_mode==3)\n    compute_score_cox(isnp, snp_index, chrom, thread_num, test_string, model_type, params, pheno_data, gblock, block_info, snpinfo, m_ests, fest, files, sout);\n  else 
if(params.trait_mode==0) {\n    if(params.mcc_test) {\n      compute_score_qt_mcc(isnp, snp_index, thread_num, test_string, model_type, yres, p_sd_yres, params, pheno_data, gblock, block_info, snpinfo, files, sout);\n    } else {\n      compute_score_qt(isnp, snp_index, thread_num, test_string, model_type, yres, p_sd_yres, params, pheno_data, gblock, block_info, snpinfo, files, sout);\n    }\n  }\n}\n\n// MCC test stat for QT \nvoid compute_score_qt_mcc(int const& isnp, int const& snp_index, int const& thread_num, string const& test_string, string const& model_type, const Ref<const MatrixXd>& yres, const Ref<const RowVectorXd>& p_sd_yres, struct param const& params, struct phenodt& pheno_data, struct geno_block& gblock, variant_block* block_info, vector<snp> const& snpinfo, struct in_files& files, mstream& sout){\n\n  double gsc = block_info->flipped ? (4 * params.n_samples + block_info->scale_fac) : block_info->scale_fac;\n  string tmpstr; // for sum stats\n  MapArXd Geno (gblock.Gmat.col(isnp).data(), params.n_samples, 1);\n  data_thread* dt_thr = &(gblock.thread_data[thread_num]);\n\n  if( params.strict_mode ) {\n    double n_sq = sqrt( params.n_analyzed - params.ncov_analyzed );\n    if(params.skip_blups && dt_thr->is_sparse) // Gsparse is on raw scale (must have yres centered)\n      dt_thr->stats = (yres.transpose() * dt_thr->Gsparse.cwiseProduct(pheno_data.masked_indivs.col(0).cast<double>()) / gsc) / n_sq;\n    else\n      dt_thr->stats = (yres.transpose() * (Geno * pheno_data.masked_indivs.col(0).cast<double>().array()).matrix()) / n_sq;\n\n    if(params.htp_out)\n      dt_thr->scores = dt_thr->stats * n_sq * gsc;\n\n    // estimate\n    dt_thr->bhat = dt_thr->stats * ( pheno_data.scale_Y.array() * p_sd_yres.array()).matrix().transpose().array() / ( n_sq * gsc );\n  } else {\n    // compute GtG for each phenotype (different missing patterns)\n    dt_thr->scale_fac_pheno = pheno_data.masked_indivs.transpose().cast<double>() * Geno.square().matrix();\n    dt_thr->stats = (yres.transpose() * Geno.matrix()).array() / dt_thr->scale_fac_pheno.sqrt();\n\n    if(params.htp_out)\n      dt_thr->scores = dt_thr->stats * dt_thr->scale_fac_pheno.sqrt() * gsc;\n\n    // estimate\n    dt_thr->bhat = dt_thr->stats * ( pheno_data.scale_Y.array() * p_sd_yres.array() ).matrix().transpose().array() / ( sqrt(dt_thr->scale_fac_pheno) * gsc );\n  }\n\n  // SE\n  dt_thr->se_b = dt_thr->bhat / dt_thr->stats;\n\n  // get test statistic\n  dt_thr->chisq_val = dt_thr->stats.square();\n\n  // (1) MCC if mcc_apply_thr == false; (2) Score -> MCC if Pval(Score) < mcc_thr\n  MCC mcc;\n  boost::math::chi_squared chisq(1);\n  double chisq_val_adj;\n\n  if(!params.mcc_apply_thr) {\n    // (1) only MCC\n    mcc.setup_y(pheno_data.masked_indivs, yres, params.ncov_analyzed);\n    MCCResults mcc_results = mcc.run(Geno);\n    // store MCC results into dt_thr\n    for( int i = 0; i < params.n_pheno; ++i ) {\n      if(mcc_results.Skip(i, 0)) {\n        dt_thr->pval_log(i) = -1;\n        block_info->test_fail(i) = true;\n      } else {\n        dt_thr->pval_log(i) = -log10(mcc_results.Pval(i, 0));\n        // adjust SE\n        chisq_val_adj = boost::math::quantile(boost::math::complement(chisq, mcc_results.Pval(i, 0)));\n        dt_thr->se_b(i) *= sqrt(dt_thr->chisq_val(i) / chisq_val_adj);\n      }\n    }\n  } else {\n    // (2) Score -> MCC\n    for( int i = 0; i < params.n_pheno; ++i ) {\n      get_logp(dt_thr->pval_log(i), dt_thr->chisq_val(i));\n      // check for skewness of phenotype i\n      if(dt_thr->pval_log(i) 
> params.mcc_thr_nlog10 && pheno_data.mcc_Y[i]) {\n        mcc.setup_y(pheno_data.masked_indivs.col(i), yres.col(i), params.ncov_analyzed);\n        MCCResults mcc_results_i = mcc.run(Geno);\n        if(mcc_results_i.Skip(0, 0)) {\n          dt_thr->pval_log(i) = -1;\n          block_info->test_fail(i) = true;\n        } else {\n          dt_thr->pval_log(i) = -log10(mcc_results_i.Pval(0, 0));\n          // adjust SE\n          chisq_val_adj = boost::math::quantile(boost::math::complement(chisq, mcc_results_i.Pval(0, 0)));\n          dt_thr->se_b(i) *= sqrt(dt_thr->chisq_val(i) / chisq_val_adj);\n        }\n      }\n    }\n  }\n\n  if(!params.htp_out) tmpstr = print_sum_stats_head(snp_index, snpinfo);\n\n  for( int i = 0; i < params.n_pheno; ++i ) {\n\n    if( !params.pheno_pass(i) || block_info->ignored_trait(i) ) {\n      if(!params.p_joint_only && !params.split_by_pheno)\n        block_info->sum_stats[i].append( print_na_sumstats(i, 1, tmpstr, test_string, block_info, params) );\n      continue;\n    }\n    if(block_info->flipped) dt_thr->bhat(i) *= -1;\n\n    // get MCC pvalue\n    /* get_logp(dt_thr->pval_log(i), dt_thr->chisq_val(i)); */\n    /* if(mcc_results.Skip(i, 0)) { */\n    /*   dt_thr->pval_log(i) = -1; */\n    /*   block_info->test_fail(i) = true; */\n    /* } else { */\n    /*   dt_thr->pval_log(i) = -log10(mcc_results.Pval(i, 0)); */\n    /* } */\n\n    if(!params.p_joint_only)\n      block_info->sum_stats[i].append( print_sum_stats_line(snp_index, i, tmpstr, test_string, model_type, block_info, dt_thr, snpinfo, files, params) );\n\n  }\n\n}\n// score test stat for QT\nvoid compute_score_qt(int const& isnp, int const& snp_index, int const& thread_num, string const& test_string, string const& model_type, const Ref<const MatrixXd>& yres, const Ref<const RowVectorXd>& p_sd_yres, struct param const& params, struct phenodt& pheno_data, struct geno_block& gblock, variant_block* block_info, vector<snp> const& snpinfo, struct in_files& files, mstream& sout){\n\n  bool run_full_test = true; // disable this for QTs // !params.skip_cov_res;\n  double denum = 0, gsc = block_info->flipped ? (4 * params.n_samples + block_info->scale_fac) : block_info->scale_fac;\n  string tmpstr; // for sum stats\n  ArrayXd num, denum_arr;\n  MapArXd Geno (gblock.Gmat.col(isnp).data(), params.n_samples, 1);\n  data_thread* dt_thr = &(gblock.thread_data[thread_num]);\n\n  if( !run_full_test ) { // only a single trait (i.e. strict mode) -- covariates are not residualized from G\n    if(dt_thr->is_sparse){\n      num = yres.transpose() * dt_thr->Gsparse;\n      denum = dt_thr->Gsparse.squaredNorm();\n    } else {\n      num = (yres.transpose() * Geno.matrix()).array() * gsc;\n      denum = gsc * gsc * Geno.square().sum(); \n    }\n    dt_thr->stats = num / sqrt(denum);\n\n    // if stats is above threshold, project out covariates and run the full model\n    if(fabs(dt_thr->stats(0)) > params.z_thr) {\n      run_full_test = true;\n      if(!dt_thr->is_sparse) {\n        residualize_geno(pheno_data.new_cov, gblock.Gmat.col(isnp), block_info, params);\n        gsc = block_info->flipped ? 
(4 * params.n_samples + block_info->scale_fac) : block_info->scale_fac;\n      }\n    } else {\n      if(params.htp_out) {\n        dt_thr->scores = num;\n        dt_thr->skat_var = denum;\n      }\n      dt_thr->bhat = dt_thr->stats / sqrt(denum) * pheno_data.scf_sv;\n    }\n  }\n\n  if( run_full_test ){\n    if( params.strict_mode ) {\n\n      if(dt_thr->is_sparse){\n        ArrayXd XtG = pheno_data.new_cov.transpose() * dt_thr->Gsparse; // k x 1\n        num = yres.transpose() * dt_thr->Gsparse - pheno_data.YtX * XtG.matrix();\n        denum = dt_thr->Gsparse.squaredNorm() - XtG.square().sum();\n      } else {\n        num = (yres.transpose() * Geno.matrix()).array() * gsc;\n        denum = gsc * gsc * (params.n_analyzed - params.ncov_analyzed); \n      }\n\n      dt_thr->stats = num / sqrt(denum);\n      if(params.htp_out) {\n        dt_thr->scores = num;\n        dt_thr->skat_var = denum;\n      }\n\n      // estimate\n      dt_thr->bhat = dt_thr->stats / sqrt(denum) * pheno_data.scf_sv;\n\n    } else {\n\n      // compute GtG for each phenotype (different missing patterns)\n      if(dt_thr->is_sparse){\n        VectorXd XtG = pheno_data.new_cov.transpose() * dt_thr->Gsparse; // k x 1 - do this for all traits (Geno is only residualized once across traits)\n        num = yres.transpose() * dt_thr->Gsparse - pheno_data.YtX * XtG; // P x 1 \n        double XtG_ss = XtG.squaredNorm();\n        denum_arr.resize(params.n_pheno);\n        for (int ph = 0; ph < params.n_pheno; ph++) {\n          SpVec Gm = dt_thr->Gsparse.cwiseProduct(pheno_data.masked_indivs.col(ph).cast<double>()); // N x 1\n          VectorXd XtGm = pheno_data.new_cov.transpose() * Gm;\n          denum_arr(ph) = Gm.squaredNorm() - 2 * XtGm.dot(XtG) + XtG_ss; // last term is an approximation assuming X'X is same for all traits (=I)\n          //VectorXd vm = (pheno_data.new_cov * XtG).cwiseProduct(pheno_data.masked_indivs.col(ph).cast<double>());\n          //denum_arr(ph) = Gm.squaredNorm() - 2 * XtGm.dot(XtG) + vm.squaredNorm(); // correct calculation but more expensive\n        }\n      } else {\n        num = (yres.transpose() * Geno.matrix()).array() * gsc;\n        denum_arr = gsc * gsc * (pheno_data.masked_indivs.transpose().cast<double>() * Geno.square().matrix()); \n      }\n\n      dt_thr->stats = num / denum_arr.sqrt();\n      if(params.htp_out) {\n        dt_thr->scores = num;\n        dt_thr->skat_var = denum_arr;\n      }\n\n      // estimate\n      dt_thr->bhat = dt_thr->stats * pheno_data.scf_sv / denum_arr.sqrt();\n\n    }\n  }\n\n  // correction\n  if(params.mse_full) { \n    unsigned int nk = params.n_analyzed - params.ncov_analyzed;\n    ArrayXd adj_factor = (nk - dt_thr->stats.square()) / (nk - 1);\n    if(params.htp_out) dt_thr->skat_var *= adj_factor;\n    dt_thr->stats /= adj_factor.sqrt();\n  }\n\n  // SE\n  dt_thr->se_b = dt_thr->bhat / dt_thr->stats;\n\n  // get test statistic\n  dt_thr->chisq_val = dt_thr->stats.square();\n\n  if(!params.htp_out) tmpstr = print_sum_stats_head(snp_index, snpinfo);\n\n  for( int i = 0; i < params.n_pheno; ++i ) {\n\n    if( !params.pheno_pass(i) || block_info->ignored_trait(i) ) {\n      if(!params.p_joint_only && !params.split_by_pheno)\n        block_info->sum_stats[i].append( print_na_sumstats(i, 1, tmpstr, test_string, block_info, params) );\n      continue;\n    }\n    if(block_info->flipped) {\n      dt_thr->bhat(i) *= -1;\n      if (params.htp_out) dt_thr->scores(i) *= -1;\n    }\n\n    // get pvalue\n    if(params.t_test) 
get_logp_ttest(dt_thr->pval_log(i), dt_thr->stats(i), params.n_analyzed - params.ncov_analyzed - 1);\n    else get_logp(dt_thr->pval_log(i), dt_thr->chisq_val(i));\n\n    if(!params.p_joint_only)\n      block_info->sum_stats[i].append( print_sum_stats_line(snp_index, i, tmpstr, test_string, model_type, block_info, dt_thr, snpinfo, files, params) );\n\n  }\n\n}\n\nvoid compute_score_bt(int const& isnp, int const& snp_index, int const& chrom, int const& thread_num, string const& test_string, string const& model_type, const Ref<const MatrixXd>& yres, struct param const& params, struct phenodt& pheno_data, struct geno_block& gblock, variant_block* block_info, vector<snp> const& snpinfo, struct ests const& m_ests, struct f_ests& fest, struct in_files& files, mstream& sout){\n\n  string tmpstr; \n  VectorXd GW, XtWG;\n  SpVec GWs;\n  data_thread* dt_thr = &(gblock.thread_data[thread_num]);\n\n  // header snp info for sum stats\n  if(!params.htp_out) tmpstr = print_sum_stats_head(snp_index, snpinfo);\n\n  // genotype for marker\n  MapArXd Geno (gblock.Gmat.col(isnp).data(), params.n_samples, 1);\n\n  for( int i = 0; i < params.n_pheno; ++i ) {\n\n    if( !params.pheno_pass(i) || block_info->ignored_trait(i) ){\n      if(!params.p_joint_only && !params.split_by_pheno)\n        block_info->sum_stats[i].append( print_na_sumstats(i, 1, tmpstr, test_string, block_info, params) );\n      continue;\n    }\n\n    MapArXb mask (pheno_data.masked_indivs.col(i).data(), params.n_samples, 1);\n    MapcMatXd XWsqrt (m_ests.X_Gamma[i].data(), params.n_samples, m_ests.X_Gamma[i].cols());\n\n    // project out covariates from G\n    if(dt_thr->is_sparse) {\n      GWs = dt_thr->Gsparse.cwiseProduct(m_ests.Gamma_sqrt_mask.col(i));\n      XtWG = XWsqrt.transpose() * GWs;\n    } else {\n      GW = (Geno * m_ests.Gamma_sqrt_mask.col(i).array()).matrix();\n      dt_thr->Gres = GW - XWsqrt * (XWsqrt.transpose() * GW);\n    }\n\n    // denominator\n    if(dt_thr->is_sparse) \n      dt_thr->denum(i) = GWs.squaredNorm() - XtWG.squaredNorm();\n    else\n      dt_thr->denum(i) = dt_thr->Gres.squaredNorm();\n\n    double sqrt_denum = sqrt( dt_thr->denum(i) );\n    if( sqrt_denum < params.numtol ){\n      block_info->ignored_trait(i) = true;\n      if(!params.p_joint_only && !params.split_by_pheno)\n        block_info->sum_stats[i].append( print_na_sumstats(i, 1, tmpstr, test_string, block_info, params) );\n      continue;\n    }\n\n    // score test stat for BT\n    if(dt_thr->is_sparse) \n      dt_thr->stats(i) = GWs.dot(yres.col(i)) / sqrt_denum;\n    else\n      dt_thr->stats(i) = dt_thr->Gres.col(0).dot(yres.col(i)) / sqrt_denum;\n\n    if(params.htp_out) {\n      dt_thr->scores(i) = dt_thr->stats(i) * sqrt_denum;\n      dt_thr->skat_var(i) = dt_thr->denum(i);\n    }\n\n    if(dt_thr->is_sparse && (fabs(dt_thr->stats(i)) > params.z_thr)){ // no need if correction is not applied\n      dt_thr->Gres = -XWsqrt * XtWG;\n      dt_thr->Gres += GWs;\n    }\n    /*\n    if(params.debug) {\n      cerr << \"\\ny:\\n\" << yres.col(i).topRows(2) << endl;\n      cerr << \"\\nscore=\" << dt_thr->stats(i) * sqrt_denum << \" var(score)=\" << dt_thr->denum(i) << endl;\n    }\n    */\n\n    // use firth/spa\n    check_pval_snp(block_info, dt_thr, chrom, i, isnp, pheno_data, gblock, m_ests, fest, params, sout);\n\n    dt_thr->bhat(i) /= block_info->scale_fac;\n    dt_thr->se_b(i) /= block_info->scale_fac;\n    if(block_info->flipped) {\n      dt_thr->bhat(i) *= -1;\n      if (params.htp_out) dt_thr->scores(i) *= -1;\n    }\n\n    // print 
sum stats\n    if(!params.p_joint_only)\n      block_info->sum_stats[i].append( print_sum_stats_line(snp_index, i, tmpstr, test_string, model_type, block_info, dt_thr, snpinfo, files, params) );\n\n  }\n\n\n}\n\n\n// poisson\nvoid compute_score_ct(int const& isnp, int const& snp_index, int const& chrom, int const& thread_num, string const& test_string, string const& model_type, const Ref<const MatrixXd>& yres, struct param const& params, struct phenodt& pheno_data, struct geno_block& gblock, variant_block* block_info, vector<snp> const& snpinfo, struct ests const& m_ests, struct f_ests& fest, struct in_files& files, mstream& sout){\n\n  string tmpstr; \n  MatrixXd GW;\n  SpVec GWs;\n  data_thread* dt_thr = &(gblock.thread_data[thread_num]);\n\n  // header snp info for sum stats\n  if(!params.htp_out) tmpstr = print_sum_stats_head(snp_index, snpinfo);\n\n  // genotype for marker\n  MapArXd Geno (gblock.Gmat.col(isnp).data(), params.n_samples, 1);\n\n  for( int i = 0; i < params.n_pheno; ++i ) {\n\n    if( !params.pheno_pass(i) || block_info->ignored_trait(i) ) {\n      if(!params.p_joint_only && !params.split_by_pheno)\n        block_info->sum_stats[i].append( print_na_sumstats(i, 1, tmpstr, test_string, block_info, params) );\n      continue;\n    }\n    MapArXb mask (pheno_data.masked_indivs.col(i).data(), params.n_samples, 1);\n    MapcArXd Wsqrt (m_ests.Gamma_sqrt.col(i).data(), params.n_samples, 1);\n    MapcMatXd XWsqrt (m_ests.X_Gamma[i].data(), params.n_samples, m_ests.X_Gamma[i].cols());\n\n    // project out covariates from G\n    if(dt_thr->is_sparse) {\n      GWs = dt_thr->Gsparse.cwiseProduct( (Wsqrt * mask.cast<double>()).matrix() );\n      dt_thr->Gres = -XWsqrt * (XWsqrt.transpose() * GWs);\n      dt_thr->Gres += GWs;\n    } else {\n      GW = (Geno * Wsqrt * mask.cast<double>()).matrix();\n      dt_thr->Gres = GW - XWsqrt * (XWsqrt.transpose() * GW);\n    }\n\n    // denominator\n    dt_thr->denum(i) = dt_thr->Gres.squaredNorm();\n    if( dt_thr->denum(i) < params.numtol ){\n      block_info->ignored_trait(i) = true;\n      if(!params.p_joint_only && !params.split_by_pheno)\n        block_info->sum_stats[i].append( print_na_sumstats(i, 1, tmpstr, test_string, block_info, params) );\n      continue;\n    }\n    // score test stat for CT\n    if(dt_thr->is_sparse) \n      dt_thr->stats(i) = GWs.dot(yres.col(i)) / sqrt( dt_thr->denum(i) );\n    else\n      dt_thr->stats(i) = dt_thr->Gres.col(0).dot(yres.col(i)) / sqrt( dt_thr->denum(i) );\n\n    if(params.debug) {\n      cerr << \"\\ny:\\n\" << yres.col(i).topRows(2) << endl;\n      cerr << \"\\nGresid:\\n\" << dt_thr->Gres.topRows(2) << endl;\n      if(dt_thr->is_sparse) cerr << \"\\nsum(GW)=\" << GWs.sum() << endl;\n      cerr << \"\\nscore=\" << dt_thr->Gres.col(0).dot(yres.col(i)) << \" var(score)=\" << dt_thr->Gres.squaredNorm() << endl;\n    }\n\n    // apply correction\n    //check_pval_snp(block_info, dt_thr, chrom, i, isnp, pheno_data, gblock, m_ests, fest, params, sout);\n    get_sumstats(false, i, dt_thr);\n\n    dt_thr->bhat(i) /= block_info->scale_fac;\n    dt_thr->se_b(i) /= block_info->scale_fac;\n    if(block_info->flipped) dt_thr->bhat(i) *= -1;\n\n    // print sum stats\n    if(!params.p_joint_only)\n      block_info->sum_stats[i].append( print_sum_stats_line(snp_index, i, tmpstr, test_string, model_type, block_info, dt_thr, snpinfo, files, params) );\n\n  }\n\n\n}\n\nvoid compute_score_cox(int const& isnp, int const& snp_index, int const& chrom, int const& thread_num, string const& test_string, string const& 
model_type, struct param const& params, struct phenodt& pheno_data, struct geno_block& gblock, variant_block* block_info, vector<snp> const& snpinfo, struct ests const& m_ests, struct f_ests& fest, struct in_files& files, mstream& sout){\n\n  string tmpstr; \n  data_thread* dt_thr = &(gblock.thread_data[thread_num]);\n\n  Eigen::VectorXd sqrtWG;\n  SpVec sqrtWGs;\n  Eigen::VectorXd RGammaG;\n  Eigen::VectorXd UhalfG;\n  Eigen::VectorXd XtWG;\n  Eigen::VectorXd XtUG;\n  Eigen::VectorXd XtVG;\n  double T;\n\n  // header snp info for sum stats\n  if(!params.htp_out) tmpstr = print_sum_stats_head(snp_index, snpinfo);\n\n  // genotype for marker\n  MapArXd Geno (gblock.Gmat.col(isnp).data(), params.n_samples, 1);\n  \n  for( int i = 0; i < params.n_pheno; ++i ) {\n    if( !params.pheno_pass(i) || block_info->ignored_trait(i) ){\n      if(!params.p_joint_only && !params.split_by_pheno)\n        block_info->sum_stats[i].append( print_na_sumstats(i, 1, tmpstr, test_string, block_info, params) );\n      continue;\n    }\n    MapArXb mask(pheno_data.masked_indivs.col(i).data(), params.n_samples, 1);\n\n    // score stat\n    if (dt_thr->is_sparse) {\n      dt_thr->Gres = dt_thr->Gsparse - m_ests.cox_MLE_NULL[i].X1_X1WX1inv * (dt_thr->Gsparse.transpose() * m_ests.cox_MLE_NULL[i].WX1).transpose();\n    } else {\n      dt_thr->Gres = Geno.matrix() - m_ests.cox_MLE_NULL[i].X1_X1WX1inv * (Geno.matrix().transpose() * m_ests.cox_MLE_NULL[i].WX1).transpose();\n    }\n    T = (dt_thr->Gres.array() * m_ests.cox_MLE_NULL[i].residual.array() * mask.cast<double>()).sum();\n    \n    dt_thr->denum(i) = m_ests.cox_MLE_NULL[i].res_var * (dt_thr->Gres.array()).pow(2).sum();\n    \n    if (params.coxscore_exact) {\n      sqrtWG = dt_thr->Gres.array() * (m_ests.cox_MLE_NULL[i].mu.array().sqrt()) * mask.cast<double>();\n      RGammaG = cumulativeSum_reverse2( m_ests.survival_data_pheno[i].R.transpose() * (m_ests.cox_MLE_NULL[i].w_exp_eta.array() * (m_ests.survival_data_pheno[i].permute_mtx * dt_thr->Gres).array()).matrix());\n      UhalfG = m_ests.cox_MLE_NULL[i].Dhalf.array() * RGammaG.array();\n\n      XtWG = m_ests.cox_MLE_NULL[i].sqrtWX.transpose() * sqrtWG;\n      XtUG = m_ests.cox_MLE_NULL[i].UhalfX.transpose() * UhalfG;\n      XtVG = XtWG - XtUG;\n      dt_thr->denum(i) = sqrtWG.squaredNorm() - UhalfG.squaredNorm() - (XtVG.array() * (m_ests.cox_MLE_NULL[i].cov_inv * XtVG).array()).sum();\n    }\n    dt_thr->stats(i) = T/sqrt(dt_thr->denum(i));\n\n    if(params.htp_out) {\n      dt_thr->scores(i) = dt_thr->stats(i) * sqrt( dt_thr->denum(i) );\n      dt_thr->skat_var(i) = dt_thr->denum(i);\n    }\n\n    // use firth/spa\n    check_pval_snp(block_info, dt_thr, chrom, i, isnp, pheno_data, gblock, m_ests, fest, params, sout);\n\n    dt_thr->bhat(i) /= block_info->scale_fac;\n    dt_thr->se_b(i) /= block_info->scale_fac;\n    if(block_info->flipped) dt_thr->bhat(i) *= -1;\n\n    // print sum stats\n    if(!params.p_joint_only) {\n      block_info->sum_stats[i].append( print_sum_stats_line(snp_index, i, tmpstr, test_string, model_type, block_info, dt_thr, snpinfo, files, params) );\n    }\n  }\n}\n\n// Cox Null firth model\nvoid fit_null_firth_cox(bool const& silent, int const& chrom, struct f_ests* firth_est, struct phenodt* pheno_data, struct ests const* m_ests, struct in_files* files, struct param* params, mstream& sout){\n\n  auto t1 = std::chrono::high_resolution_clock::now();\n  ArrayXb has_converged = params->pheno_pass; // if null log reg converged\n  IOFormat Fmt(StreamPrecision, DontAlignCols, \" \", \"\\n\", 
\"\", \"\",\"\",\"\");\n\n  if(!silent) sout << \"   -fitting null Firth cox regression on time-to-event phenotypes...\" << flush;\n\n  // fit null firth (in parallel for MT mode)\n#if defined(_OPENMP)\n  if((params->n_pheno>2) && !params->blup_cov) setNbThreads(1); // for < 3, mt in eigen should be similar\n#pragma omp parallel for schedule(dynamic) if((params->n_pheno>2) && !params->blup_cov)\n#endif\n  for( int i = 0; i < params->n_pheno; ++i ) {\n    if( !params->pheno_pass(i) ) continue;\n    if(params->blup_cov) // add step 1 predictions as a covariate (skip multithreading)\n      pheno_data->new_cov.rightCols(1) = m_ests->blups.col(i);\n    \n    Eigen::VectorXd offset;\n    if(params->blup_cov) offset = Eigen::VectorXd::Zero(m_ests->blups.rows()); // if step 1 is covariate\n    else offset = m_ests->blups.col(i).array();\n\n    cox_firth cox_firth_null;\n    cox_firth_null.setup(m_ests->survival_data_pheno[i], pheno_data->new_cov, offset, pheno_data->new_cov.cols(), params->niter_max_firth_null, params->niter_max_line_search, params->numtol_cox, params->numtol_cox_stephalf, params->numtol_beta_cox, params->maxstep_null, !params->cox_nofirth, false, m_ests->cox_MLE_NULL[i].beta);\n    cox_firth_null.fit(m_ests->survival_data_pheno[i], pheno_data->new_cov, offset);\n\n    if( !cox_firth_null.converge ){ // if failed to converge\n      cerr << \"WARNING: Cox regression with Firth correction did not converge. Step-halving tol=\" << params->numtol_cox_stephalf << \"\\n\";\n\n      cerr << \"Retrying with strict convergence criteria: step-halving tol=0.\\n\";\n\n      cox_firth_null.setup(m_ests->survival_data_pheno[i], pheno_data->new_cov, offset, pheno_data->new_cov.cols(), params->niter_max_firth_null, params->niter_max_line_search, params->numtol_cox, 0, params->numtol_beta_cox, params->maxstep_null, !params->cox_nofirth, false, m_ests->cox_MLE_NULL[i].beta);\n      cox_firth_null.fit(m_ests->survival_data_pheno[i], pheno_data->new_cov, offset);\n    }\n\n    if( !cox_firth_null.converge ){ // if failed to converge\n      cerr << \"WARNING: Cox regression with Firth correction did not converge (step-halving tol=0, maximum step size=\" << params->maxstep_null <<\";maximum number of iterations=\" << params->niter_max_firth_null <<\").\";\n\n      cerr << \"Retrying with fallback parameters: (step-halving tol=0, maximum step size=\" << params->maxstep_null/5 <<\";maximum number of iterations=\" << params->niter_max_firth_null*5 <<\";initiate at 0).\\n\";\n\n      cox_firth_null.setup(m_ests->survival_data_pheno[i], pheno_data->new_cov, offset, pheno_data->new_cov.cols(), params->niter_max_firth_null*5, params->niter_max_line_search, params->numtol_cox, 0, params->numtol_beta_cox, params->maxstep_null/5, !params->cox_nofirth, false);\n      cox_firth_null.fit(m_ests->survival_data_pheno[i], pheno_data->new_cov, offset);\n    }\n    has_converged(i) = cox_firth_null.converge;\n    if(!has_converged(i)) continue;\n    firth_est->cov_blup_offset.col(i) = cox_firth_null.eta;\n    firth_est->beta_null_firth.col(i) = cox_firth_null.beta;\n    \n    if(params->write_null_firth)\n      (*firth_est->firth_est_files[i]) << chrom << \" \" << cox_firth_null.beta.transpose().format(Fmt) << endl;\n\n  }\n#if defined(_OPENMP)\n  if((params->n_pheno>2) && !params->blup_cov) setNbThreads(params->threads);\n#endif\n\n  // check if some did not converge\n  if(!has_converged.any()) { //  none passed\n\n    string msg1 = to_string( params->maxstep_null / 5 );\n    string msg2 = to_string( 
params->niter_max_firth_null * 5 );\n    throw \"Firth penalized Cox regression failed to converge for all phenotypes.\"\n      \" Try decreasing the maximum step size using `--maxstep-null` (currently=\" + msg1 +  \") \"\n      \"and increasing the maximum number of iterations using `--maxiter-null` (currently=\" + msg2 + \").\";\n\n  } else if( ((!has_converged) && (params->pheno_pass || params->pheno_fail_nullreg)).any() ) { // some phenotypes failed (at null reg or null firth) - write their names to file\n\n    ArrayXb pheno_flagged = (!has_converged) && (params->pheno_pass || params->pheno_fail_nullreg);\n    Files outf;\n    string failed_file = files->out_file + \"_failedNullFirth_chr\" + to_string(chrom) + \".list\";\n    outf.openForWrite( failed_file, sout);\n    for( int i = 0; i < params->n_pheno; ++i )\n      if(pheno_flagged(i))\n        outf << files->pheno_names[i] << endl;\n    outf.closeFile();\n    sout << \"WARNING: null Firth failed for \" << pheno_flagged.count() << \" phenotypes (list of traits written to '\" << failed_file << \"' and these will be skipped)\\n\";\n    params->pheno_pass = has_converged;\n\n  }\n  if(params->blup_cov)\n    pheno_data->new_cov.rightCols(1).array() = 0;\n\n  if (silent) return;\n\n  sout << \"done\";\n  auto t2 = std::chrono::high_resolution_clock::now();\n  auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1);\n  sout << \" (\" << duration.count() << \"ms) \"<< endl;\n}\n\n// firth cov + test snp\nvoid fit_firth_cox_snp(int const& chrom, int const& ph, int const& isnp, struct param const* params, struct phenodt* pheno_data, struct ests const* m_ests, struct f_ests const* fest, const Ref<const MatrixXd>& Gvec, variant_block* block_info, data_thread* dt_thr, mstream& sout) {\n  // if firth is used, fit based on penalized log-likelihood\n  int col_incl;\n  double lrt;\n  VectorXd beta0;\n  MatrixXd Xmat;\n\n  Xmat = MatrixXd::Zero(params->n_samples, pheno_data->new_cov.cols() + 1); // covariates + tested SNP\n  Xmat << pheno_data->new_cov, Gvec;\n  col_incl = Xmat.cols();\n\n  // null model\n  cox_firth cox_firth_null;\n  cox_firth_null.setup(m_ests->survival_data_pheno[ph], Xmat, m_ests->blups.col(ph), col_incl-1, params->niter_max_firth, params->niter_max_line_search, params->numtol_cox, params->numtol_cox_stephalf, params->numtol_beta_cox, params->maxstep_null, !params->cox_nofirth, false);\n  cox_firth_null.fit(m_ests->survival_data_pheno[ph], Xmat, m_ests->blups.col(ph));\n\n  if (!cox_firth_null.converge) {\n    cox_firth_null.setup(m_ests->survival_data_pheno[ph], Xmat, m_ests->blups.col(ph), col_incl-1, params->niter_max_firth*5, params->niter_max_line_search, params->numtol_cox, 0, params->numtol_beta_cox, params->maxstep/5, !params->cox_nofirth, false);\n    cox_firth_null.fit(m_ests->survival_data_pheno[ph], Xmat, m_ests->blups.col(ph));\n  }\n\n  if (!cox_firth_null.converge) {\n    if(params->verbose) cerr << \"WARNING: Cox regression with Firth correction null model did not converge!\\n\";\n    block_info->test_fail(ph) = true;\n    return ;\n  }\n\n  // test model\n  cox_firth cox_firth_test;\n  cox_firth_test.setup(m_ests->survival_data_pheno[ph], Xmat, m_ests->blups.col(ph), col_incl, params->niter_max_firth, params->niter_max_line_search, params->numtol_cox, params->numtol_cox_stephalf, params->numtol_beta_cox, params->maxstep, !params->cox_nofirth, false, cox_firth_null.beta);\n  cox_firth_test.fit(m_ests->survival_data_pheno[ph], Xmat, m_ests->blups.col(ph));\n\n  if 
(!cox_firth_test.converge) {\n    cox_firth_test.setup(m_ests->survival_data_pheno[ph], Xmat, m_ests->blups.col(ph), col_incl, params->niter_max_firth*5, params->niter_max_line_search, params->numtol_cox, 0, params->numtol_beta_cox, params->maxstep/5, !params->cox_nofirth, false);\n    cox_firth_test.fit(m_ests->survival_data_pheno[ph], Xmat, m_ests->blups.col(ph));\n  }\n\n  if (!cox_firth_test.converge) {\n    if(params->verbose) cerr << \"WARNING: Cox regression with Firth correction did not converge!\\n\";\n    block_info->test_fail(ph) = true;\n    return ;\n  }\n\n  dt_thr->bhat(ph) = cox_firth_test.beta.tail(1)(0);\n  if(!params->back_correct_se)\n    dt_thr->se_b(ph) = cox_firth_test.qrsd.inverse().diagonal().array().sqrt().tail(1)(0);\n\n  lrt = 2*(cox_firth_test.loglike.tail(1)(0) - cox_firth_null.loglike.tail(1)(0));\n  if( lrt < 0 ) {\n    block_info->test_fail(ph) = true;\n    return ;\n  }\n  dt_thr->dif_deviance = lrt;\n  return ;\n}\n\n// for approx firth testing step\nvoid fit_firth_cox_snp_fast(int const& chrom, int const& ph, int const& isnp, struct param const* params, struct phenodt* pheno_data, struct ests const* m_ests, struct f_ests const* fest, const Ref<const VectorXd>& Gvec, variant_block* block_info, data_thread* dt_thr, mstream& sout) {\n  // if firth is used, fit based on penalized log-likelihood\n  double lrt;\n\n  // // For rare variants, set entries in Gvec for non-carriers to 0\n  // int mac_thr_sparse = (params->skip_fast_firth ? 0 : 50), i = 0, index_j;\n  // ArrayXi index_carriers;\n  // if(dt_thr->is_sparse && (block_info->mac(ph) < mac_thr_sparse)) {\n  //   index_carriers.resize(dt_thr->Gsparse.nonZeros());\n  //   for (SpVec::InnerIterator it(dt_thr->Gsparse); it; ++it) {\n  //     index_j = it.index();\n  //     // check for small entries in G (eg with imputed data)\n  //     if(mask(index_j) && (it.value() > 1e-4)) index_carriers(i++) = index_j;\n  //   }\n  //   index_carriers.conservativeResize(i);\n  // }\n\n  cox_firth cox_firth_test;\n  cox_firth_test.setup(m_ests->survival_data_pheno[ph], Gvec, fest->cov_blup_offset.col(ph), 1, params->niter_max_firth, params->niter_max_line_search, params->numtol_cox, params->numtol_cox_stephalf, params->numtol_beta_cox, params->maxstep, !params->cox_nofirth, false);\n  cox_firth_test.fit_1(m_ests->survival_data_pheno[ph], Gvec, fest->cov_blup_offset.col(ph));\n\n  if(!cox_firth_test.converge){\n    cox_firth_test.setup(m_ests->survival_data_pheno[ph], Gvec, fest->cov_blup_offset.col(ph), 1, params->niter_max_firth*5, params->niter_max_line_search, params->numtol_cox, 0, params->numtol_beta_cox, params->maxstep/5, !params->cox_nofirth, false);\n    cox_firth_test.fit_1(m_ests->survival_data_pheno[ph], Gvec, fest->cov_blup_offset.col(ph));\n  }\n\n  if(!cox_firth_test.converge){\n    if(params->verbose) cerr << \"WARNING: Cox regression with Firth correction did not converge!\\n\";\n    block_info->test_fail(ph) = true;\n    return ;\n  }\n\n  // compute beta_hat\n  dt_thr->bhat(ph) = cox_firth_test.beta(0);\n  // compute SE based on Hessian for unpenalized LL\n  if(!params->back_correct_se)\n    dt_thr->se_b(ph) = sqrt(1/cox_firth_test.second_der_1);\n\n  lrt = 2*(cox_firth_test.loglike.tail(1)(0) - cox_firth_test.loglike(0));\n  if( lrt < 0 ) {\n    block_info->test_fail(ph) = true;\n    return ;\n  }\n  dt_thr->dif_deviance = lrt;\n  return ;\n}\n\n\n// Firth (currently only used for null approximate firth)\nbool fit_approx_firth_null(int const& chrom, int const& ph, struct phenodt const* pheno_data, struct ests const* 
m_ests, Ref<ArrayXd> betavec, struct param* params, bool const& save_se) {\n\n  bool success, set_start = true, check_score_inc = true;\n  int col_incl;\n  int maxstep = params->maxstep_null;\n  int niter = params->niter_max_firth_null;\n  double tol = 50*params->numtol;\n  double dev, lrt;\n\n  ArrayXd betaold, se, etavec, pivec, offset;\n\n  MapcArXd Y (pheno_data->phenotypes_raw.col(ph).data(), pheno_data->phenotypes_raw.rows());\n  MapcMatXd Xmat (pheno_data->new_cov.data(), pheno_data->new_cov.rows(), pheno_data->new_cov.cols());\n  MapcArXb mask (pheno_data->masked_indivs.col(ph).data(), pheno_data->masked_indivs.rows());\n  col_incl = Xmat.cols();\n\n  if(params->blup_cov) offset = ArrayXd::Zero(m_ests->blups.rows()); // if step 1 is covariate\n  else offset = m_ests->blups.col(ph).array();\n\n  // with firth approx. => trial 1: use maxstep_null\n  // trial=1+ => start at 0 (update maxstep & niter)\n  // trial=2+ => use fallback options (update maxstep & niter)\n  for(int trial = 0; trial < 4; trial++){\n\n    // starting values\n    if(set_start){\n        if(params->use_null_firth || (trial == 0) ){ // use saved est or those from unpenalized log. reg\n          betaold = betavec.head(Xmat.cols());\n        } else {// set to 0 if null firth failed\n          betaold = 0;\n          //betaold(0) = ( 0.5 + mask.select(Y,0).sum())  / (pheno_data->Neff(ph) + 1);\n          //betaold(0) = log( betaold(0) / (1 - betaold(0) ));\n          // LOCO prediction is offset\n          betaold(0) -= mask.select(offset,0).mean();\n        }\n    }\n    if(trial == 3) {\n      betaold = betavec.head(Xmat.cols()); // try again with original ests\n      check_score_inc = false;\n    }\n\n    success = fit_firth(ph, Y, Xmat, offset, mask, pivec, etavec, betaold, se, col_incl, dev, false, lrt, maxstep, niter, tol, params, check_score_inc);\n\n    if(!params->fix_maxstep_null) { // don't retry with user-given settings\n      if( !success ){ // if failed to converge\n        cerr << \"WARNING: Logistic regression with Firth correction did not converge (maximum step size=\" << maxstep <<\";maximum number of iterations=\" << niter <<\").\\n\";\n\n        // try fitting pseudo-data representation with IRLS\n        double dev0 = 0;\n        if(\n            fit_firth_pseudo(dev0, Y, Xmat, offset, mask, pivec, etavec, betaold, se, col_incl, dev, false, lrt, maxstep, niter, tol, params, check_score_inc)\n          ){\n          success = true;\n          break;\n        }\n\n        if( trial == 1 ){\n          maxstep /= 5;\n          niter *= 5;\n          if(params->debug) cerr << \"Retrying with fallback parameters: (maximum step size=\" << maxstep <<\";maximum number of iterations=\" << niter<<\").\\n\";\n        }\n        if(params->use_adam) set_start = false;\n        continue;\n      }\n    }\n\n    break;\n  }\n\n  // If didn't converge\n  if(!success)\n    return false;\n\n  betavec.head(betaold.size()) = betaold;\n  if(save_se && params->print_cov_betas) { // get se\n    ArrayXd wvec;\n    get_wvec(pivec, wvec, mask);\n    MatrixXd XWsqrt = ( Xmat.array().colwise() * (wvec.sqrt() * mask.cast<double>()) ).matrix();\n    MatrixXd xtx_inv = ( XWsqrt.transpose() * XWsqrt ).colPivHouseholderQr().inverse();\n    params->xtx_inv_diag.col(ph).array() = xtx_inv.diagonal().array().sqrt();\n  }\n  return true;\n\n}\n\n// Approximate null firth model\nvoid fit_null_firth(bool const& silent, int const& chrom, struct f_ests* firth_est, struct phenodt* pheno_data, struct ests const* m_ests, struct in_files* files, 
struct param* params, mstream& sout){\n\n  auto t1 = std::chrono::high_resolution_clock::now();\n  ArrayXb has_converged = params->pheno_pass; // if null log reg converged\n  IOFormat Fmt(StreamPrecision, DontAlignCols, \" \", \"\\n\", \"\", \"\",\"\",\"\");\n\n  if(!silent && params->firth) sout << \"   -fitting null Firth logistic regression on binary phenotypes...\" << flush;\n\n  // get starting values\n  if(params->use_null_firth) // saved in file\n    get_beta_start_firth(chrom, firth_est, files, params, sout);\n  else // from null log. reg.\n    get_beta_start_firth(firth_est, m_ests);\n\n  // fit null firth (in parallel for MT mode)\n#if defined(_OPENMP)\n  if((params->n_pheno>2) && !params->blup_cov) setNbThreads(1); // for < 3, mt in eigen should be similar\n#pragma omp parallel for schedule(dynamic) if((params->n_pheno>2) && !params->blup_cov)\n#endif\n  for( int i = 0; i < params->n_pheno; ++i ) {\n    if( !params->pheno_pass(i) ) continue;\n\n    if(params->blup_cov) // add step 1 predictions as a covariate (skip multithreading)\n      pheno_data->new_cov.rightCols(1) = m_ests->blups.col(i);\n\n    MapArXd bvec (firth_est->beta_null_firth.col(i).data(), firth_est->beta_null_firth.rows());\n    has_converged(i) = fit_approx_firth_null(chrom, i, pheno_data, m_ests, bvec, params);\n    if(!has_converged(i)) continue; // cannot use break\n\n    if(params->test_mode){\n      firth_est->cov_blup_offset.col(i) = pheno_data->new_cov * bvec.head(pheno_data->new_cov.cols()).matrix(); // store offset used for approx firth\n     if(!params->blup_cov) firth_est->cov_blup_offset.col(i) += m_ests->blups.col(i); // if offset  \n    }\n\n    if(params->write_null_firth)\n      (*firth_est->firth_est_files[i]) << chrom << \" \" << bvec.head(params->ncov).matrix().transpose().format(Fmt) << endl;\n\n  }\n#if defined(_OPENMP)\n  if((params->n_pheno>2) && !params->blup_cov) setNbThreads(params->threads);\n#endif\n\n  // check if some did not converge\n  if(!has_converged.any()) { //  none passed\n\n    string msg1 = to_string( params->maxstep_null / (params->fix_maxstep_null ? 1 : 5) );\n    string msg2 = to_string( params->niter_max_firth_null * (params->fix_maxstep_null ? 
1 : 5) );\n    throw \"Firth penalized logistic regression failed to converge for all phenotypes.\"\n      \" Try decreasing the maximum step size using `--maxstep-null` (currently=\" + msg1 +  \") \"\n      \"and increasing the maximum number of iterations using `--maxiter-null` (currently=\" + msg2 + \").\";\n\n  } else if( ((!has_converged) && (params->pheno_pass || params->pheno_fail_nullreg)).any() ) { // some phenotypes failed (at null reg or null firth) - write their names to file\n\n    ArrayXb pheno_flagged = (!has_converged) && (params->pheno_pass || params->pheno_fail_nullreg);\n    Files outf;\n    string failed_file = files->out_file + \"_failedNullFirth_chr\" + to_string(chrom) + \".list\";\n    outf.openForWrite( failed_file, sout);\n    for( int i = 0; i < params->n_pheno; ++i )\n      if(pheno_flagged(i))\n        outf << files->pheno_names[i] << endl;\n    outf.closeFile();\n    sout << \"WARNING: null Firth failed for \" << pheno_flagged.count() << \" phenotypes (list of traits written to '\" << failed_file << \"' and these will be skipped)\\n\";\n    params->pheno_pass = has_converged;\n\n  }\n  if(params->blup_cov)\n    pheno_data->new_cov.rightCols(1).array() = 0;\n\n  if(silent || !params->firth) return;\n\n  sout << \"done\";\n  auto t2 = std::chrono::high_resolution_clock::now();\n  auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1);\n  sout << \" (\" << duration.count() << \"ms) \"<< endl;\n\n}\n\nvoid fit_firth_logistic_snp(int const& chrom, int const& ph, int const& isnp, bool const& null_fit, struct param const* params, struct phenodt* pheno_data, struct ests const* m_ests, struct f_ests const* fest, const Ref<const MatrixXd>& Gvec, variant_block* block_info, data_thread* dt_thr, mstream& sout) {\n  // if firth is used, fit based on penalized log-likelihood\n\n  bool success;\n  int col_incl;\n  int maxstep = null_fit ? params->maxstep_null : params->maxstep;\n  int niter = null_fit ? params->niter_max_firth_null : params->niter_max_firth;\n  double tol = null_fit ? 
(10*params->numtol) : params->numtol_firth;\n  double dev, lrt, dev0 = 0;\n\n  ArrayXd betaold, se, etavec, pivec, offset;\n  MatrixXd Xmat;\n\n  MapArXd Y (pheno_data->phenotypes_raw.col(ph).data(), pheno_data->phenotypes_raw.rows());\n  MapArXb mask (pheno_data->masked_indivs.col(ph).data(), pheno_data->masked_indivs.rows());\n\n  if(params->firth_approx){\n    if(null_fit){\n      Xmat = pheno_data->new_cov; // only covariates\n    } else {\n      Xmat = Gvec; // only tested SNP\n    }\n    col_incl = Xmat.cols();\n  } else {\n    Xmat = MatrixXd::Zero(params->n_samples, pheno_data->new_cov.cols() + 1); // covariates + tested SNP\n    Xmat << pheno_data->new_cov, Gvec;\n    col_incl = Xmat.cols();\n    if( null_fit ) col_incl--;\n  }\n\n  // covariate effects added as offset in firth approx.\n  if( params->firth_approx && !null_fit ) offset = fest->cov_blup_offset.col(ph).array(); \n  else offset = m_ests->blups.col(ph).array(); \n\n  // starting values\n  if(null_fit){\n\n    betaold = ArrayXd::Zero(Xmat.cols()); // last entry in exact Firth is kept at 0\n    if(params->firth_approx){\n      // start intercept at logit(mean(Y)) - mean(offset)\n      betaold(0) = ( 0.5 + mask.select(Y,0).sum()) / (pheno_data->Neff(ph) + 1);\n      betaold(0) = log( betaold(0) / (1 - betaold(0) ));\n\n      // LOCO prediction is offset\n      betaold(0) -= mask.select(offset,0).mean();\n    } else betaold.head(col_incl) = params->cov_betas.col(ph); // start at estimates from null firth with no snp column\n\n  } else {\n    // start at 0\n    if(params->firth_approx) betaold = ArrayXd::Zero(col_incl); \n    // start at estimate from null fit\n    else betaold = dt_thr->beta_null_firth.col(0);\n  }\n\n  success = fit_firth_pseudo(dev0, Y, Xmat, offset, mask, pivec, etavec, betaold, se, col_incl, dev, !null_fit, lrt, maxstep, niter/2, tol, params); // try pseudo\n\n  // If didn't converge\n  if(!success){\n    if(!null_fit) { // reset beta\n      if(params->firth_approx) betaold = ArrayXd::Zero(col_incl); \n      else betaold = dt_thr->beta_null_firth.col(0);\n    } else if (fabs(betaold(0))>1e12) {\n      if(params->firth_approx) betaold = 0;\n      else betaold.head(col_incl) = params->cov_betas.col(ph);\n    }\n    success = fit_firth(ph, Y, Xmat, offset, mask, pivec, etavec, betaold, se, col_incl, dev, !null_fit, lrt, maxstep, niter/2, tol, params); // try NR (slower)\n\n    if(!success){\n      if(params->verbose) cerr << \"WARNING: Logistic regression with Firth correction did not converge!\\n\";\n      block_info->test_fail(ph) = true;\n      return ;\n    }\n  }\n  // sout << \"\\nNiter = \" << niter_cur << \" : \" << mod_score.matrix().transpose() << endl;\n\n  if(null_fit) {\n    if(!params->firth_approx) dt_thr->beta_null_firth = betaold.matrix();\n  } else {\n    // compute beta_hat\n    dt_thr->bhat(ph) = betaold.tail(1)(0);\n    // compute SE based on Hessian for unpenalized LL\n    if(!params->back_correct_se)\n      dt_thr->se_b(ph) = se.tail(1)(0);\n\n    if( lrt < 0 ) {\n      block_info->test_fail(ph) = true;\n      return ;\n    }\n    dt_thr->dif_deviance = lrt;\n  }\n\n  return ;\n}\n\n// for approx firth testing step\nvoid fit_firth_logistic_snp_fast(int const& chrom, int const& ph, int const& isnp, bool const& null_fit, struct param const* params, struct phenodt* pheno_data, struct ests const* m_ests, struct f_ests const* fest, const Ref<const VectorXd>& Gvec, variant_block* block_info, data_thread* dt_thr, mstream& sout) {\n  // if firth is used, fit based on penalized 
log-likelihood\n\n  uint fit_state;\n  int maxstep = params->maxstep;\n  int niter = params->niter_max_firth, niter_pseudo = min(niter/2, 50), niter_nr = niter/2;\n  double tol = params->numtol_firth;\n  double lrt, dev0 = 0;\n\n  double bstart = 0, betaold, se;\n  ArrayXd offset;\n\n  MapArXd Y (pheno_data->phenotypes_raw.col(ph).data(), pheno_data->phenotypes_raw.rows());\n  MapArXb mask (pheno_data->masked_indivs.col(ph).data(), pheno_data->masked_indivs.rows());\n  \n  // For rare variants, set entries in Gvec for non-carriers to 0\n  int mac_thr_sparse = (params->skip_fast_firth ? 0 : 50), i = 0, index_j;\n  ArrayXi index_carriers;\n  if(dt_thr->is_sparse && (block_info->mac(ph) < mac_thr_sparse)) {\n    index_carriers.resize(dt_thr->Gsparse.nonZeros());\n    for (SpVec::InnerIterator it(dt_thr->Gsparse); it; ++it) {\n      index_j = it.index();\n      // check for small entries in G (eg with imputed data)\n      if(mask(index_j) && (it.value() > 1e-4)) index_carriers(i++) = index_j;\n      }\n    index_carriers.conservativeResize(i);\n    niter_pseudo = niter/2;\n  }\n\n  // warm starts using estimates ignoring covariates\n  if( params->htp_out && (block_info->genocounts(2,ph) == 0) && (block_info->genocounts(5,ph) == 0) )\n    bstart = log( (block_info->genocounts(1,ph) + 0.5) * (block_info->genocounts(3,ph) + 0.5) / (block_info->genocounts(0,ph) + 0.5) / (block_info->genocounts(4,ph) + 0.5) );\n  betaold = bstart;\n\n  // covariate effects added as offset in firth approx.\n  offset = fest->cov_blup_offset.col(ph).array(); \n\n  // get dev0\n  ArrayXd pivec, wvec, Gvec_mask;\n  get_pvec(pivec, offset, params->numtol_eps);\n  dev0 = get_logist_dev(Y, pivec, mask);\n  if((index_carriers.size() > 0)) { // bug fix to use the right deviance fn if using approximate penalty based on carrier status\n    get_pvec(pivec, offset(index_carriers), params->numtol_eps);\n    get_wvec(pivec, wvec, mask(index_carriers));\n    Gvec_mask = Gvec(index_carriers);\n  } else {\n    get_wvec(pivec, wvec, mask);\n    Gvec_mask = mask.select(Gvec.array(),0);\n  }\n  dev0 -= log( (Gvec_mask.square() * wvec).sum() );\n\n  // fit state =\n  //  0 - fit was successful\n  //  1 - too slow convergence\n  //  2 - diff_beta increased\n  //  3 - fitted p = 0\n  //  4 - lrt < 0\n  fit_state = fit_firth_pseudo(dev0, Y, Gvec, offset, mask, index_carriers, betaold, se, lrt, maxstep, niter_pseudo, tol, params); // try pseudo\n\n  // If didn't converge, try again with NR at 0\n  if(fit_state && (bstart != 0) && index_carriers.size()) {\n    if(params->debug) cerr << \"WARNING: Pseudo-firth did not converge (\" << fit_state << \"; LRT = \" << lrt << \"; dev0 = \" << dev0 << \") !\\n\";\n    betaold = 0;\n    fit_state = !fit_firth(dev0, Y, Gvec, offset, mask, index_carriers, betaold, se, lrt, maxstep, 100, tol, params); // try NR (slower)\n  }\n\n  // If didn't converge, try with NR\n  if(fit_state){\n    if(params->debug) cerr << \"WARNING: NR-firth did not converge (\" << fit_state << \"; LRT = \" << lrt << \") !\\n\";\n    betaold = bstart; \n    fit_state = !fit_firth(dev0, Y, Gvec, offset, mask, index_carriers, betaold, se, lrt, maxstep, niter_nr, tol, params); // try NR (slower)\n  }\n\n  if(fit_state){\n    if(params->verbose) cerr << \"WARNING: Logistic regression with Firth correction did not converge (\" << fit_state << \"; LRT = \" << lrt << \") !\\n\";\n    block_info->test_fail(ph) = true;\n    return ;\n  }\n  // sout << \"\\nNiter = \" << niter_cur << \" : \" << mod_score.matrix().transpose() << endl;\n\n  // 
compute beta_hat\n  dt_thr->bhat(ph) = betaold;\n  // compute SE based on Hessian for unpenalized LL\n  if(!params->back_correct_se)\n    dt_thr->se_b(ph) = se;\n\n  if( lrt < 0 ) {\n    block_info->test_fail(ph) = true;\n    return ;\n  }\n  dt_thr->dif_deviance = lrt;\n\n  return ;\n}\n\n// use NR or ADAM for Firth\nbool fit_firth(int const& ph, const Ref<const ArrayXd>& Y1, const Ref<const MatrixXd>& X1, const Ref<const ArrayXd>& offset, const Ref<const ArrayXb>& mask, ArrayXd& pivec, ArrayXd& etavec, ArrayXd& betavec, ArrayXd& sevec, int const& cols_incl, double& dev, bool const& comp_lrt, double& lrt, int const& maxstep_firth, int const& niter_firth, double const& tol, struct param const* params, bool const& check_score_inc) {\n\n  double dev0 = 0;\n\n  // get starting beta from ADAM ( compute and save null deviance )\n  if(!comp_lrt && params->use_adam) \n    fit_firth_adam(ph, dev0, Y1, X1, offset, mask, pivec, etavec, betavec, sevec, cols_incl, dev, comp_lrt, lrt, params);\n\n  return fit_firth_nr(dev0, Y1, X1, offset, mask, pivec, etavec, betavec, sevec, cols_incl, dev, comp_lrt, lrt, maxstep_firth, niter_firth, tol, params, check_score_inc);\n\n}\n\n// fit based on penalized log-likelihood using NR\nbool fit_firth_nr(double& dev0, const Ref<const ArrayXd>& Y1, const Ref<const MatrixXd>& X1, const Ref<const ArrayXd>& offset, const Ref<const ArrayXb>& mask, ArrayXd& pivec, ArrayXd& etavec, ArrayXd& betavec, ArrayXd& sevec, int const& cols_incl, double& dev, bool const& comp_lrt, double& lrt, int const& maxstep_firth, int const& niter_firth, double const& tol, struct param const* params, bool const& check_score_inc) {\n  // fit with first cols_incl columns of X1 (non-used entries of betavec should be 0)\n  // else assuming using all columns \n\n  int niter_cur = 0, niter_search, nc = X1.cols(), n_score_inc = 0;\n  double dev_old=0, dev_new=0, denum, mx, bdiff = 1, score_max_new, score_max_old = 1e16;\n\n  ArrayXd hvec, mod_score;\n  ArrayXd betanew, step_size, wvec;\n  MatrixXd XtW, XtWX;\n  ColPivHouseholderQR<MatrixXd> qr, qrX;\n\n  if(params->debug) cerr << \"\\nFirth starting beta = \" << betavec.matrix().transpose() << \"\\n\";\n\n  // solve S'(beta) = S(beta) + X'(h*(0.5-p)) = 0\n  betanew = betavec * 0;\n  while(niter_cur++ < niter_firth){\n\n    // update quantities\n    get_pvec(etavec, pivec, betavec, offset, X1, params->numtol_eps);\n    dev_old = get_logist_dev(Y1, pivec, mask);\n    get_wvec(pivec, wvec, mask);\n    XtW = X1.transpose() * wvec.sqrt().matrix().asDiagonal();\n    XtWX = XtW * XtW.transpose();\n    qr.compute(XtWX);\n    // compute deviance\n    dev_old -= qr.logAbsDeterminant();\n    if(comp_lrt && (niter_cur == 1)) // at first iter (i.e. 
betaSNP=0)\n      dev0 = dev_old;\n\n    // compute diag(H), H = U(U'U)^{-1}U', U = Gamma^(1/2)X\n    hvec = (qr.solve(XtW).array() * XtW.array() ).colwise().sum();\n    // compute modified score & step size\n    if(cols_incl < nc) { \n      qrX.compute(XtWX.block(0, 0, cols_incl, cols_incl));\n      mod_score = (X1.leftCols(cols_incl).transpose() * mask.select( Y1 - pivec + hvec * (0.5 - pivec), 0).matrix() ).array();\n      step_size = qrX.solve( mod_score.matrix() ).array();\n    } else {\n      mod_score = (X1.transpose() * mask.select( Y1 - pivec + hvec * (0.5 - pivec), 0).matrix() ).array();\n      step_size = qr.solve( mod_score.matrix() ).array();\n    }\n\n    // stopping criterion using modified score function\n    // edit 5.31.12 for edge cases with approx Firth\n    score_max_new = mod_score.abs().maxCoeff();\n    if( ( score_max_new < tol) && (niter_cur >= 2) ) break;\n\n    // try to catch convergence failures early\n    if(!comp_lrt){\n      if( score_max_new > score_max_old ) n_score_inc++; // track consecutive increases\n      else n_score_inc = 0;\n      if(check_score_inc && (n_score_inc > 25) ) return false;\n    }\n\n    // force absolute step size to be less than maxstep for each entry of beta\n    mx = step_size.abs().maxCoeff() / maxstep_firth;\n    if( mx > 1 ) step_size /= mx;\n\n    // start step-halving and stop when deviance decreases \n    denum = 2;\n    for( niter_search = 1; niter_search <= params->niter_max_line_search; niter_search++ ){\n\n      // adjusted step size\n      if(niter_search > 1) step_size /= denum;\n\n      ///////// compute corresponding deviance\n      if(cols_incl < nc) \n        betanew.head(cols_incl) = betavec.head(cols_incl) + step_size;\n      else \n        betanew = betavec + step_size;\n\n      get_pvec(etavec, pivec, betanew, offset, X1, params->numtol_eps);\n      dev_new = get_logist_dev(Y1, pivec, mask);\n      get_wvec(pivec, wvec, mask);\n      XtW = X1.transpose() * wvec.sqrt().matrix().asDiagonal();\n      XtWX = XtW * XtW.transpose();\n      qr.compute(XtWX);\n      dev_new -= qr.logAbsDeterminant();\n\n      if(params->debug){\n        if(niter_search == 1) bdiff = step_size.abs().maxCoeff();\n        cerr << \"[\"<<niter_cur << \":\" << niter_search <<\"] L1=\" << setprecision(16)<< dev_new << \"/L0=\"<< dev_old<< \"\\n\";\n      }\n      if( dev_new < dev_old ) break;\n    }\n\n    if( niter_search > params->niter_max_line_search ) {\n      if( comp_lrt ) step_size(0) += 1e-6;\n      else return false; // step-halving failed\n    }\n\n    if(params->debug) cerr << \"[\" << niter_cur <<setprecision(16)<< \"] beta.head=(\" << betanew.head(min(5,cols_incl)).matrix().transpose() << \"...); beta_diff.max=\" << bdiff << \"; score.max=\" << score_max_new << \"\\n\";\n\n\n    if(cols_incl < nc)  \n      betavec.head(cols_incl) += step_size;\n    else\n      betavec += step_size;\n    dev_old = dev_new;\n    score_max_old = score_max_new;\n\n  }\n  if(params->debug) cerr << \"Ni=\" << niter_cur<<setprecision(16) << \"; beta.head=(\" << betavec.head(min(15,cols_incl)).matrix().transpose() << \"); score.max=\" << mod_score.abs().maxCoeff() << \"\\n\";\n\n  // If didn't converge\n  if( niter_cur > niter_firth ) return false;\n\n  dev = dev_new;\n  if( comp_lrt ) {\n    lrt = dev0 - dev_new;\n    if(lrt < 0) return false;\n\n    sevec = qr.inverse().diagonal().array().sqrt();\n  }\n\n  return true;\n}\n\n// using pseudo-data representation with unpenalized logistic (strategy from brglm)\nbool fit_firth_pseudo(double& dev0, const 
Ref<const ArrayXd>& Y1, const Ref<const MatrixXd>& X1, const Ref<const ArrayXd>& offset, const Ref<const ArrayXb>& mask, ArrayXd& pivec, ArrayXd& etavec, ArrayXd& betavec, ArrayXd& sevec, int const& cols_incl, double& dev, bool const& comp_lrt, double& lrt, int const& maxstep_firth, int const& niter_firth, double const& tol, struct param const* params, bool const& apply_early_checks) {\n  // fit with first cols_incl columns of X1 (non-used entries of betavec should be 0)\n  // else assuming using all columns \n\n  int niter_cur = 0, niter_log = 0, niter_search, niter_max = 25, nc = X1.cols(), niter_score_max_unchanged = 0;\n  double dev_new=0, mx, maxstep = (comp_lrt && cols_incl == 1) ? 5 : maxstep_firth;\n  double bdiff=1e16, bdiff_new=1e16;\n  double score_max_old = 1e16, score_max_new;\n  //double dev_log0, dev_log1=0;\n\n  ArrayXd hvec, mod_score, ystar, score;\n  ArrayXd betanew, step_size, wvec, zvec;\n  MatrixXd XtW, XtWX;\n  ColPivHouseholderQR<MatrixXd> qr, qrX;\n\n  if(params->debug) cerr << \"\\nPseudo-firth starting beta = \" << betavec.matrix().transpose() << \"\\n\";\n\n  betanew = betavec * 0;\n  while(niter_cur++ < niter_firth){\n\n    // update quantities\n    get_pvec(etavec, pivec, betavec, offset, X1, params->numtol_eps);\n    dev_new = get_logist_dev(Y1, pivec, mask);\n    get_wvec(pivec, wvec, mask);\n    XtW = X1.transpose() * wvec.sqrt().matrix().asDiagonal();\n    XtWX = XtW * XtW.transpose();\n    qr.compute(XtWX);\n    // compute deviance\n    dev_new -= qr.logAbsDeterminant();\n    if(comp_lrt && (niter_cur == 1)) // at first iter (i.e. betaSNP=0)\n      dev0 = dev_new;\n\n    // compute diag(H), H = U(U'U)^{-1}U', U = Gamma^(1/2)X\n    hvec = (qr.solve(XtW).array() * XtW.array() ).colwise().sum();\n    // compute pseudo-response\n    ystar = Y1 + hvec * (0.5 - pivec); \n    // compute modified score & step size\n    if(cols_incl < nc) { \n      qrX.compute(XtWX.block(0, 0, cols_incl, cols_incl));\n      mod_score = (X1.leftCols(cols_incl).transpose() * mask.select(ystar - pivec, 0).matrix() ).array();\n    } else {\n      mod_score = (X1.transpose() * mask.select(ystar - pivec, 0).matrix() ).array();\n    }\n\n    // stopping criterion using modified score function\n    // edit 5.31.12 for edge cases with approx Firth\n    score_max_new = mod_score.abs().maxCoeff();\n    if( (score_max_new < tol) && (niter_cur >= 2) ) {\n      if(params->debug) cerr << \"stopping criterion met (\" << score_max_new << \" < \" << tol << \")\\n\";\n      break;\n    }\n    if(params->debug) cerr << \"[\" << niter_cur <<setprecision(16)<< \"] beta.head=(\" << betavec.head(min(5,cols_incl)).matrix().transpose() << \"...); score.max=\" << score_max_new << \"\\n\";\n    // to catch convergence failure sooner\n    if(apply_early_checks){\n      if( (niter_cur > 2) && (fabs(betavec(0)) > 1e13) ) return false;\n      if(niter_score_max_unchanged > 3) return false;\n      if( (niter_cur > 50) && ((score_max_new > 1000) || (betavec.abs().maxCoeff() > 1e12)) ) return false;\n    }\n    // fit unpenalized logistic on transformed Y\n    niter_log = 0;\n    bdiff = 1e16;\n    //dev_log0 = std::numeric_limits<double>::max();\n    while(niter_log++ < niter_max){\n      // p*(1-p) and check for zeroes\n      if( get_wvec(pivec, wvec, mask, params->numtol_eps) ){\n        if(params->debug) cerr << \"WARNING: pseudo-firth gave fitted p=0 in logistic reg step\\n\";\n        return false;\n      }\n      XtW = X1.leftCols(cols_incl).transpose() * mask.select(wvec,0).matrix().asDiagonal();\n      
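// This inner loop is one IWLS update of an unpenalized logistic regression on the\n      // pseudo-response ystar = y + h*(0.5 - p) (the brglm pseudo-data trick); the new\n      // beta solves the normal equations (X'WX) b = X'W z for the working vector z below.\n      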
XtWX = XtW * X1.leftCols(cols_incl);\n      // working vector z = X*beta + (Y-p)/(p*(1-p))\n      zvec = mask.select(etavec - offset + (ystar - pivec) / wvec, 0);\n      // parameter estimate\n      betanew.head(cols_incl) = ( XtWX ).colPivHouseholderQr().solve( XtW * zvec.matrix() ).array();\n\n    // force absolute step size to be less than maxstep for each entry of beta\n      if(comp_lrt && (cols_incl == 1)){ // only do this when testing each SNP\n        step_size = betanew.head(cols_incl) - betavec.head(cols_incl);\n        bdiff_new = fabs(step_size(0));\n        if(bdiff_new > bdiff) { // step size should get smaller closer to soln\n          if(params->debug) cerr << \"WARNING: bdiff in pseudo-firth increased (\" << bdiff << \" -> \" << bdiff_new << \")\\n\";\n          return false; \n        }\n        mx = bdiff_new / maxstep;\n        if( mx > 1 ) {\n          betanew.head(cols_incl) = betavec.head(cols_incl) + step_size / mx;\n          if(params->debug) cerr << \"step = \" << step_size(0) << \" -- mx = \" << mx << \" -- beta = \" << betanew(0) << \"\\n\";\n        }\n      }\n\n      // skip step-halving\n      for( niter_search = 1; niter_search <= params->niter_max_line_search; niter_search++ ){\n        get_pvec(etavec, pivec, betanew, offset, X1, params->numtol_eps);\n        //dev_log1 = get_deviance_logistic((ystar + 0.5 * hvec)/(1+hvec), pivec, 1 + hvec, mask);\n        //if(params->debug) cerr << \"[[\" << niter_log << \" - \" << niter_search <<setprecision(16) << \"]] D0=\" << dev_log0 << \" -> D1=\" << dev_log1 << \"\\n\";\n        //if( dev_log1 < dev_log0 ) break;\n        break;\n        // adjust step size\n        //betanew = (betavec + betanew) / 2;\n      }\n      /*if( niter_search > params->niter_max_line_search ){\n        if(params->debug) cerr << \"step halving failed in pseudo-firth log. 
reg step\\n\";\n        return false; // step-halving failed\n      }*/\n\n      // stopping criterion\n      score = X1.leftCols(cols_incl).transpose() * mask.select(ystar - pivec, 0).matrix();\n      if( score.abs().maxCoeff() < tol ) break; // prefer for score to be below tol\n\n      if(params->debug) cerr << \"[[\" << niter_log <<setprecision(16) << \"]] beta.head=(\" << betanew.head(min(5,cols_incl)).matrix().transpose() << \"...); bdiff=\" << bdiff_new << \"; score.max=\" << score.abs().maxCoeff() << \"\\n\";\n\n      betavec = betanew;\n      if(comp_lrt && (cols_incl == 1)) bdiff = bdiff_new;\n      //dev_log0 = dev_log1;\n    }\n    if( niter_log > params->niter_max ) return false;\n\n    betavec = betanew;\n    if(score_max_new < score_max_old) {\n      score_max_old = score_max_new;\n      niter_score_max_unchanged = 0;\n    } else niter_score_max_unchanged++;\n  }\n\n  if(params->debug) cerr << \"Ni=\" << niter_cur<<setprecision(16) << \"; beta.head=(\" << betavec.head(min(15,cols_incl)).matrix().transpose() << \"); score.max=\" << mod_score.abs().maxCoeff() << \"\\n\";\n\n  // If didn't converge\n  if( niter_cur > niter_firth ) return false;\n\n  dev = dev_new;\n  if( comp_lrt ) {\n    lrt = dev0 - dev_new;\n    if(lrt < 0) return false;\n\n    sevec = qr.inverse().diagonal().array().sqrt();\n  }\n\n  return true;\n}\n\n// for approx firth testing step\nuint fit_firth_pseudo(double const& dev0, const Ref<const ArrayXd>& Y1, const Ref<const VectorXd>& Gvec, const Ref<const ArrayXd>& offset, const Ref<const ArrayXb>& mask, const Ref<const ArrayXi>& index_carriers, double& betavec, double& sevec, double& lrt, int const& maxstep_firth, int const& niter_firth, double const& tol, struct param const* params) {\n\n  bool fastFirth = index_carriers.size() > 0;\n  int niter_cur = 0, niter_log = 0, niter_max = 25;\n  double dev_new=0, dev_non_carriers = 0, mx, maxstep = 5;\n  double bdiff=1e16, bdiff_new=1e16;\n  //double dev_log0, dev_log1=0;\n\n  double score, betanew = 0, step_size, XtWX = 0, beta_itr_14 = 0;\n  ArrayXd hvec, ystar, etavec, pivec, wvec, XtWX_diag, Gvec_mask, Gvec_sq;\n\n  if(fastFirth) {\n    get_pvec(etavec, pivec, betavec, offset, Gvec, params->numtol_eps);\n    dev_new = get_logist_dev(Y1, pivec, mask);\n    dev_non_carriers = dev_new - get_logist_dev(Y1(index_carriers), pivec(index_carriers), mask(index_carriers));\n    Gvec_mask = Gvec(index_carriers);\n  } else Gvec_mask = mask.select(Gvec.array(),0);\n  Gvec_sq = Gvec_mask.square();\n\n  if(params->debug) cerr << \"\\nPseudo-firth (fast) starting beta = \" << betavec << \"\\n\";\n\n  while(niter_cur++ < niter_firth){\n\n    // update quantities\n    if(fastFirth) {\n      get_pvec(etavec, pivec, betavec, offset(index_carriers), Gvec(index_carriers), params->numtol_eps);\n      dev_new = dev_non_carriers + get_logist_dev(Y1(index_carriers), pivec, mask(index_carriers));\n      get_wvec(pivec, wvec, mask(index_carriers));\n    } else {\n      get_pvec(etavec, pivec, betavec, offset, Gvec, params->numtol_eps);\n      dev_new = get_logist_dev(Y1, pivec, mask);\n      get_wvec(pivec, wvec, mask);\n    }\n    XtWX_diag = Gvec_sq * wvec;\n    XtWX = XtWX_diag.sum();\n    // compute deviance\n    dev_new -= log(XtWX);\n\n    // compute diag(H), H = U(U'U)^{-1}U', U = Gamma^(1/2)X\n    hvec = XtWX_diag / XtWX;\n    // compute pseudo-response \n    ystar = (fastFirth ? 
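/* carriers only under the sparse fast-Firth approximation */ 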
Y1(index_carriers) : Y1) + hvec * (0.5 - pivec); \n    // compute modified score & step size\n    score = (Gvec_mask * (ystar - pivec)).sum();\n\n    // stopping criterion using modified score function\n    // edit 5.31.12 for edge cases with approx Firth\n    if( (fabs(score) < tol) && (niter_cur >= 2) ) {\n      if(params->debug) cerr << \"stopping criterion met (|\" << score << \"| < \" << tol << \")\\n\";\n      break;\n    }\n    if(params->debug) cerr << \"[\" << niter_cur <<setprecision(16)<< \"] beta.head=(\" << betavec << \"...); score=\" << score << \"\\n\";\n\n    // check for change in beta at iteration 15 (if too large, try with NR)\n    if(niter_cur == 14) beta_itr_14 = betavec;\n    if((niter_cur == 15) && (fabs(betavec - beta_itr_14) > .1)) return 1;\n\n    // fit unpenalized logistic on transformed Y\n    niter_log = 0;\n    bdiff = 1e16;\n    //dev_log0 = std::numeric_limits<double>::max();\n    while(niter_log++ < niter_max){\n\n      // force absolute step size to be less than maxstep for each entry of beta\n      step_size = score / XtWX;\n      bdiff_new = fabs(step_size);\n      if(bdiff_new > bdiff) { // step size should get smaller closer to soln\n        if(params->debug) cerr << \"WARNING: bdiff in pseudo-firth increased (\" << bdiff << \" -> \" << bdiff_new << \")\\n\";\n        return 2; \n      }\n      mx = bdiff_new / maxstep;\n\n      // parameter estimate\n      if( mx > 1 ) {\n        betanew = betavec + step_size / mx;\n        if(params->debug) cerr << \"step = \" << step_size << \" -- mx = \" << mx << \" -- beta = \" << betanew << \"\\n\";\n      } else betanew = betavec + step_size;\n\n      // compute score at new beta\n      if(fastFirth) get_pvec(etavec, pivec, betanew, offset(index_carriers), Gvec(index_carriers), params->numtol_eps); \n      else get_pvec(etavec, pivec, betanew, offset, Gvec, params->numtol_eps);\n      score = (Gvec_mask * (ystar - pivec)).sum();\n      if( fabs(score) < tol ) break; // prefer for score to be below tol\n\n      if(params->debug) cerr << \"[[\" << niter_log <<setprecision(16) << \"]] beta=(\" << betanew << \"...); bdiff=\" << bdiff_new << \"; score=\" << score << \"\\n\";\n\n      // p*(1-p) and check for zeroes\n      if( get_wvec(pivec, wvec, (fastFirth ? 
mask(index_carriers) : mask), params->numtol_eps) ) {\n        if(params->debug) cerr << \"WARNING: pseudo-firth gave fitted p=0 in logistic reg step\\n\";\n        return 3;\n      }\n      XtWX_diag = Gvec_sq * wvec;\n      XtWX = XtWX_diag.sum();\n\n      betavec = betanew;\n      bdiff = bdiff_new;\n      //dev_log0 = dev_log1;\n    }\n    if( niter_log > params->niter_max ) return 1;\n\n    betavec = betanew;\n  }\n\n  if(params->debug) cerr << \"Ni=\" << niter_cur<<setprecision(16) << \"; beta=(\" << betavec << \"); score=\" << score << \"\\n\";\n\n  // If didn't converge\n  if( niter_cur > niter_firth ) return 1;\n\n  lrt = dev0 - dev_new;\n  if(lrt < 0) return 4;\n\n  sevec = sqrt(1/XtWX);\n\n  return 0;\n}\n\n// for approx firth testing step (using NR)\nbool fit_firth(double const& dev0, const Ref<const ArrayXd>& Y1, const Ref<const VectorXd>& X1, const Ref<const ArrayXd>& offset, const Ref<const ArrayXb>& mask, const Ref<const ArrayXi>& index_carriers, double& betavec, double& sevec, double& lrt, int const& maxstep_firth, int const& niter_firth, double const& tol, struct param const* params) {\n\n  bool fastFirth = index_carriers.size() > 0;\n  int niter_cur = 0, niter_search;\n  double dev_old=0, dev_new=0, dev_non_carriers = 0, denum, mx;\n  double bdiff=1e16;\n\n  double score, betanew = 0, step_size, XtWX = 0;\n  ArrayXd hvec, etavec, pivec, wvec, XtWX_diag, Gvec_mask, Gvec_sq;\n \n  get_pvec(etavec, pivec, betavec, offset, X1, params->numtol_eps);\n  dev_old = get_logist_dev(Y1, pivec, mask);\n  if(fastFirth) {\n    get_pvec(etavec, pivec, betavec, offset(index_carriers), X1(index_carriers), params->numtol_eps);\n    dev_non_carriers = dev_old - get_logist_dev(Y1(index_carriers), pivec, mask(index_carriers));\n    get_wvec(pivec, wvec, mask(index_carriers));\n    Gvec_mask = X1(index_carriers);\n  } else {\n    get_wvec(pivec, wvec, mask);\n    Gvec_mask = mask.select(X1.array(),0);\n  }\n  Gvec_sq = Gvec_mask.square();\n\n  // solve S'(beta) = S(beta) + X'(h*(0.5-p)) = 0\n  // starting values\n  if(params->debug) cerr << \"\\nFirth starting beta = \" << betavec << \"\\n\";\n  // compute deviance\n  XtWX_diag = Gvec_sq * wvec;\n  XtWX = XtWX_diag.sum();\n  dev_old -= log(XtWX);\n\n  while(niter_cur++ < niter_firth){\n\n    // compute diag(H), H = U(U'U)^{-1}U', U = Gamma^(1/2)X\n    hvec = XtWX_diag / XtWX;\n    // compute modified score\n    score = (Gvec_mask * ((fastFirth ? 
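/* modified score U*(b) = sum_j g_j * (y_j - p_j + h_j*(0.5 - p_j)) */ 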
Y1(index_carriers) : Y1) - pivec + hvec * (0.5 - pivec))).sum();\n    // stopping criterion using modified score function\n    // edit 5.31.12 for edge cases with approx Firth\n    if( (fabs(score) < tol) && (niter_cur >= 2) ) break;\n\n    // force absolute step size to be less than maxstep for each entry of beta\n    step_size = score / XtWX;\n    bdiff = fabs(step_size);\n    mx = bdiff / maxstep_firth;\n    if( mx > 1 ) step_size /= mx;\n\n    // start step-halving and stop when deviance decreases \n    denum = 2;\n    for( niter_search = 1; niter_search <= params->niter_max_line_search; niter_search++ ){\n\n      // adjusted step size\n      if(niter_search > 1) step_size /= denum;\n\n      betanew = betavec + step_size;\n\n      ///////// compute corresponding deviance\n      if(fastFirth) {\n        get_pvec(etavec, pivec, betanew, offset(index_carriers), X1(index_carriers), params->numtol_eps); \n        dev_new = dev_non_carriers + get_logist_dev(Y1(index_carriers), pivec, mask(index_carriers));\n      } else {\n        get_pvec(etavec, pivec, betanew, offset, X1, params->numtol_eps);\n        dev_new = get_logist_dev(Y1, pivec, mask);\n      }\n      get_wvec(pivec, wvec, (fastFirth ? mask(index_carriers) : mask));\n      XtWX_diag = Gvec_sq * wvec;\n      XtWX = XtWX_diag.sum();\n      dev_new -= log(XtWX);\n\n      if(params->debug) cerr << \"[\"<<niter_cur << \":\" << niter_search <<\"] L1=\" << setprecision(16)<< dev_new << \"/L0=\"<< dev_old<< \"\\n\";\n      if( dev_new < dev_old ) break;\n    }\n\n    if( niter_search > params->niter_max_line_search ) step_size += 1e-6;\n\n    if(params->debug) cerr << \"[\" << niter_cur <<setprecision(16)<< \"] beta=(\" << betanew << \"...); beta_diff.max=\" << bdiff << \"; score=\" << score << \"\\n\";\n\n    betavec += step_size;\n    dev_old = dev_new;\n\n  }\n  if(params->debug) cerr << \"Ni=\" << niter_cur<<setprecision(16) << \"; beta=(\" << betavec << \"); score=\" << score << \"\\n\";\n\n  // If didn't converge\n  if( niter_cur > niter_firth ) return false;\n\n  lrt = dev0 - dev_new;\n  if(lrt < 0) return false;\n\n  sevec = sqrt(1/XtWX);\n\n  return true;\n}\n\n// fit based on penalized log-likelihood using ADAM\nbool fit_firth_adam(int const& ph, double& dev0, const Ref<const ArrayXd>& Y1, const Ref<const MatrixXd>& X1, const Ref<const ArrayXd>& offset, const Ref<const ArrayXb>& mask, ArrayXd& pivec, ArrayXd& etavec, ArrayXd& betavec, ArrayXd& sevec, int const& cols_incl, double& dev, bool const& comp_lrt, double& lrt, struct param const* params) {\n  // fit with first cols_incl columns of X1 (non-used entries of betavec should be 0)\n  // else assuming using all columns \n\n  bool use_offset = Y1.size() == offset.size();\n  bool force_batch_adam = comp_lrt && params->adam_mini; // force batch adam for 1st iteration to get dev0\n  int niter_cur = 0, index;\n  double p_alpha = params->adam_alpha, p_beta1 = params->adam_beta1, p_beta2 = params->adam_beta2, p_eps = params->adam_eps, p_alpha_t;\n\n  std::uniform_int_distribution<> d(0, mask.count() - 1);\n  std::mt19937 gen;\n  ArrayXd hvec, gradient_f, wvec;\n  ArrayXd mt, vt, step_size, Ytmp, offset_tmp;\n  MatrixXd XtW, XtWX, Xtmp;\n  ColPivHouseholderQR<MatrixXd> qr;\n\n  // starting values for ADAM params\n  mt = vt = betavec.head(cols_incl) * 0;\n\n  // for mini-batch\n  if(params->adam_mini){\n    Xtmp.resize(params->adam_batch_size, X1.cols());\n    Ytmp.resize(params->adam_batch_size);\n    if(use_offset) offset_tmp.resize(params->adam_batch_size);\n  }\n\n  // minimize 
f=-2*pen.LL using ADAM\n  while(niter_cur++ < params->niter_max_firth_adam){\n\n    if(params->adam_mini && !force_batch_adam){ // ADAM using mini-batch\n\n      for (int i = 0; i < params->adam_batch_size; i++) {\n        index = params->adam_indices[ph](d(gen));\n        Xtmp.row(i) = X1.row(index);\n        Ytmp(i) = Y1(index);\n        if(use_offset) offset_tmp(i) = offset(index);\n      }\n      // update quantities\n      etavec = (Xtmp * betavec.matrix()).array();\n      if(use_offset) etavec += offset_tmp;\n      // fitted probabilities\n      pivec = 1 - 1 / (etavec.exp() + 1) ;\n      wvec = pivec * (1 - pivec);\n      XtW = Xtmp.transpose() * wvec.matrix().asDiagonal();\n      XtWX = XtW * XtW.transpose();\n      qr.compute(XtWX);\n      // compute diag(H), H = U(U'U)^{-1}U', U = Gamma^(1/2)X\n      hvec = (qr.solve(XtW).array() * XtW.array() ).colwise().sum();\n      // compute gradient of f\n      gradient_f = - (Xtmp.leftCols(cols_incl).transpose() * (Ytmp - pivec + hvec * (0.5 - pivec)).matrix() ).array();\n\n    } else {\n\n      // update quantities\n      etavec = (X1 * betavec.matrix()).array();\n      if(use_offset) etavec += offset;\n      // fitted probabilities\n      pivec = 1 - 1 / (etavec.exp() + 1) ;\n      wvec = mask.select( ( pivec * (1 - pivec) ).sqrt(), 0);\n      XtW = X1.transpose() * wvec.matrix().asDiagonal();\n      XtWX = XtW * XtW.transpose();\n      qr.compute(XtWX);\n      // at first iter (i.e. betaSNP=0) this is null deviance\n      if(comp_lrt && (niter_cur == 1))\n        dev0 = get_logist_dev(Y1, pivec, mask) - qr.logAbsDeterminant();\n      // compute diag(H), H = U(U'U)^{-1}U', U = Gamma^(1/2)X\n      hvec = (qr.solve(XtW).array() * XtW.array() ).colwise().sum();\n      // compute gradient of f\n      gradient_f = - (X1.leftCols(cols_incl).transpose() * mask.select( Y1 - pivec + hvec * (0.5 - pivec), 0).matrix() ).array();\n\n    }\n\n    if(params->debug && (niter_cur>1) && (niter_cur%100==0) ) cerr << \"\\nNiter = \" << niter_cur << \" (beta = \" << betavec.matrix().transpose() << \") : \" << gradient_f.matrix().transpose() << endl;\n\n    mt = p_beta1 * mt + (1 - p_beta1) * gradient_f;\n    vt = p_beta2 * vt + (1 - p_beta2) * gradient_f.square();\n    p_alpha_t = p_alpha * sqrt(1 - pow(p_beta2, niter_cur)) / (1 - pow(p_beta1, niter_cur));\n    step_size = p_alpha_t * mt / (vt.sqrt() + p_eps);\n\n    // stopping criterion\n    if( step_size.abs().maxCoeff() < params->numtol) break;\n\n    betavec.head(cols_incl) -= step_size;\n\n  }\n  if(params->debug) cerr << \"ADAM took \"<< niter_cur << \" iterations (score max = \" << gradient_f.abs().maxCoeff() << \")...\";\n\n  return (niter_cur <= params->niter_max_firth_adam);\n}\n\n\nstring get_firth_est_allChr(struct in_files& files, struct filter const& filters, struct ests& m_ests, struct f_ests& fest, struct phenodt& pheno_data, struct param& params, mstream& sout){\n\n  sout << \"   -computing and storing null Firth estimates for all chromosomes...\" << flush;\n  auto t1 = std::chrono::high_resolution_clock::now();\n\n  // go through each chromosome\n  for(int chr = 1; chr <= params.nChrom; chr++){\n\n    if(params.verbose) sout << \"chr\" << chr <<\"...\" << flush;\n\n    // read the prs\n    blup_read_chr(true, chr, m_ests, files, filters, pheno_data, params, sout);\n\n    if (params.trait_mode == 1) {\n      // run null logistic regression to get the starting values for firth\n      fit_null_logistic(true, chr, &params, &pheno_data, &m_ests, &files, sout); // for all phenotypes\n\n      // 
run null firth for each trait and write estimates to file\n      fit_null_firth(true, chr, &fest, &pheno_data, &m_ests, &files, &params, sout);\n    } else if (params.trait_mode == 3) {\n      // run null cox regression to get the starting values for firth\n      fit_null_cox(true, chr, &params, &pheno_data, &m_ests, &files, sout); // for all phenotypes\n\n      // run null firth for each trait and write estimates to file\n      fit_null_firth_cox(true, chr, &fest, &pheno_data, &m_ests, &files, &params, sout);\n    }\n  }\n\n  string fname = print_null_firth_info(files, fest, params);\n\n  sout << \"done\";\n  auto t2 = std::chrono::high_resolution_clock::now();\n  auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1);\n  sout << \" (\" << duration.count() << \"ms) \"<< endl;\n\n  sout << \"    +written to file [\" << fname << \"]\\n\";\n\n  return fname;\n\n}\n\nstring print_null_firth_info(struct in_files const& files, struct f_ests& fest, struct param const& params){\n\n    string path_firth, firth_filename, out_firth_list = files.out_file + \"_firth.list\";\n\n    ofstream outf;\n    outf.open(out_firth_list);\n    if(outf.fail())\n      throw \"cannot write file : \" + out_firth_list;\n\n    for( int j = 0; j < params.n_pheno; ++j ) {\n      if( !params.pheno_pass(j) ) continue;\n      firth_filename = files.out_file + \"_\" + to_string(j + 1) + \".firth\" + (params.gzOut ? \".gz\" : \"\");\n      path_firth = get_fullpath(firth_filename);\n      outf << files.pheno_names[j] << \" \" << path_firth << endl;\n      fest.firth_est_files[j]->closeFile();\n    }\n\n    outf.close();\n\n    return out_firth_list;\n}\n\nvoid check_beta_start_firth(struct in_files& files, struct param const& params, mstream& sout){\n\n  int tmp_index;\n  string line;\n  std::vector< string > tmp_str_vec;\n  ArrayXb read = ArrayXb::Constant(params.n_pheno, false);\n  Files fClass;\n\n  // get list of files containing the null Firth estimates\n  files.null_firth_files.assign(params.n_pheno, \"\");\n  fClass.openForRead(files.null_firth_file, sout);\n\n  while (fClass.readLine(line)){\n    tmp_str_vec = string_split(line,\"\\t \");\n\n    // each line contains a phenotype name and the corresponding null Firth estimates file name\n    if( tmp_str_vec.size() != 2 )\n      throw \"incorrectly formatted null firth list file : \" + files.null_firth_file;\n\n    // get index of phenotype in phenotype matrix\n    vector<string>::iterator it = std::find(files.pheno_names.begin(), files.pheno_names.end(), tmp_str_vec[0]);\n    if (it == files.pheno_names.end()) continue; // ignore unrecognized phenotypes\n\n    tmp_index = std::distance(files.pheno_names.begin(), it);\n    files.null_firth_files[tmp_index] = tmp_str_vec[1];\n\n    // check that phenotype only has one file\n    if(read(tmp_index))\n      throw \"phenotype \\'\" + tmp_str_vec[0] + \"\\' appears more than once in file.\";\n    else if(!file_exists(tmp_str_vec[1]))\n      throw \"file \" + tmp_str_vec[1] + \" cannot be opened.\";\n\n    read(tmp_index) = true;\n  }\n\n  // // force all phenotypes in phenotype file to be used\n  // if(read.count() != params.n_pheno) \n  //   throw \"number of valid step 1 files (\" + to_string( read.count() ) + \")  is not equal to the number of phenotypes.\" ;\n\n}\n\nvoid get_beta_start_firth(int const& chrom, struct f_ests* firth_est, struct in_files* files, struct param const* params, mstream& sout){\n\n  int npar, nmax = firth_est->beta_null_firth.rows();\n  double in_beta;\n  string line;\n  std::vector< string > tmp_str_vec 
;\n  Files fClass;\n\n  // for each phenotype get b0\n  for( int i = 0; i < params->n_pheno; ++i ) {\n\n    bool chr_found = false;\n\n    // if file has not been given, use 0 as start\n    if(files->null_firth_files[i] == \"\") continue;\n    if( !params->pheno_pass(i) ) continue;\n\n    fClass.openForRead(files->null_firth_files[i], sout);\n\n    while(fClass.readLine(line)){\n      tmp_str_vec = string_split(line,\"\\t \");\n      if(tmp_str_vec.size() == 0)\n        throw \"error reading null firth estimates file\";\n      else if(chrStrToInt(tmp_str_vec[0], params->nChrom) == chrom) {\n        chr_found = true; break;\n      }\n    }\n\n    //cerr << std::boolalpha << chr_found << endl;\n    if(!chr_found) continue; // use 0 as start\n\n    npar = tmp_str_vec.size();\n    if((npar-1) > nmax) \n      throw \"file has more predictors than included in analysis (=\" + to_string(npar) + \" vs \" + to_string(nmax) + \")\";\n\n    for(int j = 1; j < npar; j++ ) {\n      in_beta = convertDouble( tmp_str_vec[j], params, sout);\n      if (in_beta == params->missing_value_double)\n        throw \"no missing values allowed in file\";\n      firth_est->beta_null_firth(j-1,i) = in_beta;\n    }\n\n    fClass.closeFile();\n  }\n\n}\n\nvoid get_beta_start_firth(struct f_ests* firth_est, struct ests const* m_ests){\n  // get b0 from null logistic regression\n  firth_est->beta_null_firth.topRows(m_ests->bhat_start.rows()) = m_ests->bhat_start;\n}\n\nvoid check_pval_snp(variant_block* block_info, data_thread* dt_thr, int const& chrom, int const& ph, int const& isnp, struct phenodt& pheno_data, struct geno_block& gblock, struct ests const& m_ests, struct f_ests& fest, struct param const& params, mstream& sout){\n\n  // if firth isn't used, or Tstat < threshold, no correction done\n  if(!block_info->is_corrected(ph) || (fabs(dt_thr->stats(ph)) <= params.z_thr)){\n    get_sumstats(false, ph, dt_thr);\n    dt_thr->cal_factor(ph) = 1;\n    block_info->is_corrected(ph) = false;\n    return;\n  }\n\n  if(params.firth){ // firth\n    \n    run_firth_correction_snp(chrom, ph, isnp, gblock, block_info, dt_thr, pheno_data, m_ests, fest, params, sout);\n    if(block_info->test_fail(ph)) {\n      get_sumstats(true, ph, dt_thr);\n      return;\n    }\n\n    dt_thr->chisq_val(ph) = dt_thr->dif_deviance;\n    get_logp(dt_thr->pval_log(ph), dt_thr->chisq_val(ph));\n\n    // compute SE from beta & pvalue\n    if( params.back_correct_se && (dt_thr->chisq_val(ph) > 0) )\n      dt_thr->se_b(ph) = fabs(dt_thr->bhat(ph)) / sqrt(dt_thr->chisq_val(ph));\n\n  } else if(params.use_SPA) { // spa\n\n    run_SPA_test(block_info->test_fail(ph), ph, dt_thr, pheno_data.masked_indivs.col(ph).array(), m_ests, params);\n    if(block_info->test_fail(ph)) {\n      get_sumstats(true, ph, dt_thr);\n      return;\n    }\n\n    dt_thr->se_b(ph) = 1 / sqrt(dt_thr->denum(ph));\n    dt_thr->bhat(ph) = sgn(dt_thr->stats(ph)) * sqrt(dt_thr->chisq_val(ph)) * dt_thr->se_b(ph);\n\n  }\n\n  //if(params.debug) cerr << \"uncorrected: \" << dt_thr->stats(ph) * dt_thr->stats(ph) <<  \"] -> \" << dt_thr->chisq_val(ph) << endl;\n  dt_thr->cal_factor(ph) =  dt_thr->chisq_val(ph) == 0 ? 
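/* calibration factor: uncorrected score chi-square over the corrected one (guard the zero case) */ 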
0 : dt_thr->stats(ph) * dt_thr->stats(ph) / dt_thr->chisq_val(ph);\n\n}\n\nvoid get_sumstats(bool const& no_pv, int const& ph, data_thread* dt_thr) {\n\n  // beta & se\n  dt_thr->se_b(ph) = 1 / sqrt(dt_thr->denum(ph));\n  dt_thr->bhat(ph) = dt_thr->stats(ph) * dt_thr->se_b(ph);\n  if(no_pv) return;\n\n  // chisq & lpv\n  dt_thr->chisq_val(ph) = pow(dt_thr->stats(ph), 2);\n  get_logp(dt_thr->pval_log(ph), dt_thr->chisq_val(ph));\n}\n\nvoid run_firth_correction_snp(int const& chrom, int const& ph, int const& isnp, struct geno_block& gblock, variant_block* block_info, data_thread* dt_thr, struct phenodt& pheno_data, struct ests const& m_ests, struct f_ests& fest, struct param const& params, mstream& sout){\n\n  if(!params.firth_approx){ // exact firth\n    if (params.trait_mode == 1) {\n      // obtain null deviance (set SNP effect to 0 and compute max. pen. LL)\n      fit_firth_logistic_snp(chrom, ph, isnp, true, &params, &pheno_data, &m_ests, &fest, gblock.Gmat.col(isnp), block_info, dt_thr, sout);\n      if(block_info->test_fail(ph)) return ;\n      // fit full model and compute deviance\n      fit_firth_logistic_snp(chrom, ph, isnp, false, &params, &pheno_data, &m_ests, &fest, gblock.Gmat.col(isnp), block_info, dt_thr, sout);\n    } else if (params.trait_mode == 3) {\n      if (pheno_data.new_cov.cols() == 0) {\n        fit_firth_cox_snp_fast(chrom, ph, isnp, &params, &pheno_data, &m_ests, &fest, dt_thr->Gres, block_info, dt_thr, sout);\n      } else {\n        fit_firth_cox_snp(chrom, ph, isnp, &params, &pheno_data, &m_ests, &fest, gblock.Gmat.col(isnp), block_info, dt_thr, sout);\n      }\n    }\n  } else { // approx firth - only fit full model\n    if (params.trait_mode == 1) {\n      fit_firth_logistic_snp_fast(chrom, ph, isnp, false, &params, &pheno_data, &m_ests, &fest, dt_thr->Gres.cwiseQuotient(m_ests.Gamma_sqrt.col(ph)), block_info, dt_thr, sout);\n    } else if (params.trait_mode == 3) {\n      fit_firth_cox_snp_fast(chrom, ph, isnp, &params, &pheno_data, &m_ests, &fest, dt_thr->Gres, block_info, dt_thr, sout);\n    }\n  }\n}\n\nvoid run_SPA_test(bool& test_fail, int const& ph, data_thread* dt_thr, const Ref<const ArrayXb>& mask, struct ests const& m_ests, struct param const& params){\n  run_SPA_test_snp(dt_thr->chisq_val(ph), dt_thr->pval_log(ph), dt_thr->stats(ph), dt_thr->denum(ph), dt_thr->fastSPA, dt_thr->Gsparse, dt_thr->Gres.array(), m_ests.Y_hat_p.col(ph).array(), m_ests.Gamma_sqrt.col(ph).array(), mask, test_fail, params.tol_spa, params.niter_max_spa, params.missing_value_double, params.nl_dbl_dmin);\n}\n\nvoid run_SPA_test_snp(double& chisq, double& pv, const double& stats, const double& denum, bool const& fastSPA, SpVec const& Gsparse, const Ref<const ArrayXd>& Gres, const Ref<const ArrayXd>& phat, const Ref<const ArrayXd>& Gamma_sqrt, const Ref<const ArrayXb>& mask, bool& test_fail, const double& tol, const double& niter_max, const double& missing_value_double, const double& nl_dbl_dmin){\n\n  int index_j;\n  double score_num, tval, limK1_low, limK1_high, root_K1, pval1, pval2;\n  spa_data spa_df;\n  ArrayXd Gmu;\n\n  // compute needed quantities\n  spa_df.val_c = sqrt( denum );  // sqrt( G'WG )\n  score_num = stats * spa_df.val_c;\n  spa_df.Gmod = Gres / Gamma_sqrt * mask.cast<double>();\n  Gmu = spa_df.Gmod * phat;\n  spa_df.val_a = Gmu.sum();\n  spa_df.fastSPA = fastSPA;\n\n  if(spa_df.fastSPA){\n    spa_df.val_b = denum;\n    spa_df.val_d = 0;\n    for (SpVec::InnerIterator it(Gsparse); it; ++it) {\n      index_j = it.index();\n      if(!mask(index_j)) 
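/* skip samples masked out for this phenotype */ 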
continue;\n      spa_df.val_b -= Gres(index_j) * Gres(index_j);\n      spa_df.val_d += Gmu(index_j);\n    }\n  }\n\n  // check if K'(t)= s can be solved\n  limK1_low = (spa_df.Gmod < 0).select(spa_df.Gmod, 0 ).sum() - spa_df.val_a ;\n  limK1_high = (spa_df.Gmod > 0).select(spa_df.Gmod, 0 ).sum() - spa_df.val_a ;\n  if( score_num < limK1_low || score_num > limK1_high ){\n    //if(params.verbose) sout << \"WARNING: SPA failed (solution to K'(t)=s is infinite)\";\n    test_fail = true;\n    return;\n  }\n\n  tval = stats >= 0 ? -stats : stats;\n\n  // 1.for T\n  spa_df.pos_score = true;\n  // solve K'(t)= tval using a mix of Newton-Raphson and bisection method\n  root_K1 = solve_K1_snp(tval, denum, Gsparse, phat, Gamma_sqrt, spa_df, mask, tol, niter_max, missing_value_double);\n  if( root_K1 == missing_value_double ){\n    test_fail = true;\n    return;\n  }\n  // compute pvalue (one tail)\n  get_SPA_pvalue_snp(root_K1, tval, pval1, test_fail, denum, Gsparse, phat, Gamma_sqrt, spa_df, mask);\n  if(test_fail) {return;}\n\n  // 2.for -T\n  spa_df.pos_score = false;\n  // solve K'(t)= tval using a mix of Newton-Raphson and bisection method\n  root_K1 = solve_K1_snp(tval, denum, Gsparse, phat, Gamma_sqrt, spa_df, mask, tol, niter_max, missing_value_double);\n  if( root_K1 == missing_value_double ){\n    test_fail = true;\n    return;\n  }\n  // compute pvalue (other tail)\n  get_SPA_pvalue_snp(root_K1, tval, pval2, test_fail, denum, Gsparse, phat, Gamma_sqrt, spa_df, mask);\n  if(test_fail) {return;}\n\n  // get quantile\n  //cerr << \"\\nstats: \" << stats << \":\" << pval1 << \" \" << pval2 << \"\\n\";\n  if( (pval1 + pval2) > 1 ){\n    test_fail = true;\n    return;\n  }\n  get_logp(pval1+pval2, pv, chisq, nl_dbl_dmin);\n\n}\n\n\n\n// SPA (MT in OpenMP)\ndouble solve_K1_snp(const double& tval, const double& denum, SpVec const& Gsparse, const Ref<const ArrayXd>& phat, const Ref<const ArrayXd>& Gamma_sqrt, struct spa_data& spa_df, const Ref<const ArrayXb>& mask, double const& tol, int const& niter_max, double const& missing_value_double){\n\n  int niter_cur;\n  int lambda = spa_df.pos_score ? 1 : -1; // if score is negative, adjust K' and K''\n  double min_x, max_x, t_old, f_old, t_new = -1, f_new, hess;\n\n  niter_cur = 0;\n  if(tval >=0){min_x = 0, max_x = std::numeric_limits<double>::max();}\n  else{min_x = std::numeric_limits<double>::lowest(), max_x = 0;}\n  t_old = 0;\n  f_old = spa_df.fastSPA ? compute_K1_fast_snp(lambda * t_old, spa_df.val_b, spa_df.val_c, spa_df.val_d, denum, Gsparse, spa_df.Gmod, phat, mask) : compute_K1_snp(lambda * t_old, spa_df.val_a, spa_df.val_c, spa_df.Gmod, phat, mask);\n  f_old *= lambda;\n  f_old -= tval; \n\n  while( niter_cur++ < niter_max ){\n\n    hess = spa_df.fastSPA ? compute_K2_fast_snp(lambda * t_old, spa_df.val_b, spa_df.val_c, spa_df.val_d, denum, Gsparse, spa_df.Gmod, phat, Gamma_sqrt, mask) : compute_K2_snp(lambda * t_old, spa_df.val_a, spa_df.val_c, spa_df.Gmod, phat, Gamma_sqrt, mask);\n    if(hess == 0) return missing_value_double;\n    t_new = t_old - f_old / hess;\n    f_new = spa_df.fastSPA ? 
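/* K'(t) via the sparse fastSPA decomposition, else the full sum */ 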
compute_K1_fast_snp(lambda * t_new, spa_df.val_b, spa_df.val_c, spa_df.val_d, denum, Gsparse, spa_df.Gmod, phat, mask) : compute_K1_snp(lambda * t_new, spa_df.val_a, spa_df.val_c, spa_df.Gmod, phat, mask);\n    f_new *= lambda;\n    f_new -= tval;\n\n    if( fabs( f_new ) < tol ) break;\n\n    // update bounds on root\n    if( t_new && (t_new > min_x) && (t_new < max_x) ){\n      if( f_new > 0) max_x = t_new;\n      else min_x = t_new;\n    } else{ // bisection method if t_new went out of bounds and re-compute f_new\n      t_new = ( min_x + max_x ) / 2;\n      // if( fabs( min_x - t_new ) < params->tol_spa ) break;\n      f_new = spa_df.fastSPA ? compute_K1_fast_snp(lambda * t_new, spa_df.val_b, spa_df.val_c, spa_df.val_d, denum, Gsparse, spa_df.Gmod, phat, mask) : compute_K1_snp(lambda * t_new, spa_df.val_a, spa_df.val_c, spa_df.Gmod, phat, mask);\n      f_new *= lambda;\n      f_new -= tval;\n      // reduce bounds based on new value\n      if(f_new <= 0) min_x = t_new;\n      else max_x = t_new;\n    }\n\n    t_old = t_new;\n    f_old = f_new;\n  }\n\n  // If didn't converge\n  if( niter_cur > niter_max ){\n    //if(params->verbose) sout << \"WARNING: SPA did not converge to root for K'(t)=s.\\n\";\n    return missing_value_double;\n  }\n  //sout << \"#iterations = \" << niter_cur << \"; f= \" << f_new << endl;\n\n  return t_new;\n}\n\ndouble compute_K_snp(const double& t, const double& a, const double& c, const Ref<const ArrayXd>& Gmod, const Ref<const ArrayXd>& phat, const Ref<const ArrayXb>& mask){\n  double val = mask.select( ( 1 - phat + phat * ( t / c * Gmod ).exp() ).log(), 0).sum() - t * a / c;\n\n  return val;\n}\n\ndouble compute_K_fast_snp(const double& t, const double& b, const double& c, const double& d, const double& denum, SpVec const& Gsparse, const Ref<const ArrayXd>& Gmod, const Ref<const ArrayXd>& phat, const Ref<const ArrayXb>& mask){\n\n  uint32_t index_j;\n  double val = 0;\n\n  for (SpVec::InnerIterator it(Gsparse); it; ++it) {\n    index_j = it.index();\n    if(!mask(index_j)) continue;\n\n    val += log( 1 - phat(index_j) + phat(index_j) * exp( t / c * Gmod(index_j)) );\n  }\n  val += -t * d / c + t * t / 2 / denum * b;\n\n  return val;\n}\n\ndouble compute_K1_snp(const double& t, const double& a, const double& c, const Ref<const ArrayXd>& Gmod, const Ref<const ArrayXd>& phat, const Ref<const ArrayXb>& mask){\n\n  double val = mask.select( ( Gmod * phat / c ) / ( phat + (1 - phat) * ( -t / c * Gmod ).exp() ), 0).sum();\n  val -= a / c;\n\n  return val;\n}\n\ndouble compute_K1_fast_snp(const double& t, const double& b, const double& c, const double& d, const double& denum, SpVec const& Gsparse, const Ref<const ArrayXd>& Gmod, const Ref<const ArrayXd>& phat, const Ref<const ArrayXb>& mask){\n\n  uint32_t index_j;\n  double val = 0;\n\n  for (SpVec::InnerIterator it(Gsparse); it; ++it) {\n    index_j = it.index();\n    if(!mask(index_j)) continue;\n\n    val += ( Gmod(index_j) * phat(index_j) / c ) / ( phat(index_j) + (1 - phat(index_j)) * exp( -t / c * Gmod(index_j)) );\n  }\n  val += -d / c + t / denum * b;\n\n  return val;\n}\n\ndouble compute_K2_snp(const double& t, const double& a, const double& c, const Ref<const ArrayXd>& Gmod, const Ref<const ArrayXd>& phat, const Ref<const ArrayXd>& Gamma_sqrt, const Ref<const ArrayXb>& mask){\n\n  ArrayXd Vexp = -t / c * Gmod;\n  if((mask && (Vexp > MAX_EXP_LIM)).any()) { return 0; }\n  double val = mask.select( ( Gmod.square() * Gamma_sqrt.square() / (c*c) * Vexp.exp()) / ( phat + (1 - phat) * Vexp.exp() ).square(), 
0).sum();\n\n  return val;\n}\n\ndouble compute_K2_fast_snp(const double& t, const double& b, const double& c, const double& d, const double& denum, SpVec const& Gsparse, const Ref<const ArrayXd>& Gmod, const Ref<const ArrayXd>& phat, const Ref<const ArrayXd>& Gamma_sqrt, const Ref<const ArrayXb>& mask){\n\n  uint32_t index_j;\n  double val = 0, denum_v, vexp;\n\n  for (SpVec::InnerIterator it(Gsparse); it; ++it) {\n    index_j = it.index();\n    if(!mask(index_j)) continue;\n    vexp = -t / c * Gmod(index_j);\n    if(vexp > MAX_EXP_LIM) { return 0; }\n    denum_v = phat(index_j) + (1 - phat(index_j)) * exp( vexp );\n    val += ( Gmod(index_j) * Gmod(index_j) * Gamma_sqrt(index_j) * Gamma_sqrt(index_j) * exp( vexp ) / (c*c) ) / (denum_v * denum_v);\n    //cerr << \"phat:\" << phat(index_j) << \"; t:\"<< t << \"; c:\" << c << \";G:\"<< Gmod(index_j)<< \" ;denum:\" << denum_v <<\"\\n\";\n  }\n  val += b / denum;\n\n  return val;\n}\n\nvoid get_SPA_pvalue_snp(const double& root, const double& tval, double& pv, bool& test_fail, const double& denum, SpVec const& Gsparse, const Ref<const ArrayXd>& phat, const Ref<const ArrayXd>& Gamma_sqrt, struct spa_data& spa_df, const Ref<const ArrayXb>& mask){\n\n  int lambda = spa_df.pos_score ? 1 : -1; // if score is negative, adjust K and K''\n  double kval, k2val, wval, vval, rval;\n  normal nd(0,1);\n\n  kval = spa_df.fastSPA ? compute_K_fast_snp(lambda * root, spa_df.val_b, spa_df.val_c, spa_df.val_d, denum, Gsparse, spa_df.Gmod, phat, mask) : compute_K_snp(lambda * root, spa_df.val_a, spa_df.val_c, spa_df.Gmod, phat, mask);\n  k2val = spa_df.fastSPA ? compute_K2_fast_snp(lambda * root, spa_df.val_b, spa_df.val_c, spa_df.val_d, denum, Gsparse, spa_df.Gmod, phat, Gamma_sqrt, mask) : compute_K2_snp(lambda * root, spa_df.val_a, spa_df.val_c, spa_df.Gmod, phat, Gamma_sqrt, mask);\n  if(k2val == 0) {\n    test_fail = true;\n    return;\n  }\n\n  wval = sgn(root) * sqrt( 2 * ( root * tval - kval ) );\n  vval = root * sqrt( k2val );\n  //cerr << \" root:\" << root << \" kval:\" << kval << \" k2val:\" << k2val << \" wval:\" << wval << \" vval:\" << vval << \" \";\n  if(vval == 0) { // root is 0 so s=0 (K'(0)=0)\n    pv = 0.5;\n  } else {\n    rval = wval + log( vval / wval ) / wval;\n    pv = cdf(nd, rval); // one-sided\n  }\n  test_fail = false;\n}\n\n/////////////////////////////////////////////////\n/////////////////////////////////////////////////\n////    Functions for sum stats output\n/////////////////////////////////////////////////\n/////////////////////////////////////////////////\n\n//// header line\nstd::string print_header_output(struct param const* params){\n\n  if(params->split_by_pheno)\n    return print_header_output_single(params);\n  else\n    if(params->trait_set) \n      return print_header_output_all_multitrait(params);\n    else if(params->multiphen)\n      return print_header_output_all_multiphen(params);\n    else\n      return print_header_output_all(params);\n}\n\nstd::string print_header_output_all_multiphen(struct param const* params){\n\n  std::ostringstream buffer;\n\n  buffer << \"CHROM GENPOS ID ALLELE0 ALLELE1 MAC A1FREQ N LOG10P MULTINOM IT UP FIRTH\";\n  buffer << endl;\n\n  return buffer.str();\n}\n\nstd::string print_header_output_all_multitrait(struct param const* params){\n\n  std::ostringstream buffer;\n\n  buffer << \"CHROM GENPOS ID ALLELE0 ALLELE1 MAC A1FREQ N\";\n  // p-values for single-trait tests\n  /* for(int i = 0; i < params->n_pheno; i++) { */\n  /*   buffer << \"LOG10P.Y0\" << i+1 << \" \"; */\n  /* } */\n  
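/* } */\n  // minP across single traits, then the multi-trait test p-values\n  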
buffer << \" LOG10P.MINP0 LOG10Q.MINP0\";\n  // p-values for multi-trait tests\n  buffer << \" LOG10P.MANOVA LOG10P.OMNIBUS0 LOG10BF.BAYES LOG10P.NNLS0 LOG10P.SUMZ0 LOG10P.NPMANOVA LOG10P.HOMNIBUS0 LOG10P.CPC0\"\n      << \" LOG10P.RCPC0SUMCHI2 LOG10P.RCPC0FISHER LOG10P.RCPC0ACAT\" \n      << \" LOG10P.ACPC0SUMCHI2 LOG10P.ACPC0FISHER LOG10P.ACPC0ACAT\" \n      << \" LOG10Q.NNLS0\";\n  // z-scores for single-trait models\n  for(int i = 0; i < params->n_pheno; i++) {\n    buffer << \" \" << \"Z.Y0\" << i+1;\n  }\n  // z-scores for PCs\n  for(int i = 0; i < params->n_pheno; i++) {\n    buffer << \" \" << \"Z.PC0\" << i+1;\n  }\n  // z-scores for Robust PCs\n  for(int i = 0; i < params->n_pheno; i++) {\n    buffer << \" \" << \"Z.RPC0\" << i+1;\n  }\n  // z-scores for Adjusted PCs\n  for(int i = 0; i < params->n_pheno; i++) {\n    buffer << \" \" << \"Z.APC0\" << i+1;\n  }\n  buffer << endl;\n\n  return buffer.str();\n}\n\nstd::string print_header_output_all(struct param const* params){\n\n  int i;\n  std::ostringstream buffer;\n\n  buffer << \"CHROM GENPOS ID ALLELE0 ALLELE1 A1FREQ \" << \n    ( params->af_cc ? \"A1FREQ_CASES A1FREQ_CONTROLS \":\"\") <<\n    ( !params->build_mask && params->dosage_mode ? \"INFO \":\"\") \n    << \"N \" <<\n    ( params->af_cc ? \"N_CASES N_CONTROLS \":\"\") <<\n    \"N_RR N_RA N_AA \" << // across all analyzed samples (with dosages then hardcounts)\n    \"TEST\";\n\n  for(i = 0; i < params->n_pheno; i++) \n    buffer << \" BETA.Y\" << i+1 << \" SE.Y\" << i+1 << \" CHISQ.Y\" << i+1 << \" LOG10P.Y\" << i+1;\n  // end of line\n  buffer << \" EXTRA\\n\";\n\n  return buffer.str();\n}\n\nstd::string print_header_output_single(struct param const* params){\n\n  std::ostringstream buffer;\n\n  buffer << \"CHROM GENPOS ID ALLELE0 ALLELE1 A1FREQ \" << \n    ( params->af_cc ? \"A1FREQ_CASES A1FREQ_CONTROLS \":\"\") <<\n    ( !params->build_mask && params->dosage_mode ? \"INFO \":\"\") << \n    \"N \" <<\n    ( params->af_cc ? 
\"N_CASES N_CONTROLS \":\"\") <<\n    \"TEST BETA SE CHISQ LOG10P EXTRA\\n\";\n\n  return buffer.str();\n}\n\nstd::string print_header_output_htp(){\n\n  std::ostringstream buffer;\n\n  buffer << \"Name\" << \"\\t\" << \"Chr\" << \"\\t\" << \"Pos\" << \"\\t\" << \"Ref\" << \"\\t\" << \"Alt\" << \"\\t\" << \"Trait\" << \"\\t\" << \"Cohort\" << \"\\t\" << \"Model\" << \"\\t\" << \"Effect\" << \"\\t\" << \"LCI_Effect\" << \"\\t\" << \"UCI_Effect\" << \"\\t\" << \"Pval\" << \"\\t\" << \"AAF\" << \"\\t\" << \"Num_Cases\"<< \"\\t\" << \"Cases_Ref\" << \"\\t\" << \"Cases_Het\" << \"\\t\" << \"Cases_Alt\" << \"\\t\" << \"Num_Controls\" << \"\\t\" << \"Controls_Ref\" << \"\\t\" << \"Controls_Het\"<< \"\\t\"<< \"Controls_Alt\" << \"\\t\" << \"Info\\n\";\n\n  return buffer.str();\n}\n\n//// header info for each snp\nstd::string print_sum_stats_head(const int& snp_count, vector<snp> const& snpinfo){\n\n  std::ostringstream buffer;\n\n  buffer << snpinfo[snp_count].chrom << \" \" << snpinfo[snp_count].physpos << \" \"<< snpinfo[snp_count].ID << \" \"<< snpinfo[snp_count].allele1 << \" \"<< snpinfo[snp_count].allele2 << \" \" ;\n\n  return buffer.str();\n}\n\nstd::string print_sum_stats_head_htp(const int& snp_count, const string& pheno_name, const string& model, vector<snp> const& snpinfo, struct param const* params){\n\n  std::ostringstream buffer;\n\n  buffer << snpinfo[snp_count].ID << \"\\t\"<< snpinfo[snp_count].chrom << \"\\t\" << snpinfo[snp_count].physpos << \"\\t\"<< snpinfo[snp_count].allele1 << \"\\t\"<< snpinfo[snp_count].allele2 << \"\\t\" << pheno_name << \"\\t\" << params->cohort_name << \"\\t\" << model << \"\\t\";\n\n  return buffer.str();\n}\n\n\n// print sum stats row per snp\nstd::string  print_sum_stats_line(int const& snp_index, int const& i, string const& tmpstr, string const& test_string, string const& model_type, variant_block* block_info, data_thread* dt_thr, vector<snp> const& snpinfo, struct in_files& files, struct param const& params){\n\n  std::ostringstream buffer;\n\n  if(params.htp_out) {\n    buffer <<  print_sum_stats_head_htp(snp_index, ( params.htp_use_eventname ? files.t2e_map[files.pheno_names[i]] : files.pheno_names[i] ), model_type, snpinfo, &params) << print_sum_stats_htp(dt_thr->bhat(i), dt_thr->se_b(i), dt_thr->chisq_val(i), dt_thr->pval_log(i), block_info->af(i), block_info->info(i), block_info->mac(i), block_info->genocounts, i, !block_info->test_fail(i), 1, &params, dt_thr->scores(i), dt_thr->cal_factor(i), -1, dt_thr->skat_var(i));\n  } else {\n    buffer << (!params.split_by_pheno && (i>0) ? \"\" : tmpstr) << print_sum_stats((params.split_by_pheno ? block_info->af(i) : block_info->af1), block_info->af_case(i), block_info->af_control(i), block_info->n_rr, block_info->n_aa, (params.split_by_pheno ? block_info->info(i) : block_info->info1), (params.split_by_pheno ? 
block_info->ns(i) : block_info->ns1), block_info->ns_case(i), block_info->ns_control(i), test_string, dt_thr->bhat(i), dt_thr->se_b(i), dt_thr->chisq_val(i), dt_thr->pval_log(i), !block_info->test_fail(i), 1, &params, (i+1));\n  }\n\n  return buffer.str();\n}\n\n\n//// test info for each snp\nstd::string print_sum_stats(const double& af, const double& af_case, const double& af_control, const int& n_rr, const int& n_aa, const double& info, const int& n, const int& ns_case, const int& ns_control, const string& model, const double& beta, const double& se, const double& chisq, const double& pv, const bool& test_pass, const int& df, struct param const* params, int const& ipheno){\n\n  if(params->split_by_pheno)\n    return print_sum_stats_single(af, af_case, af_control, info, n, ns_case, ns_control, model, beta, se, chisq, pv, test_pass, df, params);\n  else\n    return print_sum_stats_all(af, af_case, af_control, n_rr, n_aa, info, n, ns_case, ns_control, model, beta, se, chisq, pv, test_pass, df, params, ipheno);\n}\n\n// native format - all phenos\nstd::string print_sum_stats_all(const double& af, const double& af_case, const double& af_control, const int& n_rr, const int& n_aa, const double& info, const int& n, const int& ns_case, const int& ns_control, const string& model, const double& beta, const double& se, const double& chisq, const double& pv, const bool& test_pass, const int& df, struct param const* params, int const& ipheno){\n\n  std::ostringstream buffer;\n  bool print_afs = (af >= 0), print_info = (info >= 0), print_se = (se >= 0) && !is_nan(se), print_genoc = (n_rr >= 0);\n  bool print_pv = (chisq>=0) && test_pass && !is_nan(pv);\n\n  // AF N INFO TEST\n  if(ipheno == 1) {\n    if(print_afs) buffer << af ;\n    else buffer << \"NA\" ;\n    if( params->af_cc ){\n      if(print_afs) buffer << \" \" << af_case << \" \" << af_control;\n      else buffer << \" NA NA\";\n    }\n    if(!params->build_mask && params->dosage_mode) {\n      if(print_info) buffer << \" \" << info;\n      else buffer << \" NA\";\n    }\n    buffer << \" \" << n ;\n    if( params->af_cc )  buffer << \" NA NA\";\n    if(print_genoc) buffer << \" \" << n_rr << \" \" << n - n_rr - n_aa << \" \" << n_aa ;\n    else buffer << \" NA NA NA\";\n    buffer << \" \" << model ;\n  }\n\n  // BETA SE\n  if(print_se) buffer << ' ' << beta << ' ' << se;\n  else buffer << \" NA NA\";\n\n  // CHISQ PV\n  if(print_pv) buffer << ' ' << chisq << ' ' << pv;\n  else buffer << \" NA NA\";\n\n  // extra column\n  if(ipheno == params->n_pheno) {\n    if(params->joint_test && (df<0)) buffer << \" DF=NA\\n\";\n    else if(params->joint_test) buffer << \" DF=\" << df << endl;\n    else buffer << \" NA\\n\";\n  }\n\n  return buffer.str();\n}\n\nstd::string print_na_sumstats(int const& ph, int const& df, string const& header, string const& model, variant_block const* block_info, struct param const& params){\n  return ( ( ph==0 ? 
header : \"\" ) + print_sum_stats_all(block_info->af1, block_info->af_case(ph), block_info->af_control(ph), block_info->n_rr,  block_info->n_aa, block_info->info1, block_info->ns1, block_info->ns_case(ph), block_info->ns_control(ph), model, -1, -1, -1, -1, false, df, &params, ph + 1) ); // pheno index is 1-based\n}\n\n// native format - single pheno\nstd::string print_sum_stats_single(const double& af, const double& af_case, const double& af_control, const double& info, const int& n, const int& ns_case, const int& ns_control, const string& model, const double& beta, const double& se, const double& chisq, const double& pv, const bool& test_pass, const int& df, struct param const* params){\n\n  std::ostringstream buffer;\n  bool print_afs = (af >= 0), print_info = (info >= 0), print_se = (se >= 0) && !is_nan(se);\n  bool print_pv = (chisq>=0) && test_pass && !is_nan(pv);\n\n  // AF N INFO TEST\n  if(print_afs) buffer << af << \" \" ;\n  else buffer << \"NA \" ;\n  if( params->af_cc ){\n    if(print_afs) buffer << af_case << \" \" << af_control << \" \";\n    else buffer << \"NA NA \";\n  }\n  if(!params->build_mask && params->dosage_mode) {\n    if(print_info) buffer << info << \" \";\n    else buffer << \"NA \";\n  }\n  buffer << n ;\n  if( params->af_cc )  buffer << \" \" << ns_case << \" \" << ns_control;\n  buffer << \" \" << model << \" \";\n\n  // BETA SE\n  if(print_se) buffer << beta << ' ' << se;\n  else buffer << \"NA NA\";\n\n  // CHISQ PV\n  if(print_pv) buffer << ' ' << chisq << ' ' << pv;\n  else buffer << \" NA NA\";\n\n  // extra column\n  vector<string> extraCol;\n  if(!test_pass) extraCol.push_back(\"TEST_FAIL\");\n  if(params->joint_test && (df<0)) extraCol.push_back(\"DF=NA\");\n  else if(params->joint_test) extraCol.push_back(\"DF=\" + to_string(df));\n  buffer << \" \" << (extraCol.size() > 0 ? 
print_scsv(extraCol) : \"NA\") << endl;\n\n  return buffer.str();\n}\n\n\nstd::string print_sum_stats_htp(const double& beta, const double& se, const double& chisq, const double& lpv, const double& af, const double& info, const double& mac, const Ref<const MatrixXi>& genocounts, const int& ph, const bool& test_pass, const int& df, struct param const* params, const double& score, const double& cal_factor, const double& cal_factor_burden, const double& skat_var) {\n\n  std::ostringstream buffer;\n  string outp_val = \"-1\";\n  bool print_beta = test_pass && (se>=0) && !is_nan(se);\n  bool print_pv = test_pass && (chisq>=0) && !is_nan(lpv);\n  double effect_val, outse_val;\n\n  if(print_pv) {\n    if(!params->uncapped_pvals && (lpv > params->log10_nl_dbl_dmin)) outp_val = convert_logp_raw( params->log10_nl_dbl_dmin );\n    else if(lpv > 0) outp_val = convert_logp_raw( lpv );\n    else outp_val = \"0.9999999\";\n  } \n\n  // Effect / CI bounds / Pvalue columns\n  if(print_pv && !print_beta)\n    buffer << \"NA\\tNA\\tNA\\t\" << outp_val << \"\\t\";\n  else if(!print_pv && !print_beta)\n    buffer << \"NA\\tNA\\tNA\\tNA\\t\";\n  else if((params->trait_mode!=1) || ((params->trait_mode==1) && params->firth && test_pass) ){ // non-bt or firth\n\n    if(params->trait_mode==0) // QT\n      buffer << beta << \"\\t\" << (beta - params->zcrit * se) << \"\\t\" << (beta + params->zcrit * se) << \"\\t\";\n    else // BT (on OR scale) or CT\n      buffer << exp(beta) << \"\\t\" << exp(beta - params->zcrit * se) << \"\\t\" << exp(beta + params->zcrit * se) << \"\\t\"; \n\n    if(print_pv) buffer << outp_val << \"\\t\";\n    else buffer << \"NA\\t\";\n\n  } else { // spa/logistic\n\n    if(print_pv) { // for spa or uncorrected logistic score test\n      // compute allelic OR\n      effect_val = (2*genocounts(3,ph)+genocounts(4,ph)+.5)*(2*genocounts(2,ph)+genocounts(1,ph)+.5)/(2*genocounts(5,ph)+genocounts(4,ph)+.5)/(2*genocounts(0,ph)+genocounts(1,ph)+.5);\n      // compute SE = log(allelic OR) / zstat\n      outse_val = fabs(log(effect_val)) / sqrt(chisq);\n      buffer << effect_val << \"\\t\" << effect_val * exp(- params->zcrit * outse_val) << \"\\t\" << effect_val * exp(params->zcrit * outse_val) << \"\\t\" << outp_val << \"\\t\";\n    } else if(!print_beta) \n      buffer << \"NA\" << \"\\t\" << \"NA\" << \"\\t\" << \"NA\" << \"\\t\" << \"NA\" << \"\\t\";\n    else // used in interaction tests\n      buffer << exp(beta) << \"\\t\" << exp(beta - params->zcrit * se) << \"\\t\" << exp(beta + params->zcrit * se) << \"\\tNA\\t\"; \n\n  }\n\n  // print out AF\n  if(af>=0)\n    buffer << af << \"\\t\";\n  else\n    buffer << \"NA\\t\";\n\n  if(mac>0) {\n\n    // print counts in cases\n    buffer << genocounts.block(0,ph,3,1).sum() << \"\\t\" << genocounts(0,ph) << \"\\t\" << genocounts(1,ph) << \"\\t\" << genocounts(2,ph) << \"\\t\";\n    // print counts in controls\n    if(params->trait_mode==1 || params->trait_mode==3)\n      buffer << genocounts.block(3,ph,3,1).sum() << \"\\t\" << genocounts(3,ph) << \"\\t\" << genocounts(4,ph) << \"\\t\" << genocounts(5,ph);\n    else buffer << \"NA\\tNA\\tNA\\tNA\";\n\n  } else { // for skat/acat-type tests\n    buffer << params->pheno_counts(ph, 0) << \"\\tNA\\tNA\\tNA\\t\";\n    if(params->trait_mode==1)\n      buffer << params->pheno_counts(ph, 1) << \"\\tNA\\tNA\\tNA\"; \n    else buffer << \"NA\\tNA\\tNA\\tNA\";\n  }\n\n  // info column\n  vector<string> infoCol;\n  if(print_beta){\n    if(params->trait_mode && test_pass){\n      infoCol.push_back( \"REGENIE_BETA=\" 
+ convert_double_to_str(beta) );\n      infoCol.push_back( \"REGENIE_SE=\" + convert_double_to_str(se) );\n      // SPA/uncorrected logistic => also print SE from allelic OR\n      if((params->trait_mode==1) && print_pv && !params->firth) infoCol.push_back( \"SE=\" + convert_double_to_str(outse_val) );\n    } else if(params->trait_mode){\n      infoCol.push_back( \"REGENIE_BETA=NA\" );\n      infoCol.push_back( \"REGENIE_SE=NA\");\n      // SPA/uncorrected logistic => also print SE from allelic OR\n      if((params->trait_mode==1) && print_pv && !params->firth) infoCol.push_back( \"SE=\" + convert_double_to_str(outse_val) );\n    } else infoCol.push_back( \"REGENIE_SE=\" + to_string(se) );// for QTs\n  }\n  // info score\n  if(!params->build_mask && params->dosage_mode && (info >= 0) ) infoCol.push_back( \"INFO=\" + convert_double_to_str(info) );\n  // mac\n  if(mac>=0) infoCol.push_back( \"MAC=\" + to_string(mac) );\n  // score test statistic \n  if(score != params->missing_value_double) infoCol.push_back( \"SCORE=\" + convert_double_to_str(score) );\n  if(skat_var != params->missing_value_double) infoCol.push_back(\"SKATV=\" + convert_double_to_str(skat_var*abs(cal_factor)));\n  //if(cal_factor != -1) infoCol.push_back( \"CF=\" + to_string(cal_factor) );\n  if(cal_factor_burden != -1) infoCol.push_back( \"CF_BURDEN=\" + to_string(cal_factor_burden) );\n  // df\n  if(params->joint_test) infoCol.push_back(\"DF=\" + to_string(df));\n  // log10P\n  infoCol.push_back( \"LOG10P=\" + (print_pv ? convert_double_to_str(lpv) : \"NA\") );\n  // indicator for no beta printed (joint or vc tests)\n  if(se<0) infoCol.push_back( \"NO_BETA\" );\n  // print info column\n  buffer << \"\\t\" << (infoCol.size() > 0 ? print_scsv(infoCol) : \"NA\") << \"\\n\";\n\n  return buffer.str();\n}\n\n\n\n//// print summary of step 2 run\nstd::string print_summary(Files* ofile, string const& out, std::vector<std::shared_ptr<Files>>& ofile_split, std::vector< string >const& out_split, int const& n_corrected, struct tally const& snp_tally, struct in_files const& files, struct f_ests& fest, struct param const& params){\n\n  std::ostringstream buffer;\n\n  if(!params.skip_test) {\n\n    if(!params.split_by_pheno){\n      buffer << \"\\nAssociation results stored in file : \" << out << endl;\n      buffer << \" + dictionary with trait names in file : \" << files.out_file << \".regenie.Ydict\\n\";\n      ofile->closeFile();\n    } else {\n      buffer << \"\\nAssociation results stored separately for each trait \" << ( params.htp_out ? \"(HTPv4 format) \" : \"\" ) << \"in files : \\n\";\n      for( int j = 0; j < params.n_pheno; ++j ) {\n        if( !params.pheno_pass(j) ) continue;\n        buffer << \"* [\" << out_split[j] << \"]\\n\";\n        ofile_split[j]->closeFile();\n      }\n    }\n    buffer << endl;\n\n    int n_corrected_cox = 0;\n    if (params.trait_mode == 3) n_corrected_cox = n_corrected / 2;\n    if(params.firth || params.use_SPA) {\n      buffer << \"Number of tests with \" << (params.firth ? 
\"Firth \" : \"SPA \");\n      if (params.trait_mode == 3) {\n        buffer << \"correction : \" << n_corrected_cox <<  endl;\n        buffer << \"Number of failed tests : (\" << snp_tally.n_failed_tests << \"/\" << n_corrected_cox << \")\\n\";\n      } else {\n        buffer << \"correction : \" << n_corrected <<  endl;\n        buffer << \"Number of failed tests : (\" << snp_tally.n_failed_tests << \"/\" << n_corrected << \")\\n\";\n      }\n      \n    }\n\n  }\n\n  buffer << \"Number of ignored tests due to low MAC \";\n  if( params.setMinINFO ) buffer << \"or info score \";\n  if(params.trait_mode == 3) {\n    buffer << \": \" << snp_tally.n_ignored_snps/2 * params.n_tests_per_variant * params.n_pheno/2 + snp_tally.n_ignored_tests/2 * params.n_tests_per_variant << endl;\n  } else {\n    buffer << \": \" << snp_tally.n_ignored_snps * params.n_tests_per_variant * params.n_pheno + snp_tally.n_ignored_tests * params.n_tests_per_variant << endl;\n  }\n\n  if(params.write_masks)\n    buffer << \"\\nMasks written to : [\" << files.out_file << \"_masks.{bed,bim,fam}]\\n\";\n\n  if(params.write_null_firth){ // store file names with null ests\n    buffer << \"List of files with null Firth estimates written to: [\" \n      << print_null_firth_info(files, fest, params) << \"]\\n\";\n  }\n\n  return buffer.str();\n\n}\n"
  },
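  {
    "path": "example/sumstat_row_sketch.cpp",
    "content": "// Illustrative sketch only -- not part of the regenie build. It mirrors the\n// NA-handling convention of print_sum_stats_single() in src/Step2_Models.cpp\n// on a reduced set of columns: a statistic is printed only when its sentinel\n// check passes (af >= 0; se >= 0 and not NaN; chisq >= 0 and test passed),\n// otherwise the literal \"NA\" is written. The format_row() helper and the\n// sample values below are hypothetical.\n#include <cmath>\n#include <iostream>\n#include <sstream>\n#include <string>\n\nstd::string format_row(double af, int n, const std::string& test,\n                       double beta, double se, double chisq, double log10p,\n                       bool test_pass) {\n  std::ostringstream buffer;\n  bool print_afs = (af >= 0);\n  bool print_se = (se >= 0) && !std::isnan(se);\n  bool print_pv = (chisq >= 0) && test_pass && !std::isnan(log10p);\n\n  // A1FREQ N TEST\n  if (print_afs) buffer << af;\n  else buffer << \"NA\";\n  buffer << ' ' << n << ' ' << test;\n\n  // BETA SE\n  if (print_se) buffer << ' ' << beta << ' ' << se;\n  else buffer << \" NA NA\";\n\n  // CHISQ LOG10P\n  if (print_pv) buffer << ' ' << chisq << ' ' << log10p;\n  else buffer << \" NA NA\";\n\n  // EXTRA column flags failed tests, as in the native split-by-pheno format\n  buffer << ' ' << (test_pass ? \"NA\" : \"TEST_FAIL\") << '\\n';\n  return buffer.str();\n}\n\nint main() {\n  std::cout << \"A1FREQ N TEST BETA SE CHISQ LOG10P EXTRA\\n\";\n  std::cout << format_row(0.012, 5000, \"ADD\", 0.15, 0.04, 14.06, 4.76, true);\n  std::cout << format_row(-1, 5000, \"ADD\", -1, -1, -1, -1, false);\n  return 0;\n}\n"
  },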
  {
    "path": "src/Step2_Models.hpp",
    "content": "/* \n\n   This file is part of the regenie software package.\n\n   Copyright (c) 2020-2024 Joelle Mbatchou, Andrey Ziyatdinov & Jonathan Marchini\n\n   Permission is hereby granted, free of charge, to any person obtaining a copy\n   of this software and associated documentation files (the \"Software\"), to deal\n   in the Software without restriction, including without limitation the rights\n   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n   copies of the Software, and to permit persons to whom the Software is\n   furnished to do so, subject to the following conditions:\n\n   The above copyright notice and this permission notice shall be included in all\n   copies or substantial portions of the Software.\n\n   THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n   SOFTWARE.\n\n*/\n\n#ifndef TEST_MODELS_H\n#define TEST_MODELS_H\n\n#define MAX_EXP_LIM 708\n\nstruct f_ests {\n\n  Eigen::MatrixXd cov_blup_offset;\n  Eigen::MatrixXd beta_null_firth;\n  std::vector<std::shared_ptr<Files>> firth_est_files;\n  double deviance_logistic;\n  double bhat_firth, se_b_firth;\n  \n};\n\nstruct spa_data {\n\n  Eigen::ArrayXd Gmod;\n  double val_a, val_b, val_c, val_d;\n  bool pos_score, fastSPA;\n\n};\n\nvoid blup_read_chr(bool const&,int const&,struct ests&,struct in_files&,struct filter const&,struct phenodt const&,struct param&,mstream&);\n\n// score tests\n/* // for all snps/traits\nvoid compute_score(std::vector<uint64> const& indices, int const& chrom, std::string const& test_string, std::string const& model_type, const Eigen::Ref<const Eigen::MatrixXd>& yres, const Eigen::Ref<const Eigen::RowVectorXd>& p_sd_yres, struct param const& params, struct phenodt& pheno_data, struct geno_block& gblock, std::vector<variant_block>& all_snps_info, std::vector<snp> const& snpinfo, struct ests const& m_ests, struct f_ests& fest, struct in_files const& files, mstream& sout);\nvoid compute_score_qt(std::vector<uint64> const& indices, int const& chrom, std::string const& test_string, std::string const& model_type, const Eigen::Ref<const Eigen::MatrixXd>& yres, const Eigen::Ref<const Eigen::RowVectorXd>& p_sd_yres, struct param const& params, struct phenodt& pheno_data, struct geno_block& gblock, std::vector<variant_block>& all_snps_info, std::vector<snp> const& snpinfo, struct in_files const& files);\n*/\n\nvoid compute_score(int const&,int const&,int const&,int const&,std::string const&,std::string const&,const Eigen::Ref<const Eigen::MatrixXd>&,const Eigen::Ref<const Eigen::RowVectorXd>&,struct param const&,struct phenodt&,struct geno_block&,variant_block*,std::vector<snp> const&,struct ests const&,struct f_ests&,struct in_files&,mstream&);\nvoid compute_score_qt(int const&,int const&,int const&,std::string const&,std::string const&,const Eigen::Ref<const Eigen::MatrixXd>&,const Eigen::Ref<const Eigen::RowVectorXd>&,struct param const&,struct phenodt&,struct geno_block&,variant_block*,std::vector<snp> const&,struct in_files&,mstream&);\nvoid compute_score_qt_mcc(int const&,int const&,int const&,std::string const&,std::string const&,const 
Eigen::Ref<const Eigen::MatrixXd>&,const Eigen::Ref<const Eigen::RowVectorXd>&,struct param const&,struct phenodt&,struct geno_block&,variant_block*,std::vector<snp> const&,struct in_files&,mstream&);\nvoid compute_score_bt(int const&,int const&,int const&,int const&,std::string const&,std::string const&,const Eigen::Ref<const Eigen::MatrixXd>&,struct param const&,struct phenodt&,struct geno_block&,variant_block*,std::vector<snp> const&,struct ests const&,struct f_ests&,struct in_files&,mstream&);\nvoid compute_score_ct(int const&,int const&,int const&,int const&,std::string const&,std::string const&,const Eigen::Ref<const Eigen::MatrixXd>&,struct param const&,struct phenodt&,struct geno_block&,variant_block*,std::vector<snp> const&,struct ests const&,struct f_ests&,struct in_files&,mstream&);\nvoid compute_score_cox(int const&, int const&, int const&, int const&, std::string const&, std::string const&, struct param const&, struct phenodt&, struct geno_block&, variant_block*, std::vector<snp> const&, struct ests const&, struct f_ests&, struct in_files&, mstream&);\n\nvoid check_pval_snp(variant_block*,data_thread*,int const&,int const&,int const&,struct phenodt&,struct geno_block&,struct ests const&,struct f_ests&,struct param const&,mstream&);\nvoid get_sumstats(bool const&,int const&,data_thread*);\nvoid run_firth_correction_snp(int const&,int const&,int const&,struct geno_block&,variant_block*,data_thread*,struct phenodt&,struct ests const&,struct f_ests&,struct param const&,mstream&);\n\n// firth\nbool fit_approx_firth_null(int const&,int const&,struct phenodt const*,struct ests const*,Eigen::Ref<Eigen::ArrayXd>,struct param*, bool const& save_se = false);\nvoid fit_null_firth(bool const&,int const&,struct f_ests*,struct phenodt*,struct ests const*,struct in_files*,struct param*,mstream&);\nvoid fit_firth_logistic_snp(int const&,int const&,int const&,bool const&,struct param const*,struct phenodt*,struct ests const*,struct f_ests const*,const Eigen::Ref<const Eigen::MatrixXd>&,variant_block*,data_thread*,mstream&);\nvoid fit_firth_logistic_snp_fast(int const&,int const&,int const&,bool const&,struct param const*,struct phenodt*,struct ests const*,struct f_ests const*,const Eigen::Ref<const Eigen::VectorXd>&,variant_block*,data_thread*,mstream&);\nbool fit_firth(int const&,const Eigen::Ref<const Eigen::ArrayXd>&,const Eigen::Ref<const Eigen::MatrixXd>&,const Eigen::Ref<const Eigen::ArrayXd>&,const Eigen::Ref<const ArrayXb>&,Eigen::ArrayXd&,Eigen::ArrayXd&,Eigen::ArrayXd&,Eigen::ArrayXd&,int const&,double&,bool const&,double&,int const&,int const&,double const&,struct param const*,bool const& check_score_inc = true);\nbool fit_firth_nr(double&,const Eigen::Ref<const Eigen::ArrayXd>&,const Eigen::Ref<const Eigen::MatrixXd>&,const Eigen::Ref<const Eigen::ArrayXd>&,const Eigen::Ref<const ArrayXb>&,Eigen::ArrayXd&,Eigen::ArrayXd&,Eigen::ArrayXd&,Eigen::ArrayXd&,int const&,double&,bool const&,double&,int const&,int const&,double const&,struct param const*,bool const& check_score_inc = true);\nbool fit_firth_pseudo(double&,const Eigen::Ref<const Eigen::ArrayXd>&,const Eigen::Ref<const Eigen::MatrixXd>&,const Eigen::Ref<const Eigen::ArrayXd>&,const Eigen::Ref<const ArrayXb>&,Eigen::ArrayXd&,Eigen::ArrayXd&,Eigen::ArrayXd&,Eigen::ArrayXd&,int const&,double&,bool const&,double&,int const&,int const&,double const&,struct param const*, bool const& apply_early_checks = true);\nuint fit_firth_pseudo(double const&,const Eigen::Ref<const Eigen::ArrayXd>&,const Eigen::Ref<const Eigen::VectorXd>&,const 
Eigen::Ref<const Eigen::ArrayXd>&,const Eigen::Ref<const ArrayXb>&,const Eigen::Ref<const Eigen::ArrayXi>&,double&,double&,double&,int const&,int const&,double const&,struct param const*);\nbool fit_firth(double const&,const Eigen::Ref<const Eigen::ArrayXd>&,const Eigen::Ref<const Eigen::VectorXd>&,const Eigen::Ref<const Eigen::ArrayXd>&,const Eigen::Ref<const ArrayXb>&,const Eigen::Ref<const Eigen::ArrayXi>&,double&,double&,double&,int const&,int const&,double const&,struct param const*);\nbool fit_firth_adam(int const&,double&,const Eigen::Ref<const Eigen::ArrayXd>&,const Eigen::Ref<const Eigen::MatrixXd>&,const Eigen::Ref<const Eigen::ArrayXd>&,const Eigen::Ref<const ArrayXb>&,Eigen::ArrayXd&,Eigen::ArrayXd&,Eigen::ArrayXd&,Eigen::ArrayXd&,int const&,double&,bool const&,double&,struct param const*);\nstd::string get_firth_est_allChr(struct in_files&,struct filter const& ,struct ests&,struct f_ests&,struct phenodt&,struct param&,mstream&);\nstd::string print_null_firth_info(struct in_files const&,struct f_ests&,struct param const&);\nvoid check_beta_start_firth(struct in_files&,struct param const&,mstream&);\nvoid get_beta_start_firth(const int&,struct f_ests*,struct in_files*,struct param const*,mstream&);\nvoid get_beta_start_firth(struct f_ests*,struct ests const*);\nvoid fit_null_firth_cox(bool const&, int const&, struct f_ests*, struct phenodt*, struct ests const*, struct in_files*, struct param*, mstream&);\nvoid fit_firth_cox_snp(int const&, int const&, int const&, struct param const*, struct phenodt*, struct ests const*, struct f_ests const*, const Eigen::Ref<const Eigen::MatrixXd>&, variant_block*, data_thread*, mstream&);\nvoid fit_firth_cox_snp_fast(int const&, int const&, int const&, struct param const*, struct phenodt*, struct ests const*, struct f_ests const*, const Eigen::Ref<const Eigen::VectorXd>&, variant_block*, data_thread*, mstream&);\n\n\n// spa (multithreading in openmp)\nvoid run_SPA_test(bool&,int const&,data_thread*,const Eigen::Ref<const ArrayXb>&,struct ests const&,struct param const&);\nvoid run_SPA_test_snp(double&,double&,const double&,const double&,bool const&,SpVec const&,const Eigen::Ref<const Eigen::ArrayXd>&,const Eigen::Ref<const Eigen::ArrayXd>&,const Eigen::Ref<const Eigen::ArrayXd>&,const Eigen::Ref<const ArrayXb>&,bool&,const double&,const double&,const double&,const double&);\ndouble solve_K1_snp(const double&,const double&,SpVec const&,const Eigen::Ref<const Eigen::ArrayXd>&,const Eigen::Ref<const Eigen::ArrayXd>&,struct spa_data&,const Eigen::Ref<const ArrayXb>&,const double&,const int&,const double&);\ndouble compute_K_snp(const double&,const double&,const double&,const Eigen::Ref<const Eigen::ArrayXd>&,const Eigen::Ref<const Eigen::ArrayXd>&,const Eigen::Ref<const ArrayXb>&);\ndouble compute_K1_snp(const double&,const double&,const double&,const Eigen::Ref<const Eigen::ArrayXd>&,const Eigen::Ref<const Eigen::ArrayXd>&,const Eigen::Ref<const ArrayXb>&);\ndouble compute_K2_snp(const double&,const double&,const double&,const Eigen::Ref<const Eigen::ArrayXd>&,const Eigen::Ref<const Eigen::ArrayXd>&,const Eigen::Ref<const Eigen::ArrayXd>&,const Eigen::Ref<const ArrayXb>&);\ndouble compute_K_fast_snp(const double&,const double&,const double&,const double&,const double&,SpVec const&,const Eigen::Ref<const Eigen::ArrayXd>&,const Eigen::Ref<const Eigen::ArrayXd>&,const Eigen::Ref<const ArrayXb>&);\ndouble compute_K1_fast_snp(const double&,const double&,const double&,const double&,const double&,SpVec const&,const Eigen::Ref<const Eigen::ArrayXd>&,const 
Eigen::Ref<const Eigen::ArrayXd>&,const Eigen::Ref<const ArrayXb>&);\ndouble compute_K2_fast_snp(const double&,const double&,const double&,const double&,const double&,SpVec const&,const Eigen::Ref<const Eigen::ArrayXd>&,const Eigen::Ref<const Eigen::ArrayXd>&,const Eigen::Ref<const Eigen::ArrayXd>&,const Eigen::Ref<const ArrayXb>&);\nvoid get_SPA_pvalue_snp(const double&,const double&,double&,bool&,const double&,SpVec const&,const Eigen::Ref<const Eigen::ArrayXd>&,const Eigen::Ref<const Eigen::ArrayXd>&,struct spa_data&,const Eigen::Ref<const ArrayXb>&); \n\n\n// printing sum stats\nstd::string print_header_output(struct param const*);\nstd::string print_header_output_all(struct param const*);\nstd::string print_header_output_all_multitrait(struct param const*);\nstd::string print_header_output_all_multiphen(struct param const*);\nstd::string print_header_output_single(struct param const*);\nstd::string print_header_output_htp();\nstd::string print_sum_stats_head(const int&,std::vector<snp> const&);\nstd::string print_sum_stats_head_htp(const int&,const std::string&,const std::string&,std::vector<snp> const&,struct param const*);\nstd::string print_sum_stats(const double&,const double&,const double&,const int&,const int&,const double&,const int&,const int&,const int&,const std::string&,const double&,const double&,const double&,const double&,const bool&,const int&,struct param const*,int const&);\nstd::string print_sum_stats_all(const double&,const double&,const double&,const int&,const int&,const double&,const int&,const int&,const int&,const std::string&,const double&,const double&,const double&,const double&,const bool&,const int&,struct param const*,int const&);\nstd::string print_na_sumstats(const int&,const int&,const std::string&,const std::string&,variant_block const*,struct param const&);\nstd::string print_sum_stats_single(const double&,const double&,const double&,const double&,const int&,const int&,const int&,const std::string&,const double&,const double&,const double&,const double&,const bool&,const int&,struct param const*);\nstd::string print_sum_stats_htp(const double&,const double&,const double&,const double&,const double&,const double&,const double&,const Eigen::Ref<const Eigen::MatrixXi>&,const int&,const bool&,const int&,struct param const*, const double& score = -999, const double& cal_factor = -1.0, const double& cal_factor_burden = -1.0, const double& skat_diag = -999);\nstd::string print_sum_stats_line(int const&,int const&,std::string const&,std::string const&,std::string const&,variant_block*,data_thread*,std::vector<snp> const&,struct in_files&,struct param const&);\n\nstd::string print_summary(Files*,std::string const&,std::vector<std::shared_ptr<Files>>&,std::vector< std::string >const&,int const&,struct tally const&,struct in_files const&,struct f_ests&,struct param const&);\n\n#endif\n"
  },
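  {
    "path": "example/spa_sketch.cpp",
    "content": "// Illustrative sketch only -- not part of the regenie build.\n// src/Step2_Models.hpp declares a saddlepoint approximation (SPA) for the\n// score test (compute_K*_snp, solve_K1_snp, get_SPA_pvalue_snp). This is a\n// self-contained toy version for the Bernoulli score S = sum_i g_i*(y_i - mu_i):\n// Newton-solve K'(t) = s on the cumulant generating function, then apply the\n// Barndorff-Nielsen tail formula. All names here are hypothetical and none of\n// the fast/sparse variants declared in the header are reproduced.\n#include <cmath>\n#include <iostream>\n#include <vector>\n\n// CGF of S and its first two derivatives, with y_i ~ Bernoulli(mu_i)\ndouble K(double t, const std::vector<double>& g, const std::vector<double>& mu) {\n  double v = 0;\n  for (size_t i = 0; i < g.size(); i++)\n    v += std::log(1 - mu[i] + mu[i] * std::exp(g[i] * t)) - g[i] * mu[i] * t;\n  return v;\n}\ndouble K1(double t, const std::vector<double>& g, const std::vector<double>& mu) {\n  double v = 0;\n  for (size_t i = 0; i < g.size(); i++) {\n    double e = mu[i] * std::exp(g[i] * t);\n    v += g[i] * e / (1 - mu[i] + e) - g[i] * mu[i];\n  }\n  return v;\n}\ndouble K2(double t, const std::vector<double>& g, const std::vector<double>& mu) {\n  double v = 0;\n  for (size_t i = 0; i < g.size(); i++) {\n    double e = mu[i] * std::exp(g[i] * t);\n    double d = 1 - mu[i] + e;\n    v += g[i] * g[i] * (1 - mu[i]) * e / (d * d);\n  }\n  return v;\n}\n\nint main() {\n  // toy data: genotypes and fitted null means (made up)\n  std::vector<double> g = {1, 0, 2, 0, 1, 0, 0, 1, 0, 2};\n  std::vector<double> mu = {.3, .2, .4, .1, .3, .2, .2, .5, .1, .4};\n  double s = 2.2; // observed score statistic\n\n  // Newton root-finding for K'(t) = s, as in solve_K1_snp()\n  double t = 0;\n  for (int it = 0; it < 50; it++) {\n    double diff = K1(t, g, mu) - s;\n    if (std::fabs(diff) < 1e-8) break;\n    t -= diff / K2(t, g, mu);\n  }\n\n  // Barndorff-Nielsen approximation to the upper tail P(S >= s)\n  double w = std::copysign(std::sqrt(2 * (t * s - K(t, g, mu))), t);\n  double v = t * std::sqrt(K2(t, g, mu));\n  double z = w + std::log(v / w) / w;\n  double pval = 0.5 * std::erfc(z / std::sqrt(2.0)); // Phi(-z)\n  std::cout << \"t_hat=\" << t << \" spa_pval(one-sided)=\" << pval << \"\\n\";\n  return 0;\n}\n"
  },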
  {
    "path": "src/bgen_to_vcf.hpp",
    "content": "\n//          Copyright Gavin Band 2008 - 2012.\n// Distributed under the Boost Software License, Version 1.0.\n//    (See accompanying file LICENSE_1_0.txt or copy at\n//          http://www.boost.org/LICENSE_1_0.txt)\n\n#include <iostream>\n#include <fstream>\n#include <cassert>\n#include <stdexcept>\n#include <memory>\n#include \"genfile/bgen/bgen.hpp\"\n\n// ProbSetter is a callback object appropriate for passing to bgen::read_genotype_data_block() or\n// the synonymous method of genfile::bgen::View. See the comment in bgen.hpp above\n// bgen::read_genotype_data_block(), or the bgen wiki for a description of the API.\n// The purpose of this object is to store genotype probability values in the desired\n// data structure (which here is a vector of vectors of doubles).\nstruct ProbSetter {\n  typedef std::vector< std::vector< double > > Data ;\n  ProbSetter( Data* result ):\n    m_result( result ),\n    m_sample_i(0)\n  {}\n\n  // Called once allowing us to set storage.\n  void initialise( std::size_t number_of_samples, std::size_t number_of_alleles ) {\n    m_result->clear() ;\n    m_result->resize( number_of_samples ) ;\n  }\n\n  // If present with this signature, called once after initialise()\n  // to set the minimum and maximum ploidy and numbers of probabilities among samples in the data.\n  // This enables us to set up storage for the data ahead of time.\n  void set_min_max_ploidy( uint32_t min_ploidy, uint32_t max_ploidy, uint32_t min_entries, uint32_t max_entries ) {\n    for( std::size_t i = 0; i < m_result->size(); ++i ) {\n      m_result->at( i ).reserve( max_entries ) ;\n    }\n  }\n\n  // Called once per sample to determine whether we want data for this sample\n  bool set_sample( std::size_t i ) {\n    m_sample_i = i ;\n    // Yes, here we want info for all samples.\n    return true ;\n  }\n\n  // Called once per sample to set the number of probabilities that are present.\n  void set_number_of_entries(\n      std::size_t ploidy,\n      std::size_t number_of_entries,\n      genfile::OrderType order_type,\n      genfile::ValueType value_type\n      ) {\n    assert( value_type == genfile::eProbability ) ;\n    m_result->at( m_sample_i ).resize( number_of_entries ) ;\n    m_entry_i = 0 ;\n  }\n\n  // Called once for each genotype (or haplotype) probability per sample.\n  void set_value( uint32_t, double value ) {\n    m_result->at( m_sample_i ).at( m_entry_i++ ) = value ;\n  }\n\n  // Ditto, but called if data is missing for this sample.\n  void set_value( uint32_t, genfile::MissingValue value ) {\n    // Here we encode missing probabilities with -1\n    m_result->at( m_sample_i ).at( m_entry_i++ ) = -1 ;\n  }\n\n  // If present with this signature, called once after all data has been set.\n  void finalise() {\n    // nothing to do in this implementation.\n  }\n\n  private:\n  Data* m_result ;\n  std::size_t m_sample_i ;\n  std::size_t m_entry_i ;\n} ;\n\n// BgenParser is a thin wrapper around the core functions in genfile/bgen/bgen.hpp.\n// This class tracks file state and handles passing the right callbacks.\nstruct BgenParser {\n\n\n  BgenParser():\n    m_state( e_NotOpen ),\n    m_have_sample_ids( false )\n  {}\n\n  void open( std::string const& filename ) {\n    m_filename =filename;\n    m_state = e_NotOpen;\n    m_have_sample_ids = false;\n\n    // Open the stream\n    m_stream.reset(new std::ifstream( filename, std::ifstream::binary )) ;\n    if( !*m_stream ) {\n      throw std::invalid_argument( filename ) ;\n    }\n    m_state = e_Open ;\n\n    // Read 
the offset, header, and sample IDs if present.\n    genfile::bgen::read_offset( *m_stream, &m_offset ) ;\n    genfile::bgen::read_header_block( *m_stream, &m_context ) ;\n    if( m_context.flags & genfile::bgen::e_SampleIdentifiers ) {\n      genfile::bgen::read_sample_identifier_block(\n          *m_stream, m_context,\n          [this]( std::string id ) { m_sample_ids.push_back( id ) ; }\n          ) ;\n      m_have_sample_ids = true ;\n    }\n\n    // Jump to the first variant data block.\n    m_stream->seekg( m_offset + 4 ) ;\n\n    // We keep track of state (though it's not really needed for this implementation.)\n    m_state = e_ReadyForVariant ;\n    //std::cout << m_state << \" \" << e_ReadyForVariant << std::endl;\n  }\n\n  // modified by J Mbatchou (03/19/21)\n  std::ostream& summarise( std::ostream& o ) const {\n\n    o << \"   -summary : bgen file (\"\n      << ( m_context.flags & genfile::bgen::e_Layout2 ? \"v1.2 layout\" : \"v1.1 layout\" )\n      << \", \";\n    if((m_context.flags & genfile::bgen::e_CompressedSNPBlocks) == genfile::bgen::e_ZlibCompression)\n      o << \"zlib \";\n    else if((m_context.flags & genfile::bgen::e_CompressedSNPBlocks) == genfile::bgen::e_ZstdCompression)\n      o << \"zstd \";\n    o << ( m_context.flags & genfile::bgen::e_CompressedSNPBlocks ? \"compressed\" : \"uncompressed\" ) << \")\"\n      << \" with \" \n      << m_context.number_of_samples << \" \" << ( m_have_sample_ids ? \"named\" : \"anonymous\" ) << \" samples and \"\n      << m_context.number_of_variants << \" variants\" ;\n    return o ;\n  }\n\n  // Added by JMbatchou (04/04/21)\n  std::string summarise( ) {\n    std::ostringstream o; \n    o << \"   -summary : bgen file (\"\n      << ( m_context.flags & genfile::bgen::e_Layout2 ? \"v1.2 layout\" : \"v1.1 layout\" )\n      << \", \";\n    if((m_context.flags & genfile::bgen::e_CompressedSNPBlocks) == genfile::bgen::e_ZlibCompression)\n      o << \"zlib \";\n    else if((m_context.flags & genfile::bgen::e_CompressedSNPBlocks) == genfile::bgen::e_ZstdCompression)\n      o << \"zstd \";\n    o << ( m_context.flags & genfile::bgen::e_CompressedSNPBlocks ? \"compressed\" : \"uncompressed\" ) << \")\"\n      << \" with \" \n      << m_context.number_of_samples << \" \" << ( m_have_sample_ids ? 
\"named\" : \"anonymous\" ) << \" samples and \"\n      << m_context.number_of_variants << \" variants\" ;\n    return o.str() ;\n  }\n\n  bool get_layout () {\n    return (m_context.flags & genfile::bgen::e_Layout2);\n  }\n\n  bool get_compression () {\n    uint32_t const compressionType = (m_context.flags & genfile::bgen::e_CompressedSNPBlocks);\n    return (compressionType == genfile::bgen::e_ZlibCompression) ;\n  }\n\n  uint64_t get_position() {\n    return m_stream->tellg();\n  }\n\n  // added by JMbatchou (12/23/2020)\n  void jumpto( uint64_t pos ){\n    m_stream->seekg( pos, std::ios_base::beg);\n    m_state = e_ReadyForVariant ;\n  }\n\n  int number_of_samples() const {\n    return m_context.number_of_samples ;\n  }\n\n  int number_of_variants() const {\n    return m_context.number_of_variants ;\n  }\n  // Report the sample IDs in the file using the given setter object\n  // (If there are no sample IDs in the file, we report a dummy identifier).\n  template< typename Setter >\n    void get_sample_ids( Setter setter ) {\n      if( m_have_sample_ids ) {\n        for( std::size_t i = 0; i < m_context.number_of_samples; ++i ) {\n          setter( m_sample_ids[i] ) ;\n        }\n      } else {\n        for( std::size_t i = 0; i < m_context.number_of_samples; ++i ) {\n          setter( \"(unknown_sample_\" + std::to_string( i+1 ) + \")\" ) ;\n        }\n      }\n    }\n\n  // Attempt to read identifying information about a variant from the bgen file, returning\n  // it in the given fields.\n  // If this method returns true, data was successfully read, and it should be safe to call read_probs()\n  // or ignore_probs().\n  // If this method returns false, data was not successfully read indicating the end of the file.\n  bool read_variant(\n      std::string* chromosome,\n      uint32_t* position,\n      std::string* rsid,\n      std::vector< std::string >* alleles\n      ) {\n    assert( m_state == e_ReadyForVariant ) ;\n    std::string SNPID ; // read but ignored in this toy implementation\n\n    if(\n        genfile::bgen::read_snp_identifying_data(\n          *m_stream, m_context,\n          &SNPID, rsid, chromosome, position,\n          [&alleles]( std::size_t n ) { alleles->resize( n ) ; },\n          [&alleles]( std::size_t i, std::string const& allele ) { alleles->at(i) = allele ; }\n          )\n      ) {\n      m_state = e_ReadyForProbs ;\n      return true ;\n    } else {\n      return false ;\n    }\n  }\n\n  // Read genotype probability data for the SNP just read using read_variant()\n  // After calling this method it should be safe to call read_variant() to fetch\n  // the next variant from the file.\n  void read_probs( std::vector< std::vector< double > >* probs ) {\n    assert( m_state == e_ReadyForProbs ) ;\n    ProbSetter setter( probs ) ;\n    genfile::bgen::read_and_parse_genotype_data_block< ProbSetter >(\n        *m_stream,\n        m_context,\n        setter,\n        &m_buffer1,\n        &m_buffer2\n        ) ;\n    m_state = e_ReadyForVariant ;\n  }\n\n  // Ignore genotype probability data for the SNP just read using read_variant()\n  // After calling this method it should be safe to call read_variant()\n  // to fetch the next variant from the file.\n  void ignore_probs() {\n    genfile::bgen::ignore_genotype_data_block( *m_stream, m_context ) ;\n    m_state = e_ReadyForVariant ;\n  }\n\n  private:\n  std::string  m_filename ;\n  std::unique_ptr< std::istream > m_stream ;\n\n  // bgen::Context object holds information from the header block,\n  // including bgen 
flags\n  genfile::bgen::Context m_context ;\n\n  // offset byte from top of bgen file.\n  uint32_t m_offset ;\n\n  // We keep track of our state in the file.\n  // Not strictly necessary for this implementation but makes it clear that\n  // calls must be read_variant() followed by read_probs() (or ignore_probs())\n  // repeatedly.\n  enum State { e_NotOpen = 0, e_Open = 1, e_ReadyForVariant = 2, e_ReadyForProbs = 3, eComplete = 4 } ;\n  State m_state ;\n\n  // If the BGEN file contains sample ids, they will be read here.\n  bool m_have_sample_ids ;\n  std::vector< std::string > m_sample_ids ;\n\n  // Buffers; these are used as working space by the bgen implementation.\n  std::vector< genfile::byte_t > m_buffer1, m_buffer2 ;\n} ;\n"
  },
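  {
    "path": "example/bgen_parser_usage.cpp",
    "content": "// Illustrative sketch only -- not part of the regenie build (compiling it\n// requires the bgen library headers and objects on the include/link path).\n// It shows the call sequence intended by the BgenParser wrapper in\n// src/bgen_to_vcf.hpp: open() once, then repeat read_variant() followed by\n// read_probs() (or ignore_probs()) until read_variant() returns false. The\n// input path below is a placeholder.\n#include <cstdint>\n#include <iostream>\n#include <string>\n#include <vector>\n#include \"bgen_to_vcf.hpp\"\n\nint main() {\n  BgenParser parser;\n  parser.open(\"example/example.bgen\"); // placeholder path\n\n  std::cout << parser.number_of_samples() << \" samples, \"\n            << parser.number_of_variants() << \" variants\\n\";\n\n  std::string chromosome, rsid;\n  uint32_t position;\n  std::vector<std::string> alleles;\n  std::vector<std::vector<double>> probs;\n\n  // read_variant() returns false at end of file\n  while (parser.read_variant(&chromosome, &position, &rsid, &alleles)) {\n    parser.read_probs(&probs); // or parser.ignore_probs() to skip this SNP\n    double dosage_sum = 0;\n    for (const auto& sample : probs)\n      if (sample.size() == 3 && sample[0] >= 0) // ProbSetter encodes missing as -1\n        dosage_sum += sample[1] + 2 * sample[2];\n    std::cout << rsid << \" \" << chromosome << \":\" << position\n              << \" mean ALT dosage = \" << dosage_sum / probs.size() << \"\\n\";\n  }\n  return 0;\n}\n"
  },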
  {
    "path": "src/cox_firth.cpp",
    "content": "#include \"Regenie.hpp\"\n#include \"survival_data.hpp\"\n#include \"cox_firth.hpp\"\n\nusing namespace Eigen;\nusing namespace std;\n\ncox_firth::cox_firth(){}\n\nvoid cox_firth::setup(const survival_data& survivalData, const Eigen::MatrixXd& Xmat, const Eigen::VectorXd& offset_val, const int& cols_incl, const int& max_iter, const int& max_inner_iter, const double& tolerance, const double& stephalf_tol, const double& beta_tol, const double& max_step, const bool& use_firth, const bool& verbose_obj, const Eigen::VectorXd& beta_init) {\n\tconverge = false;\n    p = Xmat.cols();\n    \n\t_niter = max_iter;\n    _mxitnr = max_inner_iter;\n    _tol = tolerance;\n    _stephalf_tol = stephalf_tol;\n    _betatol = beta_tol;\n\t_maxstep = max_step;\n\t_usefirth = use_firth;\n    _verbose = verbose_obj;\n    _cols_incl = cols_incl;\n    loglike.resize(_niter + 1);\n    first_der.resize(p);\n    mu.resize(survivalData.n);\n    residual.resize(survivalData.n);\n\n    beta = Eigen::VectorXd::Zero(p);\n    if (beta_init.size() > 0) {\n        beta.head(_cols_incl) = beta_init.head(_cols_incl);\n        eta = Xmat * beta + offset_val;\n    } else {\n        eta = offset_val;\n    }\n    eta_order = survivalData.keep_sample_order.select(survivalData.permute_mtx * eta, 0).matrix();\n    if (p == 0) {\n        _usefirth = false;\n    }\n}\n\nvoid cox_firth::cox_firth_likelihood(const survival_data& survivalData, const Eigen::MatrixXd& Xmat) {\n    Eigen::VectorXd w_exp_eta, ww_rsk, S0;\n    Eigen::VectorXd lambda0(survivalData.n);\n    Eigen::MatrixXd S1, GammaX, XtW;\n    Eigen::MatrixXd S2 = Eigen::MatrixXd::Zero(p, p);\n    std::vector<Eigen::MatrixXd> firth_der;\n    double log_terms_sum;\n    second_der = Eigen::MatrixXd::Zero(p, p);\n\n    if (_usefirth) {\n        firth_der.resize(p);\n        for(int i = 0; i < p; i++) {\n            firth_der[i] = Eigen::MatrixXd::Zero(p, p);\n        }\n    }\n\n    exp_eta = eta_order.array().exp();\n    w_exp_eta = survivalData.w.array() * exp_eta.array();\n\n    S0 = cumulativeSum_reverse2(survivalData.R.transpose() * w_exp_eta); // length K, risk set sum at each unique failure time\n    log_terms_sum = (survivalData.ww_k.array() * S0.array().log()).sum();\n\n    loglik_val = (survivalData.w.array() * eta_order.array() * (survivalData.status_order.array() == 1).cast<double>()).sum() - log_terms_sum;\n\n    // double mean_eta = (eta.array() * survivalData.w_orig.array()).sum()/survivalData.w_orig.array().sum();\n    // Eigen::VectorXd eta_center = eta_order.array() - mean_eta;\n    // exp_eta = eta_center.array().exp();\n    // w_exp_eta = survivalData.w.array() * exp_eta.array();\n    // S0 = cumulativeSum_reverse2(survivalData.R.transpose() * w_exp_eta); // length K, risk \n\n    ww_rsk = cumulativeSum(survivalData.ww_k.array() / S0.array());\n    for (unsigned int i = 0; i < survivalData.n; ++i) {\n        if (survivalData.rskcount(i) == 0) {\n            lambda0(i) = 0;\n        } else {\n            lambda0(i) = ww_rsk(int(survivalData.rskcount(i)) - 1);\n        }\n    }\n    mu = lambda0.array() * w_exp_eta.array();\n\n    S1 = survivalData.R.transpose() * ((survivalData.permute_mtx * Xmat).array().colwise() * w_exp_eta.array()).matrix(); // K by p\n\n    GammaX = (survivalData.permute_mtx * Xmat).array().colwise() * w_exp_eta.array().sqrt(); // n by p\n    for (int k = survivalData.n_unique_time - 1; k >= 0; --k) {\n        if (k < survivalData.n_unique_time - 1) {\n            S1.row(k) += S1.row(k+1);\n        }\n\n        
std::vector<int> k_indices;\n        for (SpMat::InnerIterator it(survivalData.R, k); it; ++it) {\n            k_indices.push_back(it.index());\n        }\n        \n        // for (int j = 0; j < survivalData.R.cols(); ++j) {\n        //     if (survivalData.R(k, j) != 0) {\n        //         k_indices.push_back(j);\n        //     }\n        // }\n\n        S2 += GammaX(k_indices, all).transpose() * GammaX(k_indices, all);\n\n        second_der = second_der + survivalData.ww_k(k) * (S2/S0(k) - S1.row(k).transpose() * S1.row(k)/(std::pow(S0(k), 2)));\n        if (_usefirth) {\n            for (int t = 0; t < p; ++t) {\n                firth_der[t] += survivalData.ww_k(k) * ((-S2 * S1(k,t) - S2.col(t) * S1.row(k) - S2.row(t).transpose() * S1.row(k))/(std::pow(S0(k), 2)) + 2 * S1.row(k).transpose() * S1.row(k) * S1(k,t)/(std::pow(S0(k), 3)));\n            }\n        }\n    }\n    if (p > 0) qrsd.compute(second_der);\n    residual = survivalData.w.array() * (survivalData.status_order - mu).array();\n    if(_cols_incl < p) {\n        qrsd_incl.compute(second_der.block(0,0,_cols_incl,_cols_incl)); // p-1 by p-1\n        if (_usefirth) {\n            loglik_val += 0.5 * qrsd.logAbsDeterminant();\n            XtW = ((survivalData.permute_mtx * Xmat.leftCols(_cols_incl)).array().colwise() * mu.array().sqrt()).transpose(); // p-1 by n\n            first_der = (survivalData.permute_mtx * Xmat.leftCols(_cols_incl)).transpose() * survivalData.keep_sample_order.select(residual + 0.5 * (qrsd_incl.solve(XtW).array() * XtW.array()).colwise().sum().matrix().transpose(), 0); // qrsd.solve(XtW) is p-1 by n\n            for (int t = 0; t < _cols_incl; ++t) {\n                first_der(t) = first_der(t) + 0.5 * qrsd_incl.solve(firth_der[t].block(0,0,_cols_incl,_cols_incl)).trace();\n            }\n        } else {\n            first_der = (survivalData.permute_mtx * Xmat.leftCols(_cols_incl)).transpose() * residual;\n        }\n    } else {\n        if (_usefirth) {\n            loglik_val += 0.5 * qrsd.logAbsDeterminant();\n            XtW = ((survivalData.permute_mtx * Xmat).array().colwise() * mu.array().sqrt()).transpose(); // p by n\n            first_der = (survivalData.permute_mtx * Xmat).transpose() * survivalData.keep_sample_order.select(residual + 0.5 * (qrsd.solve(XtW).array() * XtW.array()).colwise().sum().matrix().transpose(), 0); // qrsd.solve(XtW) is p by n\n            for (int t = 0; t < p; ++t) {\n                first_der(t) = first_der(t) + 0.5 * qrsd.solve(firth_der[t]).trace();\n            }\n        } else {\n            first_der = (survivalData.permute_mtx * Xmat).transpose() * residual;\n        }\n    }\n}\n\nvoid cox_firth::fit(const survival_data& survivalData, const Eigen::MatrixXd& Xmat, const Eigen::VectorXd& offset_val) {\n    Eigen::VectorXd steps, betanew;\n    int ii;\n    cox_firth_likelihood(survivalData, Xmat);\n    loglike(0) = loglik_val;\n    // std::cout << \"start fitting:\\n\";\n    // std::cout << \"beta: \" << beta << \"\\n\";\n    // std::cout << \"loglik_val: \" << loglik_val << \"\\n\";\n    // std::cout << \"first_der: \" << first_der << \"\\n\";\n    // std::cout << \"second_der: \" << second_der << \"\\n\";\n    iter = 0;\n    if (p == 0 || _cols_incl == 0) {\n        converge = true;\n        residual = survivalData.permute_mtx.transpose() * residual;\n        loglike.conservativeResize(iter+1);\n        return;\n    }\n    betanew = beta;\n    while (iter++ < _niter) {\n        // std::cout << \"iter: \" << iter << \"\\n\";\n        ii = 0;\n        if 
(_cols_incl < p) {\n            steps = qrsd_incl.solve(first_der);\n        } else{\n            steps = qrsd.solve(first_der);\n        }\n        // std::cout << \"steps: \" << steps << \"\\n\";\n        for (int i = 0; i < steps.size(); ++i) {\n            if (abs(steps(i)) >= _maxstep) {\n                steps(i) = (steps(i) / fabs(steps(i))) * _maxstep;\n            }\n        }\n        // std::cout << \"adjusted steps: \" << steps << \"\\n\";\n        betanew.head(_cols_incl) = beta.head(_cols_incl) + steps;\n        // std::cout << \"beta: \" << betanew << \"\\n\";\n        eta = Xmat * betanew + offset_val;\n        eta_order = survivalData.keep_sample_order.select(survivalData.permute_mtx * eta, 0);\n        cox_firth_likelihood(survivalData, Xmat);\n        // std::cout << \"loglik_val: \" << loglik_val << \"\\n\";\n        // std::cout << \"diff loglik_val: \" << loglik_val - loglike(iter - 1) << \"\\n\";\n        if ((loglike(iter - 1) - loglik_val) > _stephalf_tol) { // step-halving\n            // std::cout << \"\\nLoglikelihood decreases at iteration \" << iter << \", start step-halving.\\n\";\n            ii = 0;\n            while ((loglike(iter - 1) - loglik_val) > _stephalf_tol) {\n                ++ii;\n                // std::cout << \"inner iteration: \" << ii << \"\\n\";\n                if (ii > _mxitnr) {\n                    // std::cout << \"Convergence issue, inner loop: cannot correct step size, add eps\\n\";\n                    steps.array() += 1e-6;\n                    betanew.head(_cols_incl) = beta.head(_cols_incl) + steps;\n                    eta = Xmat * betanew + offset_val;\n                    eta_order = survivalData.keep_sample_order.select(survivalData.permute_mtx * eta, 0);\n                    cox_firth_likelihood(survivalData, Xmat);\n                    break;\n                    // throw std::runtime_error(\"inner loop: cannot correct step size\");\n                }\n                betanew = (beta + betanew)/2;\n                eta = Xmat * betanew + offset_val;\n                eta_order = survivalData.keep_sample_order.select(survivalData.permute_mtx * eta, 0);\n                cox_firth_likelihood(survivalData, Xmat);\n                if (_verbose) {\n                    std::cout << \"beta: \" << betanew << \"\\n\";\n                    std::cout << \"Iteration \" << iter << \" Halved, Objective: \" << loglik_val << \"\\n\";\n                }\n            }\n        }\n        loglike(iter) = loglik_val;\n        // std::cout << \"beta: \" << betanew << \"\\n\";\n        // std::cout << \"loglik_val: \" << loglik_val << \"\\n\";\n        // std::cout << \"loglik_val change: \" << loglik_val - loglike(iter - 1) << \"\\n\";\n        // std::cout << \"first_der max: \" << first_der.array().abs().maxCoeff() << \"\\n\";\n        // std::cout << \"beta change max: \" << (beta - betanew).array().abs().maxCoeff() << \"\\n\";\n        if( first_der.array().abs().maxCoeff() < _tol || (ii <= 1 && (beta - betanew).array().abs().maxCoeff() < _betatol) ) {\n            beta = betanew;\n            converge = true;\n            break;\n        }\n        beta = betanew;\n    }\n    residual = survivalData.permute_mtx.transpose() * residual;\n    loglike.conservativeResize(iter+1);\n    // std::cout << \"finish fitting\\n\";\n}\n\n\nvoid cox_firth::cox_firth_likelihood_1(const survival_data& survivalData, const Eigen::VectorXd& g) {\n    Eigen::VectorXd w_exp_eta, ww_rsk;\n    Eigen::VectorXd lambda0(survivalData.n);\n    Eigen::VectorXd S0, S1, 
S2, S3;\n    double log_terms_sum;\n\n    exp_eta = eta_order.array().exp();\n    w_exp_eta = survivalData.w.array() * exp_eta.array();\n\n    S0 = cumulativeSum_reverse2(survivalData.R.transpose() * w_exp_eta); // length K, risk set sum at each unique failure time\n    log_terms_sum = (survivalData.ww_k.array() * S0.array().log()).sum();\n\n    loglik_val = (survivalData.w.array() * eta_order.array() * (survivalData.status_order.array() == 1).cast<double>()).sum() - log_terms_sum;\n\n    ww_rsk = cumulativeSum(survivalData.ww_k.array() / S0.array());\n    for (unsigned int i = 0; i < survivalData.n; ++i) {\n        if (survivalData.rskcount(i) == 0) {\n            lambda0(i) = 0;\n        } else {\n            lambda0(i) = ww_rsk(int(survivalData.rskcount(i)) - 1);\n        }\n    }\n    mu = lambda0.array() * w_exp_eta.array();\n\n    S1 = cumulativeSum_reverse2(survivalData.R.transpose() * ((survivalData.permute_mtx * g).array() * w_exp_eta.array()).matrix()); // K by 1\n\n    S2 = cumulativeSum_reverse2(survivalData.R.transpose() * ((survivalData.permute_mtx * g.array().pow(2).matrix()).array() * w_exp_eta.array()).matrix()); // K by 1\n    \n    second_der_1 = (survivalData.ww_k.array() * (S2.array()/S0.array() - S1.array().pow(2)/S0.array().pow(2))).sum();\n\n    residual = survivalData.w.array() * (survivalData.status_order - mu).array();\n\n    if (_usefirth) {\n        loglik_val += 0.5 * log(fabs(second_der_1));\n        \n        S3 = cumulativeSum_reverse2(survivalData.R.transpose() * ((survivalData.permute_mtx * g.array().pow(3).matrix()).array() * w_exp_eta.array()).matrix());\n        \n        first_der_1 = (survivalData.permute_mtx * g).dot(residual) + 0.5 * (survivalData.ww_k.array() * (S3.array()/S0.array() - 3 * S2.array() * S1.array()/S0.array().pow(2) + 2 * S1.array().pow(3)/S0.array().pow(3))).sum()/second_der_1;\n    } else {\n        first_der_1 = (survivalData.permute_mtx * g).dot(residual);\n    }\n}\n\nvoid cox_firth::fit_1(const survival_data& survivalData, const Eigen::VectorXd& g, const Eigen::VectorXd& offset_val) {\n    Eigen::VectorXd betanew;\n    double steps;\n    int ii = 0;\n    cox_firth_likelihood_1(survivalData, g);\n    // std::cout << \"start fitting:\\n\";\n    // std::cout << \"beta: \" << beta << \"\\n\";\n    // std::cout << \"loglik_val: \" << loglik_val << \"\\n\";\n    // std::cout << \"first_der_1: \" << first_der_1 << \"\\n\";\n    // std::cout << \"second_der_1: \" << second_der_1 << \"\\n\";\n    loglike(0) = loglik_val;\n    iter = 0;\n    while (iter++ < _niter) {\n        // std::cout << \"iter: \" << iter << \"\\n\";\n        steps = first_der_1/second_der_1;\n        // std::cout << \"first der: \" << first_der_1 << \"\\n\";\n        // std::cout << \"second der: \" << second_der_1 << \"\\n\";\n        // std::cout << \"steps: \" << steps << \"\\n\";\n        if (abs(steps) >= _maxstep) {\n            steps = (steps / fabs(steps)) * _maxstep;\n        }\n        // std::cout << \"adjusted steps: \" << steps << \"\\n\";\n        betanew = beta.array() + steps;\n        eta = g * betanew + offset_val;\n        eta_order = survivalData.keep_sample_order.select(survivalData.permute_mtx * eta, 0);\n        cox_firth_likelihood_1(survivalData, g);\n        // std::cout << \"beta: \" << betanew << \"\\n\";\n        // std::cout << \"loglik_val: \" << loglik_val << \"\\n\";\n        // std::cout << \"diff loglik_val: \" << loglik_val - loglike(iter - 1) << \"\\n\";\n        \n        if ((loglike(iter - 1) - loglik_val) > _stephalf_tol) { 
// step-halving\n            // std::cout << \"\\nLoglikelihood decreases at iteration \" << iter << \", start step-halving.\\n\";\n            ii = 0;\n            while ((loglike(iter - 1) - loglik_val) > _stephalf_tol) {\n                ++ii;\n                // std::cout << \"inner iteration: \" << ii << \"\\n\";\n                if (ii > _mxitnr) {\n                    // std::cout << \"Convergence issue, inner loop: cannot correct step size, add eps\\n\";\n                    steps += 1e-6;\n                    betanew = beta.array() + steps;\n                    eta = g * betanew + offset_val;\n                    eta_order = survivalData.keep_sample_order.select(survivalData.permute_mtx * eta, 0);\n                    cox_firth_likelihood_1(survivalData, g);\n                    break;\n                    // throw std::runtime_error(\"inner loop: cannot correct step size\");\n                }\n                betanew = (beta + betanew)/2;\n                eta = g * betanew + offset_val;\n                eta_order = survivalData.keep_sample_order.select(survivalData.permute_mtx * eta, 0);\n                cox_firth_likelihood_1(survivalData, g);\n                if (_verbose) {\n                    std::cout << \"beta: \" << betanew << \"\\n\";\n                    std::cout << \"Iteration \" << iter << \" Halved, Objective: \" << loglik_val << \"\\n\";\n                }\n            }\n        }\n        loglike(iter) = loglik_val;\n        // std::cout << \"beta: \" << betanew << \"\\n\";\n        // std::cout << \"loglik_val: \" << loglik_val << \"\\n\";\n        // std::cout << \"first_der_1: \" << first_der_1 << \"\\n\";\n        // std::cout << \"second_der_1: \" << second_der_1 << \"\\n\";\n        // std::cout << \"first_der max: \" << fabs(first_der_1) << \"\\n\";\n        // std::cout << \"beta change max: \" << (beta - betanew).array().abs().maxCoeff() << \"\\n\";\n        if (fabs(first_der_1) < _tol || (ii <= 1 && (beta - betanew).array().abs().maxCoeff() < _betatol)) {\n            beta = betanew;\n            converge = true;\n            break;\n        }\n        beta = betanew;\n    }\n    residual = survivalData.permute_mtx.transpose() * residual;\n    loglike.conservativeResize(iter+1);\n    // std::cout << \"finish fitting\\n\";\n}"
  },
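  {
    "path": "example/step_halving_sketch.cpp",
    "content": "// Illustrative sketch only -- not part of the regenie build. cox_firth::fit()\n// in src/cox_firth.cpp guards each Newton update with step-halving: when the\n// candidate update decreases the (penalized) log-likelihood, it is repeatedly\n// averaged with the previous iterate (\"betanew = (beta + betanew)/2\") until\n// the objective stops decreasing. This toy reproduces that safeguard on a\n// one-dimensional concave objective chosen so the first Newton step badly\n// overshoots; the function and tolerances are hypothetical stand-ins.\n#include <cmath>\n#include <iostream>\n\n// toy concave \"log-likelihood\" with maximum at b = ln(2)\ndouble loglik(double b) { return 2 * b - std::exp(b); }\ndouble d1(double b) { return 2 - std::exp(b); } // first derivative\ndouble d2(double b) { return -std::exp(b); }    // second derivative (< 0)\n\nint main() {\n  double beta = -5.0, ll = loglik(beta);\n  const double tol = 1e-8;\n  const int max_iter = 50, max_inner = 30;\n\n  for (int it = 1; it <= max_iter; it++) {\n    double betanew = beta - d1(beta) / d2(beta); // Newton step\n    double llnew = loglik(betanew);\n\n    // step-halving safeguard, mirroring the inner loop of cox_firth::fit()\n    int inner = 0;\n    while (ll - llnew > tol && ++inner <= max_inner) {\n      betanew = (beta + betanew) / 2;\n      llnew = loglik(betanew);\n    }\n\n    std::cout << \"iter \" << it << \" (halvings=\" << inner << \"): beta=\"\n              << betanew << \" loglik=\" << llnew << \"\\n\";\n    beta = betanew; ll = llnew;\n    if (std::fabs(d1(beta)) < tol) break; // score equation converged\n  }\n  std::cout << \"converged to beta=\" << beta << \" (ln 2 = \" << std::log(2.0) << \")\\n\";\n  return 0;\n}\n"
  },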
  {
    "path": "src/cox_firth.hpp",
    "content": "#ifndef COXFIRTH_H\n#define COXFIRTH_H\n\nclass cox_firth {\n    public:\n\t\tint p;\n        // coefficients\n        Eigen::VectorXd beta;\n        // prediction\n        Eigen::VectorXd eta, eta_order, residual;\n        int iter;\n        bool converge = false;\n\n        // prepare for test\n        Eigen::VectorXd exp_eta;\n        Eigen::VectorXd mu;\n\t\tEigen::VectorXd loglike;\n\t\tdouble loglik_val;\n\t\tEigen::VectorXd first_der;\n\t\tEigen::MatrixXd second_der;\n        Eigen::ColPivHouseholderQR<Eigen::MatrixXd> qrsd, qrsd_incl;\n        double first_der_1;\n\t\tdouble second_der_1;\n\n        cox_firth();\n        void setup(const survival_data& survivalData, const Eigen::MatrixXd& Xmat, const Eigen::VectorXd& offset_val, const int& cols_incl, const int& max_iter = 100, const int& max_inner_iter = 30, const double& tolerance = 1e-6, const double& stephalf_tol = 2.5e-4, const double& beta_tol = 1e-6, const double& max_step = 1, const bool& use_firth = true, const bool& verbose_obj = false, const Eigen::VectorXd& beta_init = Eigen::VectorXd());\n        void cox_firth_likelihood(const survival_data& survivalData, const Eigen::MatrixXd& Xmat);\n\t\tvoid fit(const survival_data& survivalData, const Eigen::MatrixXd& Xmat, const Eigen::VectorXd& offset_val);\n        void cox_firth_likelihood_1(const survival_data& survivalData, const Eigen::VectorXd& g);\n        void fit_1(const survival_data& survivalData, const Eigen::VectorXd& g, const Eigen::VectorXd& offset_val);\n\n    private:\n        int _niter, _mxitnr, _cols_incl;\n        double _tol, _stephalf_tol, _betatol;\n\t\tdouble _maxstep;\n        bool _usefirth, _verbose;\n};\n\n#endif"
  },
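  {
    "path": "example/firth_penalty_sketch.cpp",
    "content": "// Illustrative sketch only -- not part of the regenie build. cox_firth keeps\n// a ColPivHouseholderQR factorization (qrsd) of the observed information\n// matrix (second_der) and, when use_firth is on, adds Jeffreys' penalty\n// 0.5*log|I(beta)| to the partial log-likelihood via qrsd.logAbsDeterminant().\n// This snippet shows that penalty evaluation in isolation; the information\n// matrix and score vector below are made up.\n#include <iostream>\n#include <Eigen/Dense>\n\nint main() {\n  // made-up 3x3 observed information matrix (symmetric positive definite)\n  Eigen::MatrixXd info(3, 3);\n  info << 4.0, 1.0, 0.5,\n          1.0, 3.0, 0.2,\n          0.5, 0.2, 2.0;\n\n  Eigen::ColPivHouseholderQR<Eigen::MatrixXd> qr(info);\n\n  double loglik = -123.4; // unpenalized partial log-likelihood (made up)\n  double penalty = 0.5 * qr.logAbsDeterminant(); // Jeffreys-prior term\n  std::cout << \"penalized loglik = \" << loglik + penalty << \"\\n\";\n\n  // the same factorization also supplies the Newton step: solve I * step = U\n  Eigen::VectorXd score(3);\n  score << 0.3, -0.1, 0.05;\n  std::cout << \"newton step = \" << qr.solve(score).transpose() << \"\\n\";\n  return 0;\n}\n"
  },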
  {
    "path": "src/cox_ridge.cpp",
    "content": "#include \"Regenie.hpp\"\n#include \"survival_data.hpp\"\n#include \"cox_ridge.hpp\"\n\nusing namespace Eigen;\nusing namespace std;\n\ncox_ridge::cox_ridge(const survival_data& survivalData, const Eigen::MatrixXd& Xmat, const Eigen::VectorXd& offset_val, const ArrayXb& mask, const double& lambda_val, const int& max_iter, const int& max_inner_iter, const double& tolerance, const bool& verbose_obj, const Eigen::VectorXd& beta_init, const double& null_deviance) {\n    converge = false;\n\n    if (beta_init.size() > 0) {\n        beta = beta_init;\n    } else {\n        beta = Eigen::VectorXd::Zero(Xmat.cols());\n    }\n    lambda = lambda_val;\n    _niter = max_iter;\n    _mxitnr = max_inner_iter;\n    _tol = tolerance;\n    _verbose = verbose_obj;\n    _object.resize(_niter + 1);\n    _deviance.resize(_niter + 1);\n\n    eta = mask.select(Xmat * beta + offset_val, 0).matrix();\n    eta_order = survivalData.permute_mtx * eta;\n    \n    if (null_deviance == -999) {\n        _deviance(0) = _coxDeviance(survivalData);\n    } else {\n        _deviance(0) = null_deviance;\n    }\n    _object(0) = _deviance(0) + lambda * (beta.array().pow(2).sum())/2;\n}\n\nvoid cox_ridge::reset(const survival_data& survivalData, const Eigen::MatrixXd& Xmat, const Eigen::VectorXd& offset_val, const ArrayXb& mask, const double& lambda_val, const Eigen::VectorXd& beta_init, const double& null_deviance) {\n    // Reset the object's state based on the provided parameters\n    converge = false;\n\n    if (beta_init.size() > 0) {\n        beta = beta_init;\n    } else {\n        beta = Eigen::VectorXd::Zero(Xmat.cols());\n    }\n    lambda = lambda_val;\n\n    // Calculate eta, eta_order, or other necessary calculations\n    eta = mask.select(Xmat * beta + offset_val, 0).matrix();\n    eta_order = survivalData.permute_mtx * eta;\n\n    _deviance.resize(_niter + 1);\n    _object.resize(_niter + 1);\n    if (null_deviance == -999){\n        _deviance(0) = _coxDeviance(survivalData);\n    } else {\n        _deviance(0) = null_deviance;\n    }\n    _object(0) = _deviance(0) + lambda * (beta.array().pow(2).sum())/2;\n}\n\nvoid cox_ridge::coxGrad(const survival_data& survivalData) {\n    double mean_eta = (eta.array() * survivalData.w_orig.array()).sum()/survivalData.w_orig.array().sum();\n    Eigen::VectorXd eta_center = eta_order.array() - mean_eta;\n    Eigen::VectorXd exp_eta = eta_center.array().exp();\n    // Eigen::VectorXd exp_eta = eta_order.array().exp();\n    Eigen::VectorXd rskden = cumulativeSum_reverse2(survivalData.w.array() * exp_eta.array());\n\n    Eigen::VectorXd ww_rsk = survivalData.ww.array() / rskden.array();\n    Eigen::VectorXd ww_rsk2 = survivalData.ww.array() / (rskden.array().pow(2));\n\n    Eigen::VectorXd rskdeninv_n = cumulativeSum(survivalData.dd.array().cast<bool>().select(ww_rsk, 0));\n    Eigen::VectorXd rskdeninv2_n = cumulativeSum(survivalData.dd.array().cast<bool>().select(ww_rsk2, 0));\n\n    Eigen::VectorXd gradient_order = survivalData.w.array() * (survivalData.status_order.array() - exp_eta.array() * rskdeninv_n.array());\n\n    Eigen::VectorXd diag_hessian_order = (survivalData.w.array() * exp_eta.array()).pow(2) * rskdeninv2_n.array() - \n        survivalData.w.array() * exp_eta.array() * rskdeninv_n.array();\n    // change to original order\n    _gradient = survivalData.permute_mtx.transpose() * gradient_order;\n    _diagHessian = survivalData.permute_mtx.transpose() * diag_hessian_order;\n}\n\ndouble cox_ridge::_coxLoglik(const survival_data& survivalData) {\n    
Eigen::VectorXd rsk = cumulativeSum_reverse2(survivalData.w.array() * eta_order.array().exp());\n\n    // take just the terms related to actual death times\n    double log_terms_sum = (survivalData.ww.array() * (survivalData.keep_sample_order.select(rsk.array().log(), 0)) * (survivalData.dd.array() == 1).cast<double>()).sum();\n    double loglik_val = (survivalData.w.array() * eta_order.array() * (survivalData.status_order.array() == 1).cast<double>()).sum() - log_terms_sum;\n    return loglik_val;\n}\n\ndouble cox_ridge::_coxDeviance(const survival_data& survivalData) {\n    Eigen::VectorXd w_sub;\n    if (survivalData.unique_time_indices.size() == survivalData.n_events) {\n        // no tie\n        w_sub = Eigen::VectorXd::Ones(survivalData.n_events);\n        w_sub /= survivalData.neff;\n    } else {\n        // tie\n        w_sub.resize(survivalData.unique_time_indices.size());\n        int idx = 0;\n        for (const auto& entry: survivalData.unique_time_indices) {\n            const vector<int>& ties = entry.second;\n            w_sub(idx) = static_cast<double>(ties.size())/survivalData.neff;\n            ++idx;\n        }\n    }\n    double lsat = -(w_sub.array() * (w_sub.array().log())).sum();\n    double loglik_val = _coxLoglik(survivalData);\n\n    double deviance = 2 * (lsat - loglik_val);\n    return deviance;\n}\n\nvoid cox_ridge::fit(const survival_data& survivalData, const Eigen::MatrixXd& Xmat, const Eigen::VectorXd& offset_val, const ArrayXb& mask) {\n    int p = Xmat.cols();\n    int ii = 0;\n    Eigen::VectorXd beta_old;\n    int break_pt = 1;\n    for (int t = 1; t < _niter + 1; ++t) {\n        ++break_pt;\n        beta_old = beta;\n        coxGrad(survivalData);\n        Eigen::VectorXd z(survivalData.n);\n        z = (_diagHessian.array() != 0).select(_gradient.array()/_diagHessian.array(), 0).matrix();\n        z = mask.select(eta - offset_val, 0) - z;\n        for (unsigned int k = 0; k < p; ++k) {\n            Eigen::VectorXd r = _diagHessian.array() * (z - eta + offset_val).array();\n            eta = eta - mask.select(Xmat.col(k) * beta(k), 0).matrix();\n            beta(k) = (r.dot(Xmat.col(k)) + beta(k) * (Xmat.col(k).array().pow(2) * _diagHessian.array()).sum()) /\n                ((Xmat.col(k).array().pow(2) * _diagHessian.array()).sum() - lambda);\n            eta = eta + mask.select(Xmat.col(k) * beta(k), 0).matrix();\n        }\n        eta_order = survivalData.permute_mtx * eta;\n        _deviance(t) = _coxDeviance(survivalData);\n        _object(t) = _deviance(t) + lambda * (beta.array().pow(2).sum())/2;\n        if (_verbose) {\n            std::cout << \"Iteration \" << t << \" objective: \" << _object(t) << \"; diff: \" << _object(t) - _object(t-1) << \"; rel diff: \" << abs(_object(t) - _object(t - 1)) / (0.1 + abs(_object(t))) << \"; score: \" << (_gradient.transpose() * Xmat - lambda * beta.transpose()).cwiseAbs().maxCoeff() << \"\\n\";\n        }\n\n        if ( (_deviance(t) - _deviance(t-1)) > _tol ) {\n            std::cout << \"\\nDeviance increases at iteration \" << t << \".\\n\";\n            ii = 0;\n            while ( (_deviance(t) - _deviance(t-1)) > _tol ) {\n                ++ii;\n                if (ii > _mxitnr) {\n                    std::cout << \"Convergence issue, inner loop: cannot correct step size\\n\";\n                    return;\n                    // throw std::runtime_error(\"inner loop: cannot correct step size\");\n                }\n                beta = (beta + beta_old)/2;\n                eta = mask.select(Xmat 
* beta + offset_val, 0).matrix();\n                eta_order = survivalData.permute_mtx * eta;\n                _deviance(t) = _coxDeviance(survivalData);\n                _object(t) = _deviance(t) + lambda * (beta.array().pow(2).sum())/2;\n                if (_verbose) {\n                    std::cout << \"Iteration \" << t << \" Halved, Objective: \" << _object(t) << \"; diff: \" << _object(t) - _object(t-1) << \".\\n\";\n                }\n            }\n        }\n\n        if (abs(_object(t) - _object(t - 1)) / (0.1 + abs(_object(t))) < _tol || (_gradient.transpose() * Xmat - lambda * beta.transpose()).cwiseAbs().maxCoeff() < _tol ) {\n            converge = true;\n            break;\n        }\n    }\n\n    if (break_pt < (_niter + 1)) {\n        _deviance.conservativeResize(break_pt);\n        _object.conservativeResize(break_pt);\n    }\n    dev_ratio = 1 - _deviance(_deviance.size() - 1) / _deviance(0);\n}\n\nEigen::VectorXd cox_ridge::get_gradient() {\n    return _gradient;\n}\n\ndouble cox_ridge::get_deviance() {\n    return _deviance(_deviance.size() - 1);\n}\n\ndouble cox_ridge::get_null_deviance() {\n    return _deviance(0);\n}\n\ndouble cox_ridge::get_object() {\n    return _object(_object.size() - 1);\n}\n\nEigen::VectorXd cox_ridge::get_object_all() {\n    return _object;\n}\n\nEigen::VectorXd cox_ridge::get_deviance_all() {\n    return _deviance;\n}\n\n\ncox_ridge_path::cox_ridge_path(const survival_data& survivalData, const Eigen::MatrixXd& Xmat, const Eigen::VectorXd& offset_val, const ArrayXb& mask, const int& nlambda, const double& lambda_min_max_ratio, const Eigen::VectorXd& lambda, const int& max_iter, const int& max_inner_iter, const double& tolerance, const bool& verbose_fit) {\n    int p = Xmat.cols();\n    // set lambda_vec\n    if (lambda.size() > 0) {\n        _user_define_lambda = true;\n        _lambda_len = lambda.size();\n        if (lambda.minCoeff() < 0) { throw std::runtime_error(\"lambda must >= 0.\"); }\n        lambda_vec = lambda;\n        std::sort(lambda_vec.data(), lambda_vec.data() + lambda_vec.size(), std::greater<double>());\n    } else {\n        double lambda_min_ratio;\n        if (lambda_min_max_ratio >= 1) { \n            throw std::runtime_error(\"lambda_min_max_ratio should be less than 1.\"); \n        } else if (lambda_min_max_ratio == -1) {\n            if (survivalData.neff < p) {\n                lambda_min_ratio = 1e-2;\n            } else {\n                lambda_min_ratio = 1e-4;\n            }\n        } else {\n            lambda_min_ratio = lambda_min_max_ratio;\n        }\n        _lambda_len = nlambda;\n        cox_ridge coxRidge_null_lamb0(survivalData, Xmat, offset_val, mask, 0, max_iter, max_inner_iter, tolerance);\n        coxRidge_null_lamb0.coxGrad(survivalData);\n        Eigen::VectorXd gradient = coxRidge_null_lamb0.get_gradient();\n        double lambda_max = _getCoxLambdaMax(Xmat, gradient);\n        // lambda_vec = (Eigen::seq(0, _lambda_len - 1) * log(lambda_min_ratio) + log(lambda_max)).exp();\n        Eigen::VectorXd index(nlambda);\n        for (int i = 0; i < nlambda; ++i) {\n            if (i > 0) {\n                index(i) = static_cast<double>(i)/(nlambda - 1);\n            } else {\n                index(i) = i;\n            }\n        }\n        lambda_vec = (index.array() * log(lambda_min_ratio) + log(lambda_max)).exp();\n    }\n    beta_mx.resize(p, _lambda_len);\n    eta_mx.resize(survivalData.n, _lambda_len);\n    object_val.resize(_lambda_len);\n    dev_ratio.resize(_lambda_len);\n    
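// dev_ratio(k) = 1 - deviance(k) / null_deviance; fit() uses it to stop the path early once the fit saturates\n    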
deviance.resize(_lambda_len);\n    converge.resize(_lambda_len);\n    niter = max_iter;\n    mxitnr = max_inner_iter;\n    tol = tolerance;\n    verbose = verbose_fit;\n}\n\ndouble cox_ridge_path::_getCoxLambdaMax(const Eigen::MatrixXd& Xmat, const Eigen::VectorXd& gradient) {\n    Eigen::VectorXd g = (Xmat.transpose() * gradient).array().abs();\n    return g.maxCoeff() / 1e-3;\n}\n\nvoid cox_ridge_path::fit(const survival_data& survivalData, const Eigen::MatrixXd& Xmat, const Eigen::VectorXd& offset_val, const ArrayXb& mask) {\n    int break_pt = 0;\n    double cur_lambda = lambda_vec(0);\n    cox_ridge coxRidge(survivalData, Xmat, offset_val, mask, cur_lambda, niter, mxitnr, tol, verbose);\n    double nulldev_old = -999;\n    Eigen::VectorXd beta_old(Xmat.cols());\n\n    for (int k = 0; k < _lambda_len; ++k) {\n        ++break_pt;\n        if (k > 0) {\n            // warm start from the previous lambda's solution\n            cur_lambda = lambda_vec(k);\n            coxRidge.reset(survivalData, Xmat, offset_val, mask, cur_lambda, beta_old, nulldev_old);\n        }\n        if (verbose) {\n            std::cout << \"lambda: \" << cur_lambda << \"\\n\";\n        }\n        coxRidge.fit(survivalData, Xmat, offset_val, mask);\n        if (verbose) {\n            std::cout << \"converge: \" << coxRidge.converge << \"\\n\";\n        }\n\n        converge(k) = coxRidge.converge;\n        if (!coxRidge.converge) {\n            std::cout << \"Warning: lambda \" << cur_lambda << \" failed to converge.\\n\";\n        }\n        beta_old = coxRidge.beta;\n        nulldev_old = coxRidge.get_null_deviance();\n        beta_mx.col(k) = coxRidge.beta;\n        eta_mx.col(k) = coxRidge.eta;\n        deviance(k) = coxRidge.get_deviance();\n        dev_ratio(k) = coxRidge.dev_ratio;\n        object_val(k) = coxRidge.get_object();\n\n        // early stopping on the auto-generated path once the fit saturates or stops improving\n        if (k > 4 && !_user_define_lambda) {\n            if (dev_ratio(k) > 0.99) { break; }\n            if ((dev_ratio(k) - dev_ratio(k - 3)) < 1e-3 * dev_ratio(k)) { break; }\n        }\n    }\n\n    if (break_pt < _lambda_len) {\n        beta_mx.conservativeResize(Xmat.cols(), break_pt);\n        eta_mx.conservativeResize(survivalData.n, break_pt);\n        deviance.conservativeResize(break_pt);\n        dev_ratio.conservativeResize(break_pt);\n        lambda_vec.conservativeResize(break_pt);\n        object_val.conservativeResize(break_pt);\n        converge.conservativeResize(break_pt);\n    }\n}"
  },
  {
    "path": "src/cox_ridge.hpp",
    "content": "#ifndef COXL2_H\n#define COXL2_H\n#include \"Regenie.hpp\"\n\nclass cox_ridge_path {\n    public:\n        // coefficients\n        Eigen::MatrixXd beta_mx;\n        Eigen::MatrixXd eta_mx;\n        \n        Eigen::VectorXd lambda_vec;\n\n        // objective value\n        Eigen::VectorXd object_val;\n        Eigen::VectorXd deviance;\n        Eigen::VectorXd dev_ratio;\n        Eigen::Array<bool, Eigen::Dynamic, 1> converge;\n        cox_ridge_path(const survival_data& survivalData, const Eigen::MatrixXd& Xmat, const Eigen::VectorXd& offset_val, const ArrayXb& mask, const int& nlambda = 100, const double& lambda_min_max_ratio = -1, const Eigen::VectorXd& lambda = Eigen::VectorXd(), const int& max_iter = 100, const int& max_inner_iter = 30, const double& tolerance = 1e-6, const bool& verbose_fit = false);\n        void fit(const survival_data& survivalData, const Eigen::MatrixXd& Xmat, const Eigen::VectorXd& offset_val, const ArrayXb& mask);\n        \n        // fitting info\n        int niter, mxitnr;\n        double tol;\n        bool verbose;\n\n    private:\n        double _getCoxLambdaMax(const Eigen::MatrixXd& Xmat, const Eigen::VectorXd& gradient);\n        int _lambda_len;\n        bool _user_define_lambda = false;\n};\n\nclass cox_ridge {\n    public:\n        // coefficients\n        Eigen::VectorXd beta;\n        // prediction\n        Eigen::VectorXd eta, eta_order;\n        double lambda;\n        bool converge;\n        double dev_ratio;\n\n        cox_ridge(const survival_data& survivalData, const Eigen::MatrixXd& Xmat, const Eigen::VectorXd& offset_val, const ArrayXb& mask, const double& lambda_val, const int& max_iter = 100, const int& max_inner_iter = 30, const double& tolerance = 1e-6, const bool& verbose_obj = false, const Eigen::VectorXd& beta_init = Eigen::VectorXd(), const double& null_deviance = -999);\n        void fit(const survival_data& survivalData, const Eigen::MatrixXd& Xmat, const Eigen::VectorXd& offset_val, const ArrayXb& mask);\n        void reset(const survival_data& survivalData, const Eigen::MatrixXd& Xmat, const Eigen::VectorXd& offset_val, const ArrayXb& mask, const double& lambda_val, const Eigen::VectorXd& beta_init = Eigen::VectorXd(), const double& null_deviance = -999);\n        void coxGrad(const survival_data& survivalData);\n        Eigen::VectorXd get_gradient();\n        Eigen::VectorXd get_object_all();\n        Eigen::VectorXd get_deviance_all();\n        double get_object();\n        double get_deviance();\n        double get_null_deviance();\n\n    private:\n        // gradient\n        Eigen::VectorXd _gradient, _diagHessian;\n        int _niter, _mxitnr;\n        double _tol;\n        bool _verbose;\n        // objective value\n        Eigen::VectorXd _object;\n        Eigen::VectorXd _deviance;\n        \n        double _coxDeviance(const survival_data& survivalData);\n        double _coxLoglik(const survival_data& survivalData);\n};\n\n#endif"
  },
  {
    "path": "src/cox_score.cpp",
    "content": "#include \"Regenie.hpp\"\n#include \"survival_data.hpp\"\n#include \"cox_ridge.hpp\"\n#include \"cox_score.hpp\"\n\nusing namespace Eigen;\nusing namespace std;\nusing namespace boost;\n\ncox_mle::cox_mle(){}\n\nvoid cox_mle::setup(const survival_data& survivalData, const Eigen::MatrixXd& Xmat, const Eigen::VectorXd& offset_val, const ArrayXb& mask, const int& max_iter, const int& max_inner_iter, const double& tolerance, const bool& verbose_obj, const Eigen::VectorXd& beta_init, const Eigen::VectorXd& eta_init) {\n    converge = false;\n    p = Xmat.cols();\n\n\t_niter = max_iter;\n    _mxitnr = max_inner_iter;\n    _tol = tolerance;\n    _verbose = verbose_obj;\n\n    if (beta_init.size() > 0) {\n        beta = beta_init;\n        eta = eta_init;\n    } else {\n        beta = Eigen::VectorXd::Zero(p);\n        eta = mask.select(offset_val, 0).matrix();\n    }\n    eta_order = mask.select(survivalData.permute_mtx * eta, 0).matrix();\n    lambda0.resize(survivalData.n);\n    mu.resize(survivalData.n);\n    Y.resize(survivalData.n);\n    residual.resize(survivalData.n);\n    loglike.resize(_niter + 1);\n}\n\nvoid cox_mle::fit(const survival_data& survivalData, const Eigen::MatrixXd& Xmat, const Eigen::VectorXd& offset_val, const ArrayXb& mask) {\n    Eigen::VectorXd beta_old;\n    Eigen::VectorXd sqrtWY, XtWY;\n    int ii;\n\tcompute_loglike(survivalData);\n\tloglike(0) = loglik_val;\n    // std::cout << \"start MLE fitting:\\n\";\n    // std::cout << \"beta: \" << beta << \"\\n\";\n    // std::cout << \"loglik_val: \" << loglik_val << \"\\n\";\n    iter = 0;\n    for (int t = 0; t < _niter; ++t) { \n        ii = 0;\n        beta_old = beta;\n        mu = survivalData.w_orig.array() * lambda0.array() * eta.array().exp();\n        residual = survivalData.w_orig.array() * survivalData.status.array() - mu.array();\n        Y = mask.select(eta - offset_val, 0).matrix() +\n            (mu.array() != 0).select(residual.array()/mu.array(), 0).matrix();\n      \t\n        // update beta\n        if (p == 0) {\n            sqrtWX.resize(survivalData.n, 0);\n            XtWX.resize(0,0);\n            converge = true;\n            break;\n        }\n        \n        ++iter;\n        sqrtWY = Y.array() * (mu.array().sqrt());\n\t\tsqrtWX = Xmat.array().colwise() * (mu.array().sqrt());\n        XtWX = sqrtWX.transpose() * sqrtWX;\n\t\tXtWY = sqrtWX.transpose() * sqrtWY;\n\t\tbeta = XtWX.colPivHouseholderQr().solve(XtWY);\n\t\teta = mask.select(Xmat * beta + offset_val, 0).matrix();\n        eta_order = survivalData.permute_mtx * eta;\n        compute_loglike(survivalData);\n        // std::cout << \"iter: \" << iter << \"\\n\";\n        // std::cout << \"beta: \" << beta << \"\\n\";\n        // std::cout << \"loglik_val: \" << loglik_val << \"\\n\";\n        // std::cout << \"diff loglik_val: \" << loglik_val - loglike(iter - 1) << \"\\n\";\n\n        if ((loglike(iter - 1) - loglik_val) > _tol) { // step-halving\n\t\t\t// std::cout << \"\\nLoglikelihood decreases at iteration \" << iter << \", start step-halving.\\n\";\n            while ((loglike(iter - 1) - loglik_val) > _tol) {\n                ++ii;\n                if (ii > _mxitnr) {\n                    std::cout << \"Convergence issue, inner loop: cannot correct step size\\n\";\n                    return;\n                    // throw std::runtime_error(\"inner loop: cannot correct step size\");\n                }\n                beta = (beta_old + beta)/2;\n                eta = mask.select(Xmat * beta + offset_val, 
0).matrix();\n                eta_order = survivalData.permute_mtx * eta;\n                compute_loglike(survivalData);\n                if (_verbose) {\n                    std::cout << \"beta: \" << beta << \"\\n\";\n                    std::cout << \"Iteration \" << iter << \" Halved, Objective: \" << loglik_val << \"\\n\";\n                }\n            }\n        }\n        loglike(iter) = loglik_val;\n\n        // stop when the loglikelihood is stable or, with at most one halving, when beta barely moved\n        if (loglike(iter) - loglike(iter - 1) < _tol || (ii <= 1 && ((beta.array() - beta_old.array()).abs()/(beta.array().abs() + beta_old.array().abs() + _tol)).maxCoeff() < _tol)) {\n            mu = survivalData.w_orig.array() * lambda0.array() * eta.array().exp();\n            residual = survivalData.w_orig.array() * survivalData.status.array() - mu.array();\n            Y = mask.select(eta - offset_val, 0).matrix() +\n                (mu.array() != 0).select(residual.array()/mu.array(), 0).matrix();\n            sqrtWX = Xmat.array().colwise() * mu.array().sqrt();\n            XtWX = sqrtWX.transpose() * sqrtWX;\n            converge = true;\n            break;\n        }\n    }\n}\n\nvoid cox_mle::compute_loglike(const survival_data& survivalData) {\n    Eigen::VectorXd w_exp_eta = survivalData.w.array() * eta_order.array().exp();\n\n    Eigen::VectorXd S0 = cumulativeSum_reverse2(survivalData.R.transpose() * w_exp_eta); // length K, risk set sum at each unique failure time\n    double log_terms_sum = (survivalData.ww_k.array() * S0.array().log()).sum();\n\n    loglik_val = (survivalData.w.array() * eta_order.array() * (survivalData.status_order.array() == 1).cast<double>()).sum() - log_terms_sum;\n\n    // Breslow estimate of the cumulative baseline hazard at each sample's time\n    Eigen::VectorXd ww_rsk = cumulativeSum(survivalData.ww_k.array() / S0.array()); // length K\n    for (unsigned int i = 0; i < survivalData.n; ++i) {\n        if (survivalData.rskcount(i) == 0) {\n            lambda0(i) = 0;\n        } else {\n            lambda0(i) = ww_rsk(int(survivalData.rskcount(i)) - 1);\n        }\n    }\n    lambda0 = survivalData.permute_mtx.transpose() * lambda0;\n}\n\nvoid cox_mle::cox_test_prep(const survival_data& survivalData, const Eigen::MatrixXd& Xmat, const Eigen::VectorXd& offset_val, const ArrayXb& mask) {\n    double eta_mean = eta_order.array().sum()/eta_order.size();\n    w_exp_eta = survivalData.w.array() * (eta_order.array() - eta_mean).exp();\n\n    Eigen::VectorXd rskden = cumulativeSum_reverse2(survivalData.R.transpose() * w_exp_eta); // length K\n\n    Dhalf = (survivalData.ww_k.array().sqrt()) / rskden.array();\n\n    Eigen::MatrixXd Gamma_X = (survivalData.permute_mtx * Xmat).array().colwise() * w_exp_eta.array(); // n by p\n    UhalfX.resize(survivalData.n_unique_time, p);\n\n    Eigen::VectorXd RGammaXr;\n    for (int r = 0; r < p; ++r) {\n        RGammaXr = cumulativeSum_reverse2(survivalData.R.transpose() * Gamma_X.col(r)); // length K\n        UhalfX.col(r) = Dhalf.array() * RGammaXr.array();\n    }\n\n    cov_inv.resize(p, p);\n    if (p > 0) {\n        cov_inv = 
(XtWX - UhalfX.transpose() * UhalfX).colPivHouseholderQr().inverse(); // inverse of the Cox partial-likelihood information matrix\n    }\n\n    // design matrix with intercept: X1 = [1 | X]\n    Eigen::MatrixXd X1(survivalData.n, p + 1);\n    X1 << Eigen::VectorXd::Ones(survivalData.n), Xmat;\n    WX1 = X1.array().colwise() * mu.array();\n    Eigen::MatrixXd X1tWX1 = X1.transpose() * WX1;\n    X1_X1WX1inv = X1 * X1tWX1.colPivHouseholderQr().inverse();\n\n    double res_mean = residual.mean();\n    // sample variance of the working residuals\n    res_var = (residual.array() - res_mean).square().sum() / (residual.size() - 1);\n}\n"
  },
  {
    "path": "src/cox_score.hpp",
    "content": "#ifndef COXSCORE_H\n#define COXSCORE_H\n\nclass cox_mle {\n    public:\n        int p;\n        // coefficients\n        Eigen::VectorXd beta;\n        Eigen::VectorXd lambda0;\n        Eigen::VectorXd mu;\n        Eigen::VectorXd residual;\n        Eigen::VectorXd Y;\n        Eigen::MatrixXd XtWX;\n        Eigen::MatrixXd sqrtWX;\n        double loglik_val;\n\t\tEigen::VectorXd loglike;\n        // prediction\n        Eigen::VectorXd eta, eta_order;\n        int iter;\n        bool converge;\n\n        // prepare for test\n        int n_events_dd;\n        Eigen::VectorXd w_exp_eta;\n        Eigen::MatrixXd UhalfX;\n        Eigen::VectorXd Dhalf;\n        Eigen::MatrixXd cov_inv;\n        Eigen::MatrixXd X1_X1WX1inv;\n        Eigen::MatrixXd WX1;\n        double res_var;\n        // Eigen::MatrixXd X_XtVXinv;\n        // Eigen::MatrixXd X1_X1tX1invhalf;\n\n        cox_mle();\n        void setup(const survival_data& survivalData, const Eigen::MatrixXd& Xmat, const Eigen::VectorXd& offset_val, const ArrayXb& mask, const int& max_iter = 100, const int& max_inner_iter = 30, const double& tolerance = 1e-6, const bool& verbose_obj = false, const Eigen::VectorXd& beta_init = Eigen::VectorXd(), const Eigen::VectorXd& eta_init = Eigen::VectorXd());\n        void compute_loglike(const survival_data& survivalData);\n        void fit(const survival_data& survivalData, const Eigen::MatrixXd& Xmat, const Eigen::VectorXd& offset_val, const ArrayXb& mask);\n        void cox_test_prep(const survival_data& survivalData, const Eigen::MatrixXd& Xmat, const Eigen::VectorXd& offset_val, const ArrayXb& mask);\n        \n    private:\n        int _niter, _mxitnr;\n        double _tol;\n        bool _verbose;\n};\n\n#endif"
  },
  {
    "path": "src/survival_data.cpp",
    "content": "#include \"Regenie.hpp\"\n#include \"survival_data.hpp\"\n\nusing namespace Eigen;\nusing namespace std;\n\nsurvival_data::survival_data(){};\n\nvoid survival_data::setup(const Eigen::VectorXd& event_time, const Eigen::VectorXd& event_status, const ArrayXb& mask, const bool& norm_weights) {\n    if (event_time.size() != event_status.size()) { throw std::runtime_error(\"event_time and event_status should have same length.\"); }\n    status = mask.select(event_status, -999).matrix();\n    n = event_time.size();\n    neff = (mask == true).count();\n\n    // order event times\n    _getOrder(event_time, status);\n    Eigen::VectorXd time_order = permute_mtx * mask.select(event_time, -999).matrix(); // missing or masked samples are in the front\n    status_order = permute_mtx * status;\n    Eigen::VectorXd keep_sample_double = mask.cast<double>();\n    keep_sample_order = ((permute_mtx * keep_sample_double).array() != 0).cast<bool>();\n\n    w = Eigen::VectorXd::Ones(n);\n    if (norm_weights) w /= neff;\n    w_orig = mask.select(w, 0);\n    w = keep_sample_order.select(w, 0);\n\n    // find ties\n    // time with event\n    n_events = (status_order.array() == 1).count();\n\n    time_order_event.resize(n_events);\n    Eigen::VectorXi time_order_event_index(n_events);\n    int idx = 0;\n    for (unsigned int i = 0; i < n; ++i) {\n        if (status_order(i) == 1) {\n            time_order_event(idx) = time_order(i);\n            time_order_event_index(idx) = i;\n            ++idx;\n        }\n    }\n    _findTies(time_order_event, time_order_event_index);\n\n    dd = keep_sample_order.select(status_order, 0);\n    ww = w;\n    double wsum;\n    for (const auto& entry: ties_index) {\n        const Eigen::VectorXi& ties = entry.second;\n        if (ties.size() > 1) {\n            for (int i = 0; i < ties.size(); i++) {\n                dd(ties(i)) = 0;\n                ww(ties(i)) = 0;\n            }\n            dd(ties(0)) = 1;\n\n            if (norm_weights) {\n                wsum = static_cast<double>(ties.size())/neff;\n            } else {\n                wsum = static_cast<double>(ties.size());\n            }\n            ww(ties(0)) = wsum;\n        }\n    }\n    rskcount = cumulativeSum(dd);\n\n\t// R matrix\n\tn_unique_time = time_first_index.size();\n\tR.resize(n, n_unique_time);\n    // R = Eigen::MatrixXd::Zero(n_unique_time, n);\n\tfor (unsigned int k = 0; k < n_unique_time; ++k) {\n\t\tif (k < n_unique_time - 1) {\n\t\t\tfor (unsigned int i = time_first_index[k]; i < time_first_index[k+1]; ++i){\n\t\t\t\tR.insert(i, k) = 1;\n                // R(k,i) = 1;\n\t\t\t}\n\t\t} else {\n\t\t\tfor (unsigned int i = time_first_index[k]; i < n; ++i){\n\t\t\t\tR.insert(i, k) = 1;\n                // R(k,i) = 1;\n\t\t\t}\n\t\t}\n\t}\n    R.makeCompressed();\n\t\n\tww_k.resize(n_unique_time);\n\tint idx_t = 0;\n\tfor(unsigned int i = 0; i < n; i++) {\n    \tif(dd(i) == 1) {\n\t\t\tww_k(idx_t) = ww(i);\n\t\t\tidx_t += 1;\n    \t}\n\t}\n}\n\nvoid survival_data::_getOrder(const Eigen::VectorXd& time, const Eigen::VectorXd& status) {\n    // order = Eigen::seq(0, n - 1);\n    Eigen::VectorXi order(n);\n    for (unsigned int i = 0; i < n; ++i) {\n        order(i) = i;\n    }\n    \n    std::sort(order.data(), order.data() + n, [&](int i, int j) {\n        if (time(i) < time(j)) {\n            return true;\n        } else if (time(i) > time(j)) {\n            return false;\n        } else {\n            return status(i) > status(j);\n        }\n    });\n\n    permute_mtx.resize(n, 
n);\n    permute_mtx.reserve(VectorXi::Constant(n,1));\n    for (unsigned int i = 0; i < n; ++i) {\n        permute_mtx.insert(i, order(i)) = 1.0;\n    }\n    permute_mtx.makeCompressed();\n}\n\nvoid survival_data::_findTies(const Eigen::VectorXd& x_sorted, const Eigen::VectorXi& index) {\n    int n_times = x_sorted.size();\n    // indices for each unique time\n    for (int i = 0; i < n_times; ++i) {\n        unique_time_indices[x_sorted(i)].push_back(index(i));\n    }\n\n    for (const auto& entry: unique_time_indices) {\n        double time = entry.first;\n        const vector<int>& indices = entry.second;\n\n        time_first_index.conservativeResize(time_first_index.size() + 1);\n        time_first_index(time_first_index.size() - 1) = indices[0];\n\n        if (indices.size() > 1) {\n            Eigen::VectorXi ties(indices.size());\n            for (unsigned int i = 0; i < indices.size(); ++i) {\n                ties(i) = indices[i];\n            }\n            ties_index[time] = ties;\n        }\n    }\n}\n\n// forward cumulative sum: x_cumsum(i) = x(0) + ... + x(i)\nEigen::VectorXd cumulativeSum(const Eigen::VectorXd& x) {\n    int n = x.size();\n    Eigen::VectorXd x_cumsum(n);\n    if (n == 0) return x_cumsum;\n    x_cumsum(0) = x(0);\n    for (int i = 1; i < n; ++i) {\n        x_cumsum(i) = x_cumsum(i - 1) + x(i);\n    }\n    return x_cumsum;\n}\n\n// reverse cumulative sum: x_cumsum(i) = x(i) + ... + x(n-1)\nEigen::VectorXd cumulativeSum_reverse2(const Eigen::VectorXd& x) {\n    int n = x.size();\n    Eigen::VectorXd x_cumsum(n);\n    if (n == 0) return x_cumsum;\n    x_cumsum(n-1) = x(n-1);\n    for (int i = n-2; i >= 0; --i) {\n        x_cumsum(i) = x_cumsum(i + 1) + x(i);\n    }\n    return x_cumsum;\n}"
  },
  {
    "path": "src/survival_data.hpp",
    "content": "#ifndef SURDATA_H\n#define SURDATA_H\n\n#include <iostream>\n#include \"Eigen/Dense\"\n#include \"Eigen/Core\"\n#include \"Eigen/Sparse\"\n#include <algorithm>\n#include <cmath>\n#include <map>\n#include <vector>\n\nclass survival_data {\n    public:\n        Eigen::VectorXd status;\n        ArrayXb keep_sample_order;\n\n        // number of samples and variants\n        unsigned int n, neff;\n        // number of event\n        unsigned int n_events;\n        unsigned int n_unique_time;\n        // time order\n        SpMat permute_mtx;\n        // ordered survival data;\n        Eigen::VectorXd status_order;\n        Eigen::VectorXd time_order_event;\n        // tie\n        std::map<double, std::vector<int>> unique_time_indices;\n        Eigen::VectorXd time_first_index;\n        std::map<double, Eigen::VectorXi> ties_index;\n        // weights\n        Eigen::VectorXd w, w_orig;\n        Eigen::VectorXd ww, ww_k;\n        Eigen::VectorXd dd;\n        // Eigen::SparseMatrix<int, Eigen::RowMajor> R;\n        SpMat R; // n by K\n        // Eigen::MatrixXd R;\n        Eigen::VectorXd rskcount;\n        \n        survival_data();\n        void setup(const Eigen::VectorXd& event_time, const Eigen::VectorXd& event_status, const ArrayXb& mask, const bool& norm_weights = false);\n    \n    private:\n        void _getOrder(const Eigen::VectorXd&, const Eigen::VectorXd&);\n        void _findTies(const Eigen::VectorXd&, const Eigen::VectorXi&);\n};\n\nEigen::VectorXd cumulativeSum(const Eigen::VectorXd& x);\nEigen::VectorXd cumulativeSum_reverse2(const Eigen::VectorXd& x);\n\n#endif"
  },
  {
    "path": "test/check_na.sh",
    "content": "#!/usr/bin/env bash\n\n### REGENIE TEST SCRIPT \n## For version<1.0.5.6, will get error if WITH_GZ is set since option '--gz' did not exist\n\ninfo_msg=\"Usage: ./test_bash.sh OPTIONS\\n\"\ninfo_msg+=\"  --path  path to Regenie repository\\n\"\ninfo_msg+=\"  --gz    Flag to specify compilation was done with Boost Iostream library\\n\"\nif [ \"$#\" -eq 0 ]; then\n  echo -e \"$info_msg\"; exit 1\nfi\n\n# Force only Y1 to be analyzed \n# (single trait runs should produce identical results \n#   removing NA rows of trait from phenotype file)\nphenoColArg=\"--phenoCol Y1\"\nbtArg=\"--bt\"\n\nwhile [[ \"$#\" -gt 0 ]]; do\n  case $1 in\n    --path) REGENIE_PATH=\"$2\"; shift ;;\n    --gz) WITH_GZ=1 ;;\n    --all) phenoColArg= ;;\n    --qt) btArg= ;;\n    *) echo -e \"Unknown parameter passed: $1.\\n$info_msg\"; exit 1 ;;\n  esac\n  shift\ndone\n\n\n# quick check src/example folders are present\nif [ ! -d \"${REGENIE_PATH}/src\" ] || [ ! -d \"${REGENIE_PATH}/example\" ]; then\n  echo \"ERROR: First input argument must be the directory where Regenie repo was cloned\"; exit 1\nelse\n  cd $REGENIE_PATH\nfi \n\n# If compiling was done with Boost Iostream library, use gzipped files as input\nif [ \"$WITH_GZ\" = \"1\" ]; then\n  fsuf=.gz\n  arg_gz=\"--gz\"\nfi\n\nREGENIE_PATH=$(pwd)/  # use absolute path\nregenie_bin=`ls regenie* | head -n 1`\nhelp_msg=\"Update to most recent REGENIE version (using 'git pull') and re-compile the software (using 'make clean && make').\"\n\nif [ ! -f \"$regenie_bin\" ]; then\n  echo \"ERROR: Regenie binary cannot be found. Compile the software first using 'make clean && make'\"; exit 1\nfi\n\n\n\n# Run regenie on trait Y1 (missing will be dropped automatically)\necho -e \"Regenie on Y1\\n==================================\"\nrgcmd=\"--step 1 \\\n  --bed example/example \\\n  --covarFile example/covariates.txt${fsuf} \\\n  --phenoFile example/phenotype_bin_wNA.txt \\\n  $phenoColArg \\\n  --bsize 100 \\\n  $btArg \\\n  $arg_gz \\\n  --lowmem \\\n  --lowmem-prefix tmp_rg \\\n  --out test/fit_bin_out\"\n\n# run regenie\n./$regenie_bin $rgcmd\n\n# step 2\nrgcmd=\"--step 2 \\\n  --bed example/example_3chr \\\n  --covarFile example/covariates.txt \\\n  --phenoFile example/phenotype_bin_wNA.txt \\\n  $phenoColArg \\\n  --bsize 200 \\\n  $btArg \\\n  --firth --approx \\\n  --pThresh 0.01 \\\n  --pred test/fit_bin_out_pred.list \\\n  --out test/test_bin_out_firth\"\n\n# run regenie\n./$regenie_bin $rgcmd\n\n\n###\n# Drop missing from the phenotype file for Y1\n###\necho -e \"Regenie on Y1 dropping missing from phenotype file\\n==================================================\"\necho \"Making new phenotype/covariate files dropping NAs (test/test_bin_noNA*.txt)...\"\ngrep -v \"NA\" example/phenotype_bin_wNA.txt > test/test_bin_noNA.txt\ngrep -wFf <(cut -f1,2 -d ' ' test/test_bin_noNA.txt) example/covariates.txt > test/test_bin_noNA_covs.txt\necho \"Comparing original files to that dropping individuals NAs\"\nif cmp --silent test/test_bin_noNA.txt example/phenotype_bin_wNA.txt && \\\n cmp --silent test/test_bin_noNA_covs.txt example/covariates.txt ; then\n  echo \"Uh oh, files are the same!\"; exit 1\nelse\n  echo -e \"Files are different; proceeding forward...\\n\"\nfi\n\nrgcmd=\"--step 1 \\\n  --bed example/example \\\n  --covarFile test/test_bin_noNA_covs.txt \\\n  --phenoFile test/test_bin_noNA.txt \\\n  $phenoColArg \\\n  --bsize 100 \\\n  $btArg \\\n  $arg_gz \\\n  --lowmem \\\n  --lowmem-prefix tmp_rg \\\n  --out test/fit_bin_out_noNA\"\n\n# run 
regenie\n./$regenie_bin $rgcmd\n\n# step 2\nrgcmd=\"--step 2 \\\n  --bed example/example_3chr \\\n  --covarFile test/test_bin_noNA_covs.txt \\\n  --phenoFile test/test_bin_noNA.txt \\\n  $phenoColArg \\\n  --bsize 200 \\\n  $btArg \\\n  --firth --approx \\\n  --pThresh 0.01 \\\n  --pred test/fit_bin_out_noNA_pred.list \\\n  --out test/test_bin_out_firth_noNA\"\n\n# run regenie\n./$regenie_bin $rgcmd\n\n# check files are not empty\nif [ \"`cat test/test_bin_out_firth.regenie | wc -l`\" -le 10 ]; then\n  echo \"Uh oh, result file is empty!\"; exit 1\nelif cmp --silent test/test_bin_out_firth.regenie test/test_bin_out_firth_noNA.regenie ; then\n  echo \"Result files are the same\"\nelse\n  echo \"Uh oh, result files are different\"; exit 1\nfi\n\n# cleanup\nrm test/test_bin_* test/fit_bin_out*\n"
  },
  {
    "path": "test/test_bash.sh",
    "content": "#!/usr/bin/env bash\n\n### REGENIE TEST SCRIPT \n# Functions used\nhelp_msg=\"Update to most recent REGENIE version (using 'git pull') and re-compile the software.\"\nfail_msg=\"Step 1 of REGENIE did not finish successfully.\"\nerr_msg=\"Uh oh, REGENIE did not build successfully. $help_msg\"\nprint_err () { \n  echo \"$err_msg\"; exit 1 \n}\nprint_simple_err () {\n  echo \"ERROR: ${1}\"; exit 1 \n}\nprint_custom_err () {\n  echo \"ERROR: ${1} $help_msg\"; exit 1 \n}\n\n\n### READ OPTIONS\ninfo_msg='\nUsage: ./test_bash.sh OPTIONS\n          --path  path to Regenie repository\n'\nREGENIE_PATH=$(pwd) # assume current wd\n\nwhile [[ \"$#\" -gt 0 ]]; do\n  case $1 in\n    --path) REGENIE_PATH=\"$2\"; shift ;;\n    -h|--help) echo \"$info_msg\"; exit 1;;\n    *) echo -e \"Unknown parameter passed: $1.\\n$info_msg\"; exit 1 ;;\n  esac\n  shift\ndone\n\n\n# quick check src/example folders are present\nif [ ! -d \"${REGENIE_PATH}/src\" ] || [ ! -d \"${REGENIE_PATH}/example\" ]; then\n  print_simple_err \"First input argument must be the directory where Regenie repo was cloned.\"\nelse\n  cd $REGENIE_PATH\nfi \n\nREGENIE_PATH=$(pwd)/  # use absolute path\nmntpt=\nregenie_bin=`ls regenie* | head -n 1`\n\nif [ ! -f \"$regenie_bin\" ]; then\n  print_simple_err \"Regenie binary cannot be found. Specify the Regenie source path (--path) or compile the software first.\"\nfi\n\n# If compiling was done with Boost Iostream library, use gzipped files as input\nif ./$regenie_bin --version | grep -q \"gz\"; then\n  fsuf=.gz\n  arg_gz=\"--gz\"\nfi\n\n\necho -e \"==>Running step 1 of REGENIE\"\n# Prepare regenie command to run for Step 1\n## with transposed phenotype file format\n#  --tpheno-file ${mntpt}example/tphenotype_bin.txt${fsuf} \\\n#  --tpheno-indexCol 4 \\\n#  --tpheno-ignoreCols {1:3} \\\nbasecmd=\"--step 1 \\\n  --bed ${mntpt}example/example \\\n  --exclude ${mntpt}example/snplist_rm.txt \\\n  --covarFile ${mntpt}example/covariates.txt${fsuf} \\\n  --phenoFile ${mntpt}example/phenotype_bin.txt${fsuf} \\\n  --remove ${mntpt}example/fid_iid_to_remove.txt \\\n  --bsize 100 \\\n  --bt $arg_gz\"\n\nrgcmd=\"$basecmd \\\n  --lowmem \\\n  --lowmem-prefix tmp_rg \\\n  --out ${mntpt}test/fit_bin_out\"\n\n# run regenie\n./$regenie_bin $rgcmd\n\n## quick check that the correct files have been created\nif [ ! -f \"${REGENIE_PATH}test/fit_bin_out.log\" ] || \\\n  [ ! -f \"${REGENIE_PATH}test/fit_bin_out_pred.list\" ] || \\\n  [ ! -f \"${REGENIE_PATH}test/fit_bin_out_1.loco$fsuf\" ] || \\\n  [ ! -f \"${REGENIE_PATH}test/fit_bin_out_2.loco$fsuf\" ]; then\n  print_custom_err \"$fail_msg\"\nelif [ \"`grep \\\"0.4504\\\" ${REGENIE_PATH}test/fit_bin_out.log | grep \\\"min value\\\"`\" = \"\" ]; then\n  print_custom_err \"$fail_msg\"\nfi\n\n#### Run step 1 splitting across jobs for level 0\nnjobs=4\necho -e \"==>Re-running step 1 splitting in $njobs jobs\"\n# pt1 - run regenie before l0\nrgcmd=\"$basecmd \\\n  --split-l0 ${mntpt}test/fit_bin_parallel,$njobs \\\n  --out ${mntpt}test/fit_bin_l0\"\n\n./$regenie_bin $rgcmd\nif [ ! -f \"${REGENIE_PATH}test/fit_bin_parallel.master\" ]; then\n  print_custom_err \"$fail_msg\"\nfi\n\n# pt2 - run regenie for l0\nnj=`seq 1 $njobs`\nfor job in $nj; do\n  rgcmd=\"$basecmd \\\n    --run-l0 ${mntpt}test/fit_bin_parallel.master,$job \\\n    --out ${mntpt}test/fit_bin_l0\"\n\n  ./$regenie_bin $rgcmd\n  if [ ! 
-f \"${REGENIE_PATH}test/fit_bin_parallel_job${job}_l0_Y1\" ]; then\n    print_custom_err \"$fail_msg\"\n  fi\ndone\n\n\n# pt3 - run regenie for l1\nrgcmd=\"$basecmd \\\n  --run-l1 ${mntpt}test/fit_bin_parallel.master \\\n  --out ${mntpt}test/fit_bin_l1\"\n\n./$regenie_bin $rgcmd\n\nif [ ! -f \"${REGENIE_PATH}test/fit_bin_l1_1.loco$fsuf\" ]; then\n  print_custom_err \"$fail_msg\"\nelif ! cmp --silent \\\n  \"${REGENIE_PATH}test/fit_bin_out_1.loco$fsuf\" \\\n  \"${REGENIE_PATH}test/fit_bin_l1_1.loco$fsuf\" \nthen\n  print_custom_err \"$fail_msg\"\nelif ! cmp --silent \\\n  \"${REGENIE_PATH}test/fit_bin_out_2.loco$fsuf\" \\\n  \"${REGENIE_PATH}test/fit_bin_l1_2.loco$fsuf\" \nthen\n  print_custom_err \"$fail_msg\"\nfi\n\n\n\n##########\n##########\n#### Step 2\ni=1\necho -e \"==>Running step 2 of REGENIE; test #$i\"\nrgcmd=\"--step 2 \\\n  --bgen ${mntpt}example/example.bgen \\\n  --covarFile ${mntpt}example/covariates.txt${fsuf} \\\n  --phenoFile ${mntpt}example/phenotype_bin.txt${fsuf} \\\n  --remove ${mntpt}example/fid_iid_to_remove.txt \\\n  --bsize 200 \\\n  --bt \\\n  --firth --approx \\\n  --pThresh 0.01 \\\n  --pred ${mntpt}test/fit_bin_out_pred.list \\\n  $arg_gz \\\n  --out ${mntpt}test/test_bin_out_firth\"\n\n# run regenie\n./$regenie_bin $rgcmd\n\n##  do this way so zcat works on OSX\nif [ -f ${REGENIE_PATH}test/test_bin_out_firth_Y1.regenie.gz ]; then\n  ( zcat < ${REGENIE_PATH}test/test_bin_out_firth_Y1.regenie.gz ) > ${REGENIE_PATH}test/test_bin_out_firth_Y1.regenie\nfi\n\nif [ \"`cat ${REGENIE_PATH}test/test_bin_out_firth_Y1.regenie | wc -l`\" != \"1001\" ]\nthen\n  print_err\nfi\n\n\n(( i++ ))\necho -e \"\\n==>Running test #$i\\n\"\n# interaction tests\nrgcmd=\"--step 2 \\\n  --bed ${mntpt}example/example \\\n  --covarFile ${mntpt}example/covariates_wBin.txt \\\n  --phenoFile ${mntpt}example/phenotype_bin.txt${fsuf} \\\n  --bsize 200 \\\n  --force-qt \\\n  --ignore-pred \\\n  --covarColList V1,V5 \\\n  --catCovarList V5 \\\n  --interaction V5 \\\n  --out ${mntpt}test/test_bin_out_inter\"\n\n# run regenie\n./$regenie_bin $rgcmd\n\nif [ `grep \"^1 1 .*ADD-INT\" ${REGENIE_PATH}test/test_bin_out_inter_Y1.regenie | wc -l` != 5 ]; then\n  print_err\nfi\n\n\n(( i++ ))\necho -e \"\\n==>Running test #$i\\n\"\n# interaction tests\nrgcmd=\"--step 2 \\\n  --bed ${mntpt}example/example --ref-first \\\n  --covarFile ${mntpt}example/covariates.txt \\\n  --phenoFile ${mntpt}example/phenotype_bin.txt${fsuf} \\\n  --bsize 200 \\\n  --ignore-pred \\\n  --force-qt \\\n  --interaction-snp 1 \\\n  --out ${mntpt}test/test_bin_out_inter\"\n\n# run regenie\n./$regenie_bin $rgcmd\n\nrgcmd+=\"2 --interaction-file bgen,example/example.bgen --interaction-file-reffirst\"\n# run regenie\n./$regenie_bin $rgcmd\n\nif ! cmp --silent \\\n  ${REGENIE_PATH}test/test_bin_out_inter_Y1.regenie \\\n  ${REGENIE_PATH}test/test_bin_out_inter2_Y1.regenie \nthen\n  print_err\nfi\n\n\n(( i++ ))\necho -e \"==>Running test #$i\"\n# Next test\nbasecmd=\"--step 2 \\\n  --bed ${mntpt}example/example_3chr \\\n  --ref-first \\\n  --covarFile ${mntpt}example/covariates_wBin.txt \\\n  --covarColList V{1:2},V4 \\\n  --phenoFile ${mntpt}example/phenotype_bin.txt${fsuf} \\\n  --phenoColList Y2 \\\n  --bsize 100 \\\n  --test dominant \\\n  --force-qt \\\n  --ignore-pred\"\nrgcmd=\"$basecmd \\\n  --chrList 2,3 \\\n  --write-samples \\\n  --print-pheno \\\n  --out ${mntpt}test/test_out\"\n\n# run regenie\n./$regenie_bin $rgcmd\n\n# check files\nif [ ! 
-f \"${REGENIE_PATH}test/test_out_Y2.regenie.ids\" -o -f \"${REGENIE_PATH}test/test_out_Y1.regenie.ids\" ]\nthen\n  print_err\nelif (( $(head -n 1 ${REGENIE_PATH}test/test_out_Y2.regenie.ids | cut -f1) != \"Y2\" )); then\n  print_err\nelif (( $(head -n 1 \"${REGENIE_PATH}test/test_out_Y2.regenie.ids\" | tr '\\t' '\\n' | wc -l) != 2 )); then\n  print_err\nelif (( `grep \"mog_\" \"${REGENIE_PATH}test/test_out_Y2.regenie\" | wc -l` > 0 )); then\n  print_err\nelif (( `grep \"ADD\" \"${REGENIE_PATH}test/test_out_Y2.regenie\" | wc -l` > 0 )); then\n  print_err\nelif [ \"`cut -d ' ' -f1-5 ${REGENIE_PATH}test/test_out_Y2.regenie | sed '2q;d'`\" != \"`grep \\\"^2\\\" ${REGENIE_PATH}example/example_3chr.bim | head -n 1 | awk '{print $1,$4,$2,$5,$6}'`\" ]; then\n  print_err\nfi\n\n\n(( i++ ))\necho -e \"==>Running test #$i\"\n# Next test\nrgcmd=\"$basecmd \\\n  --force-qt \\\n  --catCovarList V4 \\\n  --extract ${mntpt}test/test_out.snplist \\\n  --out ${mntpt}test/test_out_extract\"\n\nawk '{if($1!=1) {print $2}}'  ${REGENIE_PATH}example/example_3chr.bim > ${REGENIE_PATH}test/test_out.snplist\n\n# run regenie\n./$regenie_bin $rgcmd\n\nif ! cmp --silent \\\n  ${REGENIE_PATH}test/test_out_Y2.regenie \\\n  ${REGENIE_PATH}test/test_out_extract_Y2.regenie \nthen\n  print_err\nelif (( `grep \"n_cov = 3\" \"${REGENIE_PATH}test/test_out_extract.log\" | wc -l` != 1 )); then\n  print_err\nfi\n\n(( i++ ))\necho -e \"==>Running test #$i\"\n# First command (V1)\nrgcmd=\"--step 2 \\\n  --bed ${mntpt}example/example_3chr_masks \\\n  --covarFile ${mntpt}example/covariates.txt${fsuf} \\\n  --phenoFile ${mntpt}example/phenotype_bin.txt${fsuf} \\\n  --remove ${mntpt}example/fid_iid_to_remove.txt \\\n  --bsize 10 \\\n  --ignore-pred \\\n  --force-qt \\\n  --htp TEST \\\n  --out ${mntpt}test/test_out_masks_V1\"\n# run regenie\n./$regenie_bin $rgcmd\n\n# Second command (V2)\n# build masks\nawk '{print $4}' ${mntpt}example/example_3chr.setlist | tr ',' '\\n' > ${REGENIE_PATH}test/tmp1.txt \nrgcmd=\"--step 2 \\\n  --ignore-pred \\\n  --bed ${mntpt}example/example_3chr \\\n  --extract ${mntpt}test/tmp1.txt \\\n  --covarFile ${mntpt}example/covariates.txt${fsuf} \\\n  --phenoFile ${mntpt}example/phenotype_bin.txt${fsuf} \\\n  --remove ${mntpt}example/fid_iid_to_remove.txt \\\n  --set-list ${mntpt}example/example_3chr.setlist \\\n  --anno-file ${mntpt}example/example_3chr.annotations \\\n  --mask-def ${mntpt}example/example_3chr.masks \\\n  --write-mask \\\n  --write-setlist ${mntpt}example/example_3chr.write_sets \\\n  --force-qt \\\n  --bsize 15 \\\n  --aaf-bins 0.2 \\\n  --chrList 1,3 \\\n  --htp TEST \\\n  --write-mask-snplist \\\n  --out ${mntpt}test/test_out_masks_V2\"\n\n# run regenie\n./$regenie_bin $rgcmd \n\nhead ${REGENIE_PATH}test/test_out_masks_V2_Y1.regenie -n 3 | tail -n 2 | cut --complement -f4,5 > ${REGENIE_PATH}test/tmp1.txt\ntail -n 1 ${REGENIE_PATH}test/test_out_masks_V2_Y1.regenie | cut --complement -f4,5 >> ${REGENIE_PATH}test/tmp1.txt\ncat ${REGENIE_PATH}test/test_out_masks_V1_Y1.regenie | cut --complement -f4,5 > ${REGENIE_PATH}test/tmp2.txt\n\nif ! cmp --silent \\\n  ${REGENIE_PATH}test/tmp1.txt \\\n  ${REGENIE_PATH}test/tmp2.txt ; then\n  print_err\nelif [ ! 
-f ${REGENIE_PATH}test/test_out_masks_V2_masks.bed ]; then\n  print_err\nelif [ \"$(hexdump -e \\\"%07_ax\\ \\\"\\ 16/1\\ \\\"\\ %02x\\\"\\ \\\"\\\\n\\\"  -n 3 ${REGENIE_PATH}test/test_out_masks_V2_masks.bed | head -n 1 | awk '{print $2,$3,$4}' | tr ' ' ',')\" != \"6c,1b,01\" ]; then\n  print_err\nelif [ \"`wc -l ${REGENIE_PATH}test/test_out_masks_V2_masks.{bim,fam} | awk '{print $1}' | head -n 2| paste -sd','`\" != \"4,494\" ]; then\n  print_err\nelif [ ! -f ${REGENIE_PATH}test/test_out_masks_V2_masks.snplist ]; then\n  print_err\nelif [ \"`cat ${REGENIE_PATH}test/test_out_masks_V2_tmp2.setlist | head -n 1 | tr ',' '\\n' | wc -l`\" != \"2\" ]; then\n  print_err\nfi\n\n\n(( i++ ))\necho -e \"==>Running test #$i\"\n# build masks\nawk '{print $4}' ${mntpt}example/example_3chr.setlist | tr ',' '\\n' > ${REGENIE_PATH}test/tmp1.txt \nrgcmd=\"--step 2 \\\n  --ignore-pred \\\n  --bed ${mntpt}example/example_3chr \\\n  --extract ${mntpt}test/tmp1.txt \\\n  --covarFile ${mntpt}example/covariates.txt${fsuf} \\\n  --phenoFile ${mntpt}example/phenotype_bin.txt${fsuf} \\\n  --set-list ${mntpt}example/example_3chr.setlist \\\n  --anno-file ${mntpt}example/example_3chr.annotations \\\n  --mask-def ${mntpt}example/example_3chr.masks \\\n  --mask-lovo SET1,M1,0.2 \\\n  --force-qt \\\n  --htp TEST \\\n  --out ${mntpt}test/test_out_masks_loo\"\n\n# run regenie\n./$regenie_bin $rgcmd \n\nif [ ! -f ${REGENIE_PATH}test/test_out_masks_loo_Y1.regenie ]; then\n  print_err\nelif [ `cat ${REGENIE_PATH}test/test_out_masks_loo_Y1.regenie | wc -l` != 21 ]; then\n  print_err\nelif [ `grep \"_mog\" ${REGENIE_PATH}test/test_out_masks_loo_Y1.regenie | wc -l` != 18 ]; then\n  print_err\nfi\n\n\n(( i++ ))\necho -e \"==>Running test #$i\"\n# build masks using set domains\nrgcmd=\"--step 2 \\\n  --ignore-pred \\\n  --bed ${mntpt}example/example_3chr \\\n  --covarFile ${mntpt}example/covariates.txt${fsuf} \\\n  --phenoFile ${mntpt}example/phenotype_bin.txt${fsuf} \\\n  --remove ${mntpt}example/fid_iid_to_remove.txt \\\n  --set-list ${mntpt}example/example_3chr.setlist \\\n  --anno-file ${mntpt}example/example_3chr.annotationsV2 \\\n  --mask-def ${mntpt}example/example_3chr.masks \\\n  --check-burden-files \\\n  --force-qt \\\n  --bsize 20 \\\n  --aaf-bins 0.2 \\\n  --out ${mntpt}test/test_out_masks_V3\"\n\n# run regenie\n./$regenie_bin $rgcmd \n\nif ! [[ \"`head -n 1 ${REGENIE_PATH}test/test_out_masks_V3_Y1.regenie`\" =~ ^\\#\\#MASKS.* ]]\nthen\n  print_err\nelif [ `grep \"SET2.*.M1\" ${REGENIE_PATH}test/test_out_masks_V3_Y1.regenie | wc -l` != \"4\" ]\nthen\n  print_err\nelif [ `grep -e \"->Detected 1\" ${REGENIE_PATH}test/test_out_masks_V3_masks_report.txt | wc -l` != \"4\" ]\nthen\n  print_err\nfi\n\n(( i++ ))\necho -e \"==>Running test #$i\"\n# conditional analyses\nrgcmd=\"${basecmd/_3chr/} \\\n  --condition-list ${mntpt}example/snplist_rm.txt \\\n  --sex-specific female \\\n  --out ${mntpt}test/test_out_cond\"\n\n# run regenie\n./$regenie_bin $rgcmd\n\nrgcmd=\"${basecmd/_3chr/} \\\n  --condition-list ${mntpt}example/snplist_rm.txt \\\n  --condition-file pgen,${mntpt}example/example \\\n  --out ${mntpt}test/test_out_cond2\"\n\n# run regenie\n./$regenie_bin $rgcmd\n\nif ! 
cmp --silent \\\n  ${REGENIE_PATH}test/test_out_cond_Y2.regenie \\\n  ${REGENIE_PATH}test/test_out_cond2_Y2.regenie \nthen\n  print_err\nelif [ `grep \"n_used = 6\" ${REGENIE_PATH}test/test_out_cond*log | wc -l` != \"2\" ]; then\n  print_err\nfi\n\n\n# with skat\n(( i++ ))\necho -e \"==>Running test #$i\"\nrgcmd=\"--step 2 \\\n  --ignore-pred \\\n  --bed ${mntpt}example/example_3chr \\\n  --covarFile ${mntpt}example/covariates.txt${fsuf} \\\n  --phenoFile ${mntpt}example/phenotype_bin.txt${fsuf} \\\n  --phenoCol Y1 \\\n  --set-list ${mntpt}example/example_3chr.setlist \\\n  --anno-file ${mntpt}example/example_3chr.annotations \\\n  --mask-def ${mntpt}example/example_3chr.masks \\\n  --vc-tests skat \\\n  --force-qt \\\n  --bsize 15 \\\n  --aaf-bins 0.2 \\\n  --write-mask-snplist \\\n  --out ${mntpt}test/test_out_vc\"\n\n# run regenie\n./$regenie_bin $rgcmd \n\nif ! grep -q \"all.*SKAT\" ${REGENIE_PATH}test/test_out_vc_Y1.regenie  \nthen\n  print_err\nfi\n\n##############\necho \"SUCCESS: REGENIE build passed the tests!\"\n# file cleanup\nrm -f ${REGENIE_PATH}test/fit_bin_* ${REGENIE_PATH}test/test_bin_out* ${REGENIE_PATH}test/test_out* ${REGENIE_PATH}test/tmp[12].txt\n\n"
  },
  {
    "path": "test/test_conda.sh",
    "content": "#!/usr/bin/env bash\n\n### REGENIE TEST SCRIPT \n# Functions used\nhelp_msg=\"Rebuild the regenie package using conda install --force-install (or consider compiling from source)\"\nfail_msg=\"Step 1 of REGENIE did not finish successfully.\"\nerr_msg=\"Uh oh, REGENIE did not build successfully. $help_msg\"\nprint_err () { \n  echo \"$err_msg\"; exit 1 \n}\nprint_simple_err () {\n  echo \"ERROR: ${1}\"; exit 1 \n}\nprint_custom_err () {\n  echo \"ERROR: ${1} $help_msg\"; exit 1 \n}\n\n\n### READ OPTIONS\ninfo_msg='\nUsage: ./test_bash.sh OPTIONS\n   --path  path to Regenie repository\n   '\nREGENIE_PATH=$(pwd)  # Assume current directory\n\nwhile [[ \"$#\" -gt 0 ]]; do\n  case $1 in\n    --path) REGENIE_PATH=\"$2\"; shift ;;\n    -h|--help) echo \"$info_msg\"; exit 1;;\n    *) echo -e \"Unknown parameter passed: '$1'\\n$info_msg\"; exit 1;;\n  esac\n  shift\ndone\n\n\n# quick check src/example folders are present\nif [ ! -d \"${REGENIE_PATH}/src\" ] || [ ! -d \"${REGENIE_PATH}/example\" ]; then\n  print_simple_err \"cannot find the REGENIE source directory; see help menu using -h\"\nelse\n  cd $REGENIE_PATH\nfi \nREGENIE_PATH=$(pwd)/  # use absolute path\nmntpt=\n\nif ! command -v regenie &> /dev/null\nthen\n  print_simple_err \"Regenie binary cannot be found.\"\nfi\n\n# If compiling was done with Boost Iostream library, use gzipped files as input\nif regenie --version | grep -q \"gz\"; then\n  fsuf=.gz\n  arg_gz=\"--gz\"\nfi\n\necho -e \"==>Running step 1 of REGENIE\"\n# Prepare regenie command to run for Step 1\n## with transposed phenotype file format\n#  --tpheno-file ${mntpt}example/tphenotype_bin.txt${fsuf} \\\n#  --tpheno-indexCol 4 \\\n#  --tpheno-ignoreCols {1:3} \\\nbasecmd=\"--step 1 \\\n  --bed ${mntpt}example/example \\\n  --exclude ${mntpt}example/snplist_rm.txt \\\n  --covarFile ${mntpt}example/covariates.txt${fsuf} \\\n  --phenoFile ${mntpt}example/phenotype_bin.txt${fsuf} \\\n  --remove ${mntpt}example/fid_iid_to_remove.txt \\\n  --bsize 100 \\\n  --bt $arg_gz\"\n\nrgcmd=\"$basecmd \\\n  --lowmem \\\n  --lowmem-prefix tmp_rg \\\n  --out ${mntpt}test/fit_bin_out\"\n\n# run regenie\nregenie $rgcmd\n\n## quick check that the correct files have been created\nif [ ! -f \"${REGENIE_PATH}test/fit_bin_out.log\" ] || \\\n  [ ! -f \"${REGENIE_PATH}test/fit_bin_out_pred.list\" ] || \\\n  [ ! -f \"${REGENIE_PATH}test/fit_bin_out_1.loco$fsuf\" ] || \\\n  [ ! -f \"${REGENIE_PATH}test/fit_bin_out_2.loco$fsuf\" ]; then\n  print_custom_err \"$fail_msg\"\nelif [ \"`grep \\\"0.4504\\\" ${REGENIE_PATH}test/fit_bin_out.log | grep \\\"min value\\\"`\" = \"\" ]; then\n  print_custom_err \"$fail_msg\"\nfi\n\n#### Run step 1 splitting across jobs for level 0\nnjobs=4\necho -e \"==>Re-running step 1 splitting in $njobs jobs\"\n# pt1 - run regenie before l0\nrgcmd=\"$basecmd \\\n  --split-l0 ${mntpt}test/fit_bin_parallel,$njobs \\\n  --out ${mntpt}test/fit_bin_l0\"\n\nregenie $rgcmd\nif [ ! -f \"${REGENIE_PATH}test/fit_bin_parallel.master\" ]; then\n  print_custom_err \"$fail_msg\"\nfi\n\n# pt2 - run regenie for l0\nnj=`seq 1 $njobs`\nfor job in $nj; do\n  rgcmd=\"$basecmd \\\n    --run-l0 ${mntpt}test/fit_bin_parallel.master,$job \\\n    --out ${mntpt}test/fit_bin_l0\"\n\n  regenie $rgcmd\n  if [ ! -f \"${REGENIE_PATH}test/fit_bin_parallel_job${job}_l0_Y1\" ]; then\n    print_custom_err \"$fail_msg\"\n  fi\ndone\n\n\n# pt3 - run regenie for l1\nrgcmd=\"$basecmd \\\n  --run-l1 ${mntpt}test/fit_bin_parallel.master \\\n  --out ${mntpt}test/fit_bin_l1\"\n\nregenie $rgcmd\n\nif [ ! 
-f \"${REGENIE_PATH}test/fit_bin_l1_1.loco$fsuf\" ]; then\n  print_custom_err \"$fail_msg\"\nelif ! cmp --silent \\\n  \"${REGENIE_PATH}test/fit_bin_out_1.loco$fsuf\" \\\n  \"${REGENIE_PATH}test/fit_bin_l1_1.loco$fsuf\" \nthen\n  print_custom_err \"$fail_msg\"\nelif ! cmp --silent \\\n  \"${REGENIE_PATH}test/fit_bin_out_2.loco$fsuf\" \\\n  \"${REGENIE_PATH}test/fit_bin_l1_2.loco$fsuf\" \nthen\n  print_custom_err \"$fail_msg\"\nfi\n\n\n\n##########\n##########\n#### Step 2\ni=1\necho -e \"==>Running step 2 of REGENIE; test #$i\"\nrgcmd=\"--step 2 \\\n  --bgen ${mntpt}example/example.bgen \\\n  --covarFile ${mntpt}example/covariates.txt${fsuf} \\\n  --phenoFile ${mntpt}example/phenotype_bin.txt${fsuf} \\\n  --remove ${mntpt}example/fid_iid_to_remove.txt \\\n  --bsize 200 \\\n  --bt \\\n  --firth --approx \\\n  --pThresh 0.01 \\\n  --pred ${mntpt}test/fit_bin_out_pred.list \\\n  $arg_gz \\\n  --out ${mntpt}test/test_bin_out_firth\"\n\n# run regenie\nregenie $rgcmd\n\n##  do this way so zcat works on OSX\nif [ -f ${REGENIE_PATH}test/test_bin_out_firth_Y1.regenie.gz ]; then\n  ( zcat < ${REGENIE_PATH}test/test_bin_out_firth_Y1.regenie.gz ) > ${REGENIE_PATH}test/test_bin_out_firth_Y1.regenie\nfi\n\nif [ \"`cat ${REGENIE_PATH}test/test_bin_out_firth_Y1.regenie | wc -l`\" != \"1001\" ]\nthen\n  print_err\nfi\n\n\n(( i++ ))\necho -e \"\\n==>Running test #$i\\n\"\n# interaction tests\nrgcmd=\"--step 2 \\\n  --bed ${mntpt}example/example \\\n  --covarFile ${mntpt}example/covariates_wBin.txt \\\n  --phenoFile ${mntpt}example/phenotype_bin.txt${fsuf} \\\n  --bsize 200 \\\n  --ignore-pred \\\n  --covarColList V1,V5 \\\n  --catCovarList V5 \\\n  --interaction V5 \\\n  --out ${mntpt}test/test_bin_out_inter\"\n\n# run regenie\nregenie $rgcmd\n\nif [ `grep \"^1 1 .*ADD-INT\" ${REGENIE_PATH}test/test_bin_out_inter_Y1.regenie | wc -l` != 5 ]; then\n  print_err\nfi\n\n\n(( i++ ))\necho -e \"\\n==>Running test #$i\\n\"\n# interaction tests\nrgcmd=\"--step 2 \\\n  --bed ${mntpt}example/example --ref-first \\\n  --covarFile ${mntpt}example/covariates.txt \\\n  --phenoFile ${mntpt}example/phenotype_bin.txt${fsuf} \\\n  --bsize 200 \\\n  --ignore-pred \\\n  --interaction-snp 1 \\\n  --out ${mntpt}test/test_bin_out_inter\"\n\n# run regenie\nregenie $rgcmd\n\nrgcmd+=\"2 --interaction-file bgen,example/example.bgen --interaction-file-reffirst\"\n# run regenie\nregenie $rgcmd\n\nif ! cmp --silent \\\n  ${REGENIE_PATH}test/test_bin_out_inter_Y1.regenie \\\n  ${REGENIE_PATH}test/test_bin_out_inter2_Y1.regenie \nthen\n  print_err\nfi\n\n\n(( i++ ))\necho -e \"==>Running test #$i\"\n# Next test\nbasecmd=\"--step 2 \\\n  --bed ${mntpt}example/example_3chr \\\n  --ref-first \\\n  --covarFile ${mntpt}example/covariates_wBin.txt \\\n  --covarColList V{1:2},V4 \\\n  --phenoFile ${mntpt}example/phenotype_bin.txt${fsuf} \\\n  --phenoColList Y2 \\\n  --bsize 100 \\\n  --test dominant \\\n  --ignore-pred\"\nrgcmd=\"$basecmd \\\n  --chrList 2,3 \\\n  --write-samples \\\n  --print-pheno \\\n  --out ${mntpt}test/test_out\"\n\n# run regenie\nregenie $rgcmd\n\n# check files\nif [ ! 
-f \"${REGENIE_PATH}test/test_out_Y2.regenie.ids\" -o -f \"${REGENIE_PATH}test/test_out_Y1.regenie.ids\" ]\nthen\n  print_err\nelif (( $(head -n 1 ${REGENIE_PATH}test/test_out_Y2.regenie.ids | cut -f1) != \"Y2\" )); then\n  print_err\nelif (( $(head -n 1 \"${REGENIE_PATH}test/test_out_Y2.regenie.ids\" | tr '\\t' '\\n' | wc -l) != 2 )); then\n  print_err\nelif (( `grep \"mog_\" \"${REGENIE_PATH}test/test_out_Y2.regenie\" | wc -l` > 0 )); then\n  print_err\nelif (( `grep \"ADD\" \"${REGENIE_PATH}test/test_out_Y2.regenie\" | wc -l` > 0 )); then\n  print_err\nelif [ \"`cut -d ' ' -f1-5 ${REGENIE_PATH}test/test_out_Y2.regenie | sed '2q;d'`\" != \"`grep \\\"^2\\\" ${REGENIE_PATH}example/example_3chr.bim | head -n 1 | awk '{print $1,$4,$2,$5,$6}'`\" ]; then\n  print_err\nfi\n\n\n(( i++ ))\necho -e \"==>Running test #$i\"\n# Next test\nrgcmd=\"$basecmd \\\n  --catCovarList V4 \\\n  --extract ${mntpt}test/test_out.snplist \\\n  --out ${mntpt}test/test_out_extract\"\n\nawk '{if($1!=1) {print $2}}'  ${REGENIE_PATH}example/example_3chr.bim > ${REGENIE_PATH}test/test_out.snplist\n\n# run regenie\nregenie $rgcmd\n\nif ! cmp --silent \\\n  ${REGENIE_PATH}test/test_out_Y2.regenie \\\n  ${REGENIE_PATH}test/test_out_extract_Y2.regenie \nthen\n  print_err\nelif (( `grep \"n_cov = 3\" \"${REGENIE_PATH}test/test_out_extract.log\" | wc -l` != 1 )); then\n  print_err\nfi\n\n(( i++ ))\necho -e \"==>Running test #$i\"\n# First command (V1)\nrgcmd=\"--step 2 \\\n  --bed ${mntpt}example/example_3chr_masks \\\n  --covarFile ${mntpt}example/covariates.txt${fsuf} \\\n  --phenoFile ${mntpt}example/phenotype_bin.txt${fsuf} \\\n  --remove ${mntpt}example/fid_iid_to_remove.txt \\\n  --bsize 10 \\\n  --ignore-pred \\\n  --htp TEST \\\n  --out ${mntpt}test/test_out_masks_V1\"\n# run regenie\nregenie $rgcmd\n\n# Second command (V2)\n# build masks\nawk '{print $4}' ${mntpt}example/example_3chr.setlist | tr ',' '\\n' > ${REGENIE_PATH}test/tmp1.txt \nrgcmd=\"--step 2 \\\n  --ignore-pred \\\n  --bed ${mntpt}example/example_3chr \\\n  --extract ${mntpt}test/tmp1.txt \\\n  --covarFile ${mntpt}example/covariates.txt${fsuf} \\\n  --phenoFile ${mntpt}example/phenotype_bin.txt${fsuf} \\\n  --remove ${mntpt}example/fid_iid_to_remove.txt \\\n  --set-list ${mntpt}example/example_3chr.setlist \\\n  --anno-file ${mntpt}example/example_3chr.annotations \\\n  --mask-def ${mntpt}example/example_3chr.masks \\\n  --write-mask \\\n  --write-setlist ${mntpt}example/example_3chr.write_sets \\\n  --bsize 15 \\\n  --aaf-bins 0.2 \\\n  --chrList 1,3 \\\n  --htp TEST \\\n  --write-mask-snplist \\\n  --out ${mntpt}test/test_out_masks_V2\"\n\n# run regenie\nregenie $rgcmd \n\nhead ${REGENIE_PATH}test/test_out_masks_V2_Y1.regenie -n 3 | tail -n 2 | cut --complement -f4,5 > ${REGENIE_PATH}test/tmp1.txt\ntail -n 1 ${REGENIE_PATH}test/test_out_masks_V2_Y1.regenie | cut --complement -f4,5 >> ${REGENIE_PATH}test/tmp1.txt\ncat ${REGENIE_PATH}test/test_out_masks_V1_Y1.regenie | cut --complement -f4,5 > ${REGENIE_PATH}test/tmp2.txt\n\nif ! cmp --silent \\\n  ${REGENIE_PATH}test/tmp1.txt \\\n  ${REGENIE_PATH}test/tmp2.txt ; then\n  print_err\nelif [ ! 
-f ${REGENIE_PATH}test/test_out_masks_V2_masks.bed ]; then\n  print_err\nelif [ \"$(hexdump -e \\\"%07_ax\\ \\\"\\ 16/1\\ \\\"\\ %02x\\\"\\ \\\"\\\\n\\\"  -n 3 ${REGENIE_PATH}test/test_out_masks_V2_masks.bed | head -n 1 | awk '{print $2,$3,$4}' | tr ' ' ',')\" != \"6c,1b,01\" ]; then\n  print_err\nelif [ \"`wc -l ${REGENIE_PATH}test/test_out_masks_V2_masks.{bim,fam} | awk '{print $1}' | head -n 2| paste -sd','`\" != \"4,494\" ]; then\n  print_err\nelif [ ! -f ${REGENIE_PATH}test/test_out_masks_V2_masks.snplist ]; then\n  print_err\nelif [ \"`cat ${REGENIE_PATH}test/test_out_masks_V2_tmp2.setlist | head -n 1 | tr ',' '\\n' | wc -l`\" != \"2\" ]; then\n  print_err\nfi\n\n\n(( i++ ))\necho -e \"==>Running test #$i\"\n# build masks\nawk '{print $4}' ${mntpt}example/example_3chr.setlist | tr ',' '\\n' > ${REGENIE_PATH}test/tmp1.txt \nrgcmd=\"--step 2 \\\n  --ignore-pred \\\n  --bed ${mntpt}example/example_3chr \\\n  --extract ${mntpt}test/tmp1.txt \\\n  --covarFile ${mntpt}example/covariates.txt${fsuf} \\\n  --phenoFile ${mntpt}example/phenotype_bin.txt${fsuf} \\\n  --set-list ${mntpt}example/example_3chr.setlist \\\n  --anno-file ${mntpt}example/example_3chr.annotations \\\n  --mask-def ${mntpt}example/example_3chr.masks \\\n  --mask-lovo SET1,M1,0.2 \\\n  --htp TEST \\\n  --out ${mntpt}test/test_out_masks_loo\"\n\n# run regenie\nregenie $rgcmd \n\nif [ ! -f ${REGENIE_PATH}test/test_out_masks_loo_Y1.regenie ]; then\n  print_err\nelif [ `cat ${REGENIE_PATH}test/test_out_masks_loo_Y1.regenie | wc -l` != 21 ]; then\n  print_err\nelif [ `grep \"_mog\" ${REGENIE_PATH}test/test_out_masks_loo_Y1.regenie | wc -l` != 18 ]; then\n  print_err\nfi\n\n\n(( i++ ))\necho -e \"==>Running test #$i\"\n# build masks using set domains\nrgcmd=\"--step 2 \\\n  --ignore-pred \\\n  --bed ${mntpt}example/example_3chr \\\n  --covarFile ${mntpt}example/covariates.txt${fsuf} \\\n  --phenoFile ${mntpt}example/phenotype_bin.txt${fsuf} \\\n  --remove ${mntpt}example/fid_iid_to_remove.txt \\\n  --set-list ${mntpt}example/example_3chr.setlist \\\n  --anno-file ${mntpt}example/example_3chr.annotationsV2 \\\n  --mask-def ${mntpt}example/example_3chr.masks \\\n  --check-burden-files \\\n  --bsize 20 \\\n  --aaf-bins 0.2 \\\n  --out ${mntpt}test/test_out_masks_V3\"\n\n# run regenie\nregenie $rgcmd \n\nif ! [[ \"`head -n 1 ${REGENIE_PATH}test/test_out_masks_V3_Y1.regenie`\" =~ ^\\#\\#MASKS.* ]]\nthen\n  print_err\nelif [ `grep \"SET2.*.M1\" ${REGENIE_PATH}test/test_out_masks_V3_Y1.regenie | wc -l` != \"4\" ]\nthen\n  print_err\nelif [ `grep -e \"->Detected 1\" ${REGENIE_PATH}test/test_out_masks_V3_masks_report.txt | wc -l` != \"4\" ]\nthen\n  print_err\nfi\n\n(( i++ ))\necho -e \"==>Running test #$i\"\n# conditional analyses\nrgcmd=\"${basecmd/_3chr/} \\\n  --condition-list ${mntpt}example/snplist_rm.txt \\\n  --sex-specific female \\\n  --out ${mntpt}test/test_out_cond\"\n\n# run regenie\nregenie $rgcmd\n\nrgcmd=\"${basecmd/_3chr/} \\\n  --condition-list ${mntpt}example/snplist_rm.txt \\\n  --condition-file pgen,${mntpt}example/example \\\n  --out ${mntpt}test/test_out_cond2\"\n\n# run regenie\nregenie $rgcmd\n\nif ! 
cmp --silent \\\n  ${REGENIE_PATH}test/test_out_cond_Y2.regenie \\\n  ${REGENIE_PATH}test/test_out_cond2_Y2.regenie \nthen\n  print_err\nelif [ `grep \"n_used = 6\" ${REGENIE_PATH}test/test_out_cond*log | wc -l` != \"2\" ]; then\n  print_err\nfi\n\n\n# with skat\n(( i++ ))\necho -e \"==>Running test #$i\"\nrgcmd=\"--step 2 \\\n  --ignore-pred \\\n  --bed ${mntpt}example/example_3chr \\\n  --covarFile ${mntpt}example/covariates.txt${fsuf} \\\n  --phenoFile ${mntpt}example/phenotype_bin.txt${fsuf} \\\n  --phenoCol Y1 \\\n  --set-list ${mntpt}example/example_3chr.setlist \\\n  --anno-file ${mntpt}example/example_3chr.annotations \\\n  --mask-def ${mntpt}example/example_3chr.masks \\\n  --vc-tests skat \\\n  --bsize 15 \\\n  --aaf-bins 0.2 \\\n  --write-mask-snplist \\\n  --out ${mntpt}test/test_out_vc\"\n\n# run regenie\nregenie $rgcmd \n\nif ! grep -q \"all.*SKAT\" ${REGENIE_PATH}test/test_out_vc_Y1.regenie  \nthen\n  print_err\nfi\n\n##############\necho \"SUCCESS: REGENIE build passed the tests!\"\n# file cleanup\nrm -f ${REGENIE_PATH}test/fit_bin_* ${REGENIE_PATH}test/test_bin_out* ${REGENIE_PATH}test/test_out* ${REGENIE_PATH}test/tmp[12].txt\n\n"
  },
  {
    "path": "test/test_docker.sh",
    "content": "#!/usr/bin/env bash\n\n### REGENIE TEST SCRIPT TO USE WITH DOCKER IMAGE ###\n# Functions used\nhelp_msg=\"Check the docker image and re-build if needed.\"\nfail_msg=\"Step 1 of REGENIE did not finish successfully.\"\nerr_msg=\"Docker image did not build successfully.\"\nprint_err () { \n  echo \"$err_msg\"; exit 1 \n}\nprint_simple_err () {\n  echo \"ERROR: ${1}\"; exit 1 \n}\nprint_custom_err () {\n  echo \"ERROR: ${1} $help_msg\"; exit 1 \n}\n\n### READ OPTIONS\nif [ \"$#\" -eq 0 ]; then\n  print_simple_err \"Need to specify options. Usage: test_docker.sh <PATH_TO_CLONED_REGENIE_REPO> <DOCKER_IMAGE_TAG>\"\nfi\n\nREGENIE_PATH=\"$1\" \nDOCKER_IMAGE=$2\n\n# quick check src/example folders are present\nif [ ! -d \"${REGENIE_PATH}/src\" ] || [ ! -d \"${REGENIE_PATH}/example\" ]; then\n  print_simple_err \"First input argument must be the directory where Regenie repo was cloned\"\nfi \ncd $REGENIE_PATH\n\n# check docker image\nif [ -z $DOCKER_IMAGE ]; then\n  print_simple_err \"Need to pass docker image tag.\"\nelif [[ \"$(docker images -q $DOCKER_IMAGE 2> /dev/null)\" == \"\" ]]; then\n  print_simple_err \"Image with tag \\\"${DOCKER_IMAGE}\\\" does not exist!\"\nfi\n\n# Create test folder to store results and use as mounting point\nREGENIE_PATH=$(pwd)/  # use absolute path\n# where to mount in container\nmntpt=/docker/ \n\n# If compiling was done with Boost Iostream library, use gzipped files as input\nif docker run -v ${REGENIE_PATH}:${mntpt} --rm $DOCKER_IMAGE regenie --version | grep -q \"gz\"; then\n  fsuf=.gz\n  arg_gz=\"--gz\"\nfi\n\necho \"** Checking docker image \\\"${DOCKER_IMAGE}\\\" **\"\necho -e \"  -> Mounting directory $REGENIE_PATH to /docker/ \\n\"\n\necho -e \"==>Running step 1 of REGENIE\"\n# Prepare regenie command to run for Step 1\nbasecmd=\"--step 1 \\\n  --bed ${mntpt}example/example \\\n  --exclude ${mntpt}example/snplist_rm.txt \\\n  --covarFile ${mntpt}example/covariates.txt${fsuf} \\\n  --phenoFile ${mntpt}example/phenotype_bin.txt${fsuf} \\\n  --remove ${mntpt}example/fid_iid_to_remove.txt \\\n  --bsize 100 \\\n  --bt $arg_gz\"\n\nrgcmd=\"$basecmd \\\n  --lowmem \\\n  --lowmem-prefix tmp_rg \\\n  --out ${mntpt}test/fit_bin_out\"\n\ndocker run -v ${REGENIE_PATH}:${mntpt} --rm $DOCKER_IMAGE regenie $rgcmd\n\n## quick check that the correct files have been created\nif [ ! -f \"${REGENIE_PATH}test/fit_bin_out.log\" ] || \\\n  [ ! -f \"${REGENIE_PATH}test/fit_bin_out_pred.list\" ] || \\\n  [ ! -f \"${REGENIE_PATH}test/fit_bin_out_1.loco$fsuf\" ] || \\\n  [ ! -f \"${REGENIE_PATH}test/fit_bin_out_2.loco$fsuf\" ]; then\n  print_custom_err \"$fail_msg\"\nelif [ \"`grep \\\"0.4504\\\" ${REGENIE_PATH}test/fit_bin_out.log | grep \\\"min value\\\"`\" = \"\" ]; then\n  print_custom_err \"$fail_msg\"\nfi\n\n\n#### Run step 1 splitting across jobs for level 0\nnjobs=4\necho -e \"==>Re-running step 1 splitting in $njobs jobs\"\n# pt1 - run regenie before l0\nrgcmd=\"$basecmd \\\n  --split-l0 ${mntpt}test/fit_bin_parallel,$njobs \\\n  --out ${mntpt}test/fit_bin_l0\"\n\ndocker run -v ${REGENIE_PATH}:${mntpt} --rm $DOCKER_IMAGE regenie $rgcmd\nif [ ! -f \"${REGENIE_PATH}test/fit_bin_parallel.master\" ]; then\n  print_custom_err \"$fail_msg\"\nfi\n\n# pt2 - run regenie for l0\nnj=`seq 1 $njobs`\nfor job in $nj; do\n  rgcmd=\"$basecmd \\\n    --run-l0 ${mntpt}test/fit_bin_parallel.master,$job \\\n    --out ${mntpt}test/fit_bin_l0\"\n\n  docker run -v ${REGENIE_PATH}:${mntpt} --rm $DOCKER_IMAGE regenie $rgcmd\n  if [ ! 
-f \"${REGENIE_PATH}test/fit_bin_parallel_job${job}_l0_Y1\" ]; then\n    print_custom_err \"$fail_msg\"\n  fi\ndone\n\n\n# pt3 - run regenie for l1\nrgcmd=\"$basecmd \\\n  --run-l1 ${mntpt}test/fit_bin_parallel.master \\\n  --out ${mntpt}test/fit_bin_l1\"\n\ndocker run -v ${REGENIE_PATH}:${mntpt} --rm $DOCKER_IMAGE regenie $rgcmd\n\nif [ ! -f \"${REGENIE_PATH}test/fit_bin_l1_1.loco$fsuf\" ]; then\n  print_custom_err \"$fail_msg\"\nelif ! cmp --silent \\\n  \"${REGENIE_PATH}test/fit_bin_out_1.loco$fsuf\" \\\n  \"${REGENIE_PATH}test/fit_bin_l1_1.loco$fsuf\" \nthen\n  print_custom_err \"$fail_msg\"\nelif ! cmp --silent \\\n  \"${REGENIE_PATH}test/fit_bin_out_2.loco$fsuf\" \\\n  \"${REGENIE_PATH}test/fit_bin_l1_2.loco$fsuf\" \nthen\n  print_custom_err \"$fail_msg\"\nfi\n\n\n\n##########\n##########\n#### Step 2\ni=1\necho -e \"==>Running step 2 of REGENIE; test #$i\"\nrgcmd=\"--step 2 \\\n  --bgen ${mntpt}example/example.bgen \\\n  --covarFile ${mntpt}example/covariates.txt${fsuf} \\\n  --phenoFile ${mntpt}example/phenotype_bin.txt${fsuf} \\\n  --remove ${mntpt}example/fid_iid_to_remove.txt \\\n  --bsize 200 \\\n  --bt \\\n  --firth --approx \\\n  --pThresh 0.01 \\\n  --pred ${mntpt}test/fit_bin_out_pred.list \\\n  $arg_gz \\\n  --out ${mntpt}test/test_bin_out_firth\"\n\ndocker run -v ${REGENIE_PATH}:${mntpt} --rm $DOCKER_IMAGE regenie $rgcmd\n\n##  do this way so zcat works on OSX\nif [ -f ${REGENIE_PATH}test/test_bin_out_firth_Y1.regenie.gz ]; then\n  ( zcat < ${REGENIE_PATH}test/test_bin_out_firth_Y1.regenie.gz ) > ${REGENIE_PATH}test/test_bin_out_firth_Y1.regenie\nfi\n\nif [ \"`cat ${REGENIE_PATH}test/test_bin_out_firth_Y1.regenie | wc -l`\" != \"1001\" ]\nthen\n  print_err\nfi\n\n\n(( i++ ))\necho -e \"\\n==>Running test #$i\\n\"\n# interaction tests\nrgcmd=\"--step 2 \\\n  --bed ${mntpt}example/example \\\n  --covarFile ${mntpt}example/covariates_wBin.txt \\\n  --phenoFile ${mntpt}example/phenotype_bin.txt${fsuf} \\\n  --bsize 200 \\\n  --ignore-pred \\\n  --covarColList V1,V5 \\\n  --catCovarList V5 \\\n  --interaction V5 \\\n  --force-qt \\\n  --out ${mntpt}test/test_bin_out_inter\"\n\n# run regenie\ndocker run -v ${REGENIE_PATH}:${mntpt} --rm $DOCKER_IMAGE regenie $rgcmd\n\nif [ `grep \"^1 1 .*ADD-INT\" ${REGENIE_PATH}test/test_bin_out_inter_Y1.regenie | wc -l` != 5 ]; then\n  print_err\nfi\n\n\n(( i++ ))\necho -e \"==>Running test #$i\"\n# Next test\nbasecmd=\"--step 2 \\\n  --bed ${mntpt}example/example_3chr \\\n  --ref-first \\\n  --covarFile ${mntpt}example/covariates_wBin.txt \\\n  --covarColList V{1:2},V4 \\\n  --phenoFile ${mntpt}example/phenotype_bin.txt${fsuf} \\\n  --phenoColList Y2 \\\n  --bsize 100 \\\n  --test dominant \\\n  --ignore-pred\"\nrgcmd=\"$basecmd \\\n  --chrList 2,3 \\\n  --write-samples \\\n  --print-pheno \\\n  --force-qt \\\n  --out ${mntpt}test/test_out\"\n\ndocker run -v ${REGENIE_PATH}:${mntpt} --rm $DOCKER_IMAGE regenie $rgcmd\n\n# check files\nif [ ! 
-f \"${REGENIE_PATH}test/test_out_Y2.regenie.ids\" -o -f \"${REGENIE_PATH}test/test_out_Y1.regenie.ids\" ]\nthen\n  print_err\nelif (( $(head -n 1 ${REGENIE_PATH}test/test_out_Y2.regenie.ids | cut -f1) != \"Y2\" )); then\n  print_err\nelif (( $(head -n 1 \"${REGENIE_PATH}test/test_out_Y2.regenie.ids\" | tr '\\t' '\\n' | wc -l) != 2 )); then\n  print_err\nelif (( `grep \"mog_\" \"${REGENIE_PATH}test/test_out_Y2.regenie\" | wc -l` > 0 )); then\n  print_err\nelif (( `grep \"ADD\" \"${REGENIE_PATH}test/test_out_Y2.regenie\" | wc -l` > 0 )); then\n  print_err\nelif [ \"`cut -d ' ' -f1-5 ${REGENIE_PATH}test/test_out_Y2.regenie | sed '2q;d'`\" != \"`grep \\\"^2\\\" ${REGENIE_PATH}example/example_3chr.bim | head -n 1 | awk '{print $1,$4,$2,$5,$6}'`\" ]; then\n  print_err\nfi\n\n(( i++ ))\necho -e \"==>Running test #$i\"\n# Next test\nrgcmd=\"$basecmd \\\n  --catCovarList V4 \\\n  --force-qt \\\n  --extract ${mntpt}test/test_out.snplist \\\n  --out ${mntpt}test/test_out_extract\"\n\nawk '{if($1!=1) {print $2}}'  ${REGENIE_PATH}example/example_3chr.bim > ${REGENIE_PATH}test/test_out.snplist\n\ndocker run -v ${REGENIE_PATH}:${mntpt} --rm $DOCKER_IMAGE regenie $rgcmd\n\nif ! cmp --silent \\\n  ${REGENIE_PATH}test/test_out_Y2.regenie \\\n  ${REGENIE_PATH}test/test_out_extract_Y2.regenie \nthen\n  print_err\nelif (( `grep \"n_cov = 3\" \"${REGENIE_PATH}test/test_out_extract.log\" | wc -l` != 1 )); then\n  print_err\nfi\n\n(( i++ ))\necho -e \"==>Running test #$i\"\n# First command (V1)\nrgcmd=\"--step 2 \\\n  --bed ${mntpt}example/example_3chr_masks \\\n  --covarFile ${mntpt}example/covariates.txt${fsuf} \\\n  --phenoFile ${mntpt}example/phenotype_bin.txt${fsuf} \\\n  --remove ${mntpt}example/fid_iid_to_remove.txt \\\n  --bsize 10 \\\n  --ignore-pred \\\n  --htp TEST \\\n  --force-qt \\\n  --out ${mntpt}test/test_out_masks_V1\"\n# run regenie\ndocker run -v ${REGENIE_PATH}:${mntpt} --rm $DOCKER_IMAGE regenie $rgcmd\n\n# Second command (V2)\n# build masks\nawk '{print $4}' ${REGENIE_PATH}example/example_3chr.setlist | tr ',' '\\n' > ${REGENIE_PATH}test/tmp1.txt \nrgcmd=\"--step 2 \\\n  --ignore-pred \\\n  --bed ${mntpt}example/example_3chr \\\n  --extract ${mntpt}test/tmp1.txt \\\n  --covarFile ${mntpt}example/covariates.txt${fsuf} \\\n  --phenoFile ${mntpt}example/phenotype_bin.txt${fsuf} \\\n  --remove ${mntpt}example/fid_iid_to_remove.txt \\\n  --set-list ${mntpt}example/example_3chr.setlist \\\n  --anno-file ${mntpt}example/example_3chr.annotations \\\n  --mask-def ${mntpt}example/example_3chr.masks \\\n  --write-mask \\\n  --write-setlist ${mntpt}example/example_3chr.write_sets \\\n  --bsize 15 \\\n  --aaf-bins 0.2 \\\n  --chrList 1,3 \\\n  --htp TEST \\\n  --force-qt \\\n  --out ${mntpt}test/test_out_masks_V2\"\n\n# run regenie\ndocker run -v ${REGENIE_PATH}:${mntpt} --rm $DOCKER_IMAGE regenie $rgcmd\n\nhead ${REGENIE_PATH}test/test_out_masks_V2_Y1.regenie -n 3 | tail -n 2 | cut --complement -f4,5 > ${REGENIE_PATH}test/tmp1.txt\ntail -n 1 ${REGENIE_PATH}test/test_out_masks_V2_Y1.regenie | cut --complement -f4,5 >> ${REGENIE_PATH}test/tmp1.txt\ncat ${REGENIE_PATH}test/test_out_masks_V1_Y1.regenie | cut --complement -f4,5 > ${REGENIE_PATH}test/tmp2.txt\n\nif ! cmp --silent \\\n  ${REGENIE_PATH}test/tmp1.txt \\\n  ${REGENIE_PATH}test/tmp2.txt ; then\n  print_err\nelif [ ! 
-f ${REGENIE_PATH}test/test_out_masks_V2_masks.bed ]; then\n  print_err\nelif [ \"$(hexdump -e \\\"%07_ax\\ \\\"\\ 16/1\\ \\\"\\ %02x\\\"\\ \\\"\\\\n\\\"  -n 3 ${REGENIE_PATH}test/test_out_masks_V2_masks.bed | head -n 1 | awk '{print $2,$3,$4}' | tr ' ' ',')\" != \"6c,1b,01\" ]; then\n  print_err\nelif [ \"`wc -l ${REGENIE_PATH}test/test_out_masks_V2_masks.{bim,fam} | awk '{print $1}' | head -n 2| paste -sd','`\" != \"4,494\" ]; then\n  print_err\nelif [ \"`cat ${REGENIE_PATH}test/test_out_masks_V2_tmp2.setlist | head -n 1 | tr ',' '\\n' | wc -l`\" != \"2\" ]; then\n  print_err\nfi\n\n\n(( i++ ))\necho -e \"==>Running test #$i\"\n# build masks\nawk '{print $4}' ${REGENIE_PATH}example/example_3chr.setlist | tr ',' '\\n' > ${REGENIE_PATH}test/tmp1.txt \nrgcmd=\"--step 2 \\\n  --ignore-pred \\\n  --bed ${mntpt}example/example_3chr \\\n  --extract ${mntpt}test/tmp1.txt \\\n  --covarFile ${mntpt}example/covariates.txt${fsuf} \\\n  --phenoFile ${mntpt}example/phenotype_bin.txt${fsuf} \\\n  --set-list ${mntpt}example/example_3chr.setlist \\\n  --anno-file ${mntpt}example/example_3chr.annotations \\\n  --mask-def ${mntpt}example/example_3chr.masks \\\n  --mask-lovo SET1,M1,0.2 \\\n  --htp TEST \\\n  --force-qt \\\n  --out ${mntpt}test/test_out_masks_loo\"\n\n# run regenie\ndocker run -v ${REGENIE_PATH}:${mntpt} --rm $DOCKER_IMAGE regenie $rgcmd\n\nif [ ! -f ${REGENIE_PATH}test/test_out_masks_loo_Y1.regenie ]; then\n  print_err\nelif [ `cat ${REGENIE_PATH}test/test_out_masks_loo_Y1.regenie | wc -l` != 21 ]; then\n  print_err\nelif [ `grep \"_mog\" ${REGENIE_PATH}test/test_out_masks_loo_Y1.regenie | wc -l` != 18 ]; then\n  print_err\nfi\n\n\n(( i++ ))\necho -e \"==>Running test #$i\"\n# build masks using set domains\nrgcmd=\"--step 2 \\\n  --ignore-pred \\\n  --bed ${mntpt}example/example_3chr \\\n  --covarFile ${mntpt}example/covariates.txt${fsuf} \\\n  --phenoFile ${mntpt}example/phenotype_bin.txt${fsuf} \\\n  --remove ${mntpt}example/fid_iid_to_remove.txt \\\n  --set-list ${mntpt}example/example_3chr.setlist \\\n  --anno-file ${mntpt}example/example_3chr.annotationsV2 \\\n  --mask-def ${mntpt}example/example_3chr.masks \\\n  --check-burden-files \\\n  --bsize 20 \\\n  --aaf-bins 0.2 \\\n  --force-qt \\\n  --out ${mntpt}test/test_out_masks_V3\"\n\n# run regenie\ndocker run -v ${REGENIE_PATH}:${mntpt} --rm $DOCKER_IMAGE regenie $rgcmd\n\nif ! [[ \"`head -n 1 ${REGENIE_PATH}test/test_out_masks_V3_Y1.regenie`\" =~ ^\\#\\#MASKS.* ]]\nthen\n  print_err\nelif [ `grep \"SET2.*.M1\" ${REGENIE_PATH}test/test_out_masks_V3_Y1.regenie | wc -l` != \"4\" ]\nthen\n  print_err\nelif [ `grep -e \"->Detected 1\" ${REGENIE_PATH}test/test_out_masks_V3_masks_report.txt | wc -l` != \"4\" ]\nthen\n  print_err\nfi\n\n\n(( i++ ))\necho -e \"==>Running test #$i\"\n# conditional analyses\nrgcmd=\"${basecmd/_3chr/} \\\n  --condition-list ${mntpt}example/snplist_rm.txt \\\n  --sex-specific female \\\n  --force-qt \\\n  --out ${mntpt}test/test_out_cond\"\n\n# run regenie\ndocker run -v ${REGENIE_PATH}:${mntpt} --rm $DOCKER_IMAGE regenie $rgcmd\n\nrgcmd=\"${basecmd/_3chr/} \\\n  --condition-list ${mntpt}example/snplist_rm.txt \\\n  --condition-file pgen,${mntpt}example/example \\\n  --force-qt \\\n  --out ${mntpt}test/test_out_cond2\"\n\n# run regenie\ndocker run -v ${REGENIE_PATH}:${mntpt} --rm $DOCKER_IMAGE regenie $rgcmd\n\nif ! 
cmp --silent \\\n  ${REGENIE_PATH}test/test_out_cond_Y2.regenie \\\n  ${REGENIE_PATH}test/test_out_cond2_Y2.regenie \nthen\n  print_err\nelif [ `grep \"n_used = 6\" ${REGENIE_PATH}test/test_out_cond*log | wc -l` != \"2\" ]; then\n  print_err\nfi\n\n\n# with skat\n(( i++ ))\necho -e \"==>Running test #$i\"\nrgcmd=\"--step 2 \\\n  --ignore-pred \\\n  --bed ${mntpt}example/example_3chr \\\n  --covarFile ${mntpt}example/covariates.txt${fsuf} \\\n  --phenoFile ${mntpt}example/phenotype_bin.txt${fsuf} \\\n  --phenoCol Y1 \\\n  --set-list ${mntpt}example/example_3chr.setlist \\\n  --anno-file ${mntpt}example/example_3chr.annotations \\\n  --mask-def ${mntpt}example/example_3chr.masks \\\n  --vc-tests skat \\\n  --bsize 15 \\\n  --aaf-bins 0.2 \\\n  --write-mask-snplist \\\n  --force-qt \\\n  --out ${mntpt}test/test_out_vc\"\n\n# run regenie\ndocker run -v ${REGENIE_PATH}:${mntpt} --rm $DOCKER_IMAGE regenie $rgcmd\n\nif ! grep -q \"all.*SKAT\" ${REGENIE_PATH}test/test_out_vc_Y1.regenie  \nthen\n  print_err\nfi\n\necho \"SUCCESS: Docker image passed the tests!\"\necho -e \"\\nYou can run regenie using for example:\"\necho -e \"docker run -v <host_path>:<mount_path> $DOCKER_IMAGE regenie <command_options>\\n\"\n# file cleanup\nrm -f ${REGENIE_PATH}test/fit_bin_* ${REGENIE_PATH}test/test_bin_out_* ${REGENIE_PATH}test/test_out* ${REGENIE_PATH}test/tmp[12].txt\n\n"
  },
  {
    "path": "test/test_singularity.sh",
    "content": "#!/usr/bin/env bash\n\n### REGENIE TEST SCRIPT \n# Functions used\nhelp_msg=\"Update to most recent REGENIE version (using 'git pull').\"\nfail_msg=\"Step 1 of REGENIE did not finish successfully.\"\nerr_msg=\"Uh oh, REGENIE did not build successfully. $help_msg\"\nprint_err () { \n  echo \"$err_msg\"; exit 1 \n}\nprint_simple_err () {\n  echo \"ERROR: ${1}\"; exit 1 \n}\nprint_custom_err () {\n  echo \"ERROR: ${1} $help_msg\"; exit 1 \n}\n\n\n### READ OPTIONS\ninfo_msg='\nUsage: ./test_singularity\n'\nREGENIE_PATH=$(dirname $(dirname $(realpath \"${0}\" )))\n\n\n# quick check src/example folders are present\nif [ ! -d \"${REGENIE_PATH}/src\" ] || [ ! -d \"${REGENIE_PATH}/example\" ]; then\n  print_simple_err \"First input argument must be the directory where Regenie repo was cloned.\"\nelse\n  cd $REGENIE_PATH\nfi \n\nREGENIE_PATH=$(pwd)/  # use absolute path\nmntpt=\nregenie_bin=`command -v regenie | head -n 1`\n\nif [ ! -f \"$regenie_bin\" ]; then\n  print_simple_err \"Regenie binary cannot be found. Specify the Regenie source path (--path) or compile the software first.\"\nfi\n\n# If compiling was done with Boost Iostream library, use gzipped files as input\nif $regenie_bin --version | grep -q \"gz\"; then\n  fsuf=.gz\n  arg_gz=\"--gz\"\nfi\n\n\necho -e \"==>Running step 1 of REGENIE\"\n# Prepare regenie command to run for Step 1\n## with transposed phenotype file format\n#  --tpheno-file ${mntpt}example/tphenotype_bin.txt${fsuf} \\\n#  --tpheno-indexCol 4 \\\n#  --tpheno-ignoreCols {1:3} \\\nbasecmd=\"--step 1 \\\n  --bed ${mntpt}example/example \\\n  --exclude ${mntpt}example/snplist_rm.txt \\\n  --covarFile ${mntpt}example/covariates.txt${fsuf} \\\n  --phenoFile ${mntpt}example/phenotype_bin.txt${fsuf} \\\n  --remove ${mntpt}example/fid_iid_to_remove.txt \\\n  --bsize 100 \\\n  --bt $arg_gz\"\n\nrgcmd=\"$basecmd \\\n  --lowmem \\\n  --lowmem-prefix tmp_rg \\\n  --out ${mntpt}test/fit_bin_out\"\n\n# run regenie\n$regenie_bin $rgcmd\n\n## quick check that the correct files have been created\nif [ ! -f \"${REGENIE_PATH}test/fit_bin_out.log\" ] || \\\n  [ ! -f \"${REGENIE_PATH}test/fit_bin_out_pred.list\" ] || \\\n  [ ! -f \"${REGENIE_PATH}test/fit_bin_out_1.loco$fsuf\" ] || \\\n  [ ! -f \"${REGENIE_PATH}test/fit_bin_out_2.loco$fsuf\" ]; then\n  print_custom_err \"$fail_msg\"\nelif [ \"`grep \\\"0.4504\\\" ${REGENIE_PATH}test/fit_bin_out.log | grep \\\"min value\\\"`\" = \"\" ]; then\n  print_custom_err \"$fail_msg\"\nfi\n\n#### Run step 1 splitting across jobs for level 0\nnjobs=4\necho -e \"==>Re-running step 1 splitting in $njobs jobs\"\n# pt1 - run regenie before l0\nrgcmd=\"$basecmd \\\n  --split-l0 ${mntpt}test/fit_bin_parallel,$njobs \\\n  --out ${mntpt}test/fit_bin_l0\"\n\n$regenie_bin $rgcmd\nif [ ! -f \"${REGENIE_PATH}test/fit_bin_parallel.master\" ]; then\n  print_custom_err \"$fail_msg\"\nfi\n\n# pt2 - run regenie for l0\nnj=`seq 1 $njobs`\nfor job in $nj; do\n  rgcmd=\"$basecmd \\\n    --run-l0 ${mntpt}test/fit_bin_parallel.master,$job \\\n    --out ${mntpt}test/fit_bin_l0\"\n\n  $regenie_bin $rgcmd\n  if [ ! -f \"${REGENIE_PATH}test/fit_bin_parallel_job${job}_l0_Y1\" ]; then\n    print_custom_err \"$fail_msg\"\n  fi\ndone\n\n\n# pt3 - run regenie for l1\nrgcmd=\"$basecmd \\\n  --run-l1 ${mntpt}test/fit_bin_parallel.master \\\n  --out ${mntpt}test/fit_bin_l1\"\n\n$regenie_bin $rgcmd\n\nif [ ! -f \"${REGENIE_PATH}test/fit_bin_l1_1.loco$fsuf\" ]; then\n  print_custom_err \"$fail_msg\"\nelif ! 
cmp -s \\\n  \"${REGENIE_PATH}test/fit_bin_out_1.loco$fsuf\" \\\n  \"${REGENIE_PATH}test/fit_bin_l1_1.loco$fsuf\" \nthen\n  print_custom_err \"$fail_msg\"\nelif ! cmp -s \\\n  \"${REGENIE_PATH}test/fit_bin_out_2.loco$fsuf\" \\\n  \"${REGENIE_PATH}test/fit_bin_l1_2.loco$fsuf\" \nthen\n  print_custom_err \"$fail_msg\"\nfi\n\n\n\n##########\n##########\n#### Step 2\ni=1\necho -e \"==>Running step 2 of REGENIE; test #$i\"\nrgcmd=\"--step 2 \\\n  --bgen ${mntpt}example/example.bgen \\\n  --covarFile ${mntpt}example/covariates.txt${fsuf} \\\n  --phenoFile ${mntpt}example/phenotype_bin.txt${fsuf} \\\n  --remove ${mntpt}example/fid_iid_to_remove.txt \\\n  --bsize 200 \\\n  --bt \\\n  --firth --approx \\\n  --pThresh 0.01 \\\n  --pred ${mntpt}test/fit_bin_out_pred.list \\\n  $arg_gz \\\n  --out ${mntpt}test/test_bin_out_firth\"\n\n# run regenie\n$regenie_bin $rgcmd\n\n##  do this way so zcat works on OSX\nif [ -f ${REGENIE_PATH}test/test_bin_out_firth_Y1.regenie.gz ]; then\n  ( zcat < ${REGENIE_PATH}test/test_bin_out_firth_Y1.regenie.gz ) > ${REGENIE_PATH}test/test_bin_out_firth_Y1.regenie\nfi\n\nif [ \"`cat ${REGENIE_PATH}test/test_bin_out_firth_Y1.regenie | wc -l`\" != \"1001\" ]\nthen\n  print_err\nfi\n\n\n(( i++ ))\necho -e \"\\n==>Running test #$i\\n\"\n# interaction tests\nrgcmd=\"--step 2 \\\n  --bed ${mntpt}example/example \\\n  --covarFile ${mntpt}example/covariates_wBin.txt \\\n  --phenoFile ${mntpt}example/phenotype_bin.txt${fsuf} \\\n  --bsize 200 \\\n  --ignore-pred \\\n  --covarColList V1,V5 \\\n  --catCovarList V5 \\\n  --interaction V5 \\\n  --out ${mntpt}test/test_bin_out_inter\"\n\n# run regenie\n$regenie_bin $rgcmd\n\nif [ `grep \"^1 1 .*ADD-INT\" ${REGENIE_PATH}test/test_bin_out_inter_Y1.regenie | wc -l` != 5 ]; then\n  print_err\nfi\n\n\n(( i++ ))\necho -e \"\\n==>Running test #$i\\n\"\n# interaction tests\nrgcmd=\"--step 2 \\\n  --bed ${mntpt}example/example --ref-first \\\n  --covarFile ${mntpt}example/covariates.txt \\\n  --phenoFile ${mntpt}example/phenotype_bin.txt${fsuf} \\\n  --bsize 200 \\\n  --ignore-pred \\\n  --interaction-snp 1 \\\n  --out ${mntpt}test/test_bin_out_inter\"\n\n# run regenie\n$regenie_bin $rgcmd\n\nrgcmd+=\"2 --interaction-file bgen,example/example.bgen --interaction-file-reffirst\"\n# run regenie\n$regenie_bin $rgcmd\n\nif ! cmp -s \\\n  ${REGENIE_PATH}test/test_bin_out_inter_Y1.regenie \\\n  ${REGENIE_PATH}test/test_bin_out_inter2_Y1.regenie \nthen\n  print_err\nfi\n\n\n(( i++ ))\necho -e \"==>Running test #$i\"\n# Next test\nbasecmd=\"--step 2 \\\n  --bed ${mntpt}example/example_3chr \\\n  --ref-first \\\n  --covarFile ${mntpt}example/covariates_wBin.txt \\\n  --covarColList V{1:2},V4 \\\n  --phenoFile ${mntpt}example/phenotype_bin.txt${fsuf} \\\n  --phenoColList Y2 \\\n  --bsize 100 \\\n  --test dominant \\\n  --ignore-pred\"\nrgcmd=\"$basecmd \\\n  --chrList 2,3 \\\n  --write-samples \\\n  --print-pheno \\\n  --out ${mntpt}test/test_out\"\n\n# run regenie\n$regenie_bin $rgcmd\n\n# check files\nif [ ! 
-f \"${REGENIE_PATH}test/test_out_Y2.regenie.ids\" -o -f \"${REGENIE_PATH}test/test_out_Y1.regenie.ids\" ]\nthen\n  print_err\nelif (( $(head -n 1 ${REGENIE_PATH}test/test_out_Y2.regenie.ids | cut -f1) != \"Y2\" )); then\n  print_err\nelif (( $(head -n 1 \"${REGENIE_PATH}test/test_out_Y2.regenie.ids\" | tr '\\t' '\\n' | wc -l) != 2 )); then\n  print_err\nelif (( `grep \"mog_\" \"${REGENIE_PATH}test/test_out_Y2.regenie\" | wc -l` > 0 )); then\n  print_err\nelif (( `grep \"ADD\" \"${REGENIE_PATH}test/test_out_Y2.regenie\" | wc -l` > 0 )); then\n  print_err\nelif [ \"`cut -d ' ' -f1-5 ${REGENIE_PATH}test/test_out_Y2.regenie | sed '2q;d'`\" != \"`grep \\\"^2\\\" ${REGENIE_PATH}example/example_3chr.bim | head -n 1 | awk '{print $1,$4,$2,$5,$6}'`\" ]; then\n  print_err\nfi\n\n\n(( i++ ))\necho -e \"==>Running test #$i\"\n# Next test\nrgcmd=\"$basecmd \\\n  --catCovarList V4 \\\n  --extract ${mntpt}test/test_out.snplist \\\n  --out ${mntpt}test/test_out_extract\"\n\nawk '{if($1!=1) {print $2}}'  ${REGENIE_PATH}example/example_3chr.bim > ${REGENIE_PATH}test/test_out.snplist\n\n# run regenie\n$regenie_bin $rgcmd\n\nif ! cmp -s \\\n  ${REGENIE_PATH}test/test_out_Y2.regenie \\\n  ${REGENIE_PATH}test/test_out_extract_Y2.regenie \nthen\n  print_err\nelif (( `grep \"n_cov = 3\" \"${REGENIE_PATH}test/test_out_extract.log\" | wc -l` != 1 )); then\n  print_err\nfi\n\n(( i++ ))\necho -e \"==>Running test #$i\"\n# First command (V1)\nrgcmd=\"--step 2 \\\n  --bed ${mntpt}example/example_3chr_masks \\\n  --covarFile ${mntpt}example/covariates.txt${fsuf} \\\n  --phenoFile ${mntpt}example/phenotype_bin.txt${fsuf} \\\n  --remove ${mntpt}example/fid_iid_to_remove.txt \\\n  --bsize 10 \\\n  --ignore-pred \\\n  --htp TEST \\\n  --out ${mntpt}test/test_out_masks_V1\"\n# run regenie\n$regenie_bin $rgcmd\n\n# Second command (V2)\n# build masks\nawk '{print $4}' ${mntpt}example/example_3chr.setlist | tr ',' '\\n' > ${REGENIE_PATH}test/tmp1.txt \nrgcmd=\"--step 2 \\\n  --ignore-pred \\\n  --bed ${mntpt}example/example_3chr \\\n  --extract ${mntpt}test/tmp1.txt \\\n  --covarFile ${mntpt}example/covariates.txt${fsuf} \\\n  --phenoFile ${mntpt}example/phenotype_bin.txt${fsuf} \\\n  --remove ${mntpt}example/fid_iid_to_remove.txt \\\n  --set-list ${mntpt}example/example_3chr.setlist \\\n  --anno-file ${mntpt}example/example_3chr.annotations \\\n  --mask-def ${mntpt}example/example_3chr.masks \\\n  --write-mask \\\n  --write-setlist ${mntpt}example/example_3chr.write_sets \\\n  --bsize 15 \\\n  --aaf-bins 0.2 \\\n  --chrList 1,3 \\\n  --htp TEST \\\n  --write-mask-snplist \\\n  --out ${mntpt}test/test_out_masks_V2\"\n\n# run regenie\n$regenie_bin $rgcmd \n\nhead ${REGENIE_PATH}test/test_out_masks_V2_Y1.regenie -n 3 | tail -n 2 | cut --complement -f4,5 > ${REGENIE_PATH}test/tmp1.txt\ntail -n 1 ${REGENIE_PATH}test/test_out_masks_V2_Y1.regenie | cut --complement -f4,5 >> ${REGENIE_PATH}test/tmp1.txt\ncat ${REGENIE_PATH}test/test_out_masks_V1_Y1.regenie | cut --complement -f4,5 > ${REGENIE_PATH}test/tmp2.txt\n\nif ! cmp -s \\\n  ${REGENIE_PATH}test/tmp1.txt \\\n  ${REGENIE_PATH}test/tmp2.txt ; then\n  print_err\nelif [ ! 
-f ${REGENIE_PATH}test/test_out_masks_V2_masks.bed ]; then\n  print_err\nelif [ \"$(hexdump -e \\\"%07_ax\\ \\\"\\ 16/1\\ \\\"\\ %02x\\\"\\ \\\"\\\\n\\\"  -n 3 ${REGENIE_PATH}test/test_out_masks_V2_masks.bed | head -n 1 | awk '{print $2,$3,$4}' | tr ' ' ',')\" != \"6c,1b,01\" ]; then\n  print_err\nelif [ \"`wc -l ${REGENIE_PATH}test/test_out_masks_V2_masks.{bim,fam} | awk '{print $1}' | head -n 2| paste -sd','`\" != \"4,494\" ]; then\n  print_err\nelif [ ! -f ${REGENIE_PATH}test/test_out_masks_V2_masks.snplist ]; then\n  print_err\nelif [ \"`cat ${REGENIE_PATH}test/test_out_masks_V2_tmp2.setlist | head -n 1 | tr ',' '\\n' | wc -l`\" != \"2\" ]; then\n  print_err\nfi\n\n\n(( i++ ))\necho -e \"==>Running test #$i\"\n# build masks\nawk '{print $4}' ${mntpt}example/example_3chr.setlist | tr ',' '\\n' > ${REGENIE_PATH}test/tmp1.txt \nrgcmd=\"--step 2 \\\n  --ignore-pred \\\n  --bed ${mntpt}example/example_3chr \\\n  --extract ${mntpt}test/tmp1.txt \\\n  --covarFile ${mntpt}example/covariates.txt${fsuf} \\\n  --phenoFile ${mntpt}example/phenotype_bin.txt${fsuf} \\\n  --set-list ${mntpt}example/example_3chr.setlist \\\n  --anno-file ${mntpt}example/example_3chr.annotations \\\n  --mask-def ${mntpt}example/example_3chr.masks \\\n  --mask-lovo SET1,M1,0.2 \\\n  --htp TEST \\\n  --out ${mntpt}test/test_out_masks_loo\"\n\n# run regenie\n$regenie_bin $rgcmd \n\nif [ ! -f ${REGENIE_PATH}test/test_out_masks_loo_Y1.regenie ]; then\n  print_err\nelif [ `cat ${REGENIE_PATH}test/test_out_masks_loo_Y1.regenie | wc -l` != 21 ]; then\n  print_err\nelif [ `grep \"_mog\" ${REGENIE_PATH}test/test_out_masks_loo_Y1.regenie | wc -l` != 18 ]; then\n  print_err\nfi\n\n\n(( i++ ))\necho -e \"==>Running test #$i\"\n# build masks using set domains\nrgcmd=\"--step 2 \\\n  --ignore-pred \\\n  --bed ${mntpt}example/example_3chr \\\n  --covarFile ${mntpt}example/covariates.txt${fsuf} \\\n  --phenoFile ${mntpt}example/phenotype_bin.txt${fsuf} \\\n  --remove ${mntpt}example/fid_iid_to_remove.txt \\\n  --set-list ${mntpt}example/example_3chr.setlist \\\n  --anno-file ${mntpt}example/example_3chr.annotationsV2 \\\n  --mask-def ${mntpt}example/example_3chr.masks \\\n  --check-burden-files \\\n  --bsize 20 \\\n  --aaf-bins 0.2 \\\n  --out ${mntpt}test/test_out_masks_V3\"\n\n# run regenie\n$regenie_bin $rgcmd \n\nif ! [[ \"`head -n 1 ${REGENIE_PATH}test/test_out_masks_V3_Y1.regenie`\" =~ ^\\#\\#MASKS.* ]]\nthen\n  print_err\nelif [ `grep \"SET2.*.M1\" ${REGENIE_PATH}test/test_out_masks_V3_Y1.regenie | wc -l` != \"4\" ]\nthen\n  print_err\nelif [ `grep -e \"->Detected 1\" ${REGENIE_PATH}test/test_out_masks_V3_masks_report.txt | wc -l` != \"4\" ]\nthen\n  print_err\nfi\n\n(( i++ ))\necho -e \"==>Running test #$i\"\n# conditional analyses\nrgcmd=\"${basecmd/_3chr/} \\\n  --condition-list ${mntpt}example/snplist_rm.txt \\\n  --sex-specific female \\\n  --out ${mntpt}test/test_out_cond\"\n\n# run regenie\n$regenie_bin $rgcmd\n\nrgcmd=\"${basecmd/_3chr/} \\\n  --condition-list ${mntpt}example/snplist_rm.txt \\\n  --condition-file pgen,${mntpt}example/example \\\n  --out ${mntpt}test/test_out_cond2\"\n\n# run regenie\n$regenie_bin $rgcmd\n\nif ! 
cmp -s \\\n  ${REGENIE_PATH}test/test_out_cond_Y2.regenie \\\n  ${REGENIE_PATH}test/test_out_cond2_Y2.regenie \nthen\n  print_err\nelif [ `grep \"n_used = 6\" ${REGENIE_PATH}test/test_out_cond*log | wc -l` != \"2\" ]; then\n  print_err\nfi\n\n\n# with skat\n(( i++ ))\necho -e \"==>Running test #$i\"\nrgcmd=\"--step 2 \\\n  --ignore-pred \\\n  --bed ${mntpt}example/example_3chr \\\n  --covarFile ${mntpt}example/covariates.txt${fsuf} \\\n  --phenoFile ${mntpt}example/phenotype_bin.txt${fsuf} \\\n  --phenoCol Y1 \\\n  --set-list ${mntpt}example/example_3chr.setlist \\\n  --anno-file ${mntpt}example/example_3chr.annotations \\\n  --mask-def ${mntpt}example/example_3chr.masks \\\n  --vc-tests skat \\\n  --bsize 15 \\\n  --aaf-bins 0.2 \\\n  --write-mask-snplist \\\n  --out ${mntpt}test/test_out_vc\"\n\n# run regenie\n$regenie_bin $rgcmd \n\nif ! grep -q \"all.*SKAT\" ${REGENIE_PATH}test/test_out_vc_Y1.regenie  \nthen\n  print_err\nfi\n\n##############\necho \"SUCCESS: REGENIE build passed the tests!\"\n# file cleanup\nrm -f ${REGENIE_PATH}test/fit_bin_* ${REGENIE_PATH}test/test_bin_out* ${REGENIE_PATH}test/test_out* ${REGENIE_PATH}test/tmp[12].txt\n\n"
  }
]